From a902c236c9f0883a1cdec365490c57857e278147 Mon Sep 17 00:00:00 2001 From: Mario Fetka Date: Sat, 24 Nov 2012 17:08:51 +0100 Subject: [PATCH] Initial patches commit --- ...bility-patch-for-v5-network-controll.patch | 553 + ...-I-O-context-code-for-BFQ-v5-for-3.2.patch | 299 + ...compatibility-patch-for-v5-interface.patch | 391 + ...ps-kconfig-build-bits-for-BFQ-v5-3.2.patch | 46 + ...fa-backward-compatibility-with-broke.patch | 69 + ...troduce-the-BFQ-v5-I-O-sched-for-3.2.patch | 5986 + 3.2.34/01patch-2.6.33_atopcnt.patch | 174 + 3.2.34/02patch-2.6.33_atopacct.patch | 125 + 3.2.34/3.2.0-ck1.patch | 9093 + 3.2.34/3rd-3rdparty-1.0-tree.patch | 181 + .../3rd-3rdparty-button_hotplug-0.4.1.patch | 372 + ...3rd-3rdparty-gpio_button_hotplug-0.1.patch | 472 + 3.2.34/3rd-3rdparty-gpio_event_drv-0.1.patch | 1354 + 3.2.34/3rd-3rdparty-merge.patch | 156 + 3.2.34/3rd-3rdparty-netatop-0.1.1.patch | 1769 + 3.2.34/910-kobject_uevent.patch | 21 + 3.2.34/911-kobject_add_broadcast_uevent.patch | 85 + .../Add_CONFIG_VFAT_FS_DUALNAMES_option.patch | 145 + 3.2.34/accessfs-3.2-0.26.patch | 1036 + ...over-ide-drivers-when-both-are-built.patch | 36 + 3.2.34/aufs3-standalone-3.2.patch | 30657 +++ 3.2.34/bump/1021_linux-3.2.22.patch | 1245 + 3.2.34/bump/1022_linux-3.2.23.patch | 1862 + 3.2.34/bump/1023_linux-3.2.24.patch | 4684 + 3.2.34/bump/1024_linux-3.2.25.patch | 4503 + 3.2.34/bump/1025_linux-3.2.26.patch | 238 + 3.2.34/bump/1026_linux-3.2.27.patch | 3188 + 3.2.34/bump/1027_linux-3.2.28.patch | 1114 + 3.2.34/bump/1028_linux-3.2.29.patch | 4279 + 3.2.34/bump/1029_linux-3.2.30.patch | 5552 + 3.2.34/bump/1030_linux-3.2.31.patch | 3327 + 3.2.34/bump/1031_linux-3.2.32.patch | 6206 + 3.2.34/bump/1032_linux-3.2.33.patch | 3450 + 3.2.34/bump/1033_linux-3.2.34.patch | 3678 + 3.2.34/cloneconfig.patch | 41 + 3.2.34/colored-printk-3.2.33.patch | 337 + 3.2.34/hz-432-kconfig-option.patch | 25 + 3.2.34/hz-864-kconfig-option.patch | 25 + 3.2.34/imqmq-3.2.patch | 1603 + ...press-kernel-modules-on-installation.patch | 137 + 3.2.34/kernel-3.2-lsproduo.patch | 569 + 3.2.34/kernel-3.2-lsql.patch | 439 + 3.2.34/kernel-3.2-lsxhl.patch | 387 + 3.2.34/kernel-3.4.0-layer7-2.22.patch | 2132 + 3.2.34/kirkwood-jumbo-frame.patch | 135 + 3.2.34/linux-2.6-defaults-fat-utf8.patch | 15 + 3.2.34/linux-2.6-x86-tune-generic.patch | 13 + 3.2.34/linux-3.2-e2c-0.4.58.patch | 7807 + 3.2.34/linux-3.2.33-zfs.patch | 201830 +++++++++++++++ 3.2.34/lschlv2.patch | 256 + ...net-netfilter-IFWLOG-2.6.35-buildfix.patch | 32 + ...net-netfilter-IFWLOG-2.6.37-buildfix.patch | 15 + 3.2.34/net-netfilter-IFWLOG-mdv.patch | 264 + 3.2.34/net-netfilter-IFWLOG.patch | 269 + .../net-netfilter-psd-2.6.35-buildfix.patch | 11 + 3.2.34/net-netfilter-psd-mdv.patch | 235 + 3.2.34/net-netfilter-psd.patch | 420 + ...implement-rfc-1123-for-ftp-conntrack.patch | 190 + 3.2.34/netfilter-ip_conntrack_slp.patch | 185 + ...2-btrfs-Introduce-btrfs_get_maps_dev.patch | 39 + ...btrfs-0900-add-allocator-tracepoints.patch | 304 + ...-8001-rewrite-btrfs_trim_block_group.patch | 299 + ...k-and-disable-irq-during-space-alloc.patch | 40 + ...-8013-sector-size-check-during-mount.patch | 43 + ...ctl-to-determine-size-of-compressed-.patch | 158 + ...e-lzo-the-default-compression-scheme.patch | 68 + ...8024-workaround-for-cleaner-deadlock.patch | 32 + ...bal-block_rsv-when-creating-a-new-bl.patch | 61 + ...le-deadlock-when-opening-a-seed-devi.patch | 84 + ...rfs-allow-cross-subvolume-file-clone.patch | 47 + 3.2.34/series | 68 + 3.2.34/uksm-0.1.2.1-for-v3.2.ge.31.patch | 7032 + 
...rkwood-Add-support-for-Buffalo-LS-VL.patch | 381 + ...kwood-Add-support-for-Buffalo-LS-WVL.patch | 538 + 3.2.34/vserver-3.2.34-vs2.3.2.15.patch | 26125 ++ 3.2.34/wrapfs-v3.2.2-45-ga5296eb.patch | 2084 + ...bility-patch-for-v5-network-controll.patch | 553 + ...ps-kconfig-build-bits-for-BFQ-v5-3.3.patch | 99 + ...compatibility-patch-for-v5-interface.patch | 391 + ...troduce-the-BFQ-v5-I-O-sched-for-3.3.patch | 5624 + ...fa-backward-compatibility-with-broke.patch | 69 + 3.3.8/01patch-2.6.33_atopcnt.patch | 174 + 3.3.8/02patch-2.6.33_atopacct.patch | 125 + 3.3.8/3.3-ck1.patch | 8782 + 3.3.8/3rd-3rdparty-1.0-tree.patch | 181 + 3.3.8/3rd-3rdparty-button_hotplug-0.4.1.patch | 372 + ...3rd-3rdparty-gpio_button_hotplug-0.1.patch | 472 + 3.3.8/3rd-3rdparty-gpio_event_drv-0.1.patch | 1354 + 3.3.8/3rd-3rdparty-merge.patch | 156 + 3.3.8/3rd-3rdparty-netatop-0.1.1.patch | 1769 + 3.3.8/600-netfilter_layer7_2.22.patch | 2142 + 3.3.8/601-netfilter_layer7_pktmatch.patch | 108 + 3.3.8/602-netfilter_layer7_match.patch | 51 + 3.3.8/603-netfilter_layer7_2.6.36_fix.patch | 61 + 3.3.8/604-netfilter_cisco_794x_iphone.patch | 118 + ...etfilter_match_bypass_default_checks.patch | 93 + ...netfilter_match_bypass_default_table.patch | 81 + ...netfilter_match_reduce_memory_access.patch | 16 + ...-netfilter_optional_tcp_window_check.patch | 36 + 3.3.8/620-sched_esfq.patch | 791 + 3.3.8/621-sched_act_connmark.patch | 172 + 3.3.8/910-kobject_uevent.patch | 21 + 3.3.8/911-kobject_add_broadcast_uevent.patch | 85 + .../Add_CONFIG_VFAT_FS_DUALNAMES_option.patch | 145 + 3.3.8/accessfs-3.2-0.26.patch | 1036 + ...over-ide-drivers-when-both-are-built.patch | 36 + 3.3.8/aufs-3.x-rcN.patch | 29364 +++ 3.3.8/cloneconfig.patch | 41 + 3.3.8/colored-printk-3.3.8.patch | 337 + ...-directory-updates-during-log-replay.patch | 54 + ...ockdep-warning-in-miscdev-operations.patch | 103 + ...v-file-ops-on-inherited-passed-files.patch | 95 + ...ly-flag-before-doing-privileged-open.patch | 42 + ...le_check_list-on-ELOOP_CVE-2012-3375.patch | 35 + ...drop_write-call-in-ext4_ioc_move_ext.patch | 31 + ...on-for-ext3-file-systems-w-uninit_bg.patch | 73 + ...riggerable-bug-from-generic_setlease.patch | 39 + ...le-length-is-corrupted_CVE-2012-3400.patch | 51 + ...ading-of-sparing-table_CVE-2012-3400.patch | 132 + ...-of-abusing-i-in-udf_load_logicalvol.patch | 32 + 3.3.8/hz-432-kconfig-option.patch | 25 + 3.3.8/hz-864-kconfig-option.patch | 25 + 3.3.8/imqmq-3.3.patch | 1613 + ...press-kernel-modules-on-installation.patch | 137 + 3.3.8/kirkwood-jumbo-frame.patch | 135 + 3.3.8/linux-2.6-defaults-fat-utf8.patch | 15 + 3.3.8/linux-2.6-x86-tune-generic.patch | 13 + 3.3.8/linux-3.4-e2c-0.4.59.patch | 7781 + 3.3.8/lschlv2.patch | 256 + ...net-netfilter-IFWLOG-2.6.35-buildfix.patch | 32 + ...net-netfilter-IFWLOG-2.6.37-buildfix.patch | 15 + 3.3.8/net-netfilter-IFWLOG-mdv.patch | 264 + 3.3.8/net-netfilter-IFWLOG.patch | 269 + 3.3.8/net-netfilter-psd-2.6.35-buildfix.patch | 11 + 3.3.8/net-netfilter-psd-mdv.patch | 235 + 3.3.8/net-netfilter-psd.patch | 420 + ...implement-rfc-1123-for-ftp-conntrack.patch | 190 + 3.3.8/netfilter-ip_conntrack_slp.patch | 185 + 3.3.8/series | 87 + 3.3.8/uksm-0.1.2.1-for-v3.3.ge.8.patch | 7023 + ...rkwood-Add-support-for-Buffalo-LS-VL.patch | 381 + ...kwood-Add-support-for-Buffalo-LS-WVL.patch | 538 + ...on-Add-support-for-Buffalo-LS-PRODUO.patch | 569 + ...ood-Add-support-for-Buffalo-LS-CHLv2.patch | 278 + ...kwood-Add-support-for-Buffalo-LS-XHL.patch | 388 + ...-orion-Add-support-for-Buffalo-LS-QL.patch | 439 + 
3.3.8/vserver-3.3.8-vs2.3.3.4.patch | 26065 ++ 3.3.8/wrapfs-v3.3-rc1-429-g65388bc.patch | 1913 + 148 files changed, 455965 insertions(+) create mode 100644 3.2.34/0001-AppArmor-compatibility-patch-for-v5-network-controll.patch create mode 100644 3.2.34/0001-block-prepare-I-O-context-code-for-BFQ-v5-for-3.2.patch create mode 100644 3.2.34/0002-AppArmor-compatibility-patch-for-v5-interface.patch create mode 100644 3.2.34/0002-block-cgroups-kconfig-build-bits-for-BFQ-v5-3.2.patch create mode 100644 3.2.34/0003-AppArmor-Allow-dfa-backward-compatibility-with-broke.patch create mode 100644 3.2.34/0003-block-introduce-the-BFQ-v5-I-O-sched-for-3.2.patch create mode 100644 3.2.34/01patch-2.6.33_atopcnt.patch create mode 100644 3.2.34/02patch-2.6.33_atopacct.patch create mode 100644 3.2.34/3.2.0-ck1.patch create mode 100644 3.2.34/3rd-3rdparty-1.0-tree.patch create mode 100644 3.2.34/3rd-3rdparty-button_hotplug-0.4.1.patch create mode 100644 3.2.34/3rd-3rdparty-gpio_button_hotplug-0.1.patch create mode 100644 3.2.34/3rd-3rdparty-gpio_event_drv-0.1.patch create mode 100644 3.2.34/3rd-3rdparty-merge.patch create mode 100644 3.2.34/3rd-3rdparty-netatop-0.1.1.patch create mode 100644 3.2.34/910-kobject_uevent.patch create mode 100644 3.2.34/911-kobject_add_broadcast_uevent.patch create mode 100644 3.2.34/Add_CONFIG_VFAT_FS_DUALNAMES_option.patch create mode 100644 3.2.34/accessfs-3.2-0.26.patch create mode 100644 3.2.34/ata-prefer-ata-drivers-over-ide-drivers-when-both-are-built.patch create mode 100644 3.2.34/aufs3-standalone-3.2.patch create mode 100644 3.2.34/bump/1021_linux-3.2.22.patch create mode 100644 3.2.34/bump/1022_linux-3.2.23.patch create mode 100644 3.2.34/bump/1023_linux-3.2.24.patch create mode 100644 3.2.34/bump/1024_linux-3.2.25.patch create mode 100644 3.2.34/bump/1025_linux-3.2.26.patch create mode 100644 3.2.34/bump/1026_linux-3.2.27.patch create mode 100644 3.2.34/bump/1027_linux-3.2.28.patch create mode 100644 3.2.34/bump/1028_linux-3.2.29.patch create mode 100644 3.2.34/bump/1029_linux-3.2.30.patch create mode 100644 3.2.34/bump/1030_linux-3.2.31.patch create mode 100644 3.2.34/bump/1031_linux-3.2.32.patch create mode 100644 3.2.34/bump/1032_linux-3.2.33.patch create mode 100644 3.2.34/bump/1033_linux-3.2.34.patch create mode 100644 3.2.34/cloneconfig.patch create mode 100644 3.2.34/colored-printk-3.2.33.patch create mode 100644 3.2.34/hz-432-kconfig-option.patch create mode 100644 3.2.34/hz-864-kconfig-option.patch create mode 100644 3.2.34/imqmq-3.2.patch create mode 100644 3.2.34/kbuild-compress-kernel-modules-on-installation.patch create mode 100644 3.2.34/kernel-3.2-lsproduo.patch create mode 100644 3.2.34/kernel-3.2-lsql.patch create mode 100644 3.2.34/kernel-3.2-lsxhl.patch create mode 100644 3.2.34/kernel-3.4.0-layer7-2.22.patch create mode 100644 3.2.34/kirkwood-jumbo-frame.patch create mode 100644 3.2.34/linux-2.6-defaults-fat-utf8.patch create mode 100644 3.2.34/linux-2.6-x86-tune-generic.patch create mode 100644 3.2.34/linux-3.2-e2c-0.4.58.patch create mode 100644 3.2.34/linux-3.2.33-zfs.patch create mode 100644 3.2.34/lschlv2.patch create mode 100644 3.2.34/net-netfilter-IFWLOG-2.6.35-buildfix.patch create mode 100644 3.2.34/net-netfilter-IFWLOG-2.6.37-buildfix.patch create mode 100644 3.2.34/net-netfilter-IFWLOG-mdv.patch create mode 100644 3.2.34/net-netfilter-IFWLOG.patch create mode 100644 3.2.34/net-netfilter-psd-2.6.35-buildfix.patch create mode 100644 3.2.34/net-netfilter-psd-mdv.patch create mode 100644 3.2.34/net-netfilter-psd.patch create mode 100644 
3.2.34/netfilter-implement-rfc-1123-for-ftp-conntrack.patch create mode 100644 3.2.34/netfilter-ip_conntrack_slp.patch create mode 100644 3.2.34/patches.suse/0002-btrfs-Introduce-btrfs_get_maps_dev.patch create mode 100644 3.2.34/patches.suse/btrfs-0900-add-allocator-tracepoints.patch create mode 100644 3.2.34/patches.suse/btrfs-8001-rewrite-btrfs_trim_block_group.patch create mode 100644 3.2.34/patches.suse/btrfs-8007-lock-and-disable-irq-during-space-alloc.patch create mode 100644 3.2.34/patches.suse/btrfs-8013-sector-size-check-during-mount.patch create mode 100644 3.2.34/patches.suse/btrfs-8014-add-new-ioctl-to-determine-size-of-compressed-.patch create mode 100644 3.2.34/patches.suse/btrfs-8015-make-lzo-the-default-compression-scheme.patch create mode 100644 3.2.34/patches.suse/btrfs-8024-workaround-for-cleaner-deadlock.patch create mode 100644 3.2.34/patches.suse/btrfs-8025-update-global-block_rsv-when-creating-a-new-bl.patch create mode 100644 3.2.34/patches.suse/btrfs-8026-fix-possible-deadlock-when-opening-a-seed-devi.patch create mode 100644 3.2.34/patches.suse/btrfs-allow-cross-subvolume-file-clone.patch create mode 100644 3.2.34/series create mode 100644 3.2.34/uksm-0.1.2.1-for-v3.2.ge.31.patch create mode 100644 3.2.34/v3.2-ARM-kirkwood-Add-support-for-Buffalo-LS-VL.patch create mode 100644 3.2.34/v3.2-ARM-kirkwood-Add-support-for-Buffalo-LS-WVL.patch create mode 100644 3.2.34/vserver-3.2.34-vs2.3.2.15.patch create mode 100644 3.2.34/wrapfs-v3.2.2-45-ga5296eb.patch create mode 100644 3.3.8/0001-AppArmor-compatibility-patch-for-v5-network-controll.patch create mode 100644 3.3.8/0001-block-cgroups-kconfig-build-bits-for-BFQ-v5-3.3.patch create mode 100644 3.3.8/0002-AppArmor-compatibility-patch-for-v5-interface.patch create mode 100644 3.3.8/0002-block-introduce-the-BFQ-v5-I-O-sched-for-3.3.patch create mode 100644 3.3.8/0003-AppArmor-Allow-dfa-backward-compatibility-with-broke.patch create mode 100644 3.3.8/01patch-2.6.33_atopcnt.patch create mode 100644 3.3.8/02patch-2.6.33_atopacct.patch create mode 100644 3.3.8/3.3-ck1.patch create mode 100644 3.3.8/3rd-3rdparty-1.0-tree.patch create mode 100644 3.3.8/3rd-3rdparty-button_hotplug-0.4.1.patch create mode 100644 3.3.8/3rd-3rdparty-gpio_button_hotplug-0.1.patch create mode 100644 3.3.8/3rd-3rdparty-gpio_event_drv-0.1.patch create mode 100644 3.3.8/3rd-3rdparty-merge.patch create mode 100644 3.3.8/3rd-3rdparty-netatop-0.1.1.patch create mode 100644 3.3.8/600-netfilter_layer7_2.22.patch create mode 100644 3.3.8/601-netfilter_layer7_pktmatch.patch create mode 100644 3.3.8/602-netfilter_layer7_match.patch create mode 100644 3.3.8/603-netfilter_layer7_2.6.36_fix.patch create mode 100644 3.3.8/604-netfilter_cisco_794x_iphone.patch create mode 100644 3.3.8/610-netfilter_match_bypass_default_checks.patch create mode 100644 3.3.8/611-netfilter_match_bypass_default_table.patch create mode 100644 3.3.8/612-netfilter_match_reduce_memory_access.patch create mode 100644 3.3.8/613-netfilter_optional_tcp_window_check.patch create mode 100644 3.3.8/620-sched_esfq.patch create mode 100644 3.3.8/621-sched_act_connmark.patch create mode 100644 3.3.8/910-kobject_uevent.patch create mode 100644 3.3.8/911-kobject_add_broadcast_uevent.patch create mode 100644 3.3.8/Add_CONFIG_VFAT_FS_DUALNAMES_option.patch create mode 100644 3.3.8/accessfs-3.2-0.26.patch create mode 100644 3.3.8/ata-prefer-ata-drivers-over-ide-drivers-when-both-are-built.patch create mode 100644 3.3.8/aufs-3.x-rcN.patch create mode 100644 3.3.8/cloneconfig.patch create mode 100644 
3.3.8/colored-printk-3.3.8.patch create mode 100644 3.3.8/fs-btrfs-run-delayed-directory-updates-during-log-replay.patch create mode 100644 3.3.8/fs-ecryptfs-fix-lockdep-warning-in-miscdev-operations.patch create mode 100644 3.3.8/fs-ecryptfs-gracefully-refuse-miscdev-file-ops-on-inherited-passed-files.patch create mode 100644 3.3.8/fs-ecryptfs-properly-check-for-o_rdonly-flag-before-doing-privileged-open.patch create mode 100644 3.3.8/fs-epoll-clear-the-tfile_check_list-on-ELOOP_CVE-2012-3375.patch create mode 100644 3.3.8/fs-ext4-fix-duplicated-mnt_drop_write-call-in-ext4_ioc_move_ext.patch create mode 100644 3.3.8/fs-ext4-fix-the-free-blocks-calculation-for-ext3-file-systems-w-uninit_bg.patch create mode 100644 3.3.8/fs-remove-easily-user-triggerable-bug-from-generic_setlease.patch create mode 100644 3.3.8/fs-udf-avoid-run-away-loop-when-partition-table-length-is-corrupted_CVE-2012-3400.patch create mode 100644 3.3.8/fs-udf-fortify-loading-of-sparing-table_CVE-2012-3400.patch create mode 100644 3.3.8/fs-udf-use-ret-instead-of-abusing-i-in-udf_load_logicalvol.patch create mode 100644 3.3.8/hz-432-kconfig-option.patch create mode 100644 3.3.8/hz-864-kconfig-option.patch create mode 100644 3.3.8/imqmq-3.3.patch create mode 100644 3.3.8/kbuild-compress-kernel-modules-on-installation.patch create mode 100644 3.3.8/kirkwood-jumbo-frame.patch create mode 100644 3.3.8/linux-2.6-defaults-fat-utf8.patch create mode 100644 3.3.8/linux-2.6-x86-tune-generic.patch create mode 100644 3.3.8/linux-3.4-e2c-0.4.59.patch create mode 100644 3.3.8/lschlv2.patch create mode 100644 3.3.8/net-netfilter-IFWLOG-2.6.35-buildfix.patch create mode 100644 3.3.8/net-netfilter-IFWLOG-2.6.37-buildfix.patch create mode 100644 3.3.8/net-netfilter-IFWLOG-mdv.patch create mode 100644 3.3.8/net-netfilter-IFWLOG.patch create mode 100644 3.3.8/net-netfilter-psd-2.6.35-buildfix.patch create mode 100644 3.3.8/net-netfilter-psd-mdv.patch create mode 100644 3.3.8/net-netfilter-psd.patch create mode 100644 3.3.8/netfilter-implement-rfc-1123-for-ftp-conntrack.patch create mode 100644 3.3.8/netfilter-ip_conntrack_slp.patch create mode 100644 3.3.8/series create mode 100644 3.3.8/uksm-0.1.2.1-for-v3.3.ge.8.patch create mode 100644 3.3.8/v3.2-ARM-kirkwood-Add-support-for-Buffalo-LS-VL.patch create mode 100644 3.3.8/v3.2-ARM-kirkwood-Add-support-for-Buffalo-LS-WVL.patch create mode 100644 3.3.8/v3.2-ARM-orion-Add-support-for-Buffalo-LS-PRODUO.patch create mode 100644 3.3.8/v3.3-ARM-kirkwood-Add-support-for-Buffalo-LS-CHLv2.patch create mode 100644 3.3.8/v3.3-ARM-kirkwood-Add-support-for-Buffalo-LS-XHL.patch create mode 100644 3.3.8/v3.3-ARM-orion-Add-support-for-Buffalo-LS-QL.patch create mode 100644 3.3.8/vserver-3.3.8-vs2.3.3.4.patch create mode 100644 3.3.8/wrapfs-v3.3-rc1-429-g65388bc.patch diff --git a/3.2.34/0001-AppArmor-compatibility-patch-for-v5-network-controll.patch b/3.2.34/0001-AppArmor-compatibility-patch-for-v5-network-controll.patch new file mode 100644 index 0000000..00c8712 --- /dev/null +++ b/3.2.34/0001-AppArmor-compatibility-patch-for-v5-network-controll.patch @@ -0,0 +1,553 @@ +From dc13dec93dbd04bfa7a9ba67df1b8ed3431d8d48 Mon Sep 17 00:00:00 2001 +From: John Johansen +Date: Wed, 10 Aug 2011 22:02:39 -0700 +Subject: [PATCH 1/3] AppArmor: compatibility patch for v5 network controll + +Add compatibility for v5 network rules. 
+ +Signed-off-by: John Johansen +--- + include/linux/lsm_audit.h | 4 + + security/apparmor/Makefile | 19 ++++- + security/apparmor/include/net.h | 40 +++++++++ + security/apparmor/include/policy.h | 3 + + security/apparmor/lsm.c | 112 +++++++++++++++++++++++ + security/apparmor/net.c | 170 ++++++++++++++++++++++++++++++++++++ + security/apparmor/policy.c | 1 + + security/apparmor/policy_unpack.c | 48 ++++++++++- + 8 files changed, 394 insertions(+), 3 deletions(-) + create mode 100644 security/apparmor/include/net.h + create mode 100644 security/apparmor/net.c + +diff --git a/include/linux/lsm_audit.h b/include/linux/lsm_audit.h +index 88e78de..c63979a 100644 +--- a/include/linux/lsm_audit.h ++++ b/include/linux/lsm_audit.h +@@ -124,6 +124,10 @@ struct common_audit_data { + u32 denied; + uid_t ouid; + } fs; ++ struct { ++ int type, protocol; ++ struct sock *sk; ++ } net; + }; + } apparmor_audit_data; + #endif +diff --git a/security/apparmor/Makefile b/security/apparmor/Makefile +index 2dafe50..7cefef9 100644 +--- a/security/apparmor/Makefile ++++ b/security/apparmor/Makefile +@@ -4,9 +4,9 @@ obj-$(CONFIG_SECURITY_APPARMOR) += apparmor.o + + apparmor-y := apparmorfs.o audit.o capability.o context.o ipc.o lib.o match.o \ + path.o domain.o policy.o policy_unpack.o procattr.o lsm.o \ +- resource.o sid.o file.o ++ resource.o sid.o file.o net.o + +-clean-files := capability_names.h rlim_names.h ++clean-files := capability_names.h rlim_names.h af_names.h + + + # Build a lower case string table of capability names +@@ -44,9 +44,24 @@ cmd_make-rlim = echo "static const char *rlim_names[] = {" > $@ ;\ + sed -r -n "s/^\# ?define[ \t]+(RLIMIT_[A-Z0-9_]+).*/\1,/p" $< >> $@ ;\ + echo "};" >> $@ + ++# Build a lower case string table of address family names. ++# Transform lines from ++# #define AF_INET 2 /* Internet IP Protocol */ ++# to ++# [2] = "inet", ++quiet_cmd_make-af = GEN $@ ++cmd_make-af = echo "static const char *address_family_names[] = {" > $@ ;\ ++ sed $< >> $@ -r -n -e "/AF_MAX/d" -e "/AF_LOCAL/d" -e \ ++ 's/^\#define[ \t]+AF_([A-Z0-9_]+)[ \t]+([0-9]+).*/[\2] = "\L\1",/p';\ ++ echo "};" >> $@ ++ ++ + $(obj)/capability.o : $(obj)/capability_names.h + $(obj)/resource.o : $(obj)/rlim_names.h ++$(obj)/net.o : $(obj)/af_names.h + $(obj)/capability_names.h : $(srctree)/include/linux/capability.h + $(call cmd,make-caps) + $(obj)/rlim_names.h : $(srctree)/include/asm-generic/resource.h + $(call cmd,make-rlim) ++$(obj)/af_names.h : $(srctree)/include/linux/socket.h ++ $(call cmd,make-af) +\ No newline at end of file +diff --git a/security/apparmor/include/net.h b/security/apparmor/include/net.h +new file mode 100644 +index 0000000..3c7d599 +--- /dev/null ++++ b/security/apparmor/include/net.h +@@ -0,0 +1,40 @@ ++/* ++ * AppArmor security module ++ * ++ * This file contains AppArmor network mediation definitions. ++ * ++ * Copyright (C) 1998-2008 Novell/SUSE ++ * Copyright 2009-2010 Canonical Ltd. ++ * ++ * This program is free software; you can redistribute it and/or ++ * modify it under the terms of the GNU General Public License as ++ * published by the Free Software Foundation, version 2 of the ++ * License. 
++ */ ++ ++#ifndef __AA_NET_H ++#define __AA_NET_H ++ ++#include ++ ++/* struct aa_net - network confinement data ++ * @allowed: basic network families permissions ++ * @audit_network: which network permissions to force audit ++ * @quiet_network: which network permissions to quiet rejects ++ */ ++struct aa_net { ++ u16 allow[AF_MAX]; ++ u16 audit[AF_MAX]; ++ u16 quiet[AF_MAX]; ++}; ++ ++extern int aa_net_perm(int op, struct aa_profile *profile, u16 family, ++ int type, int protocol, struct sock *sk); ++extern int aa_revalidate_sk(int op, struct sock *sk); ++ ++static inline void aa_free_net_rules(struct aa_net *new) ++{ ++ /* NOP */ ++} ++ ++#endif /* __AA_NET_H */ +diff --git a/security/apparmor/include/policy.h b/security/apparmor/include/policy.h +index aeda5cf..6776929 100644 +--- a/security/apparmor/include/policy.h ++++ b/security/apparmor/include/policy.h +@@ -27,6 +27,7 @@ + #include "capability.h" + #include "domain.h" + #include "file.h" ++#include "net.h" + #include "resource.h" + + extern const char *profile_mode_names[]; +@@ -145,6 +146,7 @@ struct aa_namespace { + * @size: the memory consumed by this profiles rules + * @file: The set of rules governing basic file access and domain transitions + * @caps: capabilities for the profile ++ * @net: network controls for the profile + * @rlimits: rlimits for the profile + * + * The AppArmor profile contains the basic confinement data. Each profile +@@ -181,6 +183,7 @@ struct aa_profile { + + struct aa_file_rules file; + struct aa_caps caps; ++ struct aa_net net; + struct aa_rlimit rlimits; + }; + +diff --git a/security/apparmor/lsm.c b/security/apparmor/lsm.c +index 3d2fd14..aa293ae 100644 +--- a/security/apparmor/lsm.c ++++ b/security/apparmor/lsm.c +@@ -32,6 +32,7 @@ + #include "include/context.h" + #include "include/file.h" + #include "include/ipc.h" ++#include "include/net.h" + #include "include/path.h" + #include "include/policy.h" + #include "include/procattr.h" +@@ -621,6 +622,104 @@ static int apparmor_task_setrlimit(struct task_struct *task, + return error; + } + ++static int apparmor_socket_create(int family, int type, int protocol, int kern) ++{ ++ struct aa_profile *profile; ++ int error = 0; ++ ++ if (kern) ++ return 0; ++ ++ profile = __aa_current_profile(); ++ if (!unconfined(profile)) ++ error = aa_net_perm(OP_CREATE, profile, family, type, protocol, ++ NULL); ++ return error; ++} ++ ++static int apparmor_socket_bind(struct socket *sock, ++ struct sockaddr *address, int addrlen) ++{ ++ struct sock *sk = sock->sk; ++ ++ return aa_revalidate_sk(OP_BIND, sk); ++} ++ ++static int apparmor_socket_connect(struct socket *sock, ++ struct sockaddr *address, int addrlen) ++{ ++ struct sock *sk = sock->sk; ++ ++ return aa_revalidate_sk(OP_CONNECT, sk); ++} ++ ++static int apparmor_socket_listen(struct socket *sock, int backlog) ++{ ++ struct sock *sk = sock->sk; ++ ++ return aa_revalidate_sk(OP_LISTEN, sk); ++} ++ ++static int apparmor_socket_accept(struct socket *sock, struct socket *newsock) ++{ ++ struct sock *sk = sock->sk; ++ ++ return aa_revalidate_sk(OP_ACCEPT, sk); ++} ++ ++static int apparmor_socket_sendmsg(struct socket *sock, ++ struct msghdr *msg, int size) ++{ ++ struct sock *sk = sock->sk; ++ ++ return aa_revalidate_sk(OP_SENDMSG, sk); ++} ++ ++static int apparmor_socket_recvmsg(struct socket *sock, ++ struct msghdr *msg, int size, int flags) ++{ ++ struct sock *sk = sock->sk; ++ ++ return aa_revalidate_sk(OP_RECVMSG, sk); ++} ++ ++static int apparmor_socket_getsockname(struct socket *sock) ++{ ++ struct sock *sk = 
sock->sk; ++ ++ return aa_revalidate_sk(OP_GETSOCKNAME, sk); ++} ++ ++static int apparmor_socket_getpeername(struct socket *sock) ++{ ++ struct sock *sk = sock->sk; ++ ++ return aa_revalidate_sk(OP_GETPEERNAME, sk); ++} ++ ++static int apparmor_socket_getsockopt(struct socket *sock, int level, ++ int optname) ++{ ++ struct sock *sk = sock->sk; ++ ++ return aa_revalidate_sk(OP_GETSOCKOPT, sk); ++} ++ ++static int apparmor_socket_setsockopt(struct socket *sock, int level, ++ int optname) ++{ ++ struct sock *sk = sock->sk; ++ ++ return aa_revalidate_sk(OP_SETSOCKOPT, sk); ++} ++ ++static int apparmor_socket_shutdown(struct socket *sock, int how) ++{ ++ struct sock *sk = sock->sk; ++ ++ return aa_revalidate_sk(OP_SOCK_SHUTDOWN, sk); ++} ++ + static struct security_operations apparmor_ops = { + .name = "apparmor", + +@@ -652,6 +751,19 @@ static struct security_operations apparmor_ops = { + .getprocattr = apparmor_getprocattr, + .setprocattr = apparmor_setprocattr, + ++ .socket_create = apparmor_socket_create, ++ .socket_bind = apparmor_socket_bind, ++ .socket_connect = apparmor_socket_connect, ++ .socket_listen = apparmor_socket_listen, ++ .socket_accept = apparmor_socket_accept, ++ .socket_sendmsg = apparmor_socket_sendmsg, ++ .socket_recvmsg = apparmor_socket_recvmsg, ++ .socket_getsockname = apparmor_socket_getsockname, ++ .socket_getpeername = apparmor_socket_getpeername, ++ .socket_getsockopt = apparmor_socket_getsockopt, ++ .socket_setsockopt = apparmor_socket_setsockopt, ++ .socket_shutdown = apparmor_socket_shutdown, ++ + .cred_alloc_blank = apparmor_cred_alloc_blank, + .cred_free = apparmor_cred_free, + .cred_prepare = apparmor_cred_prepare, +diff --git a/security/apparmor/net.c b/security/apparmor/net.c +new file mode 100644 +index 0000000..1765901 +--- /dev/null ++++ b/security/apparmor/net.c +@@ -0,0 +1,170 @@ ++/* ++ * AppArmor security module ++ * ++ * This file contains AppArmor network mediation ++ * ++ * Copyright (C) 1998-2008 Novell/SUSE ++ * Copyright 2009-2010 Canonical Ltd. ++ * ++ * This program is free software; you can redistribute it and/or ++ * modify it under the terms of the GNU General Public License as ++ * published by the Free Software Foundation, version 2 of the ++ * License. 
++ */ ++ ++#include "include/apparmor.h" ++#include "include/audit.h" ++#include "include/context.h" ++#include "include/net.h" ++#include "include/policy.h" ++ ++#include "af_names.h" ++ ++static const char *sock_type_names[] = { ++ "unknown(0)", ++ "stream", ++ "dgram", ++ "raw", ++ "rdm", ++ "seqpacket", ++ "dccp", ++ "unknown(7)", ++ "unknown(8)", ++ "unknown(9)", ++ "packet", ++}; ++ ++/* audit callback for net specific fields */ ++static void audit_cb(struct audit_buffer *ab, void *va) ++{ ++ struct common_audit_data *sa = va; ++ ++ audit_log_format(ab, " family="); ++ if (address_family_names[sa->u.net.family]) { ++ audit_log_string(ab, address_family_names[sa->u.net.family]); ++ } else { ++ audit_log_format(ab, " \"unknown(%d)\"", sa->u.net.family); ++ } ++ ++ audit_log_format(ab, " sock_type="); ++ if (sock_type_names[sa->aad.net.type]) { ++ audit_log_string(ab, sock_type_names[sa->aad.net.type]); ++ } else { ++ audit_log_format(ab, "\"unknown(%d)\"", sa->aad.net.type); ++ } ++ ++ audit_log_format(ab, " protocol=%d", sa->aad.net.protocol); ++} ++ ++/** ++ * audit_net - audit network access ++ * @profile: profile being enforced (NOT NULL) ++ * @op: operation being checked ++ * @family: network family ++ * @type: network type ++ * @protocol: network protocol ++ * @sk: socket auditing is being applied to ++ * @error: error code for failure else 0 ++ * ++ * Returns: %0 or sa->error else other errorcode on failure ++ */ ++static int audit_net(struct aa_profile *profile, int op, u16 family, int type, ++ int protocol, struct sock *sk, int error) ++{ ++ int audit_type = AUDIT_APPARMOR_AUTO; ++ struct common_audit_data sa; ++ if (sk) { ++ COMMON_AUDIT_DATA_INIT(&sa, NET); ++ } else { ++ COMMON_AUDIT_DATA_INIT(&sa, NONE); ++ } ++ /* todo fill in socket addr info */ ++ ++ sa.aad.op = op, ++ sa.u.net.family = family; ++ sa.u.net.sk = sk; ++ sa.aad.net.type = type; ++ sa.aad.net.protocol = protocol; ++ sa.aad.error = error; ++ ++ if (likely(!sa.aad.error)) { ++ u16 audit_mask = profile->net.audit[sa.u.net.family]; ++ if (likely((AUDIT_MODE(profile) != AUDIT_ALL) && ++ !(1 << sa.aad.net.type & audit_mask))) ++ return 0; ++ audit_type = AUDIT_APPARMOR_AUDIT; ++ } else { ++ u16 quiet_mask = profile->net.quiet[sa.u.net.family]; ++ u16 kill_mask = 0; ++ u16 denied = (1 << sa.aad.net.type) & ~quiet_mask; ++ ++ if (denied & kill_mask) ++ audit_type = AUDIT_APPARMOR_KILL; ++ ++ if ((denied & quiet_mask) && ++ AUDIT_MODE(profile) != AUDIT_NOQUIET && ++ AUDIT_MODE(profile) != AUDIT_ALL) ++ return COMPLAIN_MODE(profile) ? 0 : sa.aad.error; ++ } ++ ++ return aa_audit(audit_type, profile, GFP_KERNEL, &sa, audit_cb); ++} ++ ++/** ++ * aa_net_perm - very course network access check ++ * @op: operation being checked ++ * @profile: profile being enforced (NOT NULL) ++ * @family: network family ++ * @type: network type ++ * @protocol: network protocol ++ * ++ * Returns: %0 else error if permission denied ++ */ ++int aa_net_perm(int op, struct aa_profile *profile, u16 family, int type, ++ int protocol, struct sock *sk) ++{ ++ u16 family_mask; ++ int error; ++ ++ if ((family < 0) || (family >= AF_MAX)) ++ return -EINVAL; ++ ++ if ((type < 0) || (type >= SOCK_MAX)) ++ return -EINVAL; ++ ++ /* unix domain and netlink sockets are handled by ipc */ ++ if (family == AF_UNIX || family == AF_NETLINK) ++ return 0; ++ ++ family_mask = profile->net.allow[family]; ++ ++ error = (family_mask & (1 << type)) ? 
0 : -EACCES; ++ ++ return audit_net(profile, op, family, type, protocol, sk, error); ++} ++ ++/** ++ * aa_revalidate_sk - Revalidate access to a sock ++ * @op: operation being checked ++ * @sk: sock being revalidated (NOT NULL) ++ * ++ * Returns: %0 else error if permission denied ++ */ ++int aa_revalidate_sk(int op, struct sock *sk) ++{ ++ struct aa_profile *profile; ++ int error = 0; ++ ++ /* aa_revalidate_sk should not be called from interrupt context ++ * don't mediate these calls as they are not task related ++ */ ++ if (in_interrupt()) ++ return 0; ++ ++ profile = __aa_current_profile(); ++ if (!unconfined(profile)) ++ error = aa_net_perm(op, profile, sk->sk_family, sk->sk_type, ++ sk->sk_protocol, sk); ++ ++ return error; ++} +diff --git a/security/apparmor/policy.c b/security/apparmor/policy.c +index 4f0eade..4d5ce13 100644 +--- a/security/apparmor/policy.c ++++ b/security/apparmor/policy.c +@@ -745,6 +745,7 @@ static void free_profile(struct aa_profile *profile) + + aa_free_file_rules(&profile->file); + aa_free_cap_rules(&profile->caps); ++ aa_free_net_rules(&profile->net); + aa_free_rlimit_rules(&profile->rlimits); + + aa_free_sid(profile->sid); +diff --git a/security/apparmor/policy_unpack.c b/security/apparmor/policy_unpack.c +index d6d9a57..f4874c4 100644 +--- a/security/apparmor/policy_unpack.c ++++ b/security/apparmor/policy_unpack.c +@@ -190,6 +190,19 @@ fail: + return 0; + } + ++static bool unpack_u16(struct aa_ext *e, u16 *data, const char *name) ++{ ++ if (unpack_nameX(e, AA_U16, name)) { ++ if (!inbounds(e, sizeof(u16))) ++ return 0; ++ if (data) ++ *data = le16_to_cpu(get_unaligned((u16 *) e->pos)); ++ e->pos += sizeof(u16); ++ return 1; ++ } ++ return 0; ++} ++ + static bool unpack_u32(struct aa_ext *e, u32 *data, const char *name) + { + if (unpack_nameX(e, AA_U32, name)) { +@@ -468,7 +481,8 @@ static struct aa_profile *unpack_profile(struct aa_ext *e) + { + struct aa_profile *profile = NULL; + const char *name = NULL; +- int error = -EPROTO; ++ size_t size = 0; ++ int i, error = -EPROTO; + kernel_cap_t tmpcap; + u32 tmp; + +@@ -559,6 +573,38 @@ static struct aa_profile *unpack_profile(struct aa_ext *e) + if (!unpack_rlimits(e, profile)) + goto fail; + ++ size = unpack_array(e, "net_allowed_af"); ++ if (size) { ++ ++ for (i = 0; i < size; i++) { ++ /* discard extraneous rules that this kernel will ++ * never request ++ */ ++ if (i >= AF_MAX) { ++ u16 tmp; ++ if (!unpack_u16(e, &tmp, NULL) || ++ !unpack_u16(e, &tmp, NULL) || ++ !unpack_u16(e, &tmp, NULL)) ++ goto fail; ++ continue; ++ } ++ if (!unpack_u16(e, &profile->net.allow[i], NULL)) ++ goto fail; ++ if (!unpack_u16(e, &profile->net.audit[i], NULL)) ++ goto fail; ++ if (!unpack_u16(e, &profile->net.quiet[i], NULL)) ++ goto fail; ++ } ++ if (!unpack_nameX(e, AA_ARRAYEND, NULL)) ++ goto fail; ++ /* ++ * allow unix domain and netlink sockets they are handled ++ * by IPC ++ */ ++ } ++ profile->net.allow[AF_UNIX] = 0xffff; ++ profile->net.allow[AF_NETLINK] = 0xffff; ++ + /* get file rules */ + profile->file.dfa = unpack_dfa(e); + if (IS_ERR(profile->file.dfa)) { +-- +1.7.5.4 + diff --git a/3.2.34/0001-block-prepare-I-O-context-code-for-BFQ-v5-for-3.2.patch b/3.2.34/0001-block-prepare-I-O-context-code-for-BFQ-v5-for-3.2.patch new file mode 100644 index 0000000..ea52c70 --- /dev/null +++ b/3.2.34/0001-block-prepare-I-O-context-code-for-BFQ-v5-for-3.2.patch @@ -0,0 +1,299 @@ +From f9072731bedac6f6373dd75798b5a801ce614c02 Mon Sep 17 00:00:00 2001 +From: Arianna Avanzini +Date: Mon, 19 Dec 2011 16:33:41 +0100 +Subject: 
[PATCH 1/3] block: prepare I/O context code for BFQ-v5 for 3.2 + +BFQ uses struct cfq_io_context to store its per-process per-device data, +reusing the same code for cic handling of CFQ. The code is not shared +ATM to minimize the impact of these patches. + +This patch introduces a new hlist to each io_context to store all the +cic's allocated by BFQ to allow calling the right destructor on module +unload; the radix tree used for cic lookup needs to be duplicated +because it can contain dead keys inserted by a scheduler and later +retrieved by the other one. + +Update the io_context exit and free paths to take care also of +the BFQ cic's. + +Change the type of cfqq inside struct cfq_io_context to void * +to use it also for BFQ per-queue data. + +A new bfq-specific ioprio_changed field is necessary, too, to avoid +clobbering cfq's one, so switch ioprio_changed to a bitmap, with one +element per scheduler. + +Signed-off-by: Fabio Checconi +Signed-off-by: Paolo Valente +Signed-off-by: Arianna Avanzini +--- + block/Kconfig.iosched | 26 ++++++++++++++++++++++++++ + block/blk-ioc.c | 30 +++++++++++++++++------------- + block/cfq-iosched.c | 10 +++++++--- + fs/ioprio.c | 9 +++++++-- + include/linux/iocontext.h | 18 +++++++++++++++--- + 5 files changed, 72 insertions(+), 21 deletions(-) + +diff --git a/block/Kconfig.iosched b/block/Kconfig.iosched +index 3199b76..5905452 100644 +--- a/block/Kconfig.iosched ++++ b/block/Kconfig.iosched +@@ -43,6 +43,28 @@ config CFQ_GROUP_IOSCHED + ---help--- + Enable group IO scheduling in CFQ. + ++config IOSCHED_BFQ ++ tristate "BFQ I/O scheduler" ++ depends on EXPERIMENTAL ++ default n ++ ---help--- ++ The BFQ I/O scheduler tries to distribute bandwidth among ++ all processes according to their weights. ++ It aims at distributing the bandwidth as desired, independently of ++ the disk parameters and with any workload. It also tries to ++ guarantee low latency to interactive and soft real-time ++ applications. If compiled built-in (saying Y here), BFQ can ++ be configured to support hierarchical scheduling. ++ ++config CGROUP_BFQIO ++ bool "BFQ hierarchical scheduling support" ++ depends on CGROUPS && IOSCHED_BFQ=y ++ default n ++ ---help--- ++ Enable hierarchical scheduling in BFQ, using the cgroups ++ filesystem interface. The name of the subsystem will be ++ bfqio. 
++ + choice + prompt "Default I/O scheduler" + default DEFAULT_CFQ +@@ -56,6 +78,9 @@ choice + config DEFAULT_CFQ + bool "CFQ" if IOSCHED_CFQ=y + ++ config DEFAULT_BFQ ++ bool "BFQ" if IOSCHED_BFQ=y ++ + config DEFAULT_NOOP + bool "No-op" + +@@ -65,6 +90,7 @@ config DEFAULT_IOSCHED + string + default "deadline" if DEFAULT_DEADLINE + default "cfq" if DEFAULT_CFQ ++ default "bfq" if DEFAULT_BFQ + default "noop" if DEFAULT_NOOP + + endmenu +diff --git a/block/blk-ioc.c b/block/blk-ioc.c +index 6f9bbd9..d0d16d4 100644 +--- a/block/blk-ioc.c ++++ b/block/blk-ioc.c +@@ -5,6 +5,7 @@ + #include + #include + #include ++#include + #include + #include /* for max_pfn/max_low_pfn */ + #include +@@ -16,13 +17,12 @@ + */ + static struct kmem_cache *iocontext_cachep; + +-static void cfq_dtor(struct io_context *ioc) ++static void hlist_sched_dtor(struct io_context *ioc, struct hlist_head *list) + { +- if (!hlist_empty(&ioc->cic_list)) { ++ if (!hlist_empty(list)) { + struct cfq_io_context *cic; + +- cic = hlist_entry(ioc->cic_list.first, struct cfq_io_context, +- cic_list); ++ cic = hlist_entry(list->first, struct cfq_io_context, cic_list); + cic->dtor(ioc); + } + } +@@ -40,7 +40,9 @@ int put_io_context(struct io_context *ioc) + + if (atomic_long_dec_and_test(&ioc->refcount)) { + rcu_read_lock(); +- cfq_dtor(ioc); ++ ++ hlist_sched_dtor(ioc, &ioc->cic_list); ++ hlist_sched_dtor(ioc, &ioc->bfq_cic_list); + rcu_read_unlock(); + + kmem_cache_free(iocontext_cachep, ioc); +@@ -50,15 +52,14 @@ int put_io_context(struct io_context *ioc) + } + EXPORT_SYMBOL(put_io_context); + +-static void cfq_exit(struct io_context *ioc) ++static void hlist_sched_exit(struct io_context *ioc, struct hlist_head *list) + { + rcu_read_lock(); + +- if (!hlist_empty(&ioc->cic_list)) { ++ if (!hlist_empty(list)) { + struct cfq_io_context *cic; + +- cic = hlist_entry(ioc->cic_list.first, struct cfq_io_context, +- cic_list); ++ cic = hlist_entry(list->first, struct cfq_io_context, cic_list); + cic->exit(ioc); + } + rcu_read_unlock(); +@@ -74,9 +75,10 @@ void exit_io_context(struct task_struct *task) + task->io_context = NULL; + task_unlock(task); + +- if (atomic_dec_and_test(&ioc->nr_tasks)) +- cfq_exit(ioc); +- ++ if (atomic_dec_and_test(&ioc->nr_tasks)) { ++ hlist_sched_exit(ioc, &ioc->cic_list); ++ hlist_sched_exit(ioc, &ioc->bfq_cic_list); ++ } + put_io_context(ioc); + } + +@@ -89,12 +91,14 @@ struct io_context *alloc_io_context(gfp_t gfp_flags, int node) + atomic_long_set(&ioc->refcount, 1); + atomic_set(&ioc->nr_tasks, 1); + spin_lock_init(&ioc->lock); +- ioc->ioprio_changed = 0; ++ bitmap_zero(ioc->ioprio_changed, IOC_IOPRIO_CHANGED_BITS); + ioc->ioprio = 0; + ioc->last_waited = 0; /* doesn't matter... 
*/ + ioc->nr_batch_requests = 0; /* because this is 0 */ + INIT_RADIX_TREE(&ioc->radix_root, GFP_ATOMIC | __GFP_HIGH); + INIT_HLIST_HEAD(&ioc->cic_list); ++ INIT_RADIX_TREE(&ioc->bfq_radix_root, GFP_ATOMIC | __GFP_HIGH); ++ INIT_HLIST_HEAD(&ioc->bfq_cic_list); + ioc->ioc_data = NULL; + #if defined(CONFIG_BLK_CGROUP) || defined(CONFIG_BLK_CGROUP_MODULE) + ioc->cgroup_changed = 0; +diff --git a/block/cfq-iosched.c b/block/cfq-iosched.c +index 3548705..a120a31 100644 +--- a/block/cfq-iosched.c ++++ b/block/cfq-iosched.c +@@ -2946,7 +2946,6 @@ static void changed_ioprio(struct io_context *ioc, struct cfq_io_context *cic) + static void cfq_ioc_set_ioprio(struct io_context *ioc) + { + call_for_each_cic(ioc, changed_ioprio); +- ioc->ioprio_changed = 0; + } + + static void cfq_init_cfqq(struct cfq_data *cfqd, struct cfq_queue *cfqq, +@@ -3238,8 +3237,13 @@ retry: + goto err_free; + + out: +- smp_read_barrier_depends(); +- if (unlikely(ioc->ioprio_changed)) ++ /* ++ * test_and_clear_bit() implies a memory barrier, paired with ++ * the wmb() in fs/ioprio.c, so the value seen for ioprio is the ++ * new one. ++ */ ++ if (unlikely(test_and_clear_bit(IOC_CFQ_IOPRIO_CHANGED, ++ ioc->ioprio_changed))) + cfq_ioc_set_ioprio(ioc); + + #ifdef CONFIG_CFQ_GROUP_IOSCHED +diff --git a/fs/ioprio.c b/fs/ioprio.c +index f79dab8..6b0cb885 100644 +--- a/fs/ioprio.c ++++ b/fs/ioprio.c +@@ -31,7 +31,7 @@ + + int set_task_ioprio(struct task_struct *task, int ioprio) + { +- int err; ++ int err, i; + struct io_context *ioc; + const struct cred *cred = current_cred(), *tcred; + +@@ -61,12 +61,17 @@ int set_task_ioprio(struct task_struct *task, int ioprio) + err = -ENOMEM; + break; + } ++ /* let other ioc users see the new values */ ++ smp_wmb(); + task->io_context = ioc; + } while (1); + + if (!err) { + ioc->ioprio = ioprio; +- ioc->ioprio_changed = 1; ++ /* make sure schedulers see the new ioprio value */ ++ wmb(); ++ for (i = 0; i < IOC_IOPRIO_CHANGED_BITS; i++) ++ set_bit(i, ioc->ioprio_changed); + } + + task_unlock(task); +diff --git a/include/linux/iocontext.h b/include/linux/iocontext.h +index 5037a0a..69fdd58 100644 +--- a/include/linux/iocontext.h ++++ b/include/linux/iocontext.h +@@ -1,10 +1,10 @@ + #ifndef IOCONTEXT_H + #define IOCONTEXT_H + ++#include + #include + #include + +-struct cfq_queue; + struct cfq_ttime { + unsigned long last_end_request; + +@@ -16,7 +16,7 @@ struct cfq_ttime { + struct cfq_io_context { + void *key; + +- struct cfq_queue *cfqq[2]; ++ void *cfqq[2]; + + struct io_context *ioc; + +@@ -32,6 +32,16 @@ struct cfq_io_context { + }; + + /* ++ * Indexes into the ioprio_changed bitmap. A bit set indicates that ++ * the corresponding I/O scheduler needs to see a ioprio update. ++ */ ++enum { ++ IOC_CFQ_IOPRIO_CHANGED, ++ IOC_BFQ_IOPRIO_CHANGED, ++ IOC_IOPRIO_CHANGED_BITS ++}; ++ ++/* + * I/O subsystem state of the associated processes. It is refcounted + * and kmalloc'ed. These could be shared between processes. 
+ */ +@@ -43,7 +53,7 @@ struct io_context { + spinlock_t lock; + + unsigned short ioprio; +- unsigned short ioprio_changed; ++ DECLARE_BITMAP(ioprio_changed, IOC_IOPRIO_CHANGED_BITS); + + #if defined(CONFIG_BLK_CGROUP) || defined(CONFIG_BLK_CGROUP_MODULE) + unsigned short cgroup_changed; +@@ -57,6 +67,8 @@ struct io_context { + + struct radix_tree_root radix_root; + struct hlist_head cic_list; ++ struct radix_tree_root bfq_radix_root; ++ struct hlist_head bfq_cic_list; + void __rcu *ioc_data; + }; + +-- +1.7.10.4 + diff --git a/3.2.34/0002-AppArmor-compatibility-patch-for-v5-interface.patch b/3.2.34/0002-AppArmor-compatibility-patch-for-v5-interface.patch new file mode 100644 index 0000000..10d4640 --- /dev/null +++ b/3.2.34/0002-AppArmor-compatibility-patch-for-v5-interface.patch @@ -0,0 +1,391 @@ +From a2515f25ad5a7833ddc5a032d34eee6a5ddee3a2 Mon Sep 17 00:00:00 2001 +From: John Johansen +Date: Wed, 10 Aug 2011 22:02:40 -0700 +Subject: [PATCH 2/3] AppArmor: compatibility patch for v5 interface + +Signed-off-by: John Johansen +--- + security/apparmor/Kconfig | 9 + + security/apparmor/Makefile | 1 + + security/apparmor/apparmorfs-24.c | 287 ++++++++++++++++++++++++++++++++ + security/apparmor/apparmorfs.c | 18 ++- + security/apparmor/include/apparmorfs.h | 6 + + 5 files changed, 319 insertions(+), 2 deletions(-) + create mode 100644 security/apparmor/apparmorfs-24.c + +diff --git a/security/apparmor/Kconfig b/security/apparmor/Kconfig +index 9b9013b..51ebf96 100644 +--- a/security/apparmor/Kconfig ++++ b/security/apparmor/Kconfig +@@ -29,3 +29,12 @@ config SECURITY_APPARMOR_BOOTPARAM_VALUE + boot. + + If you are unsure how to answer this question, answer 1. ++ ++config SECURITY_APPARMOR_COMPAT_24 ++ bool "Enable AppArmor 2.4 compatability" ++ depends on SECURITY_APPARMOR ++ default y ++ help ++ This option enables compatability with AppArmor 2.4. It is ++ recommended if compatability with older versions of AppArmor ++ is desired. +diff --git a/security/apparmor/Makefile b/security/apparmor/Makefile +index 7cefef9..0bb604b 100644 +--- a/security/apparmor/Makefile ++++ b/security/apparmor/Makefile +@@ -5,6 +5,7 @@ obj-$(CONFIG_SECURITY_APPARMOR) += apparmor.o + apparmor-y := apparmorfs.o audit.o capability.o context.o ipc.o lib.o match.o \ + path.o domain.o policy.o policy_unpack.o procattr.o lsm.o \ + resource.o sid.o file.o net.o ++apparmor-$(CONFIG_SECURITY_APPARMOR_COMPAT_24) += apparmorfs-24.o + + clean-files := capability_names.h rlim_names.h af_names.h + +diff --git a/security/apparmor/apparmorfs-24.c b/security/apparmor/apparmorfs-24.c +new file mode 100644 +index 0000000..dc8c744 +--- /dev/null ++++ b/security/apparmor/apparmorfs-24.c +@@ -0,0 +1,287 @@ ++/* ++ * AppArmor security module ++ * ++ * This file contains AppArmor /sys/kernel/secrutiy/apparmor interface functions ++ * ++ * Copyright (C) 1998-2008 Novell/SUSE ++ * Copyright 2009-2010 Canonical Ltd. ++ * ++ * This program is free software; you can redistribute it and/or ++ * modify it under the terms of the GNU General Public License as ++ * published by the Free Software Foundation, version 2 of the ++ * License. ++ * ++ * ++ * This file contain functions providing an interface for <= AppArmor 2.4 ++ * compatibility. It is dependent on CONFIG_SECURITY_APPARMOR_COMPAT_24 ++ * being set (see Makefile). 
++ */ ++ ++#include ++#include ++#include ++#include ++#include ++#include ++ ++#include "include/apparmor.h" ++#include "include/audit.h" ++#include "include/context.h" ++#include "include/policy.h" ++ ++ ++/* apparmor/matching */ ++static ssize_t aa_matching_read(struct file *file, char __user *buf, ++ size_t size, loff_t *ppos) ++{ ++ const char matching[] = "pattern=aadfa audit perms=crwxamlk/ " ++ "user::other"; ++ ++ return simple_read_from_buffer(buf, size, ppos, matching, ++ sizeof(matching) - 1); ++} ++ ++const struct file_operations aa_fs_matching_fops = { ++ .read = aa_matching_read, ++}; ++ ++/* apparmor/features */ ++static ssize_t aa_features_read(struct file *file, char __user *buf, ++ size_t size, loff_t *ppos) ++{ ++ const char features[] = "file=3.1 capability=2.0 network=1.0 " ++ "change_hat=1.5 change_profile=1.1 " "aanamespaces=1.1 rlimit=1.1"; ++ ++ return simple_read_from_buffer(buf, size, ppos, features, ++ sizeof(features) - 1); ++} ++ ++const struct file_operations aa_fs_features_fops = { ++ .read = aa_features_read, ++}; ++ ++/** ++ * __next_namespace - find the next namespace to list ++ * @root: root namespace to stop search at (NOT NULL) ++ * @ns: current ns position (NOT NULL) ++ * ++ * Find the next namespace from @ns under @root and handle all locking needed ++ * while switching current namespace. ++ * ++ * Returns: next namespace or NULL if at last namespace under @root ++ * NOTE: will not unlock root->lock ++ */ ++static struct aa_namespace *__next_namespace(struct aa_namespace *root, ++ struct aa_namespace *ns) ++{ ++ struct aa_namespace *parent; ++ ++ /* is next namespace a child */ ++ if (!list_empty(&ns->sub_ns)) { ++ struct aa_namespace *next; ++ next = list_first_entry(&ns->sub_ns, typeof(*ns), base.list); ++ read_lock(&next->lock); ++ return next; ++ } ++ ++ /* check if the next ns is a sibling, parent, gp, .. */ ++ parent = ns->parent; ++ while (parent) { ++ read_unlock(&ns->lock); ++ list_for_each_entry_continue(ns, &parent->sub_ns, base.list) { ++ read_lock(&ns->lock); ++ return ns; ++ } ++ if (parent == root) ++ return NULL; ++ ns = parent; ++ parent = parent->parent; ++ } ++ ++ return NULL; ++} ++ ++/** ++ * __first_profile - find the first profile in a namespace ++ * @root: namespace that is root of profiles being displayed (NOT NULL) ++ * @ns: namespace to start in (NOT NULL) ++ * ++ * Returns: unrefcounted profile or NULL if no profile ++ */ ++static struct aa_profile *__first_profile(struct aa_namespace *root, ++ struct aa_namespace *ns) ++{ ++ for ( ; ns; ns = __next_namespace(root, ns)) { ++ if (!list_empty(&ns->base.profiles)) ++ return list_first_entry(&ns->base.profiles, ++ struct aa_profile, base.list); ++ } ++ return NULL; ++} ++ ++/** ++ * __next_profile - step to the next profile in a profile tree ++ * @profile: current profile in tree (NOT NULL) ++ * ++ * Perform a depth first taversal on the profile tree in a namespace ++ * ++ * Returns: next profile or NULL if done ++ * Requires: profile->ns.lock to be held ++ */ ++static struct aa_profile *__next_profile(struct aa_profile *p) ++{ ++ struct aa_profile *parent; ++ struct aa_namespace *ns = p->ns; ++ ++ /* is next profile a child */ ++ if (!list_empty(&p->base.profiles)) ++ return list_first_entry(&p->base.profiles, typeof(*p), ++ base.list); ++ ++ /* is next profile a sibling, parent sibling, gp, subling, .. 
*/ ++ parent = p->parent; ++ while (parent) { ++ list_for_each_entry_continue(p, &parent->base.profiles, ++ base.list) ++ return p; ++ p = parent; ++ parent = parent->parent; ++ } ++ ++ /* is next another profile in the namespace */ ++ list_for_each_entry_continue(p, &ns->base.profiles, base.list) ++ return p; ++ ++ return NULL; ++} ++ ++/** ++ * next_profile - step to the next profile in where ever it may be ++ * @root: root namespace (NOT NULL) ++ * @profile: current profile (NOT NULL) ++ * ++ * Returns: next profile or NULL if there isn't one ++ */ ++static struct aa_profile *next_profile(struct aa_namespace *root, ++ struct aa_profile *profile) ++{ ++ struct aa_profile *next = __next_profile(profile); ++ if (next) ++ return next; ++ ++ /* finished all profiles in namespace move to next namespace */ ++ return __first_profile(root, __next_namespace(root, profile->ns)); ++} ++ ++/** ++ * p_start - start a depth first traversal of profile tree ++ * @f: seq_file to fill ++ * @pos: current position ++ * ++ * Returns: first profile under current namespace or NULL if none found ++ * ++ * acquires first ns->lock ++ */ ++static void *p_start(struct seq_file *f, loff_t *pos) ++ __acquires(root->lock) ++{ ++ struct aa_profile *profile = NULL; ++ struct aa_namespace *root = aa_current_profile()->ns; ++ loff_t l = *pos; ++ f->private = aa_get_namespace(root); ++ ++ ++ /* find the first profile */ ++ read_lock(&root->lock); ++ profile = __first_profile(root, root); ++ ++ /* skip to position */ ++ for (; profile && l > 0; l--) ++ profile = next_profile(root, profile); ++ ++ return profile; ++} ++ ++/** ++ * p_next - read the next profile entry ++ * @f: seq_file to fill ++ * @p: profile previously returned ++ * @pos: current position ++ * ++ * Returns: next profile after @p or NULL if none ++ * ++ * may acquire/release locks in namespace tree as necessary ++ */ ++static void *p_next(struct seq_file *f, void *p, loff_t *pos) ++{ ++ struct aa_profile *profile = p; ++ struct aa_namespace *root = f->private; ++ (*pos)++; ++ ++ return next_profile(root, profile); ++} ++ ++/** ++ * p_stop - stop depth first traversal ++ * @f: seq_file we are filling ++ * @p: the last profile writen ++ * ++ * Release all locking done by p_start/p_next on namespace tree ++ */ ++static void p_stop(struct seq_file *f, void *p) ++ __releases(root->lock) ++{ ++ struct aa_profile *profile = p; ++ struct aa_namespace *root = f->private, *ns; ++ ++ if (profile) { ++ for (ns = profile->ns; ns && ns != root; ns = ns->parent) ++ read_unlock(&ns->lock); ++ } ++ read_unlock(&root->lock); ++ aa_put_namespace(root); ++} ++ ++/** ++ * seq_show_profile - show a profile entry ++ * @f: seq_file to file ++ * @p: current position (profile) (NOT NULL) ++ * ++ * Returns: error on failure ++ */ ++static int seq_show_profile(struct seq_file *f, void *p) ++{ ++ struct aa_profile *profile = (struct aa_profile *)p; ++ struct aa_namespace *root = f->private; ++ ++ if (profile->ns != root) ++ seq_printf(f, ":%s://", aa_ns_name(root, profile->ns)); ++ seq_printf(f, "%s (%s)\n", profile->base.hname, ++ COMPLAIN_MODE(profile) ? 
"complain" : "enforce"); ++ ++ return 0; ++} ++ ++static const struct seq_operations aa_fs_profiles_op = { ++ .start = p_start, ++ .next = p_next, ++ .stop = p_stop, ++ .show = seq_show_profile, ++}; ++ ++static int profiles_open(struct inode *inode, struct file *file) ++{ ++ return seq_open(file, &aa_fs_profiles_op); ++} ++ ++static int profiles_release(struct inode *inode, struct file *file) ++{ ++ return seq_release(inode, file); ++} ++ ++const struct file_operations aa_fs_profiles_fops = { ++ .open = profiles_open, ++ .read = seq_read, ++ .llseek = seq_lseek, ++ .release = profiles_release, ++}; +diff --git a/security/apparmor/apparmorfs.c b/security/apparmor/apparmorfs.c +index 0848292..28c52ac 100644 +--- a/security/apparmor/apparmorfs.c ++++ b/security/apparmor/apparmorfs.c +@@ -187,7 +187,11 @@ void __init aa_destroy_aafs(void) + aafs_remove(".remove"); + aafs_remove(".replace"); + aafs_remove(".load"); +- ++#ifdef CONFIG_SECURITY_APPARMOR_COMPAT_24 ++ aafs_remove("profiles"); ++ aafs_remove("matching"); ++ aafs_remove("features"); ++#endif + securityfs_remove(aa_fs_dentry); + aa_fs_dentry = NULL; + } +@@ -218,7 +222,17 @@ int __init aa_create_aafs(void) + aa_fs_dentry = NULL; + goto error; + } +- ++#ifdef CONFIG_SECURITY_APPARMOR_COMPAT_24 ++ error = aafs_create("matching", 0444, &aa_fs_matching_fops); ++ if (error) ++ goto error; ++ error = aafs_create("features", 0444, &aa_fs_features_fops); ++ if (error) ++ goto error; ++#endif ++ error = aafs_create("profiles", 0440, &aa_fs_profiles_fops); ++ if (error) ++ goto error; + error = aafs_create(".load", 0640, &aa_fs_profile_load); + if (error) + goto error; +diff --git a/security/apparmor/include/apparmorfs.h b/security/apparmor/include/apparmorfs.h +index cb1e93a..14f955c 100644 +--- a/security/apparmor/include/apparmorfs.h ++++ b/security/apparmor/include/apparmorfs.h +@@ -17,4 +17,10 @@ + + extern void __init aa_destroy_aafs(void); + ++#ifdef CONFIG_SECURITY_APPARMOR_COMPAT_24 ++extern const struct file_operations aa_fs_matching_fops; ++extern const struct file_operations aa_fs_features_fops; ++extern const struct file_operations aa_fs_profiles_fops; ++#endif ++ + #endif /* __AA_APPARMORFS_H */ +-- +1.7.5.4 + diff --git a/3.2.34/0002-block-cgroups-kconfig-build-bits-for-BFQ-v5-3.2.patch b/3.2.34/0002-block-cgroups-kconfig-build-bits-for-BFQ-v5-3.2.patch new file mode 100644 index 0000000..18b9dae --- /dev/null +++ b/3.2.34/0002-block-cgroups-kconfig-build-bits-for-BFQ-v5-3.2.patch @@ -0,0 +1,46 @@ +From 9396d61706a827dfc4dcdfdc8f687e0e9e24f53d Mon Sep 17 00:00:00 2001 +From: Arianna Avanzini +Date: Mon, 19 Dec 2011 16:34:01 +0100 +Subject: [PATCH 2/3] block: cgroups, kconfig, build bits for BFQ-v5-3.2 + +Add a Kconfig option and do the related Makefile changes to compile +the BFQ I/O scheduler. Also let the cgroups subsystem know about the +BFQ I/O controller. 
+ +Signed-off-by: Fabio Checconi +Signed-off-by: Paolo Valente +Signed-off-by: Arianna Avanzini +--- + block/Makefile | 1 + + include/linux/cgroup_subsys.h | 6 ++++++ + 2 files changed, 7 insertions(+) + +diff --git a/block/Makefile b/block/Makefile +index 514c6e4..653d27b 100644 +--- a/block/Makefile ++++ b/block/Makefile +@@ -14,6 +14,7 @@ obj-$(CONFIG_BLK_DEV_THROTTLING) += blk-throttle.o + obj-$(CONFIG_IOSCHED_NOOP) += noop-iosched.o + obj-$(CONFIG_IOSCHED_DEADLINE) += deadline-iosched.o + obj-$(CONFIG_IOSCHED_CFQ) += cfq-iosched.o ++obj-$(CONFIG_IOSCHED_BFQ) += bfq-iosched.o + + obj-$(CONFIG_BLOCK_COMPAT) += compat_ioctl.o + obj-$(CONFIG_BLK_DEV_INTEGRITY) += blk-integrity.o +diff --git a/include/linux/cgroup_subsys.h b/include/linux/cgroup_subsys.h +index ac663c1..c966638 100644 +--- a/include/linux/cgroup_subsys.h ++++ b/include/linux/cgroup_subsys.h +@@ -64,3 +64,9 @@ SUBSYS(perf) + #endif + + /* */ ++ ++#ifdef CONFIG_CGROUP_BFQIO ++SUBSYS(bfqio) ++#endif ++ ++/* */ +-- +1.7.10.4 + diff --git a/3.2.34/0003-AppArmor-Allow-dfa-backward-compatibility-with-broke.patch b/3.2.34/0003-AppArmor-Allow-dfa-backward-compatibility-with-broke.patch new file mode 100644 index 0000000..be32585 --- /dev/null +++ b/3.2.34/0003-AppArmor-Allow-dfa-backward-compatibility-with-broke.patch @@ -0,0 +1,69 @@ +From 7a10d093f9779f42cb8d6affcb6a4436d3ebd6d3 Mon Sep 17 00:00:00 2001 +From: John Johansen +Date: Wed, 10 Aug 2011 22:02:41 -0700 +Subject: [PATCH 3/3] AppArmor: Allow dfa backward compatibility with broken + userspace + +The apparmor_parser when compiling policy could generate invalid dfas +that did not have sufficient padding to avoid invalid references, when +used by the kernel. The kernels check to verify the next/check table +size was broken meaning invalid dfas were being created by userspace +and not caught. + +To remain compatible with old tools that are not fixed, pad the loaded +dfas next/check table. The dfa's themselves are valid except for the +high padding for potentially invalid transitions (high bounds error), +which have a maximimum is 256 entries. So just allocate an extra null filled +256 entries for the next/check tables. This will guarentee all bounds +are good and invalid transitions go to the null (0) state. 
+ +Signed-off-by: John Johansen +--- + security/apparmor/match.c | 17 +++++++++++++++++ + 1 files changed, 17 insertions(+), 0 deletions(-) + +diff --git a/security/apparmor/match.c b/security/apparmor/match.c +index 94de6b4..081491e 100644 +--- a/security/apparmor/match.c ++++ b/security/apparmor/match.c +@@ -57,8 +57,17 @@ static struct table_header *unpack_table(char *blob, size_t bsize) + if (bsize < tsize) + goto out; + ++ /* Pad table allocation for next/check by 256 entries to remain ++ * backwards compatible with old (buggy) tools and remain safe without ++ * run time checks ++ */ ++ if (th.td_id == YYTD_ID_NXT || th.td_id == YYTD_ID_CHK) ++ tsize += 256 * th.td_flags; ++ + table = kvmalloc(tsize); + if (table) { ++ /* ensure the pad is clear, else there will be errors */ ++ memset(table, 0, tsize); + *table = th; + if (th.td_flags == YYTD_DATA8) + UNPACK_ARRAY(table->td_data, blob, th.td_lolen, +@@ -134,11 +143,19 @@ static int verify_dfa(struct aa_dfa *dfa, int flags) + goto out; + + if (flags & DFA_FLAG_VERIFY_STATES) { ++ int warning = 0; + for (i = 0; i < state_count; i++) { + if (DEFAULT_TABLE(dfa)[i] >= state_count) + goto out; + /* TODO: do check that DEF state recursion terminates */ + if (BASE_TABLE(dfa)[i] + 255 >= trans_count) { ++ if (warning) ++ continue; ++ printk(KERN_WARNING "AppArmor DFA next/check " ++ "upper bounds error fixed, upgrade " ++ "user space tools \n"); ++ warning = 1; ++ } else if (BASE_TABLE(dfa)[i] >= trans_count) { + printk(KERN_ERR "AppArmor DFA next/check upper " + "bounds error\n"); + goto out; +-- +1.7.5.4 + diff --git a/3.2.34/0003-block-introduce-the-BFQ-v5-I-O-sched-for-3.2.patch b/3.2.34/0003-block-introduce-the-BFQ-v5-I-O-sched-for-3.2.patch new file mode 100644 index 0000000..077d54f --- /dev/null +++ b/3.2.34/0003-block-introduce-the-BFQ-v5-I-O-sched-for-3.2.patch @@ -0,0 +1,5986 @@ +From 8502cfecae9cfffbd1dc0379b2b2ab48d05cf48a Mon Sep 17 00:00:00 2001 +From: Arianna Avanzini +Date: Mon, 19 Dec 2011 16:34:45 +0100 +Subject: [PATCH 3/3] block: introduce the BFQ-v5 I/O sched for 3.2 + +Add the BFQ-v5 I/O scheduler to 3.2. +The general structure is borrowed from CFQ, as much code. A (bfq_)queue is +associated to each task doing I/O on a device, and each time a scheduling +decision has to be taken a queue is selected and it is served until it expires. + + - Slices are given in the service domain: tasks are assigned budgets, + measured in number of sectors. Once got the disk, a task must + however consume its assigned budget within a configurable maximum time + (by default, the maximum possible value of the budgets is automatically + computed to comply with this timeout). This allows the desired latency + vs "throughput boosting" tradeoff to be set. + + - Budgets are scheduled according to a variant of WF2Q+, implemented + using an augmented rb-tree to take eligibility into account while + preserving an O(log N) overall complexity. + + - A low-latency tunable is provided; if enabled, both interactive and soft + real-time applications are guaranteed very low latency. + + - Latency guarantees are preserved also in presence of NCQ. + + - High throughput with flash-based devices, while still preserving + latency guarantees. + + - Useful features borrowed from CFQ: cooperating-queues merging (with + some additional optimizations with respect to the original CFQ version), + static fallback queue for OOM. + + - BFQ supports full hierarchical scheduling, exporting a cgroups + interface. 
Each node has a full scheduler, so each group can + be assigned its own ioprio and an ioprio_class. + + - If the cgroups interface is used, weights can be explictly assigned, + otherwise ioprio values are mapped to weights using the relation + weight = IOPRIO_BE_NR - ioprio. + + - ioprio classes are served in strict priority order, i.e., lower + priority queues are not served as long as there are higher priority + queues. Among queues in the same class the bandwidth is distributed + in proportion to the weights of each queue. A very thin extra bandwidth + is however guaranteed to the Idle class, to prevent it from starving. + +Signed-off-by: Paolo Valente +Signed-off-by: Arianna Avanzini +--- + block/bfq-cgroup.c | 831 ++++++++++++++ + block/bfq-ioc.c | 380 +++++++ + block/bfq-iosched.c | 3021 +++++++++++++++++++++++++++++++++++++++++++++++++++ + block/bfq-sched.c | 1066 ++++++++++++++++++ + block/bfq.h | 593 ++++++++++ + 5 files changed, 5891 insertions(+) + create mode 100644 block/bfq-cgroup.c + create mode 100644 block/bfq-ioc.c + create mode 100644 block/bfq-iosched.c + create mode 100644 block/bfq-sched.c + create mode 100644 block/bfq.h + +diff --git a/block/bfq-cgroup.c block/bfq-cgroup.c +new file mode 100644 +index 0000000..74ae73b +--- /dev/null ++++ block/bfq-cgroup.c +@@ -0,0 +1,831 @@ ++/* ++ * BFQ: CGROUPS support. ++ * ++ * Based on ideas and code from CFQ: ++ * Copyright (C) 2003 Jens Axboe ++ * ++ * Copyright (C) 2008 Fabio Checconi ++ * Paolo Valente ++ * ++ * Licensed under the GPL-2 as detailed in the accompanying COPYING.BFQ file. ++ */ ++ ++#ifdef CONFIG_CGROUP_BFQIO ++static struct bfqio_cgroup bfqio_root_cgroup = { ++ .weight = BFQ_DEFAULT_GRP_WEIGHT, ++ .ioprio = BFQ_DEFAULT_GRP_IOPRIO, ++ .ioprio_class = BFQ_DEFAULT_GRP_CLASS, ++}; ++ ++static inline void bfq_init_entity(struct bfq_entity *entity, ++ struct bfq_group *bfqg) ++{ ++ entity->weight = entity->new_weight; ++ entity->orig_weight = entity->new_weight; ++ entity->ioprio = entity->new_ioprio; ++ entity->ioprio_class = entity->new_ioprio_class; ++ entity->parent = bfqg->my_entity; ++ entity->sched_data = &bfqg->sched_data; ++} ++ ++static struct bfqio_cgroup *cgroup_to_bfqio(struct cgroup *cgroup) ++{ ++ return container_of(cgroup_subsys_state(cgroup, bfqio_subsys_id), ++ struct bfqio_cgroup, css); ++} ++ ++/* ++ * Search the bfq_group for bfqd into the hash table (by now only a list) ++ * of bgrp. Must be called under rcu_read_lock(). 
++ */ ++static struct bfq_group *bfqio_lookup_group(struct bfqio_cgroup *bgrp, ++ struct bfq_data *bfqd) ++{ ++ struct bfq_group *bfqg; ++ struct hlist_node *n; ++ void *key; ++ ++ hlist_for_each_entry_rcu(bfqg, n, &bgrp->group_data, group_node) { ++ key = rcu_dereference(bfqg->bfqd); ++ if (key == bfqd) ++ return bfqg; ++ } ++ ++ return NULL; ++} ++ ++static inline void bfq_group_init_entity(struct bfqio_cgroup *bgrp, ++ struct bfq_group *bfqg) ++{ ++ struct bfq_entity *entity = &bfqg->entity; ++ ++ entity->weight = entity->new_weight = bgrp->weight; ++ entity->orig_weight = entity->new_weight; ++ entity->ioprio = entity->new_ioprio = bgrp->ioprio; ++ entity->ioprio_class = entity->new_ioprio_class = bgrp->ioprio_class; ++ entity->ioprio_changed = 1; ++ entity->my_sched_data = &bfqg->sched_data; ++} ++ ++static inline void bfq_group_set_parent(struct bfq_group *bfqg, ++ struct bfq_group *parent) ++{ ++ struct bfq_entity *entity; ++ ++ BUG_ON(parent == NULL); ++ BUG_ON(bfqg == NULL); ++ ++ entity = &bfqg->entity; ++ entity->parent = parent->my_entity; ++ entity->sched_data = &parent->sched_data; ++} ++ ++/** ++ * bfq_group_chain_alloc - allocate a chain of groups. ++ * @bfqd: queue descriptor. ++ * @cgroup: the leaf cgroup this chain starts from. ++ * ++ * Allocate a chain of groups starting from the one belonging to ++ * @cgroup up to the root cgroup. Stop if a cgroup on the chain ++ * to the root has already an allocated group on @bfqd. ++ */ ++static struct bfq_group *bfq_group_chain_alloc(struct bfq_data *bfqd, ++ struct cgroup *cgroup) ++{ ++ struct bfqio_cgroup *bgrp; ++ struct bfq_group *bfqg, *prev = NULL, *leaf = NULL; ++ ++ for (; cgroup != NULL; cgroup = cgroup->parent) { ++ bgrp = cgroup_to_bfqio(cgroup); ++ ++ bfqg = bfqio_lookup_group(bgrp, bfqd); ++ if (bfqg != NULL) { ++ /* ++ * All the cgroups in the path from there to the ++ * root must have a bfq_group for bfqd, so we don't ++ * need any more allocations. ++ */ ++ break; ++ } ++ ++ bfqg = kzalloc(sizeof(*bfqg), GFP_ATOMIC); ++ if (bfqg == NULL) ++ goto cleanup; ++ ++ bfq_group_init_entity(bgrp, bfqg); ++ bfqg->my_entity = &bfqg->entity; ++ ++ if (leaf == NULL) { ++ leaf = bfqg; ++ prev = leaf; ++ } else { ++ bfq_group_set_parent(prev, bfqg); ++ /* ++ * Build a list of allocated nodes using the bfqd ++ * filed, that is still unused and will be initialized ++ * only after the node will be connected. ++ */ ++ prev->bfqd = bfqg; ++ prev = bfqg; ++ } ++ } ++ ++ return leaf; ++ ++cleanup: ++ while (leaf != NULL) { ++ prev = leaf; ++ leaf = leaf->bfqd; ++ kfree(prev); ++ } ++ ++ return NULL; ++} ++ ++/** ++ * bfq_group_chain_link - link an allocatd group chain to a cgroup hierarchy. ++ * @bfqd: the queue descriptor. ++ * @cgroup: the leaf cgroup to start from. ++ * @leaf: the leaf group (to be associated to @cgroup). ++ * ++ * Try to link a chain of groups to a cgroup hierarchy, connecting the ++ * nodes bottom-up, so we can be sure that when we find a cgroup in the ++ * hierarchy that already as a group associated to @bfqd all the nodes ++ * in the path to the root cgroup have one too. ++ * ++ * On locking: the queue lock protects the hierarchy (there is a hierarchy ++ * per device) while the bfqio_cgroup lock protects the list of groups ++ * belonging to the same cgroup. 
++ */ ++static void bfq_group_chain_link(struct bfq_data *bfqd, struct cgroup *cgroup, ++ struct bfq_group *leaf) ++{ ++ struct bfqio_cgroup *bgrp; ++ struct bfq_group *bfqg, *next, *prev = NULL; ++ unsigned long flags; ++ ++ assert_spin_locked(bfqd->queue->queue_lock); ++ ++ for (; cgroup != NULL && leaf != NULL; cgroup = cgroup->parent) { ++ bgrp = cgroup_to_bfqio(cgroup); ++ next = leaf->bfqd; ++ ++ bfqg = bfqio_lookup_group(bgrp, bfqd); ++ BUG_ON(bfqg != NULL); ++ ++ spin_lock_irqsave(&bgrp->lock, flags); ++ ++ rcu_assign_pointer(leaf->bfqd, bfqd); ++ hlist_add_head_rcu(&leaf->group_node, &bgrp->group_data); ++ hlist_add_head(&leaf->bfqd_node, &bfqd->group_list); ++ ++ spin_unlock_irqrestore(&bgrp->lock, flags); ++ ++ prev = leaf; ++ leaf = next; ++ } ++ ++ BUG_ON(cgroup == NULL && leaf != NULL); ++ if (cgroup != NULL && prev != NULL) { ++ bgrp = cgroup_to_bfqio(cgroup); ++ bfqg = bfqio_lookup_group(bgrp, bfqd); ++ bfq_group_set_parent(prev, bfqg); ++ } ++} ++ ++/** ++ * bfq_find_alloc_group - return the group associated to @bfqd in @cgroup. ++ * @bfqd: queue descriptor. ++ * @cgroup: cgroup being searched for. ++ * ++ * Return a group associated to @bfqd in @cgroup, allocating one if ++ * necessary. When a group is returned all the cgroups in the path ++ * to the root have a group associated to @bfqd. ++ * ++ * If the allocation fails, return the root group: this breaks guarantees ++ * but is a safe fallbak. If this loss becames a problem it can be ++ * mitigated using the equivalent weight (given by the product of the ++ * weights of the groups in the path from @group to the root) in the ++ * root scheduler. ++ * ++ * We allocate all the missing nodes in the path from the leaf cgroup ++ * to the root and we connect the nodes only after all the allocations ++ * have been successful. ++ */ ++static struct bfq_group *bfq_find_alloc_group(struct bfq_data *bfqd, ++ struct cgroup *cgroup) ++{ ++ struct bfqio_cgroup *bgrp = cgroup_to_bfqio(cgroup); ++ struct bfq_group *bfqg; ++ ++ bfqg = bfqio_lookup_group(bgrp, bfqd); ++ if (bfqg != NULL) ++ return bfqg; ++ ++ bfqg = bfq_group_chain_alloc(bfqd, cgroup); ++ if (bfqg != NULL) ++ bfq_group_chain_link(bfqd, cgroup, bfqg); ++ else ++ bfqg = bfqd->root_group; ++ ++ return bfqg; ++} ++ ++/** ++ * bfq_bfqq_move - migrate @bfqq to @bfqg. ++ * @bfqd: queue descriptor. ++ * @bfqq: the queue to move. ++ * @entity: @bfqq's entity. ++ * @bfqg: the group to move to. ++ * ++ * Move @bfqq to @bfqg, deactivating it from its old group and reactivating ++ * it on the new one. Avoid putting the entity on the old group idle tree. ++ * ++ * Must be called under the queue lock; the cgroup owning @bfqg must ++ * not disappear (by now this just means that we are called under ++ * rcu_read_lock()). ++ */ ++static void bfq_bfqq_move(struct bfq_data *bfqd, struct bfq_queue *bfqq, ++ struct bfq_entity *entity, struct bfq_group *bfqg) ++{ ++ int busy, resume; ++ ++ busy = bfq_bfqq_busy(bfqq); ++ resume = !RB_EMPTY_ROOT(&bfqq->sort_list); ++ ++ BUG_ON(resume && !entity->on_st); ++ BUG_ON(busy && !resume && entity->on_st && bfqq != bfqd->active_queue); ++ ++ if (busy) { ++ BUG_ON(atomic_read(&bfqq->ref) < 2); ++ ++ if (!resume) ++ bfq_del_bfqq_busy(bfqd, bfqq, 0); ++ else ++ bfq_deactivate_bfqq(bfqd, bfqq, 0); ++ } else if (entity->on_st) ++ bfq_put_idle_entity(bfq_entity_service_tree(entity), entity); ++ ++ /* ++ * Here we use a reference to bfqg. 
We don't need a refcounter ++ * as the cgroup reference will not be dropped, so that its ++ * destroy() callback will not be invoked. ++ */ ++ entity->parent = bfqg->my_entity; ++ entity->sched_data = &bfqg->sched_data; ++ ++ if (busy && resume) ++ bfq_activate_bfqq(bfqd, bfqq); ++} ++ ++/** ++ * __bfq_cic_change_cgroup - move @cic to @cgroup. ++ * @bfqd: the queue descriptor. ++ * @cic: the cic to move. ++ * @cgroup: the cgroup to move to. ++ * ++ * Move cic to cgroup, assuming that bfqd->queue is locked; the caller ++ * has to make sure that the reference to cgroup is valid across the call. ++ * ++ * NOTE: an alternative approach might have been to store the current ++ * cgroup in bfqq and getting a reference to it, reducing the lookup ++ * time here, at the price of slightly more complex code. ++ */ ++static struct bfq_group *__bfq_cic_change_cgroup(struct bfq_data *bfqd, ++ struct cfq_io_context *cic, ++ struct cgroup *cgroup) ++{ ++ struct bfq_queue *async_bfqq = cic_to_bfqq(cic, 0); ++ struct bfq_queue *sync_bfqq = cic_to_bfqq(cic, 1); ++ struct bfq_entity *entity; ++ struct bfq_group *bfqg; ++ ++ bfqg = bfq_find_alloc_group(bfqd, cgroup); ++ if (async_bfqq != NULL) { ++ entity = &async_bfqq->entity; ++ ++ if (entity->sched_data != &bfqg->sched_data) { ++ cic_set_bfqq(cic, NULL, 0); ++ bfq_log_bfqq(bfqd, async_bfqq, ++ "cic_change_group: %p %d", ++ async_bfqq, atomic_read(&async_bfqq->ref)); ++ bfq_put_queue(async_bfqq); ++ } ++ } ++ ++ if (sync_bfqq != NULL) { ++ entity = &sync_bfqq->entity; ++ if (entity->sched_data != &bfqg->sched_data) ++ bfq_bfqq_move(bfqd, sync_bfqq, entity, bfqg); ++ } ++ ++ return bfqg; ++} ++ ++/** ++ * bfq_cic_change_cgroup - move @cic to @cgroup. ++ * @cic: the cic being migrated. ++ * @cgroup: the destination cgroup. ++ * ++ * When the task owning @cic is moved to @cgroup, @cic is immediately ++ * moved into its new parent group. ++ */ ++static void bfq_cic_change_cgroup(struct cfq_io_context *cic, ++ struct cgroup *cgroup) ++{ ++ struct bfq_data *bfqd; ++ unsigned long uninitialized_var(flags); ++ ++ bfqd = bfq_get_bfqd_locked(&cic->key, &flags); ++ if (bfqd != NULL && ++ !strncmp(bfqd->queue->elevator->elevator_type->elevator_name, ++ "bfq", ELV_NAME_MAX)) { ++ __bfq_cic_change_cgroup(bfqd, cic, cgroup); ++ bfq_put_bfqd_unlock(bfqd, &flags); ++ } ++} ++ ++/** ++ * bfq_cic_update_cgroup - update the cgroup of @cic. ++ * @cic: the @cic to update. ++ * ++ * Make sure that @cic is enqueued in the cgroup of the current task. ++ * We need this in addition to moving cics during the cgroup attach ++ * phase because the task owning @cic could be at its first disk ++ * access or we may end up in the root cgroup as the result of a ++ * memory allocation failure and here we try to move to the right ++ * group. ++ * ++ * Must be called under the queue lock. It is safe to use the returned ++ * value even after the rcu_read_unlock() as the migration/destruction ++ * paths act under the queue lock too. IOW it is impossible to race with ++ * group migration/destruction and end up with an invalid group as: ++ * a) here cgroup has not yet been destroyed, nor its destroy callback ++ * has started execution, as current holds a reference to it, ++ * b) if it is destroyed after rcu_read_unlock() [after current is ++ * migrated to a different cgroup] its attach() callback will have ++ * taken care of remove all the references to the old cgroup data. 
++ */ ++static struct bfq_group *bfq_cic_update_cgroup(struct cfq_io_context *cic) ++{ ++ struct bfq_data *bfqd = cic->key; ++ struct bfq_group *bfqg; ++ struct cgroup *cgroup; ++ ++ BUG_ON(bfqd == NULL); ++ ++ rcu_read_lock(); ++ cgroup = task_cgroup(current, bfqio_subsys_id); ++ bfqg = __bfq_cic_change_cgroup(bfqd, cic, cgroup); ++ rcu_read_unlock(); ++ ++ return bfqg; ++} ++ ++/** ++ * bfq_flush_idle_tree - deactivate any entity on the idle tree of @st. ++ * @st: the service tree being flushed. ++ */ ++static inline void bfq_flush_idle_tree(struct bfq_service_tree *st) ++{ ++ struct bfq_entity *entity = st->first_idle; ++ ++ for (; entity != NULL; entity = st->first_idle) ++ __bfq_deactivate_entity(entity, 0); ++} ++ ++/** ++ * bfq_reparent_leaf_entity - move leaf entity to the root_group. ++ * @bfqd: the device data structure with the root group. ++ * @entity: the entity to move. ++ */ ++static inline void bfq_reparent_leaf_entity(struct bfq_data *bfqd, ++ struct bfq_entity *entity) ++{ ++ struct bfq_queue *bfqq = bfq_entity_to_bfqq(entity); ++ ++ BUG_ON(bfqq == NULL); ++ bfq_bfqq_move(bfqd, bfqq, entity, bfqd->root_group); ++ return; ++} ++ ++/** ++ * bfq_reparent_active_entities - move to the root group all active entities. ++ * @bfqd: the device data structure with the root group. ++ * @bfqg: the group to move from. ++ * @st: the service tree with the entities. ++ * ++ * Needs queue_lock to be taken and reference to be valid over the call. ++ */ ++static inline void bfq_reparent_active_entities(struct bfq_data *bfqd, ++ struct bfq_group *bfqg, ++ struct bfq_service_tree *st) ++{ ++ struct rb_root *active = &st->active; ++ struct bfq_entity *entity = NULL; ++ ++ if (!RB_EMPTY_ROOT(&st->active)) ++ entity = bfq_entity_of(rb_first(active)); ++ ++ for (; entity != NULL ; entity = bfq_entity_of(rb_first(active))) ++ bfq_reparent_leaf_entity(bfqd, entity); ++ ++ if (bfqg->sched_data.active_entity != NULL) ++ bfq_reparent_leaf_entity(bfqd, bfqg->sched_data.active_entity); ++ ++ return; ++} ++ ++/** ++ * bfq_destroy_group - destroy @bfqg. ++ * @bgrp: the bfqio_cgroup containing @bfqg. ++ * @bfqg: the group being destroyed. ++ * ++ * Destroy @bfqg, making sure that it is not referenced from its parent. ++ */ ++static void bfq_destroy_group(struct bfqio_cgroup *bgrp, struct bfq_group *bfqg) ++{ ++ struct bfq_data *bfqd; ++ struct bfq_service_tree *st; ++ struct bfq_entity *entity = bfqg->my_entity; ++ unsigned long uninitialized_var(flags); ++ int i; ++ ++ hlist_del(&bfqg->group_node); ++ ++ /* ++ * Empty all service_trees belonging to this group before deactivating ++ * the group itself. ++ */ ++ for (i = 0; i < BFQ_IOPRIO_CLASSES; i++) { ++ st = bfqg->sched_data.service_tree + i; ++ ++ /* ++ * The idle tree may still contain bfq_queues belonging ++ * to exited task because they never migrated to a different ++ * cgroup from the one being destroyed now. Noone else ++ * can access them so it's safe to act without any lock. ++ */ ++ bfq_flush_idle_tree(st); ++ ++ /* ++ * It may happen that some queues are still active ++ * (busy) upon group destruction (if the corresponding ++ * processes have been forced to terminate). We move ++ * all the leaf entities corresponding to these queues ++ * to the root_group. ++ * Also, it may happen that the group has an entity ++ * under service, which is disconnected from the active ++ * tree: it must be moved, too. ++ * There is no need to put the sync queues, as the ++ * scheduler has taken no reference. 
++ */ ++ bfqd = bfq_get_bfqd_locked(&bfqg->bfqd, &flags); ++ if (bfqd != NULL) { ++ bfq_reparent_active_entities(bfqd, bfqg, st); ++ bfq_put_bfqd_unlock(bfqd, &flags); ++ } ++ BUG_ON(!RB_EMPTY_ROOT(&st->active)); ++ BUG_ON(!RB_EMPTY_ROOT(&st->idle)); ++ } ++ BUG_ON(bfqg->sched_data.next_active != NULL); ++ BUG_ON(bfqg->sched_data.active_entity != NULL); ++ ++ /* ++ * We may race with device destruction, take extra care when ++ * dereferencing bfqg->bfqd. ++ */ ++ bfqd = bfq_get_bfqd_locked(&bfqg->bfqd, &flags); ++ if (bfqd != NULL) { ++ hlist_del(&bfqg->bfqd_node); ++ __bfq_deactivate_entity(entity, 0); ++ bfq_put_async_queues(bfqd, bfqg); ++ bfq_put_bfqd_unlock(bfqd, &flags); ++ } ++ BUG_ON(entity->tree != NULL); ++ ++ /* ++ * No need to defer the kfree() to the end of the RCU grace ++ * period: we are called from the destroy() callback of our ++ * cgroup, so we can be sure that noone is a) still using ++ * this cgroup or b) doing lookups in it. ++ */ ++ kfree(bfqg); ++} ++ ++/** ++ * bfq_disconnect_groups - diconnect @bfqd from all its groups. ++ * @bfqd: the device descriptor being exited. ++ * ++ * When the device exits we just make sure that no lookup can return ++ * the now unused group structures. They will be deallocated on cgroup ++ * destruction. ++ */ ++static void bfq_disconnect_groups(struct bfq_data *bfqd) ++{ ++ struct hlist_node *pos, *n; ++ struct bfq_group *bfqg; ++ ++ bfq_log(bfqd, "disconnect_groups beginning") ; ++ hlist_for_each_entry_safe(bfqg, pos, n, &bfqd->group_list, bfqd_node) { ++ hlist_del(&bfqg->bfqd_node); ++ ++ __bfq_deactivate_entity(bfqg->my_entity, 0); ++ ++ /* ++ * Don't remove from the group hash, just set an ++ * invalid key. No lookups can race with the ++ * assignment as bfqd is being destroyed; this ++ * implies also that new elements cannot be added ++ * to the list. ++ */ ++ rcu_assign_pointer(bfqg->bfqd, NULL); ++ ++ bfq_log(bfqd, "disconnect_groups: put async for group %p", ++ bfqg) ; ++ bfq_put_async_queues(bfqd, bfqg); ++ } ++} ++ ++static inline void bfq_free_root_group(struct bfq_data *bfqd) ++{ ++ struct bfqio_cgroup *bgrp = &bfqio_root_cgroup; ++ struct bfq_group *bfqg = bfqd->root_group; ++ ++ bfq_put_async_queues(bfqd, bfqg); ++ ++ spin_lock_irq(&bgrp->lock); ++ hlist_del_rcu(&bfqg->group_node); ++ spin_unlock_irq(&bgrp->lock); ++ ++ /* ++ * No need to synchronize_rcu() here: since the device is gone ++ * there cannot be any read-side access to its root_group. 
++ */ ++ kfree(bfqg); ++} ++ ++static struct bfq_group *bfq_alloc_root_group(struct bfq_data *bfqd, int node) ++{ ++ struct bfq_group *bfqg; ++ struct bfqio_cgroup *bgrp; ++ int i; ++ ++ bfqg = kmalloc_node(sizeof(*bfqg), GFP_KERNEL | __GFP_ZERO, node); ++ if (bfqg == NULL) ++ return NULL; ++ ++ bfqg->entity.parent = NULL; ++ for (i = 0; i < BFQ_IOPRIO_CLASSES; i++) ++ bfqg->sched_data.service_tree[i] = BFQ_SERVICE_TREE_INIT; ++ ++ bgrp = &bfqio_root_cgroup; ++ spin_lock_irq(&bgrp->lock); ++ rcu_assign_pointer(bfqg->bfqd, bfqd); ++ hlist_add_head_rcu(&bfqg->group_node, &bgrp->group_data); ++ spin_unlock_irq(&bgrp->lock); ++ ++ return bfqg; ++} ++ ++#define SHOW_FUNCTION(__VAR) \ ++static u64 bfqio_cgroup_##__VAR##_read(struct cgroup *cgroup, \ ++ struct cftype *cftype) \ ++{ \ ++ struct bfqio_cgroup *bgrp; \ ++ u64 ret; \ ++ \ ++ if (!cgroup_lock_live_group(cgroup)) \ ++ return -ENODEV; \ ++ \ ++ bgrp = cgroup_to_bfqio(cgroup); \ ++ spin_lock_irq(&bgrp->lock); \ ++ ret = bgrp->__VAR; \ ++ spin_unlock_irq(&bgrp->lock); \ ++ \ ++ cgroup_unlock(); \ ++ \ ++ return ret; \ ++} ++ ++SHOW_FUNCTION(weight); ++SHOW_FUNCTION(ioprio); ++SHOW_FUNCTION(ioprio_class); ++#undef SHOW_FUNCTION ++ ++#define STORE_FUNCTION(__VAR, __MIN, __MAX) \ ++static int bfqio_cgroup_##__VAR##_write(struct cgroup *cgroup, \ ++ struct cftype *cftype, \ ++ u64 val) \ ++{ \ ++ struct bfqio_cgroup *bgrp; \ ++ struct bfq_group *bfqg; \ ++ struct hlist_node *n; \ ++ \ ++ if (val < (__MIN) || val > (__MAX)) \ ++ return -EINVAL; \ ++ \ ++ if (!cgroup_lock_live_group(cgroup)) \ ++ return -ENODEV; \ ++ \ ++ bgrp = cgroup_to_bfqio(cgroup); \ ++ \ ++ spin_lock_irq(&bgrp->lock); \ ++ bgrp->__VAR = (unsigned short)val; \ ++ hlist_for_each_entry(bfqg, n, &bgrp->group_data, group_node) { \ ++ bfqg->entity.new_##__VAR = (unsigned short)val; \ ++ smp_wmb(); \ ++ bfqg->entity.ioprio_changed = 1; \ ++ } \ ++ spin_unlock_irq(&bgrp->lock); \ ++ \ ++ cgroup_unlock(); \ ++ \ ++ return 0; \ ++} ++ ++STORE_FUNCTION(weight, BFQ_MIN_WEIGHT, BFQ_MAX_WEIGHT); ++STORE_FUNCTION(ioprio, 0, IOPRIO_BE_NR - 1); ++STORE_FUNCTION(ioprio_class, IOPRIO_CLASS_RT, IOPRIO_CLASS_IDLE); ++#undef STORE_FUNCTION ++ ++static struct cftype bfqio_files[] = { ++ { ++ .name = "weight", ++ .read_u64 = bfqio_cgroup_weight_read, ++ .write_u64 = bfqio_cgroup_weight_write, ++ }, ++ { ++ .name = "ioprio", ++ .read_u64 = bfqio_cgroup_ioprio_read, ++ .write_u64 = bfqio_cgroup_ioprio_write, ++ }, ++ { ++ .name = "ioprio_class", ++ .read_u64 = bfqio_cgroup_ioprio_class_read, ++ .write_u64 = bfqio_cgroup_ioprio_class_write, ++ }, ++}; ++ ++static int bfqio_populate(struct cgroup_subsys *subsys, struct cgroup *cgroup) ++{ ++ return cgroup_add_files(cgroup, subsys, bfqio_files, ++ ARRAY_SIZE(bfqio_files)); ++} ++ ++static struct cgroup_subsys_state *bfqio_create(struct cgroup_subsys *subsys, ++ struct cgroup *cgroup) ++{ ++ struct bfqio_cgroup *bgrp; ++ ++ if (cgroup->parent != NULL) { ++ bgrp = kzalloc(sizeof(*bgrp), GFP_KERNEL); ++ if (bgrp == NULL) ++ return ERR_PTR(-ENOMEM); ++ } else ++ bgrp = &bfqio_root_cgroup; ++ ++ spin_lock_init(&bgrp->lock); ++ INIT_HLIST_HEAD(&bgrp->group_data); ++ bgrp->ioprio = BFQ_DEFAULT_GRP_IOPRIO; ++ bgrp->ioprio_class = BFQ_DEFAULT_GRP_CLASS; ++ ++ return &bgrp->css; ++} ++ ++/* ++ * We cannot support shared io contexts, as we have no mean to support ++ * two tasks with the same ioc in two different groups without major rework ++ * of the main cic/bfqq data structures. 
By now we allow a task to change ++ * its cgroup only if it's the only owner of its ioc; the drawback of this ++ * behavior is that a group containing a task that forked using CLONE_IO ++ * will not be destroyed until the tasks sharing the ioc die. ++ */ ++static int bfqio_can_attach(struct cgroup_subsys *subsys, struct cgroup *cgroup, ++ struct task_struct *tsk) ++{ ++ struct io_context *ioc; ++ int ret = 0; ++ ++ /* task_lock() is needed to avoid races with exit_io_context() */ ++ task_lock(tsk); ++ ioc = tsk->io_context; ++ if (ioc != NULL && atomic_read(&ioc->nr_tasks) > 1) ++ /* ++ * ioc == NULL means that the task is either too young or ++ * exiting: if it has still no ioc the ioc can't be shared, ++ * if the task is exiting the attach will fail anyway, no ++ * matter what we return here. ++ */ ++ ret = -EINVAL; ++ task_unlock(tsk); ++ ++ return ret; ++} ++ ++static void bfqio_attach(struct cgroup_subsys *subsys, struct cgroup *cgroup, ++ struct cgroup *prev, struct task_struct *tsk) ++{ ++ struct io_context *ioc; ++ struct cfq_io_context *cic; ++ struct hlist_node *n; ++ ++ task_lock(tsk); ++ ioc = tsk->io_context; ++ if (ioc != NULL) { ++ BUG_ON(atomic_long_read(&ioc->refcount) == 0); ++ atomic_long_inc(&ioc->refcount); ++ } ++ task_unlock(tsk); ++ ++ if (ioc == NULL) ++ return; ++ ++ rcu_read_lock(); ++ hlist_for_each_entry_rcu(cic, n, &ioc->bfq_cic_list, cic_list) ++ bfq_cic_change_cgroup(cic, cgroup); ++ rcu_read_unlock(); ++ ++ put_io_context(ioc); ++} ++ ++static void bfqio_destroy(struct cgroup_subsys *subsys, struct cgroup *cgroup) ++{ ++ struct bfqio_cgroup *bgrp = cgroup_to_bfqio(cgroup); ++ struct hlist_node *n, *tmp; ++ struct bfq_group *bfqg; ++ ++ /* ++ * Since we are destroying the cgroup, there are no more tasks ++ * referencing it, and all the RCU grace periods that may have ++ * referenced it are ended (as the destruction of the parent ++ * cgroup is RCU-safe); bgrp->group_data will not be accessed by ++ * anything else and we don't need any synchronization. 
++ */ ++ hlist_for_each_entry_safe(bfqg, n, tmp, &bgrp->group_data, group_node) ++ bfq_destroy_group(bgrp, bfqg); ++ ++ BUG_ON(!hlist_empty(&bgrp->group_data)); ++ ++ kfree(bgrp); ++} ++ ++struct cgroup_subsys bfqio_subsys = { ++ .name = "bfqio", ++ .create = bfqio_create, ++ .can_attach = bfqio_can_attach, ++ .attach = bfqio_attach, ++ .destroy = bfqio_destroy, ++ .populate = bfqio_populate, ++ .subsys_id = bfqio_subsys_id, ++}; ++#else ++static inline void bfq_init_entity(struct bfq_entity *entity, ++ struct bfq_group *bfqg) ++{ ++ entity->weight = entity->new_weight; ++ entity->orig_weight = entity->new_weight; ++ entity->ioprio = entity->new_ioprio; ++ entity->ioprio_class = entity->new_ioprio_class; ++ entity->sched_data = &bfqg->sched_data; ++} ++ ++static inline struct bfq_group * ++bfq_cic_update_cgroup(struct cfq_io_context *cic) ++{ ++ struct bfq_data *bfqd = cic->key; ++ return bfqd->root_group; ++} ++ ++static inline void bfq_bfqq_move(struct bfq_data *bfqd, ++ struct bfq_queue *bfqq, ++ struct bfq_entity *entity, ++ struct bfq_group *bfqg) ++{ ++} ++ ++static inline void bfq_disconnect_groups(struct bfq_data *bfqd) ++{ ++ bfq_put_async_queues(bfqd, bfqd->root_group); ++} ++ ++static inline void bfq_free_root_group(struct bfq_data *bfqd) ++{ ++ kfree(bfqd->root_group); ++} ++ ++static struct bfq_group *bfq_alloc_root_group(struct bfq_data *bfqd, int node) ++{ ++ struct bfq_group *bfqg; ++ int i; ++ ++ bfqg = kmalloc_node(sizeof(*bfqg), GFP_KERNEL | __GFP_ZERO, node); ++ if (bfqg == NULL) ++ return NULL; ++ ++ for (i = 0; i < BFQ_IOPRIO_CLASSES; i++) ++ bfqg->sched_data.service_tree[i] = BFQ_SERVICE_TREE_INIT; ++ ++ return bfqg; ++} ++#endif +diff --git a/block/bfq-ioc.c block/bfq-ioc.c +new file mode 100644 +index 0000000..8f2b6c6 +--- /dev/null ++++ block/bfq-ioc.c +@@ -0,0 +1,380 @@ ++/* ++ * BFQ: I/O context handling. ++ * ++ * Based on ideas and code from CFQ: ++ * Copyright (C) 2003 Jens Axboe ++ * ++ * Copyright (C) 2008 Fabio Checconi ++ * Paolo Valente ++ */ ++ ++/** ++ * bfq_cic_free_rcu - deferred cic freeing. ++ * @head: RCU head of the cic to free. ++ * ++ * Free the cic containing @head and, if it was the last one and ++ * the module is exiting wake up anyone waiting for its deallocation ++ * (see bfq_exit()). ++ */ ++static void bfq_cic_free_rcu(struct rcu_head *head) ++{ ++ struct cfq_io_context *cic; ++ ++ cic = container_of(head, struct cfq_io_context, rcu_head); ++ ++ kmem_cache_free(bfq_ioc_pool, cic); ++ elv_ioc_count_dec(bfq_ioc_count); ++ ++ if (bfq_ioc_gone != NULL) { ++ spin_lock(&bfq_ioc_gone_lock); ++ if (bfq_ioc_gone != NULL && ++ !elv_ioc_count_read(bfq_ioc_count)) { ++ complete(bfq_ioc_gone); ++ bfq_ioc_gone = NULL; ++ } ++ spin_unlock(&bfq_ioc_gone_lock); ++ } ++} ++ ++static void bfq_cic_free(struct cfq_io_context *cic) ++{ ++ call_rcu(&cic->rcu_head, bfq_cic_free_rcu); ++} ++ ++/** ++ * cic_free_func - disconnect a cic ready to be freed. ++ * @ioc: the io_context @cic belongs to. ++ * @cic: the cic to be freed. ++ * ++ * Remove @cic from the @ioc radix tree hash and from its cic list, ++ * deferring the deallocation of @cic to the end of the current RCU ++ * grace period. This assumes that __bfq_exit_single_io_context() ++ * has already been called for @cic. 
++ */ ++static void cic_free_func(struct io_context *ioc, struct cfq_io_context *cic) ++{ ++ unsigned long flags; ++ unsigned long dead_key = (unsigned long) cic->key; ++ ++ BUG_ON(!(dead_key & CIC_DEAD_KEY)); ++ ++ spin_lock_irqsave(&ioc->lock, flags); ++ radix_tree_delete(&ioc->bfq_radix_root, ++ dead_key >> CIC_DEAD_INDEX_SHIFT); ++ hlist_del_init_rcu(&cic->cic_list); ++ spin_unlock_irqrestore(&ioc->lock, flags); ++ ++ bfq_cic_free(cic); ++} ++ ++static void bfq_free_io_context(struct io_context *ioc) ++{ ++ /* ++ * ioc->refcount is zero here, or we are called from elv_unregister(), ++ * so no more cic's are allowed to be linked into this ioc. So it ++ * should be ok to iterate over the known list, we will see all cic's ++ * since no new ones are added. ++ */ ++ call_for_each_cic(ioc, cic_free_func); ++} ++ ++/** ++ * __bfq_exit_single_io_context - deassociate @cic from any running task. ++ * @bfqd: bfq_data on which @cic is valid. ++ * @cic: the cic being exited. ++ * ++ * Whenever no more tasks are using @cic or @bfqd is deallocated we ++ * need to invalidate its entry in the radix tree hash table and to ++ * release the queues it refers to. ++ * ++ * Called under the queue lock. ++ */ ++static void __bfq_exit_single_io_context(struct bfq_data *bfqd, ++ struct cfq_io_context *cic) ++{ ++ struct io_context *ioc = cic->ioc; ++ ++ list_del_init(&cic->queue_list); ++ ++ /* ++ * Make sure dead mark is seen for dead queues ++ */ ++ smp_wmb(); ++ rcu_assign_pointer(cic->key, bfqd_dead_key(bfqd)); ++ ++ /* ++ * No write-side locking as no task is using @ioc (they're exited ++ * or bfqd is being deallocated. ++ */ ++ rcu_read_lock(); ++ if (rcu_dereference(ioc->ioc_data) == cic) { ++ rcu_read_unlock(); ++ spin_lock(&ioc->lock); ++ rcu_assign_pointer(ioc->ioc_data, NULL); ++ spin_unlock(&ioc->lock); ++ } else ++ rcu_read_unlock(); ++ ++ if (cic->cfqq[BLK_RW_ASYNC] != NULL) { ++ bfq_exit_bfqq(bfqd, cic->cfqq[BLK_RW_ASYNC]); ++ cic->cfqq[BLK_RW_ASYNC] = NULL; ++ } ++ ++ if (cic->cfqq[BLK_RW_SYNC] != NULL) { ++ bfq_exit_bfqq(bfqd, cic->cfqq[BLK_RW_SYNC]); ++ cic->cfqq[BLK_RW_SYNC] = NULL; ++ } ++} ++ ++/** ++ * bfq_exit_single_io_context - deassociate @cic from @ioc (unlocked version). ++ * @ioc: the io_context @cic belongs to. ++ * @cic: the cic being exited. ++ * ++ * Take the queue lock and call __bfq_exit_single_io_context() to do the ++ * rest of the work. We take care of possible races with bfq_exit_queue() ++ * using bfq_get_bfqd_locked() (and abusing a little bit the RCU mechanism). ++ */ ++static void bfq_exit_single_io_context(struct io_context *ioc, ++ struct cfq_io_context *cic) ++{ ++ struct bfq_data *bfqd; ++ unsigned long uninitialized_var(flags); ++ ++ bfqd = bfq_get_bfqd_locked(&cic->key, &flags); ++ if (bfqd != NULL) { ++ __bfq_exit_single_io_context(bfqd, cic); ++ bfq_put_bfqd_unlock(bfqd, &flags); ++ } ++} ++ ++/** ++ * bfq_exit_io_context - deassociate @ioc from all cics it owns. ++ * @ioc: the @ioc being exited. ++ * ++ * No more processes are using @ioc we need to clean up and put the ++ * internal structures we have that belongs to that process. Loop ++ * through all its cics, locking their queues and exiting them. 
++ */ ++static void bfq_exit_io_context(struct io_context *ioc) ++{ ++ call_for_each_cic(ioc, bfq_exit_single_io_context); ++} ++ ++static struct cfq_io_context *bfq_alloc_io_context(struct bfq_data *bfqd, ++ gfp_t gfp_mask) ++{ ++ struct cfq_io_context *cic; ++ ++ cic = kmem_cache_alloc_node(bfq_ioc_pool, gfp_mask | __GFP_ZERO, ++ bfqd->queue->node); ++ if (cic != NULL) { ++ cic->ttime.last_end_request = jiffies; ++ INIT_LIST_HEAD(&cic->queue_list); ++ INIT_HLIST_NODE(&cic->cic_list); ++ cic->dtor = bfq_free_io_context; ++ cic->exit = bfq_exit_io_context; ++ elv_ioc_count_inc(bfq_ioc_count); ++ } ++ ++ return cic; ++} ++ ++/** ++ * bfq_drop_dead_cic - free an exited cic. ++ * @bfqd: bfq data for the device in use. ++ * @ioc: io_context owning @cic. ++ * @cic: the @cic to free. ++ * ++ * We drop cfq io contexts lazily, so we may find a dead one. ++ */ ++static void bfq_drop_dead_cic(struct bfq_data *bfqd, struct io_context *ioc, ++ struct cfq_io_context *cic) ++{ ++ unsigned long flags; ++ ++ WARN_ON(!list_empty(&cic->queue_list)); ++ BUG_ON(cic->key != bfqd_dead_key(bfqd)); ++ ++ spin_lock_irqsave(&ioc->lock, flags); ++ ++ BUG_ON(ioc->ioc_data == cic); ++ ++ /* ++ * With shared I/O contexts two lookups may race and drop the ++ * same cic more than one time: RCU guarantees that the storage ++ * will not be freed too early, here we make sure that we do ++ * not try to remove the cic from the hashing structures multiple ++ * times. ++ */ ++ if (!hlist_unhashed(&cic->cic_list)) { ++ radix_tree_delete(&ioc->bfq_radix_root, bfqd->cic_index); ++ hlist_del_init_rcu(&cic->cic_list); ++ bfq_cic_free(cic); ++ } ++ ++ spin_unlock_irqrestore(&ioc->lock, flags); ++} ++ ++/** ++ * bfq_cic_lookup - search into @ioc a cic associated to @bfqd. ++ * @bfqd: the lookup key. ++ * @ioc: the io_context of the process doing I/O. ++ * ++ * If @ioc already has a cic associated to @bfqd return it, return %NULL ++ * otherwise. ++ */ ++static struct cfq_io_context *bfq_cic_lookup(struct bfq_data *bfqd, ++ struct io_context *ioc) ++{ ++ struct cfq_io_context *cic; ++ unsigned long flags; ++ void *k; ++ ++ if (unlikely(ioc == NULL)) ++ return NULL; ++ ++ rcu_read_lock(); ++ ++ /* We maintain a last-hit cache, to avoid browsing over the tree. */ ++ cic = rcu_dereference(ioc->ioc_data); ++ if (cic != NULL) { ++ k = rcu_dereference(cic->key); ++ if (k == bfqd) ++ goto out; ++ } ++ ++ do { ++ cic = radix_tree_lookup(&ioc->bfq_radix_root, ++ bfqd->cic_index); ++ if (cic == NULL) ++ goto out; ++ ++ k = rcu_dereference(cic->key); ++ if (unlikely(k != bfqd)) { ++ rcu_read_unlock(); ++ bfq_drop_dead_cic(bfqd, ioc, cic); ++ rcu_read_lock(); ++ continue; ++ } ++ ++ spin_lock_irqsave(&ioc->lock, flags); ++ rcu_assign_pointer(ioc->ioc_data, cic); ++ spin_unlock_irqrestore(&ioc->lock, flags); ++ break; ++ } while (1); ++ ++out: ++ rcu_read_unlock(); ++ ++ return cic; ++} ++ ++/** ++ * bfq_cic_link - add @cic to @ioc. ++ * @bfqd: bfq_data @cic refers to. ++ * @ioc: io_context @cic belongs to. ++ * @cic: the cic to link. ++ * @gfp_mask: the mask to use for radix tree preallocations. ++ * ++ * Add @cic to @ioc, using @bfqd as the search key. This enables us to ++ * lookup the process specific cfq io context when entered from the block ++ * layer. Also adds @cic to a per-bfqd list, used when this queue is ++ * removed. 
++ */ ++static int bfq_cic_link(struct bfq_data *bfqd, struct io_context *ioc, ++ struct cfq_io_context *cic, gfp_t gfp_mask) ++{ ++ unsigned long flags; ++ int ret; ++ ++ ret = radix_tree_preload(gfp_mask); ++ if (ret == 0) { ++ cic->ioc = ioc; ++ ++ /* No write-side locking, cic is not published yet. */ ++ rcu_assign_pointer(cic->key, bfqd); ++ ++ spin_lock_irqsave(&ioc->lock, flags); ++ ret = radix_tree_insert(&ioc->bfq_radix_root, ++ bfqd->cic_index, cic); ++ if (ret == 0) ++ hlist_add_head_rcu(&cic->cic_list, &ioc->bfq_cic_list); ++ spin_unlock_irqrestore(&ioc->lock, flags); ++ ++ radix_tree_preload_end(); ++ ++ if (ret == 0) { ++ spin_lock_irqsave(bfqd->queue->queue_lock, flags); ++ list_add(&cic->queue_list, &bfqd->cic_list); ++ spin_unlock_irqrestore(bfqd->queue->queue_lock, flags); ++ } ++ } ++ ++ if (ret != 0) ++ printk(KERN_ERR "bfq: cic link failed!\n"); ++ ++ return ret; ++} ++ ++/** ++ * bfq_ioc_set_ioprio - signal a priority change to the cics belonging to @ioc. ++ * @ioc: the io_context changing its priority. ++ */ ++static inline void bfq_ioc_set_ioprio(struct io_context *ioc) ++{ ++ call_for_each_cic(ioc, bfq_changed_ioprio); ++} ++ ++/** ++ * bfq_get_io_context - return the @cic associated to @bfqd in @ioc. ++ * @bfqd: the search key. ++ * @gfp_mask: the mask to use for cic allocation. ++ * ++ * Setup general io context and cfq io context. There can be several cfq ++ * io contexts per general io context, if this process is doing io to more ++ * than one device managed by cfq. ++ */ ++static struct cfq_io_context *bfq_get_io_context(struct bfq_data *bfqd, ++ gfp_t gfp_mask) ++{ ++ struct io_context *ioc = NULL; ++ struct cfq_io_context *cic; ++ ++ might_sleep_if(gfp_mask & __GFP_WAIT); ++ ++ ioc = get_io_context(gfp_mask, bfqd->queue->node); ++ if (ioc == NULL) ++ return NULL; ++ ++ /* Lookup for an existing cic. */ ++ cic = bfq_cic_lookup(bfqd, ioc); ++ if (cic != NULL) ++ goto out; ++ ++ /* Alloc one if needed. */ ++ cic = bfq_alloc_io_context(bfqd, gfp_mask); ++ if (cic == NULL) ++ goto err; ++ ++ /* Link it into the ioc's radix tree and cic list. */ ++ if (bfq_cic_link(bfqd, ioc, cic, gfp_mask) != 0) ++ goto err_free; ++ ++out: ++ /* ++ * test_and_clear_bit() implies a memory barrier, paired with ++ * the wmb() in fs/ioprio.c, so the value seen for ioprio is the ++ * new one. ++ */ ++ if (unlikely(test_and_clear_bit(IOC_BFQ_IOPRIO_CHANGED, ++ ioc->ioprio_changed))) ++ bfq_ioc_set_ioprio(ioc); ++ ++ return cic; ++err_free: ++ bfq_cic_free(cic); ++err: ++ put_io_context(ioc); ++ return NULL; ++} +diff --git a/block/bfq-iosched.c block/bfq-iosched.c +new file mode 100644 +index 0000000..576cd03 +--- /dev/null ++++ block/bfq-iosched.c +@@ -0,0 +1,3021 @@ ++/* ++ * BFQ, or Budget Fair Queueing, disk scheduler. ++ * ++ * Based on ideas and code from CFQ: ++ * Copyright (C) 2003 Jens Axboe ++ * ++ * Copyright (C) 2008 Fabio Checconi ++ * Paolo Valente ++ * ++ * Licensed under the GPL-2 as detailed in the accompanying COPYING.BFQ file. ++ * ++ * BFQ is a proportional share disk scheduling algorithm based on the ++ * slice-by-slice service scheme of CFQ. But BFQ assigns budgets, ++ * measured in number of sectors, to tasks instead of time slices. ++ * The disk is not granted to the active task for a given time slice, ++ * but until it has exahusted its assigned budget. This change from ++ * the time to the service domain allows BFQ to distribute the disk ++ * bandwidth among tasks as desired, without any distortion due to ++ * ZBR, workload fluctuations or other factors. 
BFQ uses an ad hoc ++ * internal scheduler, called B-WF2Q+, to schedule tasks according to ++ * their budgets. Thanks to this accurate scheduler, BFQ can afford ++ * to assign high budgets to disk-bound non-seeky tasks (to boost the ++ * throughput), and yet guarantee low latencies to interactive and ++ * soft real-time applications. ++ * ++ * BFQ has been introduced in [1], where the interested reader can ++ * find an accurate description of the algorithm, the bandwidth ++ * distribution and latency guarantees it provides, plus formal proofs ++ * of all the properties. With respect to the algorithm presented in ++ * the paper, this implementation adds several little heuristics, and ++ * a hierarchical extension, based on H-WF2Q+. ++ * ++ * B-WF2Q+ is based on WF2Q+, that is described in [2], together with ++ * H-WF2Q+, while the augmented tree used to implement B-WF2Q+ with O(log N) ++ * complexity derives from the one introduced with EEVDF in [3]. ++ * ++ * [1] P. Valente and F. Checconi, ``High Throughput Disk Scheduling ++ * with Deterministic Guarantees on Bandwidth Distribution,'', ++ * IEEE Transactions on Computer, May 2010. ++ * ++ * http://algo.ing.unimo.it/people/paolo/disk_sched/bfq-techreport.pdf ++ * ++ * [2] Jon C.R. Bennett and H. Zhang, ``Hierarchical Packet Fair Queueing ++ * Algorithms,'' IEEE/ACM Transactions on Networking, 5(5):675-689, ++ * Oct 1997. ++ * ++ * http://www.cs.cmu.edu/~hzhang/papers/TON-97-Oct.ps.gz ++ * ++ * [3] I. Stoica and H. Abdel-Wahab, ``Earliest Eligible Virtual Deadline ++ * First: A Flexible and Accurate Mechanism for Proportional Share ++ * Resource Allocation,'' technical report. ++ * ++ * http://www.cs.berkeley.edu/~istoica/papers/eevdf-tr-95.pdf ++ */ ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include "bfq.h" ++ ++/* Max number of dispatches in one round of service. */ ++static const int bfq_quantum = 4; ++ ++/* Expiration time of sync (0) and async (1) requests, in jiffies. */ ++static const int bfq_fifo_expire[2] = { HZ / 4, HZ / 8 }; ++ ++/* Maximum backwards seek, in KiB. */ ++static const int bfq_back_max = 16 * 1024; ++ ++/* Penalty of a backwards seek, in number of sectors. */ ++static const int bfq_back_penalty = 2; ++ ++/* Idling period duration, in jiffies. */ ++static int bfq_slice_idle = HZ / 125; ++ ++/* Default maximum budget values, in sectors and number of requests. */ ++static const int bfq_default_max_budget = 16 * 1024; ++static const int bfq_max_budget_async_rq = 4; ++ ++/* ++ * Async to sync throughput distribution is controlled as follows: ++ * when an async request is served, the entity is charged the number ++ * of sectors of the request, multipled by the factor below ++ */ ++static const int bfq_async_charge_factor = 10; ++ ++/* Default timeout values, in jiffies, approximating CFQ defaults. */ ++static const int bfq_timeout_sync = HZ / 8; ++static int bfq_timeout_async = HZ / 25; ++ ++struct kmem_cache *bfq_pool; ++struct kmem_cache *bfq_ioc_pool; ++ ++static DEFINE_PER_CPU(unsigned long, bfq_ioc_count); ++static struct completion *bfq_ioc_gone; ++static DEFINE_SPINLOCK(bfq_ioc_gone_lock); ++ ++static DEFINE_SPINLOCK(cic_index_lock); ++static DEFINE_IDA(cic_index_ida); ++ ++/* Below this threshold (in ms), we consider thinktime immediate. */ ++#define BFQ_MIN_TT 2 ++ ++/* hw_tag detection: parallel requests threshold and min samples needed. 
*/ ++#define BFQ_HW_QUEUE_THRESHOLD 4 ++#define BFQ_HW_QUEUE_SAMPLES 32 ++ ++#define BFQQ_SEEK_THR (sector_t)(8 * 1024) ++#define BFQQ_SEEKY(bfqq) ((bfqq)->seek_mean > BFQQ_SEEK_THR) ++ ++/* Min samples used for peak rate estimation (for autotuning). */ ++#define BFQ_PEAK_RATE_SAMPLES 32 ++ ++/* Shift used for peak rate fixed precision calculations. */ ++#define BFQ_RATE_SHIFT 16 ++ ++/* ++ * The duration of the weight raising for interactive applications is ++ * computed automatically (as default behaviour), using the following ++ * formula: duration = (R / r) * T, where r is the peak rate of the ++ * disk, and R and T are two reference parameters. In particular, R is ++ * the peak rate of a reference disk, and T is about the maximum time ++ * for starting popular large applications on that disk, under BFQ and ++ * while reading two files in parallel. Finally, BFQ uses two ++ * different pairs (R, T) depending on whether the disk is rotational ++ * or non-rotational. ++ */ ++#define T_rot (msecs_to_jiffies(5500)) ++#define T_nonrot (msecs_to_jiffies(2000)) ++/* Next two quantities are in sectors/usec, left-shifted by BFQ_RATE_SHIFT */ ++#define R_rot 17415 ++#define R_nonrot 34791 ++ ++#define BFQ_SERVICE_TREE_INIT ((struct bfq_service_tree) \ ++ { RB_ROOT, RB_ROOT, NULL, NULL, 0, 0 }) ++ ++#define RQ_CIC(rq) \ ++ ((struct cfq_io_context *) (rq)->elevator_private[0]) ++#define RQ_BFQQ(rq) ((rq)->elevator_private[1]) ++ ++#include "bfq-ioc.c" ++#include "bfq-sched.c" ++#include "bfq-cgroup.c" ++ ++#define bfq_class_idle(bfqq) ((bfqq)->entity.ioprio_class ==\ ++ IOPRIO_CLASS_IDLE) ++#define bfq_class_rt(bfqq) ((bfqq)->entity.ioprio_class ==\ ++ IOPRIO_CLASS_RT) ++ ++#define bfq_sample_valid(samples) ((samples) > 80) ++ ++/* ++ * We regard a request as SYNC, if either it's a read or has the SYNC bit ++ * set (in which case it could also be a direct WRITE). ++ */ ++static inline int bfq_bio_sync(struct bio *bio) ++{ ++ if (bio_data_dir(bio) == READ || (bio->bi_rw & REQ_SYNC)) ++ return 1; ++ ++ return 0; ++} ++ ++/* ++ * Scheduler run of queue, if there are requests pending and no one in the ++ * driver that will restart queueing. ++ */ ++static inline void bfq_schedule_dispatch(struct bfq_data *bfqd) ++{ ++ if (bfqd->queued != 0) { ++ bfq_log(bfqd, "schedule dispatch"); ++ kblockd_schedule_work(bfqd->queue, &bfqd->unplug_work); ++ } ++} ++ ++/* ++ * Lifted from AS - choose which of rq1 and rq2 that is best served now. ++ * We choose the request that is closesr to the head right now. Distance ++ * behind the head is penalized and only allowed to a certain extent. ++ */ ++static struct request *bfq_choose_req(struct bfq_data *bfqd, ++ struct request *rq1, ++ struct request *rq2, ++ sector_t last) ++{ ++ sector_t s1, s2, d1 = 0, d2 = 0; ++ unsigned long back_max; ++#define BFQ_RQ1_WRAP 0x01 /* request 1 wraps */ ++#define BFQ_RQ2_WRAP 0x02 /* request 2 wraps */ ++ unsigned wrap = 0; /* bit mask: requests behind the disk head? */ ++ ++ if (rq1 == NULL || rq1 == rq2) ++ return rq2; ++ if (rq2 == NULL) ++ return rq1; ++ ++ if (rq_is_sync(rq1) && !rq_is_sync(rq2)) ++ return rq1; ++ else if (rq_is_sync(rq2) && !rq_is_sync(rq1)) ++ return rq2; ++ if ((rq1->cmd_flags & REQ_META) && !(rq2->cmd_flags & REQ_META)) ++ return rq1; ++ else if ((rq2->cmd_flags & REQ_META) && !(rq1->cmd_flags & REQ_META)) ++ return rq2; ++ ++ s1 = blk_rq_pos(rq1); ++ s2 = blk_rq_pos(rq2); ++ ++ /* ++ * By definition, 1KiB is 2 sectors. 
++ */ ++ back_max = bfqd->bfq_back_max * 2; ++ ++ /* ++ * Strict one way elevator _except_ in the case where we allow ++ * short backward seeks which are biased as twice the cost of a ++ * similar forward seek. ++ */ ++ if (s1 >= last) ++ d1 = s1 - last; ++ else if (s1 + back_max >= last) ++ d1 = (last - s1) * bfqd->bfq_back_penalty; ++ else ++ wrap |= BFQ_RQ1_WRAP; ++ ++ if (s2 >= last) ++ d2 = s2 - last; ++ else if (s2 + back_max >= last) ++ d2 = (last - s2) * bfqd->bfq_back_penalty; ++ else ++ wrap |= BFQ_RQ2_WRAP; ++ ++ /* Found required data */ ++ ++ /* ++ * By doing switch() on the bit mask "wrap" we avoid having to ++ * check two variables for all permutations: --> faster! ++ */ ++ switch (wrap) { ++ case 0: /* common case for CFQ: rq1 and rq2 not wrapped */ ++ if (d1 < d2) ++ return rq1; ++ else if (d2 < d1) ++ return rq2; ++ else { ++ if (s1 >= s2) ++ return rq1; ++ else ++ return rq2; ++ } ++ ++ case BFQ_RQ2_WRAP: ++ return rq1; ++ case BFQ_RQ1_WRAP: ++ return rq2; ++ case (BFQ_RQ1_WRAP|BFQ_RQ2_WRAP): /* both rqs wrapped */ ++ default: ++ /* ++ * Since both rqs are wrapped, ++ * start with the one that's further behind head ++ * (--> only *one* back seek required), ++ * since back seek takes more time than forward. ++ */ ++ if (s1 <= s2) ++ return rq1; ++ else ++ return rq2; ++ } ++} ++ ++static struct bfq_queue * ++bfq_rq_pos_tree_lookup(struct bfq_data *bfqd, struct rb_root *root, ++ sector_t sector, struct rb_node **ret_parent, ++ struct rb_node ***rb_link) ++{ ++ struct rb_node **p, *parent; ++ struct bfq_queue *bfqq = NULL; ++ ++ parent = NULL; ++ p = &root->rb_node; ++ while (*p) { ++ struct rb_node **n; ++ ++ parent = *p; ++ bfqq = rb_entry(parent, struct bfq_queue, pos_node); ++ ++ /* ++ * Sort strictly based on sector. Smallest to the left, ++ * largest to the right. ++ */ ++ if (sector > blk_rq_pos(bfqq->next_rq)) ++ n = &(*p)->rb_right; ++ else if (sector < blk_rq_pos(bfqq->next_rq)) ++ n = &(*p)->rb_left; ++ else ++ break; ++ p = n; ++ bfqq = NULL; ++ } ++ ++ *ret_parent = parent; ++ if (rb_link) ++ *rb_link = p; ++ ++ bfq_log(bfqd, "rq_pos_tree_lookup %llu: returning %d", ++ (long long unsigned)sector, ++ bfqq != NULL ? 
bfqq->pid : 0); ++ ++ return bfqq; ++} ++ ++static void bfq_rq_pos_tree_add(struct bfq_data *bfqd, struct bfq_queue *bfqq) ++{ ++ struct rb_node **p, *parent; ++ struct bfq_queue *__bfqq; ++ ++ if (bfqq->pos_root != NULL) { ++ rb_erase(&bfqq->pos_node, bfqq->pos_root); ++ bfqq->pos_root = NULL; ++ } ++ ++ if (bfq_class_idle(bfqq)) ++ return; ++ if (!bfqq->next_rq) ++ return; ++ ++ bfqq->pos_root = &bfqd->rq_pos_tree; ++ __bfqq = bfq_rq_pos_tree_lookup(bfqd, bfqq->pos_root, ++ blk_rq_pos(bfqq->next_rq), &parent, &p); ++ if (__bfqq == NULL) { ++ rb_link_node(&bfqq->pos_node, parent, p); ++ rb_insert_color(&bfqq->pos_node, bfqq->pos_root); ++ } else ++ bfqq->pos_root = NULL; ++} ++ ++static struct request *bfq_find_next_rq(struct bfq_data *bfqd, ++ struct bfq_queue *bfqq, ++ struct request *last) ++{ ++ struct rb_node *rbnext = rb_next(&last->rb_node); ++ struct rb_node *rbprev = rb_prev(&last->rb_node); ++ struct request *next = NULL, *prev = NULL; ++ ++ BUG_ON(RB_EMPTY_NODE(&last->rb_node)); ++ ++ if (rbprev != NULL) ++ prev = rb_entry_rq(rbprev); ++ ++ if (rbnext != NULL) ++ next = rb_entry_rq(rbnext); ++ else { ++ rbnext = rb_first(&bfqq->sort_list); ++ if (rbnext && rbnext != &last->rb_node) ++ next = rb_entry_rq(rbnext); ++ } ++ ++ return bfq_choose_req(bfqd, next, prev, blk_rq_pos(last)); ++} ++ ++static void bfq_del_rq_rb(struct request *rq) ++{ ++ struct bfq_queue *bfqq = RQ_BFQQ(rq); ++ struct bfq_data *bfqd = bfqq->bfqd; ++ const int sync = rq_is_sync(rq); ++ ++ BUG_ON(bfqq->queued[sync] == 0); ++ bfqq->queued[sync]--; ++ bfqd->queued--; ++ ++ elv_rb_del(&bfqq->sort_list, rq); ++ ++ if (RB_EMPTY_ROOT(&bfqq->sort_list)) { ++ if (bfq_bfqq_busy(bfqq) && bfqq != bfqd->active_queue) ++ bfq_del_bfqq_busy(bfqd, bfqq, 1); ++ /* ++ * Remove queue from request-position tree as it is empty. ++ */ ++ if (bfqq->pos_root != NULL) { ++ rb_erase(&bfqq->pos_node, bfqq->pos_root); ++ bfqq->pos_root = NULL; ++ } ++ } ++} ++ ++/* see the definition of bfq_async_charge_factor for details */ ++static inline unsigned long bfq_serv_to_charge(struct request *rq, ++ struct bfq_queue *bfqq) ++{ ++ return blk_rq_sectors(rq) * ++ (1 + ((!bfq_bfqq_sync(bfqq)) * (bfqq->raising_coeff == 1) * ++ bfq_async_charge_factor)); ++} ++ ++/** ++ * bfq_updated_next_req - update the queue after a new next_rq selection. ++ * @bfqd: the device data the queue belongs to. ++ * @bfqq: the queue to update. ++ * ++ * If the first request of a queue changes we make sure that the queue ++ * has enough budget to serve at least its first request (if the ++ * request has grown). We do this because if the queue has not enough ++ * budget for its first request, it has to go through two dispatch ++ * rounds to actually get it dispatched. ++ */ ++static void bfq_updated_next_req(struct bfq_data *bfqd, ++ struct bfq_queue *bfqq) ++{ ++ struct bfq_entity *entity = &bfqq->entity; ++ struct bfq_service_tree *st = bfq_entity_service_tree(entity); ++ struct request *next_rq = bfqq->next_rq; ++ unsigned long new_budget; ++ ++ if (next_rq == NULL) ++ return; ++ ++ if (bfqq == bfqd->active_queue) ++ /* ++ * In order not to break guarantees, budgets cannot be ++ * changed after an entity has been selected. 
++ */ ++ return; ++ ++ BUG_ON(entity->tree != &st->active); ++ BUG_ON(entity == entity->sched_data->active_entity); ++ ++ new_budget = max_t(unsigned long, bfqq->max_budget, ++ bfq_serv_to_charge(next_rq, bfqq)); ++ entity->budget = new_budget; ++ bfq_log_bfqq(bfqd, bfqq, "updated next rq: new budget %lu", new_budget); ++ bfq_activate_bfqq(bfqd, bfqq); ++} ++ ++static inline unsigned int bfq_wrais_duration(struct bfq_data *bfqd) ++{ ++ u64 dur; ++ ++ if (bfqd->bfq_raising_max_time > 0) ++ return bfqd->bfq_raising_max_time; ++ ++ dur = bfqd->RT_prod; ++ do_div(dur, bfqd->peak_rate); ++ ++ return dur; ++} ++ ++static void bfq_add_rq_rb(struct request *rq) ++{ ++ struct bfq_queue *bfqq = RQ_BFQQ(rq); ++ struct bfq_entity *entity = &bfqq->entity; ++ struct bfq_data *bfqd = bfqq->bfqd; ++ struct request *next_rq, *prev; ++ unsigned long old_raising_coeff = bfqq->raising_coeff; ++ int idle_for_long_time = bfqq->budget_timeout + ++ bfqd->bfq_raising_min_idle_time < jiffies; ++ ++ bfq_log_bfqq(bfqd, bfqq, "add_rq_rb %d", rq_is_sync(rq)); ++ bfqq->queued[rq_is_sync(rq)]++; ++ bfqd->queued++; ++ ++ elv_rb_add(&bfqq->sort_list, rq); ++ ++ /* ++ * Check if this request is a better next-serve candidate. ++ */ ++ prev = bfqq->next_rq; ++ next_rq = bfq_choose_req(bfqd, bfqq->next_rq, rq, bfqd->last_position); ++ BUG_ON(next_rq == NULL); ++ bfqq->next_rq = next_rq; ++ ++ /* ++ * Adjust priority tree position, if next_rq changes. ++ */ ++ if (prev != bfqq->next_rq) ++ bfq_rq_pos_tree_add(bfqd, bfqq); ++ ++ if (!bfq_bfqq_busy(bfqq)) { ++ int soft_rt = bfqd->bfq_raising_max_softrt_rate > 0 && ++ bfqq->soft_rt_next_start < jiffies; ++ entity->budget = max_t(unsigned long, bfqq->max_budget, ++ bfq_serv_to_charge(next_rq, bfqq)); ++ ++ if (! bfqd->low_latency) ++ goto add_bfqq_busy; ++ ++ /* ++ * If the queue is not being boosted and has been idle ++ * for enough time, start a weight-raising period ++ */ ++ if(old_raising_coeff == 1 && (idle_for_long_time || soft_rt)) { ++ bfqq->raising_coeff = bfqd->bfq_raising_coeff; ++ if (idle_for_long_time) ++ bfqq->raising_cur_max_time = ++ bfq_wrais_duration(bfqd); ++ else ++ bfqq->raising_cur_max_time = ++ bfqd->bfq_raising_rt_max_time; ++ bfq_log_bfqq(bfqd, bfqq, ++ "wrais starting at %llu msec," ++ "rais_max_time %u", ++ bfqq->last_rais_start_finish, ++ jiffies_to_msecs(bfqq-> ++ raising_cur_max_time)); ++ } else if (old_raising_coeff > 1) { ++ if (idle_for_long_time) ++ bfqq->raising_cur_max_time = ++ bfq_wrais_duration(bfqd); ++ else if (bfqq->raising_cur_max_time == ++ bfqd->bfq_raising_rt_max_time && ++ !soft_rt) { ++ bfqq->raising_coeff = 1; ++ bfq_log_bfqq(bfqd, bfqq, ++ "wrais ending at %llu msec," ++ "rais_max_time %u", ++ bfqq->last_rais_start_finish, ++ jiffies_to_msecs(bfqq-> ++ raising_cur_max_time)); ++ } ++ } ++ if (old_raising_coeff != bfqq->raising_coeff) ++ entity->ioprio_changed = 1; ++add_bfqq_busy: ++ bfq_add_bfqq_busy(bfqd, bfqq); ++ } else { ++ if(bfqd->low_latency && old_raising_coeff == 1 && ++ !rq_is_sync(rq) && ++ bfqq->last_rais_start_finish + ++ bfqd->bfq_raising_min_inter_arr_async < jiffies) { ++ bfqq->raising_coeff = bfqd->bfq_raising_coeff; ++ bfqq->raising_cur_max_time = bfq_wrais_duration(bfqd); ++ ++ entity->ioprio_changed = 1; ++ bfq_log_bfqq(bfqd, bfqq, ++ "non-idle wrais starting at %llu msec," ++ "rais_max_time %u", ++ bfqq->last_rais_start_finish, ++ jiffies_to_msecs(bfqq-> ++ raising_cur_max_time)); ++ } ++ bfq_updated_next_req(bfqd, bfqq); ++ } ++ ++ if(bfqd->low_latency && ++ (old_raising_coeff == 1 || bfqq->raising_coeff == 
1 || ++ idle_for_long_time)) ++ bfqq->last_rais_start_finish = jiffies; ++} ++ ++static void bfq_reposition_rq_rb(struct bfq_queue *bfqq, struct request *rq) ++{ ++ elv_rb_del(&bfqq->sort_list, rq); ++ bfqq->queued[rq_is_sync(rq)]--; ++ bfqq->bfqd->queued--; ++ bfq_add_rq_rb(rq); ++} ++ ++static struct request *bfq_find_rq_fmerge(struct bfq_data *bfqd, ++ struct bio *bio) ++{ ++ struct task_struct *tsk = current; ++ struct cfq_io_context *cic; ++ struct bfq_queue *bfqq; ++ ++ cic = bfq_cic_lookup(bfqd, tsk->io_context); ++ if (cic == NULL) ++ return NULL; ++ ++ bfqq = cic_to_bfqq(cic, bfq_bio_sync(bio)); ++ if (bfqq != NULL) { ++ sector_t sector = bio->bi_sector + bio_sectors(bio); ++ ++ return elv_rb_find(&bfqq->sort_list, sector); ++ } ++ ++ return NULL; ++} ++ ++static void bfq_activate_request(struct request_queue *q, struct request *rq) ++{ ++ struct bfq_data *bfqd = q->elevator->elevator_data; ++ ++ bfqd->rq_in_driver++; ++ bfqd->last_position = blk_rq_pos(rq) + blk_rq_sectors(rq); ++ bfq_log(bfqd, "activate_request: new bfqd->last_position %llu", ++ (long long unsigned)bfqd->last_position); ++} ++ ++static void bfq_deactivate_request(struct request_queue *q, struct request *rq) ++{ ++ struct bfq_data *bfqd = q->elevator->elevator_data; ++ ++ WARN_ON(bfqd->rq_in_driver == 0); ++ bfqd->rq_in_driver--; ++} ++ ++static void bfq_remove_request(struct request *rq) ++{ ++ struct bfq_queue *bfqq = RQ_BFQQ(rq); ++ struct bfq_data *bfqd = bfqq->bfqd; ++ ++ if (bfqq->next_rq == rq) { ++ bfqq->next_rq = bfq_find_next_rq(bfqd, bfqq, rq); ++ bfq_updated_next_req(bfqd, bfqq); ++ } ++ ++ list_del_init(&rq->queuelist); ++ bfq_del_rq_rb(rq); ++ ++ if (rq->cmd_flags & REQ_META) { ++ WARN_ON(bfqq->meta_pending == 0); ++ bfqq->meta_pending--; ++ } ++} ++ ++static int bfq_merge(struct request_queue *q, struct request **req, ++ struct bio *bio) ++{ ++ struct bfq_data *bfqd = q->elevator->elevator_data; ++ struct request *__rq; ++ ++ __rq = bfq_find_rq_fmerge(bfqd, bio); ++ if (__rq != NULL && elv_rq_merge_ok(__rq, bio)) { ++ *req = __rq; ++ return ELEVATOR_FRONT_MERGE; ++ } ++ ++ return ELEVATOR_NO_MERGE; ++} ++ ++static void bfq_merged_request(struct request_queue *q, struct request *req, ++ int type) ++{ ++ if (type == ELEVATOR_FRONT_MERGE) { ++ struct bfq_queue *bfqq = RQ_BFQQ(req); ++ ++ bfq_reposition_rq_rb(bfqq, req); ++ } ++} ++ ++static void bfq_merged_requests(struct request_queue *q, struct request *rq, ++ struct request *next) ++{ ++ struct bfq_queue *bfqq = RQ_BFQQ(rq); ++ ++ /* ++ * Reposition in fifo if next is older than rq. ++ */ ++ if (!list_empty(&rq->queuelist) && !list_empty(&next->queuelist) && ++ time_before(rq_fifo_time(next), rq_fifo_time(rq))) { ++ list_move(&rq->queuelist, &next->queuelist); ++ rq_set_fifo_time(rq, rq_fifo_time(next)); ++ } ++ ++ if (bfqq->next_rq == next) ++ bfqq->next_rq = rq; ++ ++ bfq_remove_request(next); ++} ++ ++static int bfq_allow_merge(struct request_queue *q, struct request *rq, ++ struct bio *bio) ++{ ++ struct bfq_data *bfqd = q->elevator->elevator_data; ++ struct cfq_io_context *cic; ++ struct bfq_queue *bfqq; ++ ++ /* Disallow merge of a sync bio into an async request. */ ++ if (bfq_bio_sync(bio) && !rq_is_sync(rq)) ++ return 0; ++ ++ /* ++ * Lookup the bfqq that this bio will be queued with. Allow ++ * merge only if rq is queued there. 
++ */ ++ cic = bfq_cic_lookup(bfqd, current->io_context); ++ if (cic == NULL) ++ return 0; ++ ++ bfqq = cic_to_bfqq(cic, bfq_bio_sync(bio)); ++ return bfqq == RQ_BFQQ(rq); ++} ++ ++static void __bfq_set_active_queue(struct bfq_data *bfqd, ++ struct bfq_queue *bfqq) ++{ ++ if (bfqq != NULL) { ++ bfq_mark_bfqq_must_alloc(bfqq); ++ bfq_mark_bfqq_budget_new(bfqq); ++ bfq_clear_bfqq_fifo_expire(bfqq); ++ ++ bfqd->budgets_assigned = (bfqd->budgets_assigned*7 + 256) / 8; ++ ++ bfq_log_bfqq(bfqd, bfqq, "set_active_queue, cur-budget = %lu", ++ bfqq->entity.budget); ++ } ++ ++ bfqd->active_queue = bfqq; ++} ++ ++/* ++ * Get and set a new active queue for service. ++ */ ++static struct bfq_queue *bfq_set_active_queue(struct bfq_data *bfqd, ++ struct bfq_queue *bfqq) ++{ ++ if (!bfqq) ++ bfqq = bfq_get_next_queue(bfqd); ++ else ++ bfq_get_next_queue_forced(bfqd, bfqq); ++ ++ __bfq_set_active_queue(bfqd, bfqq); ++ return bfqq; ++} ++ ++static inline sector_t bfq_dist_from_last(struct bfq_data *bfqd, ++ struct request *rq) ++{ ++ if (blk_rq_pos(rq) >= bfqd->last_position) ++ return blk_rq_pos(rq) - bfqd->last_position; ++ else ++ return bfqd->last_position - blk_rq_pos(rq); ++} ++ ++/* ++ * Return true if bfqq has no request pending and rq is close enough to ++ * bfqd->last_position, or if rq is closer to bfqd->last_position than ++ * bfqq->next_rq ++ */ ++static inline int bfq_rq_close(struct bfq_data *bfqd, struct request *rq) ++{ ++ return bfq_dist_from_last(bfqd, rq) <= BFQQ_SEEK_THR; ++} ++ ++static struct bfq_queue *bfqq_close(struct bfq_data *bfqd) ++{ ++ struct rb_root *root = &bfqd->rq_pos_tree; ++ struct rb_node *parent, *node; ++ struct bfq_queue *__bfqq; ++ sector_t sector = bfqd->last_position; ++ ++ if (RB_EMPTY_ROOT(root)) ++ return NULL; ++ ++ /* ++ * First, if we find a request starting at the end of the last ++ * request, choose it. ++ */ ++ __bfqq = bfq_rq_pos_tree_lookup(bfqd, root, sector, &parent, NULL); ++ if (__bfqq != NULL) ++ return __bfqq; ++ ++ /* ++ * If the exact sector wasn't found, the parent of the NULL leaf ++ * will contain the closest sector (rq_pos_tree sorted by next_request ++ * position). ++ */ ++ __bfqq = rb_entry(parent, struct bfq_queue, pos_node); ++ if (bfq_rq_close(bfqd, __bfqq->next_rq)) ++ return __bfqq; ++ ++ if (blk_rq_pos(__bfqq->next_rq) < sector) ++ node = rb_next(&__bfqq->pos_node); ++ else ++ node = rb_prev(&__bfqq->pos_node); ++ if (node == NULL) ++ return NULL; ++ ++ __bfqq = rb_entry(node, struct bfq_queue, pos_node); ++ if (bfq_rq_close(bfqd, __bfqq->next_rq)) ++ return __bfqq; ++ ++ return NULL; ++} ++ ++/* ++ * bfqd - obvious ++ * cur_bfqq - passed in so that we don't decide that the current queue ++ * is closely cooperating with itself. ++ * ++ * We are assuming that cur_bfqq has dispatched at least one request, ++ * and that bfqd->last_position reflects a position on the disk associated ++ * with the I/O issued by cur_bfqq. ++ */ ++static struct bfq_queue *bfq_close_cooperator(struct bfq_data *bfqd, ++ struct bfq_queue *cur_bfqq) ++{ ++ struct bfq_queue *bfqq; ++ ++ if (bfq_class_idle(cur_bfqq)) ++ return NULL; ++ if (!bfq_bfqq_sync(cur_bfqq)) ++ return NULL; ++ if (BFQQ_SEEKY(cur_bfqq)) ++ return NULL; ++ ++ /* If device has only one backlogged bfq_queue, don't search. */ ++ if (bfqd->busy_queues == 1) ++ return NULL; ++ ++ /* ++ * We should notice if some of the queues are cooperating, e.g. ++ * working closely on the same area of the disk. In that case, ++ * we can group them together and don't waste time idling. 
++ */ ++ bfqq = bfqq_close(bfqd); ++ if (bfqq == NULL || bfqq == cur_bfqq) ++ return NULL; ++ ++ /* ++ * Do not merge queues from different bfq_groups. ++ */ ++ if (bfqq->entity.parent != cur_bfqq->entity.parent) ++ return NULL; ++ ++ /* ++ * It only makes sense to merge sync queues. ++ */ ++ if (!bfq_bfqq_sync(bfqq)) ++ return NULL; ++ if (BFQQ_SEEKY(bfqq)) ++ return NULL; ++ ++ /* ++ * Do not merge queues of different priority classes. ++ */ ++ if (bfq_class_rt(bfqq) != bfq_class_rt(cur_bfqq)) ++ return NULL; ++ ++ return bfqq; ++} ++ ++/* ++ * If enough samples have been computed, return the current max budget ++ * stored in bfqd, which is dynamically updated according to the ++ * estimated disk peak rate; otherwise return the default max budget ++ */ ++static inline unsigned long bfq_max_budget(struct bfq_data *bfqd) ++{ ++ if (bfqd->budgets_assigned < 194) ++ return bfq_default_max_budget; ++ else ++ return bfqd->bfq_max_budget; ++} ++ ++/* ++ * Return min budget, which is a fraction of the current or default ++ * max budget (trying with 1/32) ++ */ ++static inline unsigned long bfq_min_budget(struct bfq_data *bfqd) ++{ ++ if (bfqd->budgets_assigned < 194) ++ return bfq_default_max_budget; ++ else ++ return bfqd->bfq_max_budget / 32; ++} ++ ++/* ++ * Decides whether idling should be done for given device and ++ * given active queue. ++ */ ++static inline bool bfq_queue_nonrot_noidle(struct bfq_data *bfqd, ++ struct bfq_queue *active_bfqq) ++{ ++ if (active_bfqq == NULL) ++ return false; ++ /* ++ * If device is SSD it has no seek penalty, disable idling; but ++ * do so only if: ++ * - device does not support queuing, otherwise we still have ++ * a problem with sync vs async workloads; ++ * - the queue is not weight-raised, to preserve guarantees. ++ */ ++ return (blk_queue_nonrot(bfqd->queue) && bfqd->hw_tag && ++ active_bfqq->raising_coeff == 1); ++} ++ ++static void bfq_arm_slice_timer(struct bfq_data *bfqd) ++{ ++ struct bfq_queue *bfqq = bfqd->active_queue; ++ struct cfq_io_context *cic; ++ unsigned long sl; ++ ++ WARN_ON(!RB_EMPTY_ROOT(&bfqq->sort_list)); ++ ++ if (bfq_queue_nonrot_noidle(bfqd, bfqq)) ++ return; ++ ++ /* Idling is disabled, either manually or by past process history. */ ++ if (bfqd->bfq_slice_idle == 0 || !bfq_bfqq_idle_window(bfqq)) ++ return; ++ ++ /* Tasks have exited, don't wait. */ ++ cic = bfqd->active_cic; ++ if (cic == NULL || atomic_read(&cic->ioc->nr_tasks) == 0) ++ return; ++ ++ bfq_mark_bfqq_wait_request(bfqq); ++ ++ /* ++ * We don't want to idle for seeks, but we do want to allow ++ * fair distribution of slice time for a process doing back-to-back ++ * seeks. So allow a little bit of time for him to submit a new rq. ++ * ++ * To prevent processes with (partly) seeky workloads from ++ * being too ill-treated, grant them a small fraction of the ++ * assigned budget before reducing the waiting time to ++ * BFQ_MIN_TT. This happened to help reduce latency. ++ */ ++ sl = bfqd->bfq_slice_idle; ++ if (bfq_sample_valid(bfqq->seek_samples) && BFQQ_SEEKY(bfqq) && ++ bfqq->entity.service > bfq_max_budget(bfqd) / 8 && ++ bfqq->raising_coeff == 1) ++ sl = min(sl, msecs_to_jiffies(BFQ_MIN_TT)); ++ else if (bfqq->raising_coeff > 1) ++ sl = sl * 3; ++ bfqd->last_idling_start = ktime_get(); ++ mod_timer(&bfqd->idle_slice_timer, jiffies + sl); ++ bfq_log(bfqd, "arm idle: %u/%u ms", ++ jiffies_to_msecs(sl), jiffies_to_msecs(bfqd->bfq_slice_idle)); ++} ++ ++/* ++ * Set the maximum time for the active queue to consume its ++ * budget. 
This prevents seeky processes from lowering the disk ++ * throughput (always guaranteed with a time slice scheme as in CFQ). ++ */ ++static void bfq_set_budget_timeout(struct bfq_data *bfqd) ++{ ++ struct bfq_queue *bfqq = bfqd->active_queue; ++ unsigned int timeout_coeff; ++ if (bfqq->raising_cur_max_time == bfqd->bfq_raising_rt_max_time) ++ timeout_coeff = 1; ++ else ++ timeout_coeff = bfqq->entity.weight / bfqq->entity.orig_weight; ++ ++ bfqd->last_budget_start = ktime_get(); ++ ++ bfq_clear_bfqq_budget_new(bfqq); ++ bfqq->budget_timeout = jiffies + ++ bfqd->bfq_timeout[bfq_bfqq_sync(bfqq)] * timeout_coeff; ++ ++ bfq_log_bfqq(bfqd, bfqq, "set budget_timeout %u", ++ jiffies_to_msecs(bfqd->bfq_timeout[bfq_bfqq_sync(bfqq)] * ++ timeout_coeff)); ++} ++ ++/* ++ * Move request from internal lists to the request queue dispatch list. ++ */ ++static void bfq_dispatch_insert(struct request_queue *q, struct request *rq) ++{ ++ struct bfq_data *bfqd = q->elevator->elevator_data; ++ struct bfq_queue *bfqq = RQ_BFQQ(rq); ++ ++ bfq_remove_request(rq); ++ bfqq->dispatched++; ++ elv_dispatch_sort(q, rq); ++ ++ if (bfq_bfqq_sync(bfqq)) ++ bfqd->sync_flight++; ++} ++ ++/* ++ * Return expired entry, or NULL to just start from scratch in rbtree. ++ */ ++static struct request *bfq_check_fifo(struct bfq_queue *bfqq) ++{ ++ struct request *rq = NULL; ++ ++ if (bfq_bfqq_fifo_expire(bfqq)) ++ return NULL; ++ ++ bfq_mark_bfqq_fifo_expire(bfqq); ++ ++ if (list_empty(&bfqq->fifo)) ++ return NULL; ++ ++ rq = rq_entry_fifo(bfqq->fifo.next); ++ ++ if (time_before(jiffies, rq_fifo_time(rq))) ++ return NULL; ++ ++ return rq; ++} ++ ++/* ++ * Must be called with the queue_lock held. ++ */ ++static int bfqq_process_refs(struct bfq_queue *bfqq) ++{ ++ int process_refs, io_refs; ++ ++ io_refs = bfqq->allocated[READ] + bfqq->allocated[WRITE]; ++ process_refs = atomic_read(&bfqq->ref) - io_refs - bfqq->entity.on_st; ++ BUG_ON(process_refs < 0); ++ return process_refs; ++} ++ ++static void bfq_setup_merge(struct bfq_queue *bfqq, struct bfq_queue *new_bfqq) ++{ ++ int process_refs, new_process_refs; ++ struct bfq_queue *__bfqq; ++ ++ /* ++ * If there are no process references on the new_bfqq, then it is ++ * unsafe to follow the ->new_bfqq chain as other bfqq's in the chain ++ * may have dropped their last reference (not just their last process ++ * reference). ++ */ ++ if (!bfqq_process_refs(new_bfqq)) ++ return; ++ ++ /* Avoid a circular list and skip interim queue merges. */ ++ while ((__bfqq = new_bfqq->new_bfqq)) { ++ if (__bfqq == bfqq) ++ return; ++ new_bfqq = __bfqq; ++ } ++ ++ process_refs = bfqq_process_refs(bfqq); ++ new_process_refs = bfqq_process_refs(new_bfqq); ++ /* ++ * If the process for the bfqq has gone away, there is no ++ * sense in merging the queues. ++ */ ++ if (process_refs == 0 || new_process_refs == 0) ++ return; ++ ++ /* ++ * Merge in the direction of the lesser amount of work. 
++ */ ++ if (new_process_refs >= process_refs) { ++ bfqq->new_bfqq = new_bfqq; ++ atomic_add(process_refs, &new_bfqq->ref); ++ } else { ++ new_bfqq->new_bfqq = bfqq; ++ atomic_add(new_process_refs, &bfqq->ref); ++ } ++ bfq_log_bfqq(bfqq->bfqd, bfqq, "scheduling merge with queue %d", ++ new_bfqq->pid); ++} ++ ++static inline unsigned long bfq_bfqq_budget_left(struct bfq_queue *bfqq) ++{ ++ struct bfq_entity *entity = &bfqq->entity; ++ return entity->budget - entity->service; ++} ++ ++static void __bfq_bfqq_expire(struct bfq_data *bfqd, struct bfq_queue *bfqq) ++{ ++ BUG_ON(bfqq != bfqd->active_queue); ++ ++ __bfq_bfqd_reset_active(bfqd); ++ ++ if (RB_EMPTY_ROOT(&bfqq->sort_list)) { ++ bfq_del_bfqq_busy(bfqd, bfqq, 1); ++ /* ++ * overloading budget_timeout field to store when ++ * the queue remains with no backlog, used by ++ * the weight-raising mechanism ++ */ ++ bfqq->budget_timeout = jiffies ; ++ } ++ else { ++ bfq_activate_bfqq(bfqd, bfqq); ++ /* ++ * Resort priority tree of potential close cooperators. ++ */ ++ bfq_rq_pos_tree_add(bfqd, bfqq); ++ } ++ ++ /* ++ * If this bfqq is shared between multiple processes, check ++ * to make sure that those processes are still issuing I/Os ++ * within the mean seek distance. If not, it may be time to ++ * break the queues apart again. ++ */ ++ if (bfq_bfqq_coop(bfqq) && BFQQ_SEEKY(bfqq)) ++ bfq_mark_bfqq_split_coop(bfqq); ++} ++ ++/** ++ * __bfq_bfqq_recalc_budget - try to adapt the budget to the @bfqq behavior. ++ * @bfqd: device data. ++ * @bfqq: queue to update. ++ * @reason: reason for expiration. ++ * ++ * Handle the feedback on @bfqq budget. See the body for detailed ++ * comments. ++ */ ++static void __bfq_bfqq_recalc_budget(struct bfq_data *bfqd, ++ struct bfq_queue *bfqq, ++ enum bfqq_expiration reason) ++{ ++ struct request *next_rq; ++ unsigned long budget, min_budget; ++ ++ budget = bfqq->max_budget; ++ min_budget = bfq_min_budget(bfqd); ++ ++ BUG_ON(bfqq != bfqd->active_queue); ++ ++ bfq_log_bfqq(bfqd, bfqq, "recalc_budg: last budg %lu, budg left %lu", ++ bfqq->entity.budget, bfq_bfqq_budget_left(bfqq)); ++ bfq_log_bfqq(bfqd, bfqq, "recalc_budg: last max_budg %lu, min budg %lu", ++ budget, bfq_min_budget(bfqd)); ++ bfq_log_bfqq(bfqd, bfqq, "recalc_budg: sync %d, seeky %d", ++ bfq_bfqq_sync(bfqq), BFQQ_SEEKY(bfqd->active_queue)); ++ ++ if (bfq_bfqq_sync(bfqq)) { ++ switch (reason) { ++ /* ++ * Caveat: in all the following cases we trade latency ++ * for throughput. ++ */ ++ case BFQ_BFQQ_TOO_IDLE: ++ /* ++ * This is the only case where we may reduce ++ * the budget: if there is no requets of the ++ * process still waiting for completion, then ++ * we assume (tentatively) that the timer has ++ * expired because the batch of requests of ++ * the process could have been served with a ++ * smaller budget. Hence, betting that ++ * process will behave in the same way when it ++ * becomes backlogged again, we reduce its ++ * next budget. As long as we guess right, ++ * this budget cut reduces the latency ++ * experienced by the process. ++ * ++ * However, if there are still outstanding ++ * requests, then the process may have not yet ++ * issued its next request just because it is ++ * still waiting for the completion of some of ++ * the still oustanding ones. So in this ++ * subcase we do not reduce its budget, on the ++ * contrary we increase it to possibly boost ++ * the throughput, as discussed in the ++ * comments to the BUDGET_TIMEOUT case. 
++ */ ++ if (bfqq->dispatched > 0) /* still oustanding reqs */ ++ budget = min(budget * 2, bfqd->bfq_max_budget); ++ else { ++ if (budget > 5 * min_budget) ++ budget -= 4 * min_budget; ++ else ++ budget = min_budget; ++ } ++ break; ++ case BFQ_BFQQ_BUDGET_TIMEOUT: ++ /* ++ * We double the budget here because: 1) it ++ * gives the chance to boost the throughput if ++ * this is not a seeky process (which may have ++ * bumped into this timeout because of, e.g., ++ * ZBR), 2) together with charge_full_budget ++ * it helps give seeky processes higher ++ * timestamps, and hence be served less ++ * frequently. ++ */ ++ budget = min(budget * 2, bfqd->bfq_max_budget); ++ break; ++ case BFQ_BFQQ_BUDGET_EXHAUSTED: ++ /* ++ * The process still has backlog, and did not ++ * let either the budget timeout or the disk ++ * idling timeout expire. Hence it is not ++ * seeky, has a short thinktime and may be ++ * happy with a higher budget too. So ++ * definitely increase the budget of this good ++ * candidate to boost the disk throughput. ++ */ ++ budget = min(budget * 4, bfqd->bfq_max_budget); ++ break; ++ case BFQ_BFQQ_NO_MORE_REQUESTS: ++ /* ++ * Leave the budget unchanged. ++ */ ++ default: ++ return; ++ } ++ } else /* async queue */ ++ /* async queues get always the maximum possible budget ++ * (their ability to dispatch is limited by ++ * @bfqd->bfq_max_budget_async_rq). ++ */ ++ budget = bfqd->bfq_max_budget; ++ ++ bfqq->max_budget = budget; ++ ++ if (bfqd->budgets_assigned >= 194 && bfqd->bfq_user_max_budget == 0 && ++ bfqq->max_budget > bfqd->bfq_max_budget) ++ bfqq->max_budget = bfqd->bfq_max_budget; ++ ++ /* ++ * Make sure that we have enough budget for the next request. ++ * Since the finish time of the bfqq must be kept in sync with ++ * the budget, be sure to call __bfq_bfqq_expire() after the ++ * update. ++ */ ++ next_rq = bfqq->next_rq; ++ if (next_rq != NULL) ++ bfqq->entity.budget = max_t(unsigned long, bfqq->max_budget, ++ bfq_serv_to_charge(next_rq, bfqq)); ++ else ++ bfqq->entity.budget = bfqq->max_budget; ++ ++ bfq_log_bfqq(bfqd, bfqq, "head sect: %u, new budget %lu", ++ next_rq != NULL ? blk_rq_sectors(next_rq) : 0, ++ bfqq->entity.budget); ++} ++ ++static unsigned long bfq_calc_max_budget(u64 peak_rate, u64 timeout) ++{ ++ unsigned long max_budget; ++ ++ /* ++ * The max_budget calculated when autotuning is equal to the ++ * amount of sectors transfered in timeout_sync at the ++ * estimated peak rate. ++ */ ++ max_budget = (unsigned long)(peak_rate * 1000 * ++ timeout >> BFQ_RATE_SHIFT); ++ ++ return max_budget; ++} ++ ++/* ++ * In addition to updating the peak rate, checks whether the process ++ * is "slow", and returns 1 if so. This slow flag is used, in addition ++ * to the budget timeout, to reduce the amount of service provided to ++ * seeky processes, and hence reduce their chances to lower the ++ * throughput. See the code for more details. ++ */ ++static int bfq_update_peak_rate(struct bfq_data *bfqd, struct bfq_queue *bfqq, ++ int compensate, enum bfqq_expiration reason) ++{ ++ u64 bw, usecs, expected, timeout; ++ ktime_t delta; ++ int update = 0; ++ ++ if (!bfq_bfqq_sync(bfqq) || bfq_bfqq_budget_new(bfqq)) ++ return 0; ++ ++ if (compensate) ++ delta = bfqd->last_idling_start; ++ else ++ delta = ktime_get(); ++ delta = ktime_sub(delta, bfqd->last_budget_start); ++ usecs = ktime_to_us(delta); ++ ++ /* Don't trust short/unrealistic values. */ ++ if (usecs < 100 || usecs >= LONG_MAX) ++ return 0; ++ ++ /* ++ * Calculate the bandwidth for the last slice. 
We use a 64 bit ++ * value to store the peak rate, in sectors per usec in fixed ++ * point math. We do so to have enough precision in the estimate ++ * and to avoid overflows. ++ */ ++ bw = (u64)bfqq->entity.service << BFQ_RATE_SHIFT; ++ do_div(bw, (unsigned long)usecs); ++ ++ timeout = jiffies_to_msecs(bfqd->bfq_timeout[BLK_RW_SYNC]); ++ ++ /* ++ * Use only long (> 20ms) intervals to filter out spikes for ++ * the peak rate estimation. ++ */ ++ if (usecs > 20000) { ++ if (bw > bfqd->peak_rate || ++ (!BFQQ_SEEKY(bfqq) && ++ reason == BFQ_BFQQ_BUDGET_TIMEOUT)) { ++ bfq_log(bfqd, "measured bw =%llu", bw); ++ /* ++ * To smooth oscillations use a low-pass filter with ++ * alpha=7/8, i.e., ++ * new_rate = (7/8) * old_rate + (1/8) * bw ++ */ ++ do_div(bw, 8); ++ bfqd->peak_rate *= 7; ++ do_div(bfqd->peak_rate, 8); ++ bfqd->peak_rate += bw; ++ update = 1; ++ bfq_log(bfqd, "new peak_rate=%llu", bfqd->peak_rate); ++ } ++ ++ update |= bfqd->peak_rate_samples == BFQ_PEAK_RATE_SAMPLES - 1; ++ ++ if (bfqd->peak_rate_samples < BFQ_PEAK_RATE_SAMPLES) ++ bfqd->peak_rate_samples++; ++ ++ if (bfqd->peak_rate_samples == BFQ_PEAK_RATE_SAMPLES && ++ update && bfqd->bfq_user_max_budget == 0) { ++ bfqd->bfq_max_budget = ++ bfq_calc_max_budget(bfqd->peak_rate, timeout); ++ bfq_log(bfqd, "new max_budget=%lu", ++ bfqd->bfq_max_budget); ++ } ++ } ++ ++ /* ++ * If the process has been served for a too short time ++ * interval to let its possible sequential accesses prevail on ++ * the initial seek time needed to move the disk head on the ++ * first sector it requested, then give the process a chance ++ * and for the moment return false. ++ */ ++ if (bfqq->entity.budget <= bfq_max_budget(bfqd) / 8) ++ return 0; ++ ++ /* ++ * A process is considered ``slow'' (i.e., seeky, so that we ++ * cannot treat it fairly in the service domain, as it would ++ * slow down too much the other processes) if, when a slice ++ * ends for whatever reason, it has received service at a ++ * rate that would not be high enough to complete the budget ++ * before the budget timeout expiration. ++ */ ++ expected = bw * 1000 * timeout >> BFQ_RATE_SHIFT; ++ ++ /* ++ * Caveat: processes doing IO in the slower disk zones will ++ * tend to be slow(er) even if not seeky. And the estimated ++ * peak rate will actually be an average over the disk ++ * surface. Hence, to not be too harsh with unlucky processes, ++ * we keep a budget/3 margin of safety before declaring a ++ * process slow. ++ */ ++ return expected > (4 * bfqq->entity.budget) / 3; ++} ++ ++/** ++ * bfq_bfqq_expire - expire a queue. ++ * @bfqd: device owning the queue. ++ * @bfqq: the queue to expire. ++ * @compensate: if true, compensate for the time spent idling. ++ * @reason: the reason causing the expiration. ++ * ++ * ++ * If the process associated to the queue is slow (i.e., seeky), or in ++ * case of budget timeout, or, finally, if it is async, we ++ * artificially charge it an entire budget (independently of the ++ * actual service it received). As a consequence, the queue will get ++ * higher timestamps than the correct ones upon reactivation, and ++ * hence it will be rescheduled as if it had received more service ++ * than what it actually received. In the end, this class of processes ++ * will receive less service in proportion to how slowly they consume ++ * their budgets (and hence how seriously they tend to lower the ++ * throughput). 
++ * ++ * In contrast, when a queue expires because it has been idling for ++ * too much or because it exhausted its budget, we do not touch the ++ * amount of service it has received. Hence when the queue will be ++ * reactivated and its timestamps updated, the latter will be in sync ++ * with the actual service received by the queue until expiration. ++ * ++ * Charging a full budget to the first type of queues and the exact ++ * service to the others has the effect of using the WF2Q+ policy to ++ * schedule the former on a timeslice basis, without violating the ++ * service domain guarantees of the latter. ++ */ ++static void bfq_bfqq_expire(struct bfq_data *bfqd, ++ struct bfq_queue *bfqq, ++ int compensate, ++ enum bfqq_expiration reason) ++{ ++ int slow; ++ BUG_ON(bfqq != bfqd->active_queue); ++ ++ /* Update disk peak rate for autotuning and check whether the ++ * process is slow (see bfq_update_peak_rate). ++ */ ++ slow = bfq_update_peak_rate(bfqd, bfqq, compensate, reason); ++ ++ /* ++ * As above explained, 'punish' slow (i.e., seeky), timed-out ++ * and async queues, to favor sequential sync workloads. ++ * ++ * Processes doing IO in the slower disk zones will tend to be ++ * slow(er) even if not seeky. Hence, since the estimated peak ++ * rate is actually an average over the disk surface, these ++ * processes may timeout just for bad luck. To avoid punishing ++ * them we do not charge a full budget to a process that ++ * succeeded in consuming at least 2/3 of its budget. ++ */ ++ if (slow || (reason == BFQ_BFQQ_BUDGET_TIMEOUT && ++ bfq_bfqq_budget_left(bfqq) >= bfqq->entity.budget / 3)) ++ bfq_bfqq_charge_full_budget(bfqq); ++ ++ if (bfqd->low_latency && bfqq->raising_coeff == 1) ++ bfqq->last_rais_start_finish = jiffies; ++ ++ if (bfqd->low_latency && bfqd->bfq_raising_max_softrt_rate > 0) { ++ if(reason != BFQ_BFQQ_BUDGET_TIMEOUT) ++ bfqq->soft_rt_next_start = ++ jiffies + ++ HZ * bfqq->entity.service / ++ bfqd->bfq_raising_max_softrt_rate; ++ else ++ bfqq->soft_rt_next_start = -1; /* infinity */ ++ } ++ bfq_log_bfqq(bfqd, bfqq, ++ "expire (%d, slow %d, num_disp %d, idle_win %d)", reason, slow, ++ bfqq->dispatched, bfq_bfqq_idle_window(bfqq)); ++ ++ /* Increase, decrease or leave budget unchanged according to reason */ ++ __bfq_bfqq_recalc_budget(bfqd, bfqq, reason); ++ __bfq_bfqq_expire(bfqd, bfqq); ++} ++ ++/* ++ * Budget timeout is not implemented through a dedicated timer, but ++ * just checked on request arrivals and completions, as well as on ++ * idle timer expirations. ++ */ ++static int bfq_bfqq_budget_timeout(struct bfq_queue *bfqq) ++{ ++ if (bfq_bfqq_budget_new(bfqq)) ++ return 0; ++ ++ if (time_before(jiffies, bfqq->budget_timeout)) ++ return 0; ++ ++ return 1; ++} ++ ++/* ++ * If we expire a queue that is waiting for the arrival of a new ++ * request, we may prevent the fictitious timestamp backshifting that ++ * allows the guarantees of the queue to be preserved (see [1] for ++ * this tricky aspect). Hence we return true only if this condition ++ * does not hold, or if the queue is slow enough to deserve only to be ++ * kicked off for preserving a high throughput. 
++*/ ++static inline int bfq_may_expire_for_budg_timeout(struct bfq_queue *bfqq) ++{ ++ bfq_log_bfqq(bfqq->bfqd, bfqq, ++ "may_budget_timeout: wr %d left %d timeout %d", ++ bfq_bfqq_wait_request(bfqq), ++ bfq_bfqq_budget_left(bfqq) >= bfqq->entity.budget / 3, ++ bfq_bfqq_budget_timeout(bfqq)); ++ ++ return (!bfq_bfqq_wait_request(bfqq) || ++ bfq_bfqq_budget_left(bfqq) >= bfqq->entity.budget / 3) ++ && ++ bfq_bfqq_budget_timeout(bfqq); ++} ++ ++/* ++ * Select a queue for service. If we have a current active queue, ++ * check whether to continue servicing it, or retrieve and set a new one. ++ */ ++static struct bfq_queue *bfq_select_queue(struct bfq_data *bfqd) ++{ ++ struct bfq_queue *bfqq, *new_bfqq = NULL; ++ struct request *next_rq; ++ enum bfqq_expiration reason = BFQ_BFQQ_BUDGET_TIMEOUT; ++ ++ bfqq = bfqd->active_queue; ++ if (bfqq == NULL) ++ goto new_queue; ++ ++ bfq_log_bfqq(bfqd, bfqq, "select_queue: already active queue"); ++ ++ /* ++ * If another queue has a request waiting within our mean seek ++ * distance, let it run. The expire code will check for close ++ * cooperators and put the close queue at the front of the ++ * service tree. If possible, merge the expiring queue with the ++ * new bfqq. ++ */ ++ new_bfqq = bfq_close_cooperator(bfqd, bfqq); ++ if (new_bfqq != NULL && bfqq->new_bfqq == NULL) ++ bfq_setup_merge(bfqq, new_bfqq); ++ ++ if (bfq_may_expire_for_budg_timeout(bfqq)) ++ goto expire; ++ ++ next_rq = bfqq->next_rq; ++ /* ++ * If bfqq has requests queued and it has enough budget left to ++ * serve them, keep the queue, otherwise expire it. ++ */ ++ if (next_rq != NULL) { ++ if (bfq_serv_to_charge(next_rq, bfqq) > ++ bfq_bfqq_budget_left(bfqq)) { ++ reason = BFQ_BFQQ_BUDGET_EXHAUSTED; ++ goto expire; ++ } else { ++ /* ++ * The idle timer may be pending because we may not ++ * disable disk idling even when a new request arrives ++ */ ++ if (timer_pending(&bfqd->idle_slice_timer)) { ++ /* ++ * If we get here: 1) at least a new request ++ * has arrived but we have not disabled the ++ * timer because the request was too small, ++ * 2) then the block layer has unplugged the ++ * device, causing the dispatch to be invoked. ++ * ++ * Since the device is unplugged, now the ++ * requests are probably large enough to ++ * provide a reasonable throughput. ++ * So we disable idling. ++ */ ++ bfq_clear_bfqq_wait_request(bfqq); ++ del_timer(&bfqd->idle_slice_timer); ++ } ++ if (new_bfqq == NULL) ++ goto keep_queue; ++ else ++ goto expire; ++ } ++ } ++ ++ /* ++ * No requests pending. If there is no cooperator, and the active ++ * queue still has requests in flight or is idling for a new request, ++ * then keep it. ++ */ ++ if (new_bfqq == NULL && (timer_pending(&bfqd->idle_slice_timer) || ++ (bfqq->dispatched != 0 && bfq_bfqq_idle_window(bfqq) && ++ !bfq_queue_nonrot_noidle(bfqd, bfqq)))) { ++ bfqq = NULL; ++ goto keep_queue; ++ } else if (new_bfqq != NULL && timer_pending(&bfqd->idle_slice_timer)) { ++ /* ++ * Expiring the queue because there is a close cooperator, ++ * cancel timer. ++ */ ++ bfq_clear_bfqq_wait_request(bfqq); ++ del_timer(&bfqd->idle_slice_timer); ++ } ++ ++ reason = BFQ_BFQQ_NO_MORE_REQUESTS; ++expire: ++ bfq_bfqq_expire(bfqd, bfqq, 0, reason); ++new_queue: ++ bfqq = bfq_set_active_queue(bfqd, new_bfqq); ++ bfq_log(bfqd, "select_queue: new queue %d returned", ++ bfqq != NULL ? 
bfqq->pid : 0); ++keep_queue: ++ return bfqq; ++} ++ ++static void update_raising_data(struct bfq_data *bfqd, struct bfq_queue *bfqq) ++{ ++ if (bfqq->raising_coeff > 1) { /* queue is being boosted */ ++ struct bfq_entity *entity = &bfqq->entity; ++ ++ bfq_log_bfqq(bfqd, bfqq, ++ "raising period dur %u/%u msec, " ++ "old raising coeff %u, w %d(%d)", ++ jiffies_to_msecs(jiffies - ++ bfqq->last_rais_start_finish), ++ jiffies_to_msecs(bfqq->raising_cur_max_time), ++ bfqq->raising_coeff, ++ bfqq->entity.weight, bfqq->entity.orig_weight); ++ ++ BUG_ON(bfqq != bfqd->active_queue && entity->weight != ++ entity->orig_weight * bfqq->raising_coeff); ++ if(entity->ioprio_changed) ++ bfq_log_bfqq(bfqd, bfqq, ++ "WARN: pending prio change"); ++ /* ++ * If too much time has elapsed from the beginning ++ * of this weight-raising period and process is not soft ++ * real-time, stop it ++ */ ++ if (jiffies - bfqq->last_rais_start_finish > ++ bfqq->raising_cur_max_time) { ++ int soft_rt = bfqd->bfq_raising_max_softrt_rate > 0 && ++ bfqq->soft_rt_next_start < jiffies; ++ ++ bfqq->last_rais_start_finish = jiffies; ++ if (soft_rt) ++ bfqq->raising_cur_max_time = ++ bfqd->bfq_raising_rt_max_time; ++ else { ++ bfqq->raising_coeff = 1; ++ entity->ioprio_changed = 1; ++ __bfq_entity_update_weight_prio( ++ bfq_entity_service_tree(entity), ++ entity); ++ } ++ } ++ } ++} ++ ++ ++/* ++ * Dispatch one request from bfqq, moving it to the request queue ++ * dispatch list. ++ */ ++static int bfq_dispatch_request(struct bfq_data *bfqd, ++ struct bfq_queue *bfqq) ++{ ++ int dispatched = 0; ++ struct request *rq; ++ unsigned long service_to_charge; ++ ++ BUG_ON(RB_EMPTY_ROOT(&bfqq->sort_list)); ++ ++ /* Follow expired path, else get first next available. */ ++ rq = bfq_check_fifo(bfqq); ++ if (rq == NULL) ++ rq = bfqq->next_rq; ++ service_to_charge = bfq_serv_to_charge(rq, bfqq); ++ ++ if (service_to_charge > bfq_bfqq_budget_left(bfqq)) { ++ /* ++ * This may happen if the next rq is chosen ++ * in fifo order instead of sector order. ++ * The budget is properly dimensioned ++ * to be always sufficient to serve the next request ++ * only if it is chosen in sector order. The reason is ++ * that it would be quite inefficient and little useful ++ * to always make sure that the budget is large enough ++ * to serve even the possible next rq in fifo order. ++ * In fact, requests are seldom served in fifo order. ++ * ++ * Expire the queue for budget exhaustion, and ++ * make sure that the next act_budget is enough ++ * to serve the next request, even if it comes ++ * from the fifo expired path. ++ */ ++ bfqq->next_rq = rq; ++ /* ++ * Since this dispatch is failed, make sure that ++ * a new one will be performed ++ */ ++ if (!bfqd->rq_in_driver) ++ bfq_schedule_dispatch(bfqd); ++ goto expire; ++ } ++ ++ /* Finally, insert request into driver dispatch list. 
*/ ++ bfq_bfqq_served(bfqq, service_to_charge); ++ bfq_dispatch_insert(bfqd->queue, rq); ++ ++ update_raising_data(bfqd, bfqq); ++ ++ bfq_log_bfqq(bfqd, bfqq, "dispatched %u sec req (%llu), " ++ "budg left %lu", ++ blk_rq_sectors(rq), ++ (long long unsigned)blk_rq_pos(rq), ++ bfq_bfqq_budget_left(bfqq)); ++ ++ dispatched++; ++ ++ if (bfqd->active_cic == NULL) { ++ atomic_long_inc(&RQ_CIC(rq)->ioc->refcount); ++ bfqd->active_cic = RQ_CIC(rq); ++ } ++ ++ if (bfqd->busy_queues > 1 && ((!bfq_bfqq_sync(bfqq) && ++ dispatched >= bfqd->bfq_max_budget_async_rq) || ++ bfq_class_idle(bfqq))) ++ goto expire; ++ ++ return dispatched; ++ ++expire: ++ bfq_bfqq_expire(bfqd, bfqq, 0, BFQ_BFQQ_BUDGET_EXHAUSTED); ++ return dispatched; ++} ++ ++static int __bfq_forced_dispatch_bfqq(struct bfq_queue *bfqq) ++{ ++ int dispatched = 0; ++ ++ while (bfqq->next_rq != NULL) { ++ bfq_dispatch_insert(bfqq->bfqd->queue, bfqq->next_rq); ++ dispatched++; ++ } ++ ++ BUG_ON(!list_empty(&bfqq->fifo)); ++ return dispatched; ++} ++ ++/* ++ * Drain our current requests. Used for barriers and when switching ++ * io schedulers on-the-fly. ++ */ ++static int bfq_forced_dispatch(struct bfq_data *bfqd) ++{ ++ struct bfq_queue *bfqq, *n; ++ struct bfq_service_tree *st; ++ int dispatched = 0; ++ ++ bfqq = bfqd->active_queue; ++ if (bfqq != NULL) ++ __bfq_bfqq_expire(bfqd, bfqq); ++ ++ /* ++ * Loop through classes, and be careful to leave the scheduler ++ * in a consistent state, as feedback mechanisms and vtime ++ * updates cannot be disabled during the process. ++ */ ++ list_for_each_entry_safe(bfqq, n, &bfqd->active_list, bfqq_list) { ++ st = bfq_entity_service_tree(&bfqq->entity); ++ ++ dispatched += __bfq_forced_dispatch_bfqq(bfqq); ++ bfqq->max_budget = bfq_max_budget(bfqd); ++ ++ bfq_forget_idle(st); ++ } ++ ++ BUG_ON(bfqd->busy_queues != 0); ++ ++ return dispatched; ++} ++ ++static int bfq_dispatch_requests(struct request_queue *q, int force) ++{ ++ struct bfq_data *bfqd = q->elevator->elevator_data; ++ struct bfq_queue *bfqq; ++ int max_dispatch; ++ ++ bfq_log(bfqd, "dispatch requests: %d busy queues", bfqd->busy_queues); ++ if (bfqd->busy_queues == 0) ++ return 0; ++ ++ if (unlikely(force)) ++ return bfq_forced_dispatch(bfqd); ++ ++ if((bfqq = bfq_select_queue(bfqd)) == NULL) ++ return 0; ++ ++ max_dispatch = bfqd->bfq_quantum; ++ if (bfq_class_idle(bfqq)) ++ max_dispatch = 1; ++ ++ if (!bfq_bfqq_sync(bfqq)) ++ max_dispatch = bfqd->bfq_max_budget_async_rq; ++ ++ if (bfqq->dispatched >= max_dispatch) { ++ if (bfqd->busy_queues > 1) ++ return 0; ++ if (bfqq->dispatched >= 4 * max_dispatch) ++ return 0; ++ } ++ ++ if (bfqd->sync_flight != 0 && !bfq_bfqq_sync(bfqq)) ++ return 0; ++ ++ bfq_clear_bfqq_wait_request(bfqq); ++ BUG_ON(timer_pending(&bfqd->idle_slice_timer)); ++ ++ if (! bfq_dispatch_request(bfqd, bfqq)) ++ return 0; ++ ++ bfq_log_bfqq(bfqd, bfqq, "dispatched one request of %d" ++ "(max_disp %d)", bfqq->pid, max_dispatch); ++ ++ return 1; ++} ++ ++/* ++ * Task holds one reference to the queue, dropped when task exits. Each rq ++ * in-flight on this queue also holds a reference, dropped when rq is freed. ++ * ++ * Queue lock must be held here. 
++ */ ++static void bfq_put_queue(struct bfq_queue *bfqq) ++{ ++ struct bfq_data *bfqd = bfqq->bfqd; ++ ++ BUG_ON(atomic_read(&bfqq->ref) <= 0); ++ ++ bfq_log_bfqq(bfqd, bfqq, "put_queue: %p %d", bfqq, ++ atomic_read(&bfqq->ref)); ++ if (!atomic_dec_and_test(&bfqq->ref)) ++ return; ++ ++ BUG_ON(rb_first(&bfqq->sort_list) != NULL); ++ BUG_ON(bfqq->allocated[READ] + bfqq->allocated[WRITE] != 0); ++ BUG_ON(bfqq->entity.tree != NULL); ++ BUG_ON(bfq_bfqq_busy(bfqq)); ++ BUG_ON(bfqd->active_queue == bfqq); ++ ++ bfq_log_bfqq(bfqd, bfqq, "put_queue: %p freed", bfqq); ++ ++ kmem_cache_free(bfq_pool, bfqq); ++} ++ ++static void bfq_put_cooperator(struct bfq_queue *bfqq) ++{ ++ struct bfq_queue *__bfqq, *next; ++ ++ /* ++ * If this queue was scheduled to merge with another queue, be ++ * sure to drop the reference taken on that queue (and others in ++ * the merge chain). See bfq_setup_merge and bfq_merge_bfqqs. ++ */ ++ __bfqq = bfqq->new_bfqq; ++ while (__bfqq) { ++ if (__bfqq == bfqq) { ++ WARN(1, "bfqq->new_bfqq loop detected.\n"); ++ break; ++ } ++ next = __bfqq->new_bfqq; ++ bfq_put_queue(__bfqq); ++ __bfqq = next; ++ } ++} ++ ++static void bfq_exit_bfqq(struct bfq_data *bfqd, struct bfq_queue *bfqq) ++{ ++ if (bfqq == bfqd->active_queue) { ++ __bfq_bfqq_expire(bfqd, bfqq); ++ bfq_schedule_dispatch(bfqd); ++ } ++ ++ bfq_log_bfqq(bfqd, bfqq, "exit_bfqq: %p, %d", bfqq, ++ atomic_read(&bfqq->ref)); ++ ++ bfq_put_cooperator(bfqq); ++ ++ bfq_put_queue(bfqq); ++} ++ ++/* ++ * Update the entity prio values; note that the new values will not ++ * be used until the next (re)activation. ++ */ ++static void bfq_init_prio_data(struct bfq_queue *bfqq, struct io_context *ioc) ++{ ++ struct task_struct *tsk = current; ++ int ioprio_class; ++ ++ if (!bfq_bfqq_prio_changed(bfqq)) ++ return; ++ ++ ioprio_class = IOPRIO_PRIO_CLASS(ioc->ioprio); ++ switch (ioprio_class) { ++ default: ++ printk(KERN_ERR "bfq: bad prio %x\n", ioprio_class); ++ case IOPRIO_CLASS_NONE: ++ /* ++ * No prio set, inherit CPU scheduling settings. ++ */ ++ bfqq->entity.new_ioprio = task_nice_ioprio(tsk); ++ bfqq->entity.new_ioprio_class = task_nice_ioclass(tsk); ++ break; ++ case IOPRIO_CLASS_RT: ++ bfqq->entity.new_ioprio = task_ioprio(ioc); ++ bfqq->entity.new_ioprio_class = IOPRIO_CLASS_RT; ++ break; ++ case IOPRIO_CLASS_BE: ++ bfqq->entity.new_ioprio = task_ioprio(ioc); ++ bfqq->entity.new_ioprio_class = IOPRIO_CLASS_BE; ++ break; ++ case IOPRIO_CLASS_IDLE: ++ bfqq->entity.new_ioprio_class = IOPRIO_CLASS_IDLE; ++ bfqq->entity.new_ioprio = 7; ++ bfq_clear_bfqq_idle_window(bfqq); ++ break; ++ } ++ ++ bfqq->entity.ioprio_changed = 1; ++ ++ /* ++ * Keep track of original prio settings in case we have to temporarily ++ * elevate the priority of this queue. 
++ */ ++ bfqq->org_ioprio = bfqq->entity.new_ioprio; ++ bfq_clear_bfqq_prio_changed(bfqq); ++} ++ ++static void bfq_changed_ioprio(struct io_context *ioc, ++ struct cfq_io_context *cic) ++{ ++ struct bfq_data *bfqd; ++ struct bfq_queue *bfqq, *new_bfqq; ++ struct bfq_group *bfqg; ++ unsigned long uninitialized_var(flags); ++ ++ bfqd = bfq_get_bfqd_locked(&cic->key, &flags); ++ if (unlikely(bfqd == NULL)) ++ return; ++ ++ bfqq = cic->cfqq[BLK_RW_ASYNC]; ++ if (bfqq != NULL) { ++ bfqg = container_of(bfqq->entity.sched_data, struct bfq_group, ++ sched_data); ++ new_bfqq = bfq_get_queue(bfqd, bfqg, BLK_RW_ASYNC, cic->ioc, ++ GFP_ATOMIC); ++ if (new_bfqq != NULL) { ++ cic->cfqq[BLK_RW_ASYNC] = new_bfqq; ++ bfq_log_bfqq(bfqd, bfqq, ++ "changed_ioprio: bfqq %p %d", ++ bfqq, atomic_read(&bfqq->ref)); ++ bfq_put_queue(bfqq); ++ } ++ } ++ ++ bfqq = cic->cfqq[BLK_RW_SYNC]; ++ if (bfqq != NULL) ++ bfq_mark_bfqq_prio_changed(bfqq); ++ ++ bfq_put_bfqd_unlock(bfqd, &flags); ++} ++ ++static void bfq_init_bfqq(struct bfq_data *bfqd, struct bfq_queue *bfqq, ++ pid_t pid, int is_sync) ++{ ++ RB_CLEAR_NODE(&bfqq->entity.rb_node); ++ INIT_LIST_HEAD(&bfqq->fifo); ++ ++ atomic_set(&bfqq->ref, 0); ++ bfqq->bfqd = bfqd; ++ ++ bfq_mark_bfqq_prio_changed(bfqq); ++ ++ if (is_sync) { ++ if (!bfq_class_idle(bfqq)) ++ bfq_mark_bfqq_idle_window(bfqq); ++ bfq_mark_bfqq_sync(bfqq); ++ } ++ ++ /* Tentative initial value to trade off between thr and lat */ ++ bfqq->max_budget = (2 * bfq_max_budget(bfqd)) / 3; ++ bfqq->pid = pid; ++ ++ bfqq->raising_coeff = 1; ++ bfqq->last_rais_start_finish = 0; ++ bfqq->soft_rt_next_start = -1; ++} ++ ++static struct bfq_queue *bfq_find_alloc_queue(struct bfq_data *bfqd, ++ struct bfq_group *bfqg, ++ int is_sync, ++ struct io_context *ioc, ++ gfp_t gfp_mask) ++{ ++ struct bfq_queue *bfqq, *new_bfqq = NULL; ++ struct cfq_io_context *cic; ++ ++retry: ++ cic = bfq_cic_lookup(bfqd, ioc); ++ /* cic always exists here */ ++ bfqq = cic_to_bfqq(cic, is_sync); ++ ++ /* ++ * Always try a new alloc if we fall back to the OOM bfqq ++ * originally, since it should just be a temporary situation. 
++ */ ++ if (bfqq == NULL || bfqq == &bfqd->oom_bfqq) { ++ bfqq = NULL; ++ if (new_bfqq != NULL) { ++ bfqq = new_bfqq; ++ new_bfqq = NULL; ++ } else if (gfp_mask & __GFP_WAIT) { ++ spin_unlock_irq(bfqd->queue->queue_lock); ++ new_bfqq = kmem_cache_alloc_node(bfq_pool, ++ gfp_mask | __GFP_ZERO, ++ bfqd->queue->node); ++ spin_lock_irq(bfqd->queue->queue_lock); ++ if (new_bfqq != NULL) ++ goto retry; ++ } else { ++ bfqq = kmem_cache_alloc_node(bfq_pool, ++ gfp_mask | __GFP_ZERO, ++ bfqd->queue->node); ++ } ++ ++ if (bfqq != NULL) { ++ bfq_init_bfqq(bfqd, bfqq, current->pid, is_sync); ++ bfq_log_bfqq(bfqd, bfqq, "allocated"); ++ } else { ++ bfqq = &bfqd->oom_bfqq; ++ bfq_log_bfqq(bfqd, bfqq, "using oom bfqq"); ++ } ++ ++ bfq_init_prio_data(bfqq, ioc); ++ bfq_init_entity(&bfqq->entity, bfqg); ++ } ++ ++ if (new_bfqq != NULL) ++ kmem_cache_free(bfq_pool, new_bfqq); ++ ++ return bfqq; ++} ++ ++static struct bfq_queue **bfq_async_queue_prio(struct bfq_data *bfqd, ++ struct bfq_group *bfqg, ++ int ioprio_class, int ioprio) ++{ ++ switch (ioprio_class) { ++ case IOPRIO_CLASS_RT: ++ return &bfqg->async_bfqq[0][ioprio]; ++ case IOPRIO_CLASS_BE: ++ return &bfqg->async_bfqq[1][ioprio]; ++ case IOPRIO_CLASS_IDLE: ++ return &bfqg->async_idle_bfqq; ++ default: ++ BUG(); ++ } ++} ++ ++static struct bfq_queue *bfq_get_queue(struct bfq_data *bfqd, ++ struct bfq_group *bfqg, int is_sync, ++ struct io_context *ioc, gfp_t gfp_mask) ++{ ++ const int ioprio = task_ioprio(ioc); ++ const int ioprio_class = task_ioprio_class(ioc); ++ struct bfq_queue **async_bfqq = NULL; ++ struct bfq_queue *bfqq = NULL; ++ ++ if (!is_sync) { ++ async_bfqq = bfq_async_queue_prio(bfqd, bfqg, ioprio_class, ++ ioprio); ++ bfqq = *async_bfqq; ++ } ++ ++ if (bfqq == NULL) ++ bfqq = bfq_find_alloc_queue(bfqd, bfqg, is_sync, ioc, gfp_mask); ++ ++ /* ++ * Pin the queue now that it's allocated, scheduler exit will prune it. ++ */ ++ if (!is_sync && *async_bfqq == NULL) { ++ atomic_inc(&bfqq->ref); ++ bfq_log_bfqq(bfqd, bfqq, "get_queue, bfqq not in async: %p, %d", ++ bfqq, atomic_read(&bfqq->ref)); ++ *async_bfqq = bfqq; ++ } ++ ++ atomic_inc(&bfqq->ref); ++ bfq_log_bfqq(bfqd, bfqq, "get_queue, at end: %p, %d", bfqq, ++ atomic_read(&bfqq->ref)); ++ return bfqq; ++} ++ ++static void bfq_update_io_thinktime(struct bfq_data *bfqd, ++ struct cfq_io_context *cic) ++{ ++ unsigned long elapsed = jiffies - cic->ttime.last_end_request; ++ unsigned long ttime = min(elapsed, 2UL * bfqd->bfq_slice_idle); ++ ++ cic->ttime.ttime_samples = (7*cic->ttime.ttime_samples + 256) / 8; ++ cic->ttime.ttime_total = (7*cic->ttime.ttime_total + 256*ttime) / 8; ++ cic->ttime.ttime_mean = (cic->ttime.ttime_total + 128) / cic->ttime.ttime_samples; ++} ++ ++static void bfq_update_io_seektime(struct bfq_data *bfqd, ++ struct bfq_queue *bfqq, ++ struct request *rq) ++{ ++ sector_t sdist; ++ u64 total; ++ ++ if (bfqq->last_request_pos < blk_rq_pos(rq)) ++ sdist = blk_rq_pos(rq) - bfqq->last_request_pos; ++ else ++ sdist = bfqq->last_request_pos - blk_rq_pos(rq); ++ ++ /* ++ * Don't allow the seek distance to get too large from the ++ * odd fragment, pagein, etc. 
++ */ ++ if (bfqq->seek_samples == 0) /* first request, not really a seek */ ++ sdist = 0; ++ else if (bfqq->seek_samples <= 60) /* second & third seek */ ++ sdist = min(sdist, (bfqq->seek_mean * 4) + 2*1024*1024); ++ else ++ sdist = min(sdist, (bfqq->seek_mean * 4) + 2*1024*64); ++ ++ bfqq->seek_samples = (7*bfqq->seek_samples + 256) / 8; ++ bfqq->seek_total = (7*bfqq->seek_total + (u64)256*sdist) / 8; ++ total = bfqq->seek_total + (bfqq->seek_samples/2); ++ do_div(total, bfqq->seek_samples); ++ if (bfq_bfqq_coop(bfqq)) { ++ /* ++ * If the mean seektime increases for a (non-seeky) shared ++ * queue, some cooperator is likely to be idling too much. ++ * On the contrary, if it decreases, some cooperator has ++ * probably waked up. ++ * ++ */ ++ if ((sector_t)total < bfqq->seek_mean) ++ bfq_mark_bfqq_some_coop_idle(bfqq) ; ++ else if ((sector_t)total > bfqq->seek_mean) ++ bfq_clear_bfqq_some_coop_idle(bfqq) ; ++ } ++ bfqq->seek_mean = (sector_t)total; ++ ++ bfq_log_bfqq(bfqd, bfqq, "dist=%llu mean=%llu", (u64)sdist, ++ (u64)bfqq->seek_mean); ++} ++ ++/* ++ * Disable idle window if the process thinks too long or seeks so much that ++ * it doesn't matter. ++ */ ++static void bfq_update_idle_window(struct bfq_data *bfqd, ++ struct bfq_queue *bfqq, ++ struct cfq_io_context *cic) ++{ ++ int enable_idle; ++ ++ /* Don't idle for async or idle io prio class. */ ++ if (!bfq_bfqq_sync(bfqq) || bfq_class_idle(bfqq)) ++ return; ++ ++ enable_idle = bfq_bfqq_idle_window(bfqq); ++ ++ if (atomic_read(&cic->ioc->nr_tasks) == 0 || ++ bfqd->bfq_slice_idle == 0 || ++ (bfqd->hw_tag && BFQQ_SEEKY(bfqq) && ++ bfqq->raising_coeff == 1)) ++ enable_idle = 0; ++ else if (bfq_sample_valid(cic->ttime.ttime_samples)) { ++ if (cic->ttime.ttime_mean > bfqd->bfq_slice_idle && ++ bfqq->raising_coeff == 1) ++ enable_idle = 0; ++ else ++ enable_idle = 1; ++ } ++ bfq_log_bfqq(bfqd, bfqq, "update_idle_window: enable_idle %d", ++ enable_idle); ++ ++ if (enable_idle) ++ bfq_mark_bfqq_idle_window(bfqq); ++ else ++ bfq_clear_bfqq_idle_window(bfqq); ++} ++ ++/* ++ * Called when a new fs request (rq) is added to bfqq. Check if there's ++ * something we should do about it. ++ */ ++static void bfq_rq_enqueued(struct bfq_data *bfqd, struct bfq_queue *bfqq, ++ struct request *rq) ++{ ++ struct cfq_io_context *cic = RQ_CIC(rq); ++ ++ if (rq->cmd_flags & REQ_META) ++ bfqq->meta_pending++; ++ ++ bfq_update_io_thinktime(bfqd, cic); ++ bfq_update_io_seektime(bfqd, bfqq, rq); ++ if (bfqq->entity.service > bfq_max_budget(bfqd) / 8 || ++ !BFQQ_SEEKY(bfqq)) ++ bfq_update_idle_window(bfqd, bfqq, cic); ++ ++ bfq_log_bfqq(bfqd, bfqq, ++ "rq_enqueued: idle_window=%d (seeky %d, mean %llu)", ++ bfq_bfqq_idle_window(bfqq), BFQQ_SEEKY(bfqq), ++ (long long unsigned)bfqq->seek_mean); ++ ++ bfqq->last_request_pos = blk_rq_pos(rq) + blk_rq_sectors(rq); ++ ++ if (bfqq == bfqd->active_queue) { ++ /* ++ * If there is just this request queued and the request ++ * is small, just exit. ++ * In this way, if the disk is being idled to wait for a new ++ * request from the active queue, we avoid unplugging the ++ * device now. ++ * ++ * By doing so, we spare the disk to be committed ++ * to serve just a small request. On the contrary, we wait for ++ * the block layer to decide when to unplug the device: ++ * hopefully, new requests will be merged to this ++ * one quickly, then the device will be unplugged ++ * and larger requests will be dispatched. 
++ */ ++ if (bfqq->queued[rq_is_sync(rq)] == 1 && ++ blk_rq_sectors(rq) < 32) { ++ return; ++ } ++ if (bfq_bfqq_wait_request(bfqq)) { ++ /* ++ * If we are waiting for a request for this queue, let ++ * it rip immediately and flag that we must not expire ++ * this queue just now. ++ */ ++ bfq_clear_bfqq_wait_request(bfqq); ++ del_timer(&bfqd->idle_slice_timer); ++ /* ++ * Here we can safely expire the queue, in ++ * case of budget timeout, without wasting ++ * guarantees ++ */ ++ if (bfq_bfqq_budget_timeout(bfqq)) ++ bfq_bfqq_expire(bfqd, bfqq, 0, ++ BFQ_BFQQ_BUDGET_TIMEOUT); ++ __blk_run_queue(bfqd->queue); ++ } ++ } ++} ++ ++static void bfq_insert_request(struct request_queue *q, struct request *rq) ++{ ++ struct bfq_data *bfqd = q->elevator->elevator_data; ++ struct bfq_queue *bfqq = RQ_BFQQ(rq); ++ ++ assert_spin_locked(bfqd->queue->queue_lock); ++ bfq_init_prio_data(bfqq, RQ_CIC(rq)->ioc); ++ ++ bfq_add_rq_rb(rq); ++ ++ rq_set_fifo_time(rq, jiffies + bfqd->bfq_fifo_expire[rq_is_sync(rq)]); ++ list_add_tail(&rq->queuelist, &bfqq->fifo); ++ ++ bfq_rq_enqueued(bfqd, bfqq, rq); ++} ++ ++static void bfq_update_hw_tag(struct bfq_data *bfqd) ++{ ++ bfqd->max_rq_in_driver = max(bfqd->max_rq_in_driver, ++ bfqd->rq_in_driver); ++ ++ if (bfqd->hw_tag == 1) ++ return; ++ ++ /* ++ * This sample is valid if the number of outstanding requests ++ * is large enough to allow a queueing behavior. Note that the ++ * sum is not exact, as it's not taking into account deactivated ++ * requests. ++ */ ++ if (bfqd->rq_in_driver + bfqd->queued < BFQ_HW_QUEUE_THRESHOLD) ++ return; ++ ++ if (bfqd->hw_tag_samples++ < BFQ_HW_QUEUE_SAMPLES) ++ return; ++ ++ bfqd->hw_tag = bfqd->max_rq_in_driver > BFQ_HW_QUEUE_THRESHOLD; ++ bfqd->max_rq_in_driver = 0; ++ bfqd->hw_tag_samples = 0; ++} ++ ++static void bfq_completed_request(struct request_queue *q, struct request *rq) ++{ ++ struct bfq_queue *bfqq = RQ_BFQQ(rq); ++ struct bfq_data *bfqd = bfqq->bfqd; ++ const int sync = rq_is_sync(rq); ++ ++ bfq_log_bfqq(bfqd, bfqq, "completed %u sects req (%d)", ++ blk_rq_sectors(rq), sync); ++ ++ bfq_update_hw_tag(bfqd); ++ ++ WARN_ON(!bfqd->rq_in_driver); ++ WARN_ON(!bfqq->dispatched); ++ bfqd->rq_in_driver--; ++ bfqq->dispatched--; ++ ++ if (bfq_bfqq_sync(bfqq)) ++ bfqd->sync_flight--; ++ ++ if (sync) ++ RQ_CIC(rq)->ttime.last_end_request = jiffies; ++ ++ /* ++ * If this is the active queue, check if it needs to be expired, ++ * or if we want to idle in case it has no pending requests. 
++ */ ++ if (bfqd->active_queue == bfqq) { ++ if (bfq_bfqq_budget_new(bfqq)) ++ bfq_set_budget_timeout(bfqd); ++ ++ /* Idling is disabled also for cooperation issues: ++ * 1) there is a close cooperator for the queue, or ++ * 2) the queue is shared and some cooperator is likely ++ * to be idle (in this case, by not arming the idle timer, ++ * we try to slow down the queue, to prevent the zones ++ * of the disk accessed by the active cooperators to become ++ * too distant from the zone that will be accessed by the ++ * currently idle cooperators) ++ */ ++ if (bfq_may_expire_for_budg_timeout(bfqq)) ++ bfq_bfqq_expire(bfqd, bfqq, 0, BFQ_BFQQ_BUDGET_TIMEOUT); ++ else if (sync && ++ (bfqd->rq_in_driver == 0 || ++ bfqq->raising_coeff > 1) ++ && RB_EMPTY_ROOT(&bfqq->sort_list) ++ && !bfq_close_cooperator(bfqd, bfqq) ++ && (!bfq_bfqq_coop(bfqq) || ++ !bfq_bfqq_some_coop_idle(bfqq))) ++ bfq_arm_slice_timer(bfqd); ++ } ++ ++ if (!bfqd->rq_in_driver) ++ bfq_schedule_dispatch(bfqd); ++} ++ ++static inline int __bfq_may_queue(struct bfq_queue *bfqq) ++{ ++ if (bfq_bfqq_wait_request(bfqq) && bfq_bfqq_must_alloc(bfqq)) { ++ bfq_clear_bfqq_must_alloc(bfqq); ++ return ELV_MQUEUE_MUST; ++ } ++ ++ return ELV_MQUEUE_MAY; ++} ++ ++static int bfq_may_queue(struct request_queue *q, int rw) ++{ ++ struct bfq_data *bfqd = q->elevator->elevator_data; ++ struct task_struct *tsk = current; ++ struct cfq_io_context *cic; ++ struct bfq_queue *bfqq; ++ ++ /* ++ * Don't force setup of a queue from here, as a call to may_queue ++ * does not necessarily imply that a request actually will be queued. ++ * So just lookup a possibly existing queue, or return 'may queue' ++ * if that fails. ++ */ ++ cic = bfq_cic_lookup(bfqd, tsk->io_context); ++ if (cic == NULL) ++ return ELV_MQUEUE_MAY; ++ ++ bfqq = cic_to_bfqq(cic, rw_is_sync(rw)); ++ if (bfqq != NULL) { ++ bfq_init_prio_data(bfqq, cic->ioc); ++ ++ return __bfq_may_queue(bfqq); ++ } ++ ++ return ELV_MQUEUE_MAY; ++} ++ ++/* ++ * Queue lock held here. ++ */ ++static void bfq_put_request(struct request *rq) ++{ ++ struct bfq_queue *bfqq = RQ_BFQQ(rq); ++ ++ if (bfqq != NULL) { ++ const int rw = rq_data_dir(rq); ++ ++ BUG_ON(!bfqq->allocated[rw]); ++ bfqq->allocated[rw]--; ++ ++ put_io_context(RQ_CIC(rq)->ioc); ++ ++ rq->elevator_private[0] = NULL; ++ rq->elevator_private[1] = NULL; ++ ++ bfq_log_bfqq(bfqq->bfqd, bfqq, "put_request %p, %d", ++ bfqq, atomic_read(&bfqq->ref)); ++ bfq_put_queue(bfqq); ++ } ++} ++ ++static struct bfq_queue * ++bfq_merge_bfqqs(struct bfq_data *bfqd, struct cfq_io_context *cic, ++ struct bfq_queue *bfqq) ++{ ++ bfq_log_bfqq(bfqd, bfqq, "merging with queue %lu", ++ (long unsigned)bfqq->new_bfqq->pid); ++ cic_set_bfqq(cic, bfqq->new_bfqq, 1); ++ bfq_mark_bfqq_coop(bfqq->new_bfqq); ++ bfq_put_queue(bfqq); ++ return cic_to_bfqq(cic, 1); ++} ++ ++/* ++ * Returns NULL if a new bfqq should be allocated, or the old bfqq if this ++ * was the last process referring to said bfqq. ++ */ ++static struct bfq_queue * ++bfq_split_bfqq(struct cfq_io_context *cic, struct bfq_queue *bfqq) ++{ ++ bfq_log_bfqq(bfqq->bfqd, bfqq, "splitting queue"); ++ if (bfqq_process_refs(bfqq) == 1) { ++ bfqq->pid = current->pid; ++ bfq_clear_bfqq_some_coop_idle(bfqq); ++ bfq_clear_bfqq_coop(bfqq); ++ bfq_clear_bfqq_split_coop(bfqq); ++ return bfqq; ++ } ++ ++ cic_set_bfqq(cic, NULL, 1); ++ ++ bfq_put_cooperator(bfqq); ++ ++ bfq_put_queue(bfqq); ++ return NULL; ++} ++ ++/* ++ * Allocate bfq data structures associated with this request. 
++ */ ++static int bfq_set_request(struct request_queue *q, struct request *rq, ++ gfp_t gfp_mask) ++{ ++ struct bfq_data *bfqd = q->elevator->elevator_data; ++ struct cfq_io_context *cic; ++ const int rw = rq_data_dir(rq); ++ const int is_sync = rq_is_sync(rq); ++ struct bfq_queue *bfqq; ++ struct bfq_group *bfqg; ++ unsigned long flags; ++ ++ might_sleep_if(gfp_mask & __GFP_WAIT); ++ ++ cic = bfq_get_io_context(bfqd, gfp_mask); ++ ++ spin_lock_irqsave(q->queue_lock, flags); ++ ++ if (cic == NULL) ++ goto queue_fail; ++ ++ bfqg = bfq_cic_update_cgroup(cic); ++ ++new_queue: ++ bfqq = cic_to_bfqq(cic, is_sync); ++ if (bfqq == NULL || bfqq == &bfqd->oom_bfqq) { ++ bfqq = bfq_get_queue(bfqd, bfqg, is_sync, cic->ioc, gfp_mask); ++ cic_set_bfqq(cic, bfqq, is_sync); ++ } else { ++ /* ++ * If the queue was seeky for too long, break it apart. ++ */ ++ if (bfq_bfqq_coop(bfqq) && bfq_bfqq_split_coop(bfqq)) { ++ bfq_log_bfqq(bfqd, bfqq, "breaking apart bfqq"); ++ bfqq = bfq_split_bfqq(cic, bfqq); ++ if (!bfqq) ++ goto new_queue; ++ } ++ ++ /* ++ * Check to see if this queue is scheduled to merge with ++ * another closely cooperating queue. The merging of queues ++ * happens here as it must be done in process context. ++ * The reference on new_bfqq was taken in merge_bfqqs. ++ */ ++ if (bfqq->new_bfqq != NULL) ++ bfqq = bfq_merge_bfqqs(bfqd, cic, bfqq); ++ } ++ ++ bfqq->allocated[rw]++; ++ atomic_inc(&bfqq->ref); ++ bfq_log_bfqq(bfqd, bfqq, "set_request: bfqq %p, %d", bfqq, ++ atomic_read(&bfqq->ref)); ++ ++ spin_unlock_irqrestore(q->queue_lock, flags); ++ ++ rq->elevator_private[0] = cic; ++ rq->elevator_private[1] = bfqq; ++ ++ return 0; ++ ++queue_fail: ++ if (cic != NULL) ++ put_io_context(cic->ioc); ++ ++ bfq_schedule_dispatch(bfqd); ++ spin_unlock_irqrestore(q->queue_lock, flags); ++ ++ return 1; ++} ++ ++static void bfq_kick_queue(struct work_struct *work) ++{ ++ struct bfq_data *bfqd = ++ container_of(work, struct bfq_data, unplug_work); ++ struct request_queue *q = bfqd->queue; ++ ++ spin_lock_irq(q->queue_lock); ++ __blk_run_queue(q); ++ spin_unlock_irq(q->queue_lock); ++} ++ ++/* ++ * Handler of the expiration of the timer running if the active_queue ++ * is idling inside its time slice. ++ */ ++static void bfq_idle_slice_timer(unsigned long data) ++{ ++ struct bfq_data *bfqd = (struct bfq_data *)data; ++ struct bfq_queue *bfqq; ++ unsigned long flags; ++ enum bfqq_expiration reason; ++ ++ spin_lock_irqsave(bfqd->queue->queue_lock, flags); ++ ++ bfqq = bfqd->active_queue; ++ /* ++ * Theoretical race here: active_queue can be NULL or different ++ * from the queue that was idling if the timer handler spins on ++ * the queue_lock and a new request arrives for the current ++ * queue and there is a full dispatch cycle that changes the ++ * active_queue. This can hardly happen, but in the worst case ++ * we just expire a queue too early. 
++ */ ++ if (bfqq != NULL) { ++ bfq_log_bfqq(bfqd, bfqq, "slice_timer expired"); ++ if (bfq_bfqq_budget_timeout(bfqq)) ++ /* ++ * Also here the queue can be safely expired ++ * for budget timeout without wasting ++ * guarantees ++ */ ++ reason = BFQ_BFQQ_BUDGET_TIMEOUT; ++ else if (bfqq->queued[0] == 0 && bfqq->queued[1] == 0) ++ /* ++ * The queue may not be empty upon timer expiration, ++ * because we may not disable the timer when the first ++ * request of the active queue arrives during ++ * disk idling ++ */ ++ reason = BFQ_BFQQ_TOO_IDLE; ++ else ++ goto schedule_dispatch; ++ ++ bfq_bfqq_expire(bfqd, bfqq, 1, reason); ++ } ++ ++schedule_dispatch: ++ bfq_schedule_dispatch(bfqd); ++ ++ spin_unlock_irqrestore(bfqd->queue->queue_lock, flags); ++} ++ ++static void bfq_shutdown_timer_wq(struct bfq_data *bfqd) ++{ ++ del_timer_sync(&bfqd->idle_slice_timer); ++ cancel_work_sync(&bfqd->unplug_work); ++} ++ ++static inline void __bfq_put_async_bfqq(struct bfq_data *bfqd, ++ struct bfq_queue **bfqq_ptr) ++{ ++ struct bfq_group *root_group = bfqd->root_group; ++ struct bfq_queue *bfqq = *bfqq_ptr; ++ ++ bfq_log(bfqd, "put_async_bfqq: %p", bfqq); ++ if (bfqq != NULL) { ++ bfq_bfqq_move(bfqd, bfqq, &bfqq->entity, root_group); ++ bfq_log_bfqq(bfqd, bfqq, "put_async_bfqq: putting %p, %d", ++ bfqq, atomic_read(&bfqq->ref)); ++ bfq_put_queue(bfqq); ++ *bfqq_ptr = NULL; ++ } ++} ++ ++/* ++ * Release all the bfqg references to its async queues. If we are ++ * deallocating the group these queues may still contain requests, so ++ * we reparent them to the root cgroup (i.e., the only one that will ++ * exist for sure untill all the requests on a device are gone). ++ */ ++static void bfq_put_async_queues(struct bfq_data *bfqd, struct bfq_group *bfqg) ++{ ++ int i, j; ++ ++ for (i = 0; i < 2; i++) ++ for (j = 0; j < IOPRIO_BE_NR; j++) ++ __bfq_put_async_bfqq(bfqd, &bfqg->async_bfqq[i][j]); ++ ++ __bfq_put_async_bfqq(bfqd, &bfqg->async_idle_bfqq); ++} ++ ++static void bfq_exit_queue(struct elevator_queue *e) ++{ ++ struct bfq_data *bfqd = e->elevator_data; ++ struct request_queue *q = bfqd->queue; ++ struct bfq_queue *bfqq, *n; ++ struct cfq_io_context *cic; ++ ++ bfq_shutdown_timer_wq(bfqd); ++ ++ spin_lock_irq(q->queue_lock); ++ ++ while (!list_empty(&bfqd->cic_list)) { ++ cic = list_entry(bfqd->cic_list.next, struct cfq_io_context, ++ queue_list); ++ __bfq_exit_single_io_context(bfqd, cic); ++ } ++ ++ BUG_ON(bfqd->active_queue != NULL); ++ list_for_each_entry_safe(bfqq, n, &bfqd->idle_list, bfqq_list) ++ bfq_deactivate_bfqq(bfqd, bfqq, 0); ++ ++ bfq_disconnect_groups(bfqd); ++ spin_unlock_irq(q->queue_lock); ++ ++ bfq_shutdown_timer_wq(bfqd); ++ ++ spin_lock(&cic_index_lock); ++ ida_remove(&cic_index_ida, bfqd->cic_index); ++ spin_unlock(&cic_index_lock); ++ ++ /* Wait for cic->key accessors to exit their grace periods. 
*/ ++ synchronize_rcu(); ++ ++ BUG_ON(timer_pending(&bfqd->idle_slice_timer)); ++ ++ bfq_free_root_group(bfqd); ++ kfree(bfqd); ++} ++ ++static int bfq_alloc_cic_index(void) ++{ ++ int index, error; ++ ++ do { ++ if (!ida_pre_get(&cic_index_ida, GFP_KERNEL)) ++ return -ENOMEM; ++ ++ spin_lock(&cic_index_lock); ++ error = ida_get_new(&cic_index_ida, &index); ++ spin_unlock(&cic_index_lock); ++ if (error && error != -EAGAIN) ++ return error; ++ } while (error); ++ ++ return index; ++} ++ ++static void *bfq_init_queue(struct request_queue *q) ++{ ++ struct bfq_group *bfqg; ++ struct bfq_data *bfqd; ++ int i; ++ ++ i = bfq_alloc_cic_index(); ++ if (i < 0) ++ return NULL; ++ ++ bfqd = kmalloc_node(sizeof(*bfqd), GFP_KERNEL | __GFP_ZERO, q->node); ++ if (bfqd == NULL) ++ return NULL; ++ ++ bfqd->cic_index = i; ++ ++ /* ++ * Our fallback bfqq if bfq_find_alloc_queue() runs into OOM issues. ++ * Grab a permanent reference to it, so that the normal code flow ++ * will not attempt to free it. ++ */ ++ bfq_init_bfqq(bfqd, &bfqd->oom_bfqq, 1, 0); ++ atomic_inc(&bfqd->oom_bfqq.ref); ++ ++ INIT_LIST_HEAD(&bfqd->cic_list); ++ ++ bfqd->queue = q; ++ ++ bfqg = bfq_alloc_root_group(bfqd, q->node); ++ if (bfqg == NULL) { ++ kfree(bfqd); ++ return NULL; ++ } ++ ++ bfqd->root_group = bfqg; ++ ++ init_timer(&bfqd->idle_slice_timer); ++ bfqd->idle_slice_timer.function = bfq_idle_slice_timer; ++ bfqd->idle_slice_timer.data = (unsigned long)bfqd; ++ ++ bfqd->rq_pos_tree = RB_ROOT; ++ ++ INIT_WORK(&bfqd->unplug_work, bfq_kick_queue); ++ ++ INIT_LIST_HEAD(&bfqd->active_list); ++ INIT_LIST_HEAD(&bfqd->idle_list); ++ ++ bfqd->hw_tag = -1; ++ ++ bfqd->bfq_max_budget = bfq_default_max_budget; ++ ++ bfqd->bfq_quantum = bfq_quantum; ++ bfqd->bfq_fifo_expire[0] = bfq_fifo_expire[0]; ++ bfqd->bfq_fifo_expire[1] = bfq_fifo_expire[1]; ++ bfqd->bfq_back_max = bfq_back_max; ++ bfqd->bfq_back_penalty = bfq_back_penalty; ++ bfqd->bfq_slice_idle = bfq_slice_idle; ++ bfqd->bfq_class_idle_last_service = 0; ++ bfqd->bfq_max_budget_async_rq = bfq_max_budget_async_rq; ++ bfqd->bfq_timeout[BLK_RW_ASYNC] = bfq_timeout_async; ++ bfqd->bfq_timeout[BLK_RW_SYNC] = bfq_timeout_sync; ++ ++ bfqd->low_latency = true; ++ ++ bfqd->bfq_raising_coeff = 20; ++ bfqd->bfq_raising_rt_max_time = msecs_to_jiffies(300); ++ bfqd->bfq_raising_max_time = 0; ++ bfqd->bfq_raising_min_idle_time = msecs_to_jiffies(2000); ++ bfqd->bfq_raising_min_inter_arr_async = msecs_to_jiffies(500); ++ bfqd->bfq_raising_max_softrt_rate = 7000; ++ ++ /* Initially estimate the device's peak rate as the reference rate */ ++ if (blk_queue_nonrot(bfqd->queue)) { ++ bfqd->RT_prod = R_nonrot * T_nonrot; ++ bfqd->peak_rate = R_nonrot; ++ } else { ++ bfqd->RT_prod = R_rot * T_rot; ++ bfqd->peak_rate = R_rot; ++ } ++ ++ return bfqd; ++} ++ ++static void bfq_slab_kill(void) ++{ ++ if (bfq_pool != NULL) ++ kmem_cache_destroy(bfq_pool); ++ if (bfq_ioc_pool != NULL) ++ kmem_cache_destroy(bfq_ioc_pool); ++} ++ ++static int __init bfq_slab_setup(void) ++{ ++ bfq_pool = KMEM_CACHE(bfq_queue, 0); ++ if (bfq_pool == NULL) ++ goto fail; ++ ++ bfq_ioc_pool = kmem_cache_create("bfq_io_context", ++ sizeof(struct cfq_io_context), ++ __alignof__(struct cfq_io_context), ++ 0, NULL); ++ if (bfq_ioc_pool == NULL) ++ goto fail; ++ ++ return 0; ++fail: ++ bfq_slab_kill(); ++ return -ENOMEM; ++} ++ ++static ssize_t bfq_var_show(unsigned int var, char *page) ++{ ++ return sprintf(page, "%d\n", var); ++} ++ ++static ssize_t bfq_var_store(unsigned long *var, const char *page, size_t count) ++{ ++ unsigned long 
new_val; ++ int ret = strict_strtoul(page, 10, &new_val); ++ ++ if (ret == 0) ++ *var = new_val; ++ ++ return count; ++} ++ ++static ssize_t bfq_raising_max_time_show(struct elevator_queue *e, char *page) ++{ ++ struct bfq_data *bfqd = e->elevator_data; ++ return sprintf(page, "%d\n", bfqd->bfq_raising_max_time > 0 ? ++ bfqd->bfq_raising_max_time : ++ bfq_wrais_duration(bfqd)); ++} ++ ++static ssize_t bfq_weights_show(struct elevator_queue *e, char *page) ++{ ++ struct bfq_queue *bfqq; ++ struct bfq_data *bfqd = e->elevator_data; ++ ssize_t num_char = 0; ++ ++ num_char += sprintf(page + num_char, "Active:\n"); ++ list_for_each_entry(bfqq, &bfqd->active_list, bfqq_list) { ++ num_char += sprintf(page + num_char, ++ "pid%d: weight %hu, dur %d/%u\n", ++ bfqq->pid, ++ bfqq->entity.weight, ++ jiffies_to_msecs(jiffies - ++ bfqq->last_rais_start_finish), ++ jiffies_to_msecs(bfqq->raising_cur_max_time)); ++ } ++ num_char += sprintf(page + num_char, "Idle:\n"); ++ list_for_each_entry(bfqq, &bfqd->idle_list, bfqq_list) { ++ num_char += sprintf(page + num_char, ++ "pid%d: weight %hu, dur %d/%u\n", ++ bfqq->pid, ++ bfqq->entity.weight, ++ jiffies_to_msecs(jiffies - ++ bfqq->last_rais_start_finish), ++ jiffies_to_msecs(bfqq->raising_cur_max_time)); ++ } ++ return num_char; ++} ++ ++#define SHOW_FUNCTION(__FUNC, __VAR, __CONV) \ ++static ssize_t __FUNC(struct elevator_queue *e, char *page) \ ++{ \ ++ struct bfq_data *bfqd = e->elevator_data; \ ++ unsigned int __data = __VAR; \ ++ if (__CONV) \ ++ __data = jiffies_to_msecs(__data); \ ++ return bfq_var_show(__data, (page)); \ ++} ++SHOW_FUNCTION(bfq_quantum_show, bfqd->bfq_quantum, 0); ++SHOW_FUNCTION(bfq_fifo_expire_sync_show, bfqd->bfq_fifo_expire[1], 1); ++SHOW_FUNCTION(bfq_fifo_expire_async_show, bfqd->bfq_fifo_expire[0], 1); ++SHOW_FUNCTION(bfq_back_seek_max_show, bfqd->bfq_back_max, 0); ++SHOW_FUNCTION(bfq_back_seek_penalty_show, bfqd->bfq_back_penalty, 0); ++SHOW_FUNCTION(bfq_slice_idle_show, bfqd->bfq_slice_idle, 1); ++SHOW_FUNCTION(bfq_max_budget_show, bfqd->bfq_user_max_budget, 0); ++SHOW_FUNCTION(bfq_max_budget_async_rq_show, bfqd->bfq_max_budget_async_rq, 0); ++SHOW_FUNCTION(bfq_timeout_sync_show, bfqd->bfq_timeout[BLK_RW_SYNC], 1); ++SHOW_FUNCTION(bfq_timeout_async_show, bfqd->bfq_timeout[BLK_RW_ASYNC], 1); ++SHOW_FUNCTION(bfq_low_latency_show, bfqd->low_latency, 0); ++SHOW_FUNCTION(bfq_raising_coeff_show, bfqd->bfq_raising_coeff, 0); ++SHOW_FUNCTION(bfq_raising_rt_max_time_show, bfqd->bfq_raising_rt_max_time, 1); ++SHOW_FUNCTION(bfq_raising_min_idle_time_show, bfqd->bfq_raising_min_idle_time, ++ 1); ++SHOW_FUNCTION(bfq_raising_min_inter_arr_async_show, ++ bfqd->bfq_raising_min_inter_arr_async, ++ 1); ++SHOW_FUNCTION(bfq_raising_max_softrt_rate_show, ++ bfqd->bfq_raising_max_softrt_rate, 0); ++#undef SHOW_FUNCTION ++ ++#define STORE_FUNCTION(__FUNC, __PTR, MIN, MAX, __CONV) \ ++static ssize_t \ ++__FUNC(struct elevator_queue *e, const char *page, size_t count) \ ++{ \ ++ struct bfq_data *bfqd = e->elevator_data; \ ++ unsigned long __data; \ ++ int ret = bfq_var_store(&__data, (page), count); \ ++ if (__data < (MIN)) \ ++ __data = (MIN); \ ++ else if (__data > (MAX)) \ ++ __data = (MAX); \ ++ if (__CONV) \ ++ *(__PTR) = msecs_to_jiffies(__data); \ ++ else \ ++ *(__PTR) = __data; \ ++ return ret; \ ++} ++STORE_FUNCTION(bfq_quantum_store, &bfqd->bfq_quantum, 1, INT_MAX, 0); ++STORE_FUNCTION(bfq_fifo_expire_sync_store, &bfqd->bfq_fifo_expire[1], 1, ++ INT_MAX, 1); ++STORE_FUNCTION(bfq_fifo_expire_async_store, &bfqd->bfq_fifo_expire[0], 1, ++ 
INT_MAX, 1); ++STORE_FUNCTION(bfq_back_seek_max_store, &bfqd->bfq_back_max, 0, INT_MAX, 0); ++STORE_FUNCTION(bfq_back_seek_penalty_store, &bfqd->bfq_back_penalty, 1, ++ INT_MAX, 0); ++STORE_FUNCTION(bfq_slice_idle_store, &bfqd->bfq_slice_idle, 0, INT_MAX, 1); ++STORE_FUNCTION(bfq_max_budget_async_rq_store, &bfqd->bfq_max_budget_async_rq, ++ 1, INT_MAX, 0); ++STORE_FUNCTION(bfq_timeout_async_store, &bfqd->bfq_timeout[BLK_RW_ASYNC], 0, ++ INT_MAX, 1); ++STORE_FUNCTION(bfq_raising_coeff_store, &bfqd->bfq_raising_coeff, 1, ++ INT_MAX, 0); ++STORE_FUNCTION(bfq_raising_max_time_store, &bfqd->bfq_raising_max_time, 0, ++ INT_MAX, 1); ++STORE_FUNCTION(bfq_raising_rt_max_time_store, &bfqd->bfq_raising_rt_max_time, 0, ++ INT_MAX, 1); ++STORE_FUNCTION(bfq_raising_min_idle_time_store, ++ &bfqd->bfq_raising_min_idle_time, 0, INT_MAX, 1); ++STORE_FUNCTION(bfq_raising_min_inter_arr_async_store, ++ &bfqd->bfq_raising_min_inter_arr_async, 0, INT_MAX, 1); ++STORE_FUNCTION(bfq_raising_max_softrt_rate_store, ++ &bfqd->bfq_raising_max_softrt_rate, 0, INT_MAX, 0); ++#undef STORE_FUNCTION ++ ++/* do nothing for the moment */ ++static ssize_t bfq_weights_store(struct elevator_queue *e, ++ const char *page, size_t count) ++{ ++ return count; ++} ++ ++static inline unsigned long bfq_estimated_max_budget(struct bfq_data *bfqd) ++{ ++ u64 timeout = jiffies_to_msecs(bfqd->bfq_timeout[BLK_RW_SYNC]); ++ ++ if (bfqd->peak_rate_samples >= BFQ_PEAK_RATE_SAMPLES) ++ return bfq_calc_max_budget(bfqd->peak_rate, timeout); ++ else ++ return bfq_default_max_budget; ++} ++ ++static ssize_t bfq_max_budget_store(struct elevator_queue *e, ++ const char *page, size_t count) ++{ ++ struct bfq_data *bfqd = e->elevator_data; ++ unsigned long __data; ++ int ret = bfq_var_store(&__data, (page), count); ++ ++ if (__data == 0) ++ bfqd->bfq_max_budget = bfq_estimated_max_budget(bfqd); ++ else { ++ if (__data > INT_MAX) ++ __data = INT_MAX; ++ bfqd->bfq_max_budget = __data; ++ } ++ ++ bfqd->bfq_user_max_budget = __data; ++ ++ return ret; ++} ++ ++static ssize_t bfq_timeout_sync_store(struct elevator_queue *e, ++ const char *page, size_t count) ++{ ++ struct bfq_data *bfqd = e->elevator_data; ++ unsigned long __data; ++ int ret = bfq_var_store(&__data, (page), count); ++ ++ if (__data < 1) ++ __data = 1; ++ else if (__data > INT_MAX) ++ __data = INT_MAX; ++ ++ bfqd->bfq_timeout[BLK_RW_SYNC] = msecs_to_jiffies(__data); ++ if (bfqd->bfq_user_max_budget == 0) ++ bfqd->bfq_max_budget = bfq_estimated_max_budget(bfqd); ++ ++ return ret; ++} ++ ++static ssize_t bfq_low_latency_store(struct elevator_queue *e, ++ const char *page, size_t count) ++{ ++ struct bfq_data *bfqd = e->elevator_data; ++ unsigned long __data; ++ int ret = bfq_var_store(&__data, (page), count); ++ ++ if (__data > 1) ++ __data = 1; ++ bfqd->low_latency = __data; ++ ++ return ret; ++} ++ ++#define BFQ_ATTR(name) \ ++ __ATTR(name, S_IRUGO|S_IWUSR, bfq_##name##_show, bfq_##name##_store) ++ ++static struct elv_fs_entry bfq_attrs[] = { ++ BFQ_ATTR(quantum), ++ BFQ_ATTR(fifo_expire_sync), ++ BFQ_ATTR(fifo_expire_async), ++ BFQ_ATTR(back_seek_max), ++ BFQ_ATTR(back_seek_penalty), ++ BFQ_ATTR(slice_idle), ++ BFQ_ATTR(max_budget), ++ BFQ_ATTR(max_budget_async_rq), ++ BFQ_ATTR(timeout_sync), ++ BFQ_ATTR(timeout_async), ++ BFQ_ATTR(low_latency), ++ BFQ_ATTR(raising_coeff), ++ BFQ_ATTR(raising_max_time), ++ BFQ_ATTR(raising_rt_max_time), ++ BFQ_ATTR(raising_min_idle_time), ++ BFQ_ATTR(raising_min_inter_arr_async), ++ BFQ_ATTR(raising_max_softrt_rate), ++ BFQ_ATTR(weights), ++ __ATTR_NULL ++}; 
++ ++static struct elevator_type iosched_bfq = { ++ .ops = { ++ .elevator_merge_fn = bfq_merge, ++ .elevator_merged_fn = bfq_merged_request, ++ .elevator_merge_req_fn = bfq_merged_requests, ++ .elevator_allow_merge_fn = bfq_allow_merge, ++ .elevator_dispatch_fn = bfq_dispatch_requests, ++ .elevator_add_req_fn = bfq_insert_request, ++ .elevator_activate_req_fn = bfq_activate_request, ++ .elevator_deactivate_req_fn = bfq_deactivate_request, ++ .elevator_completed_req_fn = bfq_completed_request, ++ .elevator_former_req_fn = elv_rb_former_request, ++ .elevator_latter_req_fn = elv_rb_latter_request, ++ .elevator_set_req_fn = bfq_set_request, ++ .elevator_put_req_fn = bfq_put_request, ++ .elevator_may_queue_fn = bfq_may_queue, ++ .elevator_init_fn = bfq_init_queue, ++ .elevator_exit_fn = bfq_exit_queue, ++ .trim = bfq_free_io_context, ++ }, ++ .elevator_attrs = bfq_attrs, ++ .elevator_name = "bfq", ++ .elevator_owner = THIS_MODULE, ++}; ++ ++static int __init bfq_init(void) ++{ ++ /* ++ * Can be 0 on HZ < 1000 setups. ++ */ ++ if (bfq_slice_idle == 0) ++ bfq_slice_idle = 1; ++ ++ if (bfq_timeout_async == 0) ++ bfq_timeout_async = 1; ++ ++ if (bfq_slab_setup()) ++ return -ENOMEM; ++ ++ elv_register(&iosched_bfq); ++ ++ return 0; ++} ++ ++static void __exit bfq_exit(void) ++{ ++ DECLARE_COMPLETION_ONSTACK(all_gone); ++ elv_unregister(&iosched_bfq); ++ bfq_ioc_gone = &all_gone; ++ /* bfq_ioc_gone's update must be visible before reading bfq_ioc_count */ ++ smp_wmb(); ++ if (elv_ioc_count_read(bfq_ioc_count) != 0) ++ wait_for_completion(&all_gone); ++ ida_destroy(&cic_index_ida); ++ bfq_slab_kill(); ++} ++ ++module_init(bfq_init); ++module_exit(bfq_exit); ++ ++MODULE_AUTHOR("Fabio Checconi, Paolo Valente"); ++MODULE_LICENSE("GPL"); ++MODULE_DESCRIPTION("Budget Fair Queueing IO scheduler"); +diff --git a/block/bfq-sched.c block/bfq-sched.c +new file mode 100644 +index 0000000..fd50b7f +--- /dev/null ++++ block/bfq-sched.c +@@ -0,0 +1,1066 @@ ++/* ++ * BFQ: Hierarchical B-WF2Q+ scheduler. ++ * ++ * Based on ideas and code from CFQ: ++ * Copyright (C) 2003 Jens Axboe ++ * ++ * Copyright (C) 2008 Fabio Checconi ++ * Paolo Valente ++ */ ++ ++#ifdef CONFIG_CGROUP_BFQIO ++#define for_each_entity(entity) \ ++ for (; entity != NULL; entity = entity->parent) ++ ++#define for_each_entity_safe(entity, parent) \ ++ for (; entity && ({ parent = entity->parent; 1; }); entity = parent) ++ ++static struct bfq_entity *bfq_lookup_next_entity(struct bfq_sched_data *sd, ++ int extract, ++ struct bfq_data *bfqd); ++ ++static inline void bfq_update_budget(struct bfq_entity *next_active) ++{ ++ struct bfq_entity *bfqg_entity; ++ struct bfq_group *bfqg; ++ struct bfq_sched_data *group_sd; ++ ++ BUG_ON(next_active == NULL); ++ ++ group_sd = next_active->sched_data; ++ ++ bfqg = container_of(group_sd, struct bfq_group, sched_data); ++ /* ++ * bfq_group's my_entity field is not NULL only if the group ++ * is not the root group. We must not touch the root entity ++ * as it must never become an active entity. 
++ */ ++ bfqg_entity = bfqg->my_entity; ++ if (bfqg_entity != NULL) ++ bfqg_entity->budget = next_active->budget; ++} ++ ++static int bfq_update_next_active(struct bfq_sched_data *sd) ++{ ++ struct bfq_entity *next_active; ++ ++ if (sd->active_entity != NULL) ++ /* will update/requeue at the end of service */ ++ return 0; ++ ++ /* ++ * NOTE: this can be improved in many ways, such as returning ++ * 1 (and thus propagating upwards the update) only when the ++ * budget changes, or caching the bfqq that will be scheduled ++ * next from this subtree. By now we worry more about ++ * correctness than about performance... ++ */ ++ next_active = bfq_lookup_next_entity(sd, 0, NULL); ++ sd->next_active = next_active; ++ ++ if (next_active != NULL) ++ bfq_update_budget(next_active); ++ ++ return 1; ++} ++ ++static inline void bfq_check_next_active(struct bfq_sched_data *sd, ++ struct bfq_entity *entity) ++{ ++ BUG_ON(sd->next_active != entity); ++} ++#else ++#define for_each_entity(entity) \ ++ for (; entity != NULL; entity = NULL) ++ ++#define for_each_entity_safe(entity, parent) \ ++ for (parent = NULL; entity != NULL; entity = parent) ++ ++static inline int bfq_update_next_active(struct bfq_sched_data *sd) ++{ ++ return 0; ++} ++ ++static inline void bfq_check_next_active(struct bfq_sched_data *sd, ++ struct bfq_entity *entity) ++{ ++} ++ ++static inline void bfq_update_budget(struct bfq_entity *next_active) ++{ ++} ++#endif ++ ++/* ++ * Shift for timestamp calculations. This actually limits the maximum ++ * service allowed in one timestamp delta (small shift values increase it), ++ * the maximum total weight that can be used for the queues in the system ++ * (big shift values increase it), and the period of virtual time wraparounds. ++ */ ++#define WFQ_SERVICE_SHIFT 22 ++ ++/** ++ * bfq_gt - compare two timestamps. ++ * @a: first ts. ++ * @b: second ts. ++ * ++ * Return @a > @b, dealing with wrapping correctly. ++ */ ++static inline int bfq_gt(u64 a, u64 b) ++{ ++ return (s64)(a - b) > 0; ++} ++ ++static inline struct bfq_queue *bfq_entity_to_bfqq(struct bfq_entity *entity) ++{ ++ struct bfq_queue *bfqq = NULL; ++ ++ BUG_ON(entity == NULL); ++ ++ if (entity->my_sched_data == NULL) ++ bfqq = container_of(entity, struct bfq_queue, entity); ++ ++ return bfqq; ++} ++ ++ ++/** ++ * bfq_delta - map service into the virtual time domain. ++ * @service: amount of service. ++ * @weight: scale factor (weight of an entity or weight sum). ++ */ ++static inline u64 bfq_delta(unsigned long service, ++ unsigned long weight) ++{ ++ u64 d = (u64)service << WFQ_SERVICE_SHIFT; ++ ++ do_div(d, weight); ++ return d; ++} ++ ++/** ++ * bfq_calc_finish - assign the finish time to an entity. ++ * @entity: the entity to act upon. ++ * @service: the service to be charged to the entity. ++ */ ++static inline void bfq_calc_finish(struct bfq_entity *entity, ++ unsigned long service) ++{ ++ struct bfq_queue *bfqq = bfq_entity_to_bfqq(entity); ++ ++ BUG_ON(entity->weight == 0); ++ ++ entity->finish = entity->start + ++ bfq_delta(service, entity->weight); ++ ++ if (bfqq != NULL) { ++ bfq_log_bfqq(bfqq->bfqd, bfqq, ++ "calc_finish: serv %lu, w %d", ++ service, entity->weight); ++ bfq_log_bfqq(bfqq->bfqd, bfqq, ++ "calc_finish: start %llu, finish %llu, delta %llu", ++ entity->start, entity->finish, ++ bfq_delta(service, entity->weight)); ++ } ++} ++ ++/** ++ * bfq_entity_of - get an entity from a node. ++ * @node: the node field of the entity. ++ * ++ * Convert a node pointer to the relative entity. 
This is used only ++ * to simplify the logic of some functions and not as the generic ++ * conversion mechanism because, e.g., in the tree walking functions, ++ * the check for a %NULL value would be redundant. ++ */ ++static inline struct bfq_entity *bfq_entity_of(struct rb_node *node) ++{ ++ struct bfq_entity *entity = NULL; ++ ++ if (node != NULL) ++ entity = rb_entry(node, struct bfq_entity, rb_node); ++ ++ return entity; ++} ++ ++/** ++ * bfq_extract - remove an entity from a tree. ++ * @root: the tree root. ++ * @entity: the entity to remove. ++ */ ++static inline void bfq_extract(struct rb_root *root, ++ struct bfq_entity *entity) ++{ ++ BUG_ON(entity->tree != root); ++ ++ entity->tree = NULL; ++ rb_erase(&entity->rb_node, root); ++} ++ ++/** ++ * bfq_idle_extract - extract an entity from the idle tree. ++ * @st: the service tree of the owning @entity. ++ * @entity: the entity being removed. ++ */ ++static void bfq_idle_extract(struct bfq_service_tree *st, ++ struct bfq_entity *entity) ++{ ++ struct bfq_queue *bfqq = bfq_entity_to_bfqq(entity); ++ struct rb_node *next; ++ ++ BUG_ON(entity->tree != &st->idle); ++ ++ if (entity == st->first_idle) { ++ next = rb_next(&entity->rb_node); ++ st->first_idle = bfq_entity_of(next); ++ } ++ ++ if (entity == st->last_idle) { ++ next = rb_prev(&entity->rb_node); ++ st->last_idle = bfq_entity_of(next); ++ } ++ ++ bfq_extract(&st->idle, entity); ++ ++ if (bfqq != NULL) ++ list_del(&bfqq->bfqq_list); ++} ++ ++/** ++ * bfq_insert - generic tree insertion. ++ * @root: tree root. ++ * @entity: entity to insert. ++ * ++ * This is used for the idle and the active tree, since they are both ++ * ordered by finish time. ++ */ ++static void bfq_insert(struct rb_root *root, struct bfq_entity *entity) ++{ ++ struct bfq_entity *entry; ++ struct rb_node **node = &root->rb_node; ++ struct rb_node *parent = NULL; ++ ++ BUG_ON(entity->tree != NULL); ++ ++ while (*node != NULL) { ++ parent = *node; ++ entry = rb_entry(parent, struct bfq_entity, rb_node); ++ ++ if (bfq_gt(entry->finish, entity->finish)) ++ node = &parent->rb_left; ++ else ++ node = &parent->rb_right; ++ } ++ ++ rb_link_node(&entity->rb_node, parent, node); ++ rb_insert_color(&entity->rb_node, root); ++ ++ entity->tree = root; ++} ++ ++/** ++ * bfq_update_min - update the min_start field of a entity. ++ * @entity: the entity to update. ++ * @node: one of its children. ++ * ++ * This function is called when @entity may store an invalid value for ++ * min_start due to updates to the active tree. The function assumes ++ * that the subtree rooted at @node (which may be its left or its right ++ * child) has a valid min_start value. ++ */ ++static inline void bfq_update_min(struct bfq_entity *entity, ++ struct rb_node *node) ++{ ++ struct bfq_entity *child; ++ ++ if (node != NULL) { ++ child = rb_entry(node, struct bfq_entity, rb_node); ++ if (bfq_gt(entity->min_start, child->min_start)) ++ entity->min_start = child->min_start; ++ } ++} ++ ++/** ++ * bfq_update_active_node - recalculate min_start. ++ * @node: the node to update. ++ * ++ * @node may have changed position or one of its children may have moved, ++ * this function updates its min_start value. The left and right subtrees ++ * are assumed to hold a correct min_start value. 
++ */ ++static inline void bfq_update_active_node(struct rb_node *node) ++{ ++ struct bfq_entity *entity = rb_entry(node, struct bfq_entity, rb_node); ++ ++ entity->min_start = entity->start; ++ bfq_update_min(entity, node->rb_right); ++ bfq_update_min(entity, node->rb_left); ++} ++ ++/** ++ * bfq_update_active_tree - update min_start for the whole active tree. ++ * @node: the starting node. ++ * ++ * @node must be the deepest modified node after an update. This function ++ * updates its min_start using the values held by its children, assuming ++ * that they did not change, and then updates all the nodes that may have ++ * changed in the path to the root. The only nodes that may have changed ++ * are the ones in the path or their siblings. ++ */ ++static void bfq_update_active_tree(struct rb_node *node) ++{ ++ struct rb_node *parent; ++ ++up: ++ bfq_update_active_node(node); ++ ++ parent = rb_parent(node); ++ if (parent == NULL) ++ return; ++ ++ if (node == parent->rb_left && parent->rb_right != NULL) ++ bfq_update_active_node(parent->rb_right); ++ else if (parent->rb_left != NULL) ++ bfq_update_active_node(parent->rb_left); ++ ++ node = parent; ++ goto up; ++} ++ ++/** ++ * bfq_active_insert - insert an entity in the active tree of its group/device. ++ * @st: the service tree of the entity. ++ * @entity: the entity being inserted. ++ * ++ * The active tree is ordered by finish time, but an extra key is kept ++ * per each node, containing the minimum value for the start times of ++ * its children (and the node itself), so it's possible to search for ++ * the eligible node with the lowest finish time in logarithmic time. ++ */ ++static void bfq_active_insert(struct bfq_service_tree *st, ++ struct bfq_entity *entity) ++{ ++ struct bfq_queue *bfqq = bfq_entity_to_bfqq(entity); ++ struct rb_node *node = &entity->rb_node; ++ ++ bfq_insert(&st->active, entity); ++ ++ if (node->rb_left != NULL) ++ node = node->rb_left; ++ else if (node->rb_right != NULL) ++ node = node->rb_right; ++ ++ bfq_update_active_tree(node); ++ ++ if (bfqq != NULL) ++ list_add(&bfqq->bfqq_list, &bfqq->bfqd->active_list); ++} ++ ++/** ++ * bfq_ioprio_to_weight - calc a weight from an ioprio. ++ * @ioprio: the ioprio value to convert. ++ */ ++static unsigned short bfq_ioprio_to_weight(int ioprio) ++{ ++ WARN_ON(ioprio < 0 || ioprio >= IOPRIO_BE_NR); ++ return IOPRIO_BE_NR - ioprio; ++} ++ ++/** ++ * bfq_weight_to_ioprio - calc an ioprio from a weight. ++ * @weight: the weight value to convert. ++ * ++ * To preserve as mush as possible the old only-ioprio user interface, ++ * 0 is used as an escape ioprio value for weights (numerically) equal or ++ * larger than IOPRIO_BE_NR ++ */ ++static unsigned short bfq_weight_to_ioprio(int weight) ++{ ++ WARN_ON(weight < BFQ_MIN_WEIGHT || weight > BFQ_MAX_WEIGHT); ++ return IOPRIO_BE_NR - weight < 0 ? 0 : IOPRIO_BE_NR - weight; ++} ++ ++static inline void bfq_get_entity(struct bfq_entity *entity) ++{ ++ struct bfq_queue *bfqq = bfq_entity_to_bfqq(entity); ++ ++ if (bfqq != NULL) { ++ atomic_inc(&bfqq->ref); ++ bfq_log_bfqq(bfqq->bfqd, bfqq, "get_entity: %p %d", ++ bfqq, atomic_read(&bfqq->ref)); ++ } ++} ++ ++/** ++ * bfq_find_deepest - find the deepest node that an extraction can modify. ++ * @node: the node being removed. ++ * ++ * Do the first step of an extraction in an rb tree, looking for the ++ * node that will replace @node, and returning the deepest node that ++ * the following modifications to the tree can touch. If @node is the ++ * last node in the tree return %NULL. 
++ */ ++static struct rb_node *bfq_find_deepest(struct rb_node *node) ++{ ++ struct rb_node *deepest; ++ ++ if (node->rb_right == NULL && node->rb_left == NULL) ++ deepest = rb_parent(node); ++ else if (node->rb_right == NULL) ++ deepest = node->rb_left; ++ else if (node->rb_left == NULL) ++ deepest = node->rb_right; ++ else { ++ deepest = rb_next(node); ++ if (deepest->rb_right != NULL) ++ deepest = deepest->rb_right; ++ else if (rb_parent(deepest) != node) ++ deepest = rb_parent(deepest); ++ } ++ ++ return deepest; ++} ++ ++/** ++ * bfq_active_extract - remove an entity from the active tree. ++ * @st: the service_tree containing the tree. ++ * @entity: the entity being removed. ++ */ ++static void bfq_active_extract(struct bfq_service_tree *st, ++ struct bfq_entity *entity) ++{ ++ struct bfq_queue *bfqq = bfq_entity_to_bfqq(entity); ++ struct rb_node *node; ++ ++ node = bfq_find_deepest(&entity->rb_node); ++ bfq_extract(&st->active, entity); ++ ++ if (node != NULL) ++ bfq_update_active_tree(node); ++ ++ if (bfqq != NULL) ++ list_del(&bfqq->bfqq_list); ++} ++ ++/** ++ * bfq_idle_insert - insert an entity into the idle tree. ++ * @st: the service tree containing the tree. ++ * @entity: the entity to insert. ++ */ ++static void bfq_idle_insert(struct bfq_service_tree *st, ++ struct bfq_entity *entity) ++{ ++ struct bfq_queue *bfqq = bfq_entity_to_bfqq(entity); ++ struct bfq_entity *first_idle = st->first_idle; ++ struct bfq_entity *last_idle = st->last_idle; ++ ++ if (first_idle == NULL || bfq_gt(first_idle->finish, entity->finish)) ++ st->first_idle = entity; ++ if (last_idle == NULL || bfq_gt(entity->finish, last_idle->finish)) ++ st->last_idle = entity; ++ ++ bfq_insert(&st->idle, entity); ++ ++ if (bfqq != NULL) ++ list_add(&bfqq->bfqq_list, &bfqq->bfqd->idle_list); ++} ++ ++/** ++ * bfq_forget_entity - remove an entity from the wfq trees. ++ * @st: the service tree. ++ * @entity: the entity being removed. ++ * ++ * Update the device status and forget everything about @entity, putting ++ * the device reference to it, if it is a queue. Entities belonging to ++ * groups are not refcounted. ++ */ ++static void bfq_forget_entity(struct bfq_service_tree *st, ++ struct bfq_entity *entity) ++{ ++ struct bfq_queue *bfqq = bfq_entity_to_bfqq(entity); ++ ++ BUG_ON(!entity->on_st); ++ ++ entity->on_st = 0; ++ st->wsum -= entity->weight; ++ if (bfqq != NULL) { ++ bfq_log_bfqq(bfqq->bfqd, bfqq, "forget_entity: %p %d", ++ bfqq, atomic_read(&bfqq->ref)); ++ bfq_put_queue(bfqq); ++ } ++} ++ ++/** ++ * bfq_put_idle_entity - release the idle tree ref of an entity. ++ * @st: service tree for the entity. ++ * @entity: the entity being released. ++ */ ++static void bfq_put_idle_entity(struct bfq_service_tree *st, ++ struct bfq_entity *entity) ++{ ++ bfq_idle_extract(st, entity); ++ bfq_forget_entity(st, entity); ++} ++ ++/** ++ * bfq_forget_idle - update the idle tree if necessary. ++ * @st: the service tree to act upon. ++ * ++ * To preserve the global O(log N) complexity we only remove one entry here; ++ * as the idle tree will not grow indefinitely this can be done safely. ++ */ ++static void bfq_forget_idle(struct bfq_service_tree *st) ++{ ++ struct bfq_entity *first_idle = st->first_idle; ++ struct bfq_entity *last_idle = st->last_idle; ++ ++ if (RB_EMPTY_ROOT(&st->active) && last_idle != NULL && ++ !bfq_gt(last_idle->finish, st->vtime)) { ++ /* ++ * Forget the whole idle tree, increasing the vtime past ++ * the last finish time of idle entities. 
++ */ ++ st->vtime = last_idle->finish; ++ } ++ ++ if (first_idle != NULL && !bfq_gt(first_idle->finish, st->vtime)) ++ bfq_put_idle_entity(st, first_idle); ++} ++ ++static struct bfq_service_tree * ++__bfq_entity_update_weight_prio(struct bfq_service_tree *old_st, ++ struct bfq_entity *entity) ++{ ++ struct bfq_service_tree *new_st = old_st; ++ ++ if (entity->ioprio_changed) { ++ struct bfq_queue *bfqq = bfq_entity_to_bfqq(entity); ++ ++ BUG_ON(old_st->wsum < entity->weight); ++ old_st->wsum -= entity->weight; ++ ++ if (entity->new_weight != entity->orig_weight) { ++ entity->orig_weight = entity->new_weight; ++ entity->ioprio = ++ bfq_weight_to_ioprio(entity->orig_weight); ++ } else if (entity->new_ioprio != entity->ioprio) { ++ entity->ioprio = entity->new_ioprio; ++ entity->orig_weight = ++ bfq_ioprio_to_weight(entity->ioprio); ++ } else ++ entity->new_weight = entity->orig_weight = ++ bfq_ioprio_to_weight(entity->ioprio); ++ ++ entity->ioprio_class = entity->new_ioprio_class; ++ entity->ioprio_changed = 0; ++ ++ /* ++ * NOTE: here we may be changing the weight too early, ++ * this will cause unfairness. The correct approach ++ * would have required additional complexity to defer ++ * weight changes to the proper time instants (i.e., ++ * when entity->finish <= old_st->vtime). ++ */ ++ new_st = bfq_entity_service_tree(entity); ++ entity->weight = entity->orig_weight * ++ (bfqq != NULL ? bfqq->raising_coeff : 1); ++ new_st->wsum += entity->weight; ++ ++ if (new_st != old_st) ++ entity->start = new_st->vtime; ++ } ++ ++ return new_st; ++} ++ ++/** ++ * bfq_bfqq_served - update the scheduler status after selection for service. ++ * @bfqq: the queue being served. ++ * @served: bytes to transfer. ++ * ++ * NOTE: this can be optimized, as the timestamps of upper level entities ++ * are synchronized every time a new bfqq is selected for service. By now, ++ * we keep it to better check consistency. ++ */ ++static void bfq_bfqq_served(struct bfq_queue *bfqq, unsigned long served) ++{ ++ struct bfq_entity *entity = &bfqq->entity; ++ struct bfq_service_tree *st; ++ ++ for_each_entity(entity) { ++ st = bfq_entity_service_tree(entity); ++ ++ entity->service += served; ++ BUG_ON(entity->service > entity->budget); ++ BUG_ON(st->wsum == 0); ++ ++ st->vtime += bfq_delta(served, st->wsum); ++ bfq_forget_idle(st); ++ } ++ bfq_log_bfqq(bfqq->bfqd, bfqq, "bfqq_served %lu secs", served); ++} ++ ++/** ++ * bfq_bfqq_charge_full_budget - set the service to the entity budget. ++ * @bfqq: the queue that needs a service update. ++ * ++ * When it's not possible to be fair in the service domain, because ++ * a queue is not consuming its budget fast enough (the meaning of ++ * fast depends on the timeout parameter), we charge it a full ++ * budget. In this way we should obtain a sort of time-domain ++ * fairness among all the seeky/slow queues. ++ */ ++static inline void bfq_bfqq_charge_full_budget(struct bfq_queue *bfqq) ++{ ++ struct bfq_entity *entity = &bfqq->entity; ++ ++ bfq_log_bfqq(bfqq->bfqd, bfqq, "charge_full_budget"); ++ ++ bfq_bfqq_served(bfqq, entity->budget - entity->service); ++} ++ ++/** ++ * __bfq_activate_entity - activate an entity. ++ * @entity: the entity being activated. ++ * ++ * Called whenever an entity is activated, i.e., it is not active and one ++ * of its children receives a new request, or has to be reactivated due to ++ * budget exhaustion. It uses the current budget of the entity (and the ++ * service received if @entity is active) of the queue to calculate its ++ * timestamps. 
++ */ ++static void __bfq_activate_entity(struct bfq_entity *entity) ++{ ++ struct bfq_sched_data *sd = entity->sched_data; ++ struct bfq_service_tree *st = bfq_entity_service_tree(entity); ++ ++ if (entity == sd->active_entity) { ++ BUG_ON(entity->tree != NULL); ++ /* ++ * If we are requeueing the current entity we have ++ * to take care of not charging to it service it has ++ * not received. ++ */ ++ bfq_calc_finish(entity, entity->service); ++ entity->start = entity->finish; ++ sd->active_entity = NULL; ++ } else if (entity->tree == &st->active) { ++ /* ++ * Requeueing an entity due to a change of some ++ * next_active entity below it. We reuse the old ++ * start time. ++ */ ++ bfq_active_extract(st, entity); ++ } else if (entity->tree == &st->idle) { ++ /* ++ * Must be on the idle tree, bfq_idle_extract() will ++ * check for that. ++ */ ++ bfq_idle_extract(st, entity); ++ entity->start = bfq_gt(st->vtime, entity->finish) ? ++ st->vtime : entity->finish; ++ } else { ++ /* ++ * The finish time of the entity may be invalid, and ++ * it is in the past for sure, otherwise the queue ++ * would have been on the idle tree. ++ */ ++ entity->start = st->vtime; ++ st->wsum += entity->weight; ++ bfq_get_entity(entity); ++ ++ BUG_ON(entity->on_st); ++ entity->on_st = 1; ++ } ++ ++ st = __bfq_entity_update_weight_prio(st, entity); ++ bfq_calc_finish(entity, entity->budget); ++ bfq_active_insert(st, entity); ++} ++ ++/** ++ * bfq_activate_entity - activate an entity and its ancestors if necessary. ++ * @entity: the entity to activate. ++ * ++ * Activate @entity and all the entities on the path from it to the root. ++ */ ++static void bfq_activate_entity(struct bfq_entity *entity) ++{ ++ struct bfq_sched_data *sd; ++ ++ for_each_entity(entity) { ++ __bfq_activate_entity(entity); ++ ++ sd = entity->sched_data; ++ if (!bfq_update_next_active(sd)) ++ /* ++ * No need to propagate the activation to the ++ * upper entities, as they will be updated when ++ * the active entity is rescheduled. ++ */ ++ break; ++ } ++} ++ ++/** ++ * __bfq_deactivate_entity - deactivate an entity from its service tree. ++ * @entity: the entity to deactivate. ++ * @requeue: if false, the entity will not be put into the idle tree. ++ * ++ * Deactivate an entity, independently from its previous state. If the ++ * entity was not on a service tree just return, otherwise if it is on ++ * any scheduler tree, extract it from that tree, and if necessary ++ * and if the caller did not specify @requeue, put it on the idle tree. ++ * ++ * Return %1 if the caller should update the entity hierarchy, i.e., ++ * if the entity was under service or if it was the next_active for ++ * its sched_data; return %0 otherwise. 
++ */ ++static int __bfq_deactivate_entity(struct bfq_entity *entity, int requeue) ++{ ++ struct bfq_sched_data *sd = entity->sched_data; ++ struct bfq_service_tree *st = bfq_entity_service_tree(entity); ++ int was_active = entity == sd->active_entity; ++ int ret = 0; ++ ++ if (!entity->on_st) ++ return 0; ++ ++ BUG_ON(was_active && entity->tree != NULL); ++ ++ if (was_active) { ++ bfq_calc_finish(entity, entity->service); ++ sd->active_entity = NULL; ++ } else if (entity->tree == &st->active) ++ bfq_active_extract(st, entity); ++ else if (entity->tree == &st->idle) ++ bfq_idle_extract(st, entity); ++ else if (entity->tree != NULL) ++ BUG(); ++ ++ if (was_active || sd->next_active == entity) ++ ret = bfq_update_next_active(sd); ++ ++ if (!requeue || !bfq_gt(entity->finish, st->vtime)) ++ bfq_forget_entity(st, entity); ++ else ++ bfq_idle_insert(st, entity); ++ ++ BUG_ON(sd->active_entity == entity); ++ BUG_ON(sd->next_active == entity); ++ ++ return ret; ++} ++ ++/** ++ * bfq_deactivate_entity - deactivate an entity. ++ * @entity: the entity to deactivate. ++ * @requeue: true if the entity can be put on the idle tree ++ */ ++static void bfq_deactivate_entity(struct bfq_entity *entity, int requeue) ++{ ++ struct bfq_sched_data *sd; ++ struct bfq_entity *parent; ++ ++ for_each_entity_safe(entity, parent) { ++ sd = entity->sched_data; ++ ++ if (!__bfq_deactivate_entity(entity, requeue)) ++ /* ++ * The parent entity is still backlogged, and ++ * we don't need to update it as it is still ++ * under service. ++ */ ++ break; ++ ++ if (sd->next_active != NULL) ++ /* ++ * The parent entity is still backlogged and ++ * the budgets on the path towards the root ++ * need to be updated. ++ */ ++ goto update; ++ ++ /* ++ * If we reach there the parent is no more backlogged and ++ * we want to propagate the dequeue upwards. ++ */ ++ requeue = 1; ++ } ++ ++ return; ++ ++update: ++ entity = parent; ++ for_each_entity(entity) { ++ __bfq_activate_entity(entity); ++ ++ sd = entity->sched_data; ++ if (!bfq_update_next_active(sd)) ++ break; ++ } ++} ++ ++/** ++ * bfq_update_vtime - update vtime if necessary. ++ * @st: the service tree to act upon. ++ * ++ * If necessary update the service tree vtime to have at least one ++ * eligible entity, skipping to its start time. Assumes that the ++ * active tree of the device is not empty. ++ * ++ * NOTE: this hierarchical implementation updates vtimes quite often, ++ * we may end up with reactivated tasks getting timestamps after a ++ * vtime skip done because we needed a ->first_active entity on some ++ * intermediate node. ++ */ ++static void bfq_update_vtime(struct bfq_service_tree *st) ++{ ++ struct bfq_entity *entry; ++ struct rb_node *node = st->active.rb_node; ++ ++ entry = rb_entry(node, struct bfq_entity, rb_node); ++ if (bfq_gt(entry->min_start, st->vtime)) { ++ st->vtime = entry->min_start; ++ bfq_forget_idle(st); ++ } ++} ++ ++/** ++ * bfq_first_active - find the eligible entity with the smallest finish time ++ * @st: the service tree to select from. ++ * ++ * This function searches the first schedulable entity, starting from the ++ * root of the tree and going on the left every time on this side there is ++ * a subtree with at least one eligible (start >= vtime) entity. The path ++ * on the right is followed only if a) the left subtree contains no eligible ++ * entities and b) no eligible entity has been found yet. 
++ */ ++static struct bfq_entity *bfq_first_active_entity(struct bfq_service_tree *st) ++{ ++ struct bfq_entity *entry, *first = NULL; ++ struct rb_node *node = st->active.rb_node; ++ ++ while (node != NULL) { ++ entry = rb_entry(node, struct bfq_entity, rb_node); ++left: ++ if (!bfq_gt(entry->start, st->vtime)) ++ first = entry; ++ ++ BUG_ON(bfq_gt(entry->min_start, st->vtime)); ++ ++ if (node->rb_left != NULL) { ++ entry = rb_entry(node->rb_left, ++ struct bfq_entity, rb_node); ++ if (!bfq_gt(entry->min_start, st->vtime)) { ++ node = node->rb_left; ++ goto left; ++ } ++ } ++ if (first != NULL) ++ break; ++ node = node->rb_right; ++ } ++ ++ BUG_ON(first == NULL && !RB_EMPTY_ROOT(&st->active)); ++ return first; ++} ++ ++/** ++ * __bfq_lookup_next_entity - return the first eligible entity in @st. ++ * @st: the service tree. ++ * ++ * Update the virtual time in @st and return the first eligible entity ++ * it contains. ++ */ ++static struct bfq_entity *__bfq_lookup_next_entity(struct bfq_service_tree *st, ++ bool force) ++{ ++ struct bfq_entity *entity, *new_next_active = NULL; ++ ++ if (RB_EMPTY_ROOT(&st->active)) ++ return NULL; ++ ++ bfq_update_vtime(st); ++ entity = bfq_first_active_entity(st); ++ BUG_ON(bfq_gt(entity->start, st->vtime)); ++ ++ /* ++ * If the chosen entity does not match with the sched_data's ++ * next_active and we are forcedly serving the IDLE priority ++ * class tree, bubble up budget update. ++ */ ++ if (unlikely(force && entity != entity->sched_data->next_active)) { ++ new_next_active = entity; ++ for_each_entity(new_next_active) ++ bfq_update_budget(new_next_active); ++ } ++ ++ return entity; ++} ++ ++/** ++ * bfq_lookup_next_entity - return the first eligible entity in @sd. ++ * @sd: the sched_data. ++ * @extract: if true the returned entity will be also extracted from @sd. ++ * ++ * NOTE: since we cache the next_active entity at each level of the ++ * hierarchy, the complexity of the lookup can be decreased with ++ * absolutely no effort just returning the cached next_active value; ++ * we prefer to do full lookups to test the consistency of * the data ++ * structures. ++ */ ++static struct bfq_entity *bfq_lookup_next_entity(struct bfq_sched_data *sd, ++ int extract, ++ struct bfq_data *bfqd) ++{ ++ struct bfq_service_tree *st = sd->service_tree; ++ struct bfq_entity *entity; ++ int i=0; ++ ++ BUG_ON(sd->active_entity != NULL); ++ ++ if (bfqd != NULL && ++ jiffies - bfqd->bfq_class_idle_last_service > BFQ_CL_IDLE_TIMEOUT) { ++ entity = __bfq_lookup_next_entity(st + BFQ_IOPRIO_CLASSES - 1, true); ++ if (entity != NULL) { ++ i = BFQ_IOPRIO_CLASSES - 1; ++ bfqd->bfq_class_idle_last_service = jiffies; ++ sd->next_active = entity; ++ } ++ } ++ for (; i < BFQ_IOPRIO_CLASSES; i++) { ++ entity = __bfq_lookup_next_entity(st + i, false); ++ if (entity != NULL) { ++ if (extract) { ++ bfq_check_next_active(sd, entity); ++ bfq_active_extract(st + i, entity); ++ sd->active_entity = entity; ++ sd->next_active = NULL; ++ } ++ break; ++ } ++ } ++ ++ return entity; ++} ++ ++/* ++ * Get next queue for service. 
++ */ ++static struct bfq_queue *bfq_get_next_queue(struct bfq_data *bfqd) ++{ ++ struct bfq_entity *entity = NULL; ++ struct bfq_sched_data *sd; ++ struct bfq_queue *bfqq; ++ ++ BUG_ON(bfqd->active_queue != NULL); ++ ++ if (bfqd->busy_queues == 0) ++ return NULL; ++ ++ sd = &bfqd->root_group->sched_data; ++ for (; sd != NULL; sd = entity->my_sched_data) { ++ entity = bfq_lookup_next_entity(sd, 1, bfqd); ++ BUG_ON(entity == NULL); ++ entity->service = 0; ++ } ++ ++ bfqq = bfq_entity_to_bfqq(entity); ++ BUG_ON(bfqq == NULL); ++ ++ return bfqq; ++} ++ ++/* ++ * Forced extraction of the given queue. ++ */ ++static void bfq_get_next_queue_forced(struct bfq_data *bfqd, ++ struct bfq_queue *bfqq) ++{ ++ struct bfq_entity *entity; ++ struct bfq_sched_data *sd; ++ ++ BUG_ON(bfqd->active_queue != NULL); ++ ++ entity = &bfqq->entity; ++ /* ++ * Bubble up extraction/update from the leaf to the root. ++ */ ++ for_each_entity(entity) { ++ sd = entity->sched_data; ++ bfq_update_budget(entity); ++ bfq_update_vtime(bfq_entity_service_tree(entity)); ++ bfq_active_extract(bfq_entity_service_tree(entity), entity); ++ sd->active_entity = entity; ++ sd->next_active = NULL; ++ entity->service = 0; ++ } ++ ++ return; ++} ++ ++static void __bfq_bfqd_reset_active(struct bfq_data *bfqd) ++{ ++ if (bfqd->active_cic != NULL) { ++ put_io_context(bfqd->active_cic->ioc); ++ bfqd->active_cic = NULL; ++ } ++ ++ bfqd->active_queue = NULL; ++ del_timer(&bfqd->idle_slice_timer); ++} ++ ++static void bfq_deactivate_bfqq(struct bfq_data *bfqd, struct bfq_queue *bfqq, ++ int requeue) ++{ ++ struct bfq_entity *entity = &bfqq->entity; ++ ++ if (bfqq == bfqd->active_queue) ++ __bfq_bfqd_reset_active(bfqd); ++ ++ bfq_deactivate_entity(entity, requeue); ++} ++ ++static void bfq_activate_bfqq(struct bfq_data *bfqd, struct bfq_queue *bfqq) ++{ ++ struct bfq_entity *entity = &bfqq->entity; ++ ++ bfq_activate_entity(entity); ++} ++ ++/* ++ * Called when the bfqq no longer has requests pending, remove it from ++ * the service tree. ++ */ ++static void bfq_del_bfqq_busy(struct bfq_data *bfqd, struct bfq_queue *bfqq, ++ int requeue) ++{ ++ BUG_ON(!bfq_bfqq_busy(bfqq)); ++ BUG_ON(!RB_EMPTY_ROOT(&bfqq->sort_list)); ++ ++ bfq_log_bfqq(bfqd, bfqq, "del from busy"); ++ ++ bfq_clear_bfqq_busy(bfqq); ++ ++ BUG_ON(bfqd->busy_queues == 0); ++ bfqd->busy_queues--; ++ ++ bfq_deactivate_bfqq(bfqd, bfqq, requeue); ++} ++ ++/* ++ * Called when an inactive queue receives a new request. ++ */ ++static void bfq_add_bfqq_busy(struct bfq_data *bfqd, struct bfq_queue *bfqq) ++{ ++ BUG_ON(bfq_bfqq_busy(bfqq)); ++ BUG_ON(bfqq == bfqd->active_queue); ++ ++ bfq_log_bfqq(bfqd, bfqq, "add to busy"); ++ ++ bfq_activate_bfqq(bfqd, bfqq); ++ ++ bfq_mark_bfqq_busy(bfqq); ++ bfqd->busy_queues++; ++} +diff --git a/block/bfq.h block/bfq.h +new file mode 100644 +index 0000000..f23a9a5 +--- /dev/null ++++ block/bfq.h +@@ -0,0 +1,593 @@ ++/* ++ * BFQ-v5 for 3.2.0: data structures and common functions prototypes. 
++ * ++ * Based on ideas and code from CFQ: ++ * Copyright (C) 2003 Jens Axboe ++ * ++ * Copyright (C) 2008 Fabio Checconi ++ * Paolo Valente ++ */ ++ ++#ifndef _BFQ_H ++#define _BFQ_H ++ ++#include ++#include ++#include ++#include ++ ++#define BFQ_IOPRIO_CLASSES 3 ++#define BFQ_CL_IDLE_TIMEOUT HZ/5 ++ ++#define BFQ_MIN_WEIGHT 1 ++#define BFQ_MAX_WEIGHT 1000 ++ ++#define BFQ_DEFAULT_GRP_WEIGHT 10 ++#define BFQ_DEFAULT_GRP_IOPRIO 0 ++#define BFQ_DEFAULT_GRP_CLASS IOPRIO_CLASS_BE ++ ++struct bfq_entity; ++ ++/** ++ * struct bfq_service_tree - per ioprio_class service tree. ++ * @active: tree for active entities (i.e., those backlogged). ++ * @idle: tree for idle entities (i.e., those not backlogged, with V <= F_i). ++ * @first_idle: idle entity with minimum F_i. ++ * @last_idle: idle entity with maximum F_i. ++ * @vtime: scheduler virtual time. ++ * @wsum: scheduler weight sum; active and idle entities contribute to it. ++ * ++ * Each service tree represents a B-WF2Q+ scheduler on its own. Each ++ * ioprio_class has its own independent scheduler, and so its own ++ * bfq_service_tree. All the fields are protected by the queue lock ++ * of the containing bfqd. ++ */ ++struct bfq_service_tree { ++ struct rb_root active; ++ struct rb_root idle; ++ ++ struct bfq_entity *first_idle; ++ struct bfq_entity *last_idle; ++ ++ u64 vtime; ++ unsigned long wsum; ++}; ++ ++/** ++ * struct bfq_sched_data - multi-class scheduler. ++ * @active_entity: entity under service. ++ * @next_active: head-of-the-line entity in the scheduler. ++ * @service_tree: array of service trees, one per ioprio_class. ++ * ++ * bfq_sched_data is the basic scheduler queue. It supports three ++ * ioprio_classes, and can be used either as a toplevel queue or as ++ * an intermediate queue on a hierarchical setup. ++ * @next_active points to the active entity of the sched_data service ++ * trees that will be scheduled next. ++ * ++ * The supported ioprio_classes are the same as in CFQ, in descending ++ * priority order, IOPRIO_CLASS_RT, IOPRIO_CLASS_BE, IOPRIO_CLASS_IDLE. ++ * Requests from higher priority queues are served before all the ++ * requests from lower priority queues; among requests of the same ++ * queue requests are served according to B-WF2Q+. ++ * All the fields are protected by the queue lock of the containing bfqd. ++ */ ++struct bfq_sched_data { ++ struct bfq_entity *active_entity; ++ struct bfq_entity *next_active; ++ struct bfq_service_tree service_tree[BFQ_IOPRIO_CLASSES]; ++}; ++ ++/** ++ * struct bfq_entity - schedulable entity. ++ * @rb_node: service_tree member. ++ * @on_st: flag, true if the entity is on a tree (either the active or ++ * the idle one of its service_tree). ++ * @finish: B-WF2Q+ finish timestamp (aka F_i). ++ * @start: B-WF2Q+ start timestamp (aka S_i). ++ * @tree: tree the entity is enqueued into; %NULL if not on a tree. ++ * @min_start: minimum start time of the (active) subtree rooted at ++ * this entity; used for O(log N) lookups into active trees. ++ * @service: service received during the last round of service. ++ * @budget: budget used to calculate F_i; F_i = S_i + @budget / @weight. ++ * @weight: weight of the queue ++ * @parent: parent entity, for hierarchical scheduling. ++ * @my_sched_data: for non-leaf nodes in the cgroup hierarchy, the ++ * associated scheduler queue, %NULL on leaf nodes. ++ * @sched_data: the scheduler queue this entity belongs to. ++ * @ioprio: the ioprio in use. ++ * @new_weight: when a weight change is requested, the new weight value. 
++ * @orig_weight: original weight, used to implement weight boosting ++ * @new_ioprio: when an ioprio change is requested, the new ioprio value. ++ * @ioprio_class: the ioprio_class in use. ++ * @new_ioprio_class: when an ioprio_class change is requested, the new ++ * ioprio_class value. ++ * @ioprio_changed: flag, true when the user requested a weight, ioprio or ++ * ioprio_class change. ++ * ++ * A bfq_entity is used to represent either a bfq_queue (leaf node in the ++ * cgroup hierarchy) or a bfq_group into the upper level scheduler. Each ++ * entity belongs to the sched_data of the parent group in the cgroup ++ * hierarchy. Non-leaf entities have also their own sched_data, stored ++ * in @my_sched_data. ++ * ++ * Each entity stores independently its priority values; this would ++ * allow different weights on different devices, but this ++ * functionality is not exported to userspace by now. Priorities and ++ * weights are updated lazily, first storing the new values into the ++ * new_* fields, then setting the @ioprio_changed flag. As soon as ++ * there is a transition in the entity state that allows the priority ++ * update to take place the effective and the requested priority ++ * values are synchronized. ++ * ++ * Unless cgroups are used, the weight value is calculated from the ++ * ioprio to export the same interface as CFQ. When dealing with ++ * ``well-behaved'' queues (i.e., queues that do not spend too much ++ * time to consume their budget and have true sequential behavior, and ++ * when there are no external factors breaking anticipation) the ++ * relative weights at each level of the cgroups hierarchy should be ++ * guaranteed. All the fields are protected by the queue lock of the ++ * containing bfqd. ++ */ ++struct bfq_entity { ++ struct rb_node rb_node; ++ ++ int on_st; ++ ++ u64 finish; ++ u64 start; ++ ++ struct rb_root *tree; ++ ++ u64 min_start; ++ ++ unsigned long service, budget; ++ unsigned short weight, new_weight; ++ unsigned short orig_weight; ++ ++ struct bfq_entity *parent; ++ ++ struct bfq_sched_data *my_sched_data; ++ struct bfq_sched_data *sched_data; ++ ++ unsigned short ioprio, new_ioprio; ++ unsigned short ioprio_class, new_ioprio_class; ++ ++ int ioprio_changed; ++}; ++ ++struct bfq_group; ++ ++/** ++ * struct bfq_queue - leaf schedulable entity. ++ * @ref: reference counter. ++ * @bfqd: parent bfq_data. ++ * @new_bfqq: shared bfq_queue if queue is cooperating with ++ * one or more other queues. ++ * @pos_node: request-position tree member (see bfq_data's @rq_pos_tree). ++ * @pos_root: request-position tree root (see bfq_data's @rq_pos_tree). ++ * @sort_list: sorted list of pending requests. ++ * @next_rq: if fifo isn't expired, next request to serve. ++ * @queued: nr of requests queued in @sort_list. ++ * @allocated: currently allocated requests. ++ * @meta_pending: pending metadata requests. ++ * @fifo: fifo list of requests in sort_list. ++ * @entity: entity representing this queue in the scheduler. ++ * @max_budget: maximum budget allowed from the feedback mechanism. ++ * @budget_timeout: budget expiration (in jiffies). ++ * @dispatched: number of requests on the dispatch list or inside driver. ++ * @org_ioprio: saved ioprio during boosted periods. ++ * @flags: status flags. ++ * @bfqq_list: node for active/idle bfqq list inside our bfqd. 
++ * @seek_samples: number of seeks sampled ++ * @seek_total: sum of the distances of the seeks sampled ++ * @seek_mean: mean seek distance ++ * @last_request_pos: position of the last request enqueued ++ * @pid: pid of the process owning the queue, used for logging purposes. ++ * @last_rais_start_time: last (idle -> weight-raised) transition attempt ++ * @raising_cur_max_time: current max raising time for this queue ++ * ++ * A bfq_queue is a leaf request queue; it can be associated to an io_context ++ * or more (if it is an async one). @cgroup holds a reference to the ++ * cgroup, to be sure that it does not disappear while a bfqq still ++ * references it (mostly to avoid races between request issuing and task ++ * migration followed by cgroup distruction). ++ * All the fields are protected by the queue lock of the containing bfqd. ++ */ ++struct bfq_queue { ++ atomic_t ref; ++ struct bfq_data *bfqd; ++ ++ /* fields for cooperating queues handling */ ++ struct bfq_queue *new_bfqq; ++ struct rb_node pos_node; ++ struct rb_root *pos_root; ++ ++ struct rb_root sort_list; ++ struct request *next_rq; ++ int queued[2]; ++ int allocated[2]; ++ int meta_pending; ++ struct list_head fifo; ++ ++ struct bfq_entity entity; ++ ++ unsigned long max_budget; ++ unsigned long budget_timeout; ++ ++ int dispatched; ++ ++ unsigned short org_ioprio; ++ ++ unsigned int flags; ++ ++ struct list_head bfqq_list; ++ ++ unsigned int seek_samples; ++ u64 seek_total; ++ sector_t seek_mean; ++ sector_t last_request_pos; ++ ++ pid_t pid; ++ ++ /* weight-raising fields */ ++ unsigned int raising_cur_max_time; ++ u64 last_rais_start_finish, soft_rt_next_start; ++ unsigned int raising_coeff; ++}; ++ ++/** ++ * struct bfq_data - per device data structure. ++ * @queue: request queue for the managed device. ++ * @root_group: root bfq_group for the device. ++ * @rq_pos_tree: rbtree sorted by next_request position, ++ * used when determining if two or more queues ++ * have interleaving requests (see bfq_close_cooperator). ++ * @busy_queues: number of bfq_queues containing requests (including the ++ * queue under service, even if it is idling). ++ * @queued: number of queued requests. ++ * @rq_in_driver: number of requests dispatched and waiting for completion. ++ * @sync_flight: number of sync requests in the driver. ++ * @max_rq_in_driver: max number of reqs in driver in the last @hw_tag_samples ++ * completed requests . ++ * @hw_tag_samples: nr of samples used to calculate hw_tag. ++ * @hw_tag: flag set to one if the driver is showing a queueing behavior. ++ * @budgets_assigned: number of budgets assigned. ++ * @idle_slice_timer: timer set when idling for the next sequential request ++ * from the queue under service. ++ * @unplug_work: delayed work to restart dispatching on the request queue. ++ * @active_queue: bfq_queue under service. ++ * @active_cic: cfq_io_context (cic) associated with the @active_queue. ++ * @last_position: on-disk position of the last served request. ++ * @last_budget_start: beginning of the last budget. ++ * @last_idling_start: beginning of the last idle slice. ++ * @peak_rate: peak transfer rate observed for a budget. ++ * @peak_rate_samples: number of samples used to calculate @peak_rate. ++ * @bfq_max_budget: maximum budget allotted to a bfq_queue before rescheduling. ++ * @cic_index: use small consequent indexes as radix tree keys to reduce depth ++ * @cic_list: list of all the cics active on the bfq_data device. ++ * @group_list: list of all the bfq_groups active on the device. 
++ * @active_list: list of all the bfq_queues active on the device. ++ * @idle_list: list of all the bfq_queues idle on the device. ++ * @bfq_quantum: max number of requests dispatched per dispatch round. ++ * @bfq_fifo_expire: timeout for async/sync requests; when it expires ++ * requests are served in fifo order. ++ * @bfq_back_penalty: weight of backward seeks wrt forward ones. ++ * @bfq_back_max: maximum allowed backward seek. ++ * @bfq_slice_idle: maximum idling time. ++ * @bfq_user_max_budget: user-configured max budget value (0 for auto-tuning). ++ * @bfq_max_budget_async_rq: maximum budget (in nr of requests) allotted to ++ * async queues. ++ * @bfq_timeout: timeout for bfq_queues to consume their budget; used to ++ * to prevent seeky queues to impose long latencies to well ++ * behaved ones (this also implies that seeky queues cannot ++ * receive guarantees in the service domain; after a timeout ++ * they are charged for the whole allocated budget, to try ++ * to preserve a behavior reasonably fair among them, but ++ * without service-domain guarantees). ++ * @bfq_raising_coeff: Maximum factor by which the weight of a boosted ++ * queue is multiplied ++ * @bfq_raising_max_time: maximum duration of a weight-raising period (jiffies) ++ * @bfq_raising_rt_max_time: maximum duration for soft real-time processes ++ * @bfq_raising_min_idle_time: minimum idle period after which weight-raising ++ * may be reactivated for a queue (in jiffies) ++ * @bfq_raising_min_inter_arr_async: minimum period between request arrivals ++ * after which weight-raising may be ++ * reactivated for an already busy queue ++ * (in jiffies) ++ * @bfq_raising_max_softrt_rate: max service-rate for a soft real-time queue, ++ * sectors per seconds ++ * @RT_prod: cached value of the product R*T used for computing the maximum ++ * duration of the weight raising automatically ++ * @oom_bfqq: fallback dummy bfqq for extreme OOM conditions ++ * ++ * All the fields are protected by the @queue lock. 
++ */ ++struct bfq_data { ++ struct request_queue *queue; ++ ++ struct bfq_group *root_group; ++ ++ struct rb_root rq_pos_tree; ++ ++ int busy_queues; ++ int queued; ++ int rq_in_driver; ++ int sync_flight; ++ ++ int max_rq_in_driver; ++ int hw_tag_samples; ++ int hw_tag; ++ ++ int budgets_assigned; ++ ++ struct timer_list idle_slice_timer; ++ struct work_struct unplug_work; ++ ++ struct bfq_queue *active_queue; ++ struct cfq_io_context *active_cic; ++ ++ sector_t last_position; ++ ++ ktime_t last_budget_start; ++ ktime_t last_idling_start; ++ int peak_rate_samples; ++ u64 peak_rate; ++ unsigned long bfq_max_budget; ++ ++ unsigned int cic_index; ++ struct list_head cic_list; ++ struct hlist_head group_list; ++ struct list_head active_list; ++ struct list_head idle_list; ++ ++ unsigned int bfq_quantum; ++ unsigned int bfq_fifo_expire[2]; ++ unsigned int bfq_back_penalty; ++ unsigned int bfq_back_max; ++ unsigned int bfq_slice_idle; ++ u64 bfq_class_idle_last_service; ++ ++ unsigned int bfq_user_max_budget; ++ unsigned int bfq_max_budget_async_rq; ++ unsigned int bfq_timeout[2]; ++ ++ bool low_latency; ++ ++ /* parameters of the low_latency heuristics */ ++ unsigned int bfq_raising_coeff; ++ unsigned int bfq_raising_max_time; ++ unsigned int bfq_raising_rt_max_time; ++ unsigned int bfq_raising_min_idle_time; ++ unsigned int bfq_raising_min_inter_arr_async; ++ unsigned int bfq_raising_max_softrt_rate; ++ u64 RT_prod; ++ ++ struct bfq_queue oom_bfqq; ++}; ++ ++enum bfqq_state_flags { ++ BFQ_BFQQ_FLAG_busy = 0, /* has requests or is under service */ ++ BFQ_BFQQ_FLAG_wait_request, /* waiting for a request */ ++ BFQ_BFQQ_FLAG_must_alloc, /* must be allowed rq alloc */ ++ BFQ_BFQQ_FLAG_fifo_expire, /* FIFO checked in this slice */ ++ BFQ_BFQQ_FLAG_idle_window, /* slice idling enabled */ ++ BFQ_BFQQ_FLAG_prio_changed, /* task priority has changed */ ++ BFQ_BFQQ_FLAG_sync, /* synchronous queue */ ++ BFQ_BFQQ_FLAG_budget_new, /* no completion with this budget */ ++ BFQ_BFQQ_FLAG_coop, /* bfqq is shared */ ++ BFQ_BFQQ_FLAG_split_coop, /* shared bfqq will be splitted */ ++ BFQ_BFQQ_FLAG_some_coop_idle, /* some cooperator is inactive */ ++}; ++ ++#define BFQ_BFQQ_FNS(name) \ ++static inline void bfq_mark_bfqq_##name(struct bfq_queue *bfqq) \ ++{ \ ++ (bfqq)->flags |= (1 << BFQ_BFQQ_FLAG_##name); \ ++} \ ++static inline void bfq_clear_bfqq_##name(struct bfq_queue *bfqq) \ ++{ \ ++ (bfqq)->flags &= ~(1 << BFQ_BFQQ_FLAG_##name); \ ++} \ ++static inline int bfq_bfqq_##name(const struct bfq_queue *bfqq) \ ++{ \ ++ return ((bfqq)->flags & (1 << BFQ_BFQQ_FLAG_##name)) != 0; \ ++} ++ ++BFQ_BFQQ_FNS(busy); ++BFQ_BFQQ_FNS(wait_request); ++BFQ_BFQQ_FNS(must_alloc); ++BFQ_BFQQ_FNS(fifo_expire); ++BFQ_BFQQ_FNS(idle_window); ++BFQ_BFQQ_FNS(prio_changed); ++BFQ_BFQQ_FNS(sync); ++BFQ_BFQQ_FNS(budget_new); ++BFQ_BFQQ_FNS(coop); ++BFQ_BFQQ_FNS(split_coop); ++BFQ_BFQQ_FNS(some_coop_idle); ++#undef BFQ_BFQQ_FNS ++ ++/* Logging facilities. */ ++#define bfq_log_bfqq(bfqd, bfqq, fmt, args...) \ ++ blk_add_trace_msg((bfqd)->queue, "bfq%d " fmt, (bfqq)->pid, ##args) ++ ++#define bfq_log(bfqd, fmt, args...) \ ++ blk_add_trace_msg((bfqd)->queue, "bfq " fmt, ##args) ++ ++/* Expiration reasons. 
*/ ++enum bfqq_expiration { ++ BFQ_BFQQ_TOO_IDLE = 0, /* queue has been idling for too long */ ++ BFQ_BFQQ_BUDGET_TIMEOUT, /* budget took too long to be used */ ++ BFQ_BFQQ_BUDGET_EXHAUSTED, /* budget consumed */ ++ BFQ_BFQQ_NO_MORE_REQUESTS, /* the queue has no more requests */ ++}; ++ ++#ifdef CONFIG_CGROUP_BFQIO ++/** ++ * struct bfq_group - per (device, cgroup) data structure. ++ * @entity: schedulable entity to insert into the parent group sched_data. ++ * @sched_data: own sched_data, to contain child entities (they may be ++ * both bfq_queues and bfq_groups). ++ * @group_node: node to be inserted into the bfqio_cgroup->group_data ++ * list of the containing cgroup's bfqio_cgroup. ++ * @bfqd_node: node to be inserted into the @bfqd->group_list list ++ * of the groups active on the same device; used for cleanup. ++ * @bfqd: the bfq_data for the device this group acts upon. ++ * @async_bfqq: array of async queues for all the tasks belonging to ++ * the group, one queue per ioprio value per ioprio_class, ++ * except for the idle class that has only one queue. ++ * @async_idle_bfqq: async queue for the idle class (ioprio is ignored). ++ * @my_entity: pointer to @entity, %NULL for the toplevel group; used ++ * to avoid too many special cases during group creation/migration. ++ * ++ * Each (device, cgroup) pair has its own bfq_group, i.e., for each cgroup ++ * there is a set of bfq_groups, each one collecting the lower-level ++ * entities belonging to the group that are acting on the same device. ++ * ++ * Locking works as follows: ++ * o @group_node is protected by the bfqio_cgroup lock, and is accessed ++ * via RCU from its readers. ++ * o @bfqd is protected by the queue lock, RCU is used to access it ++ * from the readers. ++ * o All the other fields are protected by the @bfqd queue lock. ++ */ ++struct bfq_group { ++ struct bfq_entity entity; ++ struct bfq_sched_data sched_data; ++ ++ struct hlist_node group_node; ++ struct hlist_node bfqd_node; ++ ++ void *bfqd; ++ ++ struct bfq_queue *async_bfqq[2][IOPRIO_BE_NR]; ++ struct bfq_queue *async_idle_bfqq; ++ ++ struct bfq_entity *my_entity; ++}; ++ ++/** ++ * struct bfqio_cgroup - bfq cgroup data structure. ++ * @css: subsystem state for bfq in the containing cgroup. ++ * @weight: cgroup weight. ++ * @ioprio: cgroup ioprio. ++ * @ioprio_class: cgroup ioprio_class. ++ * @lock: spinlock that protects @ioprio, @ioprio_class and @group_data. ++ * @group_data: list containing the bfq_group belonging to this cgroup. ++ * ++ * @group_data is accessed using RCU, with @lock protecting the updates, ++ * @ioprio and @ioprio_class are protected by @lock. 
++ */ ++struct bfqio_cgroup { ++ struct cgroup_subsys_state css; ++ ++ unsigned short weight, ioprio, ioprio_class; ++ ++ spinlock_t lock; ++ struct hlist_head group_data; ++}; ++#else ++struct bfq_group { ++ struct bfq_sched_data sched_data; ++ ++ struct bfq_queue *async_bfqq[2][IOPRIO_BE_NR]; ++ struct bfq_queue *async_idle_bfqq; ++}; ++#endif ++ ++static inline struct bfq_service_tree * ++bfq_entity_service_tree(struct bfq_entity *entity) ++{ ++ struct bfq_sched_data *sched_data = entity->sched_data; ++ unsigned int idx = entity->ioprio_class - 1; ++ ++ BUG_ON(idx >= BFQ_IOPRIO_CLASSES); ++ BUG_ON(sched_data == NULL); ++ ++ return sched_data->service_tree + idx; ++} ++ ++static inline struct bfq_queue *cic_to_bfqq(struct cfq_io_context *cic, ++ int is_sync) ++{ ++ return cic->cfqq[!!is_sync]; ++} ++ ++static inline void cic_set_bfqq(struct cfq_io_context *cic, ++ struct bfq_queue *bfqq, int is_sync) ++{ ++ cic->cfqq[!!is_sync] = bfqq; ++} ++ ++static inline void call_for_each_cic(struct io_context *ioc, ++ void (*func)(struct io_context *, ++ struct cfq_io_context *)) ++{ ++ struct cfq_io_context *cic; ++ struct hlist_node *n; ++ ++ rcu_read_lock(); ++ hlist_for_each_entry_rcu(cic, n, &ioc->bfq_cic_list, cic_list) ++ func(ioc, cic); ++ rcu_read_unlock(); ++} ++ ++#define CIC_DEAD_KEY 1ul ++#define CIC_DEAD_INDEX_SHIFT 1 ++ ++static inline void *bfqd_dead_key(struct bfq_data *bfqd) ++{ ++ return (void *)(bfqd->cic_index << CIC_DEAD_INDEX_SHIFT | CIC_DEAD_KEY); ++} ++ ++/** ++ * bfq_get_bfqd_locked - get a lock to a bfqd using a RCU protected pointer. ++ * @ptr: a pointer to a bfqd. ++ * @flags: storage for the flags to be saved. ++ * ++ * This function allows cic->key and bfqg->bfqd to be protected by the ++ * queue lock of the bfqd they reference; the pointer is dereferenced ++ * under RCU, so the storage for bfqd is assured to be safe as long ++ * as the RCU read side critical section does not end. After the ++ * bfqd->queue->queue_lock is taken the pointer is rechecked, to be ++ * sure that no other writer accessed it. If we raced with a writer, ++ * the function returns NULL, with the queue unlocked, otherwise it ++ * returns the dereferenced pointer, with the queue locked. 
++ */ ++static inline struct bfq_data *bfq_get_bfqd_locked(void **ptr, ++ unsigned long *flags) ++{ ++ struct bfq_data *bfqd; ++ ++ rcu_read_lock(); ++ bfqd = rcu_dereference(*(struct bfq_data **)ptr); ++ ++ if (bfqd != NULL && !((unsigned long) bfqd & CIC_DEAD_KEY)) { ++ spin_lock_irqsave(bfqd->queue->queue_lock, *flags); ++ if (*ptr == bfqd) ++ goto out; ++ spin_unlock_irqrestore(bfqd->queue->queue_lock, *flags); ++ } ++ ++ bfqd = NULL; ++out: ++ rcu_read_unlock(); ++ return bfqd; ++} ++ ++static inline void bfq_put_bfqd_unlock(struct bfq_data *bfqd, ++ unsigned long *flags) ++{ ++ spin_unlock_irqrestore(bfqd->queue->queue_lock, *flags); ++} ++ ++static void bfq_changed_ioprio(struct io_context *ioc, ++ struct cfq_io_context *cic); ++static void bfq_put_queue(struct bfq_queue *bfqq); ++static void bfq_dispatch_insert(struct request_queue *q, struct request *rq); ++static struct bfq_queue *bfq_get_queue(struct bfq_data *bfqd, ++ struct bfq_group *bfqg, int is_sync, ++ struct io_context *ioc, gfp_t gfp_mask); ++static void bfq_put_async_queues(struct bfq_data *bfqd, struct bfq_group *bfqg); ++static void bfq_exit_bfqq(struct bfq_data *bfqd, struct bfq_queue *bfqq); ++#endif +-- +1.7.10.4 + diff --git a/3.2.34/01patch-2.6.33_atopcnt.patch b/3.2.34/01patch-2.6.33_atopcnt.patch new file mode 100644 index 0000000..28bf733 --- /dev/null +++ b/3.2.34/01patch-2.6.33_atopcnt.patch @@ -0,0 +1,174 @@ +diff --git a/block/blk-core.c b/block/blk-core.c +index d1a9a0a..8b54acb 100644 +--- a/block/blk-core.c ++++ b/block/blk-core.c +@@ -73,6 +73,17 @@ static void drive_stat_acct(struct request *rq, int new_io) + part_inc_in_flight(part, rw); + } + ++ switch (rw) { /* ATOP */ ++ case READ: /* ATOP */ ++ current->group_leader->stat.dsk_rio += new_io; /* ATOP */ ++ current->group_leader->stat.dsk_rsz += blk_rq_sectors(rq); /* ATOP */ ++ break; /* ATOP */ ++ case WRITE: /* ATOP */ ++ current->group_leader->stat.dsk_wio += new_io; /* ATOP */ ++ current->group_leader->stat.dsk_wsz += blk_rq_sectors(rq); /* ATOP */ ++ break; /* ATOP */ ++ } /* ATOP */ ++ + part_stat_unlock(); + } + +diff --git a/fs/proc/array.c b/fs/proc/array.c +index 13b5d07..cac522e 100644 +--- a/fs/proc/array.c ++++ b/fs/proc/array.c +@@ -515,6 +515,25 @@ static int do_task_stat(struct seq_file *m, struct pid_namespace *ns, + (unsigned long long)delayacct_blkio_ticks(task), + cputime_to_clock_t(gtime), + cputime_to_clock_t(cgtime)); ++ ++ seq_printf(m, /* ATOP */ ++ "%lu %llu %lu %llu %lu %llu %lu " /* ATOP */ ++ "%llu %lu %llu %lu %llu %lu %lu\n", /* ATOP */ ++ task->stat.dsk_rio, /* ATOP */ ++ task->stat.dsk_rsz, /* ATOP */ ++ task->stat.dsk_wio, /* ATOP */ ++ task->stat.dsk_wsz, /* ATOP */ ++ task->stat.tcp_snd, /* ATOP */ ++ task->stat.tcp_ssz, /* ATOP */ ++ task->stat.tcp_rcv, /* ATOP */ ++ task->stat.tcp_rsz, /* ATOP */ ++ task->stat.udp_snd, /* ATOP */ ++ task->stat.udp_ssz, /* ATOP */ ++ task->stat.udp_rcv, /* ATOP */ ++ task->stat.udp_rsz, /* ATOP */ ++ task->stat.raw_snd, /* ATOP */ ++ task->stat.raw_rcv); /* ATOP */ ++ + if (mm) + mmput(mm); + return 0; +diff --git a/include/linux/sched.h b/include/linux/sched.h +index 78efe7c..22391bf 100644 +--- a/include/linux/sched.h ++++ b/include/linux/sched.h +@@ -1512,6 +1512,17 @@ struct task_struct { + #endif + atomic_t fs_excl; /* holding fs exclusive resources */ + struct rcu_head rcu; ++ ++ struct { /* ATOP */ ++ unsigned long dsk_rio, dsk_wio; /* ATOP */ ++ unsigned long long dsk_rsz, dsk_wsz; /* ATOP */ ++ unsigned long tcp_snd, tcp_rcv; /* ATOP */ ++ unsigned long long tcp_ssz, 
tcp_rsz; /* ATOP */ ++ unsigned long udp_snd, udp_rcv; /* ATOP */ ++ unsigned long long udp_ssz, udp_rsz; /* ATOP */ ++ unsigned long raw_snd, raw_rcv; /* ATOP */ ++ } stat; /* ATOP */ ++ + + /* + * cache last used pipe for splice +diff --git a/kernel/acct.c b/kernel/acct.c +index a6605ca..d5df53a 100644 +--- a/kernel/acct.c ++++ b/kernel/acct.c +@@ -565,7 +565,7 @@ static void do_acct_process(struct bsd_acct_struct *acct, + ac.ac_exitcode = pacct->ac_exitcode; + spin_unlock_irq(¤t->sighand->siglock); + ac.ac_io = encode_comp_t(0 /* current->io_usage */); /* %% */ +- ac.ac_rw = encode_comp_t(ac.ac_io / 1024); ++ ac.ac_rw = encode_comp_t(current->stat.dsk_rio + current->stat.dsk_wio); /* ATOP */ + ac.ac_swaps = encode_comp_t(0); + + /* +diff --git a/kernel/fork.c b/kernel/fork.c +index f88bd98..bab2085 100644 +--- a/kernel/fork.c ++++ b/kernel/fork.c +@@ -683,6 +683,14 @@ static int copy_mm(unsigned long clone_flags, struct task_struct * tsk) + + tsk->min_flt = tsk->maj_flt = 0; + tsk->nvcsw = tsk->nivcsw = 0; ++ tsk->stat.dsk_rio = tsk->stat.dsk_wio = 0; /* ATOP */ ++ tsk->stat.dsk_rsz = tsk->stat.dsk_wsz = 0; /* ATOP */ ++ tsk->stat.tcp_snd = tsk->stat.tcp_rcv = 0; /* ATOP */ ++ tsk->stat.tcp_ssz = tsk->stat.tcp_rsz = 0; /* ATOP */ ++ tsk->stat.udp_snd = tsk->stat.udp_rcv = 0; /* ATOP */ ++ tsk->stat.udp_ssz = tsk->stat.udp_rsz = 0; /* ATOP */ ++ tsk->stat.raw_snd = tsk->stat.raw_rcv = 0; /* ATOP */ ++ + #ifdef CONFIG_DETECT_HUNG_TASK + tsk->last_switch_count = tsk->nvcsw + tsk->nivcsw; + #endif +diff --git a/net/socket.c b/net/socket.c +index 769c386..3ba19f6 100644 +--- a/net/socket.c ++++ b/net/socket.c +@@ -547,10 +547,28 @@ static inline int __sock_sendmsg(struct kiocb *iocb, struct socket *sock, + si->size = size; + + err = security_socket_sendmsg(sock, msg, size); +- if (err) +- return err; +- +- return sock->ops->sendmsg(iocb, sock, msg, size); ++ if (!err) ++ err = sock->ops->sendmsg(iocb, sock, msg, size); ++ ++ if (err >= 0 && sock->sk) { /* ATOP */ ++ switch (sock->sk->sk_family) { /* ATOP */ ++ case PF_INET: /* ATOP */ ++ case PF_INET6: /* ATOP */ ++ switch (sock->sk->sk_type) { /* ATOP */ ++ case SOCK_STREAM: /* ATOP */ ++ current->group_leader->stat.tcp_snd++; /* ATOP */ ++ current->group_leader->stat.tcp_ssz+=size;/* ATOP */ ++ break; /* ATOP */ ++ case SOCK_DGRAM: /* ATOP */ ++ current->group_leader->stat.udp_snd++; /* ATOP */ ++ current->group_leader->stat.udp_ssz+=size;/* ATOP */ ++ break; /* ATOP */ ++ case SOCK_RAW: /* ATOP */ ++ current->group_leader->stat.raw_snd++; /* ATOP */ ++ } /* ATOP */ ++ } /* ATOP */ ++ } /* ATOP */ ++ return err; + } + + int sock_sendmsg(struct socket *sock, struct msghdr *msg, size_t size) +@@ -682,7 +700,29 @@ static inline int __sock_recvmsg(struct kiocb *iocb, struct socket *sock, + { + int err = security_socket_recvmsg(sock, msg, size, flags); + +- return err ?: __sock_recvmsg_nosec(iocb, sock, msg, size, flags); ++ if (!err) ++ err = __sock_recvmsg_nosec(iocb, sock, msg, size, flags); ++ ++ if (err >= 0 && sock->sk) { /* ATOP */ ++ switch (sock->sk->sk_family) { /* ATOP */ ++ case PF_INET: /* ATOP */ ++ case PF_INET6: /* ATOP */ ++ switch (sock->sk->sk_type) { /* ATOP */ ++ case SOCK_STREAM: /* ATOP */ ++ current->group_leader->stat.tcp_rcv++; /* ATOP */ ++ current->group_leader->stat.tcp_rsz+=err; /* ATOP */ ++ break; /* ATOP */ ++ case SOCK_DGRAM: /* ATOP */ ++ current->group_leader->stat.udp_rcv++; /* ATOP */ ++ current->group_leader->stat.udp_rsz+=err; /* ATOP */ ++ break; /* ATOP */ ++ case SOCK_RAW: /* ATOP */ ++ 
current->group_leader->stat.raw_rcv++; /* ATOP */ ++ break; /* ATOP */ ++ } /* ATOP */ ++ } /* ATOP */ ++ } /* ATOP */ ++ return err; + } + + int sock_recvmsg(struct socket *sock, struct msghdr *msg, diff --git a/3.2.34/02patch-2.6.33_atopacct.patch b/3.2.34/02patch-2.6.33_atopacct.patch new file mode 100644 index 0000000..74e6a1c --- /dev/null +++ b/3.2.34/02patch-2.6.33_atopacct.patch @@ -0,0 +1,125 @@ +Index: linux-2.6.28/include/linux/acct.h +=================================================================== +--- linux-2.6.28.orig/include/linux/acct.h 2009-01-14 13:02:24.000000000 +0100 ++++ linux-2.6.28/include/linux/acct.h 2009-01-14 13:03:33.000000000 +0100 +@@ -97,6 +97,54 @@ + char ac_comm[ACCT_COMM]; /* Command Name */ + }; + ++struct acct_atop ++{ ++ char ac_flag; /* Flags */ ++ char ac_version; /* Always set to ACCT_VERSION */ ++ __u32 ac_pid; /* Process ID */ ++ __u32 ac_ppid; /* Parent Process ID */ ++ __u16 ac_uid16; /* LSB of Real User ID */ ++ __u16 ac_gid16; /* LSB of Real Group ID */ ++ __u16 ac_tty; /* Control Terminal */ ++ __u32 ac_btime; /* Process Creation Time */ ++ comp_t ac_utime; /* User Time */ ++ comp_t ac_stime; /* System Time */ ++ comp_t ac_etime; /* Elapsed Time */ ++ comp_t ac_mem; /* Virtual Memory */ ++ comp_t ac_rss; /* Resident Memory */ ++ comp_t ac_io; /* Chars Transferred */ ++ comp_t ac_rw; /* Blocks Read or Written */ ++ comp_t ac_bread; /* Blocks Read */ ++ comp_t ac_bwrite; /* Blocks Written */ ++ comp2_t ac_dskrsz; /* Cum. blocks read */ ++ comp2_t ac_dskwsz; /* Cum. blocks written */ ++ comp_t ac_tcpsnd; /* TCP send requests */ ++ comp_t ac_tcprcv; /* TCP recv requests */ ++ comp2_t ac_tcpssz; /* TCP cum. length */ ++ comp2_t ac_tcprsz; /* TCP cum. length */ ++ comp_t ac_udpsnd; /* UDP send requests */ ++ comp_t ac_udprcv; /* UDP recv requests */ ++ comp2_t ac_udpssz; /* UDP cum. length */ ++ comp2_t ac_udprsz; /* UDP cum. length */ ++ comp_t ac_rawsnd; /* RAW send requests */ ++ comp_t ac_rawrcv; /* RAW recv requests */ ++ comp_t ac_minflt; /* Minor Pagefaults */ ++ comp_t ac_majflt; /* Major Pagefaults */ ++ comp_t ac_swaps; /* Number of Swaps */ ++/* m68k had no padding here. 
*/ ++#if !defined(CONFIG_M68K) || !defined(__KERNEL__) ++ __u16 ac_ahz; /* AHZ */ ++#endif ++ __u32 ac_exitcode; /* Exitcode */ ++ char ac_comm[ACCT_COMM + 1]; /* Command Name */ ++ __u8 ac_etime_hi; /* Elapsed Time MSB */ ++ __u16 ac_etime_lo; /* Elapsed Time LSB */ ++ __u32 ac_uid; /* Real User ID */ ++ __u32 ac_gid; /* Real Group ID */ ++}; ++ ++ ++ + /* + * accounting flags + */ +@@ -146,7 +194,13 @@ + * 5: new binary incompatible format (128 bytes, second half) + * + */ ++#define CONFIG_PROCESS_ACCT_ATOP + ++#ifdef CONFIG_PROCESS_ACCT_ATOP ++#define ACCT_VERSION 6 ++#define AHZ (USER_HZ) ++typedef struct acct_atop acct_t; ++#else + #ifdef CONFIG_BSD_PROCESS_ACCT_V3 + #define ACCT_VERSION 3 + #define AHZ 100 +@@ -160,6 +214,7 @@ + #define AHZ (USER_HZ) + typedef struct acct acct_t; + #endif ++#endif + + #else + #define ACCT_VERSION 2 +Index: linux-2.6.28/kernel/acct.c +=================================================================== +--- linux-2.6.28.orig/kernel/acct.c 2009-01-14 13:03:31.000000000 +0100 ++++ linux-2.6.28/kernel/acct.c 2009-01-14 13:03:33.000000000 +0100 +@@ -405,7 +405,7 @@ + return exp; + } + +-#if ACCT_VERSION==1 || ACCT_VERSION==2 ++#if ACCT_VERSION==1 || ACCT_VERSION==2 || ACCT_VERSION==6 + /* + * encode an u64 into a comp2_t (24 bits) + * +@@ -552,6 +552,30 @@ + ac.ac_ppid = task_tgid_nr_ns(rcu_dereference(current->real_parent), ns); + rcu_read_unlock(); + #endif ++#if ACCT_VERSION==6 /* ATOP */ ++ ac.ac_pid = current->pid; ++ ac.ac_ppid = current->parent->pid; ++ ac.ac_uid16 = ac.ac_uid; ++ ac.ac_gid16 = ac.ac_gid; ++ ac.ac_ahz = AHZ; ++ ac.ac_bread = encode_comp_t(current->stat.dsk_rio); ++ ac.ac_bwrite = encode_comp_t(current->stat.dsk_wio); ++ ac.ac_dskrsz = encode_comp2_t(current->stat.dsk_rsz); ++ ac.ac_dskwsz = encode_comp2_t(current->stat.dsk_wsz); ++ ac.ac_tcpsnd = encode_comp_t(current->stat.tcp_snd); ++ ac.ac_tcprcv = encode_comp_t(current->stat.tcp_rcv); ++ ac.ac_tcpssz = encode_comp2_t(current->stat.tcp_ssz); ++ ac.ac_tcprsz = encode_comp2_t(current->stat.tcp_rsz); ++ ac.ac_udpsnd = encode_comp_t(current->stat.udp_snd); ++ ac.ac_udprcv = encode_comp_t(current->stat.udp_rcv); ++ ac.ac_udpssz = encode_comp2_t(current->stat.udp_ssz); ++ ac.ac_udprsz = encode_comp2_t(current->stat.udp_rsz); ++ ac.ac_rawsnd = encode_comp_t(current->stat.raw_snd); ++ ac.ac_rawrcv = encode_comp_t(current->stat.raw_rcv); ++ ac.ac_rss = current->mm ? ++ encode_comp_t(get_mm_rss(current->mm)<<(PAGE_SHIFT-10)) : ++ encode_comp_t(0); ++#endif + + spin_lock_irq(¤t->sighand->siglock); + tty = current->signal->tty; /* Safe as we hold the siglock */ diff --git a/3.2.34/3.2.0-ck1.patch b/3.2.34/3.2.0-ck1.patch new file mode 100644 index 0000000..a81b2c4 --- /dev/null +++ b/3.2.34/3.2.0-ck1.patch @@ -0,0 +1,9093 @@ +Index: linux-3.2-ck1/arch/powerpc/platforms/cell/spufs/sched.c +=================================================================== +--- linux-3.2-ck1.orig/arch/powerpc/platforms/cell/spufs/sched.c 2012-01-16 10:07:27.897097267 +1100 ++++ linux-3.2-ck1/arch/powerpc/platforms/cell/spufs/sched.c 2012-01-16 10:07:31.336097029 +1100 +@@ -63,11 +63,6 @@ static struct timer_list spusched_timer; + static struct timer_list spuloadavg_timer; + + /* +- * Priority of a normal, non-rt, non-niced'd process (aka nice level 0). +- */ +-#define NORMAL_PRIO 120 +- +-/* + * Frequency of the spu scheduler tick. By default we do one SPU scheduler + * tick for every 10 CPU scheduler ticks. 
+ */ +Index: linux-3.2-ck1/Documentation/scheduler/sched-BFS.txt +=================================================================== +--- /dev/null 1970-01-01 00:00:00.000000000 +0000 ++++ linux-3.2-ck1/Documentation/scheduler/sched-BFS.txt 2012-01-16 10:07:31.336097029 +1100 +@@ -0,0 +1,347 @@ ++BFS - The Brain Fuck Scheduler by Con Kolivas. ++ ++Goals. ++ ++The goal of the Brain Fuck Scheduler, referred to as BFS from here on, is to ++completely do away with the complex designs of the past for the cpu process ++scheduler and instead implement one that is very simple in basic design. ++The main focus of BFS is to achieve excellent desktop interactivity and ++responsiveness without heuristics and tuning knobs that are difficult to ++understand, impossible to model and predict the effect of, and when tuned to ++one workload cause massive detriment to another. ++ ++ ++Design summary. ++ ++BFS is best described as a single runqueue, O(n) lookup, earliest effective ++virtual deadline first design, loosely based on EEVDF (earliest eligible virtual ++deadline first) and my previous Staircase Deadline scheduler. Each component ++shall be described in order to understand the significance of, and reasoning for ++it. The codebase when the first stable version was released was approximately ++9000 lines less code than the existing mainline linux kernel scheduler (in ++2.6.31). This does not even take into account the removal of documentation and ++the cgroups code that is not used. ++ ++Design reasoning. ++ ++The single runqueue refers to the queued but not running processes for the ++entire system, regardless of the number of CPUs. The reason for going back to ++a single runqueue design is that once multiple runqueues are introduced, ++per-CPU or otherwise, there will be complex interactions as each runqueue will ++be responsible for the scheduling latency and fairness of the tasks only on its ++own runqueue, and to achieve fairness and low latency across multiple CPUs, any ++advantage in throughput of having CPU local tasks causes other disadvantages. ++This is due to requiring a very complex balancing system to at best achieve some ++semblance of fairness across CPUs and can only maintain relatively low latency ++for tasks bound to the same CPUs, not across them. To increase said fairness ++and latency across CPUs, the advantage of local runqueue locking, which makes ++for better scalability, is lost due to having to grab multiple locks. ++ ++A significant feature of BFS is that all accounting is done purely based on CPU ++used and nowhere is sleep time used in any way to determine entitlement or ++interactivity. Interactivity "estimators" that use some kind of sleep/run ++algorithm are doomed to fail to detect all interactive tasks, and to falsely tag ++tasks that aren't interactive as being so. The reason for this is that it is ++close to impossible to determine that when a task is sleeping, whether it is ++doing it voluntarily, as in a userspace application waiting for input in the ++form of a mouse click or otherwise, or involuntarily, because it is waiting for ++another thread, process, I/O, kernel activity or whatever. Thus, such an ++estimator will introduce corner cases, and more heuristics will be required to ++cope with those corner cases, introducing more corner cases and failed ++interactivity detection and so on. 
Interactivity in BFS is built into the design ++by virtue of the fact that tasks that are waking up have not used up their quota ++of CPU time, and have earlier effective deadlines, thereby making it very likely ++they will preempt any CPU bound task of equivalent nice level. See below for ++more information on the virtual deadline mechanism. Even if they do not preempt ++a running task, because the rr interval is guaranteed to have a bound upper ++limit on how long a task will wait for, it will be scheduled within a timeframe ++that will not cause visible interface jitter. ++ ++ ++Design details. ++ ++Task insertion. ++ ++BFS inserts tasks into each relevant queue as an O(1) insertion into a double ++linked list. On insertion, *every* running queue is checked to see if the newly ++queued task can run on any idle queue, or preempt the lowest running task on the ++system. This is how the cross-CPU scheduling of BFS achieves significantly lower ++latency per extra CPU the system has. In this case the lookup is, in the worst ++case scenario, O(n) where n is the number of CPUs on the system. ++ ++Data protection. ++ ++BFS has one single lock protecting the process local data of every task in the ++global queue. Thus every insertion, removal and modification of task data in the ++global runqueue needs to grab the global lock. However, once a task is taken by ++a CPU, the CPU has its own local data copy of the running process' accounting ++information which only that CPU accesses and modifies (such as during a ++timer tick) thus allowing the accounting data to be updated lockless. Once a ++CPU has taken a task to run, it removes it from the global queue. Thus the ++global queue only ever has, at most, ++ ++ (number of tasks requesting cpu time) - (number of logical CPUs) + 1 ++ ++tasks in the global queue. This value is relevant for the time taken to look up ++tasks during scheduling. This will increase if many tasks with CPU affinity set ++in their policy to limit which CPUs they're allowed to run on if they outnumber ++the number of CPUs. The +1 is because when rescheduling a task, the CPU's ++currently running task is put back on the queue. Lookup will be described after ++the virtual deadline mechanism is explained. ++ ++Virtual deadline. ++ ++The key to achieving low latency, scheduling fairness, and "nice level" ++distribution in BFS is entirely in the virtual deadline mechanism. The one ++tunable in BFS is the rr_interval, or "round robin interval". This is the ++maximum time two SCHED_OTHER (or SCHED_NORMAL, the common scheduling policy) ++tasks of the same nice level will be running for, or looking at it the other ++way around, the longest duration two tasks of the same nice level will be ++delayed for. When a task requests cpu time, it is given a quota (time_slice) ++equal to the rr_interval and a virtual deadline. The virtual deadline is ++offset from the current time in jiffies by this equation: ++ ++ jiffies + (prio_ratio * rr_interval) ++ ++The prio_ratio is determined as a ratio compared to the baseline of nice -20 ++and increases by 10% per nice level. The deadline is a virtual one only in that ++no guarantee is placed that a task will actually be scheduled by this time, but ++it is used to compare which task should go next. There are three components to ++how a task is next chosen. First is time_slice expiration. If a task runs out ++of its time_slice, it is descheduled, the time_slice is refilled, and the ++deadline reset to that formula above. 
Second is sleep, where a task no longer ++is requesting CPU for whatever reason. The time_slice and deadline are _not_ ++adjusted in this case and are just carried over for when the task is next ++scheduled. Third is preemption, and that is when a newly waking task is deemed ++higher priority than a currently running task on any cpu by virtue of the fact ++that it has an earlier virtual deadline than the currently running task. The ++earlier deadline is the key to which task is next chosen for the first and ++second cases. Once a task is descheduled, it is put back on the queue, and an ++O(n) lookup of all queued-but-not-running tasks is done to determine which has ++the earliest deadline and that task is chosen to receive CPU next. ++ ++The CPU proportion of different nice tasks works out to be approximately the ++ ++ (prio_ratio difference)^2 ++ ++The reason it is squared is that a task's deadline does not change while it is ++running unless it runs out of time_slice. Thus, even if the time actually ++passes the deadline of another task that is queued, it will not get CPU time ++unless the current running task deschedules, and the time "base" (jiffies) is ++constantly moving. ++ ++Task lookup. ++ ++BFS has 103 priority queues. 100 of these are dedicated to the static priority ++of realtime tasks, and the remaining 3 are, in order of best to worst priority, ++SCHED_ISO (isochronous), SCHED_NORMAL, and SCHED_IDLEPRIO (idle priority ++scheduling). When a task of these priorities is queued, a bitmap of running ++priorities is set showing which of these priorities has tasks waiting for CPU ++time. When a CPU is made to reschedule, the lookup for the next task to get ++CPU time is performed in the following way: ++ ++First the bitmap is checked to see what static priority tasks are queued. If ++any realtime priorities are found, the corresponding queue is checked and the ++first task listed there is taken (provided CPU affinity is suitable) and lookup ++is complete. If the priority corresponds to a SCHED_ISO task, they are also ++taken in FIFO order (as they behave like SCHED_RR). If the priority corresponds ++to either SCHED_NORMAL or SCHED_IDLEPRIO, then the lookup becomes O(n). At this ++stage, every task in the runlist that corresponds to that priority is checked ++to see which has the earliest set deadline, and (provided it has suitable CPU ++affinity) it is taken off the runqueue and given the CPU. If a task has an ++expired deadline, it is taken and the rest of the lookup aborted (as they are ++chosen in FIFO order). ++ ++Thus, the lookup is O(n) in the worst case only, where n is as described ++earlier, as tasks may be chosen before the whole task list is looked over. ++ ++ ++Scalability. ++ ++The major limitations of BFS will be that of scalability, as the separate ++runqueue designs will have less lock contention as the number of CPUs rises. ++However they do not scale linearly even with separate runqueues as multiple ++runqueues will need to be locked concurrently on such designs to be able to ++achieve fair CPU balancing, to try and achieve some sort of nice-level fairness ++across CPUs, and to achieve low enough latency for tasks on a busy CPU when ++other CPUs would be more suited. BFS has the advantage that it requires no ++balancing algorithm whatsoever, as balancing occurs by proxy simply because ++all CPUs draw off the global runqueue, in priority and deadline order. 
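The deadline arithmetic just described is compact enough to check numerically. The following is an illustrative, self-contained C sketch, not the BFS code itself: it assumes a baseline prio_ratio of 100 at nice -20 and grows it by 10% per nice level as stated above, whereas the real scheduler keeps its own fixed-point ratio table.

#include <stdio.h>

/* Assumed scale: ratio 100 at nice -20, +10% per nice level (per the text). */
static unsigned long prio_ratio(int nice)
{
    unsigned long ratio = 100;
    int level;

    for (level = -20; level < nice; level++)
        ratio = ratio * 110 / 100;
    return ratio;
}

/* deadline = jiffies + prio_ratio * rr_interval, normalised to the baseline */
static unsigned long virtual_deadline(unsigned long now_jiffies, int nice,
                                      unsigned long rr_jiffies)
{
    return now_jiffies + prio_ratio(nice) * rr_jiffies / 100;
}

int main(void)
{
    unsigned long now = 1000;  /* arbitrary jiffies value for illustration */
    int nice;

    for (nice = -20; nice <= 19; nice += 13)
        printf("nice %3d: deadline %lu\n", nice,
               virtual_deadline(now, nice, 6 /* rr_interval in ticks */));
    return 0;
}

Per the squared relationship quoted above, two tasks five nice levels apart differ in prio_ratio by about 1.1^5, roughly 1.61, so their long-run CPU split works out to roughly 1.61 squared, about 2.6 to 1.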
Despite ++the fact that scalability is _not_ the prime concern of BFS, it both shows very ++good scalability to smaller numbers of CPUs and is likely a more scalable design ++at these numbers of CPUs. ++ ++It also has some very low overhead scalability features built into the design ++when it has been deemed their overhead is so marginal that they're worth adding. ++The first is the local copy of the running process' data to the CPU it's running ++on to allow that data to be updated lockless where possible. Then there is ++deference paid to the last CPU a task was running on, by trying that CPU first ++when looking for an idle CPU to use the next time it's scheduled. Finally there ++is the notion of "sticky" tasks that are flagged when they are involuntarily ++descheduled, meaning they still want further CPU time. This sticky flag is ++used to bias heavily against those tasks being scheduled on a different CPU ++unless that CPU would be otherwise idle. When a cpu frequency governor is used ++that scales with CPU load, such as ondemand, sticky tasks are not scheduled ++on a different CPU at all, preferring instead to go idle. This means the CPU ++they were bound to is more likely to increase its speed while the other CPU ++will go idle, thus speeding up total task execution time and likely decreasing ++power usage. This is the only scenario where BFS will allow a CPU to go idle ++in preference to scheduling a task on the earliest available spare CPU. ++ ++The real cost of migrating a task from one CPU to another is entirely dependant ++on the cache footprint of the task, how cache intensive the task is, how long ++it's been running on that CPU to take up the bulk of its cache, how big the CPU ++cache is, how fast and how layered the CPU cache is, how fast a context switch ++is... and so on. In other words, it's close to random in the real world where we ++do more than just one sole workload. The only thing we can be sure of is that ++it's not free. So BFS uses the principle that an idle CPU is a wasted CPU and ++utilising idle CPUs is more important than cache locality, and cache locality ++only plays a part after that. ++ ++When choosing an idle CPU for a waking task, the cache locality is determined ++according to where the task last ran and then idle CPUs are ranked from best ++to worst to choose the most suitable idle CPU based on cache locality, NUMA ++node locality and hyperthread sibling business. They are chosen in the ++following preference (if idle): ++ ++* Same core, idle or busy cache, idle threads ++* Other core, same cache, idle or busy cache, idle threads. ++* Same node, other CPU, idle cache, idle threads. ++* Same node, other CPU, busy cache, idle threads. ++* Same core, busy threads. ++* Other core, same cache, busy threads. ++* Same node, other CPU, busy threads. ++* Other node, other CPU, idle cache, idle threads. ++* Other node, other CPU, busy cache, idle threads. ++* Other node, other CPU, busy threads. ++ ++This shows the SMT or "hyperthread" awareness in the design as well which will ++choose a real idle core first before a logical SMT sibling which already has ++tasks on the physical CPU. ++ ++Early benchmarking of BFS suggested scalability dropped off at the 16 CPU mark. ++However this benchmarking was performed on an earlier design that was far less ++scalable than the current one so it's hard to know how scalable it is in terms ++of both CPUs (due to the global runqueue) and heavily loaded machines (due to ++O(n) lookup) at this stage. 
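To make the O(n) part of the lookup concrete, here is a simplified sketch of the earliest-deadline scan described under "Task lookup" above. It is not the BFS implementation: the priority bitmap, CPU affinity checks and the realtime/ISO FIFO queues are omitted, and a plain singly linked list stands in for the global run queue.

#include <stdio.h>

struct toy_task {
    const char *name;
    unsigned long deadline;
    struct toy_task *next;
};

/*
 * O(n) scan: take the first task whose deadline has already expired
 * (they are chosen in FIFO order, as the text says), otherwise the
 * task with the earliest virtual deadline.
 */
static struct toy_task *earliest_deadline_task(struct toy_task *queue,
                                               unsigned long now)
{
    struct toy_task *p, *best = NULL;

    for (p = queue; p; p = p->next) {
        if (p->deadline <= now)
            return p;           /* expired deadline: abort the scan */
        if (!best || p->deadline < best->deadline)
            best = p;
    }
    return best;
}

int main(void)
{
    struct toy_task c = { "C", 1900, NULL };
    struct toy_task b = { "B", 1500, &c };
    struct toy_task a = { "A", 1700, &b };
    struct toy_task *next = earliest_deadline_task(&a, 1000);

    printf("next task: %s\n", next ? next->name : "none");
    return 0;
}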
Note that in terms of scalability, the number of ++_logical_ CPUs matters, not the number of _physical_ CPUs. Thus, a dual (2x) ++quad core (4X) hyperthreaded (2X) machine is effectively a 16X. Newer benchmark ++results are very promising indeed, without needing to tweak any knobs, features ++or options. Benchmark contributions are most welcome. ++ ++ ++Features ++ ++As the initial prime target audience for BFS was the average desktop user, it ++was designed to not need tweaking, tuning or have features set to obtain benefit ++from it. Thus the number of knobs and features has been kept to an absolute ++minimum and should not require extra user input for the vast majority of cases. ++There are precisely 2 tunables, and 2 extra scheduling policies. The rr_interval ++and iso_cpu tunables, and the SCHED_ISO and SCHED_IDLEPRIO policies. In addition ++to this, BFS also uses sub-tick accounting. What BFS does _not_ now feature is ++support for CGROUPS. The average user should neither need to know what these ++are, nor should they need to be using them to have good desktop behaviour. ++ ++rr_interval ++ ++There is only one "scheduler" tunable, the round robin interval. This can be ++accessed in ++ ++ /proc/sys/kernel/rr_interval ++ ++The value is in milliseconds, and the default value is set to 6ms. Valid values ++are from 1 to 1000. Decreasing the value will decrease latencies at the cost of ++decreasing throughput, while increasing it will improve throughput, but at the ++cost of worsening latencies. The accuracy of the rr interval is limited by HZ ++resolution of the kernel configuration. Thus, the worst case latencies are ++usually slightly higher than this actual value. BFS uses "dithering" to try and ++minimise the effect the Hz limitation has. The default value of 6 is not an ++arbitrary one. It is based on the fact that humans can detect jitter at ++approximately 7ms, so aiming for much lower latencies is pointless under most ++circumstances. It is worth noting this fact when comparing the latency ++performance of BFS to other schedulers. Worst case latencies being higher than ++7ms are far worse than average latencies not being in the microsecond range. ++Experimentation has shown that rr intervals being increased up to 300 can ++improve throughput but beyond that, scheduling noise from elsewhere prevents ++further demonstrable throughput. ++ ++Isochronous scheduling. ++ ++Isochronous scheduling is a unique scheduling policy designed to provide ++near-real-time performance to unprivileged (ie non-root) users without the ++ability to starve the machine indefinitely. Isochronous tasks (which means ++"same time") are set using, for example, the schedtool application like so: ++ ++ schedtool -I -e amarok ++ ++This will start the audio application "amarok" as SCHED_ISO. How SCHED_ISO works ++is that it has a priority level between true realtime tasks and SCHED_NORMAL ++which would allow them to preempt all normal tasks, in a SCHED_RR fashion (ie, ++if multiple SCHED_ISO tasks are running, they purely round robin at rr_interval ++rate). However if ISO tasks run for more than a tunable finite amount of time, ++they are then demoted back to SCHED_NORMAL scheduling. This finite amount of ++time is the percentage of _total CPU_ available across the machine, configurable ++as a percentage in the following "resource handling" tunable (as opposed to a ++scheduler tunable): ++ ++ /proc/sys/kernel/iso_cpu ++ ++and is set to 70% by default. 
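Both tunables are ordinary procfs files, so they can be inspected (or, with root, written) like any other sysctl. A minimal hedged sketch follows, assuming a kernel with these patches applied; the files simply do not exist otherwise, and the paths are quoted verbatim from the text above.

#include <stdio.h>

static long read_tunable(const char *path)
{
    FILE *f = fopen(path, "r");
    long val = -1;

    if (f) {
        if (fscanf(f, "%ld", &val) != 1)
            val = -1;
        fclose(f);
    }
    return val;
}

int main(void)
{
    printf("rr_interval = %ld ms\n",
           read_tunable("/proc/sys/kernel/rr_interval"));
    printf("iso_cpu     = %ld %%\n",
           read_tunable("/proc/sys/kernel/iso_cpu"));
    return 0;
}

Writing works the same way: echo a new value into the file as root, keeping rr_interval within the 1 to 1000 range stated above.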
It is calculated over a rolling 5 second average ++Because it is the total CPU available, it means that on a multi CPU machine, it ++is possible to have an ISO task running as realtime scheduling indefinitely on ++just one CPU, as the other CPUs will be available. Setting this to 100 is the ++equivalent of giving all users SCHED_RR access and setting it to 0 removes the ++ability to run any pseudo-realtime tasks. ++ ++A feature of BFS is that it detects when an application tries to obtain a ++realtime policy (SCHED_RR or SCHED_FIFO) and the caller does not have the ++appropriate privileges to use those policies. When it detects this, it will ++give the task SCHED_ISO policy instead. Thus it is transparent to the user. ++Because some applications constantly set their policy as well as their nice ++level, there is potential for them to undo the override specified by the user ++on the command line of setting the policy to SCHED_ISO. To counter this, once ++a task has been set to SCHED_ISO policy, it needs superuser privileges to set ++it back to SCHED_NORMAL. This will ensure the task remains ISO and all child ++processes and threads will also inherit the ISO policy. ++ ++Idleprio scheduling. ++ ++Idleprio scheduling is a scheduling policy designed to give out CPU to a task ++_only_ when the CPU would be otherwise idle. The idea behind this is to allow ++ultra low priority tasks to be run in the background that have virtually no ++effect on the foreground tasks. This is ideally suited to distributed computing ++clients (like setiathome, folding, mprime etc) but can also be used to start ++a video encode or so on without any slowdown of other tasks. To avoid this ++policy from grabbing shared resources and holding them indefinitely, if it ++detects a state where the task is waiting on I/O, the machine is about to ++suspend to ram and so on, it will transiently schedule them as SCHED_NORMAL. As ++per the Isochronous task management, once a task has been scheduled as IDLEPRIO, ++it cannot be put back to SCHED_NORMAL without superuser privileges. Tasks can ++be set to start as SCHED_IDLEPRIO with the schedtool command like so: ++ ++ schedtool -D -e ./mprime ++ ++Subtick accounting. ++ ++It is surprisingly difficult to get accurate CPU accounting, and in many cases, ++the accounting is done by simply determining what is happening at the precise ++moment a timer tick fires off. This becomes increasingly inaccurate as the ++timer tick frequency (HZ) is lowered. It is possible to create an application ++which uses almost 100% CPU, yet by being descheduled at the right time, records ++zero CPU usage. While the main problem with this is that there are possible ++security implications, it is also difficult to determine how much CPU a task ++really does use. BFS tries to use the sub-tick accounting from the TSC clock, ++where possible, to determine real CPU usage. This is not entirely reliable, but ++is far more likely to produce accurate CPU usage data than the existing designs ++and will not show tasks as consuming no CPU usage when they actually are. Thus, ++the amount of CPU reported as being used by BFS will more accurately represent ++how much CPU the task itself is using (as is shown for example by the 'time' ++application), so the reported values may be quite different to other schedulers. ++Values reported as the 'load' are more prone to problems with this design, but ++per process values are closer to real usage. 
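The schedtool invocations above map onto the ordinary sched_setscheduler() syscall. The following hedged userspace sketch requests SCHED_ISO directly; the numeric value 4 is taken from the "#define SCHED_ISO 4" these patches add to include/linux/sched.h (mainline only reserves that slot, so libc headers do not define it), SCHED_IDLEPRIO is simply SCHED_IDLE (5), and the call only succeeds on a kernel carrying the BFS patch.

#include <sched.h>
#include <stdio.h>
#include <unistd.h>

#ifndef SCHED_ISO
#define SCHED_ISO 4     /* value added to sched.h by the BFS patch */
#endif

int main(void)
{
    struct sched_param sp = { .sched_priority = 0 };

    if (sched_setscheduler(0, SCHED_ISO, &sp) != 0) {
        perror("sched_setscheduler(SCHED_ISO)");
        return 1;
    }
    printf("pid %ld is now SCHED_ISO\n", (long)getpid());
    /* latency-sensitive work would run here */
    return 0;
}

As noted above, once a task is SCHED_ISO it takes superuser privileges to set it back to SCHED_NORMAL, so for unprivileged processes this is effectively a one-way switch.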
When comparing throughput of BFS ++to other designs, it is important to compare the actual completed work in terms ++of total wall clock time taken and total work done, rather than the reported ++"cpu usage". ++ ++ ++Con Kolivas Tue, 5 Apr 2011 +Index: linux-3.2-ck1/Documentation/sysctl/kernel.txt +=================================================================== +--- linux-3.2-ck1.orig/Documentation/sysctl/kernel.txt 2012-01-16 10:07:27.895097268 +1100 ++++ linux-3.2-ck1/Documentation/sysctl/kernel.txt 2012-01-16 10:07:31.336097029 +1100 +@@ -33,6 +33,7 @@ show up in /proc/sys/kernel: + - domainname + - hostname + - hotplug ++- iso_cpu + - kptr_restrict + - kstack_depth_to_print [ X86 only ] + - l2cr [ PPC only ] +@@ -58,6 +59,7 @@ show up in /proc/sys/kernel: + - randomize_va_space + - real-root-dev ==> Documentation/initrd.txt + - reboot-cmd [ SPARC only ] ++- rr_interval + - rtsig-max + - rtsig-nr + - sem +@@ -300,6 +302,16 @@ kernel stack. + + ============================================================== + ++iso_cpu: (BFS CPU scheduler only). ++ ++This sets the percentage cpu that the unprivileged SCHED_ISO tasks can ++run effectively at realtime priority, averaged over a rolling five ++seconds over the -whole- system, meaning all cpus. ++ ++Set to 70 (percent) by default. ++ ++============================================================== ++ + l2cr: (PPC only) + + This flag controls the L2 cache of G3 processor boards. If +@@ -495,6 +507,20 @@ rebooting. ??? + + ============================================================== + ++rr_interval: (BFS CPU scheduler only) ++ ++This is the smallest duration that any cpu process scheduling unit ++will run for. Increasing this value can increase throughput of cpu ++bound tasks substantially but at the expense of increased latencies ++overall. Conversely decreasing it will decrease average and maximum ++latencies but at the expense of throughput. This value is in ++milliseconds and the default value chosen depends on the number of ++cpus available at scheduler initialisation with a minimum of 6. ++ ++Valid values are from 1-1000. ++ ++============================================================== ++ + rtsig-max & rtsig-nr: + + The file rtsig-max can be used to tune the maximum number +Index: linux-3.2-ck1/fs/proc/base.c +=================================================================== +--- linux-3.2-ck1.orig/fs/proc/base.c 2012-01-16 10:07:27.896097267 +1100 ++++ linux-3.2-ck1/fs/proc/base.c 2012-01-16 10:07:31.337097029 +1100 +@@ -411,7 +411,7 @@ static int proc_pid_stack(struct seq_fil + static int proc_pid_schedstat(struct task_struct *task, char *buffer) + { + return sprintf(buffer, "%llu %llu %lu\n", +- (unsigned long long)task->se.sum_exec_runtime, ++ (unsigned long long)tsk_seruntime(task), + (unsigned long long)task->sched_info.run_delay, + task->sched_info.pcount); + } +Index: linux-3.2-ck1/include/linux/init_task.h +=================================================================== +--- linux-3.2-ck1.orig/include/linux/init_task.h 2012-01-16 10:07:27.896097267 +1100 ++++ linux-3.2-ck1/include/linux/init_task.h 2012-01-16 10:07:31.337097029 +1100 +@@ -126,12 +126,70 @@ extern struct cred init_cred; + # define INIT_PERF_EVENTS(tsk) + #endif + +-#define INIT_TASK_COMM "swapper" +- + /* + * INIT_TASK is used to set up the first task table, touch at + * your own risk!. 
Base=0, limit=0x1fffff (=2MB) + */ ++#ifdef CONFIG_SCHED_BFS ++#define INIT_TASK_COMM "BFS" ++#define INIT_TASK(tsk) \ ++{ \ ++ .state = 0, \ ++ .stack = &init_thread_info, \ ++ .usage = ATOMIC_INIT(2), \ ++ .flags = PF_KTHREAD, \ ++ .prio = NORMAL_PRIO, \ ++ .static_prio = MAX_PRIO-20, \ ++ .normal_prio = NORMAL_PRIO, \ ++ .deadline = 0, \ ++ .policy = SCHED_NORMAL, \ ++ .cpus_allowed = CPU_MASK_ALL, \ ++ .mm = NULL, \ ++ .active_mm = &init_mm, \ ++ .run_list = LIST_HEAD_INIT(tsk.run_list), \ ++ .time_slice = HZ, \ ++ .tasks = LIST_HEAD_INIT(tsk.tasks), \ ++ INIT_PUSHABLE_TASKS(tsk) \ ++ .ptraced = LIST_HEAD_INIT(tsk.ptraced), \ ++ .ptrace_entry = LIST_HEAD_INIT(tsk.ptrace_entry), \ ++ .real_parent = &tsk, \ ++ .parent = &tsk, \ ++ .children = LIST_HEAD_INIT(tsk.children), \ ++ .sibling = LIST_HEAD_INIT(tsk.sibling), \ ++ .group_leader = &tsk, \ ++ RCU_INIT_POINTER(.real_cred, &init_cred), \ ++ RCU_INIT_POINTER(.cred, &init_cred), \ ++ .comm = INIT_TASK_COMM, \ ++ .thread = INIT_THREAD, \ ++ .fs = &init_fs, \ ++ .files = &init_files, \ ++ .signal = &init_signals, \ ++ .sighand = &init_sighand, \ ++ .nsproxy = &init_nsproxy, \ ++ .pending = { \ ++ .list = LIST_HEAD_INIT(tsk.pending.list), \ ++ .signal = {{0}}}, \ ++ .blocked = {{0}}, \ ++ .alloc_lock = __SPIN_LOCK_UNLOCKED(tsk.alloc_lock), \ ++ .journal_info = NULL, \ ++ .cpu_timers = INIT_CPU_TIMERS(tsk.cpu_timers), \ ++ .pi_lock = __RAW_SPIN_LOCK_UNLOCKED(tsk.pi_lock), \ ++ .timer_slack_ns = 50000, /* 50 usec default slack */ \ ++ .pids = { \ ++ [PIDTYPE_PID] = INIT_PID_LINK(PIDTYPE_PID), \ ++ [PIDTYPE_PGID] = INIT_PID_LINK(PIDTYPE_PGID), \ ++ [PIDTYPE_SID] = INIT_PID_LINK(PIDTYPE_SID), \ ++ }, \ ++ INIT_IDS \ ++ INIT_PERF_EVENTS(tsk) \ ++ INIT_TRACE_IRQFLAGS \ ++ INIT_LOCKDEP \ ++ INIT_FTRACE_GRAPH \ ++ INIT_TRACE_RECURSION \ ++ INIT_TASK_RCU_PREEMPT(tsk) \ ++} ++#else /* CONFIG_SCHED_BFS */ ++#define INIT_TASK_COMM "swapper" + #define INIT_TASK(tsk) \ + { \ + .state = 0, \ +@@ -194,7 +252,7 @@ extern struct cred init_cred; + INIT_TRACE_RECURSION \ + INIT_TASK_RCU_PREEMPT(tsk) \ + } +- ++#endif /* CONFIG_SCHED_BFS */ + + #define INIT_CPU_TIMERS(cpu_timers) \ + { \ +Index: linux-3.2-ck1/include/linux/ioprio.h +=================================================================== +--- linux-3.2-ck1.orig/include/linux/ioprio.h 2012-01-16 10:07:27.896097267 +1100 ++++ linux-3.2-ck1/include/linux/ioprio.h 2012-01-16 10:07:31.338097029 +1100 +@@ -64,6 +64,8 @@ static inline int task_ioprio_class(stru + + static inline int task_nice_ioprio(struct task_struct *task) + { ++ if (iso_task(task)) ++ return 0; + return (task_nice(task) + 20) / 5; + } + +Index: linux-3.2-ck1/include/linux/sched.h +=================================================================== +--- linux-3.2-ck1.orig/include/linux/sched.h 2012-01-16 10:07:27.896097267 +1100 ++++ linux-3.2-ck1/include/linux/sched.h 2012-01-16 10:07:32.577096941 +1100 +@@ -37,8 +37,15 @@ + #define SCHED_FIFO 1 + #define SCHED_RR 2 + #define SCHED_BATCH 3 +-/* SCHED_ISO: reserved but not implemented yet */ ++/* SCHED_ISO: Implemented on BFS only */ + #define SCHED_IDLE 5 ++#define SCHED_IDLEPRIO SCHED_IDLE ++#ifdef CONFIG_SCHED_BFS ++#define SCHED_ISO 4 ++#define SCHED_MAX (SCHED_IDLEPRIO) ++#define SCHED_RANGE(policy) ((policy) <= SCHED_MAX) ++#endif ++ + /* Can be ORed in to make sure the process is reverted back to SCHED_NORMAL on fork */ + #define SCHED_RESET_ON_FORK 0x40000000 + +@@ -269,8 +276,6 @@ extern asmlinkage void schedule_tail(str + extern void init_idle(struct task_struct *idle, int 
cpu); + extern void init_idle_bootup_task(struct task_struct *idle); + +-extern int runqueue_is_locked(int cpu); +- + #if defined(CONFIG_SMP) && defined(CONFIG_NO_HZ) + extern void select_nohz_load_balancer(int stop_tick); + extern int get_nohz_timer_target(void); +@@ -1226,15 +1231,31 @@ struct task_struct { + + #ifdef CONFIG_SMP + struct llist_node wake_entry; +- int on_cpu; + #endif +- int on_rq; ++#if defined(CONFIG_SMP) || defined(CONFIG_SCHED_BFS) ++ bool on_cpu; ++#endif ++#ifndef CONFIG_SCHED_BFS ++ bool on_rq; ++#endif + + int prio, static_prio, normal_prio; + unsigned int rt_priority; ++#ifdef CONFIG_SCHED_BFS ++ int time_slice; ++ u64 deadline; ++ struct list_head run_list; ++ u64 last_ran; ++ u64 sched_time; /* sched_clock time spent running */ ++#ifdef CONFIG_SMP ++ bool sticky; /* Soft affined flag */ ++#endif ++ unsigned long rt_timeout; ++#else /* CONFIG_SCHED_BFS */ + const struct sched_class *sched_class; + struct sched_entity se; + struct sched_rt_entity rt; ++#endif + + #ifdef CONFIG_PREEMPT_NOTIFIERS + /* list of struct preempt_notifier: */ +@@ -1341,6 +1362,9 @@ struct task_struct { + int __user *clear_child_tid; /* CLONE_CHILD_CLEARTID */ + + cputime_t utime, stime, utimescaled, stimescaled; ++#ifdef CONFIG_SCHED_BFS ++ unsigned long utime_pc, stime_pc; ++#endif + cputime_t gtime; + #ifndef CONFIG_VIRT_CPU_ACCOUNTING + cputime_t prev_utime, prev_stime; +@@ -1574,6 +1598,67 @@ struct task_struct { + #endif + }; + ++#ifdef CONFIG_SCHED_BFS ++bool grunqueue_is_locked(void); ++void grq_unlock_wait(void); ++void cpu_scaling(int cpu); ++void cpu_nonscaling(int cpu); ++int above_background_load(void); ++#define tsk_seruntime(t) ((t)->sched_time) ++#define tsk_rttimeout(t) ((t)->rt_timeout) ++ ++static inline void tsk_cpus_current(struct task_struct *p) ++{ ++} ++ ++static inline int runqueue_is_locked(int cpu) ++{ ++ return grunqueue_is_locked(); ++} ++ ++static inline void print_scheduler_version(void) ++{ ++ printk(KERN_INFO"BFS CPU scheduler v0.416 by Con Kolivas.\n"); ++} ++ ++static inline bool iso_task(struct task_struct *p) ++{ ++ return (p->policy == SCHED_ISO); ++} ++#else /* CFS */ ++extern int runqueue_is_locked(int cpu); ++static inline void cpu_scaling(int cpu) ++{ ++} ++ ++static inline void cpu_nonscaling(int cpu) ++{ ++} ++#define tsk_seruntime(t) ((t)->se.sum_exec_runtime) ++#define tsk_rttimeout(t) ((t)->rt.timeout) ++ ++static inline void tsk_cpus_current(struct task_struct *p) ++{ ++ p->rt.nr_cpus_allowed = current->rt.nr_cpus_allowed; ++} ++ ++static inline void print_scheduler_version(void) ++{ ++ printk(KERN_INFO"CFS CPU scheduler.\n"); ++} ++ ++static inline bool iso_task(struct task_struct *p) ++{ ++ return false; ++} ++ ++/* Anyone feel like implementing this? */ ++static inline int above_background_load(void) ++{ ++ return 1; ++} ++#endif /* CONFIG_SCHED_BFS */ ++ + /* Future-safe accessor for struct task_struct's cpus_allowed. 
*/ + #define tsk_cpus_allowed(tsk) (&(tsk)->cpus_allowed) + +@@ -1591,10 +1676,20 @@ struct task_struct { + */ + + #define MAX_USER_RT_PRIO 100 +-#define MAX_RT_PRIO MAX_USER_RT_PRIO ++#define MAX_RT_PRIO (MAX_USER_RT_PRIO + 1) ++#define DEFAULT_PRIO (MAX_RT_PRIO + 20) + ++#ifdef CONFIG_SCHED_BFS ++#define PRIO_RANGE (40) ++#define MAX_PRIO (MAX_RT_PRIO + PRIO_RANGE) ++#define ISO_PRIO (MAX_RT_PRIO) ++#define NORMAL_PRIO (MAX_RT_PRIO + 1) ++#define IDLE_PRIO (MAX_RT_PRIO + 2) ++#define PRIO_LIMIT ((IDLE_PRIO) + 1) ++#else /* CONFIG_SCHED_BFS */ + #define MAX_PRIO (MAX_RT_PRIO + 40) +-#define DEFAULT_PRIO (MAX_RT_PRIO + 20) ++#define NORMAL_PRIO DEFAULT_PRIO ++#endif /* CONFIG_SCHED_BFS */ + + static inline int rt_prio(int prio) + { +@@ -1961,7 +2056,7 @@ extern unsigned long long + task_sched_runtime(struct task_struct *task); + + /* sched_exec is called by processes performing an exec */ +-#ifdef CONFIG_SMP ++#if defined(CONFIG_SMP) && !defined(CONFIG_SCHED_BFS) + extern void sched_exec(void); + #else + #define sched_exec() {} +@@ -2606,7 +2701,7 @@ static inline unsigned int task_cpu(cons + return 0; + } + +-static inline void set_task_cpu(struct task_struct *p, unsigned int cpu) ++static inline void set_task_cpu(struct task_struct *p, int cpu) + { + } + +Index: linux-3.2-ck1/init/Kconfig +=================================================================== +--- linux-3.2-ck1.orig/init/Kconfig 2012-01-16 10:07:27.897097267 +1100 ++++ linux-3.2-ck1/init/Kconfig 2012-01-16 10:07:31.338097029 +1100 +@@ -29,6 +29,19 @@ config IRQ_WORK + + menu "General setup" + ++config SCHED_BFS ++ bool "BFS cpu scheduler" ++ ---help--- ++ The Brain Fuck CPU Scheduler for excellent interactivity and ++ responsiveness on the desktop and solid scalability on normal ++ hardware. Not recommended for 4096 CPUs. ++ ++ Currently incompatible with the Group CPU scheduler, and RCU TORTURE ++ TEST so these options are disabled. ++ ++ Say Y here. ++ default y ++ + config EXPERIMENTAL + bool "Prompt for development and/or incomplete code/drivers" + ---help--- +@@ -626,6 +639,7 @@ config PROC_PID_CPUSET + + config CGROUP_CPUACCT + bool "Simple CPU accounting cgroup subsystem" ++ depends on !SCHED_BFS + help + Provides a simple Resource Controller for monitoring the + total CPU consumed by the tasks in a cgroup. 
+@@ -702,7 +716,7 @@ config CGROUP_PERF + + menuconfig CGROUP_SCHED + bool "Group CPU scheduler" +- depends on EXPERIMENTAL ++ depends on EXPERIMENTAL && !SCHED_BFS + default n + help + This feature lets CPU scheduler recognize task groups and control CPU +@@ -828,6 +842,7 @@ endif # NAMESPACES + + config SCHED_AUTOGROUP + bool "Automatic process group scheduling" ++ depends on !SCHED_BFS + select EVENTFD + select CGROUPS + select CGROUP_SCHED +Index: linux-3.2-ck1/init/main.c +=================================================================== +--- linux-3.2-ck1.orig/init/main.c 2012-01-16 10:07:27.897097267 +1100 ++++ linux-3.2-ck1/init/main.c 2012-01-16 10:07:31.339097029 +1100 +@@ -763,6 +763,7 @@ static noinline int init_post(void) + system_state = SYSTEM_RUNNING; + numa_default_policy(); + ++ print_scheduler_version(); + + current->signal->flags |= SIGNAL_UNKILLABLE; + +Index: linux-3.2-ck1/kernel/delayacct.c +=================================================================== +--- linux-3.2-ck1.orig/kernel/delayacct.c 2012-01-16 10:07:27.897097267 +1100 ++++ linux-3.2-ck1/kernel/delayacct.c 2012-01-16 10:07:31.339097029 +1100 +@@ -130,7 +130,7 @@ int __delayacct_add_tsk(struct taskstats + */ + t1 = tsk->sched_info.pcount; + t2 = tsk->sched_info.run_delay; +- t3 = tsk->se.sum_exec_runtime; ++ t3 = tsk_seruntime(tsk); + + d->cpu_count += t1; + +Index: linux-3.2-ck1/kernel/exit.c +=================================================================== +--- linux-3.2-ck1.orig/kernel/exit.c 2012-01-16 10:07:27.897097267 +1100 ++++ linux-3.2-ck1/kernel/exit.c 2012-01-16 10:07:31.339097029 +1100 +@@ -131,7 +131,7 @@ static void __exit_signal(struct task_st + sig->inblock += task_io_get_inblock(tsk); + sig->oublock += task_io_get_oublock(tsk); + task_io_accounting_add(&sig->ioac, &tsk->ioac); +- sig->sum_sched_runtime += tsk->se.sum_exec_runtime; ++ sig->sum_sched_runtime += tsk_seruntime(tsk); + } + + sig->nr_threads--; +Index: linux-3.2-ck1/kernel/posix-cpu-timers.c +=================================================================== +--- linux-3.2-ck1.orig/kernel/posix-cpu-timers.c 2012-01-16 10:07:27.897097267 +1100 ++++ linux-3.2-ck1/kernel/posix-cpu-timers.c 2012-01-16 10:07:31.340097028 +1100 +@@ -512,7 +512,7 @@ static void cleanup_timers(struct list_h + void posix_cpu_timers_exit(struct task_struct *tsk) + { + cleanup_timers(tsk->cpu_timers, +- tsk->utime, tsk->stime, tsk->se.sum_exec_runtime); ++ tsk->utime, tsk->stime, tsk_seruntime(tsk)); + + } + void posix_cpu_timers_exit_group(struct task_struct *tsk) +@@ -522,7 +522,7 @@ void posix_cpu_timers_exit_group(struct + cleanup_timers(tsk->signal->cpu_timers, + cputime_add(tsk->utime, sig->utime), + cputime_add(tsk->stime, sig->stime), +- tsk->se.sum_exec_runtime + sig->sum_sched_runtime); ++ tsk_seruntime(tsk) + sig->sum_sched_runtime); + } + + static void clear_dead_task(struct k_itimer *timer, union cpu_time_count now) +@@ -953,7 +953,7 @@ static void check_thread_timers(struct t + struct cpu_timer_list *t = list_first_entry(timers, + struct cpu_timer_list, + entry); +- if (!--maxfire || tsk->se.sum_exec_runtime < t->expires.sched) { ++ if (!--maxfire || tsk_seruntime(tsk) < t->expires.sched) { + tsk->cputime_expires.sched_exp = t->expires.sched; + break; + } +@@ -970,7 +970,7 @@ static void check_thread_timers(struct t + ACCESS_ONCE(sig->rlim[RLIMIT_RTTIME].rlim_max); + + if (hard != RLIM_INFINITY && +- tsk->rt.timeout > DIV_ROUND_UP(hard, USEC_PER_SEC/HZ)) { ++ tsk_rttimeout(tsk) > DIV_ROUND_UP(hard, USEC_PER_SEC/HZ)) { + /* + * At 
the hard limit, we just die. + * No need to calculate anything else now. +@@ -978,7 +978,7 @@ static void check_thread_timers(struct t + __group_send_sig_info(SIGKILL, SEND_SIG_PRIV, tsk); + return; + } +- if (tsk->rt.timeout > DIV_ROUND_UP(soft, USEC_PER_SEC/HZ)) { ++ if (tsk_rttimeout(tsk) > DIV_ROUND_UP(soft, USEC_PER_SEC/HZ)) { + /* + * At the soft limit, send a SIGXCPU every second. + */ +@@ -1280,7 +1280,7 @@ static inline int fastpath_timer_check(s + struct task_cputime task_sample = { + .utime = tsk->utime, + .stime = tsk->stime, +- .sum_exec_runtime = tsk->se.sum_exec_runtime ++ .sum_exec_runtime = tsk_seruntime(tsk) + }; + + if (task_cputime_expired(&task_sample, &tsk->cputime_expires)) +Index: linux-3.2-ck1/kernel/sched_bfs.c +=================================================================== +--- /dev/null 1970-01-01 00:00:00.000000000 +0000 ++++ linux-3.2-ck1/kernel/sched_bfs.c 2012-01-16 10:07:31.343097028 +1100 +@@ -0,0 +1,7197 @@ ++/* ++ * kernel/sched_bfs.c, was sched.c ++ * ++ * Kernel scheduler and related syscalls ++ * ++ * Copyright (C) 1991-2002 Linus Torvalds ++ * ++ * 1996-12-23 Modified by Dave Grothe to fix bugs in semaphores and ++ * make semaphores SMP safe ++ * 1998-11-19 Implemented schedule_timeout() and related stuff ++ * by Andrea Arcangeli ++ * 2002-01-04 New ultra-scalable O(1) scheduler by Ingo Molnar: ++ * hybrid priority-list and round-robin design with ++ * an array-switch method of distributing timeslices ++ * and per-CPU runqueues. Cleanups and useful suggestions ++ * by Davide Libenzi, preemptible kernel bits by Robert Love. ++ * 2003-09-03 Interactivity tuning by Con Kolivas. ++ * 2004-04-02 Scheduler domains code by Nick Piggin ++ * 2007-04-15 Work begun on replacing all interactivity tuning with a ++ * fair scheduling design by Con Kolivas. ++ * 2007-05-05 Load balancing (smp-nice) and other improvements ++ * by Peter Williams ++ * 2007-05-06 Interactivity improvements to CFS by Mike Galbraith ++ * 2007-07-01 Group scheduling enhancements by Srivatsa Vaddagiri ++ * 2007-11-29 RT balancing improvements by Steven Rostedt, Gregory Haskins, ++ * Thomas Gleixner, Mike Kravetz ++ * now Brainfuck deadline scheduling policy by Con Kolivas deletes ++ * a whole lot of those previous things. ++ */ ++ ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++ ++#include ++#include ++#include ++#ifdef CONFIG_PARAVIRT ++#include ++#endif ++ ++#include "sched_cpupri.h" ++#include "workqueue_sched.h" ++ ++#define CREATE_TRACE_POINTS ++#include ++ ++#define rt_prio(prio) unlikely((prio) < MAX_RT_PRIO) ++#define rt_task(p) rt_prio((p)->prio) ++#define rt_queue(rq) rt_prio((rq)->rq_prio) ++#define batch_task(p) (unlikely((p)->policy == SCHED_BATCH)) ++#define is_rt_policy(policy) ((policy) == SCHED_FIFO || \ ++ (policy) == SCHED_RR) ++#define has_rt_policy(p) unlikely(is_rt_policy((p)->policy)) ++#define idleprio_task(p) unlikely((p)->policy == SCHED_IDLEPRIO) ++#define iso_task(p) unlikely((p)->policy == SCHED_ISO) ++#define iso_queue(rq) unlikely((rq)->rq_policy == SCHED_ISO) ++#define ISO_PERIOD ((5 * HZ * grq.noc) + 1) ++ ++/* ++ * Convert user-nice values [ -20 ... 0 ... 
19 ] ++ * to static priority [ MAX_RT_PRIO..MAX_PRIO-1 ], ++ * and back. ++ */ ++#define NICE_TO_PRIO(nice) (MAX_RT_PRIO + (nice) + 20) ++#define PRIO_TO_NICE(prio) ((prio) - MAX_RT_PRIO - 20) ++#define TASK_NICE(p) PRIO_TO_NICE((p)->static_prio) ++ ++/* ++ * 'User priority' is the nice value converted to something we ++ * can work with better when scaling various scheduler parameters, ++ * it's a [ 0 ... 39 ] range. ++ */ ++#define USER_PRIO(p) ((p) - MAX_RT_PRIO) ++#define TASK_USER_PRIO(p) USER_PRIO((p)->static_prio) ++#define MAX_USER_PRIO (USER_PRIO(MAX_PRIO)) ++#define SCHED_PRIO(p) ((p) + MAX_RT_PRIO) ++#define STOP_PRIO (MAX_RT_PRIO - 1) ++ ++/* ++ * Some helpers for converting to/from various scales. Use shifts to get ++ * approximate multiples of ten for less overhead. ++ */ ++#define JIFFIES_TO_NS(TIME) ((TIME) * (1000000000 / HZ)) ++#define JIFFY_NS (1000000000 / HZ) ++#define HALF_JIFFY_NS (1000000000 / HZ / 2) ++#define HALF_JIFFY_US (1000000 / HZ / 2) ++#define MS_TO_NS(TIME) ((TIME) << 20) ++#define MS_TO_US(TIME) ((TIME) << 10) ++#define NS_TO_MS(TIME) ((TIME) >> 20) ++#define NS_TO_US(TIME) ((TIME) >> 10) ++ ++#define RESCHED_US (100) /* Reschedule if less than this many μs left */ ++ ++/* ++ * This is the time all tasks within the same priority round robin. ++ * Value is in ms and set to a minimum of 6ms. Scales with number of cpus. ++ * Tunable via /proc interface. ++ */ ++int rr_interval __read_mostly = 6; ++ ++/* ++ * sched_iso_cpu - sysctl which determines the cpu percentage SCHED_ISO tasks ++ * are allowed to run five seconds as real time tasks. This is the total over ++ * all online cpus. ++ */ ++int sched_iso_cpu __read_mostly = 70; ++ ++/* ++ * The relative length of deadline for each priority(nice) level. ++ */ ++static int prio_ratios[PRIO_RANGE] __read_mostly; ++ ++/* ++ * The quota handed out to tasks of all priority levels when refilling their ++ * time_slice. ++ */ ++static inline int timeslice(void) ++{ ++ return MS_TO_US(rr_interval); ++} ++ ++/* ++ * The global runqueue data that all CPUs work off. Data is protected either ++ * by the global grq lock, or the discrete lock that precedes the data in this ++ * struct. ++ */ ++struct global_rq { ++ raw_spinlock_t lock; ++ unsigned long nr_running; ++ unsigned long nr_uninterruptible; ++ unsigned long long nr_switches; ++ struct list_head queue[PRIO_LIMIT]; ++ DECLARE_BITMAP(prio_bitmap, PRIO_LIMIT + 1); ++#ifdef CONFIG_SMP ++ unsigned long qnr; /* queued not running */ ++ cpumask_t cpu_idle_map; ++ bool idle_cpus; ++#endif ++ int noc; /* num_online_cpus stored and updated when it changes */ ++ u64 niffies; /* Nanosecond jiffies */ ++ unsigned long last_jiffy; /* Last jiffy we updated niffies */ ++ ++ raw_spinlock_t iso_lock; ++ int iso_ticks; ++ int iso_refractory; ++}; ++ ++#ifdef CONFIG_SMP ++ ++/* ++ * We add the notion of a root-domain which will be used to define per-domain ++ * variables. Each exclusive cpuset essentially defines an island domain by ++ * fully partitioning the member cpus from any other cpuset. Whenever a new ++ * exclusive cpuset is created, we also create and attach a new root-domain ++ * object. ++ * ++ */ ++struct root_domain { ++ atomic_t refcount; ++ atomic_t rto_count; ++ struct rcu_head rcu; ++ cpumask_var_t span; ++ cpumask_var_t online; ++ ++ /* ++ * The "RT overload" flag: it gets set if a CPU has more than ++ * one runnable RT task. 
++ */ ++ cpumask_var_t rto_mask; ++ struct cpupri cpupri; ++}; ++ ++/* ++ * By default the system creates a single root-domain with all cpus as ++ * members (mimicking the global state we have today). ++ */ ++static struct root_domain def_root_domain; ++ ++#endif /* CONFIG_SMP */ ++ ++/* There can be only one */ ++static struct global_rq grq; ++ ++/* ++ * This is the main, per-CPU runqueue data structure. ++ * This data should only be modified by the local cpu. ++ */ ++struct rq { ++#ifdef CONFIG_SMP ++#ifdef CONFIG_NO_HZ ++ u64 nohz_stamp; ++ unsigned char in_nohz_recently; ++#endif ++#endif ++ ++ struct task_struct *curr, *idle, *stop; ++ struct mm_struct *prev_mm; ++ ++ /* Stored data about rq->curr to work outside grq lock */ ++ u64 rq_deadline; ++ unsigned int rq_policy; ++ int rq_time_slice; ++ u64 rq_last_ran; ++ int rq_prio; ++ bool rq_running; /* There is a task running */ ++ ++ /* Accurate timekeeping data */ ++ u64 timekeep_clock; ++ unsigned long user_pc, nice_pc, irq_pc, softirq_pc, system_pc, ++ iowait_pc, idle_pc; ++ long account_pc; ++ atomic_t nr_iowait; ++ ++#ifdef CONFIG_SMP ++ int cpu; /* cpu of this runqueue */ ++ bool online; ++ bool scaling; /* This CPU is managed by a scaling CPU freq governor */ ++ struct task_struct *sticky_task; ++ ++ struct root_domain *rd; ++ struct sched_domain *sd; ++ int *cpu_locality; /* CPU relative cache distance */ ++#ifdef CONFIG_SCHED_SMT ++ bool (*siblings_idle)(int cpu); ++ /* See if all smt siblings are idle */ ++ cpumask_t smt_siblings; ++#endif ++#ifdef CONFIG_SCHED_MC ++ bool (*cache_idle)(int cpu); ++ /* See if all cache siblings are idle */ ++ cpumask_t cache_siblings; ++#endif ++ u64 last_niffy; /* Last time this RQ updated grq.niffies */ ++#endif ++#ifdef CONFIG_IRQ_TIME_ACCOUNTING ++ u64 prev_irq_time; ++#endif ++#ifdef CONFIG_PARAVIRT ++ u64 prev_steal_time; ++#endif ++#ifdef CONFIG_PARAVIRT_TIME_ACCOUNTING ++ u64 prev_steal_time_rq; ++#endif ++ ++ u64 clock, old_clock, last_tick; ++ u64 clock_task; ++ bool dither; ++ ++#ifdef CONFIG_SCHEDSTATS ++ ++ /* latency stats */ ++ struct sched_info rq_sched_info; ++ unsigned long long rq_cpu_time; ++ /* could above be rq->cfs_rq.exec_clock + rq->rt_rq.rt_runtime ? */ ++ ++ /* sys_sched_yield() stats */ ++ unsigned int yld_count; ++ ++ /* schedule() stats */ ++ unsigned int sched_switch; ++ unsigned int sched_count; ++ unsigned int sched_goidle; ++ ++ /* try_to_wake_up() stats */ ++ unsigned int ttwu_count; ++ unsigned int ttwu_local; ++#endif ++}; ++ ++static DEFINE_PER_CPU(struct rq, runqueues) ____cacheline_aligned_in_smp; ++static DEFINE_MUTEX(sched_hotcpu_mutex); ++ ++#ifdef CONFIG_SMP ++/* ++ * sched_domains_mutex serialises calls to init_sched_domains, ++ * detach_destroy_domains and partition_sched_domains. ++ */ ++static DEFINE_MUTEX(sched_domains_mutex); ++ ++/* ++ * By default the system creates a single root-domain with all cpus as ++ * members (mimicking the global state we have today). ++ */ ++static struct root_domain def_root_domain; ++ ++int __weak arch_sd_sibling_asym_packing(void) ++{ ++ return 0*SD_ASYM_PACKING; ++} ++#endif ++ ++#define rcu_dereference_check_sched_domain(p) \ ++ rcu_dereference_check((p), \ ++ lockdep_is_held(&sched_domains_mutex)) ++ ++/* ++ * The domain tree (rq->sd) is protected by RCU's quiescent state transition. ++ * See detach_destroy_domains: synchronize_sched for details. ++ * ++ * The domain tree of any CPU may only be accessed from within ++ * preempt-disabled sections. 
++ */ ++#define for_each_domain(cpu, __sd) \ ++ for (__sd = rcu_dereference_check_sched_domain(cpu_rq(cpu)->sd); __sd; __sd = __sd->parent) ++ ++static inline void update_rq_clock(struct rq *rq); ++ ++/* ++ * Sanity check should sched_clock return bogus values. We make sure it does ++ * not appear to go backwards, and use jiffies to determine the maximum and ++ * minimum it could possibly have increased, and round down to the nearest ++ * jiffy when it falls outside this. ++ */ ++static inline void niffy_diff(s64 *niff_diff, int jiff_diff) ++{ ++ unsigned long min_diff, max_diff; ++ ++ if (jiff_diff > 1) ++ min_diff = JIFFIES_TO_NS(jiff_diff - 1); ++ else ++ min_diff = 1; ++ /* Round up to the nearest tick for maximum */ ++ max_diff = JIFFIES_TO_NS(jiff_diff + 1); ++ ++ if (unlikely(*niff_diff < min_diff || *niff_diff > max_diff)) ++ *niff_diff = min_diff; ++} ++ ++#ifdef CONFIG_SMP ++#define cpu_rq(cpu) (&per_cpu(runqueues, (cpu))) ++#define this_rq() (&__get_cpu_var(runqueues)) ++#define task_rq(p) cpu_rq(task_cpu(p)) ++#define cpu_curr(cpu) (cpu_rq(cpu)->curr) ++static inline int cpu_of(struct rq *rq) ++{ ++ return rq->cpu; ++} ++ ++/* ++ * Niffies are a globally increasing nanosecond counter. Whenever a runqueue ++ * clock is updated with the grq.lock held, it is an opportunity to update the ++ * niffies value. Any CPU can update it by adding how much its clock has ++ * increased since it last updated niffies, minus any added niffies by other ++ * CPUs. ++ */ ++static inline void update_clocks(struct rq *rq) ++{ ++ s64 ndiff; ++ long jdiff; ++ ++ update_rq_clock(rq); ++ ndiff = rq->clock - rq->old_clock; ++ /* old_clock is only updated when we are updating niffies */ ++ rq->old_clock = rq->clock; ++ ndiff -= grq.niffies - rq->last_niffy; ++ jdiff = jiffies - grq.last_jiffy; ++ niffy_diff(&ndiff, jdiff); ++ grq.last_jiffy += jdiff; ++ grq.niffies += ndiff; ++ rq->last_niffy = grq.niffies; ++} ++#else /* CONFIG_SMP */ ++static struct rq *uprq; ++#define cpu_rq(cpu) (uprq) ++#define this_rq() (uprq) ++#define task_rq(p) (uprq) ++#define cpu_curr(cpu) ((uprq)->curr) ++static inline int cpu_of(struct rq *rq) ++{ ++ return 0; ++} ++ ++static inline void update_clocks(struct rq *rq) ++{ ++ s64 ndiff; ++ long jdiff; ++ ++ update_rq_clock(rq); ++ ndiff = rq->clock - rq->old_clock; ++ rq->old_clock = rq->clock; ++ jdiff = jiffies - grq.last_jiffy; ++ niffy_diff(&ndiff, jdiff); ++ grq.last_jiffy += jdiff; ++ grq.niffies += ndiff; ++} ++#endif ++#define raw_rq() (&__raw_get_cpu_var(runqueues)) ++ ++#include "sched_stats.h" ++ ++#ifndef prepare_arch_switch ++# define prepare_arch_switch(next) do { } while (0) ++#endif ++#ifndef finish_arch_switch ++# define finish_arch_switch(prev) do { } while (0) ++#endif ++ ++/* ++ * All common locking functions performed on grq.lock. rq->clock is local to ++ * the CPU accessing it so it can be modified just with interrupts disabled ++ * when we're not updating niffies. ++ * Looking up task_rq must be done under grq.lock to be safe. 
++ */ ++static void update_rq_clock_task(struct rq *rq, s64 delta); ++ ++static inline void update_rq_clock(struct rq *rq) ++{ ++ s64 delta = sched_clock_cpu(cpu_of(rq)) - rq->clock; ++ ++ rq->clock += delta; ++ update_rq_clock_task(rq, delta); ++} ++ ++static inline bool task_running(struct task_struct *p) ++{ ++ return p->on_cpu; ++} ++ ++static inline void grq_lock(void) ++ __acquires(grq.lock) ++{ ++ raw_spin_lock(&grq.lock); ++} ++ ++static inline void grq_unlock(void) ++ __releases(grq.lock) ++{ ++ raw_spin_unlock(&grq.lock); ++} ++ ++static inline void grq_lock_irq(void) ++ __acquires(grq.lock) ++{ ++ raw_spin_lock_irq(&grq.lock); ++} ++ ++static inline void time_lock_grq(struct rq *rq) ++ __acquires(grq.lock) ++{ ++ grq_lock(); ++ update_clocks(rq); ++} ++ ++static inline void grq_unlock_irq(void) ++ __releases(grq.lock) ++{ ++ raw_spin_unlock_irq(&grq.lock); ++} ++ ++static inline void grq_lock_irqsave(unsigned long *flags) ++ __acquires(grq.lock) ++{ ++ raw_spin_lock_irqsave(&grq.lock, *flags); ++} ++ ++static inline void grq_unlock_irqrestore(unsigned long *flags) ++ __releases(grq.lock) ++{ ++ raw_spin_unlock_irqrestore(&grq.lock, *flags); ++} ++ ++static inline struct rq ++*task_grq_lock(struct task_struct *p, unsigned long *flags) ++ __acquires(grq.lock) ++{ ++ grq_lock_irqsave(flags); ++ return task_rq(p); ++} ++ ++static inline struct rq ++*time_task_grq_lock(struct task_struct *p, unsigned long *flags) ++ __acquires(grq.lock) ++{ ++ struct rq *rq = task_grq_lock(p, flags); ++ update_clocks(rq); ++ return rq; ++} ++ ++static inline struct rq *task_grq_lock_irq(struct task_struct *p) ++ __acquires(grq.lock) ++{ ++ grq_lock_irq(); ++ return task_rq(p); ++} ++ ++static inline void time_task_grq_lock_irq(struct task_struct *p) ++ __acquires(grq.lock) ++{ ++ struct rq *rq = task_grq_lock_irq(p); ++ update_clocks(rq); ++} ++ ++static inline void task_grq_unlock_irq(void) ++ __releases(grq.lock) ++{ ++ grq_unlock_irq(); ++} ++ ++static inline void task_grq_unlock(unsigned long *flags) ++ __releases(grq.lock) ++{ ++ grq_unlock_irqrestore(flags); ++} ++ ++/** ++ * grunqueue_is_locked ++ * ++ * Returns true if the global runqueue is locked. ++ * This interface allows printk to be called with the runqueue lock ++ * held and know whether or not it is OK to wake up the klogd. ++ */ ++bool grunqueue_is_locked(void) ++{ ++ return raw_spin_is_locked(&grq.lock); ++} ++ ++void grq_unlock_wait(void) ++ __releases(grq.lock) ++{ ++ smp_mb(); /* spin-unlock-wait is not a full memory barrier */ ++ raw_spin_unlock_wait(&grq.lock); ++} ++ ++static inline void time_grq_lock(struct rq *rq, unsigned long *flags) ++ __acquires(grq.lock) ++{ ++ local_irq_save(*flags); ++ time_lock_grq(rq); ++} ++ ++static inline struct rq *__task_grq_lock(struct task_struct *p) ++ __acquires(grq.lock) ++{ ++ grq_lock(); ++ return task_rq(p); ++} ++ ++static inline void __task_grq_unlock(void) ++ __releases(grq.lock) ++{ ++ grq_unlock(); ++} ++ ++/* ++ * Look for any tasks *anywhere* that are running nice 0 or better. We do ++ * this lockless for overhead reasons since the occasional wrong result ++ * is harmless. 
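++ * "Nice 0 or better" is checked as PRIO_TO_NICE(static_prio) < 1, i.e. any
++ * currently running task that is not positively niced counts as foreground
++ * load here.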
++ */ ++int above_background_load(void) ++{ ++ struct task_struct *cpu_curr; ++ unsigned long cpu; ++ ++ for_each_online_cpu(cpu) { ++ cpu_curr = cpu_rq(cpu)->curr; ++ if (unlikely(!cpu_curr)) ++ continue; ++ if (PRIO_TO_NICE(cpu_curr->static_prio) < 1) ++ return 1; ++ } ++ return 0; ++} ++ ++#ifndef __ARCH_WANT_UNLOCKED_CTXSW ++static inline void prepare_lock_switch(struct rq *rq, struct task_struct *next) ++{ ++} ++ ++static inline void finish_lock_switch(struct rq *rq, struct task_struct *prev) ++{ ++#ifdef CONFIG_DEBUG_SPINLOCK ++ /* this is a valid case when another task releases the spinlock */ ++ grq.lock.owner = current; ++#endif ++ /* ++ * If we are tracking spinlock dependencies then we have to ++ * fix up the runqueue lock - which gets 'carried over' from ++ * prev into current: ++ */ ++ spin_acquire(&grq.lock.dep_map, 0, 0, _THIS_IP_); ++ ++ grq_unlock_irq(); ++} ++ ++#else /* __ARCH_WANT_UNLOCKED_CTXSW */ ++ ++static inline void prepare_lock_switch(struct rq *rq, struct task_struct *next) ++{ ++#ifdef __ARCH_WANT_INTERRUPTS_ON_CTXSW ++ grq_unlock_irq(); ++#else ++ grq_unlock(); ++#endif ++} ++ ++static inline void finish_lock_switch(struct rq *rq, struct task_struct *prev) ++{ ++ smp_wmb(); ++#ifndef __ARCH_WANT_INTERRUPTS_ON_CTXSW ++ local_irq_enable(); ++#endif ++} ++#endif /* __ARCH_WANT_UNLOCKED_CTXSW */ ++ ++static inline bool deadline_before(u64 deadline, u64 time) ++{ ++ return (deadline < time); ++} ++ ++static inline bool deadline_after(u64 deadline, u64 time) ++{ ++ return (deadline > time); ++} ++ ++/* ++ * A task that is queued but not running will be on the grq run list. ++ * A task that is not running or queued will not be on the grq run list. ++ * A task that is currently running will have ->on_cpu set but not on the ++ * grq run list. ++ */ ++static inline bool task_queued(struct task_struct *p) ++{ ++ return (!list_empty(&p->run_list)); ++} ++ ++/* ++ * Removing from the global runqueue. Enter with grq locked. ++ */ ++static void dequeue_task(struct task_struct *p) ++{ ++ list_del_init(&p->run_list); ++ if (list_empty(grq.queue + p->prio)) ++ __clear_bit(p->prio, grq.prio_bitmap); ++} ++ ++/* ++ * To determine if it's safe for a task of SCHED_IDLEPRIO to actually run as ++ * an idle task, we ensure none of the following conditions are met. ++ */ ++static bool idleprio_suitable(struct task_struct *p) ++{ ++ return (!freezing(p) && !signal_pending(p) && ++ !(task_contributes_to_load(p)) && !(p->flags & (PF_EXITING))); ++} ++ ++/* ++ * To determine if a task of SCHED_ISO can run in pseudo-realtime, we check ++ * that the iso_refractory flag is not set. ++ */ ++static bool isoprio_suitable(void) ++{ ++ return !grq.iso_refractory; ++} ++ ++/* ++ * Adding to the global runqueue. Enter with grq locked. 
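++ * Non-rt tasks have their priority re-evaluated here so that SCHED_ISO and
++ * SCHED_IDLEPRIO tasks which are no longer suitable (per the helpers above)
++ * are queued at NORMAL_PRIO rather than at their special priorities.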
++ */ ++static void enqueue_task(struct task_struct *p) ++{ ++ if (!rt_task(p)) { ++ /* Check it hasn't gotten rt from PI */ ++ if ((idleprio_task(p) && idleprio_suitable(p)) || ++ (iso_task(p) && isoprio_suitable())) ++ p->prio = p->normal_prio; ++ else ++ p->prio = NORMAL_PRIO; ++ } ++ __set_bit(p->prio, grq.prio_bitmap); ++ list_add_tail(&p->run_list, grq.queue + p->prio); ++ sched_info_queued(p); ++} ++ ++/* Only idle task does this as a real time task*/ ++static inline void enqueue_task_head(struct task_struct *p) ++{ ++ __set_bit(p->prio, grq.prio_bitmap); ++ list_add(&p->run_list, grq.queue + p->prio); ++ sched_info_queued(p); ++} ++ ++static inline void requeue_task(struct task_struct *p) ++{ ++ sched_info_queued(p); ++} ++ ++/* ++ * Returns the relative length of deadline all compared to the shortest ++ * deadline which is that of nice -20. ++ */ ++static inline int task_prio_ratio(struct task_struct *p) ++{ ++ return prio_ratios[TASK_USER_PRIO(p)]; ++} ++ ++/* ++ * task_timeslice - all tasks of all priorities get the exact same timeslice ++ * length. CPU distribution is handled by giving different deadlines to ++ * tasks of different priorities. Use 128 as the base value for fast shifts. ++ */ ++static inline int task_timeslice(struct task_struct *p) ++{ ++ return (rr_interval * task_prio_ratio(p) / 128); ++} ++ ++#ifdef CONFIG_SMP ++/* ++ * qnr is the "queued but not running" count which is the total number of ++ * tasks on the global runqueue list waiting for cpu time but not actually ++ * currently running on a cpu. ++ */ ++static inline void inc_qnr(void) ++{ ++ grq.qnr++; ++} ++ ++static inline void dec_qnr(void) ++{ ++ grq.qnr--; ++} ++ ++static inline int queued_notrunning(void) ++{ ++ return grq.qnr; ++} ++ ++/* ++ * The cpu_idle_map stores a bitmap of all the CPUs currently idle to ++ * allow easy lookup of whether any suitable idle CPUs are available. ++ * It's cheaper to maintain a binary yes/no if there are any idle CPUs on the ++ * idle_cpus variable than to do a full bitmask check when we are busy. ++ */ ++static inline void set_cpuidle_map(int cpu) ++{ ++ if (likely(cpu_online(cpu))) { ++ cpu_set(cpu, grq.cpu_idle_map); ++ grq.idle_cpus = true; ++ } ++} ++ ++static inline void clear_cpuidle_map(int cpu) ++{ ++ cpu_clear(cpu, grq.cpu_idle_map); ++ if (cpus_empty(grq.cpu_idle_map)) ++ grq.idle_cpus = false; ++} ++ ++static bool suitable_idle_cpus(struct task_struct *p) ++{ ++ if (!grq.idle_cpus) ++ return false; ++ return (cpus_intersects(p->cpus_allowed, grq.cpu_idle_map)); ++} ++ ++#define CPUIDLE_DIFF_THREAD (1) ++#define CPUIDLE_DIFF_CORE (2) ++#define CPUIDLE_CACHE_BUSY (4) ++#define CPUIDLE_DIFF_CPU (8) ++#define CPUIDLE_THREAD_BUSY (16) ++#define CPUIDLE_DIFF_NODE (32) ++ ++static void resched_task(struct task_struct *p); ++ ++/* ++ * The best idle CPU is chosen according to the CPUIDLE ranking above where the ++ * lowest value would give the most suitable CPU to schedule p onto next. The ++ * order works out to be the following: ++ * ++ * Same core, idle or busy cache, idle threads ++ * Other core, same cache, idle or busy cache, idle threads. ++ * Same node, other CPU, idle cache, idle threads. ++ * Same node, other CPU, busy cache, idle threads. ++ * Same core, busy threads. ++ * Other core, same cache, busy threads. ++ * Same node, other CPU, busy threads. ++ * Other node, other CPU, idle cache, idle threads. ++ * Other node, other CPU, busy cache, idle threads. ++ * Other node, other CPU, busy threads. 
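++ *
++ * As an illustration of how the flag values combine (plain arithmetic on the
++ * CPUIDLE_* defines above): an SMT sibling on the same core whose threads
++ * are busy ranks CPUIDLE_DIFF_THREAD | CPUIDLE_THREAD_BUSY = 17, while a
++ * fully idle CPU on another NUMA node ranks at least CPUIDLE_DIFF_NODE = 32,
++ * so the local sibling is still preferred despite its busy threads.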
++ */ ++static void ++resched_best_mask(int best_cpu, struct rq *rq, cpumask_t *tmpmask) ++{ ++ unsigned int best_ranking = CPUIDLE_DIFF_NODE | CPUIDLE_THREAD_BUSY | ++ CPUIDLE_DIFF_CPU | CPUIDLE_CACHE_BUSY | CPUIDLE_DIFF_CORE | ++ CPUIDLE_DIFF_THREAD; ++ int cpu_tmp; ++ ++ if (cpu_isset(best_cpu, *tmpmask)) ++ goto out; ++ ++ for_each_cpu_mask(cpu_tmp, *tmpmask) { ++ unsigned int ranking; ++ struct rq *tmp_rq; ++ ++ ranking = 0; ++ tmp_rq = cpu_rq(cpu_tmp); ++ ++#ifdef CONFIG_NUMA ++ if (rq->cpu_locality[cpu_tmp] > 3) ++ ranking |= CPUIDLE_DIFF_NODE; ++ else ++#endif ++ if (rq->cpu_locality[cpu_tmp] > 2) ++ ranking |= CPUIDLE_DIFF_CPU; ++#ifdef CONFIG_SCHED_MC ++ if (rq->cpu_locality[cpu_tmp] == 2) ++ ranking |= CPUIDLE_DIFF_CORE; ++ if (!(tmp_rq->cache_idle(cpu_tmp))) ++ ranking |= CPUIDLE_CACHE_BUSY; ++#endif ++#ifdef CONFIG_SCHED_SMT ++ if (rq->cpu_locality[cpu_tmp] == 1) ++ ranking |= CPUIDLE_DIFF_THREAD; ++ if (!(tmp_rq->siblings_idle(cpu_tmp))) ++ ranking |= CPUIDLE_THREAD_BUSY; ++#endif ++ if (ranking < best_ranking) { ++ best_cpu = cpu_tmp; ++ best_ranking = ranking; ++ } ++ } ++out: ++ resched_task(cpu_rq(best_cpu)->curr); ++} ++ ++static void resched_best_idle(struct task_struct *p) ++{ ++ cpumask_t tmpmask; ++ ++ cpus_and(tmpmask, p->cpus_allowed, grq.cpu_idle_map); ++ resched_best_mask(task_cpu(p), task_rq(p), &tmpmask); ++} ++ ++static inline void resched_suitable_idle(struct task_struct *p) ++{ ++ if (suitable_idle_cpus(p)) ++ resched_best_idle(p); ++} ++/* ++ * Flags to tell us whether this CPU is running a CPU frequency governor that ++ * has slowed its speed or not. No locking required as the very rare wrongly ++ * read value would be harmless. ++ */ ++void cpu_scaling(int cpu) ++{ ++ cpu_rq(cpu)->scaling = true; ++} ++ ++void cpu_nonscaling(int cpu) ++{ ++ cpu_rq(cpu)->scaling = false; ++} ++ ++static inline bool scaling_rq(struct rq *rq) ++{ ++ return rq->scaling; ++} ++#else /* CONFIG_SMP */ ++static inline void inc_qnr(void) ++{ ++} ++ ++static inline void dec_qnr(void) ++{ ++} ++ ++static inline int queued_notrunning(void) ++{ ++ return grq.nr_running; ++} ++ ++static inline void set_cpuidle_map(int cpu) ++{ ++} ++ ++static inline void clear_cpuidle_map(int cpu) ++{ ++} ++ ++static inline bool suitable_idle_cpus(struct task_struct *p) ++{ ++ return uprq->curr == uprq->idle; ++} ++ ++static inline void resched_suitable_idle(struct task_struct *p) ++{ ++} ++ ++void cpu_scaling(int __unused) ++{ ++} ++ ++void cpu_nonscaling(int __unused) ++{ ++} ++ ++/* ++ * Although CPUs can scale in UP, there is nowhere else for tasks to go so this ++ * always returns 0. ++ */ ++static inline bool scaling_rq(struct rq *rq) ++{ ++ return false; ++} ++#endif /* CONFIG_SMP */ ++EXPORT_SYMBOL_GPL(cpu_scaling); ++EXPORT_SYMBOL_GPL(cpu_nonscaling); ++ ++/* ++ * activate_idle_task - move idle task to the _front_ of runqueue. ++ */ ++static inline void activate_idle_task(struct task_struct *p) ++{ ++ enqueue_task_head(p); ++ grq.nr_running++; ++ inc_qnr(); ++} ++ ++static inline int normal_prio(struct task_struct *p) ++{ ++ if (has_rt_policy(p)) ++ return MAX_RT_PRIO - 1 - p->rt_priority; ++ if (idleprio_task(p)) ++ return IDLE_PRIO; ++ if (iso_task(p)) ++ return ISO_PRIO; ++ return NORMAL_PRIO; ++} ++ ++/* ++ * Calculate the current priority, i.e. the priority ++ * taken into account by the scheduler. This value might ++ * be boosted by RT tasks as it will be RT if the task got ++ * RT-boosted. If not then it returns p->normal_prio. 
++ */ ++static int effective_prio(struct task_struct *p) ++{ ++ p->normal_prio = normal_prio(p); ++ /* ++ * If we are RT tasks or we were boosted to RT priority, ++ * keep the priority unchanged. Otherwise, update priority ++ * to the normal priority: ++ */ ++ if (!rt_prio(p->prio)) ++ return p->normal_prio; ++ return p->prio; ++} ++ ++/* ++ * activate_task - move a task to the runqueue. Enter with grq locked. ++ */ ++static void activate_task(struct task_struct *p, struct rq *rq) ++{ ++ update_clocks(rq); ++ ++ /* ++ * Sleep time is in units of nanosecs, so shift by 20 to get a ++ * milliseconds-range estimation of the amount of time that the task ++ * spent sleeping: ++ */ ++ if (unlikely(prof_on == SLEEP_PROFILING)) { ++ if (p->state == TASK_UNINTERRUPTIBLE) ++ profile_hits(SLEEP_PROFILING, (void *)get_wchan(p), ++ (rq->clock - p->last_ran) >> 20); ++ } ++ ++ p->prio = effective_prio(p); ++ if (task_contributes_to_load(p)) ++ grq.nr_uninterruptible--; ++ enqueue_task(p); ++ grq.nr_running++; ++ inc_qnr(); ++} ++ ++static inline void clear_sticky(struct task_struct *p); ++ ++/* ++ * deactivate_task - If it's running, it's not on the grq and we can just ++ * decrement the nr_running. Enter with grq locked. ++ */ ++static inline void deactivate_task(struct task_struct *p) ++{ ++ if (task_contributes_to_load(p)) ++ grq.nr_uninterruptible++; ++ grq.nr_running--; ++ clear_sticky(p); ++} ++ ++#ifdef CONFIG_SMP ++void set_task_cpu(struct task_struct *p, unsigned int cpu) ++{ ++#ifdef CONFIG_LOCKDEP ++ /* ++ * The caller should hold grq lock. ++ */ ++ WARN_ON_ONCE(debug_locks && !lockdep_is_held(&grq.lock)); ++#endif ++ trace_sched_migrate_task(p, cpu); ++ if (task_cpu(p) != cpu) ++ perf_sw_event(PERF_COUNT_SW_CPU_MIGRATIONS, 1, NULL, 0); ++ ++ /* ++ * After ->cpu is set up to a new value, task_grq_lock(p, ...) can be ++ * successfully executed on another CPU. We must ensure that updates of ++ * per-task data have been completed by this moment. ++ */ ++ smp_wmb(); ++ task_thread_info(p)->cpu = cpu; ++} ++ ++static inline void clear_sticky(struct task_struct *p) ++{ ++ p->sticky = false; ++} ++ ++static inline bool task_sticky(struct task_struct *p) ++{ ++ return p->sticky; ++} ++ ++/* Reschedule the best idle CPU that is not this one. */ ++static void ++resched_closest_idle(struct rq *rq, int cpu, struct task_struct *p) ++{ ++ cpumask_t tmpmask; ++ ++ cpus_and(tmpmask, p->cpus_allowed, grq.cpu_idle_map); ++ cpu_clear(cpu, tmpmask); ++ if (cpus_empty(tmpmask)) ++ return; ++ resched_best_mask(cpu, rq, &tmpmask); ++} ++ ++/* ++ * We set the sticky flag on a task that is descheduled involuntarily meaning ++ * it is awaiting further CPU time. If the last sticky task is still sticky ++ * but unlucky enough to not be the next task scheduled, we unstick it and try ++ * to find it an idle CPU. Realtime tasks do not stick to minimise their ++ * latency at all times. 
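++ * In swap_sticky() below this means a non-rt task simply becomes this rq's
++ * sticky task, while an rt task has the nearest other idle CPU rescheduled
++ * on its behalf and the sticky slot cleared.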
++ */ ++static inline void ++swap_sticky(struct rq *rq, int cpu, struct task_struct *p) ++{ ++ if (rq->sticky_task) { ++ if (rq->sticky_task == p) { ++ p->sticky = true; ++ return; ++ } ++ if (task_sticky(rq->sticky_task)) { ++ clear_sticky(rq->sticky_task); ++ resched_closest_idle(rq, cpu, rq->sticky_task); ++ } ++ } ++ if (!rt_task(p)) { ++ p->sticky = true; ++ rq->sticky_task = p; ++ } else { ++ resched_closest_idle(rq, cpu, p); ++ rq->sticky_task = NULL; ++ } ++} ++ ++static inline void unstick_task(struct rq *rq, struct task_struct *p) ++{ ++ rq->sticky_task = NULL; ++ clear_sticky(p); ++} ++#else ++static inline void clear_sticky(struct task_struct *p) ++{ ++} ++ ++static inline bool task_sticky(struct task_struct *p) ++{ ++ return false; ++} ++ ++static inline void ++swap_sticky(struct rq *rq, int cpu, struct task_struct *p) ++{ ++} ++ ++static inline void unstick_task(struct rq *rq, struct task_struct *p) ++{ ++} ++#endif ++ ++/* ++ * Move a task off the global queue and take it to a cpu for it will ++ * become the running task. ++ */ ++static inline void take_task(int cpu, struct task_struct *p) ++{ ++ set_task_cpu(p, cpu); ++ dequeue_task(p); ++ clear_sticky(p); ++ dec_qnr(); ++} ++ ++/* ++ * Returns a descheduling task to the grq runqueue unless it is being ++ * deactivated. ++ */ ++static inline void return_task(struct task_struct *p, int deactivate) ++{ ++ if (deactivate) ++ deactivate_task(p); ++ else { ++ inc_qnr(); ++ enqueue_task(p); ++ } ++} ++ ++/* ++ * resched_task - mark a task 'to be rescheduled now'. ++ * ++ * On UP this means the setting of the need_resched flag, on SMP it ++ * might also involve a cross-CPU call to trigger the scheduler on ++ * the target CPU. ++ */ ++#ifdef CONFIG_SMP ++ ++#ifndef tsk_is_polling ++#define tsk_is_polling(t) test_tsk_thread_flag(t, TIF_POLLING_NRFLAG) ++#endif ++ ++static void resched_task(struct task_struct *p) ++{ ++ int cpu; ++ ++ assert_raw_spin_locked(&grq.lock); ++ ++ if (unlikely(test_tsk_thread_flag(p, TIF_NEED_RESCHED))) ++ return; ++ ++ set_tsk_thread_flag(p, TIF_NEED_RESCHED); ++ ++ cpu = task_cpu(p); ++ if (cpu == smp_processor_id()) ++ return; ++ ++ /* NEED_RESCHED must be visible before we test polling */ ++ smp_mb(); ++ if (!tsk_is_polling(p)) ++ smp_send_reschedule(cpu); ++} ++ ++#else ++static inline void resched_task(struct task_struct *p) ++{ ++ assert_raw_spin_locked(&grq.lock); ++ set_tsk_need_resched(p); ++} ++#endif ++ ++/** ++ * task_curr - is this task currently executing on a CPU? ++ * @p: the task in question. ++ */ ++inline int task_curr(const struct task_struct *p) ++{ ++ return cpu_curr(task_cpu(p)) == p; ++} ++ ++#ifdef CONFIG_SMP ++struct migration_req { ++ struct task_struct *task; ++ int dest_cpu; ++}; ++ ++/* ++ * wait_task_inactive - wait for a thread to unschedule. ++ * ++ * If @match_state is nonzero, it's the @p->state value just checked and ++ * not expected to change. If it changes, i.e. @p might have woken up, ++ * then return zero. When we succeed in waiting for @p to be off its CPU, ++ * we return a positive number (its total switch count). If a second call ++ * a short while later returns the same number, the caller can be sure that ++ * @p has remained unscheduled the whole time. ++ * ++ * The caller must ensure that the task *will* unschedule sometime soon, ++ * else this function might spin for a *long* time. 
This function can't ++ * be called with interrupts off, or it may introduce deadlock with ++ * smp_call_function() if an IPI is sent by the same process we are ++ * waiting to become inactive. ++ */ ++unsigned long wait_task_inactive(struct task_struct *p, long match_state) ++{ ++ unsigned long flags; ++ bool running, on_rq; ++ unsigned long ncsw; ++ struct rq *rq; ++ ++ for (;;) { ++ /* ++ * We do the initial early heuristics without holding ++ * any task-queue locks at all. We'll only try to get ++ * the runqueue lock when things look like they will ++ * work out! In the unlikely event rq is dereferenced ++ * since we're lockless, grab it again. ++ */ ++#ifdef CONFIG_SMP ++retry_rq: ++ rq = task_rq(p); ++ if (unlikely(!rq)) ++ goto retry_rq; ++#else /* CONFIG_SMP */ ++ rq = task_rq(p); ++#endif ++ /* ++ * If the task is actively running on another CPU ++ * still, just relax and busy-wait without holding ++ * any locks. ++ * ++ * NOTE! Since we don't hold any locks, it's not ++ * even sure that "rq" stays as the right runqueue! ++ * But we don't care, since this will return false ++ * if the runqueue has changed and p is actually now ++ * running somewhere else! ++ */ ++ while (task_running(p) && p == rq->curr) { ++ if (match_state && unlikely(p->state != match_state)) ++ return 0; ++ cpu_relax(); ++ } ++ ++ /* ++ * Ok, time to look more closely! We need the grq ++ * lock now, to be *sure*. If we're wrong, we'll ++ * just go back and repeat. ++ */ ++ rq = task_grq_lock(p, &flags); ++ trace_sched_wait_task(p); ++ running = task_running(p); ++ on_rq = task_queued(p); ++ ncsw = 0; ++ if (!match_state || p->state == match_state) ++ ncsw = p->nvcsw | LONG_MIN; /* sets MSB */ ++ task_grq_unlock(&flags); ++ ++ /* ++ * If it changed from the expected state, bail out now. ++ */ ++ if (unlikely(!ncsw)) ++ break; ++ ++ /* ++ * Was it really running after all now that we ++ * checked with the proper locks actually held? ++ * ++ * Oops. Go back and try again.. ++ */ ++ if (unlikely(running)) { ++ cpu_relax(); ++ continue; ++ } ++ ++ /* ++ * It's not enough that it's not actively running, ++ * it must be off the runqueue _entirely_, and not ++ * preempted! ++ * ++ * So if it was still runnable (but just not actively ++ * running right now), it's preempted, and we should ++ * yield - it could be a while. ++ */ ++ if (unlikely(on_rq)) { ++ ktime_t to = ktime_set(0, NSEC_PER_SEC / HZ); ++ ++ set_current_state(TASK_UNINTERRUPTIBLE); ++ schedule_hrtimeout(&to, HRTIMER_MODE_REL); ++ continue; ++ } ++ ++ /* ++ * Ahh, all good. It wasn't running, and it wasn't ++ * runnable, which means that it will never become ++ * running in the future either. We're all done! ++ */ ++ break; ++ } ++ ++ return ncsw; ++} ++ ++/*** ++ * kick_process - kick a running thread to enter/exit the kernel ++ * @p: the to-be-kicked thread ++ * ++ * Cause a process which is running on another CPU to enter ++ * kernel-mode, without any delay. (to get signals handled.) ++ * ++ * NOTE: this function doesn't have to take the runqueue lock, ++ * because all it wants to ensure is that the remote task enters ++ * the kernel. If the IPI races and the task has been migrated ++ * to another CPU then no harm is done and the purpose has been ++ * achieved as well. 
++ */ ++void kick_process(struct task_struct *p) ++{ ++ int cpu; ++ ++ preempt_disable(); ++ cpu = task_cpu(p); ++ if ((cpu != smp_processor_id()) && task_curr(p)) ++ smp_send_reschedule(cpu); ++ preempt_enable(); ++} ++EXPORT_SYMBOL_GPL(kick_process); ++#endif ++ ++#define rq_idle(rq) ((rq)->rq_prio == PRIO_LIMIT) ++ ++/* ++ * RT tasks preempt purely on priority. SCHED_NORMAL tasks preempt on the ++ * basis of earlier deadlines. SCHED_IDLEPRIO don't preempt anything else or ++ * between themselves, they cooperatively multitask. An idle rq scores as ++ * prio PRIO_LIMIT so it is always preempted. ++ */ ++static inline bool ++can_preempt(struct task_struct *p, int prio, u64 deadline) ++{ ++ /* Better static priority RT task or better policy preemption */ ++ if (p->prio < prio) ++ return true; ++ if (p->prio > prio) ++ return false; ++ /* SCHED_NORMAL, BATCH and ISO will preempt based on deadline */ ++ if (!deadline_before(p->deadline, deadline)) ++ return false; ++ return true; ++} ++ ++#ifdef CONFIG_SMP ++#ifdef CONFIG_HOTPLUG_CPU ++/* ++ * Check to see if there is a task that is affined only to offline CPUs but ++ * still wants runtime. This happens to kernel threads during suspend/halt and ++ * disabling of CPUs. ++ */ ++static inline bool online_cpus(struct task_struct *p) ++{ ++ return (likely(cpus_intersects(cpu_online_map, p->cpus_allowed))); ++} ++#else /* CONFIG_HOTPLUG_CPU */ ++/* All available CPUs are always online without hotplug. */ ++static inline bool online_cpus(struct task_struct *p) ++{ ++ return true; ++} ++#endif ++ ++/* ++ * Check to see if p can run on cpu, and if not, whether there are any online ++ * CPUs it can run on instead. ++ */ ++static inline bool needs_other_cpu(struct task_struct *p, int cpu) ++{ ++ if (unlikely(!cpu_isset(cpu, p->cpus_allowed))) ++ return true; ++ return false; ++} ++ ++/* ++ * When all else is equal, still prefer this_rq. ++ */ ++static void try_preempt(struct task_struct *p, struct rq *this_rq) ++{ ++ struct rq *highest_prio_rq; ++ int cpu, highest_prio; ++ u64 latest_deadline; ++ cpumask_t tmp; ++ ++ /* ++ * We clear the sticky flag here because for a task to have called ++ * try_preempt with the sticky flag enabled means some complicated ++ * re-scheduling has occurred and we should ignore the sticky flag. 
++ */ ++ clear_sticky(p); ++ ++ if (suitable_idle_cpus(p)) { ++ resched_best_idle(p); ++ return; ++ } ++ ++ /* IDLEPRIO tasks never preempt anything */ ++ if (p->policy == SCHED_IDLEPRIO) ++ return; ++ ++ if (likely(online_cpus(p))) ++ cpus_and(tmp, cpu_online_map, p->cpus_allowed); ++ else ++ return; ++ ++ highest_prio = p->prio; ++ highest_prio_rq = this_rq; ++ latest_deadline = this_rq->rq_deadline; ++ ++ for_each_cpu_mask(cpu, tmp) { ++ struct rq *rq; ++ int rq_prio; ++ ++ rq = cpu_rq(cpu); ++ rq_prio = rq->rq_prio; ++ if (rq_prio < highest_prio) ++ continue; ++ ++ if (rq_prio > highest_prio || ++ deadline_after(rq->rq_deadline, latest_deadline)) { ++ latest_deadline = rq->rq_deadline; ++ highest_prio = rq_prio; ++ highest_prio_rq = rq; ++ } ++ } ++ ++ if (!can_preempt(p, highest_prio, highest_prio_rq->rq_deadline)) ++ return; ++ ++ resched_task(highest_prio_rq->curr); ++} ++#else /* CONFIG_SMP */ ++static inline bool needs_other_cpu(struct task_struct *p, int cpu) ++{ ++ return false; ++} ++ ++static void try_preempt(struct task_struct *p, struct rq *this_rq) ++{ ++ if (p->policy == SCHED_IDLEPRIO) ++ return; ++ if (can_preempt(p, uprq->rq_prio, uprq->rq_deadline)) ++ resched_task(uprq->curr); ++} ++#endif /* CONFIG_SMP */ ++ ++static void ++ttwu_stat(struct task_struct *p, int cpu, int wake_flags) ++{ ++#ifdef CONFIG_SCHEDSTATS ++ struct rq *rq = this_rq(); ++ ++#ifdef CONFIG_SMP ++ int this_cpu = smp_processor_id(); ++ ++ if (cpu == this_cpu) ++ schedstat_inc(rq, ttwu_local); ++ else { ++ struct sched_domain *sd; ++ ++ rcu_read_lock(); ++ for_each_domain(this_cpu, sd) { ++ if (cpumask_test_cpu(cpu, sched_domain_span(sd))) { ++ schedstat_inc(sd, ttwu_wake_remote); ++ break; ++ } ++ } ++ rcu_read_unlock(); ++ } ++ ++#endif /* CONFIG_SMP */ ++ ++ schedstat_inc(rq, ttwu_count); ++#endif /* CONFIG_SCHEDSTATS */ ++} ++ ++static inline void ttwu_activate(struct task_struct *p, struct rq *rq, ++ bool is_sync) ++{ ++ activate_task(p, rq); ++ ++ /* ++ * Sync wakeups (i.e. those types of wakeups where the waker ++ * has indicated that it will leave the CPU in short order) ++ * don't trigger a preemption if there are no idle cpus, ++ * instead waiting for current to deschedule. ++ */ ++ if (!is_sync || suitable_idle_cpus(p)) ++ try_preempt(p, rq); ++} ++ ++static inline void ttwu_post_activation(struct task_struct *p, struct rq *rq, ++ bool success) ++{ ++ trace_sched_wakeup(p, success); ++ p->state = TASK_RUNNING; ++ ++ /* ++ * if a worker is waking up, notify workqueue. Note that on BFS, we ++ * don't really know what cpu it will be, so we fake it for ++ * wq_worker_waking_up :/ ++ */ ++ if ((p->flags & PF_WQ_WORKER) && success) ++ wq_worker_waking_up(p, cpu_of(rq)); ++} ++ ++#ifdef CONFIG_SMP ++void scheduler_ipi(void) ++{ ++} ++#endif /* CONFIG_SMP */ ++ ++/*** ++ * try_to_wake_up - wake up a thread ++ * @p: the thread to be awakened ++ * @state: the mask of task states that can be woken ++ * @wake_flags: wake modifier flags (WF_*) ++ * ++ * Put it on the run-queue if it's not already there. The "current" ++ * thread is always on the run-queue (except when the actual ++ * re-schedule is in progress), and as such you're allowed to do ++ * the simpler "current->state = TASK_RUNNING" to mark yourself ++ * runnable without the overhead of this. ++ * ++ * Returns %true if @p was woken up, %false if it was already running ++ * or @state didn't match @p's state. 
++ */
++static bool try_to_wake_up(struct task_struct *p, unsigned int state,
++ int wake_flags)
++{
++ bool success = false;
++ unsigned long flags;
++ struct rq *rq;
++ int cpu;
++
++ get_cpu();
++
++ /* This barrier is undocumented, probably for p->state? */
++ smp_wmb();
++
++ /*
++ * No need to do time_lock_grq as we only need to update the rq clock
++ * if we activate the task
++ */
++ rq = task_grq_lock(p, &flags);
++ cpu = task_cpu(p);
++
++ /* state is a volatile long, why that is I don't know */
++ if (!((unsigned int)p->state & state))
++ goto out_unlock;
++
++ if (task_queued(p) || task_running(p))
++ goto out_running;
++
++ ttwu_activate(p, rq, wake_flags & WF_SYNC);
++ success = true;
++
++out_running:
++ ttwu_post_activation(p, rq, success);
++out_unlock:
++ task_grq_unlock(&flags);
++
++ ttwu_stat(p, cpu, wake_flags);
++
++ put_cpu();
++
++ return success;
++}
++
++/**
++ * try_to_wake_up_local - try to wake up a local task with grq lock held
++ * @p: the thread to be awakened
++ *
++ * Put @p on the run-queue if it's not already there. The caller must
++ * ensure that grq is locked and @p is not the current task.
++ * grq stays locked over invocation.
++ */
++static void try_to_wake_up_local(struct task_struct *p)
++{
++ struct rq *rq = task_rq(p);
++ bool success = false;
++
++ lockdep_assert_held(&grq.lock);
++
++ if (!(p->state & TASK_NORMAL))
++ return;
++
++ if (!task_queued(p)) {
++ if (likely(!task_running(p))) {
++ schedstat_inc(rq, ttwu_count);
++ schedstat_inc(rq, ttwu_local);
++ }
++ ttwu_activate(p, rq, false);
++ ttwu_stat(p, smp_processor_id(), 0);
++ success = true;
++ }
++ ttwu_post_activation(p, rq, success);
++}
++
++/**
++ * wake_up_process - Wake up a specific process
++ * @p: The process to be woken up.
++ *
++ * Attempt to wake up the nominated process and move it to the set of runnable
++ * processes. Returns 1 if the process was woken up, 0 if it was already
++ * running.
++ *
++ * It may be assumed that this function implies a write memory barrier before
++ * changing the task state if and only if any tasks are woken up.
++ */
++int wake_up_process(struct task_struct *p)
++{
++ return try_to_wake_up(p, TASK_ALL, 0);
++}
++EXPORT_SYMBOL(wake_up_process);
++
++int wake_up_state(struct task_struct *p, unsigned int state)
++{
++ return try_to_wake_up(p, state, 0);
++}
++
++static void time_slice_expired(struct task_struct *p);
++
++/*
++ * Perform scheduler related setup for a newly forked process p.
++ * p is forked by current.
++ */
++void sched_fork(struct task_struct *p)
++{
++ struct task_struct *curr;
++ int cpu = get_cpu();
++ struct rq *rq;
++
++#ifdef CONFIG_PREEMPT_NOTIFIERS
++ INIT_HLIST_HEAD(&p->preempt_notifiers);
++#endif
++ /*
++ * We mark the process as running here. This guarantees that
++ * nobody will actually run it, and a signal or other external
++ * event cannot wake it up and insert it on the runqueue either.
++ */
++ p->state = TASK_RUNNING;
++ set_task_cpu(p, cpu);
++
++ /* Should be reset in fork.c but done here for ease of bfs patching */
++ p->sched_time = p->stime_pc = p->utime_pc = 0;
++
++ /*
++ * Revert to default priority/policy on fork if requested.
++ */
++ if (unlikely(p->sched_reset_on_fork)) {
++ if (p->policy == SCHED_FIFO || p->policy == SCHED_RR) {
++ p->policy = SCHED_NORMAL;
++ p->normal_prio = normal_prio(p);
++ }
++
++ if (PRIO_TO_NICE(p->static_prio) < 0) {
++ p->static_prio = NICE_TO_PRIO(0);
++ p->normal_prio = p->static_prio;
++ }
++
++ /*
++ * We don't need the reset flag anymore after the fork.
It has ++ * fulfilled its duty: ++ */ ++ p->sched_reset_on_fork = 0; ++ } ++ ++ curr = current; ++ /* ++ * Make sure we do not leak PI boosting priority to the child. ++ */ ++ p->prio = curr->normal_prio; ++ ++ INIT_LIST_HEAD(&p->run_list); ++#if defined(CONFIG_SCHEDSTATS) || defined(CONFIG_TASK_DELAY_ACCT) ++ if (unlikely(sched_info_on())) ++ memset(&p->sched_info, 0, sizeof(p->sched_info)); ++#endif ++ ++ p->on_cpu = false; ++ clear_sticky(p); ++ ++#ifdef CONFIG_PREEMPT_COUNT ++ /* Want to start with kernel preemption disabled. */ ++ task_thread_info(p)->preempt_count = 1; ++#endif ++ if (unlikely(p->policy == SCHED_FIFO)) ++ goto out; ++ /* ++ * Share the timeslice between parent and child, thus the ++ * total amount of pending timeslices in the system doesn't change, ++ * resulting in more scheduling fairness. If it's negative, it won't ++ * matter since that's the same as being 0. current's time_slice is ++ * actually in rq_time_slice when it's running, as is its last_ran ++ * value. rq->rq_deadline is only modified within schedule() so it ++ * is always equal to current->deadline. ++ */ ++ rq = task_grq_lock_irq(curr); ++ if (likely(rq->rq_time_slice >= RESCHED_US * 2)) { ++ rq->rq_time_slice /= 2; ++ p->time_slice = rq->rq_time_slice; ++ } else { ++ /* ++ * Forking task has run out of timeslice. Reschedule it and ++ * start its child with a new time slice and deadline. The ++ * child will end up running first because its deadline will ++ * be slightly earlier. ++ */ ++ rq->rq_time_slice = 0; ++ set_tsk_need_resched(curr); ++ time_slice_expired(p); ++ } ++ p->last_ran = rq->rq_last_ran; ++ task_grq_unlock_irq(); ++out: ++ put_cpu(); ++} ++ ++/* ++ * wake_up_new_task - wake up a newly created task for the first time. ++ * ++ * This function will do some initial scheduler statistics housekeeping ++ * that must be done for every newly created context, then puts the task ++ * on the runqueue and wakes it. ++ */ ++void wake_up_new_task(struct task_struct *p) ++{ ++ struct task_struct *parent; ++ unsigned long flags; ++ struct rq *rq; ++ ++ rq = task_grq_lock(p, &flags); ++ p->state = TASK_RUNNING; ++ parent = p->parent; ++ /* Unnecessary but small chance that the parent changed CPU */ ++ set_task_cpu(p, task_cpu(parent)); ++ activate_task(p, rq); ++ trace_sched_wakeup_new(p, 1); ++ if (rq->curr == parent && !suitable_idle_cpus(p)) { ++ /* ++ * The VM isn't cloned, so we're in a good position to ++ * do child-runs-first in anticipation of an exec. This ++ * usually avoids a lot of COW overhead. ++ */ ++ resched_task(parent); ++ } else ++ try_preempt(p, rq); ++ task_grq_unlock(&flags); ++} ++ ++#ifdef CONFIG_PREEMPT_NOTIFIERS ++ ++/** ++ * preempt_notifier_register - tell me when current is being preempted & rescheduled ++ * @notifier: notifier struct to register ++ */ ++void preempt_notifier_register(struct preempt_notifier *notifier) ++{ ++ hlist_add_head(¬ifier->link, ¤t->preempt_notifiers); ++} ++EXPORT_SYMBOL_GPL(preempt_notifier_register); ++ ++/** ++ * preempt_notifier_unregister - no longer interested in preemption notifications ++ * @notifier: notifier struct to unregister ++ * ++ * This is safe to call from within a preemption notifier. 
++ */ ++void preempt_notifier_unregister(struct preempt_notifier *notifier) ++{ ++ hlist_del(¬ifier->link); ++} ++EXPORT_SYMBOL_GPL(preempt_notifier_unregister); ++ ++static void fire_sched_in_preempt_notifiers(struct task_struct *curr) ++{ ++ struct preempt_notifier *notifier; ++ struct hlist_node *node; ++ ++ hlist_for_each_entry(notifier, node, &curr->preempt_notifiers, link) ++ notifier->ops->sched_in(notifier, raw_smp_processor_id()); ++} ++ ++static void ++fire_sched_out_preempt_notifiers(struct task_struct *curr, ++ struct task_struct *next) ++{ ++ struct preempt_notifier *notifier; ++ struct hlist_node *node; ++ ++ hlist_for_each_entry(notifier, node, &curr->preempt_notifiers, link) ++ notifier->ops->sched_out(notifier, next); ++} ++ ++#else /* !CONFIG_PREEMPT_NOTIFIERS */ ++ ++static void fire_sched_in_preempt_notifiers(struct task_struct *curr) ++{ ++} ++ ++static void ++fire_sched_out_preempt_notifiers(struct task_struct *curr, ++ struct task_struct *next) ++{ ++} ++ ++#endif /* CONFIG_PREEMPT_NOTIFIERS */ ++ ++/** ++ * prepare_task_switch - prepare to switch tasks ++ * @rq: the runqueue preparing to switch ++ * @next: the task we are going to switch to. ++ * ++ * This is called with the rq lock held and interrupts off. It must ++ * be paired with a subsequent finish_task_switch after the context ++ * switch. ++ * ++ * prepare_task_switch sets up locking and calls architecture specific ++ * hooks. ++ */ ++static inline void ++prepare_task_switch(struct rq *rq, struct task_struct *prev, ++ struct task_struct *next) ++{ ++ sched_info_switch(prev, next); ++ perf_event_task_sched_out(prev, next); ++ fire_sched_out_preempt_notifiers(prev, next); ++ prepare_lock_switch(rq, next); ++ prepare_arch_switch(next); ++ trace_sched_switch(prev, next); ++} ++ ++/** ++ * finish_task_switch - clean up after a task-switch ++ * @rq: runqueue associated with task-switch ++ * @prev: the thread we just switched away from. ++ * ++ * finish_task_switch must be called after the context switch, paired ++ * with a prepare_task_switch call before the context switch. ++ * finish_task_switch will reconcile locking set up by prepare_task_switch, ++ * and do any other architecture-specific cleanup actions. ++ * ++ * Note that we may have delayed dropping an mm in context_switch(). If ++ * so, we finish that here outside of the runqueue lock. (Doing it ++ * with the lock held can cause deadlocks; see schedule() for ++ * details.) ++ */ ++static inline void finish_task_switch(struct rq *rq, struct task_struct *prev) ++ __releases(grq.lock) ++{ ++ struct mm_struct *mm = rq->prev_mm; ++ long prev_state; ++ ++ rq->prev_mm = NULL; ++ ++ /* ++ * A task struct has one reference for the use as "current". ++ * If a task dies, then it sets TASK_DEAD in tsk->state and calls ++ * schedule one last time. The schedule call will never return, and ++ * the scheduled task must drop that reference. ++ * The test for TASK_DEAD must occur while the runqueue locks are ++ * still held, otherwise prev could be scheduled on another cpu, die ++ * there before we look at prev->state, and then the reference would ++ * be dropped twice. 
++ * Manfred Spraul ++ */ ++ prev_state = prev->state; ++ finish_arch_switch(prev); ++#ifdef __ARCH_WANT_INTERRUPTS_ON_CTXSW ++ local_irq_disable(); ++#endif /* __ARCH_WANT_INTERRUPTS_ON_CTXSW */ ++ perf_event_task_sched_in(prev, current); ++#ifdef __ARCH_WANT_INTERRUPTS_ON_CTXSW ++ local_irq_enable(); ++#endif /* __ARCH_WANT_INTERRUPTS_ON_CTXSW */ ++ finish_lock_switch(rq, prev); ++ ++ fire_sched_in_preempt_notifiers(current); ++ if (mm) ++ mmdrop(mm); ++ if (unlikely(prev_state == TASK_DEAD)) { ++ /* ++ * Remove function-return probe instances associated with this ++ * task and put them back on the free list. ++ */ ++ kprobe_flush_task(prev); ++ put_task_struct(prev); ++ } ++} ++ ++/** ++ * schedule_tail - first thing a freshly forked thread must call. ++ * @prev: the thread we just switched away from. ++ */ ++asmlinkage void schedule_tail(struct task_struct *prev) ++ __releases(grq.lock) ++{ ++ struct rq *rq = this_rq(); ++ ++ finish_task_switch(rq, prev); ++#ifdef __ARCH_WANT_UNLOCKED_CTXSW ++ /* In this case, finish_task_switch does not reenable preemption */ ++ preempt_enable(); ++#endif ++ if (current->set_child_tid) ++ put_user(current->pid, current->set_child_tid); ++} ++ ++/* ++ * context_switch - switch to the new MM and the new ++ * thread's register state. ++ */ ++static inline void ++context_switch(struct rq *rq, struct task_struct *prev, ++ struct task_struct *next) ++{ ++ struct mm_struct *mm, *oldmm; ++ ++ prepare_task_switch(rq, prev, next); ++ ++ mm = next->mm; ++ oldmm = prev->active_mm; ++ /* ++ * For paravirt, this is coupled with an exit in switch_to to ++ * combine the page table reload and the switch backend into ++ * one hypercall. ++ */ ++ arch_start_context_switch(prev); ++ ++ if (!mm) { ++ next->active_mm = oldmm; ++ atomic_inc(&oldmm->mm_count); ++ enter_lazy_tlb(oldmm, next); ++ } else ++ switch_mm(oldmm, mm, next); ++ ++ if (!prev->mm) { ++ prev->active_mm = NULL; ++ rq->prev_mm = oldmm; ++ } ++ /* ++ * Since the runqueue lock will be released by the next ++ * task (which is an invalid locking op but in the case ++ * of the scheduler it's an obvious special-case), so we ++ * do an early lockdep release here: ++ */ ++#ifndef __ARCH_WANT_UNLOCKED_CTXSW ++ spin_release(&grq.lock.dep_map, 1, _THIS_IP_); ++#endif ++ ++ /* Here we just switch the register state and the stack. */ ++ switch_to(prev, next, prev); ++ ++ barrier(); ++ /* ++ * this_rq must be evaluated again because prev may have moved ++ * CPUs since it called schedule(), thus the 'rq' on its stack ++ * frame will be invalid. ++ */ ++ finish_task_switch(this_rq(), prev); ++} ++ ++/* ++ * nr_running, nr_uninterruptible and nr_context_switches: ++ * ++ * externally visible scheduler statistics: current number of runnable ++ * threads, current number of uninterruptible-sleeping threads, total ++ * number of context switches performed since bootup. All are measured ++ * without grabbing the grq lock but the occasional inaccurate result ++ * doesn't matter so long as it's positive. 
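++ * The helpers below therefore just clamp a transiently negative reading
++ * before returning it rather than taking the lock for an exact figure.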
++ */ ++unsigned long nr_running(void) ++{ ++ long nr = grq.nr_running; ++ ++ if (unlikely(nr < 0)) ++ nr = 0; ++ return (unsigned long)nr; ++} ++ ++unsigned long nr_uninterruptible(void) ++{ ++ long nu = grq.nr_uninterruptible; ++ ++ if (unlikely(nu < 0)) ++ nu = 0; ++ return nu; ++} ++ ++unsigned long long nr_context_switches(void) ++{ ++ long long ns = grq.nr_switches; ++ ++ /* This is of course impossible */ ++ if (unlikely(ns < 0)) ++ ns = 1; ++ return (unsigned long long)ns; ++} ++ ++unsigned long nr_iowait(void) ++{ ++ unsigned long i, sum = 0; ++ ++ for_each_possible_cpu(i) ++ sum += atomic_read(&cpu_rq(i)->nr_iowait); ++ ++ return sum; ++} ++ ++unsigned long nr_iowait_cpu(int cpu) ++{ ++ struct rq *this = cpu_rq(cpu); ++ return atomic_read(&this->nr_iowait); ++} ++ ++unsigned long nr_active(void) ++{ ++ return nr_running() + nr_uninterruptible(); ++} ++ ++/* Beyond a task running on this CPU, load is equal everywhere on BFS */ ++unsigned long this_cpu_load(void) ++{ ++ return this_rq()->rq_running + ++ ((queued_notrunning() + nr_uninterruptible()) / grq.noc); ++} ++ ++/* Variables and functions for calc_load */ ++static unsigned long calc_load_update; ++unsigned long avenrun[3]; ++EXPORT_SYMBOL(avenrun); ++ ++/** ++ * get_avenrun - get the load average array ++ * @loads: pointer to dest load array ++ * @offset: offset to add ++ * @shift: shift count to shift the result left ++ * ++ * These values are estimates at best, so no need for locking. ++ */ ++void get_avenrun(unsigned long *loads, unsigned long offset, int shift) ++{ ++ loads[0] = (avenrun[0] + offset) << shift; ++ loads[1] = (avenrun[1] + offset) << shift; ++ loads[2] = (avenrun[2] + offset) << shift; ++} ++ ++static unsigned long ++calc_load(unsigned long load, unsigned long exp, unsigned long active) ++{ ++ load *= exp; ++ load += active * (FIXED_1 - exp); ++ return load >> FSHIFT; ++} ++ ++/* ++ * calc_load - update the avenrun load estimates every LOAD_FREQ seconds. ++ */ ++void calc_global_load(unsigned long ticks) ++{ ++ long active; ++ ++ if (time_before(jiffies, calc_load_update)) ++ return; ++ active = nr_active() * FIXED_1; ++ ++ avenrun[0] = calc_load(avenrun[0], EXP_1, active); ++ avenrun[1] = calc_load(avenrun[1], EXP_5, active); ++ avenrun[2] = calc_load(avenrun[2], EXP_15, active); ++ ++ calc_load_update = jiffies + LOAD_FREQ; ++} ++ ++DEFINE_PER_CPU(struct kernel_stat, kstat); ++ ++EXPORT_PER_CPU_SYMBOL(kstat); ++ ++#ifdef CONFIG_IRQ_TIME_ACCOUNTING ++ ++/* ++ * There are no locks covering percpu hardirq/softirq time. ++ * They are only modified in account_system_vtime, on corresponding CPU ++ * with interrupts disabled. So, writes are safe. ++ * They are read and saved off onto struct rq in update_rq_clock(). ++ * This may result in other CPU reading this CPU's irq time and can ++ * race with irq/account_system_vtime on this CPU. We would either get old ++ * or new value with a side effect of accounting a slice of irq time to wrong ++ * task when irq is in progress while we read rq->clock. That is a worthy ++ * compromise in place of having locks on each irq in account_system_time. 
++ */ ++static DEFINE_PER_CPU(u64, cpu_hardirq_time); ++static DEFINE_PER_CPU(u64, cpu_softirq_time); ++ ++static DEFINE_PER_CPU(u64, irq_start_time); ++static int sched_clock_irqtime; ++ ++void enable_sched_clock_irqtime(void) ++{ ++ sched_clock_irqtime = 1; ++} ++ ++void disable_sched_clock_irqtime(void) ++{ ++ sched_clock_irqtime = 0; ++} ++ ++#ifndef CONFIG_64BIT ++static DEFINE_PER_CPU(seqcount_t, irq_time_seq); ++ ++static inline void irq_time_write_begin(void) ++{ ++ __this_cpu_inc(irq_time_seq.sequence); ++ smp_wmb(); ++} ++ ++static inline void irq_time_write_end(void) ++{ ++ smp_wmb(); ++ __this_cpu_inc(irq_time_seq.sequence); ++} ++ ++static inline u64 irq_time_read(int cpu) ++{ ++ u64 irq_time; ++ unsigned seq; ++ ++ do { ++ seq = read_seqcount_begin(&per_cpu(irq_time_seq, cpu)); ++ irq_time = per_cpu(cpu_softirq_time, cpu) + ++ per_cpu(cpu_hardirq_time, cpu); ++ } while (read_seqcount_retry(&per_cpu(irq_time_seq, cpu), seq)); ++ ++ return irq_time; ++} ++#else /* CONFIG_64BIT */ ++static inline void irq_time_write_begin(void) ++{ ++} ++ ++static inline void irq_time_write_end(void) ++{ ++} ++ ++static inline u64 irq_time_read(int cpu) ++{ ++ return per_cpu(cpu_softirq_time, cpu) + per_cpu(cpu_hardirq_time, cpu); ++} ++#endif /* CONFIG_64BIT */ ++ ++/* ++ * Called before incrementing preempt_count on {soft,}irq_enter ++ * and before decrementing preempt_count on {soft,}irq_exit. ++ */ ++void account_system_vtime(struct task_struct *curr) ++{ ++ unsigned long flags; ++ s64 delta; ++ int cpu; ++ ++ if (!sched_clock_irqtime) ++ return; ++ ++ local_irq_save(flags); ++ ++ cpu = smp_processor_id(); ++ delta = sched_clock_cpu(cpu) - __this_cpu_read(irq_start_time); ++ __this_cpu_add(irq_start_time, delta); ++ ++ irq_time_write_begin(); ++ /* ++ * We do not account for softirq time from ksoftirqd here. ++ * We want to continue accounting softirq time to ksoftirqd thread ++ * in that case, so as not to confuse scheduler with a special task ++ * that do not consume any time, but still wants to run. ++ */ ++ if (hardirq_count()) ++ __this_cpu_add(cpu_hardirq_time, delta); ++ else if (in_serving_softirq() && curr != this_cpu_ksoftirqd()) ++ __this_cpu_add(cpu_softirq_time, delta); ++ ++ irq_time_write_end(); ++ local_irq_restore(flags); ++} ++EXPORT_SYMBOL_GPL(account_system_vtime); ++ ++#endif /* CONFIG_IRQ_TIME_ACCOUNTING */ ++ ++#ifdef CONFIG_PARAVIRT ++static inline u64 steal_ticks(u64 steal) ++{ ++ if (unlikely(steal > NSEC_PER_SEC)) ++ return div_u64(steal, TICK_NSEC); ++ ++ return __iter_div_u64_rem(steal, TICK_NSEC, &steal); ++} ++#endif ++ ++static void update_rq_clock_task(struct rq *rq, s64 delta) ++{ ++#ifdef CONFIG_IRQ_TIME_ACCOUNTING ++ s64 irq_delta = irq_time_read(cpu_of(rq)) - rq->prev_irq_time; ++ ++ /* ++ * Since irq_time is only updated on {soft,}irq_exit, we might run into ++ * this case when a previous update_rq_clock() happened inside a ++ * {soft,}irq region. ++ * ++ * When this happens, we stop ->clock_task and only update the ++ * prev_irq_time stamp to account for the part that fit, so that a next ++ * update will consume the rest. This ensures ->clock_task is ++ * monotonic. ++ * ++ * It does however cause some slight miss-attribution of {soft,}irq ++ * time, a more accurate solution would be to update the irq_time using ++ * the current rq->clock timestamp, except that would require using ++ * atomic ops. 
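++ *
++ * Hence the clamp at the top of the code below: irq_delta is capped at
++ * delta so the remaining delta added to clock_task can never be negative.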
++ */ ++ if (irq_delta > delta) ++ irq_delta = delta; ++ ++ rq->prev_irq_time += irq_delta; ++ delta -= irq_delta; ++#endif ++#ifdef CONFIG_PARAVIRT_TIME_ACCOUNTING ++ if (static_branch((¶virt_steal_rq_enabled))) { ++ u64 st, steal = paravirt_steal_clock(cpu_of(rq)); ++ ++ steal -= rq->prev_steal_time_rq; ++ ++ if (unlikely(steal > delta)) ++ steal = delta; ++ ++ st = steal_ticks(steal); ++ steal = st * TICK_NSEC; ++ ++ rq->prev_steal_time_rq += steal; ++ ++ delta -= steal; ++ } ++#endif ++ ++ rq->clock_task += delta; ++} ++ ++#ifdef CONFIG_IRQ_TIME_ACCOUNTING ++static void irqtime_account_hi_si(void) ++{ ++ struct cpu_usage_stat *cpustat = &kstat_this_cpu.cpustat; ++ u64 latest_ns; ++ s64 ns_diff; ++ ++ latest_ns = nsecs_to_cputime64(this_cpu_read(cpu_hardirq_time)); ++ ns_diff = latest_ns - cpustat->irq; ++ if (ns_diff > 0) ++ cpustat->irq = cputime64_add(cpustat->irq, ns_diff); ++ ++ latest_ns = nsecs_to_cputime64(this_cpu_read(cpu_softirq_time)); ++ ns_diff = latest_ns - cpustat->softirq; ++ if (ns_diff > 0) ++ cpustat->softirq = cputime64_add(cpustat->softirq, ns_diff); ++} ++#else /* CONFIG_IRQ_TIME_ACCOUNTING */ ++ ++#define sched_clock_irqtime (0) ++ ++static inline void irqtime_account_hi_si(void) ++{ ++} ++#endif /* CONFIG_IRQ_TIME_ACCOUNTING */ ++ ++static __always_inline bool steal_account_process_tick(void) ++{ ++#ifdef CONFIG_PARAVIRT ++ if (static_branch(¶virt_steal_enabled)) { ++ u64 steal, st = 0; ++ ++ steal = paravirt_steal_clock(smp_processor_id()); ++ steal -= this_rq()->prev_steal_time; ++ ++ st = steal_ticks(steal); ++ this_rq()->prev_steal_time += st * TICK_NSEC; ++ ++ account_steal_time(st); ++ return st; ++ } ++#endif ++ return false; ++} ++ ++/* ++ * On each tick, see what percentage of that tick was attributed to each ++ * component and add the percentage to the _pc values. Once a _pc value has ++ * accumulated one tick's worth, account for that. This means the total ++ * percentage of load components will always be 128 (pseudo 100) per tick. 
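++ *
++ * For example, a tick spent roughly one quarter in userspace and the rest
++ * idle adds 32 to user_pc and 96 to idle_pc; whenever a counter reaches 128
++ * a whole jiffy is credited to the matching cpustat field and only the
++ * remainder is kept (the "%= 128" in the helpers below).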
++ */ ++static void pc_idle_time(struct rq *rq, unsigned long pc) ++{ ++ struct cpu_usage_stat *cpustat = &kstat_this_cpu.cpustat; ++ cputime64_t tmp = cputime_to_cputime64(cputime_one_jiffy); ++ ++ if (atomic_read(&rq->nr_iowait) > 0) { ++ rq->iowait_pc += pc; ++ if (rq->iowait_pc >= 128) { ++ rq->iowait_pc %= 128; ++ cpustat->iowait = cputime64_add(cpustat->iowait, tmp); ++ } ++ } else { ++ rq->idle_pc += pc; ++ if (rq->idle_pc >= 128) { ++ rq->idle_pc %= 128; ++ cpustat->idle = cputime64_add(cpustat->idle, tmp); ++ } ++ } ++} ++ ++static void ++pc_system_time(struct rq *rq, struct task_struct *p, int hardirq_offset, ++ unsigned long pc, unsigned long ns) ++{ ++ struct cpu_usage_stat *cpustat = &kstat_this_cpu.cpustat; ++ cputime_t one_jiffy_scaled = cputime_to_scaled(cputime_one_jiffy); ++ cputime64_t tmp = cputime_to_cputime64(cputime_one_jiffy); ++ ++ p->stime_pc += pc; ++ if (p->stime_pc >= 128) { ++ p->stime_pc %= 128; ++ p->stime = cputime_add(p->stime, cputime_one_jiffy); ++ p->stimescaled = cputime_add(p->stimescaled, one_jiffy_scaled); ++ account_group_system_time(p, cputime_one_jiffy); ++ acct_update_integrals(p); ++ } ++ p->sched_time += ns; ++ ++ if (hardirq_count() - hardirq_offset) { ++ rq->irq_pc += pc; ++ if (rq->irq_pc >= 128) { ++ rq->irq_pc %= 128; ++ cpustat->irq = cputime64_add(cpustat->irq, tmp); ++ } ++ } else if (in_serving_softirq()) { ++ rq->softirq_pc += pc; ++ if (rq->softirq_pc >= 128) { ++ rq->softirq_pc %= 128; ++ cpustat->softirq = cputime64_add(cpustat->softirq, tmp); ++ } ++ } else { ++ rq->system_pc += pc; ++ if (rq->system_pc >= 128) { ++ rq->system_pc %= 128; ++ cpustat->system = cputime64_add(cpustat->system, tmp); ++ } ++ } ++} ++ ++static void pc_user_time(struct rq *rq, struct task_struct *p, ++ unsigned long pc, unsigned long ns) ++{ ++ struct cpu_usage_stat *cpustat = &kstat_this_cpu.cpustat; ++ cputime_t one_jiffy_scaled = cputime_to_scaled(cputime_one_jiffy); ++ cputime64_t tmp = cputime_to_cputime64(cputime_one_jiffy); ++ ++ p->utime_pc += pc; ++ if (p->utime_pc >= 128) { ++ p->utime_pc %= 128; ++ p->utime = cputime_add(p->utime, cputime_one_jiffy); ++ p->utimescaled = cputime_add(p->utimescaled, one_jiffy_scaled); ++ account_group_user_time(p, cputime_one_jiffy); ++ acct_update_integrals(p); ++ } ++ p->sched_time += ns; ++ ++ if (this_cpu_ksoftirqd() == p) { ++ /* ++ * ksoftirqd time do not get accounted in cpu_softirq_time. ++ * So, we have to handle it separately here. ++ */ ++ rq->softirq_pc += pc; ++ if (rq->softirq_pc >= 128) { ++ rq->softirq_pc %= 128; ++ cpustat->softirq = cputime64_add(cpustat->softirq, tmp); ++ } ++ } ++ ++ if (TASK_NICE(p) > 0 || idleprio_task(p)) { ++ rq->nice_pc += pc; ++ if (rq->nice_pc >= 128) { ++ rq->nice_pc %= 128; ++ cpustat->nice = cputime64_add(cpustat->nice, tmp); ++ } ++ } else { ++ rq->user_pc += pc; ++ if (rq->user_pc >= 128) { ++ rq->user_pc %= 128; ++ cpustat->user = cputime64_add(cpustat->user, tmp); ++ } ++ } ++} ++ ++/* ++ * Convert nanoseconds to pseudo percentage of one tick. Use 128 for fast ++ * shifts instead of 100 ++ */ ++#define NS_TO_PC(NS) (NS * 128 / JIFFY_NS) ++ ++/* ++ * This is called on clock ticks and on context switches. ++ * Bank in p->sched_time the ns elapsed since the last tick or switch. ++ * CPU scheduler quota accounting is also performed here in microseconds. 
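++ *
++ * As a worked example of NS_TO_PC() above, assuming HZ=250 (a 4ms jiffy):
++ * 1,000,000ns of elapsed time converts to 1,000,000 * 128 / 4,000,000 = 32
++ * pseudo-percent, i.e. one quarter of the 128 that make up a full tick.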
++ */ ++static void ++update_cpu_clock(struct rq *rq, struct task_struct *p, int tick) ++{ ++ long account_ns = rq->clock - rq->timekeep_clock; ++ struct task_struct *idle = rq->idle; ++ unsigned long account_pc; ++ ++ if (unlikely(account_ns < 0)) ++ account_ns = 0; ++ ++ account_pc = NS_TO_PC(account_ns); ++ ++ if (tick) { ++ int user_tick; ++ ++ /* Accurate tick timekeeping */ ++ rq->account_pc += account_pc - 128; ++ if (rq->account_pc < 0) { ++ /* ++ * Small errors in micro accounting may not make the ++ * accounting add up to 128 each tick so we keep track ++ * of the percentage and round it up when less than 128 ++ */ ++ account_pc += -rq->account_pc; ++ rq->account_pc = 0; ++ } ++ if (steal_account_process_tick()) ++ goto ts_account; ++ ++ user_tick = user_mode(get_irq_regs()); ++ ++ if (user_tick) ++ pc_user_time(rq, p, account_pc, account_ns); ++ else if (p != idle || (irq_count() != HARDIRQ_OFFSET)) ++ pc_system_time(rq, p, HARDIRQ_OFFSET, ++ account_pc, account_ns); ++ else ++ pc_idle_time(rq, account_pc); ++ ++ if (sched_clock_irqtime) ++ irqtime_account_hi_si(); ++ } else { ++ /* Accurate subtick timekeeping */ ++ rq->account_pc += account_pc; ++ if (p == idle) ++ pc_idle_time(rq, account_pc); ++ else ++ pc_user_time(rq, p, account_pc, account_ns); ++ } ++ ++ts_account: ++ /* time_slice accounting is done in usecs to avoid overflow on 32bit */ ++ if (rq->rq_policy != SCHED_FIFO && p != idle) { ++ s64 time_diff = rq->clock - rq->rq_last_ran; ++ ++ niffy_diff(&time_diff, 1); ++ rq->rq_time_slice -= NS_TO_US(time_diff); ++ } ++ rq->rq_last_ran = rq->timekeep_clock = rq->clock; ++} ++ ++/* ++ * Return any ns on the sched_clock that have not yet been accounted in ++ * @p in case that task is currently running. ++ * ++ * Called with task_grq_lock() held. ++ */ ++static u64 do_task_delta_exec(struct task_struct *p, struct rq *rq) ++{ ++ u64 ns = 0; ++ ++ if (p == rq->curr) { ++ update_clocks(rq); ++ ns = rq->clock_task - rq->rq_last_ran; ++ if (unlikely((s64)ns < 0)) ++ ns = 0; ++ } ++ ++ return ns; ++} ++ ++unsigned long long task_delta_exec(struct task_struct *p) ++{ ++ unsigned long flags; ++ struct rq *rq; ++ u64 ns; ++ ++ rq = task_grq_lock(p, &flags); ++ ns = do_task_delta_exec(p, rq); ++ task_grq_unlock(&flags); ++ ++ return ns; ++} ++ ++/* ++ * Return accounted runtime for the task. ++ * In case the task is currently running, return the runtime plus current's ++ * pending runtime that have not been accounted yet. ++ */ ++unsigned long long task_sched_runtime(struct task_struct *p) ++{ ++ unsigned long flags; ++ struct rq *rq; ++ u64 ns; ++ ++ rq = task_grq_lock(p, &flags); ++ ns = p->sched_time + do_task_delta_exec(p, rq); ++ task_grq_unlock(&flags); ++ ++ return ns; ++} ++ ++/* Compatibility crap for removal */ ++void account_user_time(struct task_struct *p, cputime_t cputime, ++ cputime_t cputime_scaled) ++{ ++} ++ ++void account_idle_time(cputime_t cputime) ++{ ++} ++ ++/* ++ * Account guest cpu time to a process. ++ * @p: the process that the cpu time gets accounted to ++ * @cputime: the cpu time spent in virtual machine since the last update ++ * @cputime_scaled: cputime scaled by cpu frequency ++ */ ++static void account_guest_time(struct task_struct *p, cputime_t cputime, ++ cputime_t cputime_scaled) ++{ ++ cputime64_t tmp; ++ struct cpu_usage_stat *cpustat = &kstat_this_cpu.cpustat; ++ ++ tmp = cputime_to_cputime64(cputime); ++ ++ /* Add guest time to process. 
*/ ++ p->utime = cputime_add(p->utime, cputime); ++ p->utimescaled = cputime_add(p->utimescaled, cputime_scaled); ++ account_group_user_time(p, cputime); ++ p->gtime = cputime_add(p->gtime, cputime); ++ ++ /* Add guest time to cpustat. */ ++ if (TASK_NICE(p) > 0) { ++ cpustat->nice = cputime64_add(cpustat->nice, tmp); ++ cpustat->guest_nice = cputime64_add(cpustat->guest_nice, tmp); ++ } else { ++ cpustat->user = cputime64_add(cpustat->user, tmp); ++ cpustat->guest = cputime64_add(cpustat->guest, tmp); ++ } ++} ++ ++/* ++ * Account system cpu time to a process and desired cpustat field ++ * @p: the process that the cpu time gets accounted to ++ * @cputime: the cpu time spent in kernel space since the last update ++ * @cputime_scaled: cputime scaled by cpu frequency ++ * @target_cputime64: pointer to cpustat field that has to be updated ++ */ ++static inline ++void __account_system_time(struct task_struct *p, cputime_t cputime, ++ cputime_t cputime_scaled, cputime64_t *target_cputime64) ++{ ++ cputime64_t tmp = cputime_to_cputime64(cputime); ++ ++ /* Add system time to process. */ ++ p->stime = cputime_add(p->stime, cputime); ++ p->stimescaled = cputime_add(p->stimescaled, cputime_scaled); ++ account_group_system_time(p, cputime); ++ ++ /* Add system time to cpustat. */ ++ *target_cputime64 = cputime64_add(*target_cputime64, tmp); ++ ++ /* Account for system time used */ ++ acct_update_integrals(p); ++} ++ ++/* ++ * Account system cpu time to a process. ++ * @p: the process that the cpu time gets accounted to ++ * @hardirq_offset: the offset to subtract from hardirq_count() ++ * @cputime: the cpu time spent in kernel space since the last update ++ * @cputime_scaled: cputime scaled by cpu frequency ++ * This is for guest only now. ++ */ ++void account_system_time(struct task_struct *p, int hardirq_offset, ++ cputime_t cputime, cputime_t cputime_scaled) ++{ ++ ++ if ((p->flags & PF_VCPU) && (irq_count() - hardirq_offset == 0)) ++ account_guest_time(p, cputime, cputime_scaled); ++} ++ ++/* ++ * Account for involuntary wait time. ++ * @steal: the cpu time spent in involuntary wait ++ */ ++void account_steal_time(cputime_t cputime) ++{ ++ struct cpu_usage_stat *cpustat = &kstat_this_cpu.cpustat; ++ cputime64_t cputime64 = cputime_to_cputime64(cputime); ++ ++ cpustat->steal = cputime64_add(cpustat->steal, cputime64); ++} ++ ++/* ++ * Account for idle time. ++ * @cputime: the cpu time spent in idle wait ++ */ ++static void account_idle_times(cputime_t cputime) ++{ ++ struct cpu_usage_stat *cpustat = &kstat_this_cpu.cpustat; ++ cputime64_t cputime64 = cputime_to_cputime64(cputime); ++ struct rq *rq = this_rq(); ++ ++ if (atomic_read(&rq->nr_iowait) > 0) ++ cpustat->iowait = cputime64_add(cpustat->iowait, cputime64); ++ else ++ cpustat->idle = cputime64_add(cpustat->idle, cputime64); ++} ++ ++#ifndef CONFIG_VIRT_CPU_ACCOUNTING ++ ++void account_process_tick(struct task_struct *p, int user_tick) ++{ ++} ++ ++/* ++ * Account multiple ticks of steal time. ++ * @p: the process from which the cpu time has been stolen ++ * @ticks: number of stolen ticks ++ */ ++void account_steal_ticks(unsigned long ticks) ++{ ++ account_steal_time(jiffies_to_cputime(ticks)); ++} ++ ++/* ++ * Account multiple ticks of idle time. 
++ * @ticks: number of stolen ticks ++ */ ++void account_idle_ticks(unsigned long ticks) ++{ ++ account_idle_times(jiffies_to_cputime(ticks)); ++} ++#endif ++ ++static inline void grq_iso_lock(void) ++ __acquires(grq.iso_lock) ++{ ++ raw_spin_lock(&grq.iso_lock); ++} ++ ++static inline void grq_iso_unlock(void) ++ __releases(grq.iso_lock) ++{ ++ raw_spin_unlock(&grq.iso_lock); ++} ++ ++/* ++ * Functions to test for when SCHED_ISO tasks have used their allocated ++ * quota as real time scheduling and convert them back to SCHED_NORMAL. ++ * Where possible, the data is tested lockless, to avoid grabbing iso_lock ++ * because the occasional inaccurate result won't matter. However the ++ * tick data is only ever modified under lock. iso_refractory is only simply ++ * set to 0 or 1 so it's not worth grabbing the lock yet again for that. ++ */ ++static void set_iso_refractory(void) ++{ ++ grq.iso_refractory = 1; ++} ++ ++static void clear_iso_refractory(void) ++{ ++ grq.iso_refractory = 0; ++} ++ ++/* ++ * Test if SCHED_ISO tasks have run longer than their alloted period as RT ++ * tasks and set the refractory flag if necessary. There is 10% hysteresis ++ * for unsetting the flag. 115/128 is ~90/100 as a fast shift instead of a ++ * slow division. ++ */ ++static unsigned int test_ret_isorefractory(struct rq *rq) ++{ ++ if (likely(!grq.iso_refractory)) { ++ if (grq.iso_ticks > ISO_PERIOD * sched_iso_cpu) ++ set_iso_refractory(); ++ } else { ++ if (grq.iso_ticks < ISO_PERIOD * (sched_iso_cpu * 115 / 128)) ++ clear_iso_refractory(); ++ } ++ return grq.iso_refractory; ++} ++ ++static void iso_tick(void) ++{ ++ grq_iso_lock(); ++ grq.iso_ticks += 100; ++ grq_iso_unlock(); ++} ++ ++/* No SCHED_ISO task was running so decrease rq->iso_ticks */ ++static inline void no_iso_tick(void) ++{ ++ if (grq.iso_ticks) { ++ grq_iso_lock(); ++ grq.iso_ticks -= grq.iso_ticks / ISO_PERIOD + 1; ++ if (unlikely(grq.iso_refractory && grq.iso_ticks < ++ ISO_PERIOD * (sched_iso_cpu * 115 / 128))) ++ clear_iso_refractory(); ++ grq_iso_unlock(); ++ } ++} ++ ++static bool rq_running_iso(struct rq *rq) ++{ ++ return rq->rq_prio == ISO_PRIO; ++} ++ ++/* This manages tasks that have run out of timeslice during a scheduler_tick */ ++static void task_running_tick(struct rq *rq) ++{ ++ struct task_struct *p; ++ ++ /* ++ * If a SCHED_ISO task is running we increment the iso_ticks. In ++ * order to prevent SCHED_ISO tasks from causing starvation in the ++ * presence of true RT tasks we account those as iso_ticks as well. ++ */ ++ if ((rt_queue(rq) || (iso_queue(rq) && !grq.iso_refractory))) { ++ if (grq.iso_ticks <= (ISO_PERIOD * 128) - 128) ++ iso_tick(); ++ } else ++ no_iso_tick(); ++ ++ if (iso_queue(rq)) { ++ if (unlikely(test_ret_isorefractory(rq))) { ++ if (rq_running_iso(rq)) { ++ /* ++ * SCHED_ISO task is running as RT and limit ++ * has been hit. Force it to reschedule as ++ * SCHED_NORMAL by zeroing its time_slice ++ */ ++ rq->rq_time_slice = 0; ++ } ++ } ++ } ++ ++ /* SCHED_FIFO tasks never run out of timeslice. */ ++ if (rq->rq_policy == SCHED_FIFO) ++ return; ++ /* ++ * Tasks that were scheduled in the first half of a tick are not ++ * allowed to run into the 2nd half of the next tick if they will ++ * run out of time slice in the interim. Otherwise, if they have ++ * less than RESCHED_US μs of time slice left they will be rescheduled. 
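++ *
++ * (Editorial note, not part of the original patch: assuming the usual
++ * meaning of the names, a jiffy is 1/HZ seconds, so HALF_JIFFY_US is
++ * roughly 500 us at HZ=1000 and 5000 us at HZ=100; the actual values of
++ * HALF_JIFFY_US and RESCHED_US are defined earlier in this patch and are
++ * only assumed here for illustration.)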
++ */ ++ if (rq->dither) { ++ if (rq->rq_time_slice > HALF_JIFFY_US) ++ return; ++ else ++ rq->rq_time_slice = 0; ++ } else if (rq->rq_time_slice >= RESCHED_US) ++ return; ++ ++ /* p->time_slice < RESCHED_US. We only modify task_struct under grq lock */ ++ p = rq->curr; ++ requeue_task(p); ++ grq_lock(); ++ set_tsk_need_resched(p); ++ grq_unlock(); ++} ++ ++void wake_up_idle_cpu(int cpu); ++ ++/* ++ * This function gets called by the timer code, with HZ frequency. ++ * We call it with interrupts disabled. The data modified is all ++ * local to struct rq so we don't need to grab grq lock. ++ */ ++void scheduler_tick(void) ++{ ++ int cpu __maybe_unused = smp_processor_id(); ++ struct rq *rq = cpu_rq(cpu); ++ ++ sched_clock_tick(); ++ /* grq lock not grabbed, so only update rq clock */ ++ update_rq_clock(rq); ++ update_cpu_clock(rq, rq->curr, 1); ++ if (!rq_idle(rq)) ++ task_running_tick(rq); ++ else ++ no_iso_tick(); ++ rq->last_tick = rq->clock; ++ perf_event_task_tick(); ++} ++ ++notrace unsigned long get_parent_ip(unsigned long addr) ++{ ++ if (in_lock_functions(addr)) { ++ addr = CALLER_ADDR2; ++ if (in_lock_functions(addr)) ++ addr = CALLER_ADDR3; ++ } ++ return addr; ++} ++ ++#if defined(CONFIG_PREEMPT) && (defined(CONFIG_DEBUG_PREEMPT) || \ ++ defined(CONFIG_PREEMPT_TRACER)) ++void __kprobes add_preempt_count(int val) ++{ ++#ifdef CONFIG_DEBUG_PREEMPT ++ /* ++ * Underflow? ++ */ ++ if (DEBUG_LOCKS_WARN_ON((preempt_count() < 0))) ++ return; ++#endif ++ preempt_count() += val; ++#ifdef CONFIG_DEBUG_PREEMPT ++ /* ++ * Spinlock count overflowing soon? ++ */ ++ DEBUG_LOCKS_WARN_ON((preempt_count() & PREEMPT_MASK) >= ++ PREEMPT_MASK - 10); ++#endif ++ if (preempt_count() == val) ++ trace_preempt_off(CALLER_ADDR0, get_parent_ip(CALLER_ADDR1)); ++} ++EXPORT_SYMBOL(add_preempt_count); ++ ++void __kprobes sub_preempt_count(int val) ++{ ++#ifdef CONFIG_DEBUG_PREEMPT ++ /* ++ * Underflow? ++ */ ++ if (DEBUG_LOCKS_WARN_ON(val > preempt_count())) ++ return; ++ /* ++ * Is the spinlock portion underflowing? ++ */ ++ if (DEBUG_LOCKS_WARN_ON((val < PREEMPT_MASK) && ++ !(preempt_count() & PREEMPT_MASK))) ++ return; ++#endif ++ ++ if (preempt_count() == val) ++ trace_preempt_on(CALLER_ADDR0, get_parent_ip(CALLER_ADDR1)); ++ preempt_count() -= val; ++} ++EXPORT_SYMBOL(sub_preempt_count); ++#endif ++ ++/* ++ * Deadline is "now" in niffies + (offset by priority). Setting the deadline ++ * is the key to everything. It distributes cpu fairly amongst tasks of the ++ * same nice value, it proportions cpu according to nice level, it means the ++ * task that last woke up the longest ago has the earliest deadline, thus ++ * ensuring that interactive tasks get low latency on wake up. The CPU ++ * proportion works out to the square of the virtual deadline difference, so ++ * this equation will give nice 19 3% CPU compared to nice 0. ++ */ ++static inline u64 prio_deadline_diff(int user_prio) ++{ ++ return (prio_ratios[user_prio] * rr_interval * (MS_TO_NS(1) / 128)); ++} ++ ++static inline u64 task_deadline_diff(struct task_struct *p) ++{ ++ return prio_deadline_diff(TASK_USER_PRIO(p)); ++} ++ ++static inline u64 static_deadline_diff(int static_prio) ++{ ++ return prio_deadline_diff(USER_PRIO(static_prio)); ++} ++ ++static inline int longest_deadline_diff(void) ++{ ++ return prio_deadline_diff(39); ++} ++ ++static inline int ms_longest_deadline_diff(void) ++{ ++ return NS_TO_MS(longest_deadline_diff()); ++} ++ ++/* ++ * The time_slice is only refilled when it is empty and that is when we set a ++ * new deadline. 
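++ *
++ * (Editorial note, not part of the original patch: the deadline assigned
++ * below is grq.niffies plus task_deadline_diff(p), which per
++ * prio_deadline_diff() above works out to rr_interval milliseconds,
++ * expressed in nanoseconds, scaled by prio_ratios[TASK_USER_PRIO(p)]/128.
++ * If the base entry of prio_ratios[] is 128, as assumed here, the most
++ * favourable nice level gets an offset of exactly rr_interval ms.)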
++ */ ++static void time_slice_expired(struct task_struct *p) ++{ ++ p->time_slice = timeslice(); ++ p->deadline = grq.niffies + task_deadline_diff(p); ++} ++ ++/* ++ * Timeslices below RESCHED_US are considered as good as expired as there's no ++ * point rescheduling when there's so little time left. SCHED_BATCH tasks ++ * have been flagged be not latency sensitive and likely to be fully CPU ++ * bound so every time they're rescheduled they have their time_slice ++ * refilled, but get a new later deadline to have little effect on ++ * SCHED_NORMAL tasks. ++ ++ */ ++static inline void check_deadline(struct task_struct *p) ++{ ++ if (p->time_slice < RESCHED_US || batch_task(p)) ++ time_slice_expired(p); ++} ++ ++/* ++ * O(n) lookup of all tasks in the global runqueue. The real brainfuck ++ * of lock contention and O(n). It's not really O(n) as only the queued, ++ * but not running tasks are scanned, and is O(n) queued in the worst case ++ * scenario only because the right task can be found before scanning all of ++ * them. ++ * Tasks are selected in this order: ++ * Real time tasks are selected purely by their static priority and in the ++ * order they were queued, so the lowest value idx, and the first queued task ++ * of that priority value is chosen. ++ * If no real time tasks are found, the SCHED_ISO priority is checked, and ++ * all SCHED_ISO tasks have the same priority value, so they're selected by ++ * the earliest deadline value. ++ * If no SCHED_ISO tasks are found, SCHED_NORMAL tasks are selected by the ++ * earliest deadline. ++ * Finally if no SCHED_NORMAL tasks are found, SCHED_IDLEPRIO tasks are ++ * selected by the earliest deadline. ++ */ ++static inline struct ++task_struct *earliest_deadline_task(struct rq *rq, int cpu, struct task_struct *idle) ++{ ++ u64 dl, uninitialized_var(earliest_deadline); ++ struct task_struct *p, *edt = idle; ++ struct list_head *queue; ++ int idx = 0; ++ ++retry: ++ idx = find_next_bit(grq.prio_bitmap, PRIO_LIMIT, idx); ++ if (idx >= PRIO_LIMIT) ++ goto out; ++ queue = grq.queue + idx; ++ ++ if (idx < MAX_RT_PRIO) { ++ /* We found an rt task */ ++ list_for_each_entry(p, queue, run_list) { ++ /* Make sure cpu affinity is ok */ ++ if (needs_other_cpu(p, cpu)) ++ continue; ++ edt = p; ++ goto out_take; ++ } ++ /* None of the RT tasks at this priority can run on this cpu */ ++ ++idx; ++ goto retry; ++ } ++ ++ list_for_each_entry(p, queue, run_list) { ++ /* Make sure cpu affinity is ok */ ++ if (needs_other_cpu(p, cpu)) ++ continue; ++ ++ /* ++ * Soft affinity happens here by not scheduling a task with ++ * its sticky flag set that ran on a different CPU last when ++ * the CPU is scaling, or by greatly biasing against its ++ * deadline when not. ++ */ ++ if (task_rq(p) != rq && task_sticky(p)) { ++ if (scaling_rq(rq)) ++ continue; ++ else ++ dl = p->deadline + longest_deadline_diff(); ++ } else ++ dl = p->deadline; ++ ++ /* ++ * No rt tasks. Find the earliest deadline task. Now we're in ++ * O(n) territory. This is what we silenced the compiler for ++ * with uninitialized_var(): edt will always start as idle. 
++ */ ++ if (edt == idle || ++ deadline_before(dl, earliest_deadline)) { ++ earliest_deadline = dl; ++ edt = p; ++ } ++ } ++ if (edt == idle) { ++ if (++idx < PRIO_LIMIT) ++ goto retry; ++ goto out; ++ } ++out_take: ++ take_task(cpu, edt); ++out: ++ return edt; ++} ++ ++/* ++ * Print scheduling while atomic bug: ++ */ ++static noinline void __schedule_bug(struct task_struct *prev) ++{ ++ struct pt_regs *regs = get_irq_regs(); ++ ++ printk(KERN_ERR "BUG: scheduling while atomic: %s/%d/0x%08x\n", ++ prev->comm, prev->pid, preempt_count()); ++ ++ debug_show_held_locks(prev); ++ print_modules(); ++ if (irqs_disabled()) ++ print_irqtrace_events(prev); ++ ++ if (regs) ++ show_regs(regs); ++ else ++ dump_stack(); ++} ++ ++/* ++ * Various schedule()-time debugging checks and statistics: ++ */ ++static inline void schedule_debug(struct task_struct *prev) ++{ ++ /* ++ * Test if we are atomic. Since do_exit() needs to call into ++ * schedule() atomically, we ignore that path for now. ++ * Otherwise, whine if we are scheduling when we should not be. ++ */ ++ if (unlikely(in_atomic_preempt_off() && !prev->exit_state)) ++ __schedule_bug(prev); ++ rcu_sleep_check(); ++ ++ profile_hit(SCHED_PROFILING, __builtin_return_address(0)); ++ ++ schedstat_inc(this_rq(), sched_count); ++} ++ ++/* ++ * The currently running task's information is all stored in rq local data ++ * which is only modified by the local CPU, thereby allowing the data to be ++ * changed without grabbing the grq lock. ++ */ ++static inline void set_rq_task(struct rq *rq, struct task_struct *p) ++{ ++ rq->rq_time_slice = p->time_slice; ++ rq->rq_deadline = p->deadline; ++ rq->rq_last_ran = p->last_ran = rq->clock; ++ rq->rq_policy = p->policy; ++ rq->rq_prio = p->prio; ++ if (p != rq->idle) ++ rq->rq_running = true; ++ else ++ rq->rq_running = false; ++} ++ ++static void reset_rq_task(struct rq *rq, struct task_struct *p) ++{ ++ rq->rq_policy = p->policy; ++ rq->rq_prio = p->prio; ++} ++ ++/* ++ * schedule() is the main scheduler function. ++ */ ++asmlinkage void __sched schedule(void) ++{ ++ struct task_struct *prev, *next, *idle; ++ unsigned long *switch_count; ++ int deactivate, cpu; ++ struct rq *rq; ++ ++need_resched: ++ preempt_disable(); ++ ++ cpu = smp_processor_id(); ++ rq = cpu_rq(cpu); ++ idle = rq->idle; ++ rcu_note_context_switch(cpu); ++ prev = rq->curr; ++ ++ deactivate = 0; ++ schedule_debug(prev); ++ ++ grq_lock_irq(); ++ ++ switch_count = &prev->nivcsw; ++ if (prev->state && !(preempt_count() & PREEMPT_ACTIVE)) { ++ if (unlikely(signal_pending_state(prev->state, prev))) { ++ prev->state = TASK_RUNNING; ++ } else { ++ deactivate = 1; ++ /* ++ * If a worker is going to sleep, notify and ++ * ask workqueue whether it wants to wake up a ++ * task to maintain concurrency. If so, wake ++ * up the task. ++ */ ++ if (prev->flags & PF_WQ_WORKER) { ++ struct task_struct *to_wakeup; ++ ++ to_wakeup = wq_worker_sleeping(prev, cpu); ++ if (to_wakeup) { ++ /* This shouldn't happen, but does */ ++ if (unlikely(to_wakeup == prev)) ++ deactivate = 0; ++ else ++ try_to_wake_up_local(to_wakeup); ++ } ++ } ++ } ++ switch_count = &prev->nvcsw; ++ } ++ ++ /* ++ * If we are going to sleep and we have plugged IO queued, make ++ * sure to submit it to avoid deadlocks. 
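++ *
++ * (Editorial note, not part of the original patch: in that case the code
++ * below drops the grq lock, re-enables preemption, flushes the plugged IO
++ * with blk_schedule_flush_plug() and restarts schedule() from the top via
++ * the need_resched label.)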
++ */ ++ if (unlikely(deactivate && blk_needs_flush_plug(prev))) { ++ grq_unlock_irq(); ++ preempt_enable_no_resched(); ++ blk_schedule_flush_plug(prev); ++ goto need_resched; ++ } ++ ++ update_clocks(rq); ++ update_cpu_clock(rq, prev, 0); ++ if (rq->clock - rq->last_tick > HALF_JIFFY_NS) ++ rq->dither = false; ++ else ++ rq->dither = true; ++ ++ clear_tsk_need_resched(prev); ++ ++ if (prev != idle) { ++ /* Update all the information stored on struct rq */ ++ prev->time_slice = rq->rq_time_slice; ++ prev->deadline = rq->rq_deadline; ++ check_deadline(prev); ++ prev->last_ran = rq->clock; ++ ++ /* Task changed affinity off this CPU */ ++ if (needs_other_cpu(prev, cpu)) ++ resched_suitable_idle(prev); ++ else if (!deactivate) { ++ if (!queued_notrunning()) { ++ /* ++ * We now know prev is the only thing that is ++ * awaiting CPU so we can bypass rechecking for ++ * the earliest deadline task and just run it ++ * again. ++ */ ++ grq_unlock_irq(); ++ goto rerun_prev_unlocked; ++ } else ++ swap_sticky(rq, cpu, prev); ++ } ++ return_task(prev, deactivate); ++ } ++ ++ if (unlikely(!queued_notrunning())) { ++ /* ++ * This CPU is now truly idle as opposed to when idle is ++ * scheduled as a high priority task in its own right. ++ */ ++ next = idle; ++ schedstat_inc(rq, sched_goidle); ++ set_cpuidle_map(cpu); ++ } else { ++ next = earliest_deadline_task(rq, cpu, idle); ++ if (likely(next->prio != PRIO_LIMIT)) ++ clear_cpuidle_map(cpu); ++ else ++ set_cpuidle_map(cpu); ++ } ++ ++ if (likely(prev != next)) { ++ /* ++ * Don't stick tasks when a real time task is going to run as ++ * they may literally get stuck. ++ */ ++ if (rt_task(next)) ++ unstick_task(rq, prev); ++ set_rq_task(rq, next); ++ grq.nr_switches++; ++ prev->on_cpu = false; ++ next->on_cpu = true; ++ rq->curr = next; ++ ++*switch_count; ++ ++ context_switch(rq, prev, next); /* unlocks the grq */ ++ /* ++ * The context switch have flipped the stack from under us ++ * and restored the local variables which were saved when ++ * this task called schedule() in the past. prev == current ++ * is still correct, but it can be moved to another cpu/rq. ++ */ ++ cpu = smp_processor_id(); ++ rq = cpu_rq(cpu); ++ idle = rq->idle; ++ } else ++ grq_unlock_irq(); ++ ++rerun_prev_unlocked: ++ preempt_enable_no_resched(); ++ if (need_resched()) ++ goto need_resched; ++} ++EXPORT_SYMBOL(schedule); ++ ++#ifdef CONFIG_MUTEX_SPIN_ON_OWNER ++ ++static inline bool owner_running(struct mutex *lock, struct task_struct *owner) ++{ ++ if (lock->owner != owner) ++ return false; ++ ++ /* ++ * Ensure we emit the owner->on_cpu, dereference _after_ checking ++ * lock->owner still matches owner, if that fails, owner might ++ * point to free()d memory, if it still matches, the rcu_read_lock() ++ * ensures the memory stays valid. ++ */ ++ barrier(); ++ ++ return owner->on_cpu; ++} ++ ++/* ++ * Look out! "owner" is an entirely speculative pointer ++ * access and not reliable. ++ */ ++int mutex_spin_on_owner(struct mutex *lock, struct task_struct *owner) ++{ ++ rcu_read_lock(); ++ while (owner_running(lock, owner)) { ++ if (need_resched()) ++ break; ++ ++ arch_mutex_cpu_relax(); ++ } ++ rcu_read_unlock(); ++ ++ /* ++ * We break out the loop above on need_resched() and when the ++ * owner changed, which is a sign for heavy contention. Return ++ * success only when lock->owner is NULL. ++ */ ++ return lock->owner == NULL; ++} ++#endif ++ ++#ifdef CONFIG_PREEMPT ++/* ++ * this is the entry point to schedule() from in-kernel preemption ++ * off of preempt_enable. 
Kernel preemptions off return from interrupt ++ * occur there and call schedule directly. ++ */ ++asmlinkage void __sched notrace preempt_schedule(void) ++{ ++ struct thread_info *ti = current_thread_info(); ++ ++ /* ++ * If there is a non-zero preempt_count or interrupts are disabled, ++ * we do not want to preempt the current task. Just return.. ++ */ ++ if (likely(ti->preempt_count || irqs_disabled())) ++ return; ++ ++ do { ++ add_preempt_count_notrace(PREEMPT_ACTIVE); ++ schedule(); ++ sub_preempt_count_notrace(PREEMPT_ACTIVE); ++ ++ /* ++ * Check again in case we missed a preemption opportunity ++ * between schedule and now. ++ */ ++ barrier(); ++ } while (need_resched()); ++} ++EXPORT_SYMBOL(preempt_schedule); ++ ++/* ++ * this is the entry point to schedule() from kernel preemption ++ * off of irq context. ++ * Note, that this is called and return with irqs disabled. This will ++ * protect us against recursive calling from irq. ++ */ ++asmlinkage void __sched preempt_schedule_irq(void) ++{ ++ struct thread_info *ti = current_thread_info(); ++ ++ /* Catch callers which need to be fixed */ ++ BUG_ON(ti->preempt_count || !irqs_disabled()); ++ ++ do { ++ add_preempt_count(PREEMPT_ACTIVE); ++ local_irq_enable(); ++ schedule(); ++ local_irq_disable(); ++ sub_preempt_count(PREEMPT_ACTIVE); ++ ++ /* ++ * Check again in case we missed a preemption opportunity ++ * between schedule and now. ++ */ ++ barrier(); ++ } while (need_resched()); ++} ++ ++#endif /* CONFIG_PREEMPT */ ++ ++int default_wake_function(wait_queue_t *curr, unsigned mode, int wake_flags, ++ void *key) ++{ ++ return try_to_wake_up(curr->private, mode, wake_flags); ++} ++EXPORT_SYMBOL(default_wake_function); ++ ++/* ++ * The core wakeup function. Non-exclusive wakeups (nr_exclusive == 0) just ++ * wake everything up. If it's an exclusive wakeup (nr_exclusive == small +ve ++ * number) then we wake all the non-exclusive tasks and one exclusive task. ++ * ++ * There are circumstances in which we can try to wake a task which has already ++ * started to run but is not in state TASK_RUNNING. try_to_wake_up() returns ++ * zero in this (rare) case, and we handle it by continuing to scan the queue. ++ */ ++static void __wake_up_common(wait_queue_head_t *q, unsigned int mode, ++ int nr_exclusive, int wake_flags, void *key) ++{ ++ struct list_head *tmp, *next; ++ ++ list_for_each_safe(tmp, next, &q->task_list) { ++ wait_queue_t *curr = list_entry(tmp, wait_queue_t, task_list); ++ unsigned int flags = curr->flags; ++ ++ if (curr->func(curr, mode, wake_flags, key) && ++ (flags & WQ_FLAG_EXCLUSIVE) && !--nr_exclusive) ++ break; ++ } ++} ++ ++/** ++ * __wake_up - wake up threads blocked on a waitqueue. ++ * @q: the waitqueue ++ * @mode: which threads ++ * @nr_exclusive: how many wake-one or wake-many threads to wake up ++ * @key: is directly passed to the wakeup function ++ * ++ * It may be assumed that this function implies a write memory barrier before ++ * changing the task state if and only if any tasks are woken up. ++ */ ++void __wake_up(wait_queue_head_t *q, unsigned int mode, ++ int nr_exclusive, void *key) ++{ ++ unsigned long flags; ++ ++ spin_lock_irqsave(&q->lock, flags); ++ __wake_up_common(q, mode, nr_exclusive, 0, key); ++ spin_unlock_irqrestore(&q->lock, flags); ++} ++EXPORT_SYMBOL(__wake_up); ++ ++/* ++ * Same as __wake_up but called with the spinlock in wait_queue_head_t held. 
++ */ ++void __wake_up_locked(wait_queue_head_t *q, unsigned int mode) ++{ ++ __wake_up_common(q, mode, 1, 0, NULL); ++} ++EXPORT_SYMBOL_GPL(__wake_up_locked); ++ ++void __wake_up_locked_key(wait_queue_head_t *q, unsigned int mode, void *key) ++{ ++ __wake_up_common(q, mode, 1, 0, key); ++} ++EXPORT_SYMBOL_GPL(__wake_up_locked_key); ++ ++/** ++ * __wake_up_sync_key - wake up threads blocked on a waitqueue. ++ * @q: the waitqueue ++ * @mode: which threads ++ * @nr_exclusive: how many wake-one or wake-many threads to wake up ++ * @key: opaque value to be passed to wakeup targets ++ * ++ * The sync wakeup differs that the waker knows that it will schedule ++ * away soon, so while the target thread will be woken up, it will not ++ * be migrated to another CPU - ie. the two threads are 'synchronised' ++ * with each other. This can prevent needless bouncing between CPUs. ++ * ++ * On UP it can prevent extra preemption. ++ * ++ * It may be assumed that this function implies a write memory barrier before ++ * changing the task state if and only if any tasks are woken up. ++ */ ++void __wake_up_sync_key(wait_queue_head_t *q, unsigned int mode, ++ int nr_exclusive, void *key) ++{ ++ unsigned long flags; ++ int wake_flags = WF_SYNC; ++ ++ if (unlikely(!q)) ++ return; ++ ++ if (unlikely(!nr_exclusive)) ++ wake_flags = 0; ++ ++ spin_lock_irqsave(&q->lock, flags); ++ __wake_up_common(q, mode, nr_exclusive, wake_flags, key); ++ spin_unlock_irqrestore(&q->lock, flags); ++} ++EXPORT_SYMBOL_GPL(__wake_up_sync_key); ++ ++/** ++ * __wake_up_sync - wake up threads blocked on a waitqueue. ++ * @q: the waitqueue ++ * @mode: which threads ++ * @nr_exclusive: how many wake-one or wake-many threads to wake up ++ * ++ * The sync wakeup differs that the waker knows that it will schedule ++ * away soon, so while the target thread will be woken up, it will not ++ * be migrated to another CPU - ie. the two threads are 'synchronised' ++ * with each other. This can prevent needless bouncing between CPUs. ++ * ++ * On UP it can prevent extra preemption. ++ */ ++void __wake_up_sync(wait_queue_head_t *q, unsigned int mode, int nr_exclusive) ++{ ++ unsigned long flags; ++ int sync = 1; ++ ++ if (unlikely(!q)) ++ return; ++ ++ if (unlikely(!nr_exclusive)) ++ sync = 0; ++ ++ spin_lock_irqsave(&q->lock, flags); ++ __wake_up_common(q, mode, nr_exclusive, sync, NULL); ++ spin_unlock_irqrestore(&q->lock, flags); ++} ++EXPORT_SYMBOL_GPL(__wake_up_sync); /* For internal use only */ ++ ++/** ++ * complete: - signals a single thread waiting on this completion ++ * @x: holds the state of this particular completion ++ * ++ * This will wake up a single thread waiting on this completion. Threads will be ++ * awakened in the same order in which they were queued. ++ * ++ * See also complete_all(), wait_for_completion() and related routines. ++ * ++ * It may be assumed that this function implies a write memory barrier before ++ * changing the task state if and only if any tasks are woken up. ++ */ ++void complete(struct completion *x) ++{ ++ unsigned long flags; ++ ++ spin_lock_irqsave(&x->wait.lock, flags); ++ x->done++; ++ __wake_up_common(&x->wait, TASK_NORMAL, 1, 0, NULL); ++ spin_unlock_irqrestore(&x->wait.lock, flags); ++} ++EXPORT_SYMBOL(complete); ++ ++/** ++ * complete_all: - signals all threads waiting on this completion ++ * @x: holds the state of this particular completion ++ * ++ * This will wake up all threads waiting on this particular completion event. 
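++ *
++ * (Editorial note, not part of the original patch: the implementation
++ * below adds UINT_MAX/2 to x->done and wakes with nr_exclusive == 0, so
++ * every waiter currently queued is woken and later wait_for_completion()
++ * callers also see the completion as already done.)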
++ * ++ * It may be assumed that this function implies a write memory barrier before ++ * changing the task state if and only if any tasks are woken up. ++ */ ++void complete_all(struct completion *x) ++{ ++ unsigned long flags; ++ ++ spin_lock_irqsave(&x->wait.lock, flags); ++ x->done += UINT_MAX/2; ++ __wake_up_common(&x->wait, TASK_NORMAL, 0, 0, NULL); ++ spin_unlock_irqrestore(&x->wait.lock, flags); ++} ++EXPORT_SYMBOL(complete_all); ++ ++static inline long __sched ++do_wait_for_common(struct completion *x, long timeout, int state) ++{ ++ if (!x->done) { ++ DECLARE_WAITQUEUE(wait, current); ++ ++ __add_wait_queue_tail_exclusive(&x->wait, &wait); ++ do { ++ if (signal_pending_state(state, current)) { ++ timeout = -ERESTARTSYS; ++ break; ++ } ++ __set_current_state(state); ++ spin_unlock_irq(&x->wait.lock); ++ timeout = schedule_timeout(timeout); ++ spin_lock_irq(&x->wait.lock); ++ } while (!x->done && timeout); ++ __remove_wait_queue(&x->wait, &wait); ++ if (!x->done) ++ return timeout; ++ } ++ x->done--; ++ return timeout ?: 1; ++} ++ ++static long __sched ++wait_for_common(struct completion *x, long timeout, int state) ++{ ++ might_sleep(); ++ ++ spin_lock_irq(&x->wait.lock); ++ timeout = do_wait_for_common(x, timeout, state); ++ spin_unlock_irq(&x->wait.lock); ++ return timeout; ++} ++ ++/** ++ * wait_for_completion: - waits for completion of a task ++ * @x: holds the state of this particular completion ++ * ++ * This waits to be signaled for completion of a specific task. It is NOT ++ * interruptible and there is no timeout. ++ * ++ * See also similar routines (i.e. wait_for_completion_timeout()) with timeout ++ * and interrupt capability. Also see complete(). ++ */ ++void __sched wait_for_completion(struct completion *x) ++{ ++ wait_for_common(x, MAX_SCHEDULE_TIMEOUT, TASK_UNINTERRUPTIBLE); ++} ++EXPORT_SYMBOL(wait_for_completion); ++ ++/** ++ * wait_for_completion_timeout: - waits for completion of a task (w/timeout) ++ * @x: holds the state of this particular completion ++ * @timeout: timeout value in jiffies ++ * ++ * This waits for either a completion of a specific task to be signaled or for a ++ * specified timeout to expire. The timeout is in jiffies. It is not ++ * interruptible. ++ * ++ * The return value is 0 if timed out, and positive (at least 1, or number of ++ * jiffies left till timeout) if completed. ++ */ ++unsigned long __sched ++wait_for_completion_timeout(struct completion *x, unsigned long timeout) ++{ ++ return wait_for_common(x, timeout, TASK_UNINTERRUPTIBLE); ++} ++EXPORT_SYMBOL(wait_for_completion_timeout); ++ ++/** ++ * wait_for_completion_interruptible: - waits for completion of a task (w/intr) ++ * @x: holds the state of this particular completion ++ * ++ * This waits for completion of a specific task to be signaled. It is ++ * interruptible. ++ * ++ * The return value is -ERESTARTSYS if interrupted, 0 if completed. ++ */ ++int __sched wait_for_completion_interruptible(struct completion *x) ++{ ++ long t = wait_for_common(x, MAX_SCHEDULE_TIMEOUT, TASK_INTERRUPTIBLE); ++ if (t == -ERESTARTSYS) ++ return t; ++ return 0; ++} ++EXPORT_SYMBOL(wait_for_completion_interruptible); ++ ++/** ++ * wait_for_completion_interruptible_timeout: - waits for completion (w/(to,intr)) ++ * @x: holds the state of this particular completion ++ * @timeout: timeout value in jiffies ++ * ++ * This waits for either a completion of a specific task to be signaled or for a ++ * specified timeout to expire. It is interruptible. The timeout is in jiffies. 
++ * ++ * The return value is -ERESTARTSYS if interrupted, 0 if timed out, ++ * positive (at least 1, or number of jiffies left till timeout) if completed. ++ */ ++long __sched ++wait_for_completion_interruptible_timeout(struct completion *x, ++ unsigned long timeout) ++{ ++ return wait_for_common(x, timeout, TASK_INTERRUPTIBLE); ++} ++EXPORT_SYMBOL(wait_for_completion_interruptible_timeout); ++ ++/** ++ * wait_for_completion_killable: - waits for completion of a task (killable) ++ * @x: holds the state of this particular completion ++ * ++ * This waits to be signaled for completion of a specific task. It can be ++ * interrupted by a kill signal. ++ * ++ * The return value is -ERESTARTSYS if interrupted, 0 if timed out, ++ * positive (at least 1, or number of jiffies left till timeout) if completed. ++ */ ++int __sched wait_for_completion_killable(struct completion *x) ++{ ++ long t = wait_for_common(x, MAX_SCHEDULE_TIMEOUT, TASK_KILLABLE); ++ if (t == -ERESTARTSYS) ++ return t; ++ return 0; ++} ++EXPORT_SYMBOL(wait_for_completion_killable); ++ ++/** ++ * wait_for_completion_killable_timeout: - waits for completion of a task (w/(to,killable)) ++ * @x: holds the state of this particular completion ++ * @timeout: timeout value in jiffies ++ * ++ * This waits for either a completion of a specific task to be ++ * signaled or for a specified timeout to expire. It can be ++ * interrupted by a kill signal. The timeout is in jiffies. ++ */ ++long __sched ++wait_for_completion_killable_timeout(struct completion *x, ++ unsigned long timeout) ++{ ++ return wait_for_common(x, timeout, TASK_KILLABLE); ++} ++EXPORT_SYMBOL(wait_for_completion_killable_timeout); ++ ++/** ++ * try_wait_for_completion - try to decrement a completion without blocking ++ * @x: completion structure ++ * ++ * Returns: 0 if a decrement cannot be done without blocking ++ * 1 if a decrement succeeded. ++ * ++ * If a completion is being used as a counting completion, ++ * attempt to decrement the counter without blocking. This ++ * enables us to avoid waiting if the resource the completion ++ * is protecting is not available. ++ */ ++bool try_wait_for_completion(struct completion *x) ++{ ++ unsigned long flags; ++ int ret = 1; ++ ++ spin_lock_irqsave(&x->wait.lock, flags); ++ if (!x->done) ++ ret = 0; ++ else ++ x->done--; ++ spin_unlock_irqrestore(&x->wait.lock, flags); ++ return ret; ++} ++EXPORT_SYMBOL(try_wait_for_completion); ++ ++/** ++ * completion_done - Test to see if a completion has any waiters ++ * @x: completion structure ++ * ++ * Returns: 0 if there are waiters (wait_for_completion() in progress) ++ * 1 if there are no waiters. 
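++ *
++ * (Editorial note, not part of the original patch: unlike
++ * try_wait_for_completion() above, this helper only inspects x->done
++ * under the wait-queue lock and never decrements it.)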
++ * ++ */ ++bool completion_done(struct completion *x) ++{ ++ unsigned long flags; ++ int ret = 1; ++ ++ spin_lock_irqsave(&x->wait.lock, flags); ++ if (!x->done) ++ ret = 0; ++ spin_unlock_irqrestore(&x->wait.lock, flags); ++ return ret; ++} ++EXPORT_SYMBOL(completion_done); ++ ++static long __sched ++sleep_on_common(wait_queue_head_t *q, int state, long timeout) ++{ ++ unsigned long flags; ++ wait_queue_t wait; ++ ++ init_waitqueue_entry(&wait, current); ++ ++ __set_current_state(state); ++ ++ spin_lock_irqsave(&q->lock, flags); ++ __add_wait_queue(q, &wait); ++ spin_unlock(&q->lock); ++ timeout = schedule_timeout(timeout); ++ spin_lock_irq(&q->lock); ++ __remove_wait_queue(q, &wait); ++ spin_unlock_irqrestore(&q->lock, flags); ++ ++ return timeout; ++} ++ ++void __sched interruptible_sleep_on(wait_queue_head_t *q) ++{ ++ sleep_on_common(q, TASK_INTERRUPTIBLE, MAX_SCHEDULE_TIMEOUT); ++} ++EXPORT_SYMBOL(interruptible_sleep_on); ++ ++long __sched ++interruptible_sleep_on_timeout(wait_queue_head_t *q, long timeout) ++{ ++ return sleep_on_common(q, TASK_INTERRUPTIBLE, timeout); ++} ++EXPORT_SYMBOL(interruptible_sleep_on_timeout); ++ ++void __sched sleep_on(wait_queue_head_t *q) ++{ ++ sleep_on_common(q, TASK_UNINTERRUPTIBLE, MAX_SCHEDULE_TIMEOUT); ++} ++EXPORT_SYMBOL(sleep_on); ++ ++long __sched sleep_on_timeout(wait_queue_head_t *q, long timeout) ++{ ++ return sleep_on_common(q, TASK_UNINTERRUPTIBLE, timeout); ++} ++EXPORT_SYMBOL(sleep_on_timeout); ++ ++#ifdef CONFIG_RT_MUTEXES ++ ++/* ++ * rt_mutex_setprio - set the current priority of a task ++ * @p: task ++ * @prio: prio value (kernel-internal form) ++ * ++ * This function changes the 'effective' priority of a task. It does ++ * not touch ->normal_prio like __setscheduler(). ++ * ++ * Used by the rt_mutex code to implement priority inheritance logic. ++ */ ++void rt_mutex_setprio(struct task_struct *p, int prio) ++{ ++ unsigned long flags; ++ int queued, oldprio; ++ struct rq *rq; ++ ++ BUG_ON(prio < 0 || prio > MAX_PRIO); ++ ++ rq = task_grq_lock(p, &flags); ++ ++ trace_sched_pi_setprio(p, prio); ++ oldprio = p->prio; ++ queued = task_queued(p); ++ if (queued) ++ dequeue_task(p); ++ p->prio = prio; ++ if (task_running(p) && prio > oldprio) ++ resched_task(p); ++ if (queued) { ++ enqueue_task(p); ++ try_preempt(p, rq); ++ } ++ ++ task_grq_unlock(&flags); ++} ++ ++#endif ++ ++/* ++ * Adjust the deadline for when the priority is to change, before it's ++ * changed. ++ */ ++static inline void adjust_deadline(struct task_struct *p, int new_prio) ++{ ++ p->deadline += static_deadline_diff(new_prio) - task_deadline_diff(p); ++} ++ ++void set_user_nice(struct task_struct *p, long nice) ++{ ++ int queued, new_static, old_static; ++ unsigned long flags; ++ struct rq *rq; ++ ++ if (TASK_NICE(p) == nice || nice < -20 || nice > 19) ++ return; ++ new_static = NICE_TO_PRIO(nice); ++ /* ++ * We have to be careful, if called from sys_setpriority(), ++ * the task might be in the middle of scheduling on another CPU. 
++ */ ++ rq = time_task_grq_lock(p, &flags); ++ /* ++ * The RT priorities are set via sched_setscheduler(), but we still ++ * allow the 'normal' nice value to be set - but as expected ++ * it wont have any effect on scheduling until the task is ++ * not SCHED_NORMAL/SCHED_BATCH: ++ */ ++ if (has_rt_policy(p)) { ++ p->static_prio = new_static; ++ goto out_unlock; ++ } ++ queued = task_queued(p); ++ if (queued) ++ dequeue_task(p); ++ ++ adjust_deadline(p, new_static); ++ old_static = p->static_prio; ++ p->static_prio = new_static; ++ p->prio = effective_prio(p); ++ ++ if (queued) { ++ enqueue_task(p); ++ if (new_static < old_static) ++ try_preempt(p, rq); ++ } else if (task_running(p)) { ++ reset_rq_task(rq, p); ++ if (old_static < new_static) ++ resched_task(p); ++ } ++out_unlock: ++ task_grq_unlock(&flags); ++} ++EXPORT_SYMBOL(set_user_nice); ++ ++/* ++ * can_nice - check if a task can reduce its nice value ++ * @p: task ++ * @nice: nice value ++ */ ++int can_nice(const struct task_struct *p, const int nice) ++{ ++ /* convert nice value [19,-20] to rlimit style value [1,40] */ ++ int nice_rlim = 20 - nice; ++ ++ return (nice_rlim <= task_rlimit(p, RLIMIT_NICE) || ++ capable(CAP_SYS_NICE)); ++} ++ ++#ifdef __ARCH_WANT_SYS_NICE ++ ++/* ++ * sys_nice - change the priority of the current process. ++ * @increment: priority increment ++ * ++ * sys_setpriority is a more generic, but much slower function that ++ * does similar things. ++ */ ++SYSCALL_DEFINE1(nice, int, increment) ++{ ++ long nice, retval; ++ ++ /* ++ * Setpriority might change our priority at the same moment. ++ * We don't have to worry. Conceptually one call occurs first ++ * and we have a single winner. ++ */ ++ if (increment < -40) ++ increment = -40; ++ if (increment > 40) ++ increment = 40; ++ ++ nice = TASK_NICE(current) + increment; ++ if (nice < -20) ++ nice = -20; ++ if (nice > 19) ++ nice = 19; ++ ++ if (increment < 0 && !can_nice(current, nice)) ++ return -EPERM; ++ ++ retval = security_task_setnice(current, nice); ++ if (retval) ++ return retval; ++ ++ set_user_nice(current, nice); ++ return 0; ++} ++ ++#endif ++ ++/** ++ * task_prio - return the priority value of a given task. ++ * @p: the task in question. ++ * ++ * This is the priority value as seen by users in /proc. ++ * RT tasks are offset by -100. Normal tasks are centered around 1, value goes ++ * from 0 (SCHED_ISO) up to 82 (nice +19 SCHED_IDLEPRIO). ++ */ ++int task_prio(const struct task_struct *p) ++{ ++ int delta, prio = p->prio - MAX_RT_PRIO; ++ ++ /* rt tasks and iso tasks */ ++ if (prio <= 0) ++ goto out; ++ ++ /* Convert to ms to avoid overflows */ ++ delta = NS_TO_MS(p->deadline - grq.niffies); ++ delta = delta * 40 / ms_longest_deadline_diff(); ++ if (delta > 0 && delta <= 80) ++ prio += delta; ++ if (idleprio_task(p)) ++ prio += 40; ++out: ++ return prio; ++} ++ ++/** ++ * task_nice - return the nice value of a given task. ++ * @p: the task in question. ++ */ ++int task_nice(const struct task_struct *p) ++{ ++ return TASK_NICE(p); ++} ++EXPORT_SYMBOL_GPL(task_nice); ++ ++/** ++ * idle_cpu - is a given cpu idle currently? ++ * @cpu: the processor in question. ++ */ ++int idle_cpu(int cpu) ++{ ++ return cpu_curr(cpu) == cpu_rq(cpu)->idle; ++} ++ ++/** ++ * idle_task - return the idle task for a given cpu. ++ * @cpu: the processor in question. ++ */ ++struct task_struct *idle_task(int cpu) ++{ ++ return cpu_rq(cpu)->idle; ++} ++ ++/** ++ * find_process_by_pid - find a process with a matching PID value. ++ * @pid: the pid in question. 
++ */ ++static inline struct task_struct *find_process_by_pid(pid_t pid) ++{ ++ return pid ? find_task_by_vpid(pid) : current; ++} ++ ++/* Actually do priority change: must hold grq lock. */ ++static void ++__setscheduler(struct task_struct *p, struct rq *rq, int policy, int prio) ++{ ++ int oldrtprio, oldprio; ++ ++ p->policy = policy; ++ oldrtprio = p->rt_priority; ++ p->rt_priority = prio; ++ p->normal_prio = normal_prio(p); ++ oldprio = p->prio; ++ /* we are holding p->pi_lock already */ ++ p->prio = rt_mutex_getprio(p); ++ if (task_running(p)) { ++ reset_rq_task(rq, p); ++ /* Resched only if we might now be preempted */ ++ if (p->prio > oldprio || p->rt_priority > oldrtprio) ++ resched_task(p); ++ } ++} ++ ++/* ++ * check the target process has a UID that matches the current process's ++ */ ++static bool check_same_owner(struct task_struct *p) ++{ ++ const struct cred *cred = current_cred(), *pcred; ++ bool match; ++ ++ rcu_read_lock(); ++ pcred = __task_cred(p); ++ if (cred->user->user_ns == pcred->user->user_ns) ++ match = (cred->euid == pcred->euid || ++ cred->euid == pcred->uid); ++ else ++ match = false; ++ rcu_read_unlock(); ++ return match; ++} ++ ++static int __sched_setscheduler(struct task_struct *p, int policy, ++ const struct sched_param *param, bool user) ++{ ++ struct sched_param zero_param = { .sched_priority = 0 }; ++ int queued, retval, oldpolicy = -1; ++ unsigned long flags, rlim_rtprio = 0; ++ int reset_on_fork; ++ struct rq *rq; ++ ++ /* may grab non-irq protected spin_locks */ ++ BUG_ON(in_interrupt()); ++ ++ if (is_rt_policy(policy) && !capable(CAP_SYS_NICE)) { ++ unsigned long lflags; ++ ++ if (!lock_task_sighand(p, &lflags)) ++ return -ESRCH; ++ rlim_rtprio = task_rlimit(p, RLIMIT_RTPRIO); ++ unlock_task_sighand(p, &lflags); ++ if (rlim_rtprio) ++ goto recheck; ++ /* ++ * If the caller requested an RT policy without having the ++ * necessary rights, we downgrade the policy to SCHED_ISO. ++ * We also set the parameter to zero to pass the checks. ++ */ ++ policy = SCHED_ISO; ++ param = &zero_param; ++ } ++recheck: ++ /* double check policy once rq lock held */ ++ if (policy < 0) { ++ reset_on_fork = p->sched_reset_on_fork; ++ policy = oldpolicy = p->policy; ++ } else { ++ reset_on_fork = !!(policy & SCHED_RESET_ON_FORK); ++ policy &= ~SCHED_RESET_ON_FORK; ++ ++ if (!SCHED_RANGE(policy)) ++ return -EINVAL; ++ } ++ ++ /* ++ * Valid priorities for SCHED_FIFO and SCHED_RR are ++ * 1..MAX_USER_RT_PRIO-1, valid priority for SCHED_NORMAL and ++ * SCHED_BATCH is 0. 
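++ *
++ * (Editorial note, not part of the original patch: with the usual mainline
++ * value of MAX_USER_RT_PRIO == 100, assumed here, this means a task with a
++ * user mm may request rt priorities 1..99 for SCHED_FIFO/SCHED_RR.)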
++ */ ++ if (param->sched_priority < 0 || ++ (p->mm && param->sched_priority > MAX_USER_RT_PRIO - 1) || ++ (!p->mm && param->sched_priority > MAX_RT_PRIO - 1)) ++ return -EINVAL; ++ if (is_rt_policy(policy) != (param->sched_priority != 0)) ++ return -EINVAL; ++ ++ /* ++ * Allow unprivileged RT tasks to decrease priority: ++ */ ++ if (user && !capable(CAP_SYS_NICE)) { ++ if (is_rt_policy(policy)) { ++ unsigned long rlim_rtprio = ++ task_rlimit(p, RLIMIT_RTPRIO); ++ ++ /* can't set/change the rt policy */ ++ if (policy != p->policy && !rlim_rtprio) ++ return -EPERM; ++ ++ /* can't increase priority */ ++ if (param->sched_priority > p->rt_priority && ++ param->sched_priority > rlim_rtprio) ++ return -EPERM; ++ } else { ++ switch (p->policy) { ++ /* ++ * Can only downgrade policies but not back to ++ * SCHED_NORMAL ++ */ ++ case SCHED_ISO: ++ if (policy == SCHED_ISO) ++ goto out; ++ if (policy == SCHED_NORMAL) ++ return -EPERM; ++ break; ++ case SCHED_BATCH: ++ if (policy == SCHED_BATCH) ++ goto out; ++ if (policy != SCHED_IDLEPRIO) ++ return -EPERM; ++ break; ++ case SCHED_IDLEPRIO: ++ if (policy == SCHED_IDLEPRIO) ++ goto out; ++ return -EPERM; ++ default: ++ break; ++ } ++ } ++ ++ /* can't change other user's priorities */ ++ if (!check_same_owner(p)) ++ return -EPERM; ++ ++ /* Normal users shall not reset the sched_reset_on_fork flag */ ++ if (p->sched_reset_on_fork && !reset_on_fork) ++ return -EPERM; ++ } ++ ++ if (user) { ++ retval = security_task_setscheduler(p); ++ if (retval) ++ return retval; ++ } ++ ++ /* ++ * make sure no PI-waiters arrive (or leave) while we are ++ * changing the priority of the task: ++ */ ++ raw_spin_lock_irqsave(&p->pi_lock, flags); ++ /* ++ * To be able to change p->policy safely, the grunqueue lock must be ++ * held. ++ */ ++ rq = __task_grq_lock(p); ++ ++ /* ++ * Changing the policy of the stop threads its a very bad idea ++ */ ++ if (p == rq->stop) { ++ __task_grq_unlock(); ++ raw_spin_unlock_irqrestore(&p->pi_lock, flags); ++ return -EINVAL; ++ } ++ ++ /* ++ * If not changing anything there's no need to proceed further: ++ */ ++ if (unlikely(policy == p->policy && (!is_rt_policy(policy) || ++ param->sched_priority == p->rt_priority))) { ++ ++ __task_grq_unlock(); ++ raw_spin_unlock_irqrestore(&p->pi_lock, flags); ++ return 0; ++ } ++ ++ /* recheck policy now with rq lock held */ ++ if (unlikely(oldpolicy != -1 && oldpolicy != p->policy)) { ++ policy = oldpolicy = -1; ++ __task_grq_unlock(); ++ raw_spin_unlock_irqrestore(&p->pi_lock, flags); ++ goto recheck; ++ } ++ update_clocks(rq); ++ p->sched_reset_on_fork = reset_on_fork; ++ ++ queued = task_queued(p); ++ if (queued) ++ dequeue_task(p); ++ __setscheduler(p, rq, policy, param->sched_priority); ++ if (queued) { ++ enqueue_task(p); ++ try_preempt(p, rq); ++ } ++ __task_grq_unlock(); ++ raw_spin_unlock_irqrestore(&p->pi_lock, flags); ++ ++ rt_mutex_adjust_pi(p); ++out: ++ return 0; ++} ++ ++/** ++ * sched_setscheduler - change the scheduling policy and/or RT priority of a thread. ++ * @p: the task in question. ++ * @policy: new policy. ++ * @param: structure containing the new RT priority. ++ * ++ * NOTE that the task may be already dead. ++ */ ++int sched_setscheduler(struct task_struct *p, int policy, ++ const struct sched_param *param) ++{ ++ return __sched_setscheduler(p, policy, param, true); ++} ++ ++EXPORT_SYMBOL_GPL(sched_setscheduler); ++ ++/** ++ * sched_setscheduler_nocheck - change the scheduling policy and/or RT priority of a thread from kernelspace. ++ * @p: the task in question. 
++ * @policy: new policy. ++ * @param: structure containing the new RT priority. ++ * ++ * Just like sched_setscheduler, only don't bother checking if the ++ * current context has permission. For example, this is needed in ++ * stop_machine(): we create temporary high priority worker threads, ++ * but our caller might not have that capability. ++ */ ++int sched_setscheduler_nocheck(struct task_struct *p, int policy, ++ const struct sched_param *param) ++{ ++ return __sched_setscheduler(p, policy, param, false); ++} ++ ++static int ++do_sched_setscheduler(pid_t pid, int policy, struct sched_param __user *param) ++{ ++ struct sched_param lparam; ++ struct task_struct *p; ++ int retval; ++ ++ if (!param || pid < 0) ++ return -EINVAL; ++ if (copy_from_user(&lparam, param, sizeof(struct sched_param))) ++ return -EFAULT; ++ ++ rcu_read_lock(); ++ retval = -ESRCH; ++ p = find_process_by_pid(pid); ++ if (p != NULL) ++ retval = sched_setscheduler(p, policy, &lparam); ++ rcu_read_unlock(); ++ ++ return retval; ++} ++ ++/** ++ * sys_sched_setscheduler - set/change the scheduler policy and RT priority ++ * @pid: the pid in question. ++ * @policy: new policy. ++ * @param: structure containing the new RT priority. ++ */ ++asmlinkage long sys_sched_setscheduler(pid_t pid, int policy, ++ struct sched_param __user *param) ++{ ++ /* negative values for policy are not valid */ ++ if (policy < 0) ++ return -EINVAL; ++ ++ return do_sched_setscheduler(pid, policy, param); ++} ++ ++/** ++ * sys_sched_setparam - set/change the RT priority of a thread ++ * @pid: the pid in question. ++ * @param: structure containing the new RT priority. ++ */ ++SYSCALL_DEFINE2(sched_setparam, pid_t, pid, struct sched_param __user *, param) ++{ ++ return do_sched_setscheduler(pid, -1, param); ++} ++ ++/** ++ * sys_sched_getscheduler - get the policy (scheduling class) of a thread ++ * @pid: the pid in question. ++ */ ++SYSCALL_DEFINE1(sched_getscheduler, pid_t, pid) ++{ ++ struct task_struct *p; ++ int retval = -EINVAL; ++ ++ if (pid < 0) ++ goto out_nounlock; ++ ++ retval = -ESRCH; ++ rcu_read_lock(); ++ p = find_process_by_pid(pid); ++ if (p) { ++ retval = security_task_getscheduler(p); ++ if (!retval) ++ retval = p->policy; ++ } ++ rcu_read_unlock(); ++ ++out_nounlock: ++ return retval; ++} ++ ++/** ++ * sys_sched_getscheduler - get the RT priority of a thread ++ * @pid: the pid in question. ++ * @param: structure containing the RT priority. ++ */ ++SYSCALL_DEFINE2(sched_getparam, pid_t, pid, struct sched_param __user *, param) ++{ ++ struct sched_param lp; ++ struct task_struct *p; ++ int retval = -EINVAL; ++ ++ if (!param || pid < 0) ++ goto out_nounlock; ++ ++ rcu_read_lock(); ++ p = find_process_by_pid(pid); ++ retval = -ESRCH; ++ if (!p) ++ goto out_unlock; ++ ++ retval = security_task_getscheduler(p); ++ if (retval) ++ goto out_unlock; ++ ++ lp.sched_priority = p->rt_priority; ++ rcu_read_unlock(); ++ ++ /* ++ * This one might sleep, we cannot do it with a spinlock held ... ++ */ ++ retval = copy_to_user(param, &lp, sizeof(*param)) ? 
-EFAULT : 0; ++ ++out_nounlock: ++ return retval; ++ ++out_unlock: ++ rcu_read_unlock(); ++ return retval; ++} ++ ++long sched_setaffinity(pid_t pid, const struct cpumask *in_mask) ++{ ++ cpumask_var_t cpus_allowed, new_mask; ++ struct task_struct *p; ++ int retval; ++ ++ get_online_cpus(); ++ rcu_read_lock(); ++ ++ p = find_process_by_pid(pid); ++ if (!p) { ++ rcu_read_unlock(); ++ put_online_cpus(); ++ return -ESRCH; ++ } ++ ++ /* Prevent p going away */ ++ get_task_struct(p); ++ rcu_read_unlock(); ++ ++ if (!alloc_cpumask_var(&cpus_allowed, GFP_KERNEL)) { ++ retval = -ENOMEM; ++ goto out_put_task; ++ } ++ if (!alloc_cpumask_var(&new_mask, GFP_KERNEL)) { ++ retval = -ENOMEM; ++ goto out_free_cpus_allowed; ++ } ++ retval = -EPERM; ++ if (!check_same_owner(p) && !task_ns_capable(p, CAP_SYS_NICE)) ++ goto out_unlock; ++ ++ retval = security_task_setscheduler(p); ++ if (retval) ++ goto out_unlock; ++ ++ cpuset_cpus_allowed(p, cpus_allowed); ++ cpumask_and(new_mask, in_mask, cpus_allowed); ++again: ++ retval = set_cpus_allowed_ptr(p, new_mask); ++ ++ if (!retval) { ++ cpuset_cpus_allowed(p, cpus_allowed); ++ if (!cpumask_subset(new_mask, cpus_allowed)) { ++ /* ++ * We must have raced with a concurrent cpuset ++ * update. Just reset the cpus_allowed to the ++ * cpuset's cpus_allowed ++ */ ++ cpumask_copy(new_mask, cpus_allowed); ++ goto again; ++ } ++ } ++out_unlock: ++ free_cpumask_var(new_mask); ++out_free_cpus_allowed: ++ free_cpumask_var(cpus_allowed); ++out_put_task: ++ put_task_struct(p); ++ put_online_cpus(); ++ return retval; ++} ++ ++static int get_user_cpu_mask(unsigned long __user *user_mask_ptr, unsigned len, ++ cpumask_t *new_mask) ++{ ++ if (len < sizeof(cpumask_t)) { ++ memset(new_mask, 0, sizeof(cpumask_t)); ++ } else if (len > sizeof(cpumask_t)) { ++ len = sizeof(cpumask_t); ++ } ++ return copy_from_user(new_mask, user_mask_ptr, len) ? 
-EFAULT : 0; ++} ++ ++ ++/** ++ * sys_sched_setaffinity - set the cpu affinity of a process ++ * @pid: pid of the process ++ * @len: length in bytes of the bitmask pointed to by user_mask_ptr ++ * @user_mask_ptr: user-space pointer to the new cpu mask ++ */ ++SYSCALL_DEFINE3(sched_setaffinity, pid_t, pid, unsigned int, len, ++ unsigned long __user *, user_mask_ptr) ++{ ++ cpumask_var_t new_mask; ++ int retval; ++ ++ if (!alloc_cpumask_var(&new_mask, GFP_KERNEL)) ++ return -ENOMEM; ++ ++ retval = get_user_cpu_mask(user_mask_ptr, len, new_mask); ++ if (retval == 0) ++ retval = sched_setaffinity(pid, new_mask); ++ free_cpumask_var(new_mask); ++ return retval; ++} ++ ++long sched_getaffinity(pid_t pid, cpumask_t *mask) ++{ ++ struct task_struct *p; ++ unsigned long flags; ++ int retval; ++ ++ get_online_cpus(); ++ rcu_read_lock(); ++ ++ retval = -ESRCH; ++ p = find_process_by_pid(pid); ++ if (!p) ++ goto out_unlock; ++ ++ retval = security_task_getscheduler(p); ++ if (retval) ++ goto out_unlock; ++ ++ grq_lock_irqsave(&flags); ++ cpumask_and(mask, tsk_cpus_allowed(p), cpu_online_mask); ++ grq_unlock_irqrestore(&flags); ++ ++out_unlock: ++ rcu_read_unlock(); ++ put_online_cpus(); ++ ++ return retval; ++} ++ ++/** ++ * sys_sched_getaffinity - get the cpu affinity of a process ++ * @pid: pid of the process ++ * @len: length in bytes of the bitmask pointed to by user_mask_ptr ++ * @user_mask_ptr: user-space pointer to hold the current cpu mask ++ */ ++SYSCALL_DEFINE3(sched_getaffinity, pid_t, pid, unsigned int, len, ++ unsigned long __user *, user_mask_ptr) ++{ ++ int ret; ++ cpumask_var_t mask; ++ ++ if ((len * BITS_PER_BYTE) < nr_cpu_ids) ++ return -EINVAL; ++ if (len & (sizeof(unsigned long)-1)) ++ return -EINVAL; ++ ++ if (!alloc_cpumask_var(&mask, GFP_KERNEL)) ++ return -ENOMEM; ++ ++ ret = sched_getaffinity(pid, mask); ++ if (ret == 0) { ++ size_t retlen = min_t(size_t, len, cpumask_size()); ++ ++ if (copy_to_user(user_mask_ptr, mask, retlen)) ++ ret = -EFAULT; ++ else ++ ret = retlen; ++ } ++ free_cpumask_var(mask); ++ ++ return ret; ++} ++ ++/** ++ * sys_sched_yield - yield the current processor to other threads. ++ * ++ * This function yields the current CPU to other tasks. It does this by ++ * scheduling away the current task. If it still has the earliest deadline ++ * it will be scheduled again as the next task. ++ */ ++SYSCALL_DEFINE0(sched_yield) ++{ ++ struct task_struct *p; ++ ++ p = current; ++ grq_lock_irq(); ++ schedstat_inc(task_rq(p), yld_count); ++ requeue_task(p); ++ ++ /* ++ * Since we are going to call schedule() anyway, there's ++ * no need to preempt or enable interrupts: ++ */ ++ __release(grq.lock); ++ spin_release(&grq.lock.dep_map, 1, _THIS_IP_); ++ do_raw_spin_unlock(&grq.lock); ++ preempt_enable_no_resched(); ++ ++ schedule(); ++ ++ return 0; ++} ++ ++static inline bool should_resched(void) ++{ ++ return need_resched() && !(preempt_count() & PREEMPT_ACTIVE); ++} ++ ++static void __cond_resched(void) ++{ ++ /* NOT a real fix but will make voluntary preempt work. 馬鹿ãªäº‹ */ ++ if (unlikely(system_state != SYSTEM_RUNNING)) ++ return; ++ ++ add_preempt_count(PREEMPT_ACTIVE); ++ schedule(); ++ sub_preempt_count(PREEMPT_ACTIVE); ++} ++ ++int __sched _cond_resched(void) ++{ ++ if (should_resched()) { ++ __cond_resched(); ++ return 1; ++ } ++ return 0; ++} ++EXPORT_SYMBOL(_cond_resched); ++ ++/* ++ * __cond_resched_lock() - if a reschedule is pending, drop the given lock, ++ * call schedule, and on return reacquire the lock. 
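++ *
++ * (Editorial note, not part of the original patch: as implemented below it
++ * returns 1 whenever the lock was dropped, either because a reschedule was
++ * pending or because spin_needbreak() reported contention, and 0 when
++ * nothing was done.)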
++ * ++ * This works OK both with and without CONFIG_PREEMPT. We do strange low-level ++ * operations here to prevent schedule() from being called twice (once via ++ * spin_unlock(), once by hand). ++ */ ++int __cond_resched_lock(spinlock_t *lock) ++{ ++ int resched = should_resched(); ++ int ret = 0; ++ ++ lockdep_assert_held(lock); ++ ++ if (spin_needbreak(lock) || resched) { ++ spin_unlock(lock); ++ if (resched) ++ __cond_resched(); ++ else ++ cpu_relax(); ++ ret = 1; ++ spin_lock(lock); ++ } ++ return ret; ++} ++EXPORT_SYMBOL(__cond_resched_lock); ++ ++int __sched __cond_resched_softirq(void) ++{ ++ BUG_ON(!in_softirq()); ++ ++ if (should_resched()) { ++ local_bh_enable(); ++ __cond_resched(); ++ local_bh_disable(); ++ return 1; ++ } ++ return 0; ++} ++EXPORT_SYMBOL(__cond_resched_softirq); ++ ++/** ++ * yield - yield the current processor to other threads. ++ * ++ * This is a shortcut for kernel-space yielding - it marks the ++ * thread runnable and calls sys_sched_yield(). ++ */ ++void __sched yield(void) ++{ ++ set_current_state(TASK_RUNNING); ++ sys_sched_yield(); ++} ++EXPORT_SYMBOL(yield); ++ ++/** ++ * yield_to - yield the current processor to another thread in ++ * your thread group, or accelerate that thread toward the ++ * processor it's on. ++ * @p: target task ++ * @preempt: whether task preemption is allowed or not ++ * ++ * It's the caller's job to ensure that the target task struct ++ * can't go away on us before we can do any checks. ++ * ++ * Returns true if we indeed boosted the target task. ++ */ ++bool __sched yield_to(struct task_struct *p, bool preempt) ++{ ++ unsigned long flags; ++ bool yielded = 0; ++ struct rq *rq; ++ ++ rq = this_rq(); ++ grq_lock_irqsave(&flags); ++ if (task_running(p) || p->state) ++ goto out_unlock; ++ yielded = 1; ++ if (p->deadline > rq->rq_deadline) ++ p->deadline = rq->rq_deadline; ++ p->time_slice += rq->rq_time_slice; ++ rq->rq_time_slice = 0; ++ if (p->time_slice > timeslice()) ++ p->time_slice = timeslice(); ++ set_tsk_need_resched(rq->curr); ++out_unlock: ++ grq_unlock_irqrestore(&flags); ++ ++ if (yielded) ++ schedule(); ++ return yielded; ++} ++EXPORT_SYMBOL_GPL(yield_to); ++ ++/* ++ * This task is about to go to sleep on IO. Increment rq->nr_iowait so ++ * that process accounting knows that this is a task in IO wait state. ++ * ++ * But don't do that if it is a deliberate, throttling IO wait (this task ++ * has set its backing_dev_info: the queue against which it should throttle) ++ */ ++void __sched io_schedule(void) ++{ ++ struct rq *rq = raw_rq(); ++ ++ delayacct_blkio_start(); ++ atomic_inc(&rq->nr_iowait); ++ blk_flush_plug(current); ++ current->in_iowait = 1; ++ schedule(); ++ current->in_iowait = 0; ++ atomic_dec(&rq->nr_iowait); ++ delayacct_blkio_end(); ++} ++EXPORT_SYMBOL(io_schedule); ++ ++long __sched io_schedule_timeout(long timeout) ++{ ++ struct rq *rq = raw_rq(); ++ long ret; ++ ++ delayacct_blkio_start(); ++ atomic_inc(&rq->nr_iowait); ++ blk_flush_plug(current); ++ current->in_iowait = 1; ++ ret = schedule_timeout(timeout); ++ current->in_iowait = 0; ++ atomic_dec(&rq->nr_iowait); ++ delayacct_blkio_end(); ++ return ret; ++} ++ ++/** ++ * sys_sched_get_priority_max - return maximum RT priority. ++ * @policy: scheduling class. ++ * ++ * this syscall returns the maximum rt_priority that can be used ++ * by a given scheduling class. 
++ */ ++SYSCALL_DEFINE1(sched_get_priority_max, int, policy) ++{ ++ int ret = -EINVAL; ++ ++ switch (policy) { ++ case SCHED_FIFO: ++ case SCHED_RR: ++ ret = MAX_USER_RT_PRIO-1; ++ break; ++ case SCHED_NORMAL: ++ case SCHED_BATCH: ++ case SCHED_ISO: ++ case SCHED_IDLEPRIO: ++ ret = 0; ++ break; ++ } ++ return ret; ++} ++ ++/** ++ * sys_sched_get_priority_min - return minimum RT priority. ++ * @policy: scheduling class. ++ * ++ * this syscall returns the minimum rt_priority that can be used ++ * by a given scheduling class. ++ */ ++SYSCALL_DEFINE1(sched_get_priority_min, int, policy) ++{ ++ int ret = -EINVAL; ++ ++ switch (policy) { ++ case SCHED_FIFO: ++ case SCHED_RR: ++ ret = 1; ++ break; ++ case SCHED_NORMAL: ++ case SCHED_BATCH: ++ case SCHED_ISO: ++ case SCHED_IDLEPRIO: ++ ret = 0; ++ break; ++ } ++ return ret; ++} ++ ++/** ++ * sys_sched_rr_get_interval - return the default timeslice of a process. ++ * @pid: pid of the process. ++ * @interval: userspace pointer to the timeslice value. ++ * ++ * this syscall writes the default timeslice value of a given process ++ * into the user-space timespec buffer. A value of '0' means infinity. ++ */ ++SYSCALL_DEFINE2(sched_rr_get_interval, pid_t, pid, ++ struct timespec __user *, interval) ++{ ++ struct task_struct *p; ++ unsigned int time_slice; ++ unsigned long flags; ++ int retval; ++ struct timespec t; ++ ++ if (pid < 0) ++ return -EINVAL; ++ ++ retval = -ESRCH; ++ rcu_read_lock(); ++ p = find_process_by_pid(pid); ++ if (!p) ++ goto out_unlock; ++ ++ retval = security_task_getscheduler(p); ++ if (retval) ++ goto out_unlock; ++ ++ grq_lock_irqsave(&flags); ++ time_slice = p->policy == SCHED_FIFO ? 0 : MS_TO_NS(task_timeslice(p)); ++ grq_unlock_irqrestore(&flags); ++ ++ rcu_read_unlock(); ++ t = ns_to_timespec(time_slice); ++ retval = copy_to_user(interval, &t, sizeof(t)) ? -EFAULT : 0; ++ return retval; ++ ++out_unlock: ++ rcu_read_unlock(); ++ return retval; ++} ++ ++static const char stat_nam[] = TASK_STATE_TO_CHAR_STR; ++ ++void sched_show_task(struct task_struct *p) ++{ ++ unsigned long free = 0; ++ unsigned state; ++ ++ state = p->state ? __ffs(p->state) + 1 : 0; ++ printk(KERN_INFO "%-15.15s %c", p->comm, ++ state < sizeof(stat_nam) - 1 ? 
stat_nam[state] : '?'); ++#if BITS_PER_LONG == 32 ++ if (state == TASK_RUNNING) ++ printk(KERN_CONT " running "); ++ else ++ printk(KERN_CONT " %08lx ", thread_saved_pc(p)); ++#else ++ if (state == TASK_RUNNING) ++ printk(KERN_CONT " running task "); ++ else ++ printk(KERN_CONT " %016lx ", thread_saved_pc(p)); ++#endif ++#ifdef CONFIG_DEBUG_STACK_USAGE ++ free = stack_not_used(p); ++#endif ++ printk(KERN_CONT "%5lu %5d %6d 0x%08lx\n", free, ++ task_pid_nr(p), task_pid_nr(p->real_parent), ++ (unsigned long)task_thread_info(p)->flags); ++ ++ show_stack(p, NULL); ++} ++ ++void show_state_filter(unsigned long state_filter) ++{ ++ struct task_struct *g, *p; ++ ++#if BITS_PER_LONG == 32 ++ printk(KERN_INFO ++ " task PC stack pid father\n"); ++#else ++ printk(KERN_INFO ++ " task PC stack pid father\n"); ++#endif ++ rcu_read_lock(); ++ do_each_thread(g, p) { ++ /* ++ * reset the NMI-timeout, listing all files on a slow ++ * console might take a lot of time: ++ */ ++ touch_nmi_watchdog(); ++ if (!state_filter || (p->state & state_filter)) ++ sched_show_task(p); ++ } while_each_thread(g, p); ++ ++ touch_all_softlockup_watchdogs(); ++ ++ rcu_read_unlock(); ++ /* ++ * Only show locks if all tasks are dumped: ++ */ ++ if (!state_filter) ++ debug_show_all_locks(); ++} ++ ++#ifdef CONFIG_SMP ++void do_set_cpus_allowed(struct task_struct *p, const struct cpumask *new_mask) ++{ ++ cpumask_copy(tsk_cpus_allowed(p), new_mask); ++} ++#endif ++ ++/** ++ * init_idle - set up an idle thread for a given CPU ++ * @idle: task in question ++ * @cpu: cpu the idle task belongs to ++ * ++ * NOTE: this function does not set the idle thread's NEED_RESCHED ++ * flag, to make booting more robust. ++ */ ++void init_idle(struct task_struct *idle, int cpu) ++{ ++ struct rq *rq = cpu_rq(cpu); ++ unsigned long flags; ++ ++ time_grq_lock(rq, &flags); ++ idle->last_ran = rq->clock; ++ idle->state = TASK_RUNNING; ++ /* Setting prio to illegal value shouldn't matter when never queued */ ++ idle->prio = PRIO_LIMIT; ++ set_rq_task(rq, idle); ++ do_set_cpus_allowed(idle, &cpumask_of_cpu(cpu)); ++ /* Silence PROVE_RCU */ ++ rcu_read_lock(); ++ set_task_cpu(idle, cpu); ++ rcu_read_unlock(); ++ rq->curr = rq->idle = idle; ++ idle->on_cpu = 1; ++ grq_unlock_irqrestore(&flags); ++ ++ /* Set the preempt count _outside_ the spinlocks! */ ++ task_thread_info(idle)->preempt_count = 0; ++ ++ ftrace_graph_init_idle_task(idle, cpu); ++#if defined(CONFIG_SMP) ++ sprintf(idle->comm, "%s/%d", INIT_TASK_COMM, cpu); ++#endif ++} ++ ++#ifdef CONFIG_SMP ++#ifdef CONFIG_NO_HZ ++void select_nohz_load_balancer(int stop_tick) ++{ ++} ++#if defined(CONFIG_SCHED_MC) || defined(CONFIG_SCHED_SMT) ++/** ++ * lowest_flag_domain - Return lowest sched_domain containing flag. ++ * @cpu: The cpu whose lowest level of sched domain is to ++ * be returned. ++ * @flag: The flag to check for the lowest sched_domain ++ * for the given cpu. ++ * ++ * Returns the lowest sched_domain of a cpu which contains the given flag. ++ */ ++static inline struct sched_domain *lowest_flag_domain(int cpu, int flag) ++{ ++ struct sched_domain *sd; ++ ++ for_each_domain(cpu, sd) ++ if (sd && (sd->flags & flag)) ++ break; ++ ++ return sd; ++} ++ ++/** ++ * for_each_flag_domain - Iterates over sched_domains containing the flag. ++ * @cpu: The cpu whose domains we're iterating over. ++ * @sd: variable holding the value of the power_savings_sd ++ * for cpu. ++ * @flag: The flag to filter the sched_domains to be iterated. 
++ * ++ * Iterates over all the scheduler domains for a given cpu that has the 'flag' ++ * set, starting from the lowest sched_domain to the highest. ++ */ ++#define for_each_flag_domain(cpu, sd, flag) \ ++ for (sd = lowest_flag_domain(cpu, flag); \ ++ (sd && (sd->flags & flag)); sd = sd->parent) ++ ++#endif /* (CONFIG_SCHED_MC || CONFIG_SCHED_SMT) */ ++ ++static inline void resched_cpu(int cpu) ++{ ++ unsigned long flags; ++ ++ grq_lock_irqsave(&flags); ++ resched_task(cpu_curr(cpu)); ++ grq_unlock_irqrestore(&flags); ++} ++ ++/* ++ * In the semi idle case, use the nearest busy cpu for migrating timers ++ * from an idle cpu. This is good for power-savings. ++ * ++ * We don't do similar optimization for completely idle system, as ++ * selecting an idle cpu will add more delays to the timers than intended ++ * (as that cpu's timer base may not be uptodate wrt jiffies etc). ++ */ ++int get_nohz_timer_target(void) ++{ ++ int cpu = smp_processor_id(); ++ int i; ++ struct sched_domain *sd; ++ ++ rcu_read_lock(); ++ for_each_domain(cpu, sd) { ++ for_each_cpu(i, sched_domain_span(sd)) { ++ if (!idle_cpu(i)) ++ cpu = i; ++ goto unlock; ++ } ++ } ++unlock: ++ rcu_read_unlock(); ++ return cpu; ++} ++ ++/* ++ * When add_timer_on() enqueues a timer into the timer wheel of an ++ * idle CPU then this timer might expire before the next timer event ++ * which is scheduled to wake up that CPU. In case of a completely ++ * idle system the next event might even be infinite time into the ++ * future. wake_up_idle_cpu() ensures that the CPU is woken up and ++ * leaves the inner idle loop so the newly added timer is taken into ++ * account when the CPU goes back to idle and evaluates the timer ++ * wheel for the next timer event. ++ */ ++void wake_up_idle_cpu(int cpu) ++{ ++ struct task_struct *idle; ++ struct rq *rq; ++ ++ if (cpu == smp_processor_id()) ++ return; ++ ++ rq = cpu_rq(cpu); ++ idle = rq->idle; ++ ++ /* ++ * This is safe, as this function is called with the timer ++ * wheel base lock of (cpu) held. When the CPU is on the way ++ * to idle and has not yet set rq->curr to idle then it will ++ * be serialised on the timer wheel base lock and take the new ++ * timer into account automatically. ++ */ ++ if (unlikely(rq->curr != idle)) ++ return; ++ ++ /* ++ * We can set TIF_RESCHED on the idle task of the other CPU ++ * lockless. The worst case is that the other CPU runs the ++ * idle task through an additional NOOP schedule() ++ */ ++ set_tsk_need_resched(idle); ++ ++ /* NEED_RESCHED must be visible before we test polling */ ++ smp_mb(); ++ if (!tsk_is_polling(idle)) ++ smp_send_reschedule(cpu); ++} ++ ++#endif /* CONFIG_NO_HZ */ ++ ++/* ++ * Change a given task's CPU affinity. Migrate the thread to a ++ * proper CPU and schedule it away if the CPU it's executing on ++ * is removed from the allowed bitmask. ++ * ++ * NOTE: the caller must have a valid reference to the task, the ++ * task must not exit() & deallocate itself prematurely. The ++ * call is not atomic; no spinlocks may be held. 
++ */ ++int set_cpus_allowed_ptr(struct task_struct *p, const struct cpumask *new_mask) ++{ ++ unsigned long flags; ++ int running_wrong = 0; ++ int queued = 0; ++ struct rq *rq; ++ int ret = 0; ++ ++ rq = task_grq_lock(p, &flags); ++ ++ if (cpumask_equal(tsk_cpus_allowed(p), new_mask)) ++ goto out; ++ ++ if (!cpumask_intersects(new_mask, cpu_active_mask)) { ++ ret = -EINVAL; ++ goto out; ++ } ++ ++ if (unlikely((p->flags & PF_THREAD_BOUND) && p != current)) { ++ ret = -EINVAL; ++ goto out; ++ } ++ ++ queued = task_queued(p); ++ ++ do_set_cpus_allowed(p, new_mask); ++ ++ /* Can the task run on the task's current CPU? If so, we're done */ ++ if (cpumask_test_cpu(task_cpu(p), new_mask)) ++ goto out; ++ ++ if (task_running(p)) { ++ /* Task is running on the wrong cpu now, reschedule it. */ ++ if (rq == this_rq()) { ++ set_tsk_need_resched(p); ++ running_wrong = 1; ++ } else ++ resched_task(p); ++ } else ++ set_task_cpu(p, cpumask_any_and(cpu_active_mask, new_mask)); ++ ++out: ++ if (queued) ++ try_preempt(p, rq); ++ task_grq_unlock(&flags); ++ ++ if (running_wrong) ++ _cond_resched(); ++ ++ return ret; ++} ++EXPORT_SYMBOL_GPL(set_cpus_allowed_ptr); ++ ++#ifdef CONFIG_HOTPLUG_CPU ++/* Run through task list and find tasks affined to just the dead cpu, then ++ * allocate a new affinity */ ++static void break_sole_affinity(int src_cpu, struct task_struct *idle) ++{ ++ struct task_struct *p, *t; ++ ++ do_each_thread(t, p) { ++ if (p != idle && !online_cpus(p)) { ++ cpumask_copy(tsk_cpus_allowed(p), cpu_possible_mask); ++ /* ++ * Don't tell them about moving exiting tasks or ++ * kernel threads (both mm NULL), since they never ++ * leave kernel. ++ */ ++ if (p->mm && printk_ratelimit()) { ++ printk(KERN_INFO "process %d (%s) no " ++ "longer affine to cpu %d\n", ++ task_pid_nr(p), p->comm, src_cpu); ++ } ++ } ++ clear_sticky(p); ++ } while_each_thread(t, p); ++} ++ ++/* ++ * Schedules idle task to be the next runnable task on current CPU. ++ * It does so by boosting its priority to highest possible. ++ * Used by CPU offline code. ++ */ ++void sched_idle_next(struct rq *rq, int this_cpu, struct task_struct *idle) ++{ ++ /* cpu has to be offline */ ++ BUG_ON(cpu_online(this_cpu)); ++ ++ __setscheduler(idle, rq, SCHED_FIFO, STOP_PRIO); ++ ++ activate_idle_task(idle); ++ set_tsk_need_resched(rq->curr); ++} ++ ++/* ++ * Ensures that the idle task is using init_mm right before its cpu goes ++ * offline. ++ */ ++void idle_task_exit(void) ++{ ++ struct mm_struct *mm = current->active_mm; ++ ++ BUG_ON(cpu_online(smp_processor_id())); ++ ++ if (mm != &init_mm) ++ switch_mm(mm, &init_mm, current); ++ mmdrop(mm); ++} ++#endif /* CONFIG_HOTPLUG_CPU */ ++void sched_set_stop_task(int cpu, struct task_struct *stop) ++{ ++ struct sched_param stop_param = { .sched_priority = STOP_PRIO }; ++ struct sched_param start_param = { .sched_priority = MAX_USER_RT_PRIO - 1 }; ++ struct task_struct *old_stop = cpu_rq(cpu)->stop; ++ ++ if (stop) { ++ /* ++ * Make it appear like a SCHED_FIFO task, its something ++ * userspace knows about and won't get confused about. ++ * ++ * Also, it will make PI more or less work without too ++ * much confusion -- but then, stop work should not ++ * rely on PI working anyway. ++ */ ++ sched_setscheduler_nocheck(stop, SCHED_FIFO, &stop_param); ++ } ++ ++ cpu_rq(cpu)->stop = stop; ++ ++ if (old_stop) { ++ /* ++ * Reset it back to a normal rt scheduling prio so that ++ * it can die in pieces. 
++ */ ++ sched_setscheduler_nocheck(old_stop, SCHED_FIFO, &start_param); ++ } ++} ++ ++ ++#if defined(CONFIG_SCHED_DEBUG) && defined(CONFIG_SYSCTL) ++ ++static struct ctl_table sd_ctl_dir[] = { ++ { ++ .procname = "sched_domain", ++ .mode = 0555, ++ }, ++ {} ++}; ++ ++static struct ctl_table sd_ctl_root[] = { ++ { ++ .procname = "kernel", ++ .mode = 0555, ++ .child = sd_ctl_dir, ++ }, ++ {} ++}; ++ ++static struct ctl_table *sd_alloc_ctl_entry(int n) ++{ ++ struct ctl_table *entry = ++ kcalloc(n, sizeof(struct ctl_table), GFP_KERNEL); ++ ++ return entry; ++} ++ ++static void sd_free_ctl_entry(struct ctl_table **tablep) ++{ ++ struct ctl_table *entry; ++ ++ /* ++ * In the intermediate directories, both the child directory and ++ * procname are dynamically allocated and could fail but the mode ++ * will always be set. In the lowest directory the names are ++ * static strings and all have proc handlers. ++ */ ++ for (entry = *tablep; entry->mode; entry++) { ++ if (entry->child) ++ sd_free_ctl_entry(&entry->child); ++ if (entry->proc_handler == NULL) ++ kfree(entry->procname); ++ } ++ ++ kfree(*tablep); ++ *tablep = NULL; ++} ++ ++static void ++set_table_entry(struct ctl_table *entry, ++ const char *procname, void *data, int maxlen, ++ mode_t mode, proc_handler *proc_handler) ++{ ++ entry->procname = procname; ++ entry->data = data; ++ entry->maxlen = maxlen; ++ entry->mode = mode; ++ entry->proc_handler = proc_handler; ++} ++ ++static struct ctl_table * ++sd_alloc_ctl_domain_table(struct sched_domain *sd) ++{ ++ struct ctl_table *table = sd_alloc_ctl_entry(13); ++ ++ if (table == NULL) ++ return NULL; ++ ++ set_table_entry(&table[0], "min_interval", &sd->min_interval, ++ sizeof(long), 0644, proc_doulongvec_minmax); ++ set_table_entry(&table[1], "max_interval", &sd->max_interval, ++ sizeof(long), 0644, proc_doulongvec_minmax); ++ set_table_entry(&table[2], "busy_idx", &sd->busy_idx, ++ sizeof(int), 0644, proc_dointvec_minmax); ++ set_table_entry(&table[3], "idle_idx", &sd->idle_idx, ++ sizeof(int), 0644, proc_dointvec_minmax); ++ set_table_entry(&table[4], "newidle_idx", &sd->newidle_idx, ++ sizeof(int), 0644, proc_dointvec_minmax); ++ set_table_entry(&table[5], "wake_idx", &sd->wake_idx, ++ sizeof(int), 0644, proc_dointvec_minmax); ++ set_table_entry(&table[6], "forkexec_idx", &sd->forkexec_idx, ++ sizeof(int), 0644, proc_dointvec_minmax); ++ set_table_entry(&table[7], "busy_factor", &sd->busy_factor, ++ sizeof(int), 0644, proc_dointvec_minmax); ++ set_table_entry(&table[8], "imbalance_pct", &sd->imbalance_pct, ++ sizeof(int), 0644, proc_dointvec_minmax); ++ set_table_entry(&table[9], "cache_nice_tries", ++ &sd->cache_nice_tries, ++ sizeof(int), 0644, proc_dointvec_minmax); ++ set_table_entry(&table[10], "flags", &sd->flags, ++ sizeof(int), 0644, proc_dointvec_minmax); ++ set_table_entry(&table[11], "name", sd->name, ++ CORENAME_MAX_SIZE, 0444, proc_dostring); ++ /* &table[12] is terminator */ ++ ++ return table; ++} ++ ++static ctl_table *sd_alloc_ctl_cpu_table(int cpu) ++{ ++ struct ctl_table *entry, *table; ++ struct sched_domain *sd; ++ int domain_num = 0, i; ++ char buf[32]; ++ ++ for_each_domain(cpu, sd) ++ domain_num++; ++ entry = table = sd_alloc_ctl_entry(domain_num + 1); ++ if (table == NULL) ++ return NULL; ++ ++ i = 0; ++ for_each_domain(cpu, sd) { ++ snprintf(buf, 32, "domain%d", i); ++ entry->procname = kstrdup(buf, GFP_KERNEL); ++ entry->mode = 0555; ++ entry->child = sd_alloc_ctl_domain_table(sd); ++ entry++; ++ i++; ++ } ++ return table; ++} ++ ++static struct 
ctl_table_header *sd_sysctl_header; ++static void register_sched_domain_sysctl(void) ++{ ++ int i, cpu_num = num_possible_cpus(); ++ struct ctl_table *entry = sd_alloc_ctl_entry(cpu_num + 1); ++ char buf[32]; ++ ++ WARN_ON(sd_ctl_dir[0].child); ++ sd_ctl_dir[0].child = entry; ++ ++ if (entry == NULL) ++ return; ++ ++ for_each_possible_cpu(i) { ++ snprintf(buf, 32, "cpu%d", i); ++ entry->procname = kstrdup(buf, GFP_KERNEL); ++ entry->mode = 0555; ++ entry->child = sd_alloc_ctl_cpu_table(i); ++ entry++; ++ } ++ ++ WARN_ON(sd_sysctl_header); ++ sd_sysctl_header = register_sysctl_table(sd_ctl_root); ++} ++ ++/* may be called multiple times per register */ ++static void unregister_sched_domain_sysctl(void) ++{ ++ if (sd_sysctl_header) ++ unregister_sysctl_table(sd_sysctl_header); ++ sd_sysctl_header = NULL; ++ if (sd_ctl_dir[0].child) ++ sd_free_ctl_entry(&sd_ctl_dir[0].child); ++} ++#else ++static void register_sched_domain_sysctl(void) ++{ ++} ++static void unregister_sched_domain_sysctl(void) ++{ ++} ++#endif ++ ++static void set_rq_online(struct rq *rq) ++{ ++ if (!rq->online) { ++ cpumask_set_cpu(cpu_of(rq), rq->rd->online); ++ rq->online = true; ++ } ++} ++ ++static void set_rq_offline(struct rq *rq) ++{ ++ if (rq->online) { ++ cpumask_clear_cpu(cpu_of(rq), rq->rd->online); ++ rq->online = false; ++ } ++} ++ ++/* ++ * migration_call - callback that gets triggered when a CPU is added. ++ */ ++static int __cpuinit ++migration_call(struct notifier_block *nfb, unsigned long action, void *hcpu) ++{ ++ int cpu = (long)hcpu; ++ unsigned long flags; ++ struct rq *rq = cpu_rq(cpu); ++#ifdef CONFIG_HOTPLUG_CPU ++ struct task_struct *idle = rq->idle; ++#endif ++ ++ switch (action & ~CPU_TASKS_FROZEN) { ++ ++ case CPU_UP_PREPARE: ++ break; ++ ++ case CPU_ONLINE: ++ /* Update our root-domain */ ++ grq_lock_irqsave(&flags); ++ if (rq->rd) { ++ BUG_ON(!cpumask_test_cpu(cpu, rq->rd->span)); ++ ++ set_rq_online(rq); ++ } ++ grq.noc = num_online_cpus(); ++ grq_unlock_irqrestore(&flags); ++ break; ++ ++#ifdef CONFIG_HOTPLUG_CPU ++ case CPU_DEAD: ++ /* Idle task back to normal (off runqueue, low prio) */ ++ grq_lock_irq(); ++ return_task(idle, 1); ++ idle->static_prio = MAX_PRIO; ++ __setscheduler(idle, rq, SCHED_NORMAL, 0); ++ idle->prio = PRIO_LIMIT; ++ set_rq_task(rq, idle); ++ update_clocks(rq); ++ grq_unlock_irq(); ++ break; ++ ++ case CPU_DYING: ++ /* Update our root-domain */ ++ grq_lock_irqsave(&flags); ++ sched_idle_next(rq, cpu, idle); ++ if (rq->rd) { ++ BUG_ON(!cpumask_test_cpu(cpu, rq->rd->span)); ++ set_rq_offline(rq); ++ } ++ break_sole_affinity(cpu, idle); ++ grq.noc = num_online_cpus(); ++ grq_unlock_irqrestore(&flags); ++ break; ++#endif ++ } ++ return NOTIFY_OK; ++} ++ ++/* ++ * Register at high priority so that task migration (migrate_all_tasks) ++ * happens before everything else. This has to be lower priority than ++ * the notifier in the perf_counter subsystem, though. 
++ */ ++static struct notifier_block __cpuinitdata migration_notifier = { ++ .notifier_call = migration_call, ++ .priority = CPU_PRI_MIGRATION, ++}; ++ ++static int __cpuinit sched_cpu_active(struct notifier_block *nfb, ++ unsigned long action, void *hcpu) ++{ ++ switch (action & ~CPU_TASKS_FROZEN) { ++ case CPU_ONLINE: ++ case CPU_DOWN_FAILED: ++ set_cpu_active((long)hcpu, true); ++ return NOTIFY_OK; ++ default: ++ return NOTIFY_DONE; ++ } ++} ++ ++static int __cpuinit sched_cpu_inactive(struct notifier_block *nfb, ++ unsigned long action, void *hcpu) ++{ ++ switch (action & ~CPU_TASKS_FROZEN) { ++ case CPU_DOWN_PREPARE: ++ set_cpu_active((long)hcpu, false); ++ return NOTIFY_OK; ++ default: ++ return NOTIFY_DONE; ++ } ++} ++ ++int __init migration_init(void) ++{ ++ void *cpu = (void *)(long)smp_processor_id(); ++ int err; ++ ++ /* Initialise migration for the boot CPU */ ++ err = migration_call(&migration_notifier, CPU_UP_PREPARE, cpu); ++ BUG_ON(err == NOTIFY_BAD); ++ migration_call(&migration_notifier, CPU_ONLINE, cpu); ++ register_cpu_notifier(&migration_notifier); ++ ++ /* Register cpu active notifiers */ ++ cpu_notifier(sched_cpu_active, CPU_PRI_SCHED_ACTIVE); ++ cpu_notifier(sched_cpu_inactive, CPU_PRI_SCHED_INACTIVE); ++ ++ return 0; ++} ++early_initcall(migration_init); ++#endif ++ ++#ifdef CONFIG_SMP ++ ++static cpumask_var_t sched_domains_tmpmask; /* sched_domains_mutex */ ++ ++#ifdef CONFIG_SCHED_DEBUG ++ ++static __read_mostly int sched_domain_debug_enabled; ++ ++static int __init sched_domain_debug_setup(char *str) ++{ ++ sched_domain_debug_enabled = 1; ++ ++ return 0; ++} ++early_param("sched_debug", sched_domain_debug_setup); ++ ++static int sched_domain_debug_one(struct sched_domain *sd, int cpu, int level, ++ struct cpumask *groupmask) ++{ ++ struct sched_group *group = sd->groups; ++ char str[256]; ++ ++ cpulist_scnprintf(str, sizeof(str), sched_domain_span(sd)); ++ cpumask_clear(groupmask); ++ ++ printk(KERN_DEBUG "%*s domain %d: ", level, "", level); ++ ++ if (!(sd->flags & SD_LOAD_BALANCE)) { ++ printk("does not load-balance\n"); ++ if (sd->parent) ++ printk(KERN_ERR "ERROR: !SD_LOAD_BALANCE domain" ++ " has parent"); ++ return -1; ++ } ++ ++ printk(KERN_CONT "span %s level %s\n", str, sd->name); ++ ++ if (!cpumask_test_cpu(cpu, sched_domain_span(sd))) { ++ printk(KERN_ERR "ERROR: domain->span does not contain " ++ "CPU%d\n", cpu); ++ } ++ if (!cpumask_test_cpu(cpu, sched_group_cpus(group))) { ++ printk(KERN_ERR "ERROR: domain->groups does not contain" ++ " CPU%d\n", cpu); ++ } ++ ++ printk(KERN_DEBUG "%*s groups:", level + 1, ""); ++ do { ++ if (!group) { ++ printk("\n"); ++ printk(KERN_ERR "ERROR: group is NULL\n"); ++ break; ++ } ++ ++ if (!group->sgp->power) { ++ printk(KERN_CONT "\n"); ++ printk(KERN_ERR "ERROR: domain->cpu_power not " ++ "set\n"); ++ break; ++ } ++ ++ if (!cpumask_weight(sched_group_cpus(group))) { ++ printk(KERN_CONT "\n"); ++ printk(KERN_ERR "ERROR: empty group\n"); ++ break; ++ } ++ ++ if (cpumask_intersects(groupmask, sched_group_cpus(group))) { ++ printk(KERN_CONT "\n"); ++ printk(KERN_ERR "ERROR: repeated CPUs\n"); ++ break; ++ } ++ ++ cpumask_or(groupmask, groupmask, sched_group_cpus(group)); ++ ++ cpulist_scnprintf(str, sizeof(str), sched_group_cpus(group)); ++ ++ printk(KERN_CONT " %s", str); ++ if (group->sgp->power != SCHED_POWER_SCALE) { ++ printk(KERN_CONT " (cpu_power = %d)", ++ group->sgp->power); ++ } ++ ++ group = group->next; ++ } while (group != sd->groups); ++ printk(KERN_CONT "\n"); ++ ++ if 
(!cpumask_equal(sched_domain_span(sd), groupmask)) ++ printk(KERN_ERR "ERROR: groups don't span domain->span\n"); ++ ++ if (sd->parent && ++ !cpumask_subset(groupmask, sched_domain_span(sd->parent))) ++ printk(KERN_ERR "ERROR: parent span is not a superset " ++ "of domain->span\n"); ++ return 0; ++} ++ ++static void sched_domain_debug(struct sched_domain *sd, int cpu) ++{ ++ int level = 0; ++ ++ if (!sched_domain_debug_enabled) ++ return; ++ ++ if (!sd) { ++ printk(KERN_DEBUG "CPU%d attaching NULL sched-domain.\n", cpu); ++ return; ++ } ++ ++ printk(KERN_DEBUG "CPU%d attaching sched-domain:\n", cpu); ++ ++ for (;;) { ++ if (sched_domain_debug_one(sd, cpu, level, sched_domains_tmpmask)) ++ break; ++ level++; ++ sd = sd->parent; ++ if (!sd) ++ break; ++ } ++} ++#else /* !CONFIG_SCHED_DEBUG */ ++# define sched_domain_debug(sd, cpu) do { } while (0) ++#endif /* CONFIG_SCHED_DEBUG */ ++ ++static int sd_degenerate(struct sched_domain *sd) ++{ ++ if (cpumask_weight(sched_domain_span(sd)) == 1) ++ return 1; ++ ++ /* Following flags need at least 2 groups */ ++ if (sd->flags & (SD_LOAD_BALANCE | ++ SD_BALANCE_NEWIDLE | ++ SD_BALANCE_FORK | ++ SD_BALANCE_EXEC | ++ SD_SHARE_CPUPOWER | ++ SD_SHARE_PKG_RESOURCES)) { ++ if (sd->groups != sd->groups->next) ++ return 0; ++ } ++ ++ /* Following flags don't use groups */ ++ if (sd->flags & (SD_WAKE_AFFINE)) ++ return 0; ++ ++ return 1; ++} ++ ++static int ++sd_parent_degenerate(struct sched_domain *sd, struct sched_domain *parent) ++{ ++ unsigned long cflags = sd->flags, pflags = parent->flags; ++ ++ if (sd_degenerate(parent)) ++ return 1; ++ ++ if (!cpumask_equal(sched_domain_span(sd), sched_domain_span(parent))) ++ return 0; ++ ++ /* Flags needing groups don't count if only 1 group in parent */ ++ if (parent->groups == parent->groups->next) { ++ pflags &= ~(SD_LOAD_BALANCE | ++ SD_BALANCE_NEWIDLE | ++ SD_BALANCE_FORK | ++ SD_BALANCE_EXEC | ++ SD_SHARE_CPUPOWER | ++ SD_SHARE_PKG_RESOURCES); ++ if (nr_node_ids == 1) ++ pflags &= ~SD_SERIALIZE; ++ } ++ if (~cflags & pflags) ++ return 0; ++ ++ return 1; ++} ++ ++static void free_rootdomain(struct rcu_head *rcu) ++{ ++ struct root_domain *rd = container_of(rcu, struct root_domain, rcu); ++ ++ cpupri_cleanup(&rd->cpupri); ++ free_cpumask_var(rd->rto_mask); ++ free_cpumask_var(rd->online); ++ free_cpumask_var(rd->span); ++ kfree(rd); ++} ++ ++static void rq_attach_root(struct rq *rq, struct root_domain *rd) ++{ ++ struct root_domain *old_rd = NULL; ++ unsigned long flags; ++ ++ grq_lock_irqsave(&flags); ++ ++ if (rq->rd) { ++ old_rd = rq->rd; ++ ++ if (cpumask_test_cpu(rq->cpu, old_rd->online)) ++ set_rq_offline(rq); ++ ++ cpumask_clear_cpu(rq->cpu, old_rd->span); ++ ++ /* ++ * If we dont want to free the old_rt yet then ++ * set old_rd to NULL to skip the freeing later ++ * in this function: ++ */ ++ if (!atomic_dec_and_test(&old_rd->refcount)) ++ old_rd = NULL; ++ } ++ ++ atomic_inc(&rd->refcount); ++ rq->rd = rd; ++ ++ cpumask_set_cpu(rq->cpu, rd->span); ++ if (cpumask_test_cpu(rq->cpu, cpu_active_mask)) ++ set_rq_online(rq); ++ ++ grq_unlock_irqrestore(&flags); ++ ++ if (old_rd) ++ call_rcu_sched(&old_rd->rcu, free_rootdomain); ++} ++ ++static int init_rootdomain(struct root_domain *rd) ++{ ++ memset(rd, 0, sizeof(*rd)); ++ ++ if (!alloc_cpumask_var(&rd->span, GFP_KERNEL)) ++ goto out; ++ if (!alloc_cpumask_var(&rd->online, GFP_KERNEL)) ++ goto free_span; ++ if (!alloc_cpumask_var(&rd->rto_mask, GFP_KERNEL)) ++ goto free_online; ++ ++ if (cpupri_init(&rd->cpupri) != 0) ++ goto free_rto_mask; ++ return 0; ++ 
++free_rto_mask: ++ free_cpumask_var(rd->rto_mask); ++free_online: ++ free_cpumask_var(rd->online); ++free_span: ++ free_cpumask_var(rd->span); ++out: ++ return -ENOMEM; ++} ++ ++static void init_defrootdomain(void) ++{ ++ init_rootdomain(&def_root_domain); ++ ++ atomic_set(&def_root_domain.refcount, 1); ++} ++ ++static struct root_domain *alloc_rootdomain(void) ++{ ++ struct root_domain *rd; ++ ++ rd = kmalloc(sizeof(*rd), GFP_KERNEL); ++ if (!rd) ++ return NULL; ++ ++ if (init_rootdomain(rd) != 0) { ++ kfree(rd); ++ return NULL; ++ } ++ ++ return rd; ++} ++ ++static void free_sched_groups(struct sched_group *sg, int free_sgp) ++{ ++ struct sched_group *tmp, *first; ++ ++ if (!sg) ++ return; ++ ++ first = sg; ++ do { ++ tmp = sg->next; ++ ++ if (free_sgp && atomic_dec_and_test(&sg->sgp->ref)) ++ kfree(sg->sgp); ++ ++ kfree(sg); ++ sg = tmp; ++ } while (sg != first); ++} ++ ++static void free_sched_domain(struct rcu_head *rcu) ++{ ++ struct sched_domain *sd = container_of(rcu, struct sched_domain, rcu); ++ ++ /* ++ * If its an overlapping domain it has private groups, iterate and ++ * nuke them all. ++ */ ++ if (sd->flags & SD_OVERLAP) { ++ free_sched_groups(sd->groups, 1); ++ } else if (atomic_dec_and_test(&sd->groups->ref)) { ++ kfree(sd->groups->sgp); ++ kfree(sd->groups); ++ } ++ kfree(sd); ++} ++ ++static void destroy_sched_domain(struct sched_domain *sd, int cpu) ++{ ++ call_rcu(&sd->rcu, free_sched_domain); ++} ++ ++static void destroy_sched_domains(struct sched_domain *sd, int cpu) ++{ ++ for (; sd; sd = sd->parent) ++ destroy_sched_domain(sd, cpu); ++} ++ ++/* ++ * Attach the domain 'sd' to 'cpu' as its base domain. Callers must ++ * hold the hotplug lock. ++ */ ++static void ++cpu_attach_domain(struct sched_domain *sd, struct root_domain *rd, int cpu) ++{ ++ struct rq *rq = cpu_rq(cpu); ++ struct sched_domain *tmp; ++ ++ /* Remove the sched domains which do not contribute to scheduling. */ ++ for (tmp = sd; tmp; ) { ++ struct sched_domain *parent = tmp->parent; ++ if (!parent) ++ break; ++ ++ if (sd_parent_degenerate(tmp, parent)) { ++ tmp->parent = parent->parent; ++ if (parent->parent) ++ parent->parent->child = tmp; ++ destroy_sched_domain(parent, cpu); ++ } else ++ tmp = tmp->parent; ++ } ++ ++ if (sd && sd_degenerate(sd)) { ++ tmp = sd; ++ sd = sd->parent; ++ destroy_sched_domain(tmp, cpu); ++ if (sd) ++ sd->child = NULL; ++ } ++ ++ sched_domain_debug(sd, cpu); ++ ++ rq_attach_root(rq, rd); ++ tmp = rq->sd; ++ rcu_assign_pointer(rq->sd, sd); ++ destroy_sched_domains(tmp, cpu); ++} ++ ++/* cpus with isolated domains */ ++static cpumask_var_t cpu_isolated_map; ++ ++/* Setup the mask of cpus configured for isolated domains */ ++static int __init isolated_cpu_setup(char *str) ++{ ++ alloc_bootmem_cpumask_var(&cpu_isolated_map); ++ cpulist_parse(str, cpu_isolated_map); ++ return 1; ++} ++ ++__setup("isolcpus=", isolated_cpu_setup); ++ ++#define SD_NODES_PER_DOMAIN 16 ++ ++#ifdef CONFIG_NUMA ++ ++/** ++ * find_next_best_node - find the next node to include in a sched_domain ++ * @node: node whose sched_domain we're building ++ * @used_nodes: nodes already in the sched_domain ++ * ++ * Find the next node to include in a given scheduling domain. Simply ++ * finds the closest node not already in the @used_nodes map. ++ * ++ * Should use nodemask_t. 
++ */ ++static int find_next_best_node(int node, nodemask_t *used_nodes) ++{ ++ int i, n, val, min_val, best_node = -1; ++ ++ min_val = INT_MAX; ++ ++ for (i = 0; i < nr_node_ids; i++) { ++ /* Start at @node */ ++ n = (node + i) % nr_node_ids; ++ ++ if (!nr_cpus_node(n)) ++ continue; ++ ++ /* Skip already used nodes */ ++ if (node_isset(n, *used_nodes)) ++ continue; ++ ++ /* Simple min distance search */ ++ val = node_distance(node, n); ++ ++ if (val < min_val) { ++ min_val = val; ++ best_node = n; ++ } ++ } ++ ++ if (best_node != -1) ++ node_set(best_node, *used_nodes); ++ return best_node; ++} ++ ++/** ++ * sched_domain_node_span - get a cpumask for a node's sched_domain ++ * @node: node whose cpumask we're constructing ++ * @span: resulting cpumask ++ * ++ * Given a node, construct a good cpumask for its sched_domain to span. It ++ * should be one that prevents unnecessary balancing, but also spreads tasks ++ * out optimally. ++ */ ++static void sched_domain_node_span(int node, struct cpumask *span) ++{ ++ nodemask_t used_nodes; ++ int i; ++ ++ cpumask_clear(span); ++ nodes_clear(used_nodes); ++ ++ cpumask_or(span, span, cpumask_of_node(node)); ++ node_set(node, used_nodes); ++ ++ for (i = 1; i < SD_NODES_PER_DOMAIN; i++) { ++ int next_node = find_next_best_node(node, &used_nodes); ++ if (next_node < 0) ++ break; ++ cpumask_or(span, span, cpumask_of_node(next_node)); ++ } ++} ++ ++static const struct cpumask *cpu_node_mask(int cpu) ++{ ++ lockdep_assert_held(&sched_domains_mutex); ++ ++ sched_domain_node_span(cpu_to_node(cpu), sched_domains_tmpmask); ++ ++ return sched_domains_tmpmask; ++} ++ ++static const struct cpumask *cpu_allnodes_mask(int cpu) ++{ ++ return cpu_possible_mask; ++} ++#endif /* CONFIG_NUMA */ ++ ++static const struct cpumask *cpu_cpu_mask(int cpu) ++{ ++ return cpumask_of_node(cpu_to_node(cpu)); ++} ++ ++int sched_smt_power_savings = 0, sched_mc_power_savings = 0; ++ ++struct sd_data { ++ struct sched_domain **__percpu sd; ++ struct sched_group **__percpu sg; ++ struct sched_group_power **__percpu sgp; ++}; ++ ++struct s_data { ++ struct sched_domain ** __percpu sd; ++ struct root_domain *rd; ++}; ++ ++enum s_alloc { ++ sa_rootdomain, ++ sa_sd, ++ sa_sd_storage, ++ sa_none, ++}; ++ ++struct sched_domain_topology_level; ++ ++typedef struct sched_domain *(*sched_domain_init_f)(struct sched_domain_topology_level *tl, int cpu); ++typedef const struct cpumask *(*sched_domain_mask_f)(int cpu); ++ ++#define SDTL_OVERLAP 0x01 ++ ++struct sched_domain_topology_level { ++ sched_domain_init_f init; ++ sched_domain_mask_f mask; ++ int flags; ++ struct sd_data data; ++}; ++ ++static int ++build_overlap_sched_groups(struct sched_domain *sd, int cpu) ++{ ++ struct sched_group *first = NULL, *last = NULL, *groups = NULL, *sg; ++ const struct cpumask *span = sched_domain_span(sd); ++ struct cpumask *covered = sched_domains_tmpmask; ++ struct sd_data *sdd = sd->private; ++ struct sched_domain *child; ++ int i; ++ ++ cpumask_clear(covered); ++ ++ for_each_cpu(i, span) { ++ struct cpumask *sg_span; ++ ++ if (cpumask_test_cpu(i, covered)) ++ continue; ++ ++ sg = kzalloc_node(sizeof(struct sched_group) + cpumask_size(), ++ GFP_KERNEL, cpu_to_node(i)); ++ ++ if (!sg) ++ goto fail; ++ ++ sg_span = sched_group_cpus(sg); ++ ++ child = *per_cpu_ptr(sdd->sd, i); ++ if (child->child) { ++ child = child->child; ++ cpumask_copy(sg_span, sched_domain_span(child)); ++ } else ++ cpumask_set_cpu(i, sg_span); ++ ++ cpumask_or(covered, covered, sg_span); ++ ++ sg->sgp = *per_cpu_ptr(sdd->sgp, 
cpumask_first(sg_span)); ++ atomic_inc(&sg->sgp->ref); ++ ++ if (cpumask_test_cpu(cpu, sg_span)) ++ groups = sg; ++ ++ if (!first) ++ first = sg; ++ if (last) ++ last->next = sg; ++ last = sg; ++ last->next = first; ++ } ++ sd->groups = groups; ++ ++ return 0; ++ ++fail: ++ free_sched_groups(first, 0); ++ ++ return -ENOMEM; ++} ++ ++static int get_group(int cpu, struct sd_data *sdd, struct sched_group **sg) ++{ ++ struct sched_domain *sd = *per_cpu_ptr(sdd->sd, cpu); ++ struct sched_domain *child = sd->child; ++ ++ if (child) ++ cpu = cpumask_first(sched_domain_span(child)); ++ ++ if (sg) { ++ *sg = *per_cpu_ptr(sdd->sg, cpu); ++ (*sg)->sgp = *per_cpu_ptr(sdd->sgp, cpu); ++ atomic_set(&(*sg)->sgp->ref, 1); /* for claim_allocations */ ++ } ++ ++ return cpu; ++} ++ ++/* ++ * build_sched_groups will build a circular linked list of the groups ++ * covered by the given span, and will set each group's ->cpumask correctly, ++ * and ->cpu_power to 0. ++ * ++ * Assumes the sched_domain tree is fully constructed ++ */ ++static int ++build_sched_groups(struct sched_domain *sd, int cpu) ++{ ++ struct sched_group *first = NULL, *last = NULL; ++ struct sd_data *sdd = sd->private; ++ const struct cpumask *span = sched_domain_span(sd); ++ struct cpumask *covered; ++ int i; ++ ++ get_group(cpu, sdd, &sd->groups); ++ atomic_inc(&sd->groups->ref); ++ ++ if (cpu != cpumask_first(sched_domain_span(sd))) ++ return 0; ++ ++ lockdep_assert_held(&sched_domains_mutex); ++ covered = sched_domains_tmpmask; ++ ++ cpumask_clear(covered); ++ ++ for_each_cpu(i, span) { ++ struct sched_group *sg; ++ int group = get_group(i, sdd, &sg); ++ int j; ++ ++ if (cpumask_test_cpu(i, covered)) ++ continue; ++ ++ cpumask_clear(sched_group_cpus(sg)); ++ sg->sgp->power = 0; ++ ++ for_each_cpu(j, span) { ++ if (get_group(j, sdd, NULL) != group) ++ continue; ++ ++ cpumask_set_cpu(j, covered); ++ cpumask_set_cpu(j, sched_group_cpus(sg)); ++ } ++ ++ if (!first) ++ first = sg; ++ if (last) ++ last->next = sg; ++ last = sg; ++ } ++ last->next = first; ++ ++ return 0; ++} ++ ++/* ++ * Initializers for schedule domains ++ * Non-inlined to reduce accumulated stack pressure in build_sched_domains() ++ */ ++ ++#ifdef CONFIG_SCHED_DEBUG ++# define SD_INIT_NAME(sd, type) sd->name = #type ++#else ++# define SD_INIT_NAME(sd, type) do { } while (0) ++#endif ++ ++#define SD_INIT_FUNC(type) \ ++static noinline struct sched_domain * \ ++sd_init_##type(struct sched_domain_topology_level *tl, int cpu) \ ++{ \ ++ struct sched_domain *sd = *per_cpu_ptr(tl->data.sd, cpu); \ ++ *sd = SD_##type##_INIT; \ ++ SD_INIT_NAME(sd, type); \ ++ sd->private = &tl->data; \ ++ return sd; \ ++} ++ ++SD_INIT_FUNC(CPU) ++#ifdef CONFIG_NUMA ++ SD_INIT_FUNC(ALLNODES) ++ SD_INIT_FUNC(NODE) ++#endif ++#ifdef CONFIG_SCHED_SMT ++ SD_INIT_FUNC(SIBLING) ++#endif ++#ifdef CONFIG_SCHED_MC ++ SD_INIT_FUNC(MC) ++#endif ++#ifdef CONFIG_SCHED_BOOK ++ SD_INIT_FUNC(BOOK) ++#endif ++ ++static int default_relax_domain_level = -1; ++int sched_domain_level_max; ++ ++static int __init setup_relax_domain_level(char *str) ++{ ++ unsigned long val; ++ ++ val = simple_strtoul(str, NULL, 0); ++ if (val < sched_domain_level_max) ++ default_relax_domain_level = val; ++ ++ return 1; ++} ++__setup("relax_domain_level=", setup_relax_domain_level); ++ ++static void set_domain_attribute(struct sched_domain *sd, ++ struct sched_domain_attr *attr) ++{ ++ int request; ++ ++ if (!attr || attr->relax_domain_level < 0) { ++ if (default_relax_domain_level < 0) ++ return; ++ else ++ request = 
default_relax_domain_level; ++ } else ++ request = attr->relax_domain_level; ++ if (request < sd->level) { ++ /* turn off idle balance on this domain */ ++ sd->flags &= ~(SD_BALANCE_WAKE|SD_BALANCE_NEWIDLE); ++ } else { ++ /* turn on idle balance on this domain */ ++ sd->flags |= (SD_BALANCE_WAKE|SD_BALANCE_NEWIDLE); ++ } ++} ++ ++static void __sdt_free(const struct cpumask *cpu_map); ++static int __sdt_alloc(const struct cpumask *cpu_map); ++ ++static void __free_domain_allocs(struct s_data *d, enum s_alloc what, ++ const struct cpumask *cpu_map) ++{ ++ switch (what) { ++ case sa_rootdomain: ++ if (!atomic_read(&d->rd->refcount)) ++ free_rootdomain(&d->rd->rcu); /* fall through */ ++ case sa_sd: ++ free_percpu(d->sd); /* fall through */ ++ case sa_sd_storage: ++ __sdt_free(cpu_map); /* fall through */ ++ case sa_none: ++ break; ++ } ++} ++ ++static enum s_alloc __visit_domain_allocation_hell(struct s_data *d, ++ const struct cpumask *cpu_map) ++{ ++ memset(d, 0, sizeof(*d)); ++ ++ if (__sdt_alloc(cpu_map)) ++ return sa_sd_storage; ++ d->sd = alloc_percpu(struct sched_domain *); ++ if (!d->sd) ++ return sa_sd_storage; ++ d->rd = alloc_rootdomain(); ++ if (!d->rd) ++ return sa_sd; ++ return sa_rootdomain; ++} ++ ++/* ++ * NULL the sd_data elements we've used to build the sched_domain and ++ * sched_group structure so that the subsequent __free_domain_allocs() ++ * will not free the data we're using. ++ */ ++static void claim_allocations(int cpu, struct sched_domain *sd) ++{ ++ struct sd_data *sdd = sd->private; ++ ++ WARN_ON_ONCE(*per_cpu_ptr(sdd->sd, cpu) != sd); ++ *per_cpu_ptr(sdd->sd, cpu) = NULL; ++ ++ if (atomic_read(&(*per_cpu_ptr(sdd->sg, cpu))->ref)) ++ *per_cpu_ptr(sdd->sg, cpu) = NULL; ++ ++ if (atomic_read(&(*per_cpu_ptr(sdd->sgp, cpu))->ref)) ++ *per_cpu_ptr(sdd->sgp, cpu) = NULL; ++} ++ ++#ifdef CONFIG_SCHED_SMT ++static const struct cpumask *cpu_smt_mask(int cpu) ++{ ++ return topology_thread_cpumask(cpu); ++} ++#endif ++ ++/* ++ * Topology list, bottom-up. 
++ */ ++static struct sched_domain_topology_level default_topology[] = { ++#ifdef CONFIG_SCHED_SMT ++ { sd_init_SIBLING, cpu_smt_mask, }, ++#endif ++#ifdef CONFIG_SCHED_MC ++ { sd_init_MC, cpu_coregroup_mask, }, ++#endif ++#ifdef CONFIG_SCHED_BOOK ++ { sd_init_BOOK, cpu_book_mask, }, ++#endif ++ { sd_init_CPU, cpu_cpu_mask, }, ++#ifdef CONFIG_NUMA ++ { sd_init_NODE, cpu_node_mask, SDTL_OVERLAP, }, ++ { sd_init_ALLNODES, cpu_allnodes_mask, }, ++#endif ++ { NULL, }, ++}; ++ ++static struct sched_domain_topology_level *sched_domain_topology = default_topology; ++ ++static int __sdt_alloc(const struct cpumask *cpu_map) ++{ ++ struct sched_domain_topology_level *tl; ++ int j; ++ ++ for (tl = sched_domain_topology; tl->init; tl++) { ++ struct sd_data *sdd = &tl->data; ++ ++ sdd->sd = alloc_percpu(struct sched_domain *); ++ if (!sdd->sd) ++ return -ENOMEM; ++ ++ sdd->sg = alloc_percpu(struct sched_group *); ++ if (!sdd->sg) ++ return -ENOMEM; ++ ++ sdd->sgp = alloc_percpu(struct sched_group_power *); ++ if (!sdd->sgp) ++ return -ENOMEM; ++ ++ for_each_cpu(j, cpu_map) { ++ struct sched_domain *sd; ++ struct sched_group *sg; ++ struct sched_group_power *sgp; ++ ++ sd = kzalloc_node(sizeof(struct sched_domain) + cpumask_size(), ++ GFP_KERNEL, cpu_to_node(j)); ++ if (!sd) ++ return -ENOMEM; ++ ++ *per_cpu_ptr(sdd->sd, j) = sd; ++ ++ sg = kzalloc_node(sizeof(struct sched_group) + cpumask_size(), ++ GFP_KERNEL, cpu_to_node(j)); ++ if (!sg) ++ return -ENOMEM; ++ ++ *per_cpu_ptr(sdd->sg, j) = sg; ++ ++ sgp = kzalloc_node(sizeof(struct sched_group_power), ++ GFP_KERNEL, cpu_to_node(j)); ++ if (!sgp) ++ return -ENOMEM; ++ ++ *per_cpu_ptr(sdd->sgp, j) = sgp; ++ } ++ } ++ ++ return 0; ++} ++ ++static void __sdt_free(const struct cpumask *cpu_map) ++{ ++ struct sched_domain_topology_level *tl; ++ int j; ++ ++ for (tl = sched_domain_topology; tl->init; tl++) { ++ struct sd_data *sdd = &tl->data; ++ ++ for_each_cpu(j, cpu_map) { ++ struct sched_domain *sd = *per_cpu_ptr(sdd->sd, j); ++ if (sd && (sd->flags & SD_OVERLAP)) ++ free_sched_groups(sd->groups, 0); ++ kfree(*per_cpu_ptr(sdd->sd, j)); ++ kfree(*per_cpu_ptr(sdd->sg, j)); ++ kfree(*per_cpu_ptr(sdd->sgp, j)); ++ } ++ free_percpu(sdd->sd); ++ free_percpu(sdd->sg); ++ free_percpu(sdd->sgp); ++ } ++} ++ ++struct sched_domain *build_sched_domain(struct sched_domain_topology_level *tl, ++ struct s_data *d, const struct cpumask *cpu_map, ++ struct sched_domain_attr *attr, struct sched_domain *child, ++ int cpu) ++{ ++ struct sched_domain *sd = tl->init(tl, cpu); ++ if (!sd) ++ return child; ++ ++ set_domain_attribute(sd, attr); ++ cpumask_and(sched_domain_span(sd), cpu_map, tl->mask(cpu)); ++ if (child) { ++ sd->level = child->level + 1; ++ sched_domain_level_max = max(sched_domain_level_max, sd->level); ++ child->parent = sd; ++ } ++ sd->child = child; ++ ++ return sd; ++} ++ ++/* ++ * Build sched domains for a given set of cpus and attach the sched domains ++ * to the individual cpus ++ */ ++static int build_sched_domains(const struct cpumask *cpu_map, ++ struct sched_domain_attr *attr) ++{ ++ enum s_alloc alloc_state = sa_none; ++ struct sched_domain *sd; ++ struct s_data d; ++ int i, ret = -ENOMEM; ++ ++ alloc_state = __visit_domain_allocation_hell(&d, cpu_map); ++ if (alloc_state != sa_rootdomain) ++ goto error; ++ ++ /* Set up domains for cpus specified by the cpu_map. 
*/ ++ for_each_cpu(i, cpu_map) { ++ struct sched_domain_topology_level *tl; ++ ++ sd = NULL; ++ for (tl = sched_domain_topology; tl->init; tl++) { ++ sd = build_sched_domain(tl, &d, cpu_map, attr, sd, i); ++ if (tl->flags & SDTL_OVERLAP) ++ sd->flags |= SD_OVERLAP; ++ if (cpumask_equal(cpu_map, sched_domain_span(sd))) ++ break; ++ } ++ ++ while (sd->child) ++ sd = sd->child; ++ ++ *per_cpu_ptr(d.sd, i) = sd; ++ } ++ ++ /* Build the groups for the domains */ ++ for_each_cpu(i, cpu_map) { ++ for (sd = *per_cpu_ptr(d.sd, i); sd; sd = sd->parent) { ++ sd->span_weight = cpumask_weight(sched_domain_span(sd)); ++ if (sd->flags & SD_OVERLAP) { ++ if (build_overlap_sched_groups(sd, i)) ++ goto error; ++ } else { ++ if (build_sched_groups(sd, i)) ++ goto error; ++ } ++ } ++ } ++ ++ /* Calculate CPU power for physical packages and nodes */ ++ for (i = nr_cpumask_bits-1; i >= 0; i--) { ++ if (!cpumask_test_cpu(i, cpu_map)) ++ continue; ++ ++ for (sd = *per_cpu_ptr(d.sd, i); sd; sd = sd->parent) { ++ claim_allocations(i, sd); ++ } ++ } ++ ++ /* Attach the domains */ ++ rcu_read_lock(); ++ for_each_cpu(i, cpu_map) { ++ sd = *per_cpu_ptr(d.sd, i); ++ cpu_attach_domain(sd, d.rd, i); ++ } ++ rcu_read_unlock(); ++ ++ ret = 0; ++error: ++ __free_domain_allocs(&d, alloc_state, cpu_map); ++ return ret; ++} ++ ++static cpumask_var_t *doms_cur; /* current sched domains */ ++static int ndoms_cur; /* number of sched domains in 'doms_cur' */ ++static struct sched_domain_attr *dattr_cur; ++ /* attribues of custom domains in 'doms_cur' */ ++ ++/* ++ * Special case: If a kmalloc of a doms_cur partition (array of ++ * cpumask) fails, then fallback to a single sched domain, ++ * as determined by the single cpumask fallback_doms. ++ */ ++static cpumask_var_t fallback_doms; ++ ++/* ++ * arch_update_cpu_topology lets virtualized architectures update the ++ * cpu core maps. It is supposed to return 1 if the topology changed ++ * or 0 if it stayed the same. ++ */ ++int __attribute__((weak)) arch_update_cpu_topology(void) ++{ ++ return 0; ++} ++ ++cpumask_var_t *alloc_sched_domains(unsigned int ndoms) ++{ ++ int i; ++ cpumask_var_t *doms; ++ ++ doms = kmalloc(sizeof(*doms) * ndoms, GFP_KERNEL); ++ if (!doms) ++ return NULL; ++ for (i = 0; i < ndoms; i++) { ++ if (!alloc_cpumask_var(&doms[i], GFP_KERNEL)) { ++ free_sched_domains(doms, i); ++ return NULL; ++ } ++ } ++ return doms; ++} ++ ++void free_sched_domains(cpumask_var_t doms[], unsigned int ndoms) ++{ ++ unsigned int i; ++ for (i = 0; i < ndoms; i++) ++ free_cpumask_var(doms[i]); ++ kfree(doms); ++} ++ ++/* ++ * Set up scheduler domains and groups. Callers must hold the hotplug lock. ++ * For now this just excludes isolated cpus, but could be used to ++ * exclude other special cases in the future. 
++ */ ++static int init_sched_domains(const struct cpumask *cpu_map) ++{ ++ int err; ++ ++ arch_update_cpu_topology(); ++ ndoms_cur = 1; ++ doms_cur = alloc_sched_domains(ndoms_cur); ++ if (!doms_cur) ++ doms_cur = &fallback_doms; ++ cpumask_andnot(doms_cur[0], cpu_map, cpu_isolated_map); ++ dattr_cur = NULL; ++ err = build_sched_domains(doms_cur[0], NULL); ++ register_sched_domain_sysctl(); ++ ++ return err; ++} ++ ++/* ++ * Detach sched domains from a group of cpus specified in cpu_map ++ * These cpus will now be attached to the NULL domain ++ */ ++static void detach_destroy_domains(const struct cpumask *cpu_map) ++{ ++ int i; ++ ++ rcu_read_lock(); ++ for_each_cpu(i, cpu_map) ++ cpu_attach_domain(NULL, &def_root_domain, i); ++ rcu_read_unlock(); ++} ++ ++/* handle null as "default" */ ++static int dattrs_equal(struct sched_domain_attr *cur, int idx_cur, ++ struct sched_domain_attr *new, int idx_new) ++{ ++ struct sched_domain_attr tmp; ++ ++ /* fast path */ ++ if (!new && !cur) ++ return 1; ++ ++ tmp = SD_ATTR_INIT; ++ return !memcmp(cur ? (cur + idx_cur) : &tmp, ++ new ? (new + idx_new) : &tmp, ++ sizeof(struct sched_domain_attr)); ++} ++ ++/* ++ * Partition sched domains as specified by the 'ndoms_new' ++ * cpumasks in the array doms_new[] of cpumasks. This compares ++ * doms_new[] to the current sched domain partitioning, doms_cur[]. ++ * It destroys each deleted domain and builds each new domain. ++ * ++ * 'doms_new' is an array of cpumask_var_t's of length 'ndoms_new'. ++ * The masks don't intersect (don't overlap.) We should setup one ++ * sched domain for each mask. CPUs not in any of the cpumasks will ++ * not be load balanced. If the same cpumask appears both in the ++ * current 'doms_cur' domains and in the new 'doms_new', we can leave ++ * it as it is. ++ * ++ * The passed in 'doms_new' should be allocated using ++ * alloc_sched_domains. This routine takes ownership of it and will ++ * free_sched_domains it when done with it. If the caller failed the ++ * alloc call, then it can pass in doms_new == NULL && ndoms_new == 1, ++ * and partition_sched_domains() will fallback to the single partition ++ * 'fallback_doms', it also forces the domains to be rebuilt. ++ * ++ * If doms_new == NULL it will be replaced with cpu_online_mask. ++ * ndoms_new == 0 is a special case for destroying existing domains, ++ * and it will not create the default domain. ++ * ++ * Call with hotplug lock held ++ */ ++void partition_sched_domains(int ndoms_new, cpumask_var_t doms_new[], ++ struct sched_domain_attr *dattr_new) ++{ ++ int i, j, n; ++ int new_topology; ++ ++ mutex_lock(&sched_domains_mutex); ++ ++ /* always unregister in case we don't destroy any domains */ ++ unregister_sched_domain_sysctl(); ++ ++ /* Let architecture update cpu core mappings. */ ++ new_topology = arch_update_cpu_topology(); ++ ++ n = doms_new ? 
ndoms_new : 0; ++ ++ /* Destroy deleted domains */ ++ for (i = 0; i < ndoms_cur; i++) { ++ for (j = 0; j < n && !new_topology; j++) { ++ if (cpumask_equal(doms_cur[i], doms_new[j]) ++ && dattrs_equal(dattr_cur, i, dattr_new, j)) ++ goto match1; ++ } ++ /* no match - a current sched domain not in new doms_new[] */ ++ detach_destroy_domains(doms_cur[i]); ++match1: ++ ; ++ } ++ ++ if (doms_new == NULL) { ++ ndoms_cur = 0; ++ doms_new = &fallback_doms; ++ cpumask_andnot(doms_new[0], cpu_active_mask, cpu_isolated_map); ++ WARN_ON_ONCE(dattr_new); ++ } ++ ++ /* Build new domains */ ++ for (i = 0; i < ndoms_new; i++) { ++ for (j = 0; j < ndoms_cur && !new_topology; j++) { ++ if (cpumask_equal(doms_new[i], doms_cur[j]) ++ && dattrs_equal(dattr_new, i, dattr_cur, j)) ++ goto match2; ++ } ++ /* no match - add a new doms_new */ ++ build_sched_domains(doms_new[i], dattr_new ? dattr_new + i : NULL); ++match2: ++ ; ++ } ++ ++ /* Remember the new sched domains */ ++ if (doms_cur != &fallback_doms) ++ free_sched_domains(doms_cur, ndoms_cur); ++ kfree(dattr_cur); /* kfree(NULL) is safe */ ++ doms_cur = doms_new; ++ dattr_cur = dattr_new; ++ ndoms_cur = ndoms_new; ++ ++ register_sched_domain_sysctl(); ++ ++ mutex_unlock(&sched_domains_mutex); ++} ++ ++#if defined(CONFIG_SCHED_MC) || defined(CONFIG_SCHED_SMT) ++static void reinit_sched_domains(void) ++{ ++ get_online_cpus(); ++ ++ /* Destroy domains first to force the rebuild */ ++ partition_sched_domains(0, NULL, NULL); ++ ++ rebuild_sched_domains(); ++ put_online_cpus(); ++} ++ ++static ssize_t sched_power_savings_store(const char *buf, size_t count, int smt) ++{ ++ unsigned int level = 0; ++ ++ if (sscanf(buf, "%u", &level) != 1) ++ return -EINVAL; ++ ++ /* ++ * level is always be positive so don't check for ++ * level < POWERSAVINGS_BALANCE_NONE which is 0 ++ * What happens on 0 or 1 byte write, ++ * need to check for count as well? 
++ */ ++ ++ if (level >= MAX_POWERSAVINGS_BALANCE_LEVELS) ++ return -EINVAL; ++ ++ if (smt) ++ sched_smt_power_savings = level; ++ else ++ sched_mc_power_savings = level; ++ ++ reinit_sched_domains(); ++ ++ return count; ++} ++ ++#ifdef CONFIG_SCHED_MC ++static ssize_t sched_mc_power_savings_show(struct sysdev_class *class, ++ struct sysdev_class_attribute *attr, ++ char *page) ++{ ++ return sprintf(page, "%u\n", sched_mc_power_savings); ++} ++static ssize_t sched_mc_power_savings_store(struct sysdev_class *class, ++ struct sysdev_class_attribute *attr, ++ const char *buf, size_t count) ++{ ++ return sched_power_savings_store(buf, count, 0); ++} ++static SYSDEV_CLASS_ATTR(sched_mc_power_savings, 0644, ++ sched_mc_power_savings_show, ++ sched_mc_power_savings_store); ++#endif ++ ++#ifdef CONFIG_SCHED_SMT ++static ssize_t sched_smt_power_savings_show(struct sysdev_class *dev, ++ struct sysdev_class_attribute *attr, ++ char *page) ++{ ++ return sprintf(page, "%u\n", sched_smt_power_savings); ++} ++static ssize_t sched_smt_power_savings_store(struct sysdev_class *dev, ++ struct sysdev_class_attribute *attr, ++ const char *buf, size_t count) ++{ ++ return sched_power_savings_store(buf, count, 1); ++} ++static SYSDEV_CLASS_ATTR(sched_smt_power_savings, 0644, ++ sched_smt_power_savings_show, ++ sched_smt_power_savings_store); ++#endif ++ ++int __init sched_create_sysfs_power_savings_entries(struct sysdev_class *cls) ++{ ++ int err = 0; ++ ++#ifdef CONFIG_SCHED_SMT ++ if (smt_capable()) ++ err = sysfs_create_file(&cls->kset.kobj, ++ &attr_sched_smt_power_savings.attr); ++#endif ++#ifdef CONFIG_SCHED_MC ++ if (!err && mc_capable()) ++ err = sysfs_create_file(&cls->kset.kobj, ++ &attr_sched_mc_power_savings.attr); ++#endif ++ return err; ++} ++#endif /* CONFIG_SCHED_MC || CONFIG_SCHED_SMT */ ++ ++/* ++ * Update cpusets according to cpu_active mask. If cpusets are ++ * disabled, cpuset_update_active_cpus() becomes a simple wrapper ++ * around partition_sched_domains(). ++ */ ++static int cpuset_cpu_active(struct notifier_block *nfb, unsigned long action, ++ void *hcpu) ++{ ++ switch (action & ~CPU_TASKS_FROZEN) { ++ case CPU_ONLINE: ++ case CPU_DOWN_FAILED: ++ cpuset_update_active_cpus(); ++ return NOTIFY_OK; ++ default: ++ return NOTIFY_DONE; ++ } ++} ++ ++static int cpuset_cpu_inactive(struct notifier_block *nfb, unsigned long action, ++ void *hcpu) ++{ ++ switch (action & ~CPU_TASKS_FROZEN) { ++ case CPU_DOWN_PREPARE: ++ cpuset_update_active_cpus(); ++ return NOTIFY_OK; ++ default: ++ return NOTIFY_DONE; ++ } ++} ++ ++#if defined(CONFIG_SCHED_SMT) || defined(CONFIG_SCHED_MC) ++/* ++ * Cheaper version of the below functions in case support for SMT and MC is ++ * compiled in but CPUs have no siblings. 
++ */ ++static bool sole_cpu_idle(int cpu) ++{ ++ return rq_idle(cpu_rq(cpu)); ++} ++#endif ++#ifdef CONFIG_SCHED_SMT ++/* All this CPU's SMT siblings are idle */ ++static bool siblings_cpu_idle(int cpu) ++{ ++ return cpumask_subset(&(cpu_rq(cpu)->smt_siblings), ++ &grq.cpu_idle_map); ++} ++#endif ++#ifdef CONFIG_SCHED_MC ++/* All this CPU's shared cache siblings are idle */ ++static bool cache_cpu_idle(int cpu) ++{ ++ return cpumask_subset(&(cpu_rq(cpu)->cache_siblings), ++ &grq.cpu_idle_map); ++} ++#endif ++ ++enum sched_domain_level { ++ SD_LV_NONE = 0, ++ SD_LV_SIBLING, ++ SD_LV_MC, ++ SD_LV_BOOK, ++ SD_LV_CPU, ++ SD_LV_NODE, ++ SD_LV_ALLNODES, ++ SD_LV_MAX ++}; ++ ++void __init sched_init_smp(void) ++{ ++ struct sched_domain *sd; ++ int cpu; ++ ++ cpumask_var_t non_isolated_cpus; ++ ++ alloc_cpumask_var(&non_isolated_cpus, GFP_KERNEL); ++ alloc_cpumask_var(&fallback_doms, GFP_KERNEL); ++ ++ get_online_cpus(); ++ mutex_lock(&sched_domains_mutex); ++ init_sched_domains(cpu_active_mask); ++ cpumask_andnot(non_isolated_cpus, cpu_possible_mask, cpu_isolated_map); ++ if (cpumask_empty(non_isolated_cpus)) ++ cpumask_set_cpu(smp_processor_id(), non_isolated_cpus); ++ mutex_unlock(&sched_domains_mutex); ++ put_online_cpus(); ++ ++ hotcpu_notifier(cpuset_cpu_active, CPU_PRI_CPUSET_ACTIVE); ++ hotcpu_notifier(cpuset_cpu_inactive, CPU_PRI_CPUSET_INACTIVE); ++ ++ /* Move init over to a non-isolated CPU */ ++ if (set_cpus_allowed_ptr(current, non_isolated_cpus) < 0) ++ BUG(); ++ free_cpumask_var(non_isolated_cpus); ++ ++ grq_lock_irq(); ++ /* ++ * Set up the relative cache distance of each online cpu from each ++ * other in a simple array for quick lookup. Locality is determined ++ * by the closest sched_domain that CPUs are separated by. CPUs with ++ * shared cache in SMT and MC are treated as local. Separate CPUs ++ * (within the same package or physically) within the same node are ++ * treated as not local. CPUs not even in the same domain (different ++ * nodes) are treated as very distant. ++ */ ++ for_each_online_cpu(cpu) { ++ struct rq *rq = cpu_rq(cpu); ++ for_each_domain(cpu, sd) { ++ int locality, other_cpu; ++ ++#ifdef CONFIG_SCHED_SMT ++ if (sd->level == SD_LV_SIBLING) { ++ for_each_cpu_mask(other_cpu, *sched_domain_span(sd)) ++ cpumask_set_cpu(other_cpu, &rq->smt_siblings); ++ } ++#endif ++#ifdef CONFIG_SCHED_MC ++ if (sd->level == SD_LV_MC) { ++ for_each_cpu_mask(other_cpu, *sched_domain_span(sd)) ++ cpumask_set_cpu(other_cpu, &rq->cache_siblings); ++ } ++#endif ++ if (sd->level <= SD_LV_SIBLING) ++ locality = 1; ++ else if (sd->level <= SD_LV_MC) ++ locality = 2; ++ else if (sd->level <= SD_LV_NODE) ++ locality = 3; ++ else ++ continue; ++ ++ for_each_cpu_mask(other_cpu, *sched_domain_span(sd)) { ++ if (locality < rq->cpu_locality[other_cpu]) ++ rq->cpu_locality[other_cpu] = locality; ++ } ++ } ++ ++/* ++ * Each runqueue has its own function in case it doesn't have ++ * siblings of its own allowing mixed topologies. 
++ */ ++#ifdef CONFIG_SCHED_SMT ++ if (cpus_weight(rq->smt_siblings) > 1) ++ rq->siblings_idle = siblings_cpu_idle; ++#endif ++#ifdef CONFIG_SCHED_MC ++ if (cpus_weight(rq->cache_siblings) > 1) ++ rq->cache_idle = cache_cpu_idle; ++#endif ++ } ++ grq_unlock_irq(); ++} ++#else ++void __init sched_init_smp(void) ++{ ++} ++#endif /* CONFIG_SMP */ ++ ++unsigned int sysctl_timer_migration = 1; ++ ++int in_sched_functions(unsigned long addr) ++{ ++ return in_lock_functions(addr) || ++ (addr >= (unsigned long)__sched_text_start ++ && addr < (unsigned long)__sched_text_end); ++} ++ ++void __init sched_init(void) ++{ ++ int i; ++ struct rq *rq; ++ ++ prio_ratios[0] = 128; ++ for (i = 1 ; i < PRIO_RANGE ; i++) ++ prio_ratios[i] = prio_ratios[i - 1] * 11 / 10; ++ ++ raw_spin_lock_init(&grq.lock); ++ grq.nr_running = grq.nr_uninterruptible = grq.nr_switches = 0; ++ grq.niffies = 0; ++ grq.last_jiffy = jiffies; ++ raw_spin_lock_init(&grq.iso_lock); ++ grq.iso_ticks = grq.iso_refractory = 0; ++ grq.noc = 1; ++#ifdef CONFIG_SMP ++ init_defrootdomain(); ++ grq.qnr = grq.idle_cpus = 0; ++ cpumask_clear(&grq.cpu_idle_map); ++#else ++ uprq = &per_cpu(runqueues, 0); ++#endif ++ for_each_possible_cpu(i) { ++ rq = cpu_rq(i); ++ rq->user_pc = rq->nice_pc = rq->softirq_pc = rq->system_pc = ++ rq->iowait_pc = rq->idle_pc = 0; ++ rq->dither = false; ++#ifdef CONFIG_SMP ++ rq->sticky_task = NULL; ++ rq->last_niffy = 0; ++ rq->sd = NULL; ++ rq->rd = NULL; ++ rq->online = false; ++ rq->cpu = i; ++ rq_attach_root(rq, &def_root_domain); ++#endif ++ atomic_set(&rq->nr_iowait, 0); ++ } ++ ++#ifdef CONFIG_SMP ++ nr_cpu_ids = i; ++ /* ++ * Set the base locality for cpu cache distance calculation to ++ * "distant" (3). Make sure the distance from a CPU to itself is 0. ++ */ ++ for_each_possible_cpu(i) { ++ int j; ++ ++ rq = cpu_rq(i); ++#ifdef CONFIG_SCHED_SMT ++ cpumask_clear(&rq->smt_siblings); ++ cpumask_set_cpu(i, &rq->smt_siblings); ++ rq->siblings_idle = sole_cpu_idle; ++ cpumask_set_cpu(i, &rq->smt_siblings); ++#endif ++#ifdef CONFIG_SCHED_MC ++ cpumask_clear(&rq->cache_siblings); ++ cpumask_set_cpu(i, &rq->cache_siblings); ++ rq->cache_idle = sole_cpu_idle; ++ cpumask_set_cpu(i, &rq->cache_siblings); ++#endif ++ rq->cpu_locality = kmalloc(nr_cpu_ids * sizeof(int *), GFP_ATOMIC); ++ for_each_possible_cpu(j) { ++ if (i == j) ++ rq->cpu_locality[j] = 0; ++ else ++ rq->cpu_locality[j] = 4; ++ } ++ } ++#endif ++ ++ for (i = 0; i < PRIO_LIMIT; i++) ++ INIT_LIST_HEAD(grq.queue + i); ++ /* delimiter for bitsearch */ ++ __set_bit(PRIO_LIMIT, grq.prio_bitmap); ++ ++#ifdef CONFIG_PREEMPT_NOTIFIERS ++ INIT_HLIST_HEAD(&init_task.preempt_notifiers); ++#endif ++ ++#ifdef CONFIG_RT_MUTEXES ++ plist_head_init(&init_task.pi_waiters); ++#endif ++ ++ /* ++ * The boot idle thread does lazy MMU switching as well: ++ */ ++ atomic_inc(&init_mm.mm_count); ++ enter_lazy_tlb(&init_mm, current); ++ ++ /* ++ * Make us the idle thread. Technically, schedule() should not be ++ * called from this thread, however somewhere below it might be, ++ * but because we are the idle thread, we just pick up running again ++ * when this runqueue becomes "idle". 
++ */ ++ init_idle(current, smp_processor_id()); ++ ++#ifdef CONFIG_SMP ++ zalloc_cpumask_var(&sched_domains_tmpmask, GFP_NOWAIT); ++ /* May be allocated at isolcpus cmdline parse time */ ++ if (cpu_isolated_map == NULL) ++ zalloc_cpumask_var(&cpu_isolated_map, GFP_NOWAIT); ++#endif /* SMP */ ++} ++ ++#ifdef CONFIG_DEBUG_ATOMIC_SLEEP ++static inline int preempt_count_equals(int preempt_offset) ++{ ++ int nested = (preempt_count() & ~PREEMPT_ACTIVE) + rcu_preempt_depth(); ++ ++ return (nested == preempt_offset); ++} ++ ++void __might_sleep(const char *file, int line, int preempt_offset) ++{ ++ static unsigned long prev_jiffy; /* ratelimiting */ ++ ++ rcu_sleep_check(); /* WARN_ON_ONCE() by default, no rate limit reqd. */ ++ if ((preempt_count_equals(preempt_offset) && !irqs_disabled()) || ++ system_state != SYSTEM_RUNNING || oops_in_progress) ++ return; ++ if (time_before(jiffies, prev_jiffy + HZ) && prev_jiffy) ++ return; ++ prev_jiffy = jiffies; ++ ++ printk(KERN_ERR ++ "BUG: sleeping function called from invalid context at %s:%d\n", ++ file, line); ++ printk(KERN_ERR ++ "in_atomic(): %d, irqs_disabled(): %d, pid: %d, name: %s\n", ++ in_atomic(), irqs_disabled(), ++ current->pid, current->comm); ++ ++ debug_show_held_locks(current); ++ if (irqs_disabled()) ++ print_irqtrace_events(current); ++ dump_stack(); ++} ++EXPORT_SYMBOL(__might_sleep); ++#endif ++ ++#ifdef CONFIG_MAGIC_SYSRQ ++void normalize_rt_tasks(void) ++{ ++ struct task_struct *g, *p; ++ unsigned long flags; ++ struct rq *rq; ++ int queued; ++ ++ read_lock_irq(&tasklist_lock); ++ ++ do_each_thread(g, p) { ++ if (!rt_task(p) && !iso_task(p)) ++ continue; ++ ++ raw_spin_lock_irqsave(&p->pi_lock, flags); ++ rq = __task_grq_lock(p); ++ ++ queued = task_queued(p); ++ if (queued) ++ dequeue_task(p); ++ __setscheduler(p, rq, SCHED_NORMAL, 0); ++ if (queued) { ++ enqueue_task(p); ++ try_preempt(p, rq); ++ } ++ ++ __task_grq_unlock(); ++ raw_spin_unlock_irqrestore(&p->pi_lock, flags); ++ } while_each_thread(g, p); ++ ++ read_unlock_irq(&tasklist_lock); ++} ++#endif /* CONFIG_MAGIC_SYSRQ */ ++ ++#if defined(CONFIG_IA64) || defined(CONFIG_KGDB_KDB) ++/* ++ * These functions are only useful for the IA64 MCA handling, or kdb. ++ * ++ * They can only be called when the whole system has been ++ * stopped - every CPU needs to be quiescent, and no scheduling ++ * activity can take place. Using them for anything else would ++ * be a serious bug, and as a result, they aren't even visible ++ * under any other configuration. ++ */ ++ ++/** ++ * curr_task - return the current task for a given cpu. ++ * @cpu: the processor in question. ++ * ++ * ONLY VALID WHEN THE WHOLE SYSTEM IS STOPPED! ++ */ ++struct task_struct *curr_task(int cpu) ++{ ++ return cpu_curr(cpu); ++} ++ ++#endif /* defined(CONFIG_IA64) || defined(CONFIG_KGDB_KDB) */ ++ ++#ifdef CONFIG_IA64 ++/** ++ * set_curr_task - set the current task for a given cpu. ++ * @cpu: the processor in question. ++ * @p: the task pointer to set. ++ * ++ * Description: This function must only be used when non-maskable interrupts ++ * are serviced on a separate stack. It allows the architecture to switch the ++ * notion of the current task on a cpu in a non-blocking manner. This function ++ * must be called with all CPU's synchronised, and interrupts disabled, the ++ * and caller must save the original value of the current task (see ++ * curr_task() above) and restore that value before reenabling interrupts and ++ * re-starting the system. ++ * ++ * ONLY VALID WHEN THE WHOLE SYSTEM IS STOPPED! 
++ */ ++void set_curr_task(int cpu, struct task_struct *p) ++{ ++ cpu_curr(cpu) = p; ++} ++ ++#endif ++ ++/* ++ * Use precise platform statistics if available: ++ */ ++#ifdef CONFIG_VIRT_CPU_ACCOUNTING ++void task_times(struct task_struct *p, cputime_t *ut, cputime_t *st) ++{ ++ *ut = p->utime; ++ *st = p->stime; ++} ++ ++void thread_group_times(struct task_struct *p, cputime_t *ut, cputime_t *st) ++{ ++ struct task_cputime cputime; ++ ++ thread_group_cputime(p, &cputime); ++ ++ *ut = cputime.utime; ++ *st = cputime.stime; ++} ++#else ++ ++#ifndef nsecs_to_cputime ++# define nsecs_to_cputime(__nsecs) nsecs_to_jiffies(__nsecs) ++#endif ++ ++void task_times(struct task_struct *p, cputime_t *ut, cputime_t *st) ++{ ++ cputime_t rtime, utime = p->utime, total = cputime_add(utime, p->stime); ++ ++ rtime = nsecs_to_cputime(p->sched_time); ++ ++ if (total) { ++ u64 temp; ++ ++ temp = (u64)(rtime * utime); ++ do_div(temp, total); ++ utime = (cputime_t)temp; ++ } else ++ utime = rtime; ++ ++ /* ++ * Compare with previous values, to keep monotonicity: ++ */ ++ p->prev_utime = max(p->prev_utime, utime); ++ p->prev_stime = max(p->prev_stime, cputime_sub(rtime, p->prev_utime)); ++ ++ *ut = p->prev_utime; ++ *st = p->prev_stime; ++} ++ ++/* ++ * Must be called with siglock held. ++ */ ++void thread_group_times(struct task_struct *p, cputime_t *ut, cputime_t *st) ++{ ++ struct signal_struct *sig = p->signal; ++ struct task_cputime cputime; ++ cputime_t rtime, utime, total; ++ ++ thread_group_cputime(p, &cputime); ++ ++ total = cputime_add(cputime.utime, cputime.stime); ++ rtime = nsecs_to_cputime(cputime.sum_exec_runtime); ++ ++ if (total) { ++ u64 temp; ++ ++ temp = (u64)(rtime * cputime.utime); ++ do_div(temp, total); ++ utime = (cputime_t)temp; ++ } else ++ utime = rtime; ++ ++ sig->prev_utime = max(sig->prev_utime, utime); ++ sig->prev_stime = max(sig->prev_stime, ++ cputime_sub(rtime, sig->prev_utime)); ++ ++ *ut = sig->prev_utime; ++ *st = sig->prev_stime; ++} ++#endif ++ ++inline cputime_t task_gtime(struct task_struct *p) ++{ ++ return p->gtime; ++} ++ ++void __cpuinit init_idle_bootup_task(struct task_struct *idle) ++{} ++ ++#ifdef CONFIG_SCHED_DEBUG ++void proc_sched_show_task(struct task_struct *p, struct seq_file *m) ++{} ++ ++void proc_sched_set_task(struct task_struct *p) ++{} ++#endif ++ ++#ifdef CONFIG_SMP ++unsigned long default_scale_freq_power(struct sched_domain *sd, int cpu) ++{ ++ return SCHED_LOAD_SCALE; ++} ++ ++unsigned long default_scale_smt_power(struct sched_domain *sd, int cpu) ++{ ++ unsigned long weight = cpumask_weight(sched_domain_span(sd)); ++ unsigned long smt_gain = sd->smt_gain; ++ ++ smt_gain /= weight; ++ ++ return smt_gain; ++} ++#endif +Index: linux-3.2-ck1/kernel/sched.c +=================================================================== +--- linux-3.2-ck1.orig/kernel/sched.c 2012-01-16 10:07:27.897097267 +1100 ++++ linux-3.2-ck1/kernel/sched.c 2012-01-16 10:07:31.344097027 +1100 +@@ -1,3 +1,6 @@ ++#ifdef CONFIG_SCHED_BFS ++#include "sched_bfs.c" ++#else + /* + * kernel/sched.c + * +@@ -9783,3 +9786,4 @@ struct cgroup_subsys cpuacct_subsys = { + .subsys_id = cpuacct_subsys_id, + }; + #endif /* CONFIG_CGROUP_CPUACCT */ ++#endif /* CONFIG_SCHED_BFS */ +Index: linux-3.2-ck1/kernel/sysctl.c +=================================================================== +--- linux-3.2-ck1.orig/kernel/sysctl.c 2012-01-16 10:07:27.897097267 +1100 ++++ linux-3.2-ck1/kernel/sysctl.c 2012-01-16 10:07:31.345097026 +1100 +@@ -121,7 +121,12 @@ static int __maybe_unused one = 1; + static 
int __maybe_unused two = 2; + static int __maybe_unused three = 3; + static unsigned long one_ul = 1; +-static int one_hundred = 100; ++static int __maybe_unused one_hundred = 100; ++#ifdef CONFIG_SCHED_BFS ++extern int rr_interval; ++extern int sched_iso_cpu; ++static int __read_mostly one_thousand = 1000; ++#endif + #ifdef CONFIG_PRINTK + static int ten_thousand = 10000; + #endif +@@ -251,7 +256,7 @@ static struct ctl_table root_table[] = { + { } + }; + +-#ifdef CONFIG_SCHED_DEBUG ++#if defined(CONFIG_SCHED_DEBUG) && !defined(CONFIG_SCHED_BFS) + static int min_sched_granularity_ns = 100000; /* 100 usecs */ + static int max_sched_granularity_ns = NSEC_PER_SEC; /* 1 second */ + static int min_wakeup_granularity_ns; /* 0 usecs */ +@@ -266,6 +271,7 @@ static int max_extfrag_threshold = 1000; + #endif + + static struct ctl_table kern_table[] = { ++#ifndef CONFIG_SCHED_BFS + { + .procname = "sched_child_runs_first", + .data = &sysctl_sched_child_runs_first, +@@ -383,6 +389,7 @@ static struct ctl_table kern_table[] = { + .extra1 = &one, + }, + #endif ++#endif /* !CONFIG_SCHED_BFS */ + #ifdef CONFIG_PROVE_LOCKING + { + .procname = "prove_locking", +@@ -841,6 +848,26 @@ static struct ctl_table kern_table[] = { + .proc_handler = proc_dointvec, + }, + #endif ++#ifdef CONFIG_SCHED_BFS ++ { ++ .procname = "rr_interval", ++ .data = &rr_interval, ++ .maxlen = sizeof (int), ++ .mode = 0644, ++ .proc_handler = &proc_dointvec_minmax, ++ .extra1 = &one, ++ .extra2 = &one_thousand, ++ }, ++ { ++ .procname = "iso_cpu", ++ .data = &sched_iso_cpu, ++ .maxlen = sizeof (int), ++ .mode = 0644, ++ .proc_handler = &proc_dointvec_minmax, ++ .extra1 = &zero, ++ .extra2 = &one_hundred, ++ }, ++#endif + #if defined(CONFIG_S390) && defined(CONFIG_SMP) + { + .procname = "spin_retry", +Index: linux-3.2-ck1/lib/Kconfig.debug +=================================================================== +--- linux-3.2-ck1.orig/lib/Kconfig.debug 2012-01-16 10:07:27.895097268 +1100 ++++ linux-3.2-ck1/lib/Kconfig.debug 2012-01-16 10:07:31.345097026 +1100 +@@ -875,7 +875,7 @@ config BOOT_PRINTK_DELAY + + config RCU_TORTURE_TEST + tristate "torture tests for RCU" +- depends on DEBUG_KERNEL ++ depends on DEBUG_KERNEL && !SCHED_BFS + default n + help + This option provides a kernel module that runs torture tests +Index: linux-3.2-ck1/include/linux/jiffies.h +=================================================================== +--- linux-3.2-ck1.orig/include/linux/jiffies.h 2012-01-16 10:07:27.896097267 +1100 ++++ linux-3.2-ck1/include/linux/jiffies.h 2012-01-16 10:07:31.345097026 +1100 +@@ -164,7 +164,7 @@ static inline u64 get_jiffies_64(void) + * Have the 32 bit jiffies value wrap 5 minutes after boot + * so jiffies wrap bugs show up earlier. 
+ */ +-#define INITIAL_JIFFIES ((unsigned long)(unsigned int) (-300*HZ)) ++#define INITIAL_JIFFIES ((unsigned long)(unsigned int) (-10*HZ)) + + /* + * Change timeval to jiffies, trying to avoid the +Index: linux-3.2-ck1/drivers/cpufreq/cpufreq.c +=================================================================== +--- linux-3.2-ck1.orig/drivers/cpufreq/cpufreq.c 2012-01-16 10:07:27.894097269 +1100 ++++ linux-3.2-ck1/drivers/cpufreq/cpufreq.c 2012-01-16 10:07:31.346097026 +1100 +@@ -28,6 +28,7 @@ + #include + #include + #include ++#include + #include + + #include +@@ -1444,6 +1445,12 @@ int __cpufreq_driver_target(struct cpufr + target_freq, relation); + if (cpu_online(policy->cpu) && cpufreq_driver->target) + retval = cpufreq_driver->target(policy, target_freq, relation); ++ if (likely(retval != -EINVAL)) { ++ if (target_freq == policy->max) ++ cpu_nonscaling(policy->cpu); ++ else ++ cpu_scaling(policy->cpu); ++ } + + return retval; + } +Index: linux-3.2-ck1/drivers/cpufreq/cpufreq_ondemand.c +=================================================================== +--- linux-3.2-ck1.orig/drivers/cpufreq/cpufreq_ondemand.c 2012-01-16 10:07:27.894097269 +1100 ++++ linux-3.2-ck1/drivers/cpufreq/cpufreq_ondemand.c 2012-01-16 10:07:31.346097026 +1100 +@@ -28,8 +28,8 @@ + * It helps to keep variable names smaller, simpler + */ + +-#define DEF_FREQUENCY_DOWN_DIFFERENTIAL (10) +-#define DEF_FREQUENCY_UP_THRESHOLD (80) ++#define DEF_FREQUENCY_DOWN_DIFFERENTIAL (26) ++#define DEF_FREQUENCY_UP_THRESHOLD (63) + #define DEF_SAMPLING_DOWN_FACTOR (1) + #define MAX_SAMPLING_DOWN_FACTOR (100000) + #define MICRO_FREQUENCY_DOWN_DIFFERENTIAL (3) +@@ -417,10 +417,10 @@ static void dbs_check_cpu(struct cpu_dbs + + /* + * Every sampling_rate, we check, if current idle time is less +- * than 20% (default), then we try to increase frequency ++ * than 37% (default), then we try to increase frequency + * Every sampling_rate, we look for a the lowest + * frequency which can sustain the load while keeping idle time over +- * 30%. If such a frequency exist, we try to decrease to this frequency. ++ * 63%. If such a frequency exist, we try to decrease to this frequency. + * + * Any frequency increase takes it to the maximum frequency. + * Frequency reduction happens at minimum steps of +Index: linux-3.2-ck1/drivers/cpufreq/cpufreq_conservative.c +=================================================================== +--- linux-3.2-ck1.orig/drivers/cpufreq/cpufreq_conservative.c 2012-01-16 10:07:27.894097269 +1100 ++++ linux-3.2-ck1/drivers/cpufreq/cpufreq_conservative.c 2012-01-16 10:07:31.346097026 +1100 +@@ -29,8 +29,8 @@ + * It helps to keep variable names smaller, simpler + */ + +-#define DEF_FREQUENCY_UP_THRESHOLD (80) +-#define DEF_FREQUENCY_DOWN_THRESHOLD (20) ++#define DEF_FREQUENCY_UP_THRESHOLD (63) ++#define DEF_FREQUENCY_DOWN_THRESHOLD (26) + + /* + * The polling frequency of this governor depends on the capability of +Index: linux-3.2-ck1/mm/vmscan.c +=================================================================== +--- linux-3.2-ck1.orig/mm/vmscan.c 2012-01-16 10:07:27.813097272 +1100 ++++ linux-3.2-ck1/mm/vmscan.c 2012-01-16 10:07:32.578096942 +1100 +@@ -37,6 +37,7 @@ + #include + #include + #include ++#include + #include + #include + #include +@@ -146,7 +147,7 @@ struct scan_control { + /* + * From 0 .. 100. Higher means more swappy. 
+ */ +-int vm_swappiness = 60; ++int vm_swappiness = 10; + long vm_total_pages; /* The total number of pages which the VM controls */ + + static LIST_HEAD(shrinker_list); +@@ -986,7 +987,7 @@ cull_mlocked: + + activate_locked: + /* Not a candidate for swapping, so reclaim swap space. */ +- if (PageSwapCache(page) && vm_swap_full()) ++ if (PageSwapCache(page)) + try_to_free_swap(page); + VM_BUG_ON(PageActive(page)); + SetPageActive(page); +@@ -2089,6 +2090,35 @@ restart: + } + + /* ++ * Helper functions to adjust nice level of kswapd, based on the priority of ++ * the task (p) that called it. If it is already higher priority we do not ++ * demote its nice level since it is still working on behalf of a higher ++ * priority task. With kernel threads we leave it at nice 0. ++ * ++ * We don't ever run kswapd real time, so if a real time task calls kswapd we ++ * set it to highest SCHED_NORMAL priority. ++ */ ++static inline int effective_sc_prio(struct task_struct *p) ++{ ++ if (likely(p->mm)) { ++ if (rt_task(p)) ++ return -20; ++ if (p->policy == SCHED_IDLEPRIO) ++ return 19; ++ return task_nice(p); ++ } ++ return 0; ++} ++ ++static void set_kswapd_nice(struct task_struct *kswapd, int active) ++{ ++ long nice = effective_sc_prio(current); ++ ++ if (task_nice(kswapd) > nice || !active) ++ set_user_nice(kswapd, nice); ++} ++ ++/* + * This is the direct reclaim path, for page-allocating processes. We only + * try to reclaim pages from zones which will satisfy the caller's allocation + * request. +@@ -2835,6 +2865,8 @@ static void kswapd_try_to_sleep(pg_data_ + finish_wait(&pgdat->kswapd_wait, &wait); + } + ++#define WT_EXPIRY (HZ * 5) /* Time to wakeup watermark_timer */ ++ + /* + * The background pageout daemon, started as a kernel thread + * from the init process. 
+@@ -2890,6 +2922,9 @@ static int kswapd(void *p) + for ( ; ; ) { + int ret; + ++ /* kswapd has been busy so delay watermark_timer */ ++ mod_timer(&pgdat->watermark_timer, jiffies + WT_EXPIRY); ++ + /* + * If the last balance_pgdat was unsuccessful it's unlikely a + * new request of a similar or harder type will succeed soon +@@ -2945,6 +2980,7 @@ static int kswapd(void *p) + void wakeup_kswapd(struct zone *zone, int order, enum zone_type classzone_idx) + { + pg_data_t *pgdat; ++ int active; + + if (!populated_zone(zone)) + return; +@@ -2956,7 +2992,9 @@ void wakeup_kswapd(struct zone *zone, in + pgdat->kswapd_max_order = order; + pgdat->classzone_idx = min(pgdat->classzone_idx, classzone_idx); + } +- if (!waitqueue_active(&pgdat->kswapd_wait)) ++ active = waitqueue_active(&pgdat->kswapd_wait); ++ set_kswapd_nice(pgdat->kswapd, active); ++ if (!active) + return; + if (zone_watermark_ok_safe(zone, order, low_wmark_pages(zone), 0, 0)) + return; +@@ -3068,20 +3106,57 @@ static int __devinit cpu_callback(struct + } + + /* ++ * We wake up kswapd every WT_EXPIRY till free ram is above pages_lots ++ */ ++static void watermark_wakeup(unsigned long data) ++{ ++ pg_data_t *pgdat = (pg_data_t *)data; ++ struct timer_list *wt = &pgdat->watermark_timer; ++ int i; ++ ++ if (!waitqueue_active(&pgdat->kswapd_wait) || above_background_load()) ++ goto out; ++ for (i = pgdat->nr_zones - 1; i >= 0; i--) { ++ struct zone *z = pgdat->node_zones + i; ++ ++ if (!populated_zone(z) || is_highmem(z)) { ++ /* We are better off leaving highmem full */ ++ continue; ++ } ++ if (!zone_watermark_ok(z, 0, lots_wmark_pages(z), 0, 0)) { ++ wake_up_interruptible(&pgdat->kswapd_wait); ++ goto out; ++ } ++ } ++out: ++ mod_timer(wt, jiffies + WT_EXPIRY); ++ return; ++} ++ ++/* + * This kswapd start function will be called by init and node-hot-add. + * On node-hot-add, kswapd will moved to proper cpus if cpus are hot-added. + */ + int kswapd_run(int nid) + { + pg_data_t *pgdat = NODE_DATA(nid); ++ struct timer_list *wt; + int ret = 0; + + if (pgdat->kswapd) + return 0; + ++ wt = &pgdat->watermark_timer; ++ init_timer(wt); ++ wt->data = (unsigned long)pgdat; ++ wt->function = watermark_wakeup; ++ wt->expires = jiffies + WT_EXPIRY; ++ add_timer(wt); ++ + pgdat->kswapd = kthread_run(kswapd, pgdat, "kswapd%d", nid); + if (IS_ERR(pgdat->kswapd)) { + /* failure at boot is fatal */ ++ del_timer(wt); + BUG_ON(system_state == SYSTEM_BOOTING); + printk("Failed to start kswapd on node %d\n",nid); + ret = -1; +Index: linux-3.2-ck1/include/linux/swap.h +=================================================================== +--- linux-3.2-ck1.orig/include/linux/swap.h 2012-01-16 10:07:27.777097278 +1100 ++++ linux-3.2-ck1/include/linux/swap.h 2012-01-16 10:07:32.751096930 +1100 +@@ -201,7 +201,7 @@ struct swap_list_t { + int next; /* swapfile to be used next */ + }; + +-/* Swap 50% full? Release swapcache more aggressively.. */ ++/* Swap 50% full? 
*/ + #define vm_swap_full() (nr_swap_pages*2 < total_swap_pages) + + /* linux/mm/page_alloc.c */ +@@ -215,6 +215,7 @@ extern unsigned int nr_free_pagecache_pa + + + /* linux/mm/swap.c */ ++extern void ____lru_cache_add(struct page *, enum lru_list lru, bool tail); + extern void __lru_cache_add(struct page *, enum lru_list lru); + extern void lru_cache_add_lru(struct page *, enum lru_list lru); + extern void lru_add_page_tail(struct zone* zone, +@@ -238,9 +239,14 @@ static inline void lru_cache_add_anon(st + __lru_cache_add(page, LRU_INACTIVE_ANON); + } + ++static inline void lru_cache_add_file_tail(struct page *page, bool tail) ++{ ++ ____lru_cache_add(page, LRU_INACTIVE_FILE, tail); ++} ++ + static inline void lru_cache_add_file(struct page *page) + { +- __lru_cache_add(page, LRU_INACTIVE_FILE); ++ ____lru_cache_add(page, LRU_INACTIVE_FILE, false); + } + + /* linux/mm/vmscan.c */ +@@ -350,9 +356,10 @@ extern void grab_swap_token(struct mm_st + extern void __put_swap_token(struct mm_struct *); + extern void disable_swap_token(struct mem_cgroup *memcg); + ++/* Only allow swap token to have effect if swap is full */ + static inline int has_swap_token(struct mm_struct *mm) + { +- return (mm == swap_token_mm); ++ return (mm == swap_token_mm && vm_swap_full()); + } + + static inline void put_swap_token(struct mm_struct *mm) +Index: linux-3.2-ck1/mm/memory.c +=================================================================== +--- linux-3.2-ck1.orig/mm/memory.c 2012-01-16 10:07:27.745097280 +1100 ++++ linux-3.2-ck1/mm/memory.c 2012-01-16 10:07:32.052096979 +1100 +@@ -2984,7 +2984,7 @@ static int do_swap_page(struct mm_struct + mem_cgroup_commit_charge_swapin(page, ptr); + + swap_free(entry); +- if (vm_swap_full() || (vma->vm_flags & VM_LOCKED) || PageMlocked(page)) ++ if ((vma->vm_flags & VM_LOCKED) || PageMlocked(page)) + try_to_free_swap(page); + unlock_page(page); + if (swapcache) { +Index: linux-3.2-ck1/mm/swapfile.c +=================================================================== +--- linux-3.2-ck1.orig/mm/swapfile.c 2012-01-16 10:07:27.745097280 +1100 ++++ linux-3.2-ck1/mm/swapfile.c 2012-01-16 10:07:32.053096979 +1100 +@@ -288,7 +288,7 @@ checks: + scan_base = offset = si->lowest_bit; + + /* reuse swap entry of cache-only swap if not busy. */ +- if (vm_swap_full() && si->swap_map[offset] == SWAP_HAS_CACHE) { ++ if (si->swap_map[offset] == SWAP_HAS_CACHE) { + int swap_was_freed; + spin_unlock(&swap_lock); + swap_was_freed = __try_to_reclaim_swap(si, offset); +@@ -377,7 +377,7 @@ scan: + spin_lock(&swap_lock); + goto checks; + } +- if (vm_swap_full() && si->swap_map[offset] == SWAP_HAS_CACHE) { ++ if (si->swap_map[offset] == SWAP_HAS_CACHE) { + spin_lock(&swap_lock); + goto checks; + } +@@ -392,7 +392,7 @@ scan: + spin_lock(&swap_lock); + goto checks; + } +- if (vm_swap_full() && si->swap_map[offset] == SWAP_HAS_CACHE) { ++ if (si->swap_map[offset] == SWAP_HAS_CACHE) { + spin_lock(&swap_lock); + goto checks; + } +@@ -706,8 +706,7 @@ int free_swap_and_cache(swp_entry_t entr + * Not mapped elsewhere, or swap space full? Free it! + * Also recheck PageSwapCache now page is locked (above). 
+ */ +- if (PageSwapCache(page) && !PageWriteback(page) && +- (!page_mapped(page) || vm_swap_full())) { ++ if (PageSwapCache(page) && !PageWriteback(page)) { + delete_from_swap_cache(page); + SetPageDirty(page); + } +Index: linux-3.2-ck1/include/linux/mmzone.h +=================================================================== +--- linux-3.2-ck1.orig/include/linux/mmzone.h 2012-01-16 10:07:27.669097282 +1100 ++++ linux-3.2-ck1/include/linux/mmzone.h 2012-01-16 10:07:32.405096951 +1100 +@@ -15,6 +15,7 @@ + #include + #include + #include ++#include + #include + #include + #include +@@ -181,12 +182,14 @@ enum zone_watermarks { + WMARK_MIN, + WMARK_LOW, + WMARK_HIGH, ++ WMARK_LOTS, + NR_WMARK + }; + + #define min_wmark_pages(z) (z->watermark[WMARK_MIN]) + #define low_wmark_pages(z) (z->watermark[WMARK_LOW]) + #define high_wmark_pages(z) (z->watermark[WMARK_HIGH]) ++#define lots_wmark_pages(z) (z->watermark[WMARK_LOTS]) + + struct per_cpu_pages { + int count; /* number of pages in the list */ +@@ -358,7 +361,7 @@ struct zone { + ZONE_PADDING(_pad1_) + + /* Fields commonly accessed by the page reclaim scanner */ +- spinlock_t lru_lock; ++ spinlock_t lru_lock; + struct zone_lru { + struct list_head list; + } lru[NR_LRU_LISTS]; +@@ -654,6 +657,7 @@ typedef struct pglist_data { + wait_queue_head_t kswapd_wait; + struct task_struct *kswapd; + int kswapd_max_order; ++ struct timer_list watermark_timer; + enum zone_type classzone_idx; + } pg_data_t; + +Index: linux-3.2-ck1/include/linux/mm_inline.h +=================================================================== +--- linux-3.2-ck1.orig/include/linux/mm_inline.h 2012-01-16 10:07:27.614097289 +1100 ++++ linux-3.2-ck1/include/linux/mm_inline.h 2012-01-16 10:07:32.751096930 +1100 +@@ -23,9 +23,12 @@ static inline int page_is_file_cache(str + + static inline void + __add_page_to_lru_list(struct zone *zone, struct page *page, enum lru_list l, +- struct list_head *head) ++ struct list_head *head, bool tail) + { +- list_add(&page->lru, head); ++ if (tail) ++ list_add_tail(&page->lru, head); ++ else ++ list_add(&page->lru, head); + __mod_zone_page_state(zone, NR_LRU_BASE + l, hpage_nr_pages(page)); + mem_cgroup_add_lru_list(page, l); + } +@@ -33,7 +36,13 @@ __add_page_to_lru_list(struct zone *zone + static inline void + add_page_to_lru_list(struct zone *zone, struct page *page, enum lru_list l) + { +- __add_page_to_lru_list(zone, page, l, &zone->lru[l].list); ++ __add_page_to_lru_list(zone, page, l, &zone->lru[l].list, false); ++} ++ ++static inline void ++add_page_to_lru_list_tail(struct zone *zone, struct page *page, enum lru_list l) ++{ ++ __add_page_to_lru_list(zone, page, l, &zone->lru[l].list, 1); + } + + static inline void +Index: linux-3.2-ck1/mm/filemap.c +=================================================================== +--- linux-3.2-ck1.orig/mm/filemap.c 2012-01-16 10:07:27.615097289 +1100 ++++ linux-3.2-ck1/mm/filemap.c 2012-01-16 10:07:32.752096930 +1100 +@@ -495,16 +495,22 @@ out: + } + EXPORT_SYMBOL(add_to_page_cache_locked); + +-int add_to_page_cache_lru(struct page *page, struct address_space *mapping, +- pgoff_t offset, gfp_t gfp_mask) ++int __add_to_page_cache_lru(struct page *page, struct address_space *mapping, ++ pgoff_t offset, gfp_t gfp_mask, bool tail) + { + int ret; + + ret = add_to_page_cache(page, mapping, offset, gfp_mask); + if (ret == 0) +- lru_cache_add_file(page); ++ lru_cache_add_file_tail(page, tail); + return ret; + } ++ ++int add_to_page_cache_lru(struct page *page, struct address_space *mapping, ++ pgoff_t offset, 
gfp_t gfp_mask) ++{ ++ return __add_to_page_cache_lru(page, mapping, offset, gfp_mask, false); ++} + EXPORT_SYMBOL_GPL(add_to_page_cache_lru); + + #ifdef CONFIG_NUMA +Index: linux-3.2-ck1/mm/swap.c +=================================================================== +--- linux-3.2-ck1.orig/mm/swap.c 2012-01-16 10:07:27.615097289 +1100 ++++ linux-3.2-ck1/mm/swap.c 2012-01-16 10:07:32.753096930 +1100 +@@ -371,15 +371,23 @@ void mark_page_accessed(struct page *pag + + EXPORT_SYMBOL(mark_page_accessed); + +-void __lru_cache_add(struct page *page, enum lru_list lru) ++void ______pagevec_lru_add(struct pagevec *pvec, enum lru_list lru, bool tail); ++ ++void ____lru_cache_add(struct page *page, enum lru_list lru, bool tail) + { + struct pagevec *pvec = &get_cpu_var(lru_add_pvecs)[lru]; + + page_cache_get(page); + if (!pagevec_add(pvec, page)) +- ____pagevec_lru_add(pvec, lru); ++ ______pagevec_lru_add(pvec, lru, tail); + put_cpu_var(lru_add_pvecs); + } ++EXPORT_SYMBOL(____lru_cache_add); ++ ++void __lru_cache_add(struct page *page, enum lru_list lru) ++{ ++ ____lru_cache_add(page, lru, false); ++} + EXPORT_SYMBOL(__lru_cache_add); + + /** +@@ -387,7 +395,7 @@ EXPORT_SYMBOL(__lru_cache_add); + * @page: the page to be added to the LRU. + * @lru: the LRU list to which the page is added. + */ +-void lru_cache_add_lru(struct page *page, enum lru_list lru) ++void __lru_cache_add_lru(struct page *page, enum lru_list lru, bool tail) + { + if (PageActive(page)) { + VM_BUG_ON(PageUnevictable(page)); +@@ -398,7 +406,12 @@ void lru_cache_add_lru(struct page *page + } + + VM_BUG_ON(PageLRU(page) || PageActive(page) || PageUnevictable(page)); +- __lru_cache_add(page, lru); ++ ____lru_cache_add(page, lru, tail); ++} ++ ++void lru_cache_add_lru(struct page *page, enum lru_list lru) ++{ ++ __lru_cache_add_lru(page, lru, false); + } + + /** +@@ -685,7 +698,7 @@ void lru_add_page_tail(struct zone* zone + head = page->lru.prev; + else + head = &zone->lru[lru].list; +- __add_page_to_lru_list(zone, page_tail, lru, head); ++ __add_page_to_lru_list(zone, page_tail, lru, head, false); + } else { + SetPageUnevictable(page_tail); + add_page_to_lru_list(zone, page_tail, LRU_UNEVICTABLE); +@@ -714,13 +727,18 @@ static void ____pagevec_lru_add_fn(struc + * Add the passed pages to the LRU, then drop the caller's refcount + * on them. Reinitialises the caller's pagevec. + */ +-void ____pagevec_lru_add(struct pagevec *pvec, enum lru_list lru) ++void ______pagevec_lru_add(struct pagevec *pvec, enum lru_list lru, bool tail) + { + VM_BUG_ON(is_unevictable_lru(lru)); + + pagevec_lru_move_fn(pvec, ____pagevec_lru_add_fn, (void *)lru); + } + ++void ____pagevec_lru_add(struct pagevec *pvec, enum lru_list lru) ++{ ++ ______pagevec_lru_add(pvec, lru, false); ++} ++ + EXPORT_SYMBOL(____pagevec_lru_add); + + /* +Index: linux-3.2-ck1/mm/readahead.c +=================================================================== +--- linux-3.2-ck1.orig/mm/readahead.c 2012-01-16 10:07:27.615097289 +1100 ++++ linux-3.2-ck1/mm/readahead.c 2012-01-16 10:07:32.753096930 +1100 +@@ -17,6 +17,7 @@ + #include + #include + #include ++#include + + /* + * Initialise a struct file's readahead state. 
Assumes that the caller has +@@ -107,7 +108,7 @@ int read_cache_pages(struct address_spac + EXPORT_SYMBOL(read_cache_pages); + + static int read_pages(struct address_space *mapping, struct file *filp, +- struct list_head *pages, unsigned nr_pages) ++ struct list_head *pages, unsigned nr_pages, bool tail) + { + struct blk_plug plug; + unsigned page_idx; +@@ -125,8 +126,8 @@ static int read_pages(struct address_spa + for (page_idx = 0; page_idx < nr_pages; page_idx++) { + struct page *page = list_to_page(pages); + list_del(&page->lru); +- if (!add_to_page_cache_lru(page, mapping, +- page->index, GFP_KERNEL)) { ++ if (!__add_to_page_cache_lru(page, mapping, ++ page->index, GFP_KERNEL, tail)) { + mapping->a_ops->readpage(filp, page); + } + page_cache_release(page); +@@ -139,6 +140,28 @@ out: + return ret; + } + ++static inline int nr_mapped(void) ++{ ++ return global_page_state(NR_FILE_MAPPED) + ++ global_page_state(NR_ANON_PAGES); ++} ++ ++/* ++ * This examines how large in pages a file size is and returns 1 if it is ++ * more than half the unmapped ram. Avoid doing read_page_state which is ++ * expensive unless we already know it is likely to be large enough. ++ */ ++static int large_isize(unsigned long nr_pages) ++{ ++ if (nr_pages * 6 > vm_total_pages) { ++ unsigned long unmapped_ram = vm_total_pages - nr_mapped(); ++ ++ if (nr_pages * 2 > unmapped_ram) ++ return 1; ++ } ++ return 0; ++} ++ + /* + * __do_page_cache_readahead() actually reads a chunk of disk. It allocates all + * the pages first, then submits them all for I/O. This avoids the very bad +@@ -196,7 +219,8 @@ __do_page_cache_readahead(struct address + * will then handle the error. + */ + if (ret) +- read_pages(mapping, filp, &page_pool, ret); ++ read_pages(mapping, filp, &page_pool, ret, ++ large_isize(end_index)); + BUG_ON(!list_empty(&page_pool)); + out: + return ret; +Index: linux-3.2-ck1/include/linux/pagemap.h +=================================================================== +--- linux-3.2-ck1.orig/include/linux/pagemap.h 2012-01-16 10:07:27.615097289 +1100 ++++ linux-3.2-ck1/include/linux/pagemap.h 2012-01-16 10:07:32.754096930 +1100 +@@ -456,6 +456,8 @@ int add_to_page_cache_locked(struct page + pgoff_t index, gfp_t gfp_mask); + int add_to_page_cache_lru(struct page *page, struct address_space *mapping, + pgoff_t index, gfp_t gfp_mask); ++int __add_to_page_cache_lru(struct page *page, struct address_space *mapping, ++ pgoff_t offset, gfp_t gfp_mask, bool tail); + extern void delete_from_page_cache(struct page *page); + extern void __delete_from_page_cache(struct page *page); + int replace_page_cache_page(struct page *old, struct page *new, gfp_t gfp_mask); +Index: linux-3.2-ck1/mm/page-writeback.c +=================================================================== +--- linux-3.2-ck1.orig/mm/page-writeback.c 2012-01-16 10:07:27.594097290 +1100 ++++ linux-3.2-ck1/mm/page-writeback.c 2012-01-16 10:07:32.967096915 +1100 +@@ -59,7 +59,7 @@ static long ratelimit_pages = 32; + /* + * Start background writeback (via writeback threads) at this percentage + */ +-int dirty_background_ratio = 10; ++int dirty_background_ratio = 1; + + /* + * dirty_background_bytes starts at 0 (disabled) so that it is a function of +@@ -76,7 +76,7 @@ int vm_highmem_is_dirtyable; + /* + * The generator of dirty data starts writeback at this percentage + */ +-int vm_dirty_ratio = 20; ++int vm_dirty_ratio = 1; + + /* + * vm_dirty_bytes starts at 0 (disabled) so that it is a function of +Index: linux-3.2-ck1/arch/x86/Kconfig 
+=================================================================== +--- linux-3.2-ck1.orig/arch/x86/Kconfig 2012-01-16 10:07:27.563097292 +1100 ++++ linux-3.2-ck1/arch/x86/Kconfig 2012-01-16 10:07:33.128096904 +1100 +@@ -1076,7 +1076,7 @@ endchoice + + choice + depends on EXPERIMENTAL +- prompt "Memory split" if EXPERT ++ prompt "Memory split" + default VMSPLIT_3G + depends on X86_32 + ---help--- +@@ -1096,17 +1096,17 @@ choice + option alone! + + config VMSPLIT_3G +- bool "3G/1G user/kernel split" ++ bool "Default 896MB lowmem (3G/1G user/kernel split)" + config VMSPLIT_3G_OPT + depends on !X86_PAE +- bool "3G/1G user/kernel split (for full 1G low memory)" ++ bool "1GB lowmem (3G/1G user/kernel split)" + config VMSPLIT_2G +- bool "2G/2G user/kernel split" ++ bool "2GB lowmem (2G/2G user/kernel split)" + config VMSPLIT_2G_OPT + depends on !X86_PAE +- bool "2G/2G user/kernel split (for full 2G low memory)" ++ bool "2GB lowmem (2G/2G user/kernel split)" + config VMSPLIT_1G +- bool "1G/3G user/kernel split" ++ bool "3GB lowmem (1G/3G user/kernel split)" + endchoice + + config PAGE_OFFSET +Index: linux-3.2-ck1/kernel/Kconfig.hz +=================================================================== +--- linux-3.2-ck1.orig/kernel/Kconfig.hz 2012-01-16 10:07:27.544097294 +1100 ++++ linux-3.2-ck1/kernel/Kconfig.hz 2012-01-16 10:07:33.619096868 +1100 +@@ -4,7 +4,7 @@ + + choice + prompt "Timer frequency" +- default HZ_250 ++ default HZ_1000 + help + Allows the configuration of the timer frequency. It is customary + to have the timer interrupt run at 1000 Hz but 100 Hz may be more +@@ -23,13 +23,14 @@ choice + with lots of processors that may show reduced performance if + too many timer interrupts are occurring. + +- config HZ_250 ++ config HZ_250_NODEFAULT + bool "250 HZ" + help +- 250 Hz is a good compromise choice allowing server performance +- while also showing good interactive responsiveness even +- on SMP and NUMA systems. If you are going to be using NTSC video +- or multimedia, selected 300Hz instead. ++ 250 HZ is a lousy compromise choice allowing server interactivity ++ while also showing desktop throughput and no extra power saving on ++ laptops. No good for anything. ++ ++ Recommend 100 or 1000 instead. + + config HZ_300 + bool "300 HZ" +@@ -43,16 +44,82 @@ choice + bool "1000 HZ" + help + 1000 Hz is the preferred choice for desktop systems and other +- systems requiring fast interactive responses to events. ++ systems requiring fast interactive responses to events. Laptops ++ can also benefit from this choice without sacrificing battery life ++ if dynticks is also enabled. ++ ++ config HZ_1500 ++ bool "1500 HZ" ++ help ++ 1500 Hz is an insane value to use to run broken software that is Hz ++ limited. ++ ++ Being over 1000, driver breakage is likely. ++ ++ config HZ_2000 ++ bool "2000 HZ" ++ help ++ 2000 Hz is an insane value to use to run broken software that is Hz ++ limited. ++ ++ Being over 1000, driver breakage is likely. ++ ++ config HZ_3000 ++ bool "3000 HZ" ++ help ++ 3000 Hz is an insane value to use to run broken software that is Hz ++ limited. ++ ++ Being over 1000, driver breakage is likely. ++ ++ config HZ_4000 ++ bool "4000 HZ" ++ help ++ 4000 Hz is an insane value to use to run broken software that is Hz ++ limited. ++ ++ Being over 1000, driver breakage is likely. ++ ++ config HZ_5000 ++ bool "5000 HZ" ++ help ++ 5000 Hz is an obscene value to use to run broken software that is Hz ++ limited. ++ ++ Being over 1000, driver breakage is likely. 
++ ++ config HZ_7500 ++ bool "7500 HZ" ++ help ++ 7500 Hz is an obscene value to use to run broken software that is Hz ++ limited. ++ ++ Being over 1000, driver breakage is likely. ++ ++ config HZ_10000 ++ bool "10000 HZ" ++ help ++ 10000 Hz is an obscene value to use to run broken software that is Hz ++ limited. ++ ++ Being over 1000, driver breakage is likely. ++ + + endchoice + + config HZ + int + default 100 if HZ_100 +- default 250 if HZ_250 ++ default 250 if HZ_250_NODEFAULT + default 300 if HZ_300 + default 1000 if HZ_1000 ++ default 1500 if HZ_1500 ++ default 2000 if HZ_2000 ++ default 3000 if HZ_3000 ++ default 4000 if HZ_4000 ++ default 5000 if HZ_5000 ++ default 7500 if HZ_7500 ++ default 10000 if HZ_10000 + + config SCHED_HRTICK + def_bool HIGH_RES_TIMERS && (!SMP || USE_GENERIC_SMP_HELPERS) +Index: linux-3.2-ck1/arch/x86/kernel/cpu/proc.c +=================================================================== +--- linux-3.2-ck1.orig/arch/x86/kernel/cpu/proc.c 2012-01-16 10:07:27.477097298 +1100 ++++ linux-3.2-ck1/arch/x86/kernel/cpu/proc.c 2012-01-16 10:07:33.618096869 +1100 +@@ -111,7 +111,7 @@ static int show_cpuinfo(struct seq_file + + seq_printf(m, "\nbogomips\t: %lu.%02lu\n", + c->loops_per_jiffy/(500000/HZ), +- (c->loops_per_jiffy/(5000/HZ)) % 100); ++ (c->loops_per_jiffy * 10 /(50000/HZ)) % 100); + + #ifdef CONFIG_X86_64 + if (c->x86_tlbsize > 0) +Index: linux-3.2-ck1/arch/x86/kernel/smpboot.c +=================================================================== +--- linux-3.2-ck1.orig/arch/x86/kernel/smpboot.c 2012-01-16 10:07:27.477097298 +1100 ++++ linux-3.2-ck1/arch/x86/kernel/smpboot.c 2012-01-16 10:07:33.619096868 +1100 +@@ -430,7 +430,7 @@ static void impress_friends(void) + "Total of %d processors activated (%lu.%02lu BogoMIPS).\n", + num_online_cpus(), + bogosum/(500000/HZ), +- (bogosum/(5000/HZ))%100); ++ (bogosum * 10/(50000/HZ))%100); + + pr_debug("Before bogocount - setting activated=1.\n"); + } +Index: linux-3.2-ck1/include/linux/nfsd/stats.h +=================================================================== +--- linux-3.2-ck1.orig/include/linux/nfsd/stats.h 2012-01-16 10:07:27.477097298 +1100 ++++ linux-3.2-ck1/include/linux/nfsd/stats.h 2012-01-16 10:07:33.619096868 +1100 +@@ -11,8 +11,8 @@ + + #include + +-/* thread usage wraps very million seconds (approx one fortnight) */ +-#define NFSD_USAGE_WRAP (HZ*1000000) ++/* thread usage wraps every one hundred thousand seconds (approx one day) */ ++#define NFSD_USAGE_WRAP (HZ*100000) + + #ifdef __KERNEL__ + +Index: linux-3.2-ck1/include/net/inet_timewait_sock.h +=================================================================== +--- linux-3.2-ck1.orig/include/net/inet_timewait_sock.h 2012-01-16 10:07:27.477097298 +1100 ++++ linux-3.2-ck1/include/net/inet_timewait_sock.h 2012-01-16 10:07:33.619096868 +1100 +@@ -38,8 +38,8 @@ struct inet_hashinfo; + * If time > 4sec, it is "slow" path, no recycling is required, + * so that we select tick to get range about 4 seconds. 
+ */ +-#if HZ <= 16 || HZ > 4096 +-# error Unsupported: HZ <= 16 or HZ > 4096 ++#if HZ <= 16 || HZ > 16384 ++# error Unsupported: HZ <= 16 or HZ > 16384 + #elif HZ <= 32 + # define INET_TWDR_RECYCLE_TICK (5 + 2 - INET_TWDR_RECYCLE_SLOTS_LOG) + #elif HZ <= 64 +@@ -54,8 +54,12 @@ struct inet_hashinfo; + # define INET_TWDR_RECYCLE_TICK (10 + 2 - INET_TWDR_RECYCLE_SLOTS_LOG) + #elif HZ <= 2048 + # define INET_TWDR_RECYCLE_TICK (11 + 2 - INET_TWDR_RECYCLE_SLOTS_LOG) +-#else ++#elif HZ <= 4096 + # define INET_TWDR_RECYCLE_TICK (12 + 2 - INET_TWDR_RECYCLE_SLOTS_LOG) ++#elif HZ <= 8192 ++# define INET_TWDR_RECYCLE_TICK (13 + 2 - INET_TWDR_RECYCLE_SLOTS_LOG) ++#else ++# define INET_TWDR_RECYCLE_TICK (14 + 2 - INET_TWDR_RECYCLE_SLOTS_LOG) + #endif + + /* TIME_WAIT reaping mechanism. */ +Index: linux-3.2-ck1/init/calibrate.c +=================================================================== +--- linux-3.2-ck1.orig/init/calibrate.c 2012-01-16 10:07:27.477097298 +1100 ++++ linux-3.2-ck1/init/calibrate.c 2012-01-16 10:07:33.619096868 +1100 +@@ -278,7 +278,7 @@ void __cpuinit calibrate_delay(void) + if (!printed) + pr_cont("%lu.%02lu BogoMIPS (lpj=%lu)\n", + lpj/(500000/HZ), +- (lpj/(5000/HZ)) % 100, lpj); ++ (lpj * 10 /(50000 / HZ)) % 100, lpj); + + loops_per_jiffy = lpj; + printed = true; +Index: linux-3.2-ck1/kernel/Kconfig.preempt +=================================================================== +--- linux-3.2-ck1.orig/kernel/Kconfig.preempt 2012-01-16 10:07:27.461097300 +1100 ++++ linux-3.2-ck1/kernel/Kconfig.preempt 2012-01-16 10:07:33.836096853 +1100 +@@ -1,7 +1,7 @@ + + choice + prompt "Preemption Model" +- default PREEMPT_NONE ++ default PREEMPT + + config PREEMPT_NONE + bool "No Forced Preemption (Server)" +@@ -17,7 +17,7 @@ config PREEMPT_NONE + latencies. + + config PREEMPT_VOLUNTARY +- bool "Voluntary Kernel Preemption (Desktop)" ++ bool "Voluntary Kernel Preemption (Nothing)" + help + This option reduces the latency of the kernel by adding more + "explicit preemption points" to the kernel code. These new +@@ -31,7 +31,8 @@ config PREEMPT_VOLUNTARY + applications to run more 'smoothly' even when the system is + under load. + +- Select this if you are building a kernel for a desktop system. ++ Select this for no system in particular (choose Preemptible ++ instead on a desktop if you know what's good for you). + + config PREEMPT + bool "Preemptible Kernel (Low-Latency Desktop)" +Index: linux-3.2-ck1/Makefile +=================================================================== +--- linux-3.2-ck1.orig/Makefile 2012-01-16 10:07:27.444097302 +1100 ++++ linux-3.2-ck1/Makefile 2012-01-16 10:07:33.997096843 +1100 +@@ -10,6 +10,10 @@ NAME = Saber-toothed Squirrel + # Comments in this file are targeted only to the developer, do not + # expect to learn how to build the kernel reading this file. 
+ ++CKVERSION = -ck1 ++CKNAME = BFS Powered ++EXTRAVERSION := $(EXTRAVERSION)$(CKVERSION) ++ + # Do not: + # o use make's built-in rules and variables + # (this increases performance and avoids hard-to-debug behaviour); diff --git a/3.2.34/3rd-3rdparty-1.0-tree.patch b/3.2.34/3rd-3rdparty-1.0-tree.patch new file mode 100644 index 0000000..2a6ed72 --- /dev/null +++ b/3.2.34/3rd-3rdparty-1.0-tree.patch @@ -0,0 +1,181 @@ + + 3rdparty/mkbuild.pl | 92 +++++++++++++++++++++++++++++++++++++++++++++ + Documentation/3rdparty.txt | 76 +++++++++++++++++++++++++++++++++++++ + 2 files changed, 168 insertions(+) + +diff -Nurp linux-2.6.37/3rdparty/mkbuild.pl 3rdparty/mkbuild.pl +--- linux-2.6.37/3rdparty/mkbuild.pl 1970-01-01 02:00:00.000000000 +0200 ++++ 3rdparty/mkbuild.pl 2004-04-23 14:59:03.000000000 +0300 +@@ -0,0 +1,92 @@ ++#!/usr/bin/perl -w ++# ++# Version 1.0 ++# ++# Copyright 2001 Jeff Garzik ++# Copyright 2002 Juan Quintela ++# Copyright 2003 Nicolas Planel ++# ++# This software may be used and distributed according to the terms ++# of the GNU General Public License, incorporated herein by reference. ++# ++# ++# Run "mkbuild.pl" ++# ++# This program generates the following files ++# Makefile ++# Makefile.drivers ++# Config.in ++# using the information in the subdirs of this directory. ++# ++# subdirs need to have: ++# a Config.in file ++# a Makefile with a O_TARGET/L_TARGET targets ++# The config.in should set a CONFIG_ to m/y. ++ ++use strict; ++ ++opendir(THISDIR, "."); ++# get dirs without . and .. garbage ++my (@modules) = grep(!/\.\.?$/,grep(-d, readdir(THISDIR))); ++closedir(THISDIR); ++ ++generate_kconfig(@modules); ++generate_makefile(@modules); ++exit(0); ++ ++########################################################################## ++ ++sub generate_makefile { ++ my (@modules) = @_; ++ ++ local *F; ++ open F, "> Makefile" or die "Cannot create new Makefile: $!\n"; ++ print F <<'EOM'; ++# ++# THIS IS AN AUTOMATICALLY GENERATED FILE. DO NOT EDIT. ++# ++ ++EOM ++ printf F "obj- := 3rdparty.o # Dummy rule to force built-in.o to be made\n"; ++ printf F "obj-\$(%s) += %s\n", to_CONFIG($_), $_ . '/' foreach @modules; ++} ++ ++sub generate_kconfig { ++ my (@modules) = @_; ++ ++ local *F; ++ open F, "> Kconfig" or die "Cannot create Kconfig: $!\n"; ++ print F <<"EOM"; ++# ++# THIS IS AN AUTOMATICALLY GENERATED FILE. DO NOT EDIT. ++# ++ ++menu "Unofficial 3rd party kernel additions" ++ ++EOM ++ ++ foreach (@modules) { ++ die "No Kconfig in $_.\n" if ! -r "$_/Kconfig"; ++ print F "source 3rdparty/$_/Kconfig\n"; ++ } ++ print F "\n\nendmenu\n"; ++} ++ ++sub to_CONFIG { ++ local $_ = $_[0]; ++ tr/a-z/A-Z/; ++ s/[\-\. ]/_/g; ++ "CONFIG_$_"; ++} ++ ++sub find_target { ++ my ($module_dir) = @_; ++ ++ local *F; ++ open(F, "$module_dir/Makefile") or die "$module_dir/Makefile: $!\n"; ++ while () { ++ chomp; ++ return $1 if (/[LO]_TARGET.*:=\s+(\S+)/); ++ } ++} ++ +diff -Nurp linux-2.6.37/Documentation/3rdparty.txt Documentation/3rdparty.txt +--- linux-2.6.37/Documentation/3rdparty.txt 1970-01-01 02:00:00.000000000 +0200 ++++ Documentation/3rdparty.txt 2003-11-22 01:07:26.000000000 +0200 +@@ -0,0 +1,76 @@ ++ ++Third-Party Kernel Source Module Support, or ++an easy way to add modules to your kernel build. ++ ++ ++ ++Vendors quite often add additional drivers and features to the kernel ++which require nothing more than modifying Kconfig, Makefile, and ++adding one or more files to a sub-directory. As a single discrete task, ++this is not a problem. 
However, using patches to add modules to the ++kernel very often results in patch conflicts, resulting in needless time ++wastage as developers regenerate an otherwise working kernel patch. ++ ++This is designed as a solution to these problems. It is NOT designed as ++a replacement for the kernel build system, but merely as a tool for ++vendors and system administrators to ease the pain of patch management. ++ ++The key feature of this system is the distinct lack of patches. Drivers ++are installed via unpacking a tarball. ++ ++ ++ ++Adding a directory to the build (usually from a tarball) ++-------------------------------------------------------- ++If a directory exists inside the 3rdparty sub-directory that contains a ++proper Makefile, it can be added to the build. It also needs a ++Kconfig file. ++ ++ cd /usr/src/linux-2.4.3/3rdparty ++ bzcat /tmp/my-driver2.tar.bz2 | tar xf - # creates "my2" dir ++ ++ ++Limitations ++----------- ++There are some limitations to this system. This system is only ++designed to support a very common case. If you find yourself running ++into limitations (kernel build experts can spot them right off), ++then you should probably be patching the kernel instead of using ++mkbuild.pl for that particular module. ++ ++FIXME: actually list the limitations ++ ++ ++ ++Other notes ++----------- ++Link order is controlled by the order of mkbuild.pl executions. ++ ++"make mrproper" will erase Makefile.meta, and empty Kconfig, Makefile, ++and Makefile.drivers. ++ ++IMPORTANT NOTE: Because this feature modifies the kernel's makefiles and ++configuration system, you MUST complete all mkbuild.pl runs before ++running any "make" command. ++ ++Building in the 3rdparty dir ++---------------------------- ++ ++If you use modules that: ++ - are contained in one subdir with the name of the module ++ - has a Makefile ++ - has a Kconfig file ++ ++The system calls the ./mkbuild.pl script. It will search for ++subdirectories, and will try to build each of them as a module. ++Things to note: ++ ++ The dependencies will be done in a module called: ++ ++ 3rdparty// ++ ++depending of CONFIG_. ++ ++ is the value of O_TARGET/L_TARGET. 
++ ++ diff --git a/3.2.34/3rd-3rdparty-button_hotplug-0.4.1.patch b/3.2.34/3rd-3rdparty-button_hotplug-0.4.1.patch new file mode 100644 index 0000000..a7b4a15 --- /dev/null +++ b/3.2.34/3rd-3rdparty-button_hotplug-0.4.1.patch @@ -0,0 +1,372 @@ +Submitted By: Mario Fetka (mario dot fetka at gmail dot com) +Date: 2012-11-18 +Initial Package Version: 3.2.33 +Origin: openwtr.org packages/system/button-hotplug +Upstream Status: unknown +Description: create uevents from button usage + +diff -Naur linux-3.2.33-go.orig/3rdparty/button_hotplug/Kconfig 3rdparty/button_hotplug/Kconfig +--- linux-3.2.33-go.orig/3rdparty/button_hotplug/Kconfig 1970-01-01 00:00:00.000000000 +0000 ++++ 3rdparty/button_hotplug/Kconfig 2012-11-18 14:45:26.000000000 +0000 +@@ -0,0 +1,2 @@ ++config BUTTON_HOTPLUG ++ tristate "Button Hotplug driver" +diff -Naur linux-3.2.33-go.orig/3rdparty/button_hotplug/Makefile 3rdparty/button_hotplug/Makefile +--- linux-3.2.33-go.orig/3rdparty/button_hotplug/Makefile 1970-01-01 00:00:00.000000000 +0000 ++++ 3rdparty/button_hotplug/Makefile 2012-11-18 14:45:26.000000000 +0000 +@@ -0,0 +1 @@ ++obj-${CONFIG_BUTTON_HOTPLUG} += button-hotplug.o +\ No newline at end of file +diff -Naur linux-3.2.33-go.orig/3rdparty/button_hotplug/button-hotplug.c 3rdparty/button_hotplug/button-hotplug.c +--- linux-3.2.33-go.orig/3rdparty/button_hotplug/button-hotplug.c 1970-01-01 00:00:00.000000000 +0000 ++++ 3rdparty/button_hotplug/button-hotplug.c 2012-11-18 14:45:26.000000000 +0000 +@@ -0,0 +1,349 @@ ++/* ++ * Button Hotplug driver ++ * ++ * Copyright (C) 2008-2010 Gabor Juhos ++ * ++ * Based on the diag.c - GPIO interface driver for Broadcom boards ++ * Copyright (C) 2006 Mike Baker , ++ * Copyright (C) 2006-2007 Felix Fietkau ++ * Copyright (C) 2008 Andy Boyett ++ * ++ * This program is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 as published ++ * by the Free Software Foundation. ++ */ ++ ++#include ++#include ++#include ++#include ++ ++#include ++#include ++#include ++#include ++ ++#define DRV_NAME "button-hotplug" ++#define DRV_VERSION "0.4.1" ++#define DRV_DESC "Button Hotplug driver" ++ ++#define BH_SKB_SIZE 2048 ++ ++#define PFX DRV_NAME ": " ++ ++#undef BH_DEBUG ++ ++#ifdef BH_DEBUG ++#define BH_DBG(fmt, args...) printk(KERN_DEBUG "%s: " fmt, DRV_NAME, ##args ) ++#else ++#define BH_DBG(fmt, args...) do {} while (0) ++#endif ++ ++#define BH_ERR(fmt, args...) 
printk(KERN_ERR "%s: " fmt, DRV_NAME, ##args ) ++ ++#ifndef BIT_MASK ++#define BIT_MASK(nr) (1UL << ((nr) % BITS_PER_LONG)) ++#endif ++ ++struct bh_priv { ++ unsigned long *seen; ++ struct input_handle handle; ++}; ++ ++struct bh_event { ++ const char *name; ++ char *action; ++ unsigned long seen; ++ ++ struct sk_buff *skb; ++ struct work_struct work; ++}; ++ ++struct bh_map { ++ unsigned int code; ++ const char *name; ++}; ++ ++extern u64 uevent_next_seqnum(void); ++ ++#define BH_MAP(_code, _name) \ ++ { \ ++ .code = (_code), \ ++ .name = (_name), \ ++ } ++ ++static struct bh_map button_map[] = { ++ BH_MAP(BTN_0, "BTN_0"), ++ BH_MAP(BTN_1, "BTN_1"), ++ BH_MAP(BTN_2, "BTN_2"), ++ BH_MAP(BTN_3, "BTN_3"), ++ BH_MAP(BTN_4, "BTN_4"), ++ BH_MAP(BTN_5, "BTN_5"), ++ BH_MAP(BTN_6, "BTN_6"), ++ BH_MAP(BTN_7, "BTN_7"), ++ BH_MAP(BTN_8, "BTN_8"), ++ BH_MAP(BTN_9, "BTN_9"), ++ BH_MAP(KEY_RESTART, "reset"), ++#ifdef KEY_WPS_BUTTON ++ BH_MAP(KEY_WPS_BUTTON, "wps"), ++#endif /* KEY_WPS_BUTTON */ ++}; ++ ++/* -------------------------------------------------------------------------*/ ++ ++static int bh_event_add_var(struct bh_event *event, int argv, ++ const char *format, ...) ++{ ++ static char buf[128]; ++ char *s; ++ va_list args; ++ int len; ++ ++ if (argv) ++ return 0; ++ ++ va_start(args, format); ++ len = vsnprintf(buf, sizeof(buf), format, args); ++ va_end(args); ++ ++ if (len >= sizeof(buf)) { ++ BH_ERR("buffer size too small\n"); ++ WARN_ON(1); ++ return -ENOMEM; ++ } ++ ++ s = skb_put(event->skb, len + 1); ++ strcpy(s, buf); ++ ++ BH_DBG("added variable '%s'\n", s); ++ ++ return 0; ++} ++ ++static int button_hotplug_fill_event(struct bh_event *event) ++{ ++ int ret; ++ ++ ret = bh_event_add_var(event, 0, "HOME=%s", "/"); ++ if (ret) ++ return ret; ++ ++ ret = bh_event_add_var(event, 0, "PATH=%s", ++ "/sbin:/bin:/usr/sbin:/usr/bin"); ++ if (ret) ++ return ret; ++ ++ ret = bh_event_add_var(event, 0, "SUBSYSTEM=%s", "button"); ++ if (ret) ++ return ret; ++ ++ ret = bh_event_add_var(event, 0, "ACTION=%s", event->action); ++ if (ret) ++ return ret; ++ ++ ret = bh_event_add_var(event, 0, "BUTTON=%s", event->name); ++ if (ret) ++ return ret; ++ ++ ret = bh_event_add_var(event, 0, "SEEN=%ld", event->seen); ++ if (ret) ++ return ret; ++ ++ ret = bh_event_add_var(event, 0, "SEQNUM=%llu", uevent_next_seqnum()); ++ ++ return ret; ++} ++ ++static void button_hotplug_work(struct work_struct *work) ++{ ++ struct bh_event *event = container_of(work, struct bh_event, work); ++ int ret = 0; ++ ++ event->skb = alloc_skb(BH_SKB_SIZE, GFP_KERNEL); ++ if (!event->skb) ++ goto out_free_event; ++ ++ ret = bh_event_add_var(event, 0, "%s@", event->action); ++ if (ret) ++ goto out_free_skb; ++ ++ ret = button_hotplug_fill_event(event); ++ if (ret) ++ goto out_free_skb; ++ ++ NETLINK_CB(event->skb).dst_group = 1; ++ broadcast_uevent(event->skb, 0, 1, GFP_KERNEL); ++ ++ out_free_skb: ++ if (ret) { ++ BH_ERR("work error %d\n", ret); ++ kfree_skb(event->skb); ++ } ++ out_free_event: ++ kfree(event); ++} ++ ++static int button_hotplug_create_event(const char *name, unsigned long seen, ++ int pressed) ++{ ++ struct bh_event *event; ++ ++ BH_DBG("create event, name=%s, seen=%lu, pressed=%d\n", ++ name, seen, pressed); ++ ++ event = kzalloc(sizeof(*event), GFP_KERNEL); ++ if (!event) ++ return -ENOMEM; ++ ++ event->name = name; ++ event->seen = seen; ++ event->action = pressed ? 
"pressed" : "released"; ++ ++ INIT_WORK(&event->work, (void *)(void *)button_hotplug_work); ++ schedule_work(&event->work); ++ ++ return 0; ++} ++ ++/* -------------------------------------------------------------------------*/ ++ ++#ifdef CONFIG_HOTPLUG ++static int button_get_index(unsigned int code) ++{ ++ int i; ++ ++ for (i = 0; i < ARRAY_SIZE(button_map); i++) ++ if (button_map[i].code == code) ++ return i; ++ ++ return -1; ++} ++static void button_hotplug_event(struct input_handle *handle, ++ unsigned int type, unsigned int code, int value) ++{ ++ struct bh_priv *priv = handle->private; ++ unsigned long seen = jiffies; ++ int btn; ++ ++ BH_DBG("event type=%u, code=%u, value=%d\n", type, code, value); ++ ++ if (type != EV_KEY) ++ return; ++ ++ btn = button_get_index(code); ++ if (btn < 0) ++ return; ++ ++ button_hotplug_create_event(button_map[btn].name, ++ (seen - priv->seen[btn]) / HZ, value); ++ priv->seen[btn] = seen; ++} ++#else ++static void button_hotplug_event(struct input_handle *handle, ++ unsigned int type, unsigned int code, int value) ++{ ++} ++#endif /* CONFIG_HOTPLUG */ ++ ++static int button_hotplug_connect(struct input_handler *handler, ++ struct input_dev *dev, const struct input_device_id *id) ++{ ++ struct bh_priv *priv; ++ int ret; ++ int i; ++ ++ for (i = 0; i < ARRAY_SIZE(button_map); i++) ++ if (test_bit(button_map[i].code, dev->keybit)) ++ break; ++ ++ if (i == ARRAY_SIZE(button_map)) ++ return -ENODEV; ++ ++ priv = kzalloc(sizeof(*priv) + ++ (sizeof(unsigned long) * ARRAY_SIZE(button_map)), ++ GFP_KERNEL); ++ if (!priv) ++ return -ENOMEM; ++ ++ priv->seen = (unsigned long *) &priv[1]; ++ priv->handle.private = priv; ++ priv->handle.dev = dev; ++ priv->handle.handler = handler; ++ priv->handle.name = DRV_NAME; ++ ++ ret = input_register_handle(&priv->handle); ++ if (ret) ++ goto err_free_priv; ++ ++ ret = input_open_device(&priv->handle); ++ if (ret) ++ goto err_unregister_handle; ++ ++ BH_DBG("connected to %s\n", dev->name); ++ ++ return 0; ++ ++ err_unregister_handle: ++ input_unregister_handle(&priv->handle); ++ ++ err_free_priv: ++ kfree(priv); ++ return ret; ++} ++ ++static void button_hotplug_disconnect(struct input_handle *handle) ++{ ++ struct bh_priv *priv = handle->private; ++ ++ input_close_device(handle); ++ input_unregister_handle(handle); ++ ++ kfree(priv); ++} ++ ++static const struct input_device_id button_hotplug_ids[] = { ++ { ++ .flags = INPUT_DEVICE_ID_MATCH_EVBIT, ++ .evbit = { BIT_MASK(EV_KEY) }, ++ }, ++ { ++ /* Terminating entry */ ++ }, ++}; ++ ++MODULE_DEVICE_TABLE(input, button_hotplug_ids); ++ ++static struct input_handler button_hotplug_handler = { ++ .event = button_hotplug_event, ++ .connect = button_hotplug_connect, ++ .disconnect = button_hotplug_disconnect, ++ .name = DRV_NAME, ++ .id_table = button_hotplug_ids, ++}; ++ ++/* -------------------------------------------------------------------------*/ ++ ++static int __init button_hotplug_init(void) ++{ ++ int ret; ++ ++ printk(KERN_INFO DRV_DESC " version " DRV_VERSION "\n"); ++ ret = input_register_handler(&button_hotplug_handler); ++ if (ret) ++ BH_ERR("unable to register input handler\n"); ++ ++ return ret; ++} ++module_init(button_hotplug_init); ++ ++static void __exit button_hotplug_exit(void) ++{ ++ input_unregister_handler(&button_hotplug_handler); ++} ++module_exit(button_hotplug_exit); ++ ++MODULE_DESCRIPTION(DRV_DESC); ++MODULE_VERSION(DRV_VERSION); ++MODULE_AUTHOR("Gabor Juhos "); ++MODULE_LICENSE("GPL v2"); ++ diff --git 
a/3.2.34/3rd-3rdparty-gpio_button_hotplug-0.1.patch b/3.2.34/3rd-3rdparty-gpio_button_hotplug-0.1.patch new file mode 100644 index 0000000..6b2e78e --- /dev/null +++ b/3.2.34/3rd-3rdparty-gpio_button_hotplug-0.1.patch @@ -0,0 +1,472 @@ +Submitted By: Mario Fetka (mario dot fetka at gmail dot com) +Date: 2012-11-18 +Initial Package Version: 3.2.33 +Origin: openwtr.org packages/system/gpio-button-hotplug +Upstream Status: unknown +Description: gpio button uevent + +diff -Naur linux-3.2.33-go.orig/3rdparty/gpio_button_hotplug/Kconfig 3rdparty/gpio_button_hotplug/Kconfig +--- linux-3.2.33-go.orig/3rdparty/gpio_button_hotplug/Kconfig 1970-01-01 00:00:00.000000000 +0000 ++++ 3rdparty/gpio_button_hotplug/Kconfig 2012-11-18 18:41:43.048939468 +0000 +@@ -0,0 +1,2 @@ ++config GPIO_BUTTON_HOTPLUG ++ tristate "GPIO Button Hotplug driver" +diff -Naur linux-3.2.33-go.orig/3rdparty/gpio_button_hotplug/Makefile 3rdparty/gpio_button_hotplug/Makefile +--- linux-3.2.33-go.orig/3rdparty/gpio_button_hotplug/Makefile 1970-01-01 00:00:00.000000000 +0000 ++++ 3rdparty/gpio_button_hotplug/Makefile 2012-11-18 14:45:26.000000000 +0000 +@@ -0,0 +1 @@ ++obj-${CONFIG_GPIO_BUTTON_HOTPLUG} += gpio-button-hotplug.o +diff -Naur linux-3.2.33-go.orig/3rdparty/gpio_button_hotplug/gpio-button-hotplug.c 3rdparty/gpio_button_hotplug/gpio-button-hotplug.c +--- linux-3.2.33-go.orig/3rdparty/gpio_button_hotplug/gpio-button-hotplug.c 1970-01-01 00:00:00.000000000 +0000 ++++ 3rdparty/gpio_button_hotplug/gpio-button-hotplug.c 2012-11-18 14:45:26.000000000 +0000 +@@ -0,0 +1,450 @@ ++/* ++ * GPIO Button Hotplug driver ++ * ++ * Copyright (C) 2012 Felix Fietkau ++ * Copyright (C) 2008-2010 Gabor Juhos ++ * ++ * Based on the diag.c - GPIO interface driver for Broadcom boards ++ * Copyright (C) 2006 Mike Baker , ++ * Copyright (C) 2006-2007 Felix Fietkau ++ * Copyright (C) 2008 Andy Boyett ++ * ++ * This program is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 as published ++ * by the Free Software Foundation. ++ */ ++ ++#include ++#include ++#include ++ ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++ ++#define DRV_NAME "gpio-keys-polled" ++ ++#define BH_SKB_SIZE 2048 ++ ++#define PFX DRV_NAME ": " ++ ++#undef BH_DEBUG ++ ++#ifdef BH_DEBUG ++#define BH_DBG(fmt, args...) printk(KERN_DEBUG "%s: " fmt, DRV_NAME, ##args ) ++#else ++#define BH_DBG(fmt, args...) do {} while (0) ++#endif ++ ++#define BH_ERR(fmt, args...) 
printk(KERN_ERR "%s: " fmt, DRV_NAME, ##args ) ++ ++struct bh_priv { ++ unsigned long seen; ++}; ++ ++struct bh_event { ++ const char *name; ++ char *action; ++ unsigned long seen; ++ ++ struct sk_buff *skb; ++ struct work_struct work; ++}; ++ ++struct bh_map { ++ unsigned int code; ++ const char *name; ++}; ++ ++struct gpio_keys_button_data { ++ struct delayed_work work; ++ struct bh_priv bh; ++ int last_state; ++ int count; ++ int threshold; ++ int can_sleep; ++}; ++ ++extern u64 uevent_next_seqnum(void); ++ ++#define BH_MAP(_code, _name) \ ++ { \ ++ .code = (_code), \ ++ .name = (_name), \ ++ } ++ ++static struct bh_map button_map[] = { ++ BH_MAP(BTN_0, "BTN_0"), ++ BH_MAP(BTN_1, "BTN_1"), ++ BH_MAP(BTN_2, "BTN_2"), ++ BH_MAP(BTN_3, "BTN_3"), ++ BH_MAP(BTN_4, "BTN_4"), ++ BH_MAP(BTN_5, "BTN_5"), ++ BH_MAP(BTN_6, "BTN_6"), ++ BH_MAP(BTN_7, "BTN_7"), ++ BH_MAP(BTN_8, "BTN_8"), ++ BH_MAP(BTN_9, "BTN_9"), ++ BH_MAP(KEY_RESTART, "reset"), ++#ifdef KEY_WPS_BUTTON ++ BH_MAP(KEY_WPS_BUTTON, "wps"), ++#endif /* KEY_WPS_BUTTON */ ++}; ++ ++/* -------------------------------------------------------------------------*/ ++ ++static int bh_event_add_var(struct bh_event *event, int argv, ++ const char *format, ...) ++{ ++ static char buf[128]; ++ char *s; ++ va_list args; ++ int len; ++ ++ if (argv) ++ return 0; ++ ++ va_start(args, format); ++ len = vsnprintf(buf, sizeof(buf), format, args); ++ va_end(args); ++ ++ if (len >= sizeof(buf)) { ++ BH_ERR("buffer size too small\n"); ++ WARN_ON(1); ++ return -ENOMEM; ++ } ++ ++ s = skb_put(event->skb, len + 1); ++ strcpy(s, buf); ++ ++ BH_DBG("added variable '%s'\n", s); ++ ++ return 0; ++} ++ ++static int button_hotplug_fill_event(struct bh_event *event) ++{ ++ int ret; ++ ++ ret = bh_event_add_var(event, 0, "HOME=%s", "/"); ++ if (ret) ++ return ret; ++ ++ ret = bh_event_add_var(event, 0, "PATH=%s", ++ "/sbin:/bin:/usr/sbin:/usr/bin"); ++ if (ret) ++ return ret; ++ ++ ret = bh_event_add_var(event, 0, "SUBSYSTEM=%s", "button"); ++ if (ret) ++ return ret; ++ ++ ret = bh_event_add_var(event, 0, "ACTION=%s", event->action); ++ if (ret) ++ return ret; ++ ++ ret = bh_event_add_var(event, 0, "BUTTON=%s", event->name); ++ if (ret) ++ return ret; ++ ++ ret = bh_event_add_var(event, 0, "SEEN=%ld", event->seen); ++ if (ret) ++ return ret; ++ ++ ret = bh_event_add_var(event, 0, "SEQNUM=%llu", uevent_next_seqnum()); ++ ++ return ret; ++} ++ ++static void button_hotplug_work(struct work_struct *work) ++{ ++ struct bh_event *event = container_of(work, struct bh_event, work); ++ int ret = 0; ++ ++ event->skb = alloc_skb(BH_SKB_SIZE, GFP_KERNEL); ++ if (!event->skb) ++ goto out_free_event; ++ ++ ret = bh_event_add_var(event, 0, "%s@", event->action); ++ if (ret) ++ goto out_free_skb; ++ ++ ret = button_hotplug_fill_event(event); ++ if (ret) ++ goto out_free_skb; ++ ++ NETLINK_CB(event->skb).dst_group = 1; ++ broadcast_uevent(event->skb, 0, 1, GFP_KERNEL); ++ ++ out_free_skb: ++ if (ret) { ++ BH_ERR("work error %d\n", ret); ++ kfree_skb(event->skb); ++ } ++ out_free_event: ++ kfree(event); ++} ++ ++static int button_hotplug_create_event(const char *name, unsigned long seen, ++ int pressed) ++{ ++ struct bh_event *event; ++ ++ BH_DBG("create event, name=%s, seen=%lu, pressed=%d\n", ++ name, seen, pressed); ++ ++ event = kzalloc(sizeof(*event), GFP_KERNEL); ++ if (!event) ++ return -ENOMEM; ++ ++ event->name = name; ++ event->seen = seen; ++ event->action = pressed ? 
"pressed" : "released"; ++ ++ INIT_WORK(&event->work, (void *)(void *)button_hotplug_work); ++ schedule_work(&event->work); ++ ++ return 0; ++} ++ ++/* -------------------------------------------------------------------------*/ ++ ++#ifdef CONFIG_HOTPLUG ++static int button_get_index(unsigned int code) ++{ ++ int i; ++ ++ for (i = 0; i < ARRAY_SIZE(button_map); i++) ++ if (button_map[i].code == code) ++ return i; ++ ++ return -1; ++} ++static void button_hotplug_event(struct gpio_keys_button_data *data, ++ unsigned int type, unsigned int code, int value) ++{ ++ struct bh_priv *priv = &data->bh; ++ unsigned long seen = jiffies; ++ int btn; ++ ++ BH_DBG("event type=%u, code=%u, value=%d\n", type, code, value); ++ ++ if (type != EV_KEY) ++ return; ++ ++ btn = button_get_index(code); ++ if (btn < 0) ++ return; ++ ++ button_hotplug_create_event(button_map[btn].name, ++ (seen - priv->seen) / HZ, value); ++ priv->seen = seen; ++} ++#else ++static void button_hotplug_event(struct gpio_keys_button_data *data, ++ unsigned int type, unsigned int code, int value) ++{ ++} ++#endif /* CONFIG_HOTPLUG */ ++ ++struct gpio_keys_polled_dev { ++ struct delayed_work work; ++ ++ struct device *dev; ++ struct gpio_keys_platform_data *pdata; ++ struct gpio_keys_button_data data[0]; ++}; ++ ++static void gpio_keys_polled_check_state(struct gpio_keys_button *button, ++ struct gpio_keys_button_data *bdata) ++{ ++ int state; ++ ++ if (bdata->can_sleep) ++ state = !!gpio_get_value_cansleep(button->gpio); ++ else ++ state = !!gpio_get_value(button->gpio); ++ ++ state = !!(state ^ button->active_low); ++ if (state != bdata->last_state) { ++ unsigned int type = button->type ?: EV_KEY; ++ ++ button_hotplug_event(bdata, type, button->code, state); ++ bdata->count = 0; ++ bdata->last_state = state; ++ } ++} ++ ++static void gpio_keys_polled_queue_work(struct gpio_keys_polled_dev *bdev) ++{ ++ struct gpio_keys_platform_data *pdata = bdev->pdata; ++ unsigned long delay = msecs_to_jiffies(pdata->poll_interval); ++ ++ if (delay >= HZ) ++ delay = round_jiffies_relative(delay); ++ schedule_delayed_work(&bdev->work, delay); ++} ++ ++static void gpio_keys_polled_poll(struct work_struct *work) ++{ ++ struct gpio_keys_polled_dev *bdev = ++ container_of(work, struct gpio_keys_polled_dev, work.work); ++ struct gpio_keys_platform_data *pdata = bdev->pdata; ++ int i; ++ ++ for (i = 0; i < bdev->pdata->nbuttons; i++) { ++ struct gpio_keys_button_data *bdata = &bdev->data[i]; ++ ++ if (bdata->count < bdata->threshold) ++ bdata->count++; ++ else ++ gpio_keys_polled_check_state(&pdata->buttons[i], bdata); ++ } ++ gpio_keys_polled_queue_work(bdev); ++} ++ ++static void __devinit gpio_keys_polled_open(struct gpio_keys_polled_dev *bdev) ++{ ++ struct gpio_keys_platform_data *pdata = bdev->pdata; ++ int i; ++ ++ if (pdata->enable) ++ pdata->enable(bdev->dev); ++ ++ /* report initial state of the buttons */ ++ for (i = 0; i < pdata->nbuttons; i++) ++ gpio_keys_polled_check_state(&pdata->buttons[i], &bdev->data[i]); ++ ++ gpio_keys_polled_queue_work(bdev); ++} ++ ++static void __devexit gpio_keys_polled_close(struct gpio_keys_polled_dev *bdev) ++{ ++ struct gpio_keys_platform_data *pdata = bdev->pdata; ++ ++ cancel_delayed_work_sync(&bdev->work); ++ ++ if (pdata->disable) ++ pdata->disable(bdev->dev); ++} ++ ++static int __devinit gpio_keys_polled_probe(struct platform_device *pdev) ++{ ++ struct gpio_keys_platform_data *pdata = pdev->dev.platform_data; ++ struct device *dev = &pdev->dev; ++ struct gpio_keys_polled_dev *bdev; ++ int error; ++ int 
i; ++ ++ if (!pdata || !pdata->poll_interval) ++ return -EINVAL; ++ ++ bdev = kzalloc(sizeof(struct gpio_keys_polled_dev) + ++ pdata->nbuttons * sizeof(struct gpio_keys_button_data), ++ GFP_KERNEL); ++ if (!bdev) { ++ dev_err(dev, "no memory for private data\n"); ++ return -ENOMEM; ++ } ++ ++ for (i = 0; i < pdata->nbuttons; i++) { ++ struct gpio_keys_button *button = &pdata->buttons[i]; ++ struct gpio_keys_button_data *bdata = &bdev->data[i]; ++ unsigned int gpio = button->gpio; ++ ++ if (button->wakeup) { ++ dev_err(dev, DRV_NAME " does not support wakeup\n"); ++ error = -EINVAL; ++ goto err_free_gpio; ++ } ++ ++ error = gpio_request(gpio, ++ button->desc ? button->desc : DRV_NAME); ++ if (error) { ++ dev_err(dev, "unable to claim gpio %u, err=%d\n", ++ gpio, error); ++ goto err_free_gpio; ++ } ++ ++ error = gpio_direction_input(gpio); ++ if (error) { ++ dev_err(dev, ++ "unable to set direction on gpio %u, err=%d\n", ++ gpio, error); ++ goto err_free_gpio; ++ } ++ ++ bdata->can_sleep = gpio_cansleep(gpio); ++ bdata->last_state = 0; ++ bdata->threshold = DIV_ROUND_UP(button->debounce_interval, ++ pdata->poll_interval); ++ } ++ ++ bdev->dev = &pdev->dev; ++ bdev->pdata = pdata; ++ platform_set_drvdata(pdev, bdev); ++ ++ INIT_DELAYED_WORK(&bdev->work, gpio_keys_polled_poll); ++ ++ gpio_keys_polled_open(bdev); ++ ++ return 0; ++ ++err_free_gpio: ++ while (--i >= 0) ++ gpio_free(pdata->buttons[i].gpio); ++ ++ kfree(bdev); ++ platform_set_drvdata(pdev, NULL); ++ ++ return error; ++} ++ ++static int __devexit gpio_keys_polled_remove(struct platform_device *pdev) ++{ ++ struct gpio_keys_polled_dev *bdev = platform_get_drvdata(pdev); ++ struct gpio_keys_platform_data *pdata = bdev->pdata; ++ int i = pdata->nbuttons; ++ ++ gpio_keys_polled_close(bdev); ++ ++ while (--i >= 0) ++ gpio_free(pdata->buttons[i].gpio); ++ ++ kfree(bdev); ++ platform_set_drvdata(pdev, NULL); ++ ++ return 0; ++} ++ ++static struct platform_driver gpio_keys_polled_driver = { ++ .probe = gpio_keys_polled_probe, ++ .remove = __devexit_p(gpio_keys_polled_remove), ++ .driver = { ++ .name = DRV_NAME, ++ .owner = THIS_MODULE, ++ }, ++}; ++ ++static int __init gpio_keys_polled_init(void) ++{ ++ return platform_driver_register(&gpio_keys_polled_driver); ++} ++ ++static void __exit gpio_keys_polled_exit(void) ++{ ++ platform_driver_unregister(&gpio_keys_polled_driver); ++} ++ ++module_init(gpio_keys_polled_init); ++module_exit(gpio_keys_polled_exit); ++ ++MODULE_AUTHOR("Gabor Juhos "); ++MODULE_AUTHOR("Felix Fietkau "); ++MODULE_DESCRIPTION("Polled GPIO Buttons hotplug driver"); ++MODULE_LICENSE("GPL v2"); ++MODULE_ALIAS("platform:" DRV_NAME); diff --git a/3.2.34/3rd-3rdparty-gpio_event_drv-0.1.patch b/3.2.34/3rd-3rdparty-gpio_event_drv-0.1.patch new file mode 100644 index 0000000..3a75e28 --- /dev/null +++ b/3.2.34/3rd-3rdparty-gpio_event_drv-0.1.patch @@ -0,0 +1,1354 @@ +Submitted By: Mario Fetka (mario dot fetka at gmail dot com) +Date: 2012-11-18 +Initial Package Version: 3.2.33 +Origin: http://wiki.gumstix.org/index.php?title=GPIO_Event_Driver +Upstream Status: unknown +Description: The gpio-event driver consists of a loadable kernel module, +which registers an interrupt handler, along with an example user-mode program, +which allows the settings to be manipulated and changes to be reported. 
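The description above mentions an example user-mode program that manipulates the settings and reports changes. As a rough illustration of the interface this patch adds (the GPIO_EVENT_IOCTL_MONITOR_GPIO ioctl and the default ASCII read mode, both defined further down in gpio-event-drv.h), a minimal reader could look like the sketch below. The device path /dev/gpio-event follows from the device name the driver registers; the pin number and debounce time are placeholders, not values taken from the patch.

/* Minimal sketch of a gpio-event consumer; pin 17 and 20 ms debounce are
 * example values only. Assumes gpio-event-drv.h is on the include path and
 * that udev created /dev/gpio-event for the driver's device class. */
#include <stdio.h>
#include <stdint.h>
#include <string.h>
#include <fcntl.h>
#include <unistd.h>
#include <sys/ioctl.h>

#include "gpio-event-drv.h"

int main(void)
{
    GPIO_EventMonitor_t mon;
    char line[64];
    ssize_t n;

    int fd = open("/dev/gpio-event", O_RDONLY);
    if (fd < 0) {
        perror("open /dev/gpio-event");
        return 1;
    }

    memset(&mon, 0, sizeof(mon));
    mon.gpio             = 17;                  /* pin number: example only */
    mon.onOff            = 1;                   /* 1 = start monitoring */
    mon.edgeType         = GPIO_EventBothEdges;
    mon.debounceMilliSec = 20;

    if (ioctl(fd, GPIO_EVENT_IOCTL_MONITOR_GPIO, &mon) < 0) {
        perror("GPIO_EVENT_IOCTL_MONITOR_GPIO");
        return 1;
    }

    /* Default read mode is ASCII; each line is "nn E ssssssss.uuuuuu"
     * (GPIO number, R/F edge, timestamp), as produced by gpio_event_read(). */
    while ((n = read(fd, line, sizeof(line) - 1)) > 0) {
        line[n] = '\0';
        fputs(line, stdout);
    }

    close(fd);
    return 0;
}

Setting mon.onOff to 0 and issuing the same ioctl again stops monitoring of that pin; a read mode of GPIO_EventReadModeBinary can be selected with GPIO_EVENT_IOCTL_SET_READ_MODE to receive GPIO_Event_t structures instead of text.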
+ +diff -Naur linux-3.2.33-go.orig/3rdparty/gpio_event_drv/Kconfig 3rdparty/gpio_event_drv/Kconfig +--- linux-3.2.33-go.orig/3rdparty/gpio_event_drv/Kconfig 1970-01-01 00:00:00.000000000 +0000 ++++ 3rdparty/gpio_event_drv/Kconfig 2012-11-18 19:03:08.020733547 +0000 +@@ -0,0 +1,2 @@ ++config GPIO_EVENT_DRV ++ tristate "GPIO Event Driver (requires userspace app)" +diff -Naur linux-3.2.33-go.orig/3rdparty/gpio_event_drv/Makefile 3rdparty/gpio_event_drv/Makefile +--- linux-3.2.33-go.orig/3rdparty/gpio_event_drv/Makefile 1970-01-01 00:00:00.000000000 +0000 ++++ 3rdparty/gpio_event_drv/Makefile 2012-11-18 19:02:20.409297191 +0000 +@@ -0,0 +1 @@ ++obj-${CONFIG_GPIO_EVENT_DRV} += gpio-event-drv.o +\ No newline at end of file +diff -Naur linux-3.2.33-go.orig/3rdparty/gpio_event_drv/gpio-event-drv.c 3rdparty/gpio_event_drv/gpio-event-drv.c +--- linux-3.2.33-go.orig/3rdparty/gpio_event_drv/gpio-event-drv.c 1970-01-01 00:00:00.000000000 +0000 ++++ 3rdparty/gpio_event_drv/gpio-event-drv.c 2012-11-18 10:24:14.000000000 +0000 +@@ -0,0 +1,1210 @@ ++/**************************************************************************** ++* ++* Copyright (c) 2006 Dave Hylands ++* ++* This program is free software; you can redistribute it and/or modify ++* it under the terms of the GNU General Public License version 2 as ++* published by the Free Software Foundation. ++* ++* Alternatively, this software may be distributed under the terms of BSD ++* license. ++* ++* See README and COPYING for more details. ++* ++**************************************************************************** ++* ++* This driver allows multiple GPIO pins to be monitored and allows a user ++* mode program to be notified when the pin changes. ++* ++****************************************************************************/ ++ ++/* ---- Include Files ---------------------------------------------------- */ ++ ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++ ++#include ++#include ++ ++#include ++ ++#include "gpio-event-drv.h" ++ ++/* ---- Public Variables ------------------------------------------------- */ ++/* ---- Private Constants and Types -------------------------------------- */ ++ ++#define GPIO_EVENT_DEV_NAME "gpio-event" ++ ++#define DEBUG_ENABLED 1 ++ ++#if DEBUG_ENABLED ++# define DEBUG( flag, fmt, args... ) do { if ( gDebug ## flag ) printk( "%s: " fmt, __FUNCTION__ , ## args ); } while (0) ++#else ++# define DEBUG( flag, fmt, args... 
) ++#endif ++ ++/* ---- Private Variables ------------------------------------------------ */ ++ ++static char gBanner[] __initdata = KERN_INFO "GPIO Event Monitor 0.1 Compiled: " __DATE__ " at " __TIME__ "\n"; ++ ++static int gDebugTrace = 0; ++static int gDebugIoctl = 0; ++static int gDebugError = 1; ++static int gLostEvents = 0; ++ ++static struct ctl_table_header *gSysCtlHeader; ++ ++#if ( LINUX_VERSION_CODE >= KERNEL_VERSION( 2, 6, 33 )) ++#define CTL_NAME(x) ++#else ++#define CTL_NAME(x) .ctl_name = x, ++#endif ++ ++static struct ctl_table gSysCtlSample[] = ++{ ++ { ++ CTL_NAME(1) ++ .procname = "lost-events", ++ .data = &gLostEvents, ++ .maxlen = sizeof( int ), ++ .mode = 0644, ++ .proc_handler = &proc_dointvec ++ }, ++ { ++ CTL_NAME(101) ++ .procname = "debug-trace", ++ .data = &gDebugTrace, ++ .maxlen = sizeof( int ), ++ .mode = 0644, ++ .proc_handler = &proc_dointvec ++ }, ++ { ++ CTL_NAME(102) ++ .procname = "debug-ioctl", ++ .data = &gDebugIoctl, ++ .maxlen = sizeof( int ), ++ .mode = 0644, ++ .proc_handler = &proc_dointvec ++ }, ++ { ++ CTL_NAME(103) ++ .procname = "debug-error", ++ .data = &gDebugError, ++ .maxlen = sizeof( int ), ++ .mode = 0644, ++ .proc_handler = &proc_dointvec ++ }, ++ { 0 } ++}; ++ ++static struct ctl_table gSysCtl[] = ++{ ++ { ++ CTL_NAME(CTL_GPIO_EVENT) ++ .procname = "gpio-event", ++ .mode = 0555, ++ .child = gSysCtlSample ++ }, ++ { 0 } ++}; ++ ++/* ++ * An instance of GPIO_FileData_t is maintained for file open ++ */ ++ ++#define GPIO_EVENT_QUEUE_LEN 20 ++ ++// GPIO_EVENT_BUFFER_SIZE needs to be big enough to hold the ASCII version ++// of the GPIO_Event_t as well as the binary version of the GPIO_Event_t ++ ++#define GPIO_EVENT_BUFFER_SIZE 32 ++ ++typedef struct ++{ ++ struct list_head list; ++ wait_queue_head_t waitQueue; ++ ++ spinlock_t queueLock; ++ GPIO_Event_t queueData[ GPIO_EVENT_QUEUE_LEN ]; ++ volatile int getIndex; ++ volatile int putIndex; ++ volatile int numEvents; ++ ++ GPIO_EventReadMode_t readMode; ++ ++ char buffer[ GPIO_EVENT_BUFFER_SIZE ]; ++ int bufBytes; ++ ++} GPIO_FileData_t; ++ ++/* ++ * An instance of GPIO_PinData_t is maintained for each GPIO line which is ++ * monitored, ++ */ ++ ++typedef enum ++{ ++ PIN_LOW = 0, // Matches level of GPIO line ++ PIN_HIGH = 1, ++ PIN_BOUNCING_LOW, ++ PIN_BOUNCING_HIGH, ++} PinState_t; ++ ++typedef struct ++{ ++ struct list_head list; // list of all pins ++ ++ int gpio; // The gpio line being monitored ++ ++ // We maintain two lists, a global list of pins, and a list associated with each open ++ ++ ++ struct timer_list debounceTimer; // Timer to wake u up after an edge ++ uint8_t debounceMilliSec; // debounce time in milliseconds ++ char devName[ 16 ]; // gpio xx event ++ ++ GPIO_EventEdgeType_t edgeType; // Type of edge(s) we're looking for. ++ ++ PinState_t pinState; // Was the GPIO line low or high? 
++ ++} GPIO_PinData_t; ++ ++static volatile int gReportLostEvents = 1; ++ ++static struct class *gGpioEventClass = NULL; ++static struct cdev gGpioEventCDev; ++static dev_t gGpioEventDevNum = 0; ++ ++static DEFINE_SPINLOCK( gFileListLock ); ++static DEFINE_SPINLOCK( gPinListLock ); ++ ++static LIST_HEAD( gFileList ); ++static LIST_HEAD( gPinList ); ++ ++static struct proc_dir_entry *gProcGpioEvent; ++static struct proc_dir_entry *gProcPins; ++ ++ ++/* ---- Private Function Prototypes -------------------------------------- */ ++/* ---- Functions -------------------------------------------------------- */ ++ ++typedef struct ++{ ++ unsigned long flags; ++ struct list_head *list; ++ ++} pin_seq_t; ++ ++/**************************************************************************** ++* ++* pin_seq_start ++* ++* seq_file iterator which goes through the pins being monitored ++* ++****************************************************************************/ ++ ++static void *pin_seq_start( struct seq_file *s, loff_t *pos ) ++{ ++ pin_seq_t *ps; ++ loff_t i; ++ ++ s->private = NULL; ++ ++ if (( ps = kcalloc( 1, sizeof( pin_seq_t ), GFP_KERNEL )) == NULL ) ++ { ++ return ERR_PTR( -ENOMEM ); ++ } ++ s->private = ps; ++ ++ spin_lock_irqsave( &gPinListLock, ps->flags ); ++ ++ if ( list_empty( &gPinList )) ++ { ++ DEBUG( Trace, "list_empty\n" ); ++ return NULL; ++ } ++ ps->list = gPinList.next; ++ ++ for ( i = 0; i < *pos; i++ ) ++ { ++ if ( list_is_last( ps->list, &gPinList )) ++ { ++ DEBUG( Trace, "No item @ %llu\n", i + 1 ); ++ return NULL; ++ } ++ ps->list = ps->list->next; ++ } ++ ++ ++ DEBUG( Trace, "ps->list = 0x%08lx, *pos = %llu\n", (long)ps->list, *pos ); ++ ++ return ps->list; ++ ++} // pin_seq_start ++ ++/**************************************************************************** ++* ++* pin_seq_show ++* ++* seq_file iterator which goes through the pins being monitored ++* ++****************************************************************************/ ++ ++static int pin_seq_show( struct seq_file *s, void *v ) ++{ ++ GPIO_PinData_t *pin = list_entry( v, GPIO_PinData_t, list ); ++ char *edgeTypeStr; ++ ++ DEBUG( Trace, "v = 0x%08lx\n", (long)v ); ++ ++ switch ( pin->edgeType ) ++ { ++ case GPIO_EventRisingEdge: edgeTypeStr = "Rising "; break; ++ case GPIO_EventFallingEdge: edgeTypeStr = "Falling"; break; ++ case GPIO_EventBothEdges: edgeTypeStr = "Both "; break; ++ default: edgeTypeStr = "Unknown"; break; ++ } ++ ++ seq_printf( s, "GPIO: %3d Edge: %s Debounce: %d msec\n", pin->gpio, edgeTypeStr, pin->debounceMilliSec ); ++ ++ return 0; ++ ++} // pin_seq_show ++ ++/**************************************************************************** ++* ++* pin_seq_next ++* ++* seq_file iterator which goes through the pins being monitored ++* ++****************************************************************************/ ++ ++static void *pin_seq_next( struct seq_file *s, void *v, loff_t *pos ) ++{ ++ pin_seq_t *ps = s->private; ++ ++ DEBUG( Trace, "v = 0x%08lx *pos = %llu\n", (long)v, *pos ); ++ ++ if ( list_is_last( ps->list, &gPinList )) ++ { ++ DEBUG( Trace, "ps->list = 0x%08lx (end of list)\n", (long)ps->list ); ++ ++ return NULL; ++ } ++ (*pos)++; ++ ps->list = ps->list->next; ++ ++ DEBUG( Trace, "ps->list = 0x%08lx\n", (long)ps->list ); ++ ++ return ps->list; ++ ++} // pin_seq_next ++ ++/**************************************************************************** ++* ++* pin_seq_stop ++* ++* seq_file iterator which goes through the pins being monitored ++* 
++****************************************************************************/ ++ ++static void pin_seq_stop( struct seq_file *s, void *v ) ++{ ++ pin_seq_t *ps = s->private; ++ ++ DEBUG( Trace, "v = 0x%08lx\n", (long)v ); ++ ++ if ( ps != NULL ) ++ { ++ spin_unlock_irqrestore( &gPinListLock, ps->flags ); ++ kfree( ps ); ++ } ++ ++} // pin_seq_stop ++ ++/**************************************************************************** ++* ++* pin_seq_ops ++* ++* Ties all of the pin_seq_xxx routines together. ++* ++****************************************************************************/ ++ ++static struct seq_operations pin_seq_ops = ++{ ++ .start = pin_seq_start, ++ .next = pin_seq_next, ++ .stop = pin_seq_stop, ++ .show = pin_seq_show ++}; ++ ++/**************************************************************************** ++* ++* pins_proc_open ++* ++* Open method for /proc/gpio-event/pin ++* ++****************************************************************************/ ++ ++static int pins_proc_open( struct inode *inode, struct file *file ) ++{ ++ DEBUG( Trace, "called\n" ); ++ ++ return seq_open( file, &pin_seq_ops ); ++} ++ ++/**************************************************************************** ++* ++* pin_proc_ops ++* ++* File operations for our /proc/gpio-event/pins file ++* ++****************************************************************************/ ++ ++static struct file_operations pins_proc_ops = ++{ ++ .owner = THIS_MODULE, ++ .open = pins_proc_open, ++ .read = seq_read, ++ .llseek = seq_lseek, ++ .release = seq_release ++}; ++ ++ ++ ++/**************************************************************************** ++* ++* find_pin ++* ++* Searches the list to see if 'gpio' is currently being monitored. ++* ++****************************************************************************/ ++ ++static GPIO_PinData_t *find_pin( int gpio ) ++{ ++ struct list_head *pin; ++ ++ assert_spin_locked( &gPinListLock ); ++ ++ list_for_each( pin, &gPinList ) ++ { ++ GPIO_PinData_t *pinData = list_entry( pin, GPIO_PinData_t, list ); ++ ++ if ( pinData->gpio == gpio ) ++ { ++ return pinData; ++ } ++ } ++ ++ return NULL; ++ ++} // find_pin ++ ++/**************************************************************************** ++* ++* gpio_event_queue_event ++* ++* Queues an sample event from the bottom half to the top half. This ++* function queues up the event on every file that's open. ++* ++****************************************************************************/ ++ ++static void gpio_event_queue_event( const GPIO_Event_t *gpioEvent ) ++{ ++ unsigned long flags; ++ struct list_head *file; ++ ++ DEBUG( Trace, "gpio %d:%c@%ld.%06ld\n", ++ gpioEvent->gpio, ++ gpioEvent->edgeType == GPIO_EventRisingEdge ? 'R' : 'F', ++ gpioEvent->time.tv_sec, ++ gpioEvent->time.tv_usec ); ++ ++ // Queue up the event on all of the open files ++ // ++ // This function is only called from the ISR, with interrupts already ++ // disabled. 
++ ++ spin_lock_irqsave( &gFileListLock, flags ); ++ ++ list_for_each( file, &gFileList ) ++ { ++ GPIO_FileData_t *fileData = list_entry( file, GPIO_FileData_t, list ); ++ ++ spin_lock( &fileData->queueLock ); ++ { ++ if ( fileData->numEvents >= GPIO_EVENT_QUEUE_LEN ) ++ { ++ // Queue is full - Only report first event lost ++ ++ if ( gReportLostEvents ) ++ { ++ printk( KERN_ERR "GPIO Event: event lost due to queue full\n" ); ++ gReportLostEvents = 0; ++ } ++ gLostEvents++; ++ } ++ else ++ { ++ fileData->queueData[ fileData->putIndex++ ] = *gpioEvent; ++ if ( fileData->putIndex >= GPIO_EVENT_QUEUE_LEN ) ++ { ++ fileData->putIndex = 0; ++ } ++ fileData->numEvents++; ++ } ++ } ++ spin_unlock( &fileData->queueLock ); ++ ++ wake_up_interruptible( &fileData->waitQueue ); ++ } ++ spin_unlock_irqrestore( &gFileListLock, flags ); ++ ++} // gpio_event_queue_event ++ ++/**************************************************************************** ++* ++* gpio_event_dequeue_event ++* ++* Removes an event from the queue ++* ++****************************************************************************/ ++ ++static int gpio_event_dequeue_event( GPIO_FileData_t *fileData, GPIO_Event_t *gpioEvent ) ++{ ++ unsigned long flags; ++ int eventAvailable = 0; ++ ++ spin_lock_irqsave( &fileData->queueLock, flags ); ++ { ++ if ( fileData->numEvents > 0 ) ++ { ++ *gpioEvent = fileData->queueData[ fileData->getIndex++ ]; ++ if ( fileData->getIndex >= GPIO_EVENT_QUEUE_LEN ) ++ { ++ fileData->getIndex = 0; ++ } ++ fileData->numEvents--; ++ ++ eventAvailable = 1; ++ ++ if ( fileData->numEvents == 0 ) ++ { ++ // Since somebody is reading the queue now, indicate that we ++ // can report lost events again ++ ++ gReportLostEvents = 1; ++ } ++ } ++ } ++ spin_unlock_irqrestore( &fileData->queueLock, flags ); ++ ++ DEBUG( Trace, "gpio %d:%c@%ld.%06ld\n", ++ gpioEvent->gpio, ++ gpioEvent->edgeType == GPIO_EventRisingEdge ? 'R' : 'F', ++ gpioEvent->time.tv_sec, ++ gpioEvent->time.tv_usec ); ++ ++ return eventAvailable; ++ ++} // gpio_event_dequeue_event ++ ++/**************************************************************************** ++* ++* gpio_event_irq ++* ++****************************************************************************/ ++ ++static irqreturn_t gpio_event_irq( int irq, void *dev_id ) ++{ ++ GPIO_PinData_t *pinData = (GPIO_PinData_t *)dev_id; ++ GPIO_Event_t gpioEvent; ++ int currLevel = gpio_get_value( pinData->gpio ); ++ ++ // We're called with interrupts disabled. ++ ++ (void)irq; ++ ++ do_gettimeofday( &gpioEvent.time ); ++ gpioEvent.gpio = pinData->gpio; ++ ++ if ( pinData->debounceMilliSec == 0 ) ++ { ++ // We assume that this is a clean signal ++ ++ pinData->pinState = (PinState_t)currLevel; ++ ++ if ( pinData->edgeType == GPIO_EventBothEdges ) ++ { ++ // There's no register to tell which edge just occurred. So we ++ // assume that it just changed into its current level. ++ ++ if ( currLevel ) ++ { ++ // Pin is currently high, so this must be a rising edge ++ ++ gpioEvent.edgeType = GPIO_EventRisingEdge; ++ } ++ else ++ { ++ // Pin is currently low, so this must be a falling edge ++ ++ gpioEvent.edgeType = GPIO_EventFallingEdge; ++ } ++ } ++ else ++ { ++ // If we're only monitoring one type of edge, then that's the one ++ // that happened. ++ ++ gpioEvent.edgeType = pinData->edgeType; ++ } ++ gpio_event_queue_event( &gpioEvent ); ++ } ++ else ++ { ++ gpioEvent.edgeType = 0; ++ ++ // If we need to debounce, then we need to monitor both edges, and ++ // use the debounce timer to figure out the real state. 
So we don't ++ // actually know which edge we just got. We use a state machine ++ // to track things. ++ ++ switch ( pinData->pinState ) ++ { ++ case PIN_LOW: ++ { ++ pinData->pinState = PIN_BOUNCING_HIGH; ++ gpioEvent.edgeType = GPIO_EventRisingEdge; ++ break; ++ } ++ ++ case PIN_HIGH: ++ { ++ pinData->pinState = PIN_BOUNCING_LOW; ++ gpioEvent.edgeType = GPIO_EventFallingEdge; ++ break; ++ } ++ ++ default: ++ { ++ break; ++ } ++ } ++ ++ if (( pinData->edgeType & gpioEvent.edgeType ) != 0 ) ++ { ++ // This is an edge that the user is interested in - send it along. ++ ++ gpio_event_queue_event( &gpioEvent ); ++ } ++ ++ // Disable interrupts for our gpio to allow debounce to occur. The ++ // timer will re-enable the interrupt. ++ ++ disable_irq_nosync( irq ); ++ ++ // Since we have no idea when in the current jiffy that the edge ++ // occurred, we add 1 to the calculation to guarantee at least one ++ // whole jiffy. ++ ++ mod_timer( &pinData->debounceTimer, jiffies + msecs_to_jiffies( pinData->debounceMilliSec ) + 1 ); ++ } ++ ++ return IRQ_HANDLED; ++ ++} // gpio_event_irq ++ ++/**************************************************************************** ++* ++* gpio_event_timer ++* ++****************************************************************************/ ++ ++void gpio_event_timer( unsigned long data ) ++{ ++ GPIO_PinData_t *pinData = (GPIO_PinData_t *)data; ++ ++ // This function is called when the debounce timer for a gpio expires. ++ // We record the state of the pin so that we can figure out what the ++ // next edge will be. ++ ++ pinData->pinState = ( gpio_get_value( pinData->gpio ) != 0 ); ++ ++ // Turn interrupts back on so we can catch the next edge ++ ++ enable_irq( gpio_to_irq( pinData->gpio )); ++ ++} // gpio_event_timer ++ ++/**************************************************************************** ++* ++* gpio_event_monitor ++* ++****************************************************************************/ ++ ++static int gpio_event_monitor( GPIO_EventMonitor_t *monitor ) ++{ ++ int rc = 0; ++ unsigned long flags; ++ GPIO_PinData_t *pinData; ++ unsigned long irqFlags; ++ ++ spin_lock_irqsave( &gPinListLock, flags ); ++ ++ if ( monitor->onOff ) ++ { ++ // Check to make sure we aren't already monitoring the gpio ++ ++ if (( pinData = find_pin( monitor->gpio )) != NULL ) ++ { ++ // We are already monitoring the pin. Unmonitor the pin and then ++ // proceed. ++ ++ monitor->onOff = 0; ++ ++ spin_unlock_irqrestore( &gPinListLock, flags ); ++ gpio_event_monitor( monitor ); ++ spin_lock_irqsave( &gPinListLock, flags ); ++ } ++ ++ if (( pinData = kcalloc( 1, sizeof( *pinData ), GFP_KERNEL )) == NULL ) ++ { ++ DEBUG( Error, "GPIO %d: Out of memory\n", monitor->gpio ); ++ rc = -ENOMEM; ++ goto out; ++ } ++ ++ INIT_LIST_HEAD( &pinData->list ); ++ ++ snprintf( pinData->devName, sizeof( pinData->devName ), "gpio %d event", monitor->gpio ); ++ ++ // Note: ++ // Calling request_irq will automatically set the pin to be an input. ++ ++ irqFlags = 0; ++ ++ if ( monitor->debounceMilliSec == 0 ) ++ { ++ // A clean signal is being presented, so we can just look for ++ // a particular edge ++ ++ if (( monitor->edgeType & GPIO_EventRisingEdge ) != 0 ) ++ { ++ irqFlags |= IRQF_TRIGGER_RISING; ++ } ++ if (( monitor->edgeType & GPIO_EventFallingEdge ) != 0 ) ++ { ++ irqFlags |= IRQF_TRIGGER_FALLING; ++ } ++ } ++ else ++ { ++ // Since we need to debounce, we need to look for both types of ++ // edges, since we get both types of edges whenever a bounce ++ // happens. 
++ ++ irqFlags |= IRQF_TRIGGER_RISING; ++ irqFlags |= IRQF_TRIGGER_FALLING; ++ } ++ ++ if (( rc = request_irq( gpio_to_irq( monitor->gpio ), gpio_event_irq, irqFlags, pinData->devName, pinData )) != 0 ) ++ { ++ DEBUG( Error, "Unable to register irq for GPIO %d\n", monitor->gpio ); ++ kfree( pinData ); ++ goto out; ++ } ++ ++ pinData->gpio = monitor->gpio; ++ pinData->edgeType = monitor->edgeType; ++ pinData->debounceMilliSec = monitor->debounceMilliSec; ++ ++ init_timer( &pinData->debounceTimer ); ++ ++ pinData->debounceTimer.data = (unsigned long)pinData; ++ pinData->debounceTimer.function = gpio_event_timer; ++ ++ list_add_tail( &pinData->list, &gPinList ); ++ ++ if ( gpio_get_value( pinData->gpio ) == 0 ) ++ { ++ pinData->pinState = PIN_LOW; ++ } ++ else ++ { ++ pinData->pinState = PIN_HIGH; ++ } ++ } ++ else ++ { ++ if (( pinData = find_pin( monitor->gpio )) == NULL ) ++ { ++ DEBUG( Error, "GPIO %d isn't being monitored\n", monitor->gpio ); ++ rc = -ENXIO; ++ goto out; ++ } ++ ++ // We've found the gpio being monitored - turn things off. ++ ++ free_irq( gpio_to_irq( pinData->gpio ), pinData ); ++ ++ del_timer_sync( &pinData->debounceTimer ); ++ list_del( &pinData->list ); ++ ++ kfree( pinData ); ++ } ++ ++out: ++ ++ spin_unlock_irqrestore( &gPinListLock, flags ); ++ ++ return rc; ++ ++} // gpio_event_monitor ++ ++/**************************************************************************** ++* ++* gpio_event_ioctl ++* ++* Called to process ioctl requests ++* ++*****************************************************************************/ ++ ++long gpio_event_ioctl( struct file *file, unsigned int cmd, unsigned long arg ) ++{ ++ GPIO_FileData_t *fileData; ++ ++ DEBUG( Trace, "type: '%c' cmd: 0x%x\n", _IOC_TYPE( cmd ), _IOC_NR( cmd )); ++ ++ fileData = file->private_data; ++ ++ switch ( cmd ) ++ { ++ case GPIO_EVENT_IOCTL_MONITOR_GPIO: ++ { ++ GPIO_EventMonitor_t monitor; ++ ++ if ( copy_from_user( &monitor, (void *)arg, sizeof( monitor )) != 0 ) ++ { ++ return -EFAULT; ++ } ++ return gpio_event_monitor( &monitor ); ++ } ++ ++ case GPIO_EVENT_IOCTL_SET_READ_MODE: ++ { ++ fileData->readMode = (GPIO_EventReadMode_t)arg; ++ break; ++ } ++ ++ case TCGETS: ++ { ++ // When cat opens this device, we get this ioctl ++ return -ENOTTY; ++ } ++ ++ default: ++ { ++ DEBUG( Error, "Unrecognized ioctl: '0x%x'\n", cmd ); ++ return -ENOTTY; ++ } ++ } ++ ++ return 0; ++ ++} // gpio_event_ioctl ++ ++/**************************************************************************** ++* ++* gpio_event_open ++* ++****************************************************************************/ ++ ++static int gpio_event_open( struct inode *inode, struct file *file ) ++{ ++ unsigned long flags; ++ GPIO_FileData_t *fileData; ++ ++ DEBUG( Trace, "gpio_event_open called, major = %d, minor = %d\n", MAJOR( inode->i_rdev ), MINOR( inode->i_rdev )); ++ ++ // Allocate a per-open data structure ++ ++ if (( fileData = kcalloc( 1, sizeof( *fileData ), GFP_KERNEL )) == NULL ) ++ { ++ return -ENOMEM; ++ } ++ ++ INIT_LIST_HEAD( &fileData->list ); ++ ++ init_waitqueue_head( &fileData->waitQueue ); ++ ++ spin_lock_init( &fileData->queueLock ); ++ ++ fileData->getIndex = 0; ++ fileData->putIndex = 0; ++ fileData->numEvents = 0; ++ fileData->bufBytes = 0; ++ ++ fileData->readMode = GPIO_EventReadModeAscii; ++ ++ file->private_data = fileData; ++ ++ spin_lock_irqsave( &gFileListLock, flags ); ++ { ++ list_add_tail( &fileData->list, &gFileList ); ++ } ++ spin_unlock_irqrestore( &gFileListLock, flags ); ++ ++ return 0; ++ ++} // 
gpio_event_open ++ ++/**************************************************************************** ++* ++* gpio_event_read ++* ++****************************************************************************/ ++ ++static ssize_t gpio_event_read( struct file *file, char *buffer, size_t spaceRemaining, loff_t *ppos ) ++{ ++ int rc; ++ ssize_t bytesCopied = 0; ++ ssize_t bytesToCopy; ++ GPIO_FileData_t *fileData = file->private_data; ++ ++ DEBUG( Trace, "gpio_event_read called, major = %d, minor = %d\n", MAJOR( file->f_dentry->d_inode->i_rdev ), MINOR( file->f_dentry->d_inode->i_rdev )); ++ ++ if ( spaceRemaining == 0 ) ++ { ++ return 0; ++ } ++ ++ // First of all, return any unread data from the previous call ++ ++ if ( fileData->bufBytes > 0 ) ++ { ++ if ( spaceRemaining < fileData->bufBytes ) ++ { ++ bytesCopied = spaceRemaining; ++ } ++ else ++ { ++ bytesCopied = fileData->bufBytes; ++ } ++ ++ if ( copy_to_user( &buffer[0], &fileData->buffer[0], bytesCopied ) != 0 ) ++ { ++ return -EFAULT; ++ } ++ if ( fileData->bufBytes > bytesCopied ) ++ { ++ memmove( &fileData->buffer[ 0 ], &fileData->buffer[ bytesCopied ], fileData->bufBytes - bytesCopied ); ++ } ++ fileData->bufBytes -= bytesCopied; ++ ++ if ( fileData->bufBytes > 0 ) ++ { ++ // We copied some data, but not all of it. Return early. ++ ++ return bytesCopied; ++ } ++ } ++ ++ do ++ { ++ if ((( file->f_flags & O_NONBLOCK ) != 0 ) && ( fileData->numEvents == 0 )) ++ { ++ // File was opened non-blocking and no more data is available ++ // We don't want to wait for an event, so exit from the loop ++ ++ break; ++ } ++ ++ rc = wait_event_interruptible( fileData->waitQueue, ( fileData->numEvents > 0 )); ++ if ( rc != 0 ) ++ { ++ return rc; ++ } ++ ++ if ( fileData->readMode == GPIO_EventReadModeBinary ) ++ { ++ gpio_event_dequeue_event( fileData, (GPIO_Event_t *)&fileData->buffer[0] ); ++ ++ fileData->bufBytes = sizeof( GPIO_Event_t ); ++ ++ } ++ else ++ { ++ GPIO_Event_t gpioEvent; ++ ++ gpio_event_dequeue_event( fileData, &gpioEvent ); ++ ++ // ASCII Mode output: ++ // ++ // nn E tttttttt.tttttt ++ // ++ // Where nn is the base-10 GPIO number ++ // E is R or F (for rising or falling edge) ++ // tttttttt.tttttt is the timestamp with microsecond resolution ++ ++ fileData->bufBytes = snprintf( fileData->buffer, sizeof( fileData->buffer ), ++ "%2d %c %ld.%06ld\n", ++ gpioEvent.gpio, ++ (( gpioEvent.edgeType == GPIO_EventRisingEdge ) ? 'R' : 'F' ), ++ gpioEvent.time.tv_sec, ++ gpioEvent.time.tv_usec ); ++ } ++ ++ if ( spaceRemaining >= fileData->bufBytes ) ++ { ++ bytesToCopy = fileData->bufBytes; ++ } ++ else ++ { ++ bytesToCopy = spaceRemaining; ++ } ++ ++ if ( copy_to_user( &buffer[ bytesCopied ], &fileData->buffer[0], bytesToCopy ) != 0 ) ++ { ++ return -EFAULT; ++ } ++ spaceRemaining -= bytesToCopy; ++ bytesCopied += bytesToCopy; ++ fileData->bufBytes -= bytesToCopy; ++ ++ if ( fileData->bufBytes > 0 ) ++ { ++ // We couldn't copy all of the data out of the buffer. Move the ++ // remaining data to the beginning of the buffer and exit. ++ ++ memmove( &fileData->buffer[ 0 ], &fileData->buffer[ bytesToCopy ], fileData->bufBytes ); ++ return bytesCopied; ++ } ++ } while (( fileData->numEvents > 0 ) && ( spaceRemaining > 0 )); ++ ++ if ((( file->f_flags & O_NONBLOCK ) != 0 ) && ( bytesCopied == 0 )) ++ { ++ // File was opened non-blocking and we didn't copy any data. 
++ ++ return -EAGAIN; ++ } ++ ++ return bytesCopied; ++ ++} // gpio_event_read ++ ++/**************************************************************************** ++* ++* gpio_event_poll - used by select & poll ++* ++****************************************************************************/ ++ ++static unsigned int gpio_event_poll(struct file *file, poll_table *wait) ++{ ++ unsigned long flags; ++ GPIO_FileData_t *fileData = file->private_data; ++ unsigned int mask = 0; ++ ++ poll_wait( file, &fileData->waitQueue, wait ); ++ ++ spin_lock_irqsave( &fileData->queueLock, flags ); ++ { ++ if (( fileData->bufBytes > 0 ) || ( fileData->numEvents > 0 )) ++ { ++ mask |= POLLIN | POLLRDNORM; // readable ++ } ++ } ++ spin_unlock_irqrestore( &fileData->queueLock, flags ); ++ ++ return mask; ++ ++} // gpio_event_poll ++ ++/**************************************************************************** ++* ++* gpio_event_release ++* ++****************************************************************************/ ++ ++static int gpio_event_release( struct inode *inode, struct file *file ) ++{ ++ unsigned long flags; ++ GPIO_FileData_t *fileData = file->private_data; ++ ++ DEBUG( Trace, "gpio_event_release called\n" ); ++ ++ spin_lock_irqsave( &gFileListLock, flags ); ++ { ++ list_del( &fileData->list ); ++ } ++ spin_unlock_irqrestore( &gFileListLock, flags ); ++ ++ kfree( fileData ); ++ ++ return 0; ++ ++} // gpio_event_release ++ ++/**************************************************************************** ++* ++* File Operations (these are the device driver entry points) ++* ++****************************************************************************/ ++ ++struct file_operations gpio_event_fops = ++{ ++ owner: THIS_MODULE, ++ unlocked_ioctl: gpio_event_ioctl, ++ open: gpio_event_open, ++ poll: gpio_event_poll, ++ release: gpio_event_release, ++ read: gpio_event_read, ++}; ++ ++/**************************************************************************** ++* ++* gpio_event_init ++* ++* Called to perform module initialization when the module is loaded ++* ++****************************************************************************/ ++ ++static int __init gpio_event_init( void ) ++{ ++ int rc; ++ ++ DEBUG( Trace, "called\n" ); ++ ++ printk( gBanner ); ++ ++ // Get a major number ++ ++ if (( rc = alloc_chrdev_region( &gGpioEventDevNum, 0, 1, GPIO_EVENT_DEV_NAME )) < 0 ) ++ { ++ printk( KERN_WARNING "sample: Unable to allocate major, err: %d\n", rc ); ++ return rc; ++ } ++ DEBUG( Trace, "allocated major:%d minor:%d\n", MAJOR( gGpioEventDevNum ), MINOR( gGpioEventDevNum )); ++ ++ // Register our proc entries. ++ ++ gProcGpioEvent = create_proc_entry( "gpio-event", S_IFDIR | S_IRUGO | S_IXUGO, NULL ); ++ if ( gProcGpioEvent == NULL ) ++ { ++ return -ENOMEM; ++ } ++ gProcPins = create_proc_entry( "pins", 0444, gProcGpioEvent ); ++ if ( gProcPins != NULL ) ++ { ++ gProcPins->proc_fops = &pins_proc_ops; ++ } ++ ++#if ( LINUX_VERSION_CODE <= KERNEL_VERSION( 2, 6, 20 )) ++ gSysCtlHeader = register_sysctl_table( gSysCtl, 0 ); ++ if ( gSysCtlHeader != NULL ) ++ { ++ gSysCtlHeader->ctl_table->child->de->owner = THIS_MODULE; ++ } ++#else ++ gSysCtlHeader = register_sysctl_table( gSysCtl ); ++#endif ++ ++ // Register our device. The device becomes "active" as soon as cdev_add ++ // is called. 
++ ++ cdev_init( &gGpioEventCDev, &gpio_event_fops ); ++ gGpioEventCDev.owner = THIS_MODULE; ++ ++ if (( rc = cdev_add( &gGpioEventCDev, gGpioEventDevNum, 1 )) != 0 ) ++ { ++ printk( KERN_WARNING "sample: cdev_add failed: %d\n", rc ); ++ return rc; ++ } ++ ++ // Create a class, so that udev will make the /dev entry ++ ++ gGpioEventClass = class_create( THIS_MODULE, GPIO_EVENT_DEV_NAME ); ++ if ( IS_ERR( gGpioEventClass )) ++ { ++ printk( KERN_WARNING "sample: Unable to create class\n" ); ++ return -1; ++ } ++ ++ device_create( gGpioEventClass, NULL, gGpioEventDevNum, NULL, GPIO_EVENT_DEV_NAME ); ++ ++ return 0; ++ ++} // gpio_event_init ++ ++/**************************************************************************** ++* ++* gpio_event_exit ++* ++* Called to perform module cleanup when the module is unloaded. ++* ++****************************************************************************/ ++ ++static void __exit gpio_event_exit( void ) ++{ ++ struct list_head *next; ++ struct list_head *pin; ++ GPIO_EventMonitor_t monitor; ++ ++ DEBUG( Trace, "called\n" ); ++ ++ // If there are any pins which are currently being monitored, then we ++ // need to unmonitor them. ++ ++ memset( &monitor, 0, sizeof( monitor )); ++ ++ list_for_each_safe( pin, next, &gPinList ) ++ { ++ GPIO_PinData_t *pinData = list_entry( pin, GPIO_PinData_t, list ); ++ ++ monitor.gpio = pinData->gpio; ++ ++ gpio_event_monitor( &monitor ); ++ } ++ ++ // Deregister our driver ++ ++ device_destroy( gGpioEventClass, gGpioEventDevNum ); ++ class_destroy( gGpioEventClass ); ++ ++ cdev_del( &gGpioEventCDev ); ++ ++ if ( gSysCtlHeader != NULL ) ++ { ++ unregister_sysctl_table( gSysCtlHeader ); ++ } ++ remove_proc_entry( "pins", gProcGpioEvent ); ++ remove_proc_entry( "gpio-event", NULL ); ++ ++ unregister_chrdev_region( gGpioEventDevNum, 1 ); ++ ++} // gpio_event_exit ++ ++/****************************************************************************/ ++ ++module_init(gpio_event_init); ++module_exit(gpio_event_exit); ++ ++MODULE_AUTHOR("Dave Hylands"); ++MODULE_DESCRIPTION("GPIO Event Driver"); ++MODULE_LICENSE("Dual BSD/GPL"); ++ +diff -Naur linux-3.2.33-go.orig/3rdparty/gpio_event_drv/gpio-event-drv.h 3rdparty/gpio_event_drv/gpio-event-drv.h +--- linux-3.2.33-go.orig/3rdparty/gpio_event_drv/gpio-event-drv.h 1970-01-01 00:00:00.000000000 +0000 ++++ 3rdparty/gpio_event_drv/gpio-event-drv.h 2012-11-18 10:24:14.000000000 +0000 +@@ -0,0 +1,115 @@ ++/**************************************************************************** ++* ++* Copyright (c) 2006 Dave Hylands ++* ++* This program is free software; you can redistribute it and/or modify ++* it under the terms of the GNU General Public License version 2 as ++* published by the Free Software Foundation. ++* ++* Alternatively, this software may be distributed under the terms of BSD ++* license. ++* ++* See README and COPYING for more details. ++* ++**************************************************************************** ++* ++* This driver allows multiple GPIO pins to be monitored and allows a user ++* mode program to be notified when the pin changes. 
++* ++****************************************************************************/ ++ ++#if !defined( GPIO_EVENT_DRV_H ) ++#define GPIO_EVENT_DRV_H ++ ++/* ---- Include Files ----------------------------------------------------- */ ++ ++#if defined( __KERNEL__ ) ++# include ++# include ++# include ++#else ++# include ++# include ++# include ++#endif ++ ++ ++/* ---- Constants and Types ----------------------------------------------- */ ++ ++// The ioctl "magic" is just some character value which is used to help ++// detect when incorrect ioctl values are sent down to a driver. ++ ++#define GPIO_EVENT_IOCTL_MAGIC 'G' ++ ++/** ++ * Deefines for each of the ioctl commands. Note that since we want to reduce ++ * the possibility that a user mode program gets out of sync with a given ++ * driver, we explicitly assign a value to each enumeration. This makes ++ * it more difficult to stick new ioctl's in the middle of the list. ++ */ ++ ++typedef enum ++{ ++ GPIO_EVENT_CMD_FIRST = 0x80, ++ ++ GPIO_EVENT_CMD_MONITOR_GPIO = 0x80, ++ GPIO_EVENT_CMD_SET_READ_MODE = 0x81, ++ ++ /* Insert new ioctls here */ ++ ++ GPIO_EVENT_CMD_LAST, ++ ++} GPIO_EVENT_CMD; ++ ++typedef enum ++{ ++ GPIO_EventRisingEdge = 0x01, ++ GPIO_EventFallingEdge = 0x02, ++ GPIO_EventBothEdges = GPIO_EventRisingEdge | GPIO_EventFallingEdge, ++ ++} GPIO_EventEdgeType_t; ++ ++typedef struct ++{ ++ uint8_t gpio; // gpio to monitor ++ uint8_t onOff; // 0 = stop monitoring, 1 = start monitoring ++ GPIO_EventEdgeType_t edgeType; // Monitor rising/falling/both edges? ++ uint8_t debounceMilliSec; // debounce time in milliseconds ++ ++} GPIO_EventMonitor_t; ++ ++typedef enum ++{ ++ GPIO_EventReadModeAscii = 0x00, // Reads return ASCII data (default) ++ GPIO_EventReadModeBinary = 0x01, // Reads return Binary data ++ ++} GPIO_EventReadMode_t; ++ ++/* ++ * Definitions for the actual ioctl commands ++ */ ++ ++#define GPIO_EVENT_IOCTL_MONITOR_GPIO _IOW( GPIO_EVENT_IOCTL_MAGIC, GPIO_EVENT_CMD_MONITOR_GPIO, GPIO_EventMonitor_t ) // arg is GPIO_EventMonitor * ++#define GPIO_EVENT_IOCTL_SET_READ_MODE _IO( GPIO_EVENT_IOCTL_MAGIC, GPIO_EVENT_CMD_SET_READ_MODE ) // arg is int ++ ++/* ++ * Definitions for sysctl. The top level define has to be unique system wide. 
++ * The kernel defines values 1 thru about 10 (see include/linunx/sysctl.h) ++ */ ++ ++#define CTL_GPIO_EVENT 0x47504576 // 'GPEv' in hex form ++ ++/* ++ * Reads return GPIO_Event_t structures ++ */ ++ ++typedef struct ++{ ++ uint8_t gpio; // GPIO that this event is for ++ GPIO_EventEdgeType_t edgeType; // Type of edge detected ++ struct timeval time; // Time the event occurred ++ ++} GPIO_Event_t; ++ ++#endif // GPIO_EVENT_DRV_H ++ diff --git a/3.2.34/3rd-3rdparty-merge.patch b/3.2.34/3rd-3rdparty-merge.patch new file mode 100644 index 0000000..dff4679 --- /dev/null +++ b/3.2.34/3rd-3rdparty-merge.patch @@ -0,0 +1,156 @@ +diff -uNr linux-3.2.33-go.orig/arch/alpha/Kconfig linux-3.2.33-go/arch/alpha/Kconfig +--- linux-3.2.33-go.orig/arch/alpha/Kconfig 2012-11-15 22:08:02.768806792 +0100 ++++ linux-3.2.33-go/arch/alpha/Kconfig 2012-11-15 22:08:29.937483632 +0100 +@@ -673,3 +673,4 @@ + + source "lib/Kconfig" + ++source "3rdparty/Kconfig" +diff -uNr linux-3.2.33-go.orig/arch/arm/Kconfig linux-3.2.33-go/arch/arm/Kconfig +--- linux-3.2.33-go.orig/arch/arm/Kconfig 2012-11-15 22:07:59.952839378 +0100 ++++ linux-3.2.33-go/arch/arm/Kconfig 2012-11-15 22:14:01.950566716 +0100 +@@ -2259,3 +2259,5 @@ + source "crypto/Kconfig" + + source "lib/Kconfig" ++ ++source "3rdparty/Kconfig" +diff -uNr linux-3.2.33-go.orig/arch/ia64/Kconfig linux-3.2.33-go/arch/ia64/Kconfig +--- linux-3.2.33-go.orig/arch/ia64/Kconfig 2012-11-15 22:08:00.893828523 +0100 ++++ linux-3.2.33-go/arch/ia64/Kconfig 2012-11-15 22:08:29.938483621 +0100 +@@ -669,3 +669,5 @@ + + config IOMMU_HELPER + def_bool (IA64_HP_ZX1 || IA64_HP_ZX1_SWIOTLB || IA64_GENERIC || SWIOTLB) ++ ++source "3rdparty/Kconfig" +diff -uNr linux-3.2.33-go.orig/arch/mips/Kconfig linux-3.2.33-go/arch/mips/Kconfig +--- linux-3.2.33-go.orig/arch/mips/Kconfig 2012-11-15 22:08:02.698807597 +0100 ++++ linux-3.2.33-go/arch/mips/Kconfig 2012-11-15 22:08:29.939483610 +0100 +@@ -2485,3 +2485,5 @@ + source "crypto/Kconfig" + + source "lib/Kconfig" ++ ++source "3rdparty/Kconfig" +diff -uNr linux-3.2.33-go.orig/arch/powerpc/Kconfig linux-3.2.33-go/arch/powerpc/Kconfig +--- linux-3.2.33-go.orig/arch/powerpc/Kconfig 2012-11-15 22:08:01.893816938 +0100 ++++ linux-3.2.33-go/arch/powerpc/Kconfig 2012-11-15 22:08:29.940483598 +0100 +@@ -980,3 +980,5 @@ + bool + + source "arch/powerpc/kvm/Kconfig" ++ ++source "3rdparty/Kconfig" +diff -uNr linux-3.2.33-go.orig/arch/sparc/Kconfig linux-3.2.33-go/arch/sparc/Kconfig +--- linux-3.2.33-go.orig/arch/sparc/Kconfig 2012-11-15 22:08:00.130837331 +0100 ++++ linux-3.2.33-go/arch/sparc/Kconfig 2012-11-15 22:08:29.941483586 +0100 +@@ -605,3 +605,5 @@ + source "crypto/Kconfig" + + source "lib/Kconfig" ++ ++source "3rdparty/Kconfig" +diff -uNr linux-3.2.33-go.orig/arch/x86/Kconfig linux-3.2.33-go/arch/x86/Kconfig +--- linux-3.2.33-go.orig/arch/x86/Kconfig 2012-11-15 22:08:00.435833823 +0100 ++++ linux-3.2.33-go/arch/x86/Kconfig 2012-11-15 22:08:29.945483540 +0100 +@@ -2179,3 +2179,5 @@ + source "arch/x86/kvm/Kconfig" + + source "lib/Kconfig" ++ ++source "3rdparty/Kconfig" +diff -uNr linux-3.2.33-go.orig/Makefile linux-3.2.33-go/Makefile +--- linux-3.2.33-go.orig/Makefile 2012-11-15 22:08:03.435799123 +0100 ++++ linux-3.2.33-go/Makefile 2012-11-15 22:08:29.946483529 +0100 +@@ -507,7 +507,7 @@ + + # Objects we will link into vmlinux / subdirs we need to visit + init-y := init/ +-drivers-y := drivers/ sound/ firmware/ ++drivers-y := drivers/ sound/ firmware/ 3rdparty/ + net-y := net/ + libs-y := lib/ + core-y := usr/ +diff -uNr 
linux-3.2.33-go.orig/scripts/kconfig/Makefile linux-3.2.33-go/scripts/kconfig/Makefile +--- linux-3.2.33-go.orig/scripts/kconfig/Makefile 2012-11-15 22:07:58.064861094 +0100 ++++ linux-3.2.33-go/scripts/kconfig/Makefile 2012-11-15 22:08:55.603180188 +0100 +@@ -11,29 +11,29 @@ + Kconfig := Kconfig + endif + +-xconfig: $(obj)/qconf ++xconfig: $(obj)/qconf 3rdparty/Makefile + $< $(Kconfig) + +-gconfig: $(obj)/gconf ++gconfig: $(obj)/gconf 3rdparty/Makefile + $< $(Kconfig) + +-menuconfig: $(obj)/mconf ++menuconfig: $(obj)/mconf 3rdparty/Makefile + $< $(Kconfig) + +-config: $(obj)/conf ++config: $(obj)/conf 3rdparty/Makefile + $< --oldaskconfig $(Kconfig) + +-nconfig: $(obj)/nconf ++nconfig: $(obj)/nconf 3rdparty/Makefile + $< $(Kconfig) + +-oldconfig: $(obj)/conf ++oldconfig: $(obj)/conf 3rdparty/Makefile + $< --$@ $(Kconfig) + +-silentoldconfig: $(obj)/conf ++silentoldconfig: $(obj)/conf 3rdparty/Makefile + $(Q)mkdir -p include/generated + $< --$@ $(Kconfig) + +-localyesconfig localmodconfig: $(obj)/streamline_config.pl $(obj)/conf ++localyesconfig localmodconfig: $(obj)/streamline_config.pl $(obj)/conf 3rdparty/Makefile + $(Q)mkdir -p include/generated + $(Q)perl $< --$@ $(srctree) $(Kconfig) > .tmp.config + $(Q)if [ -f .config ]; then \ +@@ -90,18 +90,18 @@ + *) cat $(CLONECONFIG) > .config.running ;; \ + esac && \ + echo -e "Cloning configuration file $(CLONECONFIG)\n" +- $(Q)$< --defconfig=.config.running arch/$(SRCARCH)/Kconfig ++ $(Q)$< --defconfig=.config.running arch/$(SRCARCH)/Kconfig 3rdparty/Makefile + + + PHONY += listnewconfig oldnoconfig savedefconfig defconfig + +-listnewconfig oldnoconfig: $(obj)/conf ++listnewconfig oldnoconfig: $(obj)/conf 3rdparty/Makefile + $< --$@ $(Kconfig) + +-savedefconfig: $(obj)/conf ++savedefconfig: $(obj)/conf 3rdparty/Makefile + $< --$@=defconfig $(Kconfig) + +-defconfig: $(obj)/conf ++defconfig: $(obj)/conf 3rdparty/Makefile + ifeq ($(KBUILD_DEFCONFIG),) + $< --defconfig $(Kconfig) + else +@@ -109,7 +109,7 @@ + $(Q)$< --defconfig=arch/$(SRCARCH)/configs/$(KBUILD_DEFCONFIG) $(Kconfig) + endif + +-%_defconfig: $(obj)/conf ++%_defconfig: $(obj)/conf 3rdparty/Makefile + $(Q)$< --defconfig=arch/$(SRCARCH)/configs/$@ $(Kconfig) + + # Help text used by make help +@@ -186,6 +186,8 @@ + gconf-target := 1 + endif + ++3rdparty/Makefile: ++ pushd $(srctree)/3rdparty ; $(PERL) ./mkbuild.pl ; popd + + ifeq ($(qconf-target),1) + hostprogs-y += qconf diff --git a/3.2.34/3rd-3rdparty-netatop-0.1.1.patch b/3.2.34/3rd-3rdparty-netatop-0.1.1.patch new file mode 100644 index 0000000..a06a77d --- /dev/null +++ b/3.2.34/3rd-3rdparty-netatop-0.1.1.patch @@ -0,0 +1,1769 @@ +diff -uNr linux-3.2.33-go.orig/3rdparty/netatop/Kconfig 3rdparty/netatop/Kconfig +--- linux-3.2.33-go.orig/3rdparty/netatop/Kconfig 1970-01-01 01:00:00.000000000 +0100 ++++ 3rdparty/netatop/Kconfig 2012-11-15 22:48:00.753390796 +0100 +@@ -0,0 +1,8 @@ ++config NETATOP ++ tristate "Netatop kernel module" ++ help ++ The optional kernel module netatop can be loaded to gather statistics ++ about the TCP and UDP packets that have been transmitted/received ++ per process and per thread ++ ++ If unsure, see you again in six months. +diff -uNr linux-3.2.33-go.orig/3rdparty/netatop/Makefile 3rdparty/netatop/Makefile +--- linux-3.2.33-go.orig/3rdparty/netatop/Makefile 1970-01-01 01:00:00.000000000 +0100 ++++ 3rdparty/netatop/Makefile 2012-11-15 22:50:01.332957868 +0100 +@@ -0,0 +1,5 @@ ++# ++# THIS IS AN AUTOMATICALLY GENERATED FILE. DO NOT EDIT. 
++# ++ ++obj-$(CONFIG_NETATOP) += netatop.o +diff -uNr linux-3.2.33-go.orig/3rdparty/netatop/netatop.c 3rdparty/netatop/netatop.c +--- linux-3.2.33-go.orig/3rdparty/netatop/netatop.c 1970-01-01 01:00:00.000000000 +0100 ++++ 3rdparty/netatop/netatop.c 2012-11-15 22:57:52.989419565 +0100 +@@ -0,0 +1,1687 @@ ++/* ++** This module uses the netfilter interface to maintain statistics ++** about the network traffic per task, on level of thread group ++** and individual thread. ++** ++** General setup ++** ------------- ++** Once the module is active, it is called for every packet that is ++** transmitted by a local process and every packet that is received ++** from an interface. Not only the packets that contain the user data ++** are passed but also the TCP related protocol packets (SYN, ACK, ...). ++** ++** When the module discovers a packet for a connection (TCP) or local ++** port (UDP) that is new, it creates a sockinfo structure. As soon as ++** possible the sockinfo struct will be connected to a taskinfo struct ++** that represents the proces or thread that is related to the socket. ++** However, the task can only be determined when a packet is transmitted, ++** i.e. the module is called during system call handling in the context ++** of the transmitting process. At that moment the tgid (process) and ++** pid (thread) can be obtained from the process administration to ++** be stored in the module's own taskinfo structs (one for the process, ++** one for the thread). ++** For the time that the sockinfo struct can not be related to a taskinfo ++** struct (e.g. when only packets are received), counters are maintained ++** temporarily in the sockinfo struct. As soon as a related taskinfo struct ++** is discovered when the task transmits, counters will be maintained in ++** the taskinfo struct itself. ++** When packets are only received for a socket (e.g. another machine is ++** sending UDP packets to the local machine) while the local task ++** never responds, no match to a process can be made and the packets ++** remain unidentified by the netatop module. At least one packet should ++** have been sent by a local process to be able to match packets for such ++** socket. ++** In the file /proc/netatop counters can be found that show the total ++** number of packets sent/received and how many of these packets were ++** unidentified (i.e. not accounted to a process/thread). ++** ++** Garbage collection ++** ------------------ ++** The module uses a garbage collector to cleanup the unused sockinfo ++** structs if connections do not exist any more (TCP) or have not been ++** used for some time (TCP/UDP). ++** Furthermore, the garbage collector checks if the taskinfo structs ++** still represent existing processes or threads. If not, the taskinfo struct ++** is destroyed (in case of a thread) or it is moved to a separate list of ++** finished processes (in case of a process). Analysis programs can read ++** the taskinfo of such finished process. When the taskinfo of a finished ++** process is not read within 15 seconds, the taskinfo will be destroyed. ++** ++** A garbage collector cycle can be triggered by issueing a getsockopt ++** call from an analysis program (e.g. atop). Apart from that, a time-based ++** garbage collector cycle is issued anyhow every 15 seconds. ++** ++** Interface with user mode ++** ------------------------ ++** Programs can open an IP socket and use the getsockopt() system call ++** to issue commands to this module. 
With the command ATOP_GETCNT_TGID ++** the current counters can be obtained on process level (thread group) ++** and with the command ATOP_GETCNT_PID the counters on thread level. ++** For both commands, the tgid/pid has to be passed of the required thread ++** (group). When the required thread (group) does not exist, an errno ESRCH ++** is given. ++** ++** The command ATOP_GETCNT_EXIT can be issued to obtain the counters of ++** an exited process. As stated above, such command has to be issued ++** within 15 seconds after a process has been declared 'finished' by ++** the garbage collector. Whenever this command is issued and no exited ++** process is in the exitlist, the requesting process is blocked until ++** an exited process is available. ++** ++** The command NETATOP_FORCE_GC activates the garbage collector of the ++** netatop module to determine if sockinfo's of old connections/ports ++** can be destroyed and if taskinfo's of exited processes can be ++** The command NETATOP_EMPTY_EXIT can be issued to wait until the exitlist ++** with the taskinfo's of exited processes is empty. ++** ---------------------------------------------------------------------- ++** Copyright (C) 2012 Gerlof Langeveld (gerlof.langeveld@atoptool.nl) ++** ++** This program is free software; you can redistribute it and/or modify ++** it under the terms of the GNU General Public License version 2 as ++** published by the Free Software Foundation. ++*/ ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++ ++#include "netatop.h" ++#include "netatopversion.h" ++ ++MODULE_LICENSE("GPL"); ++MODULE_AUTHOR("Gerlof Langeveld "); ++MODULE_DESCRIPTION("Per-task network statistics"); ++MODULE_VERSION(NETATOPVERSION); ++ ++#define GCINTERVAL (HZ*15) // interval garbage collector (jiffies) ++#define GCMAXUDP (HZ*16) // max inactivity for UDP (jiffies) ++#define GCMAXTCP (HZ*1800) // max inactivity for TCP (jiffies) ++#define GCMAXUNREF (HZ*60) // max time without taskref (jiffies) ++ ++#define SILIMIT (2048*1024) // maximum memory for sockinfo structs ++#define TILIMIT (1024*1024) // maximum memory for taskinfo structs ++ ++#define NF_IP_PRE_ROUTING 0 ++#define NF_IP_LOCAL_IN 1 ++#define NF_IP_FORWARD 2 ++#define NF_IP_LOCAL_OUT 3 ++#define NF_IP_POST_ROUTING 4 ++ ++/* ++** struct that maintains statistics about the network ++** traffic caused per thread or thread group ++*/ ++struct chainer { ++ void *next; ++ void *prev; ++}; ++ ++struct taskinfobucket; ++ ++struct taskinfo { ++ struct chainer ch; ++ ++ pid_t id; // tgid or pid ++ char type; // 'g' (thread group) or ++ // 't' (thread) ++ unsigned char state; // see below ++ char command[COMLEN]; ++ unsigned long btime; // start time of process ++ unsigned long long exittime; // time inserted in exitlist ++ ++ struct taskcount tc; ++}; ++ ++// state values above ++#define CHECKED 1 // verified that task still exists ++#define INDELETE 2 // task exited but still in hash list ++#define FINISHED 3 // task on exit list ++ ++/* ++** hash tables to find a particular thread group or thread ++*/ ++#define TBUCKS 1024 // must be multiple of 2! 
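The "Interface with user mode" notes earlier in this file describe how an analysis program retrieves the per-task counters through getsockopt() on an ordinary IPv4 socket. The sketch below illustrates that call for one thread group. It is illustrative only: the command constant (spelled ATOP_GETCNT_TGID in the comments above, assumed here to be exported as NETATOP_GETCNT_TGID) and the netpertask record with its embedded struct taskcount come from netatop.h, which is not reproduced in this hunk.

/* Hedged sketch, not the reference client from atop: constant and field
 * names below are assumed to match netatop.h. */
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>
#include <sys/socket.h>
#include <netinet/in.h>

#include "netatop.h"   /* assumed to define NETATOP_GETCNT_TGID and struct netpertask */

int main(int argc, char *argv[])
{
    struct netpertask npt;          /* assumed layout: id, btime, command[], struct taskcount tc */
    socklen_t len = sizeof(npt);
    int sock;

    if (argc != 2) {
        fprintf(stderr, "usage: %s <tgid>\n", argv[0]);
        return 1;
    }

    /* Any IPv4 socket will do; the module registers a getsockopt handler for
     * option numbers NETATOP_BASE_CTL .. NETATOP_BASE_CTL+6 at the IP level. */
    sock = socket(PF_INET, SOCK_DGRAM, 0);
    if (sock < 0) {
        perror("socket");
        return 1;
    }

    memset(&npt, 0, sizeof(npt));
    npt.id = atoi(argv[1]);         /* thread group whose counters are wanted */

    if (getsockopt(sock, SOL_IP, NETATOP_GETCNT_TGID, &npt, &len) < 0) {
        perror("NETATOP_GETCNT_TGID");  /* ESRCH: thread group unknown to the module */
        close(sock);
        return 1;
    }

    /* The traffic counters themselves live in npt.tc (struct taskcount). */
    printf("command %s, tgid %d: counters retrieved (%u bytes)\n",
           npt.command, (int)npt.id, (unsigned)len);

    close(sock);
    return 0;
}

The counters of exited processes (ATOP_GETCNT_EXIT) and the NETATOP_FORCE_GC / NETATOP_EMPTY_EXIT commands described above are issued through the same getsockopt() mechanism.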
++#define THASH(x, t) (((x)+t)&(TBUCKS-1)) ++ ++struct taskinfobucket { ++ struct chainer ch; ++ spinlock_t lock; ++} thash[TBUCKS]; ++ ++static unsigned long nrt; // current number of taskinfo allocated ++static unsigned long nrt_ovf; // no taskinfo allocated due to overflow ++static DEFINE_SPINLOCK(nrtlock); ++ ++ ++static struct taskinfo *exithead; // linked list of exited processes ++static struct taskinfo *exittail; ++static DEFINE_SPINLOCK(exitlock); ++ ++static DECLARE_WAIT_QUEUE_HEAD(exitlist_filled); ++static DECLARE_WAIT_QUEUE_HEAD(exitlist_empty); ++ ++static unsigned long nre; // current number of taskinfo on exitlist ++ ++/* ++** structs that uniquely identify a TCP connection (host endian format) ++*/ ++struct tcpv4_ident { ++ uint32_t laddr; /* local IP address */ ++ uint32_t raddr; /* remote IP address */ ++ uint16_t lport; /* local port number */ ++ uint16_t rport; /* remote port number */ ++}; ++ ++struct tcpv6_ident { ++ struct in6_addr laddr; /* local IP address */ ++ struct in6_addr raddr; /* remote IP address */ ++ uint16_t lport; /* local port number */ ++ uint16_t rport; /* remote port number */ ++}; ++ ++/* ++** struct to maintain the reference from a socket ++** to a thread and thread-group ++*/ ++struct sockinfo { ++ struct chainer ch; ++ ++ unsigned char last_state; // last known state of socket ++ uint8_t proto; // protocol ++ ++ union keydef { ++ uint16_t udp; // UDP ident (only portnumber) ++ struct tcpv4_ident tcp4; // TCP connection ident IPv4 ++ struct tcpv6_ident tcp6; // TCP connection ident IPv6 ++ } key; ++ ++ struct taskinfo *tgp; // ref to thread group ++ struct taskinfo *thp; // ref to thread (or NULL) ++ ++ short tgh; // hash number of thread group ++ short thh; // hash number of thread ++ ++ unsigned long sndpacks; // temporary counters in case ++ unsigned long sndbytes; // no relation to process is ++ unsigned long rcvpacks; // known yet ++ unsigned long rcvbytes; ++ ++ unsigned long long lastact; // last updated (jiffies) ++}; ++ ++/* ++** hash table to find a socket reference ++*/ ++#define SBUCKS 1024 // must be multiple of 2! 
++#define SHASHTCP4(x) (((x).raddr+(x).lport+(x).rport)&(SBUCKS-1)) ++#define SHASHUDP(x) ((x)&(SBUCKS-1)) ++ ++struct { ++ struct chainer ch; ++ spinlock_t lock; ++} shash[SBUCKS]; ++ ++static unsigned long nrs; // current number sockinfo allocated ++static unsigned long nrs_ovf; // no sockinfo allocated due to overflow ++static DEFINE_SPINLOCK(nrslock); ++ ++/* ++** various static counters ++*/ ++static unsigned long icmpsndbytes; ++static unsigned long icmpsndpacks; ++static unsigned long icmprcvbytes; ++static unsigned long icmprcvpacks; ++ ++static unsigned long tcpsndpacks; ++static unsigned long tcprcvpacks; ++static unsigned long udpsndpacks; ++static unsigned long udprcvpacks; ++static unsigned long unidentudpsndpacks; ++static unsigned long unidentudprcvpacks; ++static unsigned long unidenttcpsndpacks; ++static unsigned long unidenttcprcvpacks; ++ ++static unsigned long unknownproto; ++ ++static struct timer_list timer; ++static DEFINE_SPINLOCK(gclock); ++static unsigned long long gclast; // last garbage collection (jiffies) ++ ++static struct timespec boottime; ++ ++/* ++** function prototypes ++*/ ++static void analyze_tcpv4_packet(struct sk_buff *, ++ const struct net_device *, int, char, ++ struct iphdr *, void *); ++ ++static void analyze_udp_packet(struct sk_buff *, ++ const struct net_device *, int, char, ++ struct iphdr *, void *); ++ ++static int sock2task(char, struct sockinfo *, ++ struct taskinfo **, short *, ++ struct sk_buff *, const struct net_device *, ++ int, char); ++ ++static void update_taskcounters(struct sk_buff *, ++ const struct net_device *, ++ struct taskinfo *, char); ++ ++static void update_sockcounters(struct sk_buff *, ++ const struct net_device *, ++ struct sockinfo *, char); ++ ++static void sock2task_sync(struct sk_buff *, ++ struct sockinfo *, struct taskinfo *); ++ ++static void register_unident(struct sockinfo *); ++ ++static int calc_reallen(struct sk_buff *, ++ const struct net_device *); ++ ++static void get_tcpv4_ident(struct iphdr *, void *, ++ char, union keydef *); ++ ++static struct sockinfo *find_sockinfo(int, union keydef *, int, int); ++static struct sockinfo *make_sockinfo(int, union keydef *, int, int); ++ ++static void wipesockinfo(void); ++static void wipetaskinfo(void); ++static void wipetaskexit(void); ++ ++static void garbage_collector(void); ++static void gcperiodic(unsigned long unused); ++static void gctaskexit(void); ++static void gcsockinfo(void); ++static void gctaskinfo(void); ++ ++static void move_taskinfo(struct taskinfo *); ++static void delete_taskinfo(struct taskinfo *); ++static void delete_sockinfo(struct sockinfo *); ++ ++static struct taskinfo *get_taskinfo(pid_t, char); ++ ++static int getsockopt(struct sock *, int, void *, int *); ++ ++/* ++** hook definitions ++*/ ++static struct nf_hook_ops hookin_ipv4; ++static struct nf_hook_ops hookout_ipv4; ++ ++/* ++** getsockopt definitions for communication with user space ++*/ ++static struct nf_sockopt_ops sockopts = { ++ .pf = PF_INET, ++ .get_optmin = NETATOP_BASE_CTL, ++ .get_optmax = NETATOP_BASE_CTL+6, ++ .get = getsockopt, ++ .owner = THIS_MODULE, ++}; ++ ++/* ++** hook function to be called for every incoming local packet ++*/ ++static unsigned int ++ipv4_hookin(unsigned int hooknum, ++ struct sk_buff *skb, ++ const struct net_device *in, ++ const struct net_device *out, ++ int (*okfn)(struct sk_buff *)) ++{ ++ struct iphdr *iph; ++ void *trh; ++ ++ if (skb == NULL) // useless socket buffer? 
++ return NF_ACCEPT; ++ ++ /* ++ ** get pointer to IP header and transport header ++ */ ++ iph = (struct iphdr *)skb_network_header(skb); ++ trh = ((char *)iph + (iph->ihl * 4)); ++ ++ /* ++ ** react on protocol number ++ */ ++ switch (iph->protocol) { ++ case IPPROTO_TCP: ++ tcprcvpacks++; ++ analyze_tcpv4_packet(skb, in, 0, 'i', iph, trh); ++ break; ++ ++ case IPPROTO_UDP: ++ udprcvpacks++; ++ analyze_udp_packet(skb, in, 0, 'i', iph, trh); ++ break; ++ ++ case IPPROTO_ICMP: ++ icmprcvpacks++; ++ icmprcvbytes += skb->len + in->hard_header_len + 4; ++ break; ++ ++ default: ++ unknownproto++; ++ } ++ ++ // accept every packet after stats gathering ++ return NF_ACCEPT; ++} ++ ++/* ++** hook function to be called for every outgoing local packet ++*/ ++static unsigned int ++ipv4_hookout(unsigned int hooknum, ++ struct sk_buff *skb, ++ const struct net_device *in, ++ const struct net_device *out, ++ int (*okfn)(struct sk_buff *)) ++{ ++ int in_syscall = !in_interrupt(); ++ struct iphdr *iph; ++ void *trh; ++ ++ if (skb == NULL) // useless socket buffer? ++ return NF_ACCEPT; ++ ++ /* ++ ** get pointer to IP header and transport header ++ */ ++ iph = (struct iphdr *)skb_network_header(skb); ++ trh = skb_transport_header(skb); ++ ++ /* ++ ** react on protocol number ++ */ ++ switch (iph->protocol) { ++ case IPPROTO_TCP: ++ tcpsndpacks++; ++ analyze_tcpv4_packet(skb, out, in_syscall, 'o', iph, trh); ++ break; ++ ++ case IPPROTO_UDP: ++ udpsndpacks++; ++ analyze_udp_packet(skb, out, in_syscall, 'o', iph, trh); ++ break; ++ ++ case IPPROTO_ICMP: ++ icmpsndpacks++; ++ icmpsndbytes += skb->len + out->hard_header_len + 4; ++ break; ++ ++ default: ++ unknownproto++; ++ } ++ ++ // accept every packet after stats gathering ++ return NF_ACCEPT; ++} ++ ++/* ++** generic function (for input and output) to analyze the current packet ++*/ ++static void ++analyze_tcpv4_packet(struct sk_buff *skb, ++ const struct net_device *ndev, // interface description ++ int in_syscall, // called during system call? 
++ char direction, // incoming ('i') or outgoing ('o') ++ struct iphdr *iph, void *trh) ++{ ++ union keydef key; ++ struct sockinfo *sip; ++ int bs; // hash bucket for sockinfo ++ unsigned long sflags; ++ ++ /* ++ ** determine tcpv4_ident that identifies this TCP packet ++ ** and calculate hash bucket in sockinfo hash ++ */ ++ get_tcpv4_ident(iph, trh, direction, &key); ++ ++ /* ++ ** check if we have seen this tcpv4_ident before with a ++ ** corresponding thread and thread group ++ */ ++ bs = SHASHTCP4(key.tcp4); ++ ++ spin_lock_irqsave(&shash[bs].lock, sflags); ++ ++ if ( (sip = find_sockinfo(IPPROTO_TCP, &key, sizeof key.tcp4, bs)) ++ == NULL) { ++ // no sockinfo yet: create one ++ if ( (sip = make_sockinfo(IPPROTO_TCP, &key, ++ sizeof key.tcp4, bs)) == NULL) { ++ if (direction == 'i') ++ unidenttcprcvpacks++; ++ else ++ unidenttcpsndpacks++; ++ goto unlocks; ++ } ++ } ++ ++ if (skb->sk) ++ sip->last_state = skb->sk->sk_state; ++ ++ /* ++ ** if needed (re)connect the sockinfo to a taskinfo and update ++ ** the counters ++ */ ++ ++ // connect to thread group and update ++ if (sock2task('g', sip, &sip->tgp, &sip->tgh, ++ skb, ndev, in_syscall, direction)) { ++ // connect to thread and update ++ (void) sock2task('t', sip, &sip->thp, &sip->thh, ++ skb, ndev, in_syscall, direction); ++ } ++ ++unlocks: ++ spin_unlock_irqrestore(&shash[bs].lock, sflags); ++} ++ ++ ++/* ++** generic function (for input and output) to analyze the current packet ++*/ ++static void ++analyze_udp_packet(struct sk_buff *skb, ++ const struct net_device *ndev, // interface description ++ int in_syscall, // called during system call? ++ char direction, // incoming ('i') or outgoing ('o') ++ struct iphdr *iph, void *trh) ++{ ++ struct udphdr *udph = (struct udphdr *)trh; ++ uint16_t udplocal = (direction == 'i' ? 
++ ntohs(udph->dest) : ntohs(udph->source)); ++ int bs; // hash bucket for sockinfo ++ ++ union keydef key; ++ struct sockinfo *sip; ++ unsigned long sflags; ++ ++ /* ++ ** check if we have seen this local UDP port before with a ++ ** corresponding thread and thread group ++ */ ++ key.udp = udplocal; ++ bs = SHASHUDP(udplocal); ++ ++ spin_lock_irqsave(&shash[bs].lock, sflags); ++ ++ if ( (sip = find_sockinfo(IPPROTO_UDP, &key, sizeof key.udp, bs)) ++ == NULL) { ++ // no sockinfo yet: create one ++ if ( (sip = make_sockinfo(IPPROTO_UDP, &key, ++ sizeof key.udp, bs)) == NULL) { ++ if (direction == 'i') ++ unidentudprcvpacks++; ++ else ++ unidentudpsndpacks++; ++ goto unlocks; ++ } ++ } ++ ++ /* ++ ** if needed (re)connect the sockinfo to a taskinfo and update ++ ** the counters ++ */ ++ ++ // connect to thread group and update ++ if (sock2task('g', sip, &sip->tgp, &sip->tgh, ++ skb, ndev, in_syscall, direction)) { ++ // connect to thread and update ++ (void) sock2task('t', sip, &sip->thp, &sip->thh, ++ skb, ndev, in_syscall, direction); ++ } ++ ++unlocks: ++ spin_unlock_irqrestore(&shash[bs].lock, sflags); ++} ++ ++/* ++** connect the sockinfo to the correct taskinfo and update the counters ++*/ ++static int ++sock2task(char idtype, struct sockinfo *sip, struct taskinfo **tipp, ++ short *hash, struct sk_buff *skb, const struct net_device *ndev, ++ int in_syscall, char direction) ++{ ++ pid_t curid; ++ unsigned long tflags; ++ ++ if (*tipp == NULL) { ++ /* ++ ** no taskinfo connected yet for this reference from ++ ** sockinfo; to connect to a taskinfo, we must ++ ** be in system call handling now --> verify ++ */ ++ if (!in_syscall) { ++ if (idtype == 'g') ++ update_sockcounters(skb, ndev, sip, direction); ++ ++ return 0; // failed ++ } ++ ++ /* ++ ** try to find existing taskinfo or create new taskinfo ++ */ ++ curid = (idtype == 'g' ? current->tgid : current->pid); ++ ++ *hash = THASH(curid, idtype); // calc hashQ ++ ++ spin_lock_irqsave(&thash[*hash].lock, tflags); ++ ++ if ( (*tipp = get_taskinfo(curid, idtype)) == NULL) { ++ /* ++ ** not possible to connect ++ */ ++ spin_unlock_irqrestore(&thash[*hash].lock, tflags); ++ ++ if (idtype == 'g') ++ update_sockcounters(skb, ndev, sip, direction); ++ ++ return 0; // failed ++ } ++ ++ /* ++ ** new connection made: ++ ** update task counters with sock counters ++ */ ++ sock2task_sync(skb, sip, *tipp); ++ } else { ++ /* ++ ** already related to thread group or thread ++ ** lock existing task ++ */ ++ spin_lock_irqsave(&thash[*hash].lock, tflags); ++ ++ /* ++ ** check if socket has been passed to another process in the ++ ** meantime, like programs as xinetd use to do ++ ** if so, connect sockinfo to the new task ++ */ ++ if (in_syscall) { ++ curid = (idtype == 'g' ? 
current->tgid : current->pid); ++ ++ if ((*tipp)->id != curid) { ++ spin_unlock_irqrestore(&thash[*hash].lock, ++ tflags); ++ *hash = THASH(curid, idtype); ++ ++ spin_lock_irqsave(&thash[*hash].lock, tflags); ++ ++ if ( (*tipp = get_taskinfo(curid, idtype)) ++ == NULL) { ++ spin_unlock_irqrestore( ++ &thash[*hash].lock, tflags); ++ return 0; ++ } ++ } ++ } ++ } ++ ++ update_taskcounters(skb, ndev, *tipp, direction); ++ ++ spin_unlock_irqrestore(&thash[*hash].lock, tflags); ++ ++ return 1; ++} ++ ++/* ++** update the statistics of a particular thread group or thread ++*/ ++static void ++update_taskcounters(struct sk_buff *skb, const struct net_device *ndev, ++ struct taskinfo *tip, char direction) ++{ ++ struct iphdr *iph = (struct iphdr *)skb_network_header(skb); ++ int reallen = calc_reallen(skb, ndev); ++ ++ switch (iph->protocol) { ++ case IPPROTO_TCP: ++ if (direction == 'i') { ++ tip->tc.tcprcvpacks++; ++ tip->tc.tcprcvbytes += reallen; ++ } else { ++ tip->tc.tcpsndpacks++; ++ tip->tc.tcpsndbytes += reallen; ++ } ++ break; ++ ++ case IPPROTO_UDP: ++ if (direction == 'i') { ++ tip->tc.udprcvpacks++; ++ tip->tc.udprcvbytes += reallen; ++ } else { ++ tip->tc.udpsndpacks++; ++ tip->tc.udpsndbytes += reallen; ++ } ++ } ++} ++ ++/* ++** update the statistics of a sockinfo without a connected task ++*/ ++static void ++update_sockcounters(struct sk_buff *skb, const struct net_device *ndev, ++ struct sockinfo *sip, char direction) ++{ ++ int reallen = calc_reallen(skb, ndev); ++ ++ if (direction == 'i') { ++ sip->rcvpacks++; ++ sip->rcvbytes += reallen; ++ } else { ++ sip->sndpacks++; ++ sip->sndbytes += reallen; ++ } ++} ++ ++/* ++** add the temporary counters in the sockinfo to the new connected task ++*/ ++static void ++sock2task_sync(struct sk_buff *skb, struct sockinfo *sip, struct taskinfo *tip) ++{ ++ struct iphdr *iph = (struct iphdr *)skb_network_header(skb); ++ ++ switch (iph->protocol) { ++ case IPPROTO_TCP: ++ tip->tc.tcprcvpacks += sip->rcvpacks; ++ tip->tc.tcprcvbytes += sip->rcvbytes; ++ tip->tc.tcpsndpacks += sip->sndpacks; ++ tip->tc.tcpsndbytes += sip->sndbytes; ++ break; ++ ++ case IPPROTO_UDP: ++ tip->tc.udprcvpacks += sip->rcvpacks; ++ tip->tc.udprcvbytes += sip->rcvbytes; ++ tip->tc.udpsndpacks += sip->sndpacks; ++ tip->tc.udpsndbytes += sip->sndbytes; ++ } ++} ++ ++static void ++register_unident(struct sockinfo *sip) ++{ ++ switch (sip->proto) { ++ case IPPROTO_TCP: ++ unidenttcprcvpacks += sip->rcvpacks; ++ unidenttcpsndpacks += sip->sndpacks; ++ break; ++ ++ case IPPROTO_UDP: ++ unidentudprcvpacks += sip->rcvpacks; ++ unidentudpsndpacks += sip->sndpacks; ++ } ++} ++ ++/* ++** calculate the number of bytes that are really sent or received ++*/ ++static int ++calc_reallen(struct sk_buff *skb, const struct net_device *ndev) ++{ ++ /* ++ ** calculate the real load of this packet on the network: ++ ** ++ ** - length of IP header, TCP/UDP header and data (skb->len) ++ ** ++ ** since packet assembly/disassembly is done by the IP layer ++ ** (we get an input packet that has been assembled already and ++ ** an output packet that still has to be assembled), additional ++ ** IP headers/interface headers and interface headers have ++ ** to be calculated for packets that are larger than the mtu ++ ** ++ ** - interface header length + 4 bytes crc ++ */ ++ int reallen = skb->len; ++ ++ if (reallen > ndev->mtu) ++ reallen += (reallen / ndev->mtu) * ++ (sizeof(struct iphdr) + ndev->hard_header_len + 4); ++ ++ reallen += ndev->hard_header_len + 4; ++ ++ return reallen; ++} ++ ++/* ++** 
find the tcpv4_ident for the current packet, represented by ++** the skb_buff ++*/ ++static void ++get_tcpv4_ident(struct iphdr *iph, void *trh, char direction, union keydef *key) ++{ ++ struct tcphdr *tcph = (struct tcphdr *)trh; ++ ++ memset(key, 0, sizeof *key); // important for memcmp later on ++ ++ /* ++ ** determine local/remote IP address and ++ ** determine local/remote port number ++ */ ++ switch (direction) { ++ case 'i': // incoming packet ++ key->tcp4.laddr = ntohl(iph->daddr); ++ key->tcp4.raddr = ntohl(iph->saddr); ++ key->tcp4.lport = ntohs(tcph->dest); ++ key->tcp4.rport = ntohs(tcph->source); ++ break; ++ ++ case 'o': // outgoing packet ++ key->tcp4.laddr = ntohl(iph->saddr); ++ key->tcp4.raddr = ntohl(iph->daddr); ++ key->tcp4.lport = ntohs(tcph->source); ++ key->tcp4.rport = ntohs(tcph->dest); ++ } ++} ++ ++/* ++** search for the sockinfo holding the given address info ++** the appropriate hash bucket must have been locked before calling ++*/ ++static struct sockinfo * ++find_sockinfo(int proto, union keydef *identp, int identsz, int hash) ++{ ++ struct sockinfo *sip = shash[hash].ch.next; ++ ++ /* ++ ** search for appropriate struct ++ */ ++ while (sip != (void *)&shash[hash].ch) { ++ if ( memcmp(&sip->key, identp, identsz) == 0 && ++ sip->proto == proto) { ++ sip->lastact = jiffies_64; ++ return sip; ++ } ++ ++ sip = sip->ch.next; ++ } ++ ++ return NULL; // not existing ++} ++ ++/* ++** create a new sockinfo and fill ++** the appropriate hash bucket must have been locked before calling ++*/ ++static struct sockinfo * ++make_sockinfo(int proto, union keydef *identp, int identsz, int hash) ++{ ++ struct sockinfo *sip; ++ unsigned long flags; ++ ++ /* ++ ** check if the threshold of memory used for sockinfo structs ++ ** is reached to avoid that a fork bomb of processes opening ++ ** a socket leads to memory overload ++ */ ++ if ( (nrs+1) * sizeof(struct sockinfo) > SILIMIT) { ++ spin_lock_irqsave(&nrslock, flags); ++ nrs_ovf++; ++ spin_unlock_irqrestore(&nrslock, flags); ++ return NULL; ++ } ++ ++ if ( (sip = kzalloc(sizeof *sip, GFP_ATOMIC)) == NULL) ++ return NULL; ++ ++ spin_lock_irqsave(&nrslock, flags); ++ nrs++; ++ spin_unlock_irqrestore(&nrslock, flags); ++ ++ /* ++ ** insert new struct in doubly linked list ++ */ ++ sip->ch.next = &shash[hash].ch; ++ sip->ch.prev = shash[hash].ch.prev; ++ ((struct sockinfo *)shash[hash].ch.prev)->ch.next = sip; ++ shash[hash].ch.prev = sip; ++ ++ sip->proto = proto; ++ sip->lastact = jiffies_64; ++ sip->key = *identp; ++ ++ return sip; ++} ++ ++/* ++** search the taskinfo structure holding the info about the given id/type ++** if such taskinfo is not yet present, create a new one ++*/ ++static struct taskinfo * ++get_taskinfo(pid_t id, char type) ++{ ++ int bt = THASH(id, type); ++ struct taskinfo *tip = thash[bt].ch.next; ++ unsigned long tflags; ++ ++ /* ++ ** search if id exists already ++ */ ++ while (tip != (void *)&thash[bt].ch) { ++ if (tip->id == id && tip->type == type) ++ return tip; ++ ++ tip = tip->ch.next; ++ } ++ ++ /* ++ ** check if the threshold of memory used for taskinfo structs ++ ** is reached to avoid that a fork bomb of processes opening ++ ** a socket lead to memory overload ++ */ ++ if ( (nre+nrt+1) * sizeof(struct taskinfo) > TILIMIT) { ++ spin_lock_irqsave(&nrtlock, tflags); ++ nrt_ovf++; ++ spin_unlock_irqrestore(&nrtlock, tflags); ++ return NULL; ++ } ++ ++ /* ++ ** id not known yet ++ ** add new entry to hash list ++ */ ++ if ( (tip = kzalloc(sizeof *tip, GFP_ATOMIC)) == NULL) ++ return NULL; ++ ++ 
spin_lock_irqsave(&nrtlock, tflags); ++ nrt++; ++ spin_unlock_irqrestore(&nrtlock, tflags); ++ ++ /* ++ ** insert new struct in doubly linked list ++ ** and fill values ++ */ ++ tip->ch.next = &thash[bt].ch; ++ tip->ch.prev = thash[bt].ch.prev; ++ ((struct taskinfo *)thash[bt].ch.prev)->ch.next = tip; ++ thash[bt].ch.prev = tip; ++ ++ tip->id = id; ++ tip->type = type; ++ ++ tip->btime = current->real_start_time.tv_sec + boottime.tv_sec; ++ ++ if (current->real_start_time.tv_nsec + boottime.tv_nsec > NSEC_PER_SEC) ++ tip->btime++; ++ ++ strncpy(tip->command, current->comm, COMLEN); ++ ++ return tip; ++} ++ ++/* ++** function that runs every second to see if a ++** time-based garbage collection cycle has to be ++** forced (i.e. if no process forces it) ++*/ ++static void ++gcperiodic(unsigned long unused) ++{ ++ if (jiffies_64 >= gclast + GCINTERVAL) ++ garbage_collector(); ++ ++ /* ++ ** set timer for next second ++ */ ++ timer.expires = jiffies_64 + HZ; ++ timer.function = gcperiodic; ++ add_timer(&timer); ++} ++ ++/* ++** garbage collector that removes: ++** - exited tasks that are not by user mode programs ++** - sockinfo's that are not used any more ++** - taskinfo's that do not exist any more ++** ++** a lock avoids that the garbage collector runs several times in parallel ++*/ ++static void ++garbage_collector(void) ++{ ++ unsigned long flags; ++ ++ spin_lock_irqsave(&gclock, flags); ++ ++ if (jiffies_64 < gclast + (HZ/2)) { // maximum 2 GC cycles per second ++ spin_unlock_irqrestore(&gclock, flags); ++ return; ++ } ++ ++ gctaskexit(); // remove remaining taskinfo structs from exit list ++ ++ gcsockinfo(); // clean up sockinfo structs in shash list ++ ++ gctaskinfo(); // clean up taskinfo structs in thash list ++ ++ gclast = jiffies_64; ++ ++ spin_unlock_irqrestore(&gclock, flags); ++} ++ ++/* ++** tasks in the exitlist can be read by a user mode process for a limited ++** amount of time; this function removes all taskinfo structures that have ++** not been read within that period of time ++** notice that exited processes are chained to the tail, so the oldest ++** can be found at the head ++*/ ++static void ++gctaskexit() ++{ ++ unsigned long flags; ++ struct taskinfo *tip; ++ ++ spin_lock_irqsave(&exitlock, flags); ++ ++ for (tip=exithead; tip;) { ++ if (jiffies_64 < tip->exittime + GCINTERVAL) ++ break; ++ ++ // remove taskinfo from exitlist ++ exithead = tip->ch.next; ++ kfree(tip); ++ nre--; ++ tip = exithead; ++ } ++ ++ /* ++ ** if list empty now, then exithead and exittail both NULL ++ ** wakeup waiters for emptylist ++ */ ++ if (nre == 0) { ++ exittail = NULL; ++ wake_up_interruptible(&exitlist_empty); ++ } ++ ++ spin_unlock_irqrestore(&exitlock, flags); ++} ++ ++/* ++** cleanup sockinfo structures that are connected to finished processes ++*/ ++static void ++gcsockinfo() ++{ ++ int i; ++ struct sockinfo *sip, *sipsave; ++ unsigned long sflags, tflags; ++ ++ /* ++ ** go through all sockinfo hash buckets ++ */ ++ for (i=0; i < SBUCKS; i++) { ++ if (shash[i].ch.next == (void *)&shash[i].ch) ++ continue; // quick return without lock ++ ++ spin_lock_irqsave(&shash[i].lock, sflags); ++ ++ sip = shash[i].ch.next; ++ ++ /* ++ ** search all sockinfo structs chained in one bucket ++ */ ++ while (sip != (void *)&shash[i].ch) { ++ /* ++ ** TCP connections that were not in ++ ** state ESTABLISHED or LISTEN can be ++ ** eliminated ++ */ ++ if (sip->proto == IPPROTO_TCP) { ++ switch (sip->last_state) { ++ case TCP_ESTABLISHED: ++ case TCP_LISTEN: ++ break; ++ ++ default: ++ sipsave = 
sip->ch.next; ++ delete_sockinfo(sip); ++ sip = sipsave; ++ continue; ++ } ++ } ++ ++ /* ++ ** check if this sockinfo has no relation ++ ** for a while with a thread group ++ ** if so, delete the sockinfo ++ */ ++ if (sip->tgp == NULL) { ++ if (sip->lastact + GCMAXUNREF < jiffies_64) { ++ register_unident(sip); ++ sipsave = sip->ch.next; ++ delete_sockinfo(sip); ++ sip = sipsave; ++ } else { ++ sip = sip->ch.next; ++ } ++ continue; ++ } ++ ++ /* ++ ** check if referred thread group is ++ ** already marked as 'indelete' during this ++ ** sockinfo search ++ ** if so, delete this sockinfo ++ */ ++ spin_lock_irqsave(&thash[sip->tgh].lock, tflags); ++ ++ if (sip->tgp->state == INDELETE) { ++ spin_unlock_irqrestore(&thash[sip->tgh].lock, ++ tflags); ++ sipsave = sip->ch.next; ++ delete_sockinfo(sip); ++ sip = sipsave; ++ continue; ++ } ++ ++ /* ++ ** check if referred thread group still exists; ++ ** this step will be skipped if we already verified ++ ** the existance of the thread group earlier during ++ ** this garbage collection cycle ++ */ ++ if (sip->tgp->state != CHECKED) { ++ /* ++ ** connected thread group not yet verified ++ ** during this cycle, so check if it still ++ ** exists ++ ** if not, mark the thread group as 'indelete' ++ ** (it can not be deleted right now because ++ ** we might find other sockinfo's referring ++ ** to this thread group during the current ++ ** cycle) and delete this sockinfo ++ ** if the thread group exists, just mark ++ ** it as 'checked' for this cycle ++ */ ++ if (find_vpid(sip->tgp->id) == NULL) { ++ sip->tgp->state = INDELETE; ++ spin_unlock_irqrestore( ++ &thash[sip->tgh].lock, tflags); ++ ++ sipsave = sip->ch.next; ++ delete_sockinfo(sip); ++ sip = sipsave; ++ continue; ++ } else { ++ sip->tgp->state = CHECKED; ++ } ++ } ++ ++ spin_unlock_irqrestore(&thash[sip->tgh].lock, tflags); ++ ++ /* ++ ** check if this sockinfo has a relation with a thread ++ ** if not, skip further handling of this sockinfo ++ */ ++ if (sip->thp == NULL) { ++ sip = sip->ch.next; ++ continue; ++ } ++ ++ /* ++ ** check if referred thread is already marked ++ ** as 'indelete' during this sockinfo search ++ ** if so, break connection ++ */ ++ spin_lock_irqsave(&thash[sip->thh].lock, tflags); ++ ++ if (sip->thp->state == INDELETE) { ++ spin_unlock_irqrestore(&thash[sip->thh].lock, ++ tflags); ++ sip->thp = NULL; ++ sip = sip->ch.next; ++ continue; ++ } ++ ++ /* ++ ** check if referred thread is already checked ++ ** during this sockinfo search ++ */ ++ if (sip->thp->state == CHECKED) { ++ spin_unlock_irqrestore(&thash[sip->thh].lock, ++ tflags); ++ sip = sip->ch.next; ++ continue; ++ } ++ ++ /* ++ ** connected thread not yet verified ++ ** check if it still exists ++ ** if not, mark it as 'indelete' and break connection ++ ** if thread exists, mark it 'checked' ++ */ ++ if (find_vpid(sip->thp->id) == NULL) { ++ sip->thp->state = INDELETE; ++ sip->thp = NULL; ++ } else { ++ sip->thp->state = CHECKED; ++ } ++ ++ spin_unlock_irqrestore(&thash[sip->thh].lock, tflags); ++ ++ /* ++ ** check if a TCP port has not been used ++ ** for some time --> destroy even if the thread ++ ** (group) is still there ++ */ ++ if (sip->proto == IPPROTO_TCP && ++ sip->lastact + GCMAXTCP < jiffies_64) { ++ sipsave = sip->ch.next; ++ delete_sockinfo(sip); ++ sip = sipsave; ++ continue; ++ } ++ ++ /* ++ ** check if a UDP port has not been used ++ ** for some time --> destroy even if the thread ++ ** (group) is still there ++ ** e.g. 
outgoing DNS requests (to remote port 53) are ++ ** issued every time with another source port being ++ ** a new object that should not be kept too long; ++ ** local well-known ports are useful to keep ++ */ ++ if (sip->proto == IPPROTO_UDP && ++ sip->lastact + GCMAXUDP < jiffies_64 && ++ sip->key.udp > 1024) { ++ sipsave = sip->ch.next; ++ delete_sockinfo(sip); ++ sip = sipsave; ++ continue; ++ } ++ ++ sip = sip->ch.next; ++ } ++ ++ spin_unlock_irqrestore(&shash[i].lock, sflags); ++ } ++} ++ ++/* ++** remove taskinfo structures of finished tasks from hash list ++*/ ++static void ++gctaskinfo() ++{ ++ int i; ++ struct taskinfo *tip, *tipsave; ++ unsigned long tflags; ++ ++ /* ++ ** go through all taskinfo hash buckets ++ */ ++ for (i=0; i < TBUCKS; i++) { ++ if (thash[i].ch.next == (void *)&thash[i].ch) ++ continue; // quick return without lock ++ ++ spin_lock_irqsave(&thash[i].lock, tflags); ++ ++ tip = thash[i].ch.next; ++ ++ /* ++ ** check all taskinfo structs chained to this bucket ++ */ ++ while (tip != (void *)&thash[i].ch) { ++ switch (tip->state) { ++ /* ++ ** remove INDELETE tasks from the hash buckets ++ ** -- move thread group to exitlist ++ ** -- destroy thread right away ++ */ ++ case INDELETE: ++ tipsave = tip->ch.next; ++ ++ if (tip->type == 'g') ++ move_taskinfo(tip); // thread group ++ else ++ delete_taskinfo(tip); // thread ++ ++ tip = tipsave; ++ break; ++ ++ case CHECKED: ++ tip->state = 0; ++ tip = tip->ch.next; ++ break; ++ ++ default: // not checked yet ++ if (find_vpid(tip->id) == NULL) { ++ tipsave = tip->ch.next; ++ ++ if (tip->type == 'g') ++ move_taskinfo(tip); ++ else ++ delete_taskinfo(tip); ++ ++ tip = tipsave; ++ } else { ++ tip = tip->ch.next; ++ } ++ } ++ } ++ ++ spin_unlock_irqrestore(&thash[i].lock, tflags); ++ } ++} ++ ++ ++/* ++** remove all sockinfo structs ++*/ ++static void ++wipesockinfo() ++{ ++ struct sockinfo *sip, *sipsave; ++ int i; ++ unsigned long sflags; ++ ++ for (i=0; i < SBUCKS; i++) { ++ spin_lock_irqsave(&shash[i].lock, sflags); ++ ++ sip = shash[i].ch.next; ++ ++ /* ++ ** free all structs chained in one bucket ++ */ ++ while (sip != (void *)&shash[i].ch) { ++ sipsave = sip->ch.next; ++ delete_sockinfo(sip); ++ sip = sipsave; ++ } ++ ++ spin_unlock_irqrestore(&shash[i].lock, sflags); ++ } ++} ++ ++/* ++** remove all taskinfo structs from hash list ++*/ ++static void ++wipetaskinfo() ++{ ++ struct taskinfo *tip, *tipsave; ++ int i; ++ unsigned long tflags; ++ ++ for (i=0; i < TBUCKS; i++) { ++ spin_lock_irqsave(&thash[i].lock, tflags); ++ ++ tip = thash[i].ch.next; ++ ++ /* ++ ** free all structs chained in one bucket ++ */ ++ while (tip != (void *)&thash[i].ch) { ++ tipsave = tip->ch.next; ++ delete_taskinfo(tip); ++ tip = tipsave; ++ } ++ ++ spin_unlock_irqrestore(&thash[i].lock, tflags); ++ } ++} ++ ++/* ++** remove all taskinfo structs from exit list ++*/ ++static void ++wipetaskexit() ++{ ++ gctaskexit(); ++} ++ ++/* ++** move one taskinfo struct from hash bucket to exitlist ++*/ ++static void ++move_taskinfo(struct taskinfo *tip) ++{ ++ unsigned long flags; ++ ++ /* ++ ** remove from hash list ++ */ ++ ((struct taskinfo *)tip->ch.next)->ch.prev = tip->ch.prev; ++ ((struct taskinfo *)tip->ch.prev)->ch.next = tip->ch.next; ++ ++ spin_lock_irqsave(&nrtlock, flags); ++ nrt--; ++ spin_unlock_irqrestore(&nrtlock, flags); ++ ++ /* ++ ** add to exitlist ++ */ ++ tip->ch.next = NULL; ++ tip->state = FINISHED; ++ tip->exittime = jiffies_64; ++ ++ spin_lock_irqsave(&exitlock, flags); ++ ++ if (exittail) { // list filled? 
++ exittail->ch.next = tip; ++ exittail = tip; ++ } else { // list empty ++ exithead = exittail = tip; ++ } ++ ++ nre++; ++ ++ wake_up_interruptible(&exitlist_filled); ++ ++ spin_unlock_irqrestore(&exitlock, flags); ++} ++ ++/* ++** remove one taskinfo struct for the hash bucket chain ++*/ ++static void ++delete_taskinfo(struct taskinfo *tip) ++{ ++ unsigned long flags; ++ ++ ((struct taskinfo *)tip->ch.next)->ch.prev = tip->ch.prev; ++ ((struct taskinfo *)tip->ch.prev)->ch.next = tip->ch.next; ++ ++ kfree(tip); ++ ++ spin_lock_irqsave(&nrtlock, flags); ++ nrt--; ++ spin_unlock_irqrestore(&nrtlock, flags); ++} ++ ++/* ++** remove one sockinfo struct for the hash bucket chain ++*/ ++static void ++delete_sockinfo(struct sockinfo *sip) ++{ ++ unsigned long flags; ++ ++ ((struct sockinfo *)sip->ch.next)->ch.prev = sip->ch.prev; ++ ((struct sockinfo *)sip->ch.prev)->ch.next = sip->ch.next; ++ ++ kfree(sip); ++ ++ spin_lock_irqsave(&nrslock, flags); ++ nrs--; ++ spin_unlock_irqrestore(&nrslock, flags); ++} ++ ++/* ++** read function for /proc/netatop ++*/ ++static int ++netatop_read_proc(char *buf, char **start, off_t offset, ++ int count, int *eof, void *data) ++{ ++ return sprintf(buf, "tcpsndpacks: %9lu (unident: %9lu)\n" ++ "tcprcvpacks: %9lu (unident: %9lu)\n" ++ "udpsndpacks: %9lu (unident: %9lu)\n" ++ "udprcvpacks: %9lu (unident: %9lu)\n\n" ++ "icmpsndpacks: %9lu\n" ++ "icmprcvpacks: %9lu\n\n" ++ "#sockinfo: %9lu (overflow: %8lu)\n" ++ "#taskinfo: %9lu (overflow: %8lu)\n" ++ "#taskexit: %9lu\n", ++ tcpsndpacks, unidenttcpsndpacks, ++ tcprcvpacks, unidenttcprcvpacks, ++ udpsndpacks, unidentudpsndpacks, ++ udprcvpacks, unidentudprcvpacks, ++ icmpsndpacks, icmprcvpacks, ++ nrs, nrs_ovf, ++ nrt, nrt_ovf, ++ nre); ++} ++ ++/* ++** called when user spce issues system call getsockopt() ++*/ ++static int ++getsockopt(struct sock *sk, int cmd, void __user *user, int *len) ++{ ++ int bt; ++ struct taskinfo *tip; ++ char tasktype = 't'; ++ struct netpertask npt; ++ unsigned long tflags; ++ ++ /* ++ ** verify the proper privileges ++ */ ++ if (!capable(CAP_NET_ADMIN)) ++ return -EPERM; ++ ++ /* ++ ** react on command ++ */ ++ switch (cmd) { ++ case NETATOP_PROBE: ++ break; ++ ++ case NETATOP_FORCE_GC: ++ garbage_collector(); ++ break; ++ ++ case NETATOP_EMPTY_EXIT: ++ while (nre > 0) { ++ if (wait_event_interruptible(exitlist_empty, nre == 0)) ++ return -ERESTARTSYS; ++ } ++ break; ++ ++ case NETATOP_GETCNT_EXIT: ++ if (nre == 0) ++ wake_up_interruptible(&exitlist_empty); ++ ++ if (*len < sizeof(pid_t)) ++ return -EINVAL; ++ ++ if (*len > sizeof npt) ++ *len = sizeof npt; ++ ++ spin_lock_irqsave(&exitlock, tflags); ++ ++ /* ++ ** check if an exited process is present ++ ** if not, wait for it... 
++ */ ++ while (nre == 0) { ++ spin_unlock_irqrestore(&exitlock, tflags); ++ ++ if ( wait_event_interruptible(exitlist_filled, nre > 0)) ++ return -ERESTARTSYS; ++ ++ spin_lock_irqsave(&exitlock, tflags); ++ } ++ ++ /* ++ ** get first eprocess from exitlist and remove it from there ++ */ ++ tip = exithead; ++ ++ if ( (exithead = tip->ch.next) == NULL) ++ exittail = NULL; ++ ++ nre--; ++ ++ spin_unlock_irqrestore(&exitlock, tflags); ++ ++ /* ++ ** pass relevant info to user mode ++ ** and free taskinfo struct ++ */ ++ npt.id = tip->id; ++ npt.tc = tip->tc; ++ npt.btime = tip->btime; ++ memcpy(npt.command, tip->command, COMLEN); ++ ++ if (copy_to_user(user, &npt, *len) != 0) ++ return -EFAULT; ++ ++ kfree(tip); ++ ++ return 0; ++ ++ case NETATOP_GETCNT_TGID: ++ tasktype = 'g'; ++ ++ case NETATOP_GETCNT_PID: ++ if (*len < sizeof(pid_t)) ++ return -EINVAL; ++ ++ if (*len > sizeof npt) ++ *len = sizeof npt; ++ ++ if (copy_from_user(&npt, user, *len) != 0) ++ return -EFAULT; ++ ++ /* ++ ** search requested id in taskinfo hash ++ */ ++ bt = THASH(npt.id, tasktype); // calculate hash ++ ++ if (thash[bt].ch.next == (void *)&thash[bt].ch) ++ return -ESRCH; // quick return without lock ++ ++ spin_lock_irqsave(&thash[bt].lock, tflags); ++ ++ tip = thash[bt].ch.next; ++ ++ while (tip != (void *)&thash[bt].ch) { ++ // is this the one? ++ if (tip->id == npt.id && tip->type == tasktype) { ++ /* ++ ** found: copy results to user space ++ */ ++ memcpy(npt.command, tip->command, COMLEN); ++ npt.tc = tip->tc; ++ npt.btime = tip->btime; ++ ++ spin_unlock_irqrestore(&thash[bt].lock, tflags); ++ ++ if (copy_to_user(user, &npt, *len) != 0) ++ return -EFAULT; ++ else ++ return 0; ++ } ++ ++ tip = tip->ch.next; ++ } ++ ++ spin_unlock_irqrestore(&thash[bt].lock, tflags); ++ return -ESRCH; ++ ++ default: ++ printk(KERN_INFO "unknown getsockopt command %d\n", cmd); ++ return -EINVAL; ++ } ++ ++ return 0; ++} ++ ++/* ++** called when module loaded ++*/ ++int ++init_module() ++{ ++ int i; ++ ++ /* ++ ** initialize various admi ++ */ ++ for (i=0; i < TBUCKS; i++) { ++ thash[i].ch.next = &thash[i].ch; ++ thash[i].ch.prev = &thash[i].ch; ++ spin_lock_init(&thash[i].lock); ++ } ++ ++ for (i=0; i < SBUCKS; i++) { ++ shash[i].ch.next = &shash[i].ch; ++ shash[i].ch.prev = &shash[i].ch; ++ spin_lock_init(&shash[i].lock); ++ } ++ ++ getboottime(&boottime); ++ ++ /* ++ ** register getsockopt for user space communication ++ */ ++ if (nf_register_sockopt(&sockopts) < 0) ++ return -1; ++ ++ /* ++ ** prepare hooks and register ++ */ ++ hookin_ipv4.hooknum = NF_IP_LOCAL_IN; // input packs ++ hookin_ipv4.hook = ipv4_hookin; // func to call ++ hookin_ipv4.pf = PF_INET; // IPV4 packets ++ hookin_ipv4.priority = NF_IP_PRI_FIRST; // highest prio ++ ++ hookout_ipv4.hooknum = NF_IP_LOCAL_OUT; // output packs ++ hookout_ipv4.hook = ipv4_hookout; // func to call ++ hookout_ipv4.pf = PF_INET; // IPV4 packets ++ hookout_ipv4.priority = NF_IP_PRI_FIRST; // highest prio ++ ++ nf_register_hook(&hookin_ipv4); // register hook ++ nf_register_hook(&hookout_ipv4); // register hook ++ ++ /* ++ ** create a /proc-entry to produce status-info on request ++ */ ++ create_proc_read_entry("netatop", 0444, NULL, netatop_read_proc, NULL); ++ ++ /* ++ ** activate timer for periodic call of garbage collector ++ */ ++ init_timer(&timer); ++ ++ timer.expires = jiffies_64 + HZ; ++ timer.function = gcperiodic; ++ add_timer(&timer); ++ ++ return 0; // return success ++} ++ ++/* ++** called when module unloaded ++*/ ++void ++cleanup_module() ++{ ++ 
nf_unregister_hook(&hookin_ipv4); ++ nf_unregister_hook(&hookout_ipv4); ++ ++ remove_proc_entry("netatop", NULL); ++ ++ del_timer(&timer); ++ ++ nf_unregister_sockopt(&sockopts); ++ ++ /* ++ ** destroy allocated stats ++ */ ++ wipesockinfo(); ++ wipetaskinfo(); ++ wipetaskexit(); ++} +diff -uNr linux-3.2.33-go.orig/3rdparty/netatop/netatop.h 3rdparty/netatop/netatop.h +--- linux-3.2.33-go.orig/3rdparty/netatop/netatop.h 1970-01-01 01:00:00.000000000 +0100 ++++ 3rdparty/netatop/netatop.h 2012-11-12 18:08:29.000000000 +0100 +@@ -0,0 +1,47 @@ ++#define COMLEN 16 ++ ++struct taskcount { ++ unsigned long long tcpsndpacks; ++ unsigned long long tcpsndbytes; ++ unsigned long long tcprcvpacks; ++ unsigned long long tcprcvbytes; ++ ++ unsigned long long udpsndpacks; ++ unsigned long long udpsndbytes; ++ unsigned long long udprcvpacks; ++ unsigned long long udprcvbytes; ++ ++ /* space for future extensions */ ++}; ++ ++struct netpertask { ++ pid_t id; // tgid or tid (depending on command) ++ unsigned long btime; ++ char command[COMLEN]; ++ ++ struct taskcount tc; ++}; ++ ++ ++/* ++** getsocktop commands ++*/ ++#define NETATOP_BASE_CTL 15661 ++ ++// just probe if the netatop module is active ++#define NETATOP_PROBE (NETATOP_BASE_CTL) ++ ++// force garbage collection to make finished processes available ++#define NETATOP_FORCE_GC (NETATOP_BASE_CTL+1) ++ ++// wait until all finished processes are read (blocks until done) ++#define NETATOP_EMPTY_EXIT (NETATOP_BASE_CTL+2) ++ ++// get info for finished process (blocks until available) ++#define NETATOP_GETCNT_EXIT (NETATOP_BASE_CTL+3) ++ ++// get counters for thread group (i.e. process): input is 'id' (pid) ++#define NETATOP_GETCNT_TGID (NETATOP_BASE_CTL+4) ++ ++// get counters for thread: input is 'id' (tid) ++#define NETATOP_GETCNT_PID (NETATOP_BASE_CTL+5) +diff -uNr linux-3.2.33-go.orig/3rdparty/netatop/netatopversion.h 3rdparty/netatop/netatopversion.h +--- linux-3.2.33-go.orig/3rdparty/netatop/netatopversion.h 1970-01-01 01:00:00.000000000 +0100 ++++ 3rdparty/netatop/netatopversion.h 2012-11-12 18:08:29.000000000 +0100 +@@ -0,0 +1,2 @@ ++#define NETATOPVERSION "0.1.1" ++#define NETATOPDATE "2012/11/12 18:08:23" diff --git a/3.2.34/910-kobject_uevent.patch b/3.2.34/910-kobject_uevent.patch new file mode 100644 index 0000000..aa9a40f --- /dev/null +++ b/3.2.34/910-kobject_uevent.patch @@ -0,0 +1,21 @@ +--- a/lib/kobject_uevent.c ++++ b/lib/kobject_uevent.c +@@ -50,6 +50,18 @@ static const char *kobject_actions[] = { + [KOBJ_OFFLINE] = "offline", + }; + ++u64 uevent_next_seqnum(void) ++{ ++ u64 seq; ++ ++ mutex_lock(&uevent_sock_mutex); ++ seq = ++uevent_seqnum; ++ mutex_unlock(&uevent_sock_mutex); ++ ++ return seq; ++} ++EXPORT_SYMBOL_GPL(uevent_next_seqnum); ++ + /** + * kobject_action_type - translate action string to numeric type + * diff --git a/3.2.34/911-kobject_add_broadcast_uevent.patch b/3.2.34/911-kobject_add_broadcast_uevent.patch new file mode 100644 index 0000000..104df13 --- /dev/null +++ b/3.2.34/911-kobject_add_broadcast_uevent.patch @@ -0,0 +1,85 @@ +--- a/include/linux/kobject.h ++++ b/include/linux/kobject.h +@@ -31,6 +31,8 @@ + #define UEVENT_NUM_ENVP 32 /* number of env pointers */ + #define UEVENT_BUFFER_SIZE 2048 /* buffer for the variables */ + ++struct sk_buff; ++ + /* path to the userspace helper executed on an event */ + extern char uevent_helper[]; + +@@ -213,6 +215,10 @@ int add_uevent_var(struct kobj_uevent_en + + int kobject_action_type(const char *buf, size_t count, + enum kobject_action *type); ++ ++int 
broadcast_uevent(struct sk_buff *skb, __u32 pid, __u32 group, ++ gfp_t allocation); ++ + #else + static inline int kobject_uevent(struct kobject *kobj, + enum kobject_action action) +@@ -229,6 +235,16 @@ int add_uevent_var(struct kobj_uevent_en + static inline int kobject_action_type(const char *buf, size_t count, + enum kobject_action *type) + { return -EINVAL; } ++ ++void kfree_skb(struct sk_buff *); ++ ++static inline int broadcast_uevent(struct sk_buff *skb, __u32 pid, __u32 group, ++ gfp_t allocation) ++{ ++ kfree_skb(skb); ++ return 0; ++} ++ + #endif + + #endif /* _KOBJECT_H_ */ +--- a/lib/kobject_uevent.c ++++ b/lib/kobject_uevent.c +@@ -381,6 +381,43 @@ int add_uevent_var(struct kobj_uevent_en + EXPORT_SYMBOL_GPL(add_uevent_var); + + #if defined(CONFIG_NET) ++int broadcast_uevent(struct sk_buff *skb, __u32 pid, __u32 group, ++ gfp_t allocation) ++{ ++ struct uevent_sock *ue_sk; ++ int err = 0; ++ ++ /* send netlink message */ ++ mutex_lock(&uevent_sock_mutex); ++ list_for_each_entry(ue_sk, &uevent_sock_list, list) { ++ struct sock *uevent_sock = ue_sk->sk; ++ struct sk_buff *skb2; ++ ++ skb2 = skb_clone(skb, allocation); ++ if (!skb2) ++ break; ++ ++ err = netlink_broadcast(uevent_sock, skb2, pid, group, ++ allocation); ++ if (err) ++ break; ++ } ++ mutex_unlock(&uevent_sock_mutex); ++ ++ kfree_skb(skb); ++ return err; ++} ++#else ++int broadcast_uevent(struct sk_buff *skb, __u32 pid, __u32 group, ++ gfp_t allocation) ++{ ++ kfree_skb(skb); ++ return 0; ++} ++#endif ++EXPORT_SYMBOL_GPL(broadcast_uevent); ++ ++#if defined(CONFIG_NET) + static int uevent_net_init(struct net *net) + { + struct uevent_sock *ue_sk; diff --git a/3.2.34/Add_CONFIG_VFAT_FS_DUALNAMES_option.patch b/3.2.34/Add_CONFIG_VFAT_FS_DUALNAMES_option.patch new file mode 100644 index 0000000..5e3cfe1 --- /dev/null +++ b/3.2.34/Add_CONFIG_VFAT_FS_DUALNAMES_option.patch @@ -0,0 +1,145 @@ +diff --git a/fs/fat/Kconfig b/fs/fat/Kconfig +index 182f9ff..907a5de 100644 +--- a/fs/fat/Kconfig ++++ b/fs/fat/Kconfig +@@ -74,6 +74,26 @@ config VFAT_FS + To compile this as a module, choose M here: the module will be called + vfat. + ++config VFAT_FS_DUALNAMES ++ bool "VFAT dual names support" ++ depends on VFAT_FS ++ help ++ This option provides support for dual filenames on VFAT filesystems. ++ If this option is disabled then file creation will either put ++ a short (8.3) name or a long name on the file, but never both. ++ The field where a shortname would normally go is filled with ++ invalid characters such that it cannot be considered a valid ++ short filename. ++ ++ That means that long filenames created with this option ++ disabled will not be accessible at all to operating systems ++ that do not understand the VFAT extensions. ++ ++ Users considering enabling this option should consider the implications ++ of any patents that may exist on dual filenames in VFAT. 
++ ++ If unsure, say N ++ + config FAT_DEFAULT_CODEPAGE + int "Default codepage for FAT" + depends on MSDOS_FS || VFAT_FS +diff --git a/fs/fat/dir.c b/fs/fat/dir.c +index 38ff75a..cd5d3ec 100644 +--- a/fs/fat/dir.c ++++ b/fs/fat/dir.c +@@ -415,14 +415,13 @@ + } + i += chl; + } +- if (!last_u) +- continue; +- +- /* Compare shortname */ +- bufuname[last_u] = 0x0000; +- len = fat_uni_to_x8(sb, bufuname, bufname, sizeof(bufname)); +- if (fat_name_match(sbi, name, name_len, bufname, len)) +- goto found; ++ if (last_u) { ++ /* Compare shortname */ ++ bufuname[last_u] = 0x0000; ++ len = fat_uni_to_x8(sb, bufuname, bufname, sizeof(bufname)); ++ if (fat_name_match(sbi, name, name_len, bufname, len)) ++ goto found; ++ } + + if (nr_slots) { + void *longname = unicode + FAT_MAX_UNI_CHARS; +diff --git a/fs/fat/namei_vfat.c b/fs/fat/namei_vfat.c +index 73471b7..894f44d 100644 +--- a/fs/fat/namei_vfat.c ++++ b/fs/fat/namei_vfat.c +@@ -22,6 +22,7 @@ + #include + #include + #include ++#include + #include "fat.h" + + /* +@@ -586,6 +587,59 @@ xlate_to_uni(const unsigned char *name, int len, unsigned char *outname, + return 0; + } + ++#ifndef CONFIG_VFAT_FS_DUALNAMES ++/* ++ * build a 11 byte 8.3 buffer which is not a short filename. We want 11 ++ * bytes which: ++ * - will be seen as a constant string to all APIs on Linux and Windows ++ * - cannot be matched with wildcard patterns ++ * - cannot be used to access the file ++ * - has a low probability of collision within a directory ++ * - has an invalid 3 byte extension ++ * - contains at least one non-space and non-nul byte ++ */ ++static void vfat_build_dummy_83_buffer(struct inode *dir, char *msdos_name) ++{ ++ u32 rand_num = random32() & 0x3FFFFFFF; ++ int i; ++ ++ /* a value of zero would leave us with only nul and spaces, ++ * which would not work with older linux systems ++ */ ++ if (rand_num == 0) ++ rand_num = 1; ++ ++ /* we start with a space followed by nul as spaces at the ++ * start of an entry are trimmed in FAT, which means that ++ * starting the 11 bytes with 0x20 0x00 gives us a value which ++ * cannot be used to access the file. It also means that the ++ * value as seen from all Windows and Linux APIs is a constant ++ */ ++ msdos_name[0] = ' '; ++ msdos_name[1] = 0; ++ ++ /* we use / and 2 nul bytes for the extension. 
These are ++ * invalid in FAT and mean that utilities that show the ++ * directory show no extension, but still work via the long ++ * name for old Linux kernels ++ */ ++ msdos_name[8] = '/'; ++ msdos_name[9] = 0; ++ msdos_name[10] = 0; ++ ++ /* ++ * fill the remaining 6 bytes with random invalid values ++ * This gives us a low collision rate, which means a low ++ * chance of problems with chkdsk.exe and WindowsXP ++ */ ++ for (i = 2; i < 8; i++) { ++ msdos_name[i] = rand_num & 0x1F; ++ rand_num >>= 5; ++ } ++} ++#endif ++ ++ + static int vfat_build_slots(struct inode *dir, const unsigned char *name, + int len, int is_dir, int cluster, + struct timespec *ts, +@@ -628,6 +682,11 @@ static int vfat_build_slots(struct inode *dir, const unsigned char *name, + goto shortname; + } + ++#ifndef CONFIG_VFAT_FS_DUALNAMES ++ vfat_build_dummy_83_buffer(dir, msdos_name); ++ lcase = 0; ++#endif ++ + /* build the entry of long file name */ + cksum = fat_checksum(msdos_name); + +-- +1.6.0.4 + + diff --git a/3.2.34/accessfs-3.2-0.26.patch b/3.2.34/accessfs-3.2-0.26.patch new file mode 100644 index 0000000..f36e634 --- /dev/null +++ b/3.2.34/accessfs-3.2-0.26.patch @@ -0,0 +1,1036 @@ +diff --git a/Documentation/filesystems/accessfs.txt b/Documentation/filesystems/accessfs.txt +new file mode 100644 +index 0000000..bf135b5 +--- /dev/null ++++ b/Documentation/filesystems/accessfs.txt +@@ -0,0 +1,41 @@ ++Accessfs is a permission managing filesystem. It allows to control access to ++system resources, based on file permissions. The recommended mount point for ++this file-system is /proc/access, which will appear automatically in the ++/proc filesystem. ++ ++Currently there are two modules using accessfs, userports and usercaps. ++ ++With userports, you will be able to control access to IP ports based ++on user-/groupid. ++ ++There's no need anymore to run internet daemons as root. You can ++individually configure which user/program can bind to protected ports ++(by default, below 1024). ++ ++For example, you can say, user www is allowed to bind to port 80 or ++user mail is allowed to bind to port 25. Then, you can run apache as ++user www and sendmail as user mail. Now, you don't have to rely on ++apache or sendmail giving up superuser rights to enhance security. ++ ++To use this option, you need to mount the access file system ++and do a chown on the appropriate ports: ++ ++# mount -t accessfs none /proc/access ++# chown www /proc/access/net/ip/bind/80 ++# chown mail /proc/access/net/ip/bind/25 ++ ++You can grant access to a group for individual ports as well. Just say: ++ ++# chgrp lp /proc/access/net/ip/bind/515 ++# chown g+x /proc/access/net/ip/bind/515 ++ ++With usercaps, you will be able to grant capabilities based on ++user-/groupid (root by default). 
++ ++For example you can create a group raw and change the capability ++net_raw to this group: ++ ++# chgrp raw /proc/access/capabilities/net_raw ++# chmod ug+x /proc/access/capabilities/net_raw ++# chgrp raw /sbin/ping ++# chmod u-s /sbin/ping; chmod g+s /sbin/ping +diff --git a/fs/Kconfig b/fs/Kconfig +index 5f4c45d..24f7348 100644 +--- a/fs/Kconfig ++++ b/fs/Kconfig +@@ -210,6 +210,7 @@ + # UBIFS File system configuration + source "fs/ubifs/Kconfig" + source "fs/logfs/Kconfig" ++source "fs/accessfs/Kconfig" + source "fs/cramfs/Kconfig" + source "fs/squashfs/Kconfig" + source "fs/freevxfs/Kconfig" +diff --git a/fs/Makefile b/fs/Makefile +index d2c3353..fea1cfc 100644 +--- a/fs/Makefile ++++ b/fs/Makefile +@@ -121,5 +121,6 @@ obj-$(CONFIG_OCFS2_FS) += ocfs2/ + obj-$(CONFIG_BTRFS_FS) += btrfs/ + obj-$(CONFIG_GFS2_FS) += gfs2/ + obj-y += exofs/ # Multiple modules ++obj-$(CONFIG_ACCESS_FS) += accessfs/ + obj-$(CONFIG_CEPH_FS) += ceph/ + obj-$(CONFIG_PSTORE) += pstore/ +diff --git a/fs/accessfs/Kconfig b/fs/accessfs/Kconfig +new file mode 100644 +index 0000000..539d6e9 +--- /dev/null ++++ b/fs/accessfs/Kconfig +@@ -0,0 +1,61 @@ ++config ACCESS_FS ++ tristate "Accessfs support (Experimental)" ++ depends on EXPERIMENTAL ++ default n ++ help ++ This is a new file system to manage permissions. It is not very ++ useful on its own. You need to enable other options below. ++ ++ If you're unsure, say N. ++ ++config ACCESSFS_USER_PORTS ++ tristate "User permission based IP ports" ++ depends on ACCESS_FS && INET ++ select NET_HOOKS ++ default n ++ help ++ If you say Y here, you will be able to control access to IP ports ++ based on user-/groupid. ++ ++ If you're unsure, say N. ++ ++config ACCESSFS_PROT_SOCK ++ int "Range of protected ports (1024-65536)" ++ depends on ACCESSFS_USER_PORTS ++ default 1024 ++ help ++ Here you can extend the range of protected ports. This is ++ from 1-1023 inclusive on normal unix systems. One use for this ++ could be to reserve ports for X11 (port 6000) or database ++ servers (port 3306 for mysql), so nobody else could grab this port. ++ The default permission for extended ports is --x--x--x. ++ ++ If you build this as a module, you can specify the range of ++ protected ports at module load time (max_prot_sock). ++ ++ If you're unsure, say 1024. ++ ++config ACCESSFS_IGNORE_NET_BIND_SERVICE ++ bool "Ignore CAP_NET_BIND_SERVICE capability" ++ depends on ACCESSFS_USER_PORTS ++ default n ++ help ++ This option lets you decide, wether a user with ++ CAP_NET_BIND_SERVICE capability is able to override ++ your userport configuration. ++ ++ If you build this as a module, you can specify this ++ option at module load time (ignore_net_bind_service). ++ ++ If you're unsure, say n. ++ ++config ACCESSFS_USER_CAPABILITIES ++ bool "User permission based capabilities" ++ depends on ACCESS_FS = y ++ select SECURITY ++ default n ++ help ++ If you say Y here, you will be able to grant capabilities based on ++ user-/groupid (root by default). ++ ++ If you're unsure, say N. +diff --git a/fs/accessfs/Makefile b/fs/accessfs/Makefile +new file mode 100644 +index 0000000..63a5647 +--- /dev/null ++++ b/fs/accessfs/Makefile +@@ -0,0 +1,11 @@ ++# ++# Makefile for the linux accessfs routines. 
++# ++ ++obj-$(CONFIG_ACCESS_FS) += accessfs.o ++obj-$(CONFIG_ACCESSFS_USER_CAPABILITIES) += usercaps.o ++obj-$(CONFIG_ACCESSFS_USER_PORTS) += userports.o ++ ++accessfs-objs := inode.o ++usercaps-objs := capabilities.o ++userports-objs := ip.o +diff --git a/fs/accessfs/capabilities.c b/fs/accessfs/capabilities.c +new file mode 100644 +index 0000000..1c43f36 +--- /dev/null ++++ b/fs/accessfs/capabilities.c +@@ -0,0 +1,109 @@ ++/* Copyright (c) 2002-2006 Olaf Dietsche ++ * ++ * User based capabilities for Linux. ++ */ ++ ++#include ++#include ++#include ++#include ++ ++/* perl -n -e 'print "\"", lc($1), "\",\n" if (m/^#define\s+CAP_(.+?)\s+\d+$/);' include/linux/capability.h */ ++static const char *names[] = { ++ "chown", ++ "dac_override", ++ "dac_read_search", ++ "fowner", ++ "fsetid", ++ "kill", ++ "setgid", ++ "setuid", ++ "setpcap", ++ "linux_immutable", ++ "net_bind_service", ++ "net_broadcast", ++ "net_admin", ++ "net_raw", ++ "ipc_lock", ++ "ipc_owner", ++ "sys_module", ++ "sys_rawio", ++ "sys_chroot", ++ "sys_ptrace", ++ "sys_pacct", ++ "sys_admin", ++ "sys_boot", ++ "sys_nice", ++ "sys_resource", ++ "sys_time", ++ "sys_tty_config", ++ "mknod", ++ "lease", ++ "audit_write", ++ "audit_control", ++ "setfcap", ++ "mac_override", ++ "mac_admin", ++ "syslog", ++ "wake_alarm", ++}; ++ ++static struct access_attr caps[ARRAY_SIZE(names)]; ++ ++static int accessfs_capable(struct task_struct *tsk, const struct cred *cred, struct user_namespace *ns, int cap, int audit) ++{ ++ if (accessfs_permitted(&caps[cap], MAY_EXEC)) { ++ /* capability granted */ ++ return 0; ++ } ++ ++ /* capability denied */ ++ return -EPERM; ++} ++ ++static struct security_operations accessfs_security_ops = { ++ .name = "usercaps", ++ .capable = accessfs_capable, ++}; ++ ++static void unregister_capabilities(struct accessfs_direntry *dir, int n) ++{ ++ int i; ++ for (i = 0; i < n; ++i) ++ accessfs_unregister(dir, names[i]); ++} ++ ++static int __init init_capabilities(void) ++{ ++ struct accessfs_direntry *dir; ++ int i, err; ++ dir = accessfs_make_dirpath("capabilities"); ++ if (dir == 0) ++ return -ENOTDIR; ++ ++ for (i = 0; i < ARRAY_SIZE(caps); ++i) { ++ caps[i].uid = 0; ++ caps[i].gid = 0; ++ caps[i].mode = S_IXUSR; ++ err = accessfs_register(dir, names[i], &caps[i]); ++ if (err) { ++ unregister_capabilities(dir, i); ++ return err; ++ } ++ } ++ ++ if (!security_module_enable(&accessfs_security_ops)) ++ return -EAGAIN; ++ ++ err = register_security(&accessfs_security_ops); ++ if (err != 0) ++ unregister_capabilities(dir, ARRAY_SIZE(names)); ++ ++ return err; ++} ++ ++security_initcall(init_capabilities); ++ ++MODULE_AUTHOR("Olaf Dietsche"); ++MODULE_DESCRIPTION("User based capabilities"); ++MODULE_LICENSE("GPL v2"); +diff --git a/fs/accessfs/inode.c b/fs/accessfs/inode.c +new file mode 100644 +index 0000000..a2247e2 +--- /dev/null ++++ b/fs/accessfs/inode.c +@@ -0,0 +1,431 @@ ++/* Copyright (c) 2001-2006 Olaf Dietsche ++ * ++ * Access permission filesystem for Linux. ++ * ++ * 2002 Ben Clifford, create mount point at /proc/access ++ * 2002 Ben Clifford, trying to make it work under 2.5.5-dj2 ++ * (see comments: BENC255 for reminders and todos) ++ * ++ * ++ * BENC255: the kernel doesn't lock BKL for us when entering methods ++ * (see Documentation/fs/porting.txt) ++ * Need to look at code here and see if we need either the BKL ++ * or our own lock - I think probably not. 
++ * ++ */ ++ ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++ ++#define ACCESSFS_MAGIC 0x3c1d36e7 ++ ++static struct proc_dir_entry *mountdir = NULL; ++ ++static DEFINE_MUTEX(accessfs_sem); ++ ++static struct inode_operations accessfs_inode_operations; ++static struct file_operations accessfs_dir_file_operations; ++static struct inode_operations accessfs_dir_inode_operations; ++ ++static inline void accessfs_readdir_aux(struct file *filp, ++ struct accessfs_direntry *dir, ++ int start, void *dirent, ++ filldir_t filldir) ++{ ++ struct list_head *list; ++ int i = 2; ++ list_for_each(list, &dir->children) { ++ struct accessfs_entry *de; ++ if (i++ < start) ++ continue; ++ ++ de = list_entry(list, struct accessfs_entry, siblings); ++ if (filldir(dirent, de->name, strlen(de->name), filp->f_pos, ++ de->ino, DT_UNKNOWN) < 0) ++ break; ++ ++ ++filp->f_pos; ++ } ++} ++ ++static int accessfs_readdir(struct file *filp, void *dirent, filldir_t filldir) ++{ ++ int i; ++ struct dentry *dentry = filp->f_dentry; ++ struct accessfs_direntry *dir; ++ ++ i = filp->f_pos; ++ switch (i) { ++ case 0: ++ if (filldir(dirent, ".", 1, i, dentry->d_inode->i_ino, ++ DT_DIR) < 0) ++ break; ++ ++ ++i; ++ ++filp->f_pos; ++ /* NO break; */ ++ case 1: ++ if (filldir(dirent, "..", 2, i, ++ dentry->d_parent->d_inode->i_ino, DT_DIR) < 0) ++ break; ++ ++ ++i; ++ ++filp->f_pos; ++ /* NO break; */ ++ default: ++ mutex_lock(&accessfs_sem); ++ dir = dentry->d_inode->i_private; ++ accessfs_readdir_aux(filp, dir, i, dirent, filldir); ++ mutex_unlock(&accessfs_sem); ++ break; ++ } ++ ++ return 0; ++} ++ ++static struct accessfs_entry *accessfs_lookup_entry(struct accessfs_entry *pe, ++ const char *name, int len) ++{ ++ struct list_head *list; ++ struct accessfs_direntry *dir; ++ if (!S_ISDIR(pe->attr->mode)) ++ return NULL; ++ ++ dir = (struct accessfs_direntry *) pe; ++ list_for_each(list, &dir->children) { ++ struct accessfs_entry *de = list_entry(list, struct accessfs_entry, siblings); ++ if (strncmp(de->name, name, len) == 0 && de->name[len] == 0) ++ return de; ++ } ++ ++ return NULL; ++} ++ ++static struct accessfs_direntry accessfs_rootdir = { ++ { "/", ++ LIST_HEAD_INIT(accessfs_rootdir.node.hash), ++ LIST_HEAD_INIT(accessfs_rootdir.node.siblings), ++ 1, &accessfs_rootdir.attr }, ++ NULL, LIST_HEAD_INIT(accessfs_rootdir.children), ++ { 0, 0, S_IFDIR | 0755 } ++}; ++ ++static void accessfs_init_inode(struct inode *inode, struct accessfs_entry *pe) ++{ ++ static const struct timespec epoch = {0, 0}; ++ inode->i_private = pe; ++ inode->i_uid = pe->attr->uid; ++ inode->i_gid = pe->attr->gid; ++ inode->i_mode = pe->attr->mode; ++/* ++ inode->i_blksize = PAGE_CACHE_SIZE; ++ inode->i_blocks = 0; ++ inode->i_rdev = NODEV; ++*/ ++ inode->i_atime = inode->i_mtime = inode->i_ctime = epoch; ++ switch (inode->i_mode & S_IFMT) { ++ case S_IFREG: ++ inode->i_op = &accessfs_inode_operations; ++ break; ++ case S_IFDIR: ++ inode->i_op = &accessfs_dir_inode_operations; ++ inode->i_fop = &accessfs_dir_file_operations; ++ break; ++ default: ++ BUG(); ++ break; ++ } ++} ++ ++static struct inode *accessfs_get_root_inode(struct super_block *sb) ++{ ++ struct inode *inode = new_inode(sb); ++ if (inode) { ++ mutex_lock(&accessfs_sem); ++/* inode->i_ino = accessfs_rootdir.node.ino; */ ++ accessfs_init_inode(inode, &accessfs_rootdir.node); ++ accessfs_rootdir.node.ino = inode->i_ino; ++ mutex_unlock(&accessfs_sem); ++ } ++ ++ return inode; ++} ++ ++static LIST_HEAD(hash); ++ 
++static int accessfs_node_init(struct accessfs_direntry *parent, ++ struct accessfs_entry *de, const char *name, ++ size_t len, struct access_attr *attr, mode_t mode) ++{ ++ static unsigned long ino = 1; ++ de->name = kmalloc(len + 1, GFP_KERNEL); ++ if (de->name == NULL) ++ return -ENOMEM; ++ ++ strncpy(de->name, name, len); ++ de->name[len] = 0; ++ de->ino = ++ino; ++ de->attr = attr; ++ de->attr->uid = 0; ++ de->attr->gid = 0; ++ de->attr->mode = mode; ++ ++ list_add_tail(&de->hash, &hash); ++ list_add_tail(&de->siblings, &parent->children); ++ return 0; ++} ++ ++static int accessfs_mknod(struct accessfs_direntry *dir, const char *name, ++ struct access_attr *attr) ++{ ++ struct accessfs_entry *pe; ++ pe = kmalloc(sizeof(struct accessfs_entry), GFP_KERNEL); ++ if (pe == NULL) ++ return -ENOMEM; ++ ++ accessfs_node_init(dir, pe, name, strlen(name), attr, ++ S_IFREG | attr->mode); ++ return 0; ++} ++ ++static struct accessfs_direntry *accessfs_mkdir(struct accessfs_direntry *parent, ++ const char *name, size_t len) ++{ ++ int err; ++ struct accessfs_direntry *dir; ++ dir = kmalloc(sizeof(struct accessfs_direntry), GFP_KERNEL); ++ if (dir == NULL) ++ return NULL; ++ ++ dir->parent = parent; ++ INIT_LIST_HEAD(&dir->children); ++ err = accessfs_node_init(parent, &dir->node, name, len, &dir->attr, ++ S_IFDIR | 0755); ++ if (err) { ++ kfree(dir); ++ dir = 0; ++ } ++ ++ return dir; ++} ++ ++struct accessfs_direntry *accessfs_make_dirpath(const char *name) ++{ ++ struct accessfs_direntry *dir = &accessfs_rootdir; ++ const char *slash; ++ mutex_lock(&accessfs_sem); ++ do { ++ struct accessfs_entry *de; ++ size_t len; ++ while (*name == '/') ++ ++name; ++ ++ slash = strchr(name, '/'); ++ len = slash ? slash - name : strlen(name); ++ de = accessfs_lookup_entry(&dir->node, name, len); ++ if (de == NULL) { ++ dir = accessfs_mkdir(dir, name, len); ++ } else if (S_ISDIR(de->attr->mode)) { ++ dir = (struct accessfs_direntry *) de; ++ } else { ++ dir = NULL; ++ } ++ ++ if (dir == NULL) ++ break; ++ ++ name = slash + 1; ++ } while (slash != NULL); ++ ++ mutex_unlock(&accessfs_sem); ++ return dir; ++} ++ ++static void accessfs_unlink(struct accessfs_entry *pe) ++{ ++ list_del_init(&pe->hash); ++ list_del_init(&pe->siblings); ++ kfree(pe->name); ++ kfree(pe); ++} ++ ++static int accessfs_notify_change(struct dentry *dentry, struct iattr *iattr) ++{ ++ struct accessfs_entry *pe; ++ struct inode *i = dentry->d_inode; ++ int err; ++ err = inode_change_ok(i, iattr); ++ if (err) ++ return err; ++ ++ setattr_copy(i, iattr); ++ ++ pe = (struct accessfs_entry *) i->i_private; ++ pe->attr->uid = i->i_uid; ++ pe->attr->gid = i->i_gid; ++ pe->attr->mode = i->i_mode; ++ return 0; ++} ++ ++static struct inode *accessfs_iget(struct super_block *sb, unsigned long ino) ++{ ++ struct list_head *list; ++ struct inode *inode = iget_locked(sb, ino); ++ if (!inode) ++ return ERR_PTR(-ENOMEM); ++ ++ if (!(inode->i_state & I_NEW)) ++ return inode; ++ ++ mutex_lock(&accessfs_sem); ++ list_for_each(list, &hash) { ++ struct accessfs_entry *pe; ++ pe = list_entry(list, struct accessfs_entry, hash); ++ if (pe->ino == ino) { ++ accessfs_init_inode(inode, pe); ++ break; ++ } ++ } ++ ++ mutex_unlock(&accessfs_sem); ++ return inode; ++} ++ ++static struct dentry *accessfs_lookup(struct inode *dir, struct dentry *dentry, ++ struct nameidata *nd) ++{ ++ struct inode *inode = NULL; ++ struct accessfs_entry *pe; ++ mutex_lock(&accessfs_sem); ++ pe = accessfs_lookup_entry(dir->i_private, dentry->d_name.name, ++ dentry->d_name.len); ++ 
mutex_unlock(&accessfs_sem); ++ if (pe) ++ inode = accessfs_iget(dir->i_sb, pe->ino); ++ ++ d_add(dentry, inode); ++ return NULL; ++} ++ ++static struct inode_operations accessfs_inode_operations = { ++ .setattr = accessfs_notify_change, ++}; ++ ++static struct inode_operations accessfs_dir_inode_operations = { ++ .lookup = accessfs_lookup, ++ .setattr = accessfs_notify_change, ++}; ++ ++static struct file_operations accessfs_dir_file_operations = { ++ .readdir = accessfs_readdir, ++}; ++ ++static struct super_operations accessfs_ops = { ++ .statfs = simple_statfs, ++}; ++ ++static int accessfs_fill_super(struct super_block *sb, void *data, int silent) ++{ ++ struct inode *inode; ++ struct dentry *root; ++ ++ sb->s_blocksize = PAGE_CACHE_SIZE; ++ sb->s_blocksize_bits = PAGE_CACHE_SHIFT; ++ sb->s_magic = ACCESSFS_MAGIC; ++ sb->s_op = &accessfs_ops; ++ inode = accessfs_get_root_inode(sb); ++ if (!inode) ++ return -ENOMEM; ++ ++ root = d_alloc_root(inode); ++ if (!root) { ++ iput(inode); ++ return -ENOMEM; ++ } ++ ++ sb->s_root = root; ++ return 0; ++} ++ ++static struct dentry *accessfs_mount(struct file_system_type *fs_type, ++ int flags, const char *dev_name, void *data) ++{ ++ return mount_single(fs_type, flags, data, accessfs_fill_super); ++} ++ ++int accessfs_permitted(struct access_attr *p, int mask) ++{ ++ mode_t mode = p->mode; ++ if (current_fsuid() == p->uid) ++ mode >>= 6; ++ else if (in_group_p(p->gid)) ++ mode >>= 3; ++ ++ return (mode & mask) == mask; ++} ++ ++int accessfs_register(struct accessfs_direntry *dir, const char *name, ++ struct access_attr *attr) ++{ ++ int err; ++ if (dir == 0) ++ return -EINVAL; ++ ++ mutex_lock(&accessfs_sem); ++ err = accessfs_mknod(dir, name, attr); ++ mutex_unlock(&accessfs_sem); ++ return err; ++} ++ ++void accessfs_unregister(struct accessfs_direntry *dir, const char *name) ++{ ++ struct accessfs_entry *pe; ++ mutex_lock(&accessfs_sem); ++ pe = accessfs_lookup_entry(&dir->node, name, strlen(name)); ++ if (pe) ++ accessfs_unlink(pe); ++ ++ mutex_unlock(&accessfs_sem); ++} ++ ++static struct file_system_type accessfs_fs_type = { ++ .owner = THIS_MODULE, ++ .name = "accessfs", ++ .mount = accessfs_mount, ++ .kill_sb = kill_anon_super, ++}; ++ ++static int __init init_accessfs_fs(void) ++{ ++ ++ /* create mount point for accessfs */ ++ mountdir = proc_mkdir("access", NULL); ++ return register_filesystem(&accessfs_fs_type); ++} ++ ++static void __exit exit_accessfs_fs(void) ++{ ++ unregister_filesystem(&accessfs_fs_type); ++ remove_proc_entry("access", NULL); ++} ++ ++module_init(init_accessfs_fs) ++module_exit(exit_accessfs_fs) ++ ++MODULE_AUTHOR("Olaf Dietsche"); ++MODULE_DESCRIPTION("Access Filesystem"); ++MODULE_LICENSE("GPL v2"); ++ ++EXPORT_SYMBOL(accessfs_permitted); ++EXPORT_SYMBOL(accessfs_make_dirpath); ++EXPORT_SYMBOL(accessfs_register); ++EXPORT_SYMBOL(accessfs_unregister); +diff --git a/fs/accessfs/ip.c b/fs/accessfs/ip.c +new file mode 100644 +index 0000000..bddd2f0 +--- /dev/null ++++ b/fs/accessfs/ip.c +@@ -0,0 +1,101 @@ ++/* Copyright (c) 2002-2006 Olaf Dietsche ++ * ++ * User permission based port access for Linux. 
++ */ ++ ++#include ++#include ++#include ++#include ++ ++static int max_prot_sock = CONFIG_ACCESSFS_PROT_SOCK; ++#ifndef CONFIG_ACCESSFS_IGNORE_NET_BIND_SERVICE ++#define CONFIG_ACCESSFS_IGNORE_NET_BIND_SERVICE 0 ++#endif ++static int ignore_net_bind_service = CONFIG_ACCESSFS_IGNORE_NET_BIND_SERVICE; ++static struct access_attr *bind_to_port; ++ ++static int accessfs_ip_prot_sock(struct socket *sock, ++ struct sockaddr *uaddr, int addr_len) ++{ ++ struct sockaddr_in *addr = (struct sockaddr_in *) uaddr; ++ unsigned short snum = ntohs(addr->sin_port); ++ if (snum && snum < max_prot_sock ++ && !accessfs_permitted(&bind_to_port[snum], MAY_EXEC) ++ && (ignore_net_bind_service || !capable(CAP_NET_BIND_SERVICE))) ++ return -EACCES; ++ ++ return 0; ++} ++ ++static int accessfs_ip6_prot_sock(struct socket *sock, ++ struct sockaddr *uaddr, int addr_len) ++{ ++ struct sockaddr_in6 *addr = (struct sockaddr_in6 *) uaddr; ++ unsigned short snum = ntohs(addr->sin6_port); ++ if (snum && snum < max_prot_sock ++ && !accessfs_permitted(&bind_to_port[snum], MAY_EXEC) ++ && !capable(CAP_NET_BIND_SERVICE)) ++ return -EACCES; ++ ++ return 0; ++} ++ ++static struct net_hook_operations ip_net_ops = { ++ .ip_prot_sock = accessfs_ip_prot_sock, ++ .ip6_prot_sock = accessfs_ip6_prot_sock, ++}; ++ ++static int __init init_ip(void) ++{ ++ struct accessfs_direntry *dir = accessfs_make_dirpath("net/ip/bind"); ++ int i; ++ ++ if (max_prot_sock < PROT_SOCK) ++ max_prot_sock = PROT_SOCK; ++ else if (max_prot_sock > 65536) ++ max_prot_sock = 65536; ++ ++ bind_to_port = kmalloc(max_prot_sock * sizeof(*bind_to_port), ++ GFP_KERNEL); ++ if (bind_to_port == 0) ++ return -ENOMEM; ++ ++ for (i = 1; i < max_prot_sock; ++i) { ++ char buf[sizeof("65536")]; ++ bind_to_port[i].uid = 0; ++ bind_to_port[i].gid = 0; ++ bind_to_port[i].mode = i < PROT_SOCK ? S_IXUSR : S_IXUGO; ++ sprintf(buf, "%d", i); ++ accessfs_register(dir, buf, &bind_to_port[i]); ++ } ++ ++ net_hooks_register(&ip_net_ops); ++ return 0; ++} ++ ++static void __exit exit_ip(void) ++{ ++ struct accessfs_direntry *dir = accessfs_make_dirpath("net/ip/bind"); ++ int i; ++ net_hooks_unregister(&ip_net_ops); ++ for (i = 1; i < max_prot_sock; ++i) { ++ char buf[sizeof("65536")]; ++ sprintf(buf, "%d", i); ++ accessfs_unregister(dir, buf); ++ } ++ ++ if (bind_to_port != NULL) ++ kfree(bind_to_port); ++} ++ ++module_init(init_ip) ++module_exit(exit_ip) ++ ++MODULE_AUTHOR("Olaf Dietsche"); ++MODULE_DESCRIPTION("User based IP ports permission"); ++MODULE_LICENSE("GPL v2"); ++module_param(max_prot_sock, int, 0444); ++MODULE_PARM_DESC(max_prot_sock, "Number of protected ports"); ++module_param(ignore_net_bind_service, bool, 0644); ++MODULE_PARM_DESC(ignore_net_bind_service, "Ignore CAP_NET_BIND_SERVICE capability"); +diff --git a/include/linux/accessfs_fs.h b/include/linux/accessfs_fs.h +new file mode 100644 +index 0000000..ecd914e +--- /dev/null ++++ b/include/linux/accessfs_fs.h +@@ -0,0 +1,42 @@ ++/* -*- mode: c -*- */ ++#ifndef __accessfs_fs_h_included__ ++#define __accessfs_fs_h_included__ 1 ++ ++/* Copyright (c) 2001 Olaf Dietsche ++ * ++ * Access permission filesystem for Linux. 
++ */ ++ ++#include ++#include ++#include ++#include ++#include ++ ++struct access_attr { ++ uid_t uid; ++ gid_t gid; ++ mode_t mode; ++}; ++ ++struct accessfs_entry { ++ char *name; ++ struct list_head hash; ++ struct list_head siblings; ++ ino_t ino; ++ struct access_attr *attr; ++}; ++ ++struct accessfs_direntry { ++ struct accessfs_entry node; ++ struct accessfs_direntry *parent; ++ struct list_head children; ++ struct access_attr attr; ++}; ++ ++extern int accessfs_permitted(struct access_attr *p, int mask); ++extern struct accessfs_direntry *accessfs_make_dirpath(const char *name); ++extern int accessfs_register(struct accessfs_direntry *dir, const char *name, struct access_attr *attr); ++extern void accessfs_unregister(struct accessfs_direntry *dir, const char *name); ++ ++#endif +diff --git a/include/net/sock.h b/include/net/sock.h +index 32e3937..5fa9348 100644 +--- a/include/net/sock.h ++++ b/include/net/sock.h +@@ -1860,4 +1860,47 @@ extern int sysctl_optmem_max; + extern __u32 sysctl_wmem_default; + extern __u32 sysctl_rmem_default; + ++/* Networking hooks */ ++extern int default_ip_prot_sock(struct socket *sock, struct sockaddr *uaddr, ++ int addr_len); ++extern int default_ip6_prot_sock(struct socket *sock, struct sockaddr *uaddr, ++ int addr_len); ++#ifdef CONFIG_NET_HOOKS ++struct net_hook_operations { ++ int (*ip_prot_sock)(struct socket *sock, ++ struct sockaddr *uaddr, int addr_len); ++ int (*ip6_prot_sock)(struct socket *sock, ++ struct sockaddr *uaddr, int addr_len); ++}; ++ ++extern struct net_hook_operations *net_ops; ++ ++extern void net_hooks_register(struct net_hook_operations *ops); ++extern void net_hooks_unregister(struct net_hook_operations *ops); ++ ++static inline int ip_prot_sock(struct socket *sock, struct sockaddr *uaddr, ++ int addr_len) ++{ ++ return net_ops->ip_prot_sock(sock, uaddr, addr_len); ++} ++ ++static inline int ip6_prot_sock(struct socket *sock, struct sockaddr *uaddr, ++ int addr_len) ++{ ++ return net_ops->ip6_prot_sock(sock, uaddr, addr_len); ++} ++#else ++static inline int ip_prot_sock(struct socket *sock, struct sockaddr *uaddr, ++ int addr_len) ++{ ++ return default_ip_prot_sock(sock, uaddr, addr_len); ++} ++ ++static inline int ip6_prot_sock(struct socket *sock, struct sockaddr *uaddr, ++ int addr_len) ++{ ++ return default_ip6_prot_sock(sock, uaddr, addr_len); ++} ++#endif ++ + #endif /* _SOCK_H */ +diff --git a/net/Kconfig b/net/Kconfig +index a073148..bb5fb42 100644 +--- a/net/Kconfig ++++ b/net/Kconfig +@@ -75,6 +75,18 @@ config INET + if INET + source "net/ipv4/Kconfig" + source "net/ipv6/Kconfig" ++ ++config NET_HOOKS ++ bool "IP: Networking hooks (Experimental)" ++ depends on INET && EXPERIMENTAL ++ default n ++ help ++ This option enables other kernel parts or modules to hook into the ++ networking area and provide fine grained control over the access to ++ IP ports. ++ ++ If you're unsure, say N. ++ + source "net/netlabel/Kconfig" + + endif # if INET +diff --git a/net/Makefile b/net/Makefile +index acdde49..4e5dc79 100644 +--- a/net/Makefile ++++ b/net/Makefile +@@ -61,6 +61,7 @@ + obj-$(CONFIG_IEEE802154) += ieee802154/ + obj-$(CONFIG_MAC802154) += mac802154/ + ++obj-$(CONFIG_NET) += hooks.o + ifeq ($(CONFIG_NET),y) + obj-$(CONFIG_SYSCTL) += sysctl_net.o + endif +diff --git a/net/hooks.c b/net/hooks.c +new file mode 100644 +index 0000000..33100e6 +--- /dev/null ++++ b/net/hooks.c +@@ -0,0 +1,55 @@ ++/* Copyright (c) 2002 Olaf Dietsche ++ * ++ * Networking hooks. Currently for IPv4 and IPv6 only. 
++ */ ++ ++#include ++#include ++#include ++#include ++ ++int default_ip_prot_sock(struct socket *sock, struct sockaddr *uaddr, int addr_len) ++{ ++ struct sockaddr_in *addr = (struct sockaddr_in *) uaddr; ++ unsigned short snum = ntohs(addr->sin_port); ++ if (snum && snum < PROT_SOCK && !capable(CAP_NET_BIND_SERVICE)) ++ return -EACCES; ++ ++ return 0; ++} ++ ++int default_ip6_prot_sock(struct socket *sock, struct sockaddr *uaddr, int addr_len) ++{ ++ struct sockaddr_in6 *addr = (struct sockaddr_in6 *) uaddr; ++ unsigned short snum = ntohs(addr->sin6_port); ++ if (snum && snum < PROT_SOCK && !capable(CAP_NET_BIND_SERVICE)) ++ return -EACCES; ++ ++ return 0; ++} ++ ++EXPORT_SYMBOL(default_ip_prot_sock); ++EXPORT_SYMBOL(default_ip6_prot_sock); ++ ++#ifdef CONFIG_NET_HOOKS ++static struct net_hook_operations default_net_ops = { ++ .ip_prot_sock = default_ip_prot_sock, ++ .ip6_prot_sock = default_ip6_prot_sock, ++}; ++ ++struct net_hook_operations *net_ops = &default_net_ops; ++ ++void net_hooks_register(struct net_hook_operations *ops) ++{ ++ net_ops = ops; ++} ++ ++void net_hooks_unregister(struct net_hook_operations *ops) ++{ ++ net_ops = &default_net_ops; ++} ++ ++EXPORT_SYMBOL(net_ops); ++EXPORT_SYMBOL(net_hooks_register); ++EXPORT_SYMBOL(net_hooks_unregister); ++#endif +diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c +index 1b5096a..9460a3c 100644 +--- a/net/ipv4/af_inet.c ++++ b/net/ipv4/af_inet.c +@@ -495,7 +495,7 @@ int inet_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len) + + snum = ntohs(addr->sin_port); + err = -EACCES; +- if (snum && snum < PROT_SOCK && !capable(CAP_NET_BIND_SERVICE)) ++ if (ip_prot_sock(sock, uaddr, addr_len)) + goto out; + + /* We keep a pair of addresses. rcv_saddr is the one +diff --git a/net/ipv6/af_inet6.c b/net/ipv6/af_inet6.c +index d27c797..154b1ec 100644 +--- a/net/ipv6/af_inet6.c ++++ b/net/ipv6/af_inet6.c +@@ -281,7 +281,7 @@ int inet6_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len) + return -EINVAL; + + snum = ntohs(addr->sin6_port); +- if (snum && snum < PROT_SOCK && !capable(CAP_NET_BIND_SERVICE)) ++ if (ip6_prot_sock(sock, uaddr, addr_len)) + return -EACCES; + + lock_sock(sk); + diff --git a/3.2.34/ata-prefer-ata-drivers-over-ide-drivers-when-both-are-built.patch b/3.2.34/ata-prefer-ata-drivers-over-ide-drivers-when-both-are-built.patch new file mode 100644 index 0000000..7af90e4 --- /dev/null +++ b/3.2.34/ata-prefer-ata-drivers-over-ide-drivers-when-both-are-built.patch @@ -0,0 +1,36 @@ +>From 9f04e51293b130474504216a477bb2a73cbf59e1 Mon Sep 17 00:00:00 2001 +From: Anssi Hannula +Date: Thu, 22 Mar 2012 22:29:11 +0200 +Subject: [PATCH] ata: prefer ata drivers over ide drivers when both are built + +Currently the old IDE drivers are preferred over ATA drivers when both +are built, since ide/ is listed first in drivers/Makefile and therefore +the IDE drivers end up before ATA drivers in modules.order which is used +by depmod/modprobe for module ordering. + +Change it so that ATA drivers are preferred over IDE driver by moving +the ide/ entry under ata/ in drivers/Makefile. 
+ +Signed-off-by: Anssi Hannula +--- + drivers/Makefile | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/drivers/Makefile b/drivers/Makefile +index 932e8bf..e8df3d0 100644 +--- a/drivers/Makefile ++++ b/drivers/Makefile +@@ -47,9 +47,9 @@ obj-$(CONFIG_PARPORT) += parport/ + obj-y += base/ block/ misc/ mfd/ nfc/ + obj-$(CONFIG_NUBUS) += nubus/ + obj-y += macintosh/ +-obj-$(CONFIG_IDE) += ide/ + obj-$(CONFIG_SCSI) += scsi/ + obj-$(CONFIG_ATA) += ata/ ++obj-$(CONFIG_IDE) += ide/ + obj-$(CONFIG_TARGET_CORE) += target/ + obj-$(CONFIG_MTD) += mtd/ + obj-$(CONFIG_SPI) += spi/ +-- +1.7.9.3 + diff --git a/3.2.34/aufs3-standalone-3.2.patch b/3.2.34/aufs3-standalone-3.2.patch new file mode 100644 index 0000000..9a3c1db --- /dev/null +++ b/3.2.34/aufs3-standalone-3.2.patch @@ -0,0 +1,30657 @@ +diff -uNr linux-3.2.0-gentoo-r1.orig//Documentation/ABI/testing/debugfs-aufs linux-3.2.0-gentoo-r1/Documentation/ABI/testing/debugfs-aufs +--- linux-3.2.0-gentoo-r1.orig//Documentation/ABI/testing/debugfs-aufs 1970-01-01 01:00:00.000000000 +0100 ++++ linux-3.2.0-gentoo-r1/Documentation/ABI/testing/debugfs-aufs 2012-01-17 12:11:16.226894357 +0100 +@@ -0,0 +1,37 @@ ++What: /debug/aufs/si_/ ++Date: March 2009 ++Contact: J. R. Okajima ++Description: ++ Under /debug/aufs, a directory named si_ is created ++ per aufs mount, where is a unique id generated ++ internally. ++ ++What: /debug/aufs/si_/xib ++Date: March 2009 ++Contact: J. R. Okajima ++Description: ++ It shows the consumed blocks by xib (External Inode Number ++ Bitmap), its block size and file size. ++ When the aufs mount option 'noxino' is specified, it ++ will be empty. About XINO files, see the aufs manual. ++ ++What: /debug/aufs/si_/xino0, xino1 ... xinoN ++Date: March 2009 ++Contact: J. R. Okajima ++Description: ++ It shows the consumed blocks by xino (External Inode Number ++ Translation Table), its link count, block size and file ++ size. ++ When the aufs mount option 'noxino' is specified, it ++ will be empty. About XINO files, see the aufs manual. ++ ++What: /debug/aufs/si_/xigen ++Date: March 2009 ++Contact: J. R. Okajima ++Description: ++ It shows the consumed blocks by xigen (External Inode ++ Generation Table), its block size and file size. ++ If CONFIG_AUFS_EXPORT is disabled, this entry will not ++ be created. ++ When the aufs mount option 'noxino' is specified, it ++ will be empty. About XINO files, see the aufs manual. +diff -uNr linux-3.2.0-gentoo-r1.orig//Documentation/ABI/testing/sysfs-aufs linux-3.2.0-gentoo-r1/Documentation/ABI/testing/sysfs-aufs +--- linux-3.2.0-gentoo-r1.orig//Documentation/ABI/testing/sysfs-aufs 1970-01-01 01:00:00.000000000 +0100 ++++ linux-3.2.0-gentoo-r1/Documentation/ABI/testing/sysfs-aufs 2012-01-17 12:11:16.226894357 +0100 +@@ -0,0 +1,24 @@ ++What: /sys/fs/aufs/si_/ ++Date: March 2009 ++Contact: J. R. Okajima ++Description: ++ Under /sys/fs/aufs, a directory named si_ is created ++ per aufs mount, where is a unique id generated ++ internally. ++ ++What: /sys/fs/aufs/si_/br0, br1 ... brN ++Date: March 2009 ++Contact: J. R. Okajima ++Description: ++ It shows the abolute path of a member directory (which ++ is called branch) in aufs, and its permission. ++ ++What: /sys/fs/aufs/si_/xi_path ++Date: March 2009 ++Contact: J. R. Okajima ++Description: ++ It shows the abolute path of XINO (External Inode Number ++ Bitmap, Translation Table and Generation Table) file ++ even if it is the default path. ++ When the aufs mount option 'noxino' is specified, it ++ will be empty. 
About XINO files, see the aufs manual. +diff -uNr linux-3.2.0-gentoo-r1.orig//Documentation/filesystems/aufs/design/01intro.txt linux-3.2.0-gentoo-r1/Documentation/filesystems/aufs/design/01intro.txt +--- linux-3.2.0-gentoo-r1.orig//Documentation/filesystems/aufs/design/01intro.txt 1970-01-01 01:00:00.000000000 +0100 ++++ linux-3.2.0-gentoo-r1/Documentation/filesystems/aufs/design/01intro.txt 2012-01-17 12:11:16.263931727 +0100 +@@ -0,0 +1,162 @@ ++ ++# Copyright (C) 2005-2011 Junjiro R. Okajima ++# ++# This program is free software; you can redistribute it and/or modify ++# it under the terms of the GNU General Public License as published by ++# the Free Software Foundation; either version 2 of the License, or ++# (at your option) any later version. ++# ++# This program is distributed in the hope that it will be useful, ++# but WITHOUT ANY WARRANTY; without even the implied warranty of ++# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++# GNU General Public License for more details. ++# ++# You should have received a copy of the GNU General Public License ++# along with this program; if not, write to the Free Software ++# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA ++ ++Introduction ++---------------------------------------- ++ ++aufs [ei ju: ef es] | [a u f s] ++1. abbrev. for "advanced multi-layered unification filesystem". ++2. abbrev. for "another unionfs". ++3. abbrev. for "auf das" in German which means "on the" in English. ++ Ex. "Butter aufs Brot"(G) means "butter onto bread"(E). ++ But "Filesystem aufs Filesystem" is hard to understand. ++ ++AUFS is a filesystem with features: ++- multi layered stackable unification filesystem, the member directory ++ is called as a branch. ++- branch permission and attribute, 'readonly', 'real-readonly', ++ 'readwrite', 'whiteout-able', 'link-able whiteout' and their ++ combination. ++- internal "file copy-on-write". ++- logical deletion, whiteout. ++- dynamic branch manipulation, adding, deleting and changing permission. ++- allow bypassing aufs, user's direct branch access. ++- external inode number translation table and bitmap which maintains the ++ persistent aufs inode number. ++- seekable directory, including NFS readdir. ++- file mapping, mmap and sharing pages. ++- pseudo-link, hardlink over branches. ++- loopback mounted filesystem as a branch. ++- several policies to select one among multiple writable branches. ++- revert a single systemcall when an error occurs in aufs. ++- and more... ++ ++ ++Multi Layered Stackable Unification Filesystem ++---------------------------------------------------------------------- ++Most people already knows what it is. ++It is a filesystem which unifies several directories and provides a ++merged single directory. When users access a file, the access will be ++passed/re-directed/converted (sorry, I am not sure which English word is ++correct) to the real file on the member filesystem. The member ++filesystem is called 'lower filesystem' or 'branch' and has a mode ++'readonly' and 'readwrite.' And the deletion for a file on the lower ++readonly branch is handled by creating 'whiteout' on the upper writable ++branch. ++ ++On LKML, there have been discussions about UnionMount (Jan Blunck, ++Bharata B Rao and Valerie Aurora) and Unionfs (Erez Zadok). They took ++different approaches to implement the merged-view. ++The former tries putting it into VFS, and the latter implements as a ++separate filesystem. 
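As a rough illustration of the merged-view lookup described above (a hypothetical sketch, not aufs code and not part of any patch in this series), the top-down scan over branches can be pictured like this, with a whiteout on an upper branch hiding everything below it:

enum lookup_state { ENT_NONE, ENT_FILE, ENT_WHITEOUT };

struct branch {
	const char *path;				/* mount point of the member dir */
	enum lookup_state (*lookup)(const char *name);	/* how this branch answers a name */
};

/* Top-down scan: the first real entry wins, a whiteout on an upper
 * branch stops the scan and hides every lower branch. */
static int union_find(const struct branch *br, int nbr, const char *name)
{
	int i;

	for (i = 0; i < nbr; i++) {
		switch (br[i].lookup(name)) {
		case ENT_FILE:
			return i;	/* serve the file from branch i */
		case ENT_WHITEOUT:
			return -1;	/* logically deleted, stop here */
		case ENT_NONE:
			break;		/* keep digging down */
		}
	}
	return -1;			/* not found on any branch */
}

The struct and the lookup callback are purely illustrative stand-ins for the per-branch dentry arrays that the design documents below describe.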
++(If I misunderstand about these implementations, please let me know and ++I shall correct it. Because it is a long time ago when I read their ++source files last time). ++ ++UnionMount's approach will be able to small, but may be hard to share ++branches between several UnionMount since the whiteout in it is ++implemented in the inode on branch filesystem and always ++shared. According to Bharata's post, readdir does not seems to be ++finished yet. ++There are several missing features known in this implementations such as ++- for users, the inode number may change silently. eg. copy-up. ++- link(2) may break by copy-up. ++- read(2) may get an obsoleted filedata (fstat(2) too). ++- fcntl(F_SETLK) may be broken by copy-up. ++- unnecessary copy-up may happen, for example mmap(MAP_PRIVATE) after ++ open(O_RDWR). ++ ++Unionfs has a longer history. When I started implementing a stacking filesystem ++(Aug 2005), it already existed. It has virtual super_block, inode, ++dentry and file objects and they have an array pointing lower same kind ++objects. After contributing many patches for Unionfs, I re-started my ++project AUFS (Jun 2006). ++ ++In AUFS, the structure of filesystem resembles to Unionfs, but I ++implemented my own ideas, approaches and enhancements and it became ++totally different one. ++ ++Comparing DM snapshot and fs based implementation ++- the number of bytes to be copied between devices is much smaller. ++- the type of filesystem must be one and only. ++- the fs must be writable, no readonly fs, even for the lower original ++ device. so the compression fs will not be usable. but if we use ++ loopback mount, we may address this issue. ++ for instance, ++ mount /cdrom/squashfs.img /sq ++ losetup /sq/ext2.img ++ losetup /somewhere/cow ++ dmsetup "snapshot /dev/loop0 /dev/loop1 ..." ++- it will be difficult (or needs more operations) to extract the ++ difference between the original device and COW. ++- DM snapshot-merge may help a lot when users try merging. in the ++ fs-layer union, users will use rsync(1). ++ ++ ++Several characters/aspects of aufs ++---------------------------------------------------------------------- ++ ++Aufs has several characters or aspects. ++1. a filesystem, callee of VFS helper ++2. sub-VFS, caller of VFS helper for branches ++3. a virtual filesystem which maintains persistent inode number ++4. reader/writer of files on branches such like an application ++ ++1. Callee of VFS Helper ++As an ordinary linux filesystem, aufs is a callee of VFS. For instance, ++unlink(2) from an application reaches sys_unlink() kernel function and ++then vfs_unlink() is called. vfs_unlink() is one of VFS helper and it ++calls filesystem specific unlink operation. Actually aufs implements the ++unlink operation but it behaves like a redirector. ++ ++2. Caller of VFS Helper for Branches ++aufs_unlink() passes the unlink request to the branch filesystem as if ++it were called from VFS. So the called unlink operation of the branch ++filesystem acts as usual. As a caller of VFS helper, aufs should handle ++every necessary pre/post operation for the branch filesystem. ++- acquire the lock for the parent dir on a branch ++- lookup in a branch ++- revalidate dentry on a branch ++- mnt_want_write() for a branch ++- vfs_unlink() for a branch ++- mnt_drop_write() for a branch ++- release the lock on a branch ++ ++3. Persistent Inode Number ++One of the most important issue for a filesystem is to maintain inode ++numbers. 
This is particularly important to support exporting a ++filesystem via NFS. Aufs is a virtual filesystem which doesn't have a ++backend block device for its own. But some storage is necessary to ++maintain inode number. It may be a large space and may not suit to keep ++in memory. Aufs rents some space from its first writable branch ++filesystem (by default) and creates file(s) on it. These files are ++created by aufs internally and removed soon (currently) keeping opened. ++Note: Because these files are removed, they are totally gone after ++ unmounting aufs. It means the inode numbers are not persistent ++ across unmount or reboot. I have a plan to make them really ++ persistent which will be important for aufs on NFS server. ++ ++4. Read/Write Files Internally (copy-on-write) ++Because a branch can be readonly, when you write a file on it, aufs will ++"copy-up" it to the upper writable branch internally. And then write the ++originally requested thing to the file. Generally kernel doesn't ++open/read/write file actively. In aufs, even a single write may cause a ++internal "file copy". This behaviour is very similar to cp(1) command. ++ ++Some people may think it is better to pass such work to user space ++helper, instead of doing in kernel space. Actually I am still thinking ++about it. But currently I have implemented it in kernel space. +diff -uNr linux-3.2.0-gentoo-r1.orig//Documentation/filesystems/aufs/design/02struct.txt linux-3.2.0-gentoo-r1/Documentation/filesystems/aufs/design/02struct.txt +--- linux-3.2.0-gentoo-r1.orig//Documentation/filesystems/aufs/design/02struct.txt 1970-01-01 01:00:00.000000000 +0100 ++++ linux-3.2.0-gentoo-r1/Documentation/filesystems/aufs/design/02struct.txt 2012-01-17 12:11:16.280135577 +0100 +@@ -0,0 +1,226 @@ ++ ++# Copyright (C) 2005-2011 Junjiro R. Okajima ++# ++# This program is free software; you can redistribute it and/or modify ++# it under the terms of the GNU General Public License as published by ++# the Free Software Foundation; either version 2 of the License, or ++# (at your option) any later version. ++# ++# This program is distributed in the hope that it will be useful, ++# but WITHOUT ANY WARRANTY; without even the implied warranty of ++# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++# GNU General Public License for more details. ++# ++# You should have received a copy of the GNU General Public License ++# along with this program; if not, write to the Free Software ++# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA ++ ++Basic Aufs Internal Structure ++ ++Superblock/Inode/Dentry/File Objects ++---------------------------------------------------------------------- ++As like an ordinary filesystem, aufs has its own ++superblock/inode/dentry/file objects. All these objects have a ++dynamically allocated array and store the same kind of pointers to the ++lower filesystem, branch. ++For example, when you build a union with one readwrite branch and one ++readonly, mounted /au, /rw and /ro respectively. ++- /au = /rw + /ro ++- /ro/fileA exists but /rw/fileA ++ ++Aufs lookup operation finds /ro/fileA and gets dentry for that. These ++pointers are stored in a aufs dentry. The array in aufs dentry will be, ++- [0] = NULL ++- [1] = /ro/fileA ++ ++This style of an array is essentially same to the aufs ++superblock/inode/dentry/file objects. ++ ++Because aufs supports manipulating branches, ie. add/delete/change ++dynamically, these objects has its own generation. 
When branches are ++changed, the generation in aufs superblock is incremented. And a ++generation in other object are compared when it is accessed. ++When a generation in other objects are obsoleted, aufs refreshes the ++internal array. ++ ++ ++Superblock ++---------------------------------------------------------------------- ++Additionally aufs superblock has some data for policies to select one ++among multiple writable branches, XIB files, pseudo-links and kobject. ++See below in detail. ++About the policies which supports copy-down a directory, see policy.txt ++too. ++ ++ ++Branch and XINO(External Inode Number Translation Table) ++---------------------------------------------------------------------- ++Every branch has its own xino (external inode number translation table) ++file. The xino file is created and unlinked by aufs internally. When two ++members of a union exist on the same filesystem, they share the single ++xino file. ++The struct of a xino file is simple, just a sequence of aufs inode ++numbers which is indexed by the lower inode number. ++In the above sample, assume the inode number of /ro/fileA is i111 and ++aufs assigns the inode number i999 for fileA. Then aufs writes 999 as ++4(8) bytes at 111 * 4(8) bytes offset in the xino file. ++ ++When the inode numbers are not contiguous, the xino file will be sparse ++which has a hole in it and doesn't consume as much disk space as it ++might appear. If your branch filesystem consumes disk space for such ++holes, then you should specify 'xino=' option at mounting aufs. ++ ++Also a writable branch has three kinds of "whiteout bases". All these ++are existed when the branch is joined to aufs and the names are ++whiteout-ed doubly, so that users will never see their names in aufs ++hierarchy. ++1. a regular file which will be linked to all whiteouts. ++2. a directory to store a pseudo-link. ++3. a directory to store an "orphan-ed" file temporary. ++ ++1. Whiteout Base ++ When you remove a file on a readonly branch, aufs handles it as a ++ logical deletion and creates a whiteout on the upper writable branch ++ as a hardlink of this file in order not to consume inode on the ++ writable branch. ++2. Pseudo-link Dir ++ See below, Pseudo-link. ++3. Step-Parent Dir ++ When "fileC" exists on the lower readonly branch only and it is ++ opened and removed with its parent dir, and then user writes ++ something into it, then aufs copies-up fileC to this ++ directory. Because there is no other dir to store fileC. After ++ creating a file under this dir, the file is unlinked. ++ ++Because aufs supports manipulating branches, ie. add/delete/change ++dynamically, a branch has its own id. When the branch order changes, aufs ++finds the new index by searching the branch id. ++ ++ ++Pseudo-link ++---------------------------------------------------------------------- ++Assume "fileA" exists on the lower readonly branch only and it is ++hardlinked to "fileB" on the branch. When you write something to fileA, ++aufs copies-up it to the upper writable branch. Additionally aufs ++creates a hardlink under the Pseudo-link Directory of the writable ++branch. The inode of a pseudo-link is kept in aufs super_block as a ++simple list. If fileB is read after unlinking fileA, aufs returns ++filedata from the pseudo-link instead of the lower readonly ++branch. Because the pseudo-link is based upon the inode, to keep the ++inode number by xino (see above) is important. 
++ ++All the hardlinks under the Pseudo-link Directory of the writable branch ++should be restored in a proper location later. Aufs provides a utility ++to do this. The userspace helpers executed at remounting and unmounting ++aufs by default. ++During this utility is running, it puts aufs into the pseudo-link ++maintenance mode. In this mode, only the process which began the ++maintenance mode (and its child processes) is allowed to operate in ++aufs. Some other processes which are not related to the pseudo-link will ++be allowed to run too, but the rest have to return an error or wait ++until the maintenance mode ends. If a process already acquires an inode ++mutex (in VFS), it has to return an error. ++ ++ ++XIB(external inode number bitmap) ++---------------------------------------------------------------------- ++Addition to the xino file per a branch, aufs has an external inode number ++bitmap in a superblock object. It is also a file such like a xino file. ++It is a simple bitmap to mark whether the aufs inode number is in-use or ++not. ++To reduce the file I/O, aufs prepares a single memory page to cache xib. ++ ++Aufs implements a feature to truncate/refresh both of xino and xib to ++reduce the number of consumed disk blocks for these files. ++ ++ ++Virtual or Vertical Dir, and Readdir in Userspace ++---------------------------------------------------------------------- ++In order to support multiple layers (branches), aufs readdir operation ++constructs a virtual dir block on memory. For readdir, aufs calls ++vfs_readdir() internally for each dir on branches, merges their entries ++with eliminating the whiteout-ed ones, and sets it to file (dir) ++object. So the file object has its entry list until it is closed. The ++entry list will be updated when the file position is zero and becomes ++old. This decision is made in aufs automatically. ++ ++The dynamically allocated memory block for the name of entries has a ++unit of 512 bytes (by default) and stores the names contiguously (no ++padding). Another block for each entry is handled by kmem_cache too. ++During building dir blocks, aufs creates hash list and judging whether ++the entry is whiteouted by its upper branch or already listed. ++The merged result is cached in the corresponding inode object and ++maintained by a customizable life-time option. ++ ++Some people may call it can be a security hole or invite DoS attack ++since the opened and once readdir-ed dir (file object) holds its entry ++list and becomes a pressure for system memory. But I'd say it is similar ++to files under /proc or /sys. The virtual files in them also holds a ++memory page (generally) while they are opened. When an idea to reduce ++memory for them is introduced, it will be applied to aufs too. ++For those who really hate this situation, I've developed readdir(3) ++library which operates this merging in userspace. You just need to set ++LD_PRELOAD environment variable, and aufs will not consume no memory in ++kernel space for readdir(3). ++ ++ ++Workqueue ++---------------------------------------------------------------------- ++Aufs sometimes requires privilege access to a branch. For instance, ++in copy-up/down operation. When a user process is going to make changes ++to a file which exists in the lower readonly branch only, and the mode ++of one of ancestor directories may not be writable by a user ++process. Here aufs copy-up the file with its ancestors and they may ++require privilege to set its owner/group/mode/etc. 
++This is a typical case of a application character of aufs (see ++Introduction). ++ ++Aufs uses workqueue synchronously for this case. It creates its own ++workqueue. The workqueue is a kernel thread and has privilege. Aufs ++passes the request to call mkdir or write (for example), and wait for ++its completion. This approach solves a problem of a signal handler ++simply. ++If aufs didn't adopt the workqueue and changed the privilege of the ++process, and if the mkdir/write call arises SIGXFSZ or other signal, ++then the user process might gain a privilege or the generated core file ++was owned by a superuser. ++ ++Also aufs uses the system global workqueue ("events" kernel thread) too ++for asynchronous tasks, such like handling inotify/fsnotify, re-creating a ++whiteout base and etc. This is unrelated to a privilege. ++Most of aufs operation tries acquiring a rw_semaphore for aufs ++superblock at the beginning, at the same time waits for the completion ++of all queued asynchronous tasks. ++ ++ ++Whiteout ++---------------------------------------------------------------------- ++The whiteout in aufs is very similar to Unionfs's. That is represented ++by its filename. UnionMount takes an approach of a file mode, but I am ++afraid several utilities (find(1) or something) will have to support it. ++ ++Basically the whiteout represents "logical deletion" which stops aufs to ++lookup further, but also it represents "dir is opaque" which also stop ++lookup. ++ ++In aufs, rmdir(2) and rename(2) for dir uses whiteout alternatively. ++In order to make several functions in a single systemcall to be ++revertible, aufs adopts an approach to rename a directory to a temporary ++unique whiteouted name. ++For example, in rename(2) dir where the target dir already existed, aufs ++renames the target dir to a temporary unique whiteouted name before the ++actual rename on a branch and then handles other actions (make it opaque, ++update the attributes, etc). If an error happens in these actions, aufs ++simply renames the whiteouted name back and returns an error. If all are ++succeeded, aufs registers a function to remove the whiteouted unique ++temporary name completely and asynchronously to the system global ++workqueue. ++ ++ ++Copy-up ++---------------------------------------------------------------------- ++It is a well-known feature or concept. ++When user modifies a file on a readonly branch, aufs operate "copy-up" ++internally and makes change to the new file on the upper writable branch. ++When the trigger systemcall does not update the timestamps of the parent ++dir, aufs reverts it after copy-up. +diff -uNr linux-3.2.0-gentoo-r1.orig//Documentation/filesystems/aufs/design/03lookup.txt linux-3.2.0-gentoo-r1/Documentation/filesystems/aufs/design/03lookup.txt +--- linux-3.2.0-gentoo-r1.orig//Documentation/filesystems/aufs/design/03lookup.txt 1970-01-01 01:00:00.000000000 +0100 ++++ linux-3.2.0-gentoo-r1/Documentation/filesystems/aufs/design/03lookup.txt 2012-01-17 12:11:16.294024590 +0100 +@@ -0,0 +1,106 @@ ++ ++# Copyright (C) 2005-2011 Junjiro R. Okajima ++# ++# This program is free software; you can redistribute it and/or modify ++# it under the terms of the GNU General Public License as published by ++# the Free Software Foundation; either version 2 of the License, or ++# (at your option) any later version. 
++# ++# This program is distributed in the hope that it will be useful, ++# but WITHOUT ANY WARRANTY; without even the implied warranty of ++# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++# GNU General Public License for more details. ++# ++# You should have received a copy of the GNU General Public License ++# along with this program; if not, write to the Free Software ++# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA ++ ++Lookup in a Branch ++---------------------------------------------------------------------- ++Since aufs has a character of sub-VFS (see Introduction), it operates ++lookup for branches as VFS does. It may be a heavy work. Generally ++speaking struct nameidata is a bigger structure and includes many ++information. But almost all lookup operation in aufs is the simplest ++case, ie. lookup only an entry directly connected to its parent. Digging ++down the directory hierarchy is unnecessary. ++ ++VFS has a function lookup_one_len() for that use, but it is not usable ++for a branch filesystem which requires struct nameidata. So aufs ++implements a simple lookup wrapper function. When a branch filesystem ++allows NULL as nameidata, it calls lookup_one_len(). Otherwise it builds ++a simplest nameidata and calls lookup_hash(). ++Here aufs applies "a principle in NFSD", ie. if the filesystem supports ++NFS-export, then it has to support NULL as a nameidata parameter for ++->create(), ->lookup() and ->d_revalidate(). So the lookup wrapper in ++aufs tests if ->s_export_op in the branch is NULL or not. ++ ++When a branch is a remote filesystem, aufs basically trusts its ++->d_revalidate(), also aufs forces the hardest revalidate tests for ++them. ++For d_revalidate, aufs implements three levels of revalidate tests. See ++"Revalidate Dentry and UDBA" in detail. ++ ++ ++Loopback Mount ++---------------------------------------------------------------------- ++Basically aufs supports any type of filesystem and block device for a ++branch (actually there are some exceptions). But it is prohibited to add ++a loopback mounted one whose backend file exists in a filesystem which is ++already added to aufs. The reason is to protect aufs from a recursive ++lookup. If it was allowed, the aufs lookup operation might re-enter a ++lookup for the loopback mounted branch in the same context, and will ++cause a deadlock. ++ ++ ++Revalidate Dentry and UDBA (User's Direct Branch Access) ++---------------------------------------------------------------------- ++Generally VFS helpers re-validate a dentry as a part of lookup. ++0. digging down the directory hierarchy. ++1. lock the parent dir by its i_mutex. ++2. lookup the final (child) entry. ++3. revalidate it. ++4. call the actual operation (create, unlink, etc.) ++5. unlock the parent dir ++ ++If the filesystem implements its ->d_revalidate() (step 3), then it is ++called. Actually aufs implements it and checks the dentry on a branch is ++still valid. ++But it is not enough. Because aufs has to release the lock for the ++parent dir on a branch at the end of ->lookup() (step 2) and ++->d_revalidate() (step 3) while the i_mutex of the aufs dir is still ++held by VFS. ++If the file on a branch is changed directly, eg. bypassing aufs, after ++aufs released the lock, then the subsequent operation may cause ++something unpleasant result. ++ ++This situation is a result of VFS architecture, ->lookup() and ++->d_revalidate() is separated. But I never say it is wrong. 
It is a good ++design from VFS's point of view. It is just not suitable for sub-VFS ++character in aufs. ++ ++Aufs supports such case by three level of revalidation which is ++selectable by user. ++1. Simple Revalidate ++ Addition to the native flow in VFS's, confirm the child-parent ++ relationship on the branch just after locking the parent dir on the ++ branch in the "actual operation" (step 4). When this validation ++ fails, aufs returns EBUSY. ->d_revalidate() (step 3) in aufs still ++ checks the validation of the dentry on branches. ++2. Monitor Changes Internally by Inotify/Fsnotify ++ Addition to above, in the "actual operation" (step 4) aufs re-lookup ++ the dentry on the branch, and returns EBUSY if it finds different ++ dentry. ++ Additionally, aufs sets the inotify/fsnotify watch for every dir on branches ++ during it is in cache. When the event is notified, aufs registers a ++ function to kernel 'events' thread by schedule_work(). And the ++ function sets some special status to the cached aufs dentry and inode ++ private data. If they are not cached, then aufs has nothing to ++ do. When the same file is accessed through aufs (step 0-3) later, ++ aufs will detect the status and refresh all necessary data. ++ In this mode, aufs has to ignore the event which is fired by aufs ++ itself. ++3. No Extra Validation ++ This is the simplest test and doesn't add any additional revalidation ++ test, and skip therevalidatin in step 4. It is useful and improves ++ aufs performance when system surely hide the aufs branches from user, ++ by over-mounting something (or another method). +diff -uNr linux-3.2.0-gentoo-r1.orig//Documentation/filesystems/aufs/design/04branch.txt linux-3.2.0-gentoo-r1/Documentation/filesystems/aufs/design/04branch.txt +--- linux-3.2.0-gentoo-r1.orig//Documentation/filesystems/aufs/design/04branch.txt 1970-01-01 01:00:00.000000000 +0100 ++++ linux-3.2.0-gentoo-r1/Documentation/filesystems/aufs/design/04branch.txt 2012-01-17 12:11:16.310228440 +0100 +@@ -0,0 +1,76 @@ ++ ++# Copyright (C) 2005-2011 Junjiro R. Okajima ++# ++# This program is free software; you can redistribute it and/or modify ++# it under the terms of the GNU General Public License as published by ++# the Free Software Foundation; either version 2 of the License, or ++# (at your option) any later version. ++# ++# This program is distributed in the hope that it will be useful, ++# but WITHOUT ANY WARRANTY; without even the implied warranty of ++# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++# GNU General Public License for more details. ++# ++# You should have received a copy of the GNU General Public License ++# along with this program; if not, write to the Free Software ++# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA ++ ++Branch Manipulation ++ ++Since aufs supports dynamic branch manipulation, ie. add/remove a branch ++and changing its permission/attribute, there are a lot of works to do. ++ ++ ++Add a Branch ++---------------------------------------------------------------------- ++o Confirm the adding dir exists outside of aufs, including loopback ++ mount. ++- and other various attributes... ++o Initialize the xino file and whiteout bases if necessary. ++ See struct.txt. ++ ++o Check the owner/group/mode of the directory ++ When the owner/group/mode of the adding directory differs from the ++ existing branch, aufs issues a warning because it may impose a ++ security risk. 
++ For example, when a upper writable branch has a world writable empty ++ top directory, a malicious user can create any files on the writable ++ branch directly, like copy-up and modify manually. If something like ++ /etc/{passwd,shadow} exists on the lower readonly branch but the upper ++ writable branch, and the writable branch is world-writable, then a ++ malicious guy may create /etc/passwd on the writable branch directly ++ and the infected file will be valid in aufs. ++ I am afraid it can be a security issue, but nothing to do except ++ producing a warning. ++ ++ ++Delete a Branch ++---------------------------------------------------------------------- ++o Confirm the deleting branch is not busy ++ To be general, there is one merit to adopt "remount" interface to ++ manipulate branches. It is to discard caches. At deleting a branch, ++ aufs checks the still cached (and connected) dentries and inodes. If ++ there are any, then they are all in-use. An inode without its ++ corresponding dentry can be alive alone (for example, inotify/fsnotify case). ++ ++ For the cached one, aufs checks whether the same named entry exists on ++ other branches. ++ If the cached one is a directory, because aufs provides a merged view ++ to users, as long as one dir is left on any branch aufs can show the ++ dir to users. In this case, the branch can be removed from aufs. ++ Otherwise aufs rejects deleting the branch. ++ ++ If any file on the deleting branch is opened by aufs, then aufs ++ rejects deleting. ++ ++ ++Modify the Permission of a Branch ++---------------------------------------------------------------------- ++o Re-initialize or remove the xino file and whiteout bases if necessary. ++ See struct.txt. ++ ++o rw --> ro: Confirm the modifying branch is not busy ++ Aufs rejects the request if any of these conditions are true. ++ - a file on the branch is mmap-ed. ++ - a regular file on the branch is opened for write and there is no ++ same named entry on the upper branch. +diff -uNr linux-3.2.0-gentoo-r1.orig//Documentation/filesystems/aufs/design/05wbr_policy.txt linux-3.2.0-gentoo-r1/Documentation/filesystems/aufs/design/05wbr_policy.txt +--- linux-3.2.0-gentoo-r1.orig//Documentation/filesystems/aufs/design/05wbr_policy.txt 1970-01-01 01:00:00.000000000 +0100 ++++ linux-3.2.0-gentoo-r1/Documentation/filesystems/aufs/design/05wbr_policy.txt 2012-01-17 12:11:16.333376797 +0100 +@@ -0,0 +1,65 @@ ++ ++# Copyright (C) 2005-2011 Junjiro R. Okajima ++# ++# This program is free software; you can redistribute it and/or modify ++# it under the terms of the GNU General Public License as published by ++# the Free Software Foundation; either version 2 of the License, or ++# (at your option) any later version. ++# ++# This program is distributed in the hope that it will be useful, ++# but WITHOUT ANY WARRANTY; without even the implied warranty of ++# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++# GNU General Public License for more details. ++# ++# You should have received a copy of the GNU General Public License ++# along with this program; if not, write to the Free Software ++# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA ++ ++Policies to Select One among Multiple Writable Branches ++---------------------------------------------------------------------- ++When the number of writable branch is more than one, aufs has to decide ++the target branch for file creation or copy-up. 
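The creation policies named in the rest of this section (top-down-parent by default, plus round-robin and most-free-space on request) can be pictured with a deliberately simplified sketch; the structure and helpers below are hypothetical, not aufs code:

#include <stdint.h>

struct wbranch {
	int		writable;	/* branch permission allows creation */
	uint64_t	free_bytes;	/* refreshed at a user-set interval */
};

/* round-robin: rotate over the writable branches for each new file */
static int wbr_create_rr(const struct wbranch *br, int nbr, int *last)
{
	int i, idx;

	for (i = 1; i <= nbr; i++) {
		idx = (*last + i) % nbr;
		if (br[idx].writable) {
			*last = idx;
			return idx;
		}
	}
	return -1;
}

/* most-free-space: pick the writable branch with the most room left */
static int wbr_create_mfs(const struct wbranch *br, int nbr)
{
	int i, best = -1;

	for (i = 0; i < nbr; i++)
		if (br[i].writable &&
		    (best < 0 || br[i].free_bytes > br[best].free_bytes))
			best = i;
	return best;
}

The rules and exceptions listed at the end of this section (opaque parents, whiteouts, link(2)/rename(2)) are applied on top of whichever branch such a policy returns.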
By default, the highest ++writable branch which has the parent (or ancestor) dir of the target ++file is chosen (top-down-parent policy). ++By user's request, aufs implements some other policies to select the ++writable branch, for file creation two policies, round-robin and ++most-free-space policies. For copy-up three policies, top-down-parent, ++bottom-up-parent and bottom-up policies. ++ ++As expected, the round-robin policy selects the branch in circular. When ++you have two writable branches and creates 10 new files, 5 files will be ++created for each branch. mkdir(2) systemcall is an exception. When you ++create 10 new directories, all will be created on the same branch. ++And the most-free-space policy selects the one which has most free ++space among the writable branches. The amount of free space will be ++checked by aufs internally, and users can specify its time interval. ++ ++The policies for copy-up is more simple, ++top-down-parent is equivalent to the same named on in create policy, ++bottom-up-parent selects the writable branch where the parent dir ++exists and the nearest upper one from the copyup-source, ++bottom-up selects the nearest upper writable branch from the ++copyup-source, regardless the existence of the parent dir. ++ ++There are some rules or exceptions to apply these policies. ++- If there is a readonly branch above the policy-selected branch and ++ the parent dir is marked as opaque (a variation of whiteout), or the ++ target (creating) file is whiteout-ed on the upper readonly branch, ++ then the result of the policy is ignored and the target file will be ++ created on the nearest upper writable branch than the readonly branch. ++- If there is a writable branch above the policy-selected branch and ++ the parent dir is marked as opaque or the target file is whiteouted ++ on the branch, then the result of the policy is ignored and the target ++ file will be created on the highest one among the upper writable ++ branches who has diropq or whiteout. In case of whiteout, aufs removes ++ it as usual. ++- link(2) and rename(2) systemcalls are exceptions in every policy. ++ They try selecting the branch where the source exists as possible ++ since copyup a large file will take long time. If it can't be, ++ ie. the branch where the source exists is readonly, then they will ++ follow the copyup policy. ++- There is an exception for rename(2) when the target exists. ++ If the rename target exists, aufs compares the index of the branches ++ where the source and the target exists and selects the higher ++ one. If the selected branch is readonly, then aufs follows the ++ copyup policy. +diff -uNr linux-3.2.0-gentoo-r1.orig//Documentation/filesystems/aufs/design/06mmap.txt linux-3.2.0-gentoo-r1/Documentation/filesystems/aufs/design/06mmap.txt +--- linux-3.2.0-gentoo-r1.orig//Documentation/filesystems/aufs/design/06mmap.txt 1970-01-01 01:00:00.000000000 +0100 ++++ linux-3.2.0-gentoo-r1/Documentation/filesystems/aufs/design/06mmap.txt 2012-01-17 12:11:16.333376797 +0100 +@@ -0,0 +1,47 @@ ++ ++# Copyright (C) 2005-2011 Junjiro R. Okajima ++# ++# This program is free software; you can redistribute it and/or modify ++# it under the terms of the GNU General Public License as published by ++# the Free Software Foundation; either version 2 of the License, or ++# (at your option) any later version. 
++# ++# This program is distributed in the hope that it will be useful, ++# but WITHOUT ANY WARRANTY; without even the implied warranty of ++# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++# GNU General Public License for more details. ++# ++# You should have received a copy of the GNU General Public License ++# along with this program; if not, write to the Free Software ++# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA ++ ++mmap(2) -- File Memory Mapping ++---------------------------------------------------------------------- ++In aufs, the file-mapped pages are handled by a branch fs directly, no ++interaction with aufs. It means aufs_mmap() calls the branch fs's ++->mmap(). ++This approach is simple and good, but there is one problem. ++Under /proc, several entries show the mmap-ped files by its path (with ++device and inode number), and the printed path will be the path on the ++branch fs's instead of virtual aufs's. ++This is not a problem in most cases, but some utilities lsof(1) (and its ++user) may expect the path on aufs. ++ ++To address this issue, aufs adds a new member called vm_prfile in struct ++vm_area_struct (and struct vm_region). The original vm_file points to ++the file on the branch fs in order to handle everything correctly as ++usual. The new vm_prfile points to a virtual file in aufs, and the ++show-functions in procfs refers to vm_prfile if it is set. ++Also we need to maintain several other places where touching vm_file ++such like ++- fork()/clone() copies vma and the reference count of vm_file is ++ incremented. ++- merging vma maintains the ref count too. ++ ++This is not a good approach. It just faking the printed path. But it ++leaves all behaviour around f_mapping unchanged. This is surely an ++advantage. ++Actually aufs had adopted another complicated approach which calls ++generic_file_mmap() and handles struct vm_operations_struct. In this ++approach, aufs met a hard problem and I could not solve it without ++switching the approach. +diff -uNr linux-3.2.0-gentoo-r1.orig//Documentation/filesystems/aufs/design/07export.txt linux-3.2.0-gentoo-r1/Documentation/filesystems/aufs/design/07export.txt +--- linux-3.2.0-gentoo-r1.orig//Documentation/filesystems/aufs/design/07export.txt 1970-01-01 01:00:00.000000000 +0100 ++++ linux-3.2.0-gentoo-r1/Documentation/filesystems/aufs/design/07export.txt 2012-01-17 12:11:16.338006469 +0100 +@@ -0,0 +1,59 @@ ++ ++# Copyright (C) 2005-2011 Junjiro R. Okajima ++# ++# This program is free software; you can redistribute it and/or modify ++# it under the terms of the GNU General Public License as published by ++# the Free Software Foundation; either version 2 of the License, or ++# (at your option) any later version. ++# ++# This program is distributed in the hope that it will be useful, ++# but WITHOUT ANY WARRANTY; without even the implied warranty of ++# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++# GNU General Public License for more details. ++# ++# You should have received a copy of the GNU General Public License ++# along with this program; if not, write to the Free Software ++# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA ++ ++Export Aufs via NFS ++---------------------------------------------------------------------- ++Here is an approach. ++- like xino/xib, add a new file 'xigen' which stores aufs inode ++ generation. ++- iget_locked(): initialize aufs inode generation for a new inode, and ++ store it in xigen file. 
++- destroy_inode(): increment aufs inode generation and store it in xigen ++ file. it is necessary even if it is not unlinked, because any data of ++ inode may be changed by UDBA. ++- encode_fh(): for a root dir, simply return FILEID_ROOT. otherwise ++ build file handle by ++ + branch id (4 bytes) ++ + superblock generation (4 bytes) ++ + inode number (4 or 8 bytes) ++ + parent dir inode number (4 or 8 bytes) ++ + inode generation (4 bytes)) ++ + return value of exportfs_encode_fh() for the parent on a branch (4 ++ bytes) ++ + file handle for a branch (by exportfs_encode_fh()) ++- fh_to_dentry(): ++ + find the index of a branch from its id in handle, and check it is ++ still exist in aufs. ++ + 1st level: get the inode number from handle and search it in cache. ++ + 2nd level: if not found, get the parent inode number from handle and ++ search it in cache. and then open the parent dir, find the matching ++ inode number by vfs_readdir() and get its name, and call ++ lookup_one_len() for the target dentry. ++ + 3rd level: if the parent dir is not cached, call ++ exportfs_decode_fh() for a branch and get the parent on a branch, ++ build a pathname of it, convert it a pathname in aufs, call ++ path_lookup(). now aufs gets a parent dir dentry, then handle it as ++ the 2nd level. ++ + to open the dir, aufs needs struct vfsmount. aufs keeps vfsmount ++ for every branch, but not itself. to get this, (currently) aufs ++ searches in current->nsproxy->mnt_ns list. it may not be a good ++ idea, but I didn't get other approach. ++ + test the generation of the gotten inode. ++- every inode operation: they may get EBUSY due to UDBA. in this case, ++ convert it into ESTALE for NFSD. ++- readdir(): call lockdep_on/off() because filldir in NFSD calls ++ lookup_one_len(), vfs_getattr(), encode_fh() and others. +diff -uNr linux-3.2.0-gentoo-r1.orig//Documentation/filesystems/aufs/design/08shwh.txt linux-3.2.0-gentoo-r1/Documentation/filesystems/aufs/design/08shwh.txt +--- linux-3.2.0-gentoo-r1.orig//Documentation/filesystems/aufs/design/08shwh.txt 1970-01-01 01:00:00.000000000 +0100 ++++ linux-3.2.0-gentoo-r1/Documentation/filesystems/aufs/design/08shwh.txt 2012-01-17 12:11:16.338006469 +0100 +@@ -0,0 +1,53 @@ ++ ++# Copyright (C) 2005-2011 Junjiro R. Okajima ++# ++# This program is free software; you can redistribute it and/or modify ++# it under the terms of the GNU General Public License as published by ++# the Free Software Foundation; either version 2 of the License, or ++# (at your option) any later version. ++# ++# This program is distributed in the hope that it will be useful, ++# but WITHOUT ANY WARRANTY; without even the implied warranty of ++# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++# GNU General Public License for more details. ++# ++# You should have received a copy of the GNU General Public License ++# along with this program; if not, write to the Free Software ++# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA ++ ++Show Whiteout Mode (shwh) ++---------------------------------------------------------------------- ++Generally aufs hides the name of whiteouts. But in some cases, to show ++them is very useful for users. For instance, creating a new middle layer ++(branch) by merging existing layers. 
++ ++(borrowing aufs1 HOW-TO from a user, Michael Towers) ++When you have three branches, ++- Bottom: 'system', squashfs (underlying base system), read-only ++- Middle: 'mods', squashfs, read-only ++- Top: 'overlay', ram (tmpfs), read-write ++ ++The top layer is loaded at boot time and saved at shutdown, to preserve ++the changes made to the system during the session. ++When larger changes have been made, or smaller changes have accumulated, ++the size of the saved top layer data grows. At this point, it would be ++nice to be able to merge the two overlay branches ('mods' and 'overlay') ++and rewrite the 'mods' squashfs, clearing the top layer and thus ++restoring save and load speed. ++ ++This merging is simplified by the use of another aufs mount, of just the ++two overlay branches using the 'shwh' option. ++# mount -t aufs -o ro,shwh,br:/livesys/overlay=ro+wh:/livesys/mods=rr+wh \ ++ aufs /livesys/merge_union ++ ++A merged view of these two branches is then available at ++/livesys/merge_union, and the new feature is that the whiteouts are ++visible! ++Note that in 'shwh' mode the aufs mount must be 'ro', which will disable ++writing to all branches. Also the default mode for all branches is 'ro'. ++It is now possible to save the combined contents of the two overlay ++branches to a new squashfs, e.g.: ++# mksquashfs /livesys/merge_union /path/to/newmods.squash ++ ++This new squashfs archive can be stored on the boot device and the ++initramfs will use it to replace the old one at the next boot. +diff -uNr linux-3.2.0-gentoo-r1.orig//Documentation/filesystems/aufs/design/10dynop.txt linux-3.2.0-gentoo-r1/Documentation/filesystems/aufs/design/10dynop.txt +--- linux-3.2.0-gentoo-r1.orig//Documentation/filesystems/aufs/design/10dynop.txt 1970-01-01 01:00:00.000000000 +0100 ++++ linux-3.2.0-gentoo-r1/Documentation/filesystems/aufs/design/10dynop.txt 2012-01-17 12:11:16.340321305 +0100 +@@ -0,0 +1,47 @@ ++ ++# Copyright (C) 2010-2011 Junjiro R. Okajima ++# ++# This program is free software; you can redistribute it and/or modify ++# it under the terms of the GNU General Public License as published by ++# the Free Software Foundation; either version 2 of the License, or ++# (at your option) any later version. ++# ++# This program is distributed in the hope that it will be useful, ++# but WITHOUT ANY WARRANTY; without even the implied warranty of ++# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++# GNU General Public License for more details. ++# ++# You should have received a copy of the GNU General Public License ++# along with this program; if not, write to the Free Software ++# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA ++ ++Dynamically customizable FS operations ++---------------------------------------------------------------------- ++Generally FS operations (struct inode_operations, struct ++address_space_operations, struct file_operations, etc.) are defined as ++"static const", but it never means that FS have only one set of ++operation. Some FS have multiple sets of them. For instance, ext2 has ++three sets, one for XIP, for NOBH, and for normal. ++Since aufs overrides and redirects these operations, sometimes aufs has ++to change its behaviour according to the branch FS type. More imporantly ++VFS acts differently if a function (member in the struct) is set or ++not. It means aufs should have several sets of operations and select one ++among them according to the branch FS definition. 
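A condensed sketch of that selective assignment, which the following paragraphs describe in prose (hypothetical wrapper names, kernel-style; not the actual aufs implementation):

#include <linux/fs.h>
#include <linux/module.h>
#include <linux/slab.h>

/* Build a file_operations set at run time, copying a member only when the
 * branch filesystem provides it, so e.g. io_submit(2) still returns an error
 * when the branch has no aio_read. */
static struct file_operations *dyop_build(const struct file_operations *h_fop)
{
	struct file_operations *op;

	op = kzalloc(sizeof(*op), GFP_KERNEL);
	if (!op)
		return NULL;

	op->owner  = THIS_MODULE;
	op->llseek = my_llseek;		/* always-present wrappers (hypothetical) */
	op->read   = my_read;
	op->write  = my_write;
	if (h_fop->aio_read)		/* optional, branch-dependent members */
		op->aio_read = my_aio_read;
	if (h_fop->aio_write)
		op->aio_write = my_aio_write;
	if (h_fop->mmap)
		op->mmap = my_mmap;

	return op;			/* freed when the owning branch goes away */
}

The my_* wrappers stand in for aufs's own redirecting operations; the point is only that a member stays NULL when the branch cannot support it, so VFS behaviour is unchanged.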
++
++In order to solve this problem and not to affect the behaviour of VFS,
++aufs defines these operations dynamically. For instance, aufs defines an
++aio_read function for struct file_operations, but it may not be set in
++the file_operations. When the branch FS doesn't have it, aufs doesn't
++set it in its file_operations while the function definition itself is
++still alive. So the behaviour of io_submit(2) will not change, and it
++will return an error when aio_read is not defined.
++
++The lifetime of these dynamically generated operation objects is
++maintained by the aufs branch object. When the branch is removed from
++aufs, the reference counter of the object is decremented. When it
++reaches zero, the dynamically generated operation object will be freed.
++
++This approach is designed mainly to support AIO (io_submit), Direct I/O
++and XIP.
++Currently this approach is applied to file_operations and
++vm_operations_struct for regular files only.
+diff -uNr linux-3.2.0-gentoo-r1.orig//Documentation/filesystems/aufs/design/99plan.txt linux-3.2.0-gentoo-r1/Documentation/filesystems/aufs/design/99plan.txt
+--- linux-3.2.0-gentoo-r1.orig//Documentation/filesystems/aufs/design/99plan.txt 1970-01-01 01:00:00.000000000 +0100
++++ linux-3.2.0-gentoo-r1/Documentation/filesystems/aufs/design/99plan.txt 2012-01-17 12:11:16.356525154 +0100
+@@ -0,0 +1,96 @@
++
++# Copyright (C) 2005-2011 Junjiro R. Okajima
++#
++# This program is free software; you can redistribute it and/or modify
++# it under the terms of the GNU General Public License as published by
++# the Free Software Foundation; either version 2 of the License, or
++# (at your option) any later version.
++#
++# This program is distributed in the hope that it will be useful,
++# but WITHOUT ANY WARRANTY; without even the implied warranty of
++# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
++# GNU General Public License for more details.
++#
++# You should have received a copy of the GNU General Public License
++# along with this program; if not, write to the Free Software
++# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
++
++Plan
++
++Restoring some features which were implemented in aufs1.
++They were dropped in aufs2 in order to make the source files simpler and
++easier to review.
++
++
++Test Only the Highest One for the Directory Permission (dirperm1 option)
++----------------------------------------------------------------------
++Let's try a case study.
++- aufs has two branches, upper readwrite and lower readonly.
++  /au = /rw + /ro
++- "dirA" exists under /ro, but not under /rw, and its mode is 0700.
++- user invoked "chmod a+rx /au/dirA"
++- then, does "dirA" become world readable?
++
++In this case, /ro/dirA is still 0700 since it exists in the readonly
++branch, or it may be a natively readonly filesystem. If aufs respects
++the lower branch, it should not respond to readdir requests from other
++users. But the user allowed it by chmod. Should aufs really reject
++showing the entries under /ro/dirA?
++
++To be honest, I don't have a perfect solution for this case. So I
++implemented the 'dirperm1' and 'nodirperm1' options in aufs1, and left
++the choice to users.
++When dirperm1 is specified, aufs checks only the highest (topmost)
++branch for the directory permission, and shows the entries. Otherwise,
++as usual, it checks every dir existing on all branches and rejects the
++request.
++
++As a side effect, the dirperm1 option improves the performance of aufs
++because the number of permission checks is reduced.
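++
++A minimal sketch of the idea in C (hypothetical code, not part of
++aufs3; it only assumes the au_dbstart()/au_h_dptr() helpers used in the
++aufs sources and the kernel's inode_permission()):
++
++	/* dirperm1: ask only the topmost branch about a dir's permission */
++	static int dirperm1_check(struct dentry *dentry, int mask)
++	{
++		struct dentry *h_dentry;
++
++		/* the highest (topmost) branch which has this dentry */
++		h_dentry = au_h_dptr(dentry, au_dbstart(dentry));
++		return inode_permission(h_dentry->d_inode, mask);
++	}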
++
++
++Being Another Aufs's Readonly Branch (robr)
++----------------------------------------------------------------------
++Aufs1 allows aufs to be another aufs's readonly branch.
++This feature was developed at a user's request. But it may not be used
++currently.
++
++
++Copy-up on Open (coo=)
++----------------------------------------------------------------------
++By default the internal copy-up is executed when it is really necessary.
++It is not done when a file is opened for writing, but when write(2) is
++done. Users who have many (over 100) branches want to know and analyse
++when and which file is copied up. Inserting a new upper branch which
++contains only such files may improve the performance of aufs.
++
++Aufs1 implemented the "coo=none | leaf | all" option.
++
++
++Refresh the Opened File (refrof)
++----------------------------------------------------------------------
++This option was implemented in aufs1 but was incomplete.
++
++When a user reads from a file, he generally expects to get its latest
++filedata. If the file is removed and a new file with the same name is
++created, the content he gets is unchanged, i.e. the unlinked filedata.
++
++Let's try a case study again.
++- aufs has two branches.
++  /au = /rw + /ro
++- "fileA" exists under /ro, but not under /rw.
++- user opened "/au/fileA".
++- he or someone else inserts a branch (/new) between /rw and /ro.
++  /au = /rw + /new + /ro
++- the new branch has "fileA".
++- user reads from the opened "fileA"
++- which filedata should aufs return, from /ro or /new?
++
++Some people say it has to be "from /ro" and that this is the semantics
++of Unix. Others say it should be "from /new" because the file is not
++removed and it is equivalent to the case where someone else modifies
++the file.
++
++Here again I don't have a final answer. My idea is to implement the
++'refrof' and 'norefrof' options. When 'refrof' (REFResh the Opened File)
++is specified (by default), aufs returns the filedata from /new.
++Otherwise, from /ro.
+diff -uNr linux-3.2.0-gentoo-r1.orig//Documentation/filesystems/aufs/README linux-3.2.0-gentoo-r1/Documentation/filesystems/aufs/README
+--- linux-3.2.0-gentoo-r1.orig//Documentation/filesystems/aufs/README 1970-01-01 01:00:00.000000000 +0100
++++ linux-3.2.0-gentoo-r1/Documentation/filesystems/aufs/README 2012-01-17 12:11:16.229209192 +0100
+@@ -0,0 +1,328 @@
++
++Aufs3 -- advanced multi layered unification filesystem version 3.x
++http://aufs.sf.net
++Junjiro R. Okajima
++
++
++0. Introduction
++----------------------------------------
++In the early days, aufs was an entire re-design and re-implementation
++of the Unionfs Version 1.x series. After many original ideas, approaches,
++improvements and implementations, it became totally different from
++Unionfs while keeping the basic features.
++Recently, the Unionfs Version 2.x series began taking some of the same
++approaches as aufs1.
++Unionfs is being developed by Professor Erez Zadok at Stony Brook
++University and his team.
++
++Aufs3 supports linux-3.0 and later.
++If you want older kernel version support, try the aufs2-2.6.git or
++aufs2-standalone.git repository, or aufs1 from CVS on SourceForge.
++
++Note: it has become clear that "Aufs was rejected. Let's give it up."
++According to Christoph Hellwig, linux rejects all union-type filesystems
++but UnionMount.
++
++
++
++1. Features
++----------------------------------------
++- unite several directories into a single virtual filesystem. The member
++  directory is called a branch.
++- you can specify the permission flags to the branch, which are 'readonly', ++ 'readwrite' and 'whiteout-able.' ++- by upper writable branch, internal copyup and whiteout, files/dirs on ++ readonly branch are modifiable logically. ++- dynamic branch manipulation, add, del. ++- etc... ++ ++Also there are many enhancements in aufs1, such as: ++- readdir(3) in userspace. ++- keep inode number by external inode number table ++- keep the timestamps of file/dir in internal copyup operation ++- seekable directory, supporting NFS readdir. ++- whiteout is hardlinked in order to reduce the consumption of inodes ++ on branch ++- do not copyup, nor create a whiteout when it is unnecessary ++- revert a single systemcall when an error occurs in aufs ++- remount interface instead of ioctl ++- maintain /etc/mtab by an external command, /sbin/mount.aufs. ++- loopback mounted filesystem as a branch ++- kernel thread for removing the dir who has a plenty of whiteouts ++- support copyup sparse file (a file which has a 'hole' in it) ++- default permission flags for branches ++- selectable permission flags for ro branch, whether whiteout can ++ exist or not ++- export via NFS. ++- support /fs/aufs and /aufs. ++- support multiple writable branches, some policies to select one ++ among multiple writable branches. ++- a new semantics for link(2) and rename(2) to support multiple ++ writable branches. ++- no glibc changes are required. ++- pseudo hardlink (hardlink over branches) ++- allow a direct access manually to a file on branch, e.g. bypassing aufs. ++ including NFS or remote filesystem branch. ++- userspace wrapper for pathconf(3)/fpathconf(3) with _PC_LINK_MAX. ++- and more... ++ ++Currently these features are dropped temporary from aufs3. ++See design/08plan.txt in detail. ++- test only the highest one for the directory permission (dirperm1) ++- copyup on open (coo=) ++- nested mount, i.e. aufs as readonly no-whiteout branch of another aufs ++ (robr) ++- statistics of aufs thread (/sys/fs/aufs/stat) ++- delegation mode (dlgt) ++ a delegation of the internal branch access to support task I/O ++ accounting, which also supports Linux Security Modules (LSM) mainly ++ for Suse AppArmor. ++- intent.open/create (file open in a single lookup) ++ ++Features or just an idea in the future (see also design/*.txt), ++- reorder the branch index without del/re-add. ++- permanent xino files for NFSD ++- an option for refreshing the opened files after add/del branches ++- 'move' policy for copy-up between two writable branches, after ++ checking free space. ++- light version, without branch manipulation. (unnecessary?) ++- copyup in userspace ++- inotify in userspace ++- readv/writev ++- xattr, acl ++ ++ ++2. Download ++---------------------------------------- ++There were three GIT trees for aufs3, aufs3-linux.git, ++aufs3-standalone.git, and aufs-util.git. Note that there is no "3" in ++"aufs-util.git." ++While the aufs-util is always necessary, you need either of aufs3-linux ++or aufs3-standalone. ++ ++The aufs3-linux tree includes the whole linux mainline GIT tree, ++git://git.kernel.org/.../torvalds/linux.git. ++And you cannot select CONFIG_AUFS_FS=m for this version, eg. you cannot ++build aufs3 as an externel kernel module. ++ ++On the other hand, the aufs3-standalone tree has only aufs source files ++and necessary patches, and you can select CONFIG_AUFS_FS=m. ++ ++You will find GIT branches whose name is in form of "aufs3.x" where "x" ++represents the linux kernel version, "linux-3.x". 
For instance, ++"aufs3.0" is for linux-3.0. For latest "linux-3.x-rcN", use ++"aufs3.x-rcN" branch. ++ ++o aufs3-linux tree ++$ git clone --reference /your/linux/git/tree \ ++ git://aufs.git.sourceforge.net/gitroot/aufs/aufs3-linux.git \ ++ aufs3-linux.git ++- if you don't have linux GIT tree, then remove "--reference ..." ++$ cd aufs3-linux.git ++$ git checkout origin/aufs3.0 ++ ++o aufs3-standalone tree ++$ git clone git://aufs.git.sourceforge.net/gitroot/aufs/aufs3-standalone.git \ ++ aufs3-standalone.git ++$ cd aufs3-standalone.git ++$ git checkout origin/aufs3.0 ++ ++o aufs-util tree ++$ git clone git://aufs.git.sourceforge.net/gitroot/aufs/aufs-util.git \ ++ aufs-util.git ++$ cd aufs-util.git ++$ git checkout origin/aufs3.0 ++ ++Note: The 3.x-rcN branch is to be used with `rc' kernel versions ONLY. ++The minor version number, 'x' in '3.x', of aufs may not always ++follow the minor version number of the kernel. ++Because changes in the kernel that cause the use of a new ++minor version number do not always require changes to aufs-util. ++ ++Since aufs-util has its own minor version number, you may not be ++able to find a GIT branch in aufs-util for your kernel's ++exact minor version number. ++In this case, you should git-checkout the branch for the ++nearest lower number. ++ ++For (an unreleased) example: ++If you are using "linux-3.10" and the "aufs3.10" branch ++does not exit in aufs-util repository, then "aufs3.9", "aufs3.8" ++or something numerically smaller is the branch for your kernel. ++ ++Also you can view all branches by ++ $ git branch -a ++ ++ ++3. Configuration and Compilation ++---------------------------------------- ++Make sure you have git-checkout'ed the correct branch. ++ ++For aufs3-linux tree, ++- enable CONFIG_EXPERIMENTAL and CONFIG_AUFS_FS. ++- set other aufs configurations if necessary. ++ ++For aufs3-standalone tree, ++There are several ways to build. ++ ++1. ++- apply ./aufs3-kbuild.patch to your kernel source files. ++- apply ./aufs3-base.patch too. ++- apply ./aufs3-proc_map.patch too, if you want to make /proc/PID/maps (and ++ others including lsof(1)) show the file path on aufs instead of the ++ path on the branch fs. ++- apply ./aufs3-standalone.patch too, if you have a plan to set ++ CONFIG_AUFS_FS=m. otherwise you don't need ./aufs3-standalone.patch. ++- copy ./{Documentation,fs,include/linux/aufs_type.h} files to your ++ kernel source tree. Never copy ./include/linux/Kbuild. ++- enable CONFIG_EXPERIMENTAL and CONFIG_AUFS_FS, you can select either ++ =m or =y. ++- and build your kernel as usual. ++- install the built kernel. ++- install the header files too by "make headers_install". ++- and reboot your system. ++ ++2. ++- module only (CONFIG_AUFS_FS=m). ++- apply ./aufs3-base.patch to your kernel source files. ++- apply ./aufs3-proc_map.patch too to your kernel source files, ++ if you want to make /proc/PID/maps (and others including lsof(1)) show ++ the file path on aufs instead of the path on the branch fs. ++- apply ./aufs3-standalone.patch too. ++- build your kernel, don't forget "make headers_install", and reboot. ++- edit ./config.mk and set other aufs configurations if necessary. ++ Note: You should read ./fs/aufs/Kconfig carefully which describes ++ every aufs configurations. ++- build the module by simple "make". ++- you can specify ${KDIR} make variable which points to your kernel ++ source tree. ++- install the files ++ + run "make install" to install the aufs module, or copy the built ++ ./aufs.ko to /lib/modules/... 
and run depmod -a (or simply reboot).
++  + run "make headers_install" to install the aufs header file (you can
++    specify DESTDIR), or copy ./usr/include/linux/aufs_type.h to
++    /usr/include/linux or wherever you like.
++- no need to apply aufs3-kbuild.patch, nor to copy source files into
++  your kernel source tree.
++
++Note: The header file aufs_type.h is necessary to build aufs-util
++      as well as "make headers_install" in the kernel source tree.
++      headers_install tends to be forgotten, but it is essentially
++      necessary, not only for building aufs-util.
++      You may not hit problems without headers_install in some older
++      versions, though.
++
++And then,
++- read README in aufs-util, build and install it
++- note that your distribution may contain an obsolete version of
++  aufs_type.h in /usr/include/linux or something. When you build the
++  aufs utilities, make sure that your compiler refers to the correct
++  aufs header file which is built by "make headers_install."
++- if you want to use readdir(3) in userspace or the pathconf(3) wrapper,
++  then run "make install_ulib" too. And refer to the aufs manual for
++  details.
++
++
++4. Usage
++----------------------------------------
++First, make sure aufs-util is installed, and please read the aufs
++manual, aufs.5 in the aufs-util.git tree.
++$ man -l aufs.5
++
++And then,
++$ mkdir /tmp/rw /tmp/aufs
++# mount -t aufs -o br=/tmp/rw:${HOME} none /tmp/aufs
++
++Here is another example. The result is equivalent.
++# mount -t aufs -o br=/tmp/rw=rw:${HOME}=ro none /tmp/aufs
++  Or
++# mount -t aufs -o br:/tmp/rw none /tmp/aufs
++# mount -o remount,append:${HOME} /tmp/aufs
++
++Then, you can see the whole tree of your home dir through /tmp/aufs. If
++you modify a file under /tmp/aufs, the one in your home directory is
++not affected; instead a file with the same name will be newly created
++under /tmp/rw. And all of your modifications to that file will be
++applied to the one under /tmp/rw. This is called the file-based Copy on
++Write (COW) method.
++Aufs mount options are described in aufs.5.
++If you run chroot or something and make your aufs the root directory,
++then you need to customize the shutdown script. See the aufs manual for
++details.
++
++Additionally, there are some sample usages of aufs, such as a
++diskless system with network booting, and a LiveCD over NFS.
++See the sample dir in the CVS tree on SourceForge.
++
++
++5. Contact
++----------------------------------------
++When you have any problems or strange behaviour in aufs, please let me
++know with:
++- /proc/mounts (instead of the output of mount(8))
++- /sys/module/aufs/*
++- /sys/fs/aufs/* (if you have them)
++- /debug/aufs/* (if you have them)
++- linux kernel version
++  if your kernel is not plain, for example modified by a distributor,
++  the url where I can download its source is necessary too.
++- the aufs version which was printed when loading the module or booting
++  the system, instead of the date you downloaded it.
++- configuration (define/undefine CONFIG_AUFS_xxx)
++- kernel configuration or /proc/config.gz (if you have it)
++- the behaviour which you think is incorrect
++- the actual operation; a reproducible one is better
++- mailto: aufs-users at lists.sourceforge.net
++
++Usually, I don't watch the Public Areas (Bugs, Support Requests, Patches,
++and Feature Requests) on SourceForge. Please join and write to the
++aufs-users ML.
++
++
++6. Acknowledgements
++----------------------------------------
++Thanks to everyone who has tried and is using aufs, and to whoever has
++reported a bug or given any feedback.
++ ++Especially donators: ++Tomas Matejicek(slax.org) made a donation (much more than once). ++ Since Apr 2010, Tomas M (the author of Slax and Linux Live ++ scripts) is making "doubling" donations. ++ Unfortunately I cannot list all of the donators, but I really ++ appriciate. ++ It ends Aug 2010, but the ordinary donation URL is still available. ++ ++Dai Itasaka made a donation (2007/8). ++Chuck Smith made a donation (2008/4, 10 and 12). ++Henk Schoneveld made a donation (2008/9). ++Chih-Wei Huang, ASUS, CTC donated Eee PC 4G (2008/10). ++Francois Dupoux made a donation (2008/11). ++Bruno Cesar Ribas and Luis Carlos Erpen de Bona, C3SL serves public ++ aufs2 GIT tree (2009/2). ++William Grant made a donation (2009/3). ++Patrick Lane made a donation (2009/4). ++The Mail Archive (mail-archive.com) made donations (2009/5). ++Nippy Networks (Ed Wildgoose) made a donation (2009/7). ++New Dream Network, LLC (www.dreamhost.com) made a donation (2009/11). ++Pavel Pronskiy made a donation (2011/2). ++Iridium and Inmarsat satellite phone retailer (www.mailasail.com), Nippy ++ Networks (Ed Wildgoose) made a donation for hardware (2011/3). ++Max Lekomcev (DOM-TV project) made a donation (2011/7 and 12). ++Sam Liddicott made a donation (2011/9). ++ ++Thank you very much. ++Donations are always, including future donations, very important and ++helpful for me to keep on developing aufs. ++ ++ ++7. ++---------------------------------------- ++If you are an experienced user, no explanation is needed. Aufs is ++just a linux filesystem. ++ ++ ++Enjoy! ++ ++# Local variables: ; ++# mode: text; ++# End: ; +diff -uNr linux-3.2.0-gentoo-r1.orig//fs/aufs/aufs.h linux-3.2.0-gentoo-r1/fs/aufs/aufs.h +--- linux-3.2.0-gentoo-r1.orig//fs/aufs/aufs.h 1970-01-01 01:00:00.000000000 +0100 ++++ linux-3.2.0-gentoo-r1/fs/aufs/aufs.h 2012-01-17 12:11:24.486228052 +0100 +@@ -0,0 +1,60 @@ ++/* ++ * Copyright (C) 2005-2012 Junjiro R. Okajima ++ * ++ * This program, aufs is free software; you can redistribute it and/or modify ++ * it under the terms of the GNU General Public License as published by ++ * the Free Software Foundation; either version 2 of the License, or ++ * (at your option) any later version. ++ * ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ * GNU General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public License ++ * along with this program; if not, write to the Free Software ++ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA ++ */ ++ ++/* ++ * all header files ++ */ ++ ++#ifndef __AUFS_H__ ++#define __AUFS_H__ ++ ++#ifdef __KERNEL__ ++ ++#define AuStub(type, name, body, ...) \ ++ static inline type name(__VA_ARGS__) { body; } ++ ++#define AuStubVoid(name, ...) \ ++ AuStub(void, name, , __VA_ARGS__) ++#define AuStubInt0(name, ...) 
\ ++ AuStub(int, name, return 0, __VA_ARGS__) ++ ++#include "debug.h" ++ ++#include "branch.h" ++#include "cpup.h" ++#include "dcsub.h" ++#include "dbgaufs.h" ++#include "dentry.h" ++#include "dir.h" ++#include "dynop.h" ++#include "file.h" ++#include "fstype.h" ++#include "inode.h" ++#include "loop.h" ++#include "module.h" ++#include "opts.h" ++#include "rwsem.h" ++#include "spl.h" ++#include "super.h" ++#include "sysaufs.h" ++#include "vfsub.h" ++#include "whout.h" ++#include "wkq.h" ++ ++#endif /* __KERNEL__ */ ++#endif /* __AUFS_H__ */ +diff -uNr linux-3.2.0-gentoo-r1.orig//fs/aufs/branch.c linux-3.2.0-gentoo-r1/fs/aufs/branch.c +--- linux-3.2.0-gentoo-r1.orig//fs/aufs/branch.c 1970-01-01 01:00:00.000000000 +0100 ++++ linux-3.2.0-gentoo-r1/fs/aufs/branch.c 2012-01-17 12:11:24.486228052 +0100 +@@ -0,0 +1,1169 @@ ++/* ++ * Copyright (C) 2005-2012 Junjiro R. Okajima ++ * ++ * This program, aufs is free software; you can redistribute it and/or modify ++ * it under the terms of the GNU General Public License as published by ++ * the Free Software Foundation; either version 2 of the License, or ++ * (at your option) any later version. ++ * ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ * GNU General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public License ++ * along with this program; if not, write to the Free Software ++ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA ++ */ ++ ++/* ++ * branch management ++ */ ++ ++#include ++#include ++#include "aufs.h" ++ ++/* ++ * free a single branch ++ */ ++static void au_br_do_free(struct au_branch *br) ++{ ++ int i; ++ struct au_wbr *wbr; ++ struct au_dykey **key; ++ ++ au_hnotify_fin_br(br); ++ ++ if (br->br_xino.xi_file) ++ fput(br->br_xino.xi_file); ++ mutex_destroy(&br->br_xino.xi_nondir_mtx); ++ ++ AuDebugOn(atomic_read(&br->br_count)); ++ ++ wbr = br->br_wbr; ++ if (wbr) { ++ for (i = 0; i < AuBrWh_Last; i++) ++ dput(wbr->wbr_wh[i]); ++ AuDebugOn(atomic_read(&wbr->wbr_wh_running)); ++ AuRwDestroy(&wbr->wbr_wh_rwsem); ++ } ++ ++ key = br->br_dykey; ++ for (i = 0; i < AuBrDynOp; i++, key++) ++ if (*key) ++ au_dy_put(*key); ++ else ++ break; ++ ++ mntput(br->br_mnt); ++ kfree(wbr); ++ kfree(br); ++} ++ ++/* ++ * frees all branches ++ */ ++void au_br_free(struct au_sbinfo *sbinfo) ++{ ++ aufs_bindex_t bmax; ++ struct au_branch **br; ++ ++ AuRwMustWriteLock(&sbinfo->si_rwsem); ++ ++ bmax = sbinfo->si_bend + 1; ++ br = sbinfo->si_branch; ++ while (bmax--) ++ au_br_do_free(*br++); ++} ++ ++/* ++ * find the index of a branch which is specified by @br_id. ++ */ ++int au_br_index(struct super_block *sb, aufs_bindex_t br_id) ++{ ++ aufs_bindex_t bindex, bend; ++ ++ bend = au_sbend(sb); ++ for (bindex = 0; bindex <= bend; bindex++) ++ if (au_sbr_id(sb, bindex) == br_id) ++ return bindex; ++ return -1; ++} ++ ++/* ---------------------------------------------------------------------- */ ++ ++/* ++ * add a branch ++ */ ++ ++static int test_overlap(struct super_block *sb, struct dentry *h_adding, ++ struct dentry *h_root) ++{ ++ if (unlikely(h_adding == h_root ++ || au_test_loopback_overlap(sb, h_adding))) ++ return 1; ++ if (h_adding->d_sb != h_root->d_sb) ++ return 0; ++ return au_test_subdir(h_adding, h_root) ++ || au_test_subdir(h_root, h_adding); ++} ++ ++/* ++ * returns a newly allocated branch. 
@new_nbranch is a number of branches ++ * after adding a branch. ++ */ ++static struct au_branch *au_br_alloc(struct super_block *sb, int new_nbranch, ++ int perm) ++{ ++ struct au_branch *add_branch; ++ struct dentry *root; ++ int err; ++ ++ err = -ENOMEM; ++ root = sb->s_root; ++ add_branch = kmalloc(sizeof(*add_branch), GFP_NOFS); ++ if (unlikely(!add_branch)) ++ goto out; ++ ++ err = au_hnotify_init_br(add_branch, perm); ++ if (unlikely(err)) ++ goto out_br; ++ ++ add_branch->br_wbr = NULL; ++ if (au_br_writable(perm)) { ++ /* may be freed separately at changing the branch permission */ ++ add_branch->br_wbr = kmalloc(sizeof(*add_branch->br_wbr), ++ GFP_NOFS); ++ if (unlikely(!add_branch->br_wbr)) ++ goto out_hnotify; ++ } ++ ++ err = au_sbr_realloc(au_sbi(sb), new_nbranch); ++ if (!err) ++ err = au_di_realloc(au_di(root), new_nbranch); ++ if (!err) ++ err = au_ii_realloc(au_ii(root->d_inode), new_nbranch); ++ if (!err) ++ return add_branch; /* success */ ++ ++ kfree(add_branch->br_wbr); ++ ++out_hnotify: ++ au_hnotify_fin_br(add_branch); ++out_br: ++ kfree(add_branch); ++out: ++ return ERR_PTR(err); ++} ++ ++/* ++ * test if the branch permission is legal or not. ++ */ ++static int test_br(struct inode *inode, int brperm, char *path) ++{ ++ int err; ++ ++ err = (au_br_writable(brperm) && IS_RDONLY(inode)); ++ if (!err) ++ goto out; ++ ++ err = -EINVAL; ++ pr_err("write permission for readonly mount or inode, %s\n", path); ++ ++out: ++ return err; ++} ++ ++/* ++ * returns: ++ * 0: success, the caller will add it ++ * plus: success, it is already unified, the caller should ignore it ++ * minus: error ++ */ ++static int test_add(struct super_block *sb, struct au_opt_add *add, int remount) ++{ ++ int err; ++ aufs_bindex_t bend, bindex; ++ struct dentry *root; ++ struct inode *inode, *h_inode; ++ ++ root = sb->s_root; ++ bend = au_sbend(sb); ++ if (unlikely(bend >= 0 ++ && au_find_dbindex(root, add->path.dentry) >= 0)) { ++ err = 1; ++ if (!remount) { ++ err = -EINVAL; ++ pr_err("%s duplicated\n", add->pathname); ++ } ++ goto out; ++ } ++ ++ err = -ENOSPC; /* -E2BIG; */ ++ if (unlikely(AUFS_BRANCH_MAX <= add->bindex ++ || AUFS_BRANCH_MAX - 1 <= bend)) { ++ pr_err("number of branches exceeded %s\n", add->pathname); ++ goto out; ++ } ++ ++ err = -EDOM; ++ if (unlikely(add->bindex < 0 || bend + 1 < add->bindex)) { ++ pr_err("bad index %d\n", add->bindex); ++ goto out; ++ } ++ ++ inode = add->path.dentry->d_inode; ++ err = -ENOENT; ++ if (unlikely(!inode->i_nlink)) { ++ pr_err("no existence %s\n", add->pathname); ++ goto out; ++ } ++ ++ err = -EINVAL; ++ if (unlikely(inode->i_sb == sb)) { ++ pr_err("%s must be outside\n", add->pathname); ++ goto out; ++ } ++ ++ if (unlikely(au_test_fs_unsuppoted(inode->i_sb))) { ++ pr_err("unsupported filesystem, %s (%s)\n", ++ add->pathname, au_sbtype(inode->i_sb)); ++ goto out; ++ } ++ ++ err = test_br(add->path.dentry->d_inode, add->perm, add->pathname); ++ if (unlikely(err)) ++ goto out; ++ ++ if (bend < 0) ++ return 0; /* success */ ++ ++ err = -EINVAL; ++ for (bindex = 0; bindex <= bend; bindex++) ++ if (unlikely(test_overlap(sb, add->path.dentry, ++ au_h_dptr(root, bindex)))) { ++ pr_err("%s is overlapped\n", add->pathname); ++ goto out; ++ } ++ ++ err = 0; ++ if (au_opt_test(au_mntflags(sb), WARN_PERM)) { ++ h_inode = au_h_dptr(root, 0)->d_inode; ++ if ((h_inode->i_mode & S_IALLUGO) != (inode->i_mode & S_IALLUGO) ++ || h_inode->i_uid != inode->i_uid ++ || h_inode->i_gid != inode->i_gid) ++ pr_warning("uid/gid/perm %s %u/%u/0%o, %u/%u/0%o\n", ++ 
add->pathname, ++ inode->i_uid, inode->i_gid, ++ (inode->i_mode & S_IALLUGO), ++ h_inode->i_uid, h_inode->i_gid, ++ (h_inode->i_mode & S_IALLUGO)); ++ } ++ ++out: ++ return err; ++} ++ ++/* ++ * initialize or clean the whiteouts for an adding branch ++ */ ++static int au_br_init_wh(struct super_block *sb, struct au_branch *br, ++ int new_perm, struct dentry *h_root) ++{ ++ int err, old_perm; ++ aufs_bindex_t bindex; ++ struct mutex *h_mtx; ++ struct au_wbr *wbr; ++ struct au_hinode *hdir; ++ ++ wbr = br->br_wbr; ++ old_perm = br->br_perm; ++ br->br_perm = new_perm; ++ hdir = NULL; ++ h_mtx = NULL; ++ bindex = au_br_index(sb, br->br_id); ++ if (0 <= bindex) { ++ hdir = au_hi(sb->s_root->d_inode, bindex); ++ au_hn_imtx_lock_nested(hdir, AuLsc_I_PARENT); ++ } else { ++ h_mtx = &h_root->d_inode->i_mutex; ++ mutex_lock_nested(h_mtx, AuLsc_I_PARENT); ++ } ++ if (!wbr) ++ err = au_wh_init(h_root, br, sb); ++ else { ++ wbr_wh_write_lock(wbr); ++ err = au_wh_init(h_root, br, sb); ++ wbr_wh_write_unlock(wbr); ++ } ++ if (hdir) ++ au_hn_imtx_unlock(hdir); ++ else ++ mutex_unlock(h_mtx); ++ br->br_perm = old_perm; ++ ++ if (!err && wbr && !au_br_writable(new_perm)) { ++ kfree(wbr); ++ br->br_wbr = NULL; ++ } ++ ++ return err; ++} ++ ++static int au_wbr_init(struct au_branch *br, struct super_block *sb, ++ int perm, struct path *path) ++{ ++ int err; ++ struct kstatfs kst; ++ struct au_wbr *wbr; ++ struct dentry *h_dentry; ++ ++ wbr = br->br_wbr; ++ au_rw_init(&wbr->wbr_wh_rwsem); ++ memset(wbr->wbr_wh, 0, sizeof(wbr->wbr_wh)); ++ atomic_set(&wbr->wbr_wh_running, 0); ++ wbr->wbr_bytes = 0; ++ ++ /* ++ * a limit for rmdir/rename a dir ++ * cf. AUFS_MAX_NAMELEN in include/linux/aufs_type.h ++ */ ++ err = vfs_statfs(path, &kst); ++ if (unlikely(err)) ++ goto out; ++ err = -EINVAL; ++ h_dentry = path->dentry; ++ if (kst.f_namelen >= NAME_MAX) ++ err = au_br_init_wh(sb, br, perm, h_dentry); ++ else ++ pr_err("%.*s(%s), unsupported namelen %ld\n", ++ AuDLNPair(h_dentry), au_sbtype(h_dentry->d_sb), ++ kst.f_namelen); ++ ++out: ++ return err; ++} ++ ++/* intialize a new branch */ ++static int au_br_init(struct au_branch *br, struct super_block *sb, ++ struct au_opt_add *add) ++{ ++ int err; ++ ++ err = 0; ++ memset(&br->br_xino, 0, sizeof(br->br_xino)); ++ mutex_init(&br->br_xino.xi_nondir_mtx); ++ br->br_perm = add->perm; ++ br->br_mnt = add->path.mnt; /* set first, mntget() later */ ++ spin_lock_init(&br->br_dykey_lock); ++ memset(br->br_dykey, 0, sizeof(br->br_dykey)); ++ atomic_set(&br->br_count, 0); ++ br->br_xino_upper = AUFS_XINO_TRUNC_INIT; ++ atomic_set(&br->br_xino_running, 0); ++ br->br_id = au_new_br_id(sb); ++ AuDebugOn(br->br_id < 0); ++ ++ if (au_br_writable(add->perm)) { ++ err = au_wbr_init(br, sb, add->perm, &add->path); ++ if (unlikely(err)) ++ goto out_err; ++ } ++ ++ if (au_opt_test(au_mntflags(sb), XINO)) { ++ err = au_xino_br(sb, br, add->path.dentry->d_inode->i_ino, ++ au_sbr(sb, 0)->br_xino.xi_file, /*do_test*/1); ++ if (unlikely(err)) { ++ AuDebugOn(br->br_xino.xi_file); ++ goto out_err; ++ } ++ } ++ ++ sysaufs_br_init(br); ++ mntget(add->path.mnt); ++ goto out; /* success */ ++ ++out_err: ++ br->br_mnt = NULL; ++out: ++ return err; ++} ++ ++static void au_br_do_add_brp(struct au_sbinfo *sbinfo, aufs_bindex_t bindex, ++ struct au_branch *br, aufs_bindex_t bend, ++ aufs_bindex_t amount) ++{ ++ struct au_branch **brp; ++ ++ AuRwMustWriteLock(&sbinfo->si_rwsem); ++ ++ brp = sbinfo->si_branch + bindex; ++ memmove(brp + 1, brp, sizeof(*brp) * amount); ++ *brp = br; ++ sbinfo->si_bend++; ++ 
if (unlikely(bend < 0)) ++ sbinfo->si_bend = 0; ++} ++ ++static void au_br_do_add_hdp(struct au_dinfo *dinfo, aufs_bindex_t bindex, ++ aufs_bindex_t bend, aufs_bindex_t amount) ++{ ++ struct au_hdentry *hdp; ++ ++ AuRwMustWriteLock(&dinfo->di_rwsem); ++ ++ hdp = dinfo->di_hdentry + bindex; ++ memmove(hdp + 1, hdp, sizeof(*hdp) * amount); ++ au_h_dentry_init(hdp); ++ dinfo->di_bend++; ++ if (unlikely(bend < 0)) ++ dinfo->di_bstart = 0; ++} ++ ++static void au_br_do_add_hip(struct au_iinfo *iinfo, aufs_bindex_t bindex, ++ aufs_bindex_t bend, aufs_bindex_t amount) ++{ ++ struct au_hinode *hip; ++ ++ AuRwMustWriteLock(&iinfo->ii_rwsem); ++ ++ hip = iinfo->ii_hinode + bindex; ++ memmove(hip + 1, hip, sizeof(*hip) * amount); ++ hip->hi_inode = NULL; ++ au_hn_init(hip); ++ iinfo->ii_bend++; ++ if (unlikely(bend < 0)) ++ iinfo->ii_bstart = 0; ++} ++ ++static void au_br_do_add(struct super_block *sb, struct dentry *h_dentry, ++ struct au_branch *br, aufs_bindex_t bindex) ++{ ++ struct dentry *root; ++ struct inode *root_inode; ++ aufs_bindex_t bend, amount; ++ ++ root = sb->s_root; ++ root_inode = root->d_inode; ++ bend = au_sbend(sb); ++ amount = bend + 1 - bindex; ++ au_sbilist_lock(); ++ au_br_do_add_brp(au_sbi(sb), bindex, br, bend, amount); ++ au_br_do_add_hdp(au_di(root), bindex, bend, amount); ++ au_br_do_add_hip(au_ii(root_inode), bindex, bend, amount); ++ au_set_h_dptr(root, bindex, dget(h_dentry)); ++ au_set_h_iptr(root_inode, bindex, au_igrab(h_dentry->d_inode), ++ /*flags*/0); ++ au_sbilist_unlock(); ++} ++ ++int au_br_add(struct super_block *sb, struct au_opt_add *add, int remount) ++{ ++ int err; ++ aufs_bindex_t bend, add_bindex; ++ struct dentry *root, *h_dentry; ++ struct inode *root_inode; ++ struct au_branch *add_branch; ++ ++ root = sb->s_root; ++ root_inode = root->d_inode; ++ IMustLock(root_inode); ++ err = test_add(sb, add, remount); ++ if (unlikely(err < 0)) ++ goto out; ++ if (err) { ++ err = 0; ++ goto out; /* success */ ++ } ++ ++ bend = au_sbend(sb); ++ add_branch = au_br_alloc(sb, bend + 2, add->perm); ++ err = PTR_ERR(add_branch); ++ if (IS_ERR(add_branch)) ++ goto out; ++ ++ err = au_br_init(add_branch, sb, add); ++ if (unlikely(err)) { ++ au_br_do_free(add_branch); ++ goto out; ++ } ++ ++ add_bindex = add->bindex; ++ h_dentry = add->path.dentry; ++ if (!remount) ++ au_br_do_add(sb, h_dentry, add_branch, add_bindex); ++ else { ++ sysaufs_brs_del(sb, add_bindex); ++ au_br_do_add(sb, h_dentry, add_branch, add_bindex); ++ sysaufs_brs_add(sb, add_bindex); ++ } ++ ++ if (!add_bindex) { ++ au_cpup_attr_all(root_inode, /*force*/1); ++ sb->s_maxbytes = h_dentry->d_sb->s_maxbytes; ++ } else ++ au_add_nlink(root_inode, h_dentry->d_inode); ++ ++ /* ++ * this test/set prevents aufs from handling unnecesary notify events ++ * of xino files, in case of re-adding a writable branch which was ++ * once detached from aufs. ++ */ ++ if (au_xino_brid(sb) < 0 ++ && au_br_writable(add_branch->br_perm) ++ && !au_test_fs_bad_xino(h_dentry->d_sb) ++ && add_branch->br_xino.xi_file ++ && add_branch->br_xino.xi_file->f_dentry->d_parent == h_dentry) ++ au_xino_brid_set(sb, add_branch->br_id); ++ ++out: ++ return err; ++} ++ ++/* ---------------------------------------------------------------------- */ ++ ++/* ++ * delete a branch ++ */ ++ ++/* to show the line number, do not make it inlined function */ ++#define AuVerbose(do_info, fmt, ...) 
do { \ ++ if (do_info) \ ++ pr_info(fmt, ##__VA_ARGS__); \ ++} while (0) ++ ++static int au_test_ibusy(struct inode *inode, aufs_bindex_t bstart, ++ aufs_bindex_t bend) ++{ ++ return (inode && !S_ISDIR(inode->i_mode)) || bstart == bend; ++} ++ ++static int au_test_dbusy(struct dentry *dentry, aufs_bindex_t bstart, ++ aufs_bindex_t bend) ++{ ++ return au_test_ibusy(dentry->d_inode, bstart, bend); ++} ++ ++/* ++ * test if the branch is deletable or not. ++ */ ++static int test_dentry_busy(struct dentry *root, aufs_bindex_t bindex, ++ unsigned int sigen, const unsigned int verbose) ++{ ++ int err, i, j, ndentry; ++ aufs_bindex_t bstart, bend; ++ struct au_dcsub_pages dpages; ++ struct au_dpage *dpage; ++ struct dentry *d; ++ ++ err = au_dpages_init(&dpages, GFP_NOFS); ++ if (unlikely(err)) ++ goto out; ++ err = au_dcsub_pages(&dpages, root, NULL, NULL); ++ if (unlikely(err)) ++ goto out_dpages; ++ ++ for (i = 0; !err && i < dpages.ndpage; i++) { ++ dpage = dpages.dpages + i; ++ ndentry = dpage->ndentry; ++ for (j = 0; !err && j < ndentry; j++) { ++ d = dpage->dentries[j]; ++ AuDebugOn(!d->d_count); ++ if (!au_digen_test(d, sigen)) { ++ di_read_lock_child(d, AuLock_IR); ++ if (unlikely(au_dbrange_test(d))) { ++ di_read_unlock(d, AuLock_IR); ++ continue; ++ } ++ } else { ++ di_write_lock_child(d); ++ if (unlikely(au_dbrange_test(d))) { ++ di_write_unlock(d); ++ continue; ++ } ++ err = au_reval_dpath(d, sigen); ++ if (!err) ++ di_downgrade_lock(d, AuLock_IR); ++ else { ++ di_write_unlock(d); ++ break; ++ } ++ } ++ ++ /* AuDbgDentry(d); */ ++ bstart = au_dbstart(d); ++ bend = au_dbend(d); ++ if (bstart <= bindex ++ && bindex <= bend ++ && au_h_dptr(d, bindex) ++ && au_test_dbusy(d, bstart, bend)) { ++ err = -EBUSY; ++ AuVerbose(verbose, "busy %.*s\n", AuDLNPair(d)); ++ AuDbgDentry(d); ++ } ++ di_read_unlock(d, AuLock_IR); ++ } ++ } ++ ++out_dpages: ++ au_dpages_free(&dpages); ++out: ++ return err; ++} ++ ++static int test_inode_busy(struct super_block *sb, aufs_bindex_t bindex, ++ unsigned int sigen, const unsigned int verbose) ++{ ++ int err; ++ unsigned long long max, ull; ++ struct inode *i, **array; ++ aufs_bindex_t bstart, bend; ++ ++ array = au_iarray_alloc(sb, &max); ++ err = PTR_ERR(array); ++ if (IS_ERR(array)) ++ goto out; ++ ++ err = 0; ++ AuDbg("b%d\n", bindex); ++ for (ull = 0; !err && ull < max; ull++) { ++ i = array[ull]; ++ if (i->i_ino == AUFS_ROOT_INO) ++ continue; ++ ++ /* AuDbgInode(i); */ ++ if (au_iigen(i) == sigen) ++ ii_read_lock_child(i); ++ else { ++ ii_write_lock_child(i); ++ err = au_refresh_hinode_self(i); ++ au_iigen_dec(i); ++ if (!err) ++ ii_downgrade_lock(i); ++ else { ++ ii_write_unlock(i); ++ break; ++ } ++ } ++ ++ bstart = au_ibstart(i); ++ bend = au_ibend(i); ++ if (bstart <= bindex ++ && bindex <= bend ++ && au_h_iptr(i, bindex) ++ && au_test_ibusy(i, bstart, bend)) { ++ err = -EBUSY; ++ AuVerbose(verbose, "busy i%lu\n", i->i_ino); ++ AuDbgInode(i); ++ } ++ ii_read_unlock(i); ++ } ++ au_iarray_free(array, max); ++ ++out: ++ return err; ++} ++ ++static int test_children_busy(struct dentry *root, aufs_bindex_t bindex, ++ const unsigned int verbose) ++{ ++ int err; ++ unsigned int sigen; ++ ++ sigen = au_sigen(root->d_sb); ++ DiMustNoWaiters(root); ++ IiMustNoWaiters(root->d_inode); ++ di_write_unlock(root); ++ err = test_dentry_busy(root, bindex, sigen, verbose); ++ if (!err) ++ err = test_inode_busy(root->d_sb, bindex, sigen, verbose); ++ di_write_lock_child(root); /* aufs_write_lock() calls ..._child() */ ++ ++ return err; ++} ++ ++static void 
au_br_do_del_brp(struct au_sbinfo *sbinfo, ++ const aufs_bindex_t bindex, ++ const aufs_bindex_t bend) ++{ ++ struct au_branch **brp, **p; ++ ++ AuRwMustWriteLock(&sbinfo->si_rwsem); ++ ++ brp = sbinfo->si_branch + bindex; ++ if (bindex < bend) ++ memmove(brp, brp + 1, sizeof(*brp) * (bend - bindex)); ++ sbinfo->si_branch[0 + bend] = NULL; ++ sbinfo->si_bend--; ++ ++ p = krealloc(sbinfo->si_branch, sizeof(*p) * bend, AuGFP_SBILIST); ++ if (p) ++ sbinfo->si_branch = p; ++ /* harmless error */ ++} ++ ++static void au_br_do_del_hdp(struct au_dinfo *dinfo, const aufs_bindex_t bindex, ++ const aufs_bindex_t bend) ++{ ++ struct au_hdentry *hdp, *p; ++ ++ AuRwMustWriteLock(&dinfo->di_rwsem); ++ ++ hdp = dinfo->di_hdentry; ++ if (bindex < bend) ++ memmove(hdp + bindex, hdp + bindex + 1, ++ sizeof(*hdp) * (bend - bindex)); ++ hdp[0 + bend].hd_dentry = NULL; ++ dinfo->di_bend--; ++ ++ p = krealloc(hdp, sizeof(*p) * bend, AuGFP_SBILIST); ++ if (p) ++ dinfo->di_hdentry = p; ++ /* harmless error */ ++} ++ ++static void au_br_do_del_hip(struct au_iinfo *iinfo, const aufs_bindex_t bindex, ++ const aufs_bindex_t bend) ++{ ++ struct au_hinode *hip, *p; ++ ++ AuRwMustWriteLock(&iinfo->ii_rwsem); ++ ++ hip = iinfo->ii_hinode + bindex; ++ if (bindex < bend) ++ memmove(hip, hip + 1, sizeof(*hip) * (bend - bindex)); ++ iinfo->ii_hinode[0 + bend].hi_inode = NULL; ++ au_hn_init(iinfo->ii_hinode + bend); ++ iinfo->ii_bend--; ++ ++ p = krealloc(iinfo->ii_hinode, sizeof(*p) * bend, AuGFP_SBILIST); ++ if (p) ++ iinfo->ii_hinode = p; ++ /* harmless error */ ++} ++ ++static void au_br_do_del(struct super_block *sb, aufs_bindex_t bindex, ++ struct au_branch *br) ++{ ++ aufs_bindex_t bend; ++ struct au_sbinfo *sbinfo; ++ struct dentry *root, *h_root; ++ struct inode *inode, *h_inode; ++ struct au_hinode *hinode; ++ ++ SiMustWriteLock(sb); ++ ++ root = sb->s_root; ++ inode = root->d_inode; ++ sbinfo = au_sbi(sb); ++ bend = sbinfo->si_bend; ++ ++ h_root = au_h_dptr(root, bindex); ++ hinode = au_hi(inode, bindex); ++ h_inode = au_igrab(hinode->hi_inode); ++ au_hiput(hinode); ++ ++ au_sbilist_lock(); ++ au_br_do_del_brp(sbinfo, bindex, bend); ++ au_br_do_del_hdp(au_di(root), bindex, bend); ++ au_br_do_del_hip(au_ii(inode), bindex, bend); ++ au_sbilist_unlock(); ++ ++ dput(h_root); ++ iput(h_inode); ++ au_br_do_free(br); ++} ++ ++int au_br_del(struct super_block *sb, struct au_opt_del *del, int remount) ++{ ++ int err, rerr, i; ++ unsigned int mnt_flags; ++ aufs_bindex_t bindex, bend, br_id; ++ unsigned char do_wh, verbose; ++ struct au_branch *br; ++ struct au_wbr *wbr; ++ ++ err = 0; ++ bindex = au_find_dbindex(sb->s_root, del->h_path.dentry); ++ if (bindex < 0) { ++ if (remount) ++ goto out; /* success */ ++ err = -ENOENT; ++ pr_err("%s no such branch\n", del->pathname); ++ goto out; ++ } ++ AuDbg("bindex b%d\n", bindex); ++ ++ err = -EBUSY; ++ mnt_flags = au_mntflags(sb); ++ verbose = !!au_opt_test(mnt_flags, VERBOSE); ++ bend = au_sbend(sb); ++ if (unlikely(!bend)) { ++ AuVerbose(verbose, "no more branches left\n"); ++ goto out; ++ } ++ br = au_sbr(sb, bindex); ++ i = atomic_read(&br->br_count); ++ if (unlikely(i)) { ++ AuVerbose(verbose, "%d file(s) opened\n", i); ++ goto out; ++ } ++ ++ wbr = br->br_wbr; ++ do_wh = wbr && (wbr->wbr_whbase || wbr->wbr_plink || wbr->wbr_orph); ++ if (do_wh) { ++ /* instead of WbrWhMustWriteLock(wbr) */ ++ SiMustWriteLock(sb); ++ for (i = 0; i < AuBrWh_Last; i++) { ++ dput(wbr->wbr_wh[i]); ++ wbr->wbr_wh[i] = NULL; ++ } ++ } ++ ++ err = test_children_busy(sb->s_root, bindex, verbose); ++ 
if (unlikely(err)) { ++ if (do_wh) ++ goto out_wh; ++ goto out; ++ } ++ ++ err = 0; ++ br_id = br->br_id; ++ if (!remount) ++ au_br_do_del(sb, bindex, br); ++ else { ++ sysaufs_brs_del(sb, bindex); ++ au_br_do_del(sb, bindex, br); ++ sysaufs_brs_add(sb, bindex); ++ } ++ ++ if (!bindex) { ++ au_cpup_attr_all(sb->s_root->d_inode, /*force*/1); ++ sb->s_maxbytes = au_sbr_sb(sb, 0)->s_maxbytes; ++ } else ++ au_sub_nlink(sb->s_root->d_inode, del->h_path.dentry->d_inode); ++ if (au_opt_test(mnt_flags, PLINK)) ++ au_plink_half_refresh(sb, br_id); ++ ++ if (au_xino_brid(sb) == br_id) ++ au_xino_brid_set(sb, -1); ++ goto out; /* success */ ++ ++out_wh: ++ /* revert */ ++ rerr = au_br_init_wh(sb, br, br->br_perm, del->h_path.dentry); ++ if (rerr) ++ pr_warning("failed re-creating base whiteout, %s. (%d)\n", ++ del->pathname, rerr); ++out: ++ return err; ++} ++ ++/* ---------------------------------------------------------------------- */ ++ ++static int au_ibusy(struct super_block *sb, struct aufs_ibusy __user *arg) ++{ ++ int err; ++ aufs_bindex_t bstart, bend; ++ struct aufs_ibusy ibusy; ++ struct inode *inode, *h_inode; ++ ++ err = -EPERM; ++ if (unlikely(!capable(CAP_SYS_ADMIN))) ++ goto out; ++ ++ err = copy_from_user(&ibusy, arg, sizeof(ibusy)); ++ if (!err) ++ err = !access_ok(VERIFY_WRITE, &arg->h_ino, sizeof(arg->h_ino)); ++ if (unlikely(err)) { ++ err = -EFAULT; ++ AuTraceErr(err); ++ goto out; ++ } ++ ++ err = -EINVAL; ++ si_read_lock(sb, AuLock_FLUSH); ++ if (unlikely(ibusy.bindex < 0 || ibusy.bindex > au_sbend(sb))) ++ goto out_unlock; ++ ++ err = 0; ++ ibusy.h_ino = 0; /* invalid */ ++ inode = ilookup(sb, ibusy.ino); ++ if (!inode ++ || inode->i_ino == AUFS_ROOT_INO ++ || is_bad_inode(inode)) ++ goto out_unlock; ++ ++ ii_read_lock_child(inode); ++ bstart = au_ibstart(inode); ++ bend = au_ibend(inode); ++ if (bstart <= ibusy.bindex && ibusy.bindex <= bend) { ++ h_inode = au_h_iptr(inode, ibusy.bindex); ++ if (h_inode && au_test_ibusy(inode, bstart, bend)) ++ ibusy.h_ino = h_inode->i_ino; ++ } ++ ii_read_unlock(inode); ++ iput(inode); ++ ++out_unlock: ++ si_read_unlock(sb); ++ if (!err) { ++ err = __put_user(ibusy.h_ino, &arg->h_ino); ++ if (unlikely(err)) { ++ err = -EFAULT; ++ AuTraceErr(err); ++ } ++ } ++out: ++ return err; ++} ++ ++long au_ibusy_ioctl(struct file *file, unsigned long arg) ++{ ++ return au_ibusy(file->f_dentry->d_sb, (void __user *)arg); ++} ++ ++#ifdef CONFIG_COMPAT ++long au_ibusy_compat_ioctl(struct file *file, unsigned long arg) ++{ ++ return au_ibusy(file->f_dentry->d_sb, compat_ptr(arg)); ++} ++#endif ++ ++/* ---------------------------------------------------------------------- */ ++ ++/* ++ * change a branch permission ++ */ ++ ++static void au_warn_ima(void) ++{ ++#ifdef CONFIG_IMA ++ /* since it doesn't support mark_files_ro() */ ++ AuWarn1("RW -> RO makes IMA to produce wrong message\n"); ++#endif ++} ++ ++static int do_need_sigen_inc(int a, int b) ++{ ++ return au_br_whable(a) && !au_br_whable(b); ++} ++ ++static int need_sigen_inc(int old, int new) ++{ ++ return do_need_sigen_inc(old, new) ++ || do_need_sigen_inc(new, old); ++} ++ ++static unsigned long long au_farray_cb(void *a, ++ unsigned long long max __maybe_unused, ++ void *arg) ++{ ++ unsigned long long n; ++ struct file **p, *f; ++ struct super_block *sb = arg; ++ ++ n = 0; ++ p = a; ++ lg_global_lock(files_lglock); ++ do_file_list_for_each_entry(sb, f) { ++ if (au_fi(f) ++ && file_count(f) ++ && !special_file(f->f_dentry->d_inode->i_mode)) { ++ get_file(f); ++ *p++ = f; ++ n++; ++ AuDebugOn(n > 
max); ++ } ++ } while_file_list_for_each_entry; ++ lg_global_unlock(files_lglock); ++ ++ return n; ++} ++ ++static struct file **au_farray_alloc(struct super_block *sb, ++ unsigned long long *max) ++{ ++ *max = atomic_long_read(&au_sbi(sb)->si_nfiles); ++ return au_array_alloc(max, au_farray_cb, sb); ++} ++ ++static void au_farray_free(struct file **a, unsigned long long max) ++{ ++ unsigned long long ull; ++ ++ for (ull = 0; ull < max; ull++) ++ if (a[ull]) ++ fput(a[ull]); ++ au_array_free(a); ++} ++ ++static int au_br_mod_files_ro(struct super_block *sb, aufs_bindex_t bindex) ++{ ++ int err, do_warn; ++ unsigned int mnt_flags; ++ unsigned long long ull, max; ++ aufs_bindex_t br_id; ++ unsigned char verbose; ++ struct file *file, *hf, **array; ++ struct inode *inode; ++ struct au_hfile *hfile; ++ ++ mnt_flags = au_mntflags(sb); ++ verbose = !!au_opt_test(mnt_flags, VERBOSE); ++ ++ array = au_farray_alloc(sb, &max); ++ err = PTR_ERR(array); ++ if (IS_ERR(array)) ++ goto out; ++ ++ do_warn = 0; ++ br_id = au_sbr_id(sb, bindex); ++ for (ull = 0; ull < max; ull++) { ++ file = array[ull]; ++ ++ /* AuDbg("%.*s\n", AuDLNPair(file->f_dentry)); */ ++ fi_read_lock(file); ++ if (unlikely(au_test_mmapped(file))) { ++ err = -EBUSY; ++ AuVerbose(verbose, "mmapped %.*s\n", ++ AuDLNPair(file->f_dentry)); ++ AuDbgFile(file); ++ FiMustNoWaiters(file); ++ fi_read_unlock(file); ++ goto out_array; ++ } ++ ++ inode = file->f_dentry->d_inode; ++ hfile = &au_fi(file)->fi_htop; ++ hf = hfile->hf_file; ++ if (!S_ISREG(inode->i_mode) ++ || !(file->f_mode & FMODE_WRITE) ++ || hfile->hf_br->br_id != br_id ++ || !(hf->f_mode & FMODE_WRITE)) ++ array[ull] = NULL; ++ else { ++ do_warn = 1; ++ get_file(file); ++ } ++ ++ FiMustNoWaiters(file); ++ fi_read_unlock(file); ++ fput(file); ++ } ++ ++ err = 0; ++ if (do_warn) ++ au_warn_ima(); ++ ++ for (ull = 0; ull < max; ull++) { ++ file = array[ull]; ++ if (!file) ++ continue; ++ ++ /* todo: already flushed? */ ++ /* cf. 
fs/super.c:mark_files_ro() */ ++ /* fi_read_lock(file); */ ++ hfile = &au_fi(file)->fi_htop; ++ hf = hfile->hf_file; ++ /* fi_read_unlock(file); */ ++ spin_lock(&hf->f_lock); ++ hf->f_mode &= ~FMODE_WRITE; ++ spin_unlock(&hf->f_lock); ++ if (!file_check_writeable(hf)) { ++ file_release_write(hf); ++ mnt_drop_write(hf->f_vfsmnt); ++ } ++ } ++ ++out_array: ++ au_farray_free(array, max); ++out: ++ AuTraceErr(err); ++ return err; ++} ++ ++int au_br_mod(struct super_block *sb, struct au_opt_mod *mod, int remount, ++ int *do_refresh) ++{ ++ int err, rerr; ++ aufs_bindex_t bindex; ++ struct path path; ++ struct dentry *root; ++ struct au_branch *br; ++ ++ root = sb->s_root; ++ bindex = au_find_dbindex(root, mod->h_root); ++ if (bindex < 0) { ++ if (remount) ++ return 0; /* success */ ++ err = -ENOENT; ++ pr_err("%s no such branch\n", mod->path); ++ goto out; ++ } ++ AuDbg("bindex b%d\n", bindex); ++ ++ err = test_br(mod->h_root->d_inode, mod->perm, mod->path); ++ if (unlikely(err)) ++ goto out; ++ ++ br = au_sbr(sb, bindex); ++ if (br->br_perm == mod->perm) ++ return 0; /* success */ ++ ++ if (au_br_writable(br->br_perm)) { ++ /* remove whiteout base */ ++ err = au_br_init_wh(sb, br, mod->perm, mod->h_root); ++ if (unlikely(err)) ++ goto out; ++ ++ if (!au_br_writable(mod->perm)) { ++ /* rw --> ro, file might be mmapped */ ++ DiMustNoWaiters(root); ++ IiMustNoWaiters(root->d_inode); ++ di_write_unlock(root); ++ err = au_br_mod_files_ro(sb, bindex); ++ /* aufs_write_lock() calls ..._child() */ ++ di_write_lock_child(root); ++ ++ if (unlikely(err)) { ++ rerr = -ENOMEM; ++ br->br_wbr = kmalloc(sizeof(*br->br_wbr), ++ GFP_NOFS); ++ if (br->br_wbr) { ++ path.mnt = br->br_mnt; ++ path.dentry = mod->h_root; ++ rerr = au_wbr_init(br, sb, br->br_perm, ++ &path); ++ } ++ if (unlikely(rerr)) { ++ AuIOErr("nested error %d (%d)\n", ++ rerr, err); ++ br->br_perm = mod->perm; ++ } ++ } ++ } ++ } else if (au_br_writable(mod->perm)) { ++ /* ro --> rw */ ++ err = -ENOMEM; ++ br->br_wbr = kmalloc(sizeof(*br->br_wbr), GFP_NOFS); ++ if (br->br_wbr) { ++ path.mnt = br->br_mnt; ++ path.dentry = mod->h_root; ++ err = au_wbr_init(br, sb, mod->perm, &path); ++ if (unlikely(err)) { ++ kfree(br->br_wbr); ++ br->br_wbr = NULL; ++ } ++ } ++ } ++ ++ if (!err) { ++ *do_refresh |= need_sigen_inc(br->br_perm, mod->perm); ++ br->br_perm = mod->perm; ++ } ++ ++out: ++ AuTraceErr(err); ++ return err; ++} +diff -uNr linux-3.2.0-gentoo-r1.orig//fs/aufs/branch.h linux-3.2.0-gentoo-r1/fs/aufs/branch.h +--- linux-3.2.0-gentoo-r1.orig//fs/aufs/branch.h 1970-01-01 01:00:00.000000000 +0100 ++++ linux-3.2.0-gentoo-r1/fs/aufs/branch.h 2012-01-17 12:11:24.488542888 +0100 +@@ -0,0 +1,230 @@ ++/* ++ * Copyright (C) 2005-2012 Junjiro R. Okajima ++ * ++ * This program, aufs is free software; you can redistribute it and/or modify ++ * it under the terms of the GNU General Public License as published by ++ * the Free Software Foundation; either version 2 of the License, or ++ * (at your option) any later version. ++ * ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ * GNU General Public License for more details. 
++ * ++ * You should have received a copy of the GNU General Public License ++ * along with this program; if not, write to the Free Software ++ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA ++ */ ++ ++/* ++ * branch filesystems and xino for them ++ */ ++ ++#ifndef __AUFS_BRANCH_H__ ++#define __AUFS_BRANCH_H__ ++ ++#ifdef __KERNEL__ ++ ++#include ++#include "dynop.h" ++#include "rwsem.h" ++#include "super.h" ++ ++/* ---------------------------------------------------------------------- */ ++ ++/* a xino file */ ++struct au_xino_file { ++ struct file *xi_file; ++ struct mutex xi_nondir_mtx; ++ ++ /* todo: make xino files an array to support huge inode number */ ++ ++#ifdef CONFIG_DEBUG_FS ++ struct dentry *xi_dbgaufs; ++#endif ++}; ++ ++/* members for writable branch only */ ++enum {AuBrWh_BASE, AuBrWh_PLINK, AuBrWh_ORPH, AuBrWh_Last}; ++struct au_wbr { ++ struct au_rwsem wbr_wh_rwsem; ++ struct dentry *wbr_wh[AuBrWh_Last]; ++ atomic_t wbr_wh_running; ++#define wbr_whbase wbr_wh[AuBrWh_BASE] /* whiteout base */ ++#define wbr_plink wbr_wh[AuBrWh_PLINK] /* pseudo-link dir */ ++#define wbr_orph wbr_wh[AuBrWh_ORPH] /* dir for orphans */ ++ ++ /* mfs mode */ ++ unsigned long long wbr_bytes; ++}; ++ ++/* ext2 has 3 types of operations at least, ext3 has 4 */ ++#define AuBrDynOp (AuDyLast * 4) ++ ++/* protected by superblock rwsem */ ++struct au_branch { ++ struct au_xino_file br_xino; ++ ++ aufs_bindex_t br_id; ++ ++ int br_perm; ++ struct vfsmount *br_mnt; ++ spinlock_t br_dykey_lock; ++ struct au_dykey *br_dykey[AuBrDynOp]; ++ atomic_t br_count; ++ ++ struct au_wbr *br_wbr; ++ ++ /* xino truncation */ ++ blkcnt_t br_xino_upper; /* watermark in blocks */ ++ atomic_t br_xino_running; ++ ++#ifdef CONFIG_AUFS_HFSNOTIFY ++ struct fsnotify_group *br_hfsn_group; ++ struct fsnotify_ops br_hfsn_ops; ++#endif ++ ++#ifdef CONFIG_SYSFS ++ /* an entry under sysfs per mount-point */ ++ char br_name[8]; ++ struct attribute br_attr; ++#endif ++}; ++ ++/* ---------------------------------------------------------------------- */ ++ ++/* branch permissions and attributes */ ++#define AuBrPerm_RW 1 /* writable, hardlinkable wh */ ++#define AuBrPerm_RO (1 << 1) /* readonly */ ++#define AuBrPerm_RR (1 << 2) /* natively readonly */ ++#define AuBrPerm_Mask (AuBrPerm_RW | AuBrPerm_RO | AuBrPerm_RR) ++ ++#define AuBrRAttr_WH (1 << 3) /* whiteout-able */ ++ ++#define AuBrWAttr_NoLinkWH (1 << 4) /* un-hardlinkable whiteouts */ ++ ++static inline int au_br_writable(int brperm) ++{ ++ return brperm & AuBrPerm_RW; ++} ++ ++static inline int au_br_whable(int brperm) ++{ ++ return brperm & (AuBrPerm_RW | AuBrRAttr_WH); ++} ++ ++static inline int au_br_wh_linkable(int brperm) ++{ ++ return !(brperm & AuBrWAttr_NoLinkWH); ++} ++ ++static inline int au_br_rdonly(struct au_branch *br) ++{ ++ return ((br->br_mnt->mnt_sb->s_flags & MS_RDONLY) ++ || !au_br_writable(br->br_perm)) ++ ? 
-EROFS : 0; ++} ++ ++static inline int au_br_hnotifyable(int brperm __maybe_unused) ++{ ++#ifdef CONFIG_AUFS_HNOTIFY ++ return !(brperm & AuBrPerm_RR); ++#else ++ return 0; ++#endif ++} ++ ++/* ---------------------------------------------------------------------- */ ++ ++/* branch.c */ ++struct au_sbinfo; ++void au_br_free(struct au_sbinfo *sinfo); ++int au_br_index(struct super_block *sb, aufs_bindex_t br_id); ++struct au_opt_add; ++int au_br_add(struct super_block *sb, struct au_opt_add *add, int remount); ++struct au_opt_del; ++int au_br_del(struct super_block *sb, struct au_opt_del *del, int remount); ++long au_ibusy_ioctl(struct file *file, unsigned long arg); ++#ifdef CONFIG_COMPAT ++long au_ibusy_compat_ioctl(struct file *file, unsigned long arg); ++#endif ++struct au_opt_mod; ++int au_br_mod(struct super_block *sb, struct au_opt_mod *mod, int remount, ++ int *do_refresh); ++ ++/* xino.c */ ++static const loff_t au_loff_max = LLONG_MAX; ++ ++int au_xib_trunc(struct super_block *sb); ++ssize_t xino_fread(au_readf_t func, struct file *file, void *buf, size_t size, ++ loff_t *pos); ++ssize_t xino_fwrite(au_writef_t func, struct file *file, void *buf, size_t size, ++ loff_t *pos); ++struct file *au_xino_create2(struct file *base_file, struct file *copy_src); ++struct file *au_xino_create(struct super_block *sb, char *fname, int silent); ++ino_t au_xino_new_ino(struct super_block *sb); ++void au_xino_delete_inode(struct inode *inode, const int unlinked); ++int au_xino_write(struct super_block *sb, aufs_bindex_t bindex, ino_t h_ino, ++ ino_t ino); ++int au_xino_read(struct super_block *sb, aufs_bindex_t bindex, ino_t h_ino, ++ ino_t *ino); ++int au_xino_br(struct super_block *sb, struct au_branch *br, ino_t hino, ++ struct file *base_file, int do_test); ++int au_xino_trunc(struct super_block *sb, aufs_bindex_t bindex); ++ ++struct au_opt_xino; ++int au_xino_set(struct super_block *sb, struct au_opt_xino *xino, int remount); ++void au_xino_clr(struct super_block *sb); ++struct file *au_xino_def(struct super_block *sb); ++int au_xino_path(struct seq_file *seq, struct file *file); ++ ++/* ---------------------------------------------------------------------- */ ++ ++/* Superblock to branch */ ++static inline ++aufs_bindex_t au_sbr_id(struct super_block *sb, aufs_bindex_t bindex) ++{ ++ return au_sbr(sb, bindex)->br_id; ++} ++ ++static inline ++struct vfsmount *au_sbr_mnt(struct super_block *sb, aufs_bindex_t bindex) ++{ ++ return au_sbr(sb, bindex)->br_mnt; ++} ++ ++static inline ++struct super_block *au_sbr_sb(struct super_block *sb, aufs_bindex_t bindex) ++{ ++ return au_sbr_mnt(sb, bindex)->mnt_sb; ++} ++ ++static inline void au_sbr_put(struct super_block *sb, aufs_bindex_t bindex) ++{ ++ atomic_dec(&au_sbr(sb, bindex)->br_count); ++} ++ ++static inline int au_sbr_perm(struct super_block *sb, aufs_bindex_t bindex) ++{ ++ return au_sbr(sb, bindex)->br_perm; ++} ++ ++static inline int au_sbr_whable(struct super_block *sb, aufs_bindex_t bindex) ++{ ++ return au_br_whable(au_sbr_perm(sb, bindex)); ++} ++ ++/* ---------------------------------------------------------------------- */ ++ ++/* ++ * wbr_wh_read_lock, wbr_wh_write_lock ++ * wbr_wh_read_unlock, wbr_wh_write_unlock, wbr_wh_downgrade_lock ++ */ ++AuSimpleRwsemFuncs(wbr_wh, struct au_wbr *wbr, &wbr->wbr_wh_rwsem); ++ ++#define WbrWhMustNoWaiters(wbr) AuRwMustNoWaiters(&wbr->wbr_wh_rwsem) ++#define WbrWhMustAnyLock(wbr) AuRwMustAnyLock(&wbr->wbr_wh_rwsem) ++#define WbrWhMustWriteLock(wbr) AuRwMustWriteLock(&wbr->wbr_wh_rwsem) ++ 
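++/*
++ * A usage sketch of the wbr_wh_* helpers generated by
++ * AuSimpleRwsemFuncs() above, as they appear in au_br_init_wh() in
++ * branch.c:
++ *	wbr_wh_write_lock(wbr);
++ *	err = au_wh_init(h_root, br, sb);
++ *	wbr_wh_write_unlock(wbr);
++ */
++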
++#endif /* __KERNEL__ */ ++#endif /* __AUFS_BRANCH_H__ */ +diff -uNr linux-3.2.0-gentoo-r1.orig//fs/aufs/conf.mk linux-3.2.0-gentoo-r1/fs/aufs/conf.mk +--- linux-3.2.0-gentoo-r1.orig//fs/aufs/conf.mk 1970-01-01 01:00:00.000000000 +0100 ++++ linux-3.2.0-gentoo-r1/fs/aufs/conf.mk 2012-01-17 12:11:24.488542888 +0100 +@@ -0,0 +1,38 @@ ++ ++AuConfStr = CONFIG_AUFS_FS=${CONFIG_AUFS_FS} ++ ++define AuConf ++ifdef ${1} ++AuConfStr += ${1}=${${1}} ++endif ++endef ++ ++AuConfAll = BRANCH_MAX_127 BRANCH_MAX_511 BRANCH_MAX_1023 BRANCH_MAX_32767 \ ++ SBILIST \ ++ HNOTIFY HFSNOTIFY \ ++ EXPORT INO_T_64 \ ++ RDU \ ++ PROC_MAP \ ++ SP_IATTR \ ++ SHWH \ ++ BR_RAMFS \ ++ BR_FUSE POLL \ ++ BR_HFSPLUS \ ++ BDEV_LOOP \ ++ DEBUG MAGIC_SYSRQ ++$(foreach i, ${AuConfAll}, \ ++ $(eval $(call AuConf,CONFIG_AUFS_${i}))) ++ ++AuConfName = ${obj}/conf.str ++${AuConfName}.tmp: FORCE ++ @echo ${AuConfStr} | tr ' ' '\n' | sed -e 's/^/"/' -e 's/$$/\\n"/' > $@ ++${AuConfName}: ${AuConfName}.tmp ++ @diff -q $< $@ > /dev/null 2>&1 || { \ ++ echo ' GEN ' $@; \ ++ cp -p $< $@; \ ++ } ++FORCE: ++clean-files += ${AuConfName} ${AuConfName}.tmp ++${obj}/sysfs.o: ${AuConfName} ++ ++-include ${srctree}/${src}/conf_priv.mk +diff -uNr linux-3.2.0-gentoo-r1.orig//fs/aufs/cpup.c linux-3.2.0-gentoo-r1/fs/aufs/cpup.c +--- linux-3.2.0-gentoo-r1.orig//fs/aufs/cpup.c 1970-01-01 01:00:00.000000000 +0100 ++++ linux-3.2.0-gentoo-r1/fs/aufs/cpup.c 2012-01-17 12:11:24.511691245 +0100 +@@ -0,0 +1,1079 @@ ++/* ++ * Copyright (C) 2005-2012 Junjiro R. Okajima ++ * ++ * This program, aufs is free software; you can redistribute it and/or modify ++ * it under the terms of the GNU General Public License as published by ++ * the Free Software Foundation; either version 2 of the License, or ++ * (at your option) any later version. ++ * ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ * GNU General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public License ++ * along with this program; if not, write to the Free Software ++ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA ++ */ ++ ++/* ++ * copy-up functions, see wbr_policy.c for copy-down ++ */ ++ ++#include ++#include ++#include "aufs.h" ++ ++void au_cpup_attr_flags(struct inode *dst, struct inode *src) ++{ ++ const unsigned int mask = S_DEAD | S_SWAPFILE | S_PRIVATE ++ | S_NOATIME | S_NOCMTIME; ++ ++ dst->i_flags |= src->i_flags & ~mask; ++ if (au_test_fs_notime(dst->i_sb)) ++ dst->i_flags |= S_NOATIME | S_NOCMTIME; ++} ++ ++void au_cpup_attr_timesizes(struct inode *inode) ++{ ++ struct inode *h_inode; ++ ++ h_inode = au_h_iptr(inode, au_ibstart(inode)); ++ fsstack_copy_attr_times(inode, h_inode); ++ fsstack_copy_inode_size(inode, h_inode); ++} ++ ++void au_cpup_attr_nlink(struct inode *inode, int force) ++{ ++ struct inode *h_inode; ++ struct super_block *sb; ++ aufs_bindex_t bindex, bend; ++ ++ sb = inode->i_sb; ++ bindex = au_ibstart(inode); ++ h_inode = au_h_iptr(inode, bindex); ++ if (!force ++ && !S_ISDIR(h_inode->i_mode) ++ && au_opt_test(au_mntflags(sb), PLINK) ++ && au_plink_test(inode)) ++ return; ++ ++ set_nlink(inode, h_inode->i_nlink); ++ ++ /* ++ * fewer nlink makes find(1) noisy, but larger nlink doesn't. ++ * it may includes whplink directory. 
++ */ ++ if (S_ISDIR(h_inode->i_mode)) { ++ bend = au_ibend(inode); ++ for (bindex++; bindex <= bend; bindex++) { ++ h_inode = au_h_iptr(inode, bindex); ++ if (h_inode) ++ au_add_nlink(inode, h_inode); ++ } ++ } ++} ++ ++void au_cpup_attr_changeable(struct inode *inode) ++{ ++ struct inode *h_inode; ++ ++ h_inode = au_h_iptr(inode, au_ibstart(inode)); ++ inode->i_mode = h_inode->i_mode; ++ inode->i_uid = h_inode->i_uid; ++ inode->i_gid = h_inode->i_gid; ++ au_cpup_attr_timesizes(inode); ++ au_cpup_attr_flags(inode, h_inode); ++} ++ ++void au_cpup_igen(struct inode *inode, struct inode *h_inode) ++{ ++ struct au_iinfo *iinfo = au_ii(inode); ++ ++ IiMustWriteLock(inode); ++ ++ iinfo->ii_higen = h_inode->i_generation; ++ iinfo->ii_hsb1 = h_inode->i_sb; ++} ++ ++void au_cpup_attr_all(struct inode *inode, int force) ++{ ++ struct inode *h_inode; ++ ++ h_inode = au_h_iptr(inode, au_ibstart(inode)); ++ au_cpup_attr_changeable(inode); ++ if (inode->i_nlink > 0) ++ au_cpup_attr_nlink(inode, force); ++ inode->i_rdev = h_inode->i_rdev; ++ inode->i_blkbits = h_inode->i_blkbits; ++ au_cpup_igen(inode, h_inode); ++} ++ ++/* ---------------------------------------------------------------------- */ ++ ++/* Note: dt_dentry and dt_h_dentry are not dget/dput-ed */ ++ ++/* keep the timestamps of the parent dir when cpup */ ++void au_dtime_store(struct au_dtime *dt, struct dentry *dentry, ++ struct path *h_path) ++{ ++ struct inode *h_inode; ++ ++ dt->dt_dentry = dentry; ++ dt->dt_h_path = *h_path; ++ h_inode = h_path->dentry->d_inode; ++ dt->dt_atime = h_inode->i_atime; ++ dt->dt_mtime = h_inode->i_mtime; ++ /* smp_mb(); */ ++} ++ ++void au_dtime_revert(struct au_dtime *dt) ++{ ++ struct iattr attr; ++ int err; ++ ++ attr.ia_atime = dt->dt_atime; ++ attr.ia_mtime = dt->dt_mtime; ++ attr.ia_valid = ATTR_FORCE | ATTR_MTIME | ATTR_MTIME_SET ++ | ATTR_ATIME | ATTR_ATIME_SET; ++ ++ err = vfsub_notify_change(&dt->dt_h_path, &attr); ++ if (unlikely(err)) ++ pr_warning("restoring timestamps failed(%d). ignored\n", err); ++} ++ ++/* ---------------------------------------------------------------------- */ ++ ++static noinline_for_stack ++int cpup_iattr(struct dentry *dst, aufs_bindex_t bindex, struct dentry *h_src) ++{ ++ int err, sbits; ++ struct iattr ia; ++ struct path h_path; ++ struct inode *h_isrc, *h_idst; ++ ++ h_path.dentry = au_h_dptr(dst, bindex); ++ h_idst = h_path.dentry->d_inode; ++ h_path.mnt = au_sbr_mnt(dst->d_sb, bindex); ++ h_isrc = h_src->d_inode; ++ ia.ia_valid = ATTR_FORCE | ATTR_UID | ATTR_GID ++ | ATTR_ATIME | ATTR_MTIME ++ | ATTR_ATIME_SET | ATTR_MTIME_SET; ++ ia.ia_uid = h_isrc->i_uid; ++ ia.ia_gid = h_isrc->i_gid; ++ ia.ia_atime = h_isrc->i_atime; ++ ia.ia_mtime = h_isrc->i_mtime; ++ if (h_idst->i_mode != h_isrc->i_mode ++ && !S_ISLNK(h_idst->i_mode)) { ++ ia.ia_valid |= ATTR_MODE; ++ ia.ia_mode = h_isrc->i_mode; ++ } ++ sbits = !!(h_isrc->i_mode & (S_ISUID | S_ISGID)); ++ au_cpup_attr_flags(h_idst, h_isrc); ++ err = vfsub_notify_change(&h_path, &ia); ++ ++ /* is this nfs only? 
*/ ++ if (!err && sbits && au_test_nfs(h_path.dentry->d_sb)) { ++ ia.ia_valid = ATTR_FORCE | ATTR_MODE; ++ ia.ia_mode = h_isrc->i_mode; ++ err = vfsub_notify_change(&h_path, &ia); ++ } ++ ++ return err; ++} ++ ++/* ---------------------------------------------------------------------- */ ++ ++static int au_do_copy_file(struct file *dst, struct file *src, loff_t len, ++ char *buf, unsigned long blksize) ++{ ++ int err; ++ size_t sz, rbytes, wbytes; ++ unsigned char all_zero; ++ char *p, *zp; ++ struct mutex *h_mtx; ++ /* reduce stack usage */ ++ struct iattr *ia; ++ ++ zp = page_address(ZERO_PAGE(0)); ++ if (unlikely(!zp)) ++ return -ENOMEM; /* possible? */ ++ ++ err = 0; ++ all_zero = 0; ++ while (len) { ++ AuDbg("len %lld\n", len); ++ sz = blksize; ++ if (len < blksize) ++ sz = len; ++ ++ rbytes = 0; ++ /* todo: signal_pending? */ ++ while (!rbytes || err == -EAGAIN || err == -EINTR) { ++ rbytes = vfsub_read_k(src, buf, sz, &src->f_pos); ++ err = rbytes; ++ } ++ if (unlikely(err < 0)) ++ break; ++ ++ all_zero = 0; ++ if (len >= rbytes && rbytes == blksize) ++ all_zero = !memcmp(buf, zp, rbytes); ++ if (!all_zero) { ++ wbytes = rbytes; ++ p = buf; ++ while (wbytes) { ++ size_t b; ++ ++ b = vfsub_write_k(dst, p, wbytes, &dst->f_pos); ++ err = b; ++ /* todo: signal_pending? */ ++ if (unlikely(err == -EAGAIN || err == -EINTR)) ++ continue; ++ if (unlikely(err < 0)) ++ break; ++ wbytes -= b; ++ p += b; ++ } ++ } else { ++ loff_t res; ++ ++ AuLabel(hole); ++ res = vfsub_llseek(dst, rbytes, SEEK_CUR); ++ err = res; ++ if (unlikely(res < 0)) ++ break; ++ } ++ len -= rbytes; ++ err = 0; ++ } ++ ++ /* the last block may be a hole */ ++ if (!err && all_zero) { ++ AuLabel(last hole); ++ ++ err = 1; ++ if (au_test_nfs(dst->f_dentry->d_sb)) { ++ /* nfs requires this step to make last hole */ ++ /* is this only nfs? */ ++ do { ++ /* todo: signal_pending? */ ++ err = vfsub_write_k(dst, "\0", 1, &dst->f_pos); ++ } while (err == -EAGAIN || err == -EINTR); ++ if (err == 1) ++ dst->f_pos--; ++ } ++ ++ if (err == 1) { ++ ia = (void *)buf; ++ ia->ia_size = dst->f_pos; ++ ia->ia_valid = ATTR_SIZE | ATTR_FILE; ++ ia->ia_file = dst; ++ h_mtx = &dst->f_dentry->d_inode->i_mutex; ++ mutex_lock_nested(h_mtx, AuLsc_I_CHILD2); ++ err = vfsub_notify_change(&dst->f_path, ia); ++ mutex_unlock(h_mtx); ++ } ++ } ++ ++ return err; ++} ++ ++int au_copy_file(struct file *dst, struct file *src, loff_t len) ++{ ++ int err; ++ unsigned long blksize; ++ unsigned char do_kfree; ++ char *buf; ++ ++ err = -ENOMEM; ++ blksize = dst->f_dentry->d_sb->s_blocksize; ++ if (!blksize || PAGE_SIZE < blksize) ++ blksize = PAGE_SIZE; ++ AuDbg("blksize %lu\n", blksize); ++ do_kfree = (blksize != PAGE_SIZE && blksize >= sizeof(struct iattr *)); ++ if (do_kfree) ++ buf = kmalloc(blksize, GFP_NOFS); ++ else ++ buf = (void *)__get_free_page(GFP_NOFS); ++ if (unlikely(!buf)) ++ goto out; ++ ++ if (len > (1 << 22)) ++ AuDbg("copying a large file %lld\n", (long long)len); ++ ++ src->f_pos = 0; ++ dst->f_pos = 0; ++ err = au_do_copy_file(dst, src, len, buf, blksize); ++ if (do_kfree) ++ kfree(buf); ++ else ++ free_page((unsigned long)buf); ++ ++out: ++ return err; ++} ++ ++/* ++ * to support a sparse file which is opened with O_APPEND, ++ * we need to close the file. 
++ */ ++static int au_cp_regular(struct dentry *dentry, aufs_bindex_t bdst, ++ aufs_bindex_t bsrc, loff_t len) ++{ ++ int err, i; ++ enum { SRC, DST }; ++ struct { ++ aufs_bindex_t bindex; ++ unsigned int flags; ++ struct dentry *dentry; ++ struct file *file; ++ void *label, *label_file; ++ } *f, file[] = { ++ { ++ .bindex = bsrc, ++ .flags = O_RDONLY | O_NOATIME | O_LARGEFILE, ++ .file = NULL, ++ .label = &&out, ++ .label_file = &&out_src ++ }, ++ { ++ .bindex = bdst, ++ .flags = O_WRONLY | O_NOATIME | O_LARGEFILE, ++ .file = NULL, ++ .label = &&out_src, ++ .label_file = &&out_dst ++ } ++ }; ++ struct super_block *sb; ++ ++ /* bsrc branch can be ro/rw. */ ++ sb = dentry->d_sb; ++ f = file; ++ for (i = 0; i < 2; i++, f++) { ++ f->dentry = au_h_dptr(dentry, f->bindex); ++ f->file = au_h_open(dentry, f->bindex, f->flags, /*file*/NULL); ++ err = PTR_ERR(f->file); ++ if (IS_ERR(f->file)) ++ goto *f->label; ++ err = -EINVAL; ++ if (unlikely(!f->file->f_op)) ++ goto *f->label_file; ++ } ++ ++ /* try stopping to update while we copyup */ ++ IMustLock(file[SRC].dentry->d_inode); ++ err = au_copy_file(file[DST].file, file[SRC].file, len); ++ ++out_dst: ++ fput(file[DST].file); ++ au_sbr_put(sb, file[DST].bindex); ++out_src: ++ fput(file[SRC].file); ++ au_sbr_put(sb, file[SRC].bindex); ++out: ++ return err; ++} ++ ++static int au_do_cpup_regular(struct dentry *dentry, aufs_bindex_t bdst, ++ aufs_bindex_t bsrc, loff_t len, ++ struct inode *h_dir, struct path *h_path) ++{ ++ int err, rerr; ++ loff_t l; ++ ++ err = 0; ++ l = i_size_read(au_h_iptr(dentry->d_inode, bsrc)); ++ if (len == -1 || l < len) ++ len = l; ++ if (len) ++ err = au_cp_regular(dentry, bdst, bsrc, len); ++ if (!err) ++ goto out; /* success */ ++ ++ rerr = vfsub_unlink(h_dir, h_path, /*force*/0); ++ if (rerr) { ++ AuIOErr("failed unlinking cpup-ed %.*s(%d, %d)\n", ++ AuDLNPair(h_path->dentry), err, rerr); ++ err = -EIO; ++ } ++ ++out: ++ return err; ++} ++ ++static int au_do_cpup_symlink(struct path *h_path, struct dentry *h_src, ++ struct inode *h_dir) ++{ ++ int err, symlen; ++ mm_segment_t old_fs; ++ union { ++ char *k; ++ char __user *u; ++ } sym; ++ ++ err = -ENOSYS; ++ if (unlikely(!h_src->d_inode->i_op->readlink)) ++ goto out; ++ ++ err = -ENOMEM; ++ sym.k = __getname_gfp(GFP_NOFS); ++ if (unlikely(!sym.k)) ++ goto out; ++ ++ /* unnecessary to support mmap_sem since symlink is not mmap-able */ ++ old_fs = get_fs(); ++ set_fs(KERNEL_DS); ++ symlen = h_src->d_inode->i_op->readlink(h_src, sym.u, PATH_MAX); ++ err = symlen; ++ set_fs(old_fs); ++ ++ if (symlen > 0) { ++ sym.k[symlen] = 0; ++ err = vfsub_symlink(h_dir, h_path, sym.k); ++ } ++ __putname(sym.k); ++ ++out: ++ return err; ++} ++ ++/* return with the lower dst inode is locked */ ++static noinline_for_stack ++int cpup_entry(struct dentry *dentry, aufs_bindex_t bdst, ++ aufs_bindex_t bsrc, loff_t len, unsigned int flags, ++ struct dentry *dst_parent) ++{ ++ int err; ++ umode_t mode; ++ unsigned int mnt_flags; ++ unsigned char isdir; ++ const unsigned char do_dt = !!au_ftest_cpup(flags, DTIME); ++ struct au_dtime dt; ++ struct path h_path; ++ struct dentry *h_src, *h_dst, *h_parent; ++ struct inode *h_inode, *h_dir; ++ struct super_block *sb; ++ ++ /* bsrc branch can be ro/rw. 
*/ ++ h_src = au_h_dptr(dentry, bsrc); ++ h_inode = h_src->d_inode; ++ AuDebugOn(h_inode != au_h_iptr(dentry->d_inode, bsrc)); ++ ++ /* try stopping to be referenced while we are creating */ ++ h_dst = au_h_dptr(dentry, bdst); ++ h_parent = h_dst->d_parent; /* dir inode is locked */ ++ h_dir = h_parent->d_inode; ++ IMustLock(h_dir); ++ AuDebugOn(h_parent != h_dst->d_parent); ++ ++ sb = dentry->d_sb; ++ h_path.mnt = au_sbr_mnt(sb, bdst); ++ if (do_dt) { ++ h_path.dentry = h_parent; ++ au_dtime_store(&dt, dst_parent, &h_path); ++ } ++ h_path.dentry = h_dst; ++ ++ isdir = 0; ++ mode = h_inode->i_mode; ++ switch (mode & S_IFMT) { ++ case S_IFREG: ++ /* try stopping to update while we are referencing */ ++ IMustLock(h_inode); ++ err = vfsub_create(h_dir, &h_path, mode | S_IWUSR); ++ if (!err) ++ err = au_do_cpup_regular ++ (dentry, bdst, bsrc, len, ++ au_h_iptr(dst_parent->d_inode, bdst), &h_path); ++ break; ++ case S_IFDIR: ++ isdir = 1; ++ err = vfsub_mkdir(h_dir, &h_path, mode); ++ if (!err) { ++ /* ++ * strange behaviour from the user's view, ++ * particularly the setattr case ++ */ ++ if (au_ibstart(dst_parent->d_inode) == bdst) ++ au_cpup_attr_nlink(dst_parent->d_inode, ++ /*force*/1); ++ au_cpup_attr_nlink(dentry->d_inode, /*force*/1); ++ } ++ break; ++ case S_IFLNK: ++ err = au_do_cpup_symlink(&h_path, h_src, h_dir); ++ break; ++ case S_IFCHR: ++ case S_IFBLK: ++ AuDebugOn(!capable(CAP_MKNOD)); ++ /*FALLTHROUGH*/ ++ case S_IFIFO: ++ case S_IFSOCK: ++ err = vfsub_mknod(h_dir, &h_path, mode, h_inode->i_rdev); ++ break; ++ default: ++ AuIOErr("Unknown inode type 0%o\n", mode); ++ err = -EIO; ++ } ++ ++ mnt_flags = au_mntflags(sb); ++ if (!au_opt_test(mnt_flags, UDBA_NONE) ++ && !isdir ++ && au_opt_test(mnt_flags, XINO) ++ && h_inode->i_nlink == 1 ++ /* todo: unnecessary? */ ++ /* && dentry->d_inode->i_nlink == 1 */ ++ && bdst < bsrc ++ && !au_ftest_cpup(flags, KEEPLINO)) ++ au_xino_write(sb, bsrc, h_inode->i_ino, /*ino*/0); ++ /* ignore this error */ ++ ++ if (do_dt) ++ au_dtime_revert(&dt); ++ return err; ++} ++ ++/* ++ * copyup the @dentry from @bsrc to @bdst. ++ * the caller must set both of the lower dentries. ++ * @len is for truncating; when it is -1, copyup the entire file. ++ * in link/rename cases, @dst_parent may be different from the real one. 
++ */ ++static int au_cpup_single(struct dentry *dentry, aufs_bindex_t bdst, ++ aufs_bindex_t bsrc, loff_t len, unsigned int flags, ++ struct dentry *dst_parent) ++{ ++ int err, rerr; ++ aufs_bindex_t old_ibstart; ++ unsigned char isdir, plink; ++ struct au_dtime dt; ++ struct path h_path; ++ struct dentry *h_src, *h_dst, *h_parent; ++ struct inode *dst_inode, *h_dir, *inode; ++ struct super_block *sb; ++ ++ AuDebugOn(bsrc <= bdst); ++ ++ sb = dentry->d_sb; ++ h_path.mnt = au_sbr_mnt(sb, bdst); ++ h_dst = au_h_dptr(dentry, bdst); ++ h_parent = h_dst->d_parent; /* dir inode is locked */ ++ h_dir = h_parent->d_inode; ++ IMustLock(h_dir); ++ ++ h_src = au_h_dptr(dentry, bsrc); ++ inode = dentry->d_inode; ++ ++ if (!dst_parent) ++ dst_parent = dget_parent(dentry); ++ else ++ dget(dst_parent); ++ ++ plink = !!au_opt_test(au_mntflags(sb), PLINK); ++ dst_inode = au_h_iptr(inode, bdst); ++ if (dst_inode) { ++ if (unlikely(!plink)) { ++ err = -EIO; ++ AuIOErr("hi%lu(i%lu) exists on b%d " ++ "but plink is disabled\n", ++ dst_inode->i_ino, inode->i_ino, bdst); ++ goto out; ++ } ++ ++ if (dst_inode->i_nlink) { ++ const int do_dt = au_ftest_cpup(flags, DTIME); ++ ++ h_src = au_plink_lkup(inode, bdst); ++ err = PTR_ERR(h_src); ++ if (IS_ERR(h_src)) ++ goto out; ++ if (unlikely(!h_src->d_inode)) { ++ err = -EIO; ++ AuIOErr("i%lu exists on a upper branch " ++ "but not pseudo-linked\n", ++ inode->i_ino); ++ dput(h_src); ++ goto out; ++ } ++ ++ if (do_dt) { ++ h_path.dentry = h_parent; ++ au_dtime_store(&dt, dst_parent, &h_path); ++ } ++ h_path.dentry = h_dst; ++ err = vfsub_link(h_src, h_dir, &h_path); ++ if (do_dt) ++ au_dtime_revert(&dt); ++ dput(h_src); ++ goto out; ++ } else ++ /* todo: cpup_wh_file? */ ++ /* udba work */ ++ au_update_ibrange(inode, /*do_put_zero*/1); ++ } ++ ++ old_ibstart = au_ibstart(inode); ++ err = cpup_entry(dentry, bdst, bsrc, len, flags, dst_parent); ++ if (unlikely(err)) ++ goto out; ++ dst_inode = h_dst->d_inode; ++ mutex_lock_nested(&dst_inode->i_mutex, AuLsc_I_CHILD2); ++ ++ err = cpup_iattr(dentry, bdst, h_src); ++ isdir = S_ISDIR(dst_inode->i_mode); ++ if (!err) { ++ if (bdst < old_ibstart) { ++ if (S_ISREG(inode->i_mode)) { ++ err = au_dy_iaop(inode, bdst, dst_inode); ++ if (unlikely(err)) ++ goto out_rev; ++ } ++ au_set_ibstart(inode, bdst); ++ } ++ au_set_h_iptr(inode, bdst, au_igrab(dst_inode), ++ au_hi_flags(inode, isdir)); ++ mutex_unlock(&dst_inode->i_mutex); ++ if (!isdir ++ && h_src->d_inode->i_nlink > 1 ++ && plink) ++ au_plink_append(inode, bdst, h_dst); ++ goto out; /* success */ ++ } ++ ++ /* revert */ ++out_rev: ++ h_path.dentry = h_parent; ++ mutex_unlock(&dst_inode->i_mutex); ++ au_dtime_store(&dt, dst_parent, &h_path); ++ h_path.dentry = h_dst; ++ if (!isdir) ++ rerr = vfsub_unlink(h_dir, &h_path, /*force*/0); ++ else ++ rerr = vfsub_rmdir(h_dir, &h_path); ++ au_dtime_revert(&dt); ++ if (rerr) { ++ AuIOErr("failed removing broken entry(%d, %d)\n", err, rerr); ++ err = -EIO; ++ } ++ ++out: ++ dput(dst_parent); ++ return err; ++} ++ ++struct au_cpup_single_args { ++ int *errp; ++ struct dentry *dentry; ++ aufs_bindex_t bdst, bsrc; ++ loff_t len; ++ unsigned int flags; ++ struct dentry *dst_parent; ++}; ++ ++static void au_call_cpup_single(void *args) ++{ ++ struct au_cpup_single_args *a = args; ++ *a->errp = au_cpup_single(a->dentry, a->bdst, a->bsrc, a->len, ++ a->flags, a->dst_parent); ++} ++ ++/* ++ * prevent SIGXFSZ in copy-up. ++ * testing CAP_MKNOD is for generic fs, ++ * but CAP_FSETID is for xfs only, currently. 
++ */ ++static int au_cpup_sio_test(struct super_block *sb, umode_t mode) ++{ ++ int do_sio; ++ ++ do_sio = 0; ++ if (!au_wkq_test() ++ && (!au_sbi(sb)->si_plink_maint_pid ++ || au_plink_maint(sb, AuLock_NOPLM))) { ++ switch (mode & S_IFMT) { ++ case S_IFREG: ++ /* no condition about RLIMIT_FSIZE and the file size */ ++ do_sio = 1; ++ break; ++ case S_IFCHR: ++ case S_IFBLK: ++ do_sio = !capable(CAP_MKNOD); ++ break; ++ } ++ if (!do_sio) ++ do_sio = ((mode & (S_ISUID | S_ISGID)) ++ && !capable(CAP_FSETID)); ++ } ++ ++ return do_sio; ++} ++ ++int au_sio_cpup_single(struct dentry *dentry, aufs_bindex_t bdst, ++ aufs_bindex_t bsrc, loff_t len, unsigned int flags, ++ struct dentry *dst_parent) ++{ ++ int err, wkq_err; ++ struct dentry *h_dentry; ++ ++ h_dentry = au_h_dptr(dentry, bsrc); ++ if (!au_cpup_sio_test(dentry->d_sb, h_dentry->d_inode->i_mode)) ++ err = au_cpup_single(dentry, bdst, bsrc, len, flags, ++ dst_parent); ++ else { ++ struct au_cpup_single_args args = { ++ .errp = &err, ++ .dentry = dentry, ++ .bdst = bdst, ++ .bsrc = bsrc, ++ .len = len, ++ .flags = flags, ++ .dst_parent = dst_parent ++ }; ++ wkq_err = au_wkq_wait(au_call_cpup_single, &args); ++ if (unlikely(wkq_err)) ++ err = wkq_err; ++ } ++ ++ return err; ++} ++ ++/* ++ * copyup the @dentry from the first active lower branch to @bdst, ++ * using au_cpup_single(). ++ */ ++static int au_cpup_simple(struct dentry *dentry, aufs_bindex_t bdst, loff_t len, ++ unsigned int flags) ++{ ++ int err; ++ aufs_bindex_t bsrc, bend; ++ ++ bend = au_dbend(dentry); ++ for (bsrc = bdst + 1; bsrc <= bend; bsrc++) ++ if (au_h_dptr(dentry, bsrc)) ++ break; ++ ++ err = au_lkup_neg(dentry, bdst); ++ if (!err) { ++ err = au_cpup_single(dentry, bdst, bsrc, len, flags, NULL); ++ if (!err) ++ return 0; /* success */ ++ ++ /* revert */ ++ au_set_h_dptr(dentry, bdst, NULL); ++ au_set_dbstart(dentry, bsrc); ++ } ++ ++ return err; ++} ++ ++struct au_cpup_simple_args { ++ int *errp; ++ struct dentry *dentry; ++ aufs_bindex_t bdst; ++ loff_t len; ++ unsigned int flags; ++}; ++ ++static void au_call_cpup_simple(void *args) ++{ ++ struct au_cpup_simple_args *a = args; ++ *a->errp = au_cpup_simple(a->dentry, a->bdst, a->len, a->flags); ++} ++ ++int au_sio_cpup_simple(struct dentry *dentry, aufs_bindex_t bdst, loff_t len, ++ unsigned int flags) ++{ ++ int err, wkq_err; ++ struct dentry *parent; ++ struct inode *h_dir; ++ ++ parent = dget_parent(dentry); ++ h_dir = au_h_iptr(parent->d_inode, bdst); ++ if (!au_test_h_perm_sio(h_dir, MAY_EXEC | MAY_WRITE) ++ && !au_cpup_sio_test(dentry->d_sb, dentry->d_inode->i_mode)) ++ err = au_cpup_simple(dentry, bdst, len, flags); ++ else { ++ struct au_cpup_simple_args args = { ++ .errp = &err, ++ .dentry = dentry, ++ .bdst = bdst, ++ .len = len, ++ .flags = flags ++ }; ++ wkq_err = au_wkq_wait(au_call_cpup_simple, &args); ++ if (unlikely(wkq_err)) ++ err = wkq_err; ++ } ++ ++ dput(parent); ++ return err; ++} ++ ++/* ---------------------------------------------------------------------- */ ++ ++/* ++ * copyup the deleted file for writing. 
++ */ ++static int au_do_cpup_wh(struct dentry *dentry, aufs_bindex_t bdst, ++ struct dentry *wh_dentry, struct file *file, ++ loff_t len) ++{ ++ int err; ++ aufs_bindex_t bstart; ++ struct au_dinfo *dinfo; ++ struct dentry *h_d_dst, *h_d_start; ++ struct au_hdentry *hdp; ++ ++ dinfo = au_di(dentry); ++ AuRwMustWriteLock(&dinfo->di_rwsem); ++ ++ bstart = dinfo->di_bstart; ++ hdp = dinfo->di_hdentry; ++ h_d_dst = hdp[0 + bdst].hd_dentry; ++ dinfo->di_bstart = bdst; ++ hdp[0 + bdst].hd_dentry = wh_dentry; ++ if (file) { ++ h_d_start = hdp[0 + bstart].hd_dentry; ++ hdp[0 + bstart].hd_dentry = au_hf_top(file)->f_dentry; ++ } ++ err = au_cpup_single(dentry, bdst, bstart, len, !AuCpup_DTIME, ++ /*h_parent*/NULL); ++ if (file) { ++ if (!err) ++ err = au_reopen_nondir(file); ++ hdp[0 + bstart].hd_dentry = h_d_start; ++ } ++ hdp[0 + bdst].hd_dentry = h_d_dst; ++ dinfo->di_bstart = bstart; ++ ++ return err; ++} ++ ++static int au_cpup_wh(struct dentry *dentry, aufs_bindex_t bdst, loff_t len, ++ struct file *file) ++{ ++ int err; ++ struct au_dtime dt; ++ struct dentry *parent, *h_parent, *wh_dentry; ++ struct au_branch *br; ++ struct path h_path; ++ ++ br = au_sbr(dentry->d_sb, bdst); ++ parent = dget_parent(dentry); ++ h_parent = au_h_dptr(parent, bdst); ++ wh_dentry = au_whtmp_lkup(h_parent, br, &dentry->d_name); ++ err = PTR_ERR(wh_dentry); ++ if (IS_ERR(wh_dentry)) ++ goto out; ++ ++ h_path.dentry = h_parent; ++ h_path.mnt = br->br_mnt; ++ au_dtime_store(&dt, parent, &h_path); ++ err = au_do_cpup_wh(dentry, bdst, wh_dentry, file, len); ++ if (unlikely(err)) ++ goto out_wh; ++ ++ dget(wh_dentry); ++ h_path.dentry = wh_dentry; ++ if (!S_ISDIR(wh_dentry->d_inode->i_mode)) ++ err = vfsub_unlink(h_parent->d_inode, &h_path, /*force*/0); ++ else ++ err = vfsub_rmdir(h_parent->d_inode, &h_path); ++ if (unlikely(err)) { ++ AuIOErr("failed remove copied-up tmp file %.*s(%d)\n", ++ AuDLNPair(wh_dentry), err); ++ err = -EIO; ++ } ++ au_dtime_revert(&dt); ++ au_set_hi_wh(dentry->d_inode, bdst, wh_dentry); ++ ++out_wh: ++ dput(wh_dentry); ++out: ++ dput(parent); ++ return err; ++} ++ ++struct au_cpup_wh_args { ++ int *errp; ++ struct dentry *dentry; ++ aufs_bindex_t bdst; ++ loff_t len; ++ struct file *file; ++}; ++ ++static void au_call_cpup_wh(void *args) ++{ ++ struct au_cpup_wh_args *a = args; ++ *a->errp = au_cpup_wh(a->dentry, a->bdst, a->len, a->file); ++} ++ ++int au_sio_cpup_wh(struct dentry *dentry, aufs_bindex_t bdst, loff_t len, ++ struct file *file) ++{ ++ int err, wkq_err; ++ struct dentry *parent, *h_orph, *h_parent, *h_dentry; ++ struct inode *dir, *h_dir, *h_tmpdir, *h_inode; ++ struct au_wbr *wbr; ++ ++ parent = dget_parent(dentry); ++ dir = parent->d_inode; ++ h_orph = NULL; ++ h_parent = NULL; ++ h_dir = au_igrab(au_h_iptr(dir, bdst)); ++ h_tmpdir = h_dir; ++ if (!h_dir->i_nlink) { ++ wbr = au_sbr(dentry->d_sb, bdst)->br_wbr; ++ h_orph = wbr->wbr_orph; ++ ++ h_parent = dget(au_h_dptr(parent, bdst)); ++ au_set_h_dptr(parent, bdst, dget(h_orph)); ++ h_tmpdir = h_orph->d_inode; ++ au_set_h_iptr(dir, bdst, au_igrab(h_tmpdir), /*flags*/0); ++ ++ /* this temporary unlock is safe */ ++ if (file) ++ h_dentry = au_hf_top(file)->f_dentry; ++ else ++ h_dentry = au_h_dptr(dentry, au_dbstart(dentry)); ++ h_inode = h_dentry->d_inode; ++ IMustLock(h_inode); ++ mutex_unlock(&h_inode->i_mutex); ++ mutex_lock_nested(&h_tmpdir->i_mutex, AuLsc_I_PARENT3); ++ mutex_lock_nested(&h_inode->i_mutex, AuLsc_I_CHILD); ++ /* todo: au_h_open_pre()? 
*/ ++ } ++ ++ if (!au_test_h_perm_sio(h_tmpdir, MAY_EXEC | MAY_WRITE) ++ && !au_cpup_sio_test(dentry->d_sb, dentry->d_inode->i_mode)) ++ err = au_cpup_wh(dentry, bdst, len, file); ++ else { ++ struct au_cpup_wh_args args = { ++ .errp = &err, ++ .dentry = dentry, ++ .bdst = bdst, ++ .len = len, ++ .file = file ++ }; ++ wkq_err = au_wkq_wait(au_call_cpup_wh, &args); ++ if (unlikely(wkq_err)) ++ err = wkq_err; ++ } ++ ++ if (h_orph) { ++ mutex_unlock(&h_tmpdir->i_mutex); ++ /* todo: au_h_open_post()? */ ++ au_set_h_iptr(dir, bdst, au_igrab(h_dir), /*flags*/0); ++ au_set_h_dptr(parent, bdst, h_parent); ++ } ++ iput(h_dir); ++ dput(parent); ++ ++ return err; ++} ++ ++/* ---------------------------------------------------------------------- */ ++ ++/* ++ * generic routine for both of copy-up and copy-down. ++ */ ++/* cf. revalidate function in file.c */ ++int au_cp_dirs(struct dentry *dentry, aufs_bindex_t bdst, ++ int (*cp)(struct dentry *dentry, aufs_bindex_t bdst, ++ struct dentry *h_parent, void *arg), ++ void *arg) ++{ ++ int err; ++ struct au_pin pin; ++ struct dentry *d, *parent, *h_parent, *real_parent; ++ ++ err = 0; ++ parent = dget_parent(dentry); ++ if (IS_ROOT(parent)) ++ goto out; ++ ++ au_pin_init(&pin, dentry, bdst, AuLsc_DI_PARENT2, AuLsc_I_PARENT2, ++ au_opt_udba(dentry->d_sb), AuPin_MNT_WRITE); ++ ++ /* do not use au_dpage */ ++ real_parent = parent; ++ while (1) { ++ dput(parent); ++ parent = dget_parent(dentry); ++ h_parent = au_h_dptr(parent, bdst); ++ if (h_parent) ++ goto out; /* success */ ++ ++ /* find top dir which is necessary to cpup */ ++ do { ++ d = parent; ++ dput(parent); ++ parent = dget_parent(d); ++ di_read_lock_parent3(parent, !AuLock_IR); ++ h_parent = au_h_dptr(parent, bdst); ++ di_read_unlock(parent, !AuLock_IR); ++ } while (!h_parent); ++ ++ if (d != real_parent) ++ di_write_lock_child3(d); ++ ++ /* somebody else might create while we were sleeping */ ++ if (!au_h_dptr(d, bdst) || !au_h_dptr(d, bdst)->d_inode) { ++ if (au_h_dptr(d, bdst)) ++ au_update_dbstart(d); ++ ++ au_pin_set_dentry(&pin, d); ++ err = au_do_pin(&pin); ++ if (!err) { ++ err = cp(d, bdst, h_parent, arg); ++ au_unpin(&pin); ++ } ++ } ++ ++ if (d != real_parent) ++ di_write_unlock(d); ++ if (unlikely(err)) ++ break; ++ } ++ ++out: ++ dput(parent); ++ return err; ++} ++ ++static int au_cpup_dir(struct dentry *dentry, aufs_bindex_t bdst, ++ struct dentry *h_parent __maybe_unused , ++ void *arg __maybe_unused) ++{ ++ return au_sio_cpup_simple(dentry, bdst, -1, AuCpup_DTIME); ++} ++ ++int au_cpup_dirs(struct dentry *dentry, aufs_bindex_t bdst) ++{ ++ return au_cp_dirs(dentry, bdst, au_cpup_dir, NULL); ++} ++ ++int au_test_and_cpup_dirs(struct dentry *dentry, aufs_bindex_t bdst) ++{ ++ int err; ++ struct dentry *parent; ++ struct inode *dir; ++ ++ parent = dget_parent(dentry); ++ dir = parent->d_inode; ++ err = 0; ++ if (au_h_iptr(dir, bdst)) ++ goto out; ++ ++ di_read_unlock(parent, AuLock_IR); ++ di_write_lock_parent(parent); ++ /* someone else might change our inode while we were sleeping */ ++ if (!au_h_iptr(dir, bdst)) ++ err = au_cpup_dirs(dentry, bdst); ++ di_downgrade_lock(parent, AuLock_IR); ++ ++out: ++ dput(parent); ++ return err; ++} +diff -uNr linux-3.2.0-gentoo-r1.orig//fs/aufs/cpup.h linux-3.2.0-gentoo-r1/fs/aufs/cpup.h +--- linux-3.2.0-gentoo-r1.orig//fs/aufs/cpup.h 1970-01-01 01:00:00.000000000 +0100 ++++ linux-3.2.0-gentoo-r1/fs/aufs/cpup.h 2012-01-17 12:11:24.532524766 +0100 +@@ -0,0 +1,81 @@ ++/* ++ * Copyright (C) 2005-2012 Junjiro R. 
Okajima ++ * ++ * This program, aufs is free software; you can redistribute it and/or modify ++ * it under the terms of the GNU General Public License as published by ++ * the Free Software Foundation; either version 2 of the License, or ++ * (at your option) any later version. ++ * ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ * GNU General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public License ++ * along with this program; if not, write to the Free Software ++ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA ++ */ ++ ++/* ++ * copy-up/down functions ++ */ ++ ++#ifndef __AUFS_CPUP_H__ ++#define __AUFS_CPUP_H__ ++ ++#ifdef __KERNEL__ ++ ++#include ++ ++struct inode; ++struct file; ++ ++void au_cpup_attr_flags(struct inode *dst, struct inode *src); ++void au_cpup_attr_timesizes(struct inode *inode); ++void au_cpup_attr_nlink(struct inode *inode, int force); ++void au_cpup_attr_changeable(struct inode *inode); ++void au_cpup_igen(struct inode *inode, struct inode *h_inode); ++void au_cpup_attr_all(struct inode *inode, int force); ++ ++/* ---------------------------------------------------------------------- */ ++ ++/* cpup flags */ ++#define AuCpup_DTIME 1 /* do dtime_store/revert */ ++#define AuCpup_KEEPLINO (1 << 1) /* do not clear the lower xino, ++ for link(2) */ ++#define au_ftest_cpup(flags, name) ((flags) & AuCpup_##name) ++#define au_fset_cpup(flags, name) \ ++ do { (flags) |= AuCpup_##name; } while (0) ++#define au_fclr_cpup(flags, name) \ ++ do { (flags) &= ~AuCpup_##name; } while (0) ++ ++int au_copy_file(struct file *dst, struct file *src, loff_t len); ++int au_sio_cpup_single(struct dentry *dentry, aufs_bindex_t bdst, ++ aufs_bindex_t bsrc, loff_t len, unsigned int flags, ++ struct dentry *dst_parent); ++int au_sio_cpup_simple(struct dentry *dentry, aufs_bindex_t bdst, loff_t len, ++ unsigned int flags); ++int au_sio_cpup_wh(struct dentry *dentry, aufs_bindex_t bdst, loff_t len, ++ struct file *file); ++ ++int au_cp_dirs(struct dentry *dentry, aufs_bindex_t bdst, ++ int (*cp)(struct dentry *dentry, aufs_bindex_t bdst, ++ struct dentry *h_parent, void *arg), ++ void *arg); ++int au_cpup_dirs(struct dentry *dentry, aufs_bindex_t bdst); ++int au_test_and_cpup_dirs(struct dentry *dentry, aufs_bindex_t bdst); ++ ++/* ---------------------------------------------------------------------- */ ++ ++/* keep timestamps when copyup */ ++struct au_dtime { ++ struct dentry *dt_dentry; ++ struct path dt_h_path; ++ struct timespec dt_atime, dt_mtime; ++}; ++void au_dtime_store(struct au_dtime *dt, struct dentry *dentry, ++ struct path *h_path); ++void au_dtime_revert(struct au_dtime *dt); ++ ++#endif /* __KERNEL__ */ ++#endif /* __AUFS_CPUP_H__ */ +diff -uNr linux-3.2.0-gentoo-r1.orig//fs/aufs/dbgaufs.c linux-3.2.0-gentoo-r1/fs/aufs/dbgaufs.c +--- linux-3.2.0-gentoo-r1.orig//fs/aufs/dbgaufs.c 1970-01-01 01:00:00.000000000 +0100 ++++ linux-3.2.0-gentoo-r1/fs/aufs/dbgaufs.c 2012-01-17 12:11:24.532524766 +0100 +@@ -0,0 +1,334 @@ ++/* ++ * Copyright (C) 2005-2012 Junjiro R. Okajima ++ * ++ * This program, aufs is free software; you can redistribute it and/or modify ++ * it under the terms of the GNU General Public License as published by ++ * the Free Software Foundation; either version 2 of the License, or ++ * (at your option) any later version. 
++ * ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ * GNU General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public License ++ * along with this program; if not, write to the Free Software ++ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA ++ */ ++ ++/* ++ * debugfs interface ++ */ ++ ++#include ++#include "aufs.h" ++ ++#ifndef CONFIG_SYSFS ++#error DEBUG_FS depends upon SYSFS ++#endif ++ ++static struct dentry *dbgaufs; ++static const mode_t dbgaufs_mode = S_IRUSR | S_IRGRP | S_IROTH; ++ ++/* 20 is max digits length of ulong 64 */ ++struct dbgaufs_arg { ++ int n; ++ char a[20 * 4]; ++}; ++ ++/* ++ * common function for all XINO files ++ */ ++static int dbgaufs_xi_release(struct inode *inode __maybe_unused, ++ struct file *file) ++{ ++ kfree(file->private_data); ++ return 0; ++} ++ ++static int dbgaufs_xi_open(struct file *xf, struct file *file, int do_fcnt) ++{ ++ int err; ++ struct kstat st; ++ struct dbgaufs_arg *p; ++ ++ err = -ENOMEM; ++ p = kmalloc(sizeof(*p), GFP_NOFS); ++ if (unlikely(!p)) ++ goto out; ++ ++ err = 0; ++ p->n = 0; ++ file->private_data = p; ++ if (!xf) ++ goto out; ++ ++ err = vfs_getattr(xf->f_vfsmnt, xf->f_dentry, &st); ++ if (!err) { ++ if (do_fcnt) ++ p->n = snprintf ++ (p->a, sizeof(p->a), "%ld, %llux%lu %lld\n", ++ (long)file_count(xf), st.blocks, st.blksize, ++ (long long)st.size); ++ else ++ p->n = snprintf(p->a, sizeof(p->a), "%llux%lu %lld\n", ++ st.blocks, st.blksize, ++ (long long)st.size); ++ AuDebugOn(p->n >= sizeof(p->a)); ++ } else { ++ p->n = snprintf(p->a, sizeof(p->a), "err %d\n", err); ++ err = 0; ++ } ++ ++out: ++ return err; ++ ++} ++ ++static ssize_t dbgaufs_xi_read(struct file *file, char __user *buf, ++ size_t count, loff_t *ppos) ++{ ++ struct dbgaufs_arg *p; ++ ++ p = file->private_data; ++ return simple_read_from_buffer(buf, count, ppos, p->a, p->n); ++} ++ ++/* ---------------------------------------------------------------------- */ ++ ++static int dbgaufs_xib_open(struct inode *inode, struct file *file) ++{ ++ int err; ++ struct au_sbinfo *sbinfo; ++ struct super_block *sb; ++ ++ sbinfo = inode->i_private; ++ sb = sbinfo->si_sb; ++ si_noflush_read_lock(sb); ++ err = dbgaufs_xi_open(sbinfo->si_xib, file, /*do_fcnt*/0); ++ si_read_unlock(sb); ++ return err; ++} ++ ++static const struct file_operations dbgaufs_xib_fop = { ++ .owner = THIS_MODULE, ++ .open = dbgaufs_xib_open, ++ .release = dbgaufs_xi_release, ++ .read = dbgaufs_xi_read ++}; ++ ++/* ---------------------------------------------------------------------- */ ++ ++#define DbgaufsXi_PREFIX "xi" ++ ++static int dbgaufs_xino_open(struct inode *inode, struct file *file) ++{ ++ int err; ++ long l; ++ struct au_sbinfo *sbinfo; ++ struct super_block *sb; ++ struct file *xf; ++ struct qstr *name; ++ ++ err = -ENOENT; ++ xf = NULL; ++ name = &file->f_dentry->d_name; ++ if (unlikely(name->len < sizeof(DbgaufsXi_PREFIX) ++ || memcmp(name->name, DbgaufsXi_PREFIX, ++ sizeof(DbgaufsXi_PREFIX) - 1))) ++ goto out; ++ err = kstrtol(name->name + sizeof(DbgaufsXi_PREFIX) - 1, 10, &l); ++ if (unlikely(err)) ++ goto out; ++ ++ sbinfo = inode->i_private; ++ sb = sbinfo->si_sb; ++ si_noflush_read_lock(sb); ++ if (l <= au_sbend(sb)) { ++ xf = au_sbr(sb, (aufs_bindex_t)l)->br_xino.xi_file; ++ err = dbgaufs_xi_open(xf, file, /*do_fcnt*/1); ++ } else ++ err = -ENOENT; ++ 
si_read_unlock(sb); ++ ++out: ++ return err; ++} ++ ++static const struct file_operations dbgaufs_xino_fop = { ++ .owner = THIS_MODULE, ++ .open = dbgaufs_xino_open, ++ .release = dbgaufs_xi_release, ++ .read = dbgaufs_xi_read ++}; ++ ++void dbgaufs_brs_del(struct super_block *sb, aufs_bindex_t bindex) ++{ ++ aufs_bindex_t bend; ++ struct au_branch *br; ++ struct au_xino_file *xi; ++ ++ if (!au_sbi(sb)->si_dbgaufs) ++ return; ++ ++ bend = au_sbend(sb); ++ for (; bindex <= bend; bindex++) { ++ br = au_sbr(sb, bindex); ++ xi = &br->br_xino; ++ if (xi->xi_dbgaufs) { ++ debugfs_remove(xi->xi_dbgaufs); ++ xi->xi_dbgaufs = NULL; ++ } ++ } ++} ++ ++void dbgaufs_brs_add(struct super_block *sb, aufs_bindex_t bindex) ++{ ++ struct au_sbinfo *sbinfo; ++ struct dentry *parent; ++ struct au_branch *br; ++ struct au_xino_file *xi; ++ aufs_bindex_t bend; ++ char name[sizeof(DbgaufsXi_PREFIX) + 5]; /* "xi" bindex NULL */ ++ ++ sbinfo = au_sbi(sb); ++ parent = sbinfo->si_dbgaufs; ++ if (!parent) ++ return; ++ ++ bend = au_sbend(sb); ++ for (; bindex <= bend; bindex++) { ++ snprintf(name, sizeof(name), DbgaufsXi_PREFIX "%d", bindex); ++ br = au_sbr(sb, bindex); ++ xi = &br->br_xino; ++ AuDebugOn(xi->xi_dbgaufs); ++ xi->xi_dbgaufs = debugfs_create_file(name, dbgaufs_mode, parent, ++ sbinfo, &dbgaufs_xino_fop); ++ /* ignore an error */ ++ if (unlikely(!xi->xi_dbgaufs)) ++ AuWarn1("failed %s under debugfs\n", name); ++ } ++} ++ ++/* ---------------------------------------------------------------------- */ ++ ++#ifdef CONFIG_AUFS_EXPORT ++static int dbgaufs_xigen_open(struct inode *inode, struct file *file) ++{ ++ int err; ++ struct au_sbinfo *sbinfo; ++ struct super_block *sb; ++ ++ sbinfo = inode->i_private; ++ sb = sbinfo->si_sb; ++ si_noflush_read_lock(sb); ++ err = dbgaufs_xi_open(sbinfo->si_xigen, file, /*do_fcnt*/0); ++ si_read_unlock(sb); ++ return err; ++} ++ ++static const struct file_operations dbgaufs_xigen_fop = { ++ .owner = THIS_MODULE, ++ .open = dbgaufs_xigen_open, ++ .release = dbgaufs_xi_release, ++ .read = dbgaufs_xi_read ++}; ++ ++static int dbgaufs_xigen_init(struct au_sbinfo *sbinfo) ++{ ++ int err; ++ ++ /* ++ * This function is a dynamic '__init' function actually, ++ * so the tiny check for si_rwsem is unnecessary. ++ */ ++ /* AuRwMustWriteLock(&sbinfo->si_rwsem); */ ++ ++ err = -EIO; ++ sbinfo->si_dbgaufs_xigen = debugfs_create_file ++ ("xigen", dbgaufs_mode, sbinfo->si_dbgaufs, sbinfo, ++ &dbgaufs_xigen_fop); ++ if (sbinfo->si_dbgaufs_xigen) ++ err = 0; ++ ++ return err; ++} ++#else ++static int dbgaufs_xigen_init(struct au_sbinfo *sbinfo) ++{ ++ return 0; ++} ++#endif /* CONFIG_AUFS_EXPORT */ ++ ++/* ---------------------------------------------------------------------- */ ++ ++void dbgaufs_si_fin(struct au_sbinfo *sbinfo) ++{ ++ /* ++ * This function is a dynamic '__init' function actually, ++ * so the tiny check for si_rwsem is unnecessary. ++ */ ++ /* AuRwMustWriteLock(&sbinfo->si_rwsem); */ ++ ++ debugfs_remove_recursive(sbinfo->si_dbgaufs); ++ sbinfo->si_dbgaufs = NULL; ++ kobject_put(&sbinfo->si_kobj); ++} ++ ++int dbgaufs_si_init(struct au_sbinfo *sbinfo) ++{ ++ int err; ++ char name[SysaufsSiNameLen]; ++ ++ /* ++ * This function is a dynamic '__init' function actually, ++ * so the tiny check for si_rwsem is unnecessary. 
++ */ ++ /* AuRwMustWriteLock(&sbinfo->si_rwsem); */ ++ ++ err = -ENOENT; ++ if (!dbgaufs) { ++ AuErr1("/debug/aufs is uninitialized\n"); ++ goto out; ++ } ++ ++ err = -EIO; ++ sysaufs_name(sbinfo, name); ++ sbinfo->si_dbgaufs = debugfs_create_dir(name, dbgaufs); ++ if (unlikely(!sbinfo->si_dbgaufs)) ++ goto out; ++ kobject_get(&sbinfo->si_kobj); ++ ++ sbinfo->si_dbgaufs_xib = debugfs_create_file ++ ("xib", dbgaufs_mode, sbinfo->si_dbgaufs, sbinfo, ++ &dbgaufs_xib_fop); ++ if (unlikely(!sbinfo->si_dbgaufs_xib)) ++ goto out_dir; ++ ++ err = dbgaufs_xigen_init(sbinfo); ++ if (!err) ++ goto out; /* success */ ++ ++out_dir: ++ dbgaufs_si_fin(sbinfo); ++out: ++ return err; ++} ++ ++/* ---------------------------------------------------------------------- */ ++ ++void dbgaufs_fin(void) ++{ ++ debugfs_remove(dbgaufs); ++} ++ ++int __init dbgaufs_init(void) ++{ ++ int err; ++ ++ err = -EIO; ++ dbgaufs = debugfs_create_dir(AUFS_NAME, NULL); ++ if (dbgaufs) ++ err = 0; ++ return err; ++} +diff -uNr linux-3.2.0-gentoo-r1.orig//fs/aufs/dbgaufs.h linux-3.2.0-gentoo-r1/fs/aufs/dbgaufs.h +--- linux-3.2.0-gentoo-r1.orig//fs/aufs/dbgaufs.h 1970-01-01 01:00:00.000000000 +0100 ++++ linux-3.2.0-gentoo-r1/fs/aufs/dbgaufs.h 2012-01-17 12:11:24.532524766 +0100 +@@ -0,0 +1,49 @@ ++/* ++ * Copyright (C) 2005-2012 Junjiro R. Okajima ++ * ++ * This program, aufs is free software; you can redistribute it and/or modify ++ * it under the terms of the GNU General Public License as published by ++ * the Free Software Foundation; either version 2 of the License, or ++ * (at your option) any later version. ++ * ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ * GNU General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public License ++ * along with this program; if not, write to the Free Software ++ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA ++ */ ++ ++/* ++ * debugfs interface ++ */ ++ ++#ifndef __DBGAUFS_H__ ++#define __DBGAUFS_H__ ++ ++#ifdef __KERNEL__ ++ ++struct super_block; ++struct au_sbinfo; ++ ++#ifdef CONFIG_DEBUG_FS ++/* dbgaufs.c */ ++void dbgaufs_brs_del(struct super_block *sb, aufs_bindex_t bindex); ++void dbgaufs_brs_add(struct super_block *sb, aufs_bindex_t bindex); ++void dbgaufs_si_fin(struct au_sbinfo *sbinfo); ++int dbgaufs_si_init(struct au_sbinfo *sbinfo); ++void dbgaufs_fin(void); ++int __init dbgaufs_init(void); ++#else ++AuStubVoid(dbgaufs_brs_del, struct super_block *sb, aufs_bindex_t bindex) ++AuStubVoid(dbgaufs_brs_add, struct super_block *sb, aufs_bindex_t bindex) ++AuStubVoid(dbgaufs_si_fin, struct au_sbinfo *sbinfo) ++AuStubInt0(dbgaufs_si_init, struct au_sbinfo *sbinfo) ++AuStubVoid(dbgaufs_fin, void) ++AuStubInt0(__init dbgaufs_init, void) ++#endif /* CONFIG_DEBUG_FS */ ++ ++#endif /* __KERNEL__ */ ++#endif /* __DBGAUFS_H__ */ +diff -uNr linux-3.2.0-gentoo-r1.orig//fs/aufs/dcsub.c linux-3.2.0-gentoo-r1/fs/aufs/dcsub.c +--- linux-3.2.0-gentoo-r1.orig//fs/aufs/dcsub.c 1970-01-01 01:00:00.000000000 +0100 ++++ linux-3.2.0-gentoo-r1/fs/aufs/dcsub.c 2012-01-17 12:11:24.534839602 +0100 +@@ -0,0 +1,243 @@ ++/* ++ * Copyright (C) 2005-2012 Junjiro R. 
Okajima ++ * ++ * This program, aufs is free software; you can redistribute it and/or modify ++ * it under the terms of the GNU General Public License as published by ++ * the Free Software Foundation; either version 2 of the License, or ++ * (at your option) any later version. ++ * ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ * GNU General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public License ++ * along with this program; if not, write to the Free Software ++ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA ++ */ ++ ++/* ++ * sub-routines for dentry cache ++ */ ++ ++#include "aufs.h" ++ ++static void au_dpage_free(struct au_dpage *dpage) ++{ ++ int i; ++ struct dentry **p; ++ ++ p = dpage->dentries; ++ for (i = 0; i < dpage->ndentry; i++) ++ dput(*p++); ++ free_page((unsigned long)dpage->dentries); ++} ++ ++int au_dpages_init(struct au_dcsub_pages *dpages, gfp_t gfp) ++{ ++ int err; ++ void *p; ++ ++ err = -ENOMEM; ++ dpages->dpages = kmalloc(sizeof(*dpages->dpages), gfp); ++ if (unlikely(!dpages->dpages)) ++ goto out; ++ ++ p = (void *)__get_free_page(gfp); ++ if (unlikely(!p)) ++ goto out_dpages; ++ ++ dpages->dpages[0].ndentry = 0; ++ dpages->dpages[0].dentries = p; ++ dpages->ndpage = 1; ++ return 0; /* success */ ++ ++out_dpages: ++ kfree(dpages->dpages); ++out: ++ return err; ++} ++ ++void au_dpages_free(struct au_dcsub_pages *dpages) ++{ ++ int i; ++ struct au_dpage *p; ++ ++ p = dpages->dpages; ++ for (i = 0; i < dpages->ndpage; i++) ++ au_dpage_free(p++); ++ kfree(dpages->dpages); ++} ++ ++static int au_dpages_append(struct au_dcsub_pages *dpages, ++ struct dentry *dentry, gfp_t gfp) ++{ ++ int err, sz; ++ struct au_dpage *dpage; ++ void *p; ++ ++ dpage = dpages->dpages + dpages->ndpage - 1; ++ sz = PAGE_SIZE / sizeof(dentry); ++ if (unlikely(dpage->ndentry >= sz)) { ++ AuLabel(new dpage); ++ err = -ENOMEM; ++ sz = dpages->ndpage * sizeof(*dpages->dpages); ++ p = au_kzrealloc(dpages->dpages, sz, ++ sz + sizeof(*dpages->dpages), gfp); ++ if (unlikely(!p)) ++ goto out; ++ ++ dpages->dpages = p; ++ dpage = dpages->dpages + dpages->ndpage; ++ p = (void *)__get_free_page(gfp); ++ if (unlikely(!p)) ++ goto out; ++ ++ dpage->ndentry = 0; ++ dpage->dentries = p; ++ dpages->ndpage++; ++ } ++ ++ AuDebugOn(!dentry->d_count); ++ dpage->dentries[dpage->ndentry++] = dget_dlock(dentry); ++ return 0; /* success */ ++ ++out: ++ return err; ++} ++ ++int au_dcsub_pages(struct au_dcsub_pages *dpages, struct dentry *root, ++ au_dpages_test test, void *arg) ++{ ++ int err; ++ struct dentry *this_parent; ++ struct list_head *next; ++ struct super_block *sb = root->d_sb; ++ ++ err = 0; ++ write_seqlock(&rename_lock); ++ this_parent = root; ++ spin_lock(&this_parent->d_lock); ++repeat: ++ next = this_parent->d_subdirs.next; ++resume: ++ if (this_parent->d_sb == sb ++ && !IS_ROOT(this_parent) ++ && au_di(this_parent) ++ && this_parent->d_count ++ && (!test || test(this_parent, arg))) { ++ err = au_dpages_append(dpages, this_parent, GFP_ATOMIC); ++ if (unlikely(err)) ++ goto out; ++ } ++ ++ while (next != &this_parent->d_subdirs) { ++ struct list_head *tmp = next; ++ struct dentry *dentry = list_entry(tmp, struct dentry, ++ d_u.d_child); ++ ++ next = tmp->next; ++ spin_lock_nested(&dentry->d_lock, DENTRY_D_LOCK_NESTED); ++ if (dentry->d_count) { ++ if 
(!list_empty(&dentry->d_subdirs)) { ++ spin_unlock(&this_parent->d_lock); ++ spin_release(&dentry->d_lock.dep_map, 1, ++ _RET_IP_); ++ this_parent = dentry; ++ spin_acquire(&this_parent->d_lock.dep_map, 0, 1, ++ _RET_IP_); ++ goto repeat; ++ } ++ if (dentry->d_sb == sb ++ && au_di(dentry) ++ && (!test || test(dentry, arg))) ++ err = au_dpages_append(dpages, dentry, ++ GFP_ATOMIC); ++ } ++ spin_unlock(&dentry->d_lock); ++ if (unlikely(err)) ++ goto out; ++ } ++ ++ if (this_parent != root) { ++ struct dentry *tmp; ++ struct dentry *child; ++ ++ tmp = this_parent->d_parent; ++ rcu_read_lock(); ++ spin_unlock(&this_parent->d_lock); ++ child = this_parent; ++ this_parent = tmp; ++ spin_lock(&this_parent->d_lock); ++ rcu_read_unlock(); ++ next = child->d_u.d_child.next; ++ goto resume; ++ } ++ ++out: ++ spin_unlock(&this_parent->d_lock); ++ write_sequnlock(&rename_lock); ++ return err; ++} ++ ++int au_dcsub_pages_rev(struct au_dcsub_pages *dpages, struct dentry *dentry, ++ int do_include, au_dpages_test test, void *arg) ++{ ++ int err; ++ ++ err = 0; ++ write_seqlock(&rename_lock); ++ spin_lock(&dentry->d_lock); ++ if (do_include ++ && dentry->d_count ++ && (!test || test(dentry, arg))) ++ err = au_dpages_append(dpages, dentry, GFP_ATOMIC); ++ spin_unlock(&dentry->d_lock); ++ if (unlikely(err)) ++ goto out; ++ ++ /* ++ * vfsmount_lock is unnecessary since this is a traverse in a single ++ * mount ++ */ ++ while (!IS_ROOT(dentry)) { ++ dentry = dentry->d_parent; /* rename_lock is locked */ ++ spin_lock(&dentry->d_lock); ++ if (dentry->d_count ++ && (!test || test(dentry, arg))) ++ err = au_dpages_append(dpages, dentry, GFP_ATOMIC); ++ spin_unlock(&dentry->d_lock); ++ if (unlikely(err)) ++ break; ++ } ++ ++out: ++ write_sequnlock(&rename_lock); ++ return err; ++} ++ ++static inline int au_dcsub_dpages_aufs(struct dentry *dentry, void *arg) ++{ ++ return au_di(dentry) && dentry->d_sb == arg; ++} ++ ++int au_dcsub_pages_rev_aufs(struct au_dcsub_pages *dpages, ++ struct dentry *dentry, int do_include) ++{ ++ return au_dcsub_pages_rev(dpages, dentry, do_include, ++ au_dcsub_dpages_aufs, dentry->d_sb); ++} ++ ++int au_test_subdir(struct dentry *d1, struct dentry *d2) ++{ ++ struct path path[2] = { ++ { ++ .dentry = d1 ++ }, ++ { ++ .dentry = d2 ++ } ++ }; ++ ++ return path_is_under(path + 0, path + 1); ++} +diff -uNr linux-3.2.0-gentoo-r1.orig//fs/aufs/dcsub.h linux-3.2.0-gentoo-r1/fs/aufs/dcsub.h +--- linux-3.2.0-gentoo-r1.orig//fs/aufs/dcsub.h 1970-01-01 01:00:00.000000000 +0100 ++++ linux-3.2.0-gentoo-r1/fs/aufs/dcsub.h 2012-01-17 12:11:24.534839602 +0100 +@@ -0,0 +1,94 @@ ++/* ++ * Copyright (C) 2005-2012 Junjiro R. Okajima ++ * ++ * This program, aufs is free software; you can redistribute it and/or modify ++ * it under the terms of the GNU General Public License as published by ++ * the Free Software Foundation; either version 2 of the License, or ++ * (at your option) any later version. ++ * ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ * GNU General Public License for more details. 
++ * ++ * You should have received a copy of the GNU General Public License ++ * along with this program; if not, write to the Free Software ++ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA ++ */ ++ ++/* ++ * sub-routines for dentry cache ++ */ ++ ++#ifndef __AUFS_DCSUB_H__ ++#define __AUFS_DCSUB_H__ ++ ++#ifdef __KERNEL__ ++ ++#include ++#include ++ ++struct dentry; ++ ++struct au_dpage { ++ int ndentry; ++ struct dentry **dentries; ++}; ++ ++struct au_dcsub_pages { ++ int ndpage; ++ struct au_dpage *dpages; ++}; ++ ++/* ---------------------------------------------------------------------- */ ++ ++/* dcsub.c */ ++int au_dpages_init(struct au_dcsub_pages *dpages, gfp_t gfp); ++void au_dpages_free(struct au_dcsub_pages *dpages); ++typedef int (*au_dpages_test)(struct dentry *dentry, void *arg); ++int au_dcsub_pages(struct au_dcsub_pages *dpages, struct dentry *root, ++ au_dpages_test test, void *arg); ++int au_dcsub_pages_rev(struct au_dcsub_pages *dpages, struct dentry *dentry, ++ int do_include, au_dpages_test test, void *arg); ++int au_dcsub_pages_rev_aufs(struct au_dcsub_pages *dpages, ++ struct dentry *dentry, int do_include); ++int au_test_subdir(struct dentry *d1, struct dentry *d2); ++ ++/* ---------------------------------------------------------------------- */ ++ ++static inline int au_d_hashed_positive(struct dentry *d) ++{ ++ int err; ++ struct inode *inode = d->d_inode; ++ err = 0; ++ if (unlikely(d_unhashed(d) || !inode || !inode->i_nlink)) ++ err = -ENOENT; ++ return err; ++} ++ ++static inline int au_d_alive(struct dentry *d) ++{ ++ int err; ++ struct inode *inode; ++ err = 0; ++ if (!IS_ROOT(d)) ++ err = au_d_hashed_positive(d); ++ else { ++ inode = d->d_inode; ++ if (unlikely(d_unlinked(d) || !inode || !inode->i_nlink)) ++ err = -ENOENT; ++ } ++ return err; ++} ++ ++static inline int au_alive_dir(struct dentry *d) ++{ ++ int err; ++ err = au_d_alive(d); ++ if (unlikely(err || IS_DEADDIR(d->d_inode))) ++ err = -ENOENT; ++ return err; ++} ++ ++#endif /* __KERNEL__ */ ++#endif /* __AUFS_DCSUB_H__ */ +diff -uNr linux-3.2.0-gentoo-r1.orig//fs/aufs/debug.c linux-3.2.0-gentoo-r1/fs/aufs/debug.c +--- linux-3.2.0-gentoo-r1.orig//fs/aufs/debug.c 1970-01-01 01:00:00.000000000 +0100 ++++ linux-3.2.0-gentoo-r1/fs/aufs/debug.c 2012-01-17 12:11:24.553358287 +0100 +@@ -0,0 +1,489 @@ ++/* ++ * Copyright (C) 2005-2012 Junjiro R. Okajima ++ * ++ * This program, aufs is free software; you can redistribute it and/or modify ++ * it under the terms of the GNU General Public License as published by ++ * the Free Software Foundation; either version 2 of the License, or ++ * (at your option) any later version. ++ * ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ * GNU General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public License ++ * along with this program; if not, write to the Free Software ++ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA ++ */ ++ ++/* ++ * debug print functions ++ */ ++ ++#include ++#include "aufs.h" ++ ++int aufs_debug; ++MODULE_PARM_DESC(debug, "debug print"); ++module_param_named(debug, aufs_debug, int, S_IRUGO | S_IWUSR | S_IWGRP); ++ ++char *au_plevel = KERN_DEBUG; ++#define dpri(fmt, ...) 
do { \ ++ if ((au_plevel \ ++ && strcmp(au_plevel, KERN_DEBUG)) \ ++ || au_debug_test()) \ ++ printk("%s" fmt, au_plevel, ##__VA_ARGS__); \ ++} while (0) ++ ++/* ---------------------------------------------------------------------- */ ++ ++void au_dpri_whlist(struct au_nhash *whlist) ++{ ++ unsigned long ul, n; ++ struct hlist_head *head; ++ struct au_vdir_wh *tpos; ++ struct hlist_node *pos; ++ ++ n = whlist->nh_num; ++ head = whlist->nh_head; ++ for (ul = 0; ul < n; ul++) { ++ hlist_for_each_entry(tpos, pos, head, wh_hash) ++ dpri("b%d, %.*s, %d\n", ++ tpos->wh_bindex, ++ tpos->wh_str.len, tpos->wh_str.name, ++ tpos->wh_str.len); ++ head++; ++ } ++} ++ ++void au_dpri_vdir(struct au_vdir *vdir) ++{ ++ unsigned long ul; ++ union au_vdir_deblk_p p; ++ unsigned char *o; ++ ++ if (!vdir || IS_ERR(vdir)) { ++ dpri("err %ld\n", PTR_ERR(vdir)); ++ return; ++ } ++ ++ dpri("deblk %u, nblk %lu, deblk %p, last{%lu, %p}, ver %lu\n", ++ vdir->vd_deblk_sz, vdir->vd_nblk, vdir->vd_deblk, ++ vdir->vd_last.ul, vdir->vd_last.p.deblk, vdir->vd_version); ++ for (ul = 0; ul < vdir->vd_nblk; ul++) { ++ p.deblk = vdir->vd_deblk[ul]; ++ o = p.deblk; ++ dpri("[%lu]: %p\n", ul, o); ++ } ++} ++ ++static int do_pri_inode(aufs_bindex_t bindex, struct inode *inode, int hn, ++ struct dentry *wh) ++{ ++ char *n = NULL; ++ int l = 0; ++ ++ if (!inode || IS_ERR(inode)) { ++ dpri("i%d: err %ld\n", bindex, PTR_ERR(inode)); ++ return -1; ++ } ++ ++ /* the type of i_blocks depends upon CONFIG_LSF */ ++ BUILD_BUG_ON(sizeof(inode->i_blocks) != sizeof(unsigned long) ++ && sizeof(inode->i_blocks) != sizeof(u64)); ++ if (wh) { ++ n = (void *)wh->d_name.name; ++ l = wh->d_name.len; ++ } ++ ++ dpri("i%d: %p, i%lu, %s, cnt %d, nl %u, 0%o, sz %llu, blk %llu," ++ " hn %d, ct %lld, np %lu, st 0x%lx, f 0x%x, v %llu, g %x%s%.*s\n", ++ bindex, inode, ++ inode->i_ino, inode->i_sb ? au_sbtype(inode->i_sb) : "??", ++ atomic_read(&inode->i_count), inode->i_nlink, inode->i_mode, ++ i_size_read(inode), (unsigned long long)inode->i_blocks, ++ hn, (long long)timespec_to_ns(&inode->i_ctime) & 0x0ffff, ++ inode->i_mapping ? inode->i_mapping->nrpages : 0, ++ inode->i_state, inode->i_flags, inode->i_version, ++ inode->i_generation, ++ l ? 
", wh " : "", l, n); ++ return 0; ++} ++ ++void au_dpri_inode(struct inode *inode) ++{ ++ struct au_iinfo *iinfo; ++ aufs_bindex_t bindex; ++ int err, hn; ++ ++ err = do_pri_inode(-1, inode, -1, NULL); ++ if (err || !au_test_aufs(inode->i_sb)) ++ return; ++ ++ iinfo = au_ii(inode); ++ if (!iinfo) ++ return; ++ dpri("i-1: bstart %d, bend %d, gen %d\n", ++ iinfo->ii_bstart, iinfo->ii_bend, au_iigen(inode)); ++ if (iinfo->ii_bstart < 0) ++ return; ++ hn = 0; ++ for (bindex = iinfo->ii_bstart; bindex <= iinfo->ii_bend; bindex++) { ++ hn = !!au_hn(iinfo->ii_hinode + bindex); ++ do_pri_inode(bindex, iinfo->ii_hinode[0 + bindex].hi_inode, hn, ++ iinfo->ii_hinode[0 + bindex].hi_whdentry); ++ } ++} ++ ++void au_dpri_dalias(struct inode *inode) ++{ ++ struct dentry *d; ++ ++ spin_lock(&inode->i_lock); ++ list_for_each_entry(d, &inode->i_dentry, d_alias) ++ au_dpri_dentry(d); ++ spin_unlock(&inode->i_lock); ++} ++ ++static int do_pri_dentry(aufs_bindex_t bindex, struct dentry *dentry) ++{ ++ struct dentry *wh = NULL; ++ int hn; ++ ++ if (!dentry || IS_ERR(dentry)) { ++ dpri("d%d: err %ld\n", bindex, PTR_ERR(dentry)); ++ return -1; ++ } ++ /* do not call dget_parent() here */ ++ /* note: access d_xxx without d_lock */ ++ dpri("d%d: %.*s?/%.*s, %s, cnt %d, flags 0x%x\n", ++ bindex, ++ AuDLNPair(dentry->d_parent), AuDLNPair(dentry), ++ dentry->d_sb ? au_sbtype(dentry->d_sb) : "??", ++ dentry->d_count, dentry->d_flags); ++ hn = -1; ++ if (bindex >= 0 && dentry->d_inode && au_test_aufs(dentry->d_sb)) { ++ struct au_iinfo *iinfo = au_ii(dentry->d_inode); ++ if (iinfo) { ++ hn = !!au_hn(iinfo->ii_hinode + bindex); ++ wh = iinfo->ii_hinode[0 + bindex].hi_whdentry; ++ } ++ } ++ do_pri_inode(bindex, dentry->d_inode, hn, wh); ++ return 0; ++} ++ ++void au_dpri_dentry(struct dentry *dentry) ++{ ++ struct au_dinfo *dinfo; ++ aufs_bindex_t bindex; ++ int err; ++ struct au_hdentry *hdp; ++ ++ err = do_pri_dentry(-1, dentry); ++ if (err || !au_test_aufs(dentry->d_sb)) ++ return; ++ ++ dinfo = au_di(dentry); ++ if (!dinfo) ++ return; ++ dpri("d-1: bstart %d, bend %d, bwh %d, bdiropq %d, gen %d\n", ++ dinfo->di_bstart, dinfo->di_bend, ++ dinfo->di_bwh, dinfo->di_bdiropq, au_digen(dentry)); ++ if (dinfo->di_bstart < 0) ++ return; ++ hdp = dinfo->di_hdentry; ++ for (bindex = dinfo->di_bstart; bindex <= dinfo->di_bend; bindex++) ++ do_pri_dentry(bindex, hdp[0 + bindex].hd_dentry); ++} ++ ++static int do_pri_file(aufs_bindex_t bindex, struct file *file) ++{ ++ char a[32]; ++ ++ if (!file || IS_ERR(file)) { ++ dpri("f%d: err %ld\n", bindex, PTR_ERR(file)); ++ return -1; ++ } ++ a[0] = 0; ++ if (bindex < 0 ++ && file->f_dentry ++ && au_test_aufs(file->f_dentry->d_sb) ++ && au_fi(file)) ++ snprintf(a, sizeof(a), ", gen %d, mmapped %d", ++ au_figen(file), atomic_read(&au_fi(file)->fi_mmapped)); ++ dpri("f%d: mode 0x%x, flags 0%o, cnt %ld, v %llu, pos %llu%s\n", ++ bindex, file->f_mode, file->f_flags, (long)file_count(file), ++ file->f_version, file->f_pos, a); ++ if (file->f_dentry) ++ do_pri_dentry(bindex, file->f_dentry); ++ return 0; ++} ++ ++void au_dpri_file(struct file *file) ++{ ++ struct au_finfo *finfo; ++ struct au_fidir *fidir; ++ struct au_hfile *hfile; ++ aufs_bindex_t bindex; ++ int err; ++ ++ err = do_pri_file(-1, file); ++ if (err || !file->f_dentry || !au_test_aufs(file->f_dentry->d_sb)) ++ return; ++ ++ finfo = au_fi(file); ++ if (!finfo) ++ return; ++ if (finfo->fi_btop < 0) ++ return; ++ fidir = finfo->fi_hdir; ++ if (!fidir) ++ do_pri_file(finfo->fi_btop, finfo->fi_htop.hf_file); ++ else ++ for (bindex = 
finfo->fi_btop; ++ bindex >= 0 && bindex <= fidir->fd_bbot; ++ bindex++) { ++ hfile = fidir->fd_hfile + bindex; ++ do_pri_file(bindex, hfile ? hfile->hf_file : NULL); ++ } ++} ++ ++static int do_pri_br(aufs_bindex_t bindex, struct au_branch *br) ++{ ++ struct vfsmount *mnt; ++ struct super_block *sb; ++ ++ if (!br || IS_ERR(br)) ++ goto out; ++ mnt = br->br_mnt; ++ if (!mnt || IS_ERR(mnt)) ++ goto out; ++ sb = mnt->mnt_sb; ++ if (!sb || IS_ERR(sb)) ++ goto out; ++ ++ dpri("s%d: {perm 0x%x, id %d, cnt %d, wbr %p}, " ++ "%s, dev 0x%02x%02x, flags 0x%lx, cnt %d, active %d, " ++ "xino %d\n", ++ bindex, br->br_perm, br->br_id, atomic_read(&br->br_count), ++ br->br_wbr, au_sbtype(sb), MAJOR(sb->s_dev), MINOR(sb->s_dev), ++ sb->s_flags, sb->s_count, ++ atomic_read(&sb->s_active), !!br->br_xino.xi_file); ++ return 0; ++ ++out: ++ dpri("s%d: err %ld\n", bindex, PTR_ERR(br)); ++ return -1; ++} ++ ++void au_dpri_sb(struct super_block *sb) ++{ ++ struct au_sbinfo *sbinfo; ++ aufs_bindex_t bindex; ++ int err; ++ /* to reuduce stack size */ ++ struct { ++ struct vfsmount mnt; ++ struct au_branch fake; ++ } *a; ++ ++ /* this function can be called from magic sysrq */ ++ a = kzalloc(sizeof(*a), GFP_ATOMIC); ++ if (unlikely(!a)) { ++ dpri("no memory\n"); ++ return; ++ } ++ ++ a->mnt.mnt_sb = sb; ++ a->fake.br_perm = 0; ++ a->fake.br_mnt = &a->mnt; ++ a->fake.br_xino.xi_file = NULL; ++ atomic_set(&a->fake.br_count, 0); ++ smp_mb(); /* atomic_set */ ++ err = do_pri_br(-1, &a->fake); ++ kfree(a); ++ dpri("dev 0x%x\n", sb->s_dev); ++ if (err || !au_test_aufs(sb)) ++ return; ++ ++ sbinfo = au_sbi(sb); ++ if (!sbinfo) ++ return; ++ dpri("nw %d, gen %u, kobj %d\n", ++ atomic_read(&sbinfo->si_nowait.nw_len), sbinfo->si_generation, ++ atomic_read(&sbinfo->si_kobj.kref.refcount)); ++ for (bindex = 0; bindex <= sbinfo->si_bend; bindex++) ++ do_pri_br(bindex, sbinfo->si_branch[0 + bindex]); ++} ++ ++/* ---------------------------------------------------------------------- */ ++ ++void au_dbg_sleep_jiffy(int jiffy) ++{ ++ while (jiffy) ++ jiffy = schedule_timeout_uninterruptible(jiffy); ++} ++ ++void au_dbg_iattr(struct iattr *ia) ++{ ++#define AuBit(name) if (ia->ia_valid & ATTR_ ## name) \ ++ dpri(#name "\n") ++ AuBit(MODE); ++ AuBit(UID); ++ AuBit(GID); ++ AuBit(SIZE); ++ AuBit(ATIME); ++ AuBit(MTIME); ++ AuBit(CTIME); ++ AuBit(ATIME_SET); ++ AuBit(MTIME_SET); ++ AuBit(FORCE); ++ AuBit(ATTR_FLAG); ++ AuBit(KILL_SUID); ++ AuBit(KILL_SGID); ++ AuBit(FILE); ++ AuBit(KILL_PRIV); ++ AuBit(OPEN); ++ AuBit(TIMES_SET); ++#undef AuBit ++ dpri("ia_file %p\n", ia->ia_file); ++} ++ ++/* ---------------------------------------------------------------------- */ ++ ++void __au_dbg_verify_dinode(struct dentry *dentry, const char *func, int line) ++{ ++ struct inode *h_inode, *inode = dentry->d_inode; ++ struct dentry *h_dentry; ++ aufs_bindex_t bindex, bend, bi; ++ ++ if (!inode /* || au_di(dentry)->di_lsc == AuLsc_DI_TMP */) ++ return; ++ ++ bend = au_dbend(dentry); ++ bi = au_ibend(inode); ++ if (bi < bend) ++ bend = bi; ++ bindex = au_dbstart(dentry); ++ bi = au_ibstart(inode); ++ if (bi > bindex) ++ bindex = bi; ++ ++ for (; bindex <= bend; bindex++) { ++ h_dentry = au_h_dptr(dentry, bindex); ++ if (!h_dentry) ++ continue; ++ h_inode = au_h_iptr(inode, bindex); ++ if (unlikely(h_inode != h_dentry->d_inode)) { ++ int old = au_debug_test(); ++ if (!old) ++ au_debug(1); ++ AuDbg("b%d, %s:%d\n", bindex, func, line); ++ AuDbgDentry(dentry); ++ AuDbgInode(inode); ++ if (!old) ++ au_debug(0); ++ BUG(); ++ } ++ } ++} ++ ++void 
au_dbg_verify_dir_parent(struct dentry *dentry, unsigned int sigen) ++{ ++ struct dentry *parent; ++ ++ parent = dget_parent(dentry); ++ AuDebugOn(!S_ISDIR(dentry->d_inode->i_mode)); ++ AuDebugOn(IS_ROOT(dentry)); ++ AuDebugOn(au_digen_test(parent, sigen)); ++ dput(parent); ++} ++ ++void au_dbg_verify_nondir_parent(struct dentry *dentry, unsigned int sigen) ++{ ++ struct dentry *parent; ++ struct inode *inode; ++ ++ parent = dget_parent(dentry); ++ inode = dentry->d_inode; ++ AuDebugOn(inode && S_ISDIR(dentry->d_inode->i_mode)); ++ AuDebugOn(au_digen_test(parent, sigen)); ++ dput(parent); ++} ++ ++void au_dbg_verify_gen(struct dentry *parent, unsigned int sigen) ++{ ++ int err, i, j; ++ struct au_dcsub_pages dpages; ++ struct au_dpage *dpage; ++ struct dentry **dentries; ++ ++ err = au_dpages_init(&dpages, GFP_NOFS); ++ AuDebugOn(err); ++ err = au_dcsub_pages_rev_aufs(&dpages, parent, /*do_include*/1); ++ AuDebugOn(err); ++ for (i = dpages.ndpage - 1; !err && i >= 0; i--) { ++ dpage = dpages.dpages + i; ++ dentries = dpage->dentries; ++ for (j = dpage->ndentry - 1; !err && j >= 0; j--) ++ AuDebugOn(au_digen_test(dentries[j], sigen)); ++ } ++ au_dpages_free(&dpages); ++} ++ ++void au_dbg_verify_kthread(void) ++{ ++ if (au_wkq_test()) { ++ au_dbg_blocked(); ++ /* ++ * It may be recursive, but udba=notify between two aufs mounts, ++ * where a single ro branch is shared, is not a problem. ++ */ ++ /* WARN_ON(1); */ ++ } ++} ++ ++/* ---------------------------------------------------------------------- */ ++ ++void au_debug_sbinfo_init(struct au_sbinfo *sbinfo __maybe_unused) ++{ ++#ifdef AuForceNoPlink ++ au_opt_clr(sbinfo->si_mntflags, PLINK); ++#endif ++#ifdef AuForceNoXino ++ au_opt_clr(sbinfo->si_mntflags, XINO); ++#endif ++#ifdef AuForceNoRefrof ++ au_opt_clr(sbinfo->si_mntflags, REFROF); ++#endif ++#ifdef AuForceHnotify ++ au_opt_set_udba(sbinfo->si_mntflags, UDBA_HNOTIFY); ++#endif ++#ifdef AuForceRd0 ++ sbinfo->si_rdblk = 0; ++ sbinfo->si_rdhash = 0; ++#endif ++} ++ ++int __init au_debug_init(void) ++{ ++ aufs_bindex_t bindex; ++ struct au_vdir_destr destr; ++ ++ bindex = -1; ++ AuDebugOn(bindex >= 0); ++ ++ destr.len = -1; ++ AuDebugOn(destr.len < NAME_MAX); ++ ++#ifdef CONFIG_4KSTACKS ++ pr_warning("CONFIG_4KSTACKS is defined.\n"); ++#endif ++ ++#ifdef AuForceNoBrs ++ sysaufs_brs = 0; ++#endif ++ ++ return 0; ++} +diff -uNr linux-3.2.0-gentoo-r1.orig//fs/aufs/debug.h linux-3.2.0-gentoo-r1/fs/aufs/debug.h +--- linux-3.2.0-gentoo-r1.orig//fs/aufs/debug.h 1970-01-01 01:00:00.000000000 +0100 ++++ linux-3.2.0-gentoo-r1/fs/aufs/debug.h 2012-01-17 12:11:24.553358287 +0100 +@@ -0,0 +1,243 @@ ++/* ++ * Copyright (C) 2005-2012 Junjiro R. Okajima ++ * ++ * This program, aufs is free software; you can redistribute it and/or modify ++ * it under the terms of the GNU General Public License as published by ++ * the Free Software Foundation; either version 2 of the License, or ++ * (at your option) any later version. ++ * ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ * GNU General Public License for more details. 
++ * ++ * You should have received a copy of the GNU General Public License ++ * along with this program; if not, write to the Free Software ++ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA ++ */ ++ ++/* ++ * debug print functions ++ */ ++ ++#ifndef __AUFS_DEBUG_H__ ++#define __AUFS_DEBUG_H__ ++ ++#ifdef __KERNEL__ ++ ++#include ++#include ++#include ++#include ++ ++#ifdef CONFIG_AUFS_DEBUG ++#define AuDebugOn(a) BUG_ON(a) ++ ++/* module parameter */ ++extern int aufs_debug; ++static inline void au_debug(int n) ++{ ++ aufs_debug = n; ++ smp_mb(); ++} ++ ++static inline int au_debug_test(void) ++{ ++ return aufs_debug; ++} ++#else ++#define AuDebugOn(a) do {} while (0) ++AuStubVoid(au_debug, int n) ++AuStubInt0(au_debug_test, void) ++#endif /* CONFIG_AUFS_DEBUG */ ++ ++/* ---------------------------------------------------------------------- */ ++ ++/* debug print */ ++ ++#define AuDbg(fmt, ...) do { \ ++ if (au_debug_test()) \ ++ pr_debug("DEBUG: " fmt, ##__VA_ARGS__); \ ++} while (0) ++#define AuLabel(l) AuDbg(#l "\n") ++#define AuIOErr(fmt, ...) pr_err("I/O Error, " fmt, ##__VA_ARGS__) ++#define AuWarn1(fmt, ...) do { \ ++ static unsigned char _c; \ ++ if (!_c++) \ ++ pr_warning(fmt, ##__VA_ARGS__); \ ++} while (0) ++ ++#define AuErr1(fmt, ...) do { \ ++ static unsigned char _c; \ ++ if (!_c++) \ ++ pr_err(fmt, ##__VA_ARGS__); \ ++} while (0) ++ ++#define AuIOErr1(fmt, ...) do { \ ++ static unsigned char _c; \ ++ if (!_c++) \ ++ AuIOErr(fmt, ##__VA_ARGS__); \ ++} while (0) ++ ++#define AuUnsupportMsg "This operation is not supported." \ ++ " Please report this application to aufs-users ML." ++#define AuUnsupport(fmt, ...) do { \ ++ pr_err(AuUnsupportMsg "\n" fmt, ##__VA_ARGS__); \ ++ dump_stack(); \ ++} while (0) ++ ++#define AuTraceErr(e) do { \ ++ if (unlikely((e) < 0)) \ ++ AuDbg("err %d\n", (int)(e)); \ ++} while (0) ++ ++#define AuTraceErrPtr(p) do { \ ++ if (IS_ERR(p)) \ ++ AuDbg("err %ld\n", PTR_ERR(p)); \ ++} while (0) ++ ++/* dirty macros for debug print, use with "%.*s" and caution */ ++#define AuLNPair(qstr) (qstr)->len, (qstr)->name ++#define AuDLNPair(d) AuLNPair(&(d)->d_name) ++ ++/* ---------------------------------------------------------------------- */ ++ ++struct au_sbinfo; ++struct au_finfo; ++struct dentry; ++#ifdef CONFIG_AUFS_DEBUG ++extern char *au_plevel; ++struct au_nhash; ++void au_dpri_whlist(struct au_nhash *whlist); ++struct au_vdir; ++void au_dpri_vdir(struct au_vdir *vdir); ++struct inode; ++void au_dpri_inode(struct inode *inode); ++void au_dpri_dalias(struct inode *inode); ++void au_dpri_dentry(struct dentry *dentry); ++struct file; ++void au_dpri_file(struct file *filp); ++struct super_block; ++void au_dpri_sb(struct super_block *sb); ++ ++void au_dbg_sleep_jiffy(int jiffy); ++struct iattr; ++void au_dbg_iattr(struct iattr *ia); ++ ++#define au_dbg_verify_dinode(d) __au_dbg_verify_dinode(d, __func__, __LINE__) ++void __au_dbg_verify_dinode(struct dentry *dentry, const char *func, int line); ++void au_dbg_verify_dir_parent(struct dentry *dentry, unsigned int sigen); ++void au_dbg_verify_nondir_parent(struct dentry *dentry, unsigned int sigen); ++void au_dbg_verify_gen(struct dentry *parent, unsigned int sigen); ++void au_dbg_verify_kthread(void); ++ ++int __init au_debug_init(void); ++void au_debug_sbinfo_init(struct au_sbinfo *sbinfo); ++#define AuDbgWhlist(w) do { \ ++ AuDbg(#w "\n"); \ ++ au_dpri_whlist(w); \ ++} while (0) ++ ++#define AuDbgVdir(v) do { \ ++ AuDbg(#v "\n"); \ ++ au_dpri_vdir(v); \ ++} while (0) ++ 
++#define AuDbgInode(i) do { \ ++ AuDbg(#i "\n"); \ ++ au_dpri_inode(i); \ ++} while (0) ++ ++#define AuDbgDAlias(i) do { \ ++ AuDbg(#i "\n"); \ ++ au_dpri_dalias(i); \ ++} while (0) ++ ++#define AuDbgDentry(d) do { \ ++ AuDbg(#d "\n"); \ ++ au_dpri_dentry(d); \ ++} while (0) ++ ++#define AuDbgFile(f) do { \ ++ AuDbg(#f "\n"); \ ++ au_dpri_file(f); \ ++} while (0) ++ ++#define AuDbgSb(sb) do { \ ++ AuDbg(#sb "\n"); \ ++ au_dpri_sb(sb); \ ++} while (0) ++ ++#define AuDbgSleep(sec) do { \ ++ AuDbg("sleep %d sec\n", sec); \ ++ ssleep(sec); \ ++} while (0) ++ ++#define AuDbgSleepJiffy(jiffy) do { \ ++ AuDbg("sleep %d jiffies\n", jiffy); \ ++ au_dbg_sleep_jiffy(jiffy); \ ++} while (0) ++ ++#define AuDbgIAttr(ia) do { \ ++ AuDbg("ia_valid 0x%x\n", (ia)->ia_valid); \ ++ au_dbg_iattr(ia); \ ++} while (0) ++ ++#define AuDbgSym(addr) do { \ ++ char sym[KSYM_SYMBOL_LEN]; \ ++ sprint_symbol(sym, (unsigned long)addr); \ ++ AuDbg("%s\n", sym); \ ++} while (0) ++ ++#define AuInfoSym(addr) do { \ ++ char sym[KSYM_SYMBOL_LEN]; \ ++ sprint_symbol(sym, (unsigned long)addr); \ ++ AuInfo("%s\n", sym); \ ++} while (0) ++#else ++AuStubVoid(au_dbg_verify_dinode, struct dentry *dentry) ++AuStubVoid(au_dbg_verify_dir_parent, struct dentry *dentry, unsigned int sigen) ++AuStubVoid(au_dbg_verify_nondir_parent, struct dentry *dentry, ++ unsigned int sigen) ++AuStubVoid(au_dbg_verify_gen, struct dentry *parent, unsigned int sigen) ++AuStubVoid(au_dbg_verify_kthread, void) ++AuStubInt0(__init au_debug_init, void) ++AuStubVoid(au_debug_sbinfo_init, struct au_sbinfo *sbinfo) ++ ++#define AuDbgWhlist(w) do {} while (0) ++#define AuDbgVdir(v) do {} while (0) ++#define AuDbgInode(i) do {} while (0) ++#define AuDbgDAlias(i) do {} while (0) ++#define AuDbgDentry(d) do {} while (0) ++#define AuDbgFile(f) do {} while (0) ++#define AuDbgSb(sb) do {} while (0) ++#define AuDbgSleep(sec) do {} while (0) ++#define AuDbgSleepJiffy(jiffy) do {} while (0) ++#define AuDbgIAttr(ia) do {} while (0) ++#define AuDbgSym(addr) do {} while (0) ++#define AuInfoSym(addr) do {} while (0) ++#endif /* CONFIG_AUFS_DEBUG */ ++ ++/* ---------------------------------------------------------------------- */ ++ ++#ifdef CONFIG_AUFS_MAGIC_SYSRQ ++int __init au_sysrq_init(void); ++void au_sysrq_fin(void); ++ ++#ifdef CONFIG_HW_CONSOLE ++#define au_dbg_blocked() do { \ ++ WARN_ON(1); \ ++ handle_sysrq('w'); \ ++} while (0) ++#else ++AuStubVoid(au_dbg_blocked, void) ++#endif ++ ++#else ++AuStubInt0(__init au_sysrq_init, void) ++AuStubVoid(au_sysrq_fin, void) ++AuStubVoid(au_dbg_blocked, void) ++#endif /* CONFIG_AUFS_MAGIC_SYSRQ */ ++ ++#endif /* __KERNEL__ */ ++#endif /* __AUFS_DEBUG_H__ */ +diff -uNr linux-3.2.0-gentoo-r1.orig//fs/aufs/dentry.c linux-3.2.0-gentoo-r1/fs/aufs/dentry.c +--- linux-3.2.0-gentoo-r1.orig//fs/aufs/dentry.c 1970-01-01 01:00:00.000000000 +0100 ++++ linux-3.2.0-gentoo-r1/fs/aufs/dentry.c 2012-01-17 12:11:24.562617629 +0100 +@@ -0,0 +1,1140 @@ ++/* ++ * Copyright (C) 2005-2012 Junjiro R. Okajima ++ * ++ * This program, aufs is free software; you can redistribute it and/or modify ++ * it under the terms of the GNU General Public License as published by ++ * the Free Software Foundation; either version 2 of the License, or ++ * (at your option) any later version. ++ * ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ * GNU General Public License for more details. 
++ * ++ * You should have received a copy of the GNU General Public License ++ * along with this program; if not, write to the Free Software ++ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA ++ */ ++ ++/* ++ * lookup and dentry operations ++ */ ++ ++#include ++#include "aufs.h" ++ ++static void au_h_nd(struct nameidata *h_nd, struct nameidata *nd) ++{ ++ if (nd) { ++ *h_nd = *nd; ++ ++ /* ++ * gave up supporting LOOKUP_CREATE/OPEN for lower fs, ++ * due to whiteout and branch permission. ++ */ ++ h_nd->flags &= ~(/*LOOKUP_PARENT |*/ LOOKUP_OPEN | LOOKUP_CREATE ++ | LOOKUP_FOLLOW | LOOKUP_EXCL); ++ /* unnecessary? */ ++ h_nd->intent.open.file = NULL; ++ } else ++ memset(h_nd, 0, sizeof(*h_nd)); ++} ++ ++struct au_lkup_one_args { ++ struct dentry **errp; ++ struct qstr *name; ++ struct dentry *h_parent; ++ struct au_branch *br; ++ struct nameidata *nd; ++}; ++ ++struct dentry *au_lkup_one(struct qstr *name, struct dentry *h_parent, ++ struct au_branch *br, struct nameidata *nd) ++{ ++ struct dentry *h_dentry; ++ int err; ++ struct nameidata h_nd; ++ ++ if (au_test_fs_null_nd(h_parent->d_sb)) ++ return vfsub_lookup_one_len(name->name, h_parent, name->len); ++ ++ au_h_nd(&h_nd, nd); ++ h_nd.path.dentry = h_parent; ++ h_nd.path.mnt = br->br_mnt; ++ ++ err = vfsub_name_hash(name->name, &h_nd.last, name->len); ++ h_dentry = ERR_PTR(err); ++ if (!err) { ++ path_get(&h_nd.path); ++ h_dentry = vfsub_lookup_hash(&h_nd); ++ path_put(&h_nd.path); ++ } ++ ++ AuTraceErrPtr(h_dentry); ++ return h_dentry; ++} ++ ++static void au_call_lkup_one(void *args) ++{ ++ struct au_lkup_one_args *a = args; ++ *a->errp = au_lkup_one(a->name, a->h_parent, a->br, a->nd); ++} ++ ++#define AuLkup_ALLOW_NEG 1 ++#define au_ftest_lkup(flags, name) ((flags) & AuLkup_##name) ++#define au_fset_lkup(flags, name) \ ++ do { (flags) |= AuLkup_##name; } while (0) ++#define au_fclr_lkup(flags, name) \ ++ do { (flags) &= ~AuLkup_##name; } while (0) ++ ++struct au_do_lookup_args { ++ unsigned int flags; ++ mode_t type; ++ struct nameidata *nd; ++}; ++ ++/* ++ * returns positive/negative dentry, NULL or an error. ++ * NULL means whiteout-ed or not-found. 
++ */ ++static struct dentry* ++au_do_lookup(struct dentry *h_parent, struct dentry *dentry, ++ aufs_bindex_t bindex, struct qstr *wh_name, ++ struct au_do_lookup_args *args) ++{ ++ struct dentry *h_dentry; ++ struct inode *h_inode, *inode; ++ struct au_branch *br; ++ int wh_found, opq; ++ unsigned char wh_able; ++ const unsigned char allow_neg = !!au_ftest_lkup(args->flags, ALLOW_NEG); ++ ++ wh_found = 0; ++ br = au_sbr(dentry->d_sb, bindex); ++ wh_able = !!au_br_whable(br->br_perm); ++ if (wh_able) ++ wh_found = au_wh_test(h_parent, wh_name, br, /*try_sio*/0); ++ h_dentry = ERR_PTR(wh_found); ++ if (!wh_found) ++ goto real_lookup; ++ if (unlikely(wh_found < 0)) ++ goto out; ++ ++ /* We found a whiteout */ ++ /* au_set_dbend(dentry, bindex); */ ++ au_set_dbwh(dentry, bindex); ++ if (!allow_neg) ++ return NULL; /* success */ ++ ++real_lookup: ++ h_dentry = au_lkup_one(&dentry->d_name, h_parent, br, args->nd); ++ if (IS_ERR(h_dentry)) ++ goto out; ++ ++ h_inode = h_dentry->d_inode; ++ if (!h_inode) { ++ if (!allow_neg) ++ goto out_neg; ++ } else if (wh_found ++ || (args->type && args->type != (h_inode->i_mode & S_IFMT))) ++ goto out_neg; ++ ++ if (au_dbend(dentry) <= bindex) ++ au_set_dbend(dentry, bindex); ++ if (au_dbstart(dentry) < 0 || bindex < au_dbstart(dentry)) ++ au_set_dbstart(dentry, bindex); ++ au_set_h_dptr(dentry, bindex, h_dentry); ++ ++ inode = dentry->d_inode; ++ if (!h_inode || !S_ISDIR(h_inode->i_mode) || !wh_able ++ || (inode && !S_ISDIR(inode->i_mode))) ++ goto out; /* success */ ++ ++ mutex_lock_nested(&h_inode->i_mutex, AuLsc_I_CHILD); ++ opq = au_diropq_test(h_dentry, br); ++ mutex_unlock(&h_inode->i_mutex); ++ if (opq > 0) ++ au_set_dbdiropq(dentry, bindex); ++ else if (unlikely(opq < 0)) { ++ au_set_h_dptr(dentry, bindex, NULL); ++ h_dentry = ERR_PTR(opq); ++ } ++ goto out; ++ ++out_neg: ++ dput(h_dentry); ++ h_dentry = NULL; ++out: ++ return h_dentry; ++} ++ ++static int au_test_shwh(struct super_block *sb, const struct qstr *name) ++{ ++ if (unlikely(!au_opt_test(au_mntflags(sb), SHWH) ++ && !strncmp(name->name, AUFS_WH_PFX, AUFS_WH_PFX_LEN))) ++ return -EPERM; ++ return 0; ++} ++ ++/* ++ * returns the number of lower positive dentries, ++ * otherwise an error. ++ * can be called at unlinking with @type is zero. 
++ */ ++int au_lkup_dentry(struct dentry *dentry, aufs_bindex_t bstart, mode_t type, ++ struct nameidata *nd) ++{ ++ int npositive, err; ++ aufs_bindex_t bindex, btail, bdiropq; ++ unsigned char isdir; ++ struct qstr whname; ++ struct au_do_lookup_args args = { ++ .flags = 0, ++ .type = type, ++ .nd = nd ++ }; ++ const struct qstr *name = &dentry->d_name; ++ struct dentry *parent; ++ struct inode *inode; ++ ++ err = au_test_shwh(dentry->d_sb, name); ++ if (unlikely(err)) ++ goto out; ++ ++ err = au_wh_name_alloc(&whname, name); ++ if (unlikely(err)) ++ goto out; ++ ++ inode = dentry->d_inode; ++ isdir = !!(inode && S_ISDIR(inode->i_mode)); ++ if (!type) ++ au_fset_lkup(args.flags, ALLOW_NEG); ++ ++ npositive = 0; ++ parent = dget_parent(dentry); ++ btail = au_dbtaildir(parent); ++ for (bindex = bstart; bindex <= btail; bindex++) { ++ struct dentry *h_parent, *h_dentry; ++ struct inode *h_inode, *h_dir; ++ ++ h_dentry = au_h_dptr(dentry, bindex); ++ if (h_dentry) { ++ if (h_dentry->d_inode) ++ npositive++; ++ if (type != S_IFDIR) ++ break; ++ continue; ++ } ++ h_parent = au_h_dptr(parent, bindex); ++ if (!h_parent) ++ continue; ++ h_dir = h_parent->d_inode; ++ if (!h_dir || !S_ISDIR(h_dir->i_mode)) ++ continue; ++ ++ mutex_lock_nested(&h_dir->i_mutex, AuLsc_I_PARENT); ++ h_dentry = au_do_lookup(h_parent, dentry, bindex, &whname, ++ &args); ++ mutex_unlock(&h_dir->i_mutex); ++ err = PTR_ERR(h_dentry); ++ if (IS_ERR(h_dentry)) ++ goto out_parent; ++ au_fclr_lkup(args.flags, ALLOW_NEG); ++ ++ if (au_dbwh(dentry) >= 0) ++ break; ++ if (!h_dentry) ++ continue; ++ h_inode = h_dentry->d_inode; ++ if (!h_inode) ++ continue; ++ npositive++; ++ if (!args.type) ++ args.type = h_inode->i_mode & S_IFMT; ++ if (args.type != S_IFDIR) ++ break; ++ else if (isdir) { ++ /* the type of lower may be different */ ++ bdiropq = au_dbdiropq(dentry); ++ if (bdiropq >= 0 && bdiropq <= bindex) ++ break; ++ } ++ } ++ ++ if (npositive) { ++ AuLabel(positive); ++ au_update_dbstart(dentry); ++ } ++ err = npositive; ++ if (unlikely(!au_opt_test(au_mntflags(dentry->d_sb), UDBA_NONE) ++ && au_dbstart(dentry) < 0)) { ++ err = -EIO; ++ AuIOErr("both of real entry and whiteout found, %.*s, err %d\n", ++ AuDLNPair(dentry), err); ++ } ++ ++out_parent: ++ dput(parent); ++ kfree(whname.name); ++out: ++ return err; ++} ++ ++struct dentry *au_sio_lkup_one(struct qstr *name, struct dentry *parent, ++ struct au_branch *br) ++{ ++ struct dentry *dentry; ++ int wkq_err; ++ ++ if (!au_test_h_perm_sio(parent->d_inode, MAY_EXEC)) ++ dentry = au_lkup_one(name, parent, br, /*nd*/NULL); ++ else { ++ struct au_lkup_one_args args = { ++ .errp = &dentry, ++ .name = name, ++ .h_parent = parent, ++ .br = br, ++ .nd = NULL ++ }; ++ ++ wkq_err = au_wkq_wait(au_call_lkup_one, &args); ++ if (unlikely(wkq_err)) ++ dentry = ERR_PTR(wkq_err); ++ } ++ ++ return dentry; ++} ++ ++/* ++ * lookup @dentry on @bindex which should be negative. 
++ */ ++int au_lkup_neg(struct dentry *dentry, aufs_bindex_t bindex) ++{ ++ int err; ++ struct dentry *parent, *h_parent, *h_dentry; ++ ++ parent = dget_parent(dentry); ++ h_parent = au_h_dptr(parent, bindex); ++ h_dentry = au_sio_lkup_one(&dentry->d_name, h_parent, ++ au_sbr(dentry->d_sb, bindex)); ++ err = PTR_ERR(h_dentry); ++ if (IS_ERR(h_dentry)) ++ goto out; ++ if (unlikely(h_dentry->d_inode)) { ++ err = -EIO; ++ AuIOErr("%.*s should be negative on b%d.\n", ++ AuDLNPair(h_dentry), bindex); ++ dput(h_dentry); ++ goto out; ++ } ++ ++ err = 0; ++ if (bindex < au_dbstart(dentry)) ++ au_set_dbstart(dentry, bindex); ++ if (au_dbend(dentry) < bindex) ++ au_set_dbend(dentry, bindex); ++ au_set_h_dptr(dentry, bindex, h_dentry); ++ ++out: ++ dput(parent); ++ return err; ++} ++ ++/* ---------------------------------------------------------------------- */ ++ ++/* subset of struct inode */ ++struct au_iattr { ++ unsigned long i_ino; ++ /* unsigned int i_nlink; */ ++ uid_t i_uid; ++ gid_t i_gid; ++ u64 i_version; ++/* ++ loff_t i_size; ++ blkcnt_t i_blocks; ++*/ ++ umode_t i_mode; ++}; ++ ++static void au_iattr_save(struct au_iattr *ia, struct inode *h_inode) ++{ ++ ia->i_ino = h_inode->i_ino; ++ /* ia->i_nlink = h_inode->i_nlink; */ ++ ia->i_uid = h_inode->i_uid; ++ ia->i_gid = h_inode->i_gid; ++ ia->i_version = h_inode->i_version; ++/* ++ ia->i_size = h_inode->i_size; ++ ia->i_blocks = h_inode->i_blocks; ++*/ ++ ia->i_mode = (h_inode->i_mode & S_IFMT); ++} ++ ++static int au_iattr_test(struct au_iattr *ia, struct inode *h_inode) ++{ ++ return ia->i_ino != h_inode->i_ino ++ /* || ia->i_nlink != h_inode->i_nlink */ ++ || ia->i_uid != h_inode->i_uid ++ || ia->i_gid != h_inode->i_gid ++ || ia->i_version != h_inode->i_version ++/* ++ || ia->i_size != h_inode->i_size ++ || ia->i_blocks != h_inode->i_blocks ++*/ ++ || ia->i_mode != (h_inode->i_mode & S_IFMT); ++} ++ ++static int au_h_verify_dentry(struct dentry *h_dentry, struct dentry *h_parent, ++ struct au_branch *br) ++{ ++ int err; ++ struct au_iattr ia; ++ struct inode *h_inode; ++ struct dentry *h_d; ++ struct super_block *h_sb; ++ ++ err = 0; ++ memset(&ia, -1, sizeof(ia)); ++ h_sb = h_dentry->d_sb; ++ h_inode = h_dentry->d_inode; ++ if (h_inode) ++ au_iattr_save(&ia, h_inode); ++ else if (au_test_nfs(h_sb) || au_test_fuse(h_sb)) ++ /* nfs d_revalidate may return 0 for negative dentry */ ++ /* fuse d_revalidate always return 0 for negative dentry */ ++ goto out; ++ ++ /* main purpose is namei.c:cached_lookup() and d_revalidate */ ++ h_d = au_lkup_one(&h_dentry->d_name, h_parent, br, /*nd*/NULL); ++ err = PTR_ERR(h_d); ++ if (IS_ERR(h_d)) ++ goto out; ++ ++ err = 0; ++ if (unlikely(h_d != h_dentry ++ || h_d->d_inode != h_inode ++ || (h_inode && au_iattr_test(&ia, h_inode)))) ++ err = au_busy_or_stale(); ++ dput(h_d); ++ ++out: ++ AuTraceErr(err); ++ return err; ++} ++ ++int au_h_verify(struct dentry *h_dentry, unsigned int udba, struct inode *h_dir, ++ struct dentry *h_parent, struct au_branch *br) ++{ ++ int err; ++ ++ err = 0; ++ if (udba == AuOpt_UDBA_REVAL ++ && !au_test_fs_remote(h_dentry->d_sb)) { ++ IMustLock(h_dir); ++ err = (h_dentry->d_parent->d_inode != h_dir); ++ } else if (udba != AuOpt_UDBA_NONE) ++ err = au_h_verify_dentry(h_dentry, h_parent, br); ++ ++ return err; ++} ++ ++/* ---------------------------------------------------------------------- */ ++ ++static int au_do_refresh_hdentry(struct dentry *dentry, struct dentry *parent) ++{ ++ int err; ++ aufs_bindex_t new_bindex, bindex, bend, bwh, bdiropq; ++ struct au_hdentry tmp, 
*p, *q; ++ struct au_dinfo *dinfo; ++ struct super_block *sb; ++ ++ DiMustWriteLock(dentry); ++ ++ sb = dentry->d_sb; ++ dinfo = au_di(dentry); ++ bend = dinfo->di_bend; ++ bwh = dinfo->di_bwh; ++ bdiropq = dinfo->di_bdiropq; ++ p = dinfo->di_hdentry + dinfo->di_bstart; ++ for (bindex = dinfo->di_bstart; bindex <= bend; bindex++, p++) { ++ if (!p->hd_dentry) ++ continue; ++ ++ new_bindex = au_br_index(sb, p->hd_id); ++ if (new_bindex == bindex) ++ continue; ++ ++ if (dinfo->di_bwh == bindex) ++ bwh = new_bindex; ++ if (dinfo->di_bdiropq == bindex) ++ bdiropq = new_bindex; ++ if (new_bindex < 0) { ++ au_hdput(p); ++ p->hd_dentry = NULL; ++ continue; ++ } ++ ++ /* swap two lower dentries, and loop again */ ++ q = dinfo->di_hdentry + new_bindex; ++ tmp = *q; ++ *q = *p; ++ *p = tmp; ++ if (tmp.hd_dentry) { ++ bindex--; ++ p--; ++ } ++ } ++ ++ dinfo->di_bwh = -1; ++ if (bwh >= 0 && bwh <= au_sbend(sb) && au_sbr_whable(sb, bwh)) ++ dinfo->di_bwh = bwh; ++ ++ dinfo->di_bdiropq = -1; ++ if (bdiropq >= 0 ++ && bdiropq <= au_sbend(sb) ++ && au_sbr_whable(sb, bdiropq)) ++ dinfo->di_bdiropq = bdiropq; ++ ++ err = -EIO; ++ dinfo->di_bstart = -1; ++ dinfo->di_bend = -1; ++ bend = au_dbend(parent); ++ p = dinfo->di_hdentry; ++ for (bindex = 0; bindex <= bend; bindex++, p++) ++ if (p->hd_dentry) { ++ dinfo->di_bstart = bindex; ++ break; ++ } ++ ++ if (dinfo->di_bstart >= 0) { ++ p = dinfo->di_hdentry + bend; ++ for (bindex = bend; bindex >= 0; bindex--, p--) ++ if (p->hd_dentry) { ++ dinfo->di_bend = bindex; ++ err = 0; ++ break; ++ } ++ } ++ ++ return err; ++} ++ ++static void au_do_hide(struct dentry *dentry) ++{ ++ struct inode *inode; ++ ++ inode = dentry->d_inode; ++ if (inode) { ++ if (!S_ISDIR(inode->i_mode)) { ++ if (inode->i_nlink && !d_unhashed(dentry)) ++ drop_nlink(inode); ++ } else { ++ clear_nlink(inode); ++ /* stop next lookup */ ++ inode->i_flags |= S_DEAD; ++ } ++ smp_mb(); /* necessary? */ ++ } ++ d_drop(dentry); ++} ++ ++static int au_hide_children(struct dentry *parent) ++{ ++ int err, i, j, ndentry; ++ struct au_dcsub_pages dpages; ++ struct au_dpage *dpage; ++ struct dentry *dentry; ++ ++ err = au_dpages_init(&dpages, GFP_NOFS); ++ if (unlikely(err)) ++ goto out; ++ err = au_dcsub_pages(&dpages, parent, NULL, NULL); ++ if (unlikely(err)) ++ goto out_dpages; ++ ++ /* in reverse order */ ++ for (i = dpages.ndpage - 1; i >= 0; i--) { ++ dpage = dpages.dpages + i; ++ ndentry = dpage->ndentry; ++ for (j = ndentry - 1; j >= 0; j--) { ++ dentry = dpage->dentries[j]; ++ if (dentry != parent) ++ au_do_hide(dentry); ++ } ++ } ++ ++out_dpages: ++ au_dpages_free(&dpages); ++out: ++ return err; ++} ++ ++static void au_hide(struct dentry *dentry) ++{ ++ int err; ++ struct inode *inode; ++ ++ AuDbgDentry(dentry); ++ inode = dentry->d_inode; ++ if (inode && S_ISDIR(inode->i_mode)) { ++ /* shrink_dcache_parent(dentry); */ ++ err = au_hide_children(dentry); ++ if (unlikely(err)) ++ AuIOErr("%.*s, failed hiding children, ignored %d\n", ++ AuDLNPair(dentry), err); ++ } ++ au_do_hide(dentry); ++} ++ ++/* ++ * By adding a dirty branch, a cached dentry may be affected in various ways. 
++ * ++ * a dirty branch is added ++ * - on the top of layers ++ * - in the middle of layers ++ * - to the bottom of layers ++ * ++ * on the added branch there exists ++ * - a whiteout ++ * - a diropq ++ * - a same named entry ++ * + exist ++ * * negative --> positive ++ * * positive --> positive ++ * - type is unchanged ++ * - type is changed ++ * + doesn't exist ++ * * negative --> negative ++ * * positive --> negative (rejected by au_br_del() for non-dir case) ++ * - none ++ */ ++static int au_refresh_by_dinfo(struct dentry *dentry, struct au_dinfo *dinfo, ++ struct au_dinfo *tmp) ++{ ++ int err; ++ aufs_bindex_t bindex, bend; ++ struct { ++ struct dentry *dentry; ++ struct inode *inode; ++ mode_t mode; ++ } orig_h, tmp_h; ++ struct au_hdentry *hd; ++ struct inode *inode, *h_inode; ++ struct dentry *h_dentry; ++ ++ err = 0; ++ AuDebugOn(dinfo->di_bstart < 0); ++ orig_h.dentry = dinfo->di_hdentry[dinfo->di_bstart].hd_dentry; ++ orig_h.inode = orig_h.dentry->d_inode; ++ orig_h.mode = 0; ++ if (orig_h.inode) ++ orig_h.mode = orig_h.inode->i_mode & S_IFMT; ++ memset(&tmp_h, 0, sizeof(tmp_h)); ++ if (tmp->di_bstart >= 0) { ++ tmp_h.dentry = tmp->di_hdentry[tmp->di_bstart].hd_dentry; ++ tmp_h.inode = tmp_h.dentry->d_inode; ++ if (tmp_h.inode) ++ tmp_h.mode = tmp_h.inode->i_mode & S_IFMT; ++ } ++ ++ inode = dentry->d_inode; ++ if (!orig_h.inode) { ++ AuDbg("nagative originally\n"); ++ if (inode) { ++ au_hide(dentry); ++ goto out; ++ } ++ AuDebugOn(inode); ++ AuDebugOn(dinfo->di_bstart != dinfo->di_bend); ++ AuDebugOn(dinfo->di_bdiropq != -1); ++ ++ if (!tmp_h.inode) { ++ AuDbg("negative --> negative\n"); ++ /* should have only one negative lower */ ++ if (tmp->di_bstart >= 0 ++ && tmp->di_bstart < dinfo->di_bstart) { ++ AuDebugOn(tmp->di_bstart != tmp->di_bend); ++ AuDebugOn(dinfo->di_bstart != dinfo->di_bend); ++ au_set_h_dptr(dentry, dinfo->di_bstart, NULL); ++ au_di_cp(dinfo, tmp); ++ hd = tmp->di_hdentry + tmp->di_bstart; ++ au_set_h_dptr(dentry, tmp->di_bstart, ++ dget(hd->hd_dentry)); ++ } ++ au_dbg_verify_dinode(dentry); ++ } else { ++ AuDbg("negative --> positive\n"); ++ /* ++ * similar to the behaviour of creating with bypassing ++ * aufs. ++ * unhash it in order to force an error in the ++ * succeeding create operation. ++ * we should not set S_DEAD here. ++ */ ++ d_drop(dentry); ++ /* au_di_swap(tmp, dinfo); */ ++ au_dbg_verify_dinode(dentry); ++ } ++ } else { ++ AuDbg("positive originally\n"); ++ /* inode may be NULL */ ++ AuDebugOn(inode && (inode->i_mode & S_IFMT) != orig_h.mode); ++ if (!tmp_h.inode) { ++ AuDbg("positive --> negative\n"); ++ /* or bypassing aufs */ ++ au_hide(dentry); ++ if (tmp->di_bwh >= 0 && tmp->di_bwh <= dinfo->di_bstart) ++ dinfo->di_bwh = tmp->di_bwh; ++ if (inode) ++ err = au_refresh_hinode_self(inode); ++ au_dbg_verify_dinode(dentry); ++ } else if (orig_h.mode == tmp_h.mode) { ++ AuDbg("positive --> positive, same type\n"); ++ if (!S_ISDIR(orig_h.mode) ++ && dinfo->di_bstart > tmp->di_bstart) { ++ /* ++ * similar to the behaviour of removing and ++ * creating. 
++ */ ++ au_hide(dentry); ++ if (inode) ++ err = au_refresh_hinode_self(inode); ++ au_dbg_verify_dinode(dentry); ++ } else { ++ /* fill empty slots */ ++ if (dinfo->di_bstart > tmp->di_bstart) ++ dinfo->di_bstart = tmp->di_bstart; ++ if (dinfo->di_bend < tmp->di_bend) ++ dinfo->di_bend = tmp->di_bend; ++ dinfo->di_bwh = tmp->di_bwh; ++ dinfo->di_bdiropq = tmp->di_bdiropq; ++ hd = tmp->di_hdentry; ++ bend = dinfo->di_bend; ++ for (bindex = tmp->di_bstart; bindex <= bend; ++ bindex++) { ++ if (au_h_dptr(dentry, bindex)) ++ continue; ++ h_dentry = hd[bindex].hd_dentry; ++ if (!h_dentry) ++ continue; ++ h_inode = h_dentry->d_inode; ++ AuDebugOn(!h_inode); ++ AuDebugOn(orig_h.mode ++ != (h_inode->i_mode ++ & S_IFMT)); ++ au_set_h_dptr(dentry, bindex, ++ dget(h_dentry)); ++ } ++ err = au_refresh_hinode(inode, dentry); ++ au_dbg_verify_dinode(dentry); ++ } ++ } else { ++ AuDbg("positive --> positive, different type\n"); ++ /* similar to the behaviour of removing and creating */ ++ au_hide(dentry); ++ if (inode) ++ err = au_refresh_hinode_self(inode); ++ au_dbg_verify_dinode(dentry); ++ } ++ } ++ ++out: ++ return err; ++} ++ ++int au_refresh_dentry(struct dentry *dentry, struct dentry *parent) ++{ ++ int err, ebrange; ++ unsigned int sigen; ++ struct au_dinfo *dinfo, *tmp; ++ struct super_block *sb; ++ struct inode *inode; ++ ++ DiMustWriteLock(dentry); ++ AuDebugOn(IS_ROOT(dentry)); ++ AuDebugOn(!parent->d_inode); ++ ++ sb = dentry->d_sb; ++ inode = dentry->d_inode; ++ sigen = au_sigen(sb); ++ err = au_digen_test(parent, sigen); ++ if (unlikely(err)) ++ goto out; ++ ++ dinfo = au_di(dentry); ++ err = au_di_realloc(dinfo, au_sbend(sb) + 1); ++ if (unlikely(err)) ++ goto out; ++ ebrange = au_dbrange_test(dentry); ++ if (!ebrange) ++ ebrange = au_do_refresh_hdentry(dentry, parent); ++ ++ if (d_unhashed(dentry) || ebrange) { ++ AuDebugOn(au_dbstart(dentry) < 0 && au_dbend(dentry) >= 0); ++ if (inode) ++ err = au_refresh_hinode_self(inode); ++ au_dbg_verify_dinode(dentry); ++ if (!err) ++ goto out_dgen; /* success */ ++ goto out; ++ } ++ ++ /* temporary dinfo */ ++ AuDbgDentry(dentry); ++ err = -ENOMEM; ++ tmp = au_di_alloc(sb, AuLsc_DI_TMP); ++ if (unlikely(!tmp)) ++ goto out; ++ au_di_swap(tmp, dinfo); ++ /* returns the number of positive dentries */ ++ /* ++ * if current working dir is removed, it returns an error. ++ * but the dentry is legal. 
++ */ ++ err = au_lkup_dentry(dentry, /*bstart*/0, /*type*/0, /*nd*/NULL); ++ AuDbgDentry(dentry); ++ au_di_swap(tmp, dinfo); ++ if (err == -ENOENT) ++ err = 0; ++ if (err >= 0) { ++ /* compare/refresh by dinfo */ ++ AuDbgDentry(dentry); ++ err = au_refresh_by_dinfo(dentry, dinfo, tmp); ++ au_dbg_verify_dinode(dentry); ++ AuTraceErr(err); ++ } ++ au_rw_write_unlock(&tmp->di_rwsem); ++ au_di_free(tmp); ++ if (unlikely(err)) ++ goto out; ++ ++out_dgen: ++ au_update_digen(dentry); ++out: ++ if (unlikely(err && !(dentry->d_flags & DCACHE_NFSFS_RENAMED))) { ++ AuIOErr("failed refreshing %.*s, %d\n", ++ AuDLNPair(dentry), err); ++ AuDbgDentry(dentry); ++ } ++ AuTraceErr(err); ++ return err; ++} ++ ++static noinline_for_stack ++int au_do_h_d_reval(struct dentry *h_dentry, struct nameidata *nd, ++ struct dentry *dentry, aufs_bindex_t bindex) ++{ ++ int err, valid; ++ int (*reval)(struct dentry *, struct nameidata *); ++ ++ err = 0; ++ if (!(h_dentry->d_flags & DCACHE_OP_REVALIDATE)) ++ goto out; ++ reval = h_dentry->d_op->d_revalidate; ++ ++ AuDbg("b%d\n", bindex); ++ if (au_test_fs_null_nd(h_dentry->d_sb)) ++ /* it may return tri-state */ ++ valid = reval(h_dentry, NULL); ++ else { ++ struct nameidata h_nd; ++ int locked; ++ struct dentry *parent; ++ ++ au_h_nd(&h_nd, nd); ++ parent = nd->path.dentry; ++ locked = (nd && nd->path.dentry != dentry); ++ if (locked) ++ di_read_lock_parent(parent, AuLock_IR); ++ BUG_ON(bindex > au_dbend(parent)); ++ h_nd.path.dentry = au_h_dptr(parent, bindex); ++ BUG_ON(!h_nd.path.dentry); ++ h_nd.path.mnt = au_sbr(parent->d_sb, bindex)->br_mnt; ++ path_get(&h_nd.path); ++ valid = reval(h_dentry, &h_nd); ++ path_put(&h_nd.path); ++ if (locked) ++ di_read_unlock(parent, AuLock_IR); ++ } ++ ++ if (unlikely(valid < 0)) ++ err = valid; ++ else if (!valid) ++ err = -EINVAL; ++ ++out: ++ AuTraceErr(err); ++ return err; ++} ++ ++/* todo: remove this */ ++static int h_d_revalidate(struct dentry *dentry, struct inode *inode, ++ struct nameidata *nd, int do_udba) ++{ ++ int err; ++ umode_t mode, h_mode; ++ aufs_bindex_t bindex, btail, bstart, ibs, ibe; ++ unsigned char plus, unhashed, is_root, h_plus; ++ struct inode *h_inode, *h_cached_inode; ++ struct dentry *h_dentry; ++ struct qstr *name, *h_name; ++ ++ err = 0; ++ plus = 0; ++ mode = 0; ++ ibs = -1; ++ ibe = -1; ++ unhashed = !!d_unhashed(dentry); ++ is_root = !!IS_ROOT(dentry); ++ name = &dentry->d_name; ++ ++ /* ++ * Theoretically, REVAL test should be unnecessary in case of ++ * {FS,I}NOTIFY. ++ * But {fs,i}notify doesn't fire some necessary events, ++ * IN_ATTRIB for atime/nlink/pageio ++ * IN_DELETE for NFS dentry ++ * Let's do REVAL test too. 
++ */ ++ if (do_udba && inode) { ++ mode = (inode->i_mode & S_IFMT); ++ plus = (inode->i_nlink > 0); ++ ibs = au_ibstart(inode); ++ ibe = au_ibend(inode); ++ } ++ ++ bstart = au_dbstart(dentry); ++ btail = bstart; ++ if (inode && S_ISDIR(inode->i_mode)) ++ btail = au_dbtaildir(dentry); ++ for (bindex = bstart; bindex <= btail; bindex++) { ++ h_dentry = au_h_dptr(dentry, bindex); ++ if (!h_dentry) ++ continue; ++ ++ AuDbg("b%d, %.*s\n", bindex, AuDLNPair(h_dentry)); ++ spin_lock(&h_dentry->d_lock); ++ h_name = &h_dentry->d_name; ++ if (unlikely(do_udba ++ && !is_root ++ && (unhashed != !!d_unhashed(h_dentry) ++ || name->len != h_name->len ++ || memcmp(name->name, h_name->name, name->len)) ++ )) { ++ AuDbg("unhash 0x%x 0x%x, %.*s %.*s\n", ++ unhashed, d_unhashed(h_dentry), ++ AuDLNPair(dentry), AuDLNPair(h_dentry)); ++ spin_unlock(&h_dentry->d_lock); ++ goto err; ++ } ++ spin_unlock(&h_dentry->d_lock); ++ ++ err = au_do_h_d_reval(h_dentry, nd, dentry, bindex); ++ if (unlikely(err)) ++ /* do not goto err, to keep the errno */ ++ break; ++ ++ /* todo: plink too? */ ++ if (!do_udba) ++ continue; ++ ++ /* UDBA tests */ ++ h_inode = h_dentry->d_inode; ++ if (unlikely(!!inode != !!h_inode)) ++ goto err; ++ ++ h_plus = plus; ++ h_mode = mode; ++ h_cached_inode = h_inode; ++ if (h_inode) { ++ h_mode = (h_inode->i_mode & S_IFMT); ++ h_plus = (h_inode->i_nlink > 0); ++ } ++ if (inode && ibs <= bindex && bindex <= ibe) ++ h_cached_inode = au_h_iptr(inode, bindex); ++ ++ if (unlikely(plus != h_plus ++ || mode != h_mode ++ || h_cached_inode != h_inode)) ++ goto err; ++ continue; ++ ++ err: ++ err = -EINVAL; ++ break; ++ } ++ ++ return err; ++} ++ ++/* todo: consolidate with do_refresh() and au_reval_for_attr() */ ++static int simple_reval_dpath(struct dentry *dentry, unsigned int sigen) ++{ ++ int err; ++ struct dentry *parent; ++ ++ if (!au_digen_test(dentry, sigen)) ++ return 0; ++ ++ parent = dget_parent(dentry); ++ di_read_lock_parent(parent, AuLock_IR); ++ AuDebugOn(au_digen_test(parent, sigen)); ++ au_dbg_verify_gen(parent, sigen); ++ err = au_refresh_dentry(dentry, parent); ++ di_read_unlock(parent, AuLock_IR); ++ dput(parent); ++ AuTraceErr(err); ++ return err; ++} ++ ++int au_reval_dpath(struct dentry *dentry, unsigned int sigen) ++{ ++ int err; ++ struct dentry *d, *parent; ++ struct inode *inode; ++ ++ if (!au_ftest_si(au_sbi(dentry->d_sb), FAILED_REFRESH_DIR)) ++ return simple_reval_dpath(dentry, sigen); ++ ++ /* slow loop, keep it simple and stupid */ ++ /* cf: au_cpup_dirs() */ ++ err = 0; ++ parent = NULL; ++ while (au_digen_test(dentry, sigen)) { ++ d = dentry; ++ while (1) { ++ dput(parent); ++ parent = dget_parent(d); ++ if (!au_digen_test(parent, sigen)) ++ break; ++ d = parent; ++ } ++ ++ inode = d->d_inode; ++ if (d != dentry) ++ di_write_lock_child2(d); ++ ++ /* someone might update our dentry while we were sleeping */ ++ if (au_digen_test(d, sigen)) { ++ /* ++ * todo: consolidate with simple_reval_dpath(), ++ * do_refresh() and au_reval_for_attr(). ++ */ ++ di_read_lock_parent(parent, AuLock_IR); ++ err = au_refresh_dentry(d, parent); ++ di_read_unlock(parent, AuLock_IR); ++ } ++ ++ if (d != dentry) ++ di_write_unlock(d); ++ dput(parent); ++ if (unlikely(err)) ++ break; ++ } ++ ++ return err; ++} ++ ++/* ++ * if valid returns 1, otherwise 0. ++ */ ++static int aufs_d_revalidate(struct dentry *dentry, struct nameidata *nd) ++{ ++ int valid, err; ++ unsigned int sigen; ++ unsigned char do_udba; ++ struct super_block *sb; ++ struct inode *inode; ++ ++ /* todo: support rcu-walk? 
*/ ++ if (nd && (nd->flags & LOOKUP_RCU)) ++ return -ECHILD; ++ ++ valid = 0; ++ if (unlikely(!au_di(dentry))) ++ goto out; ++ ++ inode = dentry->d_inode; ++ if (inode && is_bad_inode(inode)) ++ goto out; ++ ++ valid = 1; ++ sb = dentry->d_sb; ++ /* ++ * todo: very ugly ++ * i_mutex of parent dir may be held, ++ * but we should not return 'invalid' due to busy. ++ */ ++ err = aufs_read_lock(dentry, AuLock_FLUSH | AuLock_DW | AuLock_NOPLM); ++ if (unlikely(err)) { ++ valid = err; ++ AuTraceErr(err); ++ goto out; ++ } ++ if (unlikely(au_dbrange_test(dentry))) { ++ err = -EINVAL; ++ AuTraceErr(err); ++ goto out_dgrade; ++ } ++ ++ sigen = au_sigen(sb); ++ if (au_digen_test(dentry, sigen)) { ++ AuDebugOn(IS_ROOT(dentry)); ++ err = au_reval_dpath(dentry, sigen); ++ if (unlikely(err)) { ++ AuTraceErr(err); ++ goto out_dgrade; ++ } ++ } ++ di_downgrade_lock(dentry, AuLock_IR); ++ ++ err = -EINVAL; ++ if (inode && (IS_DEADDIR(inode) || !inode->i_nlink)) ++ goto out_inval; ++ ++ do_udba = !au_opt_test(au_mntflags(sb), UDBA_NONE); ++ if (do_udba && inode) { ++ aufs_bindex_t bstart = au_ibstart(inode); ++ struct inode *h_inode; ++ ++ if (bstart >= 0) { ++ h_inode = au_h_iptr(inode, bstart); ++ if (h_inode && au_test_higen(inode, h_inode)) ++ goto out_inval; ++ } ++ } ++ ++ err = h_d_revalidate(dentry, inode, nd, do_udba); ++ if (unlikely(!err && do_udba && au_dbstart(dentry) < 0)) { ++ err = -EIO; ++ AuDbg("both of real entry and whiteout found, %.*s, err %d\n", ++ AuDLNPair(dentry), err); ++ } ++ goto out_inval; ++ ++out_dgrade: ++ di_downgrade_lock(dentry, AuLock_IR); ++out_inval: ++ aufs_read_unlock(dentry, AuLock_IR); ++ AuTraceErr(err); ++ valid = !err; ++out: ++ if (!valid) { ++ AuDbg("%.*s invalid, %d\n", AuDLNPair(dentry), valid); ++ d_drop(dentry); ++ } ++ return valid; ++} ++ ++static void aufs_d_release(struct dentry *dentry) ++{ ++ if (au_di(dentry)) { ++ au_di_fin(dentry); ++ au_hn_di_reinit(dentry); ++ } ++} ++ ++const struct dentry_operations aufs_dop = { ++ .d_revalidate = aufs_d_revalidate, ++ .d_release = aufs_d_release ++}; +diff -uNr linux-3.2.0-gentoo-r1.orig//fs/aufs/dentry.h linux-3.2.0-gentoo-r1/fs/aufs/dentry.h +--- linux-3.2.0-gentoo-r1.orig//fs/aufs/dentry.h 1970-01-01 01:00:00.000000000 +0100 ++++ linux-3.2.0-gentoo-r1/fs/aufs/dentry.h 2012-01-17 12:11:24.576506644 +0100 +@@ -0,0 +1,237 @@ ++/* ++ * Copyright (C) 2005-2012 Junjiro R. Okajima ++ * ++ * This program, aufs is free software; you can redistribute it and/or modify ++ * it under the terms of the GNU General Public License as published by ++ * the Free Software Foundation; either version 2 of the License, or ++ * (at your option) any later version. ++ * ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ * GNU General Public License for more details. 
++ * ++ * You should have received a copy of the GNU General Public License ++ * along with this program; if not, write to the Free Software ++ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA ++ */ ++ ++/* ++ * lookup and dentry operations ++ */ ++ ++#ifndef __AUFS_DENTRY_H__ ++#define __AUFS_DENTRY_H__ ++ ++#ifdef __KERNEL__ ++ ++#include ++#include "rwsem.h" ++ ++struct au_hdentry { ++ struct dentry *hd_dentry; ++ aufs_bindex_t hd_id; ++}; ++ ++struct au_dinfo { ++ atomic_t di_generation; ++ ++ struct au_rwsem di_rwsem; ++ aufs_bindex_t di_bstart, di_bend, di_bwh, di_bdiropq; ++ struct au_hdentry *di_hdentry; ++} ____cacheline_aligned_in_smp; ++ ++/* ---------------------------------------------------------------------- */ ++ ++/* dentry.c */ ++extern const struct dentry_operations aufs_dop; ++struct au_branch; ++struct dentry *au_lkup_one(struct qstr *name, struct dentry *h_parent, ++ struct au_branch *br, struct nameidata *nd); ++struct dentry *au_sio_lkup_one(struct qstr *name, struct dentry *parent, ++ struct au_branch *br); ++int au_h_verify(struct dentry *h_dentry, unsigned int udba, struct inode *h_dir, ++ struct dentry *h_parent, struct au_branch *br); ++ ++int au_lkup_dentry(struct dentry *dentry, aufs_bindex_t bstart, mode_t type, ++ struct nameidata *nd); ++int au_lkup_neg(struct dentry *dentry, aufs_bindex_t bindex); ++int au_refresh_dentry(struct dentry *dentry, struct dentry *parent); ++int au_reval_dpath(struct dentry *dentry, unsigned int sigen); ++ ++/* dinfo.c */ ++void au_di_init_once(void *_di); ++struct au_dinfo *au_di_alloc(struct super_block *sb, unsigned int lsc); ++void au_di_free(struct au_dinfo *dinfo); ++void au_di_swap(struct au_dinfo *a, struct au_dinfo *b); ++void au_di_cp(struct au_dinfo *dst, struct au_dinfo *src); ++int au_di_init(struct dentry *dentry); ++void au_di_fin(struct dentry *dentry); ++int au_di_realloc(struct au_dinfo *dinfo, int nbr); ++ ++void di_read_lock(struct dentry *d, int flags, unsigned int lsc); ++void di_read_unlock(struct dentry *d, int flags); ++void di_downgrade_lock(struct dentry *d, int flags); ++void di_write_lock(struct dentry *d, unsigned int lsc); ++void di_write_unlock(struct dentry *d); ++void di_write_lock2_child(struct dentry *d1, struct dentry *d2, int isdir); ++void di_write_lock2_parent(struct dentry *d1, struct dentry *d2, int isdir); ++void di_write_unlock2(struct dentry *d1, struct dentry *d2); ++ ++struct dentry *au_h_dptr(struct dentry *dentry, aufs_bindex_t bindex); ++struct dentry *au_h_d_alias(struct dentry *dentry, aufs_bindex_t bindex); ++aufs_bindex_t au_dbtail(struct dentry *dentry); ++aufs_bindex_t au_dbtaildir(struct dentry *dentry); ++ ++void au_set_h_dptr(struct dentry *dentry, aufs_bindex_t bindex, ++ struct dentry *h_dentry); ++int au_digen_test(struct dentry *dentry, unsigned int sigen); ++int au_dbrange_test(struct dentry *dentry); ++void au_update_digen(struct dentry *dentry); ++void au_update_dbrange(struct dentry *dentry, int do_put_zero); ++void au_update_dbstart(struct dentry *dentry); ++void au_update_dbend(struct dentry *dentry); ++int au_find_dbindex(struct dentry *dentry, struct dentry *h_dentry); ++ ++/* ---------------------------------------------------------------------- */ ++ ++static inline struct au_dinfo *au_di(struct dentry *dentry) ++{ ++ return dentry->d_fsdata; ++} ++ ++/* ---------------------------------------------------------------------- */ ++ ++/* lock subclass for dinfo */ ++enum { ++ AuLsc_DI_CHILD, /* child first */ ++ AuLsc_DI_CHILD2, /* 
rename(2), link(2), and cpup at hnotify */ ++ AuLsc_DI_CHILD3, /* copyup dirs */ ++ AuLsc_DI_PARENT, ++ AuLsc_DI_PARENT2, ++ AuLsc_DI_PARENT3, ++ AuLsc_DI_TMP /* temp for replacing dinfo */ ++}; ++ ++/* ++ * di_read_lock_child, di_write_lock_child, ++ * di_read_lock_child2, di_write_lock_child2, ++ * di_read_lock_child3, di_write_lock_child3, ++ * di_read_lock_parent, di_write_lock_parent, ++ * di_read_lock_parent2, di_write_lock_parent2, ++ * di_read_lock_parent3, di_write_lock_parent3, ++ */ ++#define AuReadLockFunc(name, lsc) \ ++static inline void di_read_lock_##name(struct dentry *d, int flags) \ ++{ di_read_lock(d, flags, AuLsc_DI_##lsc); } ++ ++#define AuWriteLockFunc(name, lsc) \ ++static inline void di_write_lock_##name(struct dentry *d) \ ++{ di_write_lock(d, AuLsc_DI_##lsc); } ++ ++#define AuRWLockFuncs(name, lsc) \ ++ AuReadLockFunc(name, lsc) \ ++ AuWriteLockFunc(name, lsc) ++ ++AuRWLockFuncs(child, CHILD); ++AuRWLockFuncs(child2, CHILD2); ++AuRWLockFuncs(child3, CHILD3); ++AuRWLockFuncs(parent, PARENT); ++AuRWLockFuncs(parent2, PARENT2); ++AuRWLockFuncs(parent3, PARENT3); ++ ++#undef AuReadLockFunc ++#undef AuWriteLockFunc ++#undef AuRWLockFuncs ++ ++#define DiMustNoWaiters(d) AuRwMustNoWaiters(&au_di(d)->di_rwsem) ++#define DiMustAnyLock(d) AuRwMustAnyLock(&au_di(d)->di_rwsem) ++#define DiMustWriteLock(d) AuRwMustWriteLock(&au_di(d)->di_rwsem) ++ ++/* ---------------------------------------------------------------------- */ ++ ++/* todo: memory barrier? */ ++static inline unsigned int au_digen(struct dentry *d) ++{ ++ return atomic_read(&au_di(d)->di_generation); ++} ++ ++static inline void au_h_dentry_init(struct au_hdentry *hdentry) ++{ ++ hdentry->hd_dentry = NULL; ++} ++ ++static inline void au_hdput(struct au_hdentry *hd) ++{ ++ if (hd) ++ dput(hd->hd_dentry); ++} ++ ++static inline aufs_bindex_t au_dbstart(struct dentry *dentry) ++{ ++ DiMustAnyLock(dentry); ++ return au_di(dentry)->di_bstart; ++} ++ ++static inline aufs_bindex_t au_dbend(struct dentry *dentry) ++{ ++ DiMustAnyLock(dentry); ++ return au_di(dentry)->di_bend; ++} ++ ++static inline aufs_bindex_t au_dbwh(struct dentry *dentry) ++{ ++ DiMustAnyLock(dentry); ++ return au_di(dentry)->di_bwh; ++} ++ ++static inline aufs_bindex_t au_dbdiropq(struct dentry *dentry) ++{ ++ DiMustAnyLock(dentry); ++ return au_di(dentry)->di_bdiropq; ++} ++ ++/* todo: hard/soft set? 
*/ ++static inline void au_set_dbstart(struct dentry *dentry, aufs_bindex_t bindex) ++{ ++ DiMustWriteLock(dentry); ++ au_di(dentry)->di_bstart = bindex; ++} ++ ++static inline void au_set_dbend(struct dentry *dentry, aufs_bindex_t bindex) ++{ ++ DiMustWriteLock(dentry); ++ au_di(dentry)->di_bend = bindex; ++} ++ ++static inline void au_set_dbwh(struct dentry *dentry, aufs_bindex_t bindex) ++{ ++ DiMustWriteLock(dentry); ++ /* dbwh can be outside of bstart - bend range */ ++ au_di(dentry)->di_bwh = bindex; ++} ++ ++static inline void au_set_dbdiropq(struct dentry *dentry, aufs_bindex_t bindex) ++{ ++ DiMustWriteLock(dentry); ++ au_di(dentry)->di_bdiropq = bindex; ++} ++ ++/* ---------------------------------------------------------------------- */ ++ ++#ifdef CONFIG_AUFS_HNOTIFY ++static inline void au_digen_dec(struct dentry *d) ++{ ++ atomic_dec(&au_di(d)->di_generation); ++} ++ ++static inline void au_hn_di_reinit(struct dentry *dentry) ++{ ++ dentry->d_fsdata = NULL; ++} ++#else ++AuStubVoid(au_hn_di_reinit, struct dentry *dentry __maybe_unused) ++#endif /* CONFIG_AUFS_HNOTIFY */ ++ ++#endif /* __KERNEL__ */ ++#endif /* __AUFS_DENTRY_H__ */ +diff -uNr linux-3.2.0-gentoo-r1.orig//fs/aufs/dinfo.c linux-3.2.0-gentoo-r1/fs/aufs/dinfo.c +--- linux-3.2.0-gentoo-r1.orig//fs/aufs/dinfo.c 1970-01-01 01:00:00.000000000 +0100 ++++ linux-3.2.0-gentoo-r1/fs/aufs/dinfo.c 2012-01-17 12:11:24.576506644 +0100 +@@ -0,0 +1,543 @@ ++/* ++ * Copyright (C) 2005-2012 Junjiro R. Okajima ++ * ++ * This program, aufs is free software; you can redistribute it and/or modify ++ * it under the terms of the GNU General Public License as published by ++ * the Free Software Foundation; either version 2 of the License, or ++ * (at your option) any later version. ++ * ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ * GNU General Public License for more details. 
++ * ++ * You should have received a copy of the GNU General Public License ++ * along with this program; if not, write to the Free Software ++ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA ++ */ ++ ++/* ++ * dentry private data ++ */ ++ ++#include "aufs.h" ++ ++void au_di_init_once(void *_dinfo) ++{ ++ struct au_dinfo *dinfo = _dinfo; ++ static struct lock_class_key aufs_di; ++ ++ au_rw_init(&dinfo->di_rwsem); ++ au_rw_class(&dinfo->di_rwsem, &aufs_di); ++} ++ ++struct au_dinfo *au_di_alloc(struct super_block *sb, unsigned int lsc) ++{ ++ struct au_dinfo *dinfo; ++ int nbr, i; ++ ++ dinfo = au_cache_alloc_dinfo(); ++ if (unlikely(!dinfo)) ++ goto out; ++ ++ nbr = au_sbend(sb) + 1; ++ if (nbr <= 0) ++ nbr = 1; ++ dinfo->di_hdentry = kcalloc(nbr, sizeof(*dinfo->di_hdentry), GFP_NOFS); ++ if (dinfo->di_hdentry) { ++ au_rw_write_lock_nested(&dinfo->di_rwsem, lsc); ++ dinfo->di_bstart = -1; ++ dinfo->di_bend = -1; ++ dinfo->di_bwh = -1; ++ dinfo->di_bdiropq = -1; ++ for (i = 0; i < nbr; i++) ++ dinfo->di_hdentry[i].hd_id = -1; ++ goto out; ++ } ++ ++ au_cache_free_dinfo(dinfo); ++ dinfo = NULL; ++ ++out: ++ return dinfo; ++} ++ ++void au_di_free(struct au_dinfo *dinfo) ++{ ++ struct au_hdentry *p; ++ aufs_bindex_t bend, bindex; ++ ++ /* dentry may not be revalidated */ ++ bindex = dinfo->di_bstart; ++ if (bindex >= 0) { ++ bend = dinfo->di_bend; ++ p = dinfo->di_hdentry + bindex; ++ while (bindex++ <= bend) ++ au_hdput(p++); ++ } ++ kfree(dinfo->di_hdentry); ++ au_cache_free_dinfo(dinfo); ++} ++ ++void au_di_swap(struct au_dinfo *a, struct au_dinfo *b) ++{ ++ struct au_hdentry *p; ++ aufs_bindex_t bi; ++ ++ AuRwMustWriteLock(&a->di_rwsem); ++ AuRwMustWriteLock(&b->di_rwsem); ++ ++#define DiSwap(v, name) \ ++ do { \ ++ v = a->di_##name; \ ++ a->di_##name = b->di_##name; \ ++ b->di_##name = v; \ ++ } while (0) ++ ++ DiSwap(p, hdentry); ++ DiSwap(bi, bstart); ++ DiSwap(bi, bend); ++ DiSwap(bi, bwh); ++ DiSwap(bi, bdiropq); ++ /* smp_mb(); */ ++ ++#undef DiSwap ++} ++ ++void au_di_cp(struct au_dinfo *dst, struct au_dinfo *src) ++{ ++ AuRwMustWriteLock(&dst->di_rwsem); ++ AuRwMustWriteLock(&src->di_rwsem); ++ ++ dst->di_bstart = src->di_bstart; ++ dst->di_bend = src->di_bend; ++ dst->di_bwh = src->di_bwh; ++ dst->di_bdiropq = src->di_bdiropq; ++ /* smp_mb(); */ ++} ++ ++int au_di_init(struct dentry *dentry) ++{ ++ int err; ++ struct super_block *sb; ++ struct au_dinfo *dinfo; ++ ++ err = 0; ++ sb = dentry->d_sb; ++ dinfo = au_di_alloc(sb, AuLsc_DI_CHILD); ++ if (dinfo) { ++ atomic_set(&dinfo->di_generation, au_sigen(sb)); ++ /* smp_mb(); */ /* atomic_set */ ++ dentry->d_fsdata = dinfo; ++ } else ++ err = -ENOMEM; ++ ++ return err; ++} ++ ++void au_di_fin(struct dentry *dentry) ++{ ++ struct au_dinfo *dinfo; ++ ++ dinfo = au_di(dentry); ++ AuRwDestroy(&dinfo->di_rwsem); ++ au_di_free(dinfo); ++} ++ ++int au_di_realloc(struct au_dinfo *dinfo, int nbr) ++{ ++ int err, sz; ++ struct au_hdentry *hdp; ++ ++ AuRwMustWriteLock(&dinfo->di_rwsem); ++ ++ err = -ENOMEM; ++ sz = sizeof(*hdp) * (dinfo->di_bend + 1); ++ if (!sz) ++ sz = sizeof(*hdp); ++ hdp = au_kzrealloc(dinfo->di_hdentry, sz, sizeof(*hdp) * nbr, GFP_NOFS); ++ if (hdp) { ++ dinfo->di_hdentry = hdp; ++ err = 0; ++ } ++ ++ return err; ++} ++ ++/* ---------------------------------------------------------------------- */ ++ ++static void do_ii_write_lock(struct inode *inode, unsigned int lsc) ++{ ++ switch (lsc) { ++ case AuLsc_DI_CHILD: ++ ii_write_lock_child(inode); ++ break; ++ case AuLsc_DI_CHILD2: ++ 
ii_write_lock_child2(inode); ++ break; ++ case AuLsc_DI_CHILD3: ++ ii_write_lock_child3(inode); ++ break; ++ case AuLsc_DI_PARENT: ++ ii_write_lock_parent(inode); ++ break; ++ case AuLsc_DI_PARENT2: ++ ii_write_lock_parent2(inode); ++ break; ++ case AuLsc_DI_PARENT3: ++ ii_write_lock_parent3(inode); ++ break; ++ default: ++ BUG(); ++ } ++} ++ ++static void do_ii_read_lock(struct inode *inode, unsigned int lsc) ++{ ++ switch (lsc) { ++ case AuLsc_DI_CHILD: ++ ii_read_lock_child(inode); ++ break; ++ case AuLsc_DI_CHILD2: ++ ii_read_lock_child2(inode); ++ break; ++ case AuLsc_DI_CHILD3: ++ ii_read_lock_child3(inode); ++ break; ++ case AuLsc_DI_PARENT: ++ ii_read_lock_parent(inode); ++ break; ++ case AuLsc_DI_PARENT2: ++ ii_read_lock_parent2(inode); ++ break; ++ case AuLsc_DI_PARENT3: ++ ii_read_lock_parent3(inode); ++ break; ++ default: ++ BUG(); ++ } ++} ++ ++void di_read_lock(struct dentry *d, int flags, unsigned int lsc) ++{ ++ au_rw_read_lock_nested(&au_di(d)->di_rwsem, lsc); ++ if (d->d_inode) { ++ if (au_ftest_lock(flags, IW)) ++ do_ii_write_lock(d->d_inode, lsc); ++ else if (au_ftest_lock(flags, IR)) ++ do_ii_read_lock(d->d_inode, lsc); ++ } ++} ++ ++void di_read_unlock(struct dentry *d, int flags) ++{ ++ if (d->d_inode) { ++ if (au_ftest_lock(flags, IW)) { ++ au_dbg_verify_dinode(d); ++ ii_write_unlock(d->d_inode); ++ } else if (au_ftest_lock(flags, IR)) { ++ au_dbg_verify_dinode(d); ++ ii_read_unlock(d->d_inode); ++ } ++ } ++ au_rw_read_unlock(&au_di(d)->di_rwsem); ++} ++ ++void di_downgrade_lock(struct dentry *d, int flags) ++{ ++ if (d->d_inode && au_ftest_lock(flags, IR)) ++ ii_downgrade_lock(d->d_inode); ++ au_rw_dgrade_lock(&au_di(d)->di_rwsem); ++} ++ ++void di_write_lock(struct dentry *d, unsigned int lsc) ++{ ++ au_rw_write_lock_nested(&au_di(d)->di_rwsem, lsc); ++ if (d->d_inode) ++ do_ii_write_lock(d->d_inode, lsc); ++} ++ ++void di_write_unlock(struct dentry *d) ++{ ++ au_dbg_verify_dinode(d); ++ if (d->d_inode) ++ ii_write_unlock(d->d_inode); ++ au_rw_write_unlock(&au_di(d)->di_rwsem); ++} ++ ++void di_write_lock2_child(struct dentry *d1, struct dentry *d2, int isdir) ++{ ++ AuDebugOn(d1 == d2 ++ || d1->d_inode == d2->d_inode ++ || d1->d_sb != d2->d_sb); ++ ++ if (isdir && au_test_subdir(d1, d2)) { ++ di_write_lock_child(d1); ++ di_write_lock_child2(d2); ++ } else { ++ /* there should be no races */ ++ di_write_lock_child(d2); ++ di_write_lock_child2(d1); ++ } ++} ++ ++void di_write_lock2_parent(struct dentry *d1, struct dentry *d2, int isdir) ++{ ++ AuDebugOn(d1 == d2 ++ || d1->d_inode == d2->d_inode ++ || d1->d_sb != d2->d_sb); ++ ++ if (isdir && au_test_subdir(d1, d2)) { ++ di_write_lock_parent(d1); ++ di_write_lock_parent2(d2); ++ } else { ++ /* there should be no races */ ++ di_write_lock_parent(d2); ++ di_write_lock_parent2(d1); ++ } ++} ++ ++void di_write_unlock2(struct dentry *d1, struct dentry *d2) ++{ ++ di_write_unlock(d1); ++ if (d1->d_inode == d2->d_inode) ++ au_rw_write_unlock(&au_di(d2)->di_rwsem); ++ else ++ di_write_unlock(d2); ++} ++ ++/* ---------------------------------------------------------------------- */ ++ ++struct dentry *au_h_dptr(struct dentry *dentry, aufs_bindex_t bindex) ++{ ++ struct dentry *d; ++ ++ DiMustAnyLock(dentry); ++ ++ if (au_dbstart(dentry) < 0 || bindex < au_dbstart(dentry)) ++ return NULL; ++ AuDebugOn(bindex < 0); ++ d = au_di(dentry)->di_hdentry[0 + bindex].hd_dentry; ++ AuDebugOn(d && d->d_count <= 0); ++ return d; ++} ++ ++/* ++ * extended version of au_h_dptr(). 
++ * returns a hashed and positive h_dentry in bindex, NULL, or error. ++ */ ++struct dentry *au_h_d_alias(struct dentry *dentry, aufs_bindex_t bindex) ++{ ++ struct dentry *h_dentry; ++ struct inode *inode, *h_inode; ++ ++ inode = dentry->d_inode; ++ AuDebugOn(!inode); ++ ++ h_dentry = NULL; ++ if (au_dbstart(dentry) <= bindex ++ && bindex <= au_dbend(dentry)) ++ h_dentry = au_h_dptr(dentry, bindex); ++ if (h_dentry && !au_d_hashed_positive(h_dentry)) { ++ dget(h_dentry); ++ goto out; /* success */ ++ } ++ ++ AuDebugOn(bindex < au_ibstart(inode)); ++ AuDebugOn(au_ibend(inode) < bindex); ++ h_inode = au_h_iptr(inode, bindex); ++ h_dentry = d_find_alias(h_inode); ++ if (h_dentry) { ++ if (!IS_ERR(h_dentry)) { ++ if (!au_d_hashed_positive(h_dentry)) ++ goto out; /* success */ ++ dput(h_dentry); ++ } else ++ goto out; ++ } ++ ++ if (au_opt_test(au_mntflags(dentry->d_sb), PLINK)) { ++ h_dentry = au_plink_lkup(inode, bindex); ++ AuDebugOn(!h_dentry); ++ if (!IS_ERR(h_dentry)) { ++ if (!au_d_hashed_positive(h_dentry)) ++ goto out; /* success */ ++ dput(h_dentry); ++ h_dentry = NULL; ++ } ++ } ++ ++out: ++ AuDbgDentry(h_dentry); ++ return h_dentry; ++} ++ ++aufs_bindex_t au_dbtail(struct dentry *dentry) ++{ ++ aufs_bindex_t bend, bwh; ++ ++ bend = au_dbend(dentry); ++ if (0 <= bend) { ++ bwh = au_dbwh(dentry); ++ if (!bwh) ++ return bwh; ++ if (0 < bwh && bwh < bend) ++ return bwh - 1; ++ } ++ return bend; ++} ++ ++aufs_bindex_t au_dbtaildir(struct dentry *dentry) ++{ ++ aufs_bindex_t bend, bopq; ++ ++ bend = au_dbtail(dentry); ++ if (0 <= bend) { ++ bopq = au_dbdiropq(dentry); ++ if (0 <= bopq && bopq < bend) ++ bend = bopq; ++ } ++ return bend; ++} ++ ++/* ---------------------------------------------------------------------- */ ++ ++void au_set_h_dptr(struct dentry *dentry, aufs_bindex_t bindex, ++ struct dentry *h_dentry) ++{ ++ struct au_hdentry *hd = au_di(dentry)->di_hdentry + bindex; ++ struct au_branch *br; ++ ++ DiMustWriteLock(dentry); ++ ++ au_hdput(hd); ++ hd->hd_dentry = h_dentry; ++ if (h_dentry) { ++ br = au_sbr(dentry->d_sb, bindex); ++ hd->hd_id = br->br_id; ++ } ++} ++ ++int au_dbrange_test(struct dentry *dentry) ++{ ++ int err; ++ aufs_bindex_t bstart, bend; ++ ++ err = 0; ++ bstart = au_dbstart(dentry); ++ bend = au_dbend(dentry); ++ if (bstart >= 0) ++ AuDebugOn(bend < 0 && bstart > bend); ++ else { ++ err = -EIO; ++ AuDebugOn(bend >= 0); ++ } ++ ++ return err; ++} ++ ++int au_digen_test(struct dentry *dentry, unsigned int sigen) ++{ ++ int err; ++ ++ err = 0; ++ if (unlikely(au_digen(dentry) != sigen ++ || au_iigen_test(dentry->d_inode, sigen))) ++ err = -EIO; ++ ++ return err; ++} ++ ++void au_update_digen(struct dentry *dentry) ++{ ++ atomic_set(&au_di(dentry)->di_generation, au_sigen(dentry->d_sb)); ++ /* smp_mb(); */ /* atomic_set */ ++} ++ ++void au_update_dbrange(struct dentry *dentry, int do_put_zero) ++{ ++ struct au_dinfo *dinfo; ++ struct dentry *h_d; ++ struct au_hdentry *hdp; ++ ++ DiMustWriteLock(dentry); ++ ++ dinfo = au_di(dentry); ++ if (!dinfo || dinfo->di_bstart < 0) ++ return; ++ ++ hdp = dinfo->di_hdentry; ++ if (do_put_zero) { ++ aufs_bindex_t bindex, bend; ++ ++ bend = dinfo->di_bend; ++ for (bindex = dinfo->di_bstart; bindex <= bend; bindex++) { ++ h_d = hdp[0 + bindex].hd_dentry; ++ if (h_d && !h_d->d_inode) ++ au_set_h_dptr(dentry, bindex, NULL); ++ } ++ } ++ ++ dinfo->di_bstart = -1; ++ while (++dinfo->di_bstart <= dinfo->di_bend) ++ if (hdp[0 + dinfo->di_bstart].hd_dentry) ++ break; ++ if (dinfo->di_bstart > dinfo->di_bend) { ++ dinfo->di_bstart = 
-1; ++ dinfo->di_bend = -1; ++ return; ++ } ++ ++ dinfo->di_bend++; ++ while (0 <= --dinfo->di_bend) ++ if (hdp[0 + dinfo->di_bend].hd_dentry) ++ break; ++ AuDebugOn(dinfo->di_bstart > dinfo->di_bend || dinfo->di_bend < 0); ++} ++ ++void au_update_dbstart(struct dentry *dentry) ++{ ++ aufs_bindex_t bindex, bend; ++ struct dentry *h_dentry; ++ ++ bend = au_dbend(dentry); ++ for (bindex = au_dbstart(dentry); bindex <= bend; bindex++) { ++ h_dentry = au_h_dptr(dentry, bindex); ++ if (!h_dentry) ++ continue; ++ if (h_dentry->d_inode) { ++ au_set_dbstart(dentry, bindex); ++ return; ++ } ++ au_set_h_dptr(dentry, bindex, NULL); ++ } ++} ++ ++void au_update_dbend(struct dentry *dentry) ++{ ++ aufs_bindex_t bindex, bstart; ++ struct dentry *h_dentry; ++ ++ bstart = au_dbstart(dentry); ++ for (bindex = au_dbend(dentry); bindex >= bstart; bindex--) { ++ h_dentry = au_h_dptr(dentry, bindex); ++ if (!h_dentry) ++ continue; ++ if (h_dentry->d_inode) { ++ au_set_dbend(dentry, bindex); ++ return; ++ } ++ au_set_h_dptr(dentry, bindex, NULL); ++ } ++} ++ ++int au_find_dbindex(struct dentry *dentry, struct dentry *h_dentry) ++{ ++ aufs_bindex_t bindex, bend; ++ ++ bend = au_dbend(dentry); ++ for (bindex = au_dbstart(dentry); bindex <= bend; bindex++) ++ if (au_h_dptr(dentry, bindex) == h_dentry) ++ return bindex; ++ return -1; ++} +diff -uNr linux-3.2.0-gentoo-r1.orig//fs/aufs/dir.c linux-3.2.0-gentoo-r1/fs/aufs/dir.c +--- linux-3.2.0-gentoo-r1.orig//fs/aufs/dir.c 1970-01-01 01:00:00.000000000 +0100 ++++ linux-3.2.0-gentoo-r1/fs/aufs/dir.c 2012-01-17 12:11:24.583451150 +0100 +@@ -0,0 +1,634 @@ ++/* ++ * Copyright (C) 2005-2012 Junjiro R. Okajima ++ * ++ * This program, aufs is free software; you can redistribute it and/or modify ++ * it under the terms of the GNU General Public License as published by ++ * the Free Software Foundation; either version 2 of the License, or ++ * (at your option) any later version. ++ * ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ * GNU General Public License for more details. 
++ * ++ * You should have received a copy of the GNU General Public License ++ * along with this program; if not, write to the Free Software ++ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA ++ */ ++ ++/* ++ * directory operations ++ */ ++ ++#include ++#include "aufs.h" ++ ++void au_add_nlink(struct inode *dir, struct inode *h_dir) ++{ ++ unsigned int nlink; ++ ++ AuDebugOn(!S_ISDIR(dir->i_mode) || !S_ISDIR(h_dir->i_mode)); ++ ++ nlink = dir->i_nlink; ++ nlink += h_dir->i_nlink - 2; ++ if (h_dir->i_nlink < 2) ++ nlink += 2; ++ set_nlink(dir, nlink); ++} ++ ++void au_sub_nlink(struct inode *dir, struct inode *h_dir) ++{ ++ unsigned int nlink; ++ ++ AuDebugOn(!S_ISDIR(dir->i_mode) || !S_ISDIR(h_dir->i_mode)); ++ ++ nlink = dir->i_nlink; ++ nlink -= h_dir->i_nlink - 2; ++ if (h_dir->i_nlink < 2) ++ nlink -= 2; ++ set_nlink(dir, nlink); ++} ++ ++loff_t au_dir_size(struct file *file, struct dentry *dentry) ++{ ++ loff_t sz; ++ aufs_bindex_t bindex, bend; ++ struct file *h_file; ++ struct dentry *h_dentry; ++ ++ sz = 0; ++ if (file) { ++ AuDebugOn(!file->f_dentry); ++ AuDebugOn(!file->f_dentry->d_inode); ++ AuDebugOn(!S_ISDIR(file->f_dentry->d_inode->i_mode)); ++ ++ bend = au_fbend_dir(file); ++ for (bindex = au_fbstart(file); ++ bindex <= bend && sz < KMALLOC_MAX_SIZE; ++ bindex++) { ++ h_file = au_hf_dir(file, bindex); ++ if (h_file ++ && h_file->f_dentry ++ && h_file->f_dentry->d_inode) ++ sz += i_size_read(h_file->f_dentry->d_inode); ++ } ++ } else { ++ AuDebugOn(!dentry); ++ AuDebugOn(!dentry->d_inode); ++ AuDebugOn(!S_ISDIR(dentry->d_inode->i_mode)); ++ ++ bend = au_dbtaildir(dentry); ++ for (bindex = au_dbstart(dentry); ++ bindex <= bend && sz < KMALLOC_MAX_SIZE; ++ bindex++) { ++ h_dentry = au_h_dptr(dentry, bindex); ++ if (h_dentry && h_dentry->d_inode) ++ sz += i_size_read(h_dentry->d_inode); ++ } ++ } ++ if (sz < KMALLOC_MAX_SIZE) ++ sz = roundup_pow_of_two(sz); ++ if (sz > KMALLOC_MAX_SIZE) ++ sz = KMALLOC_MAX_SIZE; ++ else if (sz < NAME_MAX) { ++ BUILD_BUG_ON(AUFS_RDBLK_DEF < NAME_MAX); ++ sz = AUFS_RDBLK_DEF; ++ } ++ return sz; ++} ++ ++/* ---------------------------------------------------------------------- */ ++ ++static int reopen_dir(struct file *file) ++{ ++ int err; ++ unsigned int flags; ++ aufs_bindex_t bindex, btail, bstart; ++ struct dentry *dentry, *h_dentry; ++ struct file *h_file; ++ ++ /* open all lower dirs */ ++ dentry = file->f_dentry; ++ bstart = au_dbstart(dentry); ++ for (bindex = au_fbstart(file); bindex < bstart; bindex++) ++ au_set_h_fptr(file, bindex, NULL); ++ au_set_fbstart(file, bstart); ++ ++ btail = au_dbtaildir(dentry); ++ for (bindex = au_fbend_dir(file); btail < bindex; bindex--) ++ au_set_h_fptr(file, bindex, NULL); ++ au_set_fbend_dir(file, btail); ++ ++ flags = vfsub_file_flags(file); ++ for (bindex = bstart; bindex <= btail; bindex++) { ++ h_dentry = au_h_dptr(dentry, bindex); ++ if (!h_dentry) ++ continue; ++ h_file = au_hf_dir(file, bindex); ++ if (h_file) ++ continue; ++ ++ h_file = au_h_open(dentry, bindex, flags, file); ++ err = PTR_ERR(h_file); ++ if (IS_ERR(h_file)) ++ goto out; /* close all? */ ++ au_set_h_fptr(file, bindex, h_file); ++ } ++ au_update_figen(file); ++ /* todo: necessary? 
*/ ++ /* file->f_ra = h_file->f_ra; */ ++ err = 0; ++ ++out: ++ return err; ++} ++ ++static int do_open_dir(struct file *file, int flags) ++{ ++ int err; ++ aufs_bindex_t bindex, btail; ++ struct dentry *dentry, *h_dentry; ++ struct file *h_file; ++ ++ FiMustWriteLock(file); ++ ++ dentry = file->f_dentry; ++ err = au_alive_dir(dentry); ++ if (unlikely(err)) ++ goto out; ++ ++ file->f_version = dentry->d_inode->i_version; ++ bindex = au_dbstart(dentry); ++ au_set_fbstart(file, bindex); ++ btail = au_dbtaildir(dentry); ++ au_set_fbend_dir(file, btail); ++ for (; !err && bindex <= btail; bindex++) { ++ h_dentry = au_h_dptr(dentry, bindex); ++ if (!h_dentry) ++ continue; ++ ++ h_file = au_h_open(dentry, bindex, flags, file); ++ if (IS_ERR(h_file)) { ++ err = PTR_ERR(h_file); ++ break; ++ } ++ au_set_h_fptr(file, bindex, h_file); ++ } ++ au_update_figen(file); ++ /* todo: necessary? */ ++ /* file->f_ra = h_file->f_ra; */ ++ if (!err) ++ return 0; /* success */ ++ ++ /* close all */ ++ for (bindex = au_fbstart(file); bindex <= btail; bindex++) ++ au_set_h_fptr(file, bindex, NULL); ++ au_set_fbstart(file, -1); ++ au_set_fbend_dir(file, -1); ++ ++out: ++ return err; ++} ++ ++static int aufs_open_dir(struct inode *inode __maybe_unused, ++ struct file *file) ++{ ++ int err; ++ struct super_block *sb; ++ struct au_fidir *fidir; ++ ++ err = -ENOMEM; ++ sb = file->f_dentry->d_sb; ++ si_read_lock(sb, AuLock_FLUSH); ++ fidir = au_fidir_alloc(sb); ++ if (fidir) { ++ err = au_do_open(file, do_open_dir, fidir); ++ if (unlikely(err)) ++ kfree(fidir); ++ } ++ si_read_unlock(sb); ++ return err; ++} ++ ++static int aufs_release_dir(struct inode *inode __maybe_unused, ++ struct file *file) ++{ ++ struct au_vdir *vdir_cache; ++ struct au_finfo *finfo; ++ struct au_fidir *fidir; ++ aufs_bindex_t bindex, bend; ++ ++ finfo = au_fi(file); ++ fidir = finfo->fi_hdir; ++ if (fidir) { ++ /* remove me from sb->s_files */ ++ file_sb_list_del(file); ++ ++ vdir_cache = fidir->fd_vdir_cache; /* lock-free */ ++ if (vdir_cache) ++ au_vdir_free(vdir_cache); ++ ++ bindex = finfo->fi_btop; ++ if (bindex >= 0) { ++ /* ++ * calls fput() instead of filp_close(), ++ * since no dnotify or lock for the lower file. 
++ */ ++ bend = fidir->fd_bbot; ++ for (; bindex <= bend; bindex++) ++ au_set_h_fptr(file, bindex, NULL); ++ } ++ kfree(fidir); ++ finfo->fi_hdir = NULL; ++ } ++ au_finfo_fin(file); ++ return 0; ++} ++ ++/* ---------------------------------------------------------------------- */ ++ ++static int au_do_flush_dir(struct file *file, fl_owner_t id) ++{ ++ int err; ++ aufs_bindex_t bindex, bend; ++ struct file *h_file; ++ ++ err = 0; ++ bend = au_fbend_dir(file); ++ for (bindex = au_fbstart(file); !err && bindex <= bend; bindex++) { ++ h_file = au_hf_dir(file, bindex); ++ if (h_file) ++ err = vfsub_flush(h_file, id); ++ } ++ return err; ++} ++ ++static int aufs_flush_dir(struct file *file, fl_owner_t id) ++{ ++ return au_do_flush(file, id, au_do_flush_dir); ++} ++ ++/* ---------------------------------------------------------------------- */ ++ ++static int au_do_fsync_dir_no_file(struct dentry *dentry, int datasync) ++{ ++ int err; ++ aufs_bindex_t bend, bindex; ++ struct inode *inode; ++ struct super_block *sb; ++ ++ err = 0; ++ sb = dentry->d_sb; ++ inode = dentry->d_inode; ++ IMustLock(inode); ++ bend = au_dbend(dentry); ++ for (bindex = au_dbstart(dentry); !err && bindex <= bend; bindex++) { ++ struct path h_path; ++ ++ if (au_test_ro(sb, bindex, inode)) ++ continue; ++ h_path.dentry = au_h_dptr(dentry, bindex); ++ if (!h_path.dentry) ++ continue; ++ ++ h_path.mnt = au_sbr_mnt(sb, bindex); ++ err = vfsub_fsync(NULL, &h_path, datasync); ++ } ++ ++ return err; ++} ++ ++static int au_do_fsync_dir(struct file *file, int datasync) ++{ ++ int err; ++ aufs_bindex_t bend, bindex; ++ struct file *h_file; ++ struct super_block *sb; ++ struct inode *inode; ++ ++ err = au_reval_and_lock_fdi(file, reopen_dir, /*wlock*/1); ++ if (unlikely(err)) ++ goto out; ++ ++ sb = file->f_dentry->d_sb; ++ inode = file->f_dentry->d_inode; ++ bend = au_fbend_dir(file); ++ for (bindex = au_fbstart(file); !err && bindex <= bend; bindex++) { ++ h_file = au_hf_dir(file, bindex); ++ if (!h_file || au_test_ro(sb, bindex, inode)) ++ continue; ++ ++ err = vfsub_fsync(h_file, &h_file->f_path, datasync); ++ } ++ ++out: ++ return err; ++} ++ ++/* ++ * @file may be NULL ++ */ ++static int aufs_fsync_dir(struct file *file, loff_t start, loff_t end, ++ int datasync) ++{ ++ int err; ++ struct dentry *dentry; ++ struct super_block *sb; ++ struct mutex *mtx; ++ ++ err = 0; ++ dentry = file->f_dentry; ++ mtx = &dentry->d_inode->i_mutex; ++ mutex_lock(mtx); ++ sb = dentry->d_sb; ++ si_noflush_read_lock(sb); ++ if (file) ++ err = au_do_fsync_dir(file, datasync); ++ else { ++ di_write_lock_child(dentry); ++ err = au_do_fsync_dir_no_file(dentry, datasync); ++ } ++ au_cpup_attr_timesizes(dentry->d_inode); ++ di_write_unlock(dentry); ++ if (file) ++ fi_write_unlock(file); ++ ++ si_read_unlock(sb); ++ mutex_unlock(mtx); ++ return err; ++} ++ ++/* ---------------------------------------------------------------------- */ ++ ++static int aufs_readdir(struct file *file, void *dirent, filldir_t filldir) ++{ ++ int err; ++ struct dentry *dentry; ++ struct inode *inode, *h_inode; ++ struct super_block *sb; ++ ++ dentry = file->f_dentry; ++ inode = dentry->d_inode; ++ IMustLock(inode); ++ ++ sb = dentry->d_sb; ++ si_read_lock(sb, AuLock_FLUSH); ++ err = au_reval_and_lock_fdi(file, reopen_dir, /*wlock*/1); ++ if (unlikely(err)) ++ goto out; ++ err = au_alive_dir(dentry); ++ if (!err) ++ err = au_vdir_init(file); ++ di_downgrade_lock(dentry, AuLock_IR); ++ if (unlikely(err)) ++ goto out_unlock; ++ ++ h_inode = au_h_iptr(inode, au_ibstart(inode)); ++ if 
(!au_test_nfsd()) { ++ err = au_vdir_fill_de(file, dirent, filldir); ++ fsstack_copy_attr_atime(inode, h_inode); ++ } else { ++ /* ++ * nfsd filldir may call lookup_one_len(), vfs_getattr(), ++ * encode_fh() and others. ++ */ ++ atomic_inc(&h_inode->i_count); ++ di_read_unlock(dentry, AuLock_IR); ++ si_read_unlock(sb); ++ err = au_vdir_fill_de(file, dirent, filldir); ++ fsstack_copy_attr_atime(inode, h_inode); ++ fi_write_unlock(file); ++ iput(h_inode); ++ ++ AuTraceErr(err); ++ return err; ++ } ++ ++out_unlock: ++ di_read_unlock(dentry, AuLock_IR); ++ fi_write_unlock(file); ++out: ++ si_read_unlock(sb); ++ return err; ++} ++ ++/* ---------------------------------------------------------------------- */ ++ ++#define AuTestEmpty_WHONLY 1 ++#define AuTestEmpty_CALLED (1 << 1) ++#define AuTestEmpty_SHWH (1 << 2) ++#define au_ftest_testempty(flags, name) ((flags) & AuTestEmpty_##name) ++#define au_fset_testempty(flags, name) \ ++ do { (flags) |= AuTestEmpty_##name; } while (0) ++#define au_fclr_testempty(flags, name) \ ++ do { (flags) &= ~AuTestEmpty_##name; } while (0) ++ ++#ifndef CONFIG_AUFS_SHWH ++#undef AuTestEmpty_SHWH ++#define AuTestEmpty_SHWH 0 ++#endif ++ ++struct test_empty_arg { ++ struct au_nhash *whlist; ++ unsigned int flags; ++ int err; ++ aufs_bindex_t bindex; ++}; ++ ++static int test_empty_cb(void *__arg, const char *__name, int namelen, ++ loff_t offset __maybe_unused, u64 ino, ++ unsigned int d_type) ++{ ++ struct test_empty_arg *arg = __arg; ++ char *name = (void *)__name; ++ ++ arg->err = 0; ++ au_fset_testempty(arg->flags, CALLED); ++ /* smp_mb(); */ ++ if (name[0] == '.' ++ && (namelen == 1 || (name[1] == '.' && namelen == 2))) ++ goto out; /* success */ ++ ++ if (namelen <= AUFS_WH_PFX_LEN ++ || memcmp(name, AUFS_WH_PFX, AUFS_WH_PFX_LEN)) { ++ if (au_ftest_testempty(arg->flags, WHONLY) ++ && !au_nhash_test_known_wh(arg->whlist, name, namelen)) ++ arg->err = -ENOTEMPTY; ++ goto out; ++ } ++ ++ name += AUFS_WH_PFX_LEN; ++ namelen -= AUFS_WH_PFX_LEN; ++ if (!au_nhash_test_known_wh(arg->whlist, name, namelen)) ++ arg->err = au_nhash_append_wh ++ (arg->whlist, name, namelen, ino, d_type, arg->bindex, ++ au_ftest_testempty(arg->flags, SHWH)); ++ ++out: ++ /* smp_mb(); */ ++ AuTraceErr(arg->err); ++ return arg->err; ++} ++ ++static int do_test_empty(struct dentry *dentry, struct test_empty_arg *arg) ++{ ++ int err; ++ struct file *h_file; ++ ++ h_file = au_h_open(dentry, arg->bindex, ++ O_RDONLY | O_NONBLOCK | O_DIRECTORY | O_LARGEFILE, ++ /*file*/NULL); ++ err = PTR_ERR(h_file); ++ if (IS_ERR(h_file)) ++ goto out; ++ ++ err = 0; ++ if (!au_opt_test(au_mntflags(dentry->d_sb), UDBA_NONE) ++ && !h_file->f_dentry->d_inode->i_nlink) ++ goto out_put; ++ ++ do { ++ arg->err = 0; ++ au_fclr_testempty(arg->flags, CALLED); ++ /* smp_mb(); */ ++ err = vfsub_readdir(h_file, test_empty_cb, arg); ++ if (err >= 0) ++ err = arg->err; ++ } while (!err && au_ftest_testempty(arg->flags, CALLED)); ++ ++out_put: ++ fput(h_file); ++ au_sbr_put(dentry->d_sb, arg->bindex); ++out: ++ return err; ++} ++ ++struct do_test_empty_args { ++ int *errp; ++ struct dentry *dentry; ++ struct test_empty_arg *arg; ++}; ++ ++static void call_do_test_empty(void *args) ++{ ++ struct do_test_empty_args *a = args; ++ *a->errp = do_test_empty(a->dentry, a->arg); ++} ++ ++static int sio_test_empty(struct dentry *dentry, struct test_empty_arg *arg) ++{ ++ int err, wkq_err; ++ struct dentry *h_dentry; ++ struct inode *h_inode; ++ ++ h_dentry = au_h_dptr(dentry, arg->bindex); ++ h_inode = h_dentry->d_inode; ++ /* todo: 
i_mode changes anytime? */ ++ mutex_lock_nested(&h_inode->i_mutex, AuLsc_I_CHILD); ++ err = au_test_h_perm_sio(h_inode, MAY_EXEC | MAY_READ); ++ mutex_unlock(&h_inode->i_mutex); ++ if (!err) ++ err = do_test_empty(dentry, arg); ++ else { ++ struct do_test_empty_args args = { ++ .errp = &err, ++ .dentry = dentry, ++ .arg = arg ++ }; ++ unsigned int flags = arg->flags; ++ ++ wkq_err = au_wkq_wait(call_do_test_empty, &args); ++ if (unlikely(wkq_err)) ++ err = wkq_err; ++ arg->flags = flags; ++ } ++ ++ return err; ++} ++ ++int au_test_empty_lower(struct dentry *dentry) ++{ ++ int err; ++ unsigned int rdhash; ++ aufs_bindex_t bindex, bstart, btail; ++ struct au_nhash whlist; ++ struct test_empty_arg arg; ++ ++ SiMustAnyLock(dentry->d_sb); ++ ++ rdhash = au_sbi(dentry->d_sb)->si_rdhash; ++ if (!rdhash) ++ rdhash = au_rdhash_est(au_dir_size(/*file*/NULL, dentry)); ++ err = au_nhash_alloc(&whlist, rdhash, GFP_NOFS); ++ if (unlikely(err)) ++ goto out; ++ ++ arg.flags = 0; ++ arg.whlist = &whlist; ++ bstart = au_dbstart(dentry); ++ if (au_opt_test(au_mntflags(dentry->d_sb), SHWH)) ++ au_fset_testempty(arg.flags, SHWH); ++ arg.bindex = bstart; ++ err = do_test_empty(dentry, &arg); ++ if (unlikely(err)) ++ goto out_whlist; ++ ++ au_fset_testempty(arg.flags, WHONLY); ++ btail = au_dbtaildir(dentry); ++ for (bindex = bstart + 1; !err && bindex <= btail; bindex++) { ++ struct dentry *h_dentry; ++ ++ h_dentry = au_h_dptr(dentry, bindex); ++ if (h_dentry && h_dentry->d_inode) { ++ arg.bindex = bindex; ++ err = do_test_empty(dentry, &arg); ++ } ++ } ++ ++out_whlist: ++ au_nhash_wh_free(&whlist); ++out: ++ return err; ++} ++ ++int au_test_empty(struct dentry *dentry, struct au_nhash *whlist) ++{ ++ int err; ++ struct test_empty_arg arg; ++ aufs_bindex_t bindex, btail; ++ ++ err = 0; ++ arg.whlist = whlist; ++ arg.flags = AuTestEmpty_WHONLY; ++ if (au_opt_test(au_mntflags(dentry->d_sb), SHWH)) ++ au_fset_testempty(arg.flags, SHWH); ++ btail = au_dbtaildir(dentry); ++ for (bindex = au_dbstart(dentry); !err && bindex <= btail; bindex++) { ++ struct dentry *h_dentry; ++ ++ h_dentry = au_h_dptr(dentry, bindex); ++ if (h_dentry && h_dentry->d_inode) { ++ arg.bindex = bindex; ++ err = sio_test_empty(dentry, &arg); ++ } ++ } ++ ++ return err; ++} ++ ++/* ---------------------------------------------------------------------- */ ++ ++const struct file_operations aufs_dir_fop = { ++ .owner = THIS_MODULE, ++ .llseek = default_llseek, ++ .read = generic_read_dir, ++ .readdir = aufs_readdir, ++ .unlocked_ioctl = aufs_ioctl_dir, ++#ifdef CONFIG_COMPAT ++ .compat_ioctl = aufs_compat_ioctl_dir, ++#endif ++ .open = aufs_open_dir, ++ .release = aufs_release_dir, ++ .flush = aufs_flush_dir, ++ .fsync = aufs_fsync_dir ++}; +diff -uNr linux-3.2.0-gentoo-r1.orig//fs/aufs/dir.h linux-3.2.0-gentoo-r1/fs/aufs/dir.h +--- linux-3.2.0-gentoo-r1.orig//fs/aufs/dir.h 1970-01-01 01:00:00.000000000 +0100 ++++ linux-3.2.0-gentoo-r1/fs/aufs/dir.h 2012-01-17 12:11:24.604284671 +0100 +@@ -0,0 +1,137 @@ ++/* ++ * Copyright (C) 2005-2012 Junjiro R. Okajima ++ * ++ * This program, aufs is free software; you can redistribute it and/or modify ++ * it under the terms of the GNU General Public License as published by ++ * the Free Software Foundation; either version 2 of the License, or ++ * (at your option) any later version. ++ * ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the ++ * GNU General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public License ++ * along with this program; if not, write to the Free Software ++ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA ++ */ ++ ++/* ++ * directory operations ++ */ ++ ++#ifndef __AUFS_DIR_H__ ++#define __AUFS_DIR_H__ ++ ++#ifdef __KERNEL__ ++ ++#include ++ ++/* ---------------------------------------------------------------------- */ ++ ++/* need to be faster and smaller */ ++ ++struct au_nhash { ++ unsigned int nh_num; ++ struct hlist_head *nh_head; ++}; ++ ++struct au_vdir_destr { ++ unsigned char len; ++ unsigned char name[0]; ++} __packed; ++ ++struct au_vdir_dehstr { ++ struct hlist_node hash; ++ struct au_vdir_destr *str; ++} ____cacheline_aligned_in_smp; ++ ++struct au_vdir_de { ++ ino_t de_ino; ++ unsigned char de_type; ++ /* caution: packed */ ++ struct au_vdir_destr de_str; ++} __packed; ++ ++struct au_vdir_wh { ++ struct hlist_node wh_hash; ++#ifdef CONFIG_AUFS_SHWH ++ ino_t wh_ino; ++ aufs_bindex_t wh_bindex; ++ unsigned char wh_type; ++#else ++ aufs_bindex_t wh_bindex; ++#endif ++ /* caution: packed */ ++ struct au_vdir_destr wh_str; ++} __packed; ++ ++union au_vdir_deblk_p { ++ unsigned char *deblk; ++ struct au_vdir_de *de; ++}; ++ ++struct au_vdir { ++ unsigned char **vd_deblk; ++ unsigned long vd_nblk; ++ struct { ++ unsigned long ul; ++ union au_vdir_deblk_p p; ++ } vd_last; ++ ++ unsigned long vd_version; ++ unsigned int vd_deblk_sz; ++ unsigned long vd_jiffy; ++} ____cacheline_aligned_in_smp; ++ ++/* ---------------------------------------------------------------------- */ ++ ++/* dir.c */ ++extern const struct file_operations aufs_dir_fop; ++void au_add_nlink(struct inode *dir, struct inode *h_dir); ++void au_sub_nlink(struct inode *dir, struct inode *h_dir); ++loff_t au_dir_size(struct file *file, struct dentry *dentry); ++int au_test_empty_lower(struct dentry *dentry); ++int au_test_empty(struct dentry *dentry, struct au_nhash *whlist); ++ ++/* vdir.c */ ++unsigned int au_rdhash_est(loff_t sz); ++int au_nhash_alloc(struct au_nhash *nhash, unsigned int num_hash, gfp_t gfp); ++void au_nhash_wh_free(struct au_nhash *whlist); ++int au_nhash_test_longer_wh(struct au_nhash *whlist, aufs_bindex_t btgt, ++ int limit); ++int au_nhash_test_known_wh(struct au_nhash *whlist, char *name, int nlen); ++int au_nhash_append_wh(struct au_nhash *whlist, char *name, int nlen, ino_t ino, ++ unsigned int d_type, aufs_bindex_t bindex, ++ unsigned char shwh); ++void au_vdir_free(struct au_vdir *vdir); ++int au_vdir_init(struct file *file); ++int au_vdir_fill_de(struct file *file, void *dirent, filldir_t filldir); ++ ++/* ioctl.c */ ++long aufs_ioctl_dir(struct file *file, unsigned int cmd, unsigned long arg); ++ ++#ifdef CONFIG_AUFS_RDU ++/* rdu.c */ ++long au_rdu_ioctl(struct file *file, unsigned int cmd, unsigned long arg); ++#ifdef CONFIG_COMPAT ++long au_rdu_compat_ioctl(struct file *file, unsigned int cmd, ++ unsigned long arg); ++#endif ++#else ++static inline long au_rdu_ioctl(struct file *file, unsigned int cmd, ++ unsigned long arg) ++{ ++ return -EINVAL; ++} ++#ifdef CONFIG_COMPAT ++static inline long au_rdu_compat_ioctl(struct file *file, unsigned int cmd, ++ unsigned long arg) ++{ ++ return -EINVAL; ++} ++#endif ++#endif ++ ++#endif /* __KERNEL__ */ ++#endif /* __AUFS_DIR_H__ */ +diff -uNr linux-3.2.0-gentoo-r1.orig//fs/aufs/dynop.c linux-3.2.0-gentoo-r1/fs/aufs/dynop.c +--- linux-3.2.0-gentoo-r1.orig//fs/aufs/dynop.c 
1970-01-01 01:00:00.000000000 +0100 ++++ linux-3.2.0-gentoo-r1/fs/aufs/dynop.c 2012-01-17 12:11:24.613544015 +0100 +@@ -0,0 +1,377 @@ ++/* ++ * Copyright (C) 2010-2012 Junjiro R. Okajima ++ * ++ * This program, aufs is free software; you can redistribute it and/or modify ++ * it under the terms of the GNU General Public License as published by ++ * the Free Software Foundation; either version 2 of the License, or ++ * (at your option) any later version. ++ * ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ * GNU General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public License ++ * along with this program; if not, write to the Free Software ++ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA ++ */ ++ ++/* ++ * dynamically customizable operations for regular files ++ */ ++ ++#include "aufs.h" ++ ++#define DyPrSym(key) AuDbgSym(key->dk_op.dy_hop) ++ ++/* ++ * How large will these lists be? ++ * Usually just a few elements, 20-30 at most for each, I guess. ++ */ ++static struct au_splhead dynop[AuDyLast]; ++ ++static struct au_dykey *dy_gfind_get(struct au_splhead *spl, const void *h_op) ++{ ++ struct au_dykey *key, *tmp; ++ struct list_head *head; ++ ++ key = NULL; ++ head = &spl->head; ++ rcu_read_lock(); ++ list_for_each_entry_rcu(tmp, head, dk_list) ++ if (tmp->dk_op.dy_hop == h_op) { ++ key = tmp; ++ kref_get(&key->dk_kref); ++ break; ++ } ++ rcu_read_unlock(); ++ ++ return key; ++} ++ ++static struct au_dykey *dy_bradd(struct au_branch *br, struct au_dykey *key) ++{ ++ struct au_dykey **k, *found; ++ const void *h_op = key->dk_op.dy_hop; ++ int i; ++ ++ found = NULL; ++ k = br->br_dykey; ++ for (i = 0; i < AuBrDynOp; i++) ++ if (k[i]) { ++ if (k[i]->dk_op.dy_hop == h_op) { ++ found = k[i]; ++ break; ++ } ++ } else ++ break; ++ if (!found) { ++ spin_lock(&br->br_dykey_lock); ++ for (; i < AuBrDynOp; i++) ++ if (k[i]) { ++ if (k[i]->dk_op.dy_hop == h_op) { ++ found = k[i]; ++ break; ++ } ++ } else { ++ k[i] = key; ++ break; ++ } ++ spin_unlock(&br->br_dykey_lock); ++ BUG_ON(i == AuBrDynOp); /* expand the array */ ++ } ++ ++ return found; ++} ++ ++/* kref_get() if @key is already added */ ++static struct au_dykey *dy_gadd(struct au_splhead *spl, struct au_dykey *key) ++{ ++ struct au_dykey *tmp, *found; ++ struct list_head *head; ++ const void *h_op = key->dk_op.dy_hop; ++ ++ found = NULL; ++ head = &spl->head; ++ spin_lock(&spl->spin); ++ list_for_each_entry(tmp, head, dk_list) ++ if (tmp->dk_op.dy_hop == h_op) { ++ kref_get(&tmp->dk_kref); ++ found = tmp; ++ break; ++ } ++ if (!found) ++ list_add_rcu(&key->dk_list, head); ++ spin_unlock(&spl->spin); ++ ++ if (!found) ++ DyPrSym(key); ++ return found; ++} ++ ++static void dy_free_rcu(struct rcu_head *rcu) ++{ ++ struct au_dykey *key; ++ ++ key = container_of(rcu, struct au_dykey, dk_rcu); ++ DyPrSym(key); ++ kfree(key); ++} ++ ++static void dy_free(struct kref *kref) ++{ ++ struct au_dykey *key; ++ struct au_splhead *spl; ++ ++ key = container_of(kref, struct au_dykey, dk_kref); ++ spl = dynop + key->dk_op.dy_type; ++ au_spl_del_rcu(&key->dk_list, spl); ++ call_rcu(&key->dk_rcu, dy_free_rcu); ++} ++ ++void au_dy_put(struct au_dykey *key) ++{ ++ kref_put(&key->dk_kref, dy_free); ++} ++ ++/* ---------------------------------------------------------------------- */ ++ ++#define DyDbgSize(cnt, op) AuDebugOn(cnt 
!= sizeof(op)/sizeof(void *))
++
++#ifdef CONFIG_AUFS_DEBUG
++#define DyDbgDeclare(cnt) unsigned int cnt = 0
++#define DyDbgInc(cnt) do { cnt++; } while (0)
++#else
++#define DyDbgDeclare(cnt) do {} while (0)
++#define DyDbgInc(cnt) do {} while (0)
++#endif
++
++#define DySet(func, dst, src, h_op, h_sb) do { \
++ DyDbgInc(cnt); \
++ if (h_op->func) { \
++ if (src.func) \
++ dst.func = src.func; \
++ else \
++ AuDbg("%s %s\n", au_sbtype(h_sb), #func); \
++ } \
++} while (0)
++
++#define DySetForce(func, dst, src) do { \
++ AuDebugOn(!src.func); \
++ DyDbgInc(cnt); \
++ dst.func = src.func; \
++} while (0)
++
++#define DySetAop(func) \
++ DySet(func, dyaop->da_op, aufs_aop, h_aop, h_sb)
++#define DySetAopForce(func) \
++ DySetForce(func, dyaop->da_op, aufs_aop)
++
++static void dy_aop(struct au_dykey *key, const void *h_op,
++ struct super_block *h_sb __maybe_unused)
++{
++ struct au_dyaop *dyaop = (void *)key;
++ const struct address_space_operations *h_aop = h_op;
++ DyDbgDeclare(cnt);
++
++ AuDbg("%s\n", au_sbtype(h_sb));
++
++ DySetAop(writepage);
++ DySetAopForce(readpage); /* force */
++ DySetAop(writepages);
++ DySetAop(set_page_dirty);
++ DySetAop(readpages);
++ DySetAop(write_begin);
++ DySetAop(write_end);
++ DySetAop(bmap);
++ DySetAop(invalidatepage);
++ DySetAop(releasepage);
++ DySetAop(freepage);
++ /* these two will be changed according to an aufs mount option */
++ DySetAop(direct_IO);
++ DySetAop(get_xip_mem);
++ DySetAop(migratepage);
++ DySetAop(launder_page);
++ DySetAop(is_partially_uptodate);
++ DySetAop(error_remove_page);
++
++ DyDbgSize(cnt, *h_aop);
++ dyaop->da_get_xip_mem = h_aop->get_xip_mem;
++}
++
++/* ---------------------------------------------------------------------- */
++
++static void dy_bug(struct kref *kref)
++{
++ BUG();
++}
++
++static struct au_dykey *dy_get(struct au_dynop *op, struct au_branch *br)
++{
++ struct au_dykey *key, *old;
++ struct au_splhead *spl;
++ struct op {
++ unsigned int sz;
++ void (*set)(struct au_dykey *key, const void *h_op,
++ struct super_block *h_sb __maybe_unused);
++ };
++ static const struct op a[] = {
++ [AuDy_AOP] = {
++ .sz = sizeof(struct au_dyaop),
++ .set = dy_aop
++ }
++ };
++ const struct op *p;
++
++ spl = dynop + op->dy_type;
++ key = dy_gfind_get(spl, op->dy_hop);
++ if (key)
++ goto out_add; /* success */
++
++ p = a + op->dy_type;
++ key = kzalloc(p->sz, GFP_NOFS);
++ if (unlikely(!key)) {
++ key = ERR_PTR(-ENOMEM);
++ goto out;
++ }
++
++ key->dk_op.dy_hop = op->dy_hop;
++ kref_init(&key->dk_kref);
++ p->set(key, op->dy_hop, br->br_mnt->mnt_sb);
++ old = dy_gadd(spl, key);
++ if (old) {
++ kfree(key);
++ key = old;
++ }
++
++out_add:
++ old = dy_bradd(br, key);
++ if (old)
++ /* its ref-count should never be zero here */
++ kref_put(&key->dk_kref, dy_bug);
++out:
++ return key;
++}
++
++/* ---------------------------------------------------------------------- */
++/*
++ * Aufs prohibits O_DIRECT by default even if the branch supports it.
++ * This behaviour is necessary to return an error from open(O_DIRECT) instead
++ * of the succeeding I/O. The dio mount option enables O_DIRECT and makes
++ * open(O_DIRECT) always succeed, but the succeeding I/O may return an error.
++ * See the aufs manual for details.
++ *
++ * To keep this behaviour, aufs has to set NULL to ->get_xip_mem too, and the
++ * performance of fadvise() and madvise() may be affected.
++ */
++static void dy_adx(struct au_dyaop *dyaop, int do_dx)
++{
++ if (!do_dx) {
++ dyaop->da_op.direct_IO = NULL;
++ dyaop->da_op.get_xip_mem = NULL;
++ } else {
++ dyaop->da_op.direct_IO = aufs_aop.direct_IO;
++ dyaop->da_op.get_xip_mem = aufs_aop.get_xip_mem;
++ if (!dyaop->da_get_xip_mem)
++ dyaop->da_op.get_xip_mem = NULL;
++ }
++}
++
++static struct au_dyaop *dy_aget(struct au_branch *br,
++ const struct address_space_operations *h_aop,
++ int do_dx)
++{
++ struct au_dyaop *dyaop;
++ struct au_dynop op;
++
++ op.dy_type = AuDy_AOP;
++ op.dy_haop = h_aop;
++ dyaop = (void *)dy_get(&op, br);
++ if (IS_ERR(dyaop))
++ goto out;
++ dy_adx(dyaop, do_dx);
++
++out:
++ return dyaop;
++}
++
++int au_dy_iaop(struct inode *inode, aufs_bindex_t bindex,
++ struct inode *h_inode)
++{
++ int err, do_dx;
++ struct super_block *sb;
++ struct au_branch *br;
++ struct au_dyaop *dyaop;
++
++ AuDebugOn(!S_ISREG(h_inode->i_mode));
++ IiMustWriteLock(inode);
++
++ sb = inode->i_sb;
++ br = au_sbr(sb, bindex);
++ do_dx = !!au_opt_test(au_mntflags(sb), DIO);
++ dyaop = dy_aget(br, h_inode->i_mapping->a_ops, do_dx);
++ err = PTR_ERR(dyaop);
++ if (IS_ERR(dyaop))
++ /* unnecessary to call dy_fput() */
++ goto out;
++
++ err = 0;
++ inode->i_mapping->a_ops = &dyaop->da_op;
++
++out:
++ return err;
++}
++
++/*
++ * Is it safe to replace a_ops while the inode/file is in operation?
++ * Yes, I hope so.
++ */
++int au_dy_irefresh(struct inode *inode)
++{
++ int err;
++ aufs_bindex_t bstart;
++ struct inode *h_inode;
++
++ err = 0;
++ if (S_ISREG(inode->i_mode)) {
++ bstart = au_ibstart(inode);
++ h_inode = au_h_iptr(inode, bstart);
++ err = au_dy_iaop(inode, bstart, h_inode);
++ }
++ return err;
++}
++
++void au_dy_arefresh(int do_dx)
++{
++ struct au_splhead *spl;
++ struct list_head *head;
++ struct au_dykey *key;
++
++ spl = dynop + AuDy_AOP;
++ head = &spl->head;
++ spin_lock(&spl->spin);
++ list_for_each_entry(key, head, dk_list)
++ dy_adx((void *)key, do_dx);
++ spin_unlock(&spl->spin);
++}
++
++/* ---------------------------------------------------------------------- */
++
++void __init au_dy_init(void)
++{
++ int i;
++
++ /* make sure that 'struct au_dykey *' can be any type */
++ BUILD_BUG_ON(offsetof(struct au_dyaop, da_key));
++
++ for (i = 0; i < AuDyLast; i++)
++ au_spl_init(dynop + i);
++}
++
++void au_dy_fin(void)
++{
++ int i;
++
++ for (i = 0; i < AuDyLast; i++)
++ WARN_ON(!list_empty(&dynop[i].head));
++}
+diff -uNr linux-3.2.0-gentoo-r1.orig//fs/aufs/dynop.h linux-3.2.0-gentoo-r1/fs/aufs/dynop.h
+--- linux-3.2.0-gentoo-r1.orig//fs/aufs/dynop.h 1970-01-01 01:00:00.000000000 +0100
++++ linux-3.2.0-gentoo-r1/fs/aufs/dynop.h 2012-01-17 12:11:24.613544015 +0100
+@@ -0,0 +1,76 @@
++/*
++ * Copyright (C) 2010-2012 Junjiro R. Okajima
++ *
++ * This program, aufs is free software; you can redistribute it and/or modify
++ * it under the terms of the GNU General Public License as published by
++ * the Free Software Foundation; either version 2 of the License, or
++ * (at your option) any later version.
++ *
++ * This program is distributed in the hope that it will be useful,
++ * but WITHOUT ANY WARRANTY; without even the implied warranty of
++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
++ * GNU General Public License for more details.
++ *
++ * You should have received a copy of the GNU General Public License
++ * along with this program; if not, write to the Free Software
++ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
++ */
++
++/*
++ * dynamically customizable operations (for regular files only)
++ */
++
++#ifndef __AUFS_DYNOP_H__
++#define __AUFS_DYNOP_H__
++
++#ifdef __KERNEL__
++
++#include "inode.h"
++
++enum {AuDy_AOP, AuDyLast};
++
++struct au_dynop {
++ int dy_type;
++ union {
++ const void *dy_hop;
++ const struct address_space_operations *dy_haop;
++ };
++};
++
++struct au_dykey {
++ union {
++ struct list_head dk_list;
++ struct rcu_head dk_rcu;
++ };
++ struct au_dynop dk_op;
++
++ /*
++ * while this key is in the branch's local array, its kref is held;
++ * when the branch is removed, the kref is put.
++ */
++ struct kref dk_kref;
++};
++
++/* stop unioning since their sizes are very different from each other */
++struct au_dyaop {
++ struct au_dykey da_key;
++ struct address_space_operations da_op; /* not const */
++ int (*da_get_xip_mem)(struct address_space *, pgoff_t, int,
++ void **, unsigned long *);
++};
++
++/* ---------------------------------------------------------------------- */
++
++/* dynop.c */
++struct au_branch;
++void au_dy_put(struct au_dykey *key);
++int au_dy_iaop(struct inode *inode, aufs_bindex_t bindex,
++ struct inode *h_inode);
++int au_dy_irefresh(struct inode *inode);
++void au_dy_arefresh(int do_dio);
++
++void __init au_dy_init(void);
++void au_dy_fin(void);
++
++#endif /* __KERNEL__ */
++#endif /* __AUFS_DYNOP_H__ */
+diff -uNr linux-3.2.0-gentoo-r1.orig//fs/aufs/export.c linux-3.2.0-gentoo-r1/fs/aufs/export.c
+--- linux-3.2.0-gentoo-r1.orig//fs/aufs/export.c 1970-01-01 01:00:00.000000000 +0100
++++ linux-3.2.0-gentoo-r1/fs/aufs/export.c 2012-01-17 12:11:24.629747864 +0100
+@@ -0,0 +1,804 @@
++/*
++ * Copyright (C) 2005-2012 Junjiro R. Okajima
++ *
++ * This program, aufs is free software; you can redistribute it and/or modify
++ * it under the terms of the GNU General Public License as published by
++ * the Free Software Foundation; either version 2 of the License, or
++ * (at your option) any later version.
++ *
++ * This program is distributed in the hope that it will be useful,
++ * but WITHOUT ANY WARRANTY; without even the implied warranty of
++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
++ * GNU General Public License for more details.
++ * ++ * You should have received a copy of the GNU General Public License ++ * along with this program; if not, write to the Free Software ++ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA ++ */ ++ ++/* ++ * export via nfs ++ */ ++ ++#include ++#include ++#include ++#include ++#include ++#include ++#include "aufs.h" ++ ++union conv { ++#ifdef CONFIG_AUFS_INO_T_64 ++ __u32 a[2]; ++#else ++ __u32 a[1]; ++#endif ++ ino_t ino; ++}; ++ ++static ino_t decode_ino(__u32 *a) ++{ ++ union conv u; ++ ++ BUILD_BUG_ON(sizeof(u.ino) != sizeof(u.a)); ++ u.a[0] = a[0]; ++#ifdef CONFIG_AUFS_INO_T_64 ++ u.a[1] = a[1]; ++#endif ++ return u.ino; ++} ++ ++static void encode_ino(__u32 *a, ino_t ino) ++{ ++ union conv u; ++ ++ u.ino = ino; ++ a[0] = u.a[0]; ++#ifdef CONFIG_AUFS_INO_T_64 ++ a[1] = u.a[1]; ++#endif ++} ++ ++/* NFS file handle */ ++enum { ++ Fh_br_id, ++ Fh_sigen, ++#ifdef CONFIG_AUFS_INO_T_64 ++ /* support 64bit inode number */ ++ Fh_ino1, ++ Fh_ino2, ++ Fh_dir_ino1, ++ Fh_dir_ino2, ++#else ++ Fh_ino1, ++ Fh_dir_ino1, ++#endif ++ Fh_igen, ++ Fh_h_type, ++ Fh_tail, ++ ++ Fh_ino = Fh_ino1, ++ Fh_dir_ino = Fh_dir_ino1 ++}; ++ ++static int au_test_anon(struct dentry *dentry) ++{ ++ /* note: read d_flags without d_lock */ ++ return !!(dentry->d_flags & DCACHE_DISCONNECTED); ++} ++ ++/* ---------------------------------------------------------------------- */ ++/* inode generation external table */ ++ ++void au_xigen_inc(struct inode *inode) ++{ ++ loff_t pos; ++ ssize_t sz; ++ __u32 igen; ++ struct super_block *sb; ++ struct au_sbinfo *sbinfo; ++ ++ sb = inode->i_sb; ++ AuDebugOn(!au_opt_test(au_mntflags(sb), XINO)); ++ ++ sbinfo = au_sbi(sb); ++ pos = inode->i_ino; ++ pos *= sizeof(igen); ++ igen = inode->i_generation + 1; ++ sz = xino_fwrite(sbinfo->si_xwrite, sbinfo->si_xigen, &igen, ++ sizeof(igen), &pos); ++ if (sz == sizeof(igen)) ++ return; /* success */ ++ ++ if (unlikely(sz >= 0)) ++ AuIOErr("xigen error (%zd)\n", sz); ++} ++ ++int au_xigen_new(struct inode *inode) ++{ ++ int err; ++ loff_t pos; ++ ssize_t sz; ++ struct super_block *sb; ++ struct au_sbinfo *sbinfo; ++ struct file *file; ++ ++ err = 0; ++ /* todo: dirty, at mount time */ ++ if (inode->i_ino == AUFS_ROOT_INO) ++ goto out; ++ sb = inode->i_sb; ++ SiMustAnyLock(sb); ++ if (unlikely(!au_opt_test(au_mntflags(sb), XINO))) ++ goto out; ++ ++ err = -EFBIG; ++ pos = inode->i_ino; ++ if (unlikely(au_loff_max / sizeof(inode->i_generation) - 1 < pos)) { ++ AuIOErr1("too large i%lld\n", pos); ++ goto out; ++ } ++ pos *= sizeof(inode->i_generation); ++ ++ err = 0; ++ sbinfo = au_sbi(sb); ++ file = sbinfo->si_xigen; ++ BUG_ON(!file); ++ ++ if (i_size_read(file->f_dentry->d_inode) ++ < pos + sizeof(inode->i_generation)) { ++ inode->i_generation = atomic_inc_return(&sbinfo->si_xigen_next); ++ sz = xino_fwrite(sbinfo->si_xwrite, file, &inode->i_generation, ++ sizeof(inode->i_generation), &pos); ++ } else ++ sz = xino_fread(sbinfo->si_xread, file, &inode->i_generation, ++ sizeof(inode->i_generation), &pos); ++ if (sz == sizeof(inode->i_generation)) ++ goto out; /* success */ ++ ++ err = sz; ++ if (unlikely(sz >= 0)) { ++ err = -EIO; ++ AuIOErr("xigen error (%zd)\n", sz); ++ } ++ ++out: ++ return err; ++} ++ ++int au_xigen_set(struct super_block *sb, struct file *base) ++{ ++ int err; ++ struct au_sbinfo *sbinfo; ++ struct file *file; ++ ++ SiMustWriteLock(sb); ++ ++ sbinfo = au_sbi(sb); ++ file = au_xino_create2(base, sbinfo->si_xigen); ++ err = PTR_ERR(file); ++ if (IS_ERR(file)) ++ goto out; ++ err = 0; ++ if 
(sbinfo->si_xigen) ++ fput(sbinfo->si_xigen); ++ sbinfo->si_xigen = file; ++ ++out: ++ return err; ++} ++ ++void au_xigen_clr(struct super_block *sb) ++{ ++ struct au_sbinfo *sbinfo; ++ ++ SiMustWriteLock(sb); ++ ++ sbinfo = au_sbi(sb); ++ if (sbinfo->si_xigen) { ++ fput(sbinfo->si_xigen); ++ sbinfo->si_xigen = NULL; ++ } ++} ++ ++/* ---------------------------------------------------------------------- */ ++ ++static struct dentry *decode_by_ino(struct super_block *sb, ino_t ino, ++ ino_t dir_ino) ++{ ++ struct dentry *dentry, *d; ++ struct inode *inode; ++ unsigned int sigen; ++ ++ dentry = NULL; ++ inode = ilookup(sb, ino); ++ if (!inode) ++ goto out; ++ ++ dentry = ERR_PTR(-ESTALE); ++ sigen = au_sigen(sb); ++ if (unlikely(is_bad_inode(inode) ++ || IS_DEADDIR(inode) ++ || sigen != au_iigen(inode))) ++ goto out_iput; ++ ++ dentry = NULL; ++ if (!dir_ino || S_ISDIR(inode->i_mode)) ++ dentry = d_find_alias(inode); ++ else { ++ spin_lock(&inode->i_lock); ++ list_for_each_entry(d, &inode->i_dentry, d_alias) { ++ spin_lock(&d->d_lock); ++ if (!au_test_anon(d) ++ && d->d_parent->d_inode->i_ino == dir_ino) { ++ dentry = dget_dlock(d); ++ spin_unlock(&d->d_lock); ++ break; ++ } ++ spin_unlock(&d->d_lock); ++ } ++ spin_unlock(&inode->i_lock); ++ } ++ if (unlikely(dentry && au_digen_test(dentry, sigen))) { ++ /* need to refresh */ ++ dput(dentry); ++ dentry = NULL; ++ } ++ ++out_iput: ++ iput(inode); ++out: ++ AuTraceErrPtr(dentry); ++ return dentry; ++} ++ ++/* ---------------------------------------------------------------------- */ ++ ++/* todo: dirty? */ ++/* if exportfs_decode_fh() passed vfsmount*, we could be happy */ ++ ++struct au_compare_mnt_args { ++ /* input */ ++ struct super_block *sb; ++ ++ /* output */ ++ struct vfsmount *mnt; ++}; ++ ++static int au_compare_mnt(struct vfsmount *mnt, void *arg) ++{ ++ struct au_compare_mnt_args *a = arg; ++ ++ if (mnt->mnt_sb != a->sb) ++ return 0; ++ a->mnt = mntget(mnt); ++ return 1; ++} ++ ++static struct vfsmount *au_mnt_get(struct super_block *sb) ++{ ++ int err; ++ struct au_compare_mnt_args args = { ++ .sb = sb ++ }; ++ struct mnt_namespace *ns; ++ ++ br_read_lock(vfsmount_lock); ++ /* no get/put ?? 
*/ ++ AuDebugOn(!current->nsproxy); ++ ns = current->nsproxy->mnt_ns; ++ AuDebugOn(!ns); ++ err = iterate_mounts(au_compare_mnt, &args, ns->root); ++ br_read_unlock(vfsmount_lock); ++ AuDebugOn(!err); ++ AuDebugOn(!args.mnt); ++ return args.mnt; ++} ++ ++struct au_nfsd_si_lock { ++ unsigned int sigen; ++ aufs_bindex_t bindex, br_id; ++ unsigned char force_lock; ++}; ++ ++static int si_nfsd_read_lock(struct super_block *sb, ++ struct au_nfsd_si_lock *nsi_lock) ++{ ++ int err; ++ aufs_bindex_t bindex; ++ ++ si_read_lock(sb, AuLock_FLUSH); ++ ++ /* branch id may be wrapped around */ ++ err = 0; ++ bindex = au_br_index(sb, nsi_lock->br_id); ++ if (bindex >= 0 && nsi_lock->sigen + AUFS_BRANCH_MAX > au_sigen(sb)) ++ goto out; /* success */ ++ ++ err = -ESTALE; ++ bindex = -1; ++ if (!nsi_lock->force_lock) ++ si_read_unlock(sb); ++ ++out: ++ nsi_lock->bindex = bindex; ++ return err; ++} ++ ++struct find_name_by_ino { ++ int called, found; ++ ino_t ino; ++ char *name; ++ int namelen; ++}; ++ ++static int ++find_name_by_ino(void *arg, const char *name, int namelen, loff_t offset, ++ u64 ino, unsigned int d_type) ++{ ++ struct find_name_by_ino *a = arg; ++ ++ a->called++; ++ if (a->ino != ino) ++ return 0; ++ ++ memcpy(a->name, name, namelen); ++ a->namelen = namelen; ++ a->found = 1; ++ return 1; ++} ++ ++static struct dentry *au_lkup_by_ino(struct path *path, ino_t ino, ++ struct au_nfsd_si_lock *nsi_lock) ++{ ++ struct dentry *dentry, *parent; ++ struct file *file; ++ struct inode *dir; ++ struct find_name_by_ino arg; ++ int err; ++ ++ parent = path->dentry; ++ if (nsi_lock) ++ si_read_unlock(parent->d_sb); ++ file = vfsub_dentry_open(path, au_dir_roflags); ++ dentry = (void *)file; ++ if (IS_ERR(file)) ++ goto out; ++ ++ dentry = ERR_PTR(-ENOMEM); ++ arg.name = __getname_gfp(GFP_NOFS); ++ if (unlikely(!arg.name)) ++ goto out_file; ++ arg.ino = ino; ++ arg.found = 0; ++ do { ++ arg.called = 0; ++ /* smp_mb(); */ ++ err = vfsub_readdir(file, find_name_by_ino, &arg); ++ } while (!err && !arg.found && arg.called); ++ dentry = ERR_PTR(err); ++ if (unlikely(err)) ++ goto out_name; ++ dentry = ERR_PTR(-ENOENT); ++ if (!arg.found) ++ goto out_name; ++ ++ /* do not call au_lkup_one() */ ++ dir = parent->d_inode; ++ mutex_lock(&dir->i_mutex); ++ dentry = vfsub_lookup_one_len(arg.name, parent, arg.namelen); ++ mutex_unlock(&dir->i_mutex); ++ AuTraceErrPtr(dentry); ++ if (IS_ERR(dentry)) ++ goto out_name; ++ AuDebugOn(au_test_anon(dentry)); ++ if (unlikely(!dentry->d_inode)) { ++ dput(dentry); ++ dentry = ERR_PTR(-ENOENT); ++ } ++ ++out_name: ++ __putname(arg.name); ++out_file: ++ fput(file); ++out: ++ if (unlikely(nsi_lock ++ && si_nfsd_read_lock(parent->d_sb, nsi_lock) < 0)) ++ if (!IS_ERR(dentry)) { ++ dput(dentry); ++ dentry = ERR_PTR(-ESTALE); ++ } ++ AuTraceErrPtr(dentry); ++ return dentry; ++} ++ ++static struct dentry *decode_by_dir_ino(struct super_block *sb, ino_t ino, ++ ino_t dir_ino, ++ struct au_nfsd_si_lock *nsi_lock) ++{ ++ struct dentry *dentry; ++ struct path path; ++ ++ if (dir_ino != AUFS_ROOT_INO) { ++ path.dentry = decode_by_ino(sb, dir_ino, 0); ++ dentry = path.dentry; ++ if (!path.dentry || IS_ERR(path.dentry)) ++ goto out; ++ AuDebugOn(au_test_anon(path.dentry)); ++ } else ++ path.dentry = dget(sb->s_root); ++ ++ path.mnt = au_mnt_get(sb); ++ dentry = au_lkup_by_ino(&path, ino, nsi_lock); ++ path_put(&path); ++ ++out: ++ AuTraceErrPtr(dentry); ++ return dentry; ++} ++ ++/* ---------------------------------------------------------------------- */ ++ ++static int h_acceptable(void 
*expv, struct dentry *dentry) ++{ ++ return 1; ++} ++ ++static char *au_build_path(struct dentry *h_parent, struct path *h_rootpath, ++ char *buf, int len, struct super_block *sb) ++{ ++ char *p; ++ int n; ++ struct path path; ++ ++ p = d_path(h_rootpath, buf, len); ++ if (IS_ERR(p)) ++ goto out; ++ n = strlen(p); ++ ++ path.mnt = h_rootpath->mnt; ++ path.dentry = h_parent; ++ p = d_path(&path, buf, len); ++ if (IS_ERR(p)) ++ goto out; ++ if (n != 1) ++ p += n; ++ ++ path.mnt = au_mnt_get(sb); ++ path.dentry = sb->s_root; ++ p = d_path(&path, buf, len - strlen(p)); ++ mntput(path.mnt); ++ if (IS_ERR(p)) ++ goto out; ++ if (n != 1) ++ p[strlen(p)] = '/'; ++ ++out: ++ AuTraceErrPtr(p); ++ return p; ++} ++ ++static ++struct dentry *decode_by_path(struct super_block *sb, ino_t ino, __u32 *fh, ++ int fh_len, struct au_nfsd_si_lock *nsi_lock) ++{ ++ struct dentry *dentry, *h_parent, *root; ++ struct super_block *h_sb; ++ char *pathname, *p; ++ struct vfsmount *h_mnt; ++ struct au_branch *br; ++ int err; ++ struct path path; ++ ++ br = au_sbr(sb, nsi_lock->bindex); ++ h_mnt = br->br_mnt; ++ h_sb = h_mnt->mnt_sb; ++ /* todo: call lower fh_to_dentry()? fh_to_parent()? */ ++ h_parent = exportfs_decode_fh(h_mnt, (void *)(fh + Fh_tail), ++ fh_len - Fh_tail, fh[Fh_h_type], ++ h_acceptable, /*context*/NULL); ++ dentry = h_parent; ++ if (unlikely(!h_parent || IS_ERR(h_parent))) { ++ AuWarn1("%s decode_fh failed, %ld\n", ++ au_sbtype(h_sb), PTR_ERR(h_parent)); ++ goto out; ++ } ++ dentry = NULL; ++ if (unlikely(au_test_anon(h_parent))) { ++ AuWarn1("%s decode_fh returned a disconnected dentry\n", ++ au_sbtype(h_sb)); ++ goto out_h_parent; ++ } ++ ++ dentry = ERR_PTR(-ENOMEM); ++ pathname = (void *)__get_free_page(GFP_NOFS); ++ if (unlikely(!pathname)) ++ goto out_h_parent; ++ ++ root = sb->s_root; ++ path.mnt = h_mnt; ++ di_read_lock_parent(root, !AuLock_IR); ++ path.dentry = au_h_dptr(root, nsi_lock->bindex); ++ di_read_unlock(root, !AuLock_IR); ++ p = au_build_path(h_parent, &path, pathname, PAGE_SIZE, sb); ++ dentry = (void *)p; ++ if (IS_ERR(p)) ++ goto out_pathname; ++ ++ si_read_unlock(sb); ++ err = vfsub_kern_path(p, LOOKUP_FOLLOW | LOOKUP_DIRECTORY, &path); ++ dentry = ERR_PTR(err); ++ if (unlikely(err)) ++ goto out_relock; ++ ++ dentry = ERR_PTR(-ENOENT); ++ AuDebugOn(au_test_anon(path.dentry)); ++ if (unlikely(!path.dentry->d_inode)) ++ goto out_path; ++ ++ if (ino != path.dentry->d_inode->i_ino) ++ dentry = au_lkup_by_ino(&path, ino, /*nsi_lock*/NULL); ++ else ++ dentry = dget(path.dentry); ++ ++out_path: ++ path_put(&path); ++out_relock: ++ if (unlikely(si_nfsd_read_lock(sb, nsi_lock) < 0)) ++ if (!IS_ERR(dentry)) { ++ dput(dentry); ++ dentry = ERR_PTR(-ESTALE); ++ } ++out_pathname: ++ free_page((unsigned long)pathname); ++out_h_parent: ++ dput(h_parent); ++out: ++ AuTraceErrPtr(dentry); ++ return dentry; ++} ++ ++/* ---------------------------------------------------------------------- */ ++ ++static struct dentry * ++aufs_fh_to_dentry(struct super_block *sb, struct fid *fid, int fh_len, ++ int fh_type) ++{ ++ struct dentry *dentry; ++ __u32 *fh = fid->raw; ++ struct au_branch *br; ++ ino_t ino, dir_ino; ++ struct au_nfsd_si_lock nsi_lock = { ++ .force_lock = 0 ++ }; ++ ++ dentry = ERR_PTR(-ESTALE); ++ /* it should never happen, but the file handle is unreliable */ ++ if (unlikely(fh_len < Fh_tail)) ++ goto out; ++ nsi_lock.sigen = fh[Fh_sigen]; ++ nsi_lock.br_id = fh[Fh_br_id]; ++ ++ /* branch id may be wrapped around */ ++ br = NULL; ++ if (unlikely(si_nfsd_read_lock(sb, &nsi_lock))) ++ 
goto out; ++ nsi_lock.force_lock = 1; ++ ++ /* is this inode still cached? */ ++ ino = decode_ino(fh + Fh_ino); ++ /* it should never happen */ ++ if (unlikely(ino == AUFS_ROOT_INO)) ++ goto out; ++ ++ dir_ino = decode_ino(fh + Fh_dir_ino); ++ dentry = decode_by_ino(sb, ino, dir_ino); ++ if (IS_ERR(dentry)) ++ goto out_unlock; ++ if (dentry) ++ goto accept; ++ ++ /* is the parent dir cached? */ ++ br = au_sbr(sb, nsi_lock.bindex); ++ atomic_inc(&br->br_count); ++ dentry = decode_by_dir_ino(sb, ino, dir_ino, &nsi_lock); ++ if (IS_ERR(dentry)) ++ goto out_unlock; ++ if (dentry) ++ goto accept; ++ ++ /* lookup path */ ++ dentry = decode_by_path(sb, ino, fh, fh_len, &nsi_lock); ++ if (IS_ERR(dentry)) ++ goto out_unlock; ++ if (unlikely(!dentry)) ++ /* todo?: make it ESTALE */ ++ goto out_unlock; ++ ++accept: ++ if (!au_digen_test(dentry, au_sigen(sb)) ++ && dentry->d_inode->i_generation == fh[Fh_igen]) ++ goto out_unlock; /* success */ ++ ++ dput(dentry); ++ dentry = ERR_PTR(-ESTALE); ++out_unlock: ++ if (br) ++ atomic_dec(&br->br_count); ++ si_read_unlock(sb); ++out: ++ AuTraceErrPtr(dentry); ++ return dentry; ++} ++ ++#if 0 /* reserved for future use */ ++/* support subtreecheck option */ ++static struct dentry *aufs_fh_to_parent(struct super_block *sb, struct fid *fid, ++ int fh_len, int fh_type) ++{ ++ struct dentry *parent; ++ __u32 *fh = fid->raw; ++ ino_t dir_ino; ++ ++ dir_ino = decode_ino(fh + Fh_dir_ino); ++ parent = decode_by_ino(sb, dir_ino, 0); ++ if (IS_ERR(parent)) ++ goto out; ++ if (!parent) ++ parent = decode_by_path(sb, au_br_index(sb, fh[Fh_br_id]), ++ dir_ino, fh, fh_len); ++ ++out: ++ AuTraceErrPtr(parent); ++ return parent; ++} ++#endif ++ ++/* ---------------------------------------------------------------------- */ ++ ++static int aufs_encode_fh(struct dentry *dentry, __u32 *fh, int *max_len, ++ int connectable) ++{ ++ int err; ++ aufs_bindex_t bindex, bend; ++ struct super_block *sb, *h_sb; ++ struct inode *inode; ++ struct dentry *parent, *h_parent; ++ struct au_branch *br; ++ ++ AuDebugOn(au_test_anon(dentry)); ++ ++ parent = NULL; ++ err = -ENOSPC; ++ if (unlikely(*max_len <= Fh_tail)) { ++ AuWarn1("NFSv2 client (max_len %d)?\n", *max_len); ++ goto out; ++ } ++ ++ err = FILEID_ROOT; ++ if (IS_ROOT(dentry)) { ++ AuDebugOn(dentry->d_inode->i_ino != AUFS_ROOT_INO); ++ goto out; ++ } ++ ++ h_parent = NULL; ++ err = aufs_read_lock(dentry, AuLock_FLUSH | AuLock_IR | AuLock_GEN); ++ if (unlikely(err)) ++ goto out; ++ ++ inode = dentry->d_inode; ++ AuDebugOn(!inode); ++ sb = dentry->d_sb; ++#ifdef CONFIG_AUFS_DEBUG ++ if (unlikely(!au_opt_test(au_mntflags(sb), XINO))) ++ AuWarn1("NFS-exporting requires xino\n"); ++#endif ++ err = -EIO; ++ parent = dget_parent(dentry); ++ di_read_lock_parent(parent, !AuLock_IR); ++ bend = au_dbtaildir(parent); ++ for (bindex = au_dbstart(parent); bindex <= bend; bindex++) { ++ h_parent = au_h_dptr(parent, bindex); ++ if (h_parent) { ++ dget(h_parent); ++ break; ++ } ++ } ++ if (unlikely(!h_parent)) ++ goto out_unlock; ++ ++ err = -EPERM; ++ br = au_sbr(sb, bindex); ++ h_sb = br->br_mnt->mnt_sb; ++ if (unlikely(!h_sb->s_export_op)) { ++ AuErr1("%s branch is not exportable\n", au_sbtype(h_sb)); ++ goto out_dput; ++ } ++ ++ fh[Fh_br_id] = br->br_id; ++ fh[Fh_sigen] = au_sigen(sb); ++ encode_ino(fh + Fh_ino, inode->i_ino); ++ encode_ino(fh + Fh_dir_ino, parent->d_inode->i_ino); ++ fh[Fh_igen] = inode->i_generation; ++ ++ *max_len -= Fh_tail; ++ fh[Fh_h_type] = exportfs_encode_fh(h_parent, (void *)(fh + Fh_tail), ++ max_len, ++ /*connectable or 
subtreecheck*/0); ++ err = fh[Fh_h_type]; ++ *max_len += Fh_tail; ++ /* todo: macros? */ ++ if (err != 255) ++ err = 99; ++ else ++ AuWarn1("%s encode_fh failed\n", au_sbtype(h_sb)); ++ ++out_dput: ++ dput(h_parent); ++out_unlock: ++ di_read_unlock(parent, !AuLock_IR); ++ dput(parent); ++ aufs_read_unlock(dentry, AuLock_IR); ++out: ++ if (unlikely(err < 0)) ++ err = 255; ++ return err; ++} ++ ++/* ---------------------------------------------------------------------- */ ++ ++static int aufs_commit_metadata(struct inode *inode) ++{ ++ int err; ++ aufs_bindex_t bindex; ++ struct super_block *sb; ++ struct inode *h_inode; ++ int (*f)(struct inode *inode); ++ ++ sb = inode->i_sb; ++ si_read_lock(sb, AuLock_FLUSH | AuLock_NOPLMW); ++ ii_write_lock_child(inode); ++ bindex = au_ibstart(inode); ++ AuDebugOn(bindex < 0); ++ h_inode = au_h_iptr(inode, bindex); ++ ++ f = h_inode->i_sb->s_export_op->commit_metadata; ++ if (f) ++ err = f(h_inode); ++ else { ++ struct writeback_control wbc = { ++ .sync_mode = WB_SYNC_ALL, ++ .nr_to_write = 0 /* metadata only */ ++ }; ++ ++ err = sync_inode(h_inode, &wbc); ++ } ++ ++ au_cpup_attr_timesizes(inode); ++ ii_write_unlock(inode); ++ si_read_unlock(sb); ++ return err; ++} ++ ++/* ---------------------------------------------------------------------- */ ++ ++static struct export_operations aufs_export_op = { ++ .fh_to_dentry = aufs_fh_to_dentry, ++ /* .fh_to_parent = aufs_fh_to_parent, */ ++ .encode_fh = aufs_encode_fh, ++ .commit_metadata = aufs_commit_metadata ++}; ++ ++void au_export_init(struct super_block *sb) ++{ ++ struct au_sbinfo *sbinfo; ++ __u32 u; ++ ++ sb->s_export_op = &aufs_export_op; ++ sbinfo = au_sbi(sb); ++ sbinfo->si_xigen = NULL; ++ get_random_bytes(&u, sizeof(u)); ++ BUILD_BUG_ON(sizeof(u) != sizeof(int)); ++ atomic_set(&sbinfo->si_xigen_next, u); ++} +diff -uNr linux-3.2.0-gentoo-r1.orig//fs/aufs/file.c linux-3.2.0-gentoo-r1/fs/aufs/file.c +--- linux-3.2.0-gentoo-r1.orig//fs/aufs/file.c 1970-01-01 01:00:00.000000000 +0100 ++++ linux-3.2.0-gentoo-r1/fs/aufs/file.c 2012-01-17 12:11:24.655211057 +0100 +@@ -0,0 +1,673 @@ ++/* ++ * Copyright (C) 2005-2012 Junjiro R. Okajima ++ * ++ * This program, aufs is free software; you can redistribute it and/or modify ++ * it under the terms of the GNU General Public License as published by ++ * the Free Software Foundation; either version 2 of the License, or ++ * (at your option) any later version. ++ * ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ * GNU General Public License for more details. 
++ * ++ * You should have received a copy of the GNU General Public License ++ * along with this program; if not, write to the Free Software ++ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA ++ */ ++ ++/* ++ * handling file/dir, and address_space operation ++ */ ++ ++#include ++#include "aufs.h" ++ ++/* drop flags for writing */ ++unsigned int au_file_roflags(unsigned int flags) ++{ ++ flags &= ~(O_WRONLY | O_RDWR | O_APPEND | O_CREAT | O_TRUNC); ++ flags |= O_RDONLY | O_NOATIME; ++ return flags; ++} ++ ++/* common functions to regular file and dir */ ++struct file *au_h_open(struct dentry *dentry, aufs_bindex_t bindex, int flags, ++ struct file *file) ++{ ++ struct file *h_file; ++ struct dentry *h_dentry; ++ struct inode *h_inode; ++ struct super_block *sb; ++ struct au_branch *br; ++ struct path h_path; ++ int err, exec_flag; ++ ++ /* a race condition can happen between open and unlink/rmdir */ ++ h_file = ERR_PTR(-ENOENT); ++ h_dentry = au_h_dptr(dentry, bindex); ++ if (au_test_nfsd() && !h_dentry) ++ goto out; ++ h_inode = h_dentry->d_inode; ++ if (au_test_nfsd() && !h_inode) ++ goto out; ++ spin_lock(&h_dentry->d_lock); ++ err = (!d_unhashed(dentry) && d_unlinked(h_dentry)) ++ || !h_inode ++ /* || !dentry->d_inode->i_nlink */ ++ ; ++ spin_unlock(&h_dentry->d_lock); ++ if (unlikely(err)) ++ goto out; ++ ++ sb = dentry->d_sb; ++ br = au_sbr(sb, bindex); ++ h_file = ERR_PTR(-EACCES); ++ exec_flag = flags & __FMODE_EXEC; ++ if (exec_flag && (br->br_mnt->mnt_flags & MNT_NOEXEC)) ++ goto out; ++ ++ /* drop flags for writing */ ++ if (au_test_ro(sb, bindex, dentry->d_inode)) ++ flags = au_file_roflags(flags); ++ flags &= ~O_CREAT; ++ atomic_inc(&br->br_count); ++ h_path.dentry = h_dentry; ++ h_path.mnt = br->br_mnt; ++ if (!au_special_file(h_inode->i_mode)) ++ h_file = vfsub_dentry_open(&h_path, flags); ++ else { ++ /* this block depends upon the configuration */ ++ di_read_unlock(dentry, AuLock_IR); ++ fi_write_unlock(file); ++ si_read_unlock(sb); ++ h_file = vfsub_dentry_open(&h_path, flags); ++ si_noflush_read_lock(sb); ++ fi_write_lock(file); ++ di_read_lock_child(dentry, AuLock_IR); ++ } ++ if (IS_ERR(h_file)) ++ goto out_br; ++ ++ if (exec_flag) { ++ err = deny_write_access(h_file); ++ if (unlikely(err)) { ++ fput(h_file); ++ h_file = ERR_PTR(err); ++ goto out_br; ++ } ++ } ++ fsnotify_open(h_file); ++ goto out; /* success */ ++ ++out_br: ++ atomic_dec(&br->br_count); ++out: ++ return h_file; ++} ++ ++int au_do_open(struct file *file, int (*open)(struct file *file, int flags), ++ struct au_fidir *fidir) ++{ ++ int err; ++ struct dentry *dentry; ++ ++ err = au_finfo_init(file, fidir); ++ if (unlikely(err)) ++ goto out; ++ ++ dentry = file->f_dentry; ++ di_read_lock_child(dentry, AuLock_IR); ++ err = open(file, vfsub_file_flags(file)); ++ di_read_unlock(dentry, AuLock_IR); ++ ++ fi_write_unlock(file); ++ if (unlikely(err)) { ++ au_fi(file)->fi_hdir = NULL; ++ au_finfo_fin(file); ++ } ++ ++out: ++ return err; ++} ++ ++int au_reopen_nondir(struct file *file) ++{ ++ int err; ++ aufs_bindex_t bstart; ++ struct dentry *dentry; ++ struct file *h_file, *h_file_tmp; ++ ++ dentry = file->f_dentry; ++ AuDebugOn(au_special_file(dentry->d_inode->i_mode)); ++ bstart = au_dbstart(dentry); ++ h_file_tmp = NULL; ++ if (au_fbstart(file) == bstart) { ++ h_file = au_hf_top(file); ++ if (file->f_mode == h_file->f_mode) ++ return 0; /* success */ ++ h_file_tmp = h_file; ++ get_file(h_file_tmp); ++ au_set_h_fptr(file, bstart, NULL); ++ } ++ AuDebugOn(au_fi(file)->fi_hdir); ++ 
AuDebugOn(au_fbstart(file) < bstart); ++ ++ h_file = au_h_open(dentry, bstart, vfsub_file_flags(file) & ~O_TRUNC, ++ file); ++ err = PTR_ERR(h_file); ++ if (IS_ERR(h_file)) ++ goto out; /* todo: close all? */ ++ ++ err = 0; ++ au_set_fbstart(file, bstart); ++ au_set_h_fptr(file, bstart, h_file); ++ au_update_figen(file); ++ /* todo: necessary? */ ++ /* file->f_ra = h_file->f_ra; */ ++ ++out: ++ if (h_file_tmp) ++ fput(h_file_tmp); ++ return err; ++} ++ ++/* ---------------------------------------------------------------------- */ ++ ++static int au_reopen_wh(struct file *file, aufs_bindex_t btgt, ++ struct dentry *hi_wh) ++{ ++ int err; ++ aufs_bindex_t bstart; ++ struct au_dinfo *dinfo; ++ struct dentry *h_dentry; ++ struct au_hdentry *hdp; ++ ++ dinfo = au_di(file->f_dentry); ++ AuRwMustWriteLock(&dinfo->di_rwsem); ++ ++ bstart = dinfo->di_bstart; ++ dinfo->di_bstart = btgt; ++ hdp = dinfo->di_hdentry; ++ h_dentry = hdp[0 + btgt].hd_dentry; ++ hdp[0 + btgt].hd_dentry = hi_wh; ++ err = au_reopen_nondir(file); ++ hdp[0 + btgt].hd_dentry = h_dentry; ++ dinfo->di_bstart = bstart; ++ ++ return err; ++} ++ ++static int au_ready_to_write_wh(struct file *file, loff_t len, ++ aufs_bindex_t bcpup) ++{ ++ int err; ++ struct inode *inode, *h_inode; ++ struct dentry *dentry, *h_dentry, *hi_wh; ++ ++ dentry = file->f_dentry; ++ au_update_dbstart(dentry); ++ inode = dentry->d_inode; ++ h_inode = NULL; ++ if (au_dbstart(dentry) <= bcpup && au_dbend(dentry) >= bcpup) { ++ h_dentry = au_h_dptr(dentry, bcpup); ++ if (h_dentry) ++ h_inode = h_dentry->d_inode; ++ } ++ hi_wh = au_hi_wh(inode, bcpup); ++ if (!hi_wh && !h_inode) ++ err = au_sio_cpup_wh(dentry, bcpup, len, file); ++ else ++ /* already copied-up after unlink */ ++ err = au_reopen_wh(file, bcpup, hi_wh); ++ ++ if (!err ++ && inode->i_nlink > 1 ++ && au_opt_test(au_mntflags(dentry->d_sb), PLINK)) ++ au_plink_append(inode, bcpup, au_h_dptr(dentry, bcpup)); ++ ++ return err; ++} ++ ++/* ++ * prepare the @file for writing. 
++ */ ++int au_ready_to_write(struct file *file, loff_t len, struct au_pin *pin) ++{ ++ int err; ++ aufs_bindex_t bstart, bcpup, dbstart; ++ struct dentry *dentry, *parent, *h_dentry; ++ struct inode *h_inode, *inode; ++ struct super_block *sb; ++ struct file *h_file; ++ ++ dentry = file->f_dentry; ++ sb = dentry->d_sb; ++ inode = dentry->d_inode; ++ AuDebugOn(au_special_file(inode->i_mode)); ++ bstart = au_fbstart(file); ++ err = au_test_ro(sb, bstart, inode); ++ if (!err && (au_hf_top(file)->f_mode & FMODE_WRITE)) { ++ err = au_pin(pin, dentry, bstart, AuOpt_UDBA_NONE, /*flags*/0); ++ goto out; ++ } ++ ++ /* need to cpup or reopen */ ++ parent = dget_parent(dentry); ++ di_write_lock_parent(parent); ++ err = AuWbrCopyup(au_sbi(sb), dentry); ++ bcpup = err; ++ if (unlikely(err < 0)) ++ goto out_dgrade; ++ err = 0; ++ ++ if (!d_unhashed(dentry) && !au_h_dptr(parent, bcpup)) { ++ err = au_cpup_dirs(dentry, bcpup); ++ if (unlikely(err)) ++ goto out_dgrade; ++ } ++ ++ err = au_pin(pin, dentry, bcpup, AuOpt_UDBA_NONE, ++ AuPin_DI_LOCKED | AuPin_MNT_WRITE); ++ if (unlikely(err)) ++ goto out_dgrade; ++ ++ h_dentry = au_hf_top(file)->f_dentry; ++ h_inode = h_dentry->d_inode; ++ dbstart = au_dbstart(dentry); ++ if (dbstart <= bcpup) { ++ h_dentry = au_h_dptr(dentry, bcpup); ++ AuDebugOn(!h_dentry); ++ h_inode = h_dentry->d_inode; ++ AuDebugOn(!h_inode); ++ bstart = bcpup; ++ } ++ ++ if (dbstart <= bcpup /* just reopen */ ++ || !d_unhashed(dentry) /* copyup and reopen */ ++ ) { ++ mutex_lock_nested(&h_inode->i_mutex, AuLsc_I_CHILD); ++ h_file = au_h_open_pre(dentry, bstart); ++ if (IS_ERR(h_file)) { ++ err = PTR_ERR(h_file); ++ h_file = NULL; ++ } else { ++ di_downgrade_lock(parent, AuLock_IR); ++ if (dbstart > bcpup) ++ err = au_sio_cpup_simple(dentry, bcpup, len, ++ AuCpup_DTIME); ++ if (!err) ++ err = au_reopen_nondir(file); ++ } ++ mutex_unlock(&h_inode->i_mutex); ++ au_h_open_post(dentry, bstart, h_file); ++ } else { /* copyup as wh and reopen */ ++ /* ++ * since writable hfsplus branch is not supported, ++ * h_open_pre/post() are unnecessary. 
++ */ ++ mutex_lock_nested(&h_inode->i_mutex, AuLsc_I_CHILD); ++ err = au_ready_to_write_wh(file, len, bcpup); ++ di_downgrade_lock(parent, AuLock_IR); ++ mutex_unlock(&h_inode->i_mutex); ++ } ++ ++ if (!err) { ++ au_pin_set_parent_lflag(pin, /*lflag*/0); ++ goto out_dput; /* success */ ++ } ++ au_unpin(pin); ++ goto out_unlock; ++ ++out_dgrade: ++ di_downgrade_lock(parent, AuLock_IR); ++out_unlock: ++ di_read_unlock(parent, AuLock_IR); ++out_dput: ++ dput(parent); ++out: ++ return err; ++} ++ ++/* ---------------------------------------------------------------------- */ ++ ++int au_do_flush(struct file *file, fl_owner_t id, ++ int (*flush)(struct file *file, fl_owner_t id)) ++{ ++ int err; ++ struct dentry *dentry; ++ struct super_block *sb; ++ struct inode *inode; ++ ++ dentry = file->f_dentry; ++ sb = dentry->d_sb; ++ inode = dentry->d_inode; ++ si_noflush_read_lock(sb); ++ fi_read_lock(file); ++ ii_read_lock_child(inode); ++ ++ err = flush(file, id); ++ au_cpup_attr_timesizes(inode); ++ ++ ii_read_unlock(inode); ++ fi_read_unlock(file); ++ si_read_unlock(sb); ++ return err; ++} ++ ++/* ---------------------------------------------------------------------- */ ++ ++static int au_file_refresh_by_inode(struct file *file, int *need_reopen) ++{ ++ int err; ++ aufs_bindex_t bstart; ++ struct au_pin pin; ++ struct au_finfo *finfo; ++ struct dentry *dentry, *parent, *hi_wh; ++ struct inode *inode; ++ struct super_block *sb; ++ ++ FiMustWriteLock(file); ++ ++ err = 0; ++ finfo = au_fi(file); ++ dentry = file->f_dentry; ++ sb = dentry->d_sb; ++ inode = dentry->d_inode; ++ bstart = au_ibstart(inode); ++ if (bstart == finfo->fi_btop || IS_ROOT(dentry)) ++ goto out; ++ ++ parent = dget_parent(dentry); ++ if (au_test_ro(sb, bstart, inode)) { ++ di_read_lock_parent(parent, !AuLock_IR); ++ err = AuWbrCopyup(au_sbi(sb), dentry); ++ bstart = err; ++ di_read_unlock(parent, !AuLock_IR); ++ if (unlikely(err < 0)) ++ goto out_parent; ++ err = 0; ++ } ++ ++ di_read_lock_parent(parent, AuLock_IR); ++ hi_wh = au_hi_wh(inode, bstart); ++ if (!S_ISDIR(inode->i_mode) ++ && au_opt_test(au_mntflags(sb), PLINK) ++ && au_plink_test(inode) ++ && !d_unhashed(dentry)) { ++ err = au_test_and_cpup_dirs(dentry, bstart); ++ if (unlikely(err)) ++ goto out_unlock; ++ ++ /* always superio. 
*/ ++ err = au_pin(&pin, dentry, bstart, AuOpt_UDBA_NONE, ++ AuPin_DI_LOCKED | AuPin_MNT_WRITE); ++ if (!err) ++ err = au_sio_cpup_simple(dentry, bstart, -1, ++ AuCpup_DTIME); ++ au_unpin(&pin); ++ } else if (hi_wh) { ++ /* already copied-up after unlink */ ++ err = au_reopen_wh(file, bstart, hi_wh); ++ *need_reopen = 0; ++ } ++ ++out_unlock: ++ di_read_unlock(parent, AuLock_IR); ++out_parent: ++ dput(parent); ++out: ++ return err; ++} ++ ++static void au_do_refresh_dir(struct file *file) ++{ ++ aufs_bindex_t bindex, bend, new_bindex, brid; ++ struct au_hfile *p, tmp, *q; ++ struct au_finfo *finfo; ++ struct super_block *sb; ++ struct au_fidir *fidir; ++ ++ FiMustWriteLock(file); ++ ++ sb = file->f_dentry->d_sb; ++ finfo = au_fi(file); ++ fidir = finfo->fi_hdir; ++ AuDebugOn(!fidir); ++ p = fidir->fd_hfile + finfo->fi_btop; ++ brid = p->hf_br->br_id; ++ bend = fidir->fd_bbot; ++ for (bindex = finfo->fi_btop; bindex <= bend; bindex++, p++) { ++ if (!p->hf_file) ++ continue; ++ ++ new_bindex = au_br_index(sb, p->hf_br->br_id); ++ if (new_bindex == bindex) ++ continue; ++ if (new_bindex < 0) { ++ au_set_h_fptr(file, bindex, NULL); ++ continue; ++ } ++ ++ /* swap two lower inode, and loop again */ ++ q = fidir->fd_hfile + new_bindex; ++ tmp = *q; ++ *q = *p; ++ *p = tmp; ++ if (tmp.hf_file) { ++ bindex--; ++ p--; ++ } ++ } ++ ++ p = fidir->fd_hfile; ++ if (!au_test_mmapped(file) && !d_unlinked(file->f_dentry)) { ++ bend = au_sbend(sb); ++ for (finfo->fi_btop = 0; finfo->fi_btop <= bend; ++ finfo->fi_btop++, p++) ++ if (p->hf_file) { ++ if (p->hf_file->f_dentry ++ && p->hf_file->f_dentry->d_inode) ++ break; ++ else ++ au_hfput(p, file); ++ } ++ } else { ++ bend = au_br_index(sb, brid); ++ for (finfo->fi_btop = 0; finfo->fi_btop < bend; ++ finfo->fi_btop++, p++) ++ if (p->hf_file) ++ au_hfput(p, file); ++ bend = au_sbend(sb); ++ } ++ ++ p = fidir->fd_hfile + bend; ++ for (fidir->fd_bbot = bend; fidir->fd_bbot >= finfo->fi_btop; ++ fidir->fd_bbot--, p--) ++ if (p->hf_file) { ++ if (p->hf_file->f_dentry ++ && p->hf_file->f_dentry->d_inode) ++ break; ++ else ++ au_hfput(p, file); ++ } ++ AuDebugOn(fidir->fd_bbot < finfo->fi_btop); ++} ++ ++/* ++ * after branch manipulating, refresh the file. 
++ */ ++static int refresh_file(struct file *file, int (*reopen)(struct file *file)) ++{ ++ int err, need_reopen; ++ aufs_bindex_t bend, bindex; ++ struct dentry *dentry; ++ struct au_finfo *finfo; ++ struct au_hfile *hfile; ++ ++ dentry = file->f_dentry; ++ finfo = au_fi(file); ++ if (!finfo->fi_hdir) { ++ hfile = &finfo->fi_htop; ++ AuDebugOn(!hfile->hf_file); ++ bindex = au_br_index(dentry->d_sb, hfile->hf_br->br_id); ++ AuDebugOn(bindex < 0); ++ if (bindex != finfo->fi_btop) ++ au_set_fbstart(file, bindex); ++ } else { ++ err = au_fidir_realloc(finfo, au_sbend(dentry->d_sb) + 1); ++ if (unlikely(err)) ++ goto out; ++ au_do_refresh_dir(file); ++ } ++ ++ err = 0; ++ need_reopen = 1; ++ if (!au_test_mmapped(file)) ++ err = au_file_refresh_by_inode(file, &need_reopen); ++ if (!err && need_reopen && !d_unlinked(dentry)) ++ err = reopen(file); ++ if (!err) { ++ au_update_figen(file); ++ goto out; /* success */ ++ } ++ ++ /* error, close all lower files */ ++ if (finfo->fi_hdir) { ++ bend = au_fbend_dir(file); ++ for (bindex = au_fbstart(file); bindex <= bend; bindex++) ++ au_set_h_fptr(file, bindex, NULL); ++ } ++ ++out: ++ return err; ++} ++ ++/* common function to regular file and dir */ ++int au_reval_and_lock_fdi(struct file *file, int (*reopen)(struct file *file), ++ int wlock) ++{ ++ int err; ++ unsigned int sigen, figen; ++ aufs_bindex_t bstart; ++ unsigned char pseudo_link; ++ struct dentry *dentry; ++ struct inode *inode; ++ ++ err = 0; ++ dentry = file->f_dentry; ++ inode = dentry->d_inode; ++ AuDebugOn(au_special_file(inode->i_mode)); ++ sigen = au_sigen(dentry->d_sb); ++ fi_write_lock(file); ++ figen = au_figen(file); ++ di_write_lock_child(dentry); ++ bstart = au_dbstart(dentry); ++ pseudo_link = (bstart != au_ibstart(inode)); ++ if (sigen == figen && !pseudo_link && au_fbstart(file) == bstart) { ++ if (!wlock) { ++ di_downgrade_lock(dentry, AuLock_IR); ++ fi_downgrade_lock(file); ++ } ++ goto out; /* success */ ++ } ++ ++ AuDbg("sigen %d, figen %d\n", sigen, figen); ++ if (au_digen_test(dentry, sigen)) { ++ err = au_reval_dpath(dentry, sigen); ++ AuDebugOn(!err && au_digen_test(dentry, sigen)); ++ } ++ ++ if (!err) ++ err = refresh_file(file, reopen); ++ if (!err) { ++ if (!wlock) { ++ di_downgrade_lock(dentry, AuLock_IR); ++ fi_downgrade_lock(file); ++ } ++ } else { ++ di_write_unlock(dentry); ++ fi_write_unlock(file); ++ } ++ ++out: ++ return err; ++} ++ ++/* ---------------------------------------------------------------------- */ ++ ++/* cf. aufs_nopage() */ ++/* for madvise(2) */ ++static int aufs_readpage(struct file *file __maybe_unused, struct page *page) ++{ ++ unlock_page(page); ++ return 0; ++} ++ ++/* it will never be called, but necessary to support O_DIRECT */ ++static ssize_t aufs_direct_IO(int rw, struct kiocb *iocb, ++ const struct iovec *iov, loff_t offset, ++ unsigned long nr_segs) ++{ BUG(); return 0; } ++ ++/* ++ * it will never be called, but madvise and fadvise behaves differently ++ * when get_xip_mem is defined ++ */ ++static int aufs_get_xip_mem(struct address_space *mapping, pgoff_t pgoff, ++ int create, void **kmem, unsigned long *pfn) ++{ BUG(); return 0; } ++ ++/* they will never be called. 
*/ ++#ifdef CONFIG_AUFS_DEBUG ++static int aufs_write_begin(struct file *file, struct address_space *mapping, ++ loff_t pos, unsigned len, unsigned flags, ++ struct page **pagep, void **fsdata) ++{ AuUnsupport(); return 0; } ++static int aufs_write_end(struct file *file, struct address_space *mapping, ++ loff_t pos, unsigned len, unsigned copied, ++ struct page *page, void *fsdata) ++{ AuUnsupport(); return 0; } ++static int aufs_writepage(struct page *page, struct writeback_control *wbc) ++{ AuUnsupport(); return 0; } ++ ++static int aufs_set_page_dirty(struct page *page) ++{ AuUnsupport(); return 0; } ++static void aufs_invalidatepage(struct page *page, unsigned long offset) ++{ AuUnsupport(); } ++static int aufs_releasepage(struct page *page, gfp_t gfp) ++{ AuUnsupport(); return 0; } ++static int aufs_migratepage(struct address_space *mapping, struct page *newpage, ++ struct page *page) ++{ AuUnsupport(); return 0; } ++static int aufs_launder_page(struct page *page) ++{ AuUnsupport(); return 0; } ++static int aufs_is_partially_uptodate(struct page *page, ++ read_descriptor_t *desc, ++ unsigned long from) ++{ AuUnsupport(); return 0; } ++static int aufs_error_remove_page(struct address_space *mapping, ++ struct page *page) ++{ AuUnsupport(); return 0; } ++#endif /* CONFIG_AUFS_DEBUG */ ++ ++const struct address_space_operations aufs_aop = { ++ .readpage = aufs_readpage, ++ .direct_IO = aufs_direct_IO, ++ .get_xip_mem = aufs_get_xip_mem, ++#ifdef CONFIG_AUFS_DEBUG ++ .writepage = aufs_writepage, ++ /* no writepages, because of writepage */ ++ .set_page_dirty = aufs_set_page_dirty, ++ /* no readpages, because of readpage */ ++ .write_begin = aufs_write_begin, ++ .write_end = aufs_write_end, ++ /* no bmap, no block device */ ++ .invalidatepage = aufs_invalidatepage, ++ .releasepage = aufs_releasepage, ++ .migratepage = aufs_migratepage, ++ .launder_page = aufs_launder_page, ++ .is_partially_uptodate = aufs_is_partially_uptodate, ++ .error_remove_page = aufs_error_remove_page ++#endif /* CONFIG_AUFS_DEBUG */ ++}; +diff -uNr linux-3.2.0-gentoo-r1.orig//fs/aufs/file.h linux-3.2.0-gentoo-r1/fs/aufs/file.h +--- linux-3.2.0-gentoo-r1.orig//fs/aufs/file.h 1970-01-01 01:00:00.000000000 +0100 ++++ linux-3.2.0-gentoo-r1/fs/aufs/file.h 2012-01-17 12:11:24.664470399 +0100 +@@ -0,0 +1,298 @@ ++/* ++ * Copyright (C) 2005-2012 Junjiro R. Okajima ++ * ++ * This program, aufs is free software; you can redistribute it and/or modify ++ * it under the terms of the GNU General Public License as published by ++ * the Free Software Foundation; either version 2 of the License, or ++ * (at your option) any later version. ++ * ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ * GNU General Public License for more details. 
++ * ++ * You should have received a copy of the GNU General Public License ++ * along with this program; if not, write to the Free Software ++ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA ++ */ ++ ++/* ++ * file operations ++ */ ++ ++#ifndef __AUFS_FILE_H__ ++#define __AUFS_FILE_H__ ++ ++#ifdef __KERNEL__ ++ ++#include ++#include ++#include ++#include "rwsem.h" ++ ++struct au_branch; ++struct au_hfile { ++ struct file *hf_file; ++ struct au_branch *hf_br; ++}; ++ ++struct au_vdir; ++struct au_fidir { ++ aufs_bindex_t fd_bbot; ++ aufs_bindex_t fd_nent; ++ struct au_vdir *fd_vdir_cache; ++ struct au_hfile fd_hfile[]; ++}; ++ ++static inline int au_fidir_sz(int nent) ++{ ++ AuDebugOn(nent < 0); ++ return sizeof(struct au_fidir) + sizeof(struct au_hfile) * nent; ++} ++ ++struct au_finfo { ++ atomic_t fi_generation; ++ ++ struct au_rwsem fi_rwsem; ++ aufs_bindex_t fi_btop; ++ ++ /* do not union them */ ++ struct { /* for non-dir */ ++ struct au_hfile fi_htop; ++ atomic_t fi_mmapped; ++ }; ++ struct au_fidir *fi_hdir; /* for dir only */ ++} ____cacheline_aligned_in_smp; ++ ++/* ---------------------------------------------------------------------- */ ++ ++/* file.c */ ++extern const struct address_space_operations aufs_aop; ++unsigned int au_file_roflags(unsigned int flags); ++struct file *au_h_open(struct dentry *dentry, aufs_bindex_t bindex, int flags, ++ struct file *file); ++int au_do_open(struct file *file, int (*open)(struct file *file, int flags), ++ struct au_fidir *fidir); ++int au_reopen_nondir(struct file *file); ++struct au_pin; ++int au_ready_to_write(struct file *file, loff_t len, struct au_pin *pin); ++int au_reval_and_lock_fdi(struct file *file, int (*reopen)(struct file *file), ++ int wlock); ++int au_do_flush(struct file *file, fl_owner_t id, ++ int (*flush)(struct file *file, fl_owner_t id)); ++ ++/* poll.c */ ++#ifdef CONFIG_AUFS_POLL ++unsigned int aufs_poll(struct file *file, poll_table *wait); ++#endif ++ ++#ifdef CONFIG_AUFS_BR_HFSPLUS ++/* hfsplus.c */ ++struct file *au_h_open_pre(struct dentry *dentry, aufs_bindex_t bindex); ++void au_h_open_post(struct dentry *dentry, aufs_bindex_t bindex, ++ struct file *h_file); ++#else ++static inline ++struct file *au_h_open_pre(struct dentry *dentry, aufs_bindex_t bindex) ++{ ++ return NULL; ++} ++ ++AuStubVoid(au_h_open_post, struct dentry *dentry, aufs_bindex_t bindex, ++ struct file *h_file); ++#endif ++ ++/* f_op.c */ ++extern const struct file_operations aufs_file_fop; ++int au_do_open_nondir(struct file *file, int flags); ++int aufs_release_nondir(struct inode *inode __maybe_unused, struct file *file); ++ ++#ifdef CONFIG_AUFS_SP_IATTR ++/* f_op_sp.c */ ++int au_special_file(umode_t mode); ++void au_init_special_fop(struct inode *inode, umode_t mode, dev_t rdev); ++#else ++AuStubInt0(au_special_file, umode_t mode) ++static inline void au_init_special_fop(struct inode *inode, umode_t mode, ++ dev_t rdev) ++{ ++ init_special_inode(inode, mode, rdev); ++} ++#endif ++ ++/* finfo.c */ ++void au_hfput(struct au_hfile *hf, struct file *file); ++void au_set_h_fptr(struct file *file, aufs_bindex_t bindex, ++ struct file *h_file); ++ ++void au_update_figen(struct file *file); ++struct au_fidir *au_fidir_alloc(struct super_block *sb); ++int au_fidir_realloc(struct au_finfo *finfo, int nbr); ++ ++void au_fi_init_once(void *_fi); ++void au_finfo_fin(struct file *file); ++int au_finfo_init(struct file *file, struct au_fidir *fidir); ++ ++/* ioctl.c */ ++long aufs_ioctl_nondir(struct file *file, unsigned int 
cmd, unsigned long arg); ++#ifdef CONFIG_COMPAT ++long aufs_compat_ioctl_dir(struct file *file, unsigned int cmd, ++ unsigned long arg); ++#endif ++ ++/* ---------------------------------------------------------------------- */ ++ ++static inline struct au_finfo *au_fi(struct file *file) ++{ ++ return file->private_data; ++} ++ ++/* ---------------------------------------------------------------------- */ ++ ++/* ++ * fi_read_lock, fi_write_lock, ++ * fi_read_unlock, fi_write_unlock, fi_downgrade_lock ++ */ ++AuSimpleRwsemFuncs(fi, struct file *f, &au_fi(f)->fi_rwsem); ++ ++#define FiMustNoWaiters(f) AuRwMustNoWaiters(&au_fi(f)->fi_rwsem) ++#define FiMustAnyLock(f) AuRwMustAnyLock(&au_fi(f)->fi_rwsem) ++#define FiMustWriteLock(f) AuRwMustWriteLock(&au_fi(f)->fi_rwsem) ++ ++/* ---------------------------------------------------------------------- */ ++ ++/* todo: hard/soft set? */ ++static inline aufs_bindex_t au_fbstart(struct file *file) ++{ ++ FiMustAnyLock(file); ++ return au_fi(file)->fi_btop; ++} ++ ++static inline aufs_bindex_t au_fbend_dir(struct file *file) ++{ ++ FiMustAnyLock(file); ++ AuDebugOn(!au_fi(file)->fi_hdir); ++ return au_fi(file)->fi_hdir->fd_bbot; ++} ++ ++static inline struct au_vdir *au_fvdir_cache(struct file *file) ++{ ++ FiMustAnyLock(file); ++ AuDebugOn(!au_fi(file)->fi_hdir); ++ return au_fi(file)->fi_hdir->fd_vdir_cache; ++} ++ ++static inline void au_set_fbstart(struct file *file, aufs_bindex_t bindex) ++{ ++ FiMustWriteLock(file); ++ au_fi(file)->fi_btop = bindex; ++} ++ ++static inline void au_set_fbend_dir(struct file *file, aufs_bindex_t bindex) ++{ ++ FiMustWriteLock(file); ++ AuDebugOn(!au_fi(file)->fi_hdir); ++ au_fi(file)->fi_hdir->fd_bbot = bindex; ++} ++ ++static inline void au_set_fvdir_cache(struct file *file, ++ struct au_vdir *vdir_cache) ++{ ++ FiMustWriteLock(file); ++ AuDebugOn(!au_fi(file)->fi_hdir); ++ au_fi(file)->fi_hdir->fd_vdir_cache = vdir_cache; ++} ++ ++static inline struct file *au_hf_top(struct file *file) ++{ ++ FiMustAnyLock(file); ++ AuDebugOn(au_fi(file)->fi_hdir); ++ return au_fi(file)->fi_htop.hf_file; ++} ++ ++static inline struct file *au_hf_dir(struct file *file, aufs_bindex_t bindex) ++{ ++ FiMustAnyLock(file); ++ AuDebugOn(!au_fi(file)->fi_hdir); ++ return au_fi(file)->fi_hdir->fd_hfile[0 + bindex].hf_file; ++} ++ ++/* todo: memory barrier? 
*/ ++static inline unsigned int au_figen(struct file *f) ++{ ++ return atomic_read(&au_fi(f)->fi_generation); ++} ++ ++static inline void au_set_mmapped(struct file *f) ++{ ++ if (atomic_inc_return(&au_fi(f)->fi_mmapped)) ++ return; ++ pr_warning("fi_mmapped wrapped around\n"); ++ while (!atomic_inc_return(&au_fi(f)->fi_mmapped)) ++ ; ++} ++ ++static inline void au_unset_mmapped(struct file *f) ++{ ++ atomic_dec(&au_fi(f)->fi_mmapped); ++} ++ ++static inline int au_test_mmapped(struct file *f) ++{ ++ return atomic_read(&au_fi(f)->fi_mmapped); ++} ++ ++/* customize vma->vm_file */ ++ ++static inline void au_do_vm_file_reset(struct vm_area_struct *vma, ++ struct file *file) ++{ ++ struct file *f; ++ ++ f = vma->vm_file; ++ get_file(file); ++ vma->vm_file = file; ++ fput(f); ++} ++ ++#ifdef CONFIG_MMU ++#define AuDbgVmRegion(file, vma) do {} while (0) ++ ++static inline void au_vm_file_reset(struct vm_area_struct *vma, ++ struct file *file) ++{ ++ au_do_vm_file_reset(vma, file); ++} ++#else ++#define AuDbgVmRegion(file, vma) \ ++ AuDebugOn((vma)->vm_region && (vma)->vm_region->vm_file != (file)) ++ ++static inline void au_vm_file_reset(struct vm_area_struct *vma, ++ struct file *file) ++{ ++ struct file *f; ++ ++ au_do_vm_file_reset(vma, file); ++ f = vma->vm_region->vm_file; ++ get_file(file); ++ vma->vm_region->vm_file = file; ++ fput(f); ++} ++#endif /* CONFIG_MMU */ ++ ++/* handle vma->vm_prfile */ ++static inline void au_vm_prfile_set(struct vm_area_struct *vma, ++ struct file *file) ++{ ++#ifdef CONFIG_AUFS_PROC_MAP ++ get_file(file); ++ vma->vm_prfile = file; ++#ifndef CONFIG_MMU ++ get_file(file); ++ vma->vm_region->vm_prfile = file; ++#endif ++#endif ++} ++ ++#endif /* __KERNEL__ */ ++#endif /* __AUFS_FILE_H__ */ +diff -uNr linux-3.2.0-gentoo-r1.orig//fs/aufs/finfo.c linux-3.2.0-gentoo-r1/fs/aufs/finfo.c +--- linux-3.2.0-gentoo-r1.orig//fs/aufs/finfo.c 1970-01-01 01:00:00.000000000 +0100 ++++ linux-3.2.0-gentoo-r1/fs/aufs/finfo.c 2012-01-17 12:11:24.689933591 +0100 +@@ -0,0 +1,156 @@ ++/* ++ * Copyright (C) 2005-2012 Junjiro R. Okajima ++ * ++ * This program, aufs is free software; you can redistribute it and/or modify ++ * it under the terms of the GNU General Public License as published by ++ * the Free Software Foundation; either version 2 of the License, or ++ * (at your option) any later version. ++ * ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ * GNU General Public License for more details. 
++ * ++ * You should have received a copy of the GNU General Public License ++ * along with this program; if not, write to the Free Software ++ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA ++ */ ++ ++/* ++ * file private data ++ */ ++ ++#include "aufs.h" ++ ++void au_hfput(struct au_hfile *hf, struct file *file) ++{ ++ /* todo: direct access f_flags */ ++ if (vfsub_file_flags(file) & __FMODE_EXEC) ++ allow_write_access(hf->hf_file); ++ fput(hf->hf_file); ++ hf->hf_file = NULL; ++ atomic_dec(&hf->hf_br->br_count); ++ hf->hf_br = NULL; ++} ++ ++void au_set_h_fptr(struct file *file, aufs_bindex_t bindex, struct file *val) ++{ ++ struct au_finfo *finfo = au_fi(file); ++ struct au_hfile *hf; ++ struct au_fidir *fidir; ++ ++ fidir = finfo->fi_hdir; ++ if (!fidir) { ++ AuDebugOn(finfo->fi_btop != bindex); ++ hf = &finfo->fi_htop; ++ } else ++ hf = fidir->fd_hfile + bindex; ++ ++ if (hf && hf->hf_file) ++ au_hfput(hf, file); ++ if (val) { ++ FiMustWriteLock(file); ++ hf->hf_file = val; ++ hf->hf_br = au_sbr(file->f_dentry->d_sb, bindex); ++ } ++} ++ ++void au_update_figen(struct file *file) ++{ ++ atomic_set(&au_fi(file)->fi_generation, au_digen(file->f_dentry)); ++ /* smp_mb(); */ /* atomic_set */ ++} ++ ++/* ---------------------------------------------------------------------- */ ++ ++struct au_fidir *au_fidir_alloc(struct super_block *sb) ++{ ++ struct au_fidir *fidir; ++ int nbr; ++ ++ nbr = au_sbend(sb) + 1; ++ if (nbr < 2) ++ nbr = 2; /* initial allocate for 2 branches */ ++ fidir = kzalloc(au_fidir_sz(nbr), GFP_NOFS); ++ if (fidir) { ++ fidir->fd_bbot = -1; ++ fidir->fd_nent = nbr; ++ fidir->fd_vdir_cache = NULL; ++ } ++ ++ return fidir; ++} ++ ++int au_fidir_realloc(struct au_finfo *finfo, int nbr) ++{ ++ int err; ++ struct au_fidir *fidir, *p; ++ ++ AuRwMustWriteLock(&finfo->fi_rwsem); ++ fidir = finfo->fi_hdir; ++ AuDebugOn(!fidir); ++ ++ err = -ENOMEM; ++ p = au_kzrealloc(fidir, au_fidir_sz(fidir->fd_nent), au_fidir_sz(nbr), ++ GFP_NOFS); ++ if (p) { ++ p->fd_nent = nbr; ++ finfo->fi_hdir = p; ++ err = 0; ++ } ++ ++ return err; ++} ++ ++/* ---------------------------------------------------------------------- */ ++ ++void au_finfo_fin(struct file *file) ++{ ++ struct au_finfo *finfo; ++ ++ au_nfiles_dec(file->f_dentry->d_sb); ++ ++ finfo = au_fi(file); ++ AuDebugOn(finfo->fi_hdir); ++ AuRwDestroy(&finfo->fi_rwsem); ++ au_cache_free_finfo(finfo); ++} ++ ++void au_fi_init_once(void *_finfo) ++{ ++ struct au_finfo *finfo = _finfo; ++ static struct lock_class_key aufs_fi; ++ ++ au_rw_init(&finfo->fi_rwsem); ++ au_rw_class(&finfo->fi_rwsem, &aufs_fi); ++} ++ ++int au_finfo_init(struct file *file, struct au_fidir *fidir) ++{ ++ int err, lc_idx; ++ struct au_finfo *finfo; ++ struct dentry *dentry; ++ ++ err = -ENOMEM; ++ dentry = file->f_dentry; ++ finfo = au_cache_alloc_finfo(); ++ if (unlikely(!finfo)) ++ goto out; ++ ++ err = 0; ++ au_nfiles_inc(dentry->d_sb); ++ lc_idx = AuLcNonDir_FIINFO; ++ if (fidir) ++ lc_idx = AuLcDir_FIINFO; ++ au_rw_class(&finfo->fi_rwsem, au_lc_key + lc_idx); ++ au_rw_write_lock(&finfo->fi_rwsem); ++ finfo->fi_btop = -1; ++ finfo->fi_hdir = fidir; ++ atomic_set(&finfo->fi_generation, au_digen(dentry)); ++ /* smp_mb(); */ /* atomic_set */ ++ ++ file->private_data = finfo; ++ ++out: ++ return err; ++} +diff -uNr linux-3.2.0-gentoo-r1.orig//fs/aufs/f_op.c linux-3.2.0-gentoo-r1/fs/aufs/f_op.c +--- linux-3.2.0-gentoo-r1.orig//fs/aufs/f_op.c 1970-01-01 01:00:00.000000000 +0100 ++++ linux-3.2.0-gentoo-r1/fs/aufs/f_op.c 2012-01-17 
12:11:24.643636878 +0100 +@@ -0,0 +1,729 @@ ++/* ++ * Copyright (C) 2005-2012 Junjiro R. Okajima ++ * ++ * This program, aufs is free software; you can redistribute it and/or modify ++ * it under the terms of the GNU General Public License as published by ++ * the Free Software Foundation; either version 2 of the License, or ++ * (at your option) any later version. ++ * ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ * GNU General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public License ++ * along with this program; if not, write to the Free Software ++ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA ++ */ ++ ++/* ++ * file and vm operations ++ */ ++ ++#include ++#include ++#include ++#include "aufs.h" ++ ++int au_do_open_nondir(struct file *file, int flags) ++{ ++ int err; ++ aufs_bindex_t bindex; ++ struct file *h_file; ++ struct dentry *dentry; ++ struct au_finfo *finfo; ++ ++ FiMustWriteLock(file); ++ ++ dentry = file->f_dentry; ++ err = au_d_alive(dentry); ++ if (unlikely(err)) ++ goto out; ++ ++ finfo = au_fi(file); ++ memset(&finfo->fi_htop, 0, sizeof(finfo->fi_htop)); ++ atomic_set(&finfo->fi_mmapped, 0); ++ bindex = au_dbstart(dentry); ++ h_file = au_h_open(dentry, bindex, flags, file); ++ if (IS_ERR(h_file)) ++ err = PTR_ERR(h_file); ++ else { ++ au_set_fbstart(file, bindex); ++ au_set_h_fptr(file, bindex, h_file); ++ au_update_figen(file); ++ /* todo: necessary? */ ++ /* file->f_ra = h_file->f_ra; */ ++ } ++ ++out: ++ return err; ++} ++ ++static int aufs_open_nondir(struct inode *inode __maybe_unused, ++ struct file *file) ++{ ++ int err; ++ struct super_block *sb; ++ ++ AuDbg("%.*s, f_flags 0x%x, f_mode 0x%x\n", ++ AuDLNPair(file->f_dentry), vfsub_file_flags(file), ++ file->f_mode); ++ ++ sb = file->f_dentry->d_sb; ++ si_read_lock(sb, AuLock_FLUSH); ++ err = au_do_open(file, au_do_open_nondir, /*fidir*/NULL); ++ si_read_unlock(sb); ++ return err; ++} ++ ++int aufs_release_nondir(struct inode *inode __maybe_unused, struct file *file) ++{ ++ struct au_finfo *finfo; ++ aufs_bindex_t bindex; ++ ++ finfo = au_fi(file); ++ bindex = finfo->fi_btop; ++ if (bindex >= 0) { ++ /* remove me from sb->s_files */ ++ file_sb_list_del(file); ++ au_set_h_fptr(file, bindex, NULL); ++ } ++ ++ au_finfo_fin(file); ++ return 0; ++} ++ ++/* ---------------------------------------------------------------------- */ ++ ++static int au_do_flush_nondir(struct file *file, fl_owner_t id) ++{ ++ int err; ++ struct file *h_file; ++ ++ err = 0; ++ h_file = au_hf_top(file); ++ if (h_file) ++ err = vfsub_flush(h_file, id); ++ return err; ++} ++ ++static int aufs_flush_nondir(struct file *file, fl_owner_t id) ++{ ++ return au_do_flush(file, id, au_do_flush_nondir); ++} ++ ++/* ---------------------------------------------------------------------- */ ++/* ++ * read and write functions acquire [fdi]_rwsem once, but release before ++ * mmap_sem. This is to avoid a race condition with mmap(2). ++ * Releasing these aufs-rwsem should be safe, no branch-management (by keeping ++ * si_rwsem), no harmful copy-up should happen. Actually copy-up may happen in ++ * read functions after [fdi]_rwsem are released, but it should be harmless.
++ */ ++ ++static ssize_t aufs_read(struct file *file, char __user *buf, size_t count, ++ loff_t *ppos) ++{ ++ ssize_t err; ++ struct dentry *dentry; ++ struct file *h_file; ++ struct super_block *sb; ++ ++ dentry = file->f_dentry; ++ sb = dentry->d_sb; ++ si_read_lock(sb, AuLock_FLUSH | AuLock_NOPLMW); ++ err = au_reval_and_lock_fdi(file, au_reopen_nondir, /*wlock*/0); ++ if (unlikely(err)) ++ goto out; ++ ++ h_file = au_hf_top(file); ++ get_file(h_file); ++ di_read_unlock(dentry, AuLock_IR); ++ fi_read_unlock(file); ++ ++ /* filedata may be obsoleted by concurrent copyup, but no problem */ ++ err = vfsub_read_u(h_file, buf, count, ppos); ++ /* todo: necessary? */ ++ /* file->f_ra = h_file->f_ra; */ ++ /* update without lock, I don't think it a problem */ ++ fsstack_copy_attr_atime(dentry->d_inode, h_file->f_dentry->d_inode); ++ fput(h_file); ++ ++out: ++ si_read_unlock(sb); ++ return err; ++} ++ ++/* ++ * todo: very ugly ++ * it locks both of i_mutex and si_rwsem for read in safe. ++ * if the plink maintenance mode continues forever (that is the problem), ++ * may loop forever. ++ */ ++static void au_mtx_and_read_lock(struct inode *inode) ++{ ++ int err; ++ struct super_block *sb = inode->i_sb; ++ ++ while (1) { ++ mutex_lock(&inode->i_mutex); ++ err = si_read_lock(sb, AuLock_FLUSH | AuLock_NOPLM); ++ if (!err) ++ break; ++ mutex_unlock(&inode->i_mutex); ++ si_read_lock(sb, AuLock_NOPLMW); ++ si_read_unlock(sb); ++ } ++} ++ ++static ssize_t aufs_write(struct file *file, const char __user *ubuf, ++ size_t count, loff_t *ppos) ++{ ++ ssize_t err; ++ struct au_pin pin; ++ struct dentry *dentry; ++ struct super_block *sb; ++ struct inode *inode; ++ struct file *h_file; ++ char __user *buf = (char __user *)ubuf; ++ ++ dentry = file->f_dentry; ++ sb = dentry->d_sb; ++ inode = dentry->d_inode; ++ au_mtx_and_read_lock(inode); ++ ++ err = au_reval_and_lock_fdi(file, au_reopen_nondir, /*wlock*/1); ++ if (unlikely(err)) ++ goto out; ++ ++ err = au_ready_to_write(file, -1, &pin); ++ di_downgrade_lock(dentry, AuLock_IR); ++ if (unlikely(err)) { ++ di_read_unlock(dentry, AuLock_IR); ++ fi_write_unlock(file); ++ goto out; ++ } ++ ++ h_file = au_hf_top(file); ++ get_file(h_file); ++ au_unpin(&pin); ++ di_read_unlock(dentry, AuLock_IR); ++ fi_write_unlock(file); ++ ++ err = vfsub_write_u(h_file, buf, count, ppos); ++ ii_write_lock_child(inode); ++ au_cpup_attr_timesizes(inode); ++ inode->i_mode = h_file->f_dentry->d_inode->i_mode; ++ ii_write_unlock(inode); ++ fput(h_file); ++ ++out: ++ si_read_unlock(sb); ++ mutex_unlock(&inode->i_mutex); ++ return err; ++} ++ ++static ssize_t au_do_aio(struct file *h_file, int rw, struct kiocb *kio, ++ const struct iovec *iov, unsigned long nv, loff_t pos) ++{ ++ ssize_t err; ++ struct file *file; ++ ssize_t (*func)(struct kiocb *, const struct iovec *, unsigned long, ++ loff_t); ++ ++ err = security_file_permission(h_file, rw); ++ if (unlikely(err)) ++ goto out; ++ ++ err = -ENOSYS; ++ func = NULL; ++ if (rw == MAY_READ) ++ func = h_file->f_op->aio_read; ++ else if (rw == MAY_WRITE) ++ func = h_file->f_op->aio_write; ++ if (func) { ++ file = kio->ki_filp; ++ kio->ki_filp = h_file; ++ lockdep_off(); ++ err = func(kio, iov, nv, pos); ++ lockdep_on(); ++ kio->ki_filp = file; ++ } else ++ /* currently there is no such fs */ ++ WARN_ON_ONCE(1); ++ ++out: ++ return err; ++} ++ ++static ssize_t aufs_aio_read(struct kiocb *kio, const struct iovec *iov, ++ unsigned long nv, loff_t pos) ++{ ++ ssize_t err; ++ struct file *file, *h_file; ++ struct dentry *dentry; ++ struct 
super_block *sb; ++ ++ file = kio->ki_filp; ++ dentry = file->f_dentry; ++ sb = dentry->d_sb; ++ si_read_lock(sb, AuLock_FLUSH | AuLock_NOPLMW); ++ err = au_reval_and_lock_fdi(file, au_reopen_nondir, /*wlock*/0); ++ if (unlikely(err)) ++ goto out; ++ ++ h_file = au_hf_top(file); ++ get_file(h_file); ++ di_read_unlock(dentry, AuLock_IR); ++ fi_read_unlock(file); ++ ++ err = au_do_aio(h_file, MAY_READ, kio, iov, nv, pos); ++ /* todo: necessary? */ ++ /* file->f_ra = h_file->f_ra; */ ++ /* update without lock, I don't think it a problem */ ++ fsstack_copy_attr_atime(dentry->d_inode, h_file->f_dentry->d_inode); ++ fput(h_file); ++ ++out: ++ si_read_unlock(sb); ++ return err; ++} ++ ++static ssize_t aufs_aio_write(struct kiocb *kio, const struct iovec *iov, ++ unsigned long nv, loff_t pos) ++{ ++ ssize_t err; ++ struct au_pin pin; ++ struct dentry *dentry; ++ struct inode *inode; ++ struct file *file, *h_file; ++ struct super_block *sb; ++ ++ file = kio->ki_filp; ++ dentry = file->f_dentry; ++ sb = dentry->d_sb; ++ inode = dentry->d_inode; ++ au_mtx_and_read_lock(inode); ++ ++ err = au_reval_and_lock_fdi(file, au_reopen_nondir, /*wlock*/1); ++ if (unlikely(err)) ++ goto out; ++ ++ err = au_ready_to_write(file, -1, &pin); ++ di_downgrade_lock(dentry, AuLock_IR); ++ if (unlikely(err)) { ++ di_read_unlock(dentry, AuLock_IR); ++ fi_write_unlock(file); ++ goto out; ++ } ++ ++ h_file = au_hf_top(file); ++ get_file(h_file); ++ au_unpin(&pin); ++ di_read_unlock(dentry, AuLock_IR); ++ fi_write_unlock(file); ++ ++ err = au_do_aio(h_file, MAY_WRITE, kio, iov, nv, pos); ++ ii_write_lock_child(inode); ++ au_cpup_attr_timesizes(inode); ++ inode->i_mode = h_file->f_dentry->d_inode->i_mode; ++ ii_write_unlock(inode); ++ fput(h_file); ++ ++out: ++ si_read_unlock(sb); ++ mutex_unlock(&inode->i_mutex); ++ return err; ++} ++ ++static ssize_t aufs_splice_read(struct file *file, loff_t *ppos, ++ struct pipe_inode_info *pipe, size_t len, ++ unsigned int flags) ++{ ++ ssize_t err; ++ struct file *h_file; ++ struct dentry *dentry; ++ struct super_block *sb; ++ ++ dentry = file->f_dentry; ++ sb = dentry->d_sb; ++ si_read_lock(sb, AuLock_FLUSH | AuLock_NOPLMW); ++ err = au_reval_and_lock_fdi(file, au_reopen_nondir, /*wlock*/0); ++ if (unlikely(err)) ++ goto out; ++ ++ err = -EINVAL; ++ h_file = au_hf_top(file); ++ get_file(h_file); ++ if (au_test_loopback_kthread()) { ++ au_warn_loopback(h_file->f_dentry->d_sb); ++ if (file->f_mapping != h_file->f_mapping) { ++ file->f_mapping = h_file->f_mapping; ++ smp_mb(); /* unnecessary? */ ++ } ++ } ++ di_read_unlock(dentry, AuLock_IR); ++ fi_read_unlock(file); ++ ++ err = vfsub_splice_to(h_file, ppos, pipe, len, flags); ++ /* todo: necessasry? 
*/ ++ /* file->f_ra = h_file->f_ra; */ ++ /* update without lock, I don't think it a problem */ ++ fsstack_copy_attr_atime(dentry->d_inode, h_file->f_dentry->d_inode); ++ fput(h_file); ++ ++out: ++ si_read_unlock(sb); ++ return err; ++} ++ ++static ssize_t ++aufs_splice_write(struct pipe_inode_info *pipe, struct file *file, loff_t *ppos, ++ size_t len, unsigned int flags) ++{ ++ ssize_t err; ++ struct au_pin pin; ++ struct dentry *dentry; ++ struct inode *inode; ++ struct file *h_file; ++ struct super_block *sb; ++ ++ dentry = file->f_dentry; ++ sb = dentry->d_sb; ++ inode = dentry->d_inode; ++ au_mtx_and_read_lock(inode); ++ ++ err = au_reval_and_lock_fdi(file, au_reopen_nondir, /*wlock*/1); ++ if (unlikely(err)) ++ goto out; ++ ++ err = au_ready_to_write(file, -1, &pin); ++ di_downgrade_lock(dentry, AuLock_IR); ++ if (unlikely(err)) { ++ di_read_unlock(dentry, AuLock_IR); ++ fi_write_unlock(file); ++ goto out; ++ } ++ ++ h_file = au_hf_top(file); ++ get_file(h_file); ++ au_unpin(&pin); ++ di_read_unlock(dentry, AuLock_IR); ++ fi_write_unlock(file); ++ ++ err = vfsub_splice_from(pipe, h_file, ppos, len, flags); ++ ii_write_lock_child(inode); ++ au_cpup_attr_timesizes(inode); ++ inode->i_mode = h_file->f_dentry->d_inode->i_mode; ++ ii_write_unlock(inode); ++ fput(h_file); ++ ++out: ++ si_read_unlock(sb); ++ mutex_unlock(&inode->i_mutex); ++ return err; ++} ++ ++/* ---------------------------------------------------------------------- */ ++ ++/* ++ * The locking order around current->mmap_sem. ++ * - in most and regular cases ++ * file I/O syscall -- aufs_read() or something ++ * -- si_rwsem for read -- mmap_sem ++ * (Note that [fdi]i_rwsem are released before mmap_sem). ++ * - in mmap case ++ * mmap(2) -- mmap_sem -- aufs_mmap() -- si_rwsem for read -- [fdi]i_rwsem ++ * This AB-BA order is definitely bad, but is not a problem since "si_rwsem for ++ * read" allows multiple processes to acquire it and [fdi]i_rwsem are not held in ++ * file I/O. Aufs needs to stop lockdep in aufs_mmap() though. ++ * It means that when aufs acquires si_rwsem for write, the process should never ++ * acquire mmap_sem. ++ * ++ * Actually aufs_readdir() holds [fdi]i_rwsem before mmap_sem, but this is not a ++ * problem either since any directory is not able to be mmap-ed. ++ * A similar scenario applies to aufs_readlink() too. ++ */ ++ ++/* cf. linux/include/linux/mman.h: calc_vm_prot_bits() */ ++#define AuConv_VM_PROT(f, b) _calc_vm_trans(f, VM_##b, PROT_##b) ++ ++static unsigned long au_arch_prot_conv(unsigned long flags) ++{ ++ /* currently ppc64 only */ ++#ifdef CONFIG_PPC64 ++ /* cf. linux/arch/powerpc/include/asm/mman.h */ ++ AuDebugOn(arch_calc_vm_prot_bits(-1) != VM_SAO); ++ return AuConv_VM_PROT(flags, SAO); ++#else ++ AuDebugOn(arch_calc_vm_prot_bits(-1)); ++ return 0; ++#endif ++} ++ ++static unsigned long au_prot_conv(unsigned long flags) ++{ ++ return AuConv_VM_PROT(flags, READ) ++ | AuConv_VM_PROT(flags, WRITE) ++ | AuConv_VM_PROT(flags, EXEC) ++ | au_arch_prot_conv(flags); ++} ++ ++/* cf.
linux/include/linux/mman.h: calc_vm_flag_bits() */ ++#define AuConv_VM_MAP(f, b) _calc_vm_trans(f, VM_##b, MAP_##b) ++ ++static unsigned long au_flag_conv(unsigned long flags) ++{ ++ return AuConv_VM_MAP(flags, GROWSDOWN) ++ | AuConv_VM_MAP(flags, DENYWRITE) ++ | AuConv_VM_MAP(flags, EXECUTABLE) ++ | AuConv_VM_MAP(flags, LOCKED); ++} ++ ++static int aufs_mmap(struct file *file, struct vm_area_struct *vma) ++{ ++ int err; ++ unsigned long prot; ++ aufs_bindex_t bstart; ++ const unsigned char wlock ++ = (file->f_mode & FMODE_WRITE) && (vma->vm_flags & VM_SHARED); ++ struct dentry *dentry; ++ struct super_block *sb; ++ struct file *h_file; ++ struct au_branch *br; ++ struct au_pin pin; ++ ++ AuDbgVmRegion(file, vma); ++ ++ dentry = file->f_dentry; ++ sb = dentry->d_sb; ++ lockdep_off(); ++ si_read_lock(sb, AuLock_NOPLMW); ++ err = au_reval_and_lock_fdi(file, au_reopen_nondir, /*wlock*/1); ++ if (unlikely(err)) ++ goto out; ++ ++ if (wlock) { ++ err = au_ready_to_write(file, -1, &pin); ++ di_write_unlock(dentry); ++ if (unlikely(err)) { ++ fi_write_unlock(file); ++ goto out; ++ } ++ au_unpin(&pin); ++ } else ++ di_write_unlock(dentry); ++ ++ bstart = au_fbstart(file); ++ br = au_sbr(sb, bstart); ++ h_file = au_hf_top(file); ++ get_file(h_file); ++ au_set_mmapped(file); ++ fi_write_unlock(file); ++ lockdep_on(); ++ ++ au_vm_file_reset(vma, h_file); ++ prot = au_prot_conv(vma->vm_flags); ++ err = security_file_mmap(h_file, /*reqprot*/prot, prot, ++ au_flag_conv(vma->vm_flags), vma->vm_start, 0); ++ if (!err) ++ err = h_file->f_op->mmap(h_file, vma); ++ if (unlikely(err)) ++ goto out_reset; ++ ++ au_vm_prfile_set(vma, file); ++ /* update without lock, I don't think it a problem */ ++ fsstack_copy_attr_atime(file->f_dentry->d_inode, ++ h_file->f_dentry->d_inode); ++ goto out_fput; /* success */ ++ ++out_reset: ++ au_unset_mmapped(file); ++ au_vm_file_reset(vma, file); ++out_fput: ++ fput(h_file); ++ lockdep_off(); ++out: ++ si_read_unlock(sb); ++ lockdep_on(); ++ AuTraceErr(err); ++ return err; ++} ++ ++/* ---------------------------------------------------------------------- */ ++ ++static int aufs_fsync_nondir(struct file *file, loff_t start, loff_t end, ++ int datasync) ++{ ++ int err; ++ struct au_pin pin; ++ struct dentry *dentry; ++ struct inode *inode; ++ struct file *h_file; ++ struct super_block *sb; ++ ++ dentry = file->f_dentry; ++ inode = dentry->d_inode; ++ sb = dentry->d_sb; ++ mutex_lock(&inode->i_mutex); ++ err = si_read_lock(sb, AuLock_FLUSH | AuLock_NOPLM); ++ if (unlikely(err)) ++ goto out; ++ ++ err = 0; /* -EBADF; */ /* posix? 
*/ ++ if (unlikely(!(file->f_mode & FMODE_WRITE))) ++ goto out_si; ++ err = au_reval_and_lock_fdi(file, au_reopen_nondir, /*wlock*/1); ++ if (unlikely(err)) ++ goto out_si; ++ ++ err = au_ready_to_write(file, -1, &pin); ++ di_downgrade_lock(dentry, AuLock_IR); ++ if (unlikely(err)) ++ goto out_unlock; ++ au_unpin(&pin); ++ ++ err = -EINVAL; ++ h_file = au_hf_top(file); ++ err = vfsub_fsync(h_file, &h_file->f_path, datasync); ++ au_cpup_attr_timesizes(inode); ++ ++out_unlock: ++ di_read_unlock(dentry, AuLock_IR); ++ fi_write_unlock(file); ++out_si: ++ si_read_unlock(sb); ++out: ++ mutex_unlock(&inode->i_mutex); ++ return err; ++} ++ ++/* no one supports this operation, currently */ ++#if 0 ++static int aufs_aio_fsync_nondir(struct kiocb *kio, int datasync) ++{ ++ int err; ++ struct au_pin pin; ++ struct dentry *dentry; ++ struct inode *inode; ++ struct file *file, *h_file; ++ ++ file = kio->ki_filp; ++ dentry = file->f_dentry; ++ inode = dentry->d_inode; ++ au_mtx_and_read_lock(inode); ++ ++ err = 0; /* -EBADF; */ /* posix? */ ++ if (unlikely(!(file->f_mode & FMODE_WRITE))) ++ goto out; ++ err = au_reval_and_lock_fdi(file, au_reopen_nondir, /*wlock*/1); ++ if (unlikely(err)) ++ goto out; ++ ++ err = au_ready_to_write(file, -1, &pin); ++ di_downgrade_lock(dentry, AuLock_IR); ++ if (unlikely(err)) ++ goto out_unlock; ++ au_unpin(&pin); ++ ++ err = -ENOSYS; ++ h_file = au_hf_top(file); ++ if (h_file->f_op && h_file->f_op->aio_fsync) { ++ struct dentry *h_d; ++ struct mutex *h_mtx; ++ ++ h_d = h_file->f_dentry; ++ h_mtx = &h_d->d_inode->i_mutex; ++ if (!is_sync_kiocb(kio)) { ++ get_file(h_file); ++ fput(file); ++ } ++ kio->ki_filp = h_file; ++ err = h_file->f_op->aio_fsync(kio, datasync); ++ mutex_lock_nested(h_mtx, AuLsc_I_CHILD); ++ if (!err) ++ vfsub_update_h_iattr(&h_file->f_path, /*did*/NULL); ++ /*ignore*/ ++ au_cpup_attr_timesizes(inode); ++ mutex_unlock(h_mtx); ++ } ++ ++out_unlock: ++ di_read_unlock(dentry, AuLock_IR); ++ fi_write_unlock(file); ++out: ++ si_read_unlock(inode->sb); ++ mutex_unlock(&inode->i_mutex); ++ return err; ++} ++#endif ++ ++static int aufs_fasync(int fd, struct file *file, int flag) ++{ ++ int err; ++ struct file *h_file; ++ struct dentry *dentry; ++ struct super_block *sb; ++ ++ dentry = file->f_dentry; ++ sb = dentry->d_sb; ++ si_read_lock(sb, AuLock_FLUSH | AuLock_NOPLMW); ++ err = au_reval_and_lock_fdi(file, au_reopen_nondir, /*wlock*/0); ++ if (unlikely(err)) ++ goto out; ++ ++ h_file = au_hf_top(file); ++ if (h_file->f_op && h_file->f_op->fasync) ++ err = h_file->f_op->fasync(fd, h_file, flag); ++ ++ di_read_unlock(dentry, AuLock_IR); ++ fi_read_unlock(file); ++ ++out: ++ si_read_unlock(sb); ++ return err; ++} ++ ++/* ---------------------------------------------------------------------- */ ++ ++/* no one supports this operation, currently */ ++#if 0 ++static ssize_t aufs_sendpage(struct file *file, struct page *page, int offset, ++ size_t len, loff_t *pos , int more) ++{ ++} ++#endif ++ ++/* ---------------------------------------------------------------------- */ ++ ++const struct file_operations aufs_file_fop = { ++ .owner = THIS_MODULE, ++ ++ .llseek = default_llseek, ++ ++ .read = aufs_read, ++ .write = aufs_write, ++ .aio_read = aufs_aio_read, ++ .aio_write = aufs_aio_write, ++#ifdef CONFIG_AUFS_POLL ++ .poll = aufs_poll, ++#endif ++ .unlocked_ioctl = aufs_ioctl_nondir, ++#ifdef CONFIG_COMPAT ++ .compat_ioctl = aufs_ioctl_nondir, /* same */ ++#endif ++ .mmap = aufs_mmap, ++ .open = aufs_open_nondir, ++ .flush = aufs_flush_nondir, ++ .release = 
aufs_release_nondir, ++ .fsync = aufs_fsync_nondir, ++ /* .aio_fsync = aufs_aio_fsync_nondir, */ ++ .fasync = aufs_fasync, ++ /* .sendpage = aufs_sendpage, */ ++ .splice_write = aufs_splice_write, ++ .splice_read = aufs_splice_read, ++#if 0 ++ .aio_splice_write = aufs_aio_splice_write, ++ .aio_splice_read = aufs_aio_splice_read ++#endif ++}; +diff -uNr linux-3.2.0-gentoo-r1.orig//fs/aufs/f_op_sp.c linux-3.2.0-gentoo-r1/fs/aufs/f_op_sp.c +--- linux-3.2.0-gentoo-r1.orig//fs/aufs/f_op_sp.c 1970-01-01 01:00:00.000000000 +0100 ++++ linux-3.2.0-gentoo-r1/fs/aufs/f_op_sp.c 2012-01-17 12:11:24.652896221 +0100 +@@ -0,0 +1,298 @@ ++/* ++ * Copyright (C) 2005-2012 Junjiro R. Okajima ++ * ++ * This program, aufs is free software; you can redistribute it and/or modify ++ * it under the terms of the GNU General Public License as published by ++ * the Free Software Foundation; either version 2 of the License, or ++ * (at your option) any later version. ++ * ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ * GNU General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public License ++ * along with this program; if not, write to the Free Software ++ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA ++ */ ++ ++/* ++ * file operations for special files. ++ * while they exist in aufs virtually, ++ * their file I/O is handled out of aufs. ++ */ ++ ++#include "aufs.h" ++ ++static ssize_t aufs_aio_read_sp(struct kiocb *kio, const struct iovec *iov, ++ unsigned long nv, loff_t pos) ++{ ++ ssize_t err; ++ aufs_bindex_t bstart; ++ unsigned char wbr; ++ struct file *file, *h_file; ++ struct super_block *sb; ++ ++ file = kio->ki_filp; ++ sb = file->f_dentry->d_sb; ++ si_read_lock(sb, AuLock_FLUSH); ++ fi_read_lock(file); ++ bstart = au_fbstart(file); ++ h_file = au_hf_top(file); ++ fi_read_unlock(file); ++ wbr = !!au_br_writable(au_sbr(sb, bstart)->br_perm); ++ si_read_unlock(sb); ++ ++ /* do not change the file in kio */ ++ AuDebugOn(!h_file->f_op || !h_file->f_op->aio_read); ++ err = h_file->f_op->aio_read(kio, iov, nv, pos); ++ if (err > 0 && wbr) ++ file_accessed(h_file); ++ ++ return err; ++} ++ ++static ssize_t aufs_aio_write_sp(struct kiocb *kio, const struct iovec *iov, ++ unsigned long nv, loff_t pos) ++{ ++ ssize_t err; ++ aufs_bindex_t bstart; ++ unsigned char wbr; ++ struct super_block *sb; ++ struct file *file, *h_file; ++ ++ file = kio->ki_filp; ++ sb = file->f_dentry->d_sb; ++ si_read_lock(sb, AuLock_FLUSH); ++ fi_read_lock(file); ++ bstart = au_fbstart(file); ++ h_file = au_hf_top(file); ++ fi_read_unlock(file); ++ wbr = !!au_br_writable(au_sbr(sb, bstart)->br_perm); ++ si_read_unlock(sb); ++ ++ /* do not change the file in kio */ ++ AuDebugOn(!h_file->f_op || !h_file->f_op->aio_write); ++ err = h_file->f_op->aio_write(kio, iov, nv, pos); ++ if (err > 0 && wbr) ++ file_update_time(h_file); ++ ++ return err; ++} ++ ++/* ---------------------------------------------------------------------- */ ++ ++static int aufs_release_sp(struct inode *inode, struct file *file) ++{ ++ int err; ++ struct file *h_file; ++ ++ fi_read_lock(file); ++ h_file = au_hf_top(file); ++ fi_read_unlock(file); ++ /* close this fifo in aufs */ ++ err = h_file->f_op->release(inode, file); /* ignore */ ++ aufs_release_nondir(inode, file); /* ignore */ ++ return err; ++} ++ ++/* 
---------------------------------------------------------------------- */ ++ ++/* currently, support only FIFO */ ++enum { ++ AuSp_FIFO, AuSp_FIFO_R, AuSp_FIFO_W, AuSp_FIFO_RW, ++ /* AuSp_SOCK, AuSp_CHR, AuSp_BLK, */ ++ AuSp_Last ++}; ++static int aufs_open_sp(struct inode *inode, struct file *file); ++static struct au_sp_fop { ++ int done; ++ struct file_operations fop; /* not 'const' */ ++ spinlock_t spin; ++} au_sp_fop[AuSp_Last] = { ++ [AuSp_FIFO] = { ++ .fop = { ++ .owner = THIS_MODULE, ++ .open = aufs_open_sp ++ } ++ } ++}; ++ ++static void au_init_fop_sp(struct file *file) ++{ ++ struct au_sp_fop *p; ++ int i; ++ struct file *h_file; ++ ++ p = au_sp_fop; ++ if (unlikely(!p->done)) { ++ /* initialize first time only */ ++ static DEFINE_SPINLOCK(spin); ++ ++ spin_lock(&spin); ++ if (!p->done) { ++ BUILD_BUG_ON(sizeof(au_sp_fop)/sizeof(*au_sp_fop) ++ != AuSp_Last); ++ for (i = 0; i < AuSp_Last; i++) ++ spin_lock_init(&p[i].spin); ++ p->done = 1; ++ } ++ spin_unlock(&spin); ++ } ++ ++ switch (file->f_mode & (FMODE_READ | FMODE_WRITE)) { ++ case FMODE_READ: ++ i = AuSp_FIFO_R; ++ break; ++ case FMODE_WRITE: ++ i = AuSp_FIFO_W; ++ break; ++ case FMODE_READ | FMODE_WRITE: ++ i = AuSp_FIFO_RW; ++ break; ++ default: ++ BUG(); ++ } ++ ++ p += i; ++ if (unlikely(!p->done)) { ++ /* initialize first time only */ ++ h_file = au_hf_top(file); ++ spin_lock(&p->spin); ++ if (!p->done) { ++ p->fop = *h_file->f_op; ++ p->fop.owner = THIS_MODULE; ++ if (p->fop.aio_read) ++ p->fop.aio_read = aufs_aio_read_sp; ++ if (p->fop.aio_write) ++ p->fop.aio_write = aufs_aio_write_sp; ++ p->fop.release = aufs_release_sp; ++ p->done = 1; ++ } ++ spin_unlock(&p->spin); ++ } ++ file->f_op = &p->fop; ++} ++ ++static int au_cpup_sp(struct dentry *dentry) ++{ ++ int err; ++ aufs_bindex_t bcpup; ++ struct au_pin pin; ++ struct au_wr_dir_args wr_dir_args = { ++ .force_btgt = -1, ++ .flags = 0 ++ }; ++ ++ AuDbg("%.*s\n", AuDLNPair(dentry)); ++ ++ di_read_unlock(dentry, AuLock_IR); ++ di_write_lock_child(dentry); ++ err = au_wr_dir(dentry, /*src_dentry*/NULL, &wr_dir_args); ++ if (unlikely(err < 0)) ++ goto out; ++ bcpup = err; ++ err = 0; ++ if (bcpup == au_dbstart(dentry)) ++ goto out; /* success */ ++ ++ err = au_pin(&pin, dentry, bcpup, au_opt_udba(dentry->d_sb), ++ AuPin_MNT_WRITE); ++ if (!err) { ++ err = au_sio_cpup_simple(dentry, bcpup, -1, AuCpup_DTIME); ++ au_unpin(&pin); ++ } ++ ++out: ++ di_downgrade_lock(dentry, AuLock_IR); ++ return err; ++} ++ ++static int au_do_open_sp(struct file *file, int flags) ++{ ++ int err; ++ struct dentry *dentry; ++ struct super_block *sb; ++ struct file *h_file; ++ struct inode *h_inode; ++ ++ dentry = file->f_dentry; ++ AuDbg("%.*s\n", AuDLNPair(dentry)); ++ ++ /* ++ * try copying-up. ++ * operate on the ro branch is not an error. 
++ */ ++ au_cpup_sp(dentry); /* ignore */ ++ ++ /* prepare h_file */ ++ err = au_do_open_nondir(file, vfsub_file_flags(file)); ++ if (unlikely(err)) ++ goto out; ++ ++ sb = dentry->d_sb; ++ h_file = au_hf_top(file); ++ h_inode = h_file->f_dentry->d_inode; ++ di_read_unlock(dentry, AuLock_IR); ++ fi_write_unlock(file); ++ si_read_unlock(sb); ++ /* open this fifo in aufs */ ++ err = h_inode->i_fop->open(file->f_dentry->d_inode, file); ++ si_noflush_read_lock(sb); ++ fi_write_lock(file); ++ di_read_lock_child(dentry, AuLock_IR); ++ if (!err) ++ au_init_fop_sp(file); ++ ++out: ++ return err; ++} ++ ++static int aufs_open_sp(struct inode *inode, struct file *file) ++{ ++ int err; ++ struct super_block *sb; ++ ++ sb = file->f_dentry->d_sb; ++ si_read_lock(sb, AuLock_FLUSH); ++ err = au_do_open(file, au_do_open_sp, /*fidir*/NULL); ++ si_read_unlock(sb); ++ return err; ++} ++ ++/* ---------------------------------------------------------------------- */ ++ ++void au_init_special_fop(struct inode *inode, umode_t mode, dev_t rdev) ++{ ++ init_special_inode(inode, mode, rdev); ++ ++ switch (mode & S_IFMT) { ++ case S_IFIFO: ++ inode->i_fop = &au_sp_fop[AuSp_FIFO].fop; ++ /*FALLTHROUGH*/ ++ case S_IFCHR: ++ case S_IFBLK: ++ case S_IFSOCK: ++ break; ++ default: ++ AuDebugOn(1); ++ } ++} ++ ++int au_special_file(umode_t mode) ++{ ++ int ret; ++ ++ ret = 0; ++ switch (mode & S_IFMT) { ++ case S_IFIFO: ++#if 0 ++ case S_IFCHR: ++ case S_IFBLK: ++ case S_IFSOCK: ++#endif ++ ret = 1; ++ } ++ ++ return ret; ++} +diff -uNr linux-3.2.0-gentoo-r1.orig//fs/aufs/fstype.h linux-3.2.0-gentoo-r1/fs/aufs/fstype.h +--- linux-3.2.0-gentoo-r1.orig//fs/aufs/fstype.h 1970-01-01 01:00:00.000000000 +0100 ++++ linux-3.2.0-gentoo-r1/fs/aufs/fstype.h 2012-01-17 12:11:24.701507771 +0100 +@@ -0,0 +1,496 @@ ++/* ++ * Copyright (C) 2005-2012 Junjiro R. Okajima ++ * ++ * This program, aufs is free software; you can redistribute it and/or modify ++ * it under the terms of the GNU General Public License as published by ++ * the Free Software Foundation; either version 2 of the License, or ++ * (at your option) any later version. ++ * ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ * GNU General Public License for more details. 
++ * ++ * You should have received a copy of the GNU General Public License ++ * along with this program; if not, write to the Free Software ++ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA ++ */ ++ ++/* ++ * judging filesystem type ++ */ ++ ++#ifndef __AUFS_FSTYPE_H__ ++#define __AUFS_FSTYPE_H__ ++ ++#ifdef __KERNEL__ ++ ++#include <linux/fs.h> ++#include <linux/magic.h> ++#include <linux/romfs_fs.h> ++ ++static inline int au_test_aufs(struct super_block *sb) ++{ ++ return sb->s_magic == AUFS_SUPER_MAGIC; ++} ++ ++static inline const char *au_sbtype(struct super_block *sb) ++{ ++ return sb->s_type->name; ++} ++ ++static inline int au_test_iso9660(struct super_block *sb __maybe_unused) ++{ ++#if defined(CONFIG_ISO9660_FS) || defined(CONFIG_ISO9660_FS_MODULE) ++ return sb->s_magic == ISOFS_SUPER_MAGIC; ++#else ++ return 0; ++#endif ++} ++ ++static inline int au_test_romfs(struct super_block *sb __maybe_unused) ++{ ++#if defined(CONFIG_ROMFS_FS) || defined(CONFIG_ROMFS_FS_MODULE) ++ return sb->s_magic == ROMFS_MAGIC; ++#else ++ return 0; ++#endif ++} ++ ++static inline int au_test_cramfs(struct super_block *sb __maybe_unused) ++{ ++#if defined(CONFIG_CRAMFS) || defined(CONFIG_CRAMFS_MODULE) ++ return sb->s_magic == CRAMFS_MAGIC; ++#endif ++ return 0; ++} ++ ++static inline int au_test_nfs(struct super_block *sb __maybe_unused) ++{ ++#if defined(CONFIG_NFS_FS) || defined(CONFIG_NFS_FS_MODULE) ++ return sb->s_magic == NFS_SUPER_MAGIC; ++#else ++ return 0; ++#endif ++} ++ ++static inline int au_test_fuse(struct super_block *sb __maybe_unused) ++{ ++#if defined(CONFIG_FUSE_FS) || defined(CONFIG_FUSE_FS_MODULE) ++ return sb->s_magic == FUSE_SUPER_MAGIC; ++#else ++ return 0; ++#endif ++} ++ ++static inline int au_test_xfs(struct super_block *sb __maybe_unused) ++{ ++#if defined(CONFIG_XFS_FS) || defined(CONFIG_XFS_FS_MODULE) ++ return sb->s_magic == XFS_SB_MAGIC; ++#else ++ return 0; ++#endif ++} ++ ++static inline int au_test_tmpfs(struct super_block *sb __maybe_unused) ++{ ++#ifdef CONFIG_TMPFS ++ return sb->s_magic == TMPFS_MAGIC; ++#else ++ return 0; ++#endif ++} ++ ++static inline int au_test_ecryptfs(struct super_block *sb __maybe_unused) ++{ ++#if defined(CONFIG_ECRYPT_FS) || defined(CONFIG_ECRYPT_FS_MODULE) ++ return !strcmp(au_sbtype(sb), "ecryptfs"); ++#else ++ return 0; ++#endif ++} ++ ++static inline int au_test_smbfs(struct super_block *sb __maybe_unused) ++{ ++#if defined(CONFIG_SMB_FS) || defined(CONFIG_SMB_FS_MODULE) ++ return sb->s_magic == SMB_SUPER_MAGIC; ++#else ++ return 0; ++#endif ++} ++ ++static inline int au_test_ocfs2(struct super_block *sb __maybe_unused) ++{ ++#if defined(CONFIG_OCFS2_FS) || defined(CONFIG_OCFS2_FS_MODULE) ++ return sb->s_magic == OCFS2_SUPER_MAGIC; ++#else ++ return 0; ++#endif ++} ++ ++static inline int au_test_ocfs2_dlmfs(struct super_block *sb __maybe_unused) ++{ ++#if defined(CONFIG_OCFS2_FS_O2CB) || defined(CONFIG_OCFS2_FS_O2CB_MODULE) ++ return sb->s_magic == DLMFS_MAGIC; ++#else ++ return 0; ++#endif ++} ++ ++static inline int au_test_coda(struct super_block *sb __maybe_unused) ++{ ++#if defined(CONFIG_CODA_FS) || defined(CONFIG_CODA_FS_MODULE) ++ return sb->s_magic == CODA_SUPER_MAGIC; ++#else ++ return 0; ++#endif ++} ++ ++static inline int au_test_v9fs(struct super_block *sb __maybe_unused) ++{ ++#if defined(CONFIG_9P_FS) || defined(CONFIG_9P_FS_MODULE) ++ return sb->s_magic == V9FS_MAGIC; ++#else ++ return 0; ++#endif ++} ++ ++static inline int au_test_ext4(struct super_block *sb __maybe_unused) ++{ ++#if defined(CONFIG_EXT4DEV_FS) || 
defined(CONFIG_EXT4DEV_FS_MODULE) ++ return sb->s_magic == EXT4_SUPER_MAGIC; ++#else ++ return 0; ++#endif ++} ++ ++static inline int au_test_sysv(struct super_block *sb __maybe_unused) ++{ ++#if defined(CONFIG_SYSV_FS) || defined(CONFIG_SYSV_FS_MODULE) ++ return !strcmp(au_sbtype(sb), "sysv"); ++#else ++ return 0; ++#endif ++} ++ ++static inline int au_test_ramfs(struct super_block *sb) ++{ ++ return sb->s_magic == RAMFS_MAGIC; ++} ++ ++static inline int au_test_ubifs(struct super_block *sb __maybe_unused) ++{ ++#if defined(CONFIG_UBIFS_FS) || defined(CONFIG_UBIFS_FS_MODULE) ++ return sb->s_magic == UBIFS_SUPER_MAGIC; ++#else ++ return 0; ++#endif ++} ++ ++static inline int au_test_procfs(struct super_block *sb __maybe_unused) ++{ ++#ifdef CONFIG_PROC_FS ++ return sb->s_magic == PROC_SUPER_MAGIC; ++#else ++ return 0; ++#endif ++} ++ ++static inline int au_test_sysfs(struct super_block *sb __maybe_unused) ++{ ++#ifdef CONFIG_SYSFS ++ return sb->s_magic == SYSFS_MAGIC; ++#else ++ return 0; ++#endif ++} ++ ++static inline int au_test_configfs(struct super_block *sb __maybe_unused) ++{ ++#if defined(CONFIG_CONFIGFS_FS) || defined(CONFIG_CONFIGFS_FS_MODULE) ++ return sb->s_magic == CONFIGFS_MAGIC; ++#else ++ return 0; ++#endif ++} ++ ++static inline int au_test_minix(struct super_block *sb __maybe_unused) ++{ ++#if defined(CONFIG_MINIX_FS) || defined(CONFIG_MINIX_FS_MODULE) ++ return sb->s_magic == MINIX3_SUPER_MAGIC ++ || sb->s_magic == MINIX2_SUPER_MAGIC ++ || sb->s_magic == MINIX2_SUPER_MAGIC2 ++ || sb->s_magic == MINIX_SUPER_MAGIC ++ || sb->s_magic == MINIX_SUPER_MAGIC2; ++#else ++ return 0; ++#endif ++} ++ ++static inline int au_test_cifs(struct super_block *sb __maybe_unused) ++{ ++#if defined(CONFIG_CIFS_FS) || defined(CONFIGCIFS_FS_MODULE) ++ return sb->s_magic == CIFS_MAGIC_NUMBER; ++#else ++ return 0; ++#endif ++} ++ ++static inline int au_test_fat(struct super_block *sb __maybe_unused) ++{ ++#if defined(CONFIG_FAT_FS) || defined(CONFIG_FAT_FS_MODULE) ++ return sb->s_magic == MSDOS_SUPER_MAGIC; ++#else ++ return 0; ++#endif ++} ++ ++static inline int au_test_msdos(struct super_block *sb) ++{ ++ return au_test_fat(sb); ++} ++ ++static inline int au_test_vfat(struct super_block *sb) ++{ ++ return au_test_fat(sb); ++} ++ ++static inline int au_test_securityfs(struct super_block *sb __maybe_unused) ++{ ++#ifdef CONFIG_SECURITYFS ++ return sb->s_magic == SECURITYFS_MAGIC; ++#else ++ return 0; ++#endif ++} ++ ++static inline int au_test_squashfs(struct super_block *sb __maybe_unused) ++{ ++#if defined(CONFIG_SQUASHFS) || defined(CONFIG_SQUASHFS_MODULE) ++ return sb->s_magic == SQUASHFS_MAGIC; ++#else ++ return 0; ++#endif ++} ++ ++static inline int au_test_btrfs(struct super_block *sb __maybe_unused) ++{ ++#if defined(CONFIG_BTRFS_FS) || defined(CONFIG_BTRFS_FS_MODULE) ++ return sb->s_magic == BTRFS_SUPER_MAGIC; ++#else ++ return 0; ++#endif ++} ++ ++static inline int au_test_xenfs(struct super_block *sb __maybe_unused) ++{ ++#if defined(CONFIG_XENFS) || defined(CONFIG_XENFS_MODULE) ++ return sb->s_magic == XENFS_SUPER_MAGIC; ++#else ++ return 0; ++#endif ++} ++ ++static inline int au_test_debugfs(struct super_block *sb __maybe_unused) ++{ ++#ifdef CONFIG_DEBUG_FS ++ return sb->s_magic == DEBUGFS_MAGIC; ++#else ++ return 0; ++#endif ++} ++ ++static inline int au_test_nilfs(struct super_block *sb __maybe_unused) ++{ ++#if defined(CONFIG_NILFS) || defined(CONFIG_NILFS_MODULE) ++ return sb->s_magic == NILFS_SUPER_MAGIC; ++#else ++ return 0; ++#endif ++} ++ ++static inline int 
au_test_hfsplus(struct super_block *sb __maybe_unused) ++{ ++#if defined(CONFIG_HFSPLUS_FS) || defined(CONFIG_HFSPLUS_FS_MODULE) ++ return sb->s_magic == HFSPLUS_SUPER_MAGIC; ++#else ++ return 0; ++#endif ++} ++ ++/* ---------------------------------------------------------------------- */ ++/* ++ * they can't be an aufs branch. ++ */ ++static inline int au_test_fs_unsuppoted(struct super_block *sb) ++{ ++ return ++#ifndef CONFIG_AUFS_BR_RAMFS ++ au_test_ramfs(sb) || ++#endif ++ au_test_procfs(sb) ++ || au_test_sysfs(sb) ++ || au_test_configfs(sb) ++ || au_test_debugfs(sb) ++ || au_test_securityfs(sb) ++ || au_test_xenfs(sb) ++ || au_test_ecryptfs(sb) ++ /* || !strcmp(au_sbtype(sb), "unionfs") */ ++ || au_test_aufs(sb); /* will be supported in next version */ ++} ++ ++/* ++ * If the filesystem supports NFS-export, then it has to support NULL as ++ * a nameidata parameter for ->create(), ->lookup() and ->d_revalidate(). ++ * We can apply this principle when we handle a lower filesystem. ++ */ ++static inline int au_test_fs_null_nd(struct super_block *sb) ++{ ++ return !!sb->s_export_op; ++} ++ ++static inline int au_test_fs_remote(struct super_block *sb) ++{ ++ return !au_test_tmpfs(sb) ++#ifdef CONFIG_AUFS_BR_RAMFS ++ && !au_test_ramfs(sb) ++#endif ++ && !(sb->s_type->fs_flags & FS_REQUIRES_DEV); ++} ++ ++/* ---------------------------------------------------------------------- */ ++ ++/* ++ * Note: these functions (below) are created after reading ->getattr() in all ++ * filesystems under linux/fs. it means we have to do so in every update... ++ */ ++ ++/* ++ * some filesystems require getattr to refresh the inode attributes before ++ * referencing. ++ * in most cases, we can rely on the inode attribute in NFS (or every remote fs) ++ * and leave the work for d_revalidate() ++ */ ++static inline int au_test_fs_refresh_iattr(struct super_block *sb) ++{ ++ return au_test_nfs(sb) ++ || au_test_fuse(sb) ++ /* || au_test_smbfs(sb) */ /* untested */ ++ /* || au_test_ocfs2(sb) */ /* untested */ ++ /* || au_test_btrfs(sb) */ /* untested */ ++ /* || au_test_coda(sb) */ /* untested */ ++ /* || au_test_v9fs(sb) */ /* untested */ ++ ; ++} ++ ++/* ++ * filesystems which don't maintain i_size or i_blocks. ++ */ ++static inline int au_test_fs_bad_iattr_size(struct super_block *sb) ++{ ++ return au_test_xfs(sb) ++ || au_test_btrfs(sb) ++ || au_test_ubifs(sb) ++ || au_test_hfsplus(sb) /* maintained, but incorrect */ ++ /* || au_test_ext4(sb) */ /* untested */ ++ /* || au_test_ocfs2(sb) */ /* untested */ ++ /* || au_test_ocfs2_dlmfs(sb) */ /* untested */ ++ /* || au_test_sysv(sb) */ /* untested */ ++ /* || au_test_minix(sb) */ /* untested */ ++ ; ++} ++ ++/* ++ * filesystems which don't store the correct value in some of their inode ++ * attributes. ++ */ ++static inline int au_test_fs_bad_iattr(struct super_block *sb) ++{ ++ return au_test_fs_bad_iattr_size(sb) ++ /* || au_test_cifs(sb) */ /* untested */ ++ || au_test_fat(sb) ++ || au_test_msdos(sb) ++ || au_test_vfat(sb); ++} ++ ++/* they don't check i_nlink in link(2) */ ++static inline int au_test_fs_no_limit_nlink(struct super_block *sb) ++{ ++ return au_test_tmpfs(sb) ++#ifdef CONFIG_AUFS_BR_RAMFS ++ || au_test_ramfs(sb) ++#endif ++ || au_test_ubifs(sb) ++ || au_test_btrfs(sb) ++ || au_test_hfsplus(sb); ++} ++ ++/* ++ * filesystems which sets S_NOATIME and S_NOCMTIME. 
++ */ ++static inline int au_test_fs_notime(struct super_block *sb) ++{ ++ return au_test_nfs(sb) ++ || au_test_fuse(sb) ++ || au_test_ubifs(sb) ++ /* || au_test_cifs(sb) */ /* untested */ ++ ; ++} ++ ++/* ++ * filesystems which requires replacing i_mapping. ++ */ ++static inline int au_test_fs_bad_mapping(struct super_block *sb) ++{ ++ return au_test_fuse(sb) ++ || au_test_ubifs(sb); ++} ++ ++/* temporary support for i#1 in cramfs */ ++static inline int au_test_fs_unique_ino(struct inode *inode) ++{ ++ if (au_test_cramfs(inode->i_sb)) ++ return inode->i_ino != 1; ++ return 1; ++} ++ ++/* ---------------------------------------------------------------------- */ ++ ++/* ++ * the filesystem where the xino files placed must support i/o after unlink and ++ * maintain i_size and i_blocks. ++ */ ++static inline int au_test_fs_bad_xino(struct super_block *sb) ++{ ++ return au_test_fs_remote(sb) ++ || au_test_fs_bad_iattr_size(sb) ++#ifdef CONFIG_AUFS_BR_RAMFS ++ || !(au_test_ramfs(sb) || au_test_fs_null_nd(sb)) ++#else ++ || !au_test_fs_null_nd(sb) /* to keep xino code simple */ ++#endif ++ /* don't want unnecessary work for xino */ ++ || au_test_aufs(sb) ++ || au_test_ecryptfs(sb) ++ || au_test_nilfs(sb); ++} ++ ++static inline int au_test_fs_trunc_xino(struct super_block *sb) ++{ ++ return au_test_tmpfs(sb) ++ || au_test_ramfs(sb); ++} ++ ++/* ++ * test if the @sb is real-readonly. ++ */ ++static inline int au_test_fs_rr(struct super_block *sb) ++{ ++ return au_test_squashfs(sb) ++ || au_test_iso9660(sb) ++ || au_test_cramfs(sb) ++ || au_test_romfs(sb); ++} ++ ++#endif /* __KERNEL__ */ ++#endif /* __AUFS_FSTYPE_H__ */ +diff -uNr linux-3.2.0-gentoo-r1.orig//fs/aufs/hfsnotify.c linux-3.2.0-gentoo-r1/fs/aufs/hfsnotify.c +--- linux-3.2.0-gentoo-r1.orig//fs/aufs/hfsnotify.c 1970-01-01 01:00:00.000000000 +0100 ++++ linux-3.2.0-gentoo-r1/fs/aufs/hfsnotify.c 2012-01-17 12:11:24.729285799 +0100 +@@ -0,0 +1,247 @@ ++/* ++ * Copyright (C) 2005-2012 Junjiro R. Okajima ++ * ++ * This program, aufs is free software; you can redistribute it and/or modify ++ * it under the terms of the GNU General Public License as published by ++ * the Free Software Foundation; either version 2 of the License, or ++ * (at your option) any later version. ++ * ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ * GNU General Public License for more details. 
++ * ++ * You should have received a copy of the GNU General Public License ++ * along with this program; if not, write to the Free Software ++ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA ++ */ ++ ++/* ++ * fsnotify for the lower directories ++ */ ++ ++#include "aufs.h" ++ ++/* FS_IN_IGNORED is unnecessary */ ++static const __u32 AuHfsnMask = (FS_MOVED_TO | FS_MOVED_FROM | FS_DELETE ++ | FS_CREATE | FS_EVENT_ON_CHILD); ++static DECLARE_WAIT_QUEUE_HEAD(au_hfsn_wq); ++ ++static void au_hfsn_free_mark(struct fsnotify_mark *mark) ++{ ++ struct au_hnotify *hn = container_of(mark, struct au_hnotify, ++ hn_mark); ++ AuDbg("here\n"); ++ hn->hn_mark_dead = 1; ++ smp_mb(); ++ wake_up_all(&au_hfsn_wq); ++} ++ ++static int au_hfsn_alloc(struct au_hinode *hinode) ++{ ++ struct au_hnotify *hn; ++ struct super_block *sb; ++ struct au_branch *br; ++ struct fsnotify_mark *mark; ++ aufs_bindex_t bindex; ++ ++ hn = hinode->hi_notify; ++ sb = hn->hn_aufs_inode->i_sb; ++ bindex = au_br_index(sb, hinode->hi_id); ++ br = au_sbr(sb, bindex); ++ hn->hn_mark_dead = 0; ++ mark = &hn->hn_mark; ++ fsnotify_init_mark(mark, au_hfsn_free_mark); ++ mark->mask = AuHfsnMask; ++ /* ++ * by udba rename or rmdir, aufs assign a new inode to the known ++ * h_inode, so specify 1 to allow dups. ++ */ ++ return fsnotify_add_mark(mark, br->br_hfsn_group, hinode->hi_inode, ++ /*mnt*/NULL, /*allow_dups*/1); ++} ++ ++static void au_hfsn_free(struct au_hinode *hinode) ++{ ++ struct au_hnotify *hn; ++ struct fsnotify_mark *mark; ++ ++ hn = hinode->hi_notify; ++ mark = &hn->hn_mark; ++ fsnotify_destroy_mark(mark); ++ fsnotify_put_mark(mark); ++ ++ /* TODO: bad approach */ ++ wait_event(au_hfsn_wq, hn->hn_mark_dead); ++} ++ ++/* ---------------------------------------------------------------------- */ ++ ++static void au_hfsn_ctl(struct au_hinode *hinode, int do_set) ++{ ++ struct fsnotify_mark *mark; ++ ++ mark = &hinode->hi_notify->hn_mark; ++ spin_lock(&mark->lock); ++ if (do_set) { ++ AuDebugOn(mark->mask & AuHfsnMask); ++ mark->mask |= AuHfsnMask; ++ } else { ++ AuDebugOn(!(mark->mask & AuHfsnMask)); ++ mark->mask &= ~AuHfsnMask; ++ } ++ spin_unlock(&mark->lock); ++ /* fsnotify_recalc_inode_mask(hinode->hi_inode); */ ++} ++ ++/* ---------------------------------------------------------------------- */ ++ ++/* #define AuDbgHnotify */ ++#ifdef AuDbgHnotify ++static char *au_hfsn_name(u32 mask) ++{ ++#ifdef CONFIG_AUFS_DEBUG ++#define test_ret(flag) if (mask & flag) \ ++ return #flag; ++ test_ret(FS_ACCESS); ++ test_ret(FS_MODIFY); ++ test_ret(FS_ATTRIB); ++ test_ret(FS_CLOSE_WRITE); ++ test_ret(FS_CLOSE_NOWRITE); ++ test_ret(FS_OPEN); ++ test_ret(FS_MOVED_FROM); ++ test_ret(FS_MOVED_TO); ++ test_ret(FS_CREATE); ++ test_ret(FS_DELETE); ++ test_ret(FS_DELETE_SELF); ++ test_ret(FS_MOVE_SELF); ++ test_ret(FS_UNMOUNT); ++ test_ret(FS_Q_OVERFLOW); ++ test_ret(FS_IN_IGNORED); ++ test_ret(FS_IN_ISDIR); ++ test_ret(FS_IN_ONESHOT); ++ test_ret(FS_EVENT_ON_CHILD); ++ return ""; ++#undef test_ret ++#else ++ return "??"; ++#endif ++} ++#endif ++ ++/* ---------------------------------------------------------------------- */ ++ ++static int au_hfsn_handle_event(struct fsnotify_group *group, ++ struct fsnotify_mark *inode_mark, ++ struct fsnotify_mark *vfsmount_mark, ++ struct fsnotify_event *event) ++{ ++ int err; ++ struct au_hnotify *hnotify; ++ struct inode *h_dir, *h_inode; ++ __u32 mask; ++ struct qstr h_child_qstr = { ++ .name = event->file_name, ++ .len = event->name_len ++ }; ++ ++ AuDebugOn(event->data_type != 
FSNOTIFY_EVENT_INODE); ++ ++ err = 0; ++ /* if FS_UNMOUNT happens, there must be another bug */ ++ mask = event->mask; ++ AuDebugOn(mask & FS_UNMOUNT); ++ if (mask & (FS_IN_IGNORED | FS_UNMOUNT)) ++ goto out; ++ ++ h_dir = event->to_tell; ++ h_inode = event->inode; ++#ifdef AuDbgHnotify ++ au_debug(1); ++ if (1 || h_child_qstr.len != sizeof(AUFS_XINO_FNAME) - 1 ++ || strncmp(h_child_qstr.name, AUFS_XINO_FNAME, h_child_qstr.len)) { ++ AuDbg("i%lu, mask 0x%x %s, hcname %.*s, hi%lu\n", ++ h_dir->i_ino, mask, au_hfsn_name(mask), ++ AuLNPair(&h_child_qstr), h_inode ? h_inode->i_ino : 0); ++ /* WARN_ON(1); */ ++ } ++ au_debug(0); ++#endif ++ ++ AuDebugOn(!inode_mark); ++ hnotify = container_of(inode_mark, struct au_hnotify, hn_mark); ++ err = au_hnotify(h_dir, hnotify, mask, &h_child_qstr, h_inode); ++ ++out: ++ return err; ++} ++ ++/* isn't it waste to ask every registered 'group'? */ ++/* copied from linux/fs/notify/inotify/inotify_fsnotiry.c */ ++/* it should be exported to modules */ ++static bool au_hfsn_should_send_event(struct fsnotify_group *group, ++ struct inode *h_inode, ++ struct fsnotify_mark *inode_mark, ++ struct fsnotify_mark *vfsmount_mark, ++ __u32 mask, void *data, int data_type) ++{ ++ mask = (mask & ~FS_EVENT_ON_CHILD); ++ return inode_mark->mask & mask; ++} ++ ++static struct fsnotify_ops au_hfsn_ops = { ++ .should_send_event = au_hfsn_should_send_event, ++ .handle_event = au_hfsn_handle_event ++}; ++ ++/* ---------------------------------------------------------------------- */ ++ ++static void au_hfsn_fin_br(struct au_branch *br) ++{ ++ if (br->br_hfsn_group) ++ fsnotify_put_group(br->br_hfsn_group); ++} ++ ++static int au_hfsn_init_br(struct au_branch *br, int perm) ++{ ++ br->br_hfsn_group = NULL; ++ br->br_hfsn_ops = au_hfsn_ops; ++ return 0; ++} ++ ++static int au_hfsn_reset_br(unsigned int udba, struct au_branch *br, int perm) ++{ ++ int err; ++ ++ err = 0; ++ if (udba != AuOpt_UDBA_HNOTIFY ++ || !au_br_hnotifyable(perm)) { ++ au_hfsn_fin_br(br); ++ br->br_hfsn_group = NULL; ++ goto out; ++ } ++ ++ if (br->br_hfsn_group) ++ goto out; ++ ++ br->br_hfsn_group = fsnotify_alloc_group(&br->br_hfsn_ops); ++ if (IS_ERR(br->br_hfsn_group)) { ++ err = PTR_ERR(br->br_hfsn_group); ++ pr_err("fsnotify_alloc_group() failed, %d\n", err); ++ br->br_hfsn_group = NULL; ++ } ++ ++out: ++ AuTraceErr(err); ++ return err; ++} ++ ++const struct au_hnotify_op au_hnotify_op = { ++ .ctl = au_hfsn_ctl, ++ .alloc = au_hfsn_alloc, ++ .free = au_hfsn_free, ++ ++ .reset_br = au_hfsn_reset_br, ++ .fin_br = au_hfsn_fin_br, ++ .init_br = au_hfsn_init_br ++}; +diff -uNr linux-3.2.0-gentoo-r1.orig//fs/aufs/hfsplus.c linux-3.2.0-gentoo-r1/fs/aufs/hfsplus.c +--- linux-3.2.0-gentoo-r1.orig//fs/aufs/hfsplus.c 1970-01-01 01:00:00.000000000 +0100 ++++ linux-3.2.0-gentoo-r1/fs/aufs/hfsplus.c 2012-01-17 12:11:24.729285799 +0100 +@@ -0,0 +1,57 @@ ++/* ++ * Copyright (C) 2010-2012 Junjiro R. Okajima ++ * ++ * This program, aufs is free software; you can redistribute it and/or modify ++ * it under the terms of the GNU General Public License as published by ++ * the Free Software Foundation; either version 2 of the License, or ++ * (at your option) any later version. ++ * ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ * GNU General Public License for more details. 
++ * ++ * You should have received a copy of the GNU General Public License ++ * along with this program; if not, write to the Free Software ++ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA ++ */ ++ ++/* ++ * special support for filesystems which acquire an inode mutex ++ * at the final close of a file, e.g. hfsplus. ++ * ++ * This trick is very simple and stupid, just open the file before the really ++ * necessary open to tell hfsplus that this is not the final closing. ++ * The caller should call au_h_open_pre() after acquiring the inode mutex, ++ * and au_h_open_post() after releasing it. ++ */ ++ ++#include "aufs.h" ++ ++struct file *au_h_open_pre(struct dentry *dentry, aufs_bindex_t bindex) ++{ ++ struct file *h_file; ++ struct dentry *h_dentry; ++ ++ h_dentry = au_h_dptr(dentry, bindex); ++ AuDebugOn(!h_dentry); ++ AuDebugOn(!h_dentry->d_inode); ++ IMustLock(h_dentry->d_inode); ++ ++ h_file = NULL; ++ if (au_test_hfsplus(h_dentry->d_sb) ++ && S_ISREG(h_dentry->d_inode->i_mode)) ++ h_file = au_h_open(dentry, bindex, ++ O_RDONLY | O_NOATIME | O_LARGEFILE, ++ /*file*/NULL); ++ return h_file; ++} ++ ++void au_h_open_post(struct dentry *dentry, aufs_bindex_t bindex, ++ struct file *h_file) ++{ ++ if (h_file) { ++ fput(h_file); ++ au_sbr_put(dentry->d_sb, bindex); ++ } ++} +diff -uNr linux-3.2.0-gentoo-r1.orig//fs/aufs/hnotify.c linux-3.2.0-gentoo-r1/fs/aufs/hnotify.c +--- linux-3.2.0-gentoo-r1.orig//fs/aufs/hnotify.c 1970-01-01 01:00:00.000000000 +0100 ++++ linux-3.2.0-gentoo-r1/fs/aufs/hnotify.c 2012-01-17 12:11:24.736230305 +0100 +@@ -0,0 +1,712 @@ ++/* ++ * Copyright (C) 2005-2012 Junjiro R. Okajima ++ * ++ * This program, aufs is free software; you can redistribute it and/or modify ++ * it under the terms of the GNU General Public License as published by ++ * the Free Software Foundation; either version 2 of the License, or ++ * (at your option) any later version. ++ * ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ * GNU General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public License ++ * along with this program; if not, write to the Free Software ++ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA ++ */ ++ ++/* ++ * abstraction to notify the direct changes on lower directories ++ */ ++ ++#include "aufs.h" ++ ++int au_hn_alloc(struct au_hinode *hinode, struct inode *inode) ++{ ++ int err; ++ struct au_hnotify *hn; ++ ++ err = -ENOMEM; ++ hn = au_cache_alloc_hnotify(); ++ if (hn) { ++ hn->hn_aufs_inode = inode; ++ hinode->hi_notify = hn; ++ err = au_hnotify_op.alloc(hinode); ++ AuTraceErr(err); ++ if (unlikely(err)) { ++ hinode->hi_notify = NULL; ++ au_cache_free_hnotify(hn); ++ /* ++ * The upper dir was removed by udba, but the same named ++ * dir is left. In this case, aufs assigns a new inode ++ * number and sets the monitor again. ++ * For the lower dir, the old monitor is still left. 
++ */ ++ if (err == -EEXIST) ++ err = 0; ++ } ++ } ++ ++ AuTraceErr(err); ++ return err; ++} ++ ++void au_hn_free(struct au_hinode *hinode) ++{ ++ struct au_hnotify *hn; ++ ++ hn = hinode->hi_notify; ++ if (hn) { ++ au_hnotify_op.free(hinode); ++ au_cache_free_hnotify(hn); ++ hinode->hi_notify = NULL; ++ } ++} ++ ++/* ---------------------------------------------------------------------- */ ++ ++void au_hn_ctl(struct au_hinode *hinode, int do_set) ++{ ++ if (hinode->hi_notify) ++ au_hnotify_op.ctl(hinode, do_set); ++} ++ ++void au_hn_reset(struct inode *inode, unsigned int flags) ++{ ++ aufs_bindex_t bindex, bend; ++ struct inode *hi; ++ struct dentry *iwhdentry; ++ ++ bend = au_ibend(inode); ++ for (bindex = au_ibstart(inode); bindex <= bend; bindex++) { ++ hi = au_h_iptr(inode, bindex); ++ if (!hi) ++ continue; ++ ++ /* mutex_lock_nested(&hi->i_mutex, AuLsc_I_CHILD); */ ++ iwhdentry = au_hi_wh(inode, bindex); ++ if (iwhdentry) ++ dget(iwhdentry); ++ au_igrab(hi); ++ au_set_h_iptr(inode, bindex, NULL, 0); ++ au_set_h_iptr(inode, bindex, au_igrab(hi), ++ flags & ~AuHi_XINO); ++ iput(hi); ++ dput(iwhdentry); ++ /* mutex_unlock(&hi->i_mutex); */ ++ } ++} ++ ++/* ---------------------------------------------------------------------- */ ++ ++static int hn_xino(struct inode *inode, struct inode *h_inode) ++{ ++ int err; ++ aufs_bindex_t bindex, bend, bfound, bstart; ++ struct inode *h_i; ++ ++ err = 0; ++ if (unlikely(inode->i_ino == AUFS_ROOT_INO)) { ++ pr_warning("branch root dir was changed\n"); ++ goto out; ++ } ++ ++ bfound = -1; ++ bend = au_ibend(inode); ++ bstart = au_ibstart(inode); ++#if 0 /* reserved for future use */ ++ if (bindex == bend) { ++ /* keep this ino in rename case */ ++ goto out; ++ } ++#endif ++ for (bindex = bstart; bindex <= bend; bindex++) ++ if (au_h_iptr(inode, bindex) == h_inode) { ++ bfound = bindex; ++ break; ++ } ++ if (bfound < 0) ++ goto out; ++ ++ for (bindex = bstart; bindex <= bend; bindex++) { ++ h_i = au_h_iptr(inode, bindex); ++ if (!h_i) ++ continue; ++ ++ err = au_xino_write(inode->i_sb, bindex, h_i->i_ino, /*ino*/0); ++ /* ignore this error */ ++ /* bad action? */ ++ } ++ ++ /* children inode number will be broken */ ++ ++out: ++ AuTraceErr(err); ++ return err; ++} ++ ++static int hn_gen_tree(struct dentry *dentry) ++{ ++ int err, i, j, ndentry; ++ struct au_dcsub_pages dpages; ++ struct au_dpage *dpage; ++ struct dentry **dentries; ++ ++ err = au_dpages_init(&dpages, GFP_NOFS); ++ if (unlikely(err)) ++ goto out; ++ err = au_dcsub_pages(&dpages, dentry, NULL, NULL); ++ if (unlikely(err)) ++ goto out_dpages; ++ ++ for (i = 0; i < dpages.ndpage; i++) { ++ dpage = dpages.dpages + i; ++ dentries = dpage->dentries; ++ ndentry = dpage->ndentry; ++ for (j = 0; j < ndentry; j++) { ++ struct dentry *d; ++ ++ d = dentries[j]; ++ if (IS_ROOT(d)) ++ continue; ++ ++ au_digen_dec(d); ++ if (d->d_inode) ++ /* todo: reset children xino? ++ cached children only? */ ++ au_iigen_dec(d->d_inode); ++ } ++ } ++ ++out_dpages: ++ au_dpages_free(&dpages); ++ ++#if 0 ++ /* discard children */ ++ dentry_unhash(dentry); ++ dput(dentry); ++#endif ++out: ++ return err; ++} ++ ++/* ++ * return 0 if processed. 
++ */ ++static int hn_gen_by_inode(char *name, unsigned int nlen, struct inode *inode, ++ const unsigned int isdir) ++{ ++ int err; ++ struct dentry *d; ++ struct qstr *dname; ++ ++ err = 1; ++ if (unlikely(inode->i_ino == AUFS_ROOT_INO)) { ++ pr_warning("branch root dir was changed\n"); ++ err = 0; ++ goto out; ++ } ++ ++ if (!isdir) { ++ AuDebugOn(!name); ++ au_iigen_dec(inode); ++ spin_lock(&inode->i_lock); ++ list_for_each_entry(d, &inode->i_dentry, d_alias) { ++ spin_lock(&d->d_lock); ++ dname = &d->d_name; ++ if (dname->len != nlen ++ && memcmp(dname->name, name, nlen)) { ++ spin_unlock(&d->d_lock); ++ continue; ++ } ++ err = 0; ++ au_digen_dec(d); ++ spin_unlock(&d->d_lock); ++ break; ++ } ++ spin_unlock(&inode->i_lock); ++ } else { ++ au_fset_si(au_sbi(inode->i_sb), FAILED_REFRESH_DIR); ++ d = d_find_alias(inode); ++ if (!d) { ++ au_iigen_dec(inode); ++ goto out; ++ } ++ ++ spin_lock(&d->d_lock); ++ dname = &d->d_name; ++ if (dname->len == nlen && !memcmp(dname->name, name, nlen)) { ++ spin_unlock(&d->d_lock); ++ err = hn_gen_tree(d); ++ spin_lock(&d->d_lock); ++ } ++ spin_unlock(&d->d_lock); ++ dput(d); ++ } ++ ++out: ++ AuTraceErr(err); ++ return err; ++} ++ ++static int hn_gen_by_name(struct dentry *dentry, const unsigned int isdir) ++{ ++ int err; ++ struct inode *inode; ++ ++ inode = dentry->d_inode; ++ if (IS_ROOT(dentry) ++ /* || (inode && inode->i_ino == AUFS_ROOT_INO) */ ++ ) { ++ pr_warning("branch root dir was changed\n"); ++ return 0; ++ } ++ ++ err = 0; ++ if (!isdir) { ++ au_digen_dec(dentry); ++ if (inode) ++ au_iigen_dec(inode); ++ } else { ++ au_fset_si(au_sbi(dentry->d_sb), FAILED_REFRESH_DIR); ++ if (inode) ++ err = hn_gen_tree(dentry); ++ } ++ ++ AuTraceErr(err); ++ return err; ++} ++ ++/* ---------------------------------------------------------------------- */ ++ ++/* hnotify job flags */ ++#define AuHnJob_XINO0 1 ++#define AuHnJob_GEN (1 << 1) ++#define AuHnJob_DIRENT (1 << 2) ++#define AuHnJob_ISDIR (1 << 3) ++#define AuHnJob_TRYXINO0 (1 << 4) ++#define AuHnJob_MNTPNT (1 << 5) ++#define au_ftest_hnjob(flags, name) ((flags) & AuHnJob_##name) ++#define au_fset_hnjob(flags, name) \ ++ do { (flags) |= AuHnJob_##name; } while (0) ++#define au_fclr_hnjob(flags, name) \ ++ do { (flags) &= ~AuHnJob_##name; } while (0) ++ ++enum { ++ AuHn_CHILD, ++ AuHn_PARENT, ++ AuHnLast ++}; ++ ++struct au_hnotify_args { ++ struct inode *h_dir, *dir, *h_child_inode; ++ u32 mask; ++ unsigned int flags[AuHnLast]; ++ unsigned int h_child_nlen; ++ char h_child_name[]; ++}; ++ ++struct hn_job_args { ++ unsigned int flags; ++ struct inode *inode, *h_inode, *dir, *h_dir; ++ struct dentry *dentry; ++ char *h_name; ++ int h_nlen; ++}; ++ ++static int hn_job(struct hn_job_args *a) ++{ ++ const unsigned int isdir = au_ftest_hnjob(a->flags, ISDIR); ++ ++ /* reset xino */ ++ if (au_ftest_hnjob(a->flags, XINO0) && a->inode) ++ hn_xino(a->inode, a->h_inode); /* ignore this error */ ++ ++ if (au_ftest_hnjob(a->flags, TRYXINO0) ++ && a->inode ++ && a->h_inode) { ++ mutex_lock_nested(&a->h_inode->i_mutex, AuLsc_I_CHILD); ++ if (!a->h_inode->i_nlink) ++ hn_xino(a->inode, a->h_inode); /* ignore this error */ ++ mutex_unlock(&a->h_inode->i_mutex); ++ } ++ ++ /* make the generation obsolete */ ++ if (au_ftest_hnjob(a->flags, GEN)) { ++ int err = -1; ++ if (a->inode) ++ err = hn_gen_by_inode(a->h_name, a->h_nlen, a->inode, ++ isdir); ++ if (err && a->dentry) ++ hn_gen_by_name(a->dentry, isdir); ++ /* ignore this error */ ++ } ++ ++ /* make dir entries obsolete */ ++ if (au_ftest_hnjob(a->flags, DIRENT) 
&& a->inode) { ++ struct au_vdir *vdir; ++ ++ vdir = au_ivdir(a->inode); ++ if (vdir) ++ vdir->vd_jiffy = 0; ++ /* IMustLock(a->inode); */ ++ /* a->inode->i_version++; */ ++ } ++ ++ /* can do nothing but warn */ ++ if (au_ftest_hnjob(a->flags, MNTPNT) ++ && a->dentry ++ && d_mountpoint(a->dentry)) ++ pr_warning("mount-point %.*s is removed or renamed\n", ++ AuDLNPair(a->dentry)); ++ ++ return 0; ++} ++ ++/* ---------------------------------------------------------------------- */ ++ ++static struct dentry *lookup_wlock_by_name(char *name, unsigned int nlen, ++ struct inode *dir) ++{ ++ struct dentry *dentry, *d, *parent; ++ struct qstr *dname; ++ ++ parent = d_find_alias(dir); ++ if (!parent) ++ return NULL; ++ ++ dentry = NULL; ++ spin_lock(&parent->d_lock); ++ list_for_each_entry(d, &parent->d_subdirs, d_u.d_child) { ++ /* AuDbg("%.*s\n", AuDLNPair(d)); */ ++ spin_lock_nested(&d->d_lock, DENTRY_D_LOCK_NESTED); ++ dname = &d->d_name; ++ if (dname->len != nlen || memcmp(dname->name, name, nlen)) ++ goto cont_unlock; ++ if (au_di(d)) ++ au_digen_dec(d); ++ else ++ goto cont_unlock; ++ if (d->d_count) { ++ dentry = dget_dlock(d); ++ spin_unlock(&d->d_lock); ++ break; ++ } ++ ++ cont_unlock: ++ spin_unlock(&d->d_lock); ++ } ++ spin_unlock(&parent->d_lock); ++ dput(parent); ++ ++ if (dentry) ++ di_write_lock_child(dentry); ++ ++ return dentry; ++} ++ ++static struct inode *lookup_wlock_by_ino(struct super_block *sb, ++ aufs_bindex_t bindex, ino_t h_ino) ++{ ++ struct inode *inode; ++ ino_t ino; ++ int err; ++ ++ inode = NULL; ++ err = au_xino_read(sb, bindex, h_ino, &ino); ++ if (!err && ino) ++ inode = ilookup(sb, ino); ++ if (!inode) ++ goto out; ++ ++ if (unlikely(inode->i_ino == AUFS_ROOT_INO)) { ++ pr_warning("wrong root branch\n"); ++ iput(inode); ++ inode = NULL; ++ goto out; ++ } ++ ++ ii_write_lock_child(inode); ++ ++out: ++ return inode; ++} ++ ++static void au_hn_bh(void *_args) ++{ ++ struct au_hnotify_args *a = _args; ++ struct super_block *sb; ++ aufs_bindex_t bindex, bend, bfound; ++ unsigned char xino, try_iput; ++ int err; ++ struct inode *inode; ++ ino_t h_ino; ++ struct hn_job_args args; ++ struct dentry *dentry; ++ struct au_sbinfo *sbinfo; ++ ++ AuDebugOn(!_args); ++ AuDebugOn(!a->h_dir); ++ AuDebugOn(!a->dir); ++ AuDebugOn(!a->mask); ++ AuDbg("mask 0x%x, i%lu, hi%lu, hci%lu\n", ++ a->mask, a->dir->i_ino, a->h_dir->i_ino, ++ a->h_child_inode ? a->h_child_inode->i_ino : 0); ++ ++ inode = NULL; ++ dentry = NULL; ++ /* ++ * do not lock a->dir->i_mutex here ++ * because of d_revalidate() may cause a deadlock. 
++ */ ++ sb = a->dir->i_sb; ++ AuDebugOn(!sb); ++ sbinfo = au_sbi(sb); ++ AuDebugOn(!sbinfo); ++ si_write_lock(sb, AuLock_NOPLMW); ++ ++ ii_read_lock_parent(a->dir); ++ bfound = -1; ++ bend = au_ibend(a->dir); ++ for (bindex = au_ibstart(a->dir); bindex <= bend; bindex++) ++ if (au_h_iptr(a->dir, bindex) == a->h_dir) { ++ bfound = bindex; ++ break; ++ } ++ ii_read_unlock(a->dir); ++ if (unlikely(bfound < 0)) ++ goto out; ++ ++ xino = !!au_opt_test(au_mntflags(sb), XINO); ++ h_ino = 0; ++ if (a->h_child_inode) ++ h_ino = a->h_child_inode->i_ino; ++ ++ if (a->h_child_nlen ++ && (au_ftest_hnjob(a->flags[AuHn_CHILD], GEN) ++ || au_ftest_hnjob(a->flags[AuHn_CHILD], MNTPNT))) ++ dentry = lookup_wlock_by_name(a->h_child_name, a->h_child_nlen, ++ a->dir); ++ try_iput = 0; ++ if (dentry) ++ inode = dentry->d_inode; ++ if (xino && !inode && h_ino ++ && (au_ftest_hnjob(a->flags[AuHn_CHILD], XINO0) ++ || au_ftest_hnjob(a->flags[AuHn_CHILD], TRYXINO0) ++ || au_ftest_hnjob(a->flags[AuHn_CHILD], GEN))) { ++ inode = lookup_wlock_by_ino(sb, bfound, h_ino); ++ try_iput = 1; ++ } ++ ++ args.flags = a->flags[AuHn_CHILD]; ++ args.dentry = dentry; ++ args.inode = inode; ++ args.h_inode = a->h_child_inode; ++ args.dir = a->dir; ++ args.h_dir = a->h_dir; ++ args.h_name = a->h_child_name; ++ args.h_nlen = a->h_child_nlen; ++ err = hn_job(&args); ++ if (dentry) { ++ if (au_di(dentry)) ++ di_write_unlock(dentry); ++ dput(dentry); ++ } ++ if (inode && try_iput) { ++ ii_write_unlock(inode); ++ iput(inode); ++ } ++ ++ ii_write_lock_parent(a->dir); ++ args.flags = a->flags[AuHn_PARENT]; ++ args.dentry = NULL; ++ args.inode = a->dir; ++ args.h_inode = a->h_dir; ++ args.dir = NULL; ++ args.h_dir = NULL; ++ args.h_name = NULL; ++ args.h_nlen = 0; ++ err = hn_job(&args); ++ ii_write_unlock(a->dir); ++ ++out: ++ iput(a->h_child_inode); ++ iput(a->h_dir); ++ iput(a->dir); ++ si_write_unlock(sb); ++ au_nwt_done(&sbinfo->si_nowait); ++ kfree(a); ++} ++ ++/* ---------------------------------------------------------------------- */ ++ ++int au_hnotify(struct inode *h_dir, struct au_hnotify *hnotify, u32 mask, ++ struct qstr *h_child_qstr, struct inode *h_child_inode) ++{ ++ int err, len; ++ unsigned int flags[AuHnLast], f; ++ unsigned char isdir, isroot, wh; ++ struct inode *dir; ++ struct au_hnotify_args *args; ++ char *p, *h_child_name; ++ ++ err = 0; ++ AuDebugOn(!hnotify || !hnotify->hn_aufs_inode); ++ dir = igrab(hnotify->hn_aufs_inode); ++ if (!dir) ++ goto out; ++ ++ isroot = (dir->i_ino == AUFS_ROOT_INO); ++ wh = 0; ++ h_child_name = (void *)h_child_qstr->name; ++ len = h_child_qstr->len; ++ if (h_child_name) { ++ if (len > AUFS_WH_PFX_LEN ++ && !memcmp(h_child_name, AUFS_WH_PFX, AUFS_WH_PFX_LEN)) { ++ h_child_name += AUFS_WH_PFX_LEN; ++ len -= AUFS_WH_PFX_LEN; ++ wh = 1; ++ } ++ } ++ ++ isdir = 0; ++ if (h_child_inode) ++ isdir = !!S_ISDIR(h_child_inode->i_mode); ++ flags[AuHn_PARENT] = AuHnJob_ISDIR; ++ flags[AuHn_CHILD] = 0; ++ if (isdir) ++ flags[AuHn_CHILD] = AuHnJob_ISDIR; ++ au_fset_hnjob(flags[AuHn_PARENT], DIRENT); ++ au_fset_hnjob(flags[AuHn_CHILD], GEN); ++ switch (mask & FS_EVENTS_POSS_ON_CHILD) { ++ case FS_MOVED_FROM: ++ case FS_MOVED_TO: ++ au_fset_hnjob(flags[AuHn_CHILD], XINO0); ++ au_fset_hnjob(flags[AuHn_CHILD], MNTPNT); ++ /*FALLTHROUGH*/ ++ case FS_CREATE: ++ AuDebugOn(!h_child_name || !h_child_inode); ++ break; ++ ++ case FS_DELETE: ++ /* ++ * aufs never be able to get this child inode. ++ * revalidation should be in d_revalidate() ++ * by checking i_nlink, i_generation or d_unhashed(). 
++ */ ++ AuDebugOn(!h_child_name); ++ au_fset_hnjob(flags[AuHn_CHILD], TRYXINO0); ++ au_fset_hnjob(flags[AuHn_CHILD], MNTPNT); ++ break; ++ ++ default: ++ AuDebugOn(1); ++ } ++ ++ if (wh) ++ h_child_inode = NULL; ++ ++ err = -ENOMEM; ++ /* iput() and kfree() will be called in au_hnotify() */ ++ args = kmalloc(sizeof(*args) + len + 1, GFP_NOFS); ++ if (unlikely(!args)) { ++ AuErr1("no memory\n"); ++ iput(dir); ++ goto out; ++ } ++ args->flags[AuHn_PARENT] = flags[AuHn_PARENT]; ++ args->flags[AuHn_CHILD] = flags[AuHn_CHILD]; ++ args->mask = mask; ++ args->dir = dir; ++ args->h_dir = igrab(h_dir); ++ if (h_child_inode) ++ h_child_inode = igrab(h_child_inode); /* can be NULL */ ++ args->h_child_inode = h_child_inode; ++ args->h_child_nlen = len; ++ if (len) { ++ p = (void *)args; ++ p += sizeof(*args); ++ memcpy(p, h_child_name, len); ++ p[len] = 0; ++ } ++ ++ f = 0; ++ if (!dir->i_nlink) ++ f = AuWkq_NEST; ++ err = au_wkq_nowait(au_hn_bh, args, dir->i_sb, f); ++ if (unlikely(err)) { ++ pr_err("wkq %d\n", err); ++ iput(args->h_child_inode); ++ iput(args->h_dir); ++ iput(args->dir); ++ kfree(args); ++ } ++ ++out: ++ return err; ++} ++ ++/* ---------------------------------------------------------------------- */ ++ ++int au_hnotify_reset_br(unsigned int udba, struct au_branch *br, int perm) ++{ ++ int err; ++ ++ AuDebugOn(!(udba & AuOptMask_UDBA)); ++ ++ err = 0; ++ if (au_hnotify_op.reset_br) ++ err = au_hnotify_op.reset_br(udba, br, perm); ++ ++ return err; ++} ++ ++int au_hnotify_init_br(struct au_branch *br, int perm) ++{ ++ int err; ++ ++ err = 0; ++ if (au_hnotify_op.init_br) ++ err = au_hnotify_op.init_br(br, perm); ++ ++ return err; ++} ++ ++void au_hnotify_fin_br(struct au_branch *br) ++{ ++ if (au_hnotify_op.fin_br) ++ au_hnotify_op.fin_br(br); ++} ++ ++static void au_hn_destroy_cache(void) ++{ ++ kmem_cache_destroy(au_cachep[AuCache_HNOTIFY]); ++ au_cachep[AuCache_HNOTIFY] = NULL; ++} ++ ++int __init au_hnotify_init(void) ++{ ++ int err; ++ ++ err = -ENOMEM; ++ au_cachep[AuCache_HNOTIFY] = AuCache(au_hnotify); ++ if (au_cachep[AuCache_HNOTIFY]) { ++ err = 0; ++ if (au_hnotify_op.init) ++ err = au_hnotify_op.init(); ++ if (unlikely(err)) ++ au_hn_destroy_cache(); ++ } ++ AuTraceErr(err); ++ return err; ++} ++ ++void au_hnotify_fin(void) ++{ ++ if (au_hnotify_op.fin) ++ au_hnotify_op.fin(); ++ /* cf. au_cache_fin() */ ++ if (au_cachep[AuCache_HNOTIFY]) ++ au_hn_destroy_cache(); ++} +diff -uNr linux-3.2.0-gentoo-r1.orig//fs/aufs/iinfo.c linux-3.2.0-gentoo-r1/fs/aufs/iinfo.c +--- linux-3.2.0-gentoo-r1.orig//fs/aufs/iinfo.c 1970-01-01 01:00:00.000000000 +0100 ++++ linux-3.2.0-gentoo-r1/fs/aufs/iinfo.c 2012-01-17 12:11:24.780212183 +0100 +@@ -0,0 +1,264 @@ ++/* ++ * Copyright (C) 2005-2012 Junjiro R. Okajima ++ * ++ * This program, aufs is free software; you can redistribute it and/or modify ++ * it under the terms of the GNU General Public License as published by ++ * the Free Software Foundation; either version 2 of the License, or ++ * (at your option) any later version. ++ * ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ * GNU General Public License for more details. 
++ * ++ * You should have received a copy of the GNU General Public License ++ * along with this program; if not, write to the Free Software ++ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA ++ */ ++ ++/* ++ * inode private data ++ */ ++ ++#include "aufs.h" ++ ++struct inode *au_h_iptr(struct inode *inode, aufs_bindex_t bindex) ++{ ++ struct inode *h_inode; ++ ++ IiMustAnyLock(inode); ++ ++ h_inode = au_ii(inode)->ii_hinode[0 + bindex].hi_inode; ++ AuDebugOn(h_inode && atomic_read(&h_inode->i_count) <= 0); ++ return h_inode; ++} ++ ++/* todo: hard/soft set? */ ++void au_hiput(struct au_hinode *hinode) ++{ ++ au_hn_free(hinode); ++ dput(hinode->hi_whdentry); ++ iput(hinode->hi_inode); ++} ++ ++unsigned int au_hi_flags(struct inode *inode, int isdir) ++{ ++ unsigned int flags; ++ const unsigned int mnt_flags = au_mntflags(inode->i_sb); ++ ++ flags = 0; ++ if (au_opt_test(mnt_flags, XINO)) ++ au_fset_hi(flags, XINO); ++ if (isdir && au_opt_test(mnt_flags, UDBA_HNOTIFY)) ++ au_fset_hi(flags, HNOTIFY); ++ return flags; ++} ++ ++void au_set_h_iptr(struct inode *inode, aufs_bindex_t bindex, ++ struct inode *h_inode, unsigned int flags) ++{ ++ struct au_hinode *hinode; ++ struct inode *hi; ++ struct au_iinfo *iinfo = au_ii(inode); ++ ++ IiMustWriteLock(inode); ++ ++ hinode = iinfo->ii_hinode + bindex; ++ hi = hinode->hi_inode; ++ AuDebugOn(h_inode && atomic_read(&h_inode->i_count) <= 0); ++ ++ if (hi) ++ au_hiput(hinode); ++ hinode->hi_inode = h_inode; ++ if (h_inode) { ++ int err; ++ struct super_block *sb = inode->i_sb; ++ struct au_branch *br; ++ ++ AuDebugOn(inode->i_mode ++ && (h_inode->i_mode & S_IFMT) ++ != (inode->i_mode & S_IFMT)); ++ if (bindex == iinfo->ii_bstart) ++ au_cpup_igen(inode, h_inode); ++ br = au_sbr(sb, bindex); ++ hinode->hi_id = br->br_id; ++ if (au_ftest_hi(flags, XINO)) { ++ err = au_xino_write(sb, bindex, h_inode->i_ino, ++ inode->i_ino); ++ if (unlikely(err)) ++ AuIOErr1("failed au_xino_write() %d\n", err); ++ } ++ ++ if (au_ftest_hi(flags, HNOTIFY) ++ && au_br_hnotifyable(br->br_perm)) { ++ err = au_hn_alloc(hinode, inode); ++ if (unlikely(err)) ++ AuIOErr1("au_hn_alloc() %d\n", err); ++ } ++ } ++} ++ ++void au_set_hi_wh(struct inode *inode, aufs_bindex_t bindex, ++ struct dentry *h_wh) ++{ ++ struct au_hinode *hinode; ++ ++ IiMustWriteLock(inode); ++ ++ hinode = au_ii(inode)->ii_hinode + bindex; ++ AuDebugOn(hinode->hi_whdentry); ++ hinode->hi_whdentry = h_wh; ++} ++ ++void au_update_iigen(struct inode *inode) ++{ ++ atomic_set(&au_ii(inode)->ii_generation, au_sigen(inode->i_sb)); ++ /* smp_mb(); */ /* atomic_set */ ++} ++ ++/* it may be called at remount time, too */ ++void au_update_ibrange(struct inode *inode, int do_put_zero) ++{ ++ struct au_iinfo *iinfo; ++ aufs_bindex_t bindex, bend; ++ ++ iinfo = au_ii(inode); ++ if (!iinfo) ++ return; ++ ++ IiMustWriteLock(inode); ++ ++ if (do_put_zero && iinfo->ii_bstart >= 0) { ++ for (bindex = iinfo->ii_bstart; bindex <= iinfo->ii_bend; ++ bindex++) { ++ struct inode *h_i; ++ ++ h_i = iinfo->ii_hinode[0 + bindex].hi_inode; ++ if (h_i && !h_i->i_nlink) ++ au_set_h_iptr(inode, bindex, NULL, 0); ++ } ++ } ++ ++ iinfo->ii_bstart = -1; ++ iinfo->ii_bend = -1; ++ bend = au_sbend(inode->i_sb); ++ for (bindex = 0; bindex <= bend; bindex++) ++ if (iinfo->ii_hinode[0 + bindex].hi_inode) { ++ iinfo->ii_bstart = bindex; ++ break; ++ } ++ if (iinfo->ii_bstart >= 0) ++ for (bindex = bend; bindex >= iinfo->ii_bstart; bindex--) ++ if (iinfo->ii_hinode[0 + bindex].hi_inode) { ++ iinfo->ii_bend = bindex; ++ 
break; ++ } ++ AuDebugOn(iinfo->ii_bstart > iinfo->ii_bend); ++} ++ ++/* ---------------------------------------------------------------------- */ ++ ++void au_icntnr_init_once(void *_c) ++{ ++ struct au_icntnr *c = _c; ++ struct au_iinfo *iinfo = &c->iinfo; ++ static struct lock_class_key aufs_ii; ++ ++ au_rw_init(&iinfo->ii_rwsem); ++ au_rw_class(&iinfo->ii_rwsem, &aufs_ii); ++ inode_init_once(&c->vfs_inode); ++} ++ ++int au_iinfo_init(struct inode *inode) ++{ ++ struct au_iinfo *iinfo; ++ struct super_block *sb; ++ int nbr, i; ++ ++ sb = inode->i_sb; ++ iinfo = &(container_of(inode, struct au_icntnr, vfs_inode)->iinfo); ++ nbr = au_sbend(sb) + 1; ++ if (unlikely(nbr <= 0)) ++ nbr = 1; ++ iinfo->ii_hinode = kcalloc(nbr, sizeof(*iinfo->ii_hinode), GFP_NOFS); ++ if (iinfo->ii_hinode) { ++ au_ninodes_inc(sb); ++ for (i = 0; i < nbr; i++) ++ iinfo->ii_hinode[i].hi_id = -1; ++ ++ atomic_set(&iinfo->ii_generation, au_sigen(sb)); ++ /* smp_mb(); */ /* atomic_set */ ++ iinfo->ii_bstart = -1; ++ iinfo->ii_bend = -1; ++ iinfo->ii_vdir = NULL; ++ return 0; ++ } ++ return -ENOMEM; ++} ++ ++int au_ii_realloc(struct au_iinfo *iinfo, int nbr) ++{ ++ int err, sz; ++ struct au_hinode *hip; ++ ++ AuRwMustWriteLock(&iinfo->ii_rwsem); ++ ++ err = -ENOMEM; ++ sz = sizeof(*hip) * (iinfo->ii_bend + 1); ++ if (!sz) ++ sz = sizeof(*hip); ++ hip = au_kzrealloc(iinfo->ii_hinode, sz, sizeof(*hip) * nbr, GFP_NOFS); ++ if (hip) { ++ iinfo->ii_hinode = hip; ++ err = 0; ++ } ++ ++ return err; ++} ++ ++void au_iinfo_fin(struct inode *inode) ++{ ++ struct au_iinfo *iinfo; ++ struct au_hinode *hi; ++ struct super_block *sb; ++ aufs_bindex_t bindex, bend; ++ const unsigned char unlinked = !inode->i_nlink; ++ ++ iinfo = au_ii(inode); ++ /* bad_inode case */ ++ if (!iinfo) ++ return; ++ ++ sb = inode->i_sb; ++ au_ninodes_dec(sb); ++ if (si_pid_test(sb)) ++ au_xino_delete_inode(inode, unlinked); ++ else { ++ /* ++ * it is safe to hide the dependency between sbinfo and ++ * sb->s_umount. ++ */ ++ lockdep_off(); ++ si_noflush_read_lock(sb); ++ au_xino_delete_inode(inode, unlinked); ++ si_read_unlock(sb); ++ lockdep_on(); ++ } ++ ++ if (iinfo->ii_vdir) ++ au_vdir_free(iinfo->ii_vdir); ++ ++ bindex = iinfo->ii_bstart; ++ if (bindex >= 0) { ++ hi = iinfo->ii_hinode + bindex; ++ bend = iinfo->ii_bend; ++ while (bindex++ <= bend) { ++ if (hi->hi_inode) ++ au_hiput(hi); ++ hi++; ++ } ++ } ++ kfree(iinfo->ii_hinode); ++ iinfo->ii_hinode = NULL; ++ AuRwDestroy(&iinfo->ii_rwsem); ++} +diff -uNr linux-3.2.0-gentoo-r1.orig//fs/aufs/inode.c linux-3.2.0-gentoo-r1/fs/aufs/inode.c +--- linux-3.2.0-gentoo-r1.orig//fs/aufs/inode.c 1970-01-01 01:00:00.000000000 +0100 ++++ linux-3.2.0-gentoo-r1/fs/aufs/inode.c 2012-01-17 12:11:24.791786361 +0100 +@@ -0,0 +1,471 @@ ++/* ++ * Copyright (C) 2005-2012 Junjiro R. Okajima ++ * ++ * This program, aufs is free software; you can redistribute it and/or modify ++ * it under the terms of the GNU General Public License as published by ++ * the Free Software Foundation; either version 2 of the License, or ++ * (at your option) any later version. ++ * ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ * GNU General Public License for more details. 
++ * ++ * You should have received a copy of the GNU General Public License ++ * along with this program; if not, write to the Free Software ++ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA ++ */ ++ ++/* ++ * inode functions ++ */ ++ ++#include "aufs.h" ++ ++struct inode *au_igrab(struct inode *inode) ++{ ++ if (inode) { ++ AuDebugOn(!atomic_read(&inode->i_count)); ++ ihold(inode); ++ } ++ return inode; ++} ++ ++static void au_refresh_hinode_attr(struct inode *inode, int do_version) ++{ ++ au_cpup_attr_all(inode, /*force*/0); ++ au_update_iigen(inode); ++ if (do_version) ++ inode->i_version++; ++} ++ ++static int au_ii_refresh(struct inode *inode, int *update) ++{ ++ int err, e; ++ umode_t type; ++ aufs_bindex_t bindex, new_bindex; ++ struct super_block *sb; ++ struct au_iinfo *iinfo; ++ struct au_hinode *p, *q, tmp; ++ ++ IiMustWriteLock(inode); ++ ++ *update = 0; ++ sb = inode->i_sb; ++ type = inode->i_mode & S_IFMT; ++ iinfo = au_ii(inode); ++ err = au_ii_realloc(iinfo, au_sbend(sb) + 1); ++ if (unlikely(err)) ++ goto out; ++ ++ AuDebugOn(iinfo->ii_bstart < 0); ++ p = iinfo->ii_hinode + iinfo->ii_bstart; ++ for (bindex = iinfo->ii_bstart; bindex <= iinfo->ii_bend; ++ bindex++, p++) { ++ if (!p->hi_inode) ++ continue; ++ ++ AuDebugOn(type != (p->hi_inode->i_mode & S_IFMT)); ++ new_bindex = au_br_index(sb, p->hi_id); ++ if (new_bindex == bindex) ++ continue; ++ ++ if (new_bindex < 0) { ++ *update = 1; ++ au_hiput(p); ++ p->hi_inode = NULL; ++ continue; ++ } ++ ++ if (new_bindex < iinfo->ii_bstart) ++ iinfo->ii_bstart = new_bindex; ++ if (iinfo->ii_bend < new_bindex) ++ iinfo->ii_bend = new_bindex; ++ /* swap two lower inode, and loop again */ ++ q = iinfo->ii_hinode + new_bindex; ++ tmp = *q; ++ *q = *p; ++ *p = tmp; ++ if (tmp.hi_inode) { ++ bindex--; ++ p--; ++ } ++ } ++ au_update_ibrange(inode, /*do_put_zero*/0); ++ e = au_dy_irefresh(inode); ++ if (unlikely(e && !err)) ++ err = e; ++ ++out: ++ AuTraceErr(err); ++ return err; ++} ++ ++int au_refresh_hinode_self(struct inode *inode) ++{ ++ int err, update; ++ ++ err = au_ii_refresh(inode, &update); ++ if (!err) ++ au_refresh_hinode_attr(inode, update && S_ISDIR(inode->i_mode)); ++ ++ AuTraceErr(err); ++ return err; ++} ++ ++int au_refresh_hinode(struct inode *inode, struct dentry *dentry) ++{ ++ int err, e, update; ++ unsigned int flags; ++ umode_t mode; ++ aufs_bindex_t bindex, bend; ++ unsigned char isdir; ++ struct au_hinode *p; ++ struct au_iinfo *iinfo; ++ ++ err = au_ii_refresh(inode, &update); ++ if (unlikely(err)) ++ goto out; ++ ++ update = 0; ++ iinfo = au_ii(inode); ++ p = iinfo->ii_hinode + iinfo->ii_bstart; ++ mode = (inode->i_mode & S_IFMT); ++ isdir = S_ISDIR(mode); ++ flags = au_hi_flags(inode, isdir); ++ bend = au_dbend(dentry); ++ for (bindex = au_dbstart(dentry); bindex <= bend; bindex++) { ++ struct inode *h_i; ++ struct dentry *h_d; ++ ++ h_d = au_h_dptr(dentry, bindex); ++ if (!h_d || !h_d->d_inode) ++ continue; ++ ++ AuDebugOn(mode != (h_d->d_inode->i_mode & S_IFMT)); ++ if (iinfo->ii_bstart <= bindex && bindex <= iinfo->ii_bend) { ++ h_i = au_h_iptr(inode, bindex); ++ if (h_i) { ++ if (h_i == h_d->d_inode) ++ continue; ++ err = -EIO; ++ break; ++ } ++ } ++ if (bindex < iinfo->ii_bstart) ++ iinfo->ii_bstart = bindex; ++ if (iinfo->ii_bend < bindex) ++ iinfo->ii_bend = bindex; ++ au_set_h_iptr(inode, bindex, au_igrab(h_d->d_inode), flags); ++ update = 1; ++ } ++ au_update_ibrange(inode, /*do_put_zero*/0); ++ e = au_dy_irefresh(inode); ++ if (unlikely(e && !err)) ++ err = e; ++ if (!err) 
++ au_refresh_hinode_attr(inode, update && isdir); ++ ++out: ++ AuTraceErr(err); ++ return err; ++} ++ ++static int set_inode(struct inode *inode, struct dentry *dentry) ++{ ++ int err; ++ unsigned int flags; ++ umode_t mode; ++ aufs_bindex_t bindex, bstart, btail; ++ unsigned char isdir; ++ struct dentry *h_dentry; ++ struct inode *h_inode; ++ struct au_iinfo *iinfo; ++ ++ IiMustWriteLock(inode); ++ ++ err = 0; ++ isdir = 0; ++ bstart = au_dbstart(dentry); ++ h_inode = au_h_dptr(dentry, bstart)->d_inode; ++ mode = h_inode->i_mode; ++ switch (mode & S_IFMT) { ++ case S_IFREG: ++ btail = au_dbtail(dentry); ++ inode->i_op = &aufs_iop; ++ inode->i_fop = &aufs_file_fop; ++ err = au_dy_iaop(inode, bstart, h_inode); ++ if (unlikely(err)) ++ goto out; ++ break; ++ case S_IFDIR: ++ isdir = 1; ++ btail = au_dbtaildir(dentry); ++ inode->i_op = &aufs_dir_iop; ++ inode->i_fop = &aufs_dir_fop; ++ break; ++ case S_IFLNK: ++ btail = au_dbtail(dentry); ++ inode->i_op = &aufs_symlink_iop; ++ break; ++ case S_IFBLK: ++ case S_IFCHR: ++ case S_IFIFO: ++ case S_IFSOCK: ++ btail = au_dbtail(dentry); ++ inode->i_op = &aufs_iop; ++ au_init_special_fop(inode, mode, h_inode->i_rdev); ++ break; ++ default: ++ AuIOErr("Unknown file type 0%o\n", mode); ++ err = -EIO; ++ goto out; ++ } ++ ++ /* do not set hnotify for whiteouted dirs (SHWH mode) */ ++ flags = au_hi_flags(inode, isdir); ++ if (au_opt_test(au_mntflags(dentry->d_sb), SHWH) ++ && au_ftest_hi(flags, HNOTIFY) ++ && dentry->d_name.len > AUFS_WH_PFX_LEN ++ && !memcmp(dentry->d_name.name, AUFS_WH_PFX, AUFS_WH_PFX_LEN)) ++ au_fclr_hi(flags, HNOTIFY); ++ iinfo = au_ii(inode); ++ iinfo->ii_bstart = bstart; ++ iinfo->ii_bend = btail; ++ for (bindex = bstart; bindex <= btail; bindex++) { ++ h_dentry = au_h_dptr(dentry, bindex); ++ if (h_dentry) ++ au_set_h_iptr(inode, bindex, ++ au_igrab(h_dentry->d_inode), flags); ++ } ++ au_cpup_attr_all(inode, /*force*/1); ++ ++out: ++ return err; ++} ++ ++/* ++ * successful returns with iinfo write_locked ++ * minus: errno ++ * zero: success, matched ++ * plus: no error, but unmatched ++ */ ++static int reval_inode(struct inode *inode, struct dentry *dentry) ++{ ++ int err; ++ aufs_bindex_t bindex, bend; ++ struct inode *h_inode, *h_dinode; ++ ++ /* ++ * before this function, if aufs got any iinfo lock, it must be only ++ * one, the parent dir. ++ * it can happen by UDBA and the obsoleted inode number. 
++ */ ++ err = -EIO; ++ if (unlikely(inode->i_ino == parent_ino(dentry))) ++ goto out; ++ ++ err = 1; ++ ii_write_lock_new_child(inode); ++ h_dinode = au_h_dptr(dentry, au_dbstart(dentry))->d_inode; ++ bend = au_ibend(inode); ++ for (bindex = au_ibstart(inode); bindex <= bend; bindex++) { ++ h_inode = au_h_iptr(inode, bindex); ++ if (h_inode && h_inode == h_dinode) { ++ err = 0; ++ if (au_iigen_test(inode, au_digen(dentry))) ++ err = au_refresh_hinode(inode, dentry); ++ break; ++ } ++ } ++ ++ if (unlikely(err)) ++ ii_write_unlock(inode); ++out: ++ return err; ++} ++ ++int au_ino(struct super_block *sb, aufs_bindex_t bindex, ino_t h_ino, ++ unsigned int d_type, ino_t *ino) ++{ ++ int err; ++ struct mutex *mtx; ++ ++ /* prevent hardlinked inode number from race condition */ ++ mtx = NULL; ++ if (d_type != DT_DIR) { ++ mtx = &au_sbr(sb, bindex)->br_xino.xi_nondir_mtx; ++ mutex_lock(mtx); ++ } ++ err = au_xino_read(sb, bindex, h_ino, ino); ++ if (unlikely(err)) ++ goto out; ++ ++ if (!*ino) { ++ err = -EIO; ++ *ino = au_xino_new_ino(sb); ++ if (unlikely(!*ino)) ++ goto out; ++ err = au_xino_write(sb, bindex, h_ino, *ino); ++ if (unlikely(err)) ++ goto out; ++ } ++ ++out: ++ if (mtx) ++ mutex_unlock(mtx); ++ return err; ++} ++ ++/* successful returns with iinfo write_locked */ ++/* todo: return with unlocked? */ ++struct inode *au_new_inode(struct dentry *dentry, int must_new) ++{ ++ struct inode *inode, *h_inode; ++ struct dentry *h_dentry; ++ struct super_block *sb; ++ struct mutex *mtx; ++ ino_t h_ino, ino; ++ int err; ++ aufs_bindex_t bstart; ++ ++ sb = dentry->d_sb; ++ bstart = au_dbstart(dentry); ++ h_dentry = au_h_dptr(dentry, bstart); ++ h_inode = h_dentry->d_inode; ++ h_ino = h_inode->i_ino; ++ ++ /* ++ * stop 'race'-ing between hardlinks under different ++ * parents. ++ */ ++ mtx = NULL; ++ if (!S_ISDIR(h_inode->i_mode)) ++ mtx = &au_sbr(sb, bstart)->br_xino.xi_nondir_mtx; ++ ++new_ino: ++ if (mtx) ++ mutex_lock(mtx); ++ err = au_xino_read(sb, bstart, h_ino, &ino); ++ inode = ERR_PTR(err); ++ if (unlikely(err)) ++ goto out; ++ ++ if (!ino) { ++ ino = au_xino_new_ino(sb); ++ if (unlikely(!ino)) { ++ inode = ERR_PTR(-EIO); ++ goto out; ++ } ++ } ++ ++ AuDbg("i%lu\n", (unsigned long)ino); ++ inode = au_iget_locked(sb, ino); ++ err = PTR_ERR(inode); ++ if (IS_ERR(inode)) ++ goto out; ++ ++ AuDbg("%lx, new %d\n", inode->i_state, !!(inode->i_state & I_NEW)); ++ if (inode->i_state & I_NEW) { ++ ii_write_lock_new_child(inode); ++ err = set_inode(inode, dentry); ++ if (!err) { ++ unlock_new_inode(inode); ++ goto out; /* success */ ++ } ++ ++ /* ++ * iget_failed() calls iput(), but we need to call ++ * ii_write_unlock() after iget_failed(). so dirty hack for ++ * i_count. ++ */ ++ atomic_inc(&inode->i_count); ++ iget_failed(inode); ++ ii_write_unlock(inode); ++ au_xino_write(sb, bstart, h_ino, /*ino*/0); ++ /* ignore this error */ ++ goto out_iput; ++ } else if (!must_new && !IS_DEADDIR(inode) && inode->i_nlink) { ++ /* ++ * horrible race condition between lookup, readdir and copyup ++ * (or something). 
++ */ ++ if (mtx) ++ mutex_unlock(mtx); ++ err = reval_inode(inode, dentry); ++ if (unlikely(err < 0)) { ++ mtx = NULL; ++ goto out_iput; ++ } ++ ++ if (!err) { ++ mtx = NULL; ++ goto out; /* success */ ++ } else if (mtx) ++ mutex_lock(mtx); ++ } ++ ++ if (unlikely(au_test_fs_unique_ino(h_dentry->d_inode))) ++ AuWarn1("Warning: Un-notified UDBA or repeatedly renamed dir," ++ " b%d, %s, %.*s, hi%lu, i%lu.\n", ++ bstart, au_sbtype(h_dentry->d_sb), AuDLNPair(dentry), ++ (unsigned long)h_ino, (unsigned long)ino); ++ ino = 0; ++ err = au_xino_write(sb, bstart, h_ino, /*ino*/0); ++ if (!err) { ++ iput(inode); ++ if (mtx) ++ mutex_unlock(mtx); ++ goto new_ino; ++ } ++ ++out_iput: ++ iput(inode); ++ inode = ERR_PTR(err); ++out: ++ if (mtx) ++ mutex_unlock(mtx); ++ return inode; ++} ++ ++/* ---------------------------------------------------------------------- */ ++ ++int au_test_ro(struct super_block *sb, aufs_bindex_t bindex, ++ struct inode *inode) ++{ ++ int err; ++ ++ err = au_br_rdonly(au_sbr(sb, bindex)); ++ ++ /* pseudo-link after flushed may happen out of bounds */ ++ if (!err ++ && inode ++ && au_ibstart(inode) <= bindex ++ && bindex <= au_ibend(inode)) { ++ /* ++ * permission check is unnecessary since vfsub routine ++ * will be called later ++ */ ++ struct inode *hi = au_h_iptr(inode, bindex); ++ if (hi) ++ err = IS_IMMUTABLE(hi) ? -EROFS : 0; ++ } ++ ++ return err; ++} ++ ++int au_test_h_perm(struct inode *h_inode, int mask) ++{ ++ if (!current_fsuid()) ++ return 0; ++ return inode_permission(h_inode, mask); ++} ++ ++int au_test_h_perm_sio(struct inode *h_inode, int mask) ++{ ++ if (au_test_nfs(h_inode->i_sb) ++ && (mask & MAY_WRITE) ++ && S_ISDIR(h_inode->i_mode)) ++ mask |= MAY_READ; /* force permission check */ ++ return au_test_h_perm(h_inode, mask); ++} +diff -uNr linux-3.2.0-gentoo-r1.orig//fs/aufs/inode.h linux-3.2.0-gentoo-r1/fs/aufs/inode.h +--- linux-3.2.0-gentoo-r1.orig//fs/aufs/inode.h 1970-01-01 01:00:00.000000000 +0100 ++++ linux-3.2.0-gentoo-r1/fs/aufs/inode.h 2012-01-17 12:11:24.803360540 +0100 +@@ -0,0 +1,554 @@ ++/* ++ * Copyright (C) 2005-2012 Junjiro R. Okajima ++ * ++ * This program, aufs is free software; you can redistribute it and/or modify ++ * it under the terms of the GNU General Public License as published by ++ * the Free Software Foundation; either version 2 of the License, or ++ * (at your option) any later version. ++ * ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ * GNU General Public License for more details. 
++ * ++ * You should have received a copy of the GNU General Public License ++ * along with this program; if not, write to the Free Software ++ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA ++ */ ++ ++/* ++ * inode operations ++ */ ++ ++#ifndef __AUFS_INODE_H__ ++#define __AUFS_INODE_H__ ++ ++#ifdef __KERNEL__ ++ ++#include <linux/fsnotify_backend.h> ++#include "rwsem.h" ++ ++struct vfsmount; ++ ++struct au_hnotify { ++#ifdef CONFIG_AUFS_HNOTIFY ++#ifdef CONFIG_AUFS_HFSNOTIFY ++ /* never use fsnotify_add_vfsmount_mark() */ ++ struct fsnotify_mark hn_mark; ++ int hn_mark_dead; ++#endif ++ struct inode *hn_aufs_inode; /* no get/put */ ++#endif ++} ____cacheline_aligned_in_smp; ++ ++struct au_hinode { ++ struct inode *hi_inode; ++ aufs_bindex_t hi_id; ++#ifdef CONFIG_AUFS_HNOTIFY ++ struct au_hnotify *hi_notify; ++#endif ++ ++ /* reference to the copied-up whiteout with get/put */ ++ struct dentry *hi_whdentry; ++}; ++ ++struct au_vdir; ++struct au_iinfo { ++ atomic_t ii_generation; ++ struct super_block *ii_hsb1; /* no get/put */ ++ ++ struct au_rwsem ii_rwsem; ++ aufs_bindex_t ii_bstart, ii_bend; ++ __u32 ii_higen; ++ struct au_hinode *ii_hinode; ++ struct au_vdir *ii_vdir; ++}; ++ ++struct au_icntnr { ++ struct au_iinfo iinfo; ++ struct inode vfs_inode; ++} ____cacheline_aligned_in_smp; ++ ++/* au_pin flags */ ++#define AuPin_DI_LOCKED 1 ++#define AuPin_MNT_WRITE (1 << 1) ++#define au_ftest_pin(flags, name) ((flags) & AuPin_##name) ++#define au_fset_pin(flags, name) \ ++ do { (flags) |= AuPin_##name; } while (0) ++#define au_fclr_pin(flags, name) \ ++ do { (flags) &= ~AuPin_##name; } while (0) ++ ++struct au_pin { ++ /* input */ ++ struct dentry *dentry; ++ unsigned int udba; ++ unsigned char lsc_di, lsc_hi, flags; ++ aufs_bindex_t bindex; ++ ++ /* output */ ++ struct dentry *parent; ++ struct au_hinode *hdir; ++ struct vfsmount *h_mnt; ++}; ++ ++/* ---------------------------------------------------------------------- */ ++ ++static inline struct au_iinfo *au_ii(struct inode *inode) ++{ ++ struct au_iinfo *iinfo; ++ ++ iinfo = &(container_of(inode, struct au_icntnr, vfs_inode)->iinfo); ++ if (iinfo->ii_hinode) ++ return iinfo; ++ return NULL; /* debugging bad_inode case */ ++} ++ ++/* ---------------------------------------------------------------------- */ ++ ++/* inode.c */ ++struct inode *au_igrab(struct inode *inode); ++int au_refresh_hinode_self(struct inode *inode); ++int au_refresh_hinode(struct inode *inode, struct dentry *dentry); ++int au_ino(struct super_block *sb, aufs_bindex_t bindex, ino_t h_ino, ++ unsigned int d_type, ino_t *ino); ++struct inode *au_new_inode(struct dentry *dentry, int must_new); ++int au_test_ro(struct super_block *sb, aufs_bindex_t bindex, ++ struct inode *inode); ++int au_test_h_perm(struct inode *h_inode, int mask); ++int au_test_h_perm_sio(struct inode *h_inode, int mask); ++ ++static inline int au_wh_ino(struct super_block *sb, aufs_bindex_t bindex, ++ ino_t h_ino, unsigned int d_type, ino_t *ino) ++{ ++#ifdef CONFIG_AUFS_SHWH ++ return au_ino(sb, bindex, h_ino, d_type, ino); ++#else ++ return 0; ++#endif ++} ++ ++/* i_op.c */ ++extern struct inode_operations aufs_iop, aufs_symlink_iop, aufs_dir_iop; ++ ++/* au_wr_dir flags */ ++#define AuWrDir_ADD_ENTRY 1 ++#define AuWrDir_ISDIR (1 << 1) ++#define au_ftest_wrdir(flags, name) ((flags) & AuWrDir_##name) ++#define au_fset_wrdir(flags, name) \ ++ do { (flags) |= AuWrDir_##name; } while (0) ++#define au_fclr_wrdir(flags, name) \ ++ do { (flags) &= ~AuWrDir_##name; } while (0) ++ ++struct au_wr_dir_args
{ ++ aufs_bindex_t force_btgt; ++ unsigned char flags; ++}; ++int au_wr_dir(struct dentry *dentry, struct dentry *src_dentry, ++ struct au_wr_dir_args *args); ++ ++struct dentry *au_pinned_h_parent(struct au_pin *pin); ++void au_pin_init(struct au_pin *pin, struct dentry *dentry, ++ aufs_bindex_t bindex, int lsc_di, int lsc_hi, ++ unsigned int udba, unsigned char flags); ++int au_pin(struct au_pin *pin, struct dentry *dentry, aufs_bindex_t bindex, ++ unsigned int udba, unsigned char flags) __must_check; ++int au_do_pin(struct au_pin *pin) __must_check; ++void au_unpin(struct au_pin *pin); ++ ++/* i_op_add.c */ ++int au_may_add(struct dentry *dentry, aufs_bindex_t bindex, ++ struct dentry *h_parent, int isdir); ++int aufs_mknod(struct inode *dir, struct dentry *dentry, int mode, dev_t dev); ++int aufs_symlink(struct inode *dir, struct dentry *dentry, const char *symname); ++int aufs_create(struct inode *dir, struct dentry *dentry, int mode, ++ struct nameidata *nd); ++int aufs_link(struct dentry *src_dentry, struct inode *dir, ++ struct dentry *dentry); ++int aufs_mkdir(struct inode *dir, struct dentry *dentry, int mode); ++ ++/* i_op_del.c */ ++int au_wr_dir_need_wh(struct dentry *dentry, int isdir, aufs_bindex_t *bcpup); ++int au_may_del(struct dentry *dentry, aufs_bindex_t bindex, ++ struct dentry *h_parent, int isdir); ++int aufs_unlink(struct inode *dir, struct dentry *dentry); ++int aufs_rmdir(struct inode *dir, struct dentry *dentry); ++ ++/* i_op_ren.c */ ++int au_wbr(struct dentry *dentry, aufs_bindex_t btgt); ++int aufs_rename(struct inode *src_dir, struct dentry *src_dentry, ++ struct inode *dir, struct dentry *dentry); ++ ++/* iinfo.c */ ++struct inode *au_h_iptr(struct inode *inode, aufs_bindex_t bindex); ++void au_hiput(struct au_hinode *hinode); ++void au_set_hi_wh(struct inode *inode, aufs_bindex_t bindex, ++ struct dentry *h_wh); ++unsigned int au_hi_flags(struct inode *inode, int isdir); ++ ++/* hinode flags */ ++#define AuHi_XINO 1 ++#define AuHi_HNOTIFY (1 << 1) ++#define au_ftest_hi(flags, name) ((flags) & AuHi_##name) ++#define au_fset_hi(flags, name) \ ++ do { (flags) |= AuHi_##name; } while (0) ++#define au_fclr_hi(flags, name) \ ++ do { (flags) &= ~AuHi_##name; } while (0) ++ ++#ifndef CONFIG_AUFS_HNOTIFY ++#undef AuHi_HNOTIFY ++#define AuHi_HNOTIFY 0 ++#endif ++ ++void au_set_h_iptr(struct inode *inode, aufs_bindex_t bindex, ++ struct inode *h_inode, unsigned int flags); ++ ++void au_update_iigen(struct inode *inode); ++void au_update_ibrange(struct inode *inode, int do_put_zero); ++ ++void au_icntnr_init_once(void *_c); ++int au_iinfo_init(struct inode *inode); ++void au_iinfo_fin(struct inode *inode); ++int au_ii_realloc(struct au_iinfo *iinfo, int nbr); ++ ++#ifdef CONFIG_PROC_FS ++/* plink.c */ ++int au_plink_maint(struct super_block *sb, int flags); ++void au_plink_maint_leave(struct au_sbinfo *sbinfo); ++int au_plink_maint_enter(struct super_block *sb); ++#ifdef CONFIG_AUFS_DEBUG ++void au_plink_list(struct super_block *sb); ++#else ++AuStubVoid(au_plink_list, struct super_block *sb) ++#endif ++int au_plink_test(struct inode *inode); ++struct dentry *au_plink_lkup(struct inode *inode, aufs_bindex_t bindex); ++void au_plink_append(struct inode *inode, aufs_bindex_t bindex, ++ struct dentry *h_dentry); ++void au_plink_put(struct super_block *sb, int verbose); ++void au_plink_clean(struct super_block *sb, int verbose); ++void au_plink_half_refresh(struct super_block *sb, aufs_bindex_t br_id); ++#else ++AuStubInt0(au_plink_maint, struct super_block *sb, int 
flags); ++AuStubVoid(au_plink_maint_leave, struct au_sbinfo *sbinfo); ++AuStubInt0(au_plink_maint_enter, struct super_block *sb); ++AuStubVoid(au_plink_list, struct super_block *sb); ++AuStubInt0(au_plink_test, struct inode *inode); ++AuStub(struct dentry *, au_plink_lkup, return NULL, ++ struct inode *inode, aufs_bindex_t bindex); ++AuStubVoid(au_plink_append, struct inode *inode, aufs_bindex_t bindex, ++ struct dentry *h_dentry); ++AuStubVoid(au_plink_put, struct super_block *sb, int verbose); ++AuStubVoid(au_plink_clean, struct super_block *sb, int verbose); ++AuStubVoid(au_plink_half_refresh, struct super_block *sb, aufs_bindex_t br_id); ++#endif /* CONFIG_PROC_FS */ ++ ++/* ---------------------------------------------------------------------- */ ++ ++/* lock subclass for iinfo */ ++enum { ++ AuLsc_II_CHILD, /* child first */ ++ AuLsc_II_CHILD2, /* rename(2), link(2), and cpup at hnotify */ ++ AuLsc_II_CHILD3, /* copyup dirs */ ++ AuLsc_II_PARENT, /* see AuLsc_I_PARENT in vfsub.h */ ++ AuLsc_II_PARENT2, ++ AuLsc_II_PARENT3, /* copyup dirs */ ++ AuLsc_II_NEW_CHILD ++}; ++ ++/* ++ * ii_read_lock_child, ii_write_lock_child, ++ * ii_read_lock_child2, ii_write_lock_child2, ++ * ii_read_lock_child3, ii_write_lock_child3, ++ * ii_read_lock_parent, ii_write_lock_parent, ++ * ii_read_lock_parent2, ii_write_lock_parent2, ++ * ii_read_lock_parent3, ii_write_lock_parent3, ++ * ii_read_lock_new_child, ii_write_lock_new_child, ++ */ ++#define AuReadLockFunc(name, lsc) \ ++static inline void ii_read_lock_##name(struct inode *i) \ ++{ \ ++ au_rw_read_lock_nested(&au_ii(i)->ii_rwsem, AuLsc_II_##lsc); \ ++} ++ ++#define AuWriteLockFunc(name, lsc) \ ++static inline void ii_write_lock_##name(struct inode *i) \ ++{ \ ++ au_rw_write_lock_nested(&au_ii(i)->ii_rwsem, AuLsc_II_##lsc); \ ++} ++ ++#define AuRWLockFuncs(name, lsc) \ ++ AuReadLockFunc(name, lsc) \ ++ AuWriteLockFunc(name, lsc) ++ ++AuRWLockFuncs(child, CHILD); ++AuRWLockFuncs(child2, CHILD2); ++AuRWLockFuncs(child3, CHILD3); ++AuRWLockFuncs(parent, PARENT); ++AuRWLockFuncs(parent2, PARENT2); ++AuRWLockFuncs(parent3, PARENT3); ++AuRWLockFuncs(new_child, NEW_CHILD); ++ ++#undef AuReadLockFunc ++#undef AuWriteLockFunc ++#undef AuRWLockFuncs ++ ++/* ++ * ii_read_unlock, ii_write_unlock, ii_downgrade_lock ++ */ ++AuSimpleUnlockRwsemFuncs(ii, struct inode *i, &au_ii(i)->ii_rwsem); ++ ++#define IiMustNoWaiters(i) AuRwMustNoWaiters(&au_ii(i)->ii_rwsem) ++#define IiMustAnyLock(i) AuRwMustAnyLock(&au_ii(i)->ii_rwsem) ++#define IiMustWriteLock(i) AuRwMustWriteLock(&au_ii(i)->ii_rwsem) ++ ++/* ---------------------------------------------------------------------- */ ++ ++static inline void au_icntnr_init(struct au_icntnr *c) ++{ ++#ifdef CONFIG_AUFS_DEBUG ++ c->vfs_inode.i_mode = 0; ++#endif ++} ++ ++static inline unsigned int au_iigen(struct inode *inode) ++{ ++ return atomic_read(&au_ii(inode)->ii_generation); ++} ++ ++/* tiny test for inode number */ ++/* tmpfs generation is too rough */ ++static inline int au_test_higen(struct inode *inode, struct inode *h_inode) ++{ ++ struct au_iinfo *iinfo; ++ ++ iinfo = au_ii(inode); ++ AuRwMustAnyLock(&iinfo->ii_rwsem); ++ return !(iinfo->ii_hsb1 == h_inode->i_sb ++ && iinfo->ii_higen == h_inode->i_generation); ++} ++ ++static inline void au_iigen_dec(struct inode *inode) ++{ ++ atomic_dec(&au_ii(inode)->ii_generation); ++} ++ ++static inline int au_iigen_test(struct inode *inode, unsigned int sigen) ++{ ++ int err; ++ ++ err = 0; ++ if (unlikely(inode && au_iigen(inode) != sigen)) ++ err = -EIO; ++ ++ return err; 
++} ++ ++/* ---------------------------------------------------------------------- */ ++ ++static inline aufs_bindex_t au_ii_br_id(struct inode *inode, ++ aufs_bindex_t bindex) ++{ ++ IiMustAnyLock(inode); ++ return au_ii(inode)->ii_hinode[0 + bindex].hi_id; ++} ++ ++static inline aufs_bindex_t au_ibstart(struct inode *inode) ++{ ++ IiMustAnyLock(inode); ++ return au_ii(inode)->ii_bstart; ++} ++ ++static inline aufs_bindex_t au_ibend(struct inode *inode) ++{ ++ IiMustAnyLock(inode); ++ return au_ii(inode)->ii_bend; ++} ++ ++static inline struct au_vdir *au_ivdir(struct inode *inode) ++{ ++ IiMustAnyLock(inode); ++ return au_ii(inode)->ii_vdir; ++} ++ ++static inline struct dentry *au_hi_wh(struct inode *inode, aufs_bindex_t bindex) ++{ ++ IiMustAnyLock(inode); ++ return au_ii(inode)->ii_hinode[0 + bindex].hi_whdentry; ++} ++ ++static inline void au_set_ibstart(struct inode *inode, aufs_bindex_t bindex) ++{ ++ IiMustWriteLock(inode); ++ au_ii(inode)->ii_bstart = bindex; ++} ++ ++static inline void au_set_ibend(struct inode *inode, aufs_bindex_t bindex) ++{ ++ IiMustWriteLock(inode); ++ au_ii(inode)->ii_bend = bindex; ++} ++ ++static inline void au_set_ivdir(struct inode *inode, struct au_vdir *vdir) ++{ ++ IiMustWriteLock(inode); ++ au_ii(inode)->ii_vdir = vdir; ++} ++ ++static inline struct au_hinode *au_hi(struct inode *inode, aufs_bindex_t bindex) ++{ ++ IiMustAnyLock(inode); ++ return au_ii(inode)->ii_hinode + bindex; ++} ++ ++/* ---------------------------------------------------------------------- */ ++ ++static inline struct dentry *au_pinned_parent(struct au_pin *pin) ++{ ++ if (pin) ++ return pin->parent; ++ return NULL; ++} ++ ++static inline struct inode *au_pinned_h_dir(struct au_pin *pin) ++{ ++ if (pin && pin->hdir) ++ return pin->hdir->hi_inode; ++ return NULL; ++} ++ ++static inline struct au_hinode *au_pinned_hdir(struct au_pin *pin) ++{ ++ if (pin) ++ return pin->hdir; ++ return NULL; ++} ++ ++static inline void au_pin_set_dentry(struct au_pin *pin, struct dentry *dentry) ++{ ++ if (pin) ++ pin->dentry = dentry; ++} ++ ++static inline void au_pin_set_parent_lflag(struct au_pin *pin, ++ unsigned char lflag) ++{ ++ if (pin) { ++ if (lflag) ++ au_fset_pin(pin->flags, DI_LOCKED); ++ else ++ au_fclr_pin(pin->flags, DI_LOCKED); ++ } ++} ++ ++static inline void au_pin_set_parent(struct au_pin *pin, struct dentry *parent) ++{ ++ if (pin) { ++ dput(pin->parent); ++ pin->parent = dget(parent); ++ } ++} ++ ++/* ---------------------------------------------------------------------- */ ++ ++struct au_branch; ++#ifdef CONFIG_AUFS_HNOTIFY ++struct au_hnotify_op { ++ void (*ctl)(struct au_hinode *hinode, int do_set); ++ int (*alloc)(struct au_hinode *hinode); ++ void (*free)(struct au_hinode *hinode); ++ ++ void (*fin)(void); ++ int (*init)(void); ++ ++ int (*reset_br)(unsigned int udba, struct au_branch *br, int perm); ++ void (*fin_br)(struct au_branch *br); ++ int (*init_br)(struct au_branch *br, int perm); ++}; ++ ++/* hnotify.c */ ++int au_hn_alloc(struct au_hinode *hinode, struct inode *inode); ++void au_hn_free(struct au_hinode *hinode); ++void au_hn_ctl(struct au_hinode *hinode, int do_set); ++void au_hn_reset(struct inode *inode, unsigned int flags); ++int au_hnotify(struct inode *h_dir, struct au_hnotify *hnotify, u32 mask, ++ struct qstr *h_child_qstr, struct inode *h_child_inode); ++int au_hnotify_reset_br(unsigned int udba, struct au_branch *br, int perm); ++int au_hnotify_init_br(struct au_branch *br, int perm); ++void au_hnotify_fin_br(struct au_branch *br); ++int __init 
au_hnotify_init(void); ++void au_hnotify_fin(void); ++ ++/* hfsnotify.c */ ++extern const struct au_hnotify_op au_hnotify_op; ++ ++static inline ++void au_hn_init(struct au_hinode *hinode) ++{ ++ hinode->hi_notify = NULL; ++} ++ ++static inline struct au_hnotify *au_hn(struct au_hinode *hinode) ++{ ++ return hinode->hi_notify; ++} ++ ++#else ++static inline ++int au_hn_alloc(struct au_hinode *hinode __maybe_unused, ++ struct inode *inode __maybe_unused) ++{ ++ return -EOPNOTSUPP; ++} ++ ++static inline struct au_hnotify *au_hn(struct au_hinode *hinode) ++{ ++ return NULL; ++} ++ ++AuStubVoid(au_hn_free, struct au_hinode *hinode __maybe_unused) ++AuStubVoid(au_hn_ctl, struct au_hinode *hinode __maybe_unused, ++ int do_set __maybe_unused) ++AuStubVoid(au_hn_reset, struct inode *inode __maybe_unused, ++ unsigned int flags __maybe_unused) ++AuStubInt0(au_hnotify_reset_br, unsigned int udba __maybe_unused, ++ struct au_branch *br __maybe_unused, ++ int perm __maybe_unused) ++AuStubInt0(au_hnotify_init_br, struct au_branch *br __maybe_unused, ++ int perm __maybe_unused) ++AuStubVoid(au_hnotify_fin_br, struct au_branch *br __maybe_unused) ++AuStubInt0(__init au_hnotify_init, void) ++AuStubVoid(au_hnotify_fin, void) ++AuStubVoid(au_hn_init, struct au_hinode *hinode __maybe_unused) ++#endif /* CONFIG_AUFS_HNOTIFY */ ++ ++static inline void au_hn_suspend(struct au_hinode *hdir) ++{ ++ au_hn_ctl(hdir, /*do_set*/0); ++} ++ ++static inline void au_hn_resume(struct au_hinode *hdir) ++{ ++ au_hn_ctl(hdir, /*do_set*/1); ++} ++ ++static inline void au_hn_imtx_lock(struct au_hinode *hdir) ++{ ++ mutex_lock(&hdir->hi_inode->i_mutex); ++ au_hn_suspend(hdir); ++} ++ ++static inline void au_hn_imtx_lock_nested(struct au_hinode *hdir, ++ unsigned int sc __maybe_unused) ++{ ++ mutex_lock_nested(&hdir->hi_inode->i_mutex, sc); ++ au_hn_suspend(hdir); ++} ++ ++static inline void au_hn_imtx_unlock(struct au_hinode *hdir) ++{ ++ au_hn_resume(hdir); ++ mutex_unlock(&hdir->hi_inode->i_mutex); ++} ++ ++#endif /* __KERNEL__ */ ++#endif /* __AUFS_INODE_H__ */ +diff -uNr linux-3.2.0-gentoo-r1.orig//fs/aufs/ioctl.c linux-3.2.0-gentoo-r1/fs/aufs/ioctl.c +--- linux-3.2.0-gentoo-r1.orig//fs/aufs/ioctl.c 1970-01-01 01:00:00.000000000 +0100 ++++ linux-3.2.0-gentoo-r1/fs/aufs/ioctl.c 2012-01-17 12:11:24.803360540 +0100 +@@ -0,0 +1,196 @@ ++/* ++ * Copyright (C) 2005-2012 Junjiro R. Okajima ++ * ++ * This program, aufs is free software; you can redistribute it and/or modify ++ * it under the terms of the GNU General Public License as published by ++ * the Free Software Foundation; either version 2 of the License, or ++ * (at your option) any later version. ++ * ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ * GNU General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public License ++ * along with this program; if not, write to the Free Software ++ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA ++ */ ++ ++/* ++ * ioctl ++ * plink-management and readdir in userspace. ++ * assist the pathconf(3) wrapper library. 
++ */ ++ ++#include "aufs.h" ++ ++static int au_wbr_fd(struct path *path, struct aufs_wbr_fd __user *arg) ++{ ++ int err, fd; ++ aufs_bindex_t wbi, bindex, bend; ++ struct file *h_file; ++ struct super_block *sb; ++ struct dentry *root; ++ struct au_branch *br; ++ struct aufs_wbr_fd wbrfd = { ++ .oflags = au_dir_roflags, ++ .brid = -1 ++ }; ++ const int valid = O_RDONLY | O_NONBLOCK | O_LARGEFILE | O_DIRECTORY ++ | O_NOATIME | O_CLOEXEC; ++ ++ AuDebugOn(wbrfd.oflags & ~valid); ++ ++ if (arg) { ++ err = copy_from_user(&wbrfd, arg, sizeof(wbrfd)); ++ if (unlikely(err)) { ++ err = -EFAULT; ++ goto out; ++ } ++ ++ err = -EINVAL; ++ AuDbg("wbrfd{0%o, %d}\n", wbrfd.oflags, wbrfd.brid); ++ wbrfd.oflags |= au_dir_roflags; ++ AuDbg("0%o\n", wbrfd.oflags); ++ if (unlikely(wbrfd.oflags & ~valid)) ++ goto out; ++ } ++ ++ fd = get_unused_fd(); ++ err = fd; ++ if (unlikely(fd < 0)) ++ goto out; ++ ++ h_file = ERR_PTR(-EINVAL); ++ wbi = 0; ++ br = NULL; ++ sb = path->dentry->d_sb; ++ root = sb->s_root; ++ aufs_read_lock(root, AuLock_IR); ++ bend = au_sbend(sb); ++ if (wbrfd.brid >= 0) { ++ wbi = au_br_index(sb, wbrfd.brid); ++ if (unlikely(wbi < 0 || wbi > bend)) ++ goto out_unlock; ++ } ++ ++ h_file = ERR_PTR(-ENOENT); ++ br = au_sbr(sb, wbi); ++ if (!au_br_writable(br->br_perm)) { ++ if (arg) ++ goto out_unlock; ++ ++ bindex = wbi + 1; ++ wbi = -1; ++ for (; bindex <= bend; bindex++) { ++ br = au_sbr(sb, bindex); ++ if (au_br_writable(br->br_perm)) { ++ wbi = bindex; ++ br = au_sbr(sb, wbi); ++ break; ++ } ++ } ++ } ++ AuDbg("wbi %d\n", wbi); ++ if (wbi >= 0) ++ h_file = au_h_open(root, wbi, wbrfd.oflags, NULL); ++ ++out_unlock: ++ aufs_read_unlock(root, AuLock_IR); ++ err = PTR_ERR(h_file); ++ if (IS_ERR(h_file)) ++ goto out_fd; ++ ++ atomic_dec(&br->br_count); /* cf. 
au_h_open() */ ++ fd_install(fd, h_file); ++ err = fd; ++ goto out; /* success */ ++ ++out_fd: ++ put_unused_fd(fd); ++out: ++ AuTraceErr(err); ++ return err; ++} ++ ++/* ---------------------------------------------------------------------- */ ++ ++long aufs_ioctl_dir(struct file *file, unsigned int cmd, unsigned long arg) ++{ ++ long err; ++ ++ switch (cmd) { ++ case AUFS_CTL_RDU: ++ case AUFS_CTL_RDU_INO: ++ err = au_rdu_ioctl(file, cmd, arg); ++ break; ++ ++ case AUFS_CTL_WBR_FD: ++ err = au_wbr_fd(&file->f_path, (void __user *)arg); ++ break; ++ ++ case AUFS_CTL_IBUSY: ++ err = au_ibusy_ioctl(file, arg); ++ break; ++ ++ default: ++ /* do not call the lower */ ++ AuDbg("0x%x\n", cmd); ++ err = -ENOTTY; ++ } ++ ++ AuTraceErr(err); ++ return err; ++} ++ ++long aufs_ioctl_nondir(struct file *file, unsigned int cmd, unsigned long arg) ++{ ++ long err; ++ ++ switch (cmd) { ++ case AUFS_CTL_WBR_FD: ++ err = au_wbr_fd(&file->f_path, (void __user *)arg); ++ break; ++ ++ default: ++ /* do not call the lower */ ++ AuDbg("0x%x\n", cmd); ++ err = -ENOTTY; ++ } ++ ++ AuTraceErr(err); ++ return err; ++} ++ ++#ifdef CONFIG_COMPAT ++long aufs_compat_ioctl_dir(struct file *file, unsigned int cmd, ++ unsigned long arg) ++{ ++ long err; ++ ++ switch (cmd) { ++ case AUFS_CTL_RDU: ++ case AUFS_CTL_RDU_INO: ++ err = au_rdu_compat_ioctl(file, cmd, arg); ++ break; ++ ++ case AUFS_CTL_IBUSY: ++ err = au_ibusy_compat_ioctl(file, arg); ++ break; ++ ++ default: ++ err = aufs_ioctl_dir(file, cmd, arg); ++ } ++ ++ AuTraceErr(err); ++ return err; ++} ++ ++#if 0 /* unused yet */ ++long aufs_compat_ioctl_nondir(struct file *file, unsigned int cmd, ++ unsigned long arg) ++{ ++ return aufs_ioctl_nondir(file, cmd, (unsigned long)compat_ptr(arg)); ++} ++#endif ++#endif +diff -uNr linux-3.2.0-gentoo-r1.orig//fs/aufs/i_op_add.c linux-3.2.0-gentoo-r1/fs/aufs/i_op_add.c +--- linux-3.2.0-gentoo-r1.orig//fs/aufs/i_op_add.c 1970-01-01 01:00:00.000000000 +0100 ++++ linux-3.2.0-gentoo-r1/fs/aufs/i_op_add.c 2012-01-17 12:11:24.757063826 +0100 +@@ -0,0 +1,711 @@ ++/* ++ * Copyright (C) 2005-2012 Junjiro R. Okajima ++ * ++ * This program, aufs is free software; you can redistribute it and/or modify ++ * it under the terms of the GNU General Public License as published by ++ * the Free Software Foundation; either version 2 of the License, or ++ * (at your option) any later version. ++ * ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ * GNU General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public License ++ * along with this program; if not, write to the Free Software ++ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA ++ */ ++ ++/* ++ * inode operations (add entry) ++ */ ++ ++#include "aufs.h" ++ ++/* ++ * final procedure of adding a new entry, except link(2). ++ * remove whiteout, instantiate, copyup the parent dir's times and size ++ * and update version. ++ * if it failed, re-create the removed whiteout. 
++ */ ++static int epilog(struct inode *dir, aufs_bindex_t bindex, ++ struct dentry *wh_dentry, struct dentry *dentry) ++{ ++ int err, rerr; ++ aufs_bindex_t bwh; ++ struct path h_path; ++ struct inode *inode, *h_dir; ++ struct dentry *wh; ++ ++ bwh = -1; ++ if (wh_dentry) { ++ h_dir = wh_dentry->d_parent->d_inode; /* dir inode is locked */ ++ IMustLock(h_dir); ++ AuDebugOn(au_h_iptr(dir, bindex) != h_dir); ++ bwh = au_dbwh(dentry); ++ h_path.dentry = wh_dentry; ++ h_path.mnt = au_sbr_mnt(dir->i_sb, bindex); ++ err = au_wh_unlink_dentry(au_h_iptr(dir, bindex), &h_path, ++ dentry); ++ if (unlikely(err)) ++ goto out; ++ } ++ ++ inode = au_new_inode(dentry, /*must_new*/1); ++ if (!IS_ERR(inode)) { ++ d_instantiate(dentry, inode); ++ dir = dentry->d_parent->d_inode; /* dir inode is locked */ ++ IMustLock(dir); ++ if (au_ibstart(dir) == au_dbstart(dentry)) ++ au_cpup_attr_timesizes(dir); ++ dir->i_version++; ++ return 0; /* success */ ++ } ++ ++ err = PTR_ERR(inode); ++ if (!wh_dentry) ++ goto out; ++ ++ /* revert */ ++ /* dir inode is locked */ ++ wh = au_wh_create(dentry, bwh, wh_dentry->d_parent); ++ rerr = PTR_ERR(wh); ++ if (IS_ERR(wh)) { ++ AuIOErr("%.*s reverting whiteout failed(%d, %d)\n", ++ AuDLNPair(dentry), err, rerr); ++ err = -EIO; ++ } else ++ dput(wh); ++ ++out: ++ return err; ++} ++ ++static int au_d_may_add(struct dentry *dentry) ++{ ++ int err; ++ ++ err = 0; ++ if (unlikely(d_unhashed(dentry))) ++ err = -ENOENT; ++ if (unlikely(dentry->d_inode)) ++ err = -EEXIST; ++ return err; ++} ++ ++/* ++ * simple tests for the adding inode operations. ++ * following the checks in vfs, plus the parent-child relationship. ++ */ ++int au_may_add(struct dentry *dentry, aufs_bindex_t bindex, ++ struct dentry *h_parent, int isdir) ++{ ++ int err; ++ umode_t h_mode; ++ struct dentry *h_dentry; ++ struct inode *h_inode; ++ ++ err = -ENAMETOOLONG; ++ if (unlikely(dentry->d_name.len > AUFS_MAX_NAMELEN)) ++ goto out; ++ ++ h_dentry = au_h_dptr(dentry, bindex); ++ h_inode = h_dentry->d_inode; ++ if (!dentry->d_inode) { ++ err = -EEXIST; ++ if (unlikely(h_inode)) ++ goto out; ++ } else { ++ /* rename(2) case */ ++ err = -EIO; ++ if (unlikely(!h_inode || !h_inode->i_nlink)) ++ goto out; ++ ++ h_mode = h_inode->i_mode; ++ if (!isdir) { ++ err = -EISDIR; ++ if (unlikely(S_ISDIR(h_mode))) ++ goto out; ++ } else if (unlikely(!S_ISDIR(h_mode))) { ++ err = -ENOTDIR; ++ goto out; ++ } ++ } ++ ++ err = 0; ++ /* expected parent dir is locked */ ++ if (unlikely(h_parent != h_dentry->d_parent)) ++ err = -EIO; ++ ++out: ++ AuTraceErr(err); ++ return err; ++} ++ ++/* ++ * initial procedure of adding a new entry. ++ * prepare writable branch and the parent dir, lock it, ++ * and lookup whiteout for the new entry. 
++ */ ++static struct dentry* ++lock_hdir_lkup_wh(struct dentry *dentry, struct au_dtime *dt, ++ struct dentry *src_dentry, struct au_pin *pin, ++ struct au_wr_dir_args *wr_dir_args) ++{ ++ struct dentry *wh_dentry, *h_parent; ++ struct super_block *sb; ++ struct au_branch *br; ++ int err; ++ unsigned int udba; ++ aufs_bindex_t bcpup; ++ ++ AuDbg("%.*s\n", AuDLNPair(dentry)); ++ ++ err = au_wr_dir(dentry, src_dentry, wr_dir_args); ++ bcpup = err; ++ wh_dentry = ERR_PTR(err); ++ if (unlikely(err < 0)) ++ goto out; ++ ++ sb = dentry->d_sb; ++ udba = au_opt_udba(sb); ++ err = au_pin(pin, dentry, bcpup, udba, ++ AuPin_DI_LOCKED | AuPin_MNT_WRITE); ++ wh_dentry = ERR_PTR(err); ++ if (unlikely(err)) ++ goto out; ++ ++ h_parent = au_pinned_h_parent(pin); ++ if (udba != AuOpt_UDBA_NONE ++ && au_dbstart(dentry) == bcpup) ++ err = au_may_add(dentry, bcpup, h_parent, ++ au_ftest_wrdir(wr_dir_args->flags, ISDIR)); ++ else if (unlikely(dentry->d_name.len > AUFS_MAX_NAMELEN)) ++ err = -ENAMETOOLONG; ++ wh_dentry = ERR_PTR(err); ++ if (unlikely(err)) ++ goto out_unpin; ++ ++ br = au_sbr(sb, bcpup); ++ if (dt) { ++ struct path tmp = { ++ .dentry = h_parent, ++ .mnt = br->br_mnt ++ }; ++ au_dtime_store(dt, au_pinned_parent(pin), &tmp); ++ } ++ ++ wh_dentry = NULL; ++ if (bcpup != au_dbwh(dentry)) ++ goto out; /* success */ ++ ++ wh_dentry = au_wh_lkup(h_parent, &dentry->d_name, br); ++ ++out_unpin: ++ if (IS_ERR(wh_dentry)) ++ au_unpin(pin); ++out: ++ return wh_dentry; ++} ++ ++/* ---------------------------------------------------------------------- */ ++ ++enum { Mknod, Symlink, Creat }; ++struct simple_arg { ++ int type; ++ union { ++ struct { ++ int mode; ++ struct nameidata *nd; ++ } c; ++ struct { ++ const char *symname; ++ } s; ++ struct { ++ int mode; ++ dev_t dev; ++ } m; ++ } u; ++}; ++ ++static int add_simple(struct inode *dir, struct dentry *dentry, ++ struct simple_arg *arg) ++{ ++ int err; ++ aufs_bindex_t bstart; ++ unsigned char created; ++ struct au_dtime dt; ++ struct au_pin pin; ++ struct path h_path; ++ struct dentry *wh_dentry, *parent; ++ struct inode *h_dir; ++ struct au_wr_dir_args wr_dir_args = { ++ .force_btgt = -1, ++ .flags = AuWrDir_ADD_ENTRY ++ }; ++ ++ AuDbg("%.*s\n", AuDLNPair(dentry)); ++ IMustLock(dir); ++ ++ parent = dentry->d_parent; /* dir inode is locked */ ++ err = aufs_read_lock(dentry, AuLock_DW | AuLock_GEN); ++ if (unlikely(err)) ++ goto out; ++ err = au_d_may_add(dentry); ++ if (unlikely(err)) ++ goto out_unlock; ++ di_write_lock_parent(parent); ++ wh_dentry = lock_hdir_lkup_wh(dentry, &dt, /*src_dentry*/NULL, &pin, ++ &wr_dir_args); ++ err = PTR_ERR(wh_dentry); ++ if (IS_ERR(wh_dentry)) ++ goto out_parent; ++ ++ bstart = au_dbstart(dentry); ++ h_path.dentry = au_h_dptr(dentry, bstart); ++ h_path.mnt = au_sbr_mnt(dentry->d_sb, bstart); ++ h_dir = au_pinned_h_dir(&pin); ++ switch (arg->type) { ++ case Creat: ++ err = vfsub_create(h_dir, &h_path, arg->u.c.mode); ++ break; ++ case Symlink: ++ err = vfsub_symlink(h_dir, &h_path, arg->u.s.symname); ++ break; ++ case Mknod: ++ err = vfsub_mknod(h_dir, &h_path, arg->u.m.mode, arg->u.m.dev); ++ break; ++ default: ++ BUG(); ++ } ++ created = !err; ++ if (!err) ++ err = epilog(dir, bstart, wh_dentry, dentry); ++ ++ /* revert */ ++ if (unlikely(created && err && h_path.dentry->d_inode)) { ++ int rerr; ++ rerr = vfsub_unlink(h_dir, &h_path, /*force*/0); ++ if (rerr) { ++ AuIOErr("%.*s revert failure(%d, %d)\n", ++ AuDLNPair(dentry), err, rerr); ++ err = -EIO; ++ } ++ au_dtime_revert(&dt); ++ } ++ ++ au_unpin(&pin); ++ 
dput(wh_dentry); ++ ++out_parent: ++ di_write_unlock(parent); ++out_unlock: ++ if (unlikely(err)) { ++ au_update_dbstart(dentry); ++ d_drop(dentry); ++ } ++ aufs_read_unlock(dentry, AuLock_DW); ++out: ++ return err; ++} ++ ++int aufs_mknod(struct inode *dir, struct dentry *dentry, int mode, dev_t dev) ++{ ++ struct simple_arg arg = { ++ .type = Mknod, ++ .u.m = { ++ .mode = mode, ++ .dev = dev ++ } ++ }; ++ return add_simple(dir, dentry, &arg); ++} ++ ++int aufs_symlink(struct inode *dir, struct dentry *dentry, const char *symname) ++{ ++ struct simple_arg arg = { ++ .type = Symlink, ++ .u.s.symname = symname ++ }; ++ return add_simple(dir, dentry, &arg); ++} ++ ++int aufs_create(struct inode *dir, struct dentry *dentry, int mode, ++ struct nameidata *nd) ++{ ++ struct simple_arg arg = { ++ .type = Creat, ++ .u.c = { ++ .mode = mode, ++ .nd = nd ++ } ++ }; ++ return add_simple(dir, dentry, &arg); ++} ++ ++/* ---------------------------------------------------------------------- */ ++ ++struct au_link_args { ++ aufs_bindex_t bdst, bsrc; ++ struct au_pin pin; ++ struct path h_path; ++ struct dentry *src_parent, *parent; ++}; ++ ++static int au_cpup_before_link(struct dentry *src_dentry, ++ struct au_link_args *a) ++{ ++ int err; ++ struct dentry *h_src_dentry; ++ struct mutex *h_mtx; ++ struct file *h_file; ++ ++ di_read_lock_parent(a->src_parent, AuLock_IR); ++ err = au_test_and_cpup_dirs(src_dentry, a->bdst); ++ if (unlikely(err)) ++ goto out; ++ ++ h_src_dentry = au_h_dptr(src_dentry, a->bsrc); ++ h_mtx = &h_src_dentry->d_inode->i_mutex; ++ err = au_pin(&a->pin, src_dentry, a->bdst, ++ au_opt_udba(src_dentry->d_sb), ++ AuPin_DI_LOCKED | AuPin_MNT_WRITE); ++ if (unlikely(err)) ++ goto out; ++ mutex_lock_nested(h_mtx, AuLsc_I_CHILD); ++ h_file = au_h_open_pre(src_dentry, a->bsrc); ++ if (IS_ERR(h_file)) { ++ err = PTR_ERR(h_file); ++ h_file = NULL; ++ } else ++ err = au_sio_cpup_simple(src_dentry, a->bdst, -1, ++ AuCpup_DTIME /* | AuCpup_KEEPLINO */); ++ mutex_unlock(h_mtx); ++ au_h_open_post(src_dentry, a->bsrc, h_file); ++ au_unpin(&a->pin); ++ ++out: ++ di_read_unlock(a->src_parent, AuLock_IR); ++ return err; ++} ++ ++static int au_cpup_or_link(struct dentry *src_dentry, struct au_link_args *a) ++{ ++ int err; ++ unsigned char plink; ++ struct inode *h_inode, *inode; ++ struct dentry *h_src_dentry; ++ struct super_block *sb; ++ struct file *h_file; ++ ++ plink = 0; ++ h_inode = NULL; ++ sb = src_dentry->d_sb; ++ inode = src_dentry->d_inode; ++ if (au_ibstart(inode) <= a->bdst) ++ h_inode = au_h_iptr(inode, a->bdst); ++ if (!h_inode || !h_inode->i_nlink) { ++ /* copyup src_dentry as the name of dentry. 
*/ ++ au_set_dbstart(src_dentry, a->bdst); ++ au_set_h_dptr(src_dentry, a->bdst, dget(a->h_path.dentry)); ++ h_inode = au_h_dptr(src_dentry, a->bsrc)->d_inode; ++ mutex_lock_nested(&h_inode->i_mutex, AuLsc_I_CHILD); ++ h_file = au_h_open_pre(src_dentry, a->bsrc); ++ if (IS_ERR(h_file)) { ++ err = PTR_ERR(h_file); ++ h_file = NULL; ++ } else ++ err = au_sio_cpup_single(src_dentry, a->bdst, a->bsrc, ++ -1, AuCpup_KEEPLINO, ++ a->parent); ++ mutex_unlock(&h_inode->i_mutex); ++ au_h_open_post(src_dentry, a->bsrc, h_file); ++ au_set_h_dptr(src_dentry, a->bdst, NULL); ++ au_set_dbstart(src_dentry, a->bsrc); ++ } else { ++ /* the inode of src_dentry already exists on a.bdst branch */ ++ h_src_dentry = d_find_alias(h_inode); ++ if (!h_src_dentry && au_plink_test(inode)) { ++ plink = 1; ++ h_src_dentry = au_plink_lkup(inode, a->bdst); ++ err = PTR_ERR(h_src_dentry); ++ if (IS_ERR(h_src_dentry)) ++ goto out; ++ ++ if (unlikely(!h_src_dentry->d_inode)) { ++ dput(h_src_dentry); ++ h_src_dentry = NULL; ++ } ++ ++ } ++ if (h_src_dentry) { ++ err = vfsub_link(h_src_dentry, au_pinned_h_dir(&a->pin), ++ &a->h_path); ++ dput(h_src_dentry); ++ } else { ++ AuIOErr("no dentry found for hi%lu on b%d\n", ++ h_inode->i_ino, a->bdst); ++ err = -EIO; ++ } ++ } ++ ++ if (!err && !plink) ++ au_plink_append(inode, a->bdst, a->h_path.dentry); ++ ++out: ++ AuTraceErr(err); ++ return err; ++} ++ ++int aufs_link(struct dentry *src_dentry, struct inode *dir, ++ struct dentry *dentry) ++{ ++ int err, rerr; ++ struct au_dtime dt; ++ struct au_link_args *a; ++ struct dentry *wh_dentry, *h_src_dentry; ++ struct inode *inode; ++ struct super_block *sb; ++ struct au_wr_dir_args wr_dir_args = { ++ /* .force_btgt = -1, */ ++ .flags = AuWrDir_ADD_ENTRY ++ }; ++ ++ IMustLock(dir); ++ inode = src_dentry->d_inode; ++ IMustLock(inode); ++ ++ err = -ENOMEM; ++ a = kzalloc(sizeof(*a), GFP_NOFS); ++ if (unlikely(!a)) ++ goto out; ++ ++ a->parent = dentry->d_parent; /* dir inode is locked */ ++ err = aufs_read_and_write_lock2(dentry, src_dentry, ++ AuLock_NOPLM | AuLock_GEN); ++ if (unlikely(err)) ++ goto out_kfree; ++ err = au_d_hashed_positive(src_dentry); ++ if (unlikely(err)) ++ goto out_unlock; ++ err = au_d_may_add(dentry); ++ if (unlikely(err)) ++ goto out_unlock; ++ ++ a->src_parent = dget_parent(src_dentry); ++ wr_dir_args.force_btgt = au_ibstart(inode); ++ ++ di_write_lock_parent(a->parent); ++ wr_dir_args.force_btgt = au_wbr(dentry, wr_dir_args.force_btgt); ++ wh_dentry = lock_hdir_lkup_wh(dentry, &dt, src_dentry, &a->pin, ++ &wr_dir_args); ++ err = PTR_ERR(wh_dentry); ++ if (IS_ERR(wh_dentry)) ++ goto out_parent; ++ ++ err = 0; ++ sb = dentry->d_sb; ++ a->bdst = au_dbstart(dentry); ++ a->h_path.dentry = au_h_dptr(dentry, a->bdst); ++ a->h_path.mnt = au_sbr_mnt(sb, a->bdst); ++ a->bsrc = au_ibstart(inode); ++ h_src_dentry = au_h_d_alias(src_dentry, a->bsrc); ++ if (!h_src_dentry) { ++ a->bsrc = au_dbstart(src_dentry); ++ h_src_dentry = au_h_d_alias(src_dentry, a->bsrc); ++ AuDebugOn(!h_src_dentry); ++ } else if (IS_ERR(h_src_dentry)) ++ goto out_parent; ++ ++ if (au_opt_test(au_mntflags(sb), PLINK)) { ++ if (a->bdst < a->bsrc ++ /* && h_src_dentry->d_sb != a->h_path.dentry->d_sb */) ++ err = au_cpup_or_link(src_dentry, a); ++ else ++ err = vfsub_link(h_src_dentry, au_pinned_h_dir(&a->pin), ++ &a->h_path); ++ dput(h_src_dentry); ++ } else { ++ /* ++ * copyup src_dentry to the branch we process, ++ * and then link(2) to it. 
++ */ ++ dput(h_src_dentry); ++ if (a->bdst < a->bsrc ++ /* && h_src_dentry->d_sb != a->h_path.dentry->d_sb */) { ++ au_unpin(&a->pin); ++ di_write_unlock(a->parent); ++ err = au_cpup_before_link(src_dentry, a); ++ di_write_lock_parent(a->parent); ++ if (!err) ++ err = au_pin(&a->pin, dentry, a->bdst, ++ au_opt_udba(sb), ++ AuPin_DI_LOCKED | AuPin_MNT_WRITE); ++ if (unlikely(err)) ++ goto out_wh; ++ } ++ if (!err) { ++ h_src_dentry = au_h_dptr(src_dentry, a->bdst); ++ err = -ENOENT; ++ if (h_src_dentry && h_src_dentry->d_inode) ++ err = vfsub_link(h_src_dentry, ++ au_pinned_h_dir(&a->pin), ++ &a->h_path); ++ } ++ } ++ if (unlikely(err)) ++ goto out_unpin; ++ ++ if (wh_dentry) { ++ a->h_path.dentry = wh_dentry; ++ err = au_wh_unlink_dentry(au_pinned_h_dir(&a->pin), &a->h_path, ++ dentry); ++ if (unlikely(err)) ++ goto out_revert; ++ } ++ ++ dir->i_version++; ++ if (au_ibstart(dir) == au_dbstart(dentry)) ++ au_cpup_attr_timesizes(dir); ++ inc_nlink(inode); ++ inode->i_ctime = dir->i_ctime; ++ d_instantiate(dentry, au_igrab(inode)); ++ if (d_unhashed(a->h_path.dentry)) ++ /* some filesystem calls d_drop() */ ++ d_drop(dentry); ++ goto out_unpin; /* success */ ++ ++out_revert: ++ rerr = vfsub_unlink(au_pinned_h_dir(&a->pin), &a->h_path, /*force*/0); ++ if (unlikely(rerr)) { ++ AuIOErr("%.*s reverting failed(%d, %d)\n", ++ AuDLNPair(dentry), err, rerr); ++ err = -EIO; ++ } ++ au_dtime_revert(&dt); ++out_unpin: ++ au_unpin(&a->pin); ++out_wh: ++ dput(wh_dentry); ++out_parent: ++ di_write_unlock(a->parent); ++ dput(a->src_parent); ++out_unlock: ++ if (unlikely(err)) { ++ au_update_dbstart(dentry); ++ d_drop(dentry); ++ } ++ aufs_read_and_write_unlock2(dentry, src_dentry); ++out_kfree: ++ kfree(a); ++out: ++ return err; ++} ++ ++int aufs_mkdir(struct inode *dir, struct dentry *dentry, int mode) ++{ ++ int err, rerr; ++ aufs_bindex_t bindex; ++ unsigned char diropq; ++ struct path h_path; ++ struct dentry *wh_dentry, *parent, *opq_dentry; ++ struct mutex *h_mtx; ++ struct super_block *sb; ++ struct { ++ struct au_pin pin; ++ struct au_dtime dt; ++ } *a; /* reduce the stack usage */ ++ struct au_wr_dir_args wr_dir_args = { ++ .force_btgt = -1, ++ .flags = AuWrDir_ADD_ENTRY | AuWrDir_ISDIR ++ }; ++ ++ IMustLock(dir); ++ ++ err = -ENOMEM; ++ a = kmalloc(sizeof(*a), GFP_NOFS); ++ if (unlikely(!a)) ++ goto out; ++ ++ err = aufs_read_lock(dentry, AuLock_DW | AuLock_GEN); ++ if (unlikely(err)) ++ goto out_free; ++ err = au_d_may_add(dentry); ++ if (unlikely(err)) ++ goto out_unlock; ++ ++ parent = dentry->d_parent; /* dir inode is locked */ ++ di_write_lock_parent(parent); ++ wh_dentry = lock_hdir_lkup_wh(dentry, &a->dt, /*src_dentry*/NULL, ++ &a->pin, &wr_dir_args); ++ err = PTR_ERR(wh_dentry); ++ if (IS_ERR(wh_dentry)) ++ goto out_parent; ++ ++ sb = dentry->d_sb; ++ bindex = au_dbstart(dentry); ++ h_path.dentry = au_h_dptr(dentry, bindex); ++ h_path.mnt = au_sbr_mnt(sb, bindex); ++ err = vfsub_mkdir(au_pinned_h_dir(&a->pin), &h_path, mode); ++ if (unlikely(err)) ++ goto out_unpin; ++ ++ /* make the dir opaque */ ++ diropq = 0; ++ h_mtx = &h_path.dentry->d_inode->i_mutex; ++ if (wh_dentry ++ || au_opt_test(au_mntflags(sb), ALWAYS_DIROPQ)) { ++ mutex_lock_nested(h_mtx, AuLsc_I_CHILD); ++ opq_dentry = au_diropq_create(dentry, bindex); ++ mutex_unlock(h_mtx); ++ err = PTR_ERR(opq_dentry); ++ if (IS_ERR(opq_dentry)) ++ goto out_dir; ++ dput(opq_dentry); ++ diropq = 1; ++ } ++ ++ err = epilog(dir, bindex, wh_dentry, dentry); ++ if (!err) { ++ inc_nlink(dir); ++ goto out_unpin; /* success */ ++ } ++ ++ /* 
revert */ ++ if (diropq) { ++ AuLabel(revert opq); ++ mutex_lock_nested(h_mtx, AuLsc_I_CHILD); ++ rerr = au_diropq_remove(dentry, bindex); ++ mutex_unlock(h_mtx); ++ if (rerr) { ++ AuIOErr("%.*s reverting diropq failed(%d, %d)\n", ++ AuDLNPair(dentry), err, rerr); ++ err = -EIO; ++ } ++ } ++ ++out_dir: ++ AuLabel(revert dir); ++ rerr = vfsub_rmdir(au_pinned_h_dir(&a->pin), &h_path); ++ if (rerr) { ++ AuIOErr("%.*s reverting dir failed(%d, %d)\n", ++ AuDLNPair(dentry), err, rerr); ++ err = -EIO; ++ } ++ au_dtime_revert(&a->dt); ++out_unpin: ++ au_unpin(&a->pin); ++ dput(wh_dentry); ++out_parent: ++ di_write_unlock(parent); ++out_unlock: ++ if (unlikely(err)) { ++ au_update_dbstart(dentry); ++ d_drop(dentry); ++ } ++ aufs_read_unlock(dentry, AuLock_DW); ++out_free: ++ kfree(a); ++out: ++ return err; ++} +diff -uNr linux-3.2.0-gentoo-r1.orig//fs/aufs/i_op.c linux-3.2.0-gentoo-r1/fs/aufs/i_op.c +--- linux-3.2.0-gentoo-r1.orig//fs/aufs/i_op.c 1970-01-01 01:00:00.000000000 +0100 ++++ linux-3.2.0-gentoo-r1/fs/aufs/i_op.c 2012-01-17 12:11:24.736230305 +0100 +@@ -0,0 +1,992 @@ ++/* ++ * Copyright (C) 2005-2012 Junjiro R. Okajima ++ * ++ * This program, aufs is free software; you can redistribute it and/or modify ++ * it under the terms of the GNU General Public License as published by ++ * the Free Software Foundation; either version 2 of the License, or ++ * (at your option) any later version. ++ * ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ * GNU General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public License ++ * along with this program; if not, write to the Free Software ++ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA ++ */ ++ ++/* ++ * inode operations (except add/del/rename) ++ */ ++ ++#include ++#include ++#include ++#include ++#include "aufs.h" ++ ++static int h_permission(struct inode *h_inode, int mask, ++ struct vfsmount *h_mnt, int brperm) ++{ ++ int err; ++ const unsigned char write_mask = !!(mask & (MAY_WRITE | MAY_APPEND)); ++ ++ err = -EACCES; ++ if ((write_mask && IS_IMMUTABLE(h_inode)) ++ || ((mask & MAY_EXEC) ++ && S_ISREG(h_inode->i_mode) ++ && ((h_mnt->mnt_flags & MNT_NOEXEC) ++ || !(h_inode->i_mode & S_IXUGO)))) ++ goto out; ++ ++ /* ++ * - skip the lower fs test in the case of write to ro branch. ++ * - nfs dir permission write check is optimized, but a policy for ++ * link/rename requires a real check. ++ */ ++ if ((write_mask && !au_br_writable(brperm)) ++ || (au_test_nfs(h_inode->i_sb) && S_ISDIR(h_inode->i_mode) ++ && write_mask && !(mask & MAY_READ)) ++ || !h_inode->i_op->permission) { ++ /* AuLabel(generic_permission); */ ++ err = generic_permission(h_inode, mask); ++ } else { ++ /* AuLabel(h_inode->permission); */ ++ err = h_inode->i_op->permission(h_inode, mask); ++ AuTraceErr(err); ++ } ++ ++ if (!err) ++ err = devcgroup_inode_permission(h_inode, mask); ++ if (!err) ++ err = security_inode_permission(h_inode, mask); ++ ++#if 0 ++ if (!err) { ++ /* todo: do we need to call ima_path_check()? 
*/ ++ struct path h_path = { ++ .dentry = ++ .mnt = h_mnt ++ }; ++ err = ima_path_check(&h_path, ++ mask & (MAY_READ | MAY_WRITE | MAY_EXEC), ++ IMA_COUNT_LEAVE); ++ } ++#endif ++ ++out: ++ return err; ++} ++ ++static int aufs_permission(struct inode *inode, int mask) ++{ ++ int err; ++ aufs_bindex_t bindex, bend; ++ const unsigned char isdir = !!S_ISDIR(inode->i_mode), ++ write_mask = !!(mask & (MAY_WRITE | MAY_APPEND)); ++ struct inode *h_inode; ++ struct super_block *sb; ++ struct au_branch *br; ++ ++ /* todo: support rcu-walk? */ ++ if (mask & MAY_NOT_BLOCK) ++ return -ECHILD; ++ ++ sb = inode->i_sb; ++ si_read_lock(sb, AuLock_FLUSH); ++ ii_read_lock_child(inode); ++#if 0 ++ err = au_iigen_test(inode, au_sigen(sb)); ++ if (unlikely(err)) ++ goto out; ++#endif ++ ++ if (!isdir || write_mask) { ++ err = au_busy_or_stale(); ++ h_inode = au_h_iptr(inode, au_ibstart(inode)); ++ if (unlikely(!h_inode ++ || (h_inode->i_mode & S_IFMT) ++ != (inode->i_mode & S_IFMT))) ++ goto out; ++ ++ err = 0; ++ bindex = au_ibstart(inode); ++ br = au_sbr(sb, bindex); ++ err = h_permission(h_inode, mask, br->br_mnt, br->br_perm); ++ if (write_mask ++ && !err ++ && !special_file(h_inode->i_mode)) { ++ /* test whether the upper writable branch exists */ ++ err = -EROFS; ++ for (; bindex >= 0; bindex--) ++ if (!au_br_rdonly(au_sbr(sb, bindex))) { ++ err = 0; ++ break; ++ } ++ } ++ goto out; ++ } ++ ++ /* non-write to dir */ ++ err = 0; ++ bend = au_ibend(inode); ++ for (bindex = au_ibstart(inode); !err && bindex <= bend; bindex++) { ++ h_inode = au_h_iptr(inode, bindex); ++ if (h_inode) { ++ err = au_busy_or_stale(); ++ if (unlikely(!S_ISDIR(h_inode->i_mode))) ++ break; ++ ++ br = au_sbr(sb, bindex); ++ err = h_permission(h_inode, mask, br->br_mnt, ++ br->br_perm); ++ } ++ } ++ ++out: ++ ii_read_unlock(inode); ++ si_read_unlock(sb); ++ return err; ++} ++ ++/* ---------------------------------------------------------------------- */ ++ ++static struct dentry *aufs_lookup(struct inode *dir, struct dentry *dentry, ++ struct nameidata *nd) ++{ ++ struct dentry *ret, *parent; ++ struct inode *inode; ++ struct super_block *sb; ++ int err, npositive, lc_idx; ++ ++ IMustLock(dir); ++ ++ sb = dir->i_sb; ++ err = si_read_lock(sb, AuLock_FLUSH | AuLock_NOPLM); ++ ret = ERR_PTR(err); ++ if (unlikely(err)) ++ goto out; ++ ++ ret = ERR_PTR(-ENAMETOOLONG); ++ if (unlikely(dentry->d_name.len > AUFS_MAX_NAMELEN)) ++ goto out_si; ++ err = au_di_init(dentry); ++ ret = ERR_PTR(err); ++ if (unlikely(err)) ++ goto out_si; ++ ++ inode = NULL; ++ npositive = 0; /* suppress a warning */ ++ parent = dentry->d_parent; /* dir inode is locked */ ++ di_read_lock_parent(parent, AuLock_IR); ++ err = au_alive_dir(parent); ++ if (!err) ++ err = au_digen_test(parent, au_sigen(sb)); ++ if (!err) { ++ npositive = au_lkup_dentry(dentry, au_dbstart(parent), ++ /*type*/0, nd); ++ err = npositive; ++ } ++ di_read_unlock(parent, AuLock_IR); ++ ret = ERR_PTR(err); ++ if (unlikely(err < 0)) ++ goto out_unlock; ++ ++ if (npositive) { ++ inode = au_new_inode(dentry, /*must_new*/0); ++ ret = (void *)inode; ++ } ++ if (IS_ERR(inode)) { ++ inode = NULL; ++ goto out_unlock; ++ } ++ ++ ret = d_splice_alias(inode, dentry); ++ if (unlikely(IS_ERR(ret) && inode)) { ++ ii_write_unlock(inode); ++ lc_idx = AuLcNonDir_IIINFO; ++ if (S_ISLNK(inode->i_mode)) ++ lc_idx = AuLcSymlink_IIINFO; ++ else if (S_ISDIR(inode->i_mode)) ++ lc_idx = AuLcDir_IIINFO; ++ au_rw_class(&au_ii(inode)->ii_rwsem, au_lc_key + lc_idx); ++ iput(inode); ++ } ++ ++out_unlock: ++ 
di_write_unlock(dentry); ++ if (unlikely(IS_ERR(ret) && inode)) { ++ lc_idx = AuLcNonDir_DIINFO; ++ if (S_ISLNK(inode->i_mode)) ++ lc_idx = AuLcSymlink_DIINFO; ++ else if (S_ISDIR(inode->i_mode)) ++ lc_idx = AuLcDir_DIINFO; ++ au_rw_class(&au_di(dentry)->di_rwsem, au_lc_key + lc_idx); ++ } ++out_si: ++ si_read_unlock(sb); ++out: ++ return ret; ++} ++ ++/* ---------------------------------------------------------------------- */ ++ ++static int au_wr_dir_cpup(struct dentry *dentry, struct dentry *parent, ++ const unsigned char add_entry, aufs_bindex_t bcpup, ++ aufs_bindex_t bstart) ++{ ++ int err; ++ struct dentry *h_parent; ++ struct inode *h_dir; ++ ++ if (add_entry) ++ IMustLock(parent->d_inode); ++ else ++ di_write_lock_parent(parent); ++ ++ err = 0; ++ if (!au_h_dptr(parent, bcpup)) { ++ if (bstart < bcpup) ++ err = au_cpdown_dirs(dentry, bcpup); ++ else ++ err = au_cpup_dirs(dentry, bcpup); ++ } ++ if (!err && add_entry) { ++ h_parent = au_h_dptr(parent, bcpup); ++ h_dir = h_parent->d_inode; ++ mutex_lock_nested(&h_dir->i_mutex, AuLsc_I_PARENT); ++ err = au_lkup_neg(dentry, bcpup); ++ /* todo: no unlock here */ ++ mutex_unlock(&h_dir->i_mutex); ++ ++ AuDbg("bcpup %d\n", bcpup); ++ if (!err) { ++ if (!dentry->d_inode) ++ au_set_h_dptr(dentry, bstart, NULL); ++ au_update_dbrange(dentry, /*do_put_zero*/0); ++ } ++ } ++ ++ if (!add_entry) ++ di_write_unlock(parent); ++ if (!err) ++ err = bcpup; /* success */ ++ ++ AuTraceErr(err); ++ return err; ++} ++ ++/* ++ * decide the branch and the parent dir where we will create a new entry. ++ * returns new bindex or an error. ++ * copyup the parent dir if needed. ++ */ ++int au_wr_dir(struct dentry *dentry, struct dentry *src_dentry, ++ struct au_wr_dir_args *args) ++{ ++ int err; ++ aufs_bindex_t bcpup, bstart, src_bstart; ++ const unsigned char add_entry = !!au_ftest_wrdir(args->flags, ++ ADD_ENTRY); ++ struct super_block *sb; ++ struct dentry *parent; ++ struct au_sbinfo *sbinfo; ++ ++ sb = dentry->d_sb; ++ sbinfo = au_sbi(sb); ++ parent = dget_parent(dentry); ++ bstart = au_dbstart(dentry); ++ bcpup = bstart; ++ if (args->force_btgt < 0) { ++ if (src_dentry) { ++ src_bstart = au_dbstart(src_dentry); ++ if (src_bstart < bstart) ++ bcpup = src_bstart; ++ } else if (add_entry) { ++ err = AuWbrCreate(sbinfo, dentry, ++ au_ftest_wrdir(args->flags, ISDIR)); ++ bcpup = err; ++ } ++ ++ if (bcpup < 0 || au_test_ro(sb, bcpup, dentry->d_inode)) { ++ if (add_entry) ++ err = AuWbrCopyup(sbinfo, dentry); ++ else { ++ if (!IS_ROOT(dentry)) { ++ di_read_lock_parent(parent, !AuLock_IR); ++ err = AuWbrCopyup(sbinfo, dentry); ++ di_read_unlock(parent, !AuLock_IR); ++ } else ++ err = AuWbrCopyup(sbinfo, dentry); ++ } ++ bcpup = err; ++ if (unlikely(err < 0)) ++ goto out; ++ } ++ } else { ++ bcpup = args->force_btgt; ++ AuDebugOn(au_test_ro(sb, bcpup, dentry->d_inode)); ++ } ++ ++ AuDbg("bstart %d, bcpup %d\n", bstart, bcpup); ++ err = bcpup; ++ if (bcpup == bstart) ++ goto out; /* success */ ++ ++ /* copyup the new parent into the branch we process */ ++ err = au_wr_dir_cpup(dentry, parent, add_entry, bcpup, bstart); ++ if (err >= 0) { ++ if (!dentry->d_inode) { ++ au_set_h_dptr(dentry, bstart, NULL); ++ au_set_dbstart(dentry, bcpup); ++ au_set_dbend(dentry, bcpup); ++ } ++ AuDebugOn(add_entry && !au_h_dptr(dentry, bcpup)); ++ } ++ ++out: ++ dput(parent); ++ return err; ++} ++ ++/* ---------------------------------------------------------------------- */ ++ ++struct dentry *au_pinned_h_parent(struct au_pin *pin) ++{ ++ if (pin && pin->parent) ++ return 
au_h_dptr(pin->parent, pin->bindex); ++ return NULL; ++} ++ ++void au_unpin(struct au_pin *p) ++{ ++ if (p->h_mnt && au_ftest_pin(p->flags, MNT_WRITE)) ++ mnt_drop_write(p->h_mnt); ++ if (!p->hdir) ++ return; ++ ++ au_hn_imtx_unlock(p->hdir); ++ if (!au_ftest_pin(p->flags, DI_LOCKED)) ++ di_read_unlock(p->parent, AuLock_IR); ++ iput(p->hdir->hi_inode); ++ dput(p->parent); ++ p->parent = NULL; ++ p->hdir = NULL; ++ p->h_mnt = NULL; ++} ++ ++int au_do_pin(struct au_pin *p) ++{ ++ int err; ++ struct super_block *sb; ++ struct dentry *h_dentry, *h_parent; ++ struct au_branch *br; ++ struct inode *h_dir; ++ ++ err = 0; ++ sb = p->dentry->d_sb; ++ br = au_sbr(sb, p->bindex); ++ if (IS_ROOT(p->dentry)) { ++ if (au_ftest_pin(p->flags, MNT_WRITE)) { ++ p->h_mnt = br->br_mnt; ++ err = mnt_want_write(p->h_mnt); ++ if (unlikely(err)) { ++ au_fclr_pin(p->flags, MNT_WRITE); ++ goto out_err; ++ } ++ } ++ goto out; ++ } ++ ++ h_dentry = NULL; ++ if (p->bindex <= au_dbend(p->dentry)) ++ h_dentry = au_h_dptr(p->dentry, p->bindex); ++ ++ p->parent = dget_parent(p->dentry); ++ if (!au_ftest_pin(p->flags, DI_LOCKED)) ++ di_read_lock(p->parent, AuLock_IR, p->lsc_di); ++ ++ h_dir = NULL; ++ h_parent = au_h_dptr(p->parent, p->bindex); ++ p->hdir = au_hi(p->parent->d_inode, p->bindex); ++ if (p->hdir) ++ h_dir = p->hdir->hi_inode; ++ ++ /* ++ * udba case, or ++ * if DI_LOCKED is not set, then p->parent may be different ++ * and h_parent can be NULL. ++ */ ++ if (unlikely(!p->hdir || !h_dir || !h_parent)) { ++ err = -EBUSY; ++ if (!au_ftest_pin(p->flags, DI_LOCKED)) ++ di_read_unlock(p->parent, AuLock_IR); ++ dput(p->parent); ++ p->parent = NULL; ++ goto out_err; ++ } ++ ++ au_igrab(h_dir); ++ au_hn_imtx_lock_nested(p->hdir, p->lsc_hi); ++ ++ if (unlikely(p->hdir->hi_inode != h_parent->d_inode)) { ++ err = -EBUSY; ++ goto out_unpin; ++ } ++ if (h_dentry) { ++ err = au_h_verify(h_dentry, p->udba, h_dir, h_parent, br); ++ if (unlikely(err)) { ++ au_fclr_pin(p->flags, MNT_WRITE); ++ goto out_unpin; ++ } ++ } ++ ++ if (au_ftest_pin(p->flags, MNT_WRITE)) { ++ p->h_mnt = br->br_mnt; ++ err = mnt_want_write(p->h_mnt); ++ if (unlikely(err)) { ++ au_fclr_pin(p->flags, MNT_WRITE); ++ goto out_unpin; ++ } ++ } ++ goto out; /* success */ ++ ++out_unpin: ++ au_unpin(p); ++out_err: ++ pr_err("err %d\n", err); ++ err = au_busy_or_stale(); ++out: ++ return err; ++} ++ ++void au_pin_init(struct au_pin *p, struct dentry *dentry, ++ aufs_bindex_t bindex, int lsc_di, int lsc_hi, ++ unsigned int udba, unsigned char flags) ++{ ++ p->dentry = dentry; ++ p->udba = udba; ++ p->lsc_di = lsc_di; ++ p->lsc_hi = lsc_hi; ++ p->flags = flags; ++ p->bindex = bindex; ++ ++ p->parent = NULL; ++ p->hdir = NULL; ++ p->h_mnt = NULL; ++} ++ ++int au_pin(struct au_pin *pin, struct dentry *dentry, aufs_bindex_t bindex, ++ unsigned int udba, unsigned char flags) ++{ ++ au_pin_init(pin, dentry, bindex, AuLsc_DI_PARENT, AuLsc_I_PARENT2, ++ udba, flags); ++ return au_do_pin(pin); ++} ++ ++/* ---------------------------------------------------------------------- */ ++ ++/* ++ * ->setattr() and ->getattr() are called in various cases. ++ * chmod, stat: dentry is revalidated. ++ * fchmod, fstat: file and dentry are not revalidated, additionally they may be ++ * unhashed. ++ * for ->setattr(), ia->ia_file is passed from ftruncate only. 
++ */ ++/* todo: consolidate with do_refresh() and simple_reval_dpath() */ ++static int au_reval_for_attr(struct dentry *dentry, unsigned int sigen) ++{ ++ int err; ++ struct inode *inode; ++ struct dentry *parent; ++ ++ err = 0; ++ inode = dentry->d_inode; ++ if (au_digen_test(dentry, sigen)) { ++ parent = dget_parent(dentry); ++ di_read_lock_parent(parent, AuLock_IR); ++ err = au_refresh_dentry(dentry, parent); ++ di_read_unlock(parent, AuLock_IR); ++ dput(parent); ++ } ++ ++ AuTraceErr(err); ++ return err; ++} ++ ++#define AuIcpup_DID_CPUP 1 ++#define au_ftest_icpup(flags, name) ((flags) & AuIcpup_##name) ++#define au_fset_icpup(flags, name) \ ++ do { (flags) |= AuIcpup_##name; } while (0) ++#define au_fclr_icpup(flags, name) \ ++ do { (flags) &= ~AuIcpup_##name; } while (0) ++ ++struct au_icpup_args { ++ unsigned char flags; ++ unsigned char pin_flags; ++ aufs_bindex_t btgt; ++ unsigned int udba; ++ struct au_pin pin; ++ struct path h_path; ++ struct inode *h_inode; ++}; ++ ++static int au_pin_and_icpup(struct dentry *dentry, struct iattr *ia, ++ struct au_icpup_args *a) ++{ ++ int err; ++ loff_t sz; ++ aufs_bindex_t bstart, ibstart; ++ struct dentry *hi_wh, *parent; ++ struct inode *inode; ++ struct file *h_file; ++ struct au_wr_dir_args wr_dir_args = { ++ .force_btgt = -1, ++ .flags = 0 ++ }; ++ ++ bstart = au_dbstart(dentry); ++ inode = dentry->d_inode; ++ if (S_ISDIR(inode->i_mode)) ++ au_fset_wrdir(wr_dir_args.flags, ISDIR); ++ /* plink or hi_wh() case */ ++ ibstart = au_ibstart(inode); ++ if (bstart != ibstart && !au_test_ro(inode->i_sb, ibstart, inode)) ++ wr_dir_args.force_btgt = ibstart; ++ err = au_wr_dir(dentry, /*src_dentry*/NULL, &wr_dir_args); ++ if (unlikely(err < 0)) ++ goto out; ++ a->btgt = err; ++ if (err != bstart) ++ au_fset_icpup(a->flags, DID_CPUP); ++ ++ err = 0; ++ a->pin_flags = AuPin_MNT_WRITE; ++ parent = NULL; ++ if (!IS_ROOT(dentry)) { ++ au_fset_pin(a->pin_flags, DI_LOCKED); ++ parent = dget_parent(dentry); ++ di_write_lock_parent(parent); ++ } ++ ++ err = au_pin(&a->pin, dentry, a->btgt, a->udba, a->pin_flags); ++ if (unlikely(err)) ++ goto out_parent; ++ ++ a->h_path.dentry = au_h_dptr(dentry, bstart); ++ a->h_inode = a->h_path.dentry->d_inode; ++ mutex_lock_nested(&a->h_inode->i_mutex, AuLsc_I_CHILD); ++ sz = -1; ++ if ((ia->ia_valid & ATTR_SIZE) && ia->ia_size < i_size_read(a->h_inode)) ++ sz = ia->ia_size; ++ ++ h_file = NULL; ++ hi_wh = NULL; ++ if (au_ftest_icpup(a->flags, DID_CPUP) && d_unlinked(dentry)) { ++ hi_wh = au_hi_wh(inode, a->btgt); ++ if (!hi_wh) { ++ err = au_sio_cpup_wh(dentry, a->btgt, sz, /*file*/NULL); ++ if (unlikely(err)) ++ goto out_unlock; ++ hi_wh = au_hi_wh(inode, a->btgt); ++ /* todo: revalidate hi_wh? 
*/ ++ } ++ } ++ ++ if (parent) { ++ au_pin_set_parent_lflag(&a->pin, /*lflag*/0); ++ di_downgrade_lock(parent, AuLock_IR); ++ dput(parent); ++ parent = NULL; ++ } ++ if (!au_ftest_icpup(a->flags, DID_CPUP)) ++ goto out; /* success */ ++ ++ if (!d_unhashed(dentry)) { ++ h_file = au_h_open_pre(dentry, bstart); ++ if (IS_ERR(h_file)) { ++ err = PTR_ERR(h_file); ++ h_file = NULL; ++ } else ++ err = au_sio_cpup_simple(dentry, a->btgt, sz, ++ AuCpup_DTIME); ++ if (!err) ++ a->h_path.dentry = au_h_dptr(dentry, a->btgt); ++ } else if (!hi_wh) ++ a->h_path.dentry = au_h_dptr(dentry, a->btgt); ++ else ++ a->h_path.dentry = hi_wh; /* do not dget here */ ++ ++out_unlock: ++ mutex_unlock(&a->h_inode->i_mutex); ++ au_h_open_post(dentry, bstart, h_file); ++ a->h_inode = a->h_path.dentry->d_inode; ++ if (!err) { ++ mutex_lock_nested(&a->h_inode->i_mutex, AuLsc_I_CHILD); ++ goto out; /* success */ ++ } ++ ++ au_unpin(&a->pin); ++out_parent: ++ if (parent) { ++ di_write_unlock(parent); ++ dput(parent); ++ } ++out: ++ return err; ++} ++ ++static int aufs_setattr(struct dentry *dentry, struct iattr *ia) ++{ ++ int err; ++ struct inode *inode; ++ struct super_block *sb; ++ struct file *file; ++ struct au_icpup_args *a; ++ ++ inode = dentry->d_inode; ++ IMustLock(inode); ++ ++ err = -ENOMEM; ++ a = kzalloc(sizeof(*a), GFP_NOFS); ++ if (unlikely(!a)) ++ goto out; ++ ++ if (ia->ia_valid & (ATTR_KILL_SUID | ATTR_KILL_SGID)) ++ ia->ia_valid &= ~ATTR_MODE; ++ ++ file = NULL; ++ sb = dentry->d_sb; ++ err = si_read_lock(sb, AuLock_FLUSH | AuLock_NOPLM); ++ if (unlikely(err)) ++ goto out_kfree; ++ ++ if (ia->ia_valid & ATTR_FILE) { ++ /* currently ftruncate(2) only */ ++ AuDebugOn(!S_ISREG(inode->i_mode)); ++ file = ia->ia_file; ++ err = au_reval_and_lock_fdi(file, au_reopen_nondir, /*wlock*/1); ++ if (unlikely(err)) ++ goto out_si; ++ ia->ia_file = au_hf_top(file); ++ a->udba = AuOpt_UDBA_NONE; ++ } else { ++ /* fchmod() doesn't pass ia_file */ ++ a->udba = au_opt_udba(sb); ++ di_write_lock_child(dentry); ++ /* no d_unlinked(), to set UDBA_NONE for root */ ++ if (d_unhashed(dentry)) ++ a->udba = AuOpt_UDBA_NONE; ++ if (a->udba != AuOpt_UDBA_NONE) { ++ AuDebugOn(IS_ROOT(dentry)); ++ err = au_reval_for_attr(dentry, au_sigen(sb)); ++ if (unlikely(err)) ++ goto out_dentry; ++ } ++ } ++ ++ err = au_pin_and_icpup(dentry, ia, a); ++ if (unlikely(err < 0)) ++ goto out_dentry; ++ if (au_ftest_icpup(a->flags, DID_CPUP)) { ++ ia->ia_file = NULL; ++ ia->ia_valid &= ~ATTR_FILE; ++ } ++ ++ a->h_path.mnt = au_sbr_mnt(sb, a->btgt); ++ if ((ia->ia_valid & (ATTR_MODE | ATTR_CTIME)) ++ == (ATTR_MODE | ATTR_CTIME)) { ++ err = security_path_chmod(a->h_path.dentry, a->h_path.mnt, ++ ia->ia_mode); ++ if (unlikely(err)) ++ goto out_unlock; ++ } else if ((ia->ia_valid & (ATTR_UID | ATTR_GID)) ++ && (ia->ia_valid & ATTR_CTIME)) { ++ err = security_path_chown(&a->h_path, ia->ia_uid, ia->ia_gid); ++ if (unlikely(err)) ++ goto out_unlock; ++ } ++ ++ if (ia->ia_valid & ATTR_SIZE) { ++ struct file *f; ++ ++ if (ia->ia_size < i_size_read(inode)) ++ /* unmap only */ ++ truncate_setsize(inode, ia->ia_size); ++ ++ f = NULL; ++ if (ia->ia_valid & ATTR_FILE) ++ f = ia->ia_file; ++ mutex_unlock(&a->h_inode->i_mutex); ++ err = vfsub_trunc(&a->h_path, ia->ia_size, ia->ia_valid, f); ++ mutex_lock_nested(&a->h_inode->i_mutex, AuLsc_I_CHILD); ++ } else ++ err = vfsub_notify_change(&a->h_path, ia); ++ if (!err) ++ au_cpup_attr_changeable(inode); ++ ++out_unlock: ++ mutex_unlock(&a->h_inode->i_mutex); ++ au_unpin(&a->pin); ++ if (unlikely(err)) ++ 
au_update_dbstart(dentry); ++out_dentry: ++ di_write_unlock(dentry); ++ if (file) { ++ fi_write_unlock(file); ++ ia->ia_file = file; ++ ia->ia_valid |= ATTR_FILE; ++ } ++out_si: ++ si_read_unlock(sb); ++out_kfree: ++ kfree(a); ++out: ++ AuTraceErr(err); ++ return err; ++} ++ ++static void au_refresh_iattr(struct inode *inode, struct kstat *st, ++ unsigned int nlink) ++{ ++ unsigned int n; ++ ++ inode->i_mode = st->mode; ++ inode->i_uid = st->uid; ++ inode->i_gid = st->gid; ++ inode->i_atime = st->atime; ++ inode->i_mtime = st->mtime; ++ inode->i_ctime = st->ctime; ++ ++ au_cpup_attr_nlink(inode, /*force*/0); ++ if (S_ISDIR(inode->i_mode)) { ++ n = inode->i_nlink; ++ n -= nlink; ++ n += st->nlink; ++ set_nlink(inode, n); ++ } ++ ++ spin_lock(&inode->i_lock); ++ inode->i_blocks = st->blocks; ++ i_size_write(inode, st->size); ++ spin_unlock(&inode->i_lock); ++} ++ ++static int aufs_getattr(struct vfsmount *mnt __maybe_unused, ++ struct dentry *dentry, struct kstat *st) ++{ ++ int err; ++ unsigned int mnt_flags; ++ aufs_bindex_t bindex; ++ unsigned char udba_none, positive; ++ struct super_block *sb, *h_sb; ++ struct inode *inode; ++ struct vfsmount *h_mnt; ++ struct dentry *h_dentry; ++ ++ sb = dentry->d_sb; ++ inode = dentry->d_inode; ++ err = si_read_lock(sb, AuLock_FLUSH | AuLock_NOPLM); ++ if (unlikely(err)) ++ goto out; ++ mnt_flags = au_mntflags(sb); ++ udba_none = !!au_opt_test(mnt_flags, UDBA_NONE); ++ ++ /* support fstat(2) */ ++ if (!d_unlinked(dentry) && !udba_none) { ++ unsigned int sigen = au_sigen(sb); ++ err = au_digen_test(dentry, sigen); ++ if (!err) { ++ di_read_lock_child(dentry, AuLock_IR); ++ err = au_dbrange_test(dentry); ++ if (unlikely(err)) ++ goto out_unlock; ++ } else { ++ AuDebugOn(IS_ROOT(dentry)); ++ di_write_lock_child(dentry); ++ err = au_dbrange_test(dentry); ++ if (!err) ++ err = au_reval_for_attr(dentry, sigen); ++ di_downgrade_lock(dentry, AuLock_IR); ++ if (unlikely(err)) ++ goto out_unlock; ++ } ++ } else ++ di_read_lock_child(dentry, AuLock_IR); ++ ++ bindex = au_ibstart(inode); ++ h_mnt = au_sbr_mnt(sb, bindex); ++ h_sb = h_mnt->mnt_sb; ++ if (!au_test_fs_bad_iattr(h_sb) && udba_none) ++ goto out_fill; /* success */ ++ ++ h_dentry = NULL; ++ if (au_dbstart(dentry) == bindex) ++ h_dentry = dget(au_h_dptr(dentry, bindex)); ++ else if (au_opt_test(mnt_flags, PLINK) && au_plink_test(inode)) { ++ h_dentry = au_plink_lkup(inode, bindex); ++ if (IS_ERR(h_dentry)) ++ goto out_fill; /* pretending success */ ++ } ++ /* illegally overlapped or something */ ++ if (unlikely(!h_dentry)) ++ goto out_fill; /* pretending success */ ++ ++ positive = !!h_dentry->d_inode; ++ if (positive) ++ err = vfs_getattr(h_mnt, h_dentry, st); ++ dput(h_dentry); ++ if (!err) { ++ if (positive) ++ au_refresh_iattr(inode, st, h_dentry->d_inode->i_nlink); ++ goto out_fill; /* success */ ++ } ++ AuTraceErr(err); ++ goto out_unlock; ++ ++out_fill: ++ generic_fillattr(inode, st); ++out_unlock: ++ di_read_unlock(dentry, AuLock_IR); ++ si_read_unlock(sb); ++out: ++ AuTraceErr(err); ++ return err; ++} ++ ++/* ---------------------------------------------------------------------- */ ++ ++static int h_readlink(struct dentry *dentry, int bindex, char __user *buf, ++ int bufsiz) ++{ ++ int err; ++ struct super_block *sb; ++ struct dentry *h_dentry; ++ ++ err = -EINVAL; ++ h_dentry = au_h_dptr(dentry, bindex); ++ if (unlikely(!h_dentry->d_inode->i_op->readlink)) ++ goto out; ++ ++ err = security_inode_readlink(h_dentry); ++ if (unlikely(err)) ++ goto out; ++ ++ sb = dentry->d_sb; ++ if 
(!au_test_ro(sb, bindex, dentry->d_inode)) { ++ vfsub_touch_atime(au_sbr_mnt(sb, bindex), h_dentry); ++ fsstack_copy_attr_atime(dentry->d_inode, h_dentry->d_inode); ++ } ++ err = h_dentry->d_inode->i_op->readlink(h_dentry, buf, bufsiz); ++ ++out: ++ return err; ++} ++ ++static int aufs_readlink(struct dentry *dentry, char __user *buf, int bufsiz) ++{ ++ int err; ++ ++ err = aufs_read_lock(dentry, AuLock_IR | AuLock_GEN); ++ if (unlikely(err)) ++ goto out; ++ err = au_d_hashed_positive(dentry); ++ if (!err) ++ err = h_readlink(dentry, au_dbstart(dentry), buf, bufsiz); ++ aufs_read_unlock(dentry, AuLock_IR); ++ ++out: ++ return err; ++} ++ ++static void *aufs_follow_link(struct dentry *dentry, struct nameidata *nd) ++{ ++ int err; ++ mm_segment_t old_fs; ++ union { ++ char *k; ++ char __user *u; ++ } buf; ++ ++ err = -ENOMEM; ++ buf.k = __getname_gfp(GFP_NOFS); ++ if (unlikely(!buf.k)) ++ goto out; ++ ++ err = aufs_read_lock(dentry, AuLock_IR | AuLock_GEN); ++ if (unlikely(err)) ++ goto out_name; ++ ++ err = au_d_hashed_positive(dentry); ++ if (!err) { ++ old_fs = get_fs(); ++ set_fs(KERNEL_DS); ++ err = h_readlink(dentry, au_dbstart(dentry), buf.u, PATH_MAX); ++ set_fs(old_fs); ++ } ++ aufs_read_unlock(dentry, AuLock_IR); ++ ++ if (err >= 0) { ++ buf.k[err] = 0; ++ /* will be freed by put_link */ ++ nd_set_link(nd, buf.k); ++ return NULL; /* success */ ++ } ++ ++out_name: ++ __putname(buf.k); ++out: ++ path_put(&nd->path); ++ AuTraceErr(err); ++ return ERR_PTR(err); ++} ++ ++static void aufs_put_link(struct dentry *dentry __maybe_unused, ++ struct nameidata *nd, void *cookie __maybe_unused) ++{ ++ __putname(nd_get_link(nd)); ++} ++ ++/* ---------------------------------------------------------------------- */ ++ ++static void aufs_truncate_range(struct inode *inode __maybe_unused, ++ loff_t start __maybe_unused, ++ loff_t end __maybe_unused) ++{ ++ AuUnsupport(); ++} ++ ++/* ---------------------------------------------------------------------- */ ++ ++struct inode_operations aufs_symlink_iop = { ++ .permission = aufs_permission, ++ .setattr = aufs_setattr, ++ .getattr = aufs_getattr, ++ .readlink = aufs_readlink, ++ .follow_link = aufs_follow_link, ++ .put_link = aufs_put_link ++}; ++ ++struct inode_operations aufs_dir_iop = { ++ .create = aufs_create, ++ .lookup = aufs_lookup, ++ .link = aufs_link, ++ .unlink = aufs_unlink, ++ .symlink = aufs_symlink, ++ .mkdir = aufs_mkdir, ++ .rmdir = aufs_rmdir, ++ .mknod = aufs_mknod, ++ .rename = aufs_rename, ++ ++ .permission = aufs_permission, ++ .setattr = aufs_setattr, ++ .getattr = aufs_getattr ++}; ++ ++struct inode_operations aufs_iop = { ++ .permission = aufs_permission, ++ .setattr = aufs_setattr, ++ .getattr = aufs_getattr, ++ .truncate_range = aufs_truncate_range ++}; +diff -uNr linux-3.2.0-gentoo-r1.orig//fs/aufs/i_op_del.c linux-3.2.0-gentoo-r1/fs/aufs/i_op_del.c +--- linux-3.2.0-gentoo-r1.orig//fs/aufs/i_op_del.c 1970-01-01 01:00:00.000000000 +0100 ++++ linux-3.2.0-gentoo-r1/fs/aufs/i_op_del.c 2012-01-17 12:11:24.759378661 +0100 +@@ -0,0 +1,478 @@ ++/* ++ * Copyright (C) 2005-2012 Junjiro R. Okajima ++ * ++ * This program, aufs is free software; you can redistribute it and/or modify ++ * it under the terms of the GNU General Public License as published by ++ * the Free Software Foundation; either version 2 of the License, or ++ * (at your option) any later version. 
++ * ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ * GNU General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public License ++ * along with this program; if not, write to the Free Software ++ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA ++ */ ++ ++/* ++ * inode operations (del entry) ++ */ ++ ++#include "aufs.h" ++ ++/* ++ * decide if a new whiteout for @dentry is necessary or not. ++ * when it is necessary, prepare the parent dir for the upper branch whose ++ * branch index is @bcpup for creation. the actual creation of the whiteout will ++ * be done by caller. ++ * return value: ++ * 0: wh is unnecessary ++ * plus: wh is necessary ++ * minus: error ++ */ ++int au_wr_dir_need_wh(struct dentry *dentry, int isdir, aufs_bindex_t *bcpup) ++{ ++ int need_wh, err; ++ aufs_bindex_t bstart; ++ struct super_block *sb; ++ ++ sb = dentry->d_sb; ++ bstart = au_dbstart(dentry); ++ if (*bcpup < 0) { ++ *bcpup = bstart; ++ if (au_test_ro(sb, bstart, dentry->d_inode)) { ++ err = AuWbrCopyup(au_sbi(sb), dentry); ++ *bcpup = err; ++ if (unlikely(err < 0)) ++ goto out; ++ } ++ } else ++ AuDebugOn(bstart < *bcpup ++ || au_test_ro(sb, *bcpup, dentry->d_inode)); ++ AuDbg("bcpup %d, bstart %d\n", *bcpup, bstart); ++ ++ if (*bcpup != bstart) { ++ err = au_cpup_dirs(dentry, *bcpup); ++ if (unlikely(err)) ++ goto out; ++ need_wh = 1; ++ } else { ++ struct au_dinfo *dinfo, *tmp; ++ ++ need_wh = -ENOMEM; ++ dinfo = au_di(dentry); ++ tmp = au_di_alloc(sb, AuLsc_DI_TMP); ++ if (tmp) { ++ au_di_cp(tmp, dinfo); ++ au_di_swap(tmp, dinfo); ++ /* returns the number of positive dentries */ ++ need_wh = au_lkup_dentry(dentry, bstart + 1, /*type*/0, ++ /*nd*/NULL); ++ au_di_swap(tmp, dinfo); ++ au_rw_write_unlock(&tmp->di_rwsem); ++ au_di_free(tmp); ++ } ++ } ++ AuDbg("need_wh %d\n", need_wh); ++ err = need_wh; ++ ++out: ++ return err; ++} ++ ++/* ++ * simple tests for the del-entry operations. ++ * following the checks in vfs, plus the parent-child relationship. ++ */ ++int au_may_del(struct dentry *dentry, aufs_bindex_t bindex, ++ struct dentry *h_parent, int isdir) ++{ ++ int err; ++ umode_t h_mode; ++ struct dentry *h_dentry, *h_latest; ++ struct inode *h_inode; ++ ++ h_dentry = au_h_dptr(dentry, bindex); ++ h_inode = h_dentry->d_inode; ++ if (dentry->d_inode) { ++ err = -ENOENT; ++ if (unlikely(!h_inode || !h_inode->i_nlink)) ++ goto out; ++ ++ h_mode = h_inode->i_mode; ++ if (!isdir) { ++ err = -EISDIR; ++ if (unlikely(S_ISDIR(h_mode))) ++ goto out; ++ } else if (unlikely(!S_ISDIR(h_mode))) { ++ err = -ENOTDIR; ++ goto out; ++ } ++ } else { ++ /* rename(2) case */ ++ err = -EIO; ++ if (unlikely(h_inode)) ++ goto out; ++ } ++ ++ err = -ENOENT; ++ /* expected parent dir is locked */ ++ if (unlikely(h_parent != h_dentry->d_parent)) ++ goto out; ++ err = 0; ++ ++ /* ++ * rmdir a dir may break the consistency on some filesystem. ++ * let's try heavy test. ++ */ ++ err = -EACCES; ++ if (unlikely(au_test_h_perm(h_parent->d_inode, MAY_EXEC | MAY_WRITE))) ++ goto out; ++ ++ h_latest = au_sio_lkup_one(&dentry->d_name, h_parent, ++ au_sbr(dentry->d_sb, bindex)); ++ err = -EIO; ++ if (IS_ERR(h_latest)) ++ goto out; ++ if (h_latest == h_dentry) ++ err = 0; ++ dput(h_latest); ++ ++out: ++ return err; ++} ++ ++/* ++ * decide the branch where we operate for @dentry. 
the branch index will be set ++ * @rbcpup. after diciding it, 'pin' it and store the timestamps of the parent ++ * dir for reverting. ++ * when a new whiteout is necessary, create it. ++ */ ++static struct dentry* ++lock_hdir_create_wh(struct dentry *dentry, int isdir, aufs_bindex_t *rbcpup, ++ struct au_dtime *dt, struct au_pin *pin) ++{ ++ struct dentry *wh_dentry; ++ struct super_block *sb; ++ struct path h_path; ++ int err, need_wh; ++ unsigned int udba; ++ aufs_bindex_t bcpup; ++ ++ need_wh = au_wr_dir_need_wh(dentry, isdir, rbcpup); ++ wh_dentry = ERR_PTR(need_wh); ++ if (unlikely(need_wh < 0)) ++ goto out; ++ ++ sb = dentry->d_sb; ++ udba = au_opt_udba(sb); ++ bcpup = *rbcpup; ++ err = au_pin(pin, dentry, bcpup, udba, ++ AuPin_DI_LOCKED | AuPin_MNT_WRITE); ++ wh_dentry = ERR_PTR(err); ++ if (unlikely(err)) ++ goto out; ++ ++ h_path.dentry = au_pinned_h_parent(pin); ++ if (udba != AuOpt_UDBA_NONE ++ && au_dbstart(dentry) == bcpup) { ++ err = au_may_del(dentry, bcpup, h_path.dentry, isdir); ++ wh_dentry = ERR_PTR(err); ++ if (unlikely(err)) ++ goto out_unpin; ++ } ++ ++ h_path.mnt = au_sbr_mnt(sb, bcpup); ++ au_dtime_store(dt, au_pinned_parent(pin), &h_path); ++ wh_dentry = NULL; ++ if (!need_wh) ++ goto out; /* success, no need to create whiteout */ ++ ++ wh_dentry = au_wh_create(dentry, bcpup, h_path.dentry); ++ if (IS_ERR(wh_dentry)) ++ goto out_unpin; ++ ++ /* returns with the parent is locked and wh_dentry is dget-ed */ ++ goto out; /* success */ ++ ++out_unpin: ++ au_unpin(pin); ++out: ++ return wh_dentry; ++} ++ ++/* ++ * when removing a dir, rename it to a unique temporary whiteout-ed name first ++ * in order to be revertible and save time for removing many child whiteouts ++ * under the dir. ++ * returns 1 when there are too many child whiteout and caller should remove ++ * them asynchronously. returns 0 when the number of children is enough small to ++ * remove now or the branch fs is a remote fs. ++ * otherwise return an error. ++ */ ++static int renwh_and_rmdir(struct dentry *dentry, aufs_bindex_t bindex, ++ struct au_nhash *whlist, struct inode *dir) ++{ ++ int rmdir_later, err, dirwh; ++ struct dentry *h_dentry; ++ struct super_block *sb; ++ ++ sb = dentry->d_sb; ++ SiMustAnyLock(sb); ++ h_dentry = au_h_dptr(dentry, bindex); ++ err = au_whtmp_ren(h_dentry, au_sbr(sb, bindex)); ++ if (unlikely(err)) ++ goto out; ++ ++ /* stop monitoring */ ++ au_hn_free(au_hi(dentry->d_inode, bindex)); ++ ++ if (!au_test_fs_remote(h_dentry->d_sb)) { ++ dirwh = au_sbi(sb)->si_dirwh; ++ rmdir_later = (dirwh <= 1); ++ if (!rmdir_later) ++ rmdir_later = au_nhash_test_longer_wh(whlist, bindex, ++ dirwh); ++ if (rmdir_later) ++ return rmdir_later; ++ } ++ ++ err = au_whtmp_rmdir(dir, bindex, h_dentry, whlist); ++ if (unlikely(err)) { ++ AuIOErr("rmdir %.*s, b%d failed, %d. ignored\n", ++ AuDLNPair(h_dentry), bindex, err); ++ err = 0; ++ } ++ ++out: ++ AuTraceErr(err); ++ return err; ++} ++ ++/* ++ * final procedure for deleting a entry. ++ * maintain dentry and iattr. ++ */ ++static void epilog(struct inode *dir, struct dentry *dentry, ++ aufs_bindex_t bindex) ++{ ++ struct inode *inode; ++ ++ inode = dentry->d_inode; ++ d_drop(dentry); ++ inode->i_ctime = dir->i_ctime; ++ ++ if (au_ibstart(dir) == bindex) ++ au_cpup_attr_timesizes(dir); ++ dir->i_version++; ++} ++ ++/* ++ * when an error happened, remove the created whiteout and revert everything. 
++ */ ++static int do_revert(int err, struct inode *dir, aufs_bindex_t bindex, ++ aufs_bindex_t bwh, struct dentry *wh_dentry, ++ struct dentry *dentry, struct au_dtime *dt) ++{ ++ int rerr; ++ struct path h_path = { ++ .dentry = wh_dentry, ++ .mnt = au_sbr_mnt(dir->i_sb, bindex) ++ }; ++ ++ rerr = au_wh_unlink_dentry(au_h_iptr(dir, bindex), &h_path, dentry); ++ if (!rerr) { ++ au_set_dbwh(dentry, bwh); ++ au_dtime_revert(dt); ++ return 0; ++ } ++ ++ AuIOErr("%.*s reverting whiteout failed(%d, %d)\n", ++ AuDLNPair(dentry), err, rerr); ++ return -EIO; ++} ++ ++/* ---------------------------------------------------------------------- */ ++ ++int aufs_unlink(struct inode *dir, struct dentry *dentry) ++{ ++ int err; ++ aufs_bindex_t bwh, bindex, bstart; ++ struct au_dtime dt; ++ struct au_pin pin; ++ struct path h_path; ++ struct inode *inode, *h_dir; ++ struct dentry *parent, *wh_dentry; ++ ++ IMustLock(dir); ++ ++ err = aufs_read_lock(dentry, AuLock_DW | AuLock_GEN); ++ if (unlikely(err)) ++ goto out; ++ err = au_d_hashed_positive(dentry); ++ if (unlikely(err)) ++ goto out_unlock; ++ inode = dentry->d_inode; ++ IMustLock(inode); ++ err = -EISDIR; ++ if (unlikely(S_ISDIR(inode->i_mode))) ++ goto out_unlock; /* possible? */ ++ ++ bstart = au_dbstart(dentry); ++ bwh = au_dbwh(dentry); ++ bindex = -1; ++ parent = dentry->d_parent; /* dir inode is locked */ ++ di_write_lock_parent(parent); ++ wh_dentry = lock_hdir_create_wh(dentry, /*isdir*/0, &bindex, &dt, &pin); ++ err = PTR_ERR(wh_dentry); ++ if (IS_ERR(wh_dentry)) ++ goto out_parent; ++ ++ h_path.mnt = au_sbr_mnt(dentry->d_sb, bstart); ++ h_path.dentry = au_h_dptr(dentry, bstart); ++ dget(h_path.dentry); ++ if (bindex == bstart) { ++ h_dir = au_pinned_h_dir(&pin); ++ err = vfsub_unlink(h_dir, &h_path, /*force*/0); ++ } else { ++ /* dir inode is locked */ ++ h_dir = wh_dentry->d_parent->d_inode; ++ IMustLock(h_dir); ++ err = 0; ++ } ++ ++ if (!err) { ++ vfsub_drop_nlink(inode); ++ epilog(dir, dentry, bindex); ++ ++ /* update target timestamps */ ++ if (bindex == bstart) { ++ vfsub_update_h_iattr(&h_path, /*did*/NULL); /*ignore*/ ++ inode->i_ctime = h_path.dentry->d_inode->i_ctime; ++ } else ++ /* todo: this timestamp may be reverted later */ ++ inode->i_ctime = h_dir->i_ctime; ++ goto out_unpin; /* success */ ++ } ++ ++ /* revert */ ++ if (wh_dentry) { ++ int rerr; ++ ++ rerr = do_revert(err, dir, bindex, bwh, wh_dentry, dentry, &dt); ++ if (rerr) ++ err = rerr; ++ } ++ ++out_unpin: ++ au_unpin(&pin); ++ dput(wh_dentry); ++ dput(h_path.dentry); ++out_parent: ++ di_write_unlock(parent); ++out_unlock: ++ aufs_read_unlock(dentry, AuLock_DW); ++out: ++ return err; ++} ++ ++int aufs_rmdir(struct inode *dir, struct dentry *dentry) ++{ ++ int err, rmdir_later; ++ aufs_bindex_t bwh, bindex, bstart; ++ struct au_dtime dt; ++ struct au_pin pin; ++ struct inode *inode; ++ struct dentry *parent, *wh_dentry, *h_dentry; ++ struct au_whtmp_rmdir *args; ++ ++ IMustLock(dir); ++ ++ err = aufs_read_lock(dentry, AuLock_DW | AuLock_FLUSH | AuLock_GEN); ++ if (unlikely(err)) ++ goto out; ++ err = au_alive_dir(dentry); ++ if (unlikely(err)) ++ goto out_unlock; ++ inode = dentry->d_inode; ++ IMustLock(inode); ++ err = -ENOTDIR; ++ if (unlikely(!S_ISDIR(inode->i_mode))) ++ goto out_unlock; /* possible? 
*/ ++ ++ err = -ENOMEM; ++ args = au_whtmp_rmdir_alloc(dir->i_sb, GFP_NOFS); ++ if (unlikely(!args)) ++ goto out_unlock; ++ ++ parent = dentry->d_parent; /* dir inode is locked */ ++ di_write_lock_parent(parent); ++ err = au_test_empty(dentry, &args->whlist); ++ if (unlikely(err)) ++ goto out_parent; ++ ++ bstart = au_dbstart(dentry); ++ bwh = au_dbwh(dentry); ++ bindex = -1; ++ wh_dentry = lock_hdir_create_wh(dentry, /*isdir*/1, &bindex, &dt, &pin); ++ err = PTR_ERR(wh_dentry); ++ if (IS_ERR(wh_dentry)) ++ goto out_parent; ++ ++ h_dentry = au_h_dptr(dentry, bstart); ++ dget(h_dentry); ++ rmdir_later = 0; ++ if (bindex == bstart) { ++ err = renwh_and_rmdir(dentry, bstart, &args->whlist, dir); ++ if (err > 0) { ++ rmdir_later = err; ++ err = 0; ++ } ++ } else { ++ /* stop monitoring */ ++ au_hn_free(au_hi(inode, bstart)); ++ ++ /* dir inode is locked */ ++ IMustLock(wh_dentry->d_parent->d_inode); ++ err = 0; ++ } ++ ++ if (!err) { ++ vfsub_dead_dir(inode); ++ au_set_dbdiropq(dentry, -1); ++ epilog(dir, dentry, bindex); ++ ++ if (rmdir_later) { ++ au_whtmp_kick_rmdir(dir, bstart, h_dentry, args); ++ args = NULL; ++ } ++ ++ goto out_unpin; /* success */ ++ } ++ ++ /* revert */ ++ AuLabel(revert); ++ if (wh_dentry) { ++ int rerr; ++ ++ rerr = do_revert(err, dir, bindex, bwh, wh_dentry, dentry, &dt); ++ if (rerr) ++ err = rerr; ++ } ++ ++out_unpin: ++ au_unpin(&pin); ++ dput(wh_dentry); ++ dput(h_dentry); ++out_parent: ++ di_write_unlock(parent); ++ if (args) ++ au_whtmp_rmdir_free(args); ++out_unlock: ++ aufs_read_unlock(dentry, AuLock_DW); ++out: ++ AuTraceErr(err); ++ return err; ++} +diff -uNr linux-3.2.0-gentoo-r1.orig//fs/aufs/i_op_ren.c linux-3.2.0-gentoo-r1/fs/aufs/i_op_ren.c +--- linux-3.2.0-gentoo-r1.orig//fs/aufs/i_op_ren.c 1970-01-01 01:00:00.000000000 +0100 ++++ linux-3.2.0-gentoo-r1/fs/aufs/i_op_ren.c 2012-01-17 12:11:24.777897347 +0100 +@@ -0,0 +1,1017 @@ ++/* ++ * Copyright (C) 2005-2012 Junjiro R. Okajima ++ * ++ * This program, aufs is free software; you can redistribute it and/or modify ++ * it under the terms of the GNU General Public License as published by ++ * the Free Software Foundation; either version 2 of the License, or ++ * (at your option) any later version. ++ * ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ * GNU General Public License for more details. 
++ * ++ * You should have received a copy of the GNU General Public License ++ * along with this program; if not, write to the Free Software ++ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA ++ */ ++ ++/* ++ * inode operation (rename entry) ++ * todo: this is crazy monster ++ */ ++ ++#include "aufs.h" ++ ++enum { AuSRC, AuDST, AuSrcDst }; ++enum { AuPARENT, AuCHILD, AuParentChild }; ++ ++#define AuRen_ISDIR 1 ++#define AuRen_ISSAMEDIR (1 << 1) ++#define AuRen_WHSRC (1 << 2) ++#define AuRen_WHDST (1 << 3) ++#define AuRen_MNT_WRITE (1 << 4) ++#define AuRen_DT_DSTDIR (1 << 5) ++#define AuRen_DIROPQ (1 << 6) ++#define AuRen_CPUP (1 << 7) ++#define au_ftest_ren(flags, name) ((flags) & AuRen_##name) ++#define au_fset_ren(flags, name) \ ++ do { (flags) |= AuRen_##name; } while (0) ++#define au_fclr_ren(flags, name) \ ++ do { (flags) &= ~AuRen_##name; } while (0) ++ ++struct au_ren_args { ++ struct { ++ struct dentry *dentry, *h_dentry, *parent, *h_parent, ++ *wh_dentry; ++ struct inode *dir, *inode; ++ struct au_hinode *hdir; ++ struct au_dtime dt[AuParentChild]; ++ aufs_bindex_t bstart; ++ } sd[AuSrcDst]; ++ ++#define src_dentry sd[AuSRC].dentry ++#define src_dir sd[AuSRC].dir ++#define src_inode sd[AuSRC].inode ++#define src_h_dentry sd[AuSRC].h_dentry ++#define src_parent sd[AuSRC].parent ++#define src_h_parent sd[AuSRC].h_parent ++#define src_wh_dentry sd[AuSRC].wh_dentry ++#define src_hdir sd[AuSRC].hdir ++#define src_h_dir sd[AuSRC].hdir->hi_inode ++#define src_dt sd[AuSRC].dt ++#define src_bstart sd[AuSRC].bstart ++ ++#define dst_dentry sd[AuDST].dentry ++#define dst_dir sd[AuDST].dir ++#define dst_inode sd[AuDST].inode ++#define dst_h_dentry sd[AuDST].h_dentry ++#define dst_parent sd[AuDST].parent ++#define dst_h_parent sd[AuDST].h_parent ++#define dst_wh_dentry sd[AuDST].wh_dentry ++#define dst_hdir sd[AuDST].hdir ++#define dst_h_dir sd[AuDST].hdir->hi_inode ++#define dst_dt sd[AuDST].dt ++#define dst_bstart sd[AuDST].bstart ++ ++ struct dentry *h_trap; ++ struct au_branch *br; ++ struct au_hinode *src_hinode; ++ struct path h_path; ++ struct au_nhash whlist; ++ aufs_bindex_t btgt, src_bwh, src_bdiropq; ++ ++ unsigned int flags; ++ ++ struct au_whtmp_rmdir *thargs; ++ struct dentry *h_dst; ++}; ++ ++/* ---------------------------------------------------------------------- */ ++ ++/* ++ * functions for reverting. ++ * when an error happened in a single rename systemcall, we should revert ++ * everything as if nothing happend. ++ * we don't need to revert the copied-up/down the parent dir since they are ++ * harmless. ++ */ ++ ++#define RevertFailure(fmt, ...) 
do { \ ++ AuIOErr("revert failure: " fmt " (%d, %d)\n", \ ++ ##__VA_ARGS__, err, rerr); \ ++ err = -EIO; \ ++} while (0) ++ ++static void au_ren_rev_diropq(int err, struct au_ren_args *a) ++{ ++ int rerr; ++ ++ au_hn_imtx_lock_nested(a->src_hinode, AuLsc_I_CHILD); ++ rerr = au_diropq_remove(a->src_dentry, a->btgt); ++ au_hn_imtx_unlock(a->src_hinode); ++ au_set_dbdiropq(a->src_dentry, a->src_bdiropq); ++ if (rerr) ++ RevertFailure("remove diropq %.*s", AuDLNPair(a->src_dentry)); ++} ++ ++static void au_ren_rev_rename(int err, struct au_ren_args *a) ++{ ++ int rerr; ++ ++ a->h_path.dentry = au_lkup_one(&a->src_dentry->d_name, a->src_h_parent, ++ a->br, /*nd*/NULL); ++ rerr = PTR_ERR(a->h_path.dentry); ++ if (IS_ERR(a->h_path.dentry)) { ++ RevertFailure("au_lkup_one %.*s", AuDLNPair(a->src_dentry)); ++ return; ++ } ++ ++ rerr = vfsub_rename(a->dst_h_dir, ++ au_h_dptr(a->src_dentry, a->btgt), ++ a->src_h_dir, &a->h_path); ++ d_drop(a->h_path.dentry); ++ dput(a->h_path.dentry); ++ /* au_set_h_dptr(a->src_dentry, a->btgt, NULL); */ ++ if (rerr) ++ RevertFailure("rename %.*s", AuDLNPair(a->src_dentry)); ++} ++ ++static void au_ren_rev_cpup(int err, struct au_ren_args *a) ++{ ++ int rerr; ++ ++ a->h_path.dentry = a->dst_h_dentry; ++ rerr = vfsub_unlink(a->dst_h_dir, &a->h_path, /*force*/0); ++ au_set_h_dptr(a->src_dentry, a->btgt, NULL); ++ au_set_dbstart(a->src_dentry, a->src_bstart); ++ if (rerr) ++ RevertFailure("unlink %.*s", AuDLNPair(a->dst_h_dentry)); ++} ++ ++static void au_ren_rev_whtmp(int err, struct au_ren_args *a) ++{ ++ int rerr; ++ ++ a->h_path.dentry = au_lkup_one(&a->dst_dentry->d_name, a->dst_h_parent, ++ a->br, /*nd*/NULL); ++ rerr = PTR_ERR(a->h_path.dentry); ++ if (IS_ERR(a->h_path.dentry)) { ++ RevertFailure("lookup %.*s", AuDLNPair(a->dst_dentry)); ++ return; ++ } ++ if (a->h_path.dentry->d_inode) { ++ d_drop(a->h_path.dentry); ++ dput(a->h_path.dentry); ++ return; ++ } ++ ++ rerr = vfsub_rename(a->dst_h_dir, a->h_dst, a->dst_h_dir, &a->h_path); ++ d_drop(a->h_path.dentry); ++ dput(a->h_path.dentry); ++ if (!rerr) ++ au_set_h_dptr(a->dst_dentry, a->btgt, dget(a->h_dst)); ++ else ++ RevertFailure("rename %.*s", AuDLNPair(a->h_dst)); ++} ++ ++static void au_ren_rev_whsrc(int err, struct au_ren_args *a) ++{ ++ int rerr; ++ ++ a->h_path.dentry = a->src_wh_dentry; ++ rerr = au_wh_unlink_dentry(a->src_h_dir, &a->h_path, a->src_dentry); ++ au_set_dbwh(a->src_dentry, a->src_bwh); ++ if (rerr) ++ RevertFailure("unlink %.*s", AuDLNPair(a->src_wh_dentry)); ++} ++#undef RevertFailure ++ ++/* ---------------------------------------------------------------------- */ ++ ++/* ++ * when we have to copyup the renaming entry, do it with the rename-target name ++ * in order to minimize the cost (the later actual rename is unnecessary). ++ * otherwise rename it on the target branch. 
++ */ ++static int au_ren_or_cpup(struct au_ren_args *a) ++{ ++ int err; ++ struct dentry *d; ++ ++ d = a->src_dentry; ++ if (au_dbstart(d) == a->btgt) { ++ a->h_path.dentry = a->dst_h_dentry; ++ if (au_ftest_ren(a->flags, DIROPQ) ++ && au_dbdiropq(d) == a->btgt) ++ au_fclr_ren(a->flags, DIROPQ); ++ AuDebugOn(au_dbstart(d) != a->btgt); ++ err = vfsub_rename(a->src_h_dir, au_h_dptr(d, a->btgt), ++ a->dst_h_dir, &a->h_path); ++ } else { ++ struct mutex *h_mtx = &a->src_h_dentry->d_inode->i_mutex; ++ struct file *h_file; ++ ++ au_fset_ren(a->flags, CPUP); ++ mutex_lock_nested(h_mtx, AuLsc_I_CHILD); ++ au_set_dbstart(d, a->btgt); ++ au_set_h_dptr(d, a->btgt, dget(a->dst_h_dentry)); ++ h_file = au_h_open_pre(d, a->src_bstart); ++ if (IS_ERR(h_file)) { ++ err = PTR_ERR(h_file); ++ h_file = NULL; ++ } else ++ err = au_sio_cpup_single(d, a->btgt, a->src_bstart, -1, ++ !AuCpup_DTIME, a->dst_parent); ++ mutex_unlock(h_mtx); ++ au_h_open_post(d, a->src_bstart, h_file); ++ if (!err) { ++ d = a->dst_dentry; ++ au_set_h_dptr(d, a->btgt, NULL); ++ au_update_dbstart(d); ++ } else { ++ au_set_h_dptr(d, a->btgt, NULL); ++ au_set_dbstart(d, a->src_bstart); ++ } ++ } ++ if (!err && a->h_dst) ++ /* it will be set to dinfo later */ ++ dget(a->h_dst); ++ ++ return err; ++} ++ ++/* cf. aufs_rmdir() */ ++static int au_ren_del_whtmp(struct au_ren_args *a) ++{ ++ int err; ++ struct inode *dir; ++ ++ dir = a->dst_dir; ++ SiMustAnyLock(dir->i_sb); ++ if (!au_nhash_test_longer_wh(&a->whlist, a->btgt, ++ au_sbi(dir->i_sb)->si_dirwh) ++ || au_test_fs_remote(a->h_dst->d_sb)) { ++ err = au_whtmp_rmdir(dir, a->btgt, a->h_dst, &a->whlist); ++ if (unlikely(err)) ++ pr_warning("failed removing whtmp dir %.*s (%d), " ++ "ignored.\n", AuDLNPair(a->h_dst), err); ++ } else { ++ au_nhash_wh_free(&a->thargs->whlist); ++ a->thargs->whlist = a->whlist; ++ a->whlist.nh_num = 0; ++ au_whtmp_kick_rmdir(dir, a->btgt, a->h_dst, a->thargs); ++ dput(a->h_dst); ++ a->thargs = NULL; ++ } ++ ++ return 0; ++} ++ ++/* make it 'opaque' dir. 
*/ ++static int au_ren_diropq(struct au_ren_args *a) ++{ ++ int err; ++ struct dentry *diropq; ++ ++ err = 0; ++ a->src_bdiropq = au_dbdiropq(a->src_dentry); ++ a->src_hinode = au_hi(a->src_inode, a->btgt); ++ au_hn_imtx_lock_nested(a->src_hinode, AuLsc_I_CHILD); ++ diropq = au_diropq_create(a->src_dentry, a->btgt); ++ au_hn_imtx_unlock(a->src_hinode); ++ if (IS_ERR(diropq)) ++ err = PTR_ERR(diropq); ++ dput(diropq); ++ ++ return err; ++} ++ ++static int do_rename(struct au_ren_args *a) ++{ ++ int err; ++ struct dentry *d, *h_d; ++ ++ /* prepare workqueue args for asynchronous rmdir */ ++ h_d = a->dst_h_dentry; ++ if (au_ftest_ren(a->flags, ISDIR) && h_d->d_inode) { ++ err = -ENOMEM; ++ a->thargs = au_whtmp_rmdir_alloc(a->src_dentry->d_sb, GFP_NOFS); ++ if (unlikely(!a->thargs)) ++ goto out; ++ a->h_dst = dget(h_d); ++ } ++ ++ /* create whiteout for src_dentry */ ++ if (au_ftest_ren(a->flags, WHSRC)) { ++ a->src_bwh = au_dbwh(a->src_dentry); ++ AuDebugOn(a->src_bwh >= 0); ++ a->src_wh_dentry ++ = au_wh_create(a->src_dentry, a->btgt, a->src_h_parent); ++ err = PTR_ERR(a->src_wh_dentry); ++ if (IS_ERR(a->src_wh_dentry)) ++ goto out_thargs; ++ } ++ ++ /* lookup whiteout for dentry */ ++ if (au_ftest_ren(a->flags, WHDST)) { ++ h_d = au_wh_lkup(a->dst_h_parent, &a->dst_dentry->d_name, ++ a->br); ++ err = PTR_ERR(h_d); ++ if (IS_ERR(h_d)) ++ goto out_whsrc; ++ if (!h_d->d_inode) ++ dput(h_d); ++ else ++ a->dst_wh_dentry = h_d; ++ } ++ ++ /* rename dentry to tmpwh */ ++ if (a->thargs) { ++ err = au_whtmp_ren(a->dst_h_dentry, a->br); ++ if (unlikely(err)) ++ goto out_whdst; ++ ++ d = a->dst_dentry; ++ au_set_h_dptr(d, a->btgt, NULL); ++ err = au_lkup_neg(d, a->btgt); ++ if (unlikely(err)) ++ goto out_whtmp; ++ a->dst_h_dentry = au_h_dptr(d, a->btgt); ++ } ++ ++ /* cpup src */ ++ if (a->dst_h_dentry->d_inode && a->src_bstart != a->btgt) { ++ struct mutex *h_mtx = &a->src_h_dentry->d_inode->i_mutex; ++ struct file *h_file; ++ ++ mutex_lock_nested(h_mtx, AuLsc_I_CHILD); ++ AuDebugOn(au_dbstart(a->src_dentry) != a->src_bstart); ++ h_file = au_h_open_pre(a->src_dentry, a->src_bstart); ++ if (IS_ERR(h_file)) { ++ err = PTR_ERR(h_file); ++ h_file = NULL; ++ } else ++ err = au_sio_cpup_simple(a->src_dentry, a->btgt, -1, ++ !AuCpup_DTIME); ++ mutex_unlock(h_mtx); ++ au_h_open_post(a->src_dentry, a->src_bstart, h_file); ++ if (unlikely(err)) ++ goto out_whtmp; ++ } ++ ++ /* rename by vfs_rename or cpup */ ++ d = a->dst_dentry; ++ if (au_ftest_ren(a->flags, ISDIR) ++ && (a->dst_wh_dentry ++ || au_dbdiropq(d) == a->btgt ++ /* hide the lower to keep xino */ ++ || a->btgt < au_dbend(d) ++ || au_opt_test(au_mntflags(d->d_sb), ALWAYS_DIROPQ))) ++ au_fset_ren(a->flags, DIROPQ); ++ err = au_ren_or_cpup(a); ++ if (unlikely(err)) ++ /* leave the copied-up one */ ++ goto out_whtmp; ++ ++ /* make dir opaque */ ++ if (au_ftest_ren(a->flags, DIROPQ)) { ++ err = au_ren_diropq(a); ++ if (unlikely(err)) ++ goto out_rename; ++ } ++ ++ /* update target timestamps */ ++ AuDebugOn(au_dbstart(a->src_dentry) != a->btgt); ++ a->h_path.dentry = au_h_dptr(a->src_dentry, a->btgt); ++ vfsub_update_h_iattr(&a->h_path, /*did*/NULL); /*ignore*/ ++ a->src_inode->i_ctime = a->h_path.dentry->d_inode->i_ctime; ++ ++ /* remove whiteout for dentry */ ++ if (a->dst_wh_dentry) { ++ a->h_path.dentry = a->dst_wh_dentry; ++ err = au_wh_unlink_dentry(a->dst_h_dir, &a->h_path, ++ a->dst_dentry); ++ if (unlikely(err)) ++ goto out_diropq; ++ } ++ ++ /* remove whtmp */ ++ if (a->thargs) ++ au_ren_del_whtmp(a); /* ignore this error */ ++ ++ err = 0; ++ 
goto out_success; ++ ++out_diropq: ++ if (au_ftest_ren(a->flags, DIROPQ)) ++ au_ren_rev_diropq(err, a); ++out_rename: ++ if (!au_ftest_ren(a->flags, CPUP)) ++ au_ren_rev_rename(err, a); ++ else ++ au_ren_rev_cpup(err, a); ++ dput(a->h_dst); ++out_whtmp: ++ if (a->thargs) ++ au_ren_rev_whtmp(err, a); ++out_whdst: ++ dput(a->dst_wh_dentry); ++ a->dst_wh_dentry = NULL; ++out_whsrc: ++ if (a->src_wh_dentry) ++ au_ren_rev_whsrc(err, a); ++out_success: ++ dput(a->src_wh_dentry); ++ dput(a->dst_wh_dentry); ++out_thargs: ++ if (a->thargs) { ++ dput(a->h_dst); ++ au_whtmp_rmdir_free(a->thargs); ++ a->thargs = NULL; ++ } ++out: ++ return err; ++} ++ ++/* ---------------------------------------------------------------------- */ ++ ++/* ++ * test if @dentry dir can be rename destination or not. ++ * success means, it is a logically empty dir. ++ */ ++static int may_rename_dstdir(struct dentry *dentry, struct au_nhash *whlist) ++{ ++ return au_test_empty(dentry, whlist); ++} ++ ++/* ++ * test if @dentry dir can be rename source or not. ++ * if it can, return 0 and @children is filled. ++ * success means, ++ * - it is a logically empty dir. ++ * - or, it exists on writable branch and has no children including whiteouts ++ * on the lower branch. ++ */ ++static int may_rename_srcdir(struct dentry *dentry, aufs_bindex_t btgt) ++{ ++ int err; ++ unsigned int rdhash; ++ aufs_bindex_t bstart; ++ ++ bstart = au_dbstart(dentry); ++ if (bstart != btgt) { ++ struct au_nhash whlist; ++ ++ SiMustAnyLock(dentry->d_sb); ++ rdhash = au_sbi(dentry->d_sb)->si_rdhash; ++ if (!rdhash) ++ rdhash = au_rdhash_est(au_dir_size(/*file*/NULL, ++ dentry)); ++ err = au_nhash_alloc(&whlist, rdhash, GFP_NOFS); ++ if (unlikely(err)) ++ goto out; ++ err = au_test_empty(dentry, &whlist); ++ au_nhash_wh_free(&whlist); ++ goto out; ++ } ++ ++ if (bstart == au_dbtaildir(dentry)) ++ return 0; /* success */ ++ ++ err = au_test_empty_lower(dentry); ++ ++out: ++ if (err == -ENOTEMPTY) { ++ AuWarn1("renaming dir who has child(ren) on multiple branches," ++ " is not supported\n"); ++ err = -EXDEV; ++ } ++ return err; ++} ++ ++/* side effect: sets whlist and h_dentry */ ++static int au_ren_may_dir(struct au_ren_args *a) ++{ ++ int err; ++ unsigned int rdhash; ++ struct dentry *d; ++ ++ d = a->dst_dentry; ++ SiMustAnyLock(d->d_sb); ++ ++ err = 0; ++ if (au_ftest_ren(a->flags, ISDIR) && a->dst_inode) { ++ rdhash = au_sbi(d->d_sb)->si_rdhash; ++ if (!rdhash) ++ rdhash = au_rdhash_est(au_dir_size(/*file*/NULL, d)); ++ err = au_nhash_alloc(&a->whlist, rdhash, GFP_NOFS); ++ if (unlikely(err)) ++ goto out; ++ ++ au_set_dbstart(d, a->dst_bstart); ++ err = may_rename_dstdir(d, &a->whlist); ++ au_set_dbstart(d, a->btgt); ++ } ++ a->dst_h_dentry = au_h_dptr(d, au_dbstart(d)); ++ if (unlikely(err)) ++ goto out; ++ ++ d = a->src_dentry; ++ a->src_h_dentry = au_h_dptr(d, au_dbstart(d)); ++ if (au_ftest_ren(a->flags, ISDIR)) { ++ err = may_rename_srcdir(d, a->btgt); ++ if (unlikely(err)) { ++ au_nhash_wh_free(&a->whlist); ++ a->whlist.nh_num = 0; ++ } ++ } ++out: ++ return err; ++} ++ ++/* ---------------------------------------------------------------------- */ ++ ++/* ++ * simple tests for rename. ++ * following the checks in vfs, plus the parent-child relationship. 
++ */ ++static int au_may_ren(struct au_ren_args *a) ++{ ++ int err, isdir; ++ struct inode *h_inode; ++ ++ if (a->src_bstart == a->btgt) { ++ err = au_may_del(a->src_dentry, a->btgt, a->src_h_parent, ++ au_ftest_ren(a->flags, ISDIR)); ++ if (unlikely(err)) ++ goto out; ++ err = -EINVAL; ++ if (unlikely(a->src_h_dentry == a->h_trap)) ++ goto out; ++ } ++ ++ err = 0; ++ if (a->dst_bstart != a->btgt) ++ goto out; ++ ++ err = -ENOTEMPTY; ++ if (unlikely(a->dst_h_dentry == a->h_trap)) ++ goto out; ++ ++ err = -EIO; ++ h_inode = a->dst_h_dentry->d_inode; ++ isdir = !!au_ftest_ren(a->flags, ISDIR); ++ if (!a->dst_dentry->d_inode) { ++ if (unlikely(h_inode)) ++ goto out; ++ err = au_may_add(a->dst_dentry, a->btgt, a->dst_h_parent, ++ isdir); ++ } else { ++ if (unlikely(!h_inode || !h_inode->i_nlink)) ++ goto out; ++ err = au_may_del(a->dst_dentry, a->btgt, a->dst_h_parent, ++ isdir); ++ if (unlikely(err)) ++ goto out; ++ } ++ ++out: ++ if (unlikely(err == -ENOENT || err == -EEXIST)) ++ err = -EIO; ++ AuTraceErr(err); ++ return err; ++} ++ ++/* ---------------------------------------------------------------------- */ ++ ++/* ++ * locking order ++ * (VFS) ++ * - src_dir and dir by lock_rename() ++ * - inode if exitsts ++ * (aufs) ++ * - lock all ++ * + src_dentry and dentry by aufs_read_and_write_lock2() which calls, ++ * + si_read_lock ++ * + di_write_lock2_child() ++ * + di_write_lock_child() ++ * + ii_write_lock_child() ++ * + di_write_lock_child2() ++ * + ii_write_lock_child2() ++ * + src_parent and parent ++ * + di_write_lock_parent() ++ * + ii_write_lock_parent() ++ * + di_write_lock_parent2() ++ * + ii_write_lock_parent2() ++ * + lower src_dir and dir by vfsub_lock_rename() ++ * + verify the every relationships between child and parent. if any ++ * of them failed, unlock all and return -EBUSY. 
++ */ ++static void au_ren_unlock(struct au_ren_args *a) ++{ ++ struct super_block *sb; ++ ++ sb = a->dst_dentry->d_sb; ++ if (au_ftest_ren(a->flags, MNT_WRITE)) ++ mnt_drop_write(a->br->br_mnt); ++ vfsub_unlock_rename(a->src_h_parent, a->src_hdir, ++ a->dst_h_parent, a->dst_hdir); ++} ++ ++static int au_ren_lock(struct au_ren_args *a) ++{ ++ int err; ++ unsigned int udba; ++ ++ err = 0; ++ a->src_h_parent = au_h_dptr(a->src_parent, a->btgt); ++ a->src_hdir = au_hi(a->src_dir, a->btgt); ++ a->dst_h_parent = au_h_dptr(a->dst_parent, a->btgt); ++ a->dst_hdir = au_hi(a->dst_dir, a->btgt); ++ a->h_trap = vfsub_lock_rename(a->src_h_parent, a->src_hdir, ++ a->dst_h_parent, a->dst_hdir); ++ udba = au_opt_udba(a->src_dentry->d_sb); ++ if (unlikely(a->src_hdir->hi_inode != a->src_h_parent->d_inode ++ || a->dst_hdir->hi_inode != a->dst_h_parent->d_inode)) ++ err = au_busy_or_stale(); ++ if (!err && au_dbstart(a->src_dentry) == a->btgt) ++ err = au_h_verify(a->src_h_dentry, udba, ++ a->src_h_parent->d_inode, a->src_h_parent, ++ a->br); ++ if (!err && au_dbstart(a->dst_dentry) == a->btgt) ++ err = au_h_verify(a->dst_h_dentry, udba, ++ a->dst_h_parent->d_inode, a->dst_h_parent, ++ a->br); ++ if (!err) { ++ err = mnt_want_write(a->br->br_mnt); ++ if (unlikely(err)) ++ goto out_unlock; ++ au_fset_ren(a->flags, MNT_WRITE); ++ goto out; /* success */ ++ } ++ ++ err = au_busy_or_stale(); ++ ++out_unlock: ++ au_ren_unlock(a); ++out: ++ return err; ++} ++ ++/* ---------------------------------------------------------------------- */ ++ ++static void au_ren_refresh_dir(struct au_ren_args *a) ++{ ++ struct inode *dir; ++ ++ dir = a->dst_dir; ++ dir->i_version++; ++ if (au_ftest_ren(a->flags, ISDIR)) { ++ /* is this updating defined in POSIX? */ ++ au_cpup_attr_timesizes(a->src_inode); ++ au_cpup_attr_nlink(dir, /*force*/1); ++ } ++ ++ if (au_ibstart(dir) == a->btgt) ++ au_cpup_attr_timesizes(dir); ++ ++ if (au_ftest_ren(a->flags, ISSAMEDIR)) ++ return; ++ ++ dir = a->src_dir; ++ dir->i_version++; ++ if (au_ftest_ren(a->flags, ISDIR)) ++ au_cpup_attr_nlink(dir, /*force*/1); ++ if (au_ibstart(dir) == a->btgt) ++ au_cpup_attr_timesizes(dir); ++} ++ ++static void au_ren_refresh(struct au_ren_args *a) ++{ ++ aufs_bindex_t bend, bindex; ++ struct dentry *d, *h_d; ++ struct inode *i, *h_i; ++ struct super_block *sb; ++ ++ d = a->dst_dentry; ++ d_drop(d); ++ if (a->h_dst) ++ /* already dget-ed by au_ren_or_cpup() */ ++ au_set_h_dptr(d, a->btgt, a->h_dst); ++ ++ i = a->dst_inode; ++ if (i) { ++ if (!au_ftest_ren(a->flags, ISDIR)) ++ vfsub_drop_nlink(i); ++ else { ++ vfsub_dead_dir(i); ++ au_cpup_attr_timesizes(i); ++ } ++ au_update_dbrange(d, /*do_put_zero*/1); ++ } else { ++ bend = a->btgt; ++ for (bindex = au_dbstart(d); bindex < bend; bindex++) ++ au_set_h_dptr(d, bindex, NULL); ++ bend = au_dbend(d); ++ for (bindex = a->btgt + 1; bindex <= bend; bindex++) ++ au_set_h_dptr(d, bindex, NULL); ++ au_update_dbrange(d, /*do_put_zero*/0); ++ } ++ ++ d = a->src_dentry; ++ au_set_dbwh(d, -1); ++ bend = au_dbend(d); ++ for (bindex = a->btgt + 1; bindex <= bend; bindex++) { ++ h_d = au_h_dptr(d, bindex); ++ if (h_d) ++ au_set_h_dptr(d, bindex, NULL); ++ } ++ au_set_dbend(d, a->btgt); ++ ++ sb = d->d_sb; ++ i = a->src_inode; ++ if (au_opt_test(au_mntflags(sb), PLINK) && au_plink_test(i)) ++ return; /* success */ ++ ++ bend = au_ibend(i); ++ for (bindex = a->btgt + 1; bindex <= bend; bindex++) { ++ h_i = au_h_iptr(i, bindex); ++ if (h_i) { ++ au_xino_write(sb, bindex, h_i->i_ino, /*ino*/0); ++ /* ignore this error */ ++ 
au_set_h_iptr(i, bindex, NULL, 0); ++ } ++ } ++ au_set_ibend(i, a->btgt); ++} ++ ++/* ---------------------------------------------------------------------- */ ++ ++/* mainly for link(2) and rename(2) */ ++int au_wbr(struct dentry *dentry, aufs_bindex_t btgt) ++{ ++ aufs_bindex_t bdiropq, bwh; ++ struct dentry *parent; ++ struct au_branch *br; ++ ++ parent = dentry->d_parent; ++ IMustLock(parent->d_inode); /* dir is locked */ ++ ++ bdiropq = au_dbdiropq(parent); ++ bwh = au_dbwh(dentry); ++ br = au_sbr(dentry->d_sb, btgt); ++ if (au_br_rdonly(br) ++ || (0 <= bdiropq && bdiropq < btgt) ++ || (0 <= bwh && bwh < btgt)) ++ btgt = -1; ++ ++ AuDbg("btgt %d\n", btgt); ++ return btgt; ++} ++ ++/* sets src_bstart, dst_bstart and btgt */ ++static int au_ren_wbr(struct au_ren_args *a) ++{ ++ int err; ++ struct au_wr_dir_args wr_dir_args = { ++ /* .force_btgt = -1, */ ++ .flags = AuWrDir_ADD_ENTRY ++ }; ++ ++ a->src_bstart = au_dbstart(a->src_dentry); ++ a->dst_bstart = au_dbstart(a->dst_dentry); ++ if (au_ftest_ren(a->flags, ISDIR)) ++ au_fset_wrdir(wr_dir_args.flags, ISDIR); ++ wr_dir_args.force_btgt = a->src_bstart; ++ if (a->dst_inode && a->dst_bstart < a->src_bstart) ++ wr_dir_args.force_btgt = a->dst_bstart; ++ wr_dir_args.force_btgt = au_wbr(a->dst_dentry, wr_dir_args.force_btgt); ++ err = au_wr_dir(a->dst_dentry, a->src_dentry, &wr_dir_args); ++ a->btgt = err; ++ ++ return err; ++} ++ ++static void au_ren_dt(struct au_ren_args *a) ++{ ++ a->h_path.dentry = a->src_h_parent; ++ au_dtime_store(a->src_dt + AuPARENT, a->src_parent, &a->h_path); ++ if (!au_ftest_ren(a->flags, ISSAMEDIR)) { ++ a->h_path.dentry = a->dst_h_parent; ++ au_dtime_store(a->dst_dt + AuPARENT, a->dst_parent, &a->h_path); ++ } ++ ++ au_fclr_ren(a->flags, DT_DSTDIR); ++ if (!au_ftest_ren(a->flags, ISDIR)) ++ return; ++ ++ a->h_path.dentry = a->src_h_dentry; ++ au_dtime_store(a->src_dt + AuCHILD, a->src_dentry, &a->h_path); ++ if (a->dst_h_dentry->d_inode) { ++ au_fset_ren(a->flags, DT_DSTDIR); ++ a->h_path.dentry = a->dst_h_dentry; ++ au_dtime_store(a->dst_dt + AuCHILD, a->dst_dentry, &a->h_path); ++ } ++} ++ ++static void au_ren_rev_dt(int err, struct au_ren_args *a) ++{ ++ struct dentry *h_d; ++ struct mutex *h_mtx; ++ ++ au_dtime_revert(a->src_dt + AuPARENT); ++ if (!au_ftest_ren(a->flags, ISSAMEDIR)) ++ au_dtime_revert(a->dst_dt + AuPARENT); ++ ++ if (au_ftest_ren(a->flags, ISDIR) && err != -EIO) { ++ h_d = a->src_dt[AuCHILD].dt_h_path.dentry; ++ h_mtx = &h_d->d_inode->i_mutex; ++ mutex_lock_nested(h_mtx, AuLsc_I_CHILD); ++ au_dtime_revert(a->src_dt + AuCHILD); ++ mutex_unlock(h_mtx); ++ ++ if (au_ftest_ren(a->flags, DT_DSTDIR)) { ++ h_d = a->dst_dt[AuCHILD].dt_h_path.dentry; ++ h_mtx = &h_d->d_inode->i_mutex; ++ mutex_lock_nested(h_mtx, AuLsc_I_CHILD); ++ au_dtime_revert(a->dst_dt + AuCHILD); ++ mutex_unlock(h_mtx); ++ } ++ } ++} ++ ++/* ---------------------------------------------------------------------- */ ++ ++int aufs_rename(struct inode *_src_dir, struct dentry *_src_dentry, ++ struct inode *_dst_dir, struct dentry *_dst_dentry) ++{ ++ int err, flags; ++ /* reduce stack space */ ++ struct au_ren_args *a; ++ ++ AuDbg("%.*s, %.*s\n", AuDLNPair(_src_dentry), AuDLNPair(_dst_dentry)); ++ IMustLock(_src_dir); ++ IMustLock(_dst_dir); ++ ++ err = -ENOMEM; ++ BUILD_BUG_ON(sizeof(*a) > PAGE_SIZE); ++ a = kzalloc(sizeof(*a), GFP_NOFS); ++ if (unlikely(!a)) ++ goto out; ++ ++ a->src_dir = _src_dir; ++ a->src_dentry = _src_dentry; ++ a->src_inode = a->src_dentry->d_inode; ++ a->src_parent = a->src_dentry->d_parent; /* dir inode 
is locked */ ++ a->dst_dir = _dst_dir; ++ a->dst_dentry = _dst_dentry; ++ a->dst_inode = a->dst_dentry->d_inode; ++ a->dst_parent = a->dst_dentry->d_parent; /* dir inode is locked */ ++ if (a->dst_inode) { ++ IMustLock(a->dst_inode); ++ au_igrab(a->dst_inode); ++ } ++ ++ err = -ENOTDIR; ++ flags = AuLock_FLUSH | AuLock_NOPLM | AuLock_GEN; ++ if (S_ISDIR(a->src_inode->i_mode)) { ++ au_fset_ren(a->flags, ISDIR); ++ if (unlikely(a->dst_inode && !S_ISDIR(a->dst_inode->i_mode))) ++ goto out_free; ++ err = aufs_read_and_write_lock2(a->dst_dentry, a->src_dentry, ++ AuLock_DIR | flags); ++ } else ++ err = aufs_read_and_write_lock2(a->dst_dentry, a->src_dentry, ++ flags); ++ if (unlikely(err)) ++ goto out_free; ++ ++ err = au_d_hashed_positive(a->src_dentry); ++ if (unlikely(err)) ++ goto out_unlock; ++ err = -ENOENT; ++ if (a->dst_inode) { ++ /* ++ * If it is a dir, VFS unhash dst_dentry before this ++ * function. It means we cannot rely upon d_unhashed(). ++ */ ++ if (unlikely(!a->dst_inode->i_nlink)) ++ goto out_unlock; ++ if (!S_ISDIR(a->dst_inode->i_mode)) { ++ err = au_d_hashed_positive(a->dst_dentry); ++ if (unlikely(err)) ++ goto out_unlock; ++ } else if (unlikely(IS_DEADDIR(a->dst_inode))) ++ goto out_unlock; ++ } else if (unlikely(d_unhashed(a->dst_dentry))) ++ goto out_unlock; ++ ++ au_fset_ren(a->flags, ISSAMEDIR); /* temporary */ ++ di_write_lock_parent(a->dst_parent); ++ ++ /* which branch we process */ ++ err = au_ren_wbr(a); ++ if (unlikely(err < 0)) ++ goto out_parent; ++ a->br = au_sbr(a->dst_dentry->d_sb, a->btgt); ++ a->h_path.mnt = a->br->br_mnt; ++ ++ /* are they available to be renamed */ ++ err = au_ren_may_dir(a); ++ if (unlikely(err)) ++ goto out_children; ++ ++ /* prepare the writable parent dir on the same branch */ ++ if (a->dst_bstart == a->btgt) { ++ au_fset_ren(a->flags, WHDST); ++ } else { ++ err = au_cpup_dirs(a->dst_dentry, a->btgt); ++ if (unlikely(err)) ++ goto out_children; ++ } ++ ++ if (a->src_dir != a->dst_dir) { ++ /* ++ * this temporary unlock is safe, ++ * because both dir->i_mutex are locked. 
++ */ ++ di_write_unlock(a->dst_parent); ++ di_write_lock_parent(a->src_parent); ++ err = au_wr_dir_need_wh(a->src_dentry, ++ au_ftest_ren(a->flags, ISDIR), ++ &a->btgt); ++ di_write_unlock(a->src_parent); ++ di_write_lock2_parent(a->src_parent, a->dst_parent, /*isdir*/1); ++ au_fclr_ren(a->flags, ISSAMEDIR); ++ } else ++ err = au_wr_dir_need_wh(a->src_dentry, ++ au_ftest_ren(a->flags, ISDIR), ++ &a->btgt); ++ if (unlikely(err < 0)) ++ goto out_children; ++ if (err) ++ au_fset_ren(a->flags, WHSRC); ++ ++ /* lock them all */ ++ err = au_ren_lock(a); ++ if (unlikely(err)) ++ goto out_children; ++ ++ if (!au_opt_test(au_mntflags(a->dst_dir->i_sb), UDBA_NONE)) ++ err = au_may_ren(a); ++ else if (unlikely(a->dst_dentry->d_name.len > AUFS_MAX_NAMELEN)) ++ err = -ENAMETOOLONG; ++ if (unlikely(err)) ++ goto out_hdir; ++ ++ /* store timestamps to be revertible */ ++ au_ren_dt(a); ++ ++ /* here we go */ ++ err = do_rename(a); ++ if (unlikely(err)) ++ goto out_dt; ++ ++ /* update dir attributes */ ++ au_ren_refresh_dir(a); ++ ++ /* dput/iput all lower dentries */ ++ au_ren_refresh(a); ++ ++ goto out_hdir; /* success */ ++ ++out_dt: ++ au_ren_rev_dt(err, a); ++out_hdir: ++ au_ren_unlock(a); ++out_children: ++ au_nhash_wh_free(&a->whlist); ++ if (err && a->dst_inode && a->dst_bstart != a->btgt) { ++ AuDbg("bstart %d, btgt %d\n", a->dst_bstart, a->btgt); ++ au_set_h_dptr(a->dst_dentry, a->btgt, NULL); ++ au_set_dbstart(a->dst_dentry, a->dst_bstart); ++ } ++out_parent: ++ if (!err) ++ d_move(a->src_dentry, a->dst_dentry); ++ else { ++ au_update_dbstart(a->dst_dentry); ++ if (!a->dst_inode) ++ d_drop(a->dst_dentry); ++ } ++ if (au_ftest_ren(a->flags, ISSAMEDIR)) ++ di_write_unlock(a->dst_parent); ++ else ++ di_write_unlock2(a->src_parent, a->dst_parent); ++out_unlock: ++ aufs_read_and_write_unlock2(a->dst_dentry, a->src_dentry); ++out_free: ++ iput(a->dst_inode); ++ if (a->thargs) ++ au_whtmp_rmdir_free(a->thargs); ++ kfree(a); ++out: ++ AuTraceErr(err); ++ return err; ++} +diff -uNr linux-3.2.0-gentoo-r1.orig//fs/aufs/Kconfig linux-3.2.0-gentoo-r1/fs/aufs/Kconfig +--- linux-3.2.0-gentoo-r1.orig//fs/aufs/Kconfig 1970-01-01 01:00:00.000000000 +0100 ++++ linux-3.2.0-gentoo-r1/fs/aufs/Kconfig 2012-01-17 12:11:24.463079695 +0100 +@@ -0,0 +1,203 @@ ++config AUFS_FS ++ tristate "Aufs (Advanced multi layered unification filesystem) support" ++ depends on EXPERIMENTAL ++ help ++ Aufs is a stackable unification filesystem such as Unionfs, ++ which unifies several directories and provides a merged single ++ directory. ++ In the early days, aufs was entirely re-designed and ++ re-implemented Unionfs Version 1.x series. Introducing many ++ original ideas, approaches and improvements, it becomes totally ++ different from Unionfs while keeping the basic features. ++ ++if AUFS_FS ++choice ++ prompt "Maximum number of branches" ++ default AUFS_BRANCH_MAX_127 ++ help ++ Specifies the maximum number of branches (or member directories) ++ in a single aufs. The larger value consumes more system ++ resources and has a minor impact to performance. ++config AUFS_BRANCH_MAX_127 ++ bool "127" ++ help ++ Specifies the maximum number of branches (or member directories) ++ in a single aufs. The larger value consumes more system ++ resources and has a minor impact to performance. ++config AUFS_BRANCH_MAX_511 ++ bool "511" ++ help ++ Specifies the maximum number of branches (or member directories) ++ in a single aufs. The larger value consumes more system ++ resources and has a minor impact to performance. 
++config AUFS_BRANCH_MAX_1023 ++ bool "1023" ++ help ++ Specifies the maximum number of branches (or member directories) ++ in a single aufs. The larger value consumes more system ++ resources and has a minor impact to performance. ++config AUFS_BRANCH_MAX_32767 ++ bool "32767" ++ help ++ Specifies the maximum number of branches (or member directories) ++ in a single aufs. The larger value consumes more system ++ resources and has a minor impact to performance. ++endchoice ++ ++config AUFS_SBILIST ++ bool ++ depends on AUFS_MAGIC_SYSRQ || PROC_FS ++ default y ++ help ++ Automatic configuration for internal use. ++ When aufs supports Magic SysRq or /proc, enabled automatically. ++ ++config AUFS_HNOTIFY ++ bool "Detect direct branch access (bypassing aufs)" ++ help ++ If you want to modify files on branches directly, eg. bypassing aufs, ++ and want aufs to detect the changes of them fully, then enable this ++ option and use 'udba=notify' mount option. ++ Currently there is only one available configuration, "fsnotify". ++ It will have a negative impact to the performance. ++ See detail in aufs.5. ++ ++choice ++ prompt "method" if AUFS_HNOTIFY ++ default AUFS_HFSNOTIFY ++config AUFS_HFSNOTIFY ++ bool "fsnotify" ++ select FSNOTIFY ++endchoice ++ ++config AUFS_EXPORT ++ bool "NFS-exportable aufs" ++ depends on EXPORTFS ++ help ++ If you want to export your mounted aufs via NFS, then enable this ++ option. There are several requirements for this configuration. ++ See detail in aufs.5. ++ ++config AUFS_INO_T_64 ++ bool ++ depends on AUFS_EXPORT ++ depends on 64BIT && !(ALPHA || S390) ++ default y ++ help ++ Automatic configuration for internal use. ++ /* typedef unsigned long/int __kernel_ino_t */ ++ /* alpha and s390x are int */ ++ ++config AUFS_RDU ++ bool "Readdir in userspace" ++ help ++ Aufs has two methods to provide a merged view for a directory, ++ by a user-space library and by kernel-space natively. The latter ++ is always enabled but sometimes large and slow. ++ If you enable this option, install the library in aufs2-util ++ package, and set some environment variables for your readdir(3), ++ then the work will be handled in user-space which generally ++ shows better performance in most cases. ++ See detail in aufs.5. ++ ++config AUFS_PROC_MAP ++ bool "support for /proc/maps and lsof(1)" ++ depends on PROC_FS ++ help ++ When you issue mmap(2) in aufs, it is actually a direct mmap(2) ++ call to the file on the branch fs since the file in aufs is ++ purely virtual. And the file path printed in /proc/maps (and ++ others) will be the path on the branch fs. In most cases, it ++ does no harm. But some utilities like lsof(1) may confuse since ++ the utility or user may expect the file path in aufs to be ++ printed. ++ To address this issue, aufs provides a patch which introduces a ++ new member called vm_prfile into struct vm_are_struct. The patch ++ is meaningless without enabling this configuration since nobody ++ sets the new vm_prfile member. ++ If you don't apply the patch, then enabling this configuration ++ will cause a compile error. ++ This approach is fragile since if someone else make some changes ++ around vm_file, then vm_prfile may not work anymore. As a ++ workaround such case, aufs provides this configuration. If you ++ disable it, then lsof(1) may produce incorrect result but the ++ problem will be gone even if the aufs patch is applied (I hope). 
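For reference, a minimal sketch of how the options defined above might be selected and exercised together; the branch paths and the exact option mix are illustrative assumptions, not part of the patch itself:

  # .config fragment: build aufs as a module with hnotify/fsnotify support
  CONFIG_AUFS_FS=m
  CONFIG_AUFS_BRANCH_MAX_127=y
  CONFIG_AUFS_HNOTIFY=y
  CONFIG_AUFS_HFSNOTIFY=y
  CONFIG_AUFS_RDU=y

  # union a writable branch over a read-only one, enabling 'udba=notify'
  # (the mount option the AUFS_HNOTIFY help text refers to)
  mount -t aufs -o br=/data/rw=rw:/data/ro=ro,udba=notify none /mnt/union
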
++ ++config AUFS_SP_IATTR ++ bool "Respect the attributes (mtime/ctime mainly) of special files" ++ help ++ When you write something to a special file, some attributes of it ++ (mtime/ctime mainly) may be updated. Generally such updates are ++ less important (actually some device drivers and NFS ignore ++ it). But some applications (such like test program) requires ++ such updates. If you need these updates, then enable this ++ configuration which introduces some overhead. ++ Currently this configuration handles FIFO only. ++ ++config AUFS_SHWH ++ bool "Show whiteouts" ++ help ++ If you want to make the whiteouts in aufs visible, then enable ++ this option and specify 'shwh' mount option. Although it may ++ sounds like philosophy or something, but in technically it ++ simply shows the name of whiteout with keeping its behaviour. ++ ++config AUFS_BR_RAMFS ++ bool "Ramfs (initramfs/rootfs) as an aufs branch" ++ help ++ If you want to use ramfs as an aufs branch fs, then enable this ++ option. Generally tmpfs is recommended. ++ Aufs prohibited them to be a branch fs by default, because ++ initramfs becomes unusable after switch_root or something ++ generally. If you sets initramfs as an aufs branch and boot your ++ system by switch_root, you will meet a problem easily since the ++ files in initramfs may be inaccessible. ++ Unless you are going to use ramfs as an aufs branch fs without ++ switch_root or something, leave it N. ++ ++config AUFS_BR_FUSE ++ bool "Fuse fs as an aufs branch" ++ depends on FUSE_FS ++ select AUFS_POLL ++ help ++ If you want to use fuse-based userspace filesystem as an aufs ++ branch fs, then enable this option. ++ It implements the internal poll(2) operation which is ++ implemented by fuse only (curretnly). ++ ++config AUFS_POLL ++ bool ++ help ++ Automatic configuration for internal use. ++ ++config AUFS_BR_HFSPLUS ++ bool "Hfsplus as an aufs branch" ++ depends on HFSPLUS_FS ++ default y ++ help ++ If you want to use hfsplus fs as an aufs branch fs, then enable ++ this option. This option introduces a small overhead at ++ copying-up a file on hfsplus. ++ ++config AUFS_BDEV_LOOP ++ bool ++ depends on BLK_DEV_LOOP ++ default y ++ help ++ Automatic configuration for internal use. ++ Convert =[ym] into =y. ++ ++config AUFS_DEBUG ++ bool "Debug aufs" ++ help ++ Enable this to compile aufs internal debug code. ++ It will have a negative impact to the performance. ++ ++config AUFS_MAGIC_SYSRQ ++ bool ++ depends on AUFS_DEBUG && MAGIC_SYSRQ ++ default y ++ help ++ Automatic configuration for internal use. ++ When aufs supports Magic SysRq, enabled automatically. ++endif +diff -uNr linux-3.2.0-gentoo-r1.orig//fs/aufs/loop.c linux-3.2.0-gentoo-r1/fs/aufs/loop.c +--- linux-3.2.0-gentoo-r1.orig//fs/aufs/loop.c 1970-01-01 01:00:00.000000000 +0100 ++++ linux-3.2.0-gentoo-r1/fs/aufs/loop.c 2012-01-17 12:11:24.826508897 +0100 +@@ -0,0 +1,133 @@ ++/* ++ * Copyright (C) 2005-2012 Junjiro R. Okajima ++ * ++ * This program, aufs is free software; you can redistribute it and/or modify ++ * it under the terms of the GNU General Public License as published by ++ * the Free Software Foundation; either version 2 of the License, or ++ * (at your option) any later version. ++ * ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ * GNU General Public License for more details. 
++ * ++ * You should have received a copy of the GNU General Public License ++ * along with this program; if not, write to the Free Software ++ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA ++ */ ++ ++/* ++ * support for loopback block device as a branch ++ */ ++ ++#include ++#include "aufs.h" ++ ++/* ++ * test if two lower dentries have overlapping branches. ++ */ ++int au_test_loopback_overlap(struct super_block *sb, struct dentry *h_adding) ++{ ++ struct super_block *h_sb; ++ struct loop_device *l; ++ ++ h_sb = h_adding->d_sb; ++ if (MAJOR(h_sb->s_dev) != LOOP_MAJOR) ++ return 0; ++ ++ l = h_sb->s_bdev->bd_disk->private_data; ++ h_adding = l->lo_backing_file->f_dentry; ++ /* ++ * h_adding can be local NFS. ++ * in this case aufs cannot detect the loop. ++ */ ++ if (unlikely(h_adding->d_sb == sb)) ++ return 1; ++ return !!au_test_subdir(h_adding, sb->s_root); ++} ++ ++/* true if a kernel thread named 'loop[0-9].*' accesses a file */ ++int au_test_loopback_kthread(void) ++{ ++ int ret; ++ struct task_struct *tsk = current; ++ ++ ret = 0; ++ if (tsk->flags & PF_KTHREAD) { ++ const char c = tsk->comm[4]; ++ ret = ('0' <= c && c <= '9' ++ && !strncmp(tsk->comm, "loop", 4)); ++ } ++ ++ return ret; ++} ++ ++/* ---------------------------------------------------------------------- */ ++ ++#define au_warn_loopback_step 16 ++static int au_warn_loopback_nelem = au_warn_loopback_step; ++static unsigned long *au_warn_loopback_array; ++ ++void au_warn_loopback(struct super_block *h_sb) ++{ ++ int i, new_nelem; ++ unsigned long *a, magic; ++ static DEFINE_SPINLOCK(spin); ++ ++ magic = h_sb->s_magic; ++ spin_lock(&spin); ++ a = au_warn_loopback_array; ++ for (i = 0; i < au_warn_loopback_nelem && *a; i++) ++ if (a[i] == magic) { ++ spin_unlock(&spin); ++ return; ++ } ++ ++ /* h_sb is new to us, print it */ ++ if (i < au_warn_loopback_nelem) { ++ a[i] = magic; ++ goto pr; ++ } ++ ++ /* expand the array */ ++ new_nelem = au_warn_loopback_nelem + au_warn_loopback_step; ++ a = au_kzrealloc(au_warn_loopback_array, ++ au_warn_loopback_nelem * sizeof(unsigned long), ++ new_nelem * sizeof(unsigned long), GFP_ATOMIC); ++ if (a) { ++ au_warn_loopback_nelem = new_nelem; ++ au_warn_loopback_array = a; ++ a[i] = magic; ++ goto pr; ++ } ++ ++ spin_unlock(&spin); ++ AuWarn1("realloc failed, ignored\n"); ++ return; ++ ++pr: ++ spin_unlock(&spin); ++ pr_warning("you may want to try another patch for loopback file " ++ "on %s(0x%lx) branch\n", au_sbtype(h_sb), magic); ++} ++ ++int au_loopback_init(void) ++{ ++ int err; ++ struct super_block *sb __maybe_unused; ++ ++ AuDebugOn(sizeof(sb->s_magic) != sizeof(unsigned long)); ++ ++ err = 0; ++ au_warn_loopback_array = kcalloc(au_warn_loopback_step, ++ sizeof(unsigned long), GFP_NOFS); ++ if (unlikely(!au_warn_loopback_array)) ++ err = -ENOMEM; ++ ++ return err; ++} ++ ++void au_loopback_fin(void) ++{ ++ kfree(au_warn_loopback_array); ++} +diff -uNr linux-3.2.0-gentoo-r1.orig//fs/aufs/loop.h linux-3.2.0-gentoo-r1/fs/aufs/loop.h +--- linux-3.2.0-gentoo-r1.orig//fs/aufs/loop.h 1970-01-01 01:00:00.000000000 +0100 ++++ linux-3.2.0-gentoo-r1/fs/aufs/loop.h 2012-01-17 12:11:24.828823733 +0100 +@@ -0,0 +1,50 @@ ++/* ++ * Copyright (C) 2005-2012 Junjiro R. Okajima ++ * ++ * This program, aufs is free software; you can redistribute it and/or modify ++ * it under the terms of the GNU General Public License as published by ++ * the Free Software Foundation; either version 2 of the License, or ++ * (at your option) any later version. 
++ * ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ * GNU General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public License ++ * along with this program; if not, write to the Free Software ++ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA ++ */ ++ ++/* ++ * support for loopback mount as a branch ++ */ ++ ++#ifndef __AUFS_LOOP_H__ ++#define __AUFS_LOOP_H__ ++ ++#ifdef __KERNEL__ ++ ++struct dentry; ++struct super_block; ++ ++#ifdef CONFIG_AUFS_BDEV_LOOP ++/* loop.c */ ++int au_test_loopback_overlap(struct super_block *sb, struct dentry *h_adding); ++int au_test_loopback_kthread(void); ++void au_warn_loopback(struct super_block *h_sb); ++ ++int au_loopback_init(void); ++void au_loopback_fin(void); ++#else ++AuStubInt0(au_test_loopback_overlap, struct super_block *sb, ++ struct dentry *h_adding) ++AuStubInt0(au_test_loopback_kthread, void) ++AuStubVoid(au_warn_loopback, struct super_block *h_sb) ++ ++AuStubInt0(au_loopback_init, void) ++AuStubVoid(au_loopback_fin, void) ++#endif /* BLK_DEV_LOOP */ ++ ++#endif /* __KERNEL__ */ ++#endif /* __AUFS_LOOP_H__ */ +diff -uNr linux-3.2.0-gentoo-r1.orig//fs/aufs/magic.mk linux-3.2.0-gentoo-r1/fs/aufs/magic.mk +--- linux-3.2.0-gentoo-r1.orig//fs/aufs/magic.mk 1970-01-01 01:00:00.000000000 +0100 ++++ linux-3.2.0-gentoo-r1/fs/aufs/magic.mk 2012-01-17 12:11:24.828823733 +0100 +@@ -0,0 +1,54 @@ ++ ++# defined in ${srctree}/fs/fuse/inode.c ++# tristate ++ifdef CONFIG_FUSE_FS ++ccflags-y += -DFUSE_SUPER_MAGIC=0x65735546 ++endif ++ ++# defined in ${srctree}/fs/ocfs2/ocfs2_fs.h ++# tristate ++ifdef CONFIG_OCFS2_FS ++ccflags-y += -DOCFS2_SUPER_MAGIC=0x7461636f ++endif ++ ++# defined in ${srctree}/fs/ocfs2/dlm/userdlm.h ++# tristate ++ifdef CONFIG_OCFS2_FS_O2CB ++ccflags-y += -DDLMFS_MAGIC=0x76a9f425 ++endif ++ ++# defined in ${srctree}/fs/cifs/cifsfs.c ++# tristate ++ifdef CONFIG_CIFS_FS ++ccflags-y += -DCIFS_MAGIC_NUMBER=0xFF534D42 ++endif ++ ++# defined in ${srctree}/fs/xfs/xfs_sb.h ++# tristate ++ifdef CONFIG_XFS_FS ++ccflags-y += -DXFS_SB_MAGIC=0x58465342 ++endif ++ ++# defined in ${srctree}/fs/configfs/mount.c ++# tristate ++ifdef CONFIG_CONFIGFS_FS ++ccflags-y += -DCONFIGFS_MAGIC=0x62656570 ++endif ++ ++# defined in ${srctree}/fs/9p/v9fs.h ++# tristate ++ifdef CONFIG_9P_FS ++ccflags-y += -DV9FS_MAGIC=0x01021997 ++endif ++ ++# defined in ${srctree}/fs/ubifs/ubifs.h ++# tristate ++ifdef CONFIG_UBIFS_FS ++ccflags-y += -DUBIFS_SUPER_MAGIC=0x24051905 ++endif ++ ++# defined in ${srctree}/fs/hfsplus/hfsplus_raw.h ++# tristate ++ifdef CONFIG_HFSPLUS_FS ++ccflags-y += -DHFSPLUS_SUPER_MAGIC=0x482b ++endif +diff -uNr linux-3.2.0-gentoo-r1.orig//fs/aufs/Makefile linux-3.2.0-gentoo-r1/fs/aufs/Makefile +--- linux-3.2.0-gentoo-r1.orig//fs/aufs/Makefile 1970-01-01 01:00:00.000000000 +0100 ++++ linux-3.2.0-gentoo-r1/fs/aufs/Makefile 2012-01-17 12:11:24.465394530 +0100 +@@ -0,0 +1,42 @@ ++ ++include ${src}/magic.mk ++ifeq (${CONFIG_AUFS_FS},m) ++include ${src}/conf.mk ++endif ++-include ${src}/priv_def.mk ++ ++# cf. 
include/linux/kernel.h ++# enable pr_debug ++ccflags-y += -DDEBUG ++# sparse requires the full pathname ++ifdef M ++ccflags-y += -include ${M}/../../include/linux/aufs_type.h ++else ++ccflags-y += -include ${srctree}/include/linux/aufs_type.h ++endif ++ ++obj-$(CONFIG_AUFS_FS) += aufs.o ++aufs-y := module.o sbinfo.o super.o branch.o xino.o sysaufs.o opts.o \ ++ wkq.o vfsub.o dcsub.o \ ++ cpup.o whout.o wbr_policy.o \ ++ dinfo.o dentry.o \ ++ dynop.o \ ++ finfo.o file.o f_op.o \ ++ dir.o vdir.o \ ++ iinfo.o inode.o i_op.o i_op_add.o i_op_del.o i_op_ren.o \ ++ ioctl.o ++ ++# all are boolean ++aufs-$(CONFIG_PROC_FS) += procfs.o plink.o ++aufs-$(CONFIG_SYSFS) += sysfs.o ++aufs-$(CONFIG_DEBUG_FS) += dbgaufs.o ++aufs-$(CONFIG_AUFS_BDEV_LOOP) += loop.o ++aufs-$(CONFIG_AUFS_HNOTIFY) += hnotify.o ++aufs-$(CONFIG_AUFS_HFSNOTIFY) += hfsnotify.o ++aufs-$(CONFIG_AUFS_EXPORT) += export.o ++aufs-$(CONFIG_AUFS_POLL) += poll.o ++aufs-$(CONFIG_AUFS_RDU) += rdu.o ++aufs-$(CONFIG_AUFS_SP_IATTR) += f_op_sp.o ++aufs-$(CONFIG_AUFS_BR_HFSPLUS) += hfsplus.o ++aufs-$(CONFIG_AUFS_DEBUG) += debug.o ++aufs-$(CONFIG_AUFS_MAGIC_SYSRQ) += sysrq.o +diff -uNr linux-3.2.0-gentoo-r1.orig//fs/aufs/module.c linux-3.2.0-gentoo-r1/fs/aufs/module.c +--- linux-3.2.0-gentoo-r1.orig//fs/aufs/module.c 1970-01-01 01:00:00.000000000 +0100 ++++ linux-3.2.0-gentoo-r1/fs/aufs/module.c 2012-01-17 12:11:24.828823733 +0100 +@@ -0,0 +1,195 @@ ++/* ++ * Copyright (C) 2005-2012 Junjiro R. Okajima ++ * ++ * This program, aufs is free software; you can redistribute it and/or modify ++ * it under the terms of the GNU General Public License as published by ++ * the Free Software Foundation; either version 2 of the License, or ++ * (at your option) any later version. ++ * ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ * GNU General Public License for more details. 
++ * ++ * You should have received a copy of the GNU General Public License ++ * along with this program; if not, write to the Free Software ++ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA ++ */ ++ ++/* ++ * module global variables and operations ++ */ ++ ++#include ++#include ++#include "aufs.h" ++ ++void *au_kzrealloc(void *p, unsigned int nused, unsigned int new_sz, gfp_t gfp) ++{ ++ if (new_sz <= nused) ++ return p; ++ ++ p = krealloc(p, new_sz, gfp); ++ if (p) ++ memset(p + nused, 0, new_sz - nused); ++ return p; ++} ++ ++/* ---------------------------------------------------------------------- */ ++ ++/* ++ * aufs caches ++ */ ++struct kmem_cache *au_cachep[AuCache_Last]; ++static int __init au_cache_init(void) ++{ ++ au_cachep[AuCache_DINFO] = AuCacheCtor(au_dinfo, au_di_init_once); ++ if (au_cachep[AuCache_DINFO]) ++ /* SLAB_DESTROY_BY_RCU */ ++ au_cachep[AuCache_ICNTNR] = AuCacheCtor(au_icntnr, ++ au_icntnr_init_once); ++ if (au_cachep[AuCache_ICNTNR]) ++ au_cachep[AuCache_FINFO] = AuCacheCtor(au_finfo, ++ au_fi_init_once); ++ if (au_cachep[AuCache_FINFO]) ++ au_cachep[AuCache_VDIR] = AuCache(au_vdir); ++ if (au_cachep[AuCache_VDIR]) ++ au_cachep[AuCache_DEHSTR] = AuCache(au_vdir_dehstr); ++ if (au_cachep[AuCache_DEHSTR]) ++ return 0; ++ ++ return -ENOMEM; ++} ++ ++static void au_cache_fin(void) ++{ ++ int i; ++ ++ /* including AuCache_HNOTIFY */ ++ for (i = 0; i < AuCache_Last; i++) ++ if (au_cachep[i]) { ++ kmem_cache_destroy(au_cachep[i]); ++ au_cachep[i] = NULL; ++ } ++} ++ ++/* ---------------------------------------------------------------------- */ ++ ++int au_dir_roflags; ++ ++#ifdef CONFIG_AUFS_SBILIST ++/* ++ * iterate_supers_type() doesn't protect us from ++ * remounting (branch management) ++ */ ++struct au_splhead au_sbilist; ++#endif ++ ++struct lock_class_key au_lc_key[AuLcKey_Last]; ++ ++/* ++ * functions for module interface. ++ */ ++MODULE_LICENSE("GPL"); ++/* MODULE_LICENSE("GPL v2"); */ ++MODULE_AUTHOR("Junjiro R. 
Okajima "); ++MODULE_DESCRIPTION(AUFS_NAME ++ " -- Advanced multi layered unification filesystem"); ++MODULE_VERSION(AUFS_VERSION); ++ ++/* this module parameter has no meaning when SYSFS is disabled */ ++int sysaufs_brs = 1; ++MODULE_PARM_DESC(brs, "use /fs/aufs/si_*/brN"); ++module_param_named(brs, sysaufs_brs, int, S_IRUGO); ++ ++/* ---------------------------------------------------------------------- */ ++ ++static char au_esc_chars[0x20 + 3]; /* 0x01-0x20, backslash, del, and NULL */ ++ ++int au_seq_path(struct seq_file *seq, struct path *path) ++{ ++ return seq_path(seq, path, au_esc_chars); ++} ++ ++/* ---------------------------------------------------------------------- */ ++ ++static int __init aufs_init(void) ++{ ++ int err, i; ++ char *p; ++ ++ p = au_esc_chars; ++ for (i = 1; i <= ' '; i++) ++ *p++ = i; ++ *p++ = '\\'; ++ *p++ = '\x7f'; ++ *p = 0; ++ ++ au_dir_roflags = au_file_roflags(O_DIRECTORY | O_LARGEFILE); ++ ++ au_sbilist_init(); ++ sysaufs_brs_init(); ++ au_debug_init(); ++ au_dy_init(); ++ err = sysaufs_init(); ++ if (unlikely(err)) ++ goto out; ++ err = au_procfs_init(); ++ if (unlikely(err)) ++ goto out_sysaufs; ++ err = au_wkq_init(); ++ if (unlikely(err)) ++ goto out_procfs; ++ err = au_loopback_init(); ++ if (unlikely(err)) ++ goto out_wkq; ++ err = au_hnotify_init(); ++ if (unlikely(err)) ++ goto out_loopback; ++ err = au_sysrq_init(); ++ if (unlikely(err)) ++ goto out_hin; ++ err = au_cache_init(); ++ if (unlikely(err)) ++ goto out_sysrq; ++ err = register_filesystem(&aufs_fs_type); ++ if (unlikely(err)) ++ goto out_cache; ++ /* since we define pr_fmt, call printk directly */ ++ printk(KERN_INFO AUFS_NAME " " AUFS_VERSION "\n"); ++ goto out; /* success */ ++ ++out_cache: ++ au_cache_fin(); ++out_sysrq: ++ au_sysrq_fin(); ++out_hin: ++ au_hnotify_fin(); ++out_loopback: ++ au_loopback_fin(); ++out_wkq: ++ au_wkq_fin(); ++out_procfs: ++ au_procfs_fin(); ++out_sysaufs: ++ sysaufs_fin(); ++ au_dy_fin(); ++out: ++ return err; ++} ++ ++static void __exit aufs_exit(void) ++{ ++ unregister_filesystem(&aufs_fs_type); ++ au_cache_fin(); ++ au_sysrq_fin(); ++ au_hnotify_fin(); ++ au_loopback_fin(); ++ au_wkq_fin(); ++ au_procfs_fin(); ++ sysaufs_fin(); ++ au_dy_fin(); ++} ++ ++module_init(aufs_init); ++module_exit(aufs_exit); +diff -uNr linux-3.2.0-gentoo-r1.orig//fs/aufs/module.h linux-3.2.0-gentoo-r1/fs/aufs/module.h +--- linux-3.2.0-gentoo-r1.orig//fs/aufs/module.h 1970-01-01 01:00:00.000000000 +0100 ++++ linux-3.2.0-gentoo-r1/fs/aufs/module.h 2012-01-17 12:11:24.840397910 +0100 +@@ -0,0 +1,107 @@ ++/* ++ * Copyright (C) 2005-2012 Junjiro R. Okajima ++ * ++ * This program, aufs is free software; you can redistribute it and/or modify ++ * it under the terms of the GNU General Public License as published by ++ * the Free Software Foundation; either version 2 of the License, or ++ * (at your option) any later version. ++ * ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ * GNU General Public License for more details. 
++ * ++ * You should have received a copy of the GNU General Public License ++ * along with this program; if not, write to the Free Software ++ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA ++ */ ++ ++/* ++ * module initialization and module-global ++ */ ++ ++#ifndef __AUFS_MODULE_H__ ++#define __AUFS_MODULE_H__ ++ ++#ifdef __KERNEL__ ++ ++#include ++ ++struct path; ++struct seq_file; ++ ++/* module parameters */ ++extern int sysaufs_brs; ++ ++/* ---------------------------------------------------------------------- */ ++ ++extern int au_dir_roflags; ++ ++enum { ++ AuLcNonDir_FIINFO, ++ AuLcNonDir_DIINFO, ++ AuLcNonDir_IIINFO, ++ ++ AuLcDir_FIINFO, ++ AuLcDir_DIINFO, ++ AuLcDir_IIINFO, ++ ++ AuLcSymlink_DIINFO, ++ AuLcSymlink_IIINFO, ++ ++ AuLcKey_Last ++}; ++extern struct lock_class_key au_lc_key[AuLcKey_Last]; ++ ++void *au_kzrealloc(void *p, unsigned int nused, unsigned int new_sz, gfp_t gfp); ++int au_seq_path(struct seq_file *seq, struct path *path); ++ ++#ifdef CONFIG_PROC_FS ++/* procfs.c */ ++int __init au_procfs_init(void); ++void au_procfs_fin(void); ++#else ++AuStubInt0(au_procfs_init, void); ++AuStubVoid(au_procfs_fin, void); ++#endif ++ ++/* ---------------------------------------------------------------------- */ ++ ++/* kmem cache */ ++enum { ++ AuCache_DINFO, ++ AuCache_ICNTNR, ++ AuCache_FINFO, ++ AuCache_VDIR, ++ AuCache_DEHSTR, ++#ifdef CONFIG_AUFS_HNOTIFY ++ AuCache_HNOTIFY, ++#endif ++ AuCache_Last ++}; ++ ++#define AuCacheFlags (SLAB_RECLAIM_ACCOUNT | SLAB_MEM_SPREAD) ++#define AuCache(type) KMEM_CACHE(type, AuCacheFlags) ++#define AuCacheCtor(type, ctor) \ ++ kmem_cache_create(#type, sizeof(struct type), \ ++ __alignof__(struct type), AuCacheFlags, ctor) ++ ++extern struct kmem_cache *au_cachep[]; ++ ++#define AuCacheFuncs(name, index) \ ++static inline struct au_##name *au_cache_alloc_##name(void) \ ++{ return kmem_cache_alloc(au_cachep[AuCache_##index], GFP_NOFS); } \ ++static inline void au_cache_free_##name(struct au_##name *p) \ ++{ kmem_cache_free(au_cachep[AuCache_##index], p); } ++ ++AuCacheFuncs(dinfo, DINFO); ++AuCacheFuncs(icntnr, ICNTNR); ++AuCacheFuncs(finfo, FINFO); ++AuCacheFuncs(vdir, VDIR); ++AuCacheFuncs(vdir_dehstr, DEHSTR); ++#ifdef CONFIG_AUFS_HNOTIFY ++AuCacheFuncs(hnotify, HNOTIFY); ++#endif ++ ++#endif /* __KERNEL__ */ ++#endif /* __AUFS_MODULE_H__ */ +diff -uNr linux-3.2.0-gentoo-r1.orig//fs/aufs/opts.c linux-3.2.0-gentoo-r1/fs/aufs/opts.c +--- linux-3.2.0-gentoo-r1.orig//fs/aufs/opts.c 1970-01-01 01:00:00.000000000 +0100 ++++ linux-3.2.0-gentoo-r1/fs/aufs/opts.c 2012-01-17 12:11:24.868175939 +0100 +@@ -0,0 +1,1677 @@ ++/* ++ * Copyright (C) 2005-2012 Junjiro R. Okajima ++ * ++ * This program, aufs is free software; you can redistribute it and/or modify ++ * it under the terms of the GNU General Public License as published by ++ * the Free Software Foundation; either version 2 of the License, or ++ * (at your option) any later version. ++ * ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ * GNU General Public License for more details. 
++ * ++ * You should have received a copy of the GNU General Public License ++ * along with this program; if not, write to the Free Software ++ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA ++ */ ++ ++/* ++ * mount options/flags ++ */ ++ ++#include ++#include /* a distribution requires */ ++#include ++#include "aufs.h" ++ ++/* ---------------------------------------------------------------------- */ ++ ++enum { ++ Opt_br, ++ Opt_add, Opt_del, Opt_mod, Opt_reorder, Opt_append, Opt_prepend, ++ Opt_idel, Opt_imod, Opt_ireorder, ++ Opt_dirwh, Opt_rdcache, Opt_rdblk, Opt_rdhash, Opt_rendir, ++ Opt_rdblk_def, Opt_rdhash_def, ++ Opt_xino, Opt_zxino, Opt_noxino, ++ Opt_trunc_xino, Opt_trunc_xino_v, Opt_notrunc_xino, ++ Opt_trunc_xino_path, Opt_itrunc_xino, ++ Opt_trunc_xib, Opt_notrunc_xib, ++ Opt_shwh, Opt_noshwh, ++ Opt_plink, Opt_noplink, Opt_list_plink, ++ Opt_udba, ++ Opt_dio, Opt_nodio, ++ /* Opt_lock, Opt_unlock, */ ++ Opt_cmd, Opt_cmd_args, ++ Opt_diropq_a, Opt_diropq_w, ++ Opt_warn_perm, Opt_nowarn_perm, ++ Opt_wbr_copyup, Opt_wbr_create, ++ Opt_refrof, Opt_norefrof, ++ Opt_verbose, Opt_noverbose, ++ Opt_sum, Opt_nosum, Opt_wsum, ++ Opt_tail, Opt_ignore, Opt_ignore_silent, Opt_err ++}; ++ ++static match_table_t options = { ++ {Opt_br, "br=%s"}, ++ {Opt_br, "br:%s"}, ++ ++ {Opt_add, "add=%d:%s"}, ++ {Opt_add, "add:%d:%s"}, ++ {Opt_add, "ins=%d:%s"}, ++ {Opt_add, "ins:%d:%s"}, ++ {Opt_append, "append=%s"}, ++ {Opt_append, "append:%s"}, ++ {Opt_prepend, "prepend=%s"}, ++ {Opt_prepend, "prepend:%s"}, ++ ++ {Opt_del, "del=%s"}, ++ {Opt_del, "del:%s"}, ++ /* {Opt_idel, "idel:%d"}, */ ++ {Opt_mod, "mod=%s"}, ++ {Opt_mod, "mod:%s"}, ++ /* {Opt_imod, "imod:%d:%s"}, */ ++ ++ {Opt_dirwh, "dirwh=%d"}, ++ ++ {Opt_xino, "xino=%s"}, ++ {Opt_noxino, "noxino"}, ++ {Opt_trunc_xino, "trunc_xino"}, ++ {Opt_trunc_xino_v, "trunc_xino_v=%d:%d"}, ++ {Opt_notrunc_xino, "notrunc_xino"}, ++ {Opt_trunc_xino_path, "trunc_xino=%s"}, ++ {Opt_itrunc_xino, "itrunc_xino=%d"}, ++ /* {Opt_zxino, "zxino=%s"}, */ ++ {Opt_trunc_xib, "trunc_xib"}, ++ {Opt_notrunc_xib, "notrunc_xib"}, ++ ++#ifdef CONFIG_PROC_FS ++ {Opt_plink, "plink"}, ++#else ++ {Opt_ignore_silent, "plink"}, ++#endif ++ ++ {Opt_noplink, "noplink"}, ++ ++#ifdef CONFIG_AUFS_DEBUG ++ {Opt_list_plink, "list_plink"}, ++#endif ++ ++ {Opt_udba, "udba=%s"}, ++ ++ {Opt_dio, "dio"}, ++ {Opt_nodio, "nodio"}, ++ ++ {Opt_diropq_a, "diropq=always"}, ++ {Opt_diropq_a, "diropq=a"}, ++ {Opt_diropq_w, "diropq=whiteouted"}, ++ {Opt_diropq_w, "diropq=w"}, ++ ++ {Opt_warn_perm, "warn_perm"}, ++ {Opt_nowarn_perm, "nowarn_perm"}, ++ ++ /* keep them temporary */ ++ {Opt_ignore_silent, "coo=%s"}, ++ {Opt_ignore_silent, "nodlgt"}, ++ {Opt_ignore_silent, "nodirperm1"}, ++ {Opt_ignore_silent, "clean_plink"}, ++ ++#ifdef CONFIG_AUFS_SHWH ++ {Opt_shwh, "shwh"}, ++#endif ++ {Opt_noshwh, "noshwh"}, ++ ++ {Opt_rendir, "rendir=%d"}, ++ ++ {Opt_refrof, "refrof"}, ++ {Opt_norefrof, "norefrof"}, ++ ++ {Opt_verbose, "verbose"}, ++ {Opt_verbose, "v"}, ++ {Opt_noverbose, "noverbose"}, ++ {Opt_noverbose, "quiet"}, ++ {Opt_noverbose, "q"}, ++ {Opt_noverbose, "silent"}, ++ ++ {Opt_sum, "sum"}, ++ {Opt_nosum, "nosum"}, ++ {Opt_wsum, "wsum"}, ++ ++ {Opt_rdcache, "rdcache=%d"}, ++ {Opt_rdblk, "rdblk=%d"}, ++ {Opt_rdblk_def, "rdblk=def"}, ++ {Opt_rdhash, "rdhash=%d"}, ++ {Opt_rdhash_def, "rdhash=def"}, ++ ++ {Opt_wbr_create, "create=%s"}, ++ {Opt_wbr_create, "create_policy=%s"}, ++ {Opt_wbr_copyup, "cpup=%s"}, ++ {Opt_wbr_copyup, "copyup=%s"}, ++ {Opt_wbr_copyup, 
"copyup_policy=%s"}, ++ ++ /* internal use for the scripts */ ++ {Opt_ignore_silent, "si=%s"}, ++ ++ {Opt_br, "dirs=%s"}, ++ {Opt_ignore, "debug=%d"}, ++ {Opt_ignore, "delete=whiteout"}, ++ {Opt_ignore, "delete=all"}, ++ {Opt_ignore, "imap=%s"}, ++ ++ /* temporary workaround, due to old mount(8)? */ ++ {Opt_ignore_silent, "relatime"}, ++ ++ {Opt_err, NULL} ++}; ++ ++/* ---------------------------------------------------------------------- */ ++ ++static const char *au_parser_pattern(int val, struct match_token *token) ++{ ++ while (token->pattern) { ++ if (token->token == val) ++ return token->pattern; ++ token++; ++ } ++ BUG(); ++ return "??"; ++} ++ ++/* ---------------------------------------------------------------------- */ ++ ++static match_table_t brperm = { ++ {AuBrPerm_RO, AUFS_BRPERM_RO}, ++ {AuBrPerm_RR, AUFS_BRPERM_RR}, ++ {AuBrPerm_RW, AUFS_BRPERM_RW}, ++ {0, NULL} ++}; ++ ++static match_table_t brrattr = { ++ {AuBrRAttr_WH, AUFS_BRRATTR_WH}, ++ {0, NULL} ++}; ++ ++static match_table_t brwattr = { ++ {AuBrWAttr_NoLinkWH, AUFS_BRWATTR_NLWH}, ++ {0, NULL} ++}; ++ ++#define AuBrStr_LONGEST AUFS_BRPERM_RW "+" AUFS_BRWATTR_NLWH ++ ++static int br_attr_val(char *str, match_table_t table, substring_t args[]) ++{ ++ int attr, v; ++ char *p; ++ ++ attr = 0; ++ do { ++ p = strchr(str, '+'); ++ if (p) ++ *p = 0; ++ v = match_token(str, table, args); ++ if (v) ++ attr |= v; ++ else { ++ if (p) ++ *p = '+'; ++ pr_warning("ignored branch attribute %s\n", str); ++ break; ++ } ++ if (p) ++ str = p + 1; ++ } while (p); ++ ++ return attr; ++} ++ ++static int noinline_for_stack br_perm_val(char *perm) ++{ ++ int val; ++ char *p; ++ substring_t args[MAX_OPT_ARGS]; ++ ++ p = strchr(perm, '+'); ++ if (p) ++ *p = 0; ++ val = match_token(perm, brperm, args); ++ if (!val) { ++ if (p) ++ *p = '+'; ++ pr_warning("ignored branch permission %s\n", perm); ++ val = AuBrPerm_RO; ++ goto out; ++ } ++ if (!p) ++ goto out; ++ ++ switch (val) { ++ case AuBrPerm_RO: ++ case AuBrPerm_RR: ++ val |= br_attr_val(p + 1, brrattr, args); ++ break; ++ case AuBrPerm_RW: ++ val |= br_attr_val(p + 1, brwattr, args); ++ break; ++ } ++ ++out: ++ return val; ++} ++ ++/* Caller should free the return value */ ++char *au_optstr_br_perm(int brperm) ++{ ++ char *p, a[sizeof(AuBrStr_LONGEST)]; ++ int sz; ++ ++#define SetPerm(str) do { \ ++ sz = sizeof(str); \ ++ memcpy(a, str, sz); \ ++ p = a + sz - 1; \ ++ } while (0) ++ ++#define AppendAttr(flag, str) do { \ ++ if (brperm & flag) { \ ++ sz = sizeof(str); \ ++ *p++ = '+'; \ ++ memcpy(p, str, sz); \ ++ p += sz - 1; \ ++ } \ ++ } while (0) ++ ++ switch (brperm & AuBrPerm_Mask) { ++ case AuBrPerm_RO: ++ SetPerm(AUFS_BRPERM_RO); ++ break; ++ case AuBrPerm_RR: ++ SetPerm(AUFS_BRPERM_RR); ++ break; ++ case AuBrPerm_RW: ++ SetPerm(AUFS_BRPERM_RW); ++ break; ++ default: ++ AuDebugOn(1); ++ } ++ ++ AppendAttr(AuBrRAttr_WH, AUFS_BRRATTR_WH); ++ AppendAttr(AuBrWAttr_NoLinkWH, AUFS_BRWATTR_NLWH); ++ ++ AuDebugOn(strlen(a) >= sizeof(a)); ++ return kstrdup(a, GFP_NOFS); ++#undef SetPerm ++#undef AppendAttr ++} ++ ++/* ---------------------------------------------------------------------- */ ++ ++static match_table_t udbalevel = { ++ {AuOpt_UDBA_REVAL, "reval"}, ++ {AuOpt_UDBA_NONE, "none"}, ++#ifdef CONFIG_AUFS_HNOTIFY ++ {AuOpt_UDBA_HNOTIFY, "notify"}, /* abstraction */ ++#ifdef CONFIG_AUFS_HFSNOTIFY ++ {AuOpt_UDBA_HNOTIFY, "fsnotify"}, ++#endif ++#endif ++ {-1, NULL} ++}; ++ ++static int noinline_for_stack udba_val(char *str) ++{ ++ substring_t args[MAX_OPT_ARGS]; ++ ++ return 
match_token(str, udbalevel, args); ++} ++ ++const char *au_optstr_udba(int udba) ++{ ++ return au_parser_pattern(udba, (void *)udbalevel); ++} ++ ++/* ---------------------------------------------------------------------- */ ++ ++static match_table_t au_wbr_create_policy = { ++ {AuWbrCreate_TDP, "tdp"}, ++ {AuWbrCreate_TDP, "top-down-parent"}, ++ {AuWbrCreate_RR, "rr"}, ++ {AuWbrCreate_RR, "round-robin"}, ++ {AuWbrCreate_MFS, "mfs"}, ++ {AuWbrCreate_MFS, "most-free-space"}, ++ {AuWbrCreate_MFSV, "mfs:%d"}, ++ {AuWbrCreate_MFSV, "most-free-space:%d"}, ++ ++ {AuWbrCreate_MFSRR, "mfsrr:%d"}, ++ {AuWbrCreate_MFSRRV, "mfsrr:%d:%d"}, ++ {AuWbrCreate_PMFS, "pmfs"}, ++ {AuWbrCreate_PMFSV, "pmfs:%d"}, ++ ++ {-1, NULL} ++}; ++ ++/* ++ * cf. linux/lib/parser.c and cmdline.c ++ * gave up calling memparse() since it uses simple_strtoull() instead of ++ * kstrto...(). ++ */ ++static int noinline_for_stack ++au_match_ull(substring_t *s, unsigned long long *result) ++{ ++ int err; ++ unsigned int len; ++ char a[32]; ++ ++ err = -ERANGE; ++ len = s->to - s->from; ++ if (len + 1 <= sizeof(a)) { ++ memcpy(a, s->from, len); ++ a[len] = '\0'; ++ err = kstrtoull(a, 0, result); ++ } ++ return err; ++} ++ ++static int au_wbr_mfs_wmark(substring_t *arg, char *str, ++ struct au_opt_wbr_create *create) ++{ ++ int err; ++ unsigned long long ull; ++ ++ err = 0; ++ if (!au_match_ull(arg, &ull)) ++ create->mfsrr_watermark = ull; ++ else { ++ pr_err("bad integer in %s\n", str); ++ err = -EINVAL; ++ } ++ ++ return err; ++} ++ ++static int au_wbr_mfs_sec(substring_t *arg, char *str, ++ struct au_opt_wbr_create *create) ++{ ++ int n, err; ++ ++ err = 0; ++ if (!match_int(arg, &n) && 0 <= n && n <= AUFS_MFS_MAX_SEC) ++ create->mfs_second = n; ++ else { ++ pr_err("bad integer in %s\n", str); ++ err = -EINVAL; ++ } ++ ++ return err; ++} ++ ++static int noinline_for_stack ++au_wbr_create_val(char *str, struct au_opt_wbr_create *create) ++{ ++ int err, e; ++ substring_t args[MAX_OPT_ARGS]; ++ ++ err = match_token(str, au_wbr_create_policy, args); ++ create->wbr_create = err; ++ switch (err) { ++ case AuWbrCreate_MFSRRV: ++ e = au_wbr_mfs_wmark(&args[0], str, create); ++ if (!e) ++ e = au_wbr_mfs_sec(&args[1], str, create); ++ if (unlikely(e)) ++ err = e; ++ break; ++ case AuWbrCreate_MFSRR: ++ e = au_wbr_mfs_wmark(&args[0], str, create); ++ if (unlikely(e)) { ++ err = e; ++ break; ++ } ++ /*FALLTHROUGH*/ ++ case AuWbrCreate_MFS: ++ case AuWbrCreate_PMFS: ++ create->mfs_second = AUFS_MFS_DEF_SEC; ++ break; ++ case AuWbrCreate_MFSV: ++ case AuWbrCreate_PMFSV: ++ e = au_wbr_mfs_sec(&args[0], str, create); ++ if (unlikely(e)) ++ err = e; ++ break; ++ } ++ ++ return err; ++} ++ ++const char *au_optstr_wbr_create(int wbr_create) ++{ ++ return au_parser_pattern(wbr_create, (void *)au_wbr_create_policy); ++} ++ ++static match_table_t au_wbr_copyup_policy = { ++ {AuWbrCopyup_TDP, "tdp"}, ++ {AuWbrCopyup_TDP, "top-down-parent"}, ++ {AuWbrCopyup_BUP, "bup"}, ++ {AuWbrCopyup_BUP, "bottom-up-parent"}, ++ {AuWbrCopyup_BU, "bu"}, ++ {AuWbrCopyup_BU, "bottom-up"}, ++ {-1, NULL} ++}; ++ ++static int noinline_for_stack au_wbr_copyup_val(char *str) ++{ ++ substring_t args[MAX_OPT_ARGS]; ++ ++ return match_token(str, au_wbr_copyup_policy, args); ++} ++ ++const char *au_optstr_wbr_copyup(int wbr_copyup) ++{ ++ return au_parser_pattern(wbr_copyup, (void *)au_wbr_copyup_policy); ++} ++ ++/* ---------------------------------------------------------------------- */ ++ ++static const int lkup_dirflags = LOOKUP_FOLLOW | LOOKUP_DIRECTORY; ++ ++static void 
dump_opts(struct au_opts *opts) ++{ ++#ifdef CONFIG_AUFS_DEBUG ++ /* reduce stack space */ ++ union { ++ struct au_opt_add *add; ++ struct au_opt_del *del; ++ struct au_opt_mod *mod; ++ struct au_opt_xino *xino; ++ struct au_opt_xino_itrunc *xino_itrunc; ++ struct au_opt_wbr_create *create; ++ } u; ++ struct au_opt *opt; ++ ++ opt = opts->opt; ++ while (opt->type != Opt_tail) { ++ switch (opt->type) { ++ case Opt_add: ++ u.add = &opt->add; ++ AuDbg("add {b%d, %s, 0x%x, %p}\n", ++ u.add->bindex, u.add->pathname, u.add->perm, ++ u.add->path.dentry); ++ break; ++ case Opt_del: ++ case Opt_idel: ++ u.del = &opt->del; ++ AuDbg("del {%s, %p}\n", ++ u.del->pathname, u.del->h_path.dentry); ++ break; ++ case Opt_mod: ++ case Opt_imod: ++ u.mod = &opt->mod; ++ AuDbg("mod {%s, 0x%x, %p}\n", ++ u.mod->path, u.mod->perm, u.mod->h_root); ++ break; ++ case Opt_append: ++ u.add = &opt->add; ++ AuDbg("append {b%d, %s, 0x%x, %p}\n", ++ u.add->bindex, u.add->pathname, u.add->perm, ++ u.add->path.dentry); ++ break; ++ case Opt_prepend: ++ u.add = &opt->add; ++ AuDbg("prepend {b%d, %s, 0x%x, %p}\n", ++ u.add->bindex, u.add->pathname, u.add->perm, ++ u.add->path.dentry); ++ break; ++ case Opt_dirwh: ++ AuDbg("dirwh %d\n", opt->dirwh); ++ break; ++ case Opt_rdcache: ++ AuDbg("rdcache %d\n", opt->rdcache); ++ break; ++ case Opt_rdblk: ++ AuDbg("rdblk %u\n", opt->rdblk); ++ break; ++ case Opt_rdblk_def: ++ AuDbg("rdblk_def\n"); ++ break; ++ case Opt_rdhash: ++ AuDbg("rdhash %u\n", opt->rdhash); ++ break; ++ case Opt_rdhash_def: ++ AuDbg("rdhash_def\n"); ++ break; ++ case Opt_xino: ++ u.xino = &opt->xino; ++ AuDbg("xino {%s %.*s}\n", ++ u.xino->path, ++ AuDLNPair(u.xino->file->f_dentry)); ++ break; ++ case Opt_trunc_xino: ++ AuLabel(trunc_xino); ++ break; ++ case Opt_notrunc_xino: ++ AuLabel(notrunc_xino); ++ break; ++ case Opt_trunc_xino_path: ++ case Opt_itrunc_xino: ++ u.xino_itrunc = &opt->xino_itrunc; ++ AuDbg("trunc_xino %d\n", u.xino_itrunc->bindex); ++ break; ++ ++ case Opt_noxino: ++ AuLabel(noxino); ++ break; ++ case Opt_trunc_xib: ++ AuLabel(trunc_xib); ++ break; ++ case Opt_notrunc_xib: ++ AuLabel(notrunc_xib); ++ break; ++ case Opt_shwh: ++ AuLabel(shwh); ++ break; ++ case Opt_noshwh: ++ AuLabel(noshwh); ++ break; ++ case Opt_plink: ++ AuLabel(plink); ++ break; ++ case Opt_noplink: ++ AuLabel(noplink); ++ break; ++ case Opt_list_plink: ++ AuLabel(list_plink); ++ break; ++ case Opt_udba: ++ AuDbg("udba %d, %s\n", ++ opt->udba, au_optstr_udba(opt->udba)); ++ break; ++ case Opt_dio: ++ AuLabel(dio); ++ break; ++ case Opt_nodio: ++ AuLabel(nodio); ++ break; ++ case Opt_diropq_a: ++ AuLabel(diropq_a); ++ break; ++ case Opt_diropq_w: ++ AuLabel(diropq_w); ++ break; ++ case Opt_warn_perm: ++ AuLabel(warn_perm); ++ break; ++ case Opt_nowarn_perm: ++ AuLabel(nowarn_perm); ++ break; ++ case Opt_refrof: ++ AuLabel(refrof); ++ break; ++ case Opt_norefrof: ++ AuLabel(norefrof); ++ break; ++ case Opt_verbose: ++ AuLabel(verbose); ++ break; ++ case Opt_noverbose: ++ AuLabel(noverbose); ++ break; ++ case Opt_sum: ++ AuLabel(sum); ++ break; ++ case Opt_nosum: ++ AuLabel(nosum); ++ break; ++ case Opt_wsum: ++ AuLabel(wsum); ++ break; ++ case Opt_wbr_create: ++ u.create = &opt->wbr_create; ++ AuDbg("create %d, %s\n", u.create->wbr_create, ++ au_optstr_wbr_create(u.create->wbr_create)); ++ switch (u.create->wbr_create) { ++ case AuWbrCreate_MFSV: ++ case AuWbrCreate_PMFSV: ++ AuDbg("%d sec\n", u.create->mfs_second); ++ break; ++ case AuWbrCreate_MFSRR: ++ AuDbg("%llu watermark\n", ++ u.create->mfsrr_watermark); ++ break; 
++ case AuWbrCreate_MFSRRV: ++ AuDbg("%llu watermark, %d sec\n", ++ u.create->mfsrr_watermark, ++ u.create->mfs_second); ++ break; ++ } ++ break; ++ case Opt_wbr_copyup: ++ AuDbg("copyup %d, %s\n", opt->wbr_copyup, ++ au_optstr_wbr_copyup(opt->wbr_copyup)); ++ break; ++ default: ++ BUG(); ++ } ++ opt++; ++ } ++#endif ++} ++ ++void au_opts_free(struct au_opts *opts) ++{ ++ struct au_opt *opt; ++ ++ opt = opts->opt; ++ while (opt->type != Opt_tail) { ++ switch (opt->type) { ++ case Opt_add: ++ case Opt_append: ++ case Opt_prepend: ++ path_put(&opt->add.path); ++ break; ++ case Opt_del: ++ case Opt_idel: ++ path_put(&opt->del.h_path); ++ break; ++ case Opt_mod: ++ case Opt_imod: ++ dput(opt->mod.h_root); ++ break; ++ case Opt_xino: ++ fput(opt->xino.file); ++ break; ++ } ++ opt++; ++ } ++} ++ ++static int opt_add(struct au_opt *opt, char *opt_str, unsigned long sb_flags, ++ aufs_bindex_t bindex) ++{ ++ int err; ++ struct au_opt_add *add = &opt->add; ++ char *p; ++ ++ add->bindex = bindex; ++ add->perm = AuBrPerm_RO; ++ add->pathname = opt_str; ++ p = strchr(opt_str, '='); ++ if (p) { ++ *p++ = 0; ++ if (*p) ++ add->perm = br_perm_val(p); ++ } ++ ++ err = vfsub_kern_path(add->pathname, lkup_dirflags, &add->path); ++ if (!err) { ++ if (!p) { ++ add->perm = AuBrPerm_RO; ++ if (au_test_fs_rr(add->path.dentry->d_sb)) ++ add->perm = AuBrPerm_RR; ++ else if (!bindex && !(sb_flags & MS_RDONLY)) ++ add->perm = AuBrPerm_RW; ++ } ++ opt->type = Opt_add; ++ goto out; ++ } ++ pr_err("lookup failed %s (%d)\n", add->pathname, err); ++ err = -EINVAL; ++ ++out: ++ return err; ++} ++ ++static int au_opts_parse_del(struct au_opt_del *del, substring_t args[]) ++{ ++ int err; ++ ++ del->pathname = args[0].from; ++ AuDbg("del path %s\n", del->pathname); ++ ++ err = vfsub_kern_path(del->pathname, lkup_dirflags, &del->h_path); ++ if (unlikely(err)) ++ pr_err("lookup failed %s (%d)\n", del->pathname, err); ++ ++ return err; ++} ++ ++#if 0 /* reserved for future use */ ++static int au_opts_parse_idel(struct super_block *sb, aufs_bindex_t bindex, ++ struct au_opt_del *del, substring_t args[]) ++{ ++ int err; ++ struct dentry *root; ++ ++ err = -EINVAL; ++ root = sb->s_root; ++ aufs_read_lock(root, AuLock_FLUSH); ++ if (bindex < 0 || au_sbend(sb) < bindex) { ++ pr_err("out of bounds, %d\n", bindex); ++ goto out; ++ } ++ ++ err = 0; ++ del->h_path.dentry = dget(au_h_dptr(root, bindex)); ++ del->h_path.mnt = mntget(au_sbr_mnt(sb, bindex)); ++ ++out: ++ aufs_read_unlock(root, !AuLock_IR); ++ return err; ++} ++#endif ++ ++static int noinline_for_stack ++au_opts_parse_mod(struct au_opt_mod *mod, substring_t args[]) ++{ ++ int err; ++ struct path path; ++ char *p; ++ ++ err = -EINVAL; ++ mod->path = args[0].from; ++ p = strchr(mod->path, '='); ++ if (unlikely(!p)) { ++ pr_err("no permssion %s\n", args[0].from); ++ goto out; ++ } ++ ++ *p++ = 0; ++ err = vfsub_kern_path(mod->path, lkup_dirflags, &path); ++ if (unlikely(err)) { ++ pr_err("lookup failed %s (%d)\n", mod->path, err); ++ goto out; ++ } ++ ++ mod->perm = br_perm_val(p); ++ AuDbg("mod path %s, perm 0x%x, %s\n", mod->path, mod->perm, p); ++ mod->h_root = dget(path.dentry); ++ path_put(&path); ++ ++out: ++ return err; ++} ++ ++#if 0 /* reserved for future use */ ++static int au_opts_parse_imod(struct super_block *sb, aufs_bindex_t bindex, ++ struct au_opt_mod *mod, substring_t args[]) ++{ ++ int err; ++ struct dentry *root; ++ ++ err = -EINVAL; ++ root = sb->s_root; ++ aufs_read_lock(root, AuLock_FLUSH); ++ if (bindex < 0 || au_sbend(sb) < bindex) { ++ pr_err("out of 
bounds, %d\n", bindex); ++ goto out; ++ } ++ ++ err = 0; ++ mod->perm = br_perm_val(args[1].from); ++ AuDbg("mod path %s, perm 0x%x, %s\n", ++ mod->path, mod->perm, args[1].from); ++ mod->h_root = dget(au_h_dptr(root, bindex)); ++ ++out: ++ aufs_read_unlock(root, !AuLock_IR); ++ return err; ++} ++#endif ++ ++static int au_opts_parse_xino(struct super_block *sb, struct au_opt_xino *xino, ++ substring_t args[]) ++{ ++ int err; ++ struct file *file; ++ ++ file = au_xino_create(sb, args[0].from, /*silent*/0); ++ err = PTR_ERR(file); ++ if (IS_ERR(file)) ++ goto out; ++ ++ err = -EINVAL; ++ if (unlikely(file->f_dentry->d_sb == sb)) { ++ fput(file); ++ pr_err("%s must be outside\n", args[0].from); ++ goto out; ++ } ++ ++ err = 0; ++ xino->file = file; ++ xino->path = args[0].from; ++ ++out: ++ return err; ++} ++ ++static int noinline_for_stack ++au_opts_parse_xino_itrunc_path(struct super_block *sb, ++ struct au_opt_xino_itrunc *xino_itrunc, ++ substring_t args[]) ++{ ++ int err; ++ aufs_bindex_t bend, bindex; ++ struct path path; ++ struct dentry *root; ++ ++ err = vfsub_kern_path(args[0].from, lkup_dirflags, &path); ++ if (unlikely(err)) { ++ pr_err("lookup failed %s (%d)\n", args[0].from, err); ++ goto out; ++ } ++ ++ xino_itrunc->bindex = -1; ++ root = sb->s_root; ++ aufs_read_lock(root, AuLock_FLUSH); ++ bend = au_sbend(sb); ++ for (bindex = 0; bindex <= bend; bindex++) { ++ if (au_h_dptr(root, bindex) == path.dentry) { ++ xino_itrunc->bindex = bindex; ++ break; ++ } ++ } ++ aufs_read_unlock(root, !AuLock_IR); ++ path_put(&path); ++ ++ if (unlikely(xino_itrunc->bindex < 0)) { ++ pr_err("no such branch %s\n", args[0].from); ++ err = -EINVAL; ++ } ++ ++out: ++ return err; ++} ++ ++/* called without aufs lock */ ++int au_opts_parse(struct super_block *sb, char *str, struct au_opts *opts) ++{ ++ int err, n, token; ++ aufs_bindex_t bindex; ++ unsigned char skipped; ++ struct dentry *root; ++ struct au_opt *opt, *opt_tail; ++ char *opt_str; ++ /* reduce the stack space */ ++ union { ++ struct au_opt_xino_itrunc *xino_itrunc; ++ struct au_opt_wbr_create *create; ++ } u; ++ struct { ++ substring_t args[MAX_OPT_ARGS]; ++ } *a; ++ ++ err = -ENOMEM; ++ a = kmalloc(sizeof(*a), GFP_NOFS); ++ if (unlikely(!a)) ++ goto out; ++ ++ root = sb->s_root; ++ err = 0; ++ bindex = 0; ++ opt = opts->opt; ++ opt_tail = opt + opts->max_opt - 1; ++ opt->type = Opt_tail; ++ while (!err && (opt_str = strsep(&str, ",")) && *opt_str) { ++ err = -EINVAL; ++ skipped = 0; ++ token = match_token(opt_str, options, a->args); ++ switch (token) { ++ case Opt_br: ++ err = 0; ++ while (!err && (opt_str = strsep(&a->args[0].from, ":")) ++ && *opt_str) { ++ err = opt_add(opt, opt_str, opts->sb_flags, ++ bindex++); ++ if (unlikely(!err && ++opt > opt_tail)) { ++ err = -E2BIG; ++ break; ++ } ++ opt->type = Opt_tail; ++ skipped = 1; ++ } ++ break; ++ case Opt_add: ++ if (unlikely(match_int(&a->args[0], &n))) { ++ pr_err("bad integer in %s\n", opt_str); ++ break; ++ } ++ bindex = n; ++ err = opt_add(opt, a->args[1].from, opts->sb_flags, ++ bindex); ++ if (!err) ++ opt->type = token; ++ break; ++ case Opt_append: ++ err = opt_add(opt, a->args[0].from, opts->sb_flags, ++ /*dummy bindex*/1); ++ if (!err) ++ opt->type = token; ++ break; ++ case Opt_prepend: ++ err = opt_add(opt, a->args[0].from, opts->sb_flags, ++ /*bindex*/0); ++ if (!err) ++ opt->type = token; ++ break; ++ case Opt_del: ++ err = au_opts_parse_del(&opt->del, a->args); ++ if (!err) ++ opt->type = token; ++ break; ++#if 0 /* reserved for future use */ ++ case Opt_idel: ++ 
del->pathname = "(indexed)"; ++ if (unlikely(match_int(&args[0], &n))) { ++ pr_err("bad integer in %s\n", opt_str); ++ break; ++ } ++ err = au_opts_parse_idel(sb, n, &opt->del, a->args); ++ if (!err) ++ opt->type = token; ++ break; ++#endif ++ case Opt_mod: ++ err = au_opts_parse_mod(&opt->mod, a->args); ++ if (!err) ++ opt->type = token; ++ break; ++#ifdef IMOD /* reserved for future use */ ++ case Opt_imod: ++ u.mod->path = "(indexed)"; ++ if (unlikely(match_int(&a->args[0], &n))) { ++ pr_err("bad integer in %s\n", opt_str); ++ break; ++ } ++ err = au_opts_parse_imod(sb, n, &opt->mod, a->args); ++ if (!err) ++ opt->type = token; ++ break; ++#endif ++ case Opt_xino: ++ err = au_opts_parse_xino(sb, &opt->xino, a->args); ++ if (!err) ++ opt->type = token; ++ break; ++ ++ case Opt_trunc_xino_path: ++ err = au_opts_parse_xino_itrunc_path ++ (sb, &opt->xino_itrunc, a->args); ++ if (!err) ++ opt->type = token; ++ break; ++ ++ case Opt_itrunc_xino: ++ u.xino_itrunc = &opt->xino_itrunc; ++ if (unlikely(match_int(&a->args[0], &n))) { ++ pr_err("bad integer in %s\n", opt_str); ++ break; ++ } ++ u.xino_itrunc->bindex = n; ++ aufs_read_lock(root, AuLock_FLUSH); ++ if (n < 0 || au_sbend(sb) < n) { ++ pr_err("out of bounds, %d\n", n); ++ aufs_read_unlock(root, !AuLock_IR); ++ break; ++ } ++ aufs_read_unlock(root, !AuLock_IR); ++ err = 0; ++ opt->type = token; ++ break; ++ ++ case Opt_dirwh: ++ if (unlikely(match_int(&a->args[0], &opt->dirwh))) ++ break; ++ err = 0; ++ opt->type = token; ++ break; ++ ++ case Opt_rdcache: ++ if (unlikely(match_int(&a->args[0], &n))) { ++ pr_err("bad integer in %s\n", opt_str); ++ break; ++ } ++ if (unlikely(n > AUFS_RDCACHE_MAX)) { ++ pr_err("rdcache must be smaller than %d\n", ++ AUFS_RDCACHE_MAX); ++ break; ++ } ++ opt->rdcache = n; ++ err = 0; ++ opt->type = token; ++ break; ++ case Opt_rdblk: ++ if (unlikely(match_int(&a->args[0], &n) ++ || n < 0 ++ || n > KMALLOC_MAX_SIZE)) { ++ pr_err("bad integer in %s\n", opt_str); ++ break; ++ } ++ if (unlikely(n && n < NAME_MAX)) { ++ pr_err("rdblk must be larger than %d\n", ++ NAME_MAX); ++ break; ++ } ++ opt->rdblk = n; ++ err = 0; ++ opt->type = token; ++ break; ++ case Opt_rdhash: ++ if (unlikely(match_int(&a->args[0], &n) ++ || n < 0 ++ || n * sizeof(struct hlist_head) ++ > KMALLOC_MAX_SIZE)) { ++ pr_err("bad integer in %s\n", opt_str); ++ break; ++ } ++ opt->rdhash = n; ++ err = 0; ++ opt->type = token; ++ break; ++ ++ case Opt_trunc_xino: ++ case Opt_notrunc_xino: ++ case Opt_noxino: ++ case Opt_trunc_xib: ++ case Opt_notrunc_xib: ++ case Opt_shwh: ++ case Opt_noshwh: ++ case Opt_plink: ++ case Opt_noplink: ++ case Opt_list_plink: ++ case Opt_dio: ++ case Opt_nodio: ++ case Opt_diropq_a: ++ case Opt_diropq_w: ++ case Opt_warn_perm: ++ case Opt_nowarn_perm: ++ case Opt_refrof: ++ case Opt_norefrof: ++ case Opt_verbose: ++ case Opt_noverbose: ++ case Opt_sum: ++ case Opt_nosum: ++ case Opt_wsum: ++ case Opt_rdblk_def: ++ case Opt_rdhash_def: ++ err = 0; ++ opt->type = token; ++ break; ++ ++ case Opt_udba: ++ opt->udba = udba_val(a->args[0].from); ++ if (opt->udba >= 0) { ++ err = 0; ++ opt->type = token; ++ } else ++ pr_err("wrong value, %s\n", opt_str); ++ break; ++ ++ case Opt_wbr_create: ++ u.create = &opt->wbr_create; ++ u.create->wbr_create ++ = au_wbr_create_val(a->args[0].from, u.create); ++ if (u.create->wbr_create >= 0) { ++ err = 0; ++ opt->type = token; ++ } else ++ pr_err("wrong value, %s\n", opt_str); ++ break; ++ case Opt_wbr_copyup: ++ opt->wbr_copyup = au_wbr_copyup_val(a->args[0].from); ++ if 
(opt->wbr_copyup >= 0) { ++ err = 0; ++ opt->type = token; ++ } else ++ pr_err("wrong value, %s\n", opt_str); ++ break; ++ ++ case Opt_ignore: ++ pr_warning("ignored %s\n", opt_str); ++ /*FALLTHROUGH*/ ++ case Opt_ignore_silent: ++ skipped = 1; ++ err = 0; ++ break; ++ case Opt_err: ++ pr_err("unknown option %s\n", opt_str); ++ break; ++ } ++ ++ if (!err && !skipped) { ++ if (unlikely(++opt > opt_tail)) { ++ err = -E2BIG; ++ opt--; ++ opt->type = Opt_tail; ++ break; ++ } ++ opt->type = Opt_tail; ++ } ++ } ++ ++ kfree(a); ++ dump_opts(opts); ++ if (unlikely(err)) ++ au_opts_free(opts); ++ ++out: ++ return err; ++} ++ ++static int au_opt_wbr_create(struct super_block *sb, ++ struct au_opt_wbr_create *create) ++{ ++ int err; ++ struct au_sbinfo *sbinfo; ++ ++ SiMustWriteLock(sb); ++ ++ err = 1; /* handled */ ++ sbinfo = au_sbi(sb); ++ if (sbinfo->si_wbr_create_ops->fin) { ++ err = sbinfo->si_wbr_create_ops->fin(sb); ++ if (!err) ++ err = 1; ++ } ++ ++ sbinfo->si_wbr_create = create->wbr_create; ++ sbinfo->si_wbr_create_ops = au_wbr_create_ops + create->wbr_create; ++ switch (create->wbr_create) { ++ case AuWbrCreate_MFSRRV: ++ case AuWbrCreate_MFSRR: ++ sbinfo->si_wbr_mfs.mfsrr_watermark = create->mfsrr_watermark; ++ /*FALLTHROUGH*/ ++ case AuWbrCreate_MFS: ++ case AuWbrCreate_MFSV: ++ case AuWbrCreate_PMFS: ++ case AuWbrCreate_PMFSV: ++ sbinfo->si_wbr_mfs.mfs_expire ++ = msecs_to_jiffies(create->mfs_second * MSEC_PER_SEC); ++ break; ++ } ++ ++ if (sbinfo->si_wbr_create_ops->init) ++ sbinfo->si_wbr_create_ops->init(sb); /* ignore */ ++ ++ return err; ++} ++ ++/* ++ * returns, ++ * plus: processed without an error ++ * zero: unprocessed ++ */ ++static int au_opt_simple(struct super_block *sb, struct au_opt *opt, ++ struct au_opts *opts) ++{ ++ int err; ++ struct au_sbinfo *sbinfo; ++ ++ SiMustWriteLock(sb); ++ ++ err = 1; /* handled */ ++ sbinfo = au_sbi(sb); ++ switch (opt->type) { ++ case Opt_udba: ++ sbinfo->si_mntflags &= ~AuOptMask_UDBA; ++ sbinfo->si_mntflags |= opt->udba; ++ opts->given_udba |= opt->udba; ++ break; ++ ++ case Opt_plink: ++ au_opt_set(sbinfo->si_mntflags, PLINK); ++ break; ++ case Opt_noplink: ++ if (au_opt_test(sbinfo->si_mntflags, PLINK)) ++ au_plink_put(sb, /*verbose*/1); ++ au_opt_clr(sbinfo->si_mntflags, PLINK); ++ break; ++ case Opt_list_plink: ++ if (au_opt_test(sbinfo->si_mntflags, PLINK)) ++ au_plink_list(sb); ++ break; ++ ++ case Opt_dio: ++ au_opt_set(sbinfo->si_mntflags, DIO); ++ au_fset_opts(opts->flags, REFRESH_DYAOP); ++ break; ++ case Opt_nodio: ++ au_opt_clr(sbinfo->si_mntflags, DIO); ++ au_fset_opts(opts->flags, REFRESH_DYAOP); ++ break; ++ ++ case Opt_diropq_a: ++ au_opt_set(sbinfo->si_mntflags, ALWAYS_DIROPQ); ++ break; ++ case Opt_diropq_w: ++ au_opt_clr(sbinfo->si_mntflags, ALWAYS_DIROPQ); ++ break; ++ ++ case Opt_warn_perm: ++ au_opt_set(sbinfo->si_mntflags, WARN_PERM); ++ break; ++ case Opt_nowarn_perm: ++ au_opt_clr(sbinfo->si_mntflags, WARN_PERM); ++ break; ++ ++ case Opt_refrof: ++ au_opt_set(sbinfo->si_mntflags, REFROF); ++ break; ++ case Opt_norefrof: ++ au_opt_clr(sbinfo->si_mntflags, REFROF); ++ break; ++ ++ case Opt_verbose: ++ au_opt_set(sbinfo->si_mntflags, VERBOSE); ++ break; ++ case Opt_noverbose: ++ au_opt_clr(sbinfo->si_mntflags, VERBOSE); ++ break; ++ ++ case Opt_sum: ++ au_opt_set(sbinfo->si_mntflags, SUM); ++ break; ++ case Opt_wsum: ++ au_opt_clr(sbinfo->si_mntflags, SUM); ++ au_opt_set(sbinfo->si_mntflags, SUM_W); ++ case Opt_nosum: ++ au_opt_clr(sbinfo->si_mntflags, SUM); ++ au_opt_clr(sbinfo->si_mntflags, SUM_W); ++ break; ++ 
++ case Opt_wbr_create: ++ err = au_opt_wbr_create(sb, &opt->wbr_create); ++ break; ++ case Opt_wbr_copyup: ++ sbinfo->si_wbr_copyup = opt->wbr_copyup; ++ sbinfo->si_wbr_copyup_ops = au_wbr_copyup_ops + opt->wbr_copyup; ++ break; ++ ++ case Opt_dirwh: ++ sbinfo->si_dirwh = opt->dirwh; ++ break; ++ ++ case Opt_rdcache: ++ sbinfo->si_rdcache ++ = msecs_to_jiffies(opt->rdcache * MSEC_PER_SEC); ++ break; ++ case Opt_rdblk: ++ sbinfo->si_rdblk = opt->rdblk; ++ break; ++ case Opt_rdblk_def: ++ sbinfo->si_rdblk = AUFS_RDBLK_DEF; ++ break; ++ case Opt_rdhash: ++ sbinfo->si_rdhash = opt->rdhash; ++ break; ++ case Opt_rdhash_def: ++ sbinfo->si_rdhash = AUFS_RDHASH_DEF; ++ break; ++ ++ case Opt_shwh: ++ au_opt_set(sbinfo->si_mntflags, SHWH); ++ break; ++ case Opt_noshwh: ++ au_opt_clr(sbinfo->si_mntflags, SHWH); ++ break; ++ ++ case Opt_trunc_xino: ++ au_opt_set(sbinfo->si_mntflags, TRUNC_XINO); ++ break; ++ case Opt_notrunc_xino: ++ au_opt_clr(sbinfo->si_mntflags, TRUNC_XINO); ++ break; ++ ++ case Opt_trunc_xino_path: ++ case Opt_itrunc_xino: ++ err = au_xino_trunc(sb, opt->xino_itrunc.bindex); ++ if (!err) ++ err = 1; ++ break; ++ ++ case Opt_trunc_xib: ++ au_fset_opts(opts->flags, TRUNC_XIB); ++ break; ++ case Opt_notrunc_xib: ++ au_fclr_opts(opts->flags, TRUNC_XIB); ++ break; ++ ++ default: ++ err = 0; ++ break; ++ } ++ ++ return err; ++} ++ ++/* ++ * returns tri-state. ++ * plus: processed without an error ++ * zero: unprocessed ++ * minus: error ++ */ ++static int au_opt_br(struct super_block *sb, struct au_opt *opt, ++ struct au_opts *opts) ++{ ++ int err, do_refresh; ++ ++ err = 0; ++ switch (opt->type) { ++ case Opt_append: ++ opt->add.bindex = au_sbend(sb) + 1; ++ if (opt->add.bindex < 0) ++ opt->add.bindex = 0; ++ goto add; ++ case Opt_prepend: ++ opt->add.bindex = 0; ++ add: ++ case Opt_add: ++ err = au_br_add(sb, &opt->add, ++ au_ftest_opts(opts->flags, REMOUNT)); ++ if (!err) { ++ err = 1; ++ au_fset_opts(opts->flags, REFRESH); ++ } ++ break; ++ ++ case Opt_del: ++ case Opt_idel: ++ err = au_br_del(sb, &opt->del, ++ au_ftest_opts(opts->flags, REMOUNT)); ++ if (!err) { ++ err = 1; ++ au_fset_opts(opts->flags, TRUNC_XIB); ++ au_fset_opts(opts->flags, REFRESH); ++ } ++ break; ++ ++ case Opt_mod: ++ case Opt_imod: ++ err = au_br_mod(sb, &opt->mod, ++ au_ftest_opts(opts->flags, REMOUNT), ++ &do_refresh); ++ if (!err) { ++ err = 1; ++ if (do_refresh) ++ au_fset_opts(opts->flags, REFRESH); ++ } ++ break; ++ } ++ ++ return err; ++} ++ ++static int au_opt_xino(struct super_block *sb, struct au_opt *opt, ++ struct au_opt_xino **opt_xino, ++ struct au_opts *opts) ++{ ++ int err; ++ aufs_bindex_t bend, bindex; ++ struct dentry *root, *parent, *h_root; ++ ++ err = 0; ++ switch (opt->type) { ++ case Opt_xino: ++ err = au_xino_set(sb, &opt->xino, ++ !!au_ftest_opts(opts->flags, REMOUNT)); ++ if (unlikely(err)) ++ break; ++ ++ *opt_xino = &opt->xino; ++ au_xino_brid_set(sb, -1); ++ ++ /* safe d_parent access */ ++ parent = opt->xino.file->f_dentry->d_parent; ++ root = sb->s_root; ++ bend = au_sbend(sb); ++ for (bindex = 0; bindex <= bend; bindex++) { ++ h_root = au_h_dptr(root, bindex); ++ if (h_root == parent) { ++ au_xino_brid_set(sb, au_sbr_id(sb, bindex)); ++ break; ++ } ++ } ++ break; ++ ++ case Opt_noxino: ++ au_xino_clr(sb); ++ au_xino_brid_set(sb, -1); ++ *opt_xino = (void *)-1; ++ break; ++ } ++ ++ return err; ++} ++ ++int au_opts_verify(struct super_block *sb, unsigned long sb_flags, ++ unsigned int pending) ++{ ++ int err; ++ aufs_bindex_t bindex, bend; ++ unsigned char do_plink, skip, 
do_free; ++ struct au_branch *br; ++ struct au_wbr *wbr; ++ struct dentry *root; ++ struct inode *dir, *h_dir; ++ struct au_sbinfo *sbinfo; ++ struct au_hinode *hdir; ++ ++ SiMustAnyLock(sb); ++ ++ sbinfo = au_sbi(sb); ++ AuDebugOn(!(sbinfo->si_mntflags & AuOptMask_UDBA)); ++ ++ if (!(sb_flags & MS_RDONLY)) { ++ if (unlikely(!au_br_writable(au_sbr_perm(sb, 0)))) ++ pr_warning("first branch should be rw\n"); ++ if (unlikely(au_opt_test(sbinfo->si_mntflags, SHWH))) ++ pr_warning("shwh should be used with ro\n"); ++ } ++ ++ if (au_opt_test((sbinfo->si_mntflags | pending), UDBA_HNOTIFY) ++ && !au_opt_test(sbinfo->si_mntflags, XINO)) ++ pr_warning("udba=*notify requires xino\n"); ++ ++ err = 0; ++ root = sb->s_root; ++ dir = root->d_inode; ++ do_plink = !!au_opt_test(sbinfo->si_mntflags, PLINK); ++ bend = au_sbend(sb); ++ for (bindex = 0; !err && bindex <= bend; bindex++) { ++ skip = 0; ++ h_dir = au_h_iptr(dir, bindex); ++ br = au_sbr(sb, bindex); ++ do_free = 0; ++ ++ wbr = br->br_wbr; ++ if (wbr) ++ wbr_wh_read_lock(wbr); ++ ++ if (!au_br_writable(br->br_perm)) { ++ do_free = !!wbr; ++ skip = (!wbr ++ || (!wbr->wbr_whbase ++ && !wbr->wbr_plink ++ && !wbr->wbr_orph)); ++ } else if (!au_br_wh_linkable(br->br_perm)) { ++ /* skip = (!br->br_whbase && !br->br_orph); */ ++ skip = (!wbr || !wbr->wbr_whbase); ++ if (skip && wbr) { ++ if (do_plink) ++ skip = !!wbr->wbr_plink; ++ else ++ skip = !wbr->wbr_plink; ++ } ++ } else { ++ /* skip = (br->br_whbase && br->br_ohph); */ ++ skip = (wbr && wbr->wbr_whbase); ++ if (skip) { ++ if (do_plink) ++ skip = !!wbr->wbr_plink; ++ else ++ skip = !wbr->wbr_plink; ++ } ++ } ++ if (wbr) ++ wbr_wh_read_unlock(wbr); ++ ++ if (skip) ++ continue; ++ ++ hdir = au_hi(dir, bindex); ++ au_hn_imtx_lock_nested(hdir, AuLsc_I_PARENT); ++ if (wbr) ++ wbr_wh_write_lock(wbr); ++ err = au_wh_init(au_h_dptr(root, bindex), br, sb); ++ if (wbr) ++ wbr_wh_write_unlock(wbr); ++ au_hn_imtx_unlock(hdir); ++ ++ if (!err && do_free) { ++ kfree(wbr); ++ br->br_wbr = NULL; ++ } ++ } ++ ++ return err; ++} ++ ++int au_opts_mount(struct super_block *sb, struct au_opts *opts) ++{ ++ int err; ++ unsigned int tmp; ++ aufs_bindex_t bindex, bend; ++ struct au_opt *opt; ++ struct au_opt_xino *opt_xino, xino; ++ struct au_sbinfo *sbinfo; ++ struct au_branch *br; ++ ++ SiMustWriteLock(sb); ++ ++ err = 0; ++ opt_xino = NULL; ++ opt = opts->opt; ++ while (err >= 0 && opt->type != Opt_tail) ++ err = au_opt_simple(sb, opt++, opts); ++ if (err > 0) ++ err = 0; ++ else if (unlikely(err < 0)) ++ goto out; ++ ++ /* disable xino and udba temporary */ ++ sbinfo = au_sbi(sb); ++ tmp = sbinfo->si_mntflags; ++ au_opt_clr(sbinfo->si_mntflags, XINO); ++ au_opt_set_udba(sbinfo->si_mntflags, UDBA_REVAL); ++ ++ opt = opts->opt; ++ while (err >= 0 && opt->type != Opt_tail) ++ err = au_opt_br(sb, opt++, opts); ++ if (err > 0) ++ err = 0; ++ else if (unlikely(err < 0)) ++ goto out; ++ ++ bend = au_sbend(sb); ++ if (unlikely(bend < 0)) { ++ err = -EINVAL; ++ pr_err("no branches\n"); ++ goto out; ++ } ++ ++ if (au_opt_test(tmp, XINO)) ++ au_opt_set(sbinfo->si_mntflags, XINO); ++ opt = opts->opt; ++ while (!err && opt->type != Opt_tail) ++ err = au_opt_xino(sb, opt++, &opt_xino, opts); ++ if (unlikely(err)) ++ goto out; ++ ++ err = au_opts_verify(sb, sb->s_flags, tmp); ++ if (unlikely(err)) ++ goto out; ++ ++ /* restore xino */ ++ if (au_opt_test(tmp, XINO) && !opt_xino) { ++ xino.file = au_xino_def(sb); ++ err = PTR_ERR(xino.file); ++ if (IS_ERR(xino.file)) ++ goto out; ++ ++ err = au_xino_set(sb, &xino, /*remount*/0); ++ 
fput(xino.file); ++ if (unlikely(err)) ++ goto out; ++ } ++ ++ /* restore udba */ ++ tmp &= AuOptMask_UDBA; ++ sbinfo->si_mntflags &= ~AuOptMask_UDBA; ++ sbinfo->si_mntflags |= tmp; ++ bend = au_sbend(sb); ++ for (bindex = 0; bindex <= bend; bindex++) { ++ br = au_sbr(sb, bindex); ++ err = au_hnotify_reset_br(tmp, br, br->br_perm); ++ if (unlikely(err)) ++ AuIOErr("hnotify failed on br %d, %d, ignored\n", ++ bindex, err); ++ /* go on even if err */ ++ } ++ if (au_opt_test(tmp, UDBA_HNOTIFY)) { ++ struct inode *dir = sb->s_root->d_inode; ++ au_hn_reset(dir, au_hi_flags(dir, /*isdir*/1) & ~AuHi_XINO); ++ } ++ ++out: ++ return err; ++} ++ ++int au_opts_remount(struct super_block *sb, struct au_opts *opts) ++{ ++ int err, rerr; ++ struct inode *dir; ++ struct au_opt_xino *opt_xino; ++ struct au_opt *opt; ++ struct au_sbinfo *sbinfo; ++ ++ SiMustWriteLock(sb); ++ ++ dir = sb->s_root->d_inode; ++ sbinfo = au_sbi(sb); ++ err = 0; ++ opt_xino = NULL; ++ opt = opts->opt; ++ while (err >= 0 && opt->type != Opt_tail) { ++ err = au_opt_simple(sb, opt, opts); ++ if (!err) ++ err = au_opt_br(sb, opt, opts); ++ if (!err) ++ err = au_opt_xino(sb, opt, &opt_xino, opts); ++ opt++; ++ } ++ if (err > 0) ++ err = 0; ++ AuTraceErr(err); ++ /* go on even err */ ++ ++ rerr = au_opts_verify(sb, opts->sb_flags, /*pending*/0); ++ if (unlikely(rerr && !err)) ++ err = rerr; ++ ++ if (au_ftest_opts(opts->flags, TRUNC_XIB)) { ++ rerr = au_xib_trunc(sb); ++ if (unlikely(rerr && !err)) ++ err = rerr; ++ } ++ ++ /* will be handled by the caller */ ++ if (!au_ftest_opts(opts->flags, REFRESH) ++ && (opts->given_udba || au_opt_test(sbinfo->si_mntflags, XINO))) ++ au_fset_opts(opts->flags, REFRESH); ++ ++ AuDbg("status 0x%x\n", opts->flags); ++ return err; ++} ++ ++/* ---------------------------------------------------------------------- */ ++ ++unsigned int au_opt_udba(struct super_block *sb) ++{ ++ return au_mntflags(sb) & AuOptMask_UDBA; ++} +diff -uNr linux-3.2.0-gentoo-r1.orig//fs/aufs/opts.h linux-3.2.0-gentoo-r1/fs/aufs/opts.h +--- linux-3.2.0-gentoo-r1.orig//fs/aufs/opts.h 1970-01-01 01:00:00.000000000 +0100 ++++ linux-3.2.0-gentoo-r1/fs/aufs/opts.h 2012-01-17 12:11:24.868175939 +0100 +@@ -0,0 +1,209 @@ ++/* ++ * Copyright (C) 2005-2012 Junjiro R. Okajima ++ * ++ * This program, aufs is free software; you can redistribute it and/or modify ++ * it under the terms of the GNU General Public License as published by ++ * the Free Software Foundation; either version 2 of the License, or ++ * (at your option) any later version. ++ * ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ * GNU General Public License for more details. 
++ * ++ * You should have received a copy of the GNU General Public License ++ * along with this program; if not, write to the Free Software ++ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA ++ */ ++ ++/* ++ * mount options/flags ++ */ ++ ++#ifndef __AUFS_OPTS_H__ ++#define __AUFS_OPTS_H__ ++ ++#ifdef __KERNEL__ ++ ++#include ++ ++struct file; ++struct super_block; ++ ++/* ---------------------------------------------------------------------- */ ++ ++/* mount flags */ ++#define AuOpt_XINO 1 /* external inode number bitmap ++ and translation table */ ++#define AuOpt_TRUNC_XINO (1 << 1) /* truncate xino files */ ++#define AuOpt_UDBA_NONE (1 << 2) /* users direct branch access */ ++#define AuOpt_UDBA_REVAL (1 << 3) ++#define AuOpt_UDBA_HNOTIFY (1 << 4) ++#define AuOpt_SHWH (1 << 5) /* show whiteout */ ++#define AuOpt_PLINK (1 << 6) /* pseudo-link */ ++#define AuOpt_DIRPERM1 (1 << 7) /* unimplemented */ ++#define AuOpt_REFROF (1 << 8) /* unimplemented */ ++#define AuOpt_ALWAYS_DIROPQ (1 << 9) /* policy to creating diropq */ ++#define AuOpt_SUM (1 << 10) /* summation for statfs(2) */ ++#define AuOpt_SUM_W (1 << 11) /* unimplemented */ ++#define AuOpt_WARN_PERM (1 << 12) /* warn when add-branch */ ++#define AuOpt_VERBOSE (1 << 13) /* busy inode when del-branch */ ++#define AuOpt_DIO (1 << 14) /* direct io */ ++ ++#ifndef CONFIG_AUFS_HNOTIFY ++#undef AuOpt_UDBA_HNOTIFY ++#define AuOpt_UDBA_HNOTIFY 0 ++#endif ++#ifndef CONFIG_AUFS_SHWH ++#undef AuOpt_SHWH ++#define AuOpt_SHWH 0 ++#endif ++ ++#define AuOpt_Def (AuOpt_XINO \ ++ | AuOpt_UDBA_REVAL \ ++ | AuOpt_PLINK \ ++ /* | AuOpt_DIRPERM1 */ \ ++ | AuOpt_WARN_PERM) ++#define AuOptMask_UDBA (AuOpt_UDBA_NONE \ ++ | AuOpt_UDBA_REVAL \ ++ | AuOpt_UDBA_HNOTIFY) ++ ++#define au_opt_test(flags, name) (flags & AuOpt_##name) ++#define au_opt_set(flags, name) do { \ ++ BUILD_BUG_ON(AuOpt_##name & AuOptMask_UDBA); \ ++ ((flags) |= AuOpt_##name); \ ++} while (0) ++#define au_opt_set_udba(flags, name) do { \ ++ (flags) &= ~AuOptMask_UDBA; \ ++ ((flags) |= AuOpt_##name); \ ++} while (0) ++#define au_opt_clr(flags, name) do { \ ++ ((flags) &= ~AuOpt_##name); \ ++} while (0) ++ ++static inline unsigned int au_opts_plink(unsigned int mntflags) ++{ ++#ifdef CONFIG_PROC_FS ++ return mntflags; ++#else ++ return mntflags & ~AuOpt_PLINK; ++#endif ++} ++ ++/* ---------------------------------------------------------------------- */ ++ ++/* policies to select one among multiple writable branches */ ++enum { ++ AuWbrCreate_TDP, /* top down parent */ ++ AuWbrCreate_RR, /* round robin */ ++ AuWbrCreate_MFS, /* most free space */ ++ AuWbrCreate_MFSV, /* mfs with seconds */ ++ AuWbrCreate_MFSRR, /* mfs then rr */ ++ AuWbrCreate_MFSRRV, /* mfs then rr with seconds */ ++ AuWbrCreate_PMFS, /* parent and mfs */ ++ AuWbrCreate_PMFSV, /* parent and mfs with seconds */ ++ ++ AuWbrCreate_Def = AuWbrCreate_TDP ++}; ++ ++enum { ++ AuWbrCopyup_TDP, /* top down parent */ ++ AuWbrCopyup_BUP, /* bottom up parent */ ++ AuWbrCopyup_BU, /* bottom up */ ++ ++ AuWbrCopyup_Def = AuWbrCopyup_TDP ++}; ++ ++/* ---------------------------------------------------------------------- */ ++ ++struct au_opt_add { ++ aufs_bindex_t bindex; ++ char *pathname; ++ int perm; ++ struct path path; ++}; ++ ++struct au_opt_del { ++ char *pathname; ++ struct path h_path; ++}; ++ ++struct au_opt_mod { ++ char *path; ++ int perm; ++ struct dentry *h_root; ++}; ++ ++struct au_opt_xino { ++ char *path; ++ struct file *file; ++}; ++ ++struct au_opt_xino_itrunc { ++ aufs_bindex_t bindex; ++}; 
++ ++struct au_opt_wbr_create { ++ int wbr_create; ++ int mfs_second; ++ unsigned long long mfsrr_watermark; ++}; ++ ++struct au_opt { ++ int type; ++ union { ++ struct au_opt_xino xino; ++ struct au_opt_xino_itrunc xino_itrunc; ++ struct au_opt_add add; ++ struct au_opt_del del; ++ struct au_opt_mod mod; ++ int dirwh; ++ int rdcache; ++ unsigned int rdblk; ++ unsigned int rdhash; ++ int udba; ++ struct au_opt_wbr_create wbr_create; ++ int wbr_copyup; ++ }; ++}; ++ ++/* opts flags */ ++#define AuOpts_REMOUNT 1 ++#define AuOpts_REFRESH (1 << 1) ++#define AuOpts_TRUNC_XIB (1 << 2) ++#define AuOpts_REFRESH_DYAOP (1 << 3) ++#define au_ftest_opts(flags, name) ((flags) & AuOpts_##name) ++#define au_fset_opts(flags, name) \ ++ do { (flags) |= AuOpts_##name; } while (0) ++#define au_fclr_opts(flags, name) \ ++ do { (flags) &= ~AuOpts_##name; } while (0) ++ ++struct au_opts { ++ struct au_opt *opt; ++ int max_opt; ++ ++ unsigned int given_udba; ++ unsigned int flags; ++ unsigned long sb_flags; ++}; ++ ++/* ---------------------------------------------------------------------- */ ++ ++char *au_optstr_br_perm(int brperm); ++const char *au_optstr_udba(int udba); ++const char *au_optstr_wbr_copyup(int wbr_copyup); ++const char *au_optstr_wbr_create(int wbr_create); ++ ++void au_opts_free(struct au_opts *opts); ++int au_opts_parse(struct super_block *sb, char *str, struct au_opts *opts); ++int au_opts_verify(struct super_block *sb, unsigned long sb_flags, ++ unsigned int pending); ++int au_opts_mount(struct super_block *sb, struct au_opts *opts); ++int au_opts_remount(struct super_block *sb, struct au_opts *opts); ++ ++unsigned int au_opt_udba(struct super_block *sb); ++ ++/* ---------------------------------------------------------------------- */ ++ ++#endif /* __KERNEL__ */ ++#endif /* __AUFS_OPTS_H__ */ +diff -uNr linux-3.2.0-gentoo-r1.orig//fs/aufs/plink.c linux-3.2.0-gentoo-r1/fs/aufs/plink.c +--- linux-3.2.0-gentoo-r1.orig//fs/aufs/plink.c 1970-01-01 01:00:00.000000000 +0100 ++++ linux-3.2.0-gentoo-r1/fs/aufs/plink.c 2012-01-17 12:11:24.870490775 +0100 +@@ -0,0 +1,515 @@ ++/* ++ * Copyright (C) 2005-2012 Junjiro R. Okajima ++ * ++ * This program, aufs is free software; you can redistribute it and/or modify ++ * it under the terms of the GNU General Public License as published by ++ * the Free Software Foundation; either version 2 of the License, or ++ * (at your option) any later version. ++ * ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ * GNU General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public License ++ * along with this program; if not, write to the Free Software ++ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA ++ */ ++ ++/* ++ * pseudo-link ++ */ ++ ++#include "aufs.h" ++ ++/* ++ * the pseudo-link maintenance mode. ++ * during a user process maintains the pseudo-links, ++ * prohibit adding a new plink and branch manipulation. ++ * ++ * Flags ++ * NOPLM: ++ * For entry functions which will handle plink, and i_mutex is already held ++ * in VFS. ++ * They cannot wait and should return an error at once. ++ * Callers has to check the error. ++ * NOPLMW: ++ * For entry functions which will handle plink, but i_mutex is not held ++ * in VFS. ++ * They can wait the plink maintenance mode to finish. ++ * ++ * They behave like F_SETLK and F_SETLKW. 
++ * If the caller never handle plink, then both flags are unnecessary. ++ */ ++ ++int au_plink_maint(struct super_block *sb, int flags) ++{ ++ int err; ++ pid_t pid, ppid; ++ struct au_sbinfo *sbi; ++ ++ SiMustAnyLock(sb); ++ ++ err = 0; ++ if (!au_opt_test(au_mntflags(sb), PLINK)) ++ goto out; ++ ++ sbi = au_sbi(sb); ++ pid = sbi->si_plink_maint_pid; ++ if (!pid || pid == current->pid) ++ goto out; ++ ++ /* todo: it highly depends upon /sbin/mount.aufs */ ++ rcu_read_lock(); ++ ppid = task_pid_vnr(rcu_dereference(current->real_parent)); ++ rcu_read_unlock(); ++ if (pid == ppid) ++ goto out; ++ ++ if (au_ftest_lock(flags, NOPLMW)) { ++ /* if there is no i_mutex lock in VFS, we don't need to wait */ ++ /* AuDebugOn(!lockdep_depth(current)); */ ++ while (sbi->si_plink_maint_pid) { ++ si_read_unlock(sb); ++ /* gave up wake_up_bit() */ ++ wait_event(sbi->si_plink_wq, !sbi->si_plink_maint_pid); ++ ++ if (au_ftest_lock(flags, FLUSH)) ++ au_nwt_flush(&sbi->si_nowait); ++ si_noflush_read_lock(sb); ++ } ++ } else if (au_ftest_lock(flags, NOPLM)) { ++ AuDbg("ppid %d, pid %d\n", ppid, pid); ++ err = -EAGAIN; ++ } ++ ++out: ++ return err; ++} ++ ++void au_plink_maint_leave(struct au_sbinfo *sbinfo) ++{ ++ spin_lock(&sbinfo->si_plink_maint_lock); ++ sbinfo->si_plink_maint_pid = 0; ++ spin_unlock(&sbinfo->si_plink_maint_lock); ++ wake_up_all(&sbinfo->si_plink_wq); ++} ++ ++int au_plink_maint_enter(struct super_block *sb) ++{ ++ int err; ++ struct au_sbinfo *sbinfo; ++ ++ err = 0; ++ sbinfo = au_sbi(sb); ++ /* make sure i am the only one in this fs */ ++ si_write_lock(sb, AuLock_FLUSH); ++ if (au_opt_test(au_mntflags(sb), PLINK)) { ++ spin_lock(&sbinfo->si_plink_maint_lock); ++ if (!sbinfo->si_plink_maint_pid) ++ sbinfo->si_plink_maint_pid = current->pid; ++ else ++ err = -EBUSY; ++ spin_unlock(&sbinfo->si_plink_maint_lock); ++ } ++ si_write_unlock(sb); ++ ++ return err; ++} ++ ++/* ---------------------------------------------------------------------- */ ++ ++struct pseudo_link { ++ union { ++ struct list_head list; ++ struct rcu_head rcu; ++ }; ++ struct inode *inode; ++}; ++ ++#ifdef CONFIG_AUFS_DEBUG ++void au_plink_list(struct super_block *sb) ++{ ++ struct au_sbinfo *sbinfo; ++ struct list_head *plink_list; ++ struct pseudo_link *plink; ++ ++ SiMustAnyLock(sb); ++ ++ sbinfo = au_sbi(sb); ++ AuDebugOn(!au_opt_test(au_mntflags(sb), PLINK)); ++ AuDebugOn(au_plink_maint(sb, AuLock_NOPLM)); ++ ++ plink_list = &sbinfo->si_plink.head; ++ rcu_read_lock(); ++ list_for_each_entry_rcu(plink, plink_list, list) ++ AuDbg("%lu\n", plink->inode->i_ino); ++ rcu_read_unlock(); ++} ++#endif ++ ++/* is the inode pseudo-linked? */ ++int au_plink_test(struct inode *inode) ++{ ++ int found; ++ struct au_sbinfo *sbinfo; ++ struct list_head *plink_list; ++ struct pseudo_link *plink; ++ ++ sbinfo = au_sbi(inode->i_sb); ++ AuRwMustAnyLock(&sbinfo->si_rwsem); ++ AuDebugOn(!au_opt_test(au_mntflags(inode->i_sb), PLINK)); ++ AuDebugOn(au_plink_maint(inode->i_sb, AuLock_NOPLM)); ++ ++ found = 0; ++ plink_list = &sbinfo->si_plink.head; ++ rcu_read_lock(); ++ list_for_each_entry_rcu(plink, plink_list, list) ++ if (plink->inode == inode) { ++ found = 1; ++ break; ++ } ++ rcu_read_unlock(); ++ return found; ++} ++ ++/* ---------------------------------------------------------------------- */ ++ ++/* ++ * generate a name for plink. ++ * the file will be stored under AUFS_WH_PLINKDIR. 
++ */ ++/* 20 is max digits length of ulong 64 */ ++#define PLINK_NAME_LEN ((20 + 1) * 2) ++ ++static int plink_name(char *name, int len, struct inode *inode, ++ aufs_bindex_t bindex) ++{ ++ int rlen; ++ struct inode *h_inode; ++ ++ h_inode = au_h_iptr(inode, bindex); ++ rlen = snprintf(name, len, "%lu.%lu", inode->i_ino, h_inode->i_ino); ++ return rlen; ++} ++ ++struct au_do_plink_lkup_args { ++ struct dentry **errp; ++ struct qstr *tgtname; ++ struct dentry *h_parent; ++ struct au_branch *br; ++}; ++ ++static struct dentry *au_do_plink_lkup(struct qstr *tgtname, ++ struct dentry *h_parent, ++ struct au_branch *br) ++{ ++ struct dentry *h_dentry; ++ struct mutex *h_mtx; ++ ++ h_mtx = &h_parent->d_inode->i_mutex; ++ mutex_lock_nested(h_mtx, AuLsc_I_CHILD2); ++ h_dentry = au_lkup_one(tgtname, h_parent, br, /*nd*/NULL); ++ mutex_unlock(h_mtx); ++ return h_dentry; ++} ++ ++static void au_call_do_plink_lkup(void *args) ++{ ++ struct au_do_plink_lkup_args *a = args; ++ *a->errp = au_do_plink_lkup(a->tgtname, a->h_parent, a->br); ++} ++ ++/* lookup the plink-ed @inode under the branch at @bindex */ ++struct dentry *au_plink_lkup(struct inode *inode, aufs_bindex_t bindex) ++{ ++ struct dentry *h_dentry, *h_parent; ++ struct au_branch *br; ++ struct inode *h_dir; ++ int wkq_err; ++ char a[PLINK_NAME_LEN]; ++ struct qstr tgtname = { ++ .name = a ++ }; ++ ++ AuDebugOn(au_plink_maint(inode->i_sb, AuLock_NOPLM)); ++ ++ br = au_sbr(inode->i_sb, bindex); ++ h_parent = br->br_wbr->wbr_plink; ++ h_dir = h_parent->d_inode; ++ tgtname.len = plink_name(a, sizeof(a), inode, bindex); ++ ++ if (current_fsuid()) { ++ struct au_do_plink_lkup_args args = { ++ .errp = &h_dentry, ++ .tgtname = &tgtname, ++ .h_parent = h_parent, ++ .br = br ++ }; ++ ++ wkq_err = au_wkq_wait(au_call_do_plink_lkup, &args); ++ if (unlikely(wkq_err)) ++ h_dentry = ERR_PTR(wkq_err); ++ } else ++ h_dentry = au_do_plink_lkup(&tgtname, h_parent, br); ++ ++ return h_dentry; ++} ++ ++/* create a pseudo-link */ ++static int do_whplink(struct qstr *tgt, struct dentry *h_parent, ++ struct dentry *h_dentry, struct au_branch *br) ++{ ++ int err; ++ struct path h_path = { ++ .mnt = br->br_mnt ++ }; ++ struct inode *h_dir; ++ ++ h_dir = h_parent->d_inode; ++ mutex_lock_nested(&h_dir->i_mutex, AuLsc_I_CHILD2); ++again: ++ h_path.dentry = au_lkup_one(tgt, h_parent, br, /*nd*/NULL); ++ err = PTR_ERR(h_path.dentry); ++ if (IS_ERR(h_path.dentry)) ++ goto out; ++ ++ err = 0; ++ /* wh.plink dir is not monitored */ ++ /* todo: is it really safe? 
*/ ++ if (h_path.dentry->d_inode ++ && h_path.dentry->d_inode != h_dentry->d_inode) { ++ err = vfsub_unlink(h_dir, &h_path, /*force*/0); ++ dput(h_path.dentry); ++ h_path.dentry = NULL; ++ if (!err) ++ goto again; ++ } ++ if (!err && !h_path.dentry->d_inode) ++ err = vfsub_link(h_dentry, h_dir, &h_path); ++ dput(h_path.dentry); ++ ++out: ++ mutex_unlock(&h_dir->i_mutex); ++ return err; ++} ++ ++struct do_whplink_args { ++ int *errp; ++ struct qstr *tgt; ++ struct dentry *h_parent; ++ struct dentry *h_dentry; ++ struct au_branch *br; ++}; ++ ++static void call_do_whplink(void *args) ++{ ++ struct do_whplink_args *a = args; ++ *a->errp = do_whplink(a->tgt, a->h_parent, a->h_dentry, a->br); ++} ++ ++static int whplink(struct dentry *h_dentry, struct inode *inode, ++ aufs_bindex_t bindex, struct au_branch *br) ++{ ++ int err, wkq_err; ++ struct au_wbr *wbr; ++ struct dentry *h_parent; ++ struct inode *h_dir; ++ char a[PLINK_NAME_LEN]; ++ struct qstr tgtname = { ++ .name = a ++ }; ++ ++ wbr = au_sbr(inode->i_sb, bindex)->br_wbr; ++ h_parent = wbr->wbr_plink; ++ h_dir = h_parent->d_inode; ++ tgtname.len = plink_name(a, sizeof(a), inode, bindex); ++ ++ /* always superio. */ ++ if (current_fsuid()) { ++ struct do_whplink_args args = { ++ .errp = &err, ++ .tgt = &tgtname, ++ .h_parent = h_parent, ++ .h_dentry = h_dentry, ++ .br = br ++ }; ++ wkq_err = au_wkq_wait(call_do_whplink, &args); ++ if (unlikely(wkq_err)) ++ err = wkq_err; ++ } else ++ err = do_whplink(&tgtname, h_parent, h_dentry, br); ++ ++ return err; ++} ++ ++/* free a single plink */ ++static void do_put_plink(struct pseudo_link *plink, int do_del) ++{ ++ if (do_del) ++ list_del(&plink->list); ++ iput(plink->inode); ++ kfree(plink); ++} ++ ++static void do_put_plink_rcu(struct rcu_head *rcu) ++{ ++ struct pseudo_link *plink; ++ ++ plink = container_of(rcu, struct pseudo_link, rcu); ++ iput(plink->inode); ++ kfree(plink); ++} ++ ++/* ++ * create a new pseudo-link for @h_dentry on @bindex. ++ * the linked inode is held in aufs @inode. 
++ */ ++void au_plink_append(struct inode *inode, aufs_bindex_t bindex, ++ struct dentry *h_dentry) ++{ ++ struct super_block *sb; ++ struct au_sbinfo *sbinfo; ++ struct list_head *plink_list; ++ struct pseudo_link *plink, *tmp; ++ int found, err, cnt; ++ ++ sb = inode->i_sb; ++ sbinfo = au_sbi(sb); ++ AuDebugOn(!au_opt_test(au_mntflags(sb), PLINK)); ++ AuDebugOn(au_plink_maint(sb, AuLock_NOPLM)); ++ ++ cnt = 0; ++ found = 0; ++ plink_list = &sbinfo->si_plink.head; ++ rcu_read_lock(); ++ list_for_each_entry_rcu(plink, plink_list, list) { ++ cnt++; ++ if (plink->inode == inode) { ++ found = 1; ++ break; ++ } ++ } ++ rcu_read_unlock(); ++ if (found) ++ return; ++ ++ tmp = kmalloc(sizeof(*plink), GFP_NOFS); ++ if (tmp) ++ tmp->inode = au_igrab(inode); ++ else { ++ err = -ENOMEM; ++ goto out; ++ } ++ ++ spin_lock(&sbinfo->si_plink.spin); ++ list_for_each_entry(plink, plink_list, list) { ++ if (plink->inode == inode) { ++ found = 1; ++ break; ++ } ++ } ++ if (!found) ++ list_add_rcu(&tmp->list, plink_list); ++ spin_unlock(&sbinfo->si_plink.spin); ++ if (!found) { ++ cnt++; ++ WARN_ONCE(cnt > AUFS_PLINK_WARN, ++ "unexpectedly many pseudo links, %d\n", cnt); ++ err = whplink(h_dentry, inode, bindex, au_sbr(sb, bindex)); ++ } else { ++ do_put_plink(tmp, 0); ++ return; ++ } ++ ++out: ++ if (unlikely(err)) { ++ pr_warning("err %d, damaged pseudo link.\n", err); ++ if (tmp) { ++ au_spl_del_rcu(&tmp->list, &sbinfo->si_plink); ++ call_rcu(&tmp->rcu, do_put_plink_rcu); ++ } ++ } ++} ++ ++/* free all plinks */ ++void au_plink_put(struct super_block *sb, int verbose) ++{ ++ struct au_sbinfo *sbinfo; ++ struct list_head *plink_list; ++ struct pseudo_link *plink, *tmp; ++ ++ SiMustWriteLock(sb); ++ ++ sbinfo = au_sbi(sb); ++ AuDebugOn(!au_opt_test(au_mntflags(sb), PLINK)); ++ AuDebugOn(au_plink_maint(sb, AuLock_NOPLM)); ++ ++ plink_list = &sbinfo->si_plink.head; ++ /* no spin_lock since sbinfo is write-locked */ ++ WARN(verbose && !list_empty(plink_list), "pseudo-link is not flushed"); ++ list_for_each_entry_safe(plink, tmp, plink_list, list) ++ do_put_plink(plink, 0); ++ INIT_LIST_HEAD(plink_list); ++} ++ ++void au_plink_clean(struct super_block *sb, int verbose) ++{ ++ struct dentry *root; ++ ++ root = sb->s_root; ++ aufs_write_lock(root); ++ if (au_opt_test(au_mntflags(sb), PLINK)) ++ au_plink_put(sb, verbose); ++ aufs_write_unlock(root); ++} ++ ++/* free the plinks on a branch specified by @br_id */ ++void au_plink_half_refresh(struct super_block *sb, aufs_bindex_t br_id) ++{ ++ struct au_sbinfo *sbinfo; ++ struct list_head *plink_list; ++ struct pseudo_link *plink, *tmp; ++ struct inode *inode; ++ aufs_bindex_t bstart, bend, bindex; ++ unsigned char do_put; ++ ++ SiMustWriteLock(sb); ++ ++ sbinfo = au_sbi(sb); ++ AuDebugOn(!au_opt_test(au_mntflags(sb), PLINK)); ++ AuDebugOn(au_plink_maint(sb, AuLock_NOPLM)); ++ ++ plink_list = &sbinfo->si_plink.head; ++ /* no spin_lock since sbinfo is write-locked */ ++ list_for_each_entry_safe(plink, tmp, plink_list, list) { ++ do_put = 0; ++ inode = au_igrab(plink->inode); ++ ii_write_lock_child(inode); ++ bstart = au_ibstart(inode); ++ bend = au_ibend(inode); ++ if (bstart >= 0) { ++ for (bindex = bstart; bindex <= bend; bindex++) { ++ if (!au_h_iptr(inode, bindex) ++ || au_ii_br_id(inode, bindex) != br_id) ++ continue; ++ au_set_h_iptr(inode, bindex, NULL, 0); ++ do_put = 1; ++ break; ++ } ++ } else ++ do_put_plink(plink, 1); ++ ++ if (do_put) { ++ for (bindex = bstart; bindex <= bend; bindex++) ++ if (au_h_iptr(inode, bindex)) { ++ do_put = 0; ++ break; ++ } ++ if 
(do_put) ++ do_put_plink(plink, 1); ++ } ++ ii_write_unlock(inode); ++ iput(inode); ++ } ++} +diff -uNr linux-3.2.0-gentoo-r1.orig//fs/aufs/poll.c linux-3.2.0-gentoo-r1/fs/aufs/poll.c +--- linux-3.2.0-gentoo-r1.orig//fs/aufs/poll.c 1970-01-01 01:00:00.000000000 +0100 ++++ linux-3.2.0-gentoo-r1/fs/aufs/poll.c 2012-01-17 12:11:24.870490775 +0100 +@@ -0,0 +1,56 @@ ++/* ++ * Copyright (C) 2005-2012 Junjiro R. Okajima ++ * ++ * This program, aufs is free software; you can redistribute it and/or modify ++ * it under the terms of the GNU General Public License as published by ++ * the Free Software Foundation; either version 2 of the License, or ++ * (at your option) any later version. ++ * ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ * GNU General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public License ++ * along with this program; if not, write to the Free Software ++ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA ++ */ ++ ++/* ++ * poll operation ++ * There is only one filesystem which implements ->poll operation, currently. ++ */ ++ ++#include "aufs.h" ++ ++unsigned int aufs_poll(struct file *file, poll_table *wait) ++{ ++ unsigned int mask; ++ int err; ++ struct file *h_file; ++ struct dentry *dentry; ++ struct super_block *sb; ++ ++ /* We should pretend an error happened. */ ++ mask = POLLERR /* | POLLIN | POLLOUT */; ++ dentry = file->f_dentry; ++ sb = dentry->d_sb; ++ si_read_lock(sb, AuLock_FLUSH | AuLock_NOPLMW); ++ err = au_reval_and_lock_fdi(file, au_reopen_nondir, /*wlock*/0); ++ if (unlikely(err)) ++ goto out; ++ ++ /* it is not an error if h_file has no operation */ ++ mask = DEFAULT_POLLMASK; ++ h_file = au_hf_top(file); ++ if (h_file->f_op && h_file->f_op->poll) ++ mask = h_file->f_op->poll(h_file, wait); ++ ++ di_read_unlock(dentry, AuLock_IR); ++ fi_read_unlock(file); ++ ++out: ++ si_read_unlock(sb); ++ AuTraceErr((int)mask); ++ return mask; ++} +diff -uNr linux-3.2.0-gentoo-r1.orig//fs/aufs/procfs.c linux-3.2.0-gentoo-r1/fs/aufs/procfs.c +--- linux-3.2.0-gentoo-r1.orig//fs/aufs/procfs.c 1970-01-01 01:00:00.000000000 +0100 ++++ linux-3.2.0-gentoo-r1/fs/aufs/procfs.c 2012-01-17 12:11:24.893639131 +0100 +@@ -0,0 +1,170 @@ ++/* ++ * Copyright (C) 2010-2012 Junjiro R. Okajima ++ * ++ * This program, aufs is free software; you can redistribute it and/or modify ++ * it under the terms of the GNU General Public License as published by ++ * the Free Software Foundation; either version 2 of the License, or ++ * (at your option) any later version. ++ * ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ * GNU General Public License for more details. 
++ * ++ * You should have received a copy of the GNU General Public License ++ * along with this program; if not, write to the Free Software ++ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA ++ */ ++ ++/* ++ * procfs interfaces ++ */ ++ ++#include ++#include "aufs.h" ++ ++static int au_procfs_plm_release(struct inode *inode, struct file *file) ++{ ++ struct au_sbinfo *sbinfo; ++ ++ sbinfo = file->private_data; ++ if (sbinfo) { ++ au_plink_maint_leave(sbinfo); ++ kobject_put(&sbinfo->si_kobj); ++ } ++ ++ return 0; ++} ++ ++static void au_procfs_plm_write_clean(struct file *file) ++{ ++ struct au_sbinfo *sbinfo; ++ ++ sbinfo = file->private_data; ++ if (sbinfo) ++ au_plink_clean(sbinfo->si_sb, /*verbose*/0); ++} ++ ++static int au_procfs_plm_write_si(struct file *file, unsigned long id) ++{ ++ int err; ++ struct super_block *sb; ++ struct au_sbinfo *sbinfo; ++ ++ err = -EBUSY; ++ if (unlikely(file->private_data)) ++ goto out; ++ ++ sb = NULL; ++ /* don't use au_sbilist_lock() here */ ++ spin_lock(&au_sbilist.spin); ++ list_for_each_entry(sbinfo, &au_sbilist.head, si_list) ++ if (id == sysaufs_si_id(sbinfo)) { ++ kobject_get(&sbinfo->si_kobj); ++ sb = sbinfo->si_sb; ++ break; ++ } ++ spin_unlock(&au_sbilist.spin); ++ ++ err = -EINVAL; ++ if (unlikely(!sb)) ++ goto out; ++ ++ err = au_plink_maint_enter(sb); ++ if (!err) ++ /* keep kobject_get() */ ++ file->private_data = sbinfo; ++ else ++ kobject_put(&sbinfo->si_kobj); ++out: ++ return err; ++} ++ ++/* ++ * Accept a valid "si=xxxx" only. ++ * Once it is accepted successfully, accept "clean" too. ++ */ ++static ssize_t au_procfs_plm_write(struct file *file, const char __user *ubuf, ++ size_t count, loff_t *ppos) ++{ ++ ssize_t err; ++ unsigned long id; ++ /* last newline is allowed */ ++ char buf[3 + sizeof(unsigned long) * 2 + 1]; ++ ++ err = -EACCES; ++ if (unlikely(!capable(CAP_SYS_ADMIN))) ++ goto out; ++ ++ err = -EINVAL; ++ if (unlikely(count > sizeof(buf))) ++ goto out; ++ ++ err = copy_from_user(buf, ubuf, count); ++ if (unlikely(err)) { ++ err = -EFAULT; ++ goto out; ++ } ++ buf[count] = 0; ++ ++ err = -EINVAL; ++ if (!strcmp("clean", buf)) { ++ au_procfs_plm_write_clean(file); ++ goto out_success; ++ } else if (unlikely(strncmp("si=", buf, 3))) ++ goto out; ++ ++ err = kstrtoul(buf + 3, 16, &id); ++ if (unlikely(err)) ++ goto out; ++ ++ err = au_procfs_plm_write_si(file, id); ++ if (unlikely(err)) ++ goto out; ++ ++out_success: ++ err = count; /* success */ ++out: ++ return err; ++} ++ ++static const struct file_operations au_procfs_plm_fop = { ++ .write = au_procfs_plm_write, ++ .release = au_procfs_plm_release, ++ .owner = THIS_MODULE ++}; ++ ++/* ---------------------------------------------------------------------- */ ++ ++static struct proc_dir_entry *au_procfs_dir; ++ ++void au_procfs_fin(void) ++{ ++ remove_proc_entry(AUFS_PLINK_MAINT_NAME, au_procfs_dir); ++ remove_proc_entry(AUFS_PLINK_MAINT_DIR, NULL); ++} ++ ++int __init au_procfs_init(void) ++{ ++ int err; ++ struct proc_dir_entry *entry; ++ ++ err = -ENOMEM; ++ au_procfs_dir = proc_mkdir(AUFS_PLINK_MAINT_DIR, NULL); ++ if (unlikely(!au_procfs_dir)) ++ goto out; ++ ++ entry = proc_create(AUFS_PLINK_MAINT_NAME, S_IFREG | S_IWUSR, ++ au_procfs_dir, &au_procfs_plm_fop); ++ if (unlikely(!entry)) ++ goto out_dir; ++ ++ err = 0; ++ goto out; /* success */ ++ ++ ++out_dir: ++ remove_proc_entry(AUFS_PLINK_MAINT_DIR, NULL); ++out: ++ return err; ++} +diff -uNr linux-3.2.0-gentoo-r1.orig//fs/aufs/rdu.c linux-3.2.0-gentoo-r1/fs/aufs/rdu.c +--- 
linux-3.2.0-gentoo-r1.orig//fs/aufs/rdu.c 1970-01-01 01:00:00.000000000 +0100 ++++ linux-3.2.0-gentoo-r1/fs/aufs/rdu.c 2012-01-17 12:11:24.893639131 +0100 +@@ -0,0 +1,383 @@ ++/* ++ * Copyright (C) 2005-2012 Junjiro R. Okajima ++ * ++ * This program, aufs is free software; you can redistribute it and/or modify ++ * it under the terms of the GNU General Public License as published by ++ * the Free Software Foundation; either version 2 of the License, or ++ * (at your option) any later version. ++ * ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ * GNU General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public License ++ * along with this program; if not, write to the Free Software ++ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA ++ */ ++ ++/* ++ * readdir in userspace. ++ */ ++ ++#include ++#include ++#include ++#include "aufs.h" ++ ++/* bits for struct aufs_rdu.flags */ ++#define AuRdu_CALLED 1 ++#define AuRdu_CONT (1 << 1) ++#define AuRdu_FULL (1 << 2) ++#define au_ftest_rdu(flags, name) ((flags) & AuRdu_##name) ++#define au_fset_rdu(flags, name) \ ++ do { (flags) |= AuRdu_##name; } while (0) ++#define au_fclr_rdu(flags, name) \ ++ do { (flags) &= ~AuRdu_##name; } while (0) ++ ++struct au_rdu_arg { ++ struct aufs_rdu *rdu; ++ union au_rdu_ent_ul ent; ++ unsigned long end; ++ ++ struct super_block *sb; ++ int err; ++}; ++ ++static int au_rdu_fill(void *__arg, const char *name, int nlen, ++ loff_t offset, u64 h_ino, unsigned int d_type) ++{ ++ int err, len; ++ struct au_rdu_arg *arg = __arg; ++ struct aufs_rdu *rdu = arg->rdu; ++ struct au_rdu_ent ent; ++ ++ err = 0; ++ arg->err = 0; ++ au_fset_rdu(rdu->cookie.flags, CALLED); ++ len = au_rdu_len(nlen); ++ if (arg->ent.ul + len < arg->end) { ++ ent.ino = h_ino; ++ ent.bindex = rdu->cookie.bindex; ++ ent.type = d_type; ++ ent.nlen = nlen; ++ if (unlikely(nlen > AUFS_MAX_NAMELEN)) ++ ent.type = DT_UNKNOWN; ++ ++ /* unnecessary to support mmap_sem since this is a dir */ ++ err = -EFAULT; ++ if (copy_to_user(arg->ent.e, &ent, sizeof(ent))) ++ goto out; ++ if (copy_to_user(arg->ent.e->name, name, nlen)) ++ goto out; ++ /* the terminating NULL */ ++ if (__put_user(0, arg->ent.e->name + nlen)) ++ goto out; ++ err = 0; ++ /* AuDbg("%p, %.*s\n", arg->ent.p, nlen, name); */ ++ arg->ent.ul += len; ++ rdu->rent++; ++ } else { ++ err = -EFAULT; ++ au_fset_rdu(rdu->cookie.flags, FULL); ++ rdu->full = 1; ++ rdu->tail = arg->ent; ++ } ++ ++out: ++ /* AuTraceErr(err); */ ++ return err; ++} ++ ++static int au_rdu_do(struct file *h_file, struct au_rdu_arg *arg) ++{ ++ int err; ++ loff_t offset; ++ struct au_rdu_cookie *cookie = &arg->rdu->cookie; ++ ++ offset = vfsub_llseek(h_file, cookie->h_pos, SEEK_SET); ++ err = offset; ++ if (unlikely(offset != cookie->h_pos)) ++ goto out; ++ ++ err = 0; ++ do { ++ arg->err = 0; ++ au_fclr_rdu(cookie->flags, CALLED); ++ /* smp_mb(); */ ++ err = vfsub_readdir(h_file, au_rdu_fill, arg); ++ if (err >= 0) ++ err = arg->err; ++ } while (!err ++ && au_ftest_rdu(cookie->flags, CALLED) ++ && !au_ftest_rdu(cookie->flags, FULL)); ++ cookie->h_pos = h_file->f_pos; ++ ++out: ++ AuTraceErr(err); ++ return err; ++} ++ ++static int au_rdu(struct file *file, struct aufs_rdu *rdu) ++{ ++ int err; ++ aufs_bindex_t bend; ++ struct au_rdu_arg arg; ++ struct dentry *dentry; ++ struct inode *inode; ++ struct 
file *h_file; ++ struct au_rdu_cookie *cookie = &rdu->cookie; ++ ++ err = !access_ok(VERIFY_WRITE, rdu->ent.e, rdu->sz); ++ if (unlikely(err)) { ++ err = -EFAULT; ++ AuTraceErr(err); ++ goto out; ++ } ++ rdu->rent = 0; ++ rdu->tail = rdu->ent; ++ rdu->full = 0; ++ arg.rdu = rdu; ++ arg.ent = rdu->ent; ++ arg.end = arg.ent.ul; ++ arg.end += rdu->sz; ++ ++ err = -ENOTDIR; ++ if (unlikely(!file->f_op || !file->f_op->readdir)) ++ goto out; ++ ++ err = security_file_permission(file, MAY_READ); ++ AuTraceErr(err); ++ if (unlikely(err)) ++ goto out; ++ ++ dentry = file->f_dentry; ++ inode = dentry->d_inode; ++#if 1 ++ mutex_lock(&inode->i_mutex); ++#else ++ err = mutex_lock_killable(&inode->i_mutex); ++ AuTraceErr(err); ++ if (unlikely(err)) ++ goto out; ++#endif ++ ++ arg.sb = inode->i_sb; ++ err = si_read_lock(arg.sb, AuLock_FLUSH | AuLock_NOPLM); ++ if (unlikely(err)) ++ goto out_mtx; ++ err = au_alive_dir(dentry); ++ if (unlikely(err)) ++ goto out_si; ++ /* todo: reval? */ ++ fi_read_lock(file); ++ ++ err = -EAGAIN; ++ if (unlikely(au_ftest_rdu(cookie->flags, CONT) ++ && cookie->generation != au_figen(file))) ++ goto out_unlock; ++ ++ err = 0; ++ if (!rdu->blk) { ++ rdu->blk = au_sbi(arg.sb)->si_rdblk; ++ if (!rdu->blk) ++ rdu->blk = au_dir_size(file, /*dentry*/NULL); ++ } ++ bend = au_fbstart(file); ++ if (cookie->bindex < bend) ++ cookie->bindex = bend; ++ bend = au_fbend_dir(file); ++ /* AuDbg("b%d, b%d\n", cookie->bindex, bend); */ ++ for (; !err && cookie->bindex <= bend; ++ cookie->bindex++, cookie->h_pos = 0) { ++ h_file = au_hf_dir(file, cookie->bindex); ++ if (!h_file) ++ continue; ++ ++ au_fclr_rdu(cookie->flags, FULL); ++ err = au_rdu_do(h_file, &arg); ++ AuTraceErr(err); ++ if (unlikely(au_ftest_rdu(cookie->flags, FULL) || err)) ++ break; ++ } ++ AuDbg("rent %llu\n", rdu->rent); ++ ++ if (!err && !au_ftest_rdu(cookie->flags, CONT)) { ++ rdu->shwh = !!au_opt_test(au_sbi(arg.sb)->si_mntflags, SHWH); ++ au_fset_rdu(cookie->flags, CONT); ++ cookie->generation = au_figen(file); ++ } ++ ++ ii_read_lock_child(inode); ++ fsstack_copy_attr_atime(inode, au_h_iptr(inode, au_ibstart(inode))); ++ ii_read_unlock(inode); ++ ++out_unlock: ++ fi_read_unlock(file); ++out_si: ++ si_read_unlock(arg.sb); ++out_mtx: ++ mutex_unlock(&inode->i_mutex); ++out: ++ AuTraceErr(err); ++ return err; ++} ++ ++static int au_rdu_ino(struct file *file, struct aufs_rdu *rdu) ++{ ++ int err; ++ ino_t ino; ++ unsigned long long nent; ++ union au_rdu_ent_ul *u; ++ struct au_rdu_ent ent; ++ struct super_block *sb; ++ ++ err = 0; ++ nent = rdu->nent; ++ u = &rdu->ent; ++ sb = file->f_dentry->d_sb; ++ si_read_lock(sb, AuLock_FLUSH); ++ while (nent-- > 0) { ++ /* unnecessary to support mmap_sem since this is a dir */ ++ err = copy_from_user(&ent, u->e, sizeof(ent)); ++ if (!err) ++ err = !access_ok(VERIFY_WRITE, &u->e->ino, sizeof(ino)); ++ if (unlikely(err)) { ++ err = -EFAULT; ++ AuTraceErr(err); ++ break; ++ } ++ ++ /* AuDbg("b%d, i%llu\n", ent.bindex, ent.ino); */ ++ if (!ent.wh) ++ err = au_ino(sb, ent.bindex, ent.ino, ent.type, &ino); ++ else ++ err = au_wh_ino(sb, ent.bindex, ent.ino, ent.type, ++ &ino); ++ if (unlikely(err)) { ++ AuTraceErr(err); ++ break; ++ } ++ ++ err = __put_user(ino, &u->e->ino); ++ if (unlikely(err)) { ++ err = -EFAULT; ++ AuTraceErr(err); ++ break; ++ } ++ u->ul += au_rdu_len(ent.nlen); ++ } ++ si_read_unlock(sb); ++ ++ return err; ++} ++ ++/* ---------------------------------------------------------------------- */ ++ ++static int au_rdu_verify(struct aufs_rdu *rdu) ++{ ++ AuDbg("rdu{%llu, 
%p, %u | %u | %llu, %u, %u | " ++ "%llu, b%d, 0x%x, g%u}\n", ++ rdu->sz, rdu->ent.e, rdu->verify[AufsCtlRduV_SZ], ++ rdu->blk, ++ rdu->rent, rdu->shwh, rdu->full, ++ rdu->cookie.h_pos, rdu->cookie.bindex, rdu->cookie.flags, ++ rdu->cookie.generation); ++ ++ if (rdu->verify[AufsCtlRduV_SZ] == sizeof(*rdu)) ++ return 0; ++ ++ AuDbg("%u:%u\n", ++ rdu->verify[AufsCtlRduV_SZ], (unsigned int)sizeof(*rdu)); ++ return -EINVAL; ++} ++ ++long au_rdu_ioctl(struct file *file, unsigned int cmd, unsigned long arg) ++{ ++ long err, e; ++ struct aufs_rdu rdu; ++ void __user *p = (void __user *)arg; ++ ++ err = copy_from_user(&rdu, p, sizeof(rdu)); ++ if (unlikely(err)) { ++ err = -EFAULT; ++ AuTraceErr(err); ++ goto out; ++ } ++ err = au_rdu_verify(&rdu); ++ if (unlikely(err)) ++ goto out; ++ ++ switch (cmd) { ++ case AUFS_CTL_RDU: ++ err = au_rdu(file, &rdu); ++ if (unlikely(err)) ++ break; ++ ++ e = copy_to_user(p, &rdu, sizeof(rdu)); ++ if (unlikely(e)) { ++ err = -EFAULT; ++ AuTraceErr(err); ++ } ++ break; ++ case AUFS_CTL_RDU_INO: ++ err = au_rdu_ino(file, &rdu); ++ break; ++ ++ default: ++ /* err = -ENOTTY; */ ++ err = -EINVAL; ++ } ++ ++out: ++ AuTraceErr(err); ++ return err; ++} ++ ++#ifdef CONFIG_COMPAT ++long au_rdu_compat_ioctl(struct file *file, unsigned int cmd, unsigned long arg) ++{ ++ long err, e; ++ struct aufs_rdu rdu; ++ void __user *p = compat_ptr(arg); ++ ++ /* todo: get_user()? */ ++ err = copy_from_user(&rdu, p, sizeof(rdu)); ++ if (unlikely(err)) { ++ err = -EFAULT; ++ AuTraceErr(err); ++ goto out; ++ } ++ rdu.ent.e = compat_ptr(rdu.ent.ul); ++ err = au_rdu_verify(&rdu); ++ if (unlikely(err)) ++ goto out; ++ ++ switch (cmd) { ++ case AUFS_CTL_RDU: ++ err = au_rdu(file, &rdu); ++ if (unlikely(err)) ++ break; ++ ++ rdu.ent.ul = ptr_to_compat(rdu.ent.e); ++ rdu.tail.ul = ptr_to_compat(rdu.tail.e); ++ e = copy_to_user(p, &rdu, sizeof(rdu)); ++ if (unlikely(e)) { ++ err = -EFAULT; ++ AuTraceErr(err); ++ } ++ break; ++ case AUFS_CTL_RDU_INO: ++ err = au_rdu_ino(file, &rdu); ++ break; ++ ++ default: ++ /* err = -ENOTTY; */ ++ err = -EINVAL; ++ } ++ ++out: ++ AuTraceErr(err); ++ return err; ++} ++#endif +diff -uNr linux-3.2.0-gentoo-r1.orig//fs/aufs/rwsem.h linux-3.2.0-gentoo-r1/fs/aufs/rwsem.h +--- linux-3.2.0-gentoo-r1.orig//fs/aufs/rwsem.h 1970-01-01 01:00:00.000000000 +0100 ++++ linux-3.2.0-gentoo-r1/fs/aufs/rwsem.h 2012-01-17 12:11:24.916787487 +0100 +@@ -0,0 +1,188 @@ ++/* ++ * Copyright (C) 2005-2012 Junjiro R. Okajima ++ * ++ * This program, aufs is free software; you can redistribute it and/or modify ++ * it under the terms of the GNU General Public License as published by ++ * the Free Software Foundation; either version 2 of the License, or ++ * (at your option) any later version. ++ * ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ * GNU General Public License for more details. 
++ * ++ * You should have received a copy of the GNU General Public License ++ * along with this program; if not, write to the Free Software ++ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA ++ */ ++ ++/* ++ * simple read-write semaphore wrappers ++ */ ++ ++#ifndef __AUFS_RWSEM_H__ ++#define __AUFS_RWSEM_H__ ++ ++#ifdef __KERNEL__ ++ ++#include "debug.h" ++ ++struct au_rwsem { ++ struct rw_semaphore rwsem; ++#ifdef CONFIG_AUFS_DEBUG ++ /* just for debugging, not almighty counter */ ++ atomic_t rcnt, wcnt; ++#endif ++}; ++ ++#ifdef CONFIG_AUFS_DEBUG ++#define AuDbgCntInit(rw) do { \ ++ atomic_set(&(rw)->rcnt, 0); \ ++ atomic_set(&(rw)->wcnt, 0); \ ++ smp_mb(); /* atomic set */ \ ++} while (0) ++ ++#define AuDbgRcntInc(rw) atomic_inc(&(rw)->rcnt) ++#define AuDbgRcntDec(rw) WARN_ON(atomic_dec_return(&(rw)->rcnt) < 0) ++#define AuDbgWcntInc(rw) atomic_inc(&(rw)->wcnt) ++#define AuDbgWcntDec(rw) WARN_ON(atomic_dec_return(&(rw)->wcnt) < 0) ++#else ++#define AuDbgCntInit(rw) do {} while (0) ++#define AuDbgRcntInc(rw) do {} while (0) ++#define AuDbgRcntDec(rw) do {} while (0) ++#define AuDbgWcntInc(rw) do {} while (0) ++#define AuDbgWcntDec(rw) do {} while (0) ++#endif /* CONFIG_AUFS_DEBUG */ ++ ++/* to debug easier, do not make them inlined functions */ ++#define AuRwMustNoWaiters(rw) AuDebugOn(!list_empty(&(rw)->rwsem.wait_list)) ++/* rwsem_is_locked() is unusable */ ++#define AuRwMustReadLock(rw) AuDebugOn(atomic_read(&(rw)->rcnt) <= 0) ++#define AuRwMustWriteLock(rw) AuDebugOn(atomic_read(&(rw)->wcnt) <= 0) ++#define AuRwMustAnyLock(rw) AuDebugOn(atomic_read(&(rw)->rcnt) <= 0 \ ++ && atomic_read(&(rw)->wcnt) <= 0) ++#define AuRwDestroy(rw) AuDebugOn(atomic_read(&(rw)->rcnt) \ ++ || atomic_read(&(rw)->wcnt)) ++ ++#define au_rw_class(rw, key) lockdep_set_class(&(rw)->rwsem, key) ++ ++static inline void au_rw_init(struct au_rwsem *rw) ++{ ++ AuDbgCntInit(rw); ++ init_rwsem(&rw->rwsem); ++} ++ ++static inline void au_rw_init_wlock(struct au_rwsem *rw) ++{ ++ au_rw_init(rw); ++ down_write(&rw->rwsem); ++ AuDbgWcntInc(rw); ++} ++ ++static inline void au_rw_init_wlock_nested(struct au_rwsem *rw, ++ unsigned int lsc) ++{ ++ au_rw_init(rw); ++ down_write_nested(&rw->rwsem, lsc); ++ AuDbgWcntInc(rw); ++} ++ ++static inline void au_rw_read_lock(struct au_rwsem *rw) ++{ ++ down_read(&rw->rwsem); ++ AuDbgRcntInc(rw); ++} ++ ++static inline void au_rw_read_lock_nested(struct au_rwsem *rw, unsigned int lsc) ++{ ++ down_read_nested(&rw->rwsem, lsc); ++ AuDbgRcntInc(rw); ++} ++ ++static inline void au_rw_read_unlock(struct au_rwsem *rw) ++{ ++ AuRwMustReadLock(rw); ++ AuDbgRcntDec(rw); ++ up_read(&rw->rwsem); ++} ++ ++static inline void au_rw_dgrade_lock(struct au_rwsem *rw) ++{ ++ AuRwMustWriteLock(rw); ++ AuDbgRcntInc(rw); ++ AuDbgWcntDec(rw); ++ downgrade_write(&rw->rwsem); ++} ++ ++static inline void au_rw_write_lock(struct au_rwsem *rw) ++{ ++ down_write(&rw->rwsem); ++ AuDbgWcntInc(rw); ++} ++ ++static inline void au_rw_write_lock_nested(struct au_rwsem *rw, ++ unsigned int lsc) ++{ ++ down_write_nested(&rw->rwsem, lsc); ++ AuDbgWcntInc(rw); ++} ++ ++static inline void au_rw_write_unlock(struct au_rwsem *rw) ++{ ++ AuRwMustWriteLock(rw); ++ AuDbgWcntDec(rw); ++ up_write(&rw->rwsem); ++} ++ ++/* why is not _nested version defined */ ++static inline int au_rw_read_trylock(struct au_rwsem *rw) ++{ ++ int ret = down_read_trylock(&rw->rwsem); ++ if (ret) ++ AuDbgRcntInc(rw); ++ return ret; ++} ++ ++static inline int au_rw_write_trylock(struct au_rwsem *rw) ++{ ++ int ret = 
down_write_trylock(&rw->rwsem); ++ if (ret) ++ AuDbgWcntInc(rw); ++ return ret; ++} ++ ++#undef AuDbgCntInit ++#undef AuDbgRcntInc ++#undef AuDbgRcntDec ++#undef AuDbgWcntInc ++#undef AuDbgWcntDec ++ ++#define AuSimpleLockRwsemFuncs(prefix, param, rwsem) \ ++static inline void prefix##_read_lock(param) \ ++{ au_rw_read_lock(rwsem); } \ ++static inline void prefix##_write_lock(param) \ ++{ au_rw_write_lock(rwsem); } \ ++static inline int prefix##_read_trylock(param) \ ++{ return au_rw_read_trylock(rwsem); } \ ++static inline int prefix##_write_trylock(param) \ ++{ return au_rw_write_trylock(rwsem); } ++/* why is not _nested version defined */ ++/* static inline void prefix##_read_trylock_nested(param, lsc) ++{ au_rw_read_trylock_nested(rwsem, lsc)); } ++static inline void prefix##_write_trylock_nestd(param, lsc) ++{ au_rw_write_trylock_nested(rwsem, lsc); } */ ++ ++#define AuSimpleUnlockRwsemFuncs(prefix, param, rwsem) \ ++static inline void prefix##_read_unlock(param) \ ++{ au_rw_read_unlock(rwsem); } \ ++static inline void prefix##_write_unlock(param) \ ++{ au_rw_write_unlock(rwsem); } \ ++static inline void prefix##_downgrade_lock(param) \ ++{ au_rw_dgrade_lock(rwsem); } ++ ++#define AuSimpleRwsemFuncs(prefix, param, rwsem) \ ++ AuSimpleLockRwsemFuncs(prefix, param, rwsem) \ ++ AuSimpleUnlockRwsemFuncs(prefix, param, rwsem) ++ ++#endif /* __KERNEL__ */ ++#endif /* __AUFS_RWSEM_H__ */ +diff -uNr linux-3.2.0-gentoo-r1.orig//fs/aufs/sbinfo.c linux-3.2.0-gentoo-r1/fs/aufs/sbinfo.c +--- linux-3.2.0-gentoo-r1.orig//fs/aufs/sbinfo.c 1970-01-01 01:00:00.000000000 +0100 ++++ linux-3.2.0-gentoo-r1/fs/aufs/sbinfo.c 2012-01-17 12:11:24.916787487 +0100 +@@ -0,0 +1,343 @@ ++/* ++ * Copyright (C) 2005-2012 Junjiro R. Okajima ++ * ++ * This program, aufs is free software; you can redistribute it and/or modify ++ * it under the terms of the GNU General Public License as published by ++ * the Free Software Foundation; either version 2 of the License, or ++ * (at your option) any later version. ++ * ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ * GNU General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public License ++ * along with this program; if not, write to the Free Software ++ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA ++ */ ++ ++/* ++ * superblock private data ++ */ ++ ++#include "aufs.h" ++ ++/* ++ * they are necessary regardless sysfs is disabled. 
++ */ ++void au_si_free(struct kobject *kobj) ++{ ++ struct au_sbinfo *sbinfo; ++ char *locked __maybe_unused; /* debug only */ ++ ++ sbinfo = container_of(kobj, struct au_sbinfo, si_kobj); ++ AuDebugOn(!list_empty(&sbinfo->si_plink.head)); ++ AuDebugOn(atomic_read(&sbinfo->si_nowait.nw_len)); ++ ++ au_rw_write_lock(&sbinfo->si_rwsem); ++ au_br_free(sbinfo); ++ au_rw_write_unlock(&sbinfo->si_rwsem); ++ ++ AuDebugOn(radix_tree_gang_lookup ++ (&sbinfo->au_si_pid.tree, (void **)&locked, ++ /*first_index*/PID_MAX_DEFAULT - 1, ++ /*max_items*/sizeof(locked)/sizeof(*locked))); ++ ++ kfree(sbinfo->si_branch); ++ kfree(sbinfo->au_si_pid.bitmap); ++ mutex_destroy(&sbinfo->si_xib_mtx); ++ AuRwDestroy(&sbinfo->si_rwsem); ++ ++ kfree(sbinfo); ++} ++ ++int au_si_alloc(struct super_block *sb) ++{ ++ int err; ++ struct au_sbinfo *sbinfo; ++ static struct lock_class_key aufs_si; ++ ++ err = -ENOMEM; ++ sbinfo = kzalloc(sizeof(*sbinfo), GFP_NOFS); ++ if (unlikely(!sbinfo)) ++ goto out; ++ ++ BUILD_BUG_ON(sizeof(unsigned long) != ++ sizeof(*sbinfo->au_si_pid.bitmap)); ++ sbinfo->au_si_pid.bitmap = kcalloc(BITS_TO_LONGS(PID_MAX_DEFAULT), ++ sizeof(*sbinfo->au_si_pid.bitmap), ++ GFP_NOFS); ++ if (unlikely(!sbinfo->au_si_pid.bitmap)) ++ goto out_sbinfo; ++ ++ /* will be reallocated separately */ ++ sbinfo->si_branch = kzalloc(sizeof(*sbinfo->si_branch), GFP_NOFS); ++ if (unlikely(!sbinfo->si_branch)) ++ goto out_pidmap; ++ ++ err = sysaufs_si_init(sbinfo); ++ if (unlikely(err)) ++ goto out_br; ++ ++ au_nwt_init(&sbinfo->si_nowait); ++ au_rw_init_wlock(&sbinfo->si_rwsem); ++ au_rw_class(&sbinfo->si_rwsem, &aufs_si); ++ spin_lock_init(&sbinfo->au_si_pid.tree_lock); ++ INIT_RADIX_TREE(&sbinfo->au_si_pid.tree, GFP_ATOMIC | __GFP_NOFAIL); ++ ++ atomic_long_set(&sbinfo->si_ninodes, 0); ++ atomic_long_set(&sbinfo->si_nfiles, 0); ++ ++ sbinfo->si_bend = -1; ++ ++ sbinfo->si_wbr_copyup = AuWbrCopyup_Def; ++ sbinfo->si_wbr_create = AuWbrCreate_Def; ++ sbinfo->si_wbr_copyup_ops = au_wbr_copyup_ops + sbinfo->si_wbr_copyup; ++ sbinfo->si_wbr_create_ops = au_wbr_create_ops + sbinfo->si_wbr_create; ++ ++ sbinfo->si_mntflags = au_opts_plink(AuOpt_Def); ++ ++ mutex_init(&sbinfo->si_xib_mtx); ++ sbinfo->si_xino_brid = -1; ++ /* leave si_xib_last_pindex and si_xib_next_bit */ ++ ++ sbinfo->si_rdcache = msecs_to_jiffies(AUFS_RDCACHE_DEF * MSEC_PER_SEC); ++ sbinfo->si_rdblk = AUFS_RDBLK_DEF; ++ sbinfo->si_rdhash = AUFS_RDHASH_DEF; ++ sbinfo->si_dirwh = AUFS_DIRWH_DEF; ++ ++ au_spl_init(&sbinfo->si_plink); ++ init_waitqueue_head(&sbinfo->si_plink_wq); ++ spin_lock_init(&sbinfo->si_plink_maint_lock); ++ ++ /* leave other members for sysaufs and si_mnt. 
*/ ++ sbinfo->si_sb = sb; ++ sb->s_fs_info = sbinfo; ++ si_pid_set(sb); ++ au_debug_sbinfo_init(sbinfo); ++ return 0; /* success */ ++ ++out_br: ++ kfree(sbinfo->si_branch); ++out_pidmap: ++ kfree(sbinfo->au_si_pid.bitmap); ++out_sbinfo: ++ kfree(sbinfo); ++out: ++ return err; ++} ++ ++int au_sbr_realloc(struct au_sbinfo *sbinfo, int nbr) ++{ ++ int err, sz; ++ struct au_branch **brp; ++ ++ AuRwMustWriteLock(&sbinfo->si_rwsem); ++ ++ err = -ENOMEM; ++ sz = sizeof(*brp) * (sbinfo->si_bend + 1); ++ if (unlikely(!sz)) ++ sz = sizeof(*brp); ++ brp = au_kzrealloc(sbinfo->si_branch, sz, sizeof(*brp) * nbr, GFP_NOFS); ++ if (brp) { ++ sbinfo->si_branch = brp; ++ err = 0; ++ } ++ ++ return err; ++} ++ ++/* ---------------------------------------------------------------------- */ ++ ++unsigned int au_sigen_inc(struct super_block *sb) ++{ ++ unsigned int gen; ++ ++ SiMustWriteLock(sb); ++ ++ gen = ++au_sbi(sb)->si_generation; ++ au_update_digen(sb->s_root); ++ au_update_iigen(sb->s_root->d_inode); ++ sb->s_root->d_inode->i_version++; ++ return gen; ++} ++ ++aufs_bindex_t au_new_br_id(struct super_block *sb) ++{ ++ aufs_bindex_t br_id; ++ int i; ++ struct au_sbinfo *sbinfo; ++ ++ SiMustWriteLock(sb); ++ ++ sbinfo = au_sbi(sb); ++ for (i = 0; i <= AUFS_BRANCH_MAX; i++) { ++ br_id = ++sbinfo->si_last_br_id; ++ AuDebugOn(br_id < 0); ++ if (br_id && au_br_index(sb, br_id) < 0) ++ return br_id; ++ } ++ ++ return -1; ++} ++ ++/* ---------------------------------------------------------------------- */ ++ ++/* it is ok that new 'nwt' tasks are appended while we are sleeping */ ++int si_read_lock(struct super_block *sb, int flags) ++{ ++ int err; ++ ++ err = 0; ++ if (au_ftest_lock(flags, FLUSH)) ++ au_nwt_flush(&au_sbi(sb)->si_nowait); ++ ++ si_noflush_read_lock(sb); ++ err = au_plink_maint(sb, flags); ++ if (unlikely(err)) ++ si_read_unlock(sb); ++ ++ return err; ++} ++ ++int si_write_lock(struct super_block *sb, int flags) ++{ ++ int err; ++ ++ if (au_ftest_lock(flags, FLUSH)) ++ au_nwt_flush(&au_sbi(sb)->si_nowait); ++ ++ si_noflush_write_lock(sb); ++ err = au_plink_maint(sb, flags); ++ if (unlikely(err)) ++ si_write_unlock(sb); ++ ++ return err; ++} ++ ++/* dentry and super_block lock. 
call at entry point */ ++int aufs_read_lock(struct dentry *dentry, int flags) ++{ ++ int err; ++ struct super_block *sb; ++ ++ sb = dentry->d_sb; ++ err = si_read_lock(sb, flags); ++ if (unlikely(err)) ++ goto out; ++ ++ if (au_ftest_lock(flags, DW)) ++ di_write_lock_child(dentry); ++ else ++ di_read_lock_child(dentry, flags); ++ ++ if (au_ftest_lock(flags, GEN)) { ++ err = au_digen_test(dentry, au_sigen(sb)); ++ AuDebugOn(!err && au_dbrange_test(dentry)); ++ if (unlikely(err)) ++ aufs_read_unlock(dentry, flags); ++ } ++ ++out: ++ return err; ++} ++ ++void aufs_read_unlock(struct dentry *dentry, int flags) ++{ ++ if (au_ftest_lock(flags, DW)) ++ di_write_unlock(dentry); ++ else ++ di_read_unlock(dentry, flags); ++ si_read_unlock(dentry->d_sb); ++} ++ ++void aufs_write_lock(struct dentry *dentry) ++{ ++ si_write_lock(dentry->d_sb, AuLock_FLUSH | AuLock_NOPLMW); ++ di_write_lock_child(dentry); ++} ++ ++void aufs_write_unlock(struct dentry *dentry) ++{ ++ di_write_unlock(dentry); ++ si_write_unlock(dentry->d_sb); ++} ++ ++int aufs_read_and_write_lock2(struct dentry *d1, struct dentry *d2, int flags) ++{ ++ int err; ++ unsigned int sigen; ++ struct super_block *sb; ++ ++ sb = d1->d_sb; ++ err = si_read_lock(sb, flags); ++ if (unlikely(err)) ++ goto out; ++ ++ di_write_lock2_child(d1, d2, au_ftest_lock(flags, DIR)); ++ ++ if (au_ftest_lock(flags, GEN)) { ++ sigen = au_sigen(sb); ++ err = au_digen_test(d1, sigen); ++ AuDebugOn(!err && au_dbrange_test(d1)); ++ if (!err) { ++ err = au_digen_test(d2, sigen); ++ AuDebugOn(!err && au_dbrange_test(d2)); ++ } ++ if (unlikely(err)) ++ aufs_read_and_write_unlock2(d1, d2); ++ } ++ ++out: ++ return err; ++} ++ ++void aufs_read_and_write_unlock2(struct dentry *d1, struct dentry *d2) ++{ ++ di_write_unlock2(d1, d2); ++ si_read_unlock(d1->d_sb); ++} ++ ++/* ---------------------------------------------------------------------- */ ++ ++int si_pid_test_slow(struct super_block *sb) ++{ ++ void *p; ++ ++ rcu_read_lock(); ++ p = radix_tree_lookup(&au_sbi(sb)->au_si_pid.tree, current->pid); ++ rcu_read_unlock(); ++ ++ return (long)!!p; ++} ++ ++void si_pid_set_slow(struct super_block *sb) ++{ ++ int err; ++ struct au_sbinfo *sbinfo; ++ ++ AuDebugOn(si_pid_test_slow(sb)); ++ ++ sbinfo = au_sbi(sb); ++ err = radix_tree_preload(GFP_NOFS | __GFP_NOFAIL); ++ AuDebugOn(err); ++ spin_lock(&sbinfo->au_si_pid.tree_lock); ++ err = radix_tree_insert(&sbinfo->au_si_pid.tree, current->pid, ++ /*any valid ptr*/sb); ++ spin_unlock(&sbinfo->au_si_pid.tree_lock); ++ AuDebugOn(err); ++ radix_tree_preload_end(); ++} ++ ++void si_pid_clr_slow(struct super_block *sb) ++{ ++ void *p; ++ struct au_sbinfo *sbinfo; ++ ++ AuDebugOn(!si_pid_test_slow(sb)); ++ ++ sbinfo = au_sbi(sb); ++ spin_lock(&sbinfo->au_si_pid.tree_lock); ++ p = radix_tree_delete(&sbinfo->au_si_pid.tree, current->pid); ++ spin_unlock(&sbinfo->au_si_pid.tree_lock); ++} +diff -uNr linux-3.2.0-gentoo-r1.orig//fs/aufs/spl.h linux-3.2.0-gentoo-r1/fs/aufs/spl.h +--- linux-3.2.0-gentoo-r1.orig//fs/aufs/spl.h 1970-01-01 01:00:00.000000000 +0100 ++++ linux-3.2.0-gentoo-r1/fs/aufs/spl.h 2012-01-17 12:11:24.916787487 +0100 +@@ -0,0 +1,62 @@ ++/* ++ * Copyright (C) 2005-2012 Junjiro R. Okajima ++ * ++ * This program, aufs is free software; you can redistribute it and/or modify ++ * it under the terms of the GNU General Public License as published by ++ * the Free Software Foundation; either version 2 of the License, or ++ * (at your option) any later version. 
++ * ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ * GNU General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public License ++ * along with this program; if not, write to the Free Software ++ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA ++ */ ++ ++/* ++ * simple list protected by a spinlock ++ */ ++ ++#ifndef __AUFS_SPL_H__ ++#define __AUFS_SPL_H__ ++ ++#ifdef __KERNEL__ ++ ++struct au_splhead { ++ spinlock_t spin; ++ struct list_head head; ++}; ++ ++static inline void au_spl_init(struct au_splhead *spl) ++{ ++ spin_lock_init(&spl->spin); ++ INIT_LIST_HEAD(&spl->head); ++} ++ ++static inline void au_spl_add(struct list_head *list, struct au_splhead *spl) ++{ ++ spin_lock(&spl->spin); ++ list_add(list, &spl->head); ++ spin_unlock(&spl->spin); ++} ++ ++static inline void au_spl_del(struct list_head *list, struct au_splhead *spl) ++{ ++ spin_lock(&spl->spin); ++ list_del(list); ++ spin_unlock(&spl->spin); ++} ++ ++static inline void au_spl_del_rcu(struct list_head *list, ++ struct au_splhead *spl) ++{ ++ spin_lock(&spl->spin); ++ list_del_rcu(list); ++ spin_unlock(&spl->spin); ++} ++ ++#endif /* __KERNEL__ */ ++#endif /* __AUFS_SPL_H__ */ +diff -uNr linux-3.2.0-gentoo-r1.orig//fs/aufs/super.c linux-3.2.0-gentoo-r1/fs/aufs/super.c +--- linux-3.2.0-gentoo-r1.orig//fs/aufs/super.c 1970-01-01 01:00:00.000000000 +0100 ++++ linux-3.2.0-gentoo-r1/fs/aufs/super.c 2012-01-17 12:11:24.930676503 +0100 +@@ -0,0 +1,938 @@ ++/* ++ * Copyright (C) 2005-2012 Junjiro R. Okajima ++ * ++ * This program, aufs is free software; you can redistribute it and/or modify ++ * it under the terms of the GNU General Public License as published by ++ * the Free Software Foundation; either version 2 of the License, or ++ * (at your option) any later version. ++ * ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ * GNU General Public License for more details. 
++ * ++ * You should have received a copy of the GNU General Public License ++ * along with this program; if not, write to the Free Software ++ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA ++ */ ++ ++/* ++ * mount and super_block operations ++ */ ++ ++#include ++#include ++#include ++#include ++#include ++#include ++#include "aufs.h" ++ ++/* ++ * super_operations ++ */ ++static struct inode *aufs_alloc_inode(struct super_block *sb __maybe_unused) ++{ ++ struct au_icntnr *c; ++ ++ c = au_cache_alloc_icntnr(); ++ if (c) { ++ au_icntnr_init(c); ++ c->vfs_inode.i_version = 1; /* sigen(sb); */ ++ c->iinfo.ii_hinode = NULL; ++ return &c->vfs_inode; ++ } ++ return NULL; ++} ++ ++static void aufs_destroy_inode_cb(struct rcu_head *head) ++{ ++ struct inode *inode = container_of(head, struct inode, i_rcu); ++ ++ INIT_LIST_HEAD(&inode->i_dentry); ++ au_cache_free_icntnr(container_of(inode, struct au_icntnr, vfs_inode)); ++} ++ ++static void aufs_destroy_inode(struct inode *inode) ++{ ++ au_iinfo_fin(inode); ++ call_rcu(&inode->i_rcu, aufs_destroy_inode_cb); ++} ++ ++struct inode *au_iget_locked(struct super_block *sb, ino_t ino) ++{ ++ struct inode *inode; ++ int err; ++ ++ inode = iget_locked(sb, ino); ++ if (unlikely(!inode)) { ++ inode = ERR_PTR(-ENOMEM); ++ goto out; ++ } ++ if (!(inode->i_state & I_NEW)) ++ goto out; ++ ++ err = au_xigen_new(inode); ++ if (!err) ++ err = au_iinfo_init(inode); ++ if (!err) ++ inode->i_version++; ++ else { ++ iget_failed(inode); ++ inode = ERR_PTR(err); ++ } ++ ++out: ++ /* never return NULL */ ++ AuDebugOn(!inode); ++ AuTraceErrPtr(inode); ++ return inode; ++} ++ ++/* lock free root dinfo */ ++static int au_show_brs(struct seq_file *seq, struct super_block *sb) ++{ ++ int err; ++ aufs_bindex_t bindex, bend; ++ struct path path; ++ struct au_hdentry *hdp; ++ struct au_branch *br; ++ char *perm; ++ ++ err = 0; ++ bend = au_sbend(sb); ++ hdp = au_di(sb->s_root)->di_hdentry; ++ for (bindex = 0; !err && bindex <= bend; bindex++) { ++ br = au_sbr(sb, bindex); ++ path.mnt = br->br_mnt; ++ path.dentry = hdp[bindex].hd_dentry; ++ err = au_seq_path(seq, &path); ++ if (err > 0) { ++ perm = au_optstr_br_perm(br->br_perm); ++ if (perm) { ++ err = seq_printf(seq, "=%s", perm); ++ kfree(perm); ++ if (err == -1) ++ err = -E2BIG; ++ } else ++ err = -ENOMEM; ++ } ++ if (!err && bindex != bend) ++ err = seq_putc(seq, ':'); ++ } ++ ++ return err; ++} ++ ++static void au_show_wbr_create(struct seq_file *m, int v, ++ struct au_sbinfo *sbinfo) ++{ ++ const char *pat; ++ ++ AuRwMustAnyLock(&sbinfo->si_rwsem); ++ ++ seq_printf(m, ",create="); ++ pat = au_optstr_wbr_create(v); ++ switch (v) { ++ case AuWbrCreate_TDP: ++ case AuWbrCreate_RR: ++ case AuWbrCreate_MFS: ++ case AuWbrCreate_PMFS: ++ seq_printf(m, pat); ++ break; ++ case AuWbrCreate_MFSV: ++ seq_printf(m, /*pat*/"mfs:%lu", ++ jiffies_to_msecs(sbinfo->si_wbr_mfs.mfs_expire) ++ / MSEC_PER_SEC); ++ break; ++ case AuWbrCreate_PMFSV: ++ seq_printf(m, /*pat*/"pmfs:%lu", ++ jiffies_to_msecs(sbinfo->si_wbr_mfs.mfs_expire) ++ / MSEC_PER_SEC); ++ break; ++ case AuWbrCreate_MFSRR: ++ seq_printf(m, /*pat*/"mfsrr:%llu", ++ sbinfo->si_wbr_mfs.mfsrr_watermark); ++ break; ++ case AuWbrCreate_MFSRRV: ++ seq_printf(m, /*pat*/"mfsrr:%llu:%lu", ++ sbinfo->si_wbr_mfs.mfsrr_watermark, ++ jiffies_to_msecs(sbinfo->si_wbr_mfs.mfs_expire) ++ / MSEC_PER_SEC); ++ break; ++ } ++} ++ ++static int au_show_xino(struct seq_file *seq, struct vfsmount *mnt) ++{ ++#ifdef CONFIG_SYSFS ++ return 0; ++#else ++ int err; ++ const int len = 
sizeof(AUFS_XINO_FNAME) - 1; ++ aufs_bindex_t bindex, brid; ++ struct super_block *sb; ++ struct qstr *name; ++ struct file *f; ++ struct dentry *d, *h_root; ++ struct au_hdentry *hdp; ++ ++ AuRwMustAnyLock(&sbinfo->si_rwsem); ++ ++ err = 0; ++ sb = mnt->mnt_sb; ++ f = au_sbi(sb)->si_xib; ++ if (!f) ++ goto out; ++ ++ /* stop printing the default xino path on the first writable branch */ ++ h_root = NULL; ++ brid = au_xino_brid(sb); ++ if (brid >= 0) { ++ bindex = au_br_index(sb, brid); ++ hdp = au_di(sb->s_root)->di_hdentry; ++ h_root = hdp[0 + bindex].hd_dentry; ++ } ++ d = f->f_dentry; ++ name = &d->d_name; ++ /* safe ->d_parent because the file is unlinked */ ++ if (d->d_parent == h_root ++ && name->len == len ++ && !memcmp(name->name, AUFS_XINO_FNAME, len)) ++ goto out; ++ ++ seq_puts(seq, ",xino="); ++ err = au_xino_path(seq, f); ++ ++out: ++ return err; ++#endif ++} ++ ++/* seq_file will re-call me in case of too long string */ ++static int aufs_show_options(struct seq_file *m, struct vfsmount *mnt) ++{ ++ int err; ++ unsigned int mnt_flags, v; ++ struct super_block *sb; ++ struct au_sbinfo *sbinfo; ++ ++#define AuBool(name, str) do { \ ++ v = au_opt_test(mnt_flags, name); \ ++ if (v != au_opt_test(AuOpt_Def, name)) \ ++ seq_printf(m, ",%s" #str, v ? "" : "no"); \ ++} while (0) ++ ++#define AuStr(name, str) do { \ ++ v = mnt_flags & AuOptMask_##name; \ ++ if (v != (AuOpt_Def & AuOptMask_##name)) \ ++ seq_printf(m, "," #str "=%s", au_optstr_##str(v)); \ ++} while (0) ++ ++#define AuUInt(name, str, val) do { \ ++ if (val != AUFS_##name##_DEF) \ ++ seq_printf(m, "," #str "=%u", val); \ ++} while (0) ++ ++ /* lock free root dinfo */ ++ sb = mnt->mnt_sb; ++ si_noflush_read_lock(sb); ++ sbinfo = au_sbi(sb); ++ seq_printf(m, ",si=%lx", sysaufs_si_id(sbinfo)); ++ ++ mnt_flags = au_mntflags(sb); ++ if (au_opt_test(mnt_flags, XINO)) { ++ err = au_show_xino(m, mnt); ++ if (unlikely(err)) ++ goto out; ++ } else ++ seq_puts(m, ",noxino"); ++ ++ AuBool(TRUNC_XINO, trunc_xino); ++ AuStr(UDBA, udba); ++ AuBool(SHWH, shwh); ++ AuBool(PLINK, plink); ++ AuBool(DIO, dio); ++ /* AuBool(DIRPERM1, dirperm1); */ ++ /* AuBool(REFROF, refrof); */ ++ ++ v = sbinfo->si_wbr_create; ++ if (v != AuWbrCreate_Def) ++ au_show_wbr_create(m, v, sbinfo); ++ ++ v = sbinfo->si_wbr_copyup; ++ if (v != AuWbrCopyup_Def) ++ seq_printf(m, ",cpup=%s", au_optstr_wbr_copyup(v)); ++ ++ v = au_opt_test(mnt_flags, ALWAYS_DIROPQ); ++ if (v != au_opt_test(AuOpt_Def, ALWAYS_DIROPQ)) ++ seq_printf(m, ",diropq=%c", v ? 
'a' : 'w'); ++ ++ AuUInt(DIRWH, dirwh, sbinfo->si_dirwh); ++ ++ v = jiffies_to_msecs(sbinfo->si_rdcache) / MSEC_PER_SEC; ++ AuUInt(RDCACHE, rdcache, v); ++ ++ AuUInt(RDBLK, rdblk, sbinfo->si_rdblk); ++ AuUInt(RDHASH, rdhash, sbinfo->si_rdhash); ++ ++ AuBool(SUM, sum); ++ /* AuBool(SUM_W, wsum); */ ++ AuBool(WARN_PERM, warn_perm); ++ AuBool(VERBOSE, verbose); ++ ++out: ++ /* be sure to print "br:" last */ ++ if (!sysaufs_brs) { ++ seq_puts(m, ",br:"); ++ au_show_brs(m, sb); ++ } ++ si_read_unlock(sb); ++ return 0; ++ ++#undef AuBool ++#undef AuStr ++#undef AuUInt ++} ++ ++/* ---------------------------------------------------------------------- */ ++ ++/* sum mode which returns the summation for statfs(2) */ ++ ++static u64 au_add_till_max(u64 a, u64 b) ++{ ++ u64 old; ++ ++ old = a; ++ a += b; ++ if (old < a) ++ return a; ++ return ULLONG_MAX; ++} ++ ++static int au_statfs_sum(struct super_block *sb, struct kstatfs *buf) ++{ ++ int err; ++ u64 blocks, bfree, bavail, files, ffree; ++ aufs_bindex_t bend, bindex, i; ++ unsigned char shared; ++ struct path h_path; ++ struct super_block *h_sb; ++ ++ blocks = 0; ++ bfree = 0; ++ bavail = 0; ++ files = 0; ++ ffree = 0; ++ ++ err = 0; ++ bend = au_sbend(sb); ++ for (bindex = bend; bindex >= 0; bindex--) { ++ h_path.mnt = au_sbr_mnt(sb, bindex); ++ h_sb = h_path.mnt->mnt_sb; ++ shared = 0; ++ for (i = bindex + 1; !shared && i <= bend; i++) ++ shared = (au_sbr_sb(sb, i) == h_sb); ++ if (shared) ++ continue; ++ ++ /* sb->s_root for NFS is unreliable */ ++ h_path.dentry = h_path.mnt->mnt_root; ++ err = vfs_statfs(&h_path, buf); ++ if (unlikely(err)) ++ goto out; ++ ++ blocks = au_add_till_max(blocks, buf->f_blocks); ++ bfree = au_add_till_max(bfree, buf->f_bfree); ++ bavail = au_add_till_max(bavail, buf->f_bavail); ++ files = au_add_till_max(files, buf->f_files); ++ ffree = au_add_till_max(ffree, buf->f_ffree); ++ } ++ ++ buf->f_blocks = blocks; ++ buf->f_bfree = bfree; ++ buf->f_bavail = bavail; ++ buf->f_files = files; ++ buf->f_ffree = ffree; ++ ++out: ++ return err; ++} ++ ++static int aufs_statfs(struct dentry *dentry, struct kstatfs *buf) ++{ ++ int err; ++ struct path h_path; ++ struct super_block *sb; ++ ++ /* lock free root dinfo */ ++ sb = dentry->d_sb; ++ si_noflush_read_lock(sb); ++ if (!au_opt_test(au_mntflags(sb), SUM)) { ++ /* sb->s_root for NFS is unreliable */ ++ h_path.mnt = au_sbr_mnt(sb, 0); ++ h_path.dentry = h_path.mnt->mnt_root; ++ err = vfs_statfs(&h_path, buf); ++ } else ++ err = au_statfs_sum(sb, buf); ++ si_read_unlock(sb); ++ ++ if (!err) { ++ buf->f_type = AUFS_SUPER_MAGIC; ++ buf->f_namelen = AUFS_MAX_NAMELEN; ++ memset(&buf->f_fsid, 0, sizeof(buf->f_fsid)); ++ } ++ /* buf->f_bsize = buf->f_blocks = buf->f_bfree = buf->f_bavail = -1; */ ++ ++ return err; ++} ++ ++/* ---------------------------------------------------------------------- */ ++ ++/* final actions when unmounting a file system */ ++static void aufs_put_super(struct super_block *sb) ++{ ++ struct au_sbinfo *sbinfo; ++ ++ sbinfo = au_sbi(sb); ++ if (!sbinfo) ++ return; ++ ++ dbgaufs_si_fin(sbinfo); ++ kobject_put(&sbinfo->si_kobj); ++} ++ ++/* ---------------------------------------------------------------------- */ ++ ++void au_array_free(void *array) ++{ ++ if (array) { ++ if (!is_vmalloc_addr(array)) ++ kfree(array); ++ else ++ vfree(array); ++ } ++} ++ ++void *au_array_alloc(unsigned long long *hint, au_arraycb_t cb, void *arg) ++{ ++ void *array; ++ unsigned long long n; ++ ++ array = NULL; ++ n = 0; ++ if (!*hint) ++ goto out; ++ ++ if (*hint > ULLONG_MAX 
/ sizeof(array)) { ++ array = ERR_PTR(-EMFILE); ++ pr_err("hint %llu\n", *hint); ++ goto out; ++ } ++ ++ array = kmalloc(sizeof(array) * *hint, GFP_NOFS); ++ if (unlikely(!array)) ++ array = vmalloc(sizeof(array) * *hint); ++ if (unlikely(!array)) { ++ array = ERR_PTR(-ENOMEM); ++ goto out; ++ } ++ ++ n = cb(array, *hint, arg); ++ AuDebugOn(n > *hint); ++ ++out: ++ *hint = n; ++ return array; ++} ++ ++static unsigned long long au_iarray_cb(void *a, ++ unsigned long long max __maybe_unused, ++ void *arg) ++{ ++ unsigned long long n; ++ struct inode **p, *inode; ++ struct list_head *head; ++ ++ n = 0; ++ p = a; ++ head = arg; ++ spin_lock(&inode_sb_list_lock); ++ list_for_each_entry(inode, head, i_sb_list) { ++ if (!is_bad_inode(inode) ++ && au_ii(inode)->ii_bstart >= 0) { ++ spin_lock(&inode->i_lock); ++ if (atomic_read(&inode->i_count)) { ++ au_igrab(inode); ++ *p++ = inode; ++ n++; ++ AuDebugOn(n > max); ++ } ++ spin_unlock(&inode->i_lock); ++ } ++ } ++ spin_unlock(&inode_sb_list_lock); ++ ++ return n; ++} ++ ++struct inode **au_iarray_alloc(struct super_block *sb, unsigned long long *max) ++{ ++ *max = atomic_long_read(&au_sbi(sb)->si_ninodes); ++ return au_array_alloc(max, au_iarray_cb, &sb->s_inodes); ++} ++ ++void au_iarray_free(struct inode **a, unsigned long long max) ++{ ++ unsigned long long ull; ++ ++ for (ull = 0; ull < max; ull++) ++ iput(a[ull]); ++ au_array_free(a); ++} ++ ++/* ---------------------------------------------------------------------- */ ++ ++/* ++ * refresh dentry and inode at remount time. ++ */ ++/* todo: consolidate with simple_reval_dpath() and au_reval_for_attr() */ ++static int au_do_refresh(struct dentry *dentry, unsigned int dir_flags, ++ struct dentry *parent) ++{ ++ int err; ++ ++ di_write_lock_child(dentry); ++ di_read_lock_parent(parent, AuLock_IR); ++ err = au_refresh_dentry(dentry, parent); ++ if (!err && dir_flags) ++ au_hn_reset(dentry->d_inode, dir_flags); ++ di_read_unlock(parent, AuLock_IR); ++ di_write_unlock(dentry); ++ ++ return err; ++} ++ ++static int au_do_refresh_d(struct dentry *dentry, unsigned int sigen, ++ struct au_sbinfo *sbinfo, ++ const unsigned int dir_flags) ++{ ++ int err; ++ struct dentry *parent; ++ struct inode *inode; ++ ++ err = 0; ++ parent = dget_parent(dentry); ++ if (!au_digen_test(parent, sigen) && au_digen_test(dentry, sigen)) { ++ inode = dentry->d_inode; ++ if (inode) { ++ if (!S_ISDIR(inode->i_mode)) ++ err = au_do_refresh(dentry, /*dir_flags*/0, ++ parent); ++ else { ++ err = au_do_refresh(dentry, dir_flags, parent); ++ if (unlikely(err)) ++ au_fset_si(sbinfo, FAILED_REFRESH_DIR); ++ } ++ } else ++ err = au_do_refresh(dentry, /*dir_flags*/0, parent); ++ AuDbgDentry(dentry); ++ } ++ dput(parent); ++ ++ AuTraceErr(err); ++ return err; ++} ++ ++static int au_refresh_d(struct super_block *sb) ++{ ++ int err, i, j, ndentry, e; ++ unsigned int sigen; ++ struct au_dcsub_pages dpages; ++ struct au_dpage *dpage; ++ struct dentry **dentries, *d; ++ struct au_sbinfo *sbinfo; ++ struct dentry *root = sb->s_root; ++ const unsigned int dir_flags = au_hi_flags(root->d_inode, /*isdir*/1); ++ ++ err = au_dpages_init(&dpages, GFP_NOFS); ++ if (unlikely(err)) ++ goto out; ++ err = au_dcsub_pages(&dpages, root, NULL, NULL); ++ if (unlikely(err)) ++ goto out_dpages; ++ ++ sigen = au_sigen(sb); ++ sbinfo = au_sbi(sb); ++ for (i = 0; i < dpages.ndpage; i++) { ++ dpage = dpages.dpages + i; ++ dentries = dpage->dentries; ++ ndentry = dpage->ndentry; ++ for (j = 0; j < ndentry; j++) { ++ d = dentries[j]; ++ e = au_do_refresh_d(d, sigen, 
sbinfo, dir_flags); ++ if (unlikely(e && !err)) ++ err = e; ++ /* go on even err */ ++ } ++ } ++ ++out_dpages: ++ au_dpages_free(&dpages); ++out: ++ return err; ++} ++ ++static int au_refresh_i(struct super_block *sb) ++{ ++ int err, e; ++ unsigned int sigen; ++ unsigned long long max, ull; ++ struct inode *inode, **array; ++ ++ array = au_iarray_alloc(sb, &max); ++ err = PTR_ERR(array); ++ if (IS_ERR(array)) ++ goto out; ++ ++ err = 0; ++ sigen = au_sigen(sb); ++ for (ull = 0; ull < max; ull++) { ++ inode = array[ull]; ++ if (au_iigen(inode) != sigen) { ++ ii_write_lock_child(inode); ++ e = au_refresh_hinode_self(inode); ++ ii_write_unlock(inode); ++ if (unlikely(e)) { ++ pr_err("error %d, i%lu\n", e, inode->i_ino); ++ if (!err) ++ err = e; ++ /* go on even if err */ ++ } ++ } ++ } ++ ++ au_iarray_free(array, max); ++ ++out: ++ return err; ++} ++ ++static void au_remount_refresh(struct super_block *sb) ++{ ++ int err, e; ++ unsigned int udba; ++ aufs_bindex_t bindex, bend; ++ struct dentry *root; ++ struct inode *inode; ++ struct au_branch *br; ++ ++ au_sigen_inc(sb); ++ au_fclr_si(au_sbi(sb), FAILED_REFRESH_DIR); ++ ++ root = sb->s_root; ++ DiMustNoWaiters(root); ++ inode = root->d_inode; ++ IiMustNoWaiters(inode); ++ ++ udba = au_opt_udba(sb); ++ bend = au_sbend(sb); ++ for (bindex = 0; bindex <= bend; bindex++) { ++ br = au_sbr(sb, bindex); ++ err = au_hnotify_reset_br(udba, br, br->br_perm); ++ if (unlikely(err)) ++ AuIOErr("hnotify failed on br %d, %d, ignored\n", ++ bindex, err); ++ /* go on even if err */ ++ } ++ au_hn_reset(inode, au_hi_flags(inode, /*isdir*/1)); ++ ++ di_write_unlock(root); ++ err = au_refresh_d(sb); ++ e = au_refresh_i(sb); ++ if (unlikely(e && !err)) ++ err = e; ++ /* aufs_write_lock() calls ..._child() */ ++ di_write_lock_child(root); ++ ++ au_cpup_attr_all(inode, /*force*/1); ++ ++ if (unlikely(err)) ++ AuIOErr("refresh failed, ignored, %d\n", err); ++} ++ ++/* stop extra interpretation of errno in mount(8), and strange error messages */ ++static int cvt_err(int err) ++{ ++ AuTraceErr(err); ++ ++ switch (err) { ++ case -ENOENT: ++ case -ENOTDIR: ++ case -EEXIST: ++ case -EIO: ++ err = -EINVAL; ++ } ++ return err; ++} ++ ++static int aufs_remount_fs(struct super_block *sb, int *flags, char *data) ++{ ++ int err, do_dx; ++ unsigned int mntflags; ++ struct au_opts opts; ++ struct dentry *root; ++ struct inode *inode; ++ struct au_sbinfo *sbinfo; ++ ++ err = 0; ++ root = sb->s_root; ++ if (!data || !*data) { ++ err = si_write_lock(sb, AuLock_FLUSH | AuLock_NOPLM); ++ if (!err) { ++ di_write_lock_child(root); ++ err = au_opts_verify(sb, *flags, /*pending*/0); ++ aufs_write_unlock(root); ++ } ++ goto out; ++ } ++ ++ err = -ENOMEM; ++ memset(&opts, 0, sizeof(opts)); ++ opts.opt = (void *)__get_free_page(GFP_NOFS); ++ if (unlikely(!opts.opt)) ++ goto out; ++ opts.max_opt = PAGE_SIZE / sizeof(*opts.opt); ++ opts.flags = AuOpts_REMOUNT; ++ opts.sb_flags = *flags; ++ ++ /* parse it before aufs lock */ ++ err = au_opts_parse(sb, data, &opts); ++ if (unlikely(err)) ++ goto out_opts; ++ ++ sbinfo = au_sbi(sb); ++ inode = root->d_inode; ++ mutex_lock(&inode->i_mutex); ++ err = si_write_lock(sb, AuLock_FLUSH | AuLock_NOPLM); ++ if (unlikely(err)) ++ goto out_mtx; ++ di_write_lock_child(root); ++ ++ /* au_opts_remount() may return an error */ ++ err = au_opts_remount(sb, &opts); ++ au_opts_free(&opts); ++ ++ if (au_ftest_opts(opts.flags, REFRESH)) ++ au_remount_refresh(sb); ++ ++ if (au_ftest_opts(opts.flags, REFRESH_DYAOP)) { ++ mntflags = au_mntflags(sb); ++ do_dx = 
!!au_opt_test(mntflags, DIO); ++ au_dy_arefresh(do_dx); ++ } ++ ++ aufs_write_unlock(root); ++ ++out_mtx: ++ mutex_unlock(&inode->i_mutex); ++out_opts: ++ free_page((unsigned long)opts.opt); ++out: ++ err = cvt_err(err); ++ AuTraceErr(err); ++ return err; ++} ++ ++static const struct super_operations aufs_sop = { ++ .alloc_inode = aufs_alloc_inode, ++ .destroy_inode = aufs_destroy_inode, ++ /* always deleting, no clearing */ ++ .drop_inode = generic_delete_inode, ++ .show_options = aufs_show_options, ++ .statfs = aufs_statfs, ++ .put_super = aufs_put_super, ++ .remount_fs = aufs_remount_fs ++}; ++ ++/* ---------------------------------------------------------------------- */ ++ ++static int alloc_root(struct super_block *sb) ++{ ++ int err; ++ struct inode *inode; ++ struct dentry *root; ++ ++ err = -ENOMEM; ++ inode = au_iget_locked(sb, AUFS_ROOT_INO); ++ err = PTR_ERR(inode); ++ if (IS_ERR(inode)) ++ goto out; ++ ++ inode->i_op = &aufs_dir_iop; ++ inode->i_fop = &aufs_dir_fop; ++ inode->i_mode = S_IFDIR; ++ set_nlink(inode, 2); ++ unlock_new_inode(inode); ++ ++ root = d_alloc_root(inode); ++ if (unlikely(!root)) ++ goto out_iput; ++ err = PTR_ERR(root); ++ if (IS_ERR(root)) ++ goto out_iput; ++ ++ err = au_di_init(root); ++ if (!err) { ++ sb->s_root = root; ++ return 0; /* success */ ++ } ++ dput(root); ++ goto out; /* do not iput */ ++ ++out_iput: ++ iget_failed(inode); ++out: ++ return err; ++ ++} ++ ++static int aufs_fill_super(struct super_block *sb, void *raw_data, ++ int silent __maybe_unused) ++{ ++ int err; ++ struct au_opts opts; ++ struct dentry *root; ++ struct inode *inode; ++ char *arg = raw_data; ++ ++ if (unlikely(!arg || !*arg)) { ++ err = -EINVAL; ++ pr_err("no arg\n"); ++ goto out; ++ } ++ ++ err = -ENOMEM; ++ memset(&opts, 0, sizeof(opts)); ++ opts.opt = (void *)__get_free_page(GFP_NOFS); ++ if (unlikely(!opts.opt)) ++ goto out; ++ opts.max_opt = PAGE_SIZE / sizeof(*opts.opt); ++ opts.sb_flags = sb->s_flags; ++ ++ err = au_si_alloc(sb); ++ if (unlikely(err)) ++ goto out_opts; ++ ++ /* all timestamps always follow the ones on the branch */ ++ sb->s_flags |= MS_NOATIME | MS_NODIRATIME; ++ sb->s_op = &aufs_sop; ++ sb->s_d_op = &aufs_dop; ++ sb->s_magic = AUFS_SUPER_MAGIC; ++ sb->s_maxbytes = 0; ++ au_export_init(sb); ++ ++ err = alloc_root(sb); ++ if (unlikely(err)) { ++ si_write_unlock(sb); ++ goto out_info; ++ } ++ root = sb->s_root; ++ inode = root->d_inode; ++ ++ /* ++ * actually we can parse options regardless aufs lock here. ++ * but at remount time, parsing must be done before aufs lock. ++ * so we follow the same rule. ++ */ ++ ii_write_lock_parent(inode); ++ aufs_write_unlock(root); ++ err = au_opts_parse(sb, arg, &opts); ++ if (unlikely(err)) ++ goto out_root; ++ ++ /* lock vfs_inode first, then aufs. 
*/ ++ mutex_lock(&inode->i_mutex); ++ aufs_write_lock(root); ++ err = au_opts_mount(sb, &opts); ++ au_opts_free(&opts); ++ aufs_write_unlock(root); ++ mutex_unlock(&inode->i_mutex); ++ if (!err) ++ goto out_opts; /* success */ ++ ++out_root: ++ dput(root); ++ sb->s_root = NULL; ++out_info: ++ dbgaufs_si_fin(au_sbi(sb)); ++ kobject_put(&au_sbi(sb)->si_kobj); ++ sb->s_fs_info = NULL; ++out_opts: ++ free_page((unsigned long)opts.opt); ++out: ++ AuTraceErr(err); ++ err = cvt_err(err); ++ AuTraceErr(err); ++ return err; ++} ++ ++/* ---------------------------------------------------------------------- */ ++ ++static struct dentry *aufs_mount(struct file_system_type *fs_type, int flags, ++ const char *dev_name __maybe_unused, ++ void *raw_data) ++{ ++ struct dentry *root; ++ struct super_block *sb; ++ ++ /* all timestamps always follow the ones on the branch */ ++ /* mnt->mnt_flags |= MNT_NOATIME | MNT_NODIRATIME; */ ++ root = mount_nodev(fs_type, flags, raw_data, aufs_fill_super); ++ if (IS_ERR(root)) ++ goto out; ++ ++ sb = root->d_sb; ++ si_write_lock(sb, !AuLock_FLUSH); ++ sysaufs_brs_add(sb, 0); ++ si_write_unlock(sb); ++ au_sbilist_add(sb); ++ ++out: ++ return root; ++} ++ ++static void aufs_kill_sb(struct super_block *sb) ++{ ++ struct au_sbinfo *sbinfo; ++ ++ sbinfo = au_sbi(sb); ++ if (sbinfo) { ++ au_sbilist_del(sb); ++ aufs_write_lock(sb->s_root); ++ if (sbinfo->si_wbr_create_ops->fin) ++ sbinfo->si_wbr_create_ops->fin(sb); ++ if (au_opt_test(sbinfo->si_mntflags, UDBA_HNOTIFY)) { ++ au_opt_set_udba(sbinfo->si_mntflags, UDBA_NONE); ++ au_remount_refresh(sb); ++ } ++ if (au_opt_test(sbinfo->si_mntflags, PLINK)) ++ au_plink_put(sb, /*verbose*/1); ++ au_xino_clr(sb); ++ sbinfo->si_sb = NULL; ++ aufs_write_unlock(sb->s_root); ++ au_nwt_flush(&sbinfo->si_nowait); ++ } ++ generic_shutdown_super(sb); ++} ++ ++struct file_system_type aufs_fs_type = { ++ .name = AUFS_FSTYPE, ++ .fs_flags = ++ FS_RENAME_DOES_D_MOVE /* a race between rename and others */ ++ | FS_REVAL_DOT, /* for NFS branch and udba */ ++ .mount = aufs_mount, ++ .kill_sb = aufs_kill_sb, ++ /* no need to __module_get() and module_put(). */ ++ .owner = THIS_MODULE, ++}; +diff -uNr linux-3.2.0-gentoo-r1.orig//fs/aufs/super.h linux-3.2.0-gentoo-r1/fs/aufs/super.h +--- linux-3.2.0-gentoo-r1.orig//fs/aufs/super.h 1970-01-01 01:00:00.000000000 +0100 ++++ linux-3.2.0-gentoo-r1/fs/aufs/super.h 2012-01-17 12:11:24.937621009 +0100 +@@ -0,0 +1,546 @@ ++/* ++ * Copyright (C) 2005-2012 Junjiro R. Okajima ++ * ++ * This program, aufs is free software; you can redistribute it and/or modify ++ * it under the terms of the GNU General Public License as published by ++ * the Free Software Foundation; either version 2 of the License, or ++ * (at your option) any later version. ++ * ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ * GNU General Public License for more details. 
++ * ++ * You should have received a copy of the GNU General Public License ++ * along with this program; if not, write to the Free Software ++ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA ++ */ ++ ++/* ++ * super_block operations ++ */ ++ ++#ifndef __AUFS_SUPER_H__ ++#define __AUFS_SUPER_H__ ++ ++#ifdef __KERNEL__ ++ ++#include ++#include "rwsem.h" ++#include "spl.h" ++#include "wkq.h" ++ ++typedef ssize_t (*au_readf_t)(struct file *, char __user *, size_t, loff_t *); ++typedef ssize_t (*au_writef_t)(struct file *, const char __user *, size_t, ++ loff_t *); ++ ++/* policies to select one among multiple writable branches */ ++struct au_wbr_copyup_operations { ++ int (*copyup)(struct dentry *dentry); ++}; ++ ++struct au_wbr_create_operations { ++ int (*create)(struct dentry *dentry, int isdir); ++ int (*init)(struct super_block *sb); ++ int (*fin)(struct super_block *sb); ++}; ++ ++struct au_wbr_mfs { ++ struct mutex mfs_lock; /* protect this structure */ ++ unsigned long mfs_jiffy; ++ unsigned long mfs_expire; ++ aufs_bindex_t mfs_bindex; ++ ++ unsigned long long mfsrr_bytes; ++ unsigned long long mfsrr_watermark; ++}; ++ ++struct au_branch; ++struct au_sbinfo { ++ /* nowait tasks in the system-wide workqueue */ ++ struct au_nowait_tasks si_nowait; ++ ++ /* ++ * tried sb->s_umount, but failed due to the dependecy between i_mutex. ++ * rwsem for au_sbinfo is necessary. ++ */ ++ struct au_rwsem si_rwsem; ++ ++ /* prevent recursive locking in deleting inode */ ++ struct { ++ unsigned long *bitmap; ++ spinlock_t tree_lock; ++ struct radix_tree_root tree; ++ } au_si_pid; ++ ++ /* ++ * dirty approach to protect sb->sb_inodes and ->s_files from remount. ++ */ ++ atomic_long_t si_ninodes, si_nfiles; ++ ++ /* branch management */ ++ unsigned int si_generation; ++ ++ /* see above flags */ ++ unsigned char au_si_status; ++ ++ aufs_bindex_t si_bend; ++ ++ /* dirty trick to keep br_id plus */ ++ unsigned int si_last_br_id : ++ sizeof(aufs_bindex_t) * BITS_PER_BYTE - 1; ++ struct au_branch **si_branch; ++ ++ /* policy to select a writable branch */ ++ unsigned char si_wbr_copyup; ++ unsigned char si_wbr_create; ++ struct au_wbr_copyup_operations *si_wbr_copyup_ops; ++ struct au_wbr_create_operations *si_wbr_create_ops; ++ ++ /* round robin */ ++ atomic_t si_wbr_rr_next; ++ ++ /* most free space */ ++ struct au_wbr_mfs si_wbr_mfs; ++ ++ /* mount flags */ ++ /* include/asm-ia64/siginfo.h defines a macro named si_flags */ ++ unsigned int si_mntflags; ++ ++ /* external inode number (bitmap and translation table) */ ++ au_readf_t si_xread; ++ au_writef_t si_xwrite; ++ struct file *si_xib; ++ struct mutex si_xib_mtx; /* protect xib members */ ++ unsigned long *si_xib_buf; ++ unsigned long si_xib_last_pindex; ++ int si_xib_next_bit; ++ aufs_bindex_t si_xino_brid; ++ /* reserved for future use */ ++ /* unsigned long long si_xib_limit; */ /* Max xib file size */ ++ ++#ifdef CONFIG_AUFS_EXPORT ++ /* i_generation */ ++ struct file *si_xigen; ++ atomic_t si_xigen_next; ++#endif ++ ++ /* vdir parameters */ ++ unsigned long si_rdcache; /* max cache time in jiffies */ ++ unsigned int si_rdblk; /* deblk size */ ++ unsigned int si_rdhash; /* hash size */ ++ ++ /* ++ * If the number of whiteouts are larger than si_dirwh, leave all of ++ * them after au_whtmp_ren to reduce the cost of rmdir(2). ++ * future fsck.aufs or kernel thread will remove them later. ++ * Otherwise, remove all whiteouts and the dir in rmdir(2). 
++	 */
++	unsigned int		si_dirwh;
++
++	/*
++	 * rename(2) a directory with all children.
++	 */
++	/* reserved for future use */
++	/* int			si_rendir; */
++
++	/* pseudo_link list */
++	struct au_splhead	si_plink;
++	wait_queue_head_t	si_plink_wq;
++	spinlock_t		si_plink_maint_lock;
++	pid_t			si_plink_maint_pid;
++
++	/*
++	 * sysfs and lifetime management.
++	 * this is not a small structure and it may be a waste of memory
++	 * when sysfs is disabled, particularly when many aufs-es are mounted.
++	 * but using sysfs is the majority case.
++	 */
++	struct kobject		si_kobj;
++#ifdef CONFIG_DEBUG_FS
++	struct dentry		*si_dbgaufs, *si_dbgaufs_xib;
++#ifdef CONFIG_AUFS_EXPORT
++	struct dentry		*si_dbgaufs_xigen;
++#endif
++#endif
++
++#ifdef CONFIG_AUFS_SBILIST
++	struct list_head	si_list;
++#endif
++
++	/* dirty, necessary for unmounting, sysfs and sysrq */
++	struct super_block	*si_sb;
++};
++
++/* sbinfo status flags */
++/*
++ * set true when refresh_dirs() failed at remount time.
++ * then try refreshing dirs at access time again.
++ * if it is false, refreshing dirs at access time is unnecessary
++ */
++#define AuSi_FAILED_REFRESH_DIR	1
++static inline unsigned char au_do_ftest_si(struct au_sbinfo *sbi,
++					   unsigned int flag)
++{
++	AuRwMustAnyLock(&sbi->si_rwsem);
++	return sbi->au_si_status & flag;
++}
++#define au_ftest_si(sbinfo, name)	au_do_ftest_si(sbinfo, AuSi_##name)
++#define au_fset_si(sbinfo, name) do { \
++	AuRwMustWriteLock(&(sbinfo)->si_rwsem); \
++	(sbinfo)->au_si_status |= AuSi_##name; \
++} while (0)
++#define au_fclr_si(sbinfo, name) do { \
++	AuRwMustWriteLock(&(sbinfo)->si_rwsem); \
++	(sbinfo)->au_si_status &= ~AuSi_##name; \
++} while (0)
++
++/* ---------------------------------------------------------------------- */
++
++/* policy to select one among writable branches */
++#define AuWbrCopyup(sbinfo, ...) \
++	((sbinfo)->si_wbr_copyup_ops->copyup(__VA_ARGS__))
++#define AuWbrCreate(sbinfo, ...)
\ ++ ((sbinfo)->si_wbr_create_ops->create(__VA_ARGS__)) ++ ++/* flags for si_read_lock()/aufs_read_lock()/di_read_lock() */ ++#define AuLock_DW 1 /* write-lock dentry */ ++#define AuLock_IR (1 << 1) /* read-lock inode */ ++#define AuLock_IW (1 << 2) /* write-lock inode */ ++#define AuLock_FLUSH (1 << 3) /* wait for 'nowait' tasks */ ++#define AuLock_DIR (1 << 4) /* target is a dir */ ++#define AuLock_NOPLM (1 << 5) /* return err in plm mode */ ++#define AuLock_NOPLMW (1 << 6) /* wait for plm mode ends */ ++#define AuLock_GEN (1 << 7) /* test digen/iigen */ ++#define au_ftest_lock(flags, name) ((flags) & AuLock_##name) ++#define au_fset_lock(flags, name) \ ++ do { (flags) |= AuLock_##name; } while (0) ++#define au_fclr_lock(flags, name) \ ++ do { (flags) &= ~AuLock_##name; } while (0) ++ ++/* ---------------------------------------------------------------------- */ ++ ++/* super.c */ ++extern struct file_system_type aufs_fs_type; ++struct inode *au_iget_locked(struct super_block *sb, ino_t ino); ++typedef unsigned long long (*au_arraycb_t)(void *array, unsigned long long max, ++ void *arg); ++void au_array_free(void *array); ++void *au_array_alloc(unsigned long long *hint, au_arraycb_t cb, void *arg); ++struct inode **au_iarray_alloc(struct super_block *sb, unsigned long long *max); ++void au_iarray_free(struct inode **a, unsigned long long max); ++ ++/* sbinfo.c */ ++void au_si_free(struct kobject *kobj); ++int au_si_alloc(struct super_block *sb); ++int au_sbr_realloc(struct au_sbinfo *sbinfo, int nbr); ++ ++unsigned int au_sigen_inc(struct super_block *sb); ++aufs_bindex_t au_new_br_id(struct super_block *sb); ++ ++int si_read_lock(struct super_block *sb, int flags); ++int si_write_lock(struct super_block *sb, int flags); ++int aufs_read_lock(struct dentry *dentry, int flags); ++void aufs_read_unlock(struct dentry *dentry, int flags); ++void aufs_write_lock(struct dentry *dentry); ++void aufs_write_unlock(struct dentry *dentry); ++int aufs_read_and_write_lock2(struct dentry *d1, struct dentry *d2, int flags); ++void aufs_read_and_write_unlock2(struct dentry *d1, struct dentry *d2); ++ ++int si_pid_test_slow(struct super_block *sb); ++void si_pid_set_slow(struct super_block *sb); ++void si_pid_clr_slow(struct super_block *sb); ++ ++/* wbr_policy.c */ ++extern struct au_wbr_copyup_operations au_wbr_copyup_ops[]; ++extern struct au_wbr_create_operations au_wbr_create_ops[]; ++int au_cpdown_dirs(struct dentry *dentry, aufs_bindex_t bdst); ++ ++/* ---------------------------------------------------------------------- */ ++ ++static inline struct au_sbinfo *au_sbi(struct super_block *sb) ++{ ++ return sb->s_fs_info; ++} ++ ++/* ---------------------------------------------------------------------- */ ++ ++#ifdef CONFIG_AUFS_EXPORT ++void au_export_init(struct super_block *sb); ++ ++static inline int au_test_nfsd(void) ++{ ++ struct task_struct *tsk = current; ++ ++ return (tsk->flags & PF_KTHREAD) ++ && !strcmp(tsk->comm, "nfsd"); ++} ++ ++void au_xigen_inc(struct inode *inode); ++int au_xigen_new(struct inode *inode); ++int au_xigen_set(struct super_block *sb, struct file *base); ++void au_xigen_clr(struct super_block *sb); ++ ++static inline int au_busy_or_stale(void) ++{ ++ if (!au_test_nfsd()) ++ return -EBUSY; ++ return -ESTALE; ++} ++#else ++AuStubVoid(au_export_init, struct super_block *sb) ++AuStubInt0(au_test_nfsd, void) ++AuStubVoid(au_xigen_inc, struct inode *inode) ++AuStubInt0(au_xigen_new, struct inode *inode) ++AuStubInt0(au_xigen_set, struct super_block *sb, struct file *base) 
++AuStubVoid(au_xigen_clr, struct super_block *sb)
++static inline int au_busy_or_stale(void)
++{
++	return -EBUSY;
++}
++#endif /* CONFIG_AUFS_EXPORT */
++
++/* ---------------------------------------------------------------------- */
++
++#ifdef CONFIG_AUFS_SBILIST
++/* module.c */
++extern struct au_splhead au_sbilist;
++
++static inline void au_sbilist_init(void)
++{
++	au_spl_init(&au_sbilist);
++}
++
++static inline void au_sbilist_add(struct super_block *sb)
++{
++	au_spl_add(&au_sbi(sb)->si_list, &au_sbilist);
++}
++
++static inline void au_sbilist_del(struct super_block *sb)
++{
++	au_spl_del(&au_sbi(sb)->si_list, &au_sbilist);
++}
++
++#ifdef CONFIG_AUFS_MAGIC_SYSRQ
++static inline void au_sbilist_lock(void)
++{
++	spin_lock(&au_sbilist.spin);
++}
++
++static inline void au_sbilist_unlock(void)
++{
++	spin_unlock(&au_sbilist.spin);
++}
++#define AuGFP_SBILIST	GFP_ATOMIC
++#else
++AuStubVoid(au_sbilist_lock, void)
++AuStubVoid(au_sbilist_unlock, void)
++#define AuGFP_SBILIST	GFP_NOFS
++#endif /* CONFIG_AUFS_MAGIC_SYSRQ */
++#else
++AuStubVoid(au_sbilist_init, void)
++AuStubVoid(au_sbilist_add, struct super_block*)
++AuStubVoid(au_sbilist_del, struct super_block*)
++AuStubVoid(au_sbilist_lock, void)
++AuStubVoid(au_sbilist_unlock, void)
++#define AuGFP_SBILIST	GFP_NOFS
++#endif
++
++/* ---------------------------------------------------------------------- */
++
++static inline void dbgaufs_si_null(struct au_sbinfo *sbinfo)
++{
++	/*
++	 * This function is a dynamic '__init' function actually,
++	 * so the tiny check for si_rwsem is unnecessary.
++	 */
++	/* AuRwMustWriteLock(&sbinfo->si_rwsem); */
++#ifdef CONFIG_DEBUG_FS
++	sbinfo->si_dbgaufs = NULL;
++	sbinfo->si_dbgaufs_xib = NULL;
++#ifdef CONFIG_AUFS_EXPORT
++	sbinfo->si_dbgaufs_xigen = NULL;
++#endif
++#endif
++}
++
++/* ---------------------------------------------------------------------- */
++
++static inline pid_t si_pid_bit(void)
++{
++	/* the origin of pid is 1, but the bitmap's is 0 */
++	return current->pid - 1;
++}
++
++static inline int si_pid_test(struct super_block *sb)
++{
++	pid_t bit = si_pid_bit();
++	if (bit < PID_MAX_DEFAULT)
++		return test_bit(bit, au_sbi(sb)->au_si_pid.bitmap);
++	else
++		return si_pid_test_slow(sb);
++}
++
++static inline void si_pid_set(struct super_block *sb)
++{
++	pid_t bit = si_pid_bit();
++	if (bit < PID_MAX_DEFAULT) {
++		AuDebugOn(test_bit(bit, au_sbi(sb)->au_si_pid.bitmap));
++		set_bit(bit, au_sbi(sb)->au_si_pid.bitmap);
++		/* smp_mb(); */
++	} else
++		si_pid_set_slow(sb);
++}
++
++static inline void si_pid_clr(struct super_block *sb)
++{
++	pid_t bit = si_pid_bit();
++	if (bit < PID_MAX_DEFAULT) {
++		AuDebugOn(!test_bit(bit, au_sbi(sb)->au_si_pid.bitmap));
++		clear_bit(bit, au_sbi(sb)->au_si_pid.bitmap);
++		/* smp_mb(); */
++	} else
++		si_pid_clr_slow(sb);
++}
++
++/* ---------------------------------------------------------------------- */
++
++/* lock superblock.
mainly for entry point functions */ ++/* ++ * __si_read_lock, __si_write_lock, ++ * __si_read_unlock, __si_write_unlock, __si_downgrade_lock ++ */ ++AuSimpleRwsemFuncs(__si, struct super_block *sb, &au_sbi(sb)->si_rwsem); ++ ++#define SiMustNoWaiters(sb) AuRwMustNoWaiters(&au_sbi(sb)->si_rwsem) ++#define SiMustAnyLock(sb) AuRwMustAnyLock(&au_sbi(sb)->si_rwsem) ++#define SiMustWriteLock(sb) AuRwMustWriteLock(&au_sbi(sb)->si_rwsem) ++ ++static inline void si_noflush_read_lock(struct super_block *sb) ++{ ++ __si_read_lock(sb); ++ si_pid_set(sb); ++} ++ ++static inline int si_noflush_read_trylock(struct super_block *sb) ++{ ++ int locked = __si_read_trylock(sb); ++ if (locked) ++ si_pid_set(sb); ++ return locked; ++} ++ ++static inline void si_noflush_write_lock(struct super_block *sb) ++{ ++ __si_write_lock(sb); ++ si_pid_set(sb); ++} ++ ++static inline int si_noflush_write_trylock(struct super_block *sb) ++{ ++ int locked = __si_write_trylock(sb); ++ if (locked) ++ si_pid_set(sb); ++ return locked; ++} ++ ++#if 0 /* unused */ ++static inline int si_read_trylock(struct super_block *sb, int flags) ++{ ++ if (au_ftest_lock(flags, FLUSH)) ++ au_nwt_flush(&au_sbi(sb)->si_nowait); ++ return si_noflush_read_trylock(sb); ++} ++#endif ++ ++static inline void si_read_unlock(struct super_block *sb) ++{ ++ si_pid_clr(sb); ++ __si_read_unlock(sb); ++} ++ ++#if 0 /* unused */ ++static inline int si_write_trylock(struct super_block *sb, int flags) ++{ ++ if (au_ftest_lock(flags, FLUSH)) ++ au_nwt_flush(&au_sbi(sb)->si_nowait); ++ return si_noflush_write_trylock(sb); ++} ++#endif ++ ++static inline void si_write_unlock(struct super_block *sb) ++{ ++ si_pid_clr(sb); ++ __si_write_unlock(sb); ++} ++ ++#if 0 /* unused */ ++static inline void si_downgrade_lock(struct super_block *sb) ++{ ++ __si_downgrade_lock(sb); ++} ++#endif ++ ++/* ---------------------------------------------------------------------- */ ++ ++static inline aufs_bindex_t au_sbend(struct super_block *sb) ++{ ++ SiMustAnyLock(sb); ++ return au_sbi(sb)->si_bend; ++} ++ ++static inline unsigned int au_mntflags(struct super_block *sb) ++{ ++ SiMustAnyLock(sb); ++ return au_sbi(sb)->si_mntflags; ++} ++ ++static inline unsigned int au_sigen(struct super_block *sb) ++{ ++ SiMustAnyLock(sb); ++ return au_sbi(sb)->si_generation; ++} ++ ++static inline void au_ninodes_inc(struct super_block *sb) ++{ ++ atomic_long_inc(&au_sbi(sb)->si_ninodes); ++} ++ ++static inline void au_ninodes_dec(struct super_block *sb) ++{ ++ AuDebugOn(!atomic_long_read(&au_sbi(sb)->si_ninodes)); ++ atomic_long_dec(&au_sbi(sb)->si_ninodes); ++} ++ ++static inline void au_nfiles_inc(struct super_block *sb) ++{ ++ atomic_long_inc(&au_sbi(sb)->si_nfiles); ++} ++ ++static inline void au_nfiles_dec(struct super_block *sb) ++{ ++ AuDebugOn(!atomic_long_read(&au_sbi(sb)->si_nfiles)); ++ atomic_long_dec(&au_sbi(sb)->si_nfiles); ++} ++ ++static inline struct au_branch *au_sbr(struct super_block *sb, ++ aufs_bindex_t bindex) ++{ ++ SiMustAnyLock(sb); ++ return au_sbi(sb)->si_branch[0 + bindex]; ++} ++ ++static inline void au_xino_brid_set(struct super_block *sb, aufs_bindex_t brid) ++{ ++ SiMustWriteLock(sb); ++ au_sbi(sb)->si_xino_brid = brid; ++} ++ ++static inline aufs_bindex_t au_xino_brid(struct super_block *sb) ++{ ++ SiMustAnyLock(sb); ++ return au_sbi(sb)->si_xino_brid; ++} ++ ++#endif /* __KERNEL__ */ ++#endif /* __AUFS_SUPER_H__ */ +diff -uNr linux-3.2.0-gentoo-r1.orig//fs/aufs/sysaufs.c linux-3.2.0-gentoo-r1/fs/aufs/sysaufs.c +--- linux-3.2.0-gentoo-r1.orig//fs/aufs/sysaufs.c 
1970-01-01 01:00:00.000000000 +0100 ++++ linux-3.2.0-gentoo-r1/fs/aufs/sysaufs.c 2012-01-17 12:11:24.958454530 +0100 +@@ -0,0 +1,105 @@ ++/* ++ * Copyright (C) 2005-2012 Junjiro R. Okajima ++ * ++ * This program, aufs is free software; you can redistribute it and/or modify ++ * it under the terms of the GNU General Public License as published by ++ * the Free Software Foundation; either version 2 of the License, or ++ * (at your option) any later version. ++ * ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ * GNU General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public License ++ * along with this program; if not, write to the Free Software ++ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA ++ */ ++ ++/* ++ * sysfs interface and lifetime management ++ * they are necessary regardless sysfs is disabled. ++ */ ++ ++#include ++#include "aufs.h" ++ ++unsigned long sysaufs_si_mask; ++struct kset *sysaufs_kset; ++ ++#define AuSiAttr(_name) { \ ++ .attr = { .name = __stringify(_name), .mode = 0444 }, \ ++ .show = sysaufs_si_##_name, \ ++} ++ ++static struct sysaufs_si_attr sysaufs_si_attr_xi_path = AuSiAttr(xi_path); ++struct attribute *sysaufs_si_attrs[] = { ++ &sysaufs_si_attr_xi_path.attr, ++ NULL, ++}; ++ ++static const struct sysfs_ops au_sbi_ops = { ++ .show = sysaufs_si_show ++}; ++ ++static struct kobj_type au_sbi_ktype = { ++ .release = au_si_free, ++ .sysfs_ops = &au_sbi_ops, ++ .default_attrs = sysaufs_si_attrs ++}; ++ ++/* ---------------------------------------------------------------------- */ ++ ++int sysaufs_si_init(struct au_sbinfo *sbinfo) ++{ ++ int err; ++ ++ sbinfo->si_kobj.kset = sysaufs_kset; ++ /* cf. sysaufs_name() */ ++ err = kobject_init_and_add ++ (&sbinfo->si_kobj, &au_sbi_ktype, /*&sysaufs_kset->kobj*/NULL, ++ SysaufsSiNamePrefix "%lx", sysaufs_si_id(sbinfo)); ++ ++ dbgaufs_si_null(sbinfo); ++ if (!err) { ++ err = dbgaufs_si_init(sbinfo); ++ if (unlikely(err)) ++ kobject_put(&sbinfo->si_kobj); ++ } ++ return err; ++} ++ ++void sysaufs_fin(void) ++{ ++ dbgaufs_fin(); ++ sysfs_remove_group(&sysaufs_kset->kobj, sysaufs_attr_group); ++ kset_unregister(sysaufs_kset); ++} ++ ++int __init sysaufs_init(void) ++{ ++ int err; ++ ++ do { ++ get_random_bytes(&sysaufs_si_mask, sizeof(sysaufs_si_mask)); ++ } while (!sysaufs_si_mask); ++ ++ err = -EINVAL; ++ sysaufs_kset = kset_create_and_add(AUFS_NAME, NULL, fs_kobj); ++ if (unlikely(!sysaufs_kset)) ++ goto out; ++ err = PTR_ERR(sysaufs_kset); ++ if (IS_ERR(sysaufs_kset)) ++ goto out; ++ err = sysfs_create_group(&sysaufs_kset->kobj, sysaufs_attr_group); ++ if (unlikely(err)) { ++ kset_unregister(sysaufs_kset); ++ goto out; ++ } ++ ++ err = dbgaufs_init(); ++ if (unlikely(err)) ++ sysaufs_fin(); ++out: ++ return err; ++} +diff -uNr linux-3.2.0-gentoo-r1.orig//fs/aufs/sysaufs.h linux-3.2.0-gentoo-r1/fs/aufs/sysaufs.h +--- linux-3.2.0-gentoo-r1.orig//fs/aufs/sysaufs.h 1970-01-01 01:00:00.000000000 +0100 ++++ linux-3.2.0-gentoo-r1/fs/aufs/sysaufs.h 2012-01-17 12:11:24.963084201 +0100 +@@ -0,0 +1,104 @@ ++/* ++ * Copyright (C) 2005-2012 Junjiro R. 
Okajima ++ * ++ * This program, aufs is free software; you can redistribute it and/or modify ++ * it under the terms of the GNU General Public License as published by ++ * the Free Software Foundation; either version 2 of the License, or ++ * (at your option) any later version. ++ * ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ * GNU General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public License ++ * along with this program; if not, write to the Free Software ++ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA ++ */ ++ ++/* ++ * sysfs interface and mount lifetime management ++ */ ++ ++#ifndef __SYSAUFS_H__ ++#define __SYSAUFS_H__ ++ ++#ifdef __KERNEL__ ++ ++#include ++#include "module.h" ++ ++struct super_block; ++struct au_sbinfo; ++ ++struct sysaufs_si_attr { ++ struct attribute attr; ++ int (*show)(struct seq_file *seq, struct super_block *sb); ++}; ++ ++/* ---------------------------------------------------------------------- */ ++ ++/* sysaufs.c */ ++extern unsigned long sysaufs_si_mask; ++extern struct kset *sysaufs_kset; ++extern struct attribute *sysaufs_si_attrs[]; ++int sysaufs_si_init(struct au_sbinfo *sbinfo); ++int __init sysaufs_init(void); ++void sysaufs_fin(void); ++ ++/* ---------------------------------------------------------------------- */ ++ ++/* some people doesn't like to show a pointer in kernel */ ++static inline unsigned long sysaufs_si_id(struct au_sbinfo *sbinfo) ++{ ++ return sysaufs_si_mask ^ (unsigned long)sbinfo; ++} ++ ++#define SysaufsSiNamePrefix "si_" ++#define SysaufsSiNameLen (sizeof(SysaufsSiNamePrefix) + 16) ++static inline void sysaufs_name(struct au_sbinfo *sbinfo, char *name) ++{ ++ snprintf(name, SysaufsSiNameLen, SysaufsSiNamePrefix "%lx", ++ sysaufs_si_id(sbinfo)); ++} ++ ++struct au_branch; ++#ifdef CONFIG_SYSFS ++/* sysfs.c */ ++extern struct attribute_group *sysaufs_attr_group; ++ ++int sysaufs_si_xi_path(struct seq_file *seq, struct super_block *sb); ++ssize_t sysaufs_si_show(struct kobject *kobj, struct attribute *attr, ++ char *buf); ++ ++void sysaufs_br_init(struct au_branch *br); ++void sysaufs_brs_add(struct super_block *sb, aufs_bindex_t bindex); ++void sysaufs_brs_del(struct super_block *sb, aufs_bindex_t bindex); ++ ++#define sysaufs_brs_init() do {} while (0) ++ ++#else ++#define sysaufs_attr_group NULL ++ ++AuStubInt0(sysaufs_si_xi_path, struct seq_file *seq, struct super_block *sb) ++ ++static inline ++ssize_t sysaufs_si_show(struct kobject *kobj, struct attribute *attr, ++ char *buf) ++{ ++ return 0; ++} ++ ++AuStubVoid(sysaufs_br_init, struct au_branch *br) ++AuStubVoid(sysaufs_brs_add, struct super_block *sb, aufs_bindex_t bindex) ++AuStubVoid(sysaufs_brs_del, struct super_block *sb, aufs_bindex_t bindex) ++ ++static inline void sysaufs_brs_init(void) ++{ ++ sysaufs_brs = 0; ++} ++ ++#endif /* CONFIG_SYSFS */ ++ ++#endif /* __KERNEL__ */ ++#endif /* __SYSAUFS_H__ */ +diff -uNr linux-3.2.0-gentoo-r1.orig//fs/aufs/sysfs.c linux-3.2.0-gentoo-r1/fs/aufs/sysfs.c +--- linux-3.2.0-gentoo-r1.orig//fs/aufs/sysfs.c 1970-01-01 01:00:00.000000000 +0100 ++++ linux-3.2.0-gentoo-r1/fs/aufs/sysfs.c 2012-01-17 12:11:24.983917722 +0100 +@@ -0,0 +1,257 @@ ++/* ++ * Copyright (C) 2005-2012 Junjiro R. 
Okajima ++ * ++ * This program, aufs is free software; you can redistribute it and/or modify ++ * it under the terms of the GNU General Public License as published by ++ * the Free Software Foundation; either version 2 of the License, or ++ * (at your option) any later version. ++ * ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ * GNU General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public License ++ * along with this program; if not, write to the Free Software ++ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA ++ */ ++ ++/* ++ * sysfs interface ++ */ ++ ++#include ++#include "aufs.h" ++ ++#ifdef CONFIG_AUFS_FS_MODULE ++/* this entry violates the "one line per file" policy of sysfs */ ++static ssize_t config_show(struct kobject *kobj, struct kobj_attribute *attr, ++ char *buf) ++{ ++ ssize_t err; ++ static char *conf = ++/* this file is generated at compiling */ ++#include "conf.str" ++ ; ++ ++ err = snprintf(buf, PAGE_SIZE, conf); ++ if (unlikely(err >= PAGE_SIZE)) ++ err = -EFBIG; ++ return err; ++} ++ ++static struct kobj_attribute au_config_attr = __ATTR_RO(config); ++#endif ++ ++static struct attribute *au_attr[] = { ++#ifdef CONFIG_AUFS_FS_MODULE ++ &au_config_attr.attr, ++#endif ++ NULL, /* need to NULL terminate the list of attributes */ ++}; ++ ++static struct attribute_group sysaufs_attr_group_body = { ++ .attrs = au_attr ++}; ++ ++struct attribute_group *sysaufs_attr_group = &sysaufs_attr_group_body; ++ ++/* ---------------------------------------------------------------------- */ ++ ++int sysaufs_si_xi_path(struct seq_file *seq, struct super_block *sb) ++{ ++ int err; ++ ++ SiMustAnyLock(sb); ++ ++ err = 0; ++ if (au_opt_test(au_mntflags(sb), XINO)) { ++ err = au_xino_path(seq, au_sbi(sb)->si_xib); ++ seq_putc(seq, '\n'); ++ } ++ return err; ++} ++ ++/* ++ * the lifetime of branch is independent from the entry under sysfs. ++ * sysfs handles the lifetime of the entry, and never call ->show() after it is ++ * unlinked. 
++ */ ++static int sysaufs_si_br(struct seq_file *seq, struct super_block *sb, ++ aufs_bindex_t bindex) ++{ ++ int err; ++ struct path path; ++ struct dentry *root; ++ struct au_branch *br; ++ char *perm; ++ ++ AuDbg("b%d\n", bindex); ++ ++ err = 0; ++ root = sb->s_root; ++ di_read_lock_parent(root, !AuLock_IR); ++ br = au_sbr(sb, bindex); ++ path.mnt = br->br_mnt; ++ path.dentry = au_h_dptr(root, bindex); ++ au_seq_path(seq, &path); ++ di_read_unlock(root, !AuLock_IR); ++ perm = au_optstr_br_perm(br->br_perm); ++ if (perm) { ++ err = seq_printf(seq, "=%s\n", perm); ++ kfree(perm); ++ if (err == -1) ++ err = -E2BIG; ++ } else ++ err = -ENOMEM; ++ return err; ++} ++ ++/* ---------------------------------------------------------------------- */ ++ ++static struct seq_file *au_seq(char *p, ssize_t len) ++{ ++ struct seq_file *seq; ++ ++ seq = kzalloc(sizeof(*seq), GFP_NOFS); ++ if (seq) { ++ /* mutex_init(&seq.lock); */ ++ seq->buf = p; ++ seq->size = len; ++ return seq; /* success */ ++ } ++ ++ seq = ERR_PTR(-ENOMEM); ++ return seq; ++} ++ ++#define SysaufsBr_PREFIX "br" ++ ++/* todo: file size may exceed PAGE_SIZE */ ++ssize_t sysaufs_si_show(struct kobject *kobj, struct attribute *attr, ++ char *buf) ++{ ++ ssize_t err; ++ long l; ++ aufs_bindex_t bend; ++ struct au_sbinfo *sbinfo; ++ struct super_block *sb; ++ struct seq_file *seq; ++ char *name; ++ struct attribute **cattr; ++ ++ sbinfo = container_of(kobj, struct au_sbinfo, si_kobj); ++ sb = sbinfo->si_sb; ++ ++ /* ++ * prevent a race condition between sysfs and aufs. ++ * for instance, sysfs_file_read() calls sysfs_get_active_two() which ++ * prohibits maintaining the sysfs entries. ++ * hew we acquire read lock after sysfs_get_active_two(). ++ * on the other hand, the remount process may maintain the sysfs/aufs ++ * entries after acquiring write lock. ++ * it can cause a deadlock. ++ * simply we gave up processing read here. 
++ */ ++ err = -EBUSY; ++ if (unlikely(!si_noflush_read_trylock(sb))) ++ goto out; ++ ++ seq = au_seq(buf, PAGE_SIZE); ++ err = PTR_ERR(seq); ++ if (IS_ERR(seq)) ++ goto out_unlock; ++ ++ name = (void *)attr->name; ++ cattr = sysaufs_si_attrs; ++ while (*cattr) { ++ if (!strcmp(name, (*cattr)->name)) { ++ err = container_of(*cattr, struct sysaufs_si_attr, attr) ++ ->show(seq, sb); ++ goto out_seq; ++ } ++ cattr++; ++ } ++ ++ bend = au_sbend(sb); ++ if (!strncmp(name, SysaufsBr_PREFIX, sizeof(SysaufsBr_PREFIX) - 1)) { ++ name += sizeof(SysaufsBr_PREFIX) - 1; ++ err = kstrtol(name, 10, &l); ++ if (!err) { ++ if (l <= bend) ++ err = sysaufs_si_br(seq, sb, (aufs_bindex_t)l); ++ else ++ err = -ENOENT; ++ } ++ goto out_seq; ++ } ++ BUG(); ++ ++out_seq: ++ if (!err) { ++ err = seq->count; ++ /* sysfs limit */ ++ if (unlikely(err == PAGE_SIZE)) ++ err = -EFBIG; ++ } ++ kfree(seq); ++out_unlock: ++ si_read_unlock(sb); ++out: ++ return err; ++} ++ ++/* ---------------------------------------------------------------------- */ ++ ++void sysaufs_br_init(struct au_branch *br) ++{ ++ struct attribute *attr = &br->br_attr; ++ ++ sysfs_attr_init(attr); ++ attr->name = br->br_name; ++ attr->mode = S_IRUGO; ++} ++ ++void sysaufs_brs_del(struct super_block *sb, aufs_bindex_t bindex) ++{ ++ struct au_branch *br; ++ struct kobject *kobj; ++ aufs_bindex_t bend; ++ ++ dbgaufs_brs_del(sb, bindex); ++ ++ if (!sysaufs_brs) ++ return; ++ ++ kobj = &au_sbi(sb)->si_kobj; ++ bend = au_sbend(sb); ++ for (; bindex <= bend; bindex++) { ++ br = au_sbr(sb, bindex); ++ sysfs_remove_file(kobj, &br->br_attr); ++ } ++} ++ ++void sysaufs_brs_add(struct super_block *sb, aufs_bindex_t bindex) ++{ ++ int err; ++ aufs_bindex_t bend; ++ struct kobject *kobj; ++ struct au_branch *br; ++ ++ dbgaufs_brs_add(sb, bindex); ++ ++ if (!sysaufs_brs) ++ return; ++ ++ kobj = &au_sbi(sb)->si_kobj; ++ bend = au_sbend(sb); ++ for (; bindex <= bend; bindex++) { ++ br = au_sbr(sb, bindex); ++ snprintf(br->br_name, sizeof(br->br_name), SysaufsBr_PREFIX ++ "%d", bindex); ++ err = sysfs_create_file(kobj, &br->br_attr); ++ if (unlikely(err)) ++ pr_warning("failed %s under sysfs(%d)\n", ++ br->br_name, err); ++ } ++} +diff -uNr linux-3.2.0-gentoo-r1.orig//fs/aufs/sysrq.c linux-3.2.0-gentoo-r1/fs/aufs/sysrq.c +--- linux-3.2.0-gentoo-r1.orig//fs/aufs/sysrq.c 1970-01-01 01:00:00.000000000 +0100 ++++ linux-3.2.0-gentoo-r1/fs/aufs/sysrq.c 2012-01-17 12:11:25.009380916 +0100 +@@ -0,0 +1,148 @@ ++/* ++ * Copyright (C) 2005-2012 Junjiro R. Okajima ++ * ++ * This program, aufs is free software; you can redistribute it and/or modify ++ * it under the terms of the GNU General Public License as published by ++ * the Free Software Foundation; either version 2 of the License, or ++ * (at your option) any later version. ++ * ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ * GNU General Public License for more details. 
++ * ++ * You should have received a copy of the GNU General Public License ++ * along with this program; if not, write to the Free Software ++ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA ++ */ ++ ++/* ++ * magic sysrq hanlder ++ */ ++ ++/* #include */ ++#include ++#include "aufs.h" ++ ++/* ---------------------------------------------------------------------- */ ++ ++static void sysrq_sb(struct super_block *sb) ++{ ++ char *plevel; ++ struct au_sbinfo *sbinfo; ++ struct file *file; ++ ++ plevel = au_plevel; ++ au_plevel = KERN_WARNING; ++ ++ sbinfo = au_sbi(sb); ++ /* since we define pr_fmt, call printk directly */ ++ printk(KERN_WARNING "si=%lx\n", sysaufs_si_id(sbinfo)); ++ printk(KERN_WARNING AUFS_NAME ": superblock\n"); ++ au_dpri_sb(sb); ++ ++#if 0 ++ printk(KERN_WARNING AUFS_NAME ": root dentry\n"); ++ au_dpri_dentry(sb->s_root); ++ printk(KERN_WARNING AUFS_NAME ": root inode\n"); ++ au_dpri_inode(sb->s_root->d_inode); ++#endif ++ ++#if 0 ++ do { ++ int err, i, j, ndentry; ++ struct au_dcsub_pages dpages; ++ struct au_dpage *dpage; ++ ++ err = au_dpages_init(&dpages, GFP_ATOMIC); ++ if (unlikely(err)) ++ break; ++ err = au_dcsub_pages(&dpages, sb->s_root, NULL, NULL); ++ if (!err) ++ for (i = 0; i < dpages.ndpage; i++) { ++ dpage = dpages.dpages + i; ++ ndentry = dpage->ndentry; ++ for (j = 0; j < ndentry; j++) ++ au_dpri_dentry(dpage->dentries[j]); ++ } ++ au_dpages_free(&dpages); ++ } while (0); ++#endif ++ ++#if 1 ++ { ++ struct inode *i; ++ printk(KERN_WARNING AUFS_NAME ": isolated inode\n"); ++ spin_lock(&inode_sb_list_lock); ++ list_for_each_entry(i, &sb->s_inodes, i_sb_list) { ++ spin_lock(&i->i_lock); ++ if (1 || list_empty(&i->i_dentry)) ++ au_dpri_inode(i); ++ spin_unlock(&i->i_lock); ++ } ++ spin_unlock(&inode_sb_list_lock); ++ } ++#endif ++ printk(KERN_WARNING AUFS_NAME ": files\n"); ++ lg_global_lock(files_lglock); ++ do_file_list_for_each_entry(sb, file) { ++ umode_t mode; ++ mode = file->f_dentry->d_inode->i_mode; ++ if (!special_file(mode) || au_special_file(mode)) ++ au_dpri_file(file); ++ } while_file_list_for_each_entry; ++ lg_global_unlock(files_lglock); ++ printk(KERN_WARNING AUFS_NAME ": done\n"); ++ ++ au_plevel = plevel; ++} ++ ++/* ---------------------------------------------------------------------- */ ++ ++/* module parameter */ ++static char *aufs_sysrq_key = "a"; ++module_param_named(sysrq, aufs_sysrq_key, charp, S_IRUGO); ++MODULE_PARM_DESC(sysrq, "MagicSysRq key for " AUFS_NAME); ++ ++static void au_sysrq(int key __maybe_unused) ++{ ++ struct au_sbinfo *sbinfo; ++ ++ lockdep_off(); ++ au_sbilist_lock(); ++ list_for_each_entry(sbinfo, &au_sbilist.head, si_list) ++ sysrq_sb(sbinfo->si_sb); ++ au_sbilist_unlock(); ++ lockdep_on(); ++} ++ ++static struct sysrq_key_op au_sysrq_op = { ++ .handler = au_sysrq, ++ .help_msg = "Aufs", ++ .action_msg = "Aufs", ++ .enable_mask = SYSRQ_ENABLE_DUMP ++}; ++ ++/* ---------------------------------------------------------------------- */ ++ ++int __init au_sysrq_init(void) ++{ ++ int err; ++ char key; ++ ++ err = -1; ++ key = *aufs_sysrq_key; ++ if ('a' <= key && key <= 'z') ++ err = register_sysrq_key(key, &au_sysrq_op); ++ if (unlikely(err)) ++ pr_err("err %d, sysrq=%c\n", err, key); ++ return err; ++} ++ ++void au_sysrq_fin(void) ++{ ++ int err; ++ err = unregister_sysrq_key(*aufs_sysrq_key, &au_sysrq_op); ++ if (unlikely(err)) ++ pr_err("err %d (ignored)\n", err); ++} +diff -uNr linux-3.2.0-gentoo-r1.orig//fs/aufs/vdir.c linux-3.2.0-gentoo-r1/fs/aufs/vdir.c +--- 
linux-3.2.0-gentoo-r1.orig//fs/aufs/vdir.c 1970-01-01 01:00:00.000000000 +0100 ++++ linux-3.2.0-gentoo-r1/fs/aufs/vdir.c 2012-01-17 12:11:25.027899601 +0100 +@@ -0,0 +1,885 @@ ++/* ++ * Copyright (C) 2005-2012 Junjiro R. Okajima ++ * ++ * This program, aufs is free software; you can redistribute it and/or modify ++ * it under the terms of the GNU General Public License as published by ++ * the Free Software Foundation; either version 2 of the License, or ++ * (at your option) any later version. ++ * ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ * GNU General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public License ++ * along with this program; if not, write to the Free Software ++ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA ++ */ ++ ++/* ++ * virtual or vertical directory ++ */ ++ ++#include "aufs.h" ++ ++static unsigned int calc_size(int nlen) ++{ ++ return ALIGN(sizeof(struct au_vdir_de) + nlen, sizeof(ino_t)); ++} ++ ++static int set_deblk_end(union au_vdir_deblk_p *p, ++ union au_vdir_deblk_p *deblk_end) ++{ ++ if (calc_size(0) <= deblk_end->deblk - p->deblk) { ++ p->de->de_str.len = 0; ++ /* smp_mb(); */ ++ return 0; ++ } ++ return -1; /* error */ ++} ++ ++/* returns true or false */ ++static int is_deblk_end(union au_vdir_deblk_p *p, ++ union au_vdir_deblk_p *deblk_end) ++{ ++ if (calc_size(0) <= deblk_end->deblk - p->deblk) ++ return !p->de->de_str.len; ++ return 1; ++} ++ ++static unsigned char *last_deblk(struct au_vdir *vdir) ++{ ++ return vdir->vd_deblk[vdir->vd_nblk - 1]; ++} ++ ++/* ---------------------------------------------------------------------- */ ++ ++/* estimate the apropriate size for name hash table */ ++unsigned int au_rdhash_est(loff_t sz) ++{ ++ unsigned int n; ++ ++ n = UINT_MAX; ++ sz >>= 10; ++ if (sz < n) ++ n = sz; ++ if (sz < AUFS_RDHASH_DEF) ++ n = AUFS_RDHASH_DEF; ++ /* pr_info("n %u\n", n); */ ++ return n; ++} ++ ++/* ++ * the allocated memory has to be freed by ++ * au_nhash_wh_free() or au_nhash_de_free(). 
++ */ ++int au_nhash_alloc(struct au_nhash *nhash, unsigned int num_hash, gfp_t gfp) ++{ ++ struct hlist_head *head; ++ unsigned int u; ++ ++ head = kmalloc(sizeof(*nhash->nh_head) * num_hash, gfp); ++ if (head) { ++ nhash->nh_num = num_hash; ++ nhash->nh_head = head; ++ for (u = 0; u < num_hash; u++) ++ INIT_HLIST_HEAD(head++); ++ return 0; /* success */ ++ } ++ ++ return -ENOMEM; ++} ++ ++static void nhash_count(struct hlist_head *head) ++{ ++#if 0 ++ unsigned long n; ++ struct hlist_node *pos; ++ ++ n = 0; ++ hlist_for_each(pos, head) ++ n++; ++ pr_info("%lu\n", n); ++#endif ++} ++ ++static void au_nhash_wh_do_free(struct hlist_head *head) ++{ ++ struct au_vdir_wh *tpos; ++ struct hlist_node *pos, *node; ++ ++ hlist_for_each_entry_safe(tpos, pos, node, head, wh_hash) { ++ /* hlist_del(pos); */ ++ kfree(tpos); ++ } ++} ++ ++static void au_nhash_de_do_free(struct hlist_head *head) ++{ ++ struct au_vdir_dehstr *tpos; ++ struct hlist_node *pos, *node; ++ ++ hlist_for_each_entry_safe(tpos, pos, node, head, hash) { ++ /* hlist_del(pos); */ ++ au_cache_free_vdir_dehstr(tpos); ++ } ++} ++ ++static void au_nhash_do_free(struct au_nhash *nhash, ++ void (*free)(struct hlist_head *head)) ++{ ++ unsigned int n; ++ struct hlist_head *head; ++ ++ n = nhash->nh_num; ++ if (!n) ++ return; ++ ++ head = nhash->nh_head; ++ while (n-- > 0) { ++ nhash_count(head); ++ free(head++); ++ } ++ kfree(nhash->nh_head); ++} ++ ++void au_nhash_wh_free(struct au_nhash *whlist) ++{ ++ au_nhash_do_free(whlist, au_nhash_wh_do_free); ++} ++ ++static void au_nhash_de_free(struct au_nhash *delist) ++{ ++ au_nhash_do_free(delist, au_nhash_de_do_free); ++} ++ ++/* ---------------------------------------------------------------------- */ ++ ++int au_nhash_test_longer_wh(struct au_nhash *whlist, aufs_bindex_t btgt, ++ int limit) ++{ ++ int num; ++ unsigned int u, n; ++ struct hlist_head *head; ++ struct au_vdir_wh *tpos; ++ struct hlist_node *pos; ++ ++ num = 0; ++ n = whlist->nh_num; ++ head = whlist->nh_head; ++ for (u = 0; u < n; u++, head++) ++ hlist_for_each_entry(tpos, pos, head, wh_hash) ++ if (tpos->wh_bindex == btgt && ++num > limit) ++ return 1; ++ return 0; ++} ++ ++static struct hlist_head *au_name_hash(struct au_nhash *nhash, ++ unsigned char *name, ++ unsigned int len) ++{ ++ unsigned int v; ++ /* const unsigned int magic_bit = 12; */ ++ ++ AuDebugOn(!nhash->nh_num || !nhash->nh_head); ++ ++ v = 0; ++ while (len--) ++ v += *name++; ++ /* v = hash_long(v, magic_bit); */ ++ v %= nhash->nh_num; ++ return nhash->nh_head + v; ++} ++ ++static int au_nhash_test_name(struct au_vdir_destr *str, const char *name, ++ int nlen) ++{ ++ return str->len == nlen && !memcmp(str->name, name, nlen); ++} ++ ++/* returns found or not */ ++int au_nhash_test_known_wh(struct au_nhash *whlist, char *name, int nlen) ++{ ++ struct hlist_head *head; ++ struct au_vdir_wh *tpos; ++ struct hlist_node *pos; ++ struct au_vdir_destr *str; ++ ++ head = au_name_hash(whlist, name, nlen); ++ hlist_for_each_entry(tpos, pos, head, wh_hash) { ++ str = &tpos->wh_str; ++ AuDbg("%.*s\n", str->len, str->name); ++ if (au_nhash_test_name(str, name, nlen)) ++ return 1; ++ } ++ return 0; ++} ++ ++/* returns found(true) or not */ ++static int test_known(struct au_nhash *delist, char *name, int nlen) ++{ ++ struct hlist_head *head; ++ struct au_vdir_dehstr *tpos; ++ struct hlist_node *pos; ++ struct au_vdir_destr *str; ++ ++ head = au_name_hash(delist, name, nlen); ++ hlist_for_each_entry(tpos, pos, head, hash) { ++ str = tpos->str; ++ AuDbg("%.*s\n", str->len, 
str->name); ++ if (au_nhash_test_name(str, name, nlen)) ++ return 1; ++ } ++ return 0; ++} ++ ++static void au_shwh_init_wh(struct au_vdir_wh *wh, ino_t ino, ++ unsigned char d_type) ++{ ++#ifdef CONFIG_AUFS_SHWH ++ wh->wh_ino = ino; ++ wh->wh_type = d_type; ++#endif ++} ++ ++/* ---------------------------------------------------------------------- */ ++ ++int au_nhash_append_wh(struct au_nhash *whlist, char *name, int nlen, ino_t ino, ++ unsigned int d_type, aufs_bindex_t bindex, ++ unsigned char shwh) ++{ ++ int err; ++ struct au_vdir_destr *str; ++ struct au_vdir_wh *wh; ++ ++ AuDbg("%.*s\n", nlen, name); ++ AuDebugOn(!whlist->nh_num || !whlist->nh_head); ++ ++ err = -ENOMEM; ++ wh = kmalloc(sizeof(*wh) + nlen, GFP_NOFS); ++ if (unlikely(!wh)) ++ goto out; ++ ++ err = 0; ++ wh->wh_bindex = bindex; ++ if (shwh) ++ au_shwh_init_wh(wh, ino, d_type); ++ str = &wh->wh_str; ++ str->len = nlen; ++ memcpy(str->name, name, nlen); ++ hlist_add_head(&wh->wh_hash, au_name_hash(whlist, name, nlen)); ++ /* smp_mb(); */ ++ ++out: ++ return err; ++} ++ ++static int append_deblk(struct au_vdir *vdir) ++{ ++ int err; ++ unsigned long ul; ++ const unsigned int deblk_sz = vdir->vd_deblk_sz; ++ union au_vdir_deblk_p p, deblk_end; ++ unsigned char **o; ++ ++ err = -ENOMEM; ++ o = krealloc(vdir->vd_deblk, sizeof(*o) * (vdir->vd_nblk + 1), ++ GFP_NOFS); ++ if (unlikely(!o)) ++ goto out; ++ ++ vdir->vd_deblk = o; ++ p.deblk = kmalloc(deblk_sz, GFP_NOFS); ++ if (p.deblk) { ++ ul = vdir->vd_nblk++; ++ vdir->vd_deblk[ul] = p.deblk; ++ vdir->vd_last.ul = ul; ++ vdir->vd_last.p.deblk = p.deblk; ++ deblk_end.deblk = p.deblk + deblk_sz; ++ err = set_deblk_end(&p, &deblk_end); ++ } ++ ++out: ++ return err; ++} ++ ++static int append_de(struct au_vdir *vdir, char *name, int nlen, ino_t ino, ++ unsigned int d_type, struct au_nhash *delist) ++{ ++ int err; ++ unsigned int sz; ++ const unsigned int deblk_sz = vdir->vd_deblk_sz; ++ union au_vdir_deblk_p p, *room, deblk_end; ++ struct au_vdir_dehstr *dehstr; ++ ++ p.deblk = last_deblk(vdir); ++ deblk_end.deblk = p.deblk + deblk_sz; ++ room = &vdir->vd_last.p; ++ AuDebugOn(room->deblk < p.deblk || deblk_end.deblk <= room->deblk ++ || !is_deblk_end(room, &deblk_end)); ++ ++ sz = calc_size(nlen); ++ if (unlikely(sz > deblk_end.deblk - room->deblk)) { ++ err = append_deblk(vdir); ++ if (unlikely(err)) ++ goto out; ++ ++ p.deblk = last_deblk(vdir); ++ deblk_end.deblk = p.deblk + deblk_sz; ++ /* smp_mb(); */ ++ AuDebugOn(room->deblk != p.deblk); ++ } ++ ++ err = -ENOMEM; ++ dehstr = au_cache_alloc_vdir_dehstr(); ++ if (unlikely(!dehstr)) ++ goto out; ++ ++ dehstr->str = &room->de->de_str; ++ hlist_add_head(&dehstr->hash, au_name_hash(delist, name, nlen)); ++ room->de->de_ino = ino; ++ room->de->de_type = d_type; ++ room->de->de_str.len = nlen; ++ memcpy(room->de->de_str.name, name, nlen); ++ ++ err = 0; ++ room->deblk += sz; ++ if (unlikely(set_deblk_end(room, &deblk_end))) ++ err = append_deblk(vdir); ++ /* smp_mb(); */ ++ ++out: ++ return err; ++} ++ ++/* ---------------------------------------------------------------------- */ ++ ++void au_vdir_free(struct au_vdir *vdir) ++{ ++ unsigned char **deblk; ++ ++ deblk = vdir->vd_deblk; ++ while (vdir->vd_nblk--) ++ kfree(*deblk++); ++ kfree(vdir->vd_deblk); ++ au_cache_free_vdir(vdir); ++} ++ ++static struct au_vdir *alloc_vdir(struct file *file) ++{ ++ struct au_vdir *vdir; ++ struct super_block *sb; ++ int err; ++ ++ sb = file->f_dentry->d_sb; ++ SiMustAnyLock(sb); ++ ++ err = -ENOMEM; ++ vdir = au_cache_alloc_vdir(); ++ if 
(unlikely(!vdir)) ++ goto out; ++ ++ vdir->vd_deblk = kzalloc(sizeof(*vdir->vd_deblk), GFP_NOFS); ++ if (unlikely(!vdir->vd_deblk)) ++ goto out_free; ++ ++ vdir->vd_deblk_sz = au_sbi(sb)->si_rdblk; ++ if (!vdir->vd_deblk_sz) { ++ /* estimate the apropriate size for deblk */ ++ vdir->vd_deblk_sz = au_dir_size(file, /*dentry*/NULL); ++ /* pr_info("vd_deblk_sz %u\n", vdir->vd_deblk_sz); */ ++ } ++ vdir->vd_nblk = 0; ++ vdir->vd_version = 0; ++ vdir->vd_jiffy = 0; ++ err = append_deblk(vdir); ++ if (!err) ++ return vdir; /* success */ ++ ++ kfree(vdir->vd_deblk); ++ ++out_free: ++ au_cache_free_vdir(vdir); ++out: ++ vdir = ERR_PTR(err); ++ return vdir; ++} ++ ++static int reinit_vdir(struct au_vdir *vdir) ++{ ++ int err; ++ union au_vdir_deblk_p p, deblk_end; ++ ++ while (vdir->vd_nblk > 1) { ++ kfree(vdir->vd_deblk[vdir->vd_nblk - 1]); ++ /* vdir->vd_deblk[vdir->vd_nblk - 1] = NULL; */ ++ vdir->vd_nblk--; ++ } ++ p.deblk = vdir->vd_deblk[0]; ++ deblk_end.deblk = p.deblk + vdir->vd_deblk_sz; ++ err = set_deblk_end(&p, &deblk_end); ++ /* keep vd_dblk_sz */ ++ vdir->vd_last.ul = 0; ++ vdir->vd_last.p.deblk = vdir->vd_deblk[0]; ++ vdir->vd_version = 0; ++ vdir->vd_jiffy = 0; ++ /* smp_mb(); */ ++ return err; ++} ++ ++/* ---------------------------------------------------------------------- */ ++ ++#define AuFillVdir_CALLED 1 ++#define AuFillVdir_WHABLE (1 << 1) ++#define AuFillVdir_SHWH (1 << 2) ++#define au_ftest_fillvdir(flags, name) ((flags) & AuFillVdir_##name) ++#define au_fset_fillvdir(flags, name) \ ++ do { (flags) |= AuFillVdir_##name; } while (0) ++#define au_fclr_fillvdir(flags, name) \ ++ do { (flags) &= ~AuFillVdir_##name; } while (0) ++ ++#ifndef CONFIG_AUFS_SHWH ++#undef AuFillVdir_SHWH ++#define AuFillVdir_SHWH 0 ++#endif ++ ++struct fillvdir_arg { ++ struct file *file; ++ struct au_vdir *vdir; ++ struct au_nhash delist; ++ struct au_nhash whlist; ++ aufs_bindex_t bindex; ++ unsigned int flags; ++ int err; ++}; ++ ++static int fillvdir(void *__arg, const char *__name, int nlen, ++ loff_t offset __maybe_unused, u64 h_ino, ++ unsigned int d_type) ++{ ++ struct fillvdir_arg *arg = __arg; ++ char *name = (void *)__name; ++ struct super_block *sb; ++ ino_t ino; ++ const unsigned char shwh = !!au_ftest_fillvdir(arg->flags, SHWH); ++ ++ arg->err = 0; ++ sb = arg->file->f_dentry->d_sb; ++ au_fset_fillvdir(arg->flags, CALLED); ++ /* smp_mb(); */ ++ if (nlen <= AUFS_WH_PFX_LEN ++ || memcmp(name, AUFS_WH_PFX, AUFS_WH_PFX_LEN)) { ++ if (test_known(&arg->delist, name, nlen) ++ || au_nhash_test_known_wh(&arg->whlist, name, nlen)) ++ goto out; /* already exists or whiteouted */ ++ ++ sb = arg->file->f_dentry->d_sb; ++ arg->err = au_ino(sb, arg->bindex, h_ino, d_type, &ino); ++ if (!arg->err) { ++ if (unlikely(nlen > AUFS_MAX_NAMELEN)) ++ d_type = DT_UNKNOWN; ++ arg->err = append_de(arg->vdir, name, nlen, ino, ++ d_type, &arg->delist); ++ } ++ } else if (au_ftest_fillvdir(arg->flags, WHABLE)) { ++ name += AUFS_WH_PFX_LEN; ++ nlen -= AUFS_WH_PFX_LEN; ++ if (au_nhash_test_known_wh(&arg->whlist, name, nlen)) ++ goto out; /* already whiteouted */ ++ ++ if (shwh) ++ arg->err = au_wh_ino(sb, arg->bindex, h_ino, d_type, ++ &ino); ++ if (!arg->err) { ++ if (nlen <= AUFS_MAX_NAMELEN + AUFS_WH_PFX_LEN) ++ d_type = DT_UNKNOWN; ++ arg->err = au_nhash_append_wh ++ (&arg->whlist, name, nlen, ino, d_type, ++ arg->bindex, shwh); ++ } ++ } ++ ++out: ++ if (!arg->err) ++ arg->vdir->vd_jiffy = jiffies; ++ /* smp_mb(); */ ++ AuTraceErr(arg->err); ++ return arg->err; ++} ++ ++static int au_handle_shwh(struct 
super_block *sb, struct au_vdir *vdir, ++ struct au_nhash *whlist, struct au_nhash *delist) ++{ ++#ifdef CONFIG_AUFS_SHWH ++ int err; ++ unsigned int nh, u; ++ struct hlist_head *head; ++ struct au_vdir_wh *tpos; ++ struct hlist_node *pos, *n; ++ char *p, *o; ++ struct au_vdir_destr *destr; ++ ++ AuDebugOn(!au_opt_test(au_mntflags(sb), SHWH)); ++ ++ err = -ENOMEM; ++ o = p = __getname_gfp(GFP_NOFS); ++ if (unlikely(!p)) ++ goto out; ++ ++ err = 0; ++ nh = whlist->nh_num; ++ memcpy(p, AUFS_WH_PFX, AUFS_WH_PFX_LEN); ++ p += AUFS_WH_PFX_LEN; ++ for (u = 0; u < nh; u++) { ++ head = whlist->nh_head + u; ++ hlist_for_each_entry_safe(tpos, pos, n, head, wh_hash) { ++ destr = &tpos->wh_str; ++ memcpy(p, destr->name, destr->len); ++ err = append_de(vdir, o, destr->len + AUFS_WH_PFX_LEN, ++ tpos->wh_ino, tpos->wh_type, delist); ++ if (unlikely(err)) ++ break; ++ } ++ } ++ ++ __putname(o); ++ ++out: ++ AuTraceErr(err); ++ return err; ++#else ++ return 0; ++#endif ++} ++ ++static int au_do_read_vdir(struct fillvdir_arg *arg) ++{ ++ int err; ++ unsigned int rdhash; ++ loff_t offset; ++ aufs_bindex_t bend, bindex, bstart; ++ unsigned char shwh; ++ struct file *hf, *file; ++ struct super_block *sb; ++ ++ file = arg->file; ++ sb = file->f_dentry->d_sb; ++ SiMustAnyLock(sb); ++ ++ rdhash = au_sbi(sb)->si_rdhash; ++ if (!rdhash) ++ rdhash = au_rdhash_est(au_dir_size(file, /*dentry*/NULL)); ++ err = au_nhash_alloc(&arg->delist, rdhash, GFP_NOFS); ++ if (unlikely(err)) ++ goto out; ++ err = au_nhash_alloc(&arg->whlist, rdhash, GFP_NOFS); ++ if (unlikely(err)) ++ goto out_delist; ++ ++ err = 0; ++ arg->flags = 0; ++ shwh = 0; ++ if (au_opt_test(au_mntflags(sb), SHWH)) { ++ shwh = 1; ++ au_fset_fillvdir(arg->flags, SHWH); ++ } ++ bstart = au_fbstart(file); ++ bend = au_fbend_dir(file); ++ for (bindex = bstart; !err && bindex <= bend; bindex++) { ++ hf = au_hf_dir(file, bindex); ++ if (!hf) ++ continue; ++ ++ offset = vfsub_llseek(hf, 0, SEEK_SET); ++ err = offset; ++ if (unlikely(offset)) ++ break; ++ ++ arg->bindex = bindex; ++ au_fclr_fillvdir(arg->flags, WHABLE); ++ if (shwh ++ || (bindex != bend ++ && au_br_whable(au_sbr_perm(sb, bindex)))) ++ au_fset_fillvdir(arg->flags, WHABLE); ++ do { ++ arg->err = 0; ++ au_fclr_fillvdir(arg->flags, CALLED); ++ /* smp_mb(); */ ++ err = vfsub_readdir(hf, fillvdir, arg); ++ if (err >= 0) ++ err = arg->err; ++ } while (!err && au_ftest_fillvdir(arg->flags, CALLED)); ++ } ++ ++ if (!err && shwh) ++ err = au_handle_shwh(sb, arg->vdir, &arg->whlist, &arg->delist); ++ ++ au_nhash_wh_free(&arg->whlist); ++ ++out_delist: ++ au_nhash_de_free(&arg->delist); ++out: ++ return err; ++} ++ ++static int read_vdir(struct file *file, int may_read) ++{ ++ int err; ++ unsigned long expire; ++ unsigned char do_read; ++ struct fillvdir_arg arg; ++ struct inode *inode; ++ struct au_vdir *vdir, *allocated; ++ ++ err = 0; ++ inode = file->f_dentry->d_inode; ++ IMustLock(inode); ++ SiMustAnyLock(inode->i_sb); ++ ++ allocated = NULL; ++ do_read = 0; ++ expire = au_sbi(inode->i_sb)->si_rdcache; ++ vdir = au_ivdir(inode); ++ if (!vdir) { ++ do_read = 1; ++ vdir = alloc_vdir(file); ++ err = PTR_ERR(vdir); ++ if (IS_ERR(vdir)) ++ goto out; ++ err = 0; ++ allocated = vdir; ++ } else if (may_read ++ && (inode->i_version != vdir->vd_version ++ || time_after(jiffies, vdir->vd_jiffy + expire))) { ++ do_read = 1; ++ err = reinit_vdir(vdir); ++ if (unlikely(err)) ++ goto out; ++ } ++ ++ if (!do_read) ++ return 0; /* success */ ++ ++ arg.file = file; ++ arg.vdir = vdir; ++ err = au_do_read_vdir(&arg); ++ if 
(!err) { ++ /* file->f_pos = 0; */ ++ vdir->vd_version = inode->i_version; ++ vdir->vd_last.ul = 0; ++ vdir->vd_last.p.deblk = vdir->vd_deblk[0]; ++ if (allocated) ++ au_set_ivdir(inode, allocated); ++ } else if (allocated) ++ au_vdir_free(allocated); ++ ++out: ++ return err; ++} ++ ++static int copy_vdir(struct au_vdir *tgt, struct au_vdir *src) ++{ ++ int err, rerr; ++ unsigned long ul, n; ++ const unsigned int deblk_sz = src->vd_deblk_sz; ++ ++ AuDebugOn(tgt->vd_nblk != 1); ++ ++ err = -ENOMEM; ++ if (tgt->vd_nblk < src->vd_nblk) { ++ unsigned char **p; ++ ++ p = krealloc(tgt->vd_deblk, sizeof(*p) * src->vd_nblk, ++ GFP_NOFS); ++ if (unlikely(!p)) ++ goto out; ++ tgt->vd_deblk = p; ++ } ++ ++ if (tgt->vd_deblk_sz != deblk_sz) { ++ unsigned char *p; ++ ++ tgt->vd_deblk_sz = deblk_sz; ++ p = krealloc(tgt->vd_deblk[0], deblk_sz, GFP_NOFS); ++ if (unlikely(!p)) ++ goto out; ++ tgt->vd_deblk[0] = p; ++ } ++ memcpy(tgt->vd_deblk[0], src->vd_deblk[0], deblk_sz); ++ tgt->vd_version = src->vd_version; ++ tgt->vd_jiffy = src->vd_jiffy; ++ ++ n = src->vd_nblk; ++ for (ul = 1; ul < n; ul++) { ++ tgt->vd_deblk[ul] = kmemdup(src->vd_deblk[ul], deblk_sz, ++ GFP_NOFS); ++ if (unlikely(!tgt->vd_deblk[ul])) ++ goto out; ++ tgt->vd_nblk++; ++ } ++ tgt->vd_nblk = n; ++ tgt->vd_last.ul = tgt->vd_last.ul; ++ tgt->vd_last.p.deblk = tgt->vd_deblk[tgt->vd_last.ul]; ++ tgt->vd_last.p.deblk += src->vd_last.p.deblk ++ - src->vd_deblk[src->vd_last.ul]; ++ /* smp_mb(); */ ++ return 0; /* success */ ++ ++out: ++ rerr = reinit_vdir(tgt); ++ BUG_ON(rerr); ++ return err; ++} ++ ++int au_vdir_init(struct file *file) ++{ ++ int err; ++ struct inode *inode; ++ struct au_vdir *vdir_cache, *allocated; ++ ++ err = read_vdir(file, !file->f_pos); ++ if (unlikely(err)) ++ goto out; ++ ++ allocated = NULL; ++ vdir_cache = au_fvdir_cache(file); ++ if (!vdir_cache) { ++ vdir_cache = alloc_vdir(file); ++ err = PTR_ERR(vdir_cache); ++ if (IS_ERR(vdir_cache)) ++ goto out; ++ allocated = vdir_cache; ++ } else if (!file->f_pos && vdir_cache->vd_version != file->f_version) { ++ err = reinit_vdir(vdir_cache); ++ if (unlikely(err)) ++ goto out; ++ } else ++ return 0; /* success */ ++ ++ inode = file->f_dentry->d_inode; ++ err = copy_vdir(vdir_cache, au_ivdir(inode)); ++ if (!err) { ++ file->f_version = inode->i_version; ++ if (allocated) ++ au_set_fvdir_cache(file, allocated); ++ } else if (allocated) ++ au_vdir_free(allocated); ++ ++out: ++ return err; ++} ++ ++static loff_t calc_offset(struct au_vdir *vdir) ++{ ++ loff_t offset; ++ union au_vdir_deblk_p p; ++ ++ p.deblk = vdir->vd_deblk[vdir->vd_last.ul]; ++ offset = vdir->vd_last.p.deblk - p.deblk; ++ offset += vdir->vd_deblk_sz * vdir->vd_last.ul; ++ return offset; ++} ++ ++/* returns true or false */ ++static int seek_vdir(struct file *file) ++{ ++ int valid; ++ unsigned int deblk_sz; ++ unsigned long ul, n; ++ loff_t offset; ++ union au_vdir_deblk_p p, deblk_end; ++ struct au_vdir *vdir_cache; ++ ++ valid = 1; ++ vdir_cache = au_fvdir_cache(file); ++ offset = calc_offset(vdir_cache); ++ AuDbg("offset %lld\n", offset); ++ if (file->f_pos == offset) ++ goto out; ++ ++ vdir_cache->vd_last.ul = 0; ++ vdir_cache->vd_last.p.deblk = vdir_cache->vd_deblk[0]; ++ if (!file->f_pos) ++ goto out; ++ ++ valid = 0; ++ deblk_sz = vdir_cache->vd_deblk_sz; ++ ul = div64_u64(file->f_pos, deblk_sz); ++ AuDbg("ul %lu\n", ul); ++ if (ul >= vdir_cache->vd_nblk) ++ goto out; ++ ++ n = vdir_cache->vd_nblk; ++ for (; ul < n; ul++) { ++ p.deblk = vdir_cache->vd_deblk[ul]; ++ deblk_end.deblk = p.deblk + 
deblk_sz; ++ offset = ul; ++ offset *= deblk_sz; ++ while (!is_deblk_end(&p, &deblk_end) && offset < file->f_pos) { ++ unsigned int l; ++ ++ l = calc_size(p.de->de_str.len); ++ offset += l; ++ p.deblk += l; ++ } ++ if (!is_deblk_end(&p, &deblk_end)) { ++ valid = 1; ++ vdir_cache->vd_last.ul = ul; ++ vdir_cache->vd_last.p = p; ++ break; ++ } ++ } ++ ++out: ++ /* smp_mb(); */ ++ AuTraceErr(!valid); ++ return valid; ++} ++ ++int au_vdir_fill_de(struct file *file, void *dirent, filldir_t filldir) ++{ ++ int err; ++ unsigned int l, deblk_sz; ++ union au_vdir_deblk_p deblk_end; ++ struct au_vdir *vdir_cache; ++ struct au_vdir_de *de; ++ ++ vdir_cache = au_fvdir_cache(file); ++ if (!seek_vdir(file)) ++ return 0; ++ ++ deblk_sz = vdir_cache->vd_deblk_sz; ++ while (1) { ++ deblk_end.deblk = vdir_cache->vd_deblk[vdir_cache->vd_last.ul]; ++ deblk_end.deblk += deblk_sz; ++ while (!is_deblk_end(&vdir_cache->vd_last.p, &deblk_end)) { ++ de = vdir_cache->vd_last.p.de; ++ AuDbg("%.*s, off%lld, i%lu, dt%d\n", ++ de->de_str.len, de->de_str.name, file->f_pos, ++ (unsigned long)de->de_ino, de->de_type); ++ err = filldir(dirent, de->de_str.name, de->de_str.len, ++ file->f_pos, de->de_ino, de->de_type); ++ if (unlikely(err)) { ++ AuTraceErr(err); ++ /* todo: ignore the error caused by udba? */ ++ /* return err; */ ++ return 0; ++ } ++ ++ l = calc_size(de->de_str.len); ++ vdir_cache->vd_last.p.deblk += l; ++ file->f_pos += l; ++ } ++ if (vdir_cache->vd_last.ul < vdir_cache->vd_nblk - 1) { ++ vdir_cache->vd_last.ul++; ++ vdir_cache->vd_last.p.deblk ++ = vdir_cache->vd_deblk[vdir_cache->vd_last.ul]; ++ file->f_pos = deblk_sz * vdir_cache->vd_last.ul; ++ continue; ++ } ++ break; ++ } ++ ++ /* smp_mb(); */ ++ return 0; ++} +diff -uNr linux-3.2.0-gentoo-r1.orig//fs/aufs/vfsub.c linux-3.2.0-gentoo-r1/fs/aufs/vfsub.c +--- linux-3.2.0-gentoo-r1.orig//fs/aufs/vfsub.c 1970-01-01 01:00:00.000000000 +0100 ++++ linux-3.2.0-gentoo-r1/fs/aufs/vfsub.c 2012-01-17 12:11:25.032529271 +0100 +@@ -0,0 +1,835 @@ ++/* ++ * Copyright (C) 2005-2012 Junjiro R. Okajima ++ * ++ * This program, aufs is free software; you can redistribute it and/or modify ++ * it under the terms of the GNU General Public License as published by ++ * the Free Software Foundation; either version 2 of the License, or ++ * (at your option) any later version. ++ * ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ * GNU General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public License ++ * along with this program; if not, write to the Free Software ++ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA ++ */ ++ ++/* ++ * sub-routines for VFS ++ */ ++ ++#include ++#include ++#include ++#include ++#include "aufs.h" ++ ++int vfsub_update_h_iattr(struct path *h_path, int *did) ++{ ++ int err; ++ struct kstat st; ++ struct super_block *h_sb; ++ ++ /* for remote fs, leave work for its getattr or d_revalidate */ ++ /* for bad i_attr fs, handle them in aufs_getattr() */ ++ /* still some fs may acquire i_mutex. 
we need to skip them */ ++ err = 0; ++ if (!did) ++ did = &err; ++ h_sb = h_path->dentry->d_sb; ++ *did = (!au_test_fs_remote(h_sb) && au_test_fs_refresh_iattr(h_sb)); ++ if (*did) ++ err = vfs_getattr(h_path->mnt, h_path->dentry, &st); ++ ++ return err; ++} ++ ++/* ---------------------------------------------------------------------- */ ++ ++struct file *vfsub_dentry_open(struct path *path, int flags) ++{ ++ struct file *file; ++ ++ path_get(path); ++ file = dentry_open(path->dentry, path->mnt, ++ flags /* | __FMODE_NONOTIFY */, ++ current_cred()); ++ if (!IS_ERR_OR_NULL(file) ++ && (file->f_mode & (FMODE_READ | FMODE_WRITE)) == FMODE_READ) ++ i_readcount_inc(path->dentry->d_inode); ++ ++ return file; ++} ++ ++struct file *vfsub_filp_open(const char *path, int oflags, int mode) ++{ ++ struct file *file; ++ ++ lockdep_off(); ++ file = filp_open(path, ++ oflags /* | __FMODE_NONOTIFY */, ++ mode); ++ lockdep_on(); ++ if (IS_ERR(file)) ++ goto out; ++ vfsub_update_h_iattr(&file->f_path, /*did*/NULL); /*ignore*/ ++ ++out: ++ return file; ++} ++ ++int vfsub_kern_path(const char *name, unsigned int flags, struct path *path) ++{ ++ int err; ++ ++ err = kern_path(name, flags, path); ++ if (!err && path->dentry->d_inode) ++ vfsub_update_h_iattr(path, /*did*/NULL); /*ignore*/ ++ return err; ++} ++ ++struct dentry *vfsub_lookup_one_len(const char *name, struct dentry *parent, ++ int len) ++{ ++ struct path path = { ++ .mnt = NULL ++ }; ++ ++ /* VFS checks it too, but by WARN_ON_ONCE() */ ++ IMustLock(parent->d_inode); ++ ++ path.dentry = lookup_one_len(name, parent, len); ++ if (IS_ERR(path.dentry)) ++ goto out; ++ if (path.dentry->d_inode) ++ vfsub_update_h_iattr(&path, /*did*/NULL); /*ignore*/ ++ ++out: ++ AuTraceErrPtr(path.dentry); ++ return path.dentry; ++} ++ ++struct dentry *vfsub_lookup_hash(struct nameidata *nd) ++{ ++ struct path path = { ++ .mnt = nd->path.mnt ++ }; ++ ++ IMustLock(nd->path.dentry->d_inode); ++ ++ path.dentry = lookup_hash(nd); ++ if (IS_ERR(path.dentry)) ++ goto out; ++ if (path.dentry->d_inode) ++ vfsub_update_h_iattr(&path, /*did*/NULL); /*ignore*/ ++ ++out: ++ AuTraceErrPtr(path.dentry); ++ return path.dentry; ++} ++ ++/* ++ * this is "VFS:__lookup_one_len()" which was removed and merged into ++ * VFS:lookup_one_len() by the commit. ++ * 6a96ba5 2011-03-14 kill __lookup_one_len() ++ * this function should always be equivalent to the corresponding part in ++ * VFS:lookup_one_len(). 
++ */ ++int vfsub_name_hash(const char *name, struct qstr *this, int len) ++{ ++ unsigned long hash; ++ unsigned int c; ++ ++ this->name = name; ++ this->len = len; ++ if (!len) ++ return -EACCES; ++ ++ hash = init_name_hash(); ++ while (len--) { ++ c = *(const unsigned char *)name++; ++ if (c == '/' || c == '\0') ++ return -EACCES; ++ hash = partial_name_hash(c, hash); ++ } ++ this->hash = end_name_hash(hash); ++ return 0; ++} ++ ++/* ---------------------------------------------------------------------- */ ++ ++struct dentry *vfsub_lock_rename(struct dentry *d1, struct au_hinode *hdir1, ++ struct dentry *d2, struct au_hinode *hdir2) ++{ ++ struct dentry *d; ++ ++ lockdep_off(); ++ d = lock_rename(d1, d2); ++ lockdep_on(); ++ au_hn_suspend(hdir1); ++ if (hdir1 != hdir2) ++ au_hn_suspend(hdir2); ++ ++ return d; ++} ++ ++void vfsub_unlock_rename(struct dentry *d1, struct au_hinode *hdir1, ++ struct dentry *d2, struct au_hinode *hdir2) ++{ ++ au_hn_resume(hdir1); ++ if (hdir1 != hdir2) ++ au_hn_resume(hdir2); ++ lockdep_off(); ++ unlock_rename(d1, d2); ++ lockdep_on(); ++} ++ ++/* ---------------------------------------------------------------------- */ ++ ++int vfsub_create(struct inode *dir, struct path *path, int mode) ++{ ++ int err; ++ struct dentry *d; ++ ++ IMustLock(dir); ++ ++ d = path->dentry; ++ path->dentry = d->d_parent; ++ err = security_path_mknod(path, d, mode, 0); ++ path->dentry = d; ++ if (unlikely(err)) ++ goto out; ++ ++ if (au_test_fs_null_nd(dir->i_sb)) ++ err = vfs_create(dir, path->dentry, mode, NULL); ++ else { ++ struct nameidata h_nd; ++ ++ memset(&h_nd, 0, sizeof(h_nd)); ++ h_nd.flags = LOOKUP_CREATE; ++ h_nd.intent.open.flags = O_CREAT ++ | vfsub_fmode_to_uint(FMODE_READ); ++ h_nd.intent.open.create_mode = mode; ++ h_nd.path.dentry = path->dentry->d_parent; ++ h_nd.path.mnt = path->mnt; ++ path_get(&h_nd.path); ++ err = vfs_create(dir, path->dentry, mode, &h_nd); ++ path_put(&h_nd.path); ++ } ++ ++ if (!err) { ++ struct path tmp = *path; ++ int did; ++ ++ vfsub_update_h_iattr(&tmp, &did); ++ if (did) { ++ tmp.dentry = path->dentry->d_parent; ++ vfsub_update_h_iattr(&tmp, /*did*/NULL); ++ } ++ /*ignore*/ ++ } ++ ++out: ++ return err; ++} ++ ++int vfsub_symlink(struct inode *dir, struct path *path, const char *symname) ++{ ++ int err; ++ struct dentry *d; ++ ++ IMustLock(dir); ++ ++ d = path->dentry; ++ path->dentry = d->d_parent; ++ err = security_path_symlink(path, d, symname); ++ path->dentry = d; ++ if (unlikely(err)) ++ goto out; ++ ++ err = vfs_symlink(dir, path->dentry, symname); ++ if (!err) { ++ struct path tmp = *path; ++ int did; ++ ++ vfsub_update_h_iattr(&tmp, &did); ++ if (did) { ++ tmp.dentry = path->dentry->d_parent; ++ vfsub_update_h_iattr(&tmp, /*did*/NULL); ++ } ++ /*ignore*/ ++ } ++ ++out: ++ return err; ++} ++ ++int vfsub_mknod(struct inode *dir, struct path *path, int mode, dev_t dev) ++{ ++ int err; ++ struct dentry *d; ++ ++ IMustLock(dir); ++ ++ d = path->dentry; ++ path->dentry = d->d_parent; ++ err = security_path_mknod(path, d, mode, new_encode_dev(dev)); ++ path->dentry = d; ++ if (unlikely(err)) ++ goto out; ++ ++ err = vfs_mknod(dir, path->dentry, mode, dev); ++ if (!err) { ++ struct path tmp = *path; ++ int did; ++ ++ vfsub_update_h_iattr(&tmp, &did); ++ if (did) { ++ tmp.dentry = path->dentry->d_parent; ++ vfsub_update_h_iattr(&tmp, /*did*/NULL); ++ } ++ /*ignore*/ ++ } ++ ++out: ++ return err; ++} ++ ++static int au_test_nlink(struct inode *inode) ++{ ++ const unsigned int link_max = UINT_MAX >> 1; /* rough margin */ ++ ++ if 
(!au_test_fs_no_limit_nlink(inode->i_sb) ++ || inode->i_nlink < link_max) ++ return 0; ++ return -EMLINK; ++} ++ ++int vfsub_link(struct dentry *src_dentry, struct inode *dir, struct path *path) ++{ ++ int err; ++ struct dentry *d; ++ ++ IMustLock(dir); ++ ++ err = au_test_nlink(src_dentry->d_inode); ++ if (unlikely(err)) ++ return err; ++ ++ d = path->dentry; ++ path->dentry = d->d_parent; ++ err = security_path_link(src_dentry, path, d); ++ path->dentry = d; ++ if (unlikely(err)) ++ goto out; ++ ++ lockdep_off(); ++ err = vfs_link(src_dentry, dir, path->dentry); ++ lockdep_on(); ++ if (!err) { ++ struct path tmp = *path; ++ int did; ++ ++ /* fuse has different memory inode for the same inumber */ ++ vfsub_update_h_iattr(&tmp, &did); ++ if (did) { ++ tmp.dentry = path->dentry->d_parent; ++ vfsub_update_h_iattr(&tmp, /*did*/NULL); ++ tmp.dentry = src_dentry; ++ vfsub_update_h_iattr(&tmp, /*did*/NULL); ++ } ++ /*ignore*/ ++ } ++ ++out: ++ return err; ++} ++ ++int vfsub_rename(struct inode *src_dir, struct dentry *src_dentry, ++ struct inode *dir, struct path *path) ++{ ++ int err; ++ struct path tmp = { ++ .mnt = path->mnt ++ }; ++ struct dentry *d; ++ ++ IMustLock(dir); ++ IMustLock(src_dir); ++ ++ d = path->dentry; ++ path->dentry = d->d_parent; ++ tmp.dentry = src_dentry->d_parent; ++ err = security_path_rename(&tmp, src_dentry, path, d); ++ path->dentry = d; ++ if (unlikely(err)) ++ goto out; ++ ++ lockdep_off(); ++ err = vfs_rename(src_dir, src_dentry, dir, path->dentry); ++ lockdep_on(); ++ if (!err) { ++ int did; ++ ++ tmp.dentry = d->d_parent; ++ vfsub_update_h_iattr(&tmp, &did); ++ if (did) { ++ tmp.dentry = src_dentry; ++ vfsub_update_h_iattr(&tmp, /*did*/NULL); ++ tmp.dentry = src_dentry->d_parent; ++ vfsub_update_h_iattr(&tmp, /*did*/NULL); ++ } ++ /*ignore*/ ++ } ++ ++out: ++ return err; ++} ++ ++int vfsub_mkdir(struct inode *dir, struct path *path, int mode) ++{ ++ int err; ++ struct dentry *d; ++ ++ IMustLock(dir); ++ ++ d = path->dentry; ++ path->dentry = d->d_parent; ++ err = security_path_mkdir(path, d, mode); ++ path->dentry = d; ++ if (unlikely(err)) ++ goto out; ++ ++ err = vfs_mkdir(dir, path->dentry, mode); ++ if (!err) { ++ struct path tmp = *path; ++ int did; ++ ++ vfsub_update_h_iattr(&tmp, &did); ++ if (did) { ++ tmp.dentry = path->dentry->d_parent; ++ vfsub_update_h_iattr(&tmp, /*did*/NULL); ++ } ++ /*ignore*/ ++ } ++ ++out: ++ return err; ++} ++ ++int vfsub_rmdir(struct inode *dir, struct path *path) ++{ ++ int err; ++ struct dentry *d; ++ ++ IMustLock(dir); ++ ++ d = path->dentry; ++ path->dentry = d->d_parent; ++ err = security_path_rmdir(path, d); ++ path->dentry = d; ++ if (unlikely(err)) ++ goto out; ++ ++ lockdep_off(); ++ err = vfs_rmdir(dir, path->dentry); ++ lockdep_on(); ++ if (!err) { ++ struct path tmp = { ++ .dentry = path->dentry->d_parent, ++ .mnt = path->mnt ++ }; ++ ++ vfsub_update_h_iattr(&tmp, /*did*/NULL); /*ignore*/ ++ } ++ ++out: ++ return err; ++} ++ ++/* ---------------------------------------------------------------------- */ ++ ++/* todo: support mmap_sem? */ ++ssize_t vfsub_read_u(struct file *file, char __user *ubuf, size_t count, ++ loff_t *ppos) ++{ ++ ssize_t err; ++ ++ lockdep_off(); ++ err = vfs_read(file, ubuf, count, ppos); ++ lockdep_on(); ++ if (err >= 0) ++ vfsub_update_h_iattr(&file->f_path, /*did*/NULL); /*ignore*/ ++ return err; ++} ++ ++/* todo: kernel_read()? 
*/ ++ssize_t vfsub_read_k(struct file *file, void *kbuf, size_t count, ++ loff_t *ppos) ++{ ++ ssize_t err; ++ mm_segment_t oldfs; ++ union { ++ void *k; ++ char __user *u; ++ } buf; ++ ++ buf.k = kbuf; ++ oldfs = get_fs(); ++ set_fs(KERNEL_DS); ++ err = vfsub_read_u(file, buf.u, count, ppos); ++ set_fs(oldfs); ++ return err; ++} ++ ++ssize_t vfsub_write_u(struct file *file, const char __user *ubuf, size_t count, ++ loff_t *ppos) ++{ ++ ssize_t err; ++ ++ lockdep_off(); ++ err = vfs_write(file, ubuf, count, ppos); ++ lockdep_on(); ++ if (err >= 0) ++ vfsub_update_h_iattr(&file->f_path, /*did*/NULL); /*ignore*/ ++ return err; ++} ++ ++ssize_t vfsub_write_k(struct file *file, void *kbuf, size_t count, loff_t *ppos) ++{ ++ ssize_t err; ++ mm_segment_t oldfs; ++ union { ++ void *k; ++ const char __user *u; ++ } buf; ++ ++ buf.k = kbuf; ++ oldfs = get_fs(); ++ set_fs(KERNEL_DS); ++ err = vfsub_write_u(file, buf.u, count, ppos); ++ set_fs(oldfs); ++ return err; ++} ++ ++int vfsub_flush(struct file *file, fl_owner_t id) ++{ ++ int err; ++ ++ err = 0; ++ if (file->f_op && file->f_op->flush) { ++ if (!au_test_nfs(file->f_dentry->d_sb)) ++ err = file->f_op->flush(file, id); ++ else { ++ lockdep_off(); ++ err = file->f_op->flush(file, id); ++ lockdep_on(); ++ } ++ if (!err) ++ vfsub_update_h_iattr(&file->f_path, /*did*/NULL); ++ /*ignore*/ ++ } ++ return err; ++} ++ ++int vfsub_readdir(struct file *file, filldir_t filldir, void *arg) ++{ ++ int err; ++ ++ lockdep_off(); ++ err = vfs_readdir(file, filldir, arg); ++ lockdep_on(); ++ if (err >= 0) ++ vfsub_update_h_iattr(&file->f_path, /*did*/NULL); /*ignore*/ ++ return err; ++} ++ ++long vfsub_splice_to(struct file *in, loff_t *ppos, ++ struct pipe_inode_info *pipe, size_t len, ++ unsigned int flags) ++{ ++ long err; ++ ++ lockdep_off(); ++ err = do_splice_to(in, ppos, pipe, len, flags); ++ lockdep_on(); ++ file_accessed(in); ++ if (err >= 0) ++ vfsub_update_h_iattr(&in->f_path, /*did*/NULL); /*ignore*/ ++ return err; ++} ++ ++long vfsub_splice_from(struct pipe_inode_info *pipe, struct file *out, ++ loff_t *ppos, size_t len, unsigned int flags) ++{ ++ long err; ++ ++ lockdep_off(); ++ err = do_splice_from(pipe, out, ppos, len, flags); ++ lockdep_on(); ++ if (err >= 0) ++ vfsub_update_h_iattr(&out->f_path, /*did*/NULL); /*ignore*/ ++ return err; ++} ++ ++int vfsub_fsync(struct file *file, struct path *path, int datasync) ++{ ++ int err; ++ ++ /* file can be NULL */ ++ lockdep_off(); ++ err = vfs_fsync(file, datasync); ++ lockdep_on(); ++ if (!err) { ++ if (!path) { ++ AuDebugOn(!file); ++ path = &file->f_path; ++ } ++ vfsub_update_h_iattr(path, /*did*/NULL); /*ignore*/ ++ } ++ return err; ++} ++ ++/* cf. 
open.c:do_sys_truncate() and do_sys_ftruncate() */ ++int vfsub_trunc(struct path *h_path, loff_t length, unsigned int attr, ++ struct file *h_file) ++{ ++ int err; ++ struct inode *h_inode; ++ ++ h_inode = h_path->dentry->d_inode; ++ if (!h_file) { ++ err = mnt_want_write(h_path->mnt); ++ if (err) ++ goto out; ++ err = inode_permission(h_inode, MAY_WRITE); ++ if (err) ++ goto out_mnt; ++ err = get_write_access(h_inode); ++ if (err) ++ goto out_mnt; ++ err = break_lease(h_inode, O_WRONLY); ++ if (err) ++ goto out_inode; ++ } ++ ++ err = locks_verify_truncate(h_inode, h_file, length); ++ if (!err) ++ err = security_path_truncate(h_path); ++ if (!err) { ++ lockdep_off(); ++ err = do_truncate(h_path->dentry, length, attr, h_file); ++ lockdep_on(); ++ } ++ ++out_inode: ++ if (!h_file) ++ put_write_access(h_inode); ++out_mnt: ++ if (!h_file) ++ mnt_drop_write(h_path->mnt); ++out: ++ return err; ++} ++ ++/* ---------------------------------------------------------------------- */ ++ ++struct au_vfsub_mkdir_args { ++ int *errp; ++ struct inode *dir; ++ struct path *path; ++ int mode; ++}; ++ ++static void au_call_vfsub_mkdir(void *args) ++{ ++ struct au_vfsub_mkdir_args *a = args; ++ *a->errp = vfsub_mkdir(a->dir, a->path, a->mode); ++} ++ ++int vfsub_sio_mkdir(struct inode *dir, struct path *path, int mode) ++{ ++ int err, do_sio, wkq_err; ++ ++ do_sio = au_test_h_perm_sio(dir, MAY_EXEC | MAY_WRITE); ++ if (!do_sio) ++ err = vfsub_mkdir(dir, path, mode); ++ else { ++ struct au_vfsub_mkdir_args args = { ++ .errp = &err, ++ .dir = dir, ++ .path = path, ++ .mode = mode ++ }; ++ wkq_err = au_wkq_wait(au_call_vfsub_mkdir, &args); ++ if (unlikely(wkq_err)) ++ err = wkq_err; ++ } ++ ++ return err; ++} ++ ++struct au_vfsub_rmdir_args { ++ int *errp; ++ struct inode *dir; ++ struct path *path; ++}; ++ ++static void au_call_vfsub_rmdir(void *args) ++{ ++ struct au_vfsub_rmdir_args *a = args; ++ *a->errp = vfsub_rmdir(a->dir, a->path); ++} ++ ++int vfsub_sio_rmdir(struct inode *dir, struct path *path) ++{ ++ int err, do_sio, wkq_err; ++ ++ do_sio = au_test_h_perm_sio(dir, MAY_EXEC | MAY_WRITE); ++ if (!do_sio) ++ err = vfsub_rmdir(dir, path); ++ else { ++ struct au_vfsub_rmdir_args args = { ++ .errp = &err, ++ .dir = dir, ++ .path = path ++ }; ++ wkq_err = au_wkq_wait(au_call_vfsub_rmdir, &args); ++ if (unlikely(wkq_err)) ++ err = wkq_err; ++ } ++ ++ return err; ++} ++ ++/* ---------------------------------------------------------------------- */ ++ ++struct notify_change_args { ++ int *errp; ++ struct path *path; ++ struct iattr *ia; ++}; ++ ++static void call_notify_change(void *args) ++{ ++ struct notify_change_args *a = args; ++ struct inode *h_inode; ++ ++ h_inode = a->path->dentry->d_inode; ++ IMustLock(h_inode); ++ ++ *a->errp = -EPERM; ++ if (!IS_IMMUTABLE(h_inode) && !IS_APPEND(h_inode)) { ++ *a->errp = notify_change(a->path->dentry, a->ia); ++ if (!*a->errp) ++ vfsub_update_h_iattr(a->path, /*did*/NULL); /*ignore*/ ++ } ++ AuTraceErr(*a->errp); ++} ++ ++int vfsub_notify_change(struct path *path, struct iattr *ia) ++{ ++ int err; ++ struct notify_change_args args = { ++ .errp = &err, ++ .path = path, ++ .ia = ia ++ }; ++ ++ call_notify_change(&args); ++ ++ return err; ++} ++ ++int vfsub_sio_notify_change(struct path *path, struct iattr *ia) ++{ ++ int err, wkq_err; ++ struct notify_change_args args = { ++ .errp = &err, ++ .path = path, ++ .ia = ia ++ }; ++ ++ wkq_err = au_wkq_wait(call_notify_change, &args); ++ if (unlikely(wkq_err)) ++ err = wkq_err; ++ ++ return err; ++} ++ ++/* 
---------------------------------------------------------------------- */ ++ ++struct unlink_args { ++ int *errp; ++ struct inode *dir; ++ struct path *path; ++}; ++ ++static void call_unlink(void *args) ++{ ++ struct unlink_args *a = args; ++ struct dentry *d = a->path->dentry; ++ struct inode *h_inode; ++ const int stop_sillyrename = (au_test_nfs(d->d_sb) ++ && d->d_count == 1); ++ ++ IMustLock(a->dir); ++ ++ a->path->dentry = d->d_parent; ++ *a->errp = security_path_unlink(a->path, d); ++ a->path->dentry = d; ++ if (unlikely(*a->errp)) ++ return; ++ ++ if (!stop_sillyrename) ++ dget(d); ++ h_inode = d->d_inode; ++ if (h_inode) ++ ihold(h_inode); ++ ++ lockdep_off(); ++ *a->errp = vfs_unlink(a->dir, d); ++ lockdep_on(); ++ if (!*a->errp) { ++ struct path tmp = { ++ .dentry = d->d_parent, ++ .mnt = a->path->mnt ++ }; ++ vfsub_update_h_iattr(&tmp, /*did*/NULL); /*ignore*/ ++ } ++ ++ if (!stop_sillyrename) ++ dput(d); ++ if (h_inode) ++ iput(h_inode); ++ ++ AuTraceErr(*a->errp); ++} ++ ++/* ++ * @dir: must be locked. ++ * @dentry: target dentry. ++ */ ++int vfsub_unlink(struct inode *dir, struct path *path, int force) ++{ ++ int err; ++ struct unlink_args args = { ++ .errp = &err, ++ .dir = dir, ++ .path = path ++ }; ++ ++ if (!force) ++ call_unlink(&args); ++ else { ++ int wkq_err; ++ ++ wkq_err = au_wkq_wait(call_unlink, &args); ++ if (unlikely(wkq_err)) ++ err = wkq_err; ++ } ++ ++ return err; ++} +diff -uNr linux-3.2.0-gentoo-r1.orig//fs/aufs/vfsub.h linux-3.2.0-gentoo-r1/fs/aufs/vfsub.h +--- linux-3.2.0-gentoo-r1.orig//fs/aufs/vfsub.h 1970-01-01 01:00:00.000000000 +0100 ++++ linux-3.2.0-gentoo-r1/fs/aufs/vfsub.h 2012-01-17 12:11:25.044103451 +0100 +@@ -0,0 +1,232 @@ ++/* ++ * Copyright (C) 2005-2012 Junjiro R. Okajima ++ * ++ * This program, aufs is free software; you can redistribute it and/or modify ++ * it under the terms of the GNU General Public License as published by ++ * the Free Software Foundation; either version 2 of the License, or ++ * (at your option) any later version. ++ * ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ * GNU General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public License ++ * along with this program; if not, write to the Free Software ++ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA ++ */ ++ ++/* ++ * sub-routines for VFS ++ */ ++ ++#ifndef __AUFS_VFSUB_H__ ++#define __AUFS_VFSUB_H__ ++ ++#ifdef __KERNEL__ ++ ++#include ++#include ++#include "debug.h" ++ ++/* copied from linux/fs/internal.h */ ++/* todo: BAD approach!! */ ++DECLARE_BRLOCK(vfsmount_lock); ++extern void file_sb_list_del(struct file *f); ++extern spinlock_t inode_sb_list_lock; ++ ++/* copied from linux/fs/file_table.c */ ++DECLARE_LGLOCK(files_lglock); ++#ifdef CONFIG_SMP ++/* ++ * These macros iterate all files on all CPUs for a given superblock. ++ * files_lglock must be held globally. 
++ */ ++#define do_file_list_for_each_entry(__sb, __file) \ ++{ \ ++ int i; \ ++ for_each_possible_cpu(i) { \ ++ struct list_head *list; \ ++ list = per_cpu_ptr((__sb)->s_files, i); \ ++ list_for_each_entry((__file), list, f_u.fu_list) ++ ++#define while_file_list_for_each_entry \ ++ } \ ++} ++ ++#else ++ ++#define do_file_list_for_each_entry(__sb, __file) \ ++{ \ ++ struct list_head *list; \ ++ list = &(sb)->s_files; \ ++ list_for_each_entry((__file), list, f_u.fu_list) ++ ++#define while_file_list_for_each_entry \ ++} ++#endif ++ ++/* ---------------------------------------------------------------------- */ ++ ++/* lock subclass for lower inode */ ++/* default MAX_LOCKDEP_SUBCLASSES(8) is not enough */ ++/* reduce? gave up. */ ++enum { ++ AuLsc_I_Begin = I_MUTEX_QUOTA, /* 4 */ ++ AuLsc_I_PARENT, /* lower inode, parent first */ ++ AuLsc_I_PARENT2, /* copyup dirs */ ++ AuLsc_I_PARENT3, /* copyup wh */ ++ AuLsc_I_CHILD, ++ AuLsc_I_CHILD2, ++ AuLsc_I_End ++}; ++ ++/* to debug easier, do not make them inlined functions */ ++#define MtxMustLock(mtx) AuDebugOn(!mutex_is_locked(mtx)) ++#define IMustLock(i) MtxMustLock(&(i)->i_mutex) ++ ++/* ---------------------------------------------------------------------- */ ++ ++static inline void vfsub_drop_nlink(struct inode *inode) ++{ ++ AuDebugOn(!inode->i_nlink); ++ drop_nlink(inode); ++} ++ ++static inline void vfsub_dead_dir(struct inode *inode) ++{ ++ AuDebugOn(!S_ISDIR(inode->i_mode)); ++ inode->i_flags |= S_DEAD; ++ clear_nlink(inode); ++} ++ ++/* ---------------------------------------------------------------------- */ ++ ++int vfsub_update_h_iattr(struct path *h_path, int *did); ++struct file *vfsub_dentry_open(struct path *path, int flags); ++struct file *vfsub_filp_open(const char *path, int oflags, int mode); ++int vfsub_kern_path(const char *name, unsigned int flags, struct path *path); ++struct dentry *vfsub_lookup_one_len(const char *name, struct dentry *parent, ++ int len); ++struct dentry *vfsub_lookup_hash(struct nameidata *nd); ++int vfsub_name_hash(const char *name, struct qstr *this, int len); ++ ++/* ---------------------------------------------------------------------- */ ++ ++struct au_hinode; ++struct dentry *vfsub_lock_rename(struct dentry *d1, struct au_hinode *hdir1, ++ struct dentry *d2, struct au_hinode *hdir2); ++void vfsub_unlock_rename(struct dentry *d1, struct au_hinode *hdir1, ++ struct dentry *d2, struct au_hinode *hdir2); ++ ++int vfsub_create(struct inode *dir, struct path *path, int mode); ++int vfsub_symlink(struct inode *dir, struct path *path, ++ const char *symname); ++int vfsub_mknod(struct inode *dir, struct path *path, int mode, dev_t dev); ++int vfsub_link(struct dentry *src_dentry, struct inode *dir, ++ struct path *path); ++int vfsub_rename(struct inode *src_hdir, struct dentry *src_dentry, ++ struct inode *hdir, struct path *path); ++int vfsub_mkdir(struct inode *dir, struct path *path, int mode); ++int vfsub_rmdir(struct inode *dir, struct path *path); ++ ++/* ---------------------------------------------------------------------- */ ++ ++ssize_t vfsub_read_u(struct file *file, char __user *ubuf, size_t count, ++ loff_t *ppos); ++ssize_t vfsub_read_k(struct file *file, void *kbuf, size_t count, ++ loff_t *ppos); ++ssize_t vfsub_write_u(struct file *file, const char __user *ubuf, size_t count, ++ loff_t *ppos); ++ssize_t vfsub_write_k(struct file *file, void *kbuf, size_t count, ++ loff_t *ppos); ++int vfsub_flush(struct file *file, fl_owner_t id); ++int vfsub_readdir(struct file *file, filldir_t 
filldir, void *arg); ++ ++static inline unsigned int vfsub_file_flags(struct file *file) ++{ ++ unsigned int flags; ++ ++ spin_lock(&file->f_lock); ++ flags = file->f_flags; ++ spin_unlock(&file->f_lock); ++ ++ return flags; ++} ++ ++static inline void vfsub_file_accessed(struct file *h_file) ++{ ++ file_accessed(h_file); ++ vfsub_update_h_iattr(&h_file->f_path, /*did*/NULL); /*ignore*/ ++} ++ ++static inline void vfsub_touch_atime(struct vfsmount *h_mnt, ++ struct dentry *h_dentry) ++{ ++ struct path h_path = { ++ .dentry = h_dentry, ++ .mnt = h_mnt ++ }; ++ touch_atime(h_mnt, h_dentry); ++ vfsub_update_h_iattr(&h_path, /*did*/NULL); /*ignore*/ ++} ++ ++long vfsub_splice_to(struct file *in, loff_t *ppos, ++ struct pipe_inode_info *pipe, size_t len, ++ unsigned int flags); ++long vfsub_splice_from(struct pipe_inode_info *pipe, struct file *out, ++ loff_t *ppos, size_t len, unsigned int flags); ++int vfsub_trunc(struct path *h_path, loff_t length, unsigned int attr, ++ struct file *h_file); ++int vfsub_fsync(struct file *file, struct path *path, int datasync); ++ ++/* ---------------------------------------------------------------------- */ ++ ++static inline loff_t vfsub_llseek(struct file *file, loff_t offset, int origin) ++{ ++ loff_t err; ++ ++ lockdep_off(); ++ err = vfs_llseek(file, offset, origin); ++ lockdep_on(); ++ return err; ++} ++ ++/* ---------------------------------------------------------------------- */ ++ ++/* dirty workaround for strict type of fmode_t */ ++union vfsub_fmu { ++ fmode_t fm; ++ unsigned int ui; ++}; ++ ++static inline unsigned int vfsub_fmode_to_uint(fmode_t fm) ++{ ++ union vfsub_fmu u = { ++ .fm = fm ++ }; ++ ++ BUILD_BUG_ON(sizeof(u.fm) != sizeof(u.ui)); ++ ++ return u.ui; ++} ++ ++static inline fmode_t vfsub_uint_to_fmode(unsigned int ui) ++{ ++ union vfsub_fmu u = { ++ .ui = ui ++ }; ++ ++ return u.fm; ++} ++ ++/* ---------------------------------------------------------------------- */ ++ ++int vfsub_sio_mkdir(struct inode *dir, struct path *path, int mode); ++int vfsub_sio_rmdir(struct inode *dir, struct path *path); ++int vfsub_sio_notify_change(struct path *path, struct iattr *ia); ++int vfsub_notify_change(struct path *path, struct iattr *ia); ++int vfsub_unlink(struct inode *dir, struct path *path, int force); ++ ++#endif /* __KERNEL__ */ ++#endif /* __AUFS_VFSUB_H__ */ +diff -uNr linux-3.2.0-gentoo-r1.orig//fs/aufs/wbr_policy.c linux-3.2.0-gentoo-r1/fs/aufs/wbr_policy.c +--- linux-3.2.0-gentoo-r1.orig//fs/aufs/wbr_policy.c 1970-01-01 01:00:00.000000000 +0100 ++++ linux-3.2.0-gentoo-r1/fs/aufs/wbr_policy.c 2012-01-17 12:11:25.044103451 +0100 +@@ -0,0 +1,700 @@ ++/* ++ * Copyright (C) 2005-2012 Junjiro R. Okajima ++ * ++ * This program, aufs is free software; you can redistribute it and/or modify ++ * it under the terms of the GNU General Public License as published by ++ * the Free Software Foundation; either version 2 of the License, or ++ * (at your option) any later version. ++ * ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ * GNU General Public License for more details. 
++ * ++ * You should have received a copy of the GNU General Public License ++ * along with this program; if not, write to the Free Software ++ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA ++ */ ++ ++/* ++ * policies for selecting one among multiple writable branches ++ */ ++ ++#include ++#include "aufs.h" ++ ++/* subset of cpup_attr() */ ++static noinline_for_stack ++int au_cpdown_attr(struct path *h_path, struct dentry *h_src) ++{ ++ int err, sbits; ++ struct iattr ia; ++ struct inode *h_isrc; ++ ++ h_isrc = h_src->d_inode; ++ ia.ia_valid = ATTR_FORCE | ATTR_MODE | ATTR_UID | ATTR_GID; ++ ia.ia_mode = h_isrc->i_mode; ++ ia.ia_uid = h_isrc->i_uid; ++ ia.ia_gid = h_isrc->i_gid; ++ sbits = !!(ia.ia_mode & (S_ISUID | S_ISGID)); ++ au_cpup_attr_flags(h_path->dentry->d_inode, h_isrc); ++ err = vfsub_sio_notify_change(h_path, &ia); ++ ++ /* is this nfs only? */ ++ if (!err && sbits && au_test_nfs(h_path->dentry->d_sb)) { ++ ia.ia_valid = ATTR_FORCE | ATTR_MODE; ++ ia.ia_mode = h_isrc->i_mode; ++ err = vfsub_sio_notify_change(h_path, &ia); ++ } ++ ++ return err; ++} ++ ++#define AuCpdown_PARENT_OPQ 1 ++#define AuCpdown_WHED (1 << 1) ++#define AuCpdown_MADE_DIR (1 << 2) ++#define AuCpdown_DIROPQ (1 << 3) ++#define au_ftest_cpdown(flags, name) ((flags) & AuCpdown_##name) ++#define au_fset_cpdown(flags, name) \ ++ do { (flags) |= AuCpdown_##name; } while (0) ++#define au_fclr_cpdown(flags, name) \ ++ do { (flags) &= ~AuCpdown_##name; } while (0) ++ ++struct au_cpdown_dir_args { ++ struct dentry *parent; ++ unsigned int flags; ++}; ++ ++static int au_cpdown_dir_opq(struct dentry *dentry, aufs_bindex_t bdst, ++ struct au_cpdown_dir_args *a) ++{ ++ int err; ++ struct dentry *opq_dentry; ++ ++ opq_dentry = au_diropq_create(dentry, bdst); ++ err = PTR_ERR(opq_dentry); ++ if (IS_ERR(opq_dentry)) ++ goto out; ++ dput(opq_dentry); ++ au_fset_cpdown(a->flags, DIROPQ); ++ ++out: ++ return err; ++} ++ ++static int au_cpdown_dir_wh(struct dentry *dentry, struct dentry *h_parent, ++ struct inode *dir, aufs_bindex_t bdst) ++{ ++ int err; ++ struct path h_path; ++ struct au_branch *br; ++ ++ br = au_sbr(dentry->d_sb, bdst); ++ h_path.dentry = au_wh_lkup(h_parent, &dentry->d_name, br); ++ err = PTR_ERR(h_path.dentry); ++ if (IS_ERR(h_path.dentry)) ++ goto out; ++ ++ err = 0; ++ if (h_path.dentry->d_inode) { ++ h_path.mnt = br->br_mnt; ++ err = au_wh_unlink_dentry(au_h_iptr(dir, bdst), &h_path, ++ dentry); ++ } ++ dput(h_path.dentry); ++ ++out: ++ return err; ++} ++ ++static int au_cpdown_dir(struct dentry *dentry, aufs_bindex_t bdst, ++ struct dentry *h_parent, void *arg) ++{ ++ int err, rerr; ++ aufs_bindex_t bopq, bstart; ++ struct path h_path; ++ struct dentry *parent; ++ struct inode *h_dir, *h_inode, *inode, *dir; ++ struct au_cpdown_dir_args *args = arg; ++ ++ bstart = au_dbstart(dentry); ++ /* dentry is di-locked */ ++ parent = dget_parent(dentry); ++ dir = parent->d_inode; ++ h_dir = h_parent->d_inode; ++ AuDebugOn(h_dir != au_h_iptr(dir, bdst)); ++ IMustLock(h_dir); ++ ++ err = au_lkup_neg(dentry, bdst); ++ if (unlikely(err < 0)) ++ goto out; ++ h_path.dentry = au_h_dptr(dentry, bdst); ++ h_path.mnt = au_sbr_mnt(dentry->d_sb, bdst); ++ err = vfsub_sio_mkdir(au_h_iptr(dir, bdst), &h_path, ++ S_IRWXU | S_IRUGO | S_IXUGO); ++ if (unlikely(err)) ++ goto out_put; ++ au_fset_cpdown(args->flags, MADE_DIR); ++ ++ bopq = au_dbdiropq(dentry); ++ au_fclr_cpdown(args->flags, WHED); ++ au_fclr_cpdown(args->flags, DIROPQ); ++ if (au_dbwh(dentry) == bdst) ++ au_fset_cpdown(args->flags, 
WHED); ++ if (!au_ftest_cpdown(args->flags, PARENT_OPQ) && bopq <= bdst) ++ au_fset_cpdown(args->flags, PARENT_OPQ); ++ h_inode = h_path.dentry->d_inode; ++ mutex_lock_nested(&h_inode->i_mutex, AuLsc_I_CHILD); ++ if (au_ftest_cpdown(args->flags, WHED)) { ++ err = au_cpdown_dir_opq(dentry, bdst, args); ++ if (unlikely(err)) { ++ mutex_unlock(&h_inode->i_mutex); ++ goto out_dir; ++ } ++ } ++ ++ err = au_cpdown_attr(&h_path, au_h_dptr(dentry, bstart)); ++ mutex_unlock(&h_inode->i_mutex); ++ if (unlikely(err)) ++ goto out_opq; ++ ++ if (au_ftest_cpdown(args->flags, WHED)) { ++ err = au_cpdown_dir_wh(dentry, h_parent, dir, bdst); ++ if (unlikely(err)) ++ goto out_opq; ++ } ++ ++ inode = dentry->d_inode; ++ if (au_ibend(inode) < bdst) ++ au_set_ibend(inode, bdst); ++ au_set_h_iptr(inode, bdst, au_igrab(h_inode), ++ au_hi_flags(inode, /*isdir*/1)); ++ goto out; /* success */ ++ ++ /* revert */ ++out_opq: ++ if (au_ftest_cpdown(args->flags, DIROPQ)) { ++ mutex_lock_nested(&h_inode->i_mutex, AuLsc_I_CHILD); ++ rerr = au_diropq_remove(dentry, bdst); ++ mutex_unlock(&h_inode->i_mutex); ++ if (unlikely(rerr)) { ++ AuIOErr("failed removing diropq for %.*s b%d (%d)\n", ++ AuDLNPair(dentry), bdst, rerr); ++ err = -EIO; ++ goto out; ++ } ++ } ++out_dir: ++ if (au_ftest_cpdown(args->flags, MADE_DIR)) { ++ rerr = vfsub_sio_rmdir(au_h_iptr(dir, bdst), &h_path); ++ if (unlikely(rerr)) { ++ AuIOErr("failed removing %.*s b%d (%d)\n", ++ AuDLNPair(dentry), bdst, rerr); ++ err = -EIO; ++ } ++ } ++out_put: ++ au_set_h_dptr(dentry, bdst, NULL); ++ if (au_dbend(dentry) == bdst) ++ au_update_dbend(dentry); ++out: ++ dput(parent); ++ return err; ++} ++ ++int au_cpdown_dirs(struct dentry *dentry, aufs_bindex_t bdst) ++{ ++ int err; ++ struct au_cpdown_dir_args args = { ++ .parent = dget_parent(dentry), ++ .flags = 0 ++ }; ++ ++ err = au_cp_dirs(dentry, bdst, au_cpdown_dir, &args); ++ dput(args.parent); ++ ++ return err; ++} ++ ++/* ---------------------------------------------------------------------- */ ++ ++/* policies for create */ ++ ++static int au_wbr_nonopq(struct dentry *dentry, aufs_bindex_t bindex) ++{ ++ int err, i, j, ndentry; ++ aufs_bindex_t bopq; ++ struct au_dcsub_pages dpages; ++ struct au_dpage *dpage; ++ struct dentry **dentries, *parent, *d; ++ ++ err = au_dpages_init(&dpages, GFP_NOFS); ++ if (unlikely(err)) ++ goto out; ++ parent = dget_parent(dentry); ++ err = au_dcsub_pages_rev_aufs(&dpages, parent, /*do_include*/0); ++ if (unlikely(err)) ++ goto out_free; ++ ++ err = bindex; ++ for (i = 0; i < dpages.ndpage; i++) { ++ dpage = dpages.dpages + i; ++ dentries = dpage->dentries; ++ ndentry = dpage->ndentry; ++ for (j = 0; j < ndentry; j++) { ++ d = dentries[j]; ++ di_read_lock_parent2(d, !AuLock_IR); ++ bopq = au_dbdiropq(d); ++ di_read_unlock(d, !AuLock_IR); ++ if (bopq >= 0 && bopq < err) ++ err = bopq; ++ } ++ } ++ ++out_free: ++ dput(parent); ++ au_dpages_free(&dpages); ++out: ++ return err; ++} ++ ++static int au_wbr_bu(struct super_block *sb, aufs_bindex_t bindex) ++{ ++ for (; bindex >= 0; bindex--) ++ if (!au_br_rdonly(au_sbr(sb, bindex))) ++ return bindex; ++ return -EROFS; ++} ++ ++/* top down parent */ ++static int au_wbr_create_tdp(struct dentry *dentry, int isdir __maybe_unused) ++{ ++ int err; ++ aufs_bindex_t bstart, bindex; ++ struct super_block *sb; ++ struct dentry *parent, *h_parent; ++ ++ sb = dentry->d_sb; ++ bstart = au_dbstart(dentry); ++ err = bstart; ++ if (!au_br_rdonly(au_sbr(sb, bstart))) ++ goto out; ++ ++ err = -EROFS; ++ parent = dget_parent(dentry); ++ for (bindex = 
au_dbstart(parent); bindex < bstart; bindex++) { ++ h_parent = au_h_dptr(parent, bindex); ++ if (!h_parent || !h_parent->d_inode) ++ continue; ++ ++ if (!au_br_rdonly(au_sbr(sb, bindex))) { ++ err = bindex; ++ break; ++ } ++ } ++ dput(parent); ++ ++ /* bottom up here */ ++ if (unlikely(err < 0)) { ++ err = au_wbr_bu(sb, bstart - 1); ++ if (err >= 0) ++ err = au_wbr_nonopq(dentry, err); ++ } ++ ++out: ++ AuDbg("b%d\n", err); ++ return err; ++} ++ ++/* ---------------------------------------------------------------------- */ ++ ++/* an exception for the policy other than tdp */ ++static int au_wbr_create_exp(struct dentry *dentry) ++{ ++ int err; ++ aufs_bindex_t bwh, bdiropq; ++ struct dentry *parent; ++ ++ err = -1; ++ bwh = au_dbwh(dentry); ++ parent = dget_parent(dentry); ++ bdiropq = au_dbdiropq(parent); ++ if (bwh >= 0) { ++ if (bdiropq >= 0) ++ err = min(bdiropq, bwh); ++ else ++ err = bwh; ++ AuDbg("%d\n", err); ++ } else if (bdiropq >= 0) { ++ err = bdiropq; ++ AuDbg("%d\n", err); ++ } ++ dput(parent); ++ ++ if (err >= 0) ++ err = au_wbr_nonopq(dentry, err); ++ ++ if (err >= 0 && au_br_rdonly(au_sbr(dentry->d_sb, err))) ++ err = -1; ++ ++ AuDbg("%d\n", err); ++ return err; ++} ++ ++/* ---------------------------------------------------------------------- */ ++ ++/* round robin */ ++static int au_wbr_create_init_rr(struct super_block *sb) ++{ ++ int err; ++ ++ err = au_wbr_bu(sb, au_sbend(sb)); ++ atomic_set(&au_sbi(sb)->si_wbr_rr_next, -err); /* less important */ ++ /* smp_mb(); */ ++ ++ AuDbg("b%d\n", err); ++ return err; ++} ++ ++static int au_wbr_create_rr(struct dentry *dentry, int isdir) ++{ ++ int err, nbr; ++ unsigned int u; ++ aufs_bindex_t bindex, bend; ++ struct super_block *sb; ++ atomic_t *next; ++ ++ err = au_wbr_create_exp(dentry); ++ if (err >= 0) ++ goto out; ++ ++ sb = dentry->d_sb; ++ next = &au_sbi(sb)->si_wbr_rr_next; ++ bend = au_sbend(sb); ++ nbr = bend + 1; ++ for (bindex = 0; bindex <= bend; bindex++) { ++ if (!isdir) { ++ err = atomic_dec_return(next) + 1; ++ /* modulo for 0 is meaningless */ ++ if (unlikely(!err)) ++ err = atomic_dec_return(next) + 1; ++ } else ++ err = atomic_read(next); ++ AuDbg("%d\n", err); ++ u = err; ++ err = u % nbr; ++ AuDbg("%d\n", err); ++ if (!au_br_rdonly(au_sbr(sb, err))) ++ break; ++ err = -EROFS; ++ } ++ ++ if (err >= 0) ++ err = au_wbr_nonopq(dentry, err); ++ ++out: ++ AuDbg("%d\n", err); ++ return err; ++} ++ ++/* ---------------------------------------------------------------------- */ ++ ++/* most free space */ ++static void au_mfs(struct dentry *dentry) ++{ ++ struct super_block *sb; ++ struct au_branch *br; ++ struct au_wbr_mfs *mfs; ++ aufs_bindex_t bindex, bend; ++ int err; ++ unsigned long long b, bavail; ++ struct path h_path; ++ /* reduce the stack usage */ ++ struct kstatfs *st; ++ ++ st = kmalloc(sizeof(*st), GFP_NOFS); ++ if (unlikely(!st)) { ++ AuWarn1("failed updating mfs(%d), ignored\n", -ENOMEM); ++ return; ++ } ++ ++ bavail = 0; ++ sb = dentry->d_sb; ++ mfs = &au_sbi(sb)->si_wbr_mfs; ++ MtxMustLock(&mfs->mfs_lock); ++ mfs->mfs_bindex = -EROFS; ++ mfs->mfsrr_bytes = 0; ++ bend = au_sbend(sb); ++ for (bindex = 0; bindex <= bend; bindex++) { ++ br = au_sbr(sb, bindex); ++ if (au_br_rdonly(br)) ++ continue; ++ ++ /* sb->s_root for NFS is unreliable */ ++ h_path.mnt = br->br_mnt; ++ h_path.dentry = h_path.mnt->mnt_root; ++ err = vfs_statfs(&h_path, st); ++ if (unlikely(err)) { ++ AuWarn1("failed statfs, b%d, %d\n", bindex, err); ++ continue; ++ } ++ ++ /* when the available size is equal, select the lower one */ 
++ BUILD_BUG_ON(sizeof(b) < sizeof(st->f_bavail) ++ || sizeof(b) < sizeof(st->f_bsize)); ++ b = st->f_bavail * st->f_bsize; ++ br->br_wbr->wbr_bytes = b; ++ if (b >= bavail) { ++ bavail = b; ++ mfs->mfs_bindex = bindex; ++ mfs->mfs_jiffy = jiffies; ++ } ++ } ++ ++ mfs->mfsrr_bytes = bavail; ++ AuDbg("b%d\n", mfs->mfs_bindex); ++ kfree(st); ++} ++ ++static int au_wbr_create_mfs(struct dentry *dentry, int isdir __maybe_unused) ++{ ++ int err; ++ struct super_block *sb; ++ struct au_wbr_mfs *mfs; ++ ++ err = au_wbr_create_exp(dentry); ++ if (err >= 0) ++ goto out; ++ ++ sb = dentry->d_sb; ++ mfs = &au_sbi(sb)->si_wbr_mfs; ++ mutex_lock(&mfs->mfs_lock); ++ if (time_after(jiffies, mfs->mfs_jiffy + mfs->mfs_expire) ++ || mfs->mfs_bindex < 0 ++ || au_br_rdonly(au_sbr(sb, mfs->mfs_bindex))) ++ au_mfs(dentry); ++ mutex_unlock(&mfs->mfs_lock); ++ err = mfs->mfs_bindex; ++ ++ if (err >= 0) ++ err = au_wbr_nonopq(dentry, err); ++ ++out: ++ AuDbg("b%d\n", err); ++ return err; ++} ++ ++static int au_wbr_create_init_mfs(struct super_block *sb) ++{ ++ struct au_wbr_mfs *mfs; ++ ++ mfs = &au_sbi(sb)->si_wbr_mfs; ++ mutex_init(&mfs->mfs_lock); ++ mfs->mfs_jiffy = 0; ++ mfs->mfs_bindex = -EROFS; ++ ++ return 0; ++} ++ ++static int au_wbr_create_fin_mfs(struct super_block *sb __maybe_unused) ++{ ++ mutex_destroy(&au_sbi(sb)->si_wbr_mfs.mfs_lock); ++ return 0; ++} ++ ++/* ---------------------------------------------------------------------- */ ++ ++/* most free space and then round robin */ ++static int au_wbr_create_mfsrr(struct dentry *dentry, int isdir) ++{ ++ int err; ++ struct au_wbr_mfs *mfs; ++ ++ err = au_wbr_create_mfs(dentry, isdir); ++ if (err >= 0) { ++ mfs = &au_sbi(dentry->d_sb)->si_wbr_mfs; ++ mutex_lock(&mfs->mfs_lock); ++ if (mfs->mfsrr_bytes < mfs->mfsrr_watermark) ++ err = au_wbr_create_rr(dentry, isdir); ++ mutex_unlock(&mfs->mfs_lock); ++ } ++ ++ AuDbg("b%d\n", err); ++ return err; ++} ++ ++static int au_wbr_create_init_mfsrr(struct super_block *sb) ++{ ++ int err; ++ ++ au_wbr_create_init_mfs(sb); /* ignore */ ++ err = au_wbr_create_init_rr(sb); ++ ++ return err; ++} ++ ++/* ---------------------------------------------------------------------- */ ++ ++/* top down parent and most free space */ ++static int au_wbr_create_pmfs(struct dentry *dentry, int isdir) ++{ ++ int err, e2; ++ unsigned long long b; ++ aufs_bindex_t bindex, bstart, bend; ++ struct super_block *sb; ++ struct dentry *parent, *h_parent; ++ struct au_branch *br; ++ ++ err = au_wbr_create_tdp(dentry, isdir); ++ if (unlikely(err < 0)) ++ goto out; ++ parent = dget_parent(dentry); ++ bstart = au_dbstart(parent); ++ bend = au_dbtaildir(parent); ++ if (bstart == bend) ++ goto out_parent; /* success */ ++ ++ e2 = au_wbr_create_mfs(dentry, isdir); ++ if (e2 < 0) ++ goto out_parent; /* success */ ++ ++ /* when the available size is equal, select upper one */ ++ sb = dentry->d_sb; ++ br = au_sbr(sb, err); ++ b = br->br_wbr->wbr_bytes; ++ AuDbg("b%d, %llu\n", err, b); ++ ++ for (bindex = bstart; bindex <= bend; bindex++) { ++ h_parent = au_h_dptr(parent, bindex); ++ if (!h_parent || !h_parent->d_inode) ++ continue; ++ ++ br = au_sbr(sb, bindex); ++ if (!au_br_rdonly(br) && br->br_wbr->wbr_bytes > b) { ++ b = br->br_wbr->wbr_bytes; ++ err = bindex; ++ AuDbg("b%d, %llu\n", err, b); ++ } ++ } ++ ++ if (err >= 0) ++ err = au_wbr_nonopq(dentry, err); ++ ++out_parent: ++ dput(parent); ++out: ++ AuDbg("b%d\n", err); ++ return err; ++} ++ ++/* ---------------------------------------------------------------------- */ ++ ++/* policies for 
copyup */ ++ ++/* top down parent */ ++static int au_wbr_copyup_tdp(struct dentry *dentry) ++{ ++ return au_wbr_create_tdp(dentry, /*isdir, anything is ok*/0); ++} ++ ++/* bottom up parent */ ++static int au_wbr_copyup_bup(struct dentry *dentry) ++{ ++ int err; ++ aufs_bindex_t bindex, bstart; ++ struct dentry *parent, *h_parent; ++ struct super_block *sb; ++ ++ err = -EROFS; ++ sb = dentry->d_sb; ++ parent = dget_parent(dentry); ++ bstart = au_dbstart(parent); ++ for (bindex = au_dbstart(dentry); bindex >= bstart; bindex--) { ++ h_parent = au_h_dptr(parent, bindex); ++ if (!h_parent || !h_parent->d_inode) ++ continue; ++ ++ if (!au_br_rdonly(au_sbr(sb, bindex))) { ++ err = bindex; ++ break; ++ } ++ } ++ dput(parent); ++ ++ /* bottom up here */ ++ if (unlikely(err < 0)) ++ err = au_wbr_bu(sb, bstart - 1); ++ ++ AuDbg("b%d\n", err); ++ return err; ++} ++ ++/* bottom up */ ++static int au_wbr_copyup_bu(struct dentry *dentry) ++{ ++ int err; ++ aufs_bindex_t bstart; ++ ++ bstart = au_dbstart(dentry); ++ err = au_wbr_bu(dentry->d_sb, bstart); ++ AuDbg("b%d\n", err); ++ if (err > bstart) ++ err = au_wbr_nonopq(dentry, err); ++ ++ AuDbg("b%d\n", err); ++ return err; ++} ++ ++/* ---------------------------------------------------------------------- */ ++ ++struct au_wbr_copyup_operations au_wbr_copyup_ops[] = { ++ [AuWbrCopyup_TDP] = { ++ .copyup = au_wbr_copyup_tdp ++ }, ++ [AuWbrCopyup_BUP] = { ++ .copyup = au_wbr_copyup_bup ++ }, ++ [AuWbrCopyup_BU] = { ++ .copyup = au_wbr_copyup_bu ++ } ++}; ++ ++struct au_wbr_create_operations au_wbr_create_ops[] = { ++ [AuWbrCreate_TDP] = { ++ .create = au_wbr_create_tdp ++ }, ++ [AuWbrCreate_RR] = { ++ .create = au_wbr_create_rr, ++ .init = au_wbr_create_init_rr ++ }, ++ [AuWbrCreate_MFS] = { ++ .create = au_wbr_create_mfs, ++ .init = au_wbr_create_init_mfs, ++ .fin = au_wbr_create_fin_mfs ++ }, ++ [AuWbrCreate_MFSV] = { ++ .create = au_wbr_create_mfs, ++ .init = au_wbr_create_init_mfs, ++ .fin = au_wbr_create_fin_mfs ++ }, ++ [AuWbrCreate_MFSRR] = { ++ .create = au_wbr_create_mfsrr, ++ .init = au_wbr_create_init_mfsrr, ++ .fin = au_wbr_create_fin_mfs ++ }, ++ [AuWbrCreate_MFSRRV] = { ++ .create = au_wbr_create_mfsrr, ++ .init = au_wbr_create_init_mfsrr, ++ .fin = au_wbr_create_fin_mfs ++ }, ++ [AuWbrCreate_PMFS] = { ++ .create = au_wbr_create_pmfs, ++ .init = au_wbr_create_init_mfs, ++ .fin = au_wbr_create_fin_mfs ++ }, ++ [AuWbrCreate_PMFSV] = { ++ .create = au_wbr_create_pmfs, ++ .init = au_wbr_create_init_mfs, ++ .fin = au_wbr_create_fin_mfs ++ } ++}; +diff -uNr linux-3.2.0-gentoo-r1.orig//fs/aufs/whout.c linux-3.2.0-gentoo-r1/fs/aufs/whout.c +--- linux-3.2.0-gentoo-r1.orig//fs/aufs/whout.c 1970-01-01 01:00:00.000000000 +0100 ++++ linux-3.2.0-gentoo-r1/fs/aufs/whout.c 2012-01-17 12:11:25.057992464 +0100 +@@ -0,0 +1,1049 @@ ++/* ++ * Copyright (C) 2005-2012 Junjiro R. Okajima ++ * ++ * This program, aufs is free software; you can redistribute it and/or modify ++ * it under the terms of the GNU General Public License as published by ++ * the Free Software Foundation; either version 2 of the License, or ++ * (at your option) any later version. ++ * ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ * GNU General Public License for more details. 
++ * ++ * You should have received a copy of the GNU General Public License ++ * along with this program; if not, write to the Free Software ++ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA ++ */ ++ ++/* ++ * whiteout for logical deletion and opaque directory ++ */ ++ ++#include "aufs.h" ++ ++#define WH_MASK S_IRUGO ++ ++/* ++ * If a directory contains this file, then it is opaque. We start with the ++ * .wh. flag so that it is blocked by lookup. ++ */ ++static struct qstr diropq_name = { ++ .name = AUFS_WH_DIROPQ, ++ .len = sizeof(AUFS_WH_DIROPQ) - 1 ++}; ++ ++/* ++ * generate whiteout name, which is NOT terminated by NULL. ++ * @name: original d_name.name ++ * @len: original d_name.len ++ * @wh: whiteout qstr ++ * returns zero when succeeds, otherwise error. ++ * succeeded value as wh->name should be freed by kfree(). ++ */ ++int au_wh_name_alloc(struct qstr *wh, const struct qstr *name) ++{ ++ char *p; ++ ++ if (unlikely(name->len > PATH_MAX - AUFS_WH_PFX_LEN)) ++ return -ENAMETOOLONG; ++ ++ wh->len = name->len + AUFS_WH_PFX_LEN; ++ p = kmalloc(wh->len, GFP_NOFS); ++ wh->name = p; ++ if (p) { ++ memcpy(p, AUFS_WH_PFX, AUFS_WH_PFX_LEN); ++ memcpy(p + AUFS_WH_PFX_LEN, name->name, name->len); ++ /* smp_mb(); */ ++ return 0; ++ } ++ return -ENOMEM; ++} ++ ++/* ---------------------------------------------------------------------- */ ++ ++/* ++ * test if the @wh_name exists under @h_parent. ++ * @try_sio specifies the necessary of super-io. ++ */ ++int au_wh_test(struct dentry *h_parent, struct qstr *wh_name, ++ struct au_branch *br, int try_sio) ++{ ++ int err; ++ struct dentry *wh_dentry; ++ ++ if (!try_sio) ++ wh_dentry = au_lkup_one(wh_name, h_parent, br, /*nd*/NULL); ++ else ++ wh_dentry = au_sio_lkup_one(wh_name, h_parent, br); ++ err = PTR_ERR(wh_dentry); ++ if (IS_ERR(wh_dentry)) ++ goto out; ++ ++ err = 0; ++ if (!wh_dentry->d_inode) ++ goto out_wh; /* success */ ++ ++ err = 1; ++ if (S_ISREG(wh_dentry->d_inode->i_mode)) ++ goto out_wh; /* success */ ++ ++ err = -EIO; ++ AuIOErr("%.*s Invalid whiteout entry type 0%o.\n", ++ AuDLNPair(wh_dentry), wh_dentry->d_inode->i_mode); ++ ++out_wh: ++ dput(wh_dentry); ++out: ++ return err; ++} ++ ++/* ++ * test if the @h_dentry sets opaque or not. ++ */ ++int au_diropq_test(struct dentry *h_dentry, struct au_branch *br) ++{ ++ int err; ++ struct inode *h_dir; ++ ++ h_dir = h_dentry->d_inode; ++ err = au_wh_test(h_dentry, &diropq_name, br, ++ au_test_h_perm_sio(h_dir, MAY_EXEC)); ++ return err; ++} ++ ++/* ++ * returns a negative dentry whose name is unique and temporary. 
++ */ ++struct dentry *au_whtmp_lkup(struct dentry *h_parent, struct au_branch *br, ++ struct qstr *prefix) ++{ ++ struct dentry *dentry; ++ int i; ++ char defname[NAME_MAX - AUFS_MAX_NAMELEN + DNAME_INLINE_LEN + 1], ++ *name, *p; ++ /* strict atomic_t is unnecessary here */ ++ static unsigned short cnt; ++ struct qstr qs; ++ ++ BUILD_BUG_ON(sizeof(cnt) * 2 > AUFS_WH_TMP_LEN); ++ ++ name = defname; ++ qs.len = sizeof(defname) - DNAME_INLINE_LEN + prefix->len - 1; ++ if (unlikely(prefix->len > DNAME_INLINE_LEN)) { ++ dentry = ERR_PTR(-ENAMETOOLONG); ++ if (unlikely(qs.len > NAME_MAX)) ++ goto out; ++ dentry = ERR_PTR(-ENOMEM); ++ name = kmalloc(qs.len + 1, GFP_NOFS); ++ if (unlikely(!name)) ++ goto out; ++ } ++ ++ /* doubly whiteout-ed */ ++ memcpy(name, AUFS_WH_PFX AUFS_WH_PFX, AUFS_WH_PFX_LEN * 2); ++ p = name + AUFS_WH_PFX_LEN * 2; ++ memcpy(p, prefix->name, prefix->len); ++ p += prefix->len; ++ *p++ = '.'; ++ AuDebugOn(name + qs.len + 1 - p <= AUFS_WH_TMP_LEN); ++ ++ qs.name = name; ++ for (i = 0; i < 3; i++) { ++ sprintf(p, "%.*x", AUFS_WH_TMP_LEN, cnt++); ++ dentry = au_sio_lkup_one(&qs, h_parent, br); ++ if (IS_ERR(dentry) || !dentry->d_inode) ++ goto out_name; ++ dput(dentry); ++ } ++ /* pr_warning("could not get random name\n"); */ ++ dentry = ERR_PTR(-EEXIST); ++ AuDbg("%.*s\n", AuLNPair(&qs)); ++ BUG(); ++ ++out_name: ++ if (name != defname) ++ kfree(name); ++out: ++ AuTraceErrPtr(dentry); ++ return dentry; ++} ++ ++/* ++ * rename the @h_dentry on @br to the whiteouted temporary name. ++ */ ++int au_whtmp_ren(struct dentry *h_dentry, struct au_branch *br) ++{ ++ int err; ++ struct path h_path = { ++ .mnt = br->br_mnt ++ }; ++ struct inode *h_dir; ++ struct dentry *h_parent; ++ ++ h_parent = h_dentry->d_parent; /* dir inode is locked */ ++ h_dir = h_parent->d_inode; ++ IMustLock(h_dir); ++ ++ h_path.dentry = au_whtmp_lkup(h_parent, br, &h_dentry->d_name); ++ err = PTR_ERR(h_path.dentry); ++ if (IS_ERR(h_path.dentry)) ++ goto out; ++ ++ /* under the same dir, no need to lock_rename() */ ++ err = vfsub_rename(h_dir, h_dentry, h_dir, &h_path); ++ AuTraceErr(err); ++ dput(h_path.dentry); ++ ++out: ++ AuTraceErr(err); ++ return err; ++} ++ ++/* ---------------------------------------------------------------------- */ ++/* ++ * functions for removing a whiteout ++ */ ++ ++static int do_unlink_wh(struct inode *h_dir, struct path *h_path) ++{ ++ int force; ++ ++ /* ++ * forces superio when the dir has a sticky bit. ++ * this may be a violation of unix fs semantics. 
++ */ ++ force = (h_dir->i_mode & S_ISVTX) ++ && h_path->dentry->d_inode->i_uid != current_fsuid(); ++ return vfsub_unlink(h_dir, h_path, force); ++} ++ ++int au_wh_unlink_dentry(struct inode *h_dir, struct path *h_path, ++ struct dentry *dentry) ++{ ++ int err; ++ ++ err = do_unlink_wh(h_dir, h_path); ++ if (!err && dentry) ++ au_set_dbwh(dentry, -1); ++ ++ return err; ++} ++ ++static int unlink_wh_name(struct dentry *h_parent, struct qstr *wh, ++ struct au_branch *br) ++{ ++ int err; ++ struct path h_path = { ++ .mnt = br->br_mnt ++ }; ++ ++ err = 0; ++ h_path.dentry = au_lkup_one(wh, h_parent, br, /*nd*/NULL); ++ if (IS_ERR(h_path.dentry)) ++ err = PTR_ERR(h_path.dentry); ++ else { ++ if (h_path.dentry->d_inode ++ && S_ISREG(h_path.dentry->d_inode->i_mode)) ++ err = do_unlink_wh(h_parent->d_inode, &h_path); ++ dput(h_path.dentry); ++ } ++ ++ return err; ++} ++ ++/* ---------------------------------------------------------------------- */ ++/* ++ * initialize/clean whiteout for a branch ++ */ ++ ++static void au_wh_clean(struct inode *h_dir, struct path *whpath, ++ const int isdir) ++{ ++ int err; ++ ++ if (!whpath->dentry->d_inode) ++ return; ++ ++ err = mnt_want_write(whpath->mnt); ++ if (!err) { ++ if (isdir) ++ err = vfsub_rmdir(h_dir, whpath); ++ else ++ err = vfsub_unlink(h_dir, whpath, /*force*/0); ++ mnt_drop_write(whpath->mnt); ++ } ++ if (unlikely(err)) ++ pr_warning("failed removing %.*s (%d), ignored.\n", ++ AuDLNPair(whpath->dentry), err); ++} ++ ++static int test_linkable(struct dentry *h_root) ++{ ++ struct inode *h_dir = h_root->d_inode; ++ ++ if (h_dir->i_op->link) ++ return 0; ++ ++ pr_err("%.*s (%s) doesn't support link(2), use noplink and rw+nolwh\n", ++ AuDLNPair(h_root), au_sbtype(h_root->d_sb)); ++ return -ENOSYS; ++} ++ ++/* todo: should this mkdir be done in /sbin/mount.aufs helper? 
*/ ++static int au_whdir(struct inode *h_dir, struct path *path) ++{ ++ int err; ++ ++ err = -EEXIST; ++ if (!path->dentry->d_inode) { ++ int mode = S_IRWXU; ++ ++ if (au_test_nfs(path->dentry->d_sb)) ++ mode |= S_IXUGO; ++ err = mnt_want_write(path->mnt); ++ if (!err) { ++ err = vfsub_mkdir(h_dir, path, mode); ++ mnt_drop_write(path->mnt); ++ } ++ } else if (S_ISDIR(path->dentry->d_inode->i_mode)) ++ err = 0; ++ else ++ pr_err("unknown %.*s exists\n", AuDLNPair(path->dentry)); ++ ++ return err; ++} ++ ++struct au_wh_base { ++ const struct qstr *name; ++ struct dentry *dentry; ++}; ++ ++static void au_wh_init_ro(struct inode *h_dir, struct au_wh_base base[], ++ struct path *h_path) ++{ ++ h_path->dentry = base[AuBrWh_BASE].dentry; ++ au_wh_clean(h_dir, h_path, /*isdir*/0); ++ h_path->dentry = base[AuBrWh_PLINK].dentry; ++ au_wh_clean(h_dir, h_path, /*isdir*/1); ++ h_path->dentry = base[AuBrWh_ORPH].dentry; ++ au_wh_clean(h_dir, h_path, /*isdir*/1); ++} ++ ++/* ++ * returns tri-state, ++ * minus: error, caller should print the mesage ++ * zero: succuess ++ * plus: error, caller should NOT print the mesage ++ */ ++static int au_wh_init_rw_nolink(struct dentry *h_root, struct au_wbr *wbr, ++ int do_plink, struct au_wh_base base[], ++ struct path *h_path) ++{ ++ int err; ++ struct inode *h_dir; ++ ++ h_dir = h_root->d_inode; ++ h_path->dentry = base[AuBrWh_BASE].dentry; ++ au_wh_clean(h_dir, h_path, /*isdir*/0); ++ h_path->dentry = base[AuBrWh_PLINK].dentry; ++ if (do_plink) { ++ err = test_linkable(h_root); ++ if (unlikely(err)) { ++ err = 1; ++ goto out; ++ } ++ ++ err = au_whdir(h_dir, h_path); ++ if (unlikely(err)) ++ goto out; ++ wbr->wbr_plink = dget(base[AuBrWh_PLINK].dentry); ++ } else ++ au_wh_clean(h_dir, h_path, /*isdir*/1); ++ h_path->dentry = base[AuBrWh_ORPH].dentry; ++ err = au_whdir(h_dir, h_path); ++ if (unlikely(err)) ++ goto out; ++ wbr->wbr_orph = dget(base[AuBrWh_ORPH].dentry); ++ ++out: ++ return err; ++} ++ ++/* ++ * for the moment, aufs supports the branch filesystem which does not support ++ * link(2). testing on FAT which does not support i_op->setattr() fully either, ++ * copyup failed. finally, such filesystem will not be used as the writable ++ * branch. ++ * ++ * returns tri-state, see above. ++ */ ++static int au_wh_init_rw(struct dentry *h_root, struct au_wbr *wbr, ++ int do_plink, struct au_wh_base base[], ++ struct path *h_path) ++{ ++ int err; ++ struct inode *h_dir; ++ ++ WbrWhMustWriteLock(wbr); ++ ++ err = test_linkable(h_root); ++ if (unlikely(err)) { ++ err = 1; ++ goto out; ++ } ++ ++ /* ++ * todo: should this create be done in /sbin/mount.aufs helper? 
++ */ ++ err = -EEXIST; ++ h_dir = h_root->d_inode; ++ if (!base[AuBrWh_BASE].dentry->d_inode) { ++ err = mnt_want_write(h_path->mnt); ++ if (!err) { ++ h_path->dentry = base[AuBrWh_BASE].dentry; ++ err = vfsub_create(h_dir, h_path, WH_MASK); ++ mnt_drop_write(h_path->mnt); ++ } ++ } else if (S_ISREG(base[AuBrWh_BASE].dentry->d_inode->i_mode)) ++ err = 0; ++ else ++ pr_err("unknown %.*s/%.*s exists\n", ++ AuDLNPair(h_root), AuDLNPair(base[AuBrWh_BASE].dentry)); ++ if (unlikely(err)) ++ goto out; ++ ++ h_path->dentry = base[AuBrWh_PLINK].dentry; ++ if (do_plink) { ++ err = au_whdir(h_dir, h_path); ++ if (unlikely(err)) ++ goto out; ++ wbr->wbr_plink = dget(base[AuBrWh_PLINK].dentry); ++ } else ++ au_wh_clean(h_dir, h_path, /*isdir*/1); ++ wbr->wbr_whbase = dget(base[AuBrWh_BASE].dentry); ++ ++ h_path->dentry = base[AuBrWh_ORPH].dentry; ++ err = au_whdir(h_dir, h_path); ++ if (unlikely(err)) ++ goto out; ++ wbr->wbr_orph = dget(base[AuBrWh_ORPH].dentry); ++ ++out: ++ return err; ++} ++ ++/* ++ * initialize the whiteout base file/dir for @br. ++ */ ++int au_wh_init(struct dentry *h_root, struct au_branch *br, ++ struct super_block *sb) ++{ ++ int err, i; ++ const unsigned char do_plink ++ = !!au_opt_test(au_mntflags(sb), PLINK); ++ struct path path = { ++ .mnt = br->br_mnt ++ }; ++ struct inode *h_dir; ++ struct au_wbr *wbr = br->br_wbr; ++ static const struct qstr base_name[] = { ++ [AuBrWh_BASE] = { ++ .name = AUFS_BASE_NAME, ++ .len = sizeof(AUFS_BASE_NAME) - 1 ++ }, ++ [AuBrWh_PLINK] = { ++ .name = AUFS_PLINKDIR_NAME, ++ .len = sizeof(AUFS_PLINKDIR_NAME) - 1 ++ }, ++ [AuBrWh_ORPH] = { ++ .name = AUFS_ORPHDIR_NAME, ++ .len = sizeof(AUFS_ORPHDIR_NAME) - 1 ++ } ++ }; ++ struct au_wh_base base[] = { ++ [AuBrWh_BASE] = { ++ .name = base_name + AuBrWh_BASE, ++ .dentry = NULL ++ }, ++ [AuBrWh_PLINK] = { ++ .name = base_name + AuBrWh_PLINK, ++ .dentry = NULL ++ }, ++ [AuBrWh_ORPH] = { ++ .name = base_name + AuBrWh_ORPH, ++ .dentry = NULL ++ } ++ }; ++ ++ if (wbr) ++ WbrWhMustWriteLock(wbr); ++ ++ for (i = 0; i < AuBrWh_Last; i++) { ++ /* doubly whiteouted */ ++ struct dentry *d; ++ ++ d = au_wh_lkup(h_root, (void *)base[i].name, br); ++ err = PTR_ERR(d); ++ if (IS_ERR(d)) ++ goto out; ++ ++ base[i].dentry = d; ++ AuDebugOn(wbr ++ && wbr->wbr_wh[i] ++ && wbr->wbr_wh[i] != base[i].dentry); ++ } ++ ++ if (wbr) ++ for (i = 0; i < AuBrWh_Last; i++) { ++ dput(wbr->wbr_wh[i]); ++ wbr->wbr_wh[i] = NULL; ++ } ++ ++ err = 0; ++ if (!au_br_writable(br->br_perm)) { ++ h_dir = h_root->d_inode; ++ au_wh_init_ro(h_dir, base, &path); ++ } else if (!au_br_wh_linkable(br->br_perm)) { ++ err = au_wh_init_rw_nolink(h_root, wbr, do_plink, base, &path); ++ if (err > 0) ++ goto out; ++ else if (err) ++ goto out_err; ++ } else { ++ err = au_wh_init_rw(h_root, wbr, do_plink, base, &path); ++ if (err > 0) ++ goto out; ++ else if (err) ++ goto out_err; ++ } ++ goto out; /* success */ ++ ++out_err: ++ pr_err("an error(%d) on the writable branch %.*s(%s)\n", ++ err, AuDLNPair(h_root), au_sbtype(h_root->d_sb)); ++out: ++ for (i = 0; i < AuBrWh_Last; i++) ++ dput(base[i].dentry); ++ return err; ++} ++ ++/* ---------------------------------------------------------------------- */ ++/* ++ * whiteouts are all hard-linked usually. ++ * when its link count reaches a ceiling, we create a new whiteout base ++ * asynchronously. 
++ */ ++ ++struct reinit_br_wh { ++ struct super_block *sb; ++ struct au_branch *br; ++}; ++ ++static void reinit_br_wh(void *arg) ++{ ++ int err; ++ aufs_bindex_t bindex; ++ struct path h_path; ++ struct reinit_br_wh *a = arg; ++ struct au_wbr *wbr; ++ struct inode *dir; ++ struct dentry *h_root; ++ struct au_hinode *hdir; ++ ++ err = 0; ++ wbr = a->br->br_wbr; ++ /* big aufs lock */ ++ si_noflush_write_lock(a->sb); ++ if (!au_br_writable(a->br->br_perm)) ++ goto out; ++ bindex = au_br_index(a->sb, a->br->br_id); ++ if (unlikely(bindex < 0)) ++ goto out; ++ ++ di_read_lock_parent(a->sb->s_root, AuLock_IR); ++ dir = a->sb->s_root->d_inode; ++ hdir = au_hi(dir, bindex); ++ h_root = au_h_dptr(a->sb->s_root, bindex); ++ ++ au_hn_imtx_lock_nested(hdir, AuLsc_I_PARENT); ++ wbr_wh_write_lock(wbr); ++ err = au_h_verify(wbr->wbr_whbase, au_opt_udba(a->sb), hdir->hi_inode, ++ h_root, a->br); ++ if (!err) { ++ err = mnt_want_write(a->br->br_mnt); ++ if (!err) { ++ h_path.dentry = wbr->wbr_whbase; ++ h_path.mnt = a->br->br_mnt; ++ err = vfsub_unlink(hdir->hi_inode, &h_path, /*force*/0); ++ mnt_drop_write(a->br->br_mnt); ++ } ++ } else { ++ pr_warning("%.*s is moved, ignored\n", ++ AuDLNPair(wbr->wbr_whbase)); ++ err = 0; ++ } ++ dput(wbr->wbr_whbase); ++ wbr->wbr_whbase = NULL; ++ if (!err) ++ err = au_wh_init(h_root, a->br, a->sb); ++ wbr_wh_write_unlock(wbr); ++ au_hn_imtx_unlock(hdir); ++ di_read_unlock(a->sb->s_root, AuLock_IR); ++ ++out: ++ if (wbr) ++ atomic_dec(&wbr->wbr_wh_running); ++ atomic_dec(&a->br->br_count); ++ si_write_unlock(a->sb); ++ au_nwt_done(&au_sbi(a->sb)->si_nowait); ++ kfree(arg); ++ if (unlikely(err)) ++ AuIOErr("err %d\n", err); ++} ++ ++static void kick_reinit_br_wh(struct super_block *sb, struct au_branch *br) ++{ ++ int do_dec, wkq_err; ++ struct reinit_br_wh *arg; ++ ++ do_dec = 1; ++ if (atomic_inc_return(&br->br_wbr->wbr_wh_running) != 1) ++ goto out; ++ ++ /* ignore ENOMEM */ ++ arg = kmalloc(sizeof(*arg), GFP_NOFS); ++ if (arg) { ++ /* ++ * dec(wh_running), kfree(arg) and dec(br_count) ++ * in reinit function ++ */ ++ arg->sb = sb; ++ arg->br = br; ++ atomic_inc(&br->br_count); ++ wkq_err = au_wkq_nowait(reinit_br_wh, arg, sb, /*flags*/0); ++ if (unlikely(wkq_err)) { ++ atomic_dec(&br->br_wbr->wbr_wh_running); ++ atomic_dec(&br->br_count); ++ kfree(arg); ++ } ++ do_dec = 0; ++ } ++ ++out: ++ if (do_dec) ++ atomic_dec(&br->br_wbr->wbr_wh_running); ++} ++ ++/* ---------------------------------------------------------------------- */ ++ ++/* ++ * create the whiteout @wh. ++ */ ++static int link_or_create_wh(struct super_block *sb, aufs_bindex_t bindex, ++ struct dentry *wh) ++{ ++ int err; ++ struct path h_path = { ++ .dentry = wh ++ }; ++ struct au_branch *br; ++ struct au_wbr *wbr; ++ struct dentry *h_parent; ++ struct inode *h_dir; ++ ++ h_parent = wh->d_parent; /* dir inode is locked */ ++ h_dir = h_parent->d_inode; ++ IMustLock(h_dir); ++ ++ br = au_sbr(sb, bindex); ++ h_path.mnt = br->br_mnt; ++ wbr = br->br_wbr; ++ wbr_wh_read_lock(wbr); ++ if (wbr->wbr_whbase) { ++ err = vfsub_link(wbr->wbr_whbase, h_dir, &h_path); ++ if (!err || err != -EMLINK) ++ goto out; ++ ++ /* link count full. re-initialize br_whbase. */ ++ kick_reinit_br_wh(sb, br); ++ } ++ ++ /* return this error in this context */ ++ err = vfsub_create(h_dir, &h_path, WH_MASK); ++ ++out: ++ wbr_wh_read_unlock(wbr); ++ return err; ++} ++ ++/* ---------------------------------------------------------------------- */ ++ ++/* ++ * create or remove the diropq. 
++ */ ++static struct dentry *do_diropq(struct dentry *dentry, aufs_bindex_t bindex, ++ unsigned int flags) ++{ ++ struct dentry *opq_dentry, *h_dentry; ++ struct super_block *sb; ++ struct au_branch *br; ++ int err; ++ ++ sb = dentry->d_sb; ++ br = au_sbr(sb, bindex); ++ h_dentry = au_h_dptr(dentry, bindex); ++ opq_dentry = au_lkup_one(&diropq_name, h_dentry, br, /*nd*/NULL); ++ if (IS_ERR(opq_dentry)) ++ goto out; ++ ++ if (au_ftest_diropq(flags, CREATE)) { ++ err = link_or_create_wh(sb, bindex, opq_dentry); ++ if (!err) { ++ au_set_dbdiropq(dentry, bindex); ++ goto out; /* success */ ++ } ++ } else { ++ struct path tmp = { ++ .dentry = opq_dentry, ++ .mnt = br->br_mnt ++ }; ++ err = do_unlink_wh(au_h_iptr(dentry->d_inode, bindex), &tmp); ++ if (!err) ++ au_set_dbdiropq(dentry, -1); ++ } ++ dput(opq_dentry); ++ opq_dentry = ERR_PTR(err); ++ ++out: ++ return opq_dentry; ++} ++ ++struct do_diropq_args { ++ struct dentry **errp; ++ struct dentry *dentry; ++ aufs_bindex_t bindex; ++ unsigned int flags; ++}; ++ ++static void call_do_diropq(void *args) ++{ ++ struct do_diropq_args *a = args; ++ *a->errp = do_diropq(a->dentry, a->bindex, a->flags); ++} ++ ++struct dentry *au_diropq_sio(struct dentry *dentry, aufs_bindex_t bindex, ++ unsigned int flags) ++{ ++ struct dentry *diropq, *h_dentry; ++ ++ h_dentry = au_h_dptr(dentry, bindex); ++ if (!au_test_h_perm_sio(h_dentry->d_inode, MAY_EXEC | MAY_WRITE)) ++ diropq = do_diropq(dentry, bindex, flags); ++ else { ++ int wkq_err; ++ struct do_diropq_args args = { ++ .errp = &diropq, ++ .dentry = dentry, ++ .bindex = bindex, ++ .flags = flags ++ }; ++ ++ wkq_err = au_wkq_wait(call_do_diropq, &args); ++ if (unlikely(wkq_err)) ++ diropq = ERR_PTR(wkq_err); ++ } ++ ++ return diropq; ++} ++ ++/* ---------------------------------------------------------------------- */ ++ ++/* ++ * lookup whiteout dentry. ++ * @h_parent: lower parent dentry which must exist and be locked ++ * @base_name: name of dentry which will be whiteouted ++ * returns dentry for whiteout. ++ */ ++struct dentry *au_wh_lkup(struct dentry *h_parent, struct qstr *base_name, ++ struct au_branch *br) ++{ ++ int err; ++ struct qstr wh_name; ++ struct dentry *wh_dentry; ++ ++ err = au_wh_name_alloc(&wh_name, base_name); ++ wh_dentry = ERR_PTR(err); ++ if (!err) { ++ wh_dentry = au_lkup_one(&wh_name, h_parent, br, /*nd*/NULL); ++ kfree(wh_name.name); ++ } ++ return wh_dentry; ++} ++ ++/* ++ * link/create a whiteout for @dentry on @bindex. ++ */ ++struct dentry *au_wh_create(struct dentry *dentry, aufs_bindex_t bindex, ++ struct dentry *h_parent) ++{ ++ struct dentry *wh_dentry; ++ struct super_block *sb; ++ int err; ++ ++ sb = dentry->d_sb; ++ wh_dentry = au_wh_lkup(h_parent, &dentry->d_name, au_sbr(sb, bindex)); ++ if (!IS_ERR(wh_dentry) && !wh_dentry->d_inode) { ++ err = link_or_create_wh(sb, bindex, wh_dentry); ++ if (!err) ++ au_set_dbwh(dentry, bindex); ++ else { ++ dput(wh_dentry); ++ wh_dentry = ERR_PTR(err); ++ } ++ } ++ ++ return wh_dentry; ++} ++ ++/* ---------------------------------------------------------------------- */ ++ ++/* Delete all whiteouts in this directory on branch bindex. 
*/ ++static int del_wh_children(struct dentry *h_dentry, struct au_nhash *whlist, ++ aufs_bindex_t bindex, struct au_branch *br) ++{ ++ int err; ++ unsigned long ul, n; ++ struct qstr wh_name; ++ char *p; ++ struct hlist_head *head; ++ struct au_vdir_wh *tpos; ++ struct hlist_node *pos; ++ struct au_vdir_destr *str; ++ ++ err = -ENOMEM; ++ p = __getname_gfp(GFP_NOFS); ++ wh_name.name = p; ++ if (unlikely(!wh_name.name)) ++ goto out; ++ ++ err = 0; ++ memcpy(p, AUFS_WH_PFX, AUFS_WH_PFX_LEN); ++ p += AUFS_WH_PFX_LEN; ++ n = whlist->nh_num; ++ head = whlist->nh_head; ++ for (ul = 0; !err && ul < n; ul++, head++) { ++ hlist_for_each_entry(tpos, pos, head, wh_hash) { ++ if (tpos->wh_bindex != bindex) ++ continue; ++ ++ str = &tpos->wh_str; ++ if (str->len + AUFS_WH_PFX_LEN <= PATH_MAX) { ++ memcpy(p, str->name, str->len); ++ wh_name.len = AUFS_WH_PFX_LEN + str->len; ++ err = unlink_wh_name(h_dentry, &wh_name, br); ++ if (!err) ++ continue; ++ break; ++ } ++ AuIOErr("whiteout name too long %.*s\n", ++ str->len, str->name); ++ err = -EIO; ++ break; ++ } ++ } ++ __putname(wh_name.name); ++ ++out: ++ return err; ++} ++ ++struct del_wh_children_args { ++ int *errp; ++ struct dentry *h_dentry; ++ struct au_nhash *whlist; ++ aufs_bindex_t bindex; ++ struct au_branch *br; ++}; ++ ++static void call_del_wh_children(void *args) ++{ ++ struct del_wh_children_args *a = args; ++ *a->errp = del_wh_children(a->h_dentry, a->whlist, a->bindex, a->br); ++} ++ ++/* ---------------------------------------------------------------------- */ ++ ++struct au_whtmp_rmdir *au_whtmp_rmdir_alloc(struct super_block *sb, gfp_t gfp) ++{ ++ struct au_whtmp_rmdir *whtmp; ++ int err; ++ unsigned int rdhash; ++ ++ SiMustAnyLock(sb); ++ ++ whtmp = kmalloc(sizeof(*whtmp), gfp); ++ if (unlikely(!whtmp)) { ++ whtmp = ERR_PTR(-ENOMEM); ++ goto out; ++ } ++ ++ whtmp->dir = NULL; ++ whtmp->br = NULL; ++ whtmp->wh_dentry = NULL; ++ /* no estimation for dir size */ ++ rdhash = au_sbi(sb)->si_rdhash; ++ if (!rdhash) ++ rdhash = AUFS_RDHASH_DEF; ++ err = au_nhash_alloc(&whtmp->whlist, rdhash, gfp); ++ if (unlikely(err)) { ++ kfree(whtmp); ++ whtmp = ERR_PTR(err); ++ } ++ ++out: ++ return whtmp; ++} ++ ++void au_whtmp_rmdir_free(struct au_whtmp_rmdir *whtmp) ++{ ++ if (whtmp->br) ++ atomic_dec(&whtmp->br->br_count); ++ dput(whtmp->wh_dentry); ++ iput(whtmp->dir); ++ au_nhash_wh_free(&whtmp->whlist); ++ kfree(whtmp); ++} ++ ++/* ++ * rmdir the whiteouted temporary named dir @h_dentry. ++ * @whlist: whiteouted children. ++ */ ++int au_whtmp_rmdir(struct inode *dir, aufs_bindex_t bindex, ++ struct dentry *wh_dentry, struct au_nhash *whlist) ++{ ++ int err; ++ struct path h_tmp; ++ struct inode *wh_inode, *h_dir; ++ struct au_branch *br; ++ ++ h_dir = wh_dentry->d_parent->d_inode; /* dir inode is locked */ ++ IMustLock(h_dir); ++ ++ br = au_sbr(dir->i_sb, bindex); ++ wh_inode = wh_dentry->d_inode; ++ mutex_lock_nested(&wh_inode->i_mutex, AuLsc_I_CHILD); ++ ++ /* ++ * someone else might change some whiteouts while we were sleeping. ++ * it means this whlist may have an obsoleted entry. 
++ */ ++ if (!au_test_h_perm_sio(wh_inode, MAY_EXEC | MAY_WRITE)) ++ err = del_wh_children(wh_dentry, whlist, bindex, br); ++ else { ++ int wkq_err; ++ struct del_wh_children_args args = { ++ .errp = &err, ++ .h_dentry = wh_dentry, ++ .whlist = whlist, ++ .bindex = bindex, ++ .br = br ++ }; ++ ++ wkq_err = au_wkq_wait(call_del_wh_children, &args); ++ if (unlikely(wkq_err)) ++ err = wkq_err; ++ } ++ mutex_unlock(&wh_inode->i_mutex); ++ ++ if (!err) { ++ h_tmp.dentry = wh_dentry; ++ h_tmp.mnt = br->br_mnt; ++ err = vfsub_rmdir(h_dir, &h_tmp); ++ } ++ ++ if (!err) { ++ if (au_ibstart(dir) == bindex) { ++ /* todo: dir->i_mutex is necessary */ ++ au_cpup_attr_timesizes(dir); ++ vfsub_drop_nlink(dir); ++ } ++ return 0; /* success */ ++ } ++ ++ pr_warning("failed removing %.*s(%d), ignored\n", ++ AuDLNPair(wh_dentry), err); ++ return err; ++} ++ ++static void call_rmdir_whtmp(void *args) ++{ ++ int err; ++ aufs_bindex_t bindex; ++ struct au_whtmp_rmdir *a = args; ++ struct super_block *sb; ++ struct dentry *h_parent; ++ struct inode *h_dir; ++ struct au_hinode *hdir; ++ ++ /* rmdir by nfsd may cause deadlock with this i_mutex */ ++ /* mutex_lock(&a->dir->i_mutex); */ ++ err = -EROFS; ++ sb = a->dir->i_sb; ++ si_read_lock(sb, !AuLock_FLUSH); ++ if (!au_br_writable(a->br->br_perm)) ++ goto out; ++ bindex = au_br_index(sb, a->br->br_id); ++ if (unlikely(bindex < 0)) ++ goto out; ++ ++ err = -EIO; ++ ii_write_lock_parent(a->dir); ++ h_parent = dget_parent(a->wh_dentry); ++ h_dir = h_parent->d_inode; ++ hdir = au_hi(a->dir, bindex); ++ au_hn_imtx_lock_nested(hdir, AuLsc_I_PARENT); ++ err = au_h_verify(a->wh_dentry, au_opt_udba(sb), h_dir, h_parent, ++ a->br); ++ if (!err) { ++ err = mnt_want_write(a->br->br_mnt); ++ if (!err) { ++ err = au_whtmp_rmdir(a->dir, bindex, a->wh_dentry, ++ &a->whlist); ++ mnt_drop_write(a->br->br_mnt); ++ } ++ } ++ au_hn_imtx_unlock(hdir); ++ dput(h_parent); ++ ii_write_unlock(a->dir); ++ ++out: ++ /* mutex_unlock(&a->dir->i_mutex); */ ++ au_whtmp_rmdir_free(a); ++ si_read_unlock(sb); ++ au_nwt_done(&au_sbi(sb)->si_nowait); ++ if (unlikely(err)) ++ AuIOErr("err %d\n", err); ++} ++ ++void au_whtmp_kick_rmdir(struct inode *dir, aufs_bindex_t bindex, ++ struct dentry *wh_dentry, struct au_whtmp_rmdir *args) ++{ ++ int wkq_err; ++ struct super_block *sb; ++ ++ IMustLock(dir); ++ ++ /* all post-process will be done in do_rmdir_whtmp(). */ ++ sb = dir->i_sb; ++ args->dir = au_igrab(dir); ++ args->br = au_sbr(sb, bindex); ++ atomic_inc(&args->br->br_count); ++ args->wh_dentry = dget(wh_dentry); ++ wkq_err = au_wkq_nowait(call_rmdir_whtmp, args, sb, /*flags*/0); ++ if (unlikely(wkq_err)) { ++ pr_warning("rmdir error %.*s (%d), ignored\n", ++ AuDLNPair(wh_dentry), wkq_err); ++ au_whtmp_rmdir_free(args); ++ } ++} +diff -uNr linux-3.2.0-gentoo-r1.orig//fs/aufs/whout.h linux-3.2.0-gentoo-r1/fs/aufs/whout.h +--- linux-3.2.0-gentoo-r1.orig//fs/aufs/whout.h 1970-01-01 01:00:00.000000000 +0100 ++++ linux-3.2.0-gentoo-r1/fs/aufs/whout.h 2012-01-17 12:11:25.078825986 +0100 +@@ -0,0 +1,88 @@ ++/* ++ * Copyright (C) 2005-2012 Junjiro R. Okajima ++ * ++ * This program, aufs is free software; you can redistribute it and/or modify ++ * it under the terms of the GNU General Public License as published by ++ * the Free Software Foundation; either version 2 of the License, or ++ * (at your option) any later version. 
++ * ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ * GNU General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public License ++ * along with this program; if not, write to the Free Software ++ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA ++ */ ++ ++/* ++ * whiteout for logical deletion and opaque directory ++ */ ++ ++#ifndef __AUFS_WHOUT_H__ ++#define __AUFS_WHOUT_H__ ++ ++#ifdef __KERNEL__ ++ ++#include "dir.h" ++ ++/* whout.c */ ++int au_wh_name_alloc(struct qstr *wh, const struct qstr *name); ++struct au_branch; ++int au_wh_test(struct dentry *h_parent, struct qstr *wh_name, ++ struct au_branch *br, int try_sio); ++int au_diropq_test(struct dentry *h_dentry, struct au_branch *br); ++struct dentry *au_whtmp_lkup(struct dentry *h_parent, struct au_branch *br, ++ struct qstr *prefix); ++int au_whtmp_ren(struct dentry *h_dentry, struct au_branch *br); ++int au_wh_unlink_dentry(struct inode *h_dir, struct path *h_path, ++ struct dentry *dentry); ++int au_wh_init(struct dentry *h_parent, struct au_branch *br, ++ struct super_block *sb); ++ ++/* diropq flags */ ++#define AuDiropq_CREATE 1 ++#define au_ftest_diropq(flags, name) ((flags) & AuDiropq_##name) ++#define au_fset_diropq(flags, name) \ ++ do { (flags) |= AuDiropq_##name; } while (0) ++#define au_fclr_diropq(flags, name) \ ++ do { (flags) &= ~AuDiropq_##name; } while (0) ++ ++struct dentry *au_diropq_sio(struct dentry *dentry, aufs_bindex_t bindex, ++ unsigned int flags); ++struct dentry *au_wh_lkup(struct dentry *h_parent, struct qstr *base_name, ++ struct au_branch *br); ++struct dentry *au_wh_create(struct dentry *dentry, aufs_bindex_t bindex, ++ struct dentry *h_parent); ++ ++/* real rmdir for the whiteout-ed dir */ ++struct au_whtmp_rmdir { ++ struct inode *dir; ++ struct au_branch *br; ++ struct dentry *wh_dentry; ++ struct au_nhash whlist; ++}; ++ ++struct au_whtmp_rmdir *au_whtmp_rmdir_alloc(struct super_block *sb, gfp_t gfp); ++void au_whtmp_rmdir_free(struct au_whtmp_rmdir *whtmp); ++int au_whtmp_rmdir(struct inode *dir, aufs_bindex_t bindex, ++ struct dentry *wh_dentry, struct au_nhash *whlist); ++void au_whtmp_kick_rmdir(struct inode *dir, aufs_bindex_t bindex, ++ struct dentry *wh_dentry, struct au_whtmp_rmdir *args); ++ ++/* ---------------------------------------------------------------------- */ ++ ++static inline struct dentry *au_diropq_create(struct dentry *dentry, ++ aufs_bindex_t bindex) ++{ ++ return au_diropq_sio(dentry, bindex, AuDiropq_CREATE); ++} ++ ++static inline int au_diropq_remove(struct dentry *dentry, aufs_bindex_t bindex) ++{ ++ return PTR_ERR(au_diropq_sio(dentry, bindex, !AuDiropq_CREATE)); ++} ++ ++#endif /* __KERNEL__ */ ++#endif /* __AUFS_WHOUT_H__ */ +diff -uNr linux-3.2.0-gentoo-r1.orig//fs/aufs/wkq.c linux-3.2.0-gentoo-r1/fs/aufs/wkq.c +--- linux-3.2.0-gentoo-r1.orig//fs/aufs/wkq.c 1970-01-01 01:00:00.000000000 +0100 ++++ linux-3.2.0-gentoo-r1/fs/aufs/wkq.c 2012-01-17 12:11:25.088085327 +0100 +@@ -0,0 +1,214 @@ ++/* ++ * Copyright (C) 2005-2012 Junjiro R. Okajima ++ * ++ * This program, aufs is free software; you can redistribute it and/or modify ++ * it under the terms of the GNU General Public License as published by ++ * the Free Software Foundation; either version 2 of the License, or ++ * (at your option) any later version. 
++ *
++ * This program is distributed in the hope that it will be useful,
++ * but WITHOUT ANY WARRANTY; without even the implied warranty of
++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
++ * GNU General Public License for more details.
++ *
++ * You should have received a copy of the GNU General Public License
++ * along with this program; if not, write to the Free Software
++ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
++ */
++
++/*
++ * workqueue for asynchronous/super-io operations
++ * todo: try new credential scheme
++ */
++
++#include <linux/module.h>
++#include "aufs.h"
++
++/* internal workqueue named AUFS_WKQ_NAME */
++
++static struct workqueue_struct *au_wkq;
++
++struct au_wkinfo {
++ struct work_struct wk;
++ struct kobject *kobj;
++
++ unsigned int flags; /* see wkq.h */
++
++ au_wkq_func_t func;
++ void *args;
++
++ struct completion *comp;
++};
++
++/* ---------------------------------------------------------------------- */
++
++static void wkq_func(struct work_struct *wk)
++{
++ struct au_wkinfo *wkinfo = container_of(wk, struct au_wkinfo, wk);
++
++ AuDebugOn(current_fsuid());
++ AuDebugOn(rlimit(RLIMIT_FSIZE) != RLIM_INFINITY);
++
++ wkinfo->func(wkinfo->args);
++ if (au_ftest_wkq(wkinfo->flags, WAIT))
++ complete(wkinfo->comp);
++ else {
++ kobject_put(wkinfo->kobj);
++ module_put(THIS_MODULE); /* todo: ?? */
++ kfree(wkinfo);
++ }
++}
++
++/*
++ * Since struct completion is large, try allocating it dynamically.
++ */
++#if defined(CONFIG_4KSTACKS) || defined(AuTest4KSTACKS)
++#define AuWkqCompDeclare(name) struct completion *comp = NULL
++
++static int au_wkq_comp_alloc(struct au_wkinfo *wkinfo, struct completion **comp)
++{
++ *comp = kmalloc(sizeof(**comp), GFP_NOFS);
++ if (*comp) {
++ init_completion(*comp);
++ wkinfo->comp = *comp;
++ return 0;
++ }
++ return -ENOMEM;
++}
++
++static void au_wkq_comp_free(struct completion *comp)
++{
++ kfree(comp);
++}
++
++#else
++
++/* no braces */
++#define AuWkqCompDeclare(name) \
++ DECLARE_COMPLETION_ONSTACK(_ ## name); \
++ struct completion *comp = &_ ## name
++
++static int au_wkq_comp_alloc(struct au_wkinfo *wkinfo, struct completion **comp)
++{
++ wkinfo->comp = *comp;
++ return 0;
++}
++
++static void au_wkq_comp_free(struct completion *comp __maybe_unused)
++{
++ /* empty */
++}
++#endif /* 4KSTACKS */
++
++static void au_wkq_run(struct au_wkinfo *wkinfo)
++{
++ if (au_ftest_wkq(wkinfo->flags, NEST)) {
++ if (au_wkq_test()) {
++ AuWarn1("wkq from wkq, due to a dead dir by UDBA?\n");
++ AuDebugOn(au_ftest_wkq(wkinfo->flags, WAIT));
++ }
++ } else
++ au_dbg_verify_kthread();
++
++ if (au_ftest_wkq(wkinfo->flags, WAIT)) {
++ INIT_WORK_ONSTACK(&wkinfo->wk, wkq_func);
++ queue_work(au_wkq, &wkinfo->wk);
++ } else {
++ INIT_WORK(&wkinfo->wk, wkq_func);
++ schedule_work(&wkinfo->wk);
++ }
++}
++
++/*
++ * Be careful. It is easy to make deadlock happen.
++ * processA: lock, wkq and wait
++ * processB: wkq and wait, lock in wkq
++ * --> deadlock
++ */
++int au_wkq_do_wait(unsigned int flags, au_wkq_func_t func, void *args)
++{
++ int err;
++ AuWkqCompDeclare(comp);
++ struct au_wkinfo wkinfo = {
++ .flags = flags,
++ .func = func,
++ .args = args
++ };
++
++ err = au_wkq_comp_alloc(&wkinfo, &comp);
++ if (!err) {
++ au_wkq_run(&wkinfo);
++ /* no timeout, no interrupt */
++ wait_for_completion(wkinfo.comp);
++ au_wkq_comp_free(comp);
++ destroy_work_on_stack(&wkinfo.wk);
++ }
++
++ return err;
++
++}
++
++/*
++ * Note: dget/dput() in func for aufs dentries are not supported.
It will be a ++ * problem in a concurrent umounting. ++ */ ++int au_wkq_nowait(au_wkq_func_t func, void *args, struct super_block *sb, ++ unsigned int flags) ++{ ++ int err; ++ struct au_wkinfo *wkinfo; ++ ++ atomic_inc(&au_sbi(sb)->si_nowait.nw_len); ++ ++ /* ++ * wkq_func() must free this wkinfo. ++ * it highly depends upon the implementation of workqueue. ++ */ ++ err = 0; ++ wkinfo = kmalloc(sizeof(*wkinfo), GFP_NOFS); ++ if (wkinfo) { ++ wkinfo->kobj = &au_sbi(sb)->si_kobj; ++ wkinfo->flags = flags & ~AuWkq_WAIT; ++ wkinfo->func = func; ++ wkinfo->args = args; ++ wkinfo->comp = NULL; ++ kobject_get(wkinfo->kobj); ++ __module_get(THIS_MODULE); /* todo: ?? */ ++ ++ au_wkq_run(wkinfo); ++ } else { ++ err = -ENOMEM; ++ au_nwt_done(&au_sbi(sb)->si_nowait); ++ } ++ ++ return err; ++} ++ ++/* ---------------------------------------------------------------------- */ ++ ++void au_nwt_init(struct au_nowait_tasks *nwt) ++{ ++ atomic_set(&nwt->nw_len, 0); ++ /* smp_mb(); */ /* atomic_set */ ++ init_waitqueue_head(&nwt->nw_wq); ++} ++ ++void au_wkq_fin(void) ++{ ++ destroy_workqueue(au_wkq); ++} ++ ++int __init au_wkq_init(void) ++{ ++ int err; ++ ++ err = 0; ++ BUILD_BUG_ON(!WQ_RESCUER); ++ au_wkq = alloc_workqueue(AUFS_WKQ_NAME, !WQ_RESCUER, WQ_DFL_ACTIVE); ++ if (IS_ERR(au_wkq)) ++ err = PTR_ERR(au_wkq); ++ else if (!au_wkq) ++ err = -ENOMEM; ++ ++ return err; ++} +diff -uNr linux-3.2.0-gentoo-r1.orig//fs/aufs/wkq.h linux-3.2.0-gentoo-r1/fs/aufs/wkq.h +--- linux-3.2.0-gentoo-r1.orig//fs/aufs/wkq.h 1970-01-01 01:00:00.000000000 +0100 ++++ linux-3.2.0-gentoo-r1/fs/aufs/wkq.h 2012-01-17 12:11:25.104289179 +0100 +@@ -0,0 +1,92 @@ ++/* ++ * Copyright (C) 2005-2012 Junjiro R. Okajima ++ * ++ * This program, aufs is free software; you can redistribute it and/or modify ++ * it under the terms of the GNU General Public License as published by ++ * the Free Software Foundation; either version 2 of the License, or ++ * (at your option) any later version. ++ * ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ * GNU General Public License for more details. 
++ * ++ * You should have received a copy of the GNU General Public License ++ * along with this program; if not, write to the Free Software ++ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA ++ */ ++ ++/* ++ * workqueue for asynchronous/super-io operations ++ * todo: try new credentials management scheme ++ */ ++ ++#ifndef __AUFS_WKQ_H__ ++#define __AUFS_WKQ_H__ ++ ++#ifdef __KERNEL__ ++ ++struct super_block; ++ ++/* ---------------------------------------------------------------------- */ ++ ++/* ++ * in the next operation, wait for the 'nowait' tasks in system-wide workqueue ++ */ ++struct au_nowait_tasks { ++ atomic_t nw_len; ++ wait_queue_head_t nw_wq; ++}; ++ ++/* ---------------------------------------------------------------------- */ ++ ++typedef void (*au_wkq_func_t)(void *args); ++ ++/* wkq flags */ ++#define AuWkq_WAIT 1 ++#define AuWkq_NEST (1 << 1) ++#define au_ftest_wkq(flags, name) ((flags) & AuWkq_##name) ++#define au_fset_wkq(flags, name) \ ++ do { (flags) |= AuWkq_##name; } while (0) ++#define au_fclr_wkq(flags, name) \ ++ do { (flags) &= ~AuWkq_##name; } while (0) ++ ++#ifndef CONFIG_AUFS_HNOTIFY ++#undef AuWkq_NEST ++#define AuWkq_NEST 0 ++#endif ++ ++/* wkq.c */ ++int au_wkq_do_wait(unsigned int flags, au_wkq_func_t func, void *args); ++int au_wkq_nowait(au_wkq_func_t func, void *args, struct super_block *sb, ++ unsigned int flags); ++void au_nwt_init(struct au_nowait_tasks *nwt); ++int __init au_wkq_init(void); ++void au_wkq_fin(void); ++ ++/* ---------------------------------------------------------------------- */ ++ ++static inline int au_wkq_test(void) ++{ ++ return current->flags & PF_WQ_WORKER; ++} ++ ++static inline int au_wkq_wait(au_wkq_func_t func, void *args) ++{ ++ return au_wkq_do_wait(AuWkq_WAIT, func, args); ++} ++ ++static inline void au_nwt_done(struct au_nowait_tasks *nwt) ++{ ++ if (atomic_dec_and_test(&nwt->nw_len)) ++ wake_up_all(&nwt->nw_wq); ++} ++ ++static inline int au_nwt_flush(struct au_nowait_tasks *nwt) ++{ ++ wait_event(nwt->nw_wq, !atomic_read(&nwt->nw_len)); ++ return 0; ++} ++ ++#endif /* __KERNEL__ */ ++#endif /* __AUFS_WKQ_H__ */ +diff -uNr linux-3.2.0-gentoo-r1.orig//fs/aufs/xino.c linux-3.2.0-gentoo-r1/fs/aufs/xino.c +--- linux-3.2.0-gentoo-r1.orig//fs/aufs/xino.c 1970-01-01 01:00:00.000000000 +0100 ++++ linux-3.2.0-gentoo-r1/fs/aufs/xino.c 2012-01-17 12:11:25.129752370 +0100 +@@ -0,0 +1,1264 @@ ++/* ++ * Copyright (C) 2005-2012 Junjiro R. Okajima ++ * ++ * This program, aufs is free software; you can redistribute it and/or modify ++ * it under the terms of the GNU General Public License as published by ++ * the Free Software Foundation; either version 2 of the License, or ++ * (at your option) any later version. ++ * ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ * GNU General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public License ++ * along with this program; if not, write to the Free Software ++ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA ++ */ ++ ++/* ++ * external inode number translation table and bitmap ++ */ ++ ++#include ++#include "aufs.h" ++ ++/* todo: unnecessary to support mmap_sem since kernel-space? 
*/ ++ssize_t xino_fread(au_readf_t func, struct file *file, void *kbuf, size_t size, ++ loff_t *pos) ++{ ++ ssize_t err; ++ mm_segment_t oldfs; ++ union { ++ void *k; ++ char __user *u; ++ } buf; ++ ++ buf.k = kbuf; ++ oldfs = get_fs(); ++ set_fs(KERNEL_DS); ++ do { ++ /* todo: signal_pending? */ ++ err = func(file, buf.u, size, pos); ++ } while (err == -EAGAIN || err == -EINTR); ++ set_fs(oldfs); ++ ++#if 0 /* reserved for future use */ ++ if (err > 0) ++ fsnotify_access(file->f_dentry); ++#endif ++ ++ return err; ++} ++ ++/* ---------------------------------------------------------------------- */ ++ ++static ssize_t do_xino_fwrite(au_writef_t func, struct file *file, void *kbuf, ++ size_t size, loff_t *pos) ++{ ++ ssize_t err; ++ mm_segment_t oldfs; ++ union { ++ void *k; ++ const char __user *u; ++ } buf; ++ ++ buf.k = kbuf; ++ oldfs = get_fs(); ++ set_fs(KERNEL_DS); ++ do { ++ /* todo: signal_pending? */ ++ err = func(file, buf.u, size, pos); ++ } while (err == -EAGAIN || err == -EINTR); ++ set_fs(oldfs); ++ ++#if 0 /* reserved for future use */ ++ if (err > 0) ++ fsnotify_modify(file->f_dentry); ++#endif ++ ++ return err; ++} ++ ++struct do_xino_fwrite_args { ++ ssize_t *errp; ++ au_writef_t func; ++ struct file *file; ++ void *buf; ++ size_t size; ++ loff_t *pos; ++}; ++ ++static void call_do_xino_fwrite(void *args) ++{ ++ struct do_xino_fwrite_args *a = args; ++ *a->errp = do_xino_fwrite(a->func, a->file, a->buf, a->size, a->pos); ++} ++ ++ssize_t xino_fwrite(au_writef_t func, struct file *file, void *buf, size_t size, ++ loff_t *pos) ++{ ++ ssize_t err; ++ ++ /* todo: signal block and no wkq? */ ++ if (rlimit(RLIMIT_FSIZE) == RLIM_INFINITY) { ++ lockdep_off(); ++ err = do_xino_fwrite(func, file, buf, size, pos); ++ lockdep_on(); ++ } else { ++ /* ++ * it breaks RLIMIT_FSIZE and normal user's limit, ++ * users should care about quota and real 'filesystem full.' ++ */ ++ int wkq_err; ++ struct do_xino_fwrite_args args = { ++ .errp = &err, ++ .func = func, ++ .file = file, ++ .buf = buf, ++ .size = size, ++ .pos = pos ++ }; ++ ++ wkq_err = au_wkq_wait(call_do_xino_fwrite, &args); ++ if (unlikely(wkq_err)) ++ err = wkq_err; ++ } ++ ++ return err; ++} ++ ++/* ---------------------------------------------------------------------- */ ++ ++/* ++ * create a new xinofile at the same place/path as @base_file. 
++ */ ++struct file *au_xino_create2(struct file *base_file, struct file *copy_src) ++{ ++ struct file *file; ++ struct dentry *base, *parent; ++ struct inode *dir; ++ struct qstr *name; ++ struct path path; ++ int err; ++ ++ base = base_file->f_dentry; ++ parent = base->d_parent; /* dir inode is locked */ ++ dir = parent->d_inode; ++ IMustLock(dir); ++ ++ file = ERR_PTR(-EINVAL); ++ name = &base->d_name; ++ path.dentry = vfsub_lookup_one_len(name->name, parent, name->len); ++ if (IS_ERR(path.dentry)) { ++ file = (void *)path.dentry; ++ pr_err("%.*s lookup err %ld\n", ++ AuLNPair(name), PTR_ERR(path.dentry)); ++ goto out; ++ } ++ ++ /* no need to mnt_want_write() since we call dentry_open() later */ ++ err = vfs_create(dir, path.dentry, S_IRUGO | S_IWUGO, NULL); ++ if (unlikely(err)) { ++ file = ERR_PTR(err); ++ pr_err("%.*s create err %d\n", AuLNPair(name), err); ++ goto out_dput; ++ } ++ ++ path.mnt = base_file->f_vfsmnt; ++ file = vfsub_dentry_open(&path, ++ O_RDWR | O_CREAT | O_EXCL | O_LARGEFILE ++ /* | __FMODE_NONOTIFY */); ++ if (IS_ERR(file)) { ++ pr_err("%.*s open err %ld\n", AuLNPair(name), PTR_ERR(file)); ++ goto out_dput; ++ } ++ ++ err = vfsub_unlink(dir, &file->f_path, /*force*/0); ++ if (unlikely(err)) { ++ pr_err("%.*s unlink err %d\n", AuLNPair(name), err); ++ goto out_fput; ++ } ++ ++ if (copy_src) { ++ /* no one can touch copy_src xino */ ++ err = au_copy_file(file, copy_src, ++ i_size_read(copy_src->f_dentry->d_inode)); ++ if (unlikely(err)) { ++ pr_err("%.*s copy err %d\n", AuLNPair(name), err); ++ goto out_fput; ++ } ++ } ++ goto out_dput; /* success */ ++ ++out_fput: ++ fput(file); ++ file = ERR_PTR(err); ++out_dput: ++ dput(path.dentry); ++out: ++ return file; ++} ++ ++struct au_xino_lock_dir { ++ struct au_hinode *hdir; ++ struct dentry *parent; ++ struct mutex *mtx; ++}; ++ ++static void au_xino_lock_dir(struct super_block *sb, struct file *xino, ++ struct au_xino_lock_dir *ldir) ++{ ++ aufs_bindex_t brid, bindex; ++ ++ ldir->hdir = NULL; ++ bindex = -1; ++ brid = au_xino_brid(sb); ++ if (brid >= 0) ++ bindex = au_br_index(sb, brid); ++ if (bindex >= 0) { ++ ldir->hdir = au_hi(sb->s_root->d_inode, bindex); ++ au_hn_imtx_lock_nested(ldir->hdir, AuLsc_I_PARENT); ++ } else { ++ ldir->parent = dget_parent(xino->f_dentry); ++ ldir->mtx = &ldir->parent->d_inode->i_mutex; ++ mutex_lock_nested(ldir->mtx, AuLsc_I_PARENT); ++ } ++} ++ ++static void au_xino_unlock_dir(struct au_xino_lock_dir *ldir) ++{ ++ if (ldir->hdir) ++ au_hn_imtx_unlock(ldir->hdir); ++ else { ++ mutex_unlock(ldir->mtx); ++ dput(ldir->parent); ++ } ++} ++ ++/* ---------------------------------------------------------------------- */ ++ ++/* trucate xino files asynchronously */ ++ ++int au_xino_trunc(struct super_block *sb, aufs_bindex_t bindex) ++{ ++ int err; ++ aufs_bindex_t bi, bend; ++ struct au_branch *br; ++ struct file *new_xino, *file; ++ struct super_block *h_sb; ++ struct au_xino_lock_dir ldir; ++ ++ err = -EINVAL; ++ bend = au_sbend(sb); ++ if (unlikely(bindex < 0 || bend < bindex)) ++ goto out; ++ br = au_sbr(sb, bindex); ++ file = br->br_xino.xi_file; ++ if (!file) ++ goto out; ++ ++ au_xino_lock_dir(sb, file, &ldir); ++ /* mnt_want_write() is unnecessary here */ ++ new_xino = au_xino_create2(file, file); ++ au_xino_unlock_dir(&ldir); ++ err = PTR_ERR(new_xino); ++ if (IS_ERR(new_xino)) ++ goto out; ++ err = 0; ++ fput(file); ++ br->br_xino.xi_file = new_xino; ++ ++ h_sb = br->br_mnt->mnt_sb; ++ for (bi = 0; bi <= bend; bi++) { ++ if (unlikely(bi == bindex)) ++ continue; ++ br = au_sbr(sb, 
bi); ++ if (br->br_mnt->mnt_sb != h_sb) ++ continue; ++ ++ fput(br->br_xino.xi_file); ++ br->br_xino.xi_file = new_xino; ++ get_file(new_xino); ++ } ++ ++out: ++ return err; ++} ++ ++struct xino_do_trunc_args { ++ struct super_block *sb; ++ struct au_branch *br; ++}; ++ ++static void xino_do_trunc(void *_args) ++{ ++ struct xino_do_trunc_args *args = _args; ++ struct super_block *sb; ++ struct au_branch *br; ++ struct inode *dir; ++ int err; ++ aufs_bindex_t bindex; ++ ++ err = 0; ++ sb = args->sb; ++ dir = sb->s_root->d_inode; ++ br = args->br; ++ ++ si_noflush_write_lock(sb); ++ ii_read_lock_parent(dir); ++ bindex = au_br_index(sb, br->br_id); ++ err = au_xino_trunc(sb, bindex); ++ if (!err ++ && br->br_xino.xi_file->f_dentry->d_inode->i_blocks ++ >= br->br_xino_upper) ++ br->br_xino_upper += AUFS_XINO_TRUNC_STEP; ++ ++ ii_read_unlock(dir); ++ if (unlikely(err)) ++ pr_warning("err b%d, (%d)\n", bindex, err); ++ atomic_dec(&br->br_xino_running); ++ atomic_dec(&br->br_count); ++ si_write_unlock(sb); ++ au_nwt_done(&au_sbi(sb)->si_nowait); ++ kfree(args); ++} ++ ++static void xino_try_trunc(struct super_block *sb, struct au_branch *br) ++{ ++ struct xino_do_trunc_args *args; ++ int wkq_err; ++ ++ if (br->br_xino.xi_file->f_dentry->d_inode->i_blocks ++ < br->br_xino_upper) ++ return; ++ ++ if (atomic_inc_return(&br->br_xino_running) > 1) ++ goto out; ++ ++ /* lock and kfree() will be called in trunc_xino() */ ++ args = kmalloc(sizeof(*args), GFP_NOFS); ++ if (unlikely(!args)) { ++ AuErr1("no memory\n"); ++ goto out_args; ++ } ++ ++ atomic_inc(&br->br_count); ++ args->sb = sb; ++ args->br = br; ++ wkq_err = au_wkq_nowait(xino_do_trunc, args, sb, /*flags*/0); ++ if (!wkq_err) ++ return; /* success */ ++ ++ pr_err("wkq %d\n", wkq_err); ++ atomic_dec(&br->br_count); ++ ++out_args: ++ kfree(args); ++out: ++ atomic_dec(&br->br_xino_running); ++} ++ ++/* ---------------------------------------------------------------------- */ ++ ++static int au_xino_do_write(au_writef_t write, struct file *file, ++ ino_t h_ino, ino_t ino) ++{ ++ loff_t pos; ++ ssize_t sz; ++ ++ pos = h_ino; ++ if (unlikely(au_loff_max / sizeof(ino) - 1 < pos)) { ++ AuIOErr1("too large hi%lu\n", (unsigned long)h_ino); ++ return -EFBIG; ++ } ++ pos *= sizeof(ino); ++ sz = xino_fwrite(write, file, &ino, sizeof(ino), &pos); ++ if (sz == sizeof(ino)) ++ return 0; /* success */ ++ ++ AuIOErr("write failed (%zd)\n", sz); ++ return -EIO; ++} ++ ++/* ++ * write @ino to the xinofile for the specified branch{@sb, @bindex} ++ * at the position of @h_ino. ++ * even if @ino is zero, it is written to the xinofile and means no entry. ++ * if the size of the xino file on a specific filesystem exceeds the watermark, ++ * try truncating it. 
++ */ ++int au_xino_write(struct super_block *sb, aufs_bindex_t bindex, ino_t h_ino, ++ ino_t ino) ++{ ++ int err; ++ unsigned int mnt_flags; ++ struct au_branch *br; ++ ++ BUILD_BUG_ON(sizeof(long long) != sizeof(au_loff_max) ++ || ((loff_t)-1) > 0); ++ SiMustAnyLock(sb); ++ ++ mnt_flags = au_mntflags(sb); ++ if (!au_opt_test(mnt_flags, XINO)) ++ return 0; ++ ++ br = au_sbr(sb, bindex); ++ err = au_xino_do_write(au_sbi(sb)->si_xwrite, br->br_xino.xi_file, ++ h_ino, ino); ++ if (!err) { ++ if (au_opt_test(mnt_flags, TRUNC_XINO) ++ && au_test_fs_trunc_xino(br->br_mnt->mnt_sb)) ++ xino_try_trunc(sb, br); ++ return 0; /* success */ ++ } ++ ++ AuIOErr("write failed (%d)\n", err); ++ return -EIO; ++} ++ ++/* ---------------------------------------------------------------------- */ ++ ++/* aufs inode number bitmap */ ++ ++static const int page_bits = (int)PAGE_SIZE * BITS_PER_BYTE; ++static ino_t xib_calc_ino(unsigned long pindex, int bit) ++{ ++ ino_t ino; ++ ++ AuDebugOn(bit < 0 || page_bits <= bit); ++ ino = AUFS_FIRST_INO + pindex * page_bits + bit; ++ return ino; ++} ++ ++static void xib_calc_bit(ino_t ino, unsigned long *pindex, int *bit) ++{ ++ AuDebugOn(ino < AUFS_FIRST_INO); ++ ino -= AUFS_FIRST_INO; ++ *pindex = ino / page_bits; ++ *bit = ino % page_bits; ++} ++ ++static int xib_pindex(struct super_block *sb, unsigned long pindex) ++{ ++ int err; ++ loff_t pos; ++ ssize_t sz; ++ struct au_sbinfo *sbinfo; ++ struct file *xib; ++ unsigned long *p; ++ ++ sbinfo = au_sbi(sb); ++ MtxMustLock(&sbinfo->si_xib_mtx); ++ AuDebugOn(pindex > ULONG_MAX / PAGE_SIZE ++ || !au_opt_test(sbinfo->si_mntflags, XINO)); ++ ++ if (pindex == sbinfo->si_xib_last_pindex) ++ return 0; ++ ++ xib = sbinfo->si_xib; ++ p = sbinfo->si_xib_buf; ++ pos = sbinfo->si_xib_last_pindex; ++ pos *= PAGE_SIZE; ++ sz = xino_fwrite(sbinfo->si_xwrite, xib, p, PAGE_SIZE, &pos); ++ if (unlikely(sz != PAGE_SIZE)) ++ goto out; ++ ++ pos = pindex; ++ pos *= PAGE_SIZE; ++ if (i_size_read(xib->f_dentry->d_inode) >= pos + PAGE_SIZE) ++ sz = xino_fread(sbinfo->si_xread, xib, p, PAGE_SIZE, &pos); ++ else { ++ memset(p, 0, PAGE_SIZE); ++ sz = xino_fwrite(sbinfo->si_xwrite, xib, p, PAGE_SIZE, &pos); ++ } ++ if (sz == PAGE_SIZE) { ++ sbinfo->si_xib_last_pindex = pindex; ++ return 0; /* success */ ++ } ++ ++out: ++ AuIOErr1("write failed (%zd)\n", sz); ++ err = sz; ++ if (sz >= 0) ++ err = -EIO; ++ return err; ++} ++ ++/* ---------------------------------------------------------------------- */ ++ ++static void au_xib_clear_bit(struct inode *inode) ++{ ++ int err, bit; ++ unsigned long pindex; ++ struct super_block *sb; ++ struct au_sbinfo *sbinfo; ++ ++ AuDebugOn(inode->i_nlink); ++ ++ sb = inode->i_sb; ++ xib_calc_bit(inode->i_ino, &pindex, &bit); ++ AuDebugOn(page_bits <= bit); ++ sbinfo = au_sbi(sb); ++ mutex_lock(&sbinfo->si_xib_mtx); ++ err = xib_pindex(sb, pindex); ++ if (!err) { ++ clear_bit(bit, sbinfo->si_xib_buf); ++ sbinfo->si_xib_next_bit = bit; ++ } ++ mutex_unlock(&sbinfo->si_xib_mtx); ++} ++ ++/* for s_op->delete_inode() */ ++void au_xino_delete_inode(struct inode *inode, const int unlinked) ++{ ++ int err; ++ unsigned int mnt_flags; ++ aufs_bindex_t bindex, bend, bi; ++ unsigned char try_trunc; ++ struct au_iinfo *iinfo; ++ struct super_block *sb; ++ struct au_hinode *hi; ++ struct inode *h_inode; ++ struct au_branch *br; ++ au_writef_t xwrite; ++ ++ sb = inode->i_sb; ++ mnt_flags = au_mntflags(sb); ++ if (!au_opt_test(mnt_flags, XINO) ++ || inode->i_ino == AUFS_ROOT_INO) ++ return; ++ ++ if (unlinked) { ++ 
au_xigen_inc(inode); ++ au_xib_clear_bit(inode); ++ } ++ ++ iinfo = au_ii(inode); ++ if (!iinfo) ++ return; ++ ++ bindex = iinfo->ii_bstart; ++ if (bindex < 0) ++ return; ++ ++ xwrite = au_sbi(sb)->si_xwrite; ++ try_trunc = !!au_opt_test(mnt_flags, TRUNC_XINO); ++ hi = iinfo->ii_hinode + bindex; ++ bend = iinfo->ii_bend; ++ for (; bindex <= bend; bindex++, hi++) { ++ h_inode = hi->hi_inode; ++ if (!h_inode ++ || (!unlinked && h_inode->i_nlink)) ++ continue; ++ ++ /* inode may not be revalidated */ ++ bi = au_br_index(sb, hi->hi_id); ++ if (bi < 0) ++ continue; ++ ++ br = au_sbr(sb, bi); ++ err = au_xino_do_write(xwrite, br->br_xino.xi_file, ++ h_inode->i_ino, /*ino*/0); ++ if (!err && try_trunc ++ && au_test_fs_trunc_xino(br->br_mnt->mnt_sb)) ++ xino_try_trunc(sb, br); ++ } ++} ++ ++/* get an unused inode number from bitmap */ ++ino_t au_xino_new_ino(struct super_block *sb) ++{ ++ ino_t ino; ++ unsigned long *p, pindex, ul, pend; ++ struct au_sbinfo *sbinfo; ++ struct file *file; ++ int free_bit, err; ++ ++ if (!au_opt_test(au_mntflags(sb), XINO)) ++ return iunique(sb, AUFS_FIRST_INO); ++ ++ sbinfo = au_sbi(sb); ++ mutex_lock(&sbinfo->si_xib_mtx); ++ p = sbinfo->si_xib_buf; ++ free_bit = sbinfo->si_xib_next_bit; ++ if (free_bit < page_bits && !test_bit(free_bit, p)) ++ goto out; /* success */ ++ free_bit = find_first_zero_bit(p, page_bits); ++ if (free_bit < page_bits) ++ goto out; /* success */ ++ ++ pindex = sbinfo->si_xib_last_pindex; ++ for (ul = pindex - 1; ul < ULONG_MAX; ul--) { ++ err = xib_pindex(sb, ul); ++ if (unlikely(err)) ++ goto out_err; ++ free_bit = find_first_zero_bit(p, page_bits); ++ if (free_bit < page_bits) ++ goto out; /* success */ ++ } ++ ++ file = sbinfo->si_xib; ++ pend = i_size_read(file->f_dentry->d_inode) / PAGE_SIZE; ++ for (ul = pindex + 1; ul <= pend; ul++) { ++ err = xib_pindex(sb, ul); ++ if (unlikely(err)) ++ goto out_err; ++ free_bit = find_first_zero_bit(p, page_bits); ++ if (free_bit < page_bits) ++ goto out; /* success */ ++ } ++ BUG(); ++ ++out: ++ set_bit(free_bit, p); ++ sbinfo->si_xib_next_bit = free_bit + 1; ++ pindex = sbinfo->si_xib_last_pindex; ++ mutex_unlock(&sbinfo->si_xib_mtx); ++ ino = xib_calc_ino(pindex, free_bit); ++ AuDbg("i%lu\n", (unsigned long)ino); ++ return ino; ++out_err: ++ mutex_unlock(&sbinfo->si_xib_mtx); ++ AuDbg("i0\n"); ++ return 0; ++} ++ ++/* ++ * read @ino from xinofile for the specified branch{@sb, @bindex} ++ * at the position of @h_ino. ++ * if @ino does not exist and @do_new is true, get new one. 
++ */ ++int au_xino_read(struct super_block *sb, aufs_bindex_t bindex, ino_t h_ino, ++ ino_t *ino) ++{ ++ int err; ++ ssize_t sz; ++ loff_t pos; ++ struct file *file; ++ struct au_sbinfo *sbinfo; ++ ++ *ino = 0; ++ if (!au_opt_test(au_mntflags(sb), XINO)) ++ return 0; /* no xino */ ++ ++ err = 0; ++ sbinfo = au_sbi(sb); ++ pos = h_ino; ++ if (unlikely(au_loff_max / sizeof(*ino) - 1 < pos)) { ++ AuIOErr1("too large hi%lu\n", (unsigned long)h_ino); ++ return -EFBIG; ++ } ++ pos *= sizeof(*ino); ++ ++ file = au_sbr(sb, bindex)->br_xino.xi_file; ++ if (i_size_read(file->f_dentry->d_inode) < pos + sizeof(*ino)) ++ return 0; /* no ino */ ++ ++ sz = xino_fread(sbinfo->si_xread, file, ino, sizeof(*ino), &pos); ++ if (sz == sizeof(*ino)) ++ return 0; /* success */ ++ ++ err = sz; ++ if (unlikely(sz >= 0)) { ++ err = -EIO; ++ AuIOErr("xino read error (%zd)\n", sz); ++ } ++ ++ return err; ++} ++ ++/* ---------------------------------------------------------------------- */ ++ ++/* create and set a new xino file */ ++ ++struct file *au_xino_create(struct super_block *sb, char *fname, int silent) ++{ ++ struct file *file; ++ struct dentry *h_parent, *d; ++ struct inode *h_dir; ++ int err; ++ ++ /* ++ * at mount-time, and the xino file is the default path, ++ * hnotify is disabled so we have no notify events to ignore. ++ * when a user specified the xino, we cannot get au_hdir to be ignored. ++ */ ++ file = vfsub_filp_open(fname, O_RDWR | O_CREAT | O_EXCL | O_LARGEFILE ++ /* | __FMODE_NONOTIFY */, ++ S_IRUGO | S_IWUGO); ++ if (IS_ERR(file)) { ++ if (!silent) ++ pr_err("open %s(%ld)\n", fname, PTR_ERR(file)); ++ return file; ++ } ++ ++ /* keep file count */ ++ h_parent = dget_parent(file->f_dentry); ++ h_dir = h_parent->d_inode; ++ mutex_lock_nested(&h_dir->i_mutex, AuLsc_I_PARENT); ++ /* mnt_want_write() is unnecessary here */ ++ err = vfsub_unlink(h_dir, &file->f_path, /*force*/0); ++ mutex_unlock(&h_dir->i_mutex); ++ dput(h_parent); ++ if (unlikely(err)) { ++ if (!silent) ++ pr_err("unlink %s(%d)\n", fname, err); ++ goto out; ++ } ++ ++ err = -EINVAL; ++ d = file->f_dentry; ++ if (unlikely(sb == d->d_sb)) { ++ if (!silent) ++ pr_err("%s must be outside\n", fname); ++ goto out; ++ } ++ if (unlikely(au_test_fs_bad_xino(d->d_sb))) { ++ if (!silent) ++ pr_err("xino doesn't support %s(%s)\n", ++ fname, au_sbtype(d->d_sb)); ++ goto out; ++ } ++ return file; /* success */ ++ ++out: ++ fput(file); ++ file = ERR_PTR(err); ++ return file; ++} ++ ++/* ++ * find another branch who is on the same filesystem of the specified ++ * branch{@btgt}. search until @bend. ++ */ ++static int is_sb_shared(struct super_block *sb, aufs_bindex_t btgt, ++ aufs_bindex_t bend) ++{ ++ aufs_bindex_t bindex; ++ struct super_block *tgt_sb = au_sbr_sb(sb, btgt); ++ ++ for (bindex = 0; bindex < btgt; bindex++) ++ if (unlikely(tgt_sb == au_sbr_sb(sb, bindex))) ++ return bindex; ++ for (bindex++; bindex <= bend; bindex++) ++ if (unlikely(tgt_sb == au_sbr_sb(sb, bindex))) ++ return bindex; ++ return -1; ++} ++ ++/* ---------------------------------------------------------------------- */ ++ ++/* ++ * initialize the xinofile for the specified branch @br ++ * at the place/path where @base_file indicates. ++ * test whether another branch is on the same filesystem or not, ++ * if @do_test is true. 
++ */ ++int au_xino_br(struct super_block *sb, struct au_branch *br, ino_t h_ino, ++ struct file *base_file, int do_test) ++{ ++ int err; ++ ino_t ino; ++ aufs_bindex_t bend, bindex; ++ struct au_branch *shared_br, *b; ++ struct file *file; ++ struct super_block *tgt_sb; ++ ++ shared_br = NULL; ++ bend = au_sbend(sb); ++ if (do_test) { ++ tgt_sb = br->br_mnt->mnt_sb; ++ for (bindex = 0; bindex <= bend; bindex++) { ++ b = au_sbr(sb, bindex); ++ if (tgt_sb == b->br_mnt->mnt_sb) { ++ shared_br = b; ++ break; ++ } ++ } ++ } ++ ++ if (!shared_br || !shared_br->br_xino.xi_file) { ++ struct au_xino_lock_dir ldir; ++ ++ au_xino_lock_dir(sb, base_file, &ldir); ++ /* mnt_want_write() is unnecessary here */ ++ file = au_xino_create2(base_file, NULL); ++ au_xino_unlock_dir(&ldir); ++ err = PTR_ERR(file); ++ if (IS_ERR(file)) ++ goto out; ++ br->br_xino.xi_file = file; ++ } else { ++ br->br_xino.xi_file = shared_br->br_xino.xi_file; ++ get_file(br->br_xino.xi_file); ++ } ++ ++ ino = AUFS_ROOT_INO; ++ err = au_xino_do_write(au_sbi(sb)->si_xwrite, br->br_xino.xi_file, ++ h_ino, ino); ++ if (unlikely(err)) { ++ fput(br->br_xino.xi_file); ++ br->br_xino.xi_file = NULL; ++ } ++ ++out: ++ return err; ++} ++ ++/* ---------------------------------------------------------------------- */ ++ ++/* trucate a xino bitmap file */ ++ ++/* todo: slow */ ++static int do_xib_restore(struct super_block *sb, struct file *file, void *page) ++{ ++ int err, bit; ++ ssize_t sz; ++ unsigned long pindex; ++ loff_t pos, pend; ++ struct au_sbinfo *sbinfo; ++ au_readf_t func; ++ ino_t *ino; ++ unsigned long *p; ++ ++ err = 0; ++ sbinfo = au_sbi(sb); ++ MtxMustLock(&sbinfo->si_xib_mtx); ++ p = sbinfo->si_xib_buf; ++ func = sbinfo->si_xread; ++ pend = i_size_read(file->f_dentry->d_inode); ++ pos = 0; ++ while (pos < pend) { ++ sz = xino_fread(func, file, page, PAGE_SIZE, &pos); ++ err = sz; ++ if (unlikely(sz <= 0)) ++ goto out; ++ ++ err = 0; ++ for (ino = page; sz > 0; ino++, sz -= sizeof(ino)) { ++ if (unlikely(*ino < AUFS_FIRST_INO)) ++ continue; ++ ++ xib_calc_bit(*ino, &pindex, &bit); ++ AuDebugOn(page_bits <= bit); ++ err = xib_pindex(sb, pindex); ++ if (!err) ++ set_bit(bit, p); ++ else ++ goto out; ++ } ++ } ++ ++out: ++ return err; ++} ++ ++static int xib_restore(struct super_block *sb) ++{ ++ int err; ++ aufs_bindex_t bindex, bend; ++ void *page; ++ ++ err = -ENOMEM; ++ page = (void *)__get_free_page(GFP_NOFS); ++ if (unlikely(!page)) ++ goto out; ++ ++ err = 0; ++ bend = au_sbend(sb); ++ for (bindex = 0; !err && bindex <= bend; bindex++) ++ if (!bindex || is_sb_shared(sb, bindex, bindex - 1) < 0) ++ err = do_xib_restore ++ (sb, au_sbr(sb, bindex)->br_xino.xi_file, page); ++ else ++ AuDbg("b%d\n", bindex); ++ free_page((unsigned long)page); ++ ++out: ++ return err; ++} ++ ++int au_xib_trunc(struct super_block *sb) ++{ ++ int err; ++ ssize_t sz; ++ loff_t pos; ++ struct au_xino_lock_dir ldir; ++ struct au_sbinfo *sbinfo; ++ unsigned long *p; ++ struct file *file; ++ ++ SiMustWriteLock(sb); ++ ++ err = 0; ++ sbinfo = au_sbi(sb); ++ if (!au_opt_test(sbinfo->si_mntflags, XINO)) ++ goto out; ++ ++ file = sbinfo->si_xib; ++ if (i_size_read(file->f_dentry->d_inode) <= PAGE_SIZE) ++ goto out; ++ ++ au_xino_lock_dir(sb, file, &ldir); ++ /* mnt_want_write() is unnecessary here */ ++ file = au_xino_create2(sbinfo->si_xib, NULL); ++ au_xino_unlock_dir(&ldir); ++ err = PTR_ERR(file); ++ if (IS_ERR(file)) ++ goto out; ++ fput(sbinfo->si_xib); ++ sbinfo->si_xib = file; ++ ++ p = sbinfo->si_xib_buf; ++ memset(p, 0, PAGE_SIZE); ++ pos = 
0; ++ sz = xino_fwrite(sbinfo->si_xwrite, sbinfo->si_xib, p, PAGE_SIZE, &pos); ++ if (unlikely(sz != PAGE_SIZE)) { ++ err = sz; ++ AuIOErr("err %d\n", err); ++ if (sz >= 0) ++ err = -EIO; ++ goto out; ++ } ++ ++ mutex_lock(&sbinfo->si_xib_mtx); ++ /* mnt_want_write() is unnecessary here */ ++ err = xib_restore(sb); ++ mutex_unlock(&sbinfo->si_xib_mtx); ++ ++out: ++ return err; ++} ++ ++/* ---------------------------------------------------------------------- */ ++ ++/* ++ * xino mount option handlers ++ */ ++static au_readf_t find_readf(struct file *h_file) ++{ ++ const struct file_operations *fop = h_file->f_op; ++ ++ if (fop) { ++ if (fop->read) ++ return fop->read; ++ if (fop->aio_read) ++ return do_sync_read; ++ } ++ return ERR_PTR(-ENOSYS); ++} ++ ++static au_writef_t find_writef(struct file *h_file) ++{ ++ const struct file_operations *fop = h_file->f_op; ++ ++ if (fop) { ++ if (fop->write) ++ return fop->write; ++ if (fop->aio_write) ++ return do_sync_write; ++ } ++ return ERR_PTR(-ENOSYS); ++} ++ ++/* xino bitmap */ ++static void xino_clear_xib(struct super_block *sb) ++{ ++ struct au_sbinfo *sbinfo; ++ ++ SiMustWriteLock(sb); ++ ++ sbinfo = au_sbi(sb); ++ sbinfo->si_xread = NULL; ++ sbinfo->si_xwrite = NULL; ++ if (sbinfo->si_xib) ++ fput(sbinfo->si_xib); ++ sbinfo->si_xib = NULL; ++ free_page((unsigned long)sbinfo->si_xib_buf); ++ sbinfo->si_xib_buf = NULL; ++} ++ ++static int au_xino_set_xib(struct super_block *sb, struct file *base) ++{ ++ int err; ++ loff_t pos; ++ struct au_sbinfo *sbinfo; ++ struct file *file; ++ ++ SiMustWriteLock(sb); ++ ++ sbinfo = au_sbi(sb); ++ file = au_xino_create2(base, sbinfo->si_xib); ++ err = PTR_ERR(file); ++ if (IS_ERR(file)) ++ goto out; ++ if (sbinfo->si_xib) ++ fput(sbinfo->si_xib); ++ sbinfo->si_xib = file; ++ sbinfo->si_xread = find_readf(file); ++ sbinfo->si_xwrite = find_writef(file); ++ ++ err = -ENOMEM; ++ if (!sbinfo->si_xib_buf) ++ sbinfo->si_xib_buf = (void *)get_zeroed_page(GFP_NOFS); ++ if (unlikely(!sbinfo->si_xib_buf)) ++ goto out_unset; ++ ++ sbinfo->si_xib_last_pindex = 0; ++ sbinfo->si_xib_next_bit = 0; ++ if (i_size_read(file->f_dentry->d_inode) < PAGE_SIZE) { ++ pos = 0; ++ err = xino_fwrite(sbinfo->si_xwrite, file, sbinfo->si_xib_buf, ++ PAGE_SIZE, &pos); ++ if (unlikely(err != PAGE_SIZE)) ++ goto out_free; ++ } ++ err = 0; ++ goto out; /* success */ ++ ++out_free: ++ free_page((unsigned long)sbinfo->si_xib_buf); ++ sbinfo->si_xib_buf = NULL; ++ if (err >= 0) ++ err = -EIO; ++out_unset: ++ fput(sbinfo->si_xib); ++ sbinfo->si_xib = NULL; ++ sbinfo->si_xread = NULL; ++ sbinfo->si_xwrite = NULL; ++out: ++ return err; ++} ++ ++/* xino for each branch */ ++static void xino_clear_br(struct super_block *sb) ++{ ++ aufs_bindex_t bindex, bend; ++ struct au_branch *br; ++ ++ bend = au_sbend(sb); ++ for (bindex = 0; bindex <= bend; bindex++) { ++ br = au_sbr(sb, bindex); ++ if (!br || !br->br_xino.xi_file) ++ continue; ++ ++ fput(br->br_xino.xi_file); ++ br->br_xino.xi_file = NULL; ++ } ++} ++ ++static int au_xino_set_br(struct super_block *sb, struct file *base) ++{ ++ int err; ++ ino_t ino; ++ aufs_bindex_t bindex, bend, bshared; ++ struct { ++ struct file *old, *new; ++ } *fpair, *p; ++ struct au_branch *br; ++ struct inode *inode; ++ au_writef_t writef; ++ ++ SiMustWriteLock(sb); ++ ++ err = -ENOMEM; ++ bend = au_sbend(sb); ++ fpair = kcalloc(bend + 1, sizeof(*fpair), GFP_NOFS); ++ if (unlikely(!fpair)) ++ goto out; ++ ++ inode = sb->s_root->d_inode; ++ ino = AUFS_ROOT_INO; ++ writef = au_sbi(sb)->si_xwrite; ++ for (bindex = 0, p 
= fpair; bindex <= bend; bindex++, p++) { ++ br = au_sbr(sb, bindex); ++ bshared = is_sb_shared(sb, bindex, bindex - 1); ++ if (bshared >= 0) { ++ /* shared xino */ ++ *p = fpair[bshared]; ++ get_file(p->new); ++ } ++ ++ if (!p->new) { ++ /* new xino */ ++ p->old = br->br_xino.xi_file; ++ p->new = au_xino_create2(base, br->br_xino.xi_file); ++ err = PTR_ERR(p->new); ++ if (IS_ERR(p->new)) { ++ p->new = NULL; ++ goto out_pair; ++ } ++ } ++ ++ err = au_xino_do_write(writef, p->new, ++ au_h_iptr(inode, bindex)->i_ino, ino); ++ if (unlikely(err)) ++ goto out_pair; ++ } ++ ++ for (bindex = 0, p = fpair; bindex <= bend; bindex++, p++) { ++ br = au_sbr(sb, bindex); ++ if (br->br_xino.xi_file) ++ fput(br->br_xino.xi_file); ++ get_file(p->new); ++ br->br_xino.xi_file = p->new; ++ } ++ ++out_pair: ++ for (bindex = 0, p = fpair; bindex <= bend; bindex++, p++) ++ if (p->new) ++ fput(p->new); ++ else ++ break; ++ kfree(fpair); ++out: ++ return err; ++} ++ ++void au_xino_clr(struct super_block *sb) ++{ ++ struct au_sbinfo *sbinfo; ++ ++ au_xigen_clr(sb); ++ xino_clear_xib(sb); ++ xino_clear_br(sb); ++ sbinfo = au_sbi(sb); ++ /* lvalue, do not call au_mntflags() */ ++ au_opt_clr(sbinfo->si_mntflags, XINO); ++} ++ ++int au_xino_set(struct super_block *sb, struct au_opt_xino *xino, int remount) ++{ ++ int err, skip; ++ struct dentry *parent, *cur_parent; ++ struct qstr *dname, *cur_name; ++ struct file *cur_xino; ++ struct inode *dir; ++ struct au_sbinfo *sbinfo; ++ ++ SiMustWriteLock(sb); ++ ++ err = 0; ++ sbinfo = au_sbi(sb); ++ parent = dget_parent(xino->file->f_dentry); ++ if (remount) { ++ skip = 0; ++ dname = &xino->file->f_dentry->d_name; ++ cur_xino = sbinfo->si_xib; ++ if (cur_xino) { ++ cur_parent = dget_parent(cur_xino->f_dentry); ++ cur_name = &cur_xino->f_dentry->d_name; ++ skip = (cur_parent == parent ++ && dname->len == cur_name->len ++ && !memcmp(dname->name, cur_name->name, ++ dname->len)); ++ dput(cur_parent); ++ } ++ if (skip) ++ goto out; ++ } ++ ++ au_opt_set(sbinfo->si_mntflags, XINO); ++ dir = parent->d_inode; ++ mutex_lock_nested(&dir->i_mutex, AuLsc_I_PARENT); ++ /* mnt_want_write() is unnecessary here */ ++ err = au_xino_set_xib(sb, xino->file); ++ if (!err) ++ err = au_xigen_set(sb, xino->file); ++ if (!err) ++ err = au_xino_set_br(sb, xino->file); ++ mutex_unlock(&dir->i_mutex); ++ if (!err) ++ goto out; /* success */ ++ ++ /* reset all */ ++ AuIOErr("failed creating xino(%d).\n", err); ++ ++out: ++ dput(parent); ++ return err; ++} ++ ++/* ---------------------------------------------------------------------- */ ++ ++/* ++ * create a xinofile at the default place/path. 
++ */ ++struct file *au_xino_def(struct super_block *sb) ++{ ++ struct file *file; ++ char *page, *p; ++ struct au_branch *br; ++ struct super_block *h_sb; ++ struct path path; ++ aufs_bindex_t bend, bindex, bwr; ++ ++ br = NULL; ++ bend = au_sbend(sb); ++ bwr = -1; ++ for (bindex = 0; bindex <= bend; bindex++) { ++ br = au_sbr(sb, bindex); ++ if (au_br_writable(br->br_perm) ++ && !au_test_fs_bad_xino(br->br_mnt->mnt_sb)) { ++ bwr = bindex; ++ break; ++ } ++ } ++ ++ if (bwr >= 0) { ++ file = ERR_PTR(-ENOMEM); ++ page = __getname_gfp(GFP_NOFS); ++ if (unlikely(!page)) ++ goto out; ++ path.mnt = br->br_mnt; ++ path.dentry = au_h_dptr(sb->s_root, bwr); ++ p = d_path(&path, page, PATH_MAX - sizeof(AUFS_XINO_FNAME)); ++ file = (void *)p; ++ if (!IS_ERR(p)) { ++ strcat(p, "/" AUFS_XINO_FNAME); ++ AuDbg("%s\n", p); ++ file = au_xino_create(sb, p, /*silent*/0); ++ if (!IS_ERR(file)) ++ au_xino_brid_set(sb, br->br_id); ++ } ++ __putname(page); ++ } else { ++ file = au_xino_create(sb, AUFS_XINO_DEFPATH, /*silent*/0); ++ if (IS_ERR(file)) ++ goto out; ++ h_sb = file->f_dentry->d_sb; ++ if (unlikely(au_test_fs_bad_xino(h_sb))) { ++ pr_err("xino doesn't support %s(%s)\n", ++ AUFS_XINO_DEFPATH, au_sbtype(h_sb)); ++ fput(file); ++ file = ERR_PTR(-EINVAL); ++ } ++ if (!IS_ERR(file)) ++ au_xino_brid_set(sb, -1); ++ } ++ ++out: ++ return file; ++} ++ ++/* ---------------------------------------------------------------------- */ ++ ++int au_xino_path(struct seq_file *seq, struct file *file) ++{ ++ int err; ++ ++ err = au_seq_path(seq, &file->f_path); ++ if (unlikely(err < 0)) ++ goto out; ++ ++ err = 0; ++#define Deleted "\\040(deleted)" ++ seq->count -= sizeof(Deleted) - 1; ++ AuDebugOn(memcmp(seq->buf + seq->count, Deleted, ++ sizeof(Deleted) - 1)); ++#undef Deleted ++ ++out: ++ return err; ++} +diff -uNr linux-3.2.0-gentoo-r1.orig//fs/file_table.c linux-3.2.0-gentoo-r1/fs/file_table.c +--- linux-3.2.0-gentoo-r1.orig//fs/file_table.c 2012-01-17 11:56:05.850634078 +0100 ++++ linux-3.2.0-gentoo-r1/fs/file_table.c 2012-01-17 12:09:55.948393073 +0100 +@@ -443,6 +443,8 @@ + } + } + ++EXPORT_SYMBOL(file_sb_list_del); ++ + #ifdef CONFIG_SMP + + /* +diff -uNr linux-3.2.0-gentoo-r1.orig//fs/inode.c linux-3.2.0-gentoo-r1/fs/inode.c +--- linux-3.2.0-gentoo-r1.orig//fs/inode.c 2012-01-17 11:56:10.684010972 +0100 ++++ linux-3.2.0-gentoo-r1/fs/inode.c 2012-01-17 12:09:55.969226594 +0100 +@@ -65,6 +65,7 @@ + static __cacheline_aligned_in_smp DEFINE_SPINLOCK(inode_hash_lock); + + __cacheline_aligned_in_smp DEFINE_SPINLOCK(inode_sb_list_lock); ++EXPORT_SYMBOL(inode_sb_list_lock); + + /* + * Empty aops. 
Can be used for the cases where the user does not +diff -uNr linux-3.2.0-gentoo-r1.orig//fs/Kconfig linux-3.2.0-gentoo-r1/fs/Kconfig +--- linux-3.2.0-gentoo-r1.orig//fs/Kconfig 2012-01-17 11:56:10.732622521 +0100 ++++ linux-3.2.0-gentoo-r1/fs/Kconfig 2012-01-17 12:09:24.806908708 +0100 +@@ -215,6 +215,7 @@ + source "fs/sysv/Kconfig" + source "fs/ufs/Kconfig" + source "fs/exofs/Kconfig" ++source "fs/aufs/Kconfig" + + endif # MISC_FILESYSTEMS + +diff -uNr linux-3.2.0-gentoo-r1.orig//fs/Makefile linux-3.2.0-gentoo-r1/fs/Makefile +--- linux-3.2.0-gentoo-r1.orig//fs/Makefile 2012-01-17 11:56:14.626176130 +0100 ++++ linux-3.2.0-gentoo-r1/fs/Makefile 2012-01-17 12:09:24.924965327 +0100 +@@ -123,3 +123,4 @@ + obj-y += exofs/ # Multiple modules + obj-$(CONFIG_CEPH_FS) += ceph/ + obj-$(CONFIG_PSTORE) += pstore/ ++obj-$(CONFIG_AUFS_FS) += aufs/ +diff -uNr linux-3.2.0-gentoo-r1.orig//fs/namei.c linux-3.2.0-gentoo-r1/fs/namei.c +--- linux-3.2.0-gentoo-r1.orig//fs/namei.c 2012-01-17 11:56:13.778946273 +0100 ++++ linux-3.2.0-gentoo-r1/fs/namei.c 2012-01-17 12:09:55.971541430 +0100 +@@ -1753,10 +1753,11 @@ + * needs parent already locked. Doesn't follow mounts. + * SMP-safe. + */ +-static struct dentry *lookup_hash(struct nameidata *nd) ++struct dentry *lookup_hash(struct nameidata *nd) + { + return __lookup_hash(&nd->last, nd->path.dentry, nd); + } ++EXPORT_SYMBOL(lookup_hash); + + /** + * lookup_one_len - filesystem helper to lookup single pathname component +diff -uNr linux-3.2.0-gentoo-r1.orig//fs/namespace.c linux-3.2.0-gentoo-r1/fs/namespace.c +--- linux-3.2.0-gentoo-r1.orig//fs/namespace.c 2012-01-17 11:56:14.563675567 +0100 ++++ linux-3.2.0-gentoo-r1/fs/namespace.c 2012-01-17 12:09:55.973856265 +0100 +@@ -1506,6 +1506,7 @@ + } + return 0; + } ++EXPORT_SYMBOL(iterate_mounts); + + static void cleanup_group_ids(struct vfsmount *mnt, struct vfsmount *end) + { +diff -uNr linux-3.2.0-gentoo-r1.orig//fs/notify/group.c linux-3.2.0-gentoo-r1/fs/notify/group.c +--- linux-3.2.0-gentoo-r1.orig//fs/notify/group.c 2012-01-17 11:56:07.028885437 +0100 ++++ linux-3.2.0-gentoo-r1/fs/notify/group.c 2012-01-17 12:09:56.087283212 +0100 +@@ -22,6 +22,7 @@ + #include + #include + #include ++#include + + #include + #include "fsnotify.h" +@@ -70,6 +71,7 @@ + if (atomic_dec_and_test(&group->refcnt)) + fsnotify_destroy_group(group); + } ++EXPORT_SYMBOL(fsnotify_put_group); + + /* + * Create a new fsnotify_group and hold a reference for the group returned. +@@ -102,3 +104,4 @@ + + return group; + } ++EXPORT_SYMBOL(fsnotify_alloc_group); +diff -uNr linux-3.2.0-gentoo-r1.orig//fs/notify/mark.c linux-3.2.0-gentoo-r1/fs/notify/mark.c +--- linux-3.2.0-gentoo-r1.orig//fs/notify/mark.c 2012-01-17 11:56:07.014996423 +0100 ++++ linux-3.2.0-gentoo-r1/fs/notify/mark.c 2012-01-17 12:09:56.128950255 +0100 +@@ -112,6 +112,7 @@ + if (atomic_dec_and_test(&mark->refcnt)) + mark->free_mark(mark); + } ++EXPORT_SYMBOL(fsnotify_put_mark); + + /* + * Any time a mark is getting freed we end up here. 
+@@ -189,6 +190,7 @@ + if (unlikely(atomic_dec_and_test(&group->num_marks))) + fsnotify_final_destroy_group(group); + } ++EXPORT_SYMBOL(fsnotify_destroy_mark); + + void fsnotify_set_mark_mask_locked(struct fsnotify_mark *mark, __u32 mask) + { +@@ -276,6 +278,7 @@ + + return ret; + } ++EXPORT_SYMBOL(fsnotify_add_mark); + + /* + * clear any marks in a group in which mark->flags & flags is true +@@ -331,6 +334,7 @@ + atomic_set(&mark->refcnt, 1); + mark->free_mark = free_mark; + } ++EXPORT_SYMBOL(fsnotify_init_mark); + + static int fsnotify_mark_destroy(void *ignored) + { +diff -uNr linux-3.2.0-gentoo-r1.orig//fs/open.c linux-3.2.0-gentoo-r1/fs/open.c +--- linux-3.2.0-gentoo-r1.orig//fs/open.c 2012-01-17 11:56:14.554416224 +0100 ++++ linux-3.2.0-gentoo-r1/fs/open.c 2012-01-17 12:09:56.149783776 +0100 +@@ -60,6 +60,7 @@ + mutex_unlock(&dentry->d_inode->i_mutex); + return ret; + } ++EXPORT_SYMBOL(do_truncate); + + static long do_sys_truncate(const char __user *pathname, loff_t length) + { +diff -uNr linux-3.2.0-gentoo-r1.orig//fs/proc/nommu.c linux-3.2.0-gentoo-r1/fs/proc/nommu.c +--- linux-3.2.0-gentoo-r1.orig//fs/proc/nommu.c 2012-01-17 11:56:13.538203362 +0100 ++++ linux-3.2.0-gentoo-r1/fs/proc/nommu.c 2012-01-17 12:09:43.920506893 +0100 +@@ -46,6 +46,10 @@ + + if (file) { + struct inode *inode = region->vm_file->f_path.dentry->d_inode; ++ if (region->vm_prfile) { ++ file = region->vm_prfile; ++ inode = file->f_path.dentry->d_inode; ++ } + dev = inode->i_sb->s_dev; + ino = inode->i_ino; + } +diff -uNr linux-3.2.0-gentoo-r1.orig//fs/proc/task_mmu.c linux-3.2.0-gentoo-r1/fs/proc/task_mmu.c +--- linux-3.2.0-gentoo-r1.orig//fs/proc/task_mmu.c 2012-01-17 11:56:13.517369840 +0100 ++++ linux-3.2.0-gentoo-r1/fs/proc/task_mmu.c 2012-01-17 12:09:43.955229428 +0100 +@@ -222,6 +222,10 @@ + + if (file) { + struct inode *inode = vma->vm_file->f_path.dentry->d_inode; ++ if (vma->vm_prfile) { ++ file = vma->vm_prfile; ++ inode = file->f_path.dentry->d_inode; ++ } + dev = inode->i_sb->s_dev; + ino = inode->i_ino; + pgoff = ((loff_t)vma->vm_pgoff) << PAGE_SHIFT; +@@ -1033,6 +1037,8 @@ + + if (file) { + seq_printf(m, " file="); ++ if (vma->vm_prfile) ++ file = vma->vm_prfile; + seq_path(m, &file->f_path, "\n\t= "); + } else if (vma->vm_start <= mm->brk && vma->vm_end >= mm->start_brk) { + seq_printf(m, " heap"); +diff -uNr linux-3.2.0-gentoo-r1.orig//fs/proc/task_nommu.c linux-3.2.0-gentoo-r1/fs/proc/task_nommu.c +--- linux-3.2.0-gentoo-r1.orig//fs/proc/task_nommu.c 2012-01-17 11:56:13.491906648 +0100 ++++ linux-3.2.0-gentoo-r1/fs/proc/task_nommu.c 2012-01-17 12:09:43.957544264 +0100 +@@ -148,6 +148,10 @@ + + if (file) { + struct inode *inode = vma->vm_file->f_path.dentry->d_inode; ++ if (vma->vm_prfile) { ++ file = vma->vm_prfile; ++ inode = file->f_path.dentry->d_inode; ++ } + dev = inode->i_sb->s_dev; + ino = inode->i_ino; + pgoff = (loff_t)vma->vm_pgoff << PAGE_SHIFT; +diff -uNr linux-3.2.0-gentoo-r1.orig//fs/splice.c linux-3.2.0-gentoo-r1/fs/splice.c +--- linux-3.2.0-gentoo-r1.orig//fs/splice.c 2012-01-17 11:56:07.149256893 +0100 ++++ linux-3.2.0-gentoo-r1/fs/splice.c 2012-01-17 12:09:56.154413448 +0100 +@@ -1085,8 +1085,8 @@ + /* + * Attempt to initiate a splice from pipe to file. 
+ */ +-static long do_splice_from(struct pipe_inode_info *pipe, struct file *out, +- loff_t *ppos, size_t len, unsigned int flags) ++long do_splice_from(struct pipe_inode_info *pipe, struct file *out, ++ loff_t *ppos, size_t len, unsigned int flags) + { + ssize_t (*splice_write)(struct pipe_inode_info *, struct file *, + loff_t *, size_t, unsigned int); +@@ -1109,13 +1109,14 @@ + + return splice_write(pipe, out, ppos, len, flags); + } ++EXPORT_SYMBOL(do_splice_from); + + /* + * Attempt to initiate a splice from a file to a pipe. + */ +-static long do_splice_to(struct file *in, loff_t *ppos, +- struct pipe_inode_info *pipe, size_t len, +- unsigned int flags) ++long do_splice_to(struct file *in, loff_t *ppos, ++ struct pipe_inode_info *pipe, size_t len, ++ unsigned int flags) + { + ssize_t (*splice_read)(struct file *, loff_t *, + struct pipe_inode_info *, size_t, unsigned int); +@@ -1135,6 +1136,7 @@ + + return splice_read(in, ppos, pipe, len, flags); + } ++EXPORT_SYMBOL(do_splice_to); + + /** + * splice_direct_to_actor - splices data directly between two non-pipes +diff -uNr linux-3.2.0-gentoo-r1.orig//include/linux/aufs_type.h linux-3.2.0-gentoo-r1/include/linux/aufs_type.h +--- linux-3.2.0-gentoo-r1.orig//include/linux/aufs_type.h 1970-01-01 01:00:00.000000000 +0100 ++++ linux-3.2.0-gentoo-r1/include/linux/aufs_type.h 2012-01-17 12:11:46.699391210 +0100 +@@ -0,0 +1,233 @@ ++/* ++ * Copyright (C) 2005-2012 Junjiro R. Okajima ++ * ++ * This program, aufs is free software; you can redistribute it and/or modify ++ * it under the terms of the GNU General Public License as published by ++ * the Free Software Foundation; either version 2 of the License, or ++ * (at your option) any later version. ++ * ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ * GNU General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public License ++ * along with this program; if not, write to the Free Software ++ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA ++ */ ++ ++#ifndef __AUFS_TYPE_H__ ++#define __AUFS_TYPE_H__ ++ ++#define AUFS_NAME "aufs" ++ ++#ifdef __KERNEL__ ++/* ++ * define it before including all other headers. ++ * sched.h may use pr_* macros before defining "current", so define the ++ * no-current version first, and re-define later. ++ */ ++#define pr_fmt(fmt) AUFS_NAME " %s:%d: " fmt, __func__, __LINE__ ++#include ++#undef pr_fmt ++#define pr_fmt(fmt) AUFS_NAME " %s:%d:%s[%d]: " fmt, \ ++ __func__, __LINE__, current->comm, current->pid ++#else ++#include ++#include ++#endif /* __KERNEL__ */ ++ ++#include ++ ++#define AUFS_VERSION "3.2-20120109" ++ ++/* todo? 
move this to linux-2.6.19/include/magic.h */ ++#define AUFS_SUPER_MAGIC ('a' << 24 | 'u' << 16 | 'f' << 8 | 's') ++ ++/* ---------------------------------------------------------------------- */ ++ ++#ifdef CONFIG_AUFS_BRANCH_MAX_127 ++typedef int8_t aufs_bindex_t; ++#define AUFS_BRANCH_MAX 127 ++#else ++typedef int16_t aufs_bindex_t; ++#ifdef CONFIG_AUFS_BRANCH_MAX_511 ++#define AUFS_BRANCH_MAX 511 ++#elif defined(CONFIG_AUFS_BRANCH_MAX_1023) ++#define AUFS_BRANCH_MAX 1023 ++#elif defined(CONFIG_AUFS_BRANCH_MAX_32767) ++#define AUFS_BRANCH_MAX 32767 ++#endif ++#endif ++ ++#ifdef __KERNEL__ ++#ifndef AUFS_BRANCH_MAX ++#error unknown CONFIG_AUFS_BRANCH_MAX value ++#endif ++#endif /* __KERNEL__ */ ++ ++/* ---------------------------------------------------------------------- */ ++ ++#define AUFS_FSTYPE AUFS_NAME ++ ++#define AUFS_ROOT_INO 2 ++#define AUFS_FIRST_INO 11 ++ ++#define AUFS_WH_PFX ".wh." ++#define AUFS_WH_PFX_LEN ((int)sizeof(AUFS_WH_PFX) - 1) ++#define AUFS_WH_TMP_LEN 4 ++/* a limit for rmdir/rename a dir */ ++#define AUFS_MAX_NAMELEN (NAME_MAX \ ++ - AUFS_WH_PFX_LEN * 2 /* doubly whiteouted */\ ++ - 1 /* dot */\ ++ - AUFS_WH_TMP_LEN) /* hex */ ++#define AUFS_XINO_FNAME "." AUFS_NAME ".xino" ++#define AUFS_XINO_DEFPATH "/tmp/" AUFS_XINO_FNAME ++#define AUFS_XINO_TRUNC_INIT 64 /* blocks */ ++#define AUFS_XINO_TRUNC_STEP 4 /* blocks */ ++#define AUFS_DIRWH_DEF 3 ++#define AUFS_RDCACHE_DEF 10 /* seconds */ ++#define AUFS_RDCACHE_MAX 3600 /* seconds */ ++#define AUFS_RDBLK_DEF 512 /* bytes */ ++#define AUFS_RDHASH_DEF 32 ++#define AUFS_WKQ_NAME AUFS_NAME "d" ++#define AUFS_MFS_DEF_SEC 30 /* seconds */ ++#define AUFS_MFS_MAX_SEC 3600 /* seconds */ ++#define AUFS_PLINK_WARN 100 /* number of plinks */ ++ ++/* pseudo-link maintenace under /proc */ ++#define AUFS_PLINK_MAINT_NAME "plink_maint" ++#define AUFS_PLINK_MAINT_DIR "fs/" AUFS_NAME ++#define AUFS_PLINK_MAINT_PATH AUFS_PLINK_MAINT_DIR "/" AUFS_PLINK_MAINT_NAME ++ ++#define AUFS_DIROPQ_NAME AUFS_WH_PFX ".opq" /* whiteouted doubly */ ++#define AUFS_WH_DIROPQ AUFS_WH_PFX AUFS_DIROPQ_NAME ++ ++#define AUFS_BASE_NAME AUFS_WH_PFX AUFS_NAME ++#define AUFS_PLINKDIR_NAME AUFS_WH_PFX "plnk" ++#define AUFS_ORPHDIR_NAME AUFS_WH_PFX "orph" ++ ++/* doubly whiteouted */ ++#define AUFS_WH_BASE AUFS_WH_PFX AUFS_BASE_NAME ++#define AUFS_WH_PLINKDIR AUFS_WH_PFX AUFS_PLINKDIR_NAME ++#define AUFS_WH_ORPHDIR AUFS_WH_PFX AUFS_ORPHDIR_NAME ++ ++/* branch permissions and attributes */ ++#define AUFS_BRPERM_RW "rw" ++#define AUFS_BRPERM_RO "ro" ++#define AUFS_BRPERM_RR "rr" ++#define AUFS_BRRATTR_WH "wh" ++#define AUFS_BRWATTR_NLWH "nolwh" ++ ++/* ---------------------------------------------------------------------- */ ++ ++/* ioctl */ ++enum { ++ /* readdir in userspace */ ++ AuCtl_RDU, ++ AuCtl_RDU_INO, ++ ++ /* pathconf wrapper */ ++ AuCtl_WBR_FD, ++ ++ /* busy inode */ ++ AuCtl_IBUSY ++}; ++ ++/* borrowed from linux/include/linux/kernel.h */ ++#ifndef ALIGN ++#define ALIGN(x, a) __ALIGN_MASK(x, (typeof(x))(a)-1) ++#define __ALIGN_MASK(x, mask) (((x)+(mask))&~(mask)) ++#endif ++ ++/* borrowed from linux/include/linux/compiler-gcc3.h */ ++#ifndef __aligned ++#define __aligned(x) __attribute__((aligned(x))) ++#endif ++ ++#ifdef __KERNEL__ ++#ifndef __packed ++#define __packed __attribute__((packed)) ++#endif ++#endif ++ ++struct au_rdu_cookie { ++ uint64_t h_pos; ++ int16_t bindex; ++ uint8_t flags; ++ uint8_t pad; ++ uint32_t generation; ++} __aligned(8); ++ ++struct au_rdu_ent { ++ uint64_t ino; ++ int16_t bindex; ++ uint8_t type; ++ uint8_t nlen; ++ 
uint8_t wh; ++ char name[0]; ++} __aligned(8); ++ ++static inline int au_rdu_len(int nlen) ++{ ++ /* include the terminating NULL */ ++ return ALIGN(sizeof(struct au_rdu_ent) + nlen + 1, ++ sizeof(uint64_t)); ++} ++ ++union au_rdu_ent_ul { ++ struct au_rdu_ent __user *e; ++ uint64_t ul; ++}; ++ ++enum { ++ AufsCtlRduV_SZ, ++ AufsCtlRduV_End ++}; ++ ++struct aufs_rdu { ++ /* input */ ++ union { ++ uint64_t sz; /* AuCtl_RDU */ ++ uint64_t nent; /* AuCtl_RDU_INO */ ++ }; ++ union au_rdu_ent_ul ent; ++ uint16_t verify[AufsCtlRduV_End]; ++ ++ /* input/output */ ++ uint32_t blk; ++ ++ /* output */ ++ union au_rdu_ent_ul tail; ++ /* number of entries which were added in a single call */ ++ uint64_t rent; ++ uint8_t full; ++ uint8_t shwh; ++ ++ struct au_rdu_cookie cookie; ++} __aligned(8); ++ ++/* ---------------------------------------------------------------------- */ ++ ++struct aufs_wbr_fd { ++ uint32_t oflags; ++ int16_t brid; ++} __aligned(8); ++ ++/* ---------------------------------------------------------------------- */ ++ ++struct aufs_ibusy { ++ uint64_t ino, h_ino; ++ int16_t bindex; ++} __aligned(8); ++ ++/* ---------------------------------------------------------------------- */ ++ ++#define AuCtlType 'A' ++#define AUFS_CTL_RDU _IOWR(AuCtlType, AuCtl_RDU, struct aufs_rdu) ++#define AUFS_CTL_RDU_INO _IOWR(AuCtlType, AuCtl_RDU_INO, struct aufs_rdu) ++#define AUFS_CTL_WBR_FD _IOW(AuCtlType, AuCtl_WBR_FD, \ ++ struct aufs_wbr_fd) ++#define AUFS_CTL_IBUSY _IOWR(AuCtlType, AuCtl_IBUSY, struct aufs_ibusy) ++ ++#endif /* __AUFS_TYPE_H__ */ +diff -uNr linux-3.2.0-gentoo-r1.orig//include/linux/Kbuild linux-3.2.0-gentoo-r1/include/linux/Kbuild +--- linux-3.2.0-gentoo-r1.orig//include/linux/Kbuild 2012-01-17 11:58:53.002603489 +0100 ++++ linux-3.2.0-gentoo-r1/include/linux/Kbuild 2012-01-17 12:09:24.941169177 +0100 +@@ -65,6 +65,7 @@ + header-y += atmsap.h + header-y += atmsvc.h + header-y += audit.h ++header-y += aufs_type.h + header-y += auto_fs.h + header-y += auto_fs4.h + header-y += auxvec.h +diff -uNr linux-3.2.0-gentoo-r1.orig//include/linux/mm_types.h linux-3.2.0-gentoo-r1/include/linux/mm_types.h +--- linux-3.2.0-gentoo-r1.orig//include/linux/mm_types.h 2012-01-17 11:58:48.143763404 +0100 ++++ linux-3.2.0-gentoo-r1/include/linux/mm_types.h 2012-01-17 12:09:43.966803608 +0100 +@@ -186,6 +186,7 @@ + unsigned long vm_top; /* region allocated to here */ + unsigned long vm_pgoff; /* the offset in vm_file corresponding to vm_start */ + struct file *vm_file; /* the backing file or NULL */ ++ struct file *vm_prfile; /* the virtual backing file or NULL */ + + int vm_usage; /* region usage count (access under nommu_region_sem) */ + bool vm_icache_flushed : 1; /* true if the icache has been flushed for +@@ -245,6 +246,7 @@ + unsigned long vm_pgoff; /* Offset (within vm_file) in PAGE_SIZE + units, *not* PAGE_CACHE_SIZE */ + struct file * vm_file; /* File we map to (can be NULL). 
*/ ++ struct file *vm_prfile; /* shadow of vm_file */ + void * vm_private_data; /* was vm_pte (shared mem) */ + + #ifndef CONFIG_MMU +diff -uNr linux-3.2.0-gentoo-r1.orig//include/linux/namei.h linux-3.2.0-gentoo-r1/include/linux/namei.h +--- linux-3.2.0-gentoo-r1.orig//include/linux/namei.h 2012-01-17 11:58:55.565126585 +0100 ++++ linux-3.2.0-gentoo-r1/include/linux/namei.h 2012-01-17 12:09:34.642645500 +0100 +@@ -85,6 +85,7 @@ + extern struct file *lookup_instantiate_filp(struct nameidata *nd, struct dentry *dentry, + int (*open)(struct inode *, struct file *)); + ++extern struct dentry *lookup_hash(struct nameidata *nd); + extern struct dentry *lookup_one_len(const char *, struct dentry *, int); + + extern int follow_down_one(struct path *); +diff -uNr linux-3.2.0-gentoo-r1.orig//include/linux/splice.h linux-3.2.0-gentoo-r1/include/linux/splice.h +--- linux-3.2.0-gentoo-r1.orig//include/linux/splice.h 2012-01-17 11:58:47.639129226 +0100 ++++ linux-3.2.0-gentoo-r1/include/linux/splice.h 2012-01-17 12:09:34.658849350 +0100 +@@ -91,4 +91,10 @@ + extern void spd_release_page(struct splice_pipe_desc *, unsigned int); + + extern const struct pipe_buf_operations page_cache_pipe_buf_ops; ++ ++extern long do_splice_from(struct pipe_inode_info *pipe, struct file *out, ++ loff_t *ppos, size_t len, unsigned int flags); ++extern long do_splice_to(struct file *in, loff_t *ppos, ++ struct pipe_inode_info *pipe, size_t len, ++ unsigned int flags); + #endif +diff -uNr linux-3.2.0-gentoo-r1.orig//kernel/fork.c linux-3.2.0-gentoo-r1/kernel/fork.c +--- linux-3.2.0-gentoo-r1.orig//kernel/fork.c 2012-01-17 11:56:03.646910513 +0100 ++++ linux-3.2.0-gentoo-r1/kernel/fork.c 2012-01-17 12:09:43.978377785 +0100 +@@ -376,6 +376,8 @@ + struct address_space *mapping = file->f_mapping; + + get_file(file); ++ if (tmp->vm_prfile) ++ get_file(tmp->vm_prfile); + if (tmp->vm_flags & VM_DENYWRITE) + atomic_dec(&inode->i_writecount); + mutex_lock(&mapping->i_mmap_mutex); +diff -uNr linux-3.2.0-gentoo-r1.orig//mm/memory.c linux-3.2.0-gentoo-r1/mm/memory.c +--- linux-3.2.0-gentoo-r1.orig//mm/memory.c 2012-01-17 11:56:05.519612577 +0100 ++++ linux-3.2.0-gentoo-r1/mm/memory.c 2012-01-17 12:09:44.008470650 +0100 +@@ -2622,6 +2622,8 @@ + /* file_update_time outside page_lock */ + if (vma->vm_file) + file_update_time(vma->vm_file); ++ if (vma->vm_prfile) ++ file_update_time(vma->vm_prfile); + + return ret; + } +@@ -3307,6 +3309,8 @@ + /* file_update_time outside page_lock */ + if (vma->vm_file) + file_update_time(vma->vm_file); ++ if (vma->vm_prfile) ++ file_update_time(vma->vm_prfile); + } else { + unlock_page(vmf.page); + if (anon) +diff -uNr linux-3.2.0-gentoo-r1.orig//mm/mmap.c linux-3.2.0-gentoo-r1/mm/mmap.c +--- linux-3.2.0-gentoo-r1.orig//mm/mmap.c 2012-01-17 11:56:05.480260369 +0100 ++++ linux-3.2.0-gentoo-r1/mm/mmap.c 2012-01-17 12:09:44.010785486 +0100 +@@ -232,6 +232,8 @@ + vma->vm_ops->close(vma); + if (vma->vm_file) { + fput(vma->vm_file); ++ if (vma->vm_prfile) ++ fput(vma->vm_prfile); + if (vma->vm_flags & VM_EXECUTABLE) + removed_exe_file_vma(vma->vm_mm); + } +@@ -619,6 +621,8 @@ + if (remove_next) { + if (file) { + fput(file); ++ if (vma->vm_prfile) ++ fput(vma->vm_prfile); + if (next->vm_flags & VM_EXECUTABLE) + removed_exe_file_vma(mm); + } +@@ -1965,6 +1969,8 @@ + + if (new->vm_file) { + get_file(new->vm_file); ++ if (new->vm_prfile) ++ get_file(new->vm_prfile); + if (vma->vm_flags & VM_EXECUTABLE) + added_exe_file_vma(mm); + } +@@ -1989,6 +1995,8 @@ + if (vma->vm_flags & VM_EXECUTABLE) + 
removed_exe_file_vma(mm); + fput(new->vm_file); ++ if (new->vm_prfile) ++ fput(new->vm_prfile); + } + unlink_anon_vmas(new); + out_free_mpol: +@@ -2356,6 +2364,8 @@ + new_vma->vm_pgoff = pgoff; + if (new_vma->vm_file) { + get_file(new_vma->vm_file); ++ if (new_vma->vm_prfile) ++ get_file(new_vma->vm_prfile); + if (vma->vm_flags & VM_EXECUTABLE) + added_exe_file_vma(mm); + } +diff -uNr linux-3.2.0-gentoo-r1.orig//mm/nommu.c linux-3.2.0-gentoo-r1/mm/nommu.c +--- linux-3.2.0-gentoo-r1.orig//mm/nommu.c 2012-01-17 11:56:05.369148257 +0100 ++++ linux-3.2.0-gentoo-r1/mm/nommu.c 2012-01-17 12:09:44.020044827 +0100 +@@ -633,6 +633,8 @@ + + if (region->vm_file) + fput(region->vm_file); ++ if (region->vm_prfile) ++ fput(region->vm_prfile); + + /* IO memory and memory shared directly out of the pagecache + * from ramfs/tmpfs mustn't be released here */ +@@ -789,6 +791,8 @@ + vma->vm_ops->close(vma); + if (vma->vm_file) { + fput(vma->vm_file); ++ if (vma->vm_prfile) ++ fput(vma->vm_prfile); + if (vma->vm_flags & VM_EXECUTABLE) + removed_exe_file_vma(mm); + } +@@ -1362,6 +1366,8 @@ + } + } + fput(region->vm_file); ++ if (region->vm_prfile) ++ fput(region->vm_prfile); + kmem_cache_free(vm_region_jar, region); + region = pregion; + result = start; +@@ -1438,9 +1444,13 @@ + error: + if (region->vm_file) + fput(region->vm_file); ++ if (region->vm_prfile) ++ fput(region->vm_prfile); + kmem_cache_free(vm_region_jar, region); + if (vma->vm_file) + fput(vma->vm_file); ++ if (vma->vm_prfile) ++ fput(vma->vm_prfile); + if (vma->vm_flags & VM_EXECUTABLE) + removed_exe_file_vma(vma->vm_mm); + kmem_cache_free(vm_area_cachep, vma); +diff -uNr linux-3.2.0-gentoo-r1.orig//security/commoncap.c linux-3.2.0-gentoo-r1/security/commoncap.c +--- linux-3.2.0-gentoo-r1.orig//security/commoncap.c 2012-01-17 11:57:45.874683687 +0100 ++++ linux-3.2.0-gentoo-r1/security/commoncap.c 2012-01-17 12:09:56.182191475 +0100 +@@ -975,3 +975,4 @@ + } + return ret; + } ++EXPORT_SYMBOL(cap_file_mmap); +diff -uNr linux-3.2.0-gentoo-r1.orig//security/device_cgroup.c linux-3.2.0-gentoo-r1/security/device_cgroup.c +--- linux-3.2.0-gentoo-r1.orig//security/device_cgroup.c 2012-01-17 11:57:45.874683687 +0100 ++++ linux-3.2.0-gentoo-r1/security/device_cgroup.c 2012-01-17 12:09:56.221543683 +0100 +@@ -7,6 +7,7 @@ + #include + #include + #include ++#include + #include + #include + #include +@@ -500,6 +501,7 @@ + + return -EPERM; + } ++EXPORT_SYMBOL(__devcgroup_inode_permission); + + int devcgroup_inode_mknod(int mode, dev_t dev) + { +diff -uNr linux-3.2.0-gentoo-r1.orig//security/security.c linux-3.2.0-gentoo-r1/security/security.c +--- linux-3.2.0-gentoo-r1.orig//security/security.c 2012-01-17 11:57:45.592273735 +0100 ++++ linux-3.2.0-gentoo-r1/security/security.c 2012-01-17 12:09:56.235432696 +0100 +@@ -411,6 +411,7 @@ + return 0; + return security_ops->path_rmdir(dir, dentry); + } ++EXPORT_SYMBOL(security_path_rmdir); + + int security_path_unlink(struct path *dir, struct dentry *dentry) + { +@@ -427,6 +428,7 @@ + return 0; + return security_ops->path_symlink(dir, dentry, old_name); + } ++EXPORT_SYMBOL(security_path_symlink); + + int security_path_link(struct dentry *old_dentry, struct path *new_dir, + struct dentry *new_dentry) +@@ -435,6 +437,7 @@ + return 0; + return security_ops->path_link(old_dentry, new_dir, new_dentry); + } ++EXPORT_SYMBOL(security_path_link); + + int security_path_rename(struct path *old_dir, struct dentry *old_dentry, + struct path *new_dir, struct dentry *new_dentry) +@@ -453,6 +456,7 @@ + return 0; + return 
security_ops->path_truncate(path); + } ++EXPORT_SYMBOL(security_path_truncate); + + int security_path_chmod(struct dentry *dentry, struct vfsmount *mnt, + mode_t mode) +@@ -461,6 +465,7 @@ + return 0; + return security_ops->path_chmod(dentry, mnt, mode); + } ++EXPORT_SYMBOL(security_path_chmod); + + int security_path_chown(struct path *path, uid_t uid, gid_t gid) + { +@@ -468,6 +473,7 @@ + return 0; + return security_ops->path_chown(path, uid, gid); + } ++EXPORT_SYMBOL(security_path_chown); + + int security_path_chroot(struct path *path) + { +@@ -544,6 +550,7 @@ + return 0; + return security_ops->inode_readlink(dentry); + } ++EXPORT_SYMBOL(security_inode_readlink); + + int security_inode_follow_link(struct dentry *dentry, struct nameidata *nd) + { +@@ -558,6 +565,7 @@ + return 0; + return security_ops->inode_permission(inode, mask); + } ++EXPORT_SYMBOL(security_inode_permission); + + int security_inode_setattr(struct dentry *dentry, struct iattr *attr) + { +@@ -673,6 +681,7 @@ + + return fsnotify_perm(file, mask); + } ++EXPORT_SYMBOL(security_file_permission); + + int security_file_alloc(struct file *file) + { +@@ -700,6 +709,7 @@ + return ret; + return ima_file_mmap(file, prot); + } ++EXPORT_SYMBOL(security_file_mmap); + + int security_file_mprotect(struct vm_area_struct *vma, unsigned long reqprot, + unsigned long prot) diff --git a/3.2.34/bump/1021_linux-3.2.22.patch b/3.2.34/bump/1021_linux-3.2.22.patch new file mode 100644 index 0000000..e6ad93a --- /dev/null +++ b/3.2.34/bump/1021_linux-3.2.22.patch @@ -0,0 +1,1245 @@ +diff --git a/Documentation/stable_kernel_rules.txt b/Documentation/stable_kernel_rules.txt +index 21fd05c..e1f856b 100644 +--- a/Documentation/stable_kernel_rules.txt ++++ b/Documentation/stable_kernel_rules.txt +@@ -12,6 +12,12 @@ Rules on what kind of patches are accepted, and which ones are not, into the + marked CONFIG_BROKEN), an oops, a hang, data corruption, a real + security issue, or some "oh, that's not good" issue. In short, something + critical. ++ - Serious issues as reported by a user of a distribution kernel may also ++ be considered if they fix a notable performance or interactivity issue. ++ As these fixes are not as obvious and have a higher risk of a subtle ++ regression they should only be submitted by a distribution kernel ++ maintainer and include an addendum linking to a bugzilla entry if it ++ exists and additional information on the user-visible impact. + - New device IDs and quirks are also accepted. + - No "theoretical race condition" issues, unless an explanation of how the + race can be exploited is also provided. 
+diff --git a/Makefile b/Makefile +index 7eb465e..9a7d921 100644 +--- a/Makefile ++++ b/Makefile +@@ -1,6 +1,6 @@ + VERSION = 3 + PATCHLEVEL = 2 +-SUBLEVEL = 21 ++SUBLEVEL = 22 + EXTRAVERSION = + NAME = Saber-toothed Squirrel + +diff --git a/arch/arm/plat-samsung/include/plat/map-s3c.h b/arch/arm/plat-samsung/include/plat/map-s3c.h +index 7d04875..c0c70a8 100644 +--- a/arch/arm/plat-samsung/include/plat/map-s3c.h ++++ b/arch/arm/plat-samsung/include/plat/map-s3c.h +@@ -22,7 +22,7 @@ + #define S3C24XX_VA_WATCHDOG S3C_VA_WATCHDOG + + #define S3C2412_VA_SSMC S3C_ADDR_CPU(0x00000000) +-#define S3C2412_VA_EBI S3C_ADDR_CPU(0x00010000) ++#define S3C2412_VA_EBI S3C_ADDR_CPU(0x00100000) + + #define S3C2410_PA_UART (0x50000000) + #define S3C24XX_PA_UART S3C2410_PA_UART +diff --git a/arch/arm/plat-samsung/include/plat/watchdog-reset.h b/arch/arm/plat-samsung/include/plat/watchdog-reset.h +index 40dbb2b..11b19ea 100644 +--- a/arch/arm/plat-samsung/include/plat/watchdog-reset.h ++++ b/arch/arm/plat-samsung/include/plat/watchdog-reset.h +@@ -24,7 +24,7 @@ static inline void arch_wdt_reset(void) + + __raw_writel(0, S3C2410_WTCON); /* disable watchdog, to be safe */ + +- if (s3c2410_wdtclk) ++ if (!IS_ERR(s3c2410_wdtclk)) + clk_enable(s3c2410_wdtclk); + + /* put initial values into count and data */ +diff --git a/arch/x86/include/asm/cpufeature.h b/arch/x86/include/asm/cpufeature.h +index f3444f7..0c3b775 100644 +--- a/arch/x86/include/asm/cpufeature.h ++++ b/arch/x86/include/asm/cpufeature.h +@@ -175,7 +175,7 @@ + #define X86_FEATURE_XSAVEOPT (7*32+ 4) /* Optimized Xsave */ + #define X86_FEATURE_PLN (7*32+ 5) /* Intel Power Limit Notification */ + #define X86_FEATURE_PTS (7*32+ 6) /* Intel Package Thermal Status */ +-#define X86_FEATURE_DTS (7*32+ 7) /* Digital Thermal Sensor */ ++#define X86_FEATURE_DTHERM (7*32+ 7) /* Digital Thermal Sensor */ + + /* Virtualization flags: Linux defined, word 8 */ + #define X86_FEATURE_TPR_SHADOW (8*32+ 0) /* Intel TPR Shadow */ +diff --git a/arch/x86/include/asm/pgtable-3level.h b/arch/x86/include/asm/pgtable-3level.h +index effff47..cb00ccc 100644 +--- a/arch/x86/include/asm/pgtable-3level.h ++++ b/arch/x86/include/asm/pgtable-3level.h +@@ -31,6 +31,60 @@ static inline void native_set_pte(pte_t *ptep, pte_t pte) + ptep->pte_low = pte.pte_low; + } + ++#define pmd_read_atomic pmd_read_atomic ++/* ++ * pte_offset_map_lock on 32bit PAE kernels was reading the pmd_t with ++ * a "*pmdp" dereference done by gcc. Problem is, in certain places ++ * where pte_offset_map_lock is called, concurrent page faults are ++ * allowed, if the mmap_sem is hold for reading. An example is mincore ++ * vs page faults vs MADV_DONTNEED. On the page fault side ++ * pmd_populate rightfully does a set_64bit, but if we're reading the ++ * pmd_t with a "*pmdp" on the mincore side, a SMP race can happen ++ * because gcc will not read the 64bit of the pmd atomically. To fix ++ * this all places running pmd_offset_map_lock() while holding the ++ * mmap_sem in read mode, shall read the pmdp pointer using this ++ * function to know if the pmd is null nor not, and in turn to know if ++ * they can run pmd_offset_map_lock or pmd_trans_huge or other pmd ++ * operations. ++ * ++ * Without THP if the mmap_sem is hold for reading, the pmd can only ++ * transition from null to not null while pmd_read_atomic runs. So ++ * we can always return atomic pmd values with this function. 
++ * ++ * With THP if the mmap_sem is hold for reading, the pmd can become ++ * trans_huge or none or point to a pte (and in turn become "stable") ++ * at any time under pmd_read_atomic. We could read it really ++ * atomically here with a atomic64_read for the THP enabled case (and ++ * it would be a whole lot simpler), but to avoid using cmpxchg8b we ++ * only return an atomic pmdval if the low part of the pmdval is later ++ * found stable (i.e. pointing to a pte). And we're returning a none ++ * pmdval if the low part of the pmd is none. In some cases the high ++ * and low part of the pmdval returned may not be consistent if THP is ++ * enabled (the low part may point to previously mapped hugepage, ++ * while the high part may point to a more recently mapped hugepage), ++ * but pmd_none_or_trans_huge_or_clear_bad() only needs the low part ++ * of the pmd to be read atomically to decide if the pmd is unstable ++ * or not, with the only exception of when the low part of the pmd is ++ * zero in which case we return a none pmd. ++ */ ++static inline pmd_t pmd_read_atomic(pmd_t *pmdp) ++{ ++ pmdval_t ret; ++ u32 *tmp = (u32 *)pmdp; ++ ++ ret = (pmdval_t) (*tmp); ++ if (ret) { ++ /* ++ * If the low part is null, we must not read the high part ++ * or we can end up with a partial pmd. ++ */ ++ smp_rmb(); ++ ret |= ((pmdval_t)*(tmp + 1)) << 32; ++ } ++ ++ return (pmd_t) { ret }; ++} ++ + static inline void native_set_pte_atomic(pte_t *ptep, pte_t pte) + { + set_64bit((unsigned long long *)(ptep), native_pte_val(pte)); +diff --git a/arch/x86/kernel/cpu/scattered.c b/arch/x86/kernel/cpu/scattered.c +index c7f64e6..ea6106c 100644 +--- a/arch/x86/kernel/cpu/scattered.c ++++ b/arch/x86/kernel/cpu/scattered.c +@@ -31,7 +31,7 @@ void __cpuinit init_scattered_cpuid_features(struct cpuinfo_x86 *c) + const struct cpuid_bit *cb; + + static const struct cpuid_bit __cpuinitconst cpuid_bits[] = { +- { X86_FEATURE_DTS, CR_EAX, 0, 0x00000006, 0 }, ++ { X86_FEATURE_DTHERM, CR_EAX, 0, 0x00000006, 0 }, + { X86_FEATURE_IDA, CR_EAX, 1, 0x00000006, 0 }, + { X86_FEATURE_ARAT, CR_EAX, 2, 0x00000006, 0 }, + { X86_FEATURE_PLN, CR_EAX, 4, 0x00000006, 0 }, +diff --git a/drivers/acpi/acpi_pad.c b/drivers/acpi/acpi_pad.c +index a43fa1a..1502c502 100644 +--- a/drivers/acpi/acpi_pad.c ++++ b/drivers/acpi/acpi_pad.c +@@ -36,6 +36,7 @@ + #define ACPI_PROCESSOR_AGGREGATOR_DEVICE_NAME "Processor Aggregator" + #define ACPI_PROCESSOR_AGGREGATOR_NOTIFY 0x80 + static DEFINE_MUTEX(isolated_cpus_lock); ++static DEFINE_MUTEX(round_robin_lock); + + static unsigned long power_saving_mwait_eax; + +@@ -107,7 +108,7 @@ static void round_robin_cpu(unsigned int tsk_index) + if (!alloc_cpumask_var(&tmp, GFP_KERNEL)) + return; + +- mutex_lock(&isolated_cpus_lock); ++ mutex_lock(&round_robin_lock); + cpumask_clear(tmp); + for_each_cpu(cpu, pad_busy_cpus) + cpumask_or(tmp, tmp, topology_thread_cpumask(cpu)); +@@ -116,7 +117,7 @@ static void round_robin_cpu(unsigned int tsk_index) + if (cpumask_empty(tmp)) + cpumask_andnot(tmp, cpu_online_mask, pad_busy_cpus); + if (cpumask_empty(tmp)) { +- mutex_unlock(&isolated_cpus_lock); ++ mutex_unlock(&round_robin_lock); + return; + } + for_each_cpu(cpu, tmp) { +@@ -131,7 +132,7 @@ static void round_robin_cpu(unsigned int tsk_index) + tsk_in_cpu[tsk_index] = preferred_cpu; + cpumask_set_cpu(preferred_cpu, pad_busy_cpus); + cpu_weight[preferred_cpu]++; +- mutex_unlock(&isolated_cpus_lock); ++ mutex_unlock(&round_robin_lock); + + set_cpus_allowed_ptr(current, cpumask_of(preferred_cpu)); + } +diff --git 
a/drivers/base/power/main.c b/drivers/base/power/main.c +index c3d2dfc..b96544a 100644 +--- a/drivers/base/power/main.c ++++ b/drivers/base/power/main.c +@@ -869,7 +869,7 @@ static int __device_suspend(struct device *dev, pm_message_t state, bool async) + dpm_wait_for_children(dev, async); + + if (async_error) +- return 0; ++ goto Complete; + + pm_runtime_get_noresume(dev); + if (pm_runtime_barrier(dev) && device_may_wakeup(dev)) +@@ -878,7 +878,7 @@ static int __device_suspend(struct device *dev, pm_message_t state, bool async) + if (pm_wakeup_pending()) { + pm_runtime_put_sync(dev); + async_error = -EBUSY; +- return 0; ++ goto Complete; + } + + device_lock(dev); +@@ -926,6 +926,8 @@ static int __device_suspend(struct device *dev, pm_message_t state, bool async) + } + + device_unlock(dev); ++ ++ Complete: + complete_all(&dev->power.completion); + + if (error) { +diff --git a/drivers/char/hw_random/atmel-rng.c b/drivers/char/hw_random/atmel-rng.c +index 0477982..1b5675b 100644 +--- a/drivers/char/hw_random/atmel-rng.c ++++ b/drivers/char/hw_random/atmel-rng.c +@@ -34,7 +34,7 @@ static int atmel_trng_read(struct hwrng *rng, void *buf, size_t max, + u32 *data = buf; + + /* data ready? */ +- if (readl(trng->base + TRNG_ODATA) & 1) { ++ if (readl(trng->base + TRNG_ISR) & 1) { + *data = readl(trng->base + TRNG_ODATA); + /* + ensure data ready is only set again AFTER the next data +diff --git a/drivers/edac/i7core_edac.c b/drivers/edac/i7core_edac.c +index 70ad892..b3ccefa 100644 +--- a/drivers/edac/i7core_edac.c ++++ b/drivers/edac/i7core_edac.c +@@ -1932,12 +1932,6 @@ static int i7core_mce_check_error(struct notifier_block *nb, unsigned long val, + if (mce->bank != 8) + return NOTIFY_DONE; + +-#ifdef CONFIG_SMP +- /* Only handle if it is the right mc controller */ +- if (mce->socketid != pvt->i7core_dev->socket) +- return NOTIFY_DONE; +-#endif +- + smp_rmb(); + if ((pvt->mce_out + 1) % MCE_LOG_LEN == pvt->mce_in) { + smp_wmb(); +@@ -2234,8 +2228,6 @@ static void i7core_unregister_mci(struct i7core_dev *i7core_dev) + if (pvt->enable_scrub) + disable_sdram_scrub_setting(mci); + +- atomic_notifier_chain_unregister(&x86_mce_decoder_chain, &i7_mce_dec); +- + /* Disable EDAC polling */ + i7core_pci_ctl_release(pvt); + +@@ -2336,8 +2328,6 @@ static int i7core_register_mci(struct i7core_dev *i7core_dev) + /* DCLK for scrub rate setting */ + pvt->dclk_freq = get_dclk_freq(); + +- atomic_notifier_chain_register(&x86_mce_decoder_chain, &i7_mce_dec); +- + return 0; + + fail0: +@@ -2481,8 +2471,10 @@ static int __init i7core_init(void) + + pci_rc = pci_register_driver(&i7core_driver); + +- if (pci_rc >= 0) ++ if (pci_rc >= 0) { ++ atomic_notifier_chain_register(&x86_mce_decoder_chain, &i7_mce_dec); + return 0; ++ } + + i7core_printk(KERN_ERR, "Failed to register device with error %d.\n", + pci_rc); +@@ -2498,6 +2490,7 @@ static void __exit i7core_exit(void) + { + debugf2("MC: " __FILE__ ": %s()\n", __func__); + pci_unregister_driver(&i7core_driver); ++ atomic_notifier_chain_unregister(&x86_mce_decoder_chain, &i7_mce_dec); + } + + module_init(i7core_init); +diff --git a/drivers/edac/sb_edac.c b/drivers/edac/sb_edac.c +index 7a402bf..18a1293 100644 +--- a/drivers/edac/sb_edac.c ++++ b/drivers/edac/sb_edac.c +@@ -1661,9 +1661,6 @@ static void sbridge_unregister_mci(struct sbridge_dev *sbridge_dev) + debugf0("MC: " __FILE__ ": %s(): mci = %p, dev = %p\n", + __func__, mci, &sbridge_dev->pdev[0]->dev); + +- atomic_notifier_chain_unregister(&x86_mce_decoder_chain, +- &sbridge_mce_dec); +- + /* Remove MC sysfs 
nodes */ + edac_mc_del_mc(mci->dev); + +@@ -1731,8 +1728,6 @@ static int sbridge_register_mci(struct sbridge_dev *sbridge_dev) + goto fail0; + } + +- atomic_notifier_chain_register(&x86_mce_decoder_chain, +- &sbridge_mce_dec); + return 0; + + fail0: +@@ -1861,8 +1856,10 @@ static int __init sbridge_init(void) + + pci_rc = pci_register_driver(&sbridge_driver); + +- if (pci_rc >= 0) ++ if (pci_rc >= 0) { ++ atomic_notifier_chain_register(&x86_mce_decoder_chain, &sbridge_mce_dec); + return 0; ++ } + + sbridge_printk(KERN_ERR, "Failed to register device with error %d.\n", + pci_rc); +@@ -1878,6 +1875,7 @@ static void __exit sbridge_exit(void) + { + debugf2("MC: " __FILE__ ": %s()\n", __func__); + pci_unregister_driver(&sbridge_driver); ++ atomic_notifier_chain_unregister(&x86_mce_decoder_chain, &sbridge_mce_dec); + } + + module_init(sbridge_init); +diff --git a/drivers/gpu/drm/drm_edid.c b/drivers/gpu/drm/drm_edid.c +index 3e927ce..a1ee634 100644 +--- a/drivers/gpu/drm/drm_edid.c ++++ b/drivers/gpu/drm/drm_edid.c +@@ -585,7 +585,7 @@ static bool + drm_monitor_supports_rb(struct edid *edid) + { + if (edid->revision >= 4) { +- bool ret; ++ bool ret = false; + drm_for_each_detailed_block((u8 *)edid, is_rb, &ret); + return ret; + } +diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c +index 3e7c478..3e2edc6 100644 +--- a/drivers/gpu/drm/i915/i915_gem.c ++++ b/drivers/gpu/drm/i915/i915_gem.c +@@ -3312,6 +3312,10 @@ i915_gem_ring_throttle(struct drm_device *dev, struct drm_file *file) + + if (ret == 0 && atomic_read(&dev_priv->mm.wedged)) + ret = -EIO; ++ } else if (wait_for(i915_seqno_passed(ring->get_seqno(ring), ++ seqno) || ++ atomic_read(&dev_priv->mm.wedged), 3000)) { ++ ret = -EBUSY; + } + } + +diff --git a/drivers/gpu/drm/i915/i915_irq.c b/drivers/gpu/drm/i915/i915_irq.c +index d3820c2..578ddfc 100644 +--- a/drivers/gpu/drm/i915/i915_irq.c ++++ b/drivers/gpu/drm/i915/i915_irq.c +@@ -424,6 +424,30 @@ static void gen6_pm_rps_work(struct work_struct *work) + mutex_unlock(&dev_priv->dev->struct_mutex); + } + ++static void gen6_queue_rps_work(struct drm_i915_private *dev_priv, ++ u32 pm_iir) ++{ ++ unsigned long flags; ++ ++ /* ++ * IIR bits should never already be set because IMR should ++ * prevent an interrupt from being shown in IIR. The warning ++ * displays a case where we've unsafely cleared ++ * dev_priv->pm_iir. Although missing an interrupt of the same ++ * type is not a problem, it displays a problem in the logic. ++ * ++ * The mask bit in IMR is cleared by rps_work. 
++ */ ++ ++ spin_lock_irqsave(&dev_priv->rps_lock, flags); ++ dev_priv->pm_iir |= pm_iir; ++ I915_WRITE(GEN6_PMIMR, dev_priv->pm_iir); ++ POSTING_READ(GEN6_PMIMR); ++ spin_unlock_irqrestore(&dev_priv->rps_lock, flags); ++ ++ queue_work(dev_priv->wq, &dev_priv->rps_work); ++} ++ + static void pch_irq_handler(struct drm_device *dev, u32 pch_iir) + { + drm_i915_private_t *dev_priv = (drm_i915_private_t *) dev->dev_private; +@@ -529,16 +553,8 @@ static irqreturn_t ivybridge_irq_handler(DRM_IRQ_ARGS) + pch_irq_handler(dev, pch_iir); + } + +- if (pm_iir & GEN6_PM_DEFERRED_EVENTS) { +- unsigned long flags; +- spin_lock_irqsave(&dev_priv->rps_lock, flags); +- WARN(dev_priv->pm_iir & pm_iir, "Missed a PM interrupt\n"); +- dev_priv->pm_iir |= pm_iir; +- I915_WRITE(GEN6_PMIMR, dev_priv->pm_iir); +- POSTING_READ(GEN6_PMIMR); +- spin_unlock_irqrestore(&dev_priv->rps_lock, flags); +- queue_work(dev_priv->wq, &dev_priv->rps_work); +- } ++ if (pm_iir & GEN6_PM_DEFERRED_EVENTS) ++ gen6_queue_rps_work(dev_priv, pm_iir); + + /* should clear PCH hotplug event before clear CPU irq */ + I915_WRITE(SDEIIR, pch_iir); +@@ -634,25 +650,8 @@ static irqreturn_t ironlake_irq_handler(DRM_IRQ_ARGS) + i915_handle_rps_change(dev); + } + +- if (IS_GEN6(dev) && pm_iir & GEN6_PM_DEFERRED_EVENTS) { +- /* +- * IIR bits should never already be set because IMR should +- * prevent an interrupt from being shown in IIR. The warning +- * displays a case where we've unsafely cleared +- * dev_priv->pm_iir. Although missing an interrupt of the same +- * type is not a problem, it displays a problem in the logic. +- * +- * The mask bit in IMR is cleared by rps_work. +- */ +- unsigned long flags; +- spin_lock_irqsave(&dev_priv->rps_lock, flags); +- WARN(dev_priv->pm_iir & pm_iir, "Missed a PM interrupt\n"); +- dev_priv->pm_iir |= pm_iir; +- I915_WRITE(GEN6_PMIMR, dev_priv->pm_iir); +- POSTING_READ(GEN6_PMIMR); +- spin_unlock_irqrestore(&dev_priv->rps_lock, flags); +- queue_work(dev_priv->wq, &dev_priv->rps_work); +- } ++ if (IS_GEN6(dev) && pm_iir & GEN6_PM_DEFERRED_EVENTS) ++ gen6_queue_rps_work(dev_priv, pm_iir); + + /* should clear PCH hotplug event before clear CPU irq */ + I915_WRITE(SDEIIR, pch_iir); +diff --git a/drivers/gpu/drm/i915/i915_suspend.c b/drivers/gpu/drm/i915/i915_suspend.c +index a1eb83d..f38d196 100644 +--- a/drivers/gpu/drm/i915/i915_suspend.c ++++ b/drivers/gpu/drm/i915/i915_suspend.c +@@ -739,8 +739,11 @@ static void i915_restore_display(struct drm_device *dev) + if (HAS_PCH_SPLIT(dev)) { + I915_WRITE(BLC_PWM_PCH_CTL1, dev_priv->saveBLC_PWM_CTL); + I915_WRITE(BLC_PWM_PCH_CTL2, dev_priv->saveBLC_PWM_CTL2); +- I915_WRITE(BLC_PWM_CPU_CTL, dev_priv->saveBLC_CPU_PWM_CTL); ++ /* NOTE: BLC_PWM_CPU_CTL must be written after BLC_PWM_CPU_CTL2; ++ * otherwise we get blank eDP screen after S3 on some machines ++ */ + I915_WRITE(BLC_PWM_CPU_CTL2, dev_priv->saveBLC_CPU_PWM_CTL2); ++ I915_WRITE(BLC_PWM_CPU_CTL, dev_priv->saveBLC_CPU_PWM_CTL); + I915_WRITE(PCH_PP_ON_DELAYS, dev_priv->savePP_ON_DELAYS); + I915_WRITE(PCH_PP_OFF_DELAYS, dev_priv->savePP_OFF_DELAYS); + I915_WRITE(PCH_PP_DIVISOR, dev_priv->savePP_DIVISOR); +diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c +index 5c1cdb8..6aa7716 100644 +--- a/drivers/gpu/drm/i915/intel_display.c ++++ b/drivers/gpu/drm/i915/intel_display.c +@@ -2187,6 +2187,33 @@ intel_pipe_set_base_atomic(struct drm_crtc *crtc, struct drm_framebuffer *fb, + } + + static int ++intel_finish_fb(struct drm_framebuffer *old_fb) ++{ ++ struct drm_i915_gem_object *obj = 
to_intel_framebuffer(old_fb)->obj; ++ struct drm_i915_private *dev_priv = obj->base.dev->dev_private; ++ bool was_interruptible = dev_priv->mm.interruptible; ++ int ret; ++ ++ wait_event(dev_priv->pending_flip_queue, ++ atomic_read(&dev_priv->mm.wedged) || ++ atomic_read(&obj->pending_flip) == 0); ++ ++ /* Big Hammer, we also need to ensure that any pending ++ * MI_WAIT_FOR_EVENT inside a user batch buffer on the ++ * current scanout is retired before unpinning the old ++ * framebuffer. ++ * ++ * This should only fail upon a hung GPU, in which case we ++ * can safely continue. ++ */ ++ dev_priv->mm.interruptible = false; ++ ret = i915_gem_object_finish_gpu(obj); ++ dev_priv->mm.interruptible = was_interruptible; ++ ++ return ret; ++} ++ ++static int + intel_pipe_set_base(struct drm_crtc *crtc, int x, int y, + struct drm_framebuffer *old_fb) + { +@@ -2224,25 +2251,8 @@ intel_pipe_set_base(struct drm_crtc *crtc, int x, int y, + return ret; + } + +- if (old_fb) { +- struct drm_i915_private *dev_priv = dev->dev_private; +- struct drm_i915_gem_object *obj = to_intel_framebuffer(old_fb)->obj; +- +- wait_event(dev_priv->pending_flip_queue, +- atomic_read(&dev_priv->mm.wedged) || +- atomic_read(&obj->pending_flip) == 0); +- +- /* Big Hammer, we also need to ensure that any pending +- * MI_WAIT_FOR_EVENT inside a user batch buffer on the +- * current scanout is retired before unpinning the old +- * framebuffer. +- * +- * This should only fail upon a hung GPU, in which case we +- * can safely continue. +- */ +- ret = i915_gem_object_finish_gpu(obj); +- (void) ret; +- } ++ if (old_fb) ++ intel_finish_fb(old_fb); + + ret = intel_pipe_set_base_atomic(crtc, crtc->fb, x, y, + LEAVE_ATOMIC_MODE_SET); +@@ -3312,6 +3322,23 @@ static void intel_crtc_disable(struct drm_crtc *crtc) + struct drm_crtc_helper_funcs *crtc_funcs = crtc->helper_private; + struct drm_device *dev = crtc->dev; + ++ /* Flush any pending WAITs before we disable the pipe. Note that ++ * we need to drop the struct_mutex in order to acquire it again ++ * during the lowlevel dpms routines around a couple of the ++ * operations. It does not look trivial nor desirable to move ++ * that locking higher. So instead we leave a window for the ++ * submission of further commands on the fb before we can actually ++ * disable it. This race with userspace exists anyway, and we can ++ * only rely on the pipe being disabled by userspace after it ++ * receives the hotplug notification and has flushed any pending ++ * batches. 
++ */ ++ if (crtc->fb) { ++ mutex_lock(&dev->struct_mutex); ++ intel_finish_fb(crtc->fb); ++ mutex_unlock(&dev->struct_mutex); ++ } ++ + crtc_funcs->dpms(crtc, DRM_MODE_DPMS_OFF); + + if (crtc->fb) { +diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c +index 933e66b..f6613dc 100644 +--- a/drivers/gpu/drm/i915/intel_ringbuffer.c ++++ b/drivers/gpu/drm/i915/intel_ringbuffer.c +@@ -306,7 +306,7 @@ static int init_ring_common(struct intel_ring_buffer *ring) + + I915_WRITE_CTL(ring, + ((ring->size - PAGE_SIZE) & RING_NR_PAGES) +- | RING_REPORT_64K | RING_VALID); ++ | RING_VALID); + + /* If the head is still not zero, the ring is dead */ + if ((I915_READ_CTL(ring) & RING_VALID) == 0 || +@@ -1157,18 +1157,6 @@ int intel_wait_ring_buffer(struct intel_ring_buffer *ring, int n) + struct drm_device *dev = ring->dev; + struct drm_i915_private *dev_priv = dev->dev_private; + unsigned long end; +- u32 head; +- +- /* If the reported head position has wrapped or hasn't advanced, +- * fallback to the slow and accurate path. +- */ +- head = intel_read_status_page(ring, 4); +- if (head > ring->head) { +- ring->head = head; +- ring->space = ring_space(ring); +- if (ring->space >= n) +- return 0; +- } + + trace_i915_ring_wait_begin(ring); + end = jiffies + 3 * HZ; +diff --git a/drivers/gpu/drm/nouveau/nouveau_fbcon.c b/drivers/gpu/drm/nouveau/nouveau_fbcon.c +index 3a4cc32..cc0801d 100644 +--- a/drivers/gpu/drm/nouveau/nouveau_fbcon.c ++++ b/drivers/gpu/drm/nouveau/nouveau_fbcon.c +@@ -499,7 +499,7 @@ int nouveau_fbcon_init(struct drm_device *dev) + nfbdev->helper.funcs = &nouveau_fbcon_helper_funcs; + + ret = drm_fb_helper_init(dev, &nfbdev->helper, +- nv_two_heads(dev) ? 2 : 1, 4); ++ dev->mode_config.num_crtc, 4); + if (ret) { + kfree(nfbdev); + return ret; +diff --git a/drivers/hwmon/applesmc.c b/drivers/hwmon/applesmc.c +index 4c07436..d99aa84 100644 +--- a/drivers/hwmon/applesmc.c ++++ b/drivers/hwmon/applesmc.c +@@ -215,7 +215,7 @@ static int read_smc(u8 cmd, const char *key, u8 *buffer, u8 len) + int i; + + if (send_command(cmd) || send_argument(key)) { +- pr_warn("%s: read arg fail\n", key); ++ pr_warn("%.4s: read arg fail\n", key); + return -EIO; + } + +@@ -223,7 +223,7 @@ static int read_smc(u8 cmd, const char *key, u8 *buffer, u8 len) + + for (i = 0; i < len; i++) { + if (__wait_status(0x05)) { +- pr_warn("%s: read data fail\n", key); ++ pr_warn("%.4s: read data fail\n", key); + return -EIO; + } + buffer[i] = inb(APPLESMC_DATA_PORT); +diff --git a/drivers/hwmon/coretemp.c b/drivers/hwmon/coretemp.c +index 427468f..0790c98 100644 +--- a/drivers/hwmon/coretemp.c ++++ b/drivers/hwmon/coretemp.c +@@ -660,7 +660,7 @@ static void __cpuinit get_core_online(unsigned int cpu) + * sensors. We check this bit only, all the early CPUs + * without thermal sensors will be filtered out. 
+ */ +- if (!cpu_has(c, X86_FEATURE_DTS)) ++ if (!cpu_has(c, X86_FEATURE_DTHERM)) + return; + + if (!pdev) { +diff --git a/drivers/md/dm-thin.c b/drivers/md/dm-thin.c +index da2f021..532a902 100644 +--- a/drivers/md/dm-thin.c ++++ b/drivers/md/dm-thin.c +@@ -288,8 +288,10 @@ static void __cell_release(struct cell *cell, struct bio_list *inmates) + + hlist_del(&cell->list); + +- bio_list_add(inmates, cell->holder); +- bio_list_merge(inmates, &cell->bios); ++ if (inmates) { ++ bio_list_add(inmates, cell->holder); ++ bio_list_merge(inmates, &cell->bios); ++ } + + mempool_free(cell, prison->cell_pool); + } +@@ -312,9 +314,10 @@ static void cell_release(struct cell *cell, struct bio_list *bios) + */ + static void __cell_release_singleton(struct cell *cell, struct bio *bio) + { +- hlist_del(&cell->list); + BUG_ON(cell->holder != bio); + BUG_ON(!bio_list_empty(&cell->bios)); ++ ++ __cell_release(cell, NULL); + } + + static void cell_release_singleton(struct cell *cell, struct bio *bio) +diff --git a/drivers/media/dvb/siano/smsusb.c b/drivers/media/dvb/siano/smsusb.c +index b7d1e3e..fb68805 100644 +--- a/drivers/media/dvb/siano/smsusb.c ++++ b/drivers/media/dvb/siano/smsusb.c +@@ -544,6 +544,8 @@ static const struct usb_device_id smsusb_id_table[] __devinitconst = { + .driver_info = SMS1XXX_BOARD_HAUPPAUGE_WINDHAM }, + { USB_DEVICE(0x2040, 0xc0a0), + .driver_info = SMS1XXX_BOARD_HAUPPAUGE_WINDHAM }, ++ { USB_DEVICE(0x2040, 0xf5a0), ++ .driver_info = SMS1XXX_BOARD_HAUPPAUGE_WINDHAM }, + { } /* Terminating entry */ + }; + +diff --git a/drivers/media/video/gspca/gspca.c b/drivers/media/video/gspca/gspca.c +index 2ca10df..981501f 100644 +--- a/drivers/media/video/gspca/gspca.c ++++ b/drivers/media/video/gspca/gspca.c +@@ -1697,7 +1697,7 @@ static int vidioc_streamoff(struct file *file, void *priv, + enum v4l2_buf_type buf_type) + { + struct gspca_dev *gspca_dev = priv; +- int ret; ++ int i, ret; + + if (buf_type != V4L2_BUF_TYPE_VIDEO_CAPTURE) + return -EINVAL; +@@ -1728,6 +1728,8 @@ static int vidioc_streamoff(struct file *file, void *priv, + wake_up_interruptible(&gspca_dev->wq); + + /* empty the transfer queues */ ++ for (i = 0; i < gspca_dev->nframes; i++) ++ gspca_dev->frame[i].v4l2_buf.flags &= ~BUF_ALL_FLAGS; + atomic_set(&gspca_dev->fr_q, 0); + atomic_set(&gspca_dev->fr_i, 0); + gspca_dev->fr_o = 0; +diff --git a/drivers/net/can/c_can/c_can.c b/drivers/net/can/c_can/c_can.c +index 8dc84d6..86cd532 100644 +--- a/drivers/net/can/c_can/c_can.c ++++ b/drivers/net/can/c_can/c_can.c +@@ -590,8 +590,8 @@ static void c_can_chip_config(struct net_device *dev) + priv->write_reg(priv, &priv->regs->control, + CONTROL_ENABLE_AR); + +- if (priv->can.ctrlmode & (CAN_CTRLMODE_LISTENONLY & +- CAN_CTRLMODE_LOOPBACK)) { ++ if ((priv->can.ctrlmode & CAN_CTRLMODE_LISTENONLY) && ++ (priv->can.ctrlmode & CAN_CTRLMODE_LOOPBACK)) { + /* loopback + silent mode : useful for hot self-test */ + priv->write_reg(priv, &priv->regs->control, CONTROL_EIE | + CONTROL_SIE | CONTROL_IE | CONTROL_TEST); +diff --git a/drivers/net/can/flexcan.c b/drivers/net/can/flexcan.c +index e023379..e59d006 100644 +--- a/drivers/net/can/flexcan.c ++++ b/drivers/net/can/flexcan.c +@@ -933,12 +933,12 @@ static int __devinit flexcan_probe(struct platform_device *pdev) + u32 clock_freq = 0; + + if (pdev->dev.of_node) { +- const u32 *clock_freq_p; ++ const __be32 *clock_freq_p; + + clock_freq_p = of_get_property(pdev->dev.of_node, + "clock-frequency", NULL); + if (clock_freq_p) +- clock_freq = *clock_freq_p; ++ clock_freq = 
be32_to_cpup(clock_freq_p); + } + + if (!clock_freq) { +diff --git a/drivers/net/ethernet/intel/e1000e/82571.c b/drivers/net/ethernet/intel/e1000e/82571.c +index a3e65fd..e556fc3 100644 +--- a/drivers/net/ethernet/intel/e1000e/82571.c ++++ b/drivers/net/ethernet/intel/e1000e/82571.c +@@ -2080,8 +2080,9 @@ const struct e1000_info e1000_82574_info = { + | FLAG_HAS_SMART_POWER_DOWN + | FLAG_HAS_AMT + | FLAG_HAS_CTRLEXT_ON_LOAD, +- .flags2 = FLAG2_CHECK_PHY_HANG ++ .flags2 = FLAG2_CHECK_PHY_HANG + | FLAG2_DISABLE_ASPM_L0S ++ | FLAG2_DISABLE_ASPM_L1 + | FLAG2_NO_DISABLE_RX, + .pba = 32, + .max_hw_frame_size = DEFAULT_JUMBO, +diff --git a/drivers/net/ethernet/intel/e1000e/netdev.c b/drivers/net/ethernet/intel/e1000e/netdev.c +index 4e933d1..64d3f98 100644 +--- a/drivers/net/ethernet/intel/e1000e/netdev.c ++++ b/drivers/net/ethernet/intel/e1000e/netdev.c +@@ -5132,14 +5132,6 @@ static int e1000_change_mtu(struct net_device *netdev, int new_mtu) + return -EINVAL; + } + +- /* 82573 Errata 17 */ +- if (((adapter->hw.mac.type == e1000_82573) || +- (adapter->hw.mac.type == e1000_82574)) && +- (max_frame > ETH_FRAME_LEN + ETH_FCS_LEN)) { +- adapter->flags2 |= FLAG2_DISABLE_ASPM_L1; +- e1000e_disable_aspm(adapter->pdev, PCIE_LINK_STATE_L1); +- } +- + while (test_and_set_bit(__E1000_RESETTING, &adapter->state)) + usleep_range(1000, 2000); + /* e1000e_down -> e1000e_reset dependent on max_frame_size & mtu */ +diff --git a/drivers/net/wireless/ath/ath9k/hw.c b/drivers/net/wireless/ath/ath9k/hw.c +index 8b0c2ca..6973620 100644 +--- a/drivers/net/wireless/ath/ath9k/hw.c ++++ b/drivers/net/wireless/ath/ath9k/hw.c +@@ -718,13 +718,25 @@ static void ath9k_hw_init_qos(struct ath_hw *ah) + + u32 ar9003_get_pll_sqsum_dvc(struct ath_hw *ah) + { ++ struct ath_common *common = ath9k_hw_common(ah); ++ int i = 0; ++ + REG_CLR_BIT(ah, PLL3, PLL3_DO_MEAS_MASK); + udelay(100); + REG_SET_BIT(ah, PLL3, PLL3_DO_MEAS_MASK); + +- while ((REG_READ(ah, PLL4) & PLL4_MEAS_DONE) == 0) ++ while ((REG_READ(ah, PLL4) & PLL4_MEAS_DONE) == 0) { ++ + udelay(100); + ++ if (WARN_ON_ONCE(i >= 100)) { ++ ath_err(common, "PLL4 meaurement not done\n"); ++ break; ++ } ++ ++ i++; ++ } ++ + return (REG_READ(ah, PLL3) & SQSUM_DVC_MASK) >> 3; + } + EXPORT_SYMBOL(ar9003_get_pll_sqsum_dvc); +diff --git a/drivers/net/wireless/ath/ath9k/main.c b/drivers/net/wireless/ath/ath9k/main.c +index f76a814..95437fc 100644 +--- a/drivers/net/wireless/ath/ath9k/main.c ++++ b/drivers/net/wireless/ath/ath9k/main.c +@@ -1042,6 +1042,15 @@ void ath_hw_pll_work(struct work_struct *work) + hw_pll_work.work); + u32 pll_sqsum; + ++ /* ++ * ensure that the PLL WAR is executed only ++ * after the STA is associated (or) if the ++ * beaconing had started in interfaces that ++ * uses beacons. 
++ */ ++ if (!(sc->sc_flags & SC_OP_BEACONS)) ++ return; ++ + if (AR_SREV_9485(sc->sc_ah)) { + + ath9k_ps_wakeup(sc); +@@ -1486,15 +1495,6 @@ static int ath9k_add_interface(struct ieee80211_hw *hw, + } + } + +- if ((ah->opmode == NL80211_IFTYPE_ADHOC) || +- ((vif->type == NL80211_IFTYPE_ADHOC) && +- sc->nvifs > 0)) { +- ath_err(common, "Cannot create ADHOC interface when other" +- " interfaces already exist.\n"); +- ret = -EINVAL; +- goto out; +- } +- + ath_dbg(common, ATH_DBG_CONFIG, + "Attach a VIF of type: %d\n", vif->type); + +diff --git a/drivers/net/wireless/ath/ath9k/xmit.c b/drivers/net/wireless/ath/ath9k/xmit.c +index 76fd277..c59c592 100644 +--- a/drivers/net/wireless/ath/ath9k/xmit.c ++++ b/drivers/net/wireless/ath/ath9k/xmit.c +@@ -936,13 +936,13 @@ static void ath_buf_set_rate(struct ath_softc *sc, struct ath_buf *bf, + } + + /* legacy rates */ ++ rate = &sc->sbands[tx_info->band].bitrates[rates[i].idx]; + if ((tx_info->band == IEEE80211_BAND_2GHZ) && + !(rate->flags & IEEE80211_RATE_ERP_G)) + phy = WLAN_RC_PHY_CCK; + else + phy = WLAN_RC_PHY_OFDM; + +- rate = &sc->sbands[tx_info->band].bitrates[rates[i].idx]; + info->rates[i].Rate = rate->hw_value; + if (rate->hw_value_short) { + if (rates[i].flags & IEEE80211_TX_RC_USE_SHORT_PREAMBLE) +diff --git a/drivers/net/wireless/iwlwifi/iwl-trans-pcie.c b/drivers/net/wireless/iwlwifi/iwl-trans-pcie.c +index 5815cf5..4661a64 100644 +--- a/drivers/net/wireless/iwlwifi/iwl-trans-pcie.c ++++ b/drivers/net/wireless/iwlwifi/iwl-trans-pcie.c +@@ -1777,6 +1777,7 @@ static ssize_t iwl_dbgfs_rx_queue_read(struct file *file, + return simple_read_from_buffer(user_buf, count, ppos, buf, pos); + } + ++#ifdef CONFIG_IWLWIFI_DEBUG + static ssize_t iwl_dbgfs_log_event_read(struct file *file, + char __user *user_buf, + size_t count, loff_t *ppos) +@@ -1814,6 +1815,7 @@ static ssize_t iwl_dbgfs_log_event_write(struct file *file, + + return count; + } ++#endif + + static ssize_t iwl_dbgfs_interrupt_read(struct file *file, + char __user *user_buf, +@@ -1941,7 +1943,9 @@ static ssize_t iwl_dbgfs_fh_reg_read(struct file *file, + return ret; + } + ++#ifdef CONFIG_IWLWIFI_DEBUG + DEBUGFS_READ_WRITE_FILE_OPS(log_event); ++#endif + DEBUGFS_READ_WRITE_FILE_OPS(interrupt); + DEBUGFS_READ_FILE_OPS(fh_reg); + DEBUGFS_READ_FILE_OPS(rx_queue); +@@ -1957,7 +1961,9 @@ static int iwl_trans_pcie_dbgfs_register(struct iwl_trans *trans, + { + DEBUGFS_ADD_FILE(rx_queue, dir, S_IRUSR); + DEBUGFS_ADD_FILE(tx_queue, dir, S_IRUSR); ++#ifdef CONFIG_IWLWIFI_DEBUG + DEBUGFS_ADD_FILE(log_event, dir, S_IWUSR | S_IRUSR); ++#endif + DEBUGFS_ADD_FILE(interrupt, dir, S_IWUSR | S_IRUSR); + DEBUGFS_ADD_FILE(csr, dir, S_IWUSR); + DEBUGFS_ADD_FILE(fh_reg, dir, S_IRUSR); +diff --git a/drivers/net/xen-netfront.c b/drivers/net/xen-netfront.c +index 226faab..fc35308 100644 +--- a/drivers/net/xen-netfront.c ++++ b/drivers/net/xen-netfront.c +@@ -1922,14 +1922,14 @@ static int __devexit xennet_remove(struct xenbus_device *dev) + + dev_dbg(&dev->dev, "%s\n", dev->nodename); + +- unregister_netdev(info->netdev); +- + xennet_disconnect_backend(info); + +- del_timer_sync(&info->rx_refill_timer); +- + xennet_sysfs_delif(info->netdev); + ++ unregister_netdev(info->netdev); ++ ++ del_timer_sync(&info->rx_refill_timer); ++ + free_percpu(info->stats); + + free_netdev(info->netdev); +diff --git a/drivers/oprofile/oprofile_perf.c b/drivers/oprofile/oprofile_perf.c +index da14432..efc4b7f 100644 +--- a/drivers/oprofile/oprofile_perf.c ++++ b/drivers/oprofile/oprofile_perf.c +@@ -25,7 +25,7 @@ static int 
oprofile_perf_enabled; + static DEFINE_MUTEX(oprofile_perf_mutex); + + static struct op_counter_config *counter_config; +-static struct perf_event **perf_events[nr_cpumask_bits]; ++static struct perf_event **perf_events[NR_CPUS]; + static int num_counters; + + /* +diff --git a/drivers/staging/iio/adc/ad7606_core.c b/drivers/staging/iio/adc/ad7606_core.c +index 54423ab..2ee187f 100644 +--- a/drivers/staging/iio/adc/ad7606_core.c ++++ b/drivers/staging/iio/adc/ad7606_core.c +@@ -241,6 +241,7 @@ static const struct attribute_group ad7606_attribute_group = { + .indexed = 1, \ + .channel = num, \ + .address = num, \ ++ .info_mask = (1 << IIO_CHAN_INFO_SCALE_SHARED), \ + .scan_index = num, \ + .scan_type = IIO_ST('s', 16, 16, 0), \ + } +diff --git a/drivers/staging/rtl8712/usb_intf.c b/drivers/staging/rtl8712/usb_intf.c +index ec41d38..f4b738f 100644 +--- a/drivers/staging/rtl8712/usb_intf.c ++++ b/drivers/staging/rtl8712/usb_intf.c +@@ -102,6 +102,8 @@ static struct usb_device_id rtl871x_usb_id_tbl[] = { + /* - */ + {USB_DEVICE(0x20F4, 0x646B)}, + {USB_DEVICE(0x083A, 0xC512)}, ++ {USB_DEVICE(0x25D4, 0x4CA1)}, ++ {USB_DEVICE(0x25D4, 0x4CAB)}, + + /* RTL8191SU */ + /* Realtek */ +diff --git a/drivers/staging/rts_pstor/rtsx_transport.c b/drivers/staging/rts_pstor/rtsx_transport.c +index 4e3d2c1..9b2e5c9 100644 +--- a/drivers/staging/rts_pstor/rtsx_transport.c ++++ b/drivers/staging/rts_pstor/rtsx_transport.c +@@ -335,6 +335,7 @@ static int rtsx_transfer_sglist_adma_partial(struct rtsx_chip *chip, u8 card, + int sg_cnt, i, resid; + int err = 0; + long timeleft; ++ struct scatterlist *sg_ptr; + u32 val = TRIG_DMA; + + if ((sg == NULL) || (num_sg <= 0) || !offset || !index) +@@ -371,7 +372,7 @@ static int rtsx_transfer_sglist_adma_partial(struct rtsx_chip *chip, u8 card, + sg_cnt = dma_map_sg(&(rtsx->pci->dev), sg, num_sg, dma_dir); + + resid = size; +- ++ sg_ptr = sg; + chip->sgi = 0; + /* Usually the next entry will be @sg@ + 1, but if this sg element + * is part of a chained scatterlist, it could jump to the start of +@@ -379,14 +380,14 @@ static int rtsx_transfer_sglist_adma_partial(struct rtsx_chip *chip, u8 card, + * the proper sg + */ + for (i = 0; i < *index; i++) +- sg = sg_next(sg); ++ sg_ptr = sg_next(sg_ptr); + for (i = *index; i < sg_cnt; i++) { + dma_addr_t addr; + unsigned int len; + u8 option; + +- addr = sg_dma_address(sg); +- len = sg_dma_len(sg); ++ addr = sg_dma_address(sg_ptr); ++ len = sg_dma_len(sg_ptr); + + RTSX_DEBUGP("DMA addr: 0x%x, Len: 0x%x\n", + (unsigned int)addr, len); +@@ -415,7 +416,7 @@ static int rtsx_transfer_sglist_adma_partial(struct rtsx_chip *chip, u8 card, + if (!resid) + break; + +- sg = sg_next(sg); ++ sg_ptr = sg_next(sg_ptr); + } + + RTSX_DEBUGP("SG table count = %d\n", chip->sgi); +diff --git a/drivers/usb/serial/cp210x.c b/drivers/usb/serial/cp210x.c +index aa0c43f..35e6b5f 100644 +--- a/drivers/usb/serial/cp210x.c ++++ b/drivers/usb/serial/cp210x.c +@@ -93,6 +93,7 @@ static const struct usb_device_id id_table[] = { + { USB_DEVICE(0x10C4, 0x814B) }, /* West Mountain Radio RIGtalk */ + { USB_DEVICE(0x10C4, 0x8156) }, /* B&G H3000 link cable */ + { USB_DEVICE(0x10C4, 0x815E) }, /* Helicomm IP-Link 1220-DVM */ ++ { USB_DEVICE(0x10C4, 0x815F) }, /* Timewave HamLinkUSB */ + { USB_DEVICE(0x10C4, 0x818B) }, /* AVIT Research USB to TTL */ + { USB_DEVICE(0x10C4, 0x819F) }, /* MJS USB Toslink Switcher */ + { USB_DEVICE(0x10C4, 0x81A6) }, /* ThinkOptics WavIt */ +@@ -134,7 +135,13 @@ static const struct usb_device_id id_table[] = { + { USB_DEVICE(0x10CE, 0xEA6A) 
}, /* Silicon Labs MobiData GPRS USB Modem 100EU */ + { USB_DEVICE(0x13AD, 0x9999) }, /* Baltech card reader */ + { USB_DEVICE(0x1555, 0x0004) }, /* Owen AC4 USB-RS485 Converter */ ++ { USB_DEVICE(0x166A, 0x0201) }, /* Clipsal 5500PACA C-Bus Pascal Automation Controller */ ++ { USB_DEVICE(0x166A, 0x0301) }, /* Clipsal 5800PC C-Bus Wireless PC Interface */ + { USB_DEVICE(0x166A, 0x0303) }, /* Clipsal 5500PCU C-Bus USB interface */ ++ { USB_DEVICE(0x166A, 0x0304) }, /* Clipsal 5000CT2 C-Bus Black and White Touchscreen */ ++ { USB_DEVICE(0x166A, 0x0305) }, /* Clipsal C-5000CT2 C-Bus Spectrum Colour Touchscreen */ ++ { USB_DEVICE(0x166A, 0x0401) }, /* Clipsal L51xx C-Bus Architectural Dimmer */ ++ { USB_DEVICE(0x166A, 0x0101) }, /* Clipsal 5560884 C-Bus Multi-room Audio Matrix Switcher */ + { USB_DEVICE(0x16D6, 0x0001) }, /* Jablotron serial interface */ + { USB_DEVICE(0x16DC, 0x0010) }, /* W-IE-NE-R Plein & Baus GmbH PL512 Power Supply */ + { USB_DEVICE(0x16DC, 0x0011) }, /* W-IE-NE-R Plein & Baus GmbH RCM Remote Control for MARATON Power Supply */ +@@ -146,7 +153,11 @@ static const struct usb_device_id id_table[] = { + { USB_DEVICE(0x1843, 0x0200) }, /* Vaisala USB Instrument Cable */ + { USB_DEVICE(0x18EF, 0xE00F) }, /* ELV USB-I2C-Interface */ + { USB_DEVICE(0x1BE3, 0x07A6) }, /* WAGO 750-923 USB Service Cable */ ++ { USB_DEVICE(0x1E29, 0x0102) }, /* Festo CPX-USB */ ++ { USB_DEVICE(0x1E29, 0x0501) }, /* Festo CMSP */ + { USB_DEVICE(0x3195, 0xF190) }, /* Link Instruments MSO-19 */ ++ { USB_DEVICE(0x3195, 0xF280) }, /* Link Instruments MSO-28 */ ++ { USB_DEVICE(0x3195, 0xF281) }, /* Link Instruments MSO-28 */ + { USB_DEVICE(0x413C, 0x9500) }, /* DW700 GPS USB interface */ + { } /* Terminating Entry */ + }; +diff --git a/drivers/usb/serial/option.c b/drivers/usb/serial/option.c +index 61d6c31..21a4734 100644 +--- a/drivers/usb/serial/option.c ++++ b/drivers/usb/serial/option.c +@@ -235,6 +235,7 @@ static void option_instat_callback(struct urb *urb); + #define NOVATELWIRELESS_PRODUCT_G1 0xA001 + #define NOVATELWIRELESS_PRODUCT_G1_M 0xA002 + #define NOVATELWIRELESS_PRODUCT_G2 0xA010 ++#define NOVATELWIRELESS_PRODUCT_MC551 0xB001 + + /* AMOI PRODUCTS */ + #define AMOI_VENDOR_ID 0x1614 +@@ -496,6 +497,10 @@ static void option_instat_callback(struct urb *urb); + /* MediaTek products */ + #define MEDIATEK_VENDOR_ID 0x0e8d + ++/* Cellient products */ ++#define CELLIENT_VENDOR_ID 0x2692 ++#define CELLIENT_PRODUCT_MEN200 0x9005 ++ + /* some devices interfaces need special handling due to a number of reasons */ + enum option_blacklist_reason { + OPTION_BLACKLIST_NONE = 0, +@@ -730,6 +735,8 @@ static const struct usb_device_id option_ids[] = { + { USB_DEVICE(NOVATELWIRELESS_VENDOR_ID, NOVATELWIRELESS_PRODUCT_G1) }, + { USB_DEVICE(NOVATELWIRELESS_VENDOR_ID, NOVATELWIRELESS_PRODUCT_G1_M) }, + { USB_DEVICE(NOVATELWIRELESS_VENDOR_ID, NOVATELWIRELESS_PRODUCT_G2) }, ++ /* Novatel Ovation MC551 a.k.a. 
Verizon USB551L */ ++ { USB_DEVICE_AND_INTERFACE_INFO(NOVATELWIRELESS_VENDOR_ID, NOVATELWIRELESS_PRODUCT_MC551, 0xff, 0xff, 0xff) }, + + { USB_DEVICE(AMOI_VENDOR_ID, AMOI_PRODUCT_H01) }, + { USB_DEVICE(AMOI_VENDOR_ID, AMOI_PRODUCT_H01A) }, +@@ -1227,6 +1234,7 @@ static const struct usb_device_id option_ids[] = { + { USB_DEVICE_AND_INTERFACE_INFO(MEDIATEK_VENDOR_ID, 0x00a1, 0xff, 0x02, 0x01) }, + { USB_DEVICE_AND_INTERFACE_INFO(MEDIATEK_VENDOR_ID, 0x00a2, 0xff, 0x00, 0x00) }, + { USB_DEVICE_AND_INTERFACE_INFO(MEDIATEK_VENDOR_ID, 0x00a2, 0xff, 0x02, 0x01) }, /* MediaTek MT6276M modem & app port */ ++ { USB_DEVICE(CELLIENT_VENDOR_ID, CELLIENT_PRODUCT_MEN200) }, + { } /* Terminating entry */ + }; + MODULE_DEVICE_TABLE(usb, option_ids); +diff --git a/fs/nilfs2/gcinode.c b/fs/nilfs2/gcinode.c +index 08a07a2..57ceaf3 100644 +--- a/fs/nilfs2/gcinode.c ++++ b/fs/nilfs2/gcinode.c +@@ -191,6 +191,8 @@ void nilfs_remove_all_gcinodes(struct the_nilfs *nilfs) + while (!list_empty(head)) { + ii = list_first_entry(head, struct nilfs_inode_info, i_dirty); + list_del_init(&ii->i_dirty); ++ truncate_inode_pages(&ii->vfs_inode.i_data, 0); ++ nilfs_btnode_cache_clear(&ii->i_btnode_cache); + iput(&ii->vfs_inode); + } + } +diff --git a/fs/nilfs2/segment.c b/fs/nilfs2/segment.c +index bb24ab6..6f24e67 100644 +--- a/fs/nilfs2/segment.c ++++ b/fs/nilfs2/segment.c +@@ -2309,6 +2309,8 @@ nilfs_remove_written_gcinodes(struct the_nilfs *nilfs, struct list_head *head) + if (!test_bit(NILFS_I_UPDATED, &ii->i_state)) + continue; + list_del_init(&ii->i_dirty); ++ truncate_inode_pages(&ii->vfs_inode.i_data, 0); ++ nilfs_btnode_cache_clear(&ii->i_btnode_cache); + iput(&ii->vfs_inode); + } + } +diff --git a/include/asm-generic/pgtable.h b/include/asm-generic/pgtable.h +index a03c098..bc00876 100644 +--- a/include/asm-generic/pgtable.h ++++ b/include/asm-generic/pgtable.h +@@ -445,6 +445,18 @@ static inline int pmd_write(pmd_t pmd) + #endif /* __HAVE_ARCH_PMD_WRITE */ + #endif /* CONFIG_TRANSPARENT_HUGEPAGE */ + ++#ifndef pmd_read_atomic ++static inline pmd_t pmd_read_atomic(pmd_t *pmdp) ++{ ++ /* ++ * Depend on compiler for an atomic pmd read. NOTE: this is ++ * only going to work, if the pmdval_t isn't larger than ++ * an unsigned long. ++ */ ++ return *pmdp; ++} ++#endif ++ + /* + * This function is meant to be used by sites walking pagetables with + * the mmap_sem hold in read mode to protect against MADV_DONTNEED and +@@ -458,14 +470,30 @@ static inline int pmd_write(pmd_t pmd) + * undefined so behaving like if the pmd was none is safe (because it + * can return none anyway). The compiler level barrier() is critically + * important to compute the two checks atomically on the same pmdval. ++ * ++ * For 32bit kernels with a 64bit large pmd_t this automatically takes ++ * care of reading the pmd atomically to avoid SMP race conditions ++ * against pmd_populate() when the mmap_sem is hold for reading by the ++ * caller (a special atomic read not done by "gcc" as in the generic ++ * version above, is also needed when THP is disabled because the page ++ * fault can populate the pmd from under us). + */ + static inline int pmd_none_or_trans_huge_or_clear_bad(pmd_t *pmd) + { +- /* depend on compiler for an atomic pmd read */ +- pmd_t pmdval = *pmd; ++ pmd_t pmdval = pmd_read_atomic(pmd); + /* + * The barrier will stabilize the pmdval in a register or on + * the stack so that it will stop changing under the code. 
++ * ++ * When CONFIG_TRANSPARENT_HUGEPAGE=y on x86 32bit PAE, ++ * pmd_read_atomic is allowed to return a not atomic pmdval ++ * (for example pointing to an hugepage that has never been ++ * mapped in the pmd). The below checks will only care about ++ * the low part of the pmd with 32bit PAE x86 anyway, with the ++ * exception of pmd_none(). So the important thing is that if ++ * the low part of the pmd is found null, the high part will ++ * be also null or the pmd_none() check below would be ++ * confused. + */ + #ifdef CONFIG_TRANSPARENT_HUGEPAGE + barrier(); +diff --git a/net/batman-adv/routing.c b/net/batman-adv/routing.c +index f961cc5..da587ad 100644 +--- a/net/batman-adv/routing.c ++++ b/net/batman-adv/routing.c +@@ -619,6 +619,8 @@ int recv_tt_query(struct sk_buff *skb, struct hard_iface *recv_if) + /* packet needs to be linearized to access the TT changes */ + if (skb_linearize(skb) < 0) + goto out; ++ /* skb_linearize() possibly changed skb->data */ ++ tt_query = (struct tt_query_packet *)skb->data; + + if (is_my_mac(tt_query->dst)) + handle_tt_response(bat_priv, tt_query); +diff --git a/net/batman-adv/translation-table.c b/net/batman-adv/translation-table.c +index 5f09a57..088af45 100644 +--- a/net/batman-adv/translation-table.c ++++ b/net/batman-adv/translation-table.c +@@ -1816,10 +1816,10 @@ bool is_ap_isolated(struct bat_priv *bat_priv, uint8_t *src, uint8_t *dst) + { + struct tt_local_entry *tt_local_entry = NULL; + struct tt_global_entry *tt_global_entry = NULL; +- bool ret = true; ++ bool ret = false; + + if (!atomic_read(&bat_priv->ap_isolation)) +- return false; ++ goto out; + + tt_local_entry = tt_local_hash_find(bat_priv, dst); + if (!tt_local_entry) +@@ -1829,10 +1829,10 @@ bool is_ap_isolated(struct bat_priv *bat_priv, uint8_t *src, uint8_t *dst) + if (!tt_global_entry) + goto out; + +- if (_is_ap_isolated(tt_local_entry, tt_global_entry)) ++ if (!_is_ap_isolated(tt_local_entry, tt_global_entry)) + goto out; + +- ret = false; ++ ret = true; + + out: + if (tt_global_entry) +diff --git a/net/wireless/reg.c b/net/wireless/reg.c +index c1c99dd..d57d05b 100644 +--- a/net/wireless/reg.c ++++ b/net/wireless/reg.c +@@ -1369,7 +1369,7 @@ static void reg_set_request_processed(void) + spin_unlock(®_requests_lock); + + if (last_request->initiator == NL80211_REGDOM_SET_BY_USER) +- cancel_delayed_work_sync(®_timeout); ++ cancel_delayed_work(®_timeout); + + if (need_more_processing) + schedule_work(®_work); +diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c +index 0005bde..5f096a5 100644 +--- a/sound/pci/hda/patch_realtek.c ++++ b/sound/pci/hda/patch_realtek.c +@@ -5988,6 +5988,7 @@ static const struct hda_codec_preset snd_hda_preset_realtek[] = { + { .id = 0x10ec0272, .name = "ALC272", .patch = patch_alc662 }, + { .id = 0x10ec0275, .name = "ALC275", .patch = patch_alc269 }, + { .id = 0x10ec0276, .name = "ALC276", .patch = patch_alc269 }, ++ { .id = 0x10ec0280, .name = "ALC280", .patch = patch_alc269 }, + { .id = 0x10ec0861, .rev = 0x100340, .name = "ALC660", + .patch = patch_alc861 }, + { .id = 0x10ec0660, .name = "ALC660-VD", .patch = patch_alc861vd }, +diff --git a/tools/hv/hv_kvp_daemon.c b/tools/hv/hv_kvp_daemon.c +index 11224ed..323d4d9 100644 +--- a/tools/hv/hv_kvp_daemon.c ++++ b/tools/hv/hv_kvp_daemon.c +@@ -384,14 +384,18 @@ int main(void) + pfd.fd = fd; + + while (1) { ++ struct sockaddr *addr_p = (struct sockaddr *) &addr; ++ socklen_t addr_l = sizeof(addr); + pfd.events = POLLIN; + pfd.revents = 0; + poll(&pfd, 1, -1); + +- len = recv(fd, 
kvp_recv_buffer, sizeof(kvp_recv_buffer), 0); ++ len = recvfrom(fd, kvp_recv_buffer, sizeof(kvp_recv_buffer), 0, ++ addr_p, &addr_l); + +- if (len < 0) { +- syslog(LOG_ERR, "recv failed; error:%d", len); ++ if (len < 0 || addr.nl_pid) { ++ syslog(LOG_ERR, "recvfrom failed; pid:%u error:%d %s", ++ addr.nl_pid, errno, strerror(errno)); + close(fd); + return -1; + } diff --git a/3.2.34/bump/1022_linux-3.2.23.patch b/3.2.34/bump/1022_linux-3.2.23.patch new file mode 100644 index 0000000..3d796d0 --- /dev/null +++ b/3.2.34/bump/1022_linux-3.2.23.patch @@ -0,0 +1,1862 @@ +diff --git a/Makefile b/Makefile +index 9a7d921..40d1e3b 100644 +--- a/Makefile ++++ b/Makefile +@@ -1,6 +1,6 @@ + VERSION = 3 + PATCHLEVEL = 2 +-SUBLEVEL = 22 ++SUBLEVEL = 23 + EXTRAVERSION = + NAME = Saber-toothed Squirrel + +diff --git a/arch/arm/kernel/smp.c b/arch/arm/kernel/smp.c +index e10e59a..1d1710e 100644 +--- a/arch/arm/kernel/smp.c ++++ b/arch/arm/kernel/smp.c +@@ -471,9 +471,7 @@ static DEFINE_PER_CPU(struct clock_event_device, percpu_clockevent); + static void ipi_timer(void) + { + struct clock_event_device *evt = &__get_cpu_var(percpu_clockevent); +- irq_enter(); + evt->event_handler(evt); +- irq_exit(); + } + + #ifdef CONFIG_GENERIC_CLOCKEVENTS_BROADCAST +@@ -572,7 +570,9 @@ void handle_IPI(int ipinr, struct pt_regs *regs) + + switch (ipinr) { + case IPI_TIMER: ++ irq_enter(); + ipi_timer(); ++ irq_exit(); + break; + + case IPI_RESCHEDULE: +@@ -580,15 +580,21 @@ void handle_IPI(int ipinr, struct pt_regs *regs) + break; + + case IPI_CALL_FUNC: ++ irq_enter(); + generic_smp_call_function_interrupt(); ++ irq_exit(); + break; + + case IPI_CALL_FUNC_SINGLE: ++ irq_enter(); + generic_smp_call_function_single_interrupt(); ++ irq_exit(); + break; + + case IPI_CPU_STOP: ++ irq_enter(); + ipi_cpu_stop(cpu); ++ irq_exit(); + break; + + default: +diff --git a/arch/powerpc/kvm/book3s_hv_rmhandlers.S b/arch/powerpc/kvm/book3s_hv_rmhandlers.S +index 44d8829..5e8dc08 100644 +--- a/arch/powerpc/kvm/book3s_hv_rmhandlers.S ++++ b/arch/powerpc/kvm/book3s_hv_rmhandlers.S +@@ -763,7 +763,7 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_201) + lwz r3,VCORE_NAPPING_THREADS(r5) + lwz r4,VCPU_PTID(r9) + li r0,1 +- sldi r0,r0,r4 ++ sld r0,r0,r4 + andc. 
r3,r3,r0 /* no sense IPI'ing ourselves */ + beq 43f + mulli r4,r4,PACA_SIZE /* get paca for thread 0 */ +diff --git a/arch/powerpc/xmon/xmon.c b/arch/powerpc/xmon/xmon.c +index 03a217a..b7e63d8 100644 +--- a/arch/powerpc/xmon/xmon.c ++++ b/arch/powerpc/xmon/xmon.c +@@ -975,7 +975,7 @@ static int cpu_cmd(void) + /* print cpus waiting or in xmon */ + printf("cpus stopped:"); + count = 0; +- for (cpu = 0; cpu < NR_CPUS; ++cpu) { ++ for_each_possible_cpu(cpu) { + if (cpumask_test_cpu(cpu, &cpus_in_xmon)) { + if (count == 0) + printf(" %x", cpu); +diff --git a/drivers/block/umem.c b/drivers/block/umem.c +index aa27120..9a72277 100644 +--- a/drivers/block/umem.c ++++ b/drivers/block/umem.c +@@ -513,6 +513,44 @@ static void process_page(unsigned long data) + } + } + ++struct mm_plug_cb { ++ struct blk_plug_cb cb; ++ struct cardinfo *card; ++}; ++ ++static void mm_unplug(struct blk_plug_cb *cb) ++{ ++ struct mm_plug_cb *mmcb = container_of(cb, struct mm_plug_cb, cb); ++ ++ spin_lock_irq(&mmcb->card->lock); ++ activate(mmcb->card); ++ spin_unlock_irq(&mmcb->card->lock); ++ kfree(mmcb); ++} ++ ++static int mm_check_plugged(struct cardinfo *card) ++{ ++ struct blk_plug *plug = current->plug; ++ struct mm_plug_cb *mmcb; ++ ++ if (!plug) ++ return 0; ++ ++ list_for_each_entry(mmcb, &plug->cb_list, cb.list) { ++ if (mmcb->cb.callback == mm_unplug && mmcb->card == card) ++ return 1; ++ } ++ /* Not currently on the callback list */ ++ mmcb = kmalloc(sizeof(*mmcb), GFP_ATOMIC); ++ if (!mmcb) ++ return 0; ++ ++ mmcb->card = card; ++ mmcb->cb.callback = mm_unplug; ++ list_add(&mmcb->cb.list, &plug->cb_list); ++ return 1; ++} ++ + static void mm_make_request(struct request_queue *q, struct bio *bio) + { + struct cardinfo *card = q->queuedata; +@@ -523,6 +561,8 @@ static void mm_make_request(struct request_queue *q, struct bio *bio) + *card->biotail = bio; + bio->bi_next = NULL; + card->biotail = &bio->bi_next; ++ if (bio->bi_rw & REQ_SYNC || !mm_check_plugged(card)) ++ activate(card); + spin_unlock_irq(&card->lock); + + return; +diff --git a/drivers/gpu/drm/i915/i915_dma.c b/drivers/gpu/drm/i915/i915_dma.c +index c4da951..ca67338 100644 +--- a/drivers/gpu/drm/i915/i915_dma.c ++++ b/drivers/gpu/drm/i915/i915_dma.c +@@ -1890,6 +1890,27 @@ ips_ping_for_i915_load(void) + } + } + ++static void i915_kick_out_firmware_fb(struct drm_i915_private *dev_priv) ++{ ++ struct apertures_struct *ap; ++ struct pci_dev *pdev = dev_priv->dev->pdev; ++ bool primary; ++ ++ ap = alloc_apertures(1); ++ if (!ap) ++ return; ++ ++ ap->ranges[0].base = dev_priv->dev->agp->base; ++ ap->ranges[0].size = ++ dev_priv->mm.gtt->gtt_mappable_entries << PAGE_SHIFT; ++ primary = ++ pdev->resource[PCI_ROM_RESOURCE].flags & IORESOURCE_ROM_SHADOW; ++ ++ remove_conflicting_framebuffers(ap, "inteldrmfb", primary); ++ ++ kfree(ap); ++} ++ + /** + * i915_driver_load - setup chip and create an initial config + * @dev: DRM device +@@ -1927,6 +1948,15 @@ int i915_driver_load(struct drm_device *dev, unsigned long flags) + goto free_priv; + } + ++ dev_priv->mm.gtt = intel_gtt_get(); ++ if (!dev_priv->mm.gtt) { ++ DRM_ERROR("Failed to initialize GTT\n"); ++ ret = -ENODEV; ++ goto put_bridge; ++ } ++ ++ i915_kick_out_firmware_fb(dev_priv); ++ + /* overlay on gen2 is broken and can't address above 1G */ + if (IS_GEN2(dev)) + dma_set_coherent_mask(&dev->pdev->dev, DMA_BIT_MASK(30)); +@@ -1950,13 +1980,6 @@ int i915_driver_load(struct drm_device *dev, unsigned long flags) + goto put_bridge; + } + +- dev_priv->mm.gtt = intel_gtt_get(); +- if (!dev_priv->mm.gtt) 
{ +- DRM_ERROR("Failed to initialize GTT\n"); +- ret = -ENODEV; +- goto out_rmmap; +- } +- + agp_size = dev_priv->mm.gtt->gtt_mappable_entries << PAGE_SHIFT; + + dev_priv->mm.gtt_mapping = +diff --git a/drivers/md/persistent-data/dm-space-map-checker.c b/drivers/md/persistent-data/dm-space-map-checker.c +index 50ed53b..fc90c11 100644 +--- a/drivers/md/persistent-data/dm-space-map-checker.c ++++ b/drivers/md/persistent-data/dm-space-map-checker.c +@@ -8,6 +8,7 @@ + + #include + #include ++#include + + #ifdef CONFIG_DM_DEBUG_SPACE_MAPS + +@@ -89,13 +90,23 @@ static int ca_create(struct count_array *ca, struct dm_space_map *sm) + + ca->nr = nr_blocks; + ca->nr_free = nr_blocks; +- ca->counts = kzalloc(sizeof(*ca->counts) * nr_blocks, GFP_KERNEL); +- if (!ca->counts) +- return -ENOMEM; ++ ++ if (!nr_blocks) ++ ca->counts = NULL; ++ else { ++ ca->counts = vzalloc(sizeof(*ca->counts) * nr_blocks); ++ if (!ca->counts) ++ return -ENOMEM; ++ } + + return 0; + } + ++static void ca_destroy(struct count_array *ca) ++{ ++ vfree(ca->counts); ++} ++ + static int ca_load(struct count_array *ca, struct dm_space_map *sm) + { + int r; +@@ -126,12 +137,14 @@ static int ca_load(struct count_array *ca, struct dm_space_map *sm) + static int ca_extend(struct count_array *ca, dm_block_t extra_blocks) + { + dm_block_t nr_blocks = ca->nr + extra_blocks; +- uint32_t *counts = kzalloc(sizeof(*counts) * nr_blocks, GFP_KERNEL); ++ uint32_t *counts = vzalloc(sizeof(*counts) * nr_blocks); + if (!counts) + return -ENOMEM; + +- memcpy(counts, ca->counts, sizeof(*counts) * ca->nr); +- kfree(ca->counts); ++ if (ca->counts) { ++ memcpy(counts, ca->counts, sizeof(*counts) * ca->nr); ++ ca_destroy(ca); ++ } + ca->nr = nr_blocks; + ca->nr_free += extra_blocks; + ca->counts = counts; +@@ -151,11 +164,6 @@ static int ca_commit(struct count_array *old, struct count_array *new) + return 0; + } + +-static void ca_destroy(struct count_array *ca) +-{ +- kfree(ca->counts); +-} +- + /*----------------------------------------------------------------*/ + + struct sm_checker { +@@ -343,25 +351,25 @@ struct dm_space_map *dm_sm_checker_create(struct dm_space_map *sm) + int r; + struct sm_checker *smc; + +- if (!sm) +- return NULL; ++ if (IS_ERR_OR_NULL(sm)) ++ return ERR_PTR(-EINVAL); + + smc = kmalloc(sizeof(*smc), GFP_KERNEL); + if (!smc) +- return NULL; ++ return ERR_PTR(-ENOMEM); + + memcpy(&smc->sm, &ops_, sizeof(smc->sm)); + r = ca_create(&smc->old_counts, sm); + if (r) { + kfree(smc); +- return NULL; ++ return ERR_PTR(r); + } + + r = ca_create(&smc->counts, sm); + if (r) { + ca_destroy(&smc->old_counts); + kfree(smc); +- return NULL; ++ return ERR_PTR(r); + } + + smc->real_sm = sm; +@@ -371,7 +379,7 @@ struct dm_space_map *dm_sm_checker_create(struct dm_space_map *sm) + ca_destroy(&smc->counts); + ca_destroy(&smc->old_counts); + kfree(smc); +- return NULL; ++ return ERR_PTR(r); + } + + r = ca_commit(&smc->old_counts, &smc->counts); +@@ -379,7 +387,7 @@ struct dm_space_map *dm_sm_checker_create(struct dm_space_map *sm) + ca_destroy(&smc->counts); + ca_destroy(&smc->old_counts); + kfree(smc); +- return NULL; ++ return ERR_PTR(r); + } + + return &smc->sm; +@@ -391,25 +399,25 @@ struct dm_space_map *dm_sm_checker_create_fresh(struct dm_space_map *sm) + int r; + struct sm_checker *smc; + +- if (!sm) +- return NULL; ++ if (IS_ERR_OR_NULL(sm)) ++ return ERR_PTR(-EINVAL); + + smc = kmalloc(sizeof(*smc), GFP_KERNEL); + if (!smc) +- return NULL; ++ return ERR_PTR(-ENOMEM); + + memcpy(&smc->sm, &ops_, sizeof(smc->sm)); + r = 
ca_create(&smc->old_counts, sm); + if (r) { + kfree(smc); +- return NULL; ++ return ERR_PTR(r); + } + + r = ca_create(&smc->counts, sm); + if (r) { + ca_destroy(&smc->old_counts); + kfree(smc); +- return NULL; ++ return ERR_PTR(r); + } + + smc->real_sm = sm; +diff --git a/drivers/md/persistent-data/dm-space-map-disk.c b/drivers/md/persistent-data/dm-space-map-disk.c +index fc469ba..3d0ed53 100644 +--- a/drivers/md/persistent-data/dm-space-map-disk.c ++++ b/drivers/md/persistent-data/dm-space-map-disk.c +@@ -290,7 +290,16 @@ struct dm_space_map *dm_sm_disk_create(struct dm_transaction_manager *tm, + dm_block_t nr_blocks) + { + struct dm_space_map *sm = dm_sm_disk_create_real(tm, nr_blocks); +- return dm_sm_checker_create_fresh(sm); ++ struct dm_space_map *smc; ++ ++ if (IS_ERR_OR_NULL(sm)) ++ return sm; ++ ++ smc = dm_sm_checker_create_fresh(sm); ++ if (IS_ERR(smc)) ++ dm_sm_destroy(sm); ++ ++ return smc; + } + EXPORT_SYMBOL_GPL(dm_sm_disk_create); + +diff --git a/drivers/md/persistent-data/dm-transaction-manager.c b/drivers/md/persistent-data/dm-transaction-manager.c +index 6f8d387..ba54aac 100644 +--- a/drivers/md/persistent-data/dm-transaction-manager.c ++++ b/drivers/md/persistent-data/dm-transaction-manager.c +@@ -138,6 +138,9 @@ EXPORT_SYMBOL_GPL(dm_tm_create_non_blocking_clone); + + void dm_tm_destroy(struct dm_transaction_manager *tm) + { ++ if (!tm->is_clone) ++ wipe_shadow_table(tm); ++ + kfree(tm); + } + EXPORT_SYMBOL_GPL(dm_tm_destroy); +@@ -342,8 +345,10 @@ static int dm_tm_create_internal(struct dm_block_manager *bm, + } + + *sm = dm_sm_checker_create(inner); +- if (!*sm) ++ if (IS_ERR(*sm)) { ++ r = PTR_ERR(*sm); + goto bad2; ++ } + + } else { + r = dm_bm_write_lock(dm_tm_get_bm(*tm), sb_location, +@@ -362,8 +367,10 @@ static int dm_tm_create_internal(struct dm_block_manager *bm, + } + + *sm = dm_sm_checker_create(inner); +- if (!*sm) ++ if (IS_ERR(*sm)) { ++ r = PTR_ERR(*sm); + goto bad2; ++ } + } + + return 0; +diff --git a/drivers/md/raid10.c b/drivers/md/raid10.c +index b219449..7a9eef6 100644 +--- a/drivers/md/raid10.c ++++ b/drivers/md/raid10.c +@@ -1919,7 +1919,7 @@ static void fix_read_error(struct r10conf *conf, struct mddev *mddev, struct r10 + if (r10_sync_page_io(rdev, + r10_bio->devs[sl].addr + + sect, +- s<<9, conf->tmppage, WRITE) ++ s, conf->tmppage, WRITE) + == 0) { + /* Well, this device is dead */ + printk(KERN_NOTICE +@@ -1956,7 +1956,7 @@ static void fix_read_error(struct r10conf *conf, struct mddev *mddev, struct r10 + switch (r10_sync_page_io(rdev, + r10_bio->devs[sl].addr + + sect, +- s<<9, conf->tmppage, ++ s, conf->tmppage, + READ)) { + case 0: + /* Well, this device is dead */ +@@ -2119,7 +2119,7 @@ read_more: + rdev = conf->mirrors[mirror].rdev; + printk_ratelimited( + KERN_ERR +- "md/raid10:%s: %s: redirecting" ++ "md/raid10:%s: %s: redirecting " + "sector %llu to another mirror\n", + mdname(mddev), + bdevname(rdev->bdev, b), +@@ -2436,6 +2436,12 @@ static sector_t sync_request(struct mddev *mddev, sector_t sector_nr, + /* want to reconstruct this device */ + rb2 = r10_bio; + sect = raid10_find_virt(conf, sector_nr, i); ++ if (sect >= mddev->resync_max_sectors) { ++ /* last stripe is not complete - don't ++ * try to recover this sector. 
++ */ ++ continue; ++ } + /* Unless we are doing a full sync, we only need + * to recover the block if it is set in the bitmap + */ +diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c +index 858fdbb..6ba4954 100644 +--- a/drivers/md/raid5.c ++++ b/drivers/md/raid5.c +@@ -542,6 +542,12 @@ static void ops_run_io(struct stripe_head *sh, struct stripe_head_state *s) + * a chance*/ + md_check_recovery(conf->mddev); + } ++ /* ++ * Because md_wait_for_blocked_rdev ++ * will dec nr_pending, we must ++ * increment it first. ++ */ ++ atomic_inc(&rdev->nr_pending); + md_wait_for_blocked_rdev(rdev, conf->mddev); + } else { + /* Acknowledged bad block - skip the write */ +@@ -3621,7 +3627,6 @@ static int chunk_aligned_read(struct mddev *mddev, struct bio * raid_bio) + raid_bio->bi_next = (void*)rdev; + align_bi->bi_bdev = rdev->bdev; + align_bi->bi_flags &= ~(1 << BIO_SEG_VALID); +- align_bi->bi_sector += rdev->data_offset; + + if (!bio_fits_rdev(align_bi) || + is_badblock(rdev, align_bi->bi_sector, align_bi->bi_size>>9, +@@ -3632,6 +3637,9 @@ static int chunk_aligned_read(struct mddev *mddev, struct bio * raid_bio) + return 0; + } + ++ /* No reshape active, so we can trust rdev->data_offset */ ++ align_bi->bi_sector += rdev->data_offset; ++ + spin_lock_irq(&conf->device_lock); + wait_event_lock_irq(conf->wait_for_stripe, + conf->quiesce == 0, +diff --git a/drivers/mtd/nand/cafe_nand.c b/drivers/mtd/nand/cafe_nand.c +index 72d3f23..68ecf48 100644 +--- a/drivers/mtd/nand/cafe_nand.c ++++ b/drivers/mtd/nand/cafe_nand.c +@@ -102,7 +102,7 @@ static const char *part_probes[] = { "cmdlinepart", "RedBoot", NULL }; + static int cafe_device_ready(struct mtd_info *mtd) + { + struct cafe_priv *cafe = mtd->priv; +- int result = !!(cafe_readl(cafe, NAND_STATUS) | 0x40000000); ++ int result = !!(cafe_readl(cafe, NAND_STATUS) & 0x40000000); + uint32_t irqs = cafe_readl(cafe, NAND_IRQ); + + cafe_writel(cafe, irqs, NAND_IRQ); +diff --git a/drivers/net/bonding/bond_main.c b/drivers/net/bonding/bond_main.c +index f65e0b9..1a88e38 100644 +--- a/drivers/net/bonding/bond_main.c ++++ b/drivers/net/bonding/bond_main.c +@@ -77,6 +77,7 @@ + #include + #include + #include ++#include + #include "bonding.h" + #include "bond_3ad.h" + #include "bond_alb.h" +@@ -382,8 +383,6 @@ struct vlan_entry *bond_next_vlan(struct bonding *bond, struct vlan_entry *curr) + return next; + } + +-#define bond_queue_mapping(skb) (*(u16 *)((skb)->cb)) +- + /** + * bond_dev_queue_xmit - Prepare skb for xmit. 
+ * +@@ -396,7 +395,9 @@ int bond_dev_queue_xmit(struct bonding *bond, struct sk_buff *skb, + { + skb->dev = slave_dev; + +- skb->queue_mapping = bond_queue_mapping(skb); ++ BUILD_BUG_ON(sizeof(skb->queue_mapping) != ++ sizeof(qdisc_skb_cb(skb)->bond_queue_mapping)); ++ skb->queue_mapping = qdisc_skb_cb(skb)->bond_queue_mapping; + + if (unlikely(netpoll_tx_running(slave_dev))) + bond_netpoll_send_skb(bond_get_slave_by_dev(bond, slave_dev), skb); +@@ -4151,7 +4152,7 @@ static u16 bond_select_queue(struct net_device *dev, struct sk_buff *skb) + /* + * Save the original txq to restore before passing to the driver + */ +- bond_queue_mapping(skb) = skb->queue_mapping; ++ qdisc_skb_cb(skb)->bond_queue_mapping = skb->queue_mapping; + + if (unlikely(txq >= dev->real_num_tx_queues)) { + do { +diff --git a/drivers/net/dummy.c b/drivers/net/dummy.c +index eeac9ca..68fe73c 100644 +--- a/drivers/net/dummy.c ++++ b/drivers/net/dummy.c +@@ -37,6 +37,7 @@ + #include + #include + #include ++#include + + static int numdummies = 1; + +@@ -186,8 +187,10 @@ static int __init dummy_init_module(void) + rtnl_lock(); + err = __rtnl_link_register(&dummy_link_ops); + +- for (i = 0; i < numdummies && !err; i++) ++ for (i = 0; i < numdummies && !err; i++) { + err = dummy_init_one(); ++ cond_resched(); ++ } + if (err < 0) + __rtnl_link_unregister(&dummy_link_ops); + rtnl_unlock(); +diff --git a/drivers/net/ethernet/emulex/benet/be_main.c b/drivers/net/ethernet/emulex/benet/be_main.c +index bf266a0..36c7c4e 100644 +--- a/drivers/net/ethernet/emulex/benet/be_main.c ++++ b/drivers/net/ethernet/emulex/benet/be_main.c +@@ -696,6 +696,8 @@ static netdev_tx_t be_xmit(struct sk_buff *skb, + + copied = make_tx_wrbs(adapter, txq, skb, wrb_cnt, dummy_wrb); + if (copied) { ++ int gso_segs = skb_shinfo(skb)->gso_segs; ++ + /* record the sent skb in the sent_skb table */ + BUG_ON(txo->sent_skb_list[start]); + txo->sent_skb_list[start] = skb; +@@ -713,8 +715,7 @@ static netdev_tx_t be_xmit(struct sk_buff *skb, + + be_txq_notify(adapter, txq->id, wrb_cnt); + +- be_tx_stats_update(txo, wrb_cnt, copied, +- skb_shinfo(skb)->gso_segs, stopped); ++ be_tx_stats_update(txo, wrb_cnt, copied, gso_segs, stopped); + } else { + txq->head = start; + dev_kfree_skb_any(skb); +diff --git a/drivers/net/ethernet/marvell/sky2.c b/drivers/net/ethernet/marvell/sky2.c +index 65c51ff..11ddd838 100644 +--- a/drivers/net/ethernet/marvell/sky2.c ++++ b/drivers/net/ethernet/marvell/sky2.c +@@ -4361,10 +4361,12 @@ static int sky2_set_features(struct net_device *dev, u32 features) + struct sky2_port *sky2 = netdev_priv(dev); + u32 changed = dev->features ^ features; + +- if (changed & NETIF_F_RXCSUM) { +- u32 on = features & NETIF_F_RXCSUM; +- sky2_write32(sky2->hw, Q_ADDR(rxqaddr[sky2->port], Q_CSR), +- on ? BMU_ENA_RX_CHKSUM : BMU_DIS_RX_CHKSUM); ++ if ((changed & NETIF_F_RXCSUM) && ++ !(sky2->hw->flags & SKY2_HW_NEW_LE)) { ++ sky2_write32(sky2->hw, ++ Q_ADDR(rxqaddr[sky2->port], Q_CSR), ++ (features & NETIF_F_RXCSUM) ++ ? 
BMU_ENA_RX_CHKSUM : BMU_DIS_RX_CHKSUM); + } + + if (changed & NETIF_F_RXHASH) +diff --git a/drivers/net/wireless/ath/ath.h b/drivers/net/wireless/ath/ath.h +index 0f9ee46..4cc4a8b 100644 +--- a/drivers/net/wireless/ath/ath.h ++++ b/drivers/net/wireless/ath/ath.h +@@ -143,6 +143,7 @@ struct ath_common { + u32 keymax; + DECLARE_BITMAP(keymap, ATH_KEYMAX); + DECLARE_BITMAP(tkip_keymap, ATH_KEYMAX); ++ DECLARE_BITMAP(ccmp_keymap, ATH_KEYMAX); + enum ath_crypt_caps crypt_caps; + + unsigned int clockrate; +diff --git a/drivers/net/wireless/ath/ath9k/hw.c b/drivers/net/wireless/ath/ath9k/hw.c +index 6973620..7f97164 100644 +--- a/drivers/net/wireless/ath/ath9k/hw.c ++++ b/drivers/net/wireless/ath/ath9k/hw.c +@@ -557,7 +557,7 @@ static int __ath9k_hw_init(struct ath_hw *ah) + + if (ah->config.serialize_regmode == SER_REG_MODE_AUTO) { + if (ah->hw_version.macVersion == AR_SREV_VERSION_5416_PCI || +- ((AR_SREV_9160(ah) || AR_SREV_9280(ah)) && ++ ((AR_SREV_9160(ah) || AR_SREV_9280(ah) || AR_SREV_9287(ah)) && + !ah->is_pciexpress)) { + ah->config.serialize_regmode = + SER_REG_MODE_ON; +diff --git a/drivers/net/wireless/ath/ath9k/recv.c b/drivers/net/wireless/ath/ath9k/recv.c +index 2f3aeac..e6d791c 100644 +--- a/drivers/net/wireless/ath/ath9k/recv.c ++++ b/drivers/net/wireless/ath/ath9k/recv.c +@@ -829,7 +829,8 @@ static bool ath9k_rx_accept(struct ath_common *common, + * descriptor does contain a valid key index. This has been observed + * mostly with CCMP encryption. + */ +- if (rx_stats->rs_keyix == ATH9K_RXKEYIX_INVALID) ++ if (rx_stats->rs_keyix == ATH9K_RXKEYIX_INVALID || ++ !test_bit(rx_stats->rs_keyix, common->ccmp_keymap)) + rx_stats->rs_status &= ~ATH9K_RXERR_KEYMISS; + + if (!rx_stats->rs_datalen) +diff --git a/drivers/net/wireless/ath/key.c b/drivers/net/wireless/ath/key.c +index 4cf7c5e..1ec3fa5 100644 +--- a/drivers/net/wireless/ath/key.c ++++ b/drivers/net/wireless/ath/key.c +@@ -556,6 +556,9 @@ int ath_key_config(struct ath_common *common, + return -EIO; + + set_bit(idx, common->keymap); ++ if (key->cipher == WLAN_CIPHER_SUITE_CCMP) ++ set_bit(idx, common->ccmp_keymap); ++ + if (key->cipher == WLAN_CIPHER_SUITE_TKIP) { + set_bit(idx + 64, common->keymap); + set_bit(idx, common->tkip_keymap); +@@ -582,6 +585,7 @@ void ath_key_delete(struct ath_common *common, struct ieee80211_key_conf *key) + return; + + clear_bit(key->hw_key_idx, common->keymap); ++ clear_bit(key->hw_key_idx, common->ccmp_keymap); + if (key->cipher != WLAN_CIPHER_SUITE_TKIP) + return; + +diff --git a/drivers/net/wireless/mwifiex/11n_rxreorder.c b/drivers/net/wireless/mwifiex/11n_rxreorder.c +index 7aa9aa0..39fd4d5 100644 +--- a/drivers/net/wireless/mwifiex/11n_rxreorder.c ++++ b/drivers/net/wireless/mwifiex/11n_rxreorder.c +@@ -267,7 +267,8 @@ mwifiex_11n_create_rx_reorder_tbl(struct mwifiex_private *priv, u8 *ta, + else + last_seq = priv->rx_seq[tid]; + +- if (last_seq >= new_node->start_win) ++ if (last_seq != MWIFIEX_DEF_11N_RX_SEQ_NUM && ++ last_seq >= new_node->start_win) + new_node->start_win = last_seq + 1; + + new_node->win_size = win_size; +@@ -611,5 +612,5 @@ void mwifiex_11n_cleanup_reorder_tbl(struct mwifiex_private *priv) + spin_unlock_irqrestore(&priv->rx_reorder_tbl_lock, flags); + + INIT_LIST_HEAD(&priv->rx_reorder_tbl_ptr); +- memset(priv->rx_seq, 0, sizeof(priv->rx_seq)); ++ mwifiex_reset_11n_rx_seq_num(priv); + } +diff --git a/drivers/net/wireless/mwifiex/11n_rxreorder.h b/drivers/net/wireless/mwifiex/11n_rxreorder.h +index 033c8ad..7128baa 100644 +--- a/drivers/net/wireless/mwifiex/11n_rxreorder.h 
++++ b/drivers/net/wireless/mwifiex/11n_rxreorder.h +@@ -37,6 +37,13 @@ + + #define ADDBA_RSP_STATUS_ACCEPT 0 + ++#define MWIFIEX_DEF_11N_RX_SEQ_NUM 0xffff ++ ++static inline void mwifiex_reset_11n_rx_seq_num(struct mwifiex_private *priv) ++{ ++ memset(priv->rx_seq, 0xff, sizeof(priv->rx_seq)); ++} ++ + int mwifiex_11n_rx_reorder_pkt(struct mwifiex_private *, + u16 seqNum, + u16 tid, u8 *ta, +diff --git a/drivers/net/wireless/mwifiex/cfg80211.c b/drivers/net/wireless/mwifiex/cfg80211.c +index 462c710..01dcb1a 100644 +--- a/drivers/net/wireless/mwifiex/cfg80211.c ++++ b/drivers/net/wireless/mwifiex/cfg80211.c +@@ -1177,11 +1177,11 @@ struct net_device *mwifiex_add_virtual_intf(struct wiphy *wiphy, + void *mdev_priv; + + if (!priv) +- return NULL; ++ return ERR_PTR(-EFAULT); + + adapter = priv->adapter; + if (!adapter) +- return NULL; ++ return ERR_PTR(-EFAULT); + + switch (type) { + case NL80211_IFTYPE_UNSPECIFIED: +@@ -1190,7 +1190,7 @@ struct net_device *mwifiex_add_virtual_intf(struct wiphy *wiphy, + if (priv->bss_mode) { + wiphy_err(wiphy, "cannot create multiple" + " station/adhoc interfaces\n"); +- return NULL; ++ return ERR_PTR(-EINVAL); + } + + if (type == NL80211_IFTYPE_UNSPECIFIED) +@@ -1208,14 +1208,15 @@ struct net_device *mwifiex_add_virtual_intf(struct wiphy *wiphy, + break; + default: + wiphy_err(wiphy, "type not supported\n"); +- return NULL; ++ return ERR_PTR(-EINVAL); + } + + dev = alloc_netdev_mq(sizeof(struct mwifiex_private *), name, + ether_setup, 1); + if (!dev) { + wiphy_err(wiphy, "no memory available for netdevice\n"); +- goto error; ++ priv->bss_mode = NL80211_IFTYPE_UNSPECIFIED; ++ return ERR_PTR(-ENOMEM); + } + + dev_net_set(dev, wiphy_net(wiphy)); +@@ -1240,7 +1241,9 @@ struct net_device *mwifiex_add_virtual_intf(struct wiphy *wiphy, + /* Register network device */ + if (register_netdevice(dev)) { + wiphy_err(wiphy, "cannot register virtual network device\n"); +- goto error; ++ free_netdev(dev); ++ priv->bss_mode = NL80211_IFTYPE_UNSPECIFIED; ++ return ERR_PTR(-EFAULT); + } + + sema_init(&priv->async_sem, 1); +@@ -1252,12 +1255,6 @@ struct net_device *mwifiex_add_virtual_intf(struct wiphy *wiphy, + mwifiex_dev_debugfs_init(priv); + #endif + return dev; +-error: +- if (dev && (dev->reg_state == NETREG_UNREGISTERED)) +- free_netdev(dev); +- priv->bss_mode = NL80211_IFTYPE_UNSPECIFIED; +- +- return NULL; + } + EXPORT_SYMBOL_GPL(mwifiex_add_virtual_intf); + +diff --git a/drivers/net/wireless/mwifiex/wmm.c b/drivers/net/wireless/mwifiex/wmm.c +index 6c239c3..06fcf1e 100644 +--- a/drivers/net/wireless/mwifiex/wmm.c ++++ b/drivers/net/wireless/mwifiex/wmm.c +@@ -406,6 +406,8 @@ mwifiex_wmm_init(struct mwifiex_adapter *adapter) + priv->add_ba_param.tx_win_size = MWIFIEX_AMPDU_DEF_TXWINSIZE; + priv->add_ba_param.rx_win_size = MWIFIEX_AMPDU_DEF_RXWINSIZE; + ++ mwifiex_reset_11n_rx_seq_num(priv); ++ + atomic_set(&priv->wmm.tx_pkts_queued, 0); + atomic_set(&priv->wmm.highest_queued_prio, HIGH_PRIO_TID); + } +@@ -1209,10 +1211,12 @@ mwifiex_dequeue_tx_packet(struct mwifiex_adapter *adapter) + return 0; + } + +- if (!ptr->is_11n_enabled || mwifiex_is_ba_stream_setup(priv, ptr, tid) +- || ((priv->sec_info.wpa_enabled +- || priv->sec_info.wpa2_enabled) && !priv->wpa_is_gtk_set) +- ) { ++ if (!ptr->is_11n_enabled || ++ mwifiex_is_ba_stream_setup(priv, ptr, tid) || ++ priv->wps.session_enable || ++ ((priv->sec_info.wpa_enabled || ++ priv->sec_info.wpa2_enabled) && ++ !priv->wpa_is_gtk_set)) { + mwifiex_send_single_packet(priv, ptr, ptr_index, flags); + /* ra_list_spinlock has 
been freed in + mwifiex_send_single_packet() */ +diff --git a/drivers/net/wireless/rtlwifi/rtl8192cu/sw.c b/drivers/net/wireless/rtlwifi/rtl8192cu/sw.c +index 94a3e17..0302148 100644 +--- a/drivers/net/wireless/rtlwifi/rtl8192cu/sw.c ++++ b/drivers/net/wireless/rtlwifi/rtl8192cu/sw.c +@@ -311,9 +311,11 @@ static struct usb_device_id rtl8192c_usb_ids[] = { + {RTL_USB_DEVICE(0x07b8, 0x8188, rtl92cu_hal_cfg)}, /*Abocom - Abocom*/ + {RTL_USB_DEVICE(0x07b8, 0x8189, rtl92cu_hal_cfg)}, /*Funai - Abocom*/ + {RTL_USB_DEVICE(0x0846, 0x9041, rtl92cu_hal_cfg)}, /*NetGear WNA1000M*/ ++ {RTL_USB_DEVICE(0x0bda, 0x5088, rtl92cu_hal_cfg)}, /*Thinkware-CC&C*/ + {RTL_USB_DEVICE(0x0df6, 0x0052, rtl92cu_hal_cfg)}, /*Sitecom - Edimax*/ + {RTL_USB_DEVICE(0x0df6, 0x005c, rtl92cu_hal_cfg)}, /*Sitecom - Edimax*/ + {RTL_USB_DEVICE(0x0eb0, 0x9071, rtl92cu_hal_cfg)}, /*NO Brand - Etop*/ ++ {RTL_USB_DEVICE(0x4856, 0x0091, rtl92cu_hal_cfg)}, /*NetweeN - Feixun*/ + /* HP - Lite-On ,8188CUS Slim Combo */ + {RTL_USB_DEVICE(0x103c, 0x1629, rtl92cu_hal_cfg)}, + {RTL_USB_DEVICE(0x13d3, 0x3357, rtl92cu_hal_cfg)}, /* AzureWave */ +@@ -355,6 +357,7 @@ static struct usb_device_id rtl8192c_usb_ids[] = { + {RTL_USB_DEVICE(0x07b8, 0x8178, rtl92cu_hal_cfg)}, /*Funai -Abocom*/ + {RTL_USB_DEVICE(0x0846, 0x9021, rtl92cu_hal_cfg)}, /*Netgear-Sercomm*/ + {RTL_USB_DEVICE(0x0b05, 0x17ab, rtl92cu_hal_cfg)}, /*ASUS-Edimax*/ ++ {RTL_USB_DEVICE(0x0bda, 0x8186, rtl92cu_hal_cfg)}, /*Realtek 92CE-VAU*/ + {RTL_USB_DEVICE(0x0df6, 0x0061, rtl92cu_hal_cfg)}, /*Sitecom-Edimax*/ + {RTL_USB_DEVICE(0x0e66, 0x0019, rtl92cu_hal_cfg)}, /*Hawking-Edimax*/ + {RTL_USB_DEVICE(0x2001, 0x3307, rtl92cu_hal_cfg)}, /*D-Link-Cameo*/ +diff --git a/drivers/target/tcm_fc/tfc_sess.c b/drivers/target/tcm_fc/tfc_sess.c +index 3269213..64ddb63 100644 +--- a/drivers/target/tcm_fc/tfc_sess.c ++++ b/drivers/target/tcm_fc/tfc_sess.c +@@ -61,7 +61,8 @@ static struct ft_tport *ft_tport_create(struct fc_lport *lport) + struct ft_tport *tport; + int i; + +- tport = rcu_dereference(lport->prov[FC_TYPE_FCP]); ++ tport = rcu_dereference_protected(lport->prov[FC_TYPE_FCP], ++ lockdep_is_held(&ft_lport_lock)); + if (tport && tport->tpg) + return tport; + +diff --git a/fs/btrfs/tree-log.c b/fs/btrfs/tree-log.c +index 3568374..19b127c 100644 +--- a/fs/btrfs/tree-log.c ++++ b/fs/btrfs/tree-log.c +@@ -692,6 +692,8 @@ static noinline int drop_one_dir_item(struct btrfs_trans_handle *trans, + kfree(name); + + iput(inode); ++ ++ btrfs_run_delayed_items(trans, root); + return ret; + } + +@@ -897,6 +899,7 @@ again: + ret = btrfs_unlink_inode(trans, root, dir, + inode, victim_name, + victim_name_len); ++ btrfs_run_delayed_items(trans, root); + } + kfree(victim_name); + ptr = (unsigned long)(victim_ref + 1) + victim_name_len; +@@ -1477,6 +1480,9 @@ again: + ret = btrfs_unlink_inode(trans, root, dir, inode, + name, name_len); + BUG_ON(ret); ++ ++ btrfs_run_delayed_items(trans, root); ++ + kfree(name); + iput(inode); + +diff --git a/fs/cifs/connect.c b/fs/cifs/connect.c +index 9e0675a..b21670c 100644 +--- a/fs/cifs/connect.c ++++ b/fs/cifs/connect.c +@@ -2975,18 +2975,15 @@ cifs_negotiate_rsize(struct cifs_tcon *tcon, struct smb_vol *pvolume_info) + * MS-CIFS indicates that servers are only limited by the client's + * bufsize for reads, testing against win98se shows that it throws + * INVALID_PARAMETER errors if you try to request too large a read. ++ * OS/2 just sends back short reads. 
+ * +- * If the server advertises a MaxBufferSize of less than one page, +- * assume that it also can't satisfy reads larger than that either. +- * +- * FIXME: Is there a better heuristic for this? ++ * If the server doesn't advertise CAP_LARGE_READ_X, then assume that ++ * it can't handle a read request larger than its MaxBufferSize either. + */ + if (tcon->unix_ext && (unix_cap & CIFS_UNIX_LARGE_READ_CAP)) + defsize = CIFS_DEFAULT_IOSIZE; + else if (server->capabilities & CAP_LARGE_READ_X) + defsize = CIFS_DEFAULT_NON_POSIX_RSIZE; +- else if (server->maxBuf >= PAGE_CACHE_SIZE) +- defsize = CIFSMaxBufSize; + else + defsize = server->maxBuf - sizeof(READ_RSP); + +diff --git a/fs/ocfs2/file.c b/fs/ocfs2/file.c +index 6e39668..07ee5b4 100644 +--- a/fs/ocfs2/file.c ++++ b/fs/ocfs2/file.c +@@ -2422,8 +2422,10 @@ out_dio: + unaligned_dio = 0; + } + +- if (unaligned_dio) ++ if (unaligned_dio) { ++ ocfs2_iocb_clear_unaligned_aio(iocb); + atomic_dec(&OCFS2_I(inode)->ip_unaligned_aio); ++ } + + out: + if (rw_level != -1) +diff --git a/fs/open.c b/fs/open.c +index 22c41b5..e2b5d51 100644 +--- a/fs/open.c ++++ b/fs/open.c +@@ -396,10 +396,10 @@ SYSCALL_DEFINE1(fchdir, unsigned int, fd) + { + struct file *file; + struct inode *inode; +- int error; ++ int error, fput_needed; + + error = -EBADF; +- file = fget(fd); ++ file = fget_raw_light(fd, &fput_needed); + if (!file) + goto out; + +@@ -413,7 +413,7 @@ SYSCALL_DEFINE1(fchdir, unsigned int, fd) + if (!error) + set_fs_pwd(current->fs, &file->f_path); + out_putf: +- fput(file); ++ fput_light(file, fput_needed); + out: + return error; + } +diff --git a/fs/splice.c b/fs/splice.c +index 6d0dfb8..014fcb4 100644 +--- a/fs/splice.c ++++ b/fs/splice.c +@@ -274,13 +274,16 @@ void spd_release_page(struct splice_pipe_desc *spd, unsigned int i) + * Check if we need to grow the arrays holding pages and partial page + * descriptions. 
+ */ +-int splice_grow_spd(struct pipe_inode_info *pipe, struct splice_pipe_desc *spd) ++int splice_grow_spd(const struct pipe_inode_info *pipe, struct splice_pipe_desc *spd) + { +- if (pipe->buffers <= PIPE_DEF_BUFFERS) ++ unsigned int buffers = ACCESS_ONCE(pipe->buffers); ++ ++ spd->nr_pages_max = buffers; ++ if (buffers <= PIPE_DEF_BUFFERS) + return 0; + +- spd->pages = kmalloc(pipe->buffers * sizeof(struct page *), GFP_KERNEL); +- spd->partial = kmalloc(pipe->buffers * sizeof(struct partial_page), GFP_KERNEL); ++ spd->pages = kmalloc(buffers * sizeof(struct page *), GFP_KERNEL); ++ spd->partial = kmalloc(buffers * sizeof(struct partial_page), GFP_KERNEL); + + if (spd->pages && spd->partial) + return 0; +@@ -290,10 +293,9 @@ int splice_grow_spd(struct pipe_inode_info *pipe, struct splice_pipe_desc *spd) + return -ENOMEM; + } + +-void splice_shrink_spd(struct pipe_inode_info *pipe, +- struct splice_pipe_desc *spd) ++void splice_shrink_spd(struct splice_pipe_desc *spd) + { +- if (pipe->buffers <= PIPE_DEF_BUFFERS) ++ if (spd->nr_pages_max <= PIPE_DEF_BUFFERS) + return; + + kfree(spd->pages); +@@ -316,6 +318,7 @@ __generic_file_splice_read(struct file *in, loff_t *ppos, + struct splice_pipe_desc spd = { + .pages = pages, + .partial = partial, ++ .nr_pages_max = PIPE_DEF_BUFFERS, + .flags = flags, + .ops = &page_cache_pipe_buf_ops, + .spd_release = spd_release_page, +@@ -327,7 +330,7 @@ __generic_file_splice_read(struct file *in, loff_t *ppos, + index = *ppos >> PAGE_CACHE_SHIFT; + loff = *ppos & ~PAGE_CACHE_MASK; + req_pages = (len + loff + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT; +- nr_pages = min(req_pages, pipe->buffers); ++ nr_pages = min(req_pages, spd.nr_pages_max); + + /* + * Lookup the (hopefully) full range of pages we need. +@@ -498,7 +501,7 @@ fill_it: + if (spd.nr_pages) + error = splice_to_pipe(pipe, &spd); + +- splice_shrink_spd(pipe, &spd); ++ splice_shrink_spd(&spd); + return error; + } + +@@ -599,6 +602,7 @@ ssize_t default_file_splice_read(struct file *in, loff_t *ppos, + struct splice_pipe_desc spd = { + .pages = pages, + .partial = partial, ++ .nr_pages_max = PIPE_DEF_BUFFERS, + .flags = flags, + .ops = &default_pipe_buf_ops, + .spd_release = spd_release_page, +@@ -609,8 +613,8 @@ ssize_t default_file_splice_read(struct file *in, loff_t *ppos, + + res = -ENOMEM; + vec = __vec; +- if (pipe->buffers > PIPE_DEF_BUFFERS) { +- vec = kmalloc(pipe->buffers * sizeof(struct iovec), GFP_KERNEL); ++ if (spd.nr_pages_max > PIPE_DEF_BUFFERS) { ++ vec = kmalloc(spd.nr_pages_max * sizeof(struct iovec), GFP_KERNEL); + if (!vec) + goto shrink_ret; + } +@@ -618,7 +622,7 @@ ssize_t default_file_splice_read(struct file *in, loff_t *ppos, + offset = *ppos & ~PAGE_CACHE_MASK; + nr_pages = (len + offset + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT; + +- for (i = 0; i < nr_pages && i < pipe->buffers && len; i++) { ++ for (i = 0; i < nr_pages && i < spd.nr_pages_max && len; i++) { + struct page *page; + + page = alloc_page(GFP_USER); +@@ -666,7 +670,7 @@ ssize_t default_file_splice_read(struct file *in, loff_t *ppos, + shrink_ret: + if (vec != __vec) + kfree(vec); +- splice_shrink_spd(pipe, &spd); ++ splice_shrink_spd(&spd); + return res; + + err: +@@ -1616,6 +1620,7 @@ static long vmsplice_to_pipe(struct file *file, const struct iovec __user *iov, + struct splice_pipe_desc spd = { + .pages = pages, + .partial = partial, ++ .nr_pages_max = PIPE_DEF_BUFFERS, + .flags = flags, + .ops = &user_page_pipe_buf_ops, + .spd_release = spd_release_page, +@@ -1631,13 +1636,13 @@ static long 
vmsplice_to_pipe(struct file *file, const struct iovec __user *iov, + + spd.nr_pages = get_iovec_page_array(iov, nr_segs, spd.pages, + spd.partial, flags & SPLICE_F_GIFT, +- pipe->buffers); ++ spd.nr_pages_max); + if (spd.nr_pages <= 0) + ret = spd.nr_pages; + else + ret = splice_to_pipe(pipe, &spd); + +- splice_shrink_spd(pipe, &spd); ++ splice_shrink_spd(&spd); + return ret; + } + +diff --git a/fs/udf/super.c b/fs/udf/super.c +index 87cb24a..270e135 100644 +--- a/fs/udf/super.c ++++ b/fs/udf/super.c +@@ -56,6 +56,7 @@ + #include + #include + #include ++#include + #include + + #include "udf_sb.h" +@@ -1217,16 +1218,65 @@ out_bh: + return ret; + } + ++static int udf_load_sparable_map(struct super_block *sb, ++ struct udf_part_map *map, ++ struct sparablePartitionMap *spm) ++{ ++ uint32_t loc; ++ uint16_t ident; ++ struct sparingTable *st; ++ struct udf_sparing_data *sdata = &map->s_type_specific.s_sparing; ++ int i; ++ struct buffer_head *bh; ++ ++ map->s_partition_type = UDF_SPARABLE_MAP15; ++ sdata->s_packet_len = le16_to_cpu(spm->packetLength); ++ if (!is_power_of_2(sdata->s_packet_len)) { ++ udf_err(sb, "error loading logical volume descriptor: " ++ "Invalid packet length %u\n", ++ (unsigned)sdata->s_packet_len); ++ return -EIO; ++ } ++ if (spm->numSparingTables > 4) { ++ udf_err(sb, "error loading logical volume descriptor: " ++ "Too many sparing tables (%d)\n", ++ (int)spm->numSparingTables); ++ return -EIO; ++ } ++ ++ for (i = 0; i < spm->numSparingTables; i++) { ++ loc = le32_to_cpu(spm->locSparingTable[i]); ++ bh = udf_read_tagged(sb, loc, loc, &ident); ++ if (!bh) ++ continue; ++ ++ st = (struct sparingTable *)bh->b_data; ++ if (ident != 0 || ++ strncmp(st->sparingIdent.ident, UDF_ID_SPARING, ++ strlen(UDF_ID_SPARING)) || ++ sizeof(*st) + le16_to_cpu(st->reallocationTableLen) > ++ sb->s_blocksize) { ++ brelse(bh); ++ continue; ++ } ++ ++ sdata->s_spar_map[i] = bh; ++ } ++ map->s_partition_func = udf_get_pblock_spar15; ++ return 0; ++} ++ + static int udf_load_logicalvol(struct super_block *sb, sector_t block, + struct kernel_lb_addr *fileset) + { + struct logicalVolDesc *lvd; +- int i, j, offset; ++ int i, offset; + uint8_t type; + struct udf_sb_info *sbi = UDF_SB(sb); + struct genericPartitionMap *gpm; + uint16_t ident; + struct buffer_head *bh; ++ unsigned int table_len; + int ret = 0; + + bh = udf_read_tagged(sb, block, block, &ident); +@@ -1234,15 +1284,20 @@ static int udf_load_logicalvol(struct super_block *sb, sector_t block, + return 1; + BUG_ON(ident != TAG_IDENT_LVD); + lvd = (struct logicalVolDesc *)bh->b_data; +- +- i = udf_sb_alloc_partition_maps(sb, le32_to_cpu(lvd->numPartitionMaps)); +- if (i != 0) { +- ret = i; ++ table_len = le32_to_cpu(lvd->mapTableLength); ++ if (sizeof(*lvd) + table_len > sb->s_blocksize) { ++ udf_err(sb, "error loading logical volume descriptor: " ++ "Partition table too long (%u > %lu)\n", table_len, ++ sb->s_blocksize - sizeof(*lvd)); + goto out_bh; + } + ++ ret = udf_sb_alloc_partition_maps(sb, le32_to_cpu(lvd->numPartitionMaps)); ++ if (ret) ++ goto out_bh; ++ + for (i = 0, offset = 0; +- i < sbi->s_partitions && offset < le32_to_cpu(lvd->mapTableLength); ++ i < sbi->s_partitions && offset < table_len; + i++, offset += gpm->partitionMapLength) { + struct udf_part_map *map = &sbi->s_partmaps[i]; + gpm = (struct genericPartitionMap *) +@@ -1277,38 +1332,9 @@ static int udf_load_logicalvol(struct super_block *sb, sector_t block, + } else if (!strncmp(upm2->partIdent.ident, + UDF_ID_SPARABLE, + strlen(UDF_ID_SPARABLE))) { +- uint32_t loc; +- 
struct sparingTable *st; +- struct sparablePartitionMap *spm = +- (struct sparablePartitionMap *)gpm; +- +- map->s_partition_type = UDF_SPARABLE_MAP15; +- map->s_type_specific.s_sparing.s_packet_len = +- le16_to_cpu(spm->packetLength); +- for (j = 0; j < spm->numSparingTables; j++) { +- struct buffer_head *bh2; +- +- loc = le32_to_cpu( +- spm->locSparingTable[j]); +- bh2 = udf_read_tagged(sb, loc, loc, +- &ident); +- map->s_type_specific.s_sparing. +- s_spar_map[j] = bh2; +- +- if (bh2 == NULL) +- continue; +- +- st = (struct sparingTable *)bh2->b_data; +- if (ident != 0 || strncmp( +- st->sparingIdent.ident, +- UDF_ID_SPARING, +- strlen(UDF_ID_SPARING))) { +- brelse(bh2); +- map->s_type_specific.s_sparing. +- s_spar_map[j] = NULL; +- } +- } +- map->s_partition_func = udf_get_pblock_spar15; ++ if (udf_load_sparable_map(sb, map, ++ (struct sparablePartitionMap *)gpm) < 0) ++ goto out_bh; + } else if (!strncmp(upm2->partIdent.ident, + UDF_ID_METADATA, + strlen(UDF_ID_METADATA))) { +diff --git a/include/linux/aio.h b/include/linux/aio.h +index 2314ad8..b1a520e 100644 +--- a/include/linux/aio.h ++++ b/include/linux/aio.h +@@ -140,6 +140,7 @@ struct kiocb { + (x)->ki_dtor = NULL; \ + (x)->ki_obj.tsk = tsk; \ + (x)->ki_user_data = 0; \ ++ (x)->private = NULL; \ + } while (0) + + #define AIO_RING_MAGIC 0xa10a10a1 +diff --git a/include/linux/splice.h b/include/linux/splice.h +index 26e5b61..09a545a 100644 +--- a/include/linux/splice.h ++++ b/include/linux/splice.h +@@ -51,7 +51,8 @@ struct partial_page { + struct splice_pipe_desc { + struct page **pages; /* page map */ + struct partial_page *partial; /* pages[] may not be contig */ +- int nr_pages; /* number of pages in map */ ++ int nr_pages; /* number of populated pages in map */ ++ unsigned int nr_pages_max; /* pages[] & partial[] arrays size */ + unsigned int flags; /* splice flags */ + const struct pipe_buf_operations *ops;/* ops associated with output pipe */ + void (*spd_release)(struct splice_pipe_desc *, unsigned int); +@@ -85,9 +86,8 @@ extern ssize_t splice_direct_to_actor(struct file *, struct splice_desc *, + /* + * for dynamic pipe sizing + */ +-extern int splice_grow_spd(struct pipe_inode_info *, struct splice_pipe_desc *); +-extern void splice_shrink_spd(struct pipe_inode_info *, +- struct splice_pipe_desc *); ++extern int splice_grow_spd(const struct pipe_inode_info *, struct splice_pipe_desc *); ++extern void splice_shrink_spd(struct splice_pipe_desc *); + extern void spd_release_page(struct splice_pipe_desc *, unsigned int); + + extern const struct pipe_buf_operations page_cache_pipe_buf_ops; +diff --git a/include/net/cipso_ipv4.h b/include/net/cipso_ipv4.h +index 9808877..a7a683e 100644 +--- a/include/net/cipso_ipv4.h ++++ b/include/net/cipso_ipv4.h +@@ -42,6 +42,7 @@ + #include + #include + #include ++#include + + /* known doi values */ + #define CIPSO_V4_DOI_UNKNOWN 0x00000000 +@@ -285,7 +286,33 @@ static inline int cipso_v4_skbuff_getattr(const struct sk_buff *skb, + static inline int cipso_v4_validate(const struct sk_buff *skb, + unsigned char **option) + { +- return -ENOSYS; ++ unsigned char *opt = *option; ++ unsigned char err_offset = 0; ++ u8 opt_len = opt[1]; ++ u8 opt_iter; ++ ++ if (opt_len < 8) { ++ err_offset = 1; ++ goto out; ++ } ++ ++ if (get_unaligned_be32(&opt[2]) == 0) { ++ err_offset = 2; ++ goto out; ++ } ++ ++ for (opt_iter = 6; opt_iter < opt_len;) { ++ if (opt[opt_iter + 1] > (opt_len - opt_iter)) { ++ err_offset = opt_iter + 1; ++ goto out; ++ } ++ opt_iter += opt[opt_iter + 1]; ++ } ++ ++out: ++ 
*option = opt + err_offset; ++ return err_offset; ++ + } + #endif /* CONFIG_NETLABEL */ + +diff --git a/include/net/sch_generic.h b/include/net/sch_generic.h +index 55ce96b..9d7d54a 100644 +--- a/include/net/sch_generic.h ++++ b/include/net/sch_generic.h +@@ -220,13 +220,16 @@ struct tcf_proto { + + struct qdisc_skb_cb { + unsigned int pkt_len; +- unsigned char data[24]; ++ u16 bond_queue_mapping; ++ u16 _pad; ++ unsigned char data[20]; + }; + + static inline void qdisc_cb_private_validate(const struct sk_buff *skb, int sz) + { + struct qdisc_skb_cb *qcb; +- BUILD_BUG_ON(sizeof(skb->cb) < sizeof(unsigned int) + sz); ++ ++ BUILD_BUG_ON(sizeof(skb->cb) < offsetof(struct qdisc_skb_cb, data) + sz); + BUILD_BUG_ON(sizeof(qcb->data) < sz); + } + +diff --git a/kernel/relay.c b/kernel/relay.c +index b6f803a..a535fc9 100644 +--- a/kernel/relay.c ++++ b/kernel/relay.c +@@ -1235,6 +1235,7 @@ static ssize_t subbuf_splice_actor(struct file *in, + struct splice_pipe_desc spd = { + .pages = pages, + .nr_pages = 0, ++ .nr_pages_max = PIPE_DEF_BUFFERS, + .partial = partial, + .flags = flags, + .ops = &relay_pipe_buf_ops, +@@ -1302,8 +1303,8 @@ static ssize_t subbuf_splice_actor(struct file *in, + ret += padding; + + out: +- splice_shrink_spd(pipe, &spd); +- return ret; ++ splice_shrink_spd(&spd); ++ return ret; + } + + static ssize_t relay_file_splice_read(struct file *in, +diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c +index 697e49d..5638104 100644 +--- a/kernel/trace/trace.c ++++ b/kernel/trace/trace.c +@@ -2541,10 +2541,12 @@ tracing_cpumask_write(struct file *filp, const char __user *ubuf, + if (cpumask_test_cpu(cpu, tracing_cpumask) && + !cpumask_test_cpu(cpu, tracing_cpumask_new)) { + atomic_inc(&global_trace.data[cpu]->disabled); ++ ring_buffer_record_disable_cpu(global_trace.buffer, cpu); + } + if (!cpumask_test_cpu(cpu, tracing_cpumask) && + cpumask_test_cpu(cpu, tracing_cpumask_new)) { + atomic_dec(&global_trace.data[cpu]->disabled); ++ ring_buffer_record_enable_cpu(global_trace.buffer, cpu); + } + } + arch_spin_unlock(&ftrace_max_lock); +@@ -3456,6 +3458,7 @@ static ssize_t tracing_splice_read_pipe(struct file *filp, + .pages = pages_def, + .partial = partial_def, + .nr_pages = 0, /* This gets updated below. 
*/ ++ .nr_pages_max = PIPE_DEF_BUFFERS, + .flags = flags, + .ops = &tracing_pipe_buf_ops, + .spd_release = tracing_spd_release_pipe, +@@ -3527,7 +3530,7 @@ static ssize_t tracing_splice_read_pipe(struct file *filp, + + ret = splice_to_pipe(pipe, &spd); + out: +- splice_shrink_spd(pipe, &spd); ++ splice_shrink_spd(&spd); + return ret; + + out_err: +@@ -4017,6 +4020,7 @@ tracing_buffers_splice_read(struct file *file, loff_t *ppos, + struct splice_pipe_desc spd = { + .pages = pages_def, + .partial = partial_def, ++ .nr_pages_max = PIPE_DEF_BUFFERS, + .flags = flags, + .ops = &buffer_pipe_buf_ops, + .spd_release = buffer_spd_release, +@@ -4104,7 +4108,7 @@ tracing_buffers_splice_read(struct file *file, loff_t *ppos, + } + + ret = splice_to_pipe(pipe, &spd); +- splice_shrink_spd(pipe, &spd); ++ splice_shrink_spd(&spd); + out: + return ret; + } +diff --git a/mm/madvise.c b/mm/madvise.c +index 74bf193..23d3a6b 100644 +--- a/mm/madvise.c ++++ b/mm/madvise.c +@@ -13,6 +13,7 @@ + #include + #include + #include ++#include + + /* + * Any behaviour which results in changes to the vma->vm_flags needs to +@@ -197,14 +198,16 @@ static long madvise_remove(struct vm_area_struct *vma, + struct address_space *mapping; + loff_t offset, endoff; + int error; ++ struct file *f; + + *prev = NULL; /* tell sys_madvise we drop mmap_sem */ + + if (vma->vm_flags & (VM_LOCKED|VM_NONLINEAR|VM_HUGETLB)) + return -EINVAL; + +- if (!vma->vm_file || !vma->vm_file->f_mapping +- || !vma->vm_file->f_mapping->host) { ++ f = vma->vm_file; ++ ++ if (!f || !f->f_mapping || !f->f_mapping->host) { + return -EINVAL; + } + +@@ -218,9 +221,16 @@ static long madvise_remove(struct vm_area_struct *vma, + endoff = (loff_t)(end - vma->vm_start - 1) + + ((loff_t)vma->vm_pgoff << PAGE_SHIFT); + +- /* vmtruncate_range needs to take i_mutex */ ++ /* ++ * vmtruncate_range may need to take i_mutex. We need to ++ * explicitly grab a reference because the vma (and hence the ++ * vma's reference to the file) can go away as soon as we drop ++ * mmap_sem. 
++ */ ++ get_file(f); + up_read(&current->mm->mmap_sem); + error = vmtruncate_range(mapping->host, offset, endoff); ++ fput(f); + down_read(&current->mm->mmap_sem); + return error; + } +diff --git a/mm/shmem.c b/mm/shmem.c +index 6c253f7..7a82174 100644 +--- a/mm/shmem.c ++++ b/mm/shmem.c +@@ -1359,6 +1359,7 @@ static ssize_t shmem_file_splice_read(struct file *in, loff_t *ppos, + struct splice_pipe_desc spd = { + .pages = pages, + .partial = partial, ++ .nr_pages_max = PIPE_DEF_BUFFERS, + .flags = flags, + .ops = &page_cache_pipe_buf_ops, + .spd_release = spd_release_page, +@@ -1447,7 +1448,7 @@ static ssize_t shmem_file_splice_read(struct file *in, loff_t *ppos, + if (spd.nr_pages) + error = splice_to_pipe(pipe, &spd); + +- splice_shrink_spd(pipe, &spd); ++ splice_shrink_spd(&spd); + + if (error > 0) { + *ppos += error; +diff --git a/net/bridge/br_if.c b/net/bridge/br_if.c +index f603e5b..f3f75ad 100644 +--- a/net/bridge/br_if.c ++++ b/net/bridge/br_if.c +@@ -240,6 +240,7 @@ int br_add_bridge(struct net *net, const char *name) + return -ENOMEM; + + dev_net_set(dev, net); ++ dev->rtnl_link_ops = &br_link_ops; + + res = register_netdev(dev); + if (res) +diff --git a/net/bridge/br_netlink.c b/net/bridge/br_netlink.c +index a1daf82..cbf9ccd 100644 +--- a/net/bridge/br_netlink.c ++++ b/net/bridge/br_netlink.c +@@ -211,7 +211,7 @@ static int br_validate(struct nlattr *tb[], struct nlattr *data[]) + return 0; + } + +-static struct rtnl_link_ops br_link_ops __read_mostly = { ++struct rtnl_link_ops br_link_ops __read_mostly = { + .kind = "bridge", + .priv_size = sizeof(struct net_bridge), + .setup = br_dev_setup, +diff --git a/net/bridge/br_private.h b/net/bridge/br_private.h +index 93264df..b9bba8f 100644 +--- a/net/bridge/br_private.h ++++ b/net/bridge/br_private.h +@@ -536,6 +536,7 @@ extern int (*br_fdb_test_addr_hook)(struct net_device *dev, unsigned char *addr) + #endif + + /* br_netlink.c */ ++extern struct rtnl_link_ops br_link_ops; + extern int br_netlink_init(void); + extern void br_netlink_fini(void); + extern void br_ifinfo_notify(int event, struct net_bridge_port *port); +diff --git a/net/core/ethtool.c b/net/core/ethtool.c +index 2b587ec..2367246 100644 +--- a/net/core/ethtool.c ++++ b/net/core/ethtool.c +@@ -1672,6 +1672,7 @@ int dev_ethtool(struct net *net, struct ifreq *ifr) + case ETHTOOL_GRXCSUM: + case ETHTOOL_GTXCSUM: + case ETHTOOL_GSG: ++ case ETHTOOL_GSSET_INFO: + case ETHTOOL_GSTRINGS: + case ETHTOOL_GTSO: + case ETHTOOL_GPERMADDR: +diff --git a/net/core/netpoll.c b/net/core/netpoll.c +index ab0633f..db4bb7a 100644 +--- a/net/core/netpoll.c ++++ b/net/core/netpoll.c +@@ -351,22 +351,23 @@ EXPORT_SYMBOL(netpoll_send_skb_on_dev); + + void netpoll_send_udp(struct netpoll *np, const char *msg, int len) + { +- int total_len, eth_len, ip_len, udp_len; ++ int total_len, ip_len, udp_len; + struct sk_buff *skb; + struct udphdr *udph; + struct iphdr *iph; + struct ethhdr *eth; + + udp_len = len + sizeof(*udph); +- ip_len = eth_len = udp_len + sizeof(*iph); +- total_len = eth_len + ETH_HLEN + NET_IP_ALIGN; ++ ip_len = udp_len + sizeof(*iph); ++ total_len = ip_len + LL_RESERVED_SPACE(np->dev); + +- skb = find_skb(np, total_len, total_len - len); ++ skb = find_skb(np, total_len + np->dev->needed_tailroom, ++ total_len - len); + if (!skb) + return; + + skb_copy_to_linear_data(skb, msg, len); +- skb->len += len; ++ skb_put(skb, len); + + skb_push(skb, sizeof(*udph)); + skb_reset_transport_header(skb); +diff --git a/net/core/skbuff.c b/net/core/skbuff.c +index 2ec200de..af9c3c6 100644 +--- 
a/net/core/skbuff.c ++++ b/net/core/skbuff.c +@@ -1663,6 +1663,7 @@ int skb_splice_bits(struct sk_buff *skb, unsigned int offset, + struct splice_pipe_desc spd = { + .pages = pages, + .partial = partial, ++ .nr_pages_max = MAX_SKB_FRAGS, + .flags = flags, + .ops = &sock_pipe_buf_ops, + .spd_release = sock_spd_release, +@@ -1709,7 +1710,7 @@ done: + lock_sock(sk); + } + +- splice_shrink_spd(pipe, &spd); ++ splice_shrink_spd(&spd); + return ret; + } + +diff --git a/net/core/sock.c b/net/core/sock.c +index b23f174..8d095b9 100644 +--- a/net/core/sock.c ++++ b/net/core/sock.c +@@ -1497,6 +1497,11 @@ struct sk_buff *sock_alloc_send_pskb(struct sock *sk, unsigned long header_len, + gfp_t gfp_mask; + long timeo; + int err; ++ int npages = (data_len + (PAGE_SIZE - 1)) >> PAGE_SHIFT; ++ ++ err = -EMSGSIZE; ++ if (npages > MAX_SKB_FRAGS) ++ goto failure; + + gfp_mask = sk->sk_allocation; + if (gfp_mask & __GFP_WAIT) +@@ -1515,14 +1520,12 @@ struct sk_buff *sock_alloc_send_pskb(struct sock *sk, unsigned long header_len, + if (atomic_read(&sk->sk_wmem_alloc) < sk->sk_sndbuf) { + skb = alloc_skb(header_len, gfp_mask); + if (skb) { +- int npages; + int i; + + /* No pages, we're done... */ + if (!data_len) + break; + +- npages = (data_len + (PAGE_SIZE - 1)) >> PAGE_SHIFT; + skb->truesize += data_len; + skb_shinfo(skb)->nr_frags = npages; + for (i = 0; i < npages; i++) { +diff --git a/net/ipv6/route.c b/net/ipv6/route.c +index 059b9d9..2e21751 100644 +--- a/net/ipv6/route.c ++++ b/net/ipv6/route.c +@@ -2881,10 +2881,6 @@ static int __net_init ip6_route_net_init(struct net *net) + net->ipv6.sysctl.ip6_rt_mtu_expires = 10*60*HZ; + net->ipv6.sysctl.ip6_rt_min_advmss = IPV6_MIN_MTU - 20 - 40; + +-#ifdef CONFIG_PROC_FS +- proc_net_fops_create(net, "ipv6_route", 0, &ipv6_route_proc_fops); +- proc_net_fops_create(net, "rt6_stats", S_IRUGO, &rt6_stats_seq_fops); +-#endif + net->ipv6.ip6_rt_gc_expire = 30*HZ; + + ret = 0; +@@ -2905,10 +2901,6 @@ out_ip6_dst_ops: + + static void __net_exit ip6_route_net_exit(struct net *net) + { +-#ifdef CONFIG_PROC_FS +- proc_net_remove(net, "ipv6_route"); +- proc_net_remove(net, "rt6_stats"); +-#endif + kfree(net->ipv6.ip6_null_entry); + #ifdef CONFIG_IPV6_MULTIPLE_TABLES + kfree(net->ipv6.ip6_prohibit_entry); +@@ -2917,11 +2909,33 @@ static void __net_exit ip6_route_net_exit(struct net *net) + dst_entries_destroy(&net->ipv6.ip6_dst_ops); + } + ++static int __net_init ip6_route_net_init_late(struct net *net) ++{ ++#ifdef CONFIG_PROC_FS ++ proc_net_fops_create(net, "ipv6_route", 0, &ipv6_route_proc_fops); ++ proc_net_fops_create(net, "rt6_stats", S_IRUGO, &rt6_stats_seq_fops); ++#endif ++ return 0; ++} ++ ++static void __net_exit ip6_route_net_exit_late(struct net *net) ++{ ++#ifdef CONFIG_PROC_FS ++ proc_net_remove(net, "ipv6_route"); ++ proc_net_remove(net, "rt6_stats"); ++#endif ++} ++ + static struct pernet_operations ip6_route_net_ops = { + .init = ip6_route_net_init, + .exit = ip6_route_net_exit, + }; + ++static struct pernet_operations ip6_route_net_late_ops = { ++ .init = ip6_route_net_init_late, ++ .exit = ip6_route_net_exit_late, ++}; ++ + static struct notifier_block ip6_route_dev_notifier = { + .notifier_call = ip6_route_dev_notify, + .priority = 0, +@@ -2971,19 +2985,25 @@ int __init ip6_route_init(void) + if (ret) + goto xfrm6_init; + ++ ret = register_pernet_subsys(&ip6_route_net_late_ops); ++ if (ret) ++ goto fib6_rules_init; ++ + ret = -ENOBUFS; + if (__rtnl_register(PF_INET6, RTM_NEWROUTE, inet6_rtm_newroute, NULL, NULL) || + __rtnl_register(PF_INET6, 
RTM_DELROUTE, inet6_rtm_delroute, NULL, NULL) || + __rtnl_register(PF_INET6, RTM_GETROUTE, inet6_rtm_getroute, NULL, NULL)) +- goto fib6_rules_init; ++ goto out_register_late_subsys; + + ret = register_netdevice_notifier(&ip6_route_dev_notifier); + if (ret) +- goto fib6_rules_init; ++ goto out_register_late_subsys; + + out: + return ret; + ++out_register_late_subsys: ++ unregister_pernet_subsys(&ip6_route_net_late_ops); + fib6_rules_init: + fib6_rules_cleanup(); + xfrm6_init: +@@ -3002,6 +3022,7 @@ out_kmem_cache: + void ip6_route_cleanup(void) + { + unregister_netdevice_notifier(&ip6_route_dev_notifier); ++ unregister_pernet_subsys(&ip6_route_net_late_ops); + fib6_rules_cleanup(); + xfrm6_fini(); + fib6_gc_cleanup(); +diff --git a/net/l2tp/l2tp_eth.c b/net/l2tp/l2tp_eth.c +index d2726a7..3c55f63 100644 +--- a/net/l2tp/l2tp_eth.c ++++ b/net/l2tp/l2tp_eth.c +@@ -167,6 +167,7 @@ static void l2tp_eth_delete(struct l2tp_session *session) + if (dev) { + unregister_netdev(dev); + spriv->dev = NULL; ++ module_put(THIS_MODULE); + } + } + } +@@ -254,6 +255,7 @@ static int l2tp_eth_create(struct net *net, u32 tunnel_id, u32 session_id, u32 p + if (rc < 0) + goto out_del_dev; + ++ __module_get(THIS_MODULE); + /* Must be done after register_netdev() */ + strlcpy(session->ifname, dev->name, IFNAMSIZ); + +diff --git a/net/l2tp/l2tp_ip.c b/net/l2tp/l2tp_ip.c +index 2fbbe1f..6c7e609 100644 +--- a/net/l2tp/l2tp_ip.c ++++ b/net/l2tp/l2tp_ip.c +@@ -515,10 +515,12 @@ static int l2tp_ip_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *m + sk->sk_bound_dev_if); + if (IS_ERR(rt)) + goto no_route; +- if (connected) ++ if (connected) { + sk_setup_caps(sk, &rt->dst); +- else +- dst_release(&rt->dst); /* safe since we hold rcu_read_lock */ ++ } else { ++ skb_dst_set(skb, &rt->dst); ++ goto xmit; ++ } + } + + /* We dont need to clone dst here, it is guaranteed to not disappear. +@@ -526,6 +528,7 @@ static int l2tp_ip_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *m + */ + skb_dst_set_noref(skb, &rt->dst); + ++xmit: + /* Queue the packet to IP for output */ + rc = ip_queue_xmit(skb, &inet->cork.fl); + rcu_read_unlock(); +diff --git a/net/mac80211/rx.c b/net/mac80211/rx.c +index 064d20f..cda4875 100644 +--- a/net/mac80211/rx.c ++++ b/net/mac80211/rx.c +@@ -2389,7 +2389,7 @@ ieee80211_rx_h_action_return(struct ieee80211_rx_data *rx) + * frames that we didn't handle, including returning unknown + * ones. For all other modes we will return them to the sender, + * setting the 0x80 bit in the action category, as required by +- * 802.11-2007 7.3.1.11. ++ * 802.11-2012 9.24.4. + * Newer versions of hostapd shall also use the management frame + * registration mechanisms, but older ones still use cooked + * monitor interfaces so push all frames there. 
+@@ -2399,6 +2399,9 @@ ieee80211_rx_h_action_return(struct ieee80211_rx_data *rx) + sdata->vif.type == NL80211_IFTYPE_AP_VLAN)) + return RX_DROP_MONITOR; + ++ if (is_multicast_ether_addr(mgmt->da)) ++ return RX_DROP_MONITOR; ++ + /* do not return rejected action frames */ + if (mgmt->u.action.category & 0x80) + return RX_DROP_UNUSABLE; +diff --git a/net/nfc/nci/ntf.c b/net/nfc/nci/ntf.c +index 96633f5..12b6a80 100644 +--- a/net/nfc/nci/ntf.c ++++ b/net/nfc/nci/ntf.c +@@ -86,7 +86,7 @@ static int nci_rf_activate_nfca_passive_poll(struct nci_dev *ndev, + nfca_poll->sens_res = __le16_to_cpu(*((__u16 *)data)); + data += 2; + +- nfca_poll->nfcid1_len = *data++; ++ nfca_poll->nfcid1_len = min_t(__u8, *data++, sizeof(nfca_poll->nfcid1)); + + nfc_dbg("sens_res 0x%x, nfcid1_len %d", + nfca_poll->sens_res, +@@ -111,7 +111,7 @@ static int nci_rf_activate_nfca_passive_poll(struct nci_dev *ndev, + + switch (ntf->rf_interface_type) { + case NCI_RF_INTERFACE_ISO_DEP: +- nfca_poll_iso_dep->rats_res_len = *data++; ++ nfca_poll_iso_dep->rats_res_len = min_t(__u8, *data++, 20); + if (nfca_poll_iso_dep->rats_res_len > 0) { + memcpy(nfca_poll_iso_dep->rats_res, + data, +diff --git a/net/nfc/rawsock.c b/net/nfc/rawsock.c +index ee7b2b3..7a167fc 100644 +--- a/net/nfc/rawsock.c ++++ b/net/nfc/rawsock.c +@@ -52,7 +52,10 @@ static int rawsock_release(struct socket *sock) + { + struct sock *sk = sock->sk; + +- nfc_dbg("sock=%p", sock); ++ nfc_dbg("sock=%p sk=%p", sock, sk); ++ ++ if (!sk) ++ return 0; + + sock_orphan(sk); + sock_put(sk); +diff --git a/sound/pci/hda/patch_sigmatel.c b/sound/pci/hda/patch_sigmatel.c +index 7b7a516..2b973f5 100644 +--- a/sound/pci/hda/patch_sigmatel.c ++++ b/sound/pci/hda/patch_sigmatel.c +@@ -4457,7 +4457,7 @@ static int stac92xx_init(struct hda_codec *codec) + AC_PINCTL_IN_EN); + for (i = 0; i < spec->num_pwrs; i++) { + hda_nid_t nid = spec->pwr_nids[i]; +- int pinctl, def_conf; ++ unsigned int pinctl, def_conf; + + /* power on when no jack detection is available */ + /* or when the VREF is used for controlling LED */ +@@ -4484,7 +4484,7 @@ static int stac92xx_init(struct hda_codec *codec) + def_conf = get_defcfg_connect(def_conf); + /* skip any ports that don't have jacks since presence + * detection is useless */ +- if (def_conf != AC_JACK_PORT_NONE && ++ if (def_conf != AC_JACK_PORT_COMPLEX || + !is_jack_detectable(codec, nid)) { + stac_toggle_power_map(codec, nid, 1); + continue; +diff --git a/sound/soc/codecs/tlv320aic3x.c b/sound/soc/codecs/tlv320aic3x.c +index 87d5ef1..8b48801 100644 +--- a/sound/soc/codecs/tlv320aic3x.c ++++ b/sound/soc/codecs/tlv320aic3x.c +@@ -963,9 +963,7 @@ static int aic3x_hw_params(struct snd_pcm_substream *substream, + } + + found: +- data = snd_soc_read(codec, AIC3X_PLL_PROGA_REG); +- snd_soc_write(codec, AIC3X_PLL_PROGA_REG, +- data | (pll_p << PLLP_SHIFT)); ++ snd_soc_update_bits(codec, AIC3X_PLL_PROGA_REG, PLLP_MASK, pll_p); + snd_soc_write(codec, AIC3X_OVRF_STATUS_AND_PLLR_REG, + pll_r << PLLR_SHIFT); + snd_soc_write(codec, AIC3X_PLL_PROGB_REG, pll_j << PLLJ_SHIFT); +diff --git a/sound/soc/codecs/tlv320aic3x.h b/sound/soc/codecs/tlv320aic3x.h +index 06a1978..16d9999 100644 +--- a/sound/soc/codecs/tlv320aic3x.h ++++ b/sound/soc/codecs/tlv320aic3x.h +@@ -166,6 +166,7 @@ + + /* PLL registers bitfields */ + #define PLLP_SHIFT 0 ++#define PLLP_MASK 7 + #define PLLQ_SHIFT 3 + #define PLLR_SHIFT 0 + #define PLLJ_SHIFT 2 diff --git a/3.2.34/bump/1023_linux-3.2.24.patch b/3.2.34/bump/1023_linux-3.2.24.patch new file mode 100644 index 0000000..4692eb4 --- 
/dev/null +++ b/3.2.34/bump/1023_linux-3.2.24.patch @@ -0,0 +1,4684 @@ +diff --git a/Makefile b/Makefile +index 40d1e3b..80bb4fd 100644 +--- a/Makefile ++++ b/Makefile +@@ -1,6 +1,6 @@ + VERSION = 3 + PATCHLEVEL = 2 +-SUBLEVEL = 23 ++SUBLEVEL = 24 + EXTRAVERSION = + NAME = Saber-toothed Squirrel + +diff --git a/arch/arm/plat-samsung/adc.c b/arch/arm/plat-samsung/adc.c +index 33ecd0c..b1e05cc 100644 +--- a/arch/arm/plat-samsung/adc.c ++++ b/arch/arm/plat-samsung/adc.c +@@ -157,11 +157,13 @@ int s3c_adc_start(struct s3c_adc_client *client, + return -EINVAL; + } + +- if (client->is_ts && adc->ts_pend) +- return -EAGAIN; +- + spin_lock_irqsave(&adc->lock, flags); + ++ if (client->is_ts && adc->ts_pend) { ++ spin_unlock_irqrestore(&adc->lock, flags); ++ return -EAGAIN; ++ } ++ + client->channel = channel; + client->nr_samples = nr_samples; + +diff --git a/arch/mips/include/asm/thread_info.h b/arch/mips/include/asm/thread_info.h +index 97f8bf6..adda036 100644 +--- a/arch/mips/include/asm/thread_info.h ++++ b/arch/mips/include/asm/thread_info.h +@@ -60,6 +60,8 @@ struct thread_info { + register struct thread_info *__current_thread_info __asm__("$28"); + #define current_thread_info() __current_thread_info + ++#endif /* !__ASSEMBLY__ */ ++ + /* thread information allocation */ + #if defined(CONFIG_PAGE_SIZE_4KB) && defined(CONFIG_32BIT) + #define THREAD_SIZE_ORDER (1) +@@ -97,8 +99,6 @@ register struct thread_info *__current_thread_info __asm__("$28"); + + #define free_thread_info(info) kfree(info) + +-#endif /* !__ASSEMBLY__ */ +- + #define PREEMPT_ACTIVE 0x10000000 + + /* +diff --git a/arch/mips/kernel/vmlinux.lds.S b/arch/mips/kernel/vmlinux.lds.S +index a81176f..be281c6 100644 +--- a/arch/mips/kernel/vmlinux.lds.S ++++ b/arch/mips/kernel/vmlinux.lds.S +@@ -1,5 +1,6 @@ + #include + #include ++#include + #include + + #undef mips +@@ -73,7 +74,7 @@ SECTIONS + .data : { /* Data */ + . = . + DATAOFFSET; /* for CONFIG_MAPPED_KERNEL */ + +- INIT_TASK_DATA(PAGE_SIZE) ++ INIT_TASK_DATA(THREAD_SIZE) + NOSAVE_DATA + CACHELINE_ALIGNED_DATA(1 << CONFIG_MIPS_L1_CACHE_SHIFT) + READ_MOSTLY_DATA(1 << CONFIG_MIPS_L1_CACHE_SHIFT) +diff --git a/arch/powerpc/include/asm/cputime.h b/arch/powerpc/include/asm/cputime.h +index 98b7c4b..fa3f921 100644 +--- a/arch/powerpc/include/asm/cputime.h ++++ b/arch/powerpc/include/asm/cputime.h +@@ -126,11 +126,11 @@ static inline u64 cputime64_to_jiffies64(const cputime_t ct) + /* + * Convert cputime <-> microseconds + */ +-extern u64 __cputime_msec_factor; ++extern u64 __cputime_usec_factor; + + static inline unsigned long cputime_to_usecs(const cputime_t ct) + { +- return mulhdu(ct, __cputime_msec_factor) * USEC_PER_MSEC; ++ return mulhdu(ct, __cputime_usec_factor); + } + + static inline cputime_t usecs_to_cputime(const unsigned long us) +@@ -143,7 +143,7 @@ static inline cputime_t usecs_to_cputime(const unsigned long us) + sec = us / 1000000; + if (ct) { + ct *= tb_ticks_per_sec; +- do_div(ct, 1000); ++ do_div(ct, 1000000); + } + if (sec) + ct += (cputime_t) sec * tb_ticks_per_sec; +diff --git a/arch/powerpc/kernel/time.c b/arch/powerpc/kernel/time.c +index 5db163c..ec8affe 100644 +--- a/arch/powerpc/kernel/time.c ++++ b/arch/powerpc/kernel/time.c +@@ -168,13 +168,13 @@ EXPORT_SYMBOL_GPL(ppc_tb_freq); + #ifdef CONFIG_VIRT_CPU_ACCOUNTING + /* + * Factors for converting from cputime_t (timebase ticks) to +- * jiffies, milliseconds, seconds, and clock_t (1/USER_HZ seconds). ++ * jiffies, microseconds, seconds, and clock_t (1/USER_HZ seconds). 
+ * These are all stored as 0.64 fixed-point binary fractions. + */ + u64 __cputime_jiffies_factor; + EXPORT_SYMBOL(__cputime_jiffies_factor); +-u64 __cputime_msec_factor; +-EXPORT_SYMBOL(__cputime_msec_factor); ++u64 __cputime_usec_factor; ++EXPORT_SYMBOL(__cputime_usec_factor); + u64 __cputime_sec_factor; + EXPORT_SYMBOL(__cputime_sec_factor); + u64 __cputime_clockt_factor; +@@ -192,8 +192,8 @@ static void calc_cputime_factors(void) + + div128_by_32(HZ, 0, tb_ticks_per_sec, &res); + __cputime_jiffies_factor = res.result_low; +- div128_by_32(1000, 0, tb_ticks_per_sec, &res); +- __cputime_msec_factor = res.result_low; ++ div128_by_32(1000000, 0, tb_ticks_per_sec, &res); ++ __cputime_usec_factor = res.result_low; + div128_by_32(1, 0, tb_ticks_per_sec, &res); + __cputime_sec_factor = res.result_low; + div128_by_32(USER_HZ, 0, tb_ticks_per_sec, &res); +diff --git a/arch/x86/kernel/acpi/boot.c b/arch/x86/kernel/acpi/boot.c +index 4558f0d..479d03c 100644 +--- a/arch/x86/kernel/acpi/boot.c ++++ b/arch/x86/kernel/acpi/boot.c +@@ -416,12 +416,14 @@ acpi_parse_int_src_ovr(struct acpi_subtable_header * header, + return 0; + } + +- if (intsrc->source_irq == 0 && intsrc->global_irq == 2) { ++ if (intsrc->source_irq == 0) { + if (acpi_skip_timer_override) { +- printk(PREFIX "BIOS IRQ0 pin2 override ignored.\n"); ++ printk(PREFIX "BIOS IRQ0 override ignored.\n"); + return 0; + } +- if (acpi_fix_pin2_polarity && (intsrc->inti_flags & ACPI_MADT_POLARITY_MASK)) { ++ ++ if ((intsrc->global_irq == 2) && acpi_fix_pin2_polarity ++ && (intsrc->inti_flags & ACPI_MADT_POLARITY_MASK)) { + intsrc->inti_flags &= ~ACPI_MADT_POLARITY_MASK; + printk(PREFIX "BIOS IRQ0 pin2 override: forcing polarity to high active.\n"); + } +@@ -1327,17 +1329,12 @@ static int __init dmi_disable_acpi(const struct dmi_system_id *d) + } + + /* +- * Force ignoring BIOS IRQ0 pin2 override ++ * Force ignoring BIOS IRQ0 override + */ + static int __init dmi_ignore_irq0_timer_override(const struct dmi_system_id *d) + { +- /* +- * The ati_ixp4x0_rev() early PCI quirk should have set +- * the acpi_skip_timer_override flag already: +- */ + if (!acpi_skip_timer_override) { +- WARN(1, KERN_ERR "ati_ixp4x0 quirk not complete.\n"); +- pr_notice("%s detected: Ignoring BIOS IRQ0 pin2 override\n", ++ pr_notice("%s detected: Ignoring BIOS IRQ0 override\n", + d->ident); + acpi_skip_timer_override = 1; + } +@@ -1431,7 +1428,7 @@ static struct dmi_system_id __initdata acpi_dmi_table_late[] = { + * is enabled. This input is incorrectly designated the + * ISA IRQ 0 via an interrupt source override even though + * it is wired to the output of the master 8259A and INTIN0 +- * is not connected at all. Force ignoring BIOS IRQ0 pin2 ++ * is not connected at all. Force ignoring BIOS IRQ0 + * override in that cases. + */ + { +@@ -1466,6 +1463,14 @@ static struct dmi_system_id __initdata acpi_dmi_table_late[] = { + DMI_MATCH(DMI_PRODUCT_NAME, "HP Compaq 6715b"), + }, + }, ++ { ++ .callback = dmi_ignore_irq0_timer_override, ++ .ident = "FUJITSU SIEMENS", ++ .matches = { ++ DMI_MATCH(DMI_SYS_VENDOR, "FUJITSU SIEMENS"), ++ DMI_MATCH(DMI_PRODUCT_NAME, "AMILO PRO V2030"), ++ }, ++ }, + {} + }; + +diff --git a/arch/x86/kernel/reboot.c b/arch/x86/kernel/reboot.c +index 37a458b..e61f79c 100644 +--- a/arch/x86/kernel/reboot.c ++++ b/arch/x86/kernel/reboot.c +@@ -460,6 +460,14 @@ static struct dmi_system_id __initdata pci_reboot_dmi_table[] = { + DMI_MATCH(DMI_PRODUCT_NAME, "OptiPlex 990"), + }, + }, ++ { /* Handle problems with rebooting on the Precision M6600. 
*/ ++ .callback = set_pci_reboot, ++ .ident = "Dell OptiPlex 990", ++ .matches = { ++ DMI_MATCH(DMI_SYS_VENDOR, "Dell Inc."), ++ DMI_MATCH(DMI_PRODUCT_NAME, "Precision M6600"), ++ }, ++ }, + { } + }; + +diff --git a/block/scsi_ioctl.c b/block/scsi_ioctl.c +index 688be8a..9e76a32 100644 +--- a/block/scsi_ioctl.c ++++ b/block/scsi_ioctl.c +@@ -721,11 +721,14 @@ int scsi_verify_blk_ioctl(struct block_device *bd, unsigned int cmd) + break; + } + ++ if (capable(CAP_SYS_RAWIO)) ++ return 0; ++ + /* In particular, rule out all resets and host-specific ioctls. */ + printk_ratelimited(KERN_WARNING + "%s: sending ioctl %x to a partition!\n", current->comm, cmd); + +- return capable(CAP_SYS_RAWIO) ? 0 : -ENOTTY; ++ return -ENOTTY; + } + EXPORT_SYMBOL(scsi_verify_blk_ioctl); + +diff --git a/drivers/acpi/processor_core.c b/drivers/acpi/processor_core.c +index c850de4..eff7222 100644 +--- a/drivers/acpi/processor_core.c ++++ b/drivers/acpi/processor_core.c +@@ -189,10 +189,12 @@ int acpi_get_cpuid(acpi_handle handle, int type, u32 acpi_id) + * Processor (CPU3, 0x03, 0x00000410, 0x06) {} + * } + * +- * Ignores apic_id and always return 0 for CPU0's handle. ++ * Ignores apic_id and always returns 0 for the processor ++ * handle with acpi id 0 if nr_cpu_ids is 1. ++ * This should be the case if SMP tables are not found. + * Return -1 for other CPU's handle. + */ +- if (acpi_id == 0) ++ if (nr_cpu_ids <= 1 && acpi_id == 0) + return acpi_id; + else + return apic_id; +diff --git a/drivers/acpi/sleep.c b/drivers/acpi/sleep.c +index ca191ff..ed6bc52 100644 +--- a/drivers/acpi/sleep.c ++++ b/drivers/acpi/sleep.c +@@ -702,8 +702,8 @@ int acpi_pm_device_sleep_state(struct device *dev, int *d_min_p) + * can wake the system. _S0W may be valid, too. + */ + if (acpi_target_sleep_state == ACPI_STATE_S0 || +- (device_may_wakeup(dev) && +- adev->wakeup.sleep_state <= acpi_target_sleep_state)) { ++ (device_may_wakeup(dev) && adev->wakeup.flags.valid && ++ adev->wakeup.sleep_state >= acpi_target_sleep_state)) { + acpi_status status; + + acpi_method[3] = 'W'; +diff --git a/drivers/acpi/sysfs.c b/drivers/acpi/sysfs.c +index 9f66181..240a244 100644 +--- a/drivers/acpi/sysfs.c ++++ b/drivers/acpi/sysfs.c +@@ -173,7 +173,7 @@ static int param_set_trace_state(const char *val, struct kernel_param *kp) + { + int result = 0; + +- if (!strncmp(val, "enable", strlen("enable") - 1)) { ++ if (!strncmp(val, "enable", strlen("enable"))) { + result = acpi_debug_trace(trace_method_name, trace_debug_level, + trace_debug_layer, 0); + if (result) +@@ -181,7 +181,7 @@ static int param_set_trace_state(const char *val, struct kernel_param *kp) + goto exit; + } + +- if (!strncmp(val, "disable", strlen("disable") - 1)) { ++ if (!strncmp(val, "disable", strlen("disable"))) { + int name = 0; + result = acpi_debug_trace((char *)&name, trace_debug_level, + trace_debug_layer, 0); +diff --git a/drivers/gpio/gpio-wm8994.c b/drivers/gpio/gpio-wm8994.c +index 96198f3..a2da8f2 100644 +--- a/drivers/gpio/gpio-wm8994.c ++++ b/drivers/gpio/gpio-wm8994.c +@@ -89,8 +89,11 @@ static int wm8994_gpio_direction_out(struct gpio_chip *chip, + struct wm8994_gpio *wm8994_gpio = to_wm8994_gpio(chip); + struct wm8994 *wm8994 = wm8994_gpio->wm8994; + ++ if (value) ++ value = WM8994_GPN_LVL; ++ + return wm8994_set_bits(wm8994, WM8994_GPIO_1 + offset, +- WM8994_GPN_DIR, 0); ++ WM8994_GPN_DIR | WM8994_GPN_LVL, value); + } + + static void wm8994_gpio_set(struct gpio_chip *chip, unsigned offset, int value) +diff --git a/drivers/gpu/drm/i915/intel_display.c 
b/drivers/gpu/drm/i915/intel_display.c +index 6aa7716..cc75c4b 100644 +--- a/drivers/gpu/drm/i915/intel_display.c ++++ b/drivers/gpu/drm/i915/intel_display.c +@@ -8043,8 +8043,8 @@ void gen6_enable_rps(struct drm_i915_private *dev_priv) + I915_WRITE(GEN6_RC6pp_THRESHOLD, 64000); /* unused */ + + if (intel_enable_rc6(dev_priv->dev)) +- rc6_mask = GEN6_RC_CTL_RC6p_ENABLE | +- GEN6_RC_CTL_RC6_ENABLE; ++ rc6_mask = GEN6_RC_CTL_RC6_ENABLE | ++ ((IS_GEN7(dev_priv->dev)) ? GEN6_RC_CTL_RC6p_ENABLE : 0); + + I915_WRITE(GEN6_RC_CONTROL, + rc6_mask | +diff --git a/drivers/hid/hid-apple.c b/drivers/hid/hid-apple.c +index 299d238..899c712 100644 +--- a/drivers/hid/hid-apple.c ++++ b/drivers/hid/hid-apple.c +@@ -514,6 +514,12 @@ static const struct hid_device_id apple_devices[] = { + .driver_data = APPLE_HAS_FN | APPLE_ISO_KEYBOARD }, + { HID_USB_DEVICE(USB_VENDOR_ID_APPLE, USB_DEVICE_ID_APPLE_WELLSPRING5A_JIS), + .driver_data = APPLE_HAS_FN | APPLE_RDESC_JIS }, ++ { HID_USB_DEVICE(USB_VENDOR_ID_APPLE, USB_DEVICE_ID_APPLE_WELLSPRING7_ANSI), ++ .driver_data = APPLE_HAS_FN }, ++ { HID_USB_DEVICE(USB_VENDOR_ID_APPLE, USB_DEVICE_ID_APPLE_WELLSPRING7_ISO), ++ .driver_data = APPLE_HAS_FN | APPLE_ISO_KEYBOARD }, ++ { HID_USB_DEVICE(USB_VENDOR_ID_APPLE, USB_DEVICE_ID_APPLE_WELLSPRING7_JIS), ++ .driver_data = APPLE_HAS_FN | APPLE_RDESC_JIS }, + { HID_BLUETOOTH_DEVICE(USB_VENDOR_ID_APPLE, USB_DEVICE_ID_APPLE_ALU_WIRELESS_2009_ANSI), + .driver_data = APPLE_NUMLOCK_EMULATION | APPLE_HAS_FN }, + { HID_BLUETOOTH_DEVICE(USB_VENDOR_ID_APPLE, USB_DEVICE_ID_APPLE_ALU_WIRELESS_2009_ISO), +diff --git a/drivers/hid/hid-core.c b/drivers/hid/hid-core.c +index c27b402..95430a0 100644 +--- a/drivers/hid/hid-core.c ++++ b/drivers/hid/hid-core.c +@@ -1374,6 +1374,9 @@ static const struct hid_device_id hid_have_special_driver[] = { + { HID_USB_DEVICE(USB_VENDOR_ID_APPLE, USB_DEVICE_ID_APPLE_WELLSPRING6A_ANSI) }, + { HID_USB_DEVICE(USB_VENDOR_ID_APPLE, USB_DEVICE_ID_APPLE_WELLSPRING6A_ISO) }, + { HID_USB_DEVICE(USB_VENDOR_ID_APPLE, USB_DEVICE_ID_APPLE_WELLSPRING6A_JIS) }, ++ { HID_USB_DEVICE(USB_VENDOR_ID_APPLE, USB_DEVICE_ID_APPLE_WELLSPRING7_ANSI) }, ++ { HID_USB_DEVICE(USB_VENDOR_ID_APPLE, USB_DEVICE_ID_APPLE_WELLSPRING7_ISO) }, ++ { HID_USB_DEVICE(USB_VENDOR_ID_APPLE, USB_DEVICE_ID_APPLE_WELLSPRING7_JIS) }, + { HID_BLUETOOTH_DEVICE(USB_VENDOR_ID_APPLE, USB_DEVICE_ID_APPLE_ALU_WIRELESS_2009_ANSI) }, + { HID_BLUETOOTH_DEVICE(USB_VENDOR_ID_APPLE, USB_DEVICE_ID_APPLE_ALU_WIRELESS_2009_ISO) }, + { HID_BLUETOOTH_DEVICE(USB_VENDOR_ID_APPLE, USB_DEVICE_ID_APPLE_ALU_WIRELESS_2009_JIS) }, +@@ -1884,6 +1887,7 @@ static const struct hid_device_id hid_ignore_list[] = { + { HID_USB_DEVICE(USB_VENDOR_ID_LD, USB_DEVICE_ID_LD_MCT) }, + { HID_USB_DEVICE(USB_VENDOR_ID_LD, USB_DEVICE_ID_LD_HYBRID) }, + { HID_USB_DEVICE(USB_VENDOR_ID_LD, USB_DEVICE_ID_LD_HEATCONTROL) }, ++ { HID_USB_DEVICE(USB_VENDOR_ID_MADCATZ, USB_DEVICE_ID_MADCATZ_BEATPAD) }, + { HID_USB_DEVICE(USB_VENDOR_ID_MCC, USB_DEVICE_ID_MCC_PMD1024LS) }, + { HID_USB_DEVICE(USB_VENDOR_ID_MCC, USB_DEVICE_ID_MCC_PMD1208LS) }, + { HID_USB_DEVICE(USB_VENDOR_ID_MICROCHIP, USB_DEVICE_ID_PICKIT1) }, +@@ -1968,6 +1972,9 @@ static const struct hid_device_id hid_mouse_ignore_list[] = { + { HID_USB_DEVICE(USB_VENDOR_ID_APPLE, USB_DEVICE_ID_APPLE_WELLSPRING6A_ANSI) }, + { HID_USB_DEVICE(USB_VENDOR_ID_APPLE, USB_DEVICE_ID_APPLE_WELLSPRING6A_ISO) }, + { HID_USB_DEVICE(USB_VENDOR_ID_APPLE, USB_DEVICE_ID_APPLE_WELLSPRING6A_JIS) }, ++ { HID_USB_DEVICE(USB_VENDOR_ID_APPLE, 
USB_DEVICE_ID_APPLE_WELLSPRING7_ANSI) }, ++ { HID_USB_DEVICE(USB_VENDOR_ID_APPLE, USB_DEVICE_ID_APPLE_WELLSPRING7_ISO) }, ++ { HID_USB_DEVICE(USB_VENDOR_ID_APPLE, USB_DEVICE_ID_APPLE_WELLSPRING7_JIS) }, + { HID_USB_DEVICE(USB_VENDOR_ID_APPLE, USB_DEVICE_ID_APPLE_FOUNTAIN_TP_ONLY) }, + { HID_USB_DEVICE(USB_VENDOR_ID_APPLE, USB_DEVICE_ID_APPLE_GEYSER1_TP_ONLY) }, + { } +diff --git a/drivers/hid/hid-ids.h b/drivers/hid/hid-ids.h +index fba3fc4..7db934d 100644 +--- a/drivers/hid/hid-ids.h ++++ b/drivers/hid/hid-ids.h +@@ -125,6 +125,9 @@ + #define USB_DEVICE_ID_APPLE_WELLSPRING6_ANSI 0x024c + #define USB_DEVICE_ID_APPLE_WELLSPRING6_ISO 0x024d + #define USB_DEVICE_ID_APPLE_WELLSPRING6_JIS 0x024e ++#define USB_DEVICE_ID_APPLE_WELLSPRING7_ANSI 0x0262 ++#define USB_DEVICE_ID_APPLE_WELLSPRING7_ISO 0x0263 ++#define USB_DEVICE_ID_APPLE_WELLSPRING7_JIS 0x0264 + #define USB_DEVICE_ID_APPLE_ALU_WIRELESS_2009_ANSI 0x0239 + #define USB_DEVICE_ID_APPLE_ALU_WIRELESS_2009_ISO 0x023a + #define USB_DEVICE_ID_APPLE_ALU_WIRELESS_2009_JIS 0x023b +@@ -491,6 +494,9 @@ + #define USB_DEVICE_ID_CRYSTALTOUCH 0x0006 + #define USB_DEVICE_ID_CRYSTALTOUCH_DUAL 0x0007 + ++#define USB_VENDOR_ID_MADCATZ 0x0738 ++#define USB_DEVICE_ID_MADCATZ_BEATPAD 0x4540 ++ + #define USB_VENDOR_ID_MCC 0x09db + #define USB_DEVICE_ID_MCC_PMD1024LS 0x0076 + #define USB_DEVICE_ID_MCC_PMD1208LS 0x007a +diff --git a/drivers/hwmon/it87.c b/drivers/hwmon/it87.c +index d912649..1ba7af2 100644 +--- a/drivers/hwmon/it87.c ++++ b/drivers/hwmon/it87.c +@@ -2086,7 +2086,7 @@ static void __devinit it87_init_device(struct platform_device *pdev) + + /* Start monitoring */ + it87_write_value(data, IT87_REG_CONFIG, +- (it87_read_value(data, IT87_REG_CONFIG) & 0x36) ++ (it87_read_value(data, IT87_REG_CONFIG) & 0x3e) + | (update_vbat ? 
0x41 : 0x01)); + } + +diff --git a/drivers/hwspinlock/hwspinlock_core.c b/drivers/hwspinlock/hwspinlock_core.c +index 61c9cf1..1201a15 100644 +--- a/drivers/hwspinlock/hwspinlock_core.c ++++ b/drivers/hwspinlock/hwspinlock_core.c +@@ -345,7 +345,7 @@ int hwspin_lock_register(struct hwspinlock_device *bank, struct device *dev, + spin_lock_init(&hwlock->lock); + hwlock->bank = bank; + +- ret = hwspin_lock_register_single(hwlock, i); ++ ret = hwspin_lock_register_single(hwlock, base_id + i); + if (ret) + goto reg_failed; + } +@@ -354,7 +354,7 @@ int hwspin_lock_register(struct hwspinlock_device *bank, struct device *dev, + + reg_failed: + while (--i >= 0) +- hwspin_lock_unregister_single(i); ++ hwspin_lock_unregister_single(base_id + i); + return ret; + } + EXPORT_SYMBOL_GPL(hwspin_lock_register); +diff --git a/drivers/input/joystick/xpad.c b/drivers/input/joystick/xpad.c +index d728875..2189cbf 100644 +--- a/drivers/input/joystick/xpad.c ++++ b/drivers/input/joystick/xpad.c +@@ -142,6 +142,7 @@ static const struct xpad_device { + { 0x0c12, 0x880a, "Pelican Eclipse PL-2023", 0, XTYPE_XBOX }, + { 0x0c12, 0x8810, "Zeroplus Xbox Controller", 0, XTYPE_XBOX }, + { 0x0c12, 0x9902, "HAMA VibraX - *FAULTY HARDWARE*", 0, XTYPE_XBOX }, ++ { 0x0d2f, 0x0002, "Andamiro Pump It Up pad", MAP_DPAD_TO_BUTTONS, XTYPE_XBOX }, + { 0x0e4c, 0x1097, "Radica Gamester Controller", 0, XTYPE_XBOX }, + { 0x0e4c, 0x2390, "Radica Games Jtech Controller", 0, XTYPE_XBOX }, + { 0x0e6f, 0x0003, "Logic3 Freebird wireless Controller", 0, XTYPE_XBOX }, +@@ -164,6 +165,7 @@ static const struct xpad_device { + { 0x1bad, 0x0003, "Harmonix Rock Band Drumkit", MAP_DPAD_TO_BUTTONS, XTYPE_XBOX360 }, + { 0x0f0d, 0x0016, "Hori Real Arcade Pro.EX", MAP_TRIGGERS_TO_BUTTONS, XTYPE_XBOX360 }, + { 0x0f0d, 0x000d, "Hori Fighting Stick EX2", MAP_TRIGGERS_TO_BUTTONS, XTYPE_XBOX360 }, ++ { 0x1689, 0xfd00, "Razer Onza Tournament Edition", MAP_DPAD_TO_BUTTONS, XTYPE_XBOX360 }, + { 0xffff, 0xffff, "Chinese-made Xbox Controller", 0, XTYPE_XBOX }, + { 0x0000, 0x0000, "Generic X-Box pad", 0, XTYPE_UNKNOWN } + }; +@@ -238,12 +240,14 @@ static struct usb_device_id xpad_table [] = { + XPAD_XBOX360_VENDOR(0x045e), /* Microsoft X-Box 360 controllers */ + XPAD_XBOX360_VENDOR(0x046d), /* Logitech X-Box 360 style controllers */ + XPAD_XBOX360_VENDOR(0x0738), /* Mad Catz X-Box 360 controllers */ ++ { USB_DEVICE(0x0738, 0x4540) }, /* Mad Catz Beat Pad */ + XPAD_XBOX360_VENDOR(0x0e6f), /* 0x0e6f X-Box 360 controllers */ + XPAD_XBOX360_VENDOR(0x12ab), /* X-Box 360 dance pads */ + XPAD_XBOX360_VENDOR(0x1430), /* RedOctane X-Box 360 controllers */ + XPAD_XBOX360_VENDOR(0x146b), /* BigBen Interactive Controllers */ + XPAD_XBOX360_VENDOR(0x1bad), /* Harminix Rock Band Guitar and Drums */ +- XPAD_XBOX360_VENDOR(0x0f0d), /* Hori Controllers */ ++ XPAD_XBOX360_VENDOR(0x0f0d), /* Hori Controllers */ ++ XPAD_XBOX360_VENDOR(0x1689), /* Razer Onza */ + { } + }; + +diff --git a/drivers/input/mouse/bcm5974.c b/drivers/input/mouse/bcm5974.c +index 5ec617e..ec58f48 100644 +--- a/drivers/input/mouse/bcm5974.c ++++ b/drivers/input/mouse/bcm5974.c +@@ -79,6 +79,10 @@ + #define USB_DEVICE_ID_APPLE_WELLSPRING5A_ANSI 0x0252 + #define USB_DEVICE_ID_APPLE_WELLSPRING5A_ISO 0x0253 + #define USB_DEVICE_ID_APPLE_WELLSPRING5A_JIS 0x0254 ++/* MacbookPro10,1 (unibody, June 2012) */ ++#define USB_DEVICE_ID_APPLE_WELLSPRING7_ANSI 0x0262 ++#define USB_DEVICE_ID_APPLE_WELLSPRING7_ISO 0x0263 ++#define USB_DEVICE_ID_APPLE_WELLSPRING7_JIS 0x0264 + + #define BCM5974_DEVICE(prod) { \ + .match_flags = 
(USB_DEVICE_ID_MATCH_DEVICE | \ +@@ -128,6 +132,10 @@ static const struct usb_device_id bcm5974_table[] = { + BCM5974_DEVICE(USB_DEVICE_ID_APPLE_WELLSPRING5A_ANSI), + BCM5974_DEVICE(USB_DEVICE_ID_APPLE_WELLSPRING5A_ISO), + BCM5974_DEVICE(USB_DEVICE_ID_APPLE_WELLSPRING5A_JIS), ++ /* MacbookPro10,1 */ ++ BCM5974_DEVICE(USB_DEVICE_ID_APPLE_WELLSPRING7_ANSI), ++ BCM5974_DEVICE(USB_DEVICE_ID_APPLE_WELLSPRING7_ISO), ++ BCM5974_DEVICE(USB_DEVICE_ID_APPLE_WELLSPRING7_JIS), + /* Terminating entry */ + {} + }; +@@ -354,6 +362,18 @@ static const struct bcm5974_config bcm5974_config_table[] = { + { DIM_X, DIM_X / SN_COORD, -4620, 5140 }, + { DIM_Y, DIM_Y / SN_COORD, -150, 6600 } + }, ++ { ++ USB_DEVICE_ID_APPLE_WELLSPRING7_ANSI, ++ USB_DEVICE_ID_APPLE_WELLSPRING7_ISO, ++ USB_DEVICE_ID_APPLE_WELLSPRING7_JIS, ++ HAS_INTEGRATED_BUTTON, ++ 0x84, sizeof(struct bt_data), ++ 0x81, TYPE2, FINGER_TYPE2, FINGER_TYPE2 + SIZEOF_ALL_FINGERS, ++ { DIM_PRESSURE, DIM_PRESSURE / SN_PRESSURE, 0, 300 }, ++ { DIM_WIDTH, DIM_WIDTH / SN_WIDTH, 0, 2048 }, ++ { DIM_X, DIM_X / SN_COORD, -4750, 5280 }, ++ { DIM_Y, DIM_Y / SN_COORD, -150, 6730 } ++ }, + {} + }; + +diff --git a/drivers/iommu/amd_iommu.c b/drivers/iommu/amd_iommu.c +index f1d5408..a1b8caa 100644 +--- a/drivers/iommu/amd_iommu.c ++++ b/drivers/iommu/amd_iommu.c +@@ -59,6 +59,8 @@ static struct protection_domain *pt_domain; + + static struct iommu_ops amd_iommu_ops; + ++static struct dma_map_ops amd_iommu_dma_ops; ++ + /* + * general struct to manage commands send to an IOMMU + */ +@@ -1878,6 +1880,11 @@ static int device_change_notifier(struct notifier_block *nb, + list_add_tail(&dma_domain->list, &iommu_pd_list); + spin_unlock_irqrestore(&iommu_pd_list_lock, flags); + ++ if (!iommu_pass_through) ++ dev->archdata.dma_ops = &amd_iommu_dma_ops; ++ else ++ dev->archdata.dma_ops = &nommu_dma_ops; ++ + break; + case BUS_NOTIFY_DEL_DEVICE: + +diff --git a/drivers/iommu/amd_iommu_init.c b/drivers/iommu/amd_iommu_init.c +index 6269eb0..ef2d493 100644 +--- a/drivers/iommu/amd_iommu_init.c ++++ b/drivers/iommu/amd_iommu_init.c +@@ -1468,6 +1468,8 @@ static int __init amd_iommu_init(void) + + register_syscore_ops(&amd_iommu_syscore_ops); + ++ x86_platform.iommu_shutdown = disable_iommus; ++ + if (iommu_pass_through) + goto out; + +@@ -1476,7 +1478,6 @@ static int __init amd_iommu_init(void) + else + printk(KERN_INFO "AMD-Vi: Lazy IO/TLB flushing enabled\n"); + +- x86_platform.iommu_shutdown = disable_iommus; + out: + return ret; + +diff --git a/drivers/md/dm-raid1.c b/drivers/md/dm-raid1.c +index 9bfd057..dae2b7a 100644 +--- a/drivers/md/dm-raid1.c ++++ b/drivers/md/dm-raid1.c +@@ -1080,6 +1080,7 @@ static int mirror_ctr(struct dm_target *ti, unsigned int argc, char **argv) + ti->split_io = dm_rh_get_region_size(ms->rh); + ti->num_flush_requests = 1; + ti->num_discard_requests = 1; ++ ti->discard_zeroes_data_unsupported = 1; + + ms->kmirrord_wq = alloc_workqueue("kmirrord", + WQ_NON_REENTRANT | WQ_MEM_RECLAIM, 0); +@@ -1210,7 +1211,7 @@ static int mirror_end_io(struct dm_target *ti, struct bio *bio, + * We need to dec pending if this was a write. 
+ */ + if (rw == WRITE) { +- if (!(bio->bi_rw & REQ_FLUSH)) ++ if (!(bio->bi_rw & (REQ_FLUSH | REQ_DISCARD))) + dm_rh_dec(ms->rh, map_context->ll); + return error; + } +diff --git a/drivers/md/dm-region-hash.c b/drivers/md/dm-region-hash.c +index 7771ed2..69732e0 100644 +--- a/drivers/md/dm-region-hash.c ++++ b/drivers/md/dm-region-hash.c +@@ -404,6 +404,9 @@ void dm_rh_mark_nosync(struct dm_region_hash *rh, struct bio *bio) + return; + } + ++ if (bio->bi_rw & REQ_DISCARD) ++ return; ++ + /* We must inform the log that the sync count has changed. */ + log->type->set_region_sync(log, region, 0); + +@@ -524,7 +527,7 @@ void dm_rh_inc_pending(struct dm_region_hash *rh, struct bio_list *bios) + struct bio *bio; + + for (bio = bios->head; bio; bio = bio->bi_next) { +- if (bio->bi_rw & REQ_FLUSH) ++ if (bio->bi_rw & (REQ_FLUSH | REQ_DISCARD)) + continue; + rh_inc(rh, dm_rh_bio_to_region(rh, bio)); + } +diff --git a/drivers/md/md.c b/drivers/md/md.c +index 700ecae..d8646d7 100644 +--- a/drivers/md/md.c ++++ b/drivers/md/md.c +@@ -3700,8 +3700,8 @@ array_state_show(struct mddev *mddev, char *page) + return sprintf(page, "%s\n", array_states[st]); + } + +-static int do_md_stop(struct mddev * mddev, int ro, int is_open); +-static int md_set_readonly(struct mddev * mddev, int is_open); ++static int do_md_stop(struct mddev * mddev, int ro, struct block_device *bdev); ++static int md_set_readonly(struct mddev * mddev, struct block_device *bdev); + static int do_md_run(struct mddev * mddev); + static int restart_array(struct mddev *mddev); + +@@ -3717,14 +3717,14 @@ array_state_store(struct mddev *mddev, const char *buf, size_t len) + /* stopping an active array */ + if (atomic_read(&mddev->openers) > 0) + return -EBUSY; +- err = do_md_stop(mddev, 0, 0); ++ err = do_md_stop(mddev, 0, NULL); + break; + case inactive: + /* stopping an active array */ + if (mddev->pers) { + if (atomic_read(&mddev->openers) > 0) + return -EBUSY; +- err = do_md_stop(mddev, 2, 0); ++ err = do_md_stop(mddev, 2, NULL); + } else + err = 0; /* already inactive */ + break; +@@ -3732,7 +3732,7 @@ array_state_store(struct mddev *mddev, const char *buf, size_t len) + break; /* not supported yet */ + case readonly: + if (mddev->pers) +- err = md_set_readonly(mddev, 0); ++ err = md_set_readonly(mddev, NULL); + else { + mddev->ro = 1; + set_disk_ro(mddev->gendisk, 1); +@@ -3742,7 +3742,7 @@ array_state_store(struct mddev *mddev, const char *buf, size_t len) + case read_auto: + if (mddev->pers) { + if (mddev->ro == 0) +- err = md_set_readonly(mddev, 0); ++ err = md_set_readonly(mddev, NULL); + else if (mddev->ro == 1) + err = restart_array(mddev); + if (err == 0) { +@@ -5078,15 +5078,17 @@ void md_stop(struct mddev *mddev) + } + EXPORT_SYMBOL_GPL(md_stop); + +-static int md_set_readonly(struct mddev *mddev, int is_open) ++static int md_set_readonly(struct mddev *mddev, struct block_device *bdev) + { + int err = 0; + mutex_lock(&mddev->open_mutex); +- if (atomic_read(&mddev->openers) > is_open) { ++ if (atomic_read(&mddev->openers) > !!bdev) { + printk("md: %s still in use.\n",mdname(mddev)); + err = -EBUSY; + goto out; + } ++ if (bdev) ++ sync_blockdev(bdev); + if (mddev->pers) { + __md_stop_writes(mddev); + +@@ -5108,18 +5110,26 @@ out: + * 0 - completely stop and dis-assemble array + * 2 - stop but do not disassemble array + */ +-static int do_md_stop(struct mddev * mddev, int mode, int is_open) ++static int do_md_stop(struct mddev * mddev, int mode, ++ struct block_device *bdev) + { + struct gendisk *disk = mddev->gendisk; + struct 
md_rdev *rdev; + + mutex_lock(&mddev->open_mutex); +- if (atomic_read(&mddev->openers) > is_open || ++ if (atomic_read(&mddev->openers) > !!bdev || + mddev->sysfs_active) { + printk("md: %s still in use.\n",mdname(mddev)); + mutex_unlock(&mddev->open_mutex); + return -EBUSY; + } ++ if (bdev) ++ /* It is possible IO was issued on some other ++ * open file which was closed before we took ->open_mutex. ++ * As that was not the last close __blkdev_put will not ++ * have called sync_blockdev, so we must. ++ */ ++ sync_blockdev(bdev); + + if (mddev->pers) { + if (mddev->ro) +@@ -5193,7 +5203,7 @@ static void autorun_array(struct mddev *mddev) + err = do_md_run(mddev); + if (err) { + printk(KERN_WARNING "md: do_md_run() returned %d\n", err); +- do_md_stop(mddev, 0, 0); ++ do_md_stop(mddev, 0, NULL); + } + } + +@@ -6184,11 +6194,11 @@ static int md_ioctl(struct block_device *bdev, fmode_t mode, + goto done_unlock; + + case STOP_ARRAY: +- err = do_md_stop(mddev, 0, 1); ++ err = do_md_stop(mddev, 0, bdev); + goto done_unlock; + + case STOP_ARRAY_RO: +- err = md_set_readonly(mddev, 1); ++ err = md_set_readonly(mddev, bdev); + goto done_unlock; + + case BLKROSET: +diff --git a/drivers/md/raid1.c b/drivers/md/raid1.c +index 7af60ec..2d97bf0 100644 +--- a/drivers/md/raid1.c ++++ b/drivers/md/raid1.c +@@ -1713,8 +1713,14 @@ static void sync_request_write(struct mddev *mddev, struct r1bio *r1_bio) + + if (atomic_dec_and_test(&r1_bio->remaining)) { + /* if we're here, all write(s) have completed, so clean up */ +- md_done_sync(mddev, r1_bio->sectors, 1); +- put_buf(r1_bio); ++ int s = r1_bio->sectors; ++ if (test_bit(R1BIO_MadeGood, &r1_bio->state) || ++ test_bit(R1BIO_WriteError, &r1_bio->state)) ++ reschedule_retry(r1_bio); ++ else { ++ put_buf(r1_bio); ++ md_done_sync(mddev, s, 1); ++ } + } + } + +@@ -2378,9 +2384,10 @@ static sector_t sync_request(struct mddev *mddev, sector_t sector_nr, int *skipp + */ + if (test_bit(MD_RECOVERY_REQUESTED, &mddev->recovery)) { + atomic_set(&r1_bio->remaining, read_targets); +- for (i=0; iraid_disks; i++) { ++ for (i = 0; i < conf->raid_disks && read_targets; i++) { + bio = r1_bio->bios[i]; + if (bio->bi_end_io == end_sync_read) { ++ read_targets--; + md_sync_acct(bio->bi_bdev, nr_sectors); + generic_make_request(bio); + } +diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c +index 6ba4954..26ef63a 100644 +--- a/drivers/md/raid5.c ++++ b/drivers/md/raid5.c +@@ -196,12 +196,14 @@ static void __release_stripe(struct r5conf *conf, struct stripe_head *sh) + BUG_ON(!list_empty(&sh->lru)); + BUG_ON(atomic_read(&conf->active_stripes)==0); + if (test_bit(STRIPE_HANDLE, &sh->state)) { +- if (test_bit(STRIPE_DELAYED, &sh->state)) ++ if (test_bit(STRIPE_DELAYED, &sh->state) && ++ !test_bit(STRIPE_PREREAD_ACTIVE, &sh->state)) + list_add_tail(&sh->lru, &conf->delayed_list); + else if (test_bit(STRIPE_BIT_DELAY, &sh->state) && + sh->bm_seq - conf->seq_write > 0) + list_add_tail(&sh->lru, &conf->bitmap_list); + else { ++ clear_bit(STRIPE_DELAYED, &sh->state); + clear_bit(STRIPE_BIT_DELAY, &sh->state); + list_add_tail(&sh->lru, &conf->handle_list); + } +diff --git a/drivers/media/dvb/dvb-core/dvbdev.c b/drivers/media/dvb/dvb-core/dvbdev.c +index f732877..d5cda35 100644 +--- a/drivers/media/dvb/dvb-core/dvbdev.c ++++ b/drivers/media/dvb/dvb-core/dvbdev.c +@@ -243,6 +243,7 @@ int dvb_register_device(struct dvb_adapter *adap, struct dvb_device **pdvbdev, + if (minor == MAX_DVB_MINORS) { + kfree(dvbdevfops); + kfree(dvbdev); ++ up_write(&minor_rwsem); + 
mutex_unlock(&dvbdev_register_lock); + return -EINVAL; + } +diff --git a/drivers/mtd/nand/nandsim.c b/drivers/mtd/nand/nandsim.c +index 34c03be..83e8e1b 100644 +--- a/drivers/mtd/nand/nandsim.c ++++ b/drivers/mtd/nand/nandsim.c +@@ -28,7 +28,7 @@ + #include + #include + #include +-#include ++#include + #include + #include + #include +@@ -547,12 +547,6 @@ static char *get_partition_name(int i) + return kstrdup(buf, GFP_KERNEL); + } + +-static uint64_t divide(uint64_t n, uint32_t d) +-{ +- do_div(n, d); +- return n; +-} +- + /* + * Initialize the nandsim structure. + * +@@ -581,7 +575,7 @@ static int init_nandsim(struct mtd_info *mtd) + ns->geom.oobsz = mtd->oobsize; + ns->geom.secsz = mtd->erasesize; + ns->geom.pgszoob = ns->geom.pgsz + ns->geom.oobsz; +- ns->geom.pgnum = divide(ns->geom.totsz, ns->geom.pgsz); ++ ns->geom.pgnum = div_u64(ns->geom.totsz, ns->geom.pgsz); + ns->geom.totszoob = ns->geom.totsz + (uint64_t)ns->geom.pgnum * ns->geom.oobsz; + ns->geom.secshift = ffs(ns->geom.secsz) - 1; + ns->geom.pgshift = chip->page_shift; +@@ -924,7 +918,7 @@ static int setup_wear_reporting(struct mtd_info *mtd) + + if (!rptwear) + return 0; +- wear_eb_count = divide(mtd->size, mtd->erasesize); ++ wear_eb_count = div_u64(mtd->size, mtd->erasesize); + mem = wear_eb_count * sizeof(unsigned long); + if (mem / sizeof(unsigned long) != wear_eb_count) { + NS_ERR("Too many erase blocks for wear reporting\n"); +diff --git a/drivers/net/bonding/bond_debugfs.c b/drivers/net/bonding/bond_debugfs.c +index 3680aa2..2cf084e 100644 +--- a/drivers/net/bonding/bond_debugfs.c ++++ b/drivers/net/bonding/bond_debugfs.c +@@ -6,7 +6,7 @@ + #include "bonding.h" + #include "bond_alb.h" + +-#ifdef CONFIG_DEBUG_FS ++#if defined(CONFIG_DEBUG_FS) && !defined(CONFIG_NET_NS) + + #include + #include +diff --git a/drivers/net/bonding/bond_main.c b/drivers/net/bonding/bond_main.c +index 1a88e38..6c284d1 100644 +--- a/drivers/net/bonding/bond_main.c ++++ b/drivers/net/bonding/bond_main.c +@@ -3184,6 +3184,12 @@ static int bond_master_netdev_event(unsigned long event, + switch (event) { + case NETDEV_CHANGENAME: + return bond_event_changename(event_bond); ++ case NETDEV_UNREGISTER: ++ bond_remove_proc_entry(event_bond); ++ break; ++ case NETDEV_REGISTER: ++ bond_create_proc_entry(event_bond); ++ break; + default: + break; + } +@@ -4391,8 +4397,6 @@ static void bond_uninit(struct net_device *bond_dev) + + bond_work_cancel_all(bond); + +- bond_remove_proc_entry(bond); +- + bond_debug_unregister(bond); + + __hw_addr_flush(&bond->mc_list); +@@ -4794,7 +4798,6 @@ static int bond_init(struct net_device *bond_dev) + + bond_set_lockdep_class(bond_dev); + +- bond_create_proc_entry(bond); + list_add_tail(&bond->bond_list, &bn->dev_list); + + bond_prepare_sysfs_group(bond); +diff --git a/drivers/net/ethernet/atheros/atl1c/atl1c_main.c b/drivers/net/ethernet/atheros/atl1c/atl1c_main.c +index eccdcff..5ae7df7 100644 +--- a/drivers/net/ethernet/atheros/atl1c/atl1c_main.c ++++ b/drivers/net/ethernet/atheros/atl1c/atl1c_main.c +@@ -267,7 +267,6 @@ static void atl1c_check_link_status(struct atl1c_adapter *adapter) + dev_warn(&pdev->dev, "stop mac failed\n"); + atl1c_set_aspm(hw, false); + netif_carrier_off(netdev); +- netif_stop_queue(netdev); + atl1c_phy_reset(hw); + atl1c_phy_init(&adapter->hw); + } else { +diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x.h b/drivers/net/ethernet/broadcom/bnx2x/bnx2x.h +index aec7212..8dda46a 100644 +--- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x.h ++++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x.h +@@ 
-723,21 +723,6 @@ struct bnx2x_fastpath { + + #define ETH_RX_ERROR_FALGS ETH_FAST_PATH_RX_CQE_PHY_DECODE_ERR_FLG + +-#define BNX2X_IP_CSUM_ERR(cqe) \ +- (!((cqe)->fast_path_cqe.status_flags & \ +- ETH_FAST_PATH_RX_CQE_IP_XSUM_NO_VALIDATION_FLG) && \ +- ((cqe)->fast_path_cqe.type_error_flags & \ +- ETH_FAST_PATH_RX_CQE_IP_BAD_XSUM_FLG)) +- +-#define BNX2X_L4_CSUM_ERR(cqe) \ +- (!((cqe)->fast_path_cqe.status_flags & \ +- ETH_FAST_PATH_RX_CQE_L4_XSUM_NO_VALIDATION_FLG) && \ +- ((cqe)->fast_path_cqe.type_error_flags & \ +- ETH_FAST_PATH_RX_CQE_L4_BAD_XSUM_FLG)) +- +-#define BNX2X_RX_CSUM_OK(cqe) \ +- (!(BNX2X_L4_CSUM_ERR(cqe) || BNX2X_IP_CSUM_ERR(cqe))) +- + #define BNX2X_PRS_FLAG_OVERETH_IPV4(flags) \ + (((le16_to_cpu(flags) & \ + PARSING_FLAGS_OVER_ETHERNET_PROTOCOL) >> \ +diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.c b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.c +index 580b44e..2c1a5c0 100644 +--- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.c ++++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.c +@@ -220,7 +220,7 @@ int bnx2x_tx_int(struct bnx2x *bp, struct bnx2x_fp_txdata *txdata) + + if ((netif_tx_queue_stopped(txq)) && + (bp->state == BNX2X_STATE_OPEN) && +- (bnx2x_tx_avail(bp, txdata) >= MAX_SKB_FRAGS + 3)) ++ (bnx2x_tx_avail(bp, txdata) >= MAX_SKB_FRAGS + 4)) + netif_tx_wake_queue(txq); + + __netif_tx_unlock(txq); +@@ -551,6 +551,26 @@ static inline void bnx2x_set_skb_rxhash(struct bnx2x *bp, union eth_rx_cqe *cqe, + le32_to_cpu(cqe->fast_path_cqe.rss_hash_result); + } + ++static void bnx2x_csum_validate(struct sk_buff *skb, union eth_rx_cqe *cqe, ++ struct bnx2x_fastpath *fp) ++{ ++ /* Do nothing if no IP/L4 csum validation was done */ ++ ++ if (cqe->fast_path_cqe.status_flags & ++ (ETH_FAST_PATH_RX_CQE_IP_XSUM_NO_VALIDATION_FLG | ++ ETH_FAST_PATH_RX_CQE_L4_XSUM_NO_VALIDATION_FLG)) ++ return; ++ ++ /* If both IP/L4 validation were done, check if an error was found. 
*/ ++ ++ if (cqe->fast_path_cqe.type_error_flags & ++ (ETH_FAST_PATH_RX_CQE_IP_BAD_XSUM_FLG | ++ ETH_FAST_PATH_RX_CQE_L4_BAD_XSUM_FLG)) ++ fp->eth_q_stats.hw_csum_err++; ++ else ++ skb->ip_summed = CHECKSUM_UNNECESSARY; ++} ++ + int bnx2x_rx_int(struct bnx2x_fastpath *fp, int budget) + { + struct bnx2x *bp = fp->bp; +@@ -746,13 +766,9 @@ reuse_rx: + + skb_checksum_none_assert(skb); + +- if (bp->dev->features & NETIF_F_RXCSUM) { ++ if (bp->dev->features & NETIF_F_RXCSUM) ++ bnx2x_csum_validate(skb, cqe, fp); + +- if (likely(BNX2X_RX_CSUM_OK(cqe))) +- skb->ip_summed = CHECKSUM_UNNECESSARY; +- else +- fp->eth_q_stats.hw_csum_err++; +- } + } + + skb_record_rx_queue(skb, fp->index); +@@ -2238,8 +2254,6 @@ int bnx2x_poll(struct napi_struct *napi, int budget) + /* we split the first BD into headers and data BDs + * to ease the pain of our fellow microcode engineers + * we use one mapping for both BDs +- * So far this has only been observed to happen +- * in Other Operating Systems(TM) + */ + static noinline u16 bnx2x_tx_split(struct bnx2x *bp, + struct bnx2x_fp_txdata *txdata, +@@ -2890,7 +2904,7 @@ netdev_tx_t bnx2x_start_xmit(struct sk_buff *skb, struct net_device *dev) + + txdata->tx_bd_prod += nbd; + +- if (unlikely(bnx2x_tx_avail(bp, txdata) < MAX_SKB_FRAGS + 3)) { ++ if (unlikely(bnx2x_tx_avail(bp, txdata) < MAX_SKB_FRAGS + 4)) { + netif_tx_stop_queue(txq); + + /* paired memory barrier is in bnx2x_tx_int(), we have to keep +@@ -2899,7 +2913,7 @@ netdev_tx_t bnx2x_start_xmit(struct sk_buff *skb, struct net_device *dev) + smp_mb(); + + fp->eth_q_stats.driver_xoff++; +- if (bnx2x_tx_avail(bp, txdata) >= MAX_SKB_FRAGS + 3) ++ if (bnx2x_tx_avail(bp, txdata) >= MAX_SKB_FRAGS + 4) + netif_tx_wake_queue(txq); + } + txdata->tx_pkt++; +diff --git a/drivers/net/ethernet/broadcom/tg3.c b/drivers/net/ethernet/broadcom/tg3.c +index 2dcac28..6b258d9 100644 +--- a/drivers/net/ethernet/broadcom/tg3.c ++++ b/drivers/net/ethernet/broadcom/tg3.c +@@ -14046,7 +14046,8 @@ static int __devinit tg3_get_invariants(struct tg3 *tp) + } + } + +- if (tg3_flag(tp, 5755_PLUS)) ++ if (tg3_flag(tp, 5755_PLUS) || ++ GET_ASIC_REV(tp->pci_chip_rev_id) == ASIC_REV_5906) + tg3_flag_set(tp, SHORT_DMA_BUG); + + if (GET_ASIC_REV(tp->pci_chip_rev_id) == ASIC_REV_5719) +diff --git a/drivers/net/ethernet/intel/e1000e/82571.c b/drivers/net/ethernet/intel/e1000e/82571.c +index e556fc3..3072d35 100644 +--- a/drivers/net/ethernet/intel/e1000e/82571.c ++++ b/drivers/net/ethernet/intel/e1000e/82571.c +@@ -1571,6 +1571,9 @@ static s32 e1000_check_for_serdes_link_82571(struct e1000_hw *hw) + ctrl = er32(CTRL); + status = er32(STATUS); + rxcw = er32(RXCW); ++ /* SYNCH bit and IV bit are sticky */ ++ udelay(10); ++ rxcw = er32(RXCW); + + if ((rxcw & E1000_RXCW_SYNCH) && !(rxcw & E1000_RXCW_IV)) { + +diff --git a/drivers/net/ethernet/realtek/r8169.c b/drivers/net/ethernet/realtek/r8169.c +index cc2565c..9e61d6b 100644 +--- a/drivers/net/ethernet/realtek/r8169.c ++++ b/drivers/net/ethernet/realtek/r8169.c +@@ -4185,6 +4185,7 @@ out: + return rc; + + err_out_msi_4: ++ netif_napi_del(&tp->napi); + rtl_disable_msi(pdev, tp); + iounmap(ioaddr); + err_out_free_res_3: +@@ -4210,6 +4211,8 @@ static void __devexit rtl8169_remove_one(struct pci_dev *pdev) + + cancel_delayed_work_sync(&tp->task); + ++ netif_napi_del(&tp->napi); ++ + unregister_netdev(dev); + + rtl_release_firmware(tp); +diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c +index 72cd190..d4d2bc1 100644 +--- 
a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c ++++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c +@@ -1174,6 +1174,7 @@ static netdev_tx_t stmmac_xmit(struct sk_buff *skb, struct net_device *dev) + priv->hw->desc->prepare_tx_desc(desc, 0, len, csum_insertion); + wmb(); + priv->hw->desc->set_tx_owner(desc); ++ wmb(); + } + + /* Interrupt on completition only for the latest segment */ +@@ -1189,6 +1190,7 @@ static netdev_tx_t stmmac_xmit(struct sk_buff *skb, struct net_device *dev) + + /* To avoid raise condition */ + priv->hw->desc->set_tx_owner(first); ++ wmb(); + + priv->cur_tx++; + +@@ -1252,6 +1254,7 @@ static inline void stmmac_rx_refill(struct stmmac_priv *priv) + } + wmb(); + priv->hw->desc->set_rx_owner(p + entry); ++ wmb(); + } + } + +diff --git a/drivers/net/macvtap.c b/drivers/net/macvtap.c +index 1b7082d..26106c0 100644 +--- a/drivers/net/macvtap.c ++++ b/drivers/net/macvtap.c +@@ -504,10 +504,11 @@ static int zerocopy_sg_from_iovec(struct sk_buff *skb, const struct iovec *from, + if (copy > size) { + ++from; + --count; +- } ++ offset = 0; ++ } else ++ offset += size; + copy -= size; + offset1 += size; +- offset = 0; + } + + if (len == offset1) +@@ -517,24 +518,29 @@ static int zerocopy_sg_from_iovec(struct sk_buff *skb, const struct iovec *from, + struct page *page[MAX_SKB_FRAGS]; + int num_pages; + unsigned long base; ++ unsigned long truesize; + +- len = from->iov_len - offset1; ++ len = from->iov_len - offset; + if (!len) { +- offset1 = 0; ++ offset = 0; + ++from; + continue; + } +- base = (unsigned long)from->iov_base + offset1; ++ base = (unsigned long)from->iov_base + offset; + size = ((base & ~PAGE_MASK) + len + ~PAGE_MASK) >> PAGE_SHIFT; ++ if (i + size > MAX_SKB_FRAGS) ++ return -EMSGSIZE; + num_pages = get_user_pages_fast(base, size, 0, &page[i]); +- if ((num_pages != size) || +- (num_pages > MAX_SKB_FRAGS - skb_shinfo(skb)->nr_frags)) +- /* put_page is in skb free */ ++ if (num_pages != size) { ++ for (i = 0; i < num_pages; i++) ++ put_page(page[i]); + return -EFAULT; ++ } ++ truesize = size * PAGE_SIZE; + skb->data_len += len; + skb->len += len; +- skb->truesize += len; +- atomic_add(len, &skb->sk->sk_wmem_alloc); ++ skb->truesize += truesize; ++ atomic_add(truesize, &skb->sk->sk_wmem_alloc); + while (len) { + int off = base & ~PAGE_MASK; + int size = min_t(int, len, PAGE_SIZE - off); +@@ -545,7 +551,7 @@ static int zerocopy_sg_from_iovec(struct sk_buff *skb, const struct iovec *from, + len -= size; + i++; + } +- offset1 = 0; ++ offset = 0; + ++from; + } + return 0; +@@ -645,7 +651,7 @@ static ssize_t macvtap_get_user(struct macvtap_queue *q, struct msghdr *m, + int err; + struct virtio_net_hdr vnet_hdr = { 0 }; + int vnet_hdr_len = 0; +- int copylen; ++ int copylen = 0; + bool zerocopy = false; + + if (q->flags & IFF_VNET_HDR) { +@@ -674,15 +680,31 @@ static ssize_t macvtap_get_user(struct macvtap_queue *q, struct msghdr *m, + if (unlikely(len < ETH_HLEN)) + goto err; + ++ err = -EMSGSIZE; ++ if (unlikely(count > UIO_MAXIOV)) ++ goto err; ++ + if (m && m->msg_control && sock_flag(&q->sk, SOCK_ZEROCOPY)) + zerocopy = true; + + if (zerocopy) { ++ /* Userspace may produce vectors with count greater than ++ * MAX_SKB_FRAGS, so we need to linearize parts of the skb ++ * to let the rest of data to be fit in the frags. 
++ */ ++ if (count > MAX_SKB_FRAGS) { ++ copylen = iov_length(iv, count - MAX_SKB_FRAGS); ++ if (copylen < vnet_hdr_len) ++ copylen = 0; ++ else ++ copylen -= vnet_hdr_len; ++ } + /* There are 256 bytes to be copied in skb, so there is enough + * room for skb expand head in case it is used. + * The rest buffer is mapped from userspace. + */ +- copylen = vnet_hdr.hdr_len; ++ if (copylen < vnet_hdr.hdr_len) ++ copylen = vnet_hdr.hdr_len; + if (!copylen) + copylen = GOODCOPY_LEN; + } else +@@ -693,10 +715,9 @@ static ssize_t macvtap_get_user(struct macvtap_queue *q, struct msghdr *m, + if (!skb) + goto err; + +- if (zerocopy) { ++ if (zerocopy) + err = zerocopy_sg_from_iovec(skb, iv, vnet_hdr_len, count); +- skb_shinfo(skb)->tx_flags |= SKBTX_DEV_ZEROCOPY; +- } else ++ else + err = skb_copy_datagram_from_iovec(skb, 0, iv, vnet_hdr_len, + len); + if (err) +@@ -715,8 +736,10 @@ static ssize_t macvtap_get_user(struct macvtap_queue *q, struct msghdr *m, + rcu_read_lock_bh(); + vlan = rcu_dereference_bh(q->vlan); + /* copy skb_ubuf_info for callback when skb has no error */ +- if (zerocopy) ++ if (zerocopy) { + skb_shinfo(skb)->destructor_arg = m->msg_control; ++ skb_shinfo(skb)->tx_flags |= SKBTX_DEV_ZEROCOPY; ++ } + if (vlan) + macvlan_start_xmit(skb, vlan->dev); + else +diff --git a/drivers/net/usb/ipheth.c b/drivers/net/usb/ipheth.c +index ad96164..00ed9c1 100644 +--- a/drivers/net/usb/ipheth.c ++++ b/drivers/net/usb/ipheth.c +@@ -59,6 +59,7 @@ + #define USB_PRODUCT_IPHONE_3G 0x1292 + #define USB_PRODUCT_IPHONE_3GS 0x1294 + #define USB_PRODUCT_IPHONE_4 0x1297 ++#define USB_PRODUCT_IPAD 0x129a + #define USB_PRODUCT_IPHONE_4_VZW 0x129c + #define USB_PRODUCT_IPHONE_4S 0x12a0 + +@@ -101,6 +102,10 @@ static struct usb_device_id ipheth_table[] = { + IPHETH_USBINTF_CLASS, IPHETH_USBINTF_SUBCLASS, + IPHETH_USBINTF_PROTO) }, + { USB_DEVICE_AND_INTERFACE_INFO( ++ USB_VENDOR_APPLE, USB_PRODUCT_IPAD, ++ IPHETH_USBINTF_CLASS, IPHETH_USBINTF_SUBCLASS, ++ IPHETH_USBINTF_PROTO) }, ++ { USB_DEVICE_AND_INTERFACE_INFO( + USB_VENDOR_APPLE, USB_PRODUCT_IPHONE_4_VZW, + IPHETH_USBINTF_CLASS, IPHETH_USBINTF_SUBCLASS, + IPHETH_USBINTF_PROTO) }, +diff --git a/drivers/net/wireless/brcm80211/brcmsmac/main.c b/drivers/net/wireless/brcm80211/brcmsmac/main.c +index 833cbef..8a40ff9 100644 +--- a/drivers/net/wireless/brcm80211/brcmsmac/main.c ++++ b/drivers/net/wireless/brcm80211/brcmsmac/main.c +@@ -900,8 +900,7 @@ brcms_c_dotxstatus(struct brcms_c_info *wlc, struct tx_status *txs) + */ + if (!(txs->status & TX_STATUS_AMPDU) + && (txs->status & TX_STATUS_INTERMEDIATE)) { +- wiphy_err(wlc->wiphy, "%s: INTERMEDIATE but not AMPDU\n", +- __func__); ++ BCMMSG(wlc->wiphy, "INTERMEDIATE but not AMPDU\n"); + return false; + } + +diff --git a/drivers/net/wireless/ipw2x00/ipw.h b/drivers/net/wireless/ipw2x00/ipw.h +new file mode 100644 +index 0000000..4007bf5 +--- /dev/null ++++ b/drivers/net/wireless/ipw2x00/ipw.h +@@ -0,0 +1,23 @@ ++/* ++ * Intel Pro/Wireless 2100, 2200BG, 2915ABG network connection driver ++ * ++ * Copyright 2012 Stanislav Yakovlev ++ * ++ * This program is free software; you can redistribute it and/or modify ++ * it under the terms of the GNU General Public License version 2 as ++ * published by the Free Software Foundation. 
++ */ ++ ++#ifndef __IPW_H__ ++#define __IPW_H__ ++ ++#include ++ ++static const u32 ipw_cipher_suites[] = { ++ WLAN_CIPHER_SUITE_WEP40, ++ WLAN_CIPHER_SUITE_WEP104, ++ WLAN_CIPHER_SUITE_TKIP, ++ WLAN_CIPHER_SUITE_CCMP, ++}; ++ ++#endif +diff --git a/drivers/net/wireless/ipw2x00/ipw2100.c b/drivers/net/wireless/ipw2x00/ipw2100.c +index 127e9c6..10862d4 100644 +--- a/drivers/net/wireless/ipw2x00/ipw2100.c ++++ b/drivers/net/wireless/ipw2x00/ipw2100.c +@@ -166,6 +166,7 @@ that only one external action is invoked at a time. + #include + + #include "ipw2100.h" ++#include "ipw.h" + + #define IPW2100_VERSION "git-1.2.2" + +@@ -1955,6 +1956,9 @@ static int ipw2100_wdev_init(struct net_device *dev) + wdev->wiphy->bands[IEEE80211_BAND_2GHZ] = bg_band; + } + ++ wdev->wiphy->cipher_suites = ipw_cipher_suites; ++ wdev->wiphy->n_cipher_suites = ARRAY_SIZE(ipw_cipher_suites); ++ + set_wiphy_dev(wdev->wiphy, &priv->pci_dev->dev); + if (wiphy_register(wdev->wiphy)) { + ipw2100_down(priv); +diff --git a/drivers/net/wireless/ipw2x00/ipw2200.c b/drivers/net/wireless/ipw2x00/ipw2200.c +index 827889b..56bd370 100644 +--- a/drivers/net/wireless/ipw2x00/ipw2200.c ++++ b/drivers/net/wireless/ipw2x00/ipw2200.c +@@ -34,6 +34,7 @@ + #include + #include + #include "ipw2200.h" ++#include "ipw.h" + + + #ifndef KBUILD_EXTMOD +@@ -11535,6 +11536,9 @@ static int ipw_wdev_init(struct net_device *dev) + wdev->wiphy->bands[IEEE80211_BAND_5GHZ] = a_band; + } + ++ wdev->wiphy->cipher_suites = ipw_cipher_suites; ++ wdev->wiphy->n_cipher_suites = ARRAY_SIZE(ipw_cipher_suites); ++ + set_wiphy_dev(wdev->wiphy, &priv->pci_dev->dev); + + /* With that information in place, we can now register the wiphy... */ +diff --git a/drivers/net/wireless/iwlegacy/iwl-4965-sta.c b/drivers/net/wireless/iwlegacy/iwl-4965-sta.c +index a262c23..0116ca8 100644 +--- a/drivers/net/wireless/iwlegacy/iwl-4965-sta.c ++++ b/drivers/net/wireless/iwlegacy/iwl-4965-sta.c +@@ -466,7 +466,7 @@ int iwl4965_remove_dynamic_key(struct iwl_priv *priv, + return 0; + } + +- if (priv->stations[sta_id].sta.key.key_offset == WEP_INVALID_OFFSET) { ++ if (priv->stations[sta_id].sta.key.key_flags & STA_KEY_FLG_INVALID) { + IWL_WARN(priv, "Removing wrong key %d 0x%x\n", + keyconf->keyidx, key_flags); + spin_unlock_irqrestore(&priv->sta_lock, flags); +@@ -483,7 +483,7 @@ int iwl4965_remove_dynamic_key(struct iwl_priv *priv, + sizeof(struct iwl4965_keyinfo)); + priv->stations[sta_id].sta.key.key_flags = + STA_KEY_FLG_NO_ENC | STA_KEY_FLG_INVALID; +- priv->stations[sta_id].sta.key.key_offset = WEP_INVALID_OFFSET; ++ priv->stations[sta_id].sta.key.key_offset = keyconf->hw_key_idx; + priv->stations[sta_id].sta.sta.modify_mask = STA_MODIFY_KEY_MASK; + priv->stations[sta_id].sta.mode = STA_CONTROL_MODIFY_MSK; + +diff --git a/drivers/net/wireless/iwlegacy/iwl-core.c b/drivers/net/wireless/iwlegacy/iwl-core.c +index 2bd5659..1bb64c9 100644 +--- a/drivers/net/wireless/iwlegacy/iwl-core.c ++++ b/drivers/net/wireless/iwlegacy/iwl-core.c +@@ -1884,14 +1884,12 @@ void iwl_legacy_bg_watchdog(unsigned long data) + return; + + /* monitor and check for other stuck queues */ +- if (iwl_legacy_is_any_associated(priv)) { +- for (cnt = 0; cnt < priv->hw_params.max_txq_num; cnt++) { +- /* skip as we already checked the command queue */ +- if (cnt == priv->cmd_queue) +- continue; +- if (iwl_legacy_check_stuck_queue(priv, cnt)) +- return; +- } ++ for (cnt = 0; cnt < priv->hw_params.max_txq_num; cnt++) { ++ /* skip as we already checked the command queue */ ++ if (cnt == priv->cmd_queue) ++ continue; 
++ if (iwl_legacy_check_stuck_queue(priv, cnt)) ++ return; + } + + mod_timer(&priv->watchdog, jiffies + +diff --git a/drivers/net/wireless/rt2x00/rt2x00usb.c b/drivers/net/wireless/rt2x00/rt2x00usb.c +index 1e31050..ba28807 100644 +--- a/drivers/net/wireless/rt2x00/rt2x00usb.c ++++ b/drivers/net/wireless/rt2x00/rt2x00usb.c +@@ -426,8 +426,8 @@ void rt2x00usb_kick_queue(struct data_queue *queue) + case QID_RX: + if (!rt2x00queue_full(queue)) + rt2x00queue_for_each_entry(queue, +- Q_INDEX_DONE, + Q_INDEX, ++ Q_INDEX_DONE, + NULL, + rt2x00usb_kick_rx_entry); + break; +diff --git a/drivers/net/wireless/rtl818x/rtl8187/leds.c b/drivers/net/wireless/rtl818x/rtl8187/leds.c +index 2e0de2f..c2d5b49 100644 +--- a/drivers/net/wireless/rtl818x/rtl8187/leds.c ++++ b/drivers/net/wireless/rtl818x/rtl8187/leds.c +@@ -117,7 +117,7 @@ static void rtl8187_led_brightness_set(struct led_classdev *led_dev, + radio_on = true; + } else if (radio_on) { + radio_on = false; +- cancel_delayed_work_sync(&priv->led_on); ++ cancel_delayed_work(&priv->led_on); + ieee80211_queue_delayed_work(hw, &priv->led_off, 0); + } + } else if (radio_on) { +diff --git a/drivers/pci/pci-driver.c b/drivers/pci/pci-driver.c +index 12d1e81..d024f83 100644 +--- a/drivers/pci/pci-driver.c ++++ b/drivers/pci/pci-driver.c +@@ -742,6 +742,18 @@ static int pci_pm_suspend_noirq(struct device *dev) + + pci_pm_set_unknown_state(pci_dev); + ++ /* ++ * Some BIOSes from ASUS have a bug: If a USB EHCI host controller's ++ * PCI COMMAND register isn't 0, the BIOS assumes that the controller ++ * hasn't been quiesced and tries to turn it off. If the controller ++ * is already in D3, this can hang or cause memory corruption. ++ * ++ * Since the value of the COMMAND register doesn't matter once the ++ * device has been suspended, we can safely set it to 0 here. ++ */ ++ if (pci_dev->class == PCI_CLASS_SERIAL_USB_EHCI) ++ pci_write_config_word(pci_dev, PCI_COMMAND, 0); ++ + return 0; + } + +diff --git a/drivers/pci/pci.c b/drivers/pci/pci.c +index e5b75eb..6d4a531 100644 +--- a/drivers/pci/pci.c ++++ b/drivers/pci/pci.c +@@ -1689,11 +1689,6 @@ int pci_prepare_to_sleep(struct pci_dev *dev) + if (target_state == PCI_POWER_ERROR) + return -EIO; + +- /* Some devices mustn't be in D3 during system sleep */ +- if (target_state == PCI_D3hot && +- (dev->dev_flags & PCI_DEV_FLAGS_NO_D3_DURING_SLEEP)) +- return 0; +- + pci_enable_wake(dev, target_state, device_may_wakeup(&dev->dev)); + + error = pci_set_power_state(dev, target_state); +diff --git a/drivers/pci/quirks.c b/drivers/pci/quirks.c +index 3c56fec..78fda9c 100644 +--- a/drivers/pci/quirks.c ++++ b/drivers/pci/quirks.c +@@ -2940,32 +2940,6 @@ static void __devinit disable_igfx_irq(struct pci_dev *dev) + DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_INTEL, 0x0102, disable_igfx_irq); + DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_INTEL, 0x010a, disable_igfx_irq); + +-/* +- * The Intel 6 Series/C200 Series chipset's EHCI controllers on many +- * ASUS motherboards will cause memory corruption or a system crash +- * if they are in D3 while the system is put into S3 sleep. 
+- */ +-static void __devinit asus_ehci_no_d3(struct pci_dev *dev) +-{ +- const char *sys_info; +- static const char good_Asus_board[] = "P8Z68-V"; +- +- if (dev->dev_flags & PCI_DEV_FLAGS_NO_D3_DURING_SLEEP) +- return; +- if (dev->subsystem_vendor != PCI_VENDOR_ID_ASUSTEK) +- return; +- sys_info = dmi_get_system_info(DMI_BOARD_NAME); +- if (sys_info && memcmp(sys_info, good_Asus_board, +- sizeof(good_Asus_board) - 1) == 0) +- return; +- +- dev_info(&dev->dev, "broken D3 during system sleep on ASUS\n"); +- dev->dev_flags |= PCI_DEV_FLAGS_NO_D3_DURING_SLEEP; +- device_set_wakeup_capable(&dev->dev, false); +-} +-DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_INTEL, 0x1c26, asus_ehci_no_d3); +-DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_INTEL, 0x1c2d, asus_ehci_no_d3); +- + static void pci_do_fixups(struct pci_dev *dev, struct pci_fixup *f, + struct pci_fixup *end) + { +diff --git a/drivers/platform/x86/intel_ips.c b/drivers/platform/x86/intel_ips.c +index 809a3ae..b46ec11 100644 +--- a/drivers/platform/x86/intel_ips.c ++++ b/drivers/platform/x86/intel_ips.c +@@ -72,6 +72,7 @@ + #include + #include + #include ++#include + #include + #include + #include +@@ -1505,6 +1506,24 @@ static DEFINE_PCI_DEVICE_TABLE(ips_id_table) = { + + MODULE_DEVICE_TABLE(pci, ips_id_table); + ++static int ips_blacklist_callback(const struct dmi_system_id *id) ++{ ++ pr_info("Blacklisted intel_ips for %s\n", id->ident); ++ return 1; ++} ++ ++static const struct dmi_system_id ips_blacklist[] = { ++ { ++ .callback = ips_blacklist_callback, ++ .ident = "HP ProBook", ++ .matches = { ++ DMI_MATCH(DMI_SYS_VENDOR, "Hewlett-Packard"), ++ DMI_MATCH(DMI_PRODUCT_NAME, "HP ProBook"), ++ }, ++ }, ++ { } /* terminating entry */ ++}; ++ + static int ips_probe(struct pci_dev *dev, const struct pci_device_id *id) + { + u64 platform_info; +@@ -1514,6 +1533,9 @@ static int ips_probe(struct pci_dev *dev, const struct pci_device_id *id) + u16 htshi, trc, trc_required_mask; + u8 tse; + ++ if (dmi_check_system(ips_blacklist)) ++ return -ENODEV; ++ + ips = kzalloc(sizeof(struct ips_driver), GFP_KERNEL); + if (!ips) + return -ENOMEM; +diff --git a/drivers/platform/x86/samsung-laptop.c b/drivers/platform/x86/samsung-laptop.c +index 09e26bf..af1e296 100644 +--- a/drivers/platform/x86/samsung-laptop.c ++++ b/drivers/platform/x86/samsung-laptop.c +@@ -540,245 +540,34 @@ static DEVICE_ATTR(performance_level, S_IWUSR | S_IRUGO, + get_performance_level, set_performance_level); + + +-static int __init dmi_check_cb(const struct dmi_system_id *id) +-{ +- pr_info("found laptop model '%s'\n", +- id->ident); +- return 1; +-} +- + static struct dmi_system_id __initdata samsung_dmi_table[] = { + { +- .ident = "N128", +- .matches = { +- DMI_MATCH(DMI_SYS_VENDOR, +- "SAMSUNG ELECTRONICS CO., LTD."), +- DMI_MATCH(DMI_PRODUCT_NAME, "N128"), +- DMI_MATCH(DMI_BOARD_NAME, "N128"), +- }, +- .callback = dmi_check_cb, +- }, +- { +- .ident = "N130", + .matches = { + DMI_MATCH(DMI_SYS_VENDOR, + "SAMSUNG ELECTRONICS CO., LTD."), +- DMI_MATCH(DMI_PRODUCT_NAME, "N130"), +- DMI_MATCH(DMI_BOARD_NAME, "N130"), ++ DMI_MATCH(DMI_CHASSIS_TYPE, "8"), /* Portable */ + }, +- .callback = dmi_check_cb, + }, + { +- .ident = "N510", + .matches = { + DMI_MATCH(DMI_SYS_VENDOR, + "SAMSUNG ELECTRONICS CO., LTD."), +- DMI_MATCH(DMI_PRODUCT_NAME, "N510"), +- DMI_MATCH(DMI_BOARD_NAME, "N510"), ++ DMI_MATCH(DMI_CHASSIS_TYPE, "9"), /* Laptop */ + }, +- .callback = dmi_check_cb, + }, + { +- .ident = "X125", + .matches = { + DMI_MATCH(DMI_SYS_VENDOR, + "SAMSUNG ELECTRONICS CO., LTD."), +- 
DMI_MATCH(DMI_PRODUCT_NAME, "X125"), +- DMI_MATCH(DMI_BOARD_NAME, "X125"), ++ DMI_MATCH(DMI_CHASSIS_TYPE, "10"), /* Notebook */ + }, +- .callback = dmi_check_cb, + }, + { +- .ident = "X120/X170", + .matches = { + DMI_MATCH(DMI_SYS_VENDOR, + "SAMSUNG ELECTRONICS CO., LTD."), +- DMI_MATCH(DMI_PRODUCT_NAME, "X120/X170"), +- DMI_MATCH(DMI_BOARD_NAME, "X120/X170"), +- }, +- .callback = dmi_check_cb, +- }, +- { +- .ident = "NC10", +- .matches = { +- DMI_MATCH(DMI_SYS_VENDOR, +- "SAMSUNG ELECTRONICS CO., LTD."), +- DMI_MATCH(DMI_PRODUCT_NAME, "NC10"), +- DMI_MATCH(DMI_BOARD_NAME, "NC10"), +- }, +- .callback = dmi_check_cb, +- }, +- { +- .ident = "NP-Q45", +- .matches = { +- DMI_MATCH(DMI_SYS_VENDOR, +- "SAMSUNG ELECTRONICS CO., LTD."), +- DMI_MATCH(DMI_PRODUCT_NAME, "SQ45S70S"), +- DMI_MATCH(DMI_BOARD_NAME, "SQ45S70S"), +- }, +- .callback = dmi_check_cb, +- }, +- { +- .ident = "X360", +- .matches = { +- DMI_MATCH(DMI_SYS_VENDOR, +- "SAMSUNG ELECTRONICS CO., LTD."), +- DMI_MATCH(DMI_PRODUCT_NAME, "X360"), +- DMI_MATCH(DMI_BOARD_NAME, "X360"), +- }, +- .callback = dmi_check_cb, +- }, +- { +- .ident = "R410 Plus", +- .matches = { +- DMI_MATCH(DMI_SYS_VENDOR, +- "SAMSUNG ELECTRONICS CO., LTD."), +- DMI_MATCH(DMI_PRODUCT_NAME, "R410P"), +- DMI_MATCH(DMI_BOARD_NAME, "R460"), +- }, +- .callback = dmi_check_cb, +- }, +- { +- .ident = "R518", +- .matches = { +- DMI_MATCH(DMI_SYS_VENDOR, +- "SAMSUNG ELECTRONICS CO., LTD."), +- DMI_MATCH(DMI_PRODUCT_NAME, "R518"), +- DMI_MATCH(DMI_BOARD_NAME, "R518"), +- }, +- .callback = dmi_check_cb, +- }, +- { +- .ident = "R519/R719", +- .matches = { +- DMI_MATCH(DMI_SYS_VENDOR, +- "SAMSUNG ELECTRONICS CO., LTD."), +- DMI_MATCH(DMI_PRODUCT_NAME, "R519/R719"), +- DMI_MATCH(DMI_BOARD_NAME, "R519/R719"), +- }, +- .callback = dmi_check_cb, +- }, +- { +- .ident = "N150/N210/N220", +- .matches = { +- DMI_MATCH(DMI_SYS_VENDOR, +- "SAMSUNG ELECTRONICS CO., LTD."), +- DMI_MATCH(DMI_PRODUCT_NAME, "N150/N210/N220"), +- DMI_MATCH(DMI_BOARD_NAME, "N150/N210/N220"), +- }, +- .callback = dmi_check_cb, +- }, +- { +- .ident = "N220", +- .matches = { +- DMI_MATCH(DMI_SYS_VENDOR, +- "SAMSUNG ELECTRONICS CO., LTD."), +- DMI_MATCH(DMI_PRODUCT_NAME, "N220"), +- DMI_MATCH(DMI_BOARD_NAME, "N220"), +- }, +- .callback = dmi_check_cb, +- }, +- { +- .ident = "N150/N210/N220/N230", +- .matches = { +- DMI_MATCH(DMI_SYS_VENDOR, +- "SAMSUNG ELECTRONICS CO., LTD."), +- DMI_MATCH(DMI_PRODUCT_NAME, "N150/N210/N220/N230"), +- DMI_MATCH(DMI_BOARD_NAME, "N150/N210/N220/N230"), +- }, +- .callback = dmi_check_cb, +- }, +- { +- .ident = "N150P/N210P/N220P", +- .matches = { +- DMI_MATCH(DMI_SYS_VENDOR, +- "SAMSUNG ELECTRONICS CO., LTD."), +- DMI_MATCH(DMI_PRODUCT_NAME, "N150P/N210P/N220P"), +- DMI_MATCH(DMI_BOARD_NAME, "N150P/N210P/N220P"), +- }, +- .callback = dmi_check_cb, +- }, +- { +- .ident = "R700", +- .matches = { +- DMI_MATCH(DMI_SYS_VENDOR, "SAMSUNG ELECTRONICS CO., LTD."), +- DMI_MATCH(DMI_PRODUCT_NAME, "SR700"), +- DMI_MATCH(DMI_BOARD_NAME, "SR700"), +- }, +- .callback = dmi_check_cb, +- }, +- { +- .ident = "R530/R730", +- .matches = { +- DMI_MATCH(DMI_SYS_VENDOR, "SAMSUNG ELECTRONICS CO., LTD."), +- DMI_MATCH(DMI_PRODUCT_NAME, "R530/R730"), +- DMI_MATCH(DMI_BOARD_NAME, "R530/R730"), +- }, +- .callback = dmi_check_cb, +- }, +- { +- .ident = "NF110/NF210/NF310", +- .matches = { +- DMI_MATCH(DMI_SYS_VENDOR, "SAMSUNG ELECTRONICS CO., LTD."), +- DMI_MATCH(DMI_PRODUCT_NAME, "NF110/NF210/NF310"), +- DMI_MATCH(DMI_BOARD_NAME, "NF110/NF210/NF310"), +- }, +- .callback = dmi_check_cb, +- }, +- { +- .ident = 
"N145P/N250P/N260P", +- .matches = { +- DMI_MATCH(DMI_SYS_VENDOR, "SAMSUNG ELECTRONICS CO., LTD."), +- DMI_MATCH(DMI_PRODUCT_NAME, "N145P/N250P/N260P"), +- DMI_MATCH(DMI_BOARD_NAME, "N145P/N250P/N260P"), +- }, +- .callback = dmi_check_cb, +- }, +- { +- .ident = "R70/R71", +- .matches = { +- DMI_MATCH(DMI_SYS_VENDOR, +- "SAMSUNG ELECTRONICS CO., LTD."), +- DMI_MATCH(DMI_PRODUCT_NAME, "R70/R71"), +- DMI_MATCH(DMI_BOARD_NAME, "R70/R71"), +- }, +- .callback = dmi_check_cb, +- }, +- { +- .ident = "P460", +- .matches = { +- DMI_MATCH(DMI_SYS_VENDOR, "SAMSUNG ELECTRONICS CO., LTD."), +- DMI_MATCH(DMI_PRODUCT_NAME, "P460"), +- DMI_MATCH(DMI_BOARD_NAME, "P460"), +- }, +- .callback = dmi_check_cb, +- }, +- { +- .ident = "R528/R728", +- .matches = { +- DMI_MATCH(DMI_SYS_VENDOR, "SAMSUNG ELECTRONICS CO., LTD."), +- DMI_MATCH(DMI_PRODUCT_NAME, "R528/R728"), +- DMI_MATCH(DMI_BOARD_NAME, "R528/R728"), +- }, +- .callback = dmi_check_cb, +- }, +- { +- .ident = "NC210/NC110", +- .matches = { +- DMI_MATCH(DMI_SYS_VENDOR, "SAMSUNG ELECTRONICS CO., LTD."), +- DMI_MATCH(DMI_PRODUCT_NAME, "NC210/NC110"), +- DMI_MATCH(DMI_BOARD_NAME, "NC210/NC110"), +- }, +- .callback = dmi_check_cb, +- }, +- { +- .ident = "X520", +- .matches = { +- DMI_MATCH(DMI_SYS_VENDOR, "SAMSUNG ELECTRONICS CO., LTD."), +- DMI_MATCH(DMI_PRODUCT_NAME, "X520"), +- DMI_MATCH(DMI_BOARD_NAME, "X520"), ++ DMI_MATCH(DMI_CHASSIS_TYPE, "14"), /* Sub-Notebook */ + }, +- .callback = dmi_check_cb, + }, + { }, + }; +@@ -819,7 +608,8 @@ static int __init samsung_init(void) + + f0000_segment = ioremap_nocache(0xf0000, 0xffff); + if (!f0000_segment) { +- pr_err("Can't map the segment at 0xf0000\n"); ++ if (debug || force) ++ pr_err("Can't map the segment at 0xf0000\n"); + return -EINVAL; + } + +@@ -832,7 +622,8 @@ static int __init samsung_init(void) + } + + if (loca == 0xffff) { +- pr_err("This computer does not support SABI\n"); ++ if (debug || force) ++ pr_err("This computer does not support SABI\n"); + goto error_no_signature; + } + +diff --git a/drivers/rtc/rtc-mxc.c b/drivers/rtc/rtc-mxc.c +index 39e41fb..5160354 100644 +--- a/drivers/rtc/rtc-mxc.c ++++ b/drivers/rtc/rtc-mxc.c +@@ -191,10 +191,11 @@ static irqreturn_t mxc_rtc_interrupt(int irq, void *dev_id) + struct platform_device *pdev = dev_id; + struct rtc_plat_data *pdata = platform_get_drvdata(pdev); + void __iomem *ioaddr = pdata->ioaddr; ++ unsigned long flags; + u32 status; + u32 events = 0; + +- spin_lock_irq(&pdata->rtc->irq_lock); ++ spin_lock_irqsave(&pdata->rtc->irq_lock, flags); + status = readw(ioaddr + RTC_RTCISR) & readw(ioaddr + RTC_RTCIENR); + /* clear interrupt sources */ + writew(status, ioaddr + RTC_RTCISR); +@@ -217,7 +218,7 @@ static irqreturn_t mxc_rtc_interrupt(int irq, void *dev_id) + rtc_update_alarm(&pdev->dev, &pdata->g_rtc_alarm); + + rtc_update_irq(pdata->rtc, 1, events); +- spin_unlock_irq(&pdata->rtc->irq_lock); ++ spin_unlock_irqrestore(&pdata->rtc->irq_lock, flags); + + return IRQ_HANDLED; + } +diff --git a/drivers/scsi/aic94xx/aic94xx_task.c b/drivers/scsi/aic94xx/aic94xx_task.c +index 532d212..393e7ce 100644 +--- a/drivers/scsi/aic94xx/aic94xx_task.c ++++ b/drivers/scsi/aic94xx/aic94xx_task.c +@@ -201,7 +201,7 @@ static void asd_get_response_tasklet(struct asd_ascb *ascb, + + if (SAS_STATUS_BUF_SIZE >= sizeof(*resp)) { + resp->frame_len = le16_to_cpu(*(__le16 *)(r+6)); +- memcpy(&resp->ending_fis[0], r+16, 24); ++ memcpy(&resp->ending_fis[0], r+16, ATA_RESP_FIS_SIZE); + ts->buf_valid_size = sizeof(*resp); + } + } +diff --git a/drivers/scsi/libsas/sas_ata.c 
b/drivers/scsi/libsas/sas_ata.c +index db9238f..4868fc9 100644 +--- a/drivers/scsi/libsas/sas_ata.c ++++ b/drivers/scsi/libsas/sas_ata.c +@@ -112,12 +112,12 @@ static void sas_ata_task_done(struct sas_task *task) + if (stat->stat == SAS_PROTO_RESPONSE || stat->stat == SAM_STAT_GOOD || + ((stat->stat == SAM_STAT_CHECK_CONDITION && + dev->sata_dev.command_set == ATAPI_COMMAND_SET))) { +- ata_tf_from_fis(resp->ending_fis, &dev->sata_dev.tf); ++ memcpy(dev->sata_dev.fis, resp->ending_fis, ATA_RESP_FIS_SIZE); + + if (!link->sactive) { +- qc->err_mask |= ac_err_mask(dev->sata_dev.tf.command); ++ qc->err_mask |= ac_err_mask(dev->sata_dev.fis[2]); + } else { +- link->eh_info.err_mask |= ac_err_mask(dev->sata_dev.tf.command); ++ link->eh_info.err_mask |= ac_err_mask(dev->sata_dev.fis[2]); + if (unlikely(link->eh_info.err_mask)) + qc->flags |= ATA_QCFLAG_FAILED; + } +@@ -138,8 +138,8 @@ static void sas_ata_task_done(struct sas_task *task) + qc->flags |= ATA_QCFLAG_FAILED; + } + +- dev->sata_dev.tf.feature = 0x04; /* status err */ +- dev->sata_dev.tf.command = ATA_ERR; ++ dev->sata_dev.fis[3] = 0x04; /* status err */ ++ dev->sata_dev.fis[2] = ATA_ERR; + } + } + +@@ -252,7 +252,7 @@ static bool sas_ata_qc_fill_rtf(struct ata_queued_cmd *qc) + { + struct domain_device *dev = qc->ap->private_data; + +- memcpy(&qc->result_tf, &dev->sata_dev.tf, sizeof(qc->result_tf)); ++ ata_tf_from_fis(dev->sata_dev.fis, &qc->result_tf); + return true; + } + +diff --git a/drivers/target/target_core_cdb.c b/drivers/target/target_core_cdb.c +index 65ea65a..93b9406 100644 +--- a/drivers/target/target_core_cdb.c ++++ b/drivers/target/target_core_cdb.c +@@ -1199,7 +1199,7 @@ int target_emulate_write_same(struct se_task *task) + if (num_blocks != 0) + range = num_blocks; + else +- range = (dev->transport->get_blocks(dev) - lba); ++ range = (dev->transport->get_blocks(dev) - lba) + 1; + + pr_debug("WRITE_SAME UNMAP: LBA: %llu Range: %llu\n", + (unsigned long long)lba, (unsigned long long)range); +diff --git a/drivers/target/target_core_pr.c b/drivers/target/target_core_pr.c +index b75bc92..9145141 100644 +--- a/drivers/target/target_core_pr.c ++++ b/drivers/target/target_core_pr.c +@@ -2042,7 +2042,7 @@ static int __core_scsi3_write_aptpl_to_file( + if (IS_ERR(file) || !file || !file->f_dentry) { + pr_err("filp_open(%s) for APTPL metadata" + " failed\n", path); +- return (PTR_ERR(file) < 0 ? PTR_ERR(file) : -ENOENT); ++ return IS_ERR(file) ? 
PTR_ERR(file) : -ENOENT; + } + + iov[0].iov_base = &buf[0]; +@@ -3853,7 +3853,7 @@ int target_scsi3_emulate_pr_out(struct se_task *task) + " SPC-2 reservation is held, returning" + " RESERVATION_CONFLICT\n"); + cmd->scsi_sense_reason = TCM_RESERVATION_CONFLICT; +- ret = EINVAL; ++ ret = -EINVAL; + goto out; + } + +@@ -3863,7 +3863,8 @@ int target_scsi3_emulate_pr_out(struct se_task *task) + */ + if (!cmd->se_sess) { + cmd->scsi_sense_reason = TCM_LOGICAL_UNIT_COMMUNICATION_FAILURE; +- return -EINVAL; ++ ret = -EINVAL; ++ goto out; + } + + if (cmd->data_length < 24) { +diff --git a/drivers/target/tcm_fc/tfc_cmd.c b/drivers/target/tcm_fc/tfc_cmd.c +index d95cfe2..278819c 100644 +--- a/drivers/target/tcm_fc/tfc_cmd.c ++++ b/drivers/target/tcm_fc/tfc_cmd.c +@@ -249,6 +249,8 @@ u32 ft_get_task_tag(struct se_cmd *se_cmd) + { + struct ft_cmd *cmd = container_of(se_cmd, struct ft_cmd, se_cmd); + ++ if (cmd->aborted) ++ return ~0; + return fc_seq_exch(cmd->seq)->rxid; + } + +diff --git a/drivers/usb/class/cdc-wdm.c b/drivers/usb/class/cdc-wdm.c +index 19fb5fa..9aaed0d 100644 +--- a/drivers/usb/class/cdc-wdm.c ++++ b/drivers/usb/class/cdc-wdm.c +@@ -473,6 +473,8 @@ retry: + goto retry; + } + if (!desc->reslength) { /* zero length read */ ++ dev_dbg(&desc->intf->dev, "%s: zero length - clearing WDM_READ\n", __func__); ++ clear_bit(WDM_READ, &desc->flags); + spin_unlock_irq(&desc->iuspin); + goto retry; + } +diff --git a/drivers/usb/core/hub.c b/drivers/usb/core/hub.c +index 52d27ed..175b6bb 100644 +--- a/drivers/usb/core/hub.c ++++ b/drivers/usb/core/hub.c +@@ -2039,12 +2039,16 @@ static unsigned hub_is_wusb(struct usb_hub *hub) + static int hub_port_reset(struct usb_hub *hub, int port1, + struct usb_device *udev, unsigned int delay, bool warm); + +-/* Is a USB 3.0 port in the Inactive state? */ +-static bool hub_port_inactive(struct usb_hub *hub, u16 portstatus) ++/* Is a USB 3.0 port in the Inactive or Complinance Mode state? ++ * Port worm reset is required to recover ++ */ ++static bool hub_port_warm_reset_required(struct usb_hub *hub, u16 portstatus) + { + return hub_is_superspeed(hub->hdev) && +- (portstatus & USB_PORT_STAT_LINK_STATE) == +- USB_SS_PORT_LS_SS_INACTIVE; ++ (((portstatus & USB_PORT_STAT_LINK_STATE) == ++ USB_SS_PORT_LS_SS_INACTIVE) || ++ ((portstatus & USB_PORT_STAT_LINK_STATE) == ++ USB_SS_PORT_LS_COMP_MOD)) ; + } + + static int hub_port_wait_reset(struct usb_hub *hub, int port1, +@@ -2080,7 +2084,7 @@ static int hub_port_wait_reset(struct usb_hub *hub, int port1, + * + * See https://bugzilla.kernel.org/show_bug.cgi?id=41752 + */ +- if (hub_port_inactive(hub, portstatus)) { ++ if (hub_port_warm_reset_required(hub, portstatus)) { + int ret; + + if ((portchange & USB_PORT_STAT_C_CONNECTION)) +@@ -3646,9 +3650,7 @@ static void hub_events(void) + /* Warm reset a USB3 protocol port if it's in + * SS.Inactive state. 
+ */ +- if (hub_is_superspeed(hub->hdev) && +- (portstatus & USB_PORT_STAT_LINK_STATE) +- == USB_SS_PORT_LS_SS_INACTIVE) { ++ if (hub_port_warm_reset_required(hub, portstatus)) { + dev_dbg(hub_dev, "warm reset port %d\n", i); + hub_port_reset(hub, i, NULL, + HUB_BH_RESET_TIME, true); +diff --git a/drivers/usb/host/xhci-hub.c b/drivers/usb/host/xhci-hub.c +index a8b2980..fd8a2c2 100644 +--- a/drivers/usb/host/xhci-hub.c ++++ b/drivers/usb/host/xhci-hub.c +@@ -438,6 +438,42 @@ void xhci_test_and_clear_bit(struct xhci_hcd *xhci, __le32 __iomem **port_array, + } + } + ++/* Updates Link Status for super Speed port */ ++static void xhci_hub_report_link_state(u32 *status, u32 status_reg) ++{ ++ u32 pls = status_reg & PORT_PLS_MASK; ++ ++ /* resume state is a xHCI internal state. ++ * Do not report it to usb core. ++ */ ++ if (pls == XDEV_RESUME) ++ return; ++ ++ /* When the CAS bit is set then warm reset ++ * should be performed on port ++ */ ++ if (status_reg & PORT_CAS) { ++ /* The CAS bit can be set while the port is ++ * in any link state. ++ * Only roothubs have CAS bit, so we ++ * pretend to be in compliance mode ++ * unless we're already in compliance ++ * or the inactive state. ++ */ ++ if (pls != USB_SS_PORT_LS_COMP_MOD && ++ pls != USB_SS_PORT_LS_SS_INACTIVE) { ++ pls = USB_SS_PORT_LS_COMP_MOD; ++ } ++ /* Return also connection bit - ++ * hub state machine resets port ++ * when this bit is set. ++ */ ++ pls |= USB_PORT_STAT_CONNECTION; ++ } ++ /* update status field */ ++ *status |= pls; ++} ++ + int xhci_hub_control(struct usb_hcd *hcd, u16 typeReq, u16 wValue, + u16 wIndex, char *buf, u16 wLength) + { +@@ -579,13 +615,9 @@ int xhci_hub_control(struct usb_hcd *hcd, u16 typeReq, u16 wValue, + else + status |= USB_PORT_STAT_POWER; + } +- /* Port Link State */ ++ /* Update Port Link State for super speed ports*/ + if (hcd->speed == HCD_USB3) { +- /* resume state is a xHCI internal state. +- * Do not report it to usb core. +- */ +- if ((temp & PORT_PLS_MASK) != XDEV_RESUME) +- status |= (temp & PORT_PLS_MASK); ++ xhci_hub_report_link_state(&status, temp); + } + if (bus_state->port_c_suspend & (1 << wIndex)) + status |= 1 << USB_PORT_FEAT_C_SUSPEND; +diff --git a/drivers/usb/host/xhci.h b/drivers/usb/host/xhci.h +index 363b141..7a56805 100644 +--- a/drivers/usb/host/xhci.h ++++ b/drivers/usb/host/xhci.h +@@ -341,7 +341,11 @@ struct xhci_op_regs { + #define PORT_PLC (1 << 22) + /* port configure error change - port failed to configure its link partner */ + #define PORT_CEC (1 << 23) +-/* bit 24 reserved */ ++/* Cold Attach Status - xHC can set this bit to report device attached during ++ * Sx state. Warm port reset should be perfomed to clear this bit and move port ++ * to connected state. 
++ */ ++#define PORT_CAS (1 << 24) + /* wake on connect (enable) */ + #define PORT_WKCONN_E (1 << 25) + /* wake on disconnect (enable) */ +diff --git a/drivers/usb/serial/option.c b/drivers/usb/serial/option.c +index 21a4734..5971c95 100644 +--- a/drivers/usb/serial/option.c ++++ b/drivers/usb/serial/option.c +@@ -496,6 +496,15 @@ static void option_instat_callback(struct urb *urb); + + /* MediaTek products */ + #define MEDIATEK_VENDOR_ID 0x0e8d ++#define MEDIATEK_PRODUCT_DC_1COM 0x00a0 ++#define MEDIATEK_PRODUCT_DC_4COM 0x00a5 ++#define MEDIATEK_PRODUCT_DC_5COM 0x00a4 ++#define MEDIATEK_PRODUCT_7208_1COM 0x7101 ++#define MEDIATEK_PRODUCT_7208_2COM 0x7102 ++#define MEDIATEK_PRODUCT_FP_1COM 0x0003 ++#define MEDIATEK_PRODUCT_FP_2COM 0x0023 ++#define MEDIATEK_PRODUCT_FPDC_1COM 0x0043 ++#define MEDIATEK_PRODUCT_FPDC_2COM 0x0033 + + /* Cellient products */ + #define CELLIENT_VENDOR_ID 0x2692 +@@ -553,6 +562,10 @@ static const struct option_blacklist_info net_intf1_blacklist = { + .reserved = BIT(1), + }; + ++static const struct option_blacklist_info net_intf2_blacklist = { ++ .reserved = BIT(2), ++}; ++ + static const struct option_blacklist_info net_intf3_blacklist = { + .reserved = BIT(3), + }; +@@ -1093,6 +1106,8 @@ static const struct usb_device_id option_ids[] = { + { USB_DEVICE_AND_INTERFACE_INFO(ZTE_VENDOR_ID, 0x1298, 0xff, 0xff, 0xff) }, + { USB_DEVICE_AND_INTERFACE_INFO(ZTE_VENDOR_ID, 0x1299, 0xff, 0xff, 0xff) }, + { USB_DEVICE_AND_INTERFACE_INFO(ZTE_VENDOR_ID, 0x1300, 0xff, 0xff, 0xff) }, ++ { USB_DEVICE_AND_INTERFACE_INFO(ZTE_VENDOR_ID, 0x1402, 0xff, 0xff, 0xff), ++ .driver_info = (kernel_ulong_t)&net_intf2_blacklist }, + { USB_DEVICE_AND_INTERFACE_INFO(ZTE_VENDOR_ID, 0x2002, 0xff, + 0xff, 0xff), .driver_info = (kernel_ulong_t)&zte_k3765_z_blacklist }, + { USB_DEVICE_AND_INTERFACE_INFO(ZTE_VENDOR_ID, 0x2003, 0xff, 0xff, 0xff) }, +@@ -1234,6 +1249,17 @@ static const struct usb_device_id option_ids[] = { + { USB_DEVICE_AND_INTERFACE_INFO(MEDIATEK_VENDOR_ID, 0x00a1, 0xff, 0x02, 0x01) }, + { USB_DEVICE_AND_INTERFACE_INFO(MEDIATEK_VENDOR_ID, 0x00a2, 0xff, 0x00, 0x00) }, + { USB_DEVICE_AND_INTERFACE_INFO(MEDIATEK_VENDOR_ID, 0x00a2, 0xff, 0x02, 0x01) }, /* MediaTek MT6276M modem & app port */ ++ { USB_DEVICE_AND_INTERFACE_INFO(MEDIATEK_VENDOR_ID, MEDIATEK_PRODUCT_DC_1COM, 0x0a, 0x00, 0x00) }, ++ { USB_DEVICE_AND_INTERFACE_INFO(MEDIATEK_VENDOR_ID, MEDIATEK_PRODUCT_DC_5COM, 0xff, 0x02, 0x01) }, ++ { USB_DEVICE_AND_INTERFACE_INFO(MEDIATEK_VENDOR_ID, MEDIATEK_PRODUCT_DC_5COM, 0xff, 0x00, 0x00) }, ++ { USB_DEVICE_AND_INTERFACE_INFO(MEDIATEK_VENDOR_ID, MEDIATEK_PRODUCT_DC_4COM, 0xff, 0x02, 0x01) }, ++ { USB_DEVICE_AND_INTERFACE_INFO(MEDIATEK_VENDOR_ID, MEDIATEK_PRODUCT_DC_4COM, 0xff, 0x00, 0x00) }, ++ { USB_DEVICE_AND_INTERFACE_INFO(MEDIATEK_VENDOR_ID, MEDIATEK_PRODUCT_7208_1COM, 0x02, 0x00, 0x00) }, ++ { USB_DEVICE_AND_INTERFACE_INFO(MEDIATEK_VENDOR_ID, MEDIATEK_PRODUCT_7208_2COM, 0x02, 0x02, 0x01) }, ++ { USB_DEVICE_AND_INTERFACE_INFO(MEDIATEK_VENDOR_ID, MEDIATEK_PRODUCT_FP_1COM, 0x0a, 0x00, 0x00) }, ++ { USB_DEVICE_AND_INTERFACE_INFO(MEDIATEK_VENDOR_ID, MEDIATEK_PRODUCT_FP_2COM, 0x0a, 0x00, 0x00) }, ++ { USB_DEVICE_AND_INTERFACE_INFO(MEDIATEK_VENDOR_ID, MEDIATEK_PRODUCT_FPDC_1COM, 0x0a, 0x00, 0x00) }, ++ { USB_DEVICE_AND_INTERFACE_INFO(MEDIATEK_VENDOR_ID, MEDIATEK_PRODUCT_FPDC_2COM, 0x0a, 0x00, 0x00) }, + { USB_DEVICE(CELLIENT_VENDOR_ID, CELLIENT_PRODUCT_MEN200) }, + { } /* Terminating entry */ + }; +diff --git a/drivers/vhost/vhost.c b/drivers/vhost/vhost.c +index c14c42b..ae66278 100644 +--- 
a/drivers/vhost/vhost.c ++++ b/drivers/vhost/vhost.c +@@ -222,6 +222,8 @@ static int vhost_worker(void *data) + if (work) { + __set_current_state(TASK_RUNNING); + work->fn(work); ++ if (need_resched()) ++ schedule(); + } else + schedule(); + +diff --git a/fs/buffer.c b/fs/buffer.c +index c807931..4115eca 100644 +--- a/fs/buffer.c ++++ b/fs/buffer.c +@@ -1087,6 +1087,9 @@ grow_buffers(struct block_device *bdev, sector_t block, int size) + static struct buffer_head * + __getblk_slow(struct block_device *bdev, sector_t block, int size) + { ++ int ret; ++ struct buffer_head *bh; ++ + /* Size must be multiple of hard sectorsize */ + if (unlikely(size & (bdev_logical_block_size(bdev)-1) || + (size < 512 || size > PAGE_SIZE))) { +@@ -1099,20 +1102,21 @@ __getblk_slow(struct block_device *bdev, sector_t block, int size) + return NULL; + } + +- for (;;) { +- struct buffer_head * bh; +- int ret; ++retry: ++ bh = __find_get_block(bdev, block, size); ++ if (bh) ++ return bh; + ++ ret = grow_buffers(bdev, block, size); ++ if (ret == 0) { ++ free_more_memory(); ++ goto retry; ++ } else if (ret > 0) { + bh = __find_get_block(bdev, block, size); + if (bh) + return bh; +- +- ret = grow_buffers(bdev, block, size); +- if (ret < 0) +- return NULL; +- if (ret == 0) +- free_more_memory(); + } ++ return NULL; + } + + /* +diff --git a/fs/cifs/connect.c b/fs/cifs/connect.c +index b21670c..56c152d 100644 +--- a/fs/cifs/connect.c ++++ b/fs/cifs/connect.c +@@ -2925,6 +2925,18 @@ void cifs_setup_cifs_sb(struct smb_vol *pvolume_info, + #define CIFS_DEFAULT_NON_POSIX_RSIZE (60 * 1024) + #define CIFS_DEFAULT_NON_POSIX_WSIZE (65536) + ++/* ++ * On hosts with high memory, we can't currently support wsize/rsize that are ++ * larger than we can kmap at once. Cap the rsize/wsize at ++ * LAST_PKMAP * PAGE_SIZE. We'll never be able to fill a read or write request ++ * larger than that anyway. ++ */ ++#ifdef CONFIG_HIGHMEM ++#define CIFS_KMAP_SIZE_LIMIT (LAST_PKMAP * PAGE_CACHE_SIZE) ++#else /* CONFIG_HIGHMEM */ ++#define CIFS_KMAP_SIZE_LIMIT (1<<24) ++#endif /* CONFIG_HIGHMEM */ ++ + static unsigned int + cifs_negotiate_wsize(struct cifs_tcon *tcon, struct smb_vol *pvolume_info) + { +@@ -2955,6 +2967,9 @@ cifs_negotiate_wsize(struct cifs_tcon *tcon, struct smb_vol *pvolume_info) + wsize = min_t(unsigned int, wsize, + server->maxBuf - sizeof(WRITE_REQ) + 4); + ++ /* limit to the amount that we can kmap at once */ ++ wsize = min_t(unsigned int, wsize, CIFS_KMAP_SIZE_LIMIT); ++ + /* hard limit of CIFS_MAX_WSIZE */ + wsize = min_t(unsigned int, wsize, CIFS_MAX_WSIZE); + +@@ -2996,6 +3011,9 @@ cifs_negotiate_rsize(struct cifs_tcon *tcon, struct smb_vol *pvolume_info) + if (!(server->capabilities & CAP_LARGE_READ_X)) + rsize = min_t(unsigned int, CIFSMaxBufSize, rsize); + ++ /* limit to the amount that we can kmap at once */ ++ rsize = min_t(unsigned int, rsize, CIFS_KMAP_SIZE_LIMIT); ++ + /* hard limit of CIFS_MAX_RSIZE */ + rsize = min_t(unsigned int, rsize, CIFS_MAX_RSIZE); + +diff --git a/fs/cifs/readdir.c b/fs/cifs/readdir.c +index db4a138..4c37ed4 100644 +--- a/fs/cifs/readdir.c ++++ b/fs/cifs/readdir.c +@@ -86,9 +86,12 @@ cifs_readdir_lookup(struct dentry *parent, struct qstr *name, + + dentry = d_lookup(parent, name); + if (dentry) { +- /* FIXME: check for inode number changes? 
*/ +- if (dentry->d_inode != NULL) ++ inode = dentry->d_inode; ++ /* update inode in place if i_ino didn't change */ ++ if (inode && CIFS_I(inode)->uniqueid == fattr->cf_uniqueid) { ++ cifs_fattr_to_inode(inode, fattr); + return dentry; ++ } + d_drop(dentry); + dput(dentry); + } +diff --git a/fs/ecryptfs/kthread.c b/fs/ecryptfs/kthread.c +index 69f994a..0dbe58a 100644 +--- a/fs/ecryptfs/kthread.c ++++ b/fs/ecryptfs/kthread.c +@@ -149,7 +149,7 @@ int ecryptfs_privileged_open(struct file **lower_file, + (*lower_file) = dentry_open(lower_dentry, lower_mnt, flags, cred); + if (!IS_ERR(*lower_file)) + goto out; +- if (flags & O_RDONLY) { ++ if ((flags & O_ACCMODE) == O_RDONLY) { + rc = PTR_ERR((*lower_file)); + goto out; + } +diff --git a/fs/ecryptfs/miscdev.c b/fs/ecryptfs/miscdev.c +index 0dc5a3d..de42310 100644 +--- a/fs/ecryptfs/miscdev.c ++++ b/fs/ecryptfs/miscdev.c +@@ -49,7 +49,10 @@ ecryptfs_miscdev_poll(struct file *file, poll_table *pt) + mutex_lock(&ecryptfs_daemon_hash_mux); + /* TODO: Just use file->private_data? */ + rc = ecryptfs_find_daemon_by_euid(&daemon, euid, current_user_ns()); +- BUG_ON(rc || !daemon); ++ if (rc || !daemon) { ++ mutex_unlock(&ecryptfs_daemon_hash_mux); ++ return -EINVAL; ++ } + mutex_lock(&daemon->mux); + mutex_unlock(&ecryptfs_daemon_hash_mux); + if (daemon->flags & ECRYPTFS_DAEMON_ZOMBIE) { +@@ -122,6 +125,7 @@ ecryptfs_miscdev_open(struct inode *inode, struct file *file) + goto out_unlock_daemon; + } + daemon->flags |= ECRYPTFS_DAEMON_MISCDEV_OPEN; ++ file->private_data = daemon; + atomic_inc(&ecryptfs_num_miscdev_opens); + out_unlock_daemon: + mutex_unlock(&daemon->mux); +@@ -152,9 +156,9 @@ ecryptfs_miscdev_release(struct inode *inode, struct file *file) + + mutex_lock(&ecryptfs_daemon_hash_mux); + rc = ecryptfs_find_daemon_by_euid(&daemon, euid, current_user_ns()); +- BUG_ON(rc || !daemon); ++ if (rc || !daemon) ++ daemon = file->private_data; + mutex_lock(&daemon->mux); +- BUG_ON(daemon->pid != task_pid(current)); + BUG_ON(!(daemon->flags & ECRYPTFS_DAEMON_MISCDEV_OPEN)); + daemon->flags &= ~ECRYPTFS_DAEMON_MISCDEV_OPEN; + atomic_dec(&ecryptfs_num_miscdev_opens); +@@ -191,31 +195,32 @@ int ecryptfs_send_miscdev(char *data, size_t data_size, + struct ecryptfs_msg_ctx *msg_ctx, u8 msg_type, + u16 msg_flags, struct ecryptfs_daemon *daemon) + { +- int rc = 0; ++ struct ecryptfs_message *msg; + +- mutex_lock(&msg_ctx->mux); +- msg_ctx->msg = kmalloc((sizeof(*msg_ctx->msg) + data_size), +- GFP_KERNEL); +- if (!msg_ctx->msg) { +- rc = -ENOMEM; ++ msg = kmalloc((sizeof(*msg) + data_size), GFP_KERNEL); ++ if (!msg) { + printk(KERN_ERR "%s: Out of memory whilst attempting " + "to kmalloc(%zd, GFP_KERNEL)\n", __func__, +- (sizeof(*msg_ctx->msg) + data_size)); +- goto out_unlock; ++ (sizeof(*msg) + data_size)); ++ return -ENOMEM; + } ++ ++ mutex_lock(&msg_ctx->mux); ++ msg_ctx->msg = msg; + msg_ctx->msg->index = msg_ctx->index; + msg_ctx->msg->data_len = data_size; + msg_ctx->type = msg_type; + memcpy(msg_ctx->msg->data, data, data_size); + msg_ctx->msg_size = (sizeof(*msg_ctx->msg) + data_size); +- mutex_lock(&daemon->mux); + list_add_tail(&msg_ctx->daemon_out_list, &daemon->msg_ctx_out_queue); ++ mutex_unlock(&msg_ctx->mux); ++ ++ mutex_lock(&daemon->mux); + daemon->num_queued_msg_ctx++; + wake_up_interruptible(&daemon->wait); + mutex_unlock(&daemon->mux); +-out_unlock: +- mutex_unlock(&msg_ctx->mux); +- return rc; ++ ++ return 0; + } + + /** +@@ -246,8 +251,16 @@ ecryptfs_miscdev_read(struct file *file, char __user *buf, size_t count, + 
mutex_lock(&ecryptfs_daemon_hash_mux); + /* TODO: Just use file->private_data? */ + rc = ecryptfs_find_daemon_by_euid(&daemon, euid, current_user_ns()); +- BUG_ON(rc || !daemon); ++ if (rc || !daemon) { ++ mutex_unlock(&ecryptfs_daemon_hash_mux); ++ return -EINVAL; ++ } + mutex_lock(&daemon->mux); ++ if (task_pid(current) != daemon->pid) { ++ mutex_unlock(&daemon->mux); ++ mutex_unlock(&ecryptfs_daemon_hash_mux); ++ return -EPERM; ++ } + if (daemon->flags & ECRYPTFS_DAEMON_ZOMBIE) { + rc = 0; + mutex_unlock(&ecryptfs_daemon_hash_mux); +@@ -284,9 +297,6 @@ check_list: + * message from the queue; try again */ + goto check_list; + } +- BUG_ON(euid != daemon->euid); +- BUG_ON(current_user_ns() != daemon->user_ns); +- BUG_ON(task_pid(current) != daemon->pid); + msg_ctx = list_first_entry(&daemon->msg_ctx_out_queue, + struct ecryptfs_msg_ctx, daemon_out_list); + BUG_ON(!msg_ctx); +diff --git a/fs/eventpoll.c b/fs/eventpoll.c +index 4d9d3a4..a6f3763 100644 +--- a/fs/eventpoll.c ++++ b/fs/eventpoll.c +@@ -1629,8 +1629,10 @@ SYSCALL_DEFINE4(epoll_ctl, int, epfd, int, op, int, fd, + if (op == EPOLL_CTL_ADD) { + if (is_file_epoll(tfile)) { + error = -ELOOP; +- if (ep_loop_check(ep, tfile) != 0) ++ if (ep_loop_check(ep, tfile) != 0) { ++ clear_tfile_check_list(); + goto error_tgt_fput; ++ } + } else + list_add(&tfile->f_tfile_llink, &tfile_check_list); + } +diff --git a/fs/exofs/ore.c b/fs/exofs/ore.c +index 49cf230..24a49d4 100644 +--- a/fs/exofs/ore.c ++++ b/fs/exofs/ore.c +@@ -735,13 +735,7 @@ static int _prepare_for_striping(struct ore_io_state *ios) + out: + ios->numdevs = devs_in_group; + ios->pages_consumed = cur_pg; +- if (unlikely(ret)) { +- if (length == ios->length) +- return ret; +- else +- ios->length -= length; +- } +- return 0; ++ return ret; + } + + int ore_create(struct ore_io_state *ios) +diff --git a/fs/exofs/ore_raid.c b/fs/exofs/ore_raid.c +index d222c77..fff2070 100644 +--- a/fs/exofs/ore_raid.c ++++ b/fs/exofs/ore_raid.c +@@ -461,16 +461,12 @@ static void _mark_read4write_pages_uptodate(struct ore_io_state *ios, int ret) + * ios->sp2d[p][*], xor is calculated the same way. 
These pages are + * allocated/freed and don't go through cache + */ +-static int _read_4_write(struct ore_io_state *ios) ++static int _read_4_write_first_stripe(struct ore_io_state *ios) + { +- struct ore_io_state *ios_read; + struct ore_striping_info read_si; + struct __stripe_pages_2d *sp2d = ios->sp2d; + u64 offset = ios->si.first_stripe_start; +- u64 last_stripe_end; +- unsigned bytes_in_stripe = ios->si.bytes_in_stripe; +- unsigned i, c, p, min_p = sp2d->pages_in_unit, max_p = -1; +- int ret; ++ unsigned c, p, min_p = sp2d->pages_in_unit, max_p = -1; + + if (offset == ios->offset) /* Go to start collect $200 */ + goto read_last_stripe; +@@ -478,6 +474,9 @@ static int _read_4_write(struct ore_io_state *ios) + min_p = _sp2d_min_pg(sp2d); + max_p = _sp2d_max_pg(sp2d); + ++ ORE_DBGMSG("stripe_start=0x%llx ios->offset=0x%llx min_p=%d max_p=%d\n", ++ offset, ios->offset, min_p, max_p); ++ + for (c = 0; ; c++) { + ore_calc_stripe_info(ios->layout, offset, 0, &read_si); + read_si.obj_offset += min_p * PAGE_SIZE; +@@ -512,6 +511,18 @@ static int _read_4_write(struct ore_io_state *ios) + } + + read_last_stripe: ++ return 0; ++} ++ ++static int _read_4_write_last_stripe(struct ore_io_state *ios) ++{ ++ struct ore_striping_info read_si; ++ struct __stripe_pages_2d *sp2d = ios->sp2d; ++ u64 offset; ++ u64 last_stripe_end; ++ unsigned bytes_in_stripe = ios->si.bytes_in_stripe; ++ unsigned c, p, min_p = sp2d->pages_in_unit, max_p = -1; ++ + offset = ios->offset + ios->length; + if (offset % PAGE_SIZE) + _add_to_r4w_last_page(ios, &offset); +@@ -527,15 +538,15 @@ read_last_stripe: + c = _dev_order(ios->layout->group_width * ios->layout->mirrors_p1, + ios->layout->mirrors_p1, read_si.par_dev, read_si.dev); + +- BUG_ON(ios->si.first_stripe_start + bytes_in_stripe != last_stripe_end); +- /* unaligned IO must be within a single stripe */ +- + if (min_p == sp2d->pages_in_unit) { + /* Didn't do it yet */ + min_p = _sp2d_min_pg(sp2d); + max_p = _sp2d_max_pg(sp2d); + } + ++ ORE_DBGMSG("offset=0x%llx stripe_end=0x%llx min_p=%d max_p=%d\n", ++ offset, last_stripe_end, min_p, max_p); ++ + while (offset < last_stripe_end) { + struct __1_page_stripe *_1ps = &sp2d->_1p_stripes[p]; + +@@ -568,6 +579,15 @@ read_last_stripe: + } + + read_it: ++ return 0; ++} ++ ++static int _read_4_write_execute(struct ore_io_state *ios) ++{ ++ struct ore_io_state *ios_read; ++ unsigned i; ++ int ret; ++ + ios_read = ios->ios_read_4_write; + if (!ios_read) + return 0; +@@ -591,6 +611,8 @@ read_it: + } + + _mark_read4write_pages_uptodate(ios_read, ret); ++ ore_put_io_state(ios_read); ++ ios->ios_read_4_write = NULL; /* Might need a reuse at last stripe */ + return 0; + } + +@@ -626,8 +648,11 @@ int _ore_add_parity_unit(struct ore_io_state *ios, + /* If first stripe, Read in all read4write pages + * (if needed) before we calculate the first parity. 
+ */ +- _read_4_write(ios); ++ _read_4_write_first_stripe(ios); + } ++ if (!cur_len) /* If last stripe r4w pages of last stripe */ ++ _read_4_write_last_stripe(ios); ++ _read_4_write_execute(ios); + + for (i = 0; i < num_pages; i++) { + pages[i] = _raid_page_alloc(); +@@ -654,34 +679,14 @@ int _ore_add_parity_unit(struct ore_io_state *ios, + + int _ore_post_alloc_raid_stuff(struct ore_io_state *ios) + { +- struct ore_layout *layout = ios->layout; +- + if (ios->parity_pages) { ++ struct ore_layout *layout = ios->layout; + unsigned pages_in_unit = layout->stripe_unit / PAGE_SIZE; +- unsigned stripe_size = ios->si.bytes_in_stripe; +- u64 last_stripe, first_stripe; + + if (_sp2d_alloc(pages_in_unit, layout->group_width, + layout->parity, &ios->sp2d)) { + return -ENOMEM; + } +- +- /* Round io down to last full strip */ +- first_stripe = div_u64(ios->offset, stripe_size); +- last_stripe = div_u64(ios->offset + ios->length, stripe_size); +- +- /* If an IO spans more then a single stripe it must end at +- * a stripe boundary. The reminder at the end is pushed into the +- * next IO. +- */ +- if (last_stripe != first_stripe) { +- ios->length = last_stripe * stripe_size - ios->offset; +- +- BUG_ON(!ios->length); +- ios->nr_pages = (ios->length + PAGE_SIZE - 1) / +- PAGE_SIZE; +- ios->si.length = ios->length; /*make it consistent */ +- } + } + return 0; + } +diff --git a/fs/ext4/super.c b/fs/ext4/super.c +index ab7aa3f..a93486e 100644 +--- a/fs/ext4/super.c ++++ b/fs/ext4/super.c +@@ -1097,7 +1097,7 @@ static int ext4_show_options(struct seq_file *seq, struct vfsmount *vfs) + } + if (sbi->s_max_batch_time != EXT4_DEF_MAX_BATCH_TIME) { + seq_printf(seq, ",max_batch_time=%u", +- (unsigned) sbi->s_min_batch_time); ++ (unsigned) sbi->s_max_batch_time); + } + + /* +diff --git a/fs/fifo.c b/fs/fifo.c +index b1a524d..cf6f434 100644 +--- a/fs/fifo.c ++++ b/fs/fifo.c +@@ -14,7 +14,7 @@ + #include + #include + +-static void wait_for_partner(struct inode* inode, unsigned int *cnt) ++static int wait_for_partner(struct inode* inode, unsigned int *cnt) + { + int cur = *cnt; + +@@ -23,6 +23,7 @@ static void wait_for_partner(struct inode* inode, unsigned int *cnt) + if (signal_pending(current)) + break; + } ++ return cur == *cnt ? 
-ERESTARTSYS : 0; + } + + static void wake_up_partner(struct inode* inode) +@@ -67,8 +68,7 @@ static int fifo_open(struct inode *inode, struct file *filp) + * seen a writer */ + filp->f_version = pipe->w_counter; + } else { +- wait_for_partner(inode, &pipe->w_counter); +- if(signal_pending(current)) ++ if (wait_for_partner(inode, &pipe->w_counter)) + goto err_rd; + } + } +@@ -90,8 +90,7 @@ static int fifo_open(struct inode *inode, struct file *filp) + wake_up_partner(inode); + + if (!pipe->readers) { +- wait_for_partner(inode, &pipe->r_counter); +- if (signal_pending(current)) ++ if (wait_for_partner(inode, &pipe->r_counter)) + goto err_wr; + } + break; +diff --git a/fs/hugetlbfs/inode.c b/fs/hugetlbfs/inode.c +index 2d0ca24..ebc2f4d 100644 +--- a/fs/hugetlbfs/inode.c ++++ b/fs/hugetlbfs/inode.c +@@ -592,9 +592,15 @@ static int hugetlbfs_statfs(struct dentry *dentry, struct kstatfs *buf) + spin_lock(&sbinfo->stat_lock); + /* If no limits set, just report 0 for max/free/used + * blocks, like simple_statfs() */ +- if (sbinfo->max_blocks >= 0) { +- buf->f_blocks = sbinfo->max_blocks; +- buf->f_bavail = buf->f_bfree = sbinfo->free_blocks; ++ if (sbinfo->spool) { ++ long free_pages; ++ ++ spin_lock(&sbinfo->spool->lock); ++ buf->f_blocks = sbinfo->spool->max_hpages; ++ free_pages = sbinfo->spool->max_hpages ++ - sbinfo->spool->used_hpages; ++ buf->f_bavail = buf->f_bfree = free_pages; ++ spin_unlock(&sbinfo->spool->lock); + buf->f_files = sbinfo->max_inodes; + buf->f_ffree = sbinfo->free_inodes; + } +@@ -610,6 +616,10 @@ static void hugetlbfs_put_super(struct super_block *sb) + + if (sbi) { + sb->s_fs_info = NULL; ++ ++ if (sbi->spool) ++ hugepage_put_subpool(sbi->spool); ++ + kfree(sbi); + } + } +@@ -841,10 +851,14 @@ hugetlbfs_fill_super(struct super_block *sb, void *data, int silent) + sb->s_fs_info = sbinfo; + sbinfo->hstate = config.hstate; + spin_lock_init(&sbinfo->stat_lock); +- sbinfo->max_blocks = config.nr_blocks; +- sbinfo->free_blocks = config.nr_blocks; + sbinfo->max_inodes = config.nr_inodes; + sbinfo->free_inodes = config.nr_inodes; ++ sbinfo->spool = NULL; ++ if (config.nr_blocks != -1) { ++ sbinfo->spool = hugepage_new_subpool(config.nr_blocks); ++ if (!sbinfo->spool) ++ goto out_free; ++ } + sb->s_maxbytes = MAX_LFS_FILESIZE; + sb->s_blocksize = huge_page_size(config.hstate); + sb->s_blocksize_bits = huge_page_shift(config.hstate); +@@ -864,38 +878,12 @@ hugetlbfs_fill_super(struct super_block *sb, void *data, int silent) + sb->s_root = root; + return 0; + out_free: ++ if (sbinfo->spool) ++ kfree(sbinfo->spool); + kfree(sbinfo); + return -ENOMEM; + } + +-int hugetlb_get_quota(struct address_space *mapping, long delta) +-{ +- int ret = 0; +- struct hugetlbfs_sb_info *sbinfo = HUGETLBFS_SB(mapping->host->i_sb); +- +- if (sbinfo->free_blocks > -1) { +- spin_lock(&sbinfo->stat_lock); +- if (sbinfo->free_blocks - delta >= 0) +- sbinfo->free_blocks -= delta; +- else +- ret = -ENOMEM; +- spin_unlock(&sbinfo->stat_lock); +- } +- +- return ret; +-} +- +-void hugetlb_put_quota(struct address_space *mapping, long delta) +-{ +- struct hugetlbfs_sb_info *sbinfo = HUGETLBFS_SB(mapping->host->i_sb); +- +- if (sbinfo->free_blocks > -1) { +- spin_lock(&sbinfo->stat_lock); +- sbinfo->free_blocks += delta; +- spin_unlock(&sbinfo->stat_lock); +- } +-} +- + static struct dentry *hugetlbfs_mount(struct file_system_type *fs_type, + int flags, const char *dev_name, void *data) + { +diff --git a/fs/locks.c b/fs/locks.c +index 0d68f1f..6a64f15 100644 +--- a/fs/locks.c ++++ b/fs/locks.c +@@ -1465,7 
+1465,7 @@ int generic_setlease(struct file *filp, long arg, struct file_lock **flp) + case F_WRLCK: + return generic_add_lease(filp, arg, flp); + default: +- BUG(); ++ return -EINVAL; + } + } + EXPORT_SYMBOL(generic_setlease); +diff --git a/fs/nfs/idmap.c b/fs/nfs/idmap.c +index 47d1c6f..b122af8 100644 +--- a/fs/nfs/idmap.c ++++ b/fs/nfs/idmap.c +@@ -318,12 +318,12 @@ struct idmap_hashent { + unsigned long ih_expires; + __u32 ih_id; + size_t ih_namelen; +- char ih_name[IDMAP_NAMESZ]; ++ const char *ih_name; + }; + + struct idmap_hashtable { + __u8 h_type; +- struct idmap_hashent h_entries[IDMAP_HASH_SZ]; ++ struct idmap_hashent *h_entries; + }; + + struct idmap { +@@ -378,6 +378,28 @@ nfs_idmap_new(struct nfs_client *clp) + return 0; + } + ++static void ++idmap_alloc_hashtable(struct idmap_hashtable *h) ++{ ++ if (h->h_entries != NULL) ++ return; ++ h->h_entries = kcalloc(IDMAP_HASH_SZ, ++ sizeof(*h->h_entries), ++ GFP_KERNEL); ++} ++ ++static void ++idmap_free_hashtable(struct idmap_hashtable *h) ++{ ++ int i; ++ ++ if (h->h_entries == NULL) ++ return; ++ for (i = 0; i < IDMAP_HASH_SZ; i++) ++ kfree(h->h_entries[i].ih_name); ++ kfree(h->h_entries); ++} ++ + void + nfs_idmap_delete(struct nfs_client *clp) + { +@@ -387,6 +409,8 @@ nfs_idmap_delete(struct nfs_client *clp) + return; + rpc_unlink(idmap->idmap_dentry); + clp->cl_idmap = NULL; ++ idmap_free_hashtable(&idmap->idmap_user_hash); ++ idmap_free_hashtable(&idmap->idmap_group_hash); + kfree(idmap); + } + +@@ -396,6 +420,8 @@ nfs_idmap_delete(struct nfs_client *clp) + static inline struct idmap_hashent * + idmap_name_hash(struct idmap_hashtable* h, const char *name, size_t len) + { ++ if (h->h_entries == NULL) ++ return NULL; + return &h->h_entries[fnvhash32(name, len) % IDMAP_HASH_SZ]; + } + +@@ -404,6 +430,8 @@ idmap_lookup_name(struct idmap_hashtable *h, const char *name, size_t len) + { + struct idmap_hashent *he = idmap_name_hash(h, name, len); + ++ if (he == NULL) ++ return NULL; + if (he->ih_namelen != len || memcmp(he->ih_name, name, len) != 0) + return NULL; + if (time_after(jiffies, he->ih_expires)) +@@ -414,6 +442,8 @@ idmap_lookup_name(struct idmap_hashtable *h, const char *name, size_t len) + static inline struct idmap_hashent * + idmap_id_hash(struct idmap_hashtable* h, __u32 id) + { ++ if (h->h_entries == NULL) ++ return NULL; + return &h->h_entries[fnvhash32(&id, sizeof(id)) % IDMAP_HASH_SZ]; + } + +@@ -421,6 +451,9 @@ static struct idmap_hashent * + idmap_lookup_id(struct idmap_hashtable *h, __u32 id) + { + struct idmap_hashent *he = idmap_id_hash(h, id); ++ ++ if (he == NULL) ++ return NULL; + if (he->ih_id != id || he->ih_namelen == 0) + return NULL; + if (time_after(jiffies, he->ih_expires)) +@@ -436,12 +469,14 @@ idmap_lookup_id(struct idmap_hashtable *h, __u32 id) + static inline struct idmap_hashent * + idmap_alloc_name(struct idmap_hashtable *h, char *name, size_t len) + { ++ idmap_alloc_hashtable(h); + return idmap_name_hash(h, name, len); + } + + static inline struct idmap_hashent * + idmap_alloc_id(struct idmap_hashtable *h, __u32 id) + { ++ idmap_alloc_hashtable(h); + return idmap_id_hash(h, id); + } + +@@ -449,9 +484,14 @@ static void + idmap_update_entry(struct idmap_hashent *he, const char *name, + size_t namelen, __u32 id) + { ++ char *str = kmalloc(namelen + 1, GFP_KERNEL); ++ if (str == NULL) ++ return; ++ kfree(he->ih_name); + he->ih_id = id; +- memcpy(he->ih_name, name, namelen); +- he->ih_name[namelen] = '\0'; ++ memcpy(str, name, namelen); ++ str[namelen] = '\0'; ++ he->ih_name = str; + 
he->ih_namelen = namelen; + he->ih_expires = jiffies + nfs_idmap_cache_timeout; + } +diff --git a/fs/nfs/nfs4state.c b/fs/nfs/nfs4state.c +index 66020ac..07354b7 100644 +--- a/fs/nfs/nfs4state.c ++++ b/fs/nfs/nfs4state.c +@@ -1186,8 +1186,9 @@ restart: + spin_lock(&state->state_lock); + list_for_each_entry(lock, &state->lock_states, ls_locks) { + if (!(lock->ls_flags & NFS_LOCK_INITIALIZED)) +- printk("%s: Lock reclaim failed!\n", +- __func__); ++ pr_warn_ratelimited("NFS: " ++ "%s: Lock reclaim " ++ "failed!\n", __func__); + } + spin_unlock(&state->state_lock); + nfs4_put_open_state(state); +diff --git a/fs/nfs/objlayout/objio_osd.c b/fs/nfs/objlayout/objio_osd.c +index 55d0128..a03ee52 100644 +--- a/fs/nfs/objlayout/objio_osd.c ++++ b/fs/nfs/objlayout/objio_osd.c +@@ -433,7 +433,10 @@ int objio_read_pagelist(struct nfs_read_data *rdata) + objios->ios->done = _read_done; + dprintk("%s: offset=0x%llx length=0x%x\n", __func__, + rdata->args.offset, rdata->args.count); +- return ore_read(objios->ios); ++ ret = ore_read(objios->ios); ++ if (unlikely(ret)) ++ objio_free_result(&objios->oir); ++ return ret; + } + + /* +@@ -464,8 +467,16 @@ static struct page *__r4w_get_page(void *priv, u64 offset, bool *uptodate) + struct objio_state *objios = priv; + struct nfs_write_data *wdata = objios->oir.rpcdata; + pgoff_t index = offset / PAGE_SIZE; +- struct page *page = find_get_page(wdata->inode->i_mapping, index); ++ struct page *page; ++ loff_t i_size = i_size_read(wdata->inode); ++ ++ if (offset >= i_size) { ++ *uptodate = true; ++ dprintk("%s: g_zero_page index=0x%lx\n", __func__, index); ++ return ZERO_PAGE(0); ++ } + ++ page = find_get_page(wdata->inode->i_mapping, index); + if (!page) { + page = find_or_create_page(wdata->inode->i_mapping, + index, GFP_NOFS); +@@ -486,8 +497,10 @@ static struct page *__r4w_get_page(void *priv, u64 offset, bool *uptodate) + + static void __r4w_put_page(void *priv, struct page *page) + { +- dprintk("%s: index=0x%lx\n", __func__, page->index); +- page_cache_release(page); ++ dprintk("%s: index=0x%lx\n", __func__, ++ (page == ZERO_PAGE(0)) ? 
-1UL : page->index); ++ if (ZERO_PAGE(0) != page) ++ page_cache_release(page); + return; + } + +@@ -517,8 +530,10 @@ int objio_write_pagelist(struct nfs_write_data *wdata, int how) + dprintk("%s: offset=0x%llx length=0x%x\n", __func__, + wdata->args.offset, wdata->args.count); + ret = ore_write(objios->ios); +- if (unlikely(ret)) ++ if (unlikely(ret)) { ++ objio_free_result(&objios->oir); + return ret; ++ } + + if (objios->sync) + _write_done(objios->ios, objios); +diff --git a/fs/ocfs2/file.c b/fs/ocfs2/file.c +index 07ee5b4..1c7d45e 100644 +--- a/fs/ocfs2/file.c ++++ b/fs/ocfs2/file.c +@@ -1950,7 +1950,7 @@ static int __ocfs2_change_file_space(struct file *file, struct inode *inode, + if (ret < 0) + mlog_errno(ret); + +- if (file->f_flags & O_SYNC) ++ if (file && (file->f_flags & O_SYNC)) + handle->h_sync = 1; + + ocfs2_commit_trans(osb, handle); +diff --git a/fs/ramfs/file-nommu.c b/fs/ramfs/file-nommu.c +index fbb0b47..d5378d0 100644 +--- a/fs/ramfs/file-nommu.c ++++ b/fs/ramfs/file-nommu.c +@@ -110,6 +110,7 @@ int ramfs_nommu_expand_for_mapping(struct inode *inode, size_t newsize) + + /* prevent the page from being discarded on memory pressure */ + SetPageDirty(page); ++ SetPageUptodate(page); + + unlock_page(page); + put_page(page); +diff --git a/fs/ubifs/sb.c b/fs/ubifs/sb.c +index 6094c5a..b73ecd8 100644 +--- a/fs/ubifs/sb.c ++++ b/fs/ubifs/sb.c +@@ -715,8 +715,12 @@ static int fixup_free_space(struct ubifs_info *c) + lnum = ubifs_next_log_lnum(c, lnum); + } + +- /* Fixup the current log head */ +- err = fixup_leb(c, c->lhead_lnum, c->lhead_offs); ++ /* ++ * Fixup the log head which contains the only a CS node at the ++ * beginning. ++ */ ++ err = fixup_leb(c, c->lhead_lnum, ++ ALIGN(UBIFS_CS_NODE_SZ, c->min_io_size)); + if (err) + goto out; + +diff --git a/include/linux/Kbuild b/include/linux/Kbuild +index bd21ecd..a3ce901 100644 +--- a/include/linux/Kbuild ++++ b/include/linux/Kbuild +@@ -268,6 +268,7 @@ header-y += netfilter_ipv4.h + header-y += netfilter_ipv6.h + header-y += netlink.h + header-y += netrom.h ++header-y += nfc.h + header-y += nfs.h + header-y += nfs2.h + header-y += nfs3.h +diff --git a/include/linux/hrtimer.h b/include/linux/hrtimer.h +index fd0dc30..cc07d27 100644 +--- a/include/linux/hrtimer.h ++++ b/include/linux/hrtimer.h +@@ -165,6 +165,7 @@ enum hrtimer_base_type { + * @lock: lock protecting the base and associated clock bases + * and timers + * @active_bases: Bitfield to mark bases with active timers ++ * @clock_was_set: Indicates that clock was set from irq context. 
+ * @expires_next: absolute time of the next event which was scheduled + * via clock_set_next_event() + * @hres_active: State of high resolution mode +@@ -177,7 +178,8 @@ enum hrtimer_base_type { + */ + struct hrtimer_cpu_base { + raw_spinlock_t lock; +- unsigned long active_bases; ++ unsigned int active_bases; ++ unsigned int clock_was_set; + #ifdef CONFIG_HIGH_RES_TIMERS + ktime_t expires_next; + int hres_active; +@@ -286,6 +288,8 @@ extern void hrtimer_peek_ahead_timers(void); + # define MONOTONIC_RES_NSEC HIGH_RES_NSEC + # define KTIME_MONOTONIC_RES KTIME_HIGH_RES + ++extern void clock_was_set_delayed(void); ++ + #else + + # define MONOTONIC_RES_NSEC LOW_RES_NSEC +@@ -306,6 +310,9 @@ static inline int hrtimer_is_hres_active(struct hrtimer *timer) + { + return 0; + } ++ ++static inline void clock_was_set_delayed(void) { } ++ + #endif + + extern void clock_was_set(void); +@@ -320,6 +327,7 @@ extern ktime_t ktime_get(void); + extern ktime_t ktime_get_real(void); + extern ktime_t ktime_get_boottime(void); + extern ktime_t ktime_get_monotonic_offset(void); ++extern ktime_t ktime_get_update_offsets(ktime_t *offs_real, ktime_t *offs_boot); + + DECLARE_PER_CPU(struct tick_device, tick_cpu_device); + +diff --git a/include/linux/hugetlb.h b/include/linux/hugetlb.h +index d9d6c86..c5ed2f1 100644 +--- a/include/linux/hugetlb.h ++++ b/include/linux/hugetlb.h +@@ -14,6 +14,15 @@ struct user_struct; + #include + #include + ++struct hugepage_subpool { ++ spinlock_t lock; ++ long count; ++ long max_hpages, used_hpages; ++}; ++ ++struct hugepage_subpool *hugepage_new_subpool(long nr_blocks); ++void hugepage_put_subpool(struct hugepage_subpool *spool); ++ + int PageHuge(struct page *page); + + void reset_vma_resv_huge_pages(struct vm_area_struct *vma); +@@ -138,12 +147,11 @@ struct hugetlbfs_config { + }; + + struct hugetlbfs_sb_info { +- long max_blocks; /* blocks allowed */ +- long free_blocks; /* blocks free */ + long max_inodes; /* inodes allowed */ + long free_inodes; /* inodes free */ + spinlock_t stat_lock; + struct hstate *hstate; ++ struct hugepage_subpool *spool; + }; + + +@@ -166,8 +174,6 @@ extern const struct file_operations hugetlbfs_file_operations; + extern const struct vm_operations_struct hugetlb_vm_ops; + struct file *hugetlb_file_setup(const char *name, size_t size, vm_flags_t acct, + struct user_struct **user, int creat_flags); +-int hugetlb_get_quota(struct address_space *mapping, long delta); +-void hugetlb_put_quota(struct address_space *mapping, long delta); + + static inline int is_file_hugepages(struct file *file) + { +diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h +index 188cb2f..905b1e1 100644 +--- a/include/linux/mmzone.h ++++ b/include/linux/mmzone.h +@@ -652,7 +652,7 @@ typedef struct pglist_data { + range, including holes */ + int node_id; + wait_queue_head_t kswapd_wait; +- struct task_struct *kswapd; ++ struct task_struct *kswapd; /* Protected by lock_memory_hotplug() */ + int kswapd_max_order; + enum zone_type classzone_idx; + } pg_data_t; +diff --git a/include/linux/pci.h b/include/linux/pci.h +index c0cfa0d..7cda65b 100644 +--- a/include/linux/pci.h ++++ b/include/linux/pci.h +@@ -176,8 +176,6 @@ enum pci_dev_flags { + PCI_DEV_FLAGS_NO_D3 = (__force pci_dev_flags_t) 2, + /* Provide indication device is assigned by a Virtual Machine Manager */ + PCI_DEV_FLAGS_ASSIGNED = (__force pci_dev_flags_t) 4, +- /* Device causes system crash if in D3 during S3 sleep */ +- PCI_DEV_FLAGS_NO_D3_DURING_SLEEP = (__force pci_dev_flags_t) 8, + }; + + enum 
pci_irq_reroute_variant { +diff --git a/include/linux/sched.h b/include/linux/sched.h +index 1c4f3e9..5afa2a3 100644 +--- a/include/linux/sched.h ++++ b/include/linux/sched.h +@@ -1892,6 +1892,14 @@ static inline int set_cpus_allowed_ptr(struct task_struct *p, + } + #endif + ++#ifdef CONFIG_NO_HZ ++void calc_load_enter_idle(void); ++void calc_load_exit_idle(void); ++#else ++static inline void calc_load_enter_idle(void) { } ++static inline void calc_load_exit_idle(void) { } ++#endif /* CONFIG_NO_HZ */ ++ + #ifndef CONFIG_CPUMASK_OFFSTACK + static inline int set_cpus_allowed(struct task_struct *p, cpumask_t new_mask) + { +diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h +index bdb4590..53dc7e7 100644 +--- a/include/linux/skbuff.h ++++ b/include/linux/skbuff.h +@@ -213,11 +213,8 @@ enum { + /* device driver is going to provide hardware time stamp */ + SKBTX_IN_PROGRESS = 1 << 2, + +- /* ensure the originating sk reference is available on driver level */ +- SKBTX_DRV_NEEDS_SK_REF = 1 << 3, +- + /* device driver supports TX zero-copy buffers */ +- SKBTX_DEV_ZEROCOPY = 1 << 4, ++ SKBTX_DEV_ZEROCOPY = 1 << 3, + }; + + /* +diff --git a/include/linux/timex.h b/include/linux/timex.h +index aa60fe7..08e90fb 100644 +--- a/include/linux/timex.h ++++ b/include/linux/timex.h +@@ -266,7 +266,7 @@ static inline int ntp_synced(void) + /* Returns how long ticks are at present, in ns / 2^NTP_SCALE_SHIFT. */ + extern u64 tick_length; + +-extern void second_overflow(void); ++extern int second_overflow(unsigned long secs); + extern void update_ntp_one_tick(void); + extern int do_adjtimex(struct timex *); + extern void hardpps(const struct timespec *, const struct timespec *); +diff --git a/include/scsi/libsas.h b/include/scsi/libsas.h +index 6a308d4..1e100c6 100644 +--- a/include/scsi/libsas.h ++++ b/include/scsi/libsas.h +@@ -159,6 +159,8 @@ enum ata_command_set { + ATAPI_COMMAND_SET = 1, + }; + ++#define ATA_RESP_FIS_SIZE 24 ++ + struct sata_device { + enum ata_command_set command_set; + struct smp_resp rps_resp; /* report_phy_sata_resp */ +@@ -170,7 +172,7 @@ struct sata_device { + + struct ata_port *ap; + struct ata_host ata_host; +- struct ata_taskfile tf; ++ u8 fis[ATA_RESP_FIS_SIZE]; + u32 sstatus; + u32 serror; + u32 scontrol; +@@ -486,7 +488,7 @@ enum exec_status { + */ + struct ata_task_resp { + u16 frame_len; +- u8 ending_fis[24]; /* dev to host or data-in */ ++ u8 ending_fis[ATA_RESP_FIS_SIZE]; /* dev to host or data-in */ + u32 sstatus; + u32 serror; + u32 scontrol; +diff --git a/kernel/hrtimer.c b/kernel/hrtimer.c +index ae34bf5..6db7a5e 100644 +--- a/kernel/hrtimer.c ++++ b/kernel/hrtimer.c +@@ -657,6 +657,14 @@ static inline int hrtimer_enqueue_reprogram(struct hrtimer *timer, + return 0; + } + ++static inline ktime_t hrtimer_update_base(struct hrtimer_cpu_base *base) ++{ ++ ktime_t *offs_real = &base->clock_base[HRTIMER_BASE_REALTIME].offset; ++ ktime_t *offs_boot = &base->clock_base[HRTIMER_BASE_BOOTTIME].offset; ++ ++ return ktime_get_update_offsets(offs_real, offs_boot); ++} ++ + /* + * Retrigger next event is called after clock was set + * +@@ -665,22 +673,12 @@ static inline int hrtimer_enqueue_reprogram(struct hrtimer *timer, + static void retrigger_next_event(void *arg) + { + struct hrtimer_cpu_base *base = &__get_cpu_var(hrtimer_bases); +- struct timespec realtime_offset, xtim, wtm, sleep; + + if (!hrtimer_hres_active()) + return; + +- /* Optimized out for !HIGH_RES */ +- get_xtime_and_monotonic_and_sleep_offset(&xtim, &wtm, &sleep); +- 
set_normalized_timespec(&realtime_offset, -wtm.tv_sec, -wtm.tv_nsec); +- +- /* Adjust CLOCK_REALTIME offset */ + raw_spin_lock(&base->lock); +- base->clock_base[HRTIMER_BASE_REALTIME].offset = +- timespec_to_ktime(realtime_offset); +- base->clock_base[HRTIMER_BASE_BOOTTIME].offset = +- timespec_to_ktime(sleep); +- ++ hrtimer_update_base(base); + hrtimer_force_reprogram(base, 0); + raw_spin_unlock(&base->lock); + } +@@ -710,13 +708,25 @@ static int hrtimer_switch_to_hres(void) + base->clock_base[i].resolution = KTIME_HIGH_RES; + + tick_setup_sched_timer(); +- + /* "Retrigger" the interrupt to get things going */ + retrigger_next_event(NULL); + local_irq_restore(flags); + return 1; + } + ++/* ++ * Called from timekeeping code to reprogramm the hrtimer interrupt ++ * device. If called from the timer interrupt context we defer it to ++ * softirq context. ++ */ ++void clock_was_set_delayed(void) ++{ ++ struct hrtimer_cpu_base *cpu_base = &__get_cpu_var(hrtimer_bases); ++ ++ cpu_base->clock_was_set = 1; ++ __raise_softirq_irqoff(HRTIMER_SOFTIRQ); ++} ++ + #else + + static inline int hrtimer_hres_active(void) { return 0; } +@@ -1250,11 +1260,10 @@ void hrtimer_interrupt(struct clock_event_device *dev) + cpu_base->nr_events++; + dev->next_event.tv64 = KTIME_MAX; + +- entry_time = now = ktime_get(); ++ raw_spin_lock(&cpu_base->lock); ++ entry_time = now = hrtimer_update_base(cpu_base); + retry: + expires_next.tv64 = KTIME_MAX; +- +- raw_spin_lock(&cpu_base->lock); + /* + * We set expires_next to KTIME_MAX here with cpu_base->lock + * held to prevent that a timer is enqueued in our queue via +@@ -1330,8 +1339,12 @@ retry: + * We need to prevent that we loop forever in the hrtimer + * interrupt routine. We give it 3 attempts to avoid + * overreacting on some spurious event. ++ * ++ * Acquire base lock for updating the offsets and retrieving ++ * the current time. + */ +- now = ktime_get(); ++ raw_spin_lock(&cpu_base->lock); ++ now = hrtimer_update_base(cpu_base); + cpu_base->nr_retries++; + if (++retries < 3) + goto retry; +@@ -1343,6 +1356,7 @@ retry: + */ + cpu_base->nr_hangs++; + cpu_base->hang_detected = 1; ++ raw_spin_unlock(&cpu_base->lock); + delta = ktime_sub(now, entry_time); + if (delta.tv64 > cpu_base->max_hang_time.tv64) + cpu_base->max_hang_time = delta; +@@ -1395,6 +1409,13 @@ void hrtimer_peek_ahead_timers(void) + + static void run_hrtimer_softirq(struct softirq_action *h) + { ++ struct hrtimer_cpu_base *cpu_base = &__get_cpu_var(hrtimer_bases); ++ ++ if (cpu_base->clock_was_set) { ++ cpu_base->clock_was_set = 0; ++ clock_was_set(); ++ } ++ + hrtimer_peek_ahead_timers(); + } + +diff --git a/kernel/power/swap.c b/kernel/power/swap.c +index b313086..64f8f97 100644 +--- a/kernel/power/swap.c ++++ b/kernel/power/swap.c +@@ -6,7 +6,7 @@ + * + * Copyright (C) 1998,2001-2005 Pavel Machek + * Copyright (C) 2006 Rafael J. Wysocki +- * Copyright (C) 2010 Bojan Smojver ++ * Copyright (C) 2010-2012 Bojan Smojver + * + * This file is released under the GPLv2. 
+ * +@@ -283,14 +283,17 @@ static int write_page(void *buf, sector_t offset, struct bio **bio_chain) + return -ENOSPC; + + if (bio_chain) { +- src = (void *)__get_free_page(__GFP_WAIT | __GFP_HIGH); ++ src = (void *)__get_free_page(__GFP_WAIT | __GFP_NOWARN | ++ __GFP_NORETRY); + if (src) { + copy_page(src, buf); + } else { + ret = hib_wait_on_bio_chain(bio_chain); /* Free pages */ + if (ret) + return ret; +- src = (void *)__get_free_page(__GFP_WAIT | __GFP_HIGH); ++ src = (void *)__get_free_page(__GFP_WAIT | ++ __GFP_NOWARN | ++ __GFP_NORETRY); + if (src) { + copy_page(src, buf); + } else { +@@ -368,12 +371,17 @@ static int swap_write_page(struct swap_map_handle *handle, void *buf, + clear_page(handle->cur); + handle->cur_swap = offset; + handle->k = 0; +- } +- if (bio_chain && low_free_pages() <= handle->reqd_free_pages) { +- error = hib_wait_on_bio_chain(bio_chain); +- if (error) +- goto out; +- handle->reqd_free_pages = reqd_free_pages(); ++ ++ if (bio_chain && low_free_pages() <= handle->reqd_free_pages) { ++ error = hib_wait_on_bio_chain(bio_chain); ++ if (error) ++ goto out; ++ /* ++ * Recalculate the number of required free pages, to ++ * make sure we never take more than half. ++ */ ++ handle->reqd_free_pages = reqd_free_pages(); ++ } + } + out: + return error; +@@ -420,8 +428,9 @@ static int swap_writer_finish(struct swap_map_handle *handle, + /* Maximum number of threads for compression/decompression. */ + #define LZO_THREADS 3 + +-/* Maximum number of pages for read buffering. */ +-#define LZO_READ_PAGES (MAP_PAGE_ENTRIES * 8) ++/* Minimum/maximum number of pages for read buffering. */ ++#define LZO_MIN_RD_PAGES 1024 ++#define LZO_MAX_RD_PAGES 8192 + + + /** +@@ -632,12 +641,6 @@ static int save_image_lzo(struct swap_map_handle *handle, + } + + /* +- * Adjust number of free pages after all allocations have been done. +- * We don't want to run out of pages when writing. +- */ +- handle->reqd_free_pages = reqd_free_pages(); +- +- /* + * Start the CRC32 thread. + */ + init_waitqueue_head(&crc->go); +@@ -658,6 +661,12 @@ static int save_image_lzo(struct swap_map_handle *handle, + goto out_clean; + } + ++ /* ++ * Adjust the number of required free pages after all allocations have ++ * been done. We don't want to run out of pages when writing. ++ */ ++ handle->reqd_free_pages = reqd_free_pages(); ++ + printk(KERN_INFO + "PM: Using %u thread(s) for compression.\n" + "PM: Compressing and saving image data (%u pages) ... ", +@@ -1067,7 +1076,7 @@ static int load_image_lzo(struct swap_map_handle *handle, + unsigned i, thr, run_threads, nr_threads; + unsigned ring = 0, pg = 0, ring_size = 0, + have = 0, want, need, asked = 0; +- unsigned long read_pages; ++ unsigned long read_pages = 0; + unsigned char **page = NULL; + struct dec_data *data = NULL; + struct crc_data *crc = NULL; +@@ -1079,7 +1088,7 @@ static int load_image_lzo(struct swap_map_handle *handle, + nr_threads = num_online_cpus() - 1; + nr_threads = clamp_val(nr_threads, 1, LZO_THREADS); + +- page = vmalloc(sizeof(*page) * LZO_READ_PAGES); ++ page = vmalloc(sizeof(*page) * LZO_MAX_RD_PAGES); + if (!page) { + printk(KERN_ERR "PM: Failed to allocate LZO page\n"); + ret = -ENOMEM; +@@ -1144,15 +1153,22 @@ static int load_image_lzo(struct swap_map_handle *handle, + } + + /* +- * Adjust number of pages for read buffering, in case we are short. ++ * Set the number of pages for read buffering. 
++ * This is complete guesswork, because we'll only know the real ++ * picture once prepare_image() is called, which is much later on ++ * during the image load phase. We'll assume the worst case and ++ * say that none of the image pages are from high memory. + */ +- read_pages = (nr_free_pages() - snapshot_get_image_size()) >> 1; +- read_pages = clamp_val(read_pages, LZO_CMP_PAGES, LZO_READ_PAGES); ++ if (low_free_pages() > snapshot_get_image_size()) ++ read_pages = (low_free_pages() - snapshot_get_image_size()) / 2; ++ read_pages = clamp_val(read_pages, LZO_MIN_RD_PAGES, LZO_MAX_RD_PAGES); + + for (i = 0; i < read_pages; i++) { + page[i] = (void *)__get_free_page(i < LZO_CMP_PAGES ? + __GFP_WAIT | __GFP_HIGH : +- __GFP_WAIT); ++ __GFP_WAIT | __GFP_NOWARN | ++ __GFP_NORETRY); ++ + if (!page[i]) { + if (i < LZO_CMP_PAGES) { + ring_size = i; +diff --git a/kernel/sched.c b/kernel/sched.c +index 576a27f..52ac69b 100644 +--- a/kernel/sched.c ++++ b/kernel/sched.c +@@ -1885,7 +1885,6 @@ static void double_rq_unlock(struct rq *rq1, struct rq *rq2) + + #endif + +-static void calc_load_account_idle(struct rq *this_rq); + static void update_sysctl(void); + static int get_update_sysctl_factor(void); + static void update_cpu_load(struct rq *this_rq); +@@ -3401,11 +3400,73 @@ unsigned long this_cpu_load(void) + } + + ++/* ++ * Global load-average calculations ++ * ++ * We take a distributed and async approach to calculating the global load-avg ++ * in order to minimize overhead. ++ * ++ * The global load average is an exponentially decaying average of nr_running + ++ * nr_uninterruptible. ++ * ++ * Once every LOAD_FREQ: ++ * ++ * nr_active = 0; ++ * for_each_possible_cpu(cpu) ++ * nr_active += cpu_of(cpu)->nr_running + cpu_of(cpu)->nr_uninterruptible; ++ * ++ * avenrun[n] = avenrun[0] * exp_n + nr_active * (1 - exp_n) ++ * ++ * Due to a number of reasons the above turns in the mess below: ++ * ++ * - for_each_possible_cpu() is prohibitively expensive on machines with ++ * serious number of cpus, therefore we need to take a distributed approach ++ * to calculating nr_active. ++ * ++ * \Sum_i x_i(t) = \Sum_i x_i(t) - x_i(t_0) | x_i(t_0) := 0 ++ * = \Sum_i { \Sum_j=1 x_i(t_j) - x_i(t_j-1) } ++ * ++ * So assuming nr_active := 0 when we start out -- true per definition, we ++ * can simply take per-cpu deltas and fold those into a global accumulate ++ * to obtain the same result. See calc_load_fold_active(). ++ * ++ * Furthermore, in order to avoid synchronizing all per-cpu delta folding ++ * across the machine, we assume 10 ticks is sufficient time for every ++ * cpu to have completed this task. ++ * ++ * This places an upper-bound on the IRQ-off latency of the machine. Then ++ * again, being late doesn't loose the delta, just wrecks the sample. ++ * ++ * - cpu_rq()->nr_uninterruptible isn't accurately tracked per-cpu because ++ * this would add another cross-cpu cacheline miss and atomic operation ++ * to the wakeup path. Instead we increment on whatever cpu the task ran ++ * when it went into uninterruptible state and decrement on whatever cpu ++ * did the wakeup. This means that only the sum of nr_uninterruptible over ++ * all cpus yields the correct result. ++ * ++ * This covers the NO_HZ=n code, for extra head-aches, see the comment below. 
++ */ ++ + /* Variables and functions for calc_load */ + static atomic_long_t calc_load_tasks; + static unsigned long calc_load_update; + unsigned long avenrun[3]; +-EXPORT_SYMBOL(avenrun); ++EXPORT_SYMBOL(avenrun); /* should be removed */ ++ ++/** ++ * get_avenrun - get the load average array ++ * @loads: pointer to dest load array ++ * @offset: offset to add ++ * @shift: shift count to shift the result left ++ * ++ * These values are estimates at best, so no need for locking. ++ */ ++void get_avenrun(unsigned long *loads, unsigned long offset, int shift) ++{ ++ loads[0] = (avenrun[0] + offset) << shift; ++ loads[1] = (avenrun[1] + offset) << shift; ++ loads[2] = (avenrun[2] + offset) << shift; ++} + + static long calc_load_fold_active(struct rq *this_rq) + { +@@ -3422,6 +3483,9 @@ static long calc_load_fold_active(struct rq *this_rq) + return delta; + } + ++/* ++ * a1 = a0 * e + a * (1 - e) ++ */ + static unsigned long + calc_load(unsigned long load, unsigned long exp, unsigned long active) + { +@@ -3433,30 +3497,118 @@ calc_load(unsigned long load, unsigned long exp, unsigned long active) + + #ifdef CONFIG_NO_HZ + /* +- * For NO_HZ we delay the active fold to the next LOAD_FREQ update. ++ * Handle NO_HZ for the global load-average. ++ * ++ * Since the above described distributed algorithm to compute the global ++ * load-average relies on per-cpu sampling from the tick, it is affected by ++ * NO_HZ. ++ * ++ * The basic idea is to fold the nr_active delta into a global idle-delta upon ++ * entering NO_HZ state such that we can include this as an 'extra' cpu delta ++ * when we read the global state. ++ * ++ * Obviously reality has to ruin such a delightfully simple scheme: ++ * ++ * - When we go NO_HZ idle during the window, we can negate our sample ++ * contribution, causing under-accounting. ++ * ++ * We avoid this by keeping two idle-delta counters and flipping them ++ * when the window starts, thus separating old and new NO_HZ load. ++ * ++ * The only trick is the slight shift in index flip for read vs write. ++ * ++ * 0s 5s 10s 15s ++ * +10 +10 +10 +10 ++ * |-|-----------|-|-----------|-|-----------|-| ++ * r:0 0 1 1 0 0 1 1 0 ++ * w:0 1 1 0 0 1 1 0 0 ++ * ++ * This ensures we'll fold the old idle contribution in this window while ++ * accumlating the new one. ++ * ++ * - When we wake up from NO_HZ idle during the window, we push up our ++ * contribution, since we effectively move our sample point to a known ++ * busy state. ++ * ++ * This is solved by pushing the window forward, and thus skipping the ++ * sample, for this cpu (effectively using the idle-delta for this cpu which ++ * was in effect at the time the window opened). This also solves the issue ++ * of having to deal with a cpu having been in NOHZ idle for multiple ++ * LOAD_FREQ intervals. + * + * When making the ILB scale, we should try to pull this in as well. + */ +-static atomic_long_t calc_load_tasks_idle; ++static atomic_long_t calc_load_idle[2]; ++static int calc_load_idx; + +-static void calc_load_account_idle(struct rq *this_rq) ++static inline int calc_load_write_idx(void) + { ++ int idx = calc_load_idx; ++ ++ /* ++ * See calc_global_nohz(), if we observe the new index, we also ++ * need to observe the new update time. ++ */ ++ smp_rmb(); ++ ++ /* ++ * If the folding window started, make sure we start writing in the ++ * next idle-delta. 
++ */ ++ if (!time_before(jiffies, calc_load_update)) ++ idx++; ++ ++ return idx & 1; ++} ++ ++static inline int calc_load_read_idx(void) ++{ ++ return calc_load_idx & 1; ++} ++ ++void calc_load_enter_idle(void) ++{ ++ struct rq *this_rq = this_rq(); + long delta; + ++ /* ++ * We're going into NOHZ mode, if there's any pending delta, fold it ++ * into the pending idle delta. ++ */ + delta = calc_load_fold_active(this_rq); +- if (delta) +- atomic_long_add(delta, &calc_load_tasks_idle); ++ if (delta) { ++ int idx = calc_load_write_idx(); ++ atomic_long_add(delta, &calc_load_idle[idx]); ++ } + } + +-static long calc_load_fold_idle(void) ++void calc_load_exit_idle(void) + { +- long delta = 0; ++ struct rq *this_rq = this_rq(); ++ ++ /* ++ * If we're still before the sample window, we're done. ++ */ ++ if (time_before(jiffies, this_rq->calc_load_update)) ++ return; + + /* +- * Its got a race, we don't care... ++ * We woke inside or after the sample window, this means we're already ++ * accounted through the nohz accounting, so skip the entire deal and ++ * sync up for the next window. + */ +- if (atomic_long_read(&calc_load_tasks_idle)) +- delta = atomic_long_xchg(&calc_load_tasks_idle, 0); ++ this_rq->calc_load_update = calc_load_update; ++ if (time_before(jiffies, this_rq->calc_load_update + 10)) ++ this_rq->calc_load_update += LOAD_FREQ; ++} ++ ++static long calc_load_fold_idle(void) ++{ ++ int idx = calc_load_read_idx(); ++ long delta = 0; ++ ++ if (atomic_long_read(&calc_load_idle[idx])) ++ delta = atomic_long_xchg(&calc_load_idle[idx], 0); + + return delta; + } +@@ -3542,66 +3694,39 @@ static void calc_global_nohz(void) + { + long delta, active, n; + +- /* +- * If we crossed a calc_load_update boundary, make sure to fold +- * any pending idle changes, the respective CPUs might have +- * missed the tick driven calc_load_account_active() update +- * due to NO_HZ. +- */ +- delta = calc_load_fold_idle(); +- if (delta) +- atomic_long_add(delta, &calc_load_tasks); +- +- /* +- * It could be the one fold was all it took, we done! +- */ +- if (time_before(jiffies, calc_load_update + 10)) +- return; +- +- /* +- * Catch-up, fold however many we are behind still +- */ +- delta = jiffies - calc_load_update - 10; +- n = 1 + (delta / LOAD_FREQ); ++ if (!time_before(jiffies, calc_load_update + 10)) { ++ /* ++ * Catch-up, fold however many we are behind still ++ */ ++ delta = jiffies - calc_load_update - 10; ++ n = 1 + (delta / LOAD_FREQ); + +- active = atomic_long_read(&calc_load_tasks); +- active = active > 0 ? active * FIXED_1 : 0; ++ active = atomic_long_read(&calc_load_tasks); ++ active = active > 0 ? active * FIXED_1 : 0; + +- avenrun[0] = calc_load_n(avenrun[0], EXP_1, active, n); +- avenrun[1] = calc_load_n(avenrun[1], EXP_5, active, n); +- avenrun[2] = calc_load_n(avenrun[2], EXP_15, active, n); ++ avenrun[0] = calc_load_n(avenrun[0], EXP_1, active, n); ++ avenrun[1] = calc_load_n(avenrun[1], EXP_5, active, n); ++ avenrun[2] = calc_load_n(avenrun[2], EXP_15, active, n); + +- calc_load_update += n * LOAD_FREQ; +-} +-#else +-static void calc_load_account_idle(struct rq *this_rq) +-{ +-} ++ calc_load_update += n * LOAD_FREQ; ++ } + +-static inline long calc_load_fold_idle(void) +-{ +- return 0; ++ /* ++ * Flip the idle index... ++ * ++ * Make sure we first write the new time then flip the index, so that ++ * calc_load_write_idx() will see the new time when it reads the new ++ * index, this avoids a double flip messing things up. 
++ */ ++ smp_wmb(); ++ calc_load_idx++; + } ++#else /* !CONFIG_NO_HZ */ + +-static void calc_global_nohz(void) +-{ +-} +-#endif ++static inline long calc_load_fold_idle(void) { return 0; } ++static inline void calc_global_nohz(void) { } + +-/** +- * get_avenrun - get the load average array +- * @loads: pointer to dest load array +- * @offset: offset to add +- * @shift: shift count to shift the result left +- * +- * These values are estimates at best, so no need for locking. +- */ +-void get_avenrun(unsigned long *loads, unsigned long offset, int shift) +-{ +- loads[0] = (avenrun[0] + offset) << shift; +- loads[1] = (avenrun[1] + offset) << shift; +- loads[2] = (avenrun[2] + offset) << shift; +-} ++#endif /* CONFIG_NO_HZ */ + + /* + * calc_load - update the avenrun load estimates 10 ticks after the +@@ -3609,11 +3734,18 @@ void get_avenrun(unsigned long *loads, unsigned long offset, int shift) + */ + void calc_global_load(unsigned long ticks) + { +- long active; ++ long active, delta; + + if (time_before(jiffies, calc_load_update + 10)) + return; + ++ /* ++ * Fold the 'old' idle-delta to include all NO_HZ cpus. ++ */ ++ delta = calc_load_fold_idle(); ++ if (delta) ++ atomic_long_add(delta, &calc_load_tasks); ++ + active = atomic_long_read(&calc_load_tasks); + active = active > 0 ? active * FIXED_1 : 0; + +@@ -3624,12 +3756,7 @@ void calc_global_load(unsigned long ticks) + calc_load_update += LOAD_FREQ; + + /* +- * Account one period with whatever state we found before +- * folding in the nohz state and ageing the entire idle period. +- * +- * This avoids loosing a sample when we go idle between +- * calc_load_account_active() (10 ticks ago) and now and thus +- * under-accounting. ++ * In case we idled for multiple LOAD_FREQ intervals, catch up in bulk. + */ + calc_global_nohz(); + } +@@ -3646,7 +3773,6 @@ static void calc_load_account_active(struct rq *this_rq) + return; + + delta = calc_load_fold_active(this_rq); +- delta += calc_load_fold_idle(); + if (delta) + atomic_long_add(delta, &calc_load_tasks); + +@@ -3654,6 +3780,10 @@ static void calc_load_account_active(struct rq *this_rq) + } + + /* ++ * End of global load-average stuff ++ */ ++ ++/* + * The exact cpuload at various idx values, calculated at every tick would be + * load = (2^idx - 1) / 2^idx * load + 1 / 2^idx * cur_load + * +diff --git a/kernel/sched_idletask.c b/kernel/sched_idletask.c +index 0a51882..be92bfe 100644 +--- a/kernel/sched_idletask.c ++++ b/kernel/sched_idletask.c +@@ -23,7 +23,6 @@ static void check_preempt_curr_idle(struct rq *rq, struct task_struct *p, int fl + static struct task_struct *pick_next_task_idle(struct rq *rq) + { + schedstat_inc(rq, sched_goidle); +- calc_load_account_idle(rq); + return rq->idle; + } + +diff --git a/kernel/time/ntp.c b/kernel/time/ntp.c +index 4b85a7a..f1eb182 100644 +--- a/kernel/time/ntp.c ++++ b/kernel/time/ntp.c +@@ -31,8 +31,6 @@ unsigned long tick_nsec; + u64 tick_length; + static u64 tick_length_base; + +-static struct hrtimer leap_timer; +- + #define MAX_TICKADJ 500LL /* usecs */ + #define MAX_TICKADJ_SCALED \ + (((MAX_TICKADJ * NSEC_PER_USEC) << NTP_SCALE_SHIFT) / NTP_INTERVAL_FREQ) +@@ -350,60 +348,60 @@ void ntp_clear(void) + } + + /* +- * Leap second processing. If in leap-insert state at the end of the +- * day, the system clock is set back one second; if in leap-delete +- * state, the system clock is set ahead one second. 
++ * this routine handles the overflow of the microsecond field ++ * ++ * The tricky bits of code to handle the accurate clock support ++ * were provided by Dave Mills (Mills@UDEL.EDU) of NTP fame. ++ * They were originally developed for SUN and DEC kernels. ++ * All the kudos should go to Dave for this stuff. ++ * ++ * Also handles leap second processing, and returns leap offset + */ +-static enum hrtimer_restart ntp_leap_second(struct hrtimer *timer) ++int second_overflow(unsigned long secs) + { +- enum hrtimer_restart res = HRTIMER_NORESTART; +- +- write_seqlock(&xtime_lock); ++ int leap = 0; ++ s64 delta; + ++ /* ++ * Leap second processing. If in leap-insert state at the end of the ++ * day, the system clock is set back one second; if in leap-delete ++ * state, the system clock is set ahead one second. ++ */ + switch (time_state) { + case TIME_OK: ++ if (time_status & STA_INS) ++ time_state = TIME_INS; ++ else if (time_status & STA_DEL) ++ time_state = TIME_DEL; + break; + case TIME_INS: +- timekeeping_leap_insert(-1); +- time_state = TIME_OOP; +- printk(KERN_NOTICE +- "Clock: inserting leap second 23:59:60 UTC\n"); +- hrtimer_add_expires_ns(&leap_timer, NSEC_PER_SEC); +- res = HRTIMER_RESTART; ++ if (secs % 86400 == 0) { ++ leap = -1; ++ time_state = TIME_OOP; ++ time_tai++; ++ printk(KERN_NOTICE ++ "Clock: inserting leap second 23:59:60 UTC\n"); ++ } + break; + case TIME_DEL: +- timekeeping_leap_insert(1); +- time_tai--; +- time_state = TIME_WAIT; +- printk(KERN_NOTICE +- "Clock: deleting leap second 23:59:59 UTC\n"); ++ if ((secs + 1) % 86400 == 0) { ++ leap = 1; ++ time_tai--; ++ time_state = TIME_WAIT; ++ printk(KERN_NOTICE ++ "Clock: deleting leap second 23:59:59 UTC\n"); ++ } + break; + case TIME_OOP: +- time_tai++; + time_state = TIME_WAIT; +- /* fall through */ ++ break; ++ + case TIME_WAIT: + if (!(time_status & (STA_INS | STA_DEL))) + time_state = TIME_OK; + break; + } + +- write_sequnlock(&xtime_lock); +- +- return res; +-} +- +-/* +- * this routine handles the overflow of the microsecond field +- * +- * The tricky bits of code to handle the accurate clock support +- * were provided by Dave Mills (Mills@UDEL.EDU) of NTP fame. +- * They were originally developed for SUN and DEC kernels. +- * All the kudos should go to Dave for this stuff. 
+- */ +-void second_overflow(void) +-{ +- s64 delta; + + /* Bump the maxerror field */ + time_maxerror += MAXFREQ / NSEC_PER_USEC; +@@ -423,23 +421,25 @@ void second_overflow(void) + pps_dec_valid(); + + if (!time_adjust) +- return; ++ goto out; + + if (time_adjust > MAX_TICKADJ) { + time_adjust -= MAX_TICKADJ; + tick_length += MAX_TICKADJ_SCALED; +- return; ++ goto out; + } + + if (time_adjust < -MAX_TICKADJ) { + time_adjust += MAX_TICKADJ; + tick_length -= MAX_TICKADJ_SCALED; +- return; ++ goto out; + } + + tick_length += (s64)(time_adjust * NSEC_PER_USEC / NTP_INTERVAL_FREQ) + << NTP_SCALE_SHIFT; + time_adjust = 0; ++out: ++ return leap; + } + + #ifdef CONFIG_GENERIC_CMOS_UPDATE +@@ -501,27 +501,6 @@ static void notify_cmos_timer(void) + static inline void notify_cmos_timer(void) { } + #endif + +-/* +- * Start the leap seconds timer: +- */ +-static inline void ntp_start_leap_timer(struct timespec *ts) +-{ +- long now = ts->tv_sec; +- +- if (time_status & STA_INS) { +- time_state = TIME_INS; +- now += 86400 - now % 86400; +- hrtimer_start(&leap_timer, ktime_set(now, 0), HRTIMER_MODE_ABS); +- +- return; +- } +- +- if (time_status & STA_DEL) { +- time_state = TIME_DEL; +- now += 86400 - (now + 1) % 86400; +- hrtimer_start(&leap_timer, ktime_set(now, 0), HRTIMER_MODE_ABS); +- } +-} + + /* + * Propagate a new txc->status value into the NTP state: +@@ -546,22 +525,6 @@ static inline void process_adj_status(struct timex *txc, struct timespec *ts) + time_status &= STA_RONLY; + time_status |= txc->status & ~STA_RONLY; + +- switch (time_state) { +- case TIME_OK: +- ntp_start_leap_timer(ts); +- break; +- case TIME_INS: +- case TIME_DEL: +- time_state = TIME_OK; +- ntp_start_leap_timer(ts); +- case TIME_WAIT: +- if (!(time_status & (STA_INS | STA_DEL))) +- time_state = TIME_OK; +- break; +- case TIME_OOP: +- hrtimer_restart(&leap_timer); +- break; +- } + } + /* + * Called with the xtime lock held, so we can access and modify +@@ -643,9 +606,6 @@ int do_adjtimex(struct timex *txc) + (txc->tick < 900000/USER_HZ || + txc->tick > 1100000/USER_HZ)) + return -EINVAL; +- +- if (txc->modes & ADJ_STATUS && time_state != TIME_OK) +- hrtimer_cancel(&leap_timer); + } + + if (txc->modes & ADJ_SETOFFSET) { +@@ -967,6 +927,4 @@ __setup("ntp_tick_adj=", ntp_tick_adj_setup); + void __init ntp_init(void) + { + ntp_clear(); +- hrtimer_init(&leap_timer, CLOCK_REALTIME, HRTIMER_MODE_ABS); +- leap_timer.function = ntp_leap_second; + } +diff --git a/kernel/time/tick-sched.c b/kernel/time/tick-sched.c +index c923640..9955ebd 100644 +--- a/kernel/time/tick-sched.c ++++ b/kernel/time/tick-sched.c +@@ -430,6 +430,7 @@ void tick_nohz_stop_sched_tick(int inidle) + */ + if (!ts->tick_stopped) { + select_nohz_load_balancer(1); ++ calc_load_enter_idle(); + + ts->idle_tick = hrtimer_get_expires(&ts->sched_timer); + ts->tick_stopped = 1; +@@ -563,6 +564,7 @@ void tick_nohz_restart_sched_tick(void) + account_idle_ticks(ticks); + #endif + ++ calc_load_exit_idle(); + touch_softlockup_watchdog(); + /* + * Cancel the scheduled timer and restore the tick +diff --git a/kernel/time/timekeeping.c b/kernel/time/timekeeping.c +index 2378413..03e67d4 100644 +--- a/kernel/time/timekeeping.c ++++ b/kernel/time/timekeeping.c +@@ -161,23 +161,43 @@ static struct timespec xtime __attribute__ ((aligned (16))); + static struct timespec wall_to_monotonic __attribute__ ((aligned (16))); + static struct timespec total_sleep_time; + ++/* Offset clock monotonic -> clock realtime */ ++static ktime_t offs_real; ++ ++/* Offset clock monotonic -> clock 
boottime */ ++static ktime_t offs_boot; ++ + /* + * The raw monotonic time for the CLOCK_MONOTONIC_RAW posix clock. + */ + static struct timespec raw_time; + +-/* flag for if timekeeping is suspended */ +-int __read_mostly timekeeping_suspended; ++/* must hold write on xtime_lock */ ++static void update_rt_offset(void) ++{ ++ struct timespec tmp, *wtm = &wall_to_monotonic; + +-/* must hold xtime_lock */ +-void timekeeping_leap_insert(int leapsecond) ++ set_normalized_timespec(&tmp, -wtm->tv_sec, -wtm->tv_nsec); ++ offs_real = timespec_to_ktime(tmp); ++} ++ ++/* must hold write on xtime_lock */ ++static void timekeeping_update(bool clearntp) + { +- xtime.tv_sec += leapsecond; +- wall_to_monotonic.tv_sec -= leapsecond; +- update_vsyscall(&xtime, &wall_to_monotonic, timekeeper.clock, +- timekeeper.mult); ++ if (clearntp) { ++ timekeeper.ntp_error = 0; ++ ntp_clear(); ++ } ++ update_rt_offset(); ++ update_vsyscall(&xtime, &wall_to_monotonic, ++ timekeeper.clock, timekeeper.mult); + } + ++ ++ ++/* flag for if timekeeping is suspended */ ++int __read_mostly timekeeping_suspended; ++ + /** + * timekeeping_forward_now - update clock to the current time + * +@@ -375,11 +395,7 @@ int do_settimeofday(const struct timespec *tv) + + xtime = *tv; + +- timekeeper.ntp_error = 0; +- ntp_clear(); +- +- update_vsyscall(&xtime, &wall_to_monotonic, timekeeper.clock, +- timekeeper.mult); ++ timekeeping_update(true); + + write_sequnlock_irqrestore(&xtime_lock, flags); + +@@ -412,11 +428,7 @@ int timekeeping_inject_offset(struct timespec *ts) + xtime = timespec_add(xtime, *ts); + wall_to_monotonic = timespec_sub(wall_to_monotonic, *ts); + +- timekeeper.ntp_error = 0; +- ntp_clear(); +- +- update_vsyscall(&xtime, &wall_to_monotonic, timekeeper.clock, +- timekeeper.mult); ++ timekeeping_update(true); + + write_sequnlock_irqrestore(&xtime_lock, flags); + +@@ -591,6 +603,7 @@ void __init timekeeping_init(void) + } + set_normalized_timespec(&wall_to_monotonic, + -boot.tv_sec, -boot.tv_nsec); ++ update_rt_offset(); + total_sleep_time.tv_sec = 0; + total_sleep_time.tv_nsec = 0; + write_sequnlock_irqrestore(&xtime_lock, flags); +@@ -599,6 +612,12 @@ void __init timekeeping_init(void) + /* time in seconds when suspend began */ + static struct timespec timekeeping_suspend_time; + ++static void update_sleep_time(struct timespec t) ++{ ++ total_sleep_time = t; ++ offs_boot = timespec_to_ktime(t); ++} ++ + /** + * __timekeeping_inject_sleeptime - Internal function to add sleep interval + * @delta: pointer to a timespec delta value +@@ -616,7 +635,7 @@ static void __timekeeping_inject_sleeptime(struct timespec *delta) + + xtime = timespec_add(xtime, *delta); + wall_to_monotonic = timespec_sub(wall_to_monotonic, *delta); +- total_sleep_time = timespec_add(total_sleep_time, *delta); ++ update_sleep_time(timespec_add(total_sleep_time, *delta)); + } + + +@@ -645,10 +664,7 @@ void timekeeping_inject_sleeptime(struct timespec *delta) + + __timekeeping_inject_sleeptime(delta); + +- timekeeper.ntp_error = 0; +- ntp_clear(); +- update_vsyscall(&xtime, &wall_to_monotonic, timekeeper.clock, +- timekeeper.mult); ++ timekeeping_update(true); + + write_sequnlock_irqrestore(&xtime_lock, flags); + +@@ -683,6 +699,7 @@ static void timekeeping_resume(void) + timekeeper.clock->cycle_last = timekeeper.clock->read(timekeeper.clock); + timekeeper.ntp_error = 0; + timekeeping_suspended = 0; ++ timekeeping_update(false); + write_sequnlock_irqrestore(&xtime_lock, flags); + + touch_softlockup_watchdog(); +@@ -942,9 +959,14 @@ static cycle_t 
logarithmic_accumulation(cycle_t offset, int shift) + + timekeeper.xtime_nsec += timekeeper.xtime_interval << shift; + while (timekeeper.xtime_nsec >= nsecps) { ++ int leap; + timekeeper.xtime_nsec -= nsecps; + xtime.tv_sec++; +- second_overflow(); ++ leap = second_overflow(xtime.tv_sec); ++ xtime.tv_sec += leap; ++ wall_to_monotonic.tv_sec -= leap; ++ if (leap) ++ clock_was_set_delayed(); + } + + /* Accumulate raw time */ +@@ -1050,14 +1072,17 @@ static void update_wall_time(void) + * xtime.tv_nsec isn't larger then NSEC_PER_SEC + */ + if (unlikely(xtime.tv_nsec >= NSEC_PER_SEC)) { ++ int leap; + xtime.tv_nsec -= NSEC_PER_SEC; + xtime.tv_sec++; +- second_overflow(); ++ leap = second_overflow(xtime.tv_sec); ++ xtime.tv_sec += leap; ++ wall_to_monotonic.tv_sec -= leap; ++ if (leap) ++ clock_was_set_delayed(); + } + +- /* check to see if there is a new clocksource to use */ +- update_vsyscall(&xtime, &wall_to_monotonic, timekeeper.clock, +- timekeeper.mult); ++ timekeeping_update(false); + } + + /** +@@ -1216,6 +1241,40 @@ void get_xtime_and_monotonic_and_sleep_offset(struct timespec *xtim, + } while (read_seqretry(&xtime_lock, seq)); + } + ++#ifdef CONFIG_HIGH_RES_TIMERS ++/** ++ * ktime_get_update_offsets - hrtimer helper ++ * @real: pointer to storage for monotonic -> realtime offset ++ * @_boot: pointer to storage for monotonic -> boottime offset ++ * ++ * Returns current monotonic time and updates the offsets ++ * Called from hrtimer_interupt() or retrigger_next_event() ++ */ ++ktime_t ktime_get_update_offsets(ktime_t *real, ktime_t *boot) ++{ ++ ktime_t now; ++ unsigned int seq; ++ u64 secs, nsecs; ++ ++ do { ++ seq = read_seqbegin(&xtime_lock); ++ ++ secs = xtime.tv_sec; ++ nsecs = xtime.tv_nsec; ++ nsecs += timekeeping_get_ns(); ++ /* If arch requires, add in gettimeoffset() */ ++ nsecs += arch_gettimeoffset(); ++ ++ *real = offs_real; ++ *boot = offs_boot; ++ } while (read_seqretry(&xtime_lock, seq)); ++ ++ now = ktime_add_ns(ktime_set(secs, 0), nsecs); ++ now = ktime_sub(now, *real); ++ return now; ++} ++#endif ++ + /** + * ktime_get_monotonic_offset() - get wall_to_monotonic in ktime_t format + */ +diff --git a/mm/compaction.c b/mm/compaction.c +index 8fb8a40..50f1c60 100644 +--- a/mm/compaction.c ++++ b/mm/compaction.c +@@ -592,8 +592,11 @@ static int compact_zone(struct zone *zone, struct compact_control *cc) + if (err) { + putback_lru_pages(&cc->migratepages); + cc->nr_migratepages = 0; ++ if (err == -ENOMEM) { ++ ret = COMPACT_PARTIAL; ++ goto out; ++ } + } +- + } + + out: +diff --git a/mm/hugetlb.c b/mm/hugetlb.c +index 5f5c545..7c535b0 100644 +--- a/mm/hugetlb.c ++++ b/mm/hugetlb.c +@@ -53,6 +53,84 @@ static unsigned long __initdata default_hstate_size; + */ + static DEFINE_SPINLOCK(hugetlb_lock); + ++static inline void unlock_or_release_subpool(struct hugepage_subpool *spool) ++{ ++ bool free = (spool->count == 0) && (spool->used_hpages == 0); ++ ++ spin_unlock(&spool->lock); ++ ++ /* If no pages are used, and no other handles to the subpool ++ * remain, free the subpool the subpool remain */ ++ if (free) ++ kfree(spool); ++} ++ ++struct hugepage_subpool *hugepage_new_subpool(long nr_blocks) ++{ ++ struct hugepage_subpool *spool; ++ ++ spool = kmalloc(sizeof(*spool), GFP_KERNEL); ++ if (!spool) ++ return NULL; ++ ++ spin_lock_init(&spool->lock); ++ spool->count = 1; ++ spool->max_hpages = nr_blocks; ++ spool->used_hpages = 0; ++ ++ return spool; ++} ++ ++void hugepage_put_subpool(struct hugepage_subpool *spool) ++{ ++ spin_lock(&spool->lock); ++ BUG_ON(!spool->count); ++ 
spool->count--; ++ unlock_or_release_subpool(spool); ++} ++ ++static int hugepage_subpool_get_pages(struct hugepage_subpool *spool, ++ long delta) ++{ ++ int ret = 0; ++ ++ if (!spool) ++ return 0; ++ ++ spin_lock(&spool->lock); ++ if ((spool->used_hpages + delta) <= spool->max_hpages) { ++ spool->used_hpages += delta; ++ } else { ++ ret = -ENOMEM; ++ } ++ spin_unlock(&spool->lock); ++ ++ return ret; ++} ++ ++static void hugepage_subpool_put_pages(struct hugepage_subpool *spool, ++ long delta) ++{ ++ if (!spool) ++ return; ++ ++ spin_lock(&spool->lock); ++ spool->used_hpages -= delta; ++ /* If hugetlbfs_put_super couldn't free spool due to ++ * an outstanding quota reference, free it now. */ ++ unlock_or_release_subpool(spool); ++} ++ ++static inline struct hugepage_subpool *subpool_inode(struct inode *inode) ++{ ++ return HUGETLBFS_SB(inode->i_sb)->spool; ++} ++ ++static inline struct hugepage_subpool *subpool_vma(struct vm_area_struct *vma) ++{ ++ return subpool_inode(vma->vm_file->f_dentry->d_inode); ++} ++ + /* + * Region tracking -- allows tracking of reservations and instantiated pages + * across the pages in a mapping. +@@ -533,9 +611,9 @@ static void free_huge_page(struct page *page) + */ + struct hstate *h = page_hstate(page); + int nid = page_to_nid(page); +- struct address_space *mapping; ++ struct hugepage_subpool *spool = ++ (struct hugepage_subpool *)page_private(page); + +- mapping = (struct address_space *) page_private(page); + set_page_private(page, 0); + page->mapping = NULL; + BUG_ON(page_count(page)); +@@ -551,8 +629,7 @@ static void free_huge_page(struct page *page) + enqueue_huge_page(h, page); + } + spin_unlock(&hugetlb_lock); +- if (mapping) +- hugetlb_put_quota(mapping, 1); ++ hugepage_subpool_put_pages(spool, 1); + } + + static void prep_new_huge_page(struct hstate *h, struct page *page, int nid) +@@ -966,11 +1043,12 @@ static void return_unused_surplus_pages(struct hstate *h, + /* + * Determine if the huge page at addr within the vma has an associated + * reservation. Where it does not we will need to logically increase +- * reservation and actually increase quota before an allocation can occur. +- * Where any new reservation would be required the reservation change is +- * prepared, but not committed. Once the page has been quota'd allocated +- * an instantiated the change should be committed via vma_commit_reservation. +- * No action is required on failure. ++ * reservation and actually increase subpool usage before an allocation ++ * can occur. Where any new reservation would be required the ++ * reservation change is prepared, but not committed. Once the page ++ * has been allocated from the subpool and instantiated the change should ++ * be committed via vma_commit_reservation. No action is required on ++ * failure. + */ + static long vma_needs_reservation(struct hstate *h, + struct vm_area_struct *vma, unsigned long addr) +@@ -1019,24 +1097,24 @@ static void vma_commit_reservation(struct hstate *h, + static struct page *alloc_huge_page(struct vm_area_struct *vma, + unsigned long addr, int avoid_reserve) + { ++ struct hugepage_subpool *spool = subpool_vma(vma); + struct hstate *h = hstate_vma(vma); + struct page *page; +- struct address_space *mapping = vma->vm_file->f_mapping; +- struct inode *inode = mapping->host; + long chg; + + /* +- * Processes that did not create the mapping will have no reserves and +- * will not have accounted against quota. 
Check that the quota can be +- * made before satisfying the allocation +- * MAP_NORESERVE mappings may also need pages and quota allocated +- * if no reserve mapping overlaps. ++ * Processes that did not create the mapping will have no ++ * reserves and will not have accounted against subpool ++ * limit. Check that the subpool limit can be made before ++ * satisfying the allocation MAP_NORESERVE mappings may also ++ * need pages and subpool limit allocated allocated if no reserve ++ * mapping overlaps. + */ + chg = vma_needs_reservation(h, vma, addr); + if (chg < 0) + return ERR_PTR(-VM_FAULT_OOM); + if (chg) +- if (hugetlb_get_quota(inode->i_mapping, chg)) ++ if (hugepage_subpool_get_pages(spool, chg)) + return ERR_PTR(-VM_FAULT_SIGBUS); + + spin_lock(&hugetlb_lock); +@@ -1046,12 +1124,12 @@ static struct page *alloc_huge_page(struct vm_area_struct *vma, + if (!page) { + page = alloc_buddy_huge_page(h, NUMA_NO_NODE); + if (!page) { +- hugetlb_put_quota(inode->i_mapping, chg); ++ hugepage_subpool_put_pages(spool, chg); + return ERR_PTR(-VM_FAULT_SIGBUS); + } + } + +- set_page_private(page, (unsigned long) mapping); ++ set_page_private(page, (unsigned long)spool); + + vma_commit_reservation(h, vma, addr); + +@@ -2081,6 +2159,7 @@ static void hugetlb_vm_op_close(struct vm_area_struct *vma) + { + struct hstate *h = hstate_vma(vma); + struct resv_map *reservations = vma_resv_map(vma); ++ struct hugepage_subpool *spool = subpool_vma(vma); + unsigned long reserve; + unsigned long start; + unsigned long end; +@@ -2096,7 +2175,7 @@ static void hugetlb_vm_op_close(struct vm_area_struct *vma) + + if (reserve) { + hugetlb_acct_memory(h, -reserve); +- hugetlb_put_quota(vma->vm_file->f_mapping, reserve); ++ hugepage_subpool_put_pages(spool, reserve); + } + } + } +@@ -2326,7 +2405,7 @@ static int unmap_ref_private(struct mm_struct *mm, struct vm_area_struct *vma, + address = address & huge_page_mask(h); + pgoff = ((address - vma->vm_start) >> PAGE_SHIFT) + + (vma->vm_pgoff >> PAGE_SHIFT); +- mapping = (struct address_space *)page_private(page); ++ mapping = vma->vm_file->f_dentry->d_inode->i_mapping; + + /* + * Take the mapping lock for the duration of the table walk. As +@@ -2865,11 +2944,12 @@ int hugetlb_reserve_pages(struct inode *inode, + { + long ret, chg; + struct hstate *h = hstate_inode(inode); ++ struct hugepage_subpool *spool = subpool_inode(inode); + + /* + * Only apply hugepage reservation if asked. At fault time, an + * attempt will be made for VM_NORESERVE to allocate a page +- * and filesystem quota without using reserves ++ * without using reserves + */ + if (vm_flags & VM_NORESERVE) + return 0; +@@ -2898,19 +2978,19 @@ int hugetlb_reserve_pages(struct inode *inode, + goto out_err; + } + +- /* There must be enough filesystem quota for the mapping */ +- if (hugetlb_get_quota(inode->i_mapping, chg)) { ++ /* There must be enough pages in the subpool for the mapping */ ++ if (hugepage_subpool_get_pages(spool, chg)) { + ret = -ENOSPC; + goto out_err; + } + + /* + * Check enough hugepages are available for the reservation. 
+- * Hand back the quota if there are not ++ * Hand the pages back to the subpool if there are not + */ + ret = hugetlb_acct_memory(h, chg); + if (ret < 0) { +- hugetlb_put_quota(inode->i_mapping, chg); ++ hugepage_subpool_put_pages(spool, chg); + goto out_err; + } + +@@ -2938,12 +3018,13 @@ void hugetlb_unreserve_pages(struct inode *inode, long offset, long freed) + { + struct hstate *h = hstate_inode(inode); + long chg = region_truncate(&inode->i_mapping->private_list, offset); ++ struct hugepage_subpool *spool = subpool_inode(inode); + + spin_lock(&inode->i_lock); + inode->i_blocks -= (blocks_per_huge_page(h) * freed); + spin_unlock(&inode->i_lock); + +- hugetlb_put_quota(inode->i_mapping, (chg - freed)); ++ hugepage_subpool_put_pages(spool, (chg - freed)); + hugetlb_acct_memory(h, -(chg - freed)); + } + +diff --git a/mm/vmscan.c b/mm/vmscan.c +index fbe2d2c..8342119 100644 +--- a/mm/vmscan.c ++++ b/mm/vmscan.c +@@ -2824,7 +2824,10 @@ static void kswapd_try_to_sleep(pg_data_t *pgdat, int order, int classzone_idx) + * them before going back to sleep. + */ + set_pgdat_percpu_threshold(pgdat, calculate_normal_threshold); +- schedule(); ++ ++ if (!kthread_should_stop()) ++ schedule(); ++ + set_pgdat_percpu_threshold(pgdat, calculate_pressure_threshold); + } else { + if (remaining) +@@ -3090,14 +3093,17 @@ int kswapd_run(int nid) + } + + /* +- * Called by memory hotplug when all memory in a node is offlined. ++ * Called by memory hotplug when all memory in a node is offlined. Caller must ++ * hold lock_memory_hotplug(). + */ + void kswapd_stop(int nid) + { + struct task_struct *kswapd = NODE_DATA(nid)->kswapd; + +- if (kswapd) ++ if (kswapd) { + kthread_stop(kswapd); ++ NODE_DATA(nid)->kswapd = NULL; ++ } + } + + static int __init kswapd_init(void) +diff --git a/net/can/raw.c b/net/can/raw.c +index cde1b4a..46cca3a 100644 +--- a/net/can/raw.c ++++ b/net/can/raw.c +@@ -681,9 +681,6 @@ static int raw_sendmsg(struct kiocb *iocb, struct socket *sock, + if (err < 0) + goto free_skb; + +- /* to be able to check the received tx sock reference in raw_rcv() */ +- skb_shinfo(skb)->tx_flags |= SKBTX_DRV_NEEDS_SK_REF; +- + skb->dev = dev; + skb->sk = sk; + +diff --git a/net/core/dev.c b/net/core/dev.c +index 1cbddc9..5738654 100644 +--- a/net/core/dev.c ++++ b/net/core/dev.c +@@ -2079,25 +2079,6 @@ static int dev_gso_segment(struct sk_buff *skb, int features) + return 0; + } + +-/* +- * Try to orphan skb early, right before transmission by the device. +- * We cannot orphan skb if tx timestamp is requested or the sk-reference +- * is needed on driver level for other reasons, e.g. see net/can/raw.c +- */ +-static inline void skb_orphan_try(struct sk_buff *skb) +-{ +- struct sock *sk = skb->sk; +- +- if (sk && !skb_shinfo(skb)->tx_flags) { +- /* skb_tx_hash() wont be able to get sk. 
+- * We copy sk_hash into skb->rxhash +- */ +- if (!skb->rxhash) +- skb->rxhash = sk->sk_hash; +- skb_orphan(skb); +- } +-} +- + static bool can_checksum_protocol(unsigned long features, __be16 protocol) + { + return ((features & NETIF_F_GEN_CSUM) || +@@ -2182,8 +2163,6 @@ int dev_hard_start_xmit(struct sk_buff *skb, struct net_device *dev, + if (!list_empty(&ptype_all)) + dev_queue_xmit_nit(skb, dev); + +- skb_orphan_try(skb); +- + features = netif_skb_features(skb); + + if (vlan_tx_tag_present(skb) && +@@ -2293,7 +2272,7 @@ u16 __skb_tx_hash(const struct net_device *dev, const struct sk_buff *skb, + if (skb->sk && skb->sk->sk_hash) + hash = skb->sk->sk_hash; + else +- hash = (__force u16) skb->protocol ^ skb->rxhash; ++ hash = (__force u16) skb->protocol; + hash = jhash_1word(hash, hashrnd); + + return (u16) (((u64) hash * qcount) >> 32) + qoffset; +diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c +index 9726927..32e6ca2 100644 +--- a/net/ipv4/tcp_input.c ++++ b/net/ipv4/tcp_input.c +@@ -5836,6 +5836,8 @@ int tcp_rcv_state_process(struct sock *sk, struct sk_buff *skb, + goto discard; + + if (th->syn) { ++ if (th->fin) ++ goto discard; + if (icsk->icsk_af_ops->conn_request(sk, skb) < 0) + return 1; + +diff --git a/net/iucv/af_iucv.c b/net/iucv/af_iucv.c +index 274d150..cf98d62 100644 +--- a/net/iucv/af_iucv.c ++++ b/net/iucv/af_iucv.c +@@ -380,7 +380,6 @@ static int afiucv_hs_send(struct iucv_message *imsg, struct sock *sock, + skb_trim(skb, skb->dev->mtu); + } + skb->protocol = ETH_P_AF_IUCV; +- skb_shinfo(skb)->tx_flags |= SKBTX_DRV_NEEDS_SK_REF; + nskb = skb_clone(skb, GFP_ATOMIC); + if (!nskb) + return -ENOMEM; +diff --git a/net/wireless/util.c b/net/wireless/util.c +index d38815d..74d5292 100644 +--- a/net/wireless/util.c ++++ b/net/wireless/util.c +@@ -813,7 +813,7 @@ int cfg80211_change_iface(struct cfg80211_registered_device *rdev, + ntype == NL80211_IFTYPE_P2P_CLIENT)) + return -EBUSY; + +- if (ntype != otype) { ++ if (ntype != otype && netif_running(dev)) { + err = cfg80211_can_change_interface(rdev, dev->ieee80211_ptr, + ntype); + if (err) +diff --git a/scripts/depmod.sh b/scripts/depmod.sh +index a272356..2ae4817 100755 +--- a/scripts/depmod.sh ++++ b/scripts/depmod.sh +@@ -9,12 +9,6 @@ fi + DEPMOD=$1 + KERNELRELEASE=$2 + +-if ! "$DEPMOD" -V 2>/dev/null | grep -q module-init-tools; then +- echo "Warning: you may need to install module-init-tools" >&2 +- echo "See http://www.codemonkey.org.uk/docs/post-halloween-2.6.txt" >&2 +- sleep 1 +-fi +- + if ! 
test -r System.map -a -x "$DEPMOD"; then + exit 0 + fi +diff --git a/virt/kvm/irq_comm.c b/virt/kvm/irq_comm.c +index 9f614b4..272407c 100644 +--- a/virt/kvm/irq_comm.c ++++ b/virt/kvm/irq_comm.c +@@ -318,6 +318,7 @@ static int setup_routing_entry(struct kvm_irq_routing_table *rt, + */ + hlist_for_each_entry(ei, n, &rt->map[ue->gsi], link) + if (ei->type == KVM_IRQ_ROUTING_MSI || ++ ue->type == KVM_IRQ_ROUTING_MSI || + ue->u.irqchip.irqchip == ei->irqchip.irqchip) + return r; + diff --git a/3.2.34/bump/1024_linux-3.2.25.patch b/3.2.34/bump/1024_linux-3.2.25.patch new file mode 100644 index 0000000..e95c213 --- /dev/null +++ b/3.2.34/bump/1024_linux-3.2.25.patch @@ -0,0 +1,4503 @@ +diff --git a/Makefile b/Makefile +index 80bb4fd..e13e4e7 100644 +--- a/Makefile ++++ b/Makefile +@@ -1,6 +1,6 @@ + VERSION = 3 + PATCHLEVEL = 2 +-SUBLEVEL = 24 ++SUBLEVEL = 25 + EXTRAVERSION = + NAME = Saber-toothed Squirrel + +diff --git a/arch/powerpc/include/asm/reg.h b/arch/powerpc/include/asm/reg.h +index 559da19..578e5a0 100644 +--- a/arch/powerpc/include/asm/reg.h ++++ b/arch/powerpc/include/asm/reg.h +@@ -1016,7 +1016,8 @@ + /* Macros for setting and retrieving special purpose registers */ + #ifndef __ASSEMBLY__ + #define mfmsr() ({unsigned long rval; \ +- asm volatile("mfmsr %0" : "=r" (rval)); rval;}) ++ asm volatile("mfmsr %0" : "=r" (rval) : \ ++ : "memory"); rval;}) + #ifdef CONFIG_PPC_BOOK3S_64 + #define __mtmsrd(v, l) asm volatile("mtmsrd %0," __stringify(l) \ + : : "r" (v) : "memory") +diff --git a/arch/powerpc/kernel/ftrace.c b/arch/powerpc/kernel/ftrace.c +index bf99cfa..6324008 100644 +--- a/arch/powerpc/kernel/ftrace.c ++++ b/arch/powerpc/kernel/ftrace.c +@@ -245,9 +245,9 @@ __ftrace_make_nop(struct module *mod, + + /* + * On PPC32 the trampoline looks like: +- * 0x3d, 0x60, 0x00, 0x00 lis r11,sym@ha +- * 0x39, 0x6b, 0x00, 0x00 addi r11,r11,sym@l +- * 0x7d, 0x69, 0x03, 0xa6 mtctr r11 ++ * 0x3d, 0x80, 0x00, 0x00 lis r12,sym@ha ++ * 0x39, 0x8c, 0x00, 0x00 addi r12,r12,sym@l ++ * 0x7d, 0x89, 0x03, 0xa6 mtctr r12 + * 0x4e, 0x80, 0x04, 0x20 bctr + */ + +@@ -262,9 +262,9 @@ __ftrace_make_nop(struct module *mod, + pr_devel(" %08x %08x ", jmp[0], jmp[1]); + + /* verify that this is what we expect it to be */ +- if (((jmp[0] & 0xffff0000) != 0x3d600000) || +- ((jmp[1] & 0xffff0000) != 0x396b0000) || +- (jmp[2] != 0x7d6903a6) || ++ if (((jmp[0] & 0xffff0000) != 0x3d800000) || ++ ((jmp[1] & 0xffff0000) != 0x398c0000) || ++ (jmp[2] != 0x7d8903a6) || + (jmp[3] != 0x4e800420)) { + printk(KERN_ERR "Not a trampoline\n"); + return -EINVAL; +diff --git a/arch/s390/kernel/processor.c b/arch/s390/kernel/processor.c +index 6e0073e..07c7bf4 100644 +--- a/arch/s390/kernel/processor.c ++++ b/arch/s390/kernel/processor.c +@@ -26,12 +26,14 @@ static DEFINE_PER_CPU(struct cpuid, cpu_id); + void __cpuinit cpu_init(void) + { + struct cpuid *id = &per_cpu(cpu_id, smp_processor_id()); ++ struct s390_idle_data *idle = &__get_cpu_var(s390_idle); + + get_cpu_id(id); + atomic_inc(&init_mm.mm_count); + current->active_mm = &init_mm; + BUG_ON(current->mm); + enter_lazy_tlb(&init_mm, current); ++ memset(idle, 0, sizeof(*idle)); + } + + /* +diff --git a/arch/s390/kernel/smp.c b/arch/s390/kernel/smp.c +index 3ea8728..1df64a8 100644 +--- a/arch/s390/kernel/smp.c ++++ b/arch/s390/kernel/smp.c +@@ -1020,14 +1020,11 @@ static int __cpuinit smp_cpu_notify(struct notifier_block *self, + unsigned int cpu = (unsigned int)(long)hcpu; + struct cpu *c = &per_cpu(cpu_devices, cpu); + struct sys_device *s = &c->sysdev; +- struct s390_idle_data 
*idle; + int err = 0; + + switch (action) { + case CPU_ONLINE: + case CPU_ONLINE_FROZEN: +- idle = &per_cpu(s390_idle, cpu); +- memset(idle, 0, sizeof(struct s390_idle_data)); + err = sysfs_create_group(&s->kobj, &cpu_online_attr_group); + break; + case CPU_DEAD: +diff --git a/arch/x86/kernel/microcode_core.c b/arch/x86/kernel/microcode_core.c +index 563a09d..29c95d7 100644 +--- a/arch/x86/kernel/microcode_core.c ++++ b/arch/x86/kernel/microcode_core.c +@@ -297,20 +297,31 @@ static ssize_t reload_store(struct sys_device *dev, + const char *buf, size_t size) + { + unsigned long val; +- int cpu = dev->id; +- int ret = 0; +- char *end; ++ int cpu; ++ ssize_t ret = 0, tmp_ret; + +- val = simple_strtoul(buf, &end, 0); +- if (end == buf) ++ /* allow reload only from the BSP */ ++ if (boot_cpu_data.cpu_index != dev->id) + return -EINVAL; + +- if (val == 1) { +- get_online_cpus(); +- if (cpu_online(cpu)) +- ret = reload_for_cpu(cpu); +- put_online_cpus(); ++ ret = kstrtoul(buf, 0, &val); ++ if (ret) ++ return ret; ++ ++ if (val != 1) ++ return size; ++ ++ get_online_cpus(); ++ for_each_online_cpu(cpu) { ++ tmp_ret = reload_for_cpu(cpu); ++ if (tmp_ret != 0) ++ pr_warn("Error reloading microcode on CPU %d\n", cpu); ++ ++ /* save retval of the first encountered reload error */ ++ if (!ret) ++ ret = tmp_ret; + } ++ put_online_cpus(); + + if (!ret) + ret = size; +diff --git a/arch/x86/pci/fixup.c b/arch/x86/pci/fixup.c +index 6dd8955..0951b81 100644 +--- a/arch/x86/pci/fixup.c ++++ b/arch/x86/pci/fixup.c +@@ -521,3 +521,20 @@ static void sb600_disable_hpet_bar(struct pci_dev *dev) + } + } + DECLARE_PCI_FIXUP_EARLY(PCI_VENDOR_ID_ATI, 0x4385, sb600_disable_hpet_bar); ++ ++/* ++ * Twinhead H12Y needs us to block out a region otherwise we map devices ++ * there and any access kills the box. 
++ * ++ * See: https://bugzilla.kernel.org/show_bug.cgi?id=10231 ++ * ++ * Match off the LPC and svid/sdid (older kernels lose the bridge subvendor) ++ */ ++static void __devinit twinhead_reserve_killing_zone(struct pci_dev *dev) ++{ ++ if (dev->subsystem_vendor == 0x14FF && dev->subsystem_device == 0xA003) { ++ pr_info("Reserving memory on Twinhead H12Y\n"); ++ request_mem_region(0xFFB00000, 0x100000, "twinhead"); ++ } ++} ++DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x27B9, twinhead_reserve_killing_zone); +diff --git a/block/blk-core.c b/block/blk-core.c +index 15de223..49d9e91 100644 +--- a/block/blk-core.c ++++ b/block/blk-core.c +@@ -607,7 +607,7 @@ EXPORT_SYMBOL(blk_init_allocated_queue); + + int blk_get_queue(struct request_queue *q) + { +- if (likely(!test_bit(QUEUE_FLAG_DEAD, &q->queue_flags))) { ++ if (likely(!blk_queue_dead(q))) { + kobject_get(&q->kobj); + return 0; + } +@@ -754,7 +754,7 @@ static struct request *get_request(struct request_queue *q, int rw_flags, + const bool is_sync = rw_is_sync(rw_flags) != 0; + int may_queue; + +- if (unlikely(test_bit(QUEUE_FLAG_DEAD, &q->queue_flags))) ++ if (unlikely(blk_queue_dead(q))) + return NULL; + + may_queue = elv_may_queue(q, rw_flags); +@@ -874,7 +874,7 @@ static struct request *get_request_wait(struct request_queue *q, int rw_flags, + struct io_context *ioc; + struct request_list *rl = &q->rq; + +- if (unlikely(test_bit(QUEUE_FLAG_DEAD, &q->queue_flags))) ++ if (unlikely(blk_queue_dead(q))) + return NULL; + + prepare_to_wait_exclusive(&rl->wait[is_sync], &wait, +diff --git a/block/blk-exec.c b/block/blk-exec.c +index a1ebceb..6053285 100644 +--- a/block/blk-exec.c ++++ b/block/blk-exec.c +@@ -50,7 +50,7 @@ void blk_execute_rq_nowait(struct request_queue *q, struct gendisk *bd_disk, + { + int where = at_head ? 
ELEVATOR_INSERT_FRONT : ELEVATOR_INSERT_BACK; + +- if (unlikely(test_bit(QUEUE_FLAG_DEAD, &q->queue_flags))) { ++ if (unlikely(blk_queue_dead(q))) { + rq->errors = -ENXIO; + if (rq->end_io) + rq->end_io(rq, rq->errors); +diff --git a/block/blk-sysfs.c b/block/blk-sysfs.c +index e7f9f65..f0b2ca8 100644 +--- a/block/blk-sysfs.c ++++ b/block/blk-sysfs.c +@@ -425,7 +425,7 @@ queue_attr_show(struct kobject *kobj, struct attribute *attr, char *page) + if (!entry->show) + return -EIO; + mutex_lock(&q->sysfs_lock); +- if (test_bit(QUEUE_FLAG_DEAD, &q->queue_flags)) { ++ if (blk_queue_dead(q)) { + mutex_unlock(&q->sysfs_lock); + return -ENOENT; + } +@@ -447,7 +447,7 @@ queue_attr_store(struct kobject *kobj, struct attribute *attr, + + q = container_of(kobj, struct request_queue, kobj); + mutex_lock(&q->sysfs_lock); +- if (test_bit(QUEUE_FLAG_DEAD, &q->queue_flags)) { ++ if (blk_queue_dead(q)) { + mutex_unlock(&q->sysfs_lock); + return -ENOENT; + } +diff --git a/block/blk-throttle.c b/block/blk-throttle.c +index 4553245..5eed6a7 100644 +--- a/block/blk-throttle.c ++++ b/block/blk-throttle.c +@@ -310,7 +310,7 @@ static struct throtl_grp * throtl_get_tg(struct throtl_data *td) + struct request_queue *q = td->queue; + + /* no throttling for dead queue */ +- if (unlikely(test_bit(QUEUE_FLAG_DEAD, &q->queue_flags))) ++ if (unlikely(blk_queue_dead(q))) + return NULL; + + rcu_read_lock(); +@@ -335,7 +335,7 @@ static struct throtl_grp * throtl_get_tg(struct throtl_data *td) + spin_lock_irq(q->queue_lock); + + /* Make sure @q is still alive */ +- if (unlikely(test_bit(QUEUE_FLAG_DEAD, &q->queue_flags))) { ++ if (unlikely(blk_queue_dead(q))) { + kfree(tg); + return NULL; + } +diff --git a/block/blk.h b/block/blk.h +index 3f6551b..e38691d 100644 +--- a/block/blk.h ++++ b/block/blk.h +@@ -85,7 +85,7 @@ static inline struct request *__elv_next_request(struct request_queue *q) + q->flush_queue_delayed = 1; + return NULL; + } +- if (test_bit(QUEUE_FLAG_DEAD, &q->queue_flags) || ++ if (unlikely(blk_queue_dead(q)) || + !q->elevator->ops->elevator_dispatch_fn(q, 0)) + return NULL; + } +diff --git a/drivers/acpi/ac.c b/drivers/acpi/ac.c +index 6512b20..d1fcbc0 100644 +--- a/drivers/acpi/ac.c ++++ b/drivers/acpi/ac.c +@@ -292,7 +292,9 @@ static int acpi_ac_add(struct acpi_device *device) + ac->charger.properties = ac_props; + ac->charger.num_properties = ARRAY_SIZE(ac_props); + ac->charger.get_property = get_ac_property; +- power_supply_register(&ac->device->dev, &ac->charger); ++ result = power_supply_register(&ac->device->dev, &ac->charger); ++ if (result) ++ goto end; + + printk(KERN_INFO PREFIX "%s [%s] (%s)\n", + acpi_device_name(device), acpi_device_bid(device), +diff --git a/drivers/gpu/drm/nouveau/nva3_copy.fuc b/drivers/gpu/drm/nouveau/nva3_copy.fuc +index eaf35f8..d894731 100644 +--- a/drivers/gpu/drm/nouveau/nva3_copy.fuc ++++ b/drivers/gpu/drm/nouveau/nva3_copy.fuc +@@ -118,9 +118,9 @@ dispatch_dma: + // mthd 0x030c-0x0340, various stuff + .b16 0xc3 14 + .b32 ctx_src_address_high ~0x000000ff +-.b32 ctx_src_address_low ~0xfffffff0 ++.b32 ctx_src_address_low ~0xffffffff + .b32 ctx_dst_address_high ~0x000000ff +-.b32 ctx_dst_address_low ~0xfffffff0 ++.b32 ctx_dst_address_low ~0xffffffff + .b32 ctx_src_pitch ~0x0007ffff + .b32 ctx_dst_pitch ~0x0007ffff + .b32 ctx_xcnt ~0x0000ffff +diff --git a/drivers/gpu/drm/nouveau/nva3_copy.fuc.h b/drivers/gpu/drm/nouveau/nva3_copy.fuc.h +index 2731de2..e2a0e88 100644 +--- a/drivers/gpu/drm/nouveau/nva3_copy.fuc.h ++++ b/drivers/gpu/drm/nouveau/nva3_copy.fuc.h +@@ -1,37 
+1,72 @@ +-uint32_t nva3_pcopy_data[] = { ++u32 nva3_pcopy_data[] = { ++/* 0x0000: ctx_object */ + 0x00000000, ++/* 0x0004: ctx_dma */ ++/* 0x0004: ctx_dma_query */ + 0x00000000, ++/* 0x0008: ctx_dma_src */ + 0x00000000, ++/* 0x000c: ctx_dma_dst */ + 0x00000000, ++/* 0x0010: ctx_query_address_high */ + 0x00000000, ++/* 0x0014: ctx_query_address_low */ + 0x00000000, ++/* 0x0018: ctx_query_counter */ + 0x00000000, ++/* 0x001c: ctx_src_address_high */ + 0x00000000, ++/* 0x0020: ctx_src_address_low */ + 0x00000000, ++/* 0x0024: ctx_src_pitch */ + 0x00000000, ++/* 0x0028: ctx_src_tile_mode */ + 0x00000000, ++/* 0x002c: ctx_src_xsize */ + 0x00000000, ++/* 0x0030: ctx_src_ysize */ + 0x00000000, ++/* 0x0034: ctx_src_zsize */ + 0x00000000, ++/* 0x0038: ctx_src_zoff */ + 0x00000000, ++/* 0x003c: ctx_src_xoff */ + 0x00000000, ++/* 0x0040: ctx_src_yoff */ + 0x00000000, ++/* 0x0044: ctx_src_cpp */ + 0x00000000, ++/* 0x0048: ctx_dst_address_high */ + 0x00000000, ++/* 0x004c: ctx_dst_address_low */ + 0x00000000, ++/* 0x0050: ctx_dst_pitch */ + 0x00000000, ++/* 0x0054: ctx_dst_tile_mode */ + 0x00000000, ++/* 0x0058: ctx_dst_xsize */ + 0x00000000, ++/* 0x005c: ctx_dst_ysize */ + 0x00000000, ++/* 0x0060: ctx_dst_zsize */ + 0x00000000, ++/* 0x0064: ctx_dst_zoff */ + 0x00000000, ++/* 0x0068: ctx_dst_xoff */ + 0x00000000, ++/* 0x006c: ctx_dst_yoff */ + 0x00000000, ++/* 0x0070: ctx_dst_cpp */ + 0x00000000, ++/* 0x0074: ctx_format */ + 0x00000000, ++/* 0x0078: ctx_swz_const0 */ + 0x00000000, ++/* 0x007c: ctx_swz_const1 */ + 0x00000000, ++/* 0x0080: ctx_xcnt */ + 0x00000000, ++/* 0x0084: ctx_ycnt */ + 0x00000000, + 0x00000000, + 0x00000000, +@@ -63,6 +98,7 @@ uint32_t nva3_pcopy_data[] = { + 0x00000000, + 0x00000000, + 0x00000000, ++/* 0x0100: dispatch_table */ + 0x00010000, + 0x00000000, + 0x00000000, +@@ -73,6 +109,7 @@ uint32_t nva3_pcopy_data[] = { + 0x00010162, + 0x00000000, + 0x00030060, ++/* 0x0128: dispatch_dma */ + 0x00010170, + 0x00000000, + 0x00010170, +@@ -118,11 +155,11 @@ uint32_t nva3_pcopy_data[] = { + 0x0000001c, + 0xffffff00, + 0x00000020, +- 0x0000000f, ++ 0x00000000, + 0x00000048, + 0xffffff00, + 0x0000004c, +- 0x0000000f, ++ 0x00000000, + 0x00000024, + 0xfff80000, + 0x00000050, +@@ -146,7 +183,8 @@ uint32_t nva3_pcopy_data[] = { + 0x00000800, + }; + +-uint32_t nva3_pcopy_code[] = { ++u32 nva3_pcopy_code[] = { ++/* 0x0000: main */ + 0x04fe04bd, + 0x3517f000, + 0xf10010fe, +@@ -158,23 +196,31 @@ uint32_t nva3_pcopy_code[] = { + 0x17f11031, + 0x27f01200, + 0x0012d003, ++/* 0x002f: spin */ + 0xf40031f4, + 0x0ef40028, ++/* 0x0035: ih */ + 0x8001cffd, + 0xf40812c4, + 0x21f4060b, ++/* 0x0041: ih_no_chsw */ + 0x0412c472, + 0xf4060bf4, ++/* 0x004a: ih_no_cmd */ + 0x11c4c321, + 0x4001d00c, ++/* 0x0052: swctx */ + 0x47f101f8, + 0x4bfe7700, + 0x0007fe00, + 0xf00204b9, + 0x01f40643, + 0x0604fa09, ++/* 0x006b: swctx_load */ + 0xfa060ef4, ++/* 0x006e: swctx_done */ + 0x03f80504, ++/* 0x0072: chsw */ + 0x27f100f8, + 0x23cf1400, + 0x1e3fc800, +@@ -183,18 +229,22 @@ uint32_t nva3_pcopy_code[] = { + 0x1e3af052, + 0xf00023d0, + 0x24d00147, ++/* 0x0093: chsw_no_unload */ + 0xcf00f880, + 0x3dc84023, + 0x220bf41e, + 0xf40131f4, + 0x57f05221, + 0x0367f004, ++/* 0x00a8: chsw_load_ctx_dma */ + 0xa07856bc, + 0xb6018068, + 0x87d00884, + 0x0162b600, ++/* 0x00bb: chsw_finish_load */ + 0xf0f018f4, + 0x23d00237, ++/* 0x00c3: dispatch */ + 0xf100f880, + 0xcf190037, + 0x33cf4032, +@@ -202,6 +252,7 @@ uint32_t nva3_pcopy_code[] = { + 0x1024b607, + 0x010057f1, + 0x74bd64bd, ++/* 0x00dc: dispatch_loop */ + 0x58005658, + 
0x50b60157, + 0x0446b804, +@@ -211,6 +262,7 @@ uint32_t nva3_pcopy_code[] = { + 0xb60276bb, + 0x57bb0374, + 0xdf0ef400, ++/* 0x0100: dispatch_valid_mthd */ + 0xb60246bb, + 0x45bb0344, + 0x01459800, +@@ -220,31 +272,41 @@ uint32_t nva3_pcopy_code[] = { + 0xb0014658, + 0x1bf40064, + 0x00538009, ++/* 0x0127: dispatch_cmd */ + 0xf4300ef4, + 0x55f90132, + 0xf40c01f4, ++/* 0x0132: dispatch_invalid_bitfield */ + 0x25f0250e, ++/* 0x0135: dispatch_illegal_mthd */ + 0x0125f002, ++/* 0x0138: dispatch_error */ + 0x100047f1, + 0xd00042d0, + 0x27f04043, + 0x0002d040, ++/* 0x0148: hostirq_wait */ + 0xf08002cf, + 0x24b04024, + 0xf71bf400, ++/* 0x0154: dispatch_done */ + 0x1d0027f1, + 0xd00137f0, + 0x00f80023, ++/* 0x0160: cmd_nop */ ++/* 0x0162: cmd_pm_trigger */ + 0x27f100f8, + 0x34bd2200, + 0xd00233f0, + 0x00f80023, ++/* 0x0170: cmd_dma */ + 0x012842b7, + 0xf00145b6, + 0x43801e39, + 0x0040b701, + 0x0644b606, + 0xf80043d0, ++/* 0x0189: cmd_exec_set_format */ + 0xf030f400, + 0xb00001b0, + 0x01b00101, +@@ -256,20 +318,26 @@ uint32_t nva3_pcopy_code[] = { + 0x70b63847, + 0x0232f401, + 0x94bd84bd, ++/* 0x01b4: ncomp_loop */ + 0xb60f4ac4, + 0xb4bd0445, ++/* 0x01bc: bpc_loop */ + 0xf404a430, + 0xa5ff0f18, + 0x00cbbbc0, + 0xf40231f4, ++/* 0x01ce: cmp_c0 */ + 0x1bf4220e, + 0x10c7f00c, + 0xf400cbbb, ++/* 0x01da: cmp_c1 */ + 0xa430160e, + 0x0c18f406, + 0xbb14c7f0, + 0x0ef400cb, ++/* 0x01e9: cmp_zero */ + 0x80c7f107, ++/* 0x01ed: bpc_next */ + 0x01c83800, + 0xb60180b6, + 0xb5b801b0, +@@ -280,6 +348,7 @@ uint32_t nva3_pcopy_code[] = { + 0x98110680, + 0x68fd2008, + 0x0502f400, ++/* 0x0216: dst_xcnt */ + 0x75fd64bd, + 0x1c078000, + 0xf10078fd, +@@ -304,6 +373,7 @@ uint32_t nva3_pcopy_code[] = { + 0x980056d0, + 0x56d01f06, + 0x1030f440, ++/* 0x0276: cmd_exec_set_surface_tiled */ + 0x579800f8, + 0x6879c70a, + 0xb66478c7, +@@ -311,9 +381,11 @@ uint32_t nva3_pcopy_code[] = { + 0x0e76b060, + 0xf0091bf4, + 0x0ef40477, ++/* 0x0291: xtile64 */ + 0x027cf00f, + 0xfd1170b6, + 0x77f00947, ++/* 0x029d: xtileok */ + 0x0f5a9806, + 0xfd115b98, + 0xb7f000ab, +@@ -371,6 +443,7 @@ uint32_t nva3_pcopy_code[] = { + 0x67d00600, + 0x0060b700, + 0x0068d004, ++/* 0x0382: cmd_exec_set_surface_linear */ + 0x6cf000f8, + 0x0260b702, + 0x0864b602, +@@ -381,13 +454,16 @@ uint32_t nva3_pcopy_code[] = { + 0xb70067d0, + 0x98040060, + 0x67d00957, ++/* 0x03ab: cmd_exec_wait */ + 0xf900f800, + 0xf110f900, + 0xb6080007, ++/* 0x03b6: loop */ + 0x01cf0604, + 0x0114f000, + 0xfcfa1bf4, + 0xf800fc10, ++/* 0x03c5: cmd_exec_query */ + 0x0d34c800, + 0xf5701bf4, + 0xf103ab21, +@@ -417,6 +493,7 @@ uint32_t nva3_pcopy_code[] = { + 0x47f10153, + 0x44b60800, + 0x0045d006, ++/* 0x0438: query_counter */ + 0x03ab21f5, + 0x080c47f1, + 0x980644b6, +@@ -439,11 +516,13 @@ uint32_t nva3_pcopy_code[] = { + 0x47f10153, + 0x44b60800, + 0x0045d006, ++/* 0x0492: cmd_exec */ + 0x21f500f8, + 0x3fc803ab, + 0x0e0bf400, + 0x018921f5, + 0x020047f1, ++/* 0x04a7: cmd_exec_no_format */ + 0xf11e0ef4, + 0xb6081067, + 0x77f00664, +@@ -451,19 +530,24 @@ uint32_t nva3_pcopy_code[] = { + 0x981c0780, + 0x67d02007, + 0x4067d000, ++/* 0x04c2: cmd_exec_init_src_surface */ + 0x32f444bd, + 0xc854bd02, + 0x0bf4043f, + 0x8221f50a, + 0x0a0ef403, ++/* 0x04d4: src_tiled */ + 0x027621f5, ++/* 0x04db: cmd_exec_init_dst_surface */ + 0xf40749f0, + 0x57f00231, + 0x083fc82c, + 0xf50a0bf4, + 0xf4038221, ++/* 0x04ee: dst_tiled */ + 0x21f50a0e, + 0x49f00276, ++/* 0x04f5: cmd_exec_kick */ + 0x0057f108, + 0x0654b608, + 0xd0210698, +@@ -473,6 +557,8 @@ uint32_t nva3_pcopy_code[] = { + 0xc80054d0, + 0x0bf40c3f, + 
0xc521f507, ++/* 0x0519: cmd_exec_done */ ++/* 0x051b: cmd_wrcache_flush */ + 0xf100f803, + 0xbd220027, + 0x0133f034, +diff --git a/drivers/gpu/drm/nouveau/nvc0_copy.fuc.h b/drivers/gpu/drm/nouveau/nvc0_copy.fuc.h +index 4199038..9e87036 100644 +--- a/drivers/gpu/drm/nouveau/nvc0_copy.fuc.h ++++ b/drivers/gpu/drm/nouveau/nvc0_copy.fuc.h +@@ -1,34 +1,65 @@ +-uint32_t nvc0_pcopy_data[] = { ++u32 nvc0_pcopy_data[] = { ++/* 0x0000: ctx_object */ + 0x00000000, ++/* 0x0004: ctx_query_address_high */ + 0x00000000, ++/* 0x0008: ctx_query_address_low */ + 0x00000000, ++/* 0x000c: ctx_query_counter */ + 0x00000000, ++/* 0x0010: ctx_src_address_high */ + 0x00000000, ++/* 0x0014: ctx_src_address_low */ + 0x00000000, ++/* 0x0018: ctx_src_pitch */ + 0x00000000, ++/* 0x001c: ctx_src_tile_mode */ + 0x00000000, ++/* 0x0020: ctx_src_xsize */ + 0x00000000, ++/* 0x0024: ctx_src_ysize */ + 0x00000000, ++/* 0x0028: ctx_src_zsize */ + 0x00000000, ++/* 0x002c: ctx_src_zoff */ + 0x00000000, ++/* 0x0030: ctx_src_xoff */ + 0x00000000, ++/* 0x0034: ctx_src_yoff */ + 0x00000000, ++/* 0x0038: ctx_src_cpp */ + 0x00000000, ++/* 0x003c: ctx_dst_address_high */ + 0x00000000, ++/* 0x0040: ctx_dst_address_low */ + 0x00000000, ++/* 0x0044: ctx_dst_pitch */ + 0x00000000, ++/* 0x0048: ctx_dst_tile_mode */ + 0x00000000, ++/* 0x004c: ctx_dst_xsize */ + 0x00000000, ++/* 0x0050: ctx_dst_ysize */ + 0x00000000, ++/* 0x0054: ctx_dst_zsize */ + 0x00000000, ++/* 0x0058: ctx_dst_zoff */ + 0x00000000, ++/* 0x005c: ctx_dst_xoff */ + 0x00000000, ++/* 0x0060: ctx_dst_yoff */ + 0x00000000, ++/* 0x0064: ctx_dst_cpp */ + 0x00000000, ++/* 0x0068: ctx_format */ + 0x00000000, ++/* 0x006c: ctx_swz_const0 */ + 0x00000000, ++/* 0x0070: ctx_swz_const1 */ + 0x00000000, ++/* 0x0074: ctx_xcnt */ + 0x00000000, ++/* 0x0078: ctx_ycnt */ + 0x00000000, + 0x00000000, + 0x00000000, +@@ -63,6 +94,7 @@ uint32_t nvc0_pcopy_data[] = { + 0x00000000, + 0x00000000, + 0x00000000, ++/* 0x0100: dispatch_table */ + 0x00010000, + 0x00000000, + 0x00000000, +@@ -111,11 +143,11 @@ uint32_t nvc0_pcopy_data[] = { + 0x00000010, + 0xffffff00, + 0x00000014, +- 0x0000000f, ++ 0x00000000, + 0x0000003c, + 0xffffff00, + 0x00000040, +- 0x0000000f, ++ 0x00000000, + 0x00000018, + 0xfff80000, + 0x00000044, +@@ -139,7 +171,8 @@ uint32_t nvc0_pcopy_data[] = { + 0x00000800, + }; + +-uint32_t nvc0_pcopy_code[] = { ++u32 nvc0_pcopy_code[] = { ++/* 0x0000: main */ + 0x04fe04bd, + 0x3517f000, + 0xf10010fe, +@@ -151,15 +184,20 @@ uint32_t nvc0_pcopy_code[] = { + 0x17f11031, + 0x27f01200, + 0x0012d003, ++/* 0x002f: spin */ + 0xf40031f4, + 0x0ef40028, ++/* 0x0035: ih */ + 0x8001cffd, + 0xf40812c4, + 0x21f4060b, ++/* 0x0041: ih_no_chsw */ + 0x0412c4ca, + 0xf5070bf4, ++/* 0x004b: ih_no_cmd */ + 0xc4010221, + 0x01d00c11, ++/* 0x0053: swctx */ + 0xf101f840, + 0xfe770047, + 0x47f1004b, +@@ -188,8 +226,11 @@ uint32_t nvc0_pcopy_code[] = { + 0xf00204b9, + 0x01f40643, + 0x0604fa09, ++/* 0x00c3: swctx_load */ + 0xfa060ef4, ++/* 0x00c6: swctx_done */ + 0x03f80504, ++/* 0x00ca: chsw */ + 0x27f100f8, + 0x23cf1400, + 0x1e3fc800, +@@ -198,18 +239,22 @@ uint32_t nvc0_pcopy_code[] = { + 0x1e3af053, + 0xf00023d0, + 0x24d00147, ++/* 0x00eb: chsw_no_unload */ + 0xcf00f880, + 0x3dc84023, + 0x090bf41e, + 0xf40131f4, ++/* 0x00fa: chsw_finish_load */ + 0x37f05321, + 0x8023d002, ++/* 0x0102: dispatch */ + 0x37f100f8, + 0x32cf1900, + 0x0033cf40, + 0x07ff24e4, + 0xf11024b6, + 0xbd010057, ++/* 0x011b: dispatch_loop */ + 0x5874bd64, + 0x57580056, + 0x0450b601, +@@ -219,6 +264,7 @@ uint32_t nvc0_pcopy_code[] = { + 0xbb0f08f4, 
+ 0x74b60276, + 0x0057bb03, ++/* 0x013f: dispatch_valid_mthd */ + 0xbbdf0ef4, + 0x44b60246, + 0x0045bb03, +@@ -229,24 +275,33 @@ uint32_t nvc0_pcopy_code[] = { + 0x64b00146, + 0x091bf400, + 0xf4005380, ++/* 0x0166: dispatch_cmd */ + 0x32f4300e, + 0xf455f901, + 0x0ef40c01, ++/* 0x0171: dispatch_invalid_bitfield */ + 0x0225f025, ++/* 0x0174: dispatch_illegal_mthd */ ++/* 0x0177: dispatch_error */ + 0xf10125f0, + 0xd0100047, + 0x43d00042, + 0x4027f040, ++/* 0x0187: hostirq_wait */ + 0xcf0002d0, + 0x24f08002, + 0x0024b040, ++/* 0x0193: dispatch_done */ + 0xf1f71bf4, + 0xf01d0027, + 0x23d00137, ++/* 0x019f: cmd_nop */ + 0xf800f800, ++/* 0x01a1: cmd_pm_trigger */ + 0x0027f100, + 0xf034bd22, + 0x23d00233, ++/* 0x01af: cmd_exec_set_format */ + 0xf400f800, + 0x01b0f030, + 0x0101b000, +@@ -258,20 +313,26 @@ uint32_t nvc0_pcopy_code[] = { + 0x3847c701, + 0xf40170b6, + 0x84bd0232, ++/* 0x01da: ncomp_loop */ + 0x4ac494bd, + 0x0445b60f, ++/* 0x01e2: bpc_loop */ + 0xa430b4bd, + 0x0f18f404, + 0xbbc0a5ff, + 0x31f400cb, + 0x220ef402, ++/* 0x01f4: cmp_c0 */ + 0xf00c1bf4, + 0xcbbb10c7, + 0x160ef400, ++/* 0x0200: cmp_c1 */ + 0xf406a430, + 0xc7f00c18, + 0x00cbbb14, ++/* 0x020f: cmp_zero */ + 0xf1070ef4, ++/* 0x0213: bpc_next */ + 0x380080c7, + 0x80b601c8, + 0x01b0b601, +@@ -283,6 +344,7 @@ uint32_t nvc0_pcopy_code[] = { + 0x1d08980e, + 0xf40068fd, + 0x64bd0502, ++/* 0x023c: dst_xcnt */ + 0x800075fd, + 0x78fd1907, + 0x1057f100, +@@ -307,15 +369,18 @@ uint32_t nvc0_pcopy_code[] = { + 0x1c069800, + 0xf44056d0, + 0x00f81030, ++/* 0x029c: cmd_exec_set_surface_tiled */ + 0xc7075798, + 0x78c76879, + 0x0380b664, + 0xb06077c7, + 0x1bf40e76, + 0x0477f009, ++/* 0x02b7: xtile64 */ + 0xf00f0ef4, + 0x70b6027c, + 0x0947fd11, ++/* 0x02c3: xtileok */ + 0x980677f0, + 0x5b980c5a, + 0x00abfd0e, +@@ -374,6 +439,7 @@ uint32_t nvc0_pcopy_code[] = { + 0xb70067d0, + 0xd0040060, + 0x00f80068, ++/* 0x03a8: cmd_exec_set_surface_linear */ + 0xb7026cf0, + 0xb6020260, + 0x57980864, +@@ -384,12 +450,15 @@ uint32_t nvc0_pcopy_code[] = { + 0x0060b700, + 0x06579804, + 0xf80067d0, ++/* 0x03d1: cmd_exec_wait */ + 0xf900f900, + 0x0007f110, + 0x0604b608, ++/* 0x03dc: loop */ + 0xf00001cf, + 0x1bf40114, + 0xfc10fcfa, ++/* 0x03eb: cmd_exec_query */ + 0xc800f800, + 0x1bf40d34, + 0xd121f570, +@@ -419,6 +488,7 @@ uint32_t nvc0_pcopy_code[] = { + 0x0153f026, + 0x080047f1, + 0xd00644b6, ++/* 0x045e: query_counter */ + 0x21f50045, + 0x47f103d1, + 0x44b6080c, +@@ -442,11 +512,13 @@ uint32_t nvc0_pcopy_code[] = { + 0x080047f1, + 0xd00644b6, + 0x00f80045, ++/* 0x04b8: cmd_exec */ + 0x03d121f5, + 0xf4003fc8, + 0x21f50e0b, + 0x47f101af, + 0x0ef40200, ++/* 0x04cd: cmd_exec_no_format */ + 0x1067f11e, + 0x0664b608, + 0x800177f0, +@@ -454,18 +526,23 @@ uint32_t nvc0_pcopy_code[] = { + 0x1d079819, + 0xd00067d0, + 0x44bd4067, ++/* 0x04e8: cmd_exec_init_src_surface */ + 0xbd0232f4, + 0x043fc854, + 0xf50a0bf4, + 0xf403a821, ++/* 0x04fa: src_tiled */ + 0x21f50a0e, + 0x49f0029c, ++/* 0x0501: cmd_exec_init_dst_surface */ + 0x0231f407, + 0xc82c57f0, + 0x0bf4083f, + 0xa821f50a, + 0x0a0ef403, ++/* 0x0514: dst_tiled */ + 0x029c21f5, ++/* 0x051b: cmd_exec_kick */ + 0xf10849f0, + 0xb6080057, + 0x06980654, +@@ -475,7 +552,9 @@ uint32_t nvc0_pcopy_code[] = { + 0x54d00546, + 0x0c3fc800, + 0xf5070bf4, ++/* 0x053f: cmd_exec_done */ + 0xf803eb21, ++/* 0x0541: cmd_wrcache_flush */ + 0x0027f100, + 0xf034bd22, + 0x23d00133, +diff --git a/drivers/gpu/drm/radeon/atombios_dp.c b/drivers/gpu/drm/radeon/atombios_dp.c +index 552b436..3254d51 100644 +--- a/drivers/gpu/drm/radeon/atombios_dp.c 
++++ b/drivers/gpu/drm/radeon/atombios_dp.c +@@ -22,6 +22,7 @@ + * + * Authors: Dave Airlie + * Alex Deucher ++ * Jerome Glisse + */ + #include "drmP.h" + #include "radeon_drm.h" +@@ -634,7 +635,6 @@ static bool radeon_dp_get_link_status(struct radeon_connector *radeon_connector, + ret = radeon_dp_aux_native_read(radeon_connector, DP_LANE0_1_STATUS, + link_status, DP_LINK_STATUS_SIZE, 100); + if (ret <= 0) { +- DRM_ERROR("displayport link status failed\n"); + return false; + } + +@@ -812,8 +812,10 @@ static int radeon_dp_link_train_cr(struct radeon_dp_link_train_info *dp_info) + else + mdelay(dp_info->rd_interval * 4); + +- if (!radeon_dp_get_link_status(dp_info->radeon_connector, dp_info->link_status)) ++ if (!radeon_dp_get_link_status(dp_info->radeon_connector, dp_info->link_status)) { ++ DRM_ERROR("displayport link status failed\n"); + break; ++ } + + if (dp_clock_recovery_ok(dp_info->link_status, dp_info->dp_lane_count)) { + clock_recovery = true; +@@ -875,8 +877,10 @@ static int radeon_dp_link_train_ce(struct radeon_dp_link_train_info *dp_info) + else + mdelay(dp_info->rd_interval * 4); + +- if (!radeon_dp_get_link_status(dp_info->radeon_connector, dp_info->link_status)) ++ if (!radeon_dp_get_link_status(dp_info->radeon_connector, dp_info->link_status)) { ++ DRM_ERROR("displayport link status failed\n"); + break; ++ } + + if (dp_channel_eq_ok(dp_info->link_status, dp_info->dp_lane_count)) { + channel_eq = true; +diff --git a/drivers/gpu/drm/radeon/radeon_connectors.c b/drivers/gpu/drm/radeon/radeon_connectors.c +index 4a4493f..87d494d 100644 +--- a/drivers/gpu/drm/radeon/radeon_connectors.c ++++ b/drivers/gpu/drm/radeon/radeon_connectors.c +@@ -64,14 +64,33 @@ void radeon_connector_hotplug(struct drm_connector *connector) + + /* just deal with DP (not eDP) here. */ + if (connector->connector_type == DRM_MODE_CONNECTOR_DisplayPort) { +- int saved_dpms = connector->dpms; +- +- /* Only turn off the display it it's physically disconnected */ +- if (!radeon_hpd_sense(rdev, radeon_connector->hpd.hpd)) +- drm_helper_connector_dpms(connector, DRM_MODE_DPMS_OFF); +- else if (radeon_dp_needs_link_train(radeon_connector)) +- drm_helper_connector_dpms(connector, DRM_MODE_DPMS_ON); +- connector->dpms = saved_dpms; ++ struct radeon_connector_atom_dig *dig_connector = ++ radeon_connector->con_priv; ++ ++ /* if existing sink type was not DP no need to retrain */ ++ if (dig_connector->dp_sink_type != CONNECTOR_OBJECT_ID_DISPLAYPORT) ++ return; ++ ++ /* first get sink type as it may be reset after (un)plug */ ++ dig_connector->dp_sink_type = radeon_dp_getsinktype(radeon_connector); ++ /* don't do anything if sink is not display port, i.e., ++ * passive dp->(dvi|hdmi) adaptor ++ */ ++ if (dig_connector->dp_sink_type == CONNECTOR_OBJECT_ID_DISPLAYPORT) { ++ int saved_dpms = connector->dpms; ++ /* Only turn off the display if it's physically disconnected */ ++ if (!radeon_hpd_sense(rdev, radeon_connector->hpd.hpd)) { ++ drm_helper_connector_dpms(connector, DRM_MODE_DPMS_OFF); ++ } else if (radeon_dp_needs_link_train(radeon_connector)) { ++ /* set it to OFF so that drm_helper_connector_dpms() ++ * won't return immediately since the current state ++ * is ON at this point. 
++ */ ++ connector->dpms = DRM_MODE_DPMS_OFF; ++ drm_helper_connector_dpms(connector, DRM_MODE_DPMS_ON); ++ } ++ connector->dpms = saved_dpms; ++ } + } + } + +diff --git a/drivers/gpu/drm/radeon/radeon_cursor.c b/drivers/gpu/drm/radeon/radeon_cursor.c +index 986d608..2132109 100644 +--- a/drivers/gpu/drm/radeon/radeon_cursor.c ++++ b/drivers/gpu/drm/radeon/radeon_cursor.c +@@ -257,8 +257,14 @@ int radeon_crtc_cursor_move(struct drm_crtc *crtc, + if (!(cursor_end & 0x7f)) + w--; + } +- if (w <= 0) ++ if (w <= 0) { + w = 1; ++ cursor_end = x - xorigin + w; ++ if (!(cursor_end & 0x7f)) { ++ x--; ++ WARN_ON_ONCE(x < 0); ++ } ++ } + } + } + +diff --git a/drivers/gpu/drm/radeon/radeon_object.c b/drivers/gpu/drm/radeon/radeon_object.c +index f3ae607..39497c7 100644 +--- a/drivers/gpu/drm/radeon/radeon_object.c ++++ b/drivers/gpu/drm/radeon/radeon_object.c +@@ -117,7 +117,6 @@ int radeon_bo_create(struct radeon_device *rdev, + return -ENOMEM; + } + +-retry: + bo = kzalloc(sizeof(struct radeon_bo), GFP_KERNEL); + if (bo == NULL) + return -ENOMEM; +@@ -130,6 +129,8 @@ retry: + bo->gem_base.driver_private = NULL; + bo->surface_reg = -1; + INIT_LIST_HEAD(&bo->list); ++ ++retry: + radeon_ttm_placement_from_domain(bo, domain); + /* Kernel allocation are uninterruptible */ + mutex_lock(&rdev->vram_mutex); +diff --git a/drivers/iommu/amd_iommu.c b/drivers/iommu/amd_iommu.c +index a1b8caa..0f074e0 100644 +--- a/drivers/iommu/amd_iommu.c ++++ b/drivers/iommu/amd_iommu.c +@@ -1865,6 +1865,11 @@ static int device_change_notifier(struct notifier_block *nb, + + iommu_init_device(dev); + ++ if (iommu_pass_through) { ++ attach_device(dev, pt_domain); ++ break; ++ } ++ + domain = domain_for_device(dev); + + /* allocate a protection domain if a device is added */ +@@ -1880,10 +1885,7 @@ static int device_change_notifier(struct notifier_block *nb, + list_add_tail(&dma_domain->list, &iommu_pd_list); + spin_unlock_irqrestore(&iommu_pd_list_lock, flags); + +- if (!iommu_pass_through) +- dev->archdata.dma_ops = &amd_iommu_dma_ops; +- else +- dev->archdata.dma_ops = &nommu_dma_ops; ++ dev->archdata.dma_ops = &amd_iommu_dma_ops; + + break; + case BUS_NOTIFY_DEL_DEVICE: +diff --git a/drivers/media/video/cx25821/cx25821-core.c b/drivers/media/video/cx25821/cx25821-core.c +index a7fa38f..e572ce5 100644 +--- a/drivers/media/video/cx25821/cx25821-core.c ++++ b/drivers/media/video/cx25821/cx25821-core.c +@@ -914,9 +914,6 @@ static int cx25821_dev_setup(struct cx25821_dev *dev) + list_add_tail(&dev->devlist, &cx25821_devlist); + mutex_unlock(&cx25821_devlist_mutex); + +- strcpy(cx25821_boards[UNKNOWN_BOARD].name, "unknown"); +- strcpy(cx25821_boards[CX25821_BOARD].name, "cx25821"); +- + if (dev->pci->device != 0x8210) { + pr_info("%s(): Exiting. 
Incorrect Hardware device = 0x%02x\n", + __func__, dev->pci->device); +diff --git a/drivers/media/video/cx25821/cx25821.h b/drivers/media/video/cx25821/cx25821.h +index 2d2d009..bf54360 100644 +--- a/drivers/media/video/cx25821/cx25821.h ++++ b/drivers/media/video/cx25821/cx25821.h +@@ -187,7 +187,7 @@ enum port { + }; + + struct cx25821_board { +- char *name; ++ const char *name; + enum port porta; + enum port portb; + enum port portc; +diff --git a/drivers/mmc/host/sdhci-pci.c b/drivers/mmc/host/sdhci-pci.c +index 6878a94..83b51b5 100644 +--- a/drivers/mmc/host/sdhci-pci.c ++++ b/drivers/mmc/host/sdhci-pci.c +@@ -148,6 +148,7 @@ static const struct sdhci_pci_fixes sdhci_ene_714 = { + static const struct sdhci_pci_fixes sdhci_cafe = { + .quirks = SDHCI_QUIRK_NO_SIMULT_VDD_AND_POWER | + SDHCI_QUIRK_NO_BUSY_IRQ | ++ SDHCI_QUIRK_BROKEN_CARD_DETECTION | + SDHCI_QUIRK_BROKEN_TIMEOUT_VAL, + }; + +diff --git a/drivers/net/ethernet/realtek/r8169.c b/drivers/net/ethernet/realtek/r8169.c +index 9e61d6b..ed1be8a 100644 +--- a/drivers/net/ethernet/realtek/r8169.c ++++ b/drivers/net/ethernet/realtek/r8169.c +@@ -3770,6 +3770,7 @@ static void rtl_init_rxcfg(struct rtl8169_private *tp) + case RTL_GIGA_MAC_VER_22: + case RTL_GIGA_MAC_VER_23: + case RTL_GIGA_MAC_VER_24: ++ case RTL_GIGA_MAC_VER_34: + RTL_W32(RxConfig, RX128_INT_EN | RX_MULTI_EN | RX_DMA_BURST); + break; + default: +diff --git a/drivers/net/wireless/mwifiex/cfg80211.c b/drivers/net/wireless/mwifiex/cfg80211.c +index 01dcb1a..727c129 100644 +--- a/drivers/net/wireless/mwifiex/cfg80211.c ++++ b/drivers/net/wireless/mwifiex/cfg80211.c +@@ -545,9 +545,9 @@ mwifiex_dump_station_info(struct mwifiex_private *priv, + + /* + * Bit 0 in tx_htinfo indicates that current Tx rate is 11n rate. Valid +- * MCS index values for us are 0 to 7. ++ * MCS index values for us are 0 to 15. 
+ */ +- if ((priv->tx_htinfo & BIT(0)) && (priv->tx_rate < 8)) { ++ if ((priv->tx_htinfo & BIT(0)) && (priv->tx_rate < 16)) { + sinfo->txrate.mcs = priv->tx_rate; + sinfo->txrate.flags |= RATE_INFO_FLAGS_MCS; + /* 40MHz rate */ +diff --git a/drivers/net/wireless/rt2x00/rt2800usb.c b/drivers/net/wireless/rt2x00/rt2800usb.c +index 0ffa111..bdf960b 100644 +--- a/drivers/net/wireless/rt2x00/rt2800usb.c ++++ b/drivers/net/wireless/rt2x00/rt2800usb.c +@@ -876,6 +876,7 @@ static struct usb_device_id rt2800usb_device_table[] = { + { USB_DEVICE(0x1482, 0x3c09) }, + /* AirTies */ + { USB_DEVICE(0x1eda, 0x2012) }, ++ { USB_DEVICE(0x1eda, 0x2210) }, + { USB_DEVICE(0x1eda, 0x2310) }, + /* Allwin */ + { USB_DEVICE(0x8516, 0x2070) }, +@@ -945,6 +946,7 @@ static struct usb_device_id rt2800usb_device_table[] = { + /* DVICO */ + { USB_DEVICE(0x0fe9, 0xb307) }, + /* Edimax */ ++ { USB_DEVICE(0x7392, 0x4085) }, + { USB_DEVICE(0x7392, 0x7711) }, + { USB_DEVICE(0x7392, 0x7717) }, + { USB_DEVICE(0x7392, 0x7718) }, +@@ -1020,6 +1022,7 @@ static struct usb_device_id rt2800usb_device_table[] = { + /* Philips */ + { USB_DEVICE(0x0471, 0x200f) }, + /* Planex */ ++ { USB_DEVICE(0x2019, 0x5201) }, + { USB_DEVICE(0x2019, 0xab25) }, + { USB_DEVICE(0x2019, 0xed06) }, + /* Quanta */ +@@ -1088,6 +1091,12 @@ static struct usb_device_id rt2800usb_device_table[] = { + #ifdef CONFIG_RT2800USB_RT33XX + /* Belkin */ + { USB_DEVICE(0x050d, 0x945b) }, ++ /* D-Link */ ++ { USB_DEVICE(0x2001, 0x3c17) }, ++ /* Panasonic */ ++ { USB_DEVICE(0x083a, 0xb511) }, ++ /* Philips */ ++ { USB_DEVICE(0x0471, 0x20dd) }, + /* Ralink */ + { USB_DEVICE(0x148f, 0x3370) }, + { USB_DEVICE(0x148f, 0x8070) }, +@@ -1099,6 +1108,8 @@ static struct usb_device_id rt2800usb_device_table[] = { + { USB_DEVICE(0x8516, 0x3572) }, + /* Askey */ + { USB_DEVICE(0x1690, 0x0744) }, ++ { USB_DEVICE(0x1690, 0x0761) }, ++ { USB_DEVICE(0x1690, 0x0764) }, + /* Cisco */ + { USB_DEVICE(0x167b, 0x4001) }, + /* EnGenius */ +@@ -1113,6 +1124,9 @@ static struct usb_device_id rt2800usb_device_table[] = { + /* Sitecom */ + { USB_DEVICE(0x0df6, 0x0041) }, + { USB_DEVICE(0x0df6, 0x0062) }, ++ { USB_DEVICE(0x0df6, 0x0065) }, ++ { USB_DEVICE(0x0df6, 0x0066) }, ++ { USB_DEVICE(0x0df6, 0x0068) }, + /* Toshiba */ + { USB_DEVICE(0x0930, 0x0a07) }, + /* Zinwell */ +@@ -1122,6 +1136,9 @@ static struct usb_device_id rt2800usb_device_table[] = { + /* Azurewave */ + { USB_DEVICE(0x13d3, 0x3329) }, + { USB_DEVICE(0x13d3, 0x3365) }, ++ /* D-Link */ ++ { USB_DEVICE(0x2001, 0x3c1c) }, ++ { USB_DEVICE(0x2001, 0x3c1d) }, + /* Ralink */ + { USB_DEVICE(0x148f, 0x5370) }, + { USB_DEVICE(0x148f, 0x5372) }, +@@ -1163,13 +1180,8 @@ static struct usb_device_id rt2800usb_device_table[] = { + /* D-Link */ + { USB_DEVICE(0x07d1, 0x3c0b) }, + { USB_DEVICE(0x07d1, 0x3c17) }, +- { USB_DEVICE(0x2001, 0x3c17) }, +- /* Edimax */ +- { USB_DEVICE(0x7392, 0x4085) }, + /* Encore */ + { USB_DEVICE(0x203d, 0x14a1) }, +- /* Fujitsu Stylistic 550 */ +- { USB_DEVICE(0x1690, 0x0761) }, + /* Gemtek */ + { USB_DEVICE(0x15a9, 0x0010) }, + /* Gigabyte */ +@@ -1190,7 +1202,6 @@ static struct usb_device_id rt2800usb_device_table[] = { + { USB_DEVICE(0x05a6, 0x0101) }, + { USB_DEVICE(0x1d4d, 0x0010) }, + /* Planex */ +- { USB_DEVICE(0x2019, 0x5201) }, + { USB_DEVICE(0x2019, 0xab24) }, + /* Qcom */ + { USB_DEVICE(0x18e8, 0x6259) }, +diff --git a/drivers/net/wireless/rtlwifi/rtl8192de/phy.c b/drivers/net/wireless/rtlwifi/rtl8192de/phy.c +index 2cf4c5f..de9faa9 100644 +--- a/drivers/net/wireless/rtlwifi/rtl8192de/phy.c ++++ 
b/drivers/net/wireless/rtlwifi/rtl8192de/phy.c +@@ -3462,21 +3462,21 @@ void rtl92d_phy_config_macphymode_info(struct ieee80211_hw *hw) + switch (rtlhal->macphymode) { + case DUALMAC_SINGLEPHY: + rtlphy->rf_type = RF_2T2R; +- rtlhal->version |= CHIP_92D_SINGLEPHY; ++ rtlhal->version |= RF_TYPE_2T2R; + rtlhal->bandset = BAND_ON_BOTH; + rtlhal->current_bandtype = BAND_ON_2_4G; + break; + + case SINGLEMAC_SINGLEPHY: + rtlphy->rf_type = RF_2T2R; +- rtlhal->version |= CHIP_92D_SINGLEPHY; ++ rtlhal->version |= RF_TYPE_2T2R; + rtlhal->bandset = BAND_ON_BOTH; + rtlhal->current_bandtype = BAND_ON_2_4G; + break; + + case DUALMAC_DUALPHY: + rtlphy->rf_type = RF_1T1R; +- rtlhal->version &= (~CHIP_92D_SINGLEPHY); ++ rtlhal->version &= RF_TYPE_1T1R; + /* Now we let MAC0 run on 5G band. */ + if (rtlhal->interfaceindex == 0) { + rtlhal->bandset = BAND_ON_5G; +diff --git a/drivers/scsi/hosts.c b/drivers/scsi/hosts.c +index 351dc0b..ee77a58 100644 +--- a/drivers/scsi/hosts.c ++++ b/drivers/scsi/hosts.c +@@ -287,6 +287,7 @@ static void scsi_host_dev_release(struct device *dev) + struct Scsi_Host *shost = dev_to_shost(dev); + struct device *parent = dev->parent; + struct request_queue *q; ++ void *queuedata; + + scsi_proc_hostdir_rm(shost->hostt); + +@@ -296,9 +297,9 @@ static void scsi_host_dev_release(struct device *dev) + destroy_workqueue(shost->work_q); + q = shost->uspace_req_q; + if (q) { +- kfree(q->queuedata); +- q->queuedata = NULL; +- scsi_free_queue(q); ++ queuedata = q->queuedata; ++ blk_cleanup_queue(q); ++ kfree(queuedata); + } + + scsi_destroy_command_freelist(shost); +diff --git a/drivers/scsi/libsas/sas_expander.c b/drivers/scsi/libsas/sas_expander.c +index e48ba4b..dbe3568 100644 +--- a/drivers/scsi/libsas/sas_expander.c ++++ b/drivers/scsi/libsas/sas_expander.c +@@ -774,7 +774,7 @@ static struct domain_device *sas_ex_discover_end_dev( + } + + /* See if this phy is part of a wide port */ +-static int sas_ex_join_wide_port(struct domain_device *parent, int phy_id) ++static bool sas_ex_join_wide_port(struct domain_device *parent, int phy_id) + { + struct ex_phy *phy = &parent->ex_dev.ex_phy[phy_id]; + int i; +@@ -790,11 +790,11 @@ static int sas_ex_join_wide_port(struct domain_device *parent, int phy_id) + sas_port_add_phy(ephy->port, phy->phy); + phy->port = ephy->port; + phy->phy_state = PHY_DEVICE_DISCOVERED; +- return 0; ++ return true; + } + } + +- return -ENODEV; ++ return false; + } + + static struct domain_device *sas_ex_discover_expander( +@@ -932,8 +932,7 @@ static int sas_ex_discover_dev(struct domain_device *dev, int phy_id) + return res; + } + +- res = sas_ex_join_wide_port(dev, phy_id); +- if (!res) { ++ if (sas_ex_join_wide_port(dev, phy_id)) { + SAS_DPRINTK("Attaching ex phy%d to wide port %016llx\n", + phy_id, SAS_ADDR(ex_phy->attached_sas_addr)); + return res; +@@ -978,8 +977,7 @@ static int sas_ex_discover_dev(struct domain_device *dev, int phy_id) + if (SAS_ADDR(ex->ex_phy[i].attached_sas_addr) == + SAS_ADDR(child->sas_addr)) { + ex->ex_phy[i].phy_state= PHY_DEVICE_DISCOVERED; +- res = sas_ex_join_wide_port(dev, i); +- if (!res) ++ if (sas_ex_join_wide_port(dev, i)) + SAS_DPRINTK("Attaching ex phy%d to wide port %016llx\n", + i, SAS_ADDR(ex->ex_phy[i].attached_sas_addr)); + +@@ -1849,32 +1847,20 @@ static int sas_discover_new(struct domain_device *dev, int phy_id) + { + struct ex_phy *ex_phy = &dev->ex_dev.ex_phy[phy_id]; + struct domain_device *child; +- bool found = false; +- int res, i; ++ int res; + + SAS_DPRINTK("ex %016llx phy%d new device attached\n", + 
SAS_ADDR(dev->sas_addr), phy_id); + res = sas_ex_phy_discover(dev, phy_id); + if (res) +- goto out; +- /* to support the wide port inserted */ +- for (i = 0; i < dev->ex_dev.num_phys; i++) { +- struct ex_phy *ex_phy_temp = &dev->ex_dev.ex_phy[i]; +- if (i == phy_id) +- continue; +- if (SAS_ADDR(ex_phy_temp->attached_sas_addr) == +- SAS_ADDR(ex_phy->attached_sas_addr)) { +- found = true; +- break; +- } +- } +- if (found) { +- sas_ex_join_wide_port(dev, phy_id); ++ return res; ++ ++ if (sas_ex_join_wide_port(dev, phy_id)) + return 0; +- } ++ + res = sas_ex_discover_devices(dev, phy_id); +- if (!res) +- goto out; ++ if (res) ++ return res; + list_for_each_entry(child, &dev->ex_dev.children, siblings) { + if (SAS_ADDR(child->sas_addr) == + SAS_ADDR(ex_phy->attached_sas_addr)) { +@@ -1884,7 +1870,6 @@ static int sas_discover_new(struct domain_device *dev, int phy_id) + break; + } + } +-out: + return res; + } + +@@ -1983,9 +1968,7 @@ int sas_ex_revalidate_domain(struct domain_device *port_dev) + struct domain_device *dev = NULL; + + res = sas_find_bcast_dev(port_dev, &dev); +- if (res) +- goto out; +- if (dev) { ++ while (res == 0 && dev) { + struct expander_device *ex = &dev->ex_dev; + int i = 0, phy_id; + +@@ -1997,8 +1980,10 @@ int sas_ex_revalidate_domain(struct domain_device *port_dev) + res = sas_rediscover(dev, phy_id); + i = phy_id + 1; + } while (i < ex->num_phys); ++ ++ dev = NULL; ++ res = sas_find_bcast_dev(port_dev, &dev); + } +-out: + return res; + } + +diff --git a/drivers/scsi/scsi.c b/drivers/scsi/scsi.c +index 2aeb2e9..831db24 100644 +--- a/drivers/scsi/scsi.c ++++ b/drivers/scsi/scsi.c +@@ -785,7 +785,13 @@ static void scsi_done(struct scsi_cmnd *cmd) + /* Move this to a header if it becomes more generally useful */ + static struct scsi_driver *scsi_cmd_to_driver(struct scsi_cmnd *cmd) + { +- return *(struct scsi_driver **)cmd->request->rq_disk->private_data; ++ struct scsi_driver **sdp; ++ ++ sdp = (struct scsi_driver **)cmd->request->rq_disk->private_data; ++ if (!sdp) ++ return NULL; ++ ++ return *sdp; + } + + /** +diff --git a/drivers/scsi/scsi_error.c b/drivers/scsi/scsi_error.c +index dc6131e..456b131 100644 +--- a/drivers/scsi/scsi_error.c ++++ b/drivers/scsi/scsi_error.c +@@ -1673,6 +1673,20 @@ static void scsi_restart_operations(struct Scsi_Host *shost) + * requests are started. + */ + scsi_run_host_queues(shost); ++ ++ /* ++ * if eh is active and host_eh_scheduled is pending we need to re-run ++ * recovery. we do this check after scsi_run_host_queues() to allow ++ * everything pent up since the last eh run a chance to make forward ++ * progress before we sync again. Either we'll immediately re-run ++ * recovery or scsi_device_unbusy() will wake us again when these ++ * pending commands complete. 
++ */ ++ spin_lock_irqsave(shost->host_lock, flags); ++ if (shost->host_eh_scheduled) ++ if (scsi_host_set_state(shost, SHOST_RECOVERY)) ++ WARN_ON(scsi_host_set_state(shost, SHOST_CANCEL_RECOVERY)); ++ spin_unlock_irqrestore(shost->host_lock, flags); + } + + /** +diff --git a/drivers/scsi/scsi_lib.c b/drivers/scsi/scsi_lib.c +index f0ab58e..6c4b620 100644 +--- a/drivers/scsi/scsi_lib.c ++++ b/drivers/scsi/scsi_lib.c +@@ -406,10 +406,6 @@ static void scsi_run_queue(struct request_queue *q) + LIST_HEAD(starved_list); + unsigned long flags; + +- /* if the device is dead, sdev will be NULL, so no queue to run */ +- if (!sdev) +- return; +- + shost = sdev->host; + if (scsi_target(sdev)->single_lun) + scsi_single_lun_run(sdev); +@@ -483,15 +479,26 @@ void scsi_requeue_run_queue(struct work_struct *work) + */ + static void scsi_requeue_command(struct request_queue *q, struct scsi_cmnd *cmd) + { ++ struct scsi_device *sdev = cmd->device; + struct request *req = cmd->request; + unsigned long flags; + ++ /* ++ * We need to hold a reference on the device to avoid the queue being ++ * killed after the unlock and before scsi_run_queue is invoked which ++ * may happen because scsi_unprep_request() puts the command which ++ * releases its reference on the device. ++ */ ++ get_device(&sdev->sdev_gendev); ++ + spin_lock_irqsave(q->queue_lock, flags); + scsi_unprep_request(req); + blk_requeue_request(q, req); + spin_unlock_irqrestore(q->queue_lock, flags); + + scsi_run_queue(q); ++ ++ put_device(&sdev->sdev_gendev); + } + + void scsi_next_command(struct scsi_cmnd *cmd) +@@ -1374,16 +1381,16 @@ static inline int scsi_host_queue_ready(struct request_queue *q, + * may be changed after request stacking drivers call the function, + * regardless of taking lock or not. + * +- * When scsi can't dispatch I/Os anymore and needs to kill I/Os +- * (e.g. !sdev), scsi needs to return 'not busy'. +- * Otherwise, request stacking drivers may hold requests forever. ++ * When scsi can't dispatch I/Os anymore and needs to kill I/Os scsi ++ * needs to return 'not busy'. Otherwise, request stacking drivers ++ * may hold requests forever. 
+ */ + static int scsi_lld_busy(struct request_queue *q) + { + struct scsi_device *sdev = q->queuedata; + struct Scsi_Host *shost; + +- if (!sdev) ++ if (blk_queue_dead(q)) + return 0; + + shost = sdev->host; +@@ -1494,12 +1501,6 @@ static void scsi_request_fn(struct request_queue *q) + struct scsi_cmnd *cmd; + struct request *req; + +- if (!sdev) { +- while ((req = blk_peek_request(q)) != NULL) +- scsi_kill_request(req, q); +- return; +- } +- + if(!get_device(&sdev->sdev_gendev)) + /* We must be tearing the block queue down already */ + return; +@@ -1701,20 +1702,6 @@ struct request_queue *scsi_alloc_queue(struct scsi_device *sdev) + return q; + } + +-void scsi_free_queue(struct request_queue *q) +-{ +- unsigned long flags; +- +- WARN_ON(q->queuedata); +- +- /* cause scsi_request_fn() to kill all non-finished requests */ +- spin_lock_irqsave(q->queue_lock, flags); +- q->request_fn(q); +- spin_unlock_irqrestore(q->queue_lock, flags); +- +- blk_cleanup_queue(q); +-} +- + /* + * Function: scsi_block_requests() + * +diff --git a/drivers/scsi/scsi_priv.h b/drivers/scsi/scsi_priv.h +index 5b475d0..d58adca 100644 +--- a/drivers/scsi/scsi_priv.h ++++ b/drivers/scsi/scsi_priv.h +@@ -85,7 +85,6 @@ extern void scsi_next_command(struct scsi_cmnd *cmd); + extern void scsi_io_completion(struct scsi_cmnd *, unsigned int); + extern void scsi_run_host_queues(struct Scsi_Host *shost); + extern struct request_queue *scsi_alloc_queue(struct scsi_device *sdev); +-extern void scsi_free_queue(struct request_queue *q); + extern int scsi_init_queue(void); + extern void scsi_exit_queue(void); + struct request_queue; +diff --git a/drivers/scsi/scsi_scan.c b/drivers/scsi/scsi_scan.c +index 6e7ea4a..a48b59c 100644 +--- a/drivers/scsi/scsi_scan.c ++++ b/drivers/scsi/scsi_scan.c +@@ -1710,6 +1710,9 @@ static void scsi_sysfs_add_devices(struct Scsi_Host *shost) + { + struct scsi_device *sdev; + shost_for_each_device(sdev, shost) { ++ /* target removed before the device could be added */ ++ if (sdev->sdev_state == SDEV_DEL) ++ continue; + if (!scsi_host_scan_allowed(shost) || + scsi_sysfs_add_sdev(sdev) != 0) + __scsi_remove_device(sdev); +diff --git a/drivers/scsi/scsi_sysfs.c b/drivers/scsi/scsi_sysfs.c +index 04c2a27..bb7c482 100644 +--- a/drivers/scsi/scsi_sysfs.c ++++ b/drivers/scsi/scsi_sysfs.c +@@ -971,11 +971,8 @@ void __scsi_remove_device(struct scsi_device *sdev) + sdev->host->hostt->slave_destroy(sdev); + transport_destroy_device(dev); + +- /* cause the request function to reject all I/O requests */ +- sdev->request_queue->queuedata = NULL; +- + /* Freeing the queue signals to block that we're done */ +- scsi_free_queue(sdev->request_queue); ++ blk_cleanup_queue(sdev->request_queue); + put_device(dev); + } + +@@ -1000,7 +997,6 @@ static void __scsi_remove_target(struct scsi_target *starget) + struct scsi_device *sdev; + + spin_lock_irqsave(shost->host_lock, flags); +- starget->reap_ref++; + restart: + list_for_each_entry(sdev, &shost->__devices, siblings) { + if (sdev->channel != starget->channel || +@@ -1014,14 +1010,6 @@ static void __scsi_remove_target(struct scsi_target *starget) + goto restart; + } + spin_unlock_irqrestore(shost->host_lock, flags); +- scsi_target_reap(starget); +-} +- +-static int __remove_child (struct device * dev, void * data) +-{ +- if (scsi_is_target_device(dev)) +- __scsi_remove_target(to_scsi_target(dev)); +- return 0; + } + + /** +@@ -1034,14 +1022,34 @@ static int __remove_child (struct device * dev, void * data) + */ + void scsi_remove_target(struct device *dev) + { +- if 
(scsi_is_target_device(dev)) { +- __scsi_remove_target(to_scsi_target(dev)); +- return; ++ struct Scsi_Host *shost = dev_to_shost(dev->parent); ++ struct scsi_target *starget, *found; ++ unsigned long flags; ++ ++ restart: ++ found = NULL; ++ spin_lock_irqsave(shost->host_lock, flags); ++ list_for_each_entry(starget, &shost->__targets, siblings) { ++ if (starget->state == STARGET_DEL) ++ continue; ++ if (starget->dev.parent == dev || &starget->dev == dev) { ++ found = starget; ++ found->reap_ref++; ++ break; ++ } + } ++ spin_unlock_irqrestore(shost->host_lock, flags); + +- get_device(dev); +- device_for_each_child(dev, NULL, __remove_child); +- put_device(dev); ++ if (found) { ++ __scsi_remove_target(found); ++ scsi_target_reap(found); ++ /* in the case where @dev has multiple starget children, ++ * continue removing. ++ * ++ * FIXME: does such a case exist? ++ */ ++ goto restart; ++ } + } + EXPORT_SYMBOL(scsi_remove_target); + +diff --git a/drivers/target/iscsi/iscsi_target.c b/drivers/target/iscsi/iscsi_target.c +index 0842cc7..2ff1255 100644 +--- a/drivers/target/iscsi/iscsi_target.c ++++ b/drivers/target/iscsi/iscsi_target.c +@@ -427,19 +427,8 @@ int iscsit_reset_np_thread( + + int iscsit_del_np_comm(struct iscsi_np *np) + { +- if (!np->np_socket) +- return 0; +- +- /* +- * Some network transports allocate their own struct sock->file, +- * see if we need to free any additional allocated resources. +- */ +- if (np->np_flags & NPF_SCTP_STRUCT_FILE) { +- kfree(np->np_socket->file); +- np->np_socket->file = NULL; +- } +- +- sock_release(np->np_socket); ++ if (np->np_socket) ++ sock_release(np->np_socket); + return 0; + } + +@@ -4105,13 +4094,8 @@ int iscsit_close_connection( + kfree(conn->conn_ops); + conn->conn_ops = NULL; + +- if (conn->sock) { +- if (conn->conn_flags & CONNFLAG_SCTP_STRUCT_FILE) { +- kfree(conn->sock->file); +- conn->sock->file = NULL; +- } ++ if (conn->sock) + sock_release(conn->sock); +- } + conn->thread_set = NULL; + + pr_debug("Moving to TARG_CONN_STATE_FREE.\n"); +diff --git a/drivers/target/iscsi/iscsi_target_core.h b/drivers/target/iscsi/iscsi_target_core.h +index 7da2d6a..0f68197 100644 +--- a/drivers/target/iscsi/iscsi_target_core.h ++++ b/drivers/target/iscsi/iscsi_target_core.h +@@ -224,7 +224,6 @@ enum iscsi_timer_flags_table { + /* Used for struct iscsi_np->np_flags */ + enum np_flags_table { + NPF_IP_NETWORK = 0x00, +- NPF_SCTP_STRUCT_FILE = 0x01 /* Bugfix */ + }; + + /* Used for struct iscsi_np->np_thread_state */ +@@ -511,7 +510,6 @@ struct iscsi_conn { + u16 local_port; + int net_size; + u32 auth_id; +-#define CONNFLAG_SCTP_STRUCT_FILE 0x01 + u32 conn_flags; + /* Used for iscsi_tx_login_rsp() */ + u32 login_itt; +diff --git a/drivers/target/iscsi/iscsi_target_login.c b/drivers/target/iscsi/iscsi_target_login.c +index bd2adec..2ec5339 100644 +--- a/drivers/target/iscsi/iscsi_target_login.c ++++ b/drivers/target/iscsi/iscsi_target_login.c +@@ -793,22 +793,6 @@ int iscsi_target_setup_login_socket( + } + np->np_socket = sock; + /* +- * The SCTP stack needs struct socket->file. +- */ +- if ((np->np_network_transport == ISCSI_SCTP_TCP) || +- (np->np_network_transport == ISCSI_SCTP_UDP)) { +- if (!sock->file) { +- sock->file = kzalloc(sizeof(struct file), GFP_KERNEL); +- if (!sock->file) { +- pr_err("Unable to allocate struct" +- " file for SCTP\n"); +- ret = -ENOMEM; +- goto fail; +- } +- np->np_flags |= NPF_SCTP_STRUCT_FILE; +- } +- } +- /* + * Setup the np->np_sockaddr from the passed sockaddr setup + * in iscsi_target_configfs.c code.. 
+ */ +@@ -857,21 +841,15 @@ int iscsi_target_setup_login_socket( + + fail: + np->np_socket = NULL; +- if (sock) { +- if (np->np_flags & NPF_SCTP_STRUCT_FILE) { +- kfree(sock->file); +- sock->file = NULL; +- } +- ++ if (sock) + sock_release(sock); +- } + return ret; + } + + static int __iscsi_target_login_thread(struct iscsi_np *np) + { + u8 buffer[ISCSI_HDR_LEN], iscsi_opcode, zero_tsih = 0; +- int err, ret = 0, ip_proto, sock_type, set_sctp_conn_flag, stop; ++ int err, ret = 0, ip_proto, sock_type, stop; + struct iscsi_conn *conn = NULL; + struct iscsi_login *login; + struct iscsi_portal_group *tpg = NULL; +@@ -882,7 +860,6 @@ static int __iscsi_target_login_thread(struct iscsi_np *np) + struct sockaddr_in6 sock_in6; + + flush_signals(current); +- set_sctp_conn_flag = 0; + sock = np->np_socket; + ip_proto = np->np_ip_proto; + sock_type = np->np_sock_type; +@@ -907,35 +884,12 @@ static int __iscsi_target_login_thread(struct iscsi_np *np) + spin_unlock_bh(&np->np_thread_lock); + goto out; + } +- /* +- * The SCTP stack needs struct socket->file. +- */ +- if ((np->np_network_transport == ISCSI_SCTP_TCP) || +- (np->np_network_transport == ISCSI_SCTP_UDP)) { +- if (!new_sock->file) { +- new_sock->file = kzalloc( +- sizeof(struct file), GFP_KERNEL); +- if (!new_sock->file) { +- pr_err("Unable to allocate struct" +- " file for SCTP\n"); +- sock_release(new_sock); +- /* Get another socket */ +- return 1; +- } +- set_sctp_conn_flag = 1; +- } +- } +- + iscsi_start_login_thread_timer(np); + + conn = kzalloc(sizeof(struct iscsi_conn), GFP_KERNEL); + if (!conn) { + pr_err("Could not allocate memory for" + " new connection\n"); +- if (set_sctp_conn_flag) { +- kfree(new_sock->file); +- new_sock->file = NULL; +- } + sock_release(new_sock); + /* Get another socket */ + return 1; +@@ -945,9 +899,6 @@ static int __iscsi_target_login_thread(struct iscsi_np *np) + conn->conn_state = TARG_CONN_STATE_FREE; + conn->sock = new_sock; + +- if (set_sctp_conn_flag) +- conn->conn_flags |= CONNFLAG_SCTP_STRUCT_FILE; +- + pr_debug("Moving to TARG_CONN_STATE_XPT_UP.\n"); + conn->conn_state = TARG_CONN_STATE_XPT_UP; + +@@ -1195,13 +1146,8 @@ old_sess_out: + iscsi_release_param_list(conn->param_list); + conn->param_list = NULL; + } +- if (conn->sock) { +- if (conn->conn_flags & CONNFLAG_SCTP_STRUCT_FILE) { +- kfree(conn->sock->file); +- conn->sock->file = NULL; +- } ++ if (conn->sock) + sock_release(conn->sock); +- } + kfree(conn); + + if (tpg) { +diff --git a/drivers/target/target_core_cdb.c b/drivers/target/target_core_cdb.c +index 93b9406..717a8d4 100644 +--- a/drivers/target/target_core_cdb.c ++++ b/drivers/target/target_core_cdb.c +@@ -1114,11 +1114,11 @@ int target_emulate_unmap(struct se_task *task) + struct se_cmd *cmd = task->task_se_cmd; + struct se_device *dev = cmd->se_dev; + unsigned char *buf, *ptr = NULL; +- unsigned char *cdb = &cmd->t_task_cdb[0]; + sector_t lba; +- unsigned int size = cmd->data_length, range; +- int ret = 0, offset; +- unsigned short dl, bd_dl; ++ int size = cmd->data_length; ++ u32 range; ++ int ret = 0; ++ int dl, bd_dl; + + if (!dev->transport->do_discard) { + pr_err("UNMAP emulation not supported for: %s\n", +@@ -1127,24 +1127,41 @@ int target_emulate_unmap(struct se_task *task) + return -ENOSYS; + } + +- /* First UNMAP block descriptor starts at 8 byte offset */ +- offset = 8; +- size -= 8; +- dl = get_unaligned_be16(&cdb[0]); +- bd_dl = get_unaligned_be16(&cdb[2]); +- + buf = transport_kmap_data_sg(cmd); + +- ptr = &buf[offset]; +- pr_debug("UNMAP: Sub: %s Using dl: %hu bd_dl: %hu 
size: %hu" ++ dl = get_unaligned_be16(&buf[0]); ++ bd_dl = get_unaligned_be16(&buf[2]); ++ ++ size = min(size - 8, bd_dl); ++ if (size / 16 > dev->se_sub_dev->se_dev_attrib.max_unmap_block_desc_count) { ++ cmd->scsi_sense_reason = TCM_INVALID_PARAMETER_LIST; ++ ret = -EINVAL; ++ goto err; ++ } ++ ++ /* First UNMAP block descriptor starts at 8 byte offset */ ++ ptr = &buf[8]; ++ pr_debug("UNMAP: Sub: %s Using dl: %u bd_dl: %u size: %u" + " ptr: %p\n", dev->transport->name, dl, bd_dl, size, ptr); + +- while (size) { ++ while (size >= 16) { + lba = get_unaligned_be64(&ptr[0]); + range = get_unaligned_be32(&ptr[8]); + pr_debug("UNMAP: Using lba: %llu and range: %u\n", + (unsigned long long)lba, range); + ++ if (range > dev->se_sub_dev->se_dev_attrib.max_unmap_lba_count) { ++ cmd->scsi_sense_reason = TCM_INVALID_PARAMETER_LIST; ++ ret = -EINVAL; ++ goto err; ++ } ++ ++ if (lba + range > dev->transport->get_blocks(dev) + 1) { ++ cmd->scsi_sense_reason = TCM_ADDRESS_OUT_OF_RANGE; ++ ret = -EINVAL; ++ goto err; ++ } ++ + ret = dev->transport->do_discard(dev, lba, range); + if (ret < 0) { + pr_err("blkdev_issue_discard() failed: %d\n", +diff --git a/drivers/target/target_core_transport.c b/drivers/target/target_core_transport.c +index 5660916..94c03d2 100644 +--- a/drivers/target/target_core_transport.c ++++ b/drivers/target/target_core_transport.c +@@ -1820,6 +1820,7 @@ static void transport_generic_request_failure(struct se_cmd *cmd) + case TCM_LOGICAL_UNIT_COMMUNICATION_FAILURE: + case TCM_UNKNOWN_MODE_PAGE: + case TCM_WRITE_PROTECTED: ++ case TCM_ADDRESS_OUT_OF_RANGE: + case TCM_CHECK_CONDITION_ABORT_CMD: + case TCM_CHECK_CONDITION_UNIT_ATTENTION: + case TCM_CHECK_CONDITION_NOT_READY: +@@ -4496,6 +4497,15 @@ int transport_send_check_condition_and_sense( + /* WRITE PROTECTED */ + buffer[offset+SPC_ASC_KEY_OFFSET] = 0x27; + break; ++ case TCM_ADDRESS_OUT_OF_RANGE: ++ /* CURRENT ERROR */ ++ buffer[offset] = 0x70; ++ buffer[offset+SPC_ADD_SENSE_LEN_OFFSET] = 10; ++ /* ILLEGAL REQUEST */ ++ buffer[offset+SPC_SENSE_KEY_OFFSET] = ILLEGAL_REQUEST; ++ /* LOGICAL BLOCK ADDRESS OUT OF RANGE */ ++ buffer[offset+SPC_ASC_KEY_OFFSET] = 0x21; ++ break; + case TCM_CHECK_CONDITION_UNIT_ATTENTION: + /* CURRENT ERROR */ + buffer[offset] = 0x70; +diff --git a/drivers/usb/core/devio.c b/drivers/usb/core/devio.c +index f6ff837..a9df218 100644 +--- a/drivers/usb/core/devio.c ++++ b/drivers/usb/core/devio.c +@@ -1555,10 +1555,14 @@ static int processcompl_compat(struct async *as, void __user * __user *arg) + void __user *addr = as->userurb; + unsigned int i; + +- if (as->userbuffer && urb->actual_length) +- if (copy_to_user(as->userbuffer, urb->transfer_buffer, +- urb->actual_length)) ++ if (as->userbuffer && urb->actual_length) { ++ if (urb->number_of_packets > 0) /* Isochronous */ ++ i = urb->transfer_buffer_length; ++ else /* Non-Isoc */ ++ i = urb->actual_length; ++ if (copy_to_user(as->userbuffer, urb->transfer_buffer, i)) + return -EFAULT; ++ } + if (put_user(as->status, &userurb->status)) + return -EFAULT; + if (put_user(urb->actual_length, &userurb->actual_length)) +diff --git a/drivers/usb/gadget/u_ether.c b/drivers/usb/gadget/u_ether.c +index 29c854b..4e1f0aa 100644 +--- a/drivers/usb/gadget/u_ether.c ++++ b/drivers/usb/gadget/u_ether.c +@@ -796,12 +796,6 @@ int gether_setup(struct usb_gadget *g, u8 ethaddr[ETH_ALEN]) + + SET_ETHTOOL_OPS(net, &ops); + +- /* two kinds of host-initiated state changes: +- * - iff DATA transfer is active, carrier is "on" +- * - tx queueing enabled if open *and* carrier is "on" +- 
*/ +- netif_carrier_off(net); +- + dev->gadget = g; + SET_NETDEV_DEV(net, &g->dev); + SET_NETDEV_DEVTYPE(net, &gadget_type); +@@ -815,6 +809,12 @@ int gether_setup(struct usb_gadget *g, u8 ethaddr[ETH_ALEN]) + INFO(dev, "HOST MAC %pM\n", dev->host_mac); + + the_dev = dev; ++ ++ /* two kinds of host-initiated state changes: ++ * - iff DATA transfer is active, carrier is "on" ++ * - tx queueing enabled if open *and* carrier is "on" ++ */ ++ netif_carrier_off(net); + } + + return status; +diff --git a/drivers/usb/serial/option.c b/drivers/usb/serial/option.c +index 5971c95..d89aac1 100644 +--- a/drivers/usb/serial/option.c ++++ b/drivers/usb/serial/option.c +@@ -932,8 +932,12 @@ static const struct usb_device_id option_ids[] = { + { USB_DEVICE_AND_INTERFACE_INFO(ZTE_VENDOR_ID, 0x0165, 0xff, 0xff, 0xff) }, + { USB_DEVICE_AND_INTERFACE_INFO(ZTE_VENDOR_ID, 0x0167, 0xff, 0xff, 0xff), + .driver_info = (kernel_ulong_t)&net_intf4_blacklist }, +- { USB_DEVICE_AND_INTERFACE_INFO(ZTE_VENDOR_ID, 0x1008, 0xff, 0xff, 0xff) }, +- { USB_DEVICE_AND_INTERFACE_INFO(ZTE_VENDOR_ID, 0x1010, 0xff, 0xff, 0xff) }, ++ { USB_DEVICE_AND_INTERFACE_INFO(ZTE_VENDOR_ID, 0x0326, 0xff, 0xff, 0xff), ++ .driver_info = (kernel_ulong_t)&net_intf4_blacklist }, ++ { USB_DEVICE_AND_INTERFACE_INFO(ZTE_VENDOR_ID, 0x1008, 0xff, 0xff, 0xff), ++ .driver_info = (kernel_ulong_t)&net_intf4_blacklist }, ++ { USB_DEVICE_AND_INTERFACE_INFO(ZTE_VENDOR_ID, 0x1010, 0xff, 0xff, 0xff), ++ .driver_info = (kernel_ulong_t)&net_intf4_blacklist }, + { USB_DEVICE_AND_INTERFACE_INFO(ZTE_VENDOR_ID, 0x1012, 0xff, 0xff, 0xff) }, + { USB_DEVICE_AND_INTERFACE_INFO(ZTE_VENDOR_ID, 0x1057, 0xff, 0xff, 0xff) }, + { USB_DEVICE_AND_INTERFACE_INFO(ZTE_VENDOR_ID, 0x1058, 0xff, 0xff, 0xff) }, +diff --git a/fs/btrfs/async-thread.c b/fs/btrfs/async-thread.c +index 0b39458..03321e5 100644 +--- a/fs/btrfs/async-thread.c ++++ b/fs/btrfs/async-thread.c +@@ -206,10 +206,17 @@ static noinline int run_ordered_completions(struct btrfs_workers *workers, + + work->ordered_func(work); + +- /* now take the lock again and call the freeing code */ ++ /* now take the lock again and drop our item from the list */ + spin_lock(&workers->order_lock); + list_del(&work->order_list); ++ spin_unlock(&workers->order_lock); ++ ++ /* ++ * we don't want to call the ordered free functions ++ * with the lock held though ++ */ + work->ordered_free(work); ++ spin_lock(&workers->order_lock); + } + + spin_unlock(&workers->order_lock); +diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c +index f44b392..6b2a724 100644 +--- a/fs/btrfs/disk-io.c ++++ b/fs/btrfs/disk-io.c +@@ -872,7 +872,8 @@ static int btree_submit_bio_hook(struct inode *inode, int rw, struct bio *bio, + + #ifdef CONFIG_MIGRATION + static int btree_migratepage(struct address_space *mapping, +- struct page *newpage, struct page *page) ++ struct page *newpage, struct page *page, ++ enum migrate_mode mode) + { + /* + * we can't safely write a btree page from here, +@@ -887,7 +888,7 @@ static int btree_migratepage(struct address_space *mapping, + if (page_has_private(page) && + !try_to_release_page(page, GFP_KERNEL)) + return -EAGAIN; +- return migrate_page(mapping, newpage, page); ++ return migrate_page(mapping, newpage, page, mode); + } + #endif + +diff --git a/fs/cifs/cifssmb.c b/fs/cifs/cifssmb.c +index 6aa7457..c858a29 100644 +--- a/fs/cifs/cifssmb.c ++++ b/fs/cifs/cifssmb.c +@@ -89,6 +89,32 @@ static struct { + /* Forward declarations */ + static void cifs_readv_complete(struct work_struct *work); + ++#ifdef CONFIG_HIGHMEM ++/* ++ * On 
arches that have high memory, kmap address space is limited. By ++ * serializing the kmap operations on those arches, we ensure that we don't ++ * end up with a bunch of threads in writeback with partially mapped page ++ * arrays, stuck waiting for kmap to come back. That situation prevents ++ * progress and can deadlock. ++ */ ++static DEFINE_MUTEX(cifs_kmap_mutex); ++ ++static inline void ++cifs_kmap_lock(void) ++{ ++ mutex_lock(&cifs_kmap_mutex); ++} ++ ++static inline void ++cifs_kmap_unlock(void) ++{ ++ mutex_unlock(&cifs_kmap_mutex); ++} ++#else /* !CONFIG_HIGHMEM */ ++#define cifs_kmap_lock() do { ; } while(0) ++#define cifs_kmap_unlock() do { ; } while(0) ++#endif /* CONFIG_HIGHMEM */ ++ + /* Mark as invalid, all open files on tree connections since they + were closed when session to server was lost */ + static void mark_open_files_invalid(struct cifs_tcon *pTcon) +@@ -1540,6 +1566,7 @@ cifs_readv_receive(struct TCP_Server_Info *server, struct mid_q_entry *mid) + eof_index = eof ? (eof - 1) >> PAGE_CACHE_SHIFT : 0; + cFYI(1, "eof=%llu eof_index=%lu", eof, eof_index); + ++ cifs_kmap_lock(); + list_for_each_entry_safe(page, tpage, &rdata->pages, lru) { + if (remaining >= PAGE_CACHE_SIZE) { + /* enough data to fill the page */ +@@ -1589,6 +1616,7 @@ cifs_readv_receive(struct TCP_Server_Info *server, struct mid_q_entry *mid) + page_cache_release(page); + } + } ++ cifs_kmap_unlock(); + + /* issue the read if we have any iovecs left to fill */ + if (rdata->nr_iov > 1) { +@@ -2171,6 +2199,7 @@ cifs_async_writev(struct cifs_writedata *wdata) + iov[0].iov_base = smb; + + /* marshal up the pages into iov array */ ++ cifs_kmap_lock(); + wdata->bytes = 0; + for (i = 0; i < wdata->nr_pages; i++) { + iov[i + 1].iov_len = min(inode->i_size - +@@ -2179,6 +2208,7 @@ cifs_async_writev(struct cifs_writedata *wdata) + iov[i + 1].iov_base = kmap(wdata->pages[i]); + wdata->bytes += iov[i + 1].iov_len; + } ++ cifs_kmap_unlock(); + + cFYI(1, "async write at %llu %u bytes", wdata->offset, wdata->bytes); + +diff --git a/fs/ext4/balloc.c b/fs/ext4/balloc.c +index 914bf9e..d6970f7 100644 +--- a/fs/ext4/balloc.c ++++ b/fs/ext4/balloc.c +@@ -557,7 +557,8 @@ ext4_fsblk_t ext4_count_free_clusters(struct super_block *sb) + if (bitmap_bh == NULL) + continue; + +- x = ext4_count_free(bitmap_bh, sb->s_blocksize); ++ x = ext4_count_free(bitmap_bh->b_data, ++ EXT4_BLOCKS_PER_GROUP(sb) / 8); + printk(KERN_DEBUG "group %u: stored = %d, counted = %u\n", + i, ext4_free_group_clusters(sb, gdp), x); + bitmap_count += x; +diff --git a/fs/ext4/bitmap.c b/fs/ext4/bitmap.c +index fa3af81..bbde5d5 100644 +--- a/fs/ext4/bitmap.c ++++ b/fs/ext4/bitmap.c +@@ -11,21 +11,15 @@ + #include + #include "ext4.h" + +-#ifdef EXT4FS_DEBUG +- + static const int nibblemap[] = {4, 3, 3, 2, 3, 2, 2, 1, 3, 2, 2, 1, 2, 1, 1, 0}; + +-unsigned int ext4_count_free(struct buffer_head *map, unsigned int numchars) ++unsigned int ext4_count_free(char *bitmap, unsigned int numchars) + { + unsigned int i, sum = 0; + +- if (!map) +- return 0; + for (i = 0; i < numchars; i++) +- sum += nibblemap[map->b_data[i] & 0xf] + +- nibblemap[(map->b_data[i] >> 4) & 0xf]; ++ sum += nibblemap[bitmap[i] & 0xf] + ++ nibblemap[(bitmap[i] >> 4) & 0xf]; + return sum; + } + +-#endif /* EXT4FS_DEBUG */ +- +diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h +index 7b1cd5c..8cb184c 100644 +--- a/fs/ext4/ext4.h ++++ b/fs/ext4/ext4.h +@@ -1123,8 +1123,7 @@ struct ext4_sb_info { + unsigned long s_desc_per_block; /* Number of group descriptors per block */ + ext4_group_t s_groups_count; /* 
Number of groups in the fs */ + ext4_group_t s_blockfile_groups;/* Groups acceptable for non-extent files */ +- unsigned long s_overhead_last; /* Last calculated overhead */ +- unsigned long s_blocks_last; /* Last seen block count */ ++ unsigned long s_overhead; /* # of fs overhead clusters */ + unsigned int s_cluster_ratio; /* Number of blocks per cluster */ + unsigned int s_cluster_bits; /* log2 of s_cluster_ratio */ + loff_t s_bitmap_maxbytes; /* max bytes for bitmap files */ +@@ -1757,7 +1756,7 @@ struct mmpd_data { + # define NORET_AND noreturn, + + /* bitmap.c */ +-extern unsigned int ext4_count_free(struct buffer_head *, unsigned); ++extern unsigned int ext4_count_free(char *bitmap, unsigned numchars); + + /* balloc.c */ + extern unsigned int ext4_block_group(struct super_block *sb, +@@ -1925,6 +1924,7 @@ extern int ext4_group_extend(struct super_block *sb, + ext4_fsblk_t n_blocks_count); + + /* super.c */ ++extern int ext4_calculate_overhead(struct super_block *sb); + extern void *ext4_kvmalloc(size_t size, gfp_t flags); + extern void *ext4_kvzalloc(size_t size, gfp_t flags); + extern void ext4_kvfree(void *ptr); +diff --git a/fs/ext4/ialloc.c b/fs/ext4/ialloc.c +index 8fb6844..6266799 100644 +--- a/fs/ext4/ialloc.c ++++ b/fs/ext4/ialloc.c +@@ -1057,7 +1057,8 @@ unsigned long ext4_count_free_inodes(struct super_block *sb) + if (!bitmap_bh) + continue; + +- x = ext4_count_free(bitmap_bh, EXT4_INODES_PER_GROUP(sb) / 8); ++ x = ext4_count_free(bitmap_bh->b_data, ++ EXT4_INODES_PER_GROUP(sb) / 8); + printk(KERN_DEBUG "group %lu: stored = %d, counted = %lu\n", + (unsigned long) i, ext4_free_inodes_count(sb, gdp), x); + bitmap_count += x; +diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c +index 3ce7613..8b01f9f 100644 +--- a/fs/ext4/inode.c ++++ b/fs/ext4/inode.c +@@ -277,6 +277,15 @@ void ext4_da_update_reserve_space(struct inode *inode, + used = ei->i_reserved_data_blocks; + } + ++ if (unlikely(ei->i_allocated_meta_blocks > ei->i_reserved_meta_blocks)) { ++ ext4_msg(inode->i_sb, KERN_NOTICE, "%s: ino %lu, allocated %d " ++ "with only %d reserved metadata blocks\n", __func__, ++ inode->i_ino, ei->i_allocated_meta_blocks, ++ ei->i_reserved_meta_blocks); ++ WARN_ON(1); ++ ei->i_allocated_meta_blocks = ei->i_reserved_meta_blocks; ++ } ++ + /* Update per-inode reservations */ + ei->i_reserved_data_blocks -= used; + ei->i_reserved_meta_blocks -= ei->i_allocated_meta_blocks; +@@ -1102,6 +1111,17 @@ static int ext4_da_reserve_space(struct inode *inode, ext4_lblk_t lblock) + struct ext4_inode_info *ei = EXT4_I(inode); + unsigned int md_needed; + int ret; ++ ext4_lblk_t save_last_lblock; ++ int save_len; ++ ++ /* ++ * We will charge metadata quota at writeout time; this saves ++ * us from metadata over-estimation, though we may go over by ++ * a small amount in the end. Here we just reserve for data. ++ */ ++ ret = dquot_reserve_block(inode, EXT4_C2B(sbi, 1)); ++ if (ret) ++ return ret; + + /* + * recalculate the amount of metadata blocks to reserve +@@ -1110,32 +1130,31 @@ static int ext4_da_reserve_space(struct inode *inode, ext4_lblk_t lblock) + */ + repeat: + spin_lock(&ei->i_block_reservation_lock); ++ /* ++ * ext4_calc_metadata_amount() has side effects, which we have ++ * to be prepared undo if we fail to claim space. 
++ */ ++ save_len = ei->i_da_metadata_calc_len; ++ save_last_lblock = ei->i_da_metadata_calc_last_lblock; + md_needed = EXT4_NUM_B2C(sbi, + ext4_calc_metadata_amount(inode, lblock)); + trace_ext4_da_reserve_space(inode, md_needed); +- spin_unlock(&ei->i_block_reservation_lock); + + /* +- * We will charge metadata quota at writeout time; this saves +- * us from metadata over-estimation, though we may go over by +- * a small amount in the end. Here we just reserve for data. +- */ +- ret = dquot_reserve_block(inode, EXT4_C2B(sbi, 1)); +- if (ret) +- return ret; +- /* + * We do still charge estimated metadata to the sb though; + * we cannot afford to run out of free blocks. + */ + if (ext4_claim_free_clusters(sbi, md_needed + 1, 0)) { +- dquot_release_reservation_block(inode, EXT4_C2B(sbi, 1)); ++ ei->i_da_metadata_calc_len = save_len; ++ ei->i_da_metadata_calc_last_lblock = save_last_lblock; ++ spin_unlock(&ei->i_block_reservation_lock); + if (ext4_should_retry_alloc(inode->i_sb, &retries)) { + yield(); + goto repeat; + } ++ dquot_release_reservation_block(inode, EXT4_C2B(sbi, 1)); + return -ENOSPC; + } +- spin_lock(&ei->i_block_reservation_lock); + ei->i_reserved_data_blocks++; + ei->i_reserved_meta_blocks += md_needed; + spin_unlock(&ei->i_block_reservation_lock); +diff --git a/fs/ext4/resize.c b/fs/ext4/resize.c +index 996780a..4eac337 100644 +--- a/fs/ext4/resize.c ++++ b/fs/ext4/resize.c +@@ -952,6 +952,11 @@ int ext4_group_add(struct super_block *sb, struct ext4_new_group_data *input) + &sbi->s_flex_groups[flex_group].free_inodes); + } + ++ /* ++ * Update the fs overhead information ++ */ ++ ext4_calculate_overhead(sb); ++ + ext4_handle_dirty_super(handle, sb); + + exit_journal: +diff --git a/fs/ext4/super.c b/fs/ext4/super.c +index a93486e..a071348 100644 +--- a/fs/ext4/super.c ++++ b/fs/ext4/super.c +@@ -3083,6 +3083,114 @@ static void ext4_destroy_lazyinit_thread(void) + kthread_stop(ext4_lazyinit_task); + } + ++/* ++ * Note: calculating the overhead so we can be compatible with ++ * historical BSD practice is quite difficult in the face of ++ * clusters/bigalloc. This is because multiple metadata blocks from ++ * different block group can end up in the same allocation cluster. ++ * Calculating the exact overhead in the face of clustered allocation ++ * requires either O(all block bitmaps) in memory or O(number of block ++ * groups**2) in time. We will still calculate the superblock for ++ * older file systems --- and if we come across with a bigalloc file ++ * system with zero in s_overhead_clusters the estimate will be close to ++ * correct especially for very large cluster sizes --- but for newer ++ * file systems, it's better to calculate this figure once at mkfs ++ * time, and store it in the superblock. If the superblock value is ++ * present (even for non-bigalloc file systems), we will use it. 
++ */ ++static int count_overhead(struct super_block *sb, ext4_group_t grp, ++ char *buf) ++{ ++ struct ext4_sb_info *sbi = EXT4_SB(sb); ++ struct ext4_group_desc *gdp; ++ ext4_fsblk_t first_block, last_block, b; ++ ext4_group_t i, ngroups = ext4_get_groups_count(sb); ++ int s, j, count = 0; ++ ++ first_block = le32_to_cpu(sbi->s_es->s_first_data_block) + ++ (grp * EXT4_BLOCKS_PER_GROUP(sb)); ++ last_block = first_block + EXT4_BLOCKS_PER_GROUP(sb) - 1; ++ for (i = 0; i < ngroups; i++) { ++ gdp = ext4_get_group_desc(sb, i, NULL); ++ b = ext4_block_bitmap(sb, gdp); ++ if (b >= first_block && b <= last_block) { ++ ext4_set_bit(EXT4_B2C(sbi, b - first_block), buf); ++ count++; ++ } ++ b = ext4_inode_bitmap(sb, gdp); ++ if (b >= first_block && b <= last_block) { ++ ext4_set_bit(EXT4_B2C(sbi, b - first_block), buf); ++ count++; ++ } ++ b = ext4_inode_table(sb, gdp); ++ if (b >= first_block && b + sbi->s_itb_per_group <= last_block) ++ for (j = 0; j < sbi->s_itb_per_group; j++, b++) { ++ int c = EXT4_B2C(sbi, b - first_block); ++ ext4_set_bit(c, buf); ++ count++; ++ } ++ if (i != grp) ++ continue; ++ s = 0; ++ if (ext4_bg_has_super(sb, grp)) { ++ ext4_set_bit(s++, buf); ++ count++; ++ } ++ for (j = ext4_bg_num_gdb(sb, grp); j > 0; j--) { ++ ext4_set_bit(EXT4_B2C(sbi, s++), buf); ++ count++; ++ } ++ } ++ if (!count) ++ return 0; ++ return EXT4_CLUSTERS_PER_GROUP(sb) - ++ ext4_count_free(buf, EXT4_CLUSTERS_PER_GROUP(sb) / 8); ++} ++ ++/* ++ * Compute the overhead and stash it in sbi->s_overhead ++ */ ++int ext4_calculate_overhead(struct super_block *sb) ++{ ++ struct ext4_sb_info *sbi = EXT4_SB(sb); ++ struct ext4_super_block *es = sbi->s_es; ++ ext4_group_t i, ngroups = ext4_get_groups_count(sb); ++ ext4_fsblk_t overhead = 0; ++ char *buf = (char *) get_zeroed_page(GFP_KERNEL); ++ ++ memset(buf, 0, PAGE_SIZE); ++ if (!buf) ++ return -ENOMEM; ++ ++ /* ++ * Compute the overhead (FS structures). This is constant ++ * for a given filesystem unless the number of block groups ++ * changes so we cache the previous value until it does. ++ */ ++ ++ /* ++ * All of the blocks before first_data_block are overhead ++ */ ++ overhead = EXT4_B2C(sbi, le32_to_cpu(es->s_first_data_block)); ++ ++ /* ++ * Add the overhead found in each block group ++ */ ++ for (i = 0; i < ngroups; i++) { ++ int blks; ++ ++ blks = count_overhead(sb, i, buf); ++ overhead += blks; ++ if (blks) ++ memset(buf, 0, PAGE_SIZE); ++ cond_resched(); ++ } ++ sbi->s_overhead = overhead; ++ smp_wmb(); ++ free_page((unsigned long) buf); ++ return 0; ++} ++ + static int ext4_fill_super(struct super_block *sb, void *data, int silent) + { + char *orig_data = kstrdup(data, GFP_KERNEL); +@@ -3695,6 +3803,18 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent) + + no_journal: + /* ++ * Get the # of file system overhead blocks from the ++ * superblock if present. ++ */ ++ if (es->s_overhead_clusters) ++ sbi->s_overhead = le32_to_cpu(es->s_overhead_clusters); ++ else { ++ ret = ext4_calculate_overhead(sb); ++ if (ret) ++ goto failed_mount_wq; ++ } ++ ++ /* + * The maximum number of concurrent works can be high and + * concurrency isn't really necessary. Limit it to 1. + */ +@@ -4568,67 +4688,21 @@ restore_opts: + return err; + } + +-/* +- * Note: calculating the overhead so we can be compatible with +- * historical BSD practice is quite difficult in the face of +- * clusters/bigalloc. This is because multiple metadata blocks from +- * different block group can end up in the same allocation cluster. 
+- * Calculating the exact overhead in the face of clustered allocation +- * requires either O(all block bitmaps) in memory or O(number of block +- * groups**2) in time. We will still calculate the superblock for +- * older file systems --- and if we come across with a bigalloc file +- * system with zero in s_overhead_clusters the estimate will be close to +- * correct especially for very large cluster sizes --- but for newer +- * file systems, it's better to calculate this figure once at mkfs +- * time, and store it in the superblock. If the superblock value is +- * present (even for non-bigalloc file systems), we will use it. +- */ + static int ext4_statfs(struct dentry *dentry, struct kstatfs *buf) + { + struct super_block *sb = dentry->d_sb; + struct ext4_sb_info *sbi = EXT4_SB(sb); + struct ext4_super_block *es = sbi->s_es; +- struct ext4_group_desc *gdp; ++ ext4_fsblk_t overhead = 0; + u64 fsid; + s64 bfree; + +- if (test_opt(sb, MINIX_DF)) { +- sbi->s_overhead_last = 0; +- } else if (es->s_overhead_clusters) { +- sbi->s_overhead_last = le32_to_cpu(es->s_overhead_clusters); +- } else if (sbi->s_blocks_last != ext4_blocks_count(es)) { +- ext4_group_t i, ngroups = ext4_get_groups_count(sb); +- ext4_fsblk_t overhead = 0; +- +- /* +- * Compute the overhead (FS structures). This is constant +- * for a given filesystem unless the number of block groups +- * changes so we cache the previous value until it does. +- */ +- +- /* +- * All of the blocks before first_data_block are +- * overhead +- */ +- overhead = EXT4_B2C(sbi, le32_to_cpu(es->s_first_data_block)); +- +- /* +- * Add the overhead found in each block group +- */ +- for (i = 0; i < ngroups; i++) { +- gdp = ext4_get_group_desc(sb, i, NULL); +- overhead += ext4_num_overhead_clusters(sb, i, gdp); +- cond_resched(); +- } +- sbi->s_overhead_last = overhead; +- smp_wmb(); +- sbi->s_blocks_last = ext4_blocks_count(es); +- } ++ if (!test_opt(sb, MINIX_DF)) ++ overhead = sbi->s_overhead; + + buf->f_type = EXT4_SUPER_MAGIC; + buf->f_bsize = sb->s_blocksize; +- buf->f_blocks = (ext4_blocks_count(es) - +- EXT4_C2B(sbi, sbi->s_overhead_last)); ++ buf->f_blocks = ext4_blocks_count(es) - EXT4_C2B(sbi, sbi->s_overhead); + bfree = percpu_counter_sum_positive(&sbi->s_freeclusters_counter) - + percpu_counter_sum_positive(&sbi->s_dirtyclusters_counter); + /* prevent underflow in case that few free space is available */ +diff --git a/fs/hugetlbfs/inode.c b/fs/hugetlbfs/inode.c +index ebc2f4d..0aa424a 100644 +--- a/fs/hugetlbfs/inode.c ++++ b/fs/hugetlbfs/inode.c +@@ -569,7 +569,8 @@ static int hugetlbfs_set_page_dirty(struct page *page) + } + + static int hugetlbfs_migrate_page(struct address_space *mapping, +- struct page *newpage, struct page *page) ++ struct page *newpage, struct page *page, ++ enum migrate_mode mode) + { + int rc; + +diff --git a/fs/locks.c b/fs/locks.c +index 6a64f15..fcc50ab 100644 +--- a/fs/locks.c ++++ b/fs/locks.c +@@ -308,7 +308,7 @@ static int flock_make_lock(struct file *filp, struct file_lock **lock, + return 0; + } + +-static int assign_type(struct file_lock *fl, int type) ++static int assign_type(struct file_lock *fl, long type) + { + switch (type) { + case F_RDLCK: +@@ -445,7 +445,7 @@ static const struct lock_manager_operations lease_manager_ops = { + /* + * Initialize a lease, use the default lock manager operations + */ +-static int lease_init(struct file *filp, int type, struct file_lock *fl) ++static int lease_init(struct file *filp, long type, struct file_lock *fl) + { + if (assign_type(fl, type) != 0) + return 
-EINVAL; +@@ -463,7 +463,7 @@ static int lease_init(struct file *filp, int type, struct file_lock *fl) + } + + /* Allocate a file_lock initialised to this type of lease */ +-static struct file_lock *lease_alloc(struct file *filp, int type) ++static struct file_lock *lease_alloc(struct file *filp, long type) + { + struct file_lock *fl = locks_alloc_lock(); + int error = -ENOMEM; +diff --git a/fs/nfs/internal.h b/fs/nfs/internal.h +index 3f4d957..68b3f20 100644 +--- a/fs/nfs/internal.h ++++ b/fs/nfs/internal.h +@@ -330,7 +330,7 @@ void nfs_commit_release_pages(struct nfs_write_data *data); + + #ifdef CONFIG_MIGRATION + extern int nfs_migrate_page(struct address_space *, +- struct page *, struct page *); ++ struct page *, struct page *, enum migrate_mode); + #else + #define nfs_migrate_page NULL + #endif +diff --git a/fs/nfs/write.c b/fs/nfs/write.c +index 4efd421..c6e523a 100644 +--- a/fs/nfs/write.c ++++ b/fs/nfs/write.c +@@ -1711,7 +1711,7 @@ out_error: + + #ifdef CONFIG_MIGRATION + int nfs_migrate_page(struct address_space *mapping, struct page *newpage, +- struct page *page) ++ struct page *page, enum migrate_mode mode) + { + /* + * If PagePrivate is set, then the page is currently associated with +@@ -1726,7 +1726,7 @@ int nfs_migrate_page(struct address_space *mapping, struct page *newpage, + + nfs_fscache_release_page(page, GFP_KERNEL); + +- return migrate_page(mapping, newpage, page); ++ return migrate_page(mapping, newpage, page, mode); + } + #endif + +diff --git a/fs/udf/super.c b/fs/udf/super.c +index 270e135..516b7f0 100644 +--- a/fs/udf/super.c ++++ b/fs/udf/super.c +@@ -1285,7 +1285,7 @@ static int udf_load_logicalvol(struct super_block *sb, sector_t block, + BUG_ON(ident != TAG_IDENT_LVD); + lvd = (struct logicalVolDesc *)bh->b_data; + table_len = le32_to_cpu(lvd->mapTableLength); +- if (sizeof(*lvd) + table_len > sb->s_blocksize) { ++ if (table_len > sb->s_blocksize - sizeof(*lvd)) { + udf_err(sb, "error loading logical volume descriptor: " + "Partition table too long (%u > %lu)\n", table_len, + sb->s_blocksize - sizeof(*lvd)); +diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h +index 0ed1eb0..ff039f0 100644 +--- a/include/linux/blkdev.h ++++ b/include/linux/blkdev.h +@@ -481,6 +481,7 @@ static inline void queue_flag_clear(unsigned int flag, struct request_queue *q) + + #define blk_queue_tagged(q) test_bit(QUEUE_FLAG_QUEUED, &(q)->queue_flags) + #define blk_queue_stopped(q) test_bit(QUEUE_FLAG_STOPPED, &(q)->queue_flags) ++#define blk_queue_dead(q) test_bit(QUEUE_FLAG_DEAD, &(q)->queue_flags) + #define blk_queue_nomerges(q) test_bit(QUEUE_FLAG_NOMERGES, &(q)->queue_flags) + #define blk_queue_noxmerges(q) \ + test_bit(QUEUE_FLAG_NOXMERGES, &(q)->queue_flags) +diff --git a/include/linux/cpu.h b/include/linux/cpu.h +index 6cb60fd..c692acc 100644 +--- a/include/linux/cpu.h ++++ b/include/linux/cpu.h +@@ -66,8 +66,9 @@ enum { + /* migration should happen before other stuff but after perf */ + CPU_PRI_PERF = 20, + CPU_PRI_MIGRATION = 10, +- /* prepare workqueues for other notifiers */ +- CPU_PRI_WORKQUEUE = 5, ++ /* bring up workqueues before normal notifiers and down after */ ++ CPU_PRI_WORKQUEUE_UP = 5, ++ CPU_PRI_WORKQUEUE_DOWN = -5, + }; + + #define CPU_ONLINE 0x0002 /* CPU (unsigned)v is up */ +diff --git a/include/linux/cpuset.h b/include/linux/cpuset.h +index e9eaec5..7a7e5fd 100644 +--- a/include/linux/cpuset.h ++++ b/include/linux/cpuset.h +@@ -89,42 +89,33 @@ extern void rebuild_sched_domains(void); + extern void cpuset_print_task_mems_allowed(struct 
task_struct *p); + + /* +- * reading current mems_allowed and mempolicy in the fastpath must protected +- * by get_mems_allowed() ++ * get_mems_allowed is required when making decisions involving mems_allowed ++ * such as during page allocation. mems_allowed can be updated in parallel ++ * and depending on the new value an operation can fail potentially causing ++ * process failure. A retry loop with get_mems_allowed and put_mems_allowed ++ * prevents these artificial failures. + */ +-static inline void get_mems_allowed(void) ++static inline unsigned int get_mems_allowed(void) + { +- current->mems_allowed_change_disable++; +- +- /* +- * ensure that reading mems_allowed and mempolicy happens after the +- * update of ->mems_allowed_change_disable. +- * +- * the write-side task finds ->mems_allowed_change_disable is not 0, +- * and knows the read-side task is reading mems_allowed or mempolicy, +- * so it will clear old bits lazily. +- */ +- smp_mb(); ++ return read_seqcount_begin(¤t->mems_allowed_seq); + } + +-static inline void put_mems_allowed(void) ++/* ++ * If this returns false, the operation that took place after get_mems_allowed ++ * may have failed. It is up to the caller to retry the operation if ++ * appropriate. ++ */ ++static inline bool put_mems_allowed(unsigned int seq) + { +- /* +- * ensure that reading mems_allowed and mempolicy before reducing +- * mems_allowed_change_disable. +- * +- * the write-side task will know that the read-side task is still +- * reading mems_allowed or mempolicy, don't clears old bits in the +- * nodemask. +- */ +- smp_mb(); +- --ACCESS_ONCE(current->mems_allowed_change_disable); ++ return !read_seqcount_retry(¤t->mems_allowed_seq, seq); + } + + static inline void set_mems_allowed(nodemask_t nodemask) + { + task_lock(current); ++ write_seqcount_begin(¤t->mems_allowed_seq); + current->mems_allowed = nodemask; ++ write_seqcount_end(¤t->mems_allowed_seq); + task_unlock(current); + } + +@@ -234,12 +225,14 @@ static inline void set_mems_allowed(nodemask_t nodemask) + { + } + +-static inline void get_mems_allowed(void) ++static inline unsigned int get_mems_allowed(void) + { ++ return 0; + } + +-static inline void put_mems_allowed(void) ++static inline bool put_mems_allowed(unsigned int seq) + { ++ return true; + } + + #endif /* !CONFIG_CPUSETS */ +diff --git a/include/linux/fs.h b/include/linux/fs.h +index 43d36b7..29b6353 100644 +--- a/include/linux/fs.h ++++ b/include/linux/fs.h +@@ -525,6 +525,7 @@ enum positive_aop_returns { + struct page; + struct address_space; + struct writeback_control; ++enum migrate_mode; + + struct iov_iter { + const struct iovec *iov; +@@ -609,9 +610,12 @@ struct address_space_operations { + loff_t offset, unsigned long nr_segs); + int (*get_xip_mem)(struct address_space *, pgoff_t, int, + void **, unsigned long *); +- /* migrate the contents of a page to the specified target */ ++ /* ++ * migrate the contents of a page to the specified target. If sync ++ * is false, it must not block. 
++ */ + int (*migratepage) (struct address_space *, +- struct page *, struct page *); ++ struct page *, struct page *, enum migrate_mode); + int (*launder_page) (struct page *); + int (*is_partially_uptodate) (struct page *, read_descriptor_t *, + unsigned long); +@@ -2586,7 +2590,8 @@ extern int generic_check_addressable(unsigned, u64); + + #ifdef CONFIG_MIGRATION + extern int buffer_migrate_page(struct address_space *, +- struct page *, struct page *); ++ struct page *, struct page *, ++ enum migrate_mode); + #else + #define buffer_migrate_page NULL + #endif +diff --git a/include/linux/init_task.h b/include/linux/init_task.h +index 32574ee..df53fdf 100644 +--- a/include/linux/init_task.h ++++ b/include/linux/init_task.h +@@ -30,6 +30,13 @@ extern struct fs_struct init_fs; + #define INIT_THREADGROUP_FORK_LOCK(sig) + #endif + ++#ifdef CONFIG_CPUSETS ++#define INIT_CPUSET_SEQ \ ++ .mems_allowed_seq = SEQCNT_ZERO, ++#else ++#define INIT_CPUSET_SEQ ++#endif ++ + #define INIT_SIGNALS(sig) { \ + .nr_threads = 1, \ + .wait_chldexit = __WAIT_QUEUE_HEAD_INITIALIZER(sig.wait_chldexit),\ +@@ -193,6 +200,7 @@ extern struct cred init_cred; + INIT_FTRACE_GRAPH \ + INIT_TRACE_RECURSION \ + INIT_TASK_RCU_PREEMPT(tsk) \ ++ INIT_CPUSET_SEQ \ + } + + +diff --git a/include/linux/migrate.h b/include/linux/migrate.h +index e39aeec..eaf8674 100644 +--- a/include/linux/migrate.h ++++ b/include/linux/migrate.h +@@ -6,18 +6,31 @@ + + typedef struct page *new_page_t(struct page *, unsigned long private, int **); + ++/* ++ * MIGRATE_ASYNC means never block ++ * MIGRATE_SYNC_LIGHT in the current implementation means to allow blocking ++ * on most operations but not ->writepage as the potential stall time ++ * is too significant ++ * MIGRATE_SYNC will block when migrating pages ++ */ ++enum migrate_mode { ++ MIGRATE_ASYNC, ++ MIGRATE_SYNC_LIGHT, ++ MIGRATE_SYNC, ++}; ++ + #ifdef CONFIG_MIGRATION + #define PAGE_MIGRATION 1 + + extern void putback_lru_pages(struct list_head *l); + extern int migrate_page(struct address_space *, +- struct page *, struct page *); ++ struct page *, struct page *, enum migrate_mode); + extern int migrate_pages(struct list_head *l, new_page_t x, + unsigned long private, bool offlining, +- bool sync); ++ enum migrate_mode mode); + extern int migrate_huge_pages(struct list_head *l, new_page_t x, + unsigned long private, bool offlining, +- bool sync); ++ enum migrate_mode mode); + + extern int fail_migrate_page(struct address_space *, + struct page *, struct page *); +@@ -36,10 +49,10 @@ extern int migrate_huge_page_move_mapping(struct address_space *mapping, + static inline void putback_lru_pages(struct list_head *l) {} + static inline int migrate_pages(struct list_head *l, new_page_t x, + unsigned long private, bool offlining, +- bool sync) { return -ENOSYS; } ++ enum migrate_mode mode) { return -ENOSYS; } + static inline int migrate_huge_pages(struct list_head *l, new_page_t x, + unsigned long private, bool offlining, +- bool sync) { return -ENOSYS; } ++ enum migrate_mode mode) { return -ENOSYS; } + + static inline int migrate_prep(void) { return -ENOSYS; } + static inline int migrate_prep_local(void) { return -ENOSYS; } +diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h +index 905b1e1..25842b6 100644 +--- a/include/linux/mmzone.h ++++ b/include/linux/mmzone.h +@@ -173,6 +173,8 @@ static inline int is_unevictable_lru(enum lru_list l) + #define ISOLATE_CLEAN ((__force isolate_mode_t)0x4) + /* Isolate unmapped file */ + #define ISOLATE_UNMAPPED ((__force isolate_mode_t)0x8) ++/* 
Isolate for asynchronous migration */ ++#define ISOLATE_ASYNC_MIGRATE ((__force isolate_mode_t)0x10) + + /* LRU Isolation modes. */ + typedef unsigned __bitwise__ isolate_mode_t; +diff --git a/include/linux/sched.h b/include/linux/sched.h +index 5afa2a3..d336c35 100644 +--- a/include/linux/sched.h ++++ b/include/linux/sched.h +@@ -145,6 +145,7 @@ extern unsigned long this_cpu_load(void); + + + extern void calc_global_load(unsigned long ticks); ++extern void update_cpu_load_nohz(void); + + extern unsigned long get_parent_ip(unsigned long addr); + +@@ -1481,7 +1482,7 @@ struct task_struct { + #endif + #ifdef CONFIG_CPUSETS + nodemask_t mems_allowed; /* Protected by alloc_lock */ +- int mems_allowed_change_disable; ++ seqcount_t mems_allowed_seq; /* Seqence no to catch updates */ + int cpuset_mem_spread_rotor; + int cpuset_slab_spread_rotor; + #endif +diff --git a/include/target/target_core_base.h b/include/target/target_core_base.h +index 94bbec3..6ee550e 100644 +--- a/include/target/target_core_base.h ++++ b/include/target/target_core_base.h +@@ -157,6 +157,7 @@ enum tcm_sense_reason_table { + TCM_CHECK_CONDITION_UNIT_ATTENTION = 0x0e, + TCM_CHECK_CONDITION_NOT_READY = 0x0f, + TCM_RESERVATION_CONFLICT = 0x10, ++ TCM_ADDRESS_OUT_OF_RANGE = 0x11, + }; + + struct se_obj { +diff --git a/kernel/cpuset.c b/kernel/cpuset.c +index 0b1712d..46a1d3c 100644 +--- a/kernel/cpuset.c ++++ b/kernel/cpuset.c +@@ -964,7 +964,6 @@ static void cpuset_change_task_nodemask(struct task_struct *tsk, + { + bool need_loop; + +-repeat: + /* + * Allow tasks that have access to memory reserves because they have + * been OOM killed to get memory anywhere. +@@ -983,45 +982,19 @@ repeat: + */ + need_loop = task_has_mempolicy(tsk) || + !nodes_intersects(*newmems, tsk->mems_allowed); +- nodes_or(tsk->mems_allowed, tsk->mems_allowed, *newmems); +- mpol_rebind_task(tsk, newmems, MPOL_REBIND_STEP1); + +- /* +- * ensure checking ->mems_allowed_change_disable after setting all new +- * allowed nodes. +- * +- * the read-side task can see an nodemask with new allowed nodes and +- * old allowed nodes. and if it allocates page when cpuset clears newly +- * disallowed ones continuous, it can see the new allowed bits. +- * +- * And if setting all new allowed nodes is after the checking, setting +- * all new allowed nodes and clearing newly disallowed ones will be done +- * continuous, and the read-side task may find no node to alloc page. +- */ +- smp_mb(); ++ if (need_loop) ++ write_seqcount_begin(&tsk->mems_allowed_seq); + +- /* +- * Allocation of memory is very fast, we needn't sleep when waiting +- * for the read-side. +- */ +- while (need_loop && ACCESS_ONCE(tsk->mems_allowed_change_disable)) { +- task_unlock(tsk); +- if (!task_curr(tsk)) +- yield(); +- goto repeat; +- } +- +- /* +- * ensure checking ->mems_allowed_change_disable before clearing all new +- * disallowed nodes. +- * +- * if clearing newly disallowed bits before the checking, the read-side +- * task may find no node to alloc page. 
+- */ +- smp_mb(); ++ nodes_or(tsk->mems_allowed, tsk->mems_allowed, *newmems); ++ mpol_rebind_task(tsk, newmems, MPOL_REBIND_STEP1); + + mpol_rebind_task(tsk, newmems, MPOL_REBIND_STEP2); + tsk->mems_allowed = *newmems; ++ ++ if (need_loop) ++ write_seqcount_end(&tsk->mems_allowed_seq); ++ + task_unlock(tsk); + } + +diff --git a/kernel/fork.c b/kernel/fork.c +index 79ee71f..222457a 100644 +--- a/kernel/fork.c ++++ b/kernel/fork.c +@@ -979,6 +979,9 @@ static int copy_signal(unsigned long clone_flags, struct task_struct *tsk) + #ifdef CONFIG_CGROUPS + init_rwsem(&sig->threadgroup_fork_lock); + #endif ++#ifdef CONFIG_CPUSETS ++ seqcount_init(&tsk->mems_allowed_seq); ++#endif + + sig->oom_adj = current->signal->oom_adj; + sig->oom_score_adj = current->signal->oom_score_adj; +diff --git a/kernel/power/hibernate.c b/kernel/power/hibernate.c +index 7c0d578..013bd2e 100644 +--- a/kernel/power/hibernate.c ++++ b/kernel/power/hibernate.c +@@ -367,6 +367,7 @@ int hibernation_snapshot(int platform_mode) + } + + suspend_console(); ++ ftrace_stop(); + pm_restrict_gfp_mask(); + error = dpm_suspend(PMSG_FREEZE); + if (error) +@@ -392,6 +393,7 @@ int hibernation_snapshot(int platform_mode) + if (error || !in_suspend) + pm_restore_gfp_mask(); + ++ ftrace_start(); + resume_console(); + dpm_complete(msg); + +@@ -496,6 +498,7 @@ int hibernation_restore(int platform_mode) + + pm_prepare_console(); + suspend_console(); ++ ftrace_stop(); + pm_restrict_gfp_mask(); + error = dpm_suspend_start(PMSG_QUIESCE); + if (!error) { +@@ -503,6 +506,7 @@ int hibernation_restore(int platform_mode) + dpm_resume_end(PMSG_RECOVER); + } + pm_restore_gfp_mask(); ++ ftrace_start(); + resume_console(); + pm_restore_console(); + return error; +@@ -529,6 +533,7 @@ int hibernation_platform_enter(void) + + entering_platform_hibernation = true; + suspend_console(); ++ ftrace_stop(); + error = dpm_suspend_start(PMSG_HIBERNATE); + if (error) { + if (hibernation_ops->recover) +@@ -572,6 +577,7 @@ int hibernation_platform_enter(void) + Resume_devices: + entering_platform_hibernation = false; + dpm_resume_end(PMSG_RESTORE); ++ ftrace_start(); + resume_console(); + + Close: +diff --git a/kernel/power/suspend.c b/kernel/power/suspend.c +index 4953dc0..af48faa 100644 +--- a/kernel/power/suspend.c ++++ b/kernel/power/suspend.c +@@ -25,6 +25,7 @@ + #include + #include + #include ++#include + #include + + #include "power.h" +@@ -220,6 +221,7 @@ int suspend_devices_and_enter(suspend_state_t state) + goto Close; + } + suspend_console(); ++ ftrace_stop(); + suspend_test_start(); + error = dpm_suspend_start(PMSG_SUSPEND); + if (error) { +@@ -239,6 +241,7 @@ int suspend_devices_and_enter(suspend_state_t state) + suspend_test_start(); + dpm_resume_end(PMSG_RESUME); + suspend_test_finish("resume devices"); ++ ftrace_start(); + resume_console(); + Close: + if (suspend_ops->end) +diff --git a/kernel/sched.c b/kernel/sched.c +index 52ac69b..9cd8ca7 100644 +--- a/kernel/sched.c ++++ b/kernel/sched.c +@@ -1887,7 +1887,7 @@ static void double_rq_unlock(struct rq *rq1, struct rq *rq2) + + static void update_sysctl(void); + static int get_update_sysctl_factor(void); +-static void update_cpu_load(struct rq *this_rq); ++static void update_idle_cpu_load(struct rq *this_rq); + + static inline void __set_task_cpu(struct task_struct *p, unsigned int cpu) + { +@@ -3855,22 +3855,13 @@ decay_load_missed(unsigned long load, unsigned long missed_updates, int idx) + * scheduler tick (TICK_NSEC). With tickless idle this will not be called + * every tick. 
We fix it up based on jiffies. + */ +-static void update_cpu_load(struct rq *this_rq) ++static void __update_cpu_load(struct rq *this_rq, unsigned long this_load, ++ unsigned long pending_updates) + { +- unsigned long this_load = this_rq->load.weight; +- unsigned long curr_jiffies = jiffies; +- unsigned long pending_updates; + int i, scale; + + this_rq->nr_load_updates++; + +- /* Avoid repeated calls on same jiffy, when moving in and out of idle */ +- if (curr_jiffies == this_rq->last_load_update_tick) +- return; +- +- pending_updates = curr_jiffies - this_rq->last_load_update_tick; +- this_rq->last_load_update_tick = curr_jiffies; +- + /* Update our load: */ + this_rq->cpu_load[0] = this_load; /* Fasttrack for idx 0 */ + for (i = 1, scale = 2; i < CPU_LOAD_IDX_MAX; i++, scale += scale) { +@@ -3895,9 +3886,78 @@ static void update_cpu_load(struct rq *this_rq) + sched_avg_update(this_rq); + } + ++#ifdef CONFIG_NO_HZ ++/* ++ * There is no sane way to deal with nohz on smp when using jiffies because the ++ * cpu doing the jiffies update might drift wrt the cpu doing the jiffy reading ++ * causing off-by-one errors in observed deltas; {0,2} instead of {1,1}. ++ * ++ * Therefore we cannot use the delta approach from the regular tick since that ++ * would seriously skew the load calculation. However we'll make do for those ++ * updates happening while idle (nohz_idle_balance) or coming out of idle ++ * (tick_nohz_idle_exit). ++ * ++ * This means we might still be one tick off for nohz periods. ++ */ ++ ++/* ++ * Called from nohz_idle_balance() to update the load ratings before doing the ++ * idle balance. ++ */ ++static void update_idle_cpu_load(struct rq *this_rq) ++{ ++ unsigned long curr_jiffies = ACCESS_ONCE(jiffies); ++ unsigned long load = this_rq->load.weight; ++ unsigned long pending_updates; ++ ++ /* ++ * bail if there's load or we're actually up-to-date. ++ */ ++ if (load || curr_jiffies == this_rq->last_load_update_tick) ++ return; ++ ++ pending_updates = curr_jiffies - this_rq->last_load_update_tick; ++ this_rq->last_load_update_tick = curr_jiffies; ++ ++ __update_cpu_load(this_rq, load, pending_updates); ++} ++ ++/* ++ * Called from tick_nohz_idle_exit() -- try and fix up the ticks we missed. ++ */ ++void update_cpu_load_nohz(void) ++{ ++ struct rq *this_rq = this_rq(); ++ unsigned long curr_jiffies = ACCESS_ONCE(jiffies); ++ unsigned long pending_updates; ++ ++ if (curr_jiffies == this_rq->last_load_update_tick) ++ return; ++ ++ raw_spin_lock(&this_rq->lock); ++ pending_updates = curr_jiffies - this_rq->last_load_update_tick; ++ if (pending_updates) { ++ this_rq->last_load_update_tick = curr_jiffies; ++ /* ++ * We were idle, this means load 0, the current load might be ++ * !0 due to remote wakeups and the sort. ++ */ ++ __update_cpu_load(this_rq, 0, pending_updates); ++ } ++ raw_spin_unlock(&this_rq->lock); ++} ++#endif /* CONFIG_NO_HZ */ ++ ++/* ++ * Called from scheduler_tick() ++ */ + static void update_cpu_load_active(struct rq *this_rq) + { +- update_cpu_load(this_rq); ++ /* ++ * See the mess around update_idle_cpu_load() / update_cpu_load_nohz(). 
++ */ ++ this_rq->last_load_update_tick = jiffies; ++ __update_cpu_load(this_rq, this_rq->load.weight, 1); + + calc_load_account_active(this_rq); + } +diff --git a/kernel/sched_fair.c b/kernel/sched_fair.c +index 8a39fa3..66e4576 100644 +--- a/kernel/sched_fair.c ++++ b/kernel/sched_fair.c +@@ -4735,7 +4735,7 @@ static void nohz_idle_balance(int this_cpu, enum cpu_idle_type idle) + + raw_spin_lock_irq(&this_rq->lock); + update_rq_clock(this_rq); +- update_cpu_load(this_rq); ++ update_idle_cpu_load(this_rq); + raw_spin_unlock_irq(&this_rq->lock); + + rebalance_domains(balance_cpu, CPU_IDLE); +diff --git a/kernel/time/tick-sched.c b/kernel/time/tick-sched.c +index 9955ebd..793548c 100644 +--- a/kernel/time/tick-sched.c ++++ b/kernel/time/tick-sched.c +@@ -549,6 +549,7 @@ void tick_nohz_restart_sched_tick(void) + /* Update jiffies first */ + select_nohz_load_balancer(0); + tick_do_update_jiffies64(now); ++ update_cpu_load_nohz(); + + #ifndef CONFIG_VIRT_CPU_ACCOUNTING + /* +diff --git a/kernel/workqueue.c b/kernel/workqueue.c +index 7947e16..a650bee 100644 +--- a/kernel/workqueue.c ++++ b/kernel/workqueue.c +@@ -3586,6 +3586,41 @@ static int __devinit workqueue_cpu_callback(struct notifier_block *nfb, + return notifier_from_errno(0); + } + ++/* ++ * Workqueues should be brought up before normal priority CPU notifiers. ++ * This will be registered high priority CPU notifier. ++ */ ++static int __devinit workqueue_cpu_up_callback(struct notifier_block *nfb, ++ unsigned long action, ++ void *hcpu) ++{ ++ switch (action & ~CPU_TASKS_FROZEN) { ++ case CPU_UP_PREPARE: ++ case CPU_UP_CANCELED: ++ case CPU_DOWN_FAILED: ++ case CPU_ONLINE: ++ return workqueue_cpu_callback(nfb, action, hcpu); ++ } ++ return NOTIFY_OK; ++} ++ ++/* ++ * Workqueues should be brought down after normal priority CPU notifiers. ++ * This will be registered as low priority CPU notifier. ++ */ ++static int __devinit workqueue_cpu_down_callback(struct notifier_block *nfb, ++ unsigned long action, ++ void *hcpu) ++{ ++ switch (action & ~CPU_TASKS_FROZEN) { ++ case CPU_DOWN_PREPARE: ++ case CPU_DYING: ++ case CPU_POST_DEAD: ++ return workqueue_cpu_callback(nfb, action, hcpu); ++ } ++ return NOTIFY_OK; ++} ++ + #ifdef CONFIG_SMP + + struct work_for_cpu { +@@ -3779,7 +3814,8 @@ static int __init init_workqueues(void) + unsigned int cpu; + int i; + +- cpu_notifier(workqueue_cpu_callback, CPU_PRI_WORKQUEUE); ++ cpu_notifier(workqueue_cpu_up_callback, CPU_PRI_WORKQUEUE_UP); ++ cpu_notifier(workqueue_cpu_down_callback, CPU_PRI_WORKQUEUE_DOWN); + + /* initialize gcwqs */ + for_each_gcwq_cpu(cpu) { +diff --git a/mm/compaction.c b/mm/compaction.c +index 50f1c60..46973fb 100644 +--- a/mm/compaction.c ++++ b/mm/compaction.c +@@ -372,7 +372,7 @@ static isolate_migrate_t isolate_migratepages(struct zone *zone, + } + + if (!cc->sync) +- mode |= ISOLATE_CLEAN; ++ mode |= ISOLATE_ASYNC_MIGRATE; + + /* Try isolate the page */ + if (__isolate_lru_page(page, mode, 0) != 0) +@@ -577,7 +577,7 @@ static int compact_zone(struct zone *zone, struct compact_control *cc) + nr_migrate = cc->nr_migratepages; + err = migrate_pages(&cc->migratepages, compaction_alloc, + (unsigned long)cc, false, +- cc->sync); ++ cc->sync ? 
MIGRATE_SYNC_LIGHT : MIGRATE_ASYNC); + update_nr_listpages(cc); + nr_remaining = cc->nr_migratepages; + +diff --git a/mm/filemap.c b/mm/filemap.c +index 03c5b0e..556858c 100644 +--- a/mm/filemap.c ++++ b/mm/filemap.c +@@ -500,10 +500,13 @@ struct page *__page_cache_alloc(gfp_t gfp) + struct page *page; + + if (cpuset_do_page_mem_spread()) { +- get_mems_allowed(); +- n = cpuset_mem_spread_node(); +- page = alloc_pages_exact_node(n, gfp, 0); +- put_mems_allowed(); ++ unsigned int cpuset_mems_cookie; ++ do { ++ cpuset_mems_cookie = get_mems_allowed(); ++ n = cpuset_mem_spread_node(); ++ page = alloc_pages_exact_node(n, gfp, 0); ++ } while (!put_mems_allowed(cpuset_mems_cookie) && !page); ++ + return page; + } + return alloc_pages(gfp, 0); +diff --git a/mm/hugetlb.c b/mm/hugetlb.c +index 7c535b0..b1e1bad 100644 +--- a/mm/hugetlb.c ++++ b/mm/hugetlb.c +@@ -538,8 +538,10 @@ static struct page *dequeue_huge_page_vma(struct hstate *h, + struct zonelist *zonelist; + struct zone *zone; + struct zoneref *z; ++ unsigned int cpuset_mems_cookie; + +- get_mems_allowed(); ++retry_cpuset: ++ cpuset_mems_cookie = get_mems_allowed(); + zonelist = huge_zonelist(vma, address, + htlb_alloc_mask, &mpol, &nodemask); + /* +@@ -566,10 +568,15 @@ static struct page *dequeue_huge_page_vma(struct hstate *h, + } + } + } +-err: ++ + mpol_cond_put(mpol); +- put_mems_allowed(); ++ if (unlikely(!put_mems_allowed(cpuset_mems_cookie) && !page)) ++ goto retry_cpuset; + return page; ++ ++err: ++ mpol_cond_put(mpol); ++ return NULL; + } + + static void update_and_free_page(struct hstate *h, struct page *page) +diff --git a/mm/memory-failure.c b/mm/memory-failure.c +index 06d3479..5bd5bb1 100644 +--- a/mm/memory-failure.c ++++ b/mm/memory-failure.c +@@ -1427,8 +1427,8 @@ static int soft_offline_huge_page(struct page *page, int flags) + /* Keep page count to indicate a given hugepage is isolated. 
*/ + + list_add(&hpage->lru, &pagelist); +- ret = migrate_huge_pages(&pagelist, new_page, MPOL_MF_MOVE_ALL, 0, +- true); ++ ret = migrate_huge_pages(&pagelist, new_page, MPOL_MF_MOVE_ALL, false, ++ MIGRATE_SYNC); + if (ret) { + struct page *page1, *page2; + list_for_each_entry_safe(page1, page2, &pagelist, lru) +@@ -1557,7 +1557,7 @@ int soft_offline_page(struct page *page, int flags) + page_is_file_cache(page)); + list_add(&page->lru, &pagelist); + ret = migrate_pages(&pagelist, new_page, MPOL_MF_MOVE_ALL, +- 0, true); ++ false, MIGRATE_SYNC); + if (ret) { + putback_lru_pages(&pagelist); + pr_info("soft offline: %#lx: migration failed %d, type %lx\n", +diff --git a/mm/memory_hotplug.c b/mm/memory_hotplug.c +index 2168489..6629faf 100644 +--- a/mm/memory_hotplug.c ++++ b/mm/memory_hotplug.c +@@ -809,7 +809,7 @@ do_migrate_range(unsigned long start_pfn, unsigned long end_pfn) + } + /* this function returns # of failed pages */ + ret = migrate_pages(&source, hotremove_migrate_alloc, 0, +- true, true); ++ true, MIGRATE_SYNC); + if (ret) + putback_lru_pages(&source); + } +diff --git a/mm/mempolicy.c b/mm/mempolicy.c +index b26aae2..c0007f9 100644 +--- a/mm/mempolicy.c ++++ b/mm/mempolicy.c +@@ -942,7 +942,7 @@ static int migrate_to_node(struct mm_struct *mm, int source, int dest, + + if (!list_empty(&pagelist)) { + err = migrate_pages(&pagelist, new_node_page, dest, +- false, true); ++ false, MIGRATE_SYNC); + if (err) + putback_lru_pages(&pagelist); + } +@@ -1843,18 +1843,24 @@ struct page * + alloc_pages_vma(gfp_t gfp, int order, struct vm_area_struct *vma, + unsigned long addr, int node) + { +- struct mempolicy *pol = get_vma_policy(current, vma, addr); ++ struct mempolicy *pol; + struct zonelist *zl; + struct page *page; ++ unsigned int cpuset_mems_cookie; ++ ++retry_cpuset: ++ pol = get_vma_policy(current, vma, addr); ++ cpuset_mems_cookie = get_mems_allowed(); + +- get_mems_allowed(); + if (unlikely(pol->mode == MPOL_INTERLEAVE)) { + unsigned nid; + + nid = interleave_nid(pol, vma, addr, PAGE_SHIFT + order); + mpol_cond_put(pol); + page = alloc_page_interleave(gfp, order, nid); +- put_mems_allowed(); ++ if (unlikely(!put_mems_allowed(cpuset_mems_cookie) && !page)) ++ goto retry_cpuset; ++ + return page; + } + zl = policy_zonelist(gfp, pol, node); +@@ -1865,7 +1871,8 @@ alloc_pages_vma(gfp_t gfp, int order, struct vm_area_struct *vma, + struct page *page = __alloc_pages_nodemask(gfp, order, + zl, policy_nodemask(gfp, pol)); + __mpol_put(pol); +- put_mems_allowed(); ++ if (unlikely(!put_mems_allowed(cpuset_mems_cookie) && !page)) ++ goto retry_cpuset; + return page; + } + /* +@@ -1873,7 +1880,8 @@ alloc_pages_vma(gfp_t gfp, int order, struct vm_area_struct *vma, + */ + page = __alloc_pages_nodemask(gfp, order, zl, + policy_nodemask(gfp, pol)); +- put_mems_allowed(); ++ if (unlikely(!put_mems_allowed(cpuset_mems_cookie) && !page)) ++ goto retry_cpuset; + return page; + } + +@@ -1900,11 +1908,14 @@ struct page *alloc_pages_current(gfp_t gfp, unsigned order) + { + struct mempolicy *pol = current->mempolicy; + struct page *page; ++ unsigned int cpuset_mems_cookie; + + if (!pol || in_interrupt() || (gfp & __GFP_THISNODE)) + pol = &default_policy; + +- get_mems_allowed(); ++retry_cpuset: ++ cpuset_mems_cookie = get_mems_allowed(); ++ + /* + * No reference counting needed for current->mempolicy + * nor system default_policy +@@ -1915,7 +1926,10 @@ struct page *alloc_pages_current(gfp_t gfp, unsigned order) + page = __alloc_pages_nodemask(gfp, order, + policy_zonelist(gfp, pol, numa_node_id()), + 
policy_nodemask(gfp, pol)); +- put_mems_allowed(); ++ ++ if (unlikely(!put_mems_allowed(cpuset_mems_cookie) && !page)) ++ goto retry_cpuset; ++ + return page; + } + EXPORT_SYMBOL(alloc_pages_current); +diff --git a/mm/migrate.c b/mm/migrate.c +index 177aca4..180d97f 100644 +--- a/mm/migrate.c ++++ b/mm/migrate.c +@@ -220,6 +220,56 @@ out: + pte_unmap_unlock(ptep, ptl); + } + ++#ifdef CONFIG_BLOCK ++/* Returns true if all buffers are successfully locked */ ++static bool buffer_migrate_lock_buffers(struct buffer_head *head, ++ enum migrate_mode mode) ++{ ++ struct buffer_head *bh = head; ++ ++ /* Simple case, sync compaction */ ++ if (mode != MIGRATE_ASYNC) { ++ do { ++ get_bh(bh); ++ lock_buffer(bh); ++ bh = bh->b_this_page; ++ ++ } while (bh != head); ++ ++ return true; ++ } ++ ++ /* async case, we cannot block on lock_buffer so use trylock_buffer */ ++ do { ++ get_bh(bh); ++ if (!trylock_buffer(bh)) { ++ /* ++ * We failed to lock the buffer and cannot stall in ++ * async migration. Release the taken locks ++ */ ++ struct buffer_head *failed_bh = bh; ++ put_bh(failed_bh); ++ bh = head; ++ while (bh != failed_bh) { ++ unlock_buffer(bh); ++ put_bh(bh); ++ bh = bh->b_this_page; ++ } ++ return false; ++ } ++ ++ bh = bh->b_this_page; ++ } while (bh != head); ++ return true; ++} ++#else ++static inline bool buffer_migrate_lock_buffers(struct buffer_head *head, ++ enum migrate_mode mode) ++{ ++ return true; ++} ++#endif /* CONFIG_BLOCK */ ++ + /* + * Replace the page in the mapping. + * +@@ -229,7 +279,8 @@ out: + * 3 for pages with a mapping and PagePrivate/PagePrivate2 set. + */ + static int migrate_page_move_mapping(struct address_space *mapping, +- struct page *newpage, struct page *page) ++ struct page *newpage, struct page *page, ++ struct buffer_head *head, enum migrate_mode mode) + { + int expected_count; + void **pslot; +@@ -259,6 +310,20 @@ static int migrate_page_move_mapping(struct address_space *mapping, + } + + /* ++ * In the async migration case of moving a page with buffers, lock the ++ * buffers using trylock before the mapping is moved. If the mapping ++ * was moved, we later failed to lock the buffers and could not move ++ * the mapping back due to an elevated page count, we would have to ++ * block waiting on other references to be dropped. ++ */ ++ if (mode == MIGRATE_ASYNC && head && ++ !buffer_migrate_lock_buffers(head, mode)) { ++ page_unfreeze_refs(page, expected_count); ++ spin_unlock_irq(&mapping->tree_lock); ++ return -EAGAIN; ++ } ++ ++ /* + * Now we know that no one else is looking at the page. + */ + get_page(newpage); /* add cache reference */ +@@ -415,13 +480,14 @@ EXPORT_SYMBOL(fail_migrate_page); + * Pages are locked upon entry and exit. + */ + int migrate_page(struct address_space *mapping, +- struct page *newpage, struct page *page) ++ struct page *newpage, struct page *page, ++ enum migrate_mode mode) + { + int rc; + + BUG_ON(PageWriteback(page)); /* Writeback must be complete */ + +- rc = migrate_page_move_mapping(mapping, newpage, page); ++ rc = migrate_page_move_mapping(mapping, newpage, page, NULL, mode); + + if (rc) + return rc; +@@ -438,28 +504,28 @@ EXPORT_SYMBOL(migrate_page); + * exist. 
+ */ + int buffer_migrate_page(struct address_space *mapping, +- struct page *newpage, struct page *page) ++ struct page *newpage, struct page *page, enum migrate_mode mode) + { + struct buffer_head *bh, *head; + int rc; + + if (!page_has_buffers(page)) +- return migrate_page(mapping, newpage, page); ++ return migrate_page(mapping, newpage, page, mode); + + head = page_buffers(page); + +- rc = migrate_page_move_mapping(mapping, newpage, page); ++ rc = migrate_page_move_mapping(mapping, newpage, page, head, mode); + + if (rc) + return rc; + +- bh = head; +- do { +- get_bh(bh); +- lock_buffer(bh); +- bh = bh->b_this_page; +- +- } while (bh != head); ++ /* ++ * In the async case, migrate_page_move_mapping locked the buffers ++ * with an IRQ-safe spinlock held. In the sync case, the buffers ++ * need to be locked now ++ */ ++ if (mode != MIGRATE_ASYNC) ++ BUG_ON(!buffer_migrate_lock_buffers(head, mode)); + + ClearPagePrivate(page); + set_page_private(newpage, page_private(page)); +@@ -536,10 +602,14 @@ static int writeout(struct address_space *mapping, struct page *page) + * Default handling if a filesystem does not provide a migration function. + */ + static int fallback_migrate_page(struct address_space *mapping, +- struct page *newpage, struct page *page) ++ struct page *newpage, struct page *page, enum migrate_mode mode) + { +- if (PageDirty(page)) ++ if (PageDirty(page)) { ++ /* Only writeback pages in full synchronous migration */ ++ if (mode != MIGRATE_SYNC) ++ return -EBUSY; + return writeout(mapping, page); ++ } + + /* + * Buffers may be managed in a filesystem specific way. +@@ -549,7 +619,7 @@ static int fallback_migrate_page(struct address_space *mapping, + !try_to_release_page(page, GFP_KERNEL)) + return -EAGAIN; + +- return migrate_page(mapping, newpage, page); ++ return migrate_page(mapping, newpage, page, mode); + } + + /* +@@ -564,7 +634,7 @@ static int fallback_migrate_page(struct address_space *mapping, + * == 0 - success + */ + static int move_to_new_page(struct page *newpage, struct page *page, +- int remap_swapcache, bool sync) ++ int remap_swapcache, enum migrate_mode mode) + { + struct address_space *mapping; + int rc; +@@ -585,29 +655,18 @@ static int move_to_new_page(struct page *newpage, struct page *page, + + mapping = page_mapping(page); + if (!mapping) +- rc = migrate_page(mapping, newpage, page); +- else { ++ rc = migrate_page(mapping, newpage, page, mode); ++ else if (mapping->a_ops->migratepage) + /* +- * Do not writeback pages if !sync and migratepage is +- * not pointing to migrate_page() which is nonblocking +- * (swapcache/tmpfs uses migratepage = migrate_page). ++ * Most pages have a mapping and most filesystems provide a ++ * migratepage callback. Anonymous pages are part of swap ++ * space which also has its own migratepage callback. This ++ * is the most common path for page migration. + */ +- if (PageDirty(page) && !sync && +- mapping->a_ops->migratepage != migrate_page) +- rc = -EBUSY; +- else if (mapping->a_ops->migratepage) +- /* +- * Most pages have a mapping and most filesystems +- * should provide a migration function. Anonymous +- * pages are part of swap space which also has its +- * own migration function. This is the most common +- * path for page migration. 
+- */ +- rc = mapping->a_ops->migratepage(mapping, +- newpage, page); +- else +- rc = fallback_migrate_page(mapping, newpage, page); +- } ++ rc = mapping->a_ops->migratepage(mapping, ++ newpage, page, mode); ++ else ++ rc = fallback_migrate_page(mapping, newpage, page, mode); + + if (rc) { + newpage->mapping = NULL; +@@ -622,7 +681,7 @@ static int move_to_new_page(struct page *newpage, struct page *page, + } + + static int __unmap_and_move(struct page *page, struct page *newpage, +- int force, bool offlining, bool sync) ++ int force, bool offlining, enum migrate_mode mode) + { + int rc = -EAGAIN; + int remap_swapcache = 1; +@@ -631,7 +690,7 @@ static int __unmap_and_move(struct page *page, struct page *newpage, + struct anon_vma *anon_vma = NULL; + + if (!trylock_page(page)) { +- if (!force || !sync) ++ if (!force || mode == MIGRATE_ASYNC) + goto out; + + /* +@@ -677,10 +736,12 @@ static int __unmap_and_move(struct page *page, struct page *newpage, + + if (PageWriteback(page)) { + /* +- * For !sync, there is no point retrying as the retry loop +- * is expected to be too short for PageWriteback to be cleared ++ * Only in the case of a full syncronous migration is it ++ * necessary to wait for PageWriteback. In the async case, ++ * the retry loop is too short and in the sync-light case, ++ * the overhead of stalling is too much + */ +- if (!sync) { ++ if (mode != MIGRATE_SYNC) { + rc = -EBUSY; + goto uncharge; + } +@@ -751,7 +812,7 @@ static int __unmap_and_move(struct page *page, struct page *newpage, + + skip_unmap: + if (!page_mapped(page)) +- rc = move_to_new_page(newpage, page, remap_swapcache, sync); ++ rc = move_to_new_page(newpage, page, remap_swapcache, mode); + + if (rc && remap_swapcache) + remove_migration_ptes(page, page); +@@ -774,7 +835,8 @@ out: + * to the newly allocated page in newpage. 
+ */ + static int unmap_and_move(new_page_t get_new_page, unsigned long private, +- struct page *page, int force, bool offlining, bool sync) ++ struct page *page, int force, bool offlining, ++ enum migrate_mode mode) + { + int rc = 0; + int *result = NULL; +@@ -792,7 +854,7 @@ static int unmap_and_move(new_page_t get_new_page, unsigned long private, + if (unlikely(split_huge_page(page))) + goto out; + +- rc = __unmap_and_move(page, newpage, force, offlining, sync); ++ rc = __unmap_and_move(page, newpage, force, offlining, mode); + out: + if (rc != -EAGAIN) { + /* +@@ -840,7 +902,8 @@ out: + */ + static int unmap_and_move_huge_page(new_page_t get_new_page, + unsigned long private, struct page *hpage, +- int force, bool offlining, bool sync) ++ int force, bool offlining, ++ enum migrate_mode mode) + { + int rc = 0; + int *result = NULL; +@@ -853,7 +916,7 @@ static int unmap_and_move_huge_page(new_page_t get_new_page, + rc = -EAGAIN; + + if (!trylock_page(hpage)) { +- if (!force || !sync) ++ if (!force || mode != MIGRATE_SYNC) + goto out; + lock_page(hpage); + } +@@ -864,7 +927,7 @@ static int unmap_and_move_huge_page(new_page_t get_new_page, + try_to_unmap(hpage, TTU_MIGRATION|TTU_IGNORE_MLOCK|TTU_IGNORE_ACCESS); + + if (!page_mapped(hpage)) +- rc = move_to_new_page(new_hpage, hpage, 1, sync); ++ rc = move_to_new_page(new_hpage, hpage, 1, mode); + + if (rc) + remove_migration_ptes(hpage, hpage); +@@ -907,7 +970,7 @@ out: + */ + int migrate_pages(struct list_head *from, + new_page_t get_new_page, unsigned long private, bool offlining, +- bool sync) ++ enum migrate_mode mode) + { + int retry = 1; + int nr_failed = 0; +@@ -928,7 +991,7 @@ int migrate_pages(struct list_head *from, + + rc = unmap_and_move(get_new_page, private, + page, pass > 2, offlining, +- sync); ++ mode); + + switch(rc) { + case -ENOMEM: +@@ -958,7 +1021,7 @@ out: + + int migrate_huge_pages(struct list_head *from, + new_page_t get_new_page, unsigned long private, bool offlining, +- bool sync) ++ enum migrate_mode mode) + { + int retry = 1; + int nr_failed = 0; +@@ -975,7 +1038,7 @@ int migrate_huge_pages(struct list_head *from, + + rc = unmap_and_move_huge_page(get_new_page, + private, page, pass > 2, offlining, +- sync); ++ mode); + + switch(rc) { + case -ENOMEM: +@@ -1104,7 +1167,7 @@ set_status: + err = 0; + if (!list_empty(&pagelist)) { + err = migrate_pages(&pagelist, new_page_node, +- (unsigned long)pm, 0, true); ++ (unsigned long)pm, 0, MIGRATE_SYNC); + if (err) + putback_lru_pages(&pagelist); + } +diff --git a/mm/page_alloc.c b/mm/page_alloc.c +index 485be89..065dbe8 100644 +--- a/mm/page_alloc.c ++++ b/mm/page_alloc.c +@@ -1886,14 +1886,20 @@ static struct page * + __alloc_pages_direct_compact(gfp_t gfp_mask, unsigned int order, + struct zonelist *zonelist, enum zone_type high_zoneidx, + nodemask_t *nodemask, int alloc_flags, struct zone *preferred_zone, +- int migratetype, unsigned long *did_some_progress, +- bool sync_migration) ++ int migratetype, bool sync_migration, ++ bool *deferred_compaction, ++ unsigned long *did_some_progress) + { + struct page *page; + +- if (!order || compaction_deferred(preferred_zone)) ++ if (!order) + return NULL; + ++ if (compaction_deferred(preferred_zone)) { ++ *deferred_compaction = true; ++ return NULL; ++ } ++ + current->flags |= PF_MEMALLOC; + *did_some_progress = try_to_compact_pages(zonelist, order, gfp_mask, + nodemask, sync_migration); +@@ -1921,7 +1927,13 @@ __alloc_pages_direct_compact(gfp_t gfp_mask, unsigned int order, + * but not enough to satisfy watermarks. 
+ */ + count_vm_event(COMPACTFAIL); +- defer_compaction(preferred_zone); ++ ++ /* ++ * As async compaction considers a subset of pageblocks, only ++ * defer if the failure was a sync compaction failure. ++ */ ++ if (sync_migration) ++ defer_compaction(preferred_zone); + + cond_resched(); + } +@@ -1933,8 +1945,9 @@ static inline struct page * + __alloc_pages_direct_compact(gfp_t gfp_mask, unsigned int order, + struct zonelist *zonelist, enum zone_type high_zoneidx, + nodemask_t *nodemask, int alloc_flags, struct zone *preferred_zone, +- int migratetype, unsigned long *did_some_progress, +- bool sync_migration) ++ int migratetype, bool sync_migration, ++ bool *deferred_compaction, ++ unsigned long *did_some_progress) + { + return NULL; + } +@@ -2084,6 +2097,7 @@ __alloc_pages_slowpath(gfp_t gfp_mask, unsigned int order, + unsigned long pages_reclaimed = 0; + unsigned long did_some_progress; + bool sync_migration = false; ++ bool deferred_compaction = false; + + /* + * In the slowpath, we sanity check order to avoid ever trying to +@@ -2164,12 +2178,22 @@ rebalance: + zonelist, high_zoneidx, + nodemask, + alloc_flags, preferred_zone, +- migratetype, &did_some_progress, +- sync_migration); ++ migratetype, sync_migration, ++ &deferred_compaction, ++ &did_some_progress); + if (page) + goto got_pg; + sync_migration = true; + ++ /* ++ * If compaction is deferred for high-order allocations, it is because ++ * sync compaction recently failed. In this is the case and the caller ++ * has requested the system not be heavily disrupted, fail the ++ * allocation now instead of entering direct reclaim ++ */ ++ if (deferred_compaction && (gfp_mask & __GFP_NO_KSWAPD)) ++ goto nopage; ++ + /* Try direct reclaim and then allocating */ + page = __alloc_pages_direct_reclaim(gfp_mask, order, + zonelist, high_zoneidx, +@@ -2232,8 +2256,9 @@ rebalance: + zonelist, high_zoneidx, + nodemask, + alloc_flags, preferred_zone, +- migratetype, &did_some_progress, +- sync_migration); ++ migratetype, sync_migration, ++ &deferred_compaction, ++ &did_some_progress); + if (page) + goto got_pg; + } +@@ -2257,8 +2282,9 @@ __alloc_pages_nodemask(gfp_t gfp_mask, unsigned int order, + { + enum zone_type high_zoneidx = gfp_zone(gfp_mask); + struct zone *preferred_zone; +- struct page *page; ++ struct page *page = NULL; + int migratetype = allocflags_to_migratetype(gfp_mask); ++ unsigned int cpuset_mems_cookie; + + gfp_mask &= gfp_allowed_mask; + +@@ -2277,15 +2303,15 @@ __alloc_pages_nodemask(gfp_t gfp_mask, unsigned int order, + if (unlikely(!zonelist->_zonerefs->zone)) + return NULL; + +- get_mems_allowed(); ++retry_cpuset: ++ cpuset_mems_cookie = get_mems_allowed(); ++ + /* The preferred zone is used for statistics later */ + first_zones_zonelist(zonelist, high_zoneidx, + nodemask ? : &cpuset_current_mems_allowed, + &preferred_zone); +- if (!preferred_zone) { +- put_mems_allowed(); +- return NULL; +- } ++ if (!preferred_zone) ++ goto out; + + /* First allocation attempt */ + page = get_page_from_freelist(gfp_mask|__GFP_HARDWALL, nodemask, order, +@@ -2295,9 +2321,19 @@ __alloc_pages_nodemask(gfp_t gfp_mask, unsigned int order, + page = __alloc_pages_slowpath(gfp_mask, order, + zonelist, high_zoneidx, nodemask, + preferred_zone, migratetype); +- put_mems_allowed(); + + trace_mm_page_alloc(page, order, gfp_mask, migratetype); ++ ++out: ++ /* ++ * When updating a task's mems_allowed, it is possible to race with ++ * parallel threads in such a way that an allocation can fail while ++ * the mask is being updated. 
If a page allocation is about to fail, ++ * check if the cpuset changed during allocation and if so, retry. ++ */ ++ if (unlikely(!put_mems_allowed(cpuset_mems_cookie) && !page)) ++ goto retry_cpuset; ++ + return page; + } + EXPORT_SYMBOL(__alloc_pages_nodemask); +@@ -2521,13 +2557,15 @@ void si_meminfo_node(struct sysinfo *val, int nid) + bool skip_free_areas_node(unsigned int flags, int nid) + { + bool ret = false; ++ unsigned int cpuset_mems_cookie; + + if (!(flags & SHOW_MEM_FILTER_NODES)) + goto out; + +- get_mems_allowed(); +- ret = !node_isset(nid, cpuset_current_mems_allowed); +- put_mems_allowed(); ++ do { ++ cpuset_mems_cookie = get_mems_allowed(); ++ ret = !node_isset(nid, cpuset_current_mems_allowed); ++ } while (!put_mems_allowed(cpuset_mems_cookie)); + out: + return ret; + } +@@ -3407,25 +3445,33 @@ static void setup_zone_migrate_reserve(struct zone *zone) + if (page_to_nid(page) != zone_to_nid(zone)) + continue; + +- /* Blocks with reserved pages will never free, skip them. */ +- block_end_pfn = min(pfn + pageblock_nr_pages, end_pfn); +- if (pageblock_is_reserved(pfn, block_end_pfn)) +- continue; +- + block_migratetype = get_pageblock_migratetype(page); + +- /* If this block is reserved, account for it */ +- if (reserve > 0 && block_migratetype == MIGRATE_RESERVE) { +- reserve--; +- continue; +- } ++ /* Only test what is necessary when the reserves are not met */ ++ if (reserve > 0) { ++ /* ++ * Blocks with reserved pages will never free, skip ++ * them. ++ */ ++ block_end_pfn = min(pfn + pageblock_nr_pages, end_pfn); ++ if (pageblock_is_reserved(pfn, block_end_pfn)) ++ continue; + +- /* Suitable for reserving if this block is movable */ +- if (reserve > 0 && block_migratetype == MIGRATE_MOVABLE) { +- set_pageblock_migratetype(page, MIGRATE_RESERVE); +- move_freepages_block(zone, page, MIGRATE_RESERVE); +- reserve--; +- continue; ++ /* If this block is reserved, account for it */ ++ if (block_migratetype == MIGRATE_RESERVE) { ++ reserve--; ++ continue; ++ } ++ ++ /* Suitable for reserving if this block is movable */ ++ if (block_migratetype == MIGRATE_MOVABLE) { ++ set_pageblock_migratetype(page, ++ MIGRATE_RESERVE); ++ move_freepages_block(zone, page, ++ MIGRATE_RESERVE); ++ reserve--; ++ continue; ++ } + } + + /* +diff --git a/mm/slab.c b/mm/slab.c +index 83311c9a..cd3ab93 100644 +--- a/mm/slab.c ++++ b/mm/slab.c +@@ -3267,12 +3267,10 @@ static void *alternate_node_alloc(struct kmem_cache *cachep, gfp_t flags) + if (in_interrupt() || (flags & __GFP_THISNODE)) + return NULL; + nid_alloc = nid_here = numa_mem_id(); +- get_mems_allowed(); + if (cpuset_do_slab_mem_spread() && (cachep->flags & SLAB_MEM_SPREAD)) + nid_alloc = cpuset_slab_spread_node(); + else if (current->mempolicy) + nid_alloc = slab_node(current->mempolicy); +- put_mems_allowed(); + if (nid_alloc != nid_here) + return ____cache_alloc_node(cachep, flags, nid_alloc); + return NULL; +@@ -3295,14 +3293,17 @@ static void *fallback_alloc(struct kmem_cache *cache, gfp_t flags) + enum zone_type high_zoneidx = gfp_zone(flags); + void *obj = NULL; + int nid; ++ unsigned int cpuset_mems_cookie; + + if (flags & __GFP_THISNODE) + return NULL; + +- get_mems_allowed(); +- zonelist = node_zonelist(slab_node(current->mempolicy), flags); + local_flags = flags & (GFP_CONSTRAINT_MASK|GFP_RECLAIM_MASK); + ++retry_cpuset: ++ cpuset_mems_cookie = get_mems_allowed(); ++ zonelist = node_zonelist(slab_node(current->mempolicy), flags); ++ + retry: + /* + * Look through allowed nodes for objects available +@@ -3355,7 +3356,9 @@ retry: + 
} + } + } +- put_mems_allowed(); ++ ++ if (unlikely(!put_mems_allowed(cpuset_mems_cookie) && !obj)) ++ goto retry_cpuset; + return obj; + } + +diff --git a/mm/slub.c b/mm/slub.c +index af47188..5710788 100644 +--- a/mm/slub.c ++++ b/mm/slub.c +@@ -1582,6 +1582,7 @@ static struct page *get_any_partial(struct kmem_cache *s, gfp_t flags, + struct zone *zone; + enum zone_type high_zoneidx = gfp_zone(flags); + void *object; ++ unsigned int cpuset_mems_cookie; + + /* + * The defrag ratio allows a configuration of the tradeoffs between +@@ -1605,23 +1606,32 @@ static struct page *get_any_partial(struct kmem_cache *s, gfp_t flags, + get_cycles() % 1024 > s->remote_node_defrag_ratio) + return NULL; + +- get_mems_allowed(); +- zonelist = node_zonelist(slab_node(current->mempolicy), flags); +- for_each_zone_zonelist(zone, z, zonelist, high_zoneidx) { +- struct kmem_cache_node *n; +- +- n = get_node(s, zone_to_nid(zone)); +- +- if (n && cpuset_zone_allowed_hardwall(zone, flags) && +- n->nr_partial > s->min_partial) { +- object = get_partial_node(s, n, c); +- if (object) { +- put_mems_allowed(); +- return object; ++ do { ++ cpuset_mems_cookie = get_mems_allowed(); ++ zonelist = node_zonelist(slab_node(current->mempolicy), flags); ++ for_each_zone_zonelist(zone, z, zonelist, high_zoneidx) { ++ struct kmem_cache_node *n; ++ ++ n = get_node(s, zone_to_nid(zone)); ++ ++ if (n && cpuset_zone_allowed_hardwall(zone, flags) && ++ n->nr_partial > s->min_partial) { ++ object = get_partial_node(s, n, c); ++ if (object) { ++ /* ++ * Return the object even if ++ * put_mems_allowed indicated that ++ * the cpuset mems_allowed was ++ * updated in parallel. It's a ++ * harmless race between the alloc ++ * and the cpuset update. ++ */ ++ put_mems_allowed(cpuset_mems_cookie); ++ return object; ++ } + } + } +- } +- put_mems_allowed(); ++ } while (!put_mems_allowed(cpuset_mems_cookie)); + #endif + return NULL; + } +diff --git a/mm/vmscan.c b/mm/vmscan.c +index 8342119..48febd7 100644 +--- a/mm/vmscan.c ++++ b/mm/vmscan.c +@@ -715,7 +715,13 @@ static enum page_references page_check_references(struct page *page, + */ + SetPageReferenced(page); + +- if (referenced_page) ++ if (referenced_page || referenced_ptes > 1) ++ return PAGEREF_ACTIVATE; ++ ++ /* ++ * Activate file-backed executable pages after first usage. ++ */ ++ if (vm_flags & VM_EXEC) + return PAGEREF_ACTIVATE; + + return PAGEREF_KEEP; +@@ -1061,8 +1067,39 @@ int __isolate_lru_page(struct page *page, isolate_mode_t mode, int file) + + ret = -EBUSY; + +- if ((mode & ISOLATE_CLEAN) && (PageDirty(page) || PageWriteback(page))) +- return ret; ++ /* ++ * To minimise LRU disruption, the caller can indicate that it only ++ * wants to isolate pages it will be able to operate on without ++ * blocking - clean pages for the most part. ++ * ++ * ISOLATE_CLEAN means that only clean pages should be isolated. 
This ++ * is used by reclaim when it is cannot write to backing storage ++ * ++ * ISOLATE_ASYNC_MIGRATE is used to indicate that it only wants to pages ++ * that it is possible to migrate without blocking ++ */ ++ if (mode & (ISOLATE_CLEAN|ISOLATE_ASYNC_MIGRATE)) { ++ /* All the caller can do on PageWriteback is block */ ++ if (PageWriteback(page)) ++ return ret; ++ ++ if (PageDirty(page)) { ++ struct address_space *mapping; ++ ++ /* ISOLATE_CLEAN means only clean pages */ ++ if (mode & ISOLATE_CLEAN) ++ return ret; ++ ++ /* ++ * Only pages without mappings or that have a ++ * ->migratepage callback are possible to migrate ++ * without blocking ++ */ ++ mapping = page_mapping(page); ++ if (mapping && !mapping->a_ops->migratepage) ++ return ret; ++ } ++ } + + if ((mode & ISOLATE_UNMAPPED) && page_mapped(page)) + return ret; +@@ -1178,7 +1215,7 @@ static unsigned long isolate_lru_pages(unsigned long nr_to_scan, + * anon page which don't already have a swap slot is + * pointless. + */ +- if (nr_swap_pages <= 0 && PageAnon(cursor_page) && ++ if (nr_swap_pages <= 0 && PageSwapBacked(cursor_page) && + !PageSwapCache(cursor_page)) + break; + +@@ -1874,7 +1911,8 @@ static void get_scan_count(struct zone *zone, struct scan_control *sc, + * latencies, so it's better to scan a minimum amount there as + * well. + */ +- if (scanning_global_lru(sc) && current_is_kswapd()) ++ if (scanning_global_lru(sc) && current_is_kswapd() && ++ zone->all_unreclaimable) + force_scan = true; + if (!scanning_global_lru(sc)) + force_scan = true; +@@ -2012,8 +2050,9 @@ static inline bool should_continue_reclaim(struct zone *zone, + * inactive lists are large enough, continue reclaiming + */ + pages_for_compaction = (2UL << sc->order); +- inactive_lru_pages = zone_nr_lru_pages(zone, sc, LRU_INACTIVE_ANON) + +- zone_nr_lru_pages(zone, sc, LRU_INACTIVE_FILE); ++ inactive_lru_pages = zone_nr_lru_pages(zone, sc, LRU_INACTIVE_FILE); ++ if (nr_swap_pages > 0) ++ inactive_lru_pages += zone_nr_lru_pages(zone, sc, LRU_INACTIVE_ANON); + if (sc->nr_reclaimed < pages_for_compaction && + inactive_lru_pages > pages_for_compaction) + return true; +@@ -2088,6 +2127,42 @@ restart: + throttle_vm_writeout(sc->gfp_mask); + } + ++/* Returns true if compaction should go ahead for a high-order request */ ++static inline bool compaction_ready(struct zone *zone, struct scan_control *sc) ++{ ++ unsigned long balance_gap, watermark; ++ bool watermark_ok; ++ ++ /* Do not consider compaction for orders reclaim is meant to satisfy */ ++ if (sc->order <= PAGE_ALLOC_COSTLY_ORDER) ++ return false; ++ ++ /* ++ * Compaction takes time to run and there are potentially other ++ * callers using the pages just freed. 
Continue reclaiming until ++ * there is a buffer of free pages available to give compaction ++ * a reasonable chance of completing and allocating the page ++ */ ++ balance_gap = min(low_wmark_pages(zone), ++ (zone->present_pages + KSWAPD_ZONE_BALANCE_GAP_RATIO-1) / ++ KSWAPD_ZONE_BALANCE_GAP_RATIO); ++ watermark = high_wmark_pages(zone) + balance_gap + (2UL << sc->order); ++ watermark_ok = zone_watermark_ok_safe(zone, 0, watermark, 0, 0); ++ ++ /* ++ * If compaction is deferred, reclaim up to a point where ++ * compaction will have a chance of success when re-enabled ++ */ ++ if (compaction_deferred(zone)) ++ return watermark_ok; ++ ++ /* If compaction is not ready to start, keep reclaiming */ ++ if (!compaction_suitable(zone, sc->order)) ++ return false; ++ ++ return watermark_ok; ++} ++ + /* + * This is the direct reclaim path, for page-allocating processes. We only + * try to reclaim pages from zones which will satisfy the caller's allocation +@@ -2105,8 +2180,9 @@ restart: + * scan then give up on it. + * + * This function returns true if a zone is being reclaimed for a costly +- * high-order allocation and compaction is either ready to begin or deferred. +- * This indicates to the caller that it should retry the allocation or fail. ++ * high-order allocation and compaction is ready to begin. This indicates to ++ * the caller that it should consider retrying the allocation instead of ++ * further reclaim. + */ + static bool shrink_zones(int priority, struct zonelist *zonelist, + struct scan_control *sc) +@@ -2115,7 +2191,7 @@ static bool shrink_zones(int priority, struct zonelist *zonelist, + struct zone *zone; + unsigned long nr_soft_reclaimed; + unsigned long nr_soft_scanned; +- bool should_abort_reclaim = false; ++ bool aborted_reclaim = false; + + for_each_zone_zonelist_nodemask(zone, z, zonelist, + gfp_zone(sc->gfp_mask), sc->nodemask) { +@@ -2140,10 +2216,8 @@ static bool shrink_zones(int priority, struct zonelist *zonelist, + * noticable problem, like transparent huge page + * allocations. + */ +- if (sc->order > PAGE_ALLOC_COSTLY_ORDER && +- (compaction_suitable(zone, sc->order) || +- compaction_deferred(zone))) { +- should_abort_reclaim = true; ++ if (compaction_ready(zone, sc)) { ++ aborted_reclaim = true; + continue; + } + } +@@ -2165,7 +2239,7 @@ static bool shrink_zones(int priority, struct zonelist *zonelist, + shrink_zone(priority, zone, sc); + } + +- return should_abort_reclaim; ++ return aborted_reclaim; + } + + static bool zone_reclaimable(struct zone *zone) +@@ -2219,8 +2293,8 @@ static unsigned long do_try_to_free_pages(struct zonelist *zonelist, + struct zoneref *z; + struct zone *zone; + unsigned long writeback_threshold; ++ bool aborted_reclaim; + +- get_mems_allowed(); + delayacct_freepages_start(); + + if (scanning_global_lru(sc)) +@@ -2230,8 +2304,7 @@ static unsigned long do_try_to_free_pages(struct zonelist *zonelist, + sc->nr_scanned = 0; + if (!priority) + disable_swap_token(sc->mem_cgroup); +- if (shrink_zones(priority, zonelist, sc)) +- break; ++ aborted_reclaim = shrink_zones(priority, zonelist, sc); + + /* + * Don't shrink slabs when reclaiming memory from +@@ -2285,7 +2358,6 @@ static unsigned long do_try_to_free_pages(struct zonelist *zonelist, + + out: + delayacct_freepages_end(); +- put_mems_allowed(); + + if (sc->nr_reclaimed) + return sc->nr_reclaimed; +@@ -2298,6 +2370,10 @@ out: + if (oom_killer_disabled) + return 0; + ++ /* Aborted reclaim to try compaction? 
don't OOM, then */ ++ if (aborted_reclaim) ++ return 1; ++ + /* top priority shrink_zones still had more to do? don't OOM, then */ + if (scanning_global_lru(sc) && !all_unreclaimable(zonelist, sc)) + return 1; +diff --git a/sound/pci/hda/patch_hdmi.c b/sound/pci/hda/patch_hdmi.c +index c505fd5..c119f33 100644 +--- a/sound/pci/hda/patch_hdmi.c ++++ b/sound/pci/hda/patch_hdmi.c +@@ -868,7 +868,6 @@ static int hdmi_pcm_open(struct hda_pcm_stream *hinfo, + struct hdmi_spec_per_pin *per_pin; + struct hdmi_eld *eld; + struct hdmi_spec_per_cvt *per_cvt = NULL; +- int pinctl; + + /* Validate hinfo */ + pin_idx = hinfo_to_pin_index(spec, hinfo); +@@ -904,11 +903,6 @@ static int hdmi_pcm_open(struct hda_pcm_stream *hinfo, + snd_hda_codec_write(codec, per_pin->pin_nid, 0, + AC_VERB_SET_CONNECT_SEL, + mux_idx); +- pinctl = snd_hda_codec_read(codec, per_pin->pin_nid, 0, +- AC_VERB_GET_PIN_WIDGET_CONTROL, 0); +- snd_hda_codec_write(codec, per_pin->pin_nid, 0, +- AC_VERB_SET_PIN_WIDGET_CONTROL, +- pinctl | PIN_OUT); + snd_hda_spdif_ctls_assign(codec, pin_idx, per_cvt->cvt_nid); + + /* Initially set the converter's capabilities */ +@@ -1147,11 +1141,17 @@ static int generic_hdmi_playback_pcm_prepare(struct hda_pcm_stream *hinfo, + struct hdmi_spec *spec = codec->spec; + int pin_idx = hinfo_to_pin_index(spec, hinfo); + hda_nid_t pin_nid = spec->pins[pin_idx].pin_nid; ++ int pinctl; + + hdmi_set_channel_count(codec, cvt_nid, substream->runtime->channels); + + hdmi_setup_audio_infoframe(codec, pin_idx, substream); + ++ pinctl = snd_hda_codec_read(codec, pin_nid, 0, ++ AC_VERB_GET_PIN_WIDGET_CONTROL, 0); ++ snd_hda_codec_write(codec, pin_nid, 0, ++ AC_VERB_SET_PIN_WIDGET_CONTROL, pinctl | PIN_OUT); ++ + return hdmi_setup_stream(codec, cvt_nid, pin_nid, stream_tag, format); + } + +diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c +index 5f096a5..191fd78 100644 +--- a/sound/pci/hda/patch_realtek.c ++++ b/sound/pci/hda/patch_realtek.c +@@ -5989,6 +5989,7 @@ static const struct hda_codec_preset snd_hda_preset_realtek[] = { + { .id = 0x10ec0275, .name = "ALC275", .patch = patch_alc269 }, + { .id = 0x10ec0276, .name = "ALC276", .patch = patch_alc269 }, + { .id = 0x10ec0280, .name = "ALC280", .patch = patch_alc269 }, ++ { .id = 0x10ec0282, .name = "ALC282", .patch = patch_alc269 }, + { .id = 0x10ec0861, .rev = 0x100340, .name = "ALC660", + .patch = patch_alc861 }, + { .id = 0x10ec0660, .name = "ALC660-VD", .patch = patch_alc861vd }, +diff --git a/sound/soc/soc-dapm.c b/sound/soc/soc-dapm.c +index 90e93bf..0dc441c 100644 +--- a/sound/soc/soc-dapm.c ++++ b/sound/soc/soc-dapm.c +@@ -1381,7 +1381,15 @@ static int dapm_power_widgets(struct snd_soc_dapm_context *dapm, int event) + } + + list_for_each_entry(w, &card->widgets, list) { +- list_del_init(&w->dirty); ++ switch (w->id) { ++ case snd_soc_dapm_pre: ++ case snd_soc_dapm_post: ++ /* These widgets always need to be powered */ ++ break; ++ default: ++ list_del_init(&w->dirty); ++ break; ++ } + + if (w->power) { + d = w->dapm; diff --git a/3.2.34/bump/1025_linux-3.2.26.patch b/3.2.34/bump/1025_linux-3.2.26.patch new file mode 100644 index 0000000..44065b9 --- /dev/null +++ b/3.2.34/bump/1025_linux-3.2.26.patch @@ -0,0 +1,238 @@ +diff --git a/Makefile b/Makefile +index e13e4e7..fa5acc83 100644 +--- a/Makefile ++++ b/Makefile +@@ -1,6 +1,6 @@ + VERSION = 3 + PATCHLEVEL = 2 +-SUBLEVEL = 25 ++SUBLEVEL = 26 + EXTRAVERSION = + NAME = Saber-toothed Squirrel + +diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h +index 
bb3ee36..f7c89e2 100644 +--- a/arch/x86/include/asm/processor.h ++++ b/arch/x86/include/asm/processor.h +@@ -99,7 +99,6 @@ struct cpuinfo_x86 { + u16 apicid; + u16 initial_apicid; + u16 x86_clflush_size; +-#ifdef CONFIG_SMP + /* number of cores as seen by the OS: */ + u16 booted_cores; + /* Physical processor id: */ +@@ -110,7 +109,6 @@ struct cpuinfo_x86 { + u8 compute_unit_id; + /* Index into per_cpu list: */ + u16 cpu_index; +-#endif + u32 microcode; + } __attribute__((__aligned__(SMP_CACHE_BYTES))); + +diff --git a/arch/x86/kernel/amd_nb.c b/arch/x86/kernel/amd_nb.c +index bae1efe..be16854 100644 +--- a/arch/x86/kernel/amd_nb.c ++++ b/arch/x86/kernel/amd_nb.c +@@ -154,16 +154,14 @@ int amd_get_subcaches(int cpu) + { + struct pci_dev *link = node_to_amd_nb(amd_get_nb_id(cpu))->link; + unsigned int mask; +- int cuid = 0; ++ int cuid; + + if (!amd_nb_has_feature(AMD_NB_L3_PARTITIONING)) + return 0; + + pci_read_config_dword(link, 0x1d4, &mask); + +-#ifdef CONFIG_SMP + cuid = cpu_data(cpu).compute_unit_id; +-#endif + return (mask >> (4 * cuid)) & 0xf; + } + +@@ -172,7 +170,7 @@ int amd_set_subcaches(int cpu, int mask) + static unsigned int reset, ban; + struct amd_northbridge *nb = node_to_amd_nb(amd_get_nb_id(cpu)); + unsigned int reg; +- int cuid = 0; ++ int cuid; + + if (!amd_nb_has_feature(AMD_NB_L3_PARTITIONING) || mask > 0xf) + return -EINVAL; +@@ -190,9 +188,7 @@ int amd_set_subcaches(int cpu, int mask) + pci_write_config_dword(nb->misc, 0x1b8, reg & ~0x180000); + } + +-#ifdef CONFIG_SMP + cuid = cpu_data(cpu).compute_unit_id; +-#endif + mask <<= 4 * cuid; + mask |= (0xf ^ (1 << cuid)) << 26; + +diff --git a/arch/x86/kernel/cpu/amd.c b/arch/x86/kernel/cpu/amd.c +index 3524e1f..ff8557e 100644 +--- a/arch/x86/kernel/cpu/amd.c ++++ b/arch/x86/kernel/cpu/amd.c +@@ -148,7 +148,6 @@ static void __cpuinit init_amd_k6(struct cpuinfo_x86 *c) + + static void __cpuinit amd_k7_smp_check(struct cpuinfo_x86 *c) + { +-#ifdef CONFIG_SMP + /* calling is from identify_secondary_cpu() ? */ + if (!c->cpu_index) + return; +@@ -192,7 +191,6 @@ static void __cpuinit amd_k7_smp_check(struct cpuinfo_x86 *c) + + valid_k7: + ; +-#endif + } + + static void __cpuinit init_amd_k7(struct cpuinfo_x86 *c) +diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c +index aa003b1..ca93cc7 100644 +--- a/arch/x86/kernel/cpu/common.c ++++ b/arch/x86/kernel/cpu/common.c +@@ -676,9 +676,7 @@ static void __init early_identify_cpu(struct cpuinfo_x86 *c) + if (this_cpu->c_early_init) + this_cpu->c_early_init(c); + +-#ifdef CONFIG_SMP + c->cpu_index = 0; +-#endif + filter_cpuid_features(c, false); + + setup_smep(c); +@@ -764,10 +762,7 @@ static void __cpuinit generic_identify(struct cpuinfo_x86 *c) + c->apicid = c->initial_apicid; + # endif + #endif +- +-#ifdef CONFIG_X86_HT + c->phys_proc_id = c->initial_apicid; +-#endif + } + + setup_smep(c); +diff --git a/arch/x86/kernel/cpu/intel.c b/arch/x86/kernel/cpu/intel.c +index 5231312..3e6ff6c 100644 +--- a/arch/x86/kernel/cpu/intel.c ++++ b/arch/x86/kernel/cpu/intel.c +@@ -181,7 +181,6 @@ static void __cpuinit trap_init_f00f_bug(void) + + static void __cpuinit intel_smp_check(struct cpuinfo_x86 *c) + { +-#ifdef CONFIG_SMP + /* calling is from identify_secondary_cpu() ? 
*/ + if (!c->cpu_index) + return; +@@ -198,7 +197,6 @@ static void __cpuinit intel_smp_check(struct cpuinfo_x86 *c) + WARN_ONCE(1, "WARNING: SMP operation may be unreliable" + "with B stepping processors.\n"); + } +-#endif + } + + static void __cpuinit intel_workarounds(struct cpuinfo_x86 *c) +diff --git a/arch/x86/kernel/cpu/mcheck/mce.c b/arch/x86/kernel/cpu/mcheck/mce.c +index b0f1271..3b67877 100644 +--- a/arch/x86/kernel/cpu/mcheck/mce.c ++++ b/arch/x86/kernel/cpu/mcheck/mce.c +@@ -119,9 +119,7 @@ void mce_setup(struct mce *m) + m->time = get_seconds(); + m->cpuvendor = boot_cpu_data.x86_vendor; + m->cpuid = cpuid_eax(1); +-#ifdef CONFIG_SMP + m->socketid = cpu_data(m->extcpu).phys_proc_id; +-#endif + m->apicid = cpu_data(m->extcpu).initial_apicid; + rdmsrl(MSR_IA32_MCG_CAP, m->mcgcap); + } +diff --git a/arch/x86/kernel/cpu/mcheck/mce_amd.c b/arch/x86/kernel/cpu/mcheck/mce_amd.c +index 445a61c..d4444be 100644 +--- a/arch/x86/kernel/cpu/mcheck/mce_amd.c ++++ b/arch/x86/kernel/cpu/mcheck/mce_amd.c +@@ -65,11 +65,9 @@ struct threshold_bank { + }; + static DEFINE_PER_CPU(struct threshold_bank * [NR_BANKS], threshold_banks); + +-#ifdef CONFIG_SMP + static unsigned char shared_bank[NR_BANKS] = { + 0, 0, 0, 0, 1 + }; +-#endif + + static DEFINE_PER_CPU(unsigned char, bank_map); /* see which banks are on */ + +@@ -227,10 +225,9 @@ void mce_amd_feature_init(struct cpuinfo_x86 *c) + + if (!block) + per_cpu(bank_map, cpu) |= (1 << bank); +-#ifdef CONFIG_SMP ++ + if (shared_bank[bank] && c->cpu_core_id) + break; +-#endif + + memset(&b, 0, sizeof(b)); + b.cpu = cpu; +diff --git a/arch/x86/kernel/cpu/proc.c b/arch/x86/kernel/cpu/proc.c +index 14b2314..8022c66 100644 +--- a/arch/x86/kernel/cpu/proc.c ++++ b/arch/x86/kernel/cpu/proc.c +@@ -64,12 +64,10 @@ static void show_cpuinfo_misc(struct seq_file *m, struct cpuinfo_x86 *c) + static int show_cpuinfo(struct seq_file *m, void *v) + { + struct cpuinfo_x86 *c = v; +- unsigned int cpu = 0; ++ unsigned int cpu; + int i; + +-#ifdef CONFIG_SMP + cpu = c->cpu_index; +-#endif + seq_printf(m, "processor\t: %u\n" + "vendor_id\t: %s\n" + "cpu family\t: %d\n" +diff --git a/drivers/edac/sb_edac.c b/drivers/edac/sb_edac.c +index 18a1293..0db57b5 100644 +--- a/drivers/edac/sb_edac.c ++++ b/drivers/edac/sb_edac.c +@@ -1609,11 +1609,9 @@ static int sbridge_mce_check_error(struct notifier_block *nb, unsigned long val, + mce->cpuvendor, mce->cpuid, mce->time, + mce->socketid, mce->apicid); + +-#ifdef CONFIG_SMP + /* Only handle if it is the right mc controller */ + if (cpu_data(mce->cpu).phys_proc_id != pvt->sbridge_dev->mc) + return NOTIFY_DONE; +-#endif + + smp_rmb(); + if ((pvt->mce_out + 1) % MCE_LOG_LEN == pvt->mce_in) { +diff --git a/drivers/hwmon/coretemp.c b/drivers/hwmon/coretemp.c +index 0790c98..19b4412 100644 +--- a/drivers/hwmon/coretemp.c ++++ b/drivers/hwmon/coretemp.c +@@ -57,16 +57,15 @@ MODULE_PARM_DESC(tjmax, "TjMax value in degrees Celsius"); + #define TOTAL_ATTRS (MAX_CORE_ATTRS + 1) + #define MAX_CORE_DATA (NUM_REAL_CORES + BASE_SYSFS_ATTR_NO) + +-#ifdef CONFIG_SMP + #define TO_PHYS_ID(cpu) cpu_data(cpu).phys_proc_id + #define TO_CORE_ID(cpu) cpu_data(cpu).cpu_core_id ++#define TO_ATTR_NO(cpu) (TO_CORE_ID(cpu) + BASE_SYSFS_ATTR_NO) ++ ++#ifdef CONFIG_SMP + #define for_each_sibling(i, cpu) for_each_cpu(i, cpu_sibling_mask(cpu)) + #else +-#define TO_PHYS_ID(cpu) (cpu) +-#define TO_CORE_ID(cpu) (cpu) + #define for_each_sibling(i, cpu) for (i = 0; false; ) + #endif +-#define TO_ATTR_NO(cpu) (TO_CORE_ID(cpu) + BASE_SYSFS_ATTR_NO) + + /* + * Per-Core 
Temperature Data diff --git a/3.2.34/bump/1026_linux-3.2.27.patch b/3.2.34/bump/1026_linux-3.2.27.patch new file mode 100644 index 0000000..5878eb4 --- /dev/null +++ b/3.2.34/bump/1026_linux-3.2.27.patch @@ -0,0 +1,3188 @@ +diff --git a/Documentation/sound/alsa/HD-Audio-Models.txt b/Documentation/sound/alsa/HD-Audio-Models.txt +index edad99a..69820b2 100644 +--- a/Documentation/sound/alsa/HD-Audio-Models.txt ++++ b/Documentation/sound/alsa/HD-Audio-Models.txt +@@ -60,10 +60,11 @@ ALC267/268 + ========== + N/A + +-ALC269 ++ALC269/270/275/276/280/282 + ====== + laptop-amic Laptops with analog-mic input + laptop-dmic Laptops with digital-mic input ++ lenovo-dock Enables docking station I/O for some Lenovos + + ALC662/663/272 + ============== +diff --git a/Documentation/stable_kernel_rules.txt b/Documentation/stable_kernel_rules.txt +index e1f856b..22bf11b 100644 +--- a/Documentation/stable_kernel_rules.txt ++++ b/Documentation/stable_kernel_rules.txt +@@ -1,4 +1,4 @@ +-Everything you ever wanted to know about Linux 2.6 -stable releases. ++Everything you ever wanted to know about Linux -stable releases. + + Rules on what kind of patches are accepted, and which ones are not, into the + "-stable" tree: +@@ -41,10 +41,10 @@ Procedure for submitting patches to the -stable tree: + cherry-picked than this can be specified in the following format in + the sign-off area: + +- Cc: # .32.x: a1f84a3: sched: Check for idle +- Cc: # .32.x: 1b9508f: sched: Rate-limit newidle +- Cc: # .32.x: fd21073: sched: Fix affinity logic +- Cc: # .32.x ++ Cc: # 3.3.x: a1f84a3: sched: Check for idle ++ Cc: # 3.3.x: 1b9508f: sched: Rate-limit newidle ++ Cc: # 3.3.x: fd21073: sched: Fix affinity logic ++ Cc: # 3.3.x + Signed-off-by: Ingo Molnar + + The tag sequence has the meaning of: +@@ -78,6 +78,15 @@ Review cycle: + security kernel team, and not go through the normal review cycle. + Contact the kernel security team for more details on this procedure. + ++Trees: ++ ++ - The queues of patches, for both completed versions and in progress ++ versions can be found at: ++ http://git.kernel.org/?p=linux/kernel/git/stable/stable-queue.git ++ - The finalized and tagged releases of all stable kernels can be found ++ in separate branches per version at: ++ http://git.kernel.org/?p=linux/kernel/git/stable/linux-stable.git ++ + + Review committee: + +diff --git a/Makefile b/Makefile +index fa5acc83..bdf851f 100644 +--- a/Makefile ++++ b/Makefile +@@ -1,6 +1,6 @@ + VERSION = 3 + PATCHLEVEL = 2 +-SUBLEVEL = 26 ++SUBLEVEL = 27 + EXTRAVERSION = + NAME = Saber-toothed Squirrel + +diff --git a/arch/arm/include/asm/mutex.h b/arch/arm/include/asm/mutex.h +index 93226cf..b1479fd 100644 +--- a/arch/arm/include/asm/mutex.h ++++ b/arch/arm/include/asm/mutex.h +@@ -7,121 +7,10 @@ + */ + #ifndef _ASM_MUTEX_H + #define _ASM_MUTEX_H +- +-#if __LINUX_ARM_ARCH__ < 6 +-/* On pre-ARMv6 hardware the swp based implementation is the most efficient. */ +-# include +-#else +- + /* +- * Attempting to lock a mutex on ARMv6+ can be done with a bastardized +- * atomic decrement (it is not a reliable atomic decrement but it satisfies +- * the defined semantics for our purpose, while being smaller and faster +- * than a real atomic decrement or atomic swap. The idea is to attempt +- * decrementing the lock value only once. If once decremented it isn't zero, +- * or if its store-back fails due to a dispute on the exclusive store, we +- * simply bail out immediately through the slow path where the lock will be +- * reattempted until it succeeds. 
++ * On pre-ARMv6 hardware this results in a swp-based implementation, ++ * which is the most efficient. For ARMv6+, we emit a pair of exclusive ++ * accesses instead. + */ +-static inline void +-__mutex_fastpath_lock(atomic_t *count, void (*fail_fn)(atomic_t *)) +-{ +- int __ex_flag, __res; +- +- __asm__ ( +- +- "ldrex %0, [%2] \n\t" +- "sub %0, %0, #1 \n\t" +- "strex %1, %0, [%2] " +- +- : "=&r" (__res), "=&r" (__ex_flag) +- : "r" (&(count)->counter) +- : "cc","memory" ); +- +- __res |= __ex_flag; +- if (unlikely(__res != 0)) +- fail_fn(count); +-} +- +-static inline int +-__mutex_fastpath_lock_retval(atomic_t *count, int (*fail_fn)(atomic_t *)) +-{ +- int __ex_flag, __res; +- +- __asm__ ( +- +- "ldrex %0, [%2] \n\t" +- "sub %0, %0, #1 \n\t" +- "strex %1, %0, [%2] " +- +- : "=&r" (__res), "=&r" (__ex_flag) +- : "r" (&(count)->counter) +- : "cc","memory" ); +- +- __res |= __ex_flag; +- if (unlikely(__res != 0)) +- __res = fail_fn(count); +- return __res; +-} +- +-/* +- * Same trick is used for the unlock fast path. However the original value, +- * rather than the result, is used to test for success in order to have +- * better generated assembly. +- */ +-static inline void +-__mutex_fastpath_unlock(atomic_t *count, void (*fail_fn)(atomic_t *)) +-{ +- int __ex_flag, __res, __orig; +- +- __asm__ ( +- +- "ldrex %0, [%3] \n\t" +- "add %1, %0, #1 \n\t" +- "strex %2, %1, [%3] " +- +- : "=&r" (__orig), "=&r" (__res), "=&r" (__ex_flag) +- : "r" (&(count)->counter) +- : "cc","memory" ); +- +- __orig |= __ex_flag; +- if (unlikely(__orig != 0)) +- fail_fn(count); +-} +- +-/* +- * If the unlock was done on a contended lock, or if the unlock simply fails +- * then the mutex remains locked. +- */ +-#define __mutex_slowpath_needs_to_unlock() 1 +- +-/* +- * For __mutex_fastpath_trylock we use another construct which could be +- * described as a "single value cmpxchg". +- * +- * This provides the needed trylock semantics like cmpxchg would, but it is +- * lighter and less generic than a true cmpxchg implementation. +- */ +-static inline int +-__mutex_fastpath_trylock(atomic_t *count, int (*fail_fn)(atomic_t *)) +-{ +- int __ex_flag, __res, __orig; +- +- __asm__ ( +- +- "1: ldrex %0, [%3] \n\t" +- "subs %1, %0, #1 \n\t" +- "strexeq %2, %1, [%3] \n\t" +- "movlt %0, #0 \n\t" +- "cmpeq %2, #0 \n\t" +- "bgt 1b " +- +- : "=&r" (__orig), "=&r" (__res), "=&r" (__ex_flag) +- : "r" (&count->counter) +- : "cc", "memory" ); +- +- return __orig; +-} +- +-#endif ++#include + #endif +diff --git a/arch/arm/kernel/entry-armv.S b/arch/arm/kernel/entry-armv.S +index b145f16..ece0996 100644 +--- a/arch/arm/kernel/entry-armv.S ++++ b/arch/arm/kernel/entry-armv.S +@@ -242,6 +242,19 @@ svc_preempt: + b 1b + #endif + ++__und_fault: ++ @ Correct the PC such that it is pointing at the instruction ++ @ which caused the fault. If the faulting instruction was ARM ++ @ the PC will be pointing at the next instruction, and have to ++ @ subtract 4. Otherwise, it is Thumb, and the PC will be ++ @ pointing at the second half of the Thumb instruction. We ++ @ have to subtract 2. 
++ ldr r2, [r0, #S_PC] ++ sub r2, r2, r1 ++ str r2, [r0, #S_PC] ++ b do_undefinstr ++ENDPROC(__und_fault) ++ + .align 5 + __und_svc: + #ifdef CONFIG_KPROBES +@@ -259,25 +272,32 @@ __und_svc: + @ + @ r0 - instruction + @ +-#ifndef CONFIG_THUMB2_KERNEL ++#ifndef CONFIG_THUMB2_KERNEL + ldr r0, [r4, #-4] + #else ++ mov r1, #2 + ldrh r0, [r4, #-2] @ Thumb instruction at LR - 2 + cmp r0, #0xe800 @ 32-bit instruction if xx >= 0 +- ldrhhs r9, [r4] @ bottom 16 bits +- orrhs r0, r9, r0, lsl #16 ++ blo __und_svc_fault ++ ldrh r9, [r4] @ bottom 16 bits ++ add r4, r4, #2 ++ str r4, [sp, #S_PC] ++ orr r0, r9, r0, lsl #16 + #endif +- adr r9, BSYM(1f) ++ adr r9, BSYM(__und_svc_finish) + mov r2, r4 + bl call_fpe + ++ mov r1, #4 @ PC correction to apply ++__und_svc_fault: + mov r0, sp @ struct pt_regs *regs +- bl do_undefinstr ++ bl __und_fault + + @ + @ IRQs off again before pulling preserved data off the stack + @ +-1: disable_irq_notrace ++__und_svc_finish: ++ disable_irq_notrace + + @ + @ restore SPSR and restart the instruction +@@ -421,25 +441,33 @@ __und_usr: + mov r2, r4 + mov r3, r5 + ++ @ r2 = regs->ARM_pc, which is either 2 or 4 bytes ahead of the ++ @ faulting instruction depending on Thumb mode. ++ @ r3 = regs->ARM_cpsr + @ +- @ fall through to the emulation code, which returns using r9 if +- @ it has emulated the instruction, or the more conventional lr +- @ if we are to treat this as a real undefined instruction +- @ +- @ r0 - instruction ++ @ The emulation code returns using r9 if it has emulated the ++ @ instruction, or the more conventional lr if we are to treat ++ @ this as a real undefined instruction + @ + adr r9, BSYM(ret_from_exception) +- adr lr, BSYM(__und_usr_unknown) ++ + tst r3, #PSR_T_BIT @ Thumb mode? +- itet eq @ explicit IT needed for the 1f label +- subeq r4, r2, #4 @ ARM instr at LR - 4 +- subne r4, r2, #2 @ Thumb instr at LR - 2 +-1: ldreqt r0, [r4] ++ bne __und_usr_thumb ++ sub r4, r2, #4 @ ARM instr at LR - 4 ++1: ldrt r0, [r4] + #ifdef CONFIG_CPU_ENDIAN_BE8 +- reveq r0, r0 @ little endian instruction ++ rev r0, r0 @ little endian instruction + #endif +- beq call_fpe ++ @ r0 = 32-bit ARM instruction which caused the exception ++ @ r2 = PC value for the following instruction (:= regs->ARM_pc) ++ @ r4 = PC value for the faulting instruction ++ @ lr = 32-bit undefined instruction function ++ adr lr, BSYM(__und_usr_fault_32) ++ b call_fpe ++ ++__und_usr_thumb: + @ Thumb instruction ++ sub r4, r2, #2 @ First half of thumb instr at LR - 2 + #if CONFIG_ARM_THUMB && __LINUX_ARM_ARCH__ >= 6 && CONFIG_CPU_V7 + /* + * Thumb-2 instruction handling. Note that because pre-v6 and >= v6 platforms +@@ -453,7 +481,7 @@ __und_usr: + ldr r5, .LCcpu_architecture + ldr r5, [r5] + cmp r5, #CPU_ARCH_ARMv7 +- blo __und_usr_unknown ++ blo __und_usr_fault_16 @ 16bit undefined instruction + /* + * The following code won't get run unless the running CPU really is v7, so + * coding round the lack of ldrht on older arches is pointless. 
Temporarily +@@ -461,15 +489,18 @@ __und_usr: + */ + .arch armv6t2 + #endif +-2: +- ARM( ldrht r5, [r4], #2 ) +- THUMB( ldrht r5, [r4] ) +- THUMB( add r4, r4, #2 ) ++2: ldrht r5, [r4] + cmp r5, #0xe800 @ 32bit instruction if xx != 0 +- blo __und_usr_unknown +-3: ldrht r0, [r4] ++ blo __und_usr_fault_16 @ 16bit undefined instruction ++3: ldrht r0, [r2] + add r2, r2, #2 @ r2 is PC + 2, make it PC + 4 ++ str r2, [sp, #S_PC] @ it's a 2x16bit instr, update + orr r0, r0, r5, lsl #16 ++ adr lr, BSYM(__und_usr_fault_32) ++ @ r0 = the two 16-bit Thumb instructions which caused the exception ++ @ r2 = PC value for the following Thumb instruction (:= regs->ARM_pc) ++ @ r4 = PC value for the first 16-bit Thumb instruction ++ @ lr = 32bit undefined instruction function + + #if __LINUX_ARM_ARCH__ < 7 + /* If the target arch was overridden, change it back: */ +@@ -480,17 +511,13 @@ __und_usr: + #endif + #endif /* __LINUX_ARM_ARCH__ < 7 */ + #else /* !(CONFIG_ARM_THUMB && __LINUX_ARM_ARCH__ >= 6 && CONFIG_CPU_V7) */ +- b __und_usr_unknown ++ b __und_usr_fault_16 + #endif +- UNWIND(.fnend ) ++ UNWIND(.fnend) + ENDPROC(__und_usr) + +- @ +- @ fallthrough to call_fpe +- @ +- + /* +- * The out of line fixup for the ldrt above. ++ * The out of line fixup for the ldrt instructions above. + */ + .pushsection .fixup, "ax" + 4: mov pc, r9 +@@ -521,11 +548,12 @@ ENDPROC(__und_usr) + * NEON handler code. + * + * Emulators may wish to make use of the following registers: +- * r0 = instruction opcode. +- * r2 = PC+4 ++ * r0 = instruction opcode (32-bit ARM or two 16-bit Thumb) ++ * r2 = PC value to resume execution after successful emulation + * r9 = normal "successful" return address +- * r10 = this threads thread_info structure. ++ * r10 = this threads thread_info structure + * lr = unrecognised instruction return address ++ * IRQs disabled, FIQs enabled. + */ + @ + @ Fall-through from Thumb-2 __und_usr +@@ -660,12 +688,17 @@ ENTRY(no_fp) + mov pc, lr + ENDPROC(no_fp) + +-__und_usr_unknown: +- enable_irq ++__und_usr_fault_32: ++ mov r1, #4 ++ b 1f ++__und_usr_fault_16: ++ mov r1, #2 ++1: enable_irq + mov r0, sp + adr lr, BSYM(ret_from_exception) +- b do_undefinstr +-ENDPROC(__und_usr_unknown) ++ b __und_fault ++ENDPROC(__und_usr_fault_32) ++ENDPROC(__und_usr_fault_16) + + .align 5 + __pabt_usr: +diff --git a/arch/arm/kernel/process.c b/arch/arm/kernel/process.c +index 3d0c6fb..e68d251 100644 +--- a/arch/arm/kernel/process.c ++++ b/arch/arm/kernel/process.c +@@ -125,6 +125,7 @@ void arm_machine_restart(char mode, const char *cmd) + */ + mdelay(1000); + printk("Reboot failed -- System halted\n"); ++ local_irq_disable(); + while (1); + } + +@@ -240,6 +241,7 @@ void machine_shutdown(void) + void machine_halt(void) + { + machine_shutdown(); ++ local_irq_disable(); + while (1); + } + +diff --git a/arch/arm/kernel/traps.c b/arch/arm/kernel/traps.c +index 160cb16..8380bd1 100644 +--- a/arch/arm/kernel/traps.c ++++ b/arch/arm/kernel/traps.c +@@ -362,18 +362,10 @@ static int call_undef_hook(struct pt_regs *regs, unsigned int instr) + + asmlinkage void __exception do_undefinstr(struct pt_regs *regs) + { +- unsigned int correction = thumb_mode(regs) ? 2 : 4; + unsigned int instr; + siginfo_t info; + void __user *pc; + +- /* +- * According to the ARM ARM, PC is 2 or 4 bytes ahead, +- * depending whether we're in Thumb mode or not. +- * Correct this offset. 
+- */ +- regs->ARM_pc -= correction; +- + pc = (void __user *)instruction_pointer(regs); + + if (processor_mode(regs) == SVC_MODE) { +diff --git a/arch/arm/mm/tlb-v7.S b/arch/arm/mm/tlb-v7.S +index 845f461..c202113 100644 +--- a/arch/arm/mm/tlb-v7.S ++++ b/arch/arm/mm/tlb-v7.S +@@ -38,11 +38,19 @@ ENTRY(v7wbi_flush_user_tlb_range) + dsb + mov r0, r0, lsr #PAGE_SHIFT @ align address + mov r1, r1, lsr #PAGE_SHIFT ++#ifdef CONFIG_ARM_ERRATA_720789 ++ mov r3, #0 ++#else + asid r3, r3 @ mask ASID ++#endif + orr r0, r3, r0, lsl #PAGE_SHIFT @ Create initial MVA + mov r1, r1, lsl #PAGE_SHIFT + 1: ++#ifdef CONFIG_ARM_ERRATA_720789 ++ ALT_SMP(mcr p15, 0, r0, c8, c3, 3) @ TLB invalidate U MVA all ASID (shareable) ++#else + ALT_SMP(mcr p15, 0, r0, c8, c3, 1) @ TLB invalidate U MVA (shareable) ++#endif + ALT_UP(mcr p15, 0, r0, c8, c7, 1) @ TLB invalidate U MVA + + add r0, r0, #PAGE_SZ +@@ -67,7 +75,11 @@ ENTRY(v7wbi_flush_kern_tlb_range) + mov r0, r0, lsl #PAGE_SHIFT + mov r1, r1, lsl #PAGE_SHIFT + 1: ++#ifdef CONFIG_ARM_ERRATA_720789 ++ ALT_SMP(mcr p15, 0, r0, c8, c3, 3) @ TLB invalidate U MVA all ASID (shareable) ++#else + ALT_SMP(mcr p15, 0, r0, c8, c3, 1) @ TLB invalidate U MVA (shareable) ++#endif + ALT_UP(mcr p15, 0, r0, c8, c7, 1) @ TLB invalidate U MVA + add r0, r0, #PAGE_SZ + cmp r0, r1 +diff --git a/arch/arm/vfp/entry.S b/arch/arm/vfp/entry.S +index 4fa9903..cc926c9 100644 +--- a/arch/arm/vfp/entry.S ++++ b/arch/arm/vfp/entry.S +@@ -7,18 +7,20 @@ + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. +- * +- * Basic entry code, called from the kernel's undefined instruction trap. +- * r0 = faulted instruction +- * r5 = faulted PC+4 +- * r9 = successful return +- * r10 = thread_info structure +- * lr = failure return + */ + #include + #include + #include "../kernel/entry-header.S" + ++@ VFP entry point. ++@ ++@ r0 = instruction opcode (32-bit ARM or two 16-bit Thumb) ++@ r2 = PC value to resume execution after successful emulation ++@ r9 = normal "successful" return address ++@ r10 = this threads thread_info structure ++@ lr = unrecognised instruction return address ++@ IRQs disabled. ++@ + ENTRY(do_vfp) + #ifdef CONFIG_PREEMPT + ldr r4, [r10, #TI_PREEMPT] @ get preempt count +diff --git a/arch/arm/vfp/vfphw.S b/arch/arm/vfp/vfphw.S +index 2d30c7f..3a0efaa 100644 +--- a/arch/arm/vfp/vfphw.S ++++ b/arch/arm/vfp/vfphw.S +@@ -61,13 +61,13 @@ + + @ VFP hardware support entry point. + @ +-@ r0 = faulted instruction +-@ r2 = faulted PC+4 +-@ r9 = successful return ++@ r0 = instruction opcode (32-bit ARM or two 16-bit Thumb) ++@ r2 = PC value to resume execution after successful emulation ++@ r9 = normal "successful" return address + @ r10 = vfp_state union + @ r11 = CPU number +-@ lr = failure return +- ++@ lr = unrecognised instruction return address ++@ IRQs enabled. + ENTRY(vfp_support_entry) + DBGSTR3 "instr %08x pc %08x state %p", r0, r2, r10 + +@@ -161,9 +161,12 @@ vfp_hw_state_valid: + @ exception before retrying branch + @ out before setting an FPEXC that + @ stops us reading stuff +- VFPFMXR FPEXC, r1 @ restore FPEXC last +- sub r2, r2, #4 +- str r2, [sp, #S_PC] @ retry the instruction ++ VFPFMXR FPEXC, r1 @ Restore FPEXC last ++ sub r2, r2, #4 @ Retry current instruction - if Thumb ++ str r2, [sp, #S_PC] @ mode it's two 16-bit instructions, ++ @ else it's one 32-bit instruction, so ++ @ always subtract 4 from the following ++ @ instruction address. 
+ #ifdef CONFIG_PREEMPT + get_thread_info r10 + ldr r4, [r10, #TI_PREEMPT] @ get preempt count +diff --git a/arch/arm/vfp/vfpmodule.c b/arch/arm/vfp/vfpmodule.c +index 8ea07e4..ad83dad 100644 +--- a/arch/arm/vfp/vfpmodule.c ++++ b/arch/arm/vfp/vfpmodule.c +@@ -453,10 +453,16 @@ static int vfp_pm_suspend(void) + + /* disable, just in case */ + fmxr(FPEXC, fmrx(FPEXC) & ~FPEXC_EN); ++ } else if (vfp_current_hw_state[ti->cpu]) { ++#ifndef CONFIG_SMP ++ fmxr(FPEXC, fpexc | FPEXC_EN); ++ vfp_save_state(vfp_current_hw_state[ti->cpu], fpexc); ++ fmxr(FPEXC, fpexc); ++#endif + } + + /* clear any information we had about last context state */ +- memset(vfp_current_hw_state, 0, sizeof(vfp_current_hw_state)); ++ vfp_current_hw_state[ti->cpu] = NULL; + + return 0; + } +diff --git a/arch/ia64/include/asm/atomic.h b/arch/ia64/include/asm/atomic.h +index 3fad89e..2fc214b 100644 +--- a/arch/ia64/include/asm/atomic.h ++++ b/arch/ia64/include/asm/atomic.h +@@ -18,8 +18,8 @@ + #include + + +-#define ATOMIC_INIT(i) ((atomic_t) { (i) }) +-#define ATOMIC64_INIT(i) ((atomic64_t) { (i) }) ++#define ATOMIC_INIT(i) { (i) } ++#define ATOMIC64_INIT(i) { (i) } + + #define atomic_read(v) (*(volatile int *)&(v)->counter) + #define atomic64_read(v) (*(volatile long *)&(v)->counter) +diff --git a/arch/m68k/include/asm/entry.h b/arch/m68k/include/asm/entry.h +index c3c5a86..8798ebc 100644 +--- a/arch/m68k/include/asm/entry.h ++++ b/arch/m68k/include/asm/entry.h +@@ -33,8 +33,8 @@ + + /* the following macro is used when enabling interrupts */ + #if defined(MACH_ATARI_ONLY) +- /* block out HSYNC on the atari */ +-#define ALLOWINT (~0x400) ++ /* block out HSYNC = ipl 2 on the atari */ ++#define ALLOWINT (~0x500) + #define MAX_NOINT_IPL 3 + #else + /* portable version */ +diff --git a/arch/m68k/kernel/sys_m68k.c b/arch/m68k/kernel/sys_m68k.c +index 8623f8d..9a5932e 100644 +--- a/arch/m68k/kernel/sys_m68k.c ++++ b/arch/m68k/kernel/sys_m68k.c +@@ -479,9 +479,13 @@ sys_atomic_cmpxchg_32(unsigned long newval, int oldval, int d3, int d4, int d5, + goto bad_access; + } + +- mem_value = *mem; ++ /* ++ * No need to check for EFAULT; we know that the page is ++ * present and writable. 
++ */ ++ __get_user(mem_value, mem); + if (mem_value == oldval) +- *mem = newval; ++ __put_user(newval, mem); + + pte_unmap_unlock(pte, ptl); + up_read(&mm->mmap_sem); +diff --git a/arch/s390/include/asm/mmu_context.h b/arch/s390/include/asm/mmu_context.h +index 5682f16..20f0e01 100644 +--- a/arch/s390/include/asm/mmu_context.h ++++ b/arch/s390/include/asm/mmu_context.h +@@ -12,7 +12,6 @@ + #include + #include + #include +-#include + + static inline int init_new_context(struct task_struct *tsk, + struct mm_struct *mm) +@@ -92,4 +91,17 @@ static inline void activate_mm(struct mm_struct *prev, + switch_mm(prev, next, current); + } + ++static inline void arch_dup_mmap(struct mm_struct *oldmm, ++ struct mm_struct *mm) ++{ ++#ifdef CONFIG_64BIT ++ if (oldmm->context.asce_limit < mm->context.asce_limit) ++ crst_table_downgrade(mm, oldmm->context.asce_limit); ++#endif ++} ++ ++static inline void arch_exit_mmap(struct mm_struct *mm) ++{ ++} ++ + #endif /* __S390_MMU_CONTEXT_H */ +diff --git a/arch/s390/include/asm/processor.h b/arch/s390/include/asm/processor.h +index 5f33d37..172550d 100644 +--- a/arch/s390/include/asm/processor.h ++++ b/arch/s390/include/asm/processor.h +@@ -130,7 +130,9 @@ struct stack_frame { + regs->psw.mask = psw_user_bits | PSW_MASK_BA; \ + regs->psw.addr = new_psw | PSW_ADDR_AMODE; \ + regs->gprs[15] = new_stackp; \ ++ __tlb_flush_mm(current->mm); \ + crst_table_downgrade(current->mm, 1UL << 31); \ ++ update_mm(current->mm, current); \ + } while (0) + + /* Forward declaration, a strange C thing */ +diff --git a/arch/s390/mm/fault.c b/arch/s390/mm/fault.c +index b28aaa4..0fc0a7e 100644 +--- a/arch/s390/mm/fault.c ++++ b/arch/s390/mm/fault.c +@@ -453,6 +453,7 @@ int __handle_fault(unsigned long uaddr, unsigned long pgm_int_code, int write) + struct pt_regs regs; + int access, fault; + ++ /* Emulate a uaccess fault from kernel mode. */ + regs.psw.mask = psw_kernel_bits | PSW_MASK_DAT | PSW_MASK_MCHECK; + if (!irqs_disabled()) + regs.psw.mask |= PSW_MASK_IO | PSW_MASK_EXT; +@@ -461,12 +462,12 @@ int __handle_fault(unsigned long uaddr, unsigned long pgm_int_code, int write) + uaddr &= PAGE_MASK; + access = write ? VM_WRITE : VM_READ; + fault = do_exception(®s, access, uaddr | 2); +- if (unlikely(fault)) { +- if (fault & VM_FAULT_OOM) +- return -EFAULT; +- else if (fault & VM_FAULT_SIGBUS) +- do_sigbus(®s, pgm_int_code, uaddr); +- } ++ /* ++ * Since the fault happened in kernel mode while performing a uaccess ++ * all we need to do now is emulating a fixup in case "fault" is not ++ * zero. ++ * For the calling uaccess functions this results always in -EFAULT. ++ */ + return fault ? 
-EFAULT : 0; + } + +diff --git a/arch/s390/mm/mmap.c b/arch/s390/mm/mmap.c +index a0155c0..c70b3d8 100644 +--- a/arch/s390/mm/mmap.c ++++ b/arch/s390/mm/mmap.c +@@ -106,9 +106,15 @@ EXPORT_SYMBOL_GPL(arch_pick_mmap_layout); + + int s390_mmap_check(unsigned long addr, unsigned long len) + { ++ int rc; ++ + if (!is_compat_task() && +- len >= TASK_SIZE && TASK_SIZE < (1UL << 53)) +- return crst_table_upgrade(current->mm, 1UL << 53); ++ len >= TASK_SIZE && TASK_SIZE < (1UL << 53)) { ++ rc = crst_table_upgrade(current->mm, 1UL << 53); ++ if (rc) ++ return rc; ++ update_mm(current->mm, current); ++ } + return 0; + } + +@@ -128,6 +134,7 @@ s390_get_unmapped_area(struct file *filp, unsigned long addr, + rc = crst_table_upgrade(mm, 1UL << 53); + if (rc) + return (unsigned long) rc; ++ update_mm(mm, current); + area = arch_get_unmapped_area(filp, addr, len, pgoff, flags); + } + return area; +@@ -150,6 +157,7 @@ s390_get_unmapped_area_topdown(struct file *filp, const unsigned long addr, + rc = crst_table_upgrade(mm, 1UL << 53); + if (rc) + return (unsigned long) rc; ++ update_mm(mm, current); + area = arch_get_unmapped_area_topdown(filp, addr, len, + pgoff, flags); + } +diff --git a/arch/s390/mm/pgtable.c b/arch/s390/mm/pgtable.c +index f8ceac4..f8e92f8 100644 +--- a/arch/s390/mm/pgtable.c ++++ b/arch/s390/mm/pgtable.c +@@ -97,7 +97,6 @@ repeat: + crst_table_free(mm, table); + if (mm->context.asce_limit < limit) + goto repeat; +- update_mm(mm, current); + return 0; + } + +@@ -105,9 +104,6 @@ void crst_table_downgrade(struct mm_struct *mm, unsigned long limit) + { + pgd_t *pgd; + +- if (mm->context.asce_limit <= limit) +- return; +- __tlb_flush_mm(mm); + while (mm->context.asce_limit > limit) { + pgd = mm->pgd; + switch (pgd_val(*pgd) & _REGION_ENTRY_TYPE_MASK) { +@@ -130,7 +126,6 @@ void crst_table_downgrade(struct mm_struct *mm, unsigned long limit) + mm->task_size = mm->context.asce_limit; + crst_table_free(mm, (unsigned long *) pgd); + } +- update_mm(mm, current); + } + #endif + +diff --git a/arch/x86/kernel/alternative.c b/arch/x86/kernel/alternative.c +index 1f84794..73ef56c 100644 +--- a/arch/x86/kernel/alternative.c ++++ b/arch/x86/kernel/alternative.c +@@ -219,7 +219,7 @@ void __init arch_init_ideal_nops(void) + ideal_nops = intel_nops; + #endif + } +- ++ break; + default: + #ifdef CONFIG_X86_64 + ideal_nops = k8_nops; +diff --git a/arch/x86/xen/p2m.c b/arch/x86/xen/p2m.c +index 1b267e7..00a0385 100644 +--- a/arch/x86/xen/p2m.c ++++ b/arch/x86/xen/p2m.c +@@ -686,6 +686,7 @@ int m2p_add_override(unsigned long mfn, struct page *page, + unsigned long uninitialized_var(address); + unsigned level; + pte_t *ptep = NULL; ++ int ret = 0; + + pfn = page_to_pfn(page); + if (!PageHighMem(page)) { +@@ -721,6 +722,24 @@ int m2p_add_override(unsigned long mfn, struct page *page, + list_add(&page->lru, &m2p_overrides[mfn_hash(mfn)]); + spin_unlock_irqrestore(&m2p_override_lock, flags); + ++ /* p2m(m2p(mfn)) == mfn: the mfn is already present somewhere in ++ * this domain. Set the FOREIGN_FRAME_BIT in the p2m for the other ++ * pfn so that the following mfn_to_pfn(mfn) calls will return the ++ * pfn from the m2p_override (the backend pfn) instead. 
++ * We need to do this because the pages shared by the frontend ++ * (xen-blkfront) can be already locked (lock_page, called by ++ * do_read_cache_page); when the userspace backend tries to use them ++ * with direct_IO, mfn_to_pfn returns the pfn of the frontend, so ++ * do_blockdev_direct_IO is going to try to lock the same pages ++ * again resulting in a deadlock. ++ * As a side effect get_user_pages_fast might not be safe on the ++ * frontend pages while they are being shared with the backend, ++ * because mfn_to_pfn (that ends up being called by GUPF) will ++ * return the backend pfn rather than the frontend pfn. */ ++ ret = __get_user(pfn, &machine_to_phys_mapping[mfn]); ++ if (ret == 0 && get_phys_to_machine(pfn) == mfn) ++ set_phys_to_machine(pfn, FOREIGN_FRAME(mfn)); ++ + return 0; + } + EXPORT_SYMBOL_GPL(m2p_add_override); +@@ -732,6 +751,7 @@ int m2p_remove_override(struct page *page, bool clear_pte) + unsigned long uninitialized_var(address); + unsigned level; + pte_t *ptep = NULL; ++ int ret = 0; + + pfn = page_to_pfn(page); + mfn = get_phys_to_machine(pfn); +@@ -801,6 +821,22 @@ int m2p_remove_override(struct page *page, bool clear_pte) + } else + set_phys_to_machine(pfn, page->index); + ++ /* p2m(m2p(mfn)) == FOREIGN_FRAME(mfn): the mfn is already present ++ * somewhere in this domain, even before being added to the ++ * m2p_override (see comment above in m2p_add_override). ++ * If there are no other entries in the m2p_override corresponding ++ * to this mfn, then remove the FOREIGN_FRAME_BIT from the p2m for ++ * the original pfn (the one shared by the frontend): the backend ++ * cannot do any IO on this page anymore because it has been ++ * unshared. Removing the FOREIGN_FRAME_BIT from the p2m entry of ++ * the original pfn causes mfn_to_pfn(mfn) to return the frontend ++ * pfn again. */ ++ mfn &= ~FOREIGN_FRAME_BIT; ++ ret = __get_user(pfn, &machine_to_phys_mapping[mfn]); ++ if (ret == 0 && get_phys_to_machine(pfn) == FOREIGN_FRAME(mfn) && ++ m2p_find_override(mfn) == NULL) ++ set_phys_to_machine(pfn, mfn); ++ + return 0; + } + EXPORT_SYMBOL_GPL(m2p_remove_override); +diff --git a/drivers/block/floppy.c b/drivers/block/floppy.c +index 9955a53..c864add 100644 +--- a/drivers/block/floppy.c ++++ b/drivers/block/floppy.c +@@ -4369,8 +4369,14 @@ out_unreg_blkdev: + out_put_disk: + while (dr--) { + del_timer_sync(&motor_off_timer[dr]); +- if (disks[dr]->queue) ++ if (disks[dr]->queue) { + blk_cleanup_queue(disks[dr]->queue); ++ /* ++ * put_disk() is not paired with add_disk() and ++ * will put queue reference one extra time. fix it. ++ */ ++ disks[dr]->queue = NULL; ++ } + put_disk(disks[dr]); + } + return err; +diff --git a/drivers/block/virtio_blk.c b/drivers/block/virtio_blk.c +index e46f2f7..650a308 100644 +--- a/drivers/block/virtio_blk.c ++++ b/drivers/block/virtio_blk.c +@@ -20,8 +20,6 @@ struct workqueue_struct *virtblk_wq; + + struct virtio_blk + { +- spinlock_t lock; +- + struct virtio_device *vdev; + struct virtqueue *vq; + +@@ -62,7 +60,7 @@ static void blk_done(struct virtqueue *vq) + unsigned int len; + unsigned long flags; + +- spin_lock_irqsave(&vblk->lock, flags); ++ spin_lock_irqsave(vblk->disk->queue->queue_lock, flags); + while ((vbr = virtqueue_get_buf(vblk->vq, &len)) != NULL) { + int error; + +@@ -97,7 +95,7 @@ static void blk_done(struct virtqueue *vq) + } + /* In case queue is stopped waiting for more buffers. 
*/ + blk_start_queue(vblk->disk->queue); +- spin_unlock_irqrestore(&vblk->lock, flags); ++ spin_unlock_irqrestore(vblk->disk->queue->queue_lock, flags); + } + + static bool do_req(struct request_queue *q, struct virtio_blk *vblk, +@@ -384,7 +382,6 @@ static int __devinit virtblk_probe(struct virtio_device *vdev) + } + + INIT_LIST_HEAD(&vblk->reqs); +- spin_lock_init(&vblk->lock); + vblk->vdev = vdev; + vblk->sg_elems = sg_elems; + sg_init_table(vblk->sg, vblk->sg_elems); +@@ -410,7 +407,7 @@ static int __devinit virtblk_probe(struct virtio_device *vdev) + goto out_mempool; + } + +- q = vblk->disk->queue = blk_init_queue(do_virtblk_request, &vblk->lock); ++ q = vblk->disk->queue = blk_init_queue(do_virtblk_request, NULL); + if (!q) { + err = -ENOMEM; + goto out_put_disk; +diff --git a/drivers/char/mspec.c b/drivers/char/mspec.c +index 5c0d96a..b12ffea 100644 +--- a/drivers/char/mspec.c ++++ b/drivers/char/mspec.c +@@ -284,7 +284,7 @@ mspec_mmap(struct file *file, struct vm_area_struct *vma, + vdata->flags = flags; + vdata->type = type; + spin_lock_init(&vdata->lock); +- vdata->refcnt = ATOMIC_INIT(1); ++ atomic_set(&vdata->refcnt, 1); + vma->vm_private_data = vdata; + + vma->vm_flags |= (VM_IO | VM_RESERVED | VM_PFNMAP | VM_DONTEXPAND); +diff --git a/drivers/char/random.c b/drivers/char/random.c +index 6035ab8..631d4f6 100644 +--- a/drivers/char/random.c ++++ b/drivers/char/random.c +@@ -125,21 +125,26 @@ + * The current exported interfaces for gathering environmental noise + * from the devices are: + * ++ * void add_device_randomness(const void *buf, unsigned int size); + * void add_input_randomness(unsigned int type, unsigned int code, + * unsigned int value); +- * void add_interrupt_randomness(int irq); ++ * void add_interrupt_randomness(int irq, int irq_flags); + * void add_disk_randomness(struct gendisk *disk); + * ++ * add_device_randomness() is for adding data to the random pool that ++ * is likely to differ between two devices (or possibly even per boot). ++ * This would be things like MAC addresses or serial numbers, or the ++ * read-out of the RTC. This does *not* add any actual entropy to the ++ * pool, but it initializes the pool to different values for devices ++ * that might otherwise be identical and have very little entropy ++ * available to them (particularly common in the embedded world). ++ * + * add_input_randomness() uses the input layer interrupt timing, as well as + * the event type information from the hardware. + * +- * add_interrupt_randomness() uses the inter-interrupt timing as random +- * inputs to the entropy pool. Note that not all interrupts are good +- * sources of randomness! For example, the timer interrupts is not a +- * good choice, because the periodicity of the interrupts is too +- * regular, and hence predictable to an attacker. Network Interface +- * Controller interrupts are a better measure, since the timing of the +- * NIC interrupts are more unpredictable. ++ * add_interrupt_randomness() uses the interrupt timing as random ++ * inputs to the entropy pool. Using the cycle counters and the irq source ++ * as inputs, it feeds the randomness roughly once a second. 
+ * + * add_disk_randomness() uses what amounts to the seek time of block + * layer request events, on a per-disk_devt basis, as input to the +@@ -248,6 +253,8 @@ + #include + #include + #include ++#include ++#include + + #ifdef CONFIG_GENERIC_HARDIRQS + # include +@@ -256,6 +263,7 @@ + #include + #include + #include ++#include + #include + + /* +@@ -266,6 +274,8 @@ + #define SEC_XFER_SIZE 512 + #define EXTRACT_SIZE 10 + ++#define LONGS(x) (((x) + sizeof(unsigned long) - 1)/sizeof(unsigned long)) ++ + /* + * The minimum number of bits of entropy before we wake up a read on + * /dev/random. Should be enough to do a significant reseed. +@@ -420,8 +430,10 @@ struct entropy_store { + /* read-write data: */ + spinlock_t lock; + unsigned add_ptr; ++ unsigned input_rotate; + int entropy_count; +- int input_rotate; ++ int entropy_total; ++ unsigned int initialized:1; + __u8 last_data[EXTRACT_SIZE]; + }; + +@@ -454,6 +466,10 @@ static struct entropy_store nonblocking_pool = { + .pool = nonblocking_pool_data + }; + ++static __u32 const twist_table[8] = { ++ 0x00000000, 0x3b6e20c8, 0x76dc4190, 0x4db26158, ++ 0xedb88320, 0xd6d6a3e8, 0x9b64c2b0, 0xa00ae278 }; ++ + /* + * This function adds bytes into the entropy "pool". It does not + * update the entropy estimate. The caller should call +@@ -464,29 +480,24 @@ static struct entropy_store nonblocking_pool = { + * it's cheap to do so and helps slightly in the expected case where + * the entropy is concentrated in the low-order bits. + */ +-static void mix_pool_bytes_extract(struct entropy_store *r, const void *in, +- int nbytes, __u8 out[64]) ++static void __mix_pool_bytes(struct entropy_store *r, const void *in, ++ int nbytes, __u8 out[64]) + { +- static __u32 const twist_table[8] = { +- 0x00000000, 0x3b6e20c8, 0x76dc4190, 0x4db26158, +- 0xedb88320, 0xd6d6a3e8, 0x9b64c2b0, 0xa00ae278 }; + unsigned long i, j, tap1, tap2, tap3, tap4, tap5; + int input_rotate; + int wordmask = r->poolinfo->poolwords - 1; + const char *bytes = in; + __u32 w; +- unsigned long flags; + +- /* Taps are constant, so we can load them without holding r->lock. */ + tap1 = r->poolinfo->tap1; + tap2 = r->poolinfo->tap2; + tap3 = r->poolinfo->tap3; + tap4 = r->poolinfo->tap4; + tap5 = r->poolinfo->tap5; + +- spin_lock_irqsave(&r->lock, flags); +- input_rotate = r->input_rotate; +- i = r->add_ptr; ++ smp_rmb(); ++ input_rotate = ACCESS_ONCE(r->input_rotate); ++ i = ACCESS_ONCE(r->add_ptr); + + /* mix one byte at a time to simplify size handling and churn faster */ + while (nbytes--) { +@@ -513,19 +524,53 @@ static void mix_pool_bytes_extract(struct entropy_store *r, const void *in, + input_rotate += i ? 7 : 14; + } + +- r->input_rotate = input_rotate; +- r->add_ptr = i; ++ ACCESS_ONCE(r->input_rotate) = input_rotate; ++ ACCESS_ONCE(r->add_ptr) = i; ++ smp_wmb(); + + if (out) + for (j = 0; j < 16; j++) + ((__u32 *)out)[j] = r->pool[(i - j) & wordmask]; ++} ++ ++static void mix_pool_bytes(struct entropy_store *r, const void *in, ++ int nbytes, __u8 out[64]) ++{ ++ unsigned long flags; + ++ spin_lock_irqsave(&r->lock, flags); ++ __mix_pool_bytes(r, in, nbytes, out); + spin_unlock_irqrestore(&r->lock, flags); + } + +-static void mix_pool_bytes(struct entropy_store *r, const void *in, int bytes) ++struct fast_pool { ++ __u32 pool[4]; ++ unsigned long last; ++ unsigned short count; ++ unsigned char rotate; ++ unsigned char last_timer_intr; ++}; ++ ++/* ++ * This is a fast mixing routine used by the interrupt randomness ++ * collector. 
It's hardcoded for an 128 bit pool and assumes that any ++ * locks that might be needed are taken by the caller. ++ */ ++static void fast_mix(struct fast_pool *f, const void *in, int nbytes) + { +- mix_pool_bytes_extract(r, in, bytes, NULL); ++ const char *bytes = in; ++ __u32 w; ++ unsigned i = f->count; ++ unsigned input_rotate = f->rotate; ++ ++ while (nbytes--) { ++ w = rol32(*bytes++, input_rotate & 31) ^ f->pool[i & 3] ^ ++ f->pool[(i + 1) & 3]; ++ f->pool[i & 3] = (w >> 3) ^ twist_table[w & 7]; ++ input_rotate += (i++ & 3) ? 7 : 14; ++ } ++ f->count = i; ++ f->rotate = input_rotate; + } + + /* +@@ -533,30 +578,34 @@ static void mix_pool_bytes(struct entropy_store *r, const void *in, int bytes) + */ + static void credit_entropy_bits(struct entropy_store *r, int nbits) + { +- unsigned long flags; +- int entropy_count; ++ int entropy_count, orig; + + if (!nbits) + return; + +- spin_lock_irqsave(&r->lock, flags); +- + DEBUG_ENT("added %d entropy credits to %s\n", nbits, r->name); +- entropy_count = r->entropy_count; ++retry: ++ entropy_count = orig = ACCESS_ONCE(r->entropy_count); + entropy_count += nbits; + if (entropy_count < 0) { + DEBUG_ENT("negative entropy/overflow\n"); + entropy_count = 0; + } else if (entropy_count > r->poolinfo->POOLBITS) + entropy_count = r->poolinfo->POOLBITS; +- r->entropy_count = entropy_count; ++ if (cmpxchg(&r->entropy_count, orig, entropy_count) != orig) ++ goto retry; ++ ++ if (!r->initialized && nbits > 0) { ++ r->entropy_total += nbits; ++ if (r->entropy_total > 128) ++ r->initialized = 1; ++ } + + /* should we wake readers? */ + if (r == &input_pool && entropy_count >= random_read_wakeup_thresh) { + wake_up_interruptible(&random_read_wait); + kill_fasync(&fasync, SIGIO, POLL_IN); + } +- spin_unlock_irqrestore(&r->lock, flags); + } + + /********************************************************************* +@@ -609,6 +658,25 @@ static void set_timer_rand_state(unsigned int irq, + } + #endif + ++/* ++ * Add device- or boot-specific data to the input and nonblocking ++ * pools to help initialize them to unique values. ++ * ++ * None of this adds any entropy, it is meant to avoid the ++ * problem of the nonblocking pool having similar initial state ++ * across largely identical devices. ++ */ ++void add_device_randomness(const void *buf, unsigned int size) ++{ ++ unsigned long time = get_cycles() ^ jiffies; ++ ++ mix_pool_bytes(&input_pool, buf, size, NULL); ++ mix_pool_bytes(&input_pool, &time, sizeof(time), NULL); ++ mix_pool_bytes(&nonblocking_pool, buf, size, NULL); ++ mix_pool_bytes(&nonblocking_pool, &time, sizeof(time), NULL); ++} ++EXPORT_SYMBOL(add_device_randomness); ++ + static struct timer_rand_state input_timer_state; + + /* +@@ -624,8 +692,8 @@ static struct timer_rand_state input_timer_state; + static void add_timer_randomness(struct timer_rand_state *state, unsigned num) + { + struct { +- cycles_t cycles; + long jiffies; ++ unsigned cycles; + unsigned num; + } sample; + long delta, delta2, delta3; +@@ -639,7 +707,7 @@ static void add_timer_randomness(struct timer_rand_state *state, unsigned num) + sample.jiffies = jiffies; + sample.cycles = get_cycles(); + sample.num = num; +- mix_pool_bytes(&input_pool, &sample, sizeof(sample)); ++ mix_pool_bytes(&input_pool, &sample, sizeof(sample), NULL); + + /* + * Calculate number of bits of randomness we probably added. 
+@@ -696,17 +764,48 @@ void add_input_randomness(unsigned int type, unsigned int code, + } + EXPORT_SYMBOL_GPL(add_input_randomness); + +-void add_interrupt_randomness(int irq) ++static DEFINE_PER_CPU(struct fast_pool, irq_randomness); ++ ++void add_interrupt_randomness(int irq, int irq_flags) + { +- struct timer_rand_state *state; ++ struct entropy_store *r; ++ struct fast_pool *fast_pool = &__get_cpu_var(irq_randomness); ++ struct pt_regs *regs = get_irq_regs(); ++ unsigned long now = jiffies; ++ __u32 input[4], cycles = get_cycles(); ++ ++ input[0] = cycles ^ jiffies; ++ input[1] = irq; ++ if (regs) { ++ __u64 ip = instruction_pointer(regs); ++ input[2] = ip; ++ input[3] = ip >> 32; ++ } + +- state = get_timer_rand_state(irq); ++ fast_mix(fast_pool, input, sizeof(input)); + +- if (state == NULL) ++ if ((fast_pool->count & 1023) && ++ !time_after(now, fast_pool->last + HZ)) + return; + +- DEBUG_ENT("irq event %d\n", irq); +- add_timer_randomness(state, 0x100 + irq); ++ fast_pool->last = now; ++ ++ r = nonblocking_pool.initialized ? &input_pool : &nonblocking_pool; ++ __mix_pool_bytes(r, &fast_pool->pool, sizeof(fast_pool->pool), NULL); ++ /* ++ * If we don't have a valid cycle counter, and we see ++ * back-to-back timer interrupts, then skip giving credit for ++ * any entropy. ++ */ ++ if (cycles == 0) { ++ if (irq_flags & __IRQF_TIMER) { ++ if (fast_pool->last_timer_intr) ++ return; ++ fast_pool->last_timer_intr = 1; ++ } else ++ fast_pool->last_timer_intr = 0; ++ } ++ credit_entropy_bits(r, 1); + } + + #ifdef CONFIG_BLOCK +@@ -738,7 +837,7 @@ static ssize_t extract_entropy(struct entropy_store *r, void *buf, + */ + static void xfer_secondary_pool(struct entropy_store *r, size_t nbytes) + { +- __u32 tmp[OUTPUT_POOL_WORDS]; ++ __u32 tmp[OUTPUT_POOL_WORDS]; + + if (r->pull && r->entropy_count < nbytes * 8 && + r->entropy_count < r->poolinfo->POOLBITS) { +@@ -757,7 +856,7 @@ static void xfer_secondary_pool(struct entropy_store *r, size_t nbytes) + + bytes = extract_entropy(r->pull, tmp, bytes, + random_read_wakeup_thresh / 8, rsvd); +- mix_pool_bytes(r, tmp, bytes); ++ mix_pool_bytes(r, tmp, bytes, NULL); + credit_entropy_bits(r, bytes*8); + } + } +@@ -816,13 +915,19 @@ static size_t account(struct entropy_store *r, size_t nbytes, int min, + static void extract_buf(struct entropy_store *r, __u8 *out) + { + int i; +- __u32 hash[5], workspace[SHA_WORKSPACE_WORDS]; ++ union { ++ __u32 w[5]; ++ unsigned long l[LONGS(EXTRACT_SIZE)]; ++ } hash; ++ __u32 workspace[SHA_WORKSPACE_WORDS]; + __u8 extract[64]; ++ unsigned long flags; + + /* Generate a hash across the pool, 16 words (512 bits) at a time */ +- sha_init(hash); ++ sha_init(hash.w); ++ spin_lock_irqsave(&r->lock, flags); + for (i = 0; i < r->poolinfo->poolwords; i += 16) +- sha_transform(hash, (__u8 *)(r->pool + i), workspace); ++ sha_transform(hash.w, (__u8 *)(r->pool + i), workspace); + + /* + * We mix the hash back into the pool to prevent backtracking +@@ -833,13 +938,14 @@ static void extract_buf(struct entropy_store *r, __u8 *out) + * brute-forcing the feedback as hard as brute-forcing the + * hash. + */ +- mix_pool_bytes_extract(r, hash, sizeof(hash), extract); ++ __mix_pool_bytes(r, hash.w, sizeof(hash.w), extract); ++ spin_unlock_irqrestore(&r->lock, flags); + + /* + * To avoid duplicates, we atomically extract a portion of the + * pool while mixing, and hash one final time. 
+ */ +- sha_transform(hash, extract, workspace); ++ sha_transform(hash.w, extract, workspace); + memset(extract, 0, sizeof(extract)); + memset(workspace, 0, sizeof(workspace)); + +@@ -848,19 +954,30 @@ static void extract_buf(struct entropy_store *r, __u8 *out) + * pattern, we fold it in half. Thus, we always feed back + * twice as much data as we output. + */ +- hash[0] ^= hash[3]; +- hash[1] ^= hash[4]; +- hash[2] ^= rol32(hash[2], 16); +- memcpy(out, hash, EXTRACT_SIZE); +- memset(hash, 0, sizeof(hash)); ++ hash.w[0] ^= hash.w[3]; ++ hash.w[1] ^= hash.w[4]; ++ hash.w[2] ^= rol32(hash.w[2], 16); ++ ++ /* ++ * If we have a architectural hardware random number ++ * generator, mix that in, too. ++ */ ++ for (i = 0; i < LONGS(EXTRACT_SIZE); i++) { ++ unsigned long v; ++ if (!arch_get_random_long(&v)) ++ break; ++ hash.l[i] ^= v; ++ } ++ ++ memcpy(out, &hash, EXTRACT_SIZE); ++ memset(&hash, 0, sizeof(hash)); + } + + static ssize_t extract_entropy(struct entropy_store *r, void *buf, +- size_t nbytes, int min, int reserved) ++ size_t nbytes, int min, int reserved) + { + ssize_t ret = 0, i; + __u8 tmp[EXTRACT_SIZE]; +- unsigned long flags; + + xfer_secondary_pool(r, nbytes); + nbytes = account(r, nbytes, min, reserved); +@@ -869,6 +986,8 @@ static ssize_t extract_entropy(struct entropy_store *r, void *buf, + extract_buf(r, tmp); + + if (fips_enabled) { ++ unsigned long flags; ++ + spin_lock_irqsave(&r->lock, flags); + if (!memcmp(tmp, r->last_data, EXTRACT_SIZE)) + panic("Hardware RNG duplicated output!\n"); +@@ -927,17 +1046,34 @@ static ssize_t extract_entropy_user(struct entropy_store *r, void __user *buf, + + /* + * This function is the exported kernel interface. It returns some +- * number of good random numbers, suitable for seeding TCP sequence +- * numbers, etc. ++ * number of good random numbers, suitable for key generation, seeding ++ * TCP sequence numbers, etc. It does not use the hw random number ++ * generator, if available; use get_random_bytes_arch() for that. + */ + void get_random_bytes(void *buf, int nbytes) + { ++ extract_entropy(&nonblocking_pool, buf, nbytes, 0, 0); ++} ++EXPORT_SYMBOL(get_random_bytes); ++ ++/* ++ * This function will use the architecture-specific hardware random ++ * number generator if it is available. The arch-specific hw RNG will ++ * almost certainly be faster than what we can do in software, but it ++ * is impossible to verify that it is implemented securely (as ++ * opposed, to, say, the AES encryption of a sequence number using a ++ * key known by the NSA). So it's useful if we need the speed, but ++ * only if we're willing to trust the hardware manufacturer not to ++ * have put in a back door. 
++ */ ++void get_random_bytes_arch(void *buf, int nbytes) ++{ + char *p = buf; + + while (nbytes) { + unsigned long v; + int chunk = min(nbytes, (int)sizeof(unsigned long)); +- ++ + if (!arch_get_random_long(&v)) + break; + +@@ -946,9 +1082,11 @@ void get_random_bytes(void *buf, int nbytes) + nbytes -= chunk; + } + +- extract_entropy(&nonblocking_pool, p, nbytes, 0, 0); ++ if (nbytes) ++ extract_entropy(&nonblocking_pool, p, nbytes, 0, 0); + } +-EXPORT_SYMBOL(get_random_bytes); ++EXPORT_SYMBOL(get_random_bytes_arch); ++ + + /* + * init_std_data - initialize pool with system data +@@ -961,16 +1099,19 @@ EXPORT_SYMBOL(get_random_bytes); + */ + static void init_std_data(struct entropy_store *r) + { +- ktime_t now; +- unsigned long flags; ++ int i; ++ ktime_t now = ktime_get_real(); ++ unsigned long rv; + +- spin_lock_irqsave(&r->lock, flags); + r->entropy_count = 0; +- spin_unlock_irqrestore(&r->lock, flags); +- +- now = ktime_get_real(); +- mix_pool_bytes(r, &now, sizeof(now)); +- mix_pool_bytes(r, utsname(), sizeof(*(utsname()))); ++ r->entropy_total = 0; ++ mix_pool_bytes(r, &now, sizeof(now), NULL); ++ for (i = r->poolinfo->POOLBYTES; i > 0; i -= sizeof(rv)) { ++ if (!arch_get_random_long(&rv)) ++ break; ++ mix_pool_bytes(r, &rv, sizeof(rv), NULL); ++ } ++ mix_pool_bytes(r, utsname(), sizeof(*(utsname())), NULL); + } + + static int rand_initialize(void) +@@ -1107,7 +1248,7 @@ write_pool(struct entropy_store *r, const char __user *buffer, size_t count) + count -= bytes; + p += bytes; + +- mix_pool_bytes(r, buf, bytes); ++ mix_pool_bytes(r, buf, bytes, NULL); + cond_resched(); + } + +diff --git a/drivers/firmware/pcdp.c b/drivers/firmware/pcdp.c +index 51e0e2d..a330492 100644 +--- a/drivers/firmware/pcdp.c ++++ b/drivers/firmware/pcdp.c +@@ -95,7 +95,7 @@ efi_setup_pcdp_console(char *cmdline) + if (efi.hcdp == EFI_INVALID_TABLE_ADDR) + return -ENODEV; + +- pcdp = ioremap(efi.hcdp, 4096); ++ pcdp = early_ioremap(efi.hcdp, 4096); + printk(KERN_INFO "PCDP: v%d at 0x%lx\n", pcdp->rev, efi.hcdp); + + if (strstr(cmdline, "console=hcdp")) { +@@ -131,6 +131,6 @@ efi_setup_pcdp_console(char *cmdline) + } + + out: +- iounmap(pcdp); ++ early_iounmap(pcdp, 4096); + return rc; + } +diff --git a/drivers/gpu/drm/i915/intel_dp.c b/drivers/gpu/drm/i915/intel_dp.c +index d4c4937..fae2050 100644 +--- a/drivers/gpu/drm/i915/intel_dp.c ++++ b/drivers/gpu/drm/i915/intel_dp.c +@@ -708,8 +708,8 @@ intel_dp_mode_fixup(struct drm_encoder *encoder, struct drm_display_mode *mode, + + bpp = adjusted_mode->private_flags & INTEL_MODE_DP_FORCE_6BPC ? 18 : 24; + +- for (lane_count = 1; lane_count <= max_lane_count; lane_count <<= 1) { +- for (clock = 0; clock <= max_clock; clock++) { ++ for (clock = 0; clock <= max_clock; clock++) { ++ for (lane_count = 1; lane_count <= max_lane_count; lane_count <<= 1) { + int link_avail = intel_dp_max_data_rate(intel_dp_link_clock(bws[clock]), lane_count); + + if (intel_dp_link_required(mode->clock, bpp) +diff --git a/drivers/input/mouse/synaptics.c b/drivers/input/mouse/synaptics.c +index a6dcd18..96532bc 100644 +--- a/drivers/input/mouse/synaptics.c ++++ b/drivers/input/mouse/synaptics.c +@@ -40,11 +40,28 @@ + * Note that newer firmware allows querying device for maximum useable + * coordinates. 
+ */ ++#define XMIN 0 ++#define XMAX 6143 ++#define YMIN 0 ++#define YMAX 6143 + #define XMIN_NOMINAL 1472 + #define XMAX_NOMINAL 5472 + #define YMIN_NOMINAL 1408 + #define YMAX_NOMINAL 4448 + ++/* Size in bits of absolute position values reported by the hardware */ ++#define ABS_POS_BITS 13 ++ ++/* ++ * Any position values from the hardware above the following limits are ++ * treated as "wrapped around negative" values that have been truncated to ++ * the 13-bit reporting range of the hardware. These are just reasonable ++ * guesses and can be adjusted if hardware is found that operates outside ++ * of these parameters. ++ */ ++#define X_MAX_POSITIVE (((1 << ABS_POS_BITS) + XMAX) / 2) ++#define Y_MAX_POSITIVE (((1 << ABS_POS_BITS) + YMAX) / 2) ++ + /* + * Synaptics touchpads report the y coordinate from bottom to top, which is + * opposite from what userspace expects. +@@ -544,6 +561,12 @@ static int synaptics_parse_hw_state(const unsigned char buf[], + hw->right = (buf[0] & 0x02) ? 1 : 0; + } + ++ /* Convert wrap-around values to negative */ ++ if (hw->x > X_MAX_POSITIVE) ++ hw->x -= 1 << ABS_POS_BITS; ++ if (hw->y > Y_MAX_POSITIVE) ++ hw->y -= 1 << ABS_POS_BITS; ++ + return 0; + } + +diff --git a/drivers/md/dm-thin.c b/drivers/md/dm-thin.c +index 532a902..d432032 100644 +--- a/drivers/md/dm-thin.c ++++ b/drivers/md/dm-thin.c +@@ -19,7 +19,7 @@ + /* + * Tunable constants + */ +-#define ENDIO_HOOK_POOL_SIZE 10240 ++#define ENDIO_HOOK_POOL_SIZE 1024 + #define DEFERRED_SET_SIZE 64 + #define MAPPING_POOL_SIZE 1024 + #define PRISON_CELLS 1024 +@@ -857,7 +857,7 @@ static void process_prepared_mapping(struct new_mapping *m) + + if (m->err) { + cell_error(m->cell); +- return; ++ goto out; + } + + /* +@@ -869,7 +869,7 @@ static void process_prepared_mapping(struct new_mapping *m) + if (r) { + DMERR("dm_thin_insert_block() failed"); + cell_error(m->cell); +- return; ++ goto out; + } + + /* +@@ -884,6 +884,7 @@ static void process_prepared_mapping(struct new_mapping *m) + } else + cell_defer(tc, m->cell, m->data_block); + ++out: + list_del(&m->list); + mempool_free(m, tc->pool->mapping_pool); + } +diff --git a/drivers/md/raid1.c b/drivers/md/raid1.c +index 2d97bf0..62306e5 100644 +--- a/drivers/md/raid1.c ++++ b/drivers/md/raid1.c +@@ -2321,7 +2321,10 @@ static sector_t sync_request(struct mddev *mddev, sector_t sector_nr, int *skipp + /* There is nowhere to write, so all non-sync + * drives must be failed - so we are finished + */ +- sector_t rv = max_sector - sector_nr; ++ sector_t rv; ++ if (min_bad > 0) ++ max_sector = sector_nr + min_bad; ++ rv = max_sector - sector_nr; + *skipped = 1; + put_buf(r1_bio); + return rv; +diff --git a/drivers/media/rc/ene_ir.c b/drivers/media/rc/ene_ir.c +index ed77c6d..5327061 100644 +--- a/drivers/media/rc/ene_ir.c ++++ b/drivers/media/rc/ene_ir.c +@@ -1018,6 +1018,8 @@ static int ene_probe(struct pnp_dev *pnp_dev, const struct pnp_device_id *id) + + spin_lock_init(&dev->hw_lock); + ++ dev->hw_io = pnp_port_start(pnp_dev, 0); ++ + pnp_set_drvdata(pnp_dev, dev); + dev->pnp_dev = pnp_dev; + +@@ -1072,7 +1074,6 @@ static int ene_probe(struct pnp_dev *pnp_dev, const struct pnp_device_id *id) + + /* claim the resources */ + error = -EBUSY; +- dev->hw_io = pnp_port_start(pnp_dev, 0); + if (!request_region(dev->hw_io, ENE_IO_SIZE, ENE_DRIVER_NAME)) { + dev->hw_io = -1; + dev->irq = -1; +diff --git a/drivers/mfd/ab3100-core.c b/drivers/mfd/ab3100-core.c +index 60107ee..4eec7b7 100644 +--- a/drivers/mfd/ab3100-core.c ++++ b/drivers/mfd/ab3100-core.c +@@ -409,8 +409,6 @@ 
static irqreturn_t ab3100_irq_handler(int irq, void *data) + u32 fatevent; + int err; + +- add_interrupt_randomness(irq); +- + err = ab3100_get_register_page_interruptible(ab3100, AB3100_EVENTA1, + event_regs, 3); + if (err) +diff --git a/drivers/mfd/wm831x-otp.c b/drivers/mfd/wm831x-otp.c +index f742745..b90f3e0 100644 +--- a/drivers/mfd/wm831x-otp.c ++++ b/drivers/mfd/wm831x-otp.c +@@ -18,6 +18,7 @@ + #include + #include + #include ++#include + + #include + #include +@@ -66,6 +67,7 @@ static DEVICE_ATTR(unique_id, 0444, wm831x_unique_id_show, NULL); + + int wm831x_otp_init(struct wm831x *wm831x) + { ++ char uuid[WM831X_UNIQUE_ID_LEN]; + int ret; + + ret = device_create_file(wm831x->dev, &dev_attr_unique_id); +@@ -73,6 +75,12 @@ int wm831x_otp_init(struct wm831x *wm831x) + dev_err(wm831x->dev, "Unique ID attribute not created: %d\n", + ret); + ++ ret = wm831x_unique_id_read(wm831x, uuid); ++ if (ret == 0) ++ add_device_randomness(uuid, sizeof(uuid)); ++ else ++ dev_err(wm831x->dev, "Failed to read UUID: %d\n", ret); ++ + return ret; + } + +diff --git a/drivers/net/wireless/rt2x00/rt2800usb.c b/drivers/net/wireless/rt2x00/rt2800usb.c +index bdf960b..ae7528b 100644 +--- a/drivers/net/wireless/rt2x00/rt2800usb.c ++++ b/drivers/net/wireless/rt2x00/rt2800usb.c +@@ -925,6 +925,7 @@ static struct usb_device_id rt2800usb_device_table[] = { + { USB_DEVICE(0x0411, 0x015d) }, + { USB_DEVICE(0x0411, 0x016f) }, + { USB_DEVICE(0x0411, 0x01a2) }, ++ { USB_DEVICE(0x0411, 0x01ee) }, + /* Corega */ + { USB_DEVICE(0x07aa, 0x002f) }, + { USB_DEVICE(0x07aa, 0x003c) }, +diff --git a/drivers/platform/x86/asus-wmi.c b/drivers/platform/x86/asus-wmi.c +index d1049ee..26fba2d 100644 +--- a/drivers/platform/x86/asus-wmi.c ++++ b/drivers/platform/x86/asus-wmi.c +@@ -1431,14 +1431,9 @@ static int asus_wmi_platform_init(struct asus_wmi *asus) + */ + if (!asus_wmi_evaluate_method(ASUS_WMI_METHODID_DSTS, 0, 0, NULL)) + asus->dsts_id = ASUS_WMI_METHODID_DSTS; +- else if (!asus_wmi_evaluate_method(ASUS_WMI_METHODID_DSTS2, 0, 0, NULL)) ++ else + asus->dsts_id = ASUS_WMI_METHODID_DSTS2; + +- if (!asus->dsts_id) { +- pr_err("Can't find DSTS"); +- return -ENODEV; +- } +- + /* CWAP allow to define the behavior of the Fn+F2 key, + * this method doesn't seems to be present on Eee PCs */ + if (asus->driver->wapf >= 0) +diff --git a/drivers/rtc/rtc-wm831x.c b/drivers/rtc/rtc-wm831x.c +index bdc909b..f3c2110 100644 +--- a/drivers/rtc/rtc-wm831x.c ++++ b/drivers/rtc/rtc-wm831x.c +@@ -24,7 +24,7 @@ + #include + #include + #include +- ++#include + + /* + * R16416 (0x4020) - RTC Write Counter +@@ -96,6 +96,26 @@ struct wm831x_rtc { + unsigned int alarm_enabled:1; + }; + ++static void wm831x_rtc_add_randomness(struct wm831x *wm831x) ++{ ++ int ret; ++ u16 reg; ++ ++ /* ++ * The write counter contains a pseudo-random number which is ++ * regenerated every time we set the RTC so it should be a ++ * useful per-system source of entropy. 
++ */ ++ ret = wm831x_reg_read(wm831x, WM831X_RTC_WRITE_COUNTER); ++ if (ret >= 0) { ++ reg = ret; ++ add_device_randomness(®, sizeof(reg)); ++ } else { ++ dev_warn(wm831x->dev, "Failed to read RTC write counter: %d\n", ++ ret); ++ } ++} ++ + /* + * Read current time and date in RTC + */ +@@ -449,6 +469,8 @@ static int wm831x_rtc_probe(struct platform_device *pdev) + alm_irq, ret); + } + ++ wm831x_rtc_add_randomness(wm831x); ++ + return 0; + + err: +diff --git a/drivers/staging/media/lirc/lirc_sir.c b/drivers/staging/media/lirc/lirc_sir.c +index 6903d39..90e9e32 100644 +--- a/drivers/staging/media/lirc/lirc_sir.c ++++ b/drivers/staging/media/lirc/lirc_sir.c +@@ -53,6 +53,7 @@ + #include + #include + #include ++#include + #ifdef LIRC_ON_SA1100 + #include + #ifdef CONFIG_SA1100_COLLIE +@@ -488,9 +489,11 @@ static struct lirc_driver driver = { + .owner = THIS_MODULE, + }; + ++static struct platform_device *lirc_sir_dev; + + static int init_chrdev(void) + { ++ driver.dev = &lirc_sir_dev->dev; + driver.minor = lirc_register_driver(&driver); + if (driver.minor < 0) { + printk(KERN_ERR LIRC_DRIVER_NAME ": init_chrdev() failed.\n"); +@@ -1216,20 +1219,71 @@ static int init_lirc_sir(void) + return 0; + } + ++static int __devinit lirc_sir_probe(struct platform_device *dev) ++{ ++ return 0; ++} ++ ++static int __devexit lirc_sir_remove(struct platform_device *dev) ++{ ++ return 0; ++} ++ ++static struct platform_driver lirc_sir_driver = { ++ .probe = lirc_sir_probe, ++ .remove = __devexit_p(lirc_sir_remove), ++ .driver = { ++ .name = "lirc_sir", ++ .owner = THIS_MODULE, ++ }, ++}; + + static int __init lirc_sir_init(void) + { + int retval; + ++ retval = platform_driver_register(&lirc_sir_driver); ++ if (retval) { ++ printk(KERN_ERR LIRC_DRIVER_NAME ": Platform driver register " ++ "failed!\n"); ++ return -ENODEV; ++ } ++ ++ lirc_sir_dev = platform_device_alloc("lirc_dev", 0); ++ if (!lirc_sir_dev) { ++ printk(KERN_ERR LIRC_DRIVER_NAME ": Platform device alloc " ++ "failed!\n"); ++ retval = -ENOMEM; ++ goto pdev_alloc_fail; ++ } ++ ++ retval = platform_device_add(lirc_sir_dev); ++ if (retval) { ++ printk(KERN_ERR LIRC_DRIVER_NAME ": Platform device add " ++ "failed!\n"); ++ retval = -ENODEV; ++ goto pdev_add_fail; ++ } ++ + retval = init_chrdev(); + if (retval < 0) +- return retval; ++ goto fail; ++ + retval = init_lirc_sir(); + if (retval) { + drop_chrdev(); +- return retval; ++ goto fail; + } ++ + return 0; ++ ++fail: ++ platform_device_del(lirc_sir_dev); ++pdev_add_fail: ++ platform_device_put(lirc_sir_dev); ++pdev_alloc_fail: ++ platform_driver_unregister(&lirc_sir_driver); ++ return retval; + } + + static void __exit lirc_sir_exit(void) +@@ -1237,6 +1291,8 @@ static void __exit lirc_sir_exit(void) + drop_hardware(); + drop_chrdev(); + drop_port(); ++ platform_device_unregister(lirc_sir_dev); ++ platform_driver_unregister(&lirc_sir_driver); + printk(KERN_INFO LIRC_DRIVER_NAME ": Uninstalled.\n"); + } + +diff --git a/drivers/tty/serial/pch_uart.c b/drivers/tty/serial/pch_uart.c +index a4b192d..08b92a6 100644 +--- a/drivers/tty/serial/pch_uart.c ++++ b/drivers/tty/serial/pch_uart.c +@@ -660,7 +660,8 @@ static void pch_dma_rx_complete(void *arg) + tty_flip_buffer_push(tty); + tty_kref_put(tty); + async_tx_ack(priv->desc_rx); +- pch_uart_hal_enable_interrupt(priv, PCH_UART_HAL_RX_INT); ++ pch_uart_hal_enable_interrupt(priv, PCH_UART_HAL_RX_INT | ++ PCH_UART_HAL_RX_ERR_INT); + } + + static void pch_dma_tx_complete(void *arg) +@@ -715,7 +716,8 @@ static int handle_rx_to(struct eg20t_port *priv) + int 
rx_size; + int ret; + if (!priv->start_rx) { +- pch_uart_hal_disable_interrupt(priv, PCH_UART_HAL_RX_INT); ++ pch_uart_hal_disable_interrupt(priv, PCH_UART_HAL_RX_INT | ++ PCH_UART_HAL_RX_ERR_INT); + return 0; + } + buf = &priv->rxbuf; +@@ -977,11 +979,13 @@ static irqreturn_t pch_uart_interrupt(int irq, void *dev_id) + case PCH_UART_IID_RDR: /* Received Data Ready */ + if (priv->use_dma) { + pch_uart_hal_disable_interrupt(priv, +- PCH_UART_HAL_RX_INT); ++ PCH_UART_HAL_RX_INT | ++ PCH_UART_HAL_RX_ERR_INT); + ret = dma_handle_rx(priv); + if (!ret) + pch_uart_hal_enable_interrupt(priv, +- PCH_UART_HAL_RX_INT); ++ PCH_UART_HAL_RX_INT | ++ PCH_UART_HAL_RX_ERR_INT); + } else { + ret = handle_rx(priv); + } +@@ -1107,7 +1111,8 @@ static void pch_uart_stop_rx(struct uart_port *port) + struct eg20t_port *priv; + priv = container_of(port, struct eg20t_port, port); + priv->start_rx = 0; +- pch_uart_hal_disable_interrupt(priv, PCH_UART_HAL_RX_INT); ++ pch_uart_hal_disable_interrupt(priv, PCH_UART_HAL_RX_INT | ++ PCH_UART_HAL_RX_ERR_INT); + priv->int_dis_flag = 1; + } + +@@ -1163,6 +1168,7 @@ static int pch_uart_startup(struct uart_port *port) + break; + case 16: + fifo_size = PCH_UART_HAL_FIFO16; ++ break; + case 1: + default: + fifo_size = PCH_UART_HAL_FIFO_DIS; +@@ -1200,7 +1206,8 @@ static int pch_uart_startup(struct uart_port *port) + pch_request_dma(port); + + priv->start_rx = 1; +- pch_uart_hal_enable_interrupt(priv, PCH_UART_HAL_RX_INT); ++ pch_uart_hal_enable_interrupt(priv, PCH_UART_HAL_RX_INT | ++ PCH_UART_HAL_RX_ERR_INT); + uart_update_timeout(port, CS8, default_baud); + + return 0; +@@ -1258,7 +1265,7 @@ static void pch_uart_set_termios(struct uart_port *port, + stb = PCH_UART_HAL_STB1; + + if (termios->c_cflag & PARENB) { +- if (!(termios->c_cflag & PARODD)) ++ if (termios->c_cflag & PARODD) + parity = PCH_UART_HAL_PARITY_ODD; + else + parity = PCH_UART_HAL_PARITY_EVEN; +diff --git a/drivers/usb/core/hub.c b/drivers/usb/core/hub.c +index 175b6bb..52340cc 100644 +--- a/drivers/usb/core/hub.c ++++ b/drivers/usb/core/hub.c +@@ -24,6 +24,7 @@ + #include + #include + #include ++#include + + #include + #include +@@ -1897,6 +1898,14 @@ int usb_new_device(struct usb_device *udev) + /* Tell the world! */ + announce_device(udev); + ++ if (udev->serial) ++ add_device_randomness(udev->serial, strlen(udev->serial)); ++ if (udev->product) ++ add_device_randomness(udev->product, strlen(udev->product)); ++ if (udev->manufacturer) ++ add_device_randomness(udev->manufacturer, ++ strlen(udev->manufacturer)); ++ + device_enable_async_suspend(&udev->dev); + /* Register the device. 
The device driver is responsible + * for configuring the device and invoking the add-device +diff --git a/drivers/usb/early/ehci-dbgp.c b/drivers/usb/early/ehci-dbgp.c +index 1fc8f12..347bb05 100644 +--- a/drivers/usb/early/ehci-dbgp.c ++++ b/drivers/usb/early/ehci-dbgp.c +@@ -450,7 +450,7 @@ static int dbgp_ehci_startup(void) + writel(FLAG_CF, &ehci_regs->configured_flag); + + /* Wait until the controller is no longer halted */ +- loop = 10; ++ loop = 1000; + do { + status = readl(&ehci_regs->status); + if (!(status & STS_HALT)) +diff --git a/drivers/video/smscufx.c b/drivers/video/smscufx.c +index aaccffa..dd9533a 100644 +--- a/drivers/video/smscufx.c ++++ b/drivers/video/smscufx.c +@@ -904,7 +904,7 @@ static ssize_t ufx_ops_write(struct fb_info *info, const char __user *buf, + result = fb_sys_write(info, buf, count, ppos); + + if (result > 0) { +- int start = max((int)(offset / info->fix.line_length) - 1, 0); ++ int start = max((int)(offset / info->fix.line_length), 0); + int lines = min((u32)((result / info->fix.line_length) + 1), + (u32)info->var.yres); + +diff --git a/fs/exofs/ore.c b/fs/exofs/ore.c +index 24a49d4..1585db1 100644 +--- a/fs/exofs/ore.c ++++ b/fs/exofs/ore.c +@@ -837,11 +837,11 @@ static int _write_mirror(struct ore_io_state *ios, int cur_comp) + bio->bi_rw |= REQ_WRITE; + } + +- osd_req_write(or, _ios_obj(ios, dev), per_dev->offset, +- bio, per_dev->length); ++ osd_req_write(or, _ios_obj(ios, cur_comp), ++ per_dev->offset, bio, per_dev->length); + ORE_DBGMSG("write(0x%llx) offset=0x%llx " + "length=0x%llx dev=%d\n", +- _LLU(_ios_obj(ios, dev)->id), ++ _LLU(_ios_obj(ios, cur_comp)->id), + _LLU(per_dev->offset), + _LLU(per_dev->length), dev); + } else if (ios->kern_buff) { +@@ -853,20 +853,20 @@ static int _write_mirror(struct ore_io_state *ios, int cur_comp) + (ios->si.unit_off + ios->length > + ios->layout->stripe_unit)); + +- ret = osd_req_write_kern(or, _ios_obj(ios, per_dev->dev), ++ ret = osd_req_write_kern(or, _ios_obj(ios, cur_comp), + per_dev->offset, + ios->kern_buff, ios->length); + if (unlikely(ret)) + goto out; + ORE_DBGMSG2("write_kern(0x%llx) offset=0x%llx " + "length=0x%llx dev=%d\n", +- _LLU(_ios_obj(ios, dev)->id), ++ _LLU(_ios_obj(ios, cur_comp)->id), + _LLU(per_dev->offset), + _LLU(ios->length), per_dev->dev); + } else { +- osd_req_set_attributes(or, _ios_obj(ios, dev)); ++ osd_req_set_attributes(or, _ios_obj(ios, cur_comp)); + ORE_DBGMSG2("obj(0x%llx) set_attributes=%d dev=%d\n", +- _LLU(_ios_obj(ios, dev)->id), ++ _LLU(_ios_obj(ios, cur_comp)->id), + ios->out_attr_len, dev); + } + +diff --git a/fs/nfs/file.c b/fs/nfs/file.c +index c43a452..961e562 100644 +--- a/fs/nfs/file.c ++++ b/fs/nfs/file.c +@@ -452,8 +452,11 @@ static int nfs_release_page(struct page *page, gfp_t gfp) + + dfprintk(PAGECACHE, "NFS: release_page(%p)\n", page); + +- /* Only do I/O if gfp is a superset of GFP_KERNEL */ +- if (mapping && (gfp & GFP_KERNEL) == GFP_KERNEL) { ++ /* Only do I/O if gfp is a superset of GFP_KERNEL, and we're not ++ * doing this memory reclaim for a fs-related allocation. 
++ */ ++ if (mapping && (gfp & GFP_KERNEL) == GFP_KERNEL && ++ !(current->flags & PF_FSTRANS)) { + int how = FLUSH_SYNC; + + /* Don't let kswapd deadlock waiting for OOM RPC calls */ +diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c +index 9cfa60a..87a1746 100644 +--- a/fs/nfsd/nfs4xdr.c ++++ b/fs/nfsd/nfs4xdr.c +@@ -2236,7 +2236,7 @@ out_acl: + if (bmval0 & FATTR4_WORD0_CASE_INSENSITIVE) { + if ((buflen -= 4) < 0) + goto out_resource; +- WRITE32(1); ++ WRITE32(0); + } + if (bmval0 & FATTR4_WORD0_CASE_PRESERVING) { + if ((buflen -= 4) < 0) +diff --git a/fs/nilfs2/ioctl.c b/fs/nilfs2/ioctl.c +index ac258be..c598cfb 100644 +--- a/fs/nilfs2/ioctl.c ++++ b/fs/nilfs2/ioctl.c +@@ -182,7 +182,7 @@ static int nilfs_ioctl_change_cpmode(struct inode *inode, struct file *filp, + if (copy_from_user(&cpmode, argp, sizeof(cpmode))) + goto out; + +- down_read(&inode->i_sb->s_umount); ++ mutex_lock(&nilfs->ns_snapshot_mount_mutex); + + nilfs_transaction_begin(inode->i_sb, &ti, 0); + ret = nilfs_cpfile_change_cpmode( +@@ -192,7 +192,7 @@ static int nilfs_ioctl_change_cpmode(struct inode *inode, struct file *filp, + else + nilfs_transaction_commit(inode->i_sb); /* never fails */ + +- up_read(&inode->i_sb->s_umount); ++ mutex_unlock(&nilfs->ns_snapshot_mount_mutex); + out: + mnt_drop_write(filp->f_path.mnt); + return ret; +diff --git a/fs/nilfs2/super.c b/fs/nilfs2/super.c +index 8351c44..97bfbdd 100644 +--- a/fs/nilfs2/super.c ++++ b/fs/nilfs2/super.c +@@ -951,6 +951,8 @@ static int nilfs_attach_snapshot(struct super_block *s, __u64 cno, + struct nilfs_root *root; + int ret; + ++ mutex_lock(&nilfs->ns_snapshot_mount_mutex); ++ + down_read(&nilfs->ns_segctor_sem); + ret = nilfs_cpfile_is_snapshot(nilfs->ns_cpfile, cno); + up_read(&nilfs->ns_segctor_sem); +@@ -975,6 +977,7 @@ static int nilfs_attach_snapshot(struct super_block *s, __u64 cno, + ret = nilfs_get_root_dentry(s, root, root_dentry); + nilfs_put_root(root); + out: ++ mutex_unlock(&nilfs->ns_snapshot_mount_mutex); + return ret; + } + +diff --git a/fs/nilfs2/the_nilfs.c b/fs/nilfs2/the_nilfs.c +index 35a8970..1c98f53 100644 +--- a/fs/nilfs2/the_nilfs.c ++++ b/fs/nilfs2/the_nilfs.c +@@ -76,6 +76,7 @@ struct the_nilfs *alloc_nilfs(struct block_device *bdev) + nilfs->ns_bdev = bdev; + atomic_set(&nilfs->ns_ndirtyblks, 0); + init_rwsem(&nilfs->ns_sem); ++ mutex_init(&nilfs->ns_snapshot_mount_mutex); + INIT_LIST_HEAD(&nilfs->ns_dirty_files); + INIT_LIST_HEAD(&nilfs->ns_gc_inodes); + spin_lock_init(&nilfs->ns_inode_lock); +diff --git a/fs/nilfs2/the_nilfs.h b/fs/nilfs2/the_nilfs.h +index 9992b11..de7435f 100644 +--- a/fs/nilfs2/the_nilfs.h ++++ b/fs/nilfs2/the_nilfs.h +@@ -47,6 +47,7 @@ enum { + * @ns_flags: flags + * @ns_bdev: block device + * @ns_sem: semaphore for shared states ++ * @ns_snapshot_mount_mutex: mutex to protect snapshot mounts + * @ns_sbh: buffer heads of on-disk super blocks + * @ns_sbp: pointers to super block data + * @ns_sbwtime: previous write time of super block +@@ -99,6 +100,7 @@ struct the_nilfs { + + struct block_device *ns_bdev; + struct rw_semaphore ns_sem; ++ struct mutex ns_snapshot_mount_mutex; + + /* + * used for +diff --git a/include/linux/hugetlb.h b/include/linux/hugetlb.h +index c5ed2f1..a2227f7 100644 +--- a/include/linux/hugetlb.h ++++ b/include/linux/hugetlb.h +@@ -41,6 +41,9 @@ int follow_hugetlb_page(struct mm_struct *, struct vm_area_struct *, + unsigned long *, int *, int, unsigned int flags); + void unmap_hugepage_range(struct vm_area_struct *, + unsigned long, unsigned long, struct page *); ++void 
__unmap_hugepage_range_final(struct vm_area_struct *vma, ++ unsigned long start, unsigned long end, ++ struct page *ref_page); + void __unmap_hugepage_range(struct vm_area_struct *, + unsigned long, unsigned long, struct page *); + int hugetlb_prefault(struct address_space *, struct vm_area_struct *); +@@ -99,6 +102,13 @@ static inline unsigned long hugetlb_total_pages(void) + #define copy_hugetlb_page_range(src, dst, vma) ({ BUG(); 0; }) + #define hugetlb_prefault(mapping, vma) ({ BUG(); 0; }) + #define unmap_hugepage_range(vma, start, end, page) BUG() ++static inline void __unmap_hugepage_range_final(struct vm_area_struct *vma, ++ unsigned long start, unsigned long end, ++ struct page *ref_page) ++{ ++ BUG(); ++} ++ + static inline void hugetlb_report_meminfo(struct seq_file *m) + { + } +diff --git a/include/linux/init_task.h b/include/linux/init_task.h +index df53fdf..cdde2b3 100644 +--- a/include/linux/init_task.h ++++ b/include/linux/init_task.h +@@ -124,8 +124,17 @@ extern struct group_info init_groups; + + extern struct cred init_cred; + ++extern struct task_group root_task_group; ++ ++#ifdef CONFIG_CGROUP_SCHED ++# define INIT_CGROUP_SCHED(tsk) \ ++ .sched_task_group = &root_task_group, ++#else ++# define INIT_CGROUP_SCHED(tsk) ++#endif ++ + #ifdef CONFIG_PERF_EVENTS +-# define INIT_PERF_EVENTS(tsk) \ ++# define INIT_PERF_EVENTS(tsk) \ + .perf_event_mutex = \ + __MUTEX_INITIALIZER(tsk.perf_event_mutex), \ + .perf_event_list = LIST_HEAD_INIT(tsk.perf_event_list), +@@ -162,6 +171,7 @@ extern struct cred init_cred; + }, \ + .tasks = LIST_HEAD_INIT(tsk.tasks), \ + INIT_PUSHABLE_TASKS(tsk) \ ++ INIT_CGROUP_SCHED(tsk) \ + .ptraced = LIST_HEAD_INIT(tsk.ptraced), \ + .ptrace_entry = LIST_HEAD_INIT(tsk.ptrace_entry), \ + .real_parent = &tsk, \ +diff --git a/include/linux/random.h b/include/linux/random.h +index 8f74538..29e217a 100644 +--- a/include/linux/random.h ++++ b/include/linux/random.h +@@ -50,11 +50,13 @@ struct rnd_state { + + extern void rand_initialize_irq(int irq); + ++extern void add_device_randomness(const void *, unsigned int); + extern void add_input_randomness(unsigned int type, unsigned int code, + unsigned int value); +-extern void add_interrupt_randomness(int irq); ++extern void add_interrupt_randomness(int irq, int irq_flags); + + extern void get_random_bytes(void *buf, int nbytes); ++extern void get_random_bytes_arch(void *buf, int nbytes); + void generate_random_uuid(unsigned char uuid_out[16]); + + #ifndef MODULE +diff --git a/include/linux/sched.h b/include/linux/sched.h +index d336c35..1e86bb4 100644 +--- a/include/linux/sched.h ++++ b/include/linux/sched.h +@@ -1236,6 +1236,9 @@ struct task_struct { + const struct sched_class *sched_class; + struct sched_entity se; + struct sched_rt_entity rt; ++#ifdef CONFIG_CGROUP_SCHED ++ struct task_group *sched_task_group; ++#endif + + #ifdef CONFIG_PREEMPT_NOTIFIERS + /* list of struct preempt_notifier: */ +@@ -2646,7 +2649,7 @@ extern int sched_group_set_rt_period(struct task_group *tg, + extern long sched_group_rt_period(struct task_group *tg); + extern int sched_rt_can_attach(struct task_group *tg, struct task_struct *tsk); + #endif +-#endif ++#endif /* CONFIG_CGROUP_SCHED */ + + extern int task_can_switch_user(struct user_struct *up, + struct task_struct *tsk); +diff --git a/kernel/futex.c b/kernel/futex.c +index 866c9d5..80fb1c6 100644 +--- a/kernel/futex.c ++++ b/kernel/futex.c +@@ -2231,11 +2231,11 @@ int handle_early_requeue_pi_wakeup(struct futex_hash_bucket *hb, + * @uaddr2: the pi futex we will take prior to 
returning to user-space + * + * The caller will wait on uaddr and will be requeued by futex_requeue() to +- * uaddr2 which must be PI aware. Normal wakeup will wake on uaddr2 and +- * complete the acquisition of the rt_mutex prior to returning to userspace. +- * This ensures the rt_mutex maintains an owner when it has waiters; without +- * one, the pi logic wouldn't know which task to boost/deboost, if there was a +- * need to. ++ * uaddr2 which must be PI aware and unique from uaddr. Normal wakeup will wake ++ * on uaddr2 and complete the acquisition of the rt_mutex prior to returning to ++ * userspace. This ensures the rt_mutex maintains an owner when it has waiters; ++ * without one, the pi logic would not know which task to boost/deboost, if ++ * there was a need to. + * + * We call schedule in futex_wait_queue_me() when we enqueue and return there + * via the following: +@@ -2272,6 +2272,9 @@ static int futex_wait_requeue_pi(u32 __user *uaddr, unsigned int flags, + struct futex_q q = futex_q_init; + int res, ret; + ++ if (uaddr == uaddr2) ++ return -EINVAL; ++ + if (!bitset) + return -EINVAL; + +@@ -2343,7 +2346,7 @@ static int futex_wait_requeue_pi(u32 __user *uaddr, unsigned int flags, + * signal. futex_unlock_pi() will not destroy the lock_ptr nor + * the pi_state. + */ +- WARN_ON(!&q.pi_state); ++ WARN_ON(!q.pi_state); + pi_mutex = &q.pi_state->pi_mutex; + ret = rt_mutex_finish_proxy_lock(pi_mutex, to, &rt_waiter, 1); + debug_rt_mutex_free_waiter(&rt_waiter); +@@ -2370,7 +2373,7 @@ static int futex_wait_requeue_pi(u32 __user *uaddr, unsigned int flags, + * fault, unlock the rt_mutex and return the fault to userspace. + */ + if (ret == -EFAULT) { +- if (rt_mutex_owner(pi_mutex) == current) ++ if (pi_mutex && rt_mutex_owner(pi_mutex) == current) + rt_mutex_unlock(pi_mutex); + } else if (ret == -EINTR) { + /* +diff --git a/kernel/irq/handle.c b/kernel/irq/handle.c +index 470d08c..10e0772 100644 +--- a/kernel/irq/handle.c ++++ b/kernel/irq/handle.c +@@ -117,7 +117,7 @@ irqreturn_t + handle_irq_event_percpu(struct irq_desc *desc, struct irqaction *action) + { + irqreturn_t retval = IRQ_NONE; +- unsigned int random = 0, irq = desc->irq_data.irq; ++ unsigned int flags = 0, irq = desc->irq_data.irq; + + do { + irqreturn_t res; +@@ -145,7 +145,7 @@ handle_irq_event_percpu(struct irq_desc *desc, struct irqaction *action) + + /* Fall through to add to randomness */ + case IRQ_HANDLED: +- random |= action->flags; ++ flags |= action->flags; + break; + + default: +@@ -156,8 +156,7 @@ handle_irq_event_percpu(struct irq_desc *desc, struct irqaction *action) + action = action->next; + } while (action); + +- if (random & IRQF_SAMPLE_RANDOM) +- add_interrupt_randomness(irq); ++ add_interrupt_randomness(irq, flags); + + if (!noirqdebug) + note_interrupt(irq, desc, retval); +diff --git a/kernel/sched.c b/kernel/sched.c +index 9cd8ca7..e0431c4 100644 +--- a/kernel/sched.c ++++ b/kernel/sched.c +@@ -746,22 +746,19 @@ static inline int cpu_of(struct rq *rq) + /* + * Return the group to which this tasks belongs. + * +- * We use task_subsys_state_check() and extend the RCU verification with +- * pi->lock and rq->lock because cpu_cgroup_attach() holds those locks for each +- * task it moves into the cgroup. Therefore by holding either of those locks, +- * we pin the task to the current cgroup. 
++ * We cannot use task_subsys_state() and friends because the cgroup ++ * subsystem changes that value before the cgroup_subsys::attach() method ++ * is called, therefore we cannot pin it and might observe the wrong value. ++ * ++ * The same is true for autogroup's p->signal->autogroup->tg, the autogroup ++ * core changes this before calling sched_move_task(). ++ * ++ * Instead we use a 'copy' which is updated from sched_move_task() while ++ * holding both task_struct::pi_lock and rq::lock. + */ + static inline struct task_group *task_group(struct task_struct *p) + { +- struct task_group *tg; +- struct cgroup_subsys_state *css; +- +- css = task_subsys_state_check(p, cpu_cgroup_subsys_id, +- lockdep_is_held(&p->pi_lock) || +- lockdep_is_held(&task_rq(p)->lock)); +- tg = container_of(css, struct task_group, css); +- +- return autogroup_task_group(p, tg); ++ return p->sched_task_group; + } + + /* Change a task's cfs_rq and parent entity if it moves across CPUs/groups */ +@@ -2372,7 +2369,7 @@ void set_task_cpu(struct task_struct *p, unsigned int new_cpu) + * a task's CPU. ->pi_lock for waking tasks, rq->lock for runnable tasks. + * + * sched_move_task() holds both and thus holding either pins the cgroup, +- * see set_task_rq(). ++ * see task_group(). + * + * Furthermore, all task_rq users should acquire both locks, see + * task_rq_lock(). +@@ -8952,6 +8949,7 @@ void sched_destroy_group(struct task_group *tg) + */ + void sched_move_task(struct task_struct *tsk) + { ++ struct task_group *tg; + int on_rq, running; + unsigned long flags; + struct rq *rq; +@@ -8966,6 +8964,12 @@ void sched_move_task(struct task_struct *tsk) + if (unlikely(running)) + tsk->sched_class->put_prev_task(rq, tsk); + ++ tg = container_of(task_subsys_state_check(tsk, cpu_cgroup_subsys_id, ++ lockdep_is_held(&tsk->sighand->siglock)), ++ struct task_group, css); ++ tg = autogroup_task_group(tsk, tg); ++ tsk->sched_task_group = tg; ++ + #ifdef CONFIG_FAIR_GROUP_SCHED + if (tsk->sched_class->task_move_group) + tsk->sched_class->task_move_group(tsk, on_rq); +diff --git a/lib/vsprintf.c b/lib/vsprintf.c +index 993599e..d74c317 100644 +--- a/lib/vsprintf.c ++++ b/lib/vsprintf.c +@@ -886,7 +886,8 @@ char *pointer(const char *fmt, char *buf, char *end, void *ptr, + * %pK cannot be used in IRQ context because its test + * for CAP_SYSLOG would be meaningless. + */ +- if (in_irq() || in_serving_softirq() || in_nmi()) { ++ if (kptr_restrict && (in_irq() || in_serving_softirq() || ++ in_nmi())) { + if (spec.field_width == -1) + spec.field_width = 2 * sizeof(void *); + return string(buf, end, "pK-error", spec); +diff --git a/mm/hugetlb.c b/mm/hugetlb.c +index b1e1bad..0f897b8 100644 +--- a/mm/hugetlb.c ++++ b/mm/hugetlb.c +@@ -2382,6 +2382,25 @@ void __unmap_hugepage_range(struct vm_area_struct *vma, unsigned long start, + } + } + ++void __unmap_hugepage_range_final(struct vm_area_struct *vma, ++ unsigned long start, unsigned long end, ++ struct page *ref_page) ++{ ++ __unmap_hugepage_range(vma, start, end, ref_page); ++ ++ /* ++ * Clear this flag so that x86's huge_pmd_share page_table_shareable ++ * test will fail on a vma being torn down, and not grab a page table ++ * on its way out. We're lucky that the flag has such an appropriate ++ * name, and can in fact be safely cleared here. We could clear it ++ * before the __unmap_hugepage_range above, but all that's necessary ++ * is to clear it before releasing the i_mmap_mutex. 
This works ++ * because in the context this is called, the VMA is about to be ++ * destroyed and the i_mmap_mutex is held. ++ */ ++ vma->vm_flags &= ~VM_MAYSHARE; ++} ++ + void unmap_hugepage_range(struct vm_area_struct *vma, unsigned long start, + unsigned long end, struct page *ref_page) + { +@@ -2939,9 +2958,14 @@ void hugetlb_change_protection(struct vm_area_struct *vma, + } + } + spin_unlock(&mm->page_table_lock); +- mutex_unlock(&vma->vm_file->f_mapping->i_mmap_mutex); +- ++ /* ++ * Must flush TLB before releasing i_mmap_mutex: x86's huge_pmd_unshare ++ * may have cleared our pud entry and done put_page on the page table: ++ * once we release i_mmap_mutex, another task can do the final put_page ++ * and that page table be reused and filled with junk. ++ */ + flush_tlb_range(vma, start, end); ++ mutex_unlock(&vma->vm_file->f_mapping->i_mmap_mutex); + } + + int hugetlb_reserve_pages(struct inode *inode, +diff --git a/mm/internal.h b/mm/internal.h +index 2189af4..0c26b5e 100644 +--- a/mm/internal.h ++++ b/mm/internal.h +@@ -309,3 +309,5 @@ extern u64 hwpoison_filter_flags_mask; + extern u64 hwpoison_filter_flags_value; + extern u64 hwpoison_filter_memcg; + extern u32 hwpoison_filter_enable; ++ ++extern void set_pageblock_order(void); +diff --git a/mm/memory.c b/mm/memory.c +index 1b1ca17..70f5daf 100644 +--- a/mm/memory.c ++++ b/mm/memory.c +@@ -1358,8 +1358,11 @@ unsigned long unmap_vmas(struct mmu_gather *tlb, + * Since no pte has actually been setup, it is + * safe to do nothing in this case. + */ +- if (vma->vm_file) +- unmap_hugepage_range(vma, start, end, NULL); ++ if (vma->vm_file) { ++ mutex_lock(&vma->vm_file->f_mapping->i_mmap_mutex); ++ __unmap_hugepage_range_final(vma, start, end, NULL); ++ mutex_unlock(&vma->vm_file->f_mapping->i_mmap_mutex); ++ } + + start = end; + } else +diff --git a/mm/mmu_notifier.c b/mm/mmu_notifier.c +index 9a611d3..862b608 100644 +--- a/mm/mmu_notifier.c ++++ b/mm/mmu_notifier.c +@@ -33,6 +33,24 @@ + void __mmu_notifier_release(struct mm_struct *mm) + { + struct mmu_notifier *mn; ++ struct hlist_node *n; ++ ++ /* ++ * RCU here will block mmu_notifier_unregister until ++ * ->release returns. ++ */ ++ rcu_read_lock(); ++ hlist_for_each_entry_rcu(mn, n, &mm->mmu_notifier_mm->list, hlist) ++ /* ++ * if ->release runs before mmu_notifier_unregister it ++ * must be handled as it's the only way for the driver ++ * to flush all existing sptes and stop the driver ++ * from establishing any more sptes before all the ++ * pages in the mm are freed. ++ */ ++ if (mn->ops->release) ++ mn->ops->release(mn, mm); ++ rcu_read_unlock(); + + spin_lock(&mm->mmu_notifier_mm->lock); + while (unlikely(!hlist_empty(&mm->mmu_notifier_mm->list))) { +@@ -46,23 +64,6 @@ void __mmu_notifier_release(struct mm_struct *mm) + * mmu_notifier_unregister to return. + */ + hlist_del_init_rcu(&mn->hlist); +- /* +- * RCU here will block mmu_notifier_unregister until +- * ->release returns. +- */ +- rcu_read_lock(); +- spin_unlock(&mm->mmu_notifier_mm->lock); +- /* +- * if ->release runs before mmu_notifier_unregister it +- * must be handled as it's the only way for the driver +- * to flush all existing sptes and stop the driver +- * from establishing any more sptes before all the +- * pages in the mm are freed. 
+- */ +- if (mn->ops->release) +- mn->ops->release(mn, mm); +- rcu_read_unlock(); +- spin_lock(&mm->mmu_notifier_mm->lock); + } + spin_unlock(&mm->mmu_notifier_mm->lock); + +@@ -284,16 +285,13 @@ void mmu_notifier_unregister(struct mmu_notifier *mn, struct mm_struct *mm) + { + BUG_ON(atomic_read(&mm->mm_count) <= 0); + +- spin_lock(&mm->mmu_notifier_mm->lock); + if (!hlist_unhashed(&mn->hlist)) { +- hlist_del_rcu(&mn->hlist); +- + /* + * RCU here will force exit_mmap to wait ->release to finish + * before freeing the pages. + */ + rcu_read_lock(); +- spin_unlock(&mm->mmu_notifier_mm->lock); ++ + /* + * exit_mmap will block in mmu_notifier_release to + * guarantee ->release is called before freeing the +@@ -302,8 +300,11 @@ void mmu_notifier_unregister(struct mmu_notifier *mn, struct mm_struct *mm) + if (mn->ops->release) + mn->ops->release(mn, mm); + rcu_read_unlock(); +- } else ++ ++ spin_lock(&mm->mmu_notifier_mm->lock); ++ hlist_del_rcu(&mn->hlist); + spin_unlock(&mm->mmu_notifier_mm->lock); ++ } + + /* + * Wait any running method to finish, of course including +diff --git a/mm/page_alloc.c b/mm/page_alloc.c +index 065dbe8..6e51bf0 100644 +--- a/mm/page_alloc.c ++++ b/mm/page_alloc.c +@@ -4281,25 +4281,24 @@ static inline void setup_usemap(struct pglist_data *pgdat, + + #ifdef CONFIG_HUGETLB_PAGE_SIZE_VARIABLE + +-/* Return a sensible default order for the pageblock size. */ +-static inline int pageblock_default_order(void) +-{ +- if (HPAGE_SHIFT > PAGE_SHIFT) +- return HUGETLB_PAGE_ORDER; +- +- return MAX_ORDER-1; +-} +- + /* Initialise the number of pages represented by NR_PAGEBLOCK_BITS */ +-static inline void __init set_pageblock_order(unsigned int order) ++void __init set_pageblock_order(void) + { ++ unsigned int order; ++ + /* Check that pageblock_nr_pages has not already been setup */ + if (pageblock_order) + return; + ++ if (HPAGE_SHIFT > PAGE_SHIFT) ++ order = HUGETLB_PAGE_ORDER; ++ else ++ order = MAX_ORDER - 1; ++ + /* + * Assume the largest contiguous order of interest is a huge page. +- * This value may be variable depending on boot parameters on IA64 ++ * This value may be variable depending on boot parameters on IA64 and ++ * powerpc. + */ + pageblock_order = order; + } +@@ -4307,15 +4306,13 @@ static inline void __init set_pageblock_order(unsigned int order) + + /* + * When CONFIG_HUGETLB_PAGE_SIZE_VARIABLE is not set, set_pageblock_order() +- * and pageblock_default_order() are unused as pageblock_order is set +- * at compile-time. See include/linux/pageblock-flags.h for the values of +- * pageblock_order based on the kernel config ++ * is unused as pageblock_order is set at compile-time. 
See ++ * include/linux/pageblock-flags.h for the values of pageblock_order based on ++ * the kernel config + */ +-static inline int pageblock_default_order(unsigned int order) ++void __init set_pageblock_order(void) + { +- return MAX_ORDER-1; + } +-#define set_pageblock_order(x) do {} while (0) + + #endif /* CONFIG_HUGETLB_PAGE_SIZE_VARIABLE */ + +@@ -4403,7 +4400,7 @@ static void __paginginit free_area_init_core(struct pglist_data *pgdat, + if (!size) + continue; + +- set_pageblock_order(pageblock_default_order()); ++ set_pageblock_order(); + setup_usemap(pgdat, zone, size); + ret = init_currently_empty_zone(zone, zone_start_pfn, + size, MEMMAP_EARLY); +diff --git a/mm/sparse.c b/mm/sparse.c +index a8bc7d3..bf7d3cc 100644 +--- a/mm/sparse.c ++++ b/mm/sparse.c +@@ -486,6 +486,9 @@ void __init sparse_init(void) + struct page **map_map; + #endif + ++ /* Setup pageblock_order for HUGETLB_PAGE_SIZE_VARIABLE */ ++ set_pageblock_order(); ++ + /* + * map is using big page (aka 2M in x86 64 bit) + * usemap is less one page (aka 24 bytes) +diff --git a/net/core/dev.c b/net/core/dev.c +index 5738654..4b18703 100644 +--- a/net/core/dev.c ++++ b/net/core/dev.c +@@ -1177,6 +1177,7 @@ static int __dev_open(struct net_device *dev) + net_dmaengine_get(); + dev_set_rx_mode(dev); + dev_activate(dev); ++ add_device_randomness(dev->dev_addr, dev->addr_len); + } + + return ret; +@@ -4841,6 +4842,7 @@ int dev_set_mac_address(struct net_device *dev, struct sockaddr *sa) + err = ops->ndo_set_mac_address(dev, sa); + if (!err) + call_netdevice_notifiers(NETDEV_CHANGEADDR, dev); ++ add_device_randomness(dev->dev_addr, dev->addr_len); + return err; + } + EXPORT_SYMBOL(dev_set_mac_address); +@@ -5621,6 +5623,7 @@ int register_netdevice(struct net_device *dev) + dev_init_scheduler(dev); + dev_hold(dev); + list_netdevice(dev); ++ add_device_randomness(dev->dev_addr, dev->addr_len); + + /* Notify protocols, that a new device appeared. 
*/ + ret = call_netdevice_notifiers(NETDEV_REGISTER, dev); +diff --git a/net/core/drop_monitor.c b/net/core/drop_monitor.c +index 7f36b38..b856f87 100644 +--- a/net/core/drop_monitor.c ++++ b/net/core/drop_monitor.c +@@ -33,22 +33,19 @@ + #define TRACE_ON 1 + #define TRACE_OFF 0 + +-static void send_dm_alert(struct work_struct *unused); +- +- + /* + * Globals, our netlink socket pointer + * and the work handle that will send up + * netlink alerts + */ + static int trace_state = TRACE_OFF; +-static DEFINE_SPINLOCK(trace_state_lock); ++static DEFINE_MUTEX(trace_state_mutex); + + struct per_cpu_dm_data { +- struct work_struct dm_alert_work; +- struct sk_buff *skb; +- atomic_t dm_hit_count; +- struct timer_list send_timer; ++ spinlock_t lock; ++ struct sk_buff *skb; ++ struct work_struct dm_alert_work; ++ struct timer_list send_timer; + }; + + struct dm_hw_stat_delta { +@@ -74,56 +71,59 @@ static int dm_delay = 1; + static unsigned long dm_hw_check_delta = 2*HZ; + static LIST_HEAD(hw_stats_list); + +-static void reset_per_cpu_data(struct per_cpu_dm_data *data) ++static struct sk_buff *reset_per_cpu_data(struct per_cpu_dm_data *data) + { + size_t al; + struct net_dm_alert_msg *msg; + struct nlattr *nla; ++ struct sk_buff *skb; ++ unsigned long flags; + + al = sizeof(struct net_dm_alert_msg); + al += dm_hit_limit * sizeof(struct net_dm_drop_point); + al += sizeof(struct nlattr); + +- data->skb = genlmsg_new(al, GFP_KERNEL); +- genlmsg_put(data->skb, 0, 0, &net_drop_monitor_family, +- 0, NET_DM_CMD_ALERT); +- nla = nla_reserve(data->skb, NLA_UNSPEC, sizeof(struct net_dm_alert_msg)); +- msg = nla_data(nla); +- memset(msg, 0, al); +- atomic_set(&data->dm_hit_count, dm_hit_limit); ++ skb = genlmsg_new(al, GFP_KERNEL); ++ ++ if (skb) { ++ genlmsg_put(skb, 0, 0, &net_drop_monitor_family, ++ 0, NET_DM_CMD_ALERT); ++ nla = nla_reserve(skb, NLA_UNSPEC, ++ sizeof(struct net_dm_alert_msg)); ++ msg = nla_data(nla); ++ memset(msg, 0, al); ++ } else { ++ mod_timer(&data->send_timer, jiffies + HZ / 10); ++ } ++ ++ spin_lock_irqsave(&data->lock, flags); ++ swap(data->skb, skb); ++ spin_unlock_irqrestore(&data->lock, flags); ++ ++ return skb; + } + +-static void send_dm_alert(struct work_struct *unused) ++static void send_dm_alert(struct work_struct *work) + { + struct sk_buff *skb; +- struct per_cpu_dm_data *data = &__get_cpu_var(dm_cpu_data); ++ struct per_cpu_dm_data *data; + +- /* +- * Grab the skb we're about to send +- */ +- skb = data->skb; ++ data = container_of(work, struct per_cpu_dm_data, dm_alert_work); + +- /* +- * Replace it with a new one +- */ +- reset_per_cpu_data(data); +- +- /* +- * Ship it! +- */ +- genlmsg_multicast(skb, 0, NET_DM_GRP_ALERT, GFP_KERNEL); ++ skb = reset_per_cpu_data(data); + ++ if (skb) ++ genlmsg_multicast(skb, 0, NET_DM_GRP_ALERT, GFP_KERNEL); + } + + /* + * This is the timer function to delay the sending of an alert + * in the event that more drops will arrive during the +- * hysteresis period. Note that it operates under the timer interrupt +- * so we don't need to disable preemption here ++ * hysteresis period. 
+ */ +-static void sched_send_work(unsigned long unused) ++static void sched_send_work(unsigned long _data) + { +- struct per_cpu_dm_data *data = &__get_cpu_var(dm_cpu_data); ++ struct per_cpu_dm_data *data = (struct per_cpu_dm_data *)_data; + + schedule_work(&data->dm_alert_work); + } +@@ -134,17 +134,19 @@ static void trace_drop_common(struct sk_buff *skb, void *location) + struct nlmsghdr *nlh; + struct nlattr *nla; + int i; +- struct per_cpu_dm_data *data = &__get_cpu_var(dm_cpu_data); ++ struct sk_buff *dskb; ++ struct per_cpu_dm_data *data; ++ unsigned long flags; + ++ local_irq_save(flags); ++ data = &__get_cpu_var(dm_cpu_data); ++ spin_lock(&data->lock); ++ dskb = data->skb; + +- if (!atomic_add_unless(&data->dm_hit_count, -1, 0)) { +- /* +- * we're already at zero, discard this hit +- */ ++ if (!dskb) + goto out; +- } + +- nlh = (struct nlmsghdr *)data->skb->data; ++ nlh = (struct nlmsghdr *)dskb->data; + nla = genlmsg_data(nlmsg_data(nlh)); + msg = nla_data(nla); + for (i = 0; i < msg->entries; i++) { +@@ -153,11 +155,12 @@ static void trace_drop_common(struct sk_buff *skb, void *location) + goto out; + } + } +- ++ if (msg->entries == dm_hit_limit) ++ goto out; + /* + * We need to create a new entry + */ +- __nla_reserve_nohdr(data->skb, sizeof(struct net_dm_drop_point)); ++ __nla_reserve_nohdr(dskb, sizeof(struct net_dm_drop_point)); + nla->nla_len += NLA_ALIGN(sizeof(struct net_dm_drop_point)); + memcpy(msg->points[msg->entries].pc, &location, sizeof(void *)); + msg->points[msg->entries].count = 1; +@@ -165,11 +168,11 @@ static void trace_drop_common(struct sk_buff *skb, void *location) + + if (!timer_pending(&data->send_timer)) { + data->send_timer.expires = jiffies + dm_delay * HZ; +- add_timer_on(&data->send_timer, smp_processor_id()); ++ add_timer(&data->send_timer); + } + + out: +- return; ++ spin_unlock_irqrestore(&data->lock, flags); + } + + static void trace_kfree_skb_hit(void *ignore, struct sk_buff *skb, void *location) +@@ -213,7 +216,7 @@ static int set_all_monitor_traces(int state) + struct dm_hw_stat_delta *new_stat = NULL; + struct dm_hw_stat_delta *temp; + +- spin_lock(&trace_state_lock); ++ mutex_lock(&trace_state_mutex); + + if (state == trace_state) { + rc = -EAGAIN; +@@ -252,7 +255,7 @@ static int set_all_monitor_traces(int state) + rc = -EINPROGRESS; + + out_unlock: +- spin_unlock(&trace_state_lock); ++ mutex_unlock(&trace_state_mutex); + + return rc; + } +@@ -295,12 +298,12 @@ static int dropmon_net_event(struct notifier_block *ev_block, + + new_stat->dev = dev; + new_stat->last_rx = jiffies; +- spin_lock(&trace_state_lock); ++ mutex_lock(&trace_state_mutex); + list_add_rcu(&new_stat->list, &hw_stats_list); +- spin_unlock(&trace_state_lock); ++ mutex_unlock(&trace_state_mutex); + break; + case NETDEV_UNREGISTER: +- spin_lock(&trace_state_lock); ++ mutex_lock(&trace_state_mutex); + list_for_each_entry_safe(new_stat, tmp, &hw_stats_list, list) { + if (new_stat->dev == dev) { + new_stat->dev = NULL; +@@ -311,7 +314,7 @@ static int dropmon_net_event(struct notifier_block *ev_block, + } + } + } +- spin_unlock(&trace_state_lock); ++ mutex_unlock(&trace_state_mutex); + break; + } + out: +@@ -367,13 +370,15 @@ static int __init init_net_drop_monitor(void) + + for_each_present_cpu(cpu) { + data = &per_cpu(dm_cpu_data, cpu); +- reset_per_cpu_data(data); + INIT_WORK(&data->dm_alert_work, send_dm_alert); + init_timer(&data->send_timer); +- data->send_timer.data = cpu; ++ data->send_timer.data = (unsigned long)data; + data->send_timer.function = sched_send_work; ++ 
spin_lock_init(&data->lock); ++ reset_per_cpu_data(data); + } + ++ + goto out; + + out_unreg: +diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c +index 2ef859a..05842ab 100644 +--- a/net/core/rtnetlink.c ++++ b/net/core/rtnetlink.c +@@ -1354,6 +1354,7 @@ static int do_setlink(struct net_device *dev, struct ifinfomsg *ifm, + goto errout; + send_addr_notify = 1; + modified = 1; ++ add_device_randomness(dev->dev_addr, dev->addr_len); + } + + if (tb[IFLA_MTU]) { +diff --git a/net/sunrpc/rpcb_clnt.c b/net/sunrpc/rpcb_clnt.c +index 8761bf8..337c68b 100644 +--- a/net/sunrpc/rpcb_clnt.c ++++ b/net/sunrpc/rpcb_clnt.c +@@ -246,7 +246,7 @@ static int rpcb_create_local_unix(void) + if (IS_ERR(clnt)) { + dprintk("RPC: failed to create AF_LOCAL rpcbind " + "client (errno %ld).\n", PTR_ERR(clnt)); +- result = -PTR_ERR(clnt); ++ result = PTR_ERR(clnt); + goto out; + } + +@@ -293,7 +293,7 @@ static int rpcb_create_local_net(void) + if (IS_ERR(clnt)) { + dprintk("RPC: failed to create local rpcbind " + "client (errno %ld).\n", PTR_ERR(clnt)); +- result = -PTR_ERR(clnt); ++ result = PTR_ERR(clnt); + goto out; + } + +diff --git a/net/sunrpc/sched.c b/net/sunrpc/sched.c +index 4e2b3b4..c90b832 100644 +--- a/net/sunrpc/sched.c ++++ b/net/sunrpc/sched.c +@@ -755,7 +755,9 @@ void rpc_execute(struct rpc_task *task) + + static void rpc_async_schedule(struct work_struct *work) + { ++ current->flags |= PF_FSTRANS; + __rpc_execute(container_of(work, struct rpc_task, u.tk_work)); ++ current->flags &= ~PF_FSTRANS; + } + + /** +diff --git a/net/sunrpc/xprtrdma/transport.c b/net/sunrpc/xprtrdma/transport.c +index b446e10..06cdbff 100644 +--- a/net/sunrpc/xprtrdma/transport.c ++++ b/net/sunrpc/xprtrdma/transport.c +@@ -200,6 +200,7 @@ xprt_rdma_connect_worker(struct work_struct *work) + int rc = 0; + + if (!xprt->shutdown) { ++ current->flags |= PF_FSTRANS; + xprt_clear_connected(xprt); + + dprintk("RPC: %s: %sconnect\n", __func__, +@@ -212,10 +213,10 @@ xprt_rdma_connect_worker(struct work_struct *work) + + out: + xprt_wake_pending_tasks(xprt, rc); +- + out_clear: + dprintk("RPC: %s: exit\n", __func__); + xprt_clear_connecting(xprt); ++ current->flags &= ~PF_FSTRANS; + } + + /* +diff --git a/net/sunrpc/xprtsock.c b/net/sunrpc/xprtsock.c +index 55472c4..1a6edc7 100644 +--- a/net/sunrpc/xprtsock.c ++++ b/net/sunrpc/xprtsock.c +@@ -1895,6 +1895,8 @@ static void xs_local_setup_socket(struct work_struct *work) + if (xprt->shutdown) + goto out; + ++ current->flags |= PF_FSTRANS; ++ + clear_bit(XPRT_CONNECTION_ABORT, &xprt->state); + status = __sock_create(xprt->xprt_net, AF_LOCAL, + SOCK_STREAM, 0, &sock, 1); +@@ -1928,6 +1930,7 @@ static void xs_local_setup_socket(struct work_struct *work) + out: + xprt_clear_connecting(xprt); + xprt_wake_pending_tasks(xprt, status); ++ current->flags &= ~PF_FSTRANS; + } + + static void xs_udp_finish_connecting(struct rpc_xprt *xprt, struct socket *sock) +@@ -1970,6 +1973,8 @@ static void xs_udp_setup_socket(struct work_struct *work) + if (xprt->shutdown) + goto out; + ++ current->flags |= PF_FSTRANS; ++ + /* Start by resetting any existing state */ + xs_reset_transport(transport); + sock = xs_create_sock(xprt, transport, +@@ -1988,6 +1993,7 @@ static void xs_udp_setup_socket(struct work_struct *work) + out: + xprt_clear_connecting(xprt); + xprt_wake_pending_tasks(xprt, status); ++ current->flags &= ~PF_FSTRANS; + } + + /* +@@ -2113,6 +2119,8 @@ static void xs_tcp_setup_socket(struct work_struct *work) + if (xprt->shutdown) + goto out; + ++ current->flags |= PF_FSTRANS; ++ + if (!sock) 
{ + clear_bit(XPRT_CONNECTION_ABORT, &xprt->state); + sock = xs_create_sock(xprt, transport, +@@ -2162,6 +2170,7 @@ static void xs_tcp_setup_socket(struct work_struct *work) + case -EINPROGRESS: + case -EALREADY: + xprt_clear_connecting(xprt); ++ current->flags &= ~PF_FSTRANS; + return; + case -EINVAL: + /* Happens, for instance, if the user specified a link +@@ -2174,6 +2183,7 @@ out_eagain: + out: + xprt_clear_connecting(xprt); + xprt_wake_pending_tasks(xprt, status); ++ current->flags &= ~PF_FSTRANS; + } + + /** +diff --git a/net/wireless/util.c b/net/wireless/util.c +index 74d5292..b5e4c1c 100644 +--- a/net/wireless/util.c ++++ b/net/wireless/util.c +@@ -981,6 +981,9 @@ int cfg80211_can_change_interface(struct cfg80211_registered_device *rdev, + } + mutex_unlock(&rdev->devlist_mtx); + ++ if (total == 1) ++ return 0; ++ + for (i = 0; i < rdev->wiphy.n_iface_combinations; i++) { + const struct ieee80211_iface_combination *c; + struct ieee80211_iface_limit *limits; +diff --git a/sound/drivers/mpu401/mpu401_uart.c b/sound/drivers/mpu401/mpu401_uart.c +index 1cff331..4608c2c 100644 +--- a/sound/drivers/mpu401/mpu401_uart.c ++++ b/sound/drivers/mpu401/mpu401_uart.c +@@ -554,6 +554,7 @@ int snd_mpu401_uart_new(struct snd_card *card, int device, + spin_lock_init(&mpu->output_lock); + spin_lock_init(&mpu->timer_lock); + mpu->hardware = hardware; ++ mpu->irq = -1; + if (! (info_flags & MPU401_INFO_INTEGRATED)) { + int res_size = hardware == MPU401_HW_PC98II ? 4 : 2; + mpu->res = request_region(port, res_size, "MPU401 UART"); +diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c +index 191fd78..2e2eb93 100644 +--- a/sound/pci/hda/patch_realtek.c ++++ b/sound/pci/hda/patch_realtek.c +@@ -4809,6 +4809,15 @@ static int alc269_resume(struct hda_codec *codec) + } + #endif /* CONFIG_PM */ + ++static void alc269_fixup_pincfg_no_hp_to_lineout(struct hda_codec *codec, ++ const struct alc_fixup *fix, int action) ++{ ++ struct alc_spec *spec = codec->spec; ++ ++ if (action == ALC_FIXUP_ACT_PRE_PROBE) ++ spec->parse_flags = HDA_PINCFG_NO_HP_FIXUP; ++} ++ + static void alc269_fixup_hweq(struct hda_codec *codec, + const struct alc_fixup *fix, int action) + { +@@ -4909,6 +4918,8 @@ enum { + ALC269_FIXUP_DMIC, + ALC269VB_FIXUP_AMIC, + ALC269VB_FIXUP_DMIC, ++ ALC269_FIXUP_LENOVO_DOCK, ++ ALC269_FIXUP_PINCFG_NO_HP_TO_LINEOUT, + }; + + static const struct alc_fixup alc269_fixups[] = { +@@ -5029,6 +5040,20 @@ static const struct alc_fixup alc269_fixups[] = { + { } + }, + }, ++ [ALC269_FIXUP_LENOVO_DOCK] = { ++ .type = ALC_FIXUP_PINS, ++ .v.pins = (const struct alc_pincfg[]) { ++ { 0x19, 0x23a11040 }, /* dock mic */ ++ { 0x1b, 0x2121103f }, /* dock headphone */ ++ { } ++ }, ++ .chained = true, ++ .chain_id = ALC269_FIXUP_PINCFG_NO_HP_TO_LINEOUT ++ }, ++ [ALC269_FIXUP_PINCFG_NO_HP_TO_LINEOUT] = { ++ .type = ALC_FIXUP_FUNC, ++ .v.func = alc269_fixup_pincfg_no_hp_to_lineout, ++ }, + }; + + static const struct snd_pci_quirk alc269_fixup_tbl[] = { +@@ -5051,6 +5076,8 @@ static const struct snd_pci_quirk alc269_fixup_tbl[] = { + SND_PCI_QUIRK(0x17aa, 0x21b8, "Thinkpad Edge 14", ALC269_FIXUP_SKU_IGNORE), + SND_PCI_QUIRK(0x17aa, 0x21ca, "Thinkpad L412", ALC269_FIXUP_SKU_IGNORE), + SND_PCI_QUIRK(0x17aa, 0x21e9, "Thinkpad Edge 15", ALC269_FIXUP_SKU_IGNORE), ++ SND_PCI_QUIRK(0x17aa, 0x21f6, "Thinkpad T530", ALC269_FIXUP_LENOVO_DOCK), ++ SND_PCI_QUIRK(0x17aa, 0x2203, "Thinkpad X230 Tablet", ALC269_FIXUP_LENOVO_DOCK), + SND_PCI_QUIRK(0x17aa, 0x3bf8, "Quanta FL1", ALC269_FIXUP_QUANTA_MUTE), + 
SND_PCI_QUIRK(0x17aa, 0x3bf8, "Lenovo Ideapd", ALC269_FIXUP_PCM_44K), + SND_PCI_QUIRK(0x17aa, 0x9e54, "LENOVO NB", ALC269_FIXUP_LENOVO_EAPD), +@@ -5109,6 +5136,7 @@ static const struct snd_pci_quirk alc269_fixup_tbl[] = { + static const struct alc_model_fixup alc269_fixup_models[] = { + {.id = ALC269_FIXUP_AMIC, .name = "laptop-amic"}, + {.id = ALC269_FIXUP_DMIC, .name = "laptop-dmic"}, ++ {.id = ALC269_FIXUP_LENOVO_DOCK, .name = "lenovo-dock"}, + {} + }; + +diff --git a/sound/pci/hda/patch_via.c b/sound/pci/hda/patch_via.c +index 1fe1308..7160ff2 100644 +--- a/sound/pci/hda/patch_via.c ++++ b/sound/pci/hda/patch_via.c +@@ -3227,7 +3227,7 @@ static void set_widgets_power_state_vt1718S(struct hda_codec *codec) + { + struct via_spec *spec = codec->spec; + int imux_is_smixer; +- unsigned int parm; ++ unsigned int parm, parm2; + /* MUX6 (1eh) = stereo mixer */ + imux_is_smixer = + snd_hda_codec_read(codec, 0x1e, 0, AC_VERB_GET_CONNECT_SEL, 0x00) == 5; +@@ -3250,7 +3250,7 @@ static void set_widgets_power_state_vt1718S(struct hda_codec *codec) + parm = AC_PWRST_D3; + set_pin_power_state(codec, 0x27, &parm); + snd_hda_codec_write(codec, 0x1a, 0, AC_VERB_SET_POWER_STATE, parm); +- snd_hda_codec_write(codec, 0xb, 0, AC_VERB_SET_POWER_STATE, parm); ++ parm2 = parm; /* for pin 0x0b */ + + /* PW2 (26h), AOW2 (ah) */ + parm = AC_PWRST_D3; +@@ -3265,6 +3265,9 @@ static void set_widgets_power_state_vt1718S(struct hda_codec *codec) + if (!spec->hp_independent_mode) /* check for redirected HP */ + set_pin_power_state(codec, 0x28, &parm); + snd_hda_codec_write(codec, 0x8, 0, AC_VERB_SET_POWER_STATE, parm); ++ if (!spec->hp_independent_mode && parm2 != AC_PWRST_D3) ++ parm = parm2; ++ snd_hda_codec_write(codec, 0xb, 0, AC_VERB_SET_POWER_STATE, parm); + /* MW9 (21h), Mw2 (1ah), AOW0 (8h) */ + snd_hda_codec_write(codec, 0x21, 0, AC_VERB_SET_POWER_STATE, + imux_is_smixer ? 
AC_PWRST_D0 : parm); +diff --git a/sound/soc/codecs/wm8962.c b/sound/soc/codecs/wm8962.c +index 07dd7eb..e97df24 100644 +--- a/sound/soc/codecs/wm8962.c ++++ b/sound/soc/codecs/wm8962.c +@@ -3105,6 +3105,9 @@ static int wm8962_set_bias_level(struct snd_soc_codec *codec, + /* VMID 2*250k */ + snd_soc_update_bits(codec, WM8962_PWR_MGMT_1, + WM8962_VMID_SEL_MASK, 0x100); ++ ++ if (codec->dapm.bias_level == SND_SOC_BIAS_OFF) ++ msleep(100); + break; + + case SND_SOC_BIAS_OFF: +diff --git a/sound/soc/codecs/wm8994.c b/sound/soc/codecs/wm8994.c +index de61b8a..98c5774 100644 +--- a/sound/soc/codecs/wm8994.c ++++ b/sound/soc/codecs/wm8994.c +@@ -2508,7 +2508,7 @@ static int wm8994_hw_params(struct snd_pcm_substream *substream, + return -EINVAL; + } + +- bclk_rate = params_rate(params) * 2; ++ bclk_rate = params_rate(params) * 4; + switch (params_format(params)) { + case SNDRV_PCM_FORMAT_S16_LE: + bclk_rate *= 16; +diff --git a/sound/usb/clock.c b/sound/usb/clock.c +index 379baad..5e634a2 100644 +--- a/sound/usb/clock.c ++++ b/sound/usb/clock.c +@@ -111,7 +111,8 @@ static bool uac_clock_source_is_valid(struct snd_usb_audio *chip, int source_id) + return 0; + + /* If a clock source can't tell us whether it's valid, we assume it is */ +- if (!uac2_control_is_readable(cs_desc->bmControls, UAC2_CS_CONTROL_CLOCK_VALID)) ++ if (!uac2_control_is_readable(cs_desc->bmControls, ++ UAC2_CS_CONTROL_CLOCK_VALID - 1)) + return 1; + + err = snd_usb_ctl_msg(dev, usb_rcvctrlpipe(dev, 0), UAC2_CS_CUR, diff --git a/3.2.34/bump/1027_linux-3.2.28.patch b/3.2.34/bump/1027_linux-3.2.28.patch new file mode 100644 index 0000000..4dbba4b --- /dev/null +++ b/3.2.34/bump/1027_linux-3.2.28.patch @@ -0,0 +1,1114 @@ +diff --git a/Makefile b/Makefile +index bdf851f..5368961 100644 +--- a/Makefile ++++ b/Makefile +@@ -1,6 +1,6 @@ + VERSION = 3 + PATCHLEVEL = 2 +-SUBLEVEL = 27 ++SUBLEVEL = 28 + EXTRAVERSION = + NAME = Saber-toothed Squirrel + +diff --git a/arch/arm/configs/mxs_defconfig b/arch/arm/configs/mxs_defconfig +index 6ee781b..3ee3e84 100644 +--- a/arch/arm/configs/mxs_defconfig ++++ b/arch/arm/configs/mxs_defconfig +@@ -32,7 +32,6 @@ CONFIG_NO_HZ=y + CONFIG_HIGH_RES_TIMERS=y + CONFIG_PREEMPT_VOLUNTARY=y + CONFIG_AEABI=y +-CONFIG_DEFAULT_MMAP_MIN_ADDR=65536 + CONFIG_AUTO_ZRELADDR=y + CONFIG_FPE_NWFPE=y + CONFIG_NET=y +diff --git a/arch/arm/mach-pxa/raumfeld.c b/arch/arm/mach-pxa/raumfeld.c +index f0c05f4..ae7786d 100644 +--- a/arch/arm/mach-pxa/raumfeld.c ++++ b/arch/arm/mach-pxa/raumfeld.c +@@ -951,12 +951,12 @@ static struct i2c_board_info raumfeld_connector_i2c_board_info __initdata = { + + static struct eeti_ts_platform_data eeti_ts_pdata = { + .irq_active_high = 1, ++ .irq_gpio = GPIO_TOUCH_IRQ, + }; + + static struct i2c_board_info raumfeld_controller_i2c_board_info __initdata = { + .type = "eeti_ts", + .addr = 0x0a, +- .irq = gpio_to_irq(GPIO_TOUCH_IRQ), + .platform_data = &eeti_ts_pdata, + }; + +diff --git a/arch/s390/kernel/compat_linux.c b/arch/s390/kernel/compat_linux.c +index 84a9828..38c6645 100644 +--- a/arch/s390/kernel/compat_linux.c ++++ b/arch/s390/kernel/compat_linux.c +@@ -615,7 +615,6 @@ asmlinkage unsigned long old32_mmap(struct mmap_arg_struct_emu31 __user *arg) + return -EFAULT; + if (a.offset & ~PAGE_MASK) + return -EINVAL; +- a.addr = (unsigned long) compat_ptr(a.addr); + return sys_mmap_pgoff(a.addr, a.len, a.prot, a.flags, a.fd, + a.offset >> PAGE_SHIFT); + } +@@ -626,7 +625,6 @@ asmlinkage long sys32_mmap2(struct mmap_arg_struct_emu31 __user *arg) + + if (copy_from_user(&a, arg, sizeof(a))) + 
return -EFAULT; +- a.addr = (unsigned long) compat_ptr(a.addr); + return sys_mmap_pgoff(a.addr, a.len, a.prot, a.flags, a.fd, a.offset); + } + +diff --git a/arch/s390/kernel/compat_wrapper.S b/arch/s390/kernel/compat_wrapper.S +index 18c51df..25408d3 100644 +--- a/arch/s390/kernel/compat_wrapper.S ++++ b/arch/s390/kernel/compat_wrapper.S +@@ -1636,7 +1636,7 @@ ENTRY(compat_sys_process_vm_readv_wrapper) + llgfr %r6,%r6 # unsigned long + llgf %r0,164(%r15) # unsigned long + stg %r0,160(%r15) +- jg sys_process_vm_readv ++ jg compat_sys_process_vm_readv + + ENTRY(compat_sys_process_vm_writev_wrapper) + lgfr %r2,%r2 # compat_pid_t +@@ -1646,4 +1646,4 @@ ENTRY(compat_sys_process_vm_writev_wrapper) + llgfr %r6,%r6 # unsigned long + llgf %r0,164(%r15) # unsigned long + stg %r0,160(%r15) +- jg sys_process_vm_writev ++ jg compat_sys_process_vm_writev +diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c +index 7315488..407789b 100644 +--- a/arch/x86/kvm/vmx.c ++++ b/arch/x86/kvm/vmx.c +@@ -1956,6 +1956,7 @@ static __init void nested_vmx_setup_ctls_msrs(void) + #endif + CPU_BASED_MOV_DR_EXITING | CPU_BASED_UNCOND_IO_EXITING | + CPU_BASED_USE_IO_BITMAPS | CPU_BASED_MONITOR_EXITING | ++ CPU_BASED_RDPMC_EXITING | + CPU_BASED_ACTIVATE_SECONDARY_CONTROLS; + /* + * We can allow some features even when not supported by the +diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h +index d62c731..c364358 100644 +--- a/drivers/gpu/drm/i915/i915_drv.h ++++ b/drivers/gpu/drm/i915/i915_drv.h +@@ -1170,12 +1170,7 @@ i915_seqno_passed(uint32_t seq1, uint32_t seq2) + return (int32_t)(seq1 - seq2) >= 0; + } + +-static inline u32 +-i915_gem_next_request_seqno(struct intel_ring_buffer *ring) +-{ +- drm_i915_private_t *dev_priv = ring->dev->dev_private; +- return ring->outstanding_lazy_request = dev_priv->next_seqno; +-} ++u32 i915_gem_next_request_seqno(struct intel_ring_buffer *ring); + + int __must_check i915_gem_object_get_fence(struct drm_i915_gem_object *obj, + struct intel_ring_buffer *pipelined); +diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c +index 3e2edc6..548a400 100644 +--- a/drivers/gpu/drm/i915/i915_gem.c ++++ b/drivers/gpu/drm/i915/i915_gem.c +@@ -1647,6 +1647,28 @@ i915_gem_process_flushing_list(struct intel_ring_buffer *ring, + } + } + ++static u32 ++i915_gem_get_seqno(struct drm_device *dev) ++{ ++ drm_i915_private_t *dev_priv = dev->dev_private; ++ u32 seqno = dev_priv->next_seqno; ++ ++ /* reserve 0 for non-seqno */ ++ if (++dev_priv->next_seqno == 0) ++ dev_priv->next_seqno = 1; ++ ++ return seqno; ++} ++ ++u32 ++i915_gem_next_request_seqno(struct intel_ring_buffer *ring) ++{ ++ if (ring->outstanding_lazy_request == 0) ++ ring->outstanding_lazy_request = i915_gem_get_seqno(ring->dev); ++ ++ return ring->outstanding_lazy_request; ++} ++ + int + i915_add_request(struct intel_ring_buffer *ring, + struct drm_file *file, +@@ -1658,6 +1680,7 @@ i915_add_request(struct intel_ring_buffer *ring, + int ret; + + BUG_ON(request == NULL); ++ seqno = i915_gem_next_request_seqno(ring); + + ret = ring->add_request(ring, &seqno); + if (ret) +diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c +index f6613dc..19085c0 100644 +--- a/drivers/gpu/drm/i915/intel_ringbuffer.c ++++ b/drivers/gpu/drm/i915/intel_ringbuffer.c +@@ -52,20 +52,6 @@ static inline int ring_space(struct intel_ring_buffer *ring) + return space; + } + +-static u32 i915_gem_get_seqno(struct drm_device *dev) +-{ +- drm_i915_private_t *dev_priv = 
dev->dev_private; +- u32 seqno; +- +- seqno = dev_priv->next_seqno; +- +- /* reserve 0 for non-seqno */ +- if (++dev_priv->next_seqno == 0) +- dev_priv->next_seqno = 1; +- +- return seqno; +-} +- + static int + render_ring_flush(struct intel_ring_buffer *ring, + u32 invalidate_domains, +@@ -277,8 +263,6 @@ static int init_ring_common(struct intel_ring_buffer *ring) + I915_WRITE_HEAD(ring, 0); + ring->write_tail(ring, 0); + +- /* Initialize the ring. */ +- I915_WRITE_START(ring, obj->gtt_offset); + head = I915_READ_HEAD(ring) & HEAD_ADDR; + + /* G45 ring initialization fails to reset head to zero */ +@@ -304,14 +288,19 @@ static int init_ring_common(struct intel_ring_buffer *ring) + } + } + ++ /* Initialize the ring. This must happen _after_ we've cleared the ring ++ * registers with the above sequence (the readback of the HEAD registers ++ * also enforces ordering), otherwise the hw might lose the new ring ++ * register values. */ ++ I915_WRITE_START(ring, obj->gtt_offset); + I915_WRITE_CTL(ring, + ((ring->size - PAGE_SIZE) & RING_NR_PAGES) + | RING_VALID); + + /* If the head is still not zero, the ring is dead */ +- if ((I915_READ_CTL(ring) & RING_VALID) == 0 || +- I915_READ_START(ring) != obj->gtt_offset || +- (I915_READ_HEAD(ring) & HEAD_ADDR) != 0) { ++ if (wait_for((I915_READ_CTL(ring) & RING_VALID) != 0 && ++ I915_READ_START(ring) == obj->gtt_offset && ++ (I915_READ_HEAD(ring) & HEAD_ADDR) == 0, 50)) { + DRM_ERROR("%s initialization failed " + "ctl %08x head %08x tail %08x start %08x\n", + ring->name, +@@ -488,7 +477,7 @@ gen6_add_request(struct intel_ring_buffer *ring, + mbox1_reg = ring->signal_mbox[0]; + mbox2_reg = ring->signal_mbox[1]; + +- *seqno = i915_gem_get_seqno(ring->dev); ++ *seqno = i915_gem_next_request_seqno(ring); + + update_mboxes(ring, *seqno, mbox1_reg); + update_mboxes(ring, *seqno, mbox2_reg); +@@ -586,8 +575,7 @@ static int + pc_render_add_request(struct intel_ring_buffer *ring, + u32 *result) + { +- struct drm_device *dev = ring->dev; +- u32 seqno = i915_gem_get_seqno(dev); ++ u32 seqno = i915_gem_next_request_seqno(ring); + struct pipe_control *pc = ring->private; + u32 scratch_addr = pc->gtt_offset + 128; + int ret; +@@ -638,8 +626,7 @@ static int + render_ring_add_request(struct intel_ring_buffer *ring, + u32 *result) + { +- struct drm_device *dev = ring->dev; +- u32 seqno = i915_gem_get_seqno(dev); ++ u32 seqno = i915_gem_next_request_seqno(ring); + int ret; + + ret = intel_ring_begin(ring, 4); +@@ -813,7 +800,7 @@ ring_add_request(struct intel_ring_buffer *ring, + if (ret) + return ret; + +- seqno = i915_gem_get_seqno(ring->dev); ++ seqno = i915_gem_next_request_seqno(ring); + + intel_ring_emit(ring, MI_STORE_DWORD_INDEX); + intel_ring_emit(ring, I915_GEM_HWS_INDEX << MI_STORE_DWORD_INDEX_SHIFT); +diff --git a/drivers/gpu/drm/radeon/evergreen.c b/drivers/gpu/drm/radeon/evergreen.c +index 931f4df..fc0633c 100644 +--- a/drivers/gpu/drm/radeon/evergreen.c ++++ b/drivers/gpu/drm/radeon/evergreen.c +@@ -1065,24 +1065,8 @@ void evergreen_agp_enable(struct radeon_device *rdev) + + void evergreen_mc_stop(struct radeon_device *rdev, struct evergreen_mc_save *save) + { +- save->vga_control[0] = RREG32(D1VGA_CONTROL); +- save->vga_control[1] = RREG32(D2VGA_CONTROL); + save->vga_render_control = RREG32(VGA_RENDER_CONTROL); + save->vga_hdp_control = RREG32(VGA_HDP_CONTROL); +- save->crtc_control[0] = RREG32(EVERGREEN_CRTC_CONTROL + EVERGREEN_CRTC0_REGISTER_OFFSET); +- save->crtc_control[1] = RREG32(EVERGREEN_CRTC_CONTROL + EVERGREEN_CRTC1_REGISTER_OFFSET); +- if 
(rdev->num_crtc >= 4) { +- save->vga_control[2] = RREG32(EVERGREEN_D3VGA_CONTROL); +- save->vga_control[3] = RREG32(EVERGREEN_D4VGA_CONTROL); +- save->crtc_control[2] = RREG32(EVERGREEN_CRTC_CONTROL + EVERGREEN_CRTC2_REGISTER_OFFSET); +- save->crtc_control[3] = RREG32(EVERGREEN_CRTC_CONTROL + EVERGREEN_CRTC3_REGISTER_OFFSET); +- } +- if (rdev->num_crtc >= 6) { +- save->vga_control[4] = RREG32(EVERGREEN_D5VGA_CONTROL); +- save->vga_control[5] = RREG32(EVERGREEN_D6VGA_CONTROL); +- save->crtc_control[4] = RREG32(EVERGREEN_CRTC_CONTROL + EVERGREEN_CRTC4_REGISTER_OFFSET); +- save->crtc_control[5] = RREG32(EVERGREEN_CRTC_CONTROL + EVERGREEN_CRTC5_REGISTER_OFFSET); +- } + + /* Stop all video */ + WREG32(VGA_RENDER_CONTROL, 0); +@@ -1193,47 +1177,6 @@ void evergreen_mc_resume(struct radeon_device *rdev, struct evergreen_mc_save *s + /* Unlock host access */ + WREG32(VGA_HDP_CONTROL, save->vga_hdp_control); + mdelay(1); +- /* Restore video state */ +- WREG32(D1VGA_CONTROL, save->vga_control[0]); +- WREG32(D2VGA_CONTROL, save->vga_control[1]); +- if (rdev->num_crtc >= 4) { +- WREG32(EVERGREEN_D3VGA_CONTROL, save->vga_control[2]); +- WREG32(EVERGREEN_D4VGA_CONTROL, save->vga_control[3]); +- } +- if (rdev->num_crtc >= 6) { +- WREG32(EVERGREEN_D5VGA_CONTROL, save->vga_control[4]); +- WREG32(EVERGREEN_D6VGA_CONTROL, save->vga_control[5]); +- } +- WREG32(EVERGREEN_CRTC_UPDATE_LOCK + EVERGREEN_CRTC0_REGISTER_OFFSET, 1); +- WREG32(EVERGREEN_CRTC_UPDATE_LOCK + EVERGREEN_CRTC1_REGISTER_OFFSET, 1); +- if (rdev->num_crtc >= 4) { +- WREG32(EVERGREEN_CRTC_UPDATE_LOCK + EVERGREEN_CRTC2_REGISTER_OFFSET, 1); +- WREG32(EVERGREEN_CRTC_UPDATE_LOCK + EVERGREEN_CRTC3_REGISTER_OFFSET, 1); +- } +- if (rdev->num_crtc >= 6) { +- WREG32(EVERGREEN_CRTC_UPDATE_LOCK + EVERGREEN_CRTC4_REGISTER_OFFSET, 1); +- WREG32(EVERGREEN_CRTC_UPDATE_LOCK + EVERGREEN_CRTC5_REGISTER_OFFSET, 1); +- } +- WREG32(EVERGREEN_CRTC_CONTROL + EVERGREEN_CRTC0_REGISTER_OFFSET, save->crtc_control[0]); +- WREG32(EVERGREEN_CRTC_CONTROL + EVERGREEN_CRTC1_REGISTER_OFFSET, save->crtc_control[1]); +- if (rdev->num_crtc >= 4) { +- WREG32(EVERGREEN_CRTC_CONTROL + EVERGREEN_CRTC2_REGISTER_OFFSET, save->crtc_control[2]); +- WREG32(EVERGREEN_CRTC_CONTROL + EVERGREEN_CRTC3_REGISTER_OFFSET, save->crtc_control[3]); +- } +- if (rdev->num_crtc >= 6) { +- WREG32(EVERGREEN_CRTC_CONTROL + EVERGREEN_CRTC4_REGISTER_OFFSET, save->crtc_control[4]); +- WREG32(EVERGREEN_CRTC_CONTROL + EVERGREEN_CRTC5_REGISTER_OFFSET, save->crtc_control[5]); +- } +- WREG32(EVERGREEN_CRTC_UPDATE_LOCK + EVERGREEN_CRTC0_REGISTER_OFFSET, 0); +- WREG32(EVERGREEN_CRTC_UPDATE_LOCK + EVERGREEN_CRTC1_REGISTER_OFFSET, 0); +- if (rdev->num_crtc >= 4) { +- WREG32(EVERGREEN_CRTC_UPDATE_LOCK + EVERGREEN_CRTC2_REGISTER_OFFSET, 0); +- WREG32(EVERGREEN_CRTC_UPDATE_LOCK + EVERGREEN_CRTC3_REGISTER_OFFSET, 0); +- } +- if (rdev->num_crtc >= 6) { +- WREG32(EVERGREEN_CRTC_UPDATE_LOCK + EVERGREEN_CRTC4_REGISTER_OFFSET, 0); +- WREG32(EVERGREEN_CRTC_UPDATE_LOCK + EVERGREEN_CRTC5_REGISTER_OFFSET, 0); +- } + WREG32(VGA_RENDER_CONTROL, save->vga_render_control); + } + +@@ -2080,10 +2023,18 @@ static void evergreen_gpu_init(struct radeon_device *rdev) + if (rdev->flags & RADEON_IS_IGP) + rdev->config.evergreen.tile_config |= 1 << 4; + else { +- if ((mc_arb_ramcfg & NOOFBANK_MASK) >> NOOFBANK_SHIFT) +- rdev->config.evergreen.tile_config |= 1 << 4; +- else ++ switch ((mc_arb_ramcfg & NOOFBANK_MASK) >> NOOFBANK_SHIFT) { ++ case 0: /* four banks */ + rdev->config.evergreen.tile_config |= 0 << 4; ++ break; ++ case 1: /* eight banks 
*/ ++ rdev->config.evergreen.tile_config |= 1 << 4; ++ break; ++ case 2: /* sixteen banks */ ++ default: ++ rdev->config.evergreen.tile_config |= 2 << 4; ++ break; ++ } + } + rdev->config.evergreen.tile_config |= + ((mc_arb_ramcfg & BURSTLENGTH_MASK) >> BURSTLENGTH_SHIFT) << 8; +diff --git a/drivers/gpu/drm/radeon/ni.c b/drivers/gpu/drm/radeon/ni.c +index 9e50814..636255b 100644 +--- a/drivers/gpu/drm/radeon/ni.c ++++ b/drivers/gpu/drm/radeon/ni.c +@@ -804,10 +804,18 @@ static void cayman_gpu_init(struct radeon_device *rdev) + rdev->config.cayman.tile_config |= (3 << 0); + break; + } +- if ((mc_arb_ramcfg & NOOFBANK_MASK) >> NOOFBANK_SHIFT) +- rdev->config.cayman.tile_config |= 1 << 4; +- else ++ switch ((mc_arb_ramcfg & NOOFBANK_MASK) >> NOOFBANK_SHIFT) { ++ case 0: /* four banks */ + rdev->config.cayman.tile_config |= 0 << 4; ++ break; ++ case 1: /* eight banks */ ++ rdev->config.cayman.tile_config |= 1 << 4; ++ break; ++ case 2: /* sixteen banks */ ++ default: ++ rdev->config.cayman.tile_config |= 2 << 4; ++ break; ++ } + rdev->config.cayman.tile_config |= + ((gb_addr_config & PIPE_INTERLEAVE_SIZE_MASK) >> PIPE_INTERLEAVE_SIZE_SHIFT) << 8; + rdev->config.cayman.tile_config |= +diff --git a/drivers/gpu/drm/radeon/radeon_asic.h b/drivers/gpu/drm/radeon/radeon_asic.h +index 5991484..5ce9402 100644 +--- a/drivers/gpu/drm/radeon/radeon_asic.h ++++ b/drivers/gpu/drm/radeon/radeon_asic.h +@@ -253,13 +253,10 @@ void rs690_line_buffer_adjust(struct radeon_device *rdev, + * rv515 + */ + struct rv515_mc_save { +- u32 d1vga_control; +- u32 d2vga_control; + u32 vga_render_control; + u32 vga_hdp_control; +- u32 d1crtc_control; +- u32 d2crtc_control; + }; ++ + int rv515_init(struct radeon_device *rdev); + void rv515_fini(struct radeon_device *rdev); + uint32_t rv515_mc_rreg(struct radeon_device *rdev, uint32_t reg); +@@ -387,11 +384,10 @@ void r700_cp_fini(struct radeon_device *rdev); + * evergreen + */ + struct evergreen_mc_save { +- u32 vga_control[6]; + u32 vga_render_control; + u32 vga_hdp_control; +- u32 crtc_control[6]; + }; ++ + void evergreen_pcie_gart_tlb_flush(struct radeon_device *rdev); + int evergreen_init(struct radeon_device *rdev); + void evergreen_fini(struct radeon_device *rdev); +diff --git a/drivers/gpu/drm/radeon/rv515.c b/drivers/gpu/drm/radeon/rv515.c +index 6613ee9..d5f45b4 100644 +--- a/drivers/gpu/drm/radeon/rv515.c ++++ b/drivers/gpu/drm/radeon/rv515.c +@@ -281,12 +281,8 @@ int rv515_debugfs_ga_info_init(struct radeon_device *rdev) + + void rv515_mc_stop(struct radeon_device *rdev, struct rv515_mc_save *save) + { +- save->d1vga_control = RREG32(R_000330_D1VGA_CONTROL); +- save->d2vga_control = RREG32(R_000338_D2VGA_CONTROL); + save->vga_render_control = RREG32(R_000300_VGA_RENDER_CONTROL); + save->vga_hdp_control = RREG32(R_000328_VGA_HDP_CONTROL); +- save->d1crtc_control = RREG32(R_006080_D1CRTC_CONTROL); +- save->d2crtc_control = RREG32(R_006880_D2CRTC_CONTROL); + + /* Stop all video */ + WREG32(R_0068E8_D2CRTC_UPDATE_LOCK, 0); +@@ -311,15 +307,6 @@ void rv515_mc_resume(struct radeon_device *rdev, struct rv515_mc_save *save) + /* Unlock host access */ + WREG32(R_000328_VGA_HDP_CONTROL, save->vga_hdp_control); + mdelay(1); +- /* Restore video state */ +- WREG32(R_000330_D1VGA_CONTROL, save->d1vga_control); +- WREG32(R_000338_D2VGA_CONTROL, save->d2vga_control); +- WREG32(R_0060E8_D1CRTC_UPDATE_LOCK, 1); +- WREG32(R_0068E8_D2CRTC_UPDATE_LOCK, 1); +- WREG32(R_006080_D1CRTC_CONTROL, save->d1crtc_control); +- WREG32(R_006880_D2CRTC_CONTROL, save->d2crtc_control); +- 
WREG32(R_0060E8_D1CRTC_UPDATE_LOCK, 0); +- WREG32(R_0068E8_D2CRTC_UPDATE_LOCK, 0); + WREG32(R_000300_VGA_RENDER_CONTROL, save->vga_render_control); + } + +diff --git a/drivers/input/touchscreen/eeti_ts.c b/drivers/input/touchscreen/eeti_ts.c +index 7f8f538..4f938bb 100644 +--- a/drivers/input/touchscreen/eeti_ts.c ++++ b/drivers/input/touchscreen/eeti_ts.c +@@ -48,7 +48,7 @@ struct eeti_ts_priv { + struct input_dev *input; + struct work_struct work; + struct mutex mutex; +- int irq, irq_active_high; ++ int irq_gpio, irq, irq_active_high; + }; + + #define EETI_TS_BITDEPTH (11) +@@ -62,7 +62,7 @@ struct eeti_ts_priv { + + static inline int eeti_ts_irq_active(struct eeti_ts_priv *priv) + { +- return gpio_get_value(irq_to_gpio(priv->irq)) == priv->irq_active_high; ++ return gpio_get_value(priv->irq_gpio) == priv->irq_active_high; + } + + static void eeti_ts_read(struct work_struct *work) +@@ -157,7 +157,7 @@ static void eeti_ts_close(struct input_dev *dev) + static int __devinit eeti_ts_probe(struct i2c_client *client, + const struct i2c_device_id *idp) + { +- struct eeti_ts_platform_data *pdata; ++ struct eeti_ts_platform_data *pdata = client->dev.platform_data; + struct eeti_ts_priv *priv; + struct input_dev *input; + unsigned int irq_flags; +@@ -199,9 +199,12 @@ static int __devinit eeti_ts_probe(struct i2c_client *client, + + priv->client = client; + priv->input = input; +- priv->irq = client->irq; ++ priv->irq_gpio = pdata->irq_gpio; ++ priv->irq = gpio_to_irq(pdata->irq_gpio); + +- pdata = client->dev.platform_data; ++ err = gpio_request_one(pdata->irq_gpio, GPIOF_IN, client->name); ++ if (err < 0) ++ goto err1; + + if (pdata) + priv->irq_active_high = pdata->irq_active_high; +@@ -215,13 +218,13 @@ static int __devinit eeti_ts_probe(struct i2c_client *client, + + err = input_register_device(input); + if (err) +- goto err1; ++ goto err2; + + err = request_irq(priv->irq, eeti_ts_isr, irq_flags, + client->name, priv); + if (err) { + dev_err(&client->dev, "Unable to request touchscreen IRQ.\n"); +- goto err2; ++ goto err3; + } + + /* +@@ -233,9 +236,11 @@ static int __devinit eeti_ts_probe(struct i2c_client *client, + device_init_wakeup(&client->dev, 0); + return 0; + +-err2: ++err3: + input_unregister_device(input); + input = NULL; /* so we dont try to free it below */ ++err2: ++ gpio_free(pdata->irq_gpio); + err1: + input_free_device(input); + kfree(priv); +diff --git a/drivers/mfd/ezx-pcap.c b/drivers/mfd/ezx-pcap.c +index 43a76c4..db662e2 100644 +--- a/drivers/mfd/ezx-pcap.c ++++ b/drivers/mfd/ezx-pcap.c +@@ -202,7 +202,7 @@ static void pcap_isr_work(struct work_struct *work) + } + local_irq_enable(); + ezx_pcap_write(pcap, PCAP_REG_MSR, pcap->msr); +- } while (gpio_get_value(irq_to_gpio(pcap->spi->irq))); ++ } while (gpio_get_value(pdata->gpio)); + } + + static void pcap_irq_handler(unsigned int irq, struct irq_desc *desc) +diff --git a/drivers/net/caif/caif_serial.c b/drivers/net/caif/caif_serial.c +index 23406e6..ae286a9 100644 +--- a/drivers/net/caif/caif_serial.c ++++ b/drivers/net/caif/caif_serial.c +@@ -325,6 +325,9 @@ static int ldisc_open(struct tty_struct *tty) + + sprintf(name, "cf%s", tty->name); + dev = alloc_netdev(sizeof(*ser), name, caifdev_setup); ++ if (!dev) ++ return -ENOMEM; ++ + ser = netdev_priv(dev); + ser->tty = tty_kref_get(tty); + ser->dev = dev; +diff --git a/drivers/net/ethernet/broadcom/bnx2.c b/drivers/net/ethernet/broadcom/bnx2.c +index 965c723..721adfd 100644 +--- a/drivers/net/ethernet/broadcom/bnx2.c ++++ b/drivers/net/ethernet/broadcom/bnx2.c +@@ 
-5378,7 +5378,7 @@ bnx2_free_tx_skbs(struct bnx2 *bp) + int k, last; + + if (skb == NULL) { +- j++; ++ j = NEXT_TX_BD(j); + continue; + } + +@@ -5390,8 +5390,8 @@ bnx2_free_tx_skbs(struct bnx2 *bp) + tx_buf->skb = NULL; + + last = tx_buf->nr_frags; +- j++; +- for (k = 0; k < last; k++, j++) { ++ j = NEXT_TX_BD(j); ++ for (k = 0; k < last; k++, j = NEXT_TX_BD(j)) { + tx_buf = &txr->tx_buf_ring[TX_RING_IDX(j)]; + dma_unmap_page(&bp->pdev->dev, + dma_unmap_addr(tx_buf, mapping), +diff --git a/drivers/net/ethernet/intel/e1000/e1000_main.c b/drivers/net/ethernet/intel/e1000/e1000_main.c +index de00805..0549261 100644 +--- a/drivers/net/ethernet/intel/e1000/e1000_main.c ++++ b/drivers/net/ethernet/intel/e1000/e1000_main.c +@@ -4743,12 +4743,14 @@ static int __e1000_shutdown(struct pci_dev *pdev, bool *enable_wake) + e1000_setup_rctl(adapter); + e1000_set_rx_mode(netdev); + ++ rctl = er32(RCTL); ++ + /* turn on all-multi mode if wake on multicast is enabled */ +- if (wufc & E1000_WUFC_MC) { +- rctl = er32(RCTL); ++ if (wufc & E1000_WUFC_MC) + rctl |= E1000_RCTL_MPE; +- ew32(RCTL, rctl); +- } ++ ++ /* enable receives in the hardware */ ++ ew32(RCTL, rctl | E1000_RCTL_EN); + + if (hw->mac_type >= e1000_82540) { + ctrl = er32(CTRL); +diff --git a/drivers/net/ethernet/intel/e1000e/82571.c b/drivers/net/ethernet/intel/e1000e/82571.c +index 3072d35..4f4d52a 100644 +--- a/drivers/net/ethernet/intel/e1000e/82571.c ++++ b/drivers/net/ethernet/intel/e1000e/82571.c +@@ -1600,10 +1600,8 @@ static s32 e1000_check_for_serdes_link_82571(struct e1000_hw *hw) + * auto-negotiation in the TXCW register and disable + * forced link in the Device Control register in an + * attempt to auto-negotiate with our link partner. +- * If the partner code word is null, stop forcing +- * and restart auto negotiation. + */ +- if ((rxcw & E1000_RXCW_C) || !(rxcw & E1000_RXCW_CW)) { ++ if (rxcw & E1000_RXCW_C) { + /* Enable autoneg, and unforce link up */ + ew32(TXCW, mac->txcw); + ew32(CTRL, (ctrl & ~E1000_CTRL_SLU)); +diff --git a/drivers/net/tun.c b/drivers/net/tun.c +index 7bea9c6..a12c9bf 100644 +--- a/drivers/net/tun.c ++++ b/drivers/net/tun.c +@@ -1243,10 +1243,12 @@ static long __tun_chr_ioctl(struct file *file, unsigned int cmd, + int vnet_hdr_sz; + int ret; + +- if (cmd == TUNSETIFF || _IOC_TYPE(cmd) == 0x89) ++ if (cmd == TUNSETIFF || _IOC_TYPE(cmd) == 0x89) { + if (copy_from_user(&ifr, argp, ifreq_len)) + return -EFAULT; +- ++ } else { ++ memset(&ifr, 0, sizeof(ifr)); ++ } + if (cmd == TUNGETFEATURES) { + /* Currently this just means: "what IFF flags are valid?". 
+ * This is needed because we never checked for invalid flags on +diff --git a/drivers/net/usb/kaweth.c b/drivers/net/usb/kaweth.c +index 582ca2d..c4c6a73 100644 +--- a/drivers/net/usb/kaweth.c ++++ b/drivers/net/usb/kaweth.c +@@ -1308,7 +1308,7 @@ static int kaweth_internal_control_msg(struct usb_device *usb_dev, + int retv; + int length = 0; /* shut up GCC */ + +- urb = usb_alloc_urb(0, GFP_NOIO); ++ urb = usb_alloc_urb(0, GFP_ATOMIC); + if (!urb) + return -ENOMEM; + +diff --git a/drivers/net/wireless/ath/ath9k/hw.c b/drivers/net/wireless/ath/ath9k/hw.c +index 7f97164..2b8e957 100644 +--- a/drivers/net/wireless/ath/ath9k/hw.c ++++ b/drivers/net/wireless/ath/ath9k/hw.c +@@ -674,6 +674,7 @@ int ath9k_hw_init(struct ath_hw *ah) + case AR9300_DEVID_AR9340: + case AR9300_DEVID_AR9580: + case AR9300_DEVID_AR9462: ++ case AR9485_DEVID_AR1111: + break; + default: + if (common->bus_ops->ath_bus_type == ATH_USB) +diff --git a/drivers/net/wireless/ath/ath9k/hw.h b/drivers/net/wireless/ath/ath9k/hw.h +index 1bd8edf..a5c4ba8 100644 +--- a/drivers/net/wireless/ath/ath9k/hw.h ++++ b/drivers/net/wireless/ath/ath9k/hw.h +@@ -48,6 +48,7 @@ + #define AR9300_DEVID_AR9580 0x0033 + #define AR9300_DEVID_AR9462 0x0034 + #define AR9300_DEVID_AR9330 0x0035 ++#define AR9485_DEVID_AR1111 0x0037 + + #define AR5416_AR9100_DEVID 0x000b + +diff --git a/drivers/net/wireless/ath/ath9k/pci.c b/drivers/net/wireless/ath/ath9k/pci.c +index 2dcdf63..1883d39 100644 +--- a/drivers/net/wireless/ath/ath9k/pci.c ++++ b/drivers/net/wireless/ath/ath9k/pci.c +@@ -35,6 +35,7 @@ static DEFINE_PCI_DEVICE_TABLE(ath_pci_id_table) = { + { PCI_VDEVICE(ATHEROS, 0x0032) }, /* PCI-E AR9485 */ + { PCI_VDEVICE(ATHEROS, 0x0033) }, /* PCI-E AR9580 */ + { PCI_VDEVICE(ATHEROS, 0x0034) }, /* PCI-E AR9462 */ ++ { PCI_VDEVICE(ATHEROS, 0x0037) }, /* PCI-E AR1111/AR9485 */ + { 0 } + }; + +diff --git a/drivers/net/wireless/iwlwifi/iwl-agn-rs.c b/drivers/net/wireless/iwlwifi/iwl-agn-rs.c +index 9ba2c1b..3395025 100644 +--- a/drivers/net/wireless/iwlwifi/iwl-agn-rs.c ++++ b/drivers/net/wireless/iwlwifi/iwl-agn-rs.c +@@ -708,11 +708,14 @@ static int rs_toggle_antenna(u32 valid_ant, u32 *rate_n_flags, + */ + static bool rs_use_green(struct ieee80211_sta *sta) + { +- struct iwl_station_priv *sta_priv = (void *)sta->drv_priv; +- struct iwl_rxon_context *ctx = sta_priv->ctx; +- +- return (sta->ht_cap.cap & IEEE80211_HT_CAP_GRN_FLD) && +- !(ctx->ht.non_gf_sta_present); ++ /* ++ * There's a bug somewhere in this code that causes the ++ * scaling to get stuck because GF+SGI can't be combined ++ * in SISO rates. Until we find that bug, disable GF, it ++ * has only limited benefit and we still interoperate with ++ * GF APs since we can always receive GF transmissions. 
++ */ ++ return false; + } + + /** +diff --git a/drivers/net/wireless/rt2x00/rt61pci.c b/drivers/net/wireless/rt2x00/rt61pci.c +index bf55b4a..d69f88c 100644 +--- a/drivers/net/wireless/rt2x00/rt61pci.c ++++ b/drivers/net/wireless/rt2x00/rt61pci.c +@@ -2243,8 +2243,7 @@ static void rt61pci_txdone(struct rt2x00_dev *rt2x00dev) + + static void rt61pci_wakeup(struct rt2x00_dev *rt2x00dev) + { +- struct ieee80211_conf conf = { .flags = 0 }; +- struct rt2x00lib_conf libconf = { .conf = &conf }; ++ struct rt2x00lib_conf libconf = { .conf = &rt2x00dev->hw->conf }; + + rt61pci_config(rt2x00dev, &libconf, IEEE80211_CONF_CHANGE_PS); + } +diff --git a/drivers/net/wireless/rtlwifi/usb.c b/drivers/net/wireless/rtlwifi/usb.c +index db34db6..a49e848 100644 +--- a/drivers/net/wireless/rtlwifi/usb.c ++++ b/drivers/net/wireless/rtlwifi/usb.c +@@ -120,15 +120,19 @@ static u32 _usb_read_sync(struct rtl_priv *rtlpriv, u32 addr, u16 len) + u8 request; + u16 wvalue; + u16 index; +- __le32 *data = &rtlpriv->usb_data[rtlpriv->usb_data_index]; ++ __le32 *data; ++ unsigned long flags; + ++ spin_lock_irqsave(&rtlpriv->locks.usb_lock, flags); ++ if (++rtlpriv->usb_data_index >= RTL_USB_MAX_RX_COUNT) ++ rtlpriv->usb_data_index = 0; ++ data = &rtlpriv->usb_data[rtlpriv->usb_data_index]; ++ spin_unlock_irqrestore(&rtlpriv->locks.usb_lock, flags); + request = REALTEK_USB_VENQT_CMD_REQ; + index = REALTEK_USB_VENQT_CMD_IDX; /* n/a */ + + wvalue = (u16)addr; + _usbctrl_vendorreq_sync_read(udev, request, wvalue, index, data, len); +- if (++rtlpriv->usb_data_index >= RTL_USB_MAX_RX_COUNT) +- rtlpriv->usb_data_index = 0; + return le32_to_cpu(*data); + } + +@@ -909,6 +913,10 @@ int __devinit rtl_usb_probe(struct usb_interface *intf, + GFP_KERNEL); + if (!rtlpriv->usb_data) + return -ENOMEM; ++ ++ /* this spin lock must be initialized early */ ++ spin_lock_init(&rtlpriv->locks.usb_lock); ++ + rtlpriv->usb_data_index = 0; + SET_IEEE80211_DEV(hw, &intf->dev); + udev = interface_to_usbdev(intf); +diff --git a/drivers/net/wireless/rtlwifi/wifi.h b/drivers/net/wireless/rtlwifi/wifi.h +index b1e9deb..deb87e9 100644 +--- a/drivers/net/wireless/rtlwifi/wifi.h ++++ b/drivers/net/wireless/rtlwifi/wifi.h +@@ -1550,6 +1550,7 @@ struct rtl_locks { + spinlock_t rf_lock; + spinlock_t lps_lock; + spinlock_t waitq_lock; ++ spinlock_t usb_lock; + + /*Dual mac*/ + spinlock_t cck_and_rw_pagea_lock; +diff --git a/fs/hfsplus/wrapper.c b/fs/hfsplus/wrapper.c +index 7daf4b8..90effcc 100644 +--- a/fs/hfsplus/wrapper.c ++++ b/fs/hfsplus/wrapper.c +@@ -56,7 +56,7 @@ int hfsplus_submit_bio(struct super_block *sb, sector_t sector, + DECLARE_COMPLETION_ONSTACK(wait); + struct bio *bio; + int ret = 0; +- unsigned int io_size; ++ u64 io_size; + loff_t start; + int offset; + +diff --git a/include/linux/input/eeti_ts.h b/include/linux/input/eeti_ts.h +index f875b31..16625d7 100644 +--- a/include/linux/input/eeti_ts.h ++++ b/include/linux/input/eeti_ts.h +@@ -2,6 +2,7 @@ + #define LINUX_INPUT_EETI_TS_H + + struct eeti_ts_platform_data { ++ int irq_gpio; + unsigned int irq_active_high; + }; + +diff --git a/include/linux/mfd/ezx-pcap.h b/include/linux/mfd/ezx-pcap.h +index 40c37216..32a1b5c 100644 +--- a/include/linux/mfd/ezx-pcap.h ++++ b/include/linux/mfd/ezx-pcap.h +@@ -16,6 +16,7 @@ struct pcap_subdev { + struct pcap_platform_data { + unsigned int irq_base; + unsigned int config; ++ int gpio; + void (*init) (void *); /* board specific init */ + int num_subdevs; + struct pcap_subdev *subdevs; +diff --git a/net/caif/caif_dev.c b/net/caif/caif_dev.c +index 
68223e4..4e9115d 100644 +--- a/net/caif/caif_dev.c ++++ b/net/caif/caif_dev.c +@@ -428,9 +428,9 @@ static int __init caif_device_init(void) + + static void __exit caif_device_exit(void) + { +- unregister_pernet_subsys(&caif_net_ops); + unregister_netdevice_notifier(&caif_device_notifier); + dev_remove_pack(&caif_packet_type); ++ unregister_pernet_subsys(&caif_net_ops); + } + + module_init(caif_device_init); +diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c +index 05842ab..0cf604b 100644 +--- a/net/core/rtnetlink.c ++++ b/net/core/rtnetlink.c +@@ -670,6 +670,12 @@ static void set_operstate(struct net_device *dev, unsigned char transition) + } + } + ++static unsigned int rtnl_dev_get_flags(const struct net_device *dev) ++{ ++ return (dev->flags & ~(IFF_PROMISC | IFF_ALLMULTI)) | ++ (dev->gflags & (IFF_PROMISC | IFF_ALLMULTI)); ++} ++ + static unsigned int rtnl_dev_combine_flags(const struct net_device *dev, + const struct ifinfomsg *ifm) + { +@@ -678,7 +684,7 @@ static unsigned int rtnl_dev_combine_flags(const struct net_device *dev, + /* bugwards compatibility: ifi_change == 0 is treated as ~0 */ + if (ifm->ifi_change) + flags = (flags & ifm->ifi_change) | +- (dev->flags & ~ifm->ifi_change); ++ (rtnl_dev_get_flags(dev) & ~ifm->ifi_change); + + return flags; + } +diff --git a/net/ipv4/cipso_ipv4.c b/net/ipv4/cipso_ipv4.c +index 86f3b88..afaa735 100644 +--- a/net/ipv4/cipso_ipv4.c ++++ b/net/ipv4/cipso_ipv4.c +@@ -1725,8 +1725,10 @@ int cipso_v4_validate(const struct sk_buff *skb, unsigned char **option) + case CIPSO_V4_TAG_LOCAL: + /* This is a non-standard tag that we only allow for + * local connections, so if the incoming interface is +- * not the loopback device drop the packet. */ +- if (!(skb->dev->flags & IFF_LOOPBACK)) { ++ * not the loopback device drop the packet. Further, ++ * there is no legitimate reason for setting this from ++ * userspace so reject it if skb is NULL. */ ++ if (skb == NULL || !(skb->dev->flags & IFF_LOOPBACK)) { + err_offset = opt_iter; + goto validate_return_locked; + } +diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c +index 11ba922..ad466a7 100644 +--- a/net/ipv4/tcp.c ++++ b/net/ipv4/tcp.c +@@ -2391,7 +2391,10 @@ static int do_tcp_setsockopt(struct sock *sk, int level, + /* Cap the max timeout in ms TCP will retry/retrans + * before giving up and aborting (ETIMEDOUT) a connection. 
+ */ +- icsk->icsk_user_timeout = msecs_to_jiffies(val); ++ if (val < 0) ++ err = -EINVAL; ++ else ++ icsk->icsk_user_timeout = msecs_to_jiffies(val); + break; + default: + err = -ENOPROTOOPT; +diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c +index 32e6ca2..a08a621 100644 +--- a/net/ipv4/tcp_input.c ++++ b/net/ipv4/tcp_input.c +@@ -5415,7 +5415,9 @@ int tcp_rcv_established(struct sock *sk, struct sk_buff *skb, + if (tp->copied_seq == tp->rcv_nxt && + len - tcp_header_len <= tp->ucopy.len) { + #ifdef CONFIG_NET_DMA +- if (tcp_dma_try_early_copy(sk, skb, tcp_header_len)) { ++ if (tp->ucopy.task == current && ++ sock_owned_by_user(sk) && ++ tcp_dma_try_early_copy(sk, skb, tcp_header_len)) { + copied_early = 1; + eaten = 1; + } +diff --git a/net/mac80211/mesh.c b/net/mac80211/mesh.c +index a7078fd..f85de8e 100644 +--- a/net/mac80211/mesh.c ++++ b/net/mac80211/mesh.c +@@ -543,6 +543,7 @@ void ieee80211_stop_mesh(struct ieee80211_sub_if_data *sdata) + + del_timer_sync(&sdata->u.mesh.housekeeping_timer); + del_timer_sync(&sdata->u.mesh.mesh_path_root_timer); ++ del_timer_sync(&sdata->u.mesh.mesh_path_timer); + /* + * If the timer fired while we waited for it, it will have + * requeued the work. Now the work will be running again +diff --git a/net/sched/sch_sfb.c b/net/sched/sch_sfb.c +index 17859ea..351a69b 100644 +--- a/net/sched/sch_sfb.c ++++ b/net/sched/sch_sfb.c +@@ -559,6 +559,8 @@ static int sfb_dump(struct Qdisc *sch, struct sk_buff *skb) + + sch->qstats.backlog = q->qdisc->qstats.backlog; + opts = nla_nest_start(skb, TCA_OPTIONS); ++ if (opts == NULL) ++ goto nla_put_failure; + NLA_PUT(skb, TCA_SFB_PARMS, sizeof(opt), &opt); + return nla_nest_end(skb, opts); + +diff --git a/net/sctp/input.c b/net/sctp/input.c +index b7692aa..0fc18c7 100644 +--- a/net/sctp/input.c ++++ b/net/sctp/input.c +@@ -736,15 +736,12 @@ static void __sctp_unhash_endpoint(struct sctp_endpoint *ep) + + epb = &ep->base; + +- if (hlist_unhashed(&epb->node)) +- return; +- + epb->hashent = sctp_ep_hashfn(epb->bind_addr.port); + + head = &sctp_ep_hashtable[epb->hashent]; + + sctp_write_lock(&head->lock); +- __hlist_del(&epb->node); ++ hlist_del_init(&epb->node); + sctp_write_unlock(&head->lock); + } + +@@ -825,7 +822,7 @@ static void __sctp_unhash_established(struct sctp_association *asoc) + head = &sctp_assoc_hashtable[epb->hashent]; + + sctp_write_lock(&head->lock); +- __hlist_del(&epb->node); ++ hlist_del_init(&epb->node); + sctp_write_unlock(&head->lock); + } + +diff --git a/net/sctp/socket.c b/net/sctp/socket.c +index 0075554..8e49d76 100644 +--- a/net/sctp/socket.c ++++ b/net/sctp/socket.c +@@ -1231,8 +1231,14 @@ out_free: + SCTP_DEBUG_PRINTK("About to exit __sctp_connect() free asoc: %p" + " kaddrs: %p err: %d\n", + asoc, kaddrs, err); +- if (asoc) ++ if (asoc) { ++ /* sctp_primitive_ASSOCIATE may have added this association ++ * To the hash table, try to unhash it, just in case, its a noop ++ * if it wasn't hashed so we're safe ++ */ ++ sctp_unhash_established(asoc); + sctp_association_free(asoc); ++ } + return err; + } + +@@ -1942,8 +1948,10 @@ SCTP_STATIC int sctp_sendmsg(struct kiocb *iocb, struct sock *sk, + goto out_unlock; + + out_free: +- if (new_asoc) ++ if (new_asoc) { ++ sctp_unhash_established(asoc); + sctp_association_free(asoc); ++ } + out_unlock: + sctp_release_sock(sk); + +diff --git a/net/wanrouter/wanmain.c b/net/wanrouter/wanmain.c +index 788a12c..2ab7850 100644 +--- a/net/wanrouter/wanmain.c ++++ b/net/wanrouter/wanmain.c +@@ -602,36 +602,31 @@ static int wanrouter_device_new_if(struct 
wan_device *wandev, + * successfully, add it to the interface list. + */ + +- if (dev->name == NULL) { +- err = -EINVAL; +- } else { ++#ifdef WANDEBUG ++ printk(KERN_INFO "%s: registering interface %s...\n", ++ wanrouter_modname, dev->name); ++#endif + +- #ifdef WANDEBUG +- printk(KERN_INFO "%s: registering interface %s...\n", +- wanrouter_modname, dev->name); +- #endif +- +- err = register_netdev(dev); +- if (!err) { +- struct net_device *slave = NULL; +- unsigned long smp_flags=0; +- +- lock_adapter_irq(&wandev->lock, &smp_flags); +- +- if (wandev->dev == NULL) { +- wandev->dev = dev; +- } else { +- for (slave=wandev->dev; +- DEV_TO_SLAVE(slave); +- slave = DEV_TO_SLAVE(slave)) +- DEV_TO_SLAVE(slave) = dev; +- } +- ++wandev->ndev; +- +- unlock_adapter_irq(&wandev->lock, &smp_flags); +- err = 0; /* done !!! */ +- goto out; ++ err = register_netdev(dev); ++ if (!err) { ++ struct net_device *slave = NULL; ++ unsigned long smp_flags=0; ++ ++ lock_adapter_irq(&wandev->lock, &smp_flags); ++ ++ if (wandev->dev == NULL) { ++ wandev->dev = dev; ++ } else { ++ for (slave=wandev->dev; ++ DEV_TO_SLAVE(slave); ++ slave = DEV_TO_SLAVE(slave)) ++ DEV_TO_SLAVE(slave) = dev; + } ++ ++wandev->ndev; ++ ++ unlock_adapter_irq(&wandev->lock, &smp_flags); ++ err = 0; /* done !!! */ ++ goto out; + } + if (wandev->del_if) + wandev->del_if(wandev, dev); +diff --git a/net/wireless/core.c b/net/wireless/core.c +index 220f3bd..8f5042d 100644 +--- a/net/wireless/core.c ++++ b/net/wireless/core.c +@@ -971,6 +971,11 @@ static int cfg80211_netdev_notifier_call(struct notifier_block * nb, + */ + synchronize_rcu(); + INIT_LIST_HEAD(&wdev->list); ++ /* ++ * Ensure that all events have been processed and ++ * freed. ++ */ ++ cfg80211_process_wdev_events(wdev); + break; + case NETDEV_PRE_UP: + if (!(wdev->wiphy->interface_modes & BIT(wdev->iftype))) +diff --git a/net/wireless/core.h b/net/wireless/core.h +index b9ec306..02c3be3 100644 +--- a/net/wireless/core.h ++++ b/net/wireless/core.h +@@ -426,6 +426,7 @@ int cfg80211_change_iface(struct cfg80211_registered_device *rdev, + struct net_device *dev, enum nl80211_iftype ntype, + u32 *flags, struct vif_params *params); + void cfg80211_process_rdev_events(struct cfg80211_registered_device *rdev); ++void cfg80211_process_wdev_events(struct wireless_dev *wdev); + + int cfg80211_can_change_interface(struct cfg80211_registered_device *rdev, + struct wireless_dev *wdev, +diff --git a/net/wireless/util.c b/net/wireless/util.c +index b5e4c1c..22fb802 100644 +--- a/net/wireless/util.c ++++ b/net/wireless/util.c +@@ -725,7 +725,7 @@ void cfg80211_upload_connect_keys(struct wireless_dev *wdev) + wdev->connect_keys = NULL; + } + +-static void cfg80211_process_wdev_events(struct wireless_dev *wdev) ++void cfg80211_process_wdev_events(struct wireless_dev *wdev) + { + struct cfg80211_event *ev; + unsigned long flags; +diff --git a/sound/pci/hda/patch_conexant.c b/sound/pci/hda/patch_conexant.c +index 51a1afc..402f330 100644 +--- a/sound/pci/hda/patch_conexant.c ++++ b/sound/pci/hda/patch_conexant.c +@@ -3059,7 +3059,6 @@ static const struct snd_pci_quirk cxt5066_cfg_tbl[] = { + SND_PCI_QUIRK(0x1028, 0x02d8, "Dell Vostro", CXT5066_DELL_VOSTRO), + SND_PCI_QUIRK(0x1028, 0x02f5, "Dell Vostro 320", CXT5066_IDEAPAD), + SND_PCI_QUIRK(0x1028, 0x0401, "Dell Vostro 1014", CXT5066_DELL_VOSTRO), +- SND_PCI_QUIRK(0x1028, 0x0402, "Dell Vostro", CXT5066_DELL_VOSTRO), + SND_PCI_QUIRK(0x1028, 0x0408, "Dell Inspiron One 19T", CXT5066_IDEAPAD), + SND_PCI_QUIRK(0x1028, 0x050f, "Dell Inspiron", CXT5066_IDEAPAD), 
+ SND_PCI_QUIRK(0x1028, 0x0510, "Dell Vostro", CXT5066_IDEAPAD), +diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c +index 2e2eb93..32c8169 100644 +--- a/sound/pci/hda/patch_realtek.c ++++ b/sound/pci/hda/patch_realtek.c +@@ -4981,6 +4981,8 @@ static const struct alc_fixup alc269_fixups[] = { + [ALC269_FIXUP_PCM_44K] = { + .type = ALC_FIXUP_FUNC, + .v.func = alc269_fixup_pcm_44k, ++ .chained = true, ++ .chain_id = ALC269_FIXUP_QUANTA_MUTE + }, + [ALC269_FIXUP_STEREO_DMIC] = { + .type = ALC_FIXUP_FUNC, +@@ -5077,9 +5079,10 @@ static const struct snd_pci_quirk alc269_fixup_tbl[] = { + SND_PCI_QUIRK(0x17aa, 0x21ca, "Thinkpad L412", ALC269_FIXUP_SKU_IGNORE), + SND_PCI_QUIRK(0x17aa, 0x21e9, "Thinkpad Edge 15", ALC269_FIXUP_SKU_IGNORE), + SND_PCI_QUIRK(0x17aa, 0x21f6, "Thinkpad T530", ALC269_FIXUP_LENOVO_DOCK), ++ SND_PCI_QUIRK(0x17aa, 0x21fa, "Thinkpad X230", ALC269_FIXUP_LENOVO_DOCK), ++ SND_PCI_QUIRK(0x17aa, 0x21fb, "Thinkpad T430s", ALC269_FIXUP_LENOVO_DOCK), + SND_PCI_QUIRK(0x17aa, 0x2203, "Thinkpad X230 Tablet", ALC269_FIXUP_LENOVO_DOCK), +- SND_PCI_QUIRK(0x17aa, 0x3bf8, "Quanta FL1", ALC269_FIXUP_QUANTA_MUTE), +- SND_PCI_QUIRK(0x17aa, 0x3bf8, "Lenovo Ideapd", ALC269_FIXUP_PCM_44K), ++ SND_PCI_QUIRK(0x17aa, 0x3bf8, "Quanta FL1", ALC269_FIXUP_PCM_44K), + SND_PCI_QUIRK(0x17aa, 0x9e54, "LENOVO NB", ALC269_FIXUP_LENOVO_EAPD), + + #if 1 diff --git a/3.2.34/bump/1028_linux-3.2.29.patch b/3.2.34/bump/1028_linux-3.2.29.patch new file mode 100644 index 0000000..3c65179 --- /dev/null +++ b/3.2.34/bump/1028_linux-3.2.29.patch @@ -0,0 +1,4279 @@ +diff --git a/MAINTAINERS b/MAINTAINERS +index f986e7d..82d7fa6 100644 +--- a/MAINTAINERS ++++ b/MAINTAINERS +@@ -5452,7 +5452,7 @@ F: Documentation/blockdev/ramdisk.txt + F: drivers/block/brd.c + + RANDOM NUMBER DRIVER +-M: Matt Mackall ++M: Theodore Ts'o" + S: Maintained + F: drivers/char/random.c + +diff --git a/Makefile b/Makefile +index 5368961..d96fc2a 100644 +--- a/Makefile ++++ b/Makefile +@@ -1,6 +1,6 @@ + VERSION = 3 + PATCHLEVEL = 2 +-SUBLEVEL = 28 ++SUBLEVEL = 29 + EXTRAVERSION = + NAME = Saber-toothed Squirrel + +diff --git a/arch/alpha/include/asm/atomic.h b/arch/alpha/include/asm/atomic.h +index 640f909..6f1aca7 100644 +--- a/arch/alpha/include/asm/atomic.h ++++ b/arch/alpha/include/asm/atomic.h +@@ -14,8 +14,8 @@ + */ + + +-#define ATOMIC_INIT(i) ( (atomic_t) { (i) } ) +-#define ATOMIC64_INIT(i) ( (atomic64_t) { (i) } ) ++#define ATOMIC_INIT(i) { (i) } ++#define ATOMIC64_INIT(i) { (i) } + + #define atomic_read(v) (*(volatile int *)&(v)->counter) + #define atomic64_read(v) (*(volatile long *)&(v)->counter) +diff --git a/arch/alpha/include/asm/socket.h b/arch/alpha/include/asm/socket.h +index 06edfef..3eeb47c 100644 +--- a/arch/alpha/include/asm/socket.h ++++ b/arch/alpha/include/asm/socket.h +@@ -69,9 +69,11 @@ + + #define SO_RXQ_OVFL 40 + ++#ifdef __KERNEL__ + /* O_NONBLOCK clashes with the bits used for socket types. Therefore we + * have to define SOCK_NONBLOCK to a different value here. 
+ */ + #define SOCK_NONBLOCK 0x40000000 ++#endif /* __KERNEL__ */ + + #endif /* _ASM_SOCKET_H */ +diff --git a/arch/arm/include/asm/pgtable.h b/arch/arm/include/asm/pgtable.h +index 9451dce..8512475 100644 +--- a/arch/arm/include/asm/pgtable.h ++++ b/arch/arm/include/asm/pgtable.h +@@ -288,13 +288,13 @@ static inline pte_t pte_modify(pte_t pte, pgprot_t newprot) + * + * 3 3 2 2 2 2 2 2 2 2 2 2 1 1 1 1 1 1 1 1 1 1 + * 1 0 9 8 7 6 5 4 3 2 1 0 9 8 7 6 5 4 3 2 1 0 9 8 7 6 5 4 3 2 1 0 +- * <--------------- offset --------------------> <- type --> 0 0 0 ++ * <--------------- offset ----------------------> < type -> 0 0 0 + * +- * This gives us up to 63 swap files and 32GB per swap file. Note that ++ * This gives us up to 31 swap files and 64GB per swap file. Note that + * the offset field is always non-zero. + */ + #define __SWP_TYPE_SHIFT 3 +-#define __SWP_TYPE_BITS 6 ++#define __SWP_TYPE_BITS 5 + #define __SWP_TYPE_MASK ((1 << __SWP_TYPE_BITS) - 1) + #define __SWP_OFFSET_SHIFT (__SWP_TYPE_BITS + __SWP_TYPE_SHIFT) + +diff --git a/arch/arm/mm/tlb-v7.S b/arch/arm/mm/tlb-v7.S +index c202113..ea94765 100644 +--- a/arch/arm/mm/tlb-v7.S ++++ b/arch/arm/mm/tlb-v7.S +@@ -38,10 +38,10 @@ ENTRY(v7wbi_flush_user_tlb_range) + dsb + mov r0, r0, lsr #PAGE_SHIFT @ align address + mov r1, r1, lsr #PAGE_SHIFT +-#ifdef CONFIG_ARM_ERRATA_720789 +- mov r3, #0 +-#else + asid r3, r3 @ mask ASID ++#ifdef CONFIG_ARM_ERRATA_720789 ++ ALT_SMP(W(mov) r3, #0 ) ++ ALT_UP(W(nop) ) + #endif + orr r0, r3, r0, lsl #PAGE_SHIFT @ Create initial MVA + mov r1, r1, lsl #PAGE_SHIFT +diff --git a/arch/arm/vfp/vfpmodule.c b/arch/arm/vfp/vfpmodule.c +index ad83dad..f0702f3 100644 +--- a/arch/arm/vfp/vfpmodule.c ++++ b/arch/arm/vfp/vfpmodule.c +@@ -628,8 +628,10 @@ static int __init vfp_init(void) + if ((fmrx(MVFR1) & 0x000fff00) == 0x00011100) + elf_hwcap |= HWCAP_NEON; + #endif ++#ifdef CONFIG_VFPv3 + if ((fmrx(MVFR1) & 0xf0000000) == 0x10000000) + elf_hwcap |= HWCAP_VFPv4; ++#endif + } + } + return 0; +diff --git a/arch/x86/mm/hugetlbpage.c b/arch/x86/mm/hugetlbpage.c +index f581a18..df7d12c 100644 +--- a/arch/x86/mm/hugetlbpage.c ++++ b/arch/x86/mm/hugetlbpage.c +@@ -56,9 +56,16 @@ static int vma_shareable(struct vm_area_struct *vma, unsigned long addr) + } + + /* +- * search for a shareable pmd page for hugetlb. ++ * Search for a shareable pmd page for hugetlb. In any case calls pmd_alloc() ++ * and returns the corresponding pte. While this is not necessary for the ++ * !shared pmd case because we can allocate the pmd later as well, it makes the ++ * code much cleaner. pmd allocation is essential for the shared case because ++ * pud has to be populated inside the same i_mmap_mutex section - otherwise ++ * racing tasks could either miss the sharing (see huge_pte_offset) or select a ++ * bad pmd for sharing. 
+ */ +-static void huge_pmd_share(struct mm_struct *mm, unsigned long addr, pud_t *pud) ++static pte_t * ++huge_pmd_share(struct mm_struct *mm, unsigned long addr, pud_t *pud) + { + struct vm_area_struct *vma = find_vma(mm, addr); + struct address_space *mapping = vma->vm_file->f_mapping; +@@ -68,9 +75,10 @@ static void huge_pmd_share(struct mm_struct *mm, unsigned long addr, pud_t *pud) + struct vm_area_struct *svma; + unsigned long saddr; + pte_t *spte = NULL; ++ pte_t *pte; + + if (!vma_shareable(vma, addr)) +- return; ++ return (pte_t *)pmd_alloc(mm, pud, addr); + + mutex_lock(&mapping->i_mmap_mutex); + vma_prio_tree_foreach(svma, &iter, &mapping->i_mmap, idx, idx) { +@@ -97,7 +105,9 @@ static void huge_pmd_share(struct mm_struct *mm, unsigned long addr, pud_t *pud) + put_page(virt_to_page(spte)); + spin_unlock(&mm->page_table_lock); + out: ++ pte = (pte_t *)pmd_alloc(mm, pud, addr); + mutex_unlock(&mapping->i_mmap_mutex); ++ return pte; + } + + /* +@@ -142,8 +152,9 @@ pte_t *huge_pte_alloc(struct mm_struct *mm, + } else { + BUG_ON(sz != PMD_SIZE); + if (pud_none(*pud)) +- huge_pmd_share(mm, addr, pud); +- pte = (pte_t *) pmd_alloc(mm, pud, addr); ++ pte = huge_pmd_share(mm, addr, pud); ++ else ++ pte = (pte_t *)pmd_alloc(mm, pud, addr); + } + } + BUG_ON(pte && !pte_none(*pte) && !pte_huge(*pte)); +diff --git a/drivers/acpi/acpica/tbxface.c b/drivers/acpi/acpica/tbxface.c +index e7d13f5..d05f2fe 100644 +--- a/drivers/acpi/acpica/tbxface.c ++++ b/drivers/acpi/acpica/tbxface.c +@@ -436,6 +436,7 @@ acpi_get_table_with_size(char *signature, + + return (AE_NOT_FOUND); + } ++ACPI_EXPORT_SYMBOL(acpi_get_table_with_size) + + acpi_status + acpi_get_table(char *signature, +diff --git a/drivers/base/power/runtime.c b/drivers/base/power/runtime.c +index 8c78443..3790c80 100644 +--- a/drivers/base/power/runtime.c ++++ b/drivers/base/power/runtime.c +@@ -385,7 +385,6 @@ static int rpm_suspend(struct device *dev, int rpmflags) + goto repeat; + } + +- dev->power.deferred_resume = false; + if (dev->power.no_callbacks) + goto no_callback; /* Assume success. */ + +@@ -446,6 +445,7 @@ static int rpm_suspend(struct device *dev, int rpmflags) + wake_up_all(&dev->power.wait_queue); + + if (dev->power.deferred_resume) { ++ dev->power.deferred_resume = false; + rpm_resume(dev, 0); + retval = -EAGAIN; + goto out; +@@ -568,6 +568,7 @@ static int rpm_resume(struct device *dev, int rpmflags) + || dev->parent->power.runtime_status == RPM_ACTIVE) { + atomic_inc(&dev->parent->power.child_count); + spin_unlock(&dev->parent->power.lock); ++ retval = 1; + goto no_callback; /* Assume success. */ + } + spin_unlock(&dev->parent->power.lock); +@@ -645,7 +646,7 @@ static int rpm_resume(struct device *dev, int rpmflags) + } + wake_up_all(&dev->power.wait_queue); + +- if (!retval) ++ if (retval >= 0) + rpm_idle(dev, RPM_ASYNC); + + out: +diff --git a/drivers/block/cciss_scsi.c b/drivers/block/cciss_scsi.c +index acda773..38aa6dd 100644 +--- a/drivers/block/cciss_scsi.c ++++ b/drivers/block/cciss_scsi.c +@@ -763,16 +763,7 @@ static void complete_scsi_command(CommandList_struct *c, int timeout, + { + case CMD_TARGET_STATUS: + /* Pass it up to the upper layers... */ +- if( ei->ScsiStatus) +- { +-#if 0 +- printk(KERN_WARNING "cciss: cmd %p " +- "has SCSI Status = %x\n", +- c, ei->ScsiStatus); +-#endif +- cmd->result |= (ei->ScsiStatus << 1); +- } +- else { /* scsi status is zero??? How??? 
*/ ++ if (!ei->ScsiStatus) { + + /* Ordinarily, this case should never happen, but there is a bug + in some released firmware revisions that allows it to happen +diff --git a/drivers/block/virtio_blk.c b/drivers/block/virtio_blk.c +index 650a308..de9c800 100644 +--- a/drivers/block/virtio_blk.c ++++ b/drivers/block/virtio_blk.c +@@ -4,6 +4,7 @@ + #include + #include + #include ++#include + #include + #include + #include +@@ -26,14 +27,17 @@ struct virtio_blk + /* The disk structure for the kernel. */ + struct gendisk *disk; + +- /* Request tracking. */ +- struct list_head reqs; +- + mempool_t *pool; + + /* Process context for config space updates */ + struct work_struct config_work; + ++ /* Lock for config space updates */ ++ struct mutex config_lock; ++ ++ /* enable config space updates */ ++ bool config_enable; ++ + /* What host tells us, plus 2 for header & tailer. */ + unsigned int sg_elems; + +@@ -46,7 +50,6 @@ struct virtio_blk + + struct virtblk_req + { +- struct list_head list; + struct request *req; + struct virtio_blk_outhdr out_hdr; + struct virtio_scsi_inhdr in_hdr; +@@ -90,7 +93,6 @@ static void blk_done(struct virtqueue *vq) + } + + __blk_end_request_all(vbr->req, error); +- list_del(&vbr->list); + mempool_free(vbr, vblk->pool); + } + /* In case queue is stopped waiting for more buffers. */ +@@ -175,7 +177,6 @@ static bool do_req(struct request_queue *q, struct virtio_blk *vblk, + return false; + } + +- list_add_tail(&vbr->list, &vblk->reqs); + return true; + } + +@@ -316,6 +317,10 @@ static void virtblk_config_changed_work(struct work_struct *work) + char cap_str_2[10], cap_str_10[10]; + u64 capacity, size; + ++ mutex_lock(&vblk->config_lock); ++ if (!vblk->config_enable) ++ goto done; ++ + /* Host must always specify the capacity. */ + vdev->config->get(vdev, offsetof(struct virtio_blk_config, capacity), + &capacity, sizeof(capacity)); +@@ -338,6 +343,8 @@ static void virtblk_config_changed_work(struct work_struct *work) + cap_str_10, cap_str_2); + + set_capacity(vblk->disk, capacity); ++done: ++ mutex_unlock(&vblk->config_lock); + } + + static void virtblk_config_changed(struct virtio_device *vdev) +@@ -381,11 +388,12 @@ static int __devinit virtblk_probe(struct virtio_device *vdev) + goto out_free_index; + } + +- INIT_LIST_HEAD(&vblk->reqs); + vblk->vdev = vdev; + vblk->sg_elems = sg_elems; + sg_init_table(vblk->sg, vblk->sg_elems); ++ mutex_init(&vblk->config_lock); + INIT_WORK(&vblk->config_work, virtblk_config_changed_work); ++ vblk->config_enable = true; + + /* We expect one virtqueue, for output. */ + vblk->vq = virtio_find_single_vq(vdev, blk_done, "requests"); +@@ -539,16 +547,19 @@ static void __devexit virtblk_remove(struct virtio_device *vdev) + struct virtio_blk *vblk = vdev->priv; + int index = vblk->index; + +- flush_work(&vblk->config_work); ++ /* Prevent config work handler from accessing the device. */ ++ mutex_lock(&vblk->config_lock); ++ vblk->config_enable = false; ++ mutex_unlock(&vblk->config_lock); + +- /* Nothing should be pending. */ +- BUG_ON(!list_empty(&vblk->reqs)); ++ del_gendisk(vblk->disk); ++ blk_cleanup_queue(vblk->disk->queue); + + /* Stop all the virtqueues. 
*/ + vdev->config->reset(vdev); + +- del_gendisk(vblk->disk); +- blk_cleanup_queue(vblk->disk->queue); ++ flush_work(&vblk->config_work); ++ + put_disk(vblk->disk); + mempool_destroy(vblk->pool); + vdev->config->del_vqs(vdev); +diff --git a/drivers/char/random.c b/drivers/char/random.c +index 631d4f6..8ae9235 100644 +--- a/drivers/char/random.c ++++ b/drivers/char/random.c +@@ -1114,6 +1114,16 @@ static void init_std_data(struct entropy_store *r) + mix_pool_bytes(r, utsname(), sizeof(*(utsname())), NULL); + } + ++/* ++ * Note that setup_arch() may call add_device_randomness() ++ * long before we get here. This allows seeding of the pools ++ * with some platform dependent data very early in the boot ++ * process. But it limits our options here. We must use ++ * statically allocated structures that already have all ++ * initializations complete at compile time. We should also ++ * take care not to overwrite the precious per platform data ++ * we were given. ++ */ + static int rand_initialize(void) + { + init_std_data(&input_pool); +@@ -1391,10 +1401,15 @@ static int proc_do_uuid(ctl_table *table, int write, + uuid = table->data; + if (!uuid) { + uuid = tmp_uuid; +- uuid[8] = 0; +- } +- if (uuid[8] == 0) + generate_random_uuid(uuid); ++ } else { ++ static DEFINE_SPINLOCK(bootid_spinlock); ++ ++ spin_lock(&bootid_spinlock); ++ if (!uuid[8]) ++ generate_random_uuid(uuid); ++ spin_unlock(&bootid_spinlock); ++ } + + sprintf(buf, "%pU", uuid); + +diff --git a/drivers/firmware/dmi_scan.c b/drivers/firmware/dmi_scan.c +index 153980b..b298158 100644 +--- a/drivers/firmware/dmi_scan.c ++++ b/drivers/firmware/dmi_scan.c +@@ -6,6 +6,7 @@ + #include + #include + #include ++#include + #include + + /* +@@ -111,6 +112,8 @@ static int __init dmi_walk_early(void (*decode)(const struct dmi_header *, + + dmi_table(buf, dmi_len, dmi_num, decode, NULL); + ++ add_device_randomness(buf, dmi_len); ++ + dmi_iounmap(buf, dmi_len); + return 0; + } +diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c +index cc75c4b..3eed270 100644 +--- a/drivers/gpu/drm/i915/intel_display.c ++++ b/drivers/gpu/drm/i915/intel_display.c +@@ -4748,17 +4748,6 @@ static bool intel_choose_pipe_bpp_dither(struct drm_crtc *crtc, + continue; + } + +- if (intel_encoder->type == INTEL_OUTPUT_EDP) { +- /* Use VBT settings if we have an eDP panel */ +- unsigned int edp_bpc = dev_priv->edp.bpp / 3; +- +- if (edp_bpc < display_bpc) { +- DRM_DEBUG_KMS("clamping display bpc (was %d) to eDP (%d)\n", display_bpc, edp_bpc); +- display_bpc = edp_bpc; +- } +- continue; +- } +- + /* Not one of the known troublemakers, check the EDID */ + list_for_each_entry(connector, &dev->mode_config.connector_list, + head) { +diff --git a/drivers/gpu/drm/i915/intel_dp.c b/drivers/gpu/drm/i915/intel_dp.c +index fae2050..c8ecaab 100644 +--- a/drivers/gpu/drm/i915/intel_dp.c ++++ b/drivers/gpu/drm/i915/intel_dp.c +@@ -1152,10 +1152,14 @@ static void ironlake_edp_panel_off(struct intel_dp *intel_dp) + WARN(!intel_dp->want_panel_vdd, "Need VDD to turn off panel\n"); + + pp = ironlake_get_pp_control(dev_priv); +- pp &= ~(POWER_TARGET_ON | PANEL_POWER_RESET | EDP_BLC_ENABLE); ++ /* We need to switch off panel power _and_ force vdd, for otherwise some ++ * panels get very unhappy and cease to work. 
*/ ++ pp &= ~(POWER_TARGET_ON | EDP_FORCE_VDD | PANEL_POWER_RESET | EDP_BLC_ENABLE); + I915_WRITE(PCH_PP_CONTROL, pp); + POSTING_READ(PCH_PP_CONTROL); + ++ intel_dp->want_panel_vdd = false; ++ + ironlake_wait_panel_off(intel_dp); + } + +@@ -1265,11 +1269,9 @@ static void intel_dp_prepare(struct drm_encoder *encoder) + * ensure that we have vdd while we switch off the panel. */ + ironlake_edp_panel_vdd_on(intel_dp); + ironlake_edp_backlight_off(intel_dp); +- ironlake_edp_panel_off(intel_dp); +- + intel_dp_sink_dpms(intel_dp, DRM_MODE_DPMS_ON); ++ ironlake_edp_panel_off(intel_dp); + intel_dp_link_down(intel_dp); +- ironlake_edp_panel_vdd_off(intel_dp, false); + } + + static void intel_dp_commit(struct drm_encoder *encoder) +@@ -1304,11 +1306,9 @@ intel_dp_dpms(struct drm_encoder *encoder, int mode) + /* Switching the panel off requires vdd. */ + ironlake_edp_panel_vdd_on(intel_dp); + ironlake_edp_backlight_off(intel_dp); +- ironlake_edp_panel_off(intel_dp); +- + intel_dp_sink_dpms(intel_dp, mode); ++ ironlake_edp_panel_off(intel_dp); + intel_dp_link_down(intel_dp); +- ironlake_edp_panel_vdd_off(intel_dp, false); + + if (is_cpu_edp(intel_dp)) + ironlake_edp_pll_off(encoder); +diff --git a/drivers/gpu/drm/nouveau/nvd0_display.c b/drivers/gpu/drm/nouveau/nvd0_display.c +index cb006a7..3002d82 100644 +--- a/drivers/gpu/drm/nouveau/nvd0_display.c ++++ b/drivers/gpu/drm/nouveau/nvd0_display.c +@@ -472,7 +472,7 @@ static int + nvd0_crtc_cursor_move(struct drm_crtc *crtc, int x, int y) + { + struct nouveau_crtc *nv_crtc = nouveau_crtc(crtc); +- const u32 data = (y << 16) | x; ++ const u32 data = (y << 16) | (x & 0xffff); + + nv_wr32(crtc->dev, 0x64d084 + (nv_crtc->index * 0x1000), data); + nv_wr32(crtc->dev, 0x64d080 + (nv_crtc->index * 0x1000), 0x00000000); +diff --git a/drivers/gpu/drm/radeon/atombios.h b/drivers/gpu/drm/radeon/atombios.h +index 1b50ad8..4760466 100644 +--- a/drivers/gpu/drm/radeon/atombios.h ++++ b/drivers/gpu/drm/radeon/atombios.h +@@ -101,6 +101,7 @@ + #define ATOM_LCD_SELFTEST_START (ATOM_DISABLE+5) + #define ATOM_LCD_SELFTEST_STOP (ATOM_ENABLE+5) + #define ATOM_ENCODER_INIT (ATOM_DISABLE+7) ++#define ATOM_INIT (ATOM_DISABLE+7) + #define ATOM_GET_STATUS (ATOM_DISABLE+8) + + #define ATOM_BLANKING 1 +@@ -251,25 +252,25 @@ typedef struct _ATOM_MASTER_LIST_OF_COMMAND_TABLES{ + USHORT SetEngineClock; //Function Table,directly used by various SW components,latest version 1.1 + USHORT SetMemoryClock; //Function Table,directly used by various SW components,latest version 1.1 + USHORT SetPixelClock; //Function Table,directly used by various SW components,latest version 1.2 +- USHORT DynamicClockGating; //Atomic Table, indirectly used by various SW components,called from ASIC_Init ++ USHORT EnableDispPowerGating; //Atomic Table, indirectly used by various SW components,called from ASIC_Init + USHORT ResetMemoryDLL; //Atomic Table, indirectly used by various SW components,called from SetMemoryClock + USHORT ResetMemoryDevice; //Atomic Table, indirectly used by various SW components,called from SetMemoryClock +- USHORT MemoryPLLInit; +- USHORT AdjustDisplayPll; //only used by Bios ++ USHORT MemoryPLLInit; //Atomic Table, used only by Bios ++ USHORT AdjustDisplayPll; //Atomic Table, used by various SW componentes. 
+ USHORT AdjustMemoryController; //Atomic Table, indirectly used by various SW components,called from SetMemoryClock + USHORT EnableASIC_StaticPwrMgt; //Atomic Table, only used by Bios + USHORT ASIC_StaticPwrMgtStatusChange; //Obsolete , only used by Bios + USHORT DAC_LoadDetection; //Atomic Table, directly used by various SW components,latest version 1.2 + USHORT LVTMAEncoderControl; //Atomic Table,directly used by various SW components,latest version 1.3 +- USHORT LCD1OutputControl; //Atomic Table, directly used by various SW components,latest version 1.1 ++ USHORT HW_Misc_Operation; //Atomic Table, directly used by various SW components,latest version 1.1 + USHORT DAC1EncoderControl; //Atomic Table, directly used by various SW components,latest version 1.1 + USHORT DAC2EncoderControl; //Atomic Table, directly used by various SW components,latest version 1.1 + USHORT DVOOutputControl; //Atomic Table, directly used by various SW components,latest version 1.1 + USHORT CV1OutputControl; //Atomic Table, Atomic Table, Obsolete from Ry6xx, use DAC2 Output instead +- USHORT GetConditionalGoldenSetting; //only used by Bios ++ USHORT GetConditionalGoldenSetting; //Only used by Bios + USHORT TVEncoderControl; //Function Table,directly used by various SW components,latest version 1.1 +- USHORT TMDSAEncoderControl; //Atomic Table, directly used by various SW components,latest version 1.3 +- USHORT LVDSEncoderControl; //Atomic Table, directly used by various SW components,latest version 1.3 ++ USHORT PatchMCSetting; //only used by BIOS ++ USHORT MC_SEQ_Control; //only used by BIOS + USHORT TV1OutputControl; //Atomic Table, Obsolete from Ry6xx, use DAC2 Output instead + USHORT EnableScaler; //Atomic Table, used only by Bios + USHORT BlankCRTC; //Atomic Table, directly used by various SW components,latest version 1.1 +@@ -282,7 +283,7 @@ typedef struct _ATOM_MASTER_LIST_OF_COMMAND_TABLES{ + USHORT SetCRTC_Replication; //Atomic Table, used only by Bios + USHORT SelectCRTC_Source; //Atomic Table, directly used by various SW components,latest version 1.1 + USHORT EnableGraphSurfaces; //Atomic Table, used only by Bios +- USHORT UpdateCRTC_DoubleBufferRegisters; ++ USHORT UpdateCRTC_DoubleBufferRegisters; //Atomic Table, used only by Bios + USHORT LUT_AutoFill; //Atomic Table, only used by Bios + USHORT EnableHW_IconCursor; //Atomic Table, only used by Bios + USHORT GetMemoryClock; //Atomic Table, directly used by various SW components,latest version 1.1 +@@ -308,27 +309,36 @@ typedef struct _ATOM_MASTER_LIST_OF_COMMAND_TABLES{ + USHORT SetVoltage; //Function Table,directly and/or indirectly used by various SW components,latest version 1.1 + USHORT DAC1OutputControl; //Atomic Table, directly used by various SW components,latest version 1.1 + USHORT DAC2OutputControl; //Atomic Table, directly used by various SW components,latest version 1.1 +- USHORT SetupHWAssistedI2CStatus; //Function Table,only used by Bios, obsolete soon.Switch to use "ReadEDIDFromHWAssistedI2C" ++ USHORT ComputeMemoryClockParam; //Function Table,only used by Bios, obsolete soon.Switch to use "ReadEDIDFromHWAssistedI2C" + USHORT ClockSource; //Atomic Table, indirectly used by various SW components,called from ASIC_Init + USHORT MemoryDeviceInit; //Atomic Table, indirectly used by various SW components,called from SetMemoryClock +- USHORT EnableYUV; //Atomic Table, indirectly used by various SW components,called from EnableVGARender ++ USHORT GetDispObjectInfo; //Atomic Table, indirectly used by various SW components,called from 
EnableVGARender + USHORT DIG1EncoderControl; //Atomic Table,directly used by various SW components,latest version 1.1 + USHORT DIG2EncoderControl; //Atomic Table,directly used by various SW components,latest version 1.1 + USHORT DIG1TransmitterControl; //Atomic Table,directly used by various SW components,latest version 1.1 + USHORT DIG2TransmitterControl; //Atomic Table,directly used by various SW components,latest version 1.1 + USHORT ProcessAuxChannelTransaction; //Function Table,only used by Bios + USHORT DPEncoderService; //Function Table,only used by Bios ++ USHORT GetVoltageInfo; //Function Table,only used by Bios since SI + }ATOM_MASTER_LIST_OF_COMMAND_TABLES; + + // For backward compatible + #define ReadEDIDFromHWAssistedI2C ProcessI2cChannelTransaction +-#define UNIPHYTransmitterControl DIG1TransmitterControl +-#define LVTMATransmitterControl DIG2TransmitterControl ++#define DPTranslatorControl DIG2EncoderControl ++#define UNIPHYTransmitterControl DIG1TransmitterControl ++#define LVTMATransmitterControl DIG2TransmitterControl + #define SetCRTC_DPM_State GetConditionalGoldenSetting + #define SetUniphyInstance ASIC_StaticPwrMgtStatusChange + #define HPDInterruptService ReadHWAssistedI2CStatus + #define EnableVGA_Access GetSCLKOverMCLKRatio +-#define GetDispObjectInfo EnableYUV ++#define EnableYUV GetDispObjectInfo ++#define DynamicClockGating EnableDispPowerGating ++#define SetupHWAssistedI2CStatus ComputeMemoryClockParam ++ ++#define TMDSAEncoderControl PatchMCSetting ++#define LVDSEncoderControl MC_SEQ_Control ++#define LCD1OutputControl HW_Misc_Operation ++ + + typedef struct _ATOM_MASTER_COMMAND_TABLE + { +@@ -495,6 +505,34 @@ typedef struct _COMPUTE_MEMORY_ENGINE_PLL_PARAMETERS_V5 + // ucInputFlag + #define ATOM_PLL_INPUT_FLAG_PLL_STROBE_MODE_EN 1 // 1-StrobeMode, 0-PerformanceMode + ++// use for ComputeMemoryClockParamTable ++typedef struct _COMPUTE_MEMORY_CLOCK_PARAM_PARAMETERS_V2_1 ++{ ++ union ++ { ++ ULONG ulClock; ++ ATOM_S_MPLL_FB_DIVIDER ulFbDiv; //Output:UPPER_WORD=FB_DIV_INTEGER, LOWER_WORD=FB_DIV_FRAC shl (16-FB_FRACTION_BITS) ++ }; ++ UCHAR ucDllSpeed; //Output ++ UCHAR ucPostDiv; //Output ++ union{ ++ UCHAR ucInputFlag; //Input : ATOM_PLL_INPUT_FLAG_PLL_STROBE_MODE_EN: 1-StrobeMode, 0-PerformanceMode ++ UCHAR ucPllCntlFlag; //Output: ++ }; ++ UCHAR ucBWCntl; ++}COMPUTE_MEMORY_CLOCK_PARAM_PARAMETERS_V2_1; ++ ++// definition of ucInputFlag ++#define MPLL_INPUT_FLAG_STROBE_MODE_EN 0x01 ++// definition of ucPllCntlFlag ++#define MPLL_CNTL_FLAG_VCO_MODE_MASK 0x03 ++#define MPLL_CNTL_FLAG_BYPASS_DQ_PLL 0x04 ++#define MPLL_CNTL_FLAG_QDR_ENABLE 0x08 ++#define MPLL_CNTL_FLAG_AD_HALF_RATE 0x10 ++ ++//MPLL_CNTL_FLAG_BYPASS_AD_PLL has a wrong name, should be BYPASS_DQ_PLL ++#define MPLL_CNTL_FLAG_BYPASS_AD_PLL 0x04 ++ + typedef struct _DYNAMICE_MEMORY_SETTINGS_PARAMETER + { + ATOM_COMPUTE_CLOCK_FREQ ulClock; +@@ -562,6 +600,16 @@ typedef struct _DYNAMIC_CLOCK_GATING_PARAMETERS + #define DYNAMIC_CLOCK_GATING_PS_ALLOCATION DYNAMIC_CLOCK_GATING_PARAMETERS + + /****************************************************************************/ ++// Structure used by EnableDispPowerGatingTable.ctb ++/****************************************************************************/ ++typedef struct _ENABLE_DISP_POWER_GATING_PARAMETERS_V2_1 ++{ ++ UCHAR ucDispPipeId; // ATOM_CRTC1, ATOM_CRTC2, ... 
++ UCHAR ucEnable; // ATOM_ENABLE or ATOM_DISABLE ++ UCHAR ucPadding[2]; ++}ENABLE_DISP_POWER_GATING_PARAMETERS_V2_1; ++ ++/****************************************************************************/ + // Structure used by EnableASIC_StaticPwrMgtTable.ctb + /****************************************************************************/ + typedef struct _ENABLE_ASIC_STATIC_PWR_MGT_PARAMETERS +@@ -807,6 +855,7 @@ typedef struct _ATOM_DIG_ENCODER_CONFIG_V4 + #define ATOM_ENCODER_CONFIG_V4_DPLINKRATE_1_62GHZ 0x00 + #define ATOM_ENCODER_CONFIG_V4_DPLINKRATE_2_70GHZ 0x01 + #define ATOM_ENCODER_CONFIG_V4_DPLINKRATE_5_40GHZ 0x02 ++#define ATOM_ENCODER_CONFIG_V4_DPLINKRATE_3_24GHZ 0x03 + #define ATOM_ENCODER_CONFIG_V4_ENCODER_SEL 0x70 + #define ATOM_ENCODER_CONFIG_V4_DIG0_ENCODER 0x00 + #define ATOM_ENCODER_CONFIG_V4_DIG1_ENCODER 0x10 +@@ -814,6 +863,7 @@ typedef struct _ATOM_DIG_ENCODER_CONFIG_V4 + #define ATOM_ENCODER_CONFIG_V4_DIG3_ENCODER 0x30 + #define ATOM_ENCODER_CONFIG_V4_DIG4_ENCODER 0x40 + #define ATOM_ENCODER_CONFIG_V4_DIG5_ENCODER 0x50 ++#define ATOM_ENCODER_CONFIG_V4_DIG6_ENCODER 0x60 + + typedef struct _DIG_ENCODER_CONTROL_PARAMETERS_V4 + { +@@ -1171,6 +1221,106 @@ typedef struct _DIG_TRANSMITTER_CONTROL_PARAMETERS_V4 + #define ATOM_TRANSMITTER_CONFIG_V4_TRANSMITTER3 0x80 //EF + + ++typedef struct _ATOM_DIG_TRANSMITTER_CONFIG_V5 ++{ ++#if ATOM_BIG_ENDIAN ++ UCHAR ucReservd1:1; ++ UCHAR ucHPDSel:3; ++ UCHAR ucPhyClkSrcId:2; ++ UCHAR ucCoherentMode:1; ++ UCHAR ucReserved:1; ++#else ++ UCHAR ucReserved:1; ++ UCHAR ucCoherentMode:1; ++ UCHAR ucPhyClkSrcId:2; ++ UCHAR ucHPDSel:3; ++ UCHAR ucReservd1:1; ++#endif ++}ATOM_DIG_TRANSMITTER_CONFIG_V5; ++ ++typedef struct _DIG_TRANSMITTER_CONTROL_PARAMETERS_V1_5 ++{ ++ USHORT usSymClock; // Encoder Clock in 10kHz,(DP mode)= linkclock/10, (TMDS/LVDS/HDMI)= pixel clock, (HDMI deep color), =pixel clock * deep_color_ratio ++ UCHAR ucPhyId; // 0=UNIPHYA, 1=UNIPHYB, 2=UNIPHYC, 3=UNIPHYD, 4= UNIPHYE 5=UNIPHYF ++ UCHAR ucAction; // define as ATOM_TRANSMITER_ACTION_xxx ++ UCHAR ucLaneNum; // indicate lane number 1-8 ++ UCHAR ucConnObjId; // Connector Object Id defined in ObjectId.h ++ UCHAR ucDigMode; // indicate DIG mode ++ union{ ++ ATOM_DIG_TRANSMITTER_CONFIG_V5 asConfig; ++ UCHAR ucConfig; ++ }; ++ UCHAR ucDigEncoderSel; // indicate DIG front end encoder ++ UCHAR ucDPLaneSet; ++ UCHAR ucReserved; ++ UCHAR ucReserved1; ++}DIG_TRANSMITTER_CONTROL_PARAMETERS_V1_5; ++ ++//ucPhyId ++#define ATOM_PHY_ID_UNIPHYA 0 ++#define ATOM_PHY_ID_UNIPHYB 1 ++#define ATOM_PHY_ID_UNIPHYC 2 ++#define ATOM_PHY_ID_UNIPHYD 3 ++#define ATOM_PHY_ID_UNIPHYE 4 ++#define ATOM_PHY_ID_UNIPHYF 5 ++#define ATOM_PHY_ID_UNIPHYG 6 ++ ++// ucDigEncoderSel ++#define ATOM_TRANMSITTER_V5__DIGA_SEL 0x01 ++#define ATOM_TRANMSITTER_V5__DIGB_SEL 0x02 ++#define ATOM_TRANMSITTER_V5__DIGC_SEL 0x04 ++#define ATOM_TRANMSITTER_V5__DIGD_SEL 0x08 ++#define ATOM_TRANMSITTER_V5__DIGE_SEL 0x10 ++#define ATOM_TRANMSITTER_V5__DIGF_SEL 0x20 ++#define ATOM_TRANMSITTER_V5__DIGG_SEL 0x40 ++ ++// ucDigMode ++#define ATOM_TRANSMITTER_DIGMODE_V5_DP 0 ++#define ATOM_TRANSMITTER_DIGMODE_V5_LVDS 1 ++#define ATOM_TRANSMITTER_DIGMODE_V5_DVI 2 ++#define ATOM_TRANSMITTER_DIGMODE_V5_HDMI 3 ++#define ATOM_TRANSMITTER_DIGMODE_V5_SDVO 4 ++#define ATOM_TRANSMITTER_DIGMODE_V5_DP_MST 5 ++ ++// ucDPLaneSet ++#define DP_LANE_SET__0DB_0_4V 0x00 ++#define DP_LANE_SET__0DB_0_6V 0x01 ++#define DP_LANE_SET__0DB_0_8V 0x02 ++#define DP_LANE_SET__0DB_1_2V 0x03 ++#define DP_LANE_SET__3_5DB_0_4V 0x08 ++#define DP_LANE_SET__3_5DB_0_6V 0x09 
++#define DP_LANE_SET__3_5DB_0_8V 0x0a ++#define DP_LANE_SET__6DB_0_4V 0x10 ++#define DP_LANE_SET__6DB_0_6V 0x11 ++#define DP_LANE_SET__9_5DB_0_4V 0x18 ++ ++// ATOM_DIG_TRANSMITTER_CONFIG_V5 asConfig; ++// Bit1 ++#define ATOM_TRANSMITTER_CONFIG_V5_COHERENT 0x02 ++ ++// Bit3:2 ++#define ATOM_TRANSMITTER_CONFIG_V5_REFCLK_SEL_MASK 0x0c ++#define ATOM_TRANSMITTER_CONFIG_V5_REFCLK_SEL_SHIFT 0x02 ++ ++#define ATOM_TRANSMITTER_CONFIG_V5_P1PLL 0x00 ++#define ATOM_TRANSMITTER_CONFIG_V5_P2PLL 0x04 ++#define ATOM_TRANSMITTER_CONFIG_V5_P0PLL 0x08 ++#define ATOM_TRANSMITTER_CONFIG_V5_REFCLK_SRC_EXT 0x0c ++// Bit6:4 ++#define ATOM_TRANSMITTER_CONFIG_V5_HPD_SEL_MASK 0x70 ++#define ATOM_TRANSMITTER_CONFIG_V5_HPD_SEL_SHIFT 0x04 ++ ++#define ATOM_TRANSMITTER_CONFIG_V5_NO_HPD_SEL 0x00 ++#define ATOM_TRANSMITTER_CONFIG_V5_HPD1_SEL 0x10 ++#define ATOM_TRANSMITTER_CONFIG_V5_HPD2_SEL 0x20 ++#define ATOM_TRANSMITTER_CONFIG_V5_HPD3_SEL 0x30 ++#define ATOM_TRANSMITTER_CONFIG_V5_HPD4_SEL 0x40 ++#define ATOM_TRANSMITTER_CONFIG_V5_HPD5_SEL 0x50 ++#define ATOM_TRANSMITTER_CONFIG_V5_HPD6_SEL 0x60 ++ ++#define DIG_TRANSMITTER_CONTROL_PS_ALLOCATION_V1_5 DIG_TRANSMITTER_CONTROL_PARAMETERS_V1_5 ++ ++ + /****************************************************************************/ + // Structures used by ExternalEncoderControlTable V1.3 + // ASIC Families: Evergreen, Llano, NI +@@ -1793,6 +1943,7 @@ typedef struct _ENABLE_SPREAD_SPECTRUM_ON_PPLL_V2 + #define ATOM_PPLL_SS_TYPE_V3_P1PLL 0x00 + #define ATOM_PPLL_SS_TYPE_V3_P2PLL 0x04 + #define ATOM_PPLL_SS_TYPE_V3_DCPLL 0x08 ++#define ATOM_PPLL_SS_TYPE_V3_P0PLL ATOM_PPLL_SS_TYPE_V3_DCPLL + #define ATOM_PPLL_SS_AMOUNT_V3_FBDIV_MASK 0x00FF + #define ATOM_PPLL_SS_AMOUNT_V3_FBDIV_SHIFT 0 + #define ATOM_PPLL_SS_AMOUNT_V3_NFRAC_MASK 0x0F00 +@@ -2030,12 +2181,77 @@ typedef struct _SET_VOLTAGE_PARAMETERS_V2 + USHORT usVoltageLevel; // real voltage level + }SET_VOLTAGE_PARAMETERS_V2; + ++ ++typedef struct _SET_VOLTAGE_PARAMETERS_V1_3 ++{ ++ UCHAR ucVoltageType; // To tell which voltage to set up, VDDC/MVDDC/MVDDQ/VDDCI ++ UCHAR ucVoltageMode; // Indicate action: Set voltage level ++ USHORT usVoltageLevel; // real voltage level in unit of mv or Voltage Phase (0, 1, 2, .. ) ++}SET_VOLTAGE_PARAMETERS_V1_3; ++ ++//ucVoltageType ++#define VOLTAGE_TYPE_VDDC 1 ++#define VOLTAGE_TYPE_MVDDC 2 ++#define VOLTAGE_TYPE_MVDDQ 3 ++#define VOLTAGE_TYPE_VDDCI 4 ++ ++//SET_VOLTAGE_PARAMETERS_V3.ucVoltageMode ++#define ATOM_SET_VOLTAGE 0 //Set voltage Level ++#define ATOM_INIT_VOLTAGE_REGULATOR 3 //Init Regulator ++#define ATOM_SET_VOLTAGE_PHASE 4 //Set Vregulator Phase ++#define ATOM_GET_MAX_VOLTAGE 6 //Get Max Voltage, not used in SetVoltageTable v1.3 ++#define ATOM_GET_VOLTAGE_LEVEL 6 //Get Voltage level from vitual voltage ID ++ ++// define vitual voltage id in usVoltageLevel ++#define ATOM_VIRTUAL_VOLTAGE_ID0 0xff01 ++#define ATOM_VIRTUAL_VOLTAGE_ID1 0xff02 ++#define ATOM_VIRTUAL_VOLTAGE_ID2 0xff03 ++#define ATOM_VIRTUAL_VOLTAGE_ID3 0xff04 ++ + typedef struct _SET_VOLTAGE_PS_ALLOCATION + { + SET_VOLTAGE_PARAMETERS sASICSetVoltage; + WRITE_ONE_BYTE_HW_I2C_DATA_PS_ALLOCATION sReserved; + }SET_VOLTAGE_PS_ALLOCATION; + ++// New Added from SI for GetVoltageInfoTable, input parameter structure ++typedef struct _GET_VOLTAGE_INFO_INPUT_PARAMETER_V1_1 ++{ ++ UCHAR ucVoltageType; // Input: To tell which voltage to set up, VDDC/MVDDC/MVDDQ/VDDCI ++ UCHAR ucVoltageMode; // Input: Indicate action: Get voltage info ++ USHORT usVoltageLevel; // Input: real voltage level in unit of mv or Voltage Phase (0, 1, 2, .. 
) or Leakage Id ++ ULONG ulReserved; ++}GET_VOLTAGE_INFO_INPUT_PARAMETER_V1_1; ++ ++// New Added from SI for GetVoltageInfoTable, output parameter structure when ucVotlageMode == ATOM_GET_VOLTAGE_VID ++typedef struct _GET_VOLTAGE_INFO_OUTPUT_PARAMETER_V1_1 ++{ ++ ULONG ulVotlageGpioState; ++ ULONG ulVoltageGPioMask; ++}GET_VOLTAGE_INFO_OUTPUT_PARAMETER_V1_1; ++ ++// New Added from SI for GetVoltageInfoTable, output parameter structure when ucVotlageMode == ATOM_GET_VOLTAGE_STATEx_LEAKAGE_VID ++typedef struct _GET_LEAKAGE_VOLTAGE_INFO_OUTPUT_PARAMETER_V1_1 ++{ ++ USHORT usVoltageLevel; ++ USHORT usVoltageId; // Voltage Id programmed in Voltage Regulator ++ ULONG ulReseved; ++}GET_LEAKAGE_VOLTAGE_INFO_OUTPUT_PARAMETER_V1_1; ++ ++ ++// GetVoltageInfo v1.1 ucVoltageMode ++#define ATOM_GET_VOLTAGE_VID 0x00 ++#define ATOM_GET_VOTLAGE_INIT_SEQ 0x03 ++#define ATOM_GET_VOLTTAGE_PHASE_PHASE_VID 0x04 ++// for SI, this state map to 0xff02 voltage state in Power Play table, which is power boost state ++#define ATOM_GET_VOLTAGE_STATE0_LEAKAGE_VID 0x10 ++ ++// for SI, this state map to 0xff01 voltage state in Power Play table, which is performance state ++#define ATOM_GET_VOLTAGE_STATE1_LEAKAGE_VID 0x11 ++// undefined power state ++#define ATOM_GET_VOLTAGE_STATE2_LEAKAGE_VID 0x12 ++#define ATOM_GET_VOLTAGE_STATE3_LEAKAGE_VID 0x13 ++ + /****************************************************************************/ + // Structures used by TVEncoderControlTable + /****************************************************************************/ +@@ -2065,9 +2281,9 @@ typedef struct _ATOM_MASTER_LIST_OF_DATA_TABLES + USHORT MultimediaConfigInfo; // Only used by MM Lib,latest version 2.1, not configuable from Bios, need to include the table to build Bios + USHORT StandardVESA_Timing; // Only used by Bios + USHORT FirmwareInfo; // Shared by various SW components,latest version 1.4 +- USHORT DAC_Info; // Will be obsolete from R600 ++ USHORT PaletteData; // Only used by BIOS + USHORT LCD_Info; // Shared by various SW components,latest version 1.3, was called LVDS_Info +- USHORT TMDS_Info; // Will be obsolete from R600 ++ USHORT DIGTransmitterInfo; // Internal used by VBIOS only version 3.1 + USHORT AnalogTV_Info; // Shared by various SW components,latest version 1.1 + USHORT SupportedDevicesInfo; // Will be obsolete from R600 + USHORT GPIO_I2C_Info; // Shared by various SW components,latest version 1.2 will be used from R600 +@@ -2096,15 +2312,16 @@ typedef struct _ATOM_MASTER_LIST_OF_DATA_TABLES + USHORT PowerSourceInfo; // Shared by various SW components, latest versoin 1.1 + }ATOM_MASTER_LIST_OF_DATA_TABLES; + +-// For backward compatible +-#define LVDS_Info LCD_Info +- + typedef struct _ATOM_MASTER_DATA_TABLE + { + ATOM_COMMON_TABLE_HEADER sHeader; + ATOM_MASTER_LIST_OF_DATA_TABLES ListOfDataTables; + }ATOM_MASTER_DATA_TABLE; + ++// For backward compatible ++#define LVDS_Info LCD_Info ++#define DAC_Info PaletteData ++#define TMDS_Info DIGTransmitterInfo + + /****************************************************************************/ + // Structure used in MultimediaCapabilityInfoTable +@@ -2171,7 +2388,9 @@ typedef struct _ATOM_MULTIMEDIA_CONFIG_INFO + typedef struct _ATOM_FIRMWARE_CAPABILITY + { + #if ATOM_BIG_ENDIAN +- USHORT Reserved:3; ++ USHORT Reserved:1; ++ USHORT SCL2Redefined:1; ++ USHORT PostWithoutModeSet:1; + USHORT HyperMemory_Size:4; + USHORT HyperMemory_Support:1; + USHORT PPMode_Assigned:1; +@@ -2193,7 +2412,9 @@ typedef struct _ATOM_FIRMWARE_CAPABILITY + USHORT PPMode_Assigned:1; + USHORT 
HyperMemory_Support:1; + USHORT HyperMemory_Size:4; +- USHORT Reserved:3; ++ USHORT PostWithoutModeSet:1; ++ USHORT SCL2Redefined:1; ++ USHORT Reserved:1; + #endif + }ATOM_FIRMWARE_CAPABILITY; + +@@ -2418,7 +2639,8 @@ typedef struct _ATOM_FIRMWARE_INFO_V2_2 + USHORT usLcdMaxPixelClockPLL_Output; // In MHz unit + ULONG ulReserved4; //Was ulAsicMaximumVoltage + ULONG ulMinPixelClockPLL_Output; //In 10Khz unit +- ULONG ulReserved5; //Was usMinEngineClockPLL_Input and usMaxEngineClockPLL_Input ++ UCHAR ucRemoteDisplayConfig; ++ UCHAR ucReserved5[3]; //Was usMinEngineClockPLL_Input and usMaxEngineClockPLL_Input + ULONG ulReserved6; //Was usMinEngineClockPLL_Output and usMinMemoryClockPLL_Input + ULONG ulReserved7; //Was usMaxMemoryClockPLL_Input and usMinMemoryClockPLL_Output + USHORT usReserved11; //Was usMaxPixelClock; //In 10Khz unit, Max. Pclk used only for DAC +@@ -2438,6 +2660,11 @@ typedef struct _ATOM_FIRMWARE_INFO_V2_2 + + #define ATOM_FIRMWARE_INFO_LAST ATOM_FIRMWARE_INFO_V2_2 + ++ ++// definition of ucRemoteDisplayConfig ++#define REMOTE_DISPLAY_DISABLE 0x00 ++#define REMOTE_DISPLAY_ENABLE 0x01 ++ + /****************************************************************************/ + // Structures used in IntegratedSystemInfoTable + /****************************************************************************/ +@@ -2660,8 +2887,9 @@ usMinDownStreamHTLinkWidth: same as above. + #define INTEGRATED_SYSTEM_INFO__AMD_CPU__GREYHOUND 2 + #define INTEGRATED_SYSTEM_INFO__AMD_CPU__K8 3 + #define INTEGRATED_SYSTEM_INFO__AMD_CPU__PHARAOH 4 ++#define INTEGRATED_SYSTEM_INFO__AMD_CPU__OROCHI 5 + +-#define INTEGRATED_SYSTEM_INFO__AMD_CPU__MAX_CODE INTEGRATED_SYSTEM_INFO__AMD_CPU__PHARAOH // this deff reflects max defined CPU code ++#define INTEGRATED_SYSTEM_INFO__AMD_CPU__MAX_CODE INTEGRATED_SYSTEM_INFO__AMD_CPU__OROCHI // this deff reflects max defined CPU code + + #define SYSTEM_CONFIG_POWEREXPRESS_ENABLE 0x00000001 + #define SYSTEM_CONFIG_RUN_AT_OVERDRIVE_ENGINE 0x00000002 +@@ -2753,6 +2981,7 @@ typedef struct _ATOM_INTEGRATED_SYSTEM_INFO_V5 + #define ASIC_INT_DIG4_ENCODER_ID 0x0b + #define ASIC_INT_DIG5_ENCODER_ID 0x0c + #define ASIC_INT_DIG6_ENCODER_ID 0x0d ++#define ASIC_INT_DIG7_ENCODER_ID 0x0e + + //define Encoder attribute + #define ATOM_ANALOG_ENCODER 0 +@@ -3226,15 +3455,23 @@ typedef struct _ATOM_LCD_INFO_V13 + + UCHAR ucPowerSequenceDIGONtoDE_in4Ms; + UCHAR ucPowerSequenceDEtoVARY_BL_in4Ms; +- UCHAR ucPowerSequenceDEtoDIGON_in4Ms; + UCHAR ucPowerSequenceVARY_BLtoDE_in4Ms; ++ UCHAR ucPowerSequenceDEtoDIGON_in4Ms; + + UCHAR ucOffDelay_in4Ms; + UCHAR ucPowerSequenceVARY_BLtoBLON_in4Ms; + UCHAR ucPowerSequenceBLONtoVARY_BL_in4Ms; + UCHAR ucReserved1; + +- ULONG ulReserved[4]; ++ UCHAR ucDPCD_eDP_CONFIGURATION_CAP; // dpcd 0dh ++ UCHAR ucDPCD_MAX_LINK_RATE; // dpcd 01h ++ UCHAR ucDPCD_MAX_LANE_COUNT; // dpcd 02h ++ UCHAR ucDPCD_MAX_DOWNSPREAD; // dpcd 03h ++ ++ USHORT usMaxPclkFreqInSingleLink; // Max PixelClock frequency in single link mode. ++ UCHAR uceDPToLVDSRxId; ++ UCHAR ucLcdReservd; ++ ULONG ulReserved[2]; + }ATOM_LCD_INFO_V13; + + #define ATOM_LCD_INFO_LAST ATOM_LCD_INFO_V13 +@@ -3273,6 +3510,11 @@ typedef struct _ATOM_LCD_INFO_V13 + //Use this cap bit for a quick reference whether an embadded panel (LCD1 ) is LVDS or eDP. 
+ #define LCDPANEL_CAP_V13_eDP 0x4 // = LCDPANEL_CAP_eDP no change comparing to previous version + ++//uceDPToLVDSRxId ++#define eDP_TO_LVDS_RX_DISABLE 0x00 // no eDP->LVDS translator chip ++#define eDP_TO_LVDS_COMMON_ID 0x01 // common eDP->LVDS translator chip without AMD SW init ++#define eDP_TO_LVDS_RT_ID 0x02 // RT tanslator which require AMD SW init ++ + typedef struct _ATOM_PATCH_RECORD_MODE + { + UCHAR ucRecordType; +@@ -3317,6 +3559,7 @@ typedef struct _ATOM_PANEL_RESOLUTION_PATCH_RECORD + #define LCD_CAP_RECORD_TYPE 3 + #define LCD_FAKE_EDID_PATCH_RECORD_TYPE 4 + #define LCD_PANEL_RESOLUTION_RECORD_TYPE 5 ++#define LCD_EDID_OFFSET_PATCH_RECORD_TYPE 6 + #define ATOM_RECORD_END_TYPE 0xFF + + /****************************Spread Spectrum Info Table Definitions **********************/ +@@ -3528,6 +3771,7 @@ else //Non VGA case + + CAIL needs to claim an reserved area defined by FBAccessAreaOffset and usFBUsedbyDrvInKB in non VGA case.*/ + ++/***********************************************************************************/ + #define ATOM_MAX_FIRMWARE_VRAM_USAGE_INFO 1 + + typedef struct _ATOM_FIRMWARE_VRAM_RESERVE_INFO +@@ -3818,13 +4062,17 @@ typedef struct _EXT_DISPLAY_PATH + ATOM_DP_CONN_CHANNEL_MAPPING asDPMapping; + ATOM_DVI_CONN_CHANNEL_MAPPING asDVIMapping; + }; +- UCHAR ucReserved; +- USHORT usReserved[2]; ++ UCHAR ucChPNInvert; // bit vector for up to 8 lanes, =0: P and N is not invert, =1 P and N is inverted ++ USHORT usCaps; ++ USHORT usReserved; + }EXT_DISPLAY_PATH; + + #define NUMBER_OF_UCHAR_FOR_GUID 16 + #define MAX_NUMBER_OF_EXT_DISPLAY_PATH 7 + ++//usCaps ++#define EXT_DISPLAY_PATH_CAPS__HBR2_DISABLE 0x01 ++ + typedef struct _ATOM_EXTERNAL_DISPLAY_CONNECTION_INFO + { + ATOM_COMMON_TABLE_HEADER sHeader; +@@ -3832,7 +4080,9 @@ typedef struct _ATOM_EXTERNAL_DISPLAY_CONNECTION_INFO + EXT_DISPLAY_PATH sPath[MAX_NUMBER_OF_EXT_DISPLAY_PATH]; // total of fixed 7 entries. + UCHAR ucChecksum; // a simple Checksum of the sum of whole structure equal to 0x0. + UCHAR uc3DStereoPinId; // use for eDP panel +- UCHAR Reserved [6]; // for potential expansion ++ UCHAR ucRemoteDisplayConfig; ++ UCHAR uceDPToLVDSRxId; ++ UCHAR Reserved[4]; // for potential expansion + }ATOM_EXTERNAL_DISPLAY_CONNECTION_INFO; + + //Related definitions, all records are different but they have a commond header +@@ -3977,6 +4227,7 @@ typedef struct _ATOM_OBJECT_GPIO_CNTL_RECORD + #define GPIO_PIN_STATE_ACTIVE_HIGH 0x1 + + // Indexes to GPIO array in GLSync record ++// GLSync record is for Frame Lock/Gen Lock feature. 
+ #define ATOM_GPIO_INDEX_GLSYNC_REFCLK 0 + #define ATOM_GPIO_INDEX_GLSYNC_HSYNC 1 + #define ATOM_GPIO_INDEX_GLSYNC_VSYNC 2 +@@ -3984,7 +4235,9 @@ typedef struct _ATOM_OBJECT_GPIO_CNTL_RECORD + #define ATOM_GPIO_INDEX_GLSYNC_SWAP_GNT 4 + #define ATOM_GPIO_INDEX_GLSYNC_INTERRUPT 5 + #define ATOM_GPIO_INDEX_GLSYNC_V_RESET 6 +-#define ATOM_GPIO_INDEX_GLSYNC_MAX 7 ++#define ATOM_GPIO_INDEX_GLSYNC_SWAP_CNTL 7 ++#define ATOM_GPIO_INDEX_GLSYNC_SWAP_SEL 8 ++#define ATOM_GPIO_INDEX_GLSYNC_MAX 9 + + typedef struct _ATOM_ENCODER_DVO_CF_RECORD + { +@@ -3994,7 +4247,8 @@ typedef struct _ATOM_ENCODER_DVO_CF_RECORD + }ATOM_ENCODER_DVO_CF_RECORD; + + // Bit maps for ATOM_ENCODER_CAP_RECORD.ucEncoderCap +-#define ATOM_ENCODER_CAP_RECORD_HBR2 0x01 // DP1.2 HBR2 is supported by this path ++#define ATOM_ENCODER_CAP_RECORD_HBR2 0x01 // DP1.2 HBR2 is supported by HW encoder ++#define ATOM_ENCODER_CAP_RECORD_HBR2_EN 0x02 // DP1.2 HBR2 setting is qualified and HBR2 can be enabled + + typedef struct _ATOM_ENCODER_CAP_RECORD + { +@@ -4003,11 +4257,13 @@ typedef struct _ATOM_ENCODER_CAP_RECORD + USHORT usEncoderCap; + struct { + #if ATOM_BIG_ENDIAN +- USHORT usReserved:15; // Bit1-15 may be defined for other capability in future ++ USHORT usReserved:14; // Bit1-15 may be defined for other capability in future ++ USHORT usHBR2En:1; // Bit1 is for DP1.2 HBR2 enable + USHORT usHBR2Cap:1; // Bit0 is for DP1.2 HBR2 capability. + #else + USHORT usHBR2Cap:1; // Bit0 is for DP1.2 HBR2 capability. +- USHORT usReserved:15; // Bit1-15 may be defined for other capability in future ++ USHORT usHBR2En:1; // Bit1 is for DP1.2 HBR2 enable ++ USHORT usReserved:14; // Bit1-15 may be defined for other capability in future + #endif + }; + }; +@@ -4157,6 +4413,7 @@ typedef struct _ATOM_VOLTAGE_CONTROL + #define VOLTAGE_CONTROL_ID_VT1556M 0x07 + #define VOLTAGE_CONTROL_ID_CHL822x 0x08 + #define VOLTAGE_CONTROL_ID_VT1586M 0x09 ++#define VOLTAGE_CONTROL_ID_UP1637 0x0A + + typedef struct _ATOM_VOLTAGE_OBJECT + { +@@ -4193,6 +4450,69 @@ typedef struct _ATOM_LEAKID_VOLTAGE + USHORT usVoltage; + }ATOM_LEAKID_VOLTAGE; + ++typedef struct _ATOM_VOLTAGE_OBJECT_HEADER_V3{ ++ UCHAR ucVoltageType; //Indicate Voltage Source: VDDC, MVDDC, MVDDQ or MVDDCI ++ UCHAR ucVoltageMode; //Indicate voltage control mode: Init/Set/Leakage/Set phase ++ USHORT usSize; //Size of Object ++}ATOM_VOLTAGE_OBJECT_HEADER_V3; ++ ++typedef struct _VOLTAGE_LUT_ENTRY_V2 ++{ ++ ULONG ulVoltageId; // The Voltage ID which is used to program GPIO register ++ USHORT usVoltageValue; // The corresponding Voltage Value, in mV ++}VOLTAGE_LUT_ENTRY_V2; ++ ++typedef struct _LEAKAGE_VOLTAGE_LUT_ENTRY_V2 ++{ ++ USHORT usVoltageLevel; // The Voltage ID which is used to program GPIO register ++ USHORT usVoltageId; ++ USHORT usLeakageId; // The corresponding Voltage Value, in mV ++}LEAKAGE_VOLTAGE_LUT_ENTRY_V2; ++ ++typedef struct _ATOM_I2C_VOLTAGE_OBJECT_V3 ++{ ++ ATOM_VOLTAGE_OBJECT_HEADER_V3 sHeader; ++ UCHAR ucVoltageRegulatorId; //Indicate Voltage Regulator Id ++ UCHAR ucVoltageControlI2cLine; ++ UCHAR ucVoltageControlAddress; ++ UCHAR ucVoltageControlOffset; ++ ULONG ulReserved; ++ VOLTAGE_LUT_ENTRY asVolI2cLut[1]; // end with 0xff ++}ATOM_I2C_VOLTAGE_OBJECT_V3; ++ ++typedef struct _ATOM_GPIO_VOLTAGE_OBJECT_V3 ++{ ++ ATOM_VOLTAGE_OBJECT_HEADER_V3 sHeader; ++ UCHAR ucVoltageGpioCntlId; // default is 0 which indicate control through CG VID mode ++ UCHAR ucGpioEntryNum; // indiate the entry numbers of Votlage/Gpio value Look up table ++ UCHAR ucPhaseDelay; // phase delay in unit of micro 
second ++ UCHAR ucReserved; ++ ULONG ulGpioMaskVal; // GPIO Mask value ++ VOLTAGE_LUT_ENTRY_V2 asVolGpioLut[1]; ++}ATOM_GPIO_VOLTAGE_OBJECT_V3; ++ ++typedef struct _ATOM_LEAKAGE_VOLTAGE_OBJECT_V3 ++{ ++ ATOM_VOLTAGE_OBJECT_HEADER_V3 sHeader; ++ UCHAR ucLeakageCntlId; // default is 0 ++ UCHAR ucLeakageEntryNum; // indicate the entry number of LeakageId/Voltage Lut table ++ UCHAR ucReserved[2]; ++ ULONG ulMaxVoltageLevel; ++ LEAKAGE_VOLTAGE_LUT_ENTRY_V2 asLeakageIdLut[1]; ++}ATOM_LEAKAGE_VOLTAGE_OBJECT_V3; ++ ++typedef union _ATOM_VOLTAGE_OBJECT_V3{ ++ ATOM_GPIO_VOLTAGE_OBJECT_V3 asGpioVoltageObj; ++ ATOM_I2C_VOLTAGE_OBJECT_V3 asI2cVoltageObj; ++ ATOM_LEAKAGE_VOLTAGE_OBJECT_V3 asLeakageObj; ++}ATOM_VOLTAGE_OBJECT_V3; ++ ++typedef struct _ATOM_VOLTAGE_OBJECT_INFO_V3_1 ++{ ++ ATOM_COMMON_TABLE_HEADER sHeader; ++ ATOM_VOLTAGE_OBJECT_V3 asVoltageObj[3]; //Info for Voltage control ++}ATOM_VOLTAGE_OBJECT_INFO_V3_1; ++ + typedef struct _ATOM_ASIC_PROFILE_VOLTAGE + { + UCHAR ucProfileId; +@@ -4305,7 +4625,18 @@ typedef struct _ATOM_INTEGRATED_SYSTEM_INFO_V6 + USHORT usHDMISSpreadRateIn10Hz; + USHORT usDVISSPercentage; + USHORT usDVISSpreadRateIn10Hz; +- ULONG ulReserved3[21]; ++ ULONG SclkDpmBoostMargin; ++ ULONG SclkDpmThrottleMargin; ++ USHORT SclkDpmTdpLimitPG; ++ USHORT SclkDpmTdpLimitBoost; ++ ULONG ulBoostEngineCLock; ++ UCHAR ulBoostVid_2bit; ++ UCHAR EnableBoost; ++ USHORT GnbTdpLimit; ++ USHORT usMaxLVDSPclkFreqInSingleLink; ++ UCHAR ucLvdsMisc; ++ UCHAR ucLVDSReserved; ++ ULONG ulReserved3[15]; + ATOM_EXTERNAL_DISPLAY_CONNECTION_INFO sExtDispConnInfo; + }ATOM_INTEGRATED_SYSTEM_INFO_V6; + +@@ -4313,9 +4644,16 @@ typedef struct _ATOM_INTEGRATED_SYSTEM_INFO_V6 + #define INTEGRATED_SYSTEM_INFO_V6_GPUCAPINFO__TMDSHDMI_COHERENT_SINGLEPLL_MODE 0x01 + #define INTEGRATED_SYSTEM_INFO_V6_GPUCAPINFO__DISABLE_AUX_HW_MODE_DETECTION 0x08 + +-// ulOtherDisplayMisc +-#define INTEGRATED_SYSTEM_INFO__GET_EDID_CALLBACK_FUNC_SUPPORT 0x01 ++//ucLVDSMisc: ++#define SYS_INFO_LVDSMISC__888_FPDI_MODE 0x01 ++#define SYS_INFO_LVDSMISC__DL_CH_SWAP 0x02 ++#define SYS_INFO_LVDSMISC__888_BPC 0x04 ++#define SYS_INFO_LVDSMISC__OVERRIDE_EN 0x08 ++#define SYS_INFO_LVDSMISC__BLON_ACTIVE_LOW 0x10 + ++// not used any more ++#define SYS_INFO_LVDSMISC__VSYNC_ACTIVE_LOW 0x04 ++#define SYS_INFO_LVDSMISC__HSYNC_ACTIVE_LOW 0x08 + + /********************************************************************************************************************** + ATOM_INTEGRATED_SYSTEM_INFO_V6 Description +@@ -4384,7 +4722,208 @@ ucUMAChannelNumber: System memory channel numbers. + ulCSR_M3_ARB_CNTL_DEFAULT[10]: Arrays with values for CSR M3 arbiter for default + ulCSR_M3_ARB_CNTL_UVD[10]: Arrays with values for CSR M3 arbiter for UVD playback. + ulCSR_M3_ARB_CNTL_FS3D[10]: Arrays with values for CSR M3 arbiter for Full Screen 3D applications. +-sAvail_SCLK[5]: Arrays to provide available list of SLCK and corresponding voltage, order from low to high ++sAvail_SCLK[5]: Arrays to provide availabe list of SLCK and corresponding voltage, order from low to high ++ulGMCRestoreResetTime: GMC power restore and GMC reset time to calculate data reconnection latency. Unit in ns. ++ulMinimumNClk: Minimum NCLK speed among all NB-Pstates to calcualte data reconnection latency. Unit in 10kHz. ++ulIdleNClk: NCLK speed while memory runs in self-refresh state. Unit in 10kHz. ++ulDDR_DLL_PowerUpTime: DDR PHY DLL power up time. Unit in ns. ++ulDDR_PLL_PowerUpTime: DDR PHY PLL power up time. Unit in ns. 
++usPCIEClkSSPercentage: PCIE Clock Spred Spectrum Percentage in unit 0.01%; 100 mean 1%. ++usPCIEClkSSType: PCIE Clock Spred Spectrum Type. 0 for Down spread(default); 1 for Center spread. ++usLvdsSSPercentage: LVDS panel ( not include eDP ) Spread Spectrum Percentage in unit of 0.01%, =0, use VBIOS default setting. ++usLvdsSSpreadRateIn10Hz: LVDS panel ( not include eDP ) Spread Spectrum frequency in unit of 10Hz, =0, use VBIOS default setting. ++usHDMISSPercentage: HDMI Spread Spectrum Percentage in unit 0.01%; 100 mean 1%, =0, use VBIOS default setting. ++usHDMISSpreadRateIn10Hz: HDMI Spread Spectrum frequency in unit of 10Hz, =0, use VBIOS default setting. ++usDVISSPercentage: DVI Spread Spectrum Percentage in unit 0.01%; 100 mean 1%, =0, use VBIOS default setting. ++usDVISSpreadRateIn10Hz: DVI Spread Spectrum frequency in unit of 10Hz, =0, use VBIOS default setting. ++usMaxLVDSPclkFreqInSingleLink: Max pixel clock LVDS panel single link, if=0 means VBIOS use default threhold, right now it is 85Mhz ++ucLVDSMisc: [bit0] LVDS 888bit panel mode =0: LVDS 888 panel in LDI mode, =1: LVDS 888 panel in FPDI mode ++ [bit1] LVDS panel lower and upper link mapping =0: lower link and upper link not swap, =1: lower link and upper link are swapped ++ [bit2] LVDS 888bit per color mode =0: 666 bit per color =1:888 bit per color ++ [bit3] LVDS parameter override enable =0: ucLvdsMisc parameter are not used =1: ucLvdsMisc parameter should be used ++ [bit4] Polarity of signal sent to digital BLON output pin. =0: not inverted(active high) =1: inverted ( active low ) ++**********************************************************************************************************************/ ++ ++// this Table is used for Liano/Ontario APU ++typedef struct _ATOM_FUSION_SYSTEM_INFO_V1 ++{ ++ ATOM_INTEGRATED_SYSTEM_INFO_V6 sIntegratedSysInfo; ++ ULONG ulPowerplayTable[128]; ++}ATOM_FUSION_SYSTEM_INFO_V1; ++/********************************************************************************************************************** ++ ATOM_FUSION_SYSTEM_INFO_V1 Description ++sIntegratedSysInfo: refer to ATOM_INTEGRATED_SYSTEM_INFO_V6 definition. 
++ulPowerplayTable[128]: This 512 bytes memory is used to save ATOM_PPLIB_POWERPLAYTABLE3, starting form ulPowerplayTable[0] ++**********************************************************************************************************************/ ++ ++// this IntegrateSystemInfoTable is used for Trinity APU ++typedef struct _ATOM_INTEGRATED_SYSTEM_INFO_V1_7 ++{ ++ ATOM_COMMON_TABLE_HEADER sHeader; ++ ULONG ulBootUpEngineClock; ++ ULONG ulDentistVCOFreq; ++ ULONG ulBootUpUMAClock; ++ ATOM_CLK_VOLT_CAPABILITY sDISPCLK_Voltage[4]; ++ ULONG ulBootUpReqDisplayVector; ++ ULONG ulOtherDisplayMisc; ++ ULONG ulGPUCapInfo; ++ ULONG ulSB_MMIO_Base_Addr; ++ USHORT usRequestedPWMFreqInHz; ++ UCHAR ucHtcTmpLmt; ++ UCHAR ucHtcHystLmt; ++ ULONG ulMinEngineClock; ++ ULONG ulSystemConfig; ++ ULONG ulCPUCapInfo; ++ USHORT usNBP0Voltage; ++ USHORT usNBP1Voltage; ++ USHORT usBootUpNBVoltage; ++ USHORT usExtDispConnInfoOffset; ++ USHORT usPanelRefreshRateRange; ++ UCHAR ucMemoryType; ++ UCHAR ucUMAChannelNumber; ++ UCHAR strVBIOSMsg[40]; ++ ULONG ulReserved[20]; ++ ATOM_AVAILABLE_SCLK_LIST sAvail_SCLK[5]; ++ ULONG ulGMCRestoreResetTime; ++ ULONG ulMinimumNClk; ++ ULONG ulIdleNClk; ++ ULONG ulDDR_DLL_PowerUpTime; ++ ULONG ulDDR_PLL_PowerUpTime; ++ USHORT usPCIEClkSSPercentage; ++ USHORT usPCIEClkSSType; ++ USHORT usLvdsSSPercentage; ++ USHORT usLvdsSSpreadRateIn10Hz; ++ USHORT usHDMISSPercentage; ++ USHORT usHDMISSpreadRateIn10Hz; ++ USHORT usDVISSPercentage; ++ USHORT usDVISSpreadRateIn10Hz; ++ ULONG SclkDpmBoostMargin; ++ ULONG SclkDpmThrottleMargin; ++ USHORT SclkDpmTdpLimitPG; ++ USHORT SclkDpmTdpLimitBoost; ++ ULONG ulBoostEngineCLock; ++ UCHAR ulBoostVid_2bit; ++ UCHAR EnableBoost; ++ USHORT GnbTdpLimit; ++ USHORT usMaxLVDSPclkFreqInSingleLink; ++ UCHAR ucLvdsMisc; ++ UCHAR ucLVDSReserved; ++ UCHAR ucLVDSPwrOnSeqDIGONtoDE_in4Ms; ++ UCHAR ucLVDSPwrOnSeqDEtoVARY_BL_in4Ms; ++ UCHAR ucLVDSPwrOffSeqVARY_BLtoDE_in4Ms; ++ UCHAR ucLVDSPwrOffSeqDEtoDIGON_in4Ms; ++ UCHAR ucLVDSOffToOnDelay_in4Ms; ++ UCHAR ucLVDSPwrOnSeqVARY_BLtoBLON_in4Ms; ++ UCHAR ucLVDSPwrOffSeqBLONtoVARY_BL_in4Ms; ++ UCHAR ucLVDSReserved1; ++ ULONG ulLCDBitDepthControlVal; ++ ULONG ulNbpStateMemclkFreq[4]; ++ USHORT usNBP2Voltage; ++ USHORT usNBP3Voltage; ++ ULONG ulNbpStateNClkFreq[4]; ++ UCHAR ucNBDPMEnable; ++ UCHAR ucReserved[3]; ++ UCHAR ucDPMState0VclkFid; ++ UCHAR ucDPMState0DclkFid; ++ UCHAR ucDPMState1VclkFid; ++ UCHAR ucDPMState1DclkFid; ++ UCHAR ucDPMState2VclkFid; ++ UCHAR ucDPMState2DclkFid; ++ UCHAR ucDPMState3VclkFid; ++ UCHAR ucDPMState3DclkFid; ++ ATOM_EXTERNAL_DISPLAY_CONNECTION_INFO sExtDispConnInfo; ++}ATOM_INTEGRATED_SYSTEM_INFO_V1_7; ++ ++// ulOtherDisplayMisc ++#define INTEGRATED_SYSTEM_INFO__GET_EDID_CALLBACK_FUNC_SUPPORT 0x01 ++#define INTEGRATED_SYSTEM_INFO__GET_BOOTUP_DISPLAY_CALLBACK_FUNC_SUPPORT 0x02 ++#define INTEGRATED_SYSTEM_INFO__GET_EXPANSION_CALLBACK_FUNC_SUPPORT 0x04 ++#define INTEGRATED_SYSTEM_INFO__FAST_BOOT_SUPPORT 0x08 ++ ++// ulGPUCapInfo ++#define SYS_INFO_GPUCAPS__TMDSHDMI_COHERENT_SINGLEPLL_MODE 0x01 ++#define SYS_INFO_GPUCAPS__DP_SINGLEPLL_MODE 0x02 ++#define SYS_INFO_GPUCAPS__DISABLE_AUX_MODE_DETECT 0x08 ++ ++/********************************************************************************************************************** ++ ATOM_INTEGRATED_SYSTEM_INFO_V1_7 Description ++ulBootUpEngineClock: VBIOS bootup Engine clock frequency, in 10kHz unit. if it is equal 0, then VBIOS use pre-defined bootup engine clock ++ulDentistVCOFreq: Dentist VCO clock in 10kHz unit. 
++ulBootUpUMAClock: System memory boot up clock frequency in 10Khz unit. ++sDISPCLK_Voltage: Report Display clock voltage requirement. ++ ++ulBootUpReqDisplayVector: VBIOS boot up display IDs, following are supported devices in Trinity projects: ++ ATOM_DEVICE_CRT1_SUPPORT 0x0001 ++ ATOM_DEVICE_DFP1_SUPPORT 0x0008 ++ ATOM_DEVICE_DFP6_SUPPORT 0x0040 ++ ATOM_DEVICE_DFP2_SUPPORT 0x0080 ++ ATOM_DEVICE_DFP3_SUPPORT 0x0200 ++ ATOM_DEVICE_DFP4_SUPPORT 0x0400 ++ ATOM_DEVICE_DFP5_SUPPORT 0x0800 ++ ATOM_DEVICE_LCD1_SUPPORT 0x0002 ++ulOtherDisplayMisc: bit[0]=0: INT15 callback function Get LCD EDID ( ax=4e08, bl=1b ) is not supported by SBIOS. ++ =1: INT15 callback function Get LCD EDID ( ax=4e08, bl=1b ) is supported by SBIOS. ++ bit[1]=0: INT15 callback function Get boot display( ax=4e08, bl=01h) is not supported by SBIOS ++ =1: INT15 callback function Get boot display( ax=4e08, bl=01h) is supported by SBIOS ++ bit[2]=0: INT15 callback function Get panel Expansion ( ax=4e08, bl=02h) is not supported by SBIOS ++ =1: INT15 callback function Get panel Expansion ( ax=4e08, bl=02h) is supported by SBIOS ++ bit[3]=0: VBIOS fast boot is disable ++ =1: VBIOS fast boot is enable. ( VBIOS skip display device detection in every set mode if LCD panel is connect and LID is open) ++ulGPUCapInfo: bit[0]=0: TMDS/HDMI Coherent Mode use cascade PLL mode. ++ =1: TMDS/HDMI Coherent Mode use signel PLL mode. ++ bit[1]=0: DP mode use cascade PLL mode ( New for Trinity ) ++ =1: DP mode use single PLL mode ++ bit[3]=0: Enable AUX HW mode detection logic ++ =1: Disable AUX HW mode detection logic ++ ++ulSB_MMIO_Base_Addr: Physical Base address to SB MMIO space. Driver needs to initialize it for SMU usage. ++ ++usRequestedPWMFreqInHz: When it's set to 0x0 by SBIOS: the LCD BackLight is not controlled by GPU(SW). ++ Any attempt to change BL using VBIOS function or enable VariBri from PP table is not effective since ATOM_BIOS_INFO_BL_CONTROLLED_BY_GPU==0; ++ ++ When it's set to a non-zero frequency, the BackLight is controlled by GPU (SW) in one of two ways below: ++ 1. SW uses the GPU BL PWM output to control the BL, in chis case, this non-zero frequency determines what freq GPU should use; ++ VBIOS will set up proper PWM frequency and ATOM_BIOS_INFO_BL_CONTROLLED_BY_GPU==1,as the result, ++ Changing BL using VBIOS function is functional in both driver and non-driver present environment; ++ and enabling VariBri under the driver environment from PP table is optional. ++ ++ 2. SW uses other means to control BL (like DPCD),this non-zero frequency serves as a flag only indicating ++ that BL control from GPU is expected. ++ VBIOS will NOT set up PWM frequency but make ATOM_BIOS_INFO_BL_CONTROLLED_BY_GPU==1 ++ Changing BL using VBIOS function could be functional in both driver and non-driver present environment,but ++ it's per platform ++ and enabling VariBri under the driver environment from PP table is optional. ++ ++ucHtcTmpLmt: Refer to D18F3x64 bit[22:16], HtcTmpLmt. ++ Threshold on value to enter HTC_active state. ++ucHtcHystLmt: Refer to D18F3x64 bit[27:24], HtcHystLmt. ++ To calculate threshold off value to exit HTC_active state, which is Threshold on vlaue minus ucHtcHystLmt. ++ulMinEngineClock: Minimum SCLK allowed in 10kHz unit. This is calculated based on WRCK Fuse settings. ++ulSystemConfig: Bit[0]=0: PCIE Power Gating Disabled ++ =1: PCIE Power Gating Enabled ++ Bit[1]=0: DDR-DLL shut-down feature disabled. ++ 1: DDR-DLL shut-down feature enabled. ++ Bit[2]=0: DDR-PLL Power down feature disabled. 
++ 1: DDR-PLL Power down feature enabled. ++ulCPUCapInfo: TBD ++usNBP0Voltage: VID for voltage on NB P0 State ++usNBP1Voltage: VID for voltage on NB P1 State ++usNBP2Voltage: VID for voltage on NB P2 State ++usNBP3Voltage: VID for voltage on NB P3 State ++usBootUpNBVoltage: Voltage Index of GNB voltage configured by SBIOS, which is suffcient to support VBIOS DISPCLK requirement. ++usExtDispConnInfoOffset: Offset to sExtDispConnInfo inside the structure ++usPanelRefreshRateRange: Bit vector for LCD supported refresh rate range. If DRR is requestd by the platform, at least two bits need to be set ++ to indicate a range. ++ SUPPORTED_LCD_REFRESHRATE_30Hz 0x0004 ++ SUPPORTED_LCD_REFRESHRATE_40Hz 0x0008 ++ SUPPORTED_LCD_REFRESHRATE_50Hz 0x0010 ++ SUPPORTED_LCD_REFRESHRATE_60Hz 0x0020 ++ucMemoryType: [3:0]=1:DDR1;=2:DDR2;=3:DDR3.[7:4] is reserved. ++ucUMAChannelNumber: System memory channel numbers. ++ulCSR_M3_ARB_CNTL_DEFAULT[10]: Arrays with values for CSR M3 arbiter for default ++ulCSR_M3_ARB_CNTL_UVD[10]: Arrays with values for CSR M3 arbiter for UVD playback. ++ulCSR_M3_ARB_CNTL_FS3D[10]: Arrays with values for CSR M3 arbiter for Full Screen 3D applications. ++sAvail_SCLK[5]: Arrays to provide availabe list of SLCK and corresponding voltage, order from low to high + ulGMCRestoreResetTime: GMC power restore and GMC reset time to calculate data reconnection latency. Unit in ns. + ulMinimumNClk: Minimum NCLK speed among all NB-Pstates to calcualte data reconnection latency. Unit in 10kHz. + ulIdleNClk: NCLK speed while memory runs in self-refresh state. Unit in 10kHz. +@@ -4398,6 +4937,41 @@ usHDMISSPercentage: HDMI Spread Spectrum Percentage in unit 0.01%; + usHDMISSpreadRateIn10Hz: HDMI Spread Spectrum frequency in unit of 10Hz, =0, use VBIOS default setting. + usDVISSPercentage: DVI Spread Spectrum Percentage in unit 0.01%; 100 mean 1%, =0, use VBIOS default setting. + usDVISSpreadRateIn10Hz: DVI Spread Spectrum frequency in unit of 10Hz, =0, use VBIOS default setting. ++usMaxLVDSPclkFreqInSingleLink: Max pixel clock LVDS panel single link, if=0 means VBIOS use default threhold, right now it is 85Mhz ++ucLVDSMisc: [bit0] LVDS 888bit panel mode =0: LVDS 888 panel in LDI mode, =1: LVDS 888 panel in FPDI mode ++ [bit1] LVDS panel lower and upper link mapping =0: lower link and upper link not swap, =1: lower link and upper link are swapped ++ [bit2] LVDS 888bit per color mode =0: 666 bit per color =1:888 bit per color ++ [bit3] LVDS parameter override enable =0: ucLvdsMisc parameter are not used =1: ucLvdsMisc parameter should be used ++ [bit4] Polarity of signal sent to digital BLON output pin. =0: not inverted(active high) =1: inverted ( active low ) ++ucLVDSPwrOnSeqDIGONtoDE_in4Ms: LVDS power up sequence time in unit of 4ms, time delay from DIGON signal active to data enable signal active( DE ). ++ =0 mean use VBIOS default which is 8 ( 32ms ). The LVDS power up sequence is as following: DIGON->DE->VARY_BL->BLON. ++ This parameter is used by VBIOS only. VBIOS will patch LVDS_InfoTable. ++ucLVDSPwrOnDEtoVARY_BL_in4Ms: LVDS power up sequence time in unit of 4ms., time delay from DE( data enable ) active to Vary Brightness enable signal active( VARY_BL ). ++ =0 mean use VBIOS default which is 90 ( 360ms ). The LVDS power up sequence is as following: DIGON->DE->VARY_BL->BLON. ++ This parameter is used by VBIOS only. VBIOS will patch LVDS_InfoTable. 
++ ++ucLVDSPwrOffVARY_BLtoDE_in4Ms: LVDS power down sequence time in unit of 4ms, time delay from data enable ( DE ) signal off to LCDVCC (DIGON) off. ++ =0 mean use VBIOS default delay which is 8 ( 32ms ). The LVDS power down sequence is as following: BLON->VARY_BL->DE->DIGON ++ This parameter is used by VBIOS only. VBIOS will patch LVDS_InfoTable. ++ ++ucLVDSPwrOffDEtoDIGON_in4Ms: LVDS power down sequence time in unit of 4ms, time delay from vary brightness enable signal( VARY_BL) off to data enable ( DE ) signal off. ++ =0 mean use VBIOS default which is 90 ( 360ms ). The LVDS power down sequence is as following: BLON->VARY_BL->DE->DIGON ++ This parameter is used by VBIOS only. VBIOS will patch LVDS_InfoTable. ++ ++ucLVDSOffToOnDelay_in4Ms: LVDS power down sequence time in unit of 4ms. Time delay from DIGON signal off to DIGON signal active. ++ =0 means to use VBIOS default delay which is 125 ( 500ms ). ++ This parameter is used by VBIOS only. VBIOS will patch LVDS_InfoTable. ++ ++ucLVDSPwrOnVARY_BLtoBLON_in4Ms: LVDS power up sequence time in unit of 4ms. Time delay from VARY_BL signal on to DLON signal active. ++ =0 means to use VBIOS default delay which is 0 ( 0ms ). ++ This parameter is used by VBIOS only. VBIOS will patch LVDS_InfoTable. ++ ++ucLVDSPwrOffBLONtoVARY_BL_in4Ms: LVDS power down sequence time in unit of 4ms. Time delay from BLON signal off to VARY_BL signal off. ++ =0 means to use VBIOS default delay which is 0 ( 0ms ). ++ This parameter is used by VBIOS only. VBIOS will patch LVDS_InfoTable. ++ ++ulNbpStateMemclkFreq[4]: system memory clock frequncey in unit of 10Khz in different NB pstate. ++ + **********************************************************************************************************************/ + + /**************************************************************************/ +@@ -4459,6 +5033,7 @@ typedef struct _ATOM_ASIC_SS_ASSIGNMENT + #define ASIC_INTERNAL_SS_ON_DP 7 + #define ASIC_INTERNAL_SS_ON_DCPLL 8 + #define ASIC_EXTERNAL_SS_ON_DP_CLOCK 9 ++#define ASIC_INTERNAL_VCE_SS 10 + + typedef struct _ATOM_ASIC_SS_ASSIGNMENT_V2 + { +@@ -4520,7 +5095,7 @@ typedef struct _ATOM_ASIC_INTERNAL_SS_INFO_V3 + #define ATOM_DOS_MODE_INFO_DEF 7 + #define ATOM_I2C_CHANNEL_STATUS_DEF 8 + #define ATOM_I2C_CHANNEL_STATUS1_DEF 9 +- ++#define ATOM_INTERNAL_TIMER_DEF 10 + + // BIOS_0_SCRATCH Definition + #define ATOM_S0_CRT1_MONO 0x00000001L +@@ -4648,6 +5223,7 @@ typedef struct _ATOM_ASIC_INTERNAL_SS_INFO_V3 + #define ATOM_S2_DEVICE_DPMS_MASKw1 0x3FF + #define ATOM_S2_FORCEDLOWPWRMODE_STATE_MASKb3 0x0C + #define ATOM_S2_FORCEDLOWPWRMODE_STATE_CHANGEb3 0x10 ++#define ATOM_S2_TMDS_COHERENT_MODEb3 0x10 // used by VBIOS code only, use coherent mode for TMDS/HDMI mode + #define ATOM_S2_VRI_BRIGHT_ENABLEb3 0x20 + #define ATOM_S2_ROTATION_STATE_MASKb3 0xC0 + +@@ -5038,6 +5614,23 @@ typedef struct _ENABLE_GRAPH_SURFACE_PARAMETERS_V1_3 + USHORT usDeviceId; // Active Device Id for this surface. If no device, set to 0. 
+ }ENABLE_GRAPH_SURFACE_PARAMETERS_V1_3; + ++typedef struct _ENABLE_GRAPH_SURFACE_PARAMETERS_V1_4 ++{ ++ USHORT usHight; // Image Hight ++ USHORT usWidth; // Image Width ++ USHORT usGraphPitch; ++ UCHAR ucColorDepth; ++ UCHAR ucPixelFormat; ++ UCHAR ucSurface; // Surface 1 or 2 ++ UCHAR ucEnable; // ATOM_ENABLE or ATOM_DISABLE ++ UCHAR ucModeType; ++ UCHAR ucReserved; ++}ENABLE_GRAPH_SURFACE_PARAMETERS_V1_4; ++ ++// ucEnable ++#define ATOM_GRAPH_CONTROL_SET_PITCH 0x0f ++#define ATOM_GRAPH_CONTROL_SET_DISP_START 0x10 ++ + typedef struct _ENABLE_GRAPH_SURFACE_PS_ALLOCATION + { + ENABLE_GRAPH_SURFACE_PARAMETERS sSetSurface; +@@ -5057,6 +5650,58 @@ typedef struct _GET_DISPLAY_SURFACE_SIZE_PARAMETERS + USHORT usY_Size; + }GET_DISPLAY_SURFACE_SIZE_PARAMETERS; + ++typedef struct _GET_DISPLAY_SURFACE_SIZE_PARAMETERS_V2 ++{ ++ union{ ++ USHORT usX_Size; //When use as input parameter, usX_Size indicates which CRTC ++ USHORT usSurface; ++ }; ++ USHORT usY_Size; ++ USHORT usDispXStart; ++ USHORT usDispYStart; ++}GET_DISPLAY_SURFACE_SIZE_PARAMETERS_V2; ++ ++ ++typedef struct _PALETTE_DATA_CONTROL_PARAMETERS_V3 ++{ ++ UCHAR ucLutId; ++ UCHAR ucAction; ++ USHORT usLutStartIndex; ++ USHORT usLutLength; ++ USHORT usLutOffsetInVram; ++}PALETTE_DATA_CONTROL_PARAMETERS_V3; ++ ++// ucAction: ++#define PALETTE_DATA_AUTO_FILL 1 ++#define PALETTE_DATA_READ 2 ++#define PALETTE_DATA_WRITE 3 ++ ++ ++typedef struct _INTERRUPT_SERVICE_PARAMETERS_V2 ++{ ++ UCHAR ucInterruptId; ++ UCHAR ucServiceId; ++ UCHAR ucStatus; ++ UCHAR ucReserved; ++}INTERRUPT_SERVICE_PARAMETER_V2; ++ ++// ucInterruptId ++#define HDP1_INTERRUPT_ID 1 ++#define HDP2_INTERRUPT_ID 2 ++#define HDP3_INTERRUPT_ID 3 ++#define HDP4_INTERRUPT_ID 4 ++#define HDP5_INTERRUPT_ID 5 ++#define HDP6_INTERRUPT_ID 6 ++#define SW_INTERRUPT_ID 11 ++ ++// ucAction ++#define INTERRUPT_SERVICE_GEN_SW_INT 1 ++#define INTERRUPT_SERVICE_GET_STATUS 2 ++ ++ // ucStatus ++#define INTERRUPT_STATUS__INT_TRIGGER 1 ++#define INTERRUPT_STATUS__HPD_HIGH 2 ++ + typedef struct _INDIRECT_IO_ACCESS + { + ATOM_COMMON_TABLE_HEADER sHeader; +@@ -5189,7 +5834,7 @@ typedef struct _ATOM_INIT_REG_BLOCK{ + + #define END_OF_REG_INDEX_BLOCK 0x0ffff + #define END_OF_REG_DATA_BLOCK 0x00000000 +-#define ATOM_INIT_REG_MASK_FLAG 0x80 ++#define ATOM_INIT_REG_MASK_FLAG 0x80 //Not used in BIOS + #define CLOCK_RANGE_HIGHEST 0x00ffffff + + #define VALUE_DWORD SIZEOF ULONG +@@ -5229,6 +5874,7 @@ typedef struct _ATOM_MC_INIT_PARAM_TABLE + #define _128Mx8 0x51 + #define _128Mx16 0x52 + #define _256Mx8 0x61 ++#define _256Mx16 0x62 + + #define SAMSUNG 0x1 + #define INFINEON 0x2 +@@ -5585,7 +6231,7 @@ typedef struct _ATOM_VRAM_MODULE_V7 + ULONG ulChannelMapCfg; // mmMC_SHARED_CHREMAP + USHORT usModuleSize; // Size of ATOM_VRAM_MODULE_V7 + USHORT usPrivateReserved; // MC_ARB_RAMCFG (includes NOOFBANK,NOOFRANKS,NOOFROWS,NOOFCOLS) +- USHORT usReserved; ++ USHORT usEnableChannels; // bit vector which indicate which channels are enabled + UCHAR ucExtMemoryID; // Current memory module ID + UCHAR ucMemoryType; // MEM_TYPE_DDR2/DDR3/GDDR3/GDDR5 + UCHAR ucChannelNum; // Number of mem. channels supported in this module +@@ -5597,7 +6243,8 @@ typedef struct _ATOM_VRAM_MODULE_V7 + UCHAR ucNPL_RT; // Round trip delay (MC_SEQ_CAS_TIMING [28:24]:TCL=CL+NPL_RT-2). Always 2. 
+ UCHAR ucPreamble; // [7:4] Write Preamble, [3:0] Read Preamble + UCHAR ucMemorySize; // Total memory size in unit of 16MB for CONFIG_MEMSIZE - bit[23:0] zeros +- UCHAR ucReserved[3]; ++ USHORT usSEQSettingOffset; ++ UCHAR ucReserved; + // Memory Module specific values + USHORT usEMRS2Value; // EMRS2/MR2 Value. + USHORT usEMRS3Value; // EMRS3/MR3 Value. +@@ -5633,10 +6280,10 @@ typedef struct _ATOM_VRAM_INFO_V3 + typedef struct _ATOM_VRAM_INFO_V4 + { + ATOM_COMMON_TABLE_HEADER sHeader; +- USHORT usMemAdjustTblOffset; // offset of ATOM_INIT_REG_BLOCK structure for memory vendor specific MC adjust setting +- USHORT usMemClkPatchTblOffset; // offset of ATOM_INIT_REG_BLOCK structure for memory clock specific MC setting +- USHORT usRerseved; +- UCHAR ucMemDQ7_0ByteRemap; // DQ line byte remap, =0: Memory Data line BYTE0, =1: BYTE1, =2: BYTE2, =3: BYTE3 ++ USHORT usMemAdjustTblOffset; // offset of ATOM_INIT_REG_BLOCK structure for memory vendor specific MC adjust setting ++ USHORT usMemClkPatchTblOffset; // offset of ATOM_INIT_REG_BLOCK structure for memory clock specific MC setting ++ USHORT usRerseved; ++ UCHAR ucMemDQ7_0ByteRemap; // DQ line byte remap, =0: Memory Data line BYTE0, =1: BYTE1, =2: BYTE2, =3: BYTE3 + ULONG ulMemDQ7_0BitRemap; // each DQ line ( 7~0) use 3bits, like: DQ0=Bit[2:0], DQ1:[5:3], ... DQ7:[23:21] + UCHAR ucReservde[4]; + UCHAR ucNumOfVRAMModule; +@@ -5648,9 +6295,10 @@ typedef struct _ATOM_VRAM_INFO_V4 + typedef struct _ATOM_VRAM_INFO_HEADER_V2_1 + { + ATOM_COMMON_TABLE_HEADER sHeader; +- USHORT usMemAdjustTblOffset; // offset of ATOM_INIT_REG_BLOCK structure for memory vendor specific MC adjust setting +- USHORT usMemClkPatchTblOffset; // offset of ATOM_INIT_REG_BLOCK structure for memory clock specific MC setting +- USHORT usReserved[4]; ++ USHORT usMemAdjustTblOffset; // offset of ATOM_INIT_REG_BLOCK structure for memory vendor specific MC adjust setting ++ USHORT usMemClkPatchTblOffset; // offset of ATOM_INIT_REG_BLOCK structure for memory clock specific MC setting ++ USHORT usPerBytePresetOffset; // offset of ATOM_INIT_REG_BLOCK structure for Per Byte Offset Preset Settings ++ USHORT usReserved[3]; + UCHAR ucNumOfVRAMModule; // indicate number of VRAM module + UCHAR ucMemoryClkPatchTblVer; // version of memory AC timing register list + UCHAR ucVramModuleVer; // indicate ATOM_VRAM_MODUE version +@@ -5935,6 +6583,52 @@ typedef struct _ATOM_DISP_OUT_INFO_V2 + ASIC_ENCODER_INFO asEncoderInfo[1]; + }ATOM_DISP_OUT_INFO_V2; + ++ ++typedef struct _ATOM_DISP_CLOCK_ID { ++ UCHAR ucPpllId; ++ UCHAR ucPpllAttribute; ++}ATOM_DISP_CLOCK_ID; ++ ++// ucPpllAttribute ++#define CLOCK_SOURCE_SHAREABLE 0x01 ++#define CLOCK_SOURCE_DP_MODE 0x02 ++#define CLOCK_SOURCE_NONE_DP_MODE 0x04 ++ ++//DispOutInfoTable ++typedef struct _ASIC_TRANSMITTER_INFO_V2 ++{ ++ USHORT usTransmitterObjId; ++ USHORT usDispClkIdOffset; // point to clock source id list supported by Encoder Object ++ UCHAR ucTransmitterCmdTblId; ++ UCHAR ucConfig; ++ UCHAR ucEncoderID; // available 1st encoder ( default ) ++ UCHAR ucOptionEncoderID; // available 2nd encoder ( optional ) ++ UCHAR uc2ndEncoderID; ++ UCHAR ucReserved; ++}ASIC_TRANSMITTER_INFO_V2; ++ ++typedef struct _ATOM_DISP_OUT_INFO_V3 ++{ ++ ATOM_COMMON_TABLE_HEADER sHeader; ++ USHORT ptrTransmitterInfo; ++ USHORT ptrEncoderInfo; ++ USHORT ptrMainCallParserFar; // direct address of main parser call in VBIOS binary. 
++ USHORT usReserved; ++ UCHAR ucDCERevision; ++ UCHAR ucMaxDispEngineNum; ++ UCHAR ucMaxActiveDispEngineNum; ++ UCHAR ucMaxPPLLNum; ++ UCHAR ucCoreRefClkSource; // value of CORE_REF_CLK_SOURCE ++ UCHAR ucReserved[3]; ++ ASIC_TRANSMITTER_INFO_V2 asTransmitterInfo[1]; // for alligment only ++}ATOM_DISP_OUT_INFO_V3; ++ ++typedef enum CORE_REF_CLK_SOURCE{ ++ CLOCK_SRC_XTALIN=0, ++ CLOCK_SRC_XO_IN=1, ++ CLOCK_SRC_XO_IN2=2, ++}CORE_REF_CLK_SOURCE; ++ + // DispDevicePriorityInfo + typedef struct _ATOM_DISPLAY_DEVICE_PRIORITY_INFO + { +@@ -6070,6 +6764,39 @@ typedef struct _PROCESS_I2C_CHANNEL_TRANSACTION_PARAMETERS + #define HW_I2C_READ 0 + #define I2C_2BYTE_ADDR 0x02 + ++/****************************************************************************/ ++// Structures used by HW_Misc_OperationTable ++/****************************************************************************/ ++typedef struct _ATOM_HW_MISC_OPERATION_INPUT_PARAMETER_V1_1 ++{ ++ UCHAR ucCmd; // Input: To tell which action to take ++ UCHAR ucReserved[3]; ++ ULONG ulReserved; ++}ATOM_HW_MISC_OPERATION_INPUT_PARAMETER_V1_1; ++ ++typedef struct _ATOM_HW_MISC_OPERATION_OUTPUT_PARAMETER_V1_1 ++{ ++ UCHAR ucReturnCode; // Output: Return value base on action was taken ++ UCHAR ucReserved[3]; ++ ULONG ulReserved; ++}ATOM_HW_MISC_OPERATION_OUTPUT_PARAMETER_V1_1; ++ ++// Actions code ++#define ATOM_GET_SDI_SUPPORT 0xF0 ++ ++// Return code ++#define ATOM_UNKNOWN_CMD 0 ++#define ATOM_FEATURE_NOT_SUPPORTED 1 ++#define ATOM_FEATURE_SUPPORTED 2 ++ ++typedef struct _ATOM_HW_MISC_OPERATION_PS_ALLOCATION ++{ ++ ATOM_HW_MISC_OPERATION_INPUT_PARAMETER_V1_1 sInput_Output; ++ PROCESS_I2C_CHANNEL_TRANSACTION_PARAMETERS sReserved; ++}ATOM_HW_MISC_OPERATION_PS_ALLOCATION; ++ ++/****************************************************************************/ ++ + typedef struct _SET_HWBLOCK_INSTANCE_PARAMETER_V2 + { + UCHAR ucHWBlkInst; // HW block instance, 0, 1, 2, ... 
+@@ -6090,6 +6817,52 @@ typedef struct _SET_HWBLOCK_INSTANCE_PARAMETER_V2 + #define SELECT_CRTC_PIXEL_RATE 7 + #define SELECT_VGA_BLK 8 + ++// DIGTransmitterInfoTable structure used to program UNIPHY settings ++typedef struct _DIG_TRANSMITTER_INFO_HEADER_V3_1{ ++ ATOM_COMMON_TABLE_HEADER sHeader; ++ USHORT usDPVsPreEmphSettingOffset; // offset of PHY_ANALOG_SETTING_INFO * with DP Voltage Swing and Pre-Emphasis for each Link clock ++ USHORT usPhyAnalogRegListOffset; // offset of CLOCK_CONDITION_REGESTER_INFO* with None-DP mode Analog Setting's register Info ++ USHORT usPhyAnalogSettingOffset; // offset of CLOCK_CONDITION_SETTING_ENTRY* with None-DP mode Analog Setting for each link clock range ++ USHORT usPhyPllRegListOffset; // offset of CLOCK_CONDITION_REGESTER_INFO* with Phy Pll register Info ++ USHORT usPhyPllSettingOffset; // offset of CLOCK_CONDITION_SETTING_ENTRY* with Phy Pll Settings ++}DIG_TRANSMITTER_INFO_HEADER_V3_1; ++ ++typedef struct _CLOCK_CONDITION_REGESTER_INFO{ ++ USHORT usRegisterIndex; ++ UCHAR ucStartBit; ++ UCHAR ucEndBit; ++}CLOCK_CONDITION_REGESTER_INFO; ++ ++typedef struct _CLOCK_CONDITION_SETTING_ENTRY{ ++ USHORT usMaxClockFreq; ++ UCHAR ucEncodeMode; ++ UCHAR ucPhySel; ++ ULONG ulAnalogSetting[1]; ++}CLOCK_CONDITION_SETTING_ENTRY; ++ ++typedef struct _CLOCK_CONDITION_SETTING_INFO{ ++ USHORT usEntrySize; ++ CLOCK_CONDITION_SETTING_ENTRY asClkCondSettingEntry[1]; ++}CLOCK_CONDITION_SETTING_INFO; ++ ++typedef struct _PHY_CONDITION_REG_VAL{ ++ ULONG ulCondition; ++ ULONG ulRegVal; ++}PHY_CONDITION_REG_VAL; ++ ++typedef struct _PHY_CONDITION_REG_INFO{ ++ USHORT usRegIndex; ++ USHORT usSize; ++ PHY_CONDITION_REG_VAL asRegVal[1]; ++}PHY_CONDITION_REG_INFO; ++ ++typedef struct _PHY_ANALOG_SETTING_INFO{ ++ UCHAR ucEncodeMode; ++ UCHAR ucPhySel; ++ USHORT usSize; ++ PHY_CONDITION_REG_INFO asAnalogSetting[1]; ++}PHY_ANALOG_SETTING_INFO; ++ + /****************************************************************************/ + //Portion VI: Definitinos for vbios MC scratch registers that driver used + /****************************************************************************/ +@@ -7020,4 +7793,68 @@ typedef struct _ATOM_PPLIB_Clock_Voltage_Limit_Table + + #pragma pack() // BIOS data must use byte aligment + ++// ++// AMD ACPI Table ++// ++#pragma pack(1) ++ ++typedef struct { ++ ULONG Signature; ++ ULONG TableLength; //Length ++ UCHAR Revision; ++ UCHAR Checksum; ++ UCHAR OemId[6]; ++ UCHAR OemTableId[8]; //UINT64 OemTableId; ++ ULONG OemRevision; ++ ULONG CreatorId; ++ ULONG CreatorRevision; ++} AMD_ACPI_DESCRIPTION_HEADER; ++/* ++//EFI_ACPI_DESCRIPTION_HEADER from AcpiCommon.h ++typedef struct { ++ UINT32 Signature; //0x0 ++ UINT32 Length; //0x4 ++ UINT8 Revision; //0x8 ++ UINT8 Checksum; //0x9 ++ UINT8 OemId[6]; //0xA ++ UINT64 OemTableId; //0x10 ++ UINT32 OemRevision; //0x18 ++ UINT32 CreatorId; //0x1C ++ UINT32 CreatorRevision; //0x20 ++}EFI_ACPI_DESCRIPTION_HEADER; ++*/ ++typedef struct { ++ AMD_ACPI_DESCRIPTION_HEADER SHeader; ++ UCHAR TableUUID[16]; //0x24 ++ ULONG VBIOSImageOffset; //0x34. Offset to the first GOP_VBIOS_CONTENT block from the beginning of the stucture. ++ ULONG Lib1ImageOffset; //0x38. Offset to the first GOP_LIB1_CONTENT block from the beginning of the stucture. 
++ ULONG Reserved[4]; //0x3C ++}UEFI_ACPI_VFCT; ++ ++typedef struct { ++ ULONG PCIBus; //0x4C ++ ULONG PCIDevice; //0x50 ++ ULONG PCIFunction; //0x54 ++ USHORT VendorID; //0x58 ++ USHORT DeviceID; //0x5A ++ USHORT SSVID; //0x5C ++ USHORT SSID; //0x5E ++ ULONG Revision; //0x60 ++ ULONG ImageLength; //0x64 ++}VFCT_IMAGE_HEADER; ++ ++ ++typedef struct { ++ VFCT_IMAGE_HEADER VbiosHeader; ++ UCHAR VbiosContent[1]; ++}GOP_VBIOS_CONTENT; ++ ++typedef struct { ++ VFCT_IMAGE_HEADER Lib1Header; ++ UCHAR Lib1Content[1]; ++}GOP_LIB1_CONTENT; ++ ++#pragma pack() ++ ++ + #endif /* _ATOMBIOS_H */ +diff --git a/drivers/gpu/drm/radeon/radeon.h b/drivers/gpu/drm/radeon/radeon.h +index 8227e76..28e69e9 100644 +--- a/drivers/gpu/drm/radeon/radeon.h ++++ b/drivers/gpu/drm/radeon/radeon.h +@@ -123,21 +123,6 @@ struct radeon_device; + /* + * BIOS. + */ +-#define ATRM_BIOS_PAGE 4096 +- +-#if defined(CONFIG_VGA_SWITCHEROO) +-bool radeon_atrm_supported(struct pci_dev *pdev); +-int radeon_atrm_get_bios_chunk(uint8_t *bios, int offset, int len); +-#else +-static inline bool radeon_atrm_supported(struct pci_dev *pdev) +-{ +- return false; +-} +- +-static inline int radeon_atrm_get_bios_chunk(uint8_t *bios, int offset, int len){ +- return -EINVAL; +-} +-#endif + bool radeon_get_bios(struct radeon_device *rdev); + + +diff --git a/drivers/gpu/drm/radeon/radeon_atombios.c b/drivers/gpu/drm/radeon/radeon_atombios.c +index 9d2c369..38585c5 100644 +--- a/drivers/gpu/drm/radeon/radeon_atombios.c ++++ b/drivers/gpu/drm/radeon/radeon_atombios.c +@@ -446,7 +446,7 @@ static bool radeon_atom_apply_quirks(struct drm_device *dev, + } + + /* Fujitsu D3003-S2 board lists DVI-I as DVI-D and VGA */ +- if ((dev->pdev->device == 0x9802) && ++ if (((dev->pdev->device == 0x9802) || (dev->pdev->device == 0x9806)) && + (dev->pdev->subsystem_vendor == 0x1734) && + (dev->pdev->subsystem_device == 0x11bd)) { + if (*connector_type == DRM_MODE_CONNECTOR_VGA) { +diff --git a/drivers/gpu/drm/radeon/radeon_atpx_handler.c b/drivers/gpu/drm/radeon/radeon_atpx_handler.c +index 9d95792..2a2cf0b 100644 +--- a/drivers/gpu/drm/radeon/radeon_atpx_handler.c ++++ b/drivers/gpu/drm/radeon/radeon_atpx_handler.c +@@ -30,56 +30,8 @@ static struct radeon_atpx_priv { + /* handle for device - and atpx */ + acpi_handle dhandle; + acpi_handle atpx_handle; +- acpi_handle atrm_handle; + } radeon_atpx_priv; + +-/* retrieve the ROM in 4k blocks */ +-static int radeon_atrm_call(acpi_handle atrm_handle, uint8_t *bios, +- int offset, int len) +-{ +- acpi_status status; +- union acpi_object atrm_arg_elements[2], *obj; +- struct acpi_object_list atrm_arg; +- struct acpi_buffer buffer = { ACPI_ALLOCATE_BUFFER, NULL}; +- +- atrm_arg.count = 2; +- atrm_arg.pointer = &atrm_arg_elements[0]; +- +- atrm_arg_elements[0].type = ACPI_TYPE_INTEGER; +- atrm_arg_elements[0].integer.value = offset; +- +- atrm_arg_elements[1].type = ACPI_TYPE_INTEGER; +- atrm_arg_elements[1].integer.value = len; +- +- status = acpi_evaluate_object(atrm_handle, NULL, &atrm_arg, &buffer); +- if (ACPI_FAILURE(status)) { +- printk("failed to evaluate ATRM got %s\n", acpi_format_exception(status)); +- return -ENODEV; +- } +- +- obj = (union acpi_object *)buffer.pointer; +- memcpy(bios+offset, obj->buffer.pointer, len); +- kfree(buffer.pointer); +- return len; +-} +- +-bool radeon_atrm_supported(struct pci_dev *pdev) +-{ +- /* get the discrete ROM only via ATRM */ +- if (!radeon_atpx_priv.atpx_detected) +- return false; +- +- if (radeon_atpx_priv.dhandle == DEVICE_ACPI_HANDLE(&pdev->dev)) +- return false; +- return 
true; +-} +- +- +-int radeon_atrm_get_bios_chunk(uint8_t *bios, int offset, int len) +-{ +- return radeon_atrm_call(radeon_atpx_priv.atrm_handle, bios, offset, len); +-} +- + static int radeon_atpx_get_version(acpi_handle handle) + { + acpi_status status; +@@ -197,7 +149,7 @@ static int radeon_atpx_power_state(enum vga_switcheroo_client_id id, + + static bool radeon_atpx_pci_probe_handle(struct pci_dev *pdev) + { +- acpi_handle dhandle, atpx_handle, atrm_handle; ++ acpi_handle dhandle, atpx_handle; + acpi_status status; + + dhandle = DEVICE_ACPI_HANDLE(&pdev->dev); +@@ -208,13 +160,8 @@ static bool radeon_atpx_pci_probe_handle(struct pci_dev *pdev) + if (ACPI_FAILURE(status)) + return false; + +- status = acpi_get_handle(dhandle, "ATRM", &atrm_handle); +- if (ACPI_FAILURE(status)) +- return false; +- + radeon_atpx_priv.dhandle = dhandle; + radeon_atpx_priv.atpx_handle = atpx_handle; +- radeon_atpx_priv.atrm_handle = atrm_handle; + return true; + } + +diff --git a/drivers/gpu/drm/radeon/radeon_bios.c b/drivers/gpu/drm/radeon/radeon_bios.c +index 229a20f..d306cc8 100644 +--- a/drivers/gpu/drm/radeon/radeon_bios.c ++++ b/drivers/gpu/drm/radeon/radeon_bios.c +@@ -32,6 +32,7 @@ + + #include + #include ++#include + /* + * BIOS. + */ +@@ -98,16 +99,81 @@ static bool radeon_read_bios(struct radeon_device *rdev) + return true; + } + ++#ifdef CONFIG_ACPI + /* ATRM is used to get the BIOS on the discrete cards in + * dual-gpu systems. + */ ++/* retrieve the ROM in 4k blocks */ ++#define ATRM_BIOS_PAGE 4096 ++/** ++ * radeon_atrm_call - fetch a chunk of the vbios ++ * ++ * @atrm_handle: acpi ATRM handle ++ * @bios: vbios image pointer ++ * @offset: offset of vbios image data to fetch ++ * @len: length of vbios image data to fetch ++ * ++ * Executes ATRM to fetch a chunk of the discrete ++ * vbios image on PX systems (all asics). ++ * Returns the length of the buffer fetched. 
++ */ ++static int radeon_atrm_call(acpi_handle atrm_handle, uint8_t *bios, ++ int offset, int len) ++{ ++ acpi_status status; ++ union acpi_object atrm_arg_elements[2], *obj; ++ struct acpi_object_list atrm_arg; ++ struct acpi_buffer buffer = { ACPI_ALLOCATE_BUFFER, NULL}; ++ ++ atrm_arg.count = 2; ++ atrm_arg.pointer = &atrm_arg_elements[0]; ++ ++ atrm_arg_elements[0].type = ACPI_TYPE_INTEGER; ++ atrm_arg_elements[0].integer.value = offset; ++ ++ atrm_arg_elements[1].type = ACPI_TYPE_INTEGER; ++ atrm_arg_elements[1].integer.value = len; ++ ++ status = acpi_evaluate_object(atrm_handle, NULL, &atrm_arg, &buffer); ++ if (ACPI_FAILURE(status)) { ++ printk("failed to evaluate ATRM got %s\n", acpi_format_exception(status)); ++ return -ENODEV; ++ } ++ ++ obj = (union acpi_object *)buffer.pointer; ++ memcpy(bios+offset, obj->buffer.pointer, obj->buffer.length); ++ len = obj->buffer.length; ++ kfree(buffer.pointer); ++ return len; ++} ++ + static bool radeon_atrm_get_bios(struct radeon_device *rdev) + { + int ret; + int size = 256 * 1024; + int i; ++ struct pci_dev *pdev = NULL; ++ acpi_handle dhandle, atrm_handle; ++ acpi_status status; ++ bool found = false; ++ ++ /* ATRM is for the discrete card only */ ++ if (rdev->flags & RADEON_IS_IGP) ++ return false; ++ ++ while ((pdev = pci_get_class(PCI_CLASS_DISPLAY_VGA << 8, pdev)) != NULL) { ++ dhandle = DEVICE_ACPI_HANDLE(&pdev->dev); ++ if (!dhandle) ++ continue; ++ ++ status = acpi_get_handle(dhandle, "ATRM", &atrm_handle); ++ if (!ACPI_FAILURE(status)) { ++ found = true; ++ break; ++ } ++ } + +- if (!radeon_atrm_supported(rdev->pdev)) ++ if (!found) + return false; + + rdev->bios = kmalloc(size, GFP_KERNEL); +@@ -117,10 +183,11 @@ static bool radeon_atrm_get_bios(struct radeon_device *rdev) + } + + for (i = 0; i < size / ATRM_BIOS_PAGE; i++) { +- ret = radeon_atrm_get_bios_chunk(rdev->bios, +- (i * ATRM_BIOS_PAGE), +- ATRM_BIOS_PAGE); +- if (ret <= 0) ++ ret = radeon_atrm_call(atrm_handle, ++ rdev->bios, ++ (i * ATRM_BIOS_PAGE), ++ ATRM_BIOS_PAGE); ++ if (ret < ATRM_BIOS_PAGE) + break; + } + +@@ -130,6 +197,12 @@ static bool radeon_atrm_get_bios(struct radeon_device *rdev) + } + return true; + } ++#else ++static inline bool radeon_atrm_get_bios(struct radeon_device *rdev) ++{ ++ return false; ++} ++#endif + + static bool ni_read_disabled_bios(struct radeon_device *rdev) + { +@@ -476,6 +549,61 @@ static bool radeon_read_disabled_bios(struct radeon_device *rdev) + return legacy_read_disabled_bios(rdev); + } + ++#ifdef CONFIG_ACPI ++static bool radeon_acpi_vfct_bios(struct radeon_device *rdev) ++{ ++ bool ret = false; ++ struct acpi_table_header *hdr; ++ acpi_size tbl_size; ++ UEFI_ACPI_VFCT *vfct; ++ GOP_VBIOS_CONTENT *vbios; ++ VFCT_IMAGE_HEADER *vhdr; ++ ++ if (!ACPI_SUCCESS(acpi_get_table_with_size("VFCT", 1, &hdr, &tbl_size))) ++ return false; ++ if (tbl_size < sizeof(UEFI_ACPI_VFCT)) { ++ DRM_ERROR("ACPI VFCT table present but broken (too short #1)\n"); ++ goto out_unmap; ++ } ++ ++ vfct = (UEFI_ACPI_VFCT *)hdr; ++ if (vfct->VBIOSImageOffset + sizeof(VFCT_IMAGE_HEADER) > tbl_size) { ++ DRM_ERROR("ACPI VFCT table present but broken (too short #2)\n"); ++ goto out_unmap; ++ } ++ ++ vbios = (GOP_VBIOS_CONTENT *)((char *)hdr + vfct->VBIOSImageOffset); ++ vhdr = &vbios->VbiosHeader; ++ DRM_INFO("ACPI VFCT contains a BIOS for %02x:%02x.%d %04x:%04x, size %d\n", ++ vhdr->PCIBus, vhdr->PCIDevice, vhdr->PCIFunction, ++ vhdr->VendorID, vhdr->DeviceID, vhdr->ImageLength); ++ ++ if (vhdr->PCIBus != rdev->pdev->bus->number || ++ vhdr->PCIDevice != 
PCI_SLOT(rdev->pdev->devfn) || ++ vhdr->PCIFunction != PCI_FUNC(rdev->pdev->devfn) || ++ vhdr->VendorID != rdev->pdev->vendor || ++ vhdr->DeviceID != rdev->pdev->device) { ++ DRM_INFO("ACPI VFCT table is not for this card\n"); ++ goto out_unmap; ++ }; ++ ++ if (vfct->VBIOSImageOffset + sizeof(VFCT_IMAGE_HEADER) + vhdr->ImageLength > tbl_size) { ++ DRM_ERROR("ACPI VFCT image truncated\n"); ++ goto out_unmap; ++ } ++ ++ rdev->bios = kmemdup(&vbios->VbiosContent, vhdr->ImageLength, GFP_KERNEL); ++ ret = !!rdev->bios; ++ ++out_unmap: ++ return ret; ++} ++#else ++static inline bool radeon_acpi_vfct_bios(struct radeon_device *rdev) ++{ ++ return false; ++} ++#endif + + bool radeon_get_bios(struct radeon_device *rdev) + { +@@ -484,6 +612,8 @@ bool radeon_get_bios(struct radeon_device *rdev) + + r = radeon_atrm_get_bios(rdev); + if (r == false) ++ r = radeon_acpi_vfct_bios(rdev); ++ if (r == false) + r = igp_read_bios_from_vram(rdev); + if (r == false) + r = radeon_read_bios(rdev); +diff --git a/drivers/gpu/drm/radeon/radeon_object.c b/drivers/gpu/drm/radeon/radeon_object.c +index 39497c7..f3ae607 100644 +--- a/drivers/gpu/drm/radeon/radeon_object.c ++++ b/drivers/gpu/drm/radeon/radeon_object.c +@@ -117,6 +117,7 @@ int radeon_bo_create(struct radeon_device *rdev, + return -ENOMEM; + } + ++retry: + bo = kzalloc(sizeof(struct radeon_bo), GFP_KERNEL); + if (bo == NULL) + return -ENOMEM; +@@ -129,8 +130,6 @@ int radeon_bo_create(struct radeon_device *rdev, + bo->gem_base.driver_private = NULL; + bo->surface_reg = -1; + INIT_LIST_HEAD(&bo->list); +- +-retry: + radeon_ttm_placement_from_domain(bo, domain); + /* Kernel allocation are uninterruptible */ + mutex_lock(&rdev->vram_mutex); +diff --git a/drivers/hid/hid-chicony.c b/drivers/hid/hid-chicony.c +index b99af34..a2abb8e 100644 +--- a/drivers/hid/hid-chicony.c ++++ b/drivers/hid/hid-chicony.c +@@ -60,6 +60,7 @@ static int ch_input_mapping(struct hid_device *hdev, struct hid_input *hi, + static const struct hid_device_id ch_devices[] = { + { HID_USB_DEVICE(USB_VENDOR_ID_CHICONY, USB_DEVICE_ID_CHICONY_TACTICAL_PAD) }, + { HID_USB_DEVICE(USB_VENDOR_ID_CHICONY, USB_DEVICE_ID_CHICONY_WIRELESS2) }, ++ { HID_USB_DEVICE(USB_VENDOR_ID_CHICONY, USB_DEVICE_ID_CHICONY_AK1D) }, + { } + }; + MODULE_DEVICE_TABLE(hid, ch_devices); +diff --git a/drivers/hid/hid-core.c b/drivers/hid/hid-core.c +index 95430a0..5cc029f 100644 +--- a/drivers/hid/hid-core.c ++++ b/drivers/hid/hid-core.c +@@ -1398,12 +1398,14 @@ static const struct hid_device_id hid_have_special_driver[] = { + { HID_USB_DEVICE(USB_VENDOR_ID_CHICONY, USB_DEVICE_ID_CHICONY_TACTICAL_PAD) }, + { HID_USB_DEVICE(USB_VENDOR_ID_CHICONY, USB_DEVICE_ID_CHICONY_WIRELESS) }, + { HID_USB_DEVICE(USB_VENDOR_ID_CHICONY, USB_DEVICE_ID_CHICONY_WIRELESS2) }, ++ { HID_USB_DEVICE(USB_VENDOR_ID_CHICONY, USB_DEVICE_ID_CHICONY_AK1D) }, + { HID_USB_DEVICE(USB_VENDOR_ID_CHUNGHWAT, USB_DEVICE_ID_CHUNGHWAT_MULTITOUCH) }, + { HID_USB_DEVICE(USB_VENDOR_ID_CREATIVELABS, USB_DEVICE_ID_PRODIKEYS_PCMIDI) }, + { HID_USB_DEVICE(USB_VENDOR_ID_CVTOUCH, USB_DEVICE_ID_CVTOUCH_SCREEN) }, + { HID_USB_DEVICE(USB_VENDOR_ID_CYPRESS, USB_DEVICE_ID_CYPRESS_BARCODE_1) }, + { HID_USB_DEVICE(USB_VENDOR_ID_CYPRESS, USB_DEVICE_ID_CYPRESS_BARCODE_2) }, + { HID_USB_DEVICE(USB_VENDOR_ID_CYPRESS, USB_DEVICE_ID_CYPRESS_BARCODE_3) }, ++ { HID_USB_DEVICE(USB_VENDOR_ID_CYPRESS, USB_DEVICE_ID_CYPRESS_BARCODE_4) }, + { HID_USB_DEVICE(USB_VENDOR_ID_CYPRESS, USB_DEVICE_ID_CYPRESS_MOUSE) }, + { HID_USB_DEVICE(USB_VENDOR_ID_CYPRESS, USB_DEVICE_ID_CYPRESS_TRUETOUCH) }, + 
{ HID_USB_DEVICE(USB_VENDOR_ID_DRAGONRISE, 0x0006) }, +diff --git a/drivers/hid/hid-cypress.c b/drivers/hid/hid-cypress.c +index 2f0be4c..9e43aac 100644 +--- a/drivers/hid/hid-cypress.c ++++ b/drivers/hid/hid-cypress.c +@@ -129,6 +129,8 @@ static const struct hid_device_id cp_devices[] = { + .driver_data = CP_RDESC_SWAPPED_MIN_MAX }, + { HID_USB_DEVICE(USB_VENDOR_ID_CYPRESS, USB_DEVICE_ID_CYPRESS_BARCODE_3), + .driver_data = CP_RDESC_SWAPPED_MIN_MAX }, ++ { HID_USB_DEVICE(USB_VENDOR_ID_CYPRESS, USB_DEVICE_ID_CYPRESS_BARCODE_4), ++ .driver_data = CP_RDESC_SWAPPED_MIN_MAX }, + { HID_USB_DEVICE(USB_VENDOR_ID_CYPRESS, USB_DEVICE_ID_CYPRESS_MOUSE), + .driver_data = CP_2WHEEL_MOUSE_HACK }, + { } +diff --git a/drivers/hid/hid-ids.h b/drivers/hid/hid-ids.h +index 7db934d..e4317a2 100644 +--- a/drivers/hid/hid-ids.h ++++ b/drivers/hid/hid-ids.h +@@ -196,6 +196,7 @@ + #define USB_DEVICE_ID_CHICONY_MULTI_TOUCH 0xb19d + #define USB_DEVICE_ID_CHICONY_WIRELESS 0x0618 + #define USB_DEVICE_ID_CHICONY_WIRELESS2 0x1123 ++#define USB_DEVICE_ID_CHICONY_AK1D 0x1125 + + #define USB_VENDOR_ID_CHUNGHWAT 0x2247 + #define USB_DEVICE_ID_CHUNGHWAT_MULTITOUCH 0x0001 +@@ -225,6 +226,7 @@ + #define USB_DEVICE_ID_CYPRESS_BARCODE_1 0xde61 + #define USB_DEVICE_ID_CYPRESS_BARCODE_2 0xde64 + #define USB_DEVICE_ID_CYPRESS_BARCODE_3 0xbca1 ++#define USB_DEVICE_ID_CYPRESS_BARCODE_4 0xed81 + #define USB_DEVICE_ID_CYPRESS_TRUETOUCH 0xc001 + + #define USB_VENDOR_ID_DEALEXTREAME 0x10c5 +diff --git a/drivers/infiniband/ulp/srp/ib_srp.c b/drivers/infiniband/ulp/srp/ib_srp.c +index 0bfa545..c76b051 100644 +--- a/drivers/infiniband/ulp/srp/ib_srp.c ++++ b/drivers/infiniband/ulp/srp/ib_srp.c +@@ -568,24 +568,62 @@ static void srp_unmap_data(struct scsi_cmnd *scmnd, + scmnd->sc_data_direction); + } + +-static void srp_remove_req(struct srp_target_port *target, +- struct srp_request *req, s32 req_lim_delta) ++/** ++ * srp_claim_req - Take ownership of the scmnd associated with a request. ++ * @target: SRP target port. ++ * @req: SRP request. ++ * @scmnd: If NULL, take ownership of @req->scmnd. If not NULL, only take ++ * ownership of @req->scmnd if it equals @scmnd. ++ * ++ * Return value: ++ * Either NULL or a pointer to the SCSI command the caller became owner of. ++ */ ++static struct scsi_cmnd *srp_claim_req(struct srp_target_port *target, ++ struct srp_request *req, ++ struct scsi_cmnd *scmnd) + { + unsigned long flags; + +- srp_unmap_data(req->scmnd, target, req); ++ spin_lock_irqsave(&target->lock, flags); ++ if (!scmnd) { ++ scmnd = req->scmnd; ++ req->scmnd = NULL; ++ } else if (req->scmnd == scmnd) { ++ req->scmnd = NULL; ++ } else { ++ scmnd = NULL; ++ } ++ spin_unlock_irqrestore(&target->lock, flags); ++ ++ return scmnd; ++} ++ ++/** ++ * srp_free_req() - Unmap data and add request to the free request list. 
++ */ ++static void srp_free_req(struct srp_target_port *target, ++ struct srp_request *req, struct scsi_cmnd *scmnd, ++ s32 req_lim_delta) ++{ ++ unsigned long flags; ++ ++ srp_unmap_data(scmnd, target, req); ++ + spin_lock_irqsave(&target->lock, flags); + target->req_lim += req_lim_delta; +- req->scmnd = NULL; + list_add_tail(&req->list, &target->free_reqs); + spin_unlock_irqrestore(&target->lock, flags); + } + + static void srp_reset_req(struct srp_target_port *target, struct srp_request *req) + { +- req->scmnd->result = DID_RESET << 16; +- req->scmnd->scsi_done(req->scmnd); +- srp_remove_req(target, req, 0); ++ struct scsi_cmnd *scmnd = srp_claim_req(target, req, NULL); ++ ++ if (scmnd) { ++ scmnd->result = DID_RESET << 16; ++ scmnd->scsi_done(scmnd); ++ srp_free_req(target, req, scmnd, 0); ++ } + } + + static int srp_reconnect_target(struct srp_target_port *target) +@@ -1055,11 +1093,18 @@ static void srp_process_rsp(struct srp_target_port *target, struct srp_rsp *rsp) + complete(&target->tsk_mgmt_done); + } else { + req = &target->req_ring[rsp->tag]; +- scmnd = req->scmnd; +- if (!scmnd) ++ scmnd = srp_claim_req(target, req, NULL); ++ if (!scmnd) { + shost_printk(KERN_ERR, target->scsi_host, + "Null scmnd for RSP w/tag %016llx\n", + (unsigned long long) rsp->tag); ++ ++ spin_lock_irqsave(&target->lock, flags); ++ target->req_lim += be32_to_cpu(rsp->req_lim_delta); ++ spin_unlock_irqrestore(&target->lock, flags); ++ ++ return; ++ } + scmnd->result = rsp->status; + + if (rsp->flags & SRP_RSP_FLAG_SNSVALID) { +@@ -1074,7 +1119,9 @@ static void srp_process_rsp(struct srp_target_port *target, struct srp_rsp *rsp) + else if (rsp->flags & (SRP_RSP_FLAG_DIOVER | SRP_RSP_FLAG_DIUNDER)) + scsi_set_resid(scmnd, be32_to_cpu(rsp->data_in_res_cnt)); + +- srp_remove_req(target, req, be32_to_cpu(rsp->req_lim_delta)); ++ srp_free_req(target, req, scmnd, ++ be32_to_cpu(rsp->req_lim_delta)); ++ + scmnd->host_scribble = NULL; + scmnd->scsi_done(scmnd); + } +@@ -1613,25 +1660,17 @@ static int srp_abort(struct scsi_cmnd *scmnd) + { + struct srp_target_port *target = host_to_target(scmnd->device->host); + struct srp_request *req = (struct srp_request *) scmnd->host_scribble; +- int ret = SUCCESS; + + shost_printk(KERN_ERR, target->scsi_host, "SRP abort called\n"); + +- if (!req || target->qp_in_error) +- return FAILED; +- if (srp_send_tsk_mgmt(target, req->index, scmnd->device->lun, +- SRP_TSK_ABORT_TASK)) ++ if (!req || target->qp_in_error || !srp_claim_req(target, req, scmnd)) + return FAILED; ++ srp_send_tsk_mgmt(target, req->index, scmnd->device->lun, ++ SRP_TSK_ABORT_TASK); ++ srp_free_req(target, req, scmnd, 0); ++ scmnd->result = DID_ABORT << 16; + +- if (req->scmnd) { +- if (!target->tsk_mgmt_status) { +- srp_remove_req(target, req, 0); +- scmnd->result = DID_ABORT << 16; +- } else +- ret = FAILED; +- } +- +- return ret; ++ return SUCCESS; + } + + static int srp_reset_device(struct scsi_cmnd *scmnd) +diff --git a/drivers/md/md.c b/drivers/md/md.c +index d8646d7..2887f22 100644 +--- a/drivers/md/md.c ++++ b/drivers/md/md.c +@@ -1144,8 +1144,11 @@ static int super_90_load(struct md_rdev *rdev, struct md_rdev *refdev, int minor + ret = 0; + } + rdev->sectors = rdev->sb_start; +- /* Limit to 4TB as metadata cannot record more than that */ +- if (rdev->sectors >= (2ULL << 32)) ++ /* Limit to 4TB as metadata cannot record more than that. 
++ * (not needed for Linear and RAID0 as metadata doesn't ++ * record this size) ++ */ ++ if (rdev->sectors >= (2ULL << 32) && sb->level >= 1) + rdev->sectors = (2ULL << 32) - 2; + + if (rdev->sectors < ((sector_t)sb->size) * 2 && sb->level >= 1) +@@ -1427,7 +1430,7 @@ super_90_rdev_size_change(struct md_rdev *rdev, sector_t num_sectors) + /* Limit to 4TB as metadata cannot record more than that. + * 4TB == 2^32 KB, or 2*2^32 sectors. + */ +- if (num_sectors >= (2ULL << 32)) ++ if (num_sectors >= (2ULL << 32) && rdev->mddev->level >= 1) + num_sectors = (2ULL << 32) - 2; + md_super_write(rdev->mddev, rdev, rdev->sb_start, rdev->sb_size, + rdev->sb_page); +diff --git a/drivers/media/dvb/siano/smsusb.c b/drivers/media/dvb/siano/smsusb.c +index fb68805..027550d 100644 +--- a/drivers/media/dvb/siano/smsusb.c ++++ b/drivers/media/dvb/siano/smsusb.c +@@ -481,7 +481,7 @@ static int smsusb_resume(struct usb_interface *intf) + return 0; + } + +-static const struct usb_device_id smsusb_id_table[] __devinitconst = { ++static const struct usb_device_id smsusb_id_table[] = { + { USB_DEVICE(0x187f, 0x0010), + .driver_info = SMS1XXX_BOARD_SIANO_STELLAR }, + { USB_DEVICE(0x187f, 0x0100), +diff --git a/drivers/media/video/gspca/spca506.c b/drivers/media/video/gspca/spca506.c +index 89fec4c..731cd16 100644 +--- a/drivers/media/video/gspca/spca506.c ++++ b/drivers/media/video/gspca/spca506.c +@@ -685,7 +685,7 @@ static const struct sd_desc sd_desc = { + }; + + /* -- module initialisation -- */ +-static const struct usb_device_id device_table[] __devinitconst = { ++static const struct usb_device_id device_table[] = { + {USB_DEVICE(0x06e1, 0xa190)}, + /*fixme: may be IntelPCCameraPro BRIDGE_SPCA505 + {USB_DEVICE(0x0733, 0x0430)}, */ +diff --git a/drivers/misc/sgi-xp/xpc_uv.c b/drivers/misc/sgi-xp/xpc_uv.c +index 17bbacb..cc2ae7e 100644 +--- a/drivers/misc/sgi-xp/xpc_uv.c ++++ b/drivers/misc/sgi-xp/xpc_uv.c +@@ -18,6 +18,8 @@ + #include + #include + #include ++#include ++#include + #include + #include + #include +@@ -59,6 +61,8 @@ static struct xpc_heartbeat_uv *xpc_heartbeat_uv; + XPC_NOTIFY_MSG_SIZE_UV) + #define XPC_NOTIFY_IRQ_NAME "xpc_notify" + ++static int xpc_mq_node = -1; ++ + static struct xpc_gru_mq_uv *xpc_activate_mq_uv; + static struct xpc_gru_mq_uv *xpc_notify_mq_uv; + +@@ -109,11 +113,8 @@ xpc_get_gru_mq_irq_uv(struct xpc_gru_mq_uv *mq, int cpu, char *irq_name) + #if defined CONFIG_X86_64 + mq->irq = uv_setup_irq(irq_name, cpu, mq->mmr_blade, mq->mmr_offset, + UV_AFFINITY_CPU); +- if (mq->irq < 0) { +- dev_err(xpc_part, "uv_setup_irq() returned error=%d\n", +- -mq->irq); ++ if (mq->irq < 0) + return mq->irq; +- } + + mq->mmr_value = uv_read_global_mmr64(mmr_pnode, mq->mmr_offset); + +@@ -238,8 +239,9 @@ xpc_create_gru_mq_uv(unsigned int mq_size, int cpu, char *irq_name, + mq->mmr_blade = uv_cpu_to_blade_id(cpu); + + nid = cpu_to_node(cpu); +- page = alloc_pages_exact_node(nid, GFP_KERNEL | __GFP_ZERO | GFP_THISNODE, +- pg_order); ++ page = alloc_pages_exact_node(nid, ++ GFP_KERNEL | __GFP_ZERO | GFP_THISNODE, ++ pg_order); + if (page == NULL) { + dev_err(xpc_part, "xpc_create_gru_mq_uv() failed to alloc %d " + "bytes of memory on nid=%d for GRU mq\n", mq_size, nid); +@@ -1731,9 +1733,50 @@ static struct xpc_arch_operations xpc_arch_ops_uv = { + .notify_senders_of_disconnect = xpc_notify_senders_of_disconnect_uv, + }; + ++static int ++xpc_init_mq_node(int nid) ++{ ++ int cpu; ++ ++ get_online_cpus(); ++ ++ for_each_cpu(cpu, cpumask_of_node(nid)) { ++ xpc_activate_mq_uv = ++ 
xpc_create_gru_mq_uv(XPC_ACTIVATE_MQ_SIZE_UV, nid, ++ XPC_ACTIVATE_IRQ_NAME, ++ xpc_handle_activate_IRQ_uv); ++ if (!IS_ERR(xpc_activate_mq_uv)) ++ break; ++ } ++ if (IS_ERR(xpc_activate_mq_uv)) { ++ put_online_cpus(); ++ return PTR_ERR(xpc_activate_mq_uv); ++ } ++ ++ for_each_cpu(cpu, cpumask_of_node(nid)) { ++ xpc_notify_mq_uv = ++ xpc_create_gru_mq_uv(XPC_NOTIFY_MQ_SIZE_UV, nid, ++ XPC_NOTIFY_IRQ_NAME, ++ xpc_handle_notify_IRQ_uv); ++ if (!IS_ERR(xpc_notify_mq_uv)) ++ break; ++ } ++ if (IS_ERR(xpc_notify_mq_uv)) { ++ xpc_destroy_gru_mq_uv(xpc_activate_mq_uv); ++ put_online_cpus(); ++ return PTR_ERR(xpc_notify_mq_uv); ++ } ++ ++ put_online_cpus(); ++ return 0; ++} ++ + int + xpc_init_uv(void) + { ++ int nid; ++ int ret = 0; ++ + xpc_arch_ops = xpc_arch_ops_uv; + + if (sizeof(struct xpc_notify_mq_msghdr_uv) > XPC_MSG_HDR_MAX_SIZE) { +@@ -1742,21 +1785,21 @@ xpc_init_uv(void) + return -E2BIG; + } + +- xpc_activate_mq_uv = xpc_create_gru_mq_uv(XPC_ACTIVATE_MQ_SIZE_UV, 0, +- XPC_ACTIVATE_IRQ_NAME, +- xpc_handle_activate_IRQ_uv); +- if (IS_ERR(xpc_activate_mq_uv)) +- return PTR_ERR(xpc_activate_mq_uv); ++ if (xpc_mq_node < 0) ++ for_each_online_node(nid) { ++ ret = xpc_init_mq_node(nid); + +- xpc_notify_mq_uv = xpc_create_gru_mq_uv(XPC_NOTIFY_MQ_SIZE_UV, 0, +- XPC_NOTIFY_IRQ_NAME, +- xpc_handle_notify_IRQ_uv); +- if (IS_ERR(xpc_notify_mq_uv)) { +- xpc_destroy_gru_mq_uv(xpc_activate_mq_uv); +- return PTR_ERR(xpc_notify_mq_uv); +- } ++ if (!ret) ++ break; ++ } ++ else ++ ret = xpc_init_mq_node(xpc_mq_node); + +- return 0; ++ if (ret < 0) ++ dev_err(xpc_part, "xpc_init_mq_node() returned error=%d\n", ++ -ret); ++ ++ return ret; + } + + void +@@ -1765,3 +1808,6 @@ xpc_exit_uv(void) + xpc_destroy_gru_mq_uv(xpc_notify_mq_uv); + xpc_destroy_gru_mq_uv(xpc_activate_mq_uv); + } ++ ++module_param(xpc_mq_node, int, 0); ++MODULE_PARM_DESC(xpc_mq_node, "Node number on which to allocate message queues."); +diff --git a/drivers/net/netconsole.c b/drivers/net/netconsole.c +index e888202..01b104e 100644 +--- a/drivers/net/netconsole.c ++++ b/drivers/net/netconsole.c +@@ -652,7 +652,6 @@ static int netconsole_netdev_event(struct notifier_block *this, + flags); + dev_put(nt->np.dev); + nt->np.dev = NULL; +- netconsole_target_put(nt); + } + nt->enabled = 0; + stopped = true; +diff --git a/drivers/net/wireless/ath/ath9k/recv.c b/drivers/net/wireless/ath/ath9k/recv.c +index e6d791c..b4cbc82 100644 +--- a/drivers/net/wireless/ath/ath9k/recv.c ++++ b/drivers/net/wireless/ath/ath9k/recv.c +@@ -1782,7 +1782,6 @@ int ath_rx_tasklet(struct ath_softc *sc, int flush, bool hp) + struct ieee80211_hw *hw = sc->hw; + struct ieee80211_hdr *hdr; + int retval; +- bool decrypt_error = false; + struct ath_rx_status rs; + enum ath9k_rx_qtype qtype; + bool edma = !!(ah->caps.hw_caps & ATH9K_HW_CAP_EDMA); +@@ -1804,6 +1803,7 @@ int ath_rx_tasklet(struct ath_softc *sc, int flush, bool hp) + tsf_lower = tsf & 0xffffffff; + + do { ++ bool decrypt_error = false; + /* If handling rx interrupt and flush is in progress => exit */ + if ((sc->sc_flags & SC_OP_RXFLUSH) && (flush == 0)) + break; +diff --git a/drivers/net/wireless/p54/p54usb.c b/drivers/net/wireless/p54/p54usb.c +index 9b60968..8a009bc 100644 +--- a/drivers/net/wireless/p54/p54usb.c ++++ b/drivers/net/wireless/p54/p54usb.c +@@ -42,7 +42,7 @@ MODULE_FIRMWARE("isl3887usb"); + * whenever you add a new device. 
+ */ + +-static struct usb_device_id p54u_table[] __devinitdata = { ++static struct usb_device_id p54u_table[] = { + /* Version 1 devices (pci chip + net2280) */ + {USB_DEVICE(0x0411, 0x0050)}, /* Buffalo WLI2-USB2-G54 */ + {USB_DEVICE(0x045e, 0x00c2)}, /* Microsoft MN-710 */ +diff --git a/drivers/net/wireless/rtl818x/rtl8187/dev.c b/drivers/net/wireless/rtl818x/rtl8187/dev.c +index 4a78f9e..4e98c39 100644 +--- a/drivers/net/wireless/rtl818x/rtl8187/dev.c ++++ b/drivers/net/wireless/rtl818x/rtl8187/dev.c +@@ -44,7 +44,7 @@ MODULE_AUTHOR("Larry Finger "); + MODULE_DESCRIPTION("RTL8187/RTL8187B USB wireless driver"); + MODULE_LICENSE("GPL"); + +-static struct usb_device_id rtl8187_table[] __devinitdata = { ++static struct usb_device_id rtl8187_table[] = { + /* Asus */ + {USB_DEVICE(0x0b05, 0x171d), .driver_info = DEVICE_RTL8187}, + /* Belkin */ +diff --git a/drivers/pci/pci-driver.c b/drivers/pci/pci-driver.c +index d024f83..68af94c 100644 +--- a/drivers/pci/pci-driver.c ++++ b/drivers/pci/pci-driver.c +@@ -952,6 +952,13 @@ static int pci_pm_poweroff_noirq(struct device *dev) + if (!pci_dev->state_saved && !pci_is_bridge(pci_dev)) + pci_prepare_to_sleep(pci_dev); + ++ /* ++ * The reason for doing this here is the same as for the analogous code ++ * in pci_pm_suspend_noirq(). ++ */ ++ if (pci_dev->class == PCI_CLASS_SERIAL_USB_EHCI) ++ pci_write_config_word(pci_dev, PCI_COMMAND, 0); ++ + return 0; + } + +diff --git a/drivers/platform/x86/asus-nb-wmi.c b/drivers/platform/x86/asus-nb-wmi.c +index b0859d4..ec5b17f 100644 +--- a/drivers/platform/x86/asus-nb-wmi.c ++++ b/drivers/platform/x86/asus-nb-wmi.c +@@ -86,6 +86,10 @@ static const struct key_entry asus_nb_wmi_keymap[] = { + { KE_KEY, 0x8A, { KEY_PROG1 } }, + { KE_KEY, 0x95, { KEY_MEDIA } }, + { KE_KEY, 0x99, { KEY_PHONE } }, ++ { KE_KEY, 0xA0, { KEY_SWITCHVIDEOMODE } }, /* SDSP HDMI only */ ++ { KE_KEY, 0xA1, { KEY_SWITCHVIDEOMODE } }, /* SDSP LCD + HDMI */ ++ { KE_KEY, 0xA2, { KEY_SWITCHVIDEOMODE } }, /* SDSP CRT + HDMI */ ++ { KE_KEY, 0xA3, { KEY_SWITCHVIDEOMODE } }, /* SDSP TV + HDMI */ + { KE_KEY, 0xb5, { KEY_CALC } }, + { KE_KEY, 0xc4, { KEY_KBDILLUMUP } }, + { KE_KEY, 0xc5, { KEY_KBDILLUMDOWN } }, +diff --git a/drivers/rapidio/devices/tsi721.c b/drivers/rapidio/devices/tsi721.c +index 30d2072..33471e1 100644 +--- a/drivers/rapidio/devices/tsi721.c ++++ b/drivers/rapidio/devices/tsi721.c +@@ -439,6 +439,9 @@ static void tsi721_db_dpc(struct work_struct *work) + " info %4.4x\n", DBELL_SID(idb.bytes), + DBELL_TID(idb.bytes), DBELL_INF(idb.bytes)); + } ++ ++ wr_ptr = ioread32(priv->regs + ++ TSI721_IDQ_WP(IDB_QUEUE)) % IDB_QSIZE; + } + + iowrite32(rd_ptr & (IDB_QSIZE - 1), +@@ -449,6 +452,10 @@ static void tsi721_db_dpc(struct work_struct *work) + regval |= TSI721_SR_CHINT_IDBQRCV; + iowrite32(regval, + priv->regs + TSI721_SR_CHINTE(IDB_QUEUE)); ++ ++ wr_ptr = ioread32(priv->regs + TSI721_IDQ_WP(IDB_QUEUE)) % IDB_QSIZE; ++ if (wr_ptr != rd_ptr) ++ schedule_work(&priv->idb_work); + } + + /** +@@ -2155,7 +2162,7 @@ static int __devinit tsi721_probe(struct pci_dev *pdev, + const struct pci_device_id *id) + { + struct tsi721_device *priv; +- int i, cap; ++ int cap; + int err; + u32 regval; + +@@ -2175,12 +2182,15 @@ static int __devinit tsi721_probe(struct pci_dev *pdev, + priv->pdev = pdev; + + #ifdef DEBUG ++ { ++ int i; + for (i = 0; i <= PCI_STD_RESOURCE_END; i++) { + dev_dbg(&pdev->dev, "res[%d] @ 0x%llx (0x%lx, 0x%lx)\n", + i, (unsigned long long)pci_resource_start(pdev, i), + (unsigned long)pci_resource_len(pdev, i), + 
pci_resource_flags(pdev, i)); + } ++ } + #endif + /* + * Verify BAR configuration +diff --git a/drivers/rtc/rtc-rs5c348.c b/drivers/rtc/rtc-rs5c348.c +index 971bc8e..11bcb20 100644 +--- a/drivers/rtc/rtc-rs5c348.c ++++ b/drivers/rtc/rtc-rs5c348.c +@@ -122,9 +122,12 @@ rs5c348_rtc_read_time(struct device *dev, struct rtc_time *tm) + tm->tm_min = bcd2bin(rxbuf[RS5C348_REG_MINS] & RS5C348_MINS_MASK); + tm->tm_hour = bcd2bin(rxbuf[RS5C348_REG_HOURS] & RS5C348_HOURS_MASK); + if (!pdata->rtc_24h) { +- tm->tm_hour %= 12; +- if (rxbuf[RS5C348_REG_HOURS] & RS5C348_BIT_PM) ++ if (rxbuf[RS5C348_REG_HOURS] & RS5C348_BIT_PM) { ++ tm->tm_hour -= 20; ++ tm->tm_hour %= 12; + tm->tm_hour += 12; ++ } else ++ tm->tm_hour %= 12; + } + tm->tm_wday = bcd2bin(rxbuf[RS5C348_REG_WDAY] & RS5C348_WDAY_MASK); + tm->tm_mday = bcd2bin(rxbuf[RS5C348_REG_DAY] & RS5C348_DAY_MASK); +diff --git a/drivers/staging/speakup/main.c b/drivers/staging/speakup/main.c +index 8be5604..0d70f68 100644 +--- a/drivers/staging/speakup/main.c ++++ b/drivers/staging/speakup/main.c +@@ -1854,7 +1854,7 @@ static void speakup_bits(struct vc_data *vc) + + static int handle_goto(struct vc_data *vc, u_char type, u_char ch, u_short key) + { +- static u_char *goto_buf = "\0\0\0\0\0\0"; ++ static u_char goto_buf[8]; + static int num; + int maxlen, go_pos; + char *cp; +diff --git a/drivers/staging/vt6656/main_usb.c b/drivers/staging/vt6656/main_usb.c +index 27521b6..ae62d57 100644 +--- a/drivers/staging/vt6656/main_usb.c ++++ b/drivers/staging/vt6656/main_usb.c +@@ -222,7 +222,7 @@ DEVICE_PARAM(b80211hEnable, "802.11h mode"); + // Static vars definitions + // + +-static struct usb_device_id vt6656_table[] __devinitdata = { ++static struct usb_device_id vt6656_table[] = { + {USB_DEVICE(VNT_USB_VENDOR_ID, VNT_USB_PRODUCT_ID)}, + {} + }; +diff --git a/drivers/staging/winbond/wbusb.c b/drivers/staging/winbond/wbusb.c +index f958eb4..3f0ce2b 100644 +--- a/drivers/staging/winbond/wbusb.c ++++ b/drivers/staging/winbond/wbusb.c +@@ -25,7 +25,7 @@ MODULE_DESCRIPTION("IS89C35 802.11bg WLAN USB Driver"); + MODULE_LICENSE("GPL"); + MODULE_VERSION("0.1"); + +-static const struct usb_device_id wb35_table[] __devinitconst = { ++static const struct usb_device_id wb35_table[] = { + { USB_DEVICE(0x0416, 0x0035) }, + { USB_DEVICE(0x18E8, 0x6201) }, + { USB_DEVICE(0x18E8, 0x6206) }, +diff --git a/drivers/target/target_core_transport.c b/drivers/target/target_core_transport.c +index 94c03d2..597fb9b 100644 +--- a/drivers/target/target_core_transport.c ++++ b/drivers/target/target_core_transport.c +@@ -3509,9 +3509,9 @@ transport_generic_get_mem(struct se_cmd *cmd) + return 0; + + out: +- while (i >= 0) { +- __free_page(sg_page(&cmd->t_data_sg[i])); ++ while (i > 0) { + i--; ++ __free_page(sg_page(&cmd->t_data_sg[i])); + } + kfree(cmd->t_data_sg); + cmd->t_data_sg = NULL; +diff --git a/drivers/tty/serial/pmac_zilog.c b/drivers/tty/serial/pmac_zilog.c +index 5acd24a..086f7fe 100644 +--- a/drivers/tty/serial/pmac_zilog.c ++++ b/drivers/tty/serial/pmac_zilog.c +@@ -1407,10 +1407,16 @@ static int pmz_verify_port(struct uart_port *port, struct serial_struct *ser) + static int pmz_poll_get_char(struct uart_port *port) + { + struct uart_pmac_port *uap = (struct uart_pmac_port *)port; ++ int tries = 2; + +- while ((read_zsreg(uap, R0) & Rx_CH_AV) == 0) +- udelay(5); +- return read_zsdata(uap); ++ while (tries) { ++ if ((read_zsreg(uap, R0) & Rx_CH_AV) != 0) ++ return read_zsdata(uap); ++ if (tries--) ++ udelay(5); ++ } ++ ++ return NO_POLL_CHAR; + } + + static void 
pmz_poll_put_char(struct uart_port *port, unsigned char c) +diff --git a/drivers/usb/class/cdc-acm.c b/drivers/usb/class/cdc-acm.c +index 1094469..dbf7d20 100644 +--- a/drivers/usb/class/cdc-acm.c ++++ b/drivers/usb/class/cdc-acm.c +@@ -1043,7 +1043,8 @@ skip_normal_probe: + } + + +- if (data_interface->cur_altsetting->desc.bNumEndpoints < 2) ++ if (data_interface->cur_altsetting->desc.bNumEndpoints < 2 || ++ control_interface->cur_altsetting->desc.bNumEndpoints == 0) + return -EINVAL; + + epctrl = &control_interface->cur_altsetting->endpoint[0].desc; +diff --git a/drivers/usb/gadget/u_ether.c b/drivers/usb/gadget/u_ether.c +index 4e1f0aa..9a2a1ae 100644 +--- a/drivers/usb/gadget/u_ether.c ++++ b/drivers/usb/gadget/u_ether.c +@@ -669,6 +669,8 @@ static int eth_stop(struct net_device *net) + spin_lock_irqsave(&dev->lock, flags); + if (dev->port_usb) { + struct gether *link = dev->port_usb; ++ const struct usb_endpoint_descriptor *in; ++ const struct usb_endpoint_descriptor *out; + + if (link->close) + link->close(link); +@@ -682,10 +684,14 @@ static int eth_stop(struct net_device *net) + * their own pace; the network stack can handle old packets. + * For the moment we leave this here, since it works. + */ ++ in = link->in_ep->desc; ++ out = link->out_ep->desc; + usb_ep_disable(link->in_ep); + usb_ep_disable(link->out_ep); + if (netif_carrier_ok(net)) { + DBG(dev, "host still using in/out endpoints\n"); ++ link->in_ep->desc = in; ++ link->out_ep->desc = out; + usb_ep_enable(link->in_ep); + usb_ep_enable(link->out_ep); + } +diff --git a/drivers/usb/host/xhci-pci.c b/drivers/usb/host/xhci-pci.c +index daf5754..07c72a4 100644 +--- a/drivers/usb/host/xhci-pci.c ++++ b/drivers/usb/host/xhci-pci.c +@@ -95,6 +95,7 @@ static void xhci_pci_quirks(struct device *dev, struct xhci_hcd *xhci) + pdev->device == PCI_DEVICE_ID_ASROCK_P67) { + xhci->quirks |= XHCI_RESET_ON_RESUME; + xhci_dbg(xhci, "QUIRK: Resetting on resume\n"); ++ xhci->quirks |= XHCI_TRUST_TX_LENGTH; + } + if (pdev->vendor == PCI_VENDOR_ID_VIA) + xhci->quirks |= XHCI_RESET_ON_RESUME; +diff --git a/drivers/usb/host/xhci.c b/drivers/usb/host/xhci.c +index 05f82e9..f7c0a2a 100644 +--- a/drivers/usb/host/xhci.c ++++ b/drivers/usb/host/xhci.c +@@ -163,7 +163,7 @@ int xhci_reset(struct xhci_hcd *xhci) + xhci_writel(xhci, command, &xhci->op_regs->command); + + ret = handshake(xhci, &xhci->op_regs->command, +- CMD_RESET, 0, 250 * 1000); ++ CMD_RESET, 0, 10 * 1000 * 1000); + if (ret) + return ret; + +@@ -172,7 +172,8 @@ int xhci_reset(struct xhci_hcd *xhci) + * xHCI cannot write to any doorbells or operational registers other + * than status until the "Controller Not Ready" flag is cleared. 
+ */ +- return handshake(xhci, &xhci->op_regs->status, STS_CNR, 0, 250 * 1000); ++ return handshake(xhci, &xhci->op_regs->status, ++ STS_CNR, 0, 10 * 1000 * 1000); + } + + #ifdef CONFIG_PCI +diff --git a/drivers/usb/misc/emi62.c b/drivers/usb/misc/emi62.c +index fc15ad4..723e833 100644 +--- a/drivers/usb/misc/emi62.c ++++ b/drivers/usb/misc/emi62.c +@@ -259,7 +259,7 @@ wraperr: + return err; + } + +-static const struct usb_device_id id_table[] __devinitconst = { ++static const struct usb_device_id id_table[] = { + { USB_DEVICE(EMI62_VENDOR_ID, EMI62_PRODUCT_ID) }, + { } /* Terminating entry */ + }; +diff --git a/drivers/usb/serial/ftdi_sio.c b/drivers/usb/serial/ftdi_sio.c +index 4045e39..b3182bb 100644 +--- a/drivers/usb/serial/ftdi_sio.c ++++ b/drivers/usb/serial/ftdi_sio.c +@@ -811,6 +811,7 @@ static struct usb_device_id id_table_combined [] = { + { USB_DEVICE(LARSENBRUSGAARD_VID, LB_ALTITRACK_PID) }, + { USB_DEVICE(GN_OTOMETRICS_VID, AURICAL_USB_PID) }, + { USB_DEVICE(PI_VID, PI_E861_PID) }, ++ { USB_DEVICE(KONDO_VID, KONDO_USB_SERIAL_PID) }, + { USB_DEVICE(BAYER_VID, BAYER_CONTOUR_CABLE_PID) }, + { USB_DEVICE(FTDI_VID, MARVELL_OPENRD_PID), + .driver_info = (kernel_ulong_t)&ftdi_jtag_quirk }, +diff --git a/drivers/usb/serial/ftdi_sio_ids.h b/drivers/usb/serial/ftdi_sio_ids.h +index d27d7d7..54b4258 100644 +--- a/drivers/usb/serial/ftdi_sio_ids.h ++++ b/drivers/usb/serial/ftdi_sio_ids.h +@@ -795,6 +795,13 @@ + #define PI_E861_PID 0x1008 /* E-861 piezo controller USB connection */ + + /* ++ * Kondo Kagaku Co.Ltd. ++ * http://www.kondo-robot.com/EN ++ */ ++#define KONDO_VID 0x165c ++#define KONDO_USB_SERIAL_PID 0x0002 ++ ++/* + * Bayer Ascensia Contour blood glucose meter USB-converter cable. + * http://winglucofacts.com/cables/ + */ +diff --git a/drivers/usb/serial/mos7840.c b/drivers/usb/serial/mos7840.c +index 5c7d654..b150ed9 100644 +--- a/drivers/usb/serial/mos7840.c ++++ b/drivers/usb/serial/mos7840.c +@@ -1191,9 +1191,12 @@ static int mos7840_chars_in_buffer(struct tty_struct *tty) + } + + spin_lock_irqsave(&mos7840_port->pool_lock, flags); +- for (i = 0; i < NUM_URBS; ++i) +- if (mos7840_port->busy[i]) +- chars += URB_TRANSFER_BUFFER_SIZE; ++ for (i = 0; i < NUM_URBS; ++i) { ++ if (mos7840_port->busy[i]) { ++ struct urb *urb = mos7840_port->write_urb_pool[i]; ++ chars += urb->transfer_buffer_length; ++ } ++ } + spin_unlock_irqrestore(&mos7840_port->pool_lock, flags); + dbg("%s - returns %d", __func__, chars); + return chars; +diff --git a/drivers/usb/serial/option.c b/drivers/usb/serial/option.c +index d89aac1..113560d 100644 +--- a/drivers/usb/serial/option.c ++++ b/drivers/usb/serial/option.c +@@ -80,84 +80,9 @@ static void option_instat_callback(struct urb *urb); + #define OPTION_PRODUCT_GTM380_MODEM 0x7201 + + #define HUAWEI_VENDOR_ID 0x12D1 +-#define HUAWEI_PRODUCT_E600 0x1001 +-#define HUAWEI_PRODUCT_E220 0x1003 +-#define HUAWEI_PRODUCT_E220BIS 0x1004 +-#define HUAWEI_PRODUCT_E1401 0x1401 +-#define HUAWEI_PRODUCT_E1402 0x1402 +-#define HUAWEI_PRODUCT_E1403 0x1403 +-#define HUAWEI_PRODUCT_E1404 0x1404 +-#define HUAWEI_PRODUCT_E1405 0x1405 +-#define HUAWEI_PRODUCT_E1406 0x1406 +-#define HUAWEI_PRODUCT_E1407 0x1407 +-#define HUAWEI_PRODUCT_E1408 0x1408 +-#define HUAWEI_PRODUCT_E1409 0x1409 +-#define HUAWEI_PRODUCT_E140A 0x140A +-#define HUAWEI_PRODUCT_E140B 0x140B +-#define HUAWEI_PRODUCT_E140C 0x140C +-#define HUAWEI_PRODUCT_E140D 0x140D +-#define HUAWEI_PRODUCT_E140E 0x140E +-#define HUAWEI_PRODUCT_E140F 0x140F +-#define HUAWEI_PRODUCT_E1410 0x1410 +-#define 
HUAWEI_PRODUCT_E1411 0x1411 +-#define HUAWEI_PRODUCT_E1412 0x1412 +-#define HUAWEI_PRODUCT_E1413 0x1413 +-#define HUAWEI_PRODUCT_E1414 0x1414 +-#define HUAWEI_PRODUCT_E1415 0x1415 +-#define HUAWEI_PRODUCT_E1416 0x1416 +-#define HUAWEI_PRODUCT_E1417 0x1417 +-#define HUAWEI_PRODUCT_E1418 0x1418 +-#define HUAWEI_PRODUCT_E1419 0x1419 +-#define HUAWEI_PRODUCT_E141A 0x141A +-#define HUAWEI_PRODUCT_E141B 0x141B +-#define HUAWEI_PRODUCT_E141C 0x141C +-#define HUAWEI_PRODUCT_E141D 0x141D +-#define HUAWEI_PRODUCT_E141E 0x141E +-#define HUAWEI_PRODUCT_E141F 0x141F +-#define HUAWEI_PRODUCT_E1420 0x1420 +-#define HUAWEI_PRODUCT_E1421 0x1421 +-#define HUAWEI_PRODUCT_E1422 0x1422 +-#define HUAWEI_PRODUCT_E1423 0x1423 +-#define HUAWEI_PRODUCT_E1424 0x1424 +-#define HUAWEI_PRODUCT_E1425 0x1425 +-#define HUAWEI_PRODUCT_E1426 0x1426 +-#define HUAWEI_PRODUCT_E1427 0x1427 +-#define HUAWEI_PRODUCT_E1428 0x1428 +-#define HUAWEI_PRODUCT_E1429 0x1429 +-#define HUAWEI_PRODUCT_E142A 0x142A +-#define HUAWEI_PRODUCT_E142B 0x142B +-#define HUAWEI_PRODUCT_E142C 0x142C +-#define HUAWEI_PRODUCT_E142D 0x142D +-#define HUAWEI_PRODUCT_E142E 0x142E +-#define HUAWEI_PRODUCT_E142F 0x142F +-#define HUAWEI_PRODUCT_E1430 0x1430 +-#define HUAWEI_PRODUCT_E1431 0x1431 +-#define HUAWEI_PRODUCT_E1432 0x1432 +-#define HUAWEI_PRODUCT_E1433 0x1433 +-#define HUAWEI_PRODUCT_E1434 0x1434 +-#define HUAWEI_PRODUCT_E1435 0x1435 +-#define HUAWEI_PRODUCT_E1436 0x1436 +-#define HUAWEI_PRODUCT_E1437 0x1437 +-#define HUAWEI_PRODUCT_E1438 0x1438 +-#define HUAWEI_PRODUCT_E1439 0x1439 +-#define HUAWEI_PRODUCT_E143A 0x143A +-#define HUAWEI_PRODUCT_E143B 0x143B +-#define HUAWEI_PRODUCT_E143C 0x143C +-#define HUAWEI_PRODUCT_E143D 0x143D +-#define HUAWEI_PRODUCT_E143E 0x143E +-#define HUAWEI_PRODUCT_E143F 0x143F + #define HUAWEI_PRODUCT_K4505 0x1464 + #define HUAWEI_PRODUCT_K3765 0x1465 +-#define HUAWEI_PRODUCT_E14AC 0x14AC +-#define HUAWEI_PRODUCT_K3806 0x14AE + #define HUAWEI_PRODUCT_K4605 0x14C6 +-#define HUAWEI_PRODUCT_K3770 0x14C9 +-#define HUAWEI_PRODUCT_K3771 0x14CA +-#define HUAWEI_PRODUCT_K4510 0x14CB +-#define HUAWEI_PRODUCT_K4511 0x14CC +-#define HUAWEI_PRODUCT_ETS1220 0x1803 +-#define HUAWEI_PRODUCT_E353 0x1506 +-#define HUAWEI_PRODUCT_E173S 0x1C05 + + #define QUANTA_VENDOR_ID 0x0408 + #define QUANTA_PRODUCT_Q101 0xEA02 +@@ -614,101 +539,123 @@ static const struct usb_device_id option_ids[] = { + { USB_DEVICE(QUANTA_VENDOR_ID, QUANTA_PRODUCT_GLX) }, + { USB_DEVICE(QUANTA_VENDOR_ID, QUANTA_PRODUCT_GKE) }, + { USB_DEVICE(QUANTA_VENDOR_ID, QUANTA_PRODUCT_GLE) }, +- { USB_DEVICE_AND_INTERFACE_INFO(HUAWEI_VENDOR_ID, HUAWEI_PRODUCT_E600, 0xff, 0xff, 0xff) }, +- { USB_DEVICE_AND_INTERFACE_INFO(HUAWEI_VENDOR_ID, HUAWEI_PRODUCT_E220, 0xff, 0xff, 0xff) }, +- { USB_DEVICE_AND_INTERFACE_INFO(HUAWEI_VENDOR_ID, HUAWEI_PRODUCT_E220BIS, 0xff, 0xff, 0xff) }, +- { USB_DEVICE_AND_INTERFACE_INFO(HUAWEI_VENDOR_ID, HUAWEI_PRODUCT_E1401, 0xff, 0xff, 0xff) }, +- { USB_DEVICE_AND_INTERFACE_INFO(HUAWEI_VENDOR_ID, HUAWEI_PRODUCT_E1402, 0xff, 0xff, 0xff) }, +- { USB_DEVICE_AND_INTERFACE_INFO(HUAWEI_VENDOR_ID, HUAWEI_PRODUCT_E1403, 0xff, 0xff, 0xff) }, +- { USB_DEVICE_AND_INTERFACE_INFO(HUAWEI_VENDOR_ID, HUAWEI_PRODUCT_E1404, 0xff, 0xff, 0xff) }, +- { USB_DEVICE_AND_INTERFACE_INFO(HUAWEI_VENDOR_ID, HUAWEI_PRODUCT_E1405, 0xff, 0xff, 0xff) }, +- { USB_DEVICE_AND_INTERFACE_INFO(HUAWEI_VENDOR_ID, HUAWEI_PRODUCT_E1406, 0xff, 0xff, 0xff) }, +- { USB_DEVICE_AND_INTERFACE_INFO(HUAWEI_VENDOR_ID, HUAWEI_PRODUCT_E1407, 0xff, 0xff, 0xff) }, +- { 
USB_DEVICE_AND_INTERFACE_INFO(HUAWEI_VENDOR_ID, HUAWEI_PRODUCT_E1408, 0xff, 0xff, 0xff) }, +- { USB_DEVICE_AND_INTERFACE_INFO(HUAWEI_VENDOR_ID, HUAWEI_PRODUCT_E1409, 0xff, 0xff, 0xff) }, +- { USB_DEVICE_AND_INTERFACE_INFO(HUAWEI_VENDOR_ID, HUAWEI_PRODUCT_E140A, 0xff, 0xff, 0xff) }, +- { USB_DEVICE_AND_INTERFACE_INFO(HUAWEI_VENDOR_ID, HUAWEI_PRODUCT_E140B, 0xff, 0xff, 0xff) }, +- { USB_DEVICE_AND_INTERFACE_INFO(HUAWEI_VENDOR_ID, HUAWEI_PRODUCT_E140C, 0xff, 0xff, 0xff) }, +- { USB_DEVICE_AND_INTERFACE_INFO(HUAWEI_VENDOR_ID, HUAWEI_PRODUCT_E140D, 0xff, 0xff, 0xff) }, +- { USB_DEVICE_AND_INTERFACE_INFO(HUAWEI_VENDOR_ID, HUAWEI_PRODUCT_E140E, 0xff, 0xff, 0xff) }, +- { USB_DEVICE_AND_INTERFACE_INFO(HUAWEI_VENDOR_ID, HUAWEI_PRODUCT_E140F, 0xff, 0xff, 0xff) }, +- { USB_DEVICE_AND_INTERFACE_INFO(HUAWEI_VENDOR_ID, HUAWEI_PRODUCT_E1410, 0xff, 0xff, 0xff) }, +- { USB_DEVICE_AND_INTERFACE_INFO(HUAWEI_VENDOR_ID, HUAWEI_PRODUCT_E1411, 0xff, 0xff, 0xff) }, +- { USB_DEVICE_AND_INTERFACE_INFO(HUAWEI_VENDOR_ID, HUAWEI_PRODUCT_E1412, 0xff, 0xff, 0xff) }, +- { USB_DEVICE_AND_INTERFACE_INFO(HUAWEI_VENDOR_ID, HUAWEI_PRODUCT_E1413, 0xff, 0xff, 0xff) }, +- { USB_DEVICE_AND_INTERFACE_INFO(HUAWEI_VENDOR_ID, HUAWEI_PRODUCT_E1414, 0xff, 0xff, 0xff) }, +- { USB_DEVICE_AND_INTERFACE_INFO(HUAWEI_VENDOR_ID, HUAWEI_PRODUCT_E1415, 0xff, 0xff, 0xff) }, +- { USB_DEVICE_AND_INTERFACE_INFO(HUAWEI_VENDOR_ID, HUAWEI_PRODUCT_E1416, 0xff, 0xff, 0xff) }, +- { USB_DEVICE_AND_INTERFACE_INFO(HUAWEI_VENDOR_ID, HUAWEI_PRODUCT_E1417, 0xff, 0xff, 0xff) }, +- { USB_DEVICE_AND_INTERFACE_INFO(HUAWEI_VENDOR_ID, HUAWEI_PRODUCT_E1418, 0xff, 0xff, 0xff) }, +- { USB_DEVICE_AND_INTERFACE_INFO(HUAWEI_VENDOR_ID, HUAWEI_PRODUCT_E1419, 0xff, 0xff, 0xff) }, +- { USB_DEVICE_AND_INTERFACE_INFO(HUAWEI_VENDOR_ID, HUAWEI_PRODUCT_E141A, 0xff, 0xff, 0xff) }, +- { USB_DEVICE_AND_INTERFACE_INFO(HUAWEI_VENDOR_ID, HUAWEI_PRODUCT_E141B, 0xff, 0xff, 0xff) }, +- { USB_DEVICE_AND_INTERFACE_INFO(HUAWEI_VENDOR_ID, HUAWEI_PRODUCT_E141C, 0xff, 0xff, 0xff) }, +- { USB_DEVICE_AND_INTERFACE_INFO(HUAWEI_VENDOR_ID, HUAWEI_PRODUCT_E141D, 0xff, 0xff, 0xff) }, +- { USB_DEVICE_AND_INTERFACE_INFO(HUAWEI_VENDOR_ID, HUAWEI_PRODUCT_E141E, 0xff, 0xff, 0xff) }, +- { USB_DEVICE_AND_INTERFACE_INFO(HUAWEI_VENDOR_ID, HUAWEI_PRODUCT_E141F, 0xff, 0xff, 0xff) }, +- { USB_DEVICE_AND_INTERFACE_INFO(HUAWEI_VENDOR_ID, HUAWEI_PRODUCT_E1420, 0xff, 0xff, 0xff) }, +- { USB_DEVICE_AND_INTERFACE_INFO(HUAWEI_VENDOR_ID, HUAWEI_PRODUCT_E1421, 0xff, 0xff, 0xff) }, +- { USB_DEVICE_AND_INTERFACE_INFO(HUAWEI_VENDOR_ID, HUAWEI_PRODUCT_E1422, 0xff, 0xff, 0xff) }, +- { USB_DEVICE_AND_INTERFACE_INFO(HUAWEI_VENDOR_ID, HUAWEI_PRODUCT_E1423, 0xff, 0xff, 0xff) }, +- { USB_DEVICE_AND_INTERFACE_INFO(HUAWEI_VENDOR_ID, HUAWEI_PRODUCT_E1424, 0xff, 0xff, 0xff) }, +- { USB_DEVICE_AND_INTERFACE_INFO(HUAWEI_VENDOR_ID, HUAWEI_PRODUCT_E1425, 0xff, 0xff, 0xff) }, +- { USB_DEVICE_AND_INTERFACE_INFO(HUAWEI_VENDOR_ID, HUAWEI_PRODUCT_E1426, 0xff, 0xff, 0xff) }, +- { USB_DEVICE_AND_INTERFACE_INFO(HUAWEI_VENDOR_ID, HUAWEI_PRODUCT_E1427, 0xff, 0xff, 0xff) }, +- { USB_DEVICE_AND_INTERFACE_INFO(HUAWEI_VENDOR_ID, HUAWEI_PRODUCT_E1428, 0xff, 0xff, 0xff) }, +- { USB_DEVICE_AND_INTERFACE_INFO(HUAWEI_VENDOR_ID, HUAWEI_PRODUCT_E1429, 0xff, 0xff, 0xff) }, +- { USB_DEVICE_AND_INTERFACE_INFO(HUAWEI_VENDOR_ID, HUAWEI_PRODUCT_E142A, 0xff, 0xff, 0xff) }, +- { USB_DEVICE_AND_INTERFACE_INFO(HUAWEI_VENDOR_ID, HUAWEI_PRODUCT_E142B, 0xff, 0xff, 0xff) }, +- { USB_DEVICE_AND_INTERFACE_INFO(HUAWEI_VENDOR_ID, HUAWEI_PRODUCT_E142C, 0xff, 0xff, 0xff) }, +- { 
USB_DEVICE_AND_INTERFACE_INFO(HUAWEI_VENDOR_ID, HUAWEI_PRODUCT_E142D, 0xff, 0xff, 0xff) }, +- { USB_DEVICE_AND_INTERFACE_INFO(HUAWEI_VENDOR_ID, HUAWEI_PRODUCT_E142E, 0xff, 0xff, 0xff) }, +- { USB_DEVICE_AND_INTERFACE_INFO(HUAWEI_VENDOR_ID, HUAWEI_PRODUCT_E142F, 0xff, 0xff, 0xff) }, +- { USB_DEVICE_AND_INTERFACE_INFO(HUAWEI_VENDOR_ID, HUAWEI_PRODUCT_E1430, 0xff, 0xff, 0xff) }, +- { USB_DEVICE_AND_INTERFACE_INFO(HUAWEI_VENDOR_ID, HUAWEI_PRODUCT_E1431, 0xff, 0xff, 0xff) }, +- { USB_DEVICE_AND_INTERFACE_INFO(HUAWEI_VENDOR_ID, HUAWEI_PRODUCT_E1432, 0xff, 0xff, 0xff) }, +- { USB_DEVICE_AND_INTERFACE_INFO(HUAWEI_VENDOR_ID, HUAWEI_PRODUCT_E1433, 0xff, 0xff, 0xff) }, +- { USB_DEVICE_AND_INTERFACE_INFO(HUAWEI_VENDOR_ID, HUAWEI_PRODUCT_E1434, 0xff, 0xff, 0xff) }, +- { USB_DEVICE_AND_INTERFACE_INFO(HUAWEI_VENDOR_ID, HUAWEI_PRODUCT_E1435, 0xff, 0xff, 0xff) }, +- { USB_DEVICE_AND_INTERFACE_INFO(HUAWEI_VENDOR_ID, HUAWEI_PRODUCT_E1436, 0xff, 0xff, 0xff) }, +- { USB_DEVICE_AND_INTERFACE_INFO(HUAWEI_VENDOR_ID, HUAWEI_PRODUCT_E1437, 0xff, 0xff, 0xff) }, +- { USB_DEVICE_AND_INTERFACE_INFO(HUAWEI_VENDOR_ID, HUAWEI_PRODUCT_E1438, 0xff, 0xff, 0xff) }, +- { USB_DEVICE_AND_INTERFACE_INFO(HUAWEI_VENDOR_ID, HUAWEI_PRODUCT_E1439, 0xff, 0xff, 0xff) }, +- { USB_DEVICE_AND_INTERFACE_INFO(HUAWEI_VENDOR_ID, HUAWEI_PRODUCT_E143A, 0xff, 0xff, 0xff) }, +- { USB_DEVICE_AND_INTERFACE_INFO(HUAWEI_VENDOR_ID, HUAWEI_PRODUCT_E143B, 0xff, 0xff, 0xff) }, +- { USB_DEVICE_AND_INTERFACE_INFO(HUAWEI_VENDOR_ID, HUAWEI_PRODUCT_E143C, 0xff, 0xff, 0xff) }, +- { USB_DEVICE_AND_INTERFACE_INFO(HUAWEI_VENDOR_ID, HUAWEI_PRODUCT_E143D, 0xff, 0xff, 0xff) }, +- { USB_DEVICE_AND_INTERFACE_INFO(HUAWEI_VENDOR_ID, HUAWEI_PRODUCT_E143E, 0xff, 0xff, 0xff) }, +- { USB_DEVICE_AND_INTERFACE_INFO(HUAWEI_VENDOR_ID, HUAWEI_PRODUCT_E143F, 0xff, 0xff, 0xff) }, +- { USB_DEVICE_AND_INTERFACE_INFO(HUAWEI_VENDOR_ID, HUAWEI_PRODUCT_E173S, 0xff, 0xff, 0xff) }, + { USB_DEVICE_AND_INTERFACE_INFO(HUAWEI_VENDOR_ID, HUAWEI_PRODUCT_K4505, 0xff, 0xff, 0xff), + .driver_info = (kernel_ulong_t) &huawei_cdc12_blacklist }, + { USB_DEVICE_AND_INTERFACE_INFO(HUAWEI_VENDOR_ID, HUAWEI_PRODUCT_K3765, 0xff, 0xff, 0xff), + .driver_info = (kernel_ulong_t) &huawei_cdc12_blacklist }, +- { USB_DEVICE_AND_INTERFACE_INFO(HUAWEI_VENDOR_ID, HUAWEI_PRODUCT_ETS1220, 0xff, 0xff, 0xff) }, +- { USB_DEVICE_AND_INTERFACE_INFO(HUAWEI_VENDOR_ID, HUAWEI_PRODUCT_E14AC, 0xff, 0xff, 0xff) }, +- { USB_DEVICE_AND_INTERFACE_INFO(HUAWEI_VENDOR_ID, HUAWEI_PRODUCT_K3806, 0xff, 0xff, 0xff) }, + { USB_DEVICE_AND_INTERFACE_INFO(HUAWEI_VENDOR_ID, HUAWEI_PRODUCT_K4605, 0xff, 0xff, 0xff), + .driver_info = (kernel_ulong_t) &huawei_cdc12_blacklist }, +- { USB_DEVICE_AND_INTERFACE_INFO(HUAWEI_VENDOR_ID, HUAWEI_PRODUCT_K4605, 0xff, 0x01, 0x31) }, +- { USB_DEVICE_AND_INTERFACE_INFO(HUAWEI_VENDOR_ID, HUAWEI_PRODUCT_K4605, 0xff, 0x01, 0x32) }, +- { USB_DEVICE_AND_INTERFACE_INFO(HUAWEI_VENDOR_ID, HUAWEI_PRODUCT_K3770, 0xff, 0x02, 0x31) }, +- { USB_DEVICE_AND_INTERFACE_INFO(HUAWEI_VENDOR_ID, HUAWEI_PRODUCT_K3770, 0xff, 0x02, 0x32) }, +- { USB_DEVICE_AND_INTERFACE_INFO(HUAWEI_VENDOR_ID, HUAWEI_PRODUCT_K3771, 0xff, 0x02, 0x31) }, +- { USB_DEVICE_AND_INTERFACE_INFO(HUAWEI_VENDOR_ID, HUAWEI_PRODUCT_K3771, 0xff, 0x02, 0x32) }, +- { USB_DEVICE_AND_INTERFACE_INFO(HUAWEI_VENDOR_ID, HUAWEI_PRODUCT_K4510, 0xff, 0x01, 0x31) }, +- { USB_DEVICE_AND_INTERFACE_INFO(HUAWEI_VENDOR_ID, HUAWEI_PRODUCT_K4510, 0xff, 0x01, 0x32) }, +- { USB_DEVICE_AND_INTERFACE_INFO(HUAWEI_VENDOR_ID, HUAWEI_PRODUCT_K4511, 0xff, 0x01, 0x31) }, +- { 
USB_DEVICE_AND_INTERFACE_INFO(HUAWEI_VENDOR_ID, HUAWEI_PRODUCT_K4511, 0xff, 0x01, 0x32) }, +- { USB_DEVICE_AND_INTERFACE_INFO(HUAWEI_VENDOR_ID, HUAWEI_PRODUCT_E353, 0xff, 0x01, 0x01) }, +- { USB_DEVICE_AND_INTERFACE_INFO(HUAWEI_VENDOR_ID, HUAWEI_PRODUCT_E353, 0xff, 0x01, 0x02) }, +- { USB_DEVICE_AND_INTERFACE_INFO(HUAWEI_VENDOR_ID, HUAWEI_PRODUCT_E353, 0xff, 0x01, 0x03) }, +- { USB_DEVICE_AND_INTERFACE_INFO(HUAWEI_VENDOR_ID, HUAWEI_PRODUCT_E353, 0xff, 0x01, 0x10) }, +- { USB_DEVICE_AND_INTERFACE_INFO(HUAWEI_VENDOR_ID, HUAWEI_PRODUCT_E353, 0xff, 0x01, 0x12) }, +- { USB_DEVICE_AND_INTERFACE_INFO(HUAWEI_VENDOR_ID, HUAWEI_PRODUCT_E353, 0xff, 0x01, 0x13) }, +- { USB_DEVICE_AND_INTERFACE_INFO(HUAWEI_VENDOR_ID, HUAWEI_PRODUCT_E353, 0xff, 0x02, 0x01) }, /* E398 3G Modem */ +- { USB_DEVICE_AND_INTERFACE_INFO(HUAWEI_VENDOR_ID, HUAWEI_PRODUCT_E353, 0xff, 0x02, 0x02) }, /* E398 3G PC UI Interface */ +- { USB_DEVICE_AND_INTERFACE_INFO(HUAWEI_VENDOR_ID, HUAWEI_PRODUCT_E353, 0xff, 0x02, 0x03) }, /* E398 3G Application Interface */ ++ { USB_VENDOR_AND_INTERFACE_INFO(HUAWEI_VENDOR_ID, 0xff, 0xff, 0xff) }, ++ { USB_VENDOR_AND_INTERFACE_INFO(HUAWEI_VENDOR_ID, 0xff, 0x01, 0x01) }, ++ { USB_VENDOR_AND_INTERFACE_INFO(HUAWEI_VENDOR_ID, 0xff, 0x01, 0x02) }, ++ { USB_VENDOR_AND_INTERFACE_INFO(HUAWEI_VENDOR_ID, 0xff, 0x01, 0x03) }, ++ { USB_VENDOR_AND_INTERFACE_INFO(HUAWEI_VENDOR_ID, 0xff, 0x01, 0x04) }, ++ { USB_VENDOR_AND_INTERFACE_INFO(HUAWEI_VENDOR_ID, 0xff, 0x01, 0x05) }, ++ { USB_VENDOR_AND_INTERFACE_INFO(HUAWEI_VENDOR_ID, 0xff, 0x01, 0x06) }, ++ { USB_VENDOR_AND_INTERFACE_INFO(HUAWEI_VENDOR_ID, 0xff, 0x01, 0x0A) }, ++ { USB_VENDOR_AND_INTERFACE_INFO(HUAWEI_VENDOR_ID, 0xff, 0x01, 0x0B) }, ++ { USB_VENDOR_AND_INTERFACE_INFO(HUAWEI_VENDOR_ID, 0xff, 0x01, 0x0D) }, ++ { USB_VENDOR_AND_INTERFACE_INFO(HUAWEI_VENDOR_ID, 0xff, 0x01, 0x0E) }, ++ { USB_VENDOR_AND_INTERFACE_INFO(HUAWEI_VENDOR_ID, 0xff, 0x01, 0x0F) }, ++ { USB_VENDOR_AND_INTERFACE_INFO(HUAWEI_VENDOR_ID, 0xff, 0x01, 0x10) }, ++ { USB_VENDOR_AND_INTERFACE_INFO(HUAWEI_VENDOR_ID, 0xff, 0x01, 0x12) }, ++ { USB_VENDOR_AND_INTERFACE_INFO(HUAWEI_VENDOR_ID, 0xff, 0x01, 0x13) }, ++ { USB_VENDOR_AND_INTERFACE_INFO(HUAWEI_VENDOR_ID, 0xff, 0x01, 0x14) }, ++ { USB_VENDOR_AND_INTERFACE_INFO(HUAWEI_VENDOR_ID, 0xff, 0x01, 0x15) }, ++ { USB_VENDOR_AND_INTERFACE_INFO(HUAWEI_VENDOR_ID, 0xff, 0x01, 0x17) }, ++ { USB_VENDOR_AND_INTERFACE_INFO(HUAWEI_VENDOR_ID, 0xff, 0x01, 0x18) }, ++ { USB_VENDOR_AND_INTERFACE_INFO(HUAWEI_VENDOR_ID, 0xff, 0x01, 0x19) }, ++ { USB_VENDOR_AND_INTERFACE_INFO(HUAWEI_VENDOR_ID, 0xff, 0x01, 0x1A) }, ++ { USB_VENDOR_AND_INTERFACE_INFO(HUAWEI_VENDOR_ID, 0xff, 0x01, 0x1B) }, ++ { USB_VENDOR_AND_INTERFACE_INFO(HUAWEI_VENDOR_ID, 0xff, 0x01, 0x1C) }, ++ { USB_VENDOR_AND_INTERFACE_INFO(HUAWEI_VENDOR_ID, 0xff, 0x01, 0x31) }, ++ { USB_VENDOR_AND_INTERFACE_INFO(HUAWEI_VENDOR_ID, 0xff, 0x01, 0x32) }, ++ { USB_VENDOR_AND_INTERFACE_INFO(HUAWEI_VENDOR_ID, 0xff, 0x01, 0x33) }, ++ { USB_VENDOR_AND_INTERFACE_INFO(HUAWEI_VENDOR_ID, 0xff, 0x01, 0x34) }, ++ { USB_VENDOR_AND_INTERFACE_INFO(HUAWEI_VENDOR_ID, 0xff, 0x01, 0x35) }, ++ { USB_VENDOR_AND_INTERFACE_INFO(HUAWEI_VENDOR_ID, 0xff, 0x01, 0x36) }, ++ { USB_VENDOR_AND_INTERFACE_INFO(HUAWEI_VENDOR_ID, 0xff, 0x01, 0x3A) }, ++ { USB_VENDOR_AND_INTERFACE_INFO(HUAWEI_VENDOR_ID, 0xff, 0x01, 0x3B) }, ++ { USB_VENDOR_AND_INTERFACE_INFO(HUAWEI_VENDOR_ID, 0xff, 0x01, 0x3D) }, ++ { USB_VENDOR_AND_INTERFACE_INFO(HUAWEI_VENDOR_ID, 0xff, 0x01, 0x3E) }, ++ { USB_VENDOR_AND_INTERFACE_INFO(HUAWEI_VENDOR_ID, 0xff, 0x01, 0x3F) }, ++ { 
USB_VENDOR_AND_INTERFACE_INFO(HUAWEI_VENDOR_ID, 0xff, 0x01, 0x48) }, ++ { USB_VENDOR_AND_INTERFACE_INFO(HUAWEI_VENDOR_ID, 0xff, 0x01, 0x49) }, ++ { USB_VENDOR_AND_INTERFACE_INFO(HUAWEI_VENDOR_ID, 0xff, 0x01, 0x4A) }, ++ { USB_VENDOR_AND_INTERFACE_INFO(HUAWEI_VENDOR_ID, 0xff, 0x01, 0x4B) }, ++ { USB_VENDOR_AND_INTERFACE_INFO(HUAWEI_VENDOR_ID, 0xff, 0x01, 0x4C) }, ++ { USB_VENDOR_AND_INTERFACE_INFO(HUAWEI_VENDOR_ID, 0xff, 0x01, 0x61) }, ++ { USB_VENDOR_AND_INTERFACE_INFO(HUAWEI_VENDOR_ID, 0xff, 0x01, 0x62) }, ++ { USB_VENDOR_AND_INTERFACE_INFO(HUAWEI_VENDOR_ID, 0xff, 0x01, 0x63) }, ++ { USB_VENDOR_AND_INTERFACE_INFO(HUAWEI_VENDOR_ID, 0xff, 0x01, 0x64) }, ++ { USB_VENDOR_AND_INTERFACE_INFO(HUAWEI_VENDOR_ID, 0xff, 0x01, 0x65) }, ++ { USB_VENDOR_AND_INTERFACE_INFO(HUAWEI_VENDOR_ID, 0xff, 0x01, 0x66) }, ++ { USB_VENDOR_AND_INTERFACE_INFO(HUAWEI_VENDOR_ID, 0xff, 0x01, 0x6A) }, ++ { USB_VENDOR_AND_INTERFACE_INFO(HUAWEI_VENDOR_ID, 0xff, 0x01, 0x6B) }, ++ { USB_VENDOR_AND_INTERFACE_INFO(HUAWEI_VENDOR_ID, 0xff, 0x01, 0x6D) }, ++ { USB_VENDOR_AND_INTERFACE_INFO(HUAWEI_VENDOR_ID, 0xff, 0x01, 0x6E) }, ++ { USB_VENDOR_AND_INTERFACE_INFO(HUAWEI_VENDOR_ID, 0xff, 0x01, 0x6F) }, ++ { USB_VENDOR_AND_INTERFACE_INFO(HUAWEI_VENDOR_ID, 0xff, 0x01, 0x78) }, ++ { USB_VENDOR_AND_INTERFACE_INFO(HUAWEI_VENDOR_ID, 0xff, 0x01, 0x79) }, ++ { USB_VENDOR_AND_INTERFACE_INFO(HUAWEI_VENDOR_ID, 0xff, 0x01, 0x7A) }, ++ { USB_VENDOR_AND_INTERFACE_INFO(HUAWEI_VENDOR_ID, 0xff, 0x01, 0x7B) }, ++ { USB_VENDOR_AND_INTERFACE_INFO(HUAWEI_VENDOR_ID, 0xff, 0x01, 0x7C) }, ++ { USB_VENDOR_AND_INTERFACE_INFO(HUAWEI_VENDOR_ID, 0xff, 0x02, 0x01) }, ++ { USB_VENDOR_AND_INTERFACE_INFO(HUAWEI_VENDOR_ID, 0xff, 0x02, 0x02) }, ++ { USB_VENDOR_AND_INTERFACE_INFO(HUAWEI_VENDOR_ID, 0xff, 0x02, 0x03) }, ++ { USB_VENDOR_AND_INTERFACE_INFO(HUAWEI_VENDOR_ID, 0xff, 0x02, 0x04) }, ++ { USB_VENDOR_AND_INTERFACE_INFO(HUAWEI_VENDOR_ID, 0xff, 0x02, 0x05) }, ++ { USB_VENDOR_AND_INTERFACE_INFO(HUAWEI_VENDOR_ID, 0xff, 0x02, 0x06) }, ++ { USB_VENDOR_AND_INTERFACE_INFO(HUAWEI_VENDOR_ID, 0xff, 0x02, 0x0A) }, ++ { USB_VENDOR_AND_INTERFACE_INFO(HUAWEI_VENDOR_ID, 0xff, 0x02, 0x0B) }, ++ { USB_VENDOR_AND_INTERFACE_INFO(HUAWEI_VENDOR_ID, 0xff, 0x02, 0x0D) }, ++ { USB_VENDOR_AND_INTERFACE_INFO(HUAWEI_VENDOR_ID, 0xff, 0x02, 0x0E) }, ++ { USB_VENDOR_AND_INTERFACE_INFO(HUAWEI_VENDOR_ID, 0xff, 0x02, 0x0F) }, ++ { USB_VENDOR_AND_INTERFACE_INFO(HUAWEI_VENDOR_ID, 0xff, 0x02, 0x10) }, ++ { USB_VENDOR_AND_INTERFACE_INFO(HUAWEI_VENDOR_ID, 0xff, 0x02, 0x12) }, ++ { USB_VENDOR_AND_INTERFACE_INFO(HUAWEI_VENDOR_ID, 0xff, 0x02, 0x13) }, ++ { USB_VENDOR_AND_INTERFACE_INFO(HUAWEI_VENDOR_ID, 0xff, 0x02, 0x14) }, ++ { USB_VENDOR_AND_INTERFACE_INFO(HUAWEI_VENDOR_ID, 0xff, 0x02, 0x15) }, ++ { USB_VENDOR_AND_INTERFACE_INFO(HUAWEI_VENDOR_ID, 0xff, 0x02, 0x17) }, ++ { USB_VENDOR_AND_INTERFACE_INFO(HUAWEI_VENDOR_ID, 0xff, 0x02, 0x18) }, ++ { USB_VENDOR_AND_INTERFACE_INFO(HUAWEI_VENDOR_ID, 0xff, 0x02, 0x19) }, ++ { USB_VENDOR_AND_INTERFACE_INFO(HUAWEI_VENDOR_ID, 0xff, 0x02, 0x1A) }, ++ { USB_VENDOR_AND_INTERFACE_INFO(HUAWEI_VENDOR_ID, 0xff, 0x02, 0x1B) }, ++ { USB_VENDOR_AND_INTERFACE_INFO(HUAWEI_VENDOR_ID, 0xff, 0x02, 0x1C) }, ++ { USB_VENDOR_AND_INTERFACE_INFO(HUAWEI_VENDOR_ID, 0xff, 0x02, 0x31) }, ++ { USB_VENDOR_AND_INTERFACE_INFO(HUAWEI_VENDOR_ID, 0xff, 0x02, 0x32) }, ++ { USB_VENDOR_AND_INTERFACE_INFO(HUAWEI_VENDOR_ID, 0xff, 0x02, 0x33) }, ++ { USB_VENDOR_AND_INTERFACE_INFO(HUAWEI_VENDOR_ID, 0xff, 0x02, 0x34) }, ++ { USB_VENDOR_AND_INTERFACE_INFO(HUAWEI_VENDOR_ID, 0xff, 0x02, 0x35) }, ++ { 
USB_VENDOR_AND_INTERFACE_INFO(HUAWEI_VENDOR_ID, 0xff, 0x02, 0x36) }, ++ { USB_VENDOR_AND_INTERFACE_INFO(HUAWEI_VENDOR_ID, 0xff, 0x02, 0x3A) }, ++ { USB_VENDOR_AND_INTERFACE_INFO(HUAWEI_VENDOR_ID, 0xff, 0x02, 0x3B) }, ++ { USB_VENDOR_AND_INTERFACE_INFO(HUAWEI_VENDOR_ID, 0xff, 0x02, 0x3D) }, ++ { USB_VENDOR_AND_INTERFACE_INFO(HUAWEI_VENDOR_ID, 0xff, 0x02, 0x3E) }, ++ { USB_VENDOR_AND_INTERFACE_INFO(HUAWEI_VENDOR_ID, 0xff, 0x02, 0x3F) }, ++ { USB_VENDOR_AND_INTERFACE_INFO(HUAWEI_VENDOR_ID, 0xff, 0x02, 0x48) }, ++ { USB_VENDOR_AND_INTERFACE_INFO(HUAWEI_VENDOR_ID, 0xff, 0x02, 0x49) }, ++ { USB_VENDOR_AND_INTERFACE_INFO(HUAWEI_VENDOR_ID, 0xff, 0x02, 0x4A) }, ++ { USB_VENDOR_AND_INTERFACE_INFO(HUAWEI_VENDOR_ID, 0xff, 0x02, 0x4B) }, ++ { USB_VENDOR_AND_INTERFACE_INFO(HUAWEI_VENDOR_ID, 0xff, 0x02, 0x4C) }, ++ { USB_VENDOR_AND_INTERFACE_INFO(HUAWEI_VENDOR_ID, 0xff, 0x02, 0x61) }, ++ { USB_VENDOR_AND_INTERFACE_INFO(HUAWEI_VENDOR_ID, 0xff, 0x02, 0x62) }, ++ { USB_VENDOR_AND_INTERFACE_INFO(HUAWEI_VENDOR_ID, 0xff, 0x02, 0x63) }, ++ { USB_VENDOR_AND_INTERFACE_INFO(HUAWEI_VENDOR_ID, 0xff, 0x02, 0x64) }, ++ { USB_VENDOR_AND_INTERFACE_INFO(HUAWEI_VENDOR_ID, 0xff, 0x02, 0x65) }, ++ { USB_VENDOR_AND_INTERFACE_INFO(HUAWEI_VENDOR_ID, 0xff, 0x02, 0x66) }, ++ { USB_VENDOR_AND_INTERFACE_INFO(HUAWEI_VENDOR_ID, 0xff, 0x02, 0x6A) }, ++ { USB_VENDOR_AND_INTERFACE_INFO(HUAWEI_VENDOR_ID, 0xff, 0x02, 0x6B) }, ++ { USB_VENDOR_AND_INTERFACE_INFO(HUAWEI_VENDOR_ID, 0xff, 0x02, 0x6D) }, ++ { USB_VENDOR_AND_INTERFACE_INFO(HUAWEI_VENDOR_ID, 0xff, 0x02, 0x6E) }, ++ { USB_VENDOR_AND_INTERFACE_INFO(HUAWEI_VENDOR_ID, 0xff, 0x02, 0x6F) }, ++ { USB_VENDOR_AND_INTERFACE_INFO(HUAWEI_VENDOR_ID, 0xff, 0x02, 0x78) }, ++ { USB_VENDOR_AND_INTERFACE_INFO(HUAWEI_VENDOR_ID, 0xff, 0x02, 0x79) }, ++ { USB_VENDOR_AND_INTERFACE_INFO(HUAWEI_VENDOR_ID, 0xff, 0x02, 0x7A) }, ++ { USB_VENDOR_AND_INTERFACE_INFO(HUAWEI_VENDOR_ID, 0xff, 0x02, 0x7B) }, ++ { USB_VENDOR_AND_INTERFACE_INFO(HUAWEI_VENDOR_ID, 0xff, 0x02, 0x7C) }, ++ ++ + { USB_DEVICE(NOVATELWIRELESS_VENDOR_ID, NOVATELWIRELESS_PRODUCT_V640) }, + { USB_DEVICE(NOVATELWIRELESS_VENDOR_ID, NOVATELWIRELESS_PRODUCT_V620) }, + { USB_DEVICE(NOVATELWIRELESS_VENDOR_ID, NOVATELWIRELESS_PRODUCT_V740) }, +diff --git a/drivers/video/console/fbcon.c b/drivers/video/console/fbcon.c +index 8745637..bf9a9b7 100644 +--- a/drivers/video/console/fbcon.c ++++ b/drivers/video/console/fbcon.c +@@ -373,8 +373,15 @@ static void fb_flashcursor(struct work_struct *work) + struct vc_data *vc = NULL; + int c; + int mode; ++ int ret; ++ ++ /* FIXME: we should sort out the unbind locking instead */ ++ /* instead we just fail to flash the cursor if we can't get ++ * the lock instead of blocking fbcon deinit */ ++ ret = console_trylock(); ++ if (ret == 0) ++ return; + +- console_lock(); + if (ops && ops->currcon != -1) + vc = vc_cons[ops->currcon].d; + +diff --git a/fs/buffer.c b/fs/buffer.c +index 4115eca..19a4f0b 100644 +--- a/fs/buffer.c ++++ b/fs/buffer.c +@@ -964,7 +964,7 @@ link_dev_buffers(struct page *page, struct buffer_head *head) + /* + * Initialise the state of a blockdev page's buffers. + */ +-static void ++static sector_t + init_page_buffers(struct page *page, struct block_device *bdev, + sector_t block, int size) + { +@@ -986,33 +986,41 @@ init_page_buffers(struct page *page, struct block_device *bdev, + block++; + bh = bh->b_this_page; + } while (bh != head); ++ ++ /* ++ * Caller needs to validate requested block against end of device. 
++ */ ++ return end_block; + } + + /* + * Create the page-cache page that contains the requested block. + * +- * This is user purely for blockdev mappings. ++ * This is used purely for blockdev mappings. + */ +-static struct page * ++static int + grow_dev_page(struct block_device *bdev, sector_t block, +- pgoff_t index, int size) ++ pgoff_t index, int size, int sizebits) + { + struct inode *inode = bdev->bd_inode; + struct page *page; + struct buffer_head *bh; ++ sector_t end_block; ++ int ret = 0; /* Will call free_more_memory() */ + + page = find_or_create_page(inode->i_mapping, index, + (mapping_gfp_mask(inode->i_mapping) & ~__GFP_FS)|__GFP_MOVABLE); + if (!page) +- return NULL; ++ return ret; + + BUG_ON(!PageLocked(page)); + + if (page_has_buffers(page)) { + bh = page_buffers(page); + if (bh->b_size == size) { +- init_page_buffers(page, bdev, block, size); +- return page; ++ end_block = init_page_buffers(page, bdev, ++ index << sizebits, size); ++ goto done; + } + if (!try_to_free_buffers(page)) + goto failed; +@@ -1032,15 +1040,14 @@ grow_dev_page(struct block_device *bdev, sector_t block, + */ + spin_lock(&inode->i_mapping->private_lock); + link_dev_buffers(page, bh); +- init_page_buffers(page, bdev, block, size); ++ end_block = init_page_buffers(page, bdev, index << sizebits, size); + spin_unlock(&inode->i_mapping->private_lock); +- return page; +- ++done: ++ ret = (block < end_block) ? 1 : -ENXIO; + failed: +- BUG(); + unlock_page(page); + page_cache_release(page); +- return NULL; ++ return ret; + } + + /* +@@ -1050,7 +1057,6 @@ failed: + static int + grow_buffers(struct block_device *bdev, sector_t block, int size) + { +- struct page *page; + pgoff_t index; + int sizebits; + +@@ -1074,22 +1080,14 @@ grow_buffers(struct block_device *bdev, sector_t block, int size) + bdevname(bdev, b)); + return -EIO; + } +- block = index << sizebits; ++ + /* Create a page with the proper size buffers.. */ +- page = grow_dev_page(bdev, block, index, size); +- if (!page) +- return 0; +- unlock_page(page); +- page_cache_release(page); +- return 1; ++ return grow_dev_page(bdev, block, index, size, sizebits); + } + + static struct buffer_head * + __getblk_slow(struct block_device *bdev, sector_t block, int size) + { +- int ret; +- struct buffer_head *bh; +- + /* Size must be multiple of hard sectorsize */ + if (unlikely(size & (bdev_logical_block_size(bdev)-1) || + (size < 512 || size > PAGE_SIZE))) { +@@ -1102,21 +1100,20 @@ __getblk_slow(struct block_device *bdev, sector_t block, int size) + return NULL; + } + +-retry: +- bh = __find_get_block(bdev, block, size); +- if (bh) +- return bh; ++ for (;;) { ++ struct buffer_head *bh; ++ int ret; + +- ret = grow_buffers(bdev, block, size); +- if (ret == 0) { +- free_more_memory(); +- goto retry; +- } else if (ret > 0) { + bh = __find_get_block(bdev, block, size); + if (bh) + return bh; ++ ++ ret = grow_buffers(bdev, block, size); ++ if (ret < 0) ++ return NULL; ++ if (ret == 0) ++ free_more_memory(); + } +- return NULL; + } + + /* +@@ -1372,10 +1369,6 @@ EXPORT_SYMBOL(__find_get_block); + * which corresponds to the passed block_device, block and size. The + * returned buffer has its reference count incremented. + * +- * __getblk() cannot fail - it just keeps trying. If you pass it an +- * illegal block number, __getblk() will happily return a buffer_head +- * which represents the non-existent block. Very weird. +- * + * __getblk() will lock up the machine if grow_dev_page's try_to_free_buffers() + * attempt is failing. FIXME, perhaps? 
+ */ +diff --git a/fs/compat.c b/fs/compat.c +index c987875..e07a3d3 100644 +--- a/fs/compat.c ++++ b/fs/compat.c +@@ -1174,11 +1174,14 @@ compat_sys_readv(unsigned long fd, const struct compat_iovec __user *vec, + struct file *file; + int fput_needed; + ssize_t ret; ++ loff_t pos; + + file = fget_light(fd, &fput_needed); + if (!file) + return -EBADF; +- ret = compat_readv(file, vec, vlen, &file->f_pos); ++ pos = file->f_pos; ++ ret = compat_readv(file, vec, vlen, &pos); ++ file->f_pos = pos; + fput_light(file, fput_needed); + return ret; + } +@@ -1233,11 +1236,14 @@ compat_sys_writev(unsigned long fd, const struct compat_iovec __user *vec, + struct file *file; + int fput_needed; + ssize_t ret; ++ loff_t pos; + + file = fget_light(fd, &fput_needed); + if (!file) + return -EBADF; +- ret = compat_writev(file, vec, vlen, &file->f_pos); ++ pos = file->f_pos; ++ ret = compat_writev(file, vec, vlen, &pos); ++ file->f_pos = pos; + fput_light(file, fput_needed); + return ret; + } +diff --git a/fs/ext4/super.c b/fs/ext4/super.c +index a071348..f8d5fce 100644 +--- a/fs/ext4/super.c ++++ b/fs/ext4/super.c +@@ -904,6 +904,7 @@ static struct inode *ext4_alloc_inode(struct super_block *sb) + ei->i_reserved_meta_blocks = 0; + ei->i_allocated_meta_blocks = 0; + ei->i_da_metadata_calc_len = 0; ++ ei->i_da_metadata_calc_last_lblock = 0; + spin_lock_init(&(ei->i_block_reservation_lock)); + #ifdef CONFIG_QUOTA + ei->i_reserved_quota = 0; +@@ -3107,6 +3108,10 @@ static int count_overhead(struct super_block *sb, ext4_group_t grp, + ext4_group_t i, ngroups = ext4_get_groups_count(sb); + int s, j, count = 0; + ++ if (!EXT4_HAS_RO_COMPAT_FEATURE(sb, EXT4_FEATURE_RO_COMPAT_BIGALLOC)) ++ return (ext4_bg_has_super(sb, grp) + ext4_bg_num_gdb(sb, grp) + ++ sbi->s_itb_per_group + 2); ++ + first_block = le32_to_cpu(sbi->s_es->s_first_data_block) + + (grp * EXT4_BLOCKS_PER_GROUP(sb)); + last_block = first_block + EXT4_BLOCKS_PER_GROUP(sb) - 1; +diff --git a/fs/fuse/file.c b/fs/fuse/file.c +index 0c84100..5242006 100644 +--- a/fs/fuse/file.c ++++ b/fs/fuse/file.c +@@ -1687,7 +1687,7 @@ static int fuse_verify_ioctl_iov(struct iovec *iov, size_t count) + size_t n; + u32 max = FUSE_MAX_PAGES_PER_REQ << PAGE_SHIFT; + +- for (n = 0; n < count; n++) { ++ for (n = 0; n < count; n++, iov++) { + if (iov->iov_len > (size_t) max) + return -ENOMEM; + max -= iov->iov_len; +diff --git a/fs/nfs/blocklayout/blocklayout.c b/fs/nfs/blocklayout/blocklayout.c +index 3db6b82..d774309 100644 +--- a/fs/nfs/blocklayout/blocklayout.c ++++ b/fs/nfs/blocklayout/blocklayout.c +@@ -38,6 +38,8 @@ + #include /* various write calls */ + #include + ++#include "../pnfs.h" ++#include "../internal.h" + #include "blocklayout.h" + + #define NFSDBG_FACILITY NFSDBG_PNFS_LD +@@ -814,7 +816,7 @@ nfs4_blk_get_deviceinfo(struct nfs_server *server, const struct nfs_fh *fh, + * GETDEVICEINFO's maxcount + */ + max_resp_sz = server->nfs_client->cl_session->fc_attrs.max_resp_sz; +- max_pages = max_resp_sz >> PAGE_SHIFT; ++ max_pages = nfs_page_array_len(0, max_resp_sz); + dprintk("%s max_resp_sz %u max_pages %d\n", + __func__, max_resp_sz, max_pages); + +diff --git a/fs/nfs/blocklayout/extents.c b/fs/nfs/blocklayout/extents.c +index c69682a..4e2ee99 100644 +--- a/fs/nfs/blocklayout/extents.c ++++ b/fs/nfs/blocklayout/extents.c +@@ -153,7 +153,7 @@ static int _preload_range(struct pnfs_inval_markings *marks, + count = (int)(end - start) / (int)tree->mtt_step_size; + + /* Pre-malloc what memory we might need */ +- storage = kmalloc(sizeof(*storage) * count, GFP_NOFS); ++ 
storage = kcalloc(count, sizeof(*storage), GFP_NOFS); + if (!storage) + return -ENOMEM; + for (i = 0; i < count; i++) { +diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c +index ac28990..756f4df 100644 +--- a/fs/nfs/dir.c ++++ b/fs/nfs/dir.c +@@ -1103,7 +1103,7 @@ static int nfs_lookup_revalidate(struct dentry *dentry, struct nameidata *nd) + struct nfs_fattr *fattr = NULL; + int error; + +- if (nd->flags & LOOKUP_RCU) ++ if (nd && (nd->flags & LOOKUP_RCU)) + return -ECHILD; + + parent = dget_parent(dentry); +@@ -1508,7 +1508,7 @@ static int nfs_open_revalidate(struct dentry *dentry, struct nameidata *nd) + struct nfs_open_context *ctx; + int openflags, ret = 0; + +- if (nd->flags & LOOKUP_RCU) ++ if (nd && (nd->flags & LOOKUP_RCU)) + return -ECHILD; + + inode = dentry->d_inode; +diff --git a/fs/nfs/nfs3proc.c b/fs/nfs/nfs3proc.c +index d4bc9ed9..5195fd6 100644 +--- a/fs/nfs/nfs3proc.c ++++ b/fs/nfs/nfs3proc.c +@@ -68,7 +68,7 @@ do_proc_get_root(struct rpc_clnt *client, struct nfs_fh *fhandle, + nfs_fattr_init(info->fattr); + status = rpc_call_sync(client, &msg, 0); + dprintk("%s: reply fsinfo: %d\n", __func__, status); +- if (!(info->fattr->valid & NFS_ATTR_FATTR)) { ++ if (status == 0 && !(info->fattr->valid & NFS_ATTR_FATTR)) { + msg.rpc_proc = &nfs3_procedures[NFS3PROC_GETATTR]; + msg.rpc_resp = info->fattr; + status = rpc_call_sync(client, &msg, 0); +diff --git a/fs/nfs/nfs4filelayoutdev.c b/fs/nfs/nfs4filelayoutdev.c +index ed388aa..bd5d9cf 100644 +--- a/fs/nfs/nfs4filelayoutdev.c ++++ b/fs/nfs/nfs4filelayoutdev.c +@@ -721,7 +721,7 @@ get_device_info(struct inode *inode, struct nfs4_deviceid *dev_id, gfp_t gfp_fla + * GETDEVICEINFO's maxcount + */ + max_resp_sz = server->nfs_client->cl_session->fc_attrs.max_resp_sz; +- max_pages = max_resp_sz >> PAGE_SHIFT; ++ max_pages = nfs_page_array_len(0, max_resp_sz); + dprintk("%s inode %p max_resp_sz %u max_pages %d\n", + __func__, inode, max_resp_sz, max_pages); + +diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c +index 8000459..d20221d 100644 +--- a/fs/nfs/nfs4proc.c ++++ b/fs/nfs/nfs4proc.c +@@ -5769,11 +5769,58 @@ static void nfs4_layoutget_done(struct rpc_task *task, void *calldata) + dprintk("<-- %s\n", __func__); + } + ++static size_t max_response_pages(struct nfs_server *server) ++{ ++ u32 max_resp_sz = server->nfs_client->cl_session->fc_attrs.max_resp_sz; ++ return nfs_page_array_len(0, max_resp_sz); ++} ++ ++static void nfs4_free_pages(struct page **pages, size_t size) ++{ ++ int i; ++ ++ if (!pages) ++ return; ++ ++ for (i = 0; i < size; i++) { ++ if (!pages[i]) ++ break; ++ __free_page(pages[i]); ++ } ++ kfree(pages); ++} ++ ++static struct page **nfs4_alloc_pages(size_t size, gfp_t gfp_flags) ++{ ++ struct page **pages; ++ int i; ++ ++ pages = kcalloc(size, sizeof(struct page *), gfp_flags); ++ if (!pages) { ++ dprintk("%s: can't alloc array of %zu pages\n", __func__, size); ++ return NULL; ++ } ++ ++ for (i = 0; i < size; i++) { ++ pages[i] = alloc_page(gfp_flags); ++ if (!pages[i]) { ++ dprintk("%s: failed to allocate page\n", __func__); ++ nfs4_free_pages(pages, size); ++ return NULL; ++ } ++ } ++ ++ return pages; ++} ++ + static void nfs4_layoutget_release(void *calldata) + { + struct nfs4_layoutget *lgp = calldata; ++ struct nfs_server *server = NFS_SERVER(lgp->args.inode); ++ size_t max_pages = max_response_pages(server); + + dprintk("--> %s\n", __func__); ++ nfs4_free_pages(lgp->args.layout.pages, max_pages); + put_nfs_open_context(lgp->args.ctx); + kfree(calldata); + dprintk("<-- %s\n", __func__); +@@ -5785,9 +5832,10 @@ static 
const struct rpc_call_ops nfs4_layoutget_call_ops = { + .rpc_release = nfs4_layoutget_release, + }; + +-int nfs4_proc_layoutget(struct nfs4_layoutget *lgp) ++int nfs4_proc_layoutget(struct nfs4_layoutget *lgp, gfp_t gfp_flags) + { + struct nfs_server *server = NFS_SERVER(lgp->args.inode); ++ size_t max_pages = max_response_pages(server); + struct rpc_task *task; + struct rpc_message msg = { + .rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_LAYOUTGET], +@@ -5805,6 +5853,13 @@ int nfs4_proc_layoutget(struct nfs4_layoutget *lgp) + + dprintk("--> %s\n", __func__); + ++ lgp->args.layout.pages = nfs4_alloc_pages(max_pages, gfp_flags); ++ if (!lgp->args.layout.pages) { ++ nfs4_layoutget_release(lgp); ++ return -ENOMEM; ++ } ++ lgp->args.layout.pglen = max_pages * PAGE_SIZE; ++ + lgp->res.layoutp = &lgp->args.layout; + lgp->res.seq_res.sr_slot = NULL; + task = rpc_run_task(&task_setup_data); +diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c +index f881a63..3ad6595 100644 +--- a/fs/nfs/pnfs.c ++++ b/fs/nfs/pnfs.c +@@ -575,9 +575,6 @@ send_layoutget(struct pnfs_layout_hdr *lo, + struct nfs_server *server = NFS_SERVER(ino); + struct nfs4_layoutget *lgp; + struct pnfs_layout_segment *lseg = NULL; +- struct page **pages = NULL; +- int i; +- u32 max_resp_sz, max_pages; + + dprintk("--> %s\n", __func__); + +@@ -586,20 +583,6 @@ send_layoutget(struct pnfs_layout_hdr *lo, + if (lgp == NULL) + return NULL; + +- /* allocate pages for xdr post processing */ +- max_resp_sz = server->nfs_client->cl_session->fc_attrs.max_resp_sz; +- max_pages = max_resp_sz >> PAGE_SHIFT; +- +- pages = kzalloc(max_pages * sizeof(struct page *), gfp_flags); +- if (!pages) +- goto out_err_free; +- +- for (i = 0; i < max_pages; i++) { +- pages[i] = alloc_page(gfp_flags); +- if (!pages[i]) +- goto out_err_free; +- } +- + lgp->args.minlength = PAGE_CACHE_SIZE; + if (lgp->args.minlength > range->length) + lgp->args.minlength = range->length; +@@ -608,39 +591,19 @@ send_layoutget(struct pnfs_layout_hdr *lo, + lgp->args.type = server->pnfs_curr_ld->id; + lgp->args.inode = ino; + lgp->args.ctx = get_nfs_open_context(ctx); +- lgp->args.layout.pages = pages; +- lgp->args.layout.pglen = max_pages * PAGE_SIZE; + lgp->lsegpp = &lseg; + lgp->gfp_flags = gfp_flags; + + /* Synchronously retrieve layout information from server and + * store in lseg. 
+ */ +- nfs4_proc_layoutget(lgp); ++ nfs4_proc_layoutget(lgp, gfp_flags); + if (!lseg) { + /* remember that LAYOUTGET failed and suspend trying */ + set_bit(lo_fail_bit(range->iomode), &lo->plh_flags); + } + +- /* free xdr pages */ +- for (i = 0; i < max_pages; i++) +- __free_page(pages[i]); +- kfree(pages); +- + return lseg; +- +-out_err_free: +- /* free any allocated xdr pages, lgp as it's not used */ +- if (pages) { +- for (i = 0; i < max_pages; i++) { +- if (!pages[i]) +- break; +- __free_page(pages[i]); +- } +- kfree(pages); +- } +- kfree(lgp); +- return NULL; + } + + /* Initiates a LAYOUTRETURN(FILE) */ +diff --git a/fs/nfs/pnfs.h b/fs/nfs/pnfs.h +index 53d593a..c946b1b 100644 +--- a/fs/nfs/pnfs.h ++++ b/fs/nfs/pnfs.h +@@ -162,7 +162,7 @@ extern int nfs4_proc_getdevicelist(struct nfs_server *server, + struct pnfs_devicelist *devlist); + extern int nfs4_proc_getdeviceinfo(struct nfs_server *server, + struct pnfs_device *dev); +-extern int nfs4_proc_layoutget(struct nfs4_layoutget *lgp); ++extern int nfs4_proc_layoutget(struct nfs4_layoutget *lgp, gfp_t gfp_flags); + extern int nfs4_proc_layoutreturn(struct nfs4_layoutreturn *lrp); + + /* pnfs.c */ +diff --git a/fs/nfs/super.c b/fs/nfs/super.c +index 376cd65..6e85ec6 100644 +--- a/fs/nfs/super.c ++++ b/fs/nfs/super.c +@@ -3087,4 +3087,6 @@ static struct dentry *nfs4_referral_mount(struct file_system_type *fs_type, + return res; + } + ++MODULE_ALIAS("nfs4"); ++ + #endif /* CONFIG_NFS_V4 */ +diff --git a/fs/nfs/write.c b/fs/nfs/write.c +index c6e523a..301391a 100644 +--- a/fs/nfs/write.c ++++ b/fs/nfs/write.c +@@ -1742,12 +1742,12 @@ int __init nfs_init_writepagecache(void) + nfs_wdata_mempool = mempool_create_slab_pool(MIN_POOL_WRITE, + nfs_wdata_cachep); + if (nfs_wdata_mempool == NULL) +- return -ENOMEM; ++ goto out_destroy_write_cache; + + nfs_commit_mempool = mempool_create_slab_pool(MIN_POOL_COMMIT, + nfs_wdata_cachep); + if (nfs_commit_mempool == NULL) +- return -ENOMEM; ++ goto out_destroy_write_mempool; + + /* + * NFS congestion size, scale with available memory. 
+@@ -1770,6 +1770,12 @@ int __init nfs_init_writepagecache(void) + nfs_congestion_kb = 256*1024; + + return 0; ++ ++out_destroy_write_mempool: ++ mempool_destroy(nfs_wdata_mempool); ++out_destroy_write_cache: ++ kmem_cache_destroy(nfs_wdata_cachep); ++ return -ENOMEM; + } + + void nfs_destroy_writepagecache(void) +diff --git a/fs/open.c b/fs/open.c +index e2b5d51..b8485d3 100644 +--- a/fs/open.c ++++ b/fs/open.c +@@ -882,9 +882,10 @@ static inline int build_open_flags(int flags, int mode, struct open_flags *op) + int lookup_flags = 0; + int acc_mode; + +- if (!(flags & O_CREAT)) +- mode = 0; +- op->mode = mode; ++ if (flags & O_CREAT) ++ op->mode = (mode & S_IALLUGO) | S_IFREG; ++ else ++ op->mode = 0; + + /* Must never be set by userspace */ + flags &= ~FMODE_NONOTIFY; +diff --git a/fs/squashfs/super.c b/fs/squashfs/super.c +index 2da1715..4619247 100644 +--- a/fs/squashfs/super.c ++++ b/fs/squashfs/super.c +@@ -290,7 +290,7 @@ handle_fragments: + + check_directory_table: + /* Sanity check directory_table */ +- if (msblk->directory_table >= next_table) { ++ if (msblk->directory_table > next_table) { + err = -EINVAL; + goto failed_mount; + } +diff --git a/include/asm-generic/mutex-xchg.h b/include/asm-generic/mutex-xchg.h +index 580a6d3..c04e0db 100644 +--- a/include/asm-generic/mutex-xchg.h ++++ b/include/asm-generic/mutex-xchg.h +@@ -26,7 +26,13 @@ static inline void + __mutex_fastpath_lock(atomic_t *count, void (*fail_fn)(atomic_t *)) + { + if (unlikely(atomic_xchg(count, 0) != 1)) +- fail_fn(count); ++ /* ++ * We failed to acquire the lock, so mark it contended ++ * to ensure that any waiting tasks are woken up by the ++ * unlock slow path. ++ */ ++ if (likely(atomic_xchg(count, -1) != 1)) ++ fail_fn(count); + } + + /** +@@ -43,7 +49,8 @@ static inline int + __mutex_fastpath_lock_retval(atomic_t *count, int (*fail_fn)(atomic_t *)) + { + if (unlikely(atomic_xchg(count, 0) != 1)) +- return fail_fn(count); ++ if (likely(atomic_xchg(count, -1) != 1)) ++ return fail_fn(count); + return 0; + } + +diff --git a/include/linux/usb.h b/include/linux/usb.h +index 4269c3f..93629fc 100644 +--- a/include/linux/usb.h ++++ b/include/linux/usb.h +@@ -775,6 +775,27 @@ static inline int usb_make_path(struct usb_device *dev, char *buf, size_t size) + .bInterfaceSubClass = (sc), \ + .bInterfaceProtocol = (pr) + ++/** ++ * USB_VENDOR_AND_INTERFACE_INFO - describe a specific usb vendor with a class of usb interfaces ++ * @vend: the 16 bit USB Vendor ID ++ * @cl: bInterfaceClass value ++ * @sc: bInterfaceSubClass value ++ * @pr: bInterfaceProtocol value ++ * ++ * This macro is used to create a struct usb_device_id that matches a ++ * specific vendor with a specific class of interfaces. ++ * ++ * This is especially useful when explicitly matching devices that have ++ * vendor specific bDeviceClass values, but standards-compliant interfaces. 
++ */ ++#define USB_VENDOR_AND_INTERFACE_INFO(vend, cl, sc, pr) \ ++ .match_flags = USB_DEVICE_ID_MATCH_INT_INFO \ ++ | USB_DEVICE_ID_MATCH_VENDOR, \ ++ .idVendor = (vend), \ ++ .bInterfaceClass = (cl), \ ++ .bInterfaceSubClass = (sc), \ ++ .bInterfaceProtocol = (pr) ++ + /* ----------------------------------------------------------------------- */ + + /* Stuff for dynamic usb ids */ +diff --git a/kernel/audit_tree.c b/kernel/audit_tree.c +index 5bf0790..31fdc48 100644 +--- a/kernel/audit_tree.c ++++ b/kernel/audit_tree.c +@@ -250,7 +250,6 @@ static void untag_chunk(struct node *p) + spin_unlock(&hash_lock); + spin_unlock(&entry->lock); + fsnotify_destroy_mark(entry); +- fsnotify_put_mark(entry); + goto out; + } + +@@ -259,7 +258,7 @@ static void untag_chunk(struct node *p) + + fsnotify_duplicate_mark(&new->mark, entry); + if (fsnotify_add_mark(&new->mark, new->mark.group, new->mark.i.inode, NULL, 1)) { +- free_chunk(new); ++ fsnotify_put_mark(&new->mark); + goto Fallback; + } + +@@ -293,7 +292,6 @@ static void untag_chunk(struct node *p) + spin_unlock(&hash_lock); + spin_unlock(&entry->lock); + fsnotify_destroy_mark(entry); +- fsnotify_put_mark(entry); + goto out; + + Fallback: +@@ -322,7 +320,7 @@ static int create_chunk(struct inode *inode, struct audit_tree *tree) + + entry = &chunk->mark; + if (fsnotify_add_mark(entry, audit_tree_group, inode, NULL, 0)) { +- free_chunk(chunk); ++ fsnotify_put_mark(entry); + return -ENOSPC; + } + +@@ -332,6 +330,7 @@ static int create_chunk(struct inode *inode, struct audit_tree *tree) + spin_unlock(&hash_lock); + chunk->dead = 1; + spin_unlock(&entry->lock); ++ fsnotify_get_mark(entry); + fsnotify_destroy_mark(entry); + fsnotify_put_mark(entry); + return 0; +@@ -396,7 +395,7 @@ static int tag_chunk(struct inode *inode, struct audit_tree *tree) + fsnotify_duplicate_mark(chunk_entry, old_entry); + if (fsnotify_add_mark(chunk_entry, chunk_entry->group, chunk_entry->i.inode, NULL, 1)) { + spin_unlock(&old_entry->lock); +- free_chunk(chunk); ++ fsnotify_put_mark(chunk_entry); + fsnotify_put_mark(old_entry); + return -ENOSPC; + } +@@ -412,6 +411,7 @@ static int tag_chunk(struct inode *inode, struct audit_tree *tree) + spin_unlock(&chunk_entry->lock); + spin_unlock(&old_entry->lock); + ++ fsnotify_get_mark(chunk_entry); + fsnotify_destroy_mark(chunk_entry); + + fsnotify_put_mark(chunk_entry); +@@ -445,7 +445,6 @@ static int tag_chunk(struct inode *inode, struct audit_tree *tree) + spin_unlock(&old_entry->lock); + fsnotify_destroy_mark(old_entry); + fsnotify_put_mark(old_entry); /* pair to fsnotify_find mark_entry */ +- fsnotify_put_mark(old_entry); /* and kill it */ + return 0; + } + +diff --git a/kernel/sched.c b/kernel/sched.c +index e0431c4..910db7d 100644 +--- a/kernel/sched.c ++++ b/kernel/sched.c +@@ -4355,6 +4355,20 @@ void thread_group_times(struct task_struct *p, cputime_t *ut, cputime_t *st) + # define nsecs_to_cputime(__nsecs) nsecs_to_jiffies(__nsecs) + #endif + ++static cputime_t scale_utime(cputime_t utime, cputime_t rtime, cputime_t total) ++{ ++ u64 temp = (__force u64) rtime; ++ ++ temp *= (__force u64) utime; ++ ++ if (sizeof(cputime_t) == 4) ++ temp = div_u64(temp, (__force u32) total); ++ else ++ temp = div64_u64(temp, (__force u64) total); ++ ++ return (__force cputime_t) temp; ++} ++ + void task_times(struct task_struct *p, cputime_t *ut, cputime_t *st) + { + cputime_t rtime, utime = p->utime, total = cputime_add(utime, p->stime); +@@ -4364,13 +4378,9 @@ void task_times(struct task_struct *p, cputime_t *ut, cputime_t *st) + */ + rtime = 
nsecs_to_cputime(p->se.sum_exec_runtime); + +- if (total) { +- u64 temp = rtime; +- +- temp *= utime; +- do_div(temp, total); +- utime = (cputime_t)temp; +- } else ++ if (total) ++ utime = scale_utime(utime, rtime, total); ++ else + utime = rtime; + + /* +@@ -4397,13 +4407,9 @@ void thread_group_times(struct task_struct *p, cputime_t *ut, cputime_t *st) + total = cputime_add(cputime.utime, cputime.stime); + rtime = nsecs_to_cputime(cputime.sum_exec_runtime); + +- if (total) { +- u64 temp = rtime; +- +- temp *= cputime.utime; +- do_div(temp, total); +- utime = (cputime_t)temp; +- } else ++ if (total) ++ utime = scale_utime(cputime.utime, rtime, total); ++ else + utime = rtime; + + sig->prev_utime = max(sig->prev_utime, utime); +diff --git a/mm/vmscan.c b/mm/vmscan.c +index 48febd7..86eb848 100644 +--- a/mm/vmscan.c ++++ b/mm/vmscan.c +@@ -1977,10 +1977,10 @@ static void get_scan_count(struct zone *zone, struct scan_control *sc, + * proportional to the fraction of recently scanned pages on + * each list that were recently referenced and in active use. + */ +- ap = (anon_prio + 1) * (reclaim_stat->recent_scanned[0] + 1); ++ ap = anon_prio * (reclaim_stat->recent_scanned[0] + 1); + ap /= reclaim_stat->recent_rotated[0] + 1; + +- fp = (file_prio + 1) * (reclaim_stat->recent_scanned[1] + 1); ++ fp = file_prio * (reclaim_stat->recent_scanned[1] + 1); + fp /= reclaim_stat->recent_rotated[1] + 1; + spin_unlock_irq(&zone->lru_lock); + +@@ -1993,7 +1993,7 @@ out: + unsigned long scan; + + scan = zone_nr_lru_pages(zone, sc, l); +- if (priority || noswap) { ++ if (priority || noswap || !vmscan_swappiness(sc)) { + scan >>= priority; + if (!scan && force_scan) + scan = SWAP_CLUSTER_MAX; +diff --git a/net/bluetooth/hci_event.c b/net/bluetooth/hci_event.c +index 643a41b..6033f02 100644 +--- a/net/bluetooth/hci_event.c ++++ b/net/bluetooth/hci_event.c +@@ -1411,7 +1411,13 @@ static inline void hci_conn_complete_evt(struct hci_dev *hdev, struct sk_buff *s + if (conn->type == ACL_LINK) { + conn->state = BT_CONFIG; + hci_conn_hold(conn); +- conn->disc_timeout = HCI_DISCONN_TIMEOUT; ++ ++ if (!conn->out && ++ !(conn->ssp_mode && conn->hdev->ssp_mode) && ++ !hci_find_link_key(hdev, &ev->bdaddr)) ++ conn->disc_timeout = HCI_PAIRING_TIMEOUT; ++ else ++ conn->disc_timeout = HCI_DISCONN_TIMEOUT; + mgmt_connected(hdev->id, &ev->bdaddr, conn->type); + } else + conn->state = BT_CONNECTED; +diff --git a/net/bluetooth/l2cap_core.c b/net/bluetooth/l2cap_core.c +index 17b5b1c..dd76177 100644 +--- a/net/bluetooth/l2cap_core.c ++++ b/net/bluetooth/l2cap_core.c +@@ -862,6 +862,7 @@ static void l2cap_le_conn_ready(struct l2cap_conn *conn) + write_lock_bh(&conn->chan_lock); + + hci_conn_hold(conn->hcon); ++ conn->hcon->disc_timeout = HCI_DISCONN_TIMEOUT; + + bacpy(&bt_sk(sk)->src, conn->src); + bacpy(&bt_sk(sk)->dst, conn->dst); +@@ -2263,12 +2264,14 @@ static void l2cap_conf_rfc_get(struct l2cap_chan *chan, void *rsp, int len) + while (len >= L2CAP_CONF_OPT_SIZE) { + len -= l2cap_get_conf_opt(&rsp, &type, &olen, &val); + +- switch (type) { +- case L2CAP_CONF_RFC: +- if (olen == sizeof(rfc)) +- memcpy(&rfc, (void *)val, olen); +- goto done; +- } ++ if (type != L2CAP_CONF_RFC) ++ continue; ++ ++ if (olen != sizeof(rfc)) ++ break; ++ ++ memcpy(&rfc, (void *)val, olen); ++ goto done; + } + + /* Use sane default values in case a misbehaving remote device +diff --git a/net/dccp/ccid.h b/net/dccp/ccid.h +index 75c3582..fb85d37 100644 +--- a/net/dccp/ccid.h ++++ b/net/dccp/ccid.h +@@ -246,7 +246,7 @@ static inline int 
ccid_hc_rx_getsockopt(struct ccid *ccid, struct sock *sk, + u32 __user *optval, int __user *optlen) + { + int rc = -ENOPROTOOPT; +- if (ccid->ccid_ops->ccid_hc_rx_getsockopt != NULL) ++ if (ccid != NULL && ccid->ccid_ops->ccid_hc_rx_getsockopt != NULL) + rc = ccid->ccid_ops->ccid_hc_rx_getsockopt(sk, optname, len, + optval, optlen); + return rc; +@@ -257,7 +257,7 @@ static inline int ccid_hc_tx_getsockopt(struct ccid *ccid, struct sock *sk, + u32 __user *optval, int __user *optlen) + { + int rc = -ENOPROTOOPT; +- if (ccid->ccid_ops->ccid_hc_tx_getsockopt != NULL) ++ if (ccid != NULL && ccid->ccid_ops->ccid_hc_tx_getsockopt != NULL) + rc = ccid->ccid_ops->ccid_hc_tx_getsockopt(sk, optname, len, + optval, optlen); + return rc; +diff --git a/net/sunrpc/svc_xprt.c b/net/sunrpc/svc_xprt.c +index 9ed2cd0..3282453 100644 +--- a/net/sunrpc/svc_xprt.c ++++ b/net/sunrpc/svc_xprt.c +@@ -315,7 +315,6 @@ static bool svc_xprt_has_something_to_do(struct svc_xprt *xprt) + */ + void svc_xprt_enqueue(struct svc_xprt *xprt) + { +- struct svc_serv *serv = xprt->xpt_server; + struct svc_pool *pool; + struct svc_rqst *rqstp; + int cpu; +@@ -361,8 +360,6 @@ void svc_xprt_enqueue(struct svc_xprt *xprt) + rqstp, rqstp->rq_xprt); + rqstp->rq_xprt = xprt; + svc_xprt_get(xprt); +- rqstp->rq_reserved = serv->sv_max_mesg; +- atomic_add(rqstp->rq_reserved, &xprt->xpt_reserved); + pool->sp_stats.threads_woken++; + wake_up(&rqstp->rq_wait); + } else { +@@ -642,8 +639,6 @@ int svc_recv(struct svc_rqst *rqstp, long timeout) + if (xprt) { + rqstp->rq_xprt = xprt; + svc_xprt_get(xprt); +- rqstp->rq_reserved = serv->sv_max_mesg; +- atomic_add(rqstp->rq_reserved, &xprt->xpt_reserved); + + /* As there is a shortage of threads and this request + * had to be queued, don't allow the thread to wait so +@@ -740,6 +735,8 @@ int svc_recv(struct svc_rqst *rqstp, long timeout) + else + len = xprt->xpt_ops->xpo_recvfrom(rqstp); + dprintk("svc: got len=%d\n", len); ++ rqstp->rq_reserved = serv->sv_max_mesg; ++ atomic_add(rqstp->rq_reserved, &xprt->xpt_reserved); + } + svc_xprt_received(xprt); + +@@ -796,7 +793,8 @@ int svc_send(struct svc_rqst *rqstp) + + /* Grab mutex to serialize outgoing data. 
*/ + mutex_lock(&xprt->xpt_mutex); +- if (test_bit(XPT_DEAD, &xprt->xpt_flags)) ++ if (test_bit(XPT_DEAD, &xprt->xpt_flags) ++ || test_bit(XPT_CLOSE, &xprt->xpt_flags)) + len = -ENOTCONN; + else + len = xprt->xpt_ops->xpo_sendto(rqstp); +diff --git a/net/sunrpc/svcsock.c b/net/sunrpc/svcsock.c +index 71bed1c..296192c 100644 +--- a/net/sunrpc/svcsock.c ++++ b/net/sunrpc/svcsock.c +@@ -1136,9 +1136,9 @@ static int svc_tcp_recvfrom(struct svc_rqst *rqstp) + if (len >= 0) + svsk->sk_tcplen += len; + if (len != want) { ++ svc_tcp_save_pages(svsk, rqstp); + if (len < 0 && len != -EAGAIN) + goto err_other; +- svc_tcp_save_pages(svsk, rqstp); + dprintk("svc: incomplete TCP record (%d of %d)\n", + svsk->sk_tcplen, svsk->sk_reclen); + goto err_noclose; +diff --git a/sound/pci/hda/hda_proc.c b/sound/pci/hda/hda_proc.c +index 254ab52..2210b83 100644 +--- a/sound/pci/hda/hda_proc.c ++++ b/sound/pci/hda/hda_proc.c +@@ -412,7 +412,7 @@ static void print_digital_conv(struct snd_info_buffer *buffer, + if (digi1 & AC_DIG1_EMPHASIS) + snd_iprintf(buffer, " Preemphasis"); + if (digi1 & AC_DIG1_COPYRIGHT) +- snd_iprintf(buffer, " Copyright"); ++ snd_iprintf(buffer, " Non-Copyright"); + if (digi1 & AC_DIG1_NONAUDIO) + snd_iprintf(buffer, " Non-Audio"); + if (digi1 & AC_DIG1_PROFESSIONAL) +diff --git a/sound/pci/hda/patch_ca0132.c b/sound/pci/hda/patch_ca0132.c +index 35abe3c..b22989e 100644 +--- a/sound/pci/hda/patch_ca0132.c ++++ b/sound/pci/hda/patch_ca0132.c +@@ -276,6 +276,10 @@ static int _add_switch(struct hda_codec *codec, hda_nid_t nid, const char *pfx, + int type = dir ? HDA_INPUT : HDA_OUTPUT; + struct snd_kcontrol_new knew = + HDA_CODEC_MUTE_MONO(namestr, nid, chan, 0, type); ++ if ((query_amp_caps(codec, nid, type) & AC_AMPCAP_MUTE) == 0) { ++ snd_printdd("Skipping '%s %s Switch' (no mute on node 0x%x)\n", pfx, dirstr[dir], nid); ++ return 0; ++ } + sprintf(namestr, "%s %s Switch", pfx, dirstr[dir]); + return snd_hda_ctl_add(codec, nid, snd_ctl_new1(&knew, codec)); + } +@@ -287,6 +291,10 @@ static int _add_volume(struct hda_codec *codec, hda_nid_t nid, const char *pfx, + int type = dir ? 
HDA_INPUT : HDA_OUTPUT; + struct snd_kcontrol_new knew = + HDA_CODEC_VOLUME_MONO(namestr, nid, chan, 0, type); ++ if ((query_amp_caps(codec, nid, type) & AC_AMPCAP_NUM_STEPS) == 0) { ++ snd_printdd("Skipping '%s %s Volume' (no amp on node 0x%x)\n", pfx, dirstr[dir], nid); ++ return 0; ++ } + sprintf(namestr, "%s %s Volume", pfx, dirstr[dir]); + return snd_hda_ctl_add(codec, nid, snd_ctl_new1(&knew, codec)); + } +diff --git a/sound/soc/codecs/wm9712.c b/sound/soc/codecs/wm9712.c +index 90117f8..90e5005 100644 +--- a/sound/soc/codecs/wm9712.c ++++ b/sound/soc/codecs/wm9712.c +@@ -270,7 +270,7 @@ SOC_DAPM_ENUM("Route", wm9712_enum[9]); + + /* Mic select */ + static const struct snd_kcontrol_new wm9712_mic_src_controls = +-SOC_DAPM_ENUM("Route", wm9712_enum[7]); ++SOC_DAPM_ENUM("Mic Source Select", wm9712_enum[7]); + + /* diff select */ + static const struct snd_kcontrol_new wm9712_diff_sel_controls = +@@ -289,7 +289,9 @@ SND_SOC_DAPM_MUX("Left Capture Select", SND_SOC_NOPM, 0, 0, + &wm9712_capture_selectl_controls), + SND_SOC_DAPM_MUX("Right Capture Select", SND_SOC_NOPM, 0, 0, + &wm9712_capture_selectr_controls), +-SND_SOC_DAPM_MUX("Mic Select Source", SND_SOC_NOPM, 0, 0, ++SND_SOC_DAPM_MUX("Left Mic Select Source", SND_SOC_NOPM, 0, 0, ++ &wm9712_mic_src_controls), ++SND_SOC_DAPM_MUX("Right Mic Select Source", SND_SOC_NOPM, 0, 0, + &wm9712_mic_src_controls), + SND_SOC_DAPM_MUX("Differential Source", SND_SOC_NOPM, 0, 0, + &wm9712_diff_sel_controls), +@@ -317,6 +319,7 @@ SND_SOC_DAPM_PGA("Out 3 PGA", AC97_INT_PAGING, 5, 1, NULL, 0), + SND_SOC_DAPM_PGA("Line PGA", AC97_INT_PAGING, 2, 1, NULL, 0), + SND_SOC_DAPM_PGA("Phone PGA", AC97_INT_PAGING, 1, 1, NULL, 0), + SND_SOC_DAPM_PGA("Mic PGA", AC97_INT_PAGING, 0, 1, NULL, 0), ++SND_SOC_DAPM_PGA("Differential Mic", SND_SOC_NOPM, 0, 0, NULL, 0), + SND_SOC_DAPM_MICBIAS("Mic Bias", AC97_INT_PAGING, 10, 1), + SND_SOC_DAPM_OUTPUT("MONOOUT"), + SND_SOC_DAPM_OUTPUT("HPOUTL"), +@@ -377,6 +380,18 @@ static const struct snd_soc_dapm_route wm9712_audio_map[] = { + {"Mic PGA", NULL, "MIC1"}, + {"Mic PGA", NULL, "MIC2"}, + ++ /* microphones */ ++ {"Differential Mic", NULL, "MIC1"}, ++ {"Differential Mic", NULL, "MIC2"}, ++ {"Left Mic Select Source", "Mic 1", "MIC1"}, ++ {"Left Mic Select Source", "Mic 2", "MIC2"}, ++ {"Left Mic Select Source", "Stereo", "MIC1"}, ++ {"Left Mic Select Source", "Differential", "Differential Mic"}, ++ {"Right Mic Select Source", "Mic 1", "MIC1"}, ++ {"Right Mic Select Source", "Mic 2", "MIC2"}, ++ {"Right Mic Select Source", "Stereo", "MIC2"}, ++ {"Right Mic Select Source", "Differential", "Differential Mic"}, ++ + /* left capture selector */ + {"Left Capture Select", "Mic", "MIC1"}, + {"Left Capture Select", "Speaker Mixer", "Speaker Mixer"}, diff --git a/3.2.34/bump/1029_linux-3.2.30.patch b/3.2.34/bump/1029_linux-3.2.30.patch new file mode 100644 index 0000000..86aea4b --- /dev/null +++ b/3.2.34/bump/1029_linux-3.2.30.patch @@ -0,0 +1,5552 @@ +diff --git a/Documentation/devicetree/bindings/mmc/fsl-imx-esdhc.txt b/Documentation/devicetree/bindings/mmc/fsl-imx-esdhc.txt +index ab22fe6..e39a0c0 100644 +--- a/Documentation/devicetree/bindings/mmc/fsl-imx-esdhc.txt ++++ b/Documentation/devicetree/bindings/mmc/fsl-imx-esdhc.txt +@@ -10,8 +10,8 @@ Required properties: + + Optional properties: + - fsl,card-wired : Indicate the card is wired to host permanently +-- fsl,cd-internal : Indicate to use controller internal card detection +-- fsl,wp-internal : Indicate to use controller internal write protection ++- fsl,cd-controller : Indicate 
to use controller internal card detection ++- fsl,wp-controller : Indicate to use controller internal write protection + - cd-gpios : Specify GPIOs for card detection + - wp-gpios : Specify GPIOs for write protection + +@@ -21,8 +21,8 @@ esdhc@70004000 { + compatible = "fsl,imx51-esdhc"; + reg = <0x70004000 0x4000>; + interrupts = <1>; +- fsl,cd-internal; +- fsl,wp-internal; ++ fsl,cd-controller; ++ fsl,wp-controller; + }; + + esdhc@70008000 { +diff --git a/Documentation/i2c/busses/i2c-i801 b/Documentation/i2c/busses/i2c-i801 +index 2871fd5..99d4e44 100644 +--- a/Documentation/i2c/busses/i2c-i801 ++++ b/Documentation/i2c/busses/i2c-i801 +@@ -20,6 +20,8 @@ Supported adapters: + * Intel Patsburg (PCH) + * Intel DH89xxCC (PCH) + * Intel Panther Point (PCH) ++ * Intel Lynx Point (PCH) ++ * Intel Lynx Point-LP (PCH) + Datasheets: Publicly available at the Intel website + + On Intel Patsburg and later chipsets, both the normal host SMBus controller +diff --git a/Makefile b/Makefile +index d96fc2a..9fd7e60 100644 +--- a/Makefile ++++ b/Makefile +@@ -1,6 +1,6 @@ + VERSION = 3 + PATCHLEVEL = 2 +-SUBLEVEL = 29 ++SUBLEVEL = 30 + EXTRAVERSION = + NAME = Saber-toothed Squirrel + +diff --git a/arch/arm/Kconfig b/arch/arm/Kconfig +index 987c72d..9fdc151 100644 +--- a/arch/arm/Kconfig ++++ b/arch/arm/Kconfig +@@ -2065,6 +2065,7 @@ source "drivers/cpufreq/Kconfig" + config CPU_FREQ_IMX + tristate "CPUfreq driver for i.MX CPUs" + depends on ARCH_MXC && CPU_FREQ ++ select CPU_FREQ_TABLE + help + This enables the CPUfreq driver for i.MX CPUs. + +diff --git a/arch/arm/Makefile b/arch/arm/Makefile +index dfcf3b0..362c7ca 100644 +--- a/arch/arm/Makefile ++++ b/arch/arm/Makefile +@@ -284,10 +284,10 @@ zImage Image xipImage bootpImage uImage: vmlinux + zinstall uinstall install: vmlinux + $(Q)$(MAKE) $(build)=$(boot) MACHINE=$(MACHINE) $@ + +-%.dtb: ++%.dtb: scripts + $(Q)$(MAKE) $(build)=$(boot) MACHINE=$(MACHINE) $(boot)/$@ + +-dtbs: ++dtbs: scripts + $(Q)$(MAKE) $(build)=$(boot) MACHINE=$(MACHINE) $(boot)/$@ + + # We use MRPROPER_FILES and CLEAN_FILES now +diff --git a/arch/arm/boot/dts/imx51-babbage.dts b/arch/arm/boot/dts/imx51-babbage.dts +index f8766af..4790df2 100644 +--- a/arch/arm/boot/dts/imx51-babbage.dts ++++ b/arch/arm/boot/dts/imx51-babbage.dts +@@ -29,8 +29,8 @@ + aips@70000000 { /* aips-1 */ + spba@70000000 { + esdhc@70004000 { /* ESDHC1 */ +- fsl,cd-internal; +- fsl,wp-internal; ++ fsl,cd-controller; ++ fsl,wp-controller; + status = "okay"; + }; + +diff --git a/arch/arm/include/asm/pgtable.h b/arch/arm/include/asm/pgtable.h +index 8512475..9b419ab 100644 +--- a/arch/arm/include/asm/pgtable.h ++++ b/arch/arm/include/asm/pgtable.h +@@ -232,6 +232,18 @@ static inline pte_t *pmd_page_vaddr(pmd_t pmd) + #define set_pte_ext(ptep,pte,ext) cpu_set_pte_ext(ptep,pte,ext) + #define pte_clear(mm,addr,ptep) set_pte_ext(ptep, __pte(0), 0) + ++#define pte_none(pte) (!pte_val(pte)) ++#define pte_present(pte) (pte_val(pte) & L_PTE_PRESENT) ++#define pte_write(pte) (!(pte_val(pte) & L_PTE_RDONLY)) ++#define pte_dirty(pte) (pte_val(pte) & L_PTE_DIRTY) ++#define pte_young(pte) (pte_val(pte) & L_PTE_YOUNG) ++#define pte_exec(pte) (!(pte_val(pte) & L_PTE_XN)) ++#define pte_special(pte) (0) ++ ++#define pte_present_user(pte) \ ++ ((pte_val(pte) & (L_PTE_PRESENT | L_PTE_USER)) == \ ++ (L_PTE_PRESENT | L_PTE_USER)) ++ + #if __LINUX_ARM_ARCH__ < 6 + static inline void __sync_icache_dcache(pte_t pteval) + { +@@ -243,25 +255,15 @@ extern void __sync_icache_dcache(pte_t pteval); + static inline void set_pte_at(struct 
mm_struct *mm, unsigned long addr, + pte_t *ptep, pte_t pteval) + { +- if (addr >= TASK_SIZE) +- set_pte_ext(ptep, pteval, 0); +- else { ++ unsigned long ext = 0; ++ ++ if (addr < TASK_SIZE && pte_present_user(pteval)) { + __sync_icache_dcache(pteval); +- set_pte_ext(ptep, pteval, PTE_EXT_NG); ++ ext |= PTE_EXT_NG; + } +-} + +-#define pte_none(pte) (!pte_val(pte)) +-#define pte_present(pte) (pte_val(pte) & L_PTE_PRESENT) +-#define pte_write(pte) (!(pte_val(pte) & L_PTE_RDONLY)) +-#define pte_dirty(pte) (pte_val(pte) & L_PTE_DIRTY) +-#define pte_young(pte) (pte_val(pte) & L_PTE_YOUNG) +-#define pte_exec(pte) (!(pte_val(pte) & L_PTE_XN)) +-#define pte_special(pte) (0) +- +-#define pte_present_user(pte) \ +- ((pte_val(pte) & (L_PTE_PRESENT | L_PTE_USER)) == \ +- (L_PTE_PRESENT | L_PTE_USER)) ++ set_pte_ext(ptep, pteval, ext); ++} + + #define PTE_BIT_FUNC(fn,op) \ + static inline pte_t pte_##fn(pte_t pte) { pte_val(pte) op; return pte; } +diff --git a/arch/arm/kernel/hw_breakpoint.c b/arch/arm/kernel/hw_breakpoint.c +index 814a52a9..2bc1a8e 100644 +--- a/arch/arm/kernel/hw_breakpoint.c ++++ b/arch/arm/kernel/hw_breakpoint.c +@@ -160,6 +160,12 @@ static int debug_arch_supported(void) + arch >= ARM_DEBUG_ARCH_V7_1; + } + ++/* Can we determine the watchpoint access type from the fsr? */ ++static int debug_exception_updates_fsr(void) ++{ ++ return 0; ++} ++ + /* Determine number of WRP registers available. */ + static int get_num_wrp_resources(void) + { +@@ -620,18 +626,35 @@ int arch_validate_hwbkpt_settings(struct perf_event *bp) + info->address &= ~alignment_mask; + info->ctrl.len <<= offset; + +- /* +- * Currently we rely on an overflow handler to take +- * care of single-stepping the breakpoint when it fires. +- * In the case of userspace breakpoints on a core with V7 debug, +- * we can use the mismatch feature as a poor-man's hardware +- * single-step, but this only works for per-task breakpoints. +- */ +- if (!bp->overflow_handler && (arch_check_bp_in_kernelspace(bp) || +- !core_has_mismatch_brps() || !bp->hw.bp_target)) { +- pr_warning("overflow handler required but none found\n"); +- ret = -EINVAL; ++ if (!bp->overflow_handler) { ++ /* ++ * Mismatch breakpoints are required for single-stepping ++ * breakpoints. ++ */ ++ if (!core_has_mismatch_brps()) ++ return -EINVAL; ++ ++ /* We don't allow mismatch breakpoints in kernel space. */ ++ if (arch_check_bp_in_kernelspace(bp)) ++ return -EPERM; ++ ++ /* ++ * Per-cpu breakpoints are not supported by our stepping ++ * mechanism. ++ */ ++ if (!bp->hw.bp_target) ++ return -EINVAL; ++ ++ /* ++ * We only support specific access types if the fsr ++ * reports them. ++ */ ++ if (!debug_exception_updates_fsr() && ++ (info->ctrl.type == ARM_BREAKPOINT_LOAD || ++ info->ctrl.type == ARM_BREAKPOINT_STORE)) ++ return -EINVAL; + } ++ + out: + return ret; + } +@@ -707,10 +730,12 @@ static void watchpoint_handler(unsigned long addr, unsigned int fsr, + goto unlock; + + /* Check that the access type matches. */ +- access = (fsr & ARM_FSR_ACCESS_MASK) ? HW_BREAKPOINT_W : +- HW_BREAKPOINT_R; +- if (!(access & hw_breakpoint_type(wp))) +- goto unlock; ++ if (debug_exception_updates_fsr()) { ++ access = (fsr & ARM_FSR_ACCESS_MASK) ? ++ HW_BREAKPOINT_W : HW_BREAKPOINT_R; ++ if (!(access & hw_breakpoint_type(wp))) ++ goto unlock; ++ } + + /* We have a winner. 
*/ + info->trigger = addr; +diff --git a/arch/arm/kernel/traps.c b/arch/arm/kernel/traps.c +index 8380bd1..7ac5dfd 100644 +--- a/arch/arm/kernel/traps.c ++++ b/arch/arm/kernel/traps.c +@@ -380,20 +380,23 @@ asmlinkage void __exception do_undefinstr(struct pt_regs *regs) + #endif + instr = *(u32 *) pc; + } else if (thumb_mode(regs)) { +- get_user(instr, (u16 __user *)pc); ++ if (get_user(instr, (u16 __user *)pc)) ++ goto die_sig; + if (is_wide_instruction(instr)) { + unsigned int instr2; +- get_user(instr2, (u16 __user *)pc+1); ++ if (get_user(instr2, (u16 __user *)pc+1)) ++ goto die_sig; + instr <<= 16; + instr |= instr2; + } +- } else { +- get_user(instr, (u32 __user *)pc); ++ } else if (get_user(instr, (u32 __user *)pc)) { ++ goto die_sig; + } + + if (call_undef_hook(regs, instr) == 0) + return; + ++die_sig: + #ifdef CONFIG_DEBUG_USER + if (user_debug & UDBG_UNDEFINED) { + printk(KERN_INFO "%s (%d): undefined instruction: pc=%p\n", +diff --git a/arch/arm/mach-dove/common.c b/arch/arm/mach-dove/common.c +index 1620b15..cb105bf8 100644 +--- a/arch/arm/mach-dove/common.c ++++ b/arch/arm/mach-dove/common.c +@@ -92,7 +92,7 @@ void __init dove_ge00_init(struct mv643xx_eth_platform_data *eth_data) + { + orion_ge00_init(eth_data, &dove_mbus_dram_info, + DOVE_GE00_PHYS_BASE, IRQ_DOVE_GE00_SUM, +- 0, get_tclk()); ++ 0, get_tclk(), 1600); + } + + /***************************************************************************** +diff --git a/arch/arm/mach-imx/hotplug.c b/arch/arm/mach-imx/hotplug.c +index 20ed2d5..f8f7437 100644 +--- a/arch/arm/mach-imx/hotplug.c ++++ b/arch/arm/mach-imx/hotplug.c +@@ -42,22 +42,6 @@ static inline void cpu_enter_lowpower(void) + : "cc"); + } + +-static inline void cpu_leave_lowpower(void) +-{ +- unsigned int v; +- +- asm volatile( +- "mrc p15, 0, %0, c1, c0, 0\n" +- " orr %0, %0, %1\n" +- " mcr p15, 0, %0, c1, c0, 0\n" +- " mrc p15, 0, %0, c1, c0, 1\n" +- " orr %0, %0, %2\n" +- " mcr p15, 0, %0, c1, c0, 1\n" +- : "=&r" (v) +- : "Ir" (CR_C), "Ir" (0x40) +- : "cc"); +-} +- + /* + * platform-specific code to shutdown a CPU + * +@@ -67,11 +51,10 @@ void platform_cpu_die(unsigned int cpu) + { + cpu_enter_lowpower(); + imx_enable_cpu(cpu, false); +- cpu_do_idle(); +- cpu_leave_lowpower(); + +- /* We should never return from idle */ +- panic("cpu %d unexpectedly exit from shutdown\n", cpu); ++ /* spin here until hardware takes it down */ ++ while (1) ++ ; + } + + int platform_cpu_disable(unsigned int cpu) +diff --git a/arch/arm/mach-kirkwood/common.c b/arch/arm/mach-kirkwood/common.c +index c5dbbb3..06faa97 100644 +--- a/arch/arm/mach-kirkwood/common.c ++++ b/arch/arm/mach-kirkwood/common.c +@@ -88,7 +88,7 @@ void __init kirkwood_ge00_init(struct mv643xx_eth_platform_data *eth_data) + + orion_ge00_init(eth_data, &kirkwood_mbus_dram_info, + GE00_PHYS_BASE, IRQ_KIRKWOOD_GE00_SUM, +- IRQ_KIRKWOOD_GE00_ERR, kirkwood_tclk); ++ IRQ_KIRKWOOD_GE00_ERR, kirkwood_tclk, 1600); + } + + +@@ -102,7 +102,7 @@ void __init kirkwood_ge01_init(struct mv643xx_eth_platform_data *eth_data) + + orion_ge01_init(eth_data, &kirkwood_mbus_dram_info, + GE01_PHYS_BASE, IRQ_KIRKWOOD_GE01_SUM, +- IRQ_KIRKWOOD_GE01_ERR, kirkwood_tclk); ++ IRQ_KIRKWOOD_GE01_ERR, kirkwood_tclk, 1600); + } + + +diff --git a/arch/arm/mach-mv78xx0/common.c b/arch/arm/mach-mv78xx0/common.c +index d90e244..570ee4d 100644 +--- a/arch/arm/mach-mv78xx0/common.c ++++ b/arch/arm/mach-mv78xx0/common.c +@@ -202,7 +202,8 @@ void __init mv78xx0_ge00_init(struct mv643xx_eth_platform_data *eth_data) + { + orion_ge00_init(eth_data, 
&mv78xx0_mbus_dram_info, + GE00_PHYS_BASE, IRQ_MV78XX0_GE00_SUM, +- IRQ_MV78XX0_GE_ERR, get_tclk()); ++ IRQ_MV78XX0_GE_ERR, get_tclk(), ++ MV643XX_TX_CSUM_DEFAULT_LIMIT); + } + + +@@ -213,7 +214,8 @@ void __init mv78xx0_ge01_init(struct mv643xx_eth_platform_data *eth_data) + { + orion_ge01_init(eth_data, &mv78xx0_mbus_dram_info, + GE01_PHYS_BASE, IRQ_MV78XX0_GE01_SUM, +- NO_IRQ, get_tclk()); ++ NO_IRQ, get_tclk(), ++ MV643XX_TX_CSUM_DEFAULT_LIMIT); + } + + +diff --git a/arch/arm/mach-orion5x/common.c b/arch/arm/mach-orion5x/common.c +index 53b68b8..20260db 100644 +--- a/arch/arm/mach-orion5x/common.c ++++ b/arch/arm/mach-orion5x/common.c +@@ -95,7 +95,8 @@ void __init orion5x_eth_init(struct mv643xx_eth_platform_data *eth_data) + { + orion_ge00_init(eth_data, &orion5x_mbus_dram_info, + ORION5X_ETH_PHYS_BASE, IRQ_ORION5X_ETH_SUM, +- IRQ_ORION5X_ETH_ERR, orion5x_tclk); ++ IRQ_ORION5X_ETH_ERR, orion5x_tclk, ++ MV643XX_TX_CSUM_DEFAULT_LIMIT); + } + + +diff --git a/arch/arm/mm/flush.c b/arch/arm/mm/flush.c +index 1a8d4aa..8fda9f7 100644 +--- a/arch/arm/mm/flush.c ++++ b/arch/arm/mm/flush.c +@@ -236,8 +236,6 @@ void __sync_icache_dcache(pte_t pteval) + struct page *page; + struct address_space *mapping; + +- if (!pte_present_user(pteval)) +- return; + if (cache_is_vipt_nonaliasing() && !pte_exec(pteval)) + /* only flush non-aliasing VIPT caches for exec mappings */ + return; +diff --git a/arch/arm/plat-omap/dmtimer.c b/arch/arm/plat-omap/dmtimer.c +index af3b92b..f9adbbb 100644 +--- a/arch/arm/plat-omap/dmtimer.c ++++ b/arch/arm/plat-omap/dmtimer.c +@@ -236,7 +236,7 @@ EXPORT_SYMBOL_GPL(omap_dm_timer_enable); + + void omap_dm_timer_disable(struct omap_dm_timer *timer) + { +- pm_runtime_put(&timer->pdev->dev); ++ pm_runtime_put_sync(&timer->pdev->dev); + } + EXPORT_SYMBOL_GPL(omap_dm_timer_disable); + +diff --git a/arch/arm/plat-orion/common.c b/arch/arm/plat-orion/common.c +index 11dce87..8a6886a 100644 +--- a/arch/arm/plat-orion/common.c ++++ b/arch/arm/plat-orion/common.c +@@ -263,10 +263,12 @@ void __init orion_ge00_init(struct mv643xx_eth_platform_data *eth_data, + unsigned long mapbase, + unsigned long irq, + unsigned long irq_err, +- int tclk) ++ int tclk, ++ unsigned int tx_csum_limit) + { + fill_resources(&orion_ge00_shared, orion_ge00_shared_resources, + mapbase + 0x2000, SZ_16K - 1, irq_err); ++ orion_ge00_shared_data.tx_csum_limit = tx_csum_limit; + ge_complete(&orion_ge00_shared_data, mbus_dram_info, tclk, + orion_ge00_resources, irq, &orion_ge00_shared, + eth_data, &orion_ge00); +@@ -317,10 +319,12 @@ void __init orion_ge01_init(struct mv643xx_eth_platform_data *eth_data, + unsigned long mapbase, + unsigned long irq, + unsigned long irq_err, +- int tclk) ++ int tclk, ++ unsigned int tx_csum_limit) + { + fill_resources(&orion_ge01_shared, orion_ge01_shared_resources, + mapbase + 0x2000, SZ_16K - 1, irq_err); ++ orion_ge01_shared_data.tx_csum_limit = tx_csum_limit; + ge_complete(&orion_ge01_shared_data, mbus_dram_info, tclk, + orion_ge01_resources, irq, &orion_ge01_shared, + eth_data, &orion_ge01); +diff --git a/arch/arm/plat-orion/include/plat/common.h b/arch/arm/plat-orion/include/plat/common.h +index a2c0e31..b637dae 100644 +--- a/arch/arm/plat-orion/include/plat/common.h ++++ b/arch/arm/plat-orion/include/plat/common.h +@@ -41,14 +41,16 @@ void __init orion_ge00_init(struct mv643xx_eth_platform_data *eth_data, + unsigned long mapbase, + unsigned long irq, + unsigned long irq_err, +- int tclk); ++ int tclk, ++ unsigned int tx_csum_limit); + + void __init orion_ge01_init(struct 
mv643xx_eth_platform_data *eth_data, + struct mbus_dram_target_info *mbus_dram_info, + unsigned long mapbase, + unsigned long irq, + unsigned long irq_err, +- int tclk); ++ int tclk, ++ unsigned int tx_csum_limit); + + void __init orion_ge10_init(struct mv643xx_eth_platform_data *eth_data, + struct mbus_dram_target_info *mbus_dram_info, +diff --git a/arch/arm/plat-s3c24xx/dma.c b/arch/arm/plat-s3c24xx/dma.c +index 8a90b6a..1eedf8d 100644 +--- a/arch/arm/plat-s3c24xx/dma.c ++++ b/arch/arm/plat-s3c24xx/dma.c +@@ -431,7 +431,7 @@ s3c2410_dma_canload(struct s3c2410_dma_chan *chan) + * when necessary. + */ + +-int s3c2410_dma_enqueue(unsigned int channel, void *id, ++int s3c2410_dma_enqueue(enum dma_ch channel, void *id, + dma_addr_t data, int size) + { + struct s3c2410_dma_chan *chan = s3c_dma_lookup_channel(channel); +diff --git a/arch/parisc/include/asm/atomic.h b/arch/parisc/include/asm/atomic.h +index 4054b31..c4b779b 100644 +--- a/arch/parisc/include/asm/atomic.h ++++ b/arch/parisc/include/asm/atomic.h +@@ -247,7 +247,7 @@ static __inline__ int __atomic_add_unless(atomic_t *v, int a, int u) + + #define atomic_sub_and_test(i,v) (atomic_sub_return((i),(v)) == 0) + +-#define ATOMIC_INIT(i) ((atomic_t) { (i) }) ++#define ATOMIC_INIT(i) { (i) } + + #define smp_mb__before_atomic_dec() smp_mb() + #define smp_mb__after_atomic_dec() smp_mb() +@@ -256,7 +256,7 @@ static __inline__ int __atomic_add_unless(atomic_t *v, int a, int u) + + #ifdef CONFIG_64BIT + +-#define ATOMIC64_INIT(i) ((atomic64_t) { (i) }) ++#define ATOMIC64_INIT(i) { (i) } + + static __inline__ s64 + __atomic64_add_return(s64 i, atomic64_t *v) +diff --git a/arch/powerpc/kernel/asm-offsets.c b/arch/powerpc/kernel/asm-offsets.c +index 7c5324f..cc20b0a 100644 +--- a/arch/powerpc/kernel/asm-offsets.c ++++ b/arch/powerpc/kernel/asm-offsets.c +@@ -79,6 +79,7 @@ int main(void) + DEFINE(SIGSEGV, SIGSEGV); + DEFINE(NMI_MASK, NMI_MASK); + DEFINE(THREAD_DSCR, offsetof(struct thread_struct, dscr)); ++ DEFINE(THREAD_DSCR_INHERIT, offsetof(struct thread_struct, dscr_inherit)); + #else + DEFINE(THREAD_INFO, offsetof(struct task_struct, stack)); + #endif /* CONFIG_PPC64 */ +diff --git a/arch/powerpc/kernel/dbell.c b/arch/powerpc/kernel/dbell.c +index 2cc451a..6856062 100644 +--- a/arch/powerpc/kernel/dbell.c ++++ b/arch/powerpc/kernel/dbell.c +@@ -28,6 +28,8 @@ void doorbell_setup_this_cpu(void) + + void doorbell_cause_ipi(int cpu, unsigned long data) + { ++ /* Order previous accesses vs. msgsnd, which is treated as a store */ ++ mb(); + ppc_msgsnd(PPC_DBELL, 0, data); + } + +diff --git a/arch/powerpc/kernel/entry_64.S b/arch/powerpc/kernel/entry_64.S +index d834425..654fc53 100644 +--- a/arch/powerpc/kernel/entry_64.S ++++ b/arch/powerpc/kernel/entry_64.S +@@ -380,6 +380,12 @@ _GLOBAL(ret_from_fork) + li r3,0 + b syscall_exit + ++ .section ".toc","aw" ++DSCR_DEFAULT: ++ .tc dscr_default[TC],dscr_default ++ ++ .section ".text" ++ + /* + * This routine switches between two different tasks. The process + * state of one is saved on its kernel stack. 
Then the state +@@ -519,9 +525,6 @@ END_MMU_FTR_SECTION_IFSET(MMU_FTR_1T_SEGMENT) + mr r1,r8 /* start using new stack pointer */ + std r7,PACAKSAVE(r13) + +- ld r6,_CCR(r1) +- mtcrf 0xFF,r6 +- + #ifdef CONFIG_ALTIVEC + BEGIN_FTR_SECTION + ld r0,THREAD_VRSAVE(r4) +@@ -530,14 +533,22 @@ END_FTR_SECTION_IFSET(CPU_FTR_ALTIVEC) + #endif /* CONFIG_ALTIVEC */ + #ifdef CONFIG_PPC64 + BEGIN_FTR_SECTION ++ lwz r6,THREAD_DSCR_INHERIT(r4) ++ ld r7,DSCR_DEFAULT@toc(2) + ld r0,THREAD_DSCR(r4) +- cmpd r0,r25 +- beq 1f ++ cmpwi r6,0 ++ bne 1f ++ ld r0,0(r7) ++1: cmpd r0,r25 ++ beq 2f + mtspr SPRN_DSCR,r0 +-1: ++2: + END_FTR_SECTION_IFSET(CPU_FTR_DSCR) + #endif + ++ ld r6,_CCR(r1) ++ mtcrf 0xFF,r6 ++ + /* r3-r13 are destroyed -- Cort */ + REST_8GPRS(14, r1) + REST_10GPRS(22, r1) +diff --git a/arch/powerpc/kernel/process.c b/arch/powerpc/kernel/process.c +index 6457574..d687e3f 100644 +--- a/arch/powerpc/kernel/process.c ++++ b/arch/powerpc/kernel/process.c +@@ -778,16 +778,8 @@ int copy_thread(unsigned long clone_flags, unsigned long usp, + #endif /* CONFIG_PPC_STD_MMU_64 */ + #ifdef CONFIG_PPC64 + if (cpu_has_feature(CPU_FTR_DSCR)) { +- if (current->thread.dscr_inherit) { +- p->thread.dscr_inherit = 1; +- p->thread.dscr = current->thread.dscr; +- } else if (0 != dscr_default) { +- p->thread.dscr_inherit = 1; +- p->thread.dscr = dscr_default; +- } else { +- p->thread.dscr_inherit = 0; +- p->thread.dscr = 0; +- } ++ p->thread.dscr_inherit = current->thread.dscr_inherit; ++ p->thread.dscr = current->thread.dscr; + } + #endif + +diff --git a/arch/powerpc/kernel/smp.c b/arch/powerpc/kernel/smp.c +index 6df7090..fe04b4a 100644 +--- a/arch/powerpc/kernel/smp.c ++++ b/arch/powerpc/kernel/smp.c +@@ -214,8 +214,15 @@ void smp_muxed_ipi_message_pass(int cpu, int msg) + struct cpu_messages *info = &per_cpu(ipi_message, cpu); + char *message = (char *)&info->messages; + ++ /* ++ * Order previous accesses before accesses in the IPI handler. ++ */ ++ smp_mb(); + message[msg] = 1; +- mb(); ++ /* ++ * cause_ipi functions are required to include a full barrier ++ * before doing whatever causes the IPI. 
++ */ + smp_ops->cause_ipi(cpu, info->data); + } + +@@ -227,7 +234,7 @@ irqreturn_t smp_ipi_demux(void) + mb(); /* order any irq clear */ + + do { +- all = xchg_local(&info->messages, 0); ++ all = xchg(&info->messages, 0); + + #ifdef __BIG_ENDIAN + if (all & (1 << (24 - 8 * PPC_MSG_CALL_FUNCTION))) +diff --git a/arch/powerpc/kernel/sysfs.c b/arch/powerpc/kernel/sysfs.c +index ce035c1..55be64d 100644 +--- a/arch/powerpc/kernel/sysfs.c ++++ b/arch/powerpc/kernel/sysfs.c +@@ -192,6 +192,14 @@ static ssize_t show_dscr_default(struct sysdev_class *class, + return sprintf(buf, "%lx\n", dscr_default); + } + ++static void update_dscr(void *dummy) ++{ ++ if (!current->thread.dscr_inherit) { ++ current->thread.dscr = dscr_default; ++ mtspr(SPRN_DSCR, dscr_default); ++ } ++} ++ + static ssize_t __used store_dscr_default(struct sysdev_class *class, + struct sysdev_class_attribute *attr, const char *buf, + size_t count) +@@ -204,6 +212,8 @@ static ssize_t __used store_dscr_default(struct sysdev_class *class, + return -EINVAL; + dscr_default = val; + ++ on_each_cpu(update_dscr, NULL, 1); ++ + return count; + } + +diff --git a/arch/powerpc/kernel/traps.c b/arch/powerpc/kernel/traps.c +index 5459d14..82dcd4d 100644 +--- a/arch/powerpc/kernel/traps.c ++++ b/arch/powerpc/kernel/traps.c +@@ -942,8 +942,9 @@ static int emulate_instruction(struct pt_regs *regs) + cpu_has_feature(CPU_FTR_DSCR)) { + PPC_WARN_EMULATED(mtdscr, regs); + rd = (instword >> 21) & 0x1f; +- mtspr(SPRN_DSCR, regs->gpr[rd]); ++ current->thread.dscr = regs->gpr[rd]; + current->thread.dscr_inherit = 1; ++ mtspr(SPRN_DSCR, current->thread.dscr); + return 0; + } + #endif +diff --git a/arch/powerpc/sysdev/xics/icp-hv.c b/arch/powerpc/sysdev/xics/icp-hv.c +index 9518d36..5c76bf7 100644 +--- a/arch/powerpc/sysdev/xics/icp-hv.c ++++ b/arch/powerpc/sysdev/xics/icp-hv.c +@@ -27,33 +27,53 @@ static inline unsigned int icp_hv_get_xirr(unsigned char cppr) + { + unsigned long retbuf[PLPAR_HCALL_BUFSIZE]; + long rc; ++ unsigned int ret = XICS_IRQ_SPURIOUS; + + rc = plpar_hcall(H_XIRR, retbuf, cppr); +- if (rc != H_SUCCESS) +- panic(" bad return code xirr - rc = %lx\n", rc); +- return (unsigned int)retbuf[0]; ++ if (rc == H_SUCCESS) { ++ ret = (unsigned int)retbuf[0]; ++ } else { ++ pr_err("%s: bad return code xirr cppr=0x%x returned %ld\n", ++ __func__, cppr, rc); ++ WARN_ON_ONCE(1); ++ } ++ ++ return ret; + } + + static inline void icp_hv_set_xirr(unsigned int value) + { + long rc = plpar_hcall_norets(H_EOI, value); +- if (rc != H_SUCCESS) +- panic("bad return code EOI - rc = %ld, value=%x\n", rc, value); ++ if (rc != H_SUCCESS) { ++ pr_err("%s: bad return code eoi xirr=0x%x returned %ld\n", ++ __func__, value, rc); ++ WARN_ON_ONCE(1); ++ } + } + + static inline void icp_hv_set_cppr(u8 value) + { + long rc = plpar_hcall_norets(H_CPPR, value); +- if (rc != H_SUCCESS) +- panic("bad return code cppr - rc = %lx\n", rc); ++ if (rc != H_SUCCESS) { ++ pr_err("%s: bad return code cppr cppr=0x%x returned %ld\n", ++ __func__, value, rc); ++ WARN_ON_ONCE(1); ++ } + } + + static inline void icp_hv_set_qirr(int n_cpu , u8 value) + { +- long rc = plpar_hcall_norets(H_IPI, get_hard_smp_processor_id(n_cpu), +- value); +- if (rc != H_SUCCESS) +- panic("bad return code qirr - rc = %lx\n", rc); ++ int hw_cpu = get_hard_smp_processor_id(n_cpu); ++ long rc; ++ ++ /* Make sure all previous accesses are ordered before IPI sending */ ++ mb(); ++ rc = plpar_hcall_norets(H_IPI, hw_cpu, value); ++ if (rc != H_SUCCESS) { ++ pr_err("%s: bad return code qirr cpu=%d hw_cpu=%d 
mfrr=0x%x " ++ "returned %ld\n", __func__, n_cpu, hw_cpu, value, rc); ++ WARN_ON_ONCE(1); ++ } + } + + static void icp_hv_eoi(struct irq_data *d) +diff --git a/arch/x86/xen/setup.c b/arch/x86/xen/setup.c +index b2c7179..bb104b4 100644 +--- a/arch/x86/xen/setup.c ++++ b/arch/x86/xen/setup.c +@@ -78,9 +78,16 @@ static void __init xen_add_extra_mem(u64 start, u64 size) + memblock_x86_reserve_range(start, start + size, "XEN EXTRA"); + + xen_max_p2m_pfn = PFN_DOWN(start + size); ++ for (pfn = PFN_DOWN(start); pfn < xen_max_p2m_pfn; pfn++) { ++ unsigned long mfn = pfn_to_mfn(pfn); ++ ++ if (WARN(mfn == pfn, "Trying to over-write 1-1 mapping (pfn: %lx)\n", pfn)) ++ continue; ++ WARN(mfn != INVALID_P2M_ENTRY, "Trying to remove %lx which has %lx mfn!\n", ++ pfn, mfn); + +- for (pfn = PFN_DOWN(start); pfn <= xen_max_p2m_pfn; pfn++) + __set_phys_to_machine(pfn, INVALID_P2M_ENTRY); ++ } + } + + static unsigned long __init xen_release_chunk(unsigned long start, +diff --git a/drivers/ata/ahci.c b/drivers/ata/ahci.c +index fb65915..608257a 100644 +--- a/drivers/ata/ahci.c ++++ b/drivers/ata/ahci.c +@@ -386,6 +386,8 @@ static const struct pci_device_id ahci_pci_tbl[] = { + .driver_data = board_ahci_yes_fbs }, /* 88se9125 */ + { PCI_DEVICE(0x1b4b, 0x917a), + .driver_data = board_ahci_yes_fbs }, /* 88se9172 */ ++ { PCI_DEVICE(0x1b4b, 0x9192), ++ .driver_data = board_ahci_yes_fbs }, /* 88se9172 on some Gigabyte */ + { PCI_DEVICE(0x1b4b, 0x91a3), + .driver_data = board_ahci_yes_fbs }, + +diff --git a/drivers/gpu/drm/drm_crtc.c b/drivers/gpu/drm/drm_crtc.c +index 8323fc3..3f1799b 100644 +--- a/drivers/gpu/drm/drm_crtc.c ++++ b/drivers/gpu/drm/drm_crtc.c +@@ -1625,10 +1625,8 @@ int drm_mode_cursor_ioctl(struct drm_device *dev, + if (!drm_core_check_feature(dev, DRIVER_MODESET)) + return -EINVAL; + +- if (!req->flags) { +- DRM_ERROR("no operation set\n"); ++ if (!req->flags || (~DRM_MODE_CURSOR_FLAGS & req->flags)) + return -EINVAL; +- } + + mutex_lock(&dev->mode_config.mutex); + obj = drm_mode_object_find(dev, req->crtc_id, DRM_MODE_OBJECT_CRTC); +@@ -1641,7 +1639,6 @@ int drm_mode_cursor_ioctl(struct drm_device *dev, + + if (req->flags & DRM_MODE_CURSOR_BO) { + if (!crtc->funcs->cursor_set) { +- DRM_ERROR("crtc does not support cursor\n"); + ret = -ENXIO; + goto out; + } +@@ -1654,7 +1651,6 @@ int drm_mode_cursor_ioctl(struct drm_device *dev, + if (crtc->funcs->cursor_move) { + ret = crtc->funcs->cursor_move(crtc, req->x, req->y); + } else { +- DRM_ERROR("crtc does not support cursor\n"); + ret = -EFAULT; + goto out; + } +@@ -1692,14 +1688,11 @@ int drm_mode_addfb(struct drm_device *dev, + if (!drm_core_check_feature(dev, DRIVER_MODESET)) + return -EINVAL; + +- if ((config->min_width > r->width) || (r->width > config->max_width)) { +- DRM_ERROR("mode new framebuffer width not within limits\n"); ++ if ((config->min_width > r->width) || (r->width > config->max_width)) + return -EINVAL; +- } +- if ((config->min_height > r->height) || (r->height > config->max_height)) { +- DRM_ERROR("mode new framebuffer height not within limits\n"); ++ ++ if ((config->min_height > r->height) || (r->height > config->max_height)) + return -EINVAL; +- } + + mutex_lock(&dev->mode_config.mutex); + +@@ -1756,7 +1749,6 @@ int drm_mode_rmfb(struct drm_device *dev, + obj = drm_mode_object_find(dev, *id, DRM_MODE_OBJECT_FB); + /* TODO check that we really get a framebuffer back. 
*/ + if (!obj) { +- DRM_ERROR("mode invalid framebuffer id\n"); + ret = -EINVAL; + goto out; + } +@@ -1767,7 +1759,6 @@ int drm_mode_rmfb(struct drm_device *dev, + found = 1; + + if (!found) { +- DRM_ERROR("tried to remove a fb that we didn't own\n"); + ret = -EINVAL; + goto out; + } +@@ -1814,7 +1805,6 @@ int drm_mode_getfb(struct drm_device *dev, + mutex_lock(&dev->mode_config.mutex); + obj = drm_mode_object_find(dev, r->fb_id, DRM_MODE_OBJECT_FB); + if (!obj) { +- DRM_ERROR("invalid framebuffer id\n"); + ret = -EINVAL; + goto out; + } +@@ -1850,7 +1840,6 @@ int drm_mode_dirtyfb_ioctl(struct drm_device *dev, + mutex_lock(&dev->mode_config.mutex); + obj = drm_mode_object_find(dev, r->fb_id, DRM_MODE_OBJECT_FB); + if (!obj) { +- DRM_ERROR("invalid framebuffer id\n"); + ret = -EINVAL; + goto out_err1; + } +diff --git a/drivers/gpu/drm/drm_edid.c b/drivers/gpu/drm/drm_edid.c +index a1ee634..0c1a99b 100644 +--- a/drivers/gpu/drm/drm_edid.c ++++ b/drivers/gpu/drm/drm_edid.c +@@ -66,6 +66,8 @@ + #define EDID_QUIRK_FIRST_DETAILED_PREFERRED (1 << 5) + /* use +hsync +vsync for detailed mode */ + #define EDID_QUIRK_DETAILED_SYNC_PP (1 << 6) ++/* Force reduced-blanking timings for detailed modes */ ++#define EDID_QUIRK_FORCE_REDUCED_BLANKING (1 << 7) + + struct detailed_mode_closure { + struct drm_connector *connector; +@@ -85,6 +87,9 @@ static struct edid_quirk { + int product_id; + u32 quirks; + } edid_quirk_list[] = { ++ /* ASUS VW222S */ ++ { "ACI", 0x22a2, EDID_QUIRK_FORCE_REDUCED_BLANKING }, ++ + /* Acer AL1706 */ + { "ACR", 44358, EDID_QUIRK_PREFER_LARGE_60 }, + /* Acer F51 */ +@@ -120,6 +125,9 @@ static struct edid_quirk { + /* Samsung SyncMaster 22[5-6]BW */ + { "SAM", 596, EDID_QUIRK_PREFER_LARGE_60 }, + { "SAM", 638, EDID_QUIRK_PREFER_LARGE_60 }, ++ ++ /* ViewSonic VA2026w */ ++ { "VSC", 5020, EDID_QUIRK_FORCE_REDUCED_BLANKING }, + }; + + /*** DDC fetch and block validation ***/ +@@ -863,12 +871,19 @@ static struct drm_display_mode *drm_mode_detailed(struct drm_device *dev, + "Wrong Hsync/Vsync pulse width\n"); + return NULL; + } ++ ++ if (quirks & EDID_QUIRK_FORCE_REDUCED_BLANKING) { ++ mode = drm_cvt_mode(dev, hactive, vactive, 60, true, false, false); ++ if (!mode) ++ return NULL; ++ ++ goto set_size; ++ } ++ + mode = drm_mode_create(dev); + if (!mode) + return NULL; + +- mode->type = DRM_MODE_TYPE_DRIVER; +- + if (quirks & EDID_QUIRK_135_CLOCK_TOO_HIGH) + timing->pixel_clock = cpu_to_le16(1088); + +@@ -892,8 +907,6 @@ static struct drm_display_mode *drm_mode_detailed(struct drm_device *dev, + + drm_mode_do_interlace_quirk(mode, pt); + +- drm_mode_set_name(mode); +- + if (quirks & EDID_QUIRK_DETAILED_SYNC_PP) { + pt->misc |= DRM_EDID_PT_HSYNC_POSITIVE | DRM_EDID_PT_VSYNC_POSITIVE; + } +@@ -903,6 +916,7 @@ static struct drm_display_mode *drm_mode_detailed(struct drm_device *dev, + mode->flags |= (pt->misc & DRM_EDID_PT_VSYNC_POSITIVE) ? 
+ DRM_MODE_FLAG_PVSYNC : DRM_MODE_FLAG_NVSYNC; + ++set_size: + mode->width_mm = pt->width_mm_lo | (pt->width_height_mm_hi & 0xf0) << 4; + mode->height_mm = pt->height_mm_lo | (pt->width_height_mm_hi & 0xf) << 8; + +@@ -916,6 +930,9 @@ static struct drm_display_mode *drm_mode_detailed(struct drm_device *dev, + mode->height_mm = edid->height_cm * 10; + } + ++ mode->type = DRM_MODE_TYPE_DRIVER; ++ drm_mode_set_name(mode); ++ + return mode; + } + +diff --git a/drivers/gpu/drm/i915/i915_irq.c b/drivers/gpu/drm/i915/i915_irq.c +index 578ddfc..c8b5bc1 100644 +--- a/drivers/gpu/drm/i915/i915_irq.c ++++ b/drivers/gpu/drm/i915/i915_irq.c +@@ -2006,10 +2006,22 @@ static int i915_driver_irq_postinstall(struct drm_device *dev) + hotplug_en |= HDMIC_HOTPLUG_INT_EN; + if (dev_priv->hotplug_supported_mask & HDMID_HOTPLUG_INT_STATUS) + hotplug_en |= HDMID_HOTPLUG_INT_EN; +- if (dev_priv->hotplug_supported_mask & SDVOC_HOTPLUG_INT_STATUS) +- hotplug_en |= SDVOC_HOTPLUG_INT_EN; +- if (dev_priv->hotplug_supported_mask & SDVOB_HOTPLUG_INT_STATUS) +- hotplug_en |= SDVOB_HOTPLUG_INT_EN; ++ if (IS_G4X(dev)) { ++ if (dev_priv->hotplug_supported_mask & SDVOC_HOTPLUG_INT_STATUS_G4X) ++ hotplug_en |= SDVOC_HOTPLUG_INT_EN; ++ if (dev_priv->hotplug_supported_mask & SDVOB_HOTPLUG_INT_STATUS_G4X) ++ hotplug_en |= SDVOB_HOTPLUG_INT_EN; ++ } else if (IS_GEN4(dev)) { ++ if (dev_priv->hotplug_supported_mask & SDVOC_HOTPLUG_INT_STATUS_I965) ++ hotplug_en |= SDVOC_HOTPLUG_INT_EN; ++ if (dev_priv->hotplug_supported_mask & SDVOB_HOTPLUG_INT_STATUS_I965) ++ hotplug_en |= SDVOB_HOTPLUG_INT_EN; ++ } else { ++ if (dev_priv->hotplug_supported_mask & SDVOC_HOTPLUG_INT_STATUS_I915) ++ hotplug_en |= SDVOC_HOTPLUG_INT_EN; ++ if (dev_priv->hotplug_supported_mask & SDVOB_HOTPLUG_INT_STATUS_I915) ++ hotplug_en |= SDVOB_HOTPLUG_INT_EN; ++ } + if (dev_priv->hotplug_supported_mask & CRT_HOTPLUG_INT_STATUS) { + hotplug_en |= CRT_HOTPLUG_INT_EN; + +diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h +index fd53122..4a5e662 100644 +--- a/drivers/gpu/drm/i915/i915_reg.h ++++ b/drivers/gpu/drm/i915/i915_reg.h +@@ -1419,14 +1419,20 @@ + #define DPC_HOTPLUG_INT_STATUS (1 << 28) + #define HDMID_HOTPLUG_INT_STATUS (1 << 27) + #define DPD_HOTPLUG_INT_STATUS (1 << 27) ++/* CRT/TV common between gen3+ */ + #define CRT_HOTPLUG_INT_STATUS (1 << 11) + #define TV_HOTPLUG_INT_STATUS (1 << 10) + #define CRT_HOTPLUG_MONITOR_MASK (3 << 8) + #define CRT_HOTPLUG_MONITOR_COLOR (3 << 8) + #define CRT_HOTPLUG_MONITOR_MONO (2 << 8) + #define CRT_HOTPLUG_MONITOR_NONE (0 << 8) +-#define SDVOC_HOTPLUG_INT_STATUS (1 << 7) +-#define SDVOB_HOTPLUG_INT_STATUS (1 << 6) ++/* SDVO is different across gen3/4 */ ++#define SDVOC_HOTPLUG_INT_STATUS_G4X (1 << 3) ++#define SDVOB_HOTPLUG_INT_STATUS_G4X (1 << 2) ++#define SDVOC_HOTPLUG_INT_STATUS_I965 (3 << 4) ++#define SDVOB_HOTPLUG_INT_STATUS_I965 (3 << 2) ++#define SDVOC_HOTPLUG_INT_STATUS_I915 (1 << 7) ++#define SDVOB_HOTPLUG_INT_STATUS_I915 (1 << 6) + + /* SDVO port control */ + #define SDVOB 0x61140 +diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c +index 3eed270..6c3fb44 100644 +--- a/drivers/gpu/drm/i915/intel_display.c ++++ b/drivers/gpu/drm/i915/intel_display.c +@@ -1072,8 +1072,8 @@ static void assert_pch_hdmi_disabled(struct drm_i915_private *dev_priv, + enum pipe pipe, int reg) + { + u32 val = I915_READ(reg); +- WARN(hdmi_pipe_enabled(dev_priv, val, pipe), +- "PCH DP (0x%08x) enabled on transcoder %c, should be disabled\n", ++ 
WARN(hdmi_pipe_enabled(dev_priv, pipe, val), ++ "PCH HDMI (0x%08x) enabled on transcoder %c, should be disabled\n", + reg, pipe_name(pipe)); + } + +@@ -1089,13 +1089,13 @@ static void assert_pch_ports_disabled(struct drm_i915_private *dev_priv, + + reg = PCH_ADPA; + val = I915_READ(reg); +- WARN(adpa_pipe_enabled(dev_priv, val, pipe), ++ WARN(adpa_pipe_enabled(dev_priv, pipe, val), + "PCH VGA enabled on transcoder %c, should be disabled\n", + pipe_name(pipe)); + + reg = PCH_LVDS; + val = I915_READ(reg); +- WARN(lvds_pipe_enabled(dev_priv, val, pipe), ++ WARN(lvds_pipe_enabled(dev_priv, pipe, val), + "PCH LVDS enabled on transcoder %c, should be disabled\n", + pipe_name(pipe)); + +@@ -1437,7 +1437,7 @@ static void disable_pch_hdmi(struct drm_i915_private *dev_priv, + enum pipe pipe, int reg) + { + u32 val = I915_READ(reg); +- if (hdmi_pipe_enabled(dev_priv, val, pipe)) { ++ if (hdmi_pipe_enabled(dev_priv, pipe, val)) { + DRM_DEBUG_KMS("Disabling pch HDMI %x on pipe %d\n", + reg, pipe); + I915_WRITE(reg, val & ~PORT_ENABLE); +@@ -1459,12 +1459,12 @@ static void intel_disable_pch_ports(struct drm_i915_private *dev_priv, + + reg = PCH_ADPA; + val = I915_READ(reg); +- if (adpa_pipe_enabled(dev_priv, val, pipe)) ++ if (adpa_pipe_enabled(dev_priv, pipe, val)) + I915_WRITE(reg, val & ~ADPA_DAC_ENABLE); + + reg = PCH_LVDS; + val = I915_READ(reg); +- if (lvds_pipe_enabled(dev_priv, val, pipe)) { ++ if (lvds_pipe_enabled(dev_priv, pipe, val)) { + DRM_DEBUG_KMS("disable lvds on pipe %d val 0x%08x\n", pipe, val); + I915_WRITE(reg, val & ~LVDS_PORT_EN); + POSTING_READ(reg); +@@ -2852,16 +2852,14 @@ static void intel_clear_scanline_wait(struct drm_device *dev) + + static void intel_crtc_wait_for_pending_flips(struct drm_crtc *crtc) + { +- struct drm_i915_gem_object *obj; +- struct drm_i915_private *dev_priv; ++ struct drm_device *dev = crtc->dev; + + if (crtc->fb == NULL) + return; + +- obj = to_intel_framebuffer(crtc->fb)->obj; +- dev_priv = crtc->dev->dev_private; +- wait_event(dev_priv->pending_flip_queue, +- atomic_read(&obj->pending_flip) == 0); ++ mutex_lock(&dev->struct_mutex); ++ intel_finish_fb(crtc->fb); ++ mutex_unlock(&dev->struct_mutex); + } + + static bool intel_crtc_driving_pch(struct drm_crtc *crtc) +@@ -3322,23 +3320,6 @@ static void intel_crtc_disable(struct drm_crtc *crtc) + struct drm_crtc_helper_funcs *crtc_funcs = crtc->helper_private; + struct drm_device *dev = crtc->dev; + +- /* Flush any pending WAITs before we disable the pipe. Note that +- * we need to drop the struct_mutex in order to acquire it again +- * during the lowlevel dpms routines around a couple of the +- * operations. It does not look trivial nor desirable to move +- * that locking higher. So instead we leave a window for the +- * submission of further commands on the fb before we can actually +- * disable it. This race with userspace exists anyway, and we can +- * only rely on the pipe being disabled by userspace after it +- * receives the hotplug notification and has flushed any pending +- * batches. 
+- */ +- if (crtc->fb) { +- mutex_lock(&dev->struct_mutex); +- intel_finish_fb(crtc->fb); +- mutex_unlock(&dev->struct_mutex); +- } +- + crtc_funcs->dpms(crtc, DRM_MODE_DPMS_OFF); + + if (crtc->fb) { +diff --git a/drivers/gpu/drm/i915/intel_lvds.c b/drivers/gpu/drm/i915/intel_lvds.c +index ceec71b..f07bde2 100644 +--- a/drivers/gpu/drm/i915/intel_lvds.c ++++ b/drivers/gpu/drm/i915/intel_lvds.c +@@ -752,7 +752,7 @@ static const struct dmi_system_id intel_no_lvds[] = { + .ident = "Hewlett-Packard t5745", + .matches = { + DMI_MATCH(DMI_BOARD_VENDOR, "Hewlett-Packard"), +- DMI_MATCH(DMI_BOARD_NAME, "hp t5745"), ++ DMI_MATCH(DMI_PRODUCT_NAME, "hp t5745"), + }, + }, + { +@@ -760,7 +760,7 @@ static const struct dmi_system_id intel_no_lvds[] = { + .ident = "Hewlett-Packard st5747", + .matches = { + DMI_MATCH(DMI_BOARD_VENDOR, "Hewlett-Packard"), +- DMI_MATCH(DMI_BOARD_NAME, "hp st5747"), ++ DMI_MATCH(DMI_PRODUCT_NAME, "hp st5747"), + }, + }, + { +diff --git a/drivers/gpu/drm/i915/intel_sdvo.c b/drivers/gpu/drm/i915/intel_sdvo.c +index a8d8ee5..bbf247c 100644 +--- a/drivers/gpu/drm/i915/intel_sdvo.c ++++ b/drivers/gpu/drm/i915/intel_sdvo.c +@@ -2514,6 +2514,7 @@ bool intel_sdvo_init(struct drm_device *dev, int sdvo_reg) + struct drm_i915_private *dev_priv = dev->dev_private; + struct intel_encoder *intel_encoder; + struct intel_sdvo *intel_sdvo; ++ u32 hotplug_mask; + int i; + + intel_sdvo = kzalloc(sizeof(struct intel_sdvo), GFP_KERNEL); +@@ -2544,10 +2545,17 @@ bool intel_sdvo_init(struct drm_device *dev, int sdvo_reg) + } + } + +- if (IS_SDVOB(sdvo_reg)) +- dev_priv->hotplug_supported_mask |= SDVOB_HOTPLUG_INT_STATUS; +- else +- dev_priv->hotplug_supported_mask |= SDVOC_HOTPLUG_INT_STATUS; ++ hotplug_mask = 0; ++ if (IS_G4X(dev)) { ++ hotplug_mask = IS_SDVOB(sdvo_reg) ? ++ SDVOB_HOTPLUG_INT_STATUS_G4X : SDVOC_HOTPLUG_INT_STATUS_G4X; ++ } else if (IS_GEN4(dev)) { ++ hotplug_mask = IS_SDVOB(sdvo_reg) ? ++ SDVOB_HOTPLUG_INT_STATUS_I965 : SDVOC_HOTPLUG_INT_STATUS_I965; ++ } else { ++ hotplug_mask = IS_SDVOB(sdvo_reg) ? ++ SDVOB_HOTPLUG_INT_STATUS_I915 : SDVOC_HOTPLUG_INT_STATUS_I915; ++ } + + drm_encoder_helper_add(&intel_encoder->base, &intel_sdvo_helper_funcs); + +@@ -2555,14 +2563,6 @@ bool intel_sdvo_init(struct drm_device *dev, int sdvo_reg) + if (!intel_sdvo_get_capabilities(intel_sdvo, &intel_sdvo->caps)) + goto err; + +- /* Set up hotplug command - note paranoia about contents of reply. +- * We assume that the hardware is in a sane state, and only touch +- * the bits we think we understand. +- */ +- intel_sdvo_get_value(intel_sdvo, SDVO_CMD_GET_ACTIVE_HOT_PLUG, +- &intel_sdvo->hotplug_active, 2); +- intel_sdvo->hotplug_active[0] &= ~0x3; +- + if (intel_sdvo_output_setup(intel_sdvo, + intel_sdvo->caps.output_flags) != true) { + DRM_DEBUG_KMS("SDVO output failed to setup on SDVO%c\n", +@@ -2570,6 +2570,12 @@ bool intel_sdvo_init(struct drm_device *dev, int sdvo_reg) + goto err; + } + ++ /* Only enable the hotplug irq if we need it, to work around noisy ++ * hotplug lines. ++ */ ++ if (intel_sdvo->hotplug_active[0]) ++ dev_priv->hotplug_supported_mask |= hotplug_mask; ++ + intel_sdvo_select_ddc_bus(dev_priv, intel_sdvo, sdvo_reg); + + /* Set the input timing to the screen. Assume always input 0. 
*/ +diff --git a/drivers/gpu/drm/nouveau/nouveau_display.c b/drivers/gpu/drm/nouveau/nouveau_display.c +index b12fd2c..6adef06 100644 +--- a/drivers/gpu/drm/nouveau/nouveau_display.c ++++ b/drivers/gpu/drm/nouveau/nouveau_display.c +@@ -381,7 +381,7 @@ nouveau_display_dumb_create(struct drm_file *file_priv, struct drm_device *dev, + args->size = args->pitch * args->height; + args->size = roundup(args->size, PAGE_SIZE); + +- ret = nouveau_gem_new(dev, args->size, 0, TTM_PL_FLAG_VRAM, 0, 0, &bo); ++ ret = nouveau_gem_new(dev, args->size, 0, NOUVEAU_GEM_DOMAIN_VRAM, 0, 0, &bo); + if (ret) + return ret; + +diff --git a/drivers/gpu/drm/radeon/atombios_crtc.c b/drivers/gpu/drm/radeon/atombios_crtc.c +index 757c549..ceffd20 100644 +--- a/drivers/gpu/drm/radeon/atombios_crtc.c ++++ b/drivers/gpu/drm/radeon/atombios_crtc.c +@@ -1446,14 +1446,98 @@ static void radeon_legacy_atom_fixup(struct drm_crtc *crtc) + } + } + ++/** ++ * radeon_get_pll_use_mask - look up a mask of which pplls are in use ++ * ++ * @crtc: drm crtc ++ * ++ * Returns the mask of which PPLLs (Pixel PLLs) are in use. ++ */ ++static u32 radeon_get_pll_use_mask(struct drm_crtc *crtc) ++{ ++ struct drm_device *dev = crtc->dev; ++ struct drm_crtc *test_crtc; ++ struct radeon_crtc *radeon_test_crtc; ++ u32 pll_in_use = 0; ++ ++ list_for_each_entry(test_crtc, &dev->mode_config.crtc_list, head) { ++ if (crtc == test_crtc) ++ continue; ++ ++ radeon_test_crtc = to_radeon_crtc(test_crtc); ++ if (radeon_test_crtc->pll_id != ATOM_PPLL_INVALID) ++ pll_in_use |= (1 << radeon_test_crtc->pll_id); ++ } ++ return pll_in_use; ++} ++ ++/** ++ * radeon_get_shared_dp_ppll - return the PPLL used by another crtc for DP ++ * ++ * @crtc: drm crtc ++ * ++ * Returns the PPLL (Pixel PLL) used by another crtc/encoder which is ++ * also in DP mode. For DP, a single PPLL can be used for all DP ++ * crtcs/encoders. ++ */ ++static int radeon_get_shared_dp_ppll(struct drm_crtc *crtc) ++{ ++ struct drm_device *dev = crtc->dev; ++ struct drm_encoder *test_encoder; ++ struct radeon_crtc *radeon_test_crtc; ++ ++ list_for_each_entry(test_encoder, &dev->mode_config.encoder_list, head) { ++ if (test_encoder->crtc && (test_encoder->crtc != crtc)) { ++ if (ENCODER_MODE_IS_DP(atombios_get_encoder_mode(test_encoder))) { ++ /* for DP use the same PLL for all */ ++ radeon_test_crtc = to_radeon_crtc(test_encoder->crtc); ++ if (radeon_test_crtc->pll_id != ATOM_PPLL_INVALID) ++ return radeon_test_crtc->pll_id; ++ } ++ } ++ } ++ return ATOM_PPLL_INVALID; ++} ++ ++/** ++ * radeon_atom_pick_pll - Allocate a PPLL for use by the crtc. ++ * ++ * @crtc: drm crtc ++ * ++ * Returns the PPLL (Pixel PLL) to be used by the crtc. For DP monitors ++ * a single PPLL can be used for all DP crtcs/encoders. For non-DP ++ * monitors a dedicated PPLL must be used. If a particular board has ++ * an external DP PLL, return ATOM_PPLL_INVALID to skip PLL programming ++ * as there is no need to program the PLL itself. If we are not able to ++ * allocate a PLL, return ATOM_PPLL_INVALID to skip PLL programming to ++ * avoid messing up an existing monitor. 
++ * ++ * Asic specific PLL information ++ * ++ * DCE 6.1 ++ * - PPLL2 is only available to UNIPHYA (both DP and non-DP) ++ * - PPLL0, PPLL1 are available for UNIPHYB/C/D/E/F (both DP and non-DP) ++ * ++ * DCE 6.0 ++ * - PPLL0 is available to all UNIPHY (DP only) ++ * - PPLL1, PPLL2 are available for all UNIPHY (both DP and non-DP) and DAC ++ * ++ * DCE 5.0 ++ * - DCPLL is available to all UNIPHY (DP only) ++ * - PPLL1, PPLL2 are available for all UNIPHY (both DP and non-DP) and DAC ++ * ++ * DCE 3.0/4.0/4.1 ++ * - PPLL1, PPLL2 are available for all UNIPHY (both DP and non-DP) and DAC ++ * ++ */ + static int radeon_atom_pick_pll(struct drm_crtc *crtc) + { + struct radeon_crtc *radeon_crtc = to_radeon_crtc(crtc); + struct drm_device *dev = crtc->dev; + struct radeon_device *rdev = dev->dev_private; + struct drm_encoder *test_encoder; +- struct drm_crtc *test_crtc; +- uint32_t pll_in_use = 0; ++ u32 pll_in_use; ++ int pll; + + if (ASIC_IS_DCE4(rdev)) { + list_for_each_entry(test_encoder, &dev->mode_config.encoder_list, head) { +@@ -1461,35 +1545,39 @@ static int radeon_atom_pick_pll(struct drm_crtc *crtc) + /* in DP mode, the DP ref clock can come from PPLL, DCPLL, or ext clock, + * depending on the asic: + * DCE4: PPLL or ext clock +- * DCE5: DCPLL or ext clock ++ * DCE5: PPLL, DCPLL, or ext clock + * + * Setting ATOM_PPLL_INVALID will cause SetPixelClock to skip + * PPLL/DCPLL programming and only program the DP DTO for the + * crtc virtual pixel clock. + */ + if (ENCODER_MODE_IS_DP(atombios_get_encoder_mode(test_encoder))) { +- if (ASIC_IS_DCE5(rdev) || rdev->clock.dp_extclk) ++ if (rdev->clock.dp_extclk) ++ /* skip PPLL programming if using ext clock */ + return ATOM_PPLL_INVALID; ++ else if (ASIC_IS_DCE5(rdev)) ++ /* use DCPLL for all DP */ ++ return ATOM_DCPLL; ++ else { ++ /* use the same PPLL for all DP monitors */ ++ pll = radeon_get_shared_dp_ppll(crtc); ++ if (pll != ATOM_PPLL_INVALID) ++ return pll; ++ } + } ++ break; + } + } +- +- /* otherwise, pick one of the plls */ +- list_for_each_entry(test_crtc, &dev->mode_config.crtc_list, head) { +- struct radeon_crtc *radeon_test_crtc; +- +- if (crtc == test_crtc) +- continue; +- +- radeon_test_crtc = to_radeon_crtc(test_crtc); +- if ((radeon_test_crtc->pll_id >= ATOM_PPLL1) && +- (radeon_test_crtc->pll_id <= ATOM_PPLL2)) +- pll_in_use |= (1 << radeon_test_crtc->pll_id); +- } +- if (!(pll_in_use & 1)) ++ /* all other cases */ ++ pll_in_use = radeon_get_pll_use_mask(crtc); ++ if (!(pll_in_use & (1 << ATOM_PPLL2))) ++ return ATOM_PPLL2; ++ if (!(pll_in_use & (1 << ATOM_PPLL1))) + return ATOM_PPLL1; +- return ATOM_PPLL2; ++ DRM_ERROR("unable to allocate a PPLL\n"); ++ return ATOM_PPLL_INVALID; + } else ++ /* use PPLL1 or PPLL2 */ + return radeon_crtc->crtc_id; + + } +@@ -1578,10 +1666,25 @@ static void atombios_crtc_commit(struct drm_crtc *crtc) + static void atombios_crtc_disable(struct drm_crtc *crtc) + { + struct radeon_crtc *radeon_crtc = to_radeon_crtc(crtc); ++ struct drm_device *dev = crtc->dev; ++ struct radeon_device *rdev = dev->dev_private; + struct radeon_atom_ss ss; ++ int i; + + atombios_crtc_dpms(crtc, DRM_MODE_DPMS_OFF); + ++ for (i = 0; i < rdev->num_crtc; i++) { ++ if (rdev->mode_info.crtcs[i] && ++ rdev->mode_info.crtcs[i]->enabled && ++ i != radeon_crtc->crtc_id && ++ radeon_crtc->pll_id == rdev->mode_info.crtcs[i]->pll_id) { ++ /* one other crtc is using this pll don't turn ++ * off the pll ++ */ ++ goto done; ++ } ++ } ++ + switch (radeon_crtc->pll_id) { + case ATOM_PPLL1: + case ATOM_PPLL2: +@@ -1592,7 +1695,8 @@ 
static void atombios_crtc_disable(struct drm_crtc *crtc) + default: + break; + } +- radeon_crtc->pll_id = -1; ++done: ++ radeon_crtc->pll_id = ATOM_PPLL_INVALID; + } + + static const struct drm_crtc_helper_funcs atombios_helper_funcs = { +@@ -1641,6 +1745,6 @@ void radeon_atombios_init_crtc(struct drm_device *dev, + else + radeon_crtc->crtc_offset = 0; + } +- radeon_crtc->pll_id = -1; ++ radeon_crtc->pll_id = ATOM_PPLL_INVALID; + drm_crtc_helper_add(&radeon_crtc->base, &atombios_helper_funcs); + } +diff --git a/drivers/gpu/drm/radeon/atombios_encoders.c b/drivers/gpu/drm/radeon/atombios_encoders.c +index 5351ee1..382e141 100644 +--- a/drivers/gpu/drm/radeon/atombios_encoders.c ++++ b/drivers/gpu/drm/radeon/atombios_encoders.c +@@ -1344,6 +1344,8 @@ radeon_atom_encoder_dpms_dig(struct drm_encoder *encoder, int mode) + struct drm_device *dev = encoder->dev; + struct radeon_device *rdev = dev->dev_private; + struct radeon_encoder *radeon_encoder = to_radeon_encoder(encoder); ++ struct drm_encoder *ext_encoder = radeon_get_external_encoder(encoder); ++ struct radeon_encoder_atom_dig *dig = radeon_encoder->enc_priv; + struct drm_connector *connector = radeon_get_connector_for_encoder(encoder); + struct radeon_connector *radeon_connector = NULL; + struct radeon_connector_atom_dig *radeon_dig_connector = NULL; +@@ -1355,12 +1357,38 @@ radeon_atom_encoder_dpms_dig(struct drm_encoder *encoder, int mode) + + switch (mode) { + case DRM_MODE_DPMS_ON: +- /* some early dce3.2 boards have a bug in their transmitter control table */ +- if ((rdev->family == CHIP_RV710) || (rdev->family == CHIP_RV730) || +- ASIC_IS_DCE41(rdev) || ASIC_IS_DCE5(rdev)) ++ if (ASIC_IS_DCE41(rdev) || ASIC_IS_DCE5(rdev)) { ++ if (!connector) ++ dig->panel_mode = DP_PANEL_MODE_EXTERNAL_DP_MODE; ++ else ++ dig->panel_mode = radeon_dp_get_panel_mode(encoder, connector); ++ ++ /* setup and enable the encoder */ ++ atombios_dig_encoder_setup(encoder, ATOM_ENCODER_CMD_SETUP, 0); ++ atombios_dig_encoder_setup(encoder, ++ ATOM_ENCODER_CMD_SETUP_PANEL_MODE, ++ dig->panel_mode); ++ if (ext_encoder) { ++ if (ASIC_IS_DCE41(rdev)) ++ atombios_external_encoder_setup(encoder, ext_encoder, ++ EXTERNAL_ENCODER_ACTION_V3_ENCODER_SETUP); ++ } ++ atombios_dig_transmitter_setup(encoder, ATOM_TRANSMITTER_ACTION_ENABLE, 0, 0); ++ } else if (ASIC_IS_DCE4(rdev)) { ++ /* setup and enable the encoder */ ++ atombios_dig_encoder_setup(encoder, ATOM_ENCODER_CMD_SETUP, 0); ++ /* enable the transmitter */ + atombios_dig_transmitter_setup(encoder, ATOM_TRANSMITTER_ACTION_ENABLE, 0, 0); +- else + atombios_dig_transmitter_setup(encoder, ATOM_TRANSMITTER_ACTION_ENABLE_OUTPUT, 0, 0); ++ } else { ++ /* setup and enable the encoder and transmitter */ ++ atombios_dig_encoder_setup(encoder, ATOM_ENABLE, 0); ++ atombios_dig_transmitter_setup(encoder, ATOM_TRANSMITTER_ACTION_SETUP, 0, 0); ++ atombios_dig_transmitter_setup(encoder, ATOM_TRANSMITTER_ACTION_ENABLE, 0, 0); ++ /* some early dce3.2 boards have a bug in their transmitter control table */ ++ if ((rdev->family != CHIP_RV710) || (rdev->family != CHIP_RV730)) ++ atombios_dig_transmitter_setup(encoder, ATOM_TRANSMITTER_ACTION_ENABLE_OUTPUT, 0, 0); ++ } + if (ENCODER_MODE_IS_DP(atombios_get_encoder_mode(encoder)) && connector) { + if (connector->connector_type == DRM_MODE_CONNECTOR_eDP) { + atombios_set_edp_panel_power(connector, +@@ -1377,10 +1405,19 @@ radeon_atom_encoder_dpms_dig(struct drm_encoder *encoder, int mode) + case DRM_MODE_DPMS_STANDBY: + case DRM_MODE_DPMS_SUSPEND: + case DRM_MODE_DPMS_OFF: +- if 
(ASIC_IS_DCE41(rdev) || ASIC_IS_DCE5(rdev)) ++ if (ASIC_IS_DCE41(rdev) || ASIC_IS_DCE5(rdev)) { ++ /* disable the transmitter */ + atombios_dig_transmitter_setup(encoder, ATOM_TRANSMITTER_ACTION_DISABLE, 0, 0); +- else ++ } else if (ASIC_IS_DCE4(rdev)) { ++ /* disable the transmitter */ + atombios_dig_transmitter_setup(encoder, ATOM_TRANSMITTER_ACTION_DISABLE_OUTPUT, 0, 0); ++ atombios_dig_transmitter_setup(encoder, ATOM_TRANSMITTER_ACTION_DISABLE, 0, 0); ++ } else { ++ /* disable the encoder and transmitter */ ++ atombios_dig_transmitter_setup(encoder, ATOM_TRANSMITTER_ACTION_DISABLE_OUTPUT, 0, 0); ++ atombios_dig_transmitter_setup(encoder, ATOM_TRANSMITTER_ACTION_DISABLE, 0, 0); ++ atombios_dig_encoder_setup(encoder, ATOM_DISABLE, 0); ++ } + if (ENCODER_MODE_IS_DP(atombios_get_encoder_mode(encoder)) && connector) { + if (ASIC_IS_DCE4(rdev)) + atombios_dig_encoder_setup(encoder, ATOM_ENCODER_CMD_DP_VIDEO_OFF, 0); +@@ -1805,10 +1842,12 @@ radeon_atom_encoder_mode_set(struct drm_encoder *encoder, + struct drm_device *dev = encoder->dev; + struct radeon_device *rdev = dev->dev_private; + struct radeon_encoder *radeon_encoder = to_radeon_encoder(encoder); +- struct drm_encoder *ext_encoder = radeon_get_external_encoder(encoder); + + radeon_encoder->pixel_clock = adjusted_mode->clock; + ++ /* need to call this here rather than in prepare() since we need some crtc info */ ++ radeon_atom_encoder_dpms(encoder, DRM_MODE_DPMS_OFF); ++ + if (ASIC_IS_AVIVO(rdev) && !ASIC_IS_DCE4(rdev)) { + if (radeon_encoder->active_device & (ATOM_DEVICE_CV_SUPPORT | ATOM_DEVICE_TV_SUPPORT)) + atombios_yuv_setup(encoder, true); +@@ -1827,38 +1866,7 @@ radeon_atom_encoder_mode_set(struct drm_encoder *encoder, + case ENCODER_OBJECT_ID_INTERNAL_UNIPHY1: + case ENCODER_OBJECT_ID_INTERNAL_UNIPHY2: + case ENCODER_OBJECT_ID_INTERNAL_KLDSCP_LVTMA: +- if (ASIC_IS_DCE41(rdev) || ASIC_IS_DCE5(rdev)) { +- struct drm_connector *connector = radeon_get_connector_for_encoder(encoder); +- struct radeon_encoder_atom_dig *dig = radeon_encoder->enc_priv; +- +- if (!connector) +- dig->panel_mode = DP_PANEL_MODE_EXTERNAL_DP_MODE; +- else +- dig->panel_mode = radeon_dp_get_panel_mode(encoder, connector); +- +- /* setup and enable the encoder */ +- atombios_dig_encoder_setup(encoder, ATOM_ENCODER_CMD_SETUP, 0); +- atombios_dig_encoder_setup(encoder, +- ATOM_ENCODER_CMD_SETUP_PANEL_MODE, +- dig->panel_mode); +- } else if (ASIC_IS_DCE4(rdev)) { +- /* disable the transmitter */ +- atombios_dig_transmitter_setup(encoder, ATOM_TRANSMITTER_ACTION_DISABLE, 0, 0); +- /* setup and enable the encoder */ +- atombios_dig_encoder_setup(encoder, ATOM_ENCODER_CMD_SETUP, 0); +- +- /* enable the transmitter */ +- atombios_dig_transmitter_setup(encoder, ATOM_TRANSMITTER_ACTION_ENABLE, 0, 0); +- } else { +- /* disable the encoder and transmitter */ +- atombios_dig_transmitter_setup(encoder, ATOM_TRANSMITTER_ACTION_DISABLE, 0, 0); +- atombios_dig_encoder_setup(encoder, ATOM_DISABLE, 0); +- +- /* setup and enable the encoder and transmitter */ +- atombios_dig_encoder_setup(encoder, ATOM_ENABLE, 0); +- atombios_dig_transmitter_setup(encoder, ATOM_TRANSMITTER_ACTION_SETUP, 0, 0); +- atombios_dig_transmitter_setup(encoder, ATOM_TRANSMITTER_ACTION_ENABLE, 0, 0); +- } ++ /* handled in dpms */ + break; + case ENCODER_OBJECT_ID_INTERNAL_DDI: + case ENCODER_OBJECT_ID_INTERNAL_DVO1: +@@ -1879,14 +1887,6 @@ radeon_atom_encoder_mode_set(struct drm_encoder *encoder, + break; + } + +- if (ext_encoder) { +- if (ASIC_IS_DCE41(rdev)) +- atombios_external_encoder_setup(encoder, 
ext_encoder, +- EXTERNAL_ENCODER_ACTION_V3_ENCODER_SETUP); +- else +- atombios_external_encoder_setup(encoder, ext_encoder, ATOM_ENABLE); +- } +- + atombios_apply_encoder_quirks(encoder, adjusted_mode); + + if (atombios_get_encoder_mode(encoder) == ATOM_ENCODER_MODE_HDMI) { +@@ -2059,7 +2059,6 @@ static void radeon_atom_encoder_prepare(struct drm_encoder *encoder) + } + + radeon_atom_output_lock(encoder, true); +- radeon_atom_encoder_dpms(encoder, DRM_MODE_DPMS_OFF); + + if (connector) { + struct radeon_connector *radeon_connector = to_radeon_connector(connector); +@@ -2080,6 +2079,7 @@ static void radeon_atom_encoder_prepare(struct drm_encoder *encoder) + + static void radeon_atom_encoder_commit(struct drm_encoder *encoder) + { ++ /* need to call this here as we need the crtc set up */ + radeon_atom_encoder_dpms(encoder, DRM_MODE_DPMS_ON); + radeon_atom_output_lock(encoder, false); + } +@@ -2120,14 +2120,7 @@ static void radeon_atom_encoder_disable(struct drm_encoder *encoder) + case ENCODER_OBJECT_ID_INTERNAL_UNIPHY1: + case ENCODER_OBJECT_ID_INTERNAL_UNIPHY2: + case ENCODER_OBJECT_ID_INTERNAL_KLDSCP_LVTMA: +- if (ASIC_IS_DCE4(rdev)) +- /* disable the transmitter */ +- atombios_dig_transmitter_setup(encoder, ATOM_TRANSMITTER_ACTION_DISABLE, 0, 0); +- else { +- /* disable the encoder and transmitter */ +- atombios_dig_transmitter_setup(encoder, ATOM_TRANSMITTER_ACTION_DISABLE, 0, 0); +- atombios_dig_encoder_setup(encoder, ATOM_DISABLE, 0); +- } ++ /* handled in dpms */ + break; + case ENCODER_OBJECT_ID_INTERNAL_DDI: + case ENCODER_OBJECT_ID_INTERNAL_DVO1: +diff --git a/drivers/gpu/drm/radeon/radeon_device.c b/drivers/gpu/drm/radeon/radeon_device.c +index 9231564..c5762e3 100644 +--- a/drivers/gpu/drm/radeon/radeon_device.c ++++ b/drivers/gpu/drm/radeon/radeon_device.c +@@ -761,7 +761,7 @@ int radeon_device_init(struct radeon_device *rdev, + if (rdev->flags & RADEON_IS_AGP) + rdev->need_dma32 = true; + if ((rdev->flags & RADEON_IS_PCI) && +- (rdev->family < CHIP_RS400)) ++ (rdev->family <= CHIP_RS740)) + rdev->need_dma32 = true; + + dma_bits = rdev->need_dma32 ? 
32 : 40; +diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_drv.c b/drivers/gpu/drm/vmwgfx/vmwgfx_drv.c +index dff8fc7..033fc96 100644 +--- a/drivers/gpu/drm/vmwgfx/vmwgfx_drv.c ++++ b/drivers/gpu/drm/vmwgfx/vmwgfx_drv.c +@@ -178,6 +178,7 @@ static struct pci_device_id vmw_pci_id_list[] = { + {0x15ad, 0x0405, PCI_ANY_ID, PCI_ANY_ID, 0, 0, VMWGFX_CHIP_SVGAII}, + {0, 0, 0} + }; ++MODULE_DEVICE_TABLE(pci, vmw_pci_id_list); + + static int enable_fbdev; + +@@ -1088,6 +1089,11 @@ static struct drm_driver driver = { + .master_drop = vmw_master_drop, + .open = vmw_driver_open, + .postclose = vmw_postclose, ++ ++ .dumb_create = vmw_dumb_create, ++ .dumb_map_offset = vmw_dumb_map_offset, ++ .dumb_destroy = vmw_dumb_destroy, ++ + .fops = { + .owner = THIS_MODULE, + .open = drm_open, +diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_drv.h b/drivers/gpu/drm/vmwgfx/vmwgfx_drv.h +index dc27970..0e3fa7d 100644 +--- a/drivers/gpu/drm/vmwgfx/vmwgfx_drv.h ++++ b/drivers/gpu/drm/vmwgfx/vmwgfx_drv.h +@@ -641,6 +641,16 @@ int vmw_kms_readback(struct vmw_private *dev_priv, + int vmw_kms_update_layout_ioctl(struct drm_device *dev, void *data, + struct drm_file *file_priv); + ++int vmw_dumb_create(struct drm_file *file_priv, ++ struct drm_device *dev, ++ struct drm_mode_create_dumb *args); ++ ++int vmw_dumb_map_offset(struct drm_file *file_priv, ++ struct drm_device *dev, uint32_t handle, ++ uint64_t *offset); ++int vmw_dumb_destroy(struct drm_file *file_priv, ++ struct drm_device *dev, ++ uint32_t handle); + /** + * Overlay control - vmwgfx_overlay.c + */ +diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_resource.c b/drivers/gpu/drm/vmwgfx/vmwgfx_resource.c +index 1c7f09e..0795d17 100644 +--- a/drivers/gpu/drm/vmwgfx/vmwgfx_resource.c ++++ b/drivers/gpu/drm/vmwgfx/vmwgfx_resource.c +@@ -1950,3 +1950,76 @@ err_ref: + vmw_resource_unreference(&res); + return ret; + } ++ ++ ++int vmw_dumb_create(struct drm_file *file_priv, ++ struct drm_device *dev, ++ struct drm_mode_create_dumb *args) ++{ ++ struct vmw_private *dev_priv = vmw_priv(dev); ++ struct vmw_master *vmaster = vmw_master(file_priv->master); ++ struct vmw_user_dma_buffer *vmw_user_bo; ++ struct ttm_buffer_object *tmp; ++ int ret; ++ ++ args->pitch = args->width * ((args->bpp + 7) / 8); ++ args->size = args->pitch * args->height; ++ ++ vmw_user_bo = kzalloc(sizeof(*vmw_user_bo), GFP_KERNEL); ++ if (vmw_user_bo == NULL) ++ return -ENOMEM; ++ ++ ret = ttm_read_lock(&vmaster->lock, true); ++ if (ret != 0) { ++ kfree(vmw_user_bo); ++ return ret; ++ } ++ ++ ret = vmw_dmabuf_init(dev_priv, &vmw_user_bo->dma, args->size, ++ &vmw_vram_sys_placement, true, ++ &vmw_user_dmabuf_destroy); ++ if (ret != 0) ++ goto out_no_dmabuf; ++ ++ tmp = ttm_bo_reference(&vmw_user_bo->dma.base); ++ ret = ttm_base_object_init(vmw_fpriv(file_priv)->tfile, ++ &vmw_user_bo->base, ++ false, ++ ttm_buffer_type, ++ &vmw_user_dmabuf_release, NULL); ++ if (unlikely(ret != 0)) ++ goto out_no_base_object; ++ ++ args->handle = vmw_user_bo->base.hash.key; ++ ++out_no_base_object: ++ ttm_bo_unref(&tmp); ++out_no_dmabuf: ++ ttm_read_unlock(&vmaster->lock); ++ return ret; ++} ++ ++int vmw_dumb_map_offset(struct drm_file *file_priv, ++ struct drm_device *dev, uint32_t handle, ++ uint64_t *offset) ++{ ++ struct ttm_object_file *tfile = vmw_fpriv(file_priv)->tfile; ++ struct vmw_dma_buffer *out_buf; ++ int ret; ++ ++ ret = vmw_user_dmabuf_lookup(tfile, handle, &out_buf); ++ if (ret != 0) ++ return -EINVAL; ++ ++ *offset = out_buf->base.addr_space_offset; ++ vmw_dmabuf_unreference(&out_buf); ++ return 0; ++} ++ ++int 
vmw_dumb_destroy(struct drm_file *file_priv, ++ struct drm_device *dev, ++ uint32_t handle) ++{ ++ return ttm_ref_object_base_unref(vmw_fpriv(file_priv)->tfile, ++ handle, TTM_REF_USAGE); ++} +diff --git a/drivers/hid/Kconfig b/drivers/hid/Kconfig +index d21f6d0..b5cc078 100644 +--- a/drivers/hid/Kconfig ++++ b/drivers/hid/Kconfig +@@ -350,6 +350,7 @@ config HID_MULTITOUCH + - Lumio CrystalTouch panels + - MosArt dual-touch panels + - PenMount dual touch panels ++ - PixArt optical touch screen + - Pixcir dual touch panels + - eGalax dual-touch panels, including the Joojoo and Wetab tablets + - Stantum multitouch panels +diff --git a/drivers/hid/hid-core.c b/drivers/hid/hid-core.c +index 5cc029f..0c8bea9 100644 +--- a/drivers/hid/hid-core.c ++++ b/drivers/hid/hid-core.c +@@ -1507,6 +1507,9 @@ static const struct hid_device_id hid_have_special_driver[] = { + { HID_USB_DEVICE(USB_VENDOR_ID_ORTEK, USB_DEVICE_ID_ORTEK_WKB2000) }, + { HID_USB_DEVICE(USB_VENDOR_ID_PENMOUNT, USB_DEVICE_ID_PENMOUNT_PCI) }, + { HID_USB_DEVICE(USB_VENDOR_ID_PETALYNX, USB_DEVICE_ID_PETALYNX_MAXTER_REMOTE) }, ++ { HID_USB_DEVICE(USB_VENDOR_ID_PIXART, USB_DEVICE_ID_PIXART_OPTICAL_TOUCH_SCREEN) }, ++ { HID_USB_DEVICE(USB_VENDOR_ID_PIXART, USB_DEVICE_ID_PIXART_OPTICAL_TOUCH_SCREEN1) }, ++ { HID_USB_DEVICE(USB_VENDOR_ID_PIXART, USB_DEVICE_ID_PIXART_OPTICAL_TOUCH_SCREEN2) }, + { HID_USB_DEVICE(USB_VENDOR_ID_PRIMAX, USB_DEVICE_ID_PRIMAX_KEYBOARD) }, + { HID_USB_DEVICE(USB_VENDOR_ID_QUANTA, USB_DEVICE_ID_QUANTA_OPTICAL_TOUCH) }, + { HID_USB_DEVICE(USB_VENDOR_ID_QUANTA, USB_DEVICE_ID_PIXART_IMAGING_INC_OPTICAL_TOUCH_SCREEN) }, +diff --git a/drivers/hid/hid-ids.h b/drivers/hid/hid-ids.h +index e4317a2..ab75a4e 100644 +--- a/drivers/hid/hid-ids.h ++++ b/drivers/hid/hid-ids.h +@@ -593,6 +593,11 @@ + #define USB_VENDOR_ID_PI_ENGINEERING 0x05f3 + #define USB_DEVICE_ID_PI_ENGINEERING_VEC_USB_FOOTPEDAL 0xff + ++#define USB_VENDOR_ID_PIXART 0x093a ++#define USB_DEVICE_ID_PIXART_OPTICAL_TOUCH_SCREEN 0x8001 ++#define USB_DEVICE_ID_PIXART_OPTICAL_TOUCH_SCREEN1 0x8002 ++#define USB_DEVICE_ID_PIXART_OPTICAL_TOUCH_SCREEN2 0x8003 ++ + #define USB_VENDOR_ID_PLAYDOTCOM 0x0b43 + #define USB_DEVICE_ID_PLAYDOTCOM_EMS_USBII 0x0003 + +diff --git a/drivers/hid/hid-multitouch.c b/drivers/hid/hid-multitouch.c +index 995fc4c..13af0f1 100644 +--- a/drivers/hid/hid-multitouch.c ++++ b/drivers/hid/hid-multitouch.c +@@ -93,6 +93,7 @@ struct mt_class { + #define MT_CLS_DUAL_INRANGE_CONTACTID 0x0005 + #define MT_CLS_DUAL_INRANGE_CONTACTNUMBER 0x0006 + #define MT_CLS_DUAL_NSMU_CONTACTID 0x0007 ++#define MT_CLS_INRANGE_CONTACTNUMBER 0x0009 + + /* vendor specific classes */ + #define MT_CLS_3M 0x0101 +@@ -155,6 +156,9 @@ struct mt_class mt_classes[] = { + .quirks = MT_QUIRK_NOT_SEEN_MEANS_UP | + MT_QUIRK_SLOT_IS_CONTACTID, + .maxcontacts = 2 }, ++ { .name = MT_CLS_INRANGE_CONTACTNUMBER, ++ .quirks = MT_QUIRK_VALID_IS_INRANGE | ++ MT_QUIRK_SLOT_IS_CONTACTNUMBER }, + + /* + * vendor specific classes +@@ -744,6 +748,17 @@ static const struct hid_device_id mt_devices[] = { + HID_USB_DEVICE(USB_VENDOR_ID_PENMOUNT, + USB_DEVICE_ID_PENMOUNT_PCI) }, + ++ /* PixArt optical touch screen */ ++ { .driver_data = MT_CLS_INRANGE_CONTACTNUMBER, ++ HID_USB_DEVICE(USB_VENDOR_ID_PIXART, ++ USB_DEVICE_ID_PIXART_OPTICAL_TOUCH_SCREEN) }, ++ { .driver_data = MT_CLS_INRANGE_CONTACTNUMBER, ++ HID_USB_DEVICE(USB_VENDOR_ID_PIXART, ++ USB_DEVICE_ID_PIXART_OPTICAL_TOUCH_SCREEN1) }, ++ { .driver_data = MT_CLS_INRANGE_CONTACTNUMBER, ++ HID_USB_DEVICE(USB_VENDOR_ID_PIXART, ++ 
USB_DEVICE_ID_PIXART_OPTICAL_TOUCH_SCREEN2) }, ++ + /* PixCir-based panels */ + { .driver_data = MT_CLS_DUAL_INRANGE_CONTACTID, + HID_USB_DEVICE(USB_VENDOR_ID_HANVON, +diff --git a/drivers/hid/usbhid/hid-quirks.c b/drivers/hid/usbhid/hid-quirks.c +index 1fe6b80..afb73af 100644 +--- a/drivers/hid/usbhid/hid-quirks.c ++++ b/drivers/hid/usbhid/hid-quirks.c +@@ -68,6 +68,10 @@ static const struct hid_blacklist { + { USB_VENDOR_ID_CH, USB_DEVICE_ID_CH_AXIS_295, HID_QUIRK_NOGET }, + { USB_VENDOR_ID_DMI, USB_DEVICE_ID_DMI_ENC, HID_QUIRK_NOGET }, + { USB_VENDOR_ID_ELO, USB_DEVICE_ID_ELO_TS2700, HID_QUIRK_NOGET }, ++ { USB_VENDOR_ID_MGE, USB_DEVICE_ID_MGE_UPS, HID_QUIRK_NOGET }, ++ { USB_VENDOR_ID_PIXART, USB_DEVICE_ID_PIXART_OPTICAL_TOUCH_SCREEN, HID_QUIRK_NO_INIT_REPORTS }, ++ { USB_VENDOR_ID_PIXART, USB_DEVICE_ID_PIXART_OPTICAL_TOUCH_SCREEN1, HID_QUIRK_NO_INIT_REPORTS }, ++ { USB_VENDOR_ID_PIXART, USB_DEVICE_ID_PIXART_OPTICAL_TOUCH_SCREEN2, HID_QUIRK_NO_INIT_REPORTS }, + { USB_VENDOR_ID_PRODIGE, USB_DEVICE_ID_PRODIGE_CORDLESS, HID_QUIRK_NOGET }, + { USB_VENDOR_ID_QUANTA, USB_DEVICE_ID_PIXART_IMAGING_INC_OPTICAL_TOUCH_SCREEN, HID_QUIRK_NOGET }, + { USB_VENDOR_ID_SUN, USB_DEVICE_ID_RARITAN_KVM_DONGLE, HID_QUIRK_NOGET }, +diff --git a/drivers/hwmon/asus_atk0110.c b/drivers/hwmon/asus_atk0110.c +index 00e9851..83d2fbd6 100644 +--- a/drivers/hwmon/asus_atk0110.c ++++ b/drivers/hwmon/asus_atk0110.c +@@ -34,6 +34,12 @@ static const struct dmi_system_id __initconst atk_force_new_if[] = { + .matches = { + DMI_MATCH(DMI_BOARD_NAME, "SABERTOOTH X58") + } ++ }, { ++ /* Old interface reads the same sensor for fan0 and fan1 */ ++ .ident = "Asus M5A78L", ++ .matches = { ++ DMI_MATCH(DMI_BOARD_NAME, "M5A78L") ++ } + }, + { } + }; +diff --git a/drivers/hwmon/twl4030-madc-hwmon.c b/drivers/hwmon/twl4030-madc-hwmon.c +index 0018c7d..1a174f0 100644 +--- a/drivers/hwmon/twl4030-madc-hwmon.c ++++ b/drivers/hwmon/twl4030-madc-hwmon.c +@@ -44,12 +44,13 @@ static ssize_t madc_read(struct device *dev, + struct device_attribute *devattr, char *buf) + { + struct sensor_device_attribute *attr = to_sensor_dev_attr(devattr); +- struct twl4030_madc_request req; ++ struct twl4030_madc_request req = { ++ .channels = 1 << attr->index, ++ .method = TWL4030_MADC_SW2, ++ .type = TWL4030_MADC_WAIT, ++ }; + long val; + +- req.channels = (1 << attr->index); +- req.method = TWL4030_MADC_SW2; +- req.func_cb = NULL; + val = twl4030_madc_conversion(&req); + if (val < 0) + return val; +diff --git a/drivers/i2c/busses/Kconfig b/drivers/i2c/busses/Kconfig +index a3afac4..60f593c 100644 +--- a/drivers/i2c/busses/Kconfig ++++ b/drivers/i2c/busses/Kconfig +@@ -103,6 +103,8 @@ config I2C_I801 + Patsburg (PCH) + DH89xxCC (PCH) + Panther Point (PCH) ++ Lynx Point (PCH) ++ Lynx Point-LP (PCH) + + This driver can also be built as a module. If so, the module + will be called i2c-i801. +@@ -349,9 +351,13 @@ config I2C_DAVINCI + devices such as DaVinci NIC. + For details please see http://www.ti.com/davinci + ++config I2C_DESIGNWARE_CORE ++ tristate ++ + config I2C_DESIGNWARE_PLATFORM + tristate "Synopsys DesignWare Platfrom" + depends on HAVE_CLK ++ select I2C_DESIGNWARE_CORE + help + If you say yes to this option, support will be included for the + Synopsys DesignWare I2C adapter. Only master mode is supported. 
+@@ -362,6 +368,7 @@ config I2C_DESIGNWARE_PLATFORM + config I2C_DESIGNWARE_PCI + tristate "Synopsys DesignWare PCI" + depends on PCI ++ select I2C_DESIGNWARE_CORE + help + If you say yes to this option, support will be included for the + Synopsys DesignWare I2C adapter. Only master mode is supported. +diff --git a/drivers/i2c/busses/Makefile b/drivers/i2c/busses/Makefile +index fba6da6..d6b8779 100644 +--- a/drivers/i2c/busses/Makefile ++++ b/drivers/i2c/busses/Makefile +@@ -33,10 +33,11 @@ obj-$(CONFIG_I2C_AU1550) += i2c-au1550.o + obj-$(CONFIG_I2C_BLACKFIN_TWI) += i2c-bfin-twi.o + obj-$(CONFIG_I2C_CPM) += i2c-cpm.o + obj-$(CONFIG_I2C_DAVINCI) += i2c-davinci.o ++obj-$(CONFIG_I2C_DESIGNWARE_CORE) += i2c-designware-core.o + obj-$(CONFIG_I2C_DESIGNWARE_PLATFORM) += i2c-designware-platform.o +-i2c-designware-platform-objs := i2c-designware-platdrv.o i2c-designware-core.o ++i2c-designware-platform-objs := i2c-designware-platdrv.o + obj-$(CONFIG_I2C_DESIGNWARE_PCI) += i2c-designware-pci.o +-i2c-designware-pci-objs := i2c-designware-pcidrv.o i2c-designware-core.o ++i2c-designware-pci-objs := i2c-designware-pcidrv.o + obj-$(CONFIG_I2C_GPIO) += i2c-gpio.o + obj-$(CONFIG_I2C_HIGHLANDER) += i2c-highlander.o + obj-$(CONFIG_I2C_IBM_IIC) += i2c-ibm_iic.o +diff --git a/drivers/i2c/busses/i2c-designware-core.c b/drivers/i2c/busses/i2c-designware-core.c +index df87992..6193349 100644 +--- a/drivers/i2c/busses/i2c-designware-core.c ++++ b/drivers/i2c/busses/i2c-designware-core.c +@@ -25,6 +25,7 @@ + * ---------------------------------------------------------------------------- + * + */ ++#include + #include + #include + #include +@@ -305,6 +306,7 @@ int i2c_dw_init(struct dw_i2c_dev *dev) + dw_writel(dev, dev->master_cfg , DW_IC_CON); + return 0; + } ++EXPORT_SYMBOL_GPL(i2c_dw_init); + + /* + * Waiting for bus not busy +@@ -557,12 +559,14 @@ done: + + return ret; + } ++EXPORT_SYMBOL_GPL(i2c_dw_xfer); + + u32 i2c_dw_func(struct i2c_adapter *adap) + { + struct dw_i2c_dev *dev = i2c_get_adapdata(adap); + return dev->functionality; + } ++EXPORT_SYMBOL_GPL(i2c_dw_func); + + static u32 i2c_dw_read_clear_intrbits(struct dw_i2c_dev *dev) + { +@@ -667,17 +671,20 @@ tx_aborted: + + return IRQ_HANDLED; + } ++EXPORT_SYMBOL_GPL(i2c_dw_isr); + + void i2c_dw_enable(struct dw_i2c_dev *dev) + { + /* Enable the adapter */ + dw_writel(dev, 1, DW_IC_ENABLE); + } ++EXPORT_SYMBOL_GPL(i2c_dw_enable); + + u32 i2c_dw_is_enabled(struct dw_i2c_dev *dev) + { + return dw_readl(dev, DW_IC_ENABLE); + } ++EXPORT_SYMBOL_GPL(i2c_dw_is_enabled); + + void i2c_dw_disable(struct dw_i2c_dev *dev) + { +@@ -688,18 +695,22 @@ void i2c_dw_disable(struct dw_i2c_dev *dev) + dw_writel(dev, 0, DW_IC_INTR_MASK); + dw_readl(dev, DW_IC_CLR_INTR); + } ++EXPORT_SYMBOL_GPL(i2c_dw_disable); + + void i2c_dw_clear_int(struct dw_i2c_dev *dev) + { + dw_readl(dev, DW_IC_CLR_INTR); + } ++EXPORT_SYMBOL_GPL(i2c_dw_clear_int); + + void i2c_dw_disable_int(struct dw_i2c_dev *dev) + { + dw_writel(dev, 0, DW_IC_INTR_MASK); + } ++EXPORT_SYMBOL_GPL(i2c_dw_disable_int); + + u32 i2c_dw_read_comp_param(struct dw_i2c_dev *dev) + { + return dw_readl(dev, DW_IC_COMP_PARAM_1); + } ++EXPORT_SYMBOL_GPL(i2c_dw_read_comp_param); +diff --git a/drivers/i2c/busses/i2c-i801.c b/drivers/i2c/busses/i2c-i801.c +index ab26840d..817d025 100644 +--- a/drivers/i2c/busses/i2c-i801.c ++++ b/drivers/i2c/busses/i2c-i801.c +@@ -51,6 +51,8 @@ + Patsburg (PCH) IDF 0x1d72 32 hard yes yes yes + DH89xxCC (PCH) 0x2330 32 hard yes yes yes + Panther Point (PCH) 0x1e22 32 hard yes yes yes ++ Lynx Point (PCH) 
0x8c22 32 hard yes yes yes ++ Lynx Point-LP (PCH) 0x9c22 32 hard yes yes yes + + Features supported by this driver: + Software PEC no +@@ -145,6 +147,8 @@ + #define PCI_DEVICE_ID_INTEL_PANTHERPOINT_SMBUS 0x1e22 + #define PCI_DEVICE_ID_INTEL_DH89XXCC_SMBUS 0x2330 + #define PCI_DEVICE_ID_INTEL_5_3400_SERIES_SMBUS 0x3b30 ++#define PCI_DEVICE_ID_INTEL_LYNXPOINT_SMBUS 0x8c22 ++#define PCI_DEVICE_ID_INTEL_LYNXPOINT_LP_SMBUS 0x9c22 + + struct i801_priv { + struct i2c_adapter adapter; +@@ -633,6 +637,8 @@ static const struct pci_device_id i801_ids[] = { + { PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_PATSBURG_SMBUS_IDF2) }, + { PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_DH89XXCC_SMBUS) }, + { PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_PANTHERPOINT_SMBUS) }, ++ { PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_LYNXPOINT_SMBUS) }, ++ { PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_LYNXPOINT_LP_SMBUS) }, + { 0, } + }; + +diff --git a/drivers/input/serio/i8042-x86ia64io.h b/drivers/input/serio/i8042-x86ia64io.h +index b4cfc6c..d4ec371 100644 +--- a/drivers/input/serio/i8042-x86ia64io.h ++++ b/drivers/input/serio/i8042-x86ia64io.h +@@ -177,6 +177,20 @@ static const struct dmi_system_id __initconst i8042_dmi_noloop_table[] = { + }, + }, + { ++ /* Gigabyte T1005 - defines wrong chassis type ("Other") */ ++ .matches = { ++ DMI_MATCH(DMI_SYS_VENDOR, "GIGABYTE"), ++ DMI_MATCH(DMI_PRODUCT_NAME, "T1005"), ++ }, ++ }, ++ { ++ /* Gigabyte T1005M/P - defines wrong chassis type ("Other") */ ++ .matches = { ++ DMI_MATCH(DMI_SYS_VENDOR, "GIGABYTE"), ++ DMI_MATCH(DMI_PRODUCT_NAME, "T1005M/P"), ++ }, ++ }, ++ { + .matches = { + DMI_MATCH(DMI_SYS_VENDOR, "Hewlett-Packard"), + DMI_MATCH(DMI_PRODUCT_NAME, "HP Pavilion dv9700"), +diff --git a/drivers/isdn/isdnloop/isdnloop.c b/drivers/isdn/isdnloop/isdnloop.c +index d497db0..509135f 100644 +--- a/drivers/isdn/isdnloop/isdnloop.c ++++ b/drivers/isdn/isdnloop/isdnloop.c +@@ -16,7 +16,6 @@ + #include + #include "isdnloop.h" + +-static char *revision = "$Revision: 1.11.6.7 $"; + static char *isdnloop_id = "loop0"; + + MODULE_DESCRIPTION("ISDN4Linux: Pseudo Driver that simulates an ISDN card"); +@@ -1494,17 +1493,6 @@ isdnloop_addcard(char *id1) + static int __init + isdnloop_init(void) + { +- char *p; +- char rev[10]; +- +- if ((p = strchr(revision, ':'))) { +- strcpy(rev, p + 1); +- p = strchr(rev, '$'); +- *p = 0; +- } else +- strcpy(rev, " ??? 
"); +- printk(KERN_NOTICE "isdnloop-ISDN-driver Rev%s\n", rev); +- + if (isdnloop_id) + return (isdnloop_addcard(isdnloop_id)); + +diff --git a/drivers/mmc/card/block.c b/drivers/mmc/card/block.c +index 34416d4..74793af 100644 +--- a/drivers/mmc/card/block.c ++++ b/drivers/mmc/card/block.c +@@ -1339,7 +1339,8 @@ static int mmc_blk_issue_rq(struct mmc_queue *mq, struct request *req) + /* complete ongoing async transfer before issuing discard */ + if (card->host->areq) + mmc_blk_issue_rw_rq(mq, NULL); +- if (req->cmd_flags & REQ_SECURE) ++ if (req->cmd_flags & REQ_SECURE && ++ !(card->quirks & MMC_QUIRK_SEC_ERASE_TRIM_BROKEN)) + ret = mmc_blk_issue_secdiscard_rq(mq, req); + else + ret = mmc_blk_issue_discard_rq(mq, req); +@@ -1614,6 +1615,8 @@ static int mmc_add_disk(struct mmc_blk_data *md) + return ret; + } + ++#define CID_MANFID_SAMSUNG 0x15 ++ + static const struct mmc_fixup blk_fixups[] = + { + MMC_FIXUP("SEM02G", 0x2, 0x100, add_quirk, MMC_QUIRK_INAND_CMD38), +@@ -1644,6 +1647,28 @@ static const struct mmc_fixup blk_fixups[] = + MMC_FIXUP(CID_NAME_ANY, 0x13, 0x200, add_quirk_mmc, + MMC_QUIRK_LONG_READ_TIME), + ++ /* ++ * On these Samsung MoviNAND parts, performing secure erase or ++ * secure trim can result in unrecoverable corruption due to a ++ * firmware bug. ++ */ ++ MMC_FIXUP("M8G2FA", CID_MANFID_SAMSUNG, CID_OEMID_ANY, add_quirk_mmc, ++ MMC_QUIRK_SEC_ERASE_TRIM_BROKEN), ++ MMC_FIXUP("MAG4FA", CID_MANFID_SAMSUNG, CID_OEMID_ANY, add_quirk_mmc, ++ MMC_QUIRK_SEC_ERASE_TRIM_BROKEN), ++ MMC_FIXUP("MBG8FA", CID_MANFID_SAMSUNG, CID_OEMID_ANY, add_quirk_mmc, ++ MMC_QUIRK_SEC_ERASE_TRIM_BROKEN), ++ MMC_FIXUP("MCGAFA", CID_MANFID_SAMSUNG, CID_OEMID_ANY, add_quirk_mmc, ++ MMC_QUIRK_SEC_ERASE_TRIM_BROKEN), ++ MMC_FIXUP("VAL00M", CID_MANFID_SAMSUNG, CID_OEMID_ANY, add_quirk_mmc, ++ MMC_QUIRK_SEC_ERASE_TRIM_BROKEN), ++ MMC_FIXUP("VYL00M", CID_MANFID_SAMSUNG, CID_OEMID_ANY, add_quirk_mmc, ++ MMC_QUIRK_SEC_ERASE_TRIM_BROKEN), ++ MMC_FIXUP("KYL00M", CID_MANFID_SAMSUNG, CID_OEMID_ANY, add_quirk_mmc, ++ MMC_QUIRK_SEC_ERASE_TRIM_BROKEN), ++ MMC_FIXUP("VZL00M", CID_MANFID_SAMSUNG, CID_OEMID_ANY, add_quirk_mmc, ++ MMC_QUIRK_SEC_ERASE_TRIM_BROKEN), ++ + END_FIXUP + }; + +diff --git a/drivers/mmc/host/mxs-mmc.c b/drivers/mmc/host/mxs-mmc.c +index 99b449d..f201bed 100644 +--- a/drivers/mmc/host/mxs-mmc.c ++++ b/drivers/mmc/host/mxs-mmc.c +@@ -279,11 +279,11 @@ static irqreturn_t mxs_mmc_irq_handler(int irq, void *dev_id) + writel(stat & MXS_MMC_IRQ_BITS, + host->base + HW_SSP_CTRL1 + MXS_CLR_ADDR); + ++ spin_unlock(&host->lock); ++ + if ((stat & BM_SSP_CTRL1_SDIO_IRQ) && (stat & BM_SSP_CTRL1_SDIO_IRQ_EN)) + mmc_signal_sdio_irq(host->mmc); + +- spin_unlock(&host->lock); +- + if (stat & BM_SSP_CTRL1_RESP_TIMEOUT_IRQ) + cmd->error = -ETIMEDOUT; + else if (stat & BM_SSP_CTRL1_RESP_ERR_IRQ) +@@ -628,10 +628,6 @@ static void mxs_mmc_enable_sdio_irq(struct mmc_host *mmc, int enable) + host->base + HW_SSP_CTRL0 + MXS_SET_ADDR); + writel(BM_SSP_CTRL1_SDIO_IRQ_EN, + host->base + HW_SSP_CTRL1 + MXS_SET_ADDR); +- +- if (readl(host->base + HW_SSP_STATUS) & BM_SSP_STATUS_SDIO_IRQ) +- mmc_signal_sdio_irq(host->mmc); +- + } else { + writel(BM_SSP_CTRL0_SDIO_IRQ_CHECK, + host->base + HW_SSP_CTRL0 + MXS_CLR_ADDR); +@@ -640,6 +636,10 @@ static void mxs_mmc_enable_sdio_irq(struct mmc_host *mmc, int enable) + } + + spin_unlock_irqrestore(&host->lock, flags); ++ ++ if (enable && readl(host->base + HW_SSP_STATUS) & BM_SSP_STATUS_SDIO_IRQ) ++ mmc_signal_sdio_irq(host->mmc); ++ + } + + static const struct mmc_host_ops mxs_mmc_ops 
= { +diff --git a/drivers/mmc/host/sdhci-esdhc.h b/drivers/mmc/host/sdhci-esdhc.h +index c3b08f1..62ca03a 100644 +--- a/drivers/mmc/host/sdhci-esdhc.h ++++ b/drivers/mmc/host/sdhci-esdhc.h +@@ -48,14 +48,14 @@ static inline void esdhc_set_clock(struct sdhci_host *host, unsigned int clock) + int div = 1; + u32 temp; + ++ if (clock == 0) ++ goto out; ++ + temp = sdhci_readl(host, ESDHC_SYSTEM_CONTROL); + temp &= ~(ESDHC_CLOCK_IPGEN | ESDHC_CLOCK_HCKEN | ESDHC_CLOCK_PEREN + | ESDHC_CLOCK_MASK); + sdhci_writel(host, temp, ESDHC_SYSTEM_CONTROL); + +- if (clock == 0) +- goto out; +- + while (host->max_clk / pre_div / 16 > clock && pre_div < 256) + pre_div *= 2; + +diff --git a/drivers/mtd/ubi/vtbl.c b/drivers/mtd/ubi/vtbl.c +index 890754c..95b29f5 100644 +--- a/drivers/mtd/ubi/vtbl.c ++++ b/drivers/mtd/ubi/vtbl.c +@@ -346,7 +346,7 @@ retry: + */ + err = ubi_scan_add_used(ubi, si, new_seb->pnum, new_seb->ec, + vid_hdr, 0); +- kfree(new_seb); ++ kmem_cache_free(si->scan_leb_slab, new_seb); + ubi_free_vid_hdr(ubi, vid_hdr); + return err; + +@@ -359,7 +359,7 @@ write_error: + list_add(&new_seb->u.list, &si->erase); + goto retry; + } +- kfree(new_seb); ++ kmem_cache_free(si->scan_leb_slab, new_seb); + out_free: + ubi_free_vid_hdr(ubi, vid_hdr); + return err; +diff --git a/drivers/net/can/mcp251x.c b/drivers/net/can/mcp251x.c +index 330140e..9bcc39a 100644 +--- a/drivers/net/can/mcp251x.c ++++ b/drivers/net/can/mcp251x.c +@@ -83,6 +83,11 @@ + #define INSTRUCTION_LOAD_TXB(n) (0x40 + 2 * (n)) + #define INSTRUCTION_READ_RXB(n) (((n) == 0) ? 0x90 : 0x94) + #define INSTRUCTION_RESET 0xC0 ++#define RTS_TXB0 0x01 ++#define RTS_TXB1 0x02 ++#define RTS_TXB2 0x04 ++#define INSTRUCTION_RTS(n) (0x80 | ((n) & 0x07)) ++ + + /* MPC251x registers */ + #define CANSTAT 0x0e +@@ -397,6 +402,7 @@ static void mcp251x_hw_tx_frame(struct spi_device *spi, u8 *buf, + static void mcp251x_hw_tx(struct spi_device *spi, struct can_frame *frame, + int tx_buf_idx) + { ++ struct mcp251x_priv *priv = dev_get_drvdata(&spi->dev); + u32 sid, eid, exide, rtr; + u8 buf[SPI_TRANSFER_BUF_LEN]; + +@@ -418,7 +424,10 @@ static void mcp251x_hw_tx(struct spi_device *spi, struct can_frame *frame, + buf[TXBDLC_OFF] = (rtr << DLC_RTR_SHIFT) | frame->can_dlc; + memcpy(buf + TXBDAT_OFF, frame->data, frame->can_dlc); + mcp251x_hw_tx_frame(spi, buf, frame->can_dlc, tx_buf_idx); +- mcp251x_write_reg(spi, TXBCTRL(tx_buf_idx), TXBCTRL_TXREQ); ++ ++ /* use INSTRUCTION_RTS, to avoid "repeated frame problem" */ ++ priv->spi_tx_buf[0] = INSTRUCTION_RTS(1 << tx_buf_idx); ++ mcp251x_spi_trans(priv->spi, 1); + } + + static void mcp251x_hw_rx_frame(struct spi_device *spi, u8 *buf, +diff --git a/drivers/net/ethernet/freescale/gianfar.c b/drivers/net/ethernet/freescale/gianfar.c +index 83199fd..d0722a7 100644 +--- a/drivers/net/ethernet/freescale/gianfar.c ++++ b/drivers/net/ethernet/freescale/gianfar.c +@@ -1041,7 +1041,7 @@ static int gfar_probe(struct platform_device *ofdev) + + if (priv->device_flags & FSL_GIANFAR_DEV_HAS_VLAN) { + dev->hw_features |= NETIF_F_HW_VLAN_TX | NETIF_F_HW_VLAN_RX; +- dev->features |= NETIF_F_HW_VLAN_TX | NETIF_F_HW_VLAN_RX; ++ dev->features |= NETIF_F_HW_VLAN_RX; + } + + if (priv->device_flags & FSL_GIANFAR_DEV_HAS_EXTENDED_HASH) { +diff --git a/drivers/net/ethernet/ibm/ibmveth.c b/drivers/net/ethernet/ibm/ibmveth.c +index b1cd41b..021463b 100644 +--- a/drivers/net/ethernet/ibm/ibmveth.c ++++ b/drivers/net/ethernet/ibm/ibmveth.c +@@ -472,14 +472,9 @@ static void ibmveth_cleanup(struct ibmveth_adapter *adapter) + } + + if 
(adapter->rx_queue.queue_addr != NULL) { +- if (!dma_mapping_error(dev, adapter->rx_queue.queue_dma)) { +- dma_unmap_single(dev, +- adapter->rx_queue.queue_dma, +- adapter->rx_queue.queue_len, +- DMA_BIDIRECTIONAL); +- adapter->rx_queue.queue_dma = DMA_ERROR_CODE; +- } +- kfree(adapter->rx_queue.queue_addr); ++ dma_free_coherent(dev, adapter->rx_queue.queue_len, ++ adapter->rx_queue.queue_addr, ++ adapter->rx_queue.queue_dma); + adapter->rx_queue.queue_addr = NULL; + } + +@@ -556,10 +551,13 @@ static int ibmveth_open(struct net_device *netdev) + goto err_out; + } + ++ dev = &adapter->vdev->dev; ++ + adapter->rx_queue.queue_len = sizeof(struct ibmveth_rx_q_entry) * + rxq_entries; +- adapter->rx_queue.queue_addr = kmalloc(adapter->rx_queue.queue_len, +- GFP_KERNEL); ++ adapter->rx_queue.queue_addr = ++ dma_alloc_coherent(dev, adapter->rx_queue.queue_len, ++ &adapter->rx_queue.queue_dma, GFP_KERNEL); + + if (!adapter->rx_queue.queue_addr) { + netdev_err(netdev, "unable to allocate rx queue pages\n"); +@@ -567,19 +565,13 @@ static int ibmveth_open(struct net_device *netdev) + goto err_out; + } + +- dev = &adapter->vdev->dev; +- + adapter->buffer_list_dma = dma_map_single(dev, + adapter->buffer_list_addr, 4096, DMA_BIDIRECTIONAL); + adapter->filter_list_dma = dma_map_single(dev, + adapter->filter_list_addr, 4096, DMA_BIDIRECTIONAL); +- adapter->rx_queue.queue_dma = dma_map_single(dev, +- adapter->rx_queue.queue_addr, +- adapter->rx_queue.queue_len, DMA_BIDIRECTIONAL); + + if ((dma_mapping_error(dev, adapter->buffer_list_dma)) || +- (dma_mapping_error(dev, adapter->filter_list_dma)) || +- (dma_mapping_error(dev, adapter->rx_queue.queue_dma))) { ++ (dma_mapping_error(dev, adapter->filter_list_dma))) { + netdev_err(netdev, "unable to map filter or buffer list " + "pages\n"); + rc = -ENOMEM; +diff --git a/drivers/net/ethernet/intel/e1000e/e1000.h b/drivers/net/ethernet/intel/e1000e/e1000.h +index f478a22..8e362bb 100644 +--- a/drivers/net/ethernet/intel/e1000e/e1000.h ++++ b/drivers/net/ethernet/intel/e1000e/e1000.h +@@ -302,6 +302,7 @@ struct e1000_adapter { + */ + struct e1000_ring *tx_ring /* One per active queue */ + ____cacheline_aligned_in_smp; ++ u32 tx_fifo_limit; + + struct napi_struct napi; + +diff --git a/drivers/net/ethernet/intel/e1000e/netdev.c b/drivers/net/ethernet/intel/e1000e/netdev.c +index 64d3f98..0182649 100644 +--- a/drivers/net/ethernet/intel/e1000e/netdev.c ++++ b/drivers/net/ethernet/intel/e1000e/netdev.c +@@ -3386,6 +3386,15 @@ void e1000e_reset(struct e1000_adapter *adapter) + } + + /* ++ * Alignment of Tx data is on an arbitrary byte boundary with the ++ * maximum size per Tx descriptor limited only to the transmit ++ * allocation of the packet buffer minus 96 bytes with an upper ++ * limit of 24KB due to receive synchronization limitations. ++ */ ++ adapter->tx_fifo_limit = min_t(u32, ((er32(PBA) >> 16) << 10) - 96, ++ 24 << 10); ++ ++ /* + * Disable Adaptive Interrupt Moderation if 2 full packets cannot + * fit in receive buffer and early-receive not supported. 
+ */ +@@ -4647,13 +4656,9 @@ static bool e1000_tx_csum(struct e1000_adapter *adapter, struct sk_buff *skb) + return 1; + } + +-#define E1000_MAX_PER_TXD 8192 +-#define E1000_MAX_TXD_PWR 12 +- + static int e1000_tx_map(struct e1000_adapter *adapter, + struct sk_buff *skb, unsigned int first, +- unsigned int max_per_txd, unsigned int nr_frags, +- unsigned int mss) ++ unsigned int max_per_txd, unsigned int nr_frags) + { + struct e1000_ring *tx_ring = adapter->tx_ring; + struct pci_dev *pdev = adapter->pdev; +@@ -4882,20 +4887,19 @@ static int e1000_maybe_stop_tx(struct net_device *netdev, int size) + { + struct e1000_adapter *adapter = netdev_priv(netdev); + ++ BUG_ON(size > adapter->tx_ring->count); ++ + if (e1000_desc_unused(adapter->tx_ring) >= size) + return 0; + return __e1000_maybe_stop_tx(netdev, size); + } + +-#define TXD_USE_COUNT(S, X) (((S) >> (X)) + 1 ) + static netdev_tx_t e1000_xmit_frame(struct sk_buff *skb, + struct net_device *netdev) + { + struct e1000_adapter *adapter = netdev_priv(netdev); + struct e1000_ring *tx_ring = adapter->tx_ring; + unsigned int first; +- unsigned int max_per_txd = E1000_MAX_PER_TXD; +- unsigned int max_txd_pwr = E1000_MAX_TXD_PWR; + unsigned int tx_flags = 0; + unsigned int len = skb_headlen(skb); + unsigned int nr_frags; +@@ -4915,18 +4919,8 @@ static netdev_tx_t e1000_xmit_frame(struct sk_buff *skb, + } + + mss = skb_shinfo(skb)->gso_size; +- /* +- * The controller does a simple calculation to +- * make sure there is enough room in the FIFO before +- * initiating the DMA for each buffer. The calc is: +- * 4 = ceil(buffer len/mss). To make sure we don't +- * overrun the FIFO, adjust the max buffer len if mss +- * drops. +- */ + if (mss) { + u8 hdr_len; +- max_per_txd = min(mss << 2, max_per_txd); +- max_txd_pwr = fls(max_per_txd) - 1; + + /* + * TSO Workaround for 82571/2/3 Controllers -- if skb->data +@@ -4956,12 +4950,12 @@ static netdev_tx_t e1000_xmit_frame(struct sk_buff *skb, + count++; + count++; + +- count += TXD_USE_COUNT(len, max_txd_pwr); ++ count += DIV_ROUND_UP(len, adapter->tx_fifo_limit); + + nr_frags = skb_shinfo(skb)->nr_frags; + for (f = 0; f < nr_frags; f++) +- count += TXD_USE_COUNT(skb_frag_size(&skb_shinfo(skb)->frags[f]), +- max_txd_pwr); ++ count += DIV_ROUND_UP(skb_frag_size(&skb_shinfo(skb)->frags[f]), ++ adapter->tx_fifo_limit); + + if (adapter->hw.mac.tx_pkt_filtering) + e1000_transfer_dhcp_info(adapter, skb); +@@ -5000,12 +4994,15 @@ static netdev_tx_t e1000_xmit_frame(struct sk_buff *skb, + tx_flags |= E1000_TX_FLAGS_IPV4; + + /* if count is 0 then mapping error has occurred */ +- count = e1000_tx_map(adapter, skb, first, max_per_txd, nr_frags, mss); ++ count = e1000_tx_map(adapter, skb, first, adapter->tx_fifo_limit, ++ nr_frags); + if (count) { + e1000_tx_queue(adapter, tx_flags, count); + /* Make sure there is space in the ring for the next send. 
*/ +- e1000_maybe_stop_tx(netdev, MAX_SKB_FRAGS + 2); +- ++ e1000_maybe_stop_tx(netdev, ++ (MAX_SKB_FRAGS * ++ DIV_ROUND_UP(PAGE_SIZE, ++ adapter->tx_fifo_limit) + 2)); + } else { + dev_kfree_skb_any(skb); + tx_ring->buffer_info[first].time_stamp = 0; +@@ -6150,8 +6147,8 @@ static int __devinit e1000_probe(struct pci_dev *pdev, + adapter->hw.phy.autoneg_advertised = 0x2f; + + /* ring size defaults */ +- adapter->rx_ring->count = 256; +- adapter->tx_ring->count = 256; ++ adapter->rx_ring->count = E1000_DEFAULT_RXD; ++ adapter->tx_ring->count = E1000_DEFAULT_TXD; + + /* + * Initial Wake on LAN setting - If APM wake is enabled in +diff --git a/drivers/net/ethernet/sfc/efx.c b/drivers/net/ethernet/sfc/efx.c +index d5731f1..a6611f1 100644 +--- a/drivers/net/ethernet/sfc/efx.c ++++ b/drivers/net/ethernet/sfc/efx.c +@@ -1383,6 +1383,11 @@ static int efx_probe_all(struct efx_nic *efx) + goto fail2; + } + ++ BUILD_BUG_ON(EFX_DEFAULT_DMAQ_SIZE < EFX_RXQ_MIN_ENT); ++ if (WARN_ON(EFX_DEFAULT_DMAQ_SIZE < EFX_TXQ_MIN_ENT(efx))) { ++ rc = -EINVAL; ++ goto fail3; ++ } + efx->rxq_entries = efx->txq_entries = EFX_DEFAULT_DMAQ_SIZE; + rc = efx_probe_channels(efx); + if (rc) +@@ -1973,6 +1978,7 @@ static int efx_register_netdev(struct efx_nic *efx) + net_dev->irq = efx->pci_dev->irq; + net_dev->netdev_ops = &efx_netdev_ops; + SET_ETHTOOL_OPS(net_dev, &efx_ethtool_ops); ++ net_dev->gso_max_segs = EFX_TSO_MAX_SEGS; + + /* Clear MAC statistics */ + efx->mac_op->update_stats(efx); +diff --git a/drivers/net/ethernet/sfc/efx.h b/drivers/net/ethernet/sfc/efx.h +index 4764793..1355245 100644 +--- a/drivers/net/ethernet/sfc/efx.h ++++ b/drivers/net/ethernet/sfc/efx.h +@@ -34,6 +34,7 @@ extern netdev_tx_t + efx_enqueue_skb(struct efx_tx_queue *tx_queue, struct sk_buff *skb); + extern void efx_xmit_done(struct efx_tx_queue *tx_queue, unsigned int index); + extern int efx_setup_tc(struct net_device *net_dev, u8 num_tc); ++extern unsigned int efx_tx_max_skb_descs(struct efx_nic *efx); + + /* RX */ + extern int efx_probe_rx_queue(struct efx_rx_queue *rx_queue); +@@ -56,10 +57,15 @@ extern void efx_schedule_slow_fill(struct efx_rx_queue *rx_queue); + #define EFX_MAX_EVQ_SIZE 16384UL + #define EFX_MIN_EVQ_SIZE 512UL + +-/* The smallest [rt]xq_entries that the driver supports. Callers of +- * efx_wake_queue() assume that they can subsequently send at least one +- * skb. Falcon/A1 may require up to three descriptors per skb_frag. */ +-#define EFX_MIN_RING_SIZE (roundup_pow_of_two(2 * 3 * MAX_SKB_FRAGS)) ++/* Maximum number of TCP segments we support for soft-TSO */ ++#define EFX_TSO_MAX_SEGS 100 ++ ++/* The smallest [rt]xq_entries that the driver supports. RX minimum ++ * is a bit arbitrary. For TX, we must have space for at least 2 ++ * TSO skbs. 
++ */ ++#define EFX_RXQ_MIN_ENT 128U ++#define EFX_TXQ_MIN_ENT(efx) (2 * efx_tx_max_skb_descs(efx)) + + /* Filters */ + extern int efx_probe_filters(struct efx_nic *efx); +diff --git a/drivers/net/ethernet/sfc/ethtool.c b/drivers/net/ethernet/sfc/ethtool.c +index f3cd96d..90158c9 100644 +--- a/drivers/net/ethernet/sfc/ethtool.c ++++ b/drivers/net/ethernet/sfc/ethtool.c +@@ -690,21 +690,27 @@ static int efx_ethtool_set_ringparam(struct net_device *net_dev, + struct ethtool_ringparam *ring) + { + struct efx_nic *efx = netdev_priv(net_dev); ++ u32 txq_entries; + + if (ring->rx_mini_pending || ring->rx_jumbo_pending || + ring->rx_pending > EFX_MAX_DMAQ_SIZE || + ring->tx_pending > EFX_MAX_DMAQ_SIZE) + return -EINVAL; + +- if (ring->rx_pending < EFX_MIN_RING_SIZE || +- ring->tx_pending < EFX_MIN_RING_SIZE) { ++ if (ring->rx_pending < EFX_RXQ_MIN_ENT) { + netif_err(efx, drv, efx->net_dev, +- "TX and RX queues cannot be smaller than %ld\n", +- EFX_MIN_RING_SIZE); ++ "RX queues cannot be smaller than %u\n", ++ EFX_RXQ_MIN_ENT); + return -EINVAL; + } + +- return efx_realloc_channels(efx, ring->rx_pending, ring->tx_pending); ++ txq_entries = max(ring->tx_pending, EFX_TXQ_MIN_ENT(efx)); ++ if (txq_entries != ring->tx_pending) ++ netif_warn(efx, drv, efx->net_dev, ++ "increasing TX queue size to minimum of %u\n", ++ txq_entries); ++ ++ return efx_realloc_channels(efx, ring->rx_pending, txq_entries); + } + + static int efx_ethtool_set_pauseparam(struct net_device *net_dev, +diff --git a/drivers/net/ethernet/sfc/nic.h b/drivers/net/ethernet/sfc/nic.h +index 5fb24d3..66ece48 100644 +--- a/drivers/net/ethernet/sfc/nic.h ++++ b/drivers/net/ethernet/sfc/nic.h +@@ -65,6 +65,9 @@ enum { + #define FALCON_GMAC_LOOPBACKS \ + (1 << LOOPBACK_GMAC) + ++/* Alignment of PCIe DMA boundaries (4KB) */ ++#define EFX_PAGE_SIZE 4096 ++ + /** + * struct falcon_board_type - board operations and type information + * @id: Board type id, as found in NVRAM +diff --git a/drivers/net/ethernet/sfc/tx.c b/drivers/net/ethernet/sfc/tx.c +index df88c543..807d515 100644 +--- a/drivers/net/ethernet/sfc/tx.c ++++ b/drivers/net/ethernet/sfc/tx.c +@@ -115,6 +115,25 @@ efx_max_tx_len(struct efx_nic *efx, dma_addr_t dma_addr) + return len; + } + ++unsigned int efx_tx_max_skb_descs(struct efx_nic *efx) ++{ ++ /* Header and payload descriptor for each output segment, plus ++ * one for every input fragment boundary within a segment ++ */ ++ unsigned int max_descs = EFX_TSO_MAX_SEGS * 2 + MAX_SKB_FRAGS; ++ ++ /* Possibly one more per segment for the alignment workaround */ ++ if (EFX_WORKAROUND_5391(efx)) ++ max_descs += EFX_TSO_MAX_SEGS; ++ ++ /* Possibly more for PCIe page boundaries within input fragments */ ++ if (PAGE_SIZE > EFX_PAGE_SIZE) ++ max_descs += max_t(unsigned int, MAX_SKB_FRAGS, ++ DIV_ROUND_UP(GSO_MAX_SIZE, EFX_PAGE_SIZE)); ++ ++ return max_descs; ++} ++ + /* + * Add a socket buffer to a TX queue + * +diff --git a/drivers/net/ppp/pptp.c b/drivers/net/ppp/pptp.c +index f8a6853..ad6a9d9 100644 +--- a/drivers/net/ppp/pptp.c ++++ b/drivers/net/ppp/pptp.c +@@ -189,7 +189,7 @@ static int pptp_xmit(struct ppp_channel *chan, struct sk_buff *skb) + if (sk_pppox(po)->sk_state & PPPOX_DEAD) + goto tx_error; + +- rt = ip_route_output_ports(&init_net, &fl4, NULL, ++ rt = ip_route_output_ports(sock_net(sk), &fl4, NULL, + opt->dst_addr.sin_addr.s_addr, + opt->src_addr.sin_addr.s_addr, + 0, 0, IPPROTO_GRE, +@@ -468,7 +468,7 @@ static int pptp_connect(struct socket *sock, struct sockaddr *uservaddr, + po->chan.private = sk; + po->chan.ops = 
&pptp_chan_ops; + +- rt = ip_route_output_ports(&init_net, &fl4, sk, ++ rt = ip_route_output_ports(sock_net(sk), &fl4, sk, + opt->dst_addr.sin_addr.s_addr, + opt->src_addr.sin_addr.s_addr, + 0, 0, +diff --git a/drivers/net/wireless/iwlwifi/iwl-debugfs.c b/drivers/net/wireless/iwlwifi/iwl-debugfs.c +index a1670e3..93e6179 100644 +--- a/drivers/net/wireless/iwlwifi/iwl-debugfs.c ++++ b/drivers/net/wireless/iwlwifi/iwl-debugfs.c +@@ -232,6 +232,9 @@ static ssize_t iwl_dbgfs_sram_read(struct file *file, + struct iwl_priv *priv = file->private_data; + size_t bufsz; + ++ if (!iwl_is_ready_rf(priv->shrd)) ++ return -EAGAIN; ++ + /* default is to dump the entire data segment */ + if (!priv->dbgfs_sram_offset && !priv->dbgfs_sram_len) { + priv->dbgfs_sram_offset = 0x800000; +diff --git a/drivers/net/wireless/iwlwifi/iwl-trans-pcie-int.h b/drivers/net/wireless/iwlwifi/iwl-trans-pcie-int.h +index 5c29281..8533ba2 100644 +--- a/drivers/net/wireless/iwlwifi/iwl-trans-pcie-int.h ++++ b/drivers/net/wireless/iwlwifi/iwl-trans-pcie-int.h +@@ -303,7 +303,7 @@ int iwl_queue_space(const struct iwl_queue *q); + ******************************************************/ + int iwl_dump_nic_event_log(struct iwl_trans *trans, bool full_log, + char **buf, bool display); +-int iwl_dump_fh(struct iwl_trans *trans, char **buf, bool display); ++int iwl_dump_fh(struct iwl_trans *trans, char **buf); + void iwl_dump_csr(struct iwl_trans *trans); + + /***************************************************** +diff --git a/drivers/net/wireless/iwlwifi/iwl-trans-pcie-rx.c b/drivers/net/wireless/iwlwifi/iwl-trans-pcie-rx.c +index 1daf01e..17fb25d 100644 +--- a/drivers/net/wireless/iwlwifi/iwl-trans-pcie-rx.c ++++ b/drivers/net/wireless/iwlwifi/iwl-trans-pcie-rx.c +@@ -678,7 +678,7 @@ static void iwl_irq_handle_error(struct iwl_trans *trans) + + iwl_dump_nic_error_log(trans); + iwl_dump_csr(trans); +- iwl_dump_fh(trans, NULL, false); ++ iwl_dump_fh(trans, NULL); + iwl_dump_nic_event_log(trans, false, NULL, false); + #ifdef CONFIG_IWLWIFI_DEBUG + if (iwl_get_debug_level(trans->shrd) & IWL_DL_FW_ERRORS) +diff --git a/drivers/net/wireless/iwlwifi/iwl-trans-pcie.c b/drivers/net/wireless/iwlwifi/iwl-trans-pcie.c +index 4661a64..75da4bc 100644 +--- a/drivers/net/wireless/iwlwifi/iwl-trans-pcie.c ++++ b/drivers/net/wireless/iwlwifi/iwl-trans-pcie.c +@@ -1541,13 +1541,9 @@ static const char *get_fh_string(int cmd) + } + } + +-int iwl_dump_fh(struct iwl_trans *trans, char **buf, bool display) ++int iwl_dump_fh(struct iwl_trans *trans, char **buf) + { + int i; +-#ifdef CONFIG_IWLWIFI_DEBUG +- int pos = 0; +- size_t bufsz = 0; +-#endif + static const u32 fh_tbl[] = { + FH_RSCSR_CHNL0_STTS_WPTR_REG, + FH_RSCSR_CHNL0_RBDCB_BASE_REG, +@@ -1559,29 +1555,35 @@ int iwl_dump_fh(struct iwl_trans *trans, char **buf, bool display) + FH_TSSR_TX_STATUS_REG, + FH_TSSR_TX_ERROR_REG + }; +-#ifdef CONFIG_IWLWIFI_DEBUG +- if (display) { +- bufsz = ARRAY_SIZE(fh_tbl) * 48 + 40; ++ ++#ifdef CONFIG_IWLWIFI_DEBUGFS ++ if (buf) { ++ int pos = 0; ++ size_t bufsz = ARRAY_SIZE(fh_tbl) * 48 + 40; ++ + *buf = kmalloc(bufsz, GFP_KERNEL); + if (!*buf) + return -ENOMEM; ++ + pos += scnprintf(*buf + pos, bufsz - pos, + "FH register values:\n"); +- for (i = 0; i < ARRAY_SIZE(fh_tbl); i++) { ++ ++ for (i = 0; i < ARRAY_SIZE(fh_tbl); i++) + pos += scnprintf(*buf + pos, bufsz - pos, + " %34s: 0X%08x\n", + get_fh_string(fh_tbl[i]), + iwl_read_direct32(bus(trans), fh_tbl[i])); +- } ++ + return pos; + } + #endif ++ + IWL_ERR(trans, "FH register values:\n"); +- for (i = 0; i < 
ARRAY_SIZE(fh_tbl); i++) { ++ for (i = 0; i < ARRAY_SIZE(fh_tbl); i++) + IWL_ERR(trans, " %34s: 0X%08x\n", + get_fh_string(fh_tbl[i]), + iwl_read_direct32(bus(trans), fh_tbl[i])); +- } ++ + return 0; + } + +@@ -1929,11 +1931,11 @@ static ssize_t iwl_dbgfs_fh_reg_read(struct file *file, + size_t count, loff_t *ppos) + { + struct iwl_trans *trans = file->private_data; +- char *buf; ++ char *buf = NULL; + int pos = 0; + ssize_t ret = -EFAULT; + +- ret = pos = iwl_dump_fh(trans, &buf, true); ++ ret = pos = iwl_dump_fh(trans, &buf); + if (buf) { + ret = simple_read_from_buffer(user_buf, + count, ppos, buf, pos); +diff --git a/drivers/net/wireless/rt2x00/rt2400pci.c b/drivers/net/wireless/rt2x00/rt2400pci.c +index 3a6b402..0ea85f4 100644 +--- a/drivers/net/wireless/rt2x00/rt2400pci.c ++++ b/drivers/net/wireless/rt2x00/rt2400pci.c +@@ -1611,6 +1611,7 @@ static int rt2400pci_probe_hw_mode(struct rt2x00_dev *rt2x00dev) + static int rt2400pci_probe_hw(struct rt2x00_dev *rt2x00dev) + { + int retval; ++ u32 reg; + + /* + * Allocate eeprom data. +@@ -1624,6 +1625,14 @@ static int rt2400pci_probe_hw(struct rt2x00_dev *rt2x00dev) + return retval; + + /* ++ * Enable rfkill polling by setting GPIO direction of the ++ * rfkill switch GPIO pin correctly. ++ */ ++ rt2x00pci_register_read(rt2x00dev, GPIOCSR, ®); ++ rt2x00_set_field32(®, GPIOCSR_BIT8, 1); ++ rt2x00pci_register_write(rt2x00dev, GPIOCSR, reg); ++ ++ /* + * Initialize hw specifications. + */ + retval = rt2400pci_probe_hw_mode(rt2x00dev); +diff --git a/drivers/net/wireless/rt2x00/rt2400pci.h b/drivers/net/wireless/rt2x00/rt2400pci.h +index d3a4a68..7564ae9 100644 +--- a/drivers/net/wireless/rt2x00/rt2400pci.h ++++ b/drivers/net/wireless/rt2x00/rt2400pci.h +@@ -670,6 +670,7 @@ + #define GPIOCSR_BIT5 FIELD32(0x00000020) + #define GPIOCSR_BIT6 FIELD32(0x00000040) + #define GPIOCSR_BIT7 FIELD32(0x00000080) ++#define GPIOCSR_BIT8 FIELD32(0x00000100) + + /* + * BBPPCSR: BBP Pin control register. +diff --git a/drivers/net/wireless/rt2x00/rt2500pci.c b/drivers/net/wireless/rt2x00/rt2500pci.c +index dcc0e1f..aa10c48 100644 +--- a/drivers/net/wireless/rt2x00/rt2500pci.c ++++ b/drivers/net/wireless/rt2x00/rt2500pci.c +@@ -1929,6 +1929,7 @@ static int rt2500pci_probe_hw_mode(struct rt2x00_dev *rt2x00dev) + static int rt2500pci_probe_hw(struct rt2x00_dev *rt2x00dev) + { + int retval; ++ u32 reg; + + /* + * Allocate eeprom data. +@@ -1942,6 +1943,14 @@ static int rt2500pci_probe_hw(struct rt2x00_dev *rt2x00dev) + return retval; + + /* ++ * Enable rfkill polling by setting GPIO direction of the ++ * rfkill switch GPIO pin correctly. ++ */ ++ rt2x00pci_register_read(rt2x00dev, GPIOCSR, ®); ++ rt2x00_set_field32(®, GPIOCSR_DIR0, 1); ++ rt2x00pci_register_write(rt2x00dev, GPIOCSR, reg); ++ ++ /* + * Initialize hw specifications. 
+ */ + retval = rt2500pci_probe_hw_mode(rt2x00dev); +diff --git a/drivers/net/wireless/rt2x00/rt2500usb.c b/drivers/net/wireless/rt2x00/rt2500usb.c +index 53c5f87..22ed6df 100644 +--- a/drivers/net/wireless/rt2x00/rt2500usb.c ++++ b/drivers/net/wireless/rt2x00/rt2500usb.c +@@ -283,7 +283,7 @@ static int rt2500usb_rfkill_poll(struct rt2x00_dev *rt2x00dev) + u16 reg; + + rt2500usb_register_read(rt2x00dev, MAC_CSR19, ®); +- return rt2x00_get_field32(reg, MAC_CSR19_BIT7); ++ return rt2x00_get_field16(reg, MAC_CSR19_BIT7); + } + + #ifdef CONFIG_RT2X00_LIB_LEDS +@@ -1768,6 +1768,7 @@ static int rt2500usb_probe_hw_mode(struct rt2x00_dev *rt2x00dev) + static int rt2500usb_probe_hw(struct rt2x00_dev *rt2x00dev) + { + int retval; ++ u16 reg; + + /* + * Allocate eeprom data. +@@ -1781,6 +1782,14 @@ static int rt2500usb_probe_hw(struct rt2x00_dev *rt2x00dev) + return retval; + + /* ++ * Enable rfkill polling by setting GPIO direction of the ++ * rfkill switch GPIO pin correctly. ++ */ ++ rt2500usb_register_read(rt2x00dev, MAC_CSR19, ®); ++ rt2x00_set_field16(®, MAC_CSR19_BIT8, 0); ++ rt2500usb_register_write(rt2x00dev, MAC_CSR19, reg); ++ ++ /* + * Initialize hw specifications. + */ + retval = rt2500usb_probe_hw_mode(rt2x00dev); +diff --git a/drivers/net/wireless/rt2x00/rt2500usb.h b/drivers/net/wireless/rt2x00/rt2500usb.h +index b493306..196bd51 100644 +--- a/drivers/net/wireless/rt2x00/rt2500usb.h ++++ b/drivers/net/wireless/rt2x00/rt2500usb.h +@@ -189,14 +189,15 @@ + * MAC_CSR19: GPIO control register. + */ + #define MAC_CSR19 0x0426 +-#define MAC_CSR19_BIT0 FIELD32(0x0001) +-#define MAC_CSR19_BIT1 FIELD32(0x0002) +-#define MAC_CSR19_BIT2 FIELD32(0x0004) +-#define MAC_CSR19_BIT3 FIELD32(0x0008) +-#define MAC_CSR19_BIT4 FIELD32(0x0010) +-#define MAC_CSR19_BIT5 FIELD32(0x0020) +-#define MAC_CSR19_BIT6 FIELD32(0x0040) +-#define MAC_CSR19_BIT7 FIELD32(0x0080) ++#define MAC_CSR19_BIT0 FIELD16(0x0001) ++#define MAC_CSR19_BIT1 FIELD16(0x0002) ++#define MAC_CSR19_BIT2 FIELD16(0x0004) ++#define MAC_CSR19_BIT3 FIELD16(0x0008) ++#define MAC_CSR19_BIT4 FIELD16(0x0010) ++#define MAC_CSR19_BIT5 FIELD16(0x0020) ++#define MAC_CSR19_BIT6 FIELD16(0x0040) ++#define MAC_CSR19_BIT7 FIELD16(0x0080) ++#define MAC_CSR19_BIT8 FIELD16(0x0100) + + /* + * MAC_CSR20: LED control register. +diff --git a/drivers/net/wireless/rt2x00/rt2800pci.c b/drivers/net/wireless/rt2x00/rt2800pci.c +index 837b460..518157d 100644 +--- a/drivers/net/wireless/rt2x00/rt2800pci.c ++++ b/drivers/net/wireless/rt2x00/rt2800pci.c +@@ -935,6 +935,7 @@ static int rt2800pci_validate_eeprom(struct rt2x00_dev *rt2x00dev) + static int rt2800pci_probe_hw(struct rt2x00_dev *rt2x00dev) + { + int retval; ++ u32 reg; + + /* + * Allocate eeprom data. +@@ -948,6 +949,14 @@ static int rt2800pci_probe_hw(struct rt2x00_dev *rt2x00dev) + return retval; + + /* ++ * Enable rfkill polling by setting GPIO direction of the ++ * rfkill switch GPIO pin correctly. ++ */ ++ rt2x00pci_register_read(rt2x00dev, GPIO_CTRL_CFG, ®); ++ rt2x00_set_field32(®, GPIO_CTRL_CFG_GPIOD_BIT2, 1); ++ rt2x00pci_register_write(rt2x00dev, GPIO_CTRL_CFG, reg); ++ ++ /* + * Initialize hw specifications. 
+ */ + retval = rt2800_probe_hw_mode(rt2x00dev); +diff --git a/drivers/net/wireless/rt2x00/rt2800usb.c b/drivers/net/wireless/rt2x00/rt2800usb.c +index ae7528b..b66a61b 100644 +--- a/drivers/net/wireless/rt2x00/rt2800usb.c ++++ b/drivers/net/wireless/rt2x00/rt2800usb.c +@@ -621,8 +621,16 @@ static void rt2800usb_fill_rxdone(struct queue_entry *entry, + skb_pull(entry->skb, RXINFO_DESC_SIZE); + + /* +- * FIXME: we need to check for rx_pkt_len validity ++ * Check for rx_pkt_len validity. Return if invalid, leaving ++ * rxdesc->size zeroed out by the upper level. + */ ++ if (unlikely(rx_pkt_len == 0 || ++ rx_pkt_len > entry->queue->data_size)) { ++ ERROR(entry->queue->rt2x00dev, ++ "Bad frame size %d, forcing to 0\n", rx_pkt_len); ++ return; ++ } ++ + rxd = (__le32 *)(entry->skb->data + rx_pkt_len); + + /* +@@ -690,6 +698,7 @@ static int rt2800usb_validate_eeprom(struct rt2x00_dev *rt2x00dev) + static int rt2800usb_probe_hw(struct rt2x00_dev *rt2x00dev) + { + int retval; ++ u32 reg; + + /* + * Allocate eeprom data. +@@ -703,6 +712,14 @@ static int rt2800usb_probe_hw(struct rt2x00_dev *rt2x00dev) + return retval; + + /* ++ * Enable rfkill polling by setting GPIO direction of the ++ * rfkill switch GPIO pin correctly. ++ */ ++ rt2x00usb_register_read(rt2x00dev, GPIO_CTRL_CFG, ®); ++ rt2x00_set_field32(®, GPIO_CTRL_CFG_GPIOD_BIT2, 1); ++ rt2x00usb_register_write(rt2x00dev, GPIO_CTRL_CFG, reg); ++ ++ /* + * Initialize hw specifications. + */ + retval = rt2800_probe_hw_mode(rt2x00dev); +@@ -1111,6 +1128,8 @@ static struct usb_device_id rt2800usb_device_table[] = { + { USB_DEVICE(0x1690, 0x0744) }, + { USB_DEVICE(0x1690, 0x0761) }, + { USB_DEVICE(0x1690, 0x0764) }, ++ /* ASUS */ ++ { USB_DEVICE(0x0b05, 0x179d) }, + /* Cisco */ + { USB_DEVICE(0x167b, 0x4001) }, + /* EnGenius */ +@@ -1163,7 +1182,6 @@ static struct usb_device_id rt2800usb_device_table[] = { + { USB_DEVICE(0x0b05, 0x1760) }, + { USB_DEVICE(0x0b05, 0x1761) }, + { USB_DEVICE(0x0b05, 0x1790) }, +- { USB_DEVICE(0x0b05, 0x179d) }, + /* AzureWave */ + { USB_DEVICE(0x13d3, 0x3262) }, + { USB_DEVICE(0x13d3, 0x3284) }, +diff --git a/drivers/net/wireless/rt2x00/rt2x00dev.c b/drivers/net/wireless/rt2x00/rt2x00dev.c +index 21b529b..f099b30 100644 +--- a/drivers/net/wireless/rt2x00/rt2x00dev.c ++++ b/drivers/net/wireless/rt2x00/rt2x00dev.c +@@ -624,7 +624,7 @@ void rt2x00lib_rxdone(struct queue_entry *entry) + */ + if (unlikely(rxdesc.size == 0 || + rxdesc.size > entry->queue->data_size)) { +- WARNING(rt2x00dev, "Wrong frame size %d max %d.\n", ++ ERROR(rt2x00dev, "Wrong frame size %d max %d.\n", + rxdesc.size, entry->queue->data_size); + dev_kfree_skb(entry->skb); + goto renew_skb; +diff --git a/drivers/net/wireless/rt2x00/rt61pci.c b/drivers/net/wireless/rt2x00/rt61pci.c +index d69f88c..3e058e5 100644 +--- a/drivers/net/wireless/rt2x00/rt61pci.c ++++ b/drivers/net/wireless/rt2x00/rt61pci.c +@@ -2832,6 +2832,7 @@ static int rt61pci_probe_hw_mode(struct rt2x00_dev *rt2x00dev) + static int rt61pci_probe_hw(struct rt2x00_dev *rt2x00dev) + { + int retval; ++ u32 reg; + + /* + * Disable power saving. +@@ -2850,6 +2851,14 @@ static int rt61pci_probe_hw(struct rt2x00_dev *rt2x00dev) + return retval; + + /* ++ * Enable rfkill polling by setting GPIO direction of the ++ * rfkill switch GPIO pin correctly. ++ */ ++ rt2x00pci_register_read(rt2x00dev, MAC_CSR13, ®); ++ rt2x00_set_field32(®, MAC_CSR13_BIT13, 1); ++ rt2x00pci_register_write(rt2x00dev, MAC_CSR13, reg); ++ ++ /* + * Initialize hw specifications. 
+ */ + retval = rt61pci_probe_hw_mode(rt2x00dev); +diff --git a/drivers/net/wireless/rt2x00/rt61pci.h b/drivers/net/wireless/rt2x00/rt61pci.h +index e3cd6db..8f3da5a 100644 +--- a/drivers/net/wireless/rt2x00/rt61pci.h ++++ b/drivers/net/wireless/rt2x00/rt61pci.h +@@ -372,6 +372,7 @@ struct hw_pairwise_ta_entry { + #define MAC_CSR13_BIT10 FIELD32(0x00000400) + #define MAC_CSR13_BIT11 FIELD32(0x00000800) + #define MAC_CSR13_BIT12 FIELD32(0x00001000) ++#define MAC_CSR13_BIT13 FIELD32(0x00002000) + + /* + * MAC_CSR14: LED control register. +diff --git a/drivers/net/wireless/rt2x00/rt73usb.c b/drivers/net/wireless/rt2x00/rt73usb.c +index cfb19db..2ad468d 100644 +--- a/drivers/net/wireless/rt2x00/rt73usb.c ++++ b/drivers/net/wireless/rt2x00/rt73usb.c +@@ -2177,6 +2177,7 @@ static int rt73usb_probe_hw_mode(struct rt2x00_dev *rt2x00dev) + static int rt73usb_probe_hw(struct rt2x00_dev *rt2x00dev) + { + int retval; ++ u32 reg; + + /* + * Allocate eeprom data. +@@ -2190,6 +2191,14 @@ static int rt73usb_probe_hw(struct rt2x00_dev *rt2x00dev) + return retval; + + /* ++ * Enable rfkill polling by setting GPIO direction of the ++ * rfkill switch GPIO pin correctly. ++ */ ++ rt2x00usb_register_read(rt2x00dev, MAC_CSR13, ®); ++ rt2x00_set_field32(®, MAC_CSR13_BIT15, 0); ++ rt2x00usb_register_write(rt2x00dev, MAC_CSR13, reg); ++ ++ /* + * Initialize hw specifications. + */ + retval = rt73usb_probe_hw_mode(rt2x00dev); +diff --git a/drivers/net/wireless/rt2x00/rt73usb.h b/drivers/net/wireless/rt2x00/rt73usb.h +index 9f6b470..df1cc11 100644 +--- a/drivers/net/wireless/rt2x00/rt73usb.h ++++ b/drivers/net/wireless/rt2x00/rt73usb.h +@@ -282,6 +282,9 @@ struct hw_pairwise_ta_entry { + #define MAC_CSR13_BIT10 FIELD32(0x00000400) + #define MAC_CSR13_BIT11 FIELD32(0x00000800) + #define MAC_CSR13_BIT12 FIELD32(0x00001000) ++#define MAC_CSR13_BIT13 FIELD32(0x00002000) ++#define MAC_CSR13_BIT14 FIELD32(0x00004000) ++#define MAC_CSR13_BIT15 FIELD32(0x00008000) + + /* + * MAC_CSR14: LED control register. +diff --git a/drivers/scsi/megaraid/megaraid_sas_base.c b/drivers/scsi/megaraid/megaraid_sas_base.c +index 29a994f..7c471eb 100644 +--- a/drivers/scsi/megaraid/megaraid_sas_base.c ++++ b/drivers/scsi/megaraid/megaraid_sas_base.c +@@ -4125,7 +4125,6 @@ megasas_probe_one(struct pci_dev *pdev, const struct pci_device_id *id) + spin_lock_init(&instance->cmd_pool_lock); + spin_lock_init(&instance->hba_lock); + spin_lock_init(&instance->completion_lock); +- spin_lock_init(&poll_aen_lock); + + mutex_init(&instance->aen_mutex); + mutex_init(&instance->reset_mutex); +@@ -5520,6 +5519,8 @@ static int __init megasas_init(void) + printk(KERN_INFO "megasas: %s %s\n", MEGASAS_VERSION, + MEGASAS_EXT_VERSION); + ++ spin_lock_init(&poll_aen_lock); ++ + support_poll_for_event = 2; + support_device_change = 1; + +diff --git a/drivers/scsi/mpt2sas/mpt2sas_base.c b/drivers/scsi/mpt2sas/mpt2sas_base.c +index e903077..98cb5e6 100644 +--- a/drivers/scsi/mpt2sas/mpt2sas_base.c ++++ b/drivers/scsi/mpt2sas/mpt2sas_base.c +@@ -2353,10 +2353,13 @@ _base_allocate_memory_pools(struct MPT2SAS_ADAPTER *ioc, int sleep_flag) + } + + /* command line tunables for max controller queue depth */ +- if (max_queue_depth != -1) +- max_request_credit = (max_queue_depth < facts->RequestCredit) +- ? 
max_queue_depth : facts->RequestCredit; +- else ++ if (max_queue_depth != -1 && max_queue_depth != 0) { ++ max_request_credit = min_t(u16, max_queue_depth + ++ ioc->hi_priority_depth + ioc->internal_depth, ++ facts->RequestCredit); ++ if (max_request_credit > MAX_HBA_QUEUE_DEPTH) ++ max_request_credit = MAX_HBA_QUEUE_DEPTH; ++ } else + max_request_credit = min_t(u16, facts->RequestCredit, + MAX_HBA_QUEUE_DEPTH); + +@@ -2431,7 +2434,7 @@ _base_allocate_memory_pools(struct MPT2SAS_ADAPTER *ioc, int sleep_flag) + /* set the scsi host can_queue depth + * with some internal commands that could be outstanding + */ +- ioc->shost->can_queue = ioc->scsiio_depth - (2); ++ ioc->shost->can_queue = ioc->scsiio_depth; + dinitprintk(ioc, printk(MPT2SAS_INFO_FMT "scsi host: " + "can_queue depth (%d)\n", ioc->name, ioc->shost->can_queue)); + +diff --git a/drivers/scsi/scsi_error.c b/drivers/scsi/scsi_error.c +index 456b131..c83571e 100644 +--- a/drivers/scsi/scsi_error.c ++++ b/drivers/scsi/scsi_error.c +@@ -41,6 +41,8 @@ + + #include + ++static void scsi_eh_done(struct scsi_cmnd *scmd); ++ + #define SENSE_TIMEOUT (10*HZ) + + /* +@@ -240,6 +242,14 @@ static int scsi_check_sense(struct scsi_cmnd *scmd) + if (! scsi_command_normalize_sense(scmd, &sshdr)) + return FAILED; /* no valid sense data */ + ++ if (scmd->cmnd[0] == TEST_UNIT_READY && scmd->scsi_done != scsi_eh_done) ++ /* ++ * nasty: for mid-layer issued TURs, we need to return the ++ * actual sense data without any recovery attempt. For eh ++ * issued ones, we need to try to recover and interpret ++ */ ++ return SUCCESS; ++ + if (scsi_sense_is_deferred(&sshdr)) + return NEEDS_RETRY; + +diff --git a/drivers/scsi/scsi_scan.c b/drivers/scsi/scsi_scan.c +index a48b59c..c6c80c9 100644 +--- a/drivers/scsi/scsi_scan.c ++++ b/drivers/scsi/scsi_scan.c +@@ -776,6 +776,16 @@ static int scsi_add_lun(struct scsi_device *sdev, unsigned char *inq_result, + sdev->model = (char *) (sdev->inquiry + 16); + sdev->rev = (char *) (sdev->inquiry + 32); + ++ if (strncmp(sdev->vendor, "ATA ", 8) == 0) { ++ /* ++ * sata emulation layer device. This is a hack to work around ++ * the SATL power management specifications which state that ++ * when the SATL detects the device has gone into standby ++ * mode, it shall respond with NOT READY. 
++ */ ++ sdev->allow_restart = 1; ++ } ++ + if (*bflags & BLIST_ISROM) { + sdev->type = TYPE_ROM; + sdev->removable = 1; +diff --git a/drivers/staging/comedi/drivers/das08.c b/drivers/staging/comedi/drivers/das08.c +index 3141dc8..a48fe88 100644 +--- a/drivers/staging/comedi/drivers/das08.c ++++ b/drivers/staging/comedi/drivers/das08.c +@@ -385,7 +385,7 @@ static const struct das08_board_struct das08_boards[] = { + .ai = das08_ai_rinsn, + .ai_nbits = 16, + .ai_pg = das08_pg_none, +- .ai_encoding = das08_encode12, ++ .ai_encoding = das08_encode16, + .ao = das08jr_ao_winsn, + .ao_nbits = 16, + .di = das08jr_di_rbits, +@@ -655,7 +655,7 @@ static int das08jr_ao_winsn(struct comedi_device *dev, + int chan; + + lsb = data[0] & 0xff; +- msb = (data[0] >> 8) & 0xf; ++ msb = (data[0] >> 8) & 0xff; + + chan = CR_CHAN(insn->chanspec); + +diff --git a/drivers/staging/rtl8712/recv_linux.c b/drivers/staging/rtl8712/recv_linux.c +index 0e26d5f..495ee12 100644 +--- a/drivers/staging/rtl8712/recv_linux.c ++++ b/drivers/staging/rtl8712/recv_linux.c +@@ -117,13 +117,8 @@ void r8712_recv_indicatepkt(struct _adapter *padapter, + if (skb == NULL) + goto _recv_indicatepkt_drop; + skb->data = precv_frame->u.hdr.rx_data; +-#ifdef NET_SKBUFF_DATA_USES_OFFSET +- skb->tail = (sk_buff_data_t)(precv_frame->u.hdr.rx_tail - +- precv_frame->u.hdr.rx_head); +-#else +- skb->tail = (sk_buff_data_t)precv_frame->u.hdr.rx_tail; +-#endif + skb->len = precv_frame->u.hdr.len; ++ skb_set_tail_pointer(skb, skb->len); + if ((pattrib->tcpchk_valid == 1) && (pattrib->tcp_chkrpt == 1)) + skb->ip_summed = CHECKSUM_UNNECESSARY; + else +diff --git a/drivers/staging/vt6656/dpc.c b/drivers/staging/vt6656/dpc.c +index c0edf97..08021f4 100644 +--- a/drivers/staging/vt6656/dpc.c ++++ b/drivers/staging/vt6656/dpc.c +@@ -200,7 +200,7 @@ s_vProcessRxMACHeader ( + } else if (!compare_ether_addr(pbyRxBuffer, &pDevice->abySNAP_RFC1042[0])) { + cbHeaderSize += 6; + pwType = (PWORD) (pbyRxBufferAddr + cbHeaderSize); +- if ((*pwType == cpu_to_le16(ETH_P_IPX)) || ++ if ((*pwType == cpu_to_be16(ETH_P_IPX)) || + (*pwType == cpu_to_le16(0xF380))) { + cbHeaderSize -= 8; + pwType = (PWORD) (pbyRxBufferAddr + cbHeaderSize); +diff --git a/drivers/staging/vt6656/rxtx.c b/drivers/staging/vt6656/rxtx.c +index 9b64b10..fe21868 100644 +--- a/drivers/staging/vt6656/rxtx.c ++++ b/drivers/staging/vt6656/rxtx.c +@@ -1701,7 +1701,7 @@ s_bPacketToWirelessUsb( + // 802.1H + if (ntohs(psEthHeader->wType) > ETH_DATA_LEN) { + if (pDevice->dwDiagRefCount == 0) { +- if ((psEthHeader->wType == cpu_to_le16(ETH_P_IPX)) || ++ if ((psEthHeader->wType == cpu_to_be16(ETH_P_IPX)) || + (psEthHeader->wType == cpu_to_le16(0xF380))) { + memcpy((PBYTE) (pbyPayloadHead), + abySNAP_Bridgetunnel, 6); +@@ -2840,10 +2840,10 @@ int nsDMA_tx_packet(PSDevice pDevice, unsigned int uDMAIdx, struct sk_buff *skb) + Packet_Type = skb->data[ETH_HLEN+1]; + Descriptor_type = skb->data[ETH_HLEN+1+1+2]; + Key_info = (skb->data[ETH_HLEN+1+1+2+1] << 8)|(skb->data[ETH_HLEN+1+1+2+2]); +- if (pDevice->sTxEthHeader.wType == cpu_to_le16(ETH_P_PAE)) { +- /* 802.1x OR eapol-key challenge frame transfer */ +- if (((Protocol_Version == 1) || (Protocol_Version == 2)) && +- (Packet_Type == 3)) { ++ if (pDevice->sTxEthHeader.wType == cpu_to_be16(ETH_P_PAE)) { ++ /* 802.1x OR eapol-key challenge frame transfer */ ++ if (((Protocol_Version == 1) || (Protocol_Version == 2)) && ++ (Packet_Type == 3)) { + bTxeapol_key = TRUE; + if(!(Key_info & BIT3) && //WPA or RSN group-key challenge + (Key_info & BIT8) && (Key_info & BIT9)) 
{ //send 2/2 key +@@ -2989,19 +2989,19 @@ int nsDMA_tx_packet(PSDevice pDevice, unsigned int uDMAIdx, struct sk_buff *skb) + } + } + +- if (pDevice->sTxEthHeader.wType == cpu_to_le16(ETH_P_PAE)) { +- if (pDevice->byBBType != BB_TYPE_11A) { +- pDevice->wCurrentRate = RATE_1M; +- pDevice->byACKRate = RATE_1M; +- pDevice->byTopCCKBasicRate = RATE_1M; +- pDevice->byTopOFDMBasicRate = RATE_6M; +- } else { +- pDevice->wCurrentRate = RATE_6M; +- pDevice->byACKRate = RATE_6M; +- pDevice->byTopCCKBasicRate = RATE_1M; +- pDevice->byTopOFDMBasicRate = RATE_6M; +- } +- } ++ if (pDevice->sTxEthHeader.wType == cpu_to_be16(ETH_P_PAE)) { ++ if (pDevice->byBBType != BB_TYPE_11A) { ++ pDevice->wCurrentRate = RATE_1M; ++ pDevice->byACKRate = RATE_1M; ++ pDevice->byTopCCKBasicRate = RATE_1M; ++ pDevice->byTopOFDMBasicRate = RATE_6M; ++ } else { ++ pDevice->wCurrentRate = RATE_6M; ++ pDevice->byACKRate = RATE_6M; ++ pDevice->byTopCCKBasicRate = RATE_1M; ++ pDevice->byTopOFDMBasicRate = RATE_6M; ++ } ++ } + + DBG_PRT(MSG_LEVEL_DEBUG, + KERN_INFO "dma_tx: pDevice->wCurrentRate = %d\n", +@@ -3017,7 +3017,7 @@ int nsDMA_tx_packet(PSDevice pDevice, unsigned int uDMAIdx, struct sk_buff *skb) + + if (bNeedEncryption == TRUE) { + DBG_PRT(MSG_LEVEL_DEBUG, KERN_INFO"ntohs Pkt Type=%04x\n", ntohs(pDevice->sTxEthHeader.wType)); +- if ((pDevice->sTxEthHeader.wType) == cpu_to_le16(ETH_P_PAE)) { ++ if ((pDevice->sTxEthHeader.wType) == cpu_to_be16(ETH_P_PAE)) { + bNeedEncryption = FALSE; + DBG_PRT(MSG_LEVEL_DEBUG, KERN_INFO"Pkt Type=%04x\n", (pDevice->sTxEthHeader.wType)); + if ((pMgmt->eCurrMode == WMAC_MODE_ESS_STA) && (pMgmt->eCurrState == WMAC_STATE_ASSOC)) { +diff --git a/drivers/staging/zcache/zcache-main.c b/drivers/staging/zcache/zcache-main.c +index 16ad9fe..4306475 100644 +--- a/drivers/staging/zcache/zcache-main.c ++++ b/drivers/staging/zcache/zcache-main.c +@@ -1223,13 +1223,12 @@ static int zcache_pampd_get_data_and_free(char *data, size_t *bufsize, bool raw, + void *pampd, struct tmem_pool *pool, + struct tmem_oid *oid, uint32_t index) + { +- int ret = 0; +- + BUG_ON(!is_ephemeral(pool)); +- zbud_decompress((struct page *)(data), pampd); ++ if (zbud_decompress((struct page *)(data), pampd) < 0) ++ return -EINVAL; + zbud_free_and_delist((struct zbud_hdr *)pampd); + atomic_dec(&zcache_curr_eph_pampd_count); +- return ret; ++ return 0; + } + + /* +diff --git a/drivers/tty/serial/imx.c b/drivers/tty/serial/imx.c +index 163fc90..8e68f79 100644 +--- a/drivers/tty/serial/imx.c ++++ b/drivers/tty/serial/imx.c +@@ -130,6 +130,7 @@ + #define UCR4_OREN (1<<1) /* Receiver overrun interrupt enable */ + #define UCR4_DREN (1<<0) /* Recv data ready interrupt enable */ + #define UFCR_RXTL_SHF 0 /* Receiver trigger level shift */ ++#define UFCR_DCEDTE (1<<6) /* DCE/DTE mode select */ + #define UFCR_RFDIV (7<<7) /* Reference freq divider mask */ + #define UFCR_RFDIV_REG(x) (((x) < 7 ? 6 - (x) : 6) << 7) + #define UFCR_TXTL_SHF 10 /* Transmitter trigger level shift */ +@@ -635,22 +636,11 @@ static void imx_break_ctl(struct uart_port *port, int break_state) + static int imx_setup_ufcr(struct imx_port *sport, unsigned int mode) + { + unsigned int val; +- unsigned int ufcr_rfdiv; +- +- /* set receiver / transmitter trigger level. 
+- * RFDIV is set such way to satisfy requested uartclk value +- */ +- val = TXTL << 10 | RXTL; +- ufcr_rfdiv = (clk_get_rate(sport->clk) + sport->port.uartclk / 2) +- / sport->port.uartclk; +- +- if(!ufcr_rfdiv) +- ufcr_rfdiv = 1; +- +- val |= UFCR_RFDIV_REG(ufcr_rfdiv); + ++ /* set receiver / transmitter trigger level */ ++ val = readl(sport->port.membase + UFCR) & (UFCR_RFDIV | UFCR_DCEDTE); ++ val |= TXTL << UFCR_TXTL_SHF | RXTL; + writel(val, sport->port.membase + UFCR); +- + return 0; + } + +@@ -725,6 +715,7 @@ static int imx_startup(struct uart_port *port) + } + } + ++ spin_lock_irqsave(&sport->port.lock, flags); + /* + * Finally, clear and enable interrupts + */ +@@ -778,7 +769,6 @@ static int imx_startup(struct uart_port *port) + /* + * Enable modem status interrupts + */ +- spin_lock_irqsave(&sport->port.lock,flags); + imx_enable_ms(&sport->port); + spin_unlock_irqrestore(&sport->port.lock,flags); + +@@ -808,10 +798,13 @@ static void imx_shutdown(struct uart_port *port) + { + struct imx_port *sport = (struct imx_port *)port; + unsigned long temp; ++ unsigned long flags; + ++ spin_lock_irqsave(&sport->port.lock, flags); + temp = readl(sport->port.membase + UCR2); + temp &= ~(UCR2_TXEN); + writel(temp, sport->port.membase + UCR2); ++ spin_unlock_irqrestore(&sport->port.lock, flags); + + if (USE_IRDA(sport)) { + struct imxuart_platform_data *pdata; +@@ -840,12 +833,14 @@ static void imx_shutdown(struct uart_port *port) + * Disable all interrupts, port and break condition. + */ + ++ spin_lock_irqsave(&sport->port.lock, flags); + temp = readl(sport->port.membase + UCR1); + temp &= ~(UCR1_TXMPTYEN | UCR1_RRDYEN | UCR1_RTSDEN | UCR1_UARTEN); + if (USE_IRDA(sport)) + temp &= ~(UCR1_IREN); + + writel(temp, sport->port.membase + UCR1); ++ spin_unlock_irqrestore(&sport->port.lock, flags); + } + + static void +@@ -1119,6 +1114,9 @@ imx_console_write(struct console *co, const char *s, unsigned int count) + { + struct imx_port *sport = imx_ports[co->index]; + unsigned int old_ucr1, old_ucr2, ucr1; ++ unsigned long flags; ++ ++ spin_lock_irqsave(&sport->port.lock, flags); + + /* + * First, save UCR1/2 and then disable interrupts +@@ -1145,6 +1143,8 @@ imx_console_write(struct console *co, const char *s, unsigned int count) + + writel(old_ucr1, sport->port.membase + UCR1); + writel(old_ucr2, sport->port.membase + UCR2); ++ ++ spin_unlock_irqrestore(&sport->port.lock, flags); + } + + /* +diff --git a/drivers/usb/core/quirks.c b/drivers/usb/core/quirks.c +index 32d3adc..8b2a9d8 100644 +--- a/drivers/usb/core/quirks.c ++++ b/drivers/usb/core/quirks.c +@@ -96,6 +96,10 @@ static const struct usb_device_id usb_quirk_list[] = { + { USB_DEVICE(0x04b4, 0x0526), .driver_info = + USB_QUIRK_CONFIG_INTF_STRINGS }, + ++ /* Microchip Joss Optical infrared touchboard device */ ++ { USB_DEVICE(0x04d8, 0x000c), .driver_info = ++ USB_QUIRK_CONFIG_INTF_STRINGS }, ++ + /* Samsung Android phone modem - ID conflict with SPH-I500 */ + { USB_DEVICE(0x04e8, 0x6601), .driver_info = + USB_QUIRK_CONFIG_INTF_STRINGS }, +diff --git a/drivers/usb/host/ehci-q.c b/drivers/usb/host/ehci-q.c +index fef1db3..2023733 100644 +--- a/drivers/usb/host/ehci-q.c ++++ b/drivers/usb/host/ehci-q.c +@@ -128,9 +128,17 @@ qh_refresh (struct ehci_hcd *ehci, struct ehci_qh *qh) + else { + qtd = list_entry (qh->qtd_list.next, + struct ehci_qtd, qtd_list); +- /* first qtd may already be partially processed */ +- if (cpu_to_hc32(ehci, qtd->qtd_dma) == qh->hw->hw_current) ++ /* ++ * first qtd may already be partially processed. 
++ * If we come here during unlink, the QH overlay region ++ * might have reference to the just unlinked qtd. The ++ * qtd is updated in qh_completions(). Update the QH ++ * overlay here. ++ */ ++ if (cpu_to_hc32(ehci, qtd->qtd_dma) == qh->hw->hw_current) { ++ qh->hw->hw_qtd_next = qtd->hw_next; + qtd = NULL; ++ } + } + + if (qtd) +diff --git a/drivers/usb/host/pci-quirks.c b/drivers/usb/host/pci-quirks.c +index 833b3c6..d0ec2f0 100644 +--- a/drivers/usb/host/pci-quirks.c ++++ b/drivers/usb/host/pci-quirks.c +@@ -75,7 +75,9 @@ + #define NB_PIF0_PWRDOWN_1 0x01100013 + + #define USB_INTEL_XUSB2PR 0xD0 ++#define USB_INTEL_USB2PRM 0xD4 + #define USB_INTEL_USB3_PSSEN 0xD8 ++#define USB_INTEL_USB3PRM 0xDC + + static struct amd_chipset_info { + struct pci_dev *nb_dev; +@@ -772,10 +774,18 @@ void usb_enable_xhci_ports(struct pci_dev *xhci_pdev) + return; + } + +- ports_available = 0xffffffff; ++ /* Read USB3PRM, the USB 3.0 Port Routing Mask Register ++ * Indicate the ports that can be changed from OS. ++ */ ++ pci_read_config_dword(xhci_pdev, USB_INTEL_USB3PRM, ++ &ports_available); ++ ++ dev_dbg(&xhci_pdev->dev, "Configurable ports to enable SuperSpeed: 0x%x\n", ++ ports_available); ++ + /* Write USB3_PSSEN, the USB 3.0 Port SuperSpeed Enable +- * Register, to turn on SuperSpeed terminations for all +- * available ports. ++ * Register, to turn on SuperSpeed terminations for the ++ * switchable ports. + */ + pci_write_config_dword(xhci_pdev, USB_INTEL_USB3_PSSEN, + cpu_to_le32(ports_available)); +@@ -785,7 +795,16 @@ void usb_enable_xhci_ports(struct pci_dev *xhci_pdev) + dev_dbg(&xhci_pdev->dev, "USB 3.0 ports that are now enabled " + "under xHCI: 0x%x\n", ports_available); + +- ports_available = 0xffffffff; ++ /* Read XUSB2PRM, xHCI USB 2.0 Port Routing Mask Register ++ * Indicate the USB 2.0 ports to be controlled by the xHCI host. ++ */ ++ ++ pci_read_config_dword(xhci_pdev, USB_INTEL_USB2PRM, ++ &ports_available); ++ ++ dev_dbg(&xhci_pdev->dev, "Configurable USB 2.0 ports to hand over to xCHI: 0x%x\n", ++ ports_available); ++ + /* Write XUSB2PR, the xHC USB 2.0 Port Routing Register, to + * switch the USB 2.0 power and data lines over to the xHCI + * host. +@@ -800,6 +819,13 @@ void usb_enable_xhci_ports(struct pci_dev *xhci_pdev) + } + EXPORT_SYMBOL_GPL(usb_enable_xhci_ports); + ++void usb_disable_xhci_ports(struct pci_dev *xhci_pdev) ++{ ++ pci_write_config_dword(xhci_pdev, USB_INTEL_USB3_PSSEN, 0x0); ++ pci_write_config_dword(xhci_pdev, USB_INTEL_XUSB2PR, 0x0); ++} ++EXPORT_SYMBOL_GPL(usb_disable_xhci_ports); ++ + /** + * PCI Quirks for xHCI. 
+ * +@@ -815,12 +841,12 @@ static void __devinit quirk_usb_handoff_xhci(struct pci_dev *pdev) + void __iomem *op_reg_base; + u32 val; + int timeout; ++ int len = pci_resource_len(pdev, 0); + + if (!mmio_resource_enabled(pdev, 0)) + return; + +- base = ioremap_nocache(pci_resource_start(pdev, 0), +- pci_resource_len(pdev, 0)); ++ base = ioremap_nocache(pci_resource_start(pdev, 0), len); + if (base == NULL) + return; + +@@ -830,9 +856,17 @@ static void __devinit quirk_usb_handoff_xhci(struct pci_dev *pdev) + */ + ext_cap_offset = xhci_find_next_cap_offset(base, XHCI_HCC_PARAMS_OFFSET); + do { ++ if ((ext_cap_offset + sizeof(val)) > len) { ++ /* We're reading garbage from the controller */ ++ dev_warn(&pdev->dev, ++ "xHCI controller failing to respond"); ++ return; ++ } ++ + if (!ext_cap_offset) + /* We've reached the end of the extended capabilities */ + goto hc_init; ++ + val = readl(base + ext_cap_offset); + if (XHCI_EXT_CAPS_ID(val) == XHCI_EXT_CAPS_LEGACY) + break; +@@ -863,9 +897,10 @@ static void __devinit quirk_usb_handoff_xhci(struct pci_dev *pdev) + /* Disable any BIOS SMIs and clear all SMI events*/ + writel(val, base + ext_cap_offset + XHCI_LEGACY_CONTROL_OFFSET); + ++hc_init: + if (usb_is_intel_switchable_xhci(pdev)) + usb_enable_xhci_ports(pdev); +-hc_init: ++ + op_reg_base = base + XHCI_HC_LENGTH(readl(base)); + + /* Wait for the host controller to be ready before writing any +diff --git a/drivers/usb/host/pci-quirks.h b/drivers/usb/host/pci-quirks.h +index b1002a8..7f69a39 100644 +--- a/drivers/usb/host/pci-quirks.h ++++ b/drivers/usb/host/pci-quirks.h +@@ -10,10 +10,12 @@ void usb_amd_quirk_pll_disable(void); + void usb_amd_quirk_pll_enable(void); + bool usb_is_intel_switchable_xhci(struct pci_dev *pdev); + void usb_enable_xhci_ports(struct pci_dev *xhci_pdev); ++void usb_disable_xhci_ports(struct pci_dev *xhci_pdev); + #else + static inline void usb_amd_quirk_pll_disable(void) {} + static inline void usb_amd_quirk_pll_enable(void) {} + static inline void usb_amd_dev_put(void) {} ++static inline void usb_disable_xhci_ports(struct pci_dev *xhci_pdev) {} + #endif /* CONFIG_PCI */ + + #endif /* __LINUX_USB_PCI_QUIRKS_H */ +diff --git a/drivers/usb/host/xhci-hub.c b/drivers/usb/host/xhci-hub.c +index fd8a2c2..978860b 100644 +--- a/drivers/usb/host/xhci-hub.c ++++ b/drivers/usb/host/xhci-hub.c +@@ -469,11 +469,48 @@ static void xhci_hub_report_link_state(u32 *status, u32 status_reg) + * when this bit is set. + */ + pls |= USB_PORT_STAT_CONNECTION; ++ } else { ++ /* ++ * If CAS bit isn't set but the Port is already at ++ * Compliance Mode, fake a connection so the USB core ++ * notices the Compliance state and resets the port. ++ * This resolves an issue generated by the SN65LVPE502CP ++ * in which sometimes the port enters compliance mode ++ * caused by a delay on the host-device negotiation. ++ */ ++ if (pls == USB_SS_PORT_LS_COMP_MOD) ++ pls |= USB_PORT_STAT_CONNECTION; + } ++ + /* update status field */ + *status |= pls; + } + ++/* ++ * Function for Compliance Mode Quirk. ++ * ++ * This Function verifies if all xhc USB3 ports have entered U0, if so, ++ * the compliance mode timer is deleted. A port won't enter ++ * compliance mode if it has previously entered U0. 
++ */ ++void xhci_del_comp_mod_timer(struct xhci_hcd *xhci, u32 status, u16 wIndex) ++{ ++ u32 all_ports_seen_u0 = ((1 << xhci->num_usb3_ports)-1); ++ bool port_in_u0 = ((status & PORT_PLS_MASK) == XDEV_U0); ++ ++ if (!(xhci->quirks & XHCI_COMP_MODE_QUIRK)) ++ return; ++ ++ if ((xhci->port_status_u0 != all_ports_seen_u0) && port_in_u0) { ++ xhci->port_status_u0 |= 1 << wIndex; ++ if (xhci->port_status_u0 == all_ports_seen_u0) { ++ del_timer_sync(&xhci->comp_mode_recovery_timer); ++ xhci_dbg(xhci, "All USB3 ports have entered U0 already!\n"); ++ xhci_dbg(xhci, "Compliance Mode Recovery Timer Deleted.\n"); ++ } ++ } ++} ++ + int xhci_hub_control(struct usb_hcd *hcd, u16 typeReq, u16 wValue, + u16 wIndex, char *buf, u16 wLength) + { +@@ -618,6 +655,11 @@ int xhci_hub_control(struct usb_hcd *hcd, u16 typeReq, u16 wValue, + /* Update Port Link State for super speed ports*/ + if (hcd->speed == HCD_USB3) { + xhci_hub_report_link_state(&status, temp); ++ /* ++ * Verify if all USB3 Ports Have entered U0 already. ++ * Delete Compliance Mode Timer if so. ++ */ ++ xhci_del_comp_mod_timer(xhci, temp, wIndex); + } + if (bus_state->port_c_suspend & (1 << wIndex)) + status |= 1 << USB_PORT_FEAT_C_SUSPEND; +diff --git a/drivers/usb/host/xhci-pci.c b/drivers/usb/host/xhci-pci.c +index 07c72a4..bddcbfc 100644 +--- a/drivers/usb/host/xhci-pci.c ++++ b/drivers/usb/host/xhci-pci.c +@@ -90,6 +90,15 @@ static void xhci_pci_quirks(struct device *dev, struct xhci_hcd *xhci) + xhci->quirks |= XHCI_EP_LIMIT_QUIRK; + xhci->limit_active_eps = 64; + xhci->quirks |= XHCI_SW_BW_CHECKING; ++ /* ++ * PPT desktop boards DH77EB and DH77DF will power back on after ++ * a few seconds of being shutdown. The fix for this is to ++ * switch the ports from xHCI to EHCI on shutdown. We can't use ++ * DMI information to find those particular boards (since each ++ * vendor will change the board name), so we have to key off all ++ * PPT chipsets. ++ */ ++ xhci->quirks |= XHCI_SPURIOUS_REBOOT; + } + if (pdev->vendor == PCI_VENDOR_ID_ETRON && + pdev->device == PCI_DEVICE_ID_ASROCK_P67) { +diff --git a/drivers/usb/host/xhci-ring.c b/drivers/usb/host/xhci-ring.c +index fb0981e..c7c530c 100644 +--- a/drivers/usb/host/xhci-ring.c ++++ b/drivers/usb/host/xhci-ring.c +@@ -145,25 +145,34 @@ static void next_trb(struct xhci_hcd *xhci, + */ + static void inc_deq(struct xhci_hcd *xhci, struct xhci_ring *ring, bool consumer) + { +- union xhci_trb *next = ++(ring->dequeue); + unsigned long long addr; + + ring->deq_updates++; +- /* Update the dequeue pointer further if that was a link TRB or we're at +- * the end of an event ring segment (which doesn't have link TRBS) +- */ +- while (last_trb(xhci, ring, ring->deq_seg, next)) { +- if (consumer && last_trb_on_last_seg(xhci, ring, ring->deq_seg, next)) { +- ring->cycle_state = (ring->cycle_state ? 0 : 1); +- if (!in_interrupt()) +- xhci_dbg(xhci, "Toggle cycle state for ring %p = %i\n", +- ring, +- (unsigned int) ring->cycle_state); ++ ++ do { ++ /* ++ * Update the dequeue pointer further if that was a link TRB or ++ * we're at the end of an event ring segment (which doesn't have ++ * link TRBS) ++ */ ++ if (last_trb(xhci, ring, ring->deq_seg, ring->dequeue)) { ++ if (consumer && last_trb_on_last_seg(xhci, ring, ++ ring->deq_seg, ring->dequeue)) { ++ if (!in_interrupt()) ++ xhci_dbg(xhci, "Toggle cycle state " ++ "for ring %p = %i\n", ++ ring, ++ (unsigned int) ++ ring->cycle_state); ++ ring->cycle_state = (ring->cycle_state ? 
0 : 1); ++ } ++ ring->deq_seg = ring->deq_seg->next; ++ ring->dequeue = ring->deq_seg->trbs; ++ } else { ++ ring->dequeue++; + } +- ring->deq_seg = ring->deq_seg->next; +- ring->dequeue = ring->deq_seg->trbs; +- next = ring->dequeue; +- } ++ } while (last_trb(xhci, ring, ring->deq_seg, ring->dequeue)); ++ + addr = (unsigned long long) xhci_trb_virt_to_dma(ring->deq_seg, ring->dequeue); + } + +diff --git a/drivers/usb/host/xhci.c b/drivers/usb/host/xhci.c +index f7c0a2a..09872ee 100644 +--- a/drivers/usb/host/xhci.c ++++ b/drivers/usb/host/xhci.c +@@ -26,6 +26,7 @@ + #include + #include + #include ++#include + + #include "xhci.h" + +@@ -387,6 +388,95 @@ static void xhci_msix_sync_irqs(struct xhci_hcd *xhci) + + #endif + ++static void compliance_mode_recovery(unsigned long arg) ++{ ++ struct xhci_hcd *xhci; ++ struct usb_hcd *hcd; ++ u32 temp; ++ int i; ++ ++ xhci = (struct xhci_hcd *)arg; ++ ++ for (i = 0; i < xhci->num_usb3_ports; i++) { ++ temp = xhci_readl(xhci, xhci->usb3_ports[i]); ++ if ((temp & PORT_PLS_MASK) == USB_SS_PORT_LS_COMP_MOD) { ++ /* ++ * Compliance Mode Detected. Letting USB Core ++ * handle the Warm Reset ++ */ ++ xhci_dbg(xhci, "Compliance Mode Detected->Port %d!\n", ++ i + 1); ++ xhci_dbg(xhci, "Attempting Recovery routine!\n"); ++ hcd = xhci->shared_hcd; ++ ++ if (hcd->state == HC_STATE_SUSPENDED) ++ usb_hcd_resume_root_hub(hcd); ++ ++ usb_hcd_poll_rh_status(hcd); ++ } ++ } ++ ++ if (xhci->port_status_u0 != ((1 << xhci->num_usb3_ports)-1)) ++ mod_timer(&xhci->comp_mode_recovery_timer, ++ jiffies + msecs_to_jiffies(COMP_MODE_RCVRY_MSECS)); ++} ++ ++/* ++ * Quirk to work around issue generated by the SN65LVPE502CP USB3.0 re-driver ++ * that causes ports behind that hardware to enter compliance mode sometimes. ++ * The quirk creates a timer that polls every 2 seconds the link state of ++ * each host controller's port and recovers it by issuing a Warm reset ++ * if Compliance mode is detected, otherwise the port will become "dead" (no ++ * device connections or disconnections will be detected anymore). Becasue no ++ * status event is generated when entering compliance mode (per xhci spec), ++ * this quirk is needed on systems that have the failing hardware installed. ++ */ ++static void compliance_mode_recovery_timer_init(struct xhci_hcd *xhci) ++{ ++ xhci->port_status_u0 = 0; ++ init_timer(&xhci->comp_mode_recovery_timer); ++ ++ xhci->comp_mode_recovery_timer.data = (unsigned long) xhci; ++ xhci->comp_mode_recovery_timer.function = compliance_mode_recovery; ++ xhci->comp_mode_recovery_timer.expires = jiffies + ++ msecs_to_jiffies(COMP_MODE_RCVRY_MSECS); ++ ++ set_timer_slack(&xhci->comp_mode_recovery_timer, ++ msecs_to_jiffies(COMP_MODE_RCVRY_MSECS)); ++ add_timer(&xhci->comp_mode_recovery_timer); ++ xhci_dbg(xhci, "Compliance Mode Recovery Timer Initialized.\n"); ++} ++ ++/* ++ * This function identifies the systems that have installed the SN65LVPE502CP ++ * USB3.0 re-driver and that need the Compliance Mode Quirk. 
++ * Systems: ++ * Vendor: Hewlett-Packard -> System Models: Z420, Z620 and Z820 ++ */ ++static bool compliance_mode_recovery_timer_quirk_check(void) ++{ ++ const char *dmi_product_name, *dmi_sys_vendor; ++ ++ dmi_product_name = dmi_get_system_info(DMI_PRODUCT_NAME); ++ dmi_sys_vendor = dmi_get_system_info(DMI_SYS_VENDOR); ++ ++ if (!(strstr(dmi_sys_vendor, "Hewlett-Packard"))) ++ return false; ++ ++ if (strstr(dmi_product_name, "Z420") || ++ strstr(dmi_product_name, "Z620") || ++ strstr(dmi_product_name, "Z820")) ++ return true; ++ ++ return false; ++} ++ ++static int xhci_all_ports_seen_u0(struct xhci_hcd *xhci) ++{ ++ return (xhci->port_status_u0 == ((1 << xhci->num_usb3_ports)-1)); ++} ++ ++ + /* + * Initialize memory for HCD and xHC (one-time init). + * +@@ -410,6 +500,12 @@ int xhci_init(struct usb_hcd *hcd) + retval = xhci_mem_init(xhci, GFP_KERNEL); + xhci_dbg(xhci, "Finished xhci_init\n"); + ++ /* Initializing Compliance Mode Recovery Data If Needed */ ++ if (compliance_mode_recovery_timer_quirk_check()) { ++ xhci->quirks |= XHCI_COMP_MODE_QUIRK; ++ compliance_mode_recovery_timer_init(xhci); ++ } ++ + return retval; + } + +@@ -618,6 +714,11 @@ void xhci_stop(struct usb_hcd *hcd) + del_timer_sync(&xhci->event_ring_timer); + #endif + ++ /* Deleting Compliance Mode Recovery Timer */ ++ if ((xhci->quirks & XHCI_COMP_MODE_QUIRK) && ++ (!(xhci_all_ports_seen_u0(xhci)))) ++ del_timer_sync(&xhci->comp_mode_recovery_timer); ++ + if (xhci->quirks & XHCI_AMD_PLL_FIX) + usb_amd_dev_put(); + +@@ -648,6 +749,9 @@ void xhci_shutdown(struct usb_hcd *hcd) + { + struct xhci_hcd *xhci = hcd_to_xhci(hcd); + ++ if (xhci->quirks & XHCI_SPURIOUS_REBOOT) ++ usb_disable_xhci_ports(to_pci_dev(hcd->self.controller)); ++ + spin_lock_irq(&xhci->lock); + xhci_halt(xhci); + spin_unlock_irq(&xhci->lock); +@@ -791,6 +895,16 @@ int xhci_suspend(struct xhci_hcd *xhci) + } + spin_unlock_irq(&xhci->lock); + ++ /* ++ * Deleting Compliance Mode Recovery Timer because the xHCI Host ++ * is about to be suspended. ++ */ ++ if ((xhci->quirks & XHCI_COMP_MODE_QUIRK) && ++ (!(xhci_all_ports_seen_u0(xhci)))) { ++ del_timer_sync(&xhci->comp_mode_recovery_timer); ++ xhci_dbg(xhci, "Compliance Mode Recovery Timer Deleted!\n"); ++ } ++ + /* step 5: remove core well power */ + /* synchronize irq when using MSI-X */ + xhci_msix_sync_irqs(xhci); +@@ -923,6 +1037,16 @@ int xhci_resume(struct xhci_hcd *xhci, bool hibernated) + usb_hcd_resume_root_hub(hcd); + usb_hcd_resume_root_hub(xhci->shared_hcd); + } ++ ++ /* ++ * If system is subject to the Quirk, Compliance Mode Timer needs to ++ * be re-initialized Always after a system resume. Ports are subject ++ * to suffer the Compliance Mode issue again. It doesn't matter if ++ * ports have entered previously to U0 before system's suspension. 
++ */ ++ if (xhci->quirks & XHCI_COMP_MODE_QUIRK) ++ compliance_mode_recovery_timer_init(xhci); ++ + return retval; + } + #endif /* CONFIG_PM */ +diff --git a/drivers/usb/host/xhci.h b/drivers/usb/host/xhci.h +index 7a56805..44d518a 100644 +--- a/drivers/usb/host/xhci.h ++++ b/drivers/usb/host/xhci.h +@@ -1471,6 +1471,8 @@ struct xhci_hcd { + #define XHCI_SW_BW_CHECKING (1 << 8) + #define XHCI_AMD_0x96_HOST (1 << 9) + #define XHCI_TRUST_TX_LENGTH (1 << 10) ++#define XHCI_SPURIOUS_REBOOT (1 << 13) ++#define XHCI_COMP_MODE_QUIRK (1 << 14) + unsigned int num_active_eps; + unsigned int limit_active_eps; + /* There are two roothubs to keep track of bus suspend info for */ +@@ -1487,6 +1489,11 @@ struct xhci_hcd { + unsigned sw_lpm_support:1; + /* support xHCI 1.0 spec USB2 hardware LPM */ + unsigned hw_lpm_support:1; ++ /* Compliance Mode Recovery Data */ ++ struct timer_list comp_mode_recovery_timer; ++ u32 port_status_u0; ++/* Compliance Mode Timer Triggered every 2 seconds */ ++#define COMP_MODE_RCVRY_MSECS 2000 + }; + + /* convert between an HCD pointer and the corresponding EHCI_HCD */ +diff --git a/drivers/usb/serial/ftdi_sio.c b/drivers/usb/serial/ftdi_sio.c +index b3182bb..7324bea 100644 +--- a/drivers/usb/serial/ftdi_sio.c ++++ b/drivers/usb/serial/ftdi_sio.c +@@ -704,6 +704,7 @@ static struct usb_device_id id_table_combined [] = { + { USB_DEVICE(FTDI_VID, FTDI_PCDJ_DAC2_PID) }, + { USB_DEVICE(FTDI_VID, FTDI_RRCIRKITS_LOCOBUFFER_PID) }, + { USB_DEVICE(FTDI_VID, FTDI_ASK_RDR400_PID) }, ++ { USB_DEVICE(FTDI_VID, FTDI_NZR_SEM_USB_PID) }, + { USB_DEVICE(ICOM_VID, ICOM_ID_1_PID) }, + { USB_DEVICE(ICOM_VID, ICOM_OPC_U_UC_PID) }, + { USB_DEVICE(ICOM_VID, ICOM_ID_RP2C1_PID) }, +@@ -804,13 +805,32 @@ static struct usb_device_id id_table_combined [] = { + .driver_info = (kernel_ulong_t)&ftdi_jtag_quirk }, + { USB_DEVICE(ADI_VID, ADI_GNICEPLUS_PID), + .driver_info = (kernel_ulong_t)&ftdi_jtag_quirk }, +- { USB_DEVICE(MICROCHIP_VID, MICROCHIP_USB_BOARD_PID) }, ++ { USB_DEVICE_AND_INTERFACE_INFO(MICROCHIP_VID, MICROCHIP_USB_BOARD_PID, ++ USB_CLASS_VENDOR_SPEC, ++ USB_SUBCLASS_VENDOR_SPEC, 0x00) }, + { USB_DEVICE(JETI_VID, JETI_SPC1201_PID) }, + { USB_DEVICE(MARVELL_VID, MARVELL_SHEEVAPLUG_PID), + .driver_info = (kernel_ulong_t)&ftdi_jtag_quirk }, + { USB_DEVICE(LARSENBRUSGAARD_VID, LB_ALTITRACK_PID) }, + { USB_DEVICE(GN_OTOMETRICS_VID, AURICAL_USB_PID) }, ++ { USB_DEVICE(FTDI_VID, PI_C865_PID) }, ++ { USB_DEVICE(FTDI_VID, PI_C857_PID) }, ++ { USB_DEVICE(PI_VID, PI_C866_PID) }, ++ { USB_DEVICE(PI_VID, PI_C663_PID) }, ++ { USB_DEVICE(PI_VID, PI_C725_PID) }, ++ { USB_DEVICE(PI_VID, PI_E517_PID) }, ++ { USB_DEVICE(PI_VID, PI_C863_PID) }, + { USB_DEVICE(PI_VID, PI_E861_PID) }, ++ { USB_DEVICE(PI_VID, PI_C867_PID) }, ++ { USB_DEVICE(PI_VID, PI_E609_PID) }, ++ { USB_DEVICE(PI_VID, PI_E709_PID) }, ++ { USB_DEVICE(PI_VID, PI_100F_PID) }, ++ { USB_DEVICE(PI_VID, PI_1011_PID) }, ++ { USB_DEVICE(PI_VID, PI_1012_PID) }, ++ { USB_DEVICE(PI_VID, PI_1013_PID) }, ++ { USB_DEVICE(PI_VID, PI_1014_PID) }, ++ { USB_DEVICE(PI_VID, PI_1015_PID) }, ++ { USB_DEVICE(PI_VID, PI_1016_PID) }, + { USB_DEVICE(KONDO_VID, KONDO_USB_SERIAL_PID) }, + { USB_DEVICE(BAYER_VID, BAYER_CONTOUR_CABLE_PID) }, + { USB_DEVICE(FTDI_VID, MARVELL_OPENRD_PID), +diff --git a/drivers/usb/serial/ftdi_sio_ids.h b/drivers/usb/serial/ftdi_sio_ids.h +index 54b4258..06f6fd2 100644 +--- a/drivers/usb/serial/ftdi_sio_ids.h ++++ b/drivers/usb/serial/ftdi_sio_ids.h +@@ -75,6 +75,9 @@ + #define FTDI_OPENDCC_GATEWAY_PID 0xBFDB + #define FTDI_OPENDCC_GBM_PID 
0xBFDC + ++/* NZR SEM 16+ USB (http://www.nzr.de) */ ++#define FTDI_NZR_SEM_USB_PID 0xC1E0 /* NZR SEM-LOG16+ */ ++ + /* + * RR-CirKits LocoBuffer USB (http://www.rr-cirkits.com) + */ +@@ -539,7 +542,10 @@ + /* + * Microchip Technology, Inc. + * +- * MICROCHIP_VID (0x04D8) and MICROCHIP_USB_BOARD_PID (0x000A) are also used by: ++ * MICROCHIP_VID (0x04D8) and MICROCHIP_USB_BOARD_PID (0x000A) are ++ * used by single function CDC ACM class based firmware demo ++ * applications. The VID/PID has also been used in firmware ++ * emulating FTDI serial chips by: + * Hornby Elite - Digital Command Control Console + * http://www.hornby.com/hornby-dcc/controllers/ + */ +@@ -791,8 +797,27 @@ + * Physik Instrumente + * http://www.physikinstrumente.com/en/products/ + */ ++/* These two devices use the VID of FTDI */ ++#define PI_C865_PID 0xe0a0 /* PI C-865 Piezomotor Controller */ ++#define PI_C857_PID 0xe0a1 /* PI Encoder Trigger Box */ ++ + #define PI_VID 0x1a72 /* Vendor ID */ +-#define PI_E861_PID 0x1008 /* E-861 piezo controller USB connection */ ++#define PI_C866_PID 0x1000 /* PI C-866 Piezomotor Controller */ ++#define PI_C663_PID 0x1001 /* PI C-663 Mercury-Step */ ++#define PI_C725_PID 0x1002 /* PI C-725 Piezomotor Controller */ ++#define PI_E517_PID 0x1005 /* PI E-517 Digital Piezo Controller Operation Module */ ++#define PI_C863_PID 0x1007 /* PI C-863 */ ++#define PI_E861_PID 0x1008 /* PI E-861 Piezomotor Controller */ ++#define PI_C867_PID 0x1009 /* PI C-867 Piezomotor Controller */ ++#define PI_E609_PID 0x100D /* PI E-609 Digital Piezo Controller */ ++#define PI_E709_PID 0x100E /* PI E-709 Digital Piezo Controller */ ++#define PI_100F_PID 0x100F /* PI Digital Piezo Controller */ ++#define PI_1011_PID 0x1011 /* PI Digital Piezo Controller */ ++#define PI_1012_PID 0x1012 /* PI Motion Controller */ ++#define PI_1013_PID 0x1013 /* PI Motion Controller */ ++#define PI_1014_PID 0x1014 /* PI Device */ ++#define PI_1015_PID 0x1015 /* PI Device */ ++#define PI_1016_PID 0x1016 /* PI Digital Servo Module */ + + /* + * Kondo Kagaku Co.Ltd. 
+diff --git a/drivers/usb/serial/option.c b/drivers/usb/serial/option.c +index 113560d..c068b4d 100644 +--- a/drivers/usb/serial/option.c ++++ b/drivers/usb/serial/option.c +@@ -1090,6 +1090,10 @@ static const struct usb_device_id option_ids[] = { + .driver_info = (kernel_ulong_t)&zte_ad3812_z_blacklist }, + { USB_DEVICE_AND_INTERFACE_INFO(ZTE_VENDOR_ID, ZTE_PRODUCT_MC2716, 0xff, 0xff, 0xff), + .driver_info = (kernel_ulong_t)&zte_mc2716_z_blacklist }, ++ { USB_VENDOR_AND_INTERFACE_INFO(ZTE_VENDOR_ID, 0xff, 0x02, 0x01) }, ++ { USB_VENDOR_AND_INTERFACE_INFO(ZTE_VENDOR_ID, 0xff, 0x02, 0x05) }, ++ { USB_VENDOR_AND_INTERFACE_INFO(ZTE_VENDOR_ID, 0xff, 0x86, 0x10) }, ++ + { USB_DEVICE(BENQ_VENDOR_ID, BENQ_PRODUCT_H10) }, + { USB_DEVICE(DLINK_VENDOR_ID, DLINK_PRODUCT_DWM_652) }, + { USB_DEVICE(ALINK_VENDOR_ID, DLINK_PRODUCT_DWM_652_U5) }, /* Yes, ALINK_VENDOR_ID */ +diff --git a/drivers/video/omap2/omapfb/omapfb-main.c b/drivers/video/omap2/omapfb/omapfb-main.c +index 70aa47d..f7c1753 100644 +--- a/drivers/video/omap2/omapfb/omapfb-main.c ++++ b/drivers/video/omap2/omapfb/omapfb-main.c +@@ -1183,7 +1183,7 @@ static int _setcolreg(struct fb_info *fbi, u_int regno, u_int red, u_int green, + break; + + if (regno < 16) { +- u16 pal; ++ u32 pal; + pal = ((red >> (16 - var->red.length)) << + var->red.offset) | + ((green >> (16 - var->green.length)) << +diff --git a/drivers/xen/swiotlb-xen.c b/drivers/xen/swiotlb-xen.c +index 284798a..89588e7 100644 +--- a/drivers/xen/swiotlb-xen.c ++++ b/drivers/xen/swiotlb-xen.c +@@ -231,7 +231,7 @@ xen_swiotlb_alloc_coherent(struct device *hwdev, size_t size, + return ret; + + if (hwdev && hwdev->coherent_dma_mask) +- dma_mask = hwdev->coherent_dma_mask; ++ dma_mask = dma_alloc_coherent_mask(hwdev, flags); + + phys = virt_to_phys(ret); + dev_addr = xen_phys_to_bus(phys); +diff --git a/fs/cifs/file.c b/fs/cifs/file.c +index 0bb785f..51574d4 100644 +--- a/fs/cifs/file.c ++++ b/fs/cifs/file.c +@@ -882,7 +882,7 @@ cifs_push_mandatory_locks(struct cifsFileInfo *cfile) + if (!buf) { + mutex_unlock(&cinode->lock_mutex); + FreeXid(xid); +- return rc; ++ return -ENOMEM; + } + + for (i = 0; i < 2; i++) { +diff --git a/fs/ecryptfs/inode.c b/fs/ecryptfs/inode.c +index af11098..7c7556b 100644 +--- a/fs/ecryptfs/inode.c ++++ b/fs/ecryptfs/inode.c +@@ -640,6 +640,7 @@ ecryptfs_rename(struct inode *old_dir, struct dentry *old_dentry, + struct dentry *lower_old_dir_dentry; + struct dentry *lower_new_dir_dentry; + struct dentry *trap = NULL; ++ struct inode *target_inode; + + lower_old_dentry = ecryptfs_dentry_to_lower(old_dentry); + lower_new_dentry = ecryptfs_dentry_to_lower(new_dentry); +@@ -647,6 +648,7 @@ ecryptfs_rename(struct inode *old_dir, struct dentry *old_dentry, + dget(lower_new_dentry); + lower_old_dir_dentry = dget_parent(lower_old_dentry); + lower_new_dir_dentry = dget_parent(lower_new_dentry); ++ target_inode = new_dentry->d_inode; + trap = lock_rename(lower_old_dir_dentry, lower_new_dir_dentry); + /* source should not be ancestor of target */ + if (trap == lower_old_dentry) { +@@ -662,6 +664,9 @@ ecryptfs_rename(struct inode *old_dir, struct dentry *old_dentry, + lower_new_dir_dentry->d_inode, lower_new_dentry); + if (rc) + goto out_lock; ++ if (target_inode) ++ fsstack_copy_attr_all(target_inode, ++ ecryptfs_inode_to_lower(target_inode)); + fsstack_copy_attr_all(new_dir, lower_new_dir_dentry->d_inode); + if (new_dir != old_dir) + fsstack_copy_attr_all(old_dir, lower_old_dir_dentry->d_inode); +diff --git a/fs/ext3/inode.c b/fs/ext3/inode.c +index 5b3f907..71b263f 
100644 +--- a/fs/ext3/inode.c ++++ b/fs/ext3/inode.c +@@ -3072,6 +3072,8 @@ static int ext3_do_update_inode(handle_t *handle, + struct ext3_inode_info *ei = EXT3_I(inode); + struct buffer_head *bh = iloc->bh; + int err = 0, rc, block; ++ int need_datasync = 0; ++ __le32 disksize; + + again: + /* we can't allow multiple procs in here at once, its a bit racey */ +@@ -3109,7 +3111,11 @@ again: + raw_inode->i_gid_high = 0; + } + raw_inode->i_links_count = cpu_to_le16(inode->i_nlink); +- raw_inode->i_size = cpu_to_le32(ei->i_disksize); ++ disksize = cpu_to_le32(ei->i_disksize); ++ if (disksize != raw_inode->i_size) { ++ need_datasync = 1; ++ raw_inode->i_size = disksize; ++ } + raw_inode->i_atime = cpu_to_le32(inode->i_atime.tv_sec); + raw_inode->i_ctime = cpu_to_le32(inode->i_ctime.tv_sec); + raw_inode->i_mtime = cpu_to_le32(inode->i_mtime.tv_sec); +@@ -3125,8 +3131,11 @@ again: + if (!S_ISREG(inode->i_mode)) { + raw_inode->i_dir_acl = cpu_to_le32(ei->i_dir_acl); + } else { +- raw_inode->i_size_high = +- cpu_to_le32(ei->i_disksize >> 32); ++ disksize = cpu_to_le32(ei->i_disksize >> 32); ++ if (disksize != raw_inode->i_size_high) { ++ raw_inode->i_size_high = disksize; ++ need_datasync = 1; ++ } + if (ei->i_disksize > 0x7fffffffULL) { + struct super_block *sb = inode->i_sb; + if (!EXT3_HAS_RO_COMPAT_FEATURE(sb, +@@ -3179,6 +3188,8 @@ again: + ext3_clear_inode_state(inode, EXT3_STATE_NEW); + + atomic_set(&ei->i_sync_tid, handle->h_transaction->t_tid); ++ if (need_datasync) ++ atomic_set(&ei->i_datasync_tid, handle->h_transaction->t_tid); + out_brelse: + brelse (bh); + ext3_std_error(inode->i_sb, err); +diff --git a/fs/fuse/dev.c b/fs/fuse/dev.c +index 2aaf3ea..5c029fb 100644 +--- a/fs/fuse/dev.c ++++ b/fs/fuse/dev.c +@@ -1524,6 +1524,7 @@ static int fuse_retrieve(struct fuse_conn *fc, struct inode *inode, + req->pages[req->num_pages] = page; + req->num_pages++; + ++ offset = 0; + num -= this_num; + total_len += this_num; + index++; +diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c +index 50a15fa..b78b5b6 100644 +--- a/fs/nfs/inode.c ++++ b/fs/nfs/inode.c +@@ -150,7 +150,7 @@ static void nfs_zap_caches_locked(struct inode *inode) + nfsi->attrtimeo = NFS_MINATTRTIMEO(inode); + nfsi->attrtimeo_timestamp = jiffies; + +- memset(NFS_COOKIEVERF(inode), 0, sizeof(NFS_COOKIEVERF(inode))); ++ memset(NFS_I(inode)->cookieverf, 0, sizeof(NFS_I(inode)->cookieverf)); + if (S_ISREG(mode) || S_ISDIR(mode) || S_ISLNK(mode)) + nfsi->cache_validity |= NFS_INO_INVALID_ATTR|NFS_INO_INVALID_DATA|NFS_INO_INVALID_ACCESS|NFS_INO_INVALID_ACL|NFS_INO_REVAL_PAGECACHE; + else +diff --git a/fs/nfs/nfs3proc.c b/fs/nfs/nfs3proc.c +index 5195fd6..dba87e6 100644 +--- a/fs/nfs/nfs3proc.c ++++ b/fs/nfs/nfs3proc.c +@@ -633,7 +633,7 @@ nfs3_proc_readdir(struct dentry *dentry, struct rpc_cred *cred, + u64 cookie, struct page **pages, unsigned int count, int plus) + { + struct inode *dir = dentry->d_inode; +- __be32 *verf = NFS_COOKIEVERF(dir); ++ __be32 *verf = NFS_I(dir)->cookieverf; + struct nfs3_readdirargs arg = { + .fh = NFS_FH(dir), + .cookie = cookie, +diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c +index d20221d..61796a40 100644 +--- a/fs/nfs/nfs4proc.c ++++ b/fs/nfs/nfs4proc.c +@@ -3025,11 +3025,11 @@ static int _nfs4_proc_readdir(struct dentry *dentry, struct rpc_cred *cred, + dentry->d_parent->d_name.name, + dentry->d_name.name, + (unsigned long long)cookie); +- nfs4_setup_readdir(cookie, NFS_COOKIEVERF(dir), dentry, &args); ++ nfs4_setup_readdir(cookie, NFS_I(dir)->cookieverf, dentry, &args); + res.pgbase = args.pgbase; + 
status = nfs4_call_sync(NFS_SERVER(dir)->client, NFS_SERVER(dir), &msg, &args.seq_args, &res.seq_res, 0); + if (status >= 0) { +- memcpy(NFS_COOKIEVERF(dir), res.verifier.data, NFS4_VERIFIER_SIZE); ++ memcpy(NFS_I(dir)->cookieverf, res.verifier.data, NFS4_VERIFIER_SIZE); + status += args.pgbase; + } + +diff --git a/fs/nfs/nfs4xdr.c b/fs/nfs/nfs4xdr.c +index bdd5bdc..00818c8 100644 +--- a/fs/nfs/nfs4xdr.c ++++ b/fs/nfs/nfs4xdr.c +@@ -6113,7 +6113,8 @@ static int nfs4_xdr_dec_open(struct rpc_rqst *rqstp, struct xdr_stream *xdr, + status = decode_open(xdr, res); + if (status) + goto out; +- if (decode_getfh(xdr, &res->fh) != 0) ++ status = decode_getfh(xdr, &res->fh); ++ if (status) + goto out; + if (decode_getfattr(xdr, res->f_attr, res->server, + !RPC_IS_ASYNC(rqstp->rq_task)) != 0) +diff --git a/fs/nfs/super.c b/fs/nfs/super.c +index 6e85ec6..e42d6f6 100644 +--- a/fs/nfs/super.c ++++ b/fs/nfs/super.c +@@ -1820,6 +1820,7 @@ static int nfs_validate_mount_data(void *options, + + memcpy(sap, &data->addr, sizeof(data->addr)); + args->nfs_server.addrlen = sizeof(data->addr); ++ args->nfs_server.port = ntohs(data->addr.sin_port); + if (!nfs_verify_server_address(sap)) + goto out_no_address; + +@@ -2538,6 +2539,7 @@ static int nfs4_validate_mount_data(void *options, + return -EFAULT; + if (!nfs_verify_server_address(sap)) + goto out_no_address; ++ args->nfs_server.port = ntohs(((struct sockaddr_in *)sap)->sin_port); + + if (data->auth_flavourlen) { + if (data->auth_flavourlen > 1) +diff --git a/fs/stat.c b/fs/stat.c +index 8806b89..7b21801 100644 +--- a/fs/stat.c ++++ b/fs/stat.c +@@ -57,12 +57,13 @@ EXPORT_SYMBOL(vfs_getattr); + + int vfs_fstat(unsigned int fd, struct kstat *stat) + { +- struct file *f = fget(fd); ++ int fput_needed; ++ struct file *f = fget_raw_light(fd, &fput_needed); + int error = -EBADF; + + if (f) { + error = vfs_getattr(f->f_path.mnt, f->f_path.dentry, stat); +- fput(f); ++ fput_light(f, fput_needed); + } + return error; + } +diff --git a/fs/udf/file.c b/fs/udf/file.c +index d567b84..874c9e3 100644 +--- a/fs/udf/file.c ++++ b/fs/udf/file.c +@@ -39,20 +39,24 @@ + #include "udf_i.h" + #include "udf_sb.h" + +-static int udf_adinicb_readpage(struct file *file, struct page *page) ++static void __udf_adinicb_readpage(struct page *page) + { + struct inode *inode = page->mapping->host; + char *kaddr; + struct udf_inode_info *iinfo = UDF_I(inode); + +- BUG_ON(!PageLocked(page)); +- + kaddr = kmap(page); +- memset(kaddr, 0, PAGE_CACHE_SIZE); + memcpy(kaddr, iinfo->i_ext.i_data + iinfo->i_lenEAttr, inode->i_size); ++ memset(kaddr + inode->i_size, 0, PAGE_CACHE_SIZE - inode->i_size); + flush_dcache_page(page); + SetPageUptodate(page); + kunmap(page); ++} ++ ++static int udf_adinicb_readpage(struct file *file, struct page *page) ++{ ++ BUG_ON(!PageLocked(page)); ++ __udf_adinicb_readpage(page); + unlock_page(page); + + return 0; +@@ -77,6 +81,25 @@ static int udf_adinicb_writepage(struct page *page, + return 0; + } + ++static int udf_adinicb_write_begin(struct file *file, ++ struct address_space *mapping, loff_t pos, ++ unsigned len, unsigned flags, struct page **pagep, ++ void **fsdata) ++{ ++ struct page *page; ++ ++ if (WARN_ON_ONCE(pos >= PAGE_CACHE_SIZE)) ++ return -EIO; ++ page = grab_cache_page_write_begin(mapping, 0, flags); ++ if (!page) ++ return -ENOMEM; ++ *pagep = page; ++ ++ if (!PageUptodate(page) && len != PAGE_CACHE_SIZE) ++ __udf_adinicb_readpage(page); ++ return 0; ++} ++ + static int udf_adinicb_write_end(struct file *file, + struct address_space *mapping, + loff_t 
pos, unsigned len, unsigned copied, +@@ -98,8 +121,8 @@ static int udf_adinicb_write_end(struct file *file, + const struct address_space_operations udf_adinicb_aops = { + .readpage = udf_adinicb_readpage, + .writepage = udf_adinicb_writepage, +- .write_begin = simple_write_begin, +- .write_end = udf_adinicb_write_end, ++ .write_begin = udf_adinicb_write_begin, ++ .write_end = udf_adinicb_write_end, + }; + + static ssize_t udf_file_aio_write(struct kiocb *iocb, const struct iovec *iov, +diff --git a/include/drm/drm_mode.h b/include/drm/drm_mode.h +index ddd46db..7639f18 100644 +--- a/include/drm/drm_mode.h ++++ b/include/drm/drm_mode.h +@@ -277,8 +277,9 @@ struct drm_mode_mode_cmd { + struct drm_mode_modeinfo mode; + }; + +-#define DRM_MODE_CURSOR_BO (1<<0) +-#define DRM_MODE_CURSOR_MOVE (1<<1) ++#define DRM_MODE_CURSOR_BO 0x01 ++#define DRM_MODE_CURSOR_MOVE 0x02 ++#define DRM_MODE_CURSOR_FLAGS 0x03 + + /* + * depending on the value in flags different members are used. +diff --git a/include/linux/kobject.h b/include/linux/kobject.h +index ad81e1c..445f978 100644 +--- a/include/linux/kobject.h ++++ b/include/linux/kobject.h +@@ -226,7 +226,7 @@ static inline int kobject_uevent_env(struct kobject *kobj, + + static inline __printf(2, 3) + int add_uevent_var(struct kobj_uevent_env *env, const char *format, ...) +-{ return 0; } ++{ return -ENOMEM; } + + static inline int kobject_action_type(const char *buf, size_t count, + enum kobject_action *type) +diff --git a/include/linux/ktime.h b/include/linux/ktime.h +index 603bec2..06177ba10 100644 +--- a/include/linux/ktime.h ++++ b/include/linux/ktime.h +@@ -58,13 +58,6 @@ union ktime { + + typedef union ktime ktime_t; /* Kill this */ + +-#define KTIME_MAX ((s64)~((u64)1 << 63)) +-#if (BITS_PER_LONG == 64) +-# define KTIME_SEC_MAX (KTIME_MAX / NSEC_PER_SEC) +-#else +-# define KTIME_SEC_MAX LONG_MAX +-#endif +- + /* + * ktime_t definitions when using the 64-bit scalar representation: + */ +diff --git a/include/linux/mmc/card.h b/include/linux/mmc/card.h +index c8ef9bc..87967ee 100644 +--- a/include/linux/mmc/card.h ++++ b/include/linux/mmc/card.h +@@ -219,6 +219,7 @@ struct mmc_card { + #define MMC_QUIRK_BLK_NO_CMD23 (1<<7) /* Avoid CMD23 for regular multiblock */ + #define MMC_QUIRK_BROKEN_BYTE_MODE_512 (1<<8) /* Avoid sending 512 bytes in */ + #define MMC_QUIRK_LONG_READ_TIME (1<<9) /* Data read time > CSD says */ ++#define MMC_QUIRK_SEC_ERASE_TRIM_BROKEN (1<<10) /* Skip secure for erase/trim */ + /* byte mode */ + unsigned int poweroff_notify_state; /* eMMC4.5 notify feature */ + #define MMC_NO_POWER_NOTIFICATION 0 +diff --git a/include/linux/mv643xx_eth.h b/include/linux/mv643xx_eth.h +index 30b0c4e..43e038a 100644 +--- a/include/linux/mv643xx_eth.h ++++ b/include/linux/mv643xx_eth.h +@@ -15,6 +15,8 @@ + #define MV643XX_ETH_SIZE_REG_4 0x2224 + #define MV643XX_ETH_BASE_ADDR_ENABLE_REG 0x2290 + ++#define MV643XX_TX_CSUM_DEFAULT_LIMIT 0 ++ + struct mv643xx_eth_shared_platform_data { + struct mbus_dram_target_info *dram; + struct platform_device *shared_smi; +diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h +index cb52340..00ca32b 100644 +--- a/include/linux/netdevice.h ++++ b/include/linux/netdevice.h +@@ -1299,6 +1299,8 @@ struct net_device { + /* for setting kernel sock attribute on TCP connection setup */ + #define GSO_MAX_SIZE 65536 + unsigned int gso_max_size; ++#define GSO_MAX_SEGS 65535 ++ u16 gso_max_segs; + + #ifdef CONFIG_DCB + /* Data Center Bridging netlink ops */ +@@ -1511,6 +1513,8 @@ struct packet_type { + struct 
sk_buff **(*gro_receive)(struct sk_buff **head, + struct sk_buff *skb); + int (*gro_complete)(struct sk_buff *skb); ++ bool (*id_match)(struct packet_type *ptype, ++ struct sock *sk); + void *af_packet_priv; + struct list_head list; + }; +diff --git a/include/linux/nfs_fs.h b/include/linux/nfs_fs.h +index 92ecf55..33c52a2 100644 +--- a/include/linux/nfs_fs.h ++++ b/include/linux/nfs_fs.h +@@ -261,11 +261,6 @@ static inline const struct nfs_rpc_ops *NFS_PROTO(const struct inode *inode) + return NFS_SERVER(inode)->nfs_client->rpc_ops; + } + +-static inline __be32 *NFS_COOKIEVERF(const struct inode *inode) +-{ +- return NFS_I(inode)->cookieverf; +-} +- + static inline unsigned NFS_MINATTRTIMEO(const struct inode *inode) + { + struct nfs_server *nfss = NFS_SERVER(inode); +diff --git a/include/linux/pci_ids.h b/include/linux/pci_ids.h +index 2aaee0c..67cc215 100644 +--- a/include/linux/pci_ids.h ++++ b/include/linux/pci_ids.h +@@ -2124,7 +2124,7 @@ + #define PCI_DEVICE_ID_TIGON3_5704S 0x16a8 + #define PCI_DEVICE_ID_NX2_57800_VF 0x16a9 + #define PCI_DEVICE_ID_NX2_5706S 0x16aa +-#define PCI_DEVICE_ID_NX2_57840_MF 0x16ab ++#define PCI_DEVICE_ID_NX2_57840_MF 0x16a4 + #define PCI_DEVICE_ID_NX2_5708S 0x16ac + #define PCI_DEVICE_ID_NX2_57840_VF 0x16ad + #define PCI_DEVICE_ID_NX2_57810_MF 0x16ae +diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h +index b1f8912..b669be6 100644 +--- a/include/linux/perf_event.h ++++ b/include/linux/perf_event.h +@@ -794,7 +794,7 @@ struct perf_event { + struct hw_perf_event hw; + + struct perf_event_context *ctx; +- struct file *filp; ++ atomic_long_t refcount; + + /* + * These accumulate total time (in nanoseconds) that children +diff --git a/include/linux/sunrpc/xprt.h b/include/linux/sunrpc/xprt.h +index 15518a1..0a4cd10 100644 +--- a/include/linux/sunrpc/xprt.h ++++ b/include/linux/sunrpc/xprt.h +@@ -114,6 +114,7 @@ struct rpc_xprt_ops { + void (*set_buffer_size)(struct rpc_xprt *xprt, size_t sndsize, size_t rcvsize); + int (*reserve_xprt)(struct rpc_xprt *xprt, struct rpc_task *task); + void (*release_xprt)(struct rpc_xprt *xprt, struct rpc_task *task); ++ void (*alloc_slot)(struct rpc_xprt *xprt, struct rpc_task *task); + void (*rpcbind)(struct rpc_task *task); + void (*set_port)(struct rpc_xprt *xprt, unsigned short port); + void (*connect)(struct rpc_task *task); +@@ -274,6 +275,8 @@ void xprt_connect(struct rpc_task *task); + void xprt_reserve(struct rpc_task *task); + int xprt_reserve_xprt(struct rpc_xprt *xprt, struct rpc_task *task); + int xprt_reserve_xprt_cong(struct rpc_xprt *xprt, struct rpc_task *task); ++void xprt_alloc_slot(struct rpc_xprt *xprt, struct rpc_task *task); ++void xprt_lock_and_alloc_slot(struct rpc_xprt *xprt, struct rpc_task *task); + int xprt_prepare_transmit(struct rpc_task *task); + void xprt_transmit(struct rpc_task *task); + void xprt_end_transmit(struct rpc_task *task); +diff --git a/include/linux/time.h b/include/linux/time.h +index b306178..8c0216e 100644 +--- a/include/linux/time.h ++++ b/include/linux/time.h +@@ -107,11 +107,36 @@ static inline struct timespec timespec_sub(struct timespec lhs, + return ts_delta; + } + ++#define KTIME_MAX ((s64)~((u64)1 << 63)) ++#if (BITS_PER_LONG == 64) ++# define KTIME_SEC_MAX (KTIME_MAX / NSEC_PER_SEC) ++#else ++# define KTIME_SEC_MAX LONG_MAX ++#endif ++ + /* + * Returns true if the timespec is norm, false if denorm: + */ +-#define timespec_valid(ts) \ +- (((ts)->tv_sec >= 0) && (((unsigned long) (ts)->tv_nsec) < NSEC_PER_SEC)) ++static inline bool timespec_valid(const 
struct timespec *ts) ++{ ++ /* Dates before 1970 are bogus */ ++ if (ts->tv_sec < 0) ++ return false; ++ /* Can't have more nanoseconds then a second */ ++ if ((unsigned long)ts->tv_nsec >= NSEC_PER_SEC) ++ return false; ++ return true; ++} ++ ++static inline bool timespec_valid_strict(const struct timespec *ts) ++{ ++ if (!timespec_valid(ts)) ++ return false; ++ /* Disallow values that could overflow ktime_t */ ++ if ((unsigned long long)ts->tv_sec >= KTIME_SEC_MAX) ++ return false; ++ return true; ++} + + extern void read_persistent_clock(struct timespec *ts); + extern void read_boot_clock(struct timespec *ts); +diff --git a/include/net/scm.h b/include/net/scm.h +index d456f4c..0c0017c 100644 +--- a/include/net/scm.h ++++ b/include/net/scm.h +@@ -71,9 +71,11 @@ static __inline__ void scm_destroy(struct scm_cookie *scm) + } + + static __inline__ int scm_send(struct socket *sock, struct msghdr *msg, +- struct scm_cookie *scm) ++ struct scm_cookie *scm, bool forcecreds) + { + memset(scm, 0, sizeof(*scm)); ++ if (forcecreds) ++ scm_set_cred(scm, task_tgid(current), current_cred()); + unix_get_peersec_dgram(sock, scm); + if (msg->msg_controllen <= 0) + return 0; +diff --git a/include/net/sock.h b/include/net/sock.h +index 32e3937..ddf523c 100644 +--- a/include/net/sock.h ++++ b/include/net/sock.h +@@ -194,6 +194,7 @@ struct sock_common { + * @sk_route_nocaps: forbidden route capabilities (e.g NETIF_F_GSO_MASK) + * @sk_gso_type: GSO type (e.g. %SKB_GSO_TCPV4) + * @sk_gso_max_size: Maximum GSO segment size to build ++ * @sk_gso_max_segs: Maximum number of GSO segments + * @sk_lingertime: %SO_LINGER l_linger setting + * @sk_backlog: always used with the per-socket spinlock held + * @sk_callback_lock: used with the callbacks in the end of this struct +@@ -310,6 +311,7 @@ struct sock { + int sk_route_nocaps; + int sk_gso_type; + unsigned int sk_gso_max_size; ++ u16 sk_gso_max_segs; + int sk_rcvlowat; + unsigned long sk_lingertime; + struct sk_buff_head sk_error_queue; +diff --git a/kernel/events/core.c b/kernel/events/core.c +index 58690af..7d1f05e 100644 +--- a/kernel/events/core.c ++++ b/kernel/events/core.c +@@ -3011,12 +3011,12 @@ EXPORT_SYMBOL_GPL(perf_event_release_kernel); + /* + * Called when the last reference to the file is gone. 
+ */ +-static int perf_release(struct inode *inode, struct file *file) ++static void put_event(struct perf_event *event) + { +- struct perf_event *event = file->private_data; + struct task_struct *owner; + +- file->private_data = NULL; ++ if (!atomic_long_dec_and_test(&event->refcount)) ++ return; + + rcu_read_lock(); + owner = ACCESS_ONCE(event->owner); +@@ -3051,7 +3051,13 @@ static int perf_release(struct inode *inode, struct file *file) + put_task_struct(owner); + } + +- return perf_event_release_kernel(event); ++ perf_event_release_kernel(event); ++} ++ ++static int perf_release(struct inode *inode, struct file *file) ++{ ++ put_event(file->private_data); ++ return 0; + } + + u64 perf_event_read_value(struct perf_event *event, u64 *enabled, u64 *running) +@@ -3304,7 +3310,7 @@ unlock: + + static const struct file_operations perf_fops; + +-static struct perf_event *perf_fget_light(int fd, int *fput_needed) ++static struct file *perf_fget_light(int fd, int *fput_needed) + { + struct file *file; + +@@ -3318,7 +3324,7 @@ static struct perf_event *perf_fget_light(int fd, int *fput_needed) + return ERR_PTR(-EBADF); + } + +- return file->private_data; ++ return file; + } + + static int perf_event_set_output(struct perf_event *event, +@@ -3350,19 +3356,21 @@ static long perf_ioctl(struct file *file, unsigned int cmd, unsigned long arg) + + case PERF_EVENT_IOC_SET_OUTPUT: + { ++ struct file *output_file = NULL; + struct perf_event *output_event = NULL; + int fput_needed = 0; + int ret; + + if (arg != -1) { +- output_event = perf_fget_light(arg, &fput_needed); +- if (IS_ERR(output_event)) +- return PTR_ERR(output_event); ++ output_file = perf_fget_light(arg, &fput_needed); ++ if (IS_ERR(output_file)) ++ return PTR_ERR(output_file); ++ output_event = output_file->private_data; + } + + ret = perf_event_set_output(event, output_event); + if (output_event) +- fput_light(output_event->filp, fput_needed); ++ fput_light(output_file, fput_needed); + + return ret; + } +@@ -5912,6 +5920,7 @@ perf_event_alloc(struct perf_event_attr *attr, int cpu, + + mutex_init(&event->mmap_mutex); + ++ atomic_long_set(&event->refcount, 1); + event->cpu = cpu; + event->attr = *attr; + event->group_leader = group_leader; +@@ -6182,12 +6191,12 @@ SYSCALL_DEFINE5(perf_event_open, + return event_fd; + + if (group_fd != -1) { +- group_leader = perf_fget_light(group_fd, &fput_needed); +- if (IS_ERR(group_leader)) { +- err = PTR_ERR(group_leader); ++ group_file = perf_fget_light(group_fd, &fput_needed); ++ if (IS_ERR(group_file)) { ++ err = PTR_ERR(group_file); + goto err_fd; + } +- group_file = group_leader->filp; ++ group_leader = group_file->private_data; + if (flags & PERF_FLAG_FD_OUTPUT) + output_event = group_leader; + if (flags & PERF_FLAG_FD_NO_GROUP) +@@ -6322,7 +6331,6 @@ SYSCALL_DEFINE5(perf_event_open, + put_ctx(gctx); + } + +- event->filp = event_file; + WARN_ON_ONCE(ctx->parent_ctx); + mutex_lock(&ctx->mutex); + +@@ -6412,7 +6420,6 @@ perf_event_create_kernel_counter(struct perf_event_attr *attr, int cpu, + goto err_free; + } + +- event->filp = NULL; + WARN_ON_ONCE(ctx->parent_ctx); + mutex_lock(&ctx->mutex); + perf_install_in_context(ctx, event, cpu); +@@ -6461,7 +6468,7 @@ static void sync_child_event(struct perf_event *child_event, + * Release the parent event, if this was the last + * reference to it. 
+ */ +- fput(parent_event->filp); ++ put_event(parent_event); + } + + static void +@@ -6537,9 +6544,8 @@ static void perf_event_exit_task_context(struct task_struct *child, int ctxn) + * + * __perf_event_exit_task() + * sync_child_event() +- * fput(parent_event->filp) +- * perf_release() +- * mutex_lock(&ctx->mutex) ++ * put_event() ++ * mutex_lock(&ctx->mutex) + * + * But since its the parent context it won't be the same instance. + */ +@@ -6607,7 +6613,7 @@ static void perf_free_event(struct perf_event *event, + list_del_init(&event->child_list); + mutex_unlock(&parent->child_mutex); + +- fput(parent->filp); ++ put_event(parent); + + perf_group_detach(event); + list_del_event(event, ctx); +@@ -6687,6 +6693,12 @@ inherit_event(struct perf_event *parent_event, + NULL, NULL); + if (IS_ERR(child_event)) + return child_event; ++ ++ if (!atomic_long_inc_not_zero(&parent_event->refcount)) { ++ free_event(child_event); ++ return NULL; ++ } ++ + get_ctx(child_ctx); + + /* +@@ -6728,14 +6740,6 @@ inherit_event(struct perf_event *parent_event, + raw_spin_unlock_irqrestore(&child_ctx->lock, flags); + + /* +- * Get a reference to the parent filp - we will fput it +- * when the child event exits. This is safe to do because +- * we are in the parent and we know that the filp still +- * exists and has a nonzero count: +- */ +- atomic_long_inc(&parent_event->filp->f_count); +- +- /* + * Link this into the parent event's child list + */ + WARN_ON_ONCE(parent_event->ctx->parent_ctx); +diff --git a/kernel/time/timekeeping.c b/kernel/time/timekeeping.c +index 03e67d4..5ee1ac0 100644 +--- a/kernel/time/timekeeping.c ++++ b/kernel/time/timekeeping.c +@@ -382,7 +382,7 @@ int do_settimeofday(const struct timespec *tv) + struct timespec ts_delta; + unsigned long flags; + +- if ((unsigned long)tv->tv_nsec >= NSEC_PER_SEC) ++ if (!timespec_valid_strict(tv)) + return -EINVAL; + + write_seqlock_irqsave(&xtime_lock, flags); +@@ -417,6 +417,8 @@ EXPORT_SYMBOL(do_settimeofday); + int timekeeping_inject_offset(struct timespec *ts) + { + unsigned long flags; ++ struct timespec tmp; ++ int ret = 0; + + if ((unsigned long)ts->tv_nsec >= NSEC_PER_SEC) + return -EINVAL; +@@ -425,9 +427,16 @@ int timekeeping_inject_offset(struct timespec *ts) + + timekeeping_forward_now(); + ++ tmp = timespec_add(xtime, *ts); ++ if (!timespec_valid_strict(&tmp)) { ++ ret = -EINVAL; ++ goto error; ++ } ++ + xtime = timespec_add(xtime, *ts); + wall_to_monotonic = timespec_sub(wall_to_monotonic, *ts); + ++error: /* even if we error out, we forwarded the time, so call update */ + timekeeping_update(true); + + write_sequnlock_irqrestore(&xtime_lock, flags); +@@ -435,7 +444,7 @@ int timekeeping_inject_offset(struct timespec *ts) + /* signal hrtimers about time change */ + clock_was_set(); + +- return 0; ++ return ret; + } + EXPORT_SYMBOL(timekeeping_inject_offset); + +@@ -582,7 +591,20 @@ void __init timekeeping_init(void) + struct timespec now, boot; + + read_persistent_clock(&now); ++ if (!timespec_valid_strict(&now)) { ++ pr_warn("WARNING: Persistent clock returned invalid value!\n" ++ " Check your CMOS/BIOS settings.\n"); ++ now.tv_sec = 0; ++ now.tv_nsec = 0; ++ } ++ + read_boot_clock(&boot); ++ if (!timespec_valid_strict(&boot)) { ++ pr_warn("WARNING: Boot clock returned invalid value!\n" ++ " Check your CMOS/BIOS settings.\n"); ++ boot.tv_sec = 0; ++ boot.tv_nsec = 0; ++ } + + write_seqlock_irqsave(&xtime_lock, flags); + +@@ -627,7 +649,7 @@ static void update_sleep_time(struct timespec t) + */ + static void 
__timekeeping_inject_sleeptime(struct timespec *delta) + { +- if (!timespec_valid(delta)) { ++ if (!timespec_valid_strict(delta)) { + printk(KERN_WARNING "__timekeeping_inject_sleeptime: Invalid " + "sleep delta value!\n"); + return; +@@ -1011,6 +1033,10 @@ static void update_wall_time(void) + #else + offset = (clock->read(clock) - clock->cycle_last) & clock->mask; + #endif ++ /* Check if there's really nothing to do */ ++ if (offset < timekeeper.cycle_interval) ++ return; ++ + timekeeper.xtime_nsec = (s64)xtime.tv_nsec << timekeeper.shift; + + /* +diff --git a/kernel/workqueue.c b/kernel/workqueue.c +index a650bee..979d4de 100644 +--- a/kernel/workqueue.c ++++ b/kernel/workqueue.c +@@ -3437,14 +3437,17 @@ static int __cpuinit trustee_thread(void *__gcwq) + + for_each_busy_worker(worker, i, pos, gcwq) { + struct work_struct *rebind_work = &worker->rebind_work; ++ unsigned long worker_flags = worker->flags; + + /* + * Rebind_work may race with future cpu hotplug + * operations. Use a separate flag to mark that +- * rebinding is scheduled. ++ * rebinding is scheduled. The morphing should ++ * be atomic. + */ +- worker->flags |= WORKER_REBIND; +- worker->flags &= ~WORKER_ROGUE; ++ worker_flags |= WORKER_REBIND; ++ worker_flags &= ~WORKER_ROGUE; ++ ACCESS_ONCE(worker->flags) = worker_flags; + + /* queue rebind_work, wq doesn't matter, use the default one */ + if (test_and_set_bit(WORK_STRUCT_PENDING_BIT, +diff --git a/mm/mempolicy.c b/mm/mempolicy.c +index c0007f9..11b8d47 100644 +--- a/mm/mempolicy.c ++++ b/mm/mempolicy.c +@@ -2533,7 +2533,7 @@ int mpol_to_str(char *buffer, int maxlen, struct mempolicy *pol, int no_context) + break; + + default: +- BUG(); ++ return -EINVAL; + } + + l = strlen(policy_modes[mode]); +diff --git a/net/atm/common.c b/net/atm/common.c +index 14ff9fe..0ca06e8 100644 +--- a/net/atm/common.c ++++ b/net/atm/common.c +@@ -784,6 +784,7 @@ int vcc_getsockopt(struct socket *sock, int level, int optname, + + if (!vcc->dev || !test_bit(ATM_VF_ADDR, &vcc->flags)) + return -ENOTCONN; ++ memset(&pvc, 0, sizeof(pvc)); + pvc.sap_family = AF_ATMPVC; + pvc.sap_addr.itf = vcc->dev->number; + pvc.sap_addr.vpi = vcc->vpi; +diff --git a/net/atm/pvc.c b/net/atm/pvc.c +index 3a73491..ae03240 100644 +--- a/net/atm/pvc.c ++++ b/net/atm/pvc.c +@@ -95,6 +95,7 @@ static int pvc_getname(struct socket *sock, struct sockaddr *sockaddr, + return -ENOTCONN; + *sockaddr_len = sizeof(struct sockaddr_atmpvc); + addr = (struct sockaddr_atmpvc *)sockaddr; ++ memset(addr, 0, sizeof(*addr)); + addr->sap_family = AF_ATMPVC; + addr->sap_addr.itf = vcc->dev->number; + addr->sap_addr.vpi = vcc->vpi; +diff --git a/net/bluetooth/hci_sock.c b/net/bluetooth/hci_sock.c +index f6afe3d..8361ee4 100644 +--- a/net/bluetooth/hci_sock.c ++++ b/net/bluetooth/hci_sock.c +@@ -388,6 +388,7 @@ static int hci_sock_getname(struct socket *sock, struct sockaddr *addr, int *add + *addr_len = sizeof(*haddr); + haddr->hci_family = AF_BLUETOOTH; + haddr->hci_dev = hdev->id; ++ haddr->hci_channel= 0; + + release_sock(sk); + return 0; +@@ -671,6 +672,7 @@ static int hci_sock_getsockopt(struct socket *sock, int level, int optname, char + { + struct hci_filter *f = &hci_pi(sk)->filter; + ++ memset(&uf, 0, sizeof(uf)); + uf.type_mask = f->type_mask; + uf.opcode = f->opcode; + uf.event_mask[0] = *((u32 *) f->event_mask + 0); +diff --git a/net/bluetooth/l2cap_sock.c b/net/bluetooth/l2cap_sock.c +index 5c406d3..6dedd6f 100644 +--- a/net/bluetooth/l2cap_sock.c ++++ b/net/bluetooth/l2cap_sock.c +@@ -293,6 +293,7 @@ static int 
l2cap_sock_getname(struct socket *sock, struct sockaddr *addr, int *l + + BT_DBG("sock %p, sk %p", sock, sk); + ++ memset(la, 0, sizeof(struct sockaddr_l2)); + addr->sa_family = AF_BLUETOOTH; + *len = sizeof(struct sockaddr_l2); + +diff --git a/net/bluetooth/rfcomm/sock.c b/net/bluetooth/rfcomm/sock.c +index 5417f61..7ee4ead 100644 +--- a/net/bluetooth/rfcomm/sock.c ++++ b/net/bluetooth/rfcomm/sock.c +@@ -547,6 +547,7 @@ static int rfcomm_sock_getname(struct socket *sock, struct sockaddr *addr, int * + + BT_DBG("sock %p, sk %p", sock, sk); + ++ memset(sa, 0, sizeof(*sa)); + sa->rc_family = AF_BLUETOOTH; + sa->rc_channel = rfcomm_pi(sk)->channel; + if (peer) +@@ -835,6 +836,7 @@ static int rfcomm_sock_getsockopt(struct socket *sock, int level, int optname, c + } + + sec.level = rfcomm_pi(sk)->sec_level; ++ sec.key_size = 0; + + len = min_t(unsigned int, len, sizeof(sec)); + if (copy_to_user(optval, (char *) &sec, len)) +diff --git a/net/bluetooth/rfcomm/tty.c b/net/bluetooth/rfcomm/tty.c +index c258796..bc1eb56 100644 +--- a/net/bluetooth/rfcomm/tty.c ++++ b/net/bluetooth/rfcomm/tty.c +@@ -471,7 +471,7 @@ static int rfcomm_get_dev_list(void __user *arg) + + size = sizeof(*dl) + dev_num * sizeof(*di); + +- dl = kmalloc(size, GFP_KERNEL); ++ dl = kzalloc(size, GFP_KERNEL); + if (!dl) + return -ENOMEM; + +diff --git a/net/core/dev.c b/net/core/dev.c +index 4b18703..832ba6d 100644 +--- a/net/core/dev.c ++++ b/net/core/dev.c +@@ -1059,6 +1059,8 @@ rollback: + */ + int dev_set_alias(struct net_device *dev, const char *alias, size_t len) + { ++ char *new_ifalias; ++ + ASSERT_RTNL(); + + if (len >= IFALIASZ) +@@ -1072,9 +1074,10 @@ int dev_set_alias(struct net_device *dev, const char *alias, size_t len) + return 0; + } + +- dev->ifalias = krealloc(dev->ifalias, len + 1, GFP_KERNEL); +- if (!dev->ifalias) ++ new_ifalias = krealloc(dev->ifalias, len + 1, GFP_KERNEL); ++ if (!new_ifalias) + return -ENOMEM; ++ dev->ifalias = new_ifalias; + + strlcpy(dev->ifalias, alias, len+1); + return len; +@@ -1628,6 +1631,19 @@ static inline int deliver_skb(struct sk_buff *skb, + return pt_prev->func(skb, skb->dev, pt_prev, orig_dev); + } + ++static inline bool skb_loop_sk(struct packet_type *ptype, struct sk_buff *skb) ++{ ++ if (ptype->af_packet_priv == NULL) ++ return false; ++ ++ if (ptype->id_match) ++ return ptype->id_match(ptype, skb->sk); ++ else if ((struct sock *)ptype->af_packet_priv == skb->sk) ++ return true; ++ ++ return false; ++} ++ + /* + * Support routine. Sends outgoing frames to any network + * taps currently in use. 
+@@ -1645,8 +1661,7 @@ static void dev_queue_xmit_nit(struct sk_buff *skb, struct net_device *dev) + * they originated from - MvS (miquels@drinkel.ow.org) + */ + if ((ptype->dev == dev || !ptype->dev) && +- (ptype->af_packet_priv == NULL || +- (struct sock *)ptype->af_packet_priv != skb->sk)) { ++ (!skb_loop_sk(ptype, skb))) { + if (pt_prev) { + deliver_skb(skb2, pt_prev, skb->dev); + pt_prev = ptype; +@@ -2108,6 +2123,9 @@ u32 netif_skb_features(struct sk_buff *skb) + __be16 protocol = skb->protocol; + u32 features = skb->dev->features; + ++ if (skb_shinfo(skb)->gso_segs > skb->dev->gso_max_segs) ++ features &= ~NETIF_F_GSO_MASK; ++ + if (protocol == htons(ETH_P_8021Q)) { + struct vlan_ethhdr *veh = (struct vlan_ethhdr *)skb->data; + protocol = veh->h_vlan_encapsulated_proto; +@@ -5990,6 +6008,7 @@ struct net_device *alloc_netdev_mqs(int sizeof_priv, const char *name, + dev_net_set(dev, &init_net); + + dev->gso_max_size = GSO_MAX_SIZE; ++ dev->gso_max_segs = GSO_MAX_SEGS; + + INIT_LIST_HEAD(&dev->napi_list); + INIT_LIST_HEAD(&dev->unreg_list); +diff --git a/net/core/sock.c b/net/core/sock.c +index 8d095b9..018fd41 100644 +--- a/net/core/sock.c ++++ b/net/core/sock.c +@@ -1308,6 +1308,7 @@ void sk_setup_caps(struct sock *sk, struct dst_entry *dst) + } else { + sk->sk_route_caps |= NETIF_F_SG | NETIF_F_HW_CSUM; + sk->sk_gso_max_size = dst->dev->gso_max_size; ++ sk->sk_gso_max_segs = dst->dev->gso_max_segs; + } + } + } +diff --git a/net/dccp/ccids/ccid3.c b/net/dccp/ccids/ccid3.c +index 3d604e1..4caf63f 100644 +--- a/net/dccp/ccids/ccid3.c ++++ b/net/dccp/ccids/ccid3.c +@@ -532,6 +532,7 @@ static int ccid3_hc_tx_getsockopt(struct sock *sk, const int optname, int len, + case DCCP_SOCKOPT_CCID_TX_INFO: + if (len < sizeof(tfrc)) + return -EINVAL; ++ memset(&tfrc, 0, sizeof(tfrc)); + tfrc.tfrctx_x = hc->tx_x; + tfrc.tfrctx_x_recv = hc->tx_x_recv; + tfrc.tfrctx_x_calc = hc->tx_x_calc; +diff --git a/net/ipv4/ipmr.c b/net/ipv4/ipmr.c +index d2aae27..0064394 100644 +--- a/net/ipv4/ipmr.c ++++ b/net/ipv4/ipmr.c +@@ -125,6 +125,8 @@ static DEFINE_SPINLOCK(mfc_unres_lock); + static struct kmem_cache *mrt_cachep __read_mostly; + + static struct mr_table *ipmr_new_table(struct net *net, u32 id); ++static void ipmr_free_table(struct mr_table *mrt); ++ + static int ip_mr_forward(struct net *net, struct mr_table *mrt, + struct sk_buff *skb, struct mfc_cache *cache, + int local); +@@ -132,6 +134,7 @@ static int ipmr_cache_report(struct mr_table *mrt, + struct sk_buff *pkt, vifi_t vifi, int assert); + static int __ipmr_fill_mroute(struct mr_table *mrt, struct sk_buff *skb, + struct mfc_cache *c, struct rtmsg *rtm); ++static void mroute_clean_tables(struct mr_table *mrt); + static void ipmr_expire_process(unsigned long arg); + + #ifdef CONFIG_IP_MROUTE_MULTIPLE_TABLES +@@ -272,7 +275,7 @@ static void __net_exit ipmr_rules_exit(struct net *net) + + list_for_each_entry_safe(mrt, next, &net->ipv4.mr_tables, list) { + list_del(&mrt->list); +- kfree(mrt); ++ ipmr_free_table(mrt); + } + fib_rules_unregister(net->ipv4.mr_rules_ops); + } +@@ -300,7 +303,7 @@ static int __net_init ipmr_rules_init(struct net *net) + + static void __net_exit ipmr_rules_exit(struct net *net) + { +- kfree(net->ipv4.mrt); ++ ipmr_free_table(net->ipv4.mrt); + } + #endif + +@@ -337,6 +340,13 @@ static struct mr_table *ipmr_new_table(struct net *net, u32 id) + return mrt; + } + ++static void ipmr_free_table(struct mr_table *mrt) ++{ ++ del_timer_sync(&mrt->ipmr_expire_timer); ++ mroute_clean_tables(mrt); ++ kfree(mrt); ++} ++ + /* Service 
routines creating virtual interfaces: DVMRP tunnels and PIMREG */ + + static void ipmr_del_tunnel(struct net_device *dev, struct vifctl *v) +diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c +index ad466a7..043d49b 100644 +--- a/net/ipv4/tcp.c ++++ b/net/ipv4/tcp.c +@@ -740,7 +740,9 @@ static unsigned int tcp_xmit_size_goal(struct sock *sk, u32 mss_now, + old_size_goal + mss_now > xmit_size_goal)) { + xmit_size_goal = old_size_goal; + } else { +- tp->xmit_size_goal_segs = xmit_size_goal / mss_now; ++ tp->xmit_size_goal_segs = ++ min_t(u16, xmit_size_goal / mss_now, ++ sk->sk_gso_max_segs); + xmit_size_goal = tp->xmit_size_goal_segs * mss_now; + } + } +diff --git a/net/ipv4/tcp_cong.c b/net/ipv4/tcp_cong.c +index 850c737..6cebfd2 100644 +--- a/net/ipv4/tcp_cong.c ++++ b/net/ipv4/tcp_cong.c +@@ -290,7 +290,8 @@ int tcp_is_cwnd_limited(const struct sock *sk, u32 in_flight) + left = tp->snd_cwnd - in_flight; + if (sk_can_gso(sk) && + left * sysctl_tcp_tso_win_divisor < tp->snd_cwnd && +- left * tp->mss_cache < sk->sk_gso_max_size) ++ left * tp->mss_cache < sk->sk_gso_max_size && ++ left < sk->sk_gso_max_segs) + return 1; + return left <= tcp_max_burst(tp); + } +diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c +index c51dd5b..921cbac 100644 +--- a/net/ipv4/tcp_output.c ++++ b/net/ipv4/tcp_output.c +@@ -1318,21 +1318,21 @@ static void tcp_cwnd_validate(struct sock *sk) + * when we would be allowed to send the split-due-to-Nagle skb fully. + */ + static unsigned int tcp_mss_split_point(const struct sock *sk, const struct sk_buff *skb, +- unsigned int mss_now, unsigned int cwnd) ++ unsigned int mss_now, unsigned int max_segs) + { + const struct tcp_sock *tp = tcp_sk(sk); +- u32 needed, window, cwnd_len; ++ u32 needed, window, max_len; + + window = tcp_wnd_end(tp) - TCP_SKB_CB(skb)->seq; +- cwnd_len = mss_now * cwnd; ++ max_len = mss_now * max_segs; + +- if (likely(cwnd_len <= window && skb != tcp_write_queue_tail(sk))) +- return cwnd_len; ++ if (likely(max_len <= window && skb != tcp_write_queue_tail(sk))) ++ return max_len; + + needed = min(skb->len, window); + +- if (cwnd_len <= needed) +- return cwnd_len; ++ if (max_len <= needed) ++ return max_len; + + return needed - needed % mss_now; + } +@@ -1560,7 +1560,8 @@ static int tcp_tso_should_defer(struct sock *sk, struct sk_buff *skb) + limit = min(send_win, cong_win); + + /* If a full-sized TSO skb can be sent, do it. */ +- if (limit >= sk->sk_gso_max_size) ++ if (limit >= min_t(unsigned int, sk->sk_gso_max_size, ++ sk->sk_gso_max_segs * tp->mss_cache)) + goto send_now; + + /* Middle in queue won't get any more data, full sendable already? 
*/ +@@ -1786,7 +1787,9 @@ static int tcp_write_xmit(struct sock *sk, unsigned int mss_now, int nonagle, + limit = mss_now; + if (tso_segs > 1 && !tcp_urg_mode(tp)) + limit = tcp_mss_split_point(sk, skb, mss_now, +- cwnd_quota); ++ min_t(unsigned int, ++ cwnd_quota, ++ sk->sk_gso_max_segs)); + + if (skb->len > limit && + unlikely(tso_fragment(sk, skb, limit, mss_now, gfp))) +diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c +index a5521c5..aef80d7 100644 +--- a/net/ipv6/addrconf.c ++++ b/net/ipv6/addrconf.c +@@ -493,8 +493,7 @@ static void addrconf_forward_change(struct net *net, __s32 newf) + struct net_device *dev; + struct inet6_dev *idev; + +- rcu_read_lock(); +- for_each_netdev_rcu(net, dev) { ++ for_each_netdev(net, dev) { + idev = __in6_dev_get(dev); + if (idev) { + int changed = (!idev->cnf.forwarding) ^ (!newf); +@@ -503,7 +502,6 @@ static void addrconf_forward_change(struct net *net, __s32 newf) + dev_forward_change(idev); + } + } +- rcu_read_unlock(); + } + + static int addrconf_fixup_forwarding(struct ctl_table *table, int *p, int old) +diff --git a/net/l2tp/l2tp_core.c b/net/l2tp/l2tp_core.c +index 89ff8c6..7501b22 100644 +--- a/net/l2tp/l2tp_core.c ++++ b/net/l2tp/l2tp_core.c +@@ -1253,11 +1253,10 @@ static void l2tp_tunnel_free(struct l2tp_tunnel *tunnel) + /* Remove from tunnel list */ + spin_lock_bh(&pn->l2tp_tunnel_list_lock); + list_del_rcu(&tunnel->list); ++ kfree_rcu(tunnel, rcu); + spin_unlock_bh(&pn->l2tp_tunnel_list_lock); +- synchronize_rcu(); + + atomic_dec(&l2tp_tunnel_count); +- kfree(tunnel); + } + + /* Create a socket for the tunnel, if one isn't set up by +diff --git a/net/l2tp/l2tp_core.h b/net/l2tp/l2tp_core.h +index a16a48e..4393794 100644 +--- a/net/l2tp/l2tp_core.h ++++ b/net/l2tp/l2tp_core.h +@@ -157,6 +157,7 @@ struct l2tp_tunnel_cfg { + + struct l2tp_tunnel { + int magic; /* Should be L2TP_TUNNEL_MAGIC */ ++ struct rcu_head rcu; + rwlock_t hlist_lock; /* protect session_hlist */ + struct hlist_head session_hlist[L2TP_HASH_SIZE]; + /* hashed list of sessions, +diff --git a/net/llc/af_llc.c b/net/llc/af_llc.c +index a18e6c3..99a60d5 100644 +--- a/net/llc/af_llc.c ++++ b/net/llc/af_llc.c +@@ -966,14 +966,13 @@ static int llc_ui_getname(struct socket *sock, struct sockaddr *uaddr, + struct sockaddr_llc sllc; + struct sock *sk = sock->sk; + struct llc_sock *llc = llc_sk(sk); +- int rc = 0; ++ int rc = -EBADF; + + memset(&sllc, 0, sizeof(sllc)); + lock_sock(sk); + if (sock_flag(sk, SOCK_ZAPPED)) + goto out; + *uaddrlen = sizeof(sllc); +- memset(uaddr, 0, *uaddrlen); + if (peer) { + rc = -ENOTCONN; + if (sk->sk_state != TCP_ESTABLISHED) +diff --git a/net/netfilter/ipvs/ip_vs_ctl.c b/net/netfilter/ipvs/ip_vs_ctl.c +index e1a66cf..72f4253 100644 +--- a/net/netfilter/ipvs/ip_vs_ctl.c ++++ b/net/netfilter/ipvs/ip_vs_ctl.c +@@ -2713,6 +2713,7 @@ do_ip_vs_get_ctl(struct sock *sk, int cmd, void __user *user, int *len) + { + struct ip_vs_timeout_user t; + ++ memset(&t, 0, sizeof(t)); + __ip_vs_get_timeouts(net, &t); + if (copy_to_user(user, &t, sizeof(t)) != 0) + ret = -EFAULT; +diff --git a/net/netlink/af_netlink.c b/net/netlink/af_netlink.c +index a99fb41..38b78b9 100644 +--- a/net/netlink/af_netlink.c ++++ b/net/netlink/af_netlink.c +@@ -1333,7 +1333,7 @@ static int netlink_sendmsg(struct kiocb *kiocb, struct socket *sock, + if (NULL == siocb->scm) + siocb->scm = &scm; + +- err = scm_send(sock, msg, siocb->scm); ++ err = scm_send(sock, msg, siocb->scm, true); + if (err < 0) + return err; + +@@ -1344,7 +1344,8 @@ static int netlink_sendmsg(struct kiocb *kiocb, 
struct socket *sock, + dst_pid = addr->nl_pid; + dst_group = ffs(addr->nl_groups); + err = -EPERM; +- if (dst_group && !netlink_capable(sock, NL_NONROOT_SEND)) ++ if ((dst_group || dst_pid) && ++ !netlink_capable(sock, NL_NONROOT_SEND)) + goto out; + } else { + dst_pid = nlk->dst_pid; +@@ -2103,6 +2104,7 @@ static void __init netlink_add_usersock_entry(void) + rcu_assign_pointer(nl_table[NETLINK_USERSOCK].listeners, listeners); + nl_table[NETLINK_USERSOCK].module = THIS_MODULE; + nl_table[NETLINK_USERSOCK].registered = 1; ++ nl_table[NETLINK_USERSOCK].nl_nonroot = NL_NONROOT_SEND; + + netlink_table_ungrab(); + } +diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c +index d9d4970..85afc13 100644 +--- a/net/packet/af_packet.c ++++ b/net/packet/af_packet.c +@@ -1281,6 +1281,14 @@ static void __fanout_unlink(struct sock *sk, struct packet_sock *po) + spin_unlock(&f->lock); + } + ++bool match_fanout_group(struct packet_type *ptype, struct sock * sk) ++{ ++ if (ptype->af_packet_priv == (void*)((struct packet_sock *)sk)->fanout) ++ return true; ++ ++ return false; ++} ++ + static int fanout_add(struct sock *sk, u16 id, u16 type_flags) + { + struct packet_sock *po = pkt_sk(sk); +@@ -1333,6 +1341,7 @@ static int fanout_add(struct sock *sk, u16 id, u16 type_flags) + match->prot_hook.dev = po->prot_hook.dev; + match->prot_hook.func = packet_rcv_fanout; + match->prot_hook.af_packet_priv = match; ++ match->prot_hook.id_match = match_fanout_group; + dev_add_pack(&match->prot_hook); + list_add(&match->list, &fanout_list); + } +@@ -1931,7 +1940,6 @@ static void tpacket_destruct_skb(struct sk_buff *skb) + + if (likely(po->tx_ring.pg_vec)) { + ph = skb_shinfo(skb)->destructor_arg; +- BUG_ON(__packet_get_status(po, ph) != TP_STATUS_SENDING); + BUG_ON(atomic_read(&po->tx_ring.pending) == 0); + atomic_dec(&po->tx_ring.pending); + __packet_set_status(po, ph, TP_STATUS_AVAILABLE); +diff --git a/net/sched/act_gact.c b/net/sched/act_gact.c +index b77f5a0..bdacd8d 100644 +--- a/net/sched/act_gact.c ++++ b/net/sched/act_gact.c +@@ -67,6 +67,9 @@ static int tcf_gact_init(struct nlattr *nla, struct nlattr *est, + struct tcf_common *pc; + int ret = 0; + int err; ++#ifdef CONFIG_GACT_PROB ++ struct tc_gact_p *p_parm = NULL; ++#endif + + if (nla == NULL) + return -EINVAL; +@@ -82,6 +85,12 @@ static int tcf_gact_init(struct nlattr *nla, struct nlattr *est, + #ifndef CONFIG_GACT_PROB + if (tb[TCA_GACT_PROB] != NULL) + return -EOPNOTSUPP; ++#else ++ if (tb[TCA_GACT_PROB]) { ++ p_parm = nla_data(tb[TCA_GACT_PROB]); ++ if (p_parm->ptype >= MAX_RAND) ++ return -EINVAL; ++ } + #endif + + pc = tcf_hash_check(parm->index, a, bind, &gact_hash_info); +@@ -103,8 +112,7 @@ static int tcf_gact_init(struct nlattr *nla, struct nlattr *est, + spin_lock_bh(&gact->tcf_lock); + gact->tcf_action = parm->action; + #ifdef CONFIG_GACT_PROB +- if (tb[TCA_GACT_PROB] != NULL) { +- struct tc_gact_p *p_parm = nla_data(tb[TCA_GACT_PROB]); ++ if (p_parm) { + gact->tcfg_paction = p_parm->paction; + gact->tcfg_pval = p_parm->pval; + gact->tcfg_ptype = p_parm->ptype; +@@ -133,7 +141,7 @@ static int tcf_gact(struct sk_buff *skb, const struct tc_action *a, + + spin_lock(&gact->tcf_lock); + #ifdef CONFIG_GACT_PROB +- if (gact->tcfg_ptype && gact_rand[gact->tcfg_ptype] != NULL) ++ if (gact->tcfg_ptype) + action = gact_rand[gact->tcfg_ptype](gact); + else + action = gact->tcf_action; +diff --git a/net/socket.c b/net/socket.c +index 273cbce..68879db 100644 +--- a/net/socket.c ++++ b/net/socket.c +@@ -2645,6 +2645,7 @@ static int dev_ifconf(struct net 
*net, struct compat_ifconf __user *uifc32) + if (copy_from_user(&ifc32, uifc32, sizeof(struct compat_ifconf))) + return -EFAULT; + ++ memset(&ifc, 0, sizeof(ifc)); + if (ifc32.ifcbuf == 0) { + ifc32.ifc_len = 0; + ifc.ifc_len = 0; +diff --git a/net/sunrpc/xprt.c b/net/sunrpc/xprt.c +index 3ac9789..ffba207 100644 +--- a/net/sunrpc/xprt.c ++++ b/net/sunrpc/xprt.c +@@ -962,11 +962,11 @@ static bool xprt_dynamic_free_slot(struct rpc_xprt *xprt, struct rpc_rqst *req) + return false; + } + +-static void xprt_alloc_slot(struct rpc_task *task) ++void xprt_alloc_slot(struct rpc_xprt *xprt, struct rpc_task *task) + { +- struct rpc_xprt *xprt = task->tk_xprt; + struct rpc_rqst *req; + ++ spin_lock(&xprt->reserve_lock); + if (!list_empty(&xprt->free)) { + req = list_entry(xprt->free.next, struct rpc_rqst, rq_list); + list_del(&req->rq_list); +@@ -987,12 +987,29 @@ static void xprt_alloc_slot(struct rpc_task *task) + default: + task->tk_status = -EAGAIN; + } ++ spin_unlock(&xprt->reserve_lock); + return; + out_init_req: + task->tk_status = 0; + task->tk_rqstp = req; + xprt_request_init(task, xprt); ++ spin_unlock(&xprt->reserve_lock); ++} ++EXPORT_SYMBOL_GPL(xprt_alloc_slot); ++ ++void xprt_lock_and_alloc_slot(struct rpc_xprt *xprt, struct rpc_task *task) ++{ ++ /* Note: grabbing the xprt_lock_write() ensures that we throttle ++ * new slot allocation if the transport is congested (i.e. when ++ * reconnecting a stream transport or when out of socket write ++ * buffer space). ++ */ ++ if (xprt_lock_write(xprt, task)) { ++ xprt_alloc_slot(xprt, task); ++ xprt_release_write(xprt, task); ++ } + } ++EXPORT_SYMBOL_GPL(xprt_lock_and_alloc_slot); + + static void xprt_free_slot(struct rpc_xprt *xprt, struct rpc_rqst *req) + { +@@ -1076,20 +1093,9 @@ void xprt_reserve(struct rpc_task *task) + if (task->tk_rqstp != NULL) + return; + +- /* Note: grabbing the xprt_lock_write() here is not strictly needed, +- * but ensures that we throttle new slot allocation if the transport +- * is congested (e.g. if reconnecting or if we're out of socket +- * write buffer space). 
+- */ + task->tk_timeout = 0; + task->tk_status = -EAGAIN; +- if (!xprt_lock_write(xprt, task)) +- return; +- +- spin_lock(&xprt->reserve_lock); +- xprt_alloc_slot(task); +- spin_unlock(&xprt->reserve_lock); +- xprt_release_write(xprt, task); ++ xprt->ops->alloc_slot(xprt, task); + } + + static inline __be32 xprt_alloc_xid(struct rpc_xprt *xprt) +diff --git a/net/sunrpc/xprtrdma/transport.c b/net/sunrpc/xprtrdma/transport.c +index 06cdbff..5d9202d 100644 +--- a/net/sunrpc/xprtrdma/transport.c ++++ b/net/sunrpc/xprtrdma/transport.c +@@ -713,6 +713,7 @@ static void xprt_rdma_print_stats(struct rpc_xprt *xprt, struct seq_file *seq) + static struct rpc_xprt_ops xprt_rdma_procs = { + .reserve_xprt = xprt_rdma_reserve_xprt, + .release_xprt = xprt_release_xprt_cong, /* sunrpc/xprt.c */ ++ .alloc_slot = xprt_alloc_slot, + .release_request = xprt_release_rqst_cong, /* ditto */ + .set_retrans_timeout = xprt_set_retrans_timeout_def, /* ditto */ + .rpcbind = rpcb_getport_async, /* sunrpc/rpcb_clnt.c */ +diff --git a/net/sunrpc/xprtsock.c b/net/sunrpc/xprtsock.c +index 1a6edc7..c5391af 100644 +--- a/net/sunrpc/xprtsock.c ++++ b/net/sunrpc/xprtsock.c +@@ -2422,6 +2422,7 @@ static void bc_destroy(struct rpc_xprt *xprt) + static struct rpc_xprt_ops xs_local_ops = { + .reserve_xprt = xprt_reserve_xprt, + .release_xprt = xs_tcp_release_xprt, ++ .alloc_slot = xprt_alloc_slot, + .rpcbind = xs_local_rpcbind, + .set_port = xs_local_set_port, + .connect = xs_connect, +@@ -2438,6 +2439,7 @@ static struct rpc_xprt_ops xs_udp_ops = { + .set_buffer_size = xs_udp_set_buffer_size, + .reserve_xprt = xprt_reserve_xprt_cong, + .release_xprt = xprt_release_xprt_cong, ++ .alloc_slot = xprt_alloc_slot, + .rpcbind = rpcb_getport_async, + .set_port = xs_set_port, + .connect = xs_connect, +@@ -2455,6 +2457,7 @@ static struct rpc_xprt_ops xs_udp_ops = { + static struct rpc_xprt_ops xs_tcp_ops = { + .reserve_xprt = xprt_reserve_xprt, + .release_xprt = xs_tcp_release_xprt, ++ .alloc_slot = xprt_lock_and_alloc_slot, + .rpcbind = rpcb_getport_async, + .set_port = xs_set_port, + .connect = xs_connect, +diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c +index d99678a..317bfe3 100644 +--- a/net/unix/af_unix.c ++++ b/net/unix/af_unix.c +@@ -1435,7 +1435,7 @@ static int unix_dgram_sendmsg(struct kiocb *kiocb, struct socket *sock, + if (NULL == siocb->scm) + siocb->scm = &tmp_scm; + wait_for_unix_gc(); +- err = scm_send(sock, msg, siocb->scm); ++ err = scm_send(sock, msg, siocb->scm, false); + if (err < 0) + return err; + +@@ -1596,7 +1596,7 @@ static int unix_stream_sendmsg(struct kiocb *kiocb, struct socket *sock, + if (NULL == siocb->scm) + siocb->scm = &tmp_scm; + wait_for_unix_gc(); +- err = scm_send(sock, msg, siocb->scm); ++ err = scm_send(sock, msg, siocb->scm, false); + if (err < 0) + return err; + +diff --git a/sound/pci/hda/hda_codec.c b/sound/pci/hda/hda_codec.c +index f3be54e..b0187e7 100644 +--- a/sound/pci/hda/hda_codec.c ++++ b/sound/pci/hda/hda_codec.c +@@ -2312,6 +2312,7 @@ int snd_hda_codec_reset(struct hda_codec *codec) + } + if (codec->patch_ops.free) + codec->patch_ops.free(codec); ++ memset(&codec->patch_ops, 0, sizeof(codec->patch_ops)); + codec->proc_widget_hook = NULL; + codec->spec = NULL; + free_hda_cache(&codec->amp_cache); +@@ -2324,7 +2325,6 @@ int snd_hda_codec_reset(struct hda_codec *codec) + codec->num_pcms = 0; + codec->pcm_info = NULL; + codec->preset = NULL; +- memset(&codec->patch_ops, 0, sizeof(codec->patch_ops)); + codec->slave_dig_outs = NULL; + codec->spdif_status_reset = 0; + 
module_put(codec->owner); +diff --git a/sound/pci/ice1712/prodigy_hifi.c b/sound/pci/ice1712/prodigy_hifi.c +index 764cc93..075d5aa 100644 +--- a/sound/pci/ice1712/prodigy_hifi.c ++++ b/sound/pci/ice1712/prodigy_hifi.c +@@ -297,6 +297,7 @@ static int ak4396_dac_vol_put(struct snd_kcontrol *kcontrol, struct snd_ctl_elem + } + + static const DECLARE_TLV_DB_SCALE(db_scale_wm_dac, -12700, 100, 1); ++static const DECLARE_TLV_DB_LINEAR(ak4396_db_scale, TLV_DB_GAIN_MUTE, 0); + + static struct snd_kcontrol_new prodigy_hd2_controls[] __devinitdata = { + { +@@ -307,7 +308,7 @@ static struct snd_kcontrol_new prodigy_hd2_controls[] __devinitdata = { + .info = ak4396_dac_vol_info, + .get = ak4396_dac_vol_get, + .put = ak4396_dac_vol_put, +- .tlv = { .p = db_scale_wm_dac }, ++ .tlv = { .p = ak4396_db_scale }, + }, + }; + diff --git a/3.2.34/bump/1030_linux-3.2.31.patch b/3.2.34/bump/1030_linux-3.2.31.patch new file mode 100644 index 0000000..c6accf5 --- /dev/null +++ b/3.2.34/bump/1030_linux-3.2.31.patch @@ -0,0 +1,3327 @@ +diff --git a/Makefile b/Makefile +index 9fd7e60..fd9c414 100644 +--- a/Makefile ++++ b/Makefile +@@ -1,6 +1,6 @@ + VERSION = 3 + PATCHLEVEL = 2 +-SUBLEVEL = 30 ++SUBLEVEL = 31 + EXTRAVERSION = + NAME = Saber-toothed Squirrel + +diff --git a/arch/arm/boot/compressed/head.S b/arch/arm/boot/compressed/head.S +index 9c18ebd..d63632f 100644 +--- a/arch/arm/boot/compressed/head.S ++++ b/arch/arm/boot/compressed/head.S +@@ -648,6 +648,7 @@ __armv7_mmu_cache_on: + mcrne p15, 0, r0, c8, c7, 0 @ flush I,D TLBs + #endif + mrc p15, 0, r0, c1, c0, 0 @ read control reg ++ bic r0, r0, #1 << 28 @ clear SCTLR.TRE + orr r0, r0, #0x5000 @ I-cache enable, RR cache replacement + orr r0, r0, #0x003c @ write buffer + #ifdef CONFIG_MMU +diff --git a/arch/x86/kernel/alternative.c b/arch/x86/kernel/alternative.c +index 73ef56c..bda833c 100644 +--- a/arch/x86/kernel/alternative.c ++++ b/arch/x86/kernel/alternative.c +@@ -160,7 +160,7 @@ static const unsigned char * const k7_nops[ASM_NOP_MAX+2] = + #endif + + #ifdef P6_NOP1 +-static const unsigned char __initconst_or_module p6nops[] = ++static const unsigned char p6nops[] = + { + P6_NOP1, + P6_NOP2, +diff --git a/arch/x86/xen/enlighten.c b/arch/x86/xen/enlighten.c +index 44d4393..a1e21ae 100644 +--- a/arch/x86/xen/enlighten.c ++++ b/arch/x86/xen/enlighten.c +@@ -1289,6 +1289,10 @@ asmlinkage void __init xen_start_kernel(void) + + /* Make sure ACS will be enabled */ + pci_request_acs(); ++ ++ /* Avoid searching for BIOS MP tables */ ++ x86_init.mpparse.find_smp_config = x86_init_noop; ++ x86_init.mpparse.get_smp_config = x86_init_uint_noop; + } + #ifdef CONFIG_PCI + /* PCI BIOS service won't work from a PV guest. 
*/ +diff --git a/arch/x86/xen/setup.c b/arch/x86/xen/setup.c +index bb104b4..6e5a7f1 100644 +--- a/arch/x86/xen/setup.c ++++ b/arch/x86/xen/setup.c +@@ -16,6 +16,7 @@ + #include + #include + #include ++#include + #include + #include + +@@ -431,4 +432,7 @@ void __init xen_arch_setup(void) + boot_option_idle_override = IDLE_HALT; + WARN_ON(set_pm_idle_to_default()); + fiddle_vdso(); ++#ifdef CONFIG_NUMA ++ numa_off = 1; ++#endif + } +diff --git a/drivers/ata/libata-core.c b/drivers/ata/libata-core.c +index c04ad68..321e23e 100644 +--- a/drivers/ata/libata-core.c ++++ b/drivers/ata/libata-core.c +@@ -4118,6 +4118,7 @@ static const struct ata_blacklist_entry ata_device_blacklist [] = { + + /* Devices which aren't very happy with higher link speeds */ + { "WD My Book", NULL, ATA_HORKAGE_1_5_GBPS, }, ++ { "Seagate FreeAgent GoFlex", NULL, ATA_HORKAGE_1_5_GBPS, }, + + /* + * Devices which choke on SETXFER. Applies only if both the +diff --git a/drivers/block/aoe/aoecmd.c b/drivers/block/aoe/aoecmd.c +index de0435e..887f68f 100644 +--- a/drivers/block/aoe/aoecmd.c ++++ b/drivers/block/aoe/aoecmd.c +@@ -35,6 +35,7 @@ new_skb(ulong len) + skb_reset_mac_header(skb); + skb_reset_network_header(skb); + skb->protocol = __constant_htons(ETH_P_AOE); ++ skb_checksum_none_assert(skb); + } + return skb; + } +diff --git a/drivers/block/cciss_scsi.c b/drivers/block/cciss_scsi.c +index 38aa6dd..da33111 100644 +--- a/drivers/block/cciss_scsi.c ++++ b/drivers/block/cciss_scsi.c +@@ -795,6 +795,7 @@ static void complete_scsi_command(CommandList_struct *c, int timeout, + } + break; + case CMD_PROTOCOL_ERR: ++ cmd->result = DID_ERROR << 16; + dev_warn(&h->pdev->dev, + "%p has protocol error\n", c); + break; +diff --git a/drivers/block/nbd.c b/drivers/block/nbd.c +index c3f0ee1..86848c6 100644 +--- a/drivers/block/nbd.c ++++ b/drivers/block/nbd.c +@@ -445,6 +445,14 @@ static void nbd_clear_que(struct nbd_device *lo) + req->errors++; + nbd_end_request(req); + } ++ ++ while (!list_empty(&lo->waiting_queue)) { ++ req = list_entry(lo->waiting_queue.next, struct request, ++ queuelist); ++ list_del_init(&req->queuelist); ++ req->errors++; ++ nbd_end_request(req); ++ } + } + + +@@ -594,6 +602,7 @@ static int __nbd_ioctl(struct block_device *bdev, struct nbd_device *lo, + lo->file = NULL; + nbd_clear_que(lo); + BUG_ON(!list_empty(&lo->queue_head)); ++ BUG_ON(!list_empty(&lo->waiting_queue)); + if (file) + fput(file); + return 0; +diff --git a/drivers/bluetooth/ath3k.c b/drivers/bluetooth/ath3k.c +index f1bd44f..5c6709d 100644 +--- a/drivers/bluetooth/ath3k.c ++++ b/drivers/bluetooth/ath3k.c +@@ -62,6 +62,7 @@ static struct usb_device_id ath3k_table[] = { + + /* Atheros AR3011 with sflash firmware*/ + { USB_DEVICE(0x0CF3, 0x3002) }, ++ { USB_DEVICE(0x0CF3, 0xE019) }, + { USB_DEVICE(0x13d3, 0x3304) }, + { USB_DEVICE(0x0930, 0x0215) }, + { USB_DEVICE(0x0489, 0xE03D) }, +@@ -76,12 +77,15 @@ static struct usb_device_id ath3k_table[] = { + { USB_DEVICE(0x04CA, 0x3005) }, + { USB_DEVICE(0x13d3, 0x3362) }, + { USB_DEVICE(0x0CF3, 0xE004) }, ++ { USB_DEVICE(0x0930, 0x0219) }, ++ { USB_DEVICE(0x0489, 0xe057) }, + + /* Atheros AR5BBU12 with sflash firmware */ + { USB_DEVICE(0x0489, 0xE02C) }, + + /* Atheros AR5BBU22 with sflash firmware */ + { USB_DEVICE(0x0489, 0xE03C) }, ++ { USB_DEVICE(0x0489, 0xE036) }, + + { } /* Terminating entry */ + }; +@@ -100,9 +104,12 @@ static struct usb_device_id ath3k_blist_tbl[] = { + { USB_DEVICE(0x04ca, 0x3005), .driver_info = BTUSB_ATH3012 }, + { USB_DEVICE(0x13d3, 0x3362), .driver_info = BTUSB_ATH3012 
}, + { USB_DEVICE(0x0cf3, 0xe004), .driver_info = BTUSB_ATH3012 }, ++ { USB_DEVICE(0x0930, 0x0219), .driver_info = BTUSB_ATH3012 }, ++ { USB_DEVICE(0x0489, 0xe057), .driver_info = BTUSB_ATH3012 }, + + /* Atheros AR5BBU22 with sflash firmware */ + { USB_DEVICE(0x0489, 0xE03C), .driver_info = BTUSB_ATH3012 }, ++ { USB_DEVICE(0x0489, 0xE036), .driver_info = BTUSB_ATH3012 }, + + { } /* Terminating entry */ + }; +diff --git a/drivers/bluetooth/btusb.c b/drivers/bluetooth/btusb.c +index fc4bcd6..6f95d98 100644 +--- a/drivers/bluetooth/btusb.c ++++ b/drivers/bluetooth/btusb.c +@@ -60,6 +60,9 @@ static struct usb_device_id btusb_table[] = { + /* Generic Bluetooth USB device */ + { USB_DEVICE_INFO(0xe0, 0x01, 0x01) }, + ++ /* Apple-specific (Broadcom) devices */ ++ { USB_VENDOR_AND_INTERFACE_INFO(0x05ac, 0xff, 0x01, 0x01) }, ++ + /* Broadcom SoftSailing reporting vendor specific */ + { USB_DEVICE(0x0a5c, 0x21e1) }, + +@@ -102,15 +105,14 @@ static struct usb_device_id btusb_table[] = { + + /* Broadcom BCM20702A0 */ + { USB_DEVICE(0x0489, 0xe042) }, +- { USB_DEVICE(0x0a5c, 0x21e3) }, +- { USB_DEVICE(0x0a5c, 0x21e6) }, +- { USB_DEVICE(0x0a5c, 0x21e8) }, +- { USB_DEVICE(0x0a5c, 0x21f3) }, + { USB_DEVICE(0x413c, 0x8197) }, + + /* Foxconn - Hon Hai */ + { USB_DEVICE(0x0489, 0xe033) }, + ++ /*Broadcom devices with vendor specific id */ ++ { USB_VENDOR_AND_INTERFACE_INFO(0x0a5c, 0xff, 0x01, 0x01) }, ++ + { } /* Terminating entry */ + }; + +@@ -125,6 +127,7 @@ static struct usb_device_id blacklist_table[] = { + + /* Atheros 3011 with sflash firmware */ + { USB_DEVICE(0x0cf3, 0x3002), .driver_info = BTUSB_IGNORE }, ++ { USB_DEVICE(0x0cf3, 0xe019), .driver_info = BTUSB_IGNORE }, + { USB_DEVICE(0x13d3, 0x3304), .driver_info = BTUSB_IGNORE }, + { USB_DEVICE(0x0930, 0x0215), .driver_info = BTUSB_IGNORE }, + { USB_DEVICE(0x0489, 0xe03d), .driver_info = BTUSB_IGNORE }, +@@ -139,12 +142,15 @@ static struct usb_device_id blacklist_table[] = { + { USB_DEVICE(0x04ca, 0x3005), .driver_info = BTUSB_ATH3012 }, + { USB_DEVICE(0x13d3, 0x3362), .driver_info = BTUSB_ATH3012 }, + { USB_DEVICE(0x0cf3, 0xe004), .driver_info = BTUSB_ATH3012 }, ++ { USB_DEVICE(0x0930, 0x0219), .driver_info = BTUSB_ATH3012 }, ++ { USB_DEVICE(0x0489, 0xe057), .driver_info = BTUSB_ATH3012 }, + + /* Atheros AR5BBU12 with sflash firmware */ + { USB_DEVICE(0x0489, 0xe02c), .driver_info = BTUSB_IGNORE }, + + /* Atheros AR5BBU12 with sflash firmware */ + { USB_DEVICE(0x0489, 0xe03c), .driver_info = BTUSB_ATH3012 }, ++ { USB_DEVICE(0x0489, 0xe036), .driver_info = BTUSB_ATH3012 }, + + /* Broadcom BCM2035 */ + { USB_DEVICE(0x0a5c, 0x2035), .driver_info = BTUSB_WRONG_SCO_MTU }, +diff --git a/drivers/cpufreq/powernow-k8.c b/drivers/cpufreq/powernow-k8.c +index ad683ec..b7fe343 100644 +--- a/drivers/cpufreq/powernow-k8.c ++++ b/drivers/cpufreq/powernow-k8.c +@@ -32,7 +32,6 @@ + #include + #include + #include +-#include /* for current / set_cpus_allowed() */ + #include + #include + +@@ -1132,16 +1131,23 @@ static int transition_frequency_pstate(struct powernow_k8_data *data, + return res; + } + +-/* Driver entry point to switch to the target frequency */ +-static int powernowk8_target(struct cpufreq_policy *pol, +- unsigned targfreq, unsigned relation) ++struct powernowk8_target_arg { ++ struct cpufreq_policy *pol; ++ unsigned targfreq; ++ unsigned relation; ++}; ++ ++static long powernowk8_target_fn(void *arg) + { +- cpumask_var_t oldmask; ++ struct powernowk8_target_arg *pta = arg; ++ struct cpufreq_policy *pol = pta->pol; ++ unsigned targfreq = 
pta->targfreq; ++ unsigned relation = pta->relation; + struct powernow_k8_data *data = per_cpu(powernow_data, pol->cpu); + u32 checkfid; + u32 checkvid; + unsigned int newstate; +- int ret = -EIO; ++ int ret; + + if (!data) + return -EINVAL; +@@ -1149,29 +1155,16 @@ static int powernowk8_target(struct cpufreq_policy *pol, + checkfid = data->currfid; + checkvid = data->currvid; + +- /* only run on specific CPU from here on. */ +- /* This is poor form: use a workqueue or smp_call_function_single */ +- if (!alloc_cpumask_var(&oldmask, GFP_KERNEL)) +- return -ENOMEM; +- +- cpumask_copy(oldmask, tsk_cpus_allowed(current)); +- set_cpus_allowed_ptr(current, cpumask_of(pol->cpu)); +- +- if (smp_processor_id() != pol->cpu) { +- printk(KERN_ERR PFX "limiting to cpu %u failed\n", pol->cpu); +- goto err_out; +- } +- + if (pending_bit_stuck()) { + printk(KERN_ERR PFX "failing targ, change pending bit set\n"); +- goto err_out; ++ return -EIO; + } + + pr_debug("targ: cpu %d, %d kHz, min %d, max %d, relation %d\n", + pol->cpu, targfreq, pol->min, pol->max, relation); + + if (query_current_values_with_pending_wait(data)) +- goto err_out; ++ return -EIO; + + if (cpu_family != CPU_HW_PSTATE) { + pr_debug("targ: curr fid 0x%x, vid 0x%x\n", +@@ -1189,7 +1182,7 @@ static int powernowk8_target(struct cpufreq_policy *pol, + + if (cpufreq_frequency_table_target(pol, data->powernow_table, + targfreq, relation, &newstate)) +- goto err_out; ++ return -EIO; + + mutex_lock(&fidvid_mutex); + +@@ -1202,9 +1195,8 @@ static int powernowk8_target(struct cpufreq_policy *pol, + ret = transition_frequency_fidvid(data, newstate); + if (ret) { + printk(KERN_ERR PFX "transition frequency failed\n"); +- ret = 1; + mutex_unlock(&fidvid_mutex); +- goto err_out; ++ return 1; + } + mutex_unlock(&fidvid_mutex); + +@@ -1213,12 +1205,25 @@ static int powernowk8_target(struct cpufreq_policy *pol, + data->powernow_table[newstate].index); + else + pol->cur = find_khz_freq_from_fid(data->currfid); +- ret = 0; + +-err_out: +- set_cpus_allowed_ptr(current, oldmask); +- free_cpumask_var(oldmask); +- return ret; ++ return 0; ++} ++ ++/* Driver entry point to switch to the target frequency */ ++static int powernowk8_target(struct cpufreq_policy *pol, ++ unsigned targfreq, unsigned relation) ++{ ++ struct powernowk8_target_arg pta = { .pol = pol, .targfreq = targfreq, ++ .relation = relation }; ++ ++ /* ++ * Must run on @pol->cpu. cpufreq core is responsible for ensuring ++ * that we're bound to the current CPU and pol->cpu stays online. 
++ */ ++ if (smp_processor_id() == pol->cpu) ++ return powernowk8_target_fn(&pta); ++ else ++ return work_on_cpu(pol->cpu, powernowk8_target_fn, &pta); + } + + /* Driver entry point to verify the policy and range of frequencies */ +diff --git a/drivers/dma/at_hdmac.c b/drivers/dma/at_hdmac.c +index 79dcf6e..c60d9c1 100644 +--- a/drivers/dma/at_hdmac.c ++++ b/drivers/dma/at_hdmac.c +@@ -678,7 +678,7 @@ atc_prep_slave_sg(struct dma_chan *chan, struct scatterlist *sgl, + flags); + + if (unlikely(!atslave || !sg_len)) { +- dev_dbg(chan2dev(chan), "prep_dma_memcpy: length is zero!\n"); ++ dev_dbg(chan2dev(chan), "prep_slave_sg: sg length is zero!\n"); + return NULL; + } + +@@ -706,6 +706,11 @@ atc_prep_slave_sg(struct dma_chan *chan, struct scatterlist *sgl, + + mem = sg_dma_address(sg); + len = sg_dma_len(sg); ++ if (unlikely(!len)) { ++ dev_dbg(chan2dev(chan), ++ "prep_slave_sg: sg(%d) data length is zero\n", i); ++ goto err; ++ } + mem_width = 2; + if (unlikely(mem & 3 || len & 3)) + mem_width = 0; +@@ -740,6 +745,11 @@ atc_prep_slave_sg(struct dma_chan *chan, struct scatterlist *sgl, + + mem = sg_dma_address(sg); + len = sg_dma_len(sg); ++ if (unlikely(!len)) { ++ dev_dbg(chan2dev(chan), ++ "prep_slave_sg: sg(%d) data length is zero\n", i); ++ goto err; ++ } + mem_width = 2; + if (unlikely(mem & 3 || len & 3)) + mem_width = 0; +@@ -773,6 +783,7 @@ atc_prep_slave_sg(struct dma_chan *chan, struct scatterlist *sgl, + + err_desc_get: + dev_err(chan2dev(chan), "not enough descriptors available\n"); ++err: + atc_desc_put(atchan, first); + return NULL; + } +diff --git a/drivers/dma/pl330.c b/drivers/dma/pl330.c +index 57104147..e8eedb7 100644 +--- a/drivers/dma/pl330.c ++++ b/drivers/dma/pl330.c +@@ -858,6 +858,11 @@ pl330_probe(struct amba_device *adev, const struct amba_id *id) + /* Initialize channel parameters */ + num_chan = max(pdat ? 
pdat->nr_valid_peri : 0, (u8)pi->pcfg.num_chan); + pdmac->peripherals = kzalloc(num_chan * sizeof(*pch), GFP_KERNEL); ++ if (!pdmac->peripherals) { ++ ret = -ENOMEM; ++ dev_err(&adev->dev, "unable to allocate pdmac->peripherals\n"); ++ goto probe_err4; ++ } + + for (i = 0; i < num_chan; i++) { + pch = &pdmac->peripherals[i]; +diff --git a/drivers/edac/sb_edac.c b/drivers/edac/sb_edac.c +index 0db57b5..da71881 100644 +--- a/drivers/edac/sb_edac.c ++++ b/drivers/edac/sb_edac.c +@@ -554,7 +554,8 @@ static int get_dimm_config(const struct mem_ctl_info *mci) + { + struct sbridge_pvt *pvt = mci->pvt_info; + struct csrow_info *csr; +- int i, j, banks, ranks, rows, cols, size, npages; ++ unsigned i, j, banks, ranks, rows, cols, npages; ++ u64 size; + int csrow = 0; + unsigned long last_page = 0; + u32 reg; +@@ -626,10 +627,10 @@ static int get_dimm_config(const struct mem_ctl_info *mci) + cols = numcol(mtr); + + /* DDR3 has 8 I/O banks */ +- size = (rows * cols * banks * ranks) >> (20 - 3); ++ size = ((u64)rows * cols * banks * ranks) >> (20 - 3); + npages = MiB_TO_PAGES(size); + +- debugf0("mc#%d: channel %d, dimm %d, %d Mb (%d pages) bank: %d, rank: %d, row: %#x, col: %#x\n", ++ debugf0("mc#%d: channel %d, dimm %d, %Ld Mb (%d pages) bank: %d, rank: %d, row: %#x, col: %#x\n", + pvt->sbridge_dev->mc, i, j, + size, npages, + banks, ranks, rows, cols); +diff --git a/drivers/gpio/gpio-lpc32xx.c b/drivers/gpio/gpio-lpc32xx.c +index 5b69480..2c40776 100644 +--- a/drivers/gpio/gpio-lpc32xx.c ++++ b/drivers/gpio/gpio-lpc32xx.c +@@ -295,6 +295,7 @@ static int lpc32xx_gpio_dir_output_p012(struct gpio_chip *chip, unsigned pin, + { + struct lpc32xx_gpio_chip *group = to_lpc32xx_gpio(chip); + ++ __set_gpio_level_p012(group, pin, value); + __set_gpio_dir_p012(group, pin, 0); + + return 0; +@@ -305,6 +306,7 @@ static int lpc32xx_gpio_dir_output_p3(struct gpio_chip *chip, unsigned pin, + { + struct lpc32xx_gpio_chip *group = to_lpc32xx_gpio(chip); + ++ __set_gpio_level_p3(group, pin, value); + __set_gpio_dir_p3(group, pin, 0); + + return 0; +@@ -313,6 +315,9 @@ static int lpc32xx_gpio_dir_output_p3(struct gpio_chip *chip, unsigned pin, + static int lpc32xx_gpio_dir_out_always(struct gpio_chip *chip, unsigned pin, + int value) + { ++ struct lpc32xx_gpio_chip *group = to_lpc32xx_gpio(chip); ++ ++ __set_gpo_level_p3(group, pin, value); + return 0; + } + +diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c +index 548a400..e48e01e 100644 +--- a/drivers/gpu/drm/i915/i915_gem.c ++++ b/drivers/gpu/drm/i915/i915_gem.c +@@ -3357,7 +3357,8 @@ i915_gem_object_pin(struct drm_i915_gem_object *obj, + struct drm_i915_private *dev_priv = dev->dev_private; + int ret; + +- BUG_ON(obj->pin_count == DRM_I915_GEM_OBJECT_MAX_PIN_COUNT); ++ if (WARN_ON(obj->pin_count == DRM_I915_GEM_OBJECT_MAX_PIN_COUNT)) ++ return -EBUSY; + WARN_ON(i915_verify_lists(dev)); + + if (obj->gtt_space != NULL) { +diff --git a/drivers/gpu/drm/i915/intel_hdmi.c b/drivers/gpu/drm/i915/intel_hdmi.c +index 9cd81ba..c2a64f4 100644 +--- a/drivers/gpu/drm/i915/intel_hdmi.c ++++ b/drivers/gpu/drm/i915/intel_hdmi.c +@@ -271,7 +271,7 @@ static void intel_hdmi_dpms(struct drm_encoder *encoder, int mode) + u32 temp; + u32 enable_bits = SDVO_ENABLE; + +- if (intel_hdmi->has_audio) ++ if (intel_hdmi->has_audio || mode != DRM_MODE_DPMS_ON) + enable_bits |= SDVO_AUDIO_ENABLE; + + temp = I915_READ(intel_hdmi->sdvox_reg); +diff --git a/drivers/gpu/drm/radeon/atombios_crtc.c b/drivers/gpu/drm/radeon/atombios_crtc.c +index ceffd20..a4011b0 100644 +--- 
a/drivers/gpu/drm/radeon/atombios_crtc.c ++++ b/drivers/gpu/drm/radeon/atombios_crtc.c +@@ -1446,98 +1446,14 @@ static void radeon_legacy_atom_fixup(struct drm_crtc *crtc) + } + } + +-/** +- * radeon_get_pll_use_mask - look up a mask of which pplls are in use +- * +- * @crtc: drm crtc +- * +- * Returns the mask of which PPLLs (Pixel PLLs) are in use. +- */ +-static u32 radeon_get_pll_use_mask(struct drm_crtc *crtc) +-{ +- struct drm_device *dev = crtc->dev; +- struct drm_crtc *test_crtc; +- struct radeon_crtc *radeon_test_crtc; +- u32 pll_in_use = 0; +- +- list_for_each_entry(test_crtc, &dev->mode_config.crtc_list, head) { +- if (crtc == test_crtc) +- continue; +- +- radeon_test_crtc = to_radeon_crtc(test_crtc); +- if (radeon_test_crtc->pll_id != ATOM_PPLL_INVALID) +- pll_in_use |= (1 << radeon_test_crtc->pll_id); +- } +- return pll_in_use; +-} +- +-/** +- * radeon_get_shared_dp_ppll - return the PPLL used by another crtc for DP +- * +- * @crtc: drm crtc +- * +- * Returns the PPLL (Pixel PLL) used by another crtc/encoder which is +- * also in DP mode. For DP, a single PPLL can be used for all DP +- * crtcs/encoders. +- */ +-static int radeon_get_shared_dp_ppll(struct drm_crtc *crtc) +-{ +- struct drm_device *dev = crtc->dev; +- struct drm_encoder *test_encoder; +- struct radeon_crtc *radeon_test_crtc; +- +- list_for_each_entry(test_encoder, &dev->mode_config.encoder_list, head) { +- if (test_encoder->crtc && (test_encoder->crtc != crtc)) { +- if (ENCODER_MODE_IS_DP(atombios_get_encoder_mode(test_encoder))) { +- /* for DP use the same PLL for all */ +- radeon_test_crtc = to_radeon_crtc(test_encoder->crtc); +- if (radeon_test_crtc->pll_id != ATOM_PPLL_INVALID) +- return radeon_test_crtc->pll_id; +- } +- } +- } +- return ATOM_PPLL_INVALID; +-} +- +-/** +- * radeon_atom_pick_pll - Allocate a PPLL for use by the crtc. +- * +- * @crtc: drm crtc +- * +- * Returns the PPLL (Pixel PLL) to be used by the crtc. For DP monitors +- * a single PPLL can be used for all DP crtcs/encoders. For non-DP +- * monitors a dedicated PPLL must be used. If a particular board has +- * an external DP PLL, return ATOM_PPLL_INVALID to skip PLL programming +- * as there is no need to program the PLL itself. If we are not able to +- * allocate a PLL, return ATOM_PPLL_INVALID to skip PLL programming to +- * avoid messing up an existing monitor. 
+- * +- * Asic specific PLL information +- * +- * DCE 6.1 +- * - PPLL2 is only available to UNIPHYA (both DP and non-DP) +- * - PPLL0, PPLL1 are available for UNIPHYB/C/D/E/F (both DP and non-DP) +- * +- * DCE 6.0 +- * - PPLL0 is available to all UNIPHY (DP only) +- * - PPLL1, PPLL2 are available for all UNIPHY (both DP and non-DP) and DAC +- * +- * DCE 5.0 +- * - DCPLL is available to all UNIPHY (DP only) +- * - PPLL1, PPLL2 are available for all UNIPHY (both DP and non-DP) and DAC +- * +- * DCE 3.0/4.0/4.1 +- * - PPLL1, PPLL2 are available for all UNIPHY (both DP and non-DP) and DAC +- * +- */ + static int radeon_atom_pick_pll(struct drm_crtc *crtc) + { + struct radeon_crtc *radeon_crtc = to_radeon_crtc(crtc); + struct drm_device *dev = crtc->dev; + struct radeon_device *rdev = dev->dev_private; + struct drm_encoder *test_encoder; +- u32 pll_in_use; +- int pll; ++ struct drm_crtc *test_crtc; ++ uint32_t pll_in_use = 0; + + if (ASIC_IS_DCE4(rdev)) { + list_for_each_entry(test_encoder, &dev->mode_config.encoder_list, head) { +@@ -1545,7 +1461,7 @@ static int radeon_atom_pick_pll(struct drm_crtc *crtc) + /* in DP mode, the DP ref clock can come from PPLL, DCPLL, or ext clock, + * depending on the asic: + * DCE4: PPLL or ext clock +- * DCE5: PPLL, DCPLL, or ext clock ++ * DCE5: DCPLL or ext clock + * + * Setting ATOM_PPLL_INVALID will cause SetPixelClock to skip + * PPLL/DCPLL programming and only program the DP DTO for the +@@ -1553,31 +1469,29 @@ static int radeon_atom_pick_pll(struct drm_crtc *crtc) + */ + if (ENCODER_MODE_IS_DP(atombios_get_encoder_mode(test_encoder))) { + if (rdev->clock.dp_extclk) +- /* skip PPLL programming if using ext clock */ + return ATOM_PPLL_INVALID; + else if (ASIC_IS_DCE5(rdev)) +- /* use DCPLL for all DP */ + return ATOM_DCPLL; +- else { +- /* use the same PPLL for all DP monitors */ +- pll = radeon_get_shared_dp_ppll(crtc); +- if (pll != ATOM_PPLL_INVALID) +- return pll; +- } + } +- break; + } + } +- /* all other cases */ +- pll_in_use = radeon_get_pll_use_mask(crtc); +- if (!(pll_in_use & (1 << ATOM_PPLL2))) +- return ATOM_PPLL2; +- if (!(pll_in_use & (1 << ATOM_PPLL1))) ++ ++ /* otherwise, pick one of the plls */ ++ list_for_each_entry(test_crtc, &dev->mode_config.crtc_list, head) { ++ struct radeon_crtc *radeon_test_crtc; ++ ++ if (crtc == test_crtc) ++ continue; ++ ++ radeon_test_crtc = to_radeon_crtc(test_crtc); ++ if ((radeon_test_crtc->pll_id >= ATOM_PPLL1) && ++ (radeon_test_crtc->pll_id <= ATOM_PPLL2)) ++ pll_in_use |= (1 << radeon_test_crtc->pll_id); ++ } ++ if (!(pll_in_use & 1)) + return ATOM_PPLL1; +- DRM_ERROR("unable to allocate a PPLL\n"); +- return ATOM_PPLL_INVALID; ++ return ATOM_PPLL2; + } else +- /* use PPLL1 or PPLL2 */ + return radeon_crtc->crtc_id; + + } +@@ -1696,7 +1610,7 @@ static void atombios_crtc_disable(struct drm_crtc *crtc) + break; + } + done: +- radeon_crtc->pll_id = ATOM_PPLL_INVALID; ++ radeon_crtc->pll_id = -1; + } + + static const struct drm_crtc_helper_funcs atombios_helper_funcs = { +@@ -1745,6 +1659,6 @@ void radeon_atombios_init_crtc(struct drm_device *dev, + else + radeon_crtc->crtc_offset = 0; + } +- radeon_crtc->pll_id = ATOM_PPLL_INVALID; ++ radeon_crtc->pll_id = -1; + drm_crtc_helper_add(&radeon_crtc->base, &atombios_helper_funcs); + } +diff --git a/drivers/hid/hid-core.c b/drivers/hid/hid-core.c +index 0c8bea9..a21e763 100644 +--- a/drivers/hid/hid-core.c ++++ b/drivers/hid/hid-core.c +@@ -1026,7 +1026,7 @@ static struct hid_report *hid_get_report(struct hid_report_enum *report_enum, + return report; + } + 
+-void hid_report_raw_event(struct hid_device *hid, int type, u8 *data, int size, ++int hid_report_raw_event(struct hid_device *hid, int type, u8 *data, int size, + int interrupt) + { + struct hid_report_enum *report_enum = hid->report_enum + type; +@@ -1034,10 +1034,11 @@ void hid_report_raw_event(struct hid_device *hid, int type, u8 *data, int size, + unsigned int a; + int rsize, csize = size; + u8 *cdata = data; ++ int ret = 0; + + report = hid_get_report(report_enum, data); + if (!report) +- return; ++ goto out; + + if (report_enum->numbered) { + cdata++; +@@ -1057,14 +1058,19 @@ void hid_report_raw_event(struct hid_device *hid, int type, u8 *data, int size, + + if ((hid->claimed & HID_CLAIMED_HIDDEV) && hid->hiddev_report_event) + hid->hiddev_report_event(hid, report); +- if (hid->claimed & HID_CLAIMED_HIDRAW) +- hidraw_report_event(hid, data, size); ++ if (hid->claimed & HID_CLAIMED_HIDRAW) { ++ ret = hidraw_report_event(hid, data, size); ++ if (ret) ++ goto out; ++ } + + for (a = 0; a < report->maxfield; a++) + hid_input_field(hid, report->field[a], cdata, interrupt); + + if (hid->claimed & HID_CLAIMED_INPUT) + hidinput_report_event(hid, report); ++out: ++ return ret; + } + EXPORT_SYMBOL_GPL(hid_report_raw_event); + +@@ -1141,7 +1147,7 @@ nomem: + } + } + +- hid_report_raw_event(hid, type, data, size, interrupt); ++ ret = hid_report_raw_event(hid, type, data, size, interrupt); + + unlock: + up(&hid->driver_lock); +diff --git a/drivers/hid/hid-logitech-dj.c b/drivers/hid/hid-logitech-dj.c +index 2eac8c5..8821ecc 100644 +--- a/drivers/hid/hid-logitech-dj.c ++++ b/drivers/hid/hid-logitech-dj.c +@@ -185,6 +185,7 @@ static struct hid_ll_driver logi_dj_ll_driver; + static int logi_dj_output_hidraw_report(struct hid_device *hid, u8 * buf, + size_t count, + unsigned char report_type); ++static int logi_dj_recv_query_paired_devices(struct dj_receiver_dev *djrcv_dev); + + static void logi_dj_recv_destroy_djhid_device(struct dj_receiver_dev *djrcv_dev, + struct dj_report *dj_report) +@@ -225,6 +226,7 @@ static void logi_dj_recv_add_djhid_device(struct dj_receiver_dev *djrcv_dev, + if (dj_report->report_params[DEVICE_PAIRED_PARAM_SPFUNCTION] & + SPFUNCTION_DEVICE_LIST_EMPTY) { + dbg_hid("%s: device list is empty\n", __func__); ++ djrcv_dev->querying_devices = false; + return; + } + +@@ -235,6 +237,12 @@ static void logi_dj_recv_add_djhid_device(struct dj_receiver_dev *djrcv_dev, + return; + } + ++ if (djrcv_dev->paired_dj_devices[dj_report->device_index]) { ++ /* The device is already known. No need to reallocate it. */ ++ dbg_hid("%s: device is already known\n", __func__); ++ return; ++ } ++ + dj_hiddev = hid_allocate_device(); + if (IS_ERR(dj_hiddev)) { + dev_err(&djrcv_hdev->dev, "%s: hid_allocate_device failed\n", +@@ -298,6 +306,7 @@ static void delayedwork_callback(struct work_struct *work) + struct dj_report dj_report; + unsigned long flags; + int count; ++ int retval; + + dbg_hid("%s\n", __func__); + +@@ -330,6 +339,25 @@ static void delayedwork_callback(struct work_struct *work) + logi_dj_recv_destroy_djhid_device(djrcv_dev, &dj_report); + break; + default: ++ /* A normal report (i. e. not belonging to a pair/unpair notification) ++ * arriving here, means that the report arrived but we did not have a ++ * paired dj_device associated to the report's device_index, this ++ * means that the original "device paired" notification corresponding ++ * to this dj_device never arrived to this driver. 
The reason is that ++ * hid-core discards all packets coming from a device while probe() is ++ * executing. */ ++ if (!djrcv_dev->paired_dj_devices[dj_report.device_index]) { ++ /* ok, we don't know the device, just re-ask the ++ * receiver for the list of connected devices. */ ++ retval = logi_dj_recv_query_paired_devices(djrcv_dev); ++ if (!retval) { ++ /* everything went fine, so just leave */ ++ break; ++ } ++ dev_err(&djrcv_dev->hdev->dev, ++ "%s:logi_dj_recv_query_paired_devices " ++ "error:%d\n", __func__, retval); ++ } + dbg_hid("%s: unexpected report type\n", __func__); + } + } +@@ -360,6 +388,12 @@ static void logi_dj_recv_forward_null_report(struct dj_receiver_dev *djrcv_dev, + if (!djdev) { + dbg_hid("djrcv_dev->paired_dj_devices[dj_report->device_index]" + " is NULL, index %d\n", dj_report->device_index); ++ kfifo_in(&djrcv_dev->notif_fifo, dj_report, sizeof(struct dj_report)); ++ ++ if (schedule_work(&djrcv_dev->work) == 0) { ++ dbg_hid("%s: did not schedule the work item, was already " ++ "queued\n", __func__); ++ } + return; + } + +@@ -390,6 +424,12 @@ static void logi_dj_recv_forward_report(struct dj_receiver_dev *djrcv_dev, + if (dj_device == NULL) { + dbg_hid("djrcv_dev->paired_dj_devices[dj_report->device_index]" + " is NULL, index %d\n", dj_report->device_index); ++ kfifo_in(&djrcv_dev->notif_fifo, dj_report, sizeof(struct dj_report)); ++ ++ if (schedule_work(&djrcv_dev->work) == 0) { ++ dbg_hid("%s: did not schedule the work item, was already " ++ "queued\n", __func__); ++ } + return; + } + +@@ -428,27 +468,42 @@ static int logi_dj_recv_send_report(struct dj_receiver_dev *djrcv_dev, + + static int logi_dj_recv_query_paired_devices(struct dj_receiver_dev *djrcv_dev) + { +- struct dj_report dj_report; ++ struct dj_report *dj_report; ++ int retval; ++ ++ /* no need to protect djrcv_dev->querying_devices */ ++ if (djrcv_dev->querying_devices) ++ return 0; + +- memset(&dj_report, 0, sizeof(dj_report)); +- dj_report.report_id = REPORT_ID_DJ_SHORT; +- dj_report.device_index = 0xFF; +- dj_report.report_type = REPORT_TYPE_CMD_GET_PAIRED_DEVICES; +- return logi_dj_recv_send_report(djrcv_dev, &dj_report); ++ dj_report = kzalloc(sizeof(struct dj_report), GFP_KERNEL); ++ if (!dj_report) ++ return -ENOMEM; ++ dj_report->report_id = REPORT_ID_DJ_SHORT; ++ dj_report->device_index = 0xFF; ++ dj_report->report_type = REPORT_TYPE_CMD_GET_PAIRED_DEVICES; ++ retval = logi_dj_recv_send_report(djrcv_dev, dj_report); ++ kfree(dj_report); ++ return retval; + } + ++ + static int logi_dj_recv_switch_to_dj_mode(struct dj_receiver_dev *djrcv_dev, + unsigned timeout) + { +- struct dj_report dj_report; ++ struct dj_report *dj_report; ++ int retval; + +- memset(&dj_report, 0, sizeof(dj_report)); +- dj_report.report_id = REPORT_ID_DJ_SHORT; +- dj_report.device_index = 0xFF; +- dj_report.report_type = REPORT_TYPE_CMD_SWITCH; +- dj_report.report_params[CMD_SWITCH_PARAM_DEVBITFIELD] = 0x1F; +- dj_report.report_params[CMD_SWITCH_PARAM_TIMEOUT_SECONDS] = (u8)timeout; +- return logi_dj_recv_send_report(djrcv_dev, &dj_report); ++ dj_report = kzalloc(sizeof(struct dj_report), GFP_KERNEL); ++ if (!dj_report) ++ return -ENOMEM; ++ dj_report->report_id = REPORT_ID_DJ_SHORT; ++ dj_report->device_index = 0xFF; ++ dj_report->report_type = REPORT_TYPE_CMD_SWITCH; ++ dj_report->report_params[CMD_SWITCH_PARAM_DEVBITFIELD] = 0x3F; ++ dj_report->report_params[CMD_SWITCH_PARAM_TIMEOUT_SECONDS] = (u8)timeout; ++ retval = logi_dj_recv_send_report(djrcv_dev, dj_report); ++ kfree(dj_report); ++ return retval; + } + + +diff 
--git a/drivers/hid/hid-logitech-dj.h b/drivers/hid/hid-logitech-dj.h +index fd28a5e..4a40003 100644 +--- a/drivers/hid/hid-logitech-dj.h ++++ b/drivers/hid/hid-logitech-dj.h +@@ -101,6 +101,7 @@ struct dj_receiver_dev { + struct work_struct work; + struct kfifo notif_fifo; + spinlock_t lock; ++ bool querying_devices; + }; + + struct dj_device { +diff --git a/drivers/hid/hidraw.c b/drivers/hid/hidraw.c +index cf7d6d5..17d15bb 100644 +--- a/drivers/hid/hidraw.c ++++ b/drivers/hid/hidraw.c +@@ -42,6 +42,7 @@ static struct cdev hidraw_cdev; + static struct class *hidraw_class; + static struct hidraw *hidraw_table[HIDRAW_MAX_DEVICES]; + static DEFINE_MUTEX(minors_lock); ++static void drop_ref(struct hidraw *hid, int exists_bit); + + static ssize_t hidraw_read(struct file *file, char __user *buffer, size_t count, loff_t *ppos) + { +@@ -87,13 +88,16 @@ static ssize_t hidraw_read(struct file *file, char __user *buffer, size_t count, + len = list->buffer[list->tail].len > count ? + count : list->buffer[list->tail].len; + +- if (copy_to_user(buffer, list->buffer[list->tail].value, len)) { +- ret = -EFAULT; +- goto out; ++ if (list->buffer[list->tail].value) { ++ if (copy_to_user(buffer, list->buffer[list->tail].value, len)) { ++ ret = -EFAULT; ++ goto out; ++ } ++ ret = len; + } +- ret = len; + + kfree(list->buffer[list->tail].value); ++ list->buffer[list->tail].value = NULL; + list->tail = (list->tail + 1) & (HIDRAW_BUFFER_SIZE - 1); + } + out: +@@ -110,7 +114,7 @@ static ssize_t hidraw_send_report(struct file *file, const char __user *buffer, + __u8 *buf; + int ret = 0; + +- if (!hidraw_table[minor]) { ++ if (!hidraw_table[minor] || !hidraw_table[minor]->exist) { + ret = -ENODEV; + goto out; + } +@@ -258,7 +262,7 @@ static int hidraw_open(struct inode *inode, struct file *file) + } + + mutex_lock(&minors_lock); +- if (!hidraw_table[minor]) { ++ if (!hidraw_table[minor] || !hidraw_table[minor]->exist) { + err = -ENODEV; + goto out_unlock; + } +@@ -295,32 +299,12 @@ out: + static int hidraw_release(struct inode * inode, struct file * file) + { + unsigned int minor = iminor(inode); +- struct hidraw *dev; + struct hidraw_list *list = file->private_data; +- int ret; +- +- mutex_lock(&minors_lock); +- if (!hidraw_table[minor]) { +- ret = -ENODEV; +- goto unlock; +- } + ++ drop_ref(hidraw_table[minor], 0); + list_del(&list->node); +- dev = hidraw_table[minor]; +- if (!--dev->open) { +- if (list->hidraw->exist) { +- hid_hw_power(dev->hid, PM_HINT_NORMAL); +- hid_hw_close(dev->hid); +- } else { +- kfree(list->hidraw); +- } +- } + kfree(list); +- ret = 0; +-unlock: +- mutex_unlock(&minors_lock); +- +- return ret; ++ return 0; + } + + static long hidraw_ioctl(struct file *file, unsigned int cmd, +@@ -437,19 +421,29 @@ static const struct file_operations hidraw_ops = { + .llseek = noop_llseek, + }; + +-void hidraw_report_event(struct hid_device *hid, u8 *data, int len) ++int hidraw_report_event(struct hid_device *hid, u8 *data, int len) + { + struct hidraw *dev = hid->hidraw; + struct hidraw_list *list; ++ int ret = 0; + + list_for_each_entry(list, &dev->list, node) { +- list->buffer[list->head].value = kmemdup(data, len, GFP_ATOMIC); ++ int new_head = (list->head + 1) & (HIDRAW_BUFFER_SIZE - 1); ++ ++ if (new_head == list->tail) ++ continue; ++ ++ if (!(list->buffer[list->head].value = kmemdup(data, len, GFP_ATOMIC))) { ++ ret = -ENOMEM; ++ break; ++ } + list->buffer[list->head].len = len; +- list->head = (list->head + 1) & (HIDRAW_BUFFER_SIZE - 1); ++ list->head = new_head; + kill_fasync(&list->fasync, 
SIGIO, POLL_IN); + } + + wake_up_interruptible(&dev->wait); ++ return ret; + } + EXPORT_SYMBOL_GPL(hidraw_report_event); + +@@ -512,21 +506,7 @@ EXPORT_SYMBOL_GPL(hidraw_connect); + void hidraw_disconnect(struct hid_device *hid) + { + struct hidraw *hidraw = hid->hidraw; +- +- mutex_lock(&minors_lock); +- hidraw->exist = 0; +- +- device_destroy(hidraw_class, MKDEV(hidraw_major, hidraw->minor)); +- +- hidraw_table[hidraw->minor] = NULL; +- +- if (hidraw->open) { +- hid_hw_close(hid); +- wake_up_interruptible(&hidraw->wait); +- } else { +- kfree(hidraw); +- } +- mutex_unlock(&minors_lock); ++ drop_ref(hidraw, 1); + } + EXPORT_SYMBOL_GPL(hidraw_disconnect); + +@@ -542,21 +522,28 @@ int __init hidraw_init(void) + + if (result < 0) { + pr_warn("can't get major number\n"); +- result = 0; + goto out; + } + + hidraw_class = class_create(THIS_MODULE, "hidraw"); + if (IS_ERR(hidraw_class)) { + result = PTR_ERR(hidraw_class); +- unregister_chrdev(hidraw_major, "hidraw"); +- goto out; ++ goto error_cdev; + } + + cdev_init(&hidraw_cdev, &hidraw_ops); +- cdev_add(&hidraw_cdev, dev_id, HIDRAW_MAX_DEVICES); ++ result = cdev_add(&hidraw_cdev, dev_id, HIDRAW_MAX_DEVICES); ++ if (result < 0) ++ goto error_class; ++ + out: + return result; ++ ++error_class: ++ class_destroy(hidraw_class); ++error_cdev: ++ unregister_chrdev_region(dev_id, HIDRAW_MAX_DEVICES); ++ goto out; + } + + void hidraw_exit(void) +@@ -568,3 +555,23 @@ void hidraw_exit(void) + unregister_chrdev_region(dev_id, HIDRAW_MAX_DEVICES); + + } ++ ++static void drop_ref(struct hidraw *hidraw, int exists_bit) ++{ ++ mutex_lock(&minors_lock); ++ if (exists_bit) { ++ hid_hw_close(hidraw->hid); ++ hidraw->exist = 0; ++ if (hidraw->open) ++ wake_up_interruptible(&hidraw->wait); ++ } else { ++ --hidraw->open; ++ } ++ ++ if (!hidraw->open && !hidraw->exist) { ++ device_destroy(hidraw_class, MKDEV(hidraw_major, hidraw->minor)); ++ hidraw_table[hidraw->minor] = NULL; ++ kfree(hidraw); ++ } ++ mutex_unlock(&minors_lock); ++} +diff --git a/drivers/hwmon/ad7314.c b/drivers/hwmon/ad7314.c +index 5d760f3..08e2947 100644 +--- a/drivers/hwmon/ad7314.c ++++ b/drivers/hwmon/ad7314.c +@@ -96,10 +96,18 @@ static ssize_t ad7314_show_temperature(struct device *dev, + } + } + ++static ssize_t ad7314_show_name(struct device *dev, ++ struct device_attribute *devattr, char *buf) ++{ ++ return sprintf(buf, "%s\n", to_spi_device(dev)->modalias); ++} ++ ++static DEVICE_ATTR(name, S_IRUGO, ad7314_show_name, NULL); + static SENSOR_DEVICE_ATTR(temp1_input, S_IRUGO, + ad7314_show_temperature, NULL, 0); + + static struct attribute *ad7314_attributes[] = { ++ &dev_attr_name.attr, + &sensor_dev_attr_temp1_input.dev_attr.attr, + NULL, + }; +diff --git a/drivers/hwmon/ads7871.c b/drivers/hwmon/ads7871.c +index 04450f8..685aae6 100644 +--- a/drivers/hwmon/ads7871.c ++++ b/drivers/hwmon/ads7871.c +@@ -133,6 +133,12 @@ static ssize_t show_voltage(struct device *dev, + } + } + ++static ssize_t ads7871_show_name(struct device *dev, ++ struct device_attribute *devattr, char *buf) ++{ ++ return sprintf(buf, "%s\n", to_spi_device(dev)->modalias); ++} ++ + static SENSOR_DEVICE_ATTR(in0_input, S_IRUGO, show_voltage, NULL, 0); + static SENSOR_DEVICE_ATTR(in1_input, S_IRUGO, show_voltage, NULL, 1); + static SENSOR_DEVICE_ATTR(in2_input, S_IRUGO, show_voltage, NULL, 2); +@@ -142,6 +148,8 @@ static SENSOR_DEVICE_ATTR(in5_input, S_IRUGO, show_voltage, NULL, 5); + static SENSOR_DEVICE_ATTR(in6_input, S_IRUGO, show_voltage, NULL, 6); + static SENSOR_DEVICE_ATTR(in7_input, S_IRUGO, show_voltage, NULL, 
7); + ++static DEVICE_ATTR(name, S_IRUGO, ads7871_show_name, NULL); ++ + static struct attribute *ads7871_attributes[] = { + &sensor_dev_attr_in0_input.dev_attr.attr, + &sensor_dev_attr_in1_input.dev_attr.attr, +@@ -151,6 +159,7 @@ static struct attribute *ads7871_attributes[] = { + &sensor_dev_attr_in5_input.dev_attr.attr, + &sensor_dev_attr_in6_input.dev_attr.attr, + &sensor_dev_attr_in7_input.dev_attr.attr, ++ &dev_attr_name.attr, + NULL + }; + +diff --git a/drivers/hwmon/fam15h_power.c b/drivers/hwmon/fam15h_power.c +index e8e18ca..ac2d6cb 100644 +--- a/drivers/hwmon/fam15h_power.c ++++ b/drivers/hwmon/fam15h_power.c +@@ -128,12 +128,12 @@ static bool __devinit fam15h_power_is_internal_node0(struct pci_dev *f4) + * counter saturations resulting in bogus power readings. + * We correct this value ourselves to cope with older BIOSes. + */ +-static DEFINE_PCI_DEVICE_TABLE(affected_device) = { ++static const struct pci_device_id affected_device[] = { + { PCI_VDEVICE(AMD, PCI_DEVICE_ID_AMD_15H_NB_F4) }, + { 0 } + }; + +-static void __devinit tweak_runavg_range(struct pci_dev *pdev) ++static void tweak_runavg_range(struct pci_dev *pdev) + { + u32 val; + +@@ -157,6 +157,16 @@ static void __devinit tweak_runavg_range(struct pci_dev *pdev) + REG_TDP_RUNNING_AVERAGE, val); + } + ++#ifdef CONFIG_PM ++static int fam15h_power_resume(struct pci_dev *pdev) ++{ ++ tweak_runavg_range(pdev); ++ return 0; ++} ++#else ++#define fam15h_power_resume NULL ++#endif ++ + static void __devinit fam15h_power_init_data(struct pci_dev *f4, + struct fam15h_power_data *data) + { +@@ -255,6 +265,7 @@ static struct pci_driver fam15h_power_driver = { + .id_table = fam15h_power_id_table, + .probe = fam15h_power_probe, + .remove = __devexit_p(fam15h_power_remove), ++ .resume = fam15h_power_resume, + }; + + static int __init fam15h_power_init(void) +diff --git a/drivers/input/serio/i8042-x86ia64io.h b/drivers/input/serio/i8042-x86ia64io.h +index d4ec371..cd1a843 100644 +--- a/drivers/input/serio/i8042-x86ia64io.h ++++ b/drivers/input/serio/i8042-x86ia64io.h +@@ -335,6 +335,12 @@ static const struct dmi_system_id __initconst i8042_dmi_nomux_table[] = { + }, + { + .matches = { ++ DMI_MATCH(DMI_SYS_VENDOR, "TOSHIBA"), ++ DMI_MATCH(DMI_PRODUCT_NAME, "SATELLITE C850D"), ++ }, ++ }, ++ { ++ .matches = { + DMI_MATCH(DMI_SYS_VENDOR, "ALIENWARE"), + DMI_MATCH(DMI_PRODUCT_NAME, "Sentia"), + }, +diff --git a/drivers/iommu/intr_remapping.c b/drivers/iommu/intr_remapping.c +index 6777ca0..73ca321 100644 +--- a/drivers/iommu/intr_remapping.c ++++ b/drivers/iommu/intr_remapping.c +@@ -752,6 +752,7 @@ int __init parse_ioapics_under_ir(void) + { + struct dmar_drhd_unit *drhd; + int ir_supported = 0; ++ int ioapic_idx; + + for_each_drhd_unit(drhd) { + struct intel_iommu *iommu = drhd->iommu; +@@ -764,13 +765,20 @@ int __init parse_ioapics_under_ir(void) + } + } + +- if (ir_supported && ir_ioapic_num != nr_ioapics) { +- printk(KERN_WARNING +- "Not all IO-APIC's listed under remapping hardware\n"); +- return -1; ++ if (!ir_supported) ++ return 0; ++ ++ for (ioapic_idx = 0; ioapic_idx < nr_ioapics; ioapic_idx++) { ++ int ioapic_id = mpc_ioapic_id(ioapic_idx); ++ if (!map_ioapic_to_ir(ioapic_id)) { ++ pr_err(FW_BUG "ioapic %d has no mapping iommu, " ++ "interrupt remapping will be disabled\n", ++ ioapic_id); ++ return -1; ++ } + } + +- return ir_supported; ++ return 1; + } + + int __init ir_dev_scope_init(void) +diff --git a/drivers/md/dm-table.c b/drivers/md/dm-table.c +index 8e91321..52848ab 100644 +--- a/drivers/md/dm-table.c ++++ 
b/drivers/md/dm-table.c +@@ -1350,17 +1350,25 @@ static int device_is_nonrot(struct dm_target *ti, struct dm_dev *dev, + return q && blk_queue_nonrot(q); + } + +-static bool dm_table_is_nonrot(struct dm_table *t) ++static int device_is_not_random(struct dm_target *ti, struct dm_dev *dev, ++ sector_t start, sector_t len, void *data) ++{ ++ struct request_queue *q = bdev_get_queue(dev->bdev); ++ ++ return q && !blk_queue_add_random(q); ++} ++ ++static bool dm_table_all_devices_attribute(struct dm_table *t, ++ iterate_devices_callout_fn func) + { + struct dm_target *ti; + unsigned i = 0; + +- /* Ensure that all underlying device are non-rotational. */ + while (i < dm_table_get_num_targets(t)) { + ti = dm_table_get_target(t, i++); + + if (!ti->type->iterate_devices || +- !ti->type->iterate_devices(ti, device_is_nonrot, NULL)) ++ !ti->type->iterate_devices(ti, func, NULL)) + return 0; + } + +@@ -1392,7 +1400,8 @@ void dm_table_set_restrictions(struct dm_table *t, struct request_queue *q, + if (!dm_table_discard_zeroes_data(t)) + q->limits.discard_zeroes_data = 0; + +- if (dm_table_is_nonrot(t)) ++ /* Ensure that all underlying devices are non-rotational. */ ++ if (dm_table_all_devices_attribute(t, device_is_nonrot)) + queue_flag_set_unlocked(QUEUE_FLAG_NONROT, q); + else + queue_flag_clear_unlocked(QUEUE_FLAG_NONROT, q); +@@ -1400,6 +1409,15 @@ void dm_table_set_restrictions(struct dm_table *t, struct request_queue *q, + dm_table_set_integrity(t); + + /* ++ * Determine whether or not this queue's I/O timings contribute ++ * to the entropy pool, Only request-based targets use this. ++ * Clear QUEUE_FLAG_ADD_RANDOM if any underlying device does not ++ * have it set. ++ */ ++ if (blk_queue_add_random(q) && dm_table_all_devices_attribute(t, device_is_not_random)) ++ queue_flag_clear_unlocked(QUEUE_FLAG_ADD_RANDOM, q); ++ ++ /* + * QUEUE_FLAG_STACKABLE must be set after all queue settings are + * visible to other CPUs because, once the flag is set, incoming bios + * are processed by request-based dm, which refers to the queue +diff --git a/drivers/md/dm.c b/drivers/md/dm.c +index 4720f68..502dcf7 100644 +--- a/drivers/md/dm.c ++++ b/drivers/md/dm.c +@@ -866,10 +866,14 @@ static void dm_done(struct request *clone, int error, bool mapped) + { + int r = error; + struct dm_rq_target_io *tio = clone->end_io_data; +- dm_request_endio_fn rq_end_io = tio->ti->type->rq_end_io; ++ dm_request_endio_fn rq_end_io = NULL; + +- if (mapped && rq_end_io) +- r = rq_end_io(tio->ti, clone, error, &tio->info); ++ if (tio->ti) { ++ rq_end_io = tio->ti->type->rq_end_io; ++ ++ if (mapped && rq_end_io) ++ r = rq_end_io(tio->ti, clone, error, &tio->info); ++ } + + if (r <= 0) + /* The target wants to complete the I/O */ +@@ -1566,15 +1570,6 @@ static int map_request(struct dm_target *ti, struct request *clone, + int r, requeued = 0; + struct dm_rq_target_io *tio = clone->end_io_data; + +- /* +- * Hold the md reference here for the in-flight I/O. +- * We can't rely on the reference count by device opener, +- * because the device may be closed during the request completion +- * when all bios are completed. +- * See the comment in rq_completed() too. 
+- */ +- dm_get(md); +- + tio->ti = ti; + r = ti->type->map_rq(ti, clone, &tio->info); + switch (r) { +@@ -1606,6 +1601,26 @@ static int map_request(struct dm_target *ti, struct request *clone, + return requeued; + } + ++static struct request *dm_start_request(struct mapped_device *md, struct request *orig) ++{ ++ struct request *clone; ++ ++ blk_start_request(orig); ++ clone = orig->special; ++ atomic_inc(&md->pending[rq_data_dir(clone)]); ++ ++ /* ++ * Hold the md reference here for the in-flight I/O. ++ * We can't rely on the reference count by device opener, ++ * because the device may be closed during the request completion ++ * when all bios are completed. ++ * See the comment in rq_completed() too. ++ */ ++ dm_get(md); ++ ++ return clone; ++} ++ + /* + * q->request_fn for request-based dm. + * Called with the queue lock held. +@@ -1635,14 +1650,21 @@ static void dm_request_fn(struct request_queue *q) + pos = blk_rq_pos(rq); + + ti = dm_table_find_target(map, pos); +- BUG_ON(!dm_target_is_valid(ti)); ++ if (!dm_target_is_valid(ti)) { ++ /* ++ * Must perform setup, that dm_done() requires, ++ * before calling dm_kill_unmapped_request ++ */ ++ DMERR_LIMIT("request attempted access beyond the end of device"); ++ clone = dm_start_request(md, rq); ++ dm_kill_unmapped_request(clone, -EIO); ++ continue; ++ } + + if (ti->type->busy && ti->type->busy(ti)) + goto delay_and_out; + +- blk_start_request(rq); +- clone = rq->special; +- atomic_inc(&md->pending[rq_data_dir(clone)]); ++ clone = dm_start_request(md, rq); + + spin_unlock(q->queue_lock); + if (map_request(ti, clone, md)) +@@ -1662,8 +1684,6 @@ delay_and_out: + blk_delay_queue(q, HZ / 10); + out: + dm_table_put(map); +- +- return; + } + + int dm_underlying_device_busy(struct request_queue *q) +diff --git a/drivers/md/raid10.c b/drivers/md/raid10.c +index 7a9eef6..0634ee5 100644 +--- a/drivers/md/raid10.c ++++ b/drivers/md/raid10.c +@@ -1226,14 +1226,16 @@ static int enough(struct r10conf *conf, int ignore) + do { + int n = conf->copies; + int cnt = 0; ++ int this = first; + while (n--) { +- if (conf->mirrors[first].rdev && +- first != ignore) ++ if (conf->mirrors[this].rdev && ++ this != ignore) + cnt++; +- first = (first+1) % conf->raid_disks; ++ this = (this+1) % conf->raid_disks; + } + if (cnt == 0) + return 0; ++ first = (first + conf->near_copies) % conf->raid_disks; + } while (first != 0); + return 1; + } +diff --git a/drivers/mmc/host/sdhci.c b/drivers/mmc/host/sdhci.c +index 6ce32a7..aaeaff2 100644 +--- a/drivers/mmc/host/sdhci.c ++++ b/drivers/mmc/host/sdhci.c +@@ -2712,8 +2712,9 @@ int sdhci_add_host(struct sdhci_host *host) + mmc_card_is_removable(mmc)) + mmc->caps |= MMC_CAP_NEEDS_POLL; + +- /* UHS-I mode(s) supported by the host controller. */ +- if (host->version >= SDHCI_SPEC_300) ++ /* Any UHS-I mode in caps implies SDR12 and SDR25 support. 
*/ ++ if (caps[1] & (SDHCI_SUPPORT_SDR104 | SDHCI_SUPPORT_SDR50 | ++ SDHCI_SUPPORT_DDR50)) + mmc->caps |= MMC_CAP_UHS_SDR12 | MMC_CAP_UHS_SDR25; + + /* SDR104 supports also implies SDR50 support */ +diff --git a/drivers/net/can/janz-ican3.c b/drivers/net/can/janz-ican3.c +index 32778d5..46194bc 100644 +--- a/drivers/net/can/janz-ican3.c ++++ b/drivers/net/can/janz-ican3.c +@@ -1250,7 +1250,6 @@ static irqreturn_t ican3_irq(int irq, void *dev_id) + */ + static int ican3_reset_module(struct ican3_dev *mod) + { +- u8 val = 1 << mod->num; + unsigned long start; + u8 runold, runnew; + +@@ -1264,8 +1263,7 @@ static int ican3_reset_module(struct ican3_dev *mod) + runold = ioread8(mod->dpm + TARGET_RUNNING); + + /* reset the module */ +- iowrite8(val, &mod->ctrl->reset_assert); +- iowrite8(val, &mod->ctrl->reset_deassert); ++ iowrite8(0x00, &mod->dpmctrl->hwreset); + + /* wait until the module has finished resetting and is running */ + start = jiffies; +diff --git a/drivers/net/can/ti_hecc.c b/drivers/net/can/ti_hecc.c +index 2adc294..79c70ae 100644 +--- a/drivers/net/can/ti_hecc.c ++++ b/drivers/net/can/ti_hecc.c +@@ -971,12 +971,12 @@ static int __devexit ti_hecc_remove(struct platform_device *pdev) + struct net_device *ndev = platform_get_drvdata(pdev); + struct ti_hecc_priv *priv = netdev_priv(ndev); + ++ unregister_candev(ndev); + clk_disable(priv->clk); + clk_put(priv->clk); + res = platform_get_resource(pdev, IORESOURCE_MEM, 0); + iounmap(priv->base); + release_mem_region(res->start, resource_size(res)); +- unregister_candev(ndev); + free_candev(ndev); + platform_set_drvdata(pdev, NULL); + +diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.c b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.c +index 2c1a5c0..4c50ac0 100644 +--- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.c ++++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.c +@@ -554,14 +554,16 @@ static inline void bnx2x_set_skb_rxhash(struct bnx2x *bp, union eth_rx_cqe *cqe, + static void bnx2x_csum_validate(struct sk_buff *skb, union eth_rx_cqe *cqe, + struct bnx2x_fastpath *fp) + { +- /* Do nothing if no IP/L4 csum validation was done */ +- ++ /* Do nothing if no L4 csum validation was done. ++ * We do not check whether IP csum was validated. For IPv4 we assume ++ * that if the card got as far as validating the L4 csum, it also ++ * validated the IP csum. IPv6 has no IP csum. ++ */ + if (cqe->fast_path_cqe.status_flags & +- (ETH_FAST_PATH_RX_CQE_IP_XSUM_NO_VALIDATION_FLG | +- ETH_FAST_PATH_RX_CQE_L4_XSUM_NO_VALIDATION_FLG)) ++ ETH_FAST_PATH_RX_CQE_L4_XSUM_NO_VALIDATION_FLG) + return; + +- /* If both IP/L4 validation were done, check if an error was found. */ ++ /* If L4 validation was done, check if an error was found. */ + + if (cqe->fast_path_cqe.type_error_flags & + (ETH_FAST_PATH_RX_CQE_IP_BAD_XSUM_FLG | +diff --git a/drivers/net/ethernet/broadcom/tg3.c b/drivers/net/ethernet/broadcom/tg3.c +index 6b258d9..01bc102 100644 +--- a/drivers/net/ethernet/broadcom/tg3.c ++++ b/drivers/net/ethernet/broadcom/tg3.c +@@ -14013,9 +14013,13 @@ static int __devinit tg3_get_invariants(struct tg3 *tp) + if (tg3_flag(tp, HW_TSO_1) || + tg3_flag(tp, HW_TSO_2) || + tg3_flag(tp, HW_TSO_3) || +- (tp->fw_needed && !tg3_flag(tp, ENABLE_ASF))) ++ tp->fw_needed) { ++ /* For firmware TSO, assume ASF is disabled. ++ * We'll disable TSO later if we discover ASF ++ * is enabled in tg3_get_eeprom_hw_cfg(). 
++ */ + tg3_flag_set(tp, TSO_CAPABLE); +- else { ++ } else { + tg3_flag_clear(tp, TSO_CAPABLE); + tg3_flag_clear(tp, TSO_BUG); + tp->fw_needed = NULL; +@@ -14290,6 +14294,12 @@ static int __devinit tg3_get_invariants(struct tg3 *tp) + */ + tg3_get_eeprom_hw_cfg(tp); + ++ if (tp->fw_needed && tg3_flag(tp, ENABLE_ASF)) { ++ tg3_flag_clear(tp, TSO_CAPABLE); ++ tg3_flag_clear(tp, TSO_BUG); ++ tp->fw_needed = NULL; ++ } ++ + if (tg3_flag(tp, ENABLE_APE)) { + /* Allow reads and writes to the + * APE register and memory space. +diff --git a/drivers/net/ethernet/qlogic/netxen/netxen_nic_main.c b/drivers/net/ethernet/qlogic/netxen/netxen_nic_main.c +index 8cf3173..da5204d 100644 +--- a/drivers/net/ethernet/qlogic/netxen/netxen_nic_main.c ++++ b/drivers/net/ethernet/qlogic/netxen/netxen_nic_main.c +@@ -1351,6 +1351,10 @@ static void netxen_mask_aer_correctable(struct netxen_adapter *adapter) + struct pci_dev *root = pdev->bus->self; + u32 aer_pos; + ++ /* root bus? */ ++ if (!root) ++ return; ++ + if (adapter->ahw.board_type != NETXEN_BRDTYPE_P3_4_GB_MM && + adapter->ahw.board_type != NETXEN_BRDTYPE_P3_10G_TP) + return; +diff --git a/drivers/net/ethernet/ti/davinci_cpdma.c b/drivers/net/ethernet/ti/davinci_cpdma.c +index c97d2f5..bfc3b0d 100644 +--- a/drivers/net/ethernet/ti/davinci_cpdma.c ++++ b/drivers/net/ethernet/ti/davinci_cpdma.c +@@ -851,6 +851,7 @@ int cpdma_chan_stop(struct cpdma_chan *chan) + + next_dma = desc_read(desc, hw_next); + chan->head = desc_from_phys(pool, next_dma); ++ chan->count--; + chan->stats.teardown_dequeue++; + + /* issue callback without locks held */ +diff --git a/drivers/net/ppp/pppoe.c b/drivers/net/ppp/pppoe.c +index bc9a4bb..1161584 100644 +--- a/drivers/net/ppp/pppoe.c ++++ b/drivers/net/ppp/pppoe.c +@@ -576,7 +576,7 @@ static int pppoe_release(struct socket *sock) + + po = pppox_sk(sk); + +- if (sk->sk_state & (PPPOX_CONNECTED | PPPOX_BOUND)) { ++ if (sk->sk_state & (PPPOX_CONNECTED | PPPOX_BOUND | PPPOX_ZOMBIE)) { + dev_put(po->pppoe_dev); + po->pppoe_dev = NULL; + } +diff --git a/drivers/net/usb/asix.c b/drivers/net/usb/asix.c +index fc147a5..6729585 100644 +--- a/drivers/net/usb/asix.c ++++ b/drivers/net/usb/asix.c +@@ -1648,6 +1648,10 @@ static const struct usb_device_id products [] = { + USB_DEVICE (0x2001, 0x3c05), + .driver_info = (unsigned long) &ax88772_info, + }, { ++ // DLink DUB-E100 H/W Ver C1 ++ USB_DEVICE (0x2001, 0x1a02), ++ .driver_info = (unsigned long) &ax88772_info, ++}, { + // Linksys USB1000 + USB_DEVICE (0x1737, 0x0039), + .driver_info = (unsigned long) &ax88178_info, +diff --git a/drivers/net/usb/sierra_net.c b/drivers/net/usb/sierra_net.c +index 864448b..e773250 100644 +--- a/drivers/net/usb/sierra_net.c ++++ b/drivers/net/usb/sierra_net.c +@@ -678,7 +678,7 @@ static int sierra_net_get_fw_attr(struct usbnet *dev, u16 *datap) + return -EIO; + } + +- *datap = *attrdata; ++ *datap = le16_to_cpu(*attrdata); + + kfree(attrdata); + return result; +diff --git a/drivers/net/wan/ixp4xx_hss.c b/drivers/net/wan/ixp4xx_hss.c +index aaaca9a..3f575af 100644 +--- a/drivers/net/wan/ixp4xx_hss.c ++++ b/drivers/net/wan/ixp4xx_hss.c +@@ -10,6 +10,7 @@ + + #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt + ++#include + #include + #include + #include +diff --git a/drivers/net/wireless/brcm80211/brcmfmac/dhd_common.c b/drivers/net/wireless/brcm80211/brcmfmac/dhd_common.c +index 8918261..746202f 100644 +--- a/drivers/net/wireless/brcm80211/brcmfmac/dhd_common.c ++++ b/drivers/net/wireless/brcm80211/brcmfmac/dhd_common.c +@@ -775,8 +775,11 @@ static void 
brcmf_c_arp_offload_set(struct brcmf_pub *drvr, int arp_mode) + { + char iovbuf[32]; + int retcode; ++ __le32 arp_mode_le; + +- brcmf_c_mkiovar("arp_ol", (char *)&arp_mode, 4, iovbuf, sizeof(iovbuf)); ++ arp_mode_le = cpu_to_le32(arp_mode); ++ brcmf_c_mkiovar("arp_ol", (char *)&arp_mode_le, 4, iovbuf, ++ sizeof(iovbuf)); + retcode = brcmf_proto_cdc_set_dcmd(drvr, 0, BRCMF_C_SET_VAR, + iovbuf, sizeof(iovbuf)); + retcode = retcode >= 0 ? 0 : retcode; +@@ -792,8 +795,11 @@ static void brcmf_c_arp_offload_enable(struct brcmf_pub *drvr, int arp_enable) + { + char iovbuf[32]; + int retcode; ++ __le32 arp_enable_le; + +- brcmf_c_mkiovar("arpoe", (char *)&arp_enable, 4, ++ arp_enable_le = cpu_to_le32(arp_enable); ++ ++ brcmf_c_mkiovar("arpoe", (char *)&arp_enable_le, 4, + iovbuf, sizeof(iovbuf)); + retcode = brcmf_proto_cdc_set_dcmd(drvr, 0, BRCMF_C_SET_VAR, + iovbuf, sizeof(iovbuf)); +@@ -814,10 +820,10 @@ int brcmf_c_preinit_dcmds(struct brcmf_pub *drvr) + char buf[128], *ptr; + u32 dongle_align = BRCMF_SDALIGN; + u32 glom = 0; +- u32 roaming = 1; +- uint bcn_timeout = 3; +- int scan_assoc_time = 40; +- int scan_unassoc_time = 40; ++ __le32 roaming_le = cpu_to_le32(1); ++ __le32 bcn_timeout_le = cpu_to_le32(3); ++ __le32 scan_assoc_time_le = cpu_to_le32(40); ++ __le32 scan_unassoc_time_le = cpu_to_le32(40); + int i; + + brcmf_os_proto_block(drvr); +@@ -852,14 +858,14 @@ int brcmf_c_preinit_dcmds(struct brcmf_pub *drvr) + + /* Setup timeout if Beacons are lost and roam is off to report + link down */ +- brcmf_c_mkiovar("bcn_timeout", (char *)&bcn_timeout, 4, iovbuf, ++ brcmf_c_mkiovar("bcn_timeout", (char *)&bcn_timeout_le, 4, iovbuf, + sizeof(iovbuf)); + brcmf_proto_cdc_set_dcmd(drvr, 0, BRCMF_C_SET_VAR, iovbuf, + sizeof(iovbuf)); + + /* Enable/Disable build-in roaming to allowed ext supplicant to take + of romaing */ +- brcmf_c_mkiovar("roam_off", (char *)&roaming, 4, ++ brcmf_c_mkiovar("roam_off", (char *)&roaming_le, 4, + iovbuf, sizeof(iovbuf)); + brcmf_proto_cdc_set_dcmd(drvr, 0, BRCMF_C_SET_VAR, iovbuf, + sizeof(iovbuf)); +@@ -874,9 +880,9 @@ int brcmf_c_preinit_dcmds(struct brcmf_pub *drvr) + sizeof(iovbuf)); + + brcmf_proto_cdc_set_dcmd(drvr, 0, BRCMF_C_SET_SCAN_CHANNEL_TIME, +- (char *)&scan_assoc_time, sizeof(scan_assoc_time)); ++ (char *)&scan_assoc_time_le, sizeof(scan_assoc_time_le)); + brcmf_proto_cdc_set_dcmd(drvr, 0, BRCMF_C_SET_SCAN_UNASSOC_TIME, +- (char *)&scan_unassoc_time, sizeof(scan_unassoc_time)); ++ (char *)&scan_unassoc_time_le, sizeof(scan_unassoc_time_le)); + + /* Set and enable ARP offload feature */ + brcmf_c_arp_offload_set(drvr, BRCMF_ARPOL_MODE); +diff --git a/drivers/net/wireless/brcm80211/brcmfmac/wl_cfg80211.c b/drivers/net/wireless/brcm80211/brcmfmac/wl_cfg80211.c +index 5eddabe..e4e326a 100644 +--- a/drivers/net/wireless/brcm80211/brcmfmac/wl_cfg80211.c ++++ b/drivers/net/wireless/brcm80211/brcmfmac/wl_cfg80211.c +@@ -498,8 +498,10 @@ static void wl_iscan_prep(struct brcmf_scan_params_le *params_le, + params_le->active_time = cpu_to_le32(-1); + params_le->passive_time = cpu_to_le32(-1); + params_le->home_time = cpu_to_le32(-1); +- if (ssid && ssid->SSID_len) +- memcpy(¶ms_le->ssid_le, ssid, sizeof(struct brcmf_ssid)); ++ if (ssid && ssid->SSID_len) { ++ params_le->ssid_le.SSID_len = cpu_to_le32(ssid->SSID_len); ++ memcpy(¶ms_le->ssid_le.SSID, ssid->SSID, ssid->SSID_len); ++ } + } + + static s32 +diff --git a/drivers/net/wireless/rtlwifi/rtl8192ce/def.h b/drivers/net/wireless/rtlwifi/rtl8192ce/def.h +index 9fc804d..7305a47 100644 +--- 
a/drivers/net/wireless/rtlwifi/rtl8192ce/def.h ++++ b/drivers/net/wireless/rtlwifi/rtl8192ce/def.h +@@ -117,6 +117,7 @@ + + #define CHIP_VER_B BIT(4) + #define CHIP_92C_BITMASK BIT(0) ++#define CHIP_UNKNOWN BIT(7) + #define CHIP_92C_1T2R 0x03 + #define CHIP_92C 0x01 + #define CHIP_88C 0x00 +diff --git a/drivers/net/wireless/rtlwifi/rtl8192ce/hw.c b/drivers/net/wireless/rtlwifi/rtl8192ce/hw.c +index a3deaef..cb480d8 100644 +--- a/drivers/net/wireless/rtlwifi/rtl8192ce/hw.c ++++ b/drivers/net/wireless/rtlwifi/rtl8192ce/hw.c +@@ -1001,8 +1001,16 @@ static enum version_8192c _rtl92ce_read_chip_version(struct ieee80211_hw *hw) + version = (value32 & TYPE_ID) ? VERSION_A_CHIP_92C : + VERSION_A_CHIP_88C; + } else { +- version = (value32 & TYPE_ID) ? VERSION_B_CHIP_92C : +- VERSION_B_CHIP_88C; ++ version = (enum version_8192c) (CHIP_VER_B | ++ ((value32 & TYPE_ID) ? CHIP_92C_BITMASK : 0) | ++ ((value32 & VENDOR_ID) ? CHIP_VENDOR_UMC : 0)); ++ if ((!IS_CHIP_VENDOR_UMC(version)) && (value32 & ++ CHIP_VER_RTL_MASK)) { ++ version = (enum version_8192c)(version | ++ ((((value32 & CHIP_VER_RTL_MASK) == BIT(12)) ++ ? CHIP_VENDOR_UMC_B_CUT : CHIP_UNKNOWN) | ++ CHIP_VENDOR_UMC)); ++ } + } + + switch (version) { +diff --git a/drivers/net/wireless/rtlwifi/rtl8192ce/sw.c b/drivers/net/wireless/rtlwifi/rtl8192ce/sw.c +index f2aa33d..df852e8 100644 +--- a/drivers/net/wireless/rtlwifi/rtl8192ce/sw.c ++++ b/drivers/net/wireless/rtlwifi/rtl8192ce/sw.c +@@ -165,12 +165,14 @@ int rtl92c_init_sw_vars(struct ieee80211_hw *hw) + + /* request fw */ + if (IS_VENDOR_UMC_A_CUT(rtlhal->version) && +- !IS_92C_SERIAL(rtlhal->version)) ++ !IS_92C_SERIAL(rtlhal->version)) { + fw_name = "rtlwifi/rtl8192cfwU.bin"; +- else if (IS_81xxC_VENDOR_UMC_B_CUT(rtlhal->version)) ++ } else if (IS_81xxC_VENDOR_UMC_B_CUT(rtlhal->version)) { + fw_name = "rtlwifi/rtl8192cfwU_B.bin"; +- else ++ pr_info("****** This B_CUT device may not work with kernels 3.6 and earlier\n"); ++ } else { + fw_name = rtlpriv->cfg->fw_name; ++ } + err = request_firmware(&firmware, fw_name, rtlpriv->io.dev); + if (err) { + RT_TRACE(rtlpriv, COMP_ERR, DBG_EMERG, +diff --git a/drivers/pci/hotplug/acpiphp_glue.c b/drivers/pci/hotplug/acpiphp_glue.c +index 9ddf69e..74d38ca 100644 +--- a/drivers/pci/hotplug/acpiphp_glue.c ++++ b/drivers/pci/hotplug/acpiphp_glue.c +@@ -132,6 +132,15 @@ register_slot(acpi_handle handle, u32 lvl, void *context, void **rv) + if (!acpi_pci_check_ejectable(pbus, handle) && !is_dock_device(handle)) + return AE_OK; + ++ status = acpi_evaluate_integer(handle, "_ADR", NULL, &adr); ++ if (ACPI_FAILURE(status)) { ++ warn("can't evaluate _ADR (%#x)\n", status); ++ return AE_OK; ++ } ++ ++ device = (adr >> 16) & 0xffff; ++ function = adr & 0xffff; ++ + pdev = pbus->self; + if (pdev && pci_is_pcie(pdev)) { + tmp = acpi_find_root_bridge_handle(pdev); +@@ -144,10 +153,6 @@ register_slot(acpi_handle handle, u32 lvl, void *context, void **rv) + } + } + +- acpi_evaluate_integer(handle, "_ADR", NULL, &adr); +- device = (adr >> 16) & 0xffff; +- function = adr & 0xffff; +- + newfunc = kzalloc(sizeof(struct acpiphp_func), GFP_KERNEL); + if (!newfunc) + return AE_NO_MEMORY; +diff --git a/drivers/platform/x86/asus-laptop.c b/drivers/platform/x86/asus-laptop.c +index edaccad..f75a4c8 100644 +--- a/drivers/platform/x86/asus-laptop.c ++++ b/drivers/platform/x86/asus-laptop.c +@@ -823,9 +823,9 @@ static ssize_t show_infos(struct device *dev, + * The significance of others is yet to be found. + * If we don't find the method, we assume the device are present. 
+ */ +- rv = acpi_evaluate_integer(asus->handle, "HRWS", NULL, &temp); ++ rv = acpi_evaluate_integer(asus->handle, "HWRS", NULL, &temp); + if (!ACPI_FAILURE(rv)) +- len += sprintf(page + len, "HRWS value : %#x\n", ++ len += sprintf(page + len, "HWRS value : %#x\n", + (uint) temp); + /* + * Another value for userspace: the ASYM method returns 0x02 for +@@ -1660,9 +1660,9 @@ static int asus_laptop_get_info(struct asus_laptop *asus) + * The significance of others is yet to be found. + */ + status = +- acpi_evaluate_integer(asus->handle, "HRWS", NULL, &hwrs_result); ++ acpi_evaluate_integer(asus->handle, "HWRS", NULL, &hwrs_result); + if (!ACPI_FAILURE(status)) +- pr_notice(" HRWS returned %x", (int)hwrs_result); ++ pr_notice(" HWRS returned %x", (int)hwrs_result); + + if (!acpi_check_handle(asus->handle, METHOD_WL_STATUS, NULL)) + asus->have_rsts = true; +diff --git a/drivers/rtc/rtc-twl.c b/drivers/rtc/rtc-twl.c +index 20687d5..a3e98f1 100644 +--- a/drivers/rtc/rtc-twl.c ++++ b/drivers/rtc/rtc-twl.c +@@ -462,6 +462,11 @@ static int __devinit twl_rtc_probe(struct platform_device *pdev) + goto out1; + } + ++ /* ensure interrupts are disabled, bootloaders can be strange */ ++ ret = twl_rtc_write_u8(0, REG_RTC_INTERRUPTS_REG); ++ if (ret < 0) ++ dev_warn(&pdev->dev, "unable to disable interrupt\n"); ++ + /* init cached IRQ enable bits */ + ret = twl_rtc_read_u8(&rtc_irq_bits, REG_RTC_INTERRUPTS_REG); + if (ret < 0) +diff --git a/drivers/scsi/bnx2i/bnx2i_hwi.c b/drivers/scsi/bnx2i/bnx2i_hwi.c +index 1ad0b82..1069974 100644 +--- a/drivers/scsi/bnx2i/bnx2i_hwi.c ++++ b/drivers/scsi/bnx2i/bnx2i_hwi.c +@@ -1264,6 +1264,9 @@ int bnx2i_send_fw_iscsi_init_msg(struct bnx2i_hba *hba) + int rc = 0; + u64 mask64; + ++ memset(&iscsi_init, 0x00, sizeof(struct iscsi_kwqe_init1)); ++ memset(&iscsi_init2, 0x00, sizeof(struct iscsi_kwqe_init2)); ++ + bnx2i_adjust_qp_size(hba); + + iscsi_init.flags = +diff --git a/drivers/scsi/hpsa.c b/drivers/scsi/hpsa.c +index b4d2c86..be9aad8 100644 +--- a/drivers/scsi/hpsa.c ++++ b/drivers/scsi/hpsa.c +@@ -1213,8 +1213,9 @@ static void complete_scsi_command(struct CommandList *cp) + } + break; + case CMD_PROTOCOL_ERR: ++ cmd->result = DID_ERROR << 16; + dev_warn(&h->pdev->dev, "cp %p has " +- "protocol error \n", cp); ++ "protocol error\n", cp); + break; + case CMD_HARDWARE_ERR: + cmd->result = DID_ERROR << 16; +diff --git a/drivers/scsi/mpt2sas/mpt2sas_base.c b/drivers/scsi/mpt2sas/mpt2sas_base.c +index 98cb5e6..17de348 100644 +--- a/drivers/scsi/mpt2sas/mpt2sas_base.c ++++ b/drivers/scsi/mpt2sas/mpt2sas_base.c +@@ -1156,6 +1156,13 @@ _base_check_enable_msix(struct MPT2SAS_ADAPTER *ioc) + u16 message_control; + + ++ /* Check whether controller SAS2008 B0 controller, ++ if it is SAS2008 B0 controller use IO-APIC instead of MSIX */ ++ if (ioc->pdev->device == MPI2_MFGPAGE_DEVID_SAS2008 && ++ ioc->pdev->revision == 0x01) { ++ return -EINVAL; ++ } ++ + base = pci_find_capability(ioc->pdev, PCI_CAP_ID_MSIX); + if (!base) { + dfailprintk(ioc, printk(MPT2SAS_INFO_FMT "msix not " +diff --git a/drivers/target/target_core_transport.c b/drivers/target/target_core_transport.c +index 597fb9b..34d114a 100644 +--- a/drivers/target/target_core_transport.c ++++ b/drivers/target/target_core_transport.c +@@ -3039,15 +3039,20 @@ static int transport_generic_cmd_sequencer( + /* Returns CHECK_CONDITION + INVALID_CDB_FIELD */ + goto out_invalid_cdb_field; + } +- ++ /* ++ * For the overflow case keep the existing fabric provided ++ * ->data_length. 
Otherwise for the underflow case, reset ++ * ->data_length to the smaller SCSI expected data transfer ++ * length. ++ */ + if (size > cmd->data_length) { + cmd->se_cmd_flags |= SCF_OVERFLOW_BIT; + cmd->residual_count = (size - cmd->data_length); + } else { + cmd->se_cmd_flags |= SCF_UNDERFLOW_BIT; + cmd->residual_count = (cmd->data_length - size); ++ cmd->data_length = size; + } +- cmd->data_length = size; + } + + /* reject any command that we don't have a handler for */ +diff --git a/drivers/tty/serial/pch_uart.c b/drivers/tty/serial/pch_uart.c +index 08b92a6..8d70fbc 100644 +--- a/drivers/tty/serial/pch_uart.c ++++ b/drivers/tty/serial/pch_uart.c +@@ -236,6 +236,9 @@ struct eg20t_port { + int tx_dma_use; + void *rx_buf_virt; + dma_addr_t rx_buf_dma; ++ ++ /* protect the eg20t_port private structure and io access to membase */ ++ spinlock_t lock; + }; + + /** +@@ -964,7 +967,7 @@ static irqreturn_t pch_uart_interrupt(int irq, void *dev_id) + unsigned int iid; + unsigned long flags; + +- spin_lock_irqsave(&priv->port.lock, flags); ++ spin_lock_irqsave(&priv->lock, flags); + handled = 0; + while ((iid = pch_uart_hal_get_iid(priv)) > 1) { + switch (iid) { +@@ -1017,7 +1020,7 @@ static irqreturn_t pch_uart_interrupt(int irq, void *dev_id) + priv->int_dis_flag = 0; + } + +- spin_unlock_irqrestore(&priv->port.lock, flags); ++ spin_unlock_irqrestore(&priv->lock, flags); + return IRQ_RETVAL(handled); + } + +@@ -1131,9 +1134,9 @@ static void pch_uart_break_ctl(struct uart_port *port, int ctl) + unsigned long flags; + + priv = container_of(port, struct eg20t_port, port); +- spin_lock_irqsave(&port->lock, flags); ++ spin_lock_irqsave(&priv->lock, flags); + pch_uart_hal_set_break(priv, ctl); +- spin_unlock_irqrestore(&port->lock, flags); ++ spin_unlock_irqrestore(&priv->lock, flags); + } + + /* Grab any interrupt resources and initialise any low level driver state. 
*/ +@@ -1284,7 +1287,8 @@ static void pch_uart_set_termios(struct uart_port *port, + + baud = uart_get_baud_rate(port, termios, old, 0, port->uartclk / 16); + +- spin_lock_irqsave(&port->lock, flags); ++ spin_lock_irqsave(&priv->lock, flags); ++ spin_lock(&port->lock); + + uart_update_timeout(port, termios->c_cflag, baud); + rtn = pch_uart_hal_set_line(priv, baud, parity, bits, stb); +@@ -1297,7 +1301,8 @@ static void pch_uart_set_termios(struct uart_port *port, + tty_termios_encode_baud_rate(termios, baud, baud); + + out: +- spin_unlock_irqrestore(&port->lock, flags); ++ spin_unlock(&port->lock); ++ spin_unlock_irqrestore(&priv->lock, flags); + } + + static const char *pch_uart_type(struct uart_port *port) +@@ -1449,6 +1454,8 @@ static struct eg20t_port *pch_uart_init_port(struct pci_dev *pdev, + pci_enable_msi(pdev); + pci_set_master(pdev); + ++ spin_lock_init(&priv->lock); ++ + iobase = pci_resource_start(pdev, 0); + mapbase = pci_resource_start(pdev, 1); + priv->mapbase = mapbase; +diff --git a/drivers/usb/core/devices.c b/drivers/usb/core/devices.c +index d956965..3440812 100644 +--- a/drivers/usb/core/devices.c ++++ b/drivers/usb/core/devices.c +@@ -624,7 +624,7 @@ static ssize_t usb_device_read(struct file *file, char __user *buf, + /* print devices for all busses */ + list_for_each_entry(bus, &usb_bus_list, bus_list) { + /* recurse through all children of the root hub */ +- if (!bus->root_hub) ++ if (!bus_to_hcd(bus)->rh_registered) + continue; + usb_lock_device(bus->root_hub); + ret = usb_device_dump(&buf, &nbytes, &skip_bytes, ppos, +diff --git a/drivers/usb/core/hcd.c b/drivers/usb/core/hcd.c +index 8cb9304..032e5a6 100644 +--- a/drivers/usb/core/hcd.c ++++ b/drivers/usb/core/hcd.c +@@ -1002,10 +1002,7 @@ static int register_root_hub(struct usb_hcd *hcd) + if (retval) { + dev_err (parent_dev, "can't register root hub for %s, %d\n", + dev_name(&usb_dev->dev), retval); +- } +- mutex_unlock(&usb_bus_list_lock); +- +- if (retval == 0) { ++ } else { + spin_lock_irq (&hcd_root_hub_lock); + hcd->rh_registered = 1; + spin_unlock_irq (&hcd_root_hub_lock); +@@ -1014,6 +1011,7 @@ static int register_root_hub(struct usb_hcd *hcd) + if (HCD_DEAD(hcd)) + usb_hc_died (hcd); /* This time clean up */ + } ++ mutex_unlock(&usb_bus_list_lock); + + return retval; + } +diff --git a/drivers/usb/gadget/dummy_hcd.c b/drivers/usb/gadget/dummy_hcd.c +index 527736e..d584eaf 100644 +--- a/drivers/usb/gadget/dummy_hcd.c ++++ b/drivers/usb/gadget/dummy_hcd.c +@@ -2292,10 +2292,8 @@ static int dummy_hcd_probe(struct platform_device *pdev) + hs_hcd->has_tt = 1; + + retval = usb_add_hcd(hs_hcd, 0, 0); +- if (retval != 0) { +- usb_put_hcd(hs_hcd); +- return retval; +- } ++ if (retval) ++ goto put_usb2_hcd; + + if (mod_data.is_super_speed) { + ss_hcd = usb_create_shared_hcd(&dummy_hcd, &pdev->dev, +@@ -2314,6 +2312,8 @@ static int dummy_hcd_probe(struct platform_device *pdev) + put_usb3_hcd: + usb_put_hcd(ss_hcd); + dealloc_usb2_hcd: ++ usb_remove_hcd(hs_hcd); ++put_usb2_hcd: + usb_put_hcd(hs_hcd); + the_controller.hs_hcd = the_controller.ss_hcd = NULL; + return retval; +diff --git a/drivers/watchdog/hpwdt.c b/drivers/watchdog/hpwdt.c +index 3c166d3..f62be89 100644 +--- a/drivers/watchdog/hpwdt.c ++++ b/drivers/watchdog/hpwdt.c +@@ -813,6 +813,9 @@ static int __devinit hpwdt_init_one(struct pci_dev *dev, + hpwdt_timer_reg = pci_mem_addr + 0x70; + hpwdt_timer_con = pci_mem_addr + 0x72; + ++ /* Make sure that timer is disabled until /dev/watchdog is opened */ ++ hpwdt_stop(); ++ + /* Make sure that we have a valid 
soft_margin */ + if (hpwdt_change_timer(soft_margin)) + hpwdt_change_timer(DEFAULT_MARGIN); +diff --git a/fs/cifs/cifs_unicode.c b/fs/cifs/cifs_unicode.c +index 1b2e180..667776e 100644 +--- a/fs/cifs/cifs_unicode.c ++++ b/fs/cifs/cifs_unicode.c +@@ -327,6 +327,6 @@ cifsConvertToUCS(__le16 *target, const char *source, int srclen, + } + + ctoUCS_out: +- return i; ++ return j; + } + +diff --git a/fs/dcache.c b/fs/dcache.c +index eb723d3..63c0c6b 100644 +--- a/fs/dcache.c ++++ b/fs/dcache.c +@@ -311,7 +311,7 @@ static struct dentry *d_kill(struct dentry *dentry, struct dentry *parent) + * Inform try_to_ascend() that we are no longer attached to the + * dentry tree + */ +- dentry->d_flags |= DCACHE_DISCONNECTED; ++ dentry->d_flags |= DCACHE_DENTRY_KILLED; + if (parent) + spin_unlock(&parent->d_lock); + dentry_iput(dentry); +@@ -968,7 +968,7 @@ static struct dentry *try_to_ascend(struct dentry *old, int locked, unsigned seq + * or deletion + */ + if (new != old->d_parent || +- (old->d_flags & DCACHE_DISCONNECTED) || ++ (old->d_flags & DCACHE_DENTRY_KILLED) || + (!locked && read_seqretry(&rename_lock, seq))) { + spin_unlock(&new->d_lock); + new = NULL; +@@ -1054,6 +1054,8 @@ positive: + return 1; + + rename_retry: ++ if (locked) ++ goto again; + locked = 1; + write_seqlock(&rename_lock); + goto again; +@@ -1156,6 +1158,8 @@ out: + rename_retry: + if (found) + return found; ++ if (locked) ++ goto again; + locked = 1; + write_seqlock(&rename_lock); + goto again; +@@ -2922,6 +2926,8 @@ resume: + return; + + rename_retry: ++ if (locked) ++ goto again; + locked = 1; + write_seqlock(&rename_lock); + goto again; +diff --git a/fs/proc/proc_sysctl.c b/fs/proc/proc_sysctl.c +index 53c3bce..0be1aa4 100644 +--- a/fs/proc/proc_sysctl.c ++++ b/fs/proc/proc_sysctl.c +@@ -123,9 +123,6 @@ static struct dentry *proc_sys_lookup(struct inode *dir, struct dentry *dentry, + + err = ERR_PTR(-ENOMEM); + inode = proc_sys_make_inode(dir->i_sb, h ? h : head, p); +- if (h) +- sysctl_head_finish(h); +- + if (!inode) + goto out; + +@@ -134,6 +131,8 @@ static struct dentry *proc_sys_lookup(struct inode *dir, struct dentry *dentry, + d_add(dentry, inode); + + out: ++ if (h) ++ sysctl_head_finish(h); + sysctl_head_finish(head); + return err; + } +diff --git a/include/linux/dcache.h b/include/linux/dcache.h +index 4eb8c80..1dfe974 100644 +--- a/include/linux/dcache.h ++++ b/include/linux/dcache.h +@@ -219,6 +219,8 @@ struct dentry_operations { + #define DCACHE_MANAGED_DENTRY \ + (DCACHE_MOUNTED|DCACHE_NEED_AUTOMOUNT|DCACHE_MANAGE_TRANSIT) + ++#define DCACHE_DENTRY_KILLED 0x100000 ++ + extern seqlock_t rename_lock; + + static inline int dname_external(struct dentry *dentry) +diff --git a/include/linux/hid.h b/include/linux/hid.h +index c235e4e..331e2ef 100644 +--- a/include/linux/hid.h ++++ b/include/linux/hid.h +@@ -875,7 +875,7 @@ static inline int hid_hw_power(struct hid_device *hdev, int level) + return hdev->ll_driver->power ? 
hdev->ll_driver->power(hdev, level) : 0; + } + +-void hid_report_raw_event(struct hid_device *hid, int type, u8 *data, int size, ++int hid_report_raw_event(struct hid_device *hid, int type, u8 *data, int size, + int interrupt); + + extern int hid_generic_init(void); +diff --git a/include/linux/hidraw.h b/include/linux/hidraw.h +index 4b88e69..45e9fcb 100644 +--- a/include/linux/hidraw.h ++++ b/include/linux/hidraw.h +@@ -76,13 +76,13 @@ struct hidraw_list { + #ifdef CONFIG_HIDRAW + int hidraw_init(void); + void hidraw_exit(void); +-void hidraw_report_event(struct hid_device *, u8 *, int); ++int hidraw_report_event(struct hid_device *, u8 *, int); + int hidraw_connect(struct hid_device *); + void hidraw_disconnect(struct hid_device *); + #else + static inline int hidraw_init(void) { return 0; } + static inline void hidraw_exit(void) { } +-static inline void hidraw_report_event(struct hid_device *hid, u8 *data, int len) { } ++static inline int hidraw_report_event(struct hid_device *hid, u8 *data, int len) { return 0; } + static inline int hidraw_connect(struct hid_device *hid) { return -1; } + static inline void hidraw_disconnect(struct hid_device *hid) { } + #endif +diff --git a/include/linux/memory.h b/include/linux/memory.h +index 935699b..6bea2c2 100644 +--- a/include/linux/memory.h ++++ b/include/linux/memory.h +@@ -20,7 +20,7 @@ + #include + #include + +-#define MIN_MEMORY_BLOCK_SIZE (1 << SECTION_SIZE_BITS) ++#define MIN_MEMORY_BLOCK_SIZE (1UL << SECTION_SIZE_BITS) + + struct memory_block { + unsigned long start_section_nr; +diff --git a/include/linux/xfrm.h b/include/linux/xfrm.h +index 22e61fd..28e493b 100644 +--- a/include/linux/xfrm.h ++++ b/include/linux/xfrm.h +@@ -84,6 +84,8 @@ struct xfrm_replay_state { + __u32 bitmap; + }; + ++#define XFRMA_REPLAY_ESN_MAX 4096 ++ + struct xfrm_replay_state_esn { + unsigned int bmp_len; + __u32 oseq; +diff --git a/include/net/bluetooth/smp.h b/include/net/bluetooth/smp.h +index 15b97d5..fe810d4 100644 +--- a/include/net/bluetooth/smp.h ++++ b/include/net/bluetooth/smp.h +@@ -131,7 +131,7 @@ struct smp_chan { + }; + + /* SMP Commands */ +-int smp_conn_security(struct l2cap_conn *conn, __u8 sec_level); ++int smp_conn_security(struct hci_conn *hcon, __u8 sec_level); + int smp_sig_channel(struct l2cap_conn *conn, struct sk_buff *skb); + int smp_distribute_keys(struct l2cap_conn *conn, __u8 force); + +diff --git a/include/net/xfrm.h b/include/net/xfrm.h +index b203e14..921f627 100644 +--- a/include/net/xfrm.h ++++ b/include/net/xfrm.h +@@ -269,6 +269,9 @@ struct xfrm_replay { + int (*check)(struct xfrm_state *x, + struct sk_buff *skb, + __be32 net_seq); ++ int (*recheck)(struct xfrm_state *x, ++ struct sk_buff *skb, ++ __be32 net_seq); + void (*notify)(struct xfrm_state *x, int event); + int (*overflow)(struct xfrm_state *x, struct sk_buff *skb); + }; +diff --git a/include/trace/events/kmem.h b/include/trace/events/kmem.h +index a9c87ad..a9536da 100644 +--- a/include/trace/events/kmem.h ++++ b/include/trace/events/kmem.h +@@ -214,7 +214,7 @@ TRACE_EVENT(mm_page_alloc, + + TP_printk("page=%p pfn=%lu order=%d migratetype=%d gfp_flags=%s", + __entry->page, +- page_to_pfn(__entry->page), ++ __entry->page ? page_to_pfn(__entry->page) : 0, + __entry->order, + __entry->migratetype, + show_gfp_flags(__entry->gfp_flags)) +@@ -240,7 +240,7 @@ DECLARE_EVENT_CLASS(mm_page, + + TP_printk("page=%p pfn=%lu order=%u migratetype=%d percpu_refill=%d", + __entry->page, +- page_to_pfn(__entry->page), ++ __entry->page ? 
page_to_pfn(__entry->page) : 0, + __entry->order, + __entry->migratetype, + __entry->order == 0) +diff --git a/kernel/async.c b/kernel/async.c +index 80b74b8..009f516 100644 +--- a/kernel/async.c ++++ b/kernel/async.c +@@ -88,6 +88,13 @@ static async_cookie_t __lowest_in_progress(struct list_head *running) + { + struct async_entry *entry; + ++ if (!running) { /* just check the entry count */ ++ if (atomic_read(&entry_count)) ++ return 0; /* smaller than any cookie */ ++ else ++ return next_cookie; ++ } ++ + if (!list_empty(running)) { + entry = list_first_entry(running, + struct async_entry, list); +@@ -238,9 +245,7 @@ EXPORT_SYMBOL_GPL(async_schedule_domain); + */ + void async_synchronize_full(void) + { +- do { +- async_synchronize_cookie(next_cookie); +- } while (!list_empty(&async_running) || !list_empty(&async_pending)); ++ async_synchronize_cookie_domain(next_cookie, NULL); + } + EXPORT_SYMBOL_GPL(async_synchronize_full); + +@@ -260,7 +265,7 @@ EXPORT_SYMBOL_GPL(async_synchronize_full_domain); + /** + * async_synchronize_cookie_domain - synchronize asynchronous function calls within a certain domain with cookie checkpointing + * @cookie: async_cookie_t to use as checkpoint +- * @running: running list to synchronize on ++ * @running: running list to synchronize on, NULL indicates all lists + * + * This function waits until all asynchronous function calls for the + * synchronization domain specified by the running list @list submitted +diff --git a/kernel/cpuset.c b/kernel/cpuset.c +index 46a1d3c..84a524b 100644 +--- a/kernel/cpuset.c ++++ b/kernel/cpuset.c +@@ -2080,6 +2080,9 @@ static void scan_for_empty_cpusets(struct cpuset *root) + * (of no affect) on systems that are actively using CPU hotplug + * but making no active use of cpusets. + * ++ * The only exception to this is suspend/resume, where we don't ++ * modify cpusets at all. ++ * + * This routine ensures that top_cpuset.cpus_allowed tracks + * cpu_active_mask on each CPU hotplug (cpuhp) event. + * +diff --git a/kernel/exit.c b/kernel/exit.c +index 5a8a66e..234e152 100644 +--- a/kernel/exit.c ++++ b/kernel/exit.c +@@ -1019,6 +1019,22 @@ NORET_TYPE void do_exit(long code) + + preempt_disable(); + exit_rcu(); ++ ++ /* ++ * The setting of TASK_RUNNING by try_to_wake_up() may be delayed ++ * when the following two conditions become true. ++ * - There is race condition of mmap_sem (It is acquired by ++ * exit_mm()), and ++ * - SMI occurs before setting TASK_RUNINNG. ++ * (or hypervisor of virtual machine switches to other guest) ++ * As a result, we may become TASK_RUNNING after becoming TASK_DEAD ++ * ++ * To avoid it, we have to wait for releasing tsk->pi_lock which ++ * is held by try_to_wake_up() ++ */ ++ smp_mb(); ++ raw_spin_unlock_wait(&tsk->pi_lock); ++ + /* causes final put_task_struct in finish_task_switch(). */ + tsk->state = TASK_DEAD; + schedule(); +diff --git a/kernel/sched.c b/kernel/sched.c +index 910db7d..fcc893f 100644 +--- a/kernel/sched.c ++++ b/kernel/sched.c +@@ -8192,34 +8192,66 @@ int __init sched_create_sysfs_power_savings_entries(struct sysdev_class *cls) + } + #endif /* CONFIG_SCHED_MC || CONFIG_SCHED_SMT */ + ++static int num_cpus_frozen; /* used to mark begin/end of suspend/resume */ ++ + /* + * Update cpusets according to cpu_active mask. If cpusets are + * disabled, cpuset_update_active_cpus() becomes a simple wrapper + * around partition_sched_domains(). 
++ * ++ * If we come here as part of a suspend/resume, don't touch cpusets because we ++ * want to restore it back to its original state upon resume anyway. + */ + static int cpuset_cpu_active(struct notifier_block *nfb, unsigned long action, + void *hcpu) + { +- switch (action & ~CPU_TASKS_FROZEN) { ++ switch (action) { ++ case CPU_ONLINE_FROZEN: ++ case CPU_DOWN_FAILED_FROZEN: ++ ++ /* ++ * num_cpus_frozen tracks how many CPUs are involved in suspend ++ * resume sequence. As long as this is not the last online ++ * operation in the resume sequence, just build a single sched ++ * domain, ignoring cpusets. ++ */ ++ num_cpus_frozen--; ++ if (likely(num_cpus_frozen)) { ++ partition_sched_domains(1, NULL, NULL); ++ break; ++ } ++ ++ /* ++ * This is the last CPU online operation. So fall through and ++ * restore the original sched domains by considering the ++ * cpuset configurations. ++ */ ++ + case CPU_ONLINE: + case CPU_DOWN_FAILED: + cpuset_update_active_cpus(); +- return NOTIFY_OK; ++ break; + default: + return NOTIFY_DONE; + } ++ return NOTIFY_OK; + } + + static int cpuset_cpu_inactive(struct notifier_block *nfb, unsigned long action, + void *hcpu) + { +- switch (action & ~CPU_TASKS_FROZEN) { ++ switch (action) { + case CPU_DOWN_PREPARE: + cpuset_update_active_cpus(); +- return NOTIFY_OK; ++ break; ++ case CPU_DOWN_PREPARE_FROZEN: ++ num_cpus_frozen++; ++ partition_sched_domains(1, NULL, NULL); ++ break; + default: + return NOTIFY_DONE; + } ++ return NOTIFY_OK; + } + + static int update_runtime(struct notifier_block *nfb, +diff --git a/kernel/workqueue.c b/kernel/workqueue.c +index 979d4de..b413138 100644 +--- a/kernel/workqueue.c ++++ b/kernel/workqueue.c +@@ -3627,18 +3627,17 @@ static int __devinit workqueue_cpu_down_callback(struct notifier_block *nfb, + #ifdef CONFIG_SMP + + struct work_for_cpu { +- struct completion completion; ++ struct work_struct work; + long (*fn)(void *); + void *arg; + long ret; + }; + +-static int do_work_for_cpu(void *_wfc) ++static void work_for_cpu_fn(struct work_struct *work) + { +- struct work_for_cpu *wfc = _wfc; ++ struct work_for_cpu *wfc = container_of(work, struct work_for_cpu, work); ++ + wfc->ret = wfc->fn(wfc->arg); +- complete(&wfc->completion); +- return 0; + } + + /** +@@ -3653,19 +3652,11 @@ static int do_work_for_cpu(void *_wfc) + */ + long work_on_cpu(unsigned int cpu, long (*fn)(void *), void *arg) + { +- struct task_struct *sub_thread; +- struct work_for_cpu wfc = { +- .completion = COMPLETION_INITIALIZER_ONSTACK(wfc.completion), +- .fn = fn, +- .arg = arg, +- }; ++ struct work_for_cpu wfc = { .fn = fn, .arg = arg }; + +- sub_thread = kthread_create(do_work_for_cpu, &wfc, "work_for_cpu"); +- if (IS_ERR(sub_thread)) +- return PTR_ERR(sub_thread); +- kthread_bind(sub_thread, cpu); +- wake_up_process(sub_thread); +- wait_for_completion(&wfc.completion); ++ INIT_WORK_ONSTACK(&wfc.work, work_for_cpu_fn); ++ schedule_work_on(cpu, &wfc.work); ++ flush_work(&wfc.work); + return wfc.ret; + } + EXPORT_SYMBOL_GPL(work_on_cpu); +diff --git a/mm/memory_hotplug.c b/mm/memory_hotplug.c +index 6629faf..9ad7d1e 100644 +--- a/mm/memory_hotplug.c ++++ b/mm/memory_hotplug.c +@@ -127,9 +127,6 @@ static void register_page_bootmem_info_section(unsigned long start_pfn) + struct mem_section *ms; + struct page *page, *memmap; + +- if (!pfn_valid(start_pfn)) +- return; +- + section_nr = pfn_to_section_nr(start_pfn); + ms = __nr_to_section(section_nr); + +@@ -188,9 +185,16 @@ void register_page_bootmem_info_node(struct pglist_data *pgdat) + end_pfn = pfn + 
pgdat->node_spanned_pages; + + /* register_section info */ +- for (; pfn < end_pfn; pfn += PAGES_PER_SECTION) +- register_page_bootmem_info_section(pfn); +- ++ for (; pfn < end_pfn; pfn += PAGES_PER_SECTION) { ++ /* ++ * Some platforms can assign the same pfn to multiple nodes - on ++ * node0 as well as nodeN. To avoid registering a pfn against ++ * multiple nodes we check that this pfn does not already ++ * reside in some other node. ++ */ ++ if (pfn_valid(pfn) && (pfn_to_nid(pfn) == node)) ++ register_page_bootmem_info_section(pfn); ++ } + } + #endif /* !CONFIG_SPARSEMEM_VMEMMAP */ + +diff --git a/mm/page_alloc.c b/mm/page_alloc.c +index 6e51bf0..a88dded 100644 +--- a/mm/page_alloc.c ++++ b/mm/page_alloc.c +@@ -541,7 +541,7 @@ static inline void __free_one_page(struct page *page, + combined_idx = buddy_idx & page_idx; + higher_page = page + (combined_idx - page_idx); + buddy_idx = __find_buddy_index(combined_idx, order + 1); +- higher_buddy = page + (buddy_idx - combined_idx); ++ higher_buddy = higher_page + (buddy_idx - combined_idx); + if (page_is_buddy(higher_page, higher_buddy, order + 1)) { + list_add_tail(&page->lru, + &zone->free_area[order].free_list[migratetype]); +diff --git a/net/8021q/vlan_core.c b/net/8021q/vlan_core.c +index f5ffc02..9ddbd4e 100644 +--- a/net/8021q/vlan_core.c ++++ b/net/8021q/vlan_core.c +@@ -106,7 +106,6 @@ static struct sk_buff *vlan_reorder_header(struct sk_buff *skb) + return NULL; + memmove(skb->data - ETH_HLEN, skb->data - VLAN_ETH_HLEN, 2 * ETH_ALEN); + skb->mac_header += VLAN_HLEN; +- skb_reset_mac_len(skb); + return skb; + } + +@@ -173,6 +172,8 @@ struct sk_buff *vlan_untag(struct sk_buff *skb) + + skb_reset_network_header(skb); + skb_reset_transport_header(skb); ++ skb_reset_mac_len(skb); ++ + return skb; + + err_free: +diff --git a/net/bluetooth/hci_conn.c b/net/bluetooth/hci_conn.c +index 98bfbd5..1fb1aec 100644 +--- a/net/bluetooth/hci_conn.c ++++ b/net/bluetooth/hci_conn.c +@@ -44,6 +44,7 @@ + + #include + #include ++#include + + static void hci_le_connect(struct hci_conn *conn) + { +@@ -641,6 +642,9 @@ int hci_conn_security(struct hci_conn *conn, __u8 sec_level, __u8 auth_type) + { + BT_DBG("conn %p", conn); + ++ if (conn->type == LE_LINK) ++ return smp_conn_security(conn, sec_level); ++ + /* For sdp we don't need the link key. 
*/ + if (sec_level == BT_SECURITY_SDP) + return 1; +diff --git a/net/bluetooth/l2cap_core.c b/net/bluetooth/l2cap_core.c +index dd76177..04175d9 100644 +--- a/net/bluetooth/l2cap_core.c ++++ b/net/bluetooth/l2cap_core.c +@@ -902,14 +902,15 @@ static void l2cap_chan_ready(struct sock *sk) + static void l2cap_conn_ready(struct l2cap_conn *conn) + { + struct l2cap_chan *chan; ++ struct hci_conn *hcon = conn->hcon; + + BT_DBG("conn %p", conn); + +- if (!conn->hcon->out && conn->hcon->type == LE_LINK) ++ if (!hcon->out && hcon->type == LE_LINK) + l2cap_le_conn_ready(conn); + +- if (conn->hcon->out && conn->hcon->type == LE_LINK) +- smp_conn_security(conn, conn->hcon->pending_sec_level); ++ if (hcon->out && hcon->type == LE_LINK) ++ smp_conn_security(hcon, hcon->pending_sec_level); + + read_lock(&conn->chan_lock); + +@@ -918,8 +919,8 @@ static void l2cap_conn_ready(struct l2cap_conn *conn) + + bh_lock_sock(sk); + +- if (conn->hcon->type == LE_LINK) { +- if (smp_conn_security(conn, chan->sec_level)) ++ if (hcon->type == LE_LINK) { ++ if (smp_conn_security(hcon, chan->sec_level)) + l2cap_chan_ready(sk); + + } else if (chan->chan_type != L2CAP_CHAN_CONN_ORIENTED) { +diff --git a/net/bluetooth/l2cap_sock.c b/net/bluetooth/l2cap_sock.c +index 6dedd6f..158887a 100644 +--- a/net/bluetooth/l2cap_sock.c ++++ b/net/bluetooth/l2cap_sock.c +@@ -616,7 +616,7 @@ static int l2cap_sock_setsockopt(struct socket *sock, int level, int optname, ch + break; + } + +- if (smp_conn_security(conn, sec.level)) ++ if (smp_conn_security(conn->hcon, sec.level)) + break; + + err = 0; +diff --git a/net/bluetooth/smp.c b/net/bluetooth/smp.c +index 759b635..c27b4e3 100644 +--- a/net/bluetooth/smp.c ++++ b/net/bluetooth/smp.c +@@ -554,9 +554,9 @@ static u8 smp_cmd_security_req(struct l2cap_conn *conn, struct sk_buff *skb) + return 0; + } + +-int smp_conn_security(struct l2cap_conn *conn, __u8 sec_level) ++int smp_conn_security(struct hci_conn *hcon, __u8 sec_level) + { +- struct hci_conn *hcon = conn->hcon; ++ struct l2cap_conn *conn = hcon->l2cap_data; + struct smp_chan *smp = conn->smp_chan; + + BT_DBG("conn %p hcon %p level 0x%2.2x", conn, hcon, sec_level); +diff --git a/net/core/dev.c b/net/core/dev.c +index 832ba6d..abe1147 100644 +--- a/net/core/dev.c ++++ b/net/core/dev.c +@@ -2108,7 +2108,8 @@ static bool can_checksum_protocol(unsigned long features, __be16 protocol) + + static u32 harmonize_features(struct sk_buff *skb, __be16 protocol, u32 features) + { +- if (!can_checksum_protocol(features, protocol)) { ++ if (skb->ip_summed != CHECKSUM_NONE && ++ !can_checksum_protocol(features, protocol)) { + features &= ~NETIF_F_ALL_CSUM; + features &= ~NETIF_F_SG; + } else if (illegal_highdma(skb->dev, skb)) { +@@ -2686,16 +2687,17 @@ ipv6: + nhoff += poff; + if (pskb_may_pull(skb, nhoff + 4)) { + ports.v32 = * (__force u32 *) (skb->data + nhoff); +- if (ports.v16[1] < ports.v16[0]) +- swap(ports.v16[0], ports.v16[1]); + skb->l4_rxhash = 1; + } + } + + /* get a consistent hash (same value on both flow directions) */ +- if (addr2 < addr1) ++ if (addr2 < addr1 || ++ (addr2 == addr1 && ++ ports.v16[1] < ports.v16[0])) { + swap(addr1, addr2); +- ++ swap(ports.v16[0], ports.v16[1]); ++ } + hash = jhash_3words(addr1, addr2, ports.v32, hashrnd); + if (!hash) + hash = 1; +@@ -6387,7 +6389,8 @@ static struct hlist_head *netdev_create_hash(void) + /* Initialize per network namespace state */ + static int __net_init netdev_init(struct net *net) + { +- INIT_LIST_HEAD(&net->dev_base_head); ++ if (net != &init_net) ++ 
INIT_LIST_HEAD(&net->dev_base_head); + + net->dev_name_head = netdev_create_hash(); + if (net->dev_name_head == NULL) +diff --git a/net/core/net_namespace.c b/net/core/net_namespace.c +index 31a5ae5..dd00b71 100644 +--- a/net/core/net_namespace.c ++++ b/net/core/net_namespace.c +@@ -25,7 +25,9 @@ static DEFINE_MUTEX(net_mutex); + LIST_HEAD(net_namespace_list); + EXPORT_SYMBOL_GPL(net_namespace_list); + +-struct net init_net; ++struct net init_net = { ++ .dev_base_head = LIST_HEAD_INIT(init_net.dev_base_head), ++}; + EXPORT_SYMBOL(init_net); + + #define INITIAL_NET_GEN_PTRS 13 /* +1 for len +2 for rcu_head */ +diff --git a/net/core/sock.c b/net/core/sock.c +index 018fd41..1e8a882 100644 +--- a/net/core/sock.c ++++ b/net/core/sock.c +@@ -593,7 +593,8 @@ set_rcvbuf: + + case SO_KEEPALIVE: + #ifdef CONFIG_INET +- if (sk->sk_protocol == IPPROTO_TCP) ++ if (sk->sk_protocol == IPPROTO_TCP && ++ sk->sk_type == SOCK_STREAM) + tcp_set_keepalive(sk, valbool); + #endif + sock_valbool_flag(sk, SOCK_KEEPOPEN, valbool); +diff --git a/net/ipv4/raw.c b/net/ipv4/raw.c +index 007e2eb..e1d4f30 100644 +--- a/net/ipv4/raw.c ++++ b/net/ipv4/raw.c +@@ -131,18 +131,20 @@ found: + * 0 - deliver + * 1 - block + */ +-static __inline__ int icmp_filter(struct sock *sk, struct sk_buff *skb) ++static int icmp_filter(const struct sock *sk, const struct sk_buff *skb) + { +- int type; ++ struct icmphdr _hdr; ++ const struct icmphdr *hdr; + +- if (!pskb_may_pull(skb, sizeof(struct icmphdr))) ++ hdr = skb_header_pointer(skb, skb_transport_offset(skb), ++ sizeof(_hdr), &_hdr); ++ if (!hdr) + return 1; + +- type = icmp_hdr(skb)->type; +- if (type < 32) { ++ if (hdr->type < 32) { + __u32 data = raw_sk(sk)->filter.data; + +- return ((1 << type) & data) != 0; ++ return ((1U << hdr->type) & data) != 0; + } + + /* Do not block unknown ICMP types */ +diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c +index 043d49b..7397ad8 100644 +--- a/net/ipv4/tcp.c ++++ b/net/ipv4/tcp.c +@@ -1589,8 +1589,14 @@ int tcp_recvmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg, + } + + #ifdef CONFIG_NET_DMA +- if (tp->ucopy.dma_chan) +- dma_async_memcpy_issue_pending(tp->ucopy.dma_chan); ++ if (tp->ucopy.dma_chan) { ++ if (tp->rcv_wnd == 0 && ++ !skb_queue_empty(&sk->sk_async_wait_queue)) { ++ tcp_service_net_dma(sk, true); ++ tcp_cleanup_rbuf(sk, copied); ++ } else ++ dma_async_memcpy_issue_pending(tp->ucopy.dma_chan); ++ } + #endif + if (copied >= target) { + /* Do not sleep, just process backlog. 
*/ +diff --git a/net/ipv6/mip6.c b/net/ipv6/mip6.c +index 43242e6..42853c4 100644 +--- a/net/ipv6/mip6.c ++++ b/net/ipv6/mip6.c +@@ -84,28 +84,30 @@ static int mip6_mh_len(int type) + + static int mip6_mh_filter(struct sock *sk, struct sk_buff *skb) + { +- struct ip6_mh *mh; ++ struct ip6_mh _hdr; ++ const struct ip6_mh *mh; + +- if (!pskb_may_pull(skb, (skb_transport_offset(skb)) + 8) || +- !pskb_may_pull(skb, (skb_transport_offset(skb) + +- ((skb_transport_header(skb)[1] + 1) << 3)))) ++ mh = skb_header_pointer(skb, skb_transport_offset(skb), ++ sizeof(_hdr), &_hdr); ++ if (!mh) + return -1; + +- mh = (struct ip6_mh *)skb_transport_header(skb); ++ if (((mh->ip6mh_hdrlen + 1) << 3) > skb->len) ++ return -1; + + if (mh->ip6mh_hdrlen < mip6_mh_len(mh->ip6mh_type)) { + LIMIT_NETDEBUG(KERN_DEBUG "mip6: MH message too short: %d vs >=%d\n", + mh->ip6mh_hdrlen, mip6_mh_len(mh->ip6mh_type)); +- mip6_param_prob(skb, 0, ((&mh->ip6mh_hdrlen) - +- skb_network_header(skb))); ++ mip6_param_prob(skb, 0, offsetof(struct ip6_mh, ip6mh_hdrlen) + ++ skb_network_header_len(skb)); + return -1; + } + + if (mh->ip6mh_proto != IPPROTO_NONE) { + LIMIT_NETDEBUG(KERN_DEBUG "mip6: MH invalid payload proto = %d\n", + mh->ip6mh_proto); +- mip6_param_prob(skb, 0, ((&mh->ip6mh_proto) - +- skb_network_header(skb))); ++ mip6_param_prob(skb, 0, offsetof(struct ip6_mh, ip6mh_proto) + ++ skb_network_header_len(skb)); + return -1; + } + +diff --git a/net/ipv6/raw.c b/net/ipv6/raw.c +index 361ebf3..6e6c2c4 100644 +--- a/net/ipv6/raw.c ++++ b/net/ipv6/raw.c +@@ -107,21 +107,20 @@ found: + * 0 - deliver + * 1 - block + */ +-static __inline__ int icmpv6_filter(struct sock *sk, struct sk_buff *skb) ++static int icmpv6_filter(const struct sock *sk, const struct sk_buff *skb) + { +- struct icmp6hdr *icmph; +- struct raw6_sock *rp = raw6_sk(sk); +- +- if (pskb_may_pull(skb, sizeof(struct icmp6hdr))) { +- __u32 *data = &rp->filter.data[0]; +- int bit_nr; ++ struct icmp6hdr *_hdr; ++ const struct icmp6hdr *hdr; + +- icmph = (struct icmp6hdr *) skb->data; +- bit_nr = icmph->icmp6_type; ++ hdr = skb_header_pointer(skb, skb_transport_offset(skb), ++ sizeof(_hdr), &_hdr); ++ if (hdr) { ++ const __u32 *data = &raw6_sk(sk)->filter.data[0]; ++ unsigned int type = hdr->icmp6_type; + +- return (data[bit_nr >> 5] & (1 << (bit_nr & 31))) != 0; ++ return (data[type >> 5] & (1U << (type & 31))) != 0; + } +- return 0; ++ return 1; + } + + #if defined(CONFIG_IPV6_MIP6) || defined(CONFIG_IPV6_MIP6_MODULE) +diff --git a/net/ipv6/route.c b/net/ipv6/route.c +index 2e21751..488a1b7 100644 +--- a/net/ipv6/route.c ++++ b/net/ipv6/route.c +@@ -1435,17 +1435,18 @@ static int __ip6_del_rt(struct rt6_info *rt, struct nl_info *info) + struct fib6_table *table; + struct net *net = dev_net(rt->rt6i_dev); + +- if (rt == net->ipv6.ip6_null_entry) +- return -ENOENT; ++ if (rt == net->ipv6.ip6_null_entry) { ++ err = -ENOENT; ++ goto out; ++ } + + table = rt->rt6i_table; + write_lock_bh(&table->tb6_lock); +- + err = fib6_del(rt, info); +- dst_release(&rt->dst); +- + write_unlock_bh(&table->tb6_lock); + ++out: ++ dst_release(&rt->dst); + return err; + } + +diff --git a/net/l2tp/l2tp_eth.c b/net/l2tp/l2tp_eth.c +index 3c55f63..2cef50b 100644 +--- a/net/l2tp/l2tp_eth.c ++++ b/net/l2tp/l2tp_eth.c +@@ -132,7 +132,7 @@ static void l2tp_eth_dev_recv(struct l2tp_session *session, struct sk_buff *skb, + printk("\n"); + } + +- if (!pskb_may_pull(skb, sizeof(ETH_HLEN))) ++ if (!pskb_may_pull(skb, ETH_HLEN)) + goto error; + + secpath_reset(skb); +diff --git 
a/net/netrom/af_netrom.c b/net/netrom/af_netrom.c +index 732152f..f156382 100644 +--- a/net/netrom/af_netrom.c ++++ b/net/netrom/af_netrom.c +@@ -1170,7 +1170,12 @@ static int nr_recvmsg(struct kiocb *iocb, struct socket *sock, + msg->msg_flags |= MSG_TRUNC; + } + +- skb_copy_datagram_iovec(skb, 0, msg->msg_iov, copied); ++ er = skb_copy_datagram_iovec(skb, 0, msg->msg_iov, copied); ++ if (er < 0) { ++ skb_free_datagram(sk, skb); ++ release_sock(sk); ++ return er; ++ } + + if (sax != NULL) { + sax->sax25_family = AF_NETROM; +diff --git a/net/rds/recv.c b/net/rds/recv.c +index bc3f8cd..fc57d31 100644 +--- a/net/rds/recv.c ++++ b/net/rds/recv.c +@@ -410,6 +410,8 @@ int rds_recvmsg(struct kiocb *iocb, struct socket *sock, struct msghdr *msg, + + rdsdebug("size %zu flags 0x%x timeo %ld\n", size, msg_flags, timeo); + ++ msg->msg_namelen = 0; ++ + if (msg_flags & MSG_OOB) + goto out; + +@@ -485,6 +487,7 @@ int rds_recvmsg(struct kiocb *iocb, struct socket *sock, struct msghdr *msg, + sin->sin_port = inc->i_hdr.h_sport; + sin->sin_addr.s_addr = inc->i_saddr; + memset(sin->sin_zero, 0, sizeof(sin->sin_zero)); ++ msg->msg_namelen = sizeof(*sin); + } + break; + } +diff --git a/net/sched/sch_cbq.c b/net/sched/sch_cbq.c +index 24d94c0..599f67a 100644 +--- a/net/sched/sch_cbq.c ++++ b/net/sched/sch_cbq.c +@@ -250,10 +250,11 @@ cbq_classify(struct sk_buff *skb, struct Qdisc *sch, int *qerr) + else if ((cl = defmap[res.classid & TC_PRIO_MAX]) == NULL) + cl = defmap[TC_PRIO_BESTEFFORT]; + +- if (cl == NULL || cl->level >= head->level) ++ if (cl == NULL) + goto fallback; + } +- ++ if (cl->level >= head->level) ++ goto fallback; + #ifdef CONFIG_NET_CLS_ACT + switch (result) { + case TC_ACT_QUEUED: +diff --git a/net/sched/sch_qfq.c b/net/sched/sch_qfq.c +index 7b03254..ca0fb48 100644 +--- a/net/sched/sch_qfq.c ++++ b/net/sched/sch_qfq.c +@@ -829,7 +829,10 @@ static void qfq_update_start(struct qfq_sched *q, struct qfq_class *cl) + if (mask) { + struct qfq_group *next = qfq_ffs(q, mask); + if (qfq_gt(roundedF, next->F)) { +- cl->S = next->F; ++ if (qfq_gt(limit, next->F)) ++ cl->S = next->F; ++ else /* preserve timestamp correctness */ ++ cl->S = limit; + return; + } + } +diff --git a/net/sctp/output.c b/net/sctp/output.c +index 8fc4dcd..32ba8d0 100644 +--- a/net/sctp/output.c ++++ b/net/sctp/output.c +@@ -334,6 +334,25 @@ finish: + return retval; + } + ++static void sctp_packet_release_owner(struct sk_buff *skb) ++{ ++ sk_free(skb->sk); ++} ++ ++static void sctp_packet_set_owner_w(struct sk_buff *skb, struct sock *sk) ++{ ++ skb_orphan(skb); ++ skb->sk = sk; ++ skb->destructor = sctp_packet_release_owner; ++ ++ /* ++ * The data chunks have already been accounted for in sctp_sendmsg(), ++ * therefore only reserve a single byte to keep socket around until ++ * the packet has been transmitted. ++ */ ++ atomic_inc(&sk->sk_wmem_alloc); ++} ++ + /* All packets are sent to the network through this function from + * sctp_outq_tail(). + * +@@ -375,7 +394,7 @@ int sctp_packet_transmit(struct sctp_packet *packet) + /* Set the owning socket so that we know where to get the + * destination IP address. 
+ */ +- skb_set_owner_w(nskb, sk); ++ sctp_packet_set_owner_w(nskb, sk); + + if (!sctp_transport_dst_check(tp)) { + sctp_transport_route(tp, NULL, sctp_sk(sk)); +diff --git a/net/wireless/reg.c b/net/wireless/reg.c +index d57d05b..fa39731 100644 +--- a/net/wireless/reg.c ++++ b/net/wireless/reg.c +@@ -331,6 +331,9 @@ static void reg_regdb_search(struct work_struct *work) + struct reg_regdb_search_request *request; + const struct ieee80211_regdomain *curdom, *regdom; + int i, r; ++ bool set_reg = false; ++ ++ mutex_lock(&cfg80211_mutex); + + mutex_lock(®_regdb_search_mutex); + while (!list_empty(®_regdb_search_list)) { +@@ -346,9 +349,7 @@ static void reg_regdb_search(struct work_struct *work) + r = reg_copy_regd(®dom, curdom); + if (r) + break; +- mutex_lock(&cfg80211_mutex); +- set_regdom(regdom); +- mutex_unlock(&cfg80211_mutex); ++ set_reg = true; + break; + } + } +@@ -356,6 +357,11 @@ static void reg_regdb_search(struct work_struct *work) + kfree(request); + } + mutex_unlock(®_regdb_search_mutex); ++ ++ if (set_reg) ++ set_regdom(regdom); ++ ++ mutex_unlock(&cfg80211_mutex); + } + + static DECLARE_WORK(reg_regdb_work, reg_regdb_search); +diff --git a/net/xfrm/xfrm_input.c b/net/xfrm/xfrm_input.c +index 54a0dc2..ab2bb42 100644 +--- a/net/xfrm/xfrm_input.c ++++ b/net/xfrm/xfrm_input.c +@@ -212,7 +212,7 @@ resume: + /* only the first xfrm gets the encap type */ + encap_type = 0; + +- if (async && x->repl->check(x, skb, seq)) { ++ if (async && x->repl->recheck(x, skb, seq)) { + XFRM_INC_STATS(net, LINUX_MIB_XFRMINSTATESEQERROR); + goto drop_unlock; + } +diff --git a/net/xfrm/xfrm_policy.c b/net/xfrm/xfrm_policy.c +index 0174034..113d20e 100644 +--- a/net/xfrm/xfrm_policy.c ++++ b/net/xfrm/xfrm_policy.c +@@ -1761,7 +1761,7 @@ static struct dst_entry *make_blackhole(struct net *net, u16 family, + + if (!afinfo) { + dst_release(dst_orig); +- ret = ERR_PTR(-EINVAL); ++ return ERR_PTR(-EINVAL); + } else { + ret = afinfo->blackhole_route(net, dst_orig); + } +diff --git a/net/xfrm/xfrm_replay.c b/net/xfrm/xfrm_replay.c +index 2f6d11d..3efb07d 100644 +--- a/net/xfrm/xfrm_replay.c ++++ b/net/xfrm/xfrm_replay.c +@@ -420,6 +420,18 @@ err: + return -EINVAL; + } + ++static int xfrm_replay_recheck_esn(struct xfrm_state *x, ++ struct sk_buff *skb, __be32 net_seq) ++{ ++ if (unlikely(XFRM_SKB_CB(skb)->seq.input.hi != ++ htonl(xfrm_replay_seqhi(x, net_seq)))) { ++ x->stats.replay_window++; ++ return -EINVAL; ++ } ++ ++ return xfrm_replay_check_esn(x, skb, net_seq); ++} ++ + static void xfrm_replay_advance_esn(struct xfrm_state *x, __be32 net_seq) + { + unsigned int bitnr, nr, i; +@@ -479,6 +491,7 @@ static void xfrm_replay_advance_esn(struct xfrm_state *x, __be32 net_seq) + static struct xfrm_replay xfrm_replay_legacy = { + .advance = xfrm_replay_advance, + .check = xfrm_replay_check, ++ .recheck = xfrm_replay_check, + .notify = xfrm_replay_notify, + .overflow = xfrm_replay_overflow, + }; +@@ -486,6 +499,7 @@ static struct xfrm_replay xfrm_replay_legacy = { + static struct xfrm_replay xfrm_replay_bmp = { + .advance = xfrm_replay_advance_bmp, + .check = xfrm_replay_check_bmp, ++ .recheck = xfrm_replay_check_bmp, + .notify = xfrm_replay_notify_bmp, + .overflow = xfrm_replay_overflow_bmp, + }; +@@ -493,6 +507,7 @@ static struct xfrm_replay xfrm_replay_bmp = { + static struct xfrm_replay xfrm_replay_esn = { + .advance = xfrm_replay_advance_esn, + .check = xfrm_replay_check_esn, ++ .recheck = xfrm_replay_recheck_esn, + .notify = xfrm_replay_notify_bmp, + .overflow = xfrm_replay_overflow_esn, + }; +diff --git 
a/net/xfrm/xfrm_user.c b/net/xfrm/xfrm_user.c +index 7cae73e..ede01a8 100644 +--- a/net/xfrm/xfrm_user.c ++++ b/net/xfrm/xfrm_user.c +@@ -123,9 +123,21 @@ static inline int verify_replay(struct xfrm_usersa_info *p, + struct nlattr **attrs) + { + struct nlattr *rt = attrs[XFRMA_REPLAY_ESN_VAL]; ++ struct xfrm_replay_state_esn *rs; + +- if ((p->flags & XFRM_STATE_ESN) && !rt) +- return -EINVAL; ++ if (p->flags & XFRM_STATE_ESN) { ++ if (!rt) ++ return -EINVAL; ++ ++ rs = nla_data(rt); ++ ++ if (rs->bmp_len > XFRMA_REPLAY_ESN_MAX / sizeof(rs->bmp[0]) / 8) ++ return -EINVAL; ++ ++ if (nla_len(rt) < xfrm_replay_state_esn_len(rs) && ++ nla_len(rt) != sizeof(*rs)) ++ return -EINVAL; ++ } + + if (!rt) + return 0; +@@ -370,14 +382,15 @@ static inline int xfrm_replay_verify_len(struct xfrm_replay_state_esn *replay_es + struct nlattr *rp) + { + struct xfrm_replay_state_esn *up; ++ int ulen; + + if (!replay_esn || !rp) + return 0; + + up = nla_data(rp); ++ ulen = xfrm_replay_state_esn_len(up); + +- if (xfrm_replay_state_esn_len(replay_esn) != +- xfrm_replay_state_esn_len(up)) ++ if (nla_len(rp) < ulen || xfrm_replay_state_esn_len(replay_esn) != ulen) + return -EINVAL; + + return 0; +@@ -388,22 +401,28 @@ static int xfrm_alloc_replay_state_esn(struct xfrm_replay_state_esn **replay_esn + struct nlattr *rta) + { + struct xfrm_replay_state_esn *p, *pp, *up; ++ int klen, ulen; + + if (!rta) + return 0; + + up = nla_data(rta); ++ klen = xfrm_replay_state_esn_len(up); ++ ulen = nla_len(rta) >= klen ? klen : sizeof(*up); + +- p = kmemdup(up, xfrm_replay_state_esn_len(up), GFP_KERNEL); ++ p = kzalloc(klen, GFP_KERNEL); + if (!p) + return -ENOMEM; + +- pp = kmemdup(up, xfrm_replay_state_esn_len(up), GFP_KERNEL); ++ pp = kzalloc(klen, GFP_KERNEL); + if (!pp) { + kfree(p); + return -ENOMEM; + } + ++ memcpy(p, up, ulen); ++ memcpy(pp, up, ulen); ++ + *replay_esn = p; + *preplay_esn = pp; + +@@ -442,10 +461,11 @@ static void copy_from_user_state(struct xfrm_state *x, struct xfrm_usersa_info * + * somehow made shareable and move it to xfrm_state.c - JHS + * + */ +-static void xfrm_update_ae_params(struct xfrm_state *x, struct nlattr **attrs) ++static void xfrm_update_ae_params(struct xfrm_state *x, struct nlattr **attrs, ++ int update_esn) + { + struct nlattr *rp = attrs[XFRMA_REPLAY_VAL]; +- struct nlattr *re = attrs[XFRMA_REPLAY_ESN_VAL]; ++ struct nlattr *re = update_esn ? 
attrs[XFRMA_REPLAY_ESN_VAL] : NULL; + struct nlattr *lt = attrs[XFRMA_LTIME_VAL]; + struct nlattr *et = attrs[XFRMA_ETIMER_THRESH]; + struct nlattr *rt = attrs[XFRMA_REPLAY_THRESH]; +@@ -555,7 +575,7 @@ static struct xfrm_state *xfrm_state_construct(struct net *net, + goto error; + + /* override default values from above */ +- xfrm_update_ae_params(x, attrs); ++ xfrm_update_ae_params(x, attrs, 0); + + return x; + +@@ -689,6 +709,7 @@ out: + + static void copy_to_user_state(struct xfrm_state *x, struct xfrm_usersa_info *p) + { ++ memset(p, 0, sizeof(*p)); + memcpy(&p->id, &x->id, sizeof(p->id)); + memcpy(&p->sel, &x->sel, sizeof(p->sel)); + memcpy(&p->lft, &x->lft, sizeof(p->lft)); +@@ -742,7 +763,7 @@ static int copy_to_user_auth(struct xfrm_algo_auth *auth, struct sk_buff *skb) + return -EMSGSIZE; + + algo = nla_data(nla); +- strcpy(algo->alg_name, auth->alg_name); ++ strncpy(algo->alg_name, auth->alg_name, sizeof(algo->alg_name)); + memcpy(algo->alg_key, auth->alg_key, (auth->alg_key_len + 7) / 8); + algo->alg_key_len = auth->alg_key_len; + +@@ -862,6 +883,7 @@ static struct sk_buff *xfrm_state_netlink(struct sk_buff *in_skb, + { + struct xfrm_dump_info info; + struct sk_buff *skb; ++ int err; + + skb = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_ATOMIC); + if (!skb) +@@ -872,9 +894,10 @@ static struct sk_buff *xfrm_state_netlink(struct sk_buff *in_skb, + info.nlmsg_seq = seq; + info.nlmsg_flags = 0; + +- if (dump_one_state(x, 0, &info)) { ++ err = dump_one_state(x, 0, &info); ++ if (err) { + kfree_skb(skb); +- return NULL; ++ return ERR_PTR(err); + } + + return skb; +@@ -1297,6 +1320,7 @@ static void copy_from_user_policy(struct xfrm_policy *xp, struct xfrm_userpolicy + + static void copy_to_user_policy(struct xfrm_policy *xp, struct xfrm_userpolicy_info *p, int dir) + { ++ memset(p, 0, sizeof(*p)); + memcpy(&p->sel, &xp->selector, sizeof(p->sel)); + memcpy(&p->lft, &xp->lft, sizeof(p->lft)); + memcpy(&p->curlft, &xp->curlft, sizeof(p->curlft)); +@@ -1401,6 +1425,7 @@ static int copy_to_user_tmpl(struct xfrm_policy *xp, struct sk_buff *skb) + struct xfrm_user_tmpl *up = &vec[i]; + struct xfrm_tmpl *kp = &xp->xfrm_vec[i]; + ++ memset(up, 0, sizeof(*up)); + memcpy(&up->id, &kp->id, sizeof(up->id)); + up->family = kp->encap_family; + memcpy(&up->saddr, &kp->saddr, sizeof(up->saddr)); +@@ -1529,6 +1554,7 @@ static struct sk_buff *xfrm_policy_netlink(struct sk_buff *in_skb, + { + struct xfrm_dump_info info; + struct sk_buff *skb; ++ int err; + + skb = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL); + if (!skb) +@@ -1539,9 +1565,10 @@ static struct sk_buff *xfrm_policy_netlink(struct sk_buff *in_skb, + info.nlmsg_seq = seq; + info.nlmsg_flags = 0; + +- if (dump_one_policy(xp, dir, 0, &info) < 0) { ++ err = dump_one_policy(xp, dir, 0, &info); ++ if (err) { + kfree_skb(skb); +- return NULL; ++ return ERR_PTR(err); + } + + return skb; +@@ -1794,7 +1821,7 @@ static int xfrm_new_ae(struct sk_buff *skb, struct nlmsghdr *nlh, + goto out; + + spin_lock_bh(&x->lock); +- xfrm_update_ae_params(x, attrs); ++ xfrm_update_ae_params(x, attrs, 1); + spin_unlock_bh(&x->lock); + + c.event = nlh->nlmsg_type; +diff --git a/sound/soc/samsung/dma.c b/sound/soc/samsung/dma.c +index a68b264..a9a593a 100644 +--- a/sound/soc/samsung/dma.c ++++ b/sound/soc/samsung/dma.c +@@ -34,9 +34,7 @@ static const struct snd_pcm_hardware dma_hardware = { + .info = SNDRV_PCM_INFO_INTERLEAVED | + SNDRV_PCM_INFO_BLOCK_TRANSFER | + SNDRV_PCM_INFO_MMAP | +- SNDRV_PCM_INFO_MMAP_VALID | +- SNDRV_PCM_INFO_PAUSE | +- SNDRV_PCM_INFO_RESUME, ++ 
SNDRV_PCM_INFO_MMAP_VALID, + .formats = SNDRV_PCM_FMTBIT_S16_LE | + SNDRV_PCM_FMTBIT_U16_LE | + SNDRV_PCM_FMTBIT_U8 | +@@ -246,15 +244,11 @@ static int dma_trigger(struct snd_pcm_substream *substream, int cmd) + + switch (cmd) { + case SNDRV_PCM_TRIGGER_START: +- case SNDRV_PCM_TRIGGER_RESUME: +- case SNDRV_PCM_TRIGGER_PAUSE_RELEASE: + prtd->state |= ST_RUNNING; + prtd->params->ops->trigger(prtd->params->ch); + break; + + case SNDRV_PCM_TRIGGER_STOP: +- case SNDRV_PCM_TRIGGER_SUSPEND: +- case SNDRV_PCM_TRIGGER_PAUSE_PUSH: + prtd->state &= ~ST_RUNNING; + prtd->params->ops->stop(prtd->params->ch); + break; diff --git a/3.2.34/bump/1031_linux-3.2.32.patch b/3.2.34/bump/1031_linux-3.2.32.patch new file mode 100644 index 0000000..247fc0b --- /dev/null +++ b/3.2.34/bump/1031_linux-3.2.32.patch @@ -0,0 +1,6206 @@ +diff --git a/Documentation/virtual/lguest/lguest.c b/Documentation/virtual/lguest/lguest.c +index c095d79..288dba6 100644 +--- a/Documentation/virtual/lguest/lguest.c ++++ b/Documentation/virtual/lguest/lguest.c +@@ -1299,6 +1299,7 @@ static struct device *new_device(const char *name, u16 type) + dev->feature_len = 0; + dev->num_vq = 0; + dev->running = false; ++ dev->next = NULL; + + /* + * Append to device list. Prepending to a single-linked list is +diff --git a/Makefile b/Makefile +index fd9c414..b6d8282 100644 +--- a/Makefile ++++ b/Makefile +@@ -1,6 +1,6 @@ + VERSION = 3 + PATCHLEVEL = 2 +-SUBLEVEL = 31 ++SUBLEVEL = 32 + EXTRAVERSION = + NAME = Saber-toothed Squirrel + +diff --git a/arch/arm/plat-omap/counter_32k.c b/arch/arm/plat-omap/counter_32k.c +index a6cbb71..04e703a 100644 +--- a/arch/arm/plat-omap/counter_32k.c ++++ b/arch/arm/plat-omap/counter_32k.c +@@ -82,22 +82,29 @@ static void notrace omap_update_sched_clock(void) + * nsecs and adds to a monotonically increasing timespec. + */ + static struct timespec persistent_ts; +-static cycles_t cycles, last_cycles; ++static cycles_t cycles; + static unsigned int persistent_mult, persistent_shift; ++static DEFINE_SPINLOCK(read_persistent_clock_lock); ++ + void read_persistent_clock(struct timespec *ts) + { + unsigned long long nsecs; +- cycles_t delta; +- struct timespec *tsp = &persistent_ts; ++ cycles_t last_cycles; ++ unsigned long flags; ++ ++ spin_lock_irqsave(&read_persistent_clock_lock, flags); + + last_cycles = cycles; + cycles = timer_32k_base ? 
__raw_readl(timer_32k_base) : 0; +- delta = cycles - last_cycles; + +- nsecs = clocksource_cyc2ns(delta, persistent_mult, persistent_shift); ++ nsecs = clocksource_cyc2ns(cycles - last_cycles, ++ persistent_mult, persistent_shift); ++ ++ timespec_add_ns(&persistent_ts, nsecs); ++ ++ *ts = persistent_ts; + +- timespec_add_ns(tsp, nsecs); +- *ts = *tsp; ++ spin_unlock_irqrestore(&read_persistent_clock_lock, flags); + } + + int __init omap_init_clocksource_32k(void) +diff --git a/arch/mips/Makefile b/arch/mips/Makefile +index 0be3186..aaf7444 100644 +--- a/arch/mips/Makefile ++++ b/arch/mips/Makefile +@@ -224,7 +224,7 @@ KBUILD_CPPFLAGS += -D"DATAOFFSET=$(if $(dataoffset-y),$(dataoffset-y),0)" + LDFLAGS += -m $(ld-emul) + + ifdef CONFIG_MIPS +-CHECKFLAGS += $(shell $(CC) $(KBUILD_CFLAGS) -dM -E -xc /dev/null | \ ++CHECKFLAGS += $(shell $(CC) $(KBUILD_CFLAGS) -dM -E -x c /dev/null | \ + egrep -vw '__GNUC_(|MINOR_|PATCHLEVEL_)_' | \ + sed -e "s/^\#define /-D'/" -e "s/ /'='/" -e "s/$$/'/") + ifdef CONFIG_64BIT +diff --git a/arch/mips/kernel/Makefile b/arch/mips/kernel/Makefile +index 1a96618..ce7dd99 100644 +--- a/arch/mips/kernel/Makefile ++++ b/arch/mips/kernel/Makefile +@@ -102,7 +102,7 @@ obj-$(CONFIG_MIPS_MACHINE) += mips_machine.o + + obj-$(CONFIG_OF) += prom.o + +-CFLAGS_cpu-bugs64.o = $(shell if $(CC) $(KBUILD_CFLAGS) -Wa,-mdaddi -c -o /dev/null -xc /dev/null >/dev/null 2>&1; then echo "-DHAVE_AS_SET_DADDI"; fi) ++CFLAGS_cpu-bugs64.o = $(shell if $(CC) $(KBUILD_CFLAGS) -Wa,-mdaddi -c -o /dev/null -x c /dev/null >/dev/null 2>&1; then echo "-DHAVE_AS_SET_DADDI"; fi) + + obj-$(CONFIG_HAVE_STD_PC_SERIAL_PORT) += 8250-platform.o + +diff --git a/arch/mn10300/Makefile b/arch/mn10300/Makefile +index 7120282..3eb4a52 100644 +--- a/arch/mn10300/Makefile ++++ b/arch/mn10300/Makefile +@@ -26,7 +26,7 @@ CHECKFLAGS += + PROCESSOR := unset + UNIT := unset + +-KBUILD_CFLAGS += -mam33 -mmem-funcs -DCPU=AM33 ++KBUILD_CFLAGS += -mam33 -DCPU=AM33 $(call cc-option,-mmem-funcs,) + KBUILD_AFLAGS += -mam33 -DCPU=AM33 + + ifeq ($(CONFIG_MN10300_CURRENT_IN_E2),y) +diff --git a/arch/powerpc/platforms/pseries/eeh_driver.c b/arch/powerpc/platforms/pseries/eeh_driver.c +index 1b6cb10..a0a4e8a 100644 +--- a/arch/powerpc/platforms/pseries/eeh_driver.c ++++ b/arch/powerpc/platforms/pseries/eeh_driver.c +@@ -25,6 +25,7 @@ + #include + #include + #include ++#include + #include + #include + #include +@@ -41,6 +42,41 @@ static inline const char * pcid_name (struct pci_dev *pdev) + return ""; + } + ++/** ++ * eeh_pcid_get - Get the PCI device driver ++ * @pdev: PCI device ++ * ++ * The function is used to retrieve the PCI device driver for ++ * the indicated PCI device. Besides, we will increase the reference ++ * of the PCI device driver to prevent that being unloaded on ++ * the fly. Otherwise, kernel crash would be seen. ++ */ ++static inline struct pci_driver *eeh_pcid_get(struct pci_dev *pdev) ++{ ++ if (!pdev || !pdev->driver) ++ return NULL; ++ ++ if (!try_module_get(pdev->driver->driver.owner)) ++ return NULL; ++ ++ return pdev->driver; ++} ++ ++/** ++ * eeh_pcid_put - Dereference on the PCI device driver ++ * @pdev: PCI device ++ * ++ * The function is called to do dereference on the PCI device ++ * driver of the indicated PCI device. 
++ */ ++static inline void eeh_pcid_put(struct pci_dev *pdev) ++{ ++ if (!pdev || !pdev->driver) ++ return; ++ ++ module_put(pdev->driver->driver.owner); ++} ++ + #if 0 + static void print_device_node_tree(struct pci_dn *pdn, int dent) + { +@@ -109,18 +145,20 @@ static void eeh_enable_irq(struct pci_dev *dev) + static int eeh_report_error(struct pci_dev *dev, void *userdata) + { + enum pci_ers_result rc, *res = userdata; +- struct pci_driver *driver = dev->driver; ++ struct pci_driver *driver; + + dev->error_state = pci_channel_io_frozen; + +- if (!driver) +- return 0; ++ driver = eeh_pcid_get(dev); ++ if (!driver) return 0; + + eeh_disable_irq(dev); + + if (!driver->err_handler || +- !driver->err_handler->error_detected) ++ !driver->err_handler->error_detected) { ++ eeh_pcid_put(dev); + return 0; ++ } + + rc = driver->err_handler->error_detected (dev, pci_channel_io_frozen); + +@@ -128,6 +166,7 @@ static int eeh_report_error(struct pci_dev *dev, void *userdata) + if (rc == PCI_ERS_RESULT_NEED_RESET) *res = rc; + if (*res == PCI_ERS_RESULT_NONE) *res = rc; + ++ eeh_pcid_put(dev); + return 0; + } + +@@ -142,12 +181,15 @@ static int eeh_report_error(struct pci_dev *dev, void *userdata) + static int eeh_report_mmio_enabled(struct pci_dev *dev, void *userdata) + { + enum pci_ers_result rc, *res = userdata; +- struct pci_driver *driver = dev->driver; ++ struct pci_driver *driver; + +- if (!driver || +- !driver->err_handler || +- !driver->err_handler->mmio_enabled) ++ driver = eeh_pcid_get(dev); ++ if (!driver) return 0; ++ if (!driver->err_handler || ++ !driver->err_handler->mmio_enabled) { ++ eeh_pcid_put(dev); + return 0; ++ } + + rc = driver->err_handler->mmio_enabled (dev); + +@@ -155,6 +197,7 @@ static int eeh_report_mmio_enabled(struct pci_dev *dev, void *userdata) + if (rc == PCI_ERS_RESULT_NEED_RESET) *res = rc; + if (*res == PCI_ERS_RESULT_NONE) *res = rc; + ++ eeh_pcid_put(dev); + return 0; + } + +@@ -165,18 +208,20 @@ static int eeh_report_mmio_enabled(struct pci_dev *dev, void *userdata) + static int eeh_report_reset(struct pci_dev *dev, void *userdata) + { + enum pci_ers_result rc, *res = userdata; +- struct pci_driver *driver = dev->driver; +- +- if (!driver) +- return 0; ++ struct pci_driver *driver; + + dev->error_state = pci_channel_io_normal; + ++ driver = eeh_pcid_get(dev); ++ if (!driver) return 0; ++ + eeh_enable_irq(dev); + + if (!driver->err_handler || +- !driver->err_handler->slot_reset) ++ !driver->err_handler->slot_reset) { ++ eeh_pcid_put(dev); + return 0; ++ } + + rc = driver->err_handler->slot_reset(dev); + if ((*res == PCI_ERS_RESULT_NONE) || +@@ -184,6 +229,7 @@ static int eeh_report_reset(struct pci_dev *dev, void *userdata) + if (*res == PCI_ERS_RESULT_DISCONNECT && + rc == PCI_ERS_RESULT_NEED_RESET) *res = rc; + ++ eeh_pcid_put(dev); + return 0; + } + +@@ -193,21 +239,24 @@ static int eeh_report_reset(struct pci_dev *dev, void *userdata) + + static int eeh_report_resume(struct pci_dev *dev, void *userdata) + { +- struct pci_driver *driver = dev->driver; ++ struct pci_driver *driver; + + dev->error_state = pci_channel_io_normal; + +- if (!driver) +- return 0; ++ driver = eeh_pcid_get(dev); ++ if (!driver) return 0; + + eeh_enable_irq(dev); + + if (!driver->err_handler || +- !driver->err_handler->resume) ++ !driver->err_handler->resume) { ++ eeh_pcid_put(dev); + return 0; ++ } + + driver->err_handler->resume(dev); + ++ eeh_pcid_put(dev); + return 0; + } + +@@ -220,21 +269,24 @@ static int eeh_report_resume(struct pci_dev *dev, void *userdata) + + static int 
eeh_report_failure(struct pci_dev *dev, void *userdata) + { +- struct pci_driver *driver = dev->driver; ++ struct pci_driver *driver; + + dev->error_state = pci_channel_io_perm_failure; + +- if (!driver) +- return 0; ++ driver = eeh_pcid_get(dev); ++ if (!driver) return 0; + + eeh_disable_irq(dev); + + if (!driver->err_handler || +- !driver->err_handler->error_detected) ++ !driver->err_handler->error_detected) { ++ eeh_pcid_put(dev); + return 0; ++ } + + driver->err_handler->error_detected(dev, pci_channel_io_perm_failure); + ++ eeh_pcid_put(dev); + return 0; + } + +diff --git a/arch/x86/include/asm/pgtable.h b/arch/x86/include/asm/pgtable.h +index 18601c8..884507e 100644 +--- a/arch/x86/include/asm/pgtable.h ++++ b/arch/x86/include/asm/pgtable.h +@@ -146,8 +146,7 @@ static inline unsigned long pmd_pfn(pmd_t pmd) + + static inline int pmd_large(pmd_t pte) + { +- return (pmd_flags(pte) & (_PAGE_PSE | _PAGE_PRESENT)) == +- (_PAGE_PSE | _PAGE_PRESENT); ++ return pmd_flags(pte) & _PAGE_PSE; + } + + #ifdef CONFIG_TRANSPARENT_HUGEPAGE +@@ -415,7 +414,13 @@ static inline int pte_hidden(pte_t pte) + + static inline int pmd_present(pmd_t pmd) + { +- return pmd_flags(pmd) & _PAGE_PRESENT; ++ /* ++ * Checking for _PAGE_PSE is needed too because ++ * split_huge_page will temporarily clear the present bit (but ++ * the _PAGE_PSE flag will remain set at all times while the ++ * _PAGE_PRESENT bit is clear). ++ */ ++ return pmd_flags(pmd) & (_PAGE_PRESENT | _PAGE_PROTNONE | _PAGE_PSE); + } + + static inline int pmd_none(pmd_t pmd) +diff --git a/arch/x86/platform/efi/efi.c b/arch/x86/platform/efi/efi.c +index 37718f0..4d320b2 100644 +--- a/arch/x86/platform/efi/efi.c ++++ b/arch/x86/platform/efi/efi.c +@@ -731,6 +731,7 @@ void __init efi_enter_virtual_mode(void) + * + * Call EFI services through wrapper functions. + */ ++ efi.runtime_version = efi_systab.fw_revision; + efi.get_time = virt_efi_get_time; + efi.set_time = virt_efi_set_time; + efi.get_wakeup_time = virt_efi_get_wakeup_time; +diff --git a/drivers/acpi/bus.c b/drivers/acpi/bus.c +index 9ecec98..5016de5 100644 +--- a/drivers/acpi/bus.c ++++ b/drivers/acpi/bus.c +@@ -950,8 +950,6 @@ static int __init acpi_bus_init(void) + status = acpi_ec_ecdt_probe(); + /* Ignore result. Not having an ECDT is not fatal. */ + +- acpi_bus_osc_support(); +- + status = acpi_initialize_objects(ACPI_FULL_INITIALIZATION); + if (ACPI_FAILURE(status)) { + printk(KERN_ERR PREFIX "Unable to initialize ACPI objects\n"); +@@ -959,6 +957,12 @@ static int __init acpi_bus_init(void) + } + + /* ++ * _OSC method may exist in module level code, ++ * so it must be run after ACPI_FULL_INITIALIZATION ++ */ ++ acpi_bus_osc_support(); ++ ++ /* + * _PDC control method may load dynamic SSDT tables, + * and we need to install the table handler before that. 
+ */ +diff --git a/drivers/bluetooth/btusb.c b/drivers/bluetooth/btusb.c +index 6f95d98..1f90dab 100644 +--- a/drivers/bluetooth/btusb.c ++++ b/drivers/bluetooth/btusb.c +@@ -108,7 +108,7 @@ static struct usb_device_id btusb_table[] = { + { USB_DEVICE(0x413c, 0x8197) }, + + /* Foxconn - Hon Hai */ +- { USB_DEVICE(0x0489, 0xe033) }, ++ { USB_VENDOR_AND_INTERFACE_INFO(0x0489, 0xff, 0x01, 0x01) }, + + /*Broadcom devices with vendor specific id */ + { USB_VENDOR_AND_INTERFACE_INFO(0x0a5c, 0xff, 0x01, 0x01) }, +diff --git a/drivers/char/ttyprintk.c b/drivers/char/ttyprintk.c +index eedd547..5936691 100644 +--- a/drivers/char/ttyprintk.c ++++ b/drivers/char/ttyprintk.c +@@ -67,7 +67,7 @@ static int tpk_printk(const unsigned char *buf, int count) + tmp[tpk_curr + 1] = '\0'; + printk(KERN_INFO "%s%s", tpk_tag, tmp); + tpk_curr = 0; +- if (buf[i + 1] == '\n') ++ if ((i + 1) < count && buf[i + 1] == '\n') + i++; + break; + case '\n': +diff --git a/drivers/dma/dmaengine.c b/drivers/dma/dmaengine.c +index b48967b..5991114 100644 +--- a/drivers/dma/dmaengine.c ++++ b/drivers/dma/dmaengine.c +@@ -564,8 +564,8 @@ void dmaengine_get(void) + list_del_rcu(&device->global_node); + break; + } else if (err) +- pr_err("dmaengine: failed to get %s: (%d)\n", +- dma_chan_name(chan), err); ++ pr_debug("%s: failed to get %s: (%d)\n", ++ __func__, dma_chan_name(chan), err); + } + } + +diff --git a/drivers/firewire/core-cdev.c b/drivers/firewire/core-cdev.c +index 4799393..b97d4f0 100644 +--- a/drivers/firewire/core-cdev.c ++++ b/drivers/firewire/core-cdev.c +@@ -471,8 +471,8 @@ static int ioctl_get_info(struct client *client, union ioctl_arg *arg) + client->bus_reset_closure = a->bus_reset_closure; + if (a->bus_reset != 0) { + fill_bus_reset_event(&bus_reset, client); +- ret = copy_to_user(u64_to_uptr(a->bus_reset), +- &bus_reset, sizeof(bus_reset)); ++ /* unaligned size of bus_reset is 36 bytes */ ++ ret = copy_to_user(u64_to_uptr(a->bus_reset), &bus_reset, 36); + } + if (ret == 0 && list_empty(&client->link)) + list_add_tail(&client->link, &client->device->client_list); +diff --git a/drivers/firmware/efivars.c b/drivers/firmware/efivars.c +index 0535c21..3e60e8d 100644 +--- a/drivers/firmware/efivars.c ++++ b/drivers/firmware/efivars.c +@@ -435,12 +435,23 @@ efivar_attr_read(struct efivar_entry *entry, char *buf) + if (status != EFI_SUCCESS) + return -EIO; + +- if (var->Attributes & 0x1) ++ if (var->Attributes & EFI_VARIABLE_NON_VOLATILE) + str += sprintf(str, "EFI_VARIABLE_NON_VOLATILE\n"); +- if (var->Attributes & 0x2) ++ if (var->Attributes & EFI_VARIABLE_BOOTSERVICE_ACCESS) + str += sprintf(str, "EFI_VARIABLE_BOOTSERVICE_ACCESS\n"); +- if (var->Attributes & 0x4) ++ if (var->Attributes & EFI_VARIABLE_RUNTIME_ACCESS) + str += sprintf(str, "EFI_VARIABLE_RUNTIME_ACCESS\n"); ++ if (var->Attributes & EFI_VARIABLE_HARDWARE_ERROR_RECORD) ++ str += sprintf(str, "EFI_VARIABLE_HARDWARE_ERROR_RECORD\n"); ++ if (var->Attributes & EFI_VARIABLE_AUTHENTICATED_WRITE_ACCESS) ++ str += sprintf(str, ++ "EFI_VARIABLE_AUTHENTICATED_WRITE_ACCESS\n"); ++ if (var->Attributes & ++ EFI_VARIABLE_TIME_BASED_AUTHENTICATED_WRITE_ACCESS) ++ str += sprintf(str, ++ "EFI_VARIABLE_TIME_BASED_AUTHENTICATED_WRITE_ACCESS\n"); ++ if (var->Attributes & EFI_VARIABLE_APPEND_WRITE) ++ str += sprintf(str, "EFI_VARIABLE_APPEND_WRITE\n"); + return str - buf; + } + +diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c +index e48e01e..33e1555 100644 +--- a/drivers/gpu/drm/i915/i915_gem.c ++++ b/drivers/gpu/drm/i915/i915_gem.c +@@ 
-1543,16 +1543,19 @@ i915_gem_object_move_to_active(struct drm_i915_gem_object *obj, + list_move_tail(&obj->ring_list, &ring->active_list); + + obj->last_rendering_seqno = seqno; +- if (obj->fenced_gpu_access) { +- struct drm_i915_fence_reg *reg; +- +- BUG_ON(obj->fence_reg == I915_FENCE_REG_NONE); + ++ if (obj->fenced_gpu_access) { + obj->last_fenced_seqno = seqno; + obj->last_fenced_ring = ring; + +- reg = &dev_priv->fence_regs[obj->fence_reg]; +- list_move_tail(®->lru_list, &dev_priv->mm.fence_list); ++ /* Bump MRU to take account of the delayed flush */ ++ if (obj->fence_reg != I915_FENCE_REG_NONE) { ++ struct drm_i915_fence_reg *reg; ++ ++ reg = &dev_priv->fence_regs[obj->fence_reg]; ++ list_move_tail(®->lru_list, ++ &dev_priv->mm.fence_list); ++ } + } + } + +@@ -1561,6 +1564,7 @@ i915_gem_object_move_off_active(struct drm_i915_gem_object *obj) + { + list_del_init(&obj->ring_list); + obj->last_rendering_seqno = 0; ++ obj->last_fenced_seqno = 0; + } + + static void +@@ -1589,6 +1593,7 @@ i915_gem_object_move_to_inactive(struct drm_i915_gem_object *obj) + BUG_ON(!list_empty(&obj->gpu_write_list)); + BUG_ON(!obj->active); + obj->ring = NULL; ++ obj->last_fenced_ring = NULL; + + i915_gem_object_move_off_active(obj); + obj->fenced_gpu_access = false; +diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/i915_gem_execbuffer.c +index a6c2f7a..1202198 100644 +--- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c ++++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c +@@ -574,7 +574,8 @@ i915_gem_execbuffer_reserve(struct intel_ring_buffer *ring, + if (ret) + break; + } +- obj->pending_fenced_gpu_access = need_fence; ++ obj->pending_fenced_gpu_access = ++ !!(entry->flags & EXEC_OBJECT_NEEDS_FENCE); + } + + entry->offset = obj->gtt_offset; +diff --git a/drivers/gpu/drm/i915/i915_gem_tiling.c b/drivers/gpu/drm/i915/i915_gem_tiling.c +index 31d334d..861223b 100644 +--- a/drivers/gpu/drm/i915/i915_gem_tiling.c ++++ b/drivers/gpu/drm/i915/i915_gem_tiling.c +@@ -107,10 +107,10 @@ i915_gem_detect_bit_6_swizzle(struct drm_device *dev) + */ + swizzle_x = I915_BIT_6_SWIZZLE_NONE; + swizzle_y = I915_BIT_6_SWIZZLE_NONE; +- } else if (IS_MOBILE(dev)) { ++ } else if (IS_MOBILE(dev) || (IS_GEN3(dev) && !IS_G33(dev))) { + uint32_t dcc; + +- /* On mobile 9xx chipsets, channel interleave by the CPU is ++ /* On 9xx chipsets, channel interleave by the CPU is + * determined by DCC. For single-channel, neither the CPU + * nor the GPU do swizzling. 
For dual channel interleaved, + * the GPU's interleave is bit 9 and 10 for X tiled, and bit +diff --git a/drivers/gpu/drm/i915/i915_irq.c b/drivers/gpu/drm/i915/i915_irq.c +index c8b5bc1..2812d7b 100644 +--- a/drivers/gpu/drm/i915/i915_irq.c ++++ b/drivers/gpu/drm/i915/i915_irq.c +@@ -530,6 +530,12 @@ static irqreturn_t ivybridge_irq_handler(DRM_IRQ_ARGS) + if (de_iir & DE_GSE_IVB) + intel_opregion_gse_intr(dev); + ++ if (de_iir & DE_PIPEA_VBLANK_IVB) ++ drm_handle_vblank(dev, 0); ++ ++ if (de_iir & DE_PIPEB_VBLANK_IVB) ++ drm_handle_vblank(dev, 1); ++ + if (de_iir & DE_PLANEA_FLIP_DONE_IVB) { + intel_prepare_page_flip(dev, 0); + intel_finish_page_flip_plane(dev, 0); +@@ -540,12 +546,6 @@ static irqreturn_t ivybridge_irq_handler(DRM_IRQ_ARGS) + intel_finish_page_flip_plane(dev, 1); + } + +- if (de_iir & DE_PIPEA_VBLANK_IVB) +- drm_handle_vblank(dev, 0); +- +- if (de_iir & DE_PIPEB_VBLANK_IVB) +- drm_handle_vblank(dev, 1); +- + /* check event from PCH */ + if (de_iir & DE_PCH_EVENT_IVB) { + if (pch_iir & SDE_HOTPLUG_MASK_CPT) +@@ -622,6 +622,12 @@ static irqreturn_t ironlake_irq_handler(DRM_IRQ_ARGS) + if (de_iir & DE_GSE) + intel_opregion_gse_intr(dev); + ++ if (de_iir & DE_PIPEA_VBLANK) ++ drm_handle_vblank(dev, 0); ++ ++ if (de_iir & DE_PIPEB_VBLANK) ++ drm_handle_vblank(dev, 1); ++ + if (de_iir & DE_PLANEA_FLIP_DONE) { + intel_prepare_page_flip(dev, 0); + intel_finish_page_flip_plane(dev, 0); +@@ -632,12 +638,6 @@ static irqreturn_t ironlake_irq_handler(DRM_IRQ_ARGS) + intel_finish_page_flip_plane(dev, 1); + } + +- if (de_iir & DE_PIPEA_VBLANK) +- drm_handle_vblank(dev, 0); +- +- if (de_iir & DE_PIPEB_VBLANK) +- drm_handle_vblank(dev, 1); +- + /* check event from PCH */ + if (de_iir & DE_PCH_EVENT) { + if (pch_iir & hotplug_mask) +diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h +index 4a5e662..a294a32 100644 +--- a/drivers/gpu/drm/i915/i915_reg.h ++++ b/drivers/gpu/drm/i915/i915_reg.h +@@ -401,6 +401,9 @@ + # define VS_TIMER_DISPATCH (1 << 6) + # define MI_FLUSH_ENABLE (1 << 11) + ++#define GEN6_GT_MODE 0x20d0 ++#define GEN6_GT_MODE_HI (1 << 9) ++ + #define GFX_MODE 0x02520 + #define GFX_MODE_GEN7 0x0229c + #define GFX_RUN_LIST_ENABLE (1<<15) +@@ -1557,6 +1560,10 @@ + + /* Video Data Island Packet control */ + #define VIDEO_DIP_DATA 0x61178 ++/* Read the description of VIDEO_DIP_DATA (before Haswel) or VIDEO_DIP_ECC ++ * (Haswell and newer) to see which VIDEO_DIP_DATA byte corresponds to each byte ++ * of the infoframe structure specified by CEA-861. 
*/ ++#define VIDEO_DIP_DATA_SIZE 32 + #define VIDEO_DIP_CTL 0x61170 + #define VIDEO_DIP_ENABLE (1 << 31) + #define VIDEO_DIP_PORT_B (1 << 29) +diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c +index 6c3fb44..adac0dd 100644 +--- a/drivers/gpu/drm/i915/intel_display.c ++++ b/drivers/gpu/drm/i915/intel_display.c +@@ -2850,13 +2850,34 @@ static void intel_clear_scanline_wait(struct drm_device *dev) + I915_WRITE_CTL(ring, tmp); + } + ++static bool intel_crtc_has_pending_flip(struct drm_crtc *crtc) ++{ ++ struct drm_device *dev = crtc->dev; ++ struct drm_i915_private *dev_priv = dev->dev_private; ++ unsigned long flags; ++ bool pending; ++ ++ if (atomic_read(&dev_priv->mm.wedged)) ++ return false; ++ ++ spin_lock_irqsave(&dev->event_lock, flags); ++ pending = to_intel_crtc(crtc)->unpin_work != NULL; ++ spin_unlock_irqrestore(&dev->event_lock, flags); ++ ++ return pending; ++} ++ + static void intel_crtc_wait_for_pending_flips(struct drm_crtc *crtc) + { + struct drm_device *dev = crtc->dev; ++ struct drm_i915_private *dev_priv = dev->dev_private; + + if (crtc->fb == NULL) + return; + ++ wait_event(dev_priv->pending_flip_queue, ++ !intel_crtc_has_pending_flip(crtc)); ++ + mutex_lock(&dev->struct_mutex); + intel_finish_fb(crtc->fb); + mutex_unlock(&dev->struct_mutex); +@@ -5027,7 +5048,7 @@ static int i9xx_crtc_mode_set(struct drm_crtc *crtc, + /* default to 8bpc */ + pipeconf &= ~(PIPECONF_BPP_MASK | PIPECONF_DITHER_EN); + if (is_dp) { +- if (mode->private_flags & INTEL_MODE_DP_FORCE_6BPC) { ++ if (adjusted_mode->private_flags & INTEL_MODE_DP_FORCE_6BPC) { + pipeconf |= PIPECONF_BPP_6 | + PIPECONF_DITHER_EN | + PIPECONF_DITHER_TYPE_SP; +@@ -5495,7 +5516,7 @@ static int ironlake_crtc_mode_set(struct drm_crtc *crtc, + /* determine panel color depth */ + temp = I915_READ(PIPECONF(pipe)); + temp &= ~PIPE_BPC_MASK; +- dither = intel_choose_pipe_bpp_dither(crtc, &pipe_bpp, mode); ++ dither = intel_choose_pipe_bpp_dither(crtc, &pipe_bpp, adjusted_mode); + switch (pipe_bpp) { + case 18: + temp |= PIPE_6BPC; +@@ -6952,9 +6973,8 @@ static void do_intel_finish_page_flip(struct drm_device *dev, + + atomic_clear_mask(1 << intel_crtc->plane, + &obj->pending_flip.counter); +- if (atomic_read(&obj->pending_flip) == 0) +- wake_up(&dev_priv->pending_flip_queue); + ++ wake_up(&dev_priv->pending_flip_queue); + schedule_work(&work->work); + + trace_i915_flip_complete(intel_crtc->plane, work->pending_flip_obj); +@@ -7193,7 +7213,7 @@ static int intel_gen7_queue_flip(struct drm_device *dev, + default: + WARN_ONCE(1, "unknown plane in flip command\n"); + ret = -ENODEV; +- goto err; ++ goto err_unpin; + } + + ret = intel_ring_begin(ring, 4); +@@ -8278,6 +8298,11 @@ static void gen6_init_clock_gating(struct drm_device *dev) + DISPPLANE_TRICKLE_FEED_DISABLE); + intel_flush_display_plane(dev_priv, pipe); + } ++ ++ /* The default value should be 0x200 according to docs, but the two ++ * platforms I checked have a 0 for this. (Maybe BIOS overrides?) 
*/ ++ I915_WRITE(GEN6_GT_MODE, 0xffff << 16); ++ I915_WRITE(GEN6_GT_MODE, GEN6_GT_MODE_HI << 16 | GEN6_GT_MODE_HI); + } + + static void gen7_setup_fixed_func_scheduler(struct drm_i915_private *dev_priv) +diff --git a/drivers/gpu/drm/i915/intel_hdmi.c b/drivers/gpu/drm/i915/intel_hdmi.c +index c2a64f4..497da2a 100644 +--- a/drivers/gpu/drm/i915/intel_hdmi.c ++++ b/drivers/gpu/drm/i915/intel_hdmi.c +@@ -138,14 +138,20 @@ static void i9xx_write_infoframe(struct drm_encoder *encoder, + + I915_WRITE(VIDEO_DIP_CTL, VIDEO_DIP_ENABLE | val | port | flags); + ++ mmiowb(); + for (i = 0; i < len; i += 4) { + I915_WRITE(VIDEO_DIP_DATA, *data); + data++; + } ++ /* Write every possible data byte to force correct ECC calculation. */ ++ for (; i < VIDEO_DIP_DATA_SIZE; i += 4) ++ I915_WRITE(VIDEO_DIP_DATA, 0); ++ mmiowb(); + + flags |= intel_infoframe_flags(frame); + + I915_WRITE(VIDEO_DIP_CTL, VIDEO_DIP_ENABLE | val | port | flags); ++ POSTING_READ(VIDEO_DIP_CTL); + } + + static void ironlake_write_infoframe(struct drm_encoder *encoder, +@@ -168,14 +174,20 @@ static void ironlake_write_infoframe(struct drm_encoder *encoder, + + I915_WRITE(reg, VIDEO_DIP_ENABLE | val | flags); + ++ mmiowb(); + for (i = 0; i < len; i += 4) { + I915_WRITE(TVIDEO_DIP_DATA(intel_crtc->pipe), *data); + data++; + } ++ /* Write every possible data byte to force correct ECC calculation. */ ++ for (; i < VIDEO_DIP_DATA_SIZE; i += 4) ++ I915_WRITE(TVIDEO_DIP_DATA(intel_crtc->pipe), 0); ++ mmiowb(); + + flags |= intel_infoframe_flags(frame); + + I915_WRITE(reg, VIDEO_DIP_ENABLE | val | flags); ++ POSTING_READ(reg); + } + static void intel_set_infoframe(struct drm_encoder *encoder, + struct dip_infoframe *frame) +@@ -546,10 +558,13 @@ void intel_hdmi_init(struct drm_device *dev, int sdvox_reg) + if (!HAS_PCH_SPLIT(dev)) { + intel_hdmi->write_infoframe = i9xx_write_infoframe; + I915_WRITE(VIDEO_DIP_CTL, 0); ++ POSTING_READ(VIDEO_DIP_CTL); + } else { + intel_hdmi->write_infoframe = ironlake_write_infoframe; +- for_each_pipe(i) ++ for_each_pipe(i) { + I915_WRITE(TVIDEO_DIP_CTL(i), 0); ++ POSTING_READ(TVIDEO_DIP_CTL(i)); ++ } + } + + drm_encoder_helper_add(&intel_encoder->base, &intel_hdmi_helper_funcs); +diff --git a/drivers/gpu/drm/radeon/evergreen.c b/drivers/gpu/drm/radeon/evergreen.c +index fc0633c..b61f490 100644 +--- a/drivers/gpu/drm/radeon/evergreen.c ++++ b/drivers/gpu/drm/radeon/evergreen.c +@@ -37,6 +37,16 @@ + #define EVERGREEN_PFP_UCODE_SIZE 1120 + #define EVERGREEN_PM4_UCODE_SIZE 1376 + ++static const u32 crtc_offsets[6] = ++{ ++ EVERGREEN_CRTC0_REGISTER_OFFSET, ++ EVERGREEN_CRTC1_REGISTER_OFFSET, ++ EVERGREEN_CRTC2_REGISTER_OFFSET, ++ EVERGREEN_CRTC3_REGISTER_OFFSET, ++ EVERGREEN_CRTC4_REGISTER_OFFSET, ++ EVERGREEN_CRTC5_REGISTER_OFFSET ++}; ++ + static void evergreen_gpu_init(struct radeon_device *rdev); + void evergreen_fini(struct radeon_device *rdev); + void evergreen_pcie_gen2_enable(struct radeon_device *rdev); +@@ -66,6 +76,27 @@ void evergreen_fix_pci_max_read_req_size(struct radeon_device *rdev) + } + } + ++void dce4_wait_for_vblank(struct radeon_device *rdev, int crtc) ++{ ++ int i; ++ ++ if (crtc >= rdev->num_crtc) ++ return; ++ ++ if (RREG32(EVERGREEN_CRTC_CONTROL + crtc_offsets[crtc]) & EVERGREEN_CRTC_MASTER_EN) { ++ for (i = 0; i < rdev->usec_timeout; i++) { ++ if (!(RREG32(EVERGREEN_CRTC_STATUS + crtc_offsets[crtc]) & EVERGREEN_CRTC_V_BLANK)) ++ break; ++ udelay(1); ++ } ++ for (i = 0; i < rdev->usec_timeout; i++) { ++ if (RREG32(EVERGREEN_CRTC_STATUS + crtc_offsets[crtc]) & EVERGREEN_CRTC_V_BLANK) ++ break; 
++ udelay(1); ++ } ++ } ++} ++ + void evergreen_pre_page_flip(struct radeon_device *rdev, int crtc) + { + /* enable the pflip int */ +@@ -1065,116 +1096,88 @@ void evergreen_agp_enable(struct radeon_device *rdev) + + void evergreen_mc_stop(struct radeon_device *rdev, struct evergreen_mc_save *save) + { ++ u32 crtc_enabled, tmp, frame_count, blackout; ++ int i, j; ++ + save->vga_render_control = RREG32(VGA_RENDER_CONTROL); + save->vga_hdp_control = RREG32(VGA_HDP_CONTROL); + +- /* Stop all video */ ++ /* disable VGA render */ + WREG32(VGA_RENDER_CONTROL, 0); +- WREG32(EVERGREEN_CRTC_UPDATE_LOCK + EVERGREEN_CRTC0_REGISTER_OFFSET, 1); +- WREG32(EVERGREEN_CRTC_UPDATE_LOCK + EVERGREEN_CRTC1_REGISTER_OFFSET, 1); +- if (rdev->num_crtc >= 4) { +- WREG32(EVERGREEN_CRTC_UPDATE_LOCK + EVERGREEN_CRTC2_REGISTER_OFFSET, 1); +- WREG32(EVERGREEN_CRTC_UPDATE_LOCK + EVERGREEN_CRTC3_REGISTER_OFFSET, 1); +- } +- if (rdev->num_crtc >= 6) { +- WREG32(EVERGREEN_CRTC_UPDATE_LOCK + EVERGREEN_CRTC4_REGISTER_OFFSET, 1); +- WREG32(EVERGREEN_CRTC_UPDATE_LOCK + EVERGREEN_CRTC5_REGISTER_OFFSET, 1); +- } +- WREG32(EVERGREEN_CRTC_CONTROL + EVERGREEN_CRTC0_REGISTER_OFFSET, 0); +- WREG32(EVERGREEN_CRTC_CONTROL + EVERGREEN_CRTC1_REGISTER_OFFSET, 0); +- if (rdev->num_crtc >= 4) { +- WREG32(EVERGREEN_CRTC_CONTROL + EVERGREEN_CRTC2_REGISTER_OFFSET, 0); +- WREG32(EVERGREEN_CRTC_CONTROL + EVERGREEN_CRTC3_REGISTER_OFFSET, 0); +- } +- if (rdev->num_crtc >= 6) { +- WREG32(EVERGREEN_CRTC_CONTROL + EVERGREEN_CRTC4_REGISTER_OFFSET, 0); +- WREG32(EVERGREEN_CRTC_CONTROL + EVERGREEN_CRTC5_REGISTER_OFFSET, 0); +- } +- WREG32(EVERGREEN_CRTC_UPDATE_LOCK + EVERGREEN_CRTC0_REGISTER_OFFSET, 0); +- WREG32(EVERGREEN_CRTC_UPDATE_LOCK + EVERGREEN_CRTC1_REGISTER_OFFSET, 0); +- if (rdev->num_crtc >= 4) { +- WREG32(EVERGREEN_CRTC_UPDATE_LOCK + EVERGREEN_CRTC2_REGISTER_OFFSET, 0); +- WREG32(EVERGREEN_CRTC_UPDATE_LOCK + EVERGREEN_CRTC3_REGISTER_OFFSET, 0); +- } +- if (rdev->num_crtc >= 6) { +- WREG32(EVERGREEN_CRTC_UPDATE_LOCK + EVERGREEN_CRTC4_REGISTER_OFFSET, 0); +- WREG32(EVERGREEN_CRTC_UPDATE_LOCK + EVERGREEN_CRTC5_REGISTER_OFFSET, 0); ++ /* blank the display controllers */ ++ for (i = 0; i < rdev->num_crtc; i++) { ++ crtc_enabled = RREG32(EVERGREEN_CRTC_CONTROL + crtc_offsets[i]) & EVERGREEN_CRTC_MASTER_EN; ++ if (crtc_enabled) { ++ save->crtc_enabled[i] = true; ++ tmp = RREG32(EVERGREEN_CRTC_CONTROL + crtc_offsets[i]); ++ if (!(tmp & EVERGREEN_CRTC_DISP_READ_REQUEST_DISABLE)) { ++ dce4_wait_for_vblank(rdev, i); ++ tmp |= EVERGREEN_CRTC_DISP_READ_REQUEST_DISABLE; ++ WREG32(EVERGREEN_CRTC_CONTROL + crtc_offsets[i], tmp); ++ } ++ /* wait for the next frame */ ++ frame_count = radeon_get_vblank_counter(rdev, i); ++ for (j = 0; j < rdev->usec_timeout; j++) { ++ if (radeon_get_vblank_counter(rdev, i) != frame_count) ++ break; ++ udelay(1); ++ } ++ } + } + +- WREG32(D1VGA_CONTROL, 0); +- WREG32(D2VGA_CONTROL, 0); +- if (rdev->num_crtc >= 4) { +- WREG32(EVERGREEN_D3VGA_CONTROL, 0); +- WREG32(EVERGREEN_D4VGA_CONTROL, 0); +- } +- if (rdev->num_crtc >= 6) { +- WREG32(EVERGREEN_D5VGA_CONTROL, 0); +- WREG32(EVERGREEN_D6VGA_CONTROL, 0); ++ evergreen_mc_wait_for_idle(rdev); ++ ++ blackout = RREG32(MC_SHARED_BLACKOUT_CNTL); ++ if ((blackout & BLACKOUT_MODE_MASK) != 1) { ++ /* Block CPU access */ ++ WREG32(BIF_FB_EN, 0); ++ /* blackout the MC */ ++ blackout &= ~BLACKOUT_MODE_MASK; ++ WREG32(MC_SHARED_BLACKOUT_CNTL, blackout | 1); + } + } + + void evergreen_mc_resume(struct radeon_device *rdev, struct evergreen_mc_save *save) + { +- 
WREG32(EVERGREEN_GRPH_PRIMARY_SURFACE_ADDRESS_HIGH + EVERGREEN_CRTC0_REGISTER_OFFSET, +- upper_32_bits(rdev->mc.vram_start)); +- WREG32(EVERGREEN_GRPH_SECONDARY_SURFACE_ADDRESS_HIGH + EVERGREEN_CRTC0_REGISTER_OFFSET, +- upper_32_bits(rdev->mc.vram_start)); +- WREG32(EVERGREEN_GRPH_PRIMARY_SURFACE_ADDRESS + EVERGREEN_CRTC0_REGISTER_OFFSET, +- (u32)rdev->mc.vram_start); +- WREG32(EVERGREEN_GRPH_SECONDARY_SURFACE_ADDRESS + EVERGREEN_CRTC0_REGISTER_OFFSET, +- (u32)rdev->mc.vram_start); +- +- WREG32(EVERGREEN_GRPH_PRIMARY_SURFACE_ADDRESS_HIGH + EVERGREEN_CRTC1_REGISTER_OFFSET, +- upper_32_bits(rdev->mc.vram_start)); +- WREG32(EVERGREEN_GRPH_SECONDARY_SURFACE_ADDRESS_HIGH + EVERGREEN_CRTC1_REGISTER_OFFSET, +- upper_32_bits(rdev->mc.vram_start)); +- WREG32(EVERGREEN_GRPH_PRIMARY_SURFACE_ADDRESS + EVERGREEN_CRTC1_REGISTER_OFFSET, +- (u32)rdev->mc.vram_start); +- WREG32(EVERGREEN_GRPH_SECONDARY_SURFACE_ADDRESS + EVERGREEN_CRTC1_REGISTER_OFFSET, +- (u32)rdev->mc.vram_start); +- +- if (rdev->num_crtc >= 4) { +- WREG32(EVERGREEN_GRPH_PRIMARY_SURFACE_ADDRESS_HIGH + EVERGREEN_CRTC2_REGISTER_OFFSET, +- upper_32_bits(rdev->mc.vram_start)); +- WREG32(EVERGREEN_GRPH_SECONDARY_SURFACE_ADDRESS_HIGH + EVERGREEN_CRTC2_REGISTER_OFFSET, +- upper_32_bits(rdev->mc.vram_start)); +- WREG32(EVERGREEN_GRPH_PRIMARY_SURFACE_ADDRESS + EVERGREEN_CRTC2_REGISTER_OFFSET, +- (u32)rdev->mc.vram_start); +- WREG32(EVERGREEN_GRPH_SECONDARY_SURFACE_ADDRESS + EVERGREEN_CRTC2_REGISTER_OFFSET, +- (u32)rdev->mc.vram_start); +- +- WREG32(EVERGREEN_GRPH_PRIMARY_SURFACE_ADDRESS_HIGH + EVERGREEN_CRTC3_REGISTER_OFFSET, +- upper_32_bits(rdev->mc.vram_start)); +- WREG32(EVERGREEN_GRPH_SECONDARY_SURFACE_ADDRESS_HIGH + EVERGREEN_CRTC3_REGISTER_OFFSET, +- upper_32_bits(rdev->mc.vram_start)); +- WREG32(EVERGREEN_GRPH_PRIMARY_SURFACE_ADDRESS + EVERGREEN_CRTC3_REGISTER_OFFSET, +- (u32)rdev->mc.vram_start); +- WREG32(EVERGREEN_GRPH_SECONDARY_SURFACE_ADDRESS + EVERGREEN_CRTC3_REGISTER_OFFSET, +- (u32)rdev->mc.vram_start); +- } +- if (rdev->num_crtc >= 6) { +- WREG32(EVERGREEN_GRPH_PRIMARY_SURFACE_ADDRESS_HIGH + EVERGREEN_CRTC4_REGISTER_OFFSET, +- upper_32_bits(rdev->mc.vram_start)); +- WREG32(EVERGREEN_GRPH_SECONDARY_SURFACE_ADDRESS_HIGH + EVERGREEN_CRTC4_REGISTER_OFFSET, +- upper_32_bits(rdev->mc.vram_start)); +- WREG32(EVERGREEN_GRPH_PRIMARY_SURFACE_ADDRESS + EVERGREEN_CRTC4_REGISTER_OFFSET, +- (u32)rdev->mc.vram_start); +- WREG32(EVERGREEN_GRPH_SECONDARY_SURFACE_ADDRESS + EVERGREEN_CRTC4_REGISTER_OFFSET, +- (u32)rdev->mc.vram_start); ++ u32 tmp, frame_count; ++ int i, j; + +- WREG32(EVERGREEN_GRPH_PRIMARY_SURFACE_ADDRESS_HIGH + EVERGREEN_CRTC5_REGISTER_OFFSET, ++ /* update crtc base addresses */ ++ for (i = 0; i < rdev->num_crtc; i++) { ++ WREG32(EVERGREEN_GRPH_PRIMARY_SURFACE_ADDRESS_HIGH + crtc_offsets[i], + upper_32_bits(rdev->mc.vram_start)); +- WREG32(EVERGREEN_GRPH_SECONDARY_SURFACE_ADDRESS_HIGH + EVERGREEN_CRTC5_REGISTER_OFFSET, ++ WREG32(EVERGREEN_GRPH_SECONDARY_SURFACE_ADDRESS_HIGH + crtc_offsets[i], + upper_32_bits(rdev->mc.vram_start)); +- WREG32(EVERGREEN_GRPH_PRIMARY_SURFACE_ADDRESS + EVERGREEN_CRTC5_REGISTER_OFFSET, ++ WREG32(EVERGREEN_GRPH_PRIMARY_SURFACE_ADDRESS + crtc_offsets[i], + (u32)rdev->mc.vram_start); +- WREG32(EVERGREEN_GRPH_SECONDARY_SURFACE_ADDRESS + EVERGREEN_CRTC5_REGISTER_OFFSET, ++ WREG32(EVERGREEN_GRPH_SECONDARY_SURFACE_ADDRESS + crtc_offsets[i], + (u32)rdev->mc.vram_start); + } +- + WREG32(EVERGREEN_VGA_MEMORY_BASE_ADDRESS_HIGH, upper_32_bits(rdev->mc.vram_start)); + WREG32(EVERGREEN_VGA_MEMORY_BASE_ADDRESS, 
(u32)rdev->mc.vram_start); +- /* Unlock host access */ ++ ++ /* unblackout the MC */ ++ tmp = RREG32(MC_SHARED_BLACKOUT_CNTL); ++ tmp &= ~BLACKOUT_MODE_MASK; ++ WREG32(MC_SHARED_BLACKOUT_CNTL, tmp); ++ /* allow CPU access */ ++ WREG32(BIF_FB_EN, FB_READ_EN | FB_WRITE_EN); ++ ++ for (i = 0; i < rdev->num_crtc; i++) { ++ if (save->crtc_enabled) { ++ tmp = RREG32(EVERGREEN_CRTC_CONTROL + crtc_offsets[i]); ++ tmp &= ~EVERGREEN_CRTC_DISP_READ_REQUEST_DISABLE; ++ WREG32(EVERGREEN_CRTC_CONTROL + crtc_offsets[i], tmp); ++ /* wait for the next frame */ ++ frame_count = radeon_get_vblank_counter(rdev, i); ++ for (j = 0; j < rdev->usec_timeout; j++) { ++ if (radeon_get_vblank_counter(rdev, i) != frame_count) ++ break; ++ udelay(1); ++ } ++ } ++ } ++ /* Unlock vga access */ + WREG32(VGA_HDP_CONTROL, save->vga_hdp_control); + mdelay(1); + WREG32(VGA_RENDER_CONTROL, save->vga_render_control); +diff --git a/drivers/gpu/drm/radeon/evergreen_reg.h b/drivers/gpu/drm/radeon/evergreen_reg.h +index 7d7f215..e022776 100644 +--- a/drivers/gpu/drm/radeon/evergreen_reg.h ++++ b/drivers/gpu/drm/radeon/evergreen_reg.h +@@ -210,7 +210,10 @@ + #define EVERGREEN_CRTC_CONTROL 0x6e70 + # define EVERGREEN_CRTC_MASTER_EN (1 << 0) + # define EVERGREEN_CRTC_DISP_READ_REQUEST_DISABLE (1 << 24) ++#define EVERGREEN_CRTC_BLANK_CONTROL 0x6e74 ++# define EVERGREEN_CRTC_BLANK_DATA_EN (1 << 8) + #define EVERGREEN_CRTC_STATUS 0x6e8c ++# define EVERGREEN_CRTC_V_BLANK (1 << 0) + #define EVERGREEN_CRTC_STATUS_POSITION 0x6e90 + #define EVERGREEN_MASTER_UPDATE_MODE 0x6ef8 + #define EVERGREEN_CRTC_UPDATE_LOCK 0x6ed4 +diff --git a/drivers/gpu/drm/radeon/evergreend.h b/drivers/gpu/drm/radeon/evergreend.h +index 6ecd23f..fe44a95 100644 +--- a/drivers/gpu/drm/radeon/evergreend.h ++++ b/drivers/gpu/drm/radeon/evergreend.h +@@ -77,6 +77,10 @@ + + #define CONFIG_MEMSIZE 0x5428 + ++#define BIF_FB_EN 0x5490 ++#define FB_READ_EN (1 << 0) ++#define FB_WRITE_EN (1 << 1) ++ + #define CP_ME_CNTL 0x86D8 + #define CP_ME_HALT (1 << 28) + #define CP_PFP_HALT (1 << 26) +@@ -194,6 +198,9 @@ + #define NOOFCHAN_MASK 0x00003000 + #define MC_SHARED_CHREMAP 0x2008 + ++#define MC_SHARED_BLACKOUT_CNTL 0x20ac ++#define BLACKOUT_MODE_MASK 0x00000007 ++ + #define MC_ARB_RAMCFG 0x2760 + #define NOOFBANK_SHIFT 0 + #define NOOFBANK_MASK 0x00000003 +diff --git a/drivers/gpu/drm/radeon/radeon_asic.h b/drivers/gpu/drm/radeon/radeon_asic.h +index 5ce9402..5aa6670 100644 +--- a/drivers/gpu/drm/radeon/radeon_asic.h ++++ b/drivers/gpu/drm/radeon/radeon_asic.h +@@ -386,6 +386,7 @@ void r700_cp_fini(struct radeon_device *rdev); + struct evergreen_mc_save { + u32 vga_render_control; + u32 vga_hdp_control; ++ bool crtc_enabled[RADEON_MAX_CRTCS]; + }; + + void evergreen_pcie_gart_tlb_flush(struct radeon_device *rdev); +diff --git a/drivers/gpu/drm/radeon/radeon_irq_kms.c b/drivers/gpu/drm/radeon/radeon_irq_kms.c +index baa019e..4f9496e 100644 +--- a/drivers/gpu/drm/radeon/radeon_irq_kms.c ++++ b/drivers/gpu/drm/radeon/radeon_irq_kms.c +@@ -143,6 +143,16 @@ static bool radeon_msi_ok(struct radeon_device *rdev) + (rdev->pdev->subsystem_device == 0x01fd)) + return true; + ++ /* Gateway RS690 only seems to work with MSIs. */ ++ if ((rdev->pdev->device == 0x791f) && ++ (rdev->pdev->subsystem_vendor == 0x107b) && ++ (rdev->pdev->subsystem_device == 0x0185)) ++ return true; ++ ++ /* try and enable MSIs by default on all RS690s */ ++ if (rdev->family == CHIP_RS690) ++ return true; ++ + /* RV515 seems to have MSI issues where it loses + * MSI rearms occasionally. 
This leads to lockups and freezes. + * disable it by default. +diff --git a/drivers/gpu/drm/radeon/radeon_pm.c b/drivers/gpu/drm/radeon/radeon_pm.c +index 78a665b..ebd6c51 100644 +--- a/drivers/gpu/drm/radeon/radeon_pm.c ++++ b/drivers/gpu/drm/radeon/radeon_pm.c +@@ -553,7 +553,9 @@ void radeon_pm_suspend(struct radeon_device *rdev) + void radeon_pm_resume(struct radeon_device *rdev) + { + /* set up the default clocks if the MC ucode is loaded */ +- if (ASIC_IS_DCE5(rdev) && rdev->mc_fw) { ++ if ((rdev->family >= CHIP_BARTS) && ++ (rdev->family <= CHIP_CAYMAN) && ++ rdev->mc_fw) { + if (rdev->pm.default_vddc) + radeon_atom_set_voltage(rdev, rdev->pm.default_vddc, + SET_VOLTAGE_TYPE_ASIC_VDDC); +@@ -608,7 +610,9 @@ int radeon_pm_init(struct radeon_device *rdev) + radeon_pm_print_states(rdev); + radeon_pm_init_profile(rdev); + /* set up the default clocks if the MC ucode is loaded */ +- if (ASIC_IS_DCE5(rdev) && rdev->mc_fw) { ++ if ((rdev->family >= CHIP_BARTS) && ++ (rdev->family <= CHIP_CAYMAN) && ++ rdev->mc_fw) { + if (rdev->pm.default_vddc) + radeon_atom_set_voltage(rdev, rdev->pm.default_vddc, + SET_VOLTAGE_TYPE_ASIC_VDDC); +diff --git a/drivers/infiniband/ulp/ipoib/ipoib_main.c b/drivers/infiniband/ulp/ipoib/ipoib_main.c +index fe2fdbb..1740b82 100644 +--- a/drivers/infiniband/ulp/ipoib/ipoib_main.c ++++ b/drivers/infiniband/ulp/ipoib/ipoib_main.c +@@ -148,7 +148,7 @@ static int ipoib_stop(struct net_device *dev) + + netif_stop_queue(dev); + +- ipoib_ib_dev_down(dev, 0); ++ ipoib_ib_dev_down(dev, 1); + ipoib_ib_dev_stop(dev, 0); + + if (!test_bit(IPOIB_FLAG_SUBINTERFACE, &priv->flags)) { +diff --git a/drivers/infiniband/ulp/ipoib/ipoib_multicast.c b/drivers/infiniband/ulp/ipoib/ipoib_multicast.c +index e5069b4..80799c0 100644 +--- a/drivers/infiniband/ulp/ipoib/ipoib_multicast.c ++++ b/drivers/infiniband/ulp/ipoib/ipoib_multicast.c +@@ -190,7 +190,9 @@ static int ipoib_mcast_join_finish(struct ipoib_mcast *mcast, + + mcast->mcmember = *mcmember; + +- /* Set the cached Q_Key before we attach if it's the broadcast group */ ++ /* Set the multicast MTU and cached Q_Key before we attach if it's ++ * the broadcast group. 
++ */ + if (!memcmp(mcast->mcmember.mgid.raw, priv->dev->broadcast + 4, + sizeof (union ib_gid))) { + spin_lock_irq(&priv->lock); +@@ -198,10 +200,17 @@ static int ipoib_mcast_join_finish(struct ipoib_mcast *mcast, + spin_unlock_irq(&priv->lock); + return -EAGAIN; + } ++ priv->mcast_mtu = IPOIB_UD_MTU(ib_mtu_enum_to_int(priv->broadcast->mcmember.mtu)); + priv->qkey = be32_to_cpu(priv->broadcast->mcmember.qkey); + spin_unlock_irq(&priv->lock); + priv->tx_wr.wr.ud.remote_qkey = priv->qkey; + set_qkey = 1; ++ ++ if (!ipoib_cm_admin_enabled(dev)) { ++ rtnl_lock(); ++ dev_set_mtu(dev, min(priv->mcast_mtu, priv->admin_mtu)); ++ rtnl_unlock(); ++ } + } + + if (!test_bit(IPOIB_MCAST_FLAG_SENDONLY, &mcast->flags)) { +@@ -590,14 +599,6 @@ void ipoib_mcast_join_task(struct work_struct *work) + return; + } + +- priv->mcast_mtu = IPOIB_UD_MTU(ib_mtu_enum_to_int(priv->broadcast->mcmember.mtu)); +- +- if (!ipoib_cm_admin_enabled(dev)) { +- rtnl_lock(); +- dev_set_mtu(dev, min(priv->mcast_mtu, priv->admin_mtu)); +- rtnl_unlock(); +- } +- + ipoib_dbg_mcast(priv, "successfully joined all multicast groups\n"); + + clear_bit(IPOIB_MCAST_RUN, &priv->flags); +diff --git a/drivers/infiniband/ulp/srp/ib_srp.c b/drivers/infiniband/ulp/srp/ib_srp.c +index c76b051..4ec049d 100644 +--- a/drivers/infiniband/ulp/srp/ib_srp.c ++++ b/drivers/infiniband/ulp/srp/ib_srp.c +@@ -620,9 +620,9 @@ static void srp_reset_req(struct srp_target_port *target, struct srp_request *re + struct scsi_cmnd *scmnd = srp_claim_req(target, req, NULL); + + if (scmnd) { ++ srp_free_req(target, req, scmnd, 0); + scmnd->result = DID_RESET << 16; + scmnd->scsi_done(scmnd); +- srp_free_req(target, req, scmnd, 0); + } + } + +@@ -1669,6 +1669,7 @@ static int srp_abort(struct scsi_cmnd *scmnd) + SRP_TSK_ABORT_TASK); + srp_free_req(target, req, scmnd, 0); + scmnd->result = DID_ABORT << 16; ++ scmnd->scsi_done(scmnd); + + return SUCCESS; + } +diff --git a/drivers/input/mouse/synaptics.c b/drivers/input/mouse/synaptics.c +index 96532bc..7be5fd9 100644 +--- a/drivers/input/mouse/synaptics.c ++++ b/drivers/input/mouse/synaptics.c +@@ -53,14 +53,19 @@ + #define ABS_POS_BITS 13 + + /* +- * Any position values from the hardware above the following limits are +- * treated as "wrapped around negative" values that have been truncated to +- * the 13-bit reporting range of the hardware. These are just reasonable +- * guesses and can be adjusted if hardware is found that operates outside +- * of these parameters. ++ * These values should represent the absolute maximum value that will ++ * be reported for a positive position value. Some Synaptics firmware ++ * uses this value to indicate a finger near the edge of the touchpad ++ * whose precise position cannot be determined. ++ * ++ * At least one touchpad is known to report positions in excess of this ++ * value which are actually negative values truncated to the 13-bit ++ * reporting range. These values have never been observed to be lower ++ * than 8184 (i.e. -8), so we treat all values greater than 8176 as ++ * negative and any other value as positive. + */ +-#define X_MAX_POSITIVE (((1 << ABS_POS_BITS) + XMAX) / 2) +-#define Y_MAX_POSITIVE (((1 << ABS_POS_BITS) + YMAX) / 2) ++#define X_MAX_POSITIVE 8176 ++#define Y_MAX_POSITIVE 8176 + + /* + * Synaptics touchpads report the y coordinate from bottom to top, which is +@@ -561,11 +566,21 @@ static int synaptics_parse_hw_state(const unsigned char buf[], + hw->right = (buf[0] & 0x02) ? 
1 : 0; + } + +- /* Convert wrap-around values to negative */ ++ /* ++ * Convert wrap-around values to negative. (X|Y)_MAX_POSITIVE ++ * is used by some firmware to indicate a finger at the edge of ++ * the touchpad whose precise position cannot be determined, so ++ * convert these values to the maximum axis value. ++ */ + if (hw->x > X_MAX_POSITIVE) + hw->x -= 1 << ABS_POS_BITS; ++ else if (hw->x == X_MAX_POSITIVE) ++ hw->x = XMAX; ++ + if (hw->y > Y_MAX_POSITIVE) + hw->y -= 1 << ABS_POS_BITS; ++ else if (hw->y == Y_MAX_POSITIVE) ++ hw->y = YMAX; + + return 0; + } +diff --git a/drivers/iommu/intel-iommu.c b/drivers/iommu/intel-iommu.c +index ccf347f..b9062c0 100644 +--- a/drivers/iommu/intel-iommu.c ++++ b/drivers/iommu/intel-iommu.c +@@ -563,7 +563,9 @@ static void domain_update_iommu_coherency(struct dmar_domain *domain) + { + int i; + +- domain->iommu_coherency = 1; ++ i = find_first_bit(&domain->iommu_bmp, g_num_of_iommus); ++ ++ domain->iommu_coherency = i < g_num_of_iommus ? 1 : 0; + + for_each_set_bit(i, &domain->iommu_bmp, g_num_of_iommus) { + if (!ecap_coherent(g_iommus[i]->ecap)) { +diff --git a/drivers/media/rc/ite-cir.c b/drivers/media/rc/ite-cir.c +index 0e49c99..c06992e 100644 +--- a/drivers/media/rc/ite-cir.c ++++ b/drivers/media/rc/ite-cir.c +@@ -1473,6 +1473,7 @@ static int ite_probe(struct pnp_dev *pdev, const struct pnp_device_id + rdev = rc_allocate_device(); + if (!rdev) + goto failure; ++ itdev->rdev = rdev; + + ret = -ENODEV; + +@@ -1604,7 +1605,6 @@ static int ite_probe(struct pnp_dev *pdev, const struct pnp_device_id + if (ret) + goto failure; + +- itdev->rdev = rdev; + ite_pr(KERN_NOTICE, "driver has been successfully loaded\n"); + + return 0; +diff --git a/drivers/media/video/gspca/pac7302.c b/drivers/media/video/gspca/pac7302.c +index 1c44f78..6ddc769 100644 +--- a/drivers/media/video/gspca/pac7302.c ++++ b/drivers/media/video/gspca/pac7302.c +@@ -1197,6 +1197,8 @@ static const struct usb_device_id device_table[] = { + {USB_DEVICE(0x093a, 0x2629), .driver_info = FL_VFLIP}, + {USB_DEVICE(0x093a, 0x262a)}, + {USB_DEVICE(0x093a, 0x262c)}, ++ {USB_DEVICE(0x145f, 0x013c)}, ++ {USB_DEVICE(0x1ae7, 0x2001)}, /* SpeedLink Snappy Mic SL-6825-SBK */ + {} + }; + MODULE_DEVICE_TABLE(usb, device_table); +diff --git a/drivers/mmc/host/omap_hsmmc.c b/drivers/mmc/host/omap_hsmmc.c +index d5fe43d..bc27065 100644 +--- a/drivers/mmc/host/omap_hsmmc.c ++++ b/drivers/mmc/host/omap_hsmmc.c +@@ -2188,9 +2188,7 @@ static int omap_hsmmc_suspend(struct device *dev) + } else { + host->suspended = 0; + if (host->pdata->resume) { +- ret = host->pdata->resume(&pdev->dev, +- host->slot_id); +- if (ret) ++ if (host->pdata->resume(&pdev->dev, host->slot_id)) + dev_dbg(mmc_dev(host->mmc), + "Unmask interrupt failed\n"); + } +diff --git a/drivers/mmc/host/sdhci-s3c.c b/drivers/mmc/host/sdhci-s3c.c +index 0d33ff0..06af9e4 100644 +--- a/drivers/mmc/host/sdhci-s3c.c ++++ b/drivers/mmc/host/sdhci-s3c.c +@@ -601,7 +601,7 @@ static int __devexit sdhci_s3c_remove(struct platform_device *pdev) + + sdhci_remove_host(host, 1); + +- for (ptr = 0; ptr < 3; ptr++) { ++ for (ptr = 0; ptr < MAX_BUS_CLK; ptr++) { + if (sc->clk_bus[ptr]) { + clk_disable(sc->clk_bus[ptr]); + clk_put(sc->clk_bus[ptr]); +diff --git a/drivers/mmc/host/sh_mmcif.c b/drivers/mmc/host/sh_mmcif.c +index d5505f3..559d30d 100644 +--- a/drivers/mmc/host/sh_mmcif.c ++++ b/drivers/mmc/host/sh_mmcif.c +@@ -1003,6 +1003,10 @@ static irqreturn_t sh_mmcif_intr(int irq, void *dev_id) + host->sd_error = true; + dev_dbg(&host->pd->dev, "int err state 
= %08x\n", state); + } ++ if (host->state == STATE_IDLE) { ++ dev_info(&host->pd->dev, "Spurious IRQ status 0x%x", state); ++ return IRQ_HANDLED; ++ } + if (state & ~(INT_CMD12RBE | INT_CMD12CRE)) + complete(&host->intr_wait); + else +diff --git a/drivers/mtd/maps/autcpu12-nvram.c b/drivers/mtd/maps/autcpu12-nvram.c +index e5bfd0e..0598d52 100644 +--- a/drivers/mtd/maps/autcpu12-nvram.c ++++ b/drivers/mtd/maps/autcpu12-nvram.c +@@ -43,7 +43,8 @@ struct map_info autcpu12_sram_map = { + + static int __init init_autcpu12_sram (void) + { +- int err, save0, save1; ++ map_word tmp, save0, save1; ++ int err; + + autcpu12_sram_map.virt = ioremap(0x12000000, SZ_128K); + if (!autcpu12_sram_map.virt) { +@@ -51,7 +52,7 @@ static int __init init_autcpu12_sram (void) + err = -EIO; + goto out; + } +- simple_map_init(&autcpu_sram_map); ++ simple_map_init(&autcpu12_sram_map); + + /* + * Check for 32K/128K +@@ -61,20 +62,22 @@ static int __init init_autcpu12_sram (void) + * Read and check result on ofs 0x0 + * Restore contents + */ +- save0 = map_read32(&autcpu12_sram_map,0); +- save1 = map_read32(&autcpu12_sram_map,0x10000); +- map_write32(&autcpu12_sram_map,~save0,0x10000); ++ save0 = map_read(&autcpu12_sram_map, 0); ++ save1 = map_read(&autcpu12_sram_map, 0x10000); ++ tmp.x[0] = ~save0.x[0]; ++ map_write(&autcpu12_sram_map, tmp, 0x10000); + /* if we find this pattern on 0x0, we have 32K size + * restore contents and exit + */ +- if ( map_read32(&autcpu12_sram_map,0) != save0) { +- map_write32(&autcpu12_sram_map,save0,0x0); ++ tmp = map_read(&autcpu12_sram_map, 0); ++ if (!map_word_equal(&autcpu12_sram_map, tmp, save0)) { ++ map_write(&autcpu12_sram_map, save0, 0x0); + goto map; + } + /* We have a 128K found, restore 0x10000 and set size + * to 128K + */ +- map_write32(&autcpu12_sram_map,save1,0x10000); ++ map_write(&autcpu12_sram_map, save1, 0x10000); + autcpu12_sram_map.size = SZ_128K; + + map: +diff --git a/drivers/mtd/mtdpart.c b/drivers/mtd/mtdpart.c +index a0bd2de..198da0a 100644 +--- a/drivers/mtd/mtdpart.c ++++ b/drivers/mtd/mtdpart.c +@@ -748,6 +748,8 @@ static const char *default_mtd_part_types[] = { + * partition parsers, specified in @types. However, if @types is %NULL, then + * the default list of parsers is used. The default list contains only the + * "cmdlinepart" and "ofpart" parsers ATM. ++ * Note: If there are more then one parser in @types, the kernel only takes the ++ * partitions parsed out by the first parser. 
+ * + * This function may return: + * o a negative error code in case of failure +@@ -772,11 +774,12 @@ int parse_mtd_partitions(struct mtd_info *master, const char **types, + if (!parser) + continue; + ret = (*parser->parse_fn)(master, pparts, data); ++ put_partition_parser(parser); + if (ret > 0) { + printk(KERN_NOTICE "%d %s partitions found on MTD device %s\n", + ret, parser->name, master->name); ++ break; + } +- put_partition_parser(parser); + } + return ret; + } +diff --git a/drivers/mtd/nand/nand_bbt.c b/drivers/mtd/nand/nand_bbt.c +index f024375..532da04 100644 +--- a/drivers/mtd/nand/nand_bbt.c ++++ b/drivers/mtd/nand/nand_bbt.c +@@ -390,7 +390,7 @@ static int read_abs_bbts(struct mtd_info *mtd, uint8_t *buf, + /* Read the mirror version, if available */ + if (md && (md->options & NAND_BBT_VERSION)) { + scan_read_raw(mtd, buf, (loff_t)md->pages[0] << this->page_shift, +- mtd->writesize, td); ++ mtd->writesize, md); + md->version[0] = buf[bbt_get_ver_offs(mtd, md)]; + pr_info("Bad block table at page %d, version 0x%02X\n", + md->pages[0], md->version[0]); +diff --git a/drivers/mtd/nand/nandsim.c b/drivers/mtd/nand/nandsim.c +index 83e8e1b..ade0da0 100644 +--- a/drivers/mtd/nand/nandsim.c ++++ b/drivers/mtd/nand/nandsim.c +@@ -2355,6 +2355,7 @@ static int __init ns_init_module(void) + uint64_t new_size = (uint64_t)nsmtd->erasesize << overridesize; + if (new_size >> overridesize != nsmtd->erasesize) { + NS_ERR("overridesize is too big\n"); ++ retval = -EINVAL; + goto err_exit; + } + /* N.B. This relies on nand_scan not doing anything with the size before we change it */ +diff --git a/drivers/mtd/nand/omap2.c b/drivers/mtd/nand/omap2.c +index f745f00..297c965 100644 +--- a/drivers/mtd/nand/omap2.c ++++ b/drivers/mtd/nand/omap2.c +@@ -1132,7 +1132,8 @@ static int omap_nand_remove(struct platform_device *pdev) + /* Release NAND device, its internal structures and partitions */ + nand_release(&info->mtd); + iounmap(info->nand.IO_ADDR_R); +- kfree(&info->mtd); ++ release_mem_region(info->phys_base, NAND_IO_SIZE); ++ kfree(info); + return 0; + } + +diff --git a/drivers/mtd/ubi/build.c b/drivers/mtd/ubi/build.c +index 6c3fb5a..1f9c363 100644 +--- a/drivers/mtd/ubi/build.c ++++ b/drivers/mtd/ubi/build.c +@@ -816,6 +816,11 @@ static int autoresize(struct ubi_device *ubi, int vol_id) + struct ubi_volume *vol = ubi->volumes[vol_id]; + int err, old_reserved_pebs = vol->reserved_pebs; + ++ if (ubi->ro_mode) { ++ ubi_warn("skip auto-resize because of R/O mode"); ++ return 0; ++ } ++ + /* + * Clear the auto-resize flag in the volume in-memory copy of the + * volume table, and 'ubi_resize_volume()' will propagate this change +diff --git a/drivers/mtd/ubi/scan.c b/drivers/mtd/ubi/scan.c +index b99318e..b2b62de 100644 +--- a/drivers/mtd/ubi/scan.c ++++ b/drivers/mtd/ubi/scan.c +@@ -997,7 +997,7 @@ static int process_eb(struct ubi_device *ubi, struct ubi_scan_info *si, + return err; + goto adjust_mean_ec; + case UBI_IO_FF: +- if (ec_err) ++ if (ec_err || bitflips) + err = add_to_list(si, pnum, ec, 1, &si->erase); + else + err = add_to_list(si, pnum, ec, 0, &si->free); +diff --git a/drivers/net/can/mscan/mpc5xxx_can.c b/drivers/net/can/mscan/mpc5xxx_can.c +index 5fedc33..d8f2b5b 100644 +--- a/drivers/net/can/mscan/mpc5xxx_can.c ++++ b/drivers/net/can/mscan/mpc5xxx_can.c +@@ -181,7 +181,7 @@ static u32 __devinit mpc512x_can_get_clock(struct platform_device *ofdev, + + if (!clock_name || !strcmp(clock_name, "sys")) { + sys_clk = clk_get(&ofdev->dev, "sys_clk"); +- if (!sys_clk) { ++ if (IS_ERR(sys_clk)) { 
+ dev_err(&ofdev->dev, "couldn't get sys_clk\n"); + goto exit_unmap; + } +@@ -204,7 +204,7 @@ static u32 __devinit mpc512x_can_get_clock(struct platform_device *ofdev, + + if (clocksrc < 0) { + ref_clk = clk_get(&ofdev->dev, "ref_clk"); +- if (!ref_clk) { ++ if (IS_ERR(ref_clk)) { + dev_err(&ofdev->dev, "couldn't get ref_clk\n"); + goto exit_unmap; + } +diff --git a/drivers/net/ethernet/intel/e1000/e1000_main.c b/drivers/net/ethernet/intel/e1000/e1000_main.c +index 0549261..c5f6b0e 100644 +--- a/drivers/net/ethernet/intel/e1000/e1000_main.c ++++ b/drivers/net/ethernet/intel/e1000/e1000_main.c +@@ -4720,8 +4720,6 @@ static int __e1000_shutdown(struct pci_dev *pdev, bool *enable_wake) + + netif_device_detach(netdev); + +- mutex_lock(&adapter->mutex); +- + if (netif_running(netdev)) { + WARN_ON(test_bit(__E1000_RESETTING, &adapter->flags)); + e1000_down(adapter); +@@ -4729,10 +4727,8 @@ static int __e1000_shutdown(struct pci_dev *pdev, bool *enable_wake) + + #ifdef CONFIG_PM + retval = pci_save_state(pdev); +- if (retval) { +- mutex_unlock(&adapter->mutex); ++ if (retval) + return retval; +- } + #endif + + status = er32(STATUS); +@@ -4789,8 +4785,6 @@ static int __e1000_shutdown(struct pci_dev *pdev, bool *enable_wake) + if (netif_running(netdev)) + e1000_free_irq(adapter); + +- mutex_unlock(&adapter->mutex); +- + pci_disable_device(pdev); + + return 0; +diff --git a/drivers/net/ethernet/realtek/r8169.c b/drivers/net/ethernet/realtek/r8169.c +index ed1be8a..4b43bc5 100644 +--- a/drivers/net/ethernet/realtek/r8169.c ++++ b/drivers/net/ethernet/realtek/r8169.c +@@ -327,6 +327,8 @@ enum rtl_registers { + Config0 = 0x51, + Config1 = 0x52, + Config2 = 0x53, ++#define PME_SIGNAL (1 << 5) /* 8168c and later */ ++ + Config3 = 0x54, + Config4 = 0x55, + Config5 = 0x56, +@@ -1360,7 +1362,6 @@ static void __rtl8169_set_wol(struct rtl8169_private *tp, u32 wolopts) + u16 reg; + u8 mask; + } cfg[] = { +- { WAKE_ANY, Config1, PMEnable }, + { WAKE_PHY, Config3, LinkUp }, + { WAKE_MAGIC, Config3, MagicPacket }, + { WAKE_UCAST, Config5, UWF }, +@@ -1368,16 +1369,32 @@ static void __rtl8169_set_wol(struct rtl8169_private *tp, u32 wolopts) + { WAKE_MCAST, Config5, MWF }, + { WAKE_ANY, Config5, LanWake } + }; ++ u8 options; + + RTL_W8(Cfg9346, Cfg9346_Unlock); + + for (i = 0; i < ARRAY_SIZE(cfg); i++) { +- u8 options = RTL_R8(cfg[i].reg) & ~cfg[i].mask; ++ options = RTL_R8(cfg[i].reg) & ~cfg[i].mask; + if (wolopts & cfg[i].opt) + options |= cfg[i].mask; + RTL_W8(cfg[i].reg, options); + } + ++ switch (tp->mac_version) { ++ case RTL_GIGA_MAC_VER_01 ... RTL_GIGA_MAC_VER_17: ++ options = RTL_R8(Config1) & ~PMEnable; ++ if (wolopts) ++ options |= PMEnable; ++ RTL_W8(Config1, options); ++ break; ++ default: ++ options = RTL_R8(Config2) & ~PME_SIGNAL; ++ if (wolopts) ++ options |= PME_SIGNAL; ++ RTL_W8(Config2, options); ++ break; ++ } ++ + RTL_W8(Cfg9346, Cfg9346_Lock); + } + +diff --git a/drivers/net/rionet.c b/drivers/net/rionet.c +index 7145714..c0f097b 100644 +--- a/drivers/net/rionet.c ++++ b/drivers/net/rionet.c +@@ -79,6 +79,7 @@ static int rionet_capable = 1; + * on system trade-offs. 
+ */ + static struct rio_dev **rionet_active; ++static int nact; /* total number of active rionet peers */ + + #define is_rionet_capable(src_ops, dst_ops) \ + ((src_ops & RIO_SRC_OPS_DATA_MSG) && \ +@@ -175,6 +176,7 @@ static int rionet_start_xmit(struct sk_buff *skb, struct net_device *ndev) + struct ethhdr *eth = (struct ethhdr *)skb->data; + u16 destid; + unsigned long flags; ++ int add_num = 1; + + local_irq_save(flags); + if (!spin_trylock(&rnet->tx_lock)) { +@@ -182,7 +184,10 @@ static int rionet_start_xmit(struct sk_buff *skb, struct net_device *ndev) + return NETDEV_TX_LOCKED; + } + +- if ((rnet->tx_cnt + 1) > RIONET_TX_RING_SIZE) { ++ if (is_multicast_ether_addr(eth->h_dest)) ++ add_num = nact; ++ ++ if ((rnet->tx_cnt + add_num) > RIONET_TX_RING_SIZE) { + netif_stop_queue(ndev); + spin_unlock_irqrestore(&rnet->tx_lock, flags); + printk(KERN_ERR "%s: BUG! Tx Ring full when queue awake!\n", +@@ -191,11 +196,16 @@ static int rionet_start_xmit(struct sk_buff *skb, struct net_device *ndev) + } + + if (is_multicast_ether_addr(eth->h_dest)) { ++ int count = 0; + for (i = 0; i < RIO_MAX_ROUTE_ENTRIES(rnet->mport->sys_size); + i++) +- if (rionet_active[i]) ++ if (rionet_active[i]) { + rionet_queue_tx_msg(skb, ndev, + rionet_active[i]); ++ if (count) ++ atomic_inc(&skb->users); ++ count++; ++ } + } else if (RIONET_MAC_MATCH(eth->h_dest)) { + destid = RIONET_GET_DESTID(eth->h_dest); + if (rionet_active[destid]) +@@ -220,14 +230,17 @@ static void rionet_dbell_event(struct rio_mport *mport, void *dev_id, u16 sid, u + if (info == RIONET_DOORBELL_JOIN) { + if (!rionet_active[sid]) { + list_for_each_entry(peer, &rionet_peers, node) { +- if (peer->rdev->destid == sid) ++ if (peer->rdev->destid == sid) { + rionet_active[sid] = peer->rdev; ++ nact++; ++ } + } + rio_mport_send_doorbell(mport, sid, + RIONET_DOORBELL_JOIN); + } + } else if (info == RIONET_DOORBELL_LEAVE) { + rionet_active[sid] = NULL; ++ nact--; + } else { + if (netif_msg_intr(rnet)) + printk(KERN_WARNING "%s: unhandled doorbell\n", +@@ -524,6 +537,7 @@ static int rionet_probe(struct rio_dev *rdev, const struct rio_device_id *id) + + rc = rionet_setup_netdev(rdev->net->hport, ndev); + rionet_check = 1; ++ nact = 0; + } + + /* +diff --git a/drivers/net/wireless/ath/ath9k/pci.c b/drivers/net/wireless/ath/ath9k/pci.c +index 1883d39..f7e17a0 100644 +--- a/drivers/net/wireless/ath/ath9k/pci.c ++++ b/drivers/net/wireless/ath/ath9k/pci.c +@@ -122,8 +122,9 @@ static void ath_pci_aspm_init(struct ath_common *common) + if (!parent) + return; + +- if (ah->btcoex_hw.scheme != ATH_BTCOEX_CFG_NONE) { +- /* Bluetooth coexistance requires disabling ASPM. */ ++ if ((ah->btcoex_hw.scheme != ATH_BTCOEX_CFG_NONE) && ++ (AR_SREV_9285(ah))) { ++ /* Bluetooth coexistance requires disabling ASPM for AR9285. 
*/ + pci_read_config_byte(pdev, pos + PCI_EXP_LNKCTL, &aspm); + aspm &= ~(PCIE_LINK_STATE_L0S | PCIE_LINK_STATE_L1); + pci_write_config_byte(pdev, pos + PCI_EXP_LNKCTL, aspm); +diff --git a/drivers/pci/probe.c b/drivers/pci/probe.c +index dfee1b3..9005380 100644 +--- a/drivers/pci/probe.c ++++ b/drivers/pci/probe.c +@@ -658,8 +658,10 @@ int __devinit pci_scan_bridge(struct pci_bus *bus, struct pci_dev *dev, int max, + + /* Check if setup is sensible at all */ + if (!pass && +- (primary != bus->number || secondary <= bus->number)) { +- dev_dbg(&dev->dev, "bus configuration invalid, reconfiguring\n"); ++ (primary != bus->number || secondary <= bus->number || ++ secondary > subordinate)) { ++ dev_info(&dev->dev, "bridge configuration invalid ([bus %02x-%02x]), reconfiguring\n", ++ secondary, subordinate); + broken = 1; + } + +diff --git a/drivers/s390/scsi/zfcp_aux.c b/drivers/s390/scsi/zfcp_aux.c +index 0860181..4f1b10b 100644 +--- a/drivers/s390/scsi/zfcp_aux.c ++++ b/drivers/s390/scsi/zfcp_aux.c +@@ -519,6 +519,7 @@ struct zfcp_port *zfcp_port_enqueue(struct zfcp_adapter *adapter, u64 wwpn, + + rwlock_init(&port->unit_list_lock); + INIT_LIST_HEAD(&port->unit_list); ++ atomic_set(&port->units, 0); + + INIT_WORK(&port->gid_pn_work, zfcp_fc_port_did_lookup); + INIT_WORK(&port->test_link_work, zfcp_fc_link_test_work); +diff --git a/drivers/s390/scsi/zfcp_ccw.c b/drivers/s390/scsi/zfcp_ccw.c +index 96f13ad8..79a6afe 100644 +--- a/drivers/s390/scsi/zfcp_ccw.c ++++ b/drivers/s390/scsi/zfcp_ccw.c +@@ -39,17 +39,23 @@ void zfcp_ccw_adapter_put(struct zfcp_adapter *adapter) + spin_unlock_irqrestore(&zfcp_ccw_adapter_ref_lock, flags); + } + +-static int zfcp_ccw_activate(struct ccw_device *cdev) +- ++/** ++ * zfcp_ccw_activate - activate adapter and wait for it to finish ++ * @cdev: pointer to belonging ccw device ++ * @clear: Status flags to clear. ++ * @tag: s390dbf trace record tag ++ */ ++static int zfcp_ccw_activate(struct ccw_device *cdev, int clear, char *tag) + { + struct zfcp_adapter *adapter = zfcp_ccw_adapter_by_cdev(cdev); + + if (!adapter) + return 0; + ++ zfcp_erp_clear_adapter_status(adapter, clear); + zfcp_erp_set_adapter_status(adapter, ZFCP_STATUS_COMMON_RUNNING); + zfcp_erp_adapter_reopen(adapter, ZFCP_STATUS_COMMON_ERP_FAILED, +- "ccresu2"); ++ tag); + zfcp_erp_wait(adapter); + flush_work(&adapter->scan_work); + +@@ -164,26 +170,29 @@ static int zfcp_ccw_set_online(struct ccw_device *cdev) + BUG_ON(!zfcp_reqlist_isempty(adapter->req_list)); + adapter->req_no = 0; + +- zfcp_ccw_activate(cdev); ++ zfcp_ccw_activate(cdev, 0, "ccsonl1"); + zfcp_ccw_adapter_put(adapter); + return 0; + } + + /** +- * zfcp_ccw_set_offline - set_offline function of zfcp driver ++ * zfcp_ccw_offline_sync - shut down adapter and wait for it to finish + * @cdev: pointer to belonging ccw device ++ * @set: Status flags to set. ++ * @tag: s390dbf trace record tag + * + * This function gets called by the common i/o layer and sets an adapter + * into state offline. 
+ */ +-static int zfcp_ccw_set_offline(struct ccw_device *cdev) ++static int zfcp_ccw_offline_sync(struct ccw_device *cdev, int set, char *tag) + { + struct zfcp_adapter *adapter = zfcp_ccw_adapter_by_cdev(cdev); + + if (!adapter) + return 0; + +- zfcp_erp_adapter_shutdown(adapter, 0, "ccsoff1"); ++ zfcp_erp_set_adapter_status(adapter, set); ++ zfcp_erp_adapter_shutdown(adapter, 0, tag); + zfcp_erp_wait(adapter); + + zfcp_ccw_adapter_put(adapter); +@@ -191,6 +200,18 @@ static int zfcp_ccw_set_offline(struct ccw_device *cdev) + } + + /** ++ * zfcp_ccw_set_offline - set_offline function of zfcp driver ++ * @cdev: pointer to belonging ccw device ++ * ++ * This function gets called by the common i/o layer and sets an adapter ++ * into state offline. ++ */ ++static int zfcp_ccw_set_offline(struct ccw_device *cdev) ++{ ++ return zfcp_ccw_offline_sync(cdev, 0, "ccsoff1"); ++} ++ ++/** + * zfcp_ccw_notify - ccw notify function + * @cdev: pointer to belonging ccw device + * @event: indicates if adapter was detached or attached +@@ -207,6 +228,11 @@ static int zfcp_ccw_notify(struct ccw_device *cdev, int event) + + switch (event) { + case CIO_GONE: ++ if (atomic_read(&adapter->status) & ++ ZFCP_STATUS_ADAPTER_SUSPENDED) { /* notification ignore */ ++ zfcp_dbf_hba_basic("ccnigo1", adapter); ++ break; ++ } + dev_warn(&cdev->dev, "The FCP device has been detached\n"); + zfcp_erp_adapter_shutdown(adapter, 0, "ccnoti1"); + break; +@@ -216,6 +242,11 @@ static int zfcp_ccw_notify(struct ccw_device *cdev, int event) + zfcp_erp_adapter_shutdown(adapter, 0, "ccnoti2"); + break; + case CIO_OPER: ++ if (atomic_read(&adapter->status) & ++ ZFCP_STATUS_ADAPTER_SUSPENDED) { /* notification ignore */ ++ zfcp_dbf_hba_basic("ccniop1", adapter); ++ break; ++ } + dev_info(&cdev->dev, "The FCP device is operational again\n"); + zfcp_erp_set_adapter_status(adapter, + ZFCP_STATUS_COMMON_RUNNING); +@@ -251,6 +282,28 @@ static void zfcp_ccw_shutdown(struct ccw_device *cdev) + zfcp_ccw_adapter_put(adapter); + } + ++static int zfcp_ccw_suspend(struct ccw_device *cdev) ++{ ++ zfcp_ccw_offline_sync(cdev, ZFCP_STATUS_ADAPTER_SUSPENDED, "ccsusp1"); ++ return 0; ++} ++ ++static int zfcp_ccw_thaw(struct ccw_device *cdev) ++{ ++ /* trace records for thaw and final shutdown during suspend ++ can only be found in system dump until the end of suspend ++ but not after resume because it's based on the memory image ++ right after the very first suspend (freeze) callback */ ++ zfcp_ccw_activate(cdev, 0, "ccthaw1"); ++ return 0; ++} ++ ++static int zfcp_ccw_resume(struct ccw_device *cdev) ++{ ++ zfcp_ccw_activate(cdev, ZFCP_STATUS_ADAPTER_SUSPENDED, "ccresu1"); ++ return 0; ++} ++ + struct ccw_driver zfcp_ccw_driver = { + .driver = { + .owner = THIS_MODULE, +@@ -263,7 +316,7 @@ struct ccw_driver zfcp_ccw_driver = { + .set_offline = zfcp_ccw_set_offline, + .notify = zfcp_ccw_notify, + .shutdown = zfcp_ccw_shutdown, +- .freeze = zfcp_ccw_set_offline, +- .thaw = zfcp_ccw_activate, +- .restore = zfcp_ccw_activate, ++ .freeze = zfcp_ccw_suspend, ++ .thaw = zfcp_ccw_thaw, ++ .restore = zfcp_ccw_resume, + }; +diff --git a/drivers/s390/scsi/zfcp_cfdc.c b/drivers/s390/scsi/zfcp_cfdc.c +index fab2c25..8ed63aa 100644 +--- a/drivers/s390/scsi/zfcp_cfdc.c ++++ b/drivers/s390/scsi/zfcp_cfdc.c +@@ -293,7 +293,7 @@ void zfcp_cfdc_adapter_access_changed(struct zfcp_adapter *adapter) + } + read_unlock_irqrestore(&adapter->port_list_lock, flags); + +- shost_for_each_device(sdev, port->adapter->scsi_host) { ++ shost_for_each_device(sdev, adapter->scsi_host) { + 
zfcp_sdev = sdev_to_zfcp(sdev); + status = atomic_read(&zfcp_sdev->status); + if ((status & ZFCP_STATUS_COMMON_ACCESS_DENIED) || +diff --git a/drivers/s390/scsi/zfcp_dbf.c b/drivers/s390/scsi/zfcp_dbf.c +index a9a816e..79b9848 100644 +--- a/drivers/s390/scsi/zfcp_dbf.c ++++ b/drivers/s390/scsi/zfcp_dbf.c +@@ -191,7 +191,7 @@ void zfcp_dbf_hba_def_err(struct zfcp_adapter *adapter, u64 req_id, u16 scount, + length = min((u16)sizeof(struct qdio_buffer), + (u16)ZFCP_DBF_PAY_MAX_REC); + +- while ((char *)pl[payload->counter] && payload->counter < scount) { ++ while (payload->counter < scount && (char *)pl[payload->counter]) { + memcpy(payload->data, (char *)pl[payload->counter], length); + debug_event(dbf->pay, 1, payload, zfcp_dbf_plen(length)); + payload->counter++; +@@ -200,6 +200,26 @@ void zfcp_dbf_hba_def_err(struct zfcp_adapter *adapter, u64 req_id, u16 scount, + spin_unlock_irqrestore(&dbf->pay_lock, flags); + } + ++/** ++ * zfcp_dbf_hba_basic - trace event for basic adapter events ++ * @adapter: pointer to struct zfcp_adapter ++ */ ++void zfcp_dbf_hba_basic(char *tag, struct zfcp_adapter *adapter) ++{ ++ struct zfcp_dbf *dbf = adapter->dbf; ++ struct zfcp_dbf_hba *rec = &dbf->hba_buf; ++ unsigned long flags; ++ ++ spin_lock_irqsave(&dbf->hba_lock, flags); ++ memset(rec, 0, sizeof(*rec)); ++ ++ memcpy(rec->tag, tag, ZFCP_DBF_TAG_LEN); ++ rec->id = ZFCP_DBF_HBA_BASIC; ++ ++ debug_event(dbf->hba, 1, rec, sizeof(*rec)); ++ spin_unlock_irqrestore(&dbf->hba_lock, flags); ++} ++ + static void zfcp_dbf_set_common(struct zfcp_dbf_rec *rec, + struct zfcp_adapter *adapter, + struct zfcp_port *port, +diff --git a/drivers/s390/scsi/zfcp_dbf.h b/drivers/s390/scsi/zfcp_dbf.h +index 714f087..3ac7a4b 100644 +--- a/drivers/s390/scsi/zfcp_dbf.h ++++ b/drivers/s390/scsi/zfcp_dbf.h +@@ -154,6 +154,7 @@ enum zfcp_dbf_hba_id { + ZFCP_DBF_HBA_RES = 1, + ZFCP_DBF_HBA_USS = 2, + ZFCP_DBF_HBA_BIT = 3, ++ ZFCP_DBF_HBA_BASIC = 4, + }; + + /** +diff --git a/drivers/s390/scsi/zfcp_def.h b/drivers/s390/scsi/zfcp_def.h +index ed5d921..f172b84 100644 +--- a/drivers/s390/scsi/zfcp_def.h ++++ b/drivers/s390/scsi/zfcp_def.h +@@ -77,6 +77,7 @@ struct zfcp_reqlist; + #define ZFCP_STATUS_ADAPTER_SIOSL_ISSUED 0x00000004 + #define ZFCP_STATUS_ADAPTER_XCONFIG_OK 0x00000008 + #define ZFCP_STATUS_ADAPTER_HOST_CON_INIT 0x00000010 ++#define ZFCP_STATUS_ADAPTER_SUSPENDED 0x00000040 + #define ZFCP_STATUS_ADAPTER_ERP_PENDING 0x00000100 + #define ZFCP_STATUS_ADAPTER_LINK_UNPLUGGED 0x00000200 + #define ZFCP_STATUS_ADAPTER_DATA_DIV_ENABLED 0x00000400 +@@ -204,6 +205,7 @@ struct zfcp_port { + struct zfcp_adapter *adapter; /* adapter used to access port */ + struct list_head unit_list; /* head of logical unit list */ + rwlock_t unit_list_lock; /* unit list lock */ ++ atomic_t units; /* zfcp_unit count */ + atomic_t status; /* status of this remote port */ + u64 wwnn; /* WWNN if known */ + u64 wwpn; /* WWPN */ +diff --git a/drivers/s390/scsi/zfcp_ext.h b/drivers/s390/scsi/zfcp_ext.h +index 2302e1c..ef9e502 100644 +--- a/drivers/s390/scsi/zfcp_ext.h ++++ b/drivers/s390/scsi/zfcp_ext.h +@@ -54,6 +54,7 @@ extern void zfcp_dbf_hba_fsf_res(char *, struct zfcp_fsf_req *); + extern void zfcp_dbf_hba_bit_err(char *, struct zfcp_fsf_req *); + extern void zfcp_dbf_hba_berr(struct zfcp_dbf *, struct zfcp_fsf_req *); + extern void zfcp_dbf_hba_def_err(struct zfcp_adapter *, u64, u16, void **); ++extern void zfcp_dbf_hba_basic(char *, struct zfcp_adapter *); + extern void zfcp_dbf_san_req(char *, struct zfcp_fsf_req *, u32); + extern void 
zfcp_dbf_san_res(char *, struct zfcp_fsf_req *); + extern void zfcp_dbf_san_in_els(char *, struct zfcp_fsf_req *); +@@ -158,6 +159,7 @@ extern void zfcp_scsi_dif_sense_error(struct scsi_cmnd *, int); + extern struct attribute_group zfcp_sysfs_unit_attrs; + extern struct attribute_group zfcp_sysfs_adapter_attrs; + extern struct attribute_group zfcp_sysfs_port_attrs; ++extern struct mutex zfcp_sysfs_port_units_mutex; + extern struct device_attribute *zfcp_sysfs_sdev_attrs[]; + extern struct device_attribute *zfcp_sysfs_shost_attrs[]; + +diff --git a/drivers/s390/scsi/zfcp_fsf.c b/drivers/s390/scsi/zfcp_fsf.c +index e9a787e..8c849f0 100644 +--- a/drivers/s390/scsi/zfcp_fsf.c ++++ b/drivers/s390/scsi/zfcp_fsf.c +@@ -219,7 +219,7 @@ static void zfcp_fsf_status_read_handler(struct zfcp_fsf_req *req) + return; + } + +- zfcp_dbf_hba_fsf_uss("fssrh_2", req); ++ zfcp_dbf_hba_fsf_uss("fssrh_4", req); + + switch (sr_buf->status_type) { + case FSF_STATUS_READ_PORT_CLOSED: +@@ -771,12 +771,14 @@ out: + static void zfcp_fsf_abort_fcp_command_handler(struct zfcp_fsf_req *req) + { + struct scsi_device *sdev = req->data; +- struct zfcp_scsi_dev *zfcp_sdev = sdev_to_zfcp(sdev); ++ struct zfcp_scsi_dev *zfcp_sdev; + union fsf_status_qual *fsq = &req->qtcb->header.fsf_status_qual; + + if (req->status & ZFCP_STATUS_FSFREQ_ERROR) + return; + ++ zfcp_sdev = sdev_to_zfcp(sdev); ++ + switch (req->qtcb->header.fsf_status) { + case FSF_PORT_HANDLE_NOT_VALID: + if (fsq->word[0] == fsq->word[1]) { +@@ -885,7 +887,7 @@ static void zfcp_fsf_send_ct_handler(struct zfcp_fsf_req *req) + + switch (header->fsf_status) { + case FSF_GOOD: +- zfcp_dbf_san_res("fsscth1", req); ++ zfcp_dbf_san_res("fsscth2", req); + ct->status = 0; + break; + case FSF_SERVICE_CLASS_NOT_SUPPORTED: +@@ -1739,13 +1741,15 @@ static void zfcp_fsf_open_lun_handler(struct zfcp_fsf_req *req) + { + struct zfcp_adapter *adapter = req->adapter; + struct scsi_device *sdev = req->data; +- struct zfcp_scsi_dev *zfcp_sdev = sdev_to_zfcp(sdev); ++ struct zfcp_scsi_dev *zfcp_sdev; + struct fsf_qtcb_header *header = &req->qtcb->header; + struct fsf_qtcb_bottom_support *bottom = &req->qtcb->bottom.support; + + if (req->status & ZFCP_STATUS_FSFREQ_ERROR) + return; + ++ zfcp_sdev = sdev_to_zfcp(sdev); ++ + atomic_clear_mask(ZFCP_STATUS_COMMON_ACCESS_DENIED | + ZFCP_STATUS_COMMON_ACCESS_BOXED | + ZFCP_STATUS_LUN_SHARED | +@@ -1856,11 +1860,13 @@ out: + static void zfcp_fsf_close_lun_handler(struct zfcp_fsf_req *req) + { + struct scsi_device *sdev = req->data; +- struct zfcp_scsi_dev *zfcp_sdev = sdev_to_zfcp(sdev); ++ struct zfcp_scsi_dev *zfcp_sdev; + + if (req->status & ZFCP_STATUS_FSFREQ_ERROR) + return; + ++ zfcp_sdev = sdev_to_zfcp(sdev); ++ + switch (req->qtcb->header.fsf_status) { + case FSF_PORT_HANDLE_NOT_VALID: + zfcp_erp_adapter_reopen(zfcp_sdev->port->adapter, 0, "fscuh_1"); +@@ -1950,7 +1956,7 @@ static void zfcp_fsf_req_trace(struct zfcp_fsf_req *req, struct scsi_cmnd *scsi) + { + struct fsf_qual_latency_info *lat_in; + struct latency_cont *lat = NULL; +- struct zfcp_scsi_dev *zfcp_sdev = sdev_to_zfcp(scsi->device); ++ struct zfcp_scsi_dev *zfcp_sdev; + struct zfcp_blk_drv_data blktrc; + int ticks = req->adapter->timer_ticks; + +@@ -1965,6 +1971,7 @@ static void zfcp_fsf_req_trace(struct zfcp_fsf_req *req, struct scsi_cmnd *scsi) + + if (req->adapter->adapter_features & FSF_FEATURE_MEASUREMENT_DATA && + !(req->status & ZFCP_STATUS_FSFREQ_ERROR)) { ++ zfcp_sdev = sdev_to_zfcp(scsi->device); + blktrc.flags |= ZFCP_BLK_LAT_VALID; + blktrc.channel_lat = 
lat_in->channel_lat * ticks; + blktrc.fabric_lat = lat_in->fabric_lat * ticks; +@@ -2002,12 +2009,14 @@ static void zfcp_fsf_fcp_handler_common(struct zfcp_fsf_req *req) + { + struct scsi_cmnd *scmnd = req->data; + struct scsi_device *sdev = scmnd->device; +- struct zfcp_scsi_dev *zfcp_sdev = sdev_to_zfcp(sdev); ++ struct zfcp_scsi_dev *zfcp_sdev; + struct fsf_qtcb_header *header = &req->qtcb->header; + + if (unlikely(req->status & ZFCP_STATUS_FSFREQ_ERROR)) + return; + ++ zfcp_sdev = sdev_to_zfcp(sdev); ++ + switch (header->fsf_status) { + case FSF_HANDLE_MISMATCH: + case FSF_PORT_HANDLE_NOT_VALID: +diff --git a/drivers/s390/scsi/zfcp_qdio.c b/drivers/s390/scsi/zfcp_qdio.c +index e14da57..e76d003 100644 +--- a/drivers/s390/scsi/zfcp_qdio.c ++++ b/drivers/s390/scsi/zfcp_qdio.c +@@ -102,18 +102,22 @@ static void zfcp_qdio_int_resp(struct ccw_device *cdev, unsigned int qdio_err, + { + struct zfcp_qdio *qdio = (struct zfcp_qdio *) parm; + struct zfcp_adapter *adapter = qdio->adapter; +- struct qdio_buffer_element *sbale; + int sbal_no, sbal_idx; +- void *pl[ZFCP_QDIO_MAX_SBALS_PER_REQ + 1]; +- u64 req_id; +- u8 scount; + + if (unlikely(qdio_err)) { +- memset(pl, 0, ZFCP_QDIO_MAX_SBALS_PER_REQ * sizeof(void *)); + if (zfcp_adapter_multi_buffer_active(adapter)) { ++ void *pl[ZFCP_QDIO_MAX_SBALS_PER_REQ + 1]; ++ struct qdio_buffer_element *sbale; ++ u64 req_id; ++ u8 scount; ++ ++ memset(pl, 0, ++ ZFCP_QDIO_MAX_SBALS_PER_REQ * sizeof(void *)); + sbale = qdio->res_q[idx]->element; + req_id = (u64) sbale->addr; +- scount = sbale->scount + 1; /* incl. signaling SBAL */ ++ scount = min(sbale->scount + 1, ++ ZFCP_QDIO_MAX_SBALS_PER_REQ + 1); ++ /* incl. signaling SBAL */ + + for (sbal_no = 0; sbal_no < scount; sbal_no++) { + sbal_idx = (idx + sbal_no) % +diff --git a/drivers/s390/scsi/zfcp_sysfs.c b/drivers/s390/scsi/zfcp_sysfs.c +index cdc4ff7..9e62210 100644 +--- a/drivers/s390/scsi/zfcp_sysfs.c ++++ b/drivers/s390/scsi/zfcp_sysfs.c +@@ -227,6 +227,8 @@ static ssize_t zfcp_sysfs_port_rescan_store(struct device *dev, + static ZFCP_DEV_ATTR(adapter, port_rescan, S_IWUSR, NULL, + zfcp_sysfs_port_rescan_store); + ++DEFINE_MUTEX(zfcp_sysfs_port_units_mutex); ++ + static ssize_t zfcp_sysfs_port_remove_store(struct device *dev, + struct device_attribute *attr, + const char *buf, size_t count) +@@ -249,6 +251,16 @@ static ssize_t zfcp_sysfs_port_remove_store(struct device *dev, + else + retval = 0; + ++ mutex_lock(&zfcp_sysfs_port_units_mutex); ++ if (atomic_read(&port->units) > 0) { ++ retval = -EBUSY; ++ mutex_unlock(&zfcp_sysfs_port_units_mutex); ++ goto out; ++ } ++ /* port is about to be removed, so no more unit_add */ ++ atomic_set(&port->units, -1); ++ mutex_unlock(&zfcp_sysfs_port_units_mutex); ++ + write_lock_irq(&adapter->port_list_lock); + list_del(&port->list); + write_unlock_irq(&adapter->port_list_lock); +@@ -289,12 +301,14 @@ static ssize_t zfcp_sysfs_unit_add_store(struct device *dev, + { + struct zfcp_port *port = container_of(dev, struct zfcp_port, dev); + u64 fcp_lun; ++ int retval; + + if (strict_strtoull(buf, 0, (unsigned long long *) &fcp_lun)) + return -EINVAL; + +- if (zfcp_unit_add(port, fcp_lun)) +- return -EINVAL; ++ retval = zfcp_unit_add(port, fcp_lun); ++ if (retval) ++ return retval; + + return count; + } +diff --git a/drivers/s390/scsi/zfcp_unit.c b/drivers/s390/scsi/zfcp_unit.c +index 20796eb..4e6a535 100644 +--- a/drivers/s390/scsi/zfcp_unit.c ++++ b/drivers/s390/scsi/zfcp_unit.c +@@ -104,7 +104,7 @@ static void zfcp_unit_release(struct device *dev) + { + struct zfcp_unit 
*unit = container_of(dev, struct zfcp_unit, dev); + +- put_device(&unit->port->dev); ++ atomic_dec(&unit->port->units); + kfree(unit); + } + +@@ -119,16 +119,27 @@ static void zfcp_unit_release(struct device *dev) + int zfcp_unit_add(struct zfcp_port *port, u64 fcp_lun) + { + struct zfcp_unit *unit; ++ int retval = 0; ++ ++ mutex_lock(&zfcp_sysfs_port_units_mutex); ++ if (atomic_read(&port->units) == -1) { ++ /* port is already gone */ ++ retval = -ENODEV; ++ goto out; ++ } + + unit = zfcp_unit_find(port, fcp_lun); + if (unit) { + put_device(&unit->dev); +- return -EEXIST; ++ retval = -EEXIST; ++ goto out; + } + + unit = kzalloc(sizeof(struct zfcp_unit), GFP_KERNEL); +- if (!unit) +- return -ENOMEM; ++ if (!unit) { ++ retval = -ENOMEM; ++ goto out; ++ } + + unit->port = port; + unit->fcp_lun = fcp_lun; +@@ -139,28 +150,33 @@ int zfcp_unit_add(struct zfcp_port *port, u64 fcp_lun) + if (dev_set_name(&unit->dev, "0x%016llx", + (unsigned long long) fcp_lun)) { + kfree(unit); +- return -ENOMEM; ++ retval = -ENOMEM; ++ goto out; + } + +- get_device(&port->dev); +- + if (device_register(&unit->dev)) { + put_device(&unit->dev); +- return -ENOMEM; ++ retval = -ENOMEM; ++ goto out; + } + + if (sysfs_create_group(&unit->dev.kobj, &zfcp_sysfs_unit_attrs)) { + device_unregister(&unit->dev); +- return -EINVAL; ++ retval = -EINVAL; ++ goto out; + } + ++ atomic_inc(&port->units); /* under zfcp_sysfs_port_units_mutex ! */ ++ + write_lock_irq(&port->unit_list_lock); + list_add_tail(&unit->list, &port->unit_list); + write_unlock_irq(&port->unit_list_lock); + + zfcp_unit_scsi_scan(unit); + +- return 0; ++out: ++ mutex_unlock(&zfcp_sysfs_port_units_mutex); ++ return retval; + } + + /** +diff --git a/drivers/scsi/atp870u.c b/drivers/scsi/atp870u.c +index 7e6eca4..59fc5a1 100644 +--- a/drivers/scsi/atp870u.c ++++ b/drivers/scsi/atp870u.c +@@ -1174,7 +1174,16 @@ wait_io1: + outw(val, tmport); + outb(2, 0x80); + TCM_SYNC: +- udelay(0x800); ++ /* ++ * The funny division into multiple delays is to accomodate ++ * arches like ARM where udelay() multiplies its argument by ++ * a large number to initialize a loop counter. To avoid ++ * overflow, the maximum supported udelay is 2000 microseconds. ++ * ++ * XXX it would be more polite to find a way to use msleep() ++ */ ++ mdelay(2); ++ udelay(48); + if ((inb(tmport) & 0x80) == 0x00) { /* bsy ? */ + outw(0, tmport--); + outb(0, tmport); +diff --git a/drivers/scsi/device_handler/scsi_dh_alua.c b/drivers/scsi/device_handler/scsi_dh_alua.c +index 4ef0212..e5a4423 100644 +--- a/drivers/scsi/device_handler/scsi_dh_alua.c ++++ b/drivers/scsi/device_handler/scsi_dh_alua.c +@@ -578,8 +578,7 @@ static int alua_rtpg(struct scsi_device *sdev, struct alua_dh_data *h) + h->state = TPGS_STATE_STANDBY; + break; + case TPGS_STATE_OFFLINE: +- case TPGS_STATE_UNAVAILABLE: +- /* Path unusable for unavailable/offline */ ++ /* Path unusable */ + err = SCSI_DH_DEV_OFFLINED; + break; + default: +diff --git a/drivers/scsi/hpsa.c b/drivers/scsi/hpsa.c +index be9aad8..22523aa 100644 +--- a/drivers/scsi/hpsa.c ++++ b/drivers/scsi/hpsa.c +@@ -532,12 +532,42 @@ static void set_performant_mode(struct ctlr_info *h, struct CommandList *c) + c->busaddr |= 1 | (h->blockFetchTable[c->Header.SGList] << 1); + } + ++static int is_firmware_flash_cmd(u8 *cdb) ++{ ++ return cdb[0] == BMIC_WRITE && cdb[6] == BMIC_FLASH_FIRMWARE; ++} ++ ++/* ++ * During firmware flash, the heartbeat register may not update as frequently ++ * as it should. So we dial down lockup detection during firmware flash. 
and ++ * dial it back up when firmware flash completes. ++ */ ++#define HEARTBEAT_SAMPLE_INTERVAL_DURING_FLASH (240 * HZ) ++#define HEARTBEAT_SAMPLE_INTERVAL (30 * HZ) ++static void dial_down_lockup_detection_during_fw_flash(struct ctlr_info *h, ++ struct CommandList *c) ++{ ++ if (!is_firmware_flash_cmd(c->Request.CDB)) ++ return; ++ atomic_inc(&h->firmware_flash_in_progress); ++ h->heartbeat_sample_interval = HEARTBEAT_SAMPLE_INTERVAL_DURING_FLASH; ++} ++ ++static void dial_up_lockup_detection_on_fw_flash_complete(struct ctlr_info *h, ++ struct CommandList *c) ++{ ++ if (is_firmware_flash_cmd(c->Request.CDB) && ++ atomic_dec_and_test(&h->firmware_flash_in_progress)) ++ h->heartbeat_sample_interval = HEARTBEAT_SAMPLE_INTERVAL; ++} ++ + static void enqueue_cmd_and_start_io(struct ctlr_info *h, + struct CommandList *c) + { + unsigned long flags; + + set_performant_mode(h, c); ++ dial_down_lockup_detection_during_fw_flash(h, c); + spin_lock_irqsave(&h->lock, flags); + addQ(&h->reqQ, c); + h->Qdepth++; +@@ -2926,7 +2956,7 @@ static void fill_cmd(struct CommandList *c, u8 cmd, struct ctlr_info *h, + c->Request.Timeout = 0; /* Don't time out */ + memset(&c->Request.CDB[0], 0, sizeof(c->Request.CDB)); + c->Request.CDB[0] = cmd; +- c->Request.CDB[1] = 0x03; /* Reset target above */ ++ c->Request.CDB[1] = HPSA_RESET_TYPE_LUN; + /* If bytes 4-7 are zero, it means reset the */ + /* LunID device */ + c->Request.CDB[4] = 0x00; +@@ -3032,6 +3062,7 @@ static inline int bad_tag(struct ctlr_info *h, u32 tag_index, + static inline void finish_cmd(struct CommandList *c, u32 raw_tag) + { + removeQ(c); ++ dial_up_lockup_detection_on_fw_flash_complete(c->h, c); + if (likely(c->cmd_type == CMD_SCSI)) + complete_scsi_command(c); + else if (c->cmd_type == CMD_IOCTL_PEND) +@@ -4172,9 +4203,6 @@ static void controller_lockup_detected(struct ctlr_info *h) + spin_unlock_irqrestore(&h->lock, flags); + } + +-#define HEARTBEAT_SAMPLE_INTERVAL (10 * HZ) +-#define HEARTBEAT_CHECK_MINIMUM_INTERVAL (HEARTBEAT_SAMPLE_INTERVAL / 2) +- + static void detect_controller_lockup(struct ctlr_info *h) + { + u64 now; +@@ -4185,7 +4213,7 @@ static void detect_controller_lockup(struct ctlr_info *h) + now = get_jiffies_64(); + /* If we've received an interrupt recently, we're ok. */ + if (time_after64(h->last_intr_timestamp + +- (HEARTBEAT_CHECK_MINIMUM_INTERVAL), now)) ++ (h->heartbeat_sample_interval), now)) + return; + + /* +@@ -4194,7 +4222,7 @@ static void detect_controller_lockup(struct ctlr_info *h) + * otherwise don't care about signals in this thread. + */ + if (time_after64(h->last_heartbeat_timestamp + +- (HEARTBEAT_CHECK_MINIMUM_INTERVAL), now)) ++ (h->heartbeat_sample_interval), now)) + return; + + /* If heartbeat has not changed since we last looked, we're not ok. 
*/ +@@ -4236,6 +4264,7 @@ static void add_ctlr_to_lockup_detector_list(struct ctlr_info *h) + { + unsigned long flags; + ++ h->heartbeat_sample_interval = HEARTBEAT_SAMPLE_INTERVAL; + spin_lock_irqsave(&lockup_detector_lock, flags); + list_add_tail(&h->lockup_list, &hpsa_ctlr_list); + spin_unlock_irqrestore(&lockup_detector_lock, flags); +diff --git a/drivers/scsi/hpsa.h b/drivers/scsi/hpsa.h +index 91edafb..c721509 100644 +--- a/drivers/scsi/hpsa.h ++++ b/drivers/scsi/hpsa.h +@@ -124,6 +124,8 @@ struct ctlr_info { + u64 last_intr_timestamp; + u32 last_heartbeat; + u64 last_heartbeat_timestamp; ++ u32 heartbeat_sample_interval; ++ atomic_t firmware_flash_in_progress; + u32 lockup_detected; + struct list_head lockup_list; + }; +diff --git a/drivers/scsi/hpsa_cmd.h b/drivers/scsi/hpsa_cmd.h +index 3fd4715..e4ea0a3 100644 +--- a/drivers/scsi/hpsa_cmd.h ++++ b/drivers/scsi/hpsa_cmd.h +@@ -163,6 +163,7 @@ struct SenseSubsystem_info { + #define BMIC_WRITE 0x27 + #define BMIC_CACHE_FLUSH 0xc2 + #define HPSA_CACHE_FLUSH 0x01 /* C2 was already being used by HPSA */ ++#define BMIC_FLASH_FIRMWARE 0xF7 + + /* Command List Structure */ + union SCSI3Addr { +diff --git a/drivers/scsi/ibmvscsi/ibmvscsi.c b/drivers/scsi/ibmvscsi/ibmvscsi.c +index 3d391dc..36aca4b 100644 +--- a/drivers/scsi/ibmvscsi/ibmvscsi.c ++++ b/drivers/scsi/ibmvscsi/ibmvscsi.c +@@ -1547,6 +1547,9 @@ static int ibmvscsi_do_host_config(struct ibmvscsi_host_data *hostdata, + + host_config = &evt_struct->iu.mad.host_config; + ++ /* The transport length field is only 16-bit */ ++ length = min(0xffff, length); ++ + /* Set up a lun reset SRP command */ + memset(host_config, 0x00, sizeof(*host_config)); + host_config->common.type = VIOSRP_HOST_CONFIG_TYPE; +diff --git a/drivers/scsi/isci/init.c b/drivers/scsi/isci/init.c +index 83d08b6..5c8b0dc 100644 +--- a/drivers/scsi/isci/init.c ++++ b/drivers/scsi/isci/init.c +@@ -469,7 +469,6 @@ static int __devinit isci_pci_probe(struct pci_dev *pdev, const struct pci_devic + if (sci_oem_parameters_validate(&orom->ctrl[i])) { + dev_warn(&pdev->dev, + "[%d]: invalid oem parameters detected, falling back to firmware\n", i); +- devm_kfree(&pdev->dev, orom); + orom = NULL; + break; + } +diff --git a/drivers/scsi/isci/probe_roms.c b/drivers/scsi/isci/probe_roms.c +index b5f4341..7cd637d 100644 +--- a/drivers/scsi/isci/probe_roms.c ++++ b/drivers/scsi/isci/probe_roms.c +@@ -104,7 +104,6 @@ struct isci_orom *isci_request_oprom(struct pci_dev *pdev) + + if (i >= len) { + dev_err(&pdev->dev, "oprom parse error\n"); +- devm_kfree(&pdev->dev, rom); + rom = NULL; + } + pci_unmap_biosrom(oprom); +diff --git a/drivers/scsi/scsi_sysfs.c b/drivers/scsi/scsi_sysfs.c +index bb7c482..08d48a3 100644 +--- a/drivers/scsi/scsi_sysfs.c ++++ b/drivers/scsi/scsi_sysfs.c +@@ -1023,33 +1023,31 @@ static void __scsi_remove_target(struct scsi_target *starget) + void scsi_remove_target(struct device *dev) + { + struct Scsi_Host *shost = dev_to_shost(dev->parent); +- struct scsi_target *starget, *found; ++ struct scsi_target *starget, *last = NULL; + unsigned long flags; + +- restart: +- found = NULL; ++ /* remove targets being careful to lookup next entry before ++ * deleting the last ++ */ + spin_lock_irqsave(shost->host_lock, flags); + list_for_each_entry(starget, &shost->__targets, siblings) { + if (starget->state == STARGET_DEL) + continue; + if (starget->dev.parent == dev || &starget->dev == dev) { +- found = starget; +- found->reap_ref++; +- break; ++ /* assuming new targets arrive at the end */ ++ starget->reap_ref++; ++ 
spin_unlock_irqrestore(shost->host_lock, flags); ++ if (last) ++ scsi_target_reap(last); ++ last = starget; ++ __scsi_remove_target(starget); ++ spin_lock_irqsave(shost->host_lock, flags); + } + } + spin_unlock_irqrestore(shost->host_lock, flags); + +- if (found) { +- __scsi_remove_target(found); +- scsi_target_reap(found); +- /* in the case where @dev has multiple starget children, +- * continue removing. +- * +- * FIXME: does such a case exist? +- */ +- goto restart; +- } ++ if (last) ++ scsi_target_reap(last); + } + EXPORT_SYMBOL(scsi_remove_target); + +diff --git a/drivers/staging/comedi/comedi_fops.c b/drivers/staging/comedi/comedi_fops.c +index 4ad2c0e..9465bce 100644 +--- a/drivers/staging/comedi/comedi_fops.c ++++ b/drivers/staging/comedi/comedi_fops.c +@@ -843,7 +843,7 @@ static int parse_insn(struct comedi_device *dev, struct comedi_insn *insn, + ret = -EAGAIN; + break; + } +- ret = s->async->inttrig(dev, s, insn->data[0]); ++ ret = s->async->inttrig(dev, s, data[0]); + if (ret >= 0) + ret = 1; + break; +@@ -1088,7 +1088,6 @@ static int do_cmd_ioctl(struct comedi_device *dev, + goto cleanup; + } + +- kfree(async->cmd.chanlist); + async->cmd = user_cmd; + async->cmd.data = NULL; + /* load channel/gain list */ +@@ -1833,6 +1832,8 @@ void do_become_nonbusy(struct comedi_device *dev, struct comedi_subdevice *s) + if (async) { + comedi_reset_async_buf(async); + async->inttrig = NULL; ++ kfree(async->cmd.chanlist); ++ async->cmd.chanlist = NULL; + } else { + printk(KERN_ERR + "BUG: (?) do_become_nonbusy called with async=0\n"); +diff --git a/drivers/staging/comedi/drivers/jr3_pci.c b/drivers/staging/comedi/drivers/jr3_pci.c +index 8d98cf4..c8b7eed 100644 +--- a/drivers/staging/comedi/drivers/jr3_pci.c ++++ b/drivers/staging/comedi/drivers/jr3_pci.c +@@ -913,7 +913,7 @@ static int jr3_pci_attach(struct comedi_device *dev, + } + + /* Reset DSP card */ +- devpriv->iobase->channel[0].reset = 0; ++ writel(0, &devpriv->iobase->channel[0].reset); + + result = comedi_load_firmware(dev, "jr3pci.idm", jr3_download_firmware); + printk("Firmare load %d\n", result); +diff --git a/drivers/staging/comedi/drivers/s626.c b/drivers/staging/comedi/drivers/s626.c +index 23fc64b..c72128f 100644 +--- a/drivers/staging/comedi/drivers/s626.c ++++ b/drivers/staging/comedi/drivers/s626.c +@@ -2370,7 +2370,7 @@ static int s626_enc_insn_config(struct comedi_device *dev, + /* (data==NULL) ? 
(Preloadvalue=0) : (Preloadvalue=data[0]); */ + + k->SetMode(dev, k, Setup, TRUE); +- Preload(dev, k, *(insn->data)); ++ Preload(dev, k, data[0]); + k->PulseIndex(dev, k); + SetLatchSource(dev, k, valueSrclatch); + k->SetEnable(dev, k, (uint16_t) (enab != 0)); +diff --git a/drivers/staging/speakup/speakup_soft.c b/drivers/staging/speakup/speakup_soft.c +index 42cdafe..b5130c8 100644 +--- a/drivers/staging/speakup/speakup_soft.c ++++ b/drivers/staging/speakup/speakup_soft.c +@@ -40,7 +40,7 @@ static int softsynth_is_alive(struct spk_synth *synth); + static unsigned char get_index(void); + + static struct miscdevice synth_device; +-static int initialized; ++static int init_pos; + static int misc_registered; + + static struct var_t vars[] = { +@@ -194,7 +194,7 @@ static int softsynth_close(struct inode *inode, struct file *fp) + unsigned long flags; + spk_lock(flags); + synth_soft.alive = 0; +- initialized = 0; ++ init_pos = 0; + spk_unlock(flags); + /* Make sure we let applications go before leaving */ + speakup_start_ttys(); +@@ -239,13 +239,8 @@ static ssize_t softsynth_read(struct file *fp, char *buf, size_t count, + ch = '\x18'; + } else if (synth_buffer_empty()) { + break; +- } else if (!initialized) { +- if (*init) { +- ch = *init; +- init++; +- } else { +- initialized = 1; +- } ++ } else if (init[init_pos]) { ++ ch = init[init_pos++]; + } else { + ch = synth_buffer_getc(); + } +diff --git a/drivers/target/iscsi/iscsi_target.c b/drivers/target/iscsi/iscsi_target.c +index 2ff1255..f35cb10 100644 +--- a/drivers/target/iscsi/iscsi_target.c ++++ b/drivers/target/iscsi/iscsi_target.c +@@ -3204,7 +3204,6 @@ static int iscsit_build_sendtargets_response(struct iscsi_cmd *cmd) + len += 1; + + if ((len + payload_len) > buffer_len) { +- spin_unlock(&tiqn->tiqn_tpg_lock); + end_of_buf = 1; + goto eob; + } +@@ -3357,6 +3356,7 @@ static int iscsit_send_reject( + hdr->opcode = ISCSI_OP_REJECT; + hdr->flags |= ISCSI_FLAG_CMD_FINAL; + hton24(hdr->dlength, ISCSI_HDR_LEN); ++ hdr->ffffffff = 0xffffffff; + cmd->stat_sn = conn->stat_sn++; + hdr->statsn = cpu_to_be32(cmd->stat_sn); + hdr->exp_cmdsn = cpu_to_be32(conn->sess->exp_cmd_sn); +diff --git a/drivers/target/iscsi/iscsi_target_core.h b/drivers/target/iscsi/iscsi_target_core.h +index 0f68197..dae283f 100644 +--- a/drivers/target/iscsi/iscsi_target_core.h ++++ b/drivers/target/iscsi/iscsi_target_core.h +@@ -25,10 +25,10 @@ + #define NA_DATAOUT_TIMEOUT_RETRIES 5 + #define NA_DATAOUT_TIMEOUT_RETRIES_MAX 15 + #define NA_DATAOUT_TIMEOUT_RETRIES_MIN 1 +-#define NA_NOPIN_TIMEOUT 5 ++#define NA_NOPIN_TIMEOUT 15 + #define NA_NOPIN_TIMEOUT_MAX 60 + #define NA_NOPIN_TIMEOUT_MIN 3 +-#define NA_NOPIN_RESPONSE_TIMEOUT 5 ++#define NA_NOPIN_RESPONSE_TIMEOUT 30 + #define NA_NOPIN_RESPONSE_TIMEOUT_MAX 60 + #define NA_NOPIN_RESPONSE_TIMEOUT_MIN 3 + #define NA_RANDOM_DATAIN_PDU_OFFSETS 0 +diff --git a/drivers/target/iscsi/iscsi_target_tpg.c b/drivers/target/iscsi/iscsi_target_tpg.c +index d4cf2cd..309f14c 100644 +--- a/drivers/target/iscsi/iscsi_target_tpg.c ++++ b/drivers/target/iscsi/iscsi_target_tpg.c +@@ -674,6 +674,12 @@ int iscsit_ta_generate_node_acls( + pr_debug("iSCSI_TPG[%hu] - Generate Initiator Portal Group ACLs: %s\n", + tpg->tpgt, (a->generate_node_acls) ? 
"Enabled" : "Disabled"); + ++ if (flag == 1 && a->cache_dynamic_acls == 0) { ++ pr_debug("Explicitly setting cache_dynamic_acls=1 when " ++ "generate_node_acls=1\n"); ++ a->cache_dynamic_acls = 1; ++ } ++ + return 0; + } + +@@ -713,6 +719,12 @@ int iscsit_ta_cache_dynamic_acls( + return -EINVAL; + } + ++ if (a->generate_node_acls == 1 && flag == 0) { ++ pr_debug("Skipping cache_dynamic_acls=0 when" ++ " generate_node_acls=1\n"); ++ return 0; ++ } ++ + a->cache_dynamic_acls = flag; + pr_debug("iSCSI_TPG[%hu] - Cache Dynamic Initiator Portal Group" + " ACLs %s\n", tpg->tpgt, (a->cache_dynamic_acls) ? +diff --git a/drivers/target/target_core_configfs.c b/drivers/target/target_core_configfs.c +index 93d4f6a..0b01bfc 100644 +--- a/drivers/target/target_core_configfs.c ++++ b/drivers/target/target_core_configfs.c +@@ -3123,6 +3123,7 @@ static int __init target_core_init_configfs(void) + GFP_KERNEL); + if (!target_cg->default_groups) { + pr_err("Unable to allocate target_cg->default_groups\n"); ++ ret = -ENOMEM; + goto out_global; + } + +@@ -3138,6 +3139,7 @@ static int __init target_core_init_configfs(void) + GFP_KERNEL); + if (!hba_cg->default_groups) { + pr_err("Unable to allocate hba_cg->default_groups\n"); ++ ret = -ENOMEM; + goto out_global; + } + config_group_init_type_name(&alua_group, +@@ -3153,6 +3155,7 @@ static int __init target_core_init_configfs(void) + GFP_KERNEL); + if (!alua_cg->default_groups) { + pr_err("Unable to allocate alua_cg->default_groups\n"); ++ ret = -ENOMEM; + goto out_global; + } + +@@ -3164,14 +3167,17 @@ static int __init target_core_init_configfs(void) + * Add core/alua/lu_gps/default_lu_gp + */ + lu_gp = core_alua_allocate_lu_gp("default_lu_gp", 1); +- if (IS_ERR(lu_gp)) ++ if (IS_ERR(lu_gp)) { ++ ret = -ENOMEM; + goto out_global; ++ } + + lu_gp_cg = &alua_lu_gps_group; + lu_gp_cg->default_groups = kzalloc(sizeof(struct config_group) * 2, + GFP_KERNEL); + if (!lu_gp_cg->default_groups) { + pr_err("Unable to allocate lu_gp_cg->default_groups\n"); ++ ret = -ENOMEM; + goto out_global; + } + +diff --git a/drivers/target/target_core_file.c b/drivers/target/target_core_file.c +index 455a251..cafa477 100644 +--- a/drivers/target/target_core_file.c ++++ b/drivers/target/target_core_file.c +@@ -139,6 +139,19 @@ static struct se_device *fd_create_virtdevice( + * of pure timestamp updates. + */ + flags = O_RDWR | O_CREAT | O_LARGEFILE | O_DSYNC; ++ /* ++ * Optionally allow fd_buffered_io=1 to be enabled for people ++ * who want use the fs buffer cache as an WriteCache mechanism. ++ * ++ * This means that in event of a hard failure, there is a risk ++ * of silent data-loss if the SCSI client has *not* performed a ++ * forced unit access (FUA) write, or issued SYNCHRONIZE_CACHE ++ * to write-out the entire device cache. 
++ */ ++ if (fd_dev->fbd_flags & FDBD_HAS_BUFFERED_IO_WCE) { ++ pr_debug("FILEIO: Disabling O_DSYNC, using buffered FILEIO\n"); ++ flags &= ~O_DSYNC; ++ } + + file = filp_open(dev_p, flags, 0600); + if (IS_ERR(file)) { +@@ -206,6 +219,12 @@ static struct se_device *fd_create_virtdevice( + if (!dev) + goto fail; + ++ if (fd_dev->fbd_flags & FDBD_HAS_BUFFERED_IO_WCE) { ++ pr_debug("FILEIO: Forcing setting of emulate_write_cache=1" ++ " with FDBD_HAS_BUFFERED_IO_WCE\n"); ++ dev->se_sub_dev->se_dev_attrib.emulate_write_cache = 1; ++ } ++ + fd_dev->fd_dev_id = fd_host->fd_host_dev_id_count++; + fd_dev->fd_queue_depth = dev->queue_depth; + +@@ -450,6 +469,7 @@ enum { + static match_table_t tokens = { + {Opt_fd_dev_name, "fd_dev_name=%s"}, + {Opt_fd_dev_size, "fd_dev_size=%s"}, ++ {Opt_fd_buffered_io, "fd_buffered_io=%d"}, + {Opt_err, NULL} + }; + +@@ -461,7 +481,7 @@ static ssize_t fd_set_configfs_dev_params( + struct fd_dev *fd_dev = se_dev->se_dev_su_ptr; + char *orig, *ptr, *arg_p, *opts; + substring_t args[MAX_OPT_ARGS]; +- int ret = 0, token; ++ int ret = 0, arg, token; + + opts = kstrdup(page, GFP_KERNEL); + if (!opts) +@@ -505,6 +525,19 @@ static ssize_t fd_set_configfs_dev_params( + " bytes\n", fd_dev->fd_dev_size); + fd_dev->fbd_flags |= FBDF_HAS_SIZE; + break; ++ case Opt_fd_buffered_io: ++ match_int(args, &arg); ++ if (arg != 1) { ++ pr_err("bogus fd_buffered_io=%d value\n", arg); ++ ret = -EINVAL; ++ goto out; ++ } ++ ++ pr_debug("FILEIO: Using buffered I/O" ++ " operations for struct fd_dev\n"); ++ ++ fd_dev->fbd_flags |= FDBD_HAS_BUFFERED_IO_WCE; ++ break; + default: + break; + } +@@ -536,8 +569,10 @@ static ssize_t fd_show_configfs_dev_params( + ssize_t bl = 0; + + bl = sprintf(b + bl, "TCM FILEIO ID: %u", fd_dev->fd_dev_id); +- bl += sprintf(b + bl, " File: %s Size: %llu Mode: O_DSYNC\n", +- fd_dev->fd_dev_name, fd_dev->fd_dev_size); ++ bl += sprintf(b + bl, " File: %s Size: %llu Mode: %s\n", ++ fd_dev->fd_dev_name, fd_dev->fd_dev_size, ++ (fd_dev->fbd_flags & FDBD_HAS_BUFFERED_IO_WCE) ? 
++ "Buffered-WCE" : "O_DSYNC"); + return bl; + } + +diff --git a/drivers/target/target_core_file.h b/drivers/target/target_core_file.h +index 53ece69..6b1b6a9 100644 +--- a/drivers/target/target_core_file.h ++++ b/drivers/target/target_core_file.h +@@ -18,6 +18,7 @@ struct fd_request { + + #define FBDF_HAS_PATH 0x01 + #define FBDF_HAS_SIZE 0x02 ++#define FDBD_HAS_BUFFERED_IO_WCE 0x04 + + struct fd_dev { + u32 fbd_flags; +diff --git a/drivers/tty/n_gsm.c b/drivers/tty/n_gsm.c +index fc7bbba..d190269 100644 +--- a/drivers/tty/n_gsm.c ++++ b/drivers/tty/n_gsm.c +@@ -108,7 +108,7 @@ struct gsm_mux_net { + */ + + struct gsm_msg { +- struct gsm_msg *next; ++ struct list_head list; + u8 addr; /* DLCI address + flags */ + u8 ctrl; /* Control byte + flags */ + unsigned int len; /* Length of data block (can be zero) */ +@@ -245,8 +245,7 @@ struct gsm_mux { + unsigned int tx_bytes; /* TX data outstanding */ + #define TX_THRESH_HI 8192 + #define TX_THRESH_LO 2048 +- struct gsm_msg *tx_head; /* Pending data packets */ +- struct gsm_msg *tx_tail; ++ struct list_head tx_list; /* Pending data packets */ + + /* Control messages */ + struct timer_list t2_timer; /* Retransmit timer for commands */ +@@ -663,7 +662,7 @@ static struct gsm_msg *gsm_data_alloc(struct gsm_mux *gsm, u8 addr, int len, + m->len = len; + m->addr = addr; + m->ctrl = ctrl; +- m->next = NULL; ++ INIT_LIST_HEAD(&m->list); + return m; + } + +@@ -673,22 +672,21 @@ static struct gsm_msg *gsm_data_alloc(struct gsm_mux *gsm, u8 addr, int len, + * + * The tty device has called us to indicate that room has appeared in + * the transmit queue. Ram more data into the pipe if we have any ++ * If we have been flow-stopped by a CMD_FCOFF, then we can only ++ * send messages on DLCI0 until CMD_FCON + * + * FIXME: lock against link layer control transmissions + */ + + static void gsm_data_kick(struct gsm_mux *gsm) + { +- struct gsm_msg *msg = gsm->tx_head; ++ struct gsm_msg *msg, *nmsg; + int len; + int skip_sof = 0; + +- /* FIXME: We need to apply this solely to data messages */ +- if (gsm->constipated) +- return; +- +- while (gsm->tx_head != NULL) { +- msg = gsm->tx_head; ++ list_for_each_entry_safe(msg, nmsg, &gsm->tx_list, list) { ++ if (gsm->constipated && msg->addr) ++ continue; + if (gsm->encoding != 0) { + gsm->txframe[0] = GSM1_SOF; + len = gsm_stuff_frame(msg->data, +@@ -711,14 +709,13 @@ static void gsm_data_kick(struct gsm_mux *gsm) + len - skip_sof) < 0) + break; + /* FIXME: Can eliminate one SOF in many more cases */ +- gsm->tx_head = msg->next; +- if (gsm->tx_head == NULL) +- gsm->tx_tail = NULL; + gsm->tx_bytes -= msg->len; +- kfree(msg); + /* For a burst of frames skip the extra SOF within the + burst */ + skip_sof = 1; ++ ++ list_del(&msg->list); ++ kfree(msg); + } + } + +@@ -768,11 +765,7 @@ static void __gsm_data_queue(struct gsm_dlci *dlci, struct gsm_msg *msg) + msg->data = dp; + + /* Add to the actual output queue */ +- if (gsm->tx_tail) +- gsm->tx_tail->next = msg; +- else +- gsm->tx_head = msg; +- gsm->tx_tail = msg; ++ list_add_tail(&msg->list, &gsm->tx_list); + gsm->tx_bytes += msg->len; + gsm_data_kick(gsm); + } +@@ -875,7 +868,7 @@ static int gsm_dlci_data_output_framed(struct gsm_mux *gsm, + + /* dlci->skb is locked by tx_lock */ + if (dlci->skb == NULL) { +- dlci->skb = skb_dequeue(&dlci->skb_list); ++ dlci->skb = skb_dequeue_tail(&dlci->skb_list); + if (dlci->skb == NULL) + return 0; + first = 1; +@@ -886,7 +879,7 @@ static int gsm_dlci_data_output_framed(struct gsm_mux *gsm, + if (len > gsm->mtu) { + if (dlci->adaption 
== 3) { + /* Over long frame, bin it */ +- kfree_skb(dlci->skb); ++ dev_kfree_skb_any(dlci->skb); + dlci->skb = NULL; + return 0; + } +@@ -899,8 +892,11 @@ static int gsm_dlci_data_output_framed(struct gsm_mux *gsm, + + /* FIXME: need a timer or something to kick this so it can't + get stuck with no work outstanding and no buffer free */ +- if (msg == NULL) ++ if (msg == NULL) { ++ skb_queue_tail(&dlci->skb_list, dlci->skb); ++ dlci->skb = NULL; + return -ENOMEM; ++ } + dp = msg->data; + + if (dlci->adaption == 4) { /* Interruptible framed (Packetised Data) */ +@@ -912,7 +908,7 @@ static int gsm_dlci_data_output_framed(struct gsm_mux *gsm, + skb_pull(dlci->skb, len); + __gsm_data_queue(dlci, msg); + if (last) { +- kfree_skb(dlci->skb); ++ dev_kfree_skb_any(dlci->skb); + dlci->skb = NULL; + } + return size; +@@ -971,16 +967,22 @@ static void gsm_dlci_data_sweep(struct gsm_mux *gsm) + static void gsm_dlci_data_kick(struct gsm_dlci *dlci) + { + unsigned long flags; ++ int sweep; ++ ++ if (dlci->constipated) ++ return; + + spin_lock_irqsave(&dlci->gsm->tx_lock, flags); + /* If we have nothing running then we need to fire up */ ++ sweep = (dlci->gsm->tx_bytes < TX_THRESH_LO); + if (dlci->gsm->tx_bytes == 0) { + if (dlci->net) + gsm_dlci_data_output_framed(dlci->gsm, dlci); + else + gsm_dlci_data_output(dlci->gsm, dlci); +- } else if (dlci->gsm->tx_bytes < TX_THRESH_LO) +- gsm_dlci_data_sweep(dlci->gsm); ++ } ++ if (sweep) ++ gsm_dlci_data_sweep(dlci->gsm); + spin_unlock_irqrestore(&dlci->gsm->tx_lock, flags); + } + +@@ -1027,6 +1029,7 @@ static void gsm_process_modem(struct tty_struct *tty, struct gsm_dlci *dlci, + { + int mlines = 0; + u8 brk = 0; ++ int fc; + + /* The modem status command can either contain one octet (v.24 signals) + or two octets (v.24 signals + break signals). 
The length field will +@@ -1038,19 +1041,21 @@ static void gsm_process_modem(struct tty_struct *tty, struct gsm_dlci *dlci, + else { + brk = modem & 0x7f; + modem = (modem >> 7) & 0x7f; +- }; ++ } + + /* Flow control/ready to communicate */ +- if (modem & MDM_FC) { ++ fc = (modem & MDM_FC) || !(modem & MDM_RTR); ++ if (fc && !dlci->constipated) { + /* Need to throttle our output on this device */ + dlci->constipated = 1; +- } +- if (modem & MDM_RTC) { +- mlines |= TIOCM_DSR | TIOCM_DTR; ++ } else if (!fc && dlci->constipated) { + dlci->constipated = 0; + gsm_dlci_data_kick(dlci); + } ++ + /* Map modem bits */ ++ if (modem & MDM_RTC) ++ mlines |= TIOCM_DSR | TIOCM_DTR; + if (modem & MDM_RTR) + mlines |= TIOCM_RTS | TIOCM_CTS; + if (modem & MDM_IC) +@@ -1190,6 +1195,8 @@ static void gsm_control_message(struct gsm_mux *gsm, unsigned int command, + u8 *data, int clen) + { + u8 buf[1]; ++ unsigned long flags; ++ + switch (command) { + case CMD_CLD: { + struct gsm_dlci *dlci = gsm->dlci[0]; +@@ -1206,16 +1213,18 @@ static void gsm_control_message(struct gsm_mux *gsm, unsigned int command, + gsm_control_reply(gsm, CMD_TEST, data, clen); + break; + case CMD_FCON: +- /* Modem wants us to STFU */ +- gsm->constipated = 1; +- gsm_control_reply(gsm, CMD_FCON, NULL, 0); +- break; +- case CMD_FCOFF: + /* Modem can accept data again */ + gsm->constipated = 0; +- gsm_control_reply(gsm, CMD_FCOFF, NULL, 0); ++ gsm_control_reply(gsm, CMD_FCON, NULL, 0); + /* Kick the link in case it is idling */ ++ spin_lock_irqsave(&gsm->tx_lock, flags); + gsm_data_kick(gsm); ++ spin_unlock_irqrestore(&gsm->tx_lock, flags); ++ break; ++ case CMD_FCOFF: ++ /* Modem wants us to STFU */ ++ gsm->constipated = 1; ++ gsm_control_reply(gsm, CMD_FCOFF, NULL, 0); + break; + case CMD_MSC: + /* Out of band modem line change indicator for a DLCI */ +@@ -1668,7 +1677,7 @@ static void gsm_dlci_free(struct kref *ref) + dlci->gsm->dlci[dlci->addr] = NULL; + kfifo_free(dlci->fifo); + while ((dlci->skb = skb_dequeue(&dlci->skb_list))) +- kfree_skb(dlci->skb); ++ dev_kfree_skb(dlci->skb); + kfree(dlci); + } + +@@ -2007,7 +2016,7 @@ void gsm_cleanup_mux(struct gsm_mux *gsm) + { + int i; + struct gsm_dlci *dlci = gsm->dlci[0]; +- struct gsm_msg *txq; ++ struct gsm_msg *txq, *ntxq; + struct gsm_control *gc; + + gsm->dead = 1; +@@ -2042,11 +2051,9 @@ void gsm_cleanup_mux(struct gsm_mux *gsm) + if (gsm->dlci[i]) + gsm_dlci_release(gsm->dlci[i]); + /* Now wipe the queues */ +- for (txq = gsm->tx_head; txq != NULL; txq = gsm->tx_head) { +- gsm->tx_head = txq->next; ++ list_for_each_entry_safe(txq, ntxq, &gsm->tx_list, list) + kfree(txq); +- } +- gsm->tx_tail = NULL; ++ INIT_LIST_HEAD(&gsm->tx_list); + } + EXPORT_SYMBOL_GPL(gsm_cleanup_mux); + +@@ -2157,6 +2164,7 @@ struct gsm_mux *gsm_alloc_mux(void) + } + spin_lock_init(&gsm->lock); + kref_init(&gsm->ref); ++ INIT_LIST_HEAD(&gsm->tx_list); + + gsm->t1 = T1; + gsm->t2 = T2; +@@ -2273,7 +2281,7 @@ static void gsmld_receive_buf(struct tty_struct *tty, const unsigned char *cp, + gsm->error(gsm, *dp, flags); + break; + default: +- WARN_ONCE("%s: unknown flag %d\n", ++ WARN_ONCE(1, "%s: unknown flag %d\n", + tty_name(tty, buf), flags); + break; + } +@@ -2377,12 +2385,12 @@ static void gsmld_write_wakeup(struct tty_struct *tty) + + /* Queue poll */ + clear_bit(TTY_DO_WRITE_WAKEUP, &tty->flags); ++ spin_lock_irqsave(&gsm->tx_lock, flags); + gsm_data_kick(gsm); + if (gsm->tx_bytes < TX_THRESH_LO) { +- spin_lock_irqsave(&gsm->tx_lock, flags); + gsm_dlci_data_sweep(gsm); +- 
spin_unlock_irqrestore(&gsm->tx_lock, flags); + } ++ spin_unlock_irqrestore(&gsm->tx_lock, flags); + } + + /** +@@ -2889,6 +2897,10 @@ static int gsmtty_open(struct tty_struct *tty, struct file *filp) + gsm = gsm_mux[mux]; + if (gsm->dead) + return -EL2HLT; ++ /* If DLCI 0 is not yet fully open return an error. This is ok from a locking ++ perspective as we don't have to worry about this if DLCI0 is lost */ ++ if (gsm->dlci[0] && gsm->dlci[0]->state != DLCI_OPEN) ++ return -EL2NSYNC; + dlci = gsm->dlci[line]; + if (dlci == NULL) + dlci = gsm_dlci_alloc(gsm, line); +diff --git a/drivers/tty/n_tty.c b/drivers/tty/n_tty.c +index 39d6ab6..8481aae 100644 +--- a/drivers/tty/n_tty.c ++++ b/drivers/tty/n_tty.c +@@ -1728,7 +1728,8 @@ static ssize_t n_tty_read(struct tty_struct *tty, struct file *file, + + do_it_again: + +- BUG_ON(!tty->read_buf); ++ if (WARN_ON(!tty->read_buf)) ++ return -EAGAIN; + + c = job_control(tty, file); + if (c < 0) +diff --git a/drivers/tty/serial/8250_pci.c b/drivers/tty/serial/8250_pci.c +index 482d51e..e7d82c1 100644 +--- a/drivers/tty/serial/8250_pci.c ++++ b/drivers/tty/serial/8250_pci.c +@@ -1118,6 +1118,8 @@ pci_xr17c154_setup(struct serial_private *priv, + #define PCI_SUBDEVICE_ID_OCTPRO422 0x0208 + #define PCI_SUBDEVICE_ID_POCTAL232 0x0308 + #define PCI_SUBDEVICE_ID_POCTAL422 0x0408 ++#define PCI_SUBDEVICE_ID_SIIG_DUAL_00 0x2500 ++#define PCI_SUBDEVICE_ID_SIIG_DUAL_30 0x2530 + #define PCI_VENDOR_ID_ADVANTECH 0x13fe + #define PCI_DEVICE_ID_INTEL_CE4100_UART 0x2e66 + #define PCI_DEVICE_ID_ADVANTECH_PCI3620 0x3620 +@@ -3168,8 +3170,11 @@ static struct pci_device_id serial_pci_tbl[] = { + * For now just used the hex ID 0x950a. + */ + { PCI_VENDOR_ID_OXSEMI, 0x950a, +- PCI_SUBVENDOR_ID_SIIG, PCI_SUBDEVICE_ID_SIIG_DUAL_SERIAL, 0, 0, +- pbn_b0_2_115200 }, ++ PCI_SUBVENDOR_ID_SIIG, PCI_SUBDEVICE_ID_SIIG_DUAL_00, ++ 0, 0, pbn_b0_2_115200 }, ++ { PCI_VENDOR_ID_OXSEMI, 0x950a, ++ PCI_SUBVENDOR_ID_SIIG, PCI_SUBDEVICE_ID_SIIG_DUAL_30, ++ 0, 0, pbn_b0_2_115200 }, + { PCI_VENDOR_ID_OXSEMI, 0x950a, + PCI_ANY_ID, PCI_ANY_ID, 0, 0, + pbn_b0_2_1130000 }, +diff --git a/drivers/tty/serial/amba-pl011.c b/drivers/tty/serial/amba-pl011.c +index 6da8cf8..fe9f111 100644 +--- a/drivers/tty/serial/amba-pl011.c ++++ b/drivers/tty/serial/amba-pl011.c +@@ -1627,13 +1627,26 @@ pl011_set_termios(struct uart_port *port, struct ktermios *termios, + old_cr &= ~ST_UART011_CR_OVSFACT; + } + ++ /* ++ * Workaround for the ST Micro oversampling variants to ++ * increase the bitrate slightly, by lowering the divisor, ++ * to avoid delayed sampling of start bit at high speeds, ++ * else we see data corruption. ++ */ ++ if (uap->vendor->oversampling) { ++ if ((baud >= 3000000) && (baud < 3250000) && (quot > 1)) ++ quot -= 1; ++ else if ((baud > 3250000) && (quot > 2)) ++ quot -= 2; ++ } + /* Set baud rate */ + writew(quot & 0x3f, port->membase + UART011_FBRD); + writew(quot >> 6, port->membase + UART011_IBRD); + + /* + * ----------v----------v----------v----------v----- +- * NOTE: MUST BE WRITTEN AFTER UARTLCR_M & UARTLCR_L ++ * NOTE: lcrh_tx and lcrh_rx MUST BE WRITTEN AFTER ++ * UART011_FBRD & UART011_IBRD. 
+ * ----------^----------^----------^----------^----- + */ + writew(lcr_h, port->membase + uap->lcrh_rx); +diff --git a/drivers/usb/host/xhci-mem.c b/drivers/usb/host/xhci-mem.c +index a40ab98..4cddbfc 100644 +--- a/drivers/usb/host/xhci-mem.c ++++ b/drivers/usb/host/xhci-mem.c +@@ -1680,6 +1680,7 @@ void xhci_mem_cleanup(struct xhci_hcd *xhci) + { + struct pci_dev *pdev = to_pci_dev(xhci_to_hcd(xhci)->self.controller); + struct dev_info *dev_info, *next; ++ struct xhci_cd *cur_cd, *next_cd; + unsigned long flags; + int size; + int i, j, num_ports; +@@ -1701,6 +1702,11 @@ void xhci_mem_cleanup(struct xhci_hcd *xhci) + xhci_ring_free(xhci, xhci->cmd_ring); + xhci->cmd_ring = NULL; + xhci_dbg(xhci, "Freed command ring\n"); ++ list_for_each_entry_safe(cur_cd, next_cd, ++ &xhci->cancel_cmd_list, cancel_cmd_list) { ++ list_del(&cur_cd->cancel_cmd_list); ++ kfree(cur_cd); ++ } + + for (i = 1; i < MAX_HC_SLOTS; ++i) + xhci_free_virt_device(xhci, i); +@@ -2246,6 +2252,7 @@ int xhci_mem_init(struct xhci_hcd *xhci, gfp_t flags) + xhci->cmd_ring = xhci_ring_alloc(xhci, 1, true, false, flags); + if (!xhci->cmd_ring) + goto fail; ++ INIT_LIST_HEAD(&xhci->cancel_cmd_list); + xhci_dbg(xhci, "Allocated command ring at %p\n", xhci->cmd_ring); + xhci_dbg(xhci, "First segment DMA is 0x%llx\n", + (unsigned long long)xhci->cmd_ring->first_seg->dma); +diff --git a/drivers/usb/host/xhci-pci.c b/drivers/usb/host/xhci-pci.c +index bddcbfc..4ed7572 100644 +--- a/drivers/usb/host/xhci-pci.c ++++ b/drivers/usb/host/xhci-pci.c +@@ -99,6 +99,7 @@ static void xhci_pci_quirks(struct device *dev, struct xhci_hcd *xhci) + * PPT chipsets. + */ + xhci->quirks |= XHCI_SPURIOUS_REBOOT; ++ xhci->quirks |= XHCI_AVOID_BEI; + } + if (pdev->vendor == PCI_VENDOR_ID_ETRON && + pdev->device == PCI_DEVICE_ID_ASROCK_P67) { +diff --git a/drivers/usb/host/xhci-ring.c b/drivers/usb/host/xhci-ring.c +index c7c530c..950aef8 100644 +--- a/drivers/usb/host/xhci-ring.c ++++ b/drivers/usb/host/xhci-ring.c +@@ -309,12 +309,123 @@ static int room_on_ring(struct xhci_hcd *xhci, struct xhci_ring *ring, + /* Ring the host controller doorbell after placing a command on the ring */ + void xhci_ring_cmd_db(struct xhci_hcd *xhci) + { ++ if (!(xhci->cmd_ring_state & CMD_RING_STATE_RUNNING)) ++ return; ++ + xhci_dbg(xhci, "// Ding dong!\n"); + xhci_writel(xhci, DB_VALUE_HOST, &xhci->dba->doorbell[0]); + /* Flush PCI posted writes */ + xhci_readl(xhci, &xhci->dba->doorbell[0]); + } + ++static int xhci_abort_cmd_ring(struct xhci_hcd *xhci) ++{ ++ u64 temp_64; ++ int ret; ++ ++ xhci_dbg(xhci, "Abort command ring\n"); ++ ++ if (!(xhci->cmd_ring_state & CMD_RING_STATE_RUNNING)) { ++ xhci_dbg(xhci, "The command ring isn't running, " ++ "Have the command ring been stopped?\n"); ++ return 0; ++ } ++ ++ temp_64 = xhci_read_64(xhci, &xhci->op_regs->cmd_ring); ++ if (!(temp_64 & CMD_RING_RUNNING)) { ++ xhci_dbg(xhci, "Command ring had been stopped\n"); ++ return 0; ++ } ++ xhci->cmd_ring_state = CMD_RING_STATE_ABORTED; ++ xhci_write_64(xhci, temp_64 | CMD_RING_ABORT, ++ &xhci->op_regs->cmd_ring); ++ ++ /* Section 4.6.1.2 of xHCI 1.0 spec says software should ++ * time the completion od all xHCI commands, including ++ * the Command Abort operation. If software doesn't see ++ * CRR negated in a timely manner (e.g. longer than 5 ++ * seconds), then it should assume that the there are ++ * larger problems with the xHC and assert HCRST. 
++ */ ++ ret = handshake(xhci, &xhci->op_regs->cmd_ring, ++ CMD_RING_RUNNING, 0, 5 * 1000 * 1000); ++ if (ret < 0) { ++ xhci_err(xhci, "Stopped the command ring failed, " ++ "maybe the host is dead\n"); ++ xhci->xhc_state |= XHCI_STATE_DYING; ++ xhci_quiesce(xhci); ++ xhci_halt(xhci); ++ return -ESHUTDOWN; ++ } ++ ++ return 0; ++} ++ ++static int xhci_queue_cd(struct xhci_hcd *xhci, ++ struct xhci_command *command, ++ union xhci_trb *cmd_trb) ++{ ++ struct xhci_cd *cd; ++ cd = kzalloc(sizeof(struct xhci_cd), GFP_ATOMIC); ++ if (!cd) ++ return -ENOMEM; ++ INIT_LIST_HEAD(&cd->cancel_cmd_list); ++ ++ cd->command = command; ++ cd->cmd_trb = cmd_trb; ++ list_add_tail(&cd->cancel_cmd_list, &xhci->cancel_cmd_list); ++ ++ return 0; ++} ++ ++/* ++ * Cancel the command which has issue. ++ * ++ * Some commands may hang due to waiting for acknowledgement from ++ * usb device. It is outside of the xHC's ability to control and ++ * will cause the command ring is blocked. When it occurs software ++ * should intervene to recover the command ring. ++ * See Section 4.6.1.1 and 4.6.1.2 ++ */ ++int xhci_cancel_cmd(struct xhci_hcd *xhci, struct xhci_command *command, ++ union xhci_trb *cmd_trb) ++{ ++ int retval = 0; ++ unsigned long flags; ++ ++ spin_lock_irqsave(&xhci->lock, flags); ++ ++ if (xhci->xhc_state & XHCI_STATE_DYING) { ++ xhci_warn(xhci, "Abort the command ring," ++ " but the xHCI is dead.\n"); ++ retval = -ESHUTDOWN; ++ goto fail; ++ } ++ ++ /* queue the cmd desriptor to cancel_cmd_list */ ++ retval = xhci_queue_cd(xhci, command, cmd_trb); ++ if (retval) { ++ xhci_warn(xhci, "Queuing command descriptor failed.\n"); ++ goto fail; ++ } ++ ++ /* abort command ring */ ++ retval = xhci_abort_cmd_ring(xhci); ++ if (retval) { ++ xhci_err(xhci, "Abort command ring failed\n"); ++ if (unlikely(retval == -ESHUTDOWN)) { ++ spin_unlock_irqrestore(&xhci->lock, flags); ++ usb_hc_died(xhci_to_hcd(xhci)->primary_hcd); ++ xhci_dbg(xhci, "xHCI host controller is dead.\n"); ++ return retval; ++ } ++ } ++ ++fail: ++ spin_unlock_irqrestore(&xhci->lock, flags); ++ return retval; ++} ++ + void xhci_ring_ep_doorbell(struct xhci_hcd *xhci, + unsigned int slot_id, + unsigned int ep_index, +@@ -1043,6 +1154,20 @@ static void handle_reset_ep_completion(struct xhci_hcd *xhci, + } + } + ++/* Complete the command and detele it from the devcie's command queue. ++ */ ++static void xhci_complete_cmd_in_cmd_wait_list(struct xhci_hcd *xhci, ++ struct xhci_command *command, u32 status) ++{ ++ command->status = status; ++ list_del(&command->cmd_list); ++ if (command->completion) ++ complete(command->completion); ++ else ++ xhci_free_command(xhci, command); ++} ++ ++ + /* Check to see if a command in the device's command queue matches this one. + * Signal the completion or free the command, and return 1. Return 0 if the + * completed command isn't at the head of the command list. +@@ -1061,15 +1186,144 @@ static int handle_cmd_in_cmd_wait_list(struct xhci_hcd *xhci, + if (xhci->cmd_ring->dequeue != command->command_trb) + return 0; + +- command->status = GET_COMP_CODE(le32_to_cpu(event->status)); +- list_del(&command->cmd_list); +- if (command->completion) +- complete(command->completion); +- else +- xhci_free_command(xhci, command); ++ xhci_complete_cmd_in_cmd_wait_list(xhci, command, ++ GET_COMP_CODE(le32_to_cpu(event->status))); + return 1; + } + ++/* ++ * Finding the command trb need to be cancelled and modifying it to ++ * NO OP command. And if the command is in device's command wait ++ * list, finishing and freeing it. 
++ * ++ * If we can't find the command trb, we think it had already been ++ * executed. ++ */ ++static void xhci_cmd_to_noop(struct xhci_hcd *xhci, struct xhci_cd *cur_cd) ++{ ++ struct xhci_segment *cur_seg; ++ union xhci_trb *cmd_trb; ++ u32 cycle_state; ++ ++ if (xhci->cmd_ring->dequeue == xhci->cmd_ring->enqueue) ++ return; ++ ++ /* find the current segment of command ring */ ++ cur_seg = find_trb_seg(xhci->cmd_ring->first_seg, ++ xhci->cmd_ring->dequeue, &cycle_state); ++ ++ /* find the command trb matched by cd from command ring */ ++ for (cmd_trb = xhci->cmd_ring->dequeue; ++ cmd_trb != xhci->cmd_ring->enqueue; ++ next_trb(xhci, xhci->cmd_ring, &cur_seg, &cmd_trb)) { ++ /* If the trb is link trb, continue */ ++ if (TRB_TYPE_LINK_LE32(cmd_trb->generic.field[3])) ++ continue; ++ ++ if (cur_cd->cmd_trb == cmd_trb) { ++ ++ /* If the command in device's command list, we should ++ * finish it and free the command structure. ++ */ ++ if (cur_cd->command) ++ xhci_complete_cmd_in_cmd_wait_list(xhci, ++ cur_cd->command, COMP_CMD_STOP); ++ ++ /* get cycle state from the origin command trb */ ++ cycle_state = le32_to_cpu(cmd_trb->generic.field[3]) ++ & TRB_CYCLE; ++ ++ /* modify the command trb to NO OP command */ ++ cmd_trb->generic.field[0] = 0; ++ cmd_trb->generic.field[1] = 0; ++ cmd_trb->generic.field[2] = 0; ++ cmd_trb->generic.field[3] = cpu_to_le32( ++ TRB_TYPE(TRB_CMD_NOOP) | cycle_state); ++ break; ++ } ++ } ++} ++ ++static void xhci_cancel_cmd_in_cd_list(struct xhci_hcd *xhci) ++{ ++ struct xhci_cd *cur_cd, *next_cd; ++ ++ if (list_empty(&xhci->cancel_cmd_list)) ++ return; ++ ++ list_for_each_entry_safe(cur_cd, next_cd, ++ &xhci->cancel_cmd_list, cancel_cmd_list) { ++ xhci_cmd_to_noop(xhci, cur_cd); ++ list_del(&cur_cd->cancel_cmd_list); ++ kfree(cur_cd); ++ } ++} ++ ++/* ++ * traversing the cancel_cmd_list. If the command descriptor according ++ * to cmd_trb is found, the function free it and return 1, otherwise ++ * return 0. ++ */ ++static int xhci_search_cmd_trb_in_cd_list(struct xhci_hcd *xhci, ++ union xhci_trb *cmd_trb) ++{ ++ struct xhci_cd *cur_cd, *next_cd; ++ ++ if (list_empty(&xhci->cancel_cmd_list)) ++ return 0; ++ ++ list_for_each_entry_safe(cur_cd, next_cd, ++ &xhci->cancel_cmd_list, cancel_cmd_list) { ++ if (cur_cd->cmd_trb == cmd_trb) { ++ if (cur_cd->command) ++ xhci_complete_cmd_in_cmd_wait_list(xhci, ++ cur_cd->command, COMP_CMD_STOP); ++ list_del(&cur_cd->cancel_cmd_list); ++ kfree(cur_cd); ++ return 1; ++ } ++ } ++ ++ return 0; ++} ++ ++/* ++ * If the cmd_trb_comp_code is COMP_CMD_ABORT, we just check whether the ++ * trb pointed by the command ring dequeue pointer is the trb we want to ++ * cancel or not. And if the cmd_trb_comp_code is COMP_CMD_STOP, we will ++ * traverse the cancel_cmd_list to trun the all of the commands according ++ * to command descriptor to NO-OP trb. ++ */ ++static int handle_stopped_cmd_ring(struct xhci_hcd *xhci, ++ int cmd_trb_comp_code) ++{ ++ int cur_trb_is_good = 0; ++ ++ /* Searching the cmd trb pointed by the command ring dequeue ++ * pointer in command descriptor list. If it is found, free it. 
++ */ ++ cur_trb_is_good = xhci_search_cmd_trb_in_cd_list(xhci, ++ xhci->cmd_ring->dequeue); ++ ++ if (cmd_trb_comp_code == COMP_CMD_ABORT) ++ xhci->cmd_ring_state = CMD_RING_STATE_STOPPED; ++ else if (cmd_trb_comp_code == COMP_CMD_STOP) { ++ /* traversing the cancel_cmd_list and canceling ++ * the command according to command descriptor ++ */ ++ xhci_cancel_cmd_in_cd_list(xhci); ++ ++ xhci->cmd_ring_state = CMD_RING_STATE_RUNNING; ++ /* ++ * ring command ring doorbell again to restart the ++ * command ring ++ */ ++ if (xhci->cmd_ring->dequeue != xhci->cmd_ring->enqueue) ++ xhci_ring_cmd_db(xhci); ++ } ++ return cur_trb_is_good; ++} ++ + static void handle_cmd_completion(struct xhci_hcd *xhci, + struct xhci_event_cmd *event) + { +@@ -1095,6 +1349,22 @@ static void handle_cmd_completion(struct xhci_hcd *xhci, + xhci->error_bitmask |= 1 << 5; + return; + } ++ ++ if ((GET_COMP_CODE(le32_to_cpu(event->status)) == COMP_CMD_ABORT) || ++ (GET_COMP_CODE(le32_to_cpu(event->status)) == COMP_CMD_STOP)) { ++ /* If the return value is 0, we think the trb pointed by ++ * command ring dequeue pointer is a good trb. The good ++ * trb means we don't want to cancel the trb, but it have ++ * been stopped by host. So we should handle it normally. ++ * Otherwise, driver should invoke inc_deq() and return. ++ */ ++ if (handle_stopped_cmd_ring(xhci, ++ GET_COMP_CODE(le32_to_cpu(event->status)))) { ++ inc_deq(xhci, xhci->cmd_ring, false); ++ return; ++ } ++ } ++ + switch (le32_to_cpu(xhci->cmd_ring->dequeue->generic.field[3]) + & TRB_TYPE_BITMASK) { + case TRB_TYPE(TRB_ENABLE_SLOT): +@@ -3356,7 +3626,9 @@ static int xhci_queue_isoc_tx(struct xhci_hcd *xhci, gfp_t mem_flags, + } else { + td->last_trb = ep_ring->enqueue; + field |= TRB_IOC; +- if (xhci->hci_version == 0x100) { ++ if (xhci->hci_version == 0x100 && ++ !(xhci->quirks & ++ XHCI_AVOID_BEI)) { + /* Set BEI bit except for the last td */ + if (i < num_tds - 1) + field |= TRB_BEI; +diff --git a/drivers/usb/host/xhci.c b/drivers/usb/host/xhci.c +index 09872ee..f5c0f38 100644 +--- a/drivers/usb/host/xhci.c ++++ b/drivers/usb/host/xhci.c +@@ -52,7 +52,7 @@ MODULE_PARM_DESC(link_quirk, "Don't clear the chain bit on a link TRB"); + * handshake done). There are two failure modes: "usec" have passed (major + * hardware flakeout), or the register reads as all-ones (hardware removed). 
+ */ +-static int handshake(struct xhci_hcd *xhci, void __iomem *ptr, ++int handshake(struct xhci_hcd *xhci, void __iomem *ptr, + u32 mask, u32 done, int usec) + { + u32 result; +@@ -105,8 +105,12 @@ int xhci_halt(struct xhci_hcd *xhci) + + ret = handshake(xhci, &xhci->op_regs->status, + STS_HALT, STS_HALT, XHCI_MAX_HALT_USEC); +- if (!ret) ++ if (!ret) { + xhci->xhc_state |= XHCI_STATE_HALTED; ++ xhci->cmd_ring_state = CMD_RING_STATE_STOPPED; ++ } else ++ xhci_warn(xhci, "Host not halted after %u microseconds.\n", ++ XHCI_MAX_HALT_USEC); + return ret; + } + +@@ -459,6 +463,8 @@ static bool compliance_mode_recovery_timer_quirk_check(void) + + dmi_product_name = dmi_get_system_info(DMI_PRODUCT_NAME); + dmi_sys_vendor = dmi_get_system_info(DMI_SYS_VENDOR); ++ if (!dmi_product_name || !dmi_sys_vendor) ++ return false; + + if (!(strstr(dmi_sys_vendor, "Hewlett-Packard"))) + return false; +@@ -570,6 +576,7 @@ static int xhci_run_finished(struct xhci_hcd *xhci) + return -ENODEV; + } + xhci->shared_hcd->state = HC_STATE_RUNNING; ++ xhci->cmd_ring_state = CMD_RING_STATE_RUNNING; + + if (xhci->quirks & XHCI_NEC_HOST) + xhci_ring_cmd_db(xhci); +@@ -874,7 +881,7 @@ int xhci_suspend(struct xhci_hcd *xhci) + command &= ~CMD_RUN; + xhci_writel(xhci, command, &xhci->op_regs->command); + if (handshake(xhci, &xhci->op_regs->status, +- STS_HALT, STS_HALT, 100*100)) { ++ STS_HALT, STS_HALT, XHCI_MAX_HALT_USEC)) { + xhci_warn(xhci, "WARN: xHC CMD_RUN timeout\n"); + spin_unlock_irq(&xhci->lock); + return -ETIMEDOUT; +@@ -2506,6 +2513,7 @@ static int xhci_configure_endpoint(struct xhci_hcd *xhci, + struct completion *cmd_completion; + u32 *cmd_status; + struct xhci_virt_device *virt_dev; ++ union xhci_trb *cmd_trb; + + spin_lock_irqsave(&xhci->lock, flags); + virt_dev = xhci->devs[udev->slot_id]; +@@ -2551,6 +2559,7 @@ static int xhci_configure_endpoint(struct xhci_hcd *xhci, + } + init_completion(cmd_completion); + ++ cmd_trb = xhci->cmd_ring->dequeue; + if (!ctx_change) + ret = xhci_queue_configure_endpoint(xhci, in_ctx->dma, + udev->slot_id, must_succeed); +@@ -2572,14 +2581,17 @@ static int xhci_configure_endpoint(struct xhci_hcd *xhci, + /* Wait for the configure endpoint command to complete */ + timeleft = wait_for_completion_interruptible_timeout( + cmd_completion, +- USB_CTRL_SET_TIMEOUT); ++ XHCI_CMD_DEFAULT_TIMEOUT); + if (timeleft <= 0) { + xhci_warn(xhci, "%s while waiting for %s command\n", + timeleft == 0 ? "Timeout" : "Signal", + ctx_change == 0 ? + "configure endpoint" : + "evaluate context"); +- /* FIXME cancel the configure endpoint command */ ++ /* cancel the configure endpoint command */ ++ ret = xhci_cancel_cmd(xhci, command, cmd_trb); ++ if (ret < 0) ++ return ret; + return -ETIME; + } + +@@ -3528,8 +3540,10 @@ int xhci_alloc_dev(struct usb_hcd *hcd, struct usb_device *udev) + unsigned long flags; + int timeleft; + int ret; ++ union xhci_trb *cmd_trb; + + spin_lock_irqsave(&xhci->lock, flags); ++ cmd_trb = xhci->cmd_ring->dequeue; + ret = xhci_queue_slot_control(xhci, TRB_ENABLE_SLOT, 0); + if (ret) { + spin_unlock_irqrestore(&xhci->lock, flags); +@@ -3541,12 +3555,12 @@ int xhci_alloc_dev(struct usb_hcd *hcd, struct usb_device *udev) + + /* XXX: how much time for xHC slot assignment? */ + timeleft = wait_for_completion_interruptible_timeout(&xhci->addr_dev, +- USB_CTRL_SET_TIMEOUT); ++ XHCI_CMD_DEFAULT_TIMEOUT); + if (timeleft <= 0) { + xhci_warn(xhci, "%s while waiting for a slot\n", + timeleft == 0 ? 
"Timeout" : "Signal"); +- /* FIXME cancel the enable slot request */ +- return 0; ++ /* cancel the enable slot request */ ++ return xhci_cancel_cmd(xhci, NULL, cmd_trb); + } + + if (!xhci->slot_id) { +@@ -3607,6 +3621,7 @@ int xhci_address_device(struct usb_hcd *hcd, struct usb_device *udev) + struct xhci_slot_ctx *slot_ctx; + struct xhci_input_control_ctx *ctrl_ctx; + u64 temp_64; ++ union xhci_trb *cmd_trb; + + if (!udev->slot_id) { + xhci_dbg(xhci, "Bad Slot ID %d\n", udev->slot_id); +@@ -3645,6 +3660,7 @@ int xhci_address_device(struct usb_hcd *hcd, struct usb_device *udev) + xhci_dbg_ctx(xhci, virt_dev->in_ctx, 2); + + spin_lock_irqsave(&xhci->lock, flags); ++ cmd_trb = xhci->cmd_ring->dequeue; + ret = xhci_queue_address_device(xhci, virt_dev->in_ctx->dma, + udev->slot_id); + if (ret) { +@@ -3657,7 +3673,7 @@ int xhci_address_device(struct usb_hcd *hcd, struct usb_device *udev) + + /* ctrl tx can take up to 5 sec; XXX: need more time for xHC? */ + timeleft = wait_for_completion_interruptible_timeout(&xhci->addr_dev, +- USB_CTRL_SET_TIMEOUT); ++ XHCI_CMD_DEFAULT_TIMEOUT); + /* FIXME: From section 4.3.4: "Software shall be responsible for timing + * the SetAddress() "recovery interval" required by USB and aborting the + * command on a timeout. +@@ -3665,7 +3681,10 @@ int xhci_address_device(struct usb_hcd *hcd, struct usb_device *udev) + if (timeleft <= 0) { + xhci_warn(xhci, "%s while waiting for address device command\n", + timeleft == 0 ? "Timeout" : "Signal"); +- /* FIXME cancel the address device command */ ++ /* cancel the address device command */ ++ ret = xhci_cancel_cmd(xhci, NULL, cmd_trb); ++ if (ret < 0) ++ return ret; + return -ETIME; + } + +diff --git a/drivers/usb/host/xhci.h b/drivers/usb/host/xhci.h +index 44d518a..cc368c2 100644 +--- a/drivers/usb/host/xhci.h ++++ b/drivers/usb/host/xhci.h +@@ -1255,6 +1255,16 @@ struct xhci_td { + union xhci_trb *last_trb; + }; + ++/* xHCI command default timeout value */ ++#define XHCI_CMD_DEFAULT_TIMEOUT (5 * HZ) ++ ++/* command descriptor */ ++struct xhci_cd { ++ struct list_head cancel_cmd_list; ++ struct xhci_command *command; ++ union xhci_trb *cmd_trb; ++}; ++ + struct xhci_dequeue_state { + struct xhci_segment *new_deq_seg; + union xhci_trb *new_deq_ptr; +@@ -1402,6 +1412,11 @@ struct xhci_hcd { + /* data structures */ + struct xhci_device_context_array *dcbaa; + struct xhci_ring *cmd_ring; ++ unsigned int cmd_ring_state; ++#define CMD_RING_STATE_RUNNING (1 << 0) ++#define CMD_RING_STATE_ABORTED (1 << 1) ++#define CMD_RING_STATE_STOPPED (1 << 2) ++ struct list_head cancel_cmd_list; + unsigned int cmd_ring_reserved_trbs; + struct xhci_ring *event_ring; + struct xhci_erst erst; +@@ -1473,6 +1488,7 @@ struct xhci_hcd { + #define XHCI_TRUST_TX_LENGTH (1 << 10) + #define XHCI_SPURIOUS_REBOOT (1 << 13) + #define XHCI_COMP_MODE_QUIRK (1 << 14) ++#define XHCI_AVOID_BEI (1 << 15) + unsigned int num_active_eps; + unsigned int limit_active_eps; + /* There are two roothubs to keep track of bus suspend info for */ +@@ -1666,6 +1682,8 @@ static inline void xhci_unregister_pci(void) {} + + /* xHCI host controller glue */ + typedef void (*xhci_get_quirks_t)(struct device *, struct xhci_hcd *); ++int handshake(struct xhci_hcd *xhci, void __iomem *ptr, ++ u32 mask, u32 done, int usec); + void xhci_quiesce(struct xhci_hcd *xhci); + int xhci_halt(struct xhci_hcd *xhci); + int xhci_reset(struct xhci_hcd *xhci); +@@ -1756,6 +1774,8 @@ void xhci_queue_config_ep_quirk(struct xhci_hcd *xhci, + unsigned int slot_id, unsigned int ep_index, + struct 
xhci_dequeue_state *deq_state); + void xhci_stop_endpoint_command_watchdog(unsigned long arg); ++int xhci_cancel_cmd(struct xhci_hcd *xhci, struct xhci_command *command, ++ union xhci_trb *cmd_trb); + void xhci_ring_ep_doorbell(struct xhci_hcd *xhci, unsigned int slot_id, + unsigned int ep_index, unsigned int stream_id); + +diff --git a/drivers/usb/serial/ftdi_sio.c b/drivers/usb/serial/ftdi_sio.c +index 7324bea..e29a664 100644 +--- a/drivers/usb/serial/ftdi_sio.c ++++ b/drivers/usb/serial/ftdi_sio.c +@@ -584,6 +584,8 @@ static struct usb_device_id id_table_combined [] = { + { USB_DEVICE(FTDI_VID, FTDI_IBS_PEDO_PID) }, + { USB_DEVICE(FTDI_VID, FTDI_IBS_PROD_PID) }, + { USB_DEVICE(FTDI_VID, FTDI_TAVIR_STK500_PID) }, ++ { USB_DEVICE(FTDI_VID, FTDI_TIAO_UMPA_PID), ++ .driver_info = (kernel_ulong_t)&ftdi_jtag_quirk }, + /* + * ELV devices: + */ +diff --git a/drivers/usb/serial/ftdi_sio_ids.h b/drivers/usb/serial/ftdi_sio_ids.h +index 06f6fd2..7b5eb74 100644 +--- a/drivers/usb/serial/ftdi_sio_ids.h ++++ b/drivers/usb/serial/ftdi_sio_ids.h +@@ -517,6 +517,11 @@ + */ + #define FTDI_TAVIR_STK500_PID 0xFA33 /* STK500 AVR programmer */ + ++/* ++ * TIAO product ids (FTDI_VID) ++ * http://www.tiaowiki.com/w/Main_Page ++ */ ++#define FTDI_TIAO_UMPA_PID 0x8a98 /* TIAO/DIYGADGET USB Multi-Protocol Adapter */ + + + /********************************/ +diff --git a/drivers/usb/serial/option.c b/drivers/usb/serial/option.c +index c068b4d..3fd4e6f 100644 +--- a/drivers/usb/serial/option.c ++++ b/drivers/usb/serial/option.c +@@ -870,7 +870,8 @@ static const struct usb_device_id option_ids[] = { + { USB_DEVICE_AND_INTERFACE_INFO(ZTE_VENDOR_ID, 0x0153, 0xff, 0xff, 0xff) }, + { USB_DEVICE_AND_INTERFACE_INFO(ZTE_VENDOR_ID, 0x0155, 0xff, 0xff, 0xff) }, + { USB_DEVICE_AND_INTERFACE_INFO(ZTE_VENDOR_ID, 0x0156, 0xff, 0xff, 0xff) }, +- { USB_DEVICE_AND_INTERFACE_INFO(ZTE_VENDOR_ID, 0x0157, 0xff, 0xff, 0xff) }, ++ { USB_DEVICE_AND_INTERFACE_INFO(ZTE_VENDOR_ID, 0x0157, 0xff, 0xff, 0xff), ++ .driver_info = (kernel_ulong_t)&net_intf5_blacklist }, + { USB_DEVICE_AND_INTERFACE_INFO(ZTE_VENDOR_ID, 0x0158, 0xff, 0xff, 0xff) }, + { USB_DEVICE_AND_INTERFACE_INFO(ZTE_VENDOR_ID, 0x0159, 0xff, 0xff, 0xff) }, + { USB_DEVICE_AND_INTERFACE_INFO(ZTE_VENDOR_ID, 0x0161, 0xff, 0xff, 0xff) }, +diff --git a/drivers/usb/serial/qcaux.c b/drivers/usb/serial/qcaux.c +index a348198..87271e3 100644 +--- a/drivers/usb/serial/qcaux.c ++++ b/drivers/usb/serial/qcaux.c +@@ -36,8 +36,6 @@ + #define UTSTARCOM_PRODUCT_UM175_V1 0x3712 + #define UTSTARCOM_PRODUCT_UM175_V2 0x3714 + #define UTSTARCOM_PRODUCT_UM175_ALLTEL 0x3715 +-#define PANTECH_PRODUCT_UML190_VZW 0x3716 +-#define PANTECH_PRODUCT_UML290_VZW 0x3718 + + /* CMOTECH devices */ + #define CMOTECH_VENDOR_ID 0x16d8 +@@ -68,11 +66,9 @@ static struct usb_device_id id_table[] = { + { USB_DEVICE_AND_INTERFACE_INFO(LG_VENDOR_ID, LG_PRODUCT_VX4400_6000, 0xff, 0xff, 0x00) }, + { USB_DEVICE_AND_INTERFACE_INFO(SANYO_VENDOR_ID, SANYO_PRODUCT_KATANA_LX, 0xff, 0xff, 0x00) }, + { USB_DEVICE_AND_INTERFACE_INFO(SAMSUNG_VENDOR_ID, SAMSUNG_PRODUCT_U520, 0xff, 0x00, 0x00) }, +- { USB_DEVICE_AND_INTERFACE_INFO(UTSTARCOM_VENDOR_ID, PANTECH_PRODUCT_UML190_VZW, 0xff, 0xff, 0xff) }, +- { USB_DEVICE_AND_INTERFACE_INFO(UTSTARCOM_VENDOR_ID, PANTECH_PRODUCT_UML190_VZW, 0xff, 0xfe, 0xff) }, +- { USB_DEVICE_AND_INTERFACE_INFO(UTSTARCOM_VENDOR_ID, PANTECH_PRODUCT_UML290_VZW, 0xff, 0xfd, 0xff) }, /* NMEA */ +- { USB_DEVICE_AND_INTERFACE_INFO(UTSTARCOM_VENDOR_ID, PANTECH_PRODUCT_UML290_VZW, 0xff, 0xfe, 0xff) }, /* WMC */ +- { 
USB_DEVICE_AND_INTERFACE_INFO(UTSTARCOM_VENDOR_ID, PANTECH_PRODUCT_UML290_VZW, 0xff, 0xff, 0xff) }, /* DIAG */ ++ { USB_VENDOR_AND_INTERFACE_INFO(UTSTARCOM_VENDOR_ID, 0xff, 0xfd, 0xff) }, /* NMEA */ ++ { USB_VENDOR_AND_INTERFACE_INFO(UTSTARCOM_VENDOR_ID, 0xff, 0xfe, 0xff) }, /* WMC */ ++ { USB_VENDOR_AND_INTERFACE_INFO(UTSTARCOM_VENDOR_ID, 0xff, 0xff, 0xff) }, /* DIAG */ + { }, + }; + MODULE_DEVICE_TABLE(usb, id_table); +diff --git a/fs/autofs4/root.c b/fs/autofs4/root.c +index f55ae23..790fa63 100644 +--- a/fs/autofs4/root.c ++++ b/fs/autofs4/root.c +@@ -392,10 +392,12 @@ static struct vfsmount *autofs4_d_automount(struct path *path) + ino->flags |= AUTOFS_INF_PENDING; + spin_unlock(&sbi->fs_lock); + status = autofs4_mount_wait(dentry); +- if (status) +- return ERR_PTR(status); + spin_lock(&sbi->fs_lock); + ino->flags &= ~AUTOFS_INF_PENDING; ++ if (status) { ++ spin_unlock(&sbi->fs_lock); ++ return ERR_PTR(status); ++ } + } + done: + if (!(ino->flags & AUTOFS_INF_EXPIRING)) { +diff --git a/fs/binfmt_elf.c b/fs/binfmt_elf.c +index 6ff96c6..8dd615c 100644 +--- a/fs/binfmt_elf.c ++++ b/fs/binfmt_elf.c +@@ -1668,30 +1668,19 @@ static int elf_note_info_init(struct elf_note_info *info) + return 0; + info->psinfo = kmalloc(sizeof(*info->psinfo), GFP_KERNEL); + if (!info->psinfo) +- goto notes_free; ++ return 0; + info->prstatus = kmalloc(sizeof(*info->prstatus), GFP_KERNEL); + if (!info->prstatus) +- goto psinfo_free; ++ return 0; + info->fpu = kmalloc(sizeof(*info->fpu), GFP_KERNEL); + if (!info->fpu) +- goto prstatus_free; ++ return 0; + #ifdef ELF_CORE_COPY_XFPREGS + info->xfpu = kmalloc(sizeof(*info->xfpu), GFP_KERNEL); + if (!info->xfpu) +- goto fpu_free; ++ return 0; + #endif + return 1; +-#ifdef ELF_CORE_COPY_XFPREGS +- fpu_free: +- kfree(info->fpu); +-#endif +- prstatus_free: +- kfree(info->prstatus); +- psinfo_free: +- kfree(info->psinfo); +- notes_free: +- kfree(info->notes); +- return 0; + } + + static int fill_note_info(struct elfhdr *elf, int phdrs, +diff --git a/fs/ecryptfs/ecryptfs_kernel.h b/fs/ecryptfs/ecryptfs_kernel.h +index a9f29b1..2262a77 100644 +--- a/fs/ecryptfs/ecryptfs_kernel.h ++++ b/fs/ecryptfs/ecryptfs_kernel.h +@@ -559,6 +559,8 @@ struct ecryptfs_open_req { + struct inode *ecryptfs_get_inode(struct inode *lower_inode, + struct super_block *sb); + void ecryptfs_i_size_init(const char *page_virt, struct inode *inode); ++int ecryptfs_initialize_file(struct dentry *ecryptfs_dentry, ++ struct inode *ecryptfs_inode); + int ecryptfs_decode_and_decrypt_filename(char **decrypted_name, + size_t *decrypted_name_size, + struct dentry *ecryptfs_dentry, +diff --git a/fs/ecryptfs/file.c b/fs/ecryptfs/file.c +index d3f95f9..841f24f 100644 +--- a/fs/ecryptfs/file.c ++++ b/fs/ecryptfs/file.c +@@ -139,29 +139,50 @@ out: + return rc; + } + +-static void ecryptfs_vma_close(struct vm_area_struct *vma) +-{ +- filemap_write_and_wait(vma->vm_file->f_mapping); +-} +- +-static const struct vm_operations_struct ecryptfs_file_vm_ops = { +- .close = ecryptfs_vma_close, +- .fault = filemap_fault, +-}; ++struct kmem_cache *ecryptfs_file_info_cache; + +-static int ecryptfs_file_mmap(struct file *file, struct vm_area_struct *vma) ++static int read_or_initialize_metadata(struct dentry *dentry) + { ++ struct inode *inode = dentry->d_inode; ++ struct ecryptfs_mount_crypt_stat *mount_crypt_stat; ++ struct ecryptfs_crypt_stat *crypt_stat; + int rc; + +- rc = generic_file_mmap(file, vma); ++ crypt_stat = &ecryptfs_inode_to_private(inode)->crypt_stat; ++ mount_crypt_stat = &ecryptfs_superblock_to_private( 
++ inode->i_sb)->mount_crypt_stat; ++ mutex_lock(&crypt_stat->cs_mutex); ++ ++ if (crypt_stat->flags & ECRYPTFS_POLICY_APPLIED && ++ crypt_stat->flags & ECRYPTFS_KEY_VALID) { ++ rc = 0; ++ goto out; ++ } ++ ++ rc = ecryptfs_read_metadata(dentry); + if (!rc) +- vma->vm_ops = &ecryptfs_file_vm_ops; ++ goto out; ++ ++ if (mount_crypt_stat->flags & ECRYPTFS_PLAINTEXT_PASSTHROUGH_ENABLED) { ++ crypt_stat->flags &= ~(ECRYPTFS_I_SIZE_INITIALIZED ++ | ECRYPTFS_ENCRYPTED); ++ rc = 0; ++ goto out; ++ } ++ ++ if (!(mount_crypt_stat->flags & ECRYPTFS_XATTR_METADATA_ENABLED) && ++ !i_size_read(ecryptfs_inode_to_lower(inode))) { ++ rc = ecryptfs_initialize_file(dentry, inode); ++ if (!rc) ++ goto out; ++ } + ++ rc = -EIO; ++out: ++ mutex_unlock(&crypt_stat->cs_mutex); + return rc; + } + +-struct kmem_cache *ecryptfs_file_info_cache; +- + /** + * ecryptfs_open + * @inode: inode speciying file to open +@@ -237,32 +258,9 @@ static int ecryptfs_open(struct inode *inode, struct file *file) + rc = 0; + goto out; + } +- mutex_lock(&crypt_stat->cs_mutex); +- if (!(crypt_stat->flags & ECRYPTFS_POLICY_APPLIED) +- || !(crypt_stat->flags & ECRYPTFS_KEY_VALID)) { +- rc = ecryptfs_read_metadata(ecryptfs_dentry); +- if (rc) { +- ecryptfs_printk(KERN_DEBUG, +- "Valid headers not found\n"); +- if (!(mount_crypt_stat->flags +- & ECRYPTFS_PLAINTEXT_PASSTHROUGH_ENABLED)) { +- rc = -EIO; +- printk(KERN_WARNING "Either the lower file " +- "is not in a valid eCryptfs format, " +- "or the key could not be retrieved. " +- "Plaintext passthrough mode is not " +- "enabled; returning -EIO\n"); +- mutex_unlock(&crypt_stat->cs_mutex); +- goto out_put; +- } +- rc = 0; +- crypt_stat->flags &= ~(ECRYPTFS_I_SIZE_INITIALIZED +- | ECRYPTFS_ENCRYPTED); +- mutex_unlock(&crypt_stat->cs_mutex); +- goto out; +- } +- } +- mutex_unlock(&crypt_stat->cs_mutex); ++ rc = read_or_initialize_metadata(ecryptfs_dentry); ++ if (rc) ++ goto out_put; + ecryptfs_printk(KERN_DEBUG, "inode w/ addr = [0x%p], i_ino = " + "[0x%.16lx] size: [0x%.16llx]\n", inode, inode->i_ino, + (unsigned long long)i_size_read(inode)); +@@ -278,8 +276,14 @@ out: + + static int ecryptfs_flush(struct file *file, fl_owner_t td) + { +- return file->f_mode & FMODE_WRITE +- ? 
filemap_write_and_wait(file->f_mapping) : 0; ++ struct file *lower_file = ecryptfs_file_to_lower(file); ++ ++ if (lower_file->f_op && lower_file->f_op->flush) { ++ filemap_write_and_wait(file->f_mapping); ++ return lower_file->f_op->flush(lower_file, td); ++ } ++ ++ return 0; + } + + static int ecryptfs_release(struct inode *inode, struct file *file) +@@ -293,15 +297,7 @@ static int ecryptfs_release(struct inode *inode, struct file *file) + static int + ecryptfs_fsync(struct file *file, loff_t start, loff_t end, int datasync) + { +- int rc = 0; +- +- rc = generic_file_fsync(file, start, end, datasync); +- if (rc) +- goto out; +- rc = vfs_fsync_range(ecryptfs_file_to_lower(file), start, end, +- datasync); +-out: +- return rc; ++ return vfs_fsync(ecryptfs_file_to_lower(file), datasync); + } + + static int ecryptfs_fasync(int fd, struct file *file, int flag) +@@ -370,7 +366,7 @@ const struct file_operations ecryptfs_main_fops = { + #ifdef CONFIG_COMPAT + .compat_ioctl = ecryptfs_compat_ioctl, + #endif +- .mmap = ecryptfs_file_mmap, ++ .mmap = generic_file_mmap, + .open = ecryptfs_open, + .flush = ecryptfs_flush, + .release = ecryptfs_release, +diff --git a/fs/ecryptfs/inode.c b/fs/ecryptfs/inode.c +index 7c7556b..a9be90d 100644 +--- a/fs/ecryptfs/inode.c ++++ b/fs/ecryptfs/inode.c +@@ -161,6 +161,31 @@ ecryptfs_create_underlying_file(struct inode *lower_dir_inode, + return vfs_create(lower_dir_inode, lower_dentry, mode, NULL); + } + ++static int ecryptfs_do_unlink(struct inode *dir, struct dentry *dentry, ++ struct inode *inode) ++{ ++ struct dentry *lower_dentry = ecryptfs_dentry_to_lower(dentry); ++ struct inode *lower_dir_inode = ecryptfs_inode_to_lower(dir); ++ struct dentry *lower_dir_dentry; ++ int rc; ++ ++ dget(lower_dentry); ++ lower_dir_dentry = lock_parent(lower_dentry); ++ rc = vfs_unlink(lower_dir_inode, lower_dentry); ++ if (rc) { ++ printk(KERN_ERR "Error in vfs_unlink; rc = [%d]\n", rc); ++ goto out_unlock; ++ } ++ fsstack_copy_attr_times(dir, lower_dir_inode); ++ set_nlink(inode, ecryptfs_inode_to_lower(inode)->i_nlink); ++ inode->i_ctime = dir->i_ctime; ++ d_drop(dentry); ++out_unlock: ++ unlock_dir(lower_dir_dentry); ++ dput(lower_dentry); ++ return rc; ++} ++ + /** + * ecryptfs_do_create + * @directory_inode: inode of the new file's dentry's parent in ecryptfs +@@ -201,8 +226,10 @@ ecryptfs_do_create(struct inode *directory_inode, + } + inode = __ecryptfs_get_inode(lower_dentry->d_inode, + directory_inode->i_sb); +- if (IS_ERR(inode)) ++ if (IS_ERR(inode)) { ++ vfs_unlink(lower_dir_dentry->d_inode, lower_dentry); + goto out_lock; ++ } + fsstack_copy_attr_times(directory_inode, lower_dir_dentry->d_inode); + fsstack_copy_inode_size(directory_inode, lower_dir_dentry->d_inode); + out_lock: +@@ -219,8 +246,8 @@ out: + * + * Returns zero on success + */ +-static int ecryptfs_initialize_file(struct dentry *ecryptfs_dentry, +- struct inode *ecryptfs_inode) ++int ecryptfs_initialize_file(struct dentry *ecryptfs_dentry, ++ struct inode *ecryptfs_inode) + { + struct ecryptfs_crypt_stat *crypt_stat = + &ecryptfs_inode_to_private(ecryptfs_inode)->crypt_stat; +@@ -284,7 +311,9 @@ ecryptfs_create(struct inode *directory_inode, struct dentry *ecryptfs_dentry, + * that this on disk file is prepared to be an ecryptfs file */ + rc = ecryptfs_initialize_file(ecryptfs_dentry, ecryptfs_inode); + if (rc) { +- drop_nlink(ecryptfs_inode); ++ ecryptfs_do_unlink(directory_inode, ecryptfs_dentry, ++ ecryptfs_inode); ++ make_bad_inode(ecryptfs_inode); + unlock_new_inode(ecryptfs_inode); + 
iput(ecryptfs_inode); + goto out; +@@ -496,27 +525,7 @@ out_lock: + + static int ecryptfs_unlink(struct inode *dir, struct dentry *dentry) + { +- int rc = 0; +- struct dentry *lower_dentry = ecryptfs_dentry_to_lower(dentry); +- struct inode *lower_dir_inode = ecryptfs_inode_to_lower(dir); +- struct dentry *lower_dir_dentry; +- +- dget(lower_dentry); +- lower_dir_dentry = lock_parent(lower_dentry); +- rc = vfs_unlink(lower_dir_inode, lower_dentry); +- if (rc) { +- printk(KERN_ERR "Error in vfs_unlink; rc = [%d]\n", rc); +- goto out_unlock; +- } +- fsstack_copy_attr_times(dir, lower_dir_inode); +- set_nlink(dentry->d_inode, +- ecryptfs_inode_to_lower(dentry->d_inode)->i_nlink); +- dentry->d_inode->i_ctime = dir->i_ctime; +- d_drop(dentry); +-out_unlock: +- unlock_dir(lower_dir_dentry); +- dput(lower_dentry); +- return rc; ++ return ecryptfs_do_unlink(dir, dentry, dentry->d_inode); + } + + static int ecryptfs_symlink(struct inode *dir, struct dentry *dentry, +@@ -1026,12 +1035,6 @@ static int ecryptfs_setattr(struct dentry *dentry, struct iattr *ia) + goto out; + } + +- if (S_ISREG(inode->i_mode)) { +- rc = filemap_write_and_wait(inode->i_mapping); +- if (rc) +- goto out; +- fsstack_copy_attr_all(inode, lower_inode); +- } + memcpy(&lower_ia, ia, sizeof(lower_ia)); + if (ia->ia_valid & ATTR_FILE) + lower_ia.ia_file = ecryptfs_file_to_lower(ia->ia_file); +diff --git a/fs/ecryptfs/main.c b/fs/ecryptfs/main.c +index b4a6bef..1cfef9f 100644 +--- a/fs/ecryptfs/main.c ++++ b/fs/ecryptfs/main.c +@@ -162,6 +162,7 @@ void ecryptfs_put_lower_file(struct inode *inode) + inode_info = ecryptfs_inode_to_private(inode); + if (atomic_dec_and_mutex_lock(&inode_info->lower_file_count, + &inode_info->lower_file_mutex)) { ++ filemap_write_and_wait(inode->i_mapping); + fput(inode_info->lower_file); + inode_info->lower_file = NULL; + mutex_unlock(&inode_info->lower_file_mutex); +diff --git a/fs/ecryptfs/mmap.c b/fs/ecryptfs/mmap.c +index 6a44148..93a998a 100644 +--- a/fs/ecryptfs/mmap.c ++++ b/fs/ecryptfs/mmap.c +@@ -62,18 +62,6 @@ static int ecryptfs_writepage(struct page *page, struct writeback_control *wbc) + { + int rc; + +- /* +- * Refuse to write the page out if we are called from reclaim context +- * since our writepage() path may potentially allocate memory when +- * calling into the lower fs vfs_write() which may in turn invoke +- * us again. 
+- */ +- if (current->flags & PF_MEMALLOC) { +- redirty_page_for_writepage(wbc, page); +- rc = 0; +- goto out; +- } +- + rc = ecryptfs_encrypt_page(page); + if (rc) { + ecryptfs_printk(KERN_WARNING, "Error encrypting " +@@ -498,7 +486,6 @@ static int ecryptfs_write_end(struct file *file, + struct ecryptfs_crypt_stat *crypt_stat = + &ecryptfs_inode_to_private(ecryptfs_inode)->crypt_stat; + int rc; +- int need_unlock_page = 1; + + ecryptfs_printk(KERN_DEBUG, "Calling fill_zeros_to_end_of_page" + "(page w/ index = [0x%.16lx], to = [%d])\n", index, to); +@@ -519,26 +506,26 @@ static int ecryptfs_write_end(struct file *file, + "zeros in page with index = [0x%.16lx]\n", index); + goto out; + } +- set_page_dirty(page); +- unlock_page(page); +- need_unlock_page = 0; ++ rc = ecryptfs_encrypt_page(page); ++ if (rc) { ++ ecryptfs_printk(KERN_WARNING, "Error encrypting page (upper " ++ "index [0x%.16lx])\n", index); ++ goto out; ++ } + if (pos + copied > i_size_read(ecryptfs_inode)) { + i_size_write(ecryptfs_inode, pos + copied); + ecryptfs_printk(KERN_DEBUG, "Expanded file size to " + "[0x%.16llx]\n", + (unsigned long long)i_size_read(ecryptfs_inode)); +- balance_dirty_pages_ratelimited(mapping); +- rc = ecryptfs_write_inode_size_to_metadata(ecryptfs_inode); +- if (rc) { +- printk(KERN_ERR "Error writing inode size to metadata; " +- "rc = [%d]\n", rc); +- goto out; +- } + } +- rc = copied; ++ rc = ecryptfs_write_inode_size_to_metadata(ecryptfs_inode); ++ if (rc) ++ printk(KERN_ERR "Error writing inode size to metadata; " ++ "rc = [%d]\n", rc); ++ else ++ rc = copied; + out: +- if (need_unlock_page) +- unlock_page(page); ++ unlock_page(page); + page_cache_release(page); + return rc; + } +diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c +index 8b01f9f..bac2330 100644 +--- a/fs/ext4/inode.c ++++ b/fs/ext4/inode.c +@@ -2382,6 +2382,16 @@ static int ext4_nonda_switch(struct super_block *sb) + free_blocks = EXT4_C2B(sbi, + percpu_counter_read_positive(&sbi->s_freeclusters_counter)); + dirty_blocks = percpu_counter_read_positive(&sbi->s_dirtyclusters_counter); ++ /* ++ * Start pushing delalloc when 1/2 of free blocks are dirty. ++ */ ++ if (dirty_blocks && (free_blocks < 2 * dirty_blocks) && ++ !writeback_in_progress(sb->s_bdi) && ++ down_read_trylock(&sb->s_umount)) { ++ writeback_inodes_sb(sb, WB_REASON_FS_FREE_SPACE); ++ up_read(&sb->s_umount); ++ } ++ + if (2 * free_blocks < 3 * dirty_blocks || + free_blocks < (dirty_blocks + EXT4_FREECLUSTERS_WATERMARK)) { + /* +@@ -2390,13 +2400,6 @@ static int ext4_nonda_switch(struct super_block *sb) + */ + return 1; + } +- /* +- * Even if we don't switch but are nearing capacity, +- * start pushing delalloc when 1/2 of free blocks are dirty. +- */ +- if (free_blocks < 2 * dirty_blocks) +- writeback_inodes_sb_if_idle(sb, WB_REASON_FS_FREE_SPACE); +- + return 0; + } + +@@ -4004,6 +4007,7 @@ static int ext4_do_update_inode(handle_t *handle, + struct ext4_inode_info *ei = EXT4_I(inode); + struct buffer_head *bh = iloc->bh; + int err = 0, rc, block; ++ int need_datasync = 0; + + /* For fields not not tracking in the in-memory inode, + * initialise them to zero for new inodes. 
*/ +@@ -4052,7 +4056,10 @@ static int ext4_do_update_inode(handle_t *handle, + raw_inode->i_file_acl_high = + cpu_to_le16(ei->i_file_acl >> 32); + raw_inode->i_file_acl_lo = cpu_to_le32(ei->i_file_acl); +- ext4_isize_set(raw_inode, ei->i_disksize); ++ if (ei->i_disksize != ext4_isize(raw_inode)) { ++ ext4_isize_set(raw_inode, ei->i_disksize); ++ need_datasync = 1; ++ } + if (ei->i_disksize > 0x7fffffffULL) { + struct super_block *sb = inode->i_sb; + if (!EXT4_HAS_RO_COMPAT_FEATURE(sb, +@@ -4105,7 +4112,7 @@ static int ext4_do_update_inode(handle_t *handle, + err = rc; + ext4_clear_inode_state(inode, EXT4_STATE_NEW); + +- ext4_update_inode_fsync_trans(handle, inode, 0); ++ ext4_update_inode_fsync_trans(handle, inode, need_datasync); + out_brelse: + brelse(bh); + ext4_std_error(inode->i_sb, err); +diff --git a/fs/ext4/move_extent.c b/fs/ext4/move_extent.c +index c5826c6..e2016f3 100644 +--- a/fs/ext4/move_extent.c ++++ b/fs/ext4/move_extent.c +@@ -141,55 +141,21 @@ mext_next_extent(struct inode *inode, struct ext4_ext_path *path, + } + + /** +- * mext_check_null_inode - NULL check for two inodes +- * +- * If inode1 or inode2 is NULL, return -EIO. Otherwise, return 0. +- */ +-static int +-mext_check_null_inode(struct inode *inode1, struct inode *inode2, +- const char *function, unsigned int line) +-{ +- int ret = 0; +- +- if (inode1 == NULL) { +- __ext4_error(inode2->i_sb, function, line, +- "Both inodes should not be NULL: " +- "inode1 NULL inode2 %lu", inode2->i_ino); +- ret = -EIO; +- } else if (inode2 == NULL) { +- __ext4_error(inode1->i_sb, function, line, +- "Both inodes should not be NULL: " +- "inode1 %lu inode2 NULL", inode1->i_ino); +- ret = -EIO; +- } +- return ret; +-} +- +-/** + * double_down_write_data_sem - Acquire two inodes' write lock of i_data_sem + * +- * @orig_inode: original inode structure +- * @donor_inode: donor inode structure +- * Acquire write lock of i_data_sem of the two inodes (orig and donor) by +- * i_ino order. ++ * Acquire write lock of i_data_sem of the two inodes + */ + static void +-double_down_write_data_sem(struct inode *orig_inode, struct inode *donor_inode) ++double_down_write_data_sem(struct inode *first, struct inode *second) + { +- struct inode *first = orig_inode, *second = donor_inode; ++ if (first < second) { ++ down_write(&EXT4_I(first)->i_data_sem); ++ down_write_nested(&EXT4_I(second)->i_data_sem, SINGLE_DEPTH_NESTING); ++ } else { ++ down_write(&EXT4_I(second)->i_data_sem); ++ down_write_nested(&EXT4_I(first)->i_data_sem, SINGLE_DEPTH_NESTING); + +- /* +- * Use the inode number to provide the stable locking order instead +- * of its address, because the C language doesn't guarantee you can +- * compare pointers that don't come from the same array. 
+- */ +- if (donor_inode->i_ino < orig_inode->i_ino) { +- first = donor_inode; +- second = orig_inode; + } +- +- down_write(&EXT4_I(first)->i_data_sem); +- down_write_nested(&EXT4_I(second)->i_data_sem, SINGLE_DEPTH_NESTING); + } + + /** +@@ -969,14 +935,6 @@ mext_check_arguments(struct inode *orig_inode, + return -EINVAL; + } + +- /* Files should be in the same ext4 FS */ +- if (orig_inode->i_sb != donor_inode->i_sb) { +- ext4_debug("ext4 move extent: The argument files " +- "should be in same FS [ino:orig %lu, donor %lu]\n", +- orig_inode->i_ino, donor_inode->i_ino); +- return -EINVAL; +- } +- + /* Ext4 move extent supports only extent based file */ + if (!(ext4_test_inode_flag(orig_inode, EXT4_INODE_EXTENTS))) { + ext4_debug("ext4 move extent: orig file is not extents " +@@ -1072,35 +1030,19 @@ mext_check_arguments(struct inode *orig_inode, + * @inode1: the inode structure + * @inode2: the inode structure + * +- * Lock two inodes' i_mutex by i_ino order. +- * If inode1 or inode2 is NULL, return -EIO. Otherwise, return 0. ++ * Lock two inodes' i_mutex + */ +-static int ++static void + mext_inode_double_lock(struct inode *inode1, struct inode *inode2) + { +- int ret = 0; +- +- BUG_ON(inode1 == NULL && inode2 == NULL); +- +- ret = mext_check_null_inode(inode1, inode2, __func__, __LINE__); +- if (ret < 0) +- goto out; +- +- if (inode1 == inode2) { +- mutex_lock(&inode1->i_mutex); +- goto out; +- } +- +- if (inode1->i_ino < inode2->i_ino) { ++ BUG_ON(inode1 == inode2); ++ if (inode1 < inode2) { + mutex_lock_nested(&inode1->i_mutex, I_MUTEX_PARENT); + mutex_lock_nested(&inode2->i_mutex, I_MUTEX_CHILD); + } else { + mutex_lock_nested(&inode2->i_mutex, I_MUTEX_PARENT); + mutex_lock_nested(&inode1->i_mutex, I_MUTEX_CHILD); + } +- +-out: +- return ret; + } + + /** +@@ -1109,28 +1051,13 @@ out: + * @inode1: the inode that is released first + * @inode2: the inode that is released second + * +- * If inode1 or inode2 is NULL, return -EIO. Otherwise, return 0. 
+ */ + +-static int ++static void + mext_inode_double_unlock(struct inode *inode1, struct inode *inode2) + { +- int ret = 0; +- +- BUG_ON(inode1 == NULL && inode2 == NULL); +- +- ret = mext_check_null_inode(inode1, inode2, __func__, __LINE__); +- if (ret < 0) +- goto out; +- +- if (inode1) +- mutex_unlock(&inode1->i_mutex); +- +- if (inode2 && inode2 != inode1) +- mutex_unlock(&inode2->i_mutex); +- +-out: +- return ret; ++ mutex_unlock(&inode1->i_mutex); ++ mutex_unlock(&inode2->i_mutex); + } + + /** +@@ -1187,16 +1114,23 @@ ext4_move_extents(struct file *o_filp, struct file *d_filp, + ext4_lblk_t block_end, seq_start, add_blocks, file_end, seq_blocks = 0; + ext4_lblk_t rest_blocks; + pgoff_t orig_page_offset = 0, seq_end_page; +- int ret1, ret2, depth, last_extent = 0; ++ int ret, depth, last_extent = 0; + int blocks_per_page = PAGE_CACHE_SIZE >> orig_inode->i_blkbits; + int data_offset_in_page; + int block_len_in_page; + int uninit; + +- /* orig and donor should be different file */ +- if (orig_inode->i_ino == donor_inode->i_ino) { ++ if (orig_inode->i_sb != donor_inode->i_sb) { ++ ext4_debug("ext4 move extent: The argument files " ++ "should be in same FS [ino:orig %lu, donor %lu]\n", ++ orig_inode->i_ino, donor_inode->i_ino); ++ return -EINVAL; ++ } ++ ++ /* orig and donor should be different inodes */ ++ if (orig_inode == donor_inode) { + ext4_debug("ext4 move extent: The argument files should not " +- "be same file [ino:orig %lu, donor %lu]\n", ++ "be same inode [ino:orig %lu, donor %lu]\n", + orig_inode->i_ino, donor_inode->i_ino); + return -EINVAL; + } +@@ -1208,18 +1142,21 @@ ext4_move_extents(struct file *o_filp, struct file *d_filp, + orig_inode->i_ino, donor_inode->i_ino); + return -EINVAL; + } +- ++ /* TODO: This is non obvious task to swap blocks for inodes with full ++ jornaling enabled */ ++ if (ext4_should_journal_data(orig_inode) || ++ ext4_should_journal_data(donor_inode)) { ++ return -EINVAL; ++ } + /* Protect orig and donor inodes against a truncate */ +- ret1 = mext_inode_double_lock(orig_inode, donor_inode); +- if (ret1 < 0) +- return ret1; ++ mext_inode_double_lock(orig_inode, donor_inode); + + /* Protect extent tree against block allocations via delalloc */ + double_down_write_data_sem(orig_inode, donor_inode); + /* Check the filesystem environment whether move_extent can be done */ +- ret1 = mext_check_arguments(orig_inode, donor_inode, orig_start, ++ ret = mext_check_arguments(orig_inode, donor_inode, orig_start, + donor_start, &len); +- if (ret1) ++ if (ret) + goto out; + + file_end = (i_size_read(orig_inode) - 1) >> orig_inode->i_blkbits; +@@ -1227,13 +1164,13 @@ ext4_move_extents(struct file *o_filp, struct file *d_filp, + if (file_end < block_end) + len -= block_end - file_end; + +- ret1 = get_ext_path(orig_inode, block_start, &orig_path); +- if (ret1) ++ ret = get_ext_path(orig_inode, block_start, &orig_path); ++ if (ret) + goto out; + + /* Get path structure to check the hole */ +- ret1 = get_ext_path(orig_inode, block_start, &holecheck_path); +- if (ret1) ++ ret = get_ext_path(orig_inode, block_start, &holecheck_path); ++ if (ret) + goto out; + + depth = ext_depth(orig_inode); +@@ -1252,13 +1189,13 @@ ext4_move_extents(struct file *o_filp, struct file *d_filp, + last_extent = mext_next_extent(orig_inode, + holecheck_path, &ext_cur); + if (last_extent < 0) { +- ret1 = last_extent; ++ ret = last_extent; + goto out; + } + last_extent = mext_next_extent(orig_inode, orig_path, + &ext_dummy); + if (last_extent < 0) { +- ret1 = last_extent; ++ ret = last_extent; 
+ goto out; + } + seq_start = le32_to_cpu(ext_cur->ee_block); +@@ -1272,7 +1209,7 @@ ext4_move_extents(struct file *o_filp, struct file *d_filp, + if (le32_to_cpu(ext_cur->ee_block) > block_end) { + ext4_debug("ext4 move extent: The specified range of file " + "may be the hole\n"); +- ret1 = -EINVAL; ++ ret = -EINVAL; + goto out; + } + +@@ -1292,7 +1229,7 @@ ext4_move_extents(struct file *o_filp, struct file *d_filp, + last_extent = mext_next_extent(orig_inode, holecheck_path, + &ext_cur); + if (last_extent < 0) { +- ret1 = last_extent; ++ ret = last_extent; + break; + } + add_blocks = ext4_ext_get_actual_len(ext_cur); +@@ -1349,18 +1286,18 @@ ext4_move_extents(struct file *o_filp, struct file *d_filp, + orig_page_offset, + data_offset_in_page, + block_len_in_page, uninit, +- &ret1); ++ &ret); + + /* Count how many blocks we have exchanged */ + *moved_len += block_len_in_page; +- if (ret1 < 0) ++ if (ret < 0) + break; + if (*moved_len > len) { + EXT4_ERROR_INODE(orig_inode, + "We replaced blocks too much! " + "sum of replaced: %llu requested: %llu", + *moved_len, len); +- ret1 = -EIO; ++ ret = -EIO; + break; + } + +@@ -1374,22 +1311,22 @@ ext4_move_extents(struct file *o_filp, struct file *d_filp, + } + + double_down_write_data_sem(orig_inode, donor_inode); +- if (ret1 < 0) ++ if (ret < 0) + break; + + /* Decrease buffer counter */ + if (holecheck_path) + ext4_ext_drop_refs(holecheck_path); +- ret1 = get_ext_path(orig_inode, seq_start, &holecheck_path); +- if (ret1) ++ ret = get_ext_path(orig_inode, seq_start, &holecheck_path); ++ if (ret) + break; + depth = holecheck_path->p_depth; + + /* Decrease buffer counter */ + if (orig_path) + ext4_ext_drop_refs(orig_path); +- ret1 = get_ext_path(orig_inode, seq_start, &orig_path); +- if (ret1) ++ ret = get_ext_path(orig_inode, seq_start, &orig_path); ++ if (ret) + break; + + ext_cur = holecheck_path[depth].p_ext; +@@ -1412,12 +1349,7 @@ out: + kfree(holecheck_path); + } + double_up_write_data_sem(orig_inode, donor_inode); +- ret2 = mext_inode_double_unlock(orig_inode, donor_inode); +- +- if (ret1) +- return ret1; +- else if (ret2) +- return ret2; ++ mext_inode_double_unlock(orig_inode, donor_inode); + +- return 0; ++ return ret; + } +diff --git a/fs/ext4/namei.c b/fs/ext4/namei.c +index 4dd0890..88f97e5 100644 +--- a/fs/ext4/namei.c ++++ b/fs/ext4/namei.c +@@ -1801,9 +1801,7 @@ retry: + err = PTR_ERR(inode); + if (!IS_ERR(inode)) { + init_special_inode(inode, inode->i_mode, rdev); +-#ifdef CONFIG_EXT4_FS_XATTR + inode->i_op = &ext4_special_inode_operations; +-#endif + err = ext4_add_nondir(handle, dentry, inode); + } + ext4_journal_stop(handle); +diff --git a/fs/fs-writeback.c b/fs/fs-writeback.c +index 54f5786..13bfa07 100644 +--- a/fs/fs-writeback.c ++++ b/fs/fs-writeback.c +@@ -63,6 +63,7 @@ int writeback_in_progress(struct backing_dev_info *bdi) + { + return test_bit(BDI_writeback_running, &bdi->state); + } ++EXPORT_SYMBOL(writeback_in_progress); + + static inline struct backing_dev_info *inode_to_bdi(struct inode *inode) + { +diff --git a/fs/jffs2/wbuf.c b/fs/jffs2/wbuf.c +index b09e51d..464cd76 100644 +--- a/fs/jffs2/wbuf.c ++++ b/fs/jffs2/wbuf.c +@@ -1032,11 +1032,11 @@ int jffs2_check_oob_empty(struct jffs2_sb_info *c, + ops.datbuf = NULL; + + ret = c->mtd->read_oob(c->mtd, jeb->offset, &ops); +- if (ret || ops.oobretlen != ops.ooblen) { ++ if ((ret && !mtd_is_bitflip(ret)) || ops.oobretlen != ops.ooblen) { + printk(KERN_ERR "cannot read OOB for EB at %08x, requested %zd" + " bytes, read %zd bytes, error %d\n", + jeb->offset, ops.ooblen, 
ops.oobretlen, ret); +- if (!ret) ++ if (!ret || mtd_is_bitflip(ret)) + ret = -EIO; + return ret; + } +@@ -1075,11 +1075,11 @@ int jffs2_check_nand_cleanmarker(struct jffs2_sb_info *c, + ops.datbuf = NULL; + + ret = c->mtd->read_oob(c->mtd, jeb->offset, &ops); +- if (ret || ops.oobretlen != ops.ooblen) { ++ if ((ret && !mtd_is_bitflip(ret)) || ops.oobretlen != ops.ooblen) { + printk(KERN_ERR "cannot read OOB for EB at %08x, requested %zd" + " bytes, read %zd bytes, error %d\n", + jeb->offset, ops.ooblen, ops.oobretlen, ret); +- if (!ret) ++ if (!ret || mtd_is_bitflip(ret)) + ret = -EIO; + return ret; + } +diff --git a/fs/lockd/mon.c b/fs/lockd/mon.c +index 23d7451..df753a1 100644 +--- a/fs/lockd/mon.c ++++ b/fs/lockd/mon.c +@@ -40,6 +40,7 @@ struct nsm_args { + u32 proc; + + char *mon_name; ++ char *nodename; + }; + + struct nsm_res { +@@ -93,6 +94,7 @@ static int nsm_mon_unmon(struct nsm_handle *nsm, u32 proc, struct nsm_res *res) + .vers = 3, + .proc = NLMPROC_NSM_NOTIFY, + .mon_name = nsm->sm_mon_name, ++ .nodename = utsname()->nodename, + }; + struct rpc_message msg = { + .rpc_argp = &args, +@@ -429,7 +431,7 @@ static void encode_my_id(struct xdr_stream *xdr, const struct nsm_args *argp) + { + __be32 *p; + +- encode_nsm_string(xdr, utsname()->nodename); ++ encode_nsm_string(xdr, argp->nodename); + p = xdr_reserve_space(xdr, 4 + 4 + 4); + *p++ = cpu_to_be32(argp->prog); + *p++ = cpu_to_be32(argp->vers); +diff --git a/fs/nfs/blocklayout/blocklayout.c b/fs/nfs/blocklayout/blocklayout.c +index d774309..1aaa0ee 100644 +--- a/fs/nfs/blocklayout/blocklayout.c ++++ b/fs/nfs/blocklayout/blocklayout.c +@@ -164,25 +164,39 @@ static struct bio *bl_alloc_init_bio(int npg, sector_t isect, + return bio; + } + +-static struct bio *bl_add_page_to_bio(struct bio *bio, int npg, int rw, ++static struct bio *do_add_page_to_bio(struct bio *bio, int npg, int rw, + sector_t isect, struct page *page, + struct pnfs_block_extent *be, + void (*end_io)(struct bio *, int err), +- struct parallel_io *par) ++ struct parallel_io *par, ++ unsigned int offset, int len) + { ++ isect = isect + (offset >> SECTOR_SHIFT); ++ dprintk("%s: npg %d rw %d isect %llu offset %u len %d\n", __func__, ++ npg, rw, (unsigned long long)isect, offset, len); + retry: + if (!bio) { + bio = bl_alloc_init_bio(npg, isect, be, end_io, par); + if (!bio) + return ERR_PTR(-ENOMEM); + } +- if (bio_add_page(bio, page, PAGE_CACHE_SIZE, 0) < PAGE_CACHE_SIZE) { ++ if (bio_add_page(bio, page, len, offset) < len) { + bio = bl_submit_bio(rw, bio); + goto retry; + } + return bio; + } + ++static struct bio *bl_add_page_to_bio(struct bio *bio, int npg, int rw, ++ sector_t isect, struct page *page, ++ struct pnfs_block_extent *be, ++ void (*end_io)(struct bio *, int err), ++ struct parallel_io *par) ++{ ++ return do_add_page_to_bio(bio, npg, rw, isect, page, be, ++ end_io, par, 0, PAGE_CACHE_SIZE); ++} ++ + /* This is basically copied from mpage_end_io_read */ + static void bl_end_io_read(struct bio *bio, int err) + { +@@ -446,6 +460,106 @@ map_block(struct buffer_head *bh, sector_t isect, struct pnfs_block_extent *be) + return; + } + ++static void ++bl_read_single_end_io(struct bio *bio, int error) ++{ ++ struct bio_vec *bvec = bio->bi_io_vec + bio->bi_vcnt - 1; ++ struct page *page = bvec->bv_page; ++ ++ /* Only one page in bvec */ ++ unlock_page(page); ++} ++ ++static int ++bl_do_readpage_sync(struct page *page, struct pnfs_block_extent *be, ++ unsigned int offset, unsigned int len) ++{ ++ struct bio *bio; ++ struct page *shadow_page; ++ sector_t isect; ++ 
char *kaddr, *kshadow_addr; ++ int ret = 0; ++ ++ dprintk("%s: offset %u len %u\n", __func__, offset, len); ++ ++ shadow_page = alloc_page(GFP_NOFS | __GFP_HIGHMEM); ++ if (shadow_page == NULL) ++ return -ENOMEM; ++ ++ bio = bio_alloc(GFP_NOIO, 1); ++ if (bio == NULL) ++ return -ENOMEM; ++ ++ isect = (page->index << PAGE_CACHE_SECTOR_SHIFT) + ++ (offset / SECTOR_SIZE); ++ ++ bio->bi_sector = isect - be->be_f_offset + be->be_v_offset; ++ bio->bi_bdev = be->be_mdev; ++ bio->bi_end_io = bl_read_single_end_io; ++ ++ lock_page(shadow_page); ++ if (bio_add_page(bio, shadow_page, ++ SECTOR_SIZE, round_down(offset, SECTOR_SIZE)) == 0) { ++ unlock_page(shadow_page); ++ bio_put(bio); ++ return -EIO; ++ } ++ ++ submit_bio(READ, bio); ++ wait_on_page_locked(shadow_page); ++ if (unlikely(!test_bit(BIO_UPTODATE, &bio->bi_flags))) { ++ ret = -EIO; ++ } else { ++ kaddr = kmap_atomic(page); ++ kshadow_addr = kmap_atomic(shadow_page); ++ memcpy(kaddr + offset, kshadow_addr + offset, len); ++ kunmap_atomic(kshadow_addr); ++ kunmap_atomic(kaddr); ++ } ++ __free_page(shadow_page); ++ bio_put(bio); ++ ++ return ret; ++} ++ ++static int ++bl_read_partial_page_sync(struct page *page, struct pnfs_block_extent *be, ++ unsigned int dirty_offset, unsigned int dirty_len, ++ bool full_page) ++{ ++ int ret = 0; ++ unsigned int start, end; ++ ++ if (full_page) { ++ start = 0; ++ end = PAGE_CACHE_SIZE; ++ } else { ++ start = round_down(dirty_offset, SECTOR_SIZE); ++ end = round_up(dirty_offset + dirty_len, SECTOR_SIZE); ++ } ++ ++ dprintk("%s: offset %u len %d\n", __func__, dirty_offset, dirty_len); ++ if (!be) { ++ zero_user_segments(page, start, dirty_offset, ++ dirty_offset + dirty_len, end); ++ if (start == 0 && end == PAGE_CACHE_SIZE && ++ trylock_page(page)) { ++ SetPageUptodate(page); ++ unlock_page(page); ++ } ++ return ret; ++ } ++ ++ if (start != dirty_offset) ++ ret = bl_do_readpage_sync(page, be, start, dirty_offset - start); ++ ++ if (!ret && (dirty_offset + dirty_len < end)) ++ ret = bl_do_readpage_sync(page, be, dirty_offset + dirty_len, ++ end - dirty_offset - dirty_len); ++ ++ return ret; ++} ++ + /* Given an unmapped page, zero it or read in page for COW, page is locked + * by caller. 
+ */ +@@ -479,7 +593,6 @@ init_page_for_write(struct page *page, struct pnfs_block_extent *cow_read) + SetPageUptodate(page); + + cleanup: +- bl_put_extent(cow_read); + if (bh) + free_buffer_head(bh); + if (ret) { +@@ -501,6 +614,7 @@ bl_write_pagelist(struct nfs_write_data *wdata, int sync) + struct parallel_io *par; + loff_t offset = wdata->args.offset; + size_t count = wdata->args.count; ++ unsigned int pg_offset, pg_len, saved_len; + struct page **pages = wdata->args.pages; + struct page *page; + pgoff_t index; +@@ -615,10 +729,11 @@ next_page: + if (!extent_length) { + /* We've used up the previous extent */ + bl_put_extent(be); ++ bl_put_extent(cow_read); + bio = bl_submit_bio(WRITE, bio); + /* Get the next one */ + be = bl_find_get_extent(BLK_LSEG2EXT(wdata->lseg), +- isect, NULL); ++ isect, &cow_read); + if (!be || !is_writable(be, isect)) { + wdata->pnfs_error = -EINVAL; + goto out; +@@ -626,7 +741,26 @@ next_page: + extent_length = be->be_length - + (isect - be->be_f_offset); + } +- if (be->be_state == PNFS_BLOCK_INVALID_DATA) { ++ ++ dprintk("%s offset %lld count %Zu\n", __func__, offset, count); ++ pg_offset = offset & ~PAGE_CACHE_MASK; ++ if (pg_offset + count > PAGE_CACHE_SIZE) ++ pg_len = PAGE_CACHE_SIZE - pg_offset; ++ else ++ pg_len = count; ++ ++ saved_len = pg_len; ++ if (be->be_state == PNFS_BLOCK_INVALID_DATA && ++ !bl_is_sector_init(be->be_inval, isect)) { ++ ret = bl_read_partial_page_sync(pages[i], cow_read, ++ pg_offset, pg_len, true); ++ if (ret) { ++ dprintk("%s bl_read_partial_page_sync fail %d\n", ++ __func__, ret); ++ wdata->pnfs_error = ret; ++ goto out; ++ } ++ + ret = bl_mark_sectors_init(be->be_inval, isect, + PAGE_CACHE_SECTORS, + NULL); +@@ -636,15 +770,35 @@ next_page: + wdata->pnfs_error = ret; + goto out; + } ++ ++ /* Expand to full page write */ ++ pg_offset = 0; ++ pg_len = PAGE_CACHE_SIZE; ++ } else if ((pg_offset & (SECTOR_SIZE - 1)) || ++ (pg_len & (SECTOR_SIZE - 1))){ ++ /* ahh, nasty case. We have to do sync full sector ++ * read-modify-write cycles. 
++ */ ++ unsigned int saved_offset = pg_offset; ++ ret = bl_read_partial_page_sync(pages[i], be, pg_offset, ++ pg_len, false); ++ pg_offset = round_down(pg_offset, SECTOR_SIZE); ++ pg_len = round_up(saved_offset + pg_len, SECTOR_SIZE) ++ - pg_offset; + } +- bio = bl_add_page_to_bio(bio, wdata->npages - i, WRITE, ++ ++ ++ bio = do_add_page_to_bio(bio, wdata->npages - i, WRITE, + isect, pages[i], be, +- bl_end_io_write, par); ++ bl_end_io_write, par, ++ pg_offset, pg_len); + if (IS_ERR(bio)) { + wdata->pnfs_error = PTR_ERR(bio); + bio = NULL; + goto out; + } ++ offset += saved_len; ++ count -= saved_len; + isect += PAGE_CACHE_SECTORS; + last_isect = isect; + extent_length -= PAGE_CACHE_SECTORS; +@@ -662,12 +816,10 @@ next_page: + } + + write_done: +- wdata->res.count = (last_isect << SECTOR_SHIFT) - (offset); +- if (count < wdata->res.count) { +- wdata->res.count = count; +- } ++ wdata->res.count = wdata->args.count; + out: + bl_put_extent(be); ++ bl_put_extent(cow_read); + bl_submit_bio(WRITE, bio); + put_parallel(par); + return PNFS_ATTEMPTED; +diff --git a/fs/nfs/blocklayout/blocklayout.h b/fs/nfs/blocklayout/blocklayout.h +index 42acf7e..519a9de 100644 +--- a/fs/nfs/blocklayout/blocklayout.h ++++ b/fs/nfs/blocklayout/blocklayout.h +@@ -40,6 +40,7 @@ + + #define PAGE_CACHE_SECTORS (PAGE_CACHE_SIZE >> SECTOR_SHIFT) + #define PAGE_CACHE_SECTOR_SHIFT (PAGE_CACHE_SHIFT - SECTOR_SHIFT) ++#define SECTOR_SIZE (1 << SECTOR_SHIFT) + + struct block_mount_id { + spinlock_t bm_lock; /* protects list */ +diff --git a/fs/udf/super.c b/fs/udf/super.c +index 516b7f0..f66439e 100644 +--- a/fs/udf/super.c ++++ b/fs/udf/super.c +@@ -1289,6 +1289,7 @@ static int udf_load_logicalvol(struct super_block *sb, sector_t block, + udf_err(sb, "error loading logical volume descriptor: " + "Partition table too long (%u > %lu)\n", table_len, + sb->s_blocksize - sizeof(*lvd)); ++ ret = 1; + goto out_bh; + } + +@@ -1333,8 +1334,10 @@ static int udf_load_logicalvol(struct super_block *sb, sector_t block, + UDF_ID_SPARABLE, + strlen(UDF_ID_SPARABLE))) { + if (udf_load_sparable_map(sb, map, +- (struct sparablePartitionMap *)gpm) < 0) ++ (struct sparablePartitionMap *)gpm) < 0) { ++ ret = 1; + goto out_bh; ++ } + } else if (!strncmp(upm2->partIdent.ident, + UDF_ID_METADATA, + strlen(UDF_ID_METADATA))) { +diff --git a/include/linux/mempolicy.h b/include/linux/mempolicy.h +index 7978eec..3e8f2f7 100644 +--- a/include/linux/mempolicy.h ++++ b/include/linux/mempolicy.h +@@ -188,7 +188,7 @@ struct sp_node { + + struct shared_policy { + struct rb_root root; +- spinlock_t lock; ++ struct mutex mutex; + }; + + void mpol_shared_policy_init(struct shared_policy *sp, struct mempolicy *mpol); +diff --git a/include/linux/pci_ids.h b/include/linux/pci_ids.h +index 67cc215..1874c5e 100644 +--- a/include/linux/pci_ids.h ++++ b/include/linux/pci_ids.h +@@ -1823,7 +1823,6 @@ + #define PCI_DEVICE_ID_SIIG_8S_20x_650 0x2081 + #define PCI_DEVICE_ID_SIIG_8S_20x_850 0x2082 + #define PCI_SUBDEVICE_ID_SIIG_QUARTET_SERIAL 0x2050 +-#define PCI_SUBDEVICE_ID_SIIG_DUAL_SERIAL 0x2530 + + #define PCI_VENDOR_ID_RADISYS 0x1331 + +diff --git a/include/net/ip_vs.h b/include/net/ip_vs.h +index e5a7b9a..416dcb0 100644 +--- a/include/net/ip_vs.h ++++ b/include/net/ip_vs.h +@@ -1353,7 +1353,7 @@ static inline void ip_vs_notrack(struct sk_buff *skb) + struct nf_conn *ct = nf_ct_get(skb, &ctinfo); + + if (!ct || !nf_ct_is_untracked(ct)) { +- nf_reset(skb); ++ nf_conntrack_put(skb->nfct); + skb->nfct = &nf_ct_untracked_get()->ct_general; + skb->nfctinfo = IP_CT_NEW; + 
nf_conntrack_get(skb->nfct); +diff --git a/kernel/rcutree.c b/kernel/rcutree.c +index 6b76d81..a122196 100644 +--- a/kernel/rcutree.c ++++ b/kernel/rcutree.c +@@ -292,7 +292,9 @@ cpu_has_callbacks_ready_to_invoke(struct rcu_data *rdp) + static int + cpu_needs_another_gp(struct rcu_state *rsp, struct rcu_data *rdp) + { +- return *rdp->nxttail[RCU_DONE_TAIL] && !rcu_gp_in_progress(rsp); ++ return *rdp->nxttail[RCU_DONE_TAIL + ++ ACCESS_ONCE(rsp->completed) != rdp->completed] && ++ !rcu_gp_in_progress(rsp); + } + + /* +diff --git a/kernel/sched_stoptask.c b/kernel/sched_stoptask.c +index 8b44e7f..85e9da2 100644 +--- a/kernel/sched_stoptask.c ++++ b/kernel/sched_stoptask.c +@@ -25,8 +25,10 @@ static struct task_struct *pick_next_task_stop(struct rq *rq) + { + struct task_struct *stop = rq->stop; + +- if (stop && stop->on_rq) ++ if (stop && stop->on_rq) { ++ stop->se.exec_start = rq->clock_task; + return stop; ++ } + + return NULL; + } +@@ -50,6 +52,21 @@ static void yield_task_stop(struct rq *rq) + + static void put_prev_task_stop(struct rq *rq, struct task_struct *prev) + { ++ struct task_struct *curr = rq->curr; ++ u64 delta_exec; ++ ++ delta_exec = rq->clock_task - curr->se.exec_start; ++ if (unlikely((s64)delta_exec < 0)) ++ delta_exec = 0; ++ ++ schedstat_set(curr->se.statistics.exec_max, ++ max(curr->se.statistics.exec_max, delta_exec)); ++ ++ curr->se.sum_exec_runtime += delta_exec; ++ account_group_exec_runtime(curr, delta_exec); ++ ++ curr->se.exec_start = rq->clock_task; ++ cpuacct_charge(curr, delta_exec); + } + + static void task_tick_stop(struct rq *rq, struct task_struct *curr, int queued) +@@ -58,6 +75,9 @@ static void task_tick_stop(struct rq *rq, struct task_struct *curr, int queued) + + static void set_curr_task_stop(struct rq *rq) + { ++ struct task_struct *stop = rq->stop; ++ ++ stop->se.exec_start = rq->clock_task; + } + + static void switched_to_stop(struct rq *rq, struct task_struct *p) +diff --git a/kernel/sys.c b/kernel/sys.c +index 481611f..c504302 100644 +--- a/kernel/sys.c ++++ b/kernel/sys.c +@@ -365,6 +365,7 @@ EXPORT_SYMBOL(unregister_reboot_notifier); + void kernel_restart(char *cmd) + { + kernel_restart_prepare(cmd); ++ disable_nonboot_cpus(); + if (!cmd) + printk(KERN_EMERG "Restarting system.\n"); + else +diff --git a/kernel/workqueue.c b/kernel/workqueue.c +index b413138..43a19c5 100644 +--- a/kernel/workqueue.c ++++ b/kernel/workqueue.c +@@ -1726,10 +1726,9 @@ static void move_linked_works(struct work_struct *work, struct list_head *head, + *nextp = n; + } + +-static void cwq_activate_first_delayed(struct cpu_workqueue_struct *cwq) ++static void cwq_activate_delayed_work(struct work_struct *work) + { +- struct work_struct *work = list_first_entry(&cwq->delayed_works, +- struct work_struct, entry); ++ struct cpu_workqueue_struct *cwq = get_work_cwq(work); + struct list_head *pos = gcwq_determine_ins_pos(cwq->gcwq, cwq); + + trace_workqueue_activate_work(work); +@@ -1738,6 +1737,14 @@ static void cwq_activate_first_delayed(struct cpu_workqueue_struct *cwq) + cwq->nr_active++; + } + ++static void cwq_activate_first_delayed(struct cpu_workqueue_struct *cwq) ++{ ++ struct work_struct *work = list_first_entry(&cwq->delayed_works, ++ struct work_struct, entry); ++ ++ cwq_activate_delayed_work(work); ++} ++ + /** + * cwq_dec_nr_in_flight - decrement cwq's nr_in_flight + * @cwq: cwq of interest +@@ -1869,7 +1876,9 @@ __acquires(&gcwq->lock) + + spin_unlock_irq(&gcwq->lock); + ++ smp_wmb(); /* paired with test_and_set_bit(PENDING) */ + work_clear_pending(work); ++ + 
lock_map_acquire_read(&cwq->wq->lockdep_map); + lock_map_acquire(&lockdep_map); + trace_workqueue_execute_start(work); +@@ -2626,6 +2635,18 @@ static int try_to_grab_pending(struct work_struct *work) + smp_rmb(); + if (gcwq == get_work_gcwq(work)) { + debug_work_deactivate(work); ++ ++ /* ++ * A delayed work item cannot be grabbed directly ++ * because it might have linked NO_COLOR work items ++ * which, if left on the delayed_list, will confuse ++ * cwq->nr_active management later on and cause ++ * stall. Make sure the work item is activated ++ * before grabbing. ++ */ ++ if (*work_data_bits(work) & WORK_STRUCT_DELAYED) ++ cwq_activate_delayed_work(work); ++ + list_del_init(&work->entry); + cwq_dec_nr_in_flight(get_work_cwq(work), + get_work_color(work), +diff --git a/lib/gcd.c b/lib/gcd.c +index f879033..433d89b 100644 +--- a/lib/gcd.c ++++ b/lib/gcd.c +@@ -9,6 +9,9 @@ unsigned long gcd(unsigned long a, unsigned long b) + + if (a < b) + swap(a, b); ++ ++ if (!b) ++ return a; + while ((r = a % b) != 0) { + a = b; + b = r; +diff --git a/mm/hugetlb.c b/mm/hugetlb.c +index 0f897b8..d6c0fdf 100644 +--- a/mm/hugetlb.c ++++ b/mm/hugetlb.c +@@ -2429,8 +2429,8 @@ static int unmap_ref_private(struct mm_struct *mm, struct vm_area_struct *vma, + * from page cache lookup which is in HPAGE_SIZE units. + */ + address = address & huge_page_mask(h); +- pgoff = ((address - vma->vm_start) >> PAGE_SHIFT) +- + (vma->vm_pgoff >> PAGE_SHIFT); ++ pgoff = ((address - vma->vm_start) >> PAGE_SHIFT) + ++ vma->vm_pgoff; + mapping = vma->vm_file->f_dentry->d_inode->i_mapping; + + /* +diff --git a/mm/mempolicy.c b/mm/mempolicy.c +index 11b8d47..4c82c21 100644 +--- a/mm/mempolicy.c ++++ b/mm/mempolicy.c +@@ -607,24 +607,39 @@ check_range(struct mm_struct *mm, unsigned long start, unsigned long end, + return first; + } + +-/* Apply policy to a single VMA */ +-static int policy_vma(struct vm_area_struct *vma, struct mempolicy *new) ++/* ++ * Apply policy to a single VMA ++ * This must be called with the mmap_sem held for writing. ++ */ ++static int vma_replace_policy(struct vm_area_struct *vma, ++ struct mempolicy *pol) + { +- int err = 0; +- struct mempolicy *old = vma->vm_policy; ++ int err; ++ struct mempolicy *old; ++ struct mempolicy *new; + + pr_debug("vma %lx-%lx/%lx vm_ops %p vm_file %p set_policy %p\n", + vma->vm_start, vma->vm_end, vma->vm_pgoff, + vma->vm_ops, vma->vm_file, + vma->vm_ops ? vma->vm_ops->set_policy : NULL); + +- if (vma->vm_ops && vma->vm_ops->set_policy) ++ new = mpol_dup(pol); ++ if (IS_ERR(new)) ++ return PTR_ERR(new); ++ ++ if (vma->vm_ops && vma->vm_ops->set_policy) { + err = vma->vm_ops->set_policy(vma, new); +- if (!err) { +- mpol_get(new); +- vma->vm_policy = new; +- mpol_put(old); ++ if (err) ++ goto err_out; + } ++ ++ old = vma->vm_policy; ++ vma->vm_policy = new; /* protected by mmap_sem */ ++ mpol_put(old); ++ ++ return 0; ++ err_out: ++ mpol_put(new); + return err; + } + +@@ -675,7 +690,7 @@ static int mbind_range(struct mm_struct *mm, unsigned long start, + if (err) + goto out; + } +- err = policy_vma(vma, new_pol); ++ err = vma_replace_policy(vma, new_pol); + if (err) + goto out; + } +@@ -1507,8 +1522,18 @@ struct mempolicy *get_vma_policy(struct task_struct *task, + addr); + if (vpol) + pol = vpol; +- } else if (vma->vm_policy) ++ } else if (vma->vm_policy) { + pol = vma->vm_policy; ++ ++ /* ++ * shmem_alloc_page() passes MPOL_F_SHARED policy with ++ * a pseudo vma whose vma->vm_ops=NULL. 
Take a reference ++ * count on these policies which will be dropped by ++ * mpol_cond_put() later ++ */ ++ if (mpol_needs_cond_ref(pol)) ++ mpol_get(pol); ++ } + } + if (!pol) + pol = &default_policy; +@@ -2032,7 +2057,7 @@ int __mpol_equal(struct mempolicy *a, struct mempolicy *b) + */ + + /* lookup first element intersecting start-end */ +-/* Caller holds sp->lock */ ++/* Caller holds sp->mutex */ + static struct sp_node * + sp_lookup(struct shared_policy *sp, unsigned long start, unsigned long end) + { +@@ -2096,36 +2121,50 @@ mpol_shared_policy_lookup(struct shared_policy *sp, unsigned long idx) + + if (!sp->root.rb_node) + return NULL; +- spin_lock(&sp->lock); ++ mutex_lock(&sp->mutex); + sn = sp_lookup(sp, idx, idx+1); + if (sn) { + mpol_get(sn->policy); + pol = sn->policy; + } +- spin_unlock(&sp->lock); ++ mutex_unlock(&sp->mutex); + return pol; + } + ++static void sp_free(struct sp_node *n) ++{ ++ mpol_put(n->policy); ++ kmem_cache_free(sn_cache, n); ++} ++ + static void sp_delete(struct shared_policy *sp, struct sp_node *n) + { + pr_debug("deleting %lx-l%lx\n", n->start, n->end); + rb_erase(&n->nd, &sp->root); +- mpol_put(n->policy); +- kmem_cache_free(sn_cache, n); ++ sp_free(n); + } + + static struct sp_node *sp_alloc(unsigned long start, unsigned long end, + struct mempolicy *pol) + { +- struct sp_node *n = kmem_cache_alloc(sn_cache, GFP_KERNEL); ++ struct sp_node *n; ++ struct mempolicy *newpol; + ++ n = kmem_cache_alloc(sn_cache, GFP_KERNEL); + if (!n) + return NULL; ++ ++ newpol = mpol_dup(pol); ++ if (IS_ERR(newpol)) { ++ kmem_cache_free(sn_cache, n); ++ return NULL; ++ } ++ newpol->flags |= MPOL_F_SHARED; ++ + n->start = start; + n->end = end; +- mpol_get(pol); +- pol->flags |= MPOL_F_SHARED; /* for unref */ +- n->policy = pol; ++ n->policy = newpol; ++ + return n; + } + +@@ -2133,10 +2172,10 @@ static struct sp_node *sp_alloc(unsigned long start, unsigned long end, + static int shared_policy_replace(struct shared_policy *sp, unsigned long start, + unsigned long end, struct sp_node *new) + { +- struct sp_node *n, *new2 = NULL; ++ struct sp_node *n; ++ int ret = 0; + +-restart: +- spin_lock(&sp->lock); ++ mutex_lock(&sp->mutex); + n = sp_lookup(sp, start, end); + /* Take care of old policies in the same range. */ + while (n && n->start < end) { +@@ -2149,16 +2188,14 @@ restart: + } else { + /* Old policy spanning whole new range. 
*/ + if (n->end > end) { ++ struct sp_node *new2; ++ new2 = sp_alloc(end, n->end, n->policy); + if (!new2) { +- spin_unlock(&sp->lock); +- new2 = sp_alloc(end, n->end, n->policy); +- if (!new2) +- return -ENOMEM; +- goto restart; ++ ret = -ENOMEM; ++ goto out; + } + n->end = start; + sp_insert(sp, new2); +- new2 = NULL; + break; + } else + n->end = start; +@@ -2169,12 +2206,9 @@ restart: + } + if (new) + sp_insert(sp, new); +- spin_unlock(&sp->lock); +- if (new2) { +- mpol_put(new2->policy); +- kmem_cache_free(sn_cache, new2); +- } +- return 0; ++out: ++ mutex_unlock(&sp->mutex); ++ return ret; + } + + /** +@@ -2192,7 +2226,7 @@ void mpol_shared_policy_init(struct shared_policy *sp, struct mempolicy *mpol) + int ret; + + sp->root = RB_ROOT; /* empty tree == default mempolicy */ +- spin_lock_init(&sp->lock); ++ mutex_init(&sp->mutex); + + if (mpol) { + struct vm_area_struct pvma; +@@ -2246,7 +2280,7 @@ int mpol_set_shared_policy(struct shared_policy *info, + } + err = shared_policy_replace(info, vma->vm_pgoff, vma->vm_pgoff+sz, new); + if (err && new) +- kmem_cache_free(sn_cache, new); ++ sp_free(new); + return err; + } + +@@ -2258,16 +2292,14 @@ void mpol_free_shared_policy(struct shared_policy *p) + + if (!p->root.rb_node) + return; +- spin_lock(&p->lock); ++ mutex_lock(&p->mutex); + next = rb_first(&p->root); + while (next) { + n = rb_entry(next, struct sp_node, nd); + next = rb_next(&n->nd); +- rb_erase(&n->nd, &p->root); +- mpol_put(n->policy); +- kmem_cache_free(sn_cache, n); ++ sp_delete(p, n); + } +- spin_unlock(&p->lock); ++ mutex_unlock(&p->mutex); + } + + /* assumes fs == KERNEL_DS */ +diff --git a/mm/slab.c b/mm/slab.c +index cd3ab93..4c3b671 100644 +--- a/mm/slab.c ++++ b/mm/slab.c +@@ -1669,9 +1669,6 @@ void __init kmem_cache_init_late(void) + + g_cpucache_up = LATE; + +- /* Annotate slab for lockdep -- annotate the malloc caches */ +- init_lock_keys(); +- + /* 6) resize the head arrays to their final sizes */ + mutex_lock(&cache_chain_mutex); + list_for_each_entry(cachep, &cache_chain, next) +@@ -1679,6 +1676,9 @@ void __init kmem_cache_init_late(void) + BUG(); + mutex_unlock(&cache_chain_mutex); + ++ /* Annotate slab for lockdep -- annotate the malloc caches */ ++ init_lock_keys(); ++ + /* Done! 
*/ + g_cpucache_up = FULL; + +diff --git a/mm/truncate.c b/mm/truncate.c +index 632b15e..00fb58a 100644 +--- a/mm/truncate.c ++++ b/mm/truncate.c +@@ -394,11 +394,12 @@ invalidate_complete_page2(struct address_space *mapping, struct page *page) + if (page_has_private(page) && !try_to_release_page(page, GFP_KERNEL)) + return 0; + ++ clear_page_mlock(page); ++ + spin_lock_irq(&mapping->tree_lock); + if (PageDirty(page)) + goto failed; + +- clear_page_mlock(page); + BUG_ON(page_has_private(page)); + __delete_from_page_cache(page); + spin_unlock_irq(&mapping->tree_lock); +diff --git a/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c b/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c +index de9da21..d7d63f4 100644 +--- a/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c ++++ b/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c +@@ -84,6 +84,14 @@ static int ipv4_get_l4proto(const struct sk_buff *skb, unsigned int nhoff, + *dataoff = nhoff + (iph->ihl << 2); + *protonum = iph->protocol; + ++ /* Check bogus IP headers */ ++ if (*dataoff > skb->len) { ++ pr_debug("nf_conntrack_ipv4: bogus IPv4 packet: " ++ "nhoff %u, ihl %u, skblen %u\n", ++ nhoff, iph->ihl << 2, skb->len); ++ return -NF_ACCEPT; ++ } ++ + return NF_ACCEPT; + } + +diff --git a/net/ipv4/netfilter/nf_nat_sip.c b/net/ipv4/netfilter/nf_nat_sip.c +index 78844d9..6609a84 100644 +--- a/net/ipv4/netfilter/nf_nat_sip.c ++++ b/net/ipv4/netfilter/nf_nat_sip.c +@@ -148,7 +148,7 @@ static unsigned int ip_nat_sip(struct sk_buff *skb, unsigned int dataoff, + if (ct_sip_parse_header_uri(ct, *dptr, NULL, *datalen, + hdr, NULL, &matchoff, &matchlen, + &addr, &port) > 0) { +- unsigned int matchend, poff, plen, buflen, n; ++ unsigned int olen, matchend, poff, plen, buflen, n; + char buffer[sizeof("nnn.nnn.nnn.nnn:nnnnn")]; + + /* We're only interested in headers related to this +@@ -163,11 +163,12 @@ static unsigned int ip_nat_sip(struct sk_buff *skb, unsigned int dataoff, + goto next; + } + ++ olen = *datalen; + if (!map_addr(skb, dataoff, dptr, datalen, matchoff, matchlen, + &addr, port)) + return NF_DROP; + +- matchend = matchoff + matchlen; ++ matchend = matchoff + matchlen + *datalen - olen; + + /* The maddr= parameter (RFC 2361) specifies where to send + * the reply. 
*/ +@@ -501,7 +502,10 @@ static unsigned int ip_nat_sdp_media(struct sk_buff *skb, unsigned int dataoff, + ret = nf_ct_expect_related(rtcp_exp); + if (ret == 0) + break; +- else if (ret != -EBUSY) { ++ else if (ret == -EBUSY) { ++ nf_ct_unexpect_related(rtp_exp); ++ continue; ++ } else if (ret < 0) { + nf_ct_unexpect_related(rtp_exp); + port = 0; + break; +diff --git a/net/netfilter/nf_conntrack_expect.c b/net/netfilter/nf_conntrack_expect.c +index 340c80d..7918eb7 100644 +--- a/net/netfilter/nf_conntrack_expect.c ++++ b/net/netfilter/nf_conntrack_expect.c +@@ -366,23 +366,6 @@ static void evict_oldest_expect(struct nf_conn *master, + } + } + +-static inline int refresh_timer(struct nf_conntrack_expect *i) +-{ +- struct nf_conn_help *master_help = nfct_help(i->master); +- const struct nf_conntrack_expect_policy *p; +- +- if (!del_timer(&i->timeout)) +- return 0; +- +- p = &rcu_dereference_protected( +- master_help->helper, +- lockdep_is_held(&nf_conntrack_lock) +- )->expect_policy[i->class]; +- i->timeout.expires = jiffies + p->timeout * HZ; +- add_timer(&i->timeout); +- return 1; +-} +- + static inline int __nf_ct_expect_check(struct nf_conntrack_expect *expect) + { + const struct nf_conntrack_expect_policy *p; +@@ -390,7 +373,7 @@ static inline int __nf_ct_expect_check(struct nf_conntrack_expect *expect) + struct nf_conn *master = expect->master; + struct nf_conn_help *master_help = nfct_help(master); + struct net *net = nf_ct_exp_net(expect); +- struct hlist_node *n; ++ struct hlist_node *n, *next; + unsigned int h; + int ret = 1; + +@@ -401,12 +384,12 @@ static inline int __nf_ct_expect_check(struct nf_conntrack_expect *expect) + goto out; + } + h = nf_ct_expect_dst_hash(&expect->tuple); +- hlist_for_each_entry(i, n, &net->ct.expect_hash[h], hnode) { ++ hlist_for_each_entry_safe(i, n, next, &net->ct.expect_hash[h], hnode) { + if (expect_matches(i, expect)) { +- /* Refresh timer: if it's dying, ignore.. */ +- if (refresh_timer(i)) { +- ret = 0; +- goto out; ++ if (del_timer(&i->timeout)) { ++ nf_ct_unlink_expect(i); ++ nf_ct_expect_put(i); ++ break; + } + } else if (expect_clash(i, expect)) { + ret = -EBUSY; +diff --git a/net/netfilter/xt_hashlimit.c b/net/netfilter/xt_hashlimit.c +index dfd52ba..8f3f280 100644 +--- a/net/netfilter/xt_hashlimit.c ++++ b/net/netfilter/xt_hashlimit.c +@@ -389,8 +389,7 @@ static void htable_put(struct xt_hashlimit_htable *hinfo) + #define CREDITS_PER_JIFFY POW2_BELOW32(MAX_CPJ) + + /* Precision saver. */ +-static inline u_int32_t +-user2credits(u_int32_t user) ++static u32 user2credits(u32 user) + { + /* If multiplying would overflow... 
*/ + if (user > 0xFFFFFFFF / (HZ*CREDITS_PER_JIFFY)) +@@ -400,7 +399,7 @@ user2credits(u_int32_t user) + return (user * HZ * CREDITS_PER_JIFFY) / XT_HASHLIMIT_SCALE; + } + +-static inline void rateinfo_recalc(struct dsthash_ent *dh, unsigned long now) ++static void rateinfo_recalc(struct dsthash_ent *dh, unsigned long now) + { + dh->rateinfo.credit += (now - dh->rateinfo.prev) * CREDITS_PER_JIFFY; + if (dh->rateinfo.credit > dh->rateinfo.credit_cap) +@@ -531,8 +530,7 @@ hashlimit_mt(const struct sk_buff *skb, struct xt_action_param *par) + dh->rateinfo.prev = jiffies; + dh->rateinfo.credit = user2credits(hinfo->cfg.avg * + hinfo->cfg.burst); +- dh->rateinfo.credit_cap = user2credits(hinfo->cfg.avg * +- hinfo->cfg.burst); ++ dh->rateinfo.credit_cap = dh->rateinfo.credit; + dh->rateinfo.cost = user2credits(hinfo->cfg.avg); + } else { + /* update expiration timeout */ +diff --git a/net/netfilter/xt_limit.c b/net/netfilter/xt_limit.c +index 32b7a57..a4c1e45 100644 +--- a/net/netfilter/xt_limit.c ++++ b/net/netfilter/xt_limit.c +@@ -88,8 +88,7 @@ limit_mt(const struct sk_buff *skb, struct xt_action_param *par) + } + + /* Precision saver. */ +-static u_int32_t +-user2credits(u_int32_t user) ++static u32 user2credits(u32 user) + { + /* If multiplying would overflow... */ + if (user > 0xFFFFFFFF / (HZ*CREDITS_PER_JIFFY)) +@@ -118,12 +117,12 @@ static int limit_mt_check(const struct xt_mtchk_param *par) + + /* For SMP, we only want to use one set of state. */ + r->master = priv; ++ /* User avg in seconds * XT_LIMIT_SCALE: convert to jiffies * ++ 128. */ ++ priv->prev = jiffies; ++ priv->credit = user2credits(r->avg * r->burst); /* Credits full. */ + if (r->cost == 0) { +- /* User avg in seconds * XT_LIMIT_SCALE: convert to jiffies * +- 128. */ +- priv->prev = jiffies; +- priv->credit = user2credits(r->avg * r->burst); /* Credits full. */ +- r->credit_cap = user2credits(r->avg * r->burst); /* Credits full. */ ++ r->credit_cap = priv->credit; /* Credits full. 
*/ + r->cost = user2credits(r->avg); + } + return 0; +diff --git a/net/sunrpc/xprtsock.c b/net/sunrpc/xprtsock.c +index c5391af..10a385b 100644 +--- a/net/sunrpc/xprtsock.c ++++ b/net/sunrpc/xprtsock.c +@@ -1028,6 +1028,16 @@ static void xs_udp_data_ready(struct sock *sk, int len) + read_unlock_bh(&sk->sk_callback_lock); + } + ++/* ++ * Helper function to force a TCP close if the server is sending ++ * junk and/or it has put us in CLOSE_WAIT ++ */ ++static void xs_tcp_force_close(struct rpc_xprt *xprt) ++{ ++ set_bit(XPRT_CONNECTION_CLOSE, &xprt->state); ++ xprt_force_disconnect(xprt); ++} ++ + static inline void xs_tcp_read_fraghdr(struct rpc_xprt *xprt, struct xdr_skb_reader *desc) + { + struct sock_xprt *transport = container_of(xprt, struct sock_xprt, xprt); +@@ -1054,7 +1064,7 @@ static inline void xs_tcp_read_fraghdr(struct rpc_xprt *xprt, struct xdr_skb_rea + /* Sanity check of the record length */ + if (unlikely(transport->tcp_reclen < 8)) { + dprintk("RPC: invalid TCP record fragment length\n"); +- xprt_force_disconnect(xprt); ++ xs_tcp_force_close(xprt); + return; + } + dprintk("RPC: reading TCP record fragment of length %d\n", +@@ -1135,7 +1145,7 @@ static inline void xs_tcp_read_calldir(struct sock_xprt *transport, + break; + default: + dprintk("RPC: invalid request message type\n"); +- xprt_force_disconnect(&transport->xprt); ++ xs_tcp_force_close(&transport->xprt); + } + xs_tcp_check_fraghdr(transport); + } +@@ -1458,6 +1468,8 @@ static void xs_tcp_cancel_linger_timeout(struct rpc_xprt *xprt) + static void xs_sock_mark_closed(struct rpc_xprt *xprt) + { + smp_mb__before_clear_bit(); ++ clear_bit(XPRT_CONNECTION_ABORT, &xprt->state); ++ clear_bit(XPRT_CONNECTION_CLOSE, &xprt->state); + clear_bit(XPRT_CLOSE_WAIT, &xprt->state); + clear_bit(XPRT_CLOSING, &xprt->state); + smp_mb__after_clear_bit(); +@@ -1515,8 +1527,8 @@ static void xs_tcp_state_change(struct sock *sk) + break; + case TCP_CLOSE_WAIT: + /* The server initiated a shutdown of the socket */ +- xprt_force_disconnect(xprt); + xprt->connect_cookie++; ++ xs_tcp_force_close(xprt); + case TCP_CLOSING: + /* + * If the server closed down the connection, make sure that +@@ -2159,8 +2171,7 @@ static void xs_tcp_setup_socket(struct work_struct *work) + /* We're probably in TIME_WAIT. 
Get rid of existing socket, + * and retry + */ +- set_bit(XPRT_CONNECTION_CLOSE, &xprt->state); +- xprt_force_disconnect(xprt); ++ xs_tcp_force_close(xprt); + break; + case -ECONNREFUSED: + case -ECONNRESET: +diff --git a/scripts/Kbuild.include b/scripts/Kbuild.include +index d897278..978416d 100644 +--- a/scripts/Kbuild.include ++++ b/scripts/Kbuild.include +@@ -98,24 +98,24 @@ try-run = $(shell set -e; \ + # Usage: cflags-y += $(call as-option,-Wa$(comma)-isa=foo,) + + as-option = $(call try-run,\ +- $(CC) $(KBUILD_CFLAGS) $(1) -c -xassembler /dev/null -o "$$TMP",$(1),$(2)) ++ $(CC) $(KBUILD_CFLAGS) $(1) -c -x assembler /dev/null -o "$$TMP",$(1),$(2)) + + # as-instr + # Usage: cflags-y += $(call as-instr,instr,option1,option2) + + as-instr = $(call try-run,\ +- /bin/echo -e "$(1)" | $(CC) $(KBUILD_AFLAGS) -c -xassembler -o "$$TMP" -,$(2),$(3)) ++ printf "%b\n" "$(1)" | $(CC) $(KBUILD_AFLAGS) -c -x assembler -o "$$TMP" -,$(2),$(3)) + + # cc-option + # Usage: cflags-y += $(call cc-option,-march=winchip-c6,-march=i586) + + cc-option = $(call try-run,\ +- $(CC) $(KBUILD_CPPFLAGS) $(KBUILD_CFLAGS) $(1) -c -xc /dev/null -o "$$TMP",$(1),$(2)) ++ $(CC) $(KBUILD_CPPFLAGS) $(KBUILD_CFLAGS) $(1) -c -x c /dev/null -o "$$TMP",$(1),$(2)) + + # cc-option-yn + # Usage: flag := $(call cc-option-yn,-march=winchip-c6) + cc-option-yn = $(call try-run,\ +- $(CC) $(KBUILD_CPPFLAGS) $(KBUILD_CFLAGS) $(1) -c -xc /dev/null -o "$$TMP",y,n) ++ $(CC) $(KBUILD_CPPFLAGS) $(KBUILD_CFLAGS) $(1) -c -x c /dev/null -o "$$TMP",y,n) + + # cc-option-align + # Prefix align with either -falign or -malign +@@ -125,7 +125,7 @@ cc-option-align = $(subst -functions=0,,\ + # cc-disable-warning + # Usage: cflags-y += $(call cc-disable-warning,unused-but-set-variable) + cc-disable-warning = $(call try-run,\ +- $(CC) $(KBUILD_CPPFLAGS) $(KBUILD_CFLAGS) -W$(strip $(1)) -c -xc /dev/null -o "$$TMP",-Wno-$(strip $(1))) ++ $(CC) $(KBUILD_CPPFLAGS) $(KBUILD_CFLAGS) -W$(strip $(1)) -c -x c /dev/null -o "$$TMP",-Wno-$(strip $(1))) + + # cc-version + # Usage gcc-ver := $(call cc-version) +@@ -143,7 +143,7 @@ cc-ifversion = $(shell [ $(call cc-version, $(CC)) $(1) $(2) ] && echo $(3)) + # cc-ldoption + # Usage: ldflags += $(call cc-ldoption, -Wl$(comma)--hash-style=both) + cc-ldoption = $(call try-run,\ +- $(CC) $(1) -nostdlib -xc /dev/null -o "$$TMP",$(1),$(2)) ++ $(CC) $(1) -nostdlib -x c /dev/null -o "$$TMP",$(1),$(2)) + + # ld-option + # Usage: LDFLAGS += $(call ld-option, -X) +@@ -209,7 +209,7 @@ endif + # >$< substitution to preserve $ when reloading .cmd file + # note: when using inline perl scripts [perl -e '...$$t=1;...'] + # in $(cmd_xxx) double $$ your perl vars +-make-cmd = $(subst \#,\\\#,$(subst $$,$$$$,$(call escsq,$(cmd_$(1))))) ++make-cmd = $(subst \\,\\\\,$(subst \#,\\\#,$(subst $$,$$$$,$(call escsq,$(cmd_$(1)))))) + + # Find any prerequisites that is newer than target or that does not exist. + # PHONY targets skipped in both cases. 
+diff --git a/scripts/gcc-version.sh b/scripts/gcc-version.sh +index debecb5..7f2126d 100644 +--- a/scripts/gcc-version.sh ++++ b/scripts/gcc-version.sh +@@ -22,10 +22,10 @@ if [ ${#compiler} -eq 0 ]; then + exit 1 + fi + +-MAJOR=$(echo __GNUC__ | $compiler -E -xc - | tail -n 1) +-MINOR=$(echo __GNUC_MINOR__ | $compiler -E -xc - | tail -n 1) ++MAJOR=$(echo __GNUC__ | $compiler -E -x c - | tail -n 1) ++MINOR=$(echo __GNUC_MINOR__ | $compiler -E -x c - | tail -n 1) + if [ "x$with_patchlevel" != "x" ] ; then +- PATCHLEVEL=$(echo __GNUC_PATCHLEVEL__ | $compiler -E -xc - | tail -n 1) ++ PATCHLEVEL=$(echo __GNUC_PATCHLEVEL__ | $compiler -E -x c - | tail -n 1) + printf "%02d%02d%02d\\n" $MAJOR $MINOR $PATCHLEVEL + else + printf "%02d%02d\\n" $MAJOR $MINOR +diff --git a/scripts/gcc-x86_32-has-stack-protector.sh b/scripts/gcc-x86_32-has-stack-protector.sh +index 29493dc..12dbd0b 100644 +--- a/scripts/gcc-x86_32-has-stack-protector.sh ++++ b/scripts/gcc-x86_32-has-stack-protector.sh +@@ -1,6 +1,6 @@ + #!/bin/sh + +-echo "int foo(void) { char X[200]; return 3; }" | $* -S -xc -c -O0 -fstack-protector - -o - 2> /dev/null | grep -q "%gs" ++echo "int foo(void) { char X[200]; return 3; }" | $* -S -x c -c -O0 -fstack-protector - -o - 2> /dev/null | grep -q "%gs" + if [ "$?" -eq "0" ] ; then + echo y + else +diff --git a/scripts/gcc-x86_64-has-stack-protector.sh b/scripts/gcc-x86_64-has-stack-protector.sh +index afaec61..973e8c1 100644 +--- a/scripts/gcc-x86_64-has-stack-protector.sh ++++ b/scripts/gcc-x86_64-has-stack-protector.sh +@@ -1,6 +1,6 @@ + #!/bin/sh + +-echo "int foo(void) { char X[200]; return 3; }" | $* -S -xc -c -O0 -mcmodel=kernel -fstack-protector - -o - 2> /dev/null | grep -q "%gs" ++echo "int foo(void) { char X[200]; return 3; }" | $* -S -x c -c -O0 -mcmodel=kernel -fstack-protector - -o - 2> /dev/null | grep -q "%gs" + if [ "$?" -eq "0" ] ; then + echo y + else +diff --git a/scripts/kconfig/check.sh b/scripts/kconfig/check.sh +index fa59cbf..854d9c7 100755 +--- a/scripts/kconfig/check.sh ++++ b/scripts/kconfig/check.sh +@@ -1,6 +1,6 @@ + #!/bin/sh + # Needed for systems without gettext +-$* -xc -o /dev/null - > /dev/null 2>&1 << EOF ++$* -x c -o /dev/null - > /dev/null 2>&1 << EOF + #include + int main() + { +diff --git a/scripts/kconfig/lxdialog/check-lxdialog.sh b/scripts/kconfig/lxdialog/check-lxdialog.sh +index 82cc3a8..50df490 100644 +--- a/scripts/kconfig/lxdialog/check-lxdialog.sh ++++ b/scripts/kconfig/lxdialog/check-lxdialog.sh +@@ -38,7 +38,7 @@ trap "rm -f $tmp" 0 1 2 3 15 + + # Check if we can link to ncurses + check() { +- $cc -xc - -o $tmp 2>/dev/null <<'EOF' ++ $cc -x c - -o $tmp 2>/dev/null <<'EOF' + #include CURSES_LOC + main() {} + EOF +diff --git a/scripts/kconfig/streamline_config.pl b/scripts/kconfig/streamline_config.pl +index bccf07dd..3346f42 100644 +--- a/scripts/kconfig/streamline_config.pl ++++ b/scripts/kconfig/streamline_config.pl +@@ -463,6 +463,8 @@ while() { + if (defined($configs{$1})) { + if ($localyesconfig) { + $setconfigs{$1} = 'y'; ++ print "$1=y\n"; ++ next; + } else { + $setconfigs{$1} = $2; + } +diff --git a/scripts/package/buildtar b/scripts/package/buildtar +index 8a7b155..d0d748e 100644 +--- a/scripts/package/buildtar ++++ b/scripts/package/buildtar +@@ -109,7 +109,7 @@ esac + if tar --owner=root --group=root --help >/dev/null 2>&1; then + opts="--owner=root --group=root" + fi +- tar cf - . 
$opts | ${compress} > "${tarball}${file_ext}" ++ tar cf - boot/* lib/* $opts | ${compress} > "${tarball}${file_ext}" + ) + + echo "Tarball successfully created in ${tarball}${file_ext}" +diff --git a/sound/drivers/aloop.c b/sound/drivers/aloop.c +index d83bafc..193ce81 100644 +--- a/sound/drivers/aloop.c ++++ b/sound/drivers/aloop.c +@@ -119,6 +119,7 @@ struct loopback_pcm { + unsigned int period_size_frac; + unsigned long last_jiffies; + struct timer_list timer; ++ spinlock_t timer_lock; + }; + + static struct platform_device *devices[SNDRV_CARDS]; +@@ -169,6 +170,7 @@ static void loopback_timer_start(struct loopback_pcm *dpcm) + unsigned long tick; + unsigned int rate_shift = get_rate_shift(dpcm); + ++ spin_lock(&dpcm->timer_lock); + if (rate_shift != dpcm->pcm_rate_shift) { + dpcm->pcm_rate_shift = rate_shift; + dpcm->period_size_frac = frac_pos(dpcm, dpcm->pcm_period_size); +@@ -181,12 +183,15 @@ static void loopback_timer_start(struct loopback_pcm *dpcm) + tick = (tick + dpcm->pcm_bps - 1) / dpcm->pcm_bps; + dpcm->timer.expires = jiffies + tick; + add_timer(&dpcm->timer); ++ spin_unlock(&dpcm->timer_lock); + } + + static inline void loopback_timer_stop(struct loopback_pcm *dpcm) + { ++ spin_lock(&dpcm->timer_lock); + del_timer(&dpcm->timer); + dpcm->timer.expires = 0; ++ spin_unlock(&dpcm->timer_lock); + } + + #define CABLE_VALID_PLAYBACK (1 << SNDRV_PCM_STREAM_PLAYBACK) +@@ -659,6 +664,7 @@ static int loopback_open(struct snd_pcm_substream *substream) + dpcm->substream = substream; + setup_timer(&dpcm->timer, loopback_timer_function, + (unsigned long)dpcm); ++ spin_lock_init(&dpcm->timer_lock); + + cable = loopback->cables[substream->number][dev]; + if (!cable) { +diff --git a/sound/pci/hda/patch_conexant.c b/sound/pci/hda/patch_conexant.c +index 402f330..94f0c4a 100644 +--- a/sound/pci/hda/patch_conexant.c ++++ b/sound/pci/hda/patch_conexant.c +@@ -139,6 +139,7 @@ struct conexant_spec { + unsigned int asus:1; + unsigned int pin_eapd_ctrls:1; + unsigned int single_adc_amp:1; ++ unsigned int fixup_stereo_dmic:1; + + unsigned int adc_switching:1; + +@@ -4113,9 +4114,9 @@ static int cx_auto_init(struct hda_codec *codec) + + static int cx_auto_add_volume_idx(struct hda_codec *codec, const char *basename, + const char *dir, int cidx, +- hda_nid_t nid, int hda_dir, int amp_idx) ++ hda_nid_t nid, int hda_dir, int amp_idx, int chs) + { +- static char name[32]; ++ static char name[44]; + static struct snd_kcontrol_new knew[] = { + HDA_CODEC_VOLUME(name, 0, 0, 0), + HDA_CODEC_MUTE(name, 0, 0, 0), +@@ -4125,7 +4126,7 @@ static int cx_auto_add_volume_idx(struct hda_codec *codec, const char *basename, + + for (i = 0; i < 2; i++) { + struct snd_kcontrol *kctl; +- knew[i].private_value = HDA_COMPOSE_AMP_VAL(nid, 3, amp_idx, ++ knew[i].private_value = HDA_COMPOSE_AMP_VAL(nid, chs, amp_idx, + hda_dir); + knew[i].subdevice = HDA_SUBDEV_AMP_FLAG; + knew[i].index = cidx; +@@ -4144,7 +4145,7 @@ static int cx_auto_add_volume_idx(struct hda_codec *codec, const char *basename, + } + + #define cx_auto_add_volume(codec, str, dir, cidx, nid, hda_dir) \ +- cx_auto_add_volume_idx(codec, str, dir, cidx, nid, hda_dir, 0) ++ cx_auto_add_volume_idx(codec, str, dir, cidx, nid, hda_dir, 0, 3) + + #define cx_auto_add_pb_volume(codec, nid, str, idx) \ + cx_auto_add_volume(codec, str, " Playback", idx, nid, HDA_OUTPUT) +@@ -4214,6 +4215,36 @@ static int cx_auto_build_output_controls(struct hda_codec *codec) + return 0; + } + ++/* Returns zero if this is a normal stereo channel, and non-zero if it should ++ be split in 
two independent channels. ++ dest_label must be at least 44 characters. */ ++static int cx_auto_get_rightch_label(struct hda_codec *codec, const char *label, ++ char *dest_label, int nid) ++{ ++ struct conexant_spec *spec = codec->spec; ++ int i; ++ ++ if (!spec->fixup_stereo_dmic) ++ return 0; ++ ++ for (i = 0; i < AUTO_CFG_MAX_INS; i++) { ++ int def_conf; ++ if (spec->autocfg.inputs[i].pin != nid) ++ continue; ++ ++ if (spec->autocfg.inputs[i].type != AUTO_PIN_MIC) ++ return 0; ++ def_conf = snd_hda_codec_get_pincfg(codec, nid); ++ if (snd_hda_get_input_pin_attr(def_conf) != INPUT_PIN_ATTR_INT) ++ return 0; ++ ++ /* Finally found the inverted internal mic! */ ++ snprintf(dest_label, 44, "Inverted %s", label); ++ return 1; ++ } ++ return 0; ++} ++ + static int cx_auto_add_capture_volume(struct hda_codec *codec, hda_nid_t nid, + const char *label, const char *pfx, + int cidx) +@@ -4222,14 +4253,25 @@ static int cx_auto_add_capture_volume(struct hda_codec *codec, hda_nid_t nid, + int i; + + for (i = 0; i < spec->num_adc_nids; i++) { ++ char rightch_label[44]; + hda_nid_t adc_nid = spec->adc_nids[i]; + int idx = get_input_connection(codec, adc_nid, nid); + if (idx < 0) + continue; + if (spec->single_adc_amp) + idx = 0; ++ ++ if (cx_auto_get_rightch_label(codec, label, rightch_label, nid)) { ++ /* Make two independent kcontrols for left and right */ ++ int err = cx_auto_add_volume_idx(codec, label, pfx, ++ cidx, adc_nid, HDA_INPUT, idx, 1); ++ if (err < 0) ++ return err; ++ return cx_auto_add_volume_idx(codec, rightch_label, pfx, ++ cidx, adc_nid, HDA_INPUT, idx, 2); ++ } + return cx_auto_add_volume_idx(codec, label, pfx, +- cidx, adc_nid, HDA_INPUT, idx); ++ cidx, adc_nid, HDA_INPUT, idx, 3); + } + return 0; + } +@@ -4242,9 +4284,19 @@ static int cx_auto_add_boost_volume(struct hda_codec *codec, int idx, + int i, con; + + nid = spec->imux_info[idx].pin; +- if (get_wcaps(codec, nid) & AC_WCAP_IN_AMP) ++ if (get_wcaps(codec, nid) & AC_WCAP_IN_AMP) { ++ char rightch_label[44]; ++ if (cx_auto_get_rightch_label(codec, label, rightch_label, nid)) { ++ int err = cx_auto_add_volume_idx(codec, label, " Boost", ++ cidx, nid, HDA_INPUT, 0, 1); ++ if (err < 0) ++ return err; ++ return cx_auto_add_volume_idx(codec, rightch_label, " Boost", ++ cidx, nid, HDA_INPUT, 0, 2); ++ } + return cx_auto_add_volume(codec, label, " Boost", cidx, + nid, HDA_INPUT); ++ } + con = __select_input_connection(codec, spec->imux_info[idx].adc, nid, + &mux, false, 0); + if (con < 0) +@@ -4398,23 +4450,31 @@ static void apply_pincfg(struct hda_codec *codec, const struct cxt_pincfg *cfg) + + } + +-static void apply_pin_fixup(struct hda_codec *codec, ++enum { ++ CXT_PINCFG_LENOVO_X200, ++ CXT_PINCFG_LENOVO_TP410, ++ CXT_FIXUP_STEREO_DMIC, ++}; ++ ++static void apply_fixup(struct hda_codec *codec, + const struct snd_pci_quirk *quirk, + const struct cxt_pincfg **table) + { ++ struct conexant_spec *spec = codec->spec; ++ + quirk = snd_pci_quirk_lookup(codec->bus->pci, quirk); +- if (quirk) { ++ if (quirk && table[quirk->value]) { + snd_printdd(KERN_INFO "hda_codec: applying pincfg for %s\n", + quirk->name); + apply_pincfg(codec, table[quirk->value]); + } ++ if (quirk->value == CXT_FIXUP_STEREO_DMIC) { ++ snd_printdd(KERN_INFO "hda_codec: applying internal mic workaround for %s\n", ++ quirk->name); ++ spec->fixup_stereo_dmic = 1; ++ } + } + +-enum { +- CXT_PINCFG_LENOVO_X200, +- CXT_PINCFG_LENOVO_TP410, +-}; +- + /* ThinkPad X200 & co with cxt5051 */ + static const struct cxt_pincfg cxt_pincfg_lenovo_x200[] = { + { 0x16, 0x042140ff 
}, /* HP (seq# overridden) */ +@@ -4434,6 +4494,7 @@ static const struct cxt_pincfg cxt_pincfg_lenovo_tp410[] = { + static const struct cxt_pincfg *cxt_pincfg_tbl[] = { + [CXT_PINCFG_LENOVO_X200] = cxt_pincfg_lenovo_x200, + [CXT_PINCFG_LENOVO_TP410] = cxt_pincfg_lenovo_tp410, ++ [CXT_FIXUP_STEREO_DMIC] = NULL, + }; + + static const struct snd_pci_quirk cxt5051_fixups[] = { +@@ -4447,6 +4508,9 @@ static const struct snd_pci_quirk cxt5066_fixups[] = { + SND_PCI_QUIRK(0x17aa, 0x215f, "Lenovo T510", CXT_PINCFG_LENOVO_TP410), + SND_PCI_QUIRK(0x17aa, 0x21ce, "Lenovo T420", CXT_PINCFG_LENOVO_TP410), + SND_PCI_QUIRK(0x17aa, 0x21cf, "Lenovo T520", CXT_PINCFG_LENOVO_TP410), ++ SND_PCI_QUIRK(0x17aa, 0x3975, "Lenovo U300s", CXT_FIXUP_STEREO_DMIC), ++ SND_PCI_QUIRK(0x17aa, 0x3977, "Lenovo IdeaPad U310", CXT_FIXUP_STEREO_DMIC), ++ SND_PCI_QUIRK(0x17aa, 0x397b, "Lenovo S205", CXT_FIXUP_STEREO_DMIC), + {} + }; + +@@ -4486,10 +4550,10 @@ static int patch_conexant_auto(struct hda_codec *codec) + break; + case 0x14f15051: + add_cx5051_fake_mutes(codec); +- apply_pin_fixup(codec, cxt5051_fixups, cxt_pincfg_tbl); ++ apply_fixup(codec, cxt5051_fixups, cxt_pincfg_tbl); + break; + default: +- apply_pin_fixup(codec, cxt5066_fixups, cxt_pincfg_tbl); ++ apply_fixup(codec, cxt5066_fixups, cxt_pincfg_tbl); + break; + } + +diff --git a/tools/hv/hv_kvp_daemon.c b/tools/hv/hv_kvp_daemon.c +index 323d4d9..0961d88 100644 +--- a/tools/hv/hv_kvp_daemon.c ++++ b/tools/hv/hv_kvp_daemon.c +@@ -348,7 +348,7 @@ int main(void) + fd = socket(AF_NETLINK, SOCK_DGRAM, NETLINK_CONNECTOR); + if (fd < 0) { + syslog(LOG_ERR, "netlink socket creation failed; error:%d", fd); +- exit(-1); ++ exit(EXIT_FAILURE); + } + addr.nl_family = AF_NETLINK; + addr.nl_pad = 0; +@@ -360,7 +360,7 @@ int main(void) + if (error < 0) { + syslog(LOG_ERR, "bind failed; error:%d", error); + close(fd); +- exit(-1); ++ exit(EXIT_FAILURE); + } + sock_opt = addr.nl_groups; + setsockopt(fd, 270, 1, &sock_opt, sizeof(sock_opt)); +@@ -378,7 +378,7 @@ int main(void) + if (len < 0) { + syslog(LOG_ERR, "netlink_send failed; error:%d", len); + close(fd); +- exit(-1); ++ exit(EXIT_FAILURE); + } + + pfd.fd = fd; +@@ -497,7 +497,7 @@ int main(void) + len = netlink_send(fd, incoming_cn_msg); + if (len < 0) { + syslog(LOG_ERR, "net_link send failed; error:%d", len); +- exit(-1); ++ exit(EXIT_FAILURE); + } + } + +diff --git a/tools/perf/Makefile b/tools/perf/Makefile +index b98e307..e45d2b1 100644 +--- a/tools/perf/Makefile ++++ b/tools/perf/Makefile +@@ -56,7 +56,7 @@ ifeq ($(ARCH),x86_64) + ARCH := x86 + IS_X86_64 := 0 + ifeq (, $(findstring m32,$(EXTRA_CFLAGS))) +- IS_X86_64 := $(shell echo __x86_64__ | ${CC} -E -xc - | tail -n 1) ++ IS_X86_64 := $(shell echo __x86_64__ | ${CC} -E -x c - | tail -n 1) + endif + ifeq (${IS_X86_64}, 1) + RAW_ARCH := x86_64 +diff --git a/tools/power/cpupower/Makefile b/tools/power/cpupower/Makefile +index e8a03ac..7db8da5 100644 +--- a/tools/power/cpupower/Makefile ++++ b/tools/power/cpupower/Makefile +@@ -100,7 +100,7 @@ GMO_FILES = ${shell for HLANG in ${LANGUAGES}; do echo po/$$HLANG.gmo; done;} + export CROSS CC AR STRIP RANLIB CFLAGS LDFLAGS LIB_OBJS + + # check if compiler option is supported +-cc-supports = ${shell if $(CC) ${1} -S -o /dev/null -xc /dev/null > /dev/null 2>&1; then echo "$(1)"; fi;} ++cc-supports = ${shell if $(CC) ${1} -S -o /dev/null -x c /dev/null > /dev/null 2>&1; then echo "$(1)"; fi;} + + # use '-Os' optimization if available, else use -O2 + OPTIMIZATION := $(call cc-supports,-Os,-O2) diff --git 
a/3.2.34/bump/1032_linux-3.2.33.patch b/3.2.34/bump/1032_linux-3.2.33.patch new file mode 100644 index 0000000..c32fb75 --- /dev/null +++ b/3.2.34/bump/1032_linux-3.2.33.patch @@ -0,0 +1,3450 @@ +diff --git a/Makefile b/Makefile +index b6d8282..63ca1ea2 100644 +--- a/Makefile ++++ b/Makefile +@@ -1,6 +1,6 @@ + VERSION = 3 + PATCHLEVEL = 2 +-SUBLEVEL = 32 ++SUBLEVEL = 33 + EXTRAVERSION = + NAME = Saber-toothed Squirrel + +diff --git a/arch/arm/include/asm/vfpmacros.h b/arch/arm/include/asm/vfpmacros.h +index 3d5fc41..bf53047 100644 +--- a/arch/arm/include/asm/vfpmacros.h ++++ b/arch/arm/include/asm/vfpmacros.h +@@ -28,7 +28,7 @@ + ldr \tmp, =elf_hwcap @ may not have MVFR regs + ldr \tmp, [\tmp, #0] + tst \tmp, #HWCAP_VFPv3D16 +- ldceq p11, cr0, [\base],#32*4 @ FLDMIAD \base!, {d16-d31} ++ ldceql p11, cr0, [\base],#32*4 @ FLDMIAD \base!, {d16-d31} + addne \base, \base, #32*4 @ step over unused register space + #else + VFPFMRX \tmp, MVFR0 @ Media and VFP Feature Register 0 +@@ -52,7 +52,7 @@ + ldr \tmp, =elf_hwcap @ may not have MVFR regs + ldr \tmp, [\tmp, #0] + tst \tmp, #HWCAP_VFPv3D16 +- stceq p11, cr0, [\base],#32*4 @ FSTMIAD \base!, {d16-d31} ++ stceql p11, cr0, [\base],#32*4 @ FSTMIAD \base!, {d16-d31} + addne \base, \base, #32*4 @ step over unused register space + #else + VFPFMRX \tmp, MVFR0 @ Media and VFP Feature Register 0 +diff --git a/arch/arm/kernel/smp.c b/arch/arm/kernel/smp.c +index 1d1710e..bfa0eeb 100644 +--- a/arch/arm/kernel/smp.c ++++ b/arch/arm/kernel/smp.c +@@ -295,18 +295,24 @@ static void __cpuinit smp_store_cpu_info(unsigned int cpuid) + asmlinkage void __cpuinit secondary_start_kernel(void) + { + struct mm_struct *mm = &init_mm; +- unsigned int cpu = smp_processor_id(); ++ unsigned int cpu; ++ ++ /* ++ * The identity mapping is uncached (strongly ordered), so ++ * switch away from it before attempting any exclusive accesses. ++ */ ++ cpu_switch_mm(mm->pgd, mm); ++ enter_lazy_tlb(mm, current); ++ local_flush_tlb_all(); + + /* + * All kernel threads share the same mm context; grab a + * reference and switch to it. + */ ++ cpu = smp_processor_id(); + atomic_inc(&mm->mm_count); + current->active_mm = mm; + cpumask_set_cpu(cpu, mm_cpumask(mm)); +- cpu_switch_mm(mm->pgd, mm); +- enter_lazy_tlb(mm, current); +- local_flush_tlb_all(); + + printk("CPU%u: Booted secondary processor\n", cpu); + +diff --git a/arch/mips/kernel/kgdb.c b/arch/mips/kernel/kgdb.c +index f4546e9..23817a6 100644 +--- a/arch/mips/kernel/kgdb.c ++++ b/arch/mips/kernel/kgdb.c +@@ -283,6 +283,15 @@ static int kgdb_mips_notify(struct notifier_block *self, unsigned long cmd, + struct pt_regs *regs = args->regs; + int trap = (regs->cp0_cause & 0x7c) >> 2; + ++#ifdef CONFIG_KPROBES ++ /* ++ * Return immediately if the kprobes fault notifier has set ++ * DIE_PAGE_FAULT. ++ */ ++ if (cmd == DIE_PAGE_FAULT) ++ return NOTIFY_DONE; ++#endif /* CONFIG_KPROBES */ ++ + /* Userspace events, ignore. 
*/ + if (user_mode(regs)) + return NOTIFY_DONE; +diff --git a/arch/s390/boot/compressed/vmlinux.lds.S b/arch/s390/boot/compressed/vmlinux.lds.S +index d80f79d..8e1fb82 100644 +--- a/arch/s390/boot/compressed/vmlinux.lds.S ++++ b/arch/s390/boot/compressed/vmlinux.lds.S +@@ -5,7 +5,7 @@ OUTPUT_FORMAT("elf64-s390", "elf64-s390", "elf64-s390") + OUTPUT_ARCH(s390:64-bit) + #else + OUTPUT_FORMAT("elf32-s390", "elf32-s390", "elf32-s390") +-OUTPUT_ARCH(s390) ++OUTPUT_ARCH(s390:31-bit) + #endif + + ENTRY(startup) +diff --git a/arch/s390/kernel/vmlinux.lds.S b/arch/s390/kernel/vmlinux.lds.S +index e4c79eb..e43d21e 100644 +--- a/arch/s390/kernel/vmlinux.lds.S ++++ b/arch/s390/kernel/vmlinux.lds.S +@@ -8,7 +8,7 @@ + + #ifndef CONFIG_64BIT + OUTPUT_FORMAT("elf32-s390", "elf32-s390", "elf32-s390") +-OUTPUT_ARCH(s390) ++OUTPUT_ARCH(s390:31-bit) + ENTRY(_start) + jiffies = jiffies_64 + 4; + #else +diff --git a/arch/sparc/kernel/perf_event.c b/arch/sparc/kernel/perf_event.c +index 614da62..3c8f220 100644 +--- a/arch/sparc/kernel/perf_event.c ++++ b/arch/sparc/kernel/perf_event.c +@@ -555,11 +555,13 @@ static u64 nop_for_index(int idx) + + static inline void sparc_pmu_enable_event(struct cpu_hw_events *cpuc, struct hw_perf_event *hwc, int idx) + { +- u64 val, mask = mask_for_index(idx); ++ u64 enc, val, mask = mask_for_index(idx); ++ ++ enc = perf_event_get_enc(cpuc->events[idx]); + + val = cpuc->pcr; + val &= ~mask; +- val |= hwc->config; ++ val |= event_encoding(enc, idx); + cpuc->pcr = val; + + pcr_ops->write(cpuc->pcr); +@@ -1422,8 +1424,6 @@ static void perf_callchain_user_64(struct perf_callchain_entry *entry, + { + unsigned long ufp; + +- perf_callchain_store(entry, regs->tpc); +- + ufp = regs->u_regs[UREG_I6] + STACK_BIAS; + do { + struct sparc_stackf *usf, sf; +@@ -1444,8 +1444,6 @@ static void perf_callchain_user_32(struct perf_callchain_entry *entry, + { + unsigned long ufp; + +- perf_callchain_store(entry, regs->tpc); +- + ufp = regs->u_regs[UREG_I6] & 0xffffffffUL; + do { + struct sparc_stackf32 *usf, sf; +@@ -1464,6 +1462,11 @@ static void perf_callchain_user_32(struct perf_callchain_entry *entry, + void + perf_callchain_user(struct perf_callchain_entry *entry, struct pt_regs *regs) + { ++ perf_callchain_store(entry, regs->tpc); ++ ++ if (!current->mm) ++ return; ++ + flushw_user(); + if (test_thread_flag(TIF_32BIT)) + perf_callchain_user_32(entry, regs); +diff --git a/arch/sparc/kernel/sys_sparc_64.c b/arch/sparc/kernel/sys_sparc_64.c +index 441521a..5e4252b 100644 +--- a/arch/sparc/kernel/sys_sparc_64.c ++++ b/arch/sparc/kernel/sys_sparc_64.c +@@ -519,12 +519,12 @@ SYSCALL_DEFINE1(sparc64_personality, unsigned long, personality) + { + int ret; + +- if (current->personality == PER_LINUX32 && +- personality == PER_LINUX) +- personality = PER_LINUX32; ++ if (personality(current->personality) == PER_LINUX32 && ++ personality(personality) == PER_LINUX) ++ personality |= PER_LINUX32; + ret = sys_personality(personality); +- if (ret == PER_LINUX32) +- ret = PER_LINUX; ++ if (personality(ret) == PER_LINUX32) ++ ret &= ~PER_LINUX32; + + return ret; + } +diff --git a/arch/sparc/kernel/syscalls.S b/arch/sparc/kernel/syscalls.S +index 1d7e274..7f5f65d 100644 +--- a/arch/sparc/kernel/syscalls.S ++++ b/arch/sparc/kernel/syscalls.S +@@ -212,24 +212,20 @@ linux_sparc_syscall: + 3: stx %o0, [%sp + PTREGS_OFF + PT_V9_I0] + ret_sys_call: + ldx [%sp + PTREGS_OFF + PT_V9_TSTATE], %g3 +- ldx [%sp + PTREGS_OFF + PT_V9_TNPC], %l1 ! 
pc = npc + sra %o0, 0, %o0 + mov %ulo(TSTATE_XCARRY | TSTATE_ICARRY), %g2 + sllx %g2, 32, %g2 + +- /* Check if force_successful_syscall_return() +- * was invoked. +- */ +- ldub [%g6 + TI_SYS_NOERROR], %l2 +- brnz,a,pn %l2, 80f +- stb %g0, [%g6 + TI_SYS_NOERROR] +- + cmp %o0, -ERESTART_RESTARTBLOCK + bgeu,pn %xcc, 1f +- andcc %l0, (_TIF_SYSCALL_TRACE|_TIF_SECCOMP|_TIF_SYSCALL_AUDIT|_TIF_SYSCALL_TRACEPOINT), %l6 +-80: ++ andcc %l0, (_TIF_SYSCALL_TRACE|_TIF_SECCOMP|_TIF_SYSCALL_AUDIT|_TIF_SYSCALL_TRACEPOINT), %g0 ++ ldx [%sp + PTREGS_OFF + PT_V9_TNPC], %l1 ! pc = npc ++ ++2: ++ stb %g0, [%g6 + TI_SYS_NOERROR] + /* System call success, clear Carry condition code. */ + andn %g3, %g2, %g3 ++3: + stx %g3, [%sp + PTREGS_OFF + PT_V9_TSTATE] + bne,pn %icc, linux_syscall_trace2 + add %l1, 0x4, %l2 ! npc = npc+4 +@@ -238,20 +234,20 @@ ret_sys_call: + stx %l2, [%sp + PTREGS_OFF + PT_V9_TNPC] + + 1: ++ /* Check if force_successful_syscall_return() ++ * was invoked. ++ */ ++ ldub [%g6 + TI_SYS_NOERROR], %l2 ++ brnz,pn %l2, 2b ++ ldx [%sp + PTREGS_OFF + PT_V9_TNPC], %l1 ! pc = npc + /* System call failure, set Carry condition code. + * Also, get abs(errno) to return to the process. + */ +- andcc %l0, (_TIF_SYSCALL_TRACE|_TIF_SECCOMP|_TIF_SYSCALL_AUDIT|_TIF_SYSCALL_TRACEPOINT), %l6 + sub %g0, %o0, %o0 +- or %g3, %g2, %g3 + stx %o0, [%sp + PTREGS_OFF + PT_V9_I0] +- stx %g3, [%sp + PTREGS_OFF + PT_V9_TSTATE] +- bne,pn %icc, linux_syscall_trace2 +- add %l1, 0x4, %l2 ! npc = npc+4 +- stx %l1, [%sp + PTREGS_OFF + PT_V9_TPC] ++ ba,pt %xcc, 3b ++ or %g3, %g2, %g3 + +- b,pt %xcc, rtrap +- stx %l2, [%sp + PTREGS_OFF + PT_V9_TNPC] + linux_syscall_trace2: + call syscall_trace_leave + add %sp, PTREGS_OFF, %o0 +diff --git a/arch/sparc/mm/init_64.c b/arch/sparc/mm/init_64.c +index 8e073d8..6ff4d78 100644 +--- a/arch/sparc/mm/init_64.c ++++ b/arch/sparc/mm/init_64.c +@@ -2118,6 +2118,9 @@ EXPORT_SYMBOL(_PAGE_CACHE); + #ifdef CONFIG_SPARSEMEM_VMEMMAP + unsigned long vmemmap_table[VMEMMAP_SIZE]; + ++static long __meminitdata addr_start, addr_end; ++static int __meminitdata node_start; ++ + int __meminit vmemmap_populate(struct page *start, unsigned long nr, int node) + { + unsigned long vstart = (unsigned long) start; +@@ -2148,15 +2151,30 @@ int __meminit vmemmap_populate(struct page *start, unsigned long nr, int node) + + *vmem_pp = pte_base | __pa(block); + +- printk(KERN_INFO "[%p-%p] page_structs=%lu " +- "node=%d entry=%lu/%lu\n", start, block, nr, +- node, +- addr >> VMEMMAP_CHUNK_SHIFT, +- VMEMMAP_SIZE); ++ /* check to see if we have contiguous blocks */ ++ if (addr_end != addr || node_start != node) { ++ if (addr_start) ++ printk(KERN_DEBUG " [%lx-%lx] on node %d\n", ++ addr_start, addr_end-1, node_start); ++ addr_start = addr; ++ node_start = node; ++ } ++ addr_end = addr + VMEMMAP_CHUNK; + } + } + return 0; + } ++ ++void __meminit vmemmap_populate_print_last(void) ++{ ++ if (addr_start) { ++ printk(KERN_DEBUG " [%lx-%lx] on node %d\n", ++ addr_start, addr_end-1, node_start); ++ addr_start = 0; ++ addr_end = 0; ++ node_start = 0; ++ } ++} + #endif /* CONFIG_SPARSEMEM_VMEMMAP */ + + static void prot_init_common(unsigned long page_none, +diff --git a/arch/tile/Makefile b/arch/tile/Makefile +index 17acce7..04c637c 100644 +--- a/arch/tile/Makefile ++++ b/arch/tile/Makefile +@@ -26,6 +26,10 @@ $(error Set TILERA_ROOT or CROSS_COMPILE when building $(ARCH) on $(HOST_ARCH)) + endif + endif + ++# The tile compiler may emit .eh_frame information for backtracing. 
++# In kernel modules, this causes load failures due to unsupported relocations. ++KBUILD_CFLAGS += -fno-asynchronous-unwind-tables ++ + ifneq ($(CONFIG_DEBUG_EXTRA_FLAGS),"") + KBUILD_CFLAGS += $(CONFIG_DEBUG_EXTRA_FLAGS) + endif +diff --git a/arch/x86/kernel/entry_32.S b/arch/x86/kernel/entry_32.S +index bcda816..4893d58 100644 +--- a/arch/x86/kernel/entry_32.S ++++ b/arch/x86/kernel/entry_32.S +@@ -1025,7 +1025,7 @@ ENTRY(xen_sysenter_target) + + ENTRY(xen_hypervisor_callback) + CFI_STARTPROC +- pushl_cfi $0 ++ pushl_cfi $-1 /* orig_ax = -1 => not a system call */ + SAVE_ALL + TRACE_IRQS_OFF + +@@ -1067,14 +1067,16 @@ ENTRY(xen_failsafe_callback) + 2: mov 8(%esp),%es + 3: mov 12(%esp),%fs + 4: mov 16(%esp),%gs ++ /* EAX == 0 => Category 1 (Bad segment) ++ EAX != 0 => Category 2 (Bad IRET) */ + testl %eax,%eax + popl_cfi %eax + lea 16(%esp),%esp + CFI_ADJUST_CFA_OFFSET -16 + jz 5f + addl $16,%esp +- jmp iret_exc # EAX != 0 => Category 2 (Bad IRET) +-5: pushl_cfi $0 # EAX == 0 => Category 1 (Bad segment) ++ jmp iret_exc ++5: pushl_cfi $-1 /* orig_ax = -1 => not a system call */ + SAVE_ALL + jmp ret_from_exception + CFI_ENDPROC +diff --git a/arch/x86/kernel/entry_64.S b/arch/x86/kernel/entry_64.S +index faf8d5e..6274f5f 100644 +--- a/arch/x86/kernel/entry_64.S ++++ b/arch/x86/kernel/entry_64.S +@@ -1303,7 +1303,7 @@ ENTRY(xen_failsafe_callback) + CFI_RESTORE r11 + addq $0x30,%rsp + CFI_ADJUST_CFA_OFFSET -0x30 +- pushq_cfi $0 ++ pushq_cfi $-1 /* orig_ax = -1 => not a system call */ + SAVE_ALL + jmp error_exit + CFI_ENDPROC +diff --git a/arch/x86/oprofile/nmi_int.c b/arch/x86/oprofile/nmi_int.c +index 75f9528..6bc0899 100644 +--- a/arch/x86/oprofile/nmi_int.c ++++ b/arch/x86/oprofile/nmi_int.c +@@ -55,7 +55,7 @@ u64 op_x86_get_ctrl(struct op_x86_model_spec const *model, + val |= counter_config->extra; + event &= model->event_mask ? model->event_mask : 0xFF; + val |= event & 0xFF; +- val |= (event & 0x0F00) << 24; ++ val |= (u64)(event & 0x0F00) << 24; + + return val; + } +diff --git a/arch/x86/xen/enlighten.c b/arch/x86/xen/enlighten.c +index a1e21ae..69b9ef6 100644 +--- a/arch/x86/xen/enlighten.c ++++ b/arch/x86/xen/enlighten.c +@@ -818,7 +818,16 @@ static void xen_write_cr4(unsigned long cr4) + + native_write_cr4(cr4); + } +- ++#ifdef CONFIG_X86_64 ++static inline unsigned long xen_read_cr8(void) ++{ ++ return 0; ++} ++static inline void xen_write_cr8(unsigned long val) ++{ ++ BUG_ON(val); ++} ++#endif + static int xen_write_msr_safe(unsigned int msr, unsigned low, unsigned high) + { + int ret; +@@ -987,6 +996,11 @@ static const struct pv_cpu_ops xen_cpu_ops __initconst = { + .read_cr4_safe = native_read_cr4_safe, + .write_cr4 = xen_write_cr4, + ++#ifdef CONFIG_X86_64 ++ .read_cr8 = xen_read_cr8, ++ .write_cr8 = xen_write_cr8, ++#endif ++ + .wbinvd = native_wbinvd, + + .read_msr = native_read_msr_safe, +@@ -997,6 +1011,8 @@ static const struct pv_cpu_ops xen_cpu_ops __initconst = { + .read_tsc = native_read_tsc, + .read_pmc = native_read_pmc, + ++ .read_tscp = native_read_tscp, ++ + .iret = xen_iret, + .irq_enable_sysexit = xen_sysexit, + #ifdef CONFIG_X86_64 +diff --git a/drivers/acpi/ec.c b/drivers/acpi/ec.c +index b19a18d..d2519b2 100644 +--- a/drivers/acpi/ec.c ++++ b/drivers/acpi/ec.c +@@ -71,9 +71,6 @@ enum ec_command { + #define ACPI_EC_UDELAY_GLK 1000 /* Wait 1ms max. 
to get global lock */ + #define ACPI_EC_MSI_UDELAY 550 /* Wait 550us for MSI EC */ + +-#define ACPI_EC_STORM_THRESHOLD 8 /* number of false interrupts +- per one transaction */ +- + enum { + EC_FLAGS_QUERY_PENDING, /* Query is pending */ + EC_FLAGS_GPE_STORM, /* GPE storm detected */ +@@ -87,6 +84,15 @@ static unsigned int ec_delay __read_mostly = ACPI_EC_DELAY; + module_param(ec_delay, uint, 0644); + MODULE_PARM_DESC(ec_delay, "Timeout(ms) waited until an EC command completes"); + ++/* ++ * If the number of false interrupts per one transaction exceeds ++ * this threshold, will think there is a GPE storm happened and ++ * will disable the GPE for normal transaction. ++ */ ++static unsigned int ec_storm_threshold __read_mostly = 8; ++module_param(ec_storm_threshold, uint, 0644); ++MODULE_PARM_DESC(ec_storm_threshold, "Maxim false GPE numbers not considered as GPE storm"); ++ + /* If we find an EC via the ECDT, we need to keep a ptr to its context */ + /* External interfaces use first EC only, so remember */ + typedef int (*acpi_ec_query_func) (void *data); +@@ -319,7 +325,7 @@ static int acpi_ec_transaction(struct acpi_ec *ec, struct transaction *t) + msleep(1); + /* It is safe to enable the GPE outside of the transaction. */ + acpi_enable_gpe(NULL, ec->gpe); +- } else if (t->irq_count > ACPI_EC_STORM_THRESHOLD) { ++ } else if (t->irq_count > ec_storm_threshold) { + pr_info(PREFIX "GPE storm detected, " + "transactions will use polling mode\n"); + set_bit(EC_FLAGS_GPE_STORM, &ec->flags); +@@ -914,6 +920,17 @@ static int ec_flag_msi(const struct dmi_system_id *id) + return 0; + } + ++/* ++ * Clevo M720 notebook actually works ok with IRQ mode, if we lifted ++ * the GPE storm threshold back to 20 ++ */ ++static int ec_enlarge_storm_threshold(const struct dmi_system_id *id) ++{ ++ pr_debug("Setting the EC GPE storm threshold to 20\n"); ++ ec_storm_threshold = 20; ++ return 0; ++} ++ + static struct dmi_system_id __initdata ec_dmi_table[] = { + { + ec_skip_dsdt_scan, "Compal JFL92", { +@@ -945,10 +962,13 @@ static struct dmi_system_id __initdata ec_dmi_table[] = { + { + ec_validate_ecdt, "ASUS hardware", { + DMI_MATCH(DMI_BOARD_VENDOR, "ASUSTeK Computer Inc.") }, NULL}, ++ { ++ ec_enlarge_storm_threshold, "CLEVO hardware", { ++ DMI_MATCH(DMI_SYS_VENDOR, "CLEVO Co."), ++ DMI_MATCH(DMI_PRODUCT_NAME, "M720T/M730T"),}, NULL}, + {}, + }; + +- + int __init acpi_ec_ecdt_probe(void) + { + acpi_status status; +diff --git a/drivers/bcma/main.c b/drivers/bcma/main.c +index 10f92b3..7a987a7 100644 +--- a/drivers/bcma/main.c ++++ b/drivers/bcma/main.c +@@ -124,9 +124,10 @@ static int bcma_register_cores(struct bcma_bus *bus) + + static void bcma_unregister_cores(struct bcma_bus *bus) + { +- struct bcma_device *core; ++ struct bcma_device *core, *tmp; + +- list_for_each_entry(core, &bus->cores, list) { ++ list_for_each_entry_safe(core, tmp, &bus->cores, list) { ++ list_del(&core->list); + if (core->dev_registered) + device_unregister(&core->dev); + } +diff --git a/drivers/char/tpm/tpm.c b/drivers/char/tpm/tpm.c +index b366b34..0d91655 100644 +--- a/drivers/char/tpm/tpm.c ++++ b/drivers/char/tpm/tpm.c +@@ -1072,17 +1072,20 @@ ssize_t tpm_write(struct file *file, const char __user *buf, + size_t size, loff_t *off) + { + struct tpm_chip *chip = file->private_data; +- size_t in_size = size, out_size; ++ size_t in_size = size; ++ ssize_t out_size; + + /* cannot perform a write until the read has cleared +- either via tpm_read or a user_read_timer timeout */ +- while (atomic_read(&chip->data_pending) != 0) +- 
msleep(TPM_TIMEOUT); +- +- mutex_lock(&chip->buffer_mutex); ++ either via tpm_read or a user_read_timer timeout. ++ This also prevents splitted buffered writes from blocking here. ++ */ ++ if (atomic_read(&chip->data_pending) != 0) ++ return -EBUSY; + + if (in_size > TPM_BUFSIZE) +- in_size = TPM_BUFSIZE; ++ return -E2BIG; ++ ++ mutex_lock(&chip->buffer_mutex); + + if (copy_from_user + (chip->data_buffer, (void __user *) buf, in_size)) { +@@ -1092,6 +1095,10 @@ ssize_t tpm_write(struct file *file, const char __user *buf, + + /* atomic tpm command send and result receive */ + out_size = tpm_transmit(chip, chip->data_buffer, TPM_BUFSIZE); ++ if (out_size < 0) { ++ mutex_unlock(&chip->buffer_mutex); ++ return out_size; ++ } + + atomic_set(&chip->data_pending, out_size); + mutex_unlock(&chip->buffer_mutex); +diff --git a/drivers/cpufreq/powernow-k8.c b/drivers/cpufreq/powernow-k8.c +index b7fe343..f6cd315 100644 +--- a/drivers/cpufreq/powernow-k8.c ++++ b/drivers/cpufreq/powernow-k8.c +@@ -1216,14 +1216,7 @@ static int powernowk8_target(struct cpufreq_policy *pol, + struct powernowk8_target_arg pta = { .pol = pol, .targfreq = targfreq, + .relation = relation }; + +- /* +- * Must run on @pol->cpu. cpufreq core is responsible for ensuring +- * that we're bound to the current CPU and pol->cpu stays online. +- */ +- if (smp_processor_id() == pol->cpu) +- return powernowk8_target_fn(&pta); +- else +- return work_on_cpu(pol->cpu, powernowk8_target_fn, &pta); ++ return work_on_cpu(pol->cpu, powernowk8_target_fn, &pta); + } + + /* Driver entry point to verify the policy and range of frequencies */ +diff --git a/drivers/edac/amd64_edac.c b/drivers/edac/amd64_edac.c +index c9eee6d..a9d5482 100644 +--- a/drivers/edac/amd64_edac.c ++++ b/drivers/edac/amd64_edac.c +@@ -170,8 +170,11 @@ static int __amd64_set_scrub_rate(struct pci_dev *ctl, u32 new_bw, u32 min_rate) + * memory controller and apply to register. Search for the first + * bandwidth entry that is greater or equal than the setting requested + * and program that. If at last entry, turn off DRAM scrubbing. ++ * ++ * If no suitable bandwidth is found, turn off DRAM scrubbing entirely ++ * by falling back to the last element in scrubrates[]. + */ +- for (i = 0; i < ARRAY_SIZE(scrubrates); i++) { ++ for (i = 0; i < ARRAY_SIZE(scrubrates) - 1; i++) { + /* + * skip scrub rates which aren't recommended + * (see F10 BKDG, F3x58) +@@ -181,12 +184,6 @@ static int __amd64_set_scrub_rate(struct pci_dev *ctl, u32 new_bw, u32 min_rate) + + if (scrubrates[i].bandwidth <= new_bw) + break; +- +- /* +- * if no suitable bandwidth found, turn off DRAM scrubbing +- * entirely by falling back to the last element in the +- * scrubrates array. 
+- */ + } + + scrubval = scrubrates[i].scrubval; +diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c +index 33e1555..dbe4dbe 100644 +--- a/drivers/gpu/drm/i915/i915_gem.c ++++ b/drivers/gpu/drm/i915/i915_gem.c +@@ -999,6 +999,7 @@ i915_gem_pwrite_ioctl(struct drm_device *dev, void *data, + if (obj->phys_obj) + ret = i915_gem_phys_pwrite(dev, obj, args, file); + else if (obj->gtt_space && ++ obj->tiling_mode == I915_TILING_NONE && + obj->base.write_domain != I915_GEM_DOMAIN_CPU) { + ret = i915_gem_object_pin(obj, 0, true); + if (ret) +diff --git a/drivers/gpu/drm/i915/intel_lvds.c b/drivers/gpu/drm/i915/intel_lvds.c +index f07bde2..57152a7 100644 +--- a/drivers/gpu/drm/i915/intel_lvds.c ++++ b/drivers/gpu/drm/i915/intel_lvds.c +@@ -771,6 +771,14 @@ static const struct dmi_system_id intel_no_lvds[] = { + DMI_MATCH(DMI_BOARD_NAME, "MS-7469"), + }, + }, ++ { ++ .callback = intel_no_lvds_dmi_callback, ++ .ident = "ZOTAC ZBOXSD-ID12/ID13", ++ .matches = { ++ DMI_MATCH(DMI_BOARD_VENDOR, "ZOTAC"), ++ DMI_MATCH(DMI_BOARD_NAME, "ZBOXSD-ID12/ID13"), ++ }, ++ }, + + { } /* terminating entry */ + }; +diff --git a/drivers/gpu/drm/radeon/radeon_legacy_encoders.c b/drivers/gpu/drm/radeon/radeon_legacy_encoders.c +index 2f46e0c..3ad3cc6 100644 +--- a/drivers/gpu/drm/radeon/radeon_legacy_encoders.c ++++ b/drivers/gpu/drm/radeon/radeon_legacy_encoders.c +@@ -973,11 +973,7 @@ static void radeon_legacy_tmds_ext_mode_set(struct drm_encoder *encoder, + static void radeon_ext_tmds_enc_destroy(struct drm_encoder *encoder) + { + struct radeon_encoder *radeon_encoder = to_radeon_encoder(encoder); +- struct radeon_encoder_ext_tmds *tmds = radeon_encoder->enc_priv; +- if (tmds) { +- if (tmds->i2c_bus) +- radeon_i2c_destroy(tmds->i2c_bus); +- } ++ /* don't destroy the i2c bus record here, this will be done in radeon_i2c_fini */ + kfree(radeon_encoder->enc_priv); + drm_encoder_cleanup(encoder); + kfree(radeon_encoder); +diff --git a/drivers/hv/channel.c b/drivers/hv/channel.c +index 4065374..f4c3d28 100644 +--- a/drivers/hv/channel.c ++++ b/drivers/hv/channel.c +@@ -146,14 +146,14 @@ int vmbus_open(struct vmbus_channel *newchannel, u32 send_ringbuffer_size, + + if (ret != 0) { + err = ret; +- goto errorout; ++ goto error0; + } + + ret = hv_ringbuffer_init( + &newchannel->inbound, in, recv_ringbuffer_size); + if (ret != 0) { + err = ret; +- goto errorout; ++ goto error0; + } + + +@@ -168,7 +168,7 @@ int vmbus_open(struct vmbus_channel *newchannel, u32 send_ringbuffer_size, + + if (ret != 0) { + err = ret; +- goto errorout; ++ goto error0; + } + + /* Create and init the channel open message */ +@@ -177,7 +177,7 @@ int vmbus_open(struct vmbus_channel *newchannel, u32 send_ringbuffer_size, + GFP_KERNEL); + if (!open_info) { + err = -ENOMEM; +- goto errorout; ++ goto error0; + } + + init_completion(&open_info->waitevent); +@@ -193,7 +193,7 @@ int vmbus_open(struct vmbus_channel *newchannel, u32 send_ringbuffer_size, + + if (userdatalen > MAX_USER_DEFINED_BYTES) { + err = -EINVAL; +- goto errorout; ++ goto error0; + } + + if (userdatalen) +@@ -208,19 +208,18 @@ int vmbus_open(struct vmbus_channel *newchannel, u32 send_ringbuffer_size, + sizeof(struct vmbus_channel_open_channel)); + + if (ret != 0) +- goto cleanup; ++ goto error1; + + t = wait_for_completion_timeout(&open_info->waitevent, 5*HZ); + if (t == 0) { + err = -ETIMEDOUT; +- goto errorout; ++ goto error1; + } + + + if (open_info->response.open_result.status) + err = open_info->response.open_result.status; + +-cleanup: + 
spin_lock_irqsave(&vmbus_connection.channelmsg_lock, flags); + list_del(&open_info->msglistentry); + spin_unlock_irqrestore(&vmbus_connection.channelmsg_lock, flags); +@@ -228,9 +227,12 @@ cleanup: + kfree(open_info); + return err; + +-errorout: +- hv_ringbuffer_cleanup(&newchannel->outbound); +- hv_ringbuffer_cleanup(&newchannel->inbound); ++error1: ++ spin_lock_irqsave(&vmbus_connection.channelmsg_lock, flags); ++ list_del(&open_info->msglistentry); ++ spin_unlock_irqrestore(&vmbus_connection.channelmsg_lock, flags); ++ ++error0: + free_pages((unsigned long)out, + get_order(send_ringbuffer_size + recv_ringbuffer_size)); + kfree(open_info); +diff --git a/drivers/md/raid10.c b/drivers/md/raid10.c +index 0634ee5..8f67c4d 100644 +--- a/drivers/md/raid10.c ++++ b/drivers/md/raid10.c +@@ -2641,7 +2641,7 @@ static sector_t sync_request(struct mddev *mddev, sector_t sector_nr, + else { + bad_sectors -= (sector - first_bad); + if (max_sync > bad_sectors) +- max_sync = max_sync; ++ max_sync = bad_sectors; + continue; + } + } +diff --git a/drivers/media/video/au0828/au0828-video.c b/drivers/media/video/au0828/au0828-video.c +index 0b3e481..eab0641 100644 +--- a/drivers/media/video/au0828/au0828-video.c ++++ b/drivers/media/video/au0828/au0828-video.c +@@ -1692,14 +1692,18 @@ static int vidioc_streamoff(struct file *file, void *priv, + (AUVI_INPUT(i).audio_setup)(dev, 0); + } + +- videobuf_streamoff(&fh->vb_vidq); +- res_free(fh, AU0828_RESOURCE_VIDEO); ++ if (res_check(fh, AU0828_RESOURCE_VIDEO)) { ++ videobuf_streamoff(&fh->vb_vidq); ++ res_free(fh, AU0828_RESOURCE_VIDEO); ++ } + } else if (fh->type == V4L2_BUF_TYPE_VBI_CAPTURE) { + dev->vbi_timeout_running = 0; + del_timer_sync(&dev->vbi_timeout); + +- videobuf_streamoff(&fh->vb_vbiq); +- res_free(fh, AU0828_RESOURCE_VBI); ++ if (res_check(fh, AU0828_RESOURCE_VBI)) { ++ videobuf_streamoff(&fh->vb_vbiq); ++ res_free(fh, AU0828_RESOURCE_VBI); ++ } + } + + return 0; +diff --git a/drivers/mtd/nand/nand_base.c b/drivers/mtd/nand/nand_base.c +index 3ed9c5e..daed698 100644 +--- a/drivers/mtd/nand/nand_base.c ++++ b/drivers/mtd/nand/nand_base.c +@@ -2903,9 +2903,7 @@ static int nand_flash_detect_onfi(struct mtd_info *mtd, struct nand_chip *chip, + if (le16_to_cpu(p->features) & 1) + *busw = NAND_BUSWIDTH_16; + +- chip->options &= ~NAND_CHIPOPTIONS_MSK; +- chip->options |= (NAND_NO_READRDY | +- NAND_NO_AUTOINCR) & NAND_CHIPOPTIONS_MSK; ++ chip->options |= NAND_NO_READRDY | NAND_NO_AUTOINCR; + + return 1; + } +@@ -3069,9 +3067,8 @@ static struct nand_flash_dev *nand_get_flash_type(struct mtd_info *mtd, + mtd->erasesize <<= ((id_data[3] & 0x03) << 1); + } + } +- /* Get chip options, preserve non chip based options */ +- chip->options &= ~NAND_CHIPOPTIONS_MSK; +- chip->options |= type->options & NAND_CHIPOPTIONS_MSK; ++ /* Get chip options */ ++ chip->options |= type->options; + + /* + * Check if chip is not a Samsung device. 
Do not clear the +diff --git a/drivers/net/ethernet/intel/e1000/e1000_main.c b/drivers/net/ethernet/intel/e1000/e1000_main.c +index c5f6b0e..6546191 100644 +--- a/drivers/net/ethernet/intel/e1000/e1000_main.c ++++ b/drivers/net/ethernet/intel/e1000/e1000_main.c +@@ -168,6 +168,8 @@ static int e1000_82547_fifo_workaround(struct e1000_adapter *adapter, + + static bool e1000_vlan_used(struct e1000_adapter *adapter); + static void e1000_vlan_mode(struct net_device *netdev, u32 features); ++static void e1000_vlan_filter_on_off(struct e1000_adapter *adapter, ++ bool filter_on); + static void e1000_vlan_rx_add_vid(struct net_device *netdev, u16 vid); + static void e1000_vlan_rx_kill_vid(struct net_device *netdev, u16 vid); + static void e1000_restore_vlan(struct e1000_adapter *adapter); +@@ -1219,7 +1221,7 @@ static int __devinit e1000_probe(struct pci_dev *pdev, + if (err) + goto err_register; + +- e1000_vlan_mode(netdev, netdev->features); ++ e1000_vlan_filter_on_off(adapter, false); + + /* print bus type/speed/width info */ + e_info(probe, "(PCI%s:%dMHz:%d-bit) %pM\n", +@@ -4553,6 +4555,21 @@ static bool e1000_vlan_used(struct e1000_adapter *adapter) + return false; + } + ++static void __e1000_vlan_mode(struct e1000_adapter *adapter, u32 features) ++{ ++ struct e1000_hw *hw = &adapter->hw; ++ u32 ctrl; ++ ++ ctrl = er32(CTRL); ++ if (features & NETIF_F_HW_VLAN_RX) { ++ /* enable VLAN tag insert/strip */ ++ ctrl |= E1000_CTRL_VME; ++ } else { ++ /* disable VLAN tag insert/strip */ ++ ctrl &= ~E1000_CTRL_VME; ++ } ++ ew32(CTRL, ctrl); ++} + static void e1000_vlan_filter_on_off(struct e1000_adapter *adapter, + bool filter_on) + { +@@ -4562,6 +4579,7 @@ static void e1000_vlan_filter_on_off(struct e1000_adapter *adapter, + if (!test_bit(__E1000_DOWN, &adapter->flags)) + e1000_irq_disable(adapter); + ++ __e1000_vlan_mode(adapter, adapter->netdev->features); + if (filter_on) { + /* enable VLAN receive filtering */ + rctl = er32(RCTL); +@@ -4584,21 +4602,11 @@ static void e1000_vlan_filter_on_off(struct e1000_adapter *adapter, + static void e1000_vlan_mode(struct net_device *netdev, u32 features) + { + struct e1000_adapter *adapter = netdev_priv(netdev); +- struct e1000_hw *hw = &adapter->hw; +- u32 ctrl; + + if (!test_bit(__E1000_DOWN, &adapter->flags)) + e1000_irq_disable(adapter); + +- ctrl = er32(CTRL); +- if (features & NETIF_F_HW_VLAN_RX) { +- /* enable VLAN tag insert/strip */ +- ctrl |= E1000_CTRL_VME; +- } else { +- /* disable VLAN tag insert/strip */ +- ctrl &= ~E1000_CTRL_VME; +- } +- ew32(CTRL, ctrl); ++ __e1000_vlan_mode(adapter, features); + + if (!test_bit(__E1000_DOWN, &adapter->flags)) + e1000_irq_enable(adapter); +diff --git a/drivers/net/ethernet/marvell/skge.c b/drivers/net/ethernet/marvell/skge.c +index dea0cb4..57be855 100644 +--- a/drivers/net/ethernet/marvell/skge.c ++++ b/drivers/net/ethernet/marvell/skge.c +@@ -4143,6 +4143,13 @@ static struct dmi_system_id skge_32bit_dma_boards[] = { + DMI_MATCH(DMI_BOARD_NAME, "nForce"), + }, + }, ++ { ++ .ident = "ASUS P5NSLI", ++ .matches = { ++ DMI_MATCH(DMI_BOARD_VENDOR, "ASUSTeK Computer INC."), ++ DMI_MATCH(DMI_BOARD_NAME, "P5NSLI") ++ }, ++ }, + {} + }; + +diff --git a/drivers/net/wireless/ath/ath9k/ar9003_2p2_initvals.h b/drivers/net/wireless/ath/ath9k/ar9003_2p2_initvals.h +index 026f9de..cc54153 100644 +--- a/drivers/net/wireless/ath/ath9k/ar9003_2p2_initvals.h ++++ b/drivers/net/wireless/ath/ath9k/ar9003_2p2_initvals.h +@@ -835,107 +835,107 @@ static const u32 ar9300_2p2_baseband_core[][2] = { + + static const u32 
ar9300Modes_high_power_tx_gain_table_2p2[][5] = { + /* Addr 5G_HT20 5G_HT40 2G_HT40 2G_HT20 */ +- {0x0000a2dc, 0x000cfff0, 0x000cfff0, 0x03aaa352, 0x03aaa352}, +- {0x0000a2e0, 0x000f0000, 0x000f0000, 0x03ccc584, 0x03ccc584}, +- {0x0000a2e4, 0x03f00000, 0x03f00000, 0x03f0f800, 0x03f0f800}, ++ {0x0000a2dc, 0x00033800, 0x00033800, 0x03aaa352, 0x03aaa352}, ++ {0x0000a2e0, 0x0003c000, 0x0003c000, 0x03ccc584, 0x03ccc584}, ++ {0x0000a2e4, 0x03fc0000, 0x03fc0000, 0x03f0f800, 0x03f0f800}, + {0x0000a2e8, 0x00000000, 0x00000000, 0x03ff0000, 0x03ff0000}, + {0x0000a410, 0x000050d9, 0x000050d9, 0x000050d9, 0x000050d9}, + {0x0000a500, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, + {0x0000a504, 0x06000003, 0x06000003, 0x04000002, 0x04000002}, + {0x0000a508, 0x0a000020, 0x0a000020, 0x08000004, 0x08000004}, + {0x0000a50c, 0x10000023, 0x10000023, 0x0b000200, 0x0b000200}, +- {0x0000a510, 0x15000028, 0x15000028, 0x0f000202, 0x0f000202}, +- {0x0000a514, 0x1b00002b, 0x1b00002b, 0x12000400, 0x12000400}, +- {0x0000a518, 0x1f020028, 0x1f020028, 0x16000402, 0x16000402}, +- {0x0000a51c, 0x2502002b, 0x2502002b, 0x19000404, 0x19000404}, +- {0x0000a520, 0x2a04002a, 0x2a04002a, 0x1c000603, 0x1c000603}, +- {0x0000a524, 0x2e06002a, 0x2e06002a, 0x21000a02, 0x21000a02}, +- {0x0000a528, 0x3302202d, 0x3302202d, 0x25000a04, 0x25000a04}, +- {0x0000a52c, 0x3804202c, 0x3804202c, 0x28000a20, 0x28000a20}, +- {0x0000a530, 0x3c06202c, 0x3c06202c, 0x2c000e20, 0x2c000e20}, +- {0x0000a534, 0x4108202d, 0x4108202d, 0x30000e22, 0x30000e22}, +- {0x0000a538, 0x4506402d, 0x4506402d, 0x34000e24, 0x34000e24}, +- {0x0000a53c, 0x4906222d, 0x4906222d, 0x38001640, 0x38001640}, +- {0x0000a540, 0x4d062231, 0x4d062231, 0x3c001660, 0x3c001660}, +- {0x0000a544, 0x50082231, 0x50082231, 0x3f001861, 0x3f001861}, +- {0x0000a548, 0x5608422e, 0x5608422e, 0x43001a81, 0x43001a81}, +- {0x0000a54c, 0x5a08442e, 0x5a08442e, 0x47001a83, 0x47001a83}, +- {0x0000a550, 0x5e0a4431, 0x5e0a4431, 0x4a001c84, 0x4a001c84}, +- {0x0000a554, 0x640a4432, 0x640a4432, 0x4e001ce3, 0x4e001ce3}, +- {0x0000a558, 0x680a4434, 0x680a4434, 0x52001ce5, 0x52001ce5}, +- {0x0000a55c, 0x6c0a6434, 0x6c0a6434, 0x56001ce9, 0x56001ce9}, +- {0x0000a560, 0x6f0a6633, 0x6f0a6633, 0x5a001ceb, 0x5a001ceb}, +- {0x0000a564, 0x730c6634, 0x730c6634, 0x5d001eec, 0x5d001eec}, +- {0x0000a568, 0x730c6634, 0x730c6634, 0x5d001eec, 0x5d001eec}, +- {0x0000a56c, 0x730c6634, 0x730c6634, 0x5d001eec, 0x5d001eec}, +- {0x0000a570, 0x730c6634, 0x730c6634, 0x5d001eec, 0x5d001eec}, +- {0x0000a574, 0x730c6634, 0x730c6634, 0x5d001eec, 0x5d001eec}, +- {0x0000a578, 0x730c6634, 0x730c6634, 0x5d001eec, 0x5d001eec}, +- {0x0000a57c, 0x730c6634, 0x730c6634, 0x5d001eec, 0x5d001eec}, ++ {0x0000a510, 0x16000220, 0x16000220, 0x0f000202, 0x0f000202}, ++ {0x0000a514, 0x1c000223, 0x1c000223, 0x12000400, 0x12000400}, ++ {0x0000a518, 0x21002220, 0x21002220, 0x16000402, 0x16000402}, ++ {0x0000a51c, 0x27002223, 0x27002223, 0x19000404, 0x19000404}, ++ {0x0000a520, 0x2b022220, 0x2b022220, 0x1c000603, 0x1c000603}, ++ {0x0000a524, 0x2f022222, 0x2f022222, 0x21000a02, 0x21000a02}, ++ {0x0000a528, 0x34022225, 0x34022225, 0x25000a04, 0x25000a04}, ++ {0x0000a52c, 0x3a02222a, 0x3a02222a, 0x28000a20, 0x28000a20}, ++ {0x0000a530, 0x3e02222c, 0x3e02222c, 0x2c000e20, 0x2c000e20}, ++ {0x0000a534, 0x4202242a, 0x4202242a, 0x30000e22, 0x30000e22}, ++ {0x0000a538, 0x4702244a, 0x4702244a, 0x34000e24, 0x34000e24}, ++ {0x0000a53c, 0x4b02244c, 0x4b02244c, 0x38001640, 0x38001640}, ++ {0x0000a540, 0x4e02246c, 0x4e02246c, 0x3c001660, 0x3c001660}, ++ {0x0000a544, 
0x52022470, 0x52022470, 0x3f001861, 0x3f001861}, ++ {0x0000a548, 0x55022490, 0x55022490, 0x43001a81, 0x43001a81}, ++ {0x0000a54c, 0x59022492, 0x59022492, 0x47001a83, 0x47001a83}, ++ {0x0000a550, 0x5d022692, 0x5d022692, 0x4a001c84, 0x4a001c84}, ++ {0x0000a554, 0x61022892, 0x61022892, 0x4e001ce3, 0x4e001ce3}, ++ {0x0000a558, 0x65024890, 0x65024890, 0x52001ce5, 0x52001ce5}, ++ {0x0000a55c, 0x69024892, 0x69024892, 0x56001ce9, 0x56001ce9}, ++ {0x0000a560, 0x6e024c92, 0x6e024c92, 0x5a001ceb, 0x5a001ceb}, ++ {0x0000a564, 0x74026e92, 0x74026e92, 0x5d001eec, 0x5d001eec}, ++ {0x0000a568, 0x74026e92, 0x74026e92, 0x5d001eec, 0x5d001eec}, ++ {0x0000a56c, 0x74026e92, 0x74026e92, 0x5d001eec, 0x5d001eec}, ++ {0x0000a570, 0x74026e92, 0x74026e92, 0x5d001eec, 0x5d001eec}, ++ {0x0000a574, 0x74026e92, 0x74026e92, 0x5d001eec, 0x5d001eec}, ++ {0x0000a578, 0x74026e92, 0x74026e92, 0x5d001eec, 0x5d001eec}, ++ {0x0000a57c, 0x74026e92, 0x74026e92, 0x5d001eec, 0x5d001eec}, + {0x0000a580, 0x00800000, 0x00800000, 0x00800000, 0x00800000}, + {0x0000a584, 0x06800003, 0x06800003, 0x04800002, 0x04800002}, + {0x0000a588, 0x0a800020, 0x0a800020, 0x08800004, 0x08800004}, + {0x0000a58c, 0x10800023, 0x10800023, 0x0b800200, 0x0b800200}, +- {0x0000a590, 0x15800028, 0x15800028, 0x0f800202, 0x0f800202}, +- {0x0000a594, 0x1b80002b, 0x1b80002b, 0x12800400, 0x12800400}, +- {0x0000a598, 0x1f820028, 0x1f820028, 0x16800402, 0x16800402}, +- {0x0000a59c, 0x2582002b, 0x2582002b, 0x19800404, 0x19800404}, +- {0x0000a5a0, 0x2a84002a, 0x2a84002a, 0x1c800603, 0x1c800603}, +- {0x0000a5a4, 0x2e86002a, 0x2e86002a, 0x21800a02, 0x21800a02}, +- {0x0000a5a8, 0x3382202d, 0x3382202d, 0x25800a04, 0x25800a04}, +- {0x0000a5ac, 0x3884202c, 0x3884202c, 0x28800a20, 0x28800a20}, +- {0x0000a5b0, 0x3c86202c, 0x3c86202c, 0x2c800e20, 0x2c800e20}, +- {0x0000a5b4, 0x4188202d, 0x4188202d, 0x30800e22, 0x30800e22}, +- {0x0000a5b8, 0x4586402d, 0x4586402d, 0x34800e24, 0x34800e24}, +- {0x0000a5bc, 0x4986222d, 0x4986222d, 0x38801640, 0x38801640}, +- {0x0000a5c0, 0x4d862231, 0x4d862231, 0x3c801660, 0x3c801660}, +- {0x0000a5c4, 0x50882231, 0x50882231, 0x3f801861, 0x3f801861}, +- {0x0000a5c8, 0x5688422e, 0x5688422e, 0x43801a81, 0x43801a81}, +- {0x0000a5cc, 0x5a88442e, 0x5a88442e, 0x47801a83, 0x47801a83}, +- {0x0000a5d0, 0x5e8a4431, 0x5e8a4431, 0x4a801c84, 0x4a801c84}, +- {0x0000a5d4, 0x648a4432, 0x648a4432, 0x4e801ce3, 0x4e801ce3}, +- {0x0000a5d8, 0x688a4434, 0x688a4434, 0x52801ce5, 0x52801ce5}, +- {0x0000a5dc, 0x6c8a6434, 0x6c8a6434, 0x56801ce9, 0x56801ce9}, +- {0x0000a5e0, 0x6f8a6633, 0x6f8a6633, 0x5a801ceb, 0x5a801ceb}, +- {0x0000a5e4, 0x738c6634, 0x738c6634, 0x5d801eec, 0x5d801eec}, +- {0x0000a5e8, 0x738c6634, 0x738c6634, 0x5d801eec, 0x5d801eec}, +- {0x0000a5ec, 0x738c6634, 0x738c6634, 0x5d801eec, 0x5d801eec}, +- {0x0000a5f0, 0x738c6634, 0x738c6634, 0x5d801eec, 0x5d801eec}, +- {0x0000a5f4, 0x738c6634, 0x738c6634, 0x5d801eec, 0x5d801eec}, +- {0x0000a5f8, 0x738c6634, 0x738c6634, 0x5d801eec, 0x5d801eec}, +- {0x0000a5fc, 0x738c6634, 0x738c6634, 0x5d801eec, 0x5d801eec}, ++ {0x0000a590, 0x16800220, 0x16800220, 0x0f800202, 0x0f800202}, ++ {0x0000a594, 0x1c800223, 0x1c800223, 0x12800400, 0x12800400}, ++ {0x0000a598, 0x21802220, 0x21802220, 0x16800402, 0x16800402}, ++ {0x0000a59c, 0x27802223, 0x27802223, 0x19800404, 0x19800404}, ++ {0x0000a5a0, 0x2b822220, 0x2b822220, 0x1c800603, 0x1c800603}, ++ {0x0000a5a4, 0x2f822222, 0x2f822222, 0x21800a02, 0x21800a02}, ++ {0x0000a5a8, 0x34822225, 0x34822225, 0x25800a04, 0x25800a04}, ++ {0x0000a5ac, 0x3a82222a, 0x3a82222a, 0x28800a20, 0x28800a20}, 
++ {0x0000a5b0, 0x3e82222c, 0x3e82222c, 0x2c800e20, 0x2c800e20}, ++ {0x0000a5b4, 0x4282242a, 0x4282242a, 0x30800e22, 0x30800e22}, ++ {0x0000a5b8, 0x4782244a, 0x4782244a, 0x34800e24, 0x34800e24}, ++ {0x0000a5bc, 0x4b82244c, 0x4b82244c, 0x38801640, 0x38801640}, ++ {0x0000a5c0, 0x4e82246c, 0x4e82246c, 0x3c801660, 0x3c801660}, ++ {0x0000a5c4, 0x52822470, 0x52822470, 0x3f801861, 0x3f801861}, ++ {0x0000a5c8, 0x55822490, 0x55822490, 0x43801a81, 0x43801a81}, ++ {0x0000a5cc, 0x59822492, 0x59822492, 0x47801a83, 0x47801a83}, ++ {0x0000a5d0, 0x5d822692, 0x5d822692, 0x4a801c84, 0x4a801c84}, ++ {0x0000a5d4, 0x61822892, 0x61822892, 0x4e801ce3, 0x4e801ce3}, ++ {0x0000a5d8, 0x65824890, 0x65824890, 0x52801ce5, 0x52801ce5}, ++ {0x0000a5dc, 0x69824892, 0x69824892, 0x56801ce9, 0x56801ce9}, ++ {0x0000a5e0, 0x6e824c92, 0x6e824c92, 0x5a801ceb, 0x5a801ceb}, ++ {0x0000a5e4, 0x74826e92, 0x74826e92, 0x5d801eec, 0x5d801eec}, ++ {0x0000a5e8, 0x74826e92, 0x74826e92, 0x5d801eec, 0x5d801eec}, ++ {0x0000a5ec, 0x74826e92, 0x74826e92, 0x5d801eec, 0x5d801eec}, ++ {0x0000a5f0, 0x74826e92, 0x74826e92, 0x5d801eec, 0x5d801eec}, ++ {0x0000a5f4, 0x74826e92, 0x74826e92, 0x5d801eec, 0x5d801eec}, ++ {0x0000a5f8, 0x74826e92, 0x74826e92, 0x5d801eec, 0x5d801eec}, ++ {0x0000a5fc, 0x74826e92, 0x74826e92, 0x5d801eec, 0x5d801eec}, + {0x0000a600, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, + {0x0000a604, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, +- {0x0000a608, 0x01804601, 0x01804601, 0x00000000, 0x00000000}, +- {0x0000a60c, 0x01804601, 0x01804601, 0x00000000, 0x00000000}, +- {0x0000a610, 0x01804601, 0x01804601, 0x00000000, 0x00000000}, +- {0x0000a614, 0x01804601, 0x01804601, 0x01404000, 0x01404000}, +- {0x0000a618, 0x01804601, 0x01804601, 0x01404501, 0x01404501}, +- {0x0000a61c, 0x01804601, 0x01804601, 0x02008501, 0x02008501}, +- {0x0000a620, 0x03408d02, 0x03408d02, 0x0280ca03, 0x0280ca03}, +- {0x0000a624, 0x0300cc03, 0x0300cc03, 0x03010c04, 0x03010c04}, +- {0x0000a628, 0x03410d04, 0x03410d04, 0x04014c04, 0x04014c04}, +- {0x0000a62c, 0x03410d04, 0x03410d04, 0x04015005, 0x04015005}, +- {0x0000a630, 0x03410d04, 0x03410d04, 0x04015005, 0x04015005}, +- {0x0000a634, 0x03410d04, 0x03410d04, 0x04015005, 0x04015005}, +- {0x0000a638, 0x03410d04, 0x03410d04, 0x04015005, 0x04015005}, +- {0x0000a63c, 0x03410d04, 0x03410d04, 0x04015005, 0x04015005}, +- {0x0000b2dc, 0x000cfff0, 0x000cfff0, 0x03aaa352, 0x03aaa352}, +- {0x0000b2e0, 0x000f0000, 0x000f0000, 0x03ccc584, 0x03ccc584}, +- {0x0000b2e4, 0x03f00000, 0x03f00000, 0x03f0f800, 0x03f0f800}, ++ {0x0000a608, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, ++ {0x0000a60c, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, ++ {0x0000a610, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, ++ {0x0000a614, 0x02004000, 0x02004000, 0x01404000, 0x01404000}, ++ {0x0000a618, 0x02004801, 0x02004801, 0x01404501, 0x01404501}, ++ {0x0000a61c, 0x02808a02, 0x02808a02, 0x02008501, 0x02008501}, ++ {0x0000a620, 0x0380ce03, 0x0380ce03, 0x0280ca03, 0x0280ca03}, ++ {0x0000a624, 0x04411104, 0x04411104, 0x03010c04, 0x03010c04}, ++ {0x0000a628, 0x04411104, 0x04411104, 0x04014c04, 0x04014c04}, ++ {0x0000a62c, 0x04411104, 0x04411104, 0x04015005, 0x04015005}, ++ {0x0000a630, 0x04411104, 0x04411104, 0x04015005, 0x04015005}, ++ {0x0000a634, 0x04411104, 0x04411104, 0x04015005, 0x04015005}, ++ {0x0000a638, 0x04411104, 0x04411104, 0x04015005, 0x04015005}, ++ {0x0000a63c, 0x04411104, 0x04411104, 0x04015005, 0x04015005}, ++ {0x0000b2dc, 0x00033800, 0x00033800, 0x03aaa352, 0x03aaa352}, ++ {0x0000b2e0, 0x0003c000, 0x0003c000, 
0x03ccc584, 0x03ccc584}, ++ {0x0000b2e4, 0x03fc0000, 0x03fc0000, 0x03f0f800, 0x03f0f800}, + {0x0000b2e8, 0x00000000, 0x00000000, 0x03ff0000, 0x03ff0000}, +- {0x0000c2dc, 0x000cfff0, 0x000cfff0, 0x03aaa352, 0x03aaa352}, +- {0x0000c2e0, 0x000f0000, 0x000f0000, 0x03ccc584, 0x03ccc584}, +- {0x0000c2e4, 0x03f00000, 0x03f00000, 0x03f0f800, 0x03f0f800}, ++ {0x0000c2dc, 0x00033800, 0x00033800, 0x03aaa352, 0x03aaa352}, ++ {0x0000c2e0, 0x0003c000, 0x0003c000, 0x03ccc584, 0x03ccc584}, ++ {0x0000c2e4, 0x03fc0000, 0x03fc0000, 0x03f0f800, 0x03f0f800}, + {0x0000c2e8, 0x00000000, 0x00000000, 0x03ff0000, 0x03ff0000}, + {0x00016044, 0x012492d4, 0x012492d4, 0x012492d4, 0x012492d4}, +- {0x00016048, 0x61200001, 0x61200001, 0x66480001, 0x66480001}, ++ {0x00016048, 0x66480001, 0x66480001, 0x66480001, 0x66480001}, + {0x00016068, 0x6db6db6c, 0x6db6db6c, 0x6db6db6c, 0x6db6db6c}, + {0x00016444, 0x012492d4, 0x012492d4, 0x012492d4, 0x012492d4}, +- {0x00016448, 0x61200001, 0x61200001, 0x66480001, 0x66480001}, ++ {0x00016448, 0x66480001, 0x66480001, 0x66480001, 0x66480001}, + {0x00016468, 0x6db6db6c, 0x6db6db6c, 0x6db6db6c, 0x6db6db6c}, + {0x00016844, 0x012492d4, 0x012492d4, 0x012492d4, 0x012492d4}, +- {0x00016848, 0x61200001, 0x61200001, 0x66480001, 0x66480001}, ++ {0x00016848, 0x66480001, 0x66480001, 0x66480001, 0x66480001}, + {0x00016868, 0x6db6db6c, 0x6db6db6c, 0x6db6db6c, 0x6db6db6c}, + }; + +diff --git a/drivers/net/wireless/ipw2x00/ipw2200.c b/drivers/net/wireless/ipw2x00/ipw2200.c +index 56bd370..da567f0 100644 +--- a/drivers/net/wireless/ipw2x00/ipw2200.c ++++ b/drivers/net/wireless/ipw2x00/ipw2200.c +@@ -10463,7 +10463,7 @@ static void ipw_handle_promiscuous_tx(struct ipw_priv *priv, + } else + len = src->len; + +- dst = alloc_skb(len + sizeof(*rt_hdr), GFP_ATOMIC); ++ dst = alloc_skb(len + sizeof(*rt_hdr) + sizeof(u16)*2, GFP_ATOMIC); + if (!dst) + continue; + +diff --git a/drivers/net/wireless/iwlwifi/iwl-6000.c b/drivers/net/wireless/iwlwifi/iwl-6000.c +index 9b6b010..4ac4ef0 100644 +--- a/drivers/net/wireless/iwlwifi/iwl-6000.c ++++ b/drivers/net/wireless/iwlwifi/iwl-6000.c +@@ -193,7 +193,7 @@ static int iwl6000_hw_channel_switch(struct iwl_priv *priv, + * See iwlagn_mac_channel_switch. 
+ */ + struct iwl_rxon_context *ctx = &priv->contexts[IWL_RXON_CTX_BSS]; +- struct iwl6000_channel_switch_cmd cmd; ++ struct iwl6000_channel_switch_cmd *cmd; + const struct iwl_channel_info *ch_info; + u32 switch_time_in_usec, ucode_switch_time; + u16 ch; +@@ -203,18 +203,25 @@ static int iwl6000_hw_channel_switch(struct iwl_priv *priv, + struct ieee80211_vif *vif = ctx->vif; + struct iwl_host_cmd hcmd = { + .id = REPLY_CHANNEL_SWITCH, +- .len = { sizeof(cmd), }, ++ .len = { sizeof(*cmd), }, + .flags = CMD_SYNC, +- .data = { &cmd, }, ++ .dataflags[0] = IWL_HCMD_DFL_NOCOPY, + }; ++ int err; + +- cmd.band = priv->band == IEEE80211_BAND_2GHZ; ++ cmd = kzalloc(sizeof(*cmd), GFP_KERNEL); ++ if (!cmd) ++ return -ENOMEM; ++ ++ hcmd.data[0] = cmd; ++ ++ cmd->band = priv->band == IEEE80211_BAND_2GHZ; + ch = ch_switch->channel->hw_value; + IWL_DEBUG_11H(priv, "channel switch from %u to %u\n", + ctx->active.channel, ch); +- cmd.channel = cpu_to_le16(ch); +- cmd.rxon_flags = ctx->staging.flags; +- cmd.rxon_filter_flags = ctx->staging.filter_flags; ++ cmd->channel = cpu_to_le16(ch); ++ cmd->rxon_flags = ctx->staging.flags; ++ cmd->rxon_filter_flags = ctx->staging.filter_flags; + switch_count = ch_switch->count; + tsf_low = ch_switch->timestamp & 0x0ffffffff; + /* +@@ -230,30 +237,32 @@ static int iwl6000_hw_channel_switch(struct iwl_priv *priv, + switch_count = 0; + } + if (switch_count <= 1) +- cmd.switch_time = cpu_to_le32(priv->ucode_beacon_time); ++ cmd->switch_time = cpu_to_le32(priv->ucode_beacon_time); + else { + switch_time_in_usec = + vif->bss_conf.beacon_int * switch_count * TIME_UNIT; + ucode_switch_time = iwl_usecs_to_beacons(priv, + switch_time_in_usec, + beacon_interval); +- cmd.switch_time = iwl_add_beacon_time(priv, +- priv->ucode_beacon_time, +- ucode_switch_time, +- beacon_interval); ++ cmd->switch_time = iwl_add_beacon_time(priv, ++ priv->ucode_beacon_time, ++ ucode_switch_time, ++ beacon_interval); + } + IWL_DEBUG_11H(priv, "uCode time for the switch is 0x%x\n", +- cmd.switch_time); ++ cmd->switch_time); + ch_info = iwl_get_channel_info(priv, priv->band, ch); + if (ch_info) +- cmd.expect_beacon = is_channel_radar(ch_info); ++ cmd->expect_beacon = is_channel_radar(ch_info); + else { + IWL_ERR(priv, "invalid channel switch from %u to %u\n", + ctx->active.channel, ch); + return -EFAULT; + } + +- return iwl_trans_send_cmd(trans(priv), &hcmd); ++ err = iwl_trans_send_cmd(trans(priv), &hcmd); ++ kfree(cmd); ++ return err; + } + + static struct iwl_lib_ops iwl6000_lib = { +diff --git a/drivers/pcmcia/pxa2xx_sharpsl.c b/drivers/pcmcia/pxa2xx_sharpsl.c +index 69ae2fd..b938163 100644 +--- a/drivers/pcmcia/pxa2xx_sharpsl.c ++++ b/drivers/pcmcia/pxa2xx_sharpsl.c +@@ -219,7 +219,7 @@ static void sharpsl_pcmcia_socket_suspend(struct soc_pcmcia_socket *skt) + sharpsl_pcmcia_init_reset(skt); + } + +-static struct pcmcia_low_level sharpsl_pcmcia_ops __initdata = { ++static struct pcmcia_low_level sharpsl_pcmcia_ops = { + .owner = THIS_MODULE, + .hw_init = sharpsl_pcmcia_hw_init, + .hw_shutdown = sharpsl_pcmcia_hw_shutdown, +diff --git a/drivers/platform/x86/samsung-laptop.c b/drivers/platform/x86/samsung-laptop.c +index af1e296..21bc1a7 100644 +--- a/drivers/platform/x86/samsung-laptop.c ++++ b/drivers/platform/x86/samsung-laptop.c +@@ -21,6 +21,7 @@ + #include + #include + #include ++#include + + /* + * This driver is needed because a number of Samsung laptops do not hook +@@ -226,6 +227,7 @@ static struct backlight_device *backlight_device; + static struct mutex sabi_mutex; + static struct 
platform_device *sdev; + static struct rfkill *rfk; ++static bool handle_backlight; + static bool has_stepping_quirk; + + static int force; +@@ -602,6 +604,13 @@ static int __init samsung_init(void) + int retval; + + mutex_init(&sabi_mutex); ++ handle_backlight = true; ++ ++#ifdef CONFIG_ACPI ++ /* Don't handle backlight here if the acpi video already handle it */ ++ if (acpi_video_backlight_support()) ++ handle_backlight = false; ++#endif + + if (!force && !dmi_check_system(samsung_dmi_table)) + return -ENODEV; +@@ -661,7 +670,8 @@ static int __init samsung_init(void) + printk(KERN_DEBUG "ifaceP = 0x%08x\n", ifaceP); + printk(KERN_DEBUG "sabi_iface = %p\n", sabi_iface); + +- test_backlight(); ++ if (handle_backlight) ++ test_backlight(); + test_wireless(); + + retval = sabi_get_command(sabi_config->commands.get_brightness, +@@ -680,13 +690,23 @@ static int __init samsung_init(void) + } + + /* Check for stepping quirk */ +- check_for_stepping_quirk(); ++ if (handle_backlight) ++ check_for_stepping_quirk(); ++ ++#ifdef CONFIG_ACPI ++ /* Only log that if we are really on a sabi platform */ ++ if (acpi_video_backlight_support()) ++ pr_info("Backlight controlled by ACPI video driver\n"); ++#endif + + /* knock up a platform device to hang stuff off of */ + sdev = platform_device_register_simple("samsung", -1, NULL, 0); + if (IS_ERR(sdev)) + goto error_no_platform; + ++ if (!handle_backlight) ++ goto skip_backlight; ++ + /* create a backlight device to talk to this one */ + memset(&props, 0, sizeof(struct backlight_properties)); + props.type = BACKLIGHT_PLATFORM; +@@ -702,6 +722,7 @@ static int __init samsung_init(void) + backlight_device->props.power = FB_BLANK_UNBLANK; + backlight_update_status(backlight_device); + ++skip_backlight: + retval = init_wireless(sdev); + if (retval) + goto error_no_rfk; +diff --git a/drivers/rtc/rtc-imxdi.c b/drivers/rtc/rtc-imxdi.c +index d93a960..bc744b4 100644 +--- a/drivers/rtc/rtc-imxdi.c ++++ b/drivers/rtc/rtc-imxdi.c +@@ -392,6 +392,8 @@ static int dryice_rtc_probe(struct platform_device *pdev) + if (imxdi->ioaddr == NULL) + return -ENOMEM; + ++ spin_lock_init(&imxdi->irq_lock); ++ + imxdi->irq = platform_get_irq(pdev, 0); + if (imxdi->irq < 0) + return imxdi->irq; +diff --git a/drivers/scsi/scsi_debug.c b/drivers/scsi/scsi_debug.c +index 6888b2c..b3a729c 100644 +--- a/drivers/scsi/scsi_debug.c ++++ b/drivers/scsi/scsi_debug.c +@@ -2045,8 +2045,7 @@ static void unmap_region(sector_t lba, unsigned int len) + block = lba + alignment; + rem = do_div(block, granularity); + +- if (rem == 0 && lba + granularity <= end && +- block < map_size) ++ if (rem == 0 && lba + granularity < end && block < map_size) + clear_bit(block, map_storep); + + lba += granularity - rem; +diff --git a/drivers/staging/comedi/drivers/amplc_pc236.c b/drivers/staging/comedi/drivers/amplc_pc236.c +index 48246cd..b4311bf 100644 +--- a/drivers/staging/comedi/drivers/amplc_pc236.c ++++ b/drivers/staging/comedi/drivers/amplc_pc236.c +@@ -470,7 +470,7 @@ static int pc236_detach(struct comedi_device *dev) + { + printk(KERN_DEBUG "comedi%d: %s: detach\n", dev->minor, + PC236_DRIVER_NAME); +- if (devpriv) ++ if (dev->iobase) + pc236_intr_disable(dev); + + if (dev->irq) +diff --git a/drivers/staging/hv/storvsc_drv.c b/drivers/staging/hv/storvsc_drv.c +index ae8c33e..abc5ac5 100644 +--- a/drivers/staging/hv/storvsc_drv.c ++++ b/drivers/staging/hv/storvsc_drv.c +@@ -1043,7 +1043,12 @@ static int storvsc_host_reset(struct hv_device *device) + /* + * At this point, all outstanding requests in the adapter 
+ * should have been flushed out and return to us ++ * There is a potential race here where the host may be in ++ * the process of responding when we return from here. ++ * Just wait for all in-transit packets to be accounted for ++ * before we return from here. + */ ++ storvsc_wait_to_drain(stor_device); + + cleanup: + return ret; +diff --git a/drivers/usb/class/cdc-acm.c b/drivers/usb/class/cdc-acm.c +index dbf7d20..df7f15d 100644 +--- a/drivers/usb/class/cdc-acm.c ++++ b/drivers/usb/class/cdc-acm.c +@@ -760,10 +760,6 @@ static const __u32 acm_tty_speed[] = { + 2500000, 3000000, 3500000, 4000000 + }; + +-static const __u8 acm_tty_size[] = { +- 5, 6, 7, 8 +-}; +- + static void acm_tty_set_termios(struct tty_struct *tty, + struct ktermios *termios_old) + { +@@ -780,7 +776,21 @@ static void acm_tty_set_termios(struct tty_struct *tty, + newline.bParityType = termios->c_cflag & PARENB ? + (termios->c_cflag & PARODD ? 1 : 2) + + (termios->c_cflag & CMSPAR ? 2 : 0) : 0; +- newline.bDataBits = acm_tty_size[(termios->c_cflag & CSIZE) >> 4]; ++ switch (termios->c_cflag & CSIZE) { ++ case CS5: ++ newline.bDataBits = 5; ++ break; ++ case CS6: ++ newline.bDataBits = 6; ++ break; ++ case CS7: ++ newline.bDataBits = 7; ++ break; ++ case CS8: ++ default: ++ newline.bDataBits = 8; ++ break; ++ } + /* FIXME: Needs to clear unsupported bits in the termios */ + acm->clocal = ((termios->c_cflag & CLOCAL) != 0); + +@@ -1172,7 +1182,7 @@ made_compressed_probe: + + if (usb_endpoint_xfer_int(epwrite)) + usb_fill_int_urb(snd->urb, usb_dev, +- usb_sndbulkpipe(usb_dev, epwrite->bEndpointAddress), ++ usb_sndintpipe(usb_dev, epwrite->bEndpointAddress), + NULL, acm->writesize, acm_write_bulk, snd, epwrite->bInterval); + else + usb_fill_bulk_urb(snd->urb, usb_dev, +@@ -1496,6 +1506,9 @@ static const struct usb_device_id acm_ids[] = { + Maybe we should define a new + quirk for this. 
*/ + }, ++ { USB_DEVICE(0x0572, 0x1340), /* Conexant CX93010-2x UCMxx */ ++ .driver_info = NO_UNION_NORMAL, ++ }, + { USB_DEVICE(0x1bbb, 0x0003), /* Alcatel OT-I650 */ + .driver_info = NO_UNION_NORMAL, /* reports zero length descriptor */ + }, +diff --git a/drivers/usb/core/hub.c b/drivers/usb/core/hub.c +index 52340cc..a9a74d2 100644 +--- a/drivers/usb/core/hub.c ++++ b/drivers/usb/core/hub.c +@@ -482,13 +482,16 @@ static void hub_tt_work(struct work_struct *work) + int limit = 100; + + spin_lock_irqsave (&hub->tt.lock, flags); +- while (--limit && !list_empty (&hub->tt.clear_list)) { ++ while (!list_empty(&hub->tt.clear_list)) { + struct list_head *next; + struct usb_tt_clear *clear; + struct usb_device *hdev = hub->hdev; + const struct hc_driver *drv; + int status; + ++ if (!hub->quiescing && --limit < 0) ++ break; ++ + next = hub->tt.clear_list.next; + clear = list_entry (next, struct usb_tt_clear, clear_list); + list_del (&clear->clear_list); +@@ -952,7 +955,7 @@ static void hub_quiesce(struct usb_hub *hub, enum hub_quiescing_type type) + if (hub->has_indicators) + cancel_delayed_work_sync(&hub->leds); + if (hub->tt.hub) +- cancel_work_sync(&hub->tt.clear_work); ++ flush_work_sync(&hub->tt.clear_work); + } + + /* caller has locked the hub device */ +diff --git a/drivers/usb/host/pci-quirks.c b/drivers/usb/host/pci-quirks.c +index d0ec2f0..c2815a5 100644 +--- a/drivers/usb/host/pci-quirks.c ++++ b/drivers/usb/host/pci-quirks.c +@@ -545,7 +545,14 @@ static const struct dmi_system_id __devinitconst ehci_dmi_nohandoff_table[] = { + /* Pegatron Lucid (Ordissimo AIRIS) */ + .matches = { + DMI_MATCH(DMI_BOARD_NAME, "M11JB"), +- DMI_MATCH(DMI_BIOS_VERSION, "Lucid-GE-133"), ++ DMI_MATCH(DMI_BIOS_VERSION, "Lucid-"), ++ }, ++ }, ++ { ++ /* Pegatron Lucid (Ordissimo) */ ++ .matches = { ++ DMI_MATCH(DMI_BOARD_NAME, "Ordissimo"), ++ DMI_MATCH(DMI_BIOS_VERSION, "Lucid-"), + }, + }, + { } +diff --git a/drivers/usb/host/xhci-ring.c b/drivers/usb/host/xhci-ring.c +index 950aef8..0c6fb19 100644 +--- a/drivers/usb/host/xhci-ring.c ++++ b/drivers/usb/host/xhci-ring.c +@@ -1212,6 +1212,17 @@ static void xhci_cmd_to_noop(struct xhci_hcd *xhci, struct xhci_cd *cur_cd) + cur_seg = find_trb_seg(xhci->cmd_ring->first_seg, + xhci->cmd_ring->dequeue, &cycle_state); + ++ if (!cur_seg) { ++ xhci_warn(xhci, "Command ring mismatch, dequeue = %p %llx (dma)\n", ++ xhci->cmd_ring->dequeue, ++ (unsigned long long) ++ xhci_trb_virt_to_dma(xhci->cmd_ring->deq_seg, ++ xhci->cmd_ring->dequeue)); ++ xhci_debug_ring(xhci, xhci->cmd_ring); ++ xhci_dbg_ring_ptrs(xhci, xhci->cmd_ring); ++ return; ++ } ++ + /* find the command trb matched by cd from command ring */ + for (cmd_trb = xhci->cmd_ring->dequeue; + cmd_trb != xhci->cmd_ring->enqueue; +diff --git a/drivers/usb/host/xhci.c b/drivers/usb/host/xhci.c +index f5c0f38..5a23f4d 100644 +--- a/drivers/usb/host/xhci.c ++++ b/drivers/usb/host/xhci.c +@@ -471,7 +471,8 @@ static bool compliance_mode_recovery_timer_quirk_check(void) + + if (strstr(dmi_product_name, "Z420") || + strstr(dmi_product_name, "Z620") || +- strstr(dmi_product_name, "Z820")) ++ strstr(dmi_product_name, "Z820") || ++ strstr(dmi_product_name, "Z1")) + return true; + + return false; +diff --git a/drivers/usb/serial/mct_u232.c b/drivers/usb/serial/mct_u232.c +index 42de17b..d3addb2 100644 +--- a/drivers/usb/serial/mct_u232.c ++++ b/drivers/usb/serial/mct_u232.c +@@ -577,12 +577,14 @@ static void mct_u232_close(struct usb_serial_port *port) + { + dbg("%s port %d", __func__, port->number); + +- if (port->serial->dev) 
{ +- /* shutdown our urbs */ +- usb_kill_urb(port->write_urb); +- usb_kill_urb(port->read_urb); +- usb_kill_urb(port->interrupt_in_urb); +- } ++ /* ++ * Must kill the read urb as it is actually an interrupt urb, which ++ * generic close thus fails to kill. ++ */ ++ usb_kill_urb(port->read_urb); ++ usb_kill_urb(port->interrupt_in_urb); ++ ++ usb_serial_generic_close(port); + } /* mct_u232_close */ + + +diff --git a/drivers/usb/serial/mos7840.c b/drivers/usb/serial/mos7840.c +index b150ed9..d481f80 100644 +--- a/drivers/usb/serial/mos7840.c ++++ b/drivers/usb/serial/mos7840.c +@@ -235,12 +235,10 @@ struct moschip_port { + int port_num; /*Actual port number in the device(1,2,etc) */ + struct urb *write_urb; /* write URB for this port */ + struct urb *read_urb; /* read URB for this port */ +- struct urb *int_urb; + __u8 shadowLCR; /* last LCR value received */ + __u8 shadowMCR; /* last MCR value received */ + char open; + char open_ports; +- char zombie; + wait_queue_head_t wait_chase; /* for handling sleeping while waiting for chase to finish */ + wait_queue_head_t delta_msr_wait; /* for handling sleeping while waiting for msr change to happen */ + int delta_msr_cond; +@@ -505,7 +503,6 @@ static void mos7840_control_callback(struct urb *urb) + unsigned char *data; + struct moschip_port *mos7840_port; + __u8 regval = 0x0; +- int result = 0; + int status = urb->status; + + mos7840_port = urb->context; +@@ -524,7 +521,7 @@ static void mos7840_control_callback(struct urb *urb) + default: + dbg("%s - nonzero urb status received: %d", __func__, + status); +- goto exit; ++ return; + } + + dbg("%s urb buffer size is %d", __func__, urb->actual_length); +@@ -537,17 +534,6 @@ static void mos7840_control_callback(struct urb *urb) + mos7840_handle_new_msr(mos7840_port, regval); + else if (mos7840_port->MsrLsr == 1) + mos7840_handle_new_lsr(mos7840_port, regval); +- +-exit: +- spin_lock(&mos7840_port->pool_lock); +- if (!mos7840_port->zombie) +- result = usb_submit_urb(mos7840_port->int_urb, GFP_ATOMIC); +- spin_unlock(&mos7840_port->pool_lock); +- if (result) { +- dev_err(&urb->dev->dev, +- "%s - Error %d submitting interrupt urb\n", +- __func__, result); +- } + } + + static int mos7840_get_reg(struct moschip_port *mcs, __u16 Wval, __u16 reg, +@@ -655,14 +641,7 @@ static void mos7840_interrupt_callback(struct urb *urb) + wreg = MODEM_STATUS_REGISTER; + break; + } +- spin_lock(&mos7840_port->pool_lock); +- if (!mos7840_port->zombie) { +- rv = mos7840_get_reg(mos7840_port, wval, wreg, &Data); +- } else { +- spin_unlock(&mos7840_port->pool_lock); +- return; +- } +- spin_unlock(&mos7840_port->pool_lock); ++ rv = mos7840_get_reg(mos7840_port, wval, wreg, &Data); + } + } + } +@@ -2594,7 +2573,6 @@ error: + kfree(mos7840_port->ctrl_buf); + usb_free_urb(mos7840_port->control_urb); + kfree(mos7840_port); +- serial->port[i] = NULL; + } + return status; + } +@@ -2625,9 +2603,6 @@ static void mos7840_disconnect(struct usb_serial *serial) + mos7840_port = mos7840_get_port_private(serial->port[i]); + dbg ("mos7840_port %d = %p", i, mos7840_port); + if (mos7840_port) { +- spin_lock_irqsave(&mos7840_port->pool_lock, flags); +- mos7840_port->zombie = 1; +- spin_unlock_irqrestore(&mos7840_port->pool_lock, flags); + usb_kill_urb(mos7840_port->control_urb); + } + } +@@ -2661,6 +2636,7 @@ static void mos7840_release(struct usb_serial *serial) + mos7840_port = mos7840_get_port_private(serial->port[i]); + dbg("mos7840_port %d = %p", i, mos7840_port); + if (mos7840_port) { ++ usb_free_urb(mos7840_port->control_urb); + 
kfree(mos7840_port->ctrl_buf); + kfree(mos7840_port->dr); + kfree(mos7840_port); +diff --git a/drivers/usb/serial/opticon.c b/drivers/usb/serial/opticon.c +index c248a91..d6c5ed6 100644 +--- a/drivers/usb/serial/opticon.c ++++ b/drivers/usb/serial/opticon.c +@@ -160,7 +160,11 @@ static int send_control_msg(struct usb_serial_port *port, u8 requesttype, + { + struct usb_serial *serial = port->serial; + int retval; +- u8 buffer[2]; ++ u8 *buffer; ++ ++ buffer = kzalloc(1, GFP_KERNEL); ++ if (!buffer) ++ return -ENOMEM; + + buffer[0] = val; + /* Send the message to the vendor control endpoint +@@ -169,6 +173,7 @@ static int send_control_msg(struct usb_serial_port *port, u8 requesttype, + requesttype, + USB_DIR_OUT|USB_TYPE_VENDOR|USB_RECIP_INTERFACE, + 0, 0, buffer, 1, 0); ++ kfree(buffer); + + return retval; + } +@@ -292,7 +297,7 @@ static int opticon_write(struct tty_struct *tty, struct usb_serial_port *port, + if (!dr) { + dev_err(&port->dev, "out of memory\n"); + count = -ENOMEM; +- goto error; ++ goto error_no_dr; + } + + dr->bRequestType = USB_TYPE_VENDOR | USB_RECIP_INTERFACE | USB_DIR_OUT; +@@ -322,6 +327,8 @@ static int opticon_write(struct tty_struct *tty, struct usb_serial_port *port, + + return count; + error: ++ kfree(dr); ++error_no_dr: + usb_free_urb(urb); + error_no_urb: + kfree(buffer); +diff --git a/drivers/usb/serial/option.c b/drivers/usb/serial/option.c +index 3fd4e6f..c334670 100644 +--- a/drivers/usb/serial/option.c ++++ b/drivers/usb/serial/option.c +@@ -503,11 +503,19 @@ static const struct option_blacklist_info net_intf5_blacklist = { + .reserved = BIT(5), + }; + ++static const struct option_blacklist_info net_intf6_blacklist = { ++ .reserved = BIT(6), ++}; ++ + static const struct option_blacklist_info zte_mf626_blacklist = { + .sendsetup = BIT(0) | BIT(1), + .reserved = BIT(4), + }; + ++static const struct option_blacklist_info zte_1255_blacklist = { ++ .reserved = BIT(3) | BIT(4), ++}; ++ + static const struct usb_device_id option_ids[] = { + { USB_DEVICE(OPTION_VENDOR_ID, OPTION_PRODUCT_COLT) }, + { USB_DEVICE(OPTION_VENDOR_ID, OPTION_PRODUCT_RICOLA) }, +@@ -853,13 +861,19 @@ static const struct usb_device_id option_ids[] = { + { USB_DEVICE_AND_INTERFACE_INFO(ZTE_VENDOR_ID, 0x0113, 0xff, 0xff, 0xff), + .driver_info = (kernel_ulong_t)&net_intf5_blacklist }, + { USB_DEVICE_AND_INTERFACE_INFO(ZTE_VENDOR_ID, 0x0117, 0xff, 0xff, 0xff) }, +- { USB_DEVICE_AND_INTERFACE_INFO(ZTE_VENDOR_ID, 0x0118, 0xff, 0xff, 0xff) }, +- { USB_DEVICE_AND_INTERFACE_INFO(ZTE_VENDOR_ID, 0x0121, 0xff, 0xff, 0xff) }, ++ { USB_DEVICE_AND_INTERFACE_INFO(ZTE_VENDOR_ID, 0x0118, 0xff, 0xff, 0xff), ++ .driver_info = (kernel_ulong_t)&net_intf5_blacklist }, ++ { USB_DEVICE_AND_INTERFACE_INFO(ZTE_VENDOR_ID, 0x0121, 0xff, 0xff, 0xff), ++ .driver_info = (kernel_ulong_t)&net_intf5_blacklist }, + { USB_DEVICE_AND_INTERFACE_INFO(ZTE_VENDOR_ID, 0x0122, 0xff, 0xff, 0xff) }, +- { USB_DEVICE_AND_INTERFACE_INFO(ZTE_VENDOR_ID, 0x0123, 0xff, 0xff, 0xff) }, +- { USB_DEVICE_AND_INTERFACE_INFO(ZTE_VENDOR_ID, 0x0124, 0xff, 0xff, 0xff) }, +- { USB_DEVICE_AND_INTERFACE_INFO(ZTE_VENDOR_ID, 0x0125, 0xff, 0xff, 0xff) }, +- { USB_DEVICE_AND_INTERFACE_INFO(ZTE_VENDOR_ID, 0x0126, 0xff, 0xff, 0xff) }, ++ { USB_DEVICE_AND_INTERFACE_INFO(ZTE_VENDOR_ID, 0x0123, 0xff, 0xff, 0xff), ++ .driver_info = (kernel_ulong_t)&net_intf4_blacklist }, ++ { USB_DEVICE_AND_INTERFACE_INFO(ZTE_VENDOR_ID, 0x0124, 0xff, 0xff, 0xff), ++ .driver_info = (kernel_ulong_t)&net_intf5_blacklist }, ++ { USB_DEVICE_AND_INTERFACE_INFO(ZTE_VENDOR_ID, 0x0125, 
0xff, 0xff, 0xff), ++ .driver_info = (kernel_ulong_t)&net_intf6_blacklist }, ++ { USB_DEVICE_AND_INTERFACE_INFO(ZTE_VENDOR_ID, 0x0126, 0xff, 0xff, 0xff), ++ .driver_info = (kernel_ulong_t)&net_intf5_blacklist }, + { USB_DEVICE_AND_INTERFACE_INFO(ZTE_VENDOR_ID, 0x0128, 0xff, 0xff, 0xff) }, + { USB_DEVICE_AND_INTERFACE_INFO(ZTE_VENDOR_ID, 0x0142, 0xff, 0xff, 0xff) }, + { USB_DEVICE_AND_INTERFACE_INFO(ZTE_VENDOR_ID, 0x0143, 0xff, 0xff, 0xff) }, +@@ -872,7 +886,8 @@ static const struct usb_device_id option_ids[] = { + { USB_DEVICE_AND_INTERFACE_INFO(ZTE_VENDOR_ID, 0x0156, 0xff, 0xff, 0xff) }, + { USB_DEVICE_AND_INTERFACE_INFO(ZTE_VENDOR_ID, 0x0157, 0xff, 0xff, 0xff), + .driver_info = (kernel_ulong_t)&net_intf5_blacklist }, +- { USB_DEVICE_AND_INTERFACE_INFO(ZTE_VENDOR_ID, 0x0158, 0xff, 0xff, 0xff) }, ++ { USB_DEVICE_AND_INTERFACE_INFO(ZTE_VENDOR_ID, 0x0158, 0xff, 0xff, 0xff), ++ .driver_info = (kernel_ulong_t)&net_intf3_blacklist }, + { USB_DEVICE_AND_INTERFACE_INFO(ZTE_VENDOR_ID, 0x0159, 0xff, 0xff, 0xff) }, + { USB_DEVICE_AND_INTERFACE_INFO(ZTE_VENDOR_ID, 0x0161, 0xff, 0xff, 0xff) }, + { USB_DEVICE_AND_INTERFACE_INFO(ZTE_VENDOR_ID, 0x0162, 0xff, 0xff, 0xff) }, +@@ -880,13 +895,22 @@ static const struct usb_device_id option_ids[] = { + { USB_DEVICE_AND_INTERFACE_INFO(ZTE_VENDOR_ID, 0x0165, 0xff, 0xff, 0xff) }, + { USB_DEVICE_AND_INTERFACE_INFO(ZTE_VENDOR_ID, 0x0167, 0xff, 0xff, 0xff), + .driver_info = (kernel_ulong_t)&net_intf4_blacklist }, ++ { USB_DEVICE_AND_INTERFACE_INFO(ZTE_VENDOR_ID, 0x0191, 0xff, 0xff, 0xff), /* ZTE EuFi890 */ ++ .driver_info = (kernel_ulong_t)&net_intf4_blacklist }, ++ { USB_DEVICE_AND_INTERFACE_INFO(ZTE_VENDOR_ID, 0x0199, 0xff, 0xff, 0xff), /* ZTE MF820S */ ++ .driver_info = (kernel_ulong_t)&net_intf1_blacklist }, ++ { USB_DEVICE_AND_INTERFACE_INFO(ZTE_VENDOR_ID, 0x0257, 0xff, 0xff, 0xff), /* ZTE MF821 */ ++ .driver_info = (kernel_ulong_t)&net_intf3_blacklist }, + { USB_DEVICE_AND_INTERFACE_INFO(ZTE_VENDOR_ID, 0x0326, 0xff, 0xff, 0xff), + .driver_info = (kernel_ulong_t)&net_intf4_blacklist }, + { USB_DEVICE_AND_INTERFACE_INFO(ZTE_VENDOR_ID, 0x1008, 0xff, 0xff, 0xff), + .driver_info = (kernel_ulong_t)&net_intf4_blacklist }, + { USB_DEVICE_AND_INTERFACE_INFO(ZTE_VENDOR_ID, 0x1010, 0xff, 0xff, 0xff), + .driver_info = (kernel_ulong_t)&net_intf4_blacklist }, +- { USB_DEVICE_AND_INTERFACE_INFO(ZTE_VENDOR_ID, 0x1012, 0xff, 0xff, 0xff) }, ++ { USB_DEVICE_AND_INTERFACE_INFO(ZTE_VENDOR_ID, 0x1012, 0xff, 0xff, 0xff), ++ .driver_info = (kernel_ulong_t)&net_intf4_blacklist }, ++ { USB_DEVICE_AND_INTERFACE_INFO(ZTE_VENDOR_ID, 0x1021, 0xff, 0xff, 0xff), ++ .driver_info = (kernel_ulong_t)&net_intf2_blacklist }, + { USB_DEVICE_AND_INTERFACE_INFO(ZTE_VENDOR_ID, 0x1057, 0xff, 0xff, 0xff) }, + { USB_DEVICE_AND_INTERFACE_INFO(ZTE_VENDOR_ID, 0x1058, 0xff, 0xff, 0xff) }, + { USB_DEVICE_AND_INTERFACE_INFO(ZTE_VENDOR_ID, 0x1059, 0xff, 0xff, 0xff) }, +@@ -1002,18 +1026,24 @@ static const struct usb_device_id option_ids[] = { + { USB_DEVICE_AND_INTERFACE_INFO(ZTE_VENDOR_ID, 0x1169, 0xff, 0xff, 0xff) }, + { USB_DEVICE_AND_INTERFACE_INFO(ZTE_VENDOR_ID, 0x1170, 0xff, 0xff, 0xff) }, + { USB_DEVICE_AND_INTERFACE_INFO(ZTE_VENDOR_ID, 0x1244, 0xff, 0xff, 0xff) }, +- { USB_DEVICE_AND_INTERFACE_INFO(ZTE_VENDOR_ID, 0x1245, 0xff, 0xff, 0xff) }, ++ { USB_DEVICE_AND_INTERFACE_INFO(ZTE_VENDOR_ID, 0x1245, 0xff, 0xff, 0xff), ++ .driver_info = (kernel_ulong_t)&net_intf4_blacklist }, + { USB_DEVICE_AND_INTERFACE_INFO(ZTE_VENDOR_ID, 0x1246, 0xff, 0xff, 0xff) }, +- { USB_DEVICE_AND_INTERFACE_INFO(ZTE_VENDOR_ID, 
0x1247, 0xff, 0xff, 0xff) }, ++ { USB_DEVICE_AND_INTERFACE_INFO(ZTE_VENDOR_ID, 0x1247, 0xff, 0xff, 0xff), ++ .driver_info = (kernel_ulong_t)&net_intf4_blacklist }, + { USB_DEVICE_AND_INTERFACE_INFO(ZTE_VENDOR_ID, 0x1248, 0xff, 0xff, 0xff) }, + { USB_DEVICE_AND_INTERFACE_INFO(ZTE_VENDOR_ID, 0x1249, 0xff, 0xff, 0xff) }, + { USB_DEVICE_AND_INTERFACE_INFO(ZTE_VENDOR_ID, 0x1250, 0xff, 0xff, 0xff) }, + { USB_DEVICE_AND_INTERFACE_INFO(ZTE_VENDOR_ID, 0x1251, 0xff, 0xff, 0xff) }, +- { USB_DEVICE_AND_INTERFACE_INFO(ZTE_VENDOR_ID, 0x1252, 0xff, 0xff, 0xff) }, ++ { USB_DEVICE_AND_INTERFACE_INFO(ZTE_VENDOR_ID, 0x1252, 0xff, 0xff, 0xff), ++ .driver_info = (kernel_ulong_t)&net_intf4_blacklist }, + { USB_DEVICE_AND_INTERFACE_INFO(ZTE_VENDOR_ID, 0x1253, 0xff, 0xff, 0xff) }, +- { USB_DEVICE_AND_INTERFACE_INFO(ZTE_VENDOR_ID, 0x1254, 0xff, 0xff, 0xff) }, +- { USB_DEVICE_AND_INTERFACE_INFO(ZTE_VENDOR_ID, 0x1255, 0xff, 0xff, 0xff) }, +- { USB_DEVICE_AND_INTERFACE_INFO(ZTE_VENDOR_ID, 0x1256, 0xff, 0xff, 0xff) }, ++ { USB_DEVICE_AND_INTERFACE_INFO(ZTE_VENDOR_ID, 0x1254, 0xff, 0xff, 0xff), ++ .driver_info = (kernel_ulong_t)&net_intf4_blacklist }, ++ { USB_DEVICE_AND_INTERFACE_INFO(ZTE_VENDOR_ID, 0x1255, 0xff, 0xff, 0xff), ++ .driver_info = (kernel_ulong_t)&zte_1255_blacklist }, ++ { USB_DEVICE_AND_INTERFACE_INFO(ZTE_VENDOR_ID, 0x1256, 0xff, 0xff, 0xff), ++ .driver_info = (kernel_ulong_t)&net_intf4_blacklist }, + { USB_DEVICE_AND_INTERFACE_INFO(ZTE_VENDOR_ID, 0x1257, 0xff, 0xff, 0xff) }, + { USB_DEVICE_AND_INTERFACE_INFO(ZTE_VENDOR_ID, 0x1258, 0xff, 0xff, 0xff) }, + { USB_DEVICE_AND_INTERFACE_INFO(ZTE_VENDOR_ID, 0x1259, 0xff, 0xff, 0xff) }, +@@ -1058,8 +1088,16 @@ static const struct usb_device_id option_ids[] = { + { USB_DEVICE_AND_INTERFACE_INFO(ZTE_VENDOR_ID, 0x1298, 0xff, 0xff, 0xff) }, + { USB_DEVICE_AND_INTERFACE_INFO(ZTE_VENDOR_ID, 0x1299, 0xff, 0xff, 0xff) }, + { USB_DEVICE_AND_INTERFACE_INFO(ZTE_VENDOR_ID, 0x1300, 0xff, 0xff, 0xff) }, ++ { USB_DEVICE_AND_INTERFACE_INFO(ZTE_VENDOR_ID, 0x1401, 0xff, 0xff, 0xff), ++ .driver_info = (kernel_ulong_t)&net_intf2_blacklist }, + { USB_DEVICE_AND_INTERFACE_INFO(ZTE_VENDOR_ID, 0x1402, 0xff, 0xff, 0xff), + .driver_info = (kernel_ulong_t)&net_intf2_blacklist }, ++ { USB_DEVICE_AND_INTERFACE_INFO(ZTE_VENDOR_ID, 0x1424, 0xff, 0xff, 0xff), ++ .driver_info = (kernel_ulong_t)&net_intf2_blacklist }, ++ { USB_DEVICE_AND_INTERFACE_INFO(ZTE_VENDOR_ID, 0x1425, 0xff, 0xff, 0xff), ++ .driver_info = (kernel_ulong_t)&net_intf2_blacklist }, ++ { USB_DEVICE_AND_INTERFACE_INFO(ZTE_VENDOR_ID, 0x1426, 0xff, 0xff, 0xff), /* ZTE MF91 */ ++ .driver_info = (kernel_ulong_t)&net_intf2_blacklist }, + { USB_DEVICE_AND_INTERFACE_INFO(ZTE_VENDOR_ID, 0x2002, 0xff, + 0xff, 0xff), .driver_info = (kernel_ulong_t)&zte_k3765_z_blacklist }, + { USB_DEVICE_AND_INTERFACE_INFO(ZTE_VENDOR_ID, 0x2003, 0xff, 0xff, 0xff) }, +@@ -1071,15 +1109,21 @@ static const struct usb_device_id option_ids[] = { + { USB_DEVICE_AND_INTERFACE_INFO(ZTE_VENDOR_ID, 0x0070, 0xff, 0xff, 0xff) }, + { USB_DEVICE_AND_INTERFACE_INFO(ZTE_VENDOR_ID, 0x0073, 0xff, 0xff, 0xff) }, + { USB_DEVICE_AND_INTERFACE_INFO(ZTE_VENDOR_ID, 0x0094, 0xff, 0xff, 0xff) }, +- { USB_DEVICE_AND_INTERFACE_INFO(ZTE_VENDOR_ID, 0x0130, 0xff, 0xff, 0xff) }, +- { USB_DEVICE_AND_INTERFACE_INFO(ZTE_VENDOR_ID, 0x0133, 0xff, 0xff, 0xff) }, +- { USB_DEVICE_AND_INTERFACE_INFO(ZTE_VENDOR_ID, 0x0141, 0xff, 0xff, 0xff) }, ++ { USB_DEVICE_AND_INTERFACE_INFO(ZTE_VENDOR_ID, 0x0130, 0xff, 0xff, 0xff), ++ .driver_info = (kernel_ulong_t)&net_intf1_blacklist }, ++ { 
USB_DEVICE_AND_INTERFACE_INFO(ZTE_VENDOR_ID, 0x0133, 0xff, 0xff, 0xff), ++ .driver_info = (kernel_ulong_t)&net_intf3_blacklist }, ++ { USB_DEVICE_AND_INTERFACE_INFO(ZTE_VENDOR_ID, 0x0141, 0xff, 0xff, 0xff), ++ .driver_info = (kernel_ulong_t)&net_intf5_blacklist }, + { USB_DEVICE_AND_INTERFACE_INFO(ZTE_VENDOR_ID, 0x0147, 0xff, 0xff, 0xff) }, + { USB_DEVICE_AND_INTERFACE_INFO(ZTE_VENDOR_ID, 0x0152, 0xff, 0xff, 0xff) }, +- { USB_DEVICE_AND_INTERFACE_INFO(ZTE_VENDOR_ID, 0x0168, 0xff, 0xff, 0xff) }, ++ { USB_DEVICE_AND_INTERFACE_INFO(ZTE_VENDOR_ID, 0x0168, 0xff, 0xff, 0xff), ++ .driver_info = (kernel_ulong_t)&net_intf4_blacklist }, + { USB_DEVICE_AND_INTERFACE_INFO(ZTE_VENDOR_ID, 0x0170, 0xff, 0xff, 0xff) }, +- { USB_DEVICE_AND_INTERFACE_INFO(ZTE_VENDOR_ID, 0x0176, 0xff, 0xff, 0xff) }, +- { USB_DEVICE_AND_INTERFACE_INFO(ZTE_VENDOR_ID, 0x0178, 0xff, 0xff, 0xff) }, ++ { USB_DEVICE_AND_INTERFACE_INFO(ZTE_VENDOR_ID, 0x0176, 0xff, 0xff, 0xff), ++ .driver_info = (kernel_ulong_t)&net_intf3_blacklist }, ++ { USB_DEVICE_AND_INTERFACE_INFO(ZTE_VENDOR_ID, 0x0178, 0xff, 0xff, 0xff), ++ .driver_info = (kernel_ulong_t)&net_intf3_blacklist }, + + { USB_DEVICE_AND_INTERFACE_INFO(ZTE_VENDOR_ID, ZTE_PRODUCT_CDMA_TECH, 0xff, 0xff, 0xff) }, + { USB_DEVICE_AND_INTERFACE_INFO(ZTE_VENDOR_ID, ZTE_PRODUCT_AC8710, 0xff, 0xff, 0xff) }, +diff --git a/drivers/usb/serial/sierra.c b/drivers/usb/serial/sierra.c +index 535d087..e1f1ebd 100644 +--- a/drivers/usb/serial/sierra.c ++++ b/drivers/usb/serial/sierra.c +@@ -171,7 +171,6 @@ static int sierra_probe(struct usb_serial *serial, + { + int result = 0; + struct usb_device *udev; +- struct sierra_intf_private *data; + u8 ifnum; + + udev = serial->dev; +@@ -199,11 +198,6 @@ static int sierra_probe(struct usb_serial *serial, + return -ENODEV; + } + +- data = serial->private = kzalloc(sizeof(struct sierra_intf_private), GFP_KERNEL); +- if (!data) +- return -ENOMEM; +- spin_lock_init(&data->susp_lock); +- + return result; + } + +@@ -915,6 +909,7 @@ static void sierra_dtr_rts(struct usb_serial_port *port, int on) + static int sierra_startup(struct usb_serial *serial) + { + struct usb_serial_port *port; ++ struct sierra_intf_private *intfdata; + struct sierra_port_private *portdata; + struct sierra_iface_info *himemoryp = NULL; + int i; +@@ -922,6 +917,14 @@ static int sierra_startup(struct usb_serial *serial) + + dev_dbg(&serial->dev->dev, "%s\n", __func__); + ++ intfdata = kzalloc(sizeof(*intfdata), GFP_KERNEL); ++ if (!intfdata) ++ return -ENOMEM; ++ ++ spin_lock_init(&intfdata->susp_lock); ++ ++ usb_set_serial_data(serial, intfdata); ++ + /* Set Device mode to D0 */ + sierra_set_power_state(serial->dev, 0x0000); + +@@ -937,7 +940,7 @@ static int sierra_startup(struct usb_serial *serial) + dev_dbg(&port->dev, "%s: kmalloc for " + "sierra_port_private (%d) failed!\n", + __func__, i); +- return -ENOMEM; ++ goto err; + } + spin_lock_init(&portdata->lock); + init_usb_anchor(&portdata->active); +@@ -974,6 +977,14 @@ static int sierra_startup(struct usb_serial *serial) + } + + return 0; ++err: ++ for (--i; i >= 0; --i) { ++ portdata = usb_get_serial_port_data(serial->port[i]); ++ kfree(portdata); ++ } ++ kfree(intfdata); ++ ++ return -ENOMEM; + } + + static void sierra_release(struct usb_serial *serial) +@@ -993,6 +1004,7 @@ static void sierra_release(struct usb_serial *serial) + continue; + kfree(portdata); + } ++ kfree(serial->private); + } + + #ifdef CONFIG_PM +diff --git a/drivers/usb/serial/whiteheat.c b/drivers/usb/serial/whiteheat.c +index 5b073bc..59d646d 100644 +--- 
a/drivers/usb/serial/whiteheat.c ++++ b/drivers/usb/serial/whiteheat.c +@@ -576,6 +576,7 @@ no_firmware: + "%s: please contact support@connecttech.com\n", + serial->type->description); + kfree(result); ++ kfree(command); + return -ENODEV; + + no_command_private: +diff --git a/drivers/usb/storage/unusual_devs.h b/drivers/usb/storage/unusual_devs.h +index 591f57f..fa8a1b2 100644 +--- a/drivers/usb/storage/unusual_devs.h ++++ b/drivers/usb/storage/unusual_devs.h +@@ -1004,6 +1004,12 @@ UNUSUAL_DEV( 0x07cf, 0x1001, 0x1000, 0x9999, + USB_SC_8070, USB_PR_CB, NULL, + US_FL_NEED_OVERRIDE | US_FL_FIX_INQUIRY ), + ++/* Submitted by Oleksandr Chumachenko */ ++UNUSUAL_DEV( 0x07cf, 0x1167, 0x0100, 0x0100, ++ "Casio", ++ "EX-N1 DigitalCamera", ++ USB_SC_8070, USB_PR_DEVICE, NULL, 0), ++ + /* Submitted by Hartmut Wahl */ + UNUSUAL_DEV( 0x0839, 0x000a, 0x0001, 0x0001, + "Samsung", +diff --git a/drivers/vhost/net.c b/drivers/vhost/net.c +index 882a51f..b76071e 100644 +--- a/drivers/vhost/net.c ++++ b/drivers/vhost/net.c +@@ -371,7 +371,8 @@ static void handle_rx(struct vhost_net *net) + .hdr.gso_type = VIRTIO_NET_HDR_GSO_NONE + }; + size_t total_len = 0; +- int err, headcount, mergeable; ++ int err, mergeable; ++ s16 headcount; + size_t vhost_hlen, sock_hlen; + size_t vhost_len, sock_len; + /* TODO: check that we are running from vhost_worker? */ +diff --git a/drivers/video/udlfb.c b/drivers/video/udlfb.c +index 41746bb..cb5988f 100644 +--- a/drivers/video/udlfb.c ++++ b/drivers/video/udlfb.c +@@ -646,7 +646,7 @@ static ssize_t dlfb_ops_write(struct fb_info *info, const char __user *buf, + result = fb_sys_write(info, buf, count, ppos); + + if (result > 0) { +- int start = max((int)(offset / info->fix.line_length) - 1, 0); ++ int start = max((int)(offset / info->fix.line_length), 0); + int lines = min((u32)((result / info->fix.line_length) + 1), + (u32)info->var.yres); + +diff --git a/drivers/video/via/via_clock.c b/drivers/video/via/via_clock.c +index af8f26b..db1e392 100644 +--- a/drivers/video/via/via_clock.c ++++ b/drivers/video/via/via_clock.c +@@ -25,6 +25,7 @@ + + #include + #include ++#include + #include "via_clock.h" + #include "global.h" + #include "debug.h" +@@ -289,6 +290,10 @@ static void dummy_set_pll(struct via_pll_config config) + printk(KERN_INFO "Using undocumented set PLL.\n%s", via_slap); + } + ++static void noop_set_clock_state(u8 state) ++{ ++} ++ + void via_clock_init(struct via_clock *clock, int gfx_chip) + { + switch (gfx_chip) { +@@ -346,4 +351,18 @@ void via_clock_init(struct via_clock *clock, int gfx_chip) + break; + + } ++ ++ if (machine_is_olpc()) { ++ /* The OLPC XO-1.5 cannot suspend/resume reliably if the ++ * IGA1/IGA2 clocks are set as on or off (memory rot ++ * occasionally happens during suspend under such ++ * configurations). ++ * ++ * The only known stable scenario is to leave this bits as-is, ++ * which in their default states are documented to enable the ++ * clock only when it is needed. ++ */ ++ clock->set_primary_clock_state = noop_set_clock_state; ++ clock->set_secondary_clock_state = noop_set_clock_state; ++ } + } +diff --git a/fs/ceph/export.c b/fs/ceph/export.c +index 9fbcdec..b001030 100644 +--- a/fs/ceph/export.c ++++ b/fs/ceph/export.c +@@ -91,7 +91,7 @@ static int ceph_encode_fh(struct dentry *dentry, u32 *rawfh, int *max_len, + * FIXME: we should try harder by querying the mds for the ino. 
+ */ + static struct dentry *__fh_to_dentry(struct super_block *sb, +- struct ceph_nfs_fh *fh) ++ struct ceph_nfs_fh *fh, int fh_len) + { + struct ceph_mds_client *mdsc = ceph_sb_to_client(sb)->mdsc; + struct inode *inode; +@@ -99,6 +99,9 @@ static struct dentry *__fh_to_dentry(struct super_block *sb, + struct ceph_vino vino; + int err; + ++ if (fh_len < sizeof(*fh) / 4) ++ return ERR_PTR(-ESTALE); ++ + dout("__fh_to_dentry %llx\n", fh->ino); + vino.ino = fh->ino; + vino.snap = CEPH_NOSNAP; +@@ -142,7 +145,7 @@ static struct dentry *__fh_to_dentry(struct super_block *sb, + * convert connectable fh to dentry + */ + static struct dentry *__cfh_to_dentry(struct super_block *sb, +- struct ceph_nfs_confh *cfh) ++ struct ceph_nfs_confh *cfh, int fh_len) + { + struct ceph_mds_client *mdsc = ceph_sb_to_client(sb)->mdsc; + struct inode *inode; +@@ -150,6 +153,9 @@ static struct dentry *__cfh_to_dentry(struct super_block *sb, + struct ceph_vino vino; + int err; + ++ if (fh_len < sizeof(*cfh) / 4) ++ return ERR_PTR(-ESTALE); ++ + dout("__cfh_to_dentry %llx (%llx/%x)\n", + cfh->ino, cfh->parent_ino, cfh->parent_name_hash); + +@@ -199,9 +205,11 @@ static struct dentry *ceph_fh_to_dentry(struct super_block *sb, struct fid *fid, + int fh_len, int fh_type) + { + if (fh_type == 1) +- return __fh_to_dentry(sb, (struct ceph_nfs_fh *)fid->raw); ++ return __fh_to_dentry(sb, (struct ceph_nfs_fh *)fid->raw, ++ fh_len); + else +- return __cfh_to_dentry(sb, (struct ceph_nfs_confh *)fid->raw); ++ return __cfh_to_dentry(sb, (struct ceph_nfs_confh *)fid->raw, ++ fh_len); + } + + /* +@@ -222,6 +230,8 @@ static struct dentry *ceph_fh_to_parent(struct super_block *sb, + + if (fh_type == 1) + return ERR_PTR(-ESTALE); ++ if (fh_len < sizeof(*cfh) / 4) ++ return ERR_PTR(-ESTALE); + + pr_debug("fh_to_parent %llx/%d\n", cfh->parent_ino, + cfh->parent_name_hash); +diff --git a/fs/compat_ioctl.c b/fs/compat_ioctl.c +index 51352de..f854cf9 100644 +--- a/fs/compat_ioctl.c ++++ b/fs/compat_ioctl.c +@@ -210,6 +210,8 @@ static int do_video_set_spu_palette(unsigned int fd, unsigned int cmd, + + err = get_user(palp, &up->palette); + err |= get_user(length, &up->length); ++ if (err) ++ return -EFAULT; + + up_native = compat_alloc_user_space(sizeof(struct video_spu_palette)); + err = put_user(compat_ptr(palp), &up_native->palette); +diff --git a/fs/exec.c b/fs/exec.c +index 160cd2f..121ccae 100644 +--- a/fs/exec.c ++++ b/fs/exec.c +@@ -1095,7 +1095,7 @@ int flush_old_exec(struct linux_binprm * bprm) + bprm->mm = NULL; /* We're using it now */ + + set_fs(USER_DS); +- current->flags &= ~(PF_RANDOMIZE | PF_KTHREAD); ++ current->flags &= ~(PF_RANDOMIZE | PF_KTHREAD | PF_NOFREEZE); + flush_thread(); + current->personality &= ~bprm->per_clear; + +diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c +index 54f2bdc..191580a 100644 +--- a/fs/ext4/extents.c ++++ b/fs/ext4/extents.c +@@ -2715,6 +2715,9 @@ static int ext4_ext_zeroout(struct inode *inode, struct ext4_extent *ex) + #define EXT4_EXT_MARK_UNINIT1 0x2 /* mark first half uninitialized */ + #define EXT4_EXT_MARK_UNINIT2 0x4 /* mark second half uninitialized */ + ++#define EXT4_EXT_DATA_VALID1 0x8 /* first half contains valid data */ ++#define EXT4_EXT_DATA_VALID2 0x10 /* second half contains valid data */ ++ + /* + * ext4_split_extent_at() splits an extent at given block. 
+ * +@@ -2750,6 +2753,9 @@ static int ext4_split_extent_at(handle_t *handle, + unsigned int ee_len, depth; + int err = 0; + ++ BUG_ON((split_flag & (EXT4_EXT_DATA_VALID1 | EXT4_EXT_DATA_VALID2)) == ++ (EXT4_EXT_DATA_VALID1 | EXT4_EXT_DATA_VALID2)); ++ + ext_debug("ext4_split_extents_at: inode %lu, logical" + "block %llu\n", inode->i_ino, (unsigned long long)split); + +@@ -2808,7 +2814,14 @@ static int ext4_split_extent_at(handle_t *handle, + + err = ext4_ext_insert_extent(handle, inode, path, &newex, flags); + if (err == -ENOSPC && (EXT4_EXT_MAY_ZEROOUT & split_flag)) { +- err = ext4_ext_zeroout(inode, &orig_ex); ++ if (split_flag & (EXT4_EXT_DATA_VALID1|EXT4_EXT_DATA_VALID2)) { ++ if (split_flag & EXT4_EXT_DATA_VALID1) ++ err = ext4_ext_zeroout(inode, ex2); ++ else ++ err = ext4_ext_zeroout(inode, ex); ++ } else ++ err = ext4_ext_zeroout(inode, &orig_ex); ++ + if (err) + goto fix_extent_len; + /* update the extent length and mark as initialized */ +@@ -2861,12 +2874,13 @@ static int ext4_split_extent(handle_t *handle, + uninitialized = ext4_ext_is_uninitialized(ex); + + if (map->m_lblk + map->m_len < ee_block + ee_len) { +- split_flag1 = split_flag & EXT4_EXT_MAY_ZEROOUT ? +- EXT4_EXT_MAY_ZEROOUT : 0; ++ split_flag1 = split_flag & EXT4_EXT_MAY_ZEROOUT; + flags1 = flags | EXT4_GET_BLOCKS_PRE_IO; + if (uninitialized) + split_flag1 |= EXT4_EXT_MARK_UNINIT1 | + EXT4_EXT_MARK_UNINIT2; ++ if (split_flag & EXT4_EXT_DATA_VALID2) ++ split_flag1 |= EXT4_EXT_DATA_VALID1; + err = ext4_split_extent_at(handle, inode, path, + map->m_lblk + map->m_len, split_flag1, flags1); + if (err) +@@ -2879,8 +2893,8 @@ static int ext4_split_extent(handle_t *handle, + return PTR_ERR(path); + + if (map->m_lblk >= ee_block) { +- split_flag1 = split_flag & EXT4_EXT_MAY_ZEROOUT ? +- EXT4_EXT_MAY_ZEROOUT : 0; ++ split_flag1 = split_flag & (EXT4_EXT_MAY_ZEROOUT | ++ EXT4_EXT_DATA_VALID2); + if (uninitialized) + split_flag1 |= EXT4_EXT_MARK_UNINIT1; + if (split_flag & EXT4_EXT_MARK_UNINIT2) +@@ -3158,26 +3172,47 @@ static int ext4_split_unwritten_extents(handle_t *handle, + + split_flag |= ee_block + ee_len <= eof_block ? 
EXT4_EXT_MAY_ZEROOUT : 0; + split_flag |= EXT4_EXT_MARK_UNINIT2; +- ++ if (flags & EXT4_GET_BLOCKS_CONVERT) ++ split_flag |= EXT4_EXT_DATA_VALID2; + flags |= EXT4_GET_BLOCKS_PRE_IO; + return ext4_split_extent(handle, inode, path, map, split_flag, flags); + } + + static int ext4_convert_unwritten_extents_endio(handle_t *handle, +- struct inode *inode, +- struct ext4_ext_path *path) ++ struct inode *inode, ++ struct ext4_map_blocks *map, ++ struct ext4_ext_path *path) + { + struct ext4_extent *ex; ++ ext4_lblk_t ee_block; ++ unsigned int ee_len; + int depth; + int err = 0; + + depth = ext_depth(inode); + ex = path[depth].p_ext; ++ ee_block = le32_to_cpu(ex->ee_block); ++ ee_len = ext4_ext_get_actual_len(ex); + + ext_debug("ext4_convert_unwritten_extents_endio: inode %lu, logical" + "block %llu, max_blocks %u\n", inode->i_ino, +- (unsigned long long)le32_to_cpu(ex->ee_block), +- ext4_ext_get_actual_len(ex)); ++ (unsigned long long)ee_block, ee_len); ++ ++ /* If extent is larger than requested then split is required */ ++ if (ee_block != map->m_lblk || ee_len > map->m_len) { ++ err = ext4_split_unwritten_extents(handle, inode, map, path, ++ EXT4_GET_BLOCKS_CONVERT); ++ if (err < 0) ++ goto out; ++ ext4_ext_drop_refs(path); ++ path = ext4_ext_find_extent(inode, map->m_lblk, path); ++ if (IS_ERR(path)) { ++ err = PTR_ERR(path); ++ goto out; ++ } ++ depth = ext_depth(inode); ++ ex = path[depth].p_ext; ++ } + + err = ext4_ext_get_access(handle, inode, path + depth); + if (err) +@@ -3479,7 +3514,7 @@ ext4_ext_handle_uninitialized_extents(handle_t *handle, struct inode *inode, + } + /* IO end_io complete, convert the filled extent to written */ + if ((flags & EXT4_GET_BLOCKS_CONVERT)) { +- ret = ext4_convert_unwritten_extents_endio(handle, inode, ++ ret = ext4_convert_unwritten_extents_endio(handle, inode, map, + path); + if (ret >= 0) { + ext4_update_inode_fsync_trans(handle, inode, 1); +diff --git a/fs/gfs2/export.c b/fs/gfs2/export.c +index fe9945f..5235d6e 100644 +--- a/fs/gfs2/export.c ++++ b/fs/gfs2/export.c +@@ -167,6 +167,8 @@ static struct dentry *gfs2_fh_to_dentry(struct super_block *sb, struct fid *fid, + case GFS2_SMALL_FH_SIZE: + case GFS2_LARGE_FH_SIZE: + case GFS2_OLD_FH_SIZE: ++ if (fh_len < GFS2_SMALL_FH_SIZE) ++ return NULL; + this.no_formal_ino = ((u64)be32_to_cpu(fh[0])) << 32; + this.no_formal_ino |= be32_to_cpu(fh[1]); + this.no_addr = ((u64)be32_to_cpu(fh[2])) << 32; +@@ -186,6 +188,8 @@ static struct dentry *gfs2_fh_to_parent(struct super_block *sb, struct fid *fid, + switch (fh_type) { + case GFS2_LARGE_FH_SIZE: + case GFS2_OLD_FH_SIZE: ++ if (fh_len < GFS2_LARGE_FH_SIZE) ++ return NULL; + parent.no_formal_ino = ((u64)be32_to_cpu(fh[4])) << 32; + parent.no_formal_ino |= be32_to_cpu(fh[5]); + parent.no_addr = ((u64)be32_to_cpu(fh[6])) << 32; +diff --git a/fs/isofs/export.c b/fs/isofs/export.c +index dd4687f..516eb21 100644 +--- a/fs/isofs/export.c ++++ b/fs/isofs/export.c +@@ -179,7 +179,7 @@ static struct dentry *isofs_fh_to_parent(struct super_block *sb, + { + struct isofs_fid *ifid = (struct isofs_fid *)fid; + +- if (fh_type != 2) ++ if (fh_len < 2 || fh_type != 2) + return NULL; + + return isofs_export_iget(sb, +diff --git a/fs/jbd/commit.c b/fs/jbd/commit.c +index 8799207..931bf95 100644 +--- a/fs/jbd/commit.c ++++ b/fs/jbd/commit.c +@@ -86,7 +86,12 @@ nope: + static void release_data_buffer(struct buffer_head *bh) + { + if (buffer_freed(bh)) { ++ WARN_ON_ONCE(buffer_dirty(bh)); + clear_buffer_freed(bh); ++ clear_buffer_mapped(bh); ++ clear_buffer_new(bh); ++ 
clear_buffer_req(bh); ++ bh->b_bdev = NULL; + release_buffer_page(bh); + } else + put_bh(bh); +@@ -847,17 +852,35 @@ restart_loop: + * there's no point in keeping a checkpoint record for + * it. */ + +- /* A buffer which has been freed while still being +- * journaled by a previous transaction may end up still +- * being dirty here, but we want to avoid writing back +- * that buffer in the future after the "add to orphan" +- * operation been committed, That's not only a performance +- * gain, it also stops aliasing problems if the buffer is +- * left behind for writeback and gets reallocated for another +- * use in a different page. */ +- if (buffer_freed(bh) && !jh->b_next_transaction) { +- clear_buffer_freed(bh); +- clear_buffer_jbddirty(bh); ++ /* ++ * A buffer which has been freed while still being journaled by ++ * a previous transaction. ++ */ ++ if (buffer_freed(bh)) { ++ /* ++ * If the running transaction is the one containing ++ * "add to orphan" operation (b_next_transaction != ++ * NULL), we have to wait for that transaction to ++ * commit before we can really get rid of the buffer. ++ * So just clear b_modified to not confuse transaction ++ * credit accounting and refile the buffer to ++ * BJ_Forget of the running transaction. If the just ++ * committed transaction contains "add to orphan" ++ * operation, we can completely invalidate the buffer ++ * now. We are rather throughout in that since the ++ * buffer may be still accessible when blocksize < ++ * pagesize and it is attached to the last partial ++ * page. ++ */ ++ jh->b_modified = 0; ++ if (!jh->b_next_transaction) { ++ clear_buffer_freed(bh); ++ clear_buffer_jbddirty(bh); ++ clear_buffer_mapped(bh); ++ clear_buffer_new(bh); ++ clear_buffer_req(bh); ++ bh->b_bdev = NULL; ++ } + } + + if (buffer_jbddirty(bh)) { +diff --git a/fs/jbd/transaction.c b/fs/jbd/transaction.c +index 7e59c6e..edac004 100644 +--- a/fs/jbd/transaction.c ++++ b/fs/jbd/transaction.c +@@ -1839,15 +1839,16 @@ static int __dispose_buffer(struct journal_head *jh, transaction_t *transaction) + * We're outside-transaction here. Either or both of j_running_transaction + * and j_committing_transaction may be NULL. + */ +-static int journal_unmap_buffer(journal_t *journal, struct buffer_head *bh) ++static int journal_unmap_buffer(journal_t *journal, struct buffer_head *bh, ++ int partial_page) + { + transaction_t *transaction; + struct journal_head *jh; + int may_free = 1; +- int ret; + + BUFFER_TRACE(bh, "entry"); + ++retry: + /* + * It is safe to proceed here without the j_list_lock because the + * buffers cannot be stolen by try_to_free_buffers as long as we are +@@ -1875,10 +1876,18 @@ static int journal_unmap_buffer(journal_t *journal, struct buffer_head *bh) + * clear the buffer dirty bit at latest at the moment when the + * transaction marking the buffer as freed in the filesystem + * structures is committed because from that moment on the +- * buffer can be reallocated and used by a different page. ++ * block can be reallocated and used by a different page. + * Since the block hasn't been freed yet but the inode has + * already been added to orphan list, it is safe for us to add + * the buffer to BJ_Forget list of the newest transaction. ++ * ++ * Also we have to clear buffer_mapped flag of a truncated buffer ++ * because the buffer_head may be attached to the page straddling ++ * i_size (can happen only when blocksize < pagesize) and thus the ++ * buffer_head can be reused when the file is extended again. 
So we end ++ * up keeping around invalidated buffers attached to transactions' ++ * BJ_Forget list just to stop checkpointing code from cleaning up ++ * the transaction this buffer was modified in. + */ + transaction = jh->b_transaction; + if (transaction == NULL) { +@@ -1905,13 +1914,9 @@ static int journal_unmap_buffer(journal_t *journal, struct buffer_head *bh) + * committed, the buffer won't be needed any + * longer. */ + JBUFFER_TRACE(jh, "checkpointed: add to BJ_Forget"); +- ret = __dispose_buffer(jh, ++ may_free = __dispose_buffer(jh, + journal->j_running_transaction); +- journal_put_journal_head(jh); +- spin_unlock(&journal->j_list_lock); +- jbd_unlock_bh_state(bh); +- spin_unlock(&journal->j_state_lock); +- return ret; ++ goto zap_buffer; + } else { + /* There is no currently-running transaction. So the + * orphan record which we wrote for this file must have +@@ -1919,13 +1924,9 @@ static int journal_unmap_buffer(journal_t *journal, struct buffer_head *bh) + * the committing transaction, if it exists. */ + if (journal->j_committing_transaction) { + JBUFFER_TRACE(jh, "give to committing trans"); +- ret = __dispose_buffer(jh, ++ may_free = __dispose_buffer(jh, + journal->j_committing_transaction); +- journal_put_journal_head(jh); +- spin_unlock(&journal->j_list_lock); +- jbd_unlock_bh_state(bh); +- spin_unlock(&journal->j_state_lock); +- return ret; ++ goto zap_buffer; + } else { + /* The orphan record's transaction has + * committed. We can cleanse this buffer */ +@@ -1946,10 +1947,24 @@ static int journal_unmap_buffer(journal_t *journal, struct buffer_head *bh) + } + /* + * The buffer is committing, we simply cannot touch +- * it. So we just set j_next_transaction to the +- * running transaction (if there is one) and mark +- * buffer as freed so that commit code knows it should +- * clear dirty bits when it is done with the buffer. ++ * it. If the page is straddling i_size we have to wait ++ * for commit and try again. ++ */ ++ if (partial_page) { ++ tid_t tid = journal->j_committing_transaction->t_tid; ++ ++ journal_put_journal_head(jh); ++ spin_unlock(&journal->j_list_lock); ++ jbd_unlock_bh_state(bh); ++ spin_unlock(&journal->j_state_lock); ++ log_wait_commit(journal, tid); ++ goto retry; ++ } ++ /* ++ * OK, buffer won't be reachable after truncate. We just set ++ * j_next_transaction to the running transaction (if there is ++ * one) and mark buffer as freed so that commit code knows it ++ * should clear dirty bits when it is done with the buffer. + */ + set_buffer_freed(bh); + if (journal->j_running_transaction && buffer_jbddirty(bh)) +@@ -1972,6 +1987,14 @@ static int journal_unmap_buffer(journal_t *journal, struct buffer_head *bh) + } + + zap_buffer: ++ /* ++ * This is tricky. Although the buffer is truncated, it may be reused ++ * if blocksize < pagesize and it is attached to the page straddling ++ * EOF. Since the buffer might have been added to BJ_Forget list of the ++ * running transaction, journal_get_write_access() won't clear ++ * b_modified and credit accounting gets confused. So clear b_modified ++ * here. 
*/ ++ jh->b_modified = 0; + journal_put_journal_head(jh); + zap_buffer_no_jh: + spin_unlock(&journal->j_list_lock); +@@ -2020,7 +2043,8 @@ void journal_invalidatepage(journal_t *journal, + if (offset <= curr_off) { + /* This block is wholly outside the truncation point */ + lock_buffer(bh); +- may_free &= journal_unmap_buffer(journal, bh); ++ may_free &= journal_unmap_buffer(journal, bh, ++ offset > 0); + unlock_buffer(bh); + } + curr_off = next_off; +diff --git a/fs/lockd/clntxdr.c b/fs/lockd/clntxdr.c +index 36057ce..6e2a2d5 100644 +--- a/fs/lockd/clntxdr.c ++++ b/fs/lockd/clntxdr.c +@@ -223,7 +223,7 @@ static void encode_nlm_stat(struct xdr_stream *xdr, + { + __be32 *p; + +- BUG_ON(be32_to_cpu(stat) > NLM_LCK_DENIED_GRACE_PERIOD); ++ WARN_ON_ONCE(be32_to_cpu(stat) > NLM_LCK_DENIED_GRACE_PERIOD); + p = xdr_reserve_space(xdr, 4); + *p = stat; + } +diff --git a/fs/lockd/mon.c b/fs/lockd/mon.c +index df753a1..23d7451 100644 +--- a/fs/lockd/mon.c ++++ b/fs/lockd/mon.c +@@ -40,7 +40,6 @@ struct nsm_args { + u32 proc; + + char *mon_name; +- char *nodename; + }; + + struct nsm_res { +@@ -94,7 +93,6 @@ static int nsm_mon_unmon(struct nsm_handle *nsm, u32 proc, struct nsm_res *res) + .vers = 3, + .proc = NLMPROC_NSM_NOTIFY, + .mon_name = nsm->sm_mon_name, +- .nodename = utsname()->nodename, + }; + struct rpc_message msg = { + .rpc_argp = &args, +@@ -431,7 +429,7 @@ static void encode_my_id(struct xdr_stream *xdr, const struct nsm_args *argp) + { + __be32 *p; + +- encode_nsm_string(xdr, argp->nodename); ++ encode_nsm_string(xdr, utsname()->nodename); + p = xdr_reserve_space(xdr, 4 + 4 + 4); + *p++ = cpu_to_be32(argp->prog); + *p++ = cpu_to_be32(argp->vers); +diff --git a/fs/lockd/svcproc.c b/fs/lockd/svcproc.c +index d27aab1..d413af3 100644 +--- a/fs/lockd/svcproc.c ++++ b/fs/lockd/svcproc.c +@@ -67,7 +67,8 @@ nlmsvc_retrieve_args(struct svc_rqst *rqstp, struct nlm_args *argp, + + /* Obtain file pointer. Not used by FREE_ALL call. 
*/ + if (filp != NULL) { +- if ((error = nlm_lookup_file(rqstp, &file, &lock->fh)) != 0) ++ error = cast_status(nlm_lookup_file(rqstp, &file, &lock->fh)); ++ if (error != 0) + goto no_locks; + *filp = file; + +diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c +index 4cfe260..d225b51 100644 +--- a/fs/nfsd/nfs4state.c ++++ b/fs/nfsd/nfs4state.c +@@ -3673,6 +3673,7 @@ nfsd4_close(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, + memcpy(&close->cl_stateid, &stp->st_stid.sc_stateid, sizeof(stateid_t)); + + nfsd4_close_open_stateid(stp); ++ release_last_closed_stateid(oo); + oo->oo_last_closed_stid = stp; + + /* place unused nfs4_stateowners on so_close_lru list to be +diff --git a/fs/proc/stat.c b/fs/proc/stat.c +index 0855e6f..4c9a859 100644 +--- a/fs/proc/stat.c ++++ b/fs/proc/stat.c +@@ -24,11 +24,14 @@ + + static cputime64_t get_idle_time(int cpu) + { +- u64 idle_time = get_cpu_idle_time_us(cpu, NULL); ++ u64 idle_time = -1ULL; + cputime64_t idle; + ++ if (cpu_online(cpu)) ++ idle_time = get_cpu_idle_time_us(cpu, NULL); ++ + if (idle_time == -1ULL) { +- /* !NO_HZ so we can rely on cpustat.idle */ ++ /* !NO_HZ or cpu offline so we can rely on cpustat.idle */ + idle = kstat_cpu(cpu).cpustat.idle; + idle = cputime64_add(idle, arch_idle_time(cpu)); + } else +@@ -39,11 +42,14 @@ static cputime64_t get_idle_time(int cpu) + + static cputime64_t get_iowait_time(int cpu) + { +- u64 iowait_time = get_cpu_iowait_time_us(cpu, NULL); ++ u64 iowait_time = -1ULL; + cputime64_t iowait; + ++ if (cpu_online(cpu)) ++ iowait_time = get_cpu_iowait_time_us(cpu, NULL); ++ + if (iowait_time == -1ULL) +- /* !NO_HZ so we can rely on cpustat.iowait */ ++ /* !NO_HZ or cpu offline so we can rely on cpustat.iowait */ + iowait = kstat_cpu(cpu).cpustat.iowait; + else + iowait = usecs_to_cputime64(iowait_time); +diff --git a/fs/reiserfs/inode.c b/fs/reiserfs/inode.c +index 950f13a..5809abb 100644 +--- a/fs/reiserfs/inode.c ++++ b/fs/reiserfs/inode.c +@@ -1573,8 +1573,10 @@ struct dentry *reiserfs_fh_to_dentry(struct super_block *sb, struct fid *fid, + reiserfs_warning(sb, "reiserfs-13077", + "nfsd/reiserfs, fhtype=%d, len=%d - odd", + fh_type, fh_len); +- fh_type = 5; ++ fh_type = fh_len; + } ++ if (fh_len < 2) ++ return NULL; + + return reiserfs_get_dentry(sb, fid->raw[0], fid->raw[1], + (fh_type == 3 || fh_type >= 5) ? fid->raw[2] : 0); +@@ -1583,6 +1585,8 @@ struct dentry *reiserfs_fh_to_dentry(struct super_block *sb, struct fid *fid, + struct dentry *reiserfs_fh_to_parent(struct super_block *sb, struct fid *fid, + int fh_len, int fh_type) + { ++ if (fh_type > fh_len) ++ fh_type = fh_len; + if (fh_type < 4) + return NULL; + +diff --git a/fs/sysfs/dir.c b/fs/sysfs/dir.c +index 7fdf6a7..fabbb81 100644 +--- a/fs/sysfs/dir.c ++++ b/fs/sysfs/dir.c +@@ -430,20 +430,18 @@ int __sysfs_add_one(struct sysfs_addrm_cxt *acxt, struct sysfs_dirent *sd) + /** + * sysfs_pathname - return full path to sysfs dirent + * @sd: sysfs_dirent whose path we want +- * @path: caller allocated buffer ++ * @path: caller allocated buffer of size PATH_MAX + * + * Gives the name "/" to the sysfs_root entry; any path returned + * is relative to wherever sysfs is mounted. 
+- * +- * XXX: does no error checking on @path size + */ + static char *sysfs_pathname(struct sysfs_dirent *sd, char *path) + { + if (sd->s_parent) { + sysfs_pathname(sd->s_parent, path); +- strcat(path, "/"); ++ strlcat(path, "/", PATH_MAX); + } +- strcat(path, sd->s_name); ++ strlcat(path, sd->s_name, PATH_MAX); + return path; + } + +@@ -476,9 +474,11 @@ int sysfs_add_one(struct sysfs_addrm_cxt *acxt, struct sysfs_dirent *sd) + char *path = kzalloc(PATH_MAX, GFP_KERNEL); + WARN(1, KERN_WARNING + "sysfs: cannot create duplicate filename '%s'\n", +- (path == NULL) ? sd->s_name : +- strcat(strcat(sysfs_pathname(acxt->parent_sd, path), "/"), +- sd->s_name)); ++ (path == NULL) ? sd->s_name ++ : (sysfs_pathname(acxt->parent_sd, path), ++ strlcat(path, "/", PATH_MAX), ++ strlcat(path, sd->s_name, PATH_MAX), ++ path)); + kfree(path); + } + +diff --git a/fs/xfs/xfs_export.c b/fs/xfs/xfs_export.c +index 558910f..5703fb8 100644 +--- a/fs/xfs/xfs_export.c ++++ b/fs/xfs/xfs_export.c +@@ -195,6 +195,9 @@ xfs_fs_fh_to_parent(struct super_block *sb, struct fid *fid, + struct xfs_fid64 *fid64 = (struct xfs_fid64 *)fid; + struct inode *inode = NULL; + ++ if (fh_len < xfs_fileid_length(fileid_type)) ++ return NULL; ++ + switch (fileid_type) { + case FILEID_INO32_GEN_PARENT: + inode = xfs_nfs_get_inode(sb, fid->i32.parent_ino, +diff --git a/include/linux/if_vlan.h b/include/linux/if_vlan.h +index 12d5543..c944c4f 100644 +--- a/include/linux/if_vlan.h ++++ b/include/linux/if_vlan.h +@@ -97,6 +97,8 @@ static inline int is_vlan_dev(struct net_device *dev) + } + + #define vlan_tx_tag_present(__skb) ((__skb)->vlan_tci & VLAN_TAG_PRESENT) ++#define vlan_tx_nonzero_tag_present(__skb) \ ++ (vlan_tx_tag_present(__skb) && ((__skb)->vlan_tci & VLAN_VID_MASK)) + #define vlan_tx_tag_get(__skb) ((__skb)->vlan_tci & ~VLAN_TAG_PRESENT) + + #if defined(CONFIG_VLAN_8021Q) || defined(CONFIG_VLAN_8021Q_MODULE) +@@ -106,7 +108,7 @@ extern struct net_device *__vlan_find_dev_deep(struct net_device *real_dev, + extern struct net_device *vlan_dev_real_dev(const struct net_device *dev); + extern u16 vlan_dev_vlan_id(const struct net_device *dev); + +-extern bool vlan_do_receive(struct sk_buff **skb, bool last_handler); ++extern bool vlan_do_receive(struct sk_buff **skb); + extern struct sk_buff *vlan_untag(struct sk_buff *skb); + + #else +@@ -128,10 +130,8 @@ static inline u16 vlan_dev_vlan_id(const struct net_device *dev) + return 0; + } + +-static inline bool vlan_do_receive(struct sk_buff **skb, bool last_handler) ++static inline bool vlan_do_receive(struct sk_buff **skb) + { +- if (((*skb)->vlan_tci & VLAN_VID_MASK) && last_handler) +- (*skb)->pkt_type = PACKET_OTHERHOST; + return false; + } + +diff --git a/include/linux/mtd/nand.h b/include/linux/mtd/nand.h +index 904131b..b25b09b 100644 +--- a/include/linux/mtd/nand.h ++++ b/include/linux/mtd/nand.h +@@ -215,9 +215,6 @@ typedef enum { + #define NAND_SUBPAGE_READ(chip) ((chip->ecc.mode == NAND_ECC_SOFT) \ + && (chip->page_shift > 9)) + +-/* Mask to zero out the chip options, which come from the id table */ +-#define NAND_CHIPOPTIONS_MSK (0x0000ffff & ~NAND_NO_AUTOINCR) +- + /* Non chip related options */ + /* This option skips the bbt scan during initialization. 
*/ + #define NAND_SKIP_BBTSCAN 0x00010000 +diff --git a/include/linux/netfilter/xt_set.h b/include/linux/netfilter/xt_set.h +index c0405ac..e3a9978 100644 +--- a/include/linux/netfilter/xt_set.h ++++ b/include/linux/netfilter/xt_set.h +@@ -58,8 +58,8 @@ struct xt_set_info_target_v1 { + struct xt_set_info_target_v2 { + struct xt_set_info add_set; + struct xt_set_info del_set; +- u32 flags; +- u32 timeout; ++ __u32 flags; ++ __u32 timeout; + }; + + #endif /*_XT_SET_H*/ +diff --git a/include/net/netfilter/nf_conntrack_ecache.h b/include/net/netfilter/nf_conntrack_ecache.h +index a88fb69..ea6f8a4 100644 +--- a/include/net/netfilter/nf_conntrack_ecache.h ++++ b/include/net/netfilter/nf_conntrack_ecache.h +@@ -18,6 +18,7 @@ struct nf_conntrack_ecache { + u16 ctmask; /* bitmask of ct events to be delivered */ + u16 expmask; /* bitmask of expect events to be delivered */ + u32 pid; /* netlink pid of destroyer */ ++ struct timer_list timeout; + }; + + static inline struct nf_conntrack_ecache * +diff --git a/kernel/cgroup.c b/kernel/cgroup.c +index cdc0354..6337535 100644 +--- a/kernel/cgroup.c ++++ b/kernel/cgroup.c +@@ -1803,9 +1803,8 @@ static int cgroup_task_migrate(struct cgroup *cgrp, struct cgroup *oldcgrp, + * trading it for newcg is protected by cgroup_mutex, we're safe to drop + * it here; it will be freed under RCU. + */ +- put_css_set(oldcg); +- + set_bit(CGRP_RELEASABLE, &oldcgrp->flags); ++ put_css_set(oldcg); + return 0; + } + +diff --git a/kernel/module.c b/kernel/module.c +index 6969ef0..6c8fa34 100644 +--- a/kernel/module.c ++++ b/kernel/module.c +@@ -2659,6 +2659,10 @@ static int check_module_license_and_versions(struct module *mod) + if (strcmp(mod->name, "driverloader") == 0) + add_taint_module(mod, TAINT_PROPRIETARY_MODULE); + ++ /* lve claims to be GPL but upstream won't provide source */ ++ if (strcmp(mod->name, "lve") == 0) ++ add_taint_module(mod, TAINT_PROPRIETARY_MODULE); ++ + #ifdef CONFIG_MODVERSIONS + if ((mod->num_syms && !mod->crcs) + || (mod->num_gpl_syms && !mod->gpl_crcs) +diff --git a/kernel/sys.c b/kernel/sys.c +index c504302..d7c4ab0 100644 +--- a/kernel/sys.c ++++ b/kernel/sys.c +@@ -1171,15 +1171,16 @@ DECLARE_RWSEM(uts_sem); + * Work around broken programs that cannot handle "Linux 3.0". + * Instead we map 3.x to 2.6.40+x, so e.g. 3.0 would be 2.6.40 + */ +-static int override_release(char __user *release, int len) ++static int override_release(char __user *release, size_t len) + { + int ret = 0; +- char buf[65]; + + if (current->personality & UNAME26) { +- char *rest = UTS_RELEASE; ++ const char *rest = UTS_RELEASE; ++ char buf[65] = { 0 }; + int ndots = 0; + unsigned v; ++ size_t copy; + + while (*rest) { + if (*rest == '.' 
&& ++ndots >= 3) +@@ -1189,8 +1190,9 @@ static int override_release(char __user *release, int len) + rest++; + } + v = ((LINUX_VERSION_CODE >> 8) & 0xff) + 40; +- snprintf(buf, len, "2.6.%u%s", v, rest); +- ret = copy_to_user(release, buf, len); ++ copy = min(sizeof(buf), max_t(size_t, 1, len)); ++ copy = scnprintf(buf, copy, "2.6.%u%s", v, rest); ++ ret = copy_to_user(release, buf, copy + 1); + } + return ret; + } +diff --git a/kernel/time/timekeeping.c b/kernel/time/timekeeping.c +index 5ee1ac0..cb7f33e 100644 +--- a/kernel/time/timekeeping.c ++++ b/kernel/time/timekeeping.c +@@ -992,7 +992,7 @@ static cycle_t logarithmic_accumulation(cycle_t offset, int shift) + } + + /* Accumulate raw time */ +- raw_nsecs = timekeeper.raw_interval << shift; ++ raw_nsecs = (u64)timekeeper.raw_interval << shift; + raw_nsecs += raw_time.tv_nsec; + if (raw_nsecs >= NSEC_PER_SEC) { + u64 raw_secs = raw_nsecs; +diff --git a/kernel/timer.c b/kernel/timer.c +index 9c3c62b..c219db6 100644 +--- a/kernel/timer.c ++++ b/kernel/timer.c +@@ -63,6 +63,7 @@ EXPORT_SYMBOL(jiffies_64); + #define TVR_SIZE (1 << TVR_BITS) + #define TVN_MASK (TVN_SIZE - 1) + #define TVR_MASK (TVR_SIZE - 1) ++#define MAX_TVAL ((unsigned long)((1ULL << (TVR_BITS + 4*TVN_BITS)) - 1)) + + struct tvec { + struct list_head vec[TVN_SIZE]; +@@ -356,11 +357,12 @@ static void internal_add_timer(struct tvec_base *base, struct timer_list *timer) + vec = base->tv1.vec + (base->timer_jiffies & TVR_MASK); + } else { + int i; +- /* If the timeout is larger than 0xffffffff on 64-bit +- * architectures then we use the maximum timeout: ++ /* If the timeout is larger than MAX_TVAL (on 64-bit ++ * architectures or with CONFIG_BASE_SMALL=1) then we ++ * use the maximum timeout. + */ +- if (idx > 0xffffffffUL) { +- idx = 0xffffffffUL; ++ if (idx > MAX_TVAL) { ++ idx = MAX_TVAL; + expires = idx + base->timer_jiffies; + } + i = (expires >> (TVR_BITS + 3 * TVN_BITS)) & TVN_MASK; +diff --git a/lib/genalloc.c b/lib/genalloc.c +index f352cc4..716f947 100644 +--- a/lib/genalloc.c ++++ b/lib/genalloc.c +@@ -176,7 +176,7 @@ int gen_pool_add_virt(struct gen_pool *pool, unsigned long virt, phys_addr_t phy + struct gen_pool_chunk *chunk; + int nbits = size >> pool->min_alloc_order; + int nbytes = sizeof(struct gen_pool_chunk) + +- (nbits + BITS_PER_BYTE - 1) / BITS_PER_BYTE; ++ BITS_TO_LONGS(nbits) * sizeof(long); + + chunk = kmalloc_node(nbytes, GFP_KERNEL | __GFP_ZERO, nid); + if (unlikely(chunk == NULL)) +diff --git a/mm/rmap.c b/mm/rmap.c +index a4fd368..8685697 100644 +--- a/mm/rmap.c ++++ b/mm/rmap.c +@@ -56,6 +56,7 @@ + #include + #include + #include ++#include + + #include + +@@ -935,11 +936,8 @@ int page_mkclean(struct page *page) + + if (page_mapped(page)) { + struct address_space *mapping = page_mapping(page); +- if (mapping) { ++ if (mapping) + ret = page_mkclean_file(mapping, page); +- if (page_test_and_clear_dirty(page_to_pfn(page), 1)) +- ret = 1; +- } + } + + return ret; +@@ -1120,6 +1118,8 @@ void page_add_file_rmap(struct page *page) + */ + void page_remove_rmap(struct page *page) + { ++ struct address_space *mapping = page_mapping(page); ++ + /* page still mapped by someone else? */ + if (!atomic_add_negative(-1, &page->_mapcount)) + return; +@@ -1130,8 +1130,19 @@ void page_remove_rmap(struct page *page) + * this if the page is anon, so about to be freed; but perhaps + * not if it's in swapcache - there might be another pte slot + * containing the swap entry, but page not yet written to swap. 
++ * ++ * And we can skip it on file pages, so long as the filesystem ++ * participates in dirty tracking; but need to catch shm and tmpfs ++ * and ramfs pages which have been modified since creation by read ++ * fault. ++ * ++ * Note that mapping must be decided above, before decrementing ++ * mapcount (which luckily provides a barrier): once page is unmapped, ++ * it could be truncated and page->mapping reset to NULL at any moment. ++ * Note also that we are relying on page_mapping(page) to set mapping ++ * to &swapper_space when PageSwapCache(page). + */ +- if ((!PageAnon(page) || PageSwapCache(page)) && ++ if (mapping && !mapping_cap_account_dirty(mapping) && + page_test_and_clear_dirty(page_to_pfn(page), 1)) + set_page_dirty(page); + /* +diff --git a/mm/shmem.c b/mm/shmem.c +index 7a82174..126ca35 100644 +--- a/mm/shmem.c ++++ b/mm/shmem.c +@@ -1962,12 +1962,14 @@ static struct dentry *shmem_fh_to_dentry(struct super_block *sb, + { + struct inode *inode; + struct dentry *dentry = NULL; +- u64 inum = fid->raw[2]; +- inum = (inum << 32) | fid->raw[1]; ++ u64 inum; + + if (fh_len < 3) + return NULL; + ++ inum = fid->raw[2]; ++ inum = (inum << 32) | fid->raw[1]; ++ + inode = ilookup5(sb, (unsigned long)(inum + fid->raw[0]), + shmem_match, fid->raw); + if (inode) { +diff --git a/net/8021q/vlan_core.c b/net/8021q/vlan_core.c +index 9ddbd4e..e860a4f 100644 +--- a/net/8021q/vlan_core.c ++++ b/net/8021q/vlan_core.c +@@ -5,7 +5,7 @@ + #include + #include "vlan.h" + +-bool vlan_do_receive(struct sk_buff **skbp, bool last_handler) ++bool vlan_do_receive(struct sk_buff **skbp) + { + struct sk_buff *skb = *skbp; + u16 vlan_id = skb->vlan_tci & VLAN_VID_MASK; +@@ -13,14 +13,8 @@ bool vlan_do_receive(struct sk_buff **skbp, bool last_handler) + struct vlan_pcpu_stats *rx_stats; + + vlan_dev = vlan_find_dev(skb->dev, vlan_id); +- if (!vlan_dev) { +- /* Only the last call to vlan_do_receive() should change +- * pkt_type to PACKET_OTHERHOST +- */ +- if (vlan_id && last_handler) +- skb->pkt_type = PACKET_OTHERHOST; ++ if (!vlan_dev) + return false; +- } + + skb = *skbp = skb_share_check(skb, GFP_ATOMIC); + if (unlikely(!skb)) +diff --git a/net/bluetooth/smp.c b/net/bluetooth/smp.c +index c27b4e3..1849ee0 100644 +--- a/net/bluetooth/smp.c ++++ b/net/bluetooth/smp.c +@@ -30,6 +30,8 @@ + + #define SMP_TIMEOUT 30000 /* 30 seconds */ + ++#define AUTH_REQ_MASK 0x07 ++ + static inline void swap128(u8 src[16], u8 dst[16]) + { + int i; +@@ -206,7 +208,7 @@ static void build_pairing_cmd(struct l2cap_conn *conn, + req->max_key_size = SMP_MAX_ENC_KEY_SIZE; + req->init_key_dist = dist_keys; + req->resp_key_dist = dist_keys; +- req->auth_req = authreq; ++ req->auth_req = (authreq & AUTH_REQ_MASK); + return; + } + +@@ -215,7 +217,7 @@ static void build_pairing_cmd(struct l2cap_conn *conn, + rsp->max_key_size = SMP_MAX_ENC_KEY_SIZE; + rsp->init_key_dist = req->init_key_dist & dist_keys; + rsp->resp_key_dist = req->resp_key_dist & dist_keys; +- rsp->auth_req = authreq; ++ rsp->auth_req = (authreq & AUTH_REQ_MASK); + } + + static u8 check_enc_key_size(struct l2cap_conn *conn, __u8 max_key_size) +diff --git a/net/core/dev.c b/net/core/dev.c +index abe1147..f500a69 100644 +--- a/net/core/dev.c ++++ b/net/core/dev.c +@@ -3278,18 +3278,18 @@ another_round: + ncls: + #endif + +- rx_handler = rcu_dereference(skb->dev->rx_handler); + if (vlan_tx_tag_present(skb)) { + if (pt_prev) { + ret = deliver_skb(skb, pt_prev, orig_dev); + pt_prev = NULL; + } +- if (vlan_do_receive(&skb, !rx_handler)) ++ if (vlan_do_receive(&skb)) + goto 
another_round; + else if (unlikely(!skb)) + goto out; + } + ++ rx_handler = rcu_dereference(skb->dev->rx_handler); + if (rx_handler) { + if (pt_prev) { + ret = deliver_skb(skb, pt_prev, orig_dev); +@@ -3309,6 +3309,9 @@ ncls: + } + } + ++ if (vlan_tx_nonzero_tag_present(skb)) ++ skb->pkt_type = PACKET_OTHERHOST; ++ + /* deliver only exact match when indicated */ + null_or_dev = deliver_exact ? skb->dev : NULL; + +diff --git a/net/core/neighbour.c b/net/core/neighbour.c +index 7aafaed..5b9709f 100644 +--- a/net/core/neighbour.c ++++ b/net/core/neighbour.c +@@ -1254,8 +1254,6 @@ int neigh_resolve_output(struct neighbour *neigh, struct sk_buff *skb) + if (!dst) + goto discard; + +- __skb_pull(skb, skb_network_offset(skb)); +- + if (!neigh_event_send(neigh, skb)) { + int err; + struct net_device *dev = neigh->dev; +@@ -1265,6 +1263,7 @@ int neigh_resolve_output(struct neighbour *neigh, struct sk_buff *skb) + neigh_hh_init(neigh, dst); + + do { ++ __skb_pull(skb, skb_network_offset(skb)); + seq = read_seqbegin(&neigh->ha_lock); + err = dev_hard_header(skb, dev, ntohs(skb->protocol), + neigh->ha, NULL, skb->len); +@@ -1295,9 +1294,8 @@ int neigh_connected_output(struct neighbour *neigh, struct sk_buff *skb) + unsigned int seq; + int err; + +- __skb_pull(skb, skb_network_offset(skb)); +- + do { ++ __skb_pull(skb, skb_network_offset(skb)); + seq = read_seqbegin(&neigh->ha_lock); + err = dev_hard_header(skb, dev, ntohs(skb->protocol), + neigh->ha, NULL, skb->len); +diff --git a/net/core/pktgen.c b/net/core/pktgen.c +index df878de..7bc9991 100644 +--- a/net/core/pktgen.c ++++ b/net/core/pktgen.c +@@ -2935,7 +2935,7 @@ static struct sk_buff *fill_packet_ipv6(struct net_device *odev, + sizeof(struct ipv6hdr) - sizeof(struct udphdr) - + pkt_dev->pkt_overhead; + +- if (datalen < sizeof(struct pktgen_hdr)) { ++ if (datalen < 0 || datalen < sizeof(struct pktgen_hdr)) { + datalen = sizeof(struct pktgen_hdr); + if (net_ratelimit()) + pr_info("increased datalen to %d\n", datalen); +diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c +index de69cec..58c09a0 100644 +--- a/net/ipv4/tcp_ipv4.c ++++ b/net/ipv4/tcp_ipv4.c +@@ -651,10 +651,11 @@ static void tcp_v4_send_reset(struct sock *sk, struct sk_buff *skb) + arg.csumoffset = offsetof(struct tcphdr, check) / 2; + arg.flags = (sk && inet_sk(sk)->transparent) ? IP_REPLY_ARG_NOSRCCHECK : 0; + /* When socket is gone, all binding information is lost. +- * routing might fail in this case. using iif for oif to +- * make sure we can deliver it ++ * routing might fail in this case. No choice here, if we choose to force ++ * input interface, we will misroute in case of asymmetric route. + */ +- arg.bound_dev_if = sk ? 
sk->sk_bound_dev_if : inet_iif(skb); ++ if (sk) ++ arg.bound_dev_if = sk->sk_bound_dev_if; + + net = dev_net(skb_dst(skb)->dev); + arg.tos = ip_hdr(skb)->tos; +diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c +index 4a56574..ccab3c8 100644 +--- a/net/ipv6/tcp_ipv6.c ++++ b/net/ipv6/tcp_ipv6.c +@@ -1048,7 +1048,8 @@ static void tcp_v6_send_response(struct sk_buff *skb, u32 seq, u32 ack, u32 win, + __tcp_v6_send_check(buff, &fl6.saddr, &fl6.daddr); + + fl6.flowi6_proto = IPPROTO_TCP; +- fl6.flowi6_oif = inet6_iif(skb); ++ if (ipv6_addr_type(&fl6.daddr) & IPV6_ADDR_LINKLOCAL) ++ fl6.flowi6_oif = inet6_iif(skb); + fl6.fl6_dport = t1->dest; + fl6.fl6_sport = t1->source; + security_skb_classify_flow(skb, flowi6_to_flowi(&fl6)); +diff --git a/net/mac80211/wpa.c b/net/mac80211/wpa.c +index 28a39bb..a582504 100644 +--- a/net/mac80211/wpa.c ++++ b/net/mac80211/wpa.c +@@ -106,7 +106,8 @@ ieee80211_rx_h_michael_mic_verify(struct ieee80211_rx_data *rx) + if (status->flag & RX_FLAG_MMIC_ERROR) + goto mic_fail; + +- if (!(status->flag & RX_FLAG_IV_STRIPPED) && rx->key) ++ if (!(status->flag & RX_FLAG_IV_STRIPPED) && rx->key && ++ rx->key->conf.cipher == WLAN_CIPHER_SUITE_TKIP) + goto update_iv; + + return RX_CONTINUE; +diff --git a/net/netfilter/nf_conntrack_core.c b/net/netfilter/nf_conntrack_core.c +index 1d15193..7489bd3 100644 +--- a/net/netfilter/nf_conntrack_core.c ++++ b/net/netfilter/nf_conntrack_core.c +@@ -247,12 +247,15 @@ static void death_by_event(unsigned long ul_conntrack) + { + struct nf_conn *ct = (void *)ul_conntrack; + struct net *net = nf_ct_net(ct); ++ struct nf_conntrack_ecache *ecache = nf_ct_ecache_find(ct); ++ ++ BUG_ON(ecache == NULL); + + if (nf_conntrack_event(IPCT_DESTROY, ct) < 0) { + /* bad luck, let's retry again */ +- ct->timeout.expires = jiffies + ++ ecache->timeout.expires = jiffies + + (random32() % net->ct.sysctl_events_retry_timeout); +- add_timer(&ct->timeout); ++ add_timer(&ecache->timeout); + return; + } + /* we've got the event delivered, now it's dying */ +@@ -266,6 +269,9 @@ static void death_by_event(unsigned long ul_conntrack) + void nf_ct_insert_dying_list(struct nf_conn *ct) + { + struct net *net = nf_ct_net(ct); ++ struct nf_conntrack_ecache *ecache = nf_ct_ecache_find(ct); ++ ++ BUG_ON(ecache == NULL); + + /* add this conntrack to the dying list */ + spin_lock_bh(&nf_conntrack_lock); +@@ -273,10 +279,10 @@ void nf_ct_insert_dying_list(struct nf_conn *ct) + &net->ct.dying); + spin_unlock_bh(&nf_conntrack_lock); + /* set a new timer to retry event delivery */ +- setup_timer(&ct->timeout, death_by_event, (unsigned long)ct); +- ct->timeout.expires = jiffies + ++ setup_timer(&ecache->timeout, death_by_event, (unsigned long)ct); ++ ecache->timeout.expires = jiffies + + (random32() % net->ct.sysctl_events_retry_timeout); +- add_timer(&ct->timeout); ++ add_timer(&ecache->timeout); + } + EXPORT_SYMBOL_GPL(nf_ct_insert_dying_list); + +diff --git a/net/rds/send.c b/net/rds/send.c +index 96531d4..88eace5 100644 +--- a/net/rds/send.c ++++ b/net/rds/send.c +@@ -1122,7 +1122,7 @@ rds_send_pong(struct rds_connection *conn, __be16 dport) + rds_stats_inc(s_send_pong); + + if (!test_bit(RDS_LL_SEND_FULL, &conn->c_flags)) +- rds_send_xmit(conn); ++ queue_delayed_work(rds_wq, &conn->c_send_w, 0); + + rds_message_put(rm); + return 0; +diff --git a/net/sunrpc/cache.c b/net/sunrpc/cache.c +index 4530a91..237a2ee 100644 +--- a/net/sunrpc/cache.c ++++ b/net/sunrpc/cache.c +@@ -1404,11 +1404,11 @@ static ssize_t read_flush(struct file *file, char __user *buf, + size_t count, 
loff_t *ppos, + struct cache_detail *cd) + { +- char tbuf[20]; ++ char tbuf[22]; + unsigned long p = *ppos; + size_t len; + +- sprintf(tbuf, "%lu\n", convert_to_wallclock(cd->flush_time)); ++ snprintf(tbuf, sizeof(tbuf), "%lu\n", convert_to_wallclock(cd->flush_time)); + len = strlen(tbuf); + if (p >= len) + return 0; +diff --git a/net/sunrpc/xprtsock.c b/net/sunrpc/xprtsock.c +index 10a385b..65fe23b 100644 +--- a/net/sunrpc/xprtsock.c ++++ b/net/sunrpc/xprtsock.c +@@ -254,7 +254,6 @@ struct sock_xprt { + void (*old_data_ready)(struct sock *, int); + void (*old_state_change)(struct sock *); + void (*old_write_space)(struct sock *); +- void (*old_error_report)(struct sock *); + }; + + /* +@@ -737,10 +736,10 @@ static int xs_tcp_send_request(struct rpc_task *task) + dprintk("RPC: sendmsg returned unrecognized error %d\n", + -status); + case -ECONNRESET: +- case -EPIPE: + xs_tcp_shutdown(xprt); + case -ECONNREFUSED: + case -ENOTCONN: ++ case -EPIPE: + clear_bit(SOCK_ASYNC_NOSPACE, &transport->sock->flags); + } + +@@ -781,7 +780,6 @@ static void xs_save_old_callbacks(struct sock_xprt *transport, struct sock *sk) + transport->old_data_ready = sk->sk_data_ready; + transport->old_state_change = sk->sk_state_change; + transport->old_write_space = sk->sk_write_space; +- transport->old_error_report = sk->sk_error_report; + } + + static void xs_restore_old_callbacks(struct sock_xprt *transport, struct sock *sk) +@@ -789,7 +787,6 @@ static void xs_restore_old_callbacks(struct sock_xprt *transport, struct sock *s + sk->sk_data_ready = transport->old_data_ready; + sk->sk_state_change = transport->old_state_change; + sk->sk_write_space = transport->old_write_space; +- sk->sk_error_report = transport->old_error_report; + } + + static void xs_reset_transport(struct sock_xprt *transport) +@@ -1465,7 +1462,7 @@ static void xs_tcp_cancel_linger_timeout(struct rpc_xprt *xprt) + xprt_clear_connecting(xprt); + } + +-static void xs_sock_mark_closed(struct rpc_xprt *xprt) ++static void xs_sock_reset_connection_flags(struct rpc_xprt *xprt) + { + smp_mb__before_clear_bit(); + clear_bit(XPRT_CONNECTION_ABORT, &xprt->state); +@@ -1473,6 +1470,11 @@ static void xs_sock_mark_closed(struct rpc_xprt *xprt) + clear_bit(XPRT_CLOSE_WAIT, &xprt->state); + clear_bit(XPRT_CLOSING, &xprt->state); + smp_mb__after_clear_bit(); ++} ++ ++static void xs_sock_mark_closed(struct rpc_xprt *xprt) ++{ ++ xs_sock_reset_connection_flags(xprt); + /* Mark transport as closed and wake up all pending tasks */ + xprt_disconnect_done(xprt); + } +@@ -1528,6 +1530,7 @@ static void xs_tcp_state_change(struct sock *sk) + case TCP_CLOSE_WAIT: + /* The server initiated a shutdown of the socket */ + xprt->connect_cookie++; ++ clear_bit(XPRT_CONNECTED, &xprt->state); + xs_tcp_force_close(xprt); + case TCP_CLOSING: + /* +@@ -1552,25 +1555,6 @@ static void xs_tcp_state_change(struct sock *sk) + read_unlock_bh(&sk->sk_callback_lock); + } + +-/** +- * xs_error_report - callback mainly for catching socket errors +- * @sk: socket +- */ +-static void xs_error_report(struct sock *sk) +-{ +- struct rpc_xprt *xprt; +- +- read_lock_bh(&sk->sk_callback_lock); +- if (!(xprt = xprt_from_sock(sk))) +- goto out; +- dprintk("RPC: %s client %p...\n" +- "RPC: error %d\n", +- __func__, xprt, sk->sk_err); +- xprt_wake_pending_tasks(xprt, -EAGAIN); +-out: +- read_unlock_bh(&sk->sk_callback_lock); +-} +- + static void xs_write_space(struct sock *sk) + { + struct socket *sock; +@@ -1870,7 +1854,6 @@ static int xs_local_finish_connecting(struct rpc_xprt *xprt, + sk->sk_user_data 
= xprt; + sk->sk_data_ready = xs_local_data_ready; + sk->sk_write_space = xs_udp_write_space; +- sk->sk_error_report = xs_error_report; + sk->sk_allocation = GFP_ATOMIC; + + xprt_clear_connected(xprt); +@@ -1959,7 +1942,6 @@ static void xs_udp_finish_connecting(struct rpc_xprt *xprt, struct socket *sock) + sk->sk_user_data = xprt; + sk->sk_data_ready = xs_udp_data_ready; + sk->sk_write_space = xs_udp_write_space; +- sk->sk_error_report = xs_error_report; + sk->sk_no_check = UDP_CSUM_NORCV; + sk->sk_allocation = GFP_ATOMIC; + +@@ -2027,10 +2009,8 @@ static void xs_abort_connection(struct sock_xprt *transport) + any.sa_family = AF_UNSPEC; + result = kernel_connect(transport->sock, &any, sizeof(any), 0); + if (!result) +- xs_sock_mark_closed(&transport->xprt); +- else +- dprintk("RPC: AF_UNSPEC connect return code %d\n", +- result); ++ xs_sock_reset_connection_flags(&transport->xprt); ++ dprintk("RPC: AF_UNSPEC connect return code %d\n", result); + } + + static void xs_tcp_reuse_connection(struct sock_xprt *transport) +@@ -2075,7 +2055,6 @@ static int xs_tcp_finish_connecting(struct rpc_xprt *xprt, struct socket *sock) + sk->sk_data_ready = xs_tcp_data_ready; + sk->sk_state_change = xs_tcp_state_change; + sk->sk_write_space = xs_tcp_write_space; +- sk->sk_error_report = xs_error_report; + sk->sk_allocation = GFP_ATOMIC; + + /* socket options */ +@@ -2488,6 +2467,7 @@ static struct rpc_xprt_ops xs_tcp_ops = { + static struct rpc_xprt_ops bc_tcp_ops = { + .reserve_xprt = xprt_reserve_xprt, + .release_xprt = xprt_release_xprt, ++ .alloc_slot = xprt_alloc_slot, + .buf_alloc = bc_malloc, + .buf_free = bc_free, + .send_request = bc_send_request, +diff --git a/sound/pci/ac97/ac97_codec.c b/sound/pci/ac97/ac97_codec.c +index fac51ee..1e7cfba 100644 +--- a/sound/pci/ac97/ac97_codec.c ++++ b/sound/pci/ac97/ac97_codec.c +@@ -1271,6 +1271,8 @@ static int snd_ac97_cvol_new(struct snd_card *card, char *name, int reg, unsigne + tmp.index = ac97->num; + kctl = snd_ctl_new1(&tmp, ac97); + } ++ if (!kctl) ++ return -ENOMEM; + if (reg >= AC97_PHONE && reg <= AC97_PCM) + set_tlv_db_scale(kctl, db_scale_5bit_12db_max); + else +diff --git a/sound/pci/emu10k1/emu10k1_main.c b/sound/pci/emu10k1/emu10k1_main.c +index 6a3e567..d37b946 100644 +--- a/sound/pci/emu10k1/emu10k1_main.c ++++ b/sound/pci/emu10k1/emu10k1_main.c +@@ -1416,6 +1416,15 @@ static struct snd_emu_chip_details emu_chip_details[] = { + .ca0108_chip = 1, + .spk71 = 1, + .emu_model = EMU_MODEL_EMU1010B}, /* EMU 1010 new revision */ ++ /* Tested by Maxim Kachur 17th Oct 2012. */ ++ /* This is MAEM8986, 0202 is MAEM8980 */ ++ {.vendor = 0x1102, .device = 0x0008, .subsystem = 0x40071102, ++ .driver = "Audigy2", .name = "E-mu 1010 PCIe [MAEM8986]", ++ .id = "EMU1010", ++ .emu10k2_chip = 1, ++ .ca0108_chip = 1, ++ .spk71 = 1, ++ .emu_model = EMU_MODEL_EMU1010B}, /* EMU 1010 PCIe */ + /* Tested by James@superbug.co.uk 8th July 2005. 
*/ + /* This is MAEM8810, 0202 is MAEM8820 */ + {.vendor = 0x1102, .device = 0x0004, .subsystem = 0x40011102, +diff --git a/sound/pci/hda/patch_cirrus.c b/sound/pci/hda/patch_cirrus.c +index ec0518e..e449278 100644 +--- a/sound/pci/hda/patch_cirrus.c ++++ b/sound/pci/hda/patch_cirrus.c +@@ -1404,7 +1404,7 @@ static int patch_cs420x(struct hda_codec *codec) + return 0; + + error: +- kfree(codec->spec); ++ cs_free(codec); + codec->spec = NULL; + return err; + } +@@ -1949,7 +1949,7 @@ static int patch_cs421x(struct hda_codec *codec) + return 0; + + error: +- kfree(codec->spec); ++ cs_free(codec); + codec->spec = NULL; + return err; + } +diff --git a/sound/pci/hda/patch_conexant.c b/sound/pci/hda/patch_conexant.c +index 94f0c4a..58c287b 100644 +--- a/sound/pci/hda/patch_conexant.c ++++ b/sound/pci/hda/patch_conexant.c +@@ -4463,7 +4463,9 @@ static void apply_fixup(struct hda_codec *codec, + struct conexant_spec *spec = codec->spec; + + quirk = snd_pci_quirk_lookup(codec->bus->pci, quirk); +- if (quirk && table[quirk->value]) { ++ if (!quirk) ++ return; ++ if (table[quirk->value]) { + snd_printdd(KERN_INFO "hda_codec: applying pincfg for %s\n", + quirk->name); + apply_pincfg(codec, table[quirk->value]); +diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c +index 32c8169..c2c7f90 100644 +--- a/sound/pci/hda/patch_realtek.c ++++ b/sound/pci/hda/patch_realtek.c +@@ -620,6 +620,8 @@ static void alc_line_automute(struct hda_codec *codec) + { + struct alc_spec *spec = codec->spec; + ++ if (spec->autocfg.line_out_type == AUTO_PIN_SPEAKER_OUT) ++ return; + /* check LO jack only when it's different from HP */ + if (spec->autocfg.line_out_pins[0] == spec->autocfg.hp_pins[0]) + return; +@@ -2663,8 +2665,10 @@ static const char *alc_get_line_out_pfx(struct alc_spec *spec, int ch, + return "PCM"; + break; + } +- if (snd_BUG_ON(ch >= ARRAY_SIZE(channel_name))) ++ if (ch >= ARRAY_SIZE(channel_name)) { ++ snd_BUG(); + return "PCM"; ++ } + + return channel_name[ch]; + } +@@ -5080,6 +5084,7 @@ static const struct snd_pci_quirk alc269_fixup_tbl[] = { + SND_PCI_QUIRK(0x17aa, 0x21e9, "Thinkpad Edge 15", ALC269_FIXUP_SKU_IGNORE), + SND_PCI_QUIRK(0x17aa, 0x21f6, "Thinkpad T530", ALC269_FIXUP_LENOVO_DOCK), + SND_PCI_QUIRK(0x17aa, 0x21fa, "Thinkpad X230", ALC269_FIXUP_LENOVO_DOCK), ++ SND_PCI_QUIRK(0x17aa, 0x21f3, "Thinkpad T430", ALC269_FIXUP_LENOVO_DOCK), + SND_PCI_QUIRK(0x17aa, 0x21fb, "Thinkpad T430s", ALC269_FIXUP_LENOVO_DOCK), + SND_PCI_QUIRK(0x17aa, 0x2203, "Thinkpad X230 Tablet", ALC269_FIXUP_LENOVO_DOCK), + SND_PCI_QUIRK(0x17aa, 0x3bf8, "Quanta FL1", ALC269_FIXUP_PCM_44K), +diff --git a/usr/gen_init_cpio.c b/usr/gen_init_cpio.c +index af0f22f..aca6edc 100644 +--- a/usr/gen_init_cpio.c ++++ b/usr/gen_init_cpio.c +@@ -303,7 +303,7 @@ static int cpio_mkfile(const char *name, const char *location, + int retval; + int rc = -1; + int namesize; +- int i; ++ unsigned int i; + + mode |= S_IFREG; + +@@ -381,25 +381,28 @@ error: + + static char *cpio_replace_env(char *new_location) + { +- char expanded[PATH_MAX + 1]; +- char env_var[PATH_MAX + 1]; +- char *start; +- char *end; +- +- for (start = NULL; (start = strstr(new_location, "${")); ) { +- end = strchr(start, '}'); +- if (start < end) { +- *env_var = *expanded = '\0'; +- strncat(env_var, start + 2, end - start - 2); +- strncat(expanded, new_location, start - new_location); +- strncat(expanded, getenv(env_var), PATH_MAX); +- strncat(expanded, end + 1, PATH_MAX); +- strncpy(new_location, expanded, PATH_MAX); +- } else +- break; +- } +- +- 
return new_location; ++ char expanded[PATH_MAX + 1]; ++ char env_var[PATH_MAX + 1]; ++ char *start; ++ char *end; ++ ++ for (start = NULL; (start = strstr(new_location, "${")); ) { ++ end = strchr(start, '}'); ++ if (start < end) { ++ *env_var = *expanded = '\0'; ++ strncat(env_var, start + 2, end - start - 2); ++ strncat(expanded, new_location, start - new_location); ++ strncat(expanded, getenv(env_var), ++ PATH_MAX - strlen(expanded)); ++ strncat(expanded, end + 1, ++ PATH_MAX - strlen(expanded)); ++ strncpy(new_location, expanded, PATH_MAX); ++ new_location[PATH_MAX] = 0; ++ } else ++ break; ++ } ++ ++ return new_location; + } + + diff --git a/3.2.34/bump/1033_linux-3.2.34.patch b/3.2.34/bump/1033_linux-3.2.34.patch new file mode 100644 index 0000000..d647b38 --- /dev/null +++ b/3.2.34/bump/1033_linux-3.2.34.patch @@ -0,0 +1,3678 @@ +diff --git a/Documentation/feature-removal-schedule.txt b/Documentation/feature-removal-schedule.txt +index 3d84912..47c4ec2 100644 +--- a/Documentation/feature-removal-schedule.txt ++++ b/Documentation/feature-removal-schedule.txt +@@ -6,14 +6,6 @@ be removed from this file. + + --------------------------- + +-What: x86 floppy disable_hlt +-When: 2012 +-Why: ancient workaround of dubious utility clutters the +- code used by everybody else. +-Who: Len Brown +- +---------------------------- +- + What: CONFIG_APM_CPU_IDLE, and its ability to call APM BIOS in idle + When: 2012 + Why: This optional sub-feature of APM is of dubious reliability, +diff --git a/Makefile b/Makefile +index 63ca1ea2..14ebacf 100644 +--- a/Makefile ++++ b/Makefile +@@ -1,6 +1,6 @@ + VERSION = 3 + PATCHLEVEL = 2 +-SUBLEVEL = 33 ++SUBLEVEL = 34 + EXTRAVERSION = + NAME = Saber-toothed Squirrel + +diff --git a/arch/arm/mach-at91/at91rm9200_devices.c b/arch/arm/mach-at91/at91rm9200_devices.c +index 143eebb..929fd91 100644 +--- a/arch/arm/mach-at91/at91rm9200_devices.c ++++ b/arch/arm/mach-at91/at91rm9200_devices.c +@@ -462,7 +462,7 @@ static struct i2c_gpio_platform_data pdata = { + + static struct platform_device at91rm9200_twi_device = { + .name = "i2c-gpio", +- .id = -1, ++ .id = 0, + .dev.platform_data = &pdata, + }; + +diff --git a/arch/arm/mach-at91/at91sam9260_devices.c b/arch/arm/mach-at91/at91sam9260_devices.c +index 2590988..465e026 100644 +--- a/arch/arm/mach-at91/at91sam9260_devices.c ++++ b/arch/arm/mach-at91/at91sam9260_devices.c +@@ -467,7 +467,7 @@ static struct i2c_gpio_platform_data pdata = { + + static struct platform_device at91sam9260_twi_device = { + .name = "i2c-gpio", +- .id = -1, ++ .id = 0, + .dev.platform_data = &pdata, + }; + +diff --git a/arch/arm/mach-at91/at91sam9261_devices.c b/arch/arm/mach-at91/at91sam9261_devices.c +index daf3e66..d6d1e76 100644 +--- a/arch/arm/mach-at91/at91sam9261_devices.c ++++ b/arch/arm/mach-at91/at91sam9261_devices.c +@@ -284,7 +284,7 @@ static struct i2c_gpio_platform_data pdata = { + + static struct platform_device at91sam9261_twi_device = { + .name = "i2c-gpio", +- .id = -1, ++ .id = 0, + .dev.platform_data = &pdata, + }; + +diff --git a/arch/arm/mach-at91/at91sam9263_devices.c b/arch/arm/mach-at91/at91sam9263_devices.c +index 32a7e43..e051376e 100644 +--- a/arch/arm/mach-at91/at91sam9263_devices.c ++++ b/arch/arm/mach-at91/at91sam9263_devices.c +@@ -540,7 +540,7 @@ static struct i2c_gpio_platform_data pdata = { + + static struct platform_device at91sam9263_twi_device = { + .name = "i2c-gpio", +- .id = -1, ++ .id = 0, + .dev.platform_data = &pdata, + }; + +diff --git a/arch/arm/mach-at91/at91sam9rl_devices.c 
b/arch/arm/mach-at91/at91sam9rl_devices.c +index 628eb56..4862b23 100644 +--- a/arch/arm/mach-at91/at91sam9rl_devices.c ++++ b/arch/arm/mach-at91/at91sam9rl_devices.c +@@ -319,7 +319,7 @@ static struct i2c_gpio_platform_data pdata = { + + static struct platform_device at91sam9rl_twi_device = { + .name = "i2c-gpio", +- .id = -1, ++ .id = 0, + .dev.platform_data = &pdata, + }; + +diff --git a/arch/arm/mach-at91/setup.c b/arch/arm/mach-at91/setup.c +index f5bbe0ef..0d264bf 100644 +--- a/arch/arm/mach-at91/setup.c ++++ b/arch/arm/mach-at91/setup.c +@@ -163,7 +163,7 @@ static void __init soc_detect(u32 dbgu_base) + } + + /* at91sam9g10 */ +- if ((cidr & ~AT91_CIDR_EXT) == ARCH_ID_AT91SAM9G10) { ++ if ((socid & ~AT91_CIDR_EXT) == ARCH_ID_AT91SAM9G10) { + at91_soc_initdata.type = AT91_SOC_SAM9G10; + at91_boot_soc = at91sam9261_soc; + } +diff --git a/arch/x86/include/asm/system.h b/arch/x86/include/asm/system.h +index 2d2f01c..d75adff 100644 +--- a/arch/x86/include/asm/system.h ++++ b/arch/x86/include/asm/system.h +@@ -93,10 +93,6 @@ do { \ + "memory"); \ + } while (0) + +-/* +- * disable hlt during certain critical i/o operations +- */ +-#define HAVE_DISABLE_HLT + #else + + /* frame pointer must be last for get_wchan */ +@@ -392,9 +388,6 @@ static inline void clflush(volatile void *__p) + + #define nop() asm volatile ("nop") + +-void disable_hlt(void); +-void enable_hlt(void); +- + void cpu_idle_wait(void); + + extern unsigned long arch_align_stack(unsigned long sp); +diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c +index ee5d4fb..59b9b37 100644 +--- a/arch/x86/kernel/process.c ++++ b/arch/x86/kernel/process.c +@@ -341,34 +341,10 @@ void (*pm_idle)(void); + EXPORT_SYMBOL(pm_idle); + #endif + +-#ifdef CONFIG_X86_32 +-/* +- * This halt magic was a workaround for ancient floppy DMA +- * wreckage. It should be safe to remove. +- */ +-static int hlt_counter; +-void disable_hlt(void) +-{ +- hlt_counter++; +-} +-EXPORT_SYMBOL(disable_hlt); +- +-void enable_hlt(void) +-{ +- hlt_counter--; +-} +-EXPORT_SYMBOL(enable_hlt); +- +-static inline int hlt_use_halt(void) +-{ +- return (!hlt_counter && boot_cpu_data.hlt_works_ok); +-} +-#else + static inline int hlt_use_halt(void) + { + return 1; + } +-#endif + + /* + * We use this if we don't have any better +diff --git a/arch/x86/xen/mmu.c b/arch/x86/xen/mmu.c +index ec3d603..2b8b0de 100644 +--- a/arch/x86/xen/mmu.c ++++ b/arch/x86/xen/mmu.c +@@ -1203,6 +1203,25 @@ unsigned long xen_read_cr2_direct(void) + return percpu_read(xen_vcpu_info.arch.cr2); + } + ++void xen_flush_tlb_all(void) ++{ ++ struct mmuext_op *op; ++ struct multicall_space mcs; ++ ++ trace_xen_mmu_flush_tlb_all(0); ++ ++ preempt_disable(); ++ ++ mcs = xen_mc_entry(sizeof(*op)); ++ ++ op = mcs.args; ++ op->cmd = MMUEXT_TLB_FLUSH_ALL; ++ MULTI_mmuext_op(mcs.mc, op, 1, NULL, DOMID_SELF); ++ ++ xen_mc_issue(PARAVIRT_LAZY_MMU); ++ ++ preempt_enable(); ++} + static void xen_flush_tlb(void) + { + struct mmuext_op *op; +@@ -2366,7 +2385,7 @@ int xen_remap_domain_mfn_range(struct vm_area_struct *vma, + err = 0; + out: + +- flush_tlb_all(); ++ xen_flush_tlb_all(); + + return err; + } +diff --git a/crypto/cryptd.c b/crypto/cryptd.c +index 671d4d6..7bdd61b 100644 +--- a/crypto/cryptd.c ++++ b/crypto/cryptd.c +@@ -137,13 +137,18 @@ static void cryptd_queue_worker(struct work_struct *work) + struct crypto_async_request *req, *backlog; + + cpu_queue = container_of(work, struct cryptd_cpu_queue, work); +- /* Only handle one request at a time to avoid hogging crypto +- * workqueue. 
preempt_disable/enable is used to prevent +- * being preempted by cryptd_enqueue_request() */ ++ /* ++ * Only handle one request at a time to avoid hogging crypto workqueue. ++ * preempt_disable/enable is used to prevent being preempted by ++ * cryptd_enqueue_request(). local_bh_disable/enable is used to prevent ++ * cryptd_enqueue_request() being accessed from software interrupts. ++ */ ++ local_bh_disable(); + preempt_disable(); + backlog = crypto_get_backlog(&cpu_queue->queue); + req = crypto_dequeue_request(&cpu_queue->queue); + preempt_enable(); ++ local_bh_enable(); + + if (!req) + return; +diff --git a/drivers/block/floppy.c b/drivers/block/floppy.c +index c864add..7a90d4a 100644 +--- a/drivers/block/floppy.c ++++ b/drivers/block/floppy.c +@@ -1032,37 +1032,6 @@ static int fd_wait_for_completion(unsigned long delay, timeout_fn function) + return 0; + } + +-static DEFINE_SPINLOCK(floppy_hlt_lock); +-static int hlt_disabled; +-static void floppy_disable_hlt(void) +-{ +- unsigned long flags; +- +- WARN_ONCE(1, "floppy_disable_hlt() scheduled for removal in 2012"); +- spin_lock_irqsave(&floppy_hlt_lock, flags); +- if (!hlt_disabled) { +- hlt_disabled = 1; +-#ifdef HAVE_DISABLE_HLT +- disable_hlt(); +-#endif +- } +- spin_unlock_irqrestore(&floppy_hlt_lock, flags); +-} +- +-static void floppy_enable_hlt(void) +-{ +- unsigned long flags; +- +- spin_lock_irqsave(&floppy_hlt_lock, flags); +- if (hlt_disabled) { +- hlt_disabled = 0; +-#ifdef HAVE_DISABLE_HLT +- enable_hlt(); +-#endif +- } +- spin_unlock_irqrestore(&floppy_hlt_lock, flags); +-} +- + static void setup_DMA(void) + { + unsigned long f; +@@ -1107,7 +1076,6 @@ static void setup_DMA(void) + fd_enable_dma(); + release_dma_lock(f); + #endif +- floppy_disable_hlt(); + } + + static void show_floppy(void); +@@ -1709,7 +1677,6 @@ irqreturn_t floppy_interrupt(int irq, void *dev_id) + fd_disable_dma(); + release_dma_lock(f); + +- floppy_enable_hlt(); + do_floppy = NULL; + if (fdc >= N_FDC || FDCS->address == -1) { + /* we don't even know which FDC is the culprit */ +@@ -1858,8 +1825,6 @@ static void floppy_shutdown(unsigned long data) + show_floppy(); + cancel_activity(); + +- floppy_enable_hlt(); +- + flags = claim_dma_lock(); + fd_disable_dma(); + release_dma_lock(flags); +@@ -4198,6 +4163,7 @@ static int __init floppy_init(void) + + disks[dr]->queue = blk_init_queue(do_fd_request, &floppy_lock); + if (!disks[dr]->queue) { ++ put_disk(disks[dr]); + err = -ENOMEM; + goto out_put_disk; + } +@@ -4339,7 +4305,7 @@ static int __init floppy_init(void) + + err = platform_device_register(&floppy_device[drive]); + if (err) +- goto out_flush_work; ++ goto out_remove_drives; + + err = device_create_file(&floppy_device[drive].dev, + &dev_attr_cmos); +@@ -4357,6 +4323,15 @@ static int __init floppy_init(void) + + out_unreg_platform_dev: + platform_device_unregister(&floppy_device[drive]); ++out_remove_drives: ++ while (drive--) { ++ if ((allowed_drive_mask & (1 << drive)) && ++ fdc_state[FDC(drive)].version != FDC_NONE) { ++ del_gendisk(disks[drive]); ++ device_remove_file(&floppy_device[drive].dev, &dev_attr_cmos); ++ platform_device_unregister(&floppy_device[drive]); ++ } ++ } + out_flush_work: + flush_work_sync(&floppy_work); + if (atomic_read(&usage_count)) +@@ -4510,7 +4485,6 @@ static void floppy_release_irq_and_dma(void) + #if N_FDC > 1 + set_dor(1, ~8, 0); + #endif +- floppy_enable_hlt(); + + if (floppy_track_buffer && max_buffer_sectors) { + tmpsize = max_buffer_sectors * 1024; +diff --git a/drivers/gpio/gpio-timberdale.c 
b/drivers/gpio/gpio-timberdale.c +index c593bd4..edff410 100644 +--- a/drivers/gpio/gpio-timberdale.c ++++ b/drivers/gpio/gpio-timberdale.c +@@ -116,7 +116,7 @@ static void timbgpio_irq_disable(struct irq_data *d) + unsigned long flags; + + spin_lock_irqsave(&tgpio->lock, flags); +- tgpio->last_ier &= ~(1 << offset); ++ tgpio->last_ier &= ~(1UL << offset); + iowrite32(tgpio->last_ier, tgpio->membase + TGPIO_IER); + spin_unlock_irqrestore(&tgpio->lock, flags); + } +@@ -128,7 +128,7 @@ static void timbgpio_irq_enable(struct irq_data *d) + unsigned long flags; + + spin_lock_irqsave(&tgpio->lock, flags); +- tgpio->last_ier |= 1 << offset; ++ tgpio->last_ier |= 1UL << offset; + iowrite32(tgpio->last_ier, tgpio->membase + TGPIO_IER); + spin_unlock_irqrestore(&tgpio->lock, flags); + } +diff --git a/drivers/gpu/drm/drm_fops.c b/drivers/gpu/drm/drm_fops.c +index 828bf65..020b103 100644 +--- a/drivers/gpu/drm/drm_fops.c ++++ b/drivers/gpu/drm/drm_fops.c +@@ -136,8 +136,11 @@ int drm_open(struct inode *inode, struct file *filp) + retcode = drm_open_helper(inode, filp, dev); + if (!retcode) { + atomic_inc(&dev->counts[_DRM_STAT_OPENS]); +- if (!dev->open_count++) ++ if (!dev->open_count++) { + retcode = drm_setup(dev); ++ if (retcode) ++ dev->open_count--; ++ } + } + if (!retcode) { + mutex_lock(&dev->struct_mutex); +diff --git a/drivers/gpu/drm/i915/intel_drv.h b/drivers/gpu/drm/i915/intel_drv.h +index 83e820e..bcadf74 100644 +--- a/drivers/gpu/drm/i915/intel_drv.h ++++ b/drivers/gpu/drm/i915/intel_drv.h +@@ -227,12 +227,12 @@ struct dip_infoframe { + uint16_t bottom_bar_start; + uint16_t left_bar_end; + uint16_t right_bar_start; +- } avi; ++ } __attribute__ ((packed)) avi; + struct { + uint8_t vn[8]; + uint8_t pd[16]; + uint8_t sdi; +- } spd; ++ } __attribute__ ((packed)) spd; + uint8_t payload[27]; + } __attribute__ ((packed)) body; + } __attribute__((packed)); +diff --git a/drivers/gpu/drm/i915/intel_overlay.c b/drivers/gpu/drm/i915/intel_overlay.c +index cdf17d4..478b51f 100644 +--- a/drivers/gpu/drm/i915/intel_overlay.c ++++ b/drivers/gpu/drm/i915/intel_overlay.c +@@ -428,9 +428,17 @@ static int intel_overlay_off(struct intel_overlay *overlay) + OUT_RING(flip_addr); + OUT_RING(MI_WAIT_FOR_EVENT | MI_WAIT_FOR_OVERLAY_FLIP); + /* turn overlay off */ +- OUT_RING(MI_OVERLAY_FLIP | MI_OVERLAY_OFF); +- OUT_RING(flip_addr); +- OUT_RING(MI_WAIT_FOR_EVENT | MI_WAIT_FOR_OVERLAY_FLIP); ++ if (IS_I830(dev)) { ++ /* Workaround: Don't disable the overlay fully, since otherwise ++ * it dies on the next OVERLAY_ON cmd. 
*/ ++ OUT_RING(MI_NOOP); ++ OUT_RING(MI_NOOP); ++ OUT_RING(MI_NOOP); ++ } else { ++ OUT_RING(MI_OVERLAY_FLIP | MI_OVERLAY_OFF); ++ OUT_RING(flip_addr); ++ OUT_RING(MI_WAIT_FOR_EVENT | MI_WAIT_FOR_OVERLAY_FLIP); ++ } + ADVANCE_LP_RING(); + + return intel_overlay_do_wait_request(overlay, request, +diff --git a/drivers/gpu/drm/i915/intel_sdvo.c b/drivers/gpu/drm/i915/intel_sdvo.c +index bbf247c..3f4afba 100644 +--- a/drivers/gpu/drm/i915/intel_sdvo.c ++++ b/drivers/gpu/drm/i915/intel_sdvo.c +@@ -868,31 +868,38 @@ static void intel_sdvo_dump_hdmi_buf(struct intel_sdvo *intel_sdvo) + } + #endif + +-static bool intel_sdvo_set_avi_infoframe(struct intel_sdvo *intel_sdvo) ++static bool intel_sdvo_write_infoframe(struct intel_sdvo *intel_sdvo, ++ unsigned if_index, uint8_t tx_rate, ++ uint8_t *data, unsigned length) + { +- struct dip_infoframe avi_if = { +- .type = DIP_TYPE_AVI, +- .ver = DIP_VERSION_AVI, +- .len = DIP_LEN_AVI, +- }; +- uint8_t tx_rate = SDVO_HBUF_TX_VSYNC; +- uint8_t set_buf_index[2] = { 1, 0 }; +- uint64_t *data = (uint64_t *)&avi_if; +- unsigned i; +- +- intel_dip_infoframe_csum(&avi_if); ++ uint8_t set_buf_index[2] = { if_index, 0 }; ++ uint8_t hbuf_size, tmp[8]; ++ int i; + + if (!intel_sdvo_set_value(intel_sdvo, + SDVO_CMD_SET_HBUF_INDEX, + set_buf_index, 2)) + return false; + +- for (i = 0; i < sizeof(avi_if); i += 8) { ++ if (!intel_sdvo_get_value(intel_sdvo, SDVO_CMD_GET_HBUF_INFO, ++ &hbuf_size, 1)) ++ return false; ++ ++ /* Buffer size is 0 based, hooray! */ ++ hbuf_size++; ++ ++ DRM_DEBUG_KMS("writing sdvo hbuf: %i, hbuf_size %i, hbuf_size: %i\n", ++ if_index, length, hbuf_size); ++ ++ for (i = 0; i < hbuf_size; i += 8) { ++ memset(tmp, 0, 8); ++ if (i < length) ++ memcpy(tmp, data + i, min_t(unsigned, 8, length - i)); ++ + if (!intel_sdvo_set_value(intel_sdvo, + SDVO_CMD_SET_HBUF_DATA, +- data, 8)) ++ tmp, 8)) + return false; +- data++; + } + + return intel_sdvo_set_value(intel_sdvo, +@@ -900,6 +907,28 @@ static bool intel_sdvo_set_avi_infoframe(struct intel_sdvo *intel_sdvo) + &tx_rate, 1); + } + ++static bool intel_sdvo_set_avi_infoframe(struct intel_sdvo *intel_sdvo) ++{ ++ struct dip_infoframe avi_if = { ++ .type = DIP_TYPE_AVI, ++ .ver = DIP_VERSION_AVI, ++ .len = DIP_LEN_AVI, ++ }; ++ uint8_t sdvo_data[4 + sizeof(avi_if.body.avi)]; ++ ++ intel_dip_infoframe_csum(&avi_if); ++ ++ /* sdvo spec says that the ecc is handled by the hw, and it looks like ++ * we must not send the ecc field, either. 
*/ ++ memcpy(sdvo_data, &avi_if, 3); ++ sdvo_data[3] = avi_if.checksum; ++ memcpy(&sdvo_data[4], &avi_if.body, sizeof(avi_if.body.avi)); ++ ++ return intel_sdvo_write_infoframe(intel_sdvo, SDVO_HBUF_INDEX_AVI_IF, ++ SDVO_HBUF_TX_VSYNC, ++ sdvo_data, sizeof(sdvo_data)); ++} ++ + static bool intel_sdvo_set_tv_format(struct intel_sdvo *intel_sdvo) + { + struct intel_sdvo_tv_format format; +diff --git a/drivers/gpu/drm/i915/intel_sdvo_regs.h b/drivers/gpu/drm/i915/intel_sdvo_regs.h +index 372f33b..4193c54 100644 +--- a/drivers/gpu/drm/i915/intel_sdvo_regs.h ++++ b/drivers/gpu/drm/i915/intel_sdvo_regs.h +@@ -708,6 +708,8 @@ struct intel_sdvo_enhancements_arg { + #define SDVO_CMD_SET_AUDIO_STAT 0x91 + #define SDVO_CMD_GET_AUDIO_STAT 0x92 + #define SDVO_CMD_SET_HBUF_INDEX 0x93 ++ #define SDVO_HBUF_INDEX_ELD 0 ++ #define SDVO_HBUF_INDEX_AVI_IF 1 + #define SDVO_CMD_GET_HBUF_INDEX 0x94 + #define SDVO_CMD_GET_HBUF_INFO 0x95 + #define SDVO_CMD_SET_HBUF_AV_SPLIT 0x96 +diff --git a/drivers/gpu/drm/nouveau/nouveau_drv.c b/drivers/gpu/drm/nouveau/nouveau_drv.c +index 9791d13..8c084c0 100644 +--- a/drivers/gpu/drm/nouveau/nouveau_drv.c ++++ b/drivers/gpu/drm/nouveau/nouveau_drv.c +@@ -178,8 +178,10 @@ nouveau_pci_suspend(struct pci_dev *pdev, pm_message_t pm_state) + if (dev->switch_power_state == DRM_SWITCH_POWER_OFF) + return 0; + +- NV_INFO(dev, "Disabling fbcon acceleration...\n"); +- nouveau_fbcon_save_disable_accel(dev); ++ if (dev->mode_config.num_crtc) { ++ NV_INFO(dev, "Disabling fbcon acceleration...\n"); ++ nouveau_fbcon_save_disable_accel(dev); ++ } + + NV_INFO(dev, "Unpinning framebuffer(s)...\n"); + list_for_each_entry(crtc, &dev->mode_config.crtc_list, head) { +@@ -246,10 +248,12 @@ nouveau_pci_suspend(struct pci_dev *pdev, pm_message_t pm_state) + pci_set_power_state(pdev, PCI_D3hot); + } + +- console_lock(); +- nouveau_fbcon_set_suspend(dev, 1); +- console_unlock(); +- nouveau_fbcon_restore_accel(dev); ++ if (dev->mode_config.num_crtc) { ++ console_lock(); ++ nouveau_fbcon_set_suspend(dev, 1); ++ console_unlock(); ++ nouveau_fbcon_restore_accel(dev); ++ } + return 0; + + out_abort: +@@ -275,7 +279,8 @@ nouveau_pci_resume(struct pci_dev *pdev) + if (dev->switch_power_state == DRM_SWITCH_POWER_OFF) + return 0; + +- nouveau_fbcon_save_disable_accel(dev); ++ if (dev->mode_config.num_crtc) ++ nouveau_fbcon_save_disable_accel(dev); + + NV_INFO(dev, "We're back, enabling device...\n"); + pci_set_power_state(pdev, PCI_D0); +@@ -376,15 +381,18 @@ nouveau_pci_resume(struct pci_dev *pdev) + nv_crtc->lut.depth = 0; + } + +- console_lock(); +- nouveau_fbcon_set_suspend(dev, 0); +- console_unlock(); ++ if (dev->mode_config.num_crtc) { ++ console_lock(); ++ nouveau_fbcon_set_suspend(dev, 0); ++ console_unlock(); + +- nouveau_fbcon_zfill_all(dev); ++ nouveau_fbcon_zfill_all(dev); ++ } + + drm_helper_resume_force_mode(dev); + +- nouveau_fbcon_restore_accel(dev); ++ if (dev->mode_config.num_crtc) ++ nouveau_fbcon_restore_accel(dev); + return 0; + } + +@@ -466,9 +474,7 @@ static int __init nouveau_init(void) + #ifdef CONFIG_VGA_CONSOLE + if (vgacon_text_force()) + nouveau_modeset = 0; +- else + #endif +- nouveau_modeset = 1; + } + + if (!nouveau_modeset) +diff --git a/drivers/gpu/drm/nouveau/nouveau_state.c b/drivers/gpu/drm/nouveau/nouveau_state.c +index d8831ab..01adcfb 100644 +--- a/drivers/gpu/drm/nouveau/nouveau_state.c ++++ b/drivers/gpu/drm/nouveau/nouveau_state.c +@@ -46,6 +46,7 @@ static int nouveau_init_engine_ptrs(struct drm_device *dev) + { + struct drm_nouveau_private *dev_priv = 
dev->dev_private; + struct nouveau_engine *engine = &dev_priv->engine; ++ u32 pclass = dev->pdev->class >> 8; + + switch (dev_priv->chipset & 0xf0) { + case 0x00: +@@ -481,7 +482,8 @@ static int nouveau_init_engine_ptrs(struct drm_device *dev) + } + + /* headless mode */ +- if (nouveau_modeset == 2) { ++ if (nouveau_modeset == 2 || ++ (nouveau_modeset < 0 && pclass != PCI_CLASS_DISPLAY_VGA)) { + engine->display.early_init = nouveau_stub_init; + engine->display.late_takedown = nouveau_stub_takedown; + engine->display.create = nouveau_stub_init; +diff --git a/drivers/gpu/drm/nouveau/nv04_dac.c b/drivers/gpu/drm/nouveau/nv04_dac.c +index e000455..2d6bfd0 100644 +--- a/drivers/gpu/drm/nouveau/nv04_dac.c ++++ b/drivers/gpu/drm/nouveau/nv04_dac.c +@@ -209,7 +209,7 @@ out: + NVWriteVgaCrtc(dev, 0, NV_CIO_CR_MODE_INDEX, saved_cr_mode); + + if (blue == 0x18) { +- NV_INFO(dev, "Load detected on head A\n"); ++ NV_DEBUG(dev, "Load detected on head A\n"); + return connector_status_connected; + } + +@@ -323,7 +323,7 @@ nv17_dac_detect(struct drm_encoder *encoder, struct drm_connector *connector) + + if (nv17_dac_sample_load(encoder) & + NV_PRAMDAC_TEST_CONTROL_SENSEB_ALLHI) { +- NV_INFO(dev, "Load detected on output %c\n", ++ NV_DEBUG(dev, "Load detected on output %c\n", + '@' + ffs(dcb->or)); + return connector_status_connected; + } else { +@@ -398,7 +398,7 @@ static void nv04_dac_commit(struct drm_encoder *encoder) + + helper->dpms(encoder, DRM_MODE_DPMS_ON); + +- NV_INFO(dev, "Output %s is running on CRTC %d using output %c\n", ++ NV_DEBUG(dev, "Output %s is running on CRTC %d using output %c\n", + drm_get_connector_name(&nouveau_encoder_connector_get(nv_encoder)->base), + nv_crtc->index, '@' + ffs(nv_encoder->dcb->or)); + } +@@ -447,7 +447,7 @@ static void nv04_dac_dpms(struct drm_encoder *encoder, int mode) + return; + nv_encoder->last_dpms = mode; + +- NV_INFO(dev, "Setting dpms mode %d on vga encoder (output %d)\n", ++ NV_DEBUG(dev, "Setting dpms mode %d on vga encoder (output %d)\n", + mode, nv_encoder->dcb->index); + + nv04_dac_update_dacclk(encoder, mode == DRM_MODE_DPMS_ON); +diff --git a/drivers/gpu/drm/nouveau/nv04_dfp.c b/drivers/gpu/drm/nouveau/nv04_dfp.c +index 12098bf..752440c 100644 +--- a/drivers/gpu/drm/nouveau/nv04_dfp.c ++++ b/drivers/gpu/drm/nouveau/nv04_dfp.c +@@ -468,7 +468,7 @@ static void nv04_dfp_commit(struct drm_encoder *encoder) + + helper->dpms(encoder, DRM_MODE_DPMS_ON); + +- NV_INFO(dev, "Output %s is running on CRTC %d using output %c\n", ++ NV_DEBUG(dev, "Output %s is running on CRTC %d using output %c\n", + drm_get_connector_name(&nouveau_encoder_connector_get(nv_encoder)->base), + nv_crtc->index, '@' + ffs(nv_encoder->dcb->or)); + } +@@ -511,7 +511,7 @@ static void nv04_lvds_dpms(struct drm_encoder *encoder, int mode) + return; + nv_encoder->last_dpms = mode; + +- NV_INFO(dev, "Setting dpms mode %d on lvds encoder (output %d)\n", ++ NV_DEBUG(dev, "Setting dpms mode %d on lvds encoder (output %d)\n", + mode, nv_encoder->dcb->index); + + if (was_powersaving && is_powersaving_dpms(mode)) +@@ -556,7 +556,7 @@ static void nv04_tmds_dpms(struct drm_encoder *encoder, int mode) + return; + nv_encoder->last_dpms = mode; + +- NV_INFO(dev, "Setting dpms mode %d on tmds encoder (output %d)\n", ++ NV_DEBUG(dev, "Setting dpms mode %d on tmds encoder (output %d)\n", + mode, nv_encoder->dcb->index); + + nv04_dfp_update_backlight(encoder, mode); +diff --git a/drivers/gpu/drm/nouveau/nv04_tv.c b/drivers/gpu/drm/nouveau/nv04_tv.c +index 3eb605d..4de1fbe 100644 +--- 
a/drivers/gpu/drm/nouveau/nv04_tv.c ++++ b/drivers/gpu/drm/nouveau/nv04_tv.c +@@ -69,7 +69,7 @@ static void nv04_tv_dpms(struct drm_encoder *encoder, int mode) + struct nv04_mode_state *state = &dev_priv->mode_reg; + uint8_t crtc1A; + +- NV_INFO(dev, "Setting dpms mode %d on TV encoder (output %d)\n", ++ NV_DEBUG(dev, "Setting dpms mode %d on TV encoder (output %d)\n", + mode, nv_encoder->dcb->index); + + state->pllsel &= ~(PLLSEL_TV_CRTC1_MASK | PLLSEL_TV_CRTC2_MASK); +@@ -162,7 +162,7 @@ static void nv04_tv_commit(struct drm_encoder *encoder) + + helper->dpms(encoder, DRM_MODE_DPMS_ON); + +- NV_INFO(dev, "Output %s is running on CRTC %d using output %c\n", ++ NV_DEBUG(dev, "Output %s is running on CRTC %d using output %c\n", + drm_get_connector_name(&nouveau_encoder_connector_get(nv_encoder)->base), nv_crtc->index, + '@' + ffs(nv_encoder->dcb->or)); + } +diff --git a/drivers/gpu/drm/radeon/evergreen.c b/drivers/gpu/drm/radeon/evergreen.c +index b61f490..ca94e23 100644 +--- a/drivers/gpu/drm/radeon/evergreen.c ++++ b/drivers/gpu/drm/radeon/evergreen.c +@@ -1164,7 +1164,7 @@ void evergreen_mc_resume(struct radeon_device *rdev, struct evergreen_mc_save *s + WREG32(BIF_FB_EN, FB_READ_EN | FB_WRITE_EN); + + for (i = 0; i < rdev->num_crtc; i++) { +- if (save->crtc_enabled) { ++ if (save->crtc_enabled[i]) { + tmp = RREG32(EVERGREEN_CRTC_CONTROL + crtc_offsets[i]); + tmp &= ~EVERGREEN_CRTC_DISP_READ_REQUEST_DISABLE; + WREG32(EVERGREEN_CRTC_CONTROL + crtc_offsets[i], tmp); +diff --git a/drivers/gpu/drm/radeon/radeon_legacy_encoders.c b/drivers/gpu/drm/radeon/radeon_legacy_encoders.c +index 3ad3cc6..8165953 100644 +--- a/drivers/gpu/drm/radeon/radeon_legacy_encoders.c ++++ b/drivers/gpu/drm/radeon/radeon_legacy_encoders.c +@@ -650,6 +650,7 @@ static enum drm_connector_status radeon_legacy_primary_dac_detect(struct drm_enc + tmp |= RADEON_DAC_RANGE_CNTL_PS2 | RADEON_DAC_CMP_EN; + WREG32(RADEON_DAC_CNTL, tmp); + ++ tmp = dac_macro_cntl; + tmp &= ~(RADEON_DAC_PDWN_R | + RADEON_DAC_PDWN_G | + RADEON_DAC_PDWN_B); +diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_dmabuf.c b/drivers/gpu/drm/vmwgfx/vmwgfx_dmabuf.c +index 3fa884d..27151f7 100644 +--- a/drivers/gpu/drm/vmwgfx/vmwgfx_dmabuf.c ++++ b/drivers/gpu/drm/vmwgfx/vmwgfx_dmabuf.c +@@ -306,7 +306,7 @@ void vmw_bo_pin(struct ttm_buffer_object *bo, bool pin) + + BUG_ON(!atomic_read(&bo->reserved)); + BUG_ON(old_mem_type != TTM_PL_VRAM && +- old_mem_type != VMW_PL_FLAG_GMR); ++ old_mem_type != VMW_PL_GMR); + + pl_flags = TTM_PL_FLAG_VRAM | VMW_PL_FLAG_GMR | TTM_PL_FLAG_CACHED; + if (pin) +diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_drv.c b/drivers/gpu/drm/vmwgfx/vmwgfx_drv.c +index 033fc96..b639536 100644 +--- a/drivers/gpu/drm/vmwgfx/vmwgfx_drv.c ++++ b/drivers/gpu/drm/vmwgfx/vmwgfx_drv.c +@@ -1048,6 +1048,11 @@ static void vmw_pm_complete(struct device *kdev) + struct drm_device *dev = pci_get_drvdata(pdev); + struct vmw_private *dev_priv = vmw_priv(dev); + ++ mutex_lock(&dev_priv->hw_mutex); ++ vmw_write(dev_priv, SVGA_REG_ID, SVGA_ID_2); ++ (void) vmw_read(dev_priv, SVGA_REG_ID); ++ mutex_unlock(&dev_priv->hw_mutex); ++ + /** + * Reclaim 3d reference held by fbdev and potentially + * start fifo. 
+diff --git a/drivers/hid/hid-microsoft.c b/drivers/hid/hid-microsoft.c +index e5c699b..3899989 100644 +--- a/drivers/hid/hid-microsoft.c ++++ b/drivers/hid/hid-microsoft.c +@@ -29,22 +29,30 @@ + #define MS_RDESC 0x08 + #define MS_NOGET 0x10 + #define MS_DUPLICATE_USAGES 0x20 ++#define MS_RDESC_3K 0x40 + +-/* +- * Microsoft Wireless Desktop Receiver (Model 1028) has +- * 'Usage Min/Max' where it ought to have 'Physical Min/Max' +- */ + static __u8 *ms_report_fixup(struct hid_device *hdev, __u8 *rdesc, + unsigned int *rsize) + { + unsigned long quirks = (unsigned long)hid_get_drvdata(hdev); + ++ /* ++ * Microsoft Wireless Desktop Receiver (Model 1028) has ++ * 'Usage Min/Max' where it ought to have 'Physical Min/Max' ++ */ + if ((quirks & MS_RDESC) && *rsize == 571 && rdesc[557] == 0x19 && + rdesc[559] == 0x29) { + hid_info(hdev, "fixing up Microsoft Wireless Receiver Model 1028 report descriptor\n"); + rdesc[557] = 0x35; + rdesc[559] = 0x45; + } ++ /* the same as above (s/usage/physical/) */ ++ if ((quirks & MS_RDESC_3K) && *rsize == 106 && ++ !memcmp((char []){ 0x19, 0x00, 0x29, 0xff }, ++ &rdesc[94], 4)) { ++ rdesc[94] = 0x35; ++ rdesc[96] = 0x45; ++ } + return rdesc; + } + +@@ -193,7 +201,7 @@ static const struct hid_device_id ms_devices[] = { + { HID_USB_DEVICE(USB_VENDOR_ID_MICROSOFT, USB_DEVICE_ID_MS_PRESENTER_8K_USB), + .driver_data = MS_PRESENTER }, + { HID_USB_DEVICE(USB_VENDOR_ID_MICROSOFT, USB_DEVICE_ID_MS_DIGITAL_MEDIA_3K), +- .driver_data = MS_ERGONOMY }, ++ .driver_data = MS_ERGONOMY | MS_RDESC_3K }, + { HID_USB_DEVICE(USB_VENDOR_ID_MICROSOFT, USB_DEVICE_ID_WIRELESS_OPTICAL_DESKTOP_3_0), + .driver_data = MS_NOGET }, + { HID_USB_DEVICE(USB_VENDOR_ID_MICROSOFT, USB_DEVICE_ID_MS_COMFORT_MOUSE_4500), +diff --git a/drivers/hwmon/w83627ehf.c b/drivers/hwmon/w83627ehf.c +index ceaec92..b6a3ce3 100644 +--- a/drivers/hwmon/w83627ehf.c ++++ b/drivers/hwmon/w83627ehf.c +@@ -2015,6 +2015,7 @@ static int __devinit w83627ehf_probe(struct platform_device *pdev) + mutex_init(&data->lock); + mutex_init(&data->update_lock); + data->name = w83627ehf_device_names[sio_data->kind]; ++ data->bank = 0xff; /* Force initial bank selection */ + platform_set_drvdata(pdev, data); + + /* 627EHG and 627EHF have 10 voltage inputs; 627DHG and 667HG have 9 */ +diff --git a/drivers/input/touchscreen/tsc40.c b/drivers/input/touchscreen/tsc40.c +index 29d5ed4..80d4610 100644 +--- a/drivers/input/touchscreen/tsc40.c ++++ b/drivers/input/touchscreen/tsc40.c +@@ -107,7 +107,6 @@ static int tsc_connect(struct serio *serio, struct serio_driver *drv) + __set_bit(BTN_TOUCH, input_dev->keybit); + input_set_abs_params(ptsc->dev, ABS_X, 0, 0x3ff, 0, 0); + input_set_abs_params(ptsc->dev, ABS_Y, 0, 0x3ff, 0, 0); +- input_set_abs_params(ptsc->dev, ABS_PRESSURE, 0, 0, 0, 0); + + serio_set_drvdata(serio, ptsc); + +diff --git a/drivers/net/ethernet/marvell/sky2.c b/drivers/net/ethernet/marvell/sky2.c +index 11ddd838..69fc888 100644 +--- a/drivers/net/ethernet/marvell/sky2.c ++++ b/drivers/net/ethernet/marvell/sky2.c +@@ -3060,8 +3060,10 @@ static irqreturn_t sky2_intr(int irq, void *dev_id) + + /* Reading this mask interrupts as side effect */ + status = sky2_read32(hw, B0_Y2_SP_ISRC2); +- if (status == 0 || status == ~0) ++ if (status == 0 || status == ~0) { ++ sky2_write32(hw, B0_Y2_SP_ICR, 2); + return IRQ_NONE; ++ } + + prefetch(&hw->st_le[hw->st_idx]); + +diff --git a/drivers/net/ethernet/realtek/r8169.c b/drivers/net/ethernet/realtek/r8169.c +index 4b43bc5..b8db4cd 100644 +--- a/drivers/net/ethernet/realtek/r8169.c ++++ 
b/drivers/net/ethernet/realtek/r8169.c +@@ -77,7 +77,7 @@ static const int multicast_filter_limit = 32; + #define MAC_ADDR_LEN 6 + + #define MAX_READ_REQUEST_SHIFT 12 +-#define TX_DMA_BURST 6 /* Maximum PCI burst, '6' is 1024 */ ++#define TX_DMA_BURST 7 /* Maximum PCI burst, '7' is unlimited */ + #define SafeMtu 0x1c20 /* ... actually life sucks beyond ~7k */ + #define InterFrameGap 0x03 /* 3 means InterFrameGap = the shortest one */ + +@@ -3521,6 +3521,8 @@ static void rtl_wol_suspend_quirk(struct rtl8169_private *tp) + void __iomem *ioaddr = tp->mmio_addr; + + switch (tp->mac_version) { ++ case RTL_GIGA_MAC_VER_25: ++ case RTL_GIGA_MAC_VER_26: + case RTL_GIGA_MAC_VER_29: + case RTL_GIGA_MAC_VER_30: + case RTL_GIGA_MAC_VER_32: +@@ -6064,6 +6066,9 @@ static void rtl_set_rx_mode(struct net_device *dev) + mc_filter[1] = swab32(data); + } + ++ if (tp->mac_version == RTL_GIGA_MAC_VER_35) ++ mc_filter[1] = mc_filter[0] = 0xffffffff; ++ + RTL_W32(MAR0 + 4, mc_filter[1]); + RTL_W32(MAR0 + 0, mc_filter[0]); + +diff --git a/drivers/net/usb/usbnet.c b/drivers/net/usb/usbnet.c +index b873b5d..dc53a8f 100644 +--- a/drivers/net/usb/usbnet.c ++++ b/drivers/net/usb/usbnet.c +@@ -1156,6 +1156,7 @@ netdev_tx_t usbnet_start_xmit (struct sk_buff *skb, + usb_anchor_urb(urb, &dev->deferred); + /* no use to process more packets */ + netif_stop_queue(net); ++ usb_put_urb(urb); + spin_unlock_irqrestore(&dev->txq.lock, flags); + netdev_dbg(dev->net, "Delaying transmission for resumption\n"); + goto deferred; +@@ -1297,6 +1298,8 @@ void usbnet_disconnect (struct usb_interface *intf) + + cancel_work_sync(&dev->kevent); + ++ usb_scuttle_anchored_urbs(&dev->deferred); ++ + if (dev->driver_info->unbind) + dev->driver_info->unbind (dev, intf); + +diff --git a/drivers/net/wireless/ath/ath9k/xmit.c b/drivers/net/wireless/ath/ath9k/xmit.c +index c59c592..18da100 100644 +--- a/drivers/net/wireless/ath/ath9k/xmit.c ++++ b/drivers/net/wireless/ath/ath9k/xmit.c +@@ -288,6 +288,7 @@ static struct ath_buf *ath_tx_get_buffer(struct ath_softc *sc) + } + + bf = list_first_entry(&sc->tx.txbuf, struct ath_buf, list); ++ bf->bf_next = NULL; + list_del(&bf->list); + + spin_unlock_bh(&sc->tx.txbuflock); +@@ -369,7 +370,7 @@ static void ath_tx_complete_aggr(struct ath_softc *sc, struct ath_txq *txq, + u16 seq_st = 0, acked_cnt = 0, txfail_cnt = 0; + u32 ba[WME_BA_BMP_SIZE >> 5]; + int isaggr, txfail, txpending, sendbar = 0, needreset = 0, nbad = 0; +- bool rc_update = true; ++ bool rc_update = true, isba; + struct ieee80211_tx_rate rates[4]; + struct ath_frame_info *fi; + int nframes; +@@ -407,13 +408,17 @@ static void ath_tx_complete_aggr(struct ath_softc *sc, struct ath_txq *txq, + an = (struct ath_node *)sta->drv_priv; + tidno = ieee80211_get_qos_ctl(hdr)[0] & IEEE80211_QOS_CTL_TID_MASK; + tid = ATH_AN_2_TID(an, tidno); ++ isba = ts->ts_flags & ATH9K_TX_BA; + + /* + * The hardware occasionally sends a tx status for the wrong TID. 
+ * In this case, the BA status cannot be considered valid and all + * subframes need to be retransmitted ++ * ++ * Only BlockAcks have a TID and therefore normal Acks cannot be ++ * checked + */ +- if (tidno != ts->tid) ++ if (isba && tidno != ts->tid) + txok = false; + + isaggr = bf_isaggr(bf); +@@ -1710,6 +1715,7 @@ static void ath_tx_send_normal(struct ath_softc *sc, struct ath_txq *txq, + if (tid) + INCR(tid->seq_start, IEEE80211_SEQ_MAX); + ++ bf->bf_next = NULL; + bf->bf_lastbf = bf; + ath_tx_fill_desc(sc, bf, txq, fi->framelen); + ath_tx_txqaddbuf(sc, txq, &bf_head, false); +diff --git a/drivers/net/wireless/rt2x00/rt2800lib.c b/drivers/net/wireless/rt2x00/rt2800lib.c +index 1ba079d..fb19447 100644 +--- a/drivers/net/wireless/rt2x00/rt2800lib.c ++++ b/drivers/net/wireless/rt2x00/rt2800lib.c +@@ -2141,7 +2141,7 @@ static int rt2800_get_gain_calibration_delta(struct rt2x00_dev *rt2x00dev) + /* + * Check if temperature compensation is supported. + */ +- if (tssi_bounds[4] == 0xff) ++ if (tssi_bounds[4] == 0xff || step == 0xff) + return 0; + + /* +diff --git a/drivers/target/iscsi/iscsi_target.c b/drivers/target/iscsi/iscsi_target.c +index f35cb10..6fa7222 100644 +--- a/drivers/target/iscsi/iscsi_target.c ++++ b/drivers/target/iscsi/iscsi_target.c +@@ -3523,7 +3523,9 @@ restart: + */ + iscsit_thread_check_cpumask(conn, current, 1); + +- schedule_timeout_interruptible(MAX_SCHEDULE_TIMEOUT); ++ wait_event_interruptible(conn->queues_wq, ++ !iscsit_conn_all_queues_empty(conn) || ++ ts->status == ISCSI_THREAD_SET_RESET); + + if ((ts->status == ISCSI_THREAD_SET_RESET) || + signal_pending(current)) +diff --git a/drivers/target/iscsi/iscsi_target_core.h b/drivers/target/iscsi/iscsi_target_core.h +index dae283f..bd8ce01 100644 +--- a/drivers/target/iscsi/iscsi_target_core.h ++++ b/drivers/target/iscsi/iscsi_target_core.h +@@ -491,6 +491,7 @@ struct iscsi_tmr_req { + }; + + struct iscsi_conn { ++ wait_queue_head_t queues_wq; + /* Authentication Successful for this connection */ + u8 auth_complete; + /* State connection is currently in */ +diff --git a/drivers/target/iscsi/iscsi_target_login.c b/drivers/target/iscsi/iscsi_target_login.c +index 2ec5339..eb0c9fe 100644 +--- a/drivers/target/iscsi/iscsi_target_login.c ++++ b/drivers/target/iscsi/iscsi_target_login.c +@@ -44,6 +44,7 @@ extern spinlock_t sess_idr_lock; + + static int iscsi_login_init_conn(struct iscsi_conn *conn) + { ++ init_waitqueue_head(&conn->queues_wq); + INIT_LIST_HEAD(&conn->conn_list); + INIT_LIST_HEAD(&conn->conn_cmd_list); + INIT_LIST_HEAD(&conn->immed_queue_list); +diff --git a/drivers/target/iscsi/iscsi_target_util.c b/drivers/target/iscsi/iscsi_target_util.c +index 99f2af3..e612722 100644 +--- a/drivers/target/iscsi/iscsi_target_util.c ++++ b/drivers/target/iscsi/iscsi_target_util.c +@@ -659,7 +659,7 @@ void iscsit_add_cmd_to_immediate_queue( + atomic_set(&conn->check_immediate_queue, 1); + spin_unlock_bh(&conn->immed_queue_lock); + +- wake_up_process(conn->thread_set->tx_thread); ++ wake_up(&conn->queues_wq); + } + + struct iscsi_queue_req *iscsit_get_cmd_from_immediate_queue(struct iscsi_conn *conn) +@@ -733,7 +733,7 @@ void iscsit_add_cmd_to_response_queue( + atomic_inc(&cmd->response_queue_count); + spin_unlock_bh(&conn->response_queue_lock); + +- wake_up_process(conn->thread_set->tx_thread); ++ wake_up(&conn->queues_wq); + } + + struct iscsi_queue_req *iscsit_get_cmd_from_response_queue(struct iscsi_conn *conn) +@@ -787,6 +787,24 @@ static void iscsit_remove_cmd_from_response_queue( + } + } + ++bool 
iscsit_conn_all_queues_empty(struct iscsi_conn *conn) ++{ ++ bool empty; ++ ++ spin_lock_bh(&conn->immed_queue_lock); ++ empty = list_empty(&conn->immed_queue_list); ++ spin_unlock_bh(&conn->immed_queue_lock); ++ ++ if (!empty) ++ return empty; ++ ++ spin_lock_bh(&conn->response_queue_lock); ++ empty = list_empty(&conn->response_queue_list); ++ spin_unlock_bh(&conn->response_queue_lock); ++ ++ return empty; ++} ++ + void iscsit_free_queue_reqs_for_conn(struct iscsi_conn *conn) + { + struct iscsi_queue_req *qr, *qr_tmp; +diff --git a/drivers/target/iscsi/iscsi_target_util.h b/drivers/target/iscsi/iscsi_target_util.h +index 835bf7d..cfac698 100644 +--- a/drivers/target/iscsi/iscsi_target_util.h ++++ b/drivers/target/iscsi/iscsi_target_util.h +@@ -28,6 +28,7 @@ extern struct iscsi_queue_req *iscsit_get_cmd_from_immediate_queue(struct iscsi_ + extern void iscsit_add_cmd_to_response_queue(struct iscsi_cmd *, struct iscsi_conn *, u8); + extern struct iscsi_queue_req *iscsit_get_cmd_from_response_queue(struct iscsi_conn *); + extern void iscsit_remove_cmd_from_tx_queues(struct iscsi_cmd *, struct iscsi_conn *); ++extern bool iscsit_conn_all_queues_empty(struct iscsi_conn *); + extern void iscsit_free_queue_reqs_for_conn(struct iscsi_conn *); + extern void iscsit_release_cmd(struct iscsi_cmd *); + extern void iscsit_free_cmd(struct iscsi_cmd *); +diff --git a/drivers/target/target_core_configfs.c b/drivers/target/target_core_configfs.c +index 0b01bfc..013b133 100644 +--- a/drivers/target/target_core_configfs.c ++++ b/drivers/target/target_core_configfs.c +@@ -3205,7 +3205,8 @@ static int __init target_core_init_configfs(void) + if (ret < 0) + goto out; + +- if (core_dev_setup_virtual_lun0() < 0) ++ ret = core_dev_setup_virtual_lun0(); ++ if (ret < 0) + goto out; + + return 0; +diff --git a/drivers/target/target_core_device.c b/drivers/target/target_core_device.c +index f8773ae..a0143a0 100644 +--- a/drivers/target/target_core_device.c ++++ b/drivers/target/target_core_device.c +@@ -835,20 +835,20 @@ int se_dev_check_shutdown(struct se_device *dev) + + u32 se_dev_align_max_sectors(u32 max_sectors, u32 block_size) + { +- u32 tmp, aligned_max_sectors; ++ u32 aligned_max_sectors; ++ u32 alignment; + /* + * Limit max_sectors to a PAGE_SIZE aligned value for modern + * transport_allocate_data_tasks() operation. 
+ */ +- tmp = rounddown((max_sectors * block_size), PAGE_SIZE); +- aligned_max_sectors = (tmp / block_size); +- if (max_sectors != aligned_max_sectors) { +- printk(KERN_INFO "Rounding down aligned max_sectors from %u" +- " to %u\n", max_sectors, aligned_max_sectors); +- return aligned_max_sectors; +- } ++ alignment = max(1ul, PAGE_SIZE / block_size); ++ aligned_max_sectors = rounddown(max_sectors, alignment); ++ ++ if (max_sectors != aligned_max_sectors) ++ pr_info("Rounding down aligned max_sectors from %u to %u\n", ++ max_sectors, aligned_max_sectors); + +- return max_sectors; ++ return aligned_max_sectors; + } + + void se_dev_set_default_attribs( +diff --git a/drivers/usb/serial/mos7840.c b/drivers/usb/serial/mos7840.c +index d481f80..43a38aa 100644 +--- a/drivers/usb/serial/mos7840.c ++++ b/drivers/usb/serial/mos7840.c +@@ -2585,7 +2585,6 @@ error: + static void mos7840_disconnect(struct usb_serial *serial) + { + int i; +- unsigned long flags; + struct moschip_port *mos7840_port; + dbg("%s", " disconnect :entering.........."); + +diff --git a/drivers/xen/gntdev.c b/drivers/xen/gntdev.c +index 625890c..080b186 100644 +--- a/drivers/xen/gntdev.c ++++ b/drivers/xen/gntdev.c +@@ -105,6 +105,21 @@ static void gntdev_print_maps(struct gntdev_priv *priv, + #endif + } + ++static void gntdev_free_map(struct grant_map *map) ++{ ++ if (map == NULL) ++ return; ++ ++ if (map->pages) ++ free_xenballooned_pages(map->count, map->pages); ++ kfree(map->pages); ++ kfree(map->grants); ++ kfree(map->map_ops); ++ kfree(map->unmap_ops); ++ kfree(map->kmap_ops); ++ kfree(map); ++} ++ + static struct grant_map *gntdev_alloc_map(struct gntdev_priv *priv, int count) + { + struct grant_map *add; +@@ -142,12 +157,7 @@ static struct grant_map *gntdev_alloc_map(struct gntdev_priv *priv, int count) + return add; + + err: +- kfree(add->pages); +- kfree(add->grants); +- kfree(add->map_ops); +- kfree(add->unmap_ops); +- kfree(add->kmap_ops); +- kfree(add); ++ gntdev_free_map(add); + return NULL; + } + +@@ -196,17 +206,9 @@ static void gntdev_put_map(struct grant_map *map) + if (map->notify.flags & UNMAP_NOTIFY_SEND_EVENT) + notify_remote_via_evtchn(map->notify.event); + +- if (map->pages) { +- if (!use_ptemod) +- unmap_grant_pages(map, 0, map->count); +- +- free_xenballooned_pages(map->count, map->pages); +- } +- kfree(map->pages); +- kfree(map->grants); +- kfree(map->map_ops); +- kfree(map->unmap_ops); +- kfree(map); ++ if (map->pages && !use_ptemod) ++ unmap_grant_pages(map, 0, map->count); ++ gntdev_free_map(map); + } + + /* ------------------------------------------------------------------ */ +diff --git a/fs/cifs/cifsacl.c b/fs/cifs/cifsacl.c +index 72ddf23..b3522af 100644 +--- a/fs/cifs/cifsacl.c ++++ b/fs/cifs/cifsacl.c +@@ -225,6 +225,13 @@ sid_to_str(struct cifs_sid *sidptr, char *sidstr) + } + + static void ++cifs_copy_sid(struct cifs_sid *dst, const struct cifs_sid *src) ++{ ++ memcpy(dst, src, sizeof(*dst)); ++ dst->num_subauth = min_t(u8, src->num_subauth, NUM_SUBAUTHS); ++} ++ ++static void + id_rb_insert(struct rb_root *root, struct cifs_sid *sidptr, + struct cifs_sid_id **psidid, char *typestr) + { +@@ -248,7 +255,7 @@ id_rb_insert(struct rb_root *root, struct cifs_sid *sidptr, + } + } + +- memcpy(&(*psidid)->sid, sidptr, sizeof(struct cifs_sid)); ++ cifs_copy_sid(&(*psidid)->sid, sidptr); + (*psidid)->time = jiffies - (SID_MAP_RETRY + 1); + (*psidid)->refcount = 0; + +@@ -354,7 +361,7 @@ id_to_sid(unsigned long cid, uint sidtype, struct cifs_sid *ssid) + * any fields of the node after a reference is put 
. + */ + if (test_bit(SID_ID_MAPPED, &psidid->state)) { +- memcpy(ssid, &psidid->sid, sizeof(struct cifs_sid)); ++ cifs_copy_sid(ssid, &psidid->sid); + psidid->time = jiffies; /* update ts for accessing */ + goto id_sid_out; + } +@@ -370,14 +377,14 @@ id_to_sid(unsigned long cid, uint sidtype, struct cifs_sid *ssid) + if (IS_ERR(sidkey)) { + rc = -EINVAL; + cFYI(1, "%s: Can't map and id to a SID", __func__); ++ } else if (sidkey->datalen < sizeof(struct cifs_sid)) { ++ rc = -EIO; ++ cFYI(1, "%s: Downcall contained malformed key " ++ "(datalen=%hu)", __func__, sidkey->datalen); + } else { + lsid = (struct cifs_sid *)sidkey->payload.data; +- memcpy(&psidid->sid, lsid, +- sidkey->datalen < sizeof(struct cifs_sid) ? +- sidkey->datalen : sizeof(struct cifs_sid)); +- memcpy(ssid, &psidid->sid, +- sidkey->datalen < sizeof(struct cifs_sid) ? +- sidkey->datalen : sizeof(struct cifs_sid)); ++ cifs_copy_sid(&psidid->sid, lsid); ++ cifs_copy_sid(ssid, &psidid->sid); + set_bit(SID_ID_MAPPED, &psidid->state); + key_put(sidkey); + kfree(psidid->sidstr); +@@ -396,7 +403,7 @@ id_to_sid(unsigned long cid, uint sidtype, struct cifs_sid *ssid) + return rc; + } + if (test_bit(SID_ID_MAPPED, &psidid->state)) +- memcpy(ssid, &psidid->sid, sizeof(struct cifs_sid)); ++ cifs_copy_sid(ssid, &psidid->sid); + else + rc = -EINVAL; + } +@@ -674,8 +681,6 @@ int compare_sids(const struct cifs_sid *ctsid, const struct cifs_sid *cwsid) + static void copy_sec_desc(const struct cifs_ntsd *pntsd, + struct cifs_ntsd *pnntsd, __u32 sidsoffset) + { +- int i; +- + struct cifs_sid *owner_sid_ptr, *group_sid_ptr; + struct cifs_sid *nowner_sid_ptr, *ngroup_sid_ptr; + +@@ -691,26 +696,14 @@ static void copy_sec_desc(const struct cifs_ntsd *pntsd, + owner_sid_ptr = (struct cifs_sid *)((char *)pntsd + + le32_to_cpu(pntsd->osidoffset)); + nowner_sid_ptr = (struct cifs_sid *)((char *)pnntsd + sidsoffset); +- +- nowner_sid_ptr->revision = owner_sid_ptr->revision; +- nowner_sid_ptr->num_subauth = owner_sid_ptr->num_subauth; +- for (i = 0; i < 6; i++) +- nowner_sid_ptr->authority[i] = owner_sid_ptr->authority[i]; +- for (i = 0; i < 5; i++) +- nowner_sid_ptr->sub_auth[i] = owner_sid_ptr->sub_auth[i]; ++ cifs_copy_sid(nowner_sid_ptr, owner_sid_ptr); + + /* copy group sid */ + group_sid_ptr = (struct cifs_sid *)((char *)pntsd + + le32_to_cpu(pntsd->gsidoffset)); + ngroup_sid_ptr = (struct cifs_sid *)((char *)pnntsd + sidsoffset + + sizeof(struct cifs_sid)); +- +- ngroup_sid_ptr->revision = group_sid_ptr->revision; +- ngroup_sid_ptr->num_subauth = group_sid_ptr->num_subauth; +- for (i = 0; i < 6; i++) +- ngroup_sid_ptr->authority[i] = group_sid_ptr->authority[i]; +- for (i = 0; i < 5; i++) +- ngroup_sid_ptr->sub_auth[i] = group_sid_ptr->sub_auth[i]; ++ cifs_copy_sid(ngroup_sid_ptr, group_sid_ptr); + + return; + } +@@ -1117,8 +1110,7 @@ static int build_sec_desc(struct cifs_ntsd *pntsd, struct cifs_ntsd *pnntsd, + kfree(nowner_sid_ptr); + return rc; + } +- memcpy(owner_sid_ptr, nowner_sid_ptr, +- sizeof(struct cifs_sid)); ++ cifs_copy_sid(owner_sid_ptr, nowner_sid_ptr); + kfree(nowner_sid_ptr); + *aclflag = CIFS_ACL_OWNER; + } +@@ -1136,8 +1128,7 @@ static int build_sec_desc(struct cifs_ntsd *pntsd, struct cifs_ntsd *pnntsd, + kfree(ngroup_sid_ptr); + return rc; + } +- memcpy(group_sid_ptr, ngroup_sid_ptr, +- sizeof(struct cifs_sid)); ++ cifs_copy_sid(group_sid_ptr, ngroup_sid_ptr); + kfree(ngroup_sid_ptr); + *aclflag = CIFS_ACL_GROUP; + } +diff --git a/fs/ecryptfs/main.c b/fs/ecryptfs/main.c +index 1cfef9f..94afdfd 100644 +--- 
a/fs/ecryptfs/main.c ++++ b/fs/ecryptfs/main.c +@@ -280,6 +280,7 @@ static int ecryptfs_parse_options(struct ecryptfs_sb_info *sbi, char *options, + char *fnek_src; + char *cipher_key_bytes_src; + char *fn_cipher_key_bytes_src; ++ u8 cipher_code; + + *check_ruid = 0; + +@@ -421,6 +422,18 @@ static int ecryptfs_parse_options(struct ecryptfs_sb_info *sbi, char *options, + && !fn_cipher_key_bytes_set) + mount_crypt_stat->global_default_fn_cipher_key_bytes = + mount_crypt_stat->global_default_cipher_key_size; ++ ++ cipher_code = ecryptfs_code_for_cipher_string( ++ mount_crypt_stat->global_default_cipher_name, ++ mount_crypt_stat->global_default_cipher_key_size); ++ if (!cipher_code) { ++ ecryptfs_printk(KERN_ERR, ++ "eCryptfs doesn't support cipher: %s", ++ mount_crypt_stat->global_default_cipher_name); ++ rc = -EINVAL; ++ goto out; ++ } ++ + mutex_lock(&key_tfm_list_mutex); + if (!ecryptfs_tfm_exists(mount_crypt_stat->global_default_cipher_name, + NULL)) { +@@ -506,7 +519,6 @@ static struct dentry *ecryptfs_mount(struct file_system_type *fs_type, int flags + goto out; + } + +- s->s_flags = flags; + rc = bdi_setup_and_register(&sbi->bdi, "ecryptfs", BDI_CAP_MAP_COPY); + if (rc) + goto out1; +@@ -542,6 +554,15 @@ static struct dentry *ecryptfs_mount(struct file_system_type *fs_type, int flags + } + + ecryptfs_set_superblock_lower(s, path.dentry->d_sb); ++ ++ /** ++ * Set the POSIX ACL flag based on whether they're enabled in the lower ++ * mount. Force a read-only eCryptfs mount if the lower mount is ro. ++ * Allow a ro eCryptfs mount even when the lower mount is rw. ++ */ ++ s->s_flags = flags & ~MS_POSIXACL; ++ s->s_flags |= path.dentry->d_sb->s_flags & (MS_RDONLY | MS_POSIXACL); ++ + s->s_maxbytes = path.dentry->d_sb->s_maxbytes; + s->s_blocksize = path.dentry->d_sb->s_blocksize; + s->s_magic = ECRYPTFS_SUPER_MAGIC; +diff --git a/fs/nfs/dns_resolve.c b/fs/nfs/dns_resolve.c +index a6e711a..ee02db5 100644 +--- a/fs/nfs/dns_resolve.c ++++ b/fs/nfs/dns_resolve.c +@@ -213,7 +213,7 @@ static int nfs_dns_parse(struct cache_detail *cd, char *buf, int buflen) + { + char buf1[NFS_DNS_HOSTNAME_MAXLEN+1]; + struct nfs_dns_ent key, *item; +- unsigned long ttl; ++ unsigned int ttl; + ssize_t len; + int ret = -EINVAL; + +@@ -236,7 +236,8 @@ static int nfs_dns_parse(struct cache_detail *cd, char *buf, int buflen) + key.namelen = len; + memset(&key.h, 0, sizeof(key.h)); + +- ttl = get_expiry(&buf); ++ if (get_uint(&buf, &ttl) < 0) ++ goto out; + if (ttl == 0) + goto out; + key.h.expiry_time = ttl + seconds_since_boot(); +diff --git a/fs/nfs/internal.h b/fs/nfs/internal.h +index 68b3f20..c5af878 100644 +--- a/fs/nfs/internal.h ++++ b/fs/nfs/internal.h +@@ -274,8 +274,9 @@ extern void nfs_sb_active(struct super_block *sb); + extern void nfs_sb_deactive(struct super_block *sb); + + /* namespace.c */ ++#define NFS_PATH_CANONICAL 1 + extern char *nfs_path(char **p, struct dentry *dentry, +- char *buffer, ssize_t buflen); ++ char *buffer, ssize_t buflen, unsigned flags); + extern struct vfsmount *nfs_d_automount(struct path *path); + #ifdef CONFIG_NFS_V4 + rpc_authflavor_t nfs_find_best_sec(struct nfs4_secinfo_flavors *); +@@ -364,7 +365,7 @@ static inline char *nfs_devname(struct dentry *dentry, + char *buffer, ssize_t buflen) + { + char *dummy; +- return nfs_path(&dummy, dentry, buffer, buflen); ++ return nfs_path(&dummy, dentry, buffer, buflen, NFS_PATH_CANONICAL); + } + + /* +diff --git a/fs/nfs/mount_clnt.c b/fs/nfs/mount_clnt.c +index d4c2d6b..3d93216 100644 +--- a/fs/nfs/mount_clnt.c ++++ 
b/fs/nfs/mount_clnt.c +@@ -181,7 +181,7 @@ int nfs_mount(struct nfs_mount_request *info) + else + msg.rpc_proc = &mnt_clnt->cl_procinfo[MOUNTPROC_MNT]; + +- status = rpc_call_sync(mnt_clnt, &msg, 0); ++ status = rpc_call_sync(mnt_clnt, &msg, RPC_TASK_SOFT|RPC_TASK_TIMEOUT); + rpc_shutdown_client(mnt_clnt); + + if (status < 0) +diff --git a/fs/nfs/namespace.c b/fs/nfs/namespace.c +index 8102391..a86873e 100644 +--- a/fs/nfs/namespace.c ++++ b/fs/nfs/namespace.c +@@ -37,6 +37,7 @@ static struct vfsmount *nfs_do_submount(struct dentry *dentry, + * @dentry - pointer to dentry + * @buffer - result buffer + * @buflen - length of buffer ++ * @flags - options (see below) + * + * Helper function for constructing the server pathname + * by arbitrary hashed dentry. +@@ -44,8 +45,14 @@ static struct vfsmount *nfs_do_submount(struct dentry *dentry, + * This is mainly for use in figuring out the path on the + * server side when automounting on top of an existing partition + * and in generating /proc/mounts and friends. ++ * ++ * Supported flags: ++ * NFS_PATH_CANONICAL: ensure there is exactly one slash after ++ * the original device (export) name ++ * (if unset, the original name is returned verbatim) + */ +-char *nfs_path(char **p, struct dentry *dentry, char *buffer, ssize_t buflen) ++char *nfs_path(char **p, struct dentry *dentry, char *buffer, ssize_t buflen, ++ unsigned flags) + { + char *end; + int namelen; +@@ -78,7 +85,7 @@ rename_retry: + rcu_read_unlock(); + goto rename_retry; + } +- if (*end != '/') { ++ if ((flags & NFS_PATH_CANONICAL) && *end != '/') { + if (--buflen < 0) { + spin_unlock(&dentry->d_lock); + rcu_read_unlock(); +@@ -95,9 +102,11 @@ rename_retry: + return end; + } + namelen = strlen(base); +- /* Strip off excess slashes in base string */ +- while (namelen > 0 && base[namelen - 1] == '/') +- namelen--; ++ if (flags & NFS_PATH_CANONICAL) { ++ /* Strip off excess slashes in base string */ ++ while (namelen > 0 && base[namelen - 1] == '/') ++ namelen--; ++ } + buflen -= namelen; + if (buflen < 0) { + spin_unlock(&dentry->d_lock); +diff --git a/fs/nfs/nfs4namespace.c b/fs/nfs/nfs4namespace.c +index bb80c49..96f2b67 100644 +--- a/fs/nfs/nfs4namespace.c ++++ b/fs/nfs/nfs4namespace.c +@@ -57,7 +57,8 @@ Elong: + static char *nfs4_path(struct dentry *dentry, char *buffer, ssize_t buflen) + { + char *limit; +- char *path = nfs_path(&limit, dentry, buffer, buflen); ++ char *path = nfs_path(&limit, dentry, buffer, buflen, ++ NFS_PATH_CANONICAL); + if (!IS_ERR(path)) { + char *colon = strchr(path, ':'); + if (colon && colon < limit) +diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c +index 61796a40..864b831 100644 +--- a/fs/nfs/nfs4proc.c ++++ b/fs/nfs/nfs4proc.c +@@ -303,8 +303,7 @@ static int nfs4_handle_exception(struct nfs_server *server, int errorcode, struc + dprintk("%s ERROR: %d Reset session\n", __func__, + errorcode); + nfs4_schedule_session_recovery(clp->cl_session); +- exception->retry = 1; +- break; ++ goto wait_on_recovery; + #endif /* defined(CONFIG_NFS_V4_1) */ + case -NFS4ERR_FILE_OPEN: + if (exception->timeout > HZ) { +@@ -1464,9 +1463,11 @@ static void nfs4_open_prepare(struct rpc_task *task, void *calldata) + data->timestamp = jiffies; + if (nfs4_setup_sequence(data->o_arg.server, + &data->o_arg.seq_args, +- &data->o_res.seq_res, 1, task)) +- return; +- rpc_call_start(task); ++ &data->o_res.seq_res, ++ 1, task) != 0) ++ nfs_release_seqid(data->o_arg.seqid); ++ else ++ rpc_call_start(task); + return; + unlock_no_action: + rcu_read_unlock(); +@@ -2046,9 +2047,10 @@ 
static void nfs4_close_prepare(struct rpc_task *task, void *data) + calldata->timestamp = jiffies; + if (nfs4_setup_sequence(NFS_SERVER(calldata->inode), + &calldata->arg.seq_args, &calldata->res.seq_res, +- 1, task)) +- return; +- rpc_call_start(task); ++ 1, task) != 0) ++ nfs_release_seqid(calldata->arg.seqid); ++ else ++ rpc_call_start(task); + } + + static const struct rpc_call_ops nfs4_close_ops = { +@@ -4148,6 +4150,7 @@ static void nfs4_locku_done(struct rpc_task *task, void *data) + if (nfs4_async_handle_error(task, calldata->server, NULL) == -EAGAIN) + rpc_restart_call_prepare(task); + } ++ nfs_release_seqid(calldata->arg.seqid); + } + + static void nfs4_locku_prepare(struct rpc_task *task, void *data) +@@ -4164,9 +4167,11 @@ static void nfs4_locku_prepare(struct rpc_task *task, void *data) + calldata->timestamp = jiffies; + if (nfs4_setup_sequence(calldata->server, + &calldata->arg.seq_args, +- &calldata->res.seq_res, 1, task)) +- return; +- rpc_call_start(task); ++ &calldata->res.seq_res, ++ 1, task) != 0) ++ nfs_release_seqid(calldata->arg.seqid); ++ else ++ rpc_call_start(task); + } + + static const struct rpc_call_ops nfs4_locku_ops = { +@@ -4310,7 +4315,7 @@ static void nfs4_lock_prepare(struct rpc_task *task, void *calldata) + /* Do we need to do an open_to_lock_owner? */ + if (!(data->arg.lock_seqid->sequence->flags & NFS_SEQID_CONFIRMED)) { + if (nfs_wait_on_sequence(data->arg.open_seqid, task) != 0) +- return; ++ goto out_release_lock_seqid; + data->arg.open_stateid = &state->stateid; + data->arg.new_lock_owner = 1; + data->res.open_seqid = data->arg.open_seqid; +@@ -4319,10 +4324,15 @@ static void nfs4_lock_prepare(struct rpc_task *task, void *calldata) + data->timestamp = jiffies; + if (nfs4_setup_sequence(data->server, + &data->arg.seq_args, +- &data->res.seq_res, 1, task)) ++ &data->res.seq_res, ++ 1, task) == 0) { ++ rpc_call_start(task); + return; +- rpc_call_start(task); +- dprintk("%s: done!, ret = %d\n", __func__, data->rpc_status); ++ } ++ nfs_release_seqid(data->arg.open_seqid); ++out_release_lock_seqid: ++ nfs_release_seqid(data->arg.lock_seqid); ++ dprintk("%s: done!, ret = %d\n", __func__, task->tk_status); + } + + static void nfs4_recover_lock_prepare(struct rpc_task *task, void *calldata) +diff --git a/fs/nfs/super.c b/fs/nfs/super.c +index e42d6f6..8150344 100644 +--- a/fs/nfs/super.c ++++ b/fs/nfs/super.c +@@ -768,7 +768,7 @@ static int nfs_show_devname(struct seq_file *m, struct vfsmount *mnt) + int err = 0; + if (!page) + return -ENOMEM; +- devname = nfs_path(&dummy, mnt->mnt_root, page, PAGE_SIZE); ++ devname = nfs_path(&dummy, mnt->mnt_root, page, PAGE_SIZE, 0); + if (IS_ERR(devname)) + err = PTR_ERR(devname); + else +diff --git a/fs/nfsd/export.c b/fs/nfsd/export.c +index 5f312ab..a0205fc 100644 +--- a/fs/nfsd/export.c ++++ b/fs/nfsd/export.c +@@ -401,7 +401,7 @@ fsloc_parse(char **mesg, char *buf, struct nfsd4_fs_locations *fsloc) + int migrated, i, err; + + /* listsize */ +- err = get_int(mesg, &fsloc->locations_count); ++ err = get_uint(mesg, &fsloc->locations_count); + if (err) + return err; + if (fsloc->locations_count > MAX_FS_LOCATIONS) +@@ -459,7 +459,7 @@ static int secinfo_parse(char **mesg, char *buf, struct svc_export *exp) + return -EINVAL; + + for (f = exp->ex_flavors; f < exp->ex_flavors + listsize; f++) { +- err = get_int(mesg, &f->pseudoflavor); ++ err = get_uint(mesg, &f->pseudoflavor); + if (err) + return err; + /* +@@ -468,7 +468,7 @@ static int secinfo_parse(char **mesg, char *buf, struct svc_export *exp) + * problem at export 
time instead of when a client fails + * to authenticate. + */ +- err = get_int(mesg, &f->flags); ++ err = get_uint(mesg, &f->flags); + if (err) + return err; + /* Only some flags are allowed to differ between flavors: */ +diff --git a/fs/notify/fanotify/fanotify.c b/fs/notify/fanotify/fanotify.c +index f35794b..a506360 100644 +--- a/fs/notify/fanotify/fanotify.c ++++ b/fs/notify/fanotify/fanotify.c +@@ -21,6 +21,7 @@ static bool should_merge(struct fsnotify_event *old, struct fsnotify_event *new) + if ((old->path.mnt == new->path.mnt) && + (old->path.dentry == new->path.dentry)) + return true; ++ break; + case (FSNOTIFY_EVENT_NONE): + return true; + default: +diff --git a/fs/xfs/xfs_log_recover.c b/fs/xfs/xfs_log_recover.c +index 4f5d0ce..86ca506 100644 +--- a/fs/xfs/xfs_log_recover.c ++++ b/fs/xfs/xfs_log_recover.c +@@ -3514,7 +3514,7 @@ xlog_do_recovery_pass( + * - order is important. + */ + error = xlog_bread_offset(log, 0, +- bblks - split_bblks, hbp, ++ bblks - split_bblks, dbp, + offset + BBTOB(split_bblks)); + if (error) + goto bread_err2; +diff --git a/include/linux/if_link.h b/include/linux/if_link.h +index c52d4b5..4b24ff4 100644 +--- a/include/linux/if_link.h ++++ b/include/linux/if_link.h +@@ -137,6 +137,7 @@ enum { + IFLA_AF_SPEC, + IFLA_GROUP, /* Group the device belongs to */ + IFLA_NET_NS_FD, ++ IFLA_EXT_MASK, /* Extended info mask, VFs, etc */ + __IFLA_MAX + }; + +diff --git a/include/linux/rtnetlink.h b/include/linux/rtnetlink.h +index 8e872ea..577592e 100644 +--- a/include/linux/rtnetlink.h ++++ b/include/linux/rtnetlink.h +@@ -602,6 +602,9 @@ struct tcamsg { + #define TCA_ACT_TAB 1 /* attr type must be >=1 */ + #define TCAA_MAX 1 + ++/* New extended info filters for IFLA_EXT_MASK */ ++#define RTEXT_FILTER_VF (1 << 0) ++ + /* End of information exported to user level */ + + #ifdef __KERNEL__ +diff --git a/include/linux/sunrpc/cache.h b/include/linux/sunrpc/cache.h +index 5efd8ce..f0c6ab5 100644 +--- a/include/linux/sunrpc/cache.h ++++ b/include/linux/sunrpc/cache.h +@@ -224,6 +224,22 @@ static inline int get_int(char **bpp, int *anint) + return 0; + } + ++static inline int get_uint(char **bpp, unsigned int *anint) ++{ ++ char buf[50]; ++ int len = qword_get(bpp, buf, sizeof(buf)); ++ ++ if (len < 0) ++ return -EINVAL; ++ if (len == 0) ++ return -ENOENT; ++ ++ if (kstrtouint(buf, 0, anint)) ++ return -EINVAL; ++ ++ return 0; ++} ++ + /* + * timestamps kept in the cache are expressed in seconds + * since boot. This is the best for measuring differences in +diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h +index 95852e3..19d632d 100644 +--- a/include/net/cfg80211.h ++++ b/include/net/cfg80211.h +@@ -2431,6 +2431,15 @@ unsigned int ieee80211_get_hdrlen_from_skb(const struct sk_buff *skb); + unsigned int __attribute_const__ ieee80211_hdrlen(__le16 fc); + + /** ++ * ieee80211_get_mesh_hdrlen - get mesh extension header length ++ * @meshhdr: the mesh extension header, only the flags field ++ * (first byte) will be accessed ++ * Returns the length of the extension header, which is always at ++ * least 6 bytes and at most 18 if address 5 and 6 are present. 
++ */ ++unsigned int ieee80211_get_mesh_hdrlen(struct ieee80211s_hdr *meshhdr); ++ ++/** + * DOC: Data path helpers + * + * In addition to generic utilities, cfg80211 also offers +diff --git a/include/net/rtnetlink.h b/include/net/rtnetlink.h +index 678f1ff..3702939 100644 +--- a/include/net/rtnetlink.h ++++ b/include/net/rtnetlink.h +@@ -6,7 +6,7 @@ + + typedef int (*rtnl_doit_func)(struct sk_buff *, struct nlmsghdr *, void *); + typedef int (*rtnl_dumpit_func)(struct sk_buff *, struct netlink_callback *); +-typedef u16 (*rtnl_calcit_func)(struct sk_buff *); ++typedef u16 (*rtnl_calcit_func)(struct sk_buff *, struct nlmsghdr *); + + extern int __rtnl_register(int protocol, int msgtype, + rtnl_doit_func, rtnl_dumpit_func, +diff --git a/include/sound/core.h b/include/sound/core.h +index 3be5ab7..222f11e 100644 +--- a/include/sound/core.h ++++ b/include/sound/core.h +@@ -132,6 +132,7 @@ struct snd_card { + int shutdown; /* this card is going down */ + int free_on_last_close; /* free in context of file_release */ + wait_queue_head_t shutdown_sleep; ++ atomic_t refcount; /* refcount for disconnection */ + struct device *dev; /* device assigned to this card */ + struct device *card_dev; /* cardX object for sysfs */ + +@@ -189,6 +190,7 @@ struct snd_minor { + const struct file_operations *f_ops; /* file operations */ + void *private_data; /* private data for f_ops->open */ + struct device *dev; /* device for sysfs */ ++ struct snd_card *card_ptr; /* assigned card instance */ + }; + + /* return a device pointer linked to each sound device as a parent */ +@@ -295,6 +297,7 @@ int snd_card_info_done(void); + int snd_component_add(struct snd_card *card, const char *component); + int snd_card_file_add(struct snd_card *card, struct file *file); + int snd_card_file_remove(struct snd_card *card, struct file *file); ++void snd_card_unref(struct snd_card *card); + + #define snd_card_set_dev(card, devptr) ((card)->dev = (devptr)) + +diff --git a/include/trace/events/xen.h b/include/trace/events/xen.h +index 92f1a79..348c4fe 100644 +--- a/include/trace/events/xen.h ++++ b/include/trace/events/xen.h +@@ -377,6 +377,14 @@ DECLARE_EVENT_CLASS(xen_mmu_pgd, + DEFINE_XEN_MMU_PGD_EVENT(xen_mmu_pgd_pin); + DEFINE_XEN_MMU_PGD_EVENT(xen_mmu_pgd_unpin); + ++TRACE_EVENT(xen_mmu_flush_tlb_all, ++ TP_PROTO(int x), ++ TP_ARGS(x), ++ TP_STRUCT__entry(__array(char, x, 0)), ++ TP_fast_assign((void)x), ++ TP_printk("%s", "") ++ ); ++ + TRACE_EVENT(xen_mmu_flush_tlb, + TP_PROTO(int x), + TP_ARGS(x), +diff --git a/kernel/module.c b/kernel/module.c +index 6c8fa34..65362d9 100644 +--- a/kernel/module.c ++++ b/kernel/module.c +@@ -2193,15 +2193,17 @@ static void layout_symtab(struct module *mod, struct load_info *info) + + src = (void *)info->hdr + symsect->sh_offset; + nsrc = symsect->sh_size / sizeof(*src); +- for (ndst = i = 1; i < nsrc; ++i, ++src) +- if (is_core_symbol(src, info->sechdrs, info->hdr->e_shnum)) { +- unsigned int j = src->st_name; ++ for (ndst = i = 0; i < nsrc; i++) { ++ if (i == 0 || ++ is_core_symbol(src+i, info->sechdrs, info->hdr->e_shnum)) { ++ unsigned int j = src[i].st_name; + + while (!__test_and_set_bit(j, info->strmap) + && info->strtab[j]) + ++j; + ++ndst; + } ++ } + + /* Append room for core symbols at end of core part. 
*/ + info->symoffs = ALIGN(mod->core_size, symsect->sh_addralign ?: 1); +@@ -2238,14 +2240,14 @@ static void add_kallsyms(struct module *mod, const struct load_info *info) + + mod->core_symtab = dst = mod->module_core + info->symoffs; + src = mod->symtab; +- *dst = *src; +- for (ndst = i = 1; i < mod->num_symtab; ++i, ++src) { +- if (!is_core_symbol(src, info->sechdrs, info->hdr->e_shnum)) +- continue; +- dst[ndst] = *src; +- dst[ndst].st_name = bitmap_weight(info->strmap, +- dst[ndst].st_name); +- ++ndst; ++ for (ndst = i = 0; i < mod->num_symtab; i++) { ++ if (i == 0 || ++ is_core_symbol(src+i, info->sechdrs, info->hdr->e_shnum)) { ++ dst[ndst] = src[i]; ++ dst[ndst].st_name = bitmap_weight(info->strmap, ++ dst[ndst].st_name); ++ ++ndst; ++ } + } + mod->core_num_syms = ndst; + +diff --git a/mm/vmscan.c b/mm/vmscan.c +index 86eb848..313381c 100644 +--- a/mm/vmscan.c ++++ b/mm/vmscan.c +@@ -3015,6 +3015,8 @@ static int kswapd(void *p) + &balanced_classzone_idx); + } + } ++ ++ current->reclaim_state = NULL; + return 0; + } + +diff --git a/net/bluetooth/hci_conn.c b/net/bluetooth/hci_conn.c +index 1fb1aec..aa12649 100644 +--- a/net/bluetooth/hci_conn.c ++++ b/net/bluetooth/hci_conn.c +@@ -642,8 +642,10 @@ int hci_conn_security(struct hci_conn *conn, __u8 sec_level, __u8 auth_type) + { + BT_DBG("conn %p", conn); + ++#ifdef CONFIG_BT_L2CAP + if (conn->type == LE_LINK) + return smp_conn_security(conn, sec_level); ++#endif + + /* For sdp we don't need the link key. */ + if (sec_level == BT_SECURITY_SDP) +diff --git a/net/core/dev.c b/net/core/dev.c +index f500a69..480be72 100644 +--- a/net/core/dev.c ++++ b/net/core/dev.c +@@ -1633,7 +1633,7 @@ static inline int deliver_skb(struct sk_buff *skb, + + static inline bool skb_loop_sk(struct packet_type *ptype, struct sk_buff *skb) + { +- if (ptype->af_packet_priv == NULL) ++ if (!ptype->af_packet_priv || !skb->sk) + return false; + + if (ptype->id_match) +diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c +index 0cf604b..5229c7f 100644 +--- a/net/core/rtnetlink.c ++++ b/net/core/rtnetlink.c +@@ -60,7 +60,6 @@ struct rtnl_link { + }; + + static DEFINE_MUTEX(rtnl_mutex); +-static u16 min_ifinfo_dump_size; + + void rtnl_lock(void) + { +@@ -727,10 +726,11 @@ static void copy_rtnl_link_stats64(void *v, const struct rtnl_link_stats64 *b) + } + + /* All VF info */ +-static inline int rtnl_vfinfo_size(const struct net_device *dev) ++static inline int rtnl_vfinfo_size(const struct net_device *dev, ++ u32 ext_filter_mask) + { +- if (dev->dev.parent && dev_is_pci(dev->dev.parent)) { +- ++ if (dev->dev.parent && dev_is_pci(dev->dev.parent) && ++ (ext_filter_mask & RTEXT_FILTER_VF)) { + int num_vfs = dev_num_vf(dev->dev.parent); + size_t size = nla_total_size(sizeof(struct nlattr)); + size += nla_total_size(num_vfs * sizeof(struct nlattr)); +@@ -769,7 +769,8 @@ static size_t rtnl_port_size(const struct net_device *dev) + return port_self_size; + } + +-static noinline size_t if_nlmsg_size(const struct net_device *dev) ++static noinline size_t if_nlmsg_size(const struct net_device *dev, ++ u32 ext_filter_mask) + { + return NLMSG_ALIGN(sizeof(struct ifinfomsg)) + + nla_total_size(IFNAMSIZ) /* IFLA_IFNAME */ +@@ -787,8 +788,9 @@ static noinline size_t if_nlmsg_size(const struct net_device *dev) + + nla_total_size(4) /* IFLA_MASTER */ + + nla_total_size(1) /* IFLA_OPERSTATE */ + + nla_total_size(1) /* IFLA_LINKMODE */ +- + nla_total_size(4) /* IFLA_NUM_VF */ +- + rtnl_vfinfo_size(dev) /* IFLA_VFINFO_LIST */ ++ + nla_total_size(ext_filter_mask ++ & 
RTEXT_FILTER_VF ? 4 : 0) /* IFLA_NUM_VF */ ++ + rtnl_vfinfo_size(dev, ext_filter_mask) /* IFLA_VFINFO_LIST */ + + rtnl_port_size(dev) /* IFLA_VF_PORTS + IFLA_PORT_SELF */ + + rtnl_link_get_size(dev) /* IFLA_LINKINFO */ + + rtnl_link_get_af_size(dev); /* IFLA_AF_SPEC */ +@@ -871,7 +873,7 @@ static int rtnl_port_fill(struct sk_buff *skb, struct net_device *dev) + + static int rtnl_fill_ifinfo(struct sk_buff *skb, struct net_device *dev, + int type, u32 pid, u32 seq, u32 change, +- unsigned int flags) ++ unsigned int flags, u32 ext_filter_mask) + { + struct ifinfomsg *ifm; + struct nlmsghdr *nlh; +@@ -944,10 +946,11 @@ static int rtnl_fill_ifinfo(struct sk_buff *skb, struct net_device *dev, + goto nla_put_failure; + copy_rtnl_link_stats64(nla_data(attr), stats); + +- if (dev->dev.parent) ++ if (dev->dev.parent && (ext_filter_mask & RTEXT_FILTER_VF)) + NLA_PUT_U32(skb, IFLA_NUM_VF, dev_num_vf(dev->dev.parent)); + +- if (dev->netdev_ops->ndo_get_vf_config && dev->dev.parent) { ++ if (dev->netdev_ops->ndo_get_vf_config && dev->dev.parent ++ && (ext_filter_mask & RTEXT_FILTER_VF)) { + int i; + + struct nlattr *vfinfo, *vf; +@@ -1051,6 +1054,8 @@ static int rtnl_dump_ifinfo(struct sk_buff *skb, struct netlink_callback *cb) + struct net_device *dev; + struct hlist_head *head; + struct hlist_node *node; ++ struct nlattr *tb[IFLA_MAX+1]; ++ u32 ext_filter_mask = 0; + + s_h = cb->args[0]; + s_idx = cb->args[1]; +@@ -1058,6 +1063,13 @@ static int rtnl_dump_ifinfo(struct sk_buff *skb, struct netlink_callback *cb) + rcu_read_lock(); + cb->seq = net->dev_base_seq; + ++ if (nlmsg_parse(cb->nlh, sizeof(struct rtgenmsg), tb, IFLA_MAX, ++ ifla_policy) >= 0) { ++ ++ if (tb[IFLA_EXT_MASK]) ++ ext_filter_mask = nla_get_u32(tb[IFLA_EXT_MASK]); ++ } ++ + for (h = s_h; h < NETDEV_HASHENTRIES; h++, s_idx = 0) { + idx = 0; + head = &net->dev_index_head[h]; +@@ -1067,7 +1079,8 @@ static int rtnl_dump_ifinfo(struct sk_buff *skb, struct netlink_callback *cb) + if (rtnl_fill_ifinfo(skb, dev, RTM_NEWLINK, + NETLINK_CB(cb->skb).pid, + cb->nlh->nlmsg_seq, 0, +- NLM_F_MULTI) <= 0) ++ NLM_F_MULTI, ++ ext_filter_mask) <= 0) + goto out; + + nl_dump_check_consistent(cb, nlmsg_hdr(skb)); +@@ -1103,6 +1116,7 @@ const struct nla_policy ifla_policy[IFLA_MAX+1] = { + [IFLA_VF_PORTS] = { .type = NLA_NESTED }, + [IFLA_PORT_SELF] = { .type = NLA_NESTED }, + [IFLA_AF_SPEC] = { .type = NLA_NESTED }, ++ [IFLA_EXT_MASK] = { .type = NLA_U32 }, + }; + EXPORT_SYMBOL(ifla_policy); + +@@ -1845,6 +1859,7 @@ static int rtnl_getlink(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg) + struct net_device *dev = NULL; + struct sk_buff *nskb; + int err; ++ u32 ext_filter_mask = 0; + + err = nlmsg_parse(nlh, sizeof(*ifm), tb, IFLA_MAX, ifla_policy); + if (err < 0) +@@ -1853,6 +1868,9 @@ static int rtnl_getlink(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg) + if (tb[IFLA_IFNAME]) + nla_strlcpy(ifname, tb[IFLA_IFNAME], IFNAMSIZ); + ++ if (tb[IFLA_EXT_MASK]) ++ ext_filter_mask = nla_get_u32(tb[IFLA_EXT_MASK]); ++ + ifm = nlmsg_data(nlh); + if (ifm->ifi_index > 0) + dev = __dev_get_by_index(net, ifm->ifi_index); +@@ -1864,12 +1882,12 @@ static int rtnl_getlink(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg) + if (dev == NULL) + return -ENODEV; + +- nskb = nlmsg_new(if_nlmsg_size(dev), GFP_KERNEL); ++ nskb = nlmsg_new(if_nlmsg_size(dev, ext_filter_mask), GFP_KERNEL); + if (nskb == NULL) + return -ENOBUFS; + + err = rtnl_fill_ifinfo(nskb, dev, RTM_NEWLINK, NETLINK_CB(skb).pid, +- nlh->nlmsg_seq, 0, 0); ++ nlh->nlmsg_seq, 0, 0, 
ext_filter_mask); + if (err < 0) { + /* -EMSGSIZE implies BUG in if_nlmsg_size */ + WARN_ON(err == -EMSGSIZE); +@@ -1880,8 +1898,32 @@ static int rtnl_getlink(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg) + return err; + } + +-static u16 rtnl_calcit(struct sk_buff *skb) ++static u16 rtnl_calcit(struct sk_buff *skb, struct nlmsghdr *nlh) + { ++ struct net *net = sock_net(skb->sk); ++ struct net_device *dev; ++ struct nlattr *tb[IFLA_MAX+1]; ++ u32 ext_filter_mask = 0; ++ u16 min_ifinfo_dump_size = 0; ++ ++ if (nlmsg_parse(nlh, sizeof(struct rtgenmsg), tb, IFLA_MAX, ++ ifla_policy) >= 0) { ++ if (tb[IFLA_EXT_MASK]) ++ ext_filter_mask = nla_get_u32(tb[IFLA_EXT_MASK]); ++ } ++ ++ if (!ext_filter_mask) ++ return NLMSG_GOODSIZE; ++ /* ++ * traverse the list of net devices and compute the minimum ++ * buffer size based upon the filter mask. ++ */ ++ list_for_each_entry(dev, &net->dev_base_head, dev_list) { ++ min_ifinfo_dump_size = max_t(u16, min_ifinfo_dump_size, ++ if_nlmsg_size(dev, ++ ext_filter_mask)); ++ } ++ + return min_ifinfo_dump_size; + } + +@@ -1916,13 +1958,11 @@ void rtmsg_ifinfo(int type, struct net_device *dev, unsigned change) + int err = -ENOBUFS; + size_t if_info_size; + +- skb = nlmsg_new((if_info_size = if_nlmsg_size(dev)), GFP_KERNEL); ++ skb = nlmsg_new((if_info_size = if_nlmsg_size(dev, 0)), GFP_KERNEL); + if (skb == NULL) + goto errout; + +- min_ifinfo_dump_size = max_t(u16, if_info_size, min_ifinfo_dump_size); +- +- err = rtnl_fill_ifinfo(skb, dev, type, 0, 0, change, 0); ++ err = rtnl_fill_ifinfo(skb, dev, type, 0, 0, change, 0, 0); + if (err < 0) { + /* -EMSGSIZE implies BUG in if_nlmsg_size() */ + WARN_ON(err == -EMSGSIZE); +@@ -1980,7 +2020,7 @@ static int rtnetlink_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh) + return -EOPNOTSUPP; + calcit = rtnl_get_calcit(family, type); + if (calcit) +- min_dump_alloc = calcit(skb); ++ min_dump_alloc = calcit(skb, nlh); + + __rtnl_unlock(); + rtnl = net->rtnl; +diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c +index 7397ad8..52edbb8 100644 +--- a/net/ipv4/tcp.c ++++ b/net/ipv4/tcp.c +@@ -481,14 +481,12 @@ int tcp_ioctl(struct sock *sk, int cmd, unsigned long arg) + !tp->urg_data || + before(tp->urg_seq, tp->copied_seq) || + !before(tp->urg_seq, tp->rcv_nxt)) { +- struct sk_buff *skb; + + answ = tp->rcv_nxt - tp->copied_seq; + +- /* Subtract 1, if FIN is in queue. 
*/ +- skb = skb_peek_tail(&sk->sk_receive_queue); +- if (answ && skb) +- answ -= tcp_hdr(skb)->fin; ++ /* Subtract 1, if FIN was received */ ++ if (answ && sock_flag(sk, SOCK_DONE)) ++ answ--; + } else + answ = tp->urg_seq - tp->copied_seq; + release_sock(sk); +diff --git a/net/ipv4/tcp_illinois.c b/net/ipv4/tcp_illinois.c +index 813b43a..834857f 100644 +--- a/net/ipv4/tcp_illinois.c ++++ b/net/ipv4/tcp_illinois.c +@@ -313,11 +313,13 @@ static void tcp_illinois_info(struct sock *sk, u32 ext, + .tcpv_rttcnt = ca->cnt_rtt, + .tcpv_minrtt = ca->base_rtt, + }; +- u64 t = ca->sum_rtt; + +- do_div(t, ca->cnt_rtt); +- info.tcpv_rtt = t; ++ if (info.tcpv_rttcnt > 0) { ++ u64 t = ca->sum_rtt; + ++ do_div(t, info.tcpv_rttcnt); ++ info.tcpv_rtt = t; ++ } + nla_put(skb, INET_DIAG_VEGASINFO, sizeof(info), &info); + } + } +diff --git a/net/ipv6/ndisc.c b/net/ipv6/ndisc.c +index 0cb78d7..9ffc37f 100644 +--- a/net/ipv6/ndisc.c ++++ b/net/ipv6/ndisc.c +@@ -606,7 +606,7 @@ static void ndisc_send_unsol_na(struct net_device *dev) + { + struct inet6_dev *idev; + struct inet6_ifaddr *ifa; +- struct in6_addr mcaddr; ++ struct in6_addr mcaddr = IN6ADDR_LINKLOCAL_ALLNODES_INIT; + + idev = in6_dev_get(dev); + if (!idev) +@@ -614,7 +614,6 @@ static void ndisc_send_unsol_na(struct net_device *dev) + + read_lock_bh(&idev->lock); + list_for_each_entry(ifa, &idev->addr_list, if_list) { +- addrconf_addr_solict_mult(&ifa->addr, &mcaddr); + ndisc_send_na(dev, NULL, &mcaddr, &ifa->addr, + /*router=*/ !!idev->cnf.forwarding, + /*solicited=*/ false, /*override=*/ true, +diff --git a/net/ipv6/route.c b/net/ipv6/route.c +index 488a1b7..19724bd 100644 +--- a/net/ipv6/route.c ++++ b/net/ipv6/route.c +@@ -185,7 +185,7 @@ static struct dst_ops ip6_dst_blackhole_ops = { + }; + + static const u32 ip6_template_metrics[RTAX_MAX] = { +- [RTAX_HOPLIMIT - 1] = 255, ++ [RTAX_HOPLIMIT - 1] = 0, + }; + + static struct rt6_info ip6_null_entry_template = { +@@ -1097,7 +1097,7 @@ struct dst_entry *icmp6_dst_alloc(struct net_device *dev, + ipv6_addr_copy(&rt->rt6i_dst.addr, addr); + rt->rt6i_dst.plen = 128; + rt->rt6i_idev = idev; +- dst_metric_set(&rt->dst, RTAX_HOPLIMIT, 255); ++ dst_metric_set(&rt->dst, RTAX_HOPLIMIT, 0); + + spin_lock_bh(&icmp6_dst_lock); + rt->dst.next = icmp6_dst_gc_list; +diff --git a/net/l2tp/l2tp_eth.c b/net/l2tp/l2tp_eth.c +index 2cef50b..64164fb 100644 +--- a/net/l2tp/l2tp_eth.c ++++ b/net/l2tp/l2tp_eth.c +@@ -269,6 +269,7 @@ static int l2tp_eth_create(struct net *net, u32 tunnel_id, u32 session_id, u32 p + + out_del_dev: + free_netdev(dev); ++ spriv->dev = NULL; + out_del_session: + l2tp_session_delete(session); + out: +diff --git a/net/mac80211/ibss.c b/net/mac80211/ibss.c +index 3ece106..8c7364b 100644 +--- a/net/mac80211/ibss.c ++++ b/net/mac80211/ibss.c +@@ -940,7 +940,7 @@ int ieee80211_ibss_join(struct ieee80211_sub_if_data *sdata, + sdata->u.ibss.state = IEEE80211_IBSS_MLME_SEARCH; + sdata->u.ibss.ibss_join_req = jiffies; + +- memcpy(sdata->u.ibss.ssid, params->ssid, IEEE80211_MAX_SSID_LEN); ++ memcpy(sdata->u.ibss.ssid, params->ssid, params->ssid_len); + sdata->u.ibss.ssid_len = params->ssid_len; + + mutex_unlock(&sdata->u.ibss.mtx); +diff --git a/net/mac80211/rx.c b/net/mac80211/rx.c +index cda4875..cd6cbdb 100644 +--- a/net/mac80211/rx.c ++++ b/net/mac80211/rx.c +@@ -515,6 +515,11 @@ ieee80211_rx_mesh_check(struct ieee80211_rx_data *rx) + + if (ieee80211_is_action(hdr->frame_control)) { + u8 category; ++ ++ /* make sure category field is present */ ++ if (rx->skb->len < IEEE80211_MIN_ACTION_SIZE) ++ return 
RX_DROP_MONITOR; ++ + mgmt = (struct ieee80211_mgmt *)hdr; + category = mgmt->u.action.category; + if (category != WLAN_CATEGORY_MESH_ACTION && +@@ -854,14 +859,16 @@ ieee80211_rx_h_check(struct ieee80211_rx_data *rx) + (!rx->sta || !test_sta_flag(rx->sta, WLAN_STA_ASSOC)))) { + if (rx->sta && rx->sta->dummy && + ieee80211_is_data_present(hdr->frame_control)) { +- u16 ethertype; +- u8 *payload; +- +- payload = rx->skb->data + +- ieee80211_hdrlen(hdr->frame_control); +- ethertype = (payload[6] << 8) | payload[7]; +- if (cpu_to_be16(ethertype) == +- rx->sdata->control_port_protocol) ++ unsigned int hdrlen; ++ __be16 ethertype; ++ ++ hdrlen = ieee80211_hdrlen(hdr->frame_control); ++ ++ if (rx->skb->len < hdrlen + 8) ++ return RX_DROP_MONITOR; ++ ++ skb_copy_bits(rx->skb, hdrlen + 6, ðertype, 2); ++ if (ethertype == rx->sdata->control_port_protocol) + return RX_CONTINUE; + } + return RX_DROP_MONITOR; +@@ -1449,11 +1456,14 @@ ieee80211_rx_h_defragment(struct ieee80211_rx_data *rx) + + hdr = (struct ieee80211_hdr *)rx->skb->data; + fc = hdr->frame_control; ++ ++ if (ieee80211_is_ctl(fc)) ++ return RX_CONTINUE; ++ + sc = le16_to_cpu(hdr->seq_ctrl); + frag = sc & IEEE80211_SCTL_FRAG; + + if (likely((!ieee80211_has_morefrags(fc) && frag == 0) || +- (rx->skb)->len < 24 || + is_multicast_ether_addr(hdr->addr1))) { + /* not fragmented */ + goto out; +@@ -1887,6 +1897,20 @@ ieee80211_rx_h_mesh_fwding(struct ieee80211_rx_data *rx) + + hdr = (struct ieee80211_hdr *) skb->data; + hdrlen = ieee80211_hdrlen(hdr->frame_control); ++ ++ /* make sure fixed part of mesh header is there, also checks skb len */ ++ if (!pskb_may_pull(rx->skb, hdrlen + 6)) ++ return RX_DROP_MONITOR; ++ ++ mesh_hdr = (struct ieee80211s_hdr *) (skb->data + hdrlen); ++ ++ /* make sure full mesh header is there, also checks skb len */ ++ if (!pskb_may_pull(rx->skb, ++ hdrlen + ieee80211_get_mesh_hdrlen(mesh_hdr))) ++ return RX_DROP_MONITOR; ++ ++ /* reload pointers */ ++ hdr = (struct ieee80211_hdr *) skb->data; + mesh_hdr = (struct ieee80211s_hdr *) (skb->data + hdrlen); + + /* frame is in RMC, don't forward */ +@@ -1895,7 +1919,8 @@ ieee80211_rx_h_mesh_fwding(struct ieee80211_rx_data *rx) + mesh_rmc_check(hdr->addr3, mesh_hdr, rx->sdata)) + return RX_DROP_MONITOR; + +- if (!ieee80211_is_data(hdr->frame_control)) ++ if (!ieee80211_is_data(hdr->frame_control) || ++ !(status->rx_flags & IEEE80211_RX_RA_MATCH)) + return RX_CONTINUE; + + if (!mesh_hdr->ttl) +@@ -1916,9 +1941,12 @@ ieee80211_rx_h_mesh_fwding(struct ieee80211_rx_data *rx) + if (is_multicast_ether_addr(hdr->addr1)) { + mpp_addr = hdr->addr3; + proxied_addr = mesh_hdr->eaddr1; +- } else { ++ } else if (mesh_hdr->flags & MESH_FLAGS_AE_A5_A6) { ++ /* has_a4 already checked in ieee80211_rx_mesh_check */ + mpp_addr = hdr->addr4; + proxied_addr = mesh_hdr->eaddr2; ++ } else { ++ return RX_DROP_MONITOR; + } + + rcu_read_lock(); +@@ -1941,7 +1969,7 @@ ieee80211_rx_h_mesh_fwding(struct ieee80211_rx_data *rx) + + mesh_hdr->ttl--; + +- if (status->rx_flags & IEEE80211_RX_RA_MATCH) { ++ { + if (!mesh_hdr->ttl) + IEEE80211_IFSTA_MESH_CTR_INC(&rx->sdata->u.mesh, + dropped_frames_ttl); +@@ -2295,6 +2323,10 @@ ieee80211_rx_h_action(struct ieee80211_rx_data *rx) + } + break; + case WLAN_CATEGORY_SELF_PROTECTED: ++ if (len < (IEEE80211_MIN_ACTION_SIZE + ++ sizeof(mgmt->u.action.u.self_prot.action_code))) ++ break; ++ + switch (mgmt->u.action.u.self_prot.action_code) { + case WLAN_SP_MESH_PEERING_OPEN: + case WLAN_SP_MESH_PEERING_CLOSE: +@@ -2313,6 +2345,10 @@ ieee80211_rx_h_action(struct 
ieee80211_rx_data *rx) + } + break; + case WLAN_CATEGORY_MESH_ACTION: ++ if (len < (IEEE80211_MIN_ACTION_SIZE + ++ sizeof(mgmt->u.action.u.mesh_action.action_code))) ++ break; ++ + if (!ieee80211_vif_is_mesh(&sdata->vif)) + break; + if (mesh_action_is_path_sel(mgmt) && +@@ -2870,10 +2906,15 @@ static void __ieee80211_rx_handle_packet(struct ieee80211_hw *hw, + test_bit(SCAN_OFF_CHANNEL, &local->scanning))) + status->rx_flags |= IEEE80211_RX_IN_SCAN; + +- if (ieee80211_is_mgmt(fc)) +- err = skb_linearize(skb); +- else ++ if (ieee80211_is_mgmt(fc)) { ++ /* drop frame if too short for header */ ++ if (skb->len < ieee80211_hdrlen(fc)) ++ err = -ENOBUFS; ++ else ++ err = skb_linearize(skb); ++ } else { + err = !pskb_may_pull(skb, ieee80211_hdrlen(fc)); ++ } + + if (err) { + dev_kfree_skb(skb); +diff --git a/net/netlink/af_netlink.c b/net/netlink/af_netlink.c +index 38b78b9..3d1d55d 100644 +--- a/net/netlink/af_netlink.c ++++ b/net/netlink/af_netlink.c +@@ -137,6 +137,8 @@ static void netlink_destroy_callback(struct netlink_callback *cb); + static DEFINE_RWLOCK(nl_table_lock); + static atomic_t nl_table_users = ATOMIC_INIT(0); + ++#define nl_deref_protected(X) rcu_dereference_protected(X, lockdep_is_held(&nl_table_lock)); ++ + static ATOMIC_NOTIFIER_HEAD(netlink_chain); + + static u32 netlink_group_mask(u32 group) +@@ -331,6 +333,11 @@ netlink_update_listeners(struct sock *sk) + struct hlist_node *node; + unsigned long mask; + unsigned int i; ++ struct listeners *listeners; ++ ++ listeners = nl_deref_protected(tbl->listeners); ++ if (!listeners) ++ return; + + for (i = 0; i < NLGRPLONGS(tbl->groups); i++) { + mask = 0; +@@ -338,7 +345,7 @@ netlink_update_listeners(struct sock *sk) + if (i < NLGRPLONGS(nlk_sk(sk)->ngroups)) + mask |= nlk_sk(sk)->groups[i]; + } +- tbl->listeners->masks[i] = mask; ++ listeners->masks[i] = mask; + } + /* this function is only called with the netlink table "grabbed", which + * makes sure updates are visible before bind or setsockopt return. 
*/ +@@ -519,7 +526,11 @@ static int netlink_release(struct socket *sock) + if (netlink_is_kernel(sk)) { + BUG_ON(nl_table[sk->sk_protocol].registered == 0); + if (--nl_table[sk->sk_protocol].registered == 0) { +- kfree(nl_table[sk->sk_protocol].listeners); ++ struct listeners *old; ++ ++ old = nl_deref_protected(nl_table[sk->sk_protocol].listeners); ++ RCU_INIT_POINTER(nl_table[sk->sk_protocol].listeners, NULL); ++ kfree_rcu(old, rcu); + nl_table[sk->sk_protocol].module = NULL; + nl_table[sk->sk_protocol].registered = 0; + } +@@ -950,7 +961,7 @@ int netlink_has_listeners(struct sock *sk, unsigned int group) + rcu_read_lock(); + listeners = rcu_dereference(nl_table[sk->sk_protocol].listeners); + +- if (group - 1 < nl_table[sk->sk_protocol].groups) ++ if (listeners && group - 1 < nl_table[sk->sk_protocol].groups) + res = test_bit(group - 1, listeners->masks); + + rcu_read_unlock(); +@@ -1584,7 +1595,7 @@ int __netlink_change_ngroups(struct sock *sk, unsigned int groups) + new = kzalloc(sizeof(*new) + NLGRPSZ(groups), GFP_ATOMIC); + if (!new) + return -ENOMEM; +- old = rcu_dereference_protected(tbl->listeners, 1); ++ old = nl_deref_protected(tbl->listeners); + memcpy(new->masks, old->masks, NLGRPSZ(tbl->groups)); + rcu_assign_pointer(tbl->listeners, new); + +diff --git a/net/sctp/sm_sideeffect.c b/net/sctp/sm_sideeffect.c +index 76388b0..9032d50 100644 +--- a/net/sctp/sm_sideeffect.c ++++ b/net/sctp/sm_sideeffect.c +@@ -1604,8 +1604,9 @@ static int sctp_cmd_interpreter(sctp_event_t event_type, + asoc->outqueue.outstanding_bytes; + sackh.num_gap_ack_blocks = 0; + sackh.num_dup_tsns = 0; ++ chunk->subh.sack_hdr = &sackh; + sctp_add_cmd_sf(commands, SCTP_CMD_PROCESS_SACK, +- SCTP_SACKH(&sackh)); ++ SCTP_CHUNK(chunk)); + break; + + case SCTP_CMD_DISCARD_PACKET: +diff --git a/net/wireless/core.c b/net/wireless/core.c +index 8f5042d..ea93f4b 100644 +--- a/net/wireless/core.c ++++ b/net/wireless/core.c +@@ -548,8 +548,7 @@ int wiphy_register(struct wiphy *wiphy) + for (i = 0; i < sband->n_channels; i++) { + sband->channels[i].orig_flags = + sband->channels[i].flags; +- sband->channels[i].orig_mag = +- sband->channels[i].max_antenna_gain; ++ sband->channels[i].orig_mag = INT_MAX; + sband->channels[i].orig_mpwr = + sband->channels[i].max_power; + sband->channels[i].band = band; +diff --git a/net/wireless/util.c b/net/wireless/util.c +index 22fb802..5fba039 100644 +--- a/net/wireless/util.c ++++ b/net/wireless/util.c +@@ -301,23 +301,21 @@ unsigned int ieee80211_get_hdrlen_from_skb(const struct sk_buff *skb) + } + EXPORT_SYMBOL(ieee80211_get_hdrlen_from_skb); + +-static int ieee80211_get_mesh_hdrlen(struct ieee80211s_hdr *meshhdr) ++unsigned int ieee80211_get_mesh_hdrlen(struct ieee80211s_hdr *meshhdr) + { + int ae = meshhdr->flags & MESH_FLAGS_AE; +- /* 7.1.3.5a.2 */ ++ /* 802.11-2012, 8.2.4.7.3 */ + switch (ae) { ++ default: + case 0: + return 6; + case MESH_FLAGS_AE_A4: + return 12; + case MESH_FLAGS_AE_A5_A6: + return 18; +- case (MESH_FLAGS_AE_A4 | MESH_FLAGS_AE_A5_A6): +- return 24; +- default: +- return 6; + } + } ++EXPORT_SYMBOL(ieee80211_get_mesh_hdrlen); + + int ieee80211_data_to_8023(struct sk_buff *skb, const u8 *addr, + enum nl80211_iftype iftype) +@@ -365,6 +363,8 @@ int ieee80211_data_to_8023(struct sk_buff *skb, const u8 *addr, + /* make sure meshdr->flags is on the linear part */ + if (!pskb_may_pull(skb, hdrlen + 1)) + return -1; ++ if (meshdr->flags & MESH_FLAGS_AE_A4) ++ return -1; + if (meshdr->flags & MESH_FLAGS_AE_A5_A6) { + skb_copy_bits(skb, hdrlen + + offsetof(struct 
ieee80211s_hdr, eaddr1), +@@ -389,6 +389,8 @@ int ieee80211_data_to_8023(struct sk_buff *skb, const u8 *addr, + /* make sure meshdr->flags is on the linear part */ + if (!pskb_may_pull(skb, hdrlen + 1)) + return -1; ++ if (meshdr->flags & MESH_FLAGS_AE_A5_A6) ++ return -1; + if (meshdr->flags & MESH_FLAGS_AE_A4) + skb_copy_bits(skb, hdrlen + + offsetof(struct ieee80211s_hdr, eaddr1), +diff --git a/sound/core/control.c b/sound/core/control.c +index 819a5c5..5511307 100644 +--- a/sound/core/control.c ++++ b/sound/core/control.c +@@ -86,6 +86,7 @@ static int snd_ctl_open(struct inode *inode, struct file *file) + write_lock_irqsave(&card->ctl_files_rwlock, flags); + list_add_tail(&ctl->list, &card->ctl_files); + write_unlock_irqrestore(&card->ctl_files_rwlock, flags); ++ snd_card_unref(card); + return 0; + + __error: +@@ -93,6 +94,8 @@ static int snd_ctl_open(struct inode *inode, struct file *file) + __error2: + snd_card_file_remove(card, file); + __error1: ++ if (card) ++ snd_card_unref(card); + return err; + } + +@@ -1433,6 +1436,8 @@ static ssize_t snd_ctl_read(struct file *file, char __user *buffer, + spin_unlock_irq(&ctl->read_lock); + schedule(); + remove_wait_queue(&ctl->change_sleep, &wait); ++ if (ctl->card->shutdown) ++ return -ENODEV; + if (signal_pending(current)) + return -ERESTARTSYS; + spin_lock_irq(&ctl->read_lock); +diff --git a/sound/core/hwdep.c b/sound/core/hwdep.c +index 75ea16f..3f7f662 100644 +--- a/sound/core/hwdep.c ++++ b/sound/core/hwdep.c +@@ -100,8 +100,10 @@ static int snd_hwdep_open(struct inode *inode, struct file * file) + if (hw == NULL) + return -ENODEV; + +- if (!try_module_get(hw->card->module)) ++ if (!try_module_get(hw->card->module)) { ++ snd_card_unref(hw->card); + return -EFAULT; ++ } + + init_waitqueue_entry(&wait, current); + add_wait_queue(&hw->open_wait, &wait); +@@ -129,6 +131,10 @@ static int snd_hwdep_open(struct inode *inode, struct file * file) + mutex_unlock(&hw->open_mutex); + schedule(); + mutex_lock(&hw->open_mutex); ++ if (hw->card->shutdown) { ++ err = -ENODEV; ++ break; ++ } + if (signal_pending(current)) { + err = -ERESTARTSYS; + break; +@@ -148,6 +154,7 @@ static int snd_hwdep_open(struct inode *inode, struct file * file) + mutex_unlock(&hw->open_mutex); + if (err < 0) + module_put(hw->card->module); ++ snd_card_unref(hw->card); + return err; + } + +@@ -459,12 +466,15 @@ static int snd_hwdep_dev_disconnect(struct snd_device *device) + mutex_unlock(®ister_mutex); + return -EINVAL; + } ++ mutex_lock(&hwdep->open_mutex); ++ wake_up(&hwdep->open_wait); + #ifdef CONFIG_SND_OSSEMUL + if (hwdep->ossreg) + snd_unregister_oss_device(hwdep->oss_type, hwdep->card, hwdep->device); + #endif + snd_unregister_device(SNDRV_DEVICE_TYPE_HWDEP, hwdep->card, hwdep->device); + list_del_init(&hwdep->list); ++ mutex_unlock(&hwdep->open_mutex); + mutex_unlock(®ister_mutex); + return 0; + } +diff --git a/sound/core/init.c b/sound/core/init.c +index 3ac49b1..fa0f35b 100644 +--- a/sound/core/init.c ++++ b/sound/core/init.c +@@ -212,6 +212,7 @@ int snd_card_create(int idx, const char *xid, + spin_lock_init(&card->files_lock); + INIT_LIST_HEAD(&card->files_list); + init_waitqueue_head(&card->shutdown_sleep); ++ atomic_set(&card->refcount, 0); + #ifdef CONFIG_PM + mutex_init(&card->power_lock); + init_waitqueue_head(&card->power_sleep); +@@ -445,21 +446,36 @@ static int snd_card_do_free(struct snd_card *card) + return 0; + } + ++/** ++ * snd_card_unref - release the reference counter ++ * @card: the card instance ++ * ++ * Decrements the reference counter. 
When it reaches to zero, wake up ++ * the sleeper and call the destructor if needed. ++ */ ++void snd_card_unref(struct snd_card *card) ++{ ++ if (atomic_dec_and_test(&card->refcount)) { ++ wake_up(&card->shutdown_sleep); ++ if (card->free_on_last_close) ++ snd_card_do_free(card); ++ } ++} ++EXPORT_SYMBOL(snd_card_unref); ++ + int snd_card_free_when_closed(struct snd_card *card) + { +- int free_now = 0; +- int ret = snd_card_disconnect(card); +- if (ret) +- return ret; ++ int ret; + +- spin_lock(&card->files_lock); +- if (list_empty(&card->files_list)) +- free_now = 1; +- else +- card->free_on_last_close = 1; +- spin_unlock(&card->files_lock); ++ atomic_inc(&card->refcount); ++ ret = snd_card_disconnect(card); ++ if (ret) { ++ atomic_dec(&card->refcount); ++ return ret; ++ } + +- if (free_now) ++ card->free_on_last_close = 1; ++ if (atomic_dec_and_test(&card->refcount)) + snd_card_do_free(card); + return 0; + } +@@ -473,7 +489,7 @@ int snd_card_free(struct snd_card *card) + return ret; + + /* wait, until all devices are ready for the free operation */ +- wait_event(card->shutdown_sleep, list_empty(&card->files_list)); ++ wait_event(card->shutdown_sleep, !atomic_read(&card->refcount)); + snd_card_do_free(card); + return 0; + } +@@ -854,6 +870,7 @@ int snd_card_file_add(struct snd_card *card, struct file *file) + return -ENODEV; + } + list_add(&mfile->list, &card->files_list); ++ atomic_inc(&card->refcount); + spin_unlock(&card->files_lock); + return 0; + } +@@ -876,7 +893,6 @@ EXPORT_SYMBOL(snd_card_file_add); + int snd_card_file_remove(struct snd_card *card, struct file *file) + { + struct snd_monitor_file *mfile, *found = NULL; +- int last_close = 0; + + spin_lock(&card->files_lock); + list_for_each_entry(mfile, &card->files_list, list) { +@@ -891,19 +907,13 @@ int snd_card_file_remove(struct snd_card *card, struct file *file) + break; + } + } +- if (list_empty(&card->files_list)) +- last_close = 1; + spin_unlock(&card->files_lock); +- if (last_close) { +- wake_up(&card->shutdown_sleep); +- if (card->free_on_last_close) +- snd_card_do_free(card); +- } + if (!found) { + snd_printk(KERN_ERR "ALSA card file remove problem (%p)\n", file); + return -ENOENT; + } + kfree(found); ++ snd_card_unref(card); + return 0; + } + +diff --git a/sound/core/oss/mixer_oss.c b/sound/core/oss/mixer_oss.c +index 18297f7..c353768 100644 +--- a/sound/core/oss/mixer_oss.c ++++ b/sound/core/oss/mixer_oss.c +@@ -52,14 +52,19 @@ static int snd_mixer_oss_open(struct inode *inode, struct file *file) + SNDRV_OSS_DEVICE_TYPE_MIXER); + if (card == NULL) + return -ENODEV; +- if (card->mixer_oss == NULL) ++ if (card->mixer_oss == NULL) { ++ snd_card_unref(card); + return -ENODEV; ++ } + err = snd_card_file_add(card, file); +- if (err < 0) ++ if (err < 0) { ++ snd_card_unref(card); + return err; ++ } + fmixer = kzalloc(sizeof(*fmixer), GFP_KERNEL); + if (fmixer == NULL) { + snd_card_file_remove(card, file); ++ snd_card_unref(card); + return -ENOMEM; + } + fmixer->card = card; +@@ -68,8 +73,10 @@ static int snd_mixer_oss_open(struct inode *inode, struct file *file) + if (!try_module_get(card->module)) { + kfree(fmixer); + snd_card_file_remove(card, file); ++ snd_card_unref(card); + return -EFAULT; + } ++ snd_card_unref(card); + return 0; + } + +diff --git a/sound/core/oss/pcm_oss.c b/sound/core/oss/pcm_oss.c +index 3cc4b86..542f69e 100644 +--- a/sound/core/oss/pcm_oss.c ++++ b/sound/core/oss/pcm_oss.c +@@ -2441,6 +2441,10 @@ static int snd_pcm_oss_open(struct inode *inode, struct file *file) + mutex_unlock(&pcm->open_mutex); 
+ schedule(); + mutex_lock(&pcm->open_mutex); ++ if (pcm->card->shutdown) { ++ err = -ENODEV; ++ break; ++ } + if (signal_pending(current)) { + err = -ERESTARTSYS; + break; +@@ -2450,6 +2454,7 @@ static int snd_pcm_oss_open(struct inode *inode, struct file *file) + mutex_unlock(&pcm->open_mutex); + if (err < 0) + goto __error; ++ snd_card_unref(pcm->card); + return err; + + __error: +@@ -2457,6 +2462,8 @@ static int snd_pcm_oss_open(struct inode *inode, struct file *file) + __error2: + snd_card_file_remove(pcm->card, file); + __error1: ++ if (pcm) ++ snd_card_unref(pcm->card); + return err; + } + +diff --git a/sound/core/pcm.c b/sound/core/pcm.c +index 8928ca87..13eaeb3 100644 +--- a/sound/core/pcm.c ++++ b/sound/core/pcm.c +@@ -1046,11 +1046,19 @@ static int snd_pcm_dev_disconnect(struct snd_device *device) + if (list_empty(&pcm->list)) + goto unlock; + ++ mutex_lock(&pcm->open_mutex); ++ wake_up(&pcm->open_wait); + list_del_init(&pcm->list); + for (cidx = 0; cidx < 2; cidx++) +- for (substream = pcm->streams[cidx].substream; substream; substream = substream->next) +- if (substream->runtime) ++ for (substream = pcm->streams[cidx].substream; substream; substream = substream->next) { ++ snd_pcm_stream_lock_irq(substream); ++ if (substream->runtime) { + substream->runtime->status->state = SNDRV_PCM_STATE_DISCONNECTED; ++ wake_up(&substream->runtime->sleep); ++ wake_up(&substream->runtime->tsleep); ++ } ++ snd_pcm_stream_unlock_irq(substream); ++ } + list_for_each_entry(notify, &snd_pcm_notify_list, list) { + notify->n_disconnect(pcm); + } +@@ -1066,6 +1074,7 @@ static int snd_pcm_dev_disconnect(struct snd_device *device) + } + snd_unregister_device(devtype, pcm->card, pcm->device); + } ++ mutex_unlock(&pcm->open_mutex); + unlock: + mutex_unlock(®ister_mutex); + return 0; +diff --git a/sound/core/pcm_native.c b/sound/core/pcm_native.c +index 25ed9fe..7ada40e 100644 +--- a/sound/core/pcm_native.c ++++ b/sound/core/pcm_native.c +@@ -369,6 +369,14 @@ static int period_to_usecs(struct snd_pcm_runtime *runtime) + return usecs; + } + ++static void snd_pcm_set_state(struct snd_pcm_substream *substream, int state) ++{ ++ snd_pcm_stream_lock_irq(substream); ++ if (substream->runtime->status->state != SNDRV_PCM_STATE_DISCONNECTED) ++ substream->runtime->status->state = state; ++ snd_pcm_stream_unlock_irq(substream); ++} ++ + static int snd_pcm_hw_params(struct snd_pcm_substream *substream, + struct snd_pcm_hw_params *params) + { +@@ -452,7 +460,7 @@ static int snd_pcm_hw_params(struct snd_pcm_substream *substream, + runtime->boundary *= 2; + + snd_pcm_timer_resolution_change(substream); +- runtime->status->state = SNDRV_PCM_STATE_SETUP; ++ snd_pcm_set_state(substream, SNDRV_PCM_STATE_SETUP); + + if (pm_qos_request_active(&substream->latency_pm_qos_req)) + pm_qos_remove_request(&substream->latency_pm_qos_req); +@@ -464,7 +472,7 @@ static int snd_pcm_hw_params(struct snd_pcm_substream *substream, + /* hardware might be unusable from this time, + so we force application to retry to set + the correct hardware parameter settings */ +- runtime->status->state = SNDRV_PCM_STATE_OPEN; ++ snd_pcm_set_state(substream, SNDRV_PCM_STATE_OPEN); + if (substream->ops->hw_free != NULL) + substream->ops->hw_free(substream); + return err; +@@ -512,7 +520,7 @@ static int snd_pcm_hw_free(struct snd_pcm_substream *substream) + return -EBADFD; + if (substream->ops->hw_free) + result = substream->ops->hw_free(substream); +- runtime->status->state = SNDRV_PCM_STATE_OPEN; ++ snd_pcm_set_state(substream, SNDRV_PCM_STATE_OPEN); + 
pm_qos_remove_request(&substream->latency_pm_qos_req); + return result; + } +@@ -1320,7 +1328,7 @@ static void snd_pcm_post_prepare(struct snd_pcm_substream *substream, int state) + { + struct snd_pcm_runtime *runtime = substream->runtime; + runtime->control->appl_ptr = runtime->status->hw_ptr; +- runtime->status->state = SNDRV_PCM_STATE_PREPARED; ++ snd_pcm_set_state(substream, SNDRV_PCM_STATE_PREPARED); + } + + static struct action_ops snd_pcm_action_prepare = { +@@ -1500,6 +1508,10 @@ static int snd_pcm_drain(struct snd_pcm_substream *substream, + down_read(&snd_pcm_link_rwsem); + snd_pcm_stream_lock_irq(substream); + remove_wait_queue(&to_check->sleep, &wait); ++ if (card->shutdown) { ++ result = -ENODEV; ++ break; ++ } + if (tout == 0) { + if (substream->runtime->status->state == SNDRV_PCM_STATE_SUSPENDED) + result = -ESTRPIPE; +@@ -1620,6 +1632,7 @@ static int snd_pcm_link(struct snd_pcm_substream *substream, int fd) + _end: + write_unlock_irq(&snd_pcm_link_rwlock); + up_write(&snd_pcm_link_rwsem); ++ snd_card_unref(substream1->pcm->card); + fput(file); + return res; + } +@@ -2092,7 +2105,10 @@ static int snd_pcm_playback_open(struct inode *inode, struct file *file) + return err; + pcm = snd_lookup_minor_data(iminor(inode), + SNDRV_DEVICE_TYPE_PCM_PLAYBACK); +- return snd_pcm_open(file, pcm, SNDRV_PCM_STREAM_PLAYBACK); ++ err = snd_pcm_open(file, pcm, SNDRV_PCM_STREAM_PLAYBACK); ++ if (pcm) ++ snd_card_unref(pcm->card); ++ return err; + } + + static int snd_pcm_capture_open(struct inode *inode, struct file *file) +@@ -2103,7 +2119,10 @@ static int snd_pcm_capture_open(struct inode *inode, struct file *file) + return err; + pcm = snd_lookup_minor_data(iminor(inode), + SNDRV_DEVICE_TYPE_PCM_CAPTURE); +- return snd_pcm_open(file, pcm, SNDRV_PCM_STREAM_CAPTURE); ++ err = snd_pcm_open(file, pcm, SNDRV_PCM_STREAM_CAPTURE); ++ if (pcm) ++ snd_card_unref(pcm->card); ++ return err; + } + + static int snd_pcm_open(struct file *file, struct snd_pcm *pcm, int stream) +@@ -2140,6 +2159,10 @@ static int snd_pcm_open(struct file *file, struct snd_pcm *pcm, int stream) + mutex_unlock(&pcm->open_mutex); + schedule(); + mutex_lock(&pcm->open_mutex); ++ if (pcm->card->shutdown) { ++ err = -ENODEV; ++ break; ++ } + if (signal_pending(current)) { + err = -ERESTARTSYS; + break; +diff --git a/sound/core/rawmidi.c b/sound/core/rawmidi.c +index ebf6e49..1bb95ae 100644 +--- a/sound/core/rawmidi.c ++++ b/sound/core/rawmidi.c +@@ -379,8 +379,10 @@ static int snd_rawmidi_open(struct inode *inode, struct file *file) + if (rmidi == NULL) + return -ENODEV; + +- if (!try_module_get(rmidi->card->module)) ++ if (!try_module_get(rmidi->card->module)) { ++ snd_card_unref(rmidi->card); + return -ENXIO; ++ } + + mutex_lock(&rmidi->open_mutex); + card = rmidi->card; +@@ -422,6 +424,10 @@ static int snd_rawmidi_open(struct inode *inode, struct file *file) + mutex_unlock(&rmidi->open_mutex); + schedule(); + mutex_lock(&rmidi->open_mutex); ++ if (rmidi->card->shutdown) { ++ err = -ENODEV; ++ break; ++ } + if (signal_pending(current)) { + err = -ERESTARTSYS; + break; +@@ -440,6 +446,7 @@ static int snd_rawmidi_open(struct inode *inode, struct file *file) + #endif + file->private_data = rawmidi_file; + mutex_unlock(&rmidi->open_mutex); ++ snd_card_unref(rmidi->card); + return 0; + + __error: +@@ -447,6 +454,7 @@ static int snd_rawmidi_open(struct inode *inode, struct file *file) + __error_card: + mutex_unlock(&rmidi->open_mutex); + module_put(rmidi->card->module); ++ snd_card_unref(rmidi->card); + return err; + } + +@@ -991,6 
+999,8 @@ static ssize_t snd_rawmidi_read(struct file *file, char __user *buf, size_t coun + spin_unlock_irq(&runtime->lock); + schedule(); + remove_wait_queue(&runtime->sleep, &wait); ++ if (rfile->rmidi->card->shutdown) ++ return -ENODEV; + if (signal_pending(current)) + return result > 0 ? result : -ERESTARTSYS; + if (!runtime->avail) +@@ -1234,6 +1244,8 @@ static ssize_t snd_rawmidi_write(struct file *file, const char __user *buf, + spin_unlock_irq(&runtime->lock); + timeout = schedule_timeout(30 * HZ); + remove_wait_queue(&runtime->sleep, &wait); ++ if (rfile->rmidi->card->shutdown) ++ return -ENODEV; + if (signal_pending(current)) + return result > 0 ? result : -ERESTARTSYS; + if (!runtime->avail && !timeout) +@@ -1609,9 +1621,20 @@ static int snd_rawmidi_dev_register(struct snd_device *device) + static int snd_rawmidi_dev_disconnect(struct snd_device *device) + { + struct snd_rawmidi *rmidi = device->device_data; ++ int dir; + + mutex_lock(®ister_mutex); ++ mutex_lock(&rmidi->open_mutex); ++ wake_up(&rmidi->open_wait); + list_del_init(&rmidi->list); ++ for (dir = 0; dir < 2; dir++) { ++ struct snd_rawmidi_substream *s; ++ list_for_each_entry(s, &rmidi->streams[dir].substreams, list) { ++ if (s->runtime) ++ wake_up(&s->runtime->sleep); ++ } ++ } ++ + #ifdef CONFIG_SND_OSSEMUL + if (rmidi->ossreg) { + if ((int)rmidi->device == midi_map[rmidi->card->number]) { +@@ -1626,6 +1649,7 @@ static int snd_rawmidi_dev_disconnect(struct snd_device *device) + } + #endif /* CONFIG_SND_OSSEMUL */ + snd_unregister_device(SNDRV_DEVICE_TYPE_RAWMIDI, rmidi->card, rmidi->device); ++ mutex_unlock(&rmidi->open_mutex); + mutex_unlock(®ister_mutex); + return 0; + } +diff --git a/sound/core/sound.c b/sound/core/sound.c +index 828af35..8e17b4d 100644 +--- a/sound/core/sound.c ++++ b/sound/core/sound.c +@@ -99,6 +99,10 @@ static void snd_request_other(int minor) + * + * Checks that a minor device with the specified type is registered, and returns + * its user data pointer. ++ * ++ * This function increments the reference counter of the card instance ++ * if an associated instance with the given minor number and type is found. ++ * The caller must call snd_card_unref() appropriately later. 
+ */ + void *snd_lookup_minor_data(unsigned int minor, int type) + { +@@ -109,9 +113,11 @@ void *snd_lookup_minor_data(unsigned int minor, int type) + return NULL; + mutex_lock(&sound_mutex); + mreg = snd_minors[minor]; +- if (mreg && mreg->type == type) ++ if (mreg && mreg->type == type) { + private_data = mreg->private_data; +- else ++ if (private_data && mreg->card_ptr) ++ atomic_inc(&mreg->card_ptr->refcount); ++ } else + private_data = NULL; + mutex_unlock(&sound_mutex); + return private_data; +@@ -275,6 +281,7 @@ int snd_register_device_for_dev(int type, struct snd_card *card, int dev, + preg->device = dev; + preg->f_ops = f_ops; + preg->private_data = private_data; ++ preg->card_ptr = card; + mutex_lock(&sound_mutex); + #ifdef CONFIG_SND_DYNAMIC_MINORS + minor = snd_find_free_minor(type); +diff --git a/sound/core/sound_oss.c b/sound/core/sound_oss.c +index c700920..ec86009 100644 +--- a/sound/core/sound_oss.c ++++ b/sound/core/sound_oss.c +@@ -40,6 +40,9 @@ + static struct snd_minor *snd_oss_minors[SNDRV_OSS_MINORS]; + static DEFINE_MUTEX(sound_oss_mutex); + ++/* NOTE: This function increments the refcount of the associated card like ++ * snd_lookup_minor_data(); the caller must call snd_card_unref() appropriately ++ */ + void *snd_lookup_oss_minor_data(unsigned int minor, int type) + { + struct snd_minor *mreg; +@@ -49,9 +52,11 @@ void *snd_lookup_oss_minor_data(unsigned int minor, int type) + return NULL; + mutex_lock(&sound_oss_mutex); + mreg = snd_oss_minors[minor]; +- if (mreg && mreg->type == type) ++ if (mreg && mreg->type == type) { + private_data = mreg->private_data; +- else ++ if (private_data && mreg->card_ptr) ++ atomic_inc(&mreg->card_ptr->refcount); ++ } else + private_data = NULL; + mutex_unlock(&sound_oss_mutex); + return private_data; +@@ -123,6 +128,7 @@ int snd_register_oss_device(int type, struct snd_card *card, int dev, + preg->device = dev; + preg->f_ops = f_ops; + preg->private_data = private_data; ++ preg->card_ptr = card; + mutex_lock(&sound_oss_mutex); + snd_oss_minors[minor] = preg; + minor_unit = SNDRV_MINOR_OSS_DEVICE(minor); +diff --git a/sound/pci/hda/patch_analog.c b/sound/pci/hda/patch_analog.c +index bcb3310..b4890f9 100644 +--- a/sound/pci/hda/patch_analog.c ++++ b/sound/pci/hda/patch_analog.c +@@ -573,6 +573,7 @@ static int ad198x_build_pcms(struct hda_codec *codec) + if (spec->multiout.dig_out_nid) { + info++; + codec->num_pcms++; ++ codec->spdif_status_reset = 1; + info->name = "AD198x Digital"; + info->pcm_type = HDA_PCM_TYPE_SPDIF; + info->stream[SNDRV_PCM_STREAM_PLAYBACK] = ad198x_pcm_digital_playback; +diff --git a/sound/pci/hda/patch_cirrus.c b/sound/pci/hda/patch_cirrus.c +index e449278..0ed6867 100644 +--- a/sound/pci/hda/patch_cirrus.c ++++ b/sound/pci/hda/patch_cirrus.c +@@ -93,8 +93,8 @@ enum { + #define CS420X_VENDOR_NID 0x11 + #define CS_DIG_OUT1_PIN_NID 0x10 + #define CS_DIG_OUT2_PIN_NID 0x15 +-#define CS_DMIC1_PIN_NID 0x12 +-#define CS_DMIC2_PIN_NID 0x0e ++#define CS_DMIC1_PIN_NID 0x0e ++#define CS_DMIC2_PIN_NID 0x12 + + /* coef indices */ + #define IDX_SPDIF_STAT 0x0000 +@@ -1088,14 +1088,18 @@ static void init_input(struct hda_codec *codec) + cs_automic(codec); + + coef = 0x000a; /* ADC1/2 - Digital and Analog Soft Ramp */ ++ cs_vendor_coef_set(codec, IDX_ADC_CFG, coef); ++ ++ coef = cs_vendor_coef_get(codec, IDX_BEEP_CFG); + if (is_active_pin(codec, CS_DMIC2_PIN_NID)) +- coef |= 0x0500; /* DMIC2 2 chan on, GPIO1 off */ ++ coef |= 1 << 4; /* DMIC2 2 chan on, GPIO1 off */ + if (is_active_pin(codec, CS_DMIC1_PIN_NID)) +- coef |= 
0x1800; /* DMIC1 2 chan on, GPIO0 off ++ coef |= 1 << 3; /* DMIC1 2 chan on, GPIO0 off + * No effect if SPDIF_OUT2 is + * selected in IDX_SPDIF_CTL. + */ +- cs_vendor_coef_set(codec, IDX_ADC_CFG, coef); ++ ++ cs_vendor_coef_set(codec, IDX_BEEP_CFG, coef); + } + } + +@@ -1109,7 +1113,7 @@ static const struct hda_verb cs_coef_init_verbs[] = { + | 0x0400 /* Disable Coefficient Auto increment */ + )}, + /* Beep */ +- {0x11, AC_VERB_SET_COEF_INDEX, IDX_DAC_CFG}, ++ {0x11, AC_VERB_SET_COEF_INDEX, IDX_BEEP_CFG}, + {0x11, AC_VERB_SET_PROC_COEF, 0x0007}, /* Enable Beep thru DAC1/2/3 */ + + {} /* terminator */ +diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c +index c2c7f90..3ce2da2 100644 +--- a/sound/pci/hda/patch_realtek.c ++++ b/sound/pci/hda/patch_realtek.c +@@ -6039,6 +6039,7 @@ static const struct hda_codec_preset snd_hda_preset_realtek[] = { + .patch = patch_alc662 }, + { .id = 0x10ec0663, .name = "ALC663", .patch = patch_alc662 }, + { .id = 0x10ec0665, .name = "ALC665", .patch = patch_alc662 }, ++ { .id = 0x10ec0668, .name = "ALC668", .patch = patch_alc662 }, + { .id = 0x10ec0670, .name = "ALC670", .patch = patch_alc662 }, + { .id = 0x10ec0680, .name = "ALC680", .patch = patch_alc680 }, + { .id = 0x10ec0880, .name = "ALC880", .patch = patch_alc880 }, +@@ -6056,6 +6057,7 @@ static const struct hda_codec_preset snd_hda_preset_realtek[] = { + { .id = 0x10ec0889, .name = "ALC889", .patch = patch_alc882 }, + { .id = 0x10ec0892, .name = "ALC892", .patch = patch_alc662 }, + { .id = 0x10ec0899, .name = "ALC898", .patch = patch_alc882 }, ++ { .id = 0x10ec0900, .name = "ALC1150", .patch = patch_alc882 }, + {} /* terminator */ + }; + +diff --git a/sound/pci/hda/patch_via.c b/sound/pci/hda/patch_via.c +index 7160ff2..9e0c889 100644 +--- a/sound/pci/hda/patch_via.c ++++ b/sound/pci/hda/patch_via.c +@@ -1856,11 +1856,11 @@ static int via_auto_fill_dac_nids(struct hda_codec *codec) + { + struct via_spec *spec = codec->spec; + const struct auto_pin_cfg *cfg = &spec->autocfg; +- int i, dac_num; ++ int i; + hda_nid_t nid; + ++ spec->multiout.num_dacs = 0; + spec->multiout.dac_nids = spec->private_dac_nids; +- dac_num = 0; + for (i = 0; i < cfg->line_outs; i++) { + hda_nid_t dac = 0; + nid = cfg->line_out_pins[i]; +@@ -1871,16 +1871,13 @@ static int via_auto_fill_dac_nids(struct hda_codec *codec) + if (!i && parse_output_path(codec, nid, dac, 1, + &spec->out_mix_path)) + dac = spec->out_mix_path.path[0]; +- if (dac) { +- spec->private_dac_nids[i] = dac; +- dac_num++; +- } ++ if (dac) ++ spec->private_dac_nids[spec->multiout.num_dacs++] = dac; + } + if (!spec->out_path[0].depth && spec->out_mix_path.depth) { + spec->out_path[0] = spec->out_mix_path; + spec->out_mix_path.depth = 0; + } +- spec->multiout.num_dacs = dac_num; + return 0; + } + +@@ -3689,6 +3686,18 @@ static void set_widgets_power_state_vt2002P(struct hda_codec *codec) + AC_VERB_SET_POWER_STATE, AC_PWRST_D3); + } + ++/* NIDs 0x24 and 0x33 on VT1802 have connections to non-existing NID 0x3e ++ * Replace this with mixer NID 0x1c ++ */ ++static void fix_vt1802_connections(struct hda_codec *codec) ++{ ++ static hda_nid_t conn_24[] = { 0x14, 0x1c }; ++ static hda_nid_t conn_33[] = { 0x1c }; ++ ++ snd_hda_override_conn_list(codec, 0x24, ARRAY_SIZE(conn_24), conn_24); ++ snd_hda_override_conn_list(codec, 0x33, ARRAY_SIZE(conn_33), conn_33); ++} ++ + /* patch for vt2002P */ + static int patch_vt2002P(struct hda_codec *codec) + { +@@ -3703,6 +3712,8 @@ static int patch_vt2002P(struct hda_codec *codec) + spec->aa_mix_nid = 0x21; + 
override_mic_boost(codec, 0x2b, 0, 3, 40); + override_mic_boost(codec, 0x29, 0, 3, 40); ++ if (spec->codec_type == VT1802) ++ fix_vt1802_connections(codec); + add_secret_dac_path(codec); + + /* automatic parse from the BIOS config */ +diff --git a/sound/usb/card.c b/sound/usb/card.c +index 0f6dc0d..566acb3 100644 +--- a/sound/usb/card.c ++++ b/sound/usb/card.c +@@ -336,7 +336,7 @@ static int snd_usb_audio_create(struct usb_device *dev, int idx, + return -ENOMEM; + } + +- mutex_init(&chip->shutdown_mutex); ++ init_rwsem(&chip->shutdown_rwsem); + chip->index = idx; + chip->dev = dev; + chip->card = card; +@@ -555,9 +555,11 @@ static void snd_usb_audio_disconnect(struct usb_device *dev, + return; + + card = chip->card; +- mutex_lock(®ister_mutex); +- mutex_lock(&chip->shutdown_mutex); ++ down_write(&chip->shutdown_rwsem); + chip->shutdown = 1; ++ up_write(&chip->shutdown_rwsem); ++ ++ mutex_lock(®ister_mutex); + chip->num_interfaces--; + if (chip->num_interfaces <= 0) { + snd_card_disconnect(card); +@@ -574,11 +576,9 @@ static void snd_usb_audio_disconnect(struct usb_device *dev, + snd_usb_mixer_disconnect(p); + } + usb_chip[chip->index] = NULL; +- mutex_unlock(&chip->shutdown_mutex); + mutex_unlock(®ister_mutex); + snd_card_free_when_closed(card); + } else { +- mutex_unlock(&chip->shutdown_mutex); + mutex_unlock(®ister_mutex); + } + } +@@ -610,16 +610,20 @@ int snd_usb_autoresume(struct snd_usb_audio *chip) + { + int err = -ENODEV; + ++ down_read(&chip->shutdown_rwsem); + if (!chip->shutdown && !chip->probing) + err = usb_autopm_get_interface(chip->pm_intf); ++ up_read(&chip->shutdown_rwsem); + + return err; + } + + void snd_usb_autosuspend(struct snd_usb_audio *chip) + { ++ down_read(&chip->shutdown_rwsem); + if (!chip->shutdown && !chip->probing) + usb_autopm_put_interface(chip->pm_intf); ++ up_read(&chip->shutdown_rwsem); + } + + static int usb_audio_suspend(struct usb_interface *intf, pm_message_t message) +diff --git a/sound/usb/card.h b/sound/usb/card.h +index a39edcc..665e297 100644 +--- a/sound/usb/card.h ++++ b/sound/usb/card.h +@@ -86,6 +86,7 @@ struct snd_usb_substream { + struct snd_urb_ctx syncurb[SYNC_URBS]; /* sync urb table */ + char *syncbuf; /* sync buffer for all sync URBs */ + dma_addr_t sync_dma; /* DMA address of syncbuf */ ++ unsigned int speed; /* USB_SPEED_XXX */ + + u64 formats; /* format bitmasks (all or'ed) */ + unsigned int num_formats; /* number of supported audio formats (list) */ +diff --git a/sound/usb/endpoint.c b/sound/usb/endpoint.c +index 08dcce5..24c5114 100644 +--- a/sound/usb/endpoint.c ++++ b/sound/usb/endpoint.c +@@ -148,8 +148,10 @@ void snd_usb_release_substream_urbs(struct snd_usb_substream *subs, int force) + int i; + + /* stop urbs (to be sure) */ +- deactivate_urbs(subs, force, 1); +- wait_clear_urbs(subs); ++ if (!subs->stream->chip->shutdown) { ++ deactivate_urbs(subs, force, 1); ++ wait_clear_urbs(subs); ++ } + + for (i = 0; i < MAX_URBS; i++) + release_urb_ctx(&subs->dataurb[i]); +@@ -895,7 +897,8 @@ void snd_usb_init_substream(struct snd_usb_stream *as, + subs->dev = as->chip->dev; + subs->txfr_quirk = as->chip->txfr_quirk; + subs->ops = audio_urb_ops[stream]; +- if (snd_usb_get_speed(subs->dev) >= USB_SPEED_HIGH) ++ subs->speed = snd_usb_get_speed(subs->dev); ++ if (subs->speed >= USB_SPEED_HIGH) + subs->ops.prepare_sync = prepare_capture_sync_urb_hs; + + snd_usb_set_pcm_ops(as->pcm, stream); +diff --git a/sound/usb/mixer.c b/sound/usb/mixer.c +index ab23869..6730a33 100644 +--- a/sound/usb/mixer.c ++++ b/sound/usb/mixer.c +@@ -287,25 
+287,32 @@ static int get_ctl_value_v1(struct usb_mixer_elem_info *cval, int request, int v + unsigned char buf[2]; + int val_len = cval->val_type >= USB_MIXER_S16 ? 2 : 1; + int timeout = 10; +- int err; ++ int idx = 0, err; + + err = snd_usb_autoresume(cval->mixer->chip); + if (err < 0) + return -EIO; ++ down_read(&chip->shutdown_rwsem); + while (timeout-- > 0) { ++ if (chip->shutdown) ++ break; ++ idx = snd_usb_ctrl_intf(chip) | (cval->id << 8); + if (snd_usb_ctl_msg(chip->dev, usb_rcvctrlpipe(chip->dev, 0), request, + USB_RECIP_INTERFACE | USB_TYPE_CLASS | USB_DIR_IN, +- validx, snd_usb_ctrl_intf(chip) | (cval->id << 8), +- buf, val_len) >= val_len) { ++ validx, idx, buf, val_len) >= val_len) { + *value_ret = convert_signed_value(cval, snd_usb_combine_bytes(buf, val_len)); +- snd_usb_autosuspend(cval->mixer->chip); +- return 0; ++ err = 0; ++ goto out; + } + } +- snd_usb_autosuspend(cval->mixer->chip); + snd_printdd(KERN_ERR "cannot get ctl value: req = %#x, wValue = %#x, wIndex = %#x, type = %d\n", +- request, validx, snd_usb_ctrl_intf(chip) | (cval->id << 8), cval->val_type); +- return -EINVAL; ++ request, validx, idx, cval->val_type); ++ err = -EINVAL; ++ ++ out: ++ up_read(&chip->shutdown_rwsem); ++ snd_usb_autosuspend(cval->mixer->chip); ++ return err; + } + + static int get_ctl_value_v2(struct usb_mixer_elem_info *cval, int request, int validx, int *value_ret) +@@ -313,7 +320,7 @@ static int get_ctl_value_v2(struct usb_mixer_elem_info *cval, int request, int v + struct snd_usb_audio *chip = cval->mixer->chip; + unsigned char buf[2 + 3*sizeof(__u16)]; /* enough space for one range */ + unsigned char *val; +- int ret, size; ++ int idx = 0, ret, size; + __u8 bRequest; + + if (request == UAC_GET_CUR) { +@@ -330,16 +337,22 @@ static int get_ctl_value_v2(struct usb_mixer_elem_info *cval, int request, int v + if (ret) + goto error; + +- ret = snd_usb_ctl_msg(chip->dev, usb_rcvctrlpipe(chip->dev, 0), bRequest, ++ down_read(&chip->shutdown_rwsem); ++ if (chip->shutdown) ++ ret = -ENODEV; ++ else { ++ idx = snd_usb_ctrl_intf(chip) | (cval->id << 8); ++ ret = snd_usb_ctl_msg(chip->dev, usb_rcvctrlpipe(chip->dev, 0), bRequest, + USB_RECIP_INTERFACE | USB_TYPE_CLASS | USB_DIR_IN, +- validx, snd_usb_ctrl_intf(chip) | (cval->id << 8), +- buf, size); ++ validx, idx, buf, size); ++ } ++ up_read(&chip->shutdown_rwsem); + snd_usb_autosuspend(chip); + + if (ret < 0) { + error: + snd_printk(KERN_ERR "cannot get ctl value: req = %#x, wValue = %#x, wIndex = %#x, type = %d\n", +- request, validx, snd_usb_ctrl_intf(chip) | (cval->id << 8), cval->val_type); ++ request, validx, idx, cval->val_type); + return ret; + } + +@@ -417,7 +430,7 @@ int snd_usb_mixer_set_ctl_value(struct usb_mixer_elem_info *cval, + { + struct snd_usb_audio *chip = cval->mixer->chip; + unsigned char buf[2]; +- int val_len, err, timeout = 10; ++ int idx = 0, val_len, err, timeout = 10; + + if (cval->mixer->protocol == UAC_VERSION_1) { + val_len = cval->val_type >= USB_MIXER_S16 ? 
2 : 1; +@@ -440,19 +453,27 @@ int snd_usb_mixer_set_ctl_value(struct usb_mixer_elem_info *cval, + err = snd_usb_autoresume(chip); + if (err < 0) + return -EIO; +- while (timeout-- > 0) ++ down_read(&chip->shutdown_rwsem); ++ while (timeout-- > 0) { ++ if (chip->shutdown) ++ break; ++ idx = snd_usb_ctrl_intf(chip) | (cval->id << 8); + if (snd_usb_ctl_msg(chip->dev, + usb_sndctrlpipe(chip->dev, 0), request, + USB_RECIP_INTERFACE | USB_TYPE_CLASS | USB_DIR_OUT, +- validx, snd_usb_ctrl_intf(chip) | (cval->id << 8), +- buf, val_len) >= 0) { +- snd_usb_autosuspend(chip); +- return 0; ++ validx, idx, buf, val_len) >= 0) { ++ err = 0; ++ goto out; + } +- snd_usb_autosuspend(chip); ++ } + snd_printdd(KERN_ERR "cannot set ctl value: req = %#x, wValue = %#x, wIndex = %#x, type = %d, data = %#x/%#x\n", +- request, validx, snd_usb_ctrl_intf(chip) | (cval->id << 8), cval->val_type, buf[0], buf[1]); +- return -EINVAL; ++ request, validx, idx, cval->val_type, buf[0], buf[1]); ++ err = -EINVAL; ++ ++ out: ++ up_read(&chip->shutdown_rwsem); ++ snd_usb_autosuspend(chip); ++ return err; + } + + static int set_cur_ctl_value(struct usb_mixer_elem_info *cval, int validx, int value) +diff --git a/sound/usb/mixer_quirks.c b/sound/usb/mixer_quirks.c +index ab125ee..38a607a 100644 +--- a/sound/usb/mixer_quirks.c ++++ b/sound/usb/mixer_quirks.c +@@ -186,6 +186,11 @@ static int snd_audigy2nx_led_put(struct snd_kcontrol *kcontrol, struct snd_ctl_e + if (value > 1) + return -EINVAL; + changed = value != mixer->audigy2nx_leds[index]; ++ down_read(&mixer->chip->shutdown_rwsem); ++ if (mixer->chip->shutdown) { ++ err = -ENODEV; ++ goto out; ++ } + if (mixer->chip->usb_id == USB_ID(0x041e, 0x3042)) + err = snd_usb_ctl_msg(mixer->chip->dev, + usb_sndctrlpipe(mixer->chip->dev, 0), 0x24, +@@ -202,6 +207,8 @@ static int snd_audigy2nx_led_put(struct snd_kcontrol *kcontrol, struct snd_ctl_e + usb_sndctrlpipe(mixer->chip->dev, 0), 0x24, + USB_DIR_OUT | USB_TYPE_VENDOR | USB_RECIP_OTHER, + value, index + 2, NULL, 0); ++ out: ++ up_read(&mixer->chip->shutdown_rwsem); + if (err < 0) + return err; + mixer->audigy2nx_leds[index] = value; +@@ -295,11 +302,16 @@ static void snd_audigy2nx_proc_read(struct snd_info_entry *entry, + + for (i = 0; jacks[i].name; ++i) { + snd_iprintf(buffer, "%s: ", jacks[i].name); +- err = snd_usb_ctl_msg(mixer->chip->dev, ++ down_read(&mixer->chip->shutdown_rwsem); ++ if (mixer->chip->shutdown) ++ err = 0; ++ else ++ err = snd_usb_ctl_msg(mixer->chip->dev, + usb_rcvctrlpipe(mixer->chip->dev, 0), + UAC_GET_MEM, USB_DIR_IN | USB_TYPE_CLASS | + USB_RECIP_INTERFACE, 0, + jacks[i].unitid << 8, buf, 3); ++ up_read(&mixer->chip->shutdown_rwsem); + if (err == 3 && (buf[0] == 3 || buf[0] == 6)) + snd_iprintf(buffer, "%02x %02x\n", buf[1], buf[2]); + else +@@ -329,10 +341,15 @@ static int snd_xonar_u1_switch_put(struct snd_kcontrol *kcontrol, + else + new_status = old_status & ~0x02; + changed = new_status != old_status; +- err = snd_usb_ctl_msg(mixer->chip->dev, ++ down_read(&mixer->chip->shutdown_rwsem); ++ if (mixer->chip->shutdown) ++ err = -ENODEV; ++ else ++ err = snd_usb_ctl_msg(mixer->chip->dev, + usb_sndctrlpipe(mixer->chip->dev, 0), 0x08, + USB_DIR_OUT | USB_TYPE_VENDOR | USB_RECIP_OTHER, + 50, 0, &new_status, 1); ++ up_read(&mixer->chip->shutdown_rwsem); + if (err < 0) + return err; + mixer->xonar_u1_status = new_status; +@@ -371,11 +388,17 @@ static int snd_nativeinstruments_control_get(struct snd_kcontrol *kcontrol, + u8 bRequest = (kcontrol->private_value >> 16) & 0xff; + u16 wIndex = 
kcontrol->private_value & 0xffff; + u8 tmp; ++ int ret; + +- int ret = usb_control_msg(dev, usb_rcvctrlpipe(dev, 0), bRequest, ++ down_read(&mixer->chip->shutdown_rwsem); ++ if (mixer->chip->shutdown) ++ ret = -ENODEV; ++ else ++ ret = usb_control_msg(dev, usb_rcvctrlpipe(dev, 0), bRequest, + USB_TYPE_VENDOR | USB_RECIP_DEVICE | USB_DIR_IN, + 0, cpu_to_le16(wIndex), + &tmp, sizeof(tmp), 1000); ++ up_read(&mixer->chip->shutdown_rwsem); + + if (ret < 0) { + snd_printk(KERN_ERR +@@ -396,11 +419,17 @@ static int snd_nativeinstruments_control_put(struct snd_kcontrol *kcontrol, + u8 bRequest = (kcontrol->private_value >> 16) & 0xff; + u16 wIndex = kcontrol->private_value & 0xffff; + u16 wValue = ucontrol->value.integer.value[0]; ++ int ret; + +- int ret = usb_control_msg(dev, usb_sndctrlpipe(dev, 0), bRequest, ++ down_read(&mixer->chip->shutdown_rwsem); ++ if (mixer->chip->shutdown) ++ ret = -ENODEV; ++ else ++ ret = usb_control_msg(dev, usb_sndctrlpipe(dev, 0), bRequest, + USB_TYPE_VENDOR | USB_RECIP_DEVICE | USB_DIR_OUT, + cpu_to_le16(wValue), cpu_to_le16(wIndex), + NULL, 0, 1000); ++ up_read(&mixer->chip->shutdown_rwsem); + + if (ret < 0) { + snd_printk(KERN_ERR +diff --git a/sound/usb/pcm.c b/sound/usb/pcm.c +index 839165f..983e071 100644 +--- a/sound/usb/pcm.c ++++ b/sound/usb/pcm.c +@@ -67,6 +67,8 @@ static snd_pcm_uframes_t snd_usb_pcm_pointer(struct snd_pcm_substream *substream + unsigned int hwptr_done; + + subs = (struct snd_usb_substream *)substream->runtime->private_data; ++ if (subs->stream->chip->shutdown) ++ return SNDRV_PCM_POS_XRUN; + spin_lock(&subs->lock); + hwptr_done = subs->hwptr_done; + substream->runtime->delay = snd_usb_pcm_delay(subs, +@@ -373,8 +375,14 @@ static int snd_usb_hw_params(struct snd_pcm_substream *substream, + changed = subs->cur_audiofmt != fmt || + subs->period_bytes != params_period_bytes(hw_params) || + subs->cur_rate != rate; ++ ++ down_read(&subs->stream->chip->shutdown_rwsem); ++ if (subs->stream->chip->shutdown) { ++ ret = -ENODEV; ++ goto unlock; ++ } + if ((ret = set_format(subs, fmt)) < 0) +- return ret; ++ goto unlock; + + if (subs->cur_rate != rate) { + struct usb_host_interface *alts; +@@ -383,12 +391,11 @@ static int snd_usb_hw_params(struct snd_pcm_substream *substream, + alts = &iface->altsetting[fmt->altset_idx]; + ret = snd_usb_init_sample_rate(subs->stream->chip, subs->interface, alts, fmt, rate); + if (ret < 0) +- return ret; ++ goto unlock; + subs->cur_rate = rate; + } + + if (changed) { +- mutex_lock(&subs->stream->chip->shutdown_mutex); + /* format changed */ + snd_usb_release_substream_urbs(subs, 0); + /* influenced: period_bytes, channels, rate, format, */ +@@ -396,9 +403,10 @@ static int snd_usb_hw_params(struct snd_pcm_substream *substream, + params_rate(hw_params), + snd_pcm_format_physical_width(params_format(hw_params)) * + params_channels(hw_params)); +- mutex_unlock(&subs->stream->chip->shutdown_mutex); + } + ++unlock: ++ up_read(&subs->stream->chip->shutdown_rwsem); + return ret; + } + +@@ -414,9 +422,9 @@ static int snd_usb_hw_free(struct snd_pcm_substream *substream) + subs->cur_audiofmt = NULL; + subs->cur_rate = 0; + subs->period_bytes = 0; +- mutex_lock(&subs->stream->chip->shutdown_mutex); ++ down_read(&subs->stream->chip->shutdown_rwsem); + snd_usb_release_substream_urbs(subs, 0); +- mutex_unlock(&subs->stream->chip->shutdown_mutex); ++ up_read(&subs->stream->chip->shutdown_rwsem); + return snd_pcm_lib_free_vmalloc_buffer(substream); + } + +@@ -429,12 +437,18 @@ static int snd_usb_pcm_prepare(struct snd_pcm_substream 
*substream) + { + struct snd_pcm_runtime *runtime = substream->runtime; + struct snd_usb_substream *subs = runtime->private_data; ++ int ret = 0; + + if (! subs->cur_audiofmt) { + snd_printk(KERN_ERR "usbaudio: no format is specified!\n"); + return -ENXIO; + } + ++ down_read(&subs->stream->chip->shutdown_rwsem); ++ if (subs->stream->chip->shutdown) { ++ ret = -ENODEV; ++ goto unlock; ++ } + /* some unit conversions in runtime */ + subs->maxframesize = bytes_to_frames(runtime, subs->maxpacksize); + subs->curframesize = bytes_to_frames(runtime, subs->curpacksize); +@@ -447,7 +461,10 @@ static int snd_usb_pcm_prepare(struct snd_pcm_substream *substream) + subs->last_frame_number = 0; + runtime->delay = 0; + +- return snd_usb_substream_prepare(subs, runtime); ++ ret = snd_usb_substream_prepare(subs, runtime); ++ unlock: ++ up_read(&subs->stream->chip->shutdown_rwsem); ++ return ret; + } + + static struct snd_pcm_hardware snd_usb_hardware = +@@ -500,7 +517,7 @@ static int hw_check_valid_format(struct snd_usb_substream *subs, + return 0; + } + /* check whether the period time is >= the data packet interval */ +- if (snd_usb_get_speed(subs->dev) != USB_SPEED_FULL) { ++ if (subs->speed != USB_SPEED_FULL) { + ptime = 125 * (1 << fp->datainterval); + if (ptime > pt->max || (ptime == pt->max && pt->openmax)) { + hwc_debug(" > check: ptime %u > max %u\n", ptime, pt->max); +@@ -776,7 +793,7 @@ static int setup_hw_info(struct snd_pcm_runtime *runtime, struct snd_usb_substre + return err; + + param_period_time_if_needed = SNDRV_PCM_HW_PARAM_PERIOD_TIME; +- if (snd_usb_get_speed(subs->dev) == USB_SPEED_FULL) ++ if (subs->speed == USB_SPEED_FULL) + /* full speed devices have fixed data packet interval */ + ptmin = 1000; + if (ptmin == 1000) +diff --git a/sound/usb/proc.c b/sound/usb/proc.c +index 961c9a2..aef03db 100644 +--- a/sound/usb/proc.c ++++ b/sound/usb/proc.c +@@ -107,7 +107,7 @@ static void proc_dump_substream_formats(struct snd_usb_substream *subs, struct s + } + snd_iprintf(buffer, "\n"); + } +- if (snd_usb_get_speed(subs->dev) != USB_SPEED_FULL) ++ if (subs->speed != USB_SPEED_FULL) + snd_iprintf(buffer, " Data packet interval: %d us\n", + 125 * (1 << fp->datainterval)); + // snd_iprintf(buffer, " Max Packet Size = %d\n", fp->maxpacksize); +@@ -128,7 +128,7 @@ static void proc_dump_substream_status(struct snd_usb_substream *subs, struct sn + snd_iprintf(buffer, "]\n"); + snd_iprintf(buffer, " Packet Size = %d\n", subs->curpacksize); + snd_iprintf(buffer, " Momentary freq = %u Hz (%#x.%04x)\n", +- snd_usb_get_speed(subs->dev) == USB_SPEED_FULL ++ subs->speed == USB_SPEED_FULL + ? 
get_full_speed_hz(subs->freqm) + : get_high_speed_hz(subs->freqm), + subs->freqm >> 16, subs->freqm & 0xffff); +diff --git a/sound/usb/usbaudio.h b/sound/usb/usbaudio.h +index 3e2b035..6c805a5 100644 +--- a/sound/usb/usbaudio.h ++++ b/sound/usb/usbaudio.h +@@ -36,7 +36,7 @@ struct snd_usb_audio { + struct snd_card *card; + struct usb_interface *pm_intf; + u32 usb_id; +- struct mutex shutdown_mutex; ++ struct rw_semaphore shutdown_rwsem; + unsigned int shutdown:1; + unsigned int probing:1; + unsigned int autosuspended:1; diff --git a/3.2.34/cloneconfig.patch b/3.2.34/cloneconfig.patch new file mode 100644 index 0000000..4bfb615 --- /dev/null +++ b/3.2.34/cloneconfig.patch @@ -0,0 +1,41 @@ +From: Andreas Gruenbacher +Subject: Add ``cloneconfig'' target +Patch-mainline: Submitted 24 Feb 2011 + +Cloneconfig takes the first configuration it finds which appears +to belong to the running kernel, and configures the kernel sources +to match this configuration as closely as possible. + +Signed-off-by: Andreas Gruenbacher +Signed-off-by: Jeff Mahoney +--- + + scripts/kconfig/Makefile | 17 +++++++++++++++++ + 1 file changed, 17 insertions(+) + +--- a/scripts/kconfig/Makefile ++++ b/scripts/kconfig/Makefile +@@ -99,6 +99,23 @@ PHONY += allnoconfig allyesconfig allmod + + allnoconfig allyesconfig allmodconfig alldefconfig randconfig: $(obj)/conf + $< --$@ $(Kconfig) ++ ++UNAME_RELEASE := $(shell uname -r) ++CLONECONFIG := $(firstword $(wildcard /proc/config.gz \ ++ /lib/modules/$(UNAME_RELEASE)/.config \ ++ /etc/kernel-config \ ++ /boot/config-$(UNAME_RELEASE))) ++cloneconfig: $(obj)/conf ++ $(Q)case "$(CLONECONFIG)" in \ ++ '') echo -e "The configuration of the running" \ ++ "kernel could not be determined\n"; \ ++ false ;; \ ++ *.gz) gzip -cd $(CLONECONFIG) > .config.running ;; \ ++ *) cat $(CLONECONFIG) > .config.running ;; \ ++ esac && \ ++ echo -e "Cloning configuration file $(CLONECONFIG)\n" ++ $(Q)$< --defconfig=.config.running arch/$(SRCARCH)/Kconfig ++ + + PHONY += listnewconfig oldnoconfig savedefconfig defconfig + diff --git a/3.2.34/colored-printk-3.2.33.patch b/3.2.34/colored-printk-3.2.33.patch new file mode 100644 index 0000000..574f706 --- /dev/null +++ b/3.2.34/colored-printk-3.2.33.patch @@ -0,0 +1,337 @@ +diff -pruN -X linux/Documentation/dontdiff linux-2.6.29.3/arch/x86/kernel/early_printk.c linux-2.6.29.3-cprintk/arch/x86/kernel/early_printk.c +--- a/arch/x86/kernel/early_printk.c 2009-03-24 00:12:14.000000000 +0100 ++++ b/arch/x86/kernel/early_printk.c 2009-05-09 16:10:36.000000000 +0200 +@@ -23,7 +23,8 @@ + static int max_ypos = 25, max_xpos = 80; + static int current_ypos = 25, current_xpos; + +-static void early_vga_write(struct console *con, const char *str, unsigned n) ++static void early_vga_write(struct console *con, const char *str, unsigned n, ++ unsigned int loglevel) + { + char c; + int i, k, j; +@@ -93,7 +94,8 @@ static int early_serial_putc(unsigned ch + return timeout ? 
0 : -1; + } + +-static void early_serial_write(struct console *con, const char *s, unsigned n) ++static void early_serial_write(struct console *con, const char *s, unsigned n, ++ unsigned int loglevel) + { + while (*s && n-- > 0) { + if (*s == '\n') +@@ -887,7 +889,7 @@ asmlinkage void early_printk(const char + + va_start(ap, fmt); + n = vscnprintf(buf, sizeof(buf), fmt, ap); +- early_console->write(early_console, buf, n); ++ early_console->write(early_console, buf, n, 0); + va_end(ap); + } + +diff -pruN -X linux/Documentation/dontdiff linux-2.6.29.3/drivers/char/Kconfig linux-2.6.29.3-cprintk/drivers/tty/Kconfig +--- a/drivers/char/Kconfig 2009-03-24 00:12:14.000000000 +0100 ++++ b/drivers/tty/Kconfig 2009-05-09 14:43:48.000000000 +0200 +@@ -66,6 +66,111 @@ config VT_CONSOLE + + If unsure, say Y. + ++menuconfig VT_CKO ++ bool "Colored kernel message output" ++ depends on VT_CONSOLE ++ ---help--- ++ This option enables kernel messages to be emitted in ++ colors other than the default. ++ ++ The color value you need to enter is composed (OR-ed) ++ of a foreground and a background color. ++ ++ Foreground: ++ 0x00 = black, 0x08 = dark gray, ++ 0x01 = red, 0x09 = light red, ++ 0x02 = green, 0x0A = light green, ++ 0x03 = brown, 0x0B = yellow, ++ 0x04 = blue, 0x0C = light blue, ++ 0x05 = magenta, 0x0D = light magenta, ++ 0x06 = cyan, 0x0E = light cyan, ++ 0x07 = gray, 0x0F = white, ++ ++ (Foreground colors 0x08 to 0x0F do not work when a VGA ++ console font with 512 glyphs is used.) ++ ++ Background: ++ 0x00 = black, 0x40 = blue, ++ 0x10 = red, 0x50 = magenta, ++ 0x20 = green, 0x60 = cyan, ++ 0x30 = brown, 0x70 = gray, ++ ++ For example, 0x1F would yield white on red. ++ ++ If unsure, say N. ++ ++config VT_PRINTK_EMERG_COLOR ++ hex "Emergency messages color" ++ range 0x00 0xFF ++ depends on VT_CKO ++ default 0x07 ++ ---help--- ++ This option defines with which color kernel emergency messages will ++ be printed to the console. ++ ++config VT_PRINTK_ALERT_COLOR ++ hex "Alert messages color" ++ range 0x00 0xFF ++ depends on VT_CKO ++ default 0x07 ++ ---help--- ++ This option defines with which color kernel alert messages will ++ be printed to the console. ++ ++config VT_PRINTK_CRIT_COLOR ++ hex "Critical messages color" ++ range 0x00 0xFF ++ depends on VT_CKO ++ default 0x07 ++ ---help--- ++ This option defines with which color kernel critical messages will ++ be printed to the console. ++ ++config VT_PRINTK_ERR_COLOR ++ hex "Error messages color" ++ range 0x00 0xFF ++ depends on VT_CKO ++ default 0x07 ++ ---help--- ++ This option defines with which color kernel error messages will ++ be printed to the console. ++ ++config VT_PRINTK_WARNING_COLOR ++ hex "Warning messages color" ++ range 0x00 0xFF ++ depends on VT_CKO ++ default 0x07 ++ ---help--- ++ This option defines with which color kernel warning messages will ++ be printed to the console. ++ ++config VT_PRINTK_NOTICE_COLOR ++ hex "Notice messages color" ++ range 0x00 0xFF ++ depends on VT_CKO ++ default 0x07 ++ ---help--- ++ This option defines with which color kernel notice messages will ++ be printed to the console. ++ ++config VT_PRINTK_INFO_COLOR ++ hex "Information messages color" ++ range 0x00 0xFF ++ depends on VT_CKO ++ default 0x07 ++ ---help--- ++ This option defines with which color kernel information messages will ++ be printed to the console. 
++ ++config VT_PRINTK_DEBUG_COLOR ++ hex "Debug messages color" ++ range 0x00 0xFF ++ depends on VT_CKO ++ default 0x07 ++ ---help--- ++ This option defines with which color kernel debug messages will ++ be printed to the console. ++ + config HW_CONSOLE + bool + depends on VT && !S390 && !UML +diff -pruN -X linux/Documentation/dontdiff linux-2.6.29.3/drivers/char/vt.c linux-2.6.29.3-cprintk/drivers/tty/vt/vt.c +--- a/drivers/char/vt.c 2009-05-09 10:46:57.000000000 +0200 ++++ b/drivers/tty/vt/vt.c 2009-05-09 14:43:48.000000000 +0200 +@@ -73,6 +73,7 @@ + */ + + #include ++#include + #include + #include + #include +@@ -2431,17 +2432,45 @@ struct tty_driver *console_driver; + + #ifdef CONFIG_VT_CONSOLE + ++#ifdef CONFIG_VT_CKO ++static unsigned int printk_color[8] __read_mostly = { ++ CONFIG_VT_PRINTK_EMERG_COLOR, /* KERN_EMERG */ ++ CONFIG_VT_PRINTK_ALERT_COLOR, /* KERN_ALERT */ ++ CONFIG_VT_PRINTK_CRIT_COLOR, /* KERN_CRIT */ ++ CONFIG_VT_PRINTK_ERR_COLOR, /* KERN_ERR */ ++ CONFIG_VT_PRINTK_WARNING_COLOR, /* KERN_WARNING */ ++ CONFIG_VT_PRINTK_NOTICE_COLOR, /* KERN_NOTICE */ ++ CONFIG_VT_PRINTK_INFO_COLOR, /* KERN_INFO */ ++ CONFIG_VT_PRINTK_DEBUG_COLOR, /* KERN_DEBUG */ ++}; ++module_param_array(printk_color, uint, NULL, S_IRUGO | S_IWUSR); ++ ++static inline void vc_set_color(struct vc_data *vc, unsigned char color) ++{ ++ vc->vc_color = color_table[color & 0xF] | ++ (color_table[(color >> 4) & 0x7] << 4) | ++ (color & 0x80); ++ update_attr(vc); ++} ++#else ++static unsigned int printk_color[8]; ++static inline void vc_set_color(const struct vc_data *vc, unsigned char c) ++{ ++} ++#endif ++ + /* + * Console on virtual terminal + * + * The console must be locked when we get here. + */ + +-static void vt_console_print(struct console *co, const char *b, unsigned count) ++static void vt_console_print(struct console *co, const char *b, unsigned count, ++ unsigned int loglevel) + { + struct vc_data *vc = vc_cons[fg_console].d; +- unsigned char c; + static DEFINE_SPINLOCK(printing_lock); ++ unsigned char current_color, c; + const ushort *start; + ushort cnt = 0; + ushort myx; +@@ -2474,11 +2503,19 @@ static void vt_console_print(struct cons + + start = (ushort *)vc->vc_pos; + ++ /* ++ * We always get a valid loglevel - <8> and "no level" is transformed ++ * to <4> in the typical kernel. 
++ */ ++ current_color = printk_color[loglevel]; ++ vc_set_color(vc, current_color); ++ + /* Contrived structure to try to emulate original need_wrap behaviour + * Problems caused when we have need_wrap set on '\n' character */ + while (count--) { + c = *b++; + if (c == 10 || c == 13 || c == 8 || vc->vc_need_wrap) { ++ vc_set_color(vc, vc->vc_def_color); + if (cnt > 0) { + if (CON_IS_VISIBLE(vc)) + vc->vc_sw->con_putcs(vc, start, cnt, vc->vc_y, vc->vc_x); +@@ -2491,6 +2528,7 @@ static void vt_console_print(struct cons + bs(vc); + start = (ushort *)vc->vc_pos; + myx = vc->vc_x; ++ vc_set_color(vc, current_color); + continue; + } + if (c != 13) +@@ -2498,6 +2536,7 @@ static void vt_console_print(struct cons + cr(vc); + start = (ushort *)vc->vc_pos; + myx = vc->vc_x; ++ vc_set_color(vc, current_color); + if (c == 10 || c == 13) + continue; + } +@@ -2520,6 +2559,7 @@ static void vt_console_print(struct cons + vc->vc_need_wrap = 1; + } + } ++ vc_set_color(vc, vc->vc_def_color); + set_cursor(vc); + notify_update(vc); + +diff -pruN -X linux/Documentation/dontdiff linux-2.6.29.3/drivers/net/netconsole.c linux-2.6.29.3-cprintk/drivers/net/netconsole.c +--- a/drivers/net/netconsole.c 2009-03-24 00:12:14.000000000 +0100 ++++ b/drivers/net/netconsole.c 2009-05-09 14:43:48.000000000 +0200 +@@ -691,7 +691,8 @@ static struct notifier_block netconsole_ + .notifier_call = netconsole_netdev_event, + }; + +-static void write_msg(struct console *con, const char *msg, unsigned int len) ++static void write_msg(struct console *con, const char *msg, unsigned int len, ++ unsigned int loglevel) + { + int frag, left; + unsigned long flags; +diff -pruN -X linux/Documentation/dontdiff linux-2.6.29.3/drivers/serial/8250.c linux-2.6.29.3-cprintk/drivers/tty/serial/8250.c +--- a/drivers/serial/8250.c 2009-03-24 00:12:14.000000000 +0100 ++++ b/drivers/tty/serial/8250.c 2009-05-09 14:43:48.000000000 +0200 +@@ -2698,7 +2698,8 @@ static void serial8250_console_putchar(s + * The console_lock must be held when we get here. 
+ */ + static void +-serial8250_console_write(struct console *co, const char *s, unsigned int count) ++serial8250_console_write(struct console *co, const char *s, unsigned int count, ++ unsigned int loglevel) + { + struct uart_8250_port *up = &serial8250_ports[co->index]; + unsigned long flags; +diff -pruN -X linux/Documentation/dontdiff linux-2.6.29.3/drivers/serial/8250_early.c linux-2.6.29.3-cprintk/drivers/tty/serial/8250_early.c +--- a/drivers/serial/8250_early.c 2009-03-24 00:12:14.000000000 +0100 ++++ b/drivers/tty/serial/8250_early.c 2009-05-09 14:43:48.000000000 +0200 +@@ -83,7 +83,7 @@ static void __init serial_putc(struct ua + } + + static void __init early_serial8250_write(struct console *console, +- const char *s, unsigned int count) ++ const char *s, unsigned int count, unsigned int loglevel) + { + struct uart_port *port = &early_device.port; + unsigned int ier; +diff -pruN -X linux/Documentation/dontdiff linux-2.6.29.3/include/linux/console.h linux-2.6.29.3-cprintk/include/linux/console.h +--- a/include/linux/console.h 2009-03-24 00:12:14.000000000 +0100 ++++ b/include/linux/console.h 2009-05-09 14:43:48.000000000 +0200 +@@ -95,7 +95,7 @@ void give_up_console(const struct consw + + struct console { + char name[16]; +- void (*write)(struct console *, const char *, unsigned); ++ void (*write)(struct console *, const char *, unsigned, unsigned int); + int (*read)(struct console *, char *, unsigned); + struct tty_driver *(*device)(struct console *, int *); + void (*unblank)(void); +diff -pruN -X linux/Documentation/dontdiff linux-2.6.29.3/kernel/printk.c linux-2.6.29.3-cprintk/kernel/printk.c +--- a/kernel/printk.c 2009-03-24 00:12:14.000000000 +0100 ++++ b/kernel/printk.c 2009-05-09 14:43:48.000000000 +0200 +@@ -389,7 +389,8 @@ SYSCALL_DEFINE3(syslog, int, type, char + /* + * Call the console drivers on a range of log_buf + */ +-static void __call_console_drivers(unsigned start, unsigned end) ++static void __call_console_drivers(unsigned start, unsigned end, ++ unsigned int loglevel) + { + struct console *con; + +@@ -397,7 +398,7 @@ static void __call_console_drivers(unsig + if ((con->flags & CON_ENABLED) && con->write && + (cpu_online(smp_processor_id()) || + (con->flags & CON_ANYTIME))) +- con->write(con, &LOG_BUF(start), end - start); ++ con->write(con, &LOG_BUF(start), end - start, loglevel); + } + } + +@@ -424,10 +425,11 @@ static void _call_console_drivers(unsign + if ((start & LOG_BUF_MASK) > (end & LOG_BUF_MASK)) { + /* wrapped write */ + __call_console_drivers(start & LOG_BUF_MASK, +- log_buf_len); +- __call_console_drivers(0, end & LOG_BUF_MASK); ++ log_buf_len, msg_log_level); ++ __call_console_drivers(0, end & LOG_BUF_MASK, ++ msg_log_level); + } else { +- __call_console_drivers(start, end); ++ __call_console_drivers(start, end, msg_log_level); + } + } + } diff --git a/3.2.34/hz-432-kconfig-option.patch b/3.2.34/hz-432-kconfig-option.patch new file mode 100644 index 0000000..2fe9a4f --- /dev/null +++ b/3.2.34/hz-432-kconfig-option.patch @@ -0,0 +1,25 @@ +diff -urN oldtree/kernel/Kconfig.hz newtree/kernel/Kconfig.hz +--- oldtree/kernel/Kconfig.hz 2007-03-06 15:00:55.000000000 -0500 ++++ newtree/kernel/Kconfig.hz 2007-03-06 17:52:36.000000000 -0500 +@@ -39,6 +39,14 @@ + on SMP and NUMA systems and exactly dividing by both PAL and + NTSC frame rates for video and multimedia work. + ++ config HZ_432 ++ bool "432 HZ" ++ help ++ 432 HZ is the best value for desktop systems. Most responsive ++ out of all the options. This is for Dual Core/Processor systems only. 
++ as timer frequencies * number of processors = actual frequency. ++ Try this if you have a dual-core/dual processor system. ++ + config HZ_1000 + bool "1000 HZ" + help +@@ -52,5 +60,6 @@ + default 100 if HZ_100 + default 250 if HZ_250_NODEFAULT + default 300 if HZ_300 ++ default 432 if HZ_432 + default 1000 if HZ_1000 + diff --git a/3.2.34/hz-864-kconfig-option.patch b/3.2.34/hz-864-kconfig-option.patch new file mode 100644 index 0000000..6bdca04 --- /dev/null +++ b/3.2.34/hz-864-kconfig-option.patch @@ -0,0 +1,25 @@ +diff -urN oldtree/kernel/Kconfig.hz newtree/kernel/Kconfig.hz +--- oldtree/kernel/Kconfig.hz 2007-03-06 15:00:55.000000000 -0500 ++++ newtree/kernel/Kconfig.hz 2007-03-06 17:52:36.000000000 -0500 +@@ -39,6 +39,14 @@ + as timer frequencies * number of processors = actual frequency. + Try this if you have a dual-core/dual processor system. + ++ config HZ_864 ++ bool "864 HZ" ++ help ++ 864 HZ is the best value for desktop systems. Most responsive ++ out of all the options. The only reason it is not default is ++ because it may break few drivers. Give it a try if you have ++ a desktop :). ++ + config HZ_1000 + bool "1000 HZ" + help +@@ -52,5 +60,6 @@ + default 250 if HZ_250_NODEFAULT + default 300 if HZ_300 + default 432 if HZ_432 ++ default 864 if HZ_864 + default 1000 if HZ_1000 + diff --git a/3.2.34/imqmq-3.2.patch b/3.2.34/imqmq-3.2.patch new file mode 100644 index 0000000..678869b --- /dev/null +++ b/3.2.34/imqmq-3.2.patch @@ -0,0 +1,1603 @@ +diff -uNr linux-3.2.0-go.orig//drivers/net/imq.c linux-3.2.0-go/drivers/net/imq.c +--- linux-3.2.0-go.orig//drivers/net/imq.c 1970-01-01 01:00:00.000000000 +0100 ++++ linux-3.2.0-go/drivers/net/imq.c 2012-01-16 18:54:18.592086804 +0100 +@@ -0,0 +1,850 @@ ++/* ++ * Pseudo-driver for the intermediate queue device. ++ * ++ * This program is free software; you can redistribute it and/or ++ * modify it under the terms of the GNU General Public License ++ * as published by the Free Software Foundation; either version ++ * 2 of the License, or (at your option) any later version. ++ * ++ * Authors: Patrick McHardy, ++ * ++ * The first version was written by Martin Devera, ++ * ++ * Credits: Jan Rafaj ++ * - Update patch to 2.4.21 ++ * Sebastian Strollo ++ * - Fix "Dead-loop on netdevice imq"-issue ++ * Marcel Sebek ++ * - Update to 2.6.2-rc1 ++ * ++ * After some time of inactivity there is a group taking care ++ * of IMQ again: http://www.linuximq.net ++ * ++ * ++ * 2004/06/30 - New version of IMQ patch to kernels <=2.6.7 ++ * including the following changes: ++ * ++ * - Correction of ipv6 support "+"s issue (Hasso Tepper) ++ * - Correction of imq_init_devs() issue that resulted in ++ * kernel OOPS unloading IMQ as module (Norbert Buchmuller) ++ * - Addition of functionality to choose number of IMQ devices ++ * during kernel config (Andre Correa) ++ * - Addition of functionality to choose how IMQ hooks on ++ * PRE and POSTROUTING (after or before NAT) (Andre Correa) ++ * - Cosmetic corrections (Norbert Buchmuller) (Andre Correa) ++ * ++ * ++ * 2005/12/16 - IMQ versions between 2.6.7 and 2.6.13 were ++ * released with almost no problems. 2.6.14-x was released ++ * with some important changes: nfcache was removed; After ++ * some weeks of trouble we figured out that some IMQ fields ++ * in skb were missing in skbuff.c - skb_clone and copy_skb_header. ++ * These functions are correctly patched by this new patch version. 
++ * ++ * Thanks for all who helped to figure out all the problems with ++ * 2.6.14.x: Patrick McHardy, Rune Kock, VeNoMouS, Max CtRiX, ++ * Kevin Shanahan, Richard Lucassen, Valery Dachev (hopefully ++ * I didn't forget anybody). I apologize again for my lack of time. ++ * ++ * ++ * 2008/06/17 - 2.6.25 - Changed imq.c to use qdisc_run() instead ++ * of qdisc_restart() and moved qdisc_run() to tasklet to avoid ++ * recursive locking. New initialization routines to fix 'rmmod' not ++ * working anymore. Used code from ifb.c. (Jussi Kivilinna) ++ * ++ * 2008/08/06 - 2.6.26 - (JK) ++ * - Replaced tasklet with 'netif_schedule()'. ++ * - Cleaned up and added comments for imq_nf_queue(). ++ * ++ * 2009/04/12 ++ * - Add skb_save_cb/skb_restore_cb helper functions for backuping ++ * control buffer. This is needed because qdisc-layer on kernels ++ * 2.6.27 and newer overwrite control buffer. (Jussi Kivilinna) ++ * - Add better locking for IMQ device. Hopefully this will solve ++ * SMP issues. (Jussi Kivilinna) ++ * - Port to 2.6.27 ++ * - Port to 2.6.28 ++ * - Port to 2.6.29 + fix rmmod not working ++ * ++ * 2009/04/20 - (Jussi Kivilinna) ++ * - Use netdevice feature flags to avoid extra packet handling ++ * by core networking layer and possibly increase performance. ++ * ++ * 2009/09/26 - (Jussi Kivilinna) ++ * - Add imq_nf_reinject_lockless to fix deadlock with ++ * imq_nf_queue/imq_nf_reinject. ++ * ++ * 2009/12/08 - (Jussi Kivilinna) ++ * - Port to 2.6.32 ++ * - Add check for skb->nf_queue_entry==NULL in imq_dev_xmit() ++ * - Also add better error checking for skb->nf_queue_entry usage ++ * ++ * 2010/02/25 - (Jussi Kivilinna) ++ * - Port to 2.6.33 ++ * ++ * 2010/08/15 - (Jussi Kivilinna) ++ * - Port to 2.6.35 ++ * - Simplify hook registration by using nf_register_hooks. ++ * - nf_reinject doesn't need spinlock around it, therefore remove ++ * imq_nf_reinject function. Other nf_reinject users protect ++ * their own data with spinlock. With IMQ however all data is ++ * needed is stored per skbuff, so no locking is needed. ++ * - Changed IMQ to use 'separate' NF_IMQ_QUEUE instead of ++ * NF_QUEUE, this allows working coexistance of IMQ and other ++ * NF_QUEUE users. ++ * - Make IMQ multi-queue. Number of IMQ device queues can be ++ * increased with 'numqueues' module parameters. Default number ++ * of queues is 1, in other words by default IMQ works as ++ * single-queue device. Multi-queue selection is based on ++ * IFB multi-queue patch by Changli Gao . ++ * ++ * 2011/03/18 - (Jussi Kivilinna) ++ * - Port to 2.6.38 ++ * ++ * 2011/07/12 - (syoder89@gmail.com) ++ * - Crash fix that happens when the receiving interface has more ++ * than one queue (add missing skb_set_queue_mapping in ++ * imq_select_queue). ++ * ++ * 2011/07/26 - (Jussi Kivilinna) ++ * - Add queue mapping checks for packets exiting IMQ. ++ * - Port to 3.0 ++ * ++ * 2011/08/16 - (Jussi Kivilinna) ++ * - Clear IFF_TX_SKB_SHARING flag that was added for linux 3.0.2 ++ * ++ * 2011/11/03 - Germano Michel ++ * - Fix IMQ for net namespaces ++ * ++ * 2011/11/04 - Jussi Kivilinna ++ * - Port to 3.1 ++ * - Clean-up, move 'get imq device pointer by imqX name' to ++ * separate function from imq_nf_queue(). ++ * ++ * Also, many thanks to pablo Sebastian Greco for making the initial ++ * patch and to those who helped the testing. 
++ * ++ * More info at: http://www.linuximq.net/ (Andre Correa) ++ */ ++ ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) ++ #include ++#endif ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++ ++static int imq_nf_queue(struct nf_queue_entry *entry, unsigned queue_num); ++ ++static nf_hookfn imq_nf_hook; ++ ++static struct nf_hook_ops imq_ops[] = { ++ { ++ /* imq_ingress_ipv4 */ ++ .hook = imq_nf_hook, ++ .owner = THIS_MODULE, ++ .pf = PF_INET, ++ .hooknum = NF_INET_PRE_ROUTING, ++#if defined(CONFIG_IMQ_BEHAVIOR_BA) || defined(CONFIG_IMQ_BEHAVIOR_BB) ++ .priority = NF_IP_PRI_MANGLE + 1, ++#else ++ .priority = NF_IP_PRI_NAT_DST + 1, ++#endif ++ }, ++ { ++ /* imq_egress_ipv4 */ ++ .hook = imq_nf_hook, ++ .owner = THIS_MODULE, ++ .pf = PF_INET, ++ .hooknum = NF_INET_POST_ROUTING, ++#if defined(CONFIG_IMQ_BEHAVIOR_AA) || defined(CONFIG_IMQ_BEHAVIOR_BA) ++ .priority = NF_IP_PRI_LAST, ++#else ++ .priority = NF_IP_PRI_NAT_SRC - 1, ++#endif ++ }, ++#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) ++ { ++ /* imq_ingress_ipv6 */ ++ .hook = imq_nf_hook, ++ .owner = THIS_MODULE, ++ .pf = PF_INET6, ++ .hooknum = NF_INET_PRE_ROUTING, ++#if defined(CONFIG_IMQ_BEHAVIOR_BA) || defined(CONFIG_IMQ_BEHAVIOR_BB) ++ .priority = NF_IP6_PRI_MANGLE + 1, ++#else ++ .priority = NF_IP6_PRI_NAT_DST + 1, ++#endif ++ }, ++ { ++ /* imq_egress_ipv6 */ ++ .hook = imq_nf_hook, ++ .owner = THIS_MODULE, ++ .pf = PF_INET6, ++ .hooknum = NF_INET_POST_ROUTING, ++#if defined(CONFIG_IMQ_BEHAVIOR_AA) || defined(CONFIG_IMQ_BEHAVIOR_BA) ++ .priority = NF_IP6_PRI_LAST, ++#else ++ .priority = NF_IP6_PRI_NAT_SRC - 1, ++#endif ++ }, ++#endif ++}; ++ ++#if defined(CONFIG_IMQ_NUM_DEVS) ++static int numdevs = CONFIG_IMQ_NUM_DEVS; ++#else ++static int numdevs = IMQ_MAX_DEVS; ++#endif ++ ++static struct net_device *imq_devs_cache[IMQ_MAX_DEVS]; ++ ++#define IMQ_MAX_QUEUES 32 ++static int numqueues = 1; ++static u32 imq_hashrnd; ++ ++static inline __be16 pppoe_proto(const struct sk_buff *skb) ++{ ++ return *((__be16 *)(skb_mac_header(skb) + ETH_HLEN + ++ sizeof(struct pppoe_hdr))); ++} ++ ++static u16 imq_hash(struct net_device *dev, struct sk_buff *skb) ++{ ++ unsigned int pull_len; ++ u16 protocol = skb->protocol; ++ u32 addr1, addr2; ++ u32 hash, ihl = 0; ++ union { ++ u16 in16[2]; ++ u32 in32; ++ } ports; ++ u8 ip_proto; ++ ++ pull_len = 0; ++ ++recheck: ++ switch (protocol) { ++ case htons(ETH_P_8021Q): { ++ if (unlikely(skb_pull(skb, VLAN_HLEN) == NULL)) ++ goto other; ++ ++ pull_len += VLAN_HLEN; ++ skb->network_header += VLAN_HLEN; ++ ++ protocol = vlan_eth_hdr(skb)->h_vlan_encapsulated_proto; ++ goto recheck; ++ } ++ ++ case htons(ETH_P_PPP_SES): { ++ if (unlikely(skb_pull(skb, PPPOE_SES_HLEN) == NULL)) ++ goto other; ++ ++ pull_len += PPPOE_SES_HLEN; ++ skb->network_header += PPPOE_SES_HLEN; ++ ++ protocol = pppoe_proto(skb); ++ goto recheck; ++ } ++ ++ case htons(ETH_P_IP): { ++ const struct iphdr *iph = ip_hdr(skb); ++ ++ if (unlikely(!pskb_may_pull(skb, sizeof(struct iphdr)))) ++ goto other; ++ ++ addr1 = iph->daddr; ++ addr2 = iph->saddr; ++ ++ ip_proto = !(ip_hdr(skb)->frag_off & htons(IP_MF | IP_OFFSET)) ? 
++ iph->protocol : 0; ++ ihl = ip_hdrlen(skb); ++ ++ break; ++ } ++#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) ++ case htons(ETH_P_IPV6): { ++ const struct ipv6hdr *iph = ipv6_hdr(skb); ++ ++ if (unlikely(!pskb_may_pull(skb, sizeof(struct ipv6hdr)))) ++ goto other; ++ ++ addr1 = iph->daddr.s6_addr32[3]; ++ addr2 = iph->saddr.s6_addr32[3]; ++ ihl = ipv6_skip_exthdr(skb, sizeof(struct ipv6hdr), &ip_proto); ++ if (unlikely(ihl < 0)) ++ goto other; ++ ++ break; ++ } ++#endif ++ default: ++other: ++ if (pull_len != 0) { ++ skb_push(skb, pull_len); ++ skb->network_header -= pull_len; ++ } ++ ++ return (u16)(ntohs(protocol) % dev->real_num_tx_queues); ++ } ++ ++ if (addr1 > addr2) ++ swap(addr1, addr2); ++ ++ switch (ip_proto) { ++ case IPPROTO_TCP: ++ case IPPROTO_UDP: ++ case IPPROTO_DCCP: ++ case IPPROTO_ESP: ++ case IPPROTO_AH: ++ case IPPROTO_SCTP: ++ case IPPROTO_UDPLITE: { ++ if (likely(skb_copy_bits(skb, ihl, &ports.in32, 4) >= 0)) { ++ if (ports.in16[0] > ports.in16[1]) ++ swap(ports.in16[0], ports.in16[1]); ++ break; ++ } ++ /* fall-through */ ++ } ++ default: ++ ports.in32 = 0; ++ break; ++ } ++ ++ if (pull_len != 0) { ++ skb_push(skb, pull_len); ++ skb->network_header -= pull_len; ++ } ++ ++ hash = jhash_3words(addr1, addr2, ports.in32, imq_hashrnd ^ ip_proto); ++ ++ return (u16)(((u64)hash * dev->real_num_tx_queues) >> 32); ++} ++ ++static inline bool sk_tx_queue_recorded(struct sock *sk) ++{ ++ return (sk_tx_queue_get(sk) >= 0); ++} ++ ++static struct netdev_queue *imq_select_queue(struct net_device *dev, ++ struct sk_buff *skb) ++{ ++ u16 queue_index = 0; ++ u32 hash; ++ ++ if (likely(dev->real_num_tx_queues == 1)) ++ goto out; ++ ++ /* IMQ can be receiving ingress or engress packets. */ ++ ++ /* Check first for if rx_queue is set */ ++ if (skb_rx_queue_recorded(skb)) { ++ queue_index = skb_get_rx_queue(skb); ++ goto out; ++ } ++ ++ /* Check if socket has tx_queue set */ ++ if (sk_tx_queue_recorded(skb->sk)) { ++ queue_index = sk_tx_queue_get(skb->sk); ++ goto out; ++ } ++ ++ /* Try use socket hash */ ++ if (skb->sk && skb->sk->sk_hash) { ++ hash = skb->sk->sk_hash; ++ queue_index = ++ (u16)(((u64)hash * dev->real_num_tx_queues) >> 32); ++ goto out; ++ } ++ ++ /* Generate hash from packet data */ ++ queue_index = imq_hash(dev, skb); ++ ++out: ++ if (unlikely(queue_index >= dev->real_num_tx_queues)) ++ queue_index = (u16)((u32)queue_index % dev->real_num_tx_queues); ++ ++ skb_set_queue_mapping(skb, queue_index); ++ return netdev_get_tx_queue(dev, queue_index); ++} ++ ++static struct net_device_stats *imq_get_stats(struct net_device *dev) ++{ ++ return &dev->stats; ++} ++ ++/* called for packets kfree'd in qdiscs at places other than enqueue */ ++static void imq_skb_destructor(struct sk_buff *skb) ++{ ++ struct nf_queue_entry *entry = skb->nf_queue_entry; ++ ++ skb->nf_queue_entry = NULL; ++ ++ if (entry) { ++ nf_queue_entry_release_refs(entry); ++ kfree(entry); ++ } ++ ++ skb_restore_cb(skb); /* kfree backup */ ++} ++ ++static void imq_done_check_queue_mapping(struct sk_buff *skb, ++ struct net_device *dev) ++{ ++ unsigned int queue_index; ++ ++ /* Don't let queue_mapping be left too large after exiting IMQ */ ++ if (likely(skb->dev != dev && skb->dev != NULL)) { ++ queue_index = skb_get_queue_mapping(skb); ++ if (unlikely(queue_index >= skb->dev->real_num_tx_queues)) { ++ queue_index = (u16)((u32)queue_index % ++ skb->dev->real_num_tx_queues); ++ skb_set_queue_mapping(skb, queue_index); ++ } ++ } else { ++ /* skb->dev was IMQ device itself or NULL, be on safe side and 
++ * just clear queue mapping. ++ */ ++ skb_set_queue_mapping(skb, 0); ++ } ++} ++ ++static netdev_tx_t imq_dev_xmit(struct sk_buff *skb, struct net_device *dev) ++{ ++ struct nf_queue_entry *entry = skb->nf_queue_entry; ++ ++ skb->nf_queue_entry = NULL; ++ dev->trans_start = jiffies; ++ ++ dev->stats.tx_bytes += skb->len; ++ dev->stats.tx_packets++; ++ ++ if (unlikely(entry == NULL)) { ++ /* We don't know what is going on here.. packet is queued for ++ * imq device, but (probably) not by us. ++ * ++ * If this packet was not send here by imq_nf_queue(), then ++ * skb_save_cb() was not used and skb_free() should not show: ++ * WARNING: IMQ: kfree_skb: skb->cb_next:.. ++ * and/or ++ * WARNING: IMQ: kfree_skb: skb->nf_queue_entry... ++ * ++ * However if this message is shown, then IMQ is somehow broken ++ * and you should report this to linuximq.net. ++ */ ++ ++ /* imq_dev_xmit is black hole that eats all packets, report that ++ * we eat this packet happily and increase dropped counters. ++ */ ++ ++ dev->stats.tx_dropped++; ++ dev_kfree_skb(skb); ++ ++ return NETDEV_TX_OK; ++ } ++ ++ skb_restore_cb(skb); /* restore skb->cb */ ++ ++ skb->imq_flags = 0; ++ skb->destructor = NULL; ++ ++ imq_done_check_queue_mapping(skb, dev); ++ ++ nf_reinject(entry, NF_ACCEPT); ++ ++ return NETDEV_TX_OK; ++} ++ ++static struct net_device *get_imq_device_by_index(int index) ++{ ++ struct net_device *dev = NULL; ++ struct net *net; ++ char buf[8]; ++ ++ /* get device by name and cache result */ ++ snprintf(buf, sizeof(buf), "imq%d", index); ++ ++ /* Search device from all namespaces. */ ++ for_each_net(net) { ++ dev = dev_get_by_name(net, buf); ++ if (dev) ++ break; ++ } ++ ++ if (WARN_ON_ONCE(dev == NULL)) { ++ /* IMQ device not found. Exotic config? */ ++ return ERR_PTR(-ENODEV); ++ } ++ ++ imq_devs_cache[index] = dev; ++ dev_put(dev); ++ ++ return dev; ++} ++ ++static int imq_nf_queue(struct nf_queue_entry *entry, unsigned queue_num) ++{ ++ struct net_device *dev; ++ struct sk_buff *skb_orig, *skb, *skb_shared; ++ struct Qdisc *q; ++ struct netdev_queue *txq; ++ spinlock_t *root_lock; ++ int users, index; ++ int retval = -EINVAL; ++ unsigned int orig_queue_index; ++ ++ index = entry->skb->imq_flags & IMQ_F_IFMASK; ++ if (unlikely(index > numdevs - 1)) { ++ if (net_ratelimit()) ++ printk(KERN_WARNING ++ "IMQ: invalid device specified, highest is %u\n", ++ numdevs - 1); ++ retval = -EINVAL; ++ goto out; ++ } ++ ++ /* check for imq device by index from cache */ ++ dev = imq_devs_cache[index]; ++ if (unlikely(!dev)) { ++ dev = get_imq_device_by_index(index); ++ if (IS_ERR(dev)) { ++ retval = PTR_ERR(dev); ++ goto out; ++ } ++ } ++ ++ if (unlikely(!(dev->flags & IFF_UP))) { ++ entry->skb->imq_flags = 0; ++ nf_reinject(entry, NF_ACCEPT); ++ retval = 0; ++ goto out; ++ } ++ dev->last_rx = jiffies; ++ ++ skb = entry->skb; ++ skb_orig = NULL; ++ ++ /* skb has owner? => make clone */ ++ if (unlikely(skb->destructor)) { ++ skb_orig = skb; ++ skb = skb_clone(skb, GFP_ATOMIC); ++ if (unlikely(!skb)) { ++ retval = -ENOMEM; ++ goto out; ++ } ++ entry->skb = skb; ++ } ++ ++ skb->nf_queue_entry = entry; ++ ++ dev->stats.rx_bytes += skb->len; ++ dev->stats.rx_packets++; ++ ++ if (!skb->dev) { ++ /* skb->dev == NULL causes problems, try the find cause. 
*/ ++ if (net_ratelimit()) { ++ dev_warn(&dev->dev, ++ "received packet with skb->dev == NULL\n"); ++ dump_stack(); ++ } ++ ++ skb->dev = dev; ++ } ++ ++ /* Disables softirqs for lock below */ ++ rcu_read_lock_bh(); ++ ++ /* Multi-queue selection */ ++ orig_queue_index = skb_get_queue_mapping(skb); ++ txq = imq_select_queue(dev, skb); ++ ++ q = rcu_dereference(txq->qdisc); ++ if (unlikely(!q->enqueue)) ++ goto packet_not_eaten_by_imq_dev; ++ ++ root_lock = qdisc_lock(q); ++ spin_lock(root_lock); ++ ++ users = atomic_read(&skb->users); ++ ++ skb_shared = skb_get(skb); /* increase reference count by one */ ++ skb_save_cb(skb_shared); /* backup skb->cb, as qdisc layer will ++ overwrite it */ ++ qdisc_enqueue_root(skb_shared, q); /* might kfree_skb */ ++ ++ if (likely(atomic_read(&skb_shared->users) == users + 1)) { ++ kfree_skb(skb_shared); /* decrease reference count by one */ ++ ++ skb->destructor = &imq_skb_destructor; ++ ++ /* cloned? */ ++ if (unlikely(skb_orig)) ++ kfree_skb(skb_orig); /* free original */ ++ ++ spin_unlock(root_lock); ++ rcu_read_unlock_bh(); ++ ++ /* schedule qdisc dequeue */ ++ __netif_schedule(q); ++ ++ retval = 0; ++ goto out; ++ } else { ++ skb_restore_cb(skb_shared); /* restore skb->cb */ ++ skb->nf_queue_entry = NULL; ++ /* qdisc dropped packet and decreased skb reference count of ++ * skb, so we don't really want to and try refree as that would ++ * actually destroy the skb. */ ++ spin_unlock(root_lock); ++ goto packet_not_eaten_by_imq_dev; ++ } ++ ++packet_not_eaten_by_imq_dev: ++ skb_set_queue_mapping(skb, orig_queue_index); ++ rcu_read_unlock_bh(); ++ ++ /* cloned? restore original */ ++ if (unlikely(skb_orig)) { ++ kfree_skb(skb); ++ entry->skb = skb_orig; ++ } ++ retval = -1; ++out: ++ return retval; ++} ++ ++static unsigned int imq_nf_hook(unsigned int hook, struct sk_buff *pskb, ++ const struct net_device *indev, ++ const struct net_device *outdev, ++ int (*okfn)(struct sk_buff *)) ++{ ++ return (pskb->imq_flags & IMQ_F_ENQUEUE) ? NF_IMQ_QUEUE : NF_ACCEPT; ++} ++ ++static int imq_close(struct net_device *dev) ++{ ++ netif_stop_queue(dev); ++ return 0; ++} ++ ++static int imq_open(struct net_device *dev) ++{ ++ netif_start_queue(dev); ++ return 0; ++} ++ ++static const struct net_device_ops imq_netdev_ops = { ++ .ndo_open = imq_open, ++ .ndo_stop = imq_close, ++ .ndo_start_xmit = imq_dev_xmit, ++ .ndo_get_stats = imq_get_stats, ++}; ++ ++static void imq_setup(struct net_device *dev) ++{ ++ dev->netdev_ops = &imq_netdev_ops; ++ dev->type = ARPHRD_VOID; ++ dev->mtu = 16000; /* too small? */ ++ dev->tx_queue_len = 11000; /* too big? 
*/ ++ dev->flags = IFF_NOARP; ++ dev->features = NETIF_F_SG | NETIF_F_FRAGLIST | ++ NETIF_F_GSO | NETIF_F_HW_CSUM | ++ NETIF_F_HIGHDMA; ++ dev->priv_flags &= ~(IFF_XMIT_DST_RELEASE | ++ IFF_TX_SKB_SHARING); ++} ++ ++static int imq_validate(struct nlattr *tb[], struct nlattr *data[]) ++{ ++ int ret = 0; ++ ++ if (tb[IFLA_ADDRESS]) { ++ if (nla_len(tb[IFLA_ADDRESS]) != ETH_ALEN) { ++ ret = -EINVAL; ++ goto end; ++ } ++ if (!is_valid_ether_addr(nla_data(tb[IFLA_ADDRESS]))) { ++ ret = -EADDRNOTAVAIL; ++ goto end; ++ } ++ } ++ return 0; ++end: ++ printk(KERN_WARNING "IMQ: imq_validate failed (%d)\n", ret); ++ return ret; ++} ++ ++static struct rtnl_link_ops imq_link_ops __read_mostly = { ++ .kind = "imq", ++ .priv_size = 0, ++ .setup = imq_setup, ++ .validate = imq_validate, ++}; ++ ++static const struct nf_queue_handler imq_nfqh = { ++ .name = "imq", ++ .outfn = imq_nf_queue, ++}; ++ ++static int __init imq_init_hooks(void) ++{ ++ int ret; ++ ++ nf_register_queue_imq_handler(&imq_nfqh); ++ ++ ret = nf_register_hooks(imq_ops, ARRAY_SIZE(imq_ops)); ++ if (ret < 0) ++ nf_unregister_queue_imq_handler(); ++ ++ return ret; ++} ++ ++static int __init imq_init_one(int index) ++{ ++ struct net_device *dev; ++ int ret; ++ ++ dev = alloc_netdev_mq(0, "imq%d", imq_setup, numqueues); ++ if (!dev) ++ return -ENOMEM; ++ ++ ret = dev_alloc_name(dev, dev->name); ++ if (ret < 0) ++ goto fail; ++ ++ dev->rtnl_link_ops = &imq_link_ops; ++ ret = register_netdevice(dev); ++ if (ret < 0) ++ goto fail; ++ ++ return 0; ++fail: ++ free_netdev(dev); ++ return ret; ++} ++ ++static int __init imq_init_devs(void) ++{ ++ int err, i; ++ ++ if (numdevs < 1 || numdevs > IMQ_MAX_DEVS) { ++ printk(KERN_ERR "IMQ: numdevs has to be betweed 1 and %u\n", ++ IMQ_MAX_DEVS); ++ return -EINVAL; ++ } ++ ++ if (numqueues < 1 || numqueues > IMQ_MAX_QUEUES) { ++ printk(KERN_ERR "IMQ: numqueues has to be betweed 1 and %u\n", ++ IMQ_MAX_QUEUES); ++ return -EINVAL; ++ } ++ ++ get_random_bytes(&imq_hashrnd, sizeof(imq_hashrnd)); ++ ++ rtnl_lock(); ++ err = __rtnl_link_register(&imq_link_ops); ++ ++ for (i = 0; i < numdevs && !err; i++) ++ err = imq_init_one(i); ++ ++ if (err) { ++ __rtnl_link_unregister(&imq_link_ops); ++ memset(imq_devs_cache, 0, sizeof(imq_devs_cache)); ++ } ++ rtnl_unlock(); ++ ++ return err; ++} ++ ++static int __init imq_init_module(void) ++{ ++ int err; ++ ++#if defined(CONFIG_IMQ_NUM_DEVS) ++ BUILD_BUG_ON(CONFIG_IMQ_NUM_DEVS > 16); ++ BUILD_BUG_ON(CONFIG_IMQ_NUM_DEVS < 2); ++ BUILD_BUG_ON(CONFIG_IMQ_NUM_DEVS - 1 > IMQ_F_IFMASK); ++#endif ++ ++ err = imq_init_devs(); ++ if (err) { ++ printk(KERN_ERR "IMQ: Error trying imq_init_devs(net)\n"); ++ return err; ++ } ++ ++ err = imq_init_hooks(); ++ if (err) { ++ printk(KERN_ERR "IMQ: Error trying imq_init_hooks()\n"); ++ rtnl_link_unregister(&imq_link_ops); ++ memset(imq_devs_cache, 0, sizeof(imq_devs_cache)); ++ return err; ++ } ++ ++ printk(KERN_INFO "IMQ driver loaded successfully. 
" ++ "(numdevs = %d, numqueues = %d)\n", numdevs, numqueues); ++ ++#if defined(CONFIG_IMQ_BEHAVIOR_BA) || defined(CONFIG_IMQ_BEHAVIOR_BB) ++ printk(KERN_INFO "\tHooking IMQ before NAT on PREROUTING.\n"); ++#else ++ printk(KERN_INFO "\tHooking IMQ after NAT on PREROUTING.\n"); ++#endif ++#if defined(CONFIG_IMQ_BEHAVIOR_AB) || defined(CONFIG_IMQ_BEHAVIOR_BB) ++ printk(KERN_INFO "\tHooking IMQ before NAT on POSTROUTING.\n"); ++#else ++ printk(KERN_INFO "\tHooking IMQ after NAT on POSTROUTING.\n"); ++#endif ++ ++ return 0; ++} ++ ++static void __exit imq_unhook(void) ++{ ++ nf_unregister_hooks(imq_ops, ARRAY_SIZE(imq_ops)); ++ nf_unregister_queue_imq_handler(); ++} ++ ++static void __exit imq_cleanup_devs(void) ++{ ++ rtnl_link_unregister(&imq_link_ops); ++ memset(imq_devs_cache, 0, sizeof(imq_devs_cache)); ++} ++ ++static void __exit imq_exit_module(void) ++{ ++ imq_unhook(); ++ imq_cleanup_devs(); ++ printk(KERN_INFO "IMQ driver unloaded successfully.\n"); ++} ++ ++module_init(imq_init_module); ++module_exit(imq_exit_module); ++ ++module_param(numdevs, int, 0); ++module_param(numqueues, int, 0); ++MODULE_PARM_DESC(numdevs, "number of IMQ devices (how many imq* devices will " ++ "be created)"); ++MODULE_PARM_DESC(numqueues, "number of queues per IMQ device"); ++MODULE_AUTHOR("http://www.linuximq.net"); ++MODULE_DESCRIPTION("Pseudo-driver for the intermediate queue device. See " ++ "http://www.linuximq.net/ for more information."); ++MODULE_LICENSE("GPL"); ++MODULE_ALIAS_RTNL_LINK("imq"); ++ +diff -uNr linux-3.2.0-go.orig//drivers/net/Kconfig linux-3.2.0-go/drivers/net/Kconfig +--- linux-3.2.0-go.orig//drivers/net/Kconfig 2012-01-16 18:52:00.206580353 +0100 ++++ linux-3.2.0-go/drivers/net/Kconfig 2012-01-16 18:54:18.680050560 +0100 +@@ -193,6 +193,125 @@ + depends on RIONET + default "128" + ++config IMQ ++ tristate "IMQ (intermediate queueing device) support" ++ depends on NETDEVICES && NETFILTER ++ ---help--- ++ The IMQ device(s) is used as placeholder for QoS queueing ++ disciplines. Every packet entering/leaving the IP stack can be ++ directed through the IMQ device where it's enqueued/dequeued to the ++ attached qdisc. This allows you to treat network devices as classes ++ and distribute bandwidth among them. Iptables is used to specify ++ through which IMQ device, if any, packets travel. ++ ++ More information at: http://www.linuximq.net/ ++ ++ To compile this driver as a module, choose M here: the module ++ will be called imq. If unsure, say N. ++ ++choice ++ prompt "IMQ behavior (PRE/POSTROUTING)" ++ depends on IMQ ++ default IMQ_BEHAVIOR_AB ++ help ++ This setting defines how IMQ behaves in respect to its ++ hooking in PREROUTING and POSTROUTING. ++ ++ IMQ can work in any of the following ways: ++ ++ PREROUTING | POSTROUTING ++ -----------------|------------------- ++ #1 After NAT | After NAT ++ #2 After NAT | Before NAT ++ #3 Before NAT | After NAT ++ #4 Before NAT | Before NAT ++ ++ The default behavior is to hook before NAT on PREROUTING ++ and after NAT on POSTROUTING (#3). ++ ++ This settings are specially usefull when trying to use IMQ ++ to shape NATed clients. ++ ++ More information can be found at: www.linuximq.net ++ ++ If not sure leave the default settings alone. ++ ++config IMQ_BEHAVIOR_AA ++ bool "IMQ AA" ++ help ++ This setting defines how IMQ behaves in respect to its ++ hooking in PREROUTING and POSTROUTING. 
++ ++ Choosing this option will make IMQ hook like this: ++ ++ PREROUTING: After NAT ++ POSTROUTING: After NAT ++ ++ More information can be found at: www.linuximq.net ++ ++ If not sure leave the default settings alone. ++ ++config IMQ_BEHAVIOR_AB ++ bool "IMQ AB" ++ help ++ This setting defines how IMQ behaves in respect to its ++ hooking in PREROUTING and POSTROUTING. ++ ++ Choosing this option will make IMQ hook like this: ++ ++ PREROUTING: After NAT ++ POSTROUTING: Before NAT ++ ++ More information can be found at: www.linuximq.net ++ ++ If not sure leave the default settings alone. ++ ++config IMQ_BEHAVIOR_BA ++ bool "IMQ BA" ++ help ++ This setting defines how IMQ behaves in respect to its ++ hooking in PREROUTING and POSTROUTING. ++ ++ Choosing this option will make IMQ hook like this: ++ ++ PREROUTING: Before NAT ++ POSTROUTING: After NAT ++ ++ More information can be found at: www.linuximq.net ++ ++ If not sure leave the default settings alone. ++ ++config IMQ_BEHAVIOR_BB ++ bool "IMQ BB" ++ help ++ This setting defines how IMQ behaves in respect to its ++ hooking in PREROUTING and POSTROUTING. ++ ++ Choosing this option will make IMQ hook like this: ++ ++ PREROUTING: Before NAT ++ POSTROUTING: Before NAT ++ ++ More information can be found at: www.linuximq.net ++ ++ If not sure leave the default settings alone. ++ ++endchoice ++ ++config IMQ_NUM_DEVS ++ int "Number of IMQ devices" ++ range 2 16 ++ depends on IMQ ++ default "16" ++ help ++ This setting defines how many IMQ devices will be created. ++ ++ The default value is 16. ++ ++ More information can be found at: www.linuximq.net ++ ++ If not sure leave the default settings alone. ++ + config TUN + tristate "Universal TUN/TAP device driver support" + select CRC32 +diff -uNr linux-3.2.0-go.orig//drivers/net/Makefile linux-3.2.0-go/drivers/net/Makefile +--- linux-3.2.0-go.orig//drivers/net/Makefile 2012-01-16 18:52:00.345470492 +0100 ++++ linux-3.2.0-go/drivers/net/Makefile 2012-01-16 18:57:33.577640398 +0100 +@@ -7,6 +7,7 @@ + # + obj-$(CONFIG_BONDING) += bonding/ + obj-$(CONFIG_DUMMY) += dummy.o ++obj-$(CONFIG_IMQ) += imq.o + obj-$(CONFIG_EQUALIZER) += eql.o + obj-$(CONFIG_IFB) += ifb.o + obj-$(CONFIG_MACVLAN) += macvlan.o +diff -uNr linux-3.2.0-go.orig//include/linux/imq.h linux-3.2.0-go/include/linux/imq.h +--- linux-3.2.0-go.orig//include/linux/imq.h 1970-01-01 01:00:00.000000000 +0100 ++++ linux-3.2.0-go/include/linux/imq.h 2012-01-16 18:54:18.682365396 +0100 +@@ -0,0 +1,13 @@ ++#ifndef _IMQ_H ++#define _IMQ_H ++ ++/* IFMASK (16 device indexes, 0 to 15) and flag(s) fit in 5 bits */ ++#define IMQ_F_BITS 5 ++ ++#define IMQ_F_IFMASK 0x0f ++#define IMQ_F_ENQUEUE 0x10 ++ ++#define IMQ_MAX_DEVS (IMQ_F_IFMASK + 1) ++ ++#endif /* _IMQ_H */ ++ +diff -uNr linux-3.2.0-go.orig//include/linux/netfilter/xt_IMQ.h linux-3.2.0-go/include/linux/netfilter/xt_IMQ.h +--- linux-3.2.0-go.orig//include/linux/netfilter/xt_IMQ.h 1970-01-01 01:00:00.000000000 +0100 ++++ linux-3.2.0-go/include/linux/netfilter/xt_IMQ.h 2012-01-16 18:54:18.682365396 +0100 +@@ -0,0 +1,9 @@ ++#ifndef _XT_IMQ_H ++#define _XT_IMQ_H ++ ++struct xt_imq_info { ++ unsigned int todev; /* target imq device */ ++}; ++ ++#endif /* _XT_IMQ_H */ ++ +diff -uNr linux-3.2.0-go.orig//include/linux/netfilter.h linux-3.2.0-go/include/linux/netfilter.h +--- linux-3.2.0-go.orig//include/linux/netfilter.h 2012-01-16 18:53:45.165859627 +0100 ++++ linux-3.2.0-go/include/linux/netfilter.h 2012-01-16 18:54:18.684680232 +0100 +@@ -22,7 +22,8 @@ + #define NF_QUEUE 3 + #define NF_REPEAT 4 + #define 
NF_STOP 5 +-#define NF_MAX_VERDICT NF_STOP ++#define NF_IMQ_QUEUE 6 ++#define NF_MAX_VERDICT NF_IMQ_QUEUE + + /* we overload the higher bits for encoding auxiliary data such as the queue + * number or errno values. Not nice, but better than additional function +diff -uNr linux-3.2.0-go.orig//include/linux/netfilter_ipv4/ipt_IMQ.h linux-3.2.0-go/include/linux/netfilter_ipv4/ipt_IMQ.h +--- linux-3.2.0-go.orig//include/linux/netfilter_ipv4/ipt_IMQ.h 1970-01-01 01:00:00.000000000 +0100 ++++ linux-3.2.0-go/include/linux/netfilter_ipv4/ipt_IMQ.h 2012-01-16 18:54:18.686995068 +0100 +@@ -0,0 +1,10 @@ ++#ifndef _IPT_IMQ_H ++#define _IPT_IMQ_H ++ ++/* Backwards compatibility for old userspace */ ++#include ++ ++#define ipt_imq_info xt_imq_info ++ ++#endif /* _IPT_IMQ_H */ ++ +diff -uNr linux-3.2.0-go.orig//include/linux/netfilter_ipv6/ip6t_IMQ.h linux-3.2.0-go/include/linux/netfilter_ipv6/ip6t_IMQ.h +--- linux-3.2.0-go.orig//include/linux/netfilter_ipv6/ip6t_IMQ.h 1970-01-01 01:00:00.000000000 +0100 ++++ linux-3.2.0-go/include/linux/netfilter_ipv6/ip6t_IMQ.h 2012-01-16 18:54:18.686995068 +0100 +@@ -0,0 +1,10 @@ ++#ifndef _IP6T_IMQ_H ++#define _IP6T_IMQ_H ++ ++/* Backwards compatibility for old userspace */ ++#include ++ ++#define ip6t_imq_info xt_imq_info ++ ++#endif /* _IP6T_IMQ_H */ ++ +diff -uNr linux-3.2.0-go.orig//include/linux/skbuff.h linux-3.2.0-go/include/linux/skbuff.h +--- linux-3.2.0-go.orig//include/linux/skbuff.h 2012-01-16 18:53:43.114915216 +0100 ++++ linux-3.2.0-go/include/linux/skbuff.h 2012-01-16 18:59:22.256860605 +0100 +@@ -30,6 +30,9 @@ + #include + #include + #include ++#if defined(CONFIG_IMQ) || defined(CONFIG_IMQ_MODULE) ++#include ++#endif + + /* Don't change this without changing skb_csum_unnecessary! */ + #define CHECKSUM_NONE 0 +@@ -386,6 +389,9 @@ + * first. This is owned by whoever has the skb queued ATM. 
+ */ + char cb[48] __aligned(8); ++#if defined(CONFIG_IMQ) || defined(CONFIG_IMQ_MODULE) ++ void *cb_next; ++#endif + + unsigned long _skb_refdst; + #ifdef CONFIG_XFRM +@@ -424,6 +430,9 @@ + #ifdef NET_SKBUFF_NF_DEFRAG_NEEDED + struct sk_buff *nfct_reasm; + #endif ++#if defined(CONFIG_IMQ) || defined(CONFIG_IMQ_MODULE) ++ struct nf_queue_entry *nf_queue_entry; ++#endif + #ifdef CONFIG_BRIDGE_NETFILTER + struct nf_bridge_info *nf_bridge; + #endif +@@ -449,6 +458,10 @@ + + /* 0/13 bit hole */ + ++#if defined(CONFIG_IMQ) || defined(CONFIG_IMQ_MODULE) ++ __u8 imq_flags:IMQ_F_BITS; ++#endif ++ + #ifdef CONFIG_NET_DMA + dma_cookie_t dma_cookie; + #endif +@@ -535,6 +548,12 @@ + return (struct rtable *)skb_dst(skb); + } + ++ ++#if defined(CONFIG_IMQ) || defined(CONFIG_IMQ_MODULE) ++extern int skb_save_cb(struct sk_buff *skb); ++extern int skb_restore_cb(struct sk_buff *skb); ++#endif ++ + extern void kfree_skb(struct sk_buff *skb); + extern void consume_skb(struct sk_buff *skb); + extern void __kfree_skb(struct sk_buff *skb); +@@ -2358,6 +2377,10 @@ + dst->nfct_reasm = src->nfct_reasm; + nf_conntrack_get_reasm(src->nfct_reasm); + #endif ++#if defined(CONFIG_IMQ) || defined(CONFIG_IMQ_MODULE) ++ dst->imq_flags = src->imq_flags; ++ dst->nf_queue_entry = src->nf_queue_entry; ++#endif + #ifdef CONFIG_BRIDGE_NETFILTER + dst->nf_bridge = src->nf_bridge; + nf_bridge_get(src->nf_bridge); +diff -uNr linux-3.2.0-go.orig//include/net/netfilter/nf_queue.h linux-3.2.0-go/include/net/netfilter/nf_queue.h +--- linux-3.2.0-go.orig//include/net/netfilter/nf_queue.h 2012-01-16 18:53:39.024600575 +0100 ++++ linux-3.2.0-go/include/net/netfilter/nf_queue.h 2012-01-16 18:54:18.703198917 +0100 +@@ -30,5 +30,11 @@ + const struct nf_queue_handler *qh); + extern void nf_unregister_queue_handlers(const struct nf_queue_handler *qh); + extern void nf_reinject(struct nf_queue_entry *entry, unsigned int verdict); ++extern void nf_queue_entry_release_refs(struct nf_queue_entry *entry); ++ ++#if defined(CONFIG_IMQ) || defined(CONFIG_IMQ_MODULE) ++extern void nf_register_queue_imq_handler(const struct nf_queue_handler *qh); ++extern void nf_unregister_queue_imq_handler(void); ++#endif + + #endif /* _NF_QUEUE_H */ +diff -uNr linux-3.2.0-go.orig//net/core/dev.c linux-3.2.0-go/net/core/dev.c +--- linux-3.2.0-go.orig//net/core/dev.c 2012-01-16 18:52:41.130560289 +0100 ++++ linux-3.2.0-go/net/core/dev.c 2012-01-16 18:54:18.707828588 +0100 +@@ -98,6 +98,9 @@ + #include + #include + #include ++#if defined(CONFIG_IMQ) || defined(CONFIG_IMQ_MODULE) ++#include ++#endif + #include + #include + #include +@@ -2185,7 +2188,12 @@ + if (dev->priv_flags & IFF_XMIT_DST_RELEASE) + skb_dst_drop(skb); + ++#if defined(CONFIG_IMQ) || defined(CONFIG_IMQ_MODULE) ++ if (!list_empty(&ptype_all) && ++ !(skb->imq_flags & IMQ_F_ENQUEUE)) ++#else + if (!list_empty(&ptype_all)) ++#endif + dev_queue_xmit_nit(skb, dev); + + skb_orphan_try(skb); +diff -uNr linux-3.2.0-go.orig//net/core/skbuff.c linux-3.2.0-go/net/core/skbuff.c +--- linux-3.2.0-go.orig//net/core/skbuff.c 2012-01-16 18:52:41.146764138 +0100 ++++ linux-3.2.0-go/net/core/skbuff.c 2012-01-16 18:54:18.710143424 +0100 +@@ -73,6 +73,9 @@ + + static struct kmem_cache *skbuff_head_cache __read_mostly; + static struct kmem_cache *skbuff_fclone_cache __read_mostly; ++#if defined(CONFIG_IMQ) || defined(CONFIG_IMQ_MODULE) ++static struct kmem_cache *skbuff_cb_store_cache __read_mostly; ++#endif + + static void sock_pipe_buf_release(struct pipe_inode_info *pipe, + struct pipe_buffer *buf) +@@ -92,6 +95,82 @@ + 
return 1; + } + ++#if defined(CONFIG_IMQ) || defined(CONFIG_IMQ_MODULE) ++/* Control buffer save/restore for IMQ devices */ ++struct skb_cb_table { ++ char cb[48] __aligned(8); ++ void *cb_next; ++ atomic_t refcnt; ++}; ++ ++static DEFINE_SPINLOCK(skb_cb_store_lock); ++ ++int skb_save_cb(struct sk_buff *skb) ++{ ++ struct skb_cb_table *next; ++ ++ next = kmem_cache_alloc(skbuff_cb_store_cache, GFP_ATOMIC); ++ if (!next) ++ return -ENOMEM; ++ ++ BUILD_BUG_ON(sizeof(skb->cb) != sizeof(next->cb)); ++ ++ memcpy(next->cb, skb->cb, sizeof(skb->cb)); ++ next->cb_next = skb->cb_next; ++ ++ atomic_set(&next->refcnt, 1); ++ ++ skb->cb_next = next; ++ return 0; ++} ++EXPORT_SYMBOL(skb_save_cb); ++ ++int skb_restore_cb(struct sk_buff *skb) ++{ ++ struct skb_cb_table *next; ++ ++ if (!skb->cb_next) ++ return 0; ++ ++ next = skb->cb_next; ++ ++ BUILD_BUG_ON(sizeof(skb->cb) != sizeof(next->cb)); ++ ++ memcpy(skb->cb, next->cb, sizeof(skb->cb)); ++ skb->cb_next = next->cb_next; ++ ++ spin_lock(&skb_cb_store_lock); ++ ++ if (atomic_dec_and_test(&next->refcnt)) ++ kmem_cache_free(skbuff_cb_store_cache, next); ++ ++ spin_unlock(&skb_cb_store_lock); ++ ++ return 0; ++} ++EXPORT_SYMBOL(skb_restore_cb); ++ ++static void skb_copy_stored_cb(struct sk_buff *new, const struct sk_buff *__old) ++{ ++ struct skb_cb_table *next; ++ struct sk_buff *old; ++ ++ if (!__old->cb_next) { ++ new->cb_next = NULL; ++ return; ++ } ++ ++ spin_lock(&skb_cb_store_lock); ++ ++ old = (struct sk_buff *)__old; ++ ++ next = old->cb_next; ++ atomic_inc(&next->refcnt); ++ new->cb_next = next; ++ ++ spin_unlock(&skb_cb_store_lock); ++} ++#endif + + /* Pipe buffer operations for a socket. */ + static const struct pipe_buf_operations sock_pipe_buf_ops = { +@@ -403,6 +482,26 @@ + WARN_ON(in_irq()); + skb->destructor(skb); + } ++#if defined(CONFIG_IMQ) || defined(CONFIG_IMQ_MODULE) ++ /* This should not happen. When it does, avoid memleak by restoring ++ the chain of cb-backups. */ ++ while (skb->cb_next != NULL) { ++ if (net_ratelimit()) ++ printk(KERN_WARNING "IMQ: kfree_skb: skb->cb_next: " ++ "%08x\n", (unsigned int)skb->cb_next); ++ ++ skb_restore_cb(skb); ++ } ++ /* This should not happen either, nf_queue_entry is nullified in ++ * imq_dev_xmit(). If we have non-NULL nf_queue_entry then we are ++ * leaking entry pointers, maybe memory. We don't know if this is ++ * pointer to already freed memory, or should this be freed. ++ * If this happens we need to add refcounting, etc for nf_queue_entry. 
++ */ ++ if (skb->nf_queue_entry && net_ratelimit()) ++ printk(KERN_WARNING ++ "IMQ: kfree_skb: skb->nf_queue_entry != NULL"); ++#endif + #if defined(CONFIG_NF_CONNTRACK) || defined(CONFIG_NF_CONNTRACK_MODULE) + nf_conntrack_put(skb->nfct); + #endif +@@ -547,6 +646,9 @@ + new->sp = secpath_get(old->sp); + #endif + memcpy(new->cb, old->cb, sizeof(old->cb)); ++#if defined(CONFIG_IMQ) || defined(CONFIG_IMQ_MODULE) ++ skb_copy_stored_cb(new, old); ++#endif + new->csum = old->csum; + new->local_df = old->local_df; + new->pkt_type = old->pkt_type; +@@ -2907,6 +3009,13 @@ + 0, + SLAB_HWCACHE_ALIGN|SLAB_PANIC, + NULL); ++#if defined(CONFIG_IMQ) || defined(CONFIG_IMQ_MODULE) ++ skbuff_cb_store_cache = kmem_cache_create("skbuff_cb_store_cache", ++ sizeof(struct skb_cb_table), ++ 0, ++ SLAB_HWCACHE_ALIGN|SLAB_PANIC, ++ NULL); ++#endif + } + + /** +diff -uNr linux-3.2.0-go.orig//net/ipv6/ip6_output.c linux-3.2.0-go/net/ipv6/ip6_output.c +--- linux-3.2.0-go.orig//net/ipv6/ip6_output.c 2012-01-16 18:52:40.091199069 +0100 ++++ linux-3.2.0-go/net/ipv6/ip6_output.c 2012-01-16 18:54:18.712458260 +0100 +@@ -102,9 +102,6 @@ + struct net_device *dev = dst->dev; + struct neighbour *neigh; + +- skb->protocol = htons(ETH_P_IPV6); +- skb->dev = dev; +- + if (ipv6_addr_is_multicast(&ipv6_hdr(skb)->daddr)) { + struct inet6_dev *idev = ip6_dst_idev(skb_dst(skb)); + +@@ -170,6 +167,11 @@ + return 0; + } + ++ /* IMQ-patch: moved setting skb->dev and skb->protocol from ++ * ip6_finish_output2 to fix crashing at netif_skb_features(). */ ++ skb->protocol = htons(ETH_P_IPV6); ++ skb->dev = dev; ++ + return NF_HOOK_COND(NFPROTO_IPV6, NF_INET_POST_ROUTING, skb, NULL, dev, + ip6_finish_output, + !(IP6CB(skb)->flags & IP6SKB_REROUTED)); +diff -uNr linux-3.2.0-go.orig//net/netfilter/core.c linux-3.2.0-go/net/netfilter/core.c +--- linux-3.2.0-go.orig//net/netfilter/core.c 2012-01-16 18:52:40.811112965 +0100 ++++ linux-3.2.0-go/net/netfilter/core.c 2012-01-16 19:02:01.429591439 +0100 +@@ -179,9 +179,11 @@ + ret = NF_DROP_GETERR(verdict); + if (ret == 0) + ret = -EPERM; +- } else if ((verdict & NF_VERDICT_MASK) == NF_QUEUE) { ++ } else if ((verdict & NF_VERDICT_MASK) == NF_QUEUE || ++ (verdict & NF_VERDICT_MASK) == NF_IMQ_QUEUE) { + int err = nf_queue(skb, elem, pf, hook, indev, outdev, okfn, +- verdict >> NF_VERDICT_QBITS); ++ verdict >> NF_VERDICT_QBITS, ++ verdict & NF_VERDICT_MASK); + if (err < 0) { + if (err == -ECANCELED) + goto next_hook; +diff -uNr linux-3.2.0-go.orig//net/netfilter/Kconfig linux-3.2.0-go/net/netfilter/Kconfig +--- linux-3.2.0-go.orig//net/netfilter/Kconfig 2012-01-16 18:52:40.595833248 +0100 ++++ linux-3.2.0-go/net/netfilter/Kconfig 2012-01-16 18:54:18.714773096 +0100 +@@ -506,6 +506,18 @@ + For more information on the LEDs available on your system, see + Documentation/leds/leds-class.txt + ++config NETFILTER_XT_TARGET_IMQ ++ tristate '"IMQ" target support' ++ depends on NETFILTER_XTABLES ++ depends on IP_NF_MANGLE || IP6_NF_MANGLE ++ select IMQ ++ default m if NETFILTER_ADVANCED=n ++ help ++ This option adds a `IMQ' target which is used to specify if and ++ to which imq device packets should get enqueued/dequeued. ++ ++ To compile it as a module, choose M here. If unsure, say N. 
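Editorial note (illustrative, not part of the patch): the `IMQ' target option added just above is normally driven from userspace with the out-of-tree iptables IMQ extension together with a queueing discipline attached to the imq device. The sketch below shows one such setup; it assumes the iptables IMQ userspace extension is installed, and the device number (0), interface name (eth0) and HTB rate are arbitrary examples rather than values taken from this patch set.

    # send traffic arriving on eth0 through imq0 before routing (ingress shaping)
    iptables -t mangle -A PREROUTING -i eth0 -j IMQ --todev 0
    ip link set imq0 up
    # attach a qdisc to the imq device; packets enqueued there by the IMQ target
    # are shaped and then re-injected via nf_reinject()
    tc qdisc add dev imq0 root handle 1: htb default 10
    tc class add dev imq0 parent 1: classid 1:10 htb rate 8mbit
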
++ + config NETFILTER_XT_TARGET_MARK + tristate '"MARK" target support' + depends on NETFILTER_ADVANCED +diff -uNr linux-3.2.0-go.orig//net/netfilter/Makefile linux-3.2.0-go/net/netfilter/Makefile +--- linux-3.2.0-go.orig//net/netfilter/Makefile 2012-01-16 18:52:40.818057473 +0100 ++++ linux-3.2.0-go/net/netfilter/Makefile 2012-01-16 18:54:18.714773096 +0100 +@@ -56,6 +56,7 @@ + obj-$(CONFIG_NETFILTER_XT_TARGET_CT) += xt_CT.o + obj-$(CONFIG_NETFILTER_XT_TARGET_DSCP) += xt_DSCP.o + obj-$(CONFIG_NETFILTER_XT_TARGET_HL) += xt_HL.o ++obj-$(CONFIG_NETFILTER_XT_TARGET_IMQ) += xt_IMQ.o + obj-$(CONFIG_NETFILTER_XT_TARGET_LED) += xt_LED.o + obj-$(CONFIG_NETFILTER_XT_TARGET_NFLOG) += xt_NFLOG.o + obj-$(CONFIG_NETFILTER_XT_TARGET_NFQUEUE) += xt_NFQUEUE.o +diff -uNr linux-3.2.0-go.orig//net/netfilter/nf_internals.h linux-3.2.0-go/net/netfilter/nf_internals.h +--- linux-3.2.0-go.orig//net/netfilter/nf_internals.h 2012-01-16 18:52:40.598148084 +0100 ++++ linux-3.2.0-go/net/netfilter/nf_internals.h 2012-01-16 18:54:18.733291780 +0100 +@@ -29,7 +29,7 @@ + struct net_device *indev, + struct net_device *outdev, + int (*okfn)(struct sk_buff *), +- unsigned int queuenum); ++ unsigned int queuenum, unsigned int queuetype); + extern int __init netfilter_queue_init(void); + + /* nf_log.c */ +diff -uNr linux-3.2.0-go.orig//net/netfilter/nf_queue.c linux-3.2.0-go/net/netfilter/nf_queue.c +--- linux-3.2.0-go.orig//net/netfilter/nf_queue.c 2012-01-16 18:52:40.665278317 +0100 ++++ linux-3.2.0-go/net/netfilter/nf_queue.c 2012-01-16 18:54:18.763384644 +0100 +@@ -22,6 +22,26 @@ + + static DEFINE_MUTEX(queue_handler_mutex); + ++#if defined(CONFIG_IMQ) || defined(CONFIG_IMQ_MODULE) ++static const struct nf_queue_handler *queue_imq_handler; ++ ++void nf_register_queue_imq_handler(const struct nf_queue_handler *qh) ++{ ++ mutex_lock(&queue_handler_mutex); ++ rcu_assign_pointer(queue_imq_handler, qh); ++ mutex_unlock(&queue_handler_mutex); ++} ++EXPORT_SYMBOL_GPL(nf_register_queue_imq_handler); ++ ++void nf_unregister_queue_imq_handler(void) ++{ ++ mutex_lock(&queue_handler_mutex); ++ rcu_assign_pointer(queue_imq_handler, NULL); ++ mutex_unlock(&queue_handler_mutex); ++} ++EXPORT_SYMBOL_GPL(nf_unregister_queue_imq_handler); ++#endif ++ + /* return EBUSY when somebody else is registered, return EEXIST if the + * same handler is registered, return 0 in case of success. */ + int nf_register_queue_handler(u_int8_t pf, const struct nf_queue_handler *qh) +@@ -92,7 +112,7 @@ + } + EXPORT_SYMBOL_GPL(nf_unregister_queue_handlers); + +-static void nf_queue_entry_release_refs(struct nf_queue_entry *entry) ++void nf_queue_entry_release_refs(struct nf_queue_entry *entry) + { + /* Release those devices we held, or Alexey will kill me. */ + if (entry->indev) +@@ -112,6 +132,7 @@ + /* Drop reference to owner of hook which queued us. */ + module_put(entry->elem->owner); + } ++EXPORT_SYMBOL_GPL(nf_queue_entry_release_refs); + + /* + * Any packet that leaves via this function must come back +@@ -123,7 +144,8 @@ + struct net_device *indev, + struct net_device *outdev, + int (*okfn)(struct sk_buff *), +- unsigned int queuenum) ++ unsigned int queuenum, ++ unsigned int queuetype) + { + int status = -ENOENT; + struct nf_queue_entry *entry = NULL; +@@ -137,7 +159,17 @@ + /* QUEUE == DROP if no one is waiting, to be safe. 
*/ + rcu_read_lock(); + +- qh = rcu_dereference(queue_handler[pf]); ++ if (queuetype == NF_IMQ_QUEUE) { ++#if defined(CONFIG_IMQ) || defined(CONFIG_IMQ_MODULE) ++ qh = rcu_dereference(queue_imq_handler); ++#else ++ BUG(); ++ goto err_unlock; ++#endif ++ } else { ++ qh = rcu_dereference(queue_handler[pf]); ++ } ++ + if (!qh) { + status = -ESRCH; + goto err_unlock; +@@ -209,7 +241,8 @@ + struct net_device *indev, + struct net_device *outdev, + int (*okfn)(struct sk_buff *), +- unsigned int queuenum) ++ unsigned int queuenum, ++ unsigned int queuetype) + { + struct sk_buff *segs; + int err; +@@ -217,7 +250,7 @@ + + if (!skb_is_gso(skb)) + return __nf_queue(skb, elem, pf, hook, indev, outdev, okfn, +- queuenum); ++ queuenum, queuetype); + + switch (pf) { + case NFPROTO_IPV4: +@@ -244,7 +277,7 @@ + segs->next = NULL; + if (err == 0) + err = __nf_queue(segs, elem, pf, hook, indev, +- outdev, okfn, queuenum); ++ outdev, okfn, queuenum, queuetype); + if (err == 0) + queued++; + else +@@ -299,9 +332,11 @@ + local_bh_enable(); + break; + case NF_QUEUE: ++ case NF_IMQ_QUEUE: + err = __nf_queue(skb, elem, entry->pf, entry->hook, + entry->indev, entry->outdev, entry->okfn, +- verdict >> NF_VERDICT_QBITS); ++ verdict >> NF_VERDICT_QBITS, ++ verdict & NF_VERDICT_MASK); + if (err < 0) { + if (err == -ECANCELED) + goto next_hook; +diff -uNr linux-3.2.0-go.orig//net/netfilter/xt_IMQ.c linux-3.2.0-go/net/netfilter/xt_IMQ.c +--- linux-3.2.0-go.orig//net/netfilter/xt_IMQ.c 1970-01-01 01:00:00.000000000 +0100 ++++ linux-3.2.0-go/net/netfilter/xt_IMQ.c 2012-01-16 18:54:18.872181922 +0100 +@@ -0,0 +1,74 @@ ++/* ++ * This target marks packets to be enqueued to an imq device ++ */ ++#include ++#include ++#include ++#include ++#include ++ ++static unsigned int imq_target(struct sk_buff *pskb, ++ const struct xt_action_param *par) ++{ ++ const struct xt_imq_info *mr = par->targinfo; ++ ++ pskb->imq_flags = (mr->todev & IMQ_F_IFMASK) | IMQ_F_ENQUEUE; ++ ++ return XT_CONTINUE; ++} ++ ++static int imq_checkentry(const struct xt_tgchk_param *par) ++{ ++ struct xt_imq_info *mr = par->targinfo; ++ ++ if (mr->todev > IMQ_MAX_DEVS - 1) { ++ printk(KERN_WARNING ++ "IMQ: invalid device specified, highest is %u\n", ++ IMQ_MAX_DEVS - 1); ++ return -EINVAL; ++ } ++ ++ return 0; ++} ++ ++static struct xt_target xt_imq_reg[] __read_mostly = { ++ { ++ .name = "IMQ", ++ .family = AF_INET, ++ .checkentry = imq_checkentry, ++ .target = imq_target, ++ .targetsize = sizeof(struct xt_imq_info), ++ .table = "mangle", ++ .me = THIS_MODULE ++ }, ++ { ++ .name = "IMQ", ++ .family = AF_INET6, ++ .checkentry = imq_checkentry, ++ .target = imq_target, ++ .targetsize = sizeof(struct xt_imq_info), ++ .table = "mangle", ++ .me = THIS_MODULE ++ }, ++}; ++ ++static int __init imq_init(void) ++{ ++ return xt_register_targets(xt_imq_reg, ARRAY_SIZE(xt_imq_reg)); ++} ++ ++static void __exit imq_fini(void) ++{ ++ xt_unregister_targets(xt_imq_reg, ARRAY_SIZE(xt_imq_reg)); ++} ++ ++module_init(imq_init); ++module_exit(imq_fini); ++ ++MODULE_AUTHOR("http://www.linuximq.net"); ++MODULE_DESCRIPTION("Pseudo-driver for the intermediate queue device. 
" ++ "See http://www.linuximq.net/ for more information."); ++MODULE_LICENSE("GPL"); ++MODULE_ALIAS("ipt_IMQ"); ++MODULE_ALIAS("ip6t_IMQ"); ++ diff --git a/3.2.34/kbuild-compress-kernel-modules-on-installation.patch b/3.2.34/kbuild-compress-kernel-modules-on-installation.patch new file mode 100644 index 0000000..cb3cc7f --- /dev/null +++ b/3.2.34/kbuild-compress-kernel-modules-on-installation.patch @@ -0,0 +1,137 @@ +================================ +Signed-off-by: Steve Brokenshire +[Rediffed for 2.6.31.3, defaulted to y and compress with -9 /Thomas] +Signed-off-by: Thomas Backlund + +diff -Nurp linux-2.6.31/Documentation/kbuild/modules.txt linux-2.6.31.compress/Documentation/kbuild/modules.txt +--- linux-2.6.31/Documentation/kbuild/modules.txt 2009-09-10 01:13:59.000000000 +0300 ++++ linux-2.6.31.compress/Documentation/kbuild/modules.txt 2009-10-09 14:17:49.335619817 +0300 +@@ -123,6 +123,13 @@ executed to make module versioning work. + Install the external module(s). The default location is + /lib/modules//extra/, but a prefix may + be added with INSTALL_MOD_PATH (discussed in section 5). ++ If MODULES_COMPRESS is set when the modules_install target is ++ run then the module is compressed after it has been ++ copied to /lib/modules/. Compressed modules ++ using the default gzip compression format will require ++ module-init-tools installed with --zlib-enabled. ++ Any options set in MODULE_COMPRESS_OPTIONS will be ++ passed to the selected compression format. + + clean + Remove all generated files in the module directory only. +diff -Nurp linux-2.6.31/init/Kconfig linux-2.6.31.compress/init/Kconfig +--- linux-2.6.31/init/Kconfig 2009-09-10 01:13:59.000000000 +0300 ++++ linux-2.6.31.compress/init/Kconfig 2009-10-09 14:19:01.812591181 +0300 +@@ -1161,6 +1161,64 @@ config MODULE_FORCE_UNLOAD + rmmod). This is mainly for kernel developers and desperate users. + If unsure, say N. + ++config MODULE_COMPRESS ++ bool "Compress kernel modules on installation" ++ depends on MODULES ++ default y ++ help ++ This option compresses the kernel modules when 'make ++ modules_install' is run. ++ ++ The modules will be compressed into the selected compression ++ format with gzip being the default compression format. ++ ++ When a kernel module is installed from outside of the main kernel ++ source and uses the Kbuild system for installing modules then that ++ kernel module will also be compressed when it is installed. ++ ++ When running mkinitrd you will find that an error message ++ appears saying that it cannot find a certain kernel module. ++ As a workaround, unset CONFIG_MODULE_COMPRESS, build the modules ++ and install them, run mkinitrd and create the initrd image, place ++ the initrd image in the correct place for booting, set ++ CONFIG_MODULE_COMPRESS and then install the modules again. ++ ++ This option requires the module-init-tools package to be ++ configured with --enable-zlib (if using gzip which is the ++ default compression format). ++ ++ If unsure, say Y. ++ ++config MODULE_COMPRESS_OPTIONS ++ string "Compression format command line options" ++ depends on MODULE_COMPRESS ++ default "-9" ++ help ++ This option specifies the command line options to be used for ++ the selected compression format. ++ ++ Please refer to the selected compression format's documentation ++ on which options should be used. ++ ++ If unsure, leave this option blank. 
++ ++choice ++ prompt "Kernel module compression format" ++ depends on MODULE_COMPRESS ++ default MODULE_COMPRESS_GZIP ++ ++config MODULE_COMPRESS_GZIP ++ bool "gzip compression" ++ help ++ Compresses the kernel modules using the gzip (GNU zip) ++ compression format. ++ ++ This option requires gzip to be installed. ++ ++ If unsure, leave this option selected. ++ ++endchoice ++ + config MODVERSIONS + bool "Module versioning support" + help +diff -Nurp linux-2.6.31/scripts/Makefile.modinst linux-2.6.31.compress/scripts/Makefile.modinst +--- linux-2.6.31/scripts/Makefile.modinst 2009-09-10 01:13:59.000000000 +0300 ++++ linux-2.6.31.compress/scripts/Makefile.modinst 2009-10-09 14:17:49.337619404 +0300 +@@ -5,6 +5,7 @@ + PHONY := __modinst + __modinst: + ++include include/config/auto.conf + include scripts/Kbuild.include + + # +@@ -16,8 +17,21 @@ PHONY += $(modules) + __modinst: $(modules) + @: + +-quiet_cmd_modules_install = INSTALL $@ +- cmd_modules_install = mkdir -p $(2); cp $@ $(2) ; $(mod_strip_cmd) $(2)/$(notdir $@) ++ifeq ($(CONFIG_MODULE_COMPRESS_OPTIONS), "") ++else ++ MODCOMPOPT = $(shell echo -n $(CONFIG_MODULE_COMPRESS_OPTIONS)) ++endif ++ ++quiet_cmd_modules_install = INSTALL $@ ++ cmd_modules_install = mkdir -p $(2); \ ++ cp $@ $(2) ; \ ++ $(mod_strip_cmd) $(2)/$(notdir $@) ++ ++quiet_cmd_modules_compress_gzip = COMPRESS $@ ++ cmd_modules_compress_gzip = gzip $(MODCOMPOPT) -c \ ++ $(2)/$(@F) \ ++ > $(2)/$(@F).gz; \ ++ rm $(2)/$(@F) + + # Modules built outside the kernel source tree go into extra by default + INSTALL_MOD_DIR ?= extra +@@ -26,8 +40,11 @@ ext-mod-dir = $(INSTALL_MOD_DIR)$(subst + modinst_dir = $(if $(KBUILD_EXTMOD),$(ext-mod-dir),kernel/$(@D)) + + $(modules): ++ + $(call cmd,modules_install,$(MODLIB)/$(modinst_dir)) + ++ $(if $(CONFIG_MODULE_COMPRESS_GZIP), \ ++ $(call cmd,modules_compress_gzip,$(MODLIB)/$(modinst_dir))) + + # Declare the contents of the .PHONY variable as phony. We keep that + # information in a variable se we can use it in if_changed and friends. diff --git a/3.2.34/kernel-3.2-lsproduo.patch b/3.2.34/kernel-3.2-lsproduo.patch new file mode 100644 index 0000000..d1d5982 --- /dev/null +++ b/3.2.34/kernel-3.2-lsproduo.patch @@ -0,0 +1,569 @@ +diff -uprN linux-3.4-rc7/arch/arm/configs/orion5x_defconfig linux-3.4-rc7-wtgl/arch/arm/configs/orion5x_defconfig +--- linux-3.4-rc7/arch/arm/configs/orion5x_defconfig 2012-05-12 19:37:47.000000000 -0600 ++++ linux-3.4-rc7-wtgl/arch/arm/configs/orion5x_defconfig 2012-08-16 23:41:47.118502384 -0600 +@@ -19,6 +19,7 @@ CONFIG_MACH_TS209=y + CONFIG_MACH_TERASTATION_PRO2=y + CONFIG_MACH_LINKSTATION_PRO=y + CONFIG_MACH_LINKSTATION_MINI=y ++CONFIG_MACH_LINKSTATION_PRODUO=y + CONFIG_MACH_LINKSTATION_LS_HGL=y + CONFIG_MACH_TS409=y + CONFIG_MACH_WRT350N_V2=y +diff -uprN linux-3.4-rc7/arch/arm/mach-orion5x/Kconfig linux-3.4-rc7-wtgl/arch/arm/mach-orion5x/Kconfig +--- linux-3.4-rc7/arch/arm/mach-orion5x/Kconfig 2012-05-12 19:37:47.000000000 -0600 ++++ linux-3.4-rc7-wtgl/arch/arm/mach-orion5x/Kconfig 2012-08-16 23:47:02.334496150 -0600 +@@ -65,13 +65,52 @@ config MACH_LINKSTATION_MINI + Say 'Y' here if you want your kernel to support the + Buffalo Linkstation Mini platform. + ++config MACH_LINKSTATION_PRODUO ++ bool "Buffalo Linkstation Pro Duo" ++ select I2C_BOARDINFO ++ help ++ Say 'Y' here if you want your kernel to support the ++ Buffalo Linkstation Pro Duo platform. ++ ++ LS-W1.0TGL/R1 is the general model number. There ++ is no /R3 models, as /R1 stands for RAID1. ++ There are two hardware revisions of the product. 
++ ++ The first revision has version 1.xx firmware, 64 MB RAM, ++ a single USB port, a power BUTTON, an Auto/Manual ++ power MODE SWITCH, and a RESET button. ++ ++ The second revision has version 3.xx firmware, 128 MB RAM, ++ two USB ports, an Off/On/Auto power SWITCH, and a FUNCTION button. ++ ++ choice ++ prompt "HW model" ++ depends on MACH_LINKSTATION_PRODUO ++ default MACH_LINKSTATION_PRODUO_REV1 ++ default MACH_LINKSTATION_PRODUO_REV2 ++ ++ config MACH_LINKSTATION_PRODUO_REV1 ++ bool "Revision 1" ++ help ++ The first revision has version 1.xx firmware, 64 MB RAM, ++ a single USB port, a power BUTTON, an Auto/Manual ++ power MODE SWITCH, and a RESET button. ++ ++ config MACH_LINKSTATION_PRODUO_REV2 ++ bool "Revision 2" ++ help ++ The second revision has version 3.xx firmware, 128 MB RAM, ++ two USB ports, an Off/On/Auto power SWITCH, and a FUNCTION button. ++ endchoice ++ ++ + config MACH_LINKSTATION_LS_HGL + bool "Buffalo Linkstation LS-HGL" + select I2C_BOARDINFO + help + Say 'Y' here if you want your kernel to support the + Buffalo Linkstation LS-HGL platform. +- ++ + config MACH_TS409 + bool "QNAP TS-409" + help +diff -uprN linux-3.4-rc7/arch/arm/mach-orion5x/Makefile linux-3.4-rc7-wtgl/arch/arm/mach-orion5x/Makefile +--- linux-3.4-rc7/arch/arm/mach-orion5x/Makefile 2012-05-12 19:37:47.000000000 -0600 ++++ linux-3.4-rc7-wtgl/arch/arm/mach-orion5x/Makefile 2012-08-16 23:48:26.182494492 -0600 +@@ -5,6 +5,7 @@ obj-$(CONFIG_MACH_KUROBOX_PRO) += kurobo + obj-$(CONFIG_MACH_TERASTATION_PRO2) += terastation_pro2-setup.o + obj-$(CONFIG_MACH_LINKSTATION_PRO) += kurobox_pro-setup.o + obj-$(CONFIG_MACH_LINKSTATION_MINI) += lsmini-setup.o ++obj-$(CONFIG_MACH_LINKSTATION_PRODUO) += lsproduo-setup.o + obj-$(CONFIG_MACH_LINKSTATION_LS_HGL) += ls_hgl-setup.o + obj-$(CONFIG_MACH_DNS323) += dns323-setup.o + obj-$(CONFIG_MACH_TS209) += ts209-setup.o tsx09-common.o +diff -uprN linux-3.4-rc7/arch/arm/mach-orion5x/lsproduo-setup.c linux-3.4-rc7-wtgl/arch/arm/mach-orion5x/lsproduo-setup.c +--- linux-3.4-rc7/arch/arm/mach-orion5x/lsproduo-setup.c 1969-12-31 17:00:00.000000000 -0700 ++++ linux-3.4-rc7-wtgl/arch/arm/mach-orion5x/lsproduo-setup.c 2012-08-16 23:52:09.630490073 -0600 +@@ -0,0 +1,459 @@ ++/* ++ * arch/arm/mach-orion5x/lsproduo-setup.c ++ * ++ * Source taken from arch/arm/mach-orion5x/lsmini-setup.c - kernel 2.6.30 ++ * Maintainer: Matt Gomboc ++ * ++ * This file is licensed under the terms of the GNU General Public ++ * License version 2. This program is licensed "as is" without any ++ * warranty of any kind, whether express or implied. 
++ */ ++ ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include "common.h" ++#include "mpp.h" ++#include ++#include ++#include ++ ++/***************************************************************************** ++ * Linkstation Pro Duo Info ++ ****************************************************************************/ ++ ++/* ++ * 256K NOR flash Device bus boot chip select ++ */ ++ ++#define LSPRODUO_NOR_BOOT_BASE 0xf4000000 ++#define LSPRODUO_NOR_BOOT_SIZE SZ_256K ++ ++/***************************************************************************** ++ * 256KB NOR Flash on BOOT Device ++ ****************************************************************************/ ++ ++static struct physmap_flash_data lsproduo_nor_flash_data = { ++ .width = 1, ++}; ++ ++static struct resource lsproduo_nor_flash_resource = { ++ .flags = IORESOURCE_MEM, ++ .start = LSPRODUO_NOR_BOOT_BASE, ++ .end = LSPRODUO_NOR_BOOT_BASE + LSPRODUO_NOR_BOOT_SIZE - 1, ++}; ++ ++static struct platform_device lsproduo_nor_flash = { ++ .name = "physmap-flash", ++ .id = 0, ++ .dev = { ++ .platform_data = &lsproduo_nor_flash_data, ++ }, ++ .num_resources = 1, ++ .resource = &lsproduo_nor_flash_resource, ++}; ++ ++/***************************************************************************** ++ * Ethernet ++ ****************************************************************************/ ++ ++static struct mv643xx_eth_platform_data lsproduo_eth_data = { ++ .phy_addr = 8, ++}; ++ ++/***************************************************************************** ++ * RTC 5C372a on I2C bus ++ ****************************************************************************/ ++ ++static struct i2c_board_info __initdata lsproduo_i2c_rtc = { ++ I2C_BOARD_INFO("rs5c372a", 0x32), ++}; ++ ++/***************************************************************************** ++ * LEDs attached to GPIO ++ ****************************************************************************/ ++ ++#define LSPRODUO_GPIO_LED_ALARM 2 ++#define LSPRODUO_GPIO_LED_INFO 3 ++#define LSPRODUO_GPIO_LED_PWR 0 ++ ++#ifdef CONFIG_MACH_LINKSTATION_PRODUO_REV2 ++ #define LSPRODUO_GPIO_LED_FUNC 18 ++#endif ++ ++#ifdef CONFIG_MACH_LINKSTATION_PRODUO_REV1 ++static struct gpio_led lsproduo_led_pins[] = { ++ { ++ .name = "alarm:red", ++ .gpio = LSPRODUO_GPIO_LED_ALARM, ++ .active_low = 1, ++ }, { ++ .name = "info:amber", ++ .gpio = LSPRODUO_GPIO_LED_INFO, ++ .active_low = 1, ++ }, { ++ .name = "power:greem", ++ .gpio = LSPRODUO_GPIO_LED_PWR, ++ .active_low = 1, ++ }, ++}; ++#endif ++#ifdef CONFIG_MACH_LINKSTATION_PRODUO_REV2 ++static struct gpio_led lsproduo_led_pins[] = { ++ { ++ .name = "alarm:red", ++ .gpio = LSPRODUO_GPIO_LED_ALARM, ++ .active_low = 1, ++ }, { ++ .name = "info:amber", ++ .gpio = LSPRODUO_GPIO_LED_INFO, ++ .active_low = 1, ++ }, { ++ .name = "power:green", ++ .gpio = LSPRODUO_GPIO_LED_PWR, ++ .active_low = 1, ++ },{ ++ .name = "func:blue", ++ .gpio = LSPRODUO_GPIO_LED_FUNC, ++ .active_low = 1, ++ }, ++}; ++#endif ++ ++ ++ ++static struct gpio_led_platform_data lsproduo_led_data = { ++ .leds = lsproduo_led_pins, ++ .num_leds = ARRAY_SIZE(lsproduo_led_pins), ++}; ++ ++static struct platform_device lsproduo_leds = { ++ .name = "leds-gpio", ++ .id = -1, ++ .dev = { ++ .platform_data = &lsproduo_led_data, ++ }, ++}; ++ ++/**************************************************************************** ++ * GPIO Attached Keys ++ 
****************************************************************************/ ++#ifdef CONFIG_MACH_LINKSTATION_PRODUO_REV1 ++ #define LSPRODUO_GPIO_KEY_POWER 8 ++ #define LSPRODUO_GPIO_KEY_AUTOPOWER 10 ++ ++ #define LSPRODUO_SW_POWER 0x00 ++ #define LSPRODUO_SW_AUTOPOWER 0x01 ++ ++static struct gpio_keys_button lsproduo_buttons[] = { ++ { ++ .type = EV_SW, ++ .code = LSPRODUO_SW_POWER, ++ .gpio = LSPRODUO_GPIO_KEY_POWER, ++ .desc = "Power-on Switch", ++ .active_low = 1, ++ }, { ++ .type = EV_SW, ++ .code = LSPRODUO_SW_AUTOPOWER, ++ .gpio = LSPRODUO_GPIO_KEY_AUTOPOWER, ++ .desc = "Power-auto Switch", ++ .active_low = 1, ++ }, ++}; ++ ++#endif ++#ifdef CONFIG_MACH_LINKSTATION_PRODUO_REV2 ++ #define LSPRODUO_GPIO_KEY_POWER 10 ++ #define LSPRODUO_GPIO_KEY_AUTOPOWER 22 ++ #define LSPRODUO_GPIO_KEY_FUNC 8 ++ ++ #define LSPRODUO_SW_POWER 0x00 ++ #define LSPRODUO_SW_AUTOPOWER 0x01 ++ ++static struct gpio_keys_button lsproduo_buttons[] = { ++ { ++ .code = KEY_OPTION, ++ .gpio = LSPRODUO_GPIO_KEY_FUNC, ++ .desc = "Function Button", ++ .active_low = 1, ++ },{ ++ .type = EV_SW, ++ .code = LSPRODUO_SW_POWER, ++ .gpio = LSPRODUO_GPIO_KEY_POWER, ++ .desc = "Power-on Switch", ++ .active_low = 1, ++ }, { ++ .type = EV_SW, ++ .code = LSPRODUO_SW_AUTOPOWER, ++ .gpio = LSPRODUO_GPIO_KEY_AUTOPOWER, ++ .desc = "Power-auto Switch", ++ .active_low = 1, ++ }, ++}; ++ ++#endif ++ ++static struct gpio_keys_platform_data lsproduo_button_data = { ++ .buttons = lsproduo_buttons, ++ .nbuttons = ARRAY_SIZE(lsproduo_buttons), ++}; ++ ++static struct platform_device lsproduo_button_device = { ++ .name = "gpio-keys", ++ .id = -1, ++ .num_resources = 0, ++ .dev = { ++ .platform_data = &lsproduo_button_data, ++ }, ++}; ++ ++/**************************************************************************** ++ * GPIO Attached Fan ++ ****************************************************************************/ ++ ++/* Define max char len */ ++#define MAX_LEN 8 ++ ++#define LSPRODUO_GPIO_FAN_LOW 17 ++#define LSPRODUO_GPIO_FAN_HIGH 14 ++ ++static struct proc_dir_entry *lsproduo_proc_dir_root, *lsproduo_proc_dir_gpio, *lsproduo_fan_proc_file; ++static char lsproduo_fan_state[MAX_LEN]; ++ ++static int lsproduo_fan_get(char *buf, char **start, off_t offset, int count, int *eof, void *data) ++{ ++ int len; ++ ++ len = snprintf(buf, count, "state: %s\n", lsproduo_fan_state); ++ return len; ++} ++ ++static int lsproduo_fan_set( struct file *file, const char *buffer, unsigned long count, void *data ) ++{ ++ int len, ret; ++ char *ptr, tState[MAX_LEN]; ++ ++ if (count > MAX_LEN ) ++ len = MAX_LEN; ++ else ++ len = count; ++ ++ ret = copy_from_user(tState, buffer, len); ++ if(ret < 0) ++ { ++ printk(KERN_ERR "%s: Setting fan speed failed\n", "lsproduo"); ++ return -EFAULT; ++ } ++ ++ ptr = strrchr(tState, '\n'); ++ if(ptr) *ptr = '\0'; ++ ++ if (strcasecmp(tState, "off") == 0) ++ { ++ printk(KERN_DEBUG "%s: set fan off\n", "lsproduo"); ++ sprintf(lsproduo_fan_state, "off"); ++ gpio_set_value(LSPRODUO_GPIO_FAN_LOW, 1); ++ gpio_set_value(LSPRODUO_GPIO_FAN_HIGH, 1); ++ } else if (strcasecmp(tState, "slow") == 0) ++ { ++ printk(KERN_DEBUG "%s: set fan slow\n", "lsproduo"); ++ sprintf(lsproduo_fan_state, "slow"); ++ gpio_set_value(LSPRODUO_GPIO_FAN_LOW, 1); ++ gpio_set_value(LSPRODUO_GPIO_FAN_HIGH, 0); ++ } else if (strcasecmp(tState, "fast") == 0) ++ { ++ printk(KERN_DEBUG "%s: set fan fast\n", "lsproduo"); ++ sprintf(lsproduo_fan_state, "fast"); ++ gpio_set_value(LSPRODUO_GPIO_FAN_LOW, 0); ++ gpio_set_value(LSPRODUO_GPIO_FAN_HIGH, 1); ++ } else if 
(strcasecmp(tState, "full") == 0) ++ { ++ printk(KERN_DEBUG "%s: set fan full\n", "lsproduo"); ++ sprintf(lsproduo_fan_state, "full"); ++ gpio_set_value(LSPRODUO_GPIO_FAN_LOW, 0); ++ gpio_set_value(LSPRODUO_GPIO_FAN_HIGH, 0); ++ } else ++ { ++ printk(KERN_ERR "%s: unknown fan speed given\n", "lsproduo"); ++ } ++ ++ lsproduo_fan_state[len] = '\0'; ++ ++ return len; ++} ++ ++/***************************************************************************** ++ * SATA ++ ****************************************************************************/ ++static struct mv_sata_platform_data lsproduo_sata_data = { ++ .n_ports = 2, ++}; ++ ++ ++/***************************************************************************** ++ * Linkstation Pro Duo specific power off method: reboot ++ ****************************************************************************/ ++/* ++ * On the Linkstation Pro Duo, the shutdown process is following: ++ * - Userland monitors key events until the power switch goes to off position ++ * - The board reboots ++ * - U-boot starts and goes into an idle mode waiting for the user ++ * to move the switch to ON position ++ */ ++ ++static void lsproduo_power_off(void) ++{ ++ /* orion5x_restart('h', NULL); */ ++ arm_machine_restart(0, NULL); ++} ++ ++ ++/***************************************************************************** ++ * General Setup ++ ****************************************************************************/ ++#define LSPRODUO_GPIO_HDD_POWER0 1 ++#define LSPRODUO_GPIO_USB_POWER 9 ++#ifdef CONFIG_MACH_LINKSTATION_PRODUO_REV1 ++ #define LSPRODUO_GPIO_POWER 8 ++ #define LSPRODUO_GPIO_AUTO_POWER 10 ++#endif ++#ifdef CONFIG_MACH_LINKSTATION_PRODUO_REV2 ++ #define LSPRODUO_GPIO_POWER 10 ++ #define LSPRODUO_GPIO_USB_POWER2 19 ++ #define LSPRODUO_GPIO_AUTO_POWER 22 ++#endif ++ ++static unsigned int lsproduo_mpp_modes[] __initdata = { ++ MPP0_GPIO, /* LED_PWR */ ++ MPP1_GPIO, /* HDD_PWR */ ++ MPP2_GPIO, /* LED_ALARM */ ++ MPP3_GPIO, /* LED_INFO */ ++ MPP4_UNUSED, ++ MPP5_UNUSED, ++ MPP6_GPIO, /* FAN_LCK */ ++ MPP9_GPIO, /* USB_PWR */ ++ MPP11_UNUSED, /* LED_ETH dummy */ ++ MPP12_UNUSED, ++ MPP13_UNUSED, ++ MPP14_GPIO, /* FAN_HIGH */ ++ MPP15_UNUSED, ++ MPP16_UNUSED, ++ MPP17_GPIO, /* FAN_LOW */ ++ ++#ifdef CONFIG_MACH_LINKSTATION_PRODUO_REV1 ++ MPP7_GPIO, /* INIT */ ++ MPP8_GPIO, /* POWER */ ++ MPP10_GPIO, /* AUTO_POWER */ ++ MPP18_UNUSED, ++ MPP19_UNUSED, ++#endif ++#ifdef CONFIG_MACH_LINKSTATION_PRODUO_REV2 ++ MPP7_UNUSED, ++ MPP8_GPIO, /* FUNC */ ++ MPP10_GPIO, /* POWER */ ++ MPP18_GPIO, /* LED_FUNC*/ ++ MPP19_GPIO, /* USB_PWR2 */ ++ MPP22_GPIO, /* AUTO_POWER */ ++#endif ++ 0, ++}; ++ ++static void __init lsproduo_init(void) ++{ ++ /* ++ * Setup basic Orion functions. Need to be called early. ++ */ ++ orion5x_init(); ++ ++ orion5x_mpp_conf(lsproduo_mpp_modes); ++ ++ /* ++ * Configure peripherals. 
++ */ ++ orion5x_ehci0_init(); ++ orion5x_ehci1_init(); ++ orion5x_eth_init(&lsproduo_eth_data); ++ orion5x_i2c_init(); ++ orion5x_sata_init(&lsproduo_sata_data); ++ orion5x_uart0_init(); ++ orion5x_xor_init(); ++ ++ orion5x_setup_dev_boot_win(LSPRODUO_NOR_BOOT_BASE, ++ LSPRODUO_NOR_BOOT_SIZE); ++ platform_device_register(&lsproduo_nor_flash); ++ ++ platform_device_register(&lsproduo_button_device); ++ ++ platform_device_register(&lsproduo_leds); ++ ++ i2c_register_board_info(0, &lsproduo_i2c_rtc, 1); ++ ++ /* enable USB power */ ++ gpio_set_value(LSPRODUO_GPIO_USB_POWER, 1); ++ ++#ifdef CONFIG_MACH_LINKSTATION_PRODUO_REV2 ++ gpio_set_value(LSPRODUO_GPIO_USB_POWER2, 1); ++#endif ++ ++ printk(KERN_INFO "Buffalo Linkstation Pro Duo fan driver loaded\n"); ++ sprintf(lsproduo_fan_state, "fast"); ++ gpio_set_value(LSPRODUO_GPIO_FAN_LOW, 1); ++ gpio_set_value(LSPRODUO_GPIO_FAN_HIGH, 0); ++ ++ lsproduo_proc_dir_root = proc_mkdir( "linkstation", NULL ); ++ lsproduo_proc_dir_gpio = proc_mkdir( "gpio", lsproduo_proc_dir_root ); ++ lsproduo_fan_proc_file = create_proc_entry( "fan", S_IRUGO, lsproduo_proc_dir_gpio ); ++ if( lsproduo_fan_proc_file ) { ++ lsproduo_fan_proc_file->read_proc = lsproduo_fan_get; ++ lsproduo_fan_proc_file->write_proc = lsproduo_fan_set; ++ lsproduo_fan_proc_file->data = NULL; ++ } else ++ { ++ printk(KERN_INFO "Registration of fan device failed\n"); ++ } ++ ++ /* register power-off method */ ++ pm_power_off = lsproduo_power_off; ++ ++ pr_info("%s: finished\n", __func__); ++} ++ ++#ifdef CONFIG_MACH_LINKSTATION_PRODUO_REV1 ++MACHINE_START(LINKSTATION_PRODUO, "Buffalo Linkstation Pro Duo - Revision 1") ++ .atag_offset = 0x00000100, ++ .init_machine = lsproduo_init, ++ .map_io = orion5x_map_io, ++ .init_early = orion5x_init_early, ++ .init_irq = orion5x_init_irq, ++ .timer = &orion5x_timer, ++ .fixup = tag_fixup_mem32, ++ /* .restart = orion5x_restart, */ ++MACHINE_END ++#endif ++ ++#ifdef CONFIG_MACH_LINKSTATION_PRODUO_REV2 ++MACHINE_START(LINKSTATION_PRODUO, "Buffalo Linkstation Pro Duo - Revision 2") ++ .atag_offset = 0x00000100, ++ .init_machine = lsproduo_init, ++ .map_io = orion5x_map_io, ++ .init_early = orion5x_init_early, ++ .init_irq = orion5x_init_irq, ++ .timer = &orion5x_timer, ++ .fixup = tag_fixup_mem32, ++ /* .restart = orion5x_restart, */ ++MACHINE_END ++#endif ++ ++ ++ +diff -uprN linux-3.4-rc7/arch/arm/mach-orion5x/mpp.h linux-3.4-rc7-wtgl/arch/arm/mach-orion5x/mpp.h +--- linux-3.4-rc7/arch/arm/mach-orion5x/mpp.h 2012-05-12 19:37:47.000000000 -0600 ++++ linux-3.4-rc7-wtgl/arch/arm/mach-orion5x/mpp.h 2012-08-16 22:15:34.000000000 -0600 +@@ -122,7 +122,10 @@ + #define MPP19_GIGE MPP(19, 0x1, 0, 0, 1, 1, 1) + #define MPP19_UART MPP(19, 0x0, 0, 0, 0, 1, 1) + +-#define MPP_MAX 19 ++#define MPP22_GPIO MPP(22, 0x5, 1, 1, 0, 1, 0) ++ ++ ++#define MPP_MAX 22 + + void orion5x_mpp_conf(unsigned int *mpp_list); + +diff -uprN linux-3.4-rc7/arch/arm/tools/mach-types linux-3.4-rc7-wtgl/arch/arm/tools/mach-types +--- linux-3.4-rc7/arch/arm/tools/mach-types 2012-05-12 19:37:47.000000000 -0600 ++++ linux-3.4-rc7-wtgl/arch/arm/tools/mach-types 2012-08-16 23:43:59.830499760 -0600 +@@ -333,6 +333,8 @@ smdkc100 MACH_SMDKC100 SMDKC100 1826 + tavorevb MACH_TAVOREVB TAVOREVB 1827 + saar MACH_SAAR SAAR 1828 + at91sam9m10g45ek MACH_AT91SAM9M10G45EK AT91SAM9M10G45EK 1830 ++linkstation_produo MACH_LINKSTATION_PRODUO LINKSTATION_PRODUO 1831 ++##see header for btaining a new version, preferred to patching + usb_a9g20 MACH_USB_A9G20 USB_A9G20 1841 + mxlads MACH_MXLADS MXLADS 1851 + 
linkstation_mini MACH_LINKSTATION_MINI LINKSTATION_MINI 1858 diff --git a/3.2.34/kernel-3.2-lsql.patch b/3.2.34/kernel-3.2-lsql.patch new file mode 100644 index 0000000..89c1f91 --- /dev/null +++ b/3.2.34/kernel-3.2-lsql.patch @@ -0,0 +1,439 @@ +diff -uNr linux-3.2.33-go.orig/arch/arm/configs/orion5x_defconfig linux-3.2.33-go/arch/arm/configs/orion5x_defconfig +--- linux-3.2.33-go.orig/arch/arm/configs/orion5x_defconfig 2012-11-11 15:13:23.313493927 +0100 ++++ linux-3.2.33-go/arch/arm/configs/orion5x_defconfig 2012-11-11 15:14:01.321037277 +0100 +@@ -21,6 +21,7 @@ + CONFIG_MACH_LINKSTATION_MINI=y + CONFIG_MACH_LINKSTATION_PRODUO=y + CONFIG_MACH_LINKSTATION_LS_HGL=y ++CONFIG_MACH_LINKSTATION_LSQL=y + CONFIG_MACH_TS409=y + CONFIG_MACH_WRT350N_V2=y + CONFIG_MACH_TS78XX=y +diff -uNr linux-3.2.33-go.orig/arch/arm/mach-orion5x/Kconfig linux-3.2.33-go/arch/arm/mach-orion5x/Kconfig +--- linux-3.2.33-go.orig/arch/arm/mach-orion5x/Kconfig 2012-11-11 15:13:23.518491566 +0100 ++++ linux-3.2.33-go/arch/arm/mach-orion5x/Kconfig 2012-11-11 15:14:01.321037277 +0100 +@@ -111,6 +111,13 @@ + Say 'Y' here if you want your kernel to support the + Buffalo Linkstation LS-HGL platform. + ++config MACH_LINKSTATION_LSQL ++ bool "Buffalo Linkstation LS-QL" ++ select I2C_BOARDINFO ++ help ++ Say 'Y' here if you want your kernel to support the ++ Buffalo Linkstation LS-QL platform. ++ + config MACH_TS409 + bool "QNAP TS-409" + help +diff -uNr linux-3.2.33-go.orig/arch/arm/mach-orion5x/lsql-setup.c linux-3.2.33-go/arch/arm/mach-orion5x/lsql-setup.c +--- linux-3.2.33-go.orig/arch/arm/mach-orion5x/lsql-setup.c 1970-01-01 01:00:00.000000000 +0100 ++++ linux-3.2.33-go/arch/arm/mach-orion5x/lsql-setup.c 2012-11-11 15:14:01.323037254 +0100 +@@ -0,0 +1,388 @@ ++/* ++ * arch/arm/mach-orion5x/lsql-setup.c ++ * ++ * Source based off arch/arm/mach-orion5x/lsproduo-setup.c, which was from lsmini-setup.c ++ * Maintainer: Matt Gomboc ++ * ++ * This file is licensed under the terms of the GNU General Public ++ * License version 2. This program is licensed "as is" without any ++ * warranty of any kind, whether express or implied. 
++ */ ++ ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include "common.h" ++#include "mpp.h" ++#include ++#include ++#include ++ ++/***************************************************************************** ++ * Linkstation Quad LS-QL/R5 Info ++ ****************************************************************************/ ++ ++/* ++ * 256K NOR flash Device bus boot chip select ++ */ ++ ++#define LSQL_NOR_BOOT_BASE 0xf4000000 ++#define LSQL_NOR_BOOT_SIZE SZ_256K ++ ++/***************************************************************************** ++ * 256KB NOR Flash on BOOT Device ++ ****************************************************************************/ ++ ++static struct physmap_flash_data lsql_nor_flash_data = { ++ .width = 1, ++}; ++ ++static struct resource lsql_nor_flash_resource = { ++ .flags = IORESOURCE_MEM, ++ .start = LSQL_NOR_BOOT_BASE, ++ .end = LSQL_NOR_BOOT_BASE + LSQL_NOR_BOOT_SIZE - 1, ++}; ++ ++static struct platform_device lsql_nor_flash = { ++ .name = "physmap-flash", ++ .id = 0, ++ .dev = { ++ .platform_data = &lsql_nor_flash_data, ++ }, ++ .num_resources = 1, ++ .resource = &lsql_nor_flash_resource, ++}; ++ ++/***************************************************************************** ++ * Ethernet ++ ****************************************************************************/ ++ ++static struct mv643xx_eth_platform_data lsql_eth_data = { ++ .phy_addr = 8, ++}; ++ ++/***************************************************************************** ++ * RTC 5C372a on I2C bus ++ ****************************************************************************/ ++ ++static struct i2c_board_info __initdata lsql_i2c_rtc = { ++ I2C_BOARD_INFO("rs5c372a", 0x32), ++}; ++ ++/***************************************************************************** ++ * LEDs attached to GPIO ++ ****************************************************************************/ ++ ++#define LSQL_GPIO_LED_ALARM 2 /* looks like it should be 2 by the uboot sources, but doesnt successfully trigger the3 top LED*/ ++#define LSQL_GPIO_LED_INFO 3 ++#define LSQL_GPIO_LED_PWR 0 ++#define LSQL_GPIO_LED_FUNC 18 ++ ++ ++static struct gpio_led lsql_led_pins[] = { ++ { ++ .name = "alarm:red", ++ .gpio = LSQL_GPIO_LED_ALARM, ++ .active_low = 1, ++ }, { ++ .name = "info:amber", ++ .gpio = LSQL_GPIO_LED_INFO, ++ .active_low = 1, ++ }, { ++ .name = "power:blue", ++ .gpio = LSQL_GPIO_LED_PWR, ++ .active_low = 1, ++ },{ ++ .name = "func:blue", ++ .gpio = LSQL_GPIO_LED_FUNC, ++ .active_low = 1, ++ }, ++}; ++ ++ ++ ++static struct gpio_led_platform_data lsql_led_data = { ++ .leds = lsql_led_pins, ++ .num_leds = ARRAY_SIZE(lsql_led_pins), ++}; ++ ++ ++static struct platform_device lsql_leds = { ++ .name = "leds-gpio", ++ .id = -1, ++ .dev = { ++ .platform_data = &lsql_led_data, ++ }, ++}; ++ ++ ++/**************************************************************************** ++ * GPIO Attached Keys ++ ****************************************************************************/ ++ ++ #define LSQL_GPIO_KEY_POWER 10 ++ #define LSQL_GPIO_KEY_AUTOPOWER 22 ++ #define LSQL_GPIO_KEY_FUNC 7 ++ ++ #define LSQL_SW_POWER 0x00 ++ #define LSQL_SW_AUTOPOWER 0x01 ++ ++static struct gpio_keys_button lsql_buttons[] = { ++ { ++ .code = KEY_OPTION, ++ .gpio = LSQL_GPIO_KEY_FUNC, ++ .desc = "Function Button", ++ .active_low = 1, ++ },{ ++ .type = EV_SW, ++ .code = LSQL_SW_POWER, ++ .gpio = LSQL_GPIO_KEY_POWER, ++ .desc = "Power-on 
Switch", ++ .active_low = 1, ++ }, { ++ .type = EV_SW, ++ .code = LSQL_SW_AUTOPOWER, ++ .gpio = LSQL_GPIO_KEY_AUTOPOWER, ++ .desc = "Power-auto Switch", ++ .active_low = 1, ++ }, ++}; ++ ++ ++static struct gpio_keys_platform_data lsql_button_data = { ++ .buttons = lsql_buttons, ++ .nbuttons = ARRAY_SIZE(lsql_buttons), ++}; ++ ++static struct platform_device lsql_button_device = { ++ .name = "gpio-keys", ++ .id = -1, ++ .num_resources = 0, ++ .dev = { ++ .platform_data = &lsql_button_data, ++ }, ++}; ++ ++/**************************************************************************** ++ * GPIO Attached Fan ++ ****************************************************************************/ ++ ++/* Define max char len */ ++ ++#define MAX_LEN 8 ++ ++#define LSQL_GPIO_FAN_LOW 17 ++#define LSQL_GPIO_FAN_HIGH 14 ++ ++static struct proc_dir_entry *lsql_proc_dir_root, *lsql_proc_dir_gpio, *lsql_fan_proc_file; ++static char lsql_fan_state[MAX_LEN]; ++ ++static int lsql_fan_get(char *buf, char **start, off_t offset, int count, int *eof, void *data) ++{ ++ int len; ++ ++ len = snprintf(buf, count, "state: %s\n", lsql_fan_state); ++ return len; ++} ++ ++static int lsql_fan_set( struct file *file, const char *buffer, unsigned long count, void *data ) ++{ ++ int len, ret; ++ char *ptr, tState[MAX_LEN]; ++ ++ if (count > MAX_LEN ) ++ len = MAX_LEN; ++ else ++ len = count; ++ ++ ret = copy_from_user(tState, buffer, len); ++ if(ret < 0) ++ { ++ printk(KERN_ERR "%s: Setting fan speed failed\n", "lsql"); ++ return -EFAULT; ++ } ++ ++ ptr = strrchr(tState, '\n'); ++ if(ptr) *ptr = '\0'; ++ ++ if (strcasecmp(tState, "off") == 0) ++ { ++ printk(KERN_DEBUG "%s: set fan off\n", "lsql"); ++ sprintf(lsql_fan_state, "off"); ++ gpio_set_value(LSQL_GPIO_FAN_LOW, 1); ++ gpio_set_value(LSQL_GPIO_FAN_HIGH, 1); ++ } else if (strcasecmp(tState, "slow") == 0) ++ { ++ printk(KERN_DEBUG "%s: set fan slow\n", "lsql"); ++ sprintf(lsql_fan_state, "slow"); ++ gpio_set_value(LSQL_GPIO_FAN_LOW, 1); ++ gpio_set_value(LSQL_GPIO_FAN_HIGH, 0); ++ } else if (strcasecmp(tState, "fast") == 0) ++ { ++ printk(KERN_DEBUG "%s: set fan fast\n", "lsql"); ++ sprintf(lsql_fan_state, "fast"); ++ gpio_set_value(LSQL_GPIO_FAN_LOW, 0); ++ gpio_set_value(LSQL_GPIO_FAN_HIGH, 1); ++ } else if (strcasecmp(tState, "full") == 0) ++ { ++ printk(KERN_DEBUG "%s: set fan full\n", "lsql"); ++ sprintf(lsql_fan_state, "full"); ++ gpio_set_value(LSQL_GPIO_FAN_LOW, 0); ++ gpio_set_value(LSQL_GPIO_FAN_HIGH, 0); ++ } else ++ { ++ printk(KERN_ERR "%s: unknown fan speed given\n", "lsql"); ++ } ++ ++ lsql_fan_state[len] = '\0'; ++ ++ return len; ++} ++ ++/***************************************************************************** ++ * SATA ++ ****************************************************************************/ ++static struct mv_sata_platform_data lsql_sata_data = { ++ .n_ports = 2, /*maybe this should be 4, but works with 2 */ ++}; ++ ++ ++/***************************************************************************** ++ * Linkstation Quad specific power off method: reboot ++ ****************************************************************************/ ++/* ++ * On Linkstations in general, the shutdown process is following: ++ * - Userland monitors key events until the power switch goes to off position ++ * - The board reboots ++ * - U-boot starts and goes into an idle mode waiting for the user ++ * to move the switch to ON position ++ * ++ * on the Quad however, there is a power button on the upper, front, ++ * a function button on the lower front, ans a 
Auto/Manual power button on the back. ++ * After halting system, uboot waits the power button on the front panel to be pushed ++ * ++ * ++ */ ++ ++static void lsql_power_off(void) ++{ ++ arm_machine_restart(0, NULL); /* orion5x_restart('h', NULL); */ ++} ++ ++ ++/***************************************************************************** ++ * General Setup ++ ****************************************************************************/ ++#define LSQL_GPIO_USB_POWER 9 ++#define LSQL_GPIO_POWER 10 ++#define LSQL_GPIO_USB_POWER2 19 ++#define LSQL_GPIO_AUTO_POWER 22 ++ ++static unsigned int lsql_mpp_modes[] __initdata = { ++ MPP0_GPIO, /* LED_PWR */ ++ MPP1_GPIO, /* for debugging purposes, change to MPP1_UNUSED for final */ ++ MPP2_GPIO, /* LED_ALARM */ /* looks like it should be 2 by the uboot sources, but doesnt successfully trigger the3 top LED*/ ++ MPP3_GPIO, /* LED_INFO */ ++ MPP4_GPIO, ++ MPP5_GPIO, ++ MPP6_GPIO, /* FAN_LCK */ ++ MPP7_GPIO, /* FUNC */ ++ MPP8_GPIO, ++ MPP9_GPIO, /* USB_PWR */ ++ MPP10_GPIO, /* POWER */ ++ MPP11_GPIO, ++ MPP12_GPIO, ++ MPP13_GPIO, ++ MPP14_GPIO, /* FAN_HIGH */ ++ MPP15_GPIO, ++ MPP16_GPIO, ++ MPP17_GPIO, /* FAN_LOW */ ++ MPP18_GPIO, /* LED_FUNC*/ ++ MPP19_GPIO, /* USB_PWR2 */ ++ MPP22_GPIO, /* AUTO_POWER*/ ++ 0, ++}; ++ ++static void __init lsql_init(void) ++{ ++ /* ++ * Setup basic Orion functions. Need to be called early. ++ */ ++ orion5x_init(); ++ ++ orion5x_mpp_conf(lsql_mpp_modes); ++ ++ /* ++ * Configure peripherals. ++ */ ++ orion5x_ehci0_init(); ++ orion5x_ehci1_init(); ++ orion5x_eth_init(&lsql_eth_data); ++ orion5x_i2c_init(); ++ orion5x_sata_init(&lsql_sata_data); ++ orion5x_uart0_init(); ++ orion5x_xor_init(); ++ ++ orion5x_setup_dev_boot_win(LSQL_NOR_BOOT_BASE, ++ LSQL_NOR_BOOT_SIZE); ++ platform_device_register(&lsql_nor_flash); ++ ++ platform_device_register(&lsql_button_device); ++ ++ platform_device_register(&lsql_leds); ++ ++ i2c_register_board_info(0, &lsql_i2c_rtc, 1); ++ ++ /* enable USB power */ ++ gpio_set_value(LSQL_GPIO_USB_POWER, 1); ++ gpio_set_value(LSQL_GPIO_USB_POWER2, 1); ++ ++ ++ printk(KERN_INFO "Buffalo Linkstation fan driver loaded\n"); ++ sprintf(lsql_fan_state, "fast"); ++ gpio_set_value(LSQL_GPIO_FAN_LOW, 0); ++ gpio_set_value(LSQL_GPIO_FAN_HIGH, 1); ++ ++ lsql_proc_dir_root = proc_mkdir( "linkstation", NULL ); ++ lsql_proc_dir_gpio = proc_mkdir( "gpio", lsql_proc_dir_root ); ++ lsql_fan_proc_file = create_proc_entry( "fan", S_IRUGO, lsql_proc_dir_gpio ); ++ if( lsql_fan_proc_file ) { ++ lsql_fan_proc_file->read_proc = lsql_fan_get; ++ lsql_fan_proc_file->write_proc = lsql_fan_set; ++ lsql_fan_proc_file->data = NULL; ++ } else ++ { ++ printk(KERN_INFO "Registration of fan device failed\n"); ++ } ++ ++ /* register power-off method */ ++ pm_power_off = lsql_power_off; ++ ++ pr_info("%s: finished\n", __func__); ++} ++ ++#ifdef CONFIG_MACH_LINKSTATION_LSQL ++MACHINE_START(LINKSTATION_LSQL, "Buffalo Linkstation Quad QL/R5") ++ .atag_offset = 0x00000100, ++ .init_machine = lsql_init, ++ .map_io = orion5x_map_io, ++ .init_early = orion5x_init_early, ++ .init_irq = orion5x_init_irq, ++ .timer = &orion5x_timer, ++ .fixup = tag_fixup_mem32, ++ /* .restart = orion5x_restart, */ ++MACHINE_END ++#endif ++ ++ +diff -uNr linux-3.2.33-go.orig/arch/arm/mach-orion5x/Makefile linux-3.2.33-go/arch/arm/mach-orion5x/Makefile +--- linux-3.2.33-go.orig/arch/arm/mach-orion5x/Makefile 2012-11-11 15:13:23.517491578 +0100 ++++ linux-3.2.33-go/arch/arm/mach-orion5x/Makefile 2012-11-11 15:14:01.323037254 +0100 +@@ -7,6 +7,7 @@ + 
obj-$(CONFIG_MACH_LINKSTATION_MINI) += lsmini-setup.o + obj-$(CONFIG_MACH_LINKSTATION_PRODUO) += lsproduo-setup.o + obj-$(CONFIG_MACH_LINKSTATION_LS_HGL) += ls_hgl-setup.o ++obj-$(CONFIG_MACH_LINKSTATION_LSQL) += lsql-setup.o + obj-$(CONFIG_MACH_DNS323) += dns323-setup.o + obj-$(CONFIG_MACH_TS209) += ts209-setup.o tsx09-common.o + obj-$(CONFIG_MACH_TS409) += ts409-setup.o tsx09-common.o +diff -uNr linux-3.2.33-go.orig/arch/arm/tools/mach-types linux-3.2.33-go/arch/arm/tools/mach-types +--- linux-3.2.33-go.orig/arch/arm/tools/mach-types 2012-11-11 15:13:23.340493615 +0100 ++++ linux-3.2.33-go/arch/arm/tools/mach-types 2012-11-11 15:14:26.618733715 +0100 +@@ -1129,3 +1129,4 @@ + m28evk MACH_M28EVK M28EVK 3613 + smdk4212 MACH_SMDK4212 SMDK4212 3638 + smdk4412 MACH_SMDK4412 SMDK4412 3765 ++linkstation_lsql MACH_LINKSTATION_LSQL LINKSTATION_LSQL 4238 diff --git a/3.2.34/kernel-3.2-lsxhl.patch b/3.2.34/kernel-3.2-lsxhl.patch new file mode 100644 index 0000000..60034e8 --- /dev/null +++ b/3.2.34/kernel-3.2-lsxhl.patch @@ -0,0 +1,387 @@ +Add support for the Buffalo Linkstation XHL. This NAS box is based on a +Marvell Kirkwood chip at 1.2 GHz and features 256 MB RAM, 512kb SPI boot +flash, gigabit ethernet and one SATA port. + +Signed-off-by: Michael Walle +--- + arch/arm/configs/kirkwood_defconfig | 1 + + arch/arm/mach-kirkwood/Kconfig | 6 + + arch/arm/mach-kirkwood/Makefile | 1 + + arch/arm/mach-kirkwood/lsxhl-setup.c | 313 ++++++++++++++++++++++++++++++++++ + arch/arm/tools/mach-types | 1 + + 5 files changed, 322 insertions(+), 0 deletions(-) + create mode 100644 arch/arm/mach-kirkwood/lsxhl-setup.c + +diff --git a/arch/arm/configs/kirkwood_defconfig b/arch/arm/configs/kirkwood_defconfig +index aeb3af5..9f77811 100644 +--- a/arch/arm/configs/kirkwood_defconfig ++++ b/arch/arm/configs/kirkwood_defconfig +@@ -28,6 +28,7 @@ CONFIG_MACH_D2NET_V2=y + CONFIG_MACH_NET2BIG_V2=y + CONFIG_MACH_NET5BIG_V2=y + CONFIG_MACH_T5325=y ++CONFIG_MACH_LSXHL=y + # CONFIG_CPU_FEROCEON_OLD_ID is not set + CONFIG_NO_HZ=y + CONFIG_HIGH_RES_TIMERS=y +diff --git a/arch/arm/mach-kirkwood/Kconfig b/arch/arm/mach-kirkwood/Kconfig +index 7fc603b..307cc99 100644 +--- a/arch/arm/mach-kirkwood/Kconfig ++++ b/arch/arm/mach-kirkwood/Kconfig +@@ -130,6 +130,12 @@ config MACH_T5325 + Say 'Y' here if you want your kernel to support the + HP t5325 Thin Client. + ++config MACH_LSXHL ++ bool "Buffalo LS-XHL Series" ++ help ++ Say 'Y' here if you want your kernel to support the ++ Buffalo LS-XHL Series. ++ + endmenu + + endif +diff --git a/arch/arm/mach-kirkwood/Makefile b/arch/arm/mach-kirkwood/Makefile +index 5dcaa81..221980b 100644 +--- a/arch/arm/mach-kirkwood/Makefile ++++ b/arch/arm/mach-kirkwood/Makefile +@@ -18,5 +18,6 @@ obj-$(CONFIG_MACH_D2NET_V2) += d2net_v2-setup.o lacie_v2-common.o + obj-$(CONFIG_MACH_NET2BIG_V2) += netxbig_v2-setup.o lacie_v2-common.o + obj-$(CONFIG_MACH_NET5BIG_V2) += netxbig_v2-setup.o lacie_v2-common.o + obj-$(CONFIG_MACH_T5325) += t5325-setup.o ++obj-$(CONFIG_MACH_LSXHL) += lsxhl-setup.o + + obj-$(CONFIG_CPU_IDLE) += cpuidle.o +diff --git a/arch/arm/mach-kirkwood/lsxhl-setup.c b/arch/arm/mach-kirkwood/lsxhl-setup.c +new file mode 100644 +index 0000000..783d257 +--- /dev/null ++++ b/arch/arm/mach-kirkwood/lsxhl-setup.c +@@ -0,0 +1,313 @@ ++/* ++ * arch/arm/mach-kirkwood/lsxhl-setup.c ++ * ++ * Buffalo LS-XHL Series Setup ++ * ++ * This file is licensed under the terms of the GNU General Public ++ * License version 2. 
This program is licensed "as is" without any ++ * warranty of any kind, whether express or implied. ++ */ ++ ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include "common.h" ++#include "mpp.h" ++ ++/***************************************************************************** ++ * 512KB SPI Flash on BOOT Device ++ ****************************************************************************/ ++static struct mtd_partition lsxhl_partitions[] = { ++ { ++ .name = "u-boot", ++ .size = 0x70000, ++ .offset = 0x00000, ++ .mask_flags = MTD_WRITEABLE, ++ }, ++ { ++ .name = "u-boot env", ++ .size = 0x10000, ++ .offset = 0x70000, ++ } ++}; ++ ++static struct flash_platform_data lsxhl_spi_slave_data = { ++ .type = "m25p40", ++ .parts = lsxhl_partitions, ++ .nr_parts = ARRAY_SIZE(lsxhl_partitions), ++}; ++ ++static struct spi_board_info __initdata lsxhl_spi_slave_info[] = { ++ { ++ .modalias = "m25p80", ++ .platform_data = &lsxhl_spi_slave_data, ++ .irq = -1, ++ .max_speed_hz = 20000000, ++ .bus_num = 0, ++ .chip_select = 0, ++ } ++}; ++ ++/***************************************************************************** ++ * Ethernet ++ ****************************************************************************/ ++static struct mv643xx_eth_platform_data lsxhl_ge00_data = { ++ .phy_addr = MV643XX_ETH_PHY_ADDR(0), ++}; ++ ++static struct mv643xx_eth_platform_data lsxhl_ge01_data = { ++ .phy_addr = MV643XX_ETH_PHY_ADDR(8), ++}; ++ ++/***************************************************************************** ++ * SATA ++ ****************************************************************************/ ++static struct mv_sata_platform_data lsxhl_sata_data = { ++ .n_ports = 1, ++}; ++ ++/***************************************************************************** ++ * LEDs attached to GPIO ++ ****************************************************************************/ ++#define LSXHL_GPIO_LED_ALARM 37 ++#define LSXHL_GPIO_LED_INFO 38 ++#define LSXHL_GPIO_LED_PWR 39 ++#define LSXHL_GPIO_LED_FUNC_BLUE 36 ++#define LSXHL_GPIO_LED_FUNC_RED 48 ++ ++static struct gpio_led lsxhl_led_pins[] = { ++ { ++ .name = "alarm:red", ++ .gpio = LSXHL_GPIO_LED_ALARM, ++ .active_low = 1, ++ }, ++ { ++ .name = "info:amber", ++ .gpio = LSXHL_GPIO_LED_INFO, ++ .active_low = 1, ++ }, ++ { ++ .name = "power:blue", ++ .default_trigger = "default-on", ++ .gpio = LSXHL_GPIO_LED_PWR, ++ .active_low = 1, ++ }, ++ { ++ .name = "func:blue:bottom", ++ .gpio = LSXHL_GPIO_LED_FUNC_BLUE, ++ .active_low = 1, ++ }, ++ { ++ .name = "func:red:bottom", ++ .gpio = LSXHL_GPIO_LED_FUNC_RED, ++ .active_low = 1, ++ }, ++}; ++ ++static struct gpio_led_platform_data lsxhl_led_data = { ++ .leds = lsxhl_led_pins, ++ .num_leds = ARRAY_SIZE(lsxhl_led_pins), ++}; ++ ++static struct platform_device lsxhl_leds = { ++ .name = "leds-gpio", ++ .id = -1, ++ .dev = { ++ .platform_data = &lsxhl_led_data, ++ } ++}; ++ ++/***************************************************************************** ++ * General Setup ++ ****************************************************************************/ ++#define LSXHL_GPIO_HDD_POWER 10 ++#define LSXHL_GPIO_USB_POWER 11 ++ ++/***************************************************************************** ++ * GPIO Attached Keys ++ ****************************************************************************/ ++#define LSXHL_GPIO_KEY_FUNC 41 ++#define LSXHL_GPIO_KEY_AUTOPOWER 42 ++#define 
LSXHL_GPIO_KEY_POWER 43 ++#define LSXHL_SW_POWER 0x00 ++#define LSXHL_SW_AUTOPOWER 0x01 ++#define LSXHL_SW_FUNC 0x02 ++ ++static struct gpio_keys_button lsxhl_buttons[] = { ++ { ++ .type = EV_SW, ++ .code = LSXHL_SW_POWER, ++ .gpio = LSXHL_GPIO_KEY_POWER, ++ .desc = "Power-on Switch", ++ .active_low = 1, ++ }, { ++ .type = EV_SW, ++ .code = LSXHL_SW_AUTOPOWER, ++ .gpio = LSXHL_GPIO_KEY_AUTOPOWER, ++ .desc = "Power-auto Switch", ++ .active_low = 1, ++ }, { ++ .type = EV_SW, ++ .code = LSXHL_SW_POWER, ++ .gpio = LSXHL_GPIO_KEY_FUNC, ++ .desc = "Function Button", ++ .active_low = 1, ++ }, ++}; ++ ++static struct gpio_keys_platform_data lsxhl_button_data = { ++ .buttons = lsxhl_buttons, ++ .nbuttons = ARRAY_SIZE(lsxhl_buttons), ++}; ++ ++static struct platform_device lsxhl_button_device = { ++ .name = "gpio-keys", ++ .id = -1, ++ .num_resources = 0, ++ .dev = { ++ .platform_data = &lsxhl_button_data, ++ }, ++}; ++ ++/***************************************************************************** ++ * GPIO Fan ++ ****************************************************************************/ ++#define LSXHL_GPIO_FAN_HIGH 18 ++#define LSXHL_GPIO_FAN_LOW 19 ++#define LSXHL_GPIO_FAN_LOCK 40 ++ ++static struct gpio_fan_alarm lsxhl_alarm = { ++ .gpio = LSXHL_GPIO_FAN_LOCK, ++}; ++ ++static struct gpio_fan_speed lsxhl_speeds[] = { ++ { ++ .rpm = 0, ++ .ctrl_val = 3, ++ }, { ++ .rpm = 1500, ++ .ctrl_val = 1, ++ }, { ++ .rpm = 3250, ++ .ctrl_val = 2, ++ }, { ++ .rpm = 5000, ++ .ctrl_val = 0, ++ } ++}; ++ ++static int lsxhl_gpio_list[] = { ++ LSXHL_GPIO_FAN_HIGH, LSXHL_GPIO_FAN_LOW, ++}; ++ ++static struct gpio_fan_platform_data lsxhl_fan_data = { ++ .num_ctrl = ARRAY_SIZE(lsxhl_gpio_list), ++ .ctrl = lsxhl_gpio_list, ++ .alarm = &lsxhl_alarm, ++ .num_speed = ARRAY_SIZE(lsxhl_speeds), ++ .speed = lsxhl_speeds, ++}; ++ ++static struct platform_device lsxhl_fan_device = { ++ .name = "gpio-fan", ++ .id = -1, ++ .num_resources = 0, ++ .dev = { ++ .platform_data = &lsxhl_fan_data, ++ }, ++}; ++ ++/***************************************************************************** ++ * GPIO Data ++ ****************************************************************************/ ++ ++static unsigned int lsxhl_mpp_config[] __initdata = { ++ MPP10_GPO, /* HDD Power Enable */ ++ MPP11_GPIO, /* USB Vbus Enable */ ++ MPP18_GPO, /* FAN High Enable# */ ++ MPP19_GPO, /* FAN Low Enable# */ ++ MPP36_GPIO, /* Function Blue LED */ ++ MPP37_GPIO, /* Alarm LED */ ++ MPP38_GPIO, /* Info LED */ ++ MPP39_GPIO, /* Power LED */ ++ MPP40_GPIO, /* Fan Lock */ ++ MPP41_GPIO, /* Function Button */ ++ MPP42_GPIO, /* Power Switch */ ++ MPP43_GPIO, /* Power Auto Switch */ ++ MPP48_GPIO, /* Function Red LED */ ++ 0 ++}; ++ ++/***************************************************************************** ++ * LS-XHL specific power off method: reboot ++ ****************************************************************************/ ++/* ++ * On the LS-XHL, the shutdown process is following: ++ * - Userland monitors key events until the power switch goes to off position ++ * - The board reboots ++ * - U-boot starts and goes into an idle mode waiting for the user ++ * to move the switch to ON position ++ * ++ */ ++ ++static void lsxhl_power_off(void) ++{ ++ arm_machine_restart('h', NULL); ++} ++ ++static void __init lsxhl_init(void) ++{ ++ /* ++ * Basic setup. Needs to be called early. ++ */ ++ kirkwood_init(); ++ kirkwood_mpp_conf(lsxhl_mpp_config); ++ ++ /* ++ * Configure peripherals. 
++ */ ++ kirkwood_uart0_init(); ++ kirkwood_ehci_init(); ++ kirkwood_ge00_init(&lsxhl_ge00_data); ++ kirkwood_ge01_init(&lsxhl_ge01_data); ++ kirkwood_sata_init(&lsxhl_sata_data); ++ kirkwood_spi_init(); ++ ++ platform_device_register(&lsxhl_leds); ++ platform_device_register(&lsxhl_button_device); ++ platform_device_register(&lsxhl_fan_device); ++ ++ spi_register_board_info(lsxhl_spi_slave_info, ++ ARRAY_SIZE(lsxhl_spi_slave_info)); ++ ++ /* usb power on */ ++ gpio_set_value(LSXHL_GPIO_USB_POWER, 1); ++ ++ /* register power-off method */ ++ pm_power_off = lsxhl_power_off; ++ ++ pr_info("%s: finished\n", __func__); ++} ++ ++MACHINE_START(LSXHL, "Buffalo Linkstation LS-XHL") ++ .atag_offset = 0x100, ++ .init_machine = lsxhl_init, ++ .map_io = kirkwood_map_io, ++ .init_early = kirkwood_init_early, ++ .init_irq = kirkwood_init_irq, ++ .timer = &kirkwood_timer, ++MACHINE_END +diff --git a/arch/arm/tools/mach-types b/arch/arm/tools/mach-types +index 3b3776d..8acc587 100644 +--- a/arch/arm/tools/mach-types ++++ b/arch/arm/tools/mach-types +@@ -448,6 +448,7 @@ mityomapl138 MACH_MITYOMAPL138 MITYOMAPL138 2650 + guruplug MACH_GURUPLUG GURUPLUG 2659 + spear310 MACH_SPEAR310 SPEAR310 2660 + spear320 MACH_SPEAR320 SPEAR320 2661 ++lsxhl MACH_LSXHL LSXHL 2663 + aquila MACH_AQUILA AQUILA 2676 + sheeva_esata MACH_ESATA_SHEEVAPLUG ESATA_SHEEVAPLUG 2678 + msm7x30_surf MACH_MSM7X30_SURF MSM7X30_SURF 2679 +-- +1.7.2.3 diff --git a/3.2.34/kernel-3.4.0-layer7-2.22.patch b/3.2.34/kernel-3.4.0-layer7-2.22.patch new file mode 100644 index 0000000..736adfe --- /dev/null +++ b/3.2.34/kernel-3.4.0-layer7-2.22.patch @@ -0,0 +1,2132 @@ +--- linux-2.6.28-stock/net/netfilter/Kconfig 2009-01-07 16:05:35.000000000 -0600 ++++ linux-2.6.28/net/netfilter/Kconfig 2009-01-07 16:07:31.000000000 -0600 +@@ -795,6 +795,27 @@ config NETFILTER_XT_MATCH_STATE + + To compile it as a module, choose M here. If unsure, say N. + ++config NETFILTER_XT_MATCH_LAYER7 ++ tristate '"layer7" match support' ++ depends on NETFILTER_XTABLES ++ depends on EXPERIMENTAL && (IP_NF_CONNTRACK || NF_CONNTRACK) ++ depends on NF_CT_ACCT ++ help ++ Say Y if you want to be able to classify connections (and their ++ packets) based on regular expression matching of their application ++ layer data. This is one way to classify applications such as ++ peer-to-peer filesharing systems that do not always use the same ++ port. ++ ++ To compile it as a module, choose M here. If unsure, say N. ++ ++config NETFILTER_XT_MATCH_LAYER7_DEBUG ++ bool 'Layer 7 debugging output' ++ depends on NETFILTER_XT_MATCH_LAYER7 ++ help ++ Say Y to get lots of debugging output. 
++ ++ + config NETFILTER_XT_MATCH_STATISTIC + tristate '"statistic" match support' + depends on NETFILTER_ADVANCED +--- linux-2.6.28-stock/net/netfilter/Makefile 2009-01-07 16:05:35.000000000 -0600 ++++ linux-2.6.28/net/netfilter/Makefile 2009-01-07 16:07:31.000000000 -0600 +@@ -84,6 +84,7 @@ obj-$(CONFIG_NETFILTER_XT_MATCH_RECENT) + obj-$(CONFIG_NETFILTER_XT_MATCH_SCTP) += xt_sctp.o + obj-$(CONFIG_NETFILTER_XT_MATCH_SOCKET) += xt_socket.o + obj-$(CONFIG_NETFILTER_XT_MATCH_STATE) += xt_state.o ++obj-$(CONFIG_NETFILTER_XT_MATCH_LAYER7) += xt_layer7.o + obj-$(CONFIG_NETFILTER_XT_MATCH_STATISTIC) += xt_statistic.o + obj-$(CONFIG_NETFILTER_XT_MATCH_STRING) += xt_string.o + obj-$(CONFIG_NETFILTER_XT_MATCH_TCPMSS) += xt_tcpmss.o +--- linux-2.6.28-stock/net/netfilter/xt_layer7.c 1969-12-31 18:00:00.000000000 -0600 ++++ linux-2.6.28/net/netfilter/xt_layer7.c 2009-01-07 20:47:14.000000000 -0600 +@@ -0,0 +1,666 @@ ++/* ++ Kernel module to match application layer (OSI layer 7) data in connections. ++ ++ http://l7-filter.sf.net ++ ++ (C) 2003-2009 Matthew Strait and Ethan Sommer. ++ ++ This program is free software; you can redistribute it and/or ++ modify it under the terms of the GNU General Public License ++ as published by the Free Software Foundation; either version ++ 2 of the License, or (at your option) any later version. ++ http://www.gnu.org/licenses/gpl.txt ++ ++ Based on ipt_string.c (C) 2000 Emmanuel Roger , ++ xt_helper.c (C) 2002 Harald Welte and cls_layer7.c (C) 2003 Matthew Strait, ++ Ethan Sommer, Justin Levandoski. ++*/ ++ ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#if LINUX_VERSION_CODE >= KERNEL_VERSION(2, 6, 27) ++#include ++#include ++#endif ++#include ++#include ++#include ++#include ++ ++#include "regexp/regexp.c" ++ ++MODULE_LICENSE("GPL"); ++MODULE_AUTHOR("Matthew Strait , Ethan Sommer "); ++MODULE_DESCRIPTION("iptables application layer match module"); ++MODULE_ALIAS("ipt_layer7"); ++MODULE_VERSION("2.21"); ++ ++static int maxdatalen = 2048; // this is the default ++module_param(maxdatalen, int, 0444); ++MODULE_PARM_DESC(maxdatalen, "maximum bytes of data looked at by l7-filter"); ++#ifdef CONFIG_NETFILTER_XT_MATCH_LAYER7_DEBUG ++ #define DPRINTK(format,args...) printk(format,##args) ++#else ++ #define DPRINTK(format,args...) ++#endif ++ ++/* Number of packets whose data we look at. ++This can be modified through /proc/net/layer7_numpackets */ ++static int num_packets = 10; ++ ++static struct pattern_cache { ++ char * regex_string; ++ regexp * pattern; ++ struct pattern_cache * next; ++} * first_pattern_cache = NULL; ++ ++DEFINE_SPINLOCK(l7_lock); ++ ++static int total_acct_packets(struct nf_conn *ct) ++{ ++#if LINUX_VERSION_CODE <= KERNEL_VERSION(2, 6, 26) ++ BUG_ON(ct == NULL); ++ return (ct->counters[IP_CT_DIR_ORIGINAL].packets + ct->counters[IP_CT_DIR_REPLY].packets); ++#else ++ struct nf_conn_counter *acct; ++ ++ BUG_ON(ct == NULL); ++ acct = nf_conn_acct_find(ct); ++ if (!acct) ++ return 0; ++ return (acct[IP_CT_DIR_ORIGINAL].packets + acct[IP_CT_DIR_REPLY].packets); ++#endif ++} ++ ++#ifdef CONFIG_IP_NF_MATCH_LAYER7_DEBUG ++/* Converts an unfriendly string into a friendly one by ++replacing unprintables with periods and all whitespace with " ". 
*/ ++static char * friendly_print(unsigned char * s) ++{ ++ char * f = kmalloc(strlen(s) + 1, GFP_ATOMIC); ++ int i; ++ ++ if(!f) { ++ if (net_ratelimit()) ++ printk(KERN_ERR "layer7: out of memory in " ++ "friendly_print, bailing.\n"); ++ return NULL; ++ } ++ ++ for(i = 0; i < strlen(s); i++){ ++ if(isprint(s[i]) && s[i] < 128) f[i] = s[i]; ++ else if(isspace(s[i])) f[i] = ' '; ++ else f[i] = '.'; ++ } ++ f[i] = '\0'; ++ return f; ++} ++ ++static char dec2hex(int i) ++{ ++ switch (i) { ++ case 0 ... 9: ++ return (i + '0'); ++ break; ++ case 10 ... 15: ++ return (i - 10 + 'a'); ++ break; ++ default: ++ if (net_ratelimit()) ++ printk("layer7: Problem in dec2hex\n"); ++ return '\0'; ++ } ++} ++ ++static char * hex_print(unsigned char * s) ++{ ++ char * g = kmalloc(strlen(s)*3 + 1, GFP_ATOMIC); ++ int i; ++ ++ if(!g) { ++ if (net_ratelimit()) ++ printk(KERN_ERR "layer7: out of memory in hex_print, " ++ "bailing.\n"); ++ return NULL; ++ } ++ ++ for(i = 0; i < strlen(s); i++) { ++ g[i*3 ] = dec2hex(s[i]/16); ++ g[i*3 + 1] = dec2hex(s[i]%16); ++ g[i*3 + 2] = ' '; ++ } ++ g[i*3] = '\0'; ++ ++ return g; ++} ++#endif // DEBUG ++ ++/* Use instead of regcomp. As we expect to be seeing the same regexps over and ++over again, it make sense to cache the results. */ ++static regexp * compile_and_cache(const char * regex_string, ++ const char * protocol) ++{ ++ struct pattern_cache * node = first_pattern_cache; ++ struct pattern_cache * last_pattern_cache = first_pattern_cache; ++ struct pattern_cache * tmp; ++ unsigned int len; ++ ++ while (node != NULL) { ++ if (!strcmp(node->regex_string, regex_string)) ++ return node->pattern; ++ ++ last_pattern_cache = node;/* points at the last non-NULL node */ ++ node = node->next; ++ } ++ ++ /* If we reach the end of the list, then we have not yet cached ++ the pattern for this regex. Let's do that now. ++ Be paranoid about running out of memory to avoid list corruption. */ ++ tmp = kmalloc(sizeof(struct pattern_cache), GFP_ATOMIC); ++ ++ if(!tmp) { ++ if (net_ratelimit()) ++ printk(KERN_ERR "layer7: out of memory in " ++ "compile_and_cache, bailing.\n"); ++ return NULL; ++ } ++ ++ tmp->regex_string = kmalloc(strlen(regex_string) + 1, GFP_ATOMIC); ++ tmp->pattern = kmalloc(sizeof(struct regexp), GFP_ATOMIC); ++ tmp->next = NULL; ++ ++ if(!tmp->regex_string || !tmp->pattern) { ++ if (net_ratelimit()) ++ printk(KERN_ERR "layer7: out of memory in " ++ "compile_and_cache, bailing.\n"); ++ kfree(tmp->regex_string); ++ kfree(tmp->pattern); ++ kfree(tmp); ++ return NULL; ++ } ++ ++ /* Ok. The new node is all ready now. */ ++ node = tmp; ++ ++ if(first_pattern_cache == NULL) /* list is empty */ ++ first_pattern_cache = node; /* make node the beginning */ ++ else ++ last_pattern_cache->next = node; /* attach node to the end */ ++ ++ /* copy the string and compile the regex */ ++ len = strlen(regex_string); ++ DPRINTK("About to compile this: \"%s\"\n", regex_string); ++ node->pattern = regcomp((char *)regex_string, &len); ++ if ( !node->pattern ) { ++ if (net_ratelimit()) ++ printk(KERN_ERR "layer7: Error compiling regexp " ++ "\"%s\" (%s)\n", ++ regex_string, protocol); ++ /* pattern is now cached as NULL, so we won't try again. 
*/ ++ } ++ ++ strcpy(node->regex_string, regex_string); ++ return node->pattern; ++} ++ ++static int can_handle(const struct sk_buff *skb) ++{ ++ if(!ip_hdr(skb)) /* not IP */ ++ return 0; ++ if(ip_hdr(skb)->protocol != IPPROTO_TCP && ++ ip_hdr(skb)->protocol != IPPROTO_UDP && ++ ip_hdr(skb)->protocol != IPPROTO_ICMP) ++ return 0; ++ return 1; ++} ++ ++/* Returns offset the into the skb->data that the application data starts */ ++static int app_data_offset(const struct sk_buff *skb) ++{ ++ /* In case we are ported somewhere (ebtables?) where ip_hdr(skb) ++ isn't set, this can be gotten from 4*(skb->data[0] & 0x0f) as well. */ ++ int ip_hl = 4*ip_hdr(skb)->ihl; ++ ++ if( ip_hdr(skb)->protocol == IPPROTO_TCP ) { ++ /* 12 == offset into TCP header for the header length field. ++ Can't get this with skb->h.th->doff because the tcphdr ++ struct doesn't get set when routing (this is confirmed to be ++ true in Netfilter as well as QoS.) */ ++ int tcp_hl = 4*(skb->data[ip_hl + 12] >> 4); ++ ++ return ip_hl + tcp_hl; ++ } else if( ip_hdr(skb)->protocol == IPPROTO_UDP ) { ++ return ip_hl + 8; /* UDP header is always 8 bytes */ ++ } else if( ip_hdr(skb)->protocol == IPPROTO_ICMP ) { ++ return ip_hl + 8; /* ICMP header is 8 bytes */ ++ } else { ++ if (net_ratelimit()) ++ printk(KERN_ERR "layer7: tried to handle unknown " ++ "protocol!\n"); ++ return ip_hl + 8; /* something reasonable */ ++ } ++} ++ ++/* handles whether there's a match when we aren't appending data anymore */ ++static int match_no_append(struct nf_conn * conntrack, ++ struct nf_conn * master_conntrack, ++ enum ip_conntrack_info ctinfo, ++ enum ip_conntrack_info master_ctinfo, ++ const struct xt_layer7_info * info) ++{ ++ /* If we're in here, throw the app data away */ ++ if(master_conntrack->layer7.app_data != NULL) { ++ ++ #ifdef CONFIG_IP_NF_MATCH_LAYER7_DEBUG ++ if(!master_conntrack->layer7.app_proto) { ++ char * f = ++ friendly_print(master_conntrack->layer7.app_data); ++ char * g = ++ hex_print(master_conntrack->layer7.app_data); ++ DPRINTK("\nl7-filter gave up after %d bytes " ++ "(%d packets):\n%s\n", ++ strlen(f), total_acct_packets(master_conntrack), f); ++ kfree(f); ++ DPRINTK("In hex: %s\n", g); ++ kfree(g); ++ } ++ #endif ++ ++ kfree(master_conntrack->layer7.app_data); ++ master_conntrack->layer7.app_data = NULL; /* don't free again */ ++ } ++ ++ if(master_conntrack->layer7.app_proto){ ++ /* Here child connections set their .app_proto (for /proc) */ ++ if(!conntrack->layer7.app_proto) { ++ conntrack->layer7.app_proto = ++ kmalloc(strlen(master_conntrack->layer7.app_proto)+1, ++ GFP_ATOMIC); ++ if(!conntrack->layer7.app_proto){ ++ if (net_ratelimit()) ++ printk(KERN_ERR "layer7: out of memory " ++ "in match_no_append, " ++ "bailing.\n"); ++ return 1; ++ } ++ strcpy(conntrack->layer7.app_proto, ++ master_conntrack->layer7.app_proto); ++ } ++ ++ return (!strcmp(master_conntrack->layer7.app_proto, ++ info->protocol)); ++ } ++ else { ++ /* If not classified, set to "unknown" to distinguish from ++ connections that are still being tested. */ ++ master_conntrack->layer7.app_proto = ++ kmalloc(strlen("unknown")+1, GFP_ATOMIC); ++ if(!master_conntrack->layer7.app_proto){ ++ if (net_ratelimit()) ++ printk(KERN_ERR "layer7: out of memory in " ++ "match_no_append, bailing.\n"); ++ return 1; ++ } ++ strcpy(master_conntrack->layer7.app_proto, "unknown"); ++ return 0; ++ } ++} ++ ++/* add the new app data to the conntrack. Return number of bytes added. 
*/ ++static int add_data(struct nf_conn * master_conntrack, ++ char * app_data, int appdatalen) ++{ ++ int length = 0, i; ++ int oldlength = master_conntrack->layer7.app_data_len; ++ ++ /* This is a fix for a race condition by Deti Fliegl. However, I'm not ++ clear on whether the race condition exists or whether this really ++ fixes it. I might just be being dense... Anyway, if it's not really ++ a fix, all it does is waste a very small amount of time. */ ++ if(!master_conntrack->layer7.app_data) return 0; ++ ++ /* Strip nulls. Make everything lower case (our regex lib doesn't ++ do case insensitivity). Add it to the end of the current data. */ ++ for(i = 0; i < maxdatalen-oldlength-1 && ++ i < appdatalen; i++) { ++ if(app_data[i] != '\0') { ++ /* the kernel version of tolower mungs 'upper ascii' */ ++ master_conntrack->layer7.app_data[length+oldlength] = ++ isascii(app_data[i])? ++ tolower(app_data[i]) : app_data[i]; ++ length++; ++ } ++ } ++ ++ master_conntrack->layer7.app_data[length+oldlength] = '\0'; ++ master_conntrack->layer7.app_data_len = length + oldlength; ++ ++ return length; ++} ++ ++/* taken from drivers/video/modedb.c */ ++static int my_atoi(const char *s) ++{ ++ int val = 0; ++ ++ for (;; s++) { ++ switch (*s) { ++ case '0'...'9': ++ val = 10*val+(*s-'0'); ++ break; ++ default: ++ return val; ++ } ++ } ++} ++ ++/* write out num_packets to userland. */ ++static int layer7_read_proc(char* page, char ** start, off_t off, int count, ++ int* eof, void * data) ++{ ++ if(num_packets > 99 && net_ratelimit()) ++ printk(KERN_ERR "layer7: NOT REACHED. num_packets too big\n"); ++ ++ page[0] = num_packets/10 + '0'; ++ page[1] = num_packets%10 + '0'; ++ page[2] = '\n'; ++ page[3] = '\0'; ++ ++ *eof=1; ++ ++ return 3; ++} ++ ++/* Read in num_packets from userland */ ++static int layer7_write_proc(struct file* file, const char* buffer, ++ unsigned long count, void *data) ++{ ++ char * foo = kmalloc(count, GFP_ATOMIC); ++ ++ if(!foo){ ++ if (net_ratelimit()) ++ printk(KERN_ERR "layer7: out of memory, bailing. " ++ "num_packets unchanged.\n"); ++ return count; ++ } ++ ++ if(copy_from_user(foo, buffer, count)) { ++ return -EFAULT; ++ } ++ ++ ++ num_packets = my_atoi(foo); ++ kfree (foo); ++ ++ /* This has an arbitrary limit to make the math easier. I'm lazy. ++ But anyway, 99 is a LOT! If you want more, you're doing it wrong! */ ++ if(num_packets > 99) { ++ printk(KERN_WARNING "layer7: num_packets can't be > 99.\n"); ++ num_packets = 99; ++ } else if(num_packets < 1) { ++ printk(KERN_WARNING "layer7: num_packets can't be < 1.\n"); ++ num_packets = 1; ++ } ++ ++ return count; ++} ++ ++static bool ++#if LINUX_VERSION_CODE >= KERNEL_VERSION(2, 6, 28) ++match(const struct sk_buff *skbin, const struct xt_match_param *par) ++#else ++match(const struct sk_buff *skbin, ++ const struct net_device *in, ++ const struct net_device *out, ++ const struct xt_match *match, ++ const void *matchinfo, ++ int offset, ++ unsigned int protoff, ++ bool *hotdrop) ++#endif ++{ ++ /* sidestep const without getting a compiler warning... */ ++ struct sk_buff * skb = (struct sk_buff *)skbin; ++ ++ const struct xt_layer7_info * info = ++ #if LINUX_VERSION_CODE >= KERNEL_VERSION(2, 6, 28) ++ par->matchinfo; ++ #else ++ matchinfo; ++ #endif ++ ++ enum ip_conntrack_info master_ctinfo, ctinfo; ++ struct nf_conn *master_conntrack, *conntrack; ++ unsigned char * app_data; ++ unsigned int pattern_result, appdatalen; ++ regexp * comppattern; ++ ++ /* Be paranoid/incompetent - lock the entire match function. 
*/ ++ spin_lock_bh(&l7_lock); ++ ++ if(!can_handle(skb)){ ++ DPRINTK("layer7: This is some protocol I can't handle.\n"); ++ spin_unlock_bh(&l7_lock); ++ return info->invert; ++ } ++ ++ /* Treat parent & all its children together as one connection, except ++ for the purpose of setting conntrack->layer7.app_proto in the actual ++ connection. This makes /proc/net/ip_conntrack more satisfying. */ ++ if(!(conntrack = nf_ct_get(skb, &ctinfo)) || ++ !(master_conntrack=nf_ct_get(skb,&master_ctinfo))){ ++ DPRINTK("layer7: couldn't get conntrack.\n"); ++ spin_unlock_bh(&l7_lock); ++ return info->invert; ++ } ++ ++ /* Try to get a master conntrack (and its master etc) for FTP, etc. */ ++ while (master_ct(master_conntrack) != NULL) ++ master_conntrack = master_ct(master_conntrack); ++ ++ /* if we've classified it or seen too many packets */ ++ if(total_acct_packets(master_conntrack) > num_packets || ++ master_conntrack->layer7.app_proto) { ++ ++ pattern_result = match_no_append(conntrack, master_conntrack, ++ ctinfo, master_ctinfo, info); ++ ++ /* skb->cb[0] == seen. Don't do things twice if there are ++ multiple l7 rules. I'm not sure that using cb for this purpose ++ is correct, even though it says "put your private variables ++ there". But it doesn't look like it is being used for anything ++ else in the skbs that make it here. */ ++ skb->cb[0] = 1; /* marking it seen here's probably irrelevant */ ++ ++ spin_unlock_bh(&l7_lock); ++ return (pattern_result ^ info->invert); ++ } ++ ++ if(skb_is_nonlinear(skb)){ ++ if(skb_linearize(skb) != 0){ ++ if (net_ratelimit()) ++ printk(KERN_ERR "layer7: failed to linearize " ++ "packet, bailing.\n"); ++ spin_unlock_bh(&l7_lock); ++ return info->invert; ++ } ++ } ++ ++ /* now that the skb is linearized, it's safe to set these. */ ++ app_data = skb->data + app_data_offset(skb); ++ appdatalen = skb_tail_pointer(skb) - app_data; ++ ++ /* the return value gets checked later, when we're ready to use it */ ++ comppattern = compile_and_cache(info->pattern, info->protocol); ++ ++ /* On the first packet of a connection, allocate space for app data */ ++ if(total_acct_packets(master_conntrack) == 1 && !skb->cb[0] && ++ !master_conntrack->layer7.app_data){ ++ master_conntrack->layer7.app_data = ++ kmalloc(maxdatalen, GFP_ATOMIC); ++ if(!master_conntrack->layer7.app_data){ ++ if (net_ratelimit()) ++ printk(KERN_ERR "layer7: out of memory in " ++ "match, bailing.\n"); ++ spin_unlock_bh(&l7_lock); ++ return info->invert; ++ } ++ ++ master_conntrack->layer7.app_data[0] = '\0'; ++ } ++ ++ /* Can be here, but unallocated, if numpackets is increased near ++ the beginning of a connection */ ++ if(master_conntrack->layer7.app_data == NULL){ ++ spin_unlock_bh(&l7_lock); ++ return info->invert; /* unmatched */ ++ } ++ ++ if(!skb->cb[0]){ ++ int newbytes; ++ newbytes = add_data(master_conntrack, app_data, appdatalen); ++ ++ if(newbytes == 0) { /* didn't add any data */ ++ skb->cb[0] = 1; ++ /* Didn't match before, not going to match now */ ++ spin_unlock_bh(&l7_lock); ++ return info->invert; ++ } ++ } ++ ++ /* If looking for "unknown", then never match. "Unknown" means that ++ we've given up; we're still trying with these packets. */ ++ if(!strcmp(info->protocol, "unknown")) { ++ pattern_result = 0; ++ /* If looking for "unset", then always match. "Unset" means that we ++ haven't yet classified the connection. 
*/ ++ } else if(!strcmp(info->protocol, "unset")) { ++ pattern_result = 2; ++ DPRINTK("layer7: matched unset: not yet classified " ++ "(%d/%d packets)\n", ++ total_acct_packets(master_conntrack), num_packets); ++ /* If the regexp failed to compile, don't bother running it */ ++ } else if(comppattern && ++ regexec(comppattern, master_conntrack->layer7.app_data)){ ++ DPRINTK("layer7: matched %s\n", info->protocol); ++ pattern_result = 1; ++ } else pattern_result = 0; ++ ++ if(pattern_result == 1) { ++ master_conntrack->layer7.app_proto = ++ kmalloc(strlen(info->protocol)+1, GFP_ATOMIC); ++ if(!master_conntrack->layer7.app_proto){ ++ if (net_ratelimit()) ++ printk(KERN_ERR "layer7: out of memory in " ++ "match, bailing.\n"); ++ spin_unlock_bh(&l7_lock); ++ return (pattern_result ^ info->invert); ++ } ++ strcpy(master_conntrack->layer7.app_proto, info->protocol); ++ } else if(pattern_result > 1) { /* cleanup from "unset" */ ++ pattern_result = 1; ++ } ++ ++ /* mark the packet seen */ ++ skb->cb[0] = 1; ++ ++ spin_unlock_bh(&l7_lock); ++ return (pattern_result ^ info->invert); ++} ++ ++// load nf_conntrack_ipv4 ++#if LINUX_VERSION_CODE >= KERNEL_VERSION(2, 6, 28) ++static bool check(const struct xt_mtchk_param *par) ++{ ++ if (nf_ct_l3proto_try_module_get(par->match->family) < 0) { ++ printk(KERN_WARNING "can't load conntrack support for " ++ "proto=%d\n", par->match->family); ++#else ++static bool check(const char *tablename, const void *inf, ++ const struct xt_match *match, void *matchinfo, ++ unsigned int hook_mask) ++{ ++ if (nf_ct_l3proto_try_module_get(match->family) < 0) { ++ printk(KERN_WARNING "can't load conntrack support for " ++ "proto=%d\n", match->family); ++#endif ++ return 0; ++ } ++ return 1; ++} ++ ++ ++#if LINUX_VERSION_CODE >= KERNEL_VERSION(2, 6, 28) ++ static void destroy(const struct xt_mtdtor_param *par) ++ { ++ nf_ct_l3proto_module_put(par->match->family); ++ } ++#else ++ static void destroy(const struct xt_match *match, void *matchinfo) ++ { ++ nf_ct_l3proto_module_put(match->family); ++ } ++#endif ++ ++static struct xt_match xt_layer7_match[] __read_mostly = { ++{ ++ .name = "layer7", ++ .family = AF_INET, ++ .checkentry = check, ++ .match = match, ++ .destroy = destroy, ++ .matchsize = sizeof(struct xt_layer7_info), ++ .me = THIS_MODULE ++} ++}; ++ ++static void layer7_cleanup_proc(void) ++{ ++ remove_proc_entry("layer7_numpackets", init_net.proc_net); ++} ++ ++/* register the proc file */ ++static void layer7_init_proc(void) ++{ ++ struct proc_dir_entry* entry; ++ entry = create_proc_entry("layer7_numpackets", 0644, init_net.proc_net); ++ entry->read_proc = layer7_read_proc; ++ entry->write_proc = layer7_write_proc; ++} ++ ++static int __init xt_layer7_init(void) ++{ ++ need_conntrack(); ++ ++ layer7_init_proc(); ++ if(maxdatalen < 1) { ++ printk(KERN_WARNING "layer7: maxdatalen can't be < 1, " ++ "using 1\n"); ++ maxdatalen = 1; ++ } ++ /* This is not a hard limit. It's just here to prevent people from ++ bringing their slow machines to a grinding halt. 
*/ ++ else if(maxdatalen > 65536) { ++ printk(KERN_WARNING "layer7: maxdatalen can't be > 65536, " ++ "using 65536\n"); ++ maxdatalen = 65536; ++ } ++ return xt_register_matches(xt_layer7_match, ++ ARRAY_SIZE(xt_layer7_match)); ++} ++ ++static void __exit xt_layer7_fini(void) ++{ ++ layer7_cleanup_proc(); ++ xt_unregister_matches(xt_layer7_match, ARRAY_SIZE(xt_layer7_match)); ++} ++ ++module_init(xt_layer7_init); ++module_exit(xt_layer7_fini); +--- linux-2.6.28-stock/net/netfilter/regexp/regexp.c 1969-12-31 18:00:00.000000000 -0600 ++++ linux-2.6.28/net/netfilter/regexp/regexp.c 2009-01-07 16:07:31.000000000 -0600 +@@ -0,0 +1,1197 @@ ++/* ++ * regcomp and regexec -- regsub and regerror are elsewhere ++ * @(#)regexp.c 1.3 of 18 April 87 ++ * ++ * Copyright (c) 1986 by University of Toronto. ++ * Written by Henry Spencer. Not derived from licensed software. ++ * ++ * Permission is granted to anyone to use this software for any ++ * purpose on any computer system, and to redistribute it freely, ++ * subject to the following restrictions: ++ * ++ * 1. The author is not responsible for the consequences of use of ++ * this software, no matter how awful, even if they arise ++ * from defects in it. ++ * ++ * 2. The origin of this software must not be misrepresented, either ++ * by explicit claim or by omission. ++ * ++ * 3. Altered versions must be plainly marked as such, and must not ++ * be misrepresented as being the original software. ++ * ++ * Beware that some of this code is subtly aware of the way operator ++ * precedence is structured in regular expressions. Serious changes in ++ * regular-expression syntax might require a total rethink. ++ * ++ * This code was modified by Ethan Sommer to work within the kernel ++ * (it now uses kmalloc etc..) ++ * ++ * Modified slightly by Matthew Strait to use more modern C. ++ */ ++ ++#include "regexp.h" ++#include "regmagic.h" ++ ++/* added by ethan and matt. Lets it work in both kernel and user space. ++(So iptables can use it, for instance.) Yea, it goes both ways... */ ++#if __KERNEL__ ++ #define malloc(foo) kmalloc(foo,GFP_ATOMIC) ++#else ++ #define printk(format,args...) printf(format,##args) ++#endif ++ ++void regerror(char * s) ++{ ++ printk("<3>Regexp: %s\n", s); ++ /* NOTREACHED */ ++} ++ ++/* ++ * The "internal use only" fields in regexp.h are present to pass info from ++ * compile to execute that permits the execute phase to run lots faster on ++ * simple cases. They are: ++ * ++ * regstart char that must begin a match; '\0' if none obvious ++ * reganch is the match anchored (at beginning-of-line only)? ++ * regmust string (pointer into program) that match must include, or NULL ++ * regmlen length of regmust string ++ * ++ * Regstart and reganch permit very fast decisions on suitable starting points ++ * for a match, cutting down the work a lot. Regmust permits fast rejection ++ * of lines that cannot possibly match. The regmust tests are costly enough ++ * that regcomp() supplies a regmust only if the r.e. contains something ++ * potentially expensive (at present, the only such thing detected is * or + ++ * at the start of the r.e., which can involve a lot of backup). Regmlen is ++ * supplied because the test in regexec() needs it and regcomp() is computing ++ * it anyway. ++ */ ++ ++/* ++ * Structure for regexp "program". This is essentially a linear encoding ++ * of a nondeterministic finite-state machine (aka syntax charts or ++ * "railroad normal form" in parsing technology). 
Each node is an opcode ++ * plus a "next" pointer, possibly plus an operand. "Next" pointers of ++ * all nodes except BRANCH implement concatenation; a "next" pointer with ++ * a BRANCH on both ends of it is connecting two alternatives. (Here we ++ * have one of the subtle syntax dependencies: an individual BRANCH (as ++ * opposed to a collection of them) is never concatenated with anything ++ * because of operator precedence.) The operand of some types of node is ++ * a literal string; for others, it is a node leading into a sub-FSM. In ++ * particular, the operand of a BRANCH node is the first node of the branch. ++ * (NB this is *not* a tree structure: the tail of the branch connects ++ * to the thing following the set of BRANCHes.) The opcodes are: ++ */ ++ ++/* definition number opnd? meaning */ ++#define END 0 /* no End of program. */ ++#define BOL 1 /* no Match "" at beginning of line. */ ++#define EOL 2 /* no Match "" at end of line. */ ++#define ANY 3 /* no Match any one character. */ ++#define ANYOF 4 /* str Match any character in this string. */ ++#define ANYBUT 5 /* str Match any character not in this string. */ ++#define BRANCH 6 /* node Match this alternative, or the next... */ ++#define BACK 7 /* no Match "", "next" ptr points backward. */ ++#define EXACTLY 8 /* str Match this string. */ ++#define NOTHING 9 /* no Match empty string. */ ++#define STAR 10 /* node Match this (simple) thing 0 or more times. */ ++#define PLUS 11 /* node Match this (simple) thing 1 or more times. */ ++#define OPEN 20 /* no Mark this point in input as start of #n. */ ++ /* OPEN+1 is number 1, etc. */ ++#define CLOSE 30 /* no Analogous to OPEN. */ ++ ++/* ++ * Opcode notes: ++ * ++ * BRANCH The set of branches constituting a single choice are hooked ++ * together with their "next" pointers, since precedence prevents ++ * anything being concatenated to any individual branch. The ++ * "next" pointer of the last BRANCH in a choice points to the ++ * thing following the whole choice. This is also where the ++ * final "next" pointer of each individual branch points; each ++ * branch starts with the operand node of a BRANCH node. ++ * ++ * BACK Normal "next" pointers all implicitly point forward; BACK ++ * exists to make loop structures possible. ++ * ++ * STAR,PLUS '?', and complex '*' and '+', are implemented as circular ++ * BRANCH structures using BACK. Simple cases (one character ++ * per match) are implemented with STAR and PLUS for speed ++ * and to minimize recursive plunges. ++ * ++ * OPEN,CLOSE ...are numbered at compile time. ++ */ ++ ++/* ++ * A node is one char of opcode followed by two chars of "next" pointer. ++ * "Next" pointers are stored as two 8-bit pieces, high order first. The ++ * value is a positive offset from the opcode of the node containing it. ++ * An operand, if any, simply follows the node. (Note that much of the ++ * code generation knows about this implicit relationship.) ++ * ++ * Using two bytes for the "next" pointer is vast overkill for most things, ++ * but allows patterns to get big without disasters. ++ */ ++#define OP(p) (*(p)) ++#define NEXT(p) (((*((p)+1)&0377)<<8) + (*((p)+2)&0377)) ++#define OPERAND(p) ((p) + 3) ++ ++/* ++ * See regmagic.h for one further detail of program structure. ++ */ ++ ++ ++/* ++ * Utility definitions. 
++ */ ++#ifndef CHARBITS ++#define UCHARAT(p) ((int)*(unsigned char *)(p)) ++#else ++#define UCHARAT(p) ((int)*(p)&CHARBITS) ++#endif ++ ++#define FAIL(m) { regerror(m); return(NULL); } ++#define ISMULT(c) ((c) == '*' || (c) == '+' || (c) == '?') ++#define META "^$.[()|?+*\\" ++ ++/* ++ * Flags to be passed up and down. ++ */ ++#define HASWIDTH 01 /* Known never to match null string. */ ++#define SIMPLE 02 /* Simple enough to be STAR/PLUS operand. */ ++#define SPSTART 04 /* Starts with * or +. */ ++#define WORST 0 /* Worst case. */ ++ ++/* ++ * Global work variables for regcomp(). ++ */ ++struct match_globals { ++char *reginput; /* String-input pointer. */ ++char *regbol; /* Beginning of input, for ^ check. */ ++char **regstartp; /* Pointer to startp array. */ ++char **regendp; /* Ditto for endp. */ ++char *regparse; /* Input-scan pointer. */ ++int regnpar; /* () count. */ ++char regdummy; ++char *regcode; /* Code-emit pointer; ®dummy = don't. */ ++long regsize; /* Code size. */ ++}; ++ ++/* ++ * Forward declarations for regcomp()'s friends. ++ */ ++#ifndef STATIC ++#define STATIC static ++#endif ++STATIC char *reg(struct match_globals *g, int paren,int *flagp); ++STATIC char *regbranch(struct match_globals *g, int *flagp); ++STATIC char *regpiece(struct match_globals *g, int *flagp); ++STATIC char *regatom(struct match_globals *g, int *flagp); ++STATIC char *regnode(struct match_globals *g, char op); ++STATIC char *regnext(struct match_globals *g, char *p); ++STATIC void regc(struct match_globals *g, char b); ++STATIC void reginsert(struct match_globals *g, char op, char *opnd); ++STATIC void regtail(struct match_globals *g, char *p, char *val); ++STATIC void regoptail(struct match_globals *g, char *p, char *val); ++ ++ ++__kernel_size_t my_strcspn(const char *s1,const char *s2) ++{ ++ char *scan1; ++ char *scan2; ++ int count; ++ ++ count = 0; ++ for (scan1 = (char *)s1; *scan1 != '\0'; scan1++) { ++ for (scan2 = (char *)s2; *scan2 != '\0';) /* ++ moved down. */ ++ if (*scan1 == *scan2++) ++ return(count); ++ count++; ++ } ++ return(count); ++} ++ ++/* ++ - regcomp - compile a regular expression into internal code ++ * ++ * We can't allocate space until we know how big the compiled form will be, ++ * but we can't compile it (and thus know how big it is) until we've got a ++ * place to put the code. So we cheat: we compile it twice, once with code ++ * generation turned off and size counting turned on, and once "for real". ++ * This also means that we don't allocate space until we are sure that the ++ * thing really will compile successfully, and we never have to move the ++ * code and thus invalidate pointers into it. (Note that it has to be in ++ * one piece because free() must be able to free it all.) ++ * ++ * Beware that the optimization-preparation code in here knows about some ++ * of the structure of the compiled regexp. ++ */ ++regexp * ++regcomp(char *exp,int *patternsize) ++{ ++ register regexp *r; ++ register char *scan; ++ register char *longest; ++ register int len; ++ int flags; ++ struct match_globals g; ++ ++ /* commented out by ethan ++ extern char *malloc(); ++ */ ++ ++ if (exp == NULL) ++ FAIL("NULL argument"); ++ ++ /* First pass: determine size, legality. */ ++ g.regparse = exp; ++ g.regnpar = 1; ++ g.regsize = 0L; ++ g.regcode = &g.regdummy; ++ regc(&g, MAGIC); ++ if (reg(&g, 0, &flags) == NULL) ++ return(NULL); ++ ++ /* Small enough for pointer-storage convention? */ ++ if (g.regsize >= 32767L) /* Probably could be 65535L. 
*/ ++ FAIL("regexp too big"); ++ ++ /* Allocate space. */ ++ *patternsize=sizeof(regexp) + (unsigned)g.regsize; ++ r = (regexp *)malloc(sizeof(regexp) + (unsigned)g.regsize); ++ if (r == NULL) ++ FAIL("out of space"); ++ ++ /* Second pass: emit code. */ ++ g.regparse = exp; ++ g.regnpar = 1; ++ g.regcode = r->program; ++ regc(&g, MAGIC); ++ if (reg(&g, 0, &flags) == NULL) ++ return(NULL); ++ ++ /* Dig out information for optimizations. */ ++ r->regstart = '\0'; /* Worst-case defaults. */ ++ r->reganch = 0; ++ r->regmust = NULL; ++ r->regmlen = 0; ++ scan = r->program+1; /* First BRANCH. */ ++ if (OP(regnext(&g, scan)) == END) { /* Only one top-level choice. */ ++ scan = OPERAND(scan); ++ ++ /* Starting-point info. */ ++ if (OP(scan) == EXACTLY) ++ r->regstart = *OPERAND(scan); ++ else if (OP(scan) == BOL) ++ r->reganch++; ++ ++ /* ++ * If there's something expensive in the r.e., find the ++ * longest literal string that must appear and make it the ++ * regmust. Resolve ties in favor of later strings, since ++ * the regstart check works with the beginning of the r.e. ++ * and avoiding duplication strengthens checking. Not a ++ * strong reason, but sufficient in the absence of others. ++ */ ++ if (flags&SPSTART) { ++ longest = NULL; ++ len = 0; ++ for (; scan != NULL; scan = regnext(&g, scan)) ++ if (OP(scan) == EXACTLY && strlen(OPERAND(scan)) >= len) { ++ longest = OPERAND(scan); ++ len = strlen(OPERAND(scan)); ++ } ++ r->regmust = longest; ++ r->regmlen = len; ++ } ++ } ++ ++ return(r); ++} ++ ++/* ++ - reg - regular expression, i.e. main body or parenthesized thing ++ * ++ * Caller must absorb opening parenthesis. ++ * ++ * Combining parenthesis handling with the base level of regular expression ++ * is a trifle forced, but the need to tie the tails of the branches to what ++ * follows makes it hard to avoid. ++ */ ++static char * ++reg(struct match_globals *g, int paren, int *flagp /* Parenthesized? */ ) ++{ ++ register char *ret; ++ register char *br; ++ register char *ender; ++ register int parno = 0; /* 0 makes gcc happy */ ++ int flags; ++ ++ *flagp = HASWIDTH; /* Tentatively. */ ++ ++ /* Make an OPEN node, if parenthesized. */ ++ if (paren) { ++ if (g->regnpar >= NSUBEXP) ++ FAIL("too many ()"); ++ parno = g->regnpar; ++ g->regnpar++; ++ ret = regnode(g, OPEN+parno); ++ } else ++ ret = NULL; ++ ++ /* Pick up the branches, linking them together. */ ++ br = regbranch(g, &flags); ++ if (br == NULL) ++ return(NULL); ++ if (ret != NULL) ++ regtail(g, ret, br); /* OPEN -> first. */ ++ else ++ ret = br; ++ if (!(flags&HASWIDTH)) ++ *flagp &= ~HASWIDTH; ++ *flagp |= flags&SPSTART; ++ while (*g->regparse == '|') { ++ g->regparse++; ++ br = regbranch(g, &flags); ++ if (br == NULL) ++ return(NULL); ++ regtail(g, ret, br); /* BRANCH -> BRANCH. */ ++ if (!(flags&HASWIDTH)) ++ *flagp &= ~HASWIDTH; ++ *flagp |= flags&SPSTART; ++ } ++ ++ /* Make a closing node, and hook it on the end. */ ++ ender = regnode(g, (paren) ? CLOSE+parno : END); ++ regtail(g, ret, ender); ++ ++ /* Hook the tails of the branches to the closing node. */ ++ for (br = ret; br != NULL; br = regnext(g, br)) ++ regoptail(g, br, ender); ++ ++ /* Check for proper termination. */ ++ if (paren && *g->regparse++ != ')') { ++ FAIL("unmatched ()"); ++ } else if (!paren && *g->regparse != '\0') { ++ if (*g->regparse == ')') { ++ FAIL("unmatched ()"); ++ } else ++ FAIL("junk on end"); /* "Can't happen". 
*/ ++ /* NOTREACHED */ ++ } ++ ++ return(ret); ++} ++ ++/* ++ - regbranch - one alternative of an | operator ++ * ++ * Implements the concatenation operator. ++ */ ++static char * ++regbranch(struct match_globals *g, int *flagp) ++{ ++ register char *ret; ++ register char *chain; ++ register char *latest; ++ int flags; ++ ++ *flagp = WORST; /* Tentatively. */ ++ ++ ret = regnode(g, BRANCH); ++ chain = NULL; ++ while (*g->regparse != '\0' && *g->regparse != '|' && *g->regparse != ')') { ++ latest = regpiece(g, &flags); ++ if (latest == NULL) ++ return(NULL); ++ *flagp |= flags&HASWIDTH; ++ if (chain == NULL) /* First piece. */ ++ *flagp |= flags&SPSTART; ++ else ++ regtail(g, chain, latest); ++ chain = latest; ++ } ++ if (chain == NULL) /* Loop ran zero times. */ ++ (void) regnode(g, NOTHING); ++ ++ return(ret); ++} ++ ++/* ++ - regpiece - something followed by possible [*+?] ++ * ++ * Note that the branching code sequences used for ? and the general cases ++ * of * and + are somewhat optimized: they use the same NOTHING node as ++ * both the endmarker for their branch list and the body of the last branch. ++ * It might seem that this node could be dispensed with entirely, but the ++ * endmarker role is not redundant. ++ */ ++static char * ++regpiece(struct match_globals *g, int *flagp) ++{ ++ register char *ret; ++ register char op; ++ register char *next; ++ int flags; ++ ++ ret = regatom(g, &flags); ++ if (ret == NULL) ++ return(NULL); ++ ++ op = *g->regparse; ++ if (!ISMULT(op)) { ++ *flagp = flags; ++ return(ret); ++ } ++ ++ if (!(flags&HASWIDTH) && op != '?') ++ FAIL("*+ operand could be empty"); ++ *flagp = (op != '+') ? (WORST|SPSTART) : (WORST|HASWIDTH); ++ ++ if (op == '*' && (flags&SIMPLE)) ++ reginsert(g, STAR, ret); ++ else if (op == '*') { ++ /* Emit x* as (x&|), where & means "self". */ ++ reginsert(g, BRANCH, ret); /* Either x */ ++ regoptail(g, ret, regnode(g, BACK)); /* and loop */ ++ regoptail(g, ret, ret); /* back */ ++ regtail(g, ret, regnode(g, BRANCH)); /* or */ ++ regtail(g, ret, regnode(g, NOTHING)); /* null. */ ++ } else if (op == '+' && (flags&SIMPLE)) ++ reginsert(g, PLUS, ret); ++ else if (op == '+') { ++ /* Emit x+ as x(&|), where & means "self". */ ++ next = regnode(g, BRANCH); /* Either */ ++ regtail(g, ret, next); ++ regtail(g, regnode(g, BACK), ret); /* loop back */ ++ regtail(g, next, regnode(g, BRANCH)); /* or */ ++ regtail(g, ret, regnode(g, NOTHING)); /* null. */ ++ } else if (op == '?') { ++ /* Emit x? as (x|) */ ++ reginsert(g, BRANCH, ret); /* Either x */ ++ regtail(g, ret, regnode(g, BRANCH)); /* or */ ++ next = regnode(g, NOTHING); /* null. */ ++ regtail(g, ret, next); ++ regoptail(g, ret, next); ++ } ++ g->regparse++; ++ if (ISMULT(*g->regparse)) ++ FAIL("nested *?+"); ++ ++ return(ret); ++} ++ ++/* ++ - regatom - the lowest level ++ * ++ * Optimization: gobbles an entire sequence of ordinary characters so that ++ * it can turn them into a single node, which is smaller to store and ++ * faster to run. Backslashed characters are exceptions, each becoming a ++ * separate node; the code is simpler that way and it's not worth fixing. ++ */ ++static char * ++regatom(struct match_globals *g, int *flagp) ++{ ++ register char *ret; ++ int flags; ++ ++ *flagp = WORST; /* Tentatively. 
*/ ++ ++ switch (*g->regparse++) { ++ case '^': ++ ret = regnode(g, BOL); ++ break; ++ case '$': ++ ret = regnode(g, EOL); ++ break; ++ case '.': ++ ret = regnode(g, ANY); ++ *flagp |= HASWIDTH|SIMPLE; ++ break; ++ case '[': { ++ register int class; ++ register int classend; ++ ++ if (*g->regparse == '^') { /* Complement of range. */ ++ ret = regnode(g, ANYBUT); ++ g->regparse++; ++ } else ++ ret = regnode(g, ANYOF); ++ if (*g->regparse == ']' || *g->regparse == '-') ++ regc(g, *g->regparse++); ++ while (*g->regparse != '\0' && *g->regparse != ']') { ++ if (*g->regparse == '-') { ++ g->regparse++; ++ if (*g->regparse == ']' || *g->regparse == '\0') ++ regc(g, '-'); ++ else { ++ class = UCHARAT(g->regparse-2)+1; ++ classend = UCHARAT(g->regparse); ++ if (class > classend+1) ++ FAIL("invalid [] range"); ++ for (; class <= classend; class++) ++ regc(g, class); ++ g->regparse++; ++ } ++ } else ++ regc(g, *g->regparse++); ++ } ++ regc(g, '\0'); ++ if (*g->regparse != ']') ++ FAIL("unmatched []"); ++ g->regparse++; ++ *flagp |= HASWIDTH|SIMPLE; ++ } ++ break; ++ case '(': ++ ret = reg(g, 1, &flags); ++ if (ret == NULL) ++ return(NULL); ++ *flagp |= flags&(HASWIDTH|SPSTART); ++ break; ++ case '\0': ++ case '|': ++ case ')': ++ FAIL("internal urp"); /* Supposed to be caught earlier. */ ++ break; ++ case '?': ++ case '+': ++ case '*': ++ FAIL("?+* follows nothing"); ++ break; ++ case '\\': ++ if (*g->regparse == '\0') ++ FAIL("trailing \\"); ++ ret = regnode(g, EXACTLY); ++ regc(g, *g->regparse++); ++ regc(g, '\0'); ++ *flagp |= HASWIDTH|SIMPLE; ++ break; ++ default: { ++ register int len; ++ register char ender; ++ ++ g->regparse--; ++ len = my_strcspn((const char *)g->regparse, (const char *)META); ++ if (len <= 0) ++ FAIL("internal disaster"); ++ ender = *(g->regparse+len); ++ if (len > 1 && ISMULT(ender)) ++ len--; /* Back off clear of ?+* operand. */ ++ *flagp |= HASWIDTH; ++ if (len == 1) ++ *flagp |= SIMPLE; ++ ret = regnode(g, EXACTLY); ++ while (len > 0) { ++ regc(g, *g->regparse++); ++ len--; ++ } ++ regc(g, '\0'); ++ } ++ break; ++ } ++ ++ return(ret); ++} ++ ++/* ++ - regnode - emit a node ++ */ ++static char * /* Location. */ ++regnode(struct match_globals *g, char op) ++{ ++ register char *ret; ++ register char *ptr; ++ ++ ret = g->regcode; ++ if (ret == &g->regdummy) { ++ g->regsize += 3; ++ return(ret); ++ } ++ ++ ptr = ret; ++ *ptr++ = op; ++ *ptr++ = '\0'; /* Null "next" pointer. */ ++ *ptr++ = '\0'; ++ g->regcode = ptr; ++ ++ return(ret); ++} ++ ++/* ++ - regc - emit (if appropriate) a byte of code ++ */ ++static void ++regc(struct match_globals *g, char b) ++{ ++ if (g->regcode != &g->regdummy) ++ *g->regcode++ = b; ++ else ++ g->regsize++; ++} ++ ++/* ++ - reginsert - insert an operator in front of already-emitted operand ++ * ++ * Means relocating the operand. ++ */ ++static void ++reginsert(struct match_globals *g, char op, char* opnd) ++{ ++ register char *src; ++ register char *dst; ++ register char *place; ++ ++ if (g->regcode == &g->regdummy) { ++ g->regsize += 3; ++ return; ++ } ++ ++ src = g->regcode; ++ g->regcode += 3; ++ dst = g->regcode; ++ while (src > opnd) ++ *--dst = *--src; ++ ++ place = opnd; /* Op node, where operand used to be. 
*/ ++ *place++ = op; ++ *place++ = '\0'; ++ *place++ = '\0'; ++} ++ ++/* ++ - regtail - set the next-pointer at the end of a node chain ++ */ ++static void ++regtail(struct match_globals *g, char *p, char *val) ++{ ++ register char *scan; ++ register char *temp; ++ register int offset; ++ ++ if (p == &g->regdummy) ++ return; ++ ++ /* Find last node. */ ++ scan = p; ++ for (;;) { ++ temp = regnext(g, scan); ++ if (temp == NULL) ++ break; ++ scan = temp; ++ } ++ ++ if (OP(scan) == BACK) ++ offset = scan - val; ++ else ++ offset = val - scan; ++ *(scan+1) = (offset>>8)&0377; ++ *(scan+2) = offset&0377; ++} ++ ++/* ++ - regoptail - regtail on operand of first argument; nop if operandless ++ */ ++static void ++regoptail(struct match_globals *g, char *p, char *val) ++{ ++ /* "Operandless" and "op != BRANCH" are synonymous in practice. */ ++ if (p == NULL || p == &g->regdummy || OP(p) != BRANCH) ++ return; ++ regtail(g, OPERAND(p), val); ++} ++ ++/* ++ * regexec and friends ++ */ ++ ++ ++/* ++ * Forwards. ++ */ ++STATIC int regtry(struct match_globals *g, regexp *prog, char *string); ++STATIC int regmatch(struct match_globals *g, char *prog); ++STATIC int regrepeat(struct match_globals *g, char *p); ++ ++#ifdef DEBUG ++int regnarrate = 0; ++void regdump(); ++STATIC char *regprop(char *op); ++#endif ++ ++/* ++ - regexec - match a regexp against a string ++ */ ++int ++regexec(regexp *prog, char *string) ++{ ++ register char *s; ++ struct match_globals g; ++ ++ /* Be paranoid... */ ++ if (prog == NULL || string == NULL) { ++ printk("<3>Regexp: NULL parameter\n"); ++ return(0); ++ } ++ ++ /* Check validity of program. */ ++ if (UCHARAT(prog->program) != MAGIC) { ++ printk("<3>Regexp: corrupted program\n"); ++ return(0); ++ } ++ ++ /* If there is a "must appear" string, look for it. */ ++ if (prog->regmust != NULL) { ++ s = string; ++ while ((s = strchr(s, prog->regmust[0])) != NULL) { ++ if (strncmp(s, prog->regmust, prog->regmlen) == 0) ++ break; /* Found it. */ ++ s++; ++ } ++ if (s == NULL) /* Not present. */ ++ return(0); ++ } ++ ++ /* Mark beginning of line for ^ . */ ++ g.regbol = string; ++ ++ /* Simplest case: anchored match need be tried only once. */ ++ if (prog->reganch) ++ return(regtry(&g, prog, string)); ++ ++ /* Messy cases: unanchored match. */ ++ s = string; ++ if (prog->regstart != '\0') ++ /* We know what char it must start with. */ ++ while ((s = strchr(s, prog->regstart)) != NULL) { ++ if (regtry(&g, prog, s)) ++ return(1); ++ s++; ++ } ++ else ++ /* We don't -- general case. */ ++ do { ++ if (regtry(&g, prog, s)) ++ return(1); ++ } while (*s++ != '\0'); ++ ++ /* Failure. */ ++ return(0); ++} ++ ++/* ++ - regtry - try match at specific point ++ */ ++static int /* 0 failure, 1 success */ ++regtry(struct match_globals *g, regexp *prog, char *string) ++{ ++ register int i; ++ register char **sp; ++ register char **ep; ++ ++ g->reginput = string; ++ g->regstartp = prog->startp; ++ g->regendp = prog->endp; ++ ++ sp = prog->startp; ++ ep = prog->endp; ++ for (i = NSUBEXP; i > 0; i--) { ++ *sp++ = NULL; ++ *ep++ = NULL; ++ } ++ if (regmatch(g, prog->program + 1)) { ++ prog->startp[0] = string; ++ prog->endp[0] = g->reginput; ++ return(1); ++ } else ++ return(0); ++} ++ ++/* ++ - regmatch - main matching routine ++ * ++ * Conceptually the strategy is simple: check to see whether the current ++ * node matches, call self recursively to see whether the rest matches, ++ * and then act accordingly. 
In practice we make some effort to avoid ++ * recursion, in particular by going through "ordinary" nodes (that don't ++ * need to know whether the rest of the match failed) by a loop instead of ++ * by recursion. ++ */ ++static int /* 0 failure, 1 success */ ++regmatch(struct match_globals *g, char *prog) ++{ ++ register char *scan = prog; /* Current node. */ ++ char *next; /* Next node. */ ++ ++#ifdef DEBUG ++ if (scan != NULL && regnarrate) ++ fprintf(stderr, "%s(\n", regprop(scan)); ++#endif ++ while (scan != NULL) { ++#ifdef DEBUG ++ if (regnarrate) ++ fprintf(stderr, "%s...\n", regprop(scan)); ++#endif ++ next = regnext(g, scan); ++ ++ switch (OP(scan)) { ++ case BOL: ++ if (g->reginput != g->regbol) ++ return(0); ++ break; ++ case EOL: ++ if (*g->reginput != '\0') ++ return(0); ++ break; ++ case ANY: ++ if (*g->reginput == '\0') ++ return(0); ++ g->reginput++; ++ break; ++ case EXACTLY: { ++ register int len; ++ register char *opnd; ++ ++ opnd = OPERAND(scan); ++ /* Inline the first character, for speed. */ ++ if (*opnd != *g->reginput) ++ return(0); ++ len = strlen(opnd); ++ if (len > 1 && strncmp(opnd, g->reginput, len) != 0) ++ return(0); ++ g->reginput += len; ++ } ++ break; ++ case ANYOF: ++ if (*g->reginput == '\0' || strchr(OPERAND(scan), *g->reginput) == NULL) ++ return(0); ++ g->reginput++; ++ break; ++ case ANYBUT: ++ if (*g->reginput == '\0' || strchr(OPERAND(scan), *g->reginput) != NULL) ++ return(0); ++ g->reginput++; ++ break; ++ case NOTHING: ++ case BACK: ++ break; ++ case OPEN+1: ++ case OPEN+2: ++ case OPEN+3: ++ case OPEN+4: ++ case OPEN+5: ++ case OPEN+6: ++ case OPEN+7: ++ case OPEN+8: ++ case OPEN+9: { ++ register int no; ++ register char *save; ++ ++ no = OP(scan) - OPEN; ++ save = g->reginput; ++ ++ if (regmatch(g, next)) { ++ /* ++ * Don't set startp if some later ++ * invocation of the same parentheses ++ * already has. ++ */ ++ if (g->regstartp[no] == NULL) ++ g->regstartp[no] = save; ++ return(1); ++ } else ++ return(0); ++ } ++ break; ++ case CLOSE+1: ++ case CLOSE+2: ++ case CLOSE+3: ++ case CLOSE+4: ++ case CLOSE+5: ++ case CLOSE+6: ++ case CLOSE+7: ++ case CLOSE+8: ++ case CLOSE+9: ++ { ++ register int no; ++ register char *save; ++ ++ no = OP(scan) - CLOSE; ++ save = g->reginput; ++ ++ if (regmatch(g, next)) { ++ /* ++ * Don't set endp if some later ++ * invocation of the same parentheses ++ * already has. ++ */ ++ if (g->regendp[no] == NULL) ++ g->regendp[no] = save; ++ return(1); ++ } else ++ return(0); ++ } ++ break; ++ case BRANCH: { ++ register char *save; ++ ++ if (OP(next) != BRANCH) /* No choice. */ ++ next = OPERAND(scan); /* Avoid recursion. */ ++ else { ++ do { ++ save = g->reginput; ++ if (regmatch(g, OPERAND(scan))) ++ return(1); ++ g->reginput = save; ++ scan = regnext(g, scan); ++ } while (scan != NULL && OP(scan) == BRANCH); ++ return(0); ++ /* NOTREACHED */ ++ } ++ } ++ break; ++ case STAR: ++ case PLUS: { ++ register char nextch; ++ register int no; ++ register char *save; ++ register int min; ++ ++ /* ++ * Lookahead to avoid useless match attempts ++ * when we know what character comes next. ++ */ ++ nextch = '\0'; ++ if (OP(next) == EXACTLY) ++ nextch = *OPERAND(next); ++ min = (OP(scan) == STAR) ? 0 : 1; ++ save = g->reginput; ++ no = regrepeat(g, OPERAND(scan)); ++ while (no >= min) { ++ /* If it could work, try it. */ ++ if (nextch == '\0' || *g->reginput == nextch) ++ if (regmatch(g, next)) ++ return(1); ++ /* Couldn't or didn't -- back up. 
*/ ++ no--; ++ g->reginput = save + no; ++ } ++ return(0); ++ } ++ break; ++ case END: ++ return(1); /* Success! */ ++ break; ++ default: ++ printk("<3>Regexp: memory corruption\n"); ++ return(0); ++ break; ++ } ++ ++ scan = next; ++ } ++ ++ /* ++ * We get here only if there's trouble -- normally "case END" is ++ * the terminating point. ++ */ ++ printk("<3>Regexp: corrupted pointers\n"); ++ return(0); ++} ++ ++/* ++ - regrepeat - repeatedly match something simple, report how many ++ */ ++static int ++regrepeat(struct match_globals *g, char *p) ++{ ++ register int count = 0; ++ register char *scan; ++ register char *opnd; ++ ++ scan = g->reginput; ++ opnd = OPERAND(p); ++ switch (OP(p)) { ++ case ANY: ++ count = strlen(scan); ++ scan += count; ++ break; ++ case EXACTLY: ++ while (*opnd == *scan) { ++ count++; ++ scan++; ++ } ++ break; ++ case ANYOF: ++ while (*scan != '\0' && strchr(opnd, *scan) != NULL) { ++ count++; ++ scan++; ++ } ++ break; ++ case ANYBUT: ++ while (*scan != '\0' && strchr(opnd, *scan) == NULL) { ++ count++; ++ scan++; ++ } ++ break; ++ default: /* Oh dear. Called inappropriately. */ ++ printk("<3>Regexp: internal foulup\n"); ++ count = 0; /* Best compromise. */ ++ break; ++ } ++ g->reginput = scan; ++ ++ return(count); ++} ++ ++/* ++ - regnext - dig the "next" pointer out of a node ++ */ ++static char* ++regnext(struct match_globals *g, char *p) ++{ ++ register int offset; ++ ++ if (p == &g->regdummy) ++ return(NULL); ++ ++ offset = NEXT(p); ++ if (offset == 0) ++ return(NULL); ++ ++ if (OP(p) == BACK) ++ return(p-offset); ++ else ++ return(p+offset); ++} ++ ++#ifdef DEBUG ++ ++STATIC char *regprop(); ++ ++/* ++ - regdump - dump a regexp onto stdout in vaguely comprehensible form ++ */ ++void ++regdump(regexp *r) ++{ ++ register char *s; ++ register char op = EXACTLY; /* Arbitrary non-END op. */ ++ register char *next; ++ /* extern char *strchr(); */ ++ ++ ++ s = r->program + 1; ++ while (op != END) { /* While that wasn't END last time... */ ++ op = OP(s); ++ printf("%2d%s", s-r->program, regprop(s)); /* Where, what. */ ++ next = regnext(s); ++ if (next == NULL) /* Next ptr. */ ++ printf("(0)"); ++ else ++ printf("(%d)", (s-r->program)+(next-s)); ++ s += 3; ++ if (op == ANYOF || op == ANYBUT || op == EXACTLY) { ++ /* Literal string, where present. */ ++ while (*s != '\0') { ++ putchar(*s); ++ s++; ++ } ++ s++; ++ } ++ putchar('\n'); ++ } ++ ++ /* Header fields of interest. 
*/ ++ if (r->regstart != '\0') ++ printf("start `%c' ", r->regstart); ++ if (r->reganch) ++ printf("anchored "); ++ if (r->regmust != NULL) ++ printf("must have \"%s\"", r->regmust); ++ printf("\n"); ++} ++ ++/* ++ - regprop - printable representation of opcode ++ */ ++static char * ++regprop(char *op) ++{ ++#define BUFLEN 50 ++ register char *p; ++ static char buf[BUFLEN]; ++ ++ strcpy(buf, ":"); ++ ++ switch (OP(op)) { ++ case BOL: ++ p = "BOL"; ++ break; ++ case EOL: ++ p = "EOL"; ++ break; ++ case ANY: ++ p = "ANY"; ++ break; ++ case ANYOF: ++ p = "ANYOF"; ++ break; ++ case ANYBUT: ++ p = "ANYBUT"; ++ break; ++ case BRANCH: ++ p = "BRANCH"; ++ break; ++ case EXACTLY: ++ p = "EXACTLY"; ++ break; ++ case NOTHING: ++ p = "NOTHING"; ++ break; ++ case BACK: ++ p = "BACK"; ++ break; ++ case END: ++ p = "END"; ++ break; ++ case OPEN+1: ++ case OPEN+2: ++ case OPEN+3: ++ case OPEN+4: ++ case OPEN+5: ++ case OPEN+6: ++ case OPEN+7: ++ case OPEN+8: ++ case OPEN+9: ++ snprintf(buf+strlen(buf),BUFLEN-strlen(buf), "OPEN%d", OP(op)-OPEN); ++ p = NULL; ++ break; ++ case CLOSE+1: ++ case CLOSE+2: ++ case CLOSE+3: ++ case CLOSE+4: ++ case CLOSE+5: ++ case CLOSE+6: ++ case CLOSE+7: ++ case CLOSE+8: ++ case CLOSE+9: ++ snprintf(buf+strlen(buf),BUFLEN-strlen(buf), "CLOSE%d", OP(op)-CLOSE); ++ p = NULL; ++ break; ++ case STAR: ++ p = "STAR"; ++ break; ++ case PLUS: ++ p = "PLUS"; ++ break; ++ default: ++ printk("<3>Regexp: corrupted opcode\n"); ++ break; ++ } ++ if (p != NULL) ++ strncat(buf, p, BUFLEN-strlen(buf)); ++ return(buf); ++} ++#endif ++ ++ +--- linux-2.6.28-stock/net/netfilter/regexp/regexp.h 1969-12-31 18:00:00.000000000 -0600 ++++ linux-2.6.28/net/netfilter/regexp/regexp.h 2009-01-07 16:07:31.000000000 -0600 +@@ -0,0 +1,41 @@ ++/* ++ * Definitions etc. for regexp(3) routines. ++ * ++ * Caveat: this is V8 regexp(3) [actually, a reimplementation thereof], ++ * not the System V one. ++ */ ++ ++#ifndef REGEXP_H ++#define REGEXP_H ++ ++ ++/* ++http://www.opensource.apple.com/darwinsource/10.3/expect-1/expect/expect.h , ++which contains a version of this library, says: ++ ++ * ++ * NSUBEXP must be at least 10, and no greater than 117 or the parser ++ * will not work properly. ++ * ++ ++However, it looks rather like this library is limited to 10. If you think ++otherwise, let us know. ++*/ ++ ++#define NSUBEXP 10 ++typedef struct regexp { ++ char *startp[NSUBEXP]; ++ char *endp[NSUBEXP]; ++ char regstart; /* Internal use only. */ ++ char reganch; /* Internal use only. */ ++ char *regmust; /* Internal use only. */ ++ int regmlen; /* Internal use only. */ ++ char program[1]; /* Unwarranted chumminess with compiler. */ ++} regexp; ++ ++regexp * regcomp(char *exp, int *patternsize); ++int regexec(regexp *prog, char *string); ++void regsub(regexp *prog, char *source, char *dest); ++void regerror(char *s); ++ ++#endif +--- linux-2.6.28-stock/net/netfilter/regexp/regmagic.h 1969-12-31 18:00:00.000000000 -0600 ++++ linux-2.6.28/net/netfilter/regexp/regmagic.h 2009-01-07 16:07:31.000000000 -0600 +@@ -0,0 +1,5 @@ ++/* ++ * The first byte of the regexp internal "program" is actually this magic ++ * number; the start node begins in the second byte. ++ */ ++#define MAGIC 0234 +--- linux-2.6.28-stock/net/netfilter/regexp/regsub.c 1969-12-31 18:00:00.000000000 -0600 ++++ linux-2.6.28/net/netfilter/regexp/regsub.c 2009-01-07 16:07:31.000000000 -0600 +@@ -0,0 +1,95 @@ ++/* ++ * regsub ++ * @(#)regsub.c 1.3 of 2 April 86 ++ * ++ * Copyright (c) 1986 by University of Toronto. ++ * Written by Henry Spencer. 
Not derived from licensed software. ++ * ++ * Permission is granted to anyone to use this software for any ++ * purpose on any computer system, and to redistribute it freely, ++ * subject to the following restrictions: ++ * ++ * 1. The author is not responsible for the consequences of use of ++ * this software, no matter how awful, even if they arise ++ * from defects in it. ++ * ++ * 2. The origin of this software must not be misrepresented, either ++ * by explicit claim or by omission. ++ * ++ * 3. Altered versions must be plainly marked as such, and must not ++ * be misrepresented as being the original software. ++ * ++ * ++ * This code was modified by Ethan Sommer to work within the kernel ++ * (it now uses kmalloc etc..) ++ * ++ */ ++#include "regexp.h" ++#include "regmagic.h" ++#include ++ ++ ++#ifndef CHARBITS ++#define UCHARAT(p) ((int)*(unsigned char *)(p)) ++#else ++#define UCHARAT(p) ((int)*(p)&CHARBITS) ++#endif ++ ++#if 0 ++//void regerror(char * s) ++//{ ++// printk("regexp(3): %s", s); ++// /* NOTREACHED */ ++//} ++#endif ++ ++/* ++ - regsub - perform substitutions after a regexp match ++ */ ++void ++regsub(regexp * prog, char * source, char * dest) ++{ ++ register char *src; ++ register char *dst; ++ register char c; ++ register int no; ++ register int len; ++ ++ /* Not necessary and gcc doesn't like it -MLS */ ++ /*extern char *strncpy();*/ ++ ++ if (prog == NULL || source == NULL || dest == NULL) { ++ regerror("NULL parm to regsub"); ++ return; ++ } ++ if (UCHARAT(prog->program) != MAGIC) { ++ regerror("damaged regexp fed to regsub"); ++ return; ++ } ++ ++ src = source; ++ dst = dest; ++ while ((c = *src++) != '\0') { ++ if (c == '&') ++ no = 0; ++ else if (c == '\\' && '0' <= *src && *src <= '9') ++ no = *src++ - '0'; ++ else ++ no = -1; ++ ++ if (no < 0) { /* Ordinary character. */ ++ if (c == '\\' && (*src == '\\' || *src == '&')) ++ c = *src++; ++ *dst++ = c; ++ } else if (prog->startp[no] != NULL && prog->endp[no] != NULL) { ++ len = prog->endp[no] - prog->startp[no]; ++ (void) strncpy(dst, prog->startp[no], len); ++ dst += len; ++ if (len != 0 && *(dst-1) == '\0') { /* strncpy hit NUL. */ ++ regerror("damaged match string"); ++ return; ++ } ++ } ++ } ++ *dst++ = '\0'; ++} +--- linux-2.6.28-stock/net/netfilter/nf_conntrack_core.c 2009-01-07 16:05:35.000000000 -0600 ++++ linux-2.6.28/net/netfilter/nf_conntrack_core.c 2009-01-07 16:07:31.000000000 -0600 +@@ -201,6 +201,14 @@ destroy_conntrack(struct nf_conntrack *n + * too. */ + nf_ct_remove_expectations(ct); + ++ #if defined(CONFIG_NETFILTER_XT_MATCH_LAYER7) || defined(CONFIG_NETFILTER_XT_MATCH_LAYER7_MODULE) ++ if(ct->layer7.app_proto) ++ kfree(ct->layer7.app_proto); ++ if(ct->layer7.app_data) ++ kfree(ct->layer7.app_data); ++ #endif ++ ++ + /* We overload first tuple to link into unconfirmed list. 
*/ + if (!nf_ct_is_confirmed(ct)) { + BUG_ON(hlist_unhashed(&ct->tuplehash[IP_CT_DIR_ORIGINAL].hnode)); +--- linux-2.6.28-stock/net/netfilter/nf_conntrack_standalone.c 2009-01-07 16:05:35.000000000 -0600 ++++ linux-2.6.28/net/netfilter/nf_conntrack_standalone.c 2009-01-07 16:07:31.000000000 -0600 +@@ -165,6 +165,12 @@ static int ct_seq_show(struct seq_file * + return -ENOSPC; + #endif + ++#if defined(CONFIG_NETFILTER_XT_MATCH_LAYER7) || defined(CONFIG_NETFILTER_XT_MATCH_LAYER7_MODULE) ++ if(ct->layer7.app_proto && ++ seq_printf(s, "l7proto=%s ", ct->layer7.app_proto)) ++ return -ENOSPC; ++#endif ++ + if (seq_printf(s, "use=%u\n", atomic_read(&ct->ct_general.use))) + return -ENOSPC; + +--- linux-2.6.28-stock/include/net/netfilter/nf_conntrack.h 2009-01-07 16:05:30.000000000 -0600 ++++ linux-2.6.28/include/net/netfilter/nf_conntrack.h 2009-01-07 16:07:31.000000000 -0600 +@@ -134,6 +134,22 @@ + struct net *ct_net; + #endif + ++#if defined(CONFIG_NETFILTER_XT_MATCH_LAYER7) || \ ++ defined(CONFIG_NETFILTER_XT_MATCH_LAYER7_MODULE) ++ struct { ++ /* ++ * e.g. "http". NULL before decision. "unknown" after decision ++ * if no match. ++ */ ++ char *app_proto; ++ /* ++ * application layer data so far. NULL after match decision. ++ */ ++ char *app_data; ++ unsigned int app_data_len; ++ } layer7; ++#endif ++ + /* Storage reserved for other modules, must be the last member */ + union nf_conntrack_proto proto; + }; +--- linux-2.6.28-stock/include/linux/netfilter/xt_layer7.h 1969-12-31 18:00:00.000000000 -0600 ++++ linux-2.6.28/include/linux/netfilter/xt_layer7.h 2009-01-07 16:07:31.000000000 -0600 +@@ -0,0 +1,13 @@ ++#ifndef _XT_LAYER7_H ++#define _XT_LAYER7_H ++ ++#define MAX_PATTERN_LEN 8192 ++#define MAX_PROTOCOL_LEN 256 ++ ++struct xt_layer7_info { ++ char protocol[MAX_PROTOCOL_LEN]; ++ char pattern[MAX_PATTERN_LEN]; ++ u_int8_t invert; ++}; ++ ++#endif /* _XT_LAYER7_H */ diff --git a/3.2.34/kirkwood-jumbo-frame.patch b/3.2.34/kirkwood-jumbo-frame.patch new file mode 100644 index 0000000..fdbc5b1 --- /dev/null +++ b/3.2.34/kirkwood-jumbo-frame.patch @@ -0,0 +1,135 @@ +kirkwood and dove have a smaller FIFO than other "orion" SoCs. This +needs to be taken into account otherwise people using things like jumbo frames +will get into some troubles. + +As a side note, this patch is an updated version of a patch sent some years +ago: http://lists.infradead.org/pipermail/linux-arm-kernel/2010-June/017320.html +which seems to have been lost. 
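[Editor's note] For quick reference, the interface change made below is small: orion_ge00_init() and orion_ge01_init() gain a tx_csum_limit argument that is simply stored into the mv643xx_eth platform data (orion_ge00_shared_data.tx_csum_limit). Kirkwood passes 1600 to reflect its smaller FIFO; the other callers (dove, orion5x, mv78xx0) pass 0. A condensed view of the resulting prototype and one updated call site, taken from the hunks that follow:

	/* New fifth argument; the value is handed through to the
	 * mv643xx_eth driver via orion_ge00_shared_data.tx_csum_limit. */
	void __init orion_ge00_init(struct mv643xx_eth_platform_data *eth_data,
				    unsigned long mapbase,
				    unsigned long irq,
				    unsigned long irq_err,
				    unsigned int tx_csum_limit);

	/* Kirkwood call site: 1600 bytes, matching its smaller FIFO. */
	orion_ge00_init(eth_data,
			GE00_PHYS_BASE, IRQ_KIRKWOOD_GE00_SUM,
			IRQ_KIRKWOOD_GE00_ERR, 1600);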
+ +Signed-off-by: Arnaud Patard + +Index: alunn/arch/arm/mach-dove/common.c +=================================================================== +--- alunn.orig/arch/arm/mach-dove/common.c 2012-07-20 09:14:45.000000000 +0200 ++++ alunn/arch/arm/mach-dove/common.c 2012-07-20 17:51:38.872925518 +0200 +@@ -102,7 +102,7 @@ void __init dove_ehci1_init(void) + void __init dove_ge00_init(struct mv643xx_eth_platform_data *eth_data) + { + orion_ge00_init(eth_data, DOVE_GE00_PHYS_BASE, +- IRQ_DOVE_GE00_SUM, IRQ_DOVE_GE00_ERR); ++ IRQ_DOVE_GE00_SUM, IRQ_DOVE_GE00_ERR, 0); + } + + /***************************************************************************** +Index: alunn/arch/arm/mach-kirkwood/common.c +=================================================================== +--- alunn.orig/arch/arm/mach-kirkwood/common.c 2012-07-20 09:14:46.000000000 +0200 ++++ alunn/arch/arm/mach-kirkwood/common.c 2012-07-20 17:51:03.104927094 +0200 +@@ -301,7 +301,7 @@ void __init kirkwood_ge00_init(struct mv + { + orion_ge00_init(eth_data, + GE00_PHYS_BASE, IRQ_KIRKWOOD_GE00_SUM, +- IRQ_KIRKWOOD_GE00_ERR); ++ IRQ_KIRKWOOD_GE00_ERR, 1600); + /* The interface forgets the MAC address assigned by u-boot if + the clock is turned off, so claim the clk now. */ + clk_prepare_enable(ge0); +@@ -315,7 +315,7 @@ void __init kirkwood_ge01_init(struct mv + { + orion_ge01_init(eth_data, + GE01_PHYS_BASE, IRQ_KIRKWOOD_GE01_SUM, +- IRQ_KIRKWOOD_GE01_ERR); ++ IRQ_KIRKWOOD_GE01_ERR, 1600); + clk_prepare_enable(ge1); + } + +Index: alunn/arch/arm/mach-mv78xx0/common.c +=================================================================== +--- alunn.orig/arch/arm/mach-mv78xx0/common.c 2012-07-20 09:14:46.000000000 +0200 ++++ alunn/arch/arm/mach-mv78xx0/common.c 2012-07-20 17:50:26.712928695 +0200 +@@ -213,7 +213,7 @@ void __init mv78xx0_ge00_init(struct mv6 + { + orion_ge00_init(eth_data, + GE00_PHYS_BASE, IRQ_MV78XX0_GE00_SUM, +- IRQ_MV78XX0_GE_ERR); ++ IRQ_MV78XX0_GE_ERR, 0); + } + + +@@ -224,7 +224,7 @@ void __init mv78xx0_ge01_init(struct mv6 + { + orion_ge01_init(eth_data, + GE01_PHYS_BASE, IRQ_MV78XX0_GE01_SUM, +- NO_IRQ); ++ NO_IRQ, 0); + } + + +Index: alunn/arch/arm/mach-orion5x/common.c +=================================================================== +--- alunn.orig/arch/arm/mach-orion5x/common.c 2012-07-20 09:14:46.000000000 +0200 ++++ alunn/arch/arm/mach-orion5x/common.c 2012-07-20 17:50:26.744928692 +0200 +@@ -109,7 +109,7 @@ void __init orion5x_eth_init(struct mv64 + { + orion_ge00_init(eth_data, + ORION5X_ETH_PHYS_BASE, IRQ_ORION5X_ETH_SUM, +- IRQ_ORION5X_ETH_ERR); ++ IRQ_ORION5X_ETH_ERR, 0); + } + + +Index: alunn/arch/arm/plat-orion/common.c +=================================================================== +--- alunn.orig/arch/arm/plat-orion/common.c 2012-07-20 09:14:46.000000000 +0200 ++++ alunn/arch/arm/plat-orion/common.c 2012-07-20 17:50:26.756928690 +0200 +@@ -291,10 +291,12 @@ static struct platform_device orion_ge00 + void __init orion_ge00_init(struct mv643xx_eth_platform_data *eth_data, + unsigned long mapbase, + unsigned long irq, +- unsigned long irq_err) ++ unsigned long irq_err, ++ unsigned int tx_csum_limit) + { + fill_resources(&orion_ge00_shared, orion_ge00_shared_resources, + mapbase + 0x2000, SZ_16K - 1, irq_err); ++ orion_ge00_shared_data.tx_csum_limit = tx_csum_limit; + ge_complete(&orion_ge00_shared_data, + orion_ge00_resources, irq, &orion_ge00_shared, + eth_data, &orion_ge00); +@@ -343,10 +345,12 @@ static struct platform_device orion_ge01 + void __init orion_ge01_init(struct 
mv643xx_eth_platform_data *eth_data, + unsigned long mapbase, + unsigned long irq, +- unsigned long irq_err) ++ unsigned long irq_err, ++ unsigned int tx_csum_limit) + { + fill_resources(&orion_ge01_shared, orion_ge01_shared_resources, + mapbase + 0x2000, SZ_16K - 1, irq_err); ++ orion_ge01_shared_data.tx_csum_limit = tx_csum_limit; + ge_complete(&orion_ge01_shared_data, + orion_ge01_resources, irq, &orion_ge01_shared, + eth_data, &orion_ge01); +Index: alunn/arch/arm/plat-orion/include/plat/common.h +=================================================================== +--- alunn.orig/arch/arm/plat-orion/include/plat/common.h 2012-07-20 09:14:46.000000000 +0200 ++++ alunn/arch/arm/plat-orion/include/plat/common.h 2012-07-20 17:50:26.772928691 +0200 +@@ -39,12 +39,14 @@ void __init orion_rtc_init(unsigned long + void __init orion_ge00_init(struct mv643xx_eth_platform_data *eth_data, + unsigned long mapbase, + unsigned long irq, +- unsigned long irq_err); ++ unsigned long irq_err, ++ unsigned int tx_csum_limit); + + void __init orion_ge01_init(struct mv643xx_eth_platform_data *eth_data, + unsigned long mapbase, + unsigned long irq, +- unsigned long irq_err); ++ unsigned long irq_err, ++ unsigned int tx_csum_limit); + + void __init orion_ge10_init(struct mv643xx_eth_platform_data *eth_data, + unsigned long mapbase, + + diff --git a/3.2.34/linux-2.6-defaults-fat-utf8.patch b/3.2.34/linux-2.6-defaults-fat-utf8.patch new file mode 100644 index 0000000..0d40fd3 --- /dev/null +++ b/3.2.34/linux-2.6-defaults-fat-utf8.patch @@ -0,0 +1,15 @@ + +https://bugzilla.redhat.com/bugzilla/show_bug.cgi?id=181963 + +--- linux-2.6.15.noarch/fs/fat/inode.c~ 2006-02-20 23:20:12.000000000 -0500 ++++ linux-2.6.15.noarch/fs/fat/inode.c 2006-02-20 23:21:42.000000000 -0500 +@@ -952,7 +952,8 @@ static int parse_options(char *options, + opts->shortname = 0; + opts->name_check = 'n'; + opts->quiet = opts->showexec = opts->sys_immutable = opts->dotsOK = 0; +- opts->utf8 = opts->unicode_xlate = 0; ++ opts->utf8 = 1; ++ opts->unicode_xlate = 0; + opts->numtail = 1; + opts->nocase = 0; + *debug = 0; diff --git a/3.2.34/linux-2.6-x86-tune-generic.patch b/3.2.34/linux-2.6-x86-tune-generic.patch new file mode 100644 index 0000000..7a7c76e --- /dev/null +++ b/3.2.34/linux-2.6-x86-tune-generic.patch @@ -0,0 +1,13 @@ +* Optimise for today's CPUs. + +--- linux-2.6/arch/x86/Makefile_32.cpu 2006-01-09 11:39:04.000000000 -0500 ++++ linux-2.6/arch/x86/Makefile_32.cpu 2006-01-09 11:39:36.000000000 -0500 +@@ -15,7 +15,7 @@ cflags-$(CONFIG_M486) += -march=i486 + cflags-$(CONFIG_M586) += -march=i586 + cflags-$(CONFIG_M586TSC) += -march=i586 + cflags-$(CONFIG_M586MMX) += -march=pentium-mmx +-cflags-$(CONFIG_M686) += -march=i686 ++cflags-$(CONFIG_M686) += -march=i686 $(call tune,generic) + cflags-$(CONFIG_MPENTIUMII) += -march=i686 $(call tune,pentium2) + cflags-$(CONFIG_MPENTIUMIII) += -march=i686 $(call tune,pentium3) + cflags-$(CONFIG_MPENTIUMM) += -march=i686 $(call tune,pentium3) diff --git a/3.2.34/linux-3.2-e2c-0.4.58.patch b/3.2.34/linux-3.2-e2c-0.4.58.patch new file mode 100644 index 0000000..9f8b37f --- /dev/null +++ b/3.2.34/linux-3.2-e2c-0.4.58.patch @@ -0,0 +1,7807 @@ +--- linux-3.2-rc5/fs/ext2/ChangeLog.e2compr-26port 1970-01-01 01:00:00.000000000 +0100 ++++ linux-3.2-rc5-e2c/fs/ext2/ChangeLog.e2compr-26port 2011-12-13 14:22:47.822975235 +0100 +@@ -0,0 +1,439 @@ ++ ++e2compr - Released under the GPL V 2 license. ++ ++ ++Installation: ++============= ++ ++1. gunzip: ++ > gunzip linux-3.1-rc3-e2c-0.4.58.patch.gz ++ ++2. 
change to you kernel directory ++ ++3. make clean: ++ > make clean ++ ++3. patch: ++ > patch -p1 < ../patch/to/patch/linux-3.1-rc3-e2c-0.4.58.patch ++ ++ see if any rejects occured: ++ > find | grep .rej ++ ++ WARNING: All rejects must be fixed manually! ++ ++4. config: ++ > make oldconfig ++ > make menuconfig ++ Now enable at least the ext2-compression feature: ++ Filesystems: ++ <*> Second extended fs support ++ [ ] Ext2 extended attributes ++ [ ] Ext2 execute in place support ++ [*] Ext2 file compression (DANGEROUS) ++ Ext2 file compression options ---> ++ ++5. make: ++ > make ++ ++ ++Building a patch: ++================= ++ ++files.txt: ++ ++fs/ext2/ChangeLog.e2compr-26port ++Documentation/filesystems/e2compress.txt ++fs/ext2/Readme.e2compr ++fs/Kconfig ++include/linux/ext2_fs_c.h ++fs/ext2/Makefile ++fs/ext2/compress.c ++fs/ext2/e2zlib.c ++fs/ext2/adler32.c ++fs/ext2/super.c ++fs/ext2/ialloc.c ++fs/ext2/balloc.c ++fs/ext2/inode.c ++fs/ext2/file.c ++fs/ext2/ioctl.c ++fs/ext2/ext2.h ++include/linux/ext2_fs.h ++fs/fcntl.c ++mm/truncate.c ++mm/swapfile.c ++mm/filemap.c ++mm/page_alloc.c ++ ++ ++cat files.txt | xargs -n1 -I '{}' diff -pruNbB linux-3.1-rc3/'{}' linux-3.1-rc3-e2c/'{}' > ./linux-3.1-e2c-0.4.58.patch ++ ++ ++Changelog: ++========== ++ ++25 August 2011 ++ Matthias Winkler ++ * released version 0.4.58 for kernel 3.1 ++ * file.c: i_alloc_sem was removed. I am not sure if only holding i_mutex ++ will be enough. See http://patchwork.ozlabs.org/patch/101859/. ++ In ext2_file_write() I replaced: ++ ++ mutex_lock(&inode->i_mutex); ++ - down_read(&inode->i_alloc_sem); ++ + atomic_inc(&inode->i_dio_count); ++ ++ - up_read(&inode->i_alloc_sem); ++ + inode_dio_done(inode); ++ mutex_unlock(&inode->i_mutex); ++ ++ The main prupose of i_dio_count is blocking vmtruncate_range() ++ as long as the i_dio_count is greater than 0. In other words, ++ all direct io must be completed before truncating is allowed. ++ ++ * file.c: generic_osync_inode was removed from mm - added functionality to ++ file.c as ex_generic_osync_inode() ++ * file.c: changed: &inode_lock to &inode->i_lock ++ * ext2_warning() replaced by ext2_msg() ++ * compress.c: vfs_dq_init(inode) replaced by dquot_initialize(inode) ++ * compress.c: ext2_truncate(inode) replaced by ++ ext2_truncate_blocks(inode, inode->i_size) which looks like ++ exactly the same! ++ * inode.c: dentry->d_lock now seems to need ++ spin_lock_nested(&dentry->d_lock, DENTRY_D_LOCK_NESTED) held. ++ * compress.c, inode.c: added might_schedule() before wait_on_buffer() ++ statements to assure we are not atomic at this point. ++ * truncate.c: removed patch from memory.c and moved it to truncate.c ++ as surrounding kernel code also moved there. vmtruncate() was ++ split in truncate_setsize() and truncate_pagecache() with kernel 3.1 ++ ++ ++10 August 2009 ++ Matthias Winkler ++ * released version 0.4.58 ++ * merged assert.h and debug.h into ext2_fs_c.h ++ * merged NDEBUG into EXT2_COMPR_DEBUG ++ * disabled adler cheksums on "read" if not defined EXT2_COMPR_DEBUG. ++ * merged none.c into compress.c ++ * inserted multiple defines "CONFIG_EXT2_COMPRESS" to allow disabling ++ of ext2compression with patched sources. ++ * re-inserted EXPORT_SYMBOL(__pagevec_free) to support ext2 as module ++ ++05 August 2009 ++ Matthias Winkler ++ * released version 0.4.57 ++ * ported to kernel 2.6.30: ++ inode.c: after fix of generic ext2 ext2_get_blocks() needed to remove bforget. 
++ * integrated SMP from version 0.4.56 ++ * per CPU one separate read and one separate write working area ++ * removed all external compression codecs ++ * removed "verify compression" (never helped to find a bug anyway) ++ * Lindent'ed all source and header files ++ ++01 August 2008 ++ Matthias Winkler ++ * released version 0.4.55 ++ * complete code cleanup ++ * changed policy to ALWAYS_LOCKING pages in do_generic_mapping_read() ++ => completely removed PG_Compr-Flag now! ++ ++31 July 2008 ++ Matthias Winkler ++ * released version 0.4.54 ++ * fixes rare himem bug: only occures if page > cluster in inode.c/readpage() ++ * fixes rare readpage bug in mm/filemap.c/do_generic_mapping_read(): ++ PG_Compr flags dissallow reading a page while de/compressing. ++ Setting and unsetting it requires the page lock, with one exception ++ do_generic_mapping_read() in filemap.c. This is done because of performance ++ reasons. Anyway, a simultaneous call of do_generic_mapping_read() for the SAME ++ page might break the PG_Compr-Mimic. ++ ++ Solutions: Always lock any page before reading OR second(n-th) call of ++ do_generic_mapping_read() busy waits until first is done. ++ Default is busy wait now, ALWAYS_LOCK implemented as option via define. ++ ++25 June 2008 ++ Matthias Winkler ++ * released version 0.4.53 ++ * fixes himem bug: unmapped block in ext2_decompress_cluster() ++ * fixes bdev bug: ext2_get_block() must be called for every block ++ which cause ooops because of bdev == NULL. ext2_get_block() will ++ set the correct bdev and the correct blocknumber of the block. ++ ++ NEVER assign bdev manually, because the blocknumber might be random then: ++ "block->b_bdev = something" (DON'T!) ++ ++ ALWAYS use: ++ if (!buffer_mapped(block)) || (block->b_bdev == NULL) ++ ext2_get_block() ++ ++ Bdev bug is closely related to file holes (empty block in a file). ++ If compressed data will be written to a former hole, then ++ usually ext2_get_block() must be called with create. ++ ext2_get_block( , , , 1 /*create*/). ++ ++ * fixed missing include in xattr.h ++ * EXT2_COMPRBLK might be removed during compression if a cluster ++ doesn't compress. During compression we re-raise EXT2_COMPRBLK ++ flag after every cluster now. ++ * added missing export of __pagevec_free to (mm/page_alloc.c) ++ * deny O_DIRECT access mode after open of a file using fcntl() ++ (in fs/fcntl.c). ++ * file.c: ++ Replaced ext2_filew_write() to use kernels generic ++ do_sync_write(). Writing on compressed files calls ++ ext2_filew_write(): ++ - divide write range into clusters ++ - ext2_decompress_cluster (if needed) ++ - do_sync_write() ++ - ext2_compress_cluster (if needed) ++ * inode.c: ++ ext2_writepage()/ext2_writepages() usually writes back ++ dirty pages of an inode. They reside in the kernels page cache. ++ This pages might e.g. be written/dirtied by a mmap()-ped file. ++ Also generic_file_aio_write() uses ext2_writepage() finally. ++ I don't see how the ext2_writepage() would handle compressed ++ files, so I re-inserted and re-wrote this part of old 2.4 code. ++ Don't know if this code (USE_WRITEPAGE) is needed at all. ++ So I leave it disabled by default. Enabled it might ++ leave compressed files with compression ratio of 100%. ++ Don't use yet! ++ ++17 April 2008 ++ Matthias Winkler ++ * first patch for kernel 2.6.25 released ++ ++20 March 2008 ++ Matthias Winkler ++ * version 0.4.52: EXT2_COMPRESS_WHEN_CLU didn't work. this ++ feature enables compression during file write. 
++ ++15 Oct 2007 ++ Matthias Winkler ++ * First offical Sourceforge release as version 0.4.51 ++ * TODO: figure out what is necessary to enable swap ++ suppport for e2compr again (see mm/swapfile.c). ++ ++27 Sep 2007 ++ Matthias Winkler ++ * System stalled with a lot of I/O during de-compression of ++ USB-Sticks, too. I replaced mark_buffer_dirty ++ with set_buffer_dirty. This achieves that ONLY the buffers ++ and not the pages are marked. Then I write back the ++ buffers with ll_rw_block() at the end of ++ ext2_decompress_cluster() and ext2_decompress_pages(). ++ This should stop flooding the system with dirty pages. ++ Because now every routine waits for its newly dirtied buffers. ++ My system with 128MB of RAM is responding much more better during ++ compression/decompression now. Desompression also seems ++ to be a bit faster. ++ (this change is active with: #ifndef E2C_GENERIC_OSYNC) ++ ++25 Sep 2007 ++ Matthias Winkler ++ * System stalled with a lot of I/O during compression of ++ USB-Sticks. Seems generic_osync_inode() should not be ++ called in ext2_compress_cluster. Therefore I replaced ++ it with ll_rw_block() to write the modified blocks ++ directly back to disk. This gave also a ~100% better ++ performance for compression. ++ ++9 Sep 2007 ++ Matthias Winkler ++ * fixed bdev-bug. this bug appeared primarily when ++ files contained holes. A page with holes, which ++ was dirty caused ext2_get_cluster_blocks [ext2_get_block()] ++ to create ALL blocks of the page, even if there were holes! ++ These allocated hole-blocks weren't set to 0 anywhere and ++ therefore contained invalid data. I changed the ++ code to never allocate these holes. ++ ++ * ext2_truncate() added again to ext2_compress_cluster for ++ uncompressed clusters. Fixes filesize errors reported by ++ "e2fsck -f /dev/..." ++ ++24 Aug 2007 ++ Matthias Winkler ++ ++ Major changes: ++ * completly ported inode->i_mutex ++ ++ * clever CONFIG_GZ_HACK to reject "uncompressable" files ++ (according to their extension) early. The IOCTL in ioctl.c ++ which sets the compression on the file already rejects such ++ extensions now. ++ ++ * new create_empty_buffers_e2c() was necessary, because the ++ "extra"-pages should NOT have a valid i_mapping! Further the ++ buffers needed to be initalized right. ++ ++ * proper block initalization (bdev-bug) in: ++ - create_empty_buffers_e2c() ++ - ext2_get_cluster_blocks ++ ++ * in file.c copied: ++ ...with one single change at ext2_mapping_read in label page_ok: ++ A new Page-Flag (page-flags.h) the so called "PG_compr"-Flag is ++ checked to assure the corresponding page is not under ++ compression/decompression. This was necessary because ++ generic_mapping_read() doesn't lock() the page in ALL cases!!! ++ Otherwise the generic_mapping_read() would have to lock EVERY page ++ in the whole system before returning it.... ++ ++ * Fixed HiMem-Support: Balanced ALL kamp/kunmap calls. Unbalanced ++ functions cause the system to hang at "kmap_himem()" after some ++ time. Can be seen with magic-sysctrl "altgr + prtscr + W". ++ ++ * ext2_decompres_cluster() didn't mark uptodate pages for writeback. ++ Don't know how this method could EVER have worked... ++ ++ * ext2_compress_cluster() caused an always increasing amount of dirty-pages ++ (cat /proc/vmstat) which couldn't be wrote back by sync/umount. ++ I think this was due the ClearPageDirty at the end of ext2_compress_cluster(). 
++ ++ * introduced ext2_get_dcount() to savely determine if a file is really "open" ++ and to abort compression/decompression in such a case. ++ ++ * Removed gzip completely and not working assembler code. Replaced by the ++ kernels built-in zlib, which is pretty the same code... ++ ++ * New kernel configuration interface ++ ++ * Rollback of some unecessary "fixes"... ++ ++ TODO: ++ ++ * HiMem-Support: ++ One might try to use kmap_atomic instead of kamp in ext2_readpage. kmap_atomic ++ doesn't block and might speed up the regular page reading. might. ++ ++20 April 2007 ++ Andreas: ++ ++ * Replaced GZIP with zlib of the kernel because the assembly versions of existing ++ compression modules crashed. ++ ++ * Replaced gzip with the kernel zlib, which is built-in anyway ++ ++ * Initial HiMem-Support. ++ ++ ++06 Mar 2007 ++ ++ Terry Loveall ++ ++ * adapted linux-2.6.10-e2compr-0.4.45-alpha0126.diff to 2.6.18.5 kernel ++ ++ * replaced most instances of down/up(inode->i_sem) with ++ lock/unlock(inode->i_mutex). For exception see file.c, below. ++ ++ * made various printk regularizations to uniquely identify each printk ++ instance. Inserted missing KERN_DEBUG and KERN_WARNING. ++ ++ * compress.c: ++ bug fix: ext2_count_blocks: init head_bh for each iteration. ++ bug fix: ext2_count_blocks: add set clen=ulen for uncompressable clusters. ++ bug fix: ext2_compress_cluster: replacement and inlining of an ++ invalidate_inode_buffers function to keep root filesystem changes ++ uptodate on disk (prevents umounting root file system to update). ++ warning fix: ext2_compress_cluster: various variables initialized. ++ ext2_compress_cluster: removed #ifdef NDEBUG ++ bug fix: ext2_compress_cluster: defined maxclus, calculate and set for: ++ bug fix: ext2_compress_cluster: set filesize for uncompressed clusters. ++ ext2_cleanup_compressed_inode: changed error message to indicate 'Z' ++ flag was caused by trying to un/compress already open file. ++ bug fix: cp to compr dir: Truncate uncompressed files to their ++ uncompressed length, i.e. force kernel to update inode and sb ++ ++ * file.c: ++ removed file->f_error code since f_error no longer in file struct. ++ ext2_file_write: changed down/up i_sem to down_read/up_read i_alloc_sem ++ ++ * inode.c: ++ bug fix: ext2_get_block: restored changed: loop to bforget ++ ++ * ioctl.c: ++ ext2_ioctl: scrubbed 'B' flag on file uncompress. ++ ++ * match[56]86.S: ++ made code dependent on #ifdef CONFIG_REGPARM to compile with either ++ register variable or stack variable parameter passing. ++ ++28 Feb 2005 ++ ++ Yabo Ding , ++ ++ * Corrected page unlocking in inode.c. ++ ++19 Feb 2005 ++ ++ Paul Whittaker ++ ++ * Added corrections le32_to_cpu in critical areas of compress.c ++ * Optimized function exit code in inode.c. ++ ++24 Aug 2004 ++Yabo Ding , ++ ++ compress.c ++* ext2_decompress_pages() ++ The old code cannot reread data from disk to a changed buffers data pointer in 2.6.x. ++ So, I copy memory data(decompressed) to a temporary buffer; ++ Then reread data(compressed) from disk, and copy to head; ++ Then copy back the memory data from temporary buffer. ++ It seems clumsy, but it works well. ++* ext2_compress_cluster() ++ Force write to disk. ++ ++ inode.c ++* ext2_writepage() ++ Delete old code. All directly call block_write_full_page() function. ++ ++* ../Kconfig ++ Change e2compr config as a submenu config ++ ++04 Aug 2004 ++ ++Paul Whittaker ++ ++* compress.c: replaced mark_buffer_dirty(x,y) with mark_buffer_dirty(x). 
I'm ++ still not at all sure that this is sufficient. ++ ++03 Aug 2004 ++ ++Paul Whittaker ++ ++* ../../include/linux/ext2_fs_c.h: added missing prototypes for ext2_iLZRW3A(), ++ ext2_wLZRW3A(), ext2_rLZRW3A(). ++ ++02 Aug 2004 ++ ++Paul Whittaker ++ ++* ../../mm/page_alloc.c: added EXPORT_SYMBOL(__pagevec_free). ++ ++* ../../include/linux/pagemap.h, ../../mm/filemap.c: removed inline from ++ __grab_cache_page() declarations, added EXPORT_SYMBOL(__grab_cache_page). ++ ++* ../../include/linux/mm.h, ../../mm/filemap.c: removed inline from ++ page_waitqueue() declarations, added EXPORT_SYMBOL(page_waitqueue). ++ ++* bzip2/{lib_bzip_d,lib_bzip_e}.c, {gzip,lzo,lzrw3a,lzv1}/e2compr*.c: ++ replaced MOD_INC_USE_COUNT and MOD_DEC_USE_COUNT with try_module_get() ++ and module_put() to avoid deprecation and safety warnings. ++ ++* lzrw3a/lzrw3a.c: added (UBYTE *) casts to avoid compiler warnings. ++ ++* compress.c, inode.c: incorporated Yabo's changes, correcting mistakes in ++ ext2_readpages() in inode.c. ++ ++* removed printks for ext2_discard_prealloc from file.c and inode.c (not ++ needed now that this problem has been resolved). ++ ++2.6.5 -> 2.6.7 updates: ++ ++* ../../mm/filemap.c: rewrote CONFIG_EXT2_COMPRESS hunk for 2.6.7. ++ ++* compress.c, file.c: use mapping_mapped(), since mapping->i_mmap has changed ++ and mapping->i_mmap_shared no longer exists. ++ ++* inode.c: page->count becomes page->_count. +--- linux-3.2-rc5/Documentation/filesystems/e2compress.txt 1970-01-01 01:00:00.000000000 +0100 ++++ linux-3.2-rc5-e2c/Documentation/filesystems/e2compress.txt 2011-12-13 14:22:47.824975303 +0100 +@@ -0,0 +1,116 @@ ++Transparent compression for ext2 filesystem ++=========================================== ++ ++What this document is. ++---------------------- ++This document is intended to explain how e2compress has been implemented/ported ++in kernel 2.4. It also gives the status of the current work. You need to have e2compress ++knowledge (i.e. to know how e2compress works, from a general point of view). ++ ++What this document is not. ++-------------------------- ++This document is not a full explanation of how e2compress works. For this, ++there are other documents such as the fs/ext2/Readme.e2compr file for the technical ++point of view; a user manual can be found at . ++That site is also a place where you will find much information about e2compress ++development for kernel 2.4, tools, manuals and so on. ++ ++ ++Introduction ++============ ++ ++This is a first adaptation of e2compress for kernel 2.4. The work has been done ++by Alcatel (Alcatel Business Systems - R&D) at Illkirch. It was started ++from the latest patch provided by Peter Moulder for kernel 2.2, ++i.e. e2compr-0.4.39-patch-2.2.18. ++It is fully compatible with the previous version. ++Below you will first find some explanations of the choices made during ++development, and then the status of the current work from a functional point of ++view. ++ ++ ++Development ++=========== ++ ++As with previous patches, the most interesting work happens when reading in ext2_readpage ++and when writing in ext2_writepage and ext2_file_write. ++In the 2.2 kernel, compression occurs on clusters of blocks. So when reading ++or writing a part of a file, we first have to compute the cluster on which the I/O ++occurs, then we have to get every buffer of the cluster, uncompress the data if ++needed, and then reading/writing happens "as for normal files". ++In 2.4 kernels, I/O occurs through the page cache: i.e. when reading/writing to a ++part of the file, first the corresponding page is obtained, then we get the needed ++buffers, which point to the page; this means that to keep the same behaviour as in 2.2, ++we have to use the notion of a cluster of pages. To get every buffer of a cluster, ++we first get every page of the cluster, then get the buffers of every page... ++ ++So, things happen as follows: ++ ++ext2_readpage ++------------- ++If the data corresponding to the page are in a compressed cluster, this function performs ++more work: instead of reading one page, it reads the whole "cluster of pages". ++In any case, we have to read all the compressed buffers. Once we have got all buffers ++of the cluster, uncompressed (at least a part of) the data, and located the part of ++the uncompressed data which corresponds to the requested page, there is not much more ++work in also reading (i.e. doing some memcpy) the other pages belonging to this ++cluster. ++So, the first read of the first page of the cluster takes quite a bit longer, but then ++every page of the cluster is uptodate in the cache. ++ ++ext2_writepage ++-------------- ++An overhead has been added for pages belonging to a compressed cluster. ++In fact, if the cluster is still compressed on the disk, we can't directly write the ++page (which contains uncompressed data) into the middle of a compressed cluster. ++So, we first have to uncompress the whole cluster on the disk, and then we can write the ++new data of the dirty page(s). ++ ++ext2_file_write ++--------------- ++This replaces `generic_file_write' when the e2compress option is activated. ++It is a copy of `generic_file_write'. The main difference is that instead of looping ++page by page as in `generic_file_write', we loop on clusters of pages. ++In each loop: ++ * we compute the cluster to which the beginning of the data (to be written) belongs. ++ * then, we get all pages of the cluster. ++ * If the cluster is a compressed one, we read all pages and uncompress it. ++ Otherwise, we perform a `prepare_write' (as in generic_file_write). ++ * We copy the data into each page from user space, ++ * Call `commit_write' on dirty pages. ++ * When reaching the end of the cluster, we compress it (as in 2.2). ++ ++Note: Another implementation could have been to keep generic_file_write, and add an overhead ++to `ext2_prepare_write' and `ext2_commit_write'; on the first access to a page of a compressed ++cluster, the whole cluster would be uncompressed (i.e. all pages of the cluster would be read and ++uncompressed in `ext2_prepare_write') and when committing the last page of the cluster, ++compression would occur... ++ ++ext2_open_file ++-------------- ++In the 2.4.16 kernel, this function has been added to handle the case of files opened for ++"direct IO". Direct IO is not supported on compressed files, so opening a file this way ++is forbidden. ++ ++Other places in ext2 ++-------------------- ++Other changes occur as in 2.2 for managing the compression flags of files and the specific ++`COMPRESSED_BLK_ADDR' address for compressed blocks. ++So please refer to the existing documentation for 2.2 about this topic. ++ ++Status ++====== ++Today (middle of December 2001), e2compress on kernel 2.4.16 has been tested on the i386 ++architecture, and has been used successfully by tens of people in the department for some weeks. ++It is fully functional on ix86 and fully compatible with the 2.2 version of e2compress. ++It should work on other architectures, but has NOT been tested there. ++Please note the following: ++ * No performance tests have been done. ++ * I don't claim that the code is optimized (it is probably not, but I hope that ++ "gurus" will not find it too bad). ++So, I think I can say that there is no known "big" bug or "blocking" bug. ++ ++Some strange things have been observed in very limiting cases, i.e. when memory is overloaded. ++ ++ ++As usual, this e2compress comes without warranty, use it at your own risk, etc... +--- linux-3.2-rc5/fs/ext2/Readme.e2compr 1970-01-01 01:00:00.000000000 +0100 ++++ linux-3.2-rc5-e2c/fs/ext2/Readme.e2compr 2011-12-13 14:22:47.825975345 +0100 +@@ -0,0 +1,511 @@ ++ ++ 0. Introduction ++ ~~~~~~~~~~~~~~~ ++ ++This file gives some technical information on e2compr and how it's ++implemented. ++ ++More general information on e2compr can be found at ++http://e2compr.sourceforge.net/. ++ ++The first couple of sections of this document are written for those ++who have no interest in the source code but just want to know enough ++to be able to predict and understand e2compr behaviour and its ++implications. ++ ++Section 3 describes the e2compr-specific ext2 attributes for a file ++(i.e. chattr things). ++ ++Section 4 describes the e2compr ioctls from the point of view of a ++user-mode C programmer. ++ ++Section 5 gives more detail about the file format on disk. ++ ++Section 6 gives details on what's written where, i.e. a map of e2compr ++code in the kernel. ++ ++ ++Authorship: section 2 is written mainly by Antoine; the remainder is ++written by Peter. ++ ++Questions should be sent to the e2compr mailing list, ++e2compr-misc@lists.sourceforge.net, or to the current maintainers, ++bothie@users.sourceforge.net and whitpa@users.sourceforge.net. ++ ++ ++ 1. The idea ++ ~~~~~~~~~~~ ++ ++See section `E2compr implementation' in the main e2compr texinfo ++documentation for an introduction to how e2compr works. (Type ++`info "(e2compr)Implementation"' at the shell prompt.) It was ++originally written as part of the file you're now reading. ++ ++ ++ 2. More details ++ ~~~~~~~~~~~~~~~ ++ ++Every compressed file stores its cluster size in the inode structure ++(in the ext2 attribute flags field). ++This (the cluster size) is the most important information: once we ++know the cluster size, we can convert a block number into a cluster ++number, get the cluster the block belongs to, and then get the block. ++The inode's flags field also keeps the algorithm that is used to compress data ++written to the file. ++ ++(The algorithm that was used to compress a given ++cluster is stored in the cluster head near the beginning of the ++compressed data. This may differ from the current algorithm ++identified in the inode, which is only used to determine which ++algorithm to use at the time clusters are written.) ++ ++The algorithm id and the cluster size are stored in the i_flags field ++(thus reducing the number of possible flags). We also create some new ++flags: the COMPRBLK flag tells if there is at least one compressed ++cluster in the file, and the ECOMPR flag indicates that an error (related ++to compression) occurred while reading from or writing to this file. ++If it is set, the file becomes read-only. (In previous releases, you ++were denied even read access to the file unless you set the NOCOMPR ++flag. There might be some benefit in returning to the old behaviour ++if decompressing erroneous data can cause an OOPS, but I think it ++would be better to correct the decompressors. Others may disagree, ++pointing out that it costs CPU time to check for incorrect data.)
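For a quick way to see these flags from user space, the standard EXT2_IOC_GETFLAGS ioctl (the same one chattr/lsattr use) is enough. Below is a minimal sketch, not part of the patch itself, assuming an e2compr-patched kernel and a <linux/ext2_fs.h> that exposes EXT2_IOC_GETFLAGS and the EXT2_*_FL bits:

#include <stdio.h>
#include <fcntl.h>
#include <unistd.h>
#include <sys/ioctl.h>
#include <linux/ext2_fs.h>   /* assumption: e2compr-patched kernel headers are visible */

int main(int argc, char **argv)
{
        long flags = 0;
        int fd;

        if (argc != 2) {
                fprintf(stderr, "usage: %s FILE\n", argv[0]);
                return 1;
        }
        fd = open(argv[1], O_RDONLY);
        if (fd < 0 || ioctl(fd, EXT2_IOC_GETFLAGS, &flags) < 0) {
                perror(argv[1]);
                return 1;
        }
        /* Letters match the lsattr column in section 3 below. */
        printf("c (compress new writes)      : %s\n", (flags & EXT2_COMPR_FL)    ? "set" : "clear");
        printf("B (has compressed clusters)  : %s\n", (flags & EXT2_COMPRBLK_FL) ? "set" : "clear");
        printf("X (raw access requested)     : %s\n", (flags & EXT2_NOCOMPR_FL)  ? "set" : "clear");
        printf("E (compression error)        : %s\n", (flags & EXT2_ECOMPR_FL)   ? "set" : "clear");
        close(fd);
        return 0;
}

Note that on a kernel with e2compr compiled in, the cluster-size and method bits are masked out of the GETFLAGS result (see section 4), so only the attribute flags are visible this way.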
++ ++Beside the information stored into the inode, each cluster holds some ++data. Here is the cluster_head structure for e2compr-0.4: ++ ++struct ext2_cluster_head { ++ __u16 magic; /* == EXT2_COMPRESS_MAGIC_04X. */ ++ __u8 method; /* compression method id. */ ++ __u8 holemap_nbytes; /* length of holemap[] array */ ++ __u32 checksum; /* adler32 checksum. Checksum covers all fields ++ below this one, and the compressed data. */ ++ __u32 ulen; /* size of uncompressed data */ ++ __u32 clen; /* size of compressed data (excluding cluster head) */ ++ __u8 holemap[0]; /* bitmap describing where to put holes. */ ++}; ++ ++The `magic' field is a magic number. It is used to detect filesystem ++corruption, and can also be used for data recovery purposes. (The ++e2compress program for e2compr-0.3 does this.) ++ ++The `checksum' field contains an Adler-32 checksum on the fields below ++it in the struct and the compressed data. Its purpose is to protect ++us from buffer overruns caused by corrupted data. ++ ++The `ulen' field says how many bytes are stored in the cluster, when ++uncompressed. ++ ++The `clen' field says how many bytes are held in the cluster, when ++compressed. ++ ++The `method' ++field identifies the algorithm that was used to compress the cluster ++(this id will be used to uncompress the cluster, not the one stored ++into the inode that will be used only to compress a new cluster). ++ ++The variable-length `holemap' array says where to put hole blocks when ++decompressing data. The `holemap_nbytes' field gives the length of ++this array. Iff holemap_nbytes is zero then there are no holes (other ++than at the end of the cluster, as determined by ulen versus cluster ++size). ++ ++The compressed data immediately follows the holemap array (with no ++padding before it). ++ ++ ++Compressing a cluster is done in the following way: We first get every ++block in the cluster and compute the bitmap. We then compress the ++non-hole data, and store back the compressed data into the existing ++blocks. Unused blocks are then freed. ++ ++Decompressing a cluster is done in the following way: We get the ++cluster head and retrieve the bitmap. Missing blocks are allocated and ++put where the bitmap says, and then compressed data is decompressed and ++stored back into the blocks. ++ ++ ++Reading from a compressed cluster is really easy: get the blocks, ++decompress them into a working area, and get the bytes we want from ++the working area. Writing to a compressed cluster is done by first ++decompressing the cluster, and then write to it, as if it were a ++normal file. The file is then marked so that the cluster will be ++recompressed later. [pjm: Do we decompress the cluster even if it's ++to be entirely written over?] ++ ++In the current version, compression really occurs only when the inode ++is put (which in turn only occurs when no processes have the file ++open). This may change. ++ ++ ++ 3. Ext2 file attributes ++ ~~~~~~~~~~~~~~~~~~~~~~~ ++ ++Attribute Lsattr Meaning ++~~~~~~~~~ ~~~~~~ ~~~~~~~ ++EXT2_SECRM_FL s Secure deletion (not yet implemented) ++EXT2_UNRM_FL u Undelete-able. (Not yet implemented.) ++EXT2_COMPR_FL c Future writes to this file should be compressed. ++ (Clearing this flag decompresses the file if it ++ is a regular file and there is space to do so; ++ see the e2compr FAQ for details.) ++EXT2_SYNC_FL S Synchronous updates. (As far as I know, this is ++ not yet fully implemented.) ++EXT2_IMMUTABLE_FL i Immutable file. ++EXT2_APPEND_FL a Writes to file may only append. 
++EXT2_NODUMP_FL d Not a candidate for backup with dump(8). ++EXT2_NOATIME_FL A No access time updates. ++EXT2_DIRTY_FL Z De/compression is yet to happen. Read the ++ source for exact meaning. ++EXT2_COMPRBLK_FL B File contains one or more compressed clusters. ++EXT2_NOCOMPR_FL X Access raw compressed data. This isn't really ++ supported at the moment; user-space access is ++ yet to be worked out for 0.4. ++EXT2_ECOMPR_FL E Compression error associated with this file ++EXT2_BTREE_FL I B-tree indexed directory (seemingly not yet implemented) ++EXT2_RESERVED_FL - (reserved for ext2 lib) ++ ++See the chattr(1) man page for more verbose descriptions of the ++non-e2compr flags. ++ ++ ++ 4. Ioctls available ++ ~~~~~~~~~~~~~~~~~~~ ++ ++ In brief ++ ~~~~~~~~ ++ ++Action Ioctl To kernel From kernel ++~~~~~~ ~~~~~ ~~~~~~~~~ ~~~~~~~~~~~ ++Get cluster bit EXT2_IOC_GETCLUSTERBIT Cluster num 1 or 0 (cmp,uncmp) ++Recognize compressed Cluster num - ++ EXT2_IOC_RECOGNIZE_COMPRESSED ++Get algorithm EXT2_IOC_GETCOMPRMETHOD - Id ++Set algorithm EXT2_IOC_SETCOMPRMETHOD Id - ++Get cluster size EXT2_IOC_GETCLUSTERSIZE - Cluster size ++Set cluster size EXT2_IOC_SETCLUSTERSIZE Cluster size - ++Get attributes EXT2_IOC_GETFLAGS - Flags ++Set attributes EXT2_IOC_SETFLAGS Flags - ++Get block size FIGETBSZ - Block size ++ ++#include to use any of these ioctls, except FIGETBSZ, ++which requires . ++ ++To find out what errors can be returned by these ioctls, read ++fs/ext2/ioctl.c (for all of the above ioctls except FIGETBSZ) or ++fs/ioctl.c (for FIGETBSZ). ++ ++ ++ Setting or testing a cluster bit ++ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ++ ++[Note: user-space access to compression details are yet to be worked out, ++so this section may not be accurate.] ++ ++EXT2_IOC_GETCLUSTERBIT sets *arg to 1 if the specified cluster (0 for first ++cluster, 1 for second, etc.) is stored in compressed form. ++ ++To make the kernel consider a certain cluster to be compressed (after ++you've done the compression yourself, in user space), use ++EXT2_IOC_RECOGNIZE_COMPRESSED. This ioctl checks the validity of the ++cluster's data, then marks it as compressed (if valid). This ioctl ++requires special priveleges, because if the compressed data is not ++valid then it may be possible to crash the system (due to buffer ++overruns). ++ ++ ++ Setting or getting the compression algorithm ++ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ++ ++EXT2_IOC_SETCOMPRMETHOD sets the default compression method (stored in ++the inode). This is the compression method that is used for future ++writes. In the current version of e2compr [accurate at 0.4.36], this ++does not cause a change to how ++existing clusters are stored, except when the compression method ++changes from `none' to something else, in which case the kernel ++attempts to compress ,all currently-uncompressed clusters` using the ++new algorithm. It is an error to use this ioctl on a file without the ++compressed attribute. ++ ++EXT2_IOC_GETCOMPRMETHOD sets *arg to the current compression method. ++ ++In either case, Id is one of: EXT2_DEFER_METH, EXT2_LZV1_METH, ++EXT2_AUTO_METH, EXT2_NEVER_METH, EXT2_BZIP2_METH, EXT2_LZO1X_1_METH, ++EXT2_LZRW3A_METH (deprecated), EXT2_GZIP1_METH, EXT2_GZIP2_METH, ..., ++EXT2_GZIP9_METH. ++ ++ ++ Setting or getting the cluster size ++ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ++ ++EXT2_IOC_SETCLUSTERSIZE sets the cluster size to the value of *arg. 
++This ioctl fails if there are already compressed clusters in the file ++(as determined by checking the EXT2_COMPRBLK_FL attribute). ++ ++EXT2_IOC_GETCLUSTERSIZE sets *arg to the current cluster size. ++Surprisingly, this ioctl succeeds even if the EXT2_COMPR_FL attribute ++is clear. (Maybe this will change in future, since the result is ++meaningless.) ++ ++In either case, the size is one of {4, 8, 16, 32}, and represents the ++number of blocks per cluster. To convert to or from a number of ++bytes, use the FIGETBSZ ioctl. ++ ++ ++ Setting or getting the ext2 file attributes ++ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ++ ++These ioctls (EXT2_IOC_GETFLAGS and EXT2_IOC_SETFLAGS) are not ++e2compr-specific, but some attributes are e2compr-specific. ++ ++*arg consists of the set of attributes for that file OR'ed together. ++E.g. a value of (EXT2_COMPR_FL | EXT2_COMPRBLK_FL | EXT2_NODUMP_FL) ++for a regular file means that the file contains one or more compressed ++clusters, and should not be backed up when using dump(8). ++ ++See section 3 for a description of the various attributes. ++ ++Note that although the compression method and cluster size are ++physically stored in the flags field on disk this information is ++masked out (i.e. set to zero) for GETFLAGS if the kernel has e2compr compiled in. ++If the kernel does not have e2compr compiled in, then this information ++is not masked out. See section 5 for how the cluster size and ++compression method is stored if you wish to work with ,kernels without ++e2compr`. ++ ++ ++ Getting the block size ++ ~~~~~~~~~~~~~~~~~~~~~~ ++ ++This ioctl (FIGETBSZ) is not e2compr-specific, but is useful in ++interpreting a cluster size (which is specified as a number of blocks ++rather than bytes or kilobytes). ++ ++*arg is set to the block size (in bytes) of the file. For ext2 files, ++this is one of {1024,2048,4096}. It is the same value for all files ++on the same filesystem. ++ ++You must #include to use this ioctl (unlike the rest of ++the ioctls listed here, which require ). ++ ++ ++ 5. File format ++ ~~~~~~~~~~~~~~ ++ ++A note on byte ordering. All current versions of the kernel and ++e2compr write to disk in little-endian format, so the 16-bit number ++`0x8EC7' would be written as a 0xC7 byte followed by a 0x8E byte. ++Unless you want to know the most general rule for byte ordering, you ++can skip to the `Inode' heading. ++ ++In kernel 2.0, the ext2 fs is written to disk in the native byte ++ordering. On x86 machines, this means little endian; most other ++architectures are big-endian (so the same 16-bit number would be ++written as an 0x8E byte followed by 0xC7). ++ ++On kernel 2.1 and later, the ext2 fs (including e2compr data) is ++written in little-endian order regardless of the host architecture. ++ ++ ++ 5.1. Inode ++ ~~~~~~~~~~ ++ ++fs/inode.c controls the reading and writing of inode information ++to/from disk; consult this file (functions ext2_read_inode(), ++ext2_update_inode() and/or ext2_write_inode()) for any detail omitted ++from this section. ++ ++The physical structure of an inode is struct ext2_inode (defined in ++include/linux/ext2_fs.h). ++ ++ ++The i_flags member contains the ext2 file attributes, as well as ++cluster size and compression method. ++ ++The normal flags are stored in the low 23 bits. Only the low 12 bits ++are defined at present, including 4 flags introduced by the e2compr ++patch. See ext2_fs.h for the flag meanings (search for ++EXT2_SECRM_FL). 
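Since all of the e2compr on-disk fields are little-endian (see the byte-ordering note above), a user-space tool that inspects a compressed cluster has to do explicit conversions. The following is only a sketch: the struct mirrors the cluster head shown in section 2, the magic value is assumed to be the EXT2_COMPRESS_MAGIC_04X constant (0x9ec7) from ext2_fs_c.h, and checksum verification is only indicated in a comment.

#include <stdint.h>

/* Mirror of struct ext2_cluster_head (section 2); all fields little-endian on disk. */
struct e2c_head {
        uint16_t magic;          /* EXT2_COMPRESS_MAGIC_04X */
        uint8_t  method;         /* compression method id used for this cluster */
        uint8_t  holemap_nbytes; /* length of the holemap[] array */
        uint32_t checksum;       /* adler32 over the fields below plus the compressed data */
        uint32_t ulen;           /* uncompressed size */
        uint32_t clen;           /* compressed size, cluster head excluded */
};

static uint16_t get_le16(const uint8_t *p) { return p[0] | (p[1] << 8); }
static uint32_t get_le32(const uint8_t *p)
{
        return p[0] | (p[1] << 8) | ((uint32_t)p[2] << 16) | ((uint32_t)p[3] << 24);
}

/* Hypothetical helper: parse the head found at the start of the first
   non-hole block of a compressed cluster. Returns 0 if it looks sane. */
static int e2c_parse_head(const uint8_t *blk, struct e2c_head *h)
{
        h->magic          = get_le16(blk + 0);
        h->method         = blk[2];
        h->holemap_nbytes = blk[3];
        h->checksum       = get_le32(blk + 4);
        h->ulen           = get_le32(blk + 8);
        h->clen           = get_le32(blk + 12);
        /* holemap[] starts at offset 16, compressed data follows it directly. */

        if (h->magic != 0x9ec7)  /* EXT2_COMPRESS_MAGIC_04X in ext2_fs_c.h */
                return -1;
        /* A real tool would also recompute the adler32 over ulen, clen,
           holemap[] and the clen bytes of compressed data and compare it
           with h->checksum; the exact seed is not restated here, so mirror
           ext2_adler32() in compress.c rather than guessing. */
        return 0;
}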
++ ++Bits 23 through 25 hold the cluster size, or more precisely the log2 of ++the number of filesystem blocks per cluster (excluding the first cluster; ++see ext2_first_cluster_nblocks in include/linux/ext2_fs_c.h). ++ ++Bits 26 through 30 store the compression method. See the definitions ++for EXT2_LZV1_METH etc. in ext2_fs_c.h for the interpretation. ++ ++Bit 31 is reserved for ext2 lib (which means that programs like e2fsck ++store things there during its operation but it isn't used by the ++kernel). ++ ++ ++ Data blocks ++ ~~~~~~~~~~~ ++ ++Uncompressed clusters are stored just as they would be without ++e2compr. So if there are no compressed clusters then the file ++is stored identically to any other file. ++ ++ ++If a cluster is compressed, then the first non-hole block starts with ++a `cluster head', as defined in struct ext2_cluster_head in ext2_fs.h. ++ ++The magic number (i.e. the value of the `magic' field) is 0x8ec7. ++`method' holds one of EXT2_LZV1_ID and the like. `reserved_0' ++contains zero. `ubitmap' describes where the uncompressed data goes. ++(Recall that when we compress a cluster, we only compress the data ++from non-hole blocks, so we need to know where the holes and non-holes ++go when we decompress the data.) A `0' bit means a hole and a `1' bit ++means a data block; bit 0 refers to the first block, b1 the second, ++and so on. ++ ++ ++The block positions within the file where the compressed data is held ++is a subset of where the uncompressed data would be held. Further, if the ++uncompressed data occupies u non-hole blocks and this compresses to c ++blocks, then the compressed data occupies the first c non-hole blocks ++of the file (and the remainder are freed). ++ ++[This paragraph is an expansion of the preceeding: if you understood ++the preceeding paragraph then skip this one.] Consider an array ++cblock[] where cblock[0] holds the block number on disk (or 0 to ++represent a hole) of the first block of a certain cluster of a file, ++cblock[1] the second, and so on. (If you are familiar with the bmap ++array or the format of first-level indirect blocks, then cblock[] is a ++section of that array.) Suppose that the cluster size of this file is ++16 blocks. Suppose too that, when uncompressed, blocks 0, 1, 5 and 6 ++of the cluster are holes but the other 12 blocks (2,3,4,7,8,...,15) ++contain data. (Thus the bitmap is 0x0000ff9c.) Now if we compress this ++cluster to just 5 blocks, then cblock[0], [1], [5] and [6] will continue ++to be holes, ,the positions of the compressed data blocks` are stored in ++cblock[2], cblock[3], [4], [7] and [8], the blocks referenced by ++cblock[9] through cblock[15] are freed, and cblock[9] through cblock[15] ++are set to zero. ++ ++ ++ 6. What's coded where ++ ~~~~~~~~~~~~~~~~~~~~~ ++ ++File names in this section are relative to linux/fs/ext2, except for ++ext2_fs.h which is in linux/include/linux. ++ ++Most of the action happens in compress.c; though note that a few ++small, commonly-used routines are written as inline functions in ++ext2_fs.h. ++ ++ext2_readpage() and ext2_mmap() are in file.c. ext2_file_write() is ++also there. ++ ++Routines to read/write the inode from/to disk are in inode.c. ++ ++super.c contains some e2compr initialisation code (such as allocating ++the e2compr work area). ++ ++All ioctl handling is in ioctl.c. 
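Putting the i_flags layout above into code, the e2compr fields can be recovered like this. This is only an illustrative sketch: the masks are written out by hand, and the patch itself caches the values in EXT2_I(inode)->i_log2_clu_nblocks, i_clu_nblocks and i_compr_method rather than re-deriving them on every use (see `Notes on a few variables' below).

#include <stdint.h>
#include <stdio.h>

/* Illustrative split of an e2compr i_flags word, per the layout above. */
static void e2c_decode_iflags(uint32_t i_flags)
{
        uint32_t attrs      = i_flags & 0x007fffff;   /* low 23 bits: chattr attribute flags */
        unsigned log2_nblks = (i_flags >> 23) & 0x7;  /* bits 23-25: log2(blocks per cluster) */
        unsigned method     = (i_flags >> 26) & 0x1f; /* bits 26-30: method id (index into
                                                         ext2_method_table[], 32 entries)     */

        printf("attribute bits     : %#x\n", attrs);
        printf("blocks per cluster : %u\n", 1u << log2_nblks);
        /* Note: cluster 0 may be shorter; see ext2_first_cluster_nblocks(). */
        printf("method id          : %u\n", method);
}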
++ ++acl.c is where we deny open() access in a couple of situations (if the ++EXT2_NOCOMPR_FL is set and another process has the file open; and we ++deny write access to a file with EXT2_ECOMPR_FL set). ++ ++ialloc.c contains code in ext2_new_inode() for newly-created files to ++inherit compression attributes from the directory in which they're ++created. ++ ++truncate.c handles truncation, i.e. zeroing any part of the cluster ++bitmap that's been truncated, and decompressing the final cluster (but ++marking dirty so that we try to recompress it on file close) if the ++new size is part-way through a compressed cluster, so that zeroing ++over the truncated data works. ++ ++linux/include/linux/ext2_fs_i.h has the definition of the ++ext2-specific parts of the in-memory inode. (The on-disk inode is ++defined in ext2_fs.h.) ++ ++linux/mm/filemap.c is also interesting, though there's no ++e2compr-specific code there. Similarly linux/include/linux/mm.h and ++linux/include/linux/fs.h. ++ ++generic_readpage() is in linux/fs/buffer.c. Also all buffer handling. ++ ++ ++The cleanup scheme ++~~~~~~~~~~~~~~~~~~ ++ ++inode->u.ext2_i.i_compr_flags has only a single bit defined: ++EXT2_CLEANUP_FL. This bit gets set to 1 to indicate that ++ext2_cleanup_compressed_inode() needs to be called. ++ ++There is a related flag stored on disk as well as in memory: ++EXT2_DIRTY_FL of i_flags. If ext2_cleanup_compressed_inode() couldn't ++finish it's job (e.g. due to I/O error) then it clears EXT2_CLEANUP_FL ++of i_compr_flags, but leaves EXT2_DIRTY_FL high. ++ ++In ext2_read_inode(), if EXT2_DIRTY_FL is high then EXT2_CLEANUP_FL is ++raised, in the hope that ,whatever was preventing ++ext2_cleanup_compressed_inode() from finishing` is now past. ++ ++Except for ext2_read_inode() as noted above, everything that raises ++EXT2_CLEANUP_FL (i.e. ext2_write_file(), ext2_ioctl() and ++ext2_truncate()) also raises EXT2_DIRTY_FL. ++ ++Nothing lowers either EXT2_CLEANUP_FL or EXT2_DIRTY_FL except ++ext2_cleanup_compressed_inode() (and one or both of new_inode and ++delete_inode routines). ++ ++ ++One feels that at least one of these cleanup flags ought to ++disappear. The main use of the persistent EXT2_DIRTY_FL is where the ++user does `chattr -c' in order to decompress the file, but there isn't ++enough space on the device to do this. We can get rid of this problem ++by having ext2_ioctl() call ext2_cleanup_compressed_inode() ++try to ++ ++ ++Notes on a few variables ++~~~~~~~~~~~~~~~~~~~~~~~~ ++ ++Don't confuse the inode->i_dirt flag with (inode->u.ext2_i.i_flags & ++EXT2_DIRTY_FL). See section `The cleanup scheme' above for a ++description of EXT2_DIRTY_FL. ++ ++ ++inode->u.ext2_i.i_clu_nblocks, ++inode->u.ext2_i.i_log2_clu_nblocks: ++ ++i_clu_nblocks is always equal to ,1 << i_clu_nblocks` (except during a ++couple of cycles while they're being changed; I haven't consciously ++tried to avoid problems for SMP machines in this respect). ++ ++i_clu_nblocks is the number of blocks per cluster for this inode. ++ ++Old information: these variables were previously called ++`i_cluster_bits' and `i_cluster_size'. They were in an array: ++ ++inode->u.ext2_i.i_cluster_bits[2], ++inode->u.ext2_i.i_cluster_size[2]: ++ ++I believe the reason these were declared as an array was for the case ++where someone changes the cluster size of a file that was already ++compressed. (Reason for this belief: All readers of these fields use ++[0]. 
On creation (ialloc), read_inode, and `chattr +c' (where ++previously uncompressed), both [0] and [1] are updated. On change ++(IOC_SET_CLUSTERSIZE), only [0] is updated.) Since ,changing cluster ++size of an already-compressed file` isn't implemented, I've renamed ++them and made them scalars rather than arrays. ++ ++ ++inode->u.ext2_i.i_flags: When the e2compr patch is applied, this ++variable only holds the low 24 bits of the on-disk i_flags field. ++(Without the e2compr patch applied, all 32 bits are available. An ++interesting side effect of this is that user programs can access the ++compression algorithm and cluster size on kernels without e2compr ++patch by using the EXT2_IOC_GETFLAGS, EXT2_IOC_SETFLAGS ioctls.) ++ ++ ++inode->u.ext2_i.i_compr_method: Holds the compression method ++identifier. Starting from e2compr-0.4.0, this is different from an ++algorithm identifier: an example of a method is gzip9; the ++corresponding algorithm is gzip. See compress.c for where ++ext2_method_table and ext2_algorithm_table are defined. ext2_fs.h has ++some enumerations for addressing these tables (search for ++`EXT2_NONE_METH' and `EXT2_NONE_ALG'). +--- linux-3.2-rc5/fs/Kconfig 2011-12-10 00:09:32.000000000 +0100 ++++ linux-3.2-rc5-e2c/fs/Kconfig 2011-12-13 14:22:47.826975380 +0100 +@@ -7,6 +7,126 @@ menu "File systems" + if BLOCK + + source "fs/ext2/Kconfig" ++ ++config EXT2_COMPRESS ++ bool "Ext2 file compression (DANGEROUS)" ++ depends on EXT2_FS && EXPERIMENTAL ++ select CRYPTO ++ select CRYPTO_ALGAPI ++ select CRYPTO_DEFLATE ++ select ZLIB_INFLATE ++ select ZLIB_DEFLATE ++ help ++ Ext2 file compression allows transparent compression of files on an ++ ext2 filesystem. Transparent compression means that files are ++ stored on the disk in a compressed format but they are automatically ++ decompressed as they are read in and compressed when written out. ++ The user is in control of how and which files are compressed, using ++ the `chattr' utility (see chattr(1)). For the sake of safety, ++ administrative data (superblock, inodes, directories, etc.) are not ++ compressed. ++ ++ Compression is very useful if you're short on disk space, and ++ provides a better option than having lots of .gz files around. ++ For more information, see . ++ ++ You _need_ to have the special e2compr version of e2fsck to be able ++ to make use of this. ++ ++ If you say Y, you will be asked which compression algorithms you wish ++ to include. Gzip is a good all-round algorithm, as its 1..9 parameter ++ allows a good range of speed/compression trade-off. Other noteworthy ++ algorithms are LZV, which caters better to the faster/less compressing ++ end of the scale, and bzip, which caters slightly better to the more ++ compressing but slower end of the scale. ++ ++ Ext2 compression is still experimental, so unless you know you need ++ it, you'd better say N. ++ ++menu "Ext2 file compression options" ++ depends on EXT2_COMPRESS ++ ++choice ++ #depends on EXT2_DEFAULT_COMPR_METHOD_GZIP ++ prompt "Gzip parameter for default compression method" ++ default EXT2_DEFAULT_COMPR_METHOD_GZIP8 ++ help ++ You have selected `gzip' as your default compression algorithm, but ++ I need to know whether to use `gzip -1', `gzip -9', or somewhere ++ in between. gzip1 is the least compressing but fastest; gzip9 is the ++ most compressing and slowest; and the numbers in between have ++ characteristics in between (though not on a linear scale). ++ If unsure, say `8'. 
++ ++config EXT2_DEFAULT_COMPR_METHOD_GZIP1 ++ bool "1" ++config EXT2_DEFAULT_COMPR_METHOD_GZIP2 ++ bool "2" ++config EXT2_DEFAULT_COMPR_METHOD_GZIP3 ++ bool "3" ++config EXT2_DEFAULT_COMPR_METHOD_GZIP4 ++ bool "4" ++config EXT2_DEFAULT_COMPR_METHOD_GZIP5 ++ bool "5" ++config EXT2_DEFAULT_COMPR_METHOD_GZIP6 ++ bool "6" ++config EXT2_DEFAULT_COMPR_METHOD_GZIP7 ++ bool "7" ++config EXT2_DEFAULT_COMPR_METHOD_GZIP8 ++ bool "8" ++config EXT2_DEFAULT_COMPR_METHOD_GZIP9 ++ bool "9" ++ ++endchoice ++ ++config GZ_HACK ++ bool "Exclude .gz files from automatic compression" ++ depends on EXT2_COMPRESS ++ default y ++ help ++ If you say Y here, then files created with names ending in `.gz' or ++ `.?gz' or `.bz2' don't inherit the `c' ("compress") attribute from ++ their parent directory. (However, you can still do `chattr +c FILE' ++ if you want to try to compress it anyway.) This means that you ++ don't waste CPU time trying to compress a file that probably can't ++ be compressed. See fs/ext2/namei.c if you want to add other rules. ++ If you have any aesthetic sensibilities then you will say N here ++ and try to implement something better. Most people will say Y here. ++ ++ ++choice ++ depends on EXT2_COMPRESS ++ prompt "Default cluster size (in blocks, usually 1KB each)" ++ default EXT2_DEFAULT_CLUSTER_BITS_5 ++ help ++ To make random access to compressed files reasonably fast the files ++ are compressed in clusters. By default, the clusters will be of the ++ size defined here but there is a modified version of the chattr ++ utility that can set the cluster size for each file independently. ++ Large clusters usually result in better compression at the cost of ++ being slower. ++ ++ Note that the answer to this question is specified in filesystem ++ blocks rather than in kilobytes, though most filesystems have 1KB ++ blocks anyway. (If you have a filesystem with large blocks then ++ you should know it, but if you want to check then "tune2fs -l ++ /dev/xxx | grep size".) The default is 32 blocks which is the ++ slowest setting but gives the best compression. ++ ++config EXT2_DEFAULT_CLUSTER_BITS_2 ++ bool "4" ++config EXT2_DEFAULT_CLUSTER_BITS_3 ++ bool "8" ++config EXT2_DEFAULT_CLUSTER_BITS_4 ++ bool "16" ++config EXT2_DEFAULT_CLUSTER_BITS_5 ++ bool "32" ++ ++endchoice ++ ++endmenu ++ ++ + source "fs/ext3/Kconfig" + source "fs/ext4/Kconfig" + +--- linux-3.2-rc5/include/linux/ext2_fs_c.h 1970-01-01 01:00:00.000000000 +0100 ++++ linux-3.2-rc5-e2c/include/linux/ext2_fs_c.h 2011-12-13 14:22:47.830975497 +0100 +@@ -0,0 +1,498 @@ ++/* ++ * Copyright (C) 2001 Alcatel Business Systems - R&D Illkirch ++ * (transparent compression code) ++ * Pierre Peiffer (pierre.peiffer@sxb.bsf.alcatel.fr) - Denis Richard (denis.richard@sxb.bsf.alcatel.fr) ++ * Adapted from patch e2compr-0.4.39-patch-2.2.18 . ++ */ ++ ++#ifndef EXT2_FS_C_H ++#define EXT2_FS_C_H ++ ++#include ++#include ++#include ++#include "../../fs/ext2/ext2.h" ++ ++/* EXT2_COMPR_DEBUG enables: ++ * - all assertions ++ * - adler checksum checking ++ */ ++//#undef EXT2_COMPR_DEBUG ++#define EXT2_COMPR_DEBUG ++ ++#ifdef EXT2_COMPR_DEBUG ++# define assert(expr) \ ++ if(unlikely(!(expr))) { \ ++ printk(KERN_ERR "Assertion failed! %s,%s,%s,line=%d\n", \ ++#expr, __FILE__, __func__, __LINE__); \ ++ } ++#else ++# define assert(expr) do {} while (0) ++#endif ++ ++ ++/* proof get_cpu and put_cpu correctness by calling might_sleep() or mabye schedule(). ++ this will check if we are atomic */ ++#ifdef EXT2_COMPR_DEBUG ++#define CHECK_NOT_ATOMIC assert(! 
in_atomic());//might_sleep(); ++#else ++#define CHECK_NOT_ATOMIC ++#endif ++ ++ ++#undef EXT2_COMPR_REPORT ++//#define EXT2_COMPR_REPORT ++//#define EXT2_COMPR_REPORT_VERBOSE ++//#define EXT2_COMPR_REPORT_PUT ++//# define EXT2_COMPR_REPORT_FILEOPEN ++//#define EXT2_COMPR_REPORT_MUTEX ++ ++#ifdef EXT2_COMPR_REPORT ++//# define EXT2_COMPR_REPORT_PUT ++//# define EXT2_COMPR_REPORT_WA ++//# define EXT2_COMPR_REPORT_MUTEX ++//# define EXT2_COMPR_REPORT_ALLOC /* disk allocation etc. */ ++//# define EXT2_COMPR_REPORT_ALGORITHMS /* Compression algorithms */ ++//# define EXT2_COMPR_REPORT_VERBOSE /* Various things I don't think ++// useful at the moment. */ ++//#define EXT2_COMPR_REPORT_VERBOSE_INODE ++#endif ++ ++ ++#ifdef EXT2_COMPR_DEBUG ++#define E2COMPR_VERSION "ext2-compression: e2c-0.4.58-smp-debug (26 August 2011) for kernel 3.1" ++#else ++#define E2COMPR_VERSION "ext2-compression: e2c-0.4.58-smp-release (26 August 2011) for kernel 3.1" ++#endif ++ ++#define EXT2_IOC_GETCLUSTERSIZE _IOR('c', 0, long) ++#define EXT2_IOC_SETCLUSTERSIZE _IOW('c', 0, long) ++#define EXT2_IOC_GETCOMPRMETHOD _IOR('c', 1, long) ++#define EXT2_IOC_SETCOMPRMETHOD _IOW('c', 1, long) ++#define EXT2_IOC_GETFIRSTCLUSTERSIZE _IOR('c', 2, long) ++#define EXT2_IOC_RECOGNIZE_COMPRESSED _IOW('c', 2, long) ++#define EXT2_IOC_GETCLUSTERBIT _IOR('c', 3, long) ++#define EXT2_IOC_GETCOMPRRATIO _IOR('c', 4, long) ++/* Don't use _IOW('c', {5,6}, long), as these are used by old ++ e2compress binaries as SETCLUSTERBIT and CLRCLUSTERBIT ++ respectively. */ ++ ++/* EXT2_xxxx_ALG is an index into ext2_algorithm_table[] defined in ++ fs/ext2/compress.c. */ ++/* N.B. Don't change these without also changing the table in ++ compress.c. Be careful not to break binary compatibility. ++ (EXT2_NONE_ALG and EXT2_UNDEF_ALG are safe from binary ++ compatibility problems, though, so they can safely be renumbered -- ++ and indeed probably should be if you do add another algorithm.) */ ++#define EXT2_LZV1_ALG 0 ++#define EXT2_LZRW3A_ALG 1 ++#define EXT2_GZIP_ALG 2 ++#define EXT2_BZIP2_ALG 3 ++#define EXT2_LZO_ALG 4 ++#define EXT2_NONE_ALG 5 ++#define EXT2_UNDEF_ALG 6 ++#define EXT2_N_ALGORITHMS 5 /* Count of "real" algorithms. Excludes ++ `none' and `undef'. */ ++ ++/* EXT2_xxxx_METH is an index into ext2_method_table[] defined in ++ fs/ext2/compress.c. */ ++/* N.B. Don't change these without also changing the table in ++ compress.c. */ ++#define EXT2_LZV1_METH 0 ++#define EXT2_AUTO_METH 1 ++#define EXT2_DEFER_METH 2 ++#define EXT2_NEVER_METH 3 ++#define EXT2_BZIP2_METH 4 ++#define EXT2_LZRW3A_METH 8 ++#define EXT2_LZO1X_1_METH 10 ++#define EXT2_GZIP_1_METH 16 ++#define EXT2_GZIP_2_METH 17 ++#define EXT2_GZIP_3_METH 18 ++#define EXT2_GZIP_4_METH 19 ++#define EXT2_GZIP_5_METH 20 ++#define EXT2_GZIP_6_METH 21 ++#define EXT2_GZIP_7_METH 22 ++#define EXT2_GZIP_8_METH 23 ++#define EXT2_GZIP_9_METH 24 ++ ++#define EXT2_N_METHODS 32 /* Don't change this unless you know what ++ you're doing. In particular, it's tied ++ to the width of the algorithm field ++ in i_flags.*/ ++ ++/* Note: EXT2_N_ALGORITHMS can't be increased beyond 16 without ++ changing the width of the s_algorithms_used field in the in-memory ++ superblock. The on-disk s_algorithms_used field is 32 bits long. ++ (This is in a state of flux. Currently (1998-02-05) there is no ++ distinction: we always use the s_es copy. 
*/ ++ ++ ++#define EXT2_MAX_CLUSTER_BYTES (32*1024) ++#define EXT2_LOG2_MAX_CLUSTER_BYTES (5 + 10) ++ ++#define EXT2_COMPRESS_MAGIC_04X 0x9ec7 ++#define EXT2_MAX_CLUSTER_BLOCKS 32 ++#define EXT2_MAX_CLUSTER_PAGES EXT2_MAX_CLUSTER_BYTES >> PAGE_CACHE_SHIFT ++#define EXT2_ECOMPR EIO ++/* A cluster is considered compressed iff the block number for the ++ last block of that cluster is EXT2_COMPRESSED_BLKADDR. If this ++ changes then check if there's anywhere that needs a cpu_to_le32() ++ conversion. */ ++#define EXT2_COMPRESSED_BLKADDR 0xffffffff ++ ++/* I like these names better. */ ++#define EXT2_MAX_CLU_NBYTES EXT2_MAX_CLUSTER_BYTES ++#define EXT2_LOG2_MAX_CLU_NBYTES EXT2_LOG2_MAX_CLUSTER_BYTES ++#define EXT2_MAX_CLU_NBLOCKS EXT2_MAX_CLUSTER_BLOCKS ++ ++ ++#ifndef __KERNEL__ ++ ++/* Cluster head on disk, for e2compr versions before 0.4.0. I'm ++ leaving this here so tht as I may make e2compress able to read ++ old-style e2compr files. */ ++struct ext2_cluster_head_03x { ++ __u16 magic; /* == EXT2_COMPRESS_MAGIC_03X */ ++ __u16 len; /* size of uncompressed data */ ++ __u16 compr_len; /* size of compressed data */ ++ __u8 method; /* compress method */ ++ __u8 reserved_0; ++ __u32 bitmap; /* block bitmap */ ++ __u32 reserved_2; /* 0 or adler32 checksum of ++ _compressed_ data */ ++}; ++# define EXT2_COMPRESS_MAGIC_03X 0x8ec7 /* Head magic number ++ for e2compr versions ++ before 0.4.0. */ ++#endif /* !__KERNEL__ */ ++ ++ ++#ifdef __KERNEL__ ++# ifdef CONFIG_EXT2_COMPRESS ++ ++//mw ++#define CONFIG_EXT2_HAVE_GZIP ++ ++/* If defined, compress each cluster as soon as we get to the end of a ++ whole cluster, when writing. (If undefined, we wait until ++ ext2_release_file() or the like.) */ ++#define EXT2_COMPRESS_WHEN_CLU ++ ++# ifdef CONFIG_EXT2_DEFAULT_COMPR_METHOD_DEFER ++# define EXT2_DEFAULT_COMPR_METHOD EXT2_DEFER_METH ++# elif defined (CONFIG_EXT2_DEFAULT_COMPR_METHOD_BZIP2) ++# define EXT2_DEFAULT_COMPR_METHOD EXT2_BZIP2_METH ++# define EXT2_DEFAULT_COMPR_METHOD EXT2_LZO1X_1_ME ++# elif defined (CONFIG_EXT2_DEFAULT_COMPR_METHOD_LZO) ++# define EXT2_DEFAULT_COMPR_METHOD EXT2_LZO1X_1_METH ++# ifndef CONFIG_EXT2_HAVE_LZO ++# error "Default algorithm (lzo) is not compiled in." ++# endif ++# elif defined (CONFIG_EXT2_DEFAULT_COMPR_METHOD_LZV1) ++# define EXT2_DEFAULT_COMPR_METHOD EXT2_LZV1_METH ++# ifndef CONFIG_EXT2_HAVE_LZV1 ++# error "Default algorithm (lzv1) is not compiled in." ++# endif ++# elif defined (CONFIG_EXT2_DEFAULT_COMPR_METHOD_LZRW3A) ++# define EXT2_DEFAULT_COMPR_METHOD EXT2_LZRW3A_METH ++# ifndef CONFIG_EXT2_HAVE_LZRW3A ++# error "Default algorithm (lzrw3a) is not compiled in." 
++# endif ++# elif defined (CONFIG_EXT2_DEFAULT_COMPR_METHOD_GZIP1) ++# define EXT2_DEFAULT_COMPR_METHOD EXT2_GZIP_1_METH ++# elif defined (CONFIG_EXT2_DEFAULT_COMPR_METHOD_GZIP2) ++# define EXT2_DEFAULT_COMPR_METHOD EXT2_GZIP_2_METH ++# elif defined (CONFIG_EXT2_DEFAULT_COMPR_METHOD_GZIP3) ++# define EXT2_DEFAULT_COMPR_METHOD EXT2_GZIP_3_METH ++# elif defined (CONFIG_EXT2_DEFAULT_COMPR_METHOD_GZIP4) ++# define EXT2_DEFAULT_COMPR_METHOD EXT2_GZIP_4_METH ++# elif defined (CONFIG_EXT2_DEFAULT_COMPR_METHOD_GZIP5) ++# define EXT2_DEFAULT_COMPR_METHOD EXT2_GZIP_5_METH ++# elif defined (CONFIG_EXT2_DEFAULT_COMPR_METHOD_GZIP6) ++# define EXT2_DEFAULT_COMPR_METHOD EXT2_GZIP_6_METH ++# elif defined (CONFIG_EXT2_DEFAULT_COMPR_METHOD_GZIP7) ++# define EXT2_DEFAULT_COMPR_METHOD EXT2_GZIP_7_METH ++# elif defined (CONFIG_EXT2_DEFAULT_COMPR_METHOD_GZIP8) ++# define EXT2_DEFAULT_COMPR_METHOD EXT2_GZIP_8_METH ++# elif defined (CONFIG_EXT2_DEFAULT_COMPR_METHOD_GZIP9) ++# define EXT2_DEFAULT_COMPR_METHOD EXT2_GZIP_9_METH ++# elif defined (CONFIG_EXT2_DEFAULT_COMPR_METHOD_BZIP2) ++# define EXT2_DEFAULT_COMPR_METHOD EXT2_BZIP2_METH ++# ifndef CONFIG_EXT2_HAVE_BZIP2 ++# error "Default algorithm (bzip2) is not compiled in." ++# endif ++# else ++# error "No default compression algorithm." ++# endif ++# if EXT2_DEFAULT_COMPR_METHOD >= EXT2_GZIP_1_METH && EXT2_DEFAULT_COMPR_METHOD <= EXT2_GZIP_9_METH ++# ifndef CONFIG_EXT2_HAVE_GZIP ++# error "Default algorithm (gzip) is not compiled in." ++# endif ++# endif ++ ++# if defined (CONFIG_EXT2_DEFAULT_CLUSTER_BITS_2) ++# define EXT2_DEFAULT_LOG2_CLU_NBLOCKS 2 ++# elif defined (CONFIG_EXT2_DEFAULT_CLUSTER_BITS_3) ++# define EXT2_DEFAULT_LOG2_CLU_NBLOCKS 3 ++# elif defined (CONFIG_EXT2_DEFAULT_CLUSTER_BITS_4) ++# define EXT2_DEFAULT_LOG2_CLU_NBLOCKS 4 ++# elif defined (CONFIG_EXT2_DEFAULT_CLUSTER_BITS_5) ++# define EXT2_DEFAULT_LOG2_CLU_NBLOCKS 5 ++# else ++# error "No default cluster size." 
++# endif ++ ++# define EXT2_DEFAULT_CLU_NBLOCKS (1 << EXT2_DEFAULT_LOG2_CLU_NBLOCKS) ++ ++# if (EXT2_LZV1_ALG != 0) || (EXT2_BZIP2_ALG != 3) || (EXT2_LZO_ALG != 4) || (EXT2_N_ALGORITHMS != 5) ++# error "this code needs changing; but then, you shouldn't be messing with algorithm ids anyway unless you are very careful to protect disk format compatibility" ++# endif ++# ifdef CONFIG_EXT2_HAVE_LZV1 ++# define _ext2_lzv1_builtin (1 << EXT2_LZV1_ALG) ++# else ++# define _ext2_lzv1_builtin 0 ++# endif ++# ifdef CONFIG_EXT2_HAVE_LZRW3A ++# define _ext2_lzrw3a_builtin (1 << EXT2_LZRW3A_ALG) ++# else ++# define _ext2_lzrw3a_builtin 0 ++# endif ++# ifdef CONFIG_EXT2_HAVE_GZIP ++# define _ext2_gzip_builtin (1 << EXT2_GZIP_ALG) ++# else ++# define _ext2_gzip_builtin 0 ++# endif ++# ifdef CONFIG_EXT2_HAVE_BZIP2 ++# define _ext2_bzip2_builtin (1 << EXT2_BZIP2_ALG) ++# else ++# define _ext2_bzip2_builtin 0 ++# endif ++# ifdef CONFIG_EXT2_HAVE_LZO ++# define _ext2_lzo_builtin (1 << EXT2_LZO_ALG) ++# else ++# define _ext2_lzo_builtin 0 ++# endif ++ ++# ifdef CONFIG_EXT2_HAVE_LZV1_MODULE ++# define _ext2_lzv1_module (1 << EXT2_LZV1_ALG) ++# else ++# define _ext2_lzv1_module 0 ++# endif ++# ifdef CONFIG_EXT2_HAVE_LZRW3A_MODULE ++# define _ext2_lzrw3a_module (1 << EXT2_LZRW3A_ALG) ++# else ++# define _ext2_lzrw3a_module 0 ++# endif ++# ifdef CONFIG_EXT2_HAVE_GZIP_MODULE ++# define _ext2_gzip_module (1 << EXT2_GZIP_ALG) ++# else ++# define _ext2_gzip_module 0 ++# endif ++# ifdef CONFIG_EXT2_HAVE_BZIP2_MODULE ++# define _ext2_bzip2_module (1 << EXT2_BZIP2_ALG) ++# else ++# define _ext2_bzip2_module 0 ++# endif ++# ifdef CONFIG_EXT2_HAVE_LZO_MODULE ++# define _ext2_lzo_module (1 << EXT2_LZO_ALG) ++# else ++# define _ext2_lzo_module 0 ++# endif ++ ++# define EXT2_ALGORITHMS_MODULE (_ext2_lzv1_module | _ext2_lzrw3a_module | _ext2_gzip_module | _ext2_bzip2_module | _ext2_lzo_module) ++# define EXT2_ALGORITHMS_BUILTIN (_ext2_lzv1_builtin | _ext2_lzrw3a_builtin | _ext2_gzip_builtin | _ext2_bzip2_builtin | _ext2_lzo_builtin) ++ ++# if EXT2_ALGORITHMS_MODULE & EXT2_ALGORITHMS_BUILTIN ++# error "Arithmetic error? Some algorithm appears to be both built-in and a module." ++# endif ++ ++/* EXT2_ALGORITHMS_SUPP is what we test when mounting a filesystem. ++ See fs/ext2/super.c. */ ++# define EXT2_ALGORITHMS_SUPP (EXT2_ALGORITHMS_MODULE | EXT2_ALGORITHMS_BUILTIN) ++# if EXT2_ALGORITHMS_SUPP == 0 ++# error "You must select at least one compression algorithm." ++# endif ++ ++/* Cluster head on disk. Little-endian. */ ++struct ext2_cluster_head { ++ __u16 magic; /* == EXT2_COMPRESS_MAGIC_04X. */ ++ __u8 method; /* compression method id. */ ++ __u8 holemap_nbytes; /* length of holemap[] array */ ++ __u32 checksum; /* adler32 checksum. Checksum covers all fields ++ below this one, and the compressed data. */ ++ __u32 ulen; /* size of uncompressed data */ ++ __u32 clen; /* size of compressed data (excluding cluster head) */ ++ __u8 holemap[0]; /* bitmap describing where to put holes. */ ++}; ++ ++ ++struct ext2_wa_S { ++ __u8 u[EXT2_MAX_CLUSTER_BYTES]; /* Uncompressed data. */ ++ __u8 c[EXT2_MAX_CLUSTER_BYTES]; /* Compressed data. */ ++ __u8 heap[1]; /* Heap: working space for de/compression routines. */ ++}; ++ ++# define EXT2_CLEANUP_FL 0x40 /* See Readme.e2compr */ ++# define EXT2_OSYNC_INODE 0x20 /* sync of inode running */ ++# define ROUNDUP_DIV(_n, _d) ((_n) ? 1 + (((_n) - 1) / (_d)) : 0) ++# define ROUNDUP_RSHIFT(_n, _b) ((_n) ? 
1 + (((_n) - 1) >> (_b)) : 0) ++ ++# if defined(EXT2_NDIR_BLOCKS) && (EXT2_NDIR_BLOCKS != 12) ++# error "e2compr currently assumes that EXT2_NDIR_BLOCKS is 12." ++/* If EXT2_NDIR_BLOCKS changes then change the definitions of ++ ext2_first_cluster_nblocks() and friends, and search the patch for ++ anywhere where 12 is hard-coded. (At the time of writing, it's ++ only hard-coded in ext2_first_cluster_nblocks().) What we want to ++ achieve is for clusters not to straddle address blocks. Apart from ++ performance, some code in compress.c (search for `straddle') ++ assumes this. */ ++# endif ++ ++# include ++ ++# define EXT2_ALG_INIT_COMPRESS 1 ++# define EXT2_ALG_INIT_DECOMPRESS 2 ++ ++extern int ext2_get_cluster_pages (struct inode*, u32, struct page**, struct page *, int); ++extern int ext2_get_cluster_extra_pages (struct inode*, u32, struct page**, struct page**); ++extern int ext2_kmap_cluster_pages (struct page *, struct page**, struct page**); ++extern int ext2_kunmap_cluster_pages (struct page *, struct page**, struct page**); ++extern int ext2_get_cluster_blocks (struct inode*, u32, struct buffer_head**, struct page**, struct page**, int); ++extern int ext2_decompress_cluster (struct inode*, u32); ++extern int ext2_decompress_pages(struct inode*, u32, struct page**); ++extern int ext2_compress_cluster (struct inode*, u32); ++extern int ext2_decompress_inode (struct inode*); ++extern int ext2_cleanup_compressed_inode (struct inode*); ++extern void ext2_update_comprblk (struct inode *); ++extern int ext2_get_dcount(struct inode *inode); ++ ++extern size_t ext2_decompress_blocks (struct inode*, struct buffer_head**, int, size_t, u32 cluster); ++extern int ext2_count_blocks (struct inode*); ++extern int ext2_recognize_compressed (struct inode *, unsigned cluster); ++extern unsigned long ext2_adler32 (unsigned long, unsigned char*, int); ++ ++extern size_t ext2_iLZV1 (int); ++extern size_t ext2_iLZV2 (int); ++extern size_t ext2_iNONE (int); ++extern size_t ext2_iGZIP (int); ++extern size_t ext2_iBZIP2 (int); ++extern size_t ext2_iLZO (int); ++extern size_t ext2_iLZRW3A (int); ++extern size_t ext2_iZLIB (int); ++ ++extern size_t ext2_wLZV1 (__u8*, __u8*, void*, size_t, size_t, int); ++extern size_t ext2_wLZV2 (__u8*, __u8*, void*, size_t, size_t, int); ++extern size_t ext2_wNONE (__u8*, __u8*, void*, size_t, size_t, int); ++extern size_t ext2_wGZIP (__u8*, __u8*, void*, size_t, size_t, int); ++extern size_t ext2_wBZIP2 (__u8*, __u8*, void*, size_t, size_t, int); ++extern size_t ext2_wLZO (__u8*, __u8*, void*, size_t, size_t, int); ++extern size_t ext2_wLZRW3A (__u8*, __u8*, void*, size_t, size_t, int); ++extern size_t ext2_wZLIB (__u8*, __u8*, void*, size_t, size_t, int); ++ ++extern size_t ext2_rLZV1 (__u8*, __u8*, void*, size_t, size_t, int); ++extern size_t ext2_rLZV2 (__u8*, __u8*, void*, size_t, size_t, int); ++extern size_t ext2_rNONE (__u8*, __u8*, void*, size_t, size_t, int); ++extern size_t ext2_rGZIP (__u8*, __u8*, void*, size_t, size_t, int); ++extern size_t ext2_rBZIP2 (__u8*, __u8*, void*, size_t, size_t, int); ++extern size_t ext2_rLZO (__u8*, __u8*, void*, size_t, size_t, int); ++extern size_t ext2_rLZRW3A (__u8*, __u8*, void*, size_t, size_t, int); ++extern size_t ext2_rZLIB (__u8*, __u8*, void*, size_t, size_t, int); ++ ++struct ext2_algorithm { ++ char *name; ++ int avail; ++ size_t (*init) (int); ++ size_t (*compress) (__u8*, __u8*, void*, size_t, size_t, int); ++ size_t (*decompress) (__u8*, __u8*, void*, size_t, size_t, int); ++}; ++ ++struct ext2_method { ++ unsigned 
alg; ++ int xarg; ++}; ++ ++ ++# define ext2_first_cluster_nblocks(_i) ((EXT2_I(_i))->i_clu_nblocks > 4 && (_i)->i_sb->s_blocksize < 4096 ? 12 : 4) ++# define ext2_block_to_cluster(_i,_b) ((_b) < ext2_first_cluster_nblocks(_i) ? 0 : (((_b) - ext2_first_cluster_nblocks(_i)) >> (EXT2_I(_i))->i_log2_clu_nblocks) + 1) ++# define ext2_offset_to_cluster(_i,_o) ext2_block_to_cluster((_i), ((_o) >> (_i)->i_sb->s_blocksize_bits)) ++# define ext2_n_clusters(_i) ((_i)->i_size ? ext2_offset_to_cluster((_i), (_i)->i_size - 1) + 1 : 0) ++# define ext2_cluster_block0(_i,_c) ((_c) ? ext2_first_cluster_nblocks(_i) + (((_c) - 1) << (EXT2_I(_i))->i_log2_clu_nblocks) : 0) ++# define ext2_cluster_nblocks(_i,_c) ((_c) ? (EXT2_I(_i))->i_clu_nblocks : ext2_first_cluster_nblocks(_i)) ++# define ext2_cluster_offset(_i,_c) ((_c) ? ext2_cluster_block0((_i), (_c)) << (_i)->i_sb->s_blocksize_bits : 0) ++ ++# define ext2_first_cluster_npages(_i) ((EXT2_I(_i))->i_clu_nblocks > 4 && (_i)->i_sb->s_blocksize < 4096 ? 12 >> (PAGE_CACHE_SHIFT - (_i)->i_sb->s_blocksize_bits) : 4 >> (PAGE_CACHE_SHIFT - (_i)->i_sb->s_blocksize_bits)) ++# define ext2_page_to_cluster(_i,_p) ((_p) < ext2_first_cluster_npages(_i) ? 0 : (((_p) - ext2_first_cluster_npages(_i)) >> (((EXT2_I(_i))->i_log2_clu_nblocks)+(_i)->i_sb->s_blocksize_bits-PAGE_CACHE_SHIFT)) + 1) ++# define ext2_cluster_page0(_i,_c) ((_c) ? ext2_cluster_block0(_i, _c) >> (PAGE_CACHE_SHIFT - (_i)->i_sb->s_blocksize_bits) : 0) ++# define ext2_cluster_npages(_i,_c) ((_c) ? (EXT2_I(_i))->i_clu_nblocks >> (PAGE_CACHE_SHIFT - (_i)->i_sb->s_blocksize_bits) : ext2_first_cluster_npages(_i)) ++ ++static inline int ++ext2_offset_is_clu_boundary(struct inode *inode, u32 off) ++{ ++ if (off & (inode->i_sb->s_blocksize - 1)) ++ return 0; ++ if (off == 0) ++ return 1; ++ off >>= inode->i_sb->s_blocksize_bits; ++ if (off < ext2_first_cluster_nblocks(inode)) ++ return 0; ++ off -= ext2_first_cluster_nblocks(inode); ++ return !(off & (EXT2_I(inode)->i_clu_nblocks - 1)); ++} ++ ++struct ext2_wa_contents_S { ++ ino_t ino; ++ dev_t dev; ++ unsigned cluster; ++}; ++ ++DECLARE_PER_CPU(struct ext2_wa_S *, ext2_rd_wa); ++DECLARE_PER_CPU(struct ext2_wa_S *, ext2_wr_wa); ++ ++extern void ext2_alloc_rd_wa(void); ++extern void ext2_alloc_wr_wa(void); ++ ++extern struct ext2_algorithm ext2_algorithm_table[]; ++extern struct ext2_method ext2_method_table[]; /*mw: is static so far, no writes*/ ++ ++/* Both of these return -errno if error, 0 if not compressed, positive ++ if compressed. (You should use the macro unless you've already ++ tested COMPRBLK.) */ ++extern int ext2_cluster_is_compressed_fn (struct inode *inode, __u32 cluster); ++static inline int ext2_cluster_is_compressed (struct inode *inode, __u32 cluster) ++{ ++ if ((EXT2_I(inode)->i_flags & EXT2_COMPRBLK_FL) == 0) ++ return 0; ++ return ext2_cluster_is_compressed_fn (inode, cluster); ++} ++extern unsigned ext2_calc_free_ix (unsigned , u8 const *, unsigned ); ++extern int ext2_unpack_blkaddrs(struct inode *, struct buffer_head **, int, unsigned , u8 const *, unsigned , unsigned , unsigned , unsigned ); ++ ++# define HOLE_BLKADDR(_b) \ ++ (((_b) == 0) \ ++ || ((_b) == EXT2_COMPRESSED_BLKADDR)) ++# else /* !CONFIG_EXT2_COMPRESS */ ++# define HOLE_BLKADDR(_b) ((_b) == 0) ++# endif ++ ++/* For some reason or other, I see code like `if (le32_to_cpu(tmp) != ++ 0)' around in the kernel. So far I haven't checked whether or not ++ the compiler knows that the swab can be dropped. 
*/ ++# if defined(EXT2_COMPRESSED_BLKADDR) && EXT2_COMPRESSED_BLKADDR != 0xffffffff ++/* This may be a false positive; the "correct" test would be `if ++ defined(CONFIG_EXT2_COMPRESS)', but if this test does succeed, then ++ there is at least cause to have a look around. */ ++# error "Next bit of code is wrong." ++# endif ++ ++# define HOLE_BLKADDR_SWAB32(_b) HOLE_BLKADDR(_b) ++ ++#ifdef EXT2_COMPR_REPORT ++#define trace_e2c(format, args...) printk(KERN_DEBUG format, ## args) ++#else ++#define trace_e2c(format, args...) do {} while(0) ++#endif ++ ++#endif /* __KERNEL__ */ ++ ++ ++#endif /* EXT2_FS_C_H */ +--- linux-3.2-rc5/fs/ext2/Makefile 2011-12-10 00:09:32.000000000 +0100 ++++ linux-3.2-rc5-e2c/fs/ext2/Makefile 2011-12-13 14:22:47.830975498 +0100 +@@ -2,10 +2,17 @@ + # Makefile for the linux ext2-filesystem routines. + # + ++ifeq ($(CONFIG_EXT2_COMPRESS),y) ++ ++COMPRESS_STUFF := adler32.o compress.o e2zlib.o\ ++ $($(obj-y):%/=%/ext2-compr-%.o) ++endif ++ + obj-$(CONFIG_EXT2_FS) += ext2.o + + ext2-y := balloc.o dir.o file.o ialloc.o inode.o \ +- ioctl.o namei.o super.o symlink.o ++ ioctl.o namei.o super.o symlink.o $(COMPRESS_STUFF) ++ + + ext2-$(CONFIG_EXT2_FS_XATTR) += xattr.o xattr_user.o xattr_trusted.o + ext2-$(CONFIG_EXT2_FS_POSIX_ACL) += acl.o +--- linux-3.2-rc5/fs/ext2/compress.c 1970-01-01 01:00:00.000000000 +0100 ++++ linux-3.2-rc5-e2c/fs/ext2/compress.c 2011-12-13 14:22:47.839975781 +0100 +@@ -0,0 +1,3420 @@ ++/* ++ * linux/fs/ext2/compress.c ++ * ++ * Copyright (C) 1995 Antoine Dumesnil de Maricourt (dumesnil@etca.fr) ++ * (transparent compression code) ++ */ ++ ++/* ++ * Copyright (C) 2001 Alcatel Business Systems - R&D Illkirch FRANCE ++ * ++ * Transparent compression code for 2.4 kernel. ++ * ++ * Denis Richard (denis.richard@sxb.bsf.alcatel.fr) ++ * Pierre Peiffer (pierre.peiffer@sxb.bsf.alcatel.fr) ++ * ++ * Adapted from patch e2compr-0.4.39-patch-2.2.18 . ++ */ ++ ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++ ++#define MIN(a,b) ((a) < (b) ? 
(a) : (b)) ++ ++#ifdef CONFIG_HIGHMEM ++#define restore_b_data_himem(bh) assert(page_address(bh->b_page)); bh->b_data = page_address(bh->b_page) + bh_offset(bh) ++ ++ ++ ++int ext2_kmap_cluster_pages(struct page *page, struct page *pg[], ++ struct page *epg[]) ++{ ++ int i = 0; ++ ++ for (i = 0; i < EXT2_MAX_CLUSTER_PAGES; i++) { ++ if (!pg[i]) ++ break; ++ if (epg && epg[i]) ++ kmap(epg[i]); ++ else ++ kmap(pg[i]); ++ } ++ ++ if (page) ++ kmap(page); ++ return 0; ++} ++ ++ ++int ext2_kunmap_cluster_pages(struct page *page, struct page *pg[], ++ struct page *epg[]) ++{ ++ int i = 0; ++ ++ for (i = 0; i < EXT2_MAX_CLUSTER_PAGES; i++) { ++ if (!pg[i]) ++ break; ++ if (epg && epg[i]) ++ kunmap(epg[i]); ++ else ++ kunmap(pg[i]); ++ } ++ ++ if (page) ++ kunmap(page); ++ return 0; ++} ++#else //no high-mem: ++#define restore_b_data_himem(bh) ; ++#endif ++ ++ ++/*none compression dummy functions*/ ++size_t ext2_iNONE (int action) { return 0; } ++size_t ext2_wNONE (__u8 *ibuf, __u8 *obuf, void *wa, size_t ilen, size_t olen, int xarg) { return 0; } ++size_t ext2_rNONE (__u8 *ibuf, __u8 *obuf, void *wa, size_t ilen, size_t olen, int xarg) { return 0; } ++ ++/* ++ * Algorithm and method tables ++ */ ++struct ext2_algorithm ext2_algorithm_table[] = { ++ /* Note: all algorithms must have the `name' field filled in. ++ This is used to autoload algorithm modules (ext2-compr-%s), and ++ in kernel printk. */ ++ /* N.B. Do not renumber these algorithms! (To do so is to change ++ the binary format.) It's OK for `none' and `undef' to be ++ renumbered, though. */ ++ ++ /* Fields: ++ name; available; routines for: ++ init, compress, decompress. */ ++ {"lzv1", 0, ext2_iNONE, ext2_wNONE, ext2_rNONE}, ++ {"lzrw3a", 0, ext2_iNONE, ext2_wNONE, ext2_rNONE}, ++ {"gzip", 1, ext2_iZLIB, ext2_wZLIB, ext2_rZLIB}, //Andreas: workaround ++ {"bzip2", 0, ext2_iNONE, ext2_wNONE, ext2_rNONE}, ++ {"lzo", 0, ext2_iNONE, ext2_wNONE, ext2_rNONE}, ++ {"none", 1, ext2_iNONE, ext2_wNONE, ext2_rNONE}, ++ ++ /* This "algorithm" is for unused entries in the method table. ++ It differs from EXT2_NONE_ALG in that it is considered ++ unavailable, whereas `none' is always available. */ ++ {"undef", 0, ext2_iNONE, ext2_wNONE, ext2_rNONE}, ++ ++}; ++ ++/* Note: EXT2_N_ALGORITHMS can't be increased beyond 16 without ++ changing the width of the s_algorithms_used field in the in-memory ++ superblock. The on-disk s_algorithms_used field is 32 bits long. ++ (This is in a state of flux. Currently (1998-02-05) there is no ++ distinction: we always use the s_es copy. */ ++ ++/* The size of this table must be 32 to prevent Oopsen from ++ invalid data. We index this from 5 bits of i_flags, so ++ the size is (1 << 5) == 32. */ ++struct ext2_method ext2_method_table[32] = { ++ /* Fields: algorithm id, algorithm argument. 
*/ ++ {EXT2_LZV1_ALG, 0}, ++ {EXT2_NONE_ALG, 0}, /* 1: auto */ ++ {EXT2_NONE_ALG, 0}, /* 2: defer */ ++ {EXT2_NONE_ALG, 0}, /* 3: never */ ++ {EXT2_BZIP2_ALG, 0}, /* 4: bzip2 */ ++ {EXT2_UNDEF_ALG, 0}, ++ {EXT2_UNDEF_ALG, 0}, ++ {EXT2_UNDEF_ALG, 0}, ++ {EXT2_LZRW3A_ALG, 0}, /* 8: lzrw3a */ ++ {EXT2_UNDEF_ALG, 0}, ++ {EXT2_LZO_ALG, 0}, /* 10: lzo1x_1 */ ++ {EXT2_UNDEF_ALG, 0}, ++ {EXT2_UNDEF_ALG, 0}, ++ {EXT2_UNDEF_ALG, 0}, ++ {EXT2_UNDEF_ALG, 0}, ++ {EXT2_UNDEF_ALG, 0}, ++ {EXT2_GZIP_ALG, 1}, /* 16 */ ++ {EXT2_GZIP_ALG, 2}, ++ {EXT2_GZIP_ALG, 3}, ++ {EXT2_GZIP_ALG, 4}, ++ {EXT2_GZIP_ALG, 5}, ++ {EXT2_GZIP_ALG, 6}, ++ {EXT2_GZIP_ALG, 7}, ++ {EXT2_GZIP_ALG, 8}, ++ {EXT2_GZIP_ALG, 9}, ++ {EXT2_UNDEF_ALG, 0}, ++ {EXT2_UNDEF_ALG, 0}, ++ {EXT2_UNDEF_ALG, 0}, ++ {EXT2_UNDEF_ALG, 0}, ++ {EXT2_UNDEF_ALG, 0}, ++ {EXT2_UNDEF_ALG, 0}, ++ {EXT2_UNDEF_ALG, 0} ++}; ++ ++ ++static void ext2_mark_algorithm_use(struct inode *inode, unsigned alg) ++{ ++ struct ext2_sb_info *sbi = EXT2_SB(inode->i_sb); ++ ++ /* Hopefully, lock_super() isn't needed here, as we don't ++ block in the critical region. True? */ ++ assert(alg < EXT2_N_ALGORITHMS); ++ if (sbi->s_es->s_feature_incompat ++ & cpu_to_le32(EXT2_FEATURE_INCOMPAT_COMPRESSION)) { ++ sbi->s_es->s_algorithm_usage_bitmap |= cpu_to_le32(1 << alg); ++ } else { ++ struct ext2_super_block *es = sbi->s_es; ++ ++ es->s_algorithm_usage_bitmap = cpu_to_le32(1 << alg); ++ es->s_feature_incompat ++ |= cpu_to_le32(EXT2_FEATURE_INCOMPAT_COMPRESSION); ++ if (es->s_rev_level < EXT2_DYNAMIC_REV) { ++ /* Raise the filesystem revision level to ++ EXT2_DYNAMIC_REV so that s_feature_incompat ++ is honoured (except in ancient kernels / ++ e2fsprogs). We must also initialize two ++ other dynamic-rev fields. The remaining ++ fields are assumed to be already correct ++ (e.g. still zeroed). */ ++ es->s_rev_level = cpu_to_le32(EXT2_DYNAMIC_REV); ++ es->s_first_ino = cpu_to_le32(EXT2_GOOD_OLD_FIRST_INO); ++ es->s_inode_size = cpu_to_le16(EXT2_GOOD_OLD_INODE_SIZE); ++ } ++ } ++ mark_buffer_dirty(sbi->s_sbh); ++} ++ ++ ++/* Displays an error message if algorithm ,alg` is not marked in use, ++ and then marks it in use. 
*/ ++static void ext2_ensure_algorithm_use(struct inode *inode, unsigned alg) ++{ ++ assert(alg < EXT2_N_ALGORITHMS); ++ ++ if (!(EXT2_SB(inode->i_sb)->s_es->s_algorithm_usage_bitmap ++ & cpu_to_le32(1 << alg))) { ++ ext2_msg(inode->i_sb, "algorithm usage bitmap algorithm %s not marked used in inode %lu", ++ ext2_algorithm_table[alg].name, inode->i_ino); ++ ext2_mark_algorithm_use(inode, alg); ++ } ++} ++ ++ ++/*mw: out of cache bug fix 5-16-07 */ ++static void create_empty_buffers_e2c(struct page *page, ++ unsigned long blocksize, ++ unsigned long b_state, ++ struct inode *inode) ++{ ++ struct buffer_head *bh, *head, *tail; ++ ++ head = alloc_page_buffers(page, blocksize, 1); ++ bh = head; ++ do { ++ bh->b_state |= b_state; ++ tail = bh; ++ bh->b_bdev = NULL; //mw: make it like 2.4 ++ bh->b_blocknr = 0; //mw: make it like 2.4 ++ bh->b_end_io = NULL; //mw: make it like 2.4 ++ bh = bh->b_this_page; ++ } while (bh); ++ tail->b_this_page = head; ++ spin_lock(&inode->i_mapping->private_lock); ++ if (PageUptodate(page) || PageDirty(page)) { ++ bh = head; ++ do { ++ if (PageDirty(page)) ++ set_buffer_dirty(bh); ++ if (PageUptodate(page)) ++ set_buffer_uptodate(bh); ++ bh = bh->b_this_page; ++ } while (bh != head); ++ } ++ attach_page_buffers(page, head); ++ spin_unlock(&inode->i_mapping->private_lock); ++} ++ ++int ext2_get_cluster_pages(struct inode *inode, u32 cluster, ++ struct page *pg[], struct page *page, int compr) ++{ ++ int nbpg, npg, i; ++ u32 page0; /* = position within file (not position within fs). */ ++ u32 idx = 0; ++ struct page *cached_page; ++ struct pagevec lru_pvec; ++ ++ /*mw */ ++ for (i = 0; i < EXT2_MAX_CLUSTER_PAGES; i++) ++ pg[i] = NULL; ++ ++ cached_page = NULL; ++ pagevec_init(&lru_pvec, 0); ++ ++ page0 = ext2_cluster_page0(inode, cluster); ++ nbpg = ext2_cluster_npages(inode, cluster); ++ ++ if (compr && (((page0 + nbpg) << PAGE_CACHE_SHIFT) > inode->i_size)) ++ nbpg = ((inode->i_size - 1) >> PAGE_CACHE_SHIFT) - page0 + 1; ++#ifdef EXT2_COMPR_REPORT ++ trace_e2c("ext2_get_cluster_pages: page0=%d, nbpg=%d page=%ld\n", ++ page0, nbpg, ((page != NULL) ? 
page->index : 0)); ++#endif ++ for (npg = 0; npg < nbpg; npg++) { ++ if ((page == NULL) || ((page0 + npg) != page->index)) { ++ //pg[npg] = __grab_cache_page(inode->i_mapping, page0+npg); /* &cached_page, &lru_pvec);*/ ++ pg[npg] = grab_cache_page_write_begin(inode->i_mapping, page0+npg, 0); ++ if (!pg[npg]) ++ goto error; ++ } else { ++ pg[npg] = page; ++ } ++ if (!page_has_buffers(pg[npg])) { ++ ClearPageUptodate(pg[npg]); ++ ClearPageDirty(pg[npg]); ++ create_empty_buffers_e2c(pg[npg], inode->i_sb->s_blocksize, 0, inode); ++ if (unlikely(!page_has_buffers(pg[npg]))) ++ trace_e2c("ext2_get_cluster_pages: NOMEM!\n"); ++ assert(!PageUptodate(pg[npg])); ++ assert(!PageDirty(pg[npg])); ++ } ++ } ++ //set remaining pages to NULL ++ for (idx = npg; idx < EXT2_MAX_CLUSTER_PAGES; idx++) ++ pg[idx] = NULL; ++ ++ if (cached_page) ++ page_cache_release(cached_page); ++ pagevec_lru_add_file(&lru_pvec); ++ pagevec_free(&lru_pvec); ++ return (npg); ++ error: ++ if (cached_page) ++ page_cache_release(cached_page); ++ pagevec_lru_add_file(&lru_pvec); ++ pagevec_free(&lru_pvec); ++ while (--npg >= 0) { ++ if ((page == NULL) || ((page0 + npg) != page->index)) { ++ unlock_page(pg[npg]); ++ page_cache_release(pg[npg]); ++ } ++ pg[npg] = NULL; ++ } ++ trace_e2c("ext2_get_cluster_pages: error no page\n"); ++ return (-ENOMEM); ++} ++ ++ ++int ext2_get_cluster_extra_pages(struct inode *inode, u32 cluster, ++ struct page *pg[], struct page *epg[]) ++{ ++ struct page *page; ++ int nbpg, npg, i; ++ ++ for (i = 0; i < EXT2_MAX_CLUSTER_PAGES; i++) ++ epg[i] = NULL; ++ ++ nbpg = ext2_cluster_npages(inode, cluster); ++ for (npg = 0; npg < nbpg; npg++) { ++ if (pg[npg] == NULL) ++ break; ++ if (PageUptodate(pg[npg])) { ++ //page = page_cache_alloc(inode->i_mapping); ++ //mw: has gfp-mask of adress-space: gfp_t mapping_gfp_mask(struct address_space * mapping) ++ // don't trigger. shrink_dcache_memory which might call ext2_cleanup_compressed_inode with the SAME mutex. ++ page = __page_cache_alloc(GFP_NOFS); ++ ++ if (!page) { ++ goto error; ++ } ++ ClearPageError(page); ++ ClearPageReferenced(page); ++ ClearPageUptodate(page); ++ ClearPageDirty(page); ++ lock_page(page); ++ page->index = pg[npg]->index; ++ ++ if (!page_has_buffers(page)) { ++ create_empty_buffers_e2c(page, inode->i_sb->s_blocksize, 0, ++ inode); ++ /*mw : only the "extra_pages" for decompression need create_empty_buffers_unlocked, because ++ * they have no mapping-context and they must not have one. Otherwise they get need a page->index ++ * which belongs always to an address_space object (e.g.: inode). But I think this is not intented here. ++ * we just need thei buffers for a short time of decompression */ ++ if (unlikely(!page_has_buffers(page))) ++ return printk("Error: NOMEM!\n"); ++ } ++ ++ epg[npg] = page; ++#ifdef EXT2_COMPR_REPORT ++ trace_e2c ++ ("ext2_get_cluster_extra_pages: allocated page idx=%ld\n", ++ pg[npg]->index); ++#endif ++ } else { ++ epg[npg] = NULL; ++ } ++ } ++ return (npg); ++ error: ++ while (--npg >= 0) ++ if (epg[npg]) { ++ ClearPageDirty(epg[npg]); ++ ClearPageUptodate(epg[npg]); ++ try_to_free_buffers(epg[npg]); ++ unlock_page(epg[npg]); ++ assert(page_count(epg[npg]) == 1); ++ page_cache_release(epg[npg]); ++ } ++ trace_e2c("ext2_get_cluster_extra_pages: error no page\n"); ++ return (-ENOMEM); ++ ++} ++ ++/* Read every block in the cluster. The blocks are stored in the bh ++ array, which must be big enough. ++ ++ Return the number of block contained in the cluster, or -errno if an ++ error occured. 
The buffers should be released by the caller ++ (unless an error occurred). ++ ++ The inode must be locked, otherwise it is possible that we return ++ some out of date blocks. ++ ++ Called by : ++ ++ ext2_decompress_cluster() [i_sem] ++ ext2_compress_cluster() [i_sem] ++ ext2_readpage() [i_sem] */ ++ ++ ++int ext2_get_cluster_blocks(struct inode *inode, u32 cluster, ++ struct buffer_head *bh[], struct page *pg[], ++ struct page *epg[], int compr) ++{ ++ struct buffer_head *br[EXT2_MAX_CLUSTER_BLOCKS]; ++ int nreq, nbh = 0, npg, i; ++ u32 clu_nblocks; ++ int err; ++ const int blocks = PAGE_CACHE_SIZE >> inode->i_sb->s_blocksize_bits; ++ ++ /*mw */ ++ for (i = 0; i < EXT2_MAX_CLUSTER_BLOCKS; i++) ++ bh[i] = NULL; ++ ++ assert(atomic_read(&inode->i_mutex.count) <= 0); /* i.e. mutex_lock */ ++ ++ /* ++ * Request full cluster. ++ */ ++ { ++ u32 endblk; ++ u32 block; /* = position within file (not position within fs). */ ++ u32 nbpg; ++ u32 page0; /* = position within file (not position within fs). */ ++ u32 idx; ++ ++ block = ext2_cluster_block0(inode, cluster); ++ clu_nblocks = ext2_cluster_nblocks(inode, cluster); ++ /* impl: Don't shorten endblk for i_size. The ++ remaining blocks should be NULL anyway, except in ++ the case when called from ext2_decompress_cluster ++ from ext2_truncate, in which case i_size is short ++ and we _want_ to get all of the blocks. */ ++ endblk = block + clu_nblocks; ++ ++ page0 = ext2_cluster_page0(inode, cluster); ++ nbpg = ext2_cluster_npages(inode, cluster); ++ ++ if (compr ++ && (((page0 + nbpg) << PAGE_CACHE_SHIFT) > inode->i_size)) { ++ nbpg = ((inode->i_size - 1) >> PAGE_CACHE_SHIFT) - page0 + 1; ++ endblk = ++ block + ++ (nbpg << ++ (PAGE_CACHE_SHIFT - inode->i_sb->s_blocksize_bits)); ++ } ++ ++ idx = page0 << (PAGE_CACHE_SHIFT - inode->i_sb->s_blocksize_bits); ++#ifdef EXT2_COMPR_REPORT ++ trace_e2c("ext2_get_cluster_blocks: page0=%d, nbpg=%d\n", page0, ++ nbpg); ++#endif ++ for (npg = 0; npg < nbpg; npg++) { ++ struct buffer_head *buffer; ++ ++ if ((epg != NULL) && (epg[npg] != NULL)) ++ buffer = page_buffers(epg[npg]); ++ else ++ buffer = page_buffers(pg[npg]); ++ for (i = 0; i < blocks && (block + nbh) < endblk; ++ buffer = buffer->b_this_page, i++) { ++ if (idx == (block + nbh)) { ++ bh[nbh] = buffer; ++ nbh++; ++ } ++ idx++; ++ } ++ } ++#ifdef EXT2_COMPR_REPORT ++ trace_e2c ++ ("ext2_get_cluster_blocks: get every pages and %d buffers\n", ++ nbh); ++#endif ++ ++ for (nbh = 0, nreq = 0; block < endblk; nbh++) { ++ assert(bh[nbh] != NULL); ++ bh[nbh]->b_blocknr = 0; ++ clear_bit(BH_Mapped, &bh[nbh]->b_state); ++ ++ //mw: does not work with 2.6 and holes!!! ++ //err=ext2_get_block(inode, block++, bh[nbh], (PageDirty(bh[nbh]->b_page) ? 1 : 0)); ++ err = ext2_get_block(inode, block++, bh[nbh], 0); ++ /* mw: 0: we dont' create non existing blocks here ++ * let's do it just before the writeback, when we know, which blocks we really need...*/ ++ //err=ext2_get_block(inode, block++, bh[nbh], (buffer_dirty(bh[nbh]) ? 1 : 0)); ++ ++ /* mw: bdev-bug-fix: for files which got compressed and now consume less buffers ++ * ext2_get_block returns 0, for a empty-block. As these buffer were used before ++ * the bh[nbh]->b_bdev might be != NULL or just invalid. So we set them explicitly ++ * to NULL. 
*/ ++ //printk("Get Block cluster %i: (%#x):%i Blk-NR:%lu(%lu)[%lu-%lu] Bdev:%#x(%#x), PGDirty:%i, mapped:%i, PID: %lu\n", cluster, bh[nbh], nbh, block, ++ ++ //if we are not mapped, then the blocknr will be wrong ++ //we set a bdev here the we will write to some "random" block ++ if (!buffer_mapped(bh[nbh])) { ++ bh[nbh]->b_bdev = NULL; /* don't write wrongly mapped blocks !!! */ ++ /* mw: you encounter null pointer oops you MUST ++ * map your buffer using ext2_get_block()*/ ++ } ++ ++ if (bh[nbh]->b_blocknr != 0) { ++ if (!buffer_uptodate(bh[nbh]) ++ /* TODO: Do we need this ++ `!buffer_locked' test? */ ++ && !buffer_locked(bh[nbh]) ++ && !PageDirty(bh[nbh]->b_page)) ++ br[nreq++] = bh[nbh]; ++ } else if ((err != 0) ++ && (err != -EFBIG)) ++ /* impl: for some unknown reason, ++ ext2_getblk() returns -EFBIG if ++ !create and there's a hole. ==> not right any more in 2.4 */ ++ goto error; ++ } ++ for (i = nbh; i < EXT2_MAX_CLUSTER_BLOCKS; i++) { ++ bh[i] = NULL; ++ } ++ } ++#ifdef EXT2_COMPR_REPORT_CPR ++ trace_e2c("ext2_get_cluster_blocks: nreq=%d for cluster=%d\n", nreq, ++ cluster); ++#endif ++ ++ //read all blocks, which are not null-blocks ++ if (nreq > 0) ++ ll_rw_block(READ, nreq, br); ++ ++ /* ++ * Adjust nbh if we have some null blocks at end of cluster. ++ */ ++ while ((nbh != 0) && (bh[nbh - 1]->b_blocknr == 0)) ++ nbh--; ++ ++ /* ++ * Wait for blocks. ++ */ ++ err = -EIO; ++ CHECK_NOT_ATOMIC ++ for (i = 0; i < nbh; i++) ++ if ((!PageDirty(bh[i]->b_page)) && (bh[i]->b_blocknr != 0)) { ++ wait_on_buffer(bh[i]); ++ if (!buffer_uptodate(bh[i])) { /* Read error ??? */ ++ trace_e2c ++ ("ext2_get_cluster_blocks: wait_on_buffer error (blocknr=%ld)\n", ++ bh[i]->b_blocknr); ++ goto error; ++ } ++ } ++ assert(nbh <= EXT2_MAX_CLU_NBLOCKS); ++ ++ return nbh; ++ ++ error: ++ printk("ERROR: ext2_get_cluster_blocks()\n"); ++ return err; ++} ++ ++ ++/* Iterations over block in the inode are done with a generic ++ iteration key mechanism. We need one method to convert a block ++ number into a new key, one method to iterate (i.e., increment the ++ key) and one method to free the key. The code could be shared with ++ truncate.c, as this mechanism is very general. ++ ++ This code assumes tht nobody else can read or write the file ++ between ext2_get_key() and ext2_free_key(), so callers need to have ++ i_sem (which they all do anyway). */ ++ ++/* TODO: Get all of the bkey routines to return -errno instead of ++ true/false. */ ++/* TODO: The bkey routines currently assume tht address blocks are ++ allocated even if all contained addresses are NULL, but this is not ++ true. Make sure tht we differentiate between NULL block and error, ++ and then fix up ext2_set_key_blkaddr() and anything else (including ++ the pack/unpack routines). */ ++struct ext2_bkey { ++ int level; ++ u32 block; ++ struct inode *inode; ++ int off[4]; ++ u32 *ptr[4]; ++ struct buffer_head *ibh[4]; ++}; ++ ++ ++/* ++ * Method to convert a block number into a key. ++ * ++ * Returns 1 on success, 0 on failure. You may safely, but need ++ * not, free the key even if ext2_get_key() fails. ++ */ ++static int ext2_get_key(struct ext2_bkey *key, struct inode *inode, ++ u32 block) ++{ ++ int x, level; ++ int addr_per_block = EXT2_ADDR_PER_BLOCK(inode->i_sb); ++ ++ assert(atomic_read(&inode->i_mutex.count) <= 0); ++ ++ /* ++ * The first step can be viewed as translating the ++ * original block number in a special base (powers ++ * of addr_per_block). 
++ */ ++ ++ key->block = block; ++ ++ key->off[0] = key->off[1] = key->off[2] = key->off[3] = 0; ++ key->ibh[0] = key->ibh[1] = key->ibh[2] = key->ibh[3] = NULL; ++ key->ptr[0] = key->ptr[1] = key->ptr[2] = key->ptr[3] = NULL; ++ ++ if (block >= EXT2_NDIR_BLOCKS) { ++ block -= EXT2_NDIR_BLOCKS; ++ ++ if (block >= addr_per_block) { ++ block -= addr_per_block; ++ ++ if (block >= addr_per_block * addr_per_block) { ++ block -= addr_per_block * addr_per_block; ++ ++ key->off[0] = EXT2_TIND_BLOCK; ++ key->off[1] = (block / (addr_per_block * addr_per_block)); ++ key->off[2] = ++ (block % (addr_per_block * addr_per_block)) / ++ addr_per_block; ++ key->off[3] = (block % addr_per_block); ++ level = 3; ++ } else { ++ key->off[0] = EXT2_DIND_BLOCK; ++ key->off[1] = block / addr_per_block; ++ key->off[2] = block % addr_per_block; ++ level = 2; ++ } ++ } else { ++ key->off[0] = EXT2_IND_BLOCK; ++ key->off[1] = block; ++ level = 1; ++ } ++ } else { ++ key->off[0] = block; ++ level = 0; ++ } ++ ++ /* ++ * In the second step, we load the needed buffers. ++ */ ++ ++ key->level = level; ++ key->inode = inode; ++ ++ key->ptr[0] = (u32 *) (&(EXT2_I(inode)->i_data)); ++ ++ for (x = 1; x <= level; x++) { ++ u32 *ptr; ++ ++ ptr = key->ptr[x - 1]; ++ if (ptr == NULL) ++ break; ++/* Paul Whittaker tweak 19 Feb 2005 */ ++ block = le32_to_cpu(ptr[key->off[x - 1]]); ++ if (block == 0) ++ continue; // TLL 05/01/07 ++ if (x - 1 != 0) ++ block = le32_to_cpu(block); ++ if ((key->ibh[x] = __bread(inode->i_sb->s_bdev, ++ block, inode->i_sb->s_blocksize)) ++ == NULL) ++ goto error; ++ key->ptr[x] = (u32 *) (key->ibh[x]->b_data); ++ } ++ ++ return 1; ++ error: ++ for (; x != 0; x--) ++ if (key->ibh[x] != NULL) ++ brelse(key->ibh[x]); ++ return 0; ++} ++ ++ ++/* ++ * Find the block for a given key. Return 0 if there ++ * is no block for this key. ++ */ ++static inline u32 ext2_get_key_blkaddr(struct ext2_bkey *key) ++{ ++ assert(key->inode); ++ assert(atomic_read(&(key->inode)->i_mutex.count) <= 0); ++ ++/* Paul Whittaker tweak 19 Feb 2005 */ ++ if (key->ptr[key->level] == NULL) ++ return 0; ++ return le32_to_cpu(key->ptr[key->level][key->off[key->level]]); ++} ++ ++ ++/* ++ * Change the block for a given key. Return 0 on success, ++ * -errno on failure. ++ */ ++static inline int ext2_set_key_blkaddr(struct ext2_bkey *key, u32 blkaddr) ++{ ++ char bdn[BDEVNAME_SIZE]; ++ assert(key->inode); ++ assert(atomic_read(&(key->inode)->i_mutex.count) <= 0); ++ ++ if (key->ptr[key->level] == NULL) { ++ /* The reason that this "can't happen" is that this ++ routine is only used to shuffle block numbers or by ++ free_cluster_blocks. Cluster sizes are such that ++ clusters can't straddle address blocks. So the ++ indirect block address can't be zero. AFAIK, ptr ++ can only be NULL on error or on null indirect block ++ address. Hmm, come to think of it, I think there ++ are still some callers that don't check for errors ++ from ext2_get_key(), so this still can happen until ++ those are fixed up. */ ++ printk(KERN_ERR ++ "ext2_set_key_blkaddr: can't happen: NULL parent. " ++ "dev=%s, ino=%lu, level=%u.\n", ++ bdevname(key->inode->i_sb->s_bdev, bdn), ++ key->inode->i_ino, key->level); ++ return -ENOSYS; ++ } ++ /* Paul Whittaker tweak 19 Feb 2005 */ ++ key->ptr[key->level][key->off[key->level]] = le32_to_cpu(blkaddr); ++ if (key->level > 0) ++ mark_buffer_dirty(key->ibh[key->level]); ++ return 0; ++} ++ ++ ++/* ++ * Increment the key. Returns 0 if we go beyond the limits, ++ * 1 otherwise. 
++ * ++ * Precondition: -key->off[level] <= incr < addr_per_block. ++ */ ++static int ext2_next_key(struct ext2_bkey *key, int incr) ++{ ++ int addr_per_block = EXT2_ADDR_PER_BLOCK(key->inode->i_sb); ++ int x, level = key->level; ++ u32 tmp; ++ ++ assert(key->inode); ++ assert(atomic_read(&(key->inode)->i_mutex.count) <= 0); ++ ++ ++ /* ++ * Increment the key. This is done in two step: first ++ * adjust the off array, then reload buffers that should ++ * be reloaded (we assume level > 0). ++ */ ++ ++ assert(key->off[level] >= -incr); ++ assert(incr < addr_per_block); ++ key->block += incr; ++ key->off[level] += incr; ++ ++ /* ++ * First step: should be thought as the propagation ++ * of a carry. ++ */ ++ ++ if (level == 0) { ++ if (key->off[0] >= EXT2_NDIR_BLOCKS) { ++ key->off[1] = key->off[0] - EXT2_NDIR_BLOCKS; ++ key->off[0] = EXT2_IND_BLOCK; ++ level = 1; ++ } ++ x = 0; ++ } else { ++ for (x = level; x > 0; x--) { ++ if (key->off[x] >= addr_per_block) { ++ key->off[x] -= addr_per_block; ++ key->off[x - 1]++; ++ ++ if (x == 1) { ++ if (++level < 4) { ++ key->off[level] = key->off[level - 1]; ++ key->off[level - 1] = 0; ++ } else ++ return 0; ++ } ++ } else ++ break; ++ } ++ } ++ ++ /* ++ * Second step: reload the buffers that have changed. ++ */ ++ ++ key->level = level; ++ ++ CHECK_NOT_ATOMIC ++ while (x++ < level) { ++ if (key->ibh[x] != NULL) { ++ if (IS_SYNC(key->inode) && buffer_dirty(key->ibh[x])) { ++ //mw: ++ assert(buffer_mapped(key->ibh[x]) ++ && (key->ibh[x]->b_bdev != NULL)); ++ ll_rw_block(WRITE, 1, &(key->ibh[x])); ++ wait_on_buffer(key->ibh[x]); ++ } ++ brelse(key->ibh[x]); ++ } ++/* Paul Whittaker tweak 19 Feb 2005 */ ++ if ((key->ptr[x - 1] != NULL) ++ && ((tmp = le32_to_cpu(key->ptr[x - 1][key->off[x - 1]])) != ++ 0)) { ++ if ((key->ibh[x] = ++ __bread(key->inode->i_sb->s_bdev, tmp, ++ key->inode->i_sb->s_blocksize)) ++ != NULL) ++ key->ptr[x] = (u32 *) (key->ibh[x]->b_data); ++ else ++ key->ptr[x] = NULL; ++ } else { ++ key->ibh[x] = NULL; ++ key->ptr[x] = NULL; ++ } ++ } ++ ++ return 1; ++} ++ ++ ++/* Method to free the key: just release buffers. ++ ++ Returns 0 on success, -errno on error. ++*/ ++ ++static int ext2_free_key(struct ext2_bkey *key) ++{ ++ int x, n; ++ struct buffer_head *bh[4]; ++ ++ assert(key->inode); ++ assert(atomic_read(&(key->inode)->i_mutex.count) <= 0); ++ ++ ++ for (x = 0, n = 0; x <= key->level; x++) { ++ if (key->ibh[x] != NULL) { ++ if (IS_SYNC(key->inode) && buffer_dirty(key->ibh[x])) ++ bh[n++] = key->ibh[x]; ++ else ++ brelse(key->ibh[x]); ++ } ++ } ++ ++ if (n > 0) { ++ int ncopy = n; ++ while (ncopy-- > 0) { ++ assert(buffer_mapped(bh[ncopy]) ++ && (bh[ncopy]->b_bdev != NULL)); ++ } ++ ++ ll_rw_block(WRITE, n, bh); ++ ++ CHECK_NOT_ATOMIC ++ ++ while (n-- > 0) { ++ wait_on_buffer(bh[n]); ++ /* TODO: Check for error. */ ++ brelse(bh[n]); ++ } ++ } ++ return 0; ++} ++ ++ ++/* Returns positive if specified cluster is compressed, ++ zero if not, ++ -errno if an error occurred. ++ ++ If you need the result to be accurate, then down i_sem before ++ calling this, and don't raise i_sem until after you've used the ++ result. */ ++int ext2_cluster_is_compressed_fn(struct inode *inode, unsigned cluster) ++{ ++ unsigned block = (ext2_cluster_block0(inode, cluster) ++ + ext2_cluster_nblocks(inode, cluster) ++ - 1); ++ struct ext2_bkey key; ++ int result; ++ ++ assert(atomic_read(&inode->i_mutex.count) <= 0); ++ ++ /* impl: Not all callers of ext2_cluster_is_compressed_fn() have ++ i_sem down. 
Of course it is impossible to guarantee ++ up-to-date information for such callers (someone may ++ compress or decompress between when we check and when they ++ use the information), so hopefully it won't matter if the ++ information we return is slightly inaccurate (e.g. because ++ someone is de/compressing the cluster while we check). */ ++ if (!ext2_get_key(&key, inode, block)) ++ return -EIO; ++ ++ result = (ext2_get_key_blkaddr(&key) == EXT2_COMPRESSED_BLKADDR); ++ ext2_free_key(&key); ++ return result; ++} ++ ++ ++/* Support for the GETCOMPRRATIO ioctl() call. We calculate how many ++ blocks the file would hold if it weren't compressed. This requires ++ reading the cluster head for every compressed cluster. ++ ++ Returns either -EAGAIN or the number of blocks that the file would ++ take up if uncompressed. */ ++int ext2_count_blocks(struct inode *inode) ++{ ++ struct buffer_head *head_bh; ++ int count; ++ int cluster; ++ struct ext2_bkey key; ++ u32 end_blknr; ++ ++ if (!(EXT2_I(inode)->i_flags & EXT2_COMPRBLK_FL)) ++ return inode->i_blocks; ++ ++ mutex_lock(&inode->i_mutex); ++ end_blknr = ROUNDUP_RSHIFT(inode->i_size, ++ inode->i_sb->s_blocksize_bits); ++ ++ /* inode->i_blocks is stored in units of 512-byte blocks. It's ++ more convenient for us to work in units of s_blocksize. */ ++ { ++ u32 shift = inode->i_sb->s_blocksize_bits - 9; ++ ++ count = inode->i_blocks; ++ if (count & ((1 << shift) - 1)) ++ ext2_msg(inode->i_sb, ++ "ext2_count_blocks", ++ "i_blocks not multiple of blocksize"); ++ count >>= shift; ++ } ++ ++ cluster = 0; ++ if (!ext2_get_key(&key, inode, 0)) { ++ count = -EIO; ++ goto out; ++ } ++ while (key.block < end_blknr) { ++ u32 head_blkaddr = ext2_get_key_blkaddr(&key); ++ ++ /* bug fix: init head_bh for each iteration TLL 2/21/07 */ ++ head_bh = NULL; ++ if (head_blkaddr == EXT2_COMPRESSED_BLKADDR) { ++ count = -EXT2_ECOMPR; ++ break; ++ } ++ if (!ext2_next_key(&key, ext2_cluster_nblocks(inode, cluster) - 1)) ++ break; ++ if (ext2_get_key_blkaddr(&key) == EXT2_COMPRESSED_BLKADDR) { ++ struct ext2_cluster_head *head; ++ ++ if (head_blkaddr == 0) { ++ count = -EXT2_ECOMPR; ++ break; ++ } ++ head_bh = __getblk(inode->i_sb->s_bdev, ++ head_blkaddr, inode->i_sb->s_blocksize); ++ if (head_bh == NULL) { ++ /* Hmm, EAGAIN or EIO? 
*/ ++ count = -EAGAIN; ++ break; ++ } ++ if (!buffer_uptodate(head_bh)) ++ ll_rw_block(READ, 1, &head_bh); ++ ++ CHECK_NOT_ATOMIC ++ ++ wait_on_buffer(head_bh); ++ ++#ifdef CONFIG_HIGHMEM ++ if (!page_address(head_bh->b_page)) { ++ BUG(); ++ } ++#endif ++ ++ head = (struct ext2_cluster_head *) head_bh->b_data; ++ /* remove clen > ulen test TLL 2/21/07 */ ++ if ((head->magic != cpu_to_le16(EXT2_COMPRESS_MAGIC_04X)) ++ || (le32_to_cpu(head->ulen) > EXT2_MAX_CLUSTER_BYTES) ++ || (head->holemap_nbytes > 4)) { ++ count = -EXT2_ECOMPR; ++ break; ++ } ++ assert(sizeof(struct ext2_cluster_head) == 16); ++ count += (ROUNDUP_RSHIFT(le32_to_cpu(head->ulen), ++ inode->i_sb->s_blocksize_bits) ++ - ROUNDUP_RSHIFT((le32_to_cpu(head->clen) ++ + sizeof(struct ext2_cluster_head) ++ + head->holemap_nbytes), ++ inode->i_sb->s_blocksize_bits)); ++ brelse(head_bh); ++ head_bh = NULL; ++ } ++ ++ if (!ext2_next_key(&key, 1)) ++ break; ++ cluster++; ++ } ++ ext2_free_key(&key); ++ if (head_bh != NULL) ++ brelse(head_bh); ++ out: ++ mutex_unlock(&inode->i_mutex); ++ if (count == -EXT2_ECOMPR) { ++ ext2_msg(inode->i_sb, ++ "ext2_count_blocks", ++ "invalid compressed cluster %u of inode %lu", ++ cluster, inode->i_ino); ++ EXT2_I(inode)->i_flags |= EXT2_ECOMPR_FL; ++ } ++ ++ /* The count should be in units of 512 (i.e. 1 << 9) bytes. */ ++ if (count >= 0) ++ count <<= inode->i_sb->s_blocksize_bits - 9; ++ return count; ++} ++ ++ ++/* Decompress some blocks previously obtained from a cluster. ++ Decompressed data is stored in ext2_rd_wa.u. Buffer heads in the bh ++ array are packed together at the begining of the array. The ulen ++ argument is an indication of how many bytes the caller wants to ++ obtain, excluding holes. (This can be less than head->ulen, as in the ++ case of readpage.) No hole processing is done; we don't even look at ++ head->holemap. ++ ++ Note the semantic difference between this and ++ (): the latter decompresses a cluster _and ++ stores it as such_, whereas ext2_decompress_blocks() just ++ decompresses the contents of the blocks into ext2_rd_wa.u. ++ ++ The working area is supposed to be available and locked. ++ ++ Returns a negative value on failure, the number of bytes ++ decompressed otherwise. ++ ++ Called by : ++ ++ ext2_decompress_cluster () [sem down] ++ ext2_readpage () [sem down, but only ifndef EXT2_LOCK_BUFFERS] */ ++ ++/* TODO: ext2_decompress_blocks() scribbles in ext2_rd_wa.c. ++ Check callers to make sure this isn't a problem. */ ++ ++/* mw: caller must already have done: "get_cpu_var(ext2_rd_wa)" */ ++size_t ++ext2_decompress_blocks(struct inode * inode, ++ struct buffer_head ** bh, ++ int nblk, size_t ulen, u32 cluster) ++{ ++ struct ext2_cluster_head *head; ++ int count, src_ix, x; ++ unsigned char *dst; ++ unsigned meth, alg; ++ char bdn[BDEVNAME_SIZE]; ++ ++#ifdef EXT2_COMPR_DEBUG ++ assert(in_atomic()); ++ assert(atomic_read(&inode->i_mutex.count) <= 0); /* i.e. mutex_lock */ ++#endif ++ ++ /* ++ We pack the buffer together before (and must take care ++ not to duplicate the buffer heads in the array). ++ ++ pjm 1998-01-09: Starting from e2compr-0.4.0, they should ++ already be packed together in the blkaddr array. TODO: ++ Insert appropriate assert() statements checking tht this is ++ the case. TODO: Check that callers have bh[] packed. 
*/ ++#ifdef EXT2_COMPR_REPORT ++ trace_e2c("ext2_decompress_blocks: nblk=%d\n", nblk); ++#endif ++ for (src_ix = 0, x = 0; src_ix < nblk; src_ix++) { ++ if (bh[src_ix] == NULL) ++ printk("no_bheader()\n"); ++ if ((bh[src_ix] != NULL) && (bh[src_ix]->b_blocknr != 0)) { ++ ++ if (x < src_ix) { ++ ext2_msg(inode->i_sb, "bad buffer table", ++ "inode = %lu", inode->i_ino); ++ goto error; ++ } ++ x++; ++ } ++ } ++ ++ nblk = x; ++#ifdef EXT2_COMPR_REPORT_CPR ++ trace_e2c("ext2_decompress_blocks (2): nblk=%d\n", nblk); ++#endif ++ if (nblk == 0) { ++ ext2_msg(inode->i_sb, "no block in cluster", "inode = %lu", ++ inode->i_ino); ++ goto error; ++ } ++ ++ restore_b_data_himem(bh[0]); ++ head = (struct ext2_cluster_head *) (bh[0]->b_data); ++ ++ /* ++ * Do some consistency checks. ++ */ ++ ++ if (head->magic != cpu_to_le16(EXT2_COMPRESS_MAGIC_04X)) { ++ ext2_msg(inode->i_sb, ++ "bad magic number", ++ "inode = %lu, magic = %#04x", ++ inode->i_ino, le16_to_cpu(head->magic)); ++ goto error; ++ } ++#if EXT2_GRAIN_SIZE & (EXT2_GRAIN_SIZE - 1) ++# error "This code assumes EXT2_GRAIN_SIZE to be a power of two." ++#endif ++ /* The macro also assumes that _a > 0, _b > 0. */ ++#define ROUNDUP_GE(_a, _b, _d) ( ( ((_a) - 1) \ ++ | ((_d) - 1)) \ ++ >= ( ((_b) - 1) \ ++ | ((_d) - 1))) ++ ++ //mw: following 3 just for debugging!!! ++ assert(!((le32_to_cpu(head->ulen) > EXT2_MAX_CLUSTER_BYTES))); ++ assert(!((head->clen == 0))); ++ assert(!(ROUNDUP_GE(le32_to_cpu(head->clen) ++ + head->holemap_nbytes + sizeof(struct ext2_cluster_head), ++ le32_to_cpu(head->ulen), EXT2_GRAIN_SIZE))); ++ ++ if ((le32_to_cpu(head->ulen) > EXT2_MAX_CLUSTER_BYTES) ++ || (head->clen == 0) ++ || ROUNDUP_GE(le32_to_cpu(head->clen) ++ + head->holemap_nbytes ++ + sizeof(struct ext2_cluster_head), ++ le32_to_cpu(head->ulen), EXT2_GRAIN_SIZE)) { ++ ext2_msg(inode->i_sb, ++ "invalid cluster len", ++ "inode = %lu, len = %u:%u", ++ inode->i_ino, ++ le32_to_cpu(head->clen), le32_to_cpu(head->ulen)); ++ goto error; ++ } ++#undef ROUNDUP_GE ++ ++ /* TODO: Test for `nblk != 1 + ...' instead of the current ++ one-sided test. However, first look at callers, and make ++ sure that they handle the situation properly (e.g. freeing ++ unneeded blocks) and tht they always pass a correct ++ value for nblk. */ ++ if (nblk <= ((le32_to_cpu(head->clen) ++ + head->holemap_nbytes + sizeof(struct ext2_cluster_head) ++ - 1) ++ / bh[0]->b_size)) { ++ int i; ++ ext2_msg(inode->i_sb, ++ "missing blocks", ++ "inode = %lu, blocks = %d/%u", ++ inode->i_ino, nblk, ((le32_to_cpu(head->clen) ++ + head->holemap_nbytes ++ + sizeof(struct ext2_cluster_head) ++ - 1) ++ / bh[0]->b_size) + 1); ++ printk("i_size=%d\n", (int) inode->i_size); ++ for (i = 0; i < 12; i++) ++ printk("i_data[%d]=%d\n", i, EXT2_I(inode)->i_data[i]); ++ printk("cluster_head (sizeof head=%u):\n\tmagic=0x%4x\n\tmethod=%d\n\t \ ++ holemap_nbytes=%d\n\tulen=%d\n\tclen=%d\n\tbh->b_size=%zu\n", ++ sizeof(struct ext2_cluster_head), head->magic, ++ (int) head->method, (int) head->holemap_nbytes, head->ulen, ++ head->clen, bh[0]->b_size); ++ goto error; ++ } ++ ++ /* I moved it here in case we need to load a module that ++ * needs more heap that is currently allocated. ++ * In such case "init_module" for that algorithm forces ++ * re-allocation of ext2_wa. It should be safe here b/c the ++ * first reference to ext2_wa comes just after and we have ++ * locked ext2_wa before. ++ * ++ * FIXME: Totally separate working areas for reading and writing. ++ * Jan R. 
++ */ ++ meth = head->method; /* only a byte, so no swabbing needed. */ ++ if (meth >= EXT2_N_METHODS) { ++ ext2_msg(inode->i_sb, ++ "Ass: illegal method id", ++ "inode = %lu, id = %u", inode->i_ino, meth); ++ dump_stack(); ++ goto error; ++ } ++ alg = ext2_method_table[meth].alg; ++ ++ /* ++ * Adjust the length if too many bytes are requested. ++ * ++ * TODO: Traiter les bitmaps ici, et non plus au niveau de ++ * l'appelant. Faire un petit cache en memorisant le ++ * numero du dernier noeud decompresse et du dernier ++ * cluster. Le pb, c'est qu'on ne peut pas savoir si ++ * les blocs ont ete liberes et realloue entre temps ++ * -> il faut etre prevenu pour invalider le buffer. ++ * ++ * pjm fixme tr: Take care of the bitmaps here, ++ * instead of by the caller as we currently do. Keep ++ * a small cache that holds the number of the ++ * previous to have been ++ * decompressed. The problem is that we have no way ++ * of knowing whether the blocks have been freed and ++ * reallocated in the meantime / since last time -> ++ * we must be informed so that we can invalidate the ++ * buffer. */ ++ if (ulen > le32_to_cpu(head->ulen)) { ++ memset(__get_cpu_var(ext2_rd_wa)->u + le32_to_cpu(head->ulen), 0, ulen - le32_to_cpu(head->ulen)); ++ ulen = le32_to_cpu(head->ulen); ++ ++ assert((bh[0]->b_size & (bh[nblk - 1]->b_size - 1)) == 0); ++ if (((le32_to_cpu(head->clen) ++ + head->holemap_nbytes + sizeof(struct ext2_cluster_head) ++ - 1) ++ | (bh[0]->b_size - 1)) ++ >= ((ulen - 1) | (bh[0]->b_size - 1))) { ++ printk(KERN_WARNING ++ "ext2_decompress_blocks: " ++ "ulen (=%zu) or clen (=%u) wrong " ++ "in dev %s, inode %lu.\n", ++ ulen, le32_to_cpu(head->clen), ++ bdevname(inode->i_sb->s_bdev, bdn), inode->i_ino); ++ goto error; ++ } ++ } ++ ++ /* ++ * Now, decompress data. ++ */ ++ /* TODO: Is this (ulen == 0) possible? */ ++ if (ulen == 0) ++ return 0; ++ ++ for (x = 0, dst = __get_cpu_var(ext2_rd_wa)->c; x < nblk; dst += bh[x++]->b_size) { ++ restore_b_data_himem(bh[x]); ++ memcpy(dst, bh[x]->b_data, bh[x]->b_size); ++ } ++ ++ ++ if (!ext2_algorithm_table[alg].avail) { ++ ext2_msg(inode->i_sb, ++ "ext2_decompress_blocks", ++ "algorithm `%s' not available for inode %lu", ++ ext2_algorithm_table[alg].name, inode->i_ino); ++ ext2_mark_algorithm_use(inode, alg); ++ goto error; ++ } ++ ++ ++#ifdef EXT2_COMPR_DEBUG ++ { ++ struct ext2_cluster_head *wa1head = (struct ext2_cluster_head *) __get_cpu_var(ext2_rd_wa)->c; ++ unsigned clen = le32_to_cpu(wa1head->clen); ++ if (wa1head->checksum != ++ cpu_to_le32(ext2_adler32 ++ (le32_to_cpu(*(u32 *) __get_cpu_var(ext2_rd_wa)->c), ++ __get_cpu_var(ext2_rd_wa)->c + 8, ++ (sizeof(struct ext2_cluster_head) - 8 + ++ head->holemap_nbytes + clen)))) ++ { ++ head->checksum = cpu_to_le32(0); ++ ext2_msg(inode->i_sb, "ext2_decompress_blocks: corrupted compressed data ", ++ "in inode %lu", inode->i_ino); ++ //goto error; ++ //mw: we try to go on. if data is corrupt we will get an compression error anyway. ++ } ++ } ++#endif ++ ++ count = ext2_algorithm_table[alg].decompress(__get_cpu_var(ext2_rd_wa)->c + ++ sizeof(struct ++ ext2_cluster_head) + ++ head->holemap_nbytes, ++ __get_cpu_var(ext2_rd_wa)->u, ++ __get_cpu_var(ext2_rd_wa)->heap, ++ le32_to_cpu(head->clen), ulen, ++ ext2_method_table[meth].xarg); ++ ++ /* If we got fewer than ulen bytes, there is a problem, since ++ we corrected the ulen value before decompressing. Note ++ that it's OK for count to exceed ulen, because ulen can be ++ less than head->ulen. 
*/ ++ if ((count < ulen) || (count != le32_to_cpu(head->ulen))) { ++ ext2_msg(inode->i_sb, ++ "ext2_decompress_blocks: corrupted compressed data ", "inode = %lu, count = %u of %zu (%u/%u)", ++ inode->i_ino, count, ulen, le32_to_cpu(head->clen), le32_to_cpu(head->ulen)); ++ goto error; ++ } ++ ext2_ensure_algorithm_use(inode, alg); ++ return count; ++ ++ error: ++ ++ /* Raise the ECOMPR flag for this file. What this means is ++ that the file cannot be written to, and can only be read if ++ the user raises the NOCOMPR flag. ++ ++ pjm 1997-01-16: I've changed it so that files with ECOMPR ++ still have read permission, so user can still read the rest ++ of the file but get an I/O error (errno = EXT2_ECOMPR) when ++ they try to access anything from this cluster. */ ++ ++ EXT2_I(inode)->i_flags |= EXT2_ECOMPR_FL; ++ ++ inode->i_ctime = CURRENT_TIME; ++ mark_inode_dirty_sync(inode); ++ /* pjm 1998-02-21: We used to do `memset(ext2_rd_wa.u, 0, ulen)' ++ here because once upon a time the user could sometimes see ++ buf contents. I believe that this can never happen any ++ more. */ ++ return -EXT2_ECOMPR; ++} ++ ++ ++/* ext2_calc_free_ix: Calculates the position of the C_NBLK'th non-hole ++ block; equals C_NBLK plus the number of holes in the first CALC_FREE_IX() ++ block positions of the cluster. ++ ++ pre: 1 =< c_nblk < EXT2_MAX_CLUSTER_BLOCKS, ++ Number of 1 bits in ,ubitmap` > ,c_nblk`. ++ post: c_nblk =< calc_free_ix() < EXT2_MAX_CLUSTER_BLOCKS ++ ++ Called by: ++ ext2_decompress_cluster() ++ ext2_file_write() ++ ++ TODO: Have ext2_compress_cluster() call this. ++ */ ++unsigned ext2_calc_free_ix(unsigned holemap_nbytes, u8 const *holemap, ++ unsigned c_nblk) ++{ ++ unsigned i; ++ ++ assert(1 <= c_nblk); ++ assert(c_nblk < EXT2_MAX_CLUSTER_BLOCKS); ++ for (i = 0; (i < holemap_nbytes * 8) && (c_nblk > 0);) { ++ assert(i < EXT2_MAX_CLUSTER_BLOCKS - 1); ++ if ((holemap[i >> 3] & (1 << (i & 7))) == 0) ++ c_nblk--; ++ i++; ++ } ++ i += c_nblk; ++ assert(i < EXT2_MAX_CLUSTER_BLOCKS); ++ return i; ++} ++ ++ ++/* (): Prepare the blkaddr[] array for ++ decompression by moving non-hole blocks to their proper positions ++ (according to ubitmap) and zeroing any other blocks. ++ ++ Returns 0 on success, -errno on error. ++ ++ Note: We assume tht blkaddr[i] won't change under us forall ++ clu_block0 =< i < clu_block0 + clu_nblocks. Holding i_sem should ++ guarantee this. ++ ++ Called by: ++ ext2_decompress_cluster() ++ ext2_file_write() */ ++int ++ext2_unpack_blkaddrs(struct inode *inode, ++ struct buffer_head *bh[], ++ int mmcp, ++ unsigned holemap_nbytes, ++ u8 const *holemap, ++ unsigned c_nblk, ++ unsigned free_ix, ++ unsigned clu_block0, unsigned clu_nblocks) ++{ ++ struct ext2_bkey key; ++ u32 *blkaddr; ++ unsigned si, di; ++ ++ assert(clu_nblocks <= EXT2_MAX_CLUSTER_BLOCKS); ++ assert(1 <= c_nblk); ++ assert(c_nblk <= free_ix); ++ assert(free_ix < EXT2_MAX_CLUSTER_BLOCKS); ++ if (!ext2_get_key(&key, inode, clu_block0)) ++ return -EIO; ++ ++ if (key.ptr[key.level] == NULL) { ++ /* TODO: Call ext2_error(). */ ++ ext2_free_key(&key); ++ return -EIO; ++ } ++ ++ /* impl: Note tht we're relying on clusters not straddling ++ address block boundaries. 
*/ ++ blkaddr = &key.ptr[key.level][key.off[key.level]]; ++ memset(blkaddr + free_ix, ++ 0, sizeof(*blkaddr) * (clu_nblocks - free_ix)); ++ si = c_nblk; ++ for (di = free_ix; di > si;) { ++ --di; ++ if (((di >> 3) < holemap_nbytes) ++ && (holemap[di >> 3] & (1 << (di & 7)))) { ++ blkaddr[di] = 0; ++ bh[di]->b_blocknr = 0; ++ clear_bit(BH_Mapped, &bh[di]->b_state); ++ } else { ++ if (si == 0) { ++ break; ++ } ++ blkaddr[di] = blkaddr[--si]; ++ assert(bh[di]->b_blocknr == 0); ++ assert(bh[si]->b_blocknr != 0); ++ assert(buffer_mapped(bh[si])); ++#ifdef EXT2_COMPR_REPORT_CPR ++ trace_e2c("unpack: di=%d sts=0x%x si=%d blk=%ld sts=0x%x\n", ++ di, (int) bh[di]->b_state, si, bh[si]->b_blocknr, ++ (int) bh[si]->b_state); ++#endif ++ bh[di]->b_blocknr = bh[si]->b_blocknr; ++ set_bit(BH_Mapped, &bh[di]->b_state); ++ bh[si]->b_blocknr = 0; ++ clear_bit(BH_Mapped, &bh[si]->b_state); ++ set_bit(BH_Uptodate, &bh[di]->b_state); ++ if (mmcp) { ++ restore_b_data_himem(bh[si]); ++ restore_b_data_himem(bh[di]); ++ memcpy(bh[di]->b_data, bh[si]->b_data, ++ inode->i_sb->s_blocksize); ++ } ++ } ++ } ++ if (key.level > 0) ++ mark_buffer_dirty(key.ibh[key.level]); ++ return ext2_free_key(&key); ++} ++ ++ ++/* ++ * Decompress one cluster. If already compressed, the cluster ++ * is decompressed in place, and the compress bitmap is updated. ++ * ++ * Returns the size of decompressed data on success, a negative ++ * value in case of failure, or 0 if the cluster was not compressed. ++ * ++ * The inode is supposed to be writable. ++ * ++ * Called by : ++ * ++ * ext2_decompress_inode() [sem down] ++ * ext2_file_write() [sem down] ++ * trunc_bitmap() [sem down] ++ */ ++int ext2_decompress_cluster(struct inode *inode, u32 cluster) ++{ ++ struct buffer_head *bh[EXT2_MAX_CLUSTER_BLOCKS]; ++ struct buffer_head *bhc[EXT2_MAX_CLUSTER_BLOCKS]; ++ struct page *pg[EXT2_MAX_CLUSTER_PAGES], *epg[EXT2_MAX_CLUSTER_PAGES]; ++ int result, nbh; ++ unsigned npg, c_nblk; ++ struct ext2_cluster_head *head; ++ int i = 0; ++ unsigned free_ix, clu_block0, clu_nblocks; ++ int d_npg = -1; /* number of decompressed page */ ++ unsigned long allpagesuptodate = 1; ++ struct buffer_head *bh_writeout[EXT2_MAX_CLUSTER_BLOCKS]; ++ int bhn_writeout; ++#ifdef CONFIG_HIGHMEM ++ int kmapped = 0; ++#endif ++ ++ for (i = 0; i < EXT2_MAX_CLUSTER_BLOCKS; i++) { ++ bh_writeout[i] = NULL; ++ bhn_writeout = 0; ++ } ++ ++ assert(atomic_read(&inode->i_mutex.count) <= 0); /* i.e. mutex_lock */ ++ ++ for (i = 0; i < EXT2_MAX_CLUSTER_PAGES; i++) ++ epg[i] = NULL; ++ ++ /* ++ Get blocks from cluster. ++ Assign to variables head, ubitmap, clu_block0, clu_nblocks. ++ Shuffle blkaddr[] array and write zero to holes. ++ Allocate new blocks. ++ Get the working area. ++ Decompress. ++ Copy to bh[]->b_data (marking buffers uptodate and dirty). ++ Release working area. ++ Release bh[]. 
++ */ ++ ++ nbh = 0; ++ npg = ext2_cluster_npages(inode, cluster); ++ result = ext2_get_cluster_pages(inode, cluster, pg, NULL, 0); ++ if (result <= 0) { ++ for (i = 0; i < npg; i++) ++ epg[i] = NULL; ++ goto out_err; ++ } ++ ++ for (i = 0; i < npg; i++) { ++ if ((pg[i]->index <= ((inode->i_size - 1) >> PAGE_CACHE_SHIFT)) && ++ !PageUptodate(pg[i])) { ++ allpagesuptodate = 0; ++ } ++ } ++ if (allpagesuptodate) { ++ //printk("DecompressPages: Ino:%lu\n", inode->i_ino); ++ result = ext2_decompress_pages(inode, cluster, pg); ++ if (result != 0) { ++ for (i = 0; i < npg; i++) ++ epg[i] = NULL; ++ if (result > 0) ++ goto cleanup; ++ else ++ goto out_err; ++ } ++ /*mw: if we continue here then in ext2_decompress_pages ++ * not all pages were up-to-date ++ */ ++ } ++ //printk("DecompressCluster: Ino:%lu\n", inode->i_ino); ++ result = ext2_get_cluster_extra_pages(inode, cluster, pg, epg); ++ if (result <= 0) { ++ goto out_err; ++ } ++#ifdef CONFIG_HIGHMEM ++ ext2_kmap_cluster_pages(NULL, pg, epg); ++ kmapped = 1; ++#endif ++ ++ result = ext2_get_cluster_blocks(inode, cluster, bh, pg, epg, 0); ++ if (result <= 0) { ++ goto out_err; ++ } ++ nbh = c_nblk = result; ++ ++ ++#ifdef EXT2_COMPR_REPORT ++ { ++ int j; ++ printk ++ (" > > > ext2_decompress_cluster %d: inode=%ld, size=%d nbh=%d\n", ++ cluster, inode->i_ino, (int) inode->i_size, nbh); ++#ifdef EXT2_COMPR_REPORT_VERBOSE ++ for (j = 0; j < nbh; j++) { ++ if (bh[j]) { ++ printk("0buffer_head[%d]: blocknr=%lu, addr=%p \n", j, ++ (unsigned long) bh[j]->b_blocknr, bh[j]); ++ if (bh[j]->b_page) ++ printk("0:[page->index=%ld]\n", bh[j]->b_page->index); ++ else ++ printk("[No page]\n"); ++ } else ++ printk("buffer_head[%d] is NULL\n", j); ++ } ++ while ((j < EXT2_MAX_CLUSTER_BLOCKS) && (bh[j] != NULL) && bh[j]->b_blocknr) { /*Add by Yabo Ding */ ++ printk ++ ("buffer_head[%d] is free but not NULL: blocknr=%lu, addr=%p\n", ++ j, (unsigned long) bh[j]->b_blocknr, bh[j]); ++ j++; ++ } ++#endif ++ } ++#endif ++ for (i = 0; i < nbh; i++) ++ assert(bh[i]->b_blocknr != 0); ++ ++ restore_b_data_himem(bh[0]); ++ ++ head = (struct ext2_cluster_head *) bh[0]->b_data; ++ if (head->magic != cpu_to_le16(EXT2_COMPRESS_MAGIC_04X)) { ++ ext2_msg(inode->i_sb, ++ "ext2_decompress_cluster: bad magic number", ++ "cluster %d: inode = %lu, magic = %#04x", ++ cluster, inode->i_ino, le16_to_cpu(head->magic)); ++ EXT2_I(inode)->i_flags |= EXT2_ECOMPR_FL; ++ result = -EXT2_ECOMPR; ++ goto out_err; ++ } ++ if (le32_to_cpu(head->ulen) - ++ (c_nblk << inode->i_sb->s_blocksize_bits) <= 0) { ++ ext2_error(inode->i_sb, "ext2_decompress_cluster", ++ "ulen too small for c_nblk. ulen=%u, c_nblk=%u, bs=%lu", ++ le32_to_cpu(head->ulen), c_nblk, ++ inode->i_sb->s_blocksize); ++ EXT2_I(inode)->i_flags |= EXT2_ECOMPR_FL; ++ result = -EXT2_ECOMPR; ++ goto out_err; ++ } ++ free_ix = ++ ext2_calc_free_ix(head->holemap_nbytes, (u8 const *) (&head[1]), ++ c_nblk); ++ clu_block0 = ext2_cluster_block0(inode, cluster); ++ clu_nblocks = ext2_cluster_nblocks(inode, cluster); ++ ext2_unpack_blkaddrs(inode, bh, 1, ++ head->holemap_nbytes, (u8 const *) (&head[1]), ++ c_nblk, free_ix, clu_block0, clu_nblocks); ++ ++ /* Allocate the extra blocks needed. 
*/ ++ { ++ int data_left = le32_to_cpu(head->ulen); ++ ++ data_left -= c_nblk << inode->i_sb->s_blocksize_bits; ++ assert(data_left > 0); ++ for (i = free_ix; i < clu_nblocks; i++) ++ if (((i >> 3) >= head->holemap_nbytes) ++ || !(head->holemap[i >> 3] & (1 << (i & 7)))) { ++ result = ext2_get_block(inode, ++ clu_block0 + i, ++ bh[i], 1 /* create */ ); ++ if (bh[i]->b_blocknr == 0) ++ goto out_err; ++ d_npg = ++ (i >> ++ (PAGE_CACHE_SHIFT - inode->i_sb->s_blocksize_bits)) + ++ 1; ++ nbh++; ++ data_left -= inode->i_sb->s_blocksize; ++ if (data_left <= 0) ++ break; ++ } ++ } ++ ++ /* jmr 1998-10-28 Hope this is the last time I'm moving this code. ++ * Module loading must be done _before_ we lock wa, just think what ++ * can happen if we reallocate wa when somebody else uses it... ++ */ ++ { ++ unsigned meth; ++#ifdef CONFIG_KMOD ++ unsigned alg; ++#endif ++ ++ meth = head->method; /* only a byte, so no swabbing needed. */ ++ if (meth >= EXT2_N_METHODS) { ++ ext2_msg(inode->i_sb, ++ "Ass.: illegal method id", ++ "inode = %lu, id = %u", inode->i_ino, meth); ++ result = -EXT2_ECOMPR; ++ goto out_err; ++ } ++#ifdef CONFIG_KMOD ++ alg = ext2_method_table[meth].alg; ++ if (!ext2_algorithm_table[alg].avail) { ++ char str[32]; ++ ++ sprintf(str, "ext2-compr-%s", ext2_algorithm_table[alg].name); ++ request_module(str); ++ } ++#endif ++ } ++ ++ result = -EINTR; ++ ++ /* ++ * Then, decompress and copy back data. ++ */ ++ { ++ int ic; ++ ++ for (ic = 0, i = 0; i < clu_nblocks; i++) { ++ if (bh[i]->b_blocknr != 0) { ++ bhc[ic] = bh[i]; ++ ic++; ++ if (ic == c_nblk) { ++ break; ++ } ++ } ++ } ++ } ++ ++ ++#ifdef EXT2_COMPR_REPORT_WA ++ printk(KERN_DEBUG "pid %d locks wa\n", current->pid); ++#endif ++ if (get_cpu_var(ext2_rd_wa) == NULL) ++ { ++ ext2_alloc_rd_wa(); ++ } ++ assert(__get_cpu_var(ext2_rd_wa) != NULL); ++ ++ result = ext2_decompress_blocks(inode, bhc, c_nblk, ++ le32_to_cpu(head->ulen), cluster); ++ if (result != (int) le32_to_cpu(head->ulen)) { ++ if (result >= 0) { ++ /* I think this is impossible, as ++ ext2_decompress_blocks() checks against ++ head->ulen. */ ++ printk(KERN_WARNING "Unexpected return value %d " ++ "from ext2_decompress_blocks()\n", result); ++ result = -EXT2_ECOMPR; ++ } ++ ++#ifdef EXT2_COMPR_REPORT_WA ++ printk(KERN_DEBUG "pid %d unlocks wa\n", current->pid); ++#endif ++ put_cpu_var(ext2_rd_wa); ++ goto out_err; ++ } ++ ++#ifdef EXT2_COMPR_REPORT ++ printk(KERN_DEBUG "ext2: %04x:%lu: cluster %d+%d [%d] " ++ "decompressed into %d bytes\n", ++ inode->i_rdev, ++ inode->i_ino, clu_block0, clu_nblocks, c_nblk, result); ++#endif ++ ++ /* Copy back decompressed data. 
*/ ++ { ++ int count = result; ++ unsigned char const *src; ++ int c, p; ++ int cbh; ++ int n; /* block index in page */ ++ struct buffer_head *bp; ++ unsigned addr0, b_start, b_end; ++ ++ assert(count > 0); ++ if (d_npg == -1) { ++ d_npg = ((count - 1) >> PAGE_CACHE_SHIFT) + 1; ++ } ++#ifdef EXT2_COMPR_REPORT_CPR ++ trace_e2c ++ ("ext2_decompress_cluster: cnt=%d free_ix=%d d_npg=%d nbh=%d\n", ++ count, free_ix, d_npg, nbh); ++#endif ++ result = -EXT2_ECOMPR; ++ src = __get_cpu_var(ext2_rd_wa)->u; ++ cbh = 0; ++ for (c = 0; c < clu_nblocks; c++) { ++ ++ if (bh[c]->b_blocknr == 0) { ++#ifdef EXT2_COMPR_REPORT_CPR ++ trace_e2c("\t clear buf %d sts=0x%x\n", c, ++ (int) bh[c]->b_state); ++#endif ++ restore_b_data_himem(bh[c]); ++ memset(bh[c]->b_data, 0, inode->i_sb->s_blocksize); ++ continue; ++ } ++ if (cbh >= (nbh - 1)) { ++ break; ++ } ++ if (count < inode->i_sb->s_blocksize) { ++ put_cpu_var(ext2_rd_wa); ++ goto out_err; ++ } ++ cbh++; ++ count -= inode->i_sb->s_blocksize; ++ p = c >> (PAGE_CACHE_SHIFT - inode->i_sb->s_blocksize_bits); ++ if (!PageUptodate(pg[p])) { ++ addr0 = (clu_block0 << inode->i_sb->s_blocksize_bits); ++ b_start = addr0 + (c << inode->i_sb->s_blocksize_bits); ++ b_end = b_start + inode->i_sb->s_blocksize; ++#ifdef EXT2_COMPR_REPORT_CPR ++ trace_e2c("\t[%d] sts=0x%x e=%d s=%d sz=%d pg:%lu(%#x)\n", ++ c, (int) bh[c]->b_state, b_end, b_start, ++ (int) inode->i_size, pg[p]->index, ++ (unsigned int) pg[p]); ++#endif ++ if (b_end <= inode->i_size) { ++ /* Block is before end of file, copy data */ ++ restore_b_data_himem(bh[c]); ++ memcpy(bh[c]->b_data, src, inode->i_sb->s_blocksize); ++ ++ } else if (b_start < inode->i_size) { ++ /* Block contains end of file, copy to end */ ++ restore_b_data_himem(bh[c]); ++ memcpy(bh[c]->b_data, src, inode->i_size - b_start); ++ ++ } ++ set_buffer_uptodate(bh[c]); ++ set_buffer_dirty(bh[c]); ++ bh_writeout[bhn_writeout] = bh[c]; //mw ++ bhn_writeout++; //mw ++ } else { ++ //mw: DEBUG. buffer is uptodate now. compress will not reread! an get the compressed data!!! ++ // clear flag in extra page!!! 
++ // clear_bit(BH_Uptodate, &bh[c]->b_state); ++ ++ n = c & ((PAGE_CACHE_SIZE - 1) >> inode->i_sb-> ++ s_blocksize_bits); ++ bp = page_buffers(pg[p]); ++ for (i = 0; i < n; i++) { ++ bp = bp->b_this_page; ++ } ++ result = ext2_get_block(inode, clu_block0 + c, bp, 0); ++ ++ //mw: needed to do a writeback of the non-epg-buffers ++ //no idea how it was done before ++ set_buffer_uptodate(bp); ++ set_buffer_dirty(bp); ++ bh_writeout[bhn_writeout] = bp; //mw ++ bhn_writeout++; //mw ++ ++ if (bp->b_blocknr == 0) { ++ put_cpu_var(ext2_rd_wa); ++ goto out_err; ++ } ++ assert(bp->b_blocknr == bh[c]->b_blocknr); ++ } ++ src += inode->i_sb->s_blocksize; ++ } ++ if (count > inode->i_sb->s_blocksize) { ++ put_cpu_var(ext2_rd_wa); ++ goto out_err; ++ } ++ p = c >> (PAGE_CACHE_SHIFT - inode->i_sb->s_blocksize_bits); ++ if (!PageUptodate(pg[p])) { ++ addr0 = (clu_block0 << inode->i_sb->s_blocksize_bits); ++ b_start = addr0 + (c << inode->i_sb->s_blocksize_bits); ++#ifdef EXT2_COMPR_REPORT_CPR ++ trace_e2c("\t[%d] sts=0x%x c=%d s=%d sz=%d pg:%lu(%#x)\n", c, ++ (int) bh[c]->b_state, count, b_start, ++ (int) inode->i_size, pg[p]->index, ++ (unsigned int) pg[p]); ++#endif ++ if (b_start >= inode->i_size) { ++ restore_b_data_himem(bh[c]); ++ memset(bh[c]->b_data, 0, inode->i_sb->s_blocksize); ++ ++ } else { ++ if ((inode->i_size - b_start) < count) { ++ restore_b_data_himem(bh[c]); ++ memcpy(bh[c]->b_data, src, inode->i_size - b_start); ++ memset(bh[c]->b_data + (inode->i_size - b_start), 0, ++ count - (inode->i_size - b_start)); ++ } else { ++ restore_b_data_himem(bh[c]); ++ memcpy(bh[c]->b_data, src, count); ++ } ++ } ++ set_buffer_uptodate(bh[c]); ++ set_buffer_dirty(bh[c]); ++ bh_writeout[bhn_writeout] = bh[c]; //mw ++ bhn_writeout++; //mw ++ } else { ++ assert(epg[p] != NULL); //mw ++ n = c & ((PAGE_CACHE_SIZE - 1) >> inode->i_sb-> ++ s_blocksize_bits); ++ bp = page_buffers(pg[p]); ++ for (i = 0; i < n; i++) { ++ bp = bp->b_this_page; ++ } ++ result = ext2_get_block(inode, clu_block0 + c, bp, 0); ++ ++ //mw: needed to do a writeback of the non-epg-buffers ++ //no idea how it was done before ++ set_buffer_uptodate(bp); ++ set_buffer_dirty(bp); ++ bh_writeout[bhn_writeout] = bp; //mw ++ bhn_writeout++; //mw ++ if (bp->b_blocknr == 0) { ++ put_cpu_var(ext2_rd_wa); ++ goto out_err; ++ } ++ assert(bp->b_blocknr == bh[c]->b_blocknr); ++ } ++ result = (nbh - 1) * inode->i_sb->s_blocksize + count; ++ } ++ ++ for (i = 0; i < EXT2_MAX_CLUSTER_PAGES; i++) { ++ if (pg[i] == NULL) ++ break; ++ if (i < d_npg) ++ SetPageUptodate(pg[i]); ++ } ++ ++#ifdef EXT2_COMPR_REPORT_WA ++ printk(KERN_DEBUG "pid %d unlocks wa\n", current->pid); ++#endif ++ put_cpu_var(ext2_rd_wa); ++ ++ inode->i_ctime = CURRENT_TIME; ++ mark_inode_dirty_sync(inode); ++ /* If needed, EXT2_DIRTY_FL is raised by the caller. */ ++ ++#if 0 ++ /* TODO: SYNC */ ++ if (IS_SYNC(inode)) { ++ generic_osync_inode(inode, inode->i_mapping, ++ OSYNC_METADATA | OSYNC_DATA); ++ } ++#endif ++ assert(result >= 0); ++ ++ //Sync out changes: ++ assert(bhn_writeout <= EXT2_MAX_CLUSTER_BLOCKS); ++ assert(bhn_writeout >= 0); ++ ++ //mw: debug ++ for (i = 0; i < bhn_writeout; i++) { ++ if ((!buffer_mapped(bh_writeout[i])) ++ || (bh_writeout[i]->b_bdev == NULL)) { ++ u32 block = ext2_cluster_block0(inode, cluster); ++ ext2_get_block(inode, block + i, bh_writeout[i], 1); ++ //printk("ext2_get_block Block:%lu, Mapped:%i, Page:%lu, bdev: %#x\n", bh_writeout[i]->b_blocknr, (bh_writeout[i]->b_state & BH_Mapped), (bh_writeout[i]->b_page ? 
bh_writeout[i]->b_page->index : 0), bh_writeout[i]->b_bdev ); ++ } ++ assert(buffer_mapped(bh_writeout[i])); ++ assert(bh_writeout[i]->b_bdev != NULL); ++ assert(bh_writeout[i]->b_bdev == inode->i_sb->s_bdev); ++ /*if (bh_writeout[i]->b_bdev == NULL) ++ bh_writeout[i]->b_bdev = inode->i_sb->s_bdev; //fix bdev-bug */ ++ } ++ ++ ll_rw_block(WRITE, bhn_writeout, bh_writeout); ++ //mw: seems we have to wait here, otherwise: crash! ++ ++ CHECK_NOT_ATOMIC ++ for (i = 0; i < bhn_writeout; i++) { ++ if (bh_writeout[i]) ++ wait_on_buffer(bh_writeout[i]); ++ } ++ goto cleanup; ++ ++ out_err: ++ printk("Error in Decompressing cluster: Err=%i\n", result); ++ ++ cleanup: ++ ++#ifdef CONFIG_HIGHMEM ++ if (kmapped) ++ ext2_kunmap_cluster_pages(NULL, pg, epg); ++#endif ++ ++ for (i = 0; i < EXT2_MAX_CLUSTER_PAGES; i++) { ++ if (pg[i] == NULL) ++ break; ++ unlock_page(pg[i]); ++ page_cache_release(pg[i]); ++ } ++ ++ for (i = 0; i < EXT2_MAX_CLUSTER_PAGES; i++) { ++ if (epg[i] != NULL) { ++ ClearPageDirty(epg[i]); ++ ClearPageUptodate(epg[i]); ++ try_to_free_buffers(epg[i]); ++ unlock_page(epg[i]); ++ assert(page_count(epg[i]) == 1); ++ page_cache_release(epg[i]); ++ } ++ } ++ ++ /* ++ * Release buffers, don't forget to unlock the locked ones. ++ * pjm 1998-01-14: TO_DO: Locked ones? ++ */ ++ assert(nbh >= 0); ++ assert(nbh <= EXT2_MAX_CLUSTER_BLOCKS); ++#ifdef EXT2_COMPR_REPORT ++ trace_e2c(" < < < ext2_decompress_cluster %d: inode=%ld, res=%i\n", ++ cluster, inode->i_ino, result); ++#endif ++ return result; ++} ++ ++ ++/* ++ * Function to decompress the pages of a cluster. ++ * ++ * Allocate buffers to pages what are not mapped on the device. ++ * ++ * Returns the size of decompressed data on success, a negative ++ * value in case of failure, or 0 if some pages are not uptodate. ++ * ++ * The inode is supposed to be writable. ++ * All the pages must be UPTODATE, ++ */ ++int ext2_decompress_pages(struct inode *inode, u32 cluster, ++ struct page *pg[]) ++{ ++ struct ext2_cluster_head *head; ++ struct buffer_head *bh0; ++ struct buffer_head *bh[EXT2_MAX_CLUSTER_BLOCKS]; ++ unsigned nbh, c_nblk; ++ unsigned free_ix, clu_block0, clu_nblocks; ++ int i, pagesPerCluster, data_left, size = 0; ++ long status = 0; ++ char *dp; ++ struct buffer_head *bh_writeout[EXT2_MAX_CLUSTER_BLOCKS]; ++ int bhn_writeout; ++#ifdef CONFIG_HIGHMEM ++ int kmapped = 0; ++ ++ ext2_kmap_cluster_pages(NULL, pg, NULL); ++ kmapped = 1; ++#endif ++ ++ for (i = 0; i < EXT2_MAX_CLUSTER_BLOCKS; i++) { ++ bh_writeout[i] = NULL; ++ bhn_writeout = 0; ++ } ++ ++ /* First, get cluster_head (For this, we need to re-read the first block of ++ the cluster, without overwriting the data of the page the buffer point to... */ ++ /* This suppose that cluster are aligned with PAGE_SIZE... To be improved */ ++ ++ /* Changed by Yabo Ding, ++ The old code cannot reread data from disk to a changed buffers data pointer in 2.6.x. ++ So, I copy memory data(decompressed) to a temporary buffer; ++ Then reread data(compressed) from disk, and copy to head; ++ Then copy back the memory data from temporary buffer. ++ It seems clumsy, but it works well. 
++ */ ++ ++ bh0 = page_buffers(pg[0]); ++ restore_b_data_himem(bh0); ++ ++ head = (struct ext2_cluster_head *) kmalloc(bh0->b_size, GFP_KERNEL); ++ if (head == NULL) { ++ ext2_msg(inode->i_sb, "no more memory", "inode = %lu", ++ inode->i_ino); ++ status = -EIO; ++ goto out_x; ++ } ++ dp = kmalloc(bh0->b_size, GFP_KERNEL); ++ if (dp == NULL) { ++ ext2_msg(inode->i_sb, "no more memory", "inode = %lu", ++ inode->i_ino); ++ kfree(head); ++ status = -EIO; ++ goto out_x; ++ } ++ memcpy(dp, bh0->b_data, bh0->b_size); ++ clear_bit(BH_Uptodate, &bh0->b_state); ++ if (!buffer_mapped(bh0)) { ++ status = ++ ext2_get_block(inode, ext2_cluster_block0(inode, cluster), bh0, ++ 0); ++ if (bh0->b_blocknr == 0) { ++ trace_e2c ++ ("ext2_decompress_pages: ext2_get_block error %ld (cluster = %u)\n", ++ status, cluster); ++ kfree(head); ++ memcpy(bh0->b_data, dp, bh0->b_size); ++ kfree(dp); ++ status = -EIO; ++ goto out; ++ } ++ } ++ ll_rw_block(READ, 1, &bh0); ++ ++ CHECK_NOT_ATOMIC ++ wait_on_buffer(bh0); ++ //printk("RE-Read: Buffer: blocknr:%lu(%#x) \n", bh0->b_blocknr, bh0); ++ if (!buffer_uptodate(bh0)) { /* Read error ??? */ ++ trace_e2c("ext2_decompress_pages: IO error (cluster = %u)\n", ++ cluster); ++ kfree(head); ++ memcpy(bh0->b_data, dp, bh0->b_size); ++ kfree(dp); ++ status = -EIO; ++ goto out; ++ } ++ /* This suppose that cluster are aligned with PAGE_SIZE... To be improved ++ bh0->b_data = page_address(pg[0]); */ ++ memcpy((char *) head, bh0->b_data, bh0->b_size); ++ memcpy(bh0->b_data, dp, bh0->b_size); ++ kfree(dp); ++ ++ if (head->magic != cpu_to_le16(EXT2_COMPRESS_MAGIC_04X)) { ++ ext2_msg(inode->i_sb, ++ "ext2_decompress_pages: bad magic number", ++ "inode = %lu, magic = %#04x", inode->i_ino, ++ le16_to_cpu(head->magic)); ++ kfree(head); ++ status = -EIO; ++ goto out; ++ } ++#ifdef EXT2_COMPR_REPORT ++ trace_e2c("ext2_decompress_pages: clt=%d i=%ld head=0x%x\n", cluster, ++ inode->i_ino, (unsigned) head); ++#endif ++ ++ /* Now, try to do the same as in ext2_decompress_cluster for moving/allocating blocks */ ++ nbh = 0; ++ pagesPerCluster = ext2_cluster_npages(inode, cluster); ++ for (i = 0; i < pagesPerCluster && pg[i]; i++) { ++ assert(PageLocked(pg[i])); ++ //if (!(PageUptodate(pg[i]))) { ++ //mw: do it like ext2_decompress_cluster to handle end of a file correctly ++ if (!(PageUptodate(pg[i])) ++ && (pg[i]->index <= ((inode->i_size - 1) >> PAGE_CACHE_SHIFT))) { ++ kfree(head); ++ printk("should never happen: not all pages uptodate!\n"); //mw ++ status = 0; ++ goto out_x; ++ } ++ } ++ ++ for (i = 0; i < pagesPerCluster && pg[i]; i++) { ++ struct buffer_head *bhead, *bhx; ++ int idx = 0; ++ ++ /* assert(PageUptodate(pg[i])); with ftruncate() can be false */ ++ if (!page_has_buffers(pg[i])) { ++ ClearPageUptodate(pg[i]); /*mw */ ++ ClearPageDirty(pg[i]); /*mw */ ++ assert(0); ++ create_empty_buffers_e2c(pg[i], inode->i_sb->s_blocksize, 0, ++ inode); ++ if (unlikely(!page_has_buffers(pg[i]))) ++ printk("Error: NOMEM!\n"); ++ } ++ bhead = page_buffers(pg[i]); ++ for (bhx = bhead; bhx != bhead || !idx; bhx = bhx->b_this_page) { ++ idx++; ++ bh[nbh] = bhx; ++ nbh++; ++ } ++ } ++ ++ while ((nbh != 0) && (bh[nbh - 1]->b_blocknr == 0)) ++ --nbh; ++ ++ c_nblk = nbh; ++ ++ free_ix = ++ ext2_calc_free_ix(head->holemap_nbytes, (u8 const *) (&head[1]), ++ c_nblk); ++ clu_block0 = ext2_cluster_block0(inode, cluster); ++ clu_nblocks = ext2_cluster_nblocks(inode, cluster); ++ ext2_unpack_blkaddrs(inode, bh, 0, head->holemap_nbytes, ++ (u8 const *) (&head[1]), c_nblk, free_ix, ++ clu_block0, clu_nblocks); 
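/* Editor's note -- illustrative sketch, not part of the patch above.
 * ext2_calc_free_ix(), called just before ext2_unpack_blkaddrs(), walks the
 * per-cluster hole bitmap until c_nblk non-hole positions have been consumed;
 * the value it returns is the slot where the first surplus (freed/zeroed)
 * block of the cluster begins.  The standalone program below replays that
 * walk for a made-up holemap so the arithmetic can be checked in isolation.
 * EXT2_MAX_CLUSTER_BLOCKS is taken as 32 (the holemap handling in the patch
 * only zeroes 32 bits); the sample holemap and block count are invented for
 * the example and do not come from the patch.
 */
#include <assert.h>
#include <stdio.h>

#define EXT2_MAX_CLUSTER_BLOCKS 32

static unsigned calc_free_ix(unsigned holemap_nbytes,
			     const unsigned char *holemap, unsigned c_nblk)
{
	unsigned i;

	assert(1 <= c_nblk && c_nblk < EXT2_MAX_CLUSTER_BLOCKS);
	for (i = 0; i < holemap_nbytes * 8 && c_nblk > 0; i++)
		if (!(holemap[i >> 3] & (1u << (i & 7))))
			c_nblk--;	/* a non-hole position consumes one data block */
	return i + c_nblk;		/* any blocks left over follow the mapped bitmap */
}

int main(void)
{
	unsigned char holemap[1] = { 0x0a };	/* holes at block positions 1 and 3 */

	/* 4 compressed data blocks land at positions 0, 2, 4, 5 -> free_ix is 6 */
	printf("free_ix = %u\n", calc_free_ix(1, holemap, 4));
	return 0;
}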
++ ++ /* Allocate the extra blocks needed. */ ++ data_left = size = le32_to_cpu(head->ulen); ++ ++ data_left -= c_nblk << inode->i_sb->s_blocksize_bits; ++ assert(data_left > 0); ++ for (i = 0; i < free_ix; i++) { ++ if (bh[i]->b_blocknr != 0) { ++#ifdef EXT2_COMPR_REPORT_CPR ++ trace_e2c("\t [%d] blk=%ld sts=0x%x\n", i, bh[i]->b_blocknr, ++ (int) bh[i]->b_state); ++#endif ++ set_buffer_dirty(bh[i]); ++ bh_writeout[bhn_writeout] = bh[i]; //mw ++ bhn_writeout++; //mw ++ } ++ } ++ ++ for (i = free_ix; i < clu_nblocks; i++) { ++ if (((i >> 3) >= head->holemap_nbytes) ++ || !(head->holemap[i >> 3] & (1 << (i & 7)))) { ++ status = ++ ext2_get_block(inode, clu_block0 + i, bh[i], ++ 1 /* create */ ); ++ if (status || bh[i]->b_blocknr == 0) { ++ status = -EIO; ++ goto out; ++ } ++#ifdef EXT2_COMPR_REPORT_CPR ++ trace_e2c("\t [%d] blk=%ld sts=0x%x\n", i, bh[i]->b_blocknr, ++ (int) bh[i]->b_state); ++#endif ++ set_bit(BH_Uptodate, &bh[i]->b_state); ++ set_buffer_dirty(bh[i]); ++ bh_writeout[bhn_writeout] = bh[i]; //mw ++ bhn_writeout++; //mw ++ nbh++; ++ data_left -= inode->i_sb->s_blocksize; ++ if (data_left <= 0) ++ break; ++ } ++ } ++ ++ out: ++ kfree(head); ++ ++ out_x: ++ ++ for (i = 0; i < bhn_writeout; i++) { ++ ++ if ((!buffer_mapped(bh_writeout[i])) ++ || (bh_writeout[i]->b_bdev == NULL)) { ++ u32 block = ext2_cluster_block0(inode, cluster); ++ ext2_get_block(inode, block + i, bh_writeout[i], 1); ++ //printk("ext2_get_block Block:%lu, Mapped:%i, Page:%lu, bdev: %#x\n", bh_writeout[i]->b_blocknr, (bh_writeout[i]->b_state & BH_Mapped), (bh_writeout[i]->b_page ? bh_writeout[i]->b_page->index : 0), bh_writeout[i]->b_bdev ); ++ } ++ assert(buffer_mapped(bh_writeout[i])); ++ assert(bh_writeout[i]->b_bdev != NULL); ++ assert(bh_writeout[i]->b_bdev == inode->i_sb->s_bdev); ++ /*if (bh_writeout[i]->b_bdev == NULL) ++ bh_writeout[i]->b_bdev = inode->i_sb->s_bdev; //fix bdev-bug */ ++ } ++ //Sync out changes: ++ ll_rw_block(WRITE, bhn_writeout, bh_writeout); ++ //mw: seems we have to wait here, otherwise: crash! ++ ++ CHECK_NOT_ATOMIC ++ for (i = 0; i < bhn_writeout; i++) { ++ if (bh_writeout[i]) ++ wait_on_buffer(bh_writeout[i]); ++ } ++ ++ ++#ifdef CONFIG_HIGHMEM ++ if (kmapped) ++ ext2_kunmap_cluster_pages(NULL, pg, NULL); ++#endif ++ ++ return (status ? status : size); ++} ++ ++ ++/* Decompress every cluster that is still compressed. ++ We stop and return -ENOSPC if we run out of space on device. ++ ++ The caller needs to check for EXT2_COMPRBLK_FL before calling. ++ ++ Returns 0 on success, -errno on failure. ++ ++ Called by ext2_ioctl(). */ ++int ext2_decompress_inode(struct inode *inode) ++{ ++ u32 cluster; ++ u32 n_clusters; ++ int err = 0; ++ struct ext2_inode_info *ei = EXT2_I(inode); ++ ++ assert(ei->i_flags & EXT2_COMPRBLK_FL); ++ ++ /* Quotas aren't otherwise kept if file is opened O_RDONLY. */ ++ dquot_initialize(inode); ++ ++ //mutex_lock(&inode->i_mutex); /* MW 5-16-07 */ ++ assert(atomic_read(&inode->i_mutex.count) <= 0); /* i.e. mutex_lock */ ++ err = 0; ++ /* This test can succeed because down() (and I think DQUOT_INIT) can block. */ ++ if (!(ei->i_flags & EXT2_COMPRBLK_FL)) ++ goto out; ++ ++ n_clusters = ext2_n_clusters(inode); ++ for (cluster = 0; cluster < n_clusters; cluster++) { ++ err = ext2_cluster_is_compressed_fn(inode, cluster); ++ if (err > 0) { ++ err = ext2_decompress_cluster(inode, cluster); ++ /* If we later get an error, we'll need to recompress. 
*/ ++ ei->i_flags |= EXT2_DIRTY_FL; ++ ei->i_compr_flags |= EXT2_CLEANUP_FL; ++ } ++ if (err < 0) ++ goto error; ++ } ++ assert(err >= 0); ++ err = 0; ++ ei->i_flags &= ~(EXT2_COMPRBLK_FL | EXT2_DIRTY_FL); ++ ei->i_compr_flags &= ~EXT2_CLEANUP_FL; ++ error: ++ inode->i_ctime = CURRENT_TIME; ++ mark_inode_dirty_sync(inode); ++ out: ++// mutex_unlock(&inode->i_mutex); /* MW 5-16-07 */ ++ return err; ++} ++ ++ ++/* ++ TODO: SECRM_FL ++ ++ TODO: Avant de liberer les blocs, regarder si le compteur ++ est a 1, et marquer le noeud si ce n'est pas le cas ++ (pour preparer la recompression immediate). ++ ++ pjm fixme translation. ++ "Before freeing the blocks, check if the counter is 1, ++ and mark the inode if not (in order to prepare for ++ immediate recompression)." */ ++ ++/* This is called by ext2_compress_cluster to free the blocks now ++ available due to compression. We free ,nb` blocks beginning with ++ block ,block`. We set the address of each freed block to ++ EXT2_COMPRESSED_BLKADDR, thus marking the cluster as compressed. ++ N.B. It is up to the caller to adjust i_blocks. */ ++ ++/* TODO: ext2_truncate() is much more careful than this routine. ++ (E.g. it checks for bh->b_count > 1, and checks for things changing ++ underneath it. It also calls bforget instead of brelse if it's ++ going to free it.) Why? Maybe we should copy it. */ ++ ++/* effic: Reduce the number of calls to ext2_free_block() the way ++ ext2_trunc_direct() does. */ ++ ++/* fixme: I think tht we do indeed need to check if buffers are held by ++ somebody else before freeing them. */ ++static int ext2_free_cluster_blocks(struct inode *inode, u32 block, ++ unsigned nb) ++{ ++ u32 tmp; ++ struct ext2_bkey key; ++ int err; ++ ++/* ++ * whitpa 04 Oct 2004: although it may be true that using e2compr in ++ * conjunction with quotas is a Bad Idea, having quotas enabled for other ++ * filesystems doesn't necessarily mean that the quota feature will actually be ++ * used in this one, so many people find the following assertion very annoying. ++ * I have therefore disabled it. ++ */ ++/* assert (!inode->i_sb->dq_op || (inode->i_flags & S_QUOTA)); */ ++ if (!nb) ++ return 0; ++ if (nb > EXT2_MAX_CLU_NBLOCKS) { ++ assert((int) nb >= 0); ++ assert(nb <= EXT2_MAX_CLU_NBLOCKS); ++ return -EDOM; ++ } ++ assert(((block + nb) & 3) == 0); ++ if (!ext2_get_key(&key, inode, block)) ++ return -EIO; ++ ++ while (nb-- > 0) { ++ tmp = ext2_get_key_blkaddr(&key); ++ err = ext2_set_key_blkaddr(&key, EXT2_COMPRESSED_BLKADDR); ++ if (err) ++ goto out; ++ if (tmp != 0) { ++ assert(tmp != EXT2_COMPRESSED_BLKADDR); ++#ifdef EXT2_COMPR_REPORT_ALLOC ++ printk(KERN_DEBUG "ext2: free %d = (%d) %d:%d:%d:%d : %d\n", ++ key.block, ++ key.level, ++ key.off[0], key.off[1], key.off[2], key.off[3], tmp); ++#endif ++ ext2_free_blocks(inode, tmp, 1); ++ } ++ if (!ext2_next_key(&key, 1)) ++ break; ++ } ++ err = 0; ++ out: ++ ext2_free_key(&key); ++ return err; ++} ++ ++#ifdef EXT2_COMPR_DEBUG ++static unsigned count_bits(unsigned char *p, unsigned nb) ++{ ++ u32 x = le32_to_cpu(*(u32 *) p); ++ unsigned n = 0; ++ ++ assert(nb <= 4); ++ if (nb != 4) ++ x &= (1 << (nb * 8)) - 1; ++ while (x) { ++ x &= (x - 1); ++ n++; ++ } ++ return n; ++} ++#endif ++ ++/* ++ * __remove_compr_assoc_queue is used in invalidate_inode_buffers ++ * replacement code for ext2_compress_cluster(). TLL 02/21/07 ++ * Yeah, it is duplicate code, but using it does not require ++ * patching fs/buffer.c/__remove_assoc_queue to export it. 
++ * The buffer's backing address_space's private_lock must be held. ++ */ ++/*static inline void __remove_compr_assoc_queue(struct buffer_head *bh) ++{ ++ list_del_init(&bh->b_assoc_buffers); ++}*/ ++ ++/* Compress one cluster. If the cluster uses fewer blocks once ++ compressed, it is stored in place of the original data. Unused ++ blocks are freed, and the cluster is marked as compressed. ++ ++ Returns a negative value on error, ++ 0 if the cluster does not compress well, ++ positive if it is compressed (whether it was already compressed ++ or whether we compressed it). ++ ++ Assume inode is writable. ++ ++ Called by : ++ ++ ext2_cleanup_compressed_inode () [i_sem] ++ ++ If ever we acquire new callers, make sure that quotas are ++ initialised, and COMPRBLK is handled correctly (i.e. such ++ that ioctl() can't change the cluster size on us), and that caller ++ tests for ext2_wa==NULL. ++*/ ++ ++int ext2_compress_cluster(struct inode *inode, u32 cluster) ++{ ++ struct buffer_head *bh[EXT2_MAX_CLUSTER_BLOCKS + 1]; ++ struct page *pg[EXT2_MAX_CLUSTER_PAGES]; ++ int s_nblk; /* Equals clu_nblocks less any trailing hole blocks. */ ++ unsigned u_nblk = (~(unsigned) 0), c_nblk; /* Number of blocks occupied by ++ un/compressed data. */ ++ int result, n, x; ++ int ulen, maxlen = 0, clen = 0; ++ unsigned char *dst; ++ u8 *src; ++ unsigned meth, alg; ++ int nbh = 0, npg, i; ++ unsigned char holemap_nbytes = 0; ++ unsigned last_hole_pos; ++ struct ext2_cluster_head *head; ++ unsigned r_nblk; ++ struct ext2_inode_info *ei = EXT2_I(inode); ++ unsigned long saved_isize; ++ //int dotrunc = 1; //mw ++ ++#ifdef CONFIG_HIGHMEM ++ int kmapped = 0; ++#endif ++ ++ /* impl: Otherwise, ioctl() could change the cluster size ++ beneath us. */ ++ /* TLL say not compressed and return -1 6-15-07 */ ++ if (!(ei->i_flags & EXT2_COMPRBLK_FL)) ++ return -1; ++ ++ //mw ++ saved_isize = inode->i_size; ++ ++ assert(atomic_read(&inode->i_mutex.count) <= 0); /* i.e. mutex_lock */ ++ assert(!mapping_mapped(inode->i_mapping)); ++ ++ npg = ext2_cluster_npages(inode, cluster); ++ ++ result = ext2_get_cluster_pages(inode, cluster, pg, NULL, 1); ++ if (result <= 0) ++ goto done; ++ ++#ifdef CONFIG_HIGHMEM ++ ext2_kmap_cluster_pages(NULL, pg, NULL); ++ kmapped = 1; ++#endif ++ ++ /* effic: We ought to use the page cache. Using the page ++ cache always costs extra CPU time, but saves I/O if the ++ page is present. We still need to detect holes, which ++ unfortunately may still cause I/O. Testing for all-zero ++ could save us that I/O. */ ++ ++ nbh = ext2_get_cluster_blocks(inode, cluster, bh, pg, NULL, 1); ++ ++ s_nblk = nbh; ++ ++#ifdef EXT2_COMPR_REPORT ++ { ++ int i; ++ trace_e2c(" > > > ext2_compress_cluster %d: inode=%ld, size=%d\n", ++ cluster, inode->i_ino, (int) inode->i_size); ++#ifdef EXT2_COMPR_REPORT_CPR ++ for (i = 0; i < s_nblk; i++) { ++ if (bh[i]) { ++ printk(KERN_DEBUG ++ "bbuffer_head[%d]: blocknr=%lu, addr=0x%p ", i, ++ (unsigned long) bh[i]->b_blocknr, bh[i]); ++ if (bh[i]->b_page) ++ printk(KERN_DEBUG "bgn:[page->index=%ld]\n", ++ bh[i]->b_page->index); ++ else ++ printk(KERN_DEBUG "[No page]\n"); ++ } else ++ printk("bbuffer_head[%d] is NULL\n", i); ++ } ++#endif ++ } ++#endif ++ /* ++ * Did somebody else compress the cluster while we were waiting ? ++ * This should never arise ... 
++ */ ++ result = ext2_cluster_is_compressed_fn(inode, cluster); ++ if (result != 0) { ++ if (result > 0) { ++ ext2_msg(inode->i_sb, ++ "ext2_compress_cluster", ++ "compressing compressed cluster"); ++ } ++ goto done; ++ } ++ ++ /* I moved it here in case we need to load a module that ++ * needs more heap that is currently allocated. ++ * In such case "init_module" for that algorithm forces ++ * re-allocation of ext2_wa. It should be safe here b/c the ++ * first reference to ext2_wa comes just after and we have ++ * locked ext2_wa before. ++ * ++ * I know that we may not need the compression at all ++ * (compressing 0 or 1 block) but it's better to sacrifice ++ * a bit than do make a total mess of this code. ++ * ++ * FIXME: Totally separate working areas for reading and writing. ++ * Jan R. ++ */ ++ ++ meth = ei->i_compr_method; ++ assert(meth < EXT2_N_METHODS); ++ alg = ext2_method_table[meth].alg; ++#ifdef CONFIG_KMOD ++ if (!ext2_algorithm_table[alg].avail) { ++ char str[32]; ++ ++ sprintf(str, "ext2-compr-%s", ext2_algorithm_table[alg].name); ++ request_module(str); ++ } ++#endif ++ ++ result = -EINTR; ++ ++ /* ++ * Try to get the working area. ++ */ ++#ifdef EXT2_COMPR_REPORT_WA ++ printk(KERN_DEBUG "pid %d enters critical region\n", current->pid); ++#endif ++ if (get_cpu_var(ext2_wr_wa) == NULL) ++ { ++ ext2_alloc_wr_wa(); ++ } ++ assert(__get_cpu_var(ext2_wr_wa) != NULL); ++ ++ ++ /* ++ * Now, we try to compress the cluster. If the cluster does ++ * not compress well, we just give up. Otherwise, we reuse ++ * the old blocks to store the compressed data (except that ++ * compressed data is contiguous in the file even if the ++ * uncompressed data had holes). ++ */ ++ ++ /* ++ * Compute the block bitmap, how many bytes of data we have ++ * in the cluster, and the maximum interesting length after ++ * compression. The bitmap will be used to reallocate blocks ++ * when decompressing the cluster, so that we don't create blocks ++ * that were previously missing. We also pack the buffers ++ * together. ++ */ ++ ++ head = (struct ext2_cluster_head *) __get_cpu_var(ext2_wr_wa)->c; ++#if EXT2_MAX_CLUSTER_BLOCKS > 32 ++# error "We need to zero more bits than this." ++#endif ++ *(u32 *) (&head[1]) = 0; ++ last_hole_pos = (unsigned) (-1); ++ assert(head->holemap[0] == 0); ++ assert(head->holemap[1] == 0); ++ assert(head->holemap[2] == 0); ++ assert(head->holemap[3] == 0); ++ assert(*(u32 *) head->holemap == 0); ++ assert(count_bits(head->holemap, 4) == 0); ++ ++ /* TODO: Check that i_size can't change beneath us. ++ do_truncate() is safe because it uses i_sem around changing ++ i_size. For the moment, I do a runtime check. */ ++ ++ saved_isize = inode->i_size; ++ ++#ifdef EXT2_COMPR_REPORT_VERBOSE ++ printk ++ ("00 ext2_compress_cluster[%u]: i_size=%u, s_blocksize_bits=%u, s_nblk=%u\n", ++ __LINE__, (unsigned) inode->i_size, inode->i_sb->s_blocksize_bits, ++ s_nblk); ++#endif ++// assert (ROUNDUP_RSHIFT(inode->i_size, inode->i_sb->s_blocksize_bits) ++// >= s_nblk); ++ /* This initial guess at ulen doesn't take holes into account ++ unless they're at end of cluster. We ,compensate for other ++ holes` during the loop below. 
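++ (For instance, if blocks 2 and 5 of the cluster turn out to be
++ holes, bits 2 and 5 of the holemap are set and ulen shrinks by
++ two block sizes in that loop.)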
*/ ++ ulen = MIN(s_nblk << inode->i_sb->s_blocksize_bits, ++ inode->i_size - ext2_cluster_offset(inode, cluster)); ++ r_nblk = (((ulen - 1) >> inode->i_sb->s_blocksize_bits) + 1); ++ if (r_nblk <= 1) { ++ /* MW: required to remove Z flag, otherwise compress ++ * is tried on each access */ ++ result = 0; ++ goto no_compress; ++ } ++ /* Verify if more than 1 block to compress in the cluster */ ++ nbh = 0; ++ for (x = 0; x < s_nblk; x++) { ++ if ((bh[x] != NULL) && (bh[x]->b_blocknr != 0)) { ++ nbh++; ++ } else { ++ last_hole_pos = x; ++ head->holemap[x >> 3] |= 1 << (x & 7); ++ ulen -= inode->i_sb->s_blocksize; ++ /* impl: We know that it's a whole block because ++ ext2_get_cluster_blocks trims s_nblk for trailing ++ NULL blocks, and partial blocks only come at ++ the end, so there can't be partial NULL blocks. */ ++ } ++ } ++ /* We don't try to compress cluster that only have one block ++ or no block at all. (When fragments are implemented, this code ++ should be changed.) */ ++ if (nbh <= 1) { ++ /* MW: required to remove Z flag, otherwise compress ++ * is tried on each access */ ++ goto no_compress; ++ } ++ ++ u_nblk = nbh; ++ /* Copy the data in the compression area */ ++ dst = __get_cpu_var(ext2_wr_wa)->u; ++ for (x = 0; x < s_nblk; x++) { ++ if ((bh[x] != NULL) && (bh[x]->b_blocknr != 0)) { ++ restore_b_data_himem(bh[x]); ++ memcpy(dst, bh[x]->b_data, bh[x]->b_size); ++ dst += bh[x]->b_size; ++ } ++ } ++ ++ assert(count_bits(head->holemap, 4) == s_nblk - u_nblk); ++ ++#if EXT2_GRAIN_SIZE != EXT2_MIN_BLOCK_SIZE ++# error "this code ought to be changed" ++#endif ++ ++ /* ,maxlen` is the maximum length that the compressed data can ++ be while still taking up fewer blocks on disk. */ ++ holemap_nbytes = (last_hole_pos >> 3) + 1; ++ /* impl: Remember that ,last_hole_pos` starts off as being -1, ++ so the high 3 bits of ,last_hole_pos >> 3` can be wrong. ++ This doesn't matter if holemap_nbytes discards the high ++ bits. */ ++ ++ assert(sizeof(holemap_nbytes) < sizeof(unsigned)); ++ assert((last_hole_pos == (unsigned) -1) ++ == (holemap_nbytes == 0)); ++ maxlen = ++ ((((r_nblk < ++ u_nblk) ? r_nblk : u_nblk) - 1) * inode->i_sb->s_blocksize - ++ sizeof(struct ext2_cluster_head) ++ - holemap_nbytes); ++ clen = 0; ++ /* Handling of EXT2_AUTO_METH at the moment is just that we ++ use the kernel default algorithm. I hope that in future ++ this can be extended to the kernel deciding when to ++ compress and what algorithm to use, based on available disk ++ space, CPU time, algorithms currently used by the fs, ++ etc. */ ++ if ((meth == EXT2_AUTO_METH) ++ || !ext2_algorithm_table[alg].avail) { ++ meth = EXT2_DEFAULT_COMPR_METHOD; ++ alg = ext2_method_table[meth].alg; ++ assert(ext2_algorithm_table[alg].avail); ++ } ++ if (alg == EXT2_NONE_ALG) ++ goto no_compress; ++ ++ clen = ext2_algorithm_table[alg].compress(__get_cpu_var(ext2_wr_wa)->u, ++ __get_cpu_var(ext2_wr_wa)->c + sizeof(struct ext2_cluster_head) + holemap_nbytes, ++ __get_cpu_var(ext2_wr_wa)->heap, ulen, maxlen, ext2_method_table[meth].xarg); ++ ++#ifdef EXT2_COMPR_REPORT_ALGORITHMS ++ printk(KERN_DEBUG "03 ext2: %lu: cluster %d+%d [%d] compressed " ++ "into %d bytes (ulen = %d, maxlen = %d)\n", ++ inode->i_ino, ++ ext2_cluster_offset(inode, cluster), ++ ext2_cluster_nblocks(inode, cluster), ++ u_nblk, clen, ulen, maxlen); ++#endif ++ ++ if ((clen == 0) || (clen > maxlen)) { ++ no_compress: ++ ++ /* this chunk didn't compress. 
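++ Reached when the compressor returned 0 (it gave up), when
++ clen > maxlen (the result would not take up fewer blocks on
++ disk), or via the goto paths above for clusters with at most
++ one real block or the EXT2_NONE_ALG algorithm.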
*/ ++ assert(inode->i_size == saved_isize); ++#ifdef EXT2_COMPR_REPORT_WA ++ printk(KERN_DEBUG ++ "pid %d leaves critical region, nbh=%d, u_nblk=%d, " ++ "inode->i_size=%lu, saved_isize=%lu, clen=%d, ulen=%d, maxlen=%d\n", ++ current->pid, nbh, u_nblk, ++ (long unsigned) inode->i_size, saved_isize, clen, ulen, ++ maxlen); ++#endif ++ ++ result = 0; ++ put_cpu_var(ext2_wr_wa); ++ goto done; ++ } ++ ++ ++#if EXT2_MAX_CLUSTER_BLOCKS > 32 ++# error "We need to zero more bits than this." ++#endif ++ assert(-1 <= (int) last_hole_pos); ++ assert((int) last_hole_pos < 32); ++ assert((le32_to_cpu(*(u32 *) head->holemap) ++ & (~0u << (1 + last_hole_pos)) ++ & (~(~0u << (8 * holemap_nbytes)))) ++ == 0); ++ /* Don't change "~0u << (1 + last_hole_pos)" to "~1u << last_hole_pos" ++ as I almost did, as last_hole_pos can be -1 and cannot be 32. */ ++ assert(count_bits(head->holemap, holemap_nbytes) == s_nblk - u_nblk); ++ ++ /* Compress the blocks at the beginning of the cluster */ ++ for (x = 0, nbh = 0; x < s_nblk; x++) { ++ if ((bh[x] != NULL) && (bh[x]->b_blocknr != 0)) { ++ if (nbh != x) { ++ restore_b_data_himem(bh[x]); ++ bh[nbh]->b_blocknr = bh[x]->b_blocknr; ++ set_bit(BH_Mapped, &bh[nbh]->b_state); ++ bh[x]->b_blocknr = 0; ++ assert(buffer_mapped(bh[x])); ++ clear_bit(BH_Mapped, &bh[x]->b_state); ++ } ++ nbh++; ++ } ++ } ++ assert(nbh == u_nblk); ++ assert(count_bits(head->holemap, holemap_nbytes) == s_nblk - u_nblk); ++ ++ /* ++ * Compression was successful, so add the header and copy to blocks. ++ */ ++ ++ /* Header. */ ++ { ++ head->magic = cpu_to_le16(EXT2_COMPRESS_MAGIC_04X); ++ head->method = meth; ++ head->holemap_nbytes = holemap_nbytes; ++ head->ulen = cpu_to_le32(ulen); ++ head->clen = cpu_to_le32(clen); ++ ++ barrier(); //mw: "barrier" tells compiler not to re-order resulting asm statments, somehow. ++ head->checksum = ++ cpu_to_le32(ext2_adler32 ++ (le32_to_cpu(*(u32 *) __get_cpu_var(ext2_wr_wa)->c), ++ __get_cpu_var(ext2_wr_wa)->c + 8, ++ (sizeof(struct ext2_cluster_head) - 8 + ++ head->holemap_nbytes + clen))); ++ } ++ ++ assert((le32_to_cpu(*(u32 *) head->holemap) ++ & (~0 << (1 + last_hole_pos)) ++ & ((1 << (8 * holemap_nbytes)) - 1)) == 0); ++ result = clen += sizeof(struct ext2_cluster_head) + holemap_nbytes; ++ c_nblk = ROUNDUP_RSHIFT(clen, inode->i_sb->s_blocksize_bits); ++ ++ /* Release unneeded buffer heads. (Freeing is done later, ++ after unlocking ext2_wr_wa.) */ ++ assert(nbh == u_nblk); ++ nbh = c_nblk; ++ ++#ifdef EXT2_COMPR_REPORT ++ trace_e2c("ext2_compress_cluster: head->clen=%d, clen=%d\n", head->clen, clen); ++#endif ++ src = __get_cpu_var(ext2_wr_wa)->c; ++ ++ for (n = 0; (int) clen > 0; n++) { ++ restore_b_data_himem(bh[n]); ++ if (clen >= inode->i_sb->s_blocksize) { ++ memcpy(bh[n]->b_data, src, inode->i_sb->s_blocksize); ++ } else { ++ memcpy(bh[n]->b_data, src, clen); ++ } ++ ++ /* TO_DO: OSYNC. means: write opertions are blocking until the ++ * the pages are written from page cache to disk */ ++ ++ set_buffer_uptodate(bh[n]); ++ set_buffer_dirty(bh[n]); ++ src += inode->i_sb->s_blocksize; ++ clen -= inode->i_sb->s_blocksize; ++ } ++ ++ i = 0; ++ assert(n == c_nblk); ++ assert((le32_to_cpu(*(u32 *) head->holemap) ++ & (~0 << (1 + last_hole_pos)) ++ & ((1 << (8 * holemap_nbytes)) - 1)) == 0); ++ ++ /* Runtime check that no-one can change i_size while i_sem is down. ++ (See where saved_isize is set, above.) 
*/ ++ assert(inode->i_size == saved_isize); ++ assert(!mapping_mapped(inode->i_mapping)); ++ ++ /* Free the remaining blocks, and shuffle used blocks to start ++ of cluster in blkaddr array. */ ++ { ++ u32 free_ix, curr; ++ int err; ++ ++ /* Calculate free_ix. There should be ,c_nblk` ++ non-hole blocks among the first ,free_ix` ++ blocks. */ ++ { ++ assert((le32_to_cpu(*(u32 *) head->holemap) ++ & (~0 << (1 + last_hole_pos)) ++ & ((1 << (8 * holemap_nbytes)) - 1)) == 0); ++ assert(n == c_nblk); ++ for (free_ix = 0; ++ ((int) free_ix <= (int) last_hole_pos) && (n > 0); ++ free_ix++) ++ if (!(head->holemap[free_ix >> 3] ++ & (1 << (free_ix & 7)))) ++ n--; ++ free_ix += n; ++ ++ if ((free_ix < c_nblk) ++ || (free_ix + u_nblk > s_nblk + c_nblk) ++ || (free_ix >= ext2_cluster_nblocks(inode, cluster)) ++ || ((holemap_nbytes == 0) && (c_nblk != free_ix))) { ++ assert(free_ix >= c_nblk); ++ /*assert (free_ix - c_nblk <= s_nblk - u_nblk); */ ++ assert(free_ix + u_nblk <= s_nblk + c_nblk); ++ assert(free_ix < ext2_cluster_nblocks(inode, cluster)); ++ assert((holemap_nbytes != 0) || (c_nblk == free_ix)); ++ assert(1 <= c_nblk); ++ assert(c_nblk < u_nblk); ++ assert(u_nblk <= s_nblk); ++ assert(s_nblk <= ext2_cluster_nblocks(inode, cluster)); ++ assert(ext2_cluster_nblocks(inode, cluster) <= ++ EXT2_MAX_CLU_NBLOCKS); ++ ext2_error(inode->i_sb, "ext2_compress_cluster", ++ "re assertions: c=%d, u=%d, f=%d, s=%d, n=%d, " ++ "lhp=%d, hm=%x, hnb=%d, " "ino=%lu, clu=%u", ++ (int) c_nblk, (int) u_nblk, (int) free_ix, ++ (int) s_nblk, (int) ext2_cluster_nblocks(inode, ++ cluster), ++ (int) last_hole_pos, ++ (unsigned) le32_to_cpu(*(u32 *) head->holemap), ++ (int) holemap_nbytes, inode->i_ino, cluster); ++ } ++ } ++ ++ /*mw: put here: set all __get_cpu related pointers to NULL ++ as they become invalid with put_cpu */ ++ head = NULL; /* prevent any more stupid bugs */ ++ src = NULL; ++ dst = NULL; ++ put_cpu_var(ext2_wr_wa); ++ ++#ifdef EXT2_COMPR_DEBUG ++ /* TODO: remove this TEST */ ++ /* mw: ext2_free_cluster_blocks can sleep: check we are not atomic */ ++ schedule(); ++#endif ++ ++ /* Free unneeded blocks, and mark cluster as ++ compressed. */ ++ err = ext2_free_cluster_blocks ++ (inode, ++ ext2_cluster_block0(inode, cluster) + free_ix, ++ ext2_cluster_nblocks(inode, cluster) - free_ix); ++ /* pjm 1998-06-15: This should help reduce fragmentation. ++ Actually, we could set block to clu_block0 + clu_nbytes, ++ and goal to the last allocated blkaddr in the compressed ++ cluster. ++ It would be nice if we would transfer the freed blocks ++ to preallocation, while we're at it. */ ++// write_lock(&ei->i_meta_lock); ++ /* mw: i_next_alloc_goal and i_next_alloc_block were removed in 2.6.24.x ++ * so we dont need to set them to 0 (they are anyway, somehow). ++ */ ++ //ei->i_next_alloc_goal = ei->i_next_alloc_block = 0; ++// write_unlock(&ei->i_meta_lock); ++ if (err < 0) { ++ goto done; ++ } ++ /* Note that ext2_free_cluster_blocks() marks the ++ cluster as compressed. */ ++ ++ /* Shuffle used blocks to beginning of block-number array. */ ++ { ++ struct ext2_bkey key; ++ unsigned i; ++ ++ if (!ext2_get_key(&key, ++ inode, ++ ext2_cluster_block0(inode, cluster))) { ++ ei->i_flags |= EXT2_ECOMPR_FL; ++ result = -EIO; ++ free_ix = 0; ++ } ++ for (i = 0; i < free_ix; i++) { ++ curr = ext2_get_key_blkaddr(&key); ++ ++ if ((c_nblk == free_ix) ++ && (curr != bh[i]->b_blocknr)) { ++ /* "Can't happen", yet has ++ happened a couple of times. 
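++ (The check only applies when c_nblk == free_ix, i.e. there are
++ no holes among the first c_nblk blocks; the key walk should then
++ return exactly the block numbers kept in bh[], and any mismatch
++ is reported via ext2_error() below.)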
*/ ++ ext2_error(inode->i_sb, "ext2_compress_cluster", ++ "c_nblk=free_ix=%d, " ++ "curr=%u, b_blocknr=%lu, " ++ "lhp=%d , hm=, " ++ "ino=%lu, blk=%u", ++ c_nblk, curr, ++ (unsigned long) bh[i]->b_blocknr, ++ (int) last_hole_pos, ++ /*mw: became invalid due put_cpu: ++ (unsigned) le32_to_cpu(*(u32 *) head-> ++ holemap),*/ ++ inode->i_ino, ++ (unsigned) ++ ext2_cluster_block0(inode, cluster) + i); ++ } ++ err = ext2_set_key_blkaddr(&key, ++ (i < c_nblk ++ ? bh[i]->b_blocknr ++ : EXT2_COMPRESSED_BLKADDR)); ++ if (err) ++ break; ++ if (!ext2_next_key(&key, 1)) { ++ ei->i_flags |= EXT2_ECOMPR_FL; /* sorry... */ ++ result = -EIO; ++ break; ++ } ++ } ++ ext2_free_key(&key); ++ } ++ } ++ ++ /* ++ * Unlock the working area. ++ */ ++ ++#ifdef EXT2_COMPR_REPORT_WA ++ printk(KERN_DEBUG "pid %d leaves critical region\n", current->pid); ++#endif ++ ++ assert(c_nblk < u_nblk); ++ ext2_mark_algorithm_use(inode, alg); ++ ++ /* TLL update b_assoc_map per 2.6.20 6-07-07 */ ++ for (i = 0; i < c_nblk; i++) ++ if (bh[i] != NULL) { ++ bh[i]->b_assoc_map = inode->i_mapping; ++ bh[i]->b_page->mapping = inode->i_mapping; //Andreas 5-24-07 : necessary? WRONG? ++ } ++ //mw: we must force the writeback, otherwise ext2_readpage will get confused ++ // yaboo ding had similiar code above. but I think it makes more sense after ++ // the block shuffeling. ++ // Note: generic_oysnc_inode() made trouble with USB-Sticks and caused a lot ++ // of IO, stalled system ... therefore ll_rw_block() replace it. Anyway we already operate ++ // with this low-level function. ++ ++ /*mw: new "hole" fix. hole == bdev bug! */ ++ for (i = 0; i < c_nblk; i++) { ++ ++ /* this was a hole (uncompressed) ++ * at the beginning of the cluster. ++ * so NO block was yet associated with it. ++ * But now we need it, because a compressed ++ * cluster always starts at the cluster.*/ ++ if (!buffer_mapped(bh[i]) || bh[i]->b_bdev == NULL) { ++ u32 block = ext2_cluster_block0(inode, cluster); ++ ext2_get_block(inode, block + i, bh[i], 1); ++ //printk("ext2_get_block Block:%lu, Mapped:%i, Page:%lu, bdev: %#x\n", bh[i]->b_blocknr, (bh[i]->b_state & BH_Mapped), (bh[i]->b_page ? bh[i]->b_page->index : 0), bh[i]->b_bdev ); ++ } ++ assert(buffer_mapped(bh[i])); ++ assert(bh[i]->b_bdev != NULL); ++ assert(bh[i]->b_bdev == inode->i_sb->s_bdev); ++ } ++ ++ ll_rw_block(WRITE, c_nblk, bh); ++ ++ CHECK_NOT_ATOMIC ++ //mw: seems we have to wait here, otherwise: crash! ++ for (i = 0; i < c_nblk; i++) { ++ if (bh[i]) ++ wait_on_buffer(bh[i]); ++ //printk("written compressed block: Block:%lu, Mapped:%i, Page:%lu, bdev: %#x\n", bh[i]->b_blocknr, (bh[i]->b_state & BH_Mapped), (bh[i]->b_page ? bh[i]->b_page->index : 0), bh[i]->b_bdev ); ++ } ++ ++ ++#ifdef CONFIG_HIGHMEM ++ if (kmapped) ++ ext2_kunmap_cluster_pages(NULL, pg, NULL); ++#endif ++ ++ inode->i_ctime = CURRENT_TIME; //mw: these two come always together. So I also put it here. ++ mark_inode_dirty_sync(inode); ++ ++ //ext2_update_inode(inode, inode_needs_sync(inode)); //mw: might be able to fix pipe_write vs. readpage. mutex-rec-locking ++ ++ /* COMPRBLK is already high, so no need to raise it. 
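++ The block below resets the now-unused buffer heads (from c_nblk
++ onwards) and then unlocks and releases the cluster pages taken
++ at the start of this function.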
*/ ++ { ++ for (i = c_nblk; (i < EXT2_MAX_CLUSTER_BLOCKS) && (bh[i] != NULL); ++ i++) { ++ clear_buffer_dirty(bh[i]); ++ bh[i]->b_blocknr = 0; ++ clear_bit(BH_Mapped, &bh[i]->b_state); ++ clear_bit(BH_Uptodate, &bh[i]->b_state); ++ } ++ for (i = 0; i < EXT2_MAX_CLUSTER_PAGES; i++) { ++ if (pg[i] == NULL) { ++ break; ++ } ++ assert(PageLocked(pg[i])); ++ ClearPageUptodate(pg[i]); ++ unlock_page(pg[i]); ++ page_cache_release(pg[i]); ++ } ++ ++ /* invalidate_inode_buffers replacement code: TLL 02/21/07 ++ * e2compr on post 2.6.10 kernels do not have an uptodate ++ * mapping->assoc_mapping (other Vm(?) changes require it be ++ * made explicit, 2.4 kernels have it implicit). Therefore, when ++ * umount is called, a GPF ensues from a NULL ops pointer. ++ * e2c on a USB thumbdrive mounted as the root fs does not ++ * support repeated compress/uncompress cycles on a given file. ++ * Inlined the flush list code to explicityly force update to ++ * disk with a known valid bh list. ++ */ ++ ++ /* mw: I consider this code as ... not so good! */ ++ /* ++ if (inode_has_buffers(inode)) { ++ //struct address_space *mapping = &inode->i_data; ++ // struct address_space *buffer_mapping = mapping->assoc_mapping; ++ // requires: inode->i_data->mapping->assoc_mapping; to be set ++ invalidate_inode_buffers(inode); // TLL do it proper 5-25-07 ++ //if (dotrunc) ++ //ext2_truncate(inode); // TLL file size hack 6-19-07 ++ } ++ */ ++ ++ } ++#ifdef EXT2_COMPR_REPORT ++ trace_e2c(" < < < ext2_compress_cluster %i: [done cpr] inode=%ld\n", cluster, inode->i_ino); ++#endif ++ return result; ++ ++ ++ done: ++ ++#ifdef CONFIG_HIGHMEM ++ if (kmapped) ++ ext2_kunmap_cluster_pages(NULL, pg, NULL); ++#endif ++ ++ { ++ for (i = 0; i < EXT2_MAX_CLUSTER_PAGES; i++) { ++ if (pg[i] == NULL) { ++ break; ++ } ++ unlock_page(pg[i]); ++ page_cache_release(pg[i]); ++ } ++ /* TLL cp to compr dir bug fix 03-25-07 ++ Truncate uncompressed files to their uncompressed ++ length, i.e. force kernel to update inode and sb */ ++ //if(dotrunc) ++ //26.08.2011: ext2_truncate(inode) does not exist anymore ++ ext2_truncate_blocks(inode, inode->i_size); ++ ++ } ++#ifdef EXT2_COMPR_REPORT_VERBOSE ++ { ++ int i; ++ ++ printk(KERN_DEBUG "ext2_compress_cluster[end]: buffers kept for cluster=%d\n", cluster); ++ for (i = 0; i < nbh; i++) { ++ if (bh[i]) { ++ printk(KERN_DEBUG "2buffer_head[%d]: blocknr=%lu, addr=0x%p ", i, (unsigned long) bh[i]->b_blocknr, bh[i]); ++ if (bh[i]->b_page) ++ printk(KERN_DEBUG "2:[page->index=%ld]\n", bh[i]->b_page->index); ++ else ++ printk(KERN_DEBUG "[No page]\n"); ++ } else ++ printk(KERN_DEBUG "buffer_head[%d] is NULL\n", i); ++ } ++ } ++#endif ++ ++#ifdef EXT2_COMPR_REPORT ++ trace_e2c(" < < < ext2_compress_cluster %i: [done NO cpr] inode=%ld\n", cluster, inode->i_ino); ++#endif ++ return result; ++} ++ ++ ++/* Go through all the clusters and compress them if not already ++ compressed. ++ ++ This is called by ext2_put_inode() and ext2_release_file(). Later, ++ we may have ext2_ioctl() call it (when EXT2_COMPR_FL rises). None ++ of the callers does any locking, so we do it here. ++ ++ Neither of the current callers uses the return code, but we get ready ++ for if we start using it. ++ ++ Returns 0 on "success" (whether or not we cleared EXT2_CLEANUP_FL ++ or EXT2_DIRTY_FL bits), -errno on error. 
*/
++int ext2_cleanup_compressed_inode(struct inode *inode)
++{
++ u32 cluster;
++ u32 n_clusters;
++ int dirty = 0;
++ int err = 0;
++ u32 comprblk_mask;
++ atomic_t start_i_count = inode->i_count;
++ int retry = 0;
++ int have_downed;
++ struct ext2_inode_info *ei = EXT2_I(inode);
++#ifdef EXT2_COMPR_REPORT
++ char bdn[BDEVNAME_SIZE];
++#endif
++
++ /* impl: Actually, this assertion could fail if the kernel
++ isn't locked. I haven't looked, but I suppose that the
++ kernel always is locked when this is called. */
++ assert(ei->i_compr_flags & EXT2_CLEANUP_FL);
++
++#ifdef EXT2_COMPR_REPORT_PUT
++ printk(KERN_DEBUG "ext2_cleanup_compressed_inode() called for pid %d; "
++ "dev=%s, ino=%lu, i_state=0x%lx, i_count=%u\n",
++ current->pid, bdevname(inode->i_sb->s_bdev, bdn), inode->i_ino,
++ inode->i_state, atomic_read(&inode->i_count));
++#endif
++
++ /* Do these tests twice: once before down() and once after. */
++ for (have_downed = 0;; have_downed++) {
++ if ((ei->i_flags & (EXT2_COMPR_FL | EXT2_DIRTY_FL))
++ != (EXT2_COMPR_FL | EXT2_DIRTY_FL)) {
++ if (have_downed)
++ goto out;
++ /* TLL 5-25-07 changed from a warning to trace */
++ /*trace_e2c("ext2_cleanup_compressed_inode: trying to un/compress an "
++ "uncompressable file.\n"
++ "i_flags=%#x. (dev=%s, ino=%lu, down=%d)\n",
++ ei->i_flags, bdevname(inode->i_sb->s_bdev, bdn),
++ inode->i_ino, have_downed); */
++ return 0;
++ }
++
++ /* test if file is mapped by mmap */
++ if (mapping_mapped(inode->i_mapping))
++ {
++ //trace_e2c("ext2_cleanup_compressed_inode: (dev. %s): ino=%ld: file mapped, does not compress cluster\n", bdevname(inode->i_sb->s_bdev, bdn), inode->i_ino);
++ if (have_downed)
++ goto out;
++ else
++ return 0;
++ }
++
++ if (IS_RDONLY(inode)
++ || (ei->i_flags & EXT2_ECOMPR_FL)) {
++ ei->i_compr_flags &= ~EXT2_CLEANUP_FL;
++ if (have_downed)
++ goto out;
++ else
++ return 0;
++ }
++
++ //mw
++ if (ext2_get_dcount(inode) > 1) {
++ err = 0;
++ //printk("Compress: file busy (dcount: %i>1)\n", ext2_get_dcount(inode));
++ if (have_downed)
++ goto out;
++ else
++ return 0;
++ }
++
++ if (have_downed)
++ break;
++
++ /* Quotas aren't otherwise kept if file is opened O_RDONLY. */
++ dquot_initialize(inode);
++
++ /* Check whether OSYNC of the inode is actually running */
++ //if (ei->i_compr_flags & EXT2_OSYNC_INODE)
++ //printk(KERN_DEBUG "OSYNC!\n");
++
++ /* I think:
++ * checking these flags should prevent one process from acquiring the mutex again,
++ * e.g. in a recursive call.
++ * BUT: what actually happens: two processes are working on this inode: pdflush and the user program.
++ * SO: the check might only be correct if ei->i_compr_flags & EXT2_OSYNC_INODE holds AND the same process already owns this lock!!!
++ */
++ //if (!(ei->i_compr_flags & EXT2_OSYNC_INODE))
++ //{
++ mutex_lock(&inode->i_mutex);
++#ifdef EXT2_COMPR_REPORT_MUTEX
++ printk(KERN_DEBUG "CLEANUP_LOCK of PID %u @ inode:%lu\n", current->pid, inode->i_ino);
++#endif
++ //}
++ }
++ n_clusters = ext2_n_clusters(inode);
++
++#ifdef EXT2_COMPR_REPORT_PUT
++ printk(KERN_DEBUG "ext2: inode:%lu: put compressed, clusters = %d, flags = %x, pid = %u\n",
++ inode->i_ino, n_clusters, ei->i_flags, current->pid);
++#endif
++
++ assert(atomic_read(&inode->i_mutex.count) <= 0); /* i.e. mutex_lock */
++
++ /* Try to compress the clusters. We clear EXT2_DIRTY_FL only
++ if we looked at every cluster and if there was no error. */
++
++ /* impl: We raise EXT2_COMPRBLK_FL now so that ext2_ioctl()
++ doesn't try to change the cluster size beneath us.
If need ++ be, we restore the bit to its original setting before ++ returning. Note that no-one else can _change_ ++ EXT2_COMPRBLK_FL while we work because i_sem is down. */ ++ /* impl: Note what's happening here with comprblk_mask. The ++ current state of COMPRBLK_FL (before we start) is that ++ (comprblk == 1) || (no compressed clusters). At the end of ++ the procedure, comprblk == one if (at least one compressed ++ cluster, or an error occurred preventing us from finding ++ out). */ ++ comprblk_mask = ~EXT2_COMPRBLK_FL | ei->i_flags; ++ ei->i_flags |= EXT2_COMPRBLK_FL; ++ ++ for (cluster = 0; cluster < n_clusters; cluster++) { ++ if (atomic_read(&inode->i_count) > atomic_read(&start_i_count)) { ++ /* This is a poor way of doing this (and doubly ++ poor now that the only users of i_count are ++ the dentries), but the idea is not to ++ compress things tht are likely to be ++ decompressed soon. I guess a better way of ++ doing this would be just to make sure tht ++ the stuff is in the page cache. */ ++ retry = 1; ++ break; ++ } ++ err = ext2_cluster_is_compressed_fn(inode, cluster); ++ if (err == 0) { ++ //mw: ext2_compress_cluster might clean EXT2_COMPRBLK_FL, therefore raise it for every new cluster ++ ei->i_flags |= EXT2_COMPRBLK_FL; ++ ++ err = ext2_compress_cluster(inode, cluster); ++ if (err < 0) ++ dirty = 1; ++ else if (err > 0) ++ comprblk_mask = ~0ul; ++ } else if (err < 0) ++ break; ++ else { ++ err = 0; ++ assert(comprblk_mask == ~0ul); /* i.e. that EXT2_COMPRBLK_FL was high. */ ++ } ++ } ++ ++ if ((cluster >= n_clusters) && !dirty) ++ ei->i_flags &= ~EXT2_DIRTY_FL; ++ if (!retry) { ++ ei->i_compr_flags &= ~EXT2_CLEANUP_FL; ++ ei->i_flags &= comprblk_mask; ++ } ++ ++ /* We clear EXT2_CLEANUP_FL because, otherwise, we'll get ++ called again almost immediately. */ ++ ++ /* ++ * The CLEANUP flag *MUST* be cleared, otherwise the iput routine ++ * calls ext2_put_inode() again (because i_dirt is set) and there ++ * is a loop. The control scheme (CLEANUP + DIRTY flags) could ++ * probably be improved. On the other hand, i_dirt MUST be set ++ * because we may have sleeped, and we must force the iput routine ++ * to look again at the i_count ... ++ */ ++ /* TODO: Have a look at this cleanup scheme. The above ++ comment sounds wrong. */ ++ ++ inode->i_ctime = CURRENT_TIME; ++ mark_inode_dirty_sync(inode); ++ out: ++ ++#ifdef EXT2_COMPR_REPORT_MUTEX ++ printk(KERN_DEBUG "CLEANUP_UNLOCK of PID %u @ inode:%lu\n", current->pid, inode->i_ino); ++#endif ++ ++// if (!(ei->i_compr_flags & EXT2_OSYNC_INODE)) { /* MW 5-16-07 */ ++ mutex_unlock(&inode->i_mutex); ++// } /* MW 5-16-07 */ ++ return err; /* TODO: Check that ,err` is appropriate. */ ++} ++ ++ ++int ext2_recognize_compressed(struct inode *inode, unsigned cluster) ++{ ++ /* ext2_recognize_compressed(): Check tht the cluster is valid ++ in every way, and then do the EXT2_COMPRESSED_BLKADDR ++ thing. */ ++ /* nyi, fixme. All of the userspace stuff (EXT2_NOCOMPR_FL ++ etc.) needs work, so I might as well leave this. See ++ ioctl.c for a description of what it's supposed to do. */ ++ return -ENOSYS; ++} ++ ++ ++/* Look for compressed clusters. If none, then clear EXT2_COMPRBLK_FL. ++ ++ Called by: ++ ext2_truncate(). 
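++ The scan below walks the block keys one cluster at a time: the
++ first EXT2_COMPRESSED_BLKADDR found leaves the flag alone;
++ otherwise the flag is cleared once the last block has been passed.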
++ */ ++void ext2_update_comprblk(struct inode *inode) ++{ ++ unsigned block, last_block; ++ struct ext2_bkey key; ++ struct ext2_inode_info *ei = EXT2_I(inode); ++ ++ assert(ei->i_flags & EXT2_COMPRBLK_FL); ++ if (inode->i_size == 0) { ++ ei->i_flags &= ~EXT2_COMPRBLK_FL; ++ trace_e2c("ext2_update_comprblk 1: inode: %lu removed EXT2_COMPRBLK_FL!\n", inode->i_ino); ++ return; ++ } ++ last_block = ROUNDUP_RSHIFT(inode->i_size, ++ inode->i_sb->s_blocksize_bits) - 1; ++ block = ext2_first_cluster_nblocks(inode) - 1; ++ ++ assert(atomic_read(&inode->i_mutex.count) <= 0); ++ ++ if (!ext2_get_key(&key, inode, block)) ++ return; ++ for (;;) { ++ if (ext2_get_key_blkaddr(&key) == EXT2_COMPRESSED_BLKADDR) ++ goto out; ++ if (block >= last_block) ++ goto clear; ++ if (!ext2_next_key(&key, ei->i_clu_nblocks)) ++ goto out; ++ block += ei->i_clu_nblocks; ++ } ++ clear: ++ trace_e2c("ext2_update_comprblk 2: inode: %lu removed EXT2_COMPRBLK_FL!\n", inode->i_ino); ++ ei->i_flags &= ~EXT2_COMPRBLK_FL; ++ out: ++ ext2_free_key(&key); ++ assert(atomic_read(&inode->i_mutex.count) <= 0); ++ ++} ++ ++ ++/* ++ * allocate working areas ++ */ ++ ++DEFINE_PER_CPU(struct ext2_wa_S *, ext2_rd_wa) = NULL; ++DEFINE_PER_CPU(struct ext2_wa_S *, ext2_wr_wa) = NULL; ++ ++/* SMP, setup wa's. caller must hold wa already via get_cpu_var */ ++void ext2_alloc_rd_wa(){ ++ if ((__get_cpu_var(ext2_rd_wa) == NULL) ) { ++ size_t rsize = 2 * EXT2_MAX_CLUSTER_BYTES; //mw: just guessing ++ ++ __get_cpu_var(ext2_rd_wa) = vmalloc (rsize); ++ if (__get_cpu_var(ext2_rd_wa) == NULL) ++ printk ("EXT2-fs: can't allocate working area; compression turned off.\n"); ++ else { ++ printk ("ext2-compression: allocated read buffer for CPU%i at %p-%p (%zu bytes)\n", ++ get_cpu(), __get_cpu_var(ext2_rd_wa), (char *)__get_cpu_var(ext2_rd_wa) + rsize, rsize); ++# ifdef EXT2_COMPR_REPORT_WA ++ printk (KERN_INFO "EXT2-fs: rd_wa=%p--%p (%d)\n", ++ ext2_rd_wa, (char *)ext2_rd_wa + rsize, rsize); ++# endif ++ put_cpu(); ++ } ++ } ++} ++ ++void ext2_alloc_wr_wa(){ ++ ++ if ((__get_cpu_var(ext2_wr_wa) == NULL) ) { ++ size_t wsize = 2 * EXT2_MAX_CLUSTER_BYTES; //mw: just guessing ++ __get_cpu_var(ext2_wr_wa) = vmalloc (wsize); ++ ++ if (__get_cpu_var(ext2_wr_wa) == NULL) ++ printk ("EXT2-fs: can't allocate working area; " ++ "compression turned off.\n"); ++ else { ++ printk ("ext2-compression: allocated write buffer for CPU%i at %p-%p (%zu bytes)\n", ++ get_cpu(), __get_cpu_var(ext2_wr_wa), (char *)__get_cpu_var(ext2_wr_wa) + wsize, wsize); ++#ifdef EXT2_COMPR_REPORT_WA ++ printk (KERN_INFO "EXT2-fs: wr_wa=%p--%p (%d)\n", ++ ext2_wr_wa, (char *)ext2_wr_wa + wsize, wsize); ++#endif ++ put_cpu(); ++ } ++ } ++} ++ ++ +--- linux-3.2-rc5/fs/ext2/e2zlib.c 1970-01-01 01:00:00.000000000 +0100 ++++ linux-3.2-rc5-e2c/fs/ext2/e2zlib.c 2011-12-13 14:22:47.841975843 +0100 +@@ -0,0 +1,74 @@ ++ ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++ ++static DEFINE_PER_CPU(struct crypto_comp *, tfm) = NULL; ++ ++size_t ext2_iZLIB(int action) ++{ ++ /*mw: we init tfm when we need it...*/ ++ return 0; ++} ++ ++ ++size_t ext2_wZLIB(__u8 * ibuf, __u8 * obuf, void *heap, ++ size_t ilen, size_t olen, int level) ++{ ++ int ret, dlen; ++ ++ if (!try_module_get(THIS_MODULE)) ++ return 0; ++ ++ /*check if we already have a tfm*/ ++ get_cpu_var(tfm); ++ if (__get_cpu_var(tfm) == NULL){ ++ __get_cpu_var(tfm) = crypto_alloc_comp("deflate", 0, CRYPTO_ALG_ASYNC); ++ } ++ assert(__get_cpu_var(tfm) != NULL); ++ ++ dlen = olen; ++ ret = 
crypto_comp_compress(__get_cpu_var(tfm) , ibuf, ilen, obuf, &dlen); ++ ++ put_cpu_var(tfm); ++ ++ if (ret) { ++ //printk(KERN_DEBUG "ext2_wZLIB: crypto_comp_compress failed: %d, ilen: %d, olen: %d\n", ret, ilen, olen); ++ return 0; ++ } ++ return dlen; ++} ++ ++ ++size_t ext2_rZLIB(__u8 * ibuf, __u8 * obuf, void *heap, ++ size_t ilen, size_t olen, int ignored) ++{ ++ int ret, dlen; ++ ++ if (!try_module_get(THIS_MODULE)) ++ return 0; ++ ++ /*check if we already have a tfm*/ ++ get_cpu_var(tfm); ++ if (__get_cpu_var(tfm) == NULL){ ++ __get_cpu_var(tfm) = crypto_alloc_comp("deflate", 0, CRYPTO_ALG_ASYNC); ++ } ++ assert(__get_cpu_var(tfm) != NULL); ++ ++ dlen = olen; ++ ret = crypto_comp_decompress(__get_cpu_var(tfm), ibuf, ilen, obuf, &dlen); ++ ++ put_cpu_var(tfm); ++ ++ if (ret) { ++ //printk(KERN_DEBUG "ext2_wZLIB: crypto_comp_decompress failed: %d, ilen: %d, olen: %d\n", ret, ilen, olen); ++ return 0; ++ } ++ ++ return dlen; ++} +--- linux-3.2-rc5/fs/ext2/adler32.c 1970-01-01 01:00:00.000000000 +0100 ++++ linux-3.2-rc5-e2c/fs/ext2/adler32.c 2011-12-13 14:22:47.841975844 +0100 +@@ -0,0 +1,43 @@ ++/* adler32.c -- compute the Adler-32 checksum of a data stream ++ * Copyright (C) 1995-1998 Mark Adler ++ * For conditions of distribution and use, see copyright notice in zlib.h ++ */ ++ ++/* @(#) $Id: e2compr2.6.25.patch,v 1.1.2.1 2008/04/17 09:49:32 winkler Exp $ */ ++ ++#define BASE 65521L /* largest prime smaller than 65536 */ ++#define NMAX 5552 ++/* NMAX is the largest n such that 255n(n+1)/2 + (n+1)(BASE-1) <= 2^32-1 */ ++ ++#define DO1(buf,i) {s1 += buf[i]; s2 += s1;} ++#define DO2(buf,i) DO1(buf,i); DO1(buf,i+1); ++#define DO4(buf,i) DO2(buf,i); DO2(buf,i+2); ++#define DO8(buf,i) DO4(buf,i); DO4(buf,i+4); ++#define DO16(buf) DO8(buf,0); DO8(buf,8); ++ ++/* ========================================================================= */ ++unsigned long ext2_adler32(unsigned long adler, const unsigned char *buf, unsigned int len) ++{ ++ unsigned long s1 = adler & 0xffff; ++ unsigned long s2 = (adler >> 16) & 0xffff; ++ int k; ++ ++ if (buf == 0) return 1L; ++ ++ while (len > 0) { ++ k = len < NMAX ? 
len : NMAX; ++ len -= k; ++ while (k >= 16) { ++ DO16(buf); ++ buf += 16; ++ k -= 16; ++ } ++ if (k != 0) do { ++ s1 += *buf++; ++ s2 += s1; ++ } while (--k); ++ s1 %= BASE; ++ s2 %= BASE; ++ } ++ return (s2 << 16) | s1; ++} +--- linux-3.2-rc5/fs/ext2/super.c 2011-12-10 00:09:32.000000000 +0100 ++++ linux-3.2-rc5-e2c/fs/ext2/super.c 2011-12-13 14:22:47.843975906 +0100 +@@ -32,7 +32,12 @@ + #include + #include + #include ++#ifdef CONFIG_EXT2_COMPRESS ++#include ++#include ++#else + #include "ext2.h" ++#endif + #include "xattr.h" + #include "acl.h" + #include "xip.h" +@@ -393,7 +398,11 @@ enum { + Opt_resgid, Opt_resuid, Opt_sb, Opt_err_cont, Opt_err_panic, + Opt_err_ro, Opt_nouid32, Opt_nocheck, Opt_debug, + Opt_oldalloc, Opt_orlov, Opt_nobh, Opt_user_xattr, Opt_nouser_xattr, +- Opt_acl, Opt_noacl, Opt_xip, Opt_ignore, Opt_err, Opt_quota, ++ Opt_acl, Opt_noacl, ++#ifdef CONFIG_EXT2_COMPRESS ++ Opt_force_compat, ++#endif ++ Opt_xip, Opt_ignore, Opt_err, Opt_quota, + Opt_usrquota, Opt_grpquota, Opt_reservation, Opt_noreservation + }; + +@@ -426,6 +435,9 @@ static const match_table_t tokens = { + {Opt_ignore, "noquota"}, + {Opt_quota, "quota"}, + {Opt_usrquota, "usrquota"}, ++#ifdef CONFIG_EXT2_COMPRESS ++ {Opt_force_compat, "force-compat"}, ++#endif + {Opt_reservation, "reservation"}, + {Opt_noreservation, "noreservation"}, + {Opt_err, NULL} +@@ -569,6 +581,11 @@ static int parse_options(char *options, + clear_opt(sbi->s_mount_opt, RESERVATION); + ext2_msg(sb, KERN_INFO, "reservations OFF"); + break; ++#ifdef CONFIG_EXT2_COMPRESS ++ case Opt_force_compat: ++ set_opt(sbi->s_mount_opt, FORCE_COMPAT); ++ break; ++#endif + case Opt_ignore: + break; + default: +@@ -585,6 +602,10 @@ static int ext2_setup_super (struct supe + int res = 0; + struct ext2_sb_info *sbi = EXT2_SB(sb); + ++#ifdef CONFIG_EXT2_COMPRESS ++ printk (KERN_INFO E2COMPR_VERSION "\n"); ++#endif ++ + if (le32_to_cpu(es->s_rev_level) > EXT2_MAX_SUPP_REV) { + ext2_msg(sb, KERN_ERR, + "error: revision level too high, " +@@ -876,6 +897,65 @@ static int ext2_fill_super(struct super_ + le32_to_cpu(features)); + goto failed_mount; + } ++#ifdef CONFIG_EXT2_COMPRESS ++ /* Check that required algorithms are available. */ ++ /* todo: Provide a mount option to override this. */ ++ /* ++ * Philosophical bug: we assume that an algorithm's ++ * module is available if and only if this kernel was ++ * compiled with that algorithm as a module. This is ++ * untrue, but it is unclear what the right thing to ++ * do is. ++ */ ++ j = 0; /* error flag */ ++ if ((es->s_feature_incompat ++ & cpu_to_le32(EXT2_FEATURE_INCOMPAT_COMPRESSION)) ++ && (es->s_algorithm_usage_bitmap ++ & ~cpu_to_le32(EXT2_ALGORITHMS_SUPP))) { ++ /* ++ * The filesystem employs an algorithm not ++ * supported by this filesystem. Issue warning or ++ * error. ++ */ ++ for (i = 0; i < 32; i++) { ++ if (!(es->s_algorithm_usage_bitmap ++ & cpu_to_le32(1 << i)) ++ || ((EXT2_ALGORITHMS_SUPP ++ & (1 << i)))) ++ continue; ++ /* ++ * TODO: Can't this message be moved outside ++ * of the for loop? 
++ */ ++ if (!j) { ++ if (test_opt(sb, FORCE_COMPAT)) ++ printk(KERN_WARNING ++ "EXT2-fs: %s: " ++ "uses unsupported " ++ "compression algorithms", ++ sb->s_id); ++ else ++ printk("EXT2-fs: %s: couldn't mount " ++ "because of unsupported " ++ "compression algorithms", ++ sb->s_id); ++ j = 1; ++ } ++ if (i < EXT2_N_ALGORITHMS) ++ printk(" %s", ext2_algorithm_table[i].name); ++ else ++ printk(" %u", i); ++ } ++ } ++ if (j) { ++ if (test_opt(sb, FORCE_COMPAT)) ++ printk(" but ignoring as you request.\n"); ++ else { ++ printk(".\n"); ++ goto failed_mount; ++ } ++ } ++#endif /* CONFIG_EXT2_COMPRESS */ + if (!(sb->s_flags & MS_RDONLY) && + (features = EXT2_HAS_RO_COMPAT_FEATURE(sb, ~EXT2_FEATURE_RO_COMPAT_SUPP))){ + ext2_msg(sb, KERN_ERR, "error: couldn't mount RDWR because of " +--- linux-3.2-rc5/fs/ext2/ialloc.c 2011-12-10 00:09:32.000000000 +0100 ++++ linux-3.2-rc5-e2c/fs/ext2/ialloc.c 2011-12-13 14:22:47.845975968 +0100 +@@ -470,6 +470,9 @@ struct inode *ext2_new_inode(struct inod + brelse(bitmap_bh); + bitmap_bh = read_inode_bitmap(sb, group); + if (!bitmap_bh) { ++#ifdef CONFIG_EXT2_COMPRESS ++ EXT2_I(inode)->i_flags &= ~EXT2_COMPR_FL; ++#endif + err = -EIO; + goto fail; + } +@@ -558,6 +561,17 @@ got: + memset(ei->i_data, 0, sizeof(ei->i_data)); + ei->i_flags = + ext2_mask_flags(mode, EXT2_I(dir)->i_flags & EXT2_FL_INHERITED); ++#ifdef CONFIG_EXT2_COMPRESS ++ /* ++ * The EXT2_COMPR flag is inherited from the parent ++ * directory as well as the cluster size and the compression ++ * algorithm. ++ */ ++ ei->i_log2_clu_nblocks = EXT2_I(dir)->i_log2_clu_nblocks; ++ ei->i_clu_nblocks = EXT2_I(dir)->i_clu_nblocks; ++ ei->i_compr_method = EXT2_I(dir)->i_compr_method; ++ ei->i_compr_flags = 0; ++#endif + ei->i_faddr = 0; + ei->i_frag_no = 0; + ei->i_frag_size = 0; +--- linux-3.2-rc5/fs/ext2/balloc.c 2011-12-10 00:09:32.000000000 +0100 ++++ linux-3.2-rc5-e2c/fs/ext2/balloc.c 2011-12-13 14:22:47.847976031 +0100 +@@ -11,8 +11,13 @@ + * David S. Miller (davem@caip.rutgers.edu), 1995 + */ + ++#ifdef CONFIG_EXT2_COMPRESS ++#include ++#include ++#else + #include "ext2.h" + #include ++#endif + #include + #include + #include +@@ -499,6 +504,13 @@ void ext2_free_blocks (struct inode * in + struct ext2_super_block * es = sbi->s_es; + unsigned freed = 0, group_freed; + ++ ++#ifdef CONFIG_EXT2_COMPRESS ++ assert((block != EXT2_COMPRESSED_BLKADDR) ++ || !S_ISREG(inode->i_mode) ++ || !(EXT2_SB(sb)->s_es->s_feature_incompat ++ & cpu_to_le32(EXT2_FEATURE_INCOMPAT_COMPRESSION))); ++#endif + if (block < le32_to_cpu(es->s_first_data_block) || + block + count < block || + block + count > le32_to_cpu(es->s_blocks_count)) { +--- linux-3.2-rc5/fs/ext2/inode.c 2011-12-10 00:09:32.000000000 +0100 ++++ linux-3.2-rc5-e2c/fs/ext2/inode.c 2011-12-13 14:22:47.852976189 +0100 +@@ -32,7 +32,14 @@ + #include + #include + #include ++#ifdef CONFIG_EXT2_COMPRESS ++#include ++#include ++#include ++#include ++#else + #include "ext2.h" ++#endif + #include "acl.h" + #include "xip.h" + +@@ -40,6 +47,34 @@ MODULE_AUTHOR("Remy Card and others"); + MODULE_DESCRIPTION("Second Extended Filesystem"); + MODULE_LICENSE("GPL"); + ++#ifdef CONFIG_EXT2_COMPRESS ++/* mw: this function counts all references ++ * to this inode. this is necessary to ++ * refuse un/compression if the file has ++ * more than one refernce, I guess. 
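++ * It walks the inode's i_dentry alias list and sums d_count for
++ * each dentry under d_lock, mirroring the accounting in fs/dcache.c.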
*/ ++int ext2_get_dcount(struct inode *inode) ++{ ++ struct dentry *dentry; ++ struct list_head *head, *next, *tmp; ++ int count; ++ ++ head = &inode->i_dentry; ++ next = inode->i_dentry.next; ++ count = 0; ++ while (next != head) { ++ dentry = list_entry(next, struct dentry, d_alias); ++ tmp = next; ++ next = tmp->next; ++ spin_lock(&dentry->d_lock); ++ count += dentry->d_count; ++ spin_unlock(&dentry->d_lock); ++ //mw: similar to fs/dcache.c ++ } ++ ++ return count; ++} ++#endif ++ + static int __ext2_write_inode(struct inode *inode, int do_sync); + + /* +@@ -54,7 +89,9 @@ static inline int ext2_inode_is_fast_sym + inode->i_blocks - ea_blocks == 0); + } + ++#ifndef CONFIG_EXT2_COMPRESS + static void ext2_truncate_blocks(struct inode *inode, loff_t offset); ++#endif + + static void ext2_write_failed(struct address_space *mapping, loff_t to) + { +@@ -240,7 +277,11 @@ static Indirect *ext2_get_branch(struct + *err = 0; + /* i_data is not going away, no lock needed */ + add_chain (chain, NULL, EXT2_I(inode)->i_data + *offsets); ++#ifdef CONFIG_EXT2_COMPRESS ++ if (HOLE_BLKADDR(p->key)) ++#else + if (!p->key) ++#endif + goto no_block; + while (--depth) { + bh = sb_bread(sb, le32_to_cpu(p->key)); +@@ -251,7 +292,11 @@ static Indirect *ext2_get_branch(struct + goto changed; + add_chain(++p, bh, (__le32*)bh->b_data + *++offsets); + read_unlock(&EXT2_I(inode)->i_meta_lock); ++#ifdef CONFIG_EXT2_COMPRESS ++ if (HOLE_BLKADDR(p->key)) ++#else + if (!p->key) ++#endif + goto no_block; + } + return NULL; +@@ -297,7 +342,11 @@ static ext2_fsblk_t ext2_find_near(struc + + /* Try to find previous block */ + for (p = ind->p - 1; p >= start; p--) ++#ifdef CONFIG_EXT2_COMPRESS ++ if (!HOLE_BLKADDR(*p)) ++#else + if (*p) ++#endif + return le32_to_cpu(*p); + + /* No such thing, so let's try location of indirect block */ +@@ -498,7 +547,13 @@ static int ext2_alloc_branch(struct inod + */ + bh = sb_getblk(inode->i_sb, new_blocks[n-1]); + branch[n].bh = bh; ++#ifndef CONFIG_EXT2_COMPRESS + lock_buffer(bh); ++#else ++ CHECK_NOT_ATOMIC ++ if (!buffer_uptodate(bh)) ++ wait_on_buffer(bh); ++#endif + memset(bh->b_data, 0, blocksize); + branch[n].p = (__le32 *) bh->b_data + offsets[n]; + branch[n].key = cpu_to_le32(new_blocks[n]); +@@ -514,7 +569,9 @@ static int ext2_alloc_branch(struct inod + *(branch[n].p + i) = cpu_to_le32(++current_block); + } + set_buffer_uptodate(bh); ++#ifndef CONFIG_EXT2_COMPRESS + unlock_buffer(bh); ++#endif + mark_buffer_dirty_inode(bh, inode); + /* We used to sync bh here if IS_SYNC(inode). + * But we now rely upon generic_write_sync() +@@ -675,6 +732,7 @@ static int ext2_get_blocks(struct inode + if (err == -EAGAIN || !verify_chain(chain, partial)) { + while (partial > chain) { + brelse(partial->bh); ++// bforget(partial->bh); /*mw: e2c-pre-2.6.30.4 used bforget here*/ + partial--; + } + partial = ext2_get_branch(inode, depth, offsets, chain, &err); +@@ -766,21 +824,608 @@ int ext2_fiemap(struct inode *inode, str + ext2_get_block); + } + ++#ifdef CONFIG_EXT2_COMPRESS ++/* ++ * Readpage method that will take care of decompression. ++ */ ++/* effic: I (pjm) think tht at present, reading a 32KB cluster 4KB at ++ a time does `decompress 4KB' for the first 4KB, then `decompress ++ 8KB' for the second, and so on. See if we can provide the page ++ cache with all the pages in a cluster. The problem is, we don't ++ want to erase anything tht hasn't been written to disk, so we can't ++ just call update_vm_cache(). 
The plan at present is to remember ++ what the contents of ext2_rd_wa.u come from, and don't bother ++ decompressing anything if the working area already contains the ++ right data. However, this is only a win where adjacent calls to ++ ext2_decompress_blocks() request the same cluster. We could force ++ that by copying some code from generic_file_read() (but check for ++ deadlocks before doing anything like that), but instead I'm taking ++ the more passive approach of hoping for the best. */ ++static int ext2_readpage(struct file *file, struct page *page) ++{ ++ struct inode *inode = page->mapping->host; ++ struct page *pg[EXT2_MAX_CLUSTER_PAGES], *epg[EXT2_MAX_CLUSTER_PAGES]; ++ u32 cluster0, max_cluster; ++ int i, blockOfCluster, blocksToDo, npg; ++ const int inc = PAGE_SIZE >> inode->i_sb->s_blocksize_bits; ++ struct ext2_inode_info *ei = EXT2_I(page->mapping->host); ++#ifdef CONFIG_HIGHMEM ++ int kmapped = 0; //mw ++#endif ++ ++ int iClusterCnt; ++ ++ /* For directories, fall out through default routine */ ++ if (S_ISDIR(inode->i_mode)) ++ { ++ int rc; ++ ++ rc = block_read_full_page(page,ext2_get_block); ++ assert(!rc); ++ return rc; ++ } ++ ++ /* The semaphore prevents us trying to compress and decompress ++ the cluster at the same time, or compress a cluster in the ++ middle of reading it (thinking it to be uncompressed). ++ ++ You may not like the fact that we hold the semaphore across ++ readpage (given that it isn't held without e2compr compiled ++ in), but it does guarantee that we won't compress the ++ cluster during readpage. (OTOH, it's unlikely, if not ++ impossible, for someone to ,compress a cluster and rewrite ++ the blocks` before the readpage completes.) */ ++ /* This procedure used to have `#ifndef EXT2_LOCK_BUFFERS' ++ around all the semaphore stuff, and unlocked each buffer ++ before brelsing them ifdef EXT2_LOCK_BUFFERS. I (pjm, ++ 1998-01-20) have removed that because (a) EXT2_LOCK_BUFFERS ++ isn't #defined anywhere, and doesn't appear outside of this ++ function, and (b) I haven't looked at what effect locking ++ the buffers has. You may like to reintroduce the idea of ++ buffer locking to this function if you're more familiar ++ with buffer locking than I, and believe that the full i_sem ++ isn't necessary to protect from races (people seeing raw ++ compressed data) between readpage and ext2_file_write(), ++ ext2_compress_cluster() and ext2_truncate(). */ ++ unlock_page(page); ++ mutex_lock(&inode->i_mutex); ++ ++ assert (atomic_read(&inode->i_mutex.count) <= 0); /* i.e. mutex_lock */ ++ ++ //mw: added EXT2_COMPR_FL, because EXT2_COMPRBLK_FL mit change without mutex !!! ++ if ( !(ei->i_flags & (EXT2_COMPRBLK_FL|EXT2_COMPR_FL)) ++ || (ei->i_flags & EXT2_NOCOMPR_FL) ) ++ { ++ goto readpage_uncompressed; ++ } ++ ++ { ++ register u32 blockOfFile ++ = (page->index << PAGE_CACHE_SHIFT) >> inode->i_sb->s_blocksize_bits; ++ ++ blocksToDo = PAGE_SIZE >> inode->i_sb->s_blocksize_bits; ++ cluster0 = ext2_block_to_cluster(inode, blockOfFile); ++ max_cluster = ext2_block_to_cluster ++ (inode, blockOfFile + blocksToDo - 1); ++ blockOfCluster ++ = blockOfFile - ext2_cluster_block0(inode, cluster0); ++ } ++ ++ /* return -???, any idea which code. do_generic_file_read() cares, ext2_readpages() doesn't. ++ maybe I should look at the "generic" readpage() and see what it returns in this case */ ++ ++ /* Check if any part of the requested area contains part of a ++ compressed cluster. If not, we can use default ext2_readpage(). 
++ ++ (Note that we don't have to worry about a cluster becoming ++ compressed in the meantime, because we have the semaphore.) ++ ++ A page can cover up to 9 clusters. (The maximum can only ++ occur with 32KB pages, 4KB clusters, and a non-page-aligned ++ offset. Thanks go to Kurt Fitzner for reporting that ++ page offsets needn't be aligned; see generic_file_mmap().) */ ++ { ++ int isCmp[(PAGE_SIZE >> 12) + 1]; ++ u8 *dst; ++ unsigned clu_ix; ++ ++ assert (max_cluster - cluster0 < sizeof(isCmp)/sizeof(*isCmp)); ++ for (clu_ix = 0; cluster0 + clu_ix <= max_cluster; clu_ix++) { ++ isCmp[clu_ix] = ext2_cluster_is_compressed_fn (inode, cluster0 + clu_ix); ++ if (isCmp[clu_ix] < 0){ ++ printk("IO-ERROR: isCmp\n"); ++ goto io_error; ++ } ++ } ++ ++ for (clu_ix = 0; cluster0 + clu_ix <= max_cluster; clu_ix++) ++ if (isCmp[clu_ix] > 0) ++ goto readpage_compressed; ++ /* fall through */ ++ readpage_uncompressed: ++ { ++ int rc=0; ++ lock_page(page); ++ ++ /* Did somebody else fill it already? */ ++ if (PageUptodate(page) ){ //mw: necessary for DEBUG! anyway checked in do_generic_mapping_read ++ unlock_page(page); ++ } ++ else { ++ //try_to_free_buffers(page); ++ rc = block_read_full_page(page,ext2_get_block); ++ } ++ mutex_unlock(&inode->i_mutex); ++ assert(!rc); ++ return rc; ++ } ++ ++ readpage_compressed: ++ ++ /* Copied from block_read_full_page */ ++ /* if (!PageLocked(page)) */ ++ /* PAGE_BUG(page); */ ++ lock_page(page); ++ if (PageUptodate(page)) { ++ unlock_page(page); ++ mutex_unlock(&inode->i_mutex); ++ return(0); ++ } ++ get_page(page); ++ ++ ClearPageUptodate(page); ++ ClearPageError(page); ++ ++ dst = (u8 *) page_address(page); ++ for (clu_ix = 0; cluster0 + clu_ix <= max_cluster; clu_ix++) { ++ struct buffer_head *bh[EXT2_MAX_CLUSTER_BLOCKS]; ++ int nbh, blocksThisClu; ++ ++ for (i = 0; i < EXT2_MAX_CLUSTER_PAGES; i++) { ++ pg[i] = NULL; ++ epg[i] = NULL; ++ } ++ ++ /* clear_bit(PG_locked, &page->flags); */ ++ npg = ext2_cluster_npages(inode, cluster0 + clu_ix); ++ nbh = ext2_get_cluster_pages(inode, cluster0 + clu_ix, pg, page, 0); ++ ++ if (nbh <= 0) { ++ for (i = 0; i < EXT2_MAX_CLUSTER_PAGES; i++) ++ printk("no pages\n"); ++ goto out; ++ } ++ iClusterCnt = ext2_cluster_npages(inode, cluster0); ++ ++ nbh = ext2_get_cluster_extra_pages(inode, cluster0 + clu_ix, pg, epg); ++ if (nbh <= 0) ++ { ++ for (i = 0; i < EXT2_MAX_CLUSTER_PAGES; i++) ++ epg[i] = NULL; ++ printk("no extra pages\n"); ++ goto out; ++ } ++ assert (iClusterCnt = ext2_cluster_npages(inode, cluster0)); ++ ++#ifdef CONFIG_HIGHMEM ++ ext2_kmap_cluster_pages(page, pg, epg); ++ kmapped = 1; ++#endif ++ ++ nbh = ext2_get_cluster_blocks(inode, cluster0 + clu_ix, bh, pg, epg, 0); ++ if (nbh <= 0) ++ { ++ printk("no blocks\n"); ++ goto out; ++ } ++ ++ /* How many blocks (including holes) we need from this cluster. */ ++ { ++ blocksThisClu = (ext2_cluster_nblocks(inode, cluster0 + ++ clu_ix) - blockOfCluster); ++ if (blocksThisClu > blocksToDo) ++ blocksThisClu = blocksToDo; ++ } ++ ++ if (isCmp[clu_ix]) { ++ u8 const *src; ++ int n, nbytes_wanted; ++ struct ext2_cluster_head *head; ++ unsigned meth; ++# ifdef CONFIG_KMOD ++ unsigned alg; ++# endif ++ ++ bh[0]->b_data = page_address(bh[0]->b_page); ++ head = (struct ext2_cluster_head *) bh[0]->b_data; ++ ++ /* jmr 1998-10-28 Hope this is the last time I'm moving this code. ++ * Module loading must be done _before_ we lock wa, just think what ++ * can happen if we reallocate wa when somebody else uses it... 
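++ * (request_module() may sleep, and loading an algorithm module may
++ * force the working area to be reallocated, so it has to happen
++ * before we take the per-CPU area with get_cpu_var().)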
++ */ ++ meth = head->method; /* only a byte, so no swabbing needed. */ ++ if (meth >= EXT2_N_METHODS) { ++ printk("illegal method id\n"); ++ ext2_msg(inode->i_sb, ++ "illegal method id", ++ "inode = %lu, id = %u", ++ inode->i_ino, meth); ++ goto out; ++ } ++# ifdef CONFIG_KMOD ++ alg = ext2_method_table[meth].alg; ++ if (!ext2_algorithm_table[alg].avail) { ++ char str[32]; ++ ++ sprintf(str, "ext2-compr-%s", ext2_algorithm_table[alg].name); ++ request_module(str); ++ } ++# endif /* CONFIG_KMOD */ ++ ++ /* Calculate nbytes_wanted. */ ++ { ++ unsigned nblk_wanted, i; ++ ++ /* We want to decompress the whole cluster */ ++ //nblk_wanted = ext2_cluster_nblocks(inode, cluster0 + clu_ix); ++ nblk_wanted = npg << (PAGE_CACHE_SHIFT - inode->i_sb->s_blocksize_bits); /*mw: FIXED */ ++ ++ for (i = nblk_wanted; i != 0;) ++ if (((--i >> 3) < head->holemap_nbytes) ++ && (head->holemap[i >> 3] & (1 << (i & 7)))) ++ --nblk_wanted; ++ nbytes_wanted = (nblk_wanted ++ << inode->i_sb->s_blocksize_bits); ++ } ++ ++ /* Decompress. */ ++ get_cpu_var(ext2_rd_wa); ++ if (__get_cpu_var(ext2_rd_wa) == NULL) ++ { ++ ext2_alloc_rd_wa(); ++ } ++ assert(__get_cpu_var(ext2_rd_wa) != NULL); ++ ++ n = ext2_decompress_blocks(inode, bh, nbh, nbytes_wanted, cluster0 + clu_ix); ++ if (n < 0) { ++ assert(nbh >= 0); ++ printk("ext2_readpage: noblocks decompressed\n"); ++ put_cpu_var(ext2_rd_wa); ++ goto out; ++ } ++ ++# ifdef EXT2_COMPR_REPORT_VERBOSE_INODE ++ if (ei->i_flags & EXT2_COMPR_FL) ++ printk(KERN_DEBUG "ext2: mmap %04x:%lu: blocksToDo=%d, blockOfCluster=%d, blocksThisClu=%d, clu_nblocks=%d\n", ++ inode->i_rdev, ++ inode->i_ino, ++ blocksToDo, ++ blockOfCluster, ++ blocksThisClu, ++ ext2_cluster_nblocks(inode, cluster0 + clu_ix)); ++# endif ++ ++ /* */ ++ { ++ unsigned i; ++ int ipg; ++ ++ i = ext2_cluster_nblocks(inode, cluster0 + clu_ix) - 1; ++ //i = (npg << (PAGE_CACHE_SHIFT - inode->i_sb->s_blocksize_bits)) - 1; /*mw: FIXED!!! 
(here: shift = 2Bit) */ ++ //if(i+1 != ext2_cluster_nblocks(inode, cluster0 + clu_ix)) ++ //printk("npg=%i, nbh=%i, npgf=%i, nbhf =%i, cluster:%i, dec_blk:%i, b_wanted:%i, size:%i\n ", ext2_cluster_npages(inode, cluster0 + clu_ix), ext2_cluster_nblocks(inode, cluster0 + clu_ix), npgtest, i+1, cluster0 + clu_ix, n, nbytes_wanted, inode->i_size); ++ blockOfCluster = 0; ++ assert(n > 0); ++ src = __get_cpu_var(ext2_rd_wa)->u + nbytes_wanted - inode->i_sb->s_blocksize; ++#ifdef EXT2_COMPR_REPORT ++ trace_e2c("ext2_readpage: copy data inc=%d blocksThisClu=%d, n=%d\n", inc, blocksThisClu, n); ++#endif ++ for (ipg = npg - 1; ipg >= 0; ipg--) { ++ if (pg[ipg] == NULL) { ++ i -= inc; ++ src -= PAGE_SIZE; ++ continue; ++ } ++ if (((inode->i_size-1) >> PAGE_SHIFT) == pg[ipg]->index) { ++ n = ((inode->i_size-1) & (PAGE_SIZE -1)) >> inode->i_sb->s_blocksize_bits; ++ i -= ((blocksThisClu-1) - n); ++ src -= ((blocksThisClu-1) - n) << inode->i_sb->s_blocksize_bits; ++ } else { ++ n = blocksThisClu - 1; ++ } ++ if (PageUptodate(pg[ipg]) ) { ++ for (;n >= 0;n--, i--) { ++ if (((i >> 3) >= head->holemap_nbytes) ++ || !(head->holemap[i >> 3] & (1 << (i & 7)))) { ++ src -= inode->i_sb->s_blocksize; ++ } ++ } ++ } else { ++ ++ dst = (u8 *) page_address(pg[ipg]) + (n << inode->i_sb->s_blocksize_bits); ++ ++ for (; ++ n >= 0; ++ n--, i--, dst -= inode->i_sb->s_blocksize) { ++ assert(!buffer_dirty(bh[i])); ++ clear_buffer_dirty(bh[i]); //mw: had a refile_buffer in 2.4 ++ if (((i >> 3) >= head->holemap_nbytes) ++ || !(head->holemap[i >> 3] & (1 << (i & 7)))) { ++ assert(i >= 0); ++ memcpy(dst, src, inode->i_sb->s_blocksize); ++ src -= inode->i_sb->s_blocksize; ++ } else { ++ assert(i >= 0); ++ memset (dst, 0, inode->i_sb->s_blocksize); ++ } ++ //clear_bit(BH_Uptodate, &bh[i]->b_state); ++ } ++ SetPageUptodate(pg[ipg]); ++ } ++ } ++ } ++ put_cpu_var(ext2_rd_wa); ++ } else { ++ /* Uncompressed cluster. Just copy the data. 
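++ Block indices past nbh, or with no buffer head, are treated as
++ holes and zero-filled by the memset() below.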
*/ ++ int n; ++ ++# ifdef EXT2_COMPR_REPORT_VERBOSE_INODE ++ if (ei->i_flags & EXT2_COMPR_FL) ++ printk(KERN_DEBUG ++ "ext2: mmap %lu: blocksToDo = %d, " ++ "blockOfCluster = %d, clu_nblocks = %d\n", ++ inode->i_ino, blocksToDo, blockOfCluster, ++ ext2_cluster_nblocks(inode, cluster0 + ++ clu_ix)); ++# endif ++ ++ for (n = 0; ++ n < blocksThisClu; ++ n++, dst += inode->i_sb->s_blocksize) { ++ if ((blockOfCluster + n < nbh) ++ && (bh[blockOfCluster + n] != NULL)) ++ { ++ memcpy(dst, ++ bh[blockOfCluster + n]->b_data, ++ inode->i_sb->s_blocksize); ++ } ++ else ++ { ++ memset(dst, 0, inode->i_sb->s_blocksize); ++ } ++ } ++ blockOfCluster = 0; ++ } // end uncompressed Cluster ++ ++ blocksToDo -= blocksThisClu; ++ ++#ifdef CONFIG_HIGHMEM ++ if (kmapped) ++ ext2_kunmap_cluster_pages(page, pg, epg); ++#endif ++ ++ for (i = 0; i < EXT2_MAX_CLUSTER_PAGES; i++) { ++ if (epg[i] != NULL) { ++ ++ ClearPageDirty(epg[i]); ++ ClearPageUptodate(epg[i]); ++ try_to_free_buffers(epg[i]); ++ unlock_page(epg[i]); ++ assert(page_count(epg[i]) <= 1); ++ page_cache_release(epg[i]); ++ } ++ } ++ ++ for (i = 0; i < EXT2_MAX_CLUSTER_PAGES; i++) { ++ if (pg[i] == NULL) ++ break; ++ if (pg[i] == page) ++ continue; ++ unlock_page(pg[i]); ++ page_cache_release(pg[i]); ++ } ++ //mw ++ assert (isCmp[clu_ix] == ext2_cluster_is_compressed_fn (inode, cluster0 + clu_ix)); ++ } // end for-loop: Cluster ++ } ++ ++ SetPageUptodate(page); ++ unlock_page(page); ++ atomic_dec(&page->_count); ++ mutex_unlock(&inode->i_mutex); ++ return 0; ++ ++ out: ++ ++#ifdef CONFIG_HIGHMEM ++ if (kmapped) ++ ext2_kunmap_cluster_pages(page, pg, epg); ++#endif ++ ++ for (i = 0; i < EXT2_MAX_CLUSTER_PAGES; i++) { ++ if (epg[i] != NULL) { ++ ++ ClearPageDirty(epg[i]); ++ ClearPageUptodate(epg[i]); ++ try_to_free_buffers(epg[i]); ++ unlock_page(epg[i]); ++ assert(page_count(epg[i]) <= 1); ++ page_cache_release(epg[i]); ++ } ++ } ++ ++ for (i = 0; i < EXT2_MAX_CLUSTER_PAGES; i++) { ++ if (pg[i] == NULL) ++ break; ++ if (pg[i] == page) ++ continue; ++ unlock_page(pg[i]); ++ page_cache_release(pg[i]); ++ } ++ mutex_unlock(&inode->i_mutex); ++ return 0; ++ ++ io_error: ++#ifdef CONFIG_HIGHMEM ++ if (kmapped) ++ ext2_kunmap_cluster_pages(page, pg, epg); ++#endif ++ SetPageError(page); ++ unlock_page(page); ++ atomic_dec(&page->_count); ++ mutex_unlock(&inode->i_mutex); ++ printk("Readpage: IOERROR\n"); ++ return -EIO; /* it is tested in do_generic_file_read(), ... */ ++} ++#endif /* CONFIG_EXT2_COMPRESS */ ++ + static int ext2_writepage(struct page *page, struct writeback_control *wbc) + { ++/* mw (24/06/2008): ++ * WRITEPAGE: this code was also in e2compr 2.4 and once removed by yaboo ding. ++ * ext2_writepage() is also called for dirty pages. Usually we write using file_write() which ++ * wraps correctly to compressed files. BUT: a writeable memory map might ++ * produce dirty pages, which will be written back normally. this should/might fail. ++ * The following code should fix this bug, but this was not tested yet. ++ */ ++#ifdef CONFIG_EXT2_COMPRESS ++#undef USE_WRITEPAGE ++//#define USE_WRITEPAGE ++#ifdef USE_WRITEPAGE ++ ++ struct ext2_inode_info *ei = EXT2_I(page->mapping->host); ++ int retval; ++ ++ struct inode *inode = page->mapping->host; ++ u32 cluster0, max_cluster; ++ int blocksToDo; ++ ++ unlock_page(page); ++ //mw: do we need this ??? 
++ //if (!(ei->i_compr_flags & EXT2_OSYNC_INODE)) { ++ /* trace_e2c("ext2_writepage: inode"); */ ++ mutex_lock(&inode->i_mutex); ++ /* trace_e2c(" down\n"); */ ++ //} ++ if (!(ei->i_flags & EXT2_COMPRBLK_FL) ++ || (ei->i_flags & EXT2_NOCOMPR_FL) ) ++ { ++ //mw: do we need this ??? ++ //if (!(ei->i_compr_flags & EXT2_OSYNC_INODE)) { ++ /* trace_e2c("ext2_writepage: inode up 1\n"); */ ++ mutex_unlock(&inode->i_mutex); ++ //} ++ lock_page(page); ++ return block_write_full_page(page, ext2_get_block, wbc); ++ } ++ /* */ ++ { ++ register u32 blockOfFile ++ = (page->index << PAGE_CACHE_SHIFT) >> inode->i_sb->s_blocksize_bits; ++ ++ blocksToDo = PAGE_SIZE >> inode->i_sb->s_blocksize_bits; ++ cluster0 = ext2_block_to_cluster(inode, blockOfFile); ++ max_cluster = ext2_block_to_cluster(inode, blockOfFile + blocksToDo - 1); ++ } ++ ++ /* Check if any part of the requested area contains part of a ++ compressed cluster. If not, we can use default ext2_writepage(). ++ ++ (Note that we don't have to worry about a cluster becoming ++ compressed in the meantime, because we have the semaphore.) ++ ++ A page can cover up to 9 clusters. (The maximum can only ++ occur with 32KB pages, 4KB clusters, and a non-page-aligned ++ offset. Thanks go to Kurt Fitzner for reporting that ++ page offsets needn't be aligned; see generic_file_mmap().) */ ++ ++ { ++ int isCmp[(PAGE_SIZE >> 12) + 1]; ++ unsigned clu_ix; ++ ++ assert (max_cluster - cluster0 < sizeof(isCmp)/sizeof(*isCmp)); ++ for (clu_ix = 0; cluster0 + clu_ix <= max_cluster; clu_ix++) { ++ isCmp[clu_ix] = ext2_cluster_is_compressed_fn (inode, cluster0 + clu_ix); ++ if (isCmp[clu_ix] < 0) { ++ //mw: do we need this ???if (!(ei->i_compr_flags & EXT2_OSYNC_INODE)) { ++ /* trace_e2c("ext2_writepage: inode up 2\n"); */ ++ lock_page(page); ++ mutex_unlock(&inode->i_mutex); ++ //} ++ return -EIO; ++ } ++ } ++ ++ for (clu_ix = 0; cluster0 + clu_ix <= max_cluster; clu_ix++) ++ if (isCmp[clu_ix] > 0) ++ ext2_decompress_cluster(inode, cluster0 + clu_ix); ++ ++ //mw: do we need this ??? ++ //if (!(ei->i_compr_flags & EXT2_OSYNC_INODE)) { ++ /* trace_e2c("ext2_writepage: inode up 3\n"); */ ++ mutex_unlock(&inode->i_mutex); ++ //} ++ lock_page(page); ++ ++ /* fall through */ ++ } ++#endif /* CONFIG_EXT2_COMPRESS */ ++#endif + return block_write_full_page(page, ext2_get_block, wbc); + } + ++#ifndef CONFIG_EXT2_COMPRESS + static int ext2_readpage(struct file *file, struct page *page) + { + return mpage_readpage(page, ext2_get_block); + } ++#endif + + static int + ext2_readpages(struct file *file, struct address_space *mapping, + struct list_head *pages, unsigned nr_pages) + { ++#ifdef CONFIG_EXT2_COMPRESS ++/* ++ * For now, just read each page into cache and don't worry about emitting BIOs. ++ * (whitpa 02 Aug 2004). 
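To illustrate the cluster0/max_cluster arithmetic above, here is a hypothetical, simplified translation of a page index to the cluster range it touches, assuming a fixed cluster size; the real ext2_block_to_cluster() also copes with a shorter first cluster, which is ignored in this sketch.

    #include <stdio.h>

    int main(void)
    {
            unsigned page_shift = 15;          /* 32 KB pages */
            unsigned blocksize_bits = 10;      /* 1 KB blocks */
            unsigned log2_clu_nblocks = 2;     /* 4 blocks per cluster */
            unsigned long page_index = 3;

            unsigned long block0 = (page_index << page_shift) >> blocksize_bits;
            unsigned long nblocks = (1UL << page_shift) >> blocksize_bits;
            unsigned long cluster0 = block0 >> log2_clu_nblocks;
            unsigned long max_cluster = (block0 + nblocks - 1) >> log2_clu_nblocks;

            /* 8 clusters here; a shorter first cluster can add a ninth,
             * matching the "up to 9 clusters" remark above. */
            printf("page %lu spans clusters %lu..%lu\n",
                   page_index, cluster0, max_cluster);
            return 0;
    }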
++ */ ++ ++ unsigned page_idx; ++ struct pagevec lru_pvec; ++ int iError; ++ ++ pagevec_init(&lru_pvec, 0); ++ ++ for (page_idx = 0; page_idx < nr_pages; page_idx++) { ++ struct page *page = list_entry(pages->prev, struct page, lru); ++ ++ prefetchw(&page->flags); ++ list_del(&page->lru); ++ ++ iError = add_to_page_cache(page, mapping, page->index, GFP_KERNEL); ++ if (!iError) { ++ if (!PageUptodate(page)) ++ { ++ (void) ext2_readpage(file, page); ++ } ++ else ++ { ++ unlock_page(page); ++ } ++ if (!pagevec_add(&lru_pvec, page)) ++ __pagevec_lru_add_file(&lru_pvec); ++ } else { ++ page_cache_release(page); ++ } ++ ++ } ++ pagevec_lru_add_file(&lru_pvec); ++ BUG_ON(!list_empty(pages)); ++ return 0; ++#else + return mpage_readpages(mapping, pages, nr_pages, ext2_get_block); ++#endif + } + + static int +@@ -829,11 +1474,58 @@ static int ext2_nobh_writepage(struct pa + return nobh_writepage(page, ext2_get_block, wbc); + } + ++#ifdef CONFIG_EXT2_COMPRESS ++static sector_t ext2_do_bmap(struct address_space *mapping, sector_t block) ++#else + static sector_t ext2_bmap(struct address_space *mapping, sector_t block) ++#endif + { + return generic_block_bmap(mapping,block,ext2_get_block); + } + ++#ifdef CONFIG_EXT2_COMPRESS ++/* Return 0 instead of EXT2_COMPRESSED_BLKADDR if EXT2_NOCOMPR_FL ++ * high. This is necessary for us to be able to use ++ * generic_readpage() when EXT2_NOCOMPR_FL is high. ++ */ ++static sector_t ext2_bmap(struct address_space *mapping, sector_t block) ++{ ++ sector_t result; ++ struct inode *inode = mapping->host; ++ ++ if ((EXT2_I(inode)->i_flags & (EXT2_COMPRBLK_FL | EXT2_NOCOMPR_FL)) ++ == (EXT2_COMPRBLK_FL | 0)) { ++ int err; ++ ++ err = ext2_cluster_is_compressed_fn ++ (inode, ext2_block_to_cluster(inode, block)); ++ if (err > 0) ++ ext2_msg (inode->i_sb, "ext2_bmap", ++ "compressed cluster, inode %lu", ++ inode->i_ino); ++ if (err != 0) ++ return 0; ++ } ++ ++ result = ext2_do_bmap(mapping, block); ++ if (result != EXT2_COMPRESSED_BLKADDR) ++ return result; ++ ++ if (!(EXT2_SB(inode->i_sb)->s_es->s_feature_incompat ++ & cpu_to_le32(EXT2_FEATURE_INCOMPAT_COMPRESSION))) ++ ext2_error(inode->i_sb, "ext2_bmap", ++ "compressed_blkaddr (ino %lu, blk %lu) " ++ "on non-compressed fs", ++ inode->i_ino, (unsigned long) block); ++ if (!S_ISREG(inode->i_mode)) ++ ext2_error(inode->i_sb, "ext2_bmap", ++ "compressed_blkaddr for non-regular file " ++ "(ino %lu, blk %lu)", ++ inode->i_ino, (unsigned long) block); ++ return 0; ++} ++#endif /* CONFIG_EXT2_COMPRESS */ ++ + static ssize_t + ext2_direct_IO(int rw, struct kiocb *iocb, const struct iovec *iov, + loff_t offset, unsigned long nr_segs) +@@ -853,6 +1545,18 @@ ext2_direct_IO(int rw, struct kiocb *ioc + static int + ext2_writepages(struct address_space *mapping, struct writeback_control *wbc) + { ++#ifdef CONFIG_EXT2_COMPRESS ++#ifdef USE_WRITEPAGE ++ struct ext2_inode_info *ei = EXT2_I(mapping->host); ++ if ( (ei->i_flags & EXT2_COMPRBLK_FL) ++ && !(ei->i_flags & EXT2_NOCOMPR_FL)) ++ { ++ //NULL will invoke ext2_writepage for writeback, hopefully. 
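The bmap policy above boils down to one rule: callers of bmap() must never see the compressed-block marker, so such blocks map to 0 ("no block"). A minimal sketch of that filter follows; the marker value used here is a placeholder for illustration, not the constant defined by the patch.

    #include <stdint.h>

    #define COMPRESSED_BLKADDR_SKETCH 0xffffffffu   /* placeholder marker */

    static uint32_t bmap_filter(uint32_t raw_result, int cluster_is_compressed)
    {
            if (cluster_is_compressed)
                    return 0;                       /* whole cluster is compressed */
            if (raw_result == COMPRESSED_BLKADDR_SKETCH)
                    return 0;                       /* never leak the marker */
            return raw_result;
    }

    int main(void)
    {
            return bmap_filter(COMPRESSED_BLKADDR_SKETCH, 0) == 0 ? 0 : 1;
    }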
++ return mpage_writepages(mapping, wbc, NULL); ++ } ++ else ++#endif ++#endif + return mpage_writepages(mapping, wbc, ext2_get_block); + } + +@@ -1001,6 +1705,12 @@ static inline void ext2_free_data(struct + + for ( ; p < q ; p++) { + nr = le32_to_cpu(*p); ++#ifdef CONFIG_EXT2_COMPRESS ++ if (nr == EXT2_COMPRESSED_BLKADDR) { ++ *p = 0; ++ continue; ++ } ++#endif + if (nr) { + *p = 0; + /* accumulate blocks to free if they're contiguous */ +@@ -1045,6 +1755,12 @@ static void ext2_free_branches(struct in + nr = le32_to_cpu(*p); + if (!nr) + continue; ++#ifdef CONFIG_EXT2_COMPRESS ++ if (nr == EXT2_COMPRESSED_BLKADDR) { ++ *p = 0; ++ continue; ++ } ++#endif + *p = 0; + bh = sb_bread(inode->i_sb, nr); + /* +@@ -1069,6 +1785,96 @@ static void ext2_free_branches(struct in + ext2_free_data(inode, p, q); + } + ++/* pjm 1998-01-14: As far as I can tell, "I don't do any locking" is ++ no longer correct, as i_sem is downed for all write() and ++ truncate() stuff except where it doesn't matter (e.g. new inode). */ ++ ++#ifdef CONFIG_EXT2_COMPRESS ++/* If the EXT2_ECOMPR_FL bit is high, then things can go rather badly. ++ This can only happen if access permission was obtained before the ++ flag was raised. Also, it shouldn't be too much of a problem ++ unless the end point of truncation is a compressed cluster with a ++ compression error. */ ++ ++ /* From what I (Antoine) understand, the complexity of the truncate ++ code is due to the fact that we don't want to free blocks that ++ are still referenced. It does not ensure that concurrent read ++ operation will terminate properly, i.e., the semantic of reading ++ while somebody truncates is undefined (you can either get the old ++ data if you got the blocks before, or get plenty of zeros ++ otherwise). */ ++ ++/* todo: Provide error trapping in readiness for when i_op->truncate ++ allows a return code. */ ++static void fix_compression (struct inode * inode) ++{ ++ struct ext2_inode_info *ei = EXT2_I(inode); ++ /*if (atomic_read(&inode->i_mutex.count) > 0) ++ { ++ printk("Assert Mutex failed for file: %s \n", inode_name(inode, 0)); ++ dump_stack(); ++ }*/ ++ ++ assert (ei->i_flags & EXT2_COMPRBLK_FL); /* one or more compressed clusters */ ++ assert ((atomic_read(&inode->i_mutex.count) < 1) ++ || ((inode->i_nlink == 0) ++ && (atomic_read(&inode->i_count) == 0))); ++ /* pjm 1998-01-14: I think the below comment can safely be removed, as ++ it's impossible for someone to be compressing during truncate(), because ++ i_sem is down. */ ++ /* Dans le cas ou les clusters peuvent etre compresses, cela pose ++ un probleme : il faudrait stopper aussi si le cluster est ++ comprime et ne contient pas plus de donnees que i_size ne ++ permet. Sinon, on peut passer son temps a decompresser un ++ cluster que quelqu'un d'autre compresse en meme ++ temps... (TODO). Cela ne peut arriver que si on reverifie apres ++ coup si le cluster est non compresse (ce qu'on fait a l'heure ++ actuelle) => faire autrement. ++ ++ pjm fixme tr ++ ++ If the clusters can be compressed, we'd have a problem: we'd ++ also need to stop if the cluster is compressed and doesn't ++ contain more data than i_size permits. Otherwise we can spend ++ time decompressing a cluster that someone else is compressing ++ at the same time. (TODO.) This can only happen if we reverify ++ "apres coup" ("after the event"? "after each time"?) "si" ("if" ++ or "that") the cluster is not compressed (as we are currently ++ doing) => do differently. */ ++ ++ /* todo: Handle errors from ext2_cluster_is_compressed(). 
++ (Except ext2_truncate() currently silently ignores errors ++ anyway.) */ ++ ++ if (!ext2_offset_is_clu_boundary(inode, inode->i_size) ++ && (! ( ei->i_flags & EXT2_NOCOMPR_FL)) ++ && (ext2_cluster_is_compressed_fn ++ (inode, ext2_offset_to_cluster (inode, inode->i_size)) ++ > 0)) { ++ trace_e2c("fix_compression: inode:%lu decompress_cluster!\n", inode->i_ino); ++ ext2_decompress_cluster(inode, ext2_offset_to_cluster(inode, inode->i_size)); ++ /* todo: Check the return code of ++ ext2_decompress_cluster(). (Then again, I don't ++ know how to report an error anyway. ++ ext2_truncate() silently ignores errors.) */ ++ ++ /* Organise for the cluster to be recompressed later. */ ++ assert (ei->i_flags & EXT2_COMPR_FL); ++ ++ ei->i_flags |= EXT2_DIRTY_FL; ++ ei->i_compr_flags |= EXT2_CLEANUP_FL; ++ mark_inode_dirty(inode); ++ } else ++ /* If there are no more compressed clusters, then ++ remove the EXT2_COMPRBLK_FL. Not essential from a ++ safety point of view, but friendlier. We only do ++ this in the `else' because the cleanup function ++ will handle it in the `if' case. */ ++ ext2_update_comprblk(inode); ++} ++#endif ++ ++ + static void __ext2_truncate_blocks(struct inode *inode, loff_t offset) + { + __le32 *i_data = EXT2_I(inode)->i_data; +@@ -1081,6 +1887,27 @@ static void __ext2_truncate_blocks(struc + int n; + long iblock; + unsigned blocksize; ++ ++#ifdef CONFIG_EXT2_COMPRESS ++ /* If the new size is in the middle of a compressed cluster, ++ then we decompress it, and set things up to be recompressed ++ later. ++ ++ todo: It isn't very nice to get ENOSPC on truncate. We ++ can't completely remove the possibility (unless the ++ compression algorithms obey the rule `shorter input never ++ gives longer output') but we could greatly reduce the ++ possibility, e.g. by moving the fix_compression() function ++ to compress.c, and have it decompress and immediately ++ recompress the cluster, without allocating blocks for the ++ full decompressed data. */ ++ if (EXT2_I(inode)->i_flags & EXT2_COMPRBLK_FL) { ++ trace_e2c("ext2_truncate: ino=%ld sz=%d\n", inode->i_ino, (int)inode->i_size); ++ fix_compression(inode); ++ truncate_inode_pages(inode->i_mapping, inode->i_size); ++ } ++#endif ++ + blocksize = inode->i_sb->s_blocksize; + iblock = (offset + blocksize-1) >> EXT2_BLOCK_SIZE_BITS(inode->i_sb); + +@@ -1151,8 +1978,11 @@ do_indirects: + + mutex_unlock(&ei->truncate_mutex); + } +- ++#ifdef CONFIG_EXT2_COMPRESS ++void ext2_truncate_blocks(struct inode *inode, loff_t offset) ++#else + static void ext2_truncate_blocks(struct inode *inode, loff_t offset) ++#endif + { + /* + * XXX: it seems like a bug here that we don't allow +@@ -1340,7 +2170,73 @@ struct inode *ext2_iget (struct super_bl + goto bad_inode; + } + inode->i_blocks = le32_to_cpu(raw_inode->i_blocks); ++#ifdef CONFIG_EXT2_COMPRESS ++ ei->i_flags = 0x807fffff & le32_to_cpu(raw_inode->i_flags); ++ ei->i_compr_flags = 0; ++ if (S_ISREG(inode->i_mode) || S_ISDIR(inode->i_mode)) { ++ ++ if (S_ISDIR(inode->i_mode)) ++ { ++ //mw: ++ //mutex_lock(&inode->i_mutex); ++ if (S_ISDIR(inode->i_mode)) ++ { ++ ei->i_flags &= ~(EXT2_COMPRBLK_FL | EXT2_DIRTY_FL); //modify!!! ++ } ++ //mutex_unlock(&inode->i_mutex); ++ } ++ ++ /* The above shouldn't be necessary unless someone's ++ * been playing with EXT2_IOC_SETFLAGS on a non-e2compr ++ * kernel, or the inode has been scribbled on. 
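Reduced to its decision, fix_compression() above asks two questions: does i_size land mid-cluster, and is that cluster compressed? A toy restatement (names invented, error handling and the EXT2_NOCOMPR_FL case omitted):

    struct trunc_state {
            int size_on_cluster_boundary;
            int end_cluster_compressed;
    };

    enum trunc_action { DECOMPRESS_AND_MARK_DIRTY, JUST_UPDATE_COMPRBLK };

    /* Decompress the tail cluster now; it is recompressed at cleanup time. */
    static enum trunc_action fix_compression_action(const struct trunc_state *s)
    {
            if (!s->size_on_cluster_boundary && s->end_cluster_compressed)
                    return DECOMPRESS_AND_MARK_DIRTY;
            return JUST_UPDATE_COMPRBLK;
    }

    int main(void)
    {
            struct trunc_state s = { 0, 1 };   /* i_size mid-cluster, cluster compressed */
            return fix_compression_action(&s) == DECOMPRESS_AND_MARK_DIRTY ? 0 : 1;
    }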
++ */ ++ if (ei->i_flags & (EXT2_COMPR_FL | EXT2_COMPRBLK_FL)) { ++ ei->i_compr_method ++ = (le32_to_cpu(raw_inode->i_flags) >> 26) & 0x1f; ++ ei->i_log2_clu_nblocks ++ = (le32_to_cpu(raw_inode->i_flags) >> 23) & 0x7; ++ if ((ei->i_log2_clu_nblocks < 2) ++ || (ei->i_log2_clu_nblocks > 5)) { ++ if ((ei->i_log2_clu_nblocks == 0) ++ && !(ei->i_flags & EXT2_COMPRBLK_FL)) { ++ /* The EXT2_COMPR_FL flag was ++ * raised under a kernel ++ * without e2compr support. ++ */ ++ if (S_ISREG(inode->i_mode)) ++ ei->i_flags |= EXT2_DIRTY_FL; ++ /* Todo: once we're sure the kernel can ++ * handle [log2_]clu_nblocks==0, get rid ++ * of the next statement. ++ */ ++ ei->i_log2_clu_nblocks ++ = EXT2_DEFAULT_LOG2_CLU_NBLOCKS; ++ } else { ++ ei->i_flags |= EXT2_ECOMPR_FL; ++ ext2_error(inode->i_sb, ++ "ext2_read_inode", ++ "inode %lu is corrupted: " ++ "log2_clu_nblocks=%u", ++ inode->i_ino, ++ ei->i_log2_clu_nblocks); ++ } ++ } ++ } else { ++ ei->i_compr_method = EXT2_DEFAULT_COMPR_METHOD; ++ ei->i_log2_clu_nblocks ++ = EXT2_DEFAULT_LOG2_CLU_NBLOCKS; ++ } ++ if (ei->i_log2_clu_nblocks > ++ (EXT2_LOG2_MAX_CLUSTER_BYTES - inode->i_sb->s_blocksize_bits)) ++ ei->i_log2_clu_nblocks = (EXT2_LOG2_MAX_CLUSTER_BYTES ++ - inode->i_sb->s_blocksize_bits); ++ ei->i_clu_nblocks = 1 << ei->i_log2_clu_nblocks; ++ if (ei->i_flags & EXT2_DIRTY_FL) ++ ei->i_compr_flags = EXT2_CLEANUP_FL; ++ } ++#else /* !CONFIG_EXT2_COMPRESS */ + ei->i_flags = le32_to_cpu(raw_inode->i_flags); ++#endif + ei->i_faddr = le32_to_cpu(raw_inode->i_faddr); + ei->i_frag_no = raw_inode->i_frag; + ei->i_frag_size = raw_inode->i_fsize; +@@ -1463,7 +2359,35 @@ static int __ext2_write_inode(struct ino + + raw_inode->i_blocks = cpu_to_le32(inode->i_blocks); + raw_inode->i_dtime = cpu_to_le32(ei->i_dtime); ++#ifdef CONFIG_EXT2_COMPRESS ++ if ((S_ISREG(inode->i_mode) || S_ISDIR(inode->i_mode)) ++ && (ei->i_flags & (EXT2_COMPR_FL | EXT2_COMPRBLK_FL))) { ++ if ((ei->i_log2_clu_nblocks < 2) ++ || (ei->i_log2_clu_nblocks > 5)) { ++ ei->i_flags |= EXT2_ECOMPR_FL; ++ ext2_error (inode->i_sb, "ext2_write_inode", ++ "inode %lu is corrupted: log2_clu_nblocks=%u", ++ inode->i_ino, ei->i_log2_clu_nblocks); ++ } ++ assert (ei->i_clu_nblocks == (1 << ei->i_log2_clu_nblocks)); ++ assert (ei->i_compr_method < 0x20); ++ raw_inode->i_flags = cpu_to_le32 ++ ((ei->i_flags & 0x807fffff) ++ | (ei->i_compr_method << 26) ++ | (ei->i_log2_clu_nblocks << 23)); ++ } else ++ { ++ //mw: i_mutex was introduced and disabled again: deadlock with lilo ++ // mutex_lock(&inode->i_mutex); //mw ++ raw_inode->i_flags = cpu_to_le32 //modify !!! ++ (ei->i_flags ++ & 0x807fffff /* no compr meth/size */ ++ & ~(EXT2_COMPR_FL | EXT2_COMPRBLK_FL | EXT2_IMMUTABLE_FL | EXT2_ECOMPR_FL | EXT2_NOCOMPR_FL)); ++ // mutex_unlock(&inode->i_mutex); //mw ++ } ++#else + raw_inode->i_flags = cpu_to_le32(ei->i_flags); ++#endif + raw_inode->i_faddr = cpu_to_le32(ei->i_faddr); + raw_inode->i_frag = ei->i_frag_no; + raw_inode->i_fsize = ei->i_frag_size; +--- linux-3.2-rc5/fs/ext2/file.c 2011-12-10 00:09:32.000000000 +0100 ++++ linux-3.2-rc5-e2c/fs/ext2/file.c 2011-12-13 14:22:47.853976220 +0100 +@@ -18,10 +18,25 @@ + * (jj@sunsite.ms.mff.cuni.cz) + */ + ++#ifdef CONFIG_EXT2_COMPRESS ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#else + #include + #include + #include + #include "ext2.h" ++#endif ++ ++ + #include "xattr.h" + #include "acl.h" + +@@ -30,8 +45,39 @@ + * for a single struct file are closed. 
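The on-disk flag encoding used in the read and write hunks above (compression method in bits 26..30, log2 of the cluster block count in bits 23..25, ordinary flags masked with 0x807fffff) round-trips as in this standalone sketch; the function names are invented for illustration.

    #include <stdint.h>
    #include <assert.h>

    #define E2C_FLAGS_MASK 0x807fffffu      /* ordinary flag bits kept on disk */

    static uint32_t pack_flags(uint32_t flags, unsigned method, unsigned log2_clu)
    {
            return (flags & E2C_FLAGS_MASK) | (method << 26) | (log2_clu << 23);
    }

    static void unpack_flags(uint32_t raw, unsigned *method, unsigned *log2_clu)
    {
            *method   = (raw >> 26) & 0x1f;
            *log2_clu = (raw >> 23) & 0x7;
    }

    int main(void)
    {
            unsigned m, l;
            uint32_t raw = pack_flags(0x00000400u, 7, 4);

            unpack_flags(raw, &m, &l);
            assert(m == 7 && l == 4);
            return 0;
    }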
Note that different open() calls + * for the same file yield different struct file structures. + */ ++ ++/* ++ * pjm 1998-01-09: I would note that this is different from `when no ++ * process has the inode open'. ++ */ + static int ext2_release_file (struct inode * inode, struct file * filp) + { ++#ifdef CONFIG_EXT2_COMPRESS ++ /* ++ * Now's as good a time as any to clean up wrt compression. ++ * Previously (before 2.1.4x) we waited until ++ * ext2_put_inode(), but now the dcache sometimes delays that ++ * call until umount time. ++ */ ++ //printk(KERN_DEBUG "ext2_release_file: pid=%d, i_ino=%lu, i_count=%d\n", current->pid, inode->i_ino, atomic_read(&inode->i_count)); ++ ++ if (S_ISREG (inode->i_mode) ++ && inode->i_nlink ++ && (EXT2_I(inode)->i_compr_flags & EXT2_CLEANUP_FL)) { ++#ifdef EXT2_COMPR_REPORT_PUT ++ printk(KERN_DEBUG "ext2_release_file: pid=%d, i_ino=%lu, i_count=%d\n", current->pid, inode->i_ino, atomic_read(&inode->i_count)); ++#endif ++ /* ++ * todo: See how the return code of ++ * ext2_release_file() is used, and decide whether it ++ * might be appropriate to pass any errors to ++ * caller. ++ */ ++ //dump_stack(); ++ (void) ext2_cleanup_compressed_inode (inode); ++ } ++ ++#endif + if (filp->f_mode & FMODE_WRITE) { + mutex_lock(&EXT2_I(inode)->truncate_mutex); + ext2_discard_reservation(inode); +@@ -56,6 +102,456 @@ int ext2_fsync(struct file *file, loff_t + return ret; + } + ++#ifdef CONFIG_EXT2_COMPRESS ++struct page_cluster { ++ struct page * page; ++ loff_t pos; ++ unsigned bytes; ++ unsigned long offset; ++ unsigned char in_range; ++ const char * buf; ++}; ++ ++#define PAGE_IN_RANGE 1 ++#define PAGE_KMAPPED 2 ++ ++ ++/** ++ * generic_osync_inode - flush all dirty data for a given inode to disk ++ * @inode: inode to write ++ * @mapping: the address_space that should be flushed ++ * @what: what to write and wait upon ++ * ++ * This can be called by file_write functions for files which have the ++ * O_SYNC flag set, to flush dirty writes to disk. ++ * ++ * @what is a bitmask, specifying which part of the inode's data should be ++ * written and waited upon. ++ * ++ * OSYNC_DATA: i_mapping's dirty data ++ * OSYNC_METADATA: the buffers at i_mapping->private_list ++ * OSYNC_INODE: the inode itself ++ */ ++ ++/* mw: see generic_osync_inode() in kernel<2.6.30 for orginal method. ++ basically we want all of it: OSYNC_DATA and OSYNC_METADATA and OSYNC_INODE */ ++int ex_generic_osync_inode(struct inode *inode, struct address_space *mapping) //, int what) ++{ ++ int err = 0; ++ int need_write_inode_now = 0; ++ int err2; ++ ++ err = filemap_fdatawrite(mapping); ++ ++ err2 = sync_mapping_buffers(mapping); ++ if (!err) ++ err = err2; ++ ++ err2 = filemap_fdatawait(mapping); ++ if (!err) ++ err = err2; ++ ++ /* check if data is dirty */ ++ spin_lock(&inode->i_lock); ++ if (inode->i_state & I_DIRTY) ++ need_write_inode_now = 1; ++ spin_unlock(&inode->i_lock); ++ ++ if (need_write_inode_now) { ++ err2 = write_inode_now(inode, 1); ++ if (!err) ++ err = err2; ++ } ++ else ++ inode_sync_wait(inode); ++ ++ return err; ++} ++ ++ ++/* ++ * Write to a file through the page cache. ++ * ++ * We currently put everything into the page cache prior to writing it. ++ * This is not a problem when writing full pages. With partial pages, ++ * however, we first have to read the data into the cache, then ++ * dirty the page, and finally schedule it for writing. 
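ex_generic_osync_inode() above follows a common pattern: run every flush step regardless, but report the first error encountered. A trivial userspace illustration of just that pattern, with stand-in step functions:

    #include <stdio.h>

    static int step_a(void) { return 0; }
    static int step_b(void) { return -5; }
    static int step_c(void) { return -1; }

    int main(void)
    {
            int err = step_a();
            int err2;

            err2 = step_b();
            if (!err)
                    err = err2;
            err2 = step_c();
            if (!err)
                    err = err2;

            printf("first error: %d\n", err);   /* -5, not -1 */
            return 0;
    }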
Alternatively, we ++ * could write-through just the portion of data that would go into that ++ * page, but that would kill performance for applications that write data ++ * line by line, and it's prone to race conditions. ++ * ++ * Note that this routine doesn't try to keep track of dirty pages. Each ++ * file system has to do this all by itself, unfortunately. ++ * okir@monad.swb.de ++ */ ++ssize_t ++ext2_file_write(struct file *file,const char *buf,size_t count,loff_t *ppos) ++{ ++ struct address_space *mapping = file->f_dentry->d_inode->i_mapping; ++ struct inode *inode = mapping->host; ++ unsigned long limit = current->signal->rlim[RLIMIT_FSIZE].rlim_cur, written, last_index; /* last page index */ ++ loff_t pos; ++ long status; ++ int err; ++ unsigned bytes; ++ u32 comprblk_mask=0; ++ struct ext2_inode_info *ei = EXT2_I(inode); ++ ++ if (!(ei->i_flags & (EXT2_COMPR_FL|EXT2_COMPRBLK_FL)) ++#undef DUD //mw: I think this is a buggy bug-fix ++#ifdef DUD ++ || (count < inode->i_sb->s_blocksize) ++#endif ++ ) ++ { ++ return do_sync_write(file, buf, count, ppos); ++ } ++ ++ if ((ssize_t) count < 0) ++ return -EINVAL; ++ ++ if (!access_ok(VERIFY_READ, buf, count)) ++ return -EFAULT; ++ ++#ifdef EXT2_COMPR_REPORT_MUTEX ++ printk(KERN_DEBUG "EXT2_FILE_WRITE_LOCK of PID %u @ inode:%lu\n", current->pid, inode->i_ino ); ++#endif ++ mutex_lock(&inode->i_mutex); ++ /* mw: down_read(&inode->i_alloc_sem); // as used by ocsf2 TLL 02/21/07 ++ was removed with kernel 3.1 */ ++ atomic_inc(&inode->i_dio_count); ++ ++ pos = *ppos; ++ err = -EINVAL; ++ if (pos < 0) ++ goto out; ++ ++ written = 0; ++ ++ /* FIXME: this is for backwards compatibility with 2.4 */ ++ if (!S_ISBLK(inode->i_mode) && file->f_flags & O_APPEND) ++ { ++ pos = inode->i_size; ++ } ++ ++ /* ++ * Check whether we've reached the file size limit. ++ */ ++ err = -EFBIG; ++ ++ if (limit != RLIM_INFINITY) { ++ if (pos >= limit) { ++ send_sig(SIGXFSZ, current, 0); ++ goto out; ++ } ++ if (pos > 0xFFFFFFFFULL || count > limit - (u32)pos) { ++ /* send_sig(SIGXFSZ, current, 0); */ ++ count = limit - (u32)pos; ++ } ++ } ++ ++ /* ++ * LFS rule ++ */ ++ if ( pos + count > MAX_NON_LFS && !(file->f_flags&O_LARGEFILE)) { ++ if (pos >= MAX_NON_LFS) { ++ send_sig(SIGXFSZ, current, 0); ++ goto out; ++ } ++ if (count > MAX_NON_LFS - (u32)pos) { ++ /* send_sig(SIGXFSZ, current, 0); */ ++ count = MAX_NON_LFS - (u32)pos; ++ } ++ } ++ ++ /* ++ * Are we about to exceed the fs block limit ? ++ * ++ * If we have written data it becomes a short write ++ * If we have exceeded without writing data we send ++ * a signal and give them an EFBIG. ++ * ++ * Linus frestrict idea will clean these up nicely.. 
++ */ ++ if (!S_ISBLK(inode->i_mode)) { ++ if (pos >= inode->i_sb->s_maxbytes) { ++ if (count || pos > inode->i_sb->s_maxbytes) { ++ send_sig(SIGXFSZ, current, 0); ++ err = -EFBIG; ++ goto out; ++ } ++ /* zero-length writes at ->s_maxbytes are OK */ ++ } ++ ++ if (pos + count > inode->i_sb->s_maxbytes) ++ count = inode->i_sb->s_maxbytes - pos; ++ } else { ++ if (bdev_read_only(inode->i_sb->s_bdev)) { ++ err = -EPERM; ++ goto out; ++ } ++ if (pos >= inode->i_size) { ++ if (count || pos > inode->i_size) { ++ err = -ENOSPC; ++ goto out; ++ } ++ } ++ ++ if (pos + count > inode->i_size) ++ { ++ count = inode->i_size - pos; ++ } ++ } ++ ++ err = 0; ++ if (count == 0) ++ goto out; ++ ++ status = 0; ++ ++ if (file->f_flags & O_DIRECT) ++ { ++ err = -EINVAL; ++ goto out; ++ } ++ /* ++ * We must still check for EXT2_ECOMPR_FL, as it may have been ++ * set after we got the write permission to this file. ++ */ ++ if ((ei->i_flags & (EXT2_ECOMPR_FL | EXT2_NOCOMPR_FL)) == (EXT2_ECOMPR_FL | 0)) ++ { ++ err = -EXT2_ECOMPR; ++ goto out; ++ } ++ ++ should_remove_suid(file->f_dentry); ++ inode->i_ctime = inode->i_mtime = CURRENT_TIME; ++ mark_inode_dirty_sync(inode); ++ ++ if ((pos+count) > inode->i_size) ++ last_index = (pos+count-1) >> PAGE_CACHE_SHIFT; ++ else ++ last_index = (inode->i_size-1) >> PAGE_CACHE_SHIFT; ++ ++ comprblk_mask = ei->i_flags | ~EXT2_COMPRBLK_FL; ++ ++ //mw: now do it cluster-wise ++ do { ++ //unsigned long index, offset, clusters_page_index0, ++ unsigned long index, nextClusterFirstByte, cluster_compressed=0; ++ u32 cluster=0; ++ status = -ENOMEM; /* we'll assign it later anyway */ ++ ++#ifdef EXT2_COMPRESS_WHEN_CLU ++ ei->i_flags |= EXT2_COMPRBLK_FL; ++ assert( (file->f_flags & O_DIRECT) == 0); ++ assert(mapping_mapped(inode->i_mapping) == 0); ++#endif ++ ++ index = pos >> PAGE_CACHE_SHIFT; /*mw: pageindex (start)*/ ++ cluster = ext2_page_to_cluster(inode, index); ++ ++ /* ++ * We decompress the cluster if needed, and write ++ * the data as normal. The cluster will be ++ * compressed again when the inode is cleaned up. ++ */ ++ if ((comprblk_mask == ~(u32)0) ++ && !(ei->i_flags & EXT2_NOCOMPR_FL)) { ++ /* AUFFÄLLIG 2*/ ++ /* assert (block == pos >> inode->i_sb->s_blocksize_bits); */ ++ ++ cluster_compressed = ext2_cluster_is_compressed_fn(inode, cluster); ++ if (cluster_compressed < 0) { ++ if (! written) ++ written = cluster_compressed; ++ break; ++ } ++ } ++ ++ if (cluster_compressed > 0) { ++ /* Here, decompression take place */ ++ cluster_compressed = ext2_decompress_cluster(inode, cluster); ++ if (cluster_compressed < 0) { ++ if (! written) { ++ written = cluster_compressed; ++ } ++ break; ++ } ++ } ++ ++ nextClusterFirstByte = (ext2_cluster_page0(inode, cluster+1) * PAGE_CACHE_SIZE); ++ bytes = nextClusterFirstByte - pos; /*mw: bytes todo in this cluster*/ ++ if (bytes > count) { ++ bytes = count; /*mw: if end of data*/ ++ } ++ ++#ifdef EXT2_COMPR_DEBUG ++ //assert we stay inside the cluster! 
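The chunking performed by the write loop above (never letting a single do_sync_write() span a cluster boundary) can be sketched in plain arithmetic, assuming a fixed cluster size; the offsets and sizes below are example values only.

    #include <stdio.h>

    int main(void)
    {
            unsigned long cluster_bytes = 4096;
            unsigned long pos = 3000, count = 10000;

            while (count) {
                    unsigned long next_cluster = (pos / cluster_bytes + 1) * cluster_bytes;
                    unsigned long bytes = next_cluster - pos;   /* room left in this cluster */

                    if (bytes > count)
                            bytes = count;                      /* end of the data */
                    printf("write %lu bytes at offset %lu\n", bytes, pos);
                    pos += bytes;
                    count -= bytes;
            }
            return 0;
    }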
++ { ++ int endpos; ++ int endindex; ++ int endcluster; ++ unsigned long thisClusterFirstByte; ++ int relstart, relend, startblock, endblock; ++ ++ thisClusterFirstByte = (ext2_cluster_page0(inode, cluster) * PAGE_CACHE_SIZE); ++ ++ relstart = pos - thisClusterFirstByte; ++ relend = bytes + relstart; ++ ++ startblock = relstart >> 10; ++ endblock = relend >> 10; ++ ++ ++ endpos = pos + bytes; ++ //printk("do_sync_write cluster %d: inode:%lu, \t start:%i(%i), end:%i(%i), \t ccount:%d \t tcount:%d\n", cluster , inode->i_ino, relstart, startblock, relend , endblock, (int)bytes, count); ++ endindex = (endpos-1) >> PAGE_CACHE_SHIFT; /*mw: pageindex (start)*/ ++ endcluster = ext2_page_to_cluster(inode, endindex); ++ assert(cluster == endcluster); ++ } ++#endif ++ ++ //mw: must unlock here, do_sync_write() will aquire the mutex again ++ mutex_unlock(&inode->i_mutex); ++ ++ //mw: this is pretty clever: we use the generic method now :-) ++ //printk("do_sync_write cluster %d, mapped:%i\n", cluster, mapping_mapped(inode->i_mapping)); ++ //status = do_sync_write_nolock(file, buf, bytes, &pos); //without locking mutex ++ status = do_sync_write(file, buf, bytes, &pos); //with locking mutex ++ assert(status>=0); ++ ++ mutex_lock(&inode->i_mutex); ++ ++ written += status; ++ count -= status; ++ buf += status; ++ ++#ifdef EXT2_COMPRESS_WHEN_CLU ++ assert (ei->i_flags & EXT2_COMPRBLK_FL); ++ if ((ei->i_flags & EXT2_COMPR_FL) ++ && (ext2_offset_is_clu_boundary(inode, pos)) ) { ++ ++ if (mapping_mapped(inode->i_mapping) == 0 ) ++ /* ++ * Pierre Peiffer: For file mapped (via mmap, I mean), ++ * compression will occure when releasing the file. ++ * We must, in this case, avoid the pages (possibly ++ * mapped by a process) to be compressed under them. ++ */ ++ { ++ int error; ++ assert(mapping_mapped(inode->i_mapping) == 0); ++ error = ext2_compress_cluster(inode, cluster); ++ /*if (ext2_cluster_is_compressed_fn(inode, cluster)) ++ ext2_decompress_cluster(inode, cluster);*/ ++ assert(mapping_mapped(inode->i_mapping) == 0); ++ /* ++ * Actually, raising write_error may be a ++ * mistake. For example, ++ * ext2_cleanup_compressed_cluster() doesn't ++ * usually return any errors to user. todo: ++ * Have a look at ext2_compress_cluster, and ++ * check whether its errors are such that they ++ * should be returned to user. Some of the ++ * will be, of course, but it might be ++ * possible for it to return without ++ * change. ++ */ ++ if (error > 0) ++ comprblk_mask = ~(u32)0; ++ } else { ++#ifdef EXT2_COMPR_REPORT ++ char bdn[BDEVNAME_SIZE]; ++ bdevname(inode->i_sb->s_bdev, bdn); ++#endif ++ ++ trace_e2c("ext2_file_write: (dev. %s): " ++ "ino=%ld, cluster=%d: file mapped, does " ++ "not compress cluster\n", ++ bdn, inode->i_ino, cluster); ++ ei->i_flags |= EXT2_DIRTY_FL; ++ ei->i_compr_flags |= EXT2_CLEANUP_FL; ++ } ++ } ++#endif ++ ++ } while (count); ++ *ppos = pos; ++ ++ /* ++ * For now, when the user asks for O_SYNC, we'll actually ++ * provide O_DSYNC. ++ */ ++ if (status >= 0) { ++ if ((file->f_flags & O_SYNC) || IS_SYNC(inode)) { ++ /*if (ei->i_compr_flags & EXT2_OSYNC_INODE) { ++ osync_already = 1; ++ } else { ++ osync_already = 0; ++ ei->i_compr_flags |= EXT2_OSYNC_INODE; ++ }*/ ++ /* Should 2nd arg be inode->i_mapping? */ ++ status = ex_generic_osync_inode(inode, file->f_mapping ++ /*, OSYNC_METADATA|OSYNC_DATA*/); ++ /*if (osync_already == 0) { ++ ei->i_compr_flags &= ~EXT2_OSYNC_INODE; ++ }*/ ++ } ++ } ++ ++ err = written ? 
written : status; ++ ++# ifdef EXT2_COMPRESS_WHEN_CLU ++ //mw: ext2_compress_cluster() might remove EXT2_COMPRBLK_FL ++ //if the file does not compress at all. this is NO error: remove next line? ++ //assert (ei->i_flags & EXT2_COMPRBLK_FL); ++ ++ ei->i_flags &= comprblk_mask; ++ if ( (ei->i_flags & EXT2_COMPR_FL) ++ && (!ext2_offset_is_clu_boundary(inode, pos)) ) ++ { ++ ei->i_flags |= EXT2_DIRTY_FL; ++ ei->i_compr_flags |= EXT2_CLEANUP_FL; ++ } ++ ++# else ++ if (ei->i_flags & EXT2_COMPR_FL) { ++ ei->i_flags |= EXT2_DIRTY_FL; ++ ei->i_compr_flags |= EXT2_CLEANUP_FL; ++ } ++# endif ++out: ++ ++#ifdef EXT2_COMPR_REPORT_MUTEX ++ printk(KERN_DEBUG "EXT2_FILE_WRITE_UNLOCK of PID %u @ inode:%lu\n", current->pid, inode->i_ino); ++#endif ++ /* mw: up_read(&inode->i_alloc_sem); // as used by ocsf2 TLL 02/21/07 ++ was removed with kernel 3.1 */ ++ inode_dio_done(inode); ++ mutex_unlock(&inode->i_mutex); ++ return err; ++} ++ ++/* ++ * Called when an inode is about to be open. ++ * We use this to disallow opening RW large files on 32bit systems if ++ * the caller didn't specify O_LARGEFILE. On 64bit systems we force ++ * on this flag in sys_open. ++ * Prevent opening compressed file with O_DIRECT. ++ */ ++static int ext2_file_open(struct inode * inode, struct file * filp) ++{ ++ if ((filp->f_flags & O_DIRECT) && (EXT2_I(inode)->i_flags & ++ (EXT2_COMPR_FL|EXT2_COMPRBLK_FL))) ++ return -EINVAL; ++ if (!(filp->f_flags & O_LARGEFILE) && inode->i_size > MAX_NON_LFS) ++ return -EFBIG; ++ ++ return 0; ++ } ++#endif /* CONFIG_EXT2_COMPRESS*/ ++ + /* + * We have mostly NULL's here: the current defaults are ok for + * the ext2 filesystem. +@@ -63,7 +559,12 @@ int ext2_fsync(struct file *file, loff_t + const struct file_operations ext2_file_operations = { + .llseek = generic_file_llseek, + .read = do_sync_read, ++#ifdef CONFIG_EXT2_COMPRESS ++ .write = ext2_file_write, ++#else + .write = do_sync_write, ++#endif ++ + .aio_read = generic_file_aio_read, + .aio_write = generic_file_aio_write, + .unlocked_ioctl = ext2_ioctl, +@@ -71,7 +572,11 @@ const struct file_operations ext2_file_o + .compat_ioctl = ext2_compat_ioctl, + #endif + .mmap = generic_file_mmap, ++#ifdef CONFIG_EXT2_COMPRESS ++ .open = ext2_file_open, ++#else + .open = dquot_file_open, ++#endif + .release = ext2_release_file, + .fsync = ext2_fsync, + .splice_read = generic_file_splice_read, +--- linux-3.2-rc5/fs/ext2/ioctl.c 2011-12-10 00:09:32.000000000 +0100 ++++ linux-3.2-rc5-e2c/fs/ext2/ioctl.c 2011-12-13 14:22:47.855976282 +0100 +@@ -7,7 +7,14 @@ + * Universite Pierre et Marie Curie (Paris VI) + */ + ++#ifdef CONFIG_EXT2_COMPRESS ++#include ++#include ++#include ++#include ++#else + #include "ext2.h" ++#endif + #include + #include + #include +@@ -17,6 +24,65 @@ + #include + + ++#ifdef CONFIG_EXT2_COMPRESS ++ ++#ifndef MIN ++# define MIN(a,b) ((a) < (b) ? 
(a) : (b)) ++#endif ++ ++#ifdef CONFIG_GZ_HACK ++static int check_name(struct inode *ino) ++{ ++ struct dentry *dentry = list_entry(ino->i_dentry.next, struct dentry, d_alias); ++ if (dentry) ++ if ( ++ ++ (dentry->d_name.len >= 4) && ++ (((dentry->d_name.name[dentry->d_name.len - 2] == 'g') ++ && (dentry->d_name.name[dentry->d_name.len - 1] == 'z') ++ && ((dentry->d_name.name[dentry->d_name.len - 3] == '.') ++ || (dentry->d_name.name[dentry->d_name.len - 4] == '.'))) ++ ++ || ((dentry->d_name.name[dentry->d_name.len - 3] == 't') ++ && (dentry->d_name.name[dentry->d_name.len - 2] == 'g') ++ && (dentry->d_name.name[dentry->d_name.len - 1] == 'z') ++ && (dentry->d_name.name[dentry->d_name.len - 4] == '.') ++ && (dentry->d_name.len >= 5)) ++ ++ || ((dentry->d_name.name[dentry->d_name.len - 3] == 'p') ++ && (dentry->d_name.name[dentry->d_name.len - 2] == 'n') ++ && (dentry->d_name.name[dentry->d_name.len - 1] == 'g') ++ && (dentry->d_name.name[dentry->d_name.len - 4] == '.') ++ && (dentry->d_name.len >= 5)) ++ ++ || ((dentry->d_name.name[dentry->d_name.len - 3] == 'j') ++ && (dentry->d_name.name[dentry->d_name.len - 2] == 'p') ++ && (dentry->d_name.name[dentry->d_name.len - 1] == 'g') ++ && (dentry->d_name.name[dentry->d_name.len - 4] == '.') ++ && (dentry->d_name.len >= 5)) ++ ++ || ((dentry->d_name.name[dentry->d_name.len - 3] == 'b') ++ && (dentry->d_name.name[dentry->d_name.len - 2] == 'z') ++ && (dentry->d_name.name[dentry->d_name.len - 1] == '2') ++ && (dentry->d_name.name[dentry->d_name.len - 4] == '.') ++ && (dentry->d_name.len >= 5)) ++ ++ || ((dentry->d_name.name[dentry->d_name.len - 3] == 'm') ++ && (dentry->d_name.name[dentry->d_name.len - 2] == 'n') ++ && (dentry->d_name.name[dentry->d_name.len - 1] == 'g') ++ && (dentry->d_name.name[dentry->d_name.len - 4] == '.') ++ && (dentry->d_name.len >= 5)) ++ ) ++ ) { ++ return 1; ++ } ++ return 0; ++} ++#endif ++#endif ++ ++ ++ + long ext2_ioctl(struct file *filp, unsigned int cmd, unsigned long arg) + { + struct inode *inode = filp->f_dentry->d_inode; +@@ -24,6 +90,10 @@ long ext2_ioctl(struct file *filp, unsig + unsigned int flags; + unsigned short rsv_window_size; + int ret; ++#ifdef CONFIG_EXT2_COMPRESS ++ unsigned long datum; ++ int err; ++#endif + + ext2_debug ("cmd = %u, arg = %lu\n", cmd, arg); + +@@ -75,7 +145,127 @@ long ext2_ioctl(struct file *filp, unsig + } + + flags = flags & EXT2_FL_USER_MODIFIABLE; ++#ifdef CONFIG_EXT2_COMPRESS ++ if (S_ISREG (inode->i_mode) || S_ISDIR (inode->i_mode)) { ++ ++ /* pjm 1998-01-14: In previous versions of ++ e2compr, the kernel forbade raising ++ EXT2_ECOMPR_FL from userspace. I can't ++ think of any purpose for forbidding this, ++ and I find it useful to raise ++ EXT2_ECOMPR_FL for testing purposes, so ++ I've removed the forbidding code. */ ++ if (S_ISREG (inode->i_mode) ++ && (EXT2_NOCOMPR_FL ++ & (flags ^ ei->i_flags))) { // mw hint: ^ is a (excluisive OR) ++ /* NOCOMPR_FL can only be changed if ++ nobody else has the file opened. */ ++ /* pjm 1998-02-16: inode->i_count is ++ useless to us because only dentries ++ use inodes now. Unfortunately, ++ there isn't an easy way of finding ++ the equivalent. We'd have to go ++ through all dentries using the ++ inode, and sum their d_count ++ values. Rather than do that, I'd ++ rather get rid of the exclusion ++ constraint. todo. 
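check_name() above hard-codes character comparisons for .gz, .tgz, .png, .jpg, .bz2 and .mng names. Roughly the same effect, expressed as a suffix table for illustration (an approximation: the original also tolerates the dot one position earlier in the gz case):

    #include <string.h>
    #include <stdio.h>

    /* Returns 1 for names that should stay uncompressed. */
    static int is_precompressed_name(const char *name)
    {
            static const char *suffixes[] = { ".gz", ".tgz", ".png", ".jpg", ".bz2", ".mng" };
            size_t len = strlen(name);
            size_t i;

            for (i = 0; i < sizeof(suffixes) / sizeof(suffixes[0]); i++) {
                    size_t slen = strlen(suffixes[i]);

                    if (len > slen && strcmp(name + len - slen, suffixes[i]) == 0)
                            return 1;
            }
            return 0;
    }

    int main(void)
    {
            printf("%d %d\n", is_precompressed_name("a.tar.gz"), is_precompressed_name("a.txt"));
            return 0;
    }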
*/ ++ //printk("i_count: %i\n", atomic_read(&inode->i_count)); ++ //if (atomic_read(&inode->i_count) > 1) ++ //if (0) ++ if (ext2_get_dcount(inode) > 1) ++ { ++ mutex_unlock(&inode->i_mutex); /*mw*/ ++ return -ETXTBSY; ++ } ++ else { ++ /* pjm 970429: Discarding ++ cached pages is not very ++ clean, but should work. */ ++ /* pjm 980114: Not quite. We ++ should also sync any ++ mappings to buffers first. ++ This isn't very important, ++ as none of the current ++ e2compr programs can ++ trigger this, but todo. */ ++ invalidate_remote_inode (inode); ++ } ++ } ++ ++ if (EXT2_COMPR_FL ++ & (flags ^ ei->i_flags)) { ++ if (flags & EXT2_COMPR_FL) { ++ if (ei->i_flags & EXT2_COMPRBLK_FL) { ++ /* There shouldn't actually be any ++ compressed blocks, AFAIK. However, ++ this is still possible because sometimes ++ COMPRBLK gets raised just to stop ++ us changing cluster size at the wrong ++ time. ++ ++ todo: Call a function that just ++ checks that there are not compressed ++ clusters, and print a warning if any are ++ found. */ ++ } else { ++ int bits = MIN(EXT2_DEFAULT_LOG2_CLU_NBLOCKS, ++ (EXT2_LOG2_MAX_CLUSTER_BYTES ++ - inode->i_sb->s_blocksize_bits)); ++ ++ ei->i_log2_clu_nblocks = bits; ++ ei->i_clu_nblocks = 1 << bits; ++ } ++ ei->i_compr_method = EXT2_DEFAULT_COMPR_METHOD; ++ if (S_ISREG (inode->i_mode)) { ++ //compress ++#ifdef CONFIG_GZ_HACK ++ /* mw: check for .gz-files and similar ++ * I think this is the most clever place for ++ * rejecting files. They remain regular, uncompressed ++ * files and though can be read bypassing all ++ * compression stuff (= fast) :-). And it seems to save ++ * space... somehow */ ++ if (check_name (inode)) ++ { ++ //printk("non-compressable file extension\n"); ++ mutex_unlock(&inode->i_mutex); ++ return 0; ++ } ++#endif ++ //set flags to trigger compression later on ++ flags |= EXT2_DIRTY_FL; ++ ei->i_compr_flags |= EXT2_CLEANUP_FL; ++ } ++ } else if (S_ISREG (inode->i_mode)) { ++ if (ei->i_flags & EXT2_COMPRBLK_FL) { ++ int err; ++ ++ if (ext2_get_dcount(inode) > 1){ ++ mutex_unlock(&inode->i_mutex); //mw ++ return -ETXTBSY; ++ } ++ err = ext2_decompress_inode(inode); ++ if (err) ++ { ++ mutex_unlock(&inode->i_mutex); //mw ++ return err; ++ } ++ } ++ ei->i_flags &= ~EXT2_DIRTY_FL; ++ ei->i_compr_flags &= ~EXT2_CLEANUP_FL; ++ } ++ } ++ } ++#endif + flags |= oldflags & ~EXT2_FL_USER_MODIFIABLE; ++#ifdef CONFIG_EXT2_COMPRESS ++ /* bug fix: scrub 'B' flag from uncompressed files TLL 02/28/07 */ ++ if (!(flags & EXT2_COMPR_FL) && (flags & EXT2_COMPRBLK_FL) ) ++ { ++ flags &= ~EXT2_COMPRBLK_FL; ++ } ++#endif + ei->i_flags = flags; + mutex_unlock(&inode->i_mutex); + +@@ -148,6 +338,184 @@ setflags_out: + mnt_drop_write(filp->f_path.mnt); + return 0; + } ++#ifdef CONFIG_EXT2_COMPRESS ++ case EXT2_IOC_GETCOMPRMETHOD: /* Result means nothing if COMPR_FL is not set */ ++ return put_user (ei->i_compr_method, (long *) arg); ++ case EXT2_IOC_SETCOMPRMETHOD: ++ if ((current_fsuid() != inode->i_uid) && !capable(CAP_FOWNER)) ++ return -EPERM; ++ if (IS_RDONLY (inode)) ++ return -EROFS; ++ if (get_user (datum, (long*) arg)) ++ return -EFAULT; ++ if (!S_ISREG (inode->i_mode) && !S_ISDIR (inode->i_mode)) ++ return -ENOSYS; ++ /* todo: Allow the below, but set initial value of ++ i_compr_meth at read_inode() time (using default if ++ !/) instead of +c time. Same for cluster ++ size. 
*/ ++ if ((unsigned) datum >= EXT2_N_METHODS) ++ return -EINVAL; ++ if (ei->i_compr_method != datum) { ++ if ((ei->i_compr_method == EXT2_NEVER_METH) ++ && (ei->i_flags & EXT2_COMPR_FL)) ++ return -EPERM; ++ /* If the previous method was `defer' then ++ take a look at all uncompressed clusters ++ and try to compress them. (pjm 1997-04-16) */ ++ if ((ei->i_compr_method == EXT2_DEFER_METH) ++ && S_ISREG (inode->i_mode)) { ++ ei->i_flags |= EXT2_DIRTY_FL; ++ ei->i_compr_flags |= EXT2_CLEANUP_FL; ++ } ++ if ((datum == EXT2_NEVER_METH) ++ && S_ISREG (inode->i_mode)) { ++ //printk("SETCOMPR\n"); ++ if ((ei->i_flags & EXT2_COMPRBLK_FL)) ++ { ++ /*mw*/ ++ mutex_lock(&inode->i_mutex); ++ if (ext2_get_dcount(inode) > 1){ ++ mutex_unlock(&inode->i_mutex); /*mw*/ ++ return -ETXTBSY; ++ } ++ err = ext2_decompress_inode(inode); ++ mutex_unlock(&inode->i_mutex); ++ if ( err < 0) ++ return err; ++ } ++ ei->i_flags &= ~EXT2_DIRTY_FL; ++ ei->i_compr_flags &= ~EXT2_CLEANUP_FL; ++ } ++ ei->i_compr_method = datum; ++ inode->i_ctime = CURRENT_TIME; ++ mark_inode_dirty(inode); ++ } ++#ifdef CONFIG_KMOD ++ if (!ext2_algorithm_table[ext2_method_table[datum].alg].avail) { ++ char str[32]; ++ ++ sprintf(str, "ext2-compr-%s", ext2_algorithm_table[ext2_method_table[datum].alg].name); ++ request_module(str); ++ } ++#endif ++ datum = ((datum < EXT2_N_METHODS) ++ && (ext2_algorithm_table[ext2_method_table[datum].alg].avail)); ++ return put_user(datum, (long *)arg); ++ ++ case EXT2_IOC_GETCLUSTERBIT: ++ if (get_user (datum, (long*) arg)) ++ return -EFAULT; ++ if (!S_ISREG (inode->i_mode)) ++ return -ENOSYS; ++ /* We don't do `down(&inode->i_sem)' here because ++ there's no way for userspace to do the ++ corresponding up(). Userspace must rely on ++ EXT2_NOCOMPR_FL if it needs to lock. */ ++ err = ext2_cluster_is_compressed (inode, datum); ++ if (err < 0) ++ return err; ++ return put_user ((err ? 1 : 0), ++ (long *) arg); ++ ++ case EXT2_IOC_RECOGNIZE_COMPRESSED: ++ if (get_user (datum, (long*) arg)) ++ return -EFAULT; ++ if (!S_ISREG (inode->i_mode)) ++ return -ENOSYS; ++ if (IS_RDONLY (inode)) ++ return -EROFS; ++ return ext2_recognize_compressed (inode, datum); ++ ++ case EXT2_IOC_GETCLUSTERSIZE: ++ /* Result means nothing if COMPR_FL is not set (until ++ SETCLUSTERSIZE w/o COMPR_FL is implemented; ++ todo). */ ++ if (!S_ISREG (inode->i_mode) ++ && !S_ISDIR (inode->i_mode)) ++ return -ENOSYS; ++ return put_user (ei->i_clu_nblocks, (long *) arg); ++ ++ case EXT2_IOC_GETFIRSTCLUSTERSIZE: ++ /* Result means nothing if COMPR_FL is not set (until ++ SETCLUSTERSIZE w/o COMPR_FL is implemented; ++ todo). */ ++ if (!S_ISREG (inode->i_mode) ++ && !S_ISDIR (inode->i_mode)) ++ return -ENOSYS; ++ return put_user (ext2_first_cluster_nblocks(inode), (long *) arg); ++ ++ case EXT2_IOC_SETCLUSTERSIZE: ++ if ((current_fsuid() != inode->i_uid) && !capable(CAP_FOWNER)) ++ return -EPERM; ++ if (IS_RDONLY (inode)) ++ return -EROFS; ++ if (get_user (datum, (long *) arg)) ++ return -EFAULT; ++ if (!S_ISREG (inode->i_mode) ++ && !S_ISDIR (inode->i_mode)) ++ return -ENOSYS; ++ ++ /* These are the only possible cluster sizes. The ++ cluster size must be a power of two so that ++ clusters don't straddle address (aka indirect) ++ blocks. At the moment, the upper limit is constrained ++ by how much memory is allocated for de/compression. ++ Also, the gzip algorithms have some optimisations ++ that assume tht the input is no more than 32KB, ++ and in compress.c we would need to zero more bits ++ of head->holemap. 
(In previous releases, the file ++ format was limited to 32 blocks and under 64KB.) */ ++// #if EXT2_MAX_CLUSTER_BLOCKS > 32 || EXT2_MAX_CLUSTER_NBYTES > 32768 ++// # error "This code not updated for cluster size yet." ++// #endif ++ switch (datum) { ++ case (1 << 2): datum = 2; break; ++ case (1 << 3): datum = 3; break; ++ case (1 << 4): datum = 4; break; ++ case (1 << 5): datum = 5; break; ++ default: return -EINVAL; ++ } ++ ++ assert (ei->i_clu_nblocks == (1 << ei->i_log2_clu_nblocks)); ++ if (datum == ei->i_log2_clu_nblocks) ++ return 0; ++ ++ if (ei->i_flags & EXT2_ECOMPR_FL) ++ return -EPERM; ++ if (!(ei->i_flags & EXT2_COMPR_FL)) ++ return -ENOSYS; ++ ++ /* We currently lack a mechanism to change the cluster ++ size if there are already some compressed clusters. ++ The compression must be done in userspace ++ (e.g. with the e2compress program) instead. */ ++ if (ei->i_flags & EXT2_COMPRBLK_FL) ++ return -ENOSYS; ++ ++ if (datum + inode->i_sb->s_blocksize_bits ++ > EXT2_LOG2_MAX_CLUSTER_BYTES) ++ return -EINVAL; ++ ++ ei->i_log2_clu_nblocks = datum; ++ ei->i_clu_nblocks = 1 << datum; ++ inode->i_ctime = CURRENT_TIME; ++ mark_inode_dirty(inode); ++ return 0; ++ ++ case EXT2_IOC_GETCOMPRRATIO: ++ if (!S_ISREG (inode->i_mode)) ++ return -ENOSYS; ++ if (ei->i_flags & EXT2_ECOMPR_FL) ++ return -EPERM; ++ if ((long) (datum = ext2_count_blocks (inode)) < 0) ++ return datum; ++ if ((err = put_user ((long) datum, (long*) arg))) ++ return err; ++ return put_user ((long) inode->i_blocks, (long*) arg + 1); ++ ++ ++#endif + default: + return -ENOTTY; + } +--- linux-3.2-rc5/fs/ext2/ext2.h 2011-12-10 00:09:32.000000000 +0100 ++++ linux-3.2-rc5-e2c/fs/ext2/ext2.h 2011-12-13 14:22:47.855976282 +0100 +@@ -37,6 +37,12 @@ struct ext2_inode_info { + struct ext2_block_alloc_info *i_block_alloc_info; + + __u32 i_dir_start_lookup; ++#ifdef CONFIG_EXT2_COMPRESS ++ __u8 i_log2_clu_nblocks; ++ __u8 i_clu_nblocks; ++ __u8 i_compr_method; ++ __u8 i_compr_flags; ++#endif + #ifdef CONFIG_EXT2_FS_XATTR + /* + * Extended attributes can be read independently of the main file +@@ -126,6 +132,7 @@ extern void ext2_set_inode_flags(struct + extern void ext2_get_inode_flags(struct ext2_inode_info *); + extern int ext2_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo, + u64 start, u64 len); ++extern void ext2_truncate_blocks(struct inode *inode, loff_t offset); + + /* ioctl.c */ + extern long ext2_ioctl(struct file *, unsigned int, unsigned long); +--- linux-3.2-rc5/include/linux/ext2_fs.h 2011-12-10 00:09:32.000000000 +0100 ++++ linux-3.2-rc5-e2c/include/linux/ext2_fs.h 2011-12-13 14:22:47.856976313 +0100 +@@ -87,6 +87,10 @@ static inline struct ext2_sb_info *EXT2_ + /* + * Macro-instructions used to manage several block sizes + */ ++#define EXT2_GRAIN_SIZE 1024 ++/* Minimum allocation unit. This is used in fs/ext2/compress.c to ++ check compr_len validity wrt (uncompressed) len. This definition ++ will probably need to be changed when fragments are implemented. */ + #define EXT2_MIN_BLOCK_SIZE 1024 + #define EXT2_MAX_BLOCK_SIZE 4096 + #define EXT2_MIN_BLOCK_LOG_SIZE 10 +@@ -178,9 +182,10 @@ struct ext2_group_desc + #define EXT2_NODUMP_FL FS_NODUMP_FL /* do not dump file */ + #define EXT2_NOATIME_FL FS_NOATIME_FL /* do not update atime */ + /* Reserved for compression usage... 
*/ +-#define EXT2_DIRTY_FL FS_DIRTY_FL ++#define EXT2_DIRTY_FL FS_DIRTY_FL /* Needs compressing; see Readme.e2compr */ + #define EXT2_COMPRBLK_FL FS_COMPRBLK_FL /* One or more compressed clusters */ + #define EXT2_NOCOMP_FL FS_NOCOMP_FL /* Don't compress */ ++#define EXT2_NOCOMPR_FL FS_NOCOMP_FL /* Access raw data */ + #define EXT2_ECOMPR_FL FS_ECOMPR_FL /* Compression error */ + /* End compression flags --- maybe not all used */ + #define EXT2_BTREE_FL FS_BTREE_FL /* btree format dir */ +@@ -342,6 +347,7 @@ struct ext2_inode { + #define EXT2_MOUNT_MINIX_DF 0x000080 /* Mimics the Minix statfs */ + #define EXT2_MOUNT_NOBH 0x000100 /* No buffer_heads */ + #define EXT2_MOUNT_NO_UID32 0x000200 /* Disable 32-bit UIDs */ ++#define EXT2_MOUNT_FORCE_COMPAT 0x000400 /* Mount despite incompatibilities */ + #define EXT2_MOUNT_XATTR_USER 0x004000 /* Extended user attributes */ + #define EXT2_MOUNT_POSIX_ACL 0x008000 /* POSIX Access Control Lists */ + #define EXT2_MOUNT_XIP 0x010000 /* Execute in place */ +@@ -507,8 +513,14 @@ struct ext2_super_block { + #define EXT2_FEATURE_INCOMPAT_ANY 0xffffffff + + #define EXT2_FEATURE_COMPAT_SUPP EXT2_FEATURE_COMPAT_EXT_ATTR ++#ifdef CONFIG_EXT2_COMPRESS ++#define EXT2_FEATURE_INCOMPAT_SUPP (EXT2_FEATURE_INCOMPAT_COMPRESSION| \ ++ EXT2_FEATURE_INCOMPAT_FILETYPE| \ ++ EXT2_FEATURE_INCOMPAT_META_BG) ++#else + #define EXT2_FEATURE_INCOMPAT_SUPP (EXT2_FEATURE_INCOMPAT_FILETYPE| \ + EXT2_FEATURE_INCOMPAT_META_BG) ++#endif + #define EXT2_FEATURE_RO_COMPAT_SUPP (EXT2_FEATURE_RO_COMPAT_SPARSE_SUPER| \ + EXT2_FEATURE_RO_COMPAT_LARGE_FILE| \ + EXT2_FEATURE_RO_COMPAT_BTREE_DIR) +@@ -588,4 +600,16 @@ enum { + ~EXT2_DIR_ROUND) + #define EXT2_MAX_REC_LEN ((1<<16)-1) + ++#ifndef __KERNEL__ ++/* This simplifies things for user programs (notably e2fsprogs) that ++ must compile whether or not is present, but ++ would prefer to include it. Presumably the file is present if the ++ user has this version of ext2_fs.h. */ ++ ++# /* Do not remove this comment. */ include ++ ++/* The comment between `#' and `include' prevents mkdep from generating ++ a dependency on ext2_fs_c.h. */ ++#endif ++ + #endif /* _LINUX_EXT2_FS_H */ +--- linux-3.2-rc5/fs/fcntl.c 2011-12-10 00:09:32.000000000 +0100 ++++ linux-3.2-rc5-e2c/fs/fcntl.c 2011-12-13 14:22:47.857976344 +0100 +@@ -25,6 +25,12 @@ + #include + #include + ++#ifdef CONFIG_EXT2_COMPRESS ++//mw: deny O_DIRECT on file with compression ++#include ++#include "ext2/ext2.h" ++#endif ++ + void set_close_on_exec(unsigned int fd, int flag) + { + struct files_struct *files = current->files; +@@ -171,6 +177,16 @@ static int setfl(int fd, struct file * f + if (!filp->f_mapping || !filp->f_mapping->a_ops || + !filp->f_mapping->a_ops->direct_IO) + return -EINVAL; ++ ++#ifdef CONFIG_EXT2_COMPRESS ++ //mw: if we have a compressed ext2 file: deny! ++ // TODO: maybe check fs-type first! ++ //assert(!(EXT2_I(inode)->i_flags & (EXT2_COMPR_FL|EXT2_COMPRBLK_FL))); ++ if (EXT2_I(inode)->i_flags & (EXT2_COMPR_FL|EXT2_COMPRBLK_FL)) ++ { ++ return -EINVAL; ++ } ++#endif + } + + if (filp->f_op && filp->f_op->check_flags) +--- linux-3.2-rc5/mm/truncate.c 2011-12-10 00:09:32.000000000 +0100 ++++ linux-3.2-rc5-e2c/mm/truncate.c 2011-12-13 14:22:47.858976376 +0100 +@@ -22,6 +22,9 @@ + #include + #include "internal.h" + ++#ifdef CONFIG_EXT2_COMPRESS ++#include ++#endif + + /** + * do_invalidatepage - invalidate part or all of a page +@@ -551,6 +554,11 @@ void truncate_pagecache(struct inode *in + * unmap_mapping_range call must be made for correctness. 
+ */ + unmap_mapping_range(mapping, holebegin, 0, 1); ++#ifdef CONFIG_EXT2_COMPRESS ++ if ((inode->i_op && inode->i_op->truncate) && ++ ((strcmp(inode->i_sb->s_type->name, "ext2") != 0) || ++ (!(EXT2_I(inode)->i_flags & EXT2_COMPRBLK_FL)))) ++#endif + truncate_inode_pages(mapping, newsize); + unmap_mapping_range(mapping, holebegin, 0, 1); + } +--- linux-3.2-rc5/mm/swapfile.c 2011-12-10 00:09:32.000000000 +0100 ++++ linux-3.2-rc5-e2c/mm/swapfile.c 2011-12-13 14:22:47.859976408 +0100 +@@ -31,6 +31,10 @@ + #include + #include + #include ++#ifdef CONFIG_EXT2_COMPRESS ++#include ++#endif ++ + + #include + #include +@@ -2056,6 +2060,24 @@ SYSCALL_DEFINE2(swapon, const char __use + } + + inode = mapping->host; ++ ++#ifdef CONFIG_EXT2_COMPRESS ++ /* ++ * Swapping not supported for e2compressed files. ++ * (Actually, this code is pretty useless because we ++ * should get an error later anyway because of the ++ * holes.) Yes, this is pretty horrible code... I'll ++ * improve it later. ++ */ ++ if ((strcmp(inode->i_sb->s_type->name, "ext2") == 0) ++ && (EXT2_I(inode)->i_flags & EXT2_COMPRBLK_FL)) ++ { ++ printk("Assertion: Error NO swap SWAP implemented!\n"); ++ error = -EINVAL; ++ goto bad_swap; ++ } ++#endif ++ + /* If S_ISREG(inode->i_mode) will do mutex_lock(&inode->i_mutex); */ + error = claim_swapfile(p, inode); + if (unlikely(error)) +--- linux-3.2-rc5/mm/filemap.c 2011-12-10 00:09:32.000000000 +0100 ++++ linux-3.2-rc5-e2c/mm/filemap.c 2011-12-13 14:22:47.860976440 +0100 +@@ -43,6 +43,10 @@ + + #include + ++#ifdef CONFIG_EXT2_COMPRESS ++# include ++#endif ++ + /* + * Shared mappings implemented 30.11.1994. It's not fully working yet, + * though. +@@ -278,7 +282,19 @@ int filemap_fdatawait_range(struct addre + PAGECACHE_TAG_WRITEBACK, + min(end - index, (pgoff_t)PAGEVEC_SIZE-1) + 1)) != 0) { + unsigned i; ++#ifdef CONFIG_EXT2_COMPRESS ++/* ++ * I'm not sure that this is right. It has been reworked considerably since ++ * 2.6.5. - whitpa ++ */ ++ struct inode *inode = mapping->host; ++ //printk("wait_on_page_writeback_range\n"); + ++ if ((strcmp(inode->i_sb->s_type->name, "ext2") != 0) ++ || (atomic_read(&inode->i_mutex.count) > 0) ++ || (EXT2_I(inode)->i_compr_flags & ++ EXT2_OSYNC_INODE)) ++#endif + for (i = 0; i < nr_pages; i++) { + struct page *page = pvec.pages[i]; + +@@ -1184,6 +1200,15 @@ page_ok: + } + nr = nr - offset; + ++#ifdef CONFIG_EXT2_COMPRESS ++ lock_page(page); ++ //check again: after locking still uptodate? ++ if(!PageUptodate(page)){ ++ unlock_page(page); ++ goto page_not_up_to_date; ++ } ++#endif ++ + /* If users can be writing to this page using arbitrary + * virtual addresses, take care about potential aliasing + * before reading the page on the kernel side. +@@ -1215,6 +1240,10 @@ page_ok: + offset &= ~PAGE_CACHE_MASK; + prev_offset = offset; + ++#ifdef CONFIG_EXT2_COMPRESS ++ unlock_page(page); ++#endif ++ + page_cache_release(page); + if (ret == nr && desc->count) + continue; +@@ -1224,7 +1253,12 @@ page_not_up_to_date: + /* Get exclusive access to the page ... */ + error = lock_page_killable(page); + if (unlikely(error)) ++ { ++ printk("Readpage Error: mw: page locking failed with code: %i\n", error); ++ printk("Readpage Error: mw: might happen as page was locked 'killable'\n"); ++ printk("Readpage Error: mw: was reading app killed?\n"); + goto readpage_error; ++ } + + page_not_up_to_date_locked: + /* Did it get truncated before we got the lock? 
*/ +@@ -1255,13 +1289,17 @@ readpage: + page_cache_release(page); + goto find_page; + } ++ printk("Readpage Error: fs-specific readpage failed with code: %i\n", error); + goto readpage_error; + } + + if (!PageUptodate(page)) { + error = lock_page_killable(page); + if (unlikely(error)) ++ { ++ printk("Readpage Error: page was not uptodate after read. page locking failed with code: %i\n", error); + goto readpage_error; ++ } + if (!PageUptodate(page)) { + if (page->mapping == NULL) { + /* +@@ -1274,6 +1312,7 @@ readpage: + unlock_page(page); + shrink_readahead_size_eio(filp, ra); + error = -EIO; ++ printk("Readpage Error: page was not uptodate after read AND page locked. failed with code: %i\n", error); + goto readpage_error; + } + unlock_page(page); +@@ -1285,6 +1324,7 @@ readpage_error: + /* UHHUH! A synchronous read error occurred. Report it */ + desc->error = error; + page_cache_release(page); ++ printk("Readpage Error\n"); + goto out; + + no_cached_page: +--- linux-3.2-rc5/mm/page_alloc.c 2011-12-10 00:09:32.000000000 +0100 ++++ linux-3.2-rc5-e2c/mm/page_alloc.c 2011-12-13 14:22:47.863976534 +0100 +@@ -1733,6 +1733,8 @@ this_zone_full: + } + return page; + } ++/*mw: needed to build ext2 /w e2compr as module */ ++EXPORT_SYMBOL(__pagevec_free); + + /* + * Large machines with many possible nodes should not always dump per-node diff --git a/3.2.34/linux-3.2.33-zfs.patch b/3.2.34/linux-3.2.33-zfs.patch new file mode 100644 index 0000000..022b674 --- /dev/null +++ b/3.2.34/linux-3.2.33-zfs.patch @@ -0,0 +1,201830 @@ +diff -uNr linux-3.2.33-go.orig/fs/Kconfig linux-3.2.33-go/fs/Kconfig +--- linux-3.2.33-go.orig/fs/Kconfig 2012-11-16 23:15:05.844494007 +0100 ++++ linux-3.2.33-go/fs/Kconfig 2012-11-16 23:25:34.395038807 +0100 +@@ -5,6 +5,7 @@ + menu "File systems" + + if BLOCK ++source "fs/zfs/Kconfig" + + source "fs/ext2/Kconfig" + source "fs/ext3/Kconfig" +diff -uNr linux-3.2.33-go.orig/fs/Makefile linux-3.2.33-go/fs/Makefile +--- linux-3.2.33-go.orig/fs/Makefile 2012-11-16 23:15:05.654496192 +0100 ++++ linux-3.2.33-go/fs/Makefile 2012-11-16 23:25:34.403038714 +0100 +@@ -18,6 +18,7 @@ + else + obj-y += no-block.o + endif ++obj-$(CONFIG_ZFS) += zfs/ + + obj-$(CONFIG_BLK_DEV_INTEGRITY) += bio-integrity.o + obj-y += notify/ +diff -uNr linux-3.2.33-go.orig/fs/zfs/avl/avl.c linux-3.2.33-go/fs/zfs/avl/avl.c +--- linux-3.2.33-go.orig/fs/zfs/avl/avl.c 1970-01-01 01:00:00.000000000 +0100 ++++ linux-3.2.33-go/fs/zfs/avl/avl.c 2012-11-16 23:25:34.355039267 +0100 +@@ -0,0 +1,1057 @@ ++/* ++ * CDDL HEADER START ++ * ++ * The contents of this file are subject to the terms of the ++ * Common Development and Distribution License (the "License"). ++ * You may not use this file except in compliance with the License. ++ * ++ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE ++ * or http://www.opensolaris.org/os/licensing. ++ * See the License for the specific language governing permissions ++ * and limitations under the License. ++ * ++ * When distributing Covered Code, include this CDDL HEADER in each ++ * file and include the License file at usr/src/OPENSOLARIS.LICENSE. ++ * If applicable, add the following below this CDDL HEADER, with the ++ * fields enclosed by brackets "[]" replaced with your own identifying ++ * information: Portions Copyright [yyyy] [name of copyright owner] ++ * ++ * CDDL HEADER END ++ */ ++/* ++ * Copyright 2009 Sun Microsystems, Inc. All rights reserved. ++ * Use is subject to license terms. 
++ */ ++ ++/* ++ * AVL - generic AVL tree implementation for kernel use ++ * ++ * A complete description of AVL trees can be found in many CS textbooks. ++ * ++ * Here is a very brief overview. An AVL tree is a binary search tree that is ++ * almost perfectly balanced. By "almost" perfectly balanced, we mean that at ++ * any given node, the left and right subtrees are allowed to differ in height ++ * by at most 1 level. ++ * ++ * This relaxation from a perfectly balanced binary tree allows doing ++ * insertion and deletion relatively efficiently. Searching the tree is ++ * still a fast operation, roughly O(log(N)). ++ * ++ * The key to insertion and deletion is a set of tree maniuplations called ++ * rotations, which bring unbalanced subtrees back into the semi-balanced state. ++ * ++ * This implementation of AVL trees has the following peculiarities: ++ * ++ * - The AVL specific data structures are physically embedded as fields ++ * in the "using" data structures. To maintain generality the code ++ * must constantly translate between "avl_node_t *" and containing ++ * data structure "void *"s by adding/subracting the avl_offset. ++ * ++ * - Since the AVL data is always embedded in other structures, there is ++ * no locking or memory allocation in the AVL routines. This must be ++ * provided for by the enclosing data structure's semantics. Typically, ++ * avl_insert()/_add()/_remove()/avl_insert_here() require some kind of ++ * exclusive write lock. Other operations require a read lock. ++ * ++ * - The implementation uses iteration instead of explicit recursion, ++ * since it is intended to run on limited size kernel stacks. Since ++ * there is no recursion stack present to move "up" in the tree, ++ * there is an explicit "parent" link in the avl_node_t. ++ * ++ * - The left/right children pointers of a node are in an array. ++ * In the code, variables (instead of constants) are used to represent ++ * left and right indices. The implementation is written as if it only ++ * dealt with left handed manipulations. By changing the value assigned ++ * to "left", the code also works for right handed trees. The ++ * following variables/terms are frequently used: ++ * ++ * int left; // 0 when dealing with left children, ++ * // 1 for dealing with right children ++ * ++ * int left_heavy; // -1 when left subtree is taller at some node, ++ * // +1 when right subtree is taller ++ * ++ * int right; // will be the opposite of left (0 or 1) ++ * int right_heavy;// will be the opposite of left_heavy (-1 or 1) ++ * ++ * int direction; // 0 for "<" (ie. left child); 1 for ">" (right) ++ * ++ * Though it is a little more confusing to read the code, the approach ++ * allows using half as much code (and hence cache footprint) for tree ++ * manipulations and eliminates many conditional branches. ++ * ++ * - The avl_index_t is an opaque "cookie" used to find nodes at or ++ * adjacent to where a new value would be inserted in the tree. The value ++ * is a modified "avl_node_t *". The bottom bit (normally 0 for a ++ * pointer) is set to indicate if that the new node has a value greater ++ * than the value of the indicated "avl_node_t *". ++ */ ++ ++#include ++#include ++#include ++#include ++#include ++ ++/* ++ * Small arrays to translate between balance (or diff) values and child indeces. ++ * ++ * Code that deals with binary tree data structures will randomly use ++ * left and right children when examining a tree. C "if()" statements ++ * which evaluate randomly suffer from very poor hardware branch prediction. 
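The "children in an array" scheme described above lets a comparison result select the child directly instead of branching on left versus right. A toy, non-AVL lookup showing only that indexing trick (structure names invented):

    #include <stddef.h>

    struct toy_node {
            struct toy_node *child[2];      /* [0] = left, [1] = right */
            int value;
    };

    static struct toy_node *toy_find(struct toy_node *root, int value)
    {
            struct toy_node *node = root;

            /* the comparison yields 0 or 1 and picks the child, no if/else */
            while (node != NULL && node->value != value)
                    node = node->child[value > node->value];
            return node;
    }

    int main(void)
    {
            struct toy_node a = { {0, 0}, 1 }, c = { {0, 0}, 3 };
            struct toy_node b = { {&a, &c}, 2 };

            return toy_find(&b, 3) == &c ? 0 : 1;
    }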
++ * In this code we avoid some of the branch mispredictions by using the ++ * following translation arrays. They replace random branches with an ++ * additional memory reference. Since the translation arrays are both very ++ * small the data should remain efficiently in cache. ++ */ ++static const int avl_child2balance[2] = {-1, 1}; ++static const int avl_balance2child[] = {0, 0, 1}; ++ ++ ++/* ++ * Walk from one node to the previous valued node (ie. an infix walk ++ * towards the left). At any given node we do one of 2 things: ++ * ++ * - If there is a left child, go to it, then to it's rightmost descendant. ++ * ++ * - otherwise we return thru parent nodes until we've come from a right child. ++ * ++ * Return Value: ++ * NULL - if at the end of the nodes ++ * otherwise next node ++ */ ++void * ++avl_walk(avl_tree_t *tree, void *oldnode, int left) ++{ ++ size_t off = tree->avl_offset; ++ avl_node_t *node = AVL_DATA2NODE(oldnode, off); ++ int right = 1 - left; ++ int was_child; ++ ++ ++ /* ++ * nowhere to walk to if tree is empty ++ */ ++ if (node == NULL) ++ return (NULL); ++ ++ /* ++ * Visit the previous valued node. There are two possibilities: ++ * ++ * If this node has a left child, go down one left, then all ++ * the way right. ++ */ ++ if (node->avl_child[left] != NULL) { ++ for (node = node->avl_child[left]; ++ node->avl_child[right] != NULL; ++ node = node->avl_child[right]) ++ ; ++ /* ++ * Otherwise, return thru left children as far as we can. ++ */ ++ } else { ++ for (;;) { ++ was_child = AVL_XCHILD(node); ++ node = AVL_XPARENT(node); ++ if (node == NULL) ++ return (NULL); ++ if (was_child == right) ++ break; ++ } ++ } ++ ++ return (AVL_NODE2DATA(node, off)); ++} ++ ++/* ++ * Return the lowest valued node in a tree or NULL. ++ * (leftmost child from root of tree) ++ */ ++void * ++avl_first(avl_tree_t *tree) ++{ ++ avl_node_t *node; ++ avl_node_t *prev = NULL; ++ size_t off = tree->avl_offset; ++ ++ for (node = tree->avl_root; node != NULL; node = node->avl_child[0]) ++ prev = node; ++ ++ if (prev != NULL) ++ return (AVL_NODE2DATA(prev, off)); ++ return (NULL); ++} ++ ++/* ++ * Return the highest valued node in a tree or NULL. ++ * (rightmost child from root of tree) ++ */ ++void * ++avl_last(avl_tree_t *tree) ++{ ++ avl_node_t *node; ++ avl_node_t *prev = NULL; ++ size_t off = tree->avl_offset; ++ ++ for (node = tree->avl_root; node != NULL; node = node->avl_child[1]) ++ prev = node; ++ ++ if (prev != NULL) ++ return (AVL_NODE2DATA(prev, off)); ++ return (NULL); ++} ++ ++/* ++ * Access the node immediately before or after an insertion point. ++ * ++ * "avl_index_t" is a (avl_node_t *) with the bottom bit indicating a child ++ * ++ * Return value: ++ * NULL: no node in the given direction ++ * "void *" of the found tree node ++ */ ++void * ++avl_nearest(avl_tree_t *tree, avl_index_t where, int direction) ++{ ++ int child = AVL_INDEX2CHILD(where); ++ avl_node_t *node = AVL_INDEX2NODE(where); ++ void *data; ++ size_t off = tree->avl_offset; ++ ++ if (node == NULL) { ++ ASSERT(tree->avl_root == NULL); ++ return (NULL); ++ } ++ data = AVL_NODE2DATA(node, off); ++ if (child != direction) ++ return (data); ++ ++ return (avl_walk(tree, data, direction)); ++} ++ ++ ++/* ++ * Search for the node which contains "value". The algorithm is a ++ * simple binary tree search. 
++ * ++ * return value: ++ * NULL: the value is not in the AVL tree ++ * *where (if not NULL) is set to indicate the insertion point ++ * "void *" of the found tree node ++ */ ++void * ++avl_find(avl_tree_t *tree, const void *value, avl_index_t *where) ++{ ++ avl_node_t *node; ++ avl_node_t *prev = NULL; ++ int child = 0; ++ int diff; ++ size_t off = tree->avl_offset; ++ ++ for (node = tree->avl_root; node != NULL; ++ node = node->avl_child[child]) { ++ ++ prev = node; ++ ++ diff = tree->avl_compar(value, AVL_NODE2DATA(node, off)); ++ ASSERT(-1 <= diff && diff <= 1); ++ if (diff == 0) { ++#ifdef DEBUG ++ if (where != NULL) ++ *where = 0; ++#endif ++ return (AVL_NODE2DATA(node, off)); ++ } ++ child = avl_balance2child[1 + diff]; ++ ++ } ++ ++ if (where != NULL) ++ *where = AVL_MKINDEX(prev, child); ++ ++ return (NULL); ++} ++ ++ ++/* ++ * Perform a rotation to restore balance at the subtree given by depth. ++ * ++ * This routine is used by both insertion and deletion. The return value ++ * indicates: ++ * 0 : subtree did not change height ++ * !0 : subtree was reduced in height ++ * ++ * The code is written as if handling left rotations, right rotations are ++ * symmetric and handled by swapping values of variables right/left[_heavy] ++ * ++ * On input balance is the "new" balance at "node". This value is either ++ * -2 or +2. ++ */ ++static int ++avl_rotation(avl_tree_t *tree, avl_node_t *node, int balance) ++{ ++ int left = !(balance < 0); /* when balance = -2, left will be 0 */ ++ int right = 1 - left; ++ int left_heavy = balance >> 1; ++ int right_heavy = -left_heavy; ++ avl_node_t *parent = AVL_XPARENT(node); ++ avl_node_t *child = node->avl_child[left]; ++ avl_node_t *cright; ++ avl_node_t *gchild; ++ avl_node_t *gright; ++ avl_node_t *gleft; ++ int which_child = AVL_XCHILD(node); ++ int child_bal = AVL_XBALANCE(child); ++ ++ /* BEGIN CSTYLED */ ++ /* ++ * case 1 : node is overly left heavy, the left child is balanced or ++ * also left heavy. This requires the following rotation. ++ * ++ * (node bal:-2) ++ * / \ ++ * / \ ++ * (child bal:0 or -1) ++ * / \ ++ * / \ ++ * cright ++ * ++ * becomes: ++ * ++ * (child bal:1 or 0) ++ * / \ ++ * / \ ++ * (node bal:-1 or 0) ++ * / \ ++ * / \ ++ * cright ++ * ++ * we detect this situation by noting that child's balance is not ++ * right_heavy. ++ */ ++ /* END CSTYLED */ ++ if (child_bal != right_heavy) { ++ ++ /* ++ * compute new balance of nodes ++ * ++ * If child used to be left heavy (now balanced) we reduced ++ * the height of this sub-tree -- used in "return...;" below ++ */ ++ child_bal += right_heavy; /* adjust towards right */ ++ ++ /* ++ * move "cright" to be node's left child ++ */ ++ cright = child->avl_child[right]; ++ node->avl_child[left] = cright; ++ if (cright != NULL) { ++ AVL_SETPARENT(cright, node); ++ AVL_SETCHILD(cright, left); ++ } ++ ++ /* ++ * move node to be child's right child ++ */ ++ child->avl_child[right] = node; ++ AVL_SETBALANCE(node, -child_bal); ++ AVL_SETCHILD(node, right); ++ AVL_SETPARENT(node, child); ++ ++ /* ++ * update the pointer into this subtree ++ */ ++ AVL_SETBALANCE(child, child_bal); ++ AVL_SETCHILD(child, which_child); ++ AVL_SETPARENT(child, parent); ++ if (parent != NULL) ++ parent->avl_child[which_child] = child; ++ else ++ tree->avl_root = child; ++ ++ return (child_bal == 0); ++ } ++ ++ /* BEGIN CSTYLED */ ++ /* ++ * case 2 : When node is left heavy, but child is right heavy we use ++ * a different rotation. 
++ * ++ * (node b:-2) ++ * / \ ++ * / \ ++ * / \ ++ * (child b:+1) ++ * / \ ++ * / \ ++ * (gchild b: != 0) ++ * / \ ++ * / \ ++ * gleft gright ++ * ++ * becomes: ++ * ++ * (gchild b:0) ++ * / \ ++ * / \ ++ * / \ ++ * (child b:?) (node b:?) ++ * / \ / \ ++ * / \ / \ ++ * gleft gright ++ * ++ * computing the new balances is more complicated. As an example: ++ * if gchild was right_heavy, then child is now left heavy ++ * else it is balanced ++ */ ++ /* END CSTYLED */ ++ gchild = child->avl_child[right]; ++ gleft = gchild->avl_child[left]; ++ gright = gchild->avl_child[right]; ++ ++ /* ++ * move gright to left child of node and ++ * ++ * move gleft to right child of node ++ */ ++ node->avl_child[left] = gright; ++ if (gright != NULL) { ++ AVL_SETPARENT(gright, node); ++ AVL_SETCHILD(gright, left); ++ } ++ ++ child->avl_child[right] = gleft; ++ if (gleft != NULL) { ++ AVL_SETPARENT(gleft, child); ++ AVL_SETCHILD(gleft, right); ++ } ++ ++ /* ++ * move child to left child of gchild and ++ * ++ * move node to right child of gchild and ++ * ++ * fixup parent of all this to point to gchild ++ */ ++ balance = AVL_XBALANCE(gchild); ++ gchild->avl_child[left] = child; ++ AVL_SETBALANCE(child, (balance == right_heavy ? left_heavy : 0)); ++ AVL_SETPARENT(child, gchild); ++ AVL_SETCHILD(child, left); ++ ++ gchild->avl_child[right] = node; ++ AVL_SETBALANCE(node, (balance == left_heavy ? right_heavy : 0)); ++ AVL_SETPARENT(node, gchild); ++ AVL_SETCHILD(node, right); ++ ++ AVL_SETBALANCE(gchild, 0); ++ AVL_SETPARENT(gchild, parent); ++ AVL_SETCHILD(gchild, which_child); ++ if (parent != NULL) ++ parent->avl_child[which_child] = gchild; ++ else ++ tree->avl_root = gchild; ++ ++ return (1); /* the new tree is always shorter */ ++} ++ ++ ++/* ++ * Insert a new node into an AVL tree at the specified (from avl_find()) place. ++ * ++ * Newly inserted nodes are always leaf nodes in the tree, since avl_find() ++ * searches out to the leaf positions. The avl_index_t indicates the node ++ * which will be the parent of the new node. ++ * ++ * After the node is inserted, a single rotation further up the tree may ++ * be necessary to maintain an acceptable AVL balance. ++ */ ++void ++avl_insert(avl_tree_t *tree, void *new_data, avl_index_t where) ++{ ++ avl_node_t *node; ++ avl_node_t *parent = AVL_INDEX2NODE(where); ++ int old_balance; ++ int new_balance; ++ int which_child = AVL_INDEX2CHILD(where); ++ size_t off = tree->avl_offset; ++ ++ ASSERT(tree); ++#ifdef _LP64 ++ ASSERT(((uintptr_t)new_data & 0x7) == 0); ++#endif ++ ++ node = AVL_DATA2NODE(new_data, off); ++ ++ /* ++ * First, add the node to the tree at the indicated position. ++ */ ++ ++tree->avl_numnodes; ++ ++ node->avl_child[0] = NULL; ++ node->avl_child[1] = NULL; ++ ++ AVL_SETCHILD(node, which_child); ++ AVL_SETBALANCE(node, 0); ++ AVL_SETPARENT(node, parent); ++ if (parent != NULL) { ++ ASSERT(parent->avl_child[which_child] == NULL); ++ parent->avl_child[which_child] = node; ++ } else { ++ ASSERT(tree->avl_root == NULL); ++ tree->avl_root = node; ++ } ++ /* ++ * Now, back up the tree modifying the balance of all nodes above the ++ * insertion point. If we get to a highly unbalanced ancestor, we ++ * need to do a rotation. If we back out of the tree we are done. ++ * If we brought any subtree into perfect balance (0), we are also done. 
++ */ ++ for (;;) { ++ node = parent; ++ if (node == NULL) ++ return; ++ ++ /* ++ * Compute the new balance ++ */ ++ old_balance = AVL_XBALANCE(node); ++ new_balance = old_balance + avl_child2balance[which_child]; ++ ++ /* ++ * If we introduced equal balance, then we are done immediately ++ */ ++ if (new_balance == 0) { ++ AVL_SETBALANCE(node, 0); ++ return; ++ } ++ ++ /* ++ * If both old and new are not zero we went ++ * from -1 to -2 balance, do a rotation. ++ */ ++ if (old_balance != 0) ++ break; ++ ++ AVL_SETBALANCE(node, new_balance); ++ parent = AVL_XPARENT(node); ++ which_child = AVL_XCHILD(node); ++ } ++ ++ /* ++ * perform a rotation to fix the tree and return ++ */ ++ (void) avl_rotation(tree, node, new_balance); ++} ++ ++/* ++ * Insert "new_data" in "tree" in the given "direction" either after or ++ * before (AVL_AFTER, AVL_BEFORE) the data "here". ++ * ++ * Insertions can only be done at empty leaf points in the tree, therefore ++ * if the given child of the node is already present we move to either ++ * the AVL_PREV or AVL_NEXT and reverse the insertion direction. Since ++ * every other node in the tree is a leaf, this always works. ++ * ++ * To help developers using this interface, we assert that the new node ++ * is correctly ordered at every step of the way in DEBUG kernels. ++ */ ++void ++avl_insert_here( ++ avl_tree_t *tree, ++ void *new_data, ++ void *here, ++ int direction) ++{ ++ avl_node_t *node; ++ int child = direction; /* rely on AVL_BEFORE == 0, AVL_AFTER == 1 */ ++#ifdef DEBUG ++ int diff; ++#endif ++ ++ ASSERT(tree != NULL); ++ ASSERT(new_data != NULL); ++ ASSERT(here != NULL); ++ ASSERT(direction == AVL_BEFORE || direction == AVL_AFTER); ++ ++ /* ++ * If corresponding child of node is not NULL, go to the neighboring ++ * node and reverse the insertion direction. ++ */ ++ node = AVL_DATA2NODE(here, tree->avl_offset); ++ ++#ifdef DEBUG ++ diff = tree->avl_compar(new_data, here); ++ ASSERT(-1 <= diff && diff <= 1); ++ ASSERT(diff != 0); ++ ASSERT(diff > 0 ? child == 1 : child == 0); ++#endif ++ ++ if (node->avl_child[child] != NULL) { ++ node = node->avl_child[child]; ++ child = 1 - child; ++ while (node->avl_child[child] != NULL) { ++#ifdef DEBUG ++ diff = tree->avl_compar(new_data, ++ AVL_NODE2DATA(node, tree->avl_offset)); ++ ASSERT(-1 <= diff && diff <= 1); ++ ASSERT(diff != 0); ++ ASSERT(diff > 0 ? child == 1 : child == 0); ++#endif ++ node = node->avl_child[child]; ++ } ++#ifdef DEBUG ++ diff = tree->avl_compar(new_data, ++ AVL_NODE2DATA(node, tree->avl_offset)); ++ ASSERT(-1 <= diff && diff <= 1); ++ ASSERT(diff != 0); ++ ASSERT(diff > 0 ? child == 1 : child == 0); ++#endif ++ } ++ ASSERT(node->avl_child[child] == NULL); ++ ++ avl_insert(tree, new_data, AVL_MKINDEX(node, child)); ++} ++ ++/* ++ * Add a new node to an AVL tree. ++ */ ++void ++avl_add(avl_tree_t *tree, void *new_node) ++{ ++ avl_index_t where; ++ ++ /* ++ * This is unfortunate. We want to call panic() here, even for ++ * non-DEBUG kernels. In userland, however, we can't depend on anything ++ * in libc or else the rtld build process gets confused. So, all we can ++ * do in userland is resort to a normal ASSERT(). ++ */ ++ if (avl_find(tree, new_node, &where) != NULL) ++#ifdef _KERNEL ++ panic("avl_find() succeeded inside avl_add()"); ++#else ++ ASSERT(0); ++#endif ++ avl_insert(tree, new_node, where); ++} ++ ++/* ++ * Delete a node from the AVL tree. Deletion is similar to insertion, but ++ * with 2 complications. ++ * ++ * First, we may be deleting an interior node. 
Consider the following subtree: ++ * ++ * d c c ++ * / \ / \ / \ ++ * b e b e b e ++ * / \ / \ / ++ * a c a a ++ * ++ * When we are deleting node (d), we find and bring up an adjacent valued leaf ++ * node, say (c), to take the interior node's place. In the code this is ++ * handled by temporarily swapping (d) and (c) in the tree and then using ++ * common code to delete (d) from the leaf position. ++ * ++ * Secondly, an interior deletion from a deep tree may require more than one ++ * rotation to fix the balance. This is handled by moving up the tree through ++ * parents and applying rotations as needed. The return value from ++ * avl_rotation() is used to detect when a subtree did not change overall ++ * height due to a rotation. ++ */ ++void ++avl_remove(avl_tree_t *tree, void *data) ++{ ++ avl_node_t *delete; ++ avl_node_t *parent; ++ avl_node_t *node; ++ avl_node_t tmp; ++ int old_balance; ++ int new_balance; ++ int left; ++ int right; ++ int which_child; ++ size_t off = tree->avl_offset; ++ ++ ASSERT(tree); ++ ++ delete = AVL_DATA2NODE(data, off); ++ ++ /* ++ * Deletion is easiest with a node that has at most 1 child. ++ * We swap a node with 2 children with a sequentially valued ++ * neighbor node. That node will have at most 1 child. Note this ++ * has no effect on the ordering of the remaining nodes. ++ * ++ * As an optimization, we choose the greater neighbor if the tree ++ * is right heavy, otherwise the left neighbor. This reduces the ++ * number of rotations needed. ++ */ ++ if (delete->avl_child[0] != NULL && delete->avl_child[1] != NULL) { ++ ++ /* ++ * choose node to swap from whichever side is taller ++ */ ++ old_balance = AVL_XBALANCE(delete); ++ left = avl_balance2child[old_balance + 1]; ++ right = 1 - left; ++ ++ /* ++ * get to the previous value'd node ++ * (down 1 left, as far as possible right) ++ */ ++ for (node = delete->avl_child[left]; ++ node->avl_child[right] != NULL; ++ node = node->avl_child[right]) ++ ; ++ ++ /* ++ * create a temp placeholder for 'node' ++ * move 'node' to delete's spot in the tree ++ */ ++ tmp = *node; ++ ++ *node = *delete; ++ if (node->avl_child[left] == node) ++ node->avl_child[left] = &tmp; ++ ++ parent = AVL_XPARENT(node); ++ if (parent != NULL) ++ parent->avl_child[AVL_XCHILD(node)] = node; ++ else ++ tree->avl_root = node; ++ AVL_SETPARENT(node->avl_child[left], node); ++ AVL_SETPARENT(node->avl_child[right], node); ++ ++ /* ++ * Put tmp where node used to be (just temporary). ++ * It always has a parent and at most 1 child. ++ */ ++ delete = &tmp; ++ parent = AVL_XPARENT(delete); ++ parent->avl_child[AVL_XCHILD(delete)] = delete; ++ which_child = (delete->avl_child[1] != 0); ++ if (delete->avl_child[which_child] != NULL) ++ AVL_SETPARENT(delete->avl_child[which_child], delete); ++ } ++ ++ ++ /* ++ * Here we know "delete" is at least partially a leaf node. It can ++ * be easily removed from the tree. ++ */ ++ ASSERT(tree->avl_numnodes > 0); ++ --tree->avl_numnodes; ++ parent = AVL_XPARENT(delete); ++ which_child = AVL_XCHILD(delete); ++ if (delete->avl_child[0] != NULL) ++ node = delete->avl_child[0]; ++ else ++ node = delete->avl_child[1]; ++ ++ /* ++ * Connect parent directly to node (leaving out delete). 
++ */ ++ if (node != NULL) { ++ AVL_SETPARENT(node, parent); ++ AVL_SETCHILD(node, which_child); ++ } ++ if (parent == NULL) { ++ tree->avl_root = node; ++ return; ++ } ++ parent->avl_child[which_child] = node; ++ ++ ++ /* ++ * Since the subtree is now shorter, begin adjusting parent balances ++ * and performing any needed rotations. ++ */ ++ do { ++ ++ /* ++ * Move up the tree and adjust the balance ++ * ++ * Capture the parent and which_child values for the next ++ * iteration before any rotations occur. ++ */ ++ node = parent; ++ old_balance = AVL_XBALANCE(node); ++ new_balance = old_balance - avl_child2balance[which_child]; ++ parent = AVL_XPARENT(node); ++ which_child = AVL_XCHILD(node); ++ ++ /* ++ * If a node was in perfect balance but isn't anymore then ++ * we can stop, since the height didn't change above this point ++ * due to a deletion. ++ */ ++ if (old_balance == 0) { ++ AVL_SETBALANCE(node, new_balance); ++ break; ++ } ++ ++ /* ++ * If the new balance is zero, we don't need to rotate ++ * else ++ * need a rotation to fix the balance. ++ * If the rotation doesn't change the height ++ * of the sub-tree we have finished adjusting. ++ */ ++ if (new_balance == 0) ++ AVL_SETBALANCE(node, new_balance); ++ else if (!avl_rotation(tree, node, new_balance)) ++ break; ++ } while (parent != NULL); ++} ++ ++#define AVL_REINSERT(tree, obj) \ ++ avl_remove((tree), (obj)); \ ++ avl_add((tree), (obj)) ++ ++boolean_t ++avl_update_lt(avl_tree_t *t, void *obj) ++{ ++ void *neighbor; ++ ++ ASSERT(((neighbor = AVL_NEXT(t, obj)) == NULL) || ++ (t->avl_compar(obj, neighbor) <= 0)); ++ ++ neighbor = AVL_PREV(t, obj); ++ if ((neighbor != NULL) && (t->avl_compar(obj, neighbor) < 0)) { ++ AVL_REINSERT(t, obj); ++ return (B_TRUE); ++ } ++ ++ return (B_FALSE); ++} ++ ++boolean_t ++avl_update_gt(avl_tree_t *t, void *obj) ++{ ++ void *neighbor; ++ ++ ASSERT(((neighbor = AVL_PREV(t, obj)) == NULL) || ++ (t->avl_compar(obj, neighbor) >= 0)); ++ ++ neighbor = AVL_NEXT(t, obj); ++ if ((neighbor != NULL) && (t->avl_compar(obj, neighbor) > 0)) { ++ AVL_REINSERT(t, obj); ++ return (B_TRUE); ++ } ++ ++ return (B_FALSE); ++} ++ ++boolean_t ++avl_update(avl_tree_t *t, void *obj) ++{ ++ void *neighbor; ++ ++ neighbor = AVL_PREV(t, obj); ++ if ((neighbor != NULL) && (t->avl_compar(obj, neighbor) < 0)) { ++ AVL_REINSERT(t, obj); ++ return (B_TRUE); ++ } ++ ++ neighbor = AVL_NEXT(t, obj); ++ if ((neighbor != NULL) && (t->avl_compar(obj, neighbor) > 0)) { ++ AVL_REINSERT(t, obj); ++ return (B_TRUE); ++ } ++ ++ return (B_FALSE); ++} ++ ++/* ++ * initialize a new AVL tree ++ */ ++void ++avl_create(avl_tree_t *tree, int (*compar) (const void *, const void *), ++ size_t size, size_t offset) ++{ ++ ASSERT(tree); ++ ASSERT(compar); ++ ASSERT(size > 0); ++ ASSERT(size >= offset + sizeof (avl_node_t)); ++#ifdef _LP64 ++ ASSERT((offset & 0x7) == 0); ++#endif ++ ++ tree->avl_compar = compar; ++ tree->avl_root = NULL; ++ tree->avl_numnodes = 0; ++ tree->avl_size = size; ++ tree->avl_offset = offset; ++} ++ ++/* ++ * Delete a tree. ++ */ ++/* ARGSUSED */ ++void ++avl_destroy(avl_tree_t *tree) ++{ ++ ASSERT(tree); ++ ASSERT(tree->avl_numnodes == 0); ++ ASSERT(tree->avl_root == NULL); ++} ++ ++ ++/* ++ * Return the number of nodes in an AVL tree. 
++ */ ++ulong_t ++avl_numnodes(avl_tree_t *tree) ++{ ++ ASSERT(tree); ++ return (tree->avl_numnodes); ++} ++ ++boolean_t ++avl_is_empty(avl_tree_t *tree) ++{ ++ ASSERT(tree); ++ return (tree->avl_numnodes == 0); ++} ++ ++#define CHILDBIT (1L) ++ ++/* ++ * Post-order tree walk used to visit all tree nodes and destroy the tree ++ * in post order. This is used for destroying a tree w/o paying any cost ++ * for rebalancing it. ++ * ++ * example: ++ * ++ * void *cookie = NULL; ++ * my_data_t *node; ++ * ++ * while ((node = avl_destroy_nodes(tree, &cookie)) != NULL) ++ * free(node); ++ * avl_destroy(tree); ++ * ++ * The cookie is really an avl_node_t to the current node's parent and ++ * an indication of which child you looked at last. ++ * ++ * On input, a cookie value of CHILDBIT indicates the tree is done. ++ */ ++void * ++avl_destroy_nodes(avl_tree_t *tree, void **cookie) ++{ ++ avl_node_t *node; ++ avl_node_t *parent; ++ int child; ++ void *first; ++ size_t off = tree->avl_offset; ++ ++ /* ++ * Initial calls go to the first node or it's right descendant. ++ */ ++ if (*cookie == NULL) { ++ first = avl_first(tree); ++ ++ /* ++ * deal with an empty tree ++ */ ++ if (first == NULL) { ++ *cookie = (void *)CHILDBIT; ++ return (NULL); ++ } ++ ++ node = AVL_DATA2NODE(first, off); ++ parent = AVL_XPARENT(node); ++ goto check_right_side; ++ } ++ ++ /* ++ * If there is no parent to return to we are done. ++ */ ++ parent = (avl_node_t *)((uintptr_t)(*cookie) & ~CHILDBIT); ++ if (parent == NULL) { ++ if (tree->avl_root != NULL) { ++ ASSERT(tree->avl_numnodes == 1); ++ tree->avl_root = NULL; ++ tree->avl_numnodes = 0; ++ } ++ return (NULL); ++ } ++ ++ /* ++ * Remove the child pointer we just visited from the parent and tree. ++ */ ++ child = (uintptr_t)(*cookie) & CHILDBIT; ++ parent->avl_child[child] = NULL; ++ ASSERT(tree->avl_numnodes > 1); ++ --tree->avl_numnodes; ++ ++ /* ++ * If we just did a right child or there isn't one, go up to parent. ++ */ ++ if (child == 1 || parent->avl_child[1] == NULL) { ++ node = parent; ++ parent = AVL_XPARENT(parent); ++ goto done; ++ } ++ ++ /* ++ * Do parent's right child, then leftmost descendent. ++ */ ++ node = parent->avl_child[1]; ++ while (node->avl_child[0] != NULL) { ++ parent = node; ++ node = node->avl_child[0]; ++ } ++ ++ /* ++ * If here, we moved to a left child. It may have one ++ * child on the right (when balance == +1). 
++ */ ++check_right_side: ++ if (node->avl_child[1] != NULL) { ++ ASSERT(AVL_XBALANCE(node) == 1); ++ parent = node; ++ node = node->avl_child[1]; ++ ASSERT(node->avl_child[0] == NULL && ++ node->avl_child[1] == NULL); ++ } else { ++ ASSERT(AVL_XBALANCE(node) <= 0); ++ } ++ ++done: ++ if (parent == NULL) { ++ *cookie = (void *)CHILDBIT; ++ ASSERT(node == tree->avl_root); ++ } else { ++ *cookie = (void *)((uintptr_t)parent | AVL_XCHILD(node)); ++ } ++ ++ return (AVL_NODE2DATA(node, off)); ++} ++ ++#if defined(_KERNEL) && defined(HAVE_SPL) ++ ++static int avl_init(void) { return 0; } ++static int avl_fini(void) { return 0; } ++ ++spl_module_init(avl_init); ++spl_module_exit(avl_fini); ++ ++MODULE_DESCRIPTION("Generic AVL tree implementation"); ++MODULE_AUTHOR(ZFS_META_AUTHOR); ++MODULE_LICENSE(ZFS_META_LICENSE); ++ ++EXPORT_SYMBOL(avl_create); ++EXPORT_SYMBOL(avl_find); ++EXPORT_SYMBOL(avl_insert); ++EXPORT_SYMBOL(avl_insert_here); ++EXPORT_SYMBOL(avl_walk); ++EXPORT_SYMBOL(avl_first); ++EXPORT_SYMBOL(avl_last); ++EXPORT_SYMBOL(avl_nearest); ++EXPORT_SYMBOL(avl_add); ++EXPORT_SYMBOL(avl_remove); ++EXPORT_SYMBOL(avl_numnodes); ++EXPORT_SYMBOL(avl_destroy_nodes); ++EXPORT_SYMBOL(avl_destroy); ++#endif +diff -uNr linux-3.2.33-go.orig/fs/zfs/avl/Makefile linux-3.2.33-go/fs/zfs/avl/Makefile +--- linux-3.2.33-go.orig/fs/zfs/avl/Makefile 1970-01-01 01:00:00.000000000 +0100 ++++ linux-3.2.33-go/fs/zfs/avl/Makefile 2012-11-16 23:25:34.357039243 +0100 +@@ -0,0 +1,7 @@ ++MODULE := zavl ++ ++EXTRA_CFLAGS = $(ZFS_MODULE_CFLAGS) -Wno-unused-but-set-variable -DHAVE_SPL -D_KERNEL -DTEXT_DOMAIN=\"zfs-linux-kernel\" -DNDEBUG ++ ++obj-$(CONFIG_ZFS) := $(MODULE).o ++ ++$(MODULE)-objs += avl.o +diff -uNr linux-3.2.33-go.orig/fs/zfs/avl/Makefile.in linux-3.2.33-go/fs/zfs/avl/Makefile.in +--- linux-3.2.33-go.orig/fs/zfs/avl/Makefile.in 1970-01-01 01:00:00.000000000 +0100 ++++ linux-3.2.33-go/fs/zfs/avl/Makefile.in 2012-11-16 23:25:34.355039267 +0100 +@@ -0,0 +1,7 @@ ++MODULE := zavl ++ ++EXTRA_CFLAGS = $(ZFS_MODULE_CFLAGS) @KERNELCPPFLAGS@ ++ ++obj-$(CONFIG_ZFS) := $(MODULE).o ++ ++$(MODULE)-objs += @top_srcdir@/module/avl/avl.o +diff -uNr linux-3.2.33-go.orig/fs/zfs/Kbuild linux-3.2.33-go/fs/zfs/Kbuild +--- linux-3.2.33-go.orig/fs/zfs/Kbuild 1970-01-01 01:00:00.000000000 +0100 ++++ linux-3.2.33-go/fs/zfs/Kbuild 2012-11-16 23:25:34.379038989 +0100 +@@ -0,0 +1,11 @@ ++ZFS_MODULE_CFLAGS = -I$(srctree)/include/zfs -I$(srctree)/include/spl ++ZFS_MODULE_CFLAGS += -include $(srctree)/spl_config.h -include $(srctree)/zfs_config.h ++export ZFS_MODULE_CFLAGS ++ ++obj-$(CONFIG_ZFS) := ++obj-$(CONFIG_ZFS) += avl/ ++obj-$(CONFIG_ZFS) += nvpair/ ++obj-$(CONFIG_ZFS) += unicode/ ++obj-$(CONFIG_ZFS) += zcommon/ ++obj-$(CONFIG_ZFS) += zfs/ ++obj-$(CONFIG_ZFS) += zpios/ +diff -uNr linux-3.2.33-go.orig/fs/zfs/Kconfig linux-3.2.33-go/fs/zfs/Kconfig +--- linux-3.2.33-go.orig/fs/zfs/Kconfig 1970-01-01 01:00:00.000000000 +0100 ++++ linux-3.2.33-go/fs/zfs/Kconfig 2012-11-16 23:25:34.378039001 +0100 +@@ -0,0 +1,14 @@ ++config ZFS ++ tristate "ZFS filesystem support" ++ depends on SPL ++ depends on EFI_PARTITION ++ select ZLIB_INFLATE ++ select ZLIB_DEFLATE ++ help ++ This is the ZFS filesystem from the ZFS On Linux project. ++ ++ See http://zfsonlinux.org/ ++ ++ To compile this file system support as a module, choose M here. ++ ++ If unsure, say N. 
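The avl.c added above is the generic embedded-node AVL library that the rest of this ZFS patch builds on: callers place an avl_node_t inside their own structure and pass avl_create() the structure size and the node's offset, a comparator returning exactly -1/0/+1 drives avl_find()/avl_add(), and teardown uses the post-order avl_destroy_nodes() walk shown in that file's comment. The sketch below (not part of the patch) only illustrates that calling convention; it assumes a userland-style build against the same avl.h interface, and my_node_t / my_compare are made-up names.

/*
 * Minimal illustrative consumer of the embedded-node AVL interface.
 * Assumes the avl.h shipped with this patch (or the userland libavl
 * equivalent); my_node_t and my_compare are hypothetical.
 */
#include <stddef.h>
#include <stdlib.h>
#include <sys/avl.h>

typedef struct my_node {
	int		key;
	avl_node_t	link;		/* embedded AVL linkage */
} my_node_t;

/* comparator must return -1, 0 or +1, as asserted by avl_find()/avl_add() */
static int
my_compare(const void *a, const void *b)
{
	const my_node_t *l = a;
	const my_node_t *r = b;

	if (l->key < r->key)
		return (-1);
	if (l->key > r->key)
		return (1);
	return (0);
}

int
main(void)
{
	avl_tree_t tree;
	my_node_t *n;
	void *cookie = NULL;
	int i;

	/* tell the tree where the avl_node_t lives inside my_node_t */
	avl_create(&tree, my_compare, sizeof (my_node_t),
	    offsetof(my_node_t, link));

	for (i = 0; i < 10; i++) {
		n = calloc(1, sizeof (*n));
		n->key = i;
		avl_add(&tree, n);	/* asserts/panics on duplicate keys */
	}

	/* in-order walk from the smallest key */
	for (n = avl_first(&tree); n != NULL; n = AVL_NEXT(&tree, n))
		;			/* use n->key here */

	/* post-order teardown without rebalancing, per avl_destroy_nodes() */
	while ((n = avl_destroy_nodes(&tree, &cookie)) != NULL)
		free(n);
	avl_destroy(&tree);

	return (0);
}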
+diff -uNr linux-3.2.33-go.orig/fs/zfs/Makefile linux-3.2.33-go/fs/zfs/Makefile +--- linux-3.2.33-go.orig/fs/zfs/Makefile 1970-01-01 01:00:00.000000000 +0100 ++++ linux-3.2.33-go/fs/zfs/Makefile 2012-11-16 23:25:34.355039267 +0100 +@@ -0,0 +1,65 @@ ++subdir-m += avl ++subdir-m += nvpair ++subdir-m += unicode ++subdir-m += zcommon ++subdir-m += zfs ++subdir-m += zpios ++ ++ZFS_MODULE_CFLAGS += -include /usr/src/linux-3.2.33-go/spl_config.h ++ZFS_MODULE_CFLAGS += -include /root/zfs-0.6.0-rc12/zfs_config.h ++ZFS_MODULE_CFLAGS += -I/root/zfs-0.6.0-rc12/include -I/usr/src/linux-3.2.33-go/include -I/usr/src/linux-3.2.33-go ++export ZFS_MODULE_CFLAGS ++ ++modules: ++ @# Make the exported SPL symbols available to these modules. ++ @# They may be in the root of SPL_OBJ when building against ++ @# installed devel headers, or they may be in the module ++ @# subdirectory when building against the spl source tree. ++ @if [ -f /usr/src/linux-3.2.33-go/NONE ]; then \ ++ /bin/cp /usr/src/linux-3.2.33-go/NONE .; \ ++ elif [ -f /usr/src/linux-3.2.33-go/module/NONE ]; then \ ++ /bin/cp /usr/src/linux-3.2.33-go/module/NONE .; \ ++ else \ ++ echo -e "\n" \ ++ "*** Missing spl symbols ensure you have built the spl:\n" \ ++ "*** - /usr/src/linux-3.2.33-go/NONE, or\n" \ ++ "*** - /usr/src/linux-3.2.33-go/module/NONE\n"; \ ++ exit 1; \ ++ fi ++ $(MAKE) -C /usr/src/linux-3.6.0-sabayon SUBDIRS=`pwd` O=/usr/src/linux-3.6.0-sabayon CONFIG_ZFS=m $@ ++ ++clean: ++ @# Only cleanup the kernel build directories when CONFIG_KERNEL ++ @# is defined. This indicates that kernel modules should be built. ++# $(MAKE) -C /usr/src/linux-3.6.0-sabayon SUBDIRS=`pwd` O=/usr/src/linux-3.6.0-sabayon $@ ++ ++ if [ -f NONE ]; then $(RM) NONE; fi ++ if [ -f NONE ]; then $(RM) NONE; fi ++ if [ -f Module.markers ]; then $(RM) Module.markers; fi ++ ++modules_install: ++ @# Install the kernel modules ++ $(MAKE) -C /usr/src/linux-3.6.0-sabayon SUBDIRS=`pwd` \ ++ INSTALL_MOD_PATH=$(DESTDIR) \ ++ INSTALL_MOD_DIR=addon/zfs $@ ++ @# Remove extraneous build products when packaging ++ if [ -n "$(DESTDIR)" ]; then \ ++ find $(DESTDIR)/lib/modules/3.6.0-sabayon \ ++ -name 'modules.*' | xargs $(RM); \ ++ fi ++ sysmap=$(DESTDIR)/boot/System.map-3.6.0-sabayon; \ ++ if [ -f $$sysmap ]; then \ ++ depmod -ae -F $$sysmap 3.6.0-sabayon; \ ++ fi ++ ++modules_uninstall: ++ @# Uninstall the kernel modules ++ $(RM) -R $(DESTDIR)/lib/modules/3.6.0-sabayon/addon/zfs ++ ++distdir: ++ ++distclean maintainer-clean: clean ++install: modules_install ++uninstall: modules_uninstall ++all: modules ++check: +diff -uNr linux-3.2.33-go.orig/fs/zfs/Makefile.in linux-3.2.33-go/fs/zfs/Makefile.in +--- linux-3.2.33-go.orig/fs/zfs/Makefile.in 1970-01-01 01:00:00.000000000 +0100 ++++ linux-3.2.33-go/fs/zfs/Makefile.in 2012-11-16 23:25:34.355039267 +0100 +@@ -0,0 +1,65 @@ ++subdir-m += avl ++subdir-m += nvpair ++subdir-m += unicode ++subdir-m += zcommon ++subdir-m += zfs ++subdir-m += zpios ++ ++ZFS_MODULE_CFLAGS += -include @SPL_OBJ@/spl_config.h ++ZFS_MODULE_CFLAGS += -include @abs_top_builddir@/zfs_config.h ++ZFS_MODULE_CFLAGS += -I@abs_top_srcdir@/include -I@SPL@/include -I@SPL@ ++export ZFS_MODULE_CFLAGS ++ ++modules: ++ @# Make the exported SPL symbols available to these modules. ++ @# They may be in the root of SPL_OBJ when building against ++ @# installed devel headers, or they may be in the module ++ @# subdirectory when building against the spl source tree. 
++ @if [ -f @SPL_OBJ@/@SPL_SYMBOLS@ ]; then \ ++ /bin/cp @SPL_OBJ@/@SPL_SYMBOLS@ .; \ ++ elif [ -f @SPL_OBJ@/module/@SPL_SYMBOLS@ ]; then \ ++ /bin/cp @SPL_OBJ@/module/@SPL_SYMBOLS@ .; \ ++ else \ ++ echo -e "\n" \ ++ "*** Missing spl symbols ensure you have built the spl:\n" \ ++ "*** - @SPL_OBJ@/@SPL_SYMBOLS@, or\n" \ ++ "*** - @SPL_OBJ@/module/@SPL_SYMBOLS@\n"; \ ++ exit 1; \ ++ fi ++ $(MAKE) -C @LINUX_OBJ@ SUBDIRS=`pwd` @KERNELMAKE_PARAMS@ CONFIG_ZFS=m $@ ++ ++clean: ++ @# Only cleanup the kernel build directories when CONFIG_KERNEL ++ @# is defined. This indicates that kernel modules should be built. ++@CONFIG_KERNEL_TRUE@ $(MAKE) -C @LINUX_OBJ@ SUBDIRS=`pwd` @KERNELMAKE_PARAMS@ $@ ++ ++ if [ -f @SPL_SYMBOLS@ ]; then $(RM) @SPL_SYMBOLS@; fi ++ if [ -f @LINUX_SYMBOLS@ ]; then $(RM) @LINUX_SYMBOLS@; fi ++ if [ -f Module.markers ]; then $(RM) Module.markers; fi ++ ++modules_install: ++ @# Install the kernel modules ++ $(MAKE) -C @LINUX_OBJ@ SUBDIRS=`pwd` \ ++ INSTALL_MOD_PATH=$(DESTDIR) \ ++ INSTALL_MOD_DIR=addon/zfs $@ ++ @# Remove extraneous build products when packaging ++ if [ -n "$(DESTDIR)" ]; then \ ++ find $(DESTDIR)/lib/modules/@LINUX_VERSION@ \ ++ -name 'modules.*' | xargs $(RM); \ ++ fi ++ sysmap=$(DESTDIR)/boot/System.map-@LINUX_VERSION@; \ ++ if [ -f $$sysmap ]; then \ ++ depmod -ae -F $$sysmap @LINUX_VERSION@; \ ++ fi ++ ++modules_uninstall: ++ @# Uninstall the kernel modules ++ $(RM) -R $(DESTDIR)/lib/modules/@LINUX_VERSION@/addon/zfs ++ ++distdir: ++ ++distclean maintainer-clean: clean ++install: modules_install ++uninstall: modules_uninstall ++all: modules ++check: +diff -uNr linux-3.2.33-go.orig/fs/zfs/nvpair/Makefile linux-3.2.33-go/fs/zfs/nvpair/Makefile +--- linux-3.2.33-go.orig/fs/zfs/nvpair/Makefile 1970-01-01 01:00:00.000000000 +0100 ++++ linux-3.2.33-go/fs/zfs/nvpair/Makefile 2012-11-16 23:25:34.359039219 +0100 +@@ -0,0 +1,9 @@ ++MODULE := znvpair ++ ++EXTRA_CFLAGS = $(ZFS_MODULE_CFLAGS) -Wno-unused-but-set-variable -DHAVE_SPL -D_KERNEL -DTEXT_DOMAIN=\"zfs-linux-kernel\" -DNDEBUG ++ ++obj-$(CONFIG_ZFS) := $(MODULE).o ++ ++$(MODULE)-objs += nvpair.o ++$(MODULE)-objs += nvpair_alloc_spl.o ++$(MODULE)-objs += nvpair_alloc_fixed.o +diff -uNr linux-3.2.33-go.orig/fs/zfs/nvpair/Makefile.in linux-3.2.33-go/fs/zfs/nvpair/Makefile.in +--- linux-3.2.33-go.orig/fs/zfs/nvpair/Makefile.in 1970-01-01 01:00:00.000000000 +0100 ++++ linux-3.2.33-go/fs/zfs/nvpair/Makefile.in 2012-11-16 23:25:34.354039278 +0100 +@@ -0,0 +1,9 @@ ++MODULE := znvpair ++ ++EXTRA_CFLAGS = $(ZFS_MODULE_CFLAGS) @KERNELCPPFLAGS@ ++ ++obj-$(CONFIG_ZFS) := $(MODULE).o ++ ++$(MODULE)-objs += @top_srcdir@/module/nvpair/nvpair.o ++$(MODULE)-objs += @top_srcdir@/module/nvpair/nvpair_alloc_spl.o ++$(MODULE)-objs += @top_srcdir@/module/nvpair/nvpair_alloc_fixed.o +diff -uNr linux-3.2.33-go.orig/fs/zfs/nvpair/nvpair_alloc_fixed.c linux-3.2.33-go/fs/zfs/nvpair/nvpair_alloc_fixed.c +--- linux-3.2.33-go.orig/fs/zfs/nvpair/nvpair_alloc_fixed.c 1970-01-01 01:00:00.000000000 +0100 ++++ linux-3.2.33-go/fs/zfs/nvpair/nvpair_alloc_fixed.c 2012-11-16 23:25:34.354039278 +0100 +@@ -0,0 +1,124 @@ ++/* ++ * CDDL HEADER START ++ * ++ * The contents of this file are subject to the terms of the ++ * Common Development and Distribution License (the "License"). ++ * You may not use this file except in compliance with the License. ++ * ++ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE ++ * or http://www.opensolaris.org/os/licensing. 
++ * See the License for the specific language governing permissions ++ * and limitations under the License. ++ * ++ * When distributing Covered Code, include this CDDL HEADER in each ++ * file and include the License file at usr/src/OPENSOLARIS.LICENSE. ++ * If applicable, add the following below this CDDL HEADER, with the ++ * fields enclosed by brackets "[]" replaced with your own identifying ++ * information: Portions Copyright [yyyy] [name of copyright owner] ++ * ++ * CDDL HEADER END ++ */ ++ ++/* ++ * Copyright 2006 Sun Microsystems, Inc. All rights reserved. ++ * Use is subject to license terms. ++ */ ++ ++ ++ ++#include ++#include ++#include ++#include ++#if defined(_KERNEL) && !defined(_BOOT) ++#include ++#else ++#include ++#include ++#endif ++ ++/* ++ * This allocator is very simple. ++ * - it uses a pre-allocated buffer for memory allocations. ++ * - it does _not_ free memory in the pre-allocated buffer. ++ * ++ * The reason for the selected implemention is simplicity. ++ * This allocator is designed for the usage in interrupt context when ++ * the caller may not wait for free memory. ++ */ ++ ++/* pre-allocated buffer for memory allocations */ ++typedef struct nvbuf { ++ uintptr_t nvb_buf; /* address of pre-allocated buffer */ ++ uintptr_t nvb_lim; /* limit address in the buffer */ ++ uintptr_t nvb_cur; /* current address in the buffer */ ++} nvbuf_t; ++ ++/* ++ * Initialize the pre-allocated buffer allocator. The caller needs to supply ++ * ++ * buf address of pre-allocated buffer ++ * bufsz size of pre-allocated buffer ++ * ++ * nv_fixed_init() calculates the remaining members of nvbuf_t. ++ */ ++static int ++nv_fixed_init(nv_alloc_t *nva, va_list valist) ++{ ++ uintptr_t base = va_arg(valist, uintptr_t); ++ uintptr_t lim = base + va_arg(valist, size_t); ++ nvbuf_t *nvb = (nvbuf_t *)P2ROUNDUP(base, sizeof (uintptr_t)); ++ ++ if (base == 0 || (uintptr_t)&nvb[1] > lim) ++ return (EINVAL); ++ ++ nvb->nvb_buf = (uintptr_t)&nvb[0]; ++ nvb->nvb_cur = (uintptr_t)&nvb[1]; ++ nvb->nvb_lim = lim; ++ nva->nva_arg = nvb; ++ ++ return (0); ++} ++ ++static void * ++nv_fixed_alloc(nv_alloc_t *nva, size_t size) ++{ ++ nvbuf_t *nvb = nva->nva_arg; ++ uintptr_t new = nvb->nvb_cur; ++ ++ if (size == 0 || new + size > nvb->nvb_lim) ++ return (NULL); ++ ++ nvb->nvb_cur = P2ROUNDUP(new + size, sizeof (uintptr_t)); ++ ++ return ((void *)new); ++} ++ ++/*ARGSUSED*/ ++static void ++nv_fixed_free(nv_alloc_t *nva, void *buf, size_t size) ++{ ++ /* don't free memory in the pre-allocated buffer */ ++} ++ ++static void ++nv_fixed_reset(nv_alloc_t *nva) ++{ ++ nvbuf_t *nvb = nva->nva_arg; ++ ++ nvb->nvb_cur = (uintptr_t)&nvb[1]; ++} ++ ++const nv_alloc_ops_t nv_fixed_ops_def = { ++ nv_fixed_init, /* nv_ao_init() */ ++ NULL, /* nv_ao_fini() */ ++ nv_fixed_alloc, /* nv_ao_alloc() */ ++ nv_fixed_free, /* nv_ao_free() */ ++ nv_fixed_reset /* nv_ao_reset() */ ++}; ++ ++const nv_alloc_ops_t *nv_fixed_ops = &nv_fixed_ops_def; ++ ++#if defined(_KERNEL) && defined(HAVE_SPL) ++EXPORT_SYMBOL(nv_fixed_ops); ++#endif +diff -uNr linux-3.2.33-go.orig/fs/zfs/nvpair/nvpair_alloc_spl.c linux-3.2.33-go/fs/zfs/nvpair/nvpair_alloc_spl.c +--- linux-3.2.33-go.orig/fs/zfs/nvpair/nvpair_alloc_spl.c 1970-01-01 01:00:00.000000000 +0100 ++++ linux-3.2.33-go/fs/zfs/nvpair/nvpair_alloc_spl.c 2012-11-16 23:25:34.354039278 +0100 +@@ -0,0 +1,75 @@ ++/* ++ * CDDL HEADER START ++ * ++ * The contents of this file are subject to the terms of the ++ * Common Development and Distribution License, Version 1.0 only ++ * (the "License"). 
You may not use this file except in compliance ++ * with the License. ++ * ++ * You can obtain a copy of the license at * usr/src/OPENSOLARIS.LICENSE ++ * or http://www.opensolaris.org/os/licensing. ++ * See the License for the specific language governing permissions ++ * and limitations under the License. ++ * ++ * When distributing Covered Code, include this CDDL HEADER in each ++ * file and include the License file at usr/src/OPENSOLARIS.LICENSE. ++ * If applicable, add the following below this CDDL HEADER, with the ++ * fields enclosed by brackets "[]" replaced with your own identifying ++ * information: Portions Copyright [yyyy] [name of copyright owner] ++ * ++ * CDDL HEADER END ++ */ ++/* ++ * Copyright 2004 Sun Microsystems, Inc. All rights reserved. ++ * Use is subject to license terms. ++ */ ++ ++#include ++#include ++ ++static void * ++nv_alloc_sleep_spl(nv_alloc_t *nva, size_t size) ++{ ++ return (kmem_alloc(size, KM_SLEEP | KM_NODEBUG)); ++} ++ ++static void * ++nv_alloc_nosleep_spl(nv_alloc_t *nva, size_t size) ++{ ++ return (kmem_alloc(size, KM_NOSLEEP)); ++} ++ ++static void ++nv_free_spl(nv_alloc_t *nva, void *buf, size_t size) ++{ ++ kmem_free(buf, size); ++} ++ ++const nv_alloc_ops_t spl_sleep_ops_def = { ++ NULL, /* nv_ao_init() */ ++ NULL, /* nv_ao_fini() */ ++ nv_alloc_sleep_spl, /* nv_ao_alloc() */ ++ nv_free_spl, /* nv_ao_free() */ ++ NULL /* nv_ao_reset() */ ++}; ++ ++const nv_alloc_ops_t spl_nosleep_ops_def = { ++ NULL, /* nv_ao_init() */ ++ NULL, /* nv_ao_fini() */ ++ nv_alloc_nosleep_spl, /* nv_ao_alloc() */ ++ nv_free_spl, /* nv_ao_free() */ ++ NULL /* nv_ao_reset() */ ++}; ++ ++nv_alloc_t nv_alloc_sleep_def = { ++ &spl_sleep_ops_def, ++ NULL ++}; ++ ++nv_alloc_t nv_alloc_nosleep_def = { ++ &spl_nosleep_ops_def, ++ NULL ++}; ++ ++nv_alloc_t *nv_alloc_sleep = &nv_alloc_sleep_def; ++nv_alloc_t *nv_alloc_nosleep = &nv_alloc_nosleep_def; +diff -uNr linux-3.2.33-go.orig/fs/zfs/nvpair/nvpair.c linux-3.2.33-go/fs/zfs/nvpair/nvpair.c +--- linux-3.2.33-go.orig/fs/zfs/nvpair/nvpair.c 1970-01-01 01:00:00.000000000 +0100 ++++ linux-3.2.33-go/fs/zfs/nvpair/nvpair.c 2012-11-16 23:25:34.354039278 +0100 +@@ -0,0 +1,3425 @@ ++/* ++ * CDDL HEADER START ++ * ++ * The contents of this file are subject to the terms of the ++ * Common Development and Distribution License (the "License"). ++ * You may not use this file except in compliance with the License. ++ * ++ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE ++ * or http://www.opensolaris.org/os/licensing. ++ * See the License for the specific language governing permissions ++ * and limitations under the License. ++ * ++ * When distributing Covered Code, include this CDDL HEADER in each ++ * file and include the License file at usr/src/OPENSOLARIS.LICENSE. ++ * If applicable, add the following below this CDDL HEADER, with the ++ * fields enclosed by brackets "[]" replaced with your own identifying ++ * information: Portions Copyright [yyyy] [name of copyright owner] ++ * ++ * CDDL HEADER END ++ */ ++ ++/* ++ * Copyright (c) 2000, 2010, Oracle and/or its affiliates. All rights reserved. 
++ */ ++ ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++ ++#if defined(_KERNEL) && !defined(_BOOT) ++#include ++#include ++#include ++#else ++#include ++#include ++#include ++#include ++#endif ++ ++#ifndef offsetof ++#define offsetof(s, m) ((size_t)(&(((s *)0)->m))) ++#endif ++#define skip_whitespace(p) while ((*(p) == ' ') || (*(p) == '\t')) p++ ++ ++/* ++ * nvpair.c - Provides kernel & userland interfaces for manipulating ++ * name-value pairs. ++ * ++ * Overview Diagram ++ * ++ * +--------------+ ++ * | nvlist_t | ++ * |--------------| ++ * | nvl_version | ++ * | nvl_nvflag | ++ * | nvl_priv -+-+ ++ * | nvl_flag | | ++ * | nvl_pad | | ++ * +--------------+ | ++ * V ++ * +--------------+ last i_nvp in list ++ * | nvpriv_t | +---------------------> ++ * |--------------| | ++ * +--+- nvp_list | | +------------+ ++ * | | nvp_last -+--+ + nv_alloc_t | ++ * | | nvp_curr | |------------| ++ * | | nvp_nva -+----> | nva_ops | ++ * | | nvp_stat | | nva_arg | ++ * | +--------------+ +------------+ ++ * | ++ * +-------+ ++ * V ++ * +---------------------+ +-------------------+ ++ * | i_nvp_t | +-->| i_nvp_t | +--> ++ * |---------------------| | |-------------------| | ++ * | nvi_next -+--+ | nvi_next -+--+ ++ * | nvi_prev (NULL) | <----+ nvi_prev | ++ * | . . . . . . . . . . | | . . . . . . . . . | ++ * | nvp (nvpair_t) | | nvp (nvpair_t) | ++ * | - nvp_size | | - nvp_size | ++ * | - nvp_name_sz | | - nvp_name_sz | ++ * | - nvp_value_elem | | - nvp_value_elem | ++ * | - nvp_type | | - nvp_type | ++ * | - data ... | | - data ... | ++ * +---------------------+ +-------------------+ ++ * ++ * ++ * ++ * +---------------------+ +---------------------+ ++ * | i_nvp_t | +--> +-->| i_nvp_t (last) | ++ * |---------------------| | | |---------------------| ++ * | nvi_next -+--+ ... --+ | nvi_next (NULL) | ++ * <-+- nvi_prev |<-- ... <----+ nvi_prev | ++ * | . . . . . . . . . | | . . . . . . . . . | ++ * | nvp (nvpair_t) | | nvp (nvpair_t) | ++ * | - nvp_size | | - nvp_size | ++ * | - nvp_name_sz | | - nvp_name_sz | ++ * | - nvp_value_elem | | - nvp_value_elem | ++ * | - DATA_TYPE_NVLIST | | - nvp_type | ++ * | - data (embedded) | | - data ... | ++ * | nvlist name | +---------------------+ ++ * | +--------------+ | ++ * | | nvlist_t | | ++ * | |--------------| | ++ * | | nvl_version | | ++ * | | nvl_nvflag | | ++ * | | nvl_priv --+---+----> ++ * | | nvl_flag | | ++ * | | nvl_pad | | ++ * | +--------------+ | ++ * +---------------------+ ++ * ++ * ++ * N.B. nvpair_t may be aligned on 4 byte boundary, so +4 will ++ * allow value to be aligned on 8 byte boundary ++ * ++ * name_len is the length of the name string including the null terminator ++ * so it must be >= 1 ++ */ ++#define NVP_SIZE_CALC(name_len, data_len) \ ++ (NV_ALIGN((sizeof (nvpair_t)) + name_len) + NV_ALIGN(data_len)) ++ ++static int i_get_value_size(data_type_t type, const void *data, uint_t nelem); ++static int nvlist_add_common(nvlist_t *nvl, const char *name, data_type_t type, ++ uint_t nelem, const void *data); ++ ++#define NV_STAT_EMBEDDED 0x1 ++#define EMBEDDED_NVL(nvp) ((nvlist_t *)(void *)NVP_VALUE(nvp)) ++#define EMBEDDED_NVL_ARRAY(nvp) ((nvlist_t **)(void *)NVP_VALUE(nvp)) ++ ++#define NVP_VALOFF(nvp) (NV_ALIGN(sizeof (nvpair_t) + (nvp)->nvp_name_sz)) ++#define NVPAIR2I_NVP(nvp) \ ++ ((i_nvp_t *)((size_t)(nvp) - offsetof(i_nvp_t, nvi_nvp))) ++ ++ ++int ++nv_alloc_init(nv_alloc_t *nva, const nv_alloc_ops_t *nvo, /* args */ ...) 
++{ ++ va_list valist; ++ int err = 0; ++ ++ nva->nva_ops = nvo; ++ nva->nva_arg = NULL; ++ ++ va_start(valist, nvo); ++ if (nva->nva_ops->nv_ao_init != NULL) ++ err = nva->nva_ops->nv_ao_init(nva, valist); ++ va_end(valist); ++ ++ return (err); ++} ++ ++void ++nv_alloc_reset(nv_alloc_t *nva) ++{ ++ if (nva->nva_ops->nv_ao_reset != NULL) ++ nva->nva_ops->nv_ao_reset(nva); ++} ++ ++void ++nv_alloc_fini(nv_alloc_t *nva) ++{ ++ if (nva->nva_ops->nv_ao_fini != NULL) ++ nva->nva_ops->nv_ao_fini(nva); ++} ++ ++nv_alloc_t * ++nvlist_lookup_nv_alloc(nvlist_t *nvl) ++{ ++ nvpriv_t *priv; ++ ++ if (nvl == NULL || ++ (priv = (nvpriv_t *)(uintptr_t)nvl->nvl_priv) == NULL) ++ return (NULL); ++ ++ return (priv->nvp_nva); ++} ++ ++static void * ++nv_mem_zalloc(nvpriv_t *nvp, size_t size) ++{ ++ nv_alloc_t *nva = nvp->nvp_nva; ++ void *buf; ++ ++ if ((buf = nva->nva_ops->nv_ao_alloc(nva, size)) != NULL) ++ bzero(buf, size); ++ ++ return (buf); ++} ++ ++static void ++nv_mem_free(nvpriv_t *nvp, void *buf, size_t size) ++{ ++ nv_alloc_t *nva = nvp->nvp_nva; ++ ++ nva->nva_ops->nv_ao_free(nva, buf, size); ++} ++ ++static void ++nv_priv_init(nvpriv_t *priv, nv_alloc_t *nva, uint32_t stat) ++{ ++ bzero(priv, sizeof (nvpriv_t)); ++ ++ priv->nvp_nva = nva; ++ priv->nvp_stat = stat; ++} ++ ++static nvpriv_t * ++nv_priv_alloc(nv_alloc_t *nva) ++{ ++ nvpriv_t *priv; ++ ++ /* ++ * nv_mem_alloc() cannot called here because it needs the priv ++ * argument. ++ */ ++ if ((priv = nva->nva_ops->nv_ao_alloc(nva, sizeof (nvpriv_t))) == NULL) ++ return (NULL); ++ ++ nv_priv_init(priv, nva, 0); ++ ++ return (priv); ++} ++ ++/* ++ * Embedded lists need their own nvpriv_t's. We create a new ++ * nvpriv_t using the parameters and allocator from the parent ++ * list's nvpriv_t. ++ */ ++static nvpriv_t * ++nv_priv_alloc_embedded(nvpriv_t *priv) ++{ ++ nvpriv_t *emb_priv; ++ ++ if ((emb_priv = nv_mem_zalloc(priv, sizeof (nvpriv_t))) == NULL) ++ return (NULL); ++ ++ nv_priv_init(emb_priv, priv->nvp_nva, NV_STAT_EMBEDDED); ++ ++ return (emb_priv); ++} ++ ++static void ++nvlist_init(nvlist_t *nvl, uint32_t nvflag, nvpriv_t *priv) ++{ ++ nvl->nvl_version = NV_VERSION; ++ nvl->nvl_nvflag = nvflag & (NV_UNIQUE_NAME|NV_UNIQUE_NAME_TYPE); ++ nvl->nvl_priv = (uint64_t)(uintptr_t)priv; ++ nvl->nvl_flag = 0; ++ nvl->nvl_pad = 0; ++} ++ ++uint_t ++nvlist_nvflag(nvlist_t *nvl) ++{ ++ return (nvl->nvl_nvflag); ++} ++ ++/* ++ * nvlist_alloc - Allocate nvlist. ++ */ ++/*ARGSUSED1*/ ++int ++nvlist_alloc(nvlist_t **nvlp, uint_t nvflag, int kmflag) ++{ ++#if defined(_KERNEL) && !defined(_BOOT) ++ return (nvlist_xalloc(nvlp, nvflag, ++ (kmflag == KM_SLEEP ? nv_alloc_sleep : nv_alloc_nosleep))); ++#else ++ return (nvlist_xalloc(nvlp, nvflag, nv_alloc_nosleep)); ++#endif ++} ++ ++int ++nvlist_xalloc(nvlist_t **nvlp, uint_t nvflag, nv_alloc_t *nva) ++{ ++ nvpriv_t *priv; ++ ++ if (nvlp == NULL || nva == NULL) ++ return (EINVAL); ++ ++ if ((priv = nv_priv_alloc(nva)) == NULL) ++ return (ENOMEM); ++ ++ if ((*nvlp = nv_mem_zalloc(priv, ++ NV_ALIGN(sizeof (nvlist_t)))) == NULL) { ++ nv_mem_free(priv, priv, sizeof (nvpriv_t)); ++ return (ENOMEM); ++ } ++ ++ nvlist_init(*nvlp, nvflag, priv); ++ ++ return (0); ++} ++ ++/* ++ * nvp_buf_alloc - Allocate i_nvp_t for storing a new nv pair. 
++ */ ++static nvpair_t * ++nvp_buf_alloc(nvlist_t *nvl, size_t len) ++{ ++ nvpriv_t *priv = (nvpriv_t *)(uintptr_t)nvl->nvl_priv; ++ i_nvp_t *buf; ++ nvpair_t *nvp; ++ size_t nvsize; ++ ++ /* ++ * Allocate the buffer ++ */ ++ nvsize = len + offsetof(i_nvp_t, nvi_nvp); ++ ++ if ((buf = nv_mem_zalloc(priv, nvsize)) == NULL) ++ return (NULL); ++ ++ nvp = &buf->nvi_nvp; ++ nvp->nvp_size = len; ++ ++ return (nvp); ++} ++ ++/* ++ * nvp_buf_free - de-Allocate an i_nvp_t. ++ */ ++static void ++nvp_buf_free(nvlist_t *nvl, nvpair_t *nvp) ++{ ++ nvpriv_t *priv = (nvpriv_t *)(uintptr_t)nvl->nvl_priv; ++ size_t nvsize = nvp->nvp_size + offsetof(i_nvp_t, nvi_nvp); ++ ++ nv_mem_free(priv, NVPAIR2I_NVP(nvp), nvsize); ++} ++ ++/* ++ * nvp_buf_link - link a new nv pair into the nvlist. ++ */ ++static void ++nvp_buf_link(nvlist_t *nvl, nvpair_t *nvp) ++{ ++ nvpriv_t *priv = (nvpriv_t *)(uintptr_t)nvl->nvl_priv; ++ i_nvp_t *curr = NVPAIR2I_NVP(nvp); ++ ++ /* Put element at end of nvlist */ ++ if (priv->nvp_list == NULL) { ++ priv->nvp_list = priv->nvp_last = curr; ++ } else { ++ curr->nvi_prev = priv->nvp_last; ++ priv->nvp_last->nvi_next = curr; ++ priv->nvp_last = curr; ++ } ++} ++ ++/* ++ * nvp_buf_unlink - unlink an removed nvpair out of the nvlist. ++ */ ++static void ++nvp_buf_unlink(nvlist_t *nvl, nvpair_t *nvp) ++{ ++ nvpriv_t *priv = (nvpriv_t *)(uintptr_t)nvl->nvl_priv; ++ i_nvp_t *curr = NVPAIR2I_NVP(nvp); ++ ++ /* ++ * protect nvlist_next_nvpair() against walking on freed memory. ++ */ ++ if (priv->nvp_curr == curr) ++ priv->nvp_curr = curr->nvi_next; ++ ++ if (curr == priv->nvp_list) ++ priv->nvp_list = curr->nvi_next; ++ else ++ curr->nvi_prev->nvi_next = curr->nvi_next; ++ ++ if (curr == priv->nvp_last) ++ priv->nvp_last = curr->nvi_prev; ++ else ++ curr->nvi_next->nvi_prev = curr->nvi_prev; ++} ++ ++/* ++ * take a nvpair type and number of elements and make sure the are valid ++ */ ++static int ++i_validate_type_nelem(data_type_t type, uint_t nelem) ++{ ++ switch (type) { ++ case DATA_TYPE_BOOLEAN: ++ if (nelem != 0) ++ return (EINVAL); ++ break; ++ case DATA_TYPE_BOOLEAN_VALUE: ++ case DATA_TYPE_BYTE: ++ case DATA_TYPE_INT8: ++ case DATA_TYPE_UINT8: ++ case DATA_TYPE_INT16: ++ case DATA_TYPE_UINT16: ++ case DATA_TYPE_INT32: ++ case DATA_TYPE_UINT32: ++ case DATA_TYPE_INT64: ++ case DATA_TYPE_UINT64: ++ case DATA_TYPE_STRING: ++ case DATA_TYPE_HRTIME: ++ case DATA_TYPE_NVLIST: ++#if !defined(_KERNEL) ++ case DATA_TYPE_DOUBLE: ++#endif ++ if (nelem != 1) ++ return (EINVAL); ++ break; ++ case DATA_TYPE_BOOLEAN_ARRAY: ++ case DATA_TYPE_BYTE_ARRAY: ++ case DATA_TYPE_INT8_ARRAY: ++ case DATA_TYPE_UINT8_ARRAY: ++ case DATA_TYPE_INT16_ARRAY: ++ case DATA_TYPE_UINT16_ARRAY: ++ case DATA_TYPE_INT32_ARRAY: ++ case DATA_TYPE_UINT32_ARRAY: ++ case DATA_TYPE_INT64_ARRAY: ++ case DATA_TYPE_UINT64_ARRAY: ++ case DATA_TYPE_STRING_ARRAY: ++ case DATA_TYPE_NVLIST_ARRAY: ++ /* we allow arrays with 0 elements */ ++ break; ++ default: ++ return (EINVAL); ++ } ++ return (0); ++} ++ ++/* ++ * Verify nvp_name_sz and check the name string length. ++ */ ++static int ++i_validate_nvpair_name(nvpair_t *nvp) ++{ ++ if ((nvp->nvp_name_sz <= 0) || ++ (nvp->nvp_size < NVP_SIZE_CALC(nvp->nvp_name_sz, 0))) ++ return (EFAULT); ++ ++ /* verify the name string, make sure its terminated */ ++ if (NVP_NAME(nvp)[nvp->nvp_name_sz - 1] != '\0') ++ return (EFAULT); ++ ++ return (strlen(NVP_NAME(nvp)) == nvp->nvp_name_sz - 1 ? 
0 : EFAULT); ++} ++ ++static int ++i_validate_nvpair_value(data_type_t type, uint_t nelem, const void *data) ++{ ++ switch (type) { ++ case DATA_TYPE_BOOLEAN_VALUE: ++ if (*(boolean_t *)data != B_TRUE && ++ *(boolean_t *)data != B_FALSE) ++ return (EINVAL); ++ break; ++ case DATA_TYPE_BOOLEAN_ARRAY: { ++ int i; ++ ++ for (i = 0; i < nelem; i++) ++ if (((boolean_t *)data)[i] != B_TRUE && ++ ((boolean_t *)data)[i] != B_FALSE) ++ return (EINVAL); ++ break; ++ } ++ default: ++ break; ++ } ++ ++ return (0); ++} ++ ++/* ++ * This function takes a pointer to what should be a nvpair and it's size ++ * and then verifies that all the nvpair fields make sense and can be ++ * trusted. This function is used when decoding packed nvpairs. ++ */ ++static int ++i_validate_nvpair(nvpair_t *nvp) ++{ ++ data_type_t type = NVP_TYPE(nvp); ++ int size1, size2; ++ ++ /* verify nvp_name_sz, check the name string length */ ++ if (i_validate_nvpair_name(nvp) != 0) ++ return (EFAULT); ++ ++ if (i_validate_nvpair_value(type, NVP_NELEM(nvp), NVP_VALUE(nvp)) != 0) ++ return (EFAULT); ++ ++ /* ++ * verify nvp_type, nvp_value_elem, and also possibly ++ * verify string values and get the value size. ++ */ ++ size2 = i_get_value_size(type, NVP_VALUE(nvp), NVP_NELEM(nvp)); ++ size1 = nvp->nvp_size - NVP_VALOFF(nvp); ++ if (size2 < 0 || size1 != NV_ALIGN(size2)) ++ return (EFAULT); ++ ++ return (0); ++} ++ ++static int ++nvlist_copy_pairs(nvlist_t *snvl, nvlist_t *dnvl) ++{ ++ nvpriv_t *priv; ++ i_nvp_t *curr; ++ ++ if ((priv = (nvpriv_t *)(uintptr_t)snvl->nvl_priv) == NULL) ++ return (EINVAL); ++ ++ for (curr = priv->nvp_list; curr != NULL; curr = curr->nvi_next) { ++ nvpair_t *nvp = &curr->nvi_nvp; ++ int err; ++ ++ if ((err = nvlist_add_common(dnvl, NVP_NAME(nvp), NVP_TYPE(nvp), ++ NVP_NELEM(nvp), NVP_VALUE(nvp))) != 0) ++ return (err); ++ } ++ ++ return (0); ++} ++ ++/* ++ * Frees all memory allocated for an nvpair (like embedded lists) with ++ * the exception of the nvpair buffer itself. 
++ */ ++static void ++nvpair_free(nvpair_t *nvp) ++{ ++ switch (NVP_TYPE(nvp)) { ++ case DATA_TYPE_NVLIST: ++ nvlist_free(EMBEDDED_NVL(nvp)); ++ break; ++ case DATA_TYPE_NVLIST_ARRAY: { ++ nvlist_t **nvlp = EMBEDDED_NVL_ARRAY(nvp); ++ int i; ++ ++ for (i = 0; i < NVP_NELEM(nvp); i++) ++ if (nvlp[i] != NULL) ++ nvlist_free(nvlp[i]); ++ break; ++ } ++ default: ++ break; ++ } ++} ++ ++/* ++ * nvlist_free - free an unpacked nvlist ++ */ ++void ++nvlist_free(nvlist_t *nvl) ++{ ++ nvpriv_t *priv; ++ i_nvp_t *curr; ++ ++ if (nvl == NULL || ++ (priv = (nvpriv_t *)(uintptr_t)nvl->nvl_priv) == NULL) ++ return; ++ ++ /* ++ * Unpacked nvlist are linked through i_nvp_t ++ */ ++ curr = priv->nvp_list; ++ while (curr != NULL) { ++ nvpair_t *nvp = &curr->nvi_nvp; ++ curr = curr->nvi_next; ++ ++ nvpair_free(nvp); ++ nvp_buf_free(nvl, nvp); ++ } ++ ++ if (!(priv->nvp_stat & NV_STAT_EMBEDDED)) ++ nv_mem_free(priv, nvl, NV_ALIGN(sizeof (nvlist_t))); ++ else ++ nvl->nvl_priv = 0; ++ ++ nv_mem_free(priv, priv, sizeof (nvpriv_t)); ++} ++ ++static int ++nvlist_contains_nvp(nvlist_t *nvl, nvpair_t *nvp) ++{ ++ nvpriv_t *priv = (nvpriv_t *)(uintptr_t)nvl->nvl_priv; ++ i_nvp_t *curr; ++ ++ if (nvp == NULL) ++ return (0); ++ ++ for (curr = priv->nvp_list; curr != NULL; curr = curr->nvi_next) ++ if (&curr->nvi_nvp == nvp) ++ return (1); ++ ++ return (0); ++} ++ ++/* ++ * Make a copy of nvlist ++ */ ++/*ARGSUSED1*/ ++int ++nvlist_dup(nvlist_t *nvl, nvlist_t **nvlp, int kmflag) ++{ ++#if defined(_KERNEL) && !defined(_BOOT) ++ return (nvlist_xdup(nvl, nvlp, ++ (kmflag == KM_SLEEP ? nv_alloc_sleep : nv_alloc_nosleep))); ++#else ++ return (nvlist_xdup(nvl, nvlp, nv_alloc_nosleep)); ++#endif ++} ++ ++int ++nvlist_xdup(nvlist_t *nvl, nvlist_t **nvlp, nv_alloc_t *nva) ++{ ++ int err; ++ nvlist_t *ret; ++ ++ if (nvl == NULL || nvlp == NULL) ++ return (EINVAL); ++ ++ if ((err = nvlist_xalloc(&ret, nvl->nvl_nvflag, nva)) != 0) ++ return (err); ++ ++ if ((err = nvlist_copy_pairs(nvl, ret)) != 0) ++ nvlist_free(ret); ++ else ++ *nvlp = ret; ++ ++ return (err); ++} ++ ++/* ++ * Remove all with matching name ++ */ ++int ++nvlist_remove_all(nvlist_t *nvl, const char *name) ++{ ++ nvpriv_t *priv; ++ i_nvp_t *curr; ++ int error = ENOENT; ++ ++ if (nvl == NULL || name == NULL || ++ (priv = (nvpriv_t *)(uintptr_t)nvl->nvl_priv) == NULL) ++ return (EINVAL); ++ ++ curr = priv->nvp_list; ++ while (curr != NULL) { ++ nvpair_t *nvp = &curr->nvi_nvp; ++ ++ curr = curr->nvi_next; ++ if (strcmp(name, NVP_NAME(nvp)) != 0) ++ continue; ++ ++ nvp_buf_unlink(nvl, nvp); ++ nvpair_free(nvp); ++ nvp_buf_free(nvl, nvp); ++ ++ error = 0; ++ } ++ ++ return (error); ++} ++ ++/* ++ * Remove first one with matching name and type ++ */ ++int ++nvlist_remove(nvlist_t *nvl, const char *name, data_type_t type) ++{ ++ nvpriv_t *priv; ++ i_nvp_t *curr; ++ ++ if (nvl == NULL || name == NULL || ++ (priv = (nvpriv_t *)(uintptr_t)nvl->nvl_priv) == NULL) ++ return (EINVAL); ++ ++ curr = priv->nvp_list; ++ while (curr != NULL) { ++ nvpair_t *nvp = &curr->nvi_nvp; ++ ++ if (strcmp(name, NVP_NAME(nvp)) == 0 && NVP_TYPE(nvp) == type) { ++ nvp_buf_unlink(nvl, nvp); ++ nvpair_free(nvp); ++ nvp_buf_free(nvl, nvp); ++ ++ return (0); ++ } ++ curr = curr->nvi_next; ++ } ++ ++ return (ENOENT); ++} ++ ++int ++nvlist_remove_nvpair(nvlist_t *nvl, nvpair_t *nvp) ++{ ++ if (nvl == NULL || nvp == NULL) ++ return (EINVAL); ++ ++ nvp_buf_unlink(nvl, nvp); ++ nvpair_free(nvp); ++ nvp_buf_free(nvl, nvp); ++ return (0); ++} ++ ++/* ++ * This function calculates the size of an nvpair value. 
++ * ++ * The data argument controls the behavior in case of the data types ++ * DATA_TYPE_STRING and ++ * DATA_TYPE_STRING_ARRAY ++ * Is data == NULL then the size of the string(s) is excluded. ++ */ ++static int ++i_get_value_size(data_type_t type, const void *data, uint_t nelem) ++{ ++ uint64_t value_sz; ++ ++ if (i_validate_type_nelem(type, nelem) != 0) ++ return (-1); ++ ++ /* Calculate required size for holding value */ ++ switch (type) { ++ case DATA_TYPE_BOOLEAN: ++ value_sz = 0; ++ break; ++ case DATA_TYPE_BOOLEAN_VALUE: ++ value_sz = sizeof (boolean_t); ++ break; ++ case DATA_TYPE_BYTE: ++ value_sz = sizeof (uchar_t); ++ break; ++ case DATA_TYPE_INT8: ++ value_sz = sizeof (int8_t); ++ break; ++ case DATA_TYPE_UINT8: ++ value_sz = sizeof (uint8_t); ++ break; ++ case DATA_TYPE_INT16: ++ value_sz = sizeof (int16_t); ++ break; ++ case DATA_TYPE_UINT16: ++ value_sz = sizeof (uint16_t); ++ break; ++ case DATA_TYPE_INT32: ++ value_sz = sizeof (int32_t); ++ break; ++ case DATA_TYPE_UINT32: ++ value_sz = sizeof (uint32_t); ++ break; ++ case DATA_TYPE_INT64: ++ value_sz = sizeof (int64_t); ++ break; ++ case DATA_TYPE_UINT64: ++ value_sz = sizeof (uint64_t); ++ break; ++#if !defined(_KERNEL) ++ case DATA_TYPE_DOUBLE: ++ value_sz = sizeof (double); ++ break; ++#endif ++ case DATA_TYPE_STRING: ++ if (data == NULL) ++ value_sz = 0; ++ else ++ value_sz = strlen(data) + 1; ++ break; ++ case DATA_TYPE_BOOLEAN_ARRAY: ++ value_sz = (uint64_t)nelem * sizeof (boolean_t); ++ break; ++ case DATA_TYPE_BYTE_ARRAY: ++ value_sz = (uint64_t)nelem * sizeof (uchar_t); ++ break; ++ case DATA_TYPE_INT8_ARRAY: ++ value_sz = (uint64_t)nelem * sizeof (int8_t); ++ break; ++ case DATA_TYPE_UINT8_ARRAY: ++ value_sz = (uint64_t)nelem * sizeof (uint8_t); ++ break; ++ case DATA_TYPE_INT16_ARRAY: ++ value_sz = (uint64_t)nelem * sizeof (int16_t); ++ break; ++ case DATA_TYPE_UINT16_ARRAY: ++ value_sz = (uint64_t)nelem * sizeof (uint16_t); ++ break; ++ case DATA_TYPE_INT32_ARRAY: ++ value_sz = (uint64_t)nelem * sizeof (int32_t); ++ break; ++ case DATA_TYPE_UINT32_ARRAY: ++ value_sz = (uint64_t)nelem * sizeof (uint32_t); ++ break; ++ case DATA_TYPE_INT64_ARRAY: ++ value_sz = (uint64_t)nelem * sizeof (int64_t); ++ break; ++ case DATA_TYPE_UINT64_ARRAY: ++ value_sz = (uint64_t)nelem * sizeof (uint64_t); ++ break; ++ case DATA_TYPE_STRING_ARRAY: ++ value_sz = (uint64_t)nelem * sizeof (uint64_t); ++ ++ if (data != NULL) { ++ char *const *strs = data; ++ uint_t i; ++ ++ /* no alignment requirement for strings */ ++ for (i = 0; i < nelem; i++) { ++ if (strs[i] == NULL) ++ return (-1); ++ value_sz += strlen(strs[i]) + 1; ++ } ++ } ++ break; ++ case DATA_TYPE_HRTIME: ++ value_sz = sizeof (hrtime_t); ++ break; ++ case DATA_TYPE_NVLIST: ++ value_sz = NV_ALIGN(sizeof (nvlist_t)); ++ break; ++ case DATA_TYPE_NVLIST_ARRAY: ++ value_sz = (uint64_t)nelem * sizeof (uint64_t) + ++ (uint64_t)nelem * NV_ALIGN(sizeof (nvlist_t)); ++ break; ++ default: ++ return (-1); ++ } ++ ++ return (value_sz > INT32_MAX ? 
-1 : (int)value_sz); ++} ++ ++static int ++nvlist_copy_embedded(nvlist_t *nvl, nvlist_t *onvl, nvlist_t *emb_nvl) ++{ ++ nvpriv_t *priv; ++ int err; ++ ++ if ((priv = nv_priv_alloc_embedded((nvpriv_t *)(uintptr_t) ++ nvl->nvl_priv)) == NULL) ++ return (ENOMEM); ++ ++ nvlist_init(emb_nvl, onvl->nvl_nvflag, priv); ++ ++ if ((err = nvlist_copy_pairs(onvl, emb_nvl)) != 0) { ++ nvlist_free(emb_nvl); ++ emb_nvl->nvl_priv = 0; ++ } ++ ++ return (err); ++} ++ ++/* ++ * nvlist_add_common - Add new pair to nvlist ++ */ ++static int ++nvlist_add_common(nvlist_t *nvl, const char *name, ++ data_type_t type, uint_t nelem, const void *data) ++{ ++ nvpair_t *nvp; ++ uint_t i; ++ ++ int nvp_sz, name_sz, value_sz; ++ int err = 0; ++ ++ if (name == NULL || nvl == NULL || nvl->nvl_priv == 0) ++ return (EINVAL); ++ ++ if (nelem != 0 && data == NULL) ++ return (EINVAL); ++ ++ /* ++ * Verify type and nelem and get the value size. ++ * In case of data types DATA_TYPE_STRING and DATA_TYPE_STRING_ARRAY ++ * is the size of the string(s) included. ++ */ ++ if ((value_sz = i_get_value_size(type, data, nelem)) < 0) ++ return (EINVAL); ++ ++ if (i_validate_nvpair_value(type, nelem, data) != 0) ++ return (EINVAL); ++ ++ /* ++ * If we're adding an nvlist or nvlist array, ensure that we are not ++ * adding the input nvlist to itself, which would cause recursion, ++ * and ensure that no NULL nvlist pointers are present. ++ */ ++ switch (type) { ++ case DATA_TYPE_NVLIST: ++ if (data == nvl || data == NULL) ++ return (EINVAL); ++ break; ++ case DATA_TYPE_NVLIST_ARRAY: { ++ nvlist_t **onvlp = (nvlist_t **)data; ++ for (i = 0; i < nelem; i++) { ++ if (onvlp[i] == nvl || onvlp[i] == NULL) ++ return (EINVAL); ++ } ++ break; ++ } ++ default: ++ break; ++ } ++ ++ /* calculate sizes of the nvpair elements and the nvpair itself */ ++ name_sz = strlen(name) + 1; ++ ++ nvp_sz = NVP_SIZE_CALC(name_sz, value_sz); ++ ++ if ((nvp = nvp_buf_alloc(nvl, nvp_sz)) == NULL) ++ return (ENOMEM); ++ ++ ASSERT(nvp->nvp_size == nvp_sz); ++ nvp->nvp_name_sz = name_sz; ++ nvp->nvp_value_elem = nelem; ++ nvp->nvp_type = type; ++ bcopy(name, NVP_NAME(nvp), name_sz); ++ ++ switch (type) { ++ case DATA_TYPE_BOOLEAN: ++ break; ++ case DATA_TYPE_STRING_ARRAY: { ++ char *const *strs = data; ++ char *buf = NVP_VALUE(nvp); ++ char **cstrs = (void *)buf; ++ ++ /* skip pre-allocated space for pointer array */ ++ buf += nelem * sizeof (uint64_t); ++ for (i = 0; i < nelem; i++) { ++ int slen = strlen(strs[i]) + 1; ++ bcopy(strs[i], buf, slen); ++ cstrs[i] = buf; ++ buf += slen; ++ } ++ break; ++ } ++ case DATA_TYPE_NVLIST: { ++ nvlist_t *nnvl = EMBEDDED_NVL(nvp); ++ nvlist_t *onvl = (nvlist_t *)data; ++ ++ if ((err = nvlist_copy_embedded(nvl, onvl, nnvl)) != 0) { ++ nvp_buf_free(nvl, nvp); ++ return (err); ++ } ++ break; ++ } ++ case DATA_TYPE_NVLIST_ARRAY: { ++ nvlist_t **onvlp = (nvlist_t **)data; ++ nvlist_t **nvlp = EMBEDDED_NVL_ARRAY(nvp); ++ nvlist_t *embedded = (nvlist_t *) ++ ((uintptr_t)nvlp + nelem * sizeof (uint64_t)); ++ ++ for (i = 0; i < nelem; i++) { ++ if ((err = nvlist_copy_embedded(nvl, ++ onvlp[i], embedded)) != 0) { ++ /* ++ * Free any successfully created lists ++ */ ++ nvpair_free(nvp); ++ nvp_buf_free(nvl, nvp); ++ return (err); ++ } ++ ++ nvlp[i] = embedded++; ++ } ++ break; ++ } ++ default: ++ bcopy(data, NVP_VALUE(nvp), value_sz); ++ } ++ ++ /* if unique name, remove before add */ ++ if (nvl->nvl_nvflag & NV_UNIQUE_NAME) ++ (void) nvlist_remove_all(nvl, name); ++ else if (nvl->nvl_nvflag & NV_UNIQUE_NAME_TYPE) ++ (void) nvlist_remove(nvl, 
name, type); ++ ++ nvp_buf_link(nvl, nvp); ++ ++ return (0); ++} ++ ++int ++nvlist_add_boolean(nvlist_t *nvl, const char *name) ++{ ++ return (nvlist_add_common(nvl, name, DATA_TYPE_BOOLEAN, 0, NULL)); ++} ++ ++int ++nvlist_add_boolean_value(nvlist_t *nvl, const char *name, boolean_t val) ++{ ++ return (nvlist_add_common(nvl, name, DATA_TYPE_BOOLEAN_VALUE, 1, &val)); ++} ++ ++int ++nvlist_add_byte(nvlist_t *nvl, const char *name, uchar_t val) ++{ ++ return (nvlist_add_common(nvl, name, DATA_TYPE_BYTE, 1, &val)); ++} ++ ++int ++nvlist_add_int8(nvlist_t *nvl, const char *name, int8_t val) ++{ ++ return (nvlist_add_common(nvl, name, DATA_TYPE_INT8, 1, &val)); ++} ++ ++int ++nvlist_add_uint8(nvlist_t *nvl, const char *name, uint8_t val) ++{ ++ return (nvlist_add_common(nvl, name, DATA_TYPE_UINT8, 1, &val)); ++} ++ ++int ++nvlist_add_int16(nvlist_t *nvl, const char *name, int16_t val) ++{ ++ return (nvlist_add_common(nvl, name, DATA_TYPE_INT16, 1, &val)); ++} ++ ++int ++nvlist_add_uint16(nvlist_t *nvl, const char *name, uint16_t val) ++{ ++ return (nvlist_add_common(nvl, name, DATA_TYPE_UINT16, 1, &val)); ++} ++ ++int ++nvlist_add_int32(nvlist_t *nvl, const char *name, int32_t val) ++{ ++ return (nvlist_add_common(nvl, name, DATA_TYPE_INT32, 1, &val)); ++} ++ ++int ++nvlist_add_uint32(nvlist_t *nvl, const char *name, uint32_t val) ++{ ++ return (nvlist_add_common(nvl, name, DATA_TYPE_UINT32, 1, &val)); ++} ++ ++int ++nvlist_add_int64(nvlist_t *nvl, const char *name, int64_t val) ++{ ++ return (nvlist_add_common(nvl, name, DATA_TYPE_INT64, 1, &val)); ++} ++ ++int ++nvlist_add_uint64(nvlist_t *nvl, const char *name, uint64_t val) ++{ ++ return (nvlist_add_common(nvl, name, DATA_TYPE_UINT64, 1, &val)); ++} ++ ++#if !defined(_KERNEL) ++int ++nvlist_add_double(nvlist_t *nvl, const char *name, double val) ++{ ++ return (nvlist_add_common(nvl, name, DATA_TYPE_DOUBLE, 1, &val)); ++} ++#endif ++ ++int ++nvlist_add_string(nvlist_t *nvl, const char *name, const char *val) ++{ ++ return (nvlist_add_common(nvl, name, DATA_TYPE_STRING, 1, (void *)val)); ++} ++ ++int ++nvlist_add_boolean_array(nvlist_t *nvl, const char *name, ++ boolean_t *a, uint_t n) ++{ ++ return (nvlist_add_common(nvl, name, DATA_TYPE_BOOLEAN_ARRAY, n, a)); ++} ++ ++int ++nvlist_add_byte_array(nvlist_t *nvl, const char *name, uchar_t *a, uint_t n) ++{ ++ return (nvlist_add_common(nvl, name, DATA_TYPE_BYTE_ARRAY, n, a)); ++} ++ ++int ++nvlist_add_int8_array(nvlist_t *nvl, const char *name, int8_t *a, uint_t n) ++{ ++ return (nvlist_add_common(nvl, name, DATA_TYPE_INT8_ARRAY, n, a)); ++} ++ ++int ++nvlist_add_uint8_array(nvlist_t *nvl, const char *name, uint8_t *a, uint_t n) ++{ ++ return (nvlist_add_common(nvl, name, DATA_TYPE_UINT8_ARRAY, n, a)); ++} ++ ++int ++nvlist_add_int16_array(nvlist_t *nvl, const char *name, int16_t *a, uint_t n) ++{ ++ return (nvlist_add_common(nvl, name, DATA_TYPE_INT16_ARRAY, n, a)); ++} ++ ++int ++nvlist_add_uint16_array(nvlist_t *nvl, const char *name, uint16_t *a, uint_t n) ++{ ++ return (nvlist_add_common(nvl, name, DATA_TYPE_UINT16_ARRAY, n, a)); ++} ++ ++int ++nvlist_add_int32_array(nvlist_t *nvl, const char *name, int32_t *a, uint_t n) ++{ ++ return (nvlist_add_common(nvl, name, DATA_TYPE_INT32_ARRAY, n, a)); ++} ++ ++int ++nvlist_add_uint32_array(nvlist_t *nvl, const char *name, uint32_t *a, uint_t n) ++{ ++ return (nvlist_add_common(nvl, name, DATA_TYPE_UINT32_ARRAY, n, a)); ++} ++ ++int ++nvlist_add_int64_array(nvlist_t *nvl, const char *name, int64_t *a, uint_t n) ++{ ++ return 
(nvlist_add_common(nvl, name, DATA_TYPE_INT64_ARRAY, n, a)); ++} ++ ++int ++nvlist_add_uint64_array(nvlist_t *nvl, const char *name, uint64_t *a, uint_t n) ++{ ++ return (nvlist_add_common(nvl, name, DATA_TYPE_UINT64_ARRAY, n, a)); ++} ++ ++int ++nvlist_add_string_array(nvlist_t *nvl, const char *name, ++ char *const *a, uint_t n) ++{ ++ return (nvlist_add_common(nvl, name, DATA_TYPE_STRING_ARRAY, n, a)); ++} ++ ++int ++nvlist_add_hrtime(nvlist_t *nvl, const char *name, hrtime_t val) ++{ ++ return (nvlist_add_common(nvl, name, DATA_TYPE_HRTIME, 1, &val)); ++} ++ ++int ++nvlist_add_nvlist(nvlist_t *nvl, const char *name, nvlist_t *val) ++{ ++ return (nvlist_add_common(nvl, name, DATA_TYPE_NVLIST, 1, val)); ++} ++ ++int ++nvlist_add_nvlist_array(nvlist_t *nvl, const char *name, nvlist_t **a, uint_t n) ++{ ++ return (nvlist_add_common(nvl, name, DATA_TYPE_NVLIST_ARRAY, n, a)); ++} ++ ++/* reading name-value pairs */ ++nvpair_t * ++nvlist_next_nvpair(nvlist_t *nvl, nvpair_t *nvp) ++{ ++ nvpriv_t *priv; ++ i_nvp_t *curr; ++ ++ if (nvl == NULL || ++ (priv = (nvpriv_t *)(uintptr_t)nvl->nvl_priv) == NULL) ++ return (NULL); ++ ++ curr = NVPAIR2I_NVP(nvp); ++ ++ /* ++ * Ensure that nvp is a valid nvpair on this nvlist. ++ * NB: nvp_curr is used only as a hint so that we don't always ++ * have to walk the list to determine if nvp is still on the list. ++ */ ++ if (nvp == NULL) ++ curr = priv->nvp_list; ++ else if (priv->nvp_curr == curr || nvlist_contains_nvp(nvl, nvp)) ++ curr = curr->nvi_next; ++ else ++ curr = NULL; ++ ++ priv->nvp_curr = curr; ++ ++ return (curr != NULL ? &curr->nvi_nvp : NULL); ++} ++ ++nvpair_t * ++nvlist_prev_nvpair(nvlist_t *nvl, nvpair_t *nvp) ++{ ++ nvpriv_t *priv; ++ i_nvp_t *curr; ++ ++ if (nvl == NULL || ++ (priv = (nvpriv_t *)(uintptr_t)nvl->nvl_priv) == NULL) ++ return (NULL); ++ ++ curr = NVPAIR2I_NVP(nvp); ++ ++ if (nvp == NULL) ++ curr = priv->nvp_last; ++ else if (priv->nvp_curr == curr || nvlist_contains_nvp(nvl, nvp)) ++ curr = curr->nvi_prev; ++ else ++ curr = NULL; ++ ++ priv->nvp_curr = curr; ++ ++ return (curr != NULL ? &curr->nvi_nvp : NULL); ++} ++ ++boolean_t ++nvlist_empty(nvlist_t *nvl) ++{ ++ nvpriv_t *priv; ++ ++ if (nvl == NULL || ++ (priv = (nvpriv_t *)(uintptr_t)nvl->nvl_priv) == NULL) ++ return (B_TRUE); ++ ++ return (priv->nvp_list == NULL); ++} ++ ++char * ++nvpair_name(nvpair_t *nvp) ++{ ++ return (NVP_NAME(nvp)); ++} ++ ++data_type_t ++nvpair_type(nvpair_t *nvp) ++{ ++ return (NVP_TYPE(nvp)); ++} ++ ++int ++nvpair_type_is_array(nvpair_t *nvp) ++{ ++ data_type_t type = NVP_TYPE(nvp); ++ ++ if ((type == DATA_TYPE_BYTE_ARRAY) || ++ (type == DATA_TYPE_UINT8_ARRAY) || ++ (type == DATA_TYPE_INT16_ARRAY) || ++ (type == DATA_TYPE_UINT16_ARRAY) || ++ (type == DATA_TYPE_INT32_ARRAY) || ++ (type == DATA_TYPE_UINT32_ARRAY) || ++ (type == DATA_TYPE_INT64_ARRAY) || ++ (type == DATA_TYPE_UINT64_ARRAY) || ++ (type == DATA_TYPE_BOOLEAN_ARRAY) || ++ (type == DATA_TYPE_STRING_ARRAY) || ++ (type == DATA_TYPE_NVLIST_ARRAY)) ++ return (1); ++ return (0); ++ ++} ++ ++static int ++nvpair_value_common(nvpair_t *nvp, data_type_t type, uint_t *nelem, void *data) ++{ ++ if (nvp == NULL || nvpair_type(nvp) != type) ++ return (EINVAL); ++ ++ /* ++ * For non-array types, we copy the data. ++ * For array types (including string), we set a pointer. 
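The typed add wrappers above and the lookup wrappers that follow all funnel into nvlist_add_common() and nvlist_lookup_common(). A minimal consumer-side sketch of that API (illustrative names and values; kernel-style KM_SLEEP flag assumed, userland passes 0):

	nvlist_t *nvl = NULL;
	uint64_t guid = 0;

	/* NV_UNIQUE_NAME: adding "guid" again would replace the old pair */
	if (nvlist_alloc(&nvl, NV_UNIQUE_NAME, KM_SLEEP) == 0) {
		(void) nvlist_add_uint64(nvl, "guid", 0xdecafULL);
		(void) nvlist_add_string(nvl, "pool", "tank");
		if (nvlist_lookup_uint64(nvl, "guid", &guid) == 0)
			ASSERT(guid == 0xdecafULL);
		nvlist_free(nvl);
	}

Note that string and array lookups hand back pointers into the pair's own storage, so those pointers stay valid only as long as the nvlist itself.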
++ */ ++ switch (type) { ++ case DATA_TYPE_BOOLEAN: ++ if (nelem != NULL) ++ *nelem = 0; ++ break; ++ ++ case DATA_TYPE_BOOLEAN_VALUE: ++ case DATA_TYPE_BYTE: ++ case DATA_TYPE_INT8: ++ case DATA_TYPE_UINT8: ++ case DATA_TYPE_INT16: ++ case DATA_TYPE_UINT16: ++ case DATA_TYPE_INT32: ++ case DATA_TYPE_UINT32: ++ case DATA_TYPE_INT64: ++ case DATA_TYPE_UINT64: ++ case DATA_TYPE_HRTIME: ++#if !defined(_KERNEL) ++ case DATA_TYPE_DOUBLE: ++#endif ++ if (data == NULL) ++ return (EINVAL); ++ bcopy(NVP_VALUE(nvp), data, ++ (size_t)i_get_value_size(type, NULL, 1)); ++ if (nelem != NULL) ++ *nelem = 1; ++ break; ++ ++ case DATA_TYPE_NVLIST: ++ case DATA_TYPE_STRING: ++ if (data == NULL) ++ return (EINVAL); ++ *(void **)data = (void *)NVP_VALUE(nvp); ++ if (nelem != NULL) ++ *nelem = 1; ++ break; ++ ++ case DATA_TYPE_BOOLEAN_ARRAY: ++ case DATA_TYPE_BYTE_ARRAY: ++ case DATA_TYPE_INT8_ARRAY: ++ case DATA_TYPE_UINT8_ARRAY: ++ case DATA_TYPE_INT16_ARRAY: ++ case DATA_TYPE_UINT16_ARRAY: ++ case DATA_TYPE_INT32_ARRAY: ++ case DATA_TYPE_UINT32_ARRAY: ++ case DATA_TYPE_INT64_ARRAY: ++ case DATA_TYPE_UINT64_ARRAY: ++ case DATA_TYPE_STRING_ARRAY: ++ case DATA_TYPE_NVLIST_ARRAY: ++ if (nelem == NULL || data == NULL) ++ return (EINVAL); ++ if ((*nelem = NVP_NELEM(nvp)) != 0) ++ *(void **)data = (void *)NVP_VALUE(nvp); ++ else ++ *(void **)data = NULL; ++ break; ++ ++ default: ++ return (ENOTSUP); ++ } ++ ++ return (0); ++} ++ ++static int ++nvlist_lookup_common(nvlist_t *nvl, const char *name, data_type_t type, ++ uint_t *nelem, void *data) ++{ ++ nvpriv_t *priv; ++ nvpair_t *nvp; ++ i_nvp_t *curr; ++ ++ if (name == NULL || nvl == NULL || ++ (priv = (nvpriv_t *)(uintptr_t)nvl->nvl_priv) == NULL) ++ return (EINVAL); ++ ++ if (!(nvl->nvl_nvflag & (NV_UNIQUE_NAME | NV_UNIQUE_NAME_TYPE))) ++ return (ENOTSUP); ++ ++ for (curr = priv->nvp_list; curr != NULL; curr = curr->nvi_next) { ++ nvp = &curr->nvi_nvp; ++ ++ if (strcmp(name, NVP_NAME(nvp)) == 0 && NVP_TYPE(nvp) == type) ++ return (nvpair_value_common(nvp, type, nelem, data)); ++ } ++ ++ return (ENOENT); ++} ++ ++int ++nvlist_lookup_boolean(nvlist_t *nvl, const char *name) ++{ ++ return (nvlist_lookup_common(nvl, name, DATA_TYPE_BOOLEAN, NULL, NULL)); ++} ++ ++int ++nvlist_lookup_boolean_value(nvlist_t *nvl, const char *name, boolean_t *val) ++{ ++ return (nvlist_lookup_common(nvl, name, ++ DATA_TYPE_BOOLEAN_VALUE, NULL, val)); ++} ++ ++int ++nvlist_lookup_byte(nvlist_t *nvl, const char *name, uchar_t *val) ++{ ++ return (nvlist_lookup_common(nvl, name, DATA_TYPE_BYTE, NULL, val)); ++} ++ ++int ++nvlist_lookup_int8(nvlist_t *nvl, const char *name, int8_t *val) ++{ ++ return (nvlist_lookup_common(nvl, name, DATA_TYPE_INT8, NULL, val)); ++} ++ ++int ++nvlist_lookup_uint8(nvlist_t *nvl, const char *name, uint8_t *val) ++{ ++ return (nvlist_lookup_common(nvl, name, DATA_TYPE_UINT8, NULL, val)); ++} ++ ++int ++nvlist_lookup_int16(nvlist_t *nvl, const char *name, int16_t *val) ++{ ++ return (nvlist_lookup_common(nvl, name, DATA_TYPE_INT16, NULL, val)); ++} ++ ++int ++nvlist_lookup_uint16(nvlist_t *nvl, const char *name, uint16_t *val) ++{ ++ return (nvlist_lookup_common(nvl, name, DATA_TYPE_UINT16, NULL, val)); ++} ++ ++int ++nvlist_lookup_int32(nvlist_t *nvl, const char *name, int32_t *val) ++{ ++ return (nvlist_lookup_common(nvl, name, DATA_TYPE_INT32, NULL, val)); ++} ++ ++int ++nvlist_lookup_uint32(nvlist_t *nvl, const char *name, uint32_t *val) ++{ ++ return (nvlist_lookup_common(nvl, name, DATA_TYPE_UINT32, NULL, val)); ++} ++ ++int ++nvlist_lookup_int64(nvlist_t 
*nvl, const char *name, int64_t *val) ++{ ++ return (nvlist_lookup_common(nvl, name, DATA_TYPE_INT64, NULL, val)); ++} ++ ++int ++nvlist_lookup_uint64(nvlist_t *nvl, const char *name, uint64_t *val) ++{ ++ return (nvlist_lookup_common(nvl, name, DATA_TYPE_UINT64, NULL, val)); ++} ++ ++#if !defined(_KERNEL) ++int ++nvlist_lookup_double(nvlist_t *nvl, const char *name, double *val) ++{ ++ return (nvlist_lookup_common(nvl, name, DATA_TYPE_DOUBLE, NULL, val)); ++} ++#endif ++ ++int ++nvlist_lookup_string(nvlist_t *nvl, const char *name, char **val) ++{ ++ return (nvlist_lookup_common(nvl, name, DATA_TYPE_STRING, NULL, val)); ++} ++ ++int ++nvlist_lookup_nvlist(nvlist_t *nvl, const char *name, nvlist_t **val) ++{ ++ return (nvlist_lookup_common(nvl, name, DATA_TYPE_NVLIST, NULL, val)); ++} ++ ++int ++nvlist_lookup_boolean_array(nvlist_t *nvl, const char *name, ++ boolean_t **a, uint_t *n) ++{ ++ return (nvlist_lookup_common(nvl, name, ++ DATA_TYPE_BOOLEAN_ARRAY, n, a)); ++} ++ ++int ++nvlist_lookup_byte_array(nvlist_t *nvl, const char *name, ++ uchar_t **a, uint_t *n) ++{ ++ return (nvlist_lookup_common(nvl, name, DATA_TYPE_BYTE_ARRAY, n, a)); ++} ++ ++int ++nvlist_lookup_int8_array(nvlist_t *nvl, const char *name, int8_t **a, uint_t *n) ++{ ++ return (nvlist_lookup_common(nvl, name, DATA_TYPE_INT8_ARRAY, n, a)); ++} ++ ++int ++nvlist_lookup_uint8_array(nvlist_t *nvl, const char *name, ++ uint8_t **a, uint_t *n) ++{ ++ return (nvlist_lookup_common(nvl, name, DATA_TYPE_UINT8_ARRAY, n, a)); ++} ++ ++int ++nvlist_lookup_int16_array(nvlist_t *nvl, const char *name, ++ int16_t **a, uint_t *n) ++{ ++ return (nvlist_lookup_common(nvl, name, DATA_TYPE_INT16_ARRAY, n, a)); ++} ++ ++int ++nvlist_lookup_uint16_array(nvlist_t *nvl, const char *name, ++ uint16_t **a, uint_t *n) ++{ ++ return (nvlist_lookup_common(nvl, name, DATA_TYPE_UINT16_ARRAY, n, a)); ++} ++ ++int ++nvlist_lookup_int32_array(nvlist_t *nvl, const char *name, ++ int32_t **a, uint_t *n) ++{ ++ return (nvlist_lookup_common(nvl, name, DATA_TYPE_INT32_ARRAY, n, a)); ++} ++ ++int ++nvlist_lookup_uint32_array(nvlist_t *nvl, const char *name, ++ uint32_t **a, uint_t *n) ++{ ++ return (nvlist_lookup_common(nvl, name, DATA_TYPE_UINT32_ARRAY, n, a)); ++} ++ ++int ++nvlist_lookup_int64_array(nvlist_t *nvl, const char *name, ++ int64_t **a, uint_t *n) ++{ ++ return (nvlist_lookup_common(nvl, name, DATA_TYPE_INT64_ARRAY, n, a)); ++} ++ ++int ++nvlist_lookup_uint64_array(nvlist_t *nvl, const char *name, ++ uint64_t **a, uint_t *n) ++{ ++ return (nvlist_lookup_common(nvl, name, DATA_TYPE_UINT64_ARRAY, n, a)); ++} ++ ++int ++nvlist_lookup_string_array(nvlist_t *nvl, const char *name, ++ char ***a, uint_t *n) ++{ ++ return (nvlist_lookup_common(nvl, name, DATA_TYPE_STRING_ARRAY, n, a)); ++} ++ ++int ++nvlist_lookup_nvlist_array(nvlist_t *nvl, const char *name, ++ nvlist_t ***a, uint_t *n) ++{ ++ return (nvlist_lookup_common(nvl, name, DATA_TYPE_NVLIST_ARRAY, n, a)); ++} ++ ++int ++nvlist_lookup_hrtime(nvlist_t *nvl, const char *name, hrtime_t *val) ++{ ++ return (nvlist_lookup_common(nvl, name, DATA_TYPE_HRTIME, NULL, val)); ++} ++ ++int ++nvlist_lookup_pairs(nvlist_t *nvl, int flag, ...) ++{ ++ va_list ap; ++ char *name; ++ int noentok = (flag & NV_FLAG_NOENTOK ? 
1 : 0); ++ int ret = 0; ++ ++ va_start(ap, flag); ++ while (ret == 0 && (name = va_arg(ap, char *)) != NULL) { ++ data_type_t type; ++ void *val; ++ uint_t *nelem; ++ ++ switch (type = va_arg(ap, data_type_t)) { ++ case DATA_TYPE_BOOLEAN: ++ ret = nvlist_lookup_common(nvl, name, type, NULL, NULL); ++ break; ++ ++ case DATA_TYPE_BOOLEAN_VALUE: ++ case DATA_TYPE_BYTE: ++ case DATA_TYPE_INT8: ++ case DATA_TYPE_UINT8: ++ case DATA_TYPE_INT16: ++ case DATA_TYPE_UINT16: ++ case DATA_TYPE_INT32: ++ case DATA_TYPE_UINT32: ++ case DATA_TYPE_INT64: ++ case DATA_TYPE_UINT64: ++ case DATA_TYPE_HRTIME: ++ case DATA_TYPE_STRING: ++ case DATA_TYPE_NVLIST: ++#if !defined(_KERNEL) ++ case DATA_TYPE_DOUBLE: ++#endif ++ val = va_arg(ap, void *); ++ ret = nvlist_lookup_common(nvl, name, type, NULL, val); ++ break; ++ ++ case DATA_TYPE_BYTE_ARRAY: ++ case DATA_TYPE_BOOLEAN_ARRAY: ++ case DATA_TYPE_INT8_ARRAY: ++ case DATA_TYPE_UINT8_ARRAY: ++ case DATA_TYPE_INT16_ARRAY: ++ case DATA_TYPE_UINT16_ARRAY: ++ case DATA_TYPE_INT32_ARRAY: ++ case DATA_TYPE_UINT32_ARRAY: ++ case DATA_TYPE_INT64_ARRAY: ++ case DATA_TYPE_UINT64_ARRAY: ++ case DATA_TYPE_STRING_ARRAY: ++ case DATA_TYPE_NVLIST_ARRAY: ++ val = va_arg(ap, void *); ++ nelem = va_arg(ap, uint_t *); ++ ret = nvlist_lookup_common(nvl, name, type, nelem, val); ++ break; ++ ++ default: ++ ret = EINVAL; ++ } ++ ++ if (ret == ENOENT && noentok) ++ ret = 0; ++ } ++ va_end(ap); ++ ++ return (ret); ++} ++ ++/* ++ * Find the 'name'ed nvpair in the nvlist 'nvl'. If 'name' found, the function ++ * returns zero and a pointer to the matching nvpair is returned in '*ret' ++ * (given 'ret' is non-NULL). If 'sep' is specified then 'name' will penitrate ++ * multiple levels of embedded nvlists, with 'sep' as the separator. As an ++ * example, if sep is '.', name might look like: "a" or "a.b" or "a.c[3]" or ++ * "a.d[3].e[1]". This matches the C syntax for array embed (for convience, ++ * code also supports "a.d[3]e[1]" syntax). ++ * ++ * If 'ip' is non-NULL and the last name component is an array, return the ++ * value of the "...[index]" array index in *ip. For an array reference that ++ * is not indexed, *ip will be returned as -1. If there is a syntax error in ++ * 'name', and 'ep' is non-NULL then *ep will be set to point to the location ++ * inside the 'name' string where the syntax error was detected. ++ */ ++static int ++nvlist_lookup_nvpair_ei_sep(nvlist_t *nvl, const char *name, const char sep, ++ nvpair_t **ret, int *ip, char **ep) ++{ ++ nvpair_t *nvp; ++ const char *np; ++ char *sepp=NULL; ++ char *idxp, *idxep; ++ nvlist_t **nva; ++ long idx = 0; ++ int n; ++ ++ if (ip) ++ *ip = -1; /* not indexed */ ++ if (ep) ++ *ep = NULL; ++ ++ if ((nvl == NULL) || (name == NULL)) ++ return (EINVAL); ++ ++ /* step through components of name */ ++ for (np = name; np && *np; np = sepp) { ++ /* ensure unique names */ ++ if (!(nvl->nvl_nvflag & NV_UNIQUE_NAME)) ++ return (ENOTSUP); ++ ++ /* skip white space */ ++ skip_whitespace(np); ++ if (*np == 0) ++ break; ++ ++ /* set 'sepp' to end of current component 'np' */ ++ if (sep) ++ sepp = strchr(np, sep); ++ else ++ sepp = NULL; ++ ++ /* find start of next "[ index ]..." */ ++ idxp = strchr(np, '['); ++ ++ /* if sepp comes first, set idxp to NULL */ ++ if (sepp && idxp && (sepp < idxp)) ++ idxp = NULL; ++ ++ /* ++ * At this point 'idxp' is set if there is an index ++ * expected for the current component. 
++ */ ++ if (idxp) { ++ /* set 'n' to length of current 'np' name component */ ++ n = idxp++ - np; ++ ++ /* keep sepp up to date for *ep use as we advance */ ++ skip_whitespace(idxp); ++ sepp = idxp; ++ ++ /* determine the index value */ ++#if defined(_KERNEL) && !defined(_BOOT) ++ if (ddi_strtol(idxp, &idxep, 0, &idx)) ++ goto fail; ++#else ++ idx = strtol(idxp, &idxep, 0); ++#endif ++ if (idxep == idxp) ++ goto fail; ++ ++ /* keep sepp up to date for *ep use as we advance */ ++ sepp = idxep; ++ ++ /* skip white space index value and check for ']' */ ++ skip_whitespace(sepp); ++ if (*sepp++ != ']') ++ goto fail; ++ ++ /* for embedded arrays, support C syntax: "a[1].b" */ ++ skip_whitespace(sepp); ++ if (sep && (*sepp == sep)) ++ sepp++; ++ } else if (sepp) { ++ n = sepp++ - np; ++ } else { ++ n = strlen(np); ++ } ++ ++ /* trim trailing whitespace by reducing length of 'np' */ ++ if (n == 0) ++ goto fail; ++ for (n--; (np[n] == ' ') || (np[n] == '\t'); n--) ++ ; ++ n++; ++ ++ /* skip whitespace, and set sepp to NULL if complete */ ++ if (sepp) { ++ skip_whitespace(sepp); ++ if (*sepp == 0) ++ sepp = NULL; ++ } ++ ++ /* ++ * At this point: ++ * o 'n' is the length of current 'np' component. ++ * o 'idxp' is set if there was an index, and value 'idx'. ++ * o 'sepp' is set to the beginning of the next component, ++ * and set to NULL if we have no more components. ++ * ++ * Search for nvpair with matching component name. ++ */ ++ for (nvp = nvlist_next_nvpair(nvl, NULL); nvp != NULL; ++ nvp = nvlist_next_nvpair(nvl, nvp)) { ++ ++ /* continue if no match on name */ ++ if (strncmp(np, nvpair_name(nvp), n) || ++ (strlen(nvpair_name(nvp)) != n)) ++ continue; ++ ++ /* if indexed, verify type is array oriented */ ++ if (idxp && !nvpair_type_is_array(nvp)) ++ goto fail; ++ ++ /* ++ * Full match found, return nvp and idx if this ++ * was the last component. ++ */ ++ if (sepp == NULL) { ++ if (ret) ++ *ret = nvp; ++ if (ip && idxp) ++ *ip = (int)idx; /* return index */ ++ return (0); /* found */ ++ } ++ ++ /* ++ * More components: current match must be ++ * of DATA_TYPE_NVLIST or DATA_TYPE_NVLIST_ARRAY ++ * to support going deeper. ++ */ ++ if (nvpair_type(nvp) == DATA_TYPE_NVLIST) { ++ nvl = EMBEDDED_NVL(nvp); ++ break; ++ } else if (nvpair_type(nvp) == DATA_TYPE_NVLIST_ARRAY) { ++ (void) nvpair_value_nvlist_array(nvp, ++ &nva, (uint_t *)&n); ++ if ((n < 0) || (idx >= n)) ++ goto fail; ++ nvl = nva[idx]; ++ break; ++ } ++ ++ /* type does not support more levels */ ++ goto fail; ++ } ++ if (nvp == NULL) ++ goto fail; /* 'name' not found */ ++ ++ /* search for match of next component in embedded 'nvl' list */ ++ } ++ ++fail: if (ep && sepp) ++ *ep = sepp; ++ return (EINVAL); ++} ++ ++/* ++ * Return pointer to nvpair with specified 'name'. ++ */ ++int ++nvlist_lookup_nvpair(nvlist_t *nvl, const char *name, nvpair_t **ret) ++{ ++ return (nvlist_lookup_nvpair_ei_sep(nvl, name, 0, ret, NULL, NULL)); ++} ++ ++/* ++ * Determine if named nvpair exists in nvlist (use embedded separator of '.' ++ * and return array index). See nvlist_lookup_nvpair_ei_sep for more detailed ++ * description. 
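For example, if "a" names an embedded nvlist whose pair "d" is an nvlist array and d[3] in turn holds an array-typed pair "e", the wrapper defined just below resolves the whole dotted name in one call (sketch; the layout of 'nvl' is assumed):

	nvpair_t *nvp = NULL;
	int idx = -1;
	char *ep = NULL;

	/* descends a -> d[3]; returns the "e" pair and its index, 1 */
	if (nvlist_lookup_nvpair_embedded_index(nvl, "a.d[3].e[1]",
	    &nvp, &idx, &ep) == 0)
		ASSERT(strcmp(nvpair_name(nvp), "e") == 0 && idx == 1);

On a syntax error the call returns EINVAL and, when possible, 'ep' points at the offending location inside the name string.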
++ */ ++int nvlist_lookup_nvpair_embedded_index(nvlist_t *nvl, ++ const char *name, nvpair_t **ret, int *ip, char **ep) ++{ ++ return (nvlist_lookup_nvpair_ei_sep(nvl, name, '.', ret, ip, ep)); ++} ++ ++boolean_t ++nvlist_exists(nvlist_t *nvl, const char *name) ++{ ++ nvpriv_t *priv; ++ nvpair_t *nvp; ++ i_nvp_t *curr; ++ ++ if (name == NULL || nvl == NULL || ++ (priv = (nvpriv_t *)(uintptr_t)nvl->nvl_priv) == NULL) ++ return (B_FALSE); ++ ++ for (curr = priv->nvp_list; curr != NULL; curr = curr->nvi_next) { ++ nvp = &curr->nvi_nvp; ++ ++ if (strcmp(name, NVP_NAME(nvp)) == 0) ++ return (B_TRUE); ++ } ++ ++ return (B_FALSE); ++} ++ ++int ++nvpair_value_boolean_value(nvpair_t *nvp, boolean_t *val) ++{ ++ return (nvpair_value_common(nvp, DATA_TYPE_BOOLEAN_VALUE, NULL, val)); ++} ++ ++int ++nvpair_value_byte(nvpair_t *nvp, uchar_t *val) ++{ ++ return (nvpair_value_common(nvp, DATA_TYPE_BYTE, NULL, val)); ++} ++ ++int ++nvpair_value_int8(nvpair_t *nvp, int8_t *val) ++{ ++ return (nvpair_value_common(nvp, DATA_TYPE_INT8, NULL, val)); ++} ++ ++int ++nvpair_value_uint8(nvpair_t *nvp, uint8_t *val) ++{ ++ return (nvpair_value_common(nvp, DATA_TYPE_UINT8, NULL, val)); ++} ++ ++int ++nvpair_value_int16(nvpair_t *nvp, int16_t *val) ++{ ++ return (nvpair_value_common(nvp, DATA_TYPE_INT16, NULL, val)); ++} ++ ++int ++nvpair_value_uint16(nvpair_t *nvp, uint16_t *val) ++{ ++ return (nvpair_value_common(nvp, DATA_TYPE_UINT16, NULL, val)); ++} ++ ++int ++nvpair_value_int32(nvpair_t *nvp, int32_t *val) ++{ ++ return (nvpair_value_common(nvp, DATA_TYPE_INT32, NULL, val)); ++} ++ ++int ++nvpair_value_uint32(nvpair_t *nvp, uint32_t *val) ++{ ++ return (nvpair_value_common(nvp, DATA_TYPE_UINT32, NULL, val)); ++} ++ ++int ++nvpair_value_int64(nvpair_t *nvp, int64_t *val) ++{ ++ return (nvpair_value_common(nvp, DATA_TYPE_INT64, NULL, val)); ++} ++ ++int ++nvpair_value_uint64(nvpair_t *nvp, uint64_t *val) ++{ ++ return (nvpair_value_common(nvp, DATA_TYPE_UINT64, NULL, val)); ++} ++ ++#if !defined(_KERNEL) ++int ++nvpair_value_double(nvpair_t *nvp, double *val) ++{ ++ return (nvpair_value_common(nvp, DATA_TYPE_DOUBLE, NULL, val)); ++} ++#endif ++ ++int ++nvpair_value_string(nvpair_t *nvp, char **val) ++{ ++ return (nvpair_value_common(nvp, DATA_TYPE_STRING, NULL, val)); ++} ++ ++int ++nvpair_value_nvlist(nvpair_t *nvp, nvlist_t **val) ++{ ++ return (nvpair_value_common(nvp, DATA_TYPE_NVLIST, NULL, val)); ++} ++ ++int ++nvpair_value_boolean_array(nvpair_t *nvp, boolean_t **val, uint_t *nelem) ++{ ++ return (nvpair_value_common(nvp, DATA_TYPE_BOOLEAN_ARRAY, nelem, val)); ++} ++ ++int ++nvpair_value_byte_array(nvpair_t *nvp, uchar_t **val, uint_t *nelem) ++{ ++ return (nvpair_value_common(nvp, DATA_TYPE_BYTE_ARRAY, nelem, val)); ++} ++ ++int ++nvpair_value_int8_array(nvpair_t *nvp, int8_t **val, uint_t *nelem) ++{ ++ return (nvpair_value_common(nvp, DATA_TYPE_INT8_ARRAY, nelem, val)); ++} ++ ++int ++nvpair_value_uint8_array(nvpair_t *nvp, uint8_t **val, uint_t *nelem) ++{ ++ return (nvpair_value_common(nvp, DATA_TYPE_UINT8_ARRAY, nelem, val)); ++} ++ ++int ++nvpair_value_int16_array(nvpair_t *nvp, int16_t **val, uint_t *nelem) ++{ ++ return (nvpair_value_common(nvp, DATA_TYPE_INT16_ARRAY, nelem, val)); ++} ++ ++int ++nvpair_value_uint16_array(nvpair_t *nvp, uint16_t **val, uint_t *nelem) ++{ ++ return (nvpair_value_common(nvp, DATA_TYPE_UINT16_ARRAY, nelem, val)); ++} ++ ++int ++nvpair_value_int32_array(nvpair_t *nvp, int32_t **val, uint_t *nelem) ++{ ++ return (nvpair_value_common(nvp, DATA_TYPE_INT32_ARRAY, nelem, 
val)); ++} ++ ++int ++nvpair_value_uint32_array(nvpair_t *nvp, uint32_t **val, uint_t *nelem) ++{ ++ return (nvpair_value_common(nvp, DATA_TYPE_UINT32_ARRAY, nelem, val)); ++} ++ ++int ++nvpair_value_int64_array(nvpair_t *nvp, int64_t **val, uint_t *nelem) ++{ ++ return (nvpair_value_common(nvp, DATA_TYPE_INT64_ARRAY, nelem, val)); ++} ++ ++int ++nvpair_value_uint64_array(nvpair_t *nvp, uint64_t **val, uint_t *nelem) ++{ ++ return (nvpair_value_common(nvp, DATA_TYPE_UINT64_ARRAY, nelem, val)); ++} ++ ++int ++nvpair_value_string_array(nvpair_t *nvp, char ***val, uint_t *nelem) ++{ ++ return (nvpair_value_common(nvp, DATA_TYPE_STRING_ARRAY, nelem, val)); ++} ++ ++int ++nvpair_value_nvlist_array(nvpair_t *nvp, nvlist_t ***val, uint_t *nelem) ++{ ++ return (nvpair_value_common(nvp, DATA_TYPE_NVLIST_ARRAY, nelem, val)); ++} ++ ++int ++nvpair_value_hrtime(nvpair_t *nvp, hrtime_t *val) ++{ ++ return (nvpair_value_common(nvp, DATA_TYPE_HRTIME, NULL, val)); ++} ++ ++/* ++ * Add specified pair to the list. ++ */ ++int ++nvlist_add_nvpair(nvlist_t *nvl, nvpair_t *nvp) ++{ ++ if (nvl == NULL || nvp == NULL) ++ return (EINVAL); ++ ++ return (nvlist_add_common(nvl, NVP_NAME(nvp), NVP_TYPE(nvp), ++ NVP_NELEM(nvp), NVP_VALUE(nvp))); ++} ++ ++/* ++ * Merge the supplied nvlists and put the result in dst. ++ * The merged list will contain all names specified in both lists, ++ * the values are taken from nvl in the case of duplicates. ++ * Return 0 on success. ++ */ ++/*ARGSUSED*/ ++int ++nvlist_merge(nvlist_t *dst, nvlist_t *nvl, int flag) ++{ ++ if (nvl == NULL || dst == NULL) ++ return (EINVAL); ++ ++ if (dst != nvl) ++ return (nvlist_copy_pairs(nvl, dst)); ++ ++ return (0); ++} ++ ++/* ++ * Encoding related routines ++ */ ++#define NVS_OP_ENCODE 0 ++#define NVS_OP_DECODE 1 ++#define NVS_OP_GETSIZE 2 ++ ++typedef struct nvs_ops nvs_ops_t; ++ ++typedef struct { ++ int nvs_op; ++ const nvs_ops_t *nvs_ops; ++ void *nvs_private; ++ nvpriv_t *nvs_priv; ++} nvstream_t; ++ ++/* ++ * nvs operations are: ++ * - nvs_nvlist ++ * encoding / decoding of a nvlist header (nvlist_t) ++ * calculates the size used for header and end detection ++ * ++ * - nvs_nvpair ++ * responsible for the first part of encoding / decoding of an nvpair ++ * calculates the decoded size of an nvpair ++ * ++ * - nvs_nvp_op ++ * second part of encoding / decoding of an nvpair ++ * ++ * - nvs_nvp_size ++ * calculates the encoding size of an nvpair ++ * ++ * - nvs_nvl_fini ++ * encodes the end detection mark (zeros). 
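These callbacks drive nvlist_size(), nvlist_pack() and nvlist_unpack(), defined further below. A sketch of the usual round trip through a caller-supplied buffer (kernel-style kmem allocation assumed; userland code would malloc/free instead):

	size_t len = 0;
	char *buf;
	nvlist_t *copy = NULL;

	/* NVS_OP_GETSIZE, then NVS_OP_ENCODE into our own buffer */
	if (nvlist_size(nvl, &len, NV_ENCODE_NATIVE) == 0) {
		buf = kmem_alloc(len, KM_SLEEP);
		if (nvlist_pack(nvl, &buf, &len, NV_ENCODE_NATIVE, KM_SLEEP) == 0 &&
		    nvlist_unpack(buf, len, &copy, KM_SLEEP) == 0)	/* NVS_OP_DECODE */
			nvlist_free(copy);
		kmem_free(buf, len);
	}

Passing a preallocated *bufp skips the allocator bookkeeping in nvlist_xpack(); passing a NULL *bufp instead lets nvlist_pack() size and allocate the buffer itself.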
++ */ ++struct nvs_ops { ++ int (*nvs_nvlist)(nvstream_t *, nvlist_t *, size_t *); ++ int (*nvs_nvpair)(nvstream_t *, nvpair_t *, size_t *); ++ int (*nvs_nvp_op)(nvstream_t *, nvpair_t *); ++ int (*nvs_nvp_size)(nvstream_t *, nvpair_t *, size_t *); ++ int (*nvs_nvl_fini)(nvstream_t *); ++}; ++ ++typedef struct { ++ char nvh_encoding; /* nvs encoding method */ ++ char nvh_endian; /* nvs endian */ ++ char nvh_reserved1; /* reserved for future use */ ++ char nvh_reserved2; /* reserved for future use */ ++} nvs_header_t; ++ ++static int ++nvs_encode_pairs(nvstream_t *nvs, nvlist_t *nvl) ++{ ++ nvpriv_t *priv = (nvpriv_t *)(uintptr_t)nvl->nvl_priv; ++ i_nvp_t *curr; ++ ++ /* ++ * Walk nvpair in list and encode each nvpair ++ */ ++ for (curr = priv->nvp_list; curr != NULL; curr = curr->nvi_next) ++ if (nvs->nvs_ops->nvs_nvpair(nvs, &curr->nvi_nvp, NULL) != 0) ++ return (EFAULT); ++ ++ return (nvs->nvs_ops->nvs_nvl_fini(nvs)); ++} ++ ++static int ++nvs_decode_pairs(nvstream_t *nvs, nvlist_t *nvl) ++{ ++ nvpair_t *nvp; ++ size_t nvsize; ++ int err; ++ ++ /* ++ * Get decoded size of next pair in stream, alloc ++ * memory for nvpair_t, then decode the nvpair ++ */ ++ while ((err = nvs->nvs_ops->nvs_nvpair(nvs, NULL, &nvsize)) == 0) { ++ if (nvsize == 0) /* end of list */ ++ break; ++ ++ /* make sure len makes sense */ ++ if (nvsize < NVP_SIZE_CALC(1, 0)) ++ return (EFAULT); ++ ++ if ((nvp = nvp_buf_alloc(nvl, nvsize)) == NULL) ++ return (ENOMEM); ++ ++ if ((err = nvs->nvs_ops->nvs_nvp_op(nvs, nvp)) != 0) { ++ nvp_buf_free(nvl, nvp); ++ return (err); ++ } ++ ++ if (i_validate_nvpair(nvp) != 0) { ++ nvpair_free(nvp); ++ nvp_buf_free(nvl, nvp); ++ return (EFAULT); ++ } ++ ++ nvp_buf_link(nvl, nvp); ++ } ++ return (err); ++} ++ ++static int ++nvs_getsize_pairs(nvstream_t *nvs, nvlist_t *nvl, size_t *buflen) ++{ ++ nvpriv_t *priv = (nvpriv_t *)(uintptr_t)nvl->nvl_priv; ++ i_nvp_t *curr; ++ uint64_t nvsize = *buflen; ++ size_t size; ++ ++ /* ++ * Get encoded size of nvpairs in nvlist ++ */ ++ for (curr = priv->nvp_list; curr != NULL; curr = curr->nvi_next) { ++ if (nvs->nvs_ops->nvs_nvp_size(nvs, &curr->nvi_nvp, &size) != 0) ++ return (EINVAL); ++ ++ if ((nvsize += size) > INT32_MAX) ++ return (EINVAL); ++ } ++ ++ *buflen = nvsize; ++ return (0); ++} ++ ++static int ++nvs_operation(nvstream_t *nvs, nvlist_t *nvl, size_t *buflen) ++{ ++ int err; ++ ++ if (nvl->nvl_priv == 0) ++ return (EFAULT); ++ ++ /* ++ * Perform the operation, starting with header, then each nvpair ++ */ ++ if ((err = nvs->nvs_ops->nvs_nvlist(nvs, nvl, buflen)) != 0) ++ return (err); ++ ++ switch (nvs->nvs_op) { ++ case NVS_OP_ENCODE: ++ err = nvs_encode_pairs(nvs, nvl); ++ break; ++ ++ case NVS_OP_DECODE: ++ err = nvs_decode_pairs(nvs, nvl); ++ break; ++ ++ case NVS_OP_GETSIZE: ++ err = nvs_getsize_pairs(nvs, nvl, buflen); ++ break; ++ ++ default: ++ err = EINVAL; ++ } ++ ++ return (err); ++} ++ ++static int ++nvs_embedded(nvstream_t *nvs, nvlist_t *embedded) ++{ ++ switch (nvs->nvs_op) { ++ case NVS_OP_ENCODE: ++ return (nvs_operation(nvs, embedded, NULL)); ++ ++ case NVS_OP_DECODE: { ++ nvpriv_t *priv; ++ int err; ++ ++ if (embedded->nvl_version != NV_VERSION) ++ return (ENOTSUP); ++ ++ if ((priv = nv_priv_alloc_embedded(nvs->nvs_priv)) == NULL) ++ return (ENOMEM); ++ ++ nvlist_init(embedded, embedded->nvl_nvflag, priv); ++ ++ if ((err = nvs_operation(nvs, embedded, NULL)) != 0) ++ nvlist_free(embedded); ++ return (err); ++ } ++ default: ++ break; ++ } ++ ++ return (EINVAL); ++} ++ ++static int ++nvs_embedded_nvl_array(nvstream_t 
*nvs, nvpair_t *nvp, size_t *size) ++{ ++ size_t nelem = NVP_NELEM(nvp); ++ nvlist_t **nvlp = EMBEDDED_NVL_ARRAY(nvp); ++ int i; ++ ++ switch (nvs->nvs_op) { ++ case NVS_OP_ENCODE: ++ for (i = 0; i < nelem; i++) ++ if (nvs_embedded(nvs, nvlp[i]) != 0) ++ return (EFAULT); ++ break; ++ ++ case NVS_OP_DECODE: { ++ size_t len = nelem * sizeof (uint64_t); ++ nvlist_t *embedded = (nvlist_t *)((uintptr_t)nvlp + len); ++ ++ bzero(nvlp, len); /* don't trust packed data */ ++ for (i = 0; i < nelem; i++) { ++ if (nvs_embedded(nvs, embedded) != 0) { ++ nvpair_free(nvp); ++ return (EFAULT); ++ } ++ ++ nvlp[i] = embedded++; ++ } ++ break; ++ } ++ case NVS_OP_GETSIZE: { ++ uint64_t nvsize = 0; ++ ++ for (i = 0; i < nelem; i++) { ++ size_t nvp_sz = 0; ++ ++ if (nvs_operation(nvs, nvlp[i], &nvp_sz) != 0) ++ return (EINVAL); ++ ++ if ((nvsize += nvp_sz) > INT32_MAX) ++ return (EINVAL); ++ } ++ ++ *size = nvsize; ++ break; ++ } ++ default: ++ return (EINVAL); ++ } ++ ++ return (0); ++} ++ ++static int nvs_native(nvstream_t *, nvlist_t *, char *, size_t *); ++static int nvs_xdr(nvstream_t *, nvlist_t *, char *, size_t *); ++ ++/* ++ * Common routine for nvlist operations: ++ * encode, decode, getsize (encoded size). ++ */ ++static int ++nvlist_common(nvlist_t *nvl, char *buf, size_t *buflen, int encoding, ++ int nvs_op) ++{ ++ int err = 0; ++ nvstream_t nvs; ++ int nvl_endian; ++#ifdef _LITTLE_ENDIAN ++ int host_endian = 1; ++#else ++ int host_endian = 0; ++#endif /* _LITTLE_ENDIAN */ ++ nvs_header_t *nvh = (void *)buf; ++ ++ if (buflen == NULL || nvl == NULL || ++ (nvs.nvs_priv = (nvpriv_t *)(uintptr_t)nvl->nvl_priv) == NULL) ++ return (EINVAL); ++ ++ nvs.nvs_op = nvs_op; ++ ++ /* ++ * For NVS_OP_ENCODE and NVS_OP_DECODE make sure an nvlist and ++ * a buffer is allocated. The first 4 bytes in the buffer are ++ * used for encoding method and host endian. ++ */ ++ switch (nvs_op) { ++ case NVS_OP_ENCODE: ++ if (buf == NULL || *buflen < sizeof (nvs_header_t)) ++ return (EINVAL); ++ ++ nvh->nvh_encoding = encoding; ++ nvh->nvh_endian = nvl_endian = host_endian; ++ nvh->nvh_reserved1 = 0; ++ nvh->nvh_reserved2 = 0; ++ break; ++ ++ case NVS_OP_DECODE: ++ if (buf == NULL || *buflen < sizeof (nvs_header_t)) ++ return (EINVAL); ++ ++ /* get method of encoding from first byte */ ++ encoding = nvh->nvh_encoding; ++ nvl_endian = nvh->nvh_endian; ++ break; ++ ++ case NVS_OP_GETSIZE: ++ nvl_endian = host_endian; ++ ++ /* ++ * add the size for encoding ++ */ ++ *buflen = sizeof (nvs_header_t); ++ break; ++ ++ default: ++ return (ENOTSUP); ++ } ++ ++ /* ++ * Create an nvstream with proper encoding method ++ */ ++ switch (encoding) { ++ case NV_ENCODE_NATIVE: ++ /* ++ * check endianness, in case we are unpacking ++ * from a file ++ */ ++ if (nvl_endian != host_endian) ++ return (ENOTSUP); ++ err = nvs_native(&nvs, nvl, buf, buflen); ++ break; ++ case NV_ENCODE_XDR: ++ err = nvs_xdr(&nvs, nvl, buf, buflen); ++ break; ++ default: ++ err = ENOTSUP; ++ break; ++ } ++ ++ return (err); ++} ++ ++int ++nvlist_size(nvlist_t *nvl, size_t *size, int encoding) ++{ ++ return (nvlist_common(nvl, NULL, size, encoding, NVS_OP_GETSIZE)); ++} ++ ++/* ++ * Pack nvlist into contiguous memory ++ */ ++/*ARGSUSED1*/ ++int ++nvlist_pack(nvlist_t *nvl, char **bufp, size_t *buflen, int encoding, ++ int kmflag) ++{ ++#if defined(_KERNEL) && !defined(_BOOT) ++ return (nvlist_xpack(nvl, bufp, buflen, encoding, ++ (kmflag == KM_SLEEP ? 
nv_alloc_sleep : nv_alloc_nosleep))); ++#else ++ return (nvlist_xpack(nvl, bufp, buflen, encoding, nv_alloc_nosleep)); ++#endif ++} ++ ++int ++nvlist_xpack(nvlist_t *nvl, char **bufp, size_t *buflen, int encoding, ++ nv_alloc_t *nva) ++{ ++ nvpriv_t nvpriv; ++ size_t alloc_size; ++ char *buf; ++ int err; ++ ++ if (nva == NULL || nvl == NULL || bufp == NULL || buflen == NULL) ++ return (EINVAL); ++ ++ if (*bufp != NULL) ++ return (nvlist_common(nvl, *bufp, buflen, encoding, ++ NVS_OP_ENCODE)); ++ ++ /* ++ * Here is a difficult situation: ++ * 1. The nvlist has fixed allocator properties. ++ * All other nvlist routines (like nvlist_add_*, ...) use ++ * these properties. ++ * 2. When using nvlist_pack() the user can specify his own ++ * allocator properties (e.g. by using KM_NOSLEEP). ++ * ++ * We use the user specified properties (2). A clearer solution ++ * will be to remove the kmflag from nvlist_pack(), but we will ++ * not change the interface. ++ */ ++ nv_priv_init(&nvpriv, nva, 0); ++ ++ if ((err = nvlist_size(nvl, &alloc_size, encoding))) ++ return (err); ++ ++ if ((buf = nv_mem_zalloc(&nvpriv, alloc_size)) == NULL) ++ return (ENOMEM); ++ ++ if ((err = nvlist_common(nvl, buf, &alloc_size, encoding, ++ NVS_OP_ENCODE)) != 0) { ++ nv_mem_free(&nvpriv, buf, alloc_size); ++ } else { ++ *buflen = alloc_size; ++ *bufp = buf; ++ } ++ ++ return (err); ++} ++ ++/* ++ * Unpack buf into an nvlist_t ++ */ ++/*ARGSUSED1*/ ++int ++nvlist_unpack(char *buf, size_t buflen, nvlist_t **nvlp, int kmflag) ++{ ++#if defined(_KERNEL) && !defined(_BOOT) ++ return (nvlist_xunpack(buf, buflen, nvlp, ++ (kmflag == KM_SLEEP ? nv_alloc_sleep : nv_alloc_nosleep))); ++#else ++ return (nvlist_xunpack(buf, buflen, nvlp, nv_alloc_nosleep)); ++#endif ++} ++ ++int ++nvlist_xunpack(char *buf, size_t buflen, nvlist_t **nvlp, nv_alloc_t *nva) ++{ ++ nvlist_t *nvl; ++ int err; ++ ++ if (nvlp == NULL) ++ return (EINVAL); ++ ++ if ((err = nvlist_xalloc(&nvl, 0, nva)) != 0) ++ return (err); ++ ++ if ((err = nvlist_common(nvl, buf, &buflen, 0, NVS_OP_DECODE)) != 0) ++ nvlist_free(nvl); ++ else ++ *nvlp = nvl; ++ ++ return (err); ++} ++ ++/* ++ * Native encoding functions ++ */ ++typedef struct { ++ /* ++ * This structure is used when decoding a packed nvpair in ++ * the native format. n_base points to a buffer containing the ++ * packed nvpair. n_end is a pointer to the end of the buffer. ++ * (n_end actually points to the first byte past the end of the ++ * buffer.) n_curr is a pointer that lies between n_base and n_end. ++ * It points to the current data that we are decoding. ++ * The amount of data left in the buffer is equal to n_end - n_curr. ++ * n_flag is used to recognize a packed embedded list. 
++ */ ++ caddr_t n_base; ++ caddr_t n_end; ++ caddr_t n_curr; ++ uint_t n_flag; ++} nvs_native_t; ++ ++static int ++nvs_native_create(nvstream_t *nvs, nvs_native_t *native, char *buf, ++ size_t buflen) ++{ ++ switch (nvs->nvs_op) { ++ case NVS_OP_ENCODE: ++ case NVS_OP_DECODE: ++ nvs->nvs_private = native; ++ native->n_curr = native->n_base = buf; ++ native->n_end = buf + buflen; ++ native->n_flag = 0; ++ return (0); ++ ++ case NVS_OP_GETSIZE: ++ nvs->nvs_private = native; ++ native->n_curr = native->n_base = native->n_end = NULL; ++ native->n_flag = 0; ++ return (0); ++ default: ++ return (EINVAL); ++ } ++} ++ ++/*ARGSUSED*/ ++static void ++nvs_native_destroy(nvstream_t *nvs) ++{ ++} ++ ++static int ++native_cp(nvstream_t *nvs, void *buf, size_t size) ++{ ++ nvs_native_t *native = (nvs_native_t *)nvs->nvs_private; ++ ++ if (native->n_curr + size > native->n_end) ++ return (EFAULT); ++ ++ /* ++ * The bcopy() below eliminates alignment requirement ++ * on the buffer (stream) and is preferred over direct access. ++ */ ++ switch (nvs->nvs_op) { ++ case NVS_OP_ENCODE: ++ bcopy(buf, native->n_curr, size); ++ break; ++ case NVS_OP_DECODE: ++ bcopy(native->n_curr, buf, size); ++ break; ++ default: ++ return (EINVAL); ++ } ++ ++ native->n_curr += size; ++ return (0); ++} ++ ++/* ++ * operate on nvlist_t header ++ */ ++static int ++nvs_native_nvlist(nvstream_t *nvs, nvlist_t *nvl, size_t *size) ++{ ++ nvs_native_t *native = nvs->nvs_private; ++ ++ switch (nvs->nvs_op) { ++ case NVS_OP_ENCODE: ++ case NVS_OP_DECODE: ++ if (native->n_flag) ++ return (0); /* packed embedded list */ ++ ++ native->n_flag = 1; ++ ++ /* copy version and nvflag of the nvlist_t */ ++ if (native_cp(nvs, &nvl->nvl_version, sizeof (int32_t)) != 0 || ++ native_cp(nvs, &nvl->nvl_nvflag, sizeof (int32_t)) != 0) ++ return (EFAULT); ++ ++ return (0); ++ ++ case NVS_OP_GETSIZE: ++ /* ++ * if calculate for packed embedded list ++ * 4 for end of the embedded list ++ * else ++ * 2 * sizeof (int32_t) for nvl_version and nvl_nvflag ++ * and 4 for end of the entire list ++ */ ++ if (native->n_flag) { ++ *size += 4; ++ } else { ++ native->n_flag = 1; ++ *size += 2 * sizeof (int32_t) + 4; ++ } ++ ++ return (0); ++ ++ default: ++ return (EINVAL); ++ } ++} ++ ++static int ++nvs_native_nvl_fini(nvstream_t *nvs) ++{ ++ if (nvs->nvs_op == NVS_OP_ENCODE) { ++ nvs_native_t *native = (nvs_native_t *)nvs->nvs_private; ++ /* ++ * Add 4 zero bytes at end of nvlist. They are used ++ * for end detection by the decode routine. ++ */ ++ if (native->n_curr + sizeof (int) > native->n_end) ++ return (EFAULT); ++ ++ bzero(native->n_curr, sizeof (int)); ++ native->n_curr += sizeof (int); ++ } ++ ++ return (0); ++} ++ ++static int ++nvpair_native_embedded(nvstream_t *nvs, nvpair_t *nvp) ++{ ++ if (nvs->nvs_op == NVS_OP_ENCODE) { ++ nvs_native_t *native = (nvs_native_t *)nvs->nvs_private; ++ nvlist_t *packed = (void *) ++ (native->n_curr - nvp->nvp_size + NVP_VALOFF(nvp)); ++ /* ++ * Null out the pointer that is meaningless in the packed ++ * structure. The address may not be aligned, so we have ++ * to use bzero. 
++ */ ++ bzero(&packed->nvl_priv, sizeof (packed->nvl_priv)); ++ } ++ ++ return (nvs_embedded(nvs, EMBEDDED_NVL(nvp))); ++} ++ ++static int ++nvpair_native_embedded_array(nvstream_t *nvs, nvpair_t *nvp) ++{ ++ if (nvs->nvs_op == NVS_OP_ENCODE) { ++ nvs_native_t *native = (nvs_native_t *)nvs->nvs_private; ++ char *value = native->n_curr - nvp->nvp_size + NVP_VALOFF(nvp); ++ size_t len = NVP_NELEM(nvp) * sizeof (uint64_t); ++ nvlist_t *packed = (nvlist_t *)((uintptr_t)value + len); ++ int i; ++ /* ++ * Null out pointers that are meaningless in the packed ++ * structure. The addresses may not be aligned, so we have ++ * to use bzero. ++ */ ++ bzero(value, len); ++ ++ for (i = 0; i < NVP_NELEM(nvp); i++, packed++) ++ /* ++ * Null out the pointer that is meaningless in the ++ * packed structure. The address may not be aligned, ++ * so we have to use bzero. ++ */ ++ bzero(&packed->nvl_priv, sizeof (packed->nvl_priv)); ++ } ++ ++ return (nvs_embedded_nvl_array(nvs, nvp, NULL)); ++} ++ ++static void ++nvpair_native_string_array(nvstream_t *nvs, nvpair_t *nvp) ++{ ++ switch (nvs->nvs_op) { ++ case NVS_OP_ENCODE: { ++ nvs_native_t *native = (nvs_native_t *)nvs->nvs_private; ++ uint64_t *strp = (void *) ++ (native->n_curr - nvp->nvp_size + NVP_VALOFF(nvp)); ++ /* ++ * Null out pointers that are meaningless in the packed ++ * structure. The addresses may not be aligned, so we have ++ * to use bzero. ++ */ ++ bzero(strp, NVP_NELEM(nvp) * sizeof (uint64_t)); ++ break; ++ } ++ case NVS_OP_DECODE: { ++ char **strp = (void *)NVP_VALUE(nvp); ++ char *buf = ((char *)strp + NVP_NELEM(nvp) * sizeof (uint64_t)); ++ int i; ++ ++ for (i = 0; i < NVP_NELEM(nvp); i++) { ++ strp[i] = buf; ++ buf += strlen(buf) + 1; ++ } ++ break; ++ } ++ } ++} ++ ++static int ++nvs_native_nvp_op(nvstream_t *nvs, nvpair_t *nvp) ++{ ++ data_type_t type; ++ int value_sz; ++ int ret = 0; ++ ++ /* ++ * We do the initial bcopy of the data before we look at ++ * the nvpair type, because when we're decoding, we won't ++ * have the correct values for the pair until we do the bcopy. ++ */ ++ switch (nvs->nvs_op) { ++ case NVS_OP_ENCODE: ++ case NVS_OP_DECODE: ++ if (native_cp(nvs, nvp, nvp->nvp_size) != 0) ++ return (EFAULT); ++ break; ++ default: ++ return (EINVAL); ++ } ++ ++ /* verify nvp_name_sz, check the name string length */ ++ if (i_validate_nvpair_name(nvp) != 0) ++ return (EFAULT); ++ ++ type = NVP_TYPE(nvp); ++ ++ /* ++ * Verify type and nelem and get the value size. ++ * In case of data types DATA_TYPE_STRING and DATA_TYPE_STRING_ARRAY ++ * is the size of the string(s) excluded. 
++ */ ++ if ((value_sz = i_get_value_size(type, NULL, NVP_NELEM(nvp))) < 0) ++ return (EFAULT); ++ ++ if (NVP_SIZE_CALC(nvp->nvp_name_sz, value_sz) > nvp->nvp_size) ++ return (EFAULT); ++ ++ switch (type) { ++ case DATA_TYPE_NVLIST: ++ ret = nvpair_native_embedded(nvs, nvp); ++ break; ++ case DATA_TYPE_NVLIST_ARRAY: ++ ret = nvpair_native_embedded_array(nvs, nvp); ++ break; ++ case DATA_TYPE_STRING_ARRAY: ++ nvpair_native_string_array(nvs, nvp); ++ break; ++ default: ++ break; ++ } ++ ++ return (ret); ++} ++ ++static int ++nvs_native_nvp_size(nvstream_t *nvs, nvpair_t *nvp, size_t *size) ++{ ++ uint64_t nvp_sz = nvp->nvp_size; ++ ++ switch (NVP_TYPE(nvp)) { ++ case DATA_TYPE_NVLIST: { ++ size_t nvsize = 0; ++ ++ if (nvs_operation(nvs, EMBEDDED_NVL(nvp), &nvsize) != 0) ++ return (EINVAL); ++ ++ nvp_sz += nvsize; ++ break; ++ } ++ case DATA_TYPE_NVLIST_ARRAY: { ++ size_t nvsize; ++ ++ if (nvs_embedded_nvl_array(nvs, nvp, &nvsize) != 0) ++ return (EINVAL); ++ ++ nvp_sz += nvsize; ++ break; ++ } ++ default: ++ break; ++ } ++ ++ if (nvp_sz > INT32_MAX) ++ return (EINVAL); ++ ++ *size = nvp_sz; ++ ++ return (0); ++} ++ ++static int ++nvs_native_nvpair(nvstream_t *nvs, nvpair_t *nvp, size_t *size) ++{ ++ switch (nvs->nvs_op) { ++ case NVS_OP_ENCODE: ++ return (nvs_native_nvp_op(nvs, nvp)); ++ ++ case NVS_OP_DECODE: { ++ nvs_native_t *native = (nvs_native_t *)nvs->nvs_private; ++ int32_t decode_len; ++ ++ /* try to read the size value from the stream */ ++ if (native->n_curr + sizeof (int32_t) > native->n_end) ++ return (EFAULT); ++ bcopy(native->n_curr, &decode_len, sizeof (int32_t)); ++ ++ /* sanity check the size value */ ++ if (decode_len < 0 || ++ decode_len > native->n_end - native->n_curr) ++ return (EFAULT); ++ ++ *size = decode_len; ++ ++ /* ++ * If at the end of the stream then move the cursor ++ * forward, otherwise nvpair_native_op() will read ++ * the entire nvpair at the same cursor position. 
++ */ ++ if (*size == 0) ++ native->n_curr += sizeof (int32_t); ++ break; ++ } ++ ++ default: ++ return (EINVAL); ++ } ++ ++ return (0); ++} ++ ++static const nvs_ops_t nvs_native_ops = { ++ nvs_native_nvlist, ++ nvs_native_nvpair, ++ nvs_native_nvp_op, ++ nvs_native_nvp_size, ++ nvs_native_nvl_fini ++}; ++ ++static int ++nvs_native(nvstream_t *nvs, nvlist_t *nvl, char *buf, size_t *buflen) ++{ ++ nvs_native_t native; ++ int err; ++ ++ nvs->nvs_ops = &nvs_native_ops; ++ ++ if ((err = nvs_native_create(nvs, &native, buf + sizeof (nvs_header_t), ++ *buflen - sizeof (nvs_header_t))) != 0) ++ return (err); ++ ++ err = nvs_operation(nvs, nvl, buflen); ++ ++ nvs_native_destroy(nvs); ++ ++ return (err); ++} ++ ++/* ++ * XDR encoding functions ++ * ++ * An xdr packed nvlist is encoded as: ++ * ++ * - encoding methode and host endian (4 bytes) ++ * - nvl_version (4 bytes) ++ * - nvl_nvflag (4 bytes) ++ * ++ * - encoded nvpairs, the format of one xdr encoded nvpair is: ++ * - encoded size of the nvpair (4 bytes) ++ * - decoded size of the nvpair (4 bytes) ++ * - name string, (4 + sizeof(NV_ALIGN4(string)) ++ * a string is coded as size (4 bytes) and data ++ * - data type (4 bytes) ++ * - number of elements in the nvpair (4 bytes) ++ * - data ++ * ++ * - 2 zero's for end of the entire list (8 bytes) ++ */ ++static int ++nvs_xdr_create(nvstream_t *nvs, XDR *xdr, char *buf, size_t buflen) ++{ ++ /* xdr data must be 4 byte aligned */ ++ if ((ulong_t)buf % 4 != 0) ++ return (EFAULT); ++ ++ switch (nvs->nvs_op) { ++ case NVS_OP_ENCODE: ++ xdrmem_create(xdr, buf, (uint_t)buflen, XDR_ENCODE); ++ nvs->nvs_private = xdr; ++ return (0); ++ case NVS_OP_DECODE: ++ xdrmem_create(xdr, buf, (uint_t)buflen, XDR_DECODE); ++ nvs->nvs_private = xdr; ++ return (0); ++ case NVS_OP_GETSIZE: ++ nvs->nvs_private = NULL; ++ return (0); ++ default: ++ return (EINVAL); ++ } ++} ++ ++static void ++nvs_xdr_destroy(nvstream_t *nvs) ++{ ++ switch (nvs->nvs_op) { ++ case NVS_OP_ENCODE: ++ case NVS_OP_DECODE: ++ xdr_destroy((XDR *)nvs->nvs_private); ++ break; ++ default: ++ break; ++ } ++} ++ ++static int ++nvs_xdr_nvlist(nvstream_t *nvs, nvlist_t *nvl, size_t *size) ++{ ++ switch (nvs->nvs_op) { ++ case NVS_OP_ENCODE: ++ case NVS_OP_DECODE: { ++ XDR *xdr = nvs->nvs_private; ++ ++ if (!xdr_int(xdr, &nvl->nvl_version) || ++ !xdr_u_int(xdr, &nvl->nvl_nvflag)) ++ return (EFAULT); ++ break; ++ } ++ case NVS_OP_GETSIZE: { ++ /* ++ * 2 * 4 for nvl_version + nvl_nvflag ++ * and 8 for end of the entire list ++ */ ++ *size += 2 * 4 + 8; ++ break; ++ } ++ default: ++ return (EINVAL); ++ } ++ return (0); ++} ++ ++static int ++nvs_xdr_nvl_fini(nvstream_t *nvs) ++{ ++ if (nvs->nvs_op == NVS_OP_ENCODE) { ++ XDR *xdr = nvs->nvs_private; ++ int zero = 0; ++ ++ if (!xdr_int(xdr, &zero) || !xdr_int(xdr, &zero)) ++ return (EFAULT); ++ } ++ ++ return (0); ++} ++ ++/* ++ * The format of xdr encoded nvpair is: ++ * encode_size, decode_size, name string, data type, nelem, data ++ */ ++static int ++nvs_xdr_nvp_op(nvstream_t *nvs, nvpair_t *nvp) ++{ ++ data_type_t type; ++ char *buf; ++ char *buf_end = (char *)nvp + nvp->nvp_size; ++ int value_sz; ++ uint_t nelem, buflen; ++ bool_t ret = FALSE; ++ XDR *xdr = nvs->nvs_private; ++ ++ ASSERT(xdr != NULL && nvp != NULL); ++ ++ /* name string */ ++ if ((buf = NVP_NAME(nvp)) >= buf_end) ++ return (EFAULT); ++ buflen = buf_end - buf; ++ ++ if (!xdr_string(xdr, &buf, buflen - 1)) ++ return (EFAULT); ++ nvp->nvp_name_sz = strlen(buf) + 1; ++ ++ /* type and nelem */ ++ if (!xdr_int(xdr, (int *)&nvp->nvp_type) || ++ 
!xdr_int(xdr, &nvp->nvp_value_elem)) ++ return (EFAULT); ++ ++ type = NVP_TYPE(nvp); ++ nelem = nvp->nvp_value_elem; ++ ++ /* ++ * Verify type and nelem and get the value size. ++ * In case of data types DATA_TYPE_STRING and DATA_TYPE_STRING_ARRAY ++ * is the size of the string(s) excluded. ++ */ ++ if ((value_sz = i_get_value_size(type, NULL, nelem)) < 0) ++ return (EFAULT); ++ ++ /* if there is no data to extract then return */ ++ if (nelem == 0) ++ return (0); ++ ++ /* value */ ++ if ((buf = NVP_VALUE(nvp)) >= buf_end) ++ return (EFAULT); ++ buflen = buf_end - buf; ++ ++ if (buflen < value_sz) ++ return (EFAULT); ++ ++ switch (type) { ++ case DATA_TYPE_NVLIST: ++ if (nvs_embedded(nvs, (void *)buf) == 0) ++ return (0); ++ break; ++ ++ case DATA_TYPE_NVLIST_ARRAY: ++ if (nvs_embedded_nvl_array(nvs, nvp, NULL) == 0) ++ return (0); ++ break; ++ ++ case DATA_TYPE_BOOLEAN: ++ ret = TRUE; ++ break; ++ ++ case DATA_TYPE_BYTE: ++ case DATA_TYPE_INT8: ++ case DATA_TYPE_UINT8: ++ ret = xdr_char(xdr, buf); ++ break; ++ ++ case DATA_TYPE_INT16: ++ ret = xdr_short(xdr, (void *)buf); ++ break; ++ ++ case DATA_TYPE_UINT16: ++ ret = xdr_u_short(xdr, (void *)buf); ++ break; ++ ++ case DATA_TYPE_BOOLEAN_VALUE: ++ case DATA_TYPE_INT32: ++ ret = xdr_int(xdr, (void *)buf); ++ break; ++ ++ case DATA_TYPE_UINT32: ++ ret = xdr_u_int(xdr, (void *)buf); ++ break; ++ ++ case DATA_TYPE_INT64: ++ ret = xdr_longlong_t(xdr, (void *)buf); ++ break; ++ ++ case DATA_TYPE_UINT64: ++ ret = xdr_u_longlong_t(xdr, (void *)buf); ++ break; ++ ++ case DATA_TYPE_HRTIME: ++ /* ++ * NOTE: must expose the definition of hrtime_t here ++ */ ++ ret = xdr_longlong_t(xdr, (void *)buf); ++ break; ++#if !defined(_KERNEL) ++ case DATA_TYPE_DOUBLE: ++ ret = xdr_double(xdr, (void *)buf); ++ break; ++#endif ++ case DATA_TYPE_STRING: ++ ret = xdr_string(xdr, &buf, buflen - 1); ++ break; ++ ++ case DATA_TYPE_BYTE_ARRAY: ++ ret = xdr_opaque(xdr, buf, nelem); ++ break; ++ ++ case DATA_TYPE_INT8_ARRAY: ++ case DATA_TYPE_UINT8_ARRAY: ++ ret = xdr_array(xdr, &buf, &nelem, buflen, sizeof (int8_t), ++ (xdrproc_t)xdr_char); ++ break; ++ ++ case DATA_TYPE_INT16_ARRAY: ++ ret = xdr_array(xdr, &buf, &nelem, buflen / sizeof (int16_t), ++ sizeof (int16_t), (xdrproc_t)xdr_short); ++ break; ++ ++ case DATA_TYPE_UINT16_ARRAY: ++ ret = xdr_array(xdr, &buf, &nelem, buflen / sizeof (uint16_t), ++ sizeof (uint16_t), (xdrproc_t)xdr_u_short); ++ break; ++ ++ case DATA_TYPE_BOOLEAN_ARRAY: ++ case DATA_TYPE_INT32_ARRAY: ++ ret = xdr_array(xdr, &buf, &nelem, buflen / sizeof (int32_t), ++ sizeof (int32_t), (xdrproc_t)xdr_int); ++ break; ++ ++ case DATA_TYPE_UINT32_ARRAY: ++ ret = xdr_array(xdr, &buf, &nelem, buflen / sizeof (uint32_t), ++ sizeof (uint32_t), (xdrproc_t)xdr_u_int); ++ break; ++ ++ case DATA_TYPE_INT64_ARRAY: ++ ret = xdr_array(xdr, &buf, &nelem, buflen / sizeof (int64_t), ++ sizeof (int64_t), (xdrproc_t)xdr_longlong_t); ++ break; ++ ++ case DATA_TYPE_UINT64_ARRAY: ++ ret = xdr_array(xdr, &buf, &nelem, buflen / sizeof (uint64_t), ++ sizeof (uint64_t), (xdrproc_t)xdr_u_longlong_t); ++ break; ++ ++ case DATA_TYPE_STRING_ARRAY: { ++ size_t len = nelem * sizeof (uint64_t); ++ char **strp = (void *)buf; ++ int i; ++ ++ if (nvs->nvs_op == NVS_OP_DECODE) ++ bzero(buf, len); /* don't trust packed data */ ++ ++ for (i = 0; i < nelem; i++) { ++ if (buflen <= len) ++ return (EFAULT); ++ ++ buf += len; ++ buflen -= len; ++ ++ if (xdr_string(xdr, &buf, buflen - 1) != TRUE) ++ return (EFAULT); ++ ++ if (nvs->nvs_op == NVS_OP_DECODE) ++ strp[i] = buf; ++ len = strlen(buf) 
+ 1; ++ } ++ ret = TRUE; ++ break; ++ } ++ default: ++ break; ++ } ++ ++ return (ret == TRUE ? 0 : EFAULT); ++} ++ ++static int ++nvs_xdr_nvp_size(nvstream_t *nvs, nvpair_t *nvp, size_t *size) ++{ ++ data_type_t type = NVP_TYPE(nvp); ++ /* ++ * encode_size + decode_size + name string size + data type + nelem ++ * where name string size = 4 + NV_ALIGN4(strlen(NVP_NAME(nvp))) ++ */ ++ uint64_t nvp_sz = 4 + 4 + 4 + NV_ALIGN4(strlen(NVP_NAME(nvp))) + 4 + 4; ++ ++ switch (type) { ++ case DATA_TYPE_BOOLEAN: ++ break; ++ ++ case DATA_TYPE_BOOLEAN_VALUE: ++ case DATA_TYPE_BYTE: ++ case DATA_TYPE_INT8: ++ case DATA_TYPE_UINT8: ++ case DATA_TYPE_INT16: ++ case DATA_TYPE_UINT16: ++ case DATA_TYPE_INT32: ++ case DATA_TYPE_UINT32: ++ nvp_sz += 4; /* 4 is the minimum xdr unit */ ++ break; ++ ++ case DATA_TYPE_INT64: ++ case DATA_TYPE_UINT64: ++ case DATA_TYPE_HRTIME: ++#if !defined(_KERNEL) ++ case DATA_TYPE_DOUBLE: ++#endif ++ nvp_sz += 8; ++ break; ++ ++ case DATA_TYPE_STRING: ++ nvp_sz += 4 + NV_ALIGN4(strlen((char *)NVP_VALUE(nvp))); ++ break; ++ ++ case DATA_TYPE_BYTE_ARRAY: ++ nvp_sz += NV_ALIGN4(NVP_NELEM(nvp)); ++ break; ++ ++ case DATA_TYPE_BOOLEAN_ARRAY: ++ case DATA_TYPE_INT8_ARRAY: ++ case DATA_TYPE_UINT8_ARRAY: ++ case DATA_TYPE_INT16_ARRAY: ++ case DATA_TYPE_UINT16_ARRAY: ++ case DATA_TYPE_INT32_ARRAY: ++ case DATA_TYPE_UINT32_ARRAY: ++ nvp_sz += 4 + 4 * (uint64_t)NVP_NELEM(nvp); ++ break; ++ ++ case DATA_TYPE_INT64_ARRAY: ++ case DATA_TYPE_UINT64_ARRAY: ++ nvp_sz += 4 + 8 * (uint64_t)NVP_NELEM(nvp); ++ break; ++ ++ case DATA_TYPE_STRING_ARRAY: { ++ int i; ++ char **strs = (void *)NVP_VALUE(nvp); ++ ++ for (i = 0; i < NVP_NELEM(nvp); i++) ++ nvp_sz += 4 + NV_ALIGN4(strlen(strs[i])); ++ ++ break; ++ } ++ ++ case DATA_TYPE_NVLIST: ++ case DATA_TYPE_NVLIST_ARRAY: { ++ size_t nvsize = 0; ++ int old_nvs_op = nvs->nvs_op; ++ int err; ++ ++ nvs->nvs_op = NVS_OP_GETSIZE; ++ if (type == DATA_TYPE_NVLIST) ++ err = nvs_operation(nvs, EMBEDDED_NVL(nvp), &nvsize); ++ else ++ err = nvs_embedded_nvl_array(nvs, nvp, &nvsize); ++ nvs->nvs_op = old_nvs_op; ++ ++ if (err != 0) ++ return (EINVAL); ++ ++ nvp_sz += nvsize; ++ break; ++ } ++ ++ default: ++ return (EINVAL); ++ } ++ ++ if (nvp_sz > INT32_MAX) ++ return (EINVAL); ++ ++ *size = nvp_sz; ++ ++ return (0); ++} ++ ++ ++/* ++ * The NVS_XDR_MAX_LEN macro takes a packed xdr buffer of size x and estimates ++ * the largest nvpair that could be encoded in the buffer. ++ * ++ * See comments above nvpair_xdr_op() for the format of xdr encoding. ++ * The size of a xdr packed nvpair without any data is 5 words. ++ * ++ * Using the size of the data directly as an estimate would be ok ++ * in all cases except one. If the data type is of DATA_TYPE_STRING_ARRAY ++ * then the actual nvpair has space for an array of pointers to index ++ * the strings. These pointers are not encoded into the packed xdr buffer. ++ * ++ * If the data is of type DATA_TYPE_STRING_ARRAY and all the strings are ++ * of length 0, then each string is endcoded in xdr format as a single word. ++ * Therefore when expanded to an nvpair there will be 2.25 word used for ++ * each string. (a int64_t allocated for pointer usage, and a single char ++ * for the null termination.) ++ * ++ * This is the calculation performed by the NVS_XDR_MAX_LEN macro. ++ */ ++#define NVS_XDR_HDR_LEN ((size_t)(5 * 4)) ++#define NVS_XDR_DATA_LEN(y) (((size_t)(y) <= NVS_XDR_HDR_LEN) ? 
\ ++ 0 : ((size_t)(y) - NVS_XDR_HDR_LEN)) ++#define NVS_XDR_MAX_LEN(x) (NVP_SIZE_CALC(1, 0) + \ ++ (NVS_XDR_DATA_LEN(x) * 2) + \ ++ NV_ALIGN4((NVS_XDR_DATA_LEN(x) / 4))) ++ ++static int ++nvs_xdr_nvpair(nvstream_t *nvs, nvpair_t *nvp, size_t *size) ++{ ++ XDR *xdr = nvs->nvs_private; ++ int32_t encode_len, decode_len; ++ ++ switch (nvs->nvs_op) { ++ case NVS_OP_ENCODE: { ++ size_t nvsize; ++ ++ if (nvs_xdr_nvp_size(nvs, nvp, &nvsize) != 0) ++ return (EFAULT); ++ ++ decode_len = nvp->nvp_size; ++ encode_len = nvsize; ++ if (!xdr_int(xdr, &encode_len) || !xdr_int(xdr, &decode_len)) ++ return (EFAULT); ++ ++ return (nvs_xdr_nvp_op(nvs, nvp)); ++ } ++ case NVS_OP_DECODE: { ++ struct xdr_bytesrec bytesrec; ++ ++ /* get the encode and decode size */ ++ if (!xdr_int(xdr, &encode_len) || !xdr_int(xdr, &decode_len)) ++ return (EFAULT); ++ *size = decode_len; ++ ++ /* are we at the end of the stream? */ ++ if (*size == 0) ++ return (0); ++ ++ /* sanity check the size parameter */ ++ if (!xdr_control(xdr, XDR_GET_BYTES_AVAIL, &bytesrec)) ++ return (EFAULT); ++ ++ if (*size > NVS_XDR_MAX_LEN(bytesrec.xc_num_avail)) ++ return (EFAULT); ++ break; ++ } ++ ++ default: ++ return (EINVAL); ++ } ++ return (0); ++} ++ ++static const struct nvs_ops nvs_xdr_ops = { ++ nvs_xdr_nvlist, ++ nvs_xdr_nvpair, ++ nvs_xdr_nvp_op, ++ nvs_xdr_nvp_size, ++ nvs_xdr_nvl_fini ++}; ++ ++static int ++nvs_xdr(nvstream_t *nvs, nvlist_t *nvl, char *buf, size_t *buflen) ++{ ++ XDR xdr; ++ int err; ++ ++ nvs->nvs_ops = &nvs_xdr_ops; ++ ++ if ((err = nvs_xdr_create(nvs, &xdr, buf + sizeof (nvs_header_t), ++ *buflen - sizeof (nvs_header_t))) != 0) ++ return (err); ++ ++ err = nvs_operation(nvs, nvl, buflen); ++ ++ nvs_xdr_destroy(nvs); ++ ++ return (err); ++} ++ ++#if defined(_KERNEL) && defined(HAVE_SPL) ++ ++static int nvpair_init(void) { return 0; } ++static int nvpair_fini(void) { return 0; } ++ ++spl_module_init(nvpair_init); ++spl_module_exit(nvpair_fini); ++ ++MODULE_DESCRIPTION("Generic name/value pair implementation"); ++MODULE_AUTHOR(ZFS_META_AUTHOR); ++MODULE_LICENSE(ZFS_META_LICENSE); ++ ++EXPORT_SYMBOL(nv_alloc_init); ++EXPORT_SYMBOL(nv_alloc_reset); ++EXPORT_SYMBOL(nv_alloc_fini); ++ ++/* list management */ ++EXPORT_SYMBOL(nvlist_alloc); ++EXPORT_SYMBOL(nvlist_free); ++EXPORT_SYMBOL(nvlist_size); ++EXPORT_SYMBOL(nvlist_pack); ++EXPORT_SYMBOL(nvlist_unpack); ++EXPORT_SYMBOL(nvlist_dup); ++EXPORT_SYMBOL(nvlist_merge); ++ ++EXPORT_SYMBOL(nvlist_xalloc); ++EXPORT_SYMBOL(nvlist_xpack); ++EXPORT_SYMBOL(nvlist_xunpack); ++EXPORT_SYMBOL(nvlist_xdup); ++EXPORT_SYMBOL(nvlist_lookup_nv_alloc); ++ ++EXPORT_SYMBOL(nvlist_add_nvpair); ++EXPORT_SYMBOL(nvlist_add_boolean); ++EXPORT_SYMBOL(nvlist_add_boolean_value); ++EXPORT_SYMBOL(nvlist_add_byte); ++EXPORT_SYMBOL(nvlist_add_int8); ++EXPORT_SYMBOL(nvlist_add_uint8); ++EXPORT_SYMBOL(nvlist_add_int16); ++EXPORT_SYMBOL(nvlist_add_uint16); ++EXPORT_SYMBOL(nvlist_add_int32); ++EXPORT_SYMBOL(nvlist_add_uint32); ++EXPORT_SYMBOL(nvlist_add_int64); ++EXPORT_SYMBOL(nvlist_add_uint64); ++EXPORT_SYMBOL(nvlist_add_string); ++EXPORT_SYMBOL(nvlist_add_nvlist); ++EXPORT_SYMBOL(nvlist_add_boolean_array); ++EXPORT_SYMBOL(nvlist_add_byte_array); ++EXPORT_SYMBOL(nvlist_add_int8_array); ++EXPORT_SYMBOL(nvlist_add_uint8_array); ++EXPORT_SYMBOL(nvlist_add_int16_array); ++EXPORT_SYMBOL(nvlist_add_uint16_array); ++EXPORT_SYMBOL(nvlist_add_int32_array); ++EXPORT_SYMBOL(nvlist_add_uint32_array); ++EXPORT_SYMBOL(nvlist_add_int64_array); ++EXPORT_SYMBOL(nvlist_add_uint64_array); 
++EXPORT_SYMBOL(nvlist_add_string_array); ++EXPORT_SYMBOL(nvlist_add_nvlist_array); ++EXPORT_SYMBOL(nvlist_next_nvpair); ++EXPORT_SYMBOL(nvlist_prev_nvpair); ++EXPORT_SYMBOL(nvlist_empty); ++EXPORT_SYMBOL(nvlist_add_hrtime); ++ ++EXPORT_SYMBOL(nvlist_remove); ++EXPORT_SYMBOL(nvlist_remove_nvpair); ++EXPORT_SYMBOL(nvlist_remove_all); ++ ++EXPORT_SYMBOL(nvlist_lookup_boolean); ++EXPORT_SYMBOL(nvlist_lookup_boolean_value); ++EXPORT_SYMBOL(nvlist_lookup_byte); ++EXPORT_SYMBOL(nvlist_lookup_int8); ++EXPORT_SYMBOL(nvlist_lookup_uint8); ++EXPORT_SYMBOL(nvlist_lookup_int16); ++EXPORT_SYMBOL(nvlist_lookup_uint16); ++EXPORT_SYMBOL(nvlist_lookup_int32); ++EXPORT_SYMBOL(nvlist_lookup_uint32); ++EXPORT_SYMBOL(nvlist_lookup_int64); ++EXPORT_SYMBOL(nvlist_lookup_uint64); ++EXPORT_SYMBOL(nvlist_lookup_string); ++EXPORT_SYMBOL(nvlist_lookup_nvlist); ++EXPORT_SYMBOL(nvlist_lookup_boolean_array); ++EXPORT_SYMBOL(nvlist_lookup_byte_array); ++EXPORT_SYMBOL(nvlist_lookup_int8_array); ++EXPORT_SYMBOL(nvlist_lookup_uint8_array); ++EXPORT_SYMBOL(nvlist_lookup_int16_array); ++EXPORT_SYMBOL(nvlist_lookup_uint16_array); ++EXPORT_SYMBOL(nvlist_lookup_int32_array); ++EXPORT_SYMBOL(nvlist_lookup_uint32_array); ++EXPORT_SYMBOL(nvlist_lookup_int64_array); ++EXPORT_SYMBOL(nvlist_lookup_uint64_array); ++EXPORT_SYMBOL(nvlist_lookup_string_array); ++EXPORT_SYMBOL(nvlist_lookup_nvlist_array); ++EXPORT_SYMBOL(nvlist_lookup_hrtime); ++EXPORT_SYMBOL(nvlist_lookup_pairs); ++ ++EXPORT_SYMBOL(nvlist_lookup_nvpair); ++EXPORT_SYMBOL(nvlist_exists); ++ ++/* processing nvpair */ ++EXPORT_SYMBOL(nvpair_name); ++EXPORT_SYMBOL(nvpair_type); ++EXPORT_SYMBOL(nvpair_value_boolean_value); ++EXPORT_SYMBOL(nvpair_value_byte); ++EXPORT_SYMBOL(nvpair_value_int8); ++EXPORT_SYMBOL(nvpair_value_uint8); ++EXPORT_SYMBOL(nvpair_value_int16); ++EXPORT_SYMBOL(nvpair_value_uint16); ++EXPORT_SYMBOL(nvpair_value_int32); ++EXPORT_SYMBOL(nvpair_value_uint32); ++EXPORT_SYMBOL(nvpair_value_int64); ++EXPORT_SYMBOL(nvpair_value_uint64); ++EXPORT_SYMBOL(nvpair_value_string); ++EXPORT_SYMBOL(nvpair_value_nvlist); ++EXPORT_SYMBOL(nvpair_value_boolean_array); ++EXPORT_SYMBOL(nvpair_value_byte_array); ++EXPORT_SYMBOL(nvpair_value_int8_array); ++EXPORT_SYMBOL(nvpair_value_uint8_array); ++EXPORT_SYMBOL(nvpair_value_int16_array); ++EXPORT_SYMBOL(nvpair_value_uint16_array); ++EXPORT_SYMBOL(nvpair_value_int32_array); ++EXPORT_SYMBOL(nvpair_value_uint32_array); ++EXPORT_SYMBOL(nvpair_value_int64_array); ++EXPORT_SYMBOL(nvpair_value_uint64_array); ++EXPORT_SYMBOL(nvpair_value_string_array); ++EXPORT_SYMBOL(nvpair_value_nvlist_array); ++EXPORT_SYMBOL(nvpair_value_hrtime); ++ ++#endif +diff -uNr linux-3.2.33-go.orig/fs/zfs/unicode/Makefile linux-3.2.33-go/fs/zfs/unicode/Makefile +--- linux-3.2.33-go.orig/fs/zfs/unicode/Makefile 1970-01-01 01:00:00.000000000 +0100 ++++ linux-3.2.33-go/fs/zfs/unicode/Makefile 2012-11-16 23:25:34.362039185 +0100 +@@ -0,0 +1,8 @@ ++MODULE := zunicode ++ ++EXTRA_CFLAGS = $(ZFS_MODULE_CFLAGS) -Wno-unused-but-set-variable -DHAVE_SPL -D_KERNEL -DTEXT_DOMAIN=\"zfs-linux-kernel\" -DNDEBUG ++ ++obj-$(CONFIG_ZFS) := $(MODULE).o ++ ++$(MODULE)-objs += u8_textprep.o ++$(MODULE)-objs += uconv.o +diff -uNr linux-3.2.33-go.orig/fs/zfs/unicode/Makefile.in linux-3.2.33-go/fs/zfs/unicode/Makefile.in +--- linux-3.2.33-go.orig/fs/zfs/unicode/Makefile.in 1970-01-01 01:00:00.000000000 +0100 ++++ linux-3.2.33-go/fs/zfs/unicode/Makefile.in 2012-11-16 23:25:34.355039267 +0100 +@@ -0,0 +1,8 @@ ++MODULE := zunicode ++ ++EXTRA_CFLAGS = $(ZFS_MODULE_CFLAGS) @KERNELCPPFLAGS@ 
++ ++obj-$(CONFIG_ZFS) := $(MODULE).o ++ ++$(MODULE)-objs += @top_srcdir@/module/unicode/u8_textprep.o ++$(MODULE)-objs += @top_srcdir@/module/unicode/uconv.o +diff -uNr linux-3.2.33-go.orig/fs/zfs/unicode/u8_textprep.c linux-3.2.33-go/fs/zfs/unicode/u8_textprep.c +--- linux-3.2.33-go.orig/fs/zfs/unicode/u8_textprep.c 1970-01-01 01:00:00.000000000 +0100 ++++ linux-3.2.33-go/fs/zfs/unicode/u8_textprep.c 2012-11-16 23:25:34.355039267 +0100 +@@ -0,0 +1,2150 @@ ++/* ++ * CDDL HEADER START ++ * ++ * The contents of this file are subject to the terms of the ++ * Common Development and Distribution License (the "License"). ++ * You may not use this file except in compliance with the License. ++ * ++ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE ++ * or http://www.opensolaris.org/os/licensing. ++ * See the License for the specific language governing permissions ++ * and limitations under the License. ++ * ++ * When distributing Covered Code, include this CDDL HEADER in each ++ * file and include the License file at usr/src/OPENSOLARIS.LICENSE. ++ * If applicable, add the following below this CDDL HEADER, with the ++ * fields enclosed by brackets "[]" replaced with your own identifying ++ * information: Portions Copyright [yyyy] [name of copyright owner] ++ * ++ * CDDL HEADER END ++ */ ++/* ++ * Copyright 2008 Sun Microsystems, Inc. All rights reserved. ++ * Use is subject to license terms. ++ */ ++ ++ ++ ++ ++/* ++ * UTF-8 text preparation functions (PSARC/2007/149, PSARC/2007/458). ++ * ++ * Man pages: u8_textprep_open(9F), u8_textprep_buf(9F), u8_textprep_close(9F), ++ * u8_textprep_str(9F), u8_strcmp(9F), and u8_validate(9F). See also ++ * the section 3C man pages. ++ * Interface stability: Committed. ++ */ ++ ++#include ++#ifdef _KERNEL ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#else ++#include ++#include ++#endif /* _KERNEL */ ++#include ++#include ++#include ++ ++ ++/* The maximum possible number of bytes in a UTF-8 character. */ ++#define U8_MB_CUR_MAX (4) ++ ++/* ++ * The maximum number of bytes needed for a UTF-8 character to cover ++ * U+0000 - U+FFFF, i.e., the coding space of now deprecated UCS-2. ++ */ ++#define U8_MAX_BYTES_UCS2 (3) ++ ++/* The maximum possible number of bytes in a Stream-Safe Text. */ ++#define U8_STREAM_SAFE_TEXT_MAX (128) ++ ++/* ++ * The maximum number of characters in a combining/conjoining sequence and ++ * the actual upperbound limit of a combining/conjoining sequence. ++ */ ++#define U8_MAX_CHARS_A_SEQ (32) ++#define U8_UPPER_LIMIT_IN_A_SEQ (31) ++ ++/* The combining class value for Starter. */ ++#define U8_COMBINING_CLASS_STARTER (0) ++ ++/* ++ * Some Hangul related macros at below. ++ * ++ * The first and the last of Hangul syllables, Hangul Jamo Leading consonants, ++ * Vowels, and optional Trailing consonants in Unicode scalar values. ++ * ++ * Please be noted that the U8_HANGUL_JAMO_T_FIRST is 0x11A7 at below not ++ * the actual U+11A8. This is due to that the trailing consonant is optional ++ * and thus we are doing a pre-calculation of subtracting one. ++ * ++ * Each of 19 modern leading consonants has total 588 possible syllables since ++ * Hangul has 21 modern vowels and 27 modern trailing consonants plus 1 for ++ * no trailing consonant case, i.e., 21 x 28 = 588. ++ * ++ * We also have bunch of Hangul related macros at below. 
Please bear in mind ++ * that the U8_HANGUL_JAMO_1ST_BYTE can be used to check whether it is ++ * a Hangul Jamo or not but the value does not guarantee that it is a Hangul ++ * Jamo; it just guarantee that it will be most likely. ++ */ ++#define U8_HANGUL_SYL_FIRST (0xAC00U) ++#define U8_HANGUL_SYL_LAST (0xD7A3U) ++ ++#define U8_HANGUL_JAMO_L_FIRST (0x1100U) ++#define U8_HANGUL_JAMO_L_LAST (0x1112U) ++#define U8_HANGUL_JAMO_V_FIRST (0x1161U) ++#define U8_HANGUL_JAMO_V_LAST (0x1175U) ++#define U8_HANGUL_JAMO_T_FIRST (0x11A7U) ++#define U8_HANGUL_JAMO_T_LAST (0x11C2U) ++ ++#define U8_HANGUL_V_COUNT (21) ++#define U8_HANGUL_VT_COUNT (588) ++#define U8_HANGUL_T_COUNT (28) ++ ++#define U8_HANGUL_JAMO_1ST_BYTE (0xE1U) ++ ++#define U8_SAVE_HANGUL_AS_UTF8(s, i, j, k, b) \ ++ (s)[(i)] = (uchar_t)(0xE0U | ((uint32_t)(b) & 0xF000U) >> 12); \ ++ (s)[(j)] = (uchar_t)(0x80U | ((uint32_t)(b) & 0x0FC0U) >> 6); \ ++ (s)[(k)] = (uchar_t)(0x80U | ((uint32_t)(b) & 0x003FU)); ++ ++#define U8_HANGUL_JAMO_L(u) \ ++ ((u) >= U8_HANGUL_JAMO_L_FIRST && (u) <= U8_HANGUL_JAMO_L_LAST) ++ ++#define U8_HANGUL_JAMO_V(u) \ ++ ((u) >= U8_HANGUL_JAMO_V_FIRST && (u) <= U8_HANGUL_JAMO_V_LAST) ++ ++#define U8_HANGUL_JAMO_T(u) \ ++ ((u) > U8_HANGUL_JAMO_T_FIRST && (u) <= U8_HANGUL_JAMO_T_LAST) ++ ++#define U8_HANGUL_JAMO(u) \ ++ ((u) >= U8_HANGUL_JAMO_L_FIRST && (u) <= U8_HANGUL_JAMO_T_LAST) ++ ++#define U8_HANGUL_SYLLABLE(u) \ ++ ((u) >= U8_HANGUL_SYL_FIRST && (u) <= U8_HANGUL_SYL_LAST) ++ ++#define U8_HANGUL_COMPOSABLE_L_V(s, u) \ ++ ((s) == U8_STATE_HANGUL_L && U8_HANGUL_JAMO_V((u))) ++ ++#define U8_HANGUL_COMPOSABLE_LV_T(s, u) \ ++ ((s) == U8_STATE_HANGUL_LV && U8_HANGUL_JAMO_T((u))) ++ ++/* The types of decomposition mappings. */ ++#define U8_DECOMP_BOTH (0xF5U) ++#define U8_DECOMP_CANONICAL (0xF6U) ++ ++/* The indicator for 16-bit table. */ ++#define U8_16BIT_TABLE_INDICATOR (0x8000U) ++ ++/* The following are some convenience macros. */ ++#define U8_PUT_3BYTES_INTO_UTF32(u, b1, b2, b3) \ ++ (u) = ((((uint32_t)(b1) & 0x0F) << 12) | \ ++ (((uint32_t)(b2) & 0x3F) << 6) | \ ++ ((uint32_t)(b3) & 0x3F)); ++ ++#define U8_SIMPLE_SWAP(a, b, t) \ ++ (t) = (a); \ ++ (a) = (b); \ ++ (b) = (t); ++ ++#define U8_ASCII_TOUPPER(c) \ ++ (((c) >= 'a' && (c) <= 'z') ? (c) - 'a' + 'A' : (c)) ++ ++#define U8_ASCII_TOLOWER(c) \ ++ (((c) >= 'A' && (c) <= 'Z') ? (c) - 'A' + 'a' : (c)) ++ ++#define U8_ISASCII(c) (((uchar_t)(c)) < 0x80U) ++/* ++ * The following macro assumes that the two characters that are to be ++ * swapped are adjacent to each other and 'a' comes before 'b'. ++ * ++ * If the assumptions are not met, then, the macro will fail. ++ */ ++#define U8_SWAP_COMB_MARKS(a, b) \ ++ for (k = 0; k < disp[(a)]; k++) \ ++ u8t[k] = u8s[start[(a)] + k]; \ ++ for (k = 0; k < disp[(b)]; k++) \ ++ u8s[start[(a)] + k] = u8s[start[(b)] + k]; \ ++ start[(b)] = start[(a)] + disp[(b)]; \ ++ for (k = 0; k < disp[(a)]; k++) \ ++ u8s[start[(b)] + k] = u8t[k]; \ ++ U8_SIMPLE_SWAP(comb_class[(a)], comb_class[(b)], tc); \ ++ U8_SIMPLE_SWAP(disp[(a)], disp[(b)], tc); ++ ++/* The possible states during normalization. */ ++typedef enum { ++ U8_STATE_START = 0, ++ U8_STATE_HANGUL_L = 1, ++ U8_STATE_HANGUL_LV = 2, ++ U8_STATE_HANGUL_LVT = 3, ++ U8_STATE_HANGUL_V = 4, ++ U8_STATE_HANGUL_T = 5, ++ U8_STATE_COMBINING_MARK = 6 ++} u8_normalization_states_t; ++ ++/* ++ * The three vectors at below are used to check bytes of a given UTF-8 ++ * character are valid and not containing any malformed byte values. 
++ * ++ * We used to have a quite relaxed UTF-8 binary representation but then there ++ * was some security related issues and so the Unicode Consortium defined ++ * and announced the UTF-8 Corrigendum at Unicode 3.1 and then refined it ++ * one more time at the Unicode 3.2. The following three tables are based on ++ * that. ++ */ ++ ++#define U8_ILLEGAL_NEXT_BYTE_COMMON(c) ((c) < 0x80 || (c) > 0xBF) ++ ++#define I_ U8_ILLEGAL_CHAR ++#define O_ U8_OUT_OF_RANGE_CHAR ++ ++const int8_t u8_number_of_bytes[0x100] = { ++ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, ++ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, ++ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, ++ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, ++ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, ++ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, ++ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, ++ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, ++ ++/* 80 81 82 83 84 85 86 87 88 89 8A 8B 8C 8D 8E 8F */ ++ I_, I_, I_, I_, I_, I_, I_, I_, I_, I_, I_, I_, I_, I_, I_, I_, ++ ++/* 90 91 92 93 94 95 96 97 98 99 9A 9B 9C 9D 9E 9F */ ++ I_, I_, I_, I_, I_, I_, I_, I_, I_, I_, I_, I_, I_, I_, I_, I_, ++ ++/* A0 A1 A2 A3 A4 A5 A6 A7 A8 A9 AA AB AC AD AE AF */ ++ I_, I_, I_, I_, I_, I_, I_, I_, I_, I_, I_, I_, I_, I_, I_, I_, ++ ++/* B0 B1 B2 B3 B4 B5 B6 B7 B8 B9 BA BB BC BD BE BF */ ++ I_, I_, I_, I_, I_, I_, I_, I_, I_, I_, I_, I_, I_, I_, I_, I_, ++ ++/* C0 C1 C2 C3 C4 C5 C6 C7 C8 C9 CA CB CC CD CE CF */ ++ I_, I_, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, ++ ++/* D0 D1 D2 D3 D4 D5 D6 D7 D8 D9 DA DB DC DD DE DF */ ++ 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, ++ ++/* E0 E1 E2 E3 E4 E5 E6 E7 E8 E9 EA EB EC ED EE EF */ ++ 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, ++ ++/* F0 F1 F2 F3 F4 F5 F6 F7 F8 F9 FA FB FC FD FE FF */ ++ 4, 4, 4, 4, 4, O_, O_, O_, O_, O_, O_, O_, O_, O_, O_, O_, ++}; ++ ++#undef I_ ++#undef O_ ++ ++const uint8_t u8_valid_min_2nd_byte[0x100] = { ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++/* C0 C1 C2 C3 C4 C5 C6 C7 */ ++ 0, 0, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, ++/* C8 C9 CA CB CC CD CE CF */ ++ 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, ++/* D0 D1 D2 D3 D4 D5 D6 D7 */ ++ 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, ++/* D8 D9 DA DB DC DD DE DF */ ++ 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, ++/* E0 E1 E2 E3 E4 E5 E6 E7 */ ++ 0xa0, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, ++/* E8 E9 EA EB EC ED EE EF */ ++ 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, ++/* F0 F1 F2 F3 F4 F5 F6 F7 */ ++ 0x90, 0x80, 0x80, 0x80, 0x80, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++}; ++ ++const uint8_t u8_valid_max_2nd_byte[0x100] = { ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 
0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++/* C0 C1 C2 C3 C4 C5 C6 C7 */ ++ 0, 0, 0xbf, 0xbf, 0xbf, 0xbf, 0xbf, 0xbf, ++/* C8 C9 CA CB CC CD CE CF */ ++ 0xbf, 0xbf, 0xbf, 0xbf, 0xbf, 0xbf, 0xbf, 0xbf, ++/* D0 D1 D2 D3 D4 D5 D6 D7 */ ++ 0xbf, 0xbf, 0xbf, 0xbf, 0xbf, 0xbf, 0xbf, 0xbf, ++/* D8 D9 DA DB DC DD DE DF */ ++ 0xbf, 0xbf, 0xbf, 0xbf, 0xbf, 0xbf, 0xbf, 0xbf, ++/* E0 E1 E2 E3 E4 E5 E6 E7 */ ++ 0xbf, 0xbf, 0xbf, 0xbf, 0xbf, 0xbf, 0xbf, 0xbf, ++/* E8 E9 EA EB EC ED EE EF */ ++ 0xbf, 0xbf, 0xbf, 0xbf, 0xbf, 0x9f, 0xbf, 0xbf, ++/* F0 F1 F2 F3 F4 F5 F6 F7 */ ++ 0xbf, 0xbf, 0xbf, 0xbf, 0x8f, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++}; ++ ++ ++/* ++ * The u8_validate() validates on the given UTF-8 character string and ++ * calculate the byte length. It is quite similar to mblen(3C) except that ++ * this will validate against the list of characters if required and ++ * specific to UTF-8 and Unicode. ++ */ ++int ++u8_validate(char *u8str, size_t n, char **list, int flag, int *errnum) ++{ ++ uchar_t *ib; ++ uchar_t *ibtail; ++ uchar_t **p; ++ uchar_t *s1; ++ uchar_t *s2; ++ uchar_t f; ++ int sz; ++ size_t i; ++ int ret_val; ++ boolean_t second; ++ boolean_t no_need_to_validate_entire; ++ boolean_t check_additional; ++ boolean_t validate_ucs2_range_only; ++ ++ if (! u8str) ++ return (0); ++ ++ ib = (uchar_t *)u8str; ++ ibtail = ib + n; ++ ++ ret_val = 0; ++ ++ no_need_to_validate_entire = ! (flag & U8_VALIDATE_ENTIRE); ++ check_additional = flag & U8_VALIDATE_CHECK_ADDITIONAL; ++ validate_ucs2_range_only = flag & U8_VALIDATE_UCS2_RANGE; ++ ++ while (ib < ibtail) { ++ /* ++ * The first byte of a UTF-8 character tells how many ++ * bytes will follow for the character. If the first byte ++ * is an illegal byte value or out of range value, we just ++ * return -1 with an appropriate error number. ++ */ ++ sz = u8_number_of_bytes[*ib]; ++ if (sz == U8_ILLEGAL_CHAR) { ++ *errnum = EILSEQ; ++ return (-1); ++ } ++ ++ if (sz == U8_OUT_OF_RANGE_CHAR || ++ (validate_ucs2_range_only && sz > U8_MAX_BYTES_UCS2)) { ++ *errnum = ERANGE; ++ return (-1); ++ } ++ ++ /* ++ * If we don't have enough bytes to check on, that's also ++ * an error. As you can see, we give illegal byte sequence ++ * checking higher priority then EINVAL cases. ++ */ ++ if ((ibtail - ib) < sz) { ++ *errnum = EINVAL; ++ return (-1); ++ } ++ ++ if (sz == 1) { ++ ib++; ++ ret_val++; ++ } else { ++ /* ++ * Check on the multi-byte UTF-8 character. For more ++ * details on this, see comment added for the used ++ * data structures at the beginning of the file. 
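++ *
++ * For example (using values from the validation tables above), the
++ * overlong sequence 0xE0 0x80 0x80 fails the check below because
++ * u8_valid_min_2nd_byte[0xE0] is 0xA0, the UTF-16 surrogate encoding
++ * 0xED 0xA0 0x80 fails because u8_valid_max_2nd_byte[0xED] is 0x9F,
++ * and 0xF4 0x90 0x80 0x80 (beyond U+10FFFF) fails because
++ * u8_valid_max_2nd_byte[0xF4] is 0x8F; each case returns -1 with
++ * the error number set to EILSEQ.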
++ */ ++ f = *ib++; ++ ret_val++; ++ second = B_TRUE; ++ for (i = 1; i < sz; i++) { ++ if (second) { ++ if (*ib < u8_valid_min_2nd_byte[f] || ++ *ib > u8_valid_max_2nd_byte[f]) { ++ *errnum = EILSEQ; ++ return (-1); ++ } ++ second = B_FALSE; ++ } else if (U8_ILLEGAL_NEXT_BYTE_COMMON(*ib)) { ++ *errnum = EILSEQ; ++ return (-1); ++ } ++ ib++; ++ ret_val++; ++ } ++ } ++ ++ if (check_additional) { ++ for (p = (uchar_t **)list, i = 0; p[i]; i++) { ++ s1 = ib - sz; ++ s2 = p[i]; ++ while (s1 < ib) { ++ if (*s1 != *s2 || *s2 == '\0') ++ break; ++ s1++; ++ s2++; ++ } ++ ++ if (s1 >= ib && *s2 == '\0') { ++ *errnum = EBADF; ++ return (-1); ++ } ++ } ++ } ++ ++ if (no_need_to_validate_entire) ++ break; ++ } ++ ++ return (ret_val); ++} ++ ++/* ++ * The do_case_conv() looks at the mapping tables and returns found ++ * bytes if any. If not found, the input bytes are returned. The function ++ * always terminate the return bytes with a null character assuming that ++ * there are plenty of room to do so. ++ * ++ * The case conversions are simple case conversions mapping a character to ++ * another character as specified in the Unicode data. The byte size of ++ * the mapped character could be different from that of the input character. ++ * ++ * The return value is the byte length of the returned character excluding ++ * the terminating null byte. ++ */ ++static size_t ++do_case_conv(int uv, uchar_t *u8s, uchar_t *s, int sz, boolean_t is_it_toupper) ++{ ++ size_t i; ++ uint16_t b1 = 0; ++ uint16_t b2 = 0; ++ uint16_t b3 = 0; ++ uint16_t b3_tbl; ++ uint16_t b3_base; ++ uint16_t b4 = 0; ++ size_t start_id; ++ size_t end_id; ++ ++ /* ++ * At this point, the only possible values for sz are 2, 3, and 4. ++ * The u8s should point to a vector that is well beyond the size of ++ * 5 bytes. ++ */ ++ if (sz == 2) { ++ b3 = u8s[0] = s[0]; ++ b4 = u8s[1] = s[1]; ++ } else if (sz == 3) { ++ b2 = u8s[0] = s[0]; ++ b3 = u8s[1] = s[1]; ++ b4 = u8s[2] = s[2]; ++ } else if (sz == 4) { ++ b1 = u8s[0] = s[0]; ++ b2 = u8s[1] = s[1]; ++ b3 = u8s[2] = s[2]; ++ b4 = u8s[3] = s[3]; ++ } else { ++ /* This is not possible but just in case as a fallback. */ ++ if (is_it_toupper) ++ *u8s = U8_ASCII_TOUPPER(*s); ++ else ++ *u8s = U8_ASCII_TOLOWER(*s); ++ u8s[1] = '\0'; ++ ++ return (1); ++ } ++ u8s[sz] = '\0'; ++ ++ /* ++ * Let's find out if we have a corresponding character. ++ */ ++ b1 = u8_common_b1_tbl[uv][b1]; ++ if (b1 == U8_TBL_ELEMENT_NOT_DEF) ++ return ((size_t)sz); ++ ++ b2 = u8_case_common_b2_tbl[uv][b1][b2]; ++ if (b2 == U8_TBL_ELEMENT_NOT_DEF) ++ return ((size_t)sz); ++ ++ if (is_it_toupper) { ++ b3_tbl = u8_toupper_b3_tbl[uv][b2][b3].tbl_id; ++ if (b3_tbl == U8_TBL_ELEMENT_NOT_DEF) ++ return ((size_t)sz); ++ ++ start_id = u8_toupper_b4_tbl[uv][b3_tbl][b4]; ++ end_id = u8_toupper_b4_tbl[uv][b3_tbl][b4 + 1]; ++ ++ /* Either there is no match or an error at the table. 
*/ ++ if (start_id >= end_id || (end_id - start_id) > U8_MB_CUR_MAX) ++ return ((size_t)sz); ++ ++ b3_base = u8_toupper_b3_tbl[uv][b2][b3].base; ++ ++ for (i = 0; start_id < end_id; start_id++) ++ u8s[i++] = u8_toupper_final_tbl[uv][b3_base + start_id]; ++ } else { ++ b3_tbl = u8_tolower_b3_tbl[uv][b2][b3].tbl_id; ++ if (b3_tbl == U8_TBL_ELEMENT_NOT_DEF) ++ return ((size_t)sz); ++ ++ start_id = u8_tolower_b4_tbl[uv][b3_tbl][b4]; ++ end_id = u8_tolower_b4_tbl[uv][b3_tbl][b4 + 1]; ++ ++ if (start_id >= end_id || (end_id - start_id) > U8_MB_CUR_MAX) ++ return ((size_t)sz); ++ ++ b3_base = u8_tolower_b3_tbl[uv][b2][b3].base; ++ ++ for (i = 0; start_id < end_id; start_id++) ++ u8s[i++] = u8_tolower_final_tbl[uv][b3_base + start_id]; ++ } ++ ++ /* ++ * If i is still zero, that means there is no corresponding character. ++ */ ++ if (i == 0) ++ return ((size_t)sz); ++ ++ u8s[i] = '\0'; ++ ++ return (i); ++} ++ ++/* ++ * The do_case_compare() function compares the two input strings, s1 and s2, ++ * one character at a time doing case conversions if applicable and return ++ * the comparison result as like strcmp(). ++ * ++ * Since, in empirical sense, most of text data are 7-bit ASCII characters, ++ * we treat the 7-bit ASCII characters as a special case trying to yield ++ * faster processing time. ++ */ ++static int ++do_case_compare(size_t uv, uchar_t *s1, uchar_t *s2, size_t n1, ++ size_t n2, boolean_t is_it_toupper, int *errnum) ++{ ++ int f; ++ int sz1; ++ int sz2; ++ size_t j; ++ size_t i1; ++ size_t i2; ++ uchar_t u8s1[U8_MB_CUR_MAX + 1]; ++ uchar_t u8s2[U8_MB_CUR_MAX + 1]; ++ ++ i1 = i2 = 0; ++ while (i1 < n1 && i2 < n2) { ++ /* ++ * Find out what would be the byte length for this UTF-8 ++ * character at string s1 and also find out if this is ++ * an illegal start byte or not and if so, issue a proper ++ * error number and yet treat this byte as a character. ++ */ ++ sz1 = u8_number_of_bytes[*s1]; ++ if (sz1 < 0) { ++ *errnum = EILSEQ; ++ sz1 = 1; ++ } ++ ++ /* ++ * For 7-bit ASCII characters mainly, we do a quick case ++ * conversion right at here. ++ * ++ * If we don't have enough bytes for this character, issue ++ * an EINVAL error and use what are available. ++ * ++ * If we have enough bytes, find out if there is ++ * a corresponding uppercase character and if so, copy over ++ * the bytes for a comparison later. If there is no ++ * corresponding uppercase character, then, use what we have ++ * for the comparison. ++ */ ++ if (sz1 == 1) { ++ if (is_it_toupper) ++ u8s1[0] = U8_ASCII_TOUPPER(*s1); ++ else ++ u8s1[0] = U8_ASCII_TOLOWER(*s1); ++ s1++; ++ u8s1[1] = '\0'; ++ } else if ((i1 + sz1) > n1) { ++ *errnum = EINVAL; ++ for (j = 0; (i1 + j) < n1; ) ++ u8s1[j++] = *s1++; ++ u8s1[j] = '\0'; ++ } else { ++ (void) do_case_conv(uv, u8s1, s1, sz1, is_it_toupper); ++ s1 += sz1; ++ } ++ ++ /* Do the same for the string s2. */ ++ sz2 = u8_number_of_bytes[*s2]; ++ if (sz2 < 0) { ++ *errnum = EILSEQ; ++ sz2 = 1; ++ } ++ ++ if (sz2 == 1) { ++ if (is_it_toupper) ++ u8s2[0] = U8_ASCII_TOUPPER(*s2); ++ else ++ u8s2[0] = U8_ASCII_TOLOWER(*s2); ++ s2++; ++ u8s2[1] = '\0'; ++ } else if ((i2 + sz2) > n2) { ++ *errnum = EINVAL; ++ for (j = 0; (i2 + j) < n2; ) ++ u8s2[j++] = *s2++; ++ u8s2[j] = '\0'; ++ } else { ++ (void) do_case_conv(uv, u8s2, s2, sz2, is_it_toupper); ++ s2 += sz2; ++ } ++ ++ /* Now compare the two characters. 
*/ ++ if (sz1 == 1 && sz2 == 1) { ++ if (*u8s1 > *u8s2) ++ return (1); ++ if (*u8s1 < *u8s2) ++ return (-1); ++ } else { ++ f = strcmp((const char *)u8s1, (const char *)u8s2); ++ if (f != 0) ++ return (f); ++ } ++ ++ /* ++ * They were the same. Let's move on to the next ++ * characters then. ++ */ ++ i1 += sz1; ++ i2 += sz2; ++ } ++ ++ /* ++ * We compared until the end of either or both strings. ++ * ++ * If we reached to or went over the ends for the both, that means ++ * they are the same. ++ * ++ * If we reached only one of the two ends, that means the other string ++ * has something which then the fact can be used to determine ++ * the return value. ++ */ ++ if (i1 >= n1) { ++ if (i2 >= n2) ++ return (0); ++ return (-1); ++ } ++ return (1); ++} ++ ++/* ++ * The combining_class() function checks on the given bytes and find out ++ * the corresponding Unicode combining class value. The return value 0 means ++ * it is a Starter. Any illegal UTF-8 character will also be treated as ++ * a Starter. ++ */ ++static uchar_t ++combining_class(size_t uv, uchar_t *s, size_t sz) ++{ ++ uint16_t b1 = 0; ++ uint16_t b2 = 0; ++ uint16_t b3 = 0; ++ uint16_t b4 = 0; ++ ++ if (sz == 1 || sz > 4) ++ return (0); ++ ++ if (sz == 2) { ++ b3 = s[0]; ++ b4 = s[1]; ++ } else if (sz == 3) { ++ b2 = s[0]; ++ b3 = s[1]; ++ b4 = s[2]; ++ } else if (sz == 4) { ++ b1 = s[0]; ++ b2 = s[1]; ++ b3 = s[2]; ++ b4 = s[3]; ++ } ++ ++ b1 = u8_common_b1_tbl[uv][b1]; ++ if (b1 == U8_TBL_ELEMENT_NOT_DEF) ++ return (0); ++ ++ b2 = u8_combining_class_b2_tbl[uv][b1][b2]; ++ if (b2 == U8_TBL_ELEMENT_NOT_DEF) ++ return (0); ++ ++ b3 = u8_combining_class_b3_tbl[uv][b2][b3]; ++ if (b3 == U8_TBL_ELEMENT_NOT_DEF) ++ return (0); ++ ++ return (u8_combining_class_b4_tbl[uv][b3][b4]); ++} ++ ++/* ++ * The do_decomp() function finds out a matching decomposition if any ++ * and return. If there is no match, the input bytes are copied and returned. ++ * The function also checks if there is a Hangul, decomposes it if necessary ++ * and returns. ++ * ++ * To save time, a single byte 7-bit ASCII character should be handled by ++ * the caller. ++ * ++ * The function returns the number of bytes returned sans always terminating ++ * the null byte. It will also return a state that will tell if there was ++ * a Hangul character decomposed which then will be used by the caller. ++ */ ++static size_t ++do_decomp(size_t uv, uchar_t *u8s, uchar_t *s, int sz, ++ boolean_t canonical_decomposition, u8_normalization_states_t *state) ++{ ++ uint16_t b1 = 0; ++ uint16_t b2 = 0; ++ uint16_t b3 = 0; ++ uint16_t b3_tbl; ++ uint16_t b3_base; ++ uint16_t b4 = 0; ++ size_t start_id; ++ size_t end_id; ++ size_t i; ++ uint32_t u1; ++ ++ if (sz == 2) { ++ b3 = u8s[0] = s[0]; ++ b4 = u8s[1] = s[1]; ++ u8s[2] = '\0'; ++ } else if (sz == 3) { ++ /* Convert it to a Unicode scalar value. */ ++ U8_PUT_3BYTES_INTO_UTF32(u1, s[0], s[1], s[2]); ++ ++ /* ++ * If this is a Hangul syllable, we decompose it into ++ * a leading consonant, a vowel, and an optional trailing ++ * consonant and then return. 
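++ *
++ * For example, U+D55C (0xED 0x95 0x9C) is 0xAC00 + 18 * 588 + 4, so it
++ * splits into leading consonant U+1112, vowel U+1161, and trailing
++ * consonant U+11AB; below this is written out as the nine UTF-8 bytes
++ * 0xE1 0x84 0x92, 0xE1 0x85 0xA1, 0xE1 0x86 0xAB, the state becomes
++ * U8_STATE_HANGUL_LVT, and 9 is returned.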
++ */ ++ if (U8_HANGUL_SYLLABLE(u1)) { ++ u1 -= U8_HANGUL_SYL_FIRST; ++ ++ b1 = U8_HANGUL_JAMO_L_FIRST + u1 / U8_HANGUL_VT_COUNT; ++ b2 = U8_HANGUL_JAMO_V_FIRST + (u1 % U8_HANGUL_VT_COUNT) ++ / U8_HANGUL_T_COUNT; ++ b3 = u1 % U8_HANGUL_T_COUNT; ++ ++ U8_SAVE_HANGUL_AS_UTF8(u8s, 0, 1, 2, b1); ++ U8_SAVE_HANGUL_AS_UTF8(u8s, 3, 4, 5, b2); ++ if (b3) { ++ b3 += U8_HANGUL_JAMO_T_FIRST; ++ U8_SAVE_HANGUL_AS_UTF8(u8s, 6, 7, 8, b3); ++ ++ u8s[9] = '\0'; ++ *state = U8_STATE_HANGUL_LVT; ++ return (9); ++ } ++ ++ u8s[6] = '\0'; ++ *state = U8_STATE_HANGUL_LV; ++ return (6); ++ } ++ ++ b2 = u8s[0] = s[0]; ++ b3 = u8s[1] = s[1]; ++ b4 = u8s[2] = s[2]; ++ u8s[3] = '\0'; ++ ++ /* ++ * If this is a Hangul Jamo, we know there is nothing ++ * further that we can decompose. ++ */ ++ if (U8_HANGUL_JAMO_L(u1)) { ++ *state = U8_STATE_HANGUL_L; ++ return (3); ++ } ++ ++ if (U8_HANGUL_JAMO_V(u1)) { ++ if (*state == U8_STATE_HANGUL_L) ++ *state = U8_STATE_HANGUL_LV; ++ else ++ *state = U8_STATE_HANGUL_V; ++ return (3); ++ } ++ ++ if (U8_HANGUL_JAMO_T(u1)) { ++ if (*state == U8_STATE_HANGUL_LV) ++ *state = U8_STATE_HANGUL_LVT; ++ else ++ *state = U8_STATE_HANGUL_T; ++ return (3); ++ } ++ } else if (sz == 4) { ++ b1 = u8s[0] = s[0]; ++ b2 = u8s[1] = s[1]; ++ b3 = u8s[2] = s[2]; ++ b4 = u8s[3] = s[3]; ++ u8s[4] = '\0'; ++ } else { ++ /* ++ * This is a fallback and should not happen if the function ++ * was called properly. ++ */ ++ u8s[0] = s[0]; ++ u8s[1] = '\0'; ++ *state = U8_STATE_START; ++ return (1); ++ } ++ ++ /* ++ * At this point, this rountine does not know what it would get. ++ * The caller should sort it out if the state isn't a Hangul one. ++ */ ++ *state = U8_STATE_START; ++ ++ /* Try to find matching decomposition mapping byte sequence. */ ++ b1 = u8_common_b1_tbl[uv][b1]; ++ if (b1 == U8_TBL_ELEMENT_NOT_DEF) ++ return ((size_t)sz); ++ ++ b2 = u8_decomp_b2_tbl[uv][b1][b2]; ++ if (b2 == U8_TBL_ELEMENT_NOT_DEF) ++ return ((size_t)sz); ++ ++ b3_tbl = u8_decomp_b3_tbl[uv][b2][b3].tbl_id; ++ if (b3_tbl == U8_TBL_ELEMENT_NOT_DEF) ++ return ((size_t)sz); ++ ++ /* ++ * If b3_tbl is bigger than or equal to U8_16BIT_TABLE_INDICATOR ++ * which is 0x8000, this means we couldn't fit the mappings into ++ * the cardinality of a unsigned byte. ++ */ ++ if (b3_tbl >= U8_16BIT_TABLE_INDICATOR) { ++ b3_tbl -= U8_16BIT_TABLE_INDICATOR; ++ start_id = u8_decomp_b4_16bit_tbl[uv][b3_tbl][b4]; ++ end_id = u8_decomp_b4_16bit_tbl[uv][b3_tbl][b4 + 1]; ++ } else { ++ start_id = u8_decomp_b4_tbl[uv][b3_tbl][b4]; ++ end_id = u8_decomp_b4_tbl[uv][b3_tbl][b4 + 1]; ++ } ++ ++ /* This also means there wasn't any matching decomposition. */ ++ if (start_id >= end_id) ++ return ((size_t)sz); ++ ++ /* ++ * The final table for decomposition mappings has three types of ++ * byte sequences depending on whether a mapping is for compatibility ++ * decomposition, canonical decomposition, or both like the following: ++ * ++ * (1) Compatibility decomposition mappings: ++ * ++ * +---+---+-...-+---+ ++ * | B0| B1| ... | Bm| ++ * +---+---+-...-+---+ ++ * ++ * The first byte, B0, is always less then 0xF5 (U8_DECOMP_BOTH). ++ * ++ * (2) Canonical decomposition mappings: ++ * ++ * +---+---+---+-...-+---+ ++ * | T | b0| b1| ... | bn| ++ * +---+---+---+-...-+---+ ++ * ++ * where the first byte, T, is 0xF6 (U8_DECOMP_CANONICAL). ++ * ++ * (3) Both mappings: ++ * ++ * +---+---+---+---+-...-+---+---+---+-...-+---+ ++ * | T | D | b0| b1| ... | bn| B0| B1| ... 
| Bm| ++ * +---+---+---+---+-...-+---+---+---+-...-+---+ ++ * ++ * where T is 0xF5 (U8_DECOMP_BOTH) and D is a displacement ++ * byte, b0 to bn are canonical mapping bytes and B0 to Bm are ++ * compatibility mapping bytes. ++ * ++ * Note that compatibility decomposition means doing recursive ++ * decompositions using both compatibility decomposition mappings and ++ * canonical decomposition mappings. On the other hand, canonical ++ * decomposition means doing recursive decompositions using only ++ * canonical decomposition mappings. Since the table we have has gone ++ * through the recursions already, we do not need to do so during ++ * runtime, i.e., the table has been completely flattened out ++ * already. ++ */ ++ ++ b3_base = u8_decomp_b3_tbl[uv][b2][b3].base; ++ ++ /* Get the type, T, of the byte sequence. */ ++ b1 = u8_decomp_final_tbl[uv][b3_base + start_id]; ++ ++ /* ++ * If necessary, adjust start_id, end_id, or both. Note that if ++ * this is compatibility decomposition mapping, there is no ++ * adjustment. ++ */ ++ if (canonical_decomposition) { ++ /* Is the mapping only for compatibility decomposition? */ ++ if (b1 < U8_DECOMP_BOTH) ++ return ((size_t)sz); ++ ++ start_id++; ++ ++ if (b1 == U8_DECOMP_BOTH) { ++ end_id = start_id + ++ u8_decomp_final_tbl[uv][b3_base + start_id]; ++ start_id++; ++ } ++ } else { ++ /* ++ * Unless this is a compatibility decomposition mapping, ++ * we adjust the start_id. ++ */ ++ if (b1 == U8_DECOMP_BOTH) { ++ start_id++; ++ start_id += u8_decomp_final_tbl[uv][b3_base + start_id]; ++ } else if (b1 == U8_DECOMP_CANONICAL) { ++ start_id++; ++ } ++ } ++ ++ for (i = 0; start_id < end_id; start_id++) ++ u8s[i++] = u8_decomp_final_tbl[uv][b3_base + start_id]; ++ u8s[i] = '\0'; ++ ++ return (i); ++} ++ ++/* ++ * The find_composition_start() function uses the character bytes given and ++ * find out the matching composition mappings if any and return the address ++ * to the composition mappings as explained in the do_composition(). ++ */ ++static uchar_t * ++find_composition_start(size_t uv, uchar_t *s, size_t sz) ++{ ++ uint16_t b1 = 0; ++ uint16_t b2 = 0; ++ uint16_t b3 = 0; ++ uint16_t b3_tbl; ++ uint16_t b3_base; ++ uint16_t b4 = 0; ++ size_t start_id; ++ size_t end_id; ++ ++ if (sz == 1) { ++ b4 = s[0]; ++ } else if (sz == 2) { ++ b3 = s[0]; ++ b4 = s[1]; ++ } else if (sz == 3) { ++ b2 = s[0]; ++ b3 = s[1]; ++ b4 = s[2]; ++ } else if (sz == 4) { ++ b1 = s[0]; ++ b2 = s[1]; ++ b3 = s[2]; ++ b4 = s[3]; ++ } else { ++ /* ++ * This is a fallback and should not happen if the function ++ * was called properly. 
++ */ ++ return (NULL); ++ } ++ ++ b1 = u8_composition_b1_tbl[uv][b1]; ++ if (b1 == U8_TBL_ELEMENT_NOT_DEF) ++ return (NULL); ++ ++ b2 = u8_composition_b2_tbl[uv][b1][b2]; ++ if (b2 == U8_TBL_ELEMENT_NOT_DEF) ++ return (NULL); ++ ++ b3_tbl = u8_composition_b3_tbl[uv][b2][b3].tbl_id; ++ if (b3_tbl == U8_TBL_ELEMENT_NOT_DEF) ++ return (NULL); ++ ++ if (b3_tbl >= U8_16BIT_TABLE_INDICATOR) { ++ b3_tbl -= U8_16BIT_TABLE_INDICATOR; ++ start_id = u8_composition_b4_16bit_tbl[uv][b3_tbl][b4]; ++ end_id = u8_composition_b4_16bit_tbl[uv][b3_tbl][b4 + 1]; ++ } else { ++ start_id = u8_composition_b4_tbl[uv][b3_tbl][b4]; ++ end_id = u8_composition_b4_tbl[uv][b3_tbl][b4 + 1]; ++ } ++ ++ if (start_id >= end_id) ++ return (NULL); ++ ++ b3_base = u8_composition_b3_tbl[uv][b2][b3].base; ++ ++ return ((uchar_t *)&(u8_composition_final_tbl[uv][b3_base + start_id])); ++} ++ ++/* ++ * The blocked() function checks on the combining class values of previous ++ * characters in this sequence and return whether it is blocked or not. ++ */ ++static boolean_t ++blocked(uchar_t *comb_class, size_t last) ++{ ++ uchar_t my_comb_class; ++ size_t i; ++ ++ my_comb_class = comb_class[last]; ++ for (i = 1; i < last; i++) ++ if (comb_class[i] >= my_comb_class || ++ comb_class[i] == U8_COMBINING_CLASS_STARTER) ++ return (B_TRUE); ++ ++ return (B_FALSE); ++} ++ ++/* ++ * The do_composition() reads the character string pointed by 's' and ++ * do necessary canonical composition and then copy over the result back to ++ * the 's'. ++ * ++ * The input argument 's' cannot contain more than 32 characters. ++ */ ++static size_t ++do_composition(size_t uv, uchar_t *s, uchar_t *comb_class, uchar_t *start, ++ uchar_t *disp, size_t last, uchar_t **os, uchar_t *oslast) ++{ ++ uchar_t t[U8_STREAM_SAFE_TEXT_MAX + 1]; ++ uchar_t tc[U8_MB_CUR_MAX]; ++ uint8_t saved_marks[U8_MAX_CHARS_A_SEQ]; ++ size_t saved_marks_count; ++ uchar_t *p; ++ uchar_t *saved_p; ++ uchar_t *q; ++ size_t i; ++ size_t saved_i; ++ size_t j; ++ size_t k; ++ size_t l; ++ size_t C; ++ size_t saved_l; ++ size_t size; ++ uint32_t u1; ++ uint32_t u2; ++ boolean_t match_not_found = B_TRUE; ++ ++ /* ++ * This should never happen unless the callers are doing some strange ++ * and unexpected things. ++ * ++ * The "last" is the index pointing to the last character not last + 1. ++ */ ++ if (last >= U8_MAX_CHARS_A_SEQ) ++ last = U8_UPPER_LIMIT_IN_A_SEQ; ++ ++ for (i = l = 0; i <= last; i++) { ++ /* ++ * The last or any non-Starters at the beginning, we don't ++ * have any chance to do composition and so we just copy them ++ * to the temporary buffer. ++ */ ++ if (i >= last || comb_class[i] != U8_COMBINING_CLASS_STARTER) { ++SAVE_THE_CHAR: ++ p = s + start[i]; ++ size = disp[i]; ++ for (k = 0; k < size; k++) ++ t[l++] = *p++; ++ continue; ++ } ++ ++ /* ++ * If this could be a start of Hangul Jamos, then, we try to ++ * conjoin them. 
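++ *
++ * For example, the Jamo run U+1112 U+1161 U+11AB (see the U+D55C
++ * example in do_decomp()) conjoins here back into the single syllable
++ * 0xAC00 + (18 * 21 + 0) * 28 + 4 = U+D55C.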
++ */ ++ if (s[start[i]] == U8_HANGUL_JAMO_1ST_BYTE) { ++ U8_PUT_3BYTES_INTO_UTF32(u1, s[start[i]], ++ s[start[i] + 1], s[start[i] + 2]); ++ U8_PUT_3BYTES_INTO_UTF32(u2, s[start[i] + 3], ++ s[start[i] + 4], s[start[i] + 5]); ++ ++ if (U8_HANGUL_JAMO_L(u1) && U8_HANGUL_JAMO_V(u2)) { ++ u1 -= U8_HANGUL_JAMO_L_FIRST; ++ u2 -= U8_HANGUL_JAMO_V_FIRST; ++ u1 = U8_HANGUL_SYL_FIRST + ++ (u1 * U8_HANGUL_V_COUNT + u2) * ++ U8_HANGUL_T_COUNT; ++ ++ i += 2; ++ if (i <= last) { ++ U8_PUT_3BYTES_INTO_UTF32(u2, ++ s[start[i]], s[start[i] + 1], ++ s[start[i] + 2]); ++ ++ if (U8_HANGUL_JAMO_T(u2)) { ++ u1 += u2 - ++ U8_HANGUL_JAMO_T_FIRST; ++ i++; ++ } ++ } ++ ++ U8_SAVE_HANGUL_AS_UTF8(t + l, 0, 1, 2, u1); ++ i--; ++ l += 3; ++ continue; ++ } ++ } ++ ++ /* ++ * Let's then find out if this Starter has composition ++ * mapping. ++ */ ++ p = find_composition_start(uv, s + start[i], disp[i]); ++ if (p == NULL) ++ goto SAVE_THE_CHAR; ++ ++ /* ++ * We have a Starter with composition mapping and the next ++ * character is a non-Starter. Let's try to find out if ++ * we can do composition. ++ */ ++ ++ saved_p = p; ++ saved_i = i; ++ saved_l = l; ++ saved_marks_count = 0; ++ ++TRY_THE_NEXT_MARK: ++ q = s + start[++i]; ++ size = disp[i]; ++ ++ /* ++ * The next for() loop compares the non-Starter pointed by ++ * 'q' with the possible (joinable) characters pointed by 'p'. ++ * ++ * The composition final table entry pointed by the 'p' ++ * looks like the following: ++ * ++ * +---+---+---+-...-+---+---+---+---+-...-+---+---+ ++ * | C | b0| b2| ... | bn| F | B0| B1| ... | Bm| F | ++ * +---+---+---+-...-+---+---+---+---+-...-+---+---+ ++ * ++ * where C is the count byte indicating the number of ++ * mapping pairs where each pair would be look like ++ * (b0-bn F, B0-Bm F). The b0-bn are the bytes of the second ++ * character of a canonical decomposition and the B0-Bm are ++ * the bytes of a matching composite character. The F is ++ * a filler byte after each character as the separator. ++ */ ++ ++ match_not_found = B_TRUE; ++ ++ for (C = *p++; C > 0; C--) { ++ for (k = 0; k < size; p++, k++) ++ if (*p != q[k]) ++ break; ++ ++ /* Have we found it? */ ++ if (k >= size && *p == U8_TBL_ELEMENT_FILLER) { ++ match_not_found = B_FALSE; ++ ++ l = saved_l; ++ ++ while (*++p != U8_TBL_ELEMENT_FILLER) ++ t[l++] = *p; ++ ++ break; ++ } ++ ++ /* We didn't find; skip to the next pair. */ ++ if (*p != U8_TBL_ELEMENT_FILLER) ++ while (*++p != U8_TBL_ELEMENT_FILLER) ++ ; ++ while (*++p != U8_TBL_ELEMENT_FILLER) ++ ; ++ p++; ++ } ++ ++ /* ++ * If there was no match, we will need to save the combining ++ * mark for later appending. After that, if the next one ++ * is a non-Starter and not blocked, then, we try once ++ * again to do composition with the next non-Starter. ++ * ++ * If there was no match and this was a Starter, then, ++ * this is a new start. ++ * ++ * If there was a match and a composition done and we have ++ * more to check on, then, we retrieve a new composition final ++ * table entry for the composite and then try to do the ++ * composition again. 
++ */ ++ ++ if (match_not_found) { ++ if (comb_class[i] == U8_COMBINING_CLASS_STARTER) { ++ i--; ++ goto SAVE_THE_CHAR; ++ } ++ ++ saved_marks[saved_marks_count++] = i; ++ } ++ ++ if (saved_l == l) { ++ while (i < last) { ++ if (blocked(comb_class, i + 1)) ++ saved_marks[saved_marks_count++] = ++i; ++ else ++ break; ++ } ++ if (i < last) { ++ p = saved_p; ++ goto TRY_THE_NEXT_MARK; ++ } ++ } else if (i < last) { ++ p = find_composition_start(uv, t + saved_l, ++ l - saved_l); ++ if (p != NULL) { ++ saved_p = p; ++ goto TRY_THE_NEXT_MARK; ++ } ++ } ++ ++ /* ++ * There is no more composition possible. ++ * ++ * If there was no composition what so ever then we copy ++ * over the original Starter and then append any non-Starters ++ * remaining at the target string sequentially after that. ++ */ ++ ++ if (saved_l == l) { ++ p = s + start[saved_i]; ++ size = disp[saved_i]; ++ for (j = 0; j < size; j++) ++ t[l++] = *p++; ++ } ++ ++ for (k = 0; k < saved_marks_count; k++) { ++ p = s + start[saved_marks[k]]; ++ size = disp[saved_marks[k]]; ++ for (j = 0; j < size; j++) ++ t[l++] = *p++; ++ } ++ } ++ ++ /* ++ * If the last character is a Starter and if we have a character ++ * (possibly another Starter) that can be turned into a composite, ++ * we do so and we do so until there is no more of composition ++ * possible. ++ */ ++ if (comb_class[last] == U8_COMBINING_CLASS_STARTER) { ++ p = *os; ++ saved_l = l - disp[last]; ++ ++ while (p < oslast) { ++ size = u8_number_of_bytes[*p]; ++ if (size <= 1 || (p + size) > oslast) ++ break; ++ ++ saved_p = p; ++ ++ for (i = 0; i < size; i++) ++ tc[i] = *p++; ++ ++ q = find_composition_start(uv, t + saved_l, ++ l - saved_l); ++ if (q == NULL) { ++ p = saved_p; ++ break; ++ } ++ ++ match_not_found = B_TRUE; ++ ++ for (C = *q++; C > 0; C--) { ++ for (k = 0; k < size; q++, k++) ++ if (*q != tc[k]) ++ break; ++ ++ if (k >= size && *q == U8_TBL_ELEMENT_FILLER) { ++ match_not_found = B_FALSE; ++ ++ l = saved_l; ++ ++ while (*++q != U8_TBL_ELEMENT_FILLER) { ++ /* ++ * This is practically ++ * impossible but we don't ++ * want to take any chances. ++ */ ++ if (l >= ++ U8_STREAM_SAFE_TEXT_MAX) { ++ p = saved_p; ++ goto SAFE_RETURN; ++ } ++ t[l++] = *q; ++ } ++ ++ break; ++ } ++ ++ if (*q != U8_TBL_ELEMENT_FILLER) ++ while (*++q != U8_TBL_ELEMENT_FILLER) ++ ; ++ while (*++q != U8_TBL_ELEMENT_FILLER) ++ ; ++ q++; ++ } ++ ++ if (match_not_found) { ++ p = saved_p; ++ break; ++ } ++ } ++SAFE_RETURN: ++ *os = p; ++ } ++ ++ /* ++ * Now we copy over the temporary string to the target string. ++ * Since composition always reduces the number of characters or ++ * the number of characters stay, we don't need to worry about ++ * the buffer overflow here. ++ */ ++ for (i = 0; i < l; i++) ++ s[i] = t[i]; ++ s[l] = '\0'; ++ ++ return (l); ++} ++ ++/* ++ * The collect_a_seq() function checks on the given string s, collect ++ * a sequence of characters at u8s, and return the sequence. While it collects ++ * a sequence, it also applies case conversion, canonical or compatibility ++ * decomposition, canonical decomposition, or some or all of them and ++ * in that order. ++ * ++ * The collected sequence cannot be bigger than 32 characters since if ++ * it is having more than 31 characters, the sequence will be terminated ++ * with a U+034F COMBINING GRAPHEME JOINER (CGJ) character and turned into ++ * a Stream-Safe Text. The collected sequence is always terminated with ++ * a null byte and the return value is the byte length of the sequence ++ * including 0. 
The return value does not include the terminating ++ * null byte. ++ */ ++static size_t ++collect_a_seq(size_t uv, uchar_t *u8s, uchar_t **source, uchar_t *slast, ++ boolean_t is_it_toupper, ++ boolean_t is_it_tolower, ++ boolean_t canonical_decomposition, ++ boolean_t compatibility_decomposition, ++ boolean_t canonical_composition, ++ int *errnum, u8_normalization_states_t *state) ++{ ++ uchar_t *s; ++ int sz; ++ int saved_sz; ++ size_t i; ++ size_t j; ++ size_t k; ++ size_t l; ++ uchar_t comb_class[U8_MAX_CHARS_A_SEQ]; ++ uchar_t disp[U8_MAX_CHARS_A_SEQ]; ++ uchar_t start[U8_MAX_CHARS_A_SEQ]; ++ uchar_t u8t[U8_MB_CUR_MAX]; ++ uchar_t uts[U8_STREAM_SAFE_TEXT_MAX + 1]; ++ uchar_t tc; ++ size_t last; ++ size_t saved_last; ++ uint32_t u1; ++ ++ /* ++ * Save the source string pointer which we will return a changed ++ * pointer if we do processing. ++ */ ++ s = *source; ++ ++ /* ++ * The following is a fallback for just in case callers are not ++ * checking the string boundaries before the calling. ++ */ ++ if (s >= slast) { ++ u8s[0] = '\0'; ++ ++ return (0); ++ } ++ ++ /* ++ * As the first thing, let's collect a character and do case ++ * conversion if necessary. ++ */ ++ ++ sz = u8_number_of_bytes[*s]; ++ ++ if (sz < 0) { ++ *errnum = EILSEQ; ++ ++ u8s[0] = *s++; ++ u8s[1] = '\0'; ++ ++ *source = s; ++ ++ return (1); ++ } ++ ++ if (sz == 1) { ++ if (is_it_toupper) ++ u8s[0] = U8_ASCII_TOUPPER(*s); ++ else if (is_it_tolower) ++ u8s[0] = U8_ASCII_TOLOWER(*s); ++ else ++ u8s[0] = *s; ++ s++; ++ u8s[1] = '\0'; ++ } else if ((s + sz) > slast) { ++ *errnum = EINVAL; ++ ++ for (i = 0; s < slast; ) ++ u8s[i++] = *s++; ++ u8s[i] = '\0'; ++ ++ *source = s; ++ ++ return (i); ++ } else { ++ if (is_it_toupper || is_it_tolower) { ++ i = do_case_conv(uv, u8s, s, sz, is_it_toupper); ++ s += sz; ++ sz = i; ++ } else { ++ for (i = 0; i < sz; ) ++ u8s[i++] = *s++; ++ u8s[i] = '\0'; ++ } ++ } ++ ++ /* ++ * And then canonical/compatibility decomposition followed by ++ * an optional canonical composition. Please be noted that ++ * canonical composition is done only when a decomposition is ++ * done. ++ */ ++ if (canonical_decomposition || compatibility_decomposition) { ++ if (sz == 1) { ++ *state = U8_STATE_START; ++ ++ saved_sz = 1; ++ ++ comb_class[0] = 0; ++ start[0] = 0; ++ disp[0] = 1; ++ ++ last = 1; ++ } else { ++ saved_sz = do_decomp(uv, u8s, u8s, sz, ++ canonical_decomposition, state); ++ ++ last = 0; ++ ++ for (i = 0; i < saved_sz; ) { ++ sz = u8_number_of_bytes[u8s[i]]; ++ ++ comb_class[last] = combining_class(uv, ++ u8s + i, sz); ++ start[last] = i; ++ disp[last] = sz; ++ ++ last++; ++ i += sz; ++ } ++ ++ /* ++ * Decomposition yields various Hangul related ++ * states but not on combining marks. We need to ++ * find out at here by checking on the last ++ * character. ++ */ ++ if (*state == U8_STATE_START) { ++ if (comb_class[last - 1]) ++ *state = U8_STATE_COMBINING_MARK; ++ } ++ } ++ ++ saved_last = last; ++ ++ while (s < slast) { ++ sz = u8_number_of_bytes[*s]; ++ ++ /* ++ * If this is an illegal character, an incomplete ++ * character, or an 7-bit ASCII Starter character, ++ * then we have collected a sequence; break and let ++ * the next call deal with the two cases. ++ * ++ * Note that this is okay only if you are using this ++ * function with a fixed length string, not on ++ * a buffer with multiple calls of one chunk at a time. 
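++ *
++ * For example, if a caller were to pass 0x65 0xCC 0x81 ('e' followed
++ * by U+0301 COMBINING ACUTE ACCENT) split across two separate calls,
++ * the first call would return the bare 'e' and the second would
++ * return the lone combining mark, so the two would never be
++ * normalized together; passed in one call they are collected as
++ * a single sequence.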
++ */ ++ if (sz <= 1) { ++ break; ++ } else if ((s + sz) > slast) { ++ break; ++ } else { ++ /* ++ * If the previous character was a Hangul Jamo ++ * and this character is a Hangul Jamo that ++ * can be conjoined, we collect the Jamo. ++ */ ++ if (*s == U8_HANGUL_JAMO_1ST_BYTE) { ++ U8_PUT_3BYTES_INTO_UTF32(u1, ++ *s, *(s + 1), *(s + 2)); ++ ++ if (U8_HANGUL_COMPOSABLE_L_V(*state, ++ u1)) { ++ i = 0; ++ *state = U8_STATE_HANGUL_LV; ++ goto COLLECT_A_HANGUL; ++ } ++ ++ if (U8_HANGUL_COMPOSABLE_LV_T(*state, ++ u1)) { ++ i = 0; ++ *state = U8_STATE_HANGUL_LVT; ++ goto COLLECT_A_HANGUL; ++ } ++ } ++ ++ /* ++ * Regardless of whatever it was, if this is ++ * a Starter, we don't collect the character ++ * since that's a new start and we will deal ++ * with it at the next time. ++ */ ++ i = combining_class(uv, s, sz); ++ if (i == U8_COMBINING_CLASS_STARTER) ++ break; ++ ++ /* ++ * We know the current character is a combining ++ * mark. If the previous character wasn't ++ * a Starter (not Hangul) or a combining mark, ++ * then, we don't collect this combining mark. ++ */ ++ if (*state != U8_STATE_START && ++ *state != U8_STATE_COMBINING_MARK) ++ break; ++ ++ *state = U8_STATE_COMBINING_MARK; ++COLLECT_A_HANGUL: ++ /* ++ * If we collected a Starter and combining ++ * marks up to 30, i.e., total 31 characters, ++ * then, we terminate this degenerately long ++ * combining sequence with a U+034F COMBINING ++ * GRAPHEME JOINER (CGJ) which is 0xCD 0x8F in ++ * UTF-8 and turn this into a Stream-Safe ++ * Text. This will be extremely rare but ++ * possible. ++ * ++ * The following will also guarantee that ++ * we are not writing more than 32 characters ++ * plus a NULL at u8s[]. ++ */ ++ if (last >= U8_UPPER_LIMIT_IN_A_SEQ) { ++TURN_STREAM_SAFE: ++ *state = U8_STATE_START; ++ comb_class[last] = 0; ++ start[last] = saved_sz; ++ disp[last] = 2; ++ last++; ++ ++ u8s[saved_sz++] = 0xCD; ++ u8s[saved_sz++] = 0x8F; ++ ++ break; ++ } ++ ++ /* ++ * Some combining marks also do decompose into ++ * another combining mark or marks. ++ */ ++ if (*state == U8_STATE_COMBINING_MARK) { ++ k = last; ++ l = sz; ++ i = do_decomp(uv, uts, s, sz, ++ canonical_decomposition, state); ++ for (j = 0; j < i; ) { ++ sz = u8_number_of_bytes[uts[j]]; ++ ++ comb_class[last] = ++ combining_class(uv, ++ uts + j, sz); ++ start[last] = saved_sz + j; ++ disp[last] = sz; ++ ++ last++; ++ if (last >= ++ U8_UPPER_LIMIT_IN_A_SEQ) { ++ last = k; ++ goto TURN_STREAM_SAFE; ++ } ++ j += sz; ++ } ++ ++ *state = U8_STATE_COMBINING_MARK; ++ sz = i; ++ s += l; ++ ++ for (i = 0; i < sz; i++) ++ u8s[saved_sz++] = uts[i]; ++ } else { ++ comb_class[last] = i; ++ start[last] = saved_sz; ++ disp[last] = sz; ++ last++; ++ ++ for (i = 0; i < sz; i++) ++ u8s[saved_sz++] = *s++; ++ } ++ ++ /* ++ * If this is U+0345 COMBINING GREEK ++ * YPOGEGRAMMENI (0xCD 0x85 in UTF-8), a.k.a., ++ * iota subscript, and need to be converted to ++ * uppercase letter, convert it to U+0399 GREEK ++ * CAPITAL LETTER IOTA (0xCE 0x99 in UTF-8), ++ * i.e., convert to capital adscript form as ++ * specified in the Unicode standard. ++ * ++ * This is the only special case of (ambiguous) ++ * case conversion at combining marks and ++ * probably the standard will never have ++ * anything similar like this in future. ++ */ ++ if (is_it_toupper && sz >= 2 && ++ u8s[saved_sz - 2] == 0xCD && ++ u8s[saved_sz - 1] == 0x85) { ++ u8s[saved_sz - 2] = 0xCE; ++ u8s[saved_sz - 1] = 0x99; ++ } ++ } ++ } ++ ++ /* ++ * Let's try to ensure a canonical ordering for the collected ++ * combining marks. 
We do this only if we have collected ++ * at least one more non-Starter. (The decomposition mapping ++ * data tables have fully (and recursively) expanded and ++ * canonically ordered decompositions.) ++ * ++ * The U8_SWAP_COMB_MARKS() convenience macro has some ++ * assumptions and we are meeting the assumptions. ++ */ ++ last--; ++ if (last >= saved_last) { ++ for (i = 0; i < last; i++) ++ for (j = last; j > i; j--) ++ if (comb_class[j] && ++ comb_class[j - 1] > comb_class[j]) { ++ U8_SWAP_COMB_MARKS(j - 1, j); ++ } ++ } ++ ++ *source = s; ++ ++ if (! canonical_composition) { ++ u8s[saved_sz] = '\0'; ++ return (saved_sz); ++ } ++ ++ /* ++ * Now do the canonical composition. Note that we do this ++ * only after a canonical or compatibility decomposition to ++ * finish up NFC or NFKC. ++ */ ++ sz = do_composition(uv, u8s, comb_class, start, disp, last, ++ &s, slast); ++ } ++ ++ *source = s; ++ ++ return ((size_t)sz); ++} ++ ++/* ++ * The do_norm_compare() function does string comparion based on Unicode ++ * simple case mappings and Unicode Normalization definitions. ++ * ++ * It does so by collecting a sequence of character at a time and comparing ++ * the collected sequences from the strings. ++ * ++ * The meanings on the return values are the same as the usual strcmp(). ++ */ ++static int ++do_norm_compare(size_t uv, uchar_t *s1, uchar_t *s2, size_t n1, size_t n2, ++ int flag, int *errnum) ++{ ++ int result; ++ size_t sz1; ++ size_t sz2; ++ uchar_t u8s1[U8_STREAM_SAFE_TEXT_MAX + 1]; ++ uchar_t u8s2[U8_STREAM_SAFE_TEXT_MAX + 1]; ++ uchar_t *s1last; ++ uchar_t *s2last; ++ boolean_t is_it_toupper; ++ boolean_t is_it_tolower; ++ boolean_t canonical_decomposition; ++ boolean_t compatibility_decomposition; ++ boolean_t canonical_composition; ++ u8_normalization_states_t state; ++ ++ s1last = s1 + n1; ++ s2last = s2 + n2; ++ ++ is_it_toupper = flag & U8_TEXTPREP_TOUPPER; ++ is_it_tolower = flag & U8_TEXTPREP_TOLOWER; ++ canonical_decomposition = flag & U8_CANON_DECOMP; ++ compatibility_decomposition = flag & U8_COMPAT_DECOMP; ++ canonical_composition = flag & U8_CANON_COMP; ++ ++ while (s1 < s1last && s2 < s2last) { ++ /* ++ * If the current character is a 7-bit ASCII and the last ++ * character, or, if the current character and the next ++ * character are both some 7-bit ASCII characters then ++ * we treat the current character as a sequence. ++ * ++ * In any other cases, we need to call collect_a_seq(). ++ */ ++ ++ if (U8_ISASCII(*s1) && ((s1 + 1) >= s1last || ++ ((s1 + 1) < s1last && U8_ISASCII(*(s1 + 1))))) { ++ if (is_it_toupper) ++ u8s1[0] = U8_ASCII_TOUPPER(*s1); ++ else if (is_it_tolower) ++ u8s1[0] = U8_ASCII_TOLOWER(*s1); ++ else ++ u8s1[0] = *s1; ++ u8s1[1] = '\0'; ++ sz1 = 1; ++ s1++; ++ } else { ++ state = U8_STATE_START; ++ sz1 = collect_a_seq(uv, u8s1, &s1, s1last, ++ is_it_toupper, is_it_tolower, ++ canonical_decomposition, ++ compatibility_decomposition, ++ canonical_composition, errnum, &state); ++ } ++ ++ if (U8_ISASCII(*s2) && ((s2 + 1) >= s2last || ++ ((s2 + 1) < s2last && U8_ISASCII(*(s2 + 1))))) { ++ if (is_it_toupper) ++ u8s2[0] = U8_ASCII_TOUPPER(*s2); ++ else if (is_it_tolower) ++ u8s2[0] = U8_ASCII_TOLOWER(*s2); ++ else ++ u8s2[0] = *s2; ++ u8s2[1] = '\0'; ++ sz2 = 1; ++ s2++; ++ } else { ++ state = U8_STATE_START; ++ sz2 = collect_a_seq(uv, u8s2, &s2, s2last, ++ is_it_toupper, is_it_tolower, ++ canonical_decomposition, ++ compatibility_decomposition, ++ canonical_composition, errnum, &state); ++ } ++ ++ /* ++ * Now compare the two characters. 
If they are the same, ++ * we move on to the next character sequences. ++ */ ++ if (sz1 == 1 && sz2 == 1) { ++ if (*u8s1 > *u8s2) ++ return (1); ++ if (*u8s1 < *u8s2) ++ return (-1); ++ } else { ++ result = strcmp((const char *)u8s1, (const char *)u8s2); ++ if (result != 0) ++ return (result); ++ } ++ } ++ ++ /* ++ * We compared until the end of either or both strings. ++ * ++ * If we reached to or went over the ends for the both, that means ++ * they are the same. ++ * ++ * If we reached only one end, that means the other string has ++ * something which then can be used to determine the return value. ++ */ ++ if (s1 >= s1last) { ++ if (s2 >= s2last) ++ return (0); ++ return (-1); ++ } ++ return (1); ++} ++ ++/* ++ * The u8_strcmp() function compares two UTF-8 strings quite similar to ++ * the strcmp(). For the comparison, however, Unicode Normalization specific ++ * equivalency and Unicode simple case conversion mappings based equivalency ++ * can be requested and checked against. ++ */ ++int ++u8_strcmp(const char *s1, const char *s2, size_t n, int flag, size_t uv, ++ int *errnum) ++{ ++ int f; ++ size_t n1; ++ size_t n2; ++ ++ *errnum = 0; ++ ++ /* ++ * Check on the requested Unicode version, case conversion, and ++ * normalization flag values. ++ */ ++ ++ if (uv > U8_UNICODE_LATEST) { ++ *errnum = ERANGE; ++ uv = U8_UNICODE_LATEST; ++ } ++ ++ if (flag == 0) { ++ flag = U8_STRCMP_CS; ++ } else { ++ f = flag & (U8_STRCMP_CS | U8_STRCMP_CI_UPPER | ++ U8_STRCMP_CI_LOWER); ++ if (f == 0) { ++ flag |= U8_STRCMP_CS; ++ } else if (f != U8_STRCMP_CS && f != U8_STRCMP_CI_UPPER && ++ f != U8_STRCMP_CI_LOWER) { ++ *errnum = EBADF; ++ flag = U8_STRCMP_CS; ++ } ++ ++ f = flag & (U8_CANON_DECOMP | U8_COMPAT_DECOMP | U8_CANON_COMP); ++ if (f && f != U8_STRCMP_NFD && f != U8_STRCMP_NFC && ++ f != U8_STRCMP_NFKD && f != U8_STRCMP_NFKC) { ++ *errnum = EBADF; ++ flag = U8_STRCMP_CS; ++ } ++ } ++ ++ if (flag == U8_STRCMP_CS) { ++ return (n == 0 ? strcmp(s1, s2) : strncmp(s1, s2, n)); ++ } ++ ++ n1 = strlen(s1); ++ n2 = strlen(s2); ++ if (n != 0) { ++ if (n < n1) ++ n1 = n; ++ if (n < n2) ++ n2 = n; ++ } ++ ++ /* ++ * Simple case conversion can be done much faster and so we do ++ * them separately here. 
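++ *
++ * For example, u8_strcmp("\xC3\x81", "\xC3\xA1", 0, U8_STRCMP_CI_UPPER,
++ * U8_UNICODE_LATEST, &errnum) compares U+00C1 with U+00E1 by mapping
++ * both to their uppercase forms and returns 0 without touching the
++ * normalization code; comparing the decomposed "e\xCC\x81" with the
++ * precomposed "\xC3\xA9" requires U8_STRCMP_NFC (or U8_STRCMP_NFD)
++ * instead and goes through do_norm_compare() below.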
++ */ ++ if (flag == U8_STRCMP_CI_UPPER) { ++ return (do_case_compare(uv, (uchar_t *)s1, (uchar_t *)s2, ++ n1, n2, B_TRUE, errnum)); ++ } else if (flag == U8_STRCMP_CI_LOWER) { ++ return (do_case_compare(uv, (uchar_t *)s1, (uchar_t *)s2, ++ n1, n2, B_FALSE, errnum)); ++ } ++ ++ return (do_norm_compare(uv, (uchar_t *)s1, (uchar_t *)s2, n1, n2, ++ flag, errnum)); ++} ++ ++size_t ++u8_textprep_str(char *inarray, size_t *inlen, char *outarray, size_t *outlen, ++ int flag, size_t unicode_version, int *errnum) ++{ ++ int f; ++ int sz; ++ uchar_t *ib; ++ uchar_t *ibtail; ++ uchar_t *ob; ++ uchar_t *obtail; ++ boolean_t do_not_ignore_null; ++ boolean_t do_not_ignore_invalid; ++ boolean_t is_it_toupper; ++ boolean_t is_it_tolower; ++ boolean_t canonical_decomposition; ++ boolean_t compatibility_decomposition; ++ boolean_t canonical_composition; ++ size_t ret_val; ++ size_t i; ++ size_t j; ++ uchar_t u8s[U8_STREAM_SAFE_TEXT_MAX + 1]; ++ u8_normalization_states_t state; ++ ++ if (unicode_version > U8_UNICODE_LATEST) { ++ *errnum = ERANGE; ++ return ((size_t)-1); ++ } ++ ++ f = flag & (U8_TEXTPREP_TOUPPER | U8_TEXTPREP_TOLOWER); ++ if (f == (U8_TEXTPREP_TOUPPER | U8_TEXTPREP_TOLOWER)) { ++ *errnum = EBADF; ++ return ((size_t)-1); ++ } ++ ++ f = flag & (U8_CANON_DECOMP | U8_COMPAT_DECOMP | U8_CANON_COMP); ++ if (f && f != U8_TEXTPREP_NFD && f != U8_TEXTPREP_NFC && ++ f != U8_TEXTPREP_NFKD && f != U8_TEXTPREP_NFKC) { ++ *errnum = EBADF; ++ return ((size_t)-1); ++ } ++ ++ if (inarray == NULL || *inlen == 0) ++ return (0); ++ ++ if (outarray == NULL) { ++ *errnum = E2BIG; ++ return ((size_t)-1); ++ } ++ ++ ib = (uchar_t *)inarray; ++ ob = (uchar_t *)outarray; ++ ibtail = ib + *inlen; ++ obtail = ob + *outlen; ++ ++ do_not_ignore_null = !(flag & U8_TEXTPREP_IGNORE_NULL); ++ do_not_ignore_invalid = !(flag & U8_TEXTPREP_IGNORE_INVALID); ++ is_it_toupper = flag & U8_TEXTPREP_TOUPPER; ++ is_it_tolower = flag & U8_TEXTPREP_TOLOWER; ++ ++ ret_val = 0; ++ ++ /* ++ * If we don't have a normalization flag set, we do the simple case ++ * conversion based text preparation separately below. Text ++ * preparation involving Normalization will be done in the false task ++ * block, again, separately since it will take much more time and ++ * resource than doing simple case conversions. ++ */ ++ if (f == 0) { ++ while (ib < ibtail) { ++ if (*ib == '\0' && do_not_ignore_null) ++ break; ++ ++ sz = u8_number_of_bytes[*ib]; ++ ++ if (sz < 0) { ++ if (do_not_ignore_invalid) { ++ *errnum = EILSEQ; ++ ret_val = (size_t)-1; ++ break; ++ } ++ ++ sz = 1; ++ ret_val++; ++ } ++ ++ if (sz == 1) { ++ if (ob >= obtail) { ++ *errnum = E2BIG; ++ ret_val = (size_t)-1; ++ break; ++ } ++ ++ if (is_it_toupper) ++ *ob = U8_ASCII_TOUPPER(*ib); ++ else if (is_it_tolower) ++ *ob = U8_ASCII_TOLOWER(*ib); ++ else ++ *ob = *ib; ++ ib++; ++ ob++; ++ } else if ((ib + sz) > ibtail) { ++ if (do_not_ignore_invalid) { ++ *errnum = EINVAL; ++ ret_val = (size_t)-1; ++ break; ++ } ++ ++ if ((obtail - ob) < (ibtail - ib)) { ++ *errnum = E2BIG; ++ ret_val = (size_t)-1; ++ break; ++ } ++ ++ /* ++ * We treat the remaining incomplete character ++ * bytes as a character. 
++ */ ++ ret_val++; ++ ++ while (ib < ibtail) ++ *ob++ = *ib++; ++ } else { ++ if (is_it_toupper || is_it_tolower) { ++ i = do_case_conv(unicode_version, u8s, ++ ib, sz, is_it_toupper); ++ ++ if ((obtail - ob) < i) { ++ *errnum = E2BIG; ++ ret_val = (size_t)-1; ++ break; ++ } ++ ++ ib += sz; ++ ++ for (sz = 0; sz < i; sz++) ++ *ob++ = u8s[sz]; ++ } else { ++ if ((obtail - ob) < sz) { ++ *errnum = E2BIG; ++ ret_val = (size_t)-1; ++ break; ++ } ++ ++ for (i = 0; i < sz; i++) ++ *ob++ = *ib++; ++ } ++ } ++ } ++ } else { ++ canonical_decomposition = flag & U8_CANON_DECOMP; ++ compatibility_decomposition = flag & U8_COMPAT_DECOMP; ++ canonical_composition = flag & U8_CANON_COMP; ++ ++ while (ib < ibtail) { ++ if (*ib == '\0' && do_not_ignore_null) ++ break; ++ ++ /* ++ * If the current character is a 7-bit ASCII ++ * character and it is the last character, or, ++ * if the current character is a 7-bit ASCII ++ * character and the next character is also a 7-bit ++ * ASCII character, then, we copy over this ++ * character without going through collect_a_seq(). ++ * ++ * In any other cases, we need to look further with ++ * the collect_a_seq() function. ++ */ ++ if (U8_ISASCII(*ib) && ((ib + 1) >= ibtail || ++ ((ib + 1) < ibtail && U8_ISASCII(*(ib + 1))))) { ++ if (ob >= obtail) { ++ *errnum = E2BIG; ++ ret_val = (size_t)-1; ++ break; ++ } ++ ++ if (is_it_toupper) ++ *ob = U8_ASCII_TOUPPER(*ib); ++ else if (is_it_tolower) ++ *ob = U8_ASCII_TOLOWER(*ib); ++ else ++ *ob = *ib; ++ ib++; ++ ob++; ++ } else { ++ *errnum = 0; ++ state = U8_STATE_START; ++ ++ j = collect_a_seq(unicode_version, u8s, ++ &ib, ibtail, ++ is_it_toupper, ++ is_it_tolower, ++ canonical_decomposition, ++ compatibility_decomposition, ++ canonical_composition, ++ errnum, &state); ++ ++ if (*errnum && do_not_ignore_invalid) { ++ ret_val = (size_t)-1; ++ break; ++ } ++ ++ if ((obtail - ob) < j) { ++ *errnum = E2BIG; ++ ret_val = (size_t)-1; ++ break; ++ } ++ ++ for (i = 0; i < j; i++) ++ *ob++ = u8s[i]; ++ } ++ } ++ } ++ ++ *inlen = ibtail - ib; ++ *outlen = obtail - ob; ++ ++ return (ret_val); ++} ++ ++#if defined(_KERNEL) && defined(HAVE_SPL) ++ ++static int unicode_init(void) { return 0; } ++static int unicode_fini(void) { return 0; } ++ ++spl_module_init(unicode_init); ++spl_module_exit(unicode_fini); ++ ++MODULE_DESCRIPTION("Unicode implementation"); ++MODULE_AUTHOR(ZFS_META_AUTHOR); ++MODULE_LICENSE(ZFS_META_LICENSE); ++ ++EXPORT_SYMBOL(u8_validate); ++EXPORT_SYMBOL(u8_strcmp); ++EXPORT_SYMBOL(u8_textprep_str); ++#endif +diff -uNr linux-3.2.33-go.orig/fs/zfs/unicode/uconv.c linux-3.2.33-go/fs/zfs/unicode/uconv.c +--- linux-3.2.33-go.orig/fs/zfs/unicode/uconv.c 1970-01-01 01:00:00.000000000 +0100 ++++ linux-3.2.33-go/fs/zfs/unicode/uconv.c 2012-11-16 23:25:34.355039267 +0100 +@@ -0,0 +1,864 @@ ++/* ++ * CDDL HEADER START ++ * ++ * The contents of this file are subject to the terms of the ++ * Common Development and Distribution License (the "License"). ++ * You may not use this file except in compliance with the License. ++ * ++ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE ++ * or http://www.opensolaris.org/os/licensing. ++ * See the License for the specific language governing permissions ++ * and limitations under the License. ++ * ++ * When distributing Covered Code, include this CDDL HEADER in each ++ * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 
++ * If applicable, add the following below this CDDL HEADER, with the ++ * fields enclosed by brackets "[]" replaced with your own identifying ++ * information: Portions Copyright [yyyy] [name of copyright owner] ++ * ++ * CDDL HEADER END ++ */ ++/* ++ * Copyright 2008 Sun Microsystems, Inc. All rights reserved. ++ * Use is subject to license terms. ++ */ ++ ++ ++ ++/* ++ * Unicode encoding conversion functions among UTF-8, UTF-16, and UTF-32. ++ * (PSARC/2005/446, PSARC/2007/038, PSARC/2007/517) ++ * Man pages: uconv_u16tou32(9F), uconv_u16tou8(9F), uconv_u32tou16(9F), ++ * uconv_u32tou8(9F), uconv_u8tou16(9F), and uconv_u8tou32(9F). See also ++ * the section 3C man pages. ++ * Interface stability: Committed ++ */ ++ ++#include ++#ifdef _KERNEL ++#include ++#include ++#include ++#include ++#include ++#include ++#else ++#include ++#endif /* _KERNEL */ ++#include ++#include ++ ++ ++/* ++ * The max and min values of high and low surrogate pairs of UTF-16, ++ * UTF-16 bit shift value, bit mask, and starting value outside of BMP. ++ */ ++#define UCONV_U16_HI_MIN (0xd800U) ++#define UCONV_U16_HI_MAX (0xdbffU) ++#define UCONV_U16_LO_MIN (0xdc00U) ++#define UCONV_U16_LO_MAX (0xdfffU) ++#define UCONV_U16_BIT_SHIFT (0x0400U) ++#define UCONV_U16_BIT_MASK (0x0fffffU) ++#define UCONV_U16_START (0x010000U) ++ ++/* The maximum value of Unicode coding space and ASCII coding space. */ ++#define UCONV_UNICODE_MAX (0x10ffffU) ++#define UCONV_ASCII_MAX (0x7fU) ++ ++/* The mask values for input and output endians. */ ++#define UCONV_IN_ENDIAN_MASKS (UCONV_IN_BIG_ENDIAN | UCONV_IN_LITTLE_ENDIAN) ++#define UCONV_OUT_ENDIAN_MASKS (UCONV_OUT_BIG_ENDIAN | UCONV_OUT_LITTLE_ENDIAN) ++ ++/* Native and reversed endian macros. */ ++#ifdef _BIG_ENDIAN ++#define UCONV_IN_NAT_ENDIAN UCONV_IN_BIG_ENDIAN ++#define UCONV_IN_REV_ENDIAN UCONV_IN_LITTLE_ENDIAN ++#define UCONV_OUT_NAT_ENDIAN UCONV_OUT_BIG_ENDIAN ++#define UCONV_OUT_REV_ENDIAN UCONV_OUT_LITTLE_ENDIAN ++#else ++#define UCONV_IN_NAT_ENDIAN UCONV_IN_LITTLE_ENDIAN ++#define UCONV_IN_REV_ENDIAN UCONV_IN_BIG_ENDIAN ++#define UCONV_OUT_NAT_ENDIAN UCONV_OUT_LITTLE_ENDIAN ++#define UCONV_OUT_REV_ENDIAN UCONV_OUT_BIG_ENDIAN ++#endif /* _BIG_ENDIAN */ ++ ++/* The Byte Order Mark (BOM) character in normal and reversed byte orderings. */ ++#define UCONV_BOM_NORMAL (0xfeffU) ++#define UCONV_BOM_SWAPPED (0xfffeU) ++#define UCONV_BOM_SWAPPED_32 (0xfffe0000U) ++ ++/* UTF-32 boundaries based on UTF-8 character byte lengths. */ ++#define UCONV_U8_ONE_BYTE (0x7fU) ++#define UCONV_U8_TWO_BYTES (0x7ffU) ++#define UCONV_U8_THREE_BYTES (0xffffU) ++#define UCONV_U8_FOUR_BYTES (0x10ffffU) ++ ++/* The common minimum and maximum values at the UTF-8 character bytes. */ ++#define UCONV_U8_BYTE_MIN (0x80U) ++#define UCONV_U8_BYTE_MAX (0xbfU) ++ ++/* ++ * The following "6" and "0x3f" came from "10xx xxxx" bit representation of ++ * UTF-8 character bytes. ++ */ ++#define UCONV_U8_BIT_SHIFT 6 ++#define UCONV_U8_BIT_MASK 0x3f ++ ++/* ++ * The following vector shows remaining bytes in a UTF-8 character. ++ * Index will be the first byte of the character. 
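++ *
++ * For example (reading the table below): a lead byte of 0xc3 maps to
++ * 1 remaining byte, 0xe2 maps to 2, 0xf0 maps to 3, and any 7-bit
++ * ASCII lead byte maps to 0.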
++ */ ++static const uchar_t remaining_bytes_tbl[0x100] = { ++ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ++ ++/* C0 C1 C2 C3 C4 C5 C6 C7 C8 C9 CA CB CC CD CE CF */ ++ 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, ++ ++/* D0 D1 D2 D3 D4 D5 D6 D7 D8 D9 DA DB DC DD DE DF */ ++ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, ++ ++/* E0 E1 E2 E3 E4 E5 E6 E7 E8 E9 EA EB EC ED EE EF */ ++ 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, ++ ++/* F0 F1 F2 F3 F4 F5 F6 F7 F8 F9 FA FB FC FD FE FF */ ++ 3, 3, 3, 3, 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 ++}; ++ ++/* ++ * The following is a vector of bit-masks to get used bits in ++ * the first byte of a UTF-8 character. Index is remaining bytes at above of ++ * the character. ++ */ ++#ifdef _KERNEL ++const uchar_t u8_masks_tbl[6] = { 0x00, 0x1f, 0x0f, 0x07, 0x03, 0x01 }; ++#else ++static const uchar_t u8_masks_tbl[6] = { 0x00, 0x1f, 0x0f, 0x07, 0x03, 0x01 }; ++#endif /* _KERNEL */ ++ ++/* ++ * The following two vectors are to provide valid minimum and ++ * maximum values for the 2'nd byte of a multibyte UTF-8 character for ++ * better illegal sequence checking. The index value must be the value of ++ * the first byte of the UTF-8 character. ++ */ ++static const uchar_t valid_min_2nd_byte[0x100] = { ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ ++/* C0 C1 C2 C3 C4 C5 C6 C7 */ ++ 0, 0, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, ++ ++/* C8 C9 CA CB CC CD CE CF */ ++ 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, ++ ++/* D0 D1 D2 D3 D4 D5 D6 D7 */ ++ 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, ++ ++/* D8 D9 DA DB DC DD DE DF */ ++ 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, ++ ++/* E0 E1 E2 E3 E4 E5 E6 E7 */ ++ 0xa0, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, ++ ++/* E8 E9 EA EB EC ED EE EF */ ++ 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, ++ ++/* F0 F1 F2 F3 F4 F5 F6 F7 */ ++ 0x90, 0x80, 0x80, 0x80, 0x80, 0, 0, 0, ++ ++ 0, 0, 0, 0, 0, 0, 0, 0 ++}; ++ ++static const uchar_t valid_max_2nd_byte[0x100] = { ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 
0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ ++/* C0 C1 C2 C3 C4 C5 C6 C7 */ ++ 0, 0, 0xbf, 0xbf, 0xbf, 0xbf, 0xbf, 0xbf, ++ ++/* C8 C9 CA CB CC CD CE CF */ ++ 0xbf, 0xbf, 0xbf, 0xbf, 0xbf, 0xbf, 0xbf, 0xbf, ++ ++/* D0 D1 D2 D3 D4 D5 D6 D7 */ ++ 0xbf, 0xbf, 0xbf, 0xbf, 0xbf, 0xbf, 0xbf, 0xbf, ++ ++/* D8 D9 DA DB DC DD DE DF */ ++ 0xbf, 0xbf, 0xbf, 0xbf, 0xbf, 0xbf, 0xbf, 0xbf, ++ ++/* E0 E1 E2 E3 E4 E5 E6 E7 */ ++ 0xbf, 0xbf, 0xbf, 0xbf, 0xbf, 0xbf, 0xbf, 0xbf, ++ ++/* E8 E9 EA EB EC ED EE EF */ ++ 0xbf, 0xbf, 0xbf, 0xbf, 0xbf, 0x9f, 0xbf, 0xbf, ++ ++/* F0 F1 F2 F3 F4 F5 F6 F7 */ ++ 0xbf, 0xbf, 0xbf, 0xbf, 0x8f, 0, 0, 0, ++ ++ 0, 0, 0, 0, 0, 0, 0, 0 ++}; ++ ++ ++static int ++check_endian(int flag, int *in, int *out) ++{ ++ *in = flag & UCONV_IN_ENDIAN_MASKS; ++ ++ /* You cannot have both. */ ++ if (*in == UCONV_IN_ENDIAN_MASKS) ++ return (EBADF); ++ ++ if (*in == 0) ++ *in = UCONV_IN_NAT_ENDIAN; ++ ++ *out = flag & UCONV_OUT_ENDIAN_MASKS; ++ ++ /* You cannot have both. */ ++ if (*out == UCONV_OUT_ENDIAN_MASKS) ++ return (EBADF); ++ ++ if (*out == 0) ++ *out = UCONV_OUT_NAT_ENDIAN; ++ ++ return (0); ++} ++ ++static boolean_t ++check_bom16(const uint16_t *u16s, size_t u16l, int *in) ++{ ++ if (u16l > 0) { ++ if (*u16s == UCONV_BOM_NORMAL) { ++ *in = UCONV_IN_NAT_ENDIAN; ++ return (B_TRUE); ++ } ++ if (*u16s == UCONV_BOM_SWAPPED) { ++ *in = UCONV_IN_REV_ENDIAN; ++ return (B_TRUE); ++ } ++ } ++ ++ return (B_FALSE); ++} ++ ++static boolean_t ++check_bom32(const uint32_t *u32s, size_t u32l, int *in) ++{ ++ if (u32l > 0) { ++ if (*u32s == UCONV_BOM_NORMAL) { ++ *in = UCONV_IN_NAT_ENDIAN; ++ return (B_TRUE); ++ } ++ if (*u32s == UCONV_BOM_SWAPPED_32) { ++ *in = UCONV_IN_REV_ENDIAN; ++ return (B_TRUE); ++ } ++ } ++ ++ return (B_FALSE); ++} ++ ++int ++uconv_u16tou32(const uint16_t *u16s, size_t *utf16len, ++ uint32_t *u32s, size_t *utf32len, int flag) ++{ ++ int inendian; ++ int outendian; ++ size_t u16l; ++ size_t u32l; ++ uint32_t hi; ++ uint32_t lo; ++ boolean_t do_not_ignore_null; ++ ++ /* ++ * Do preliminary validity checks on parameters and collect info on ++ * endians. ++ */ ++ if (u16s == NULL || utf16len == NULL) ++ return (EILSEQ); ++ ++ if (u32s == NULL || utf32len == NULL) ++ return (E2BIG); ++ ++ if (check_endian(flag, &inendian, &outendian) != 0) ++ return (EBADF); ++ ++ /* ++ * Initialize input and output parameter buffer indices and ++ * temporary variables. ++ */ ++ u16l = u32l = 0; ++ hi = 0; ++ do_not_ignore_null = ((flag & UCONV_IGNORE_NULL) == 0); ++ ++ /* ++ * Check on the BOM at the beginning of the input buffer if required ++ * and if there is indeed one, process it. ++ */ ++ if ((flag & UCONV_IN_ACCEPT_BOM) && ++ check_bom16(u16s, *utf16len, &inendian)) ++ u16l++; ++ ++ /* ++ * Reset inendian and outendian so that after this point, those can be ++ * used as condition values. ++ */ ++ inendian &= UCONV_IN_NAT_ENDIAN; ++ outendian &= UCONV_OUT_NAT_ENDIAN; ++ ++ /* ++ * If there is something in the input buffer and if necessary and ++ * requested, save the BOM at the output buffer. ++ */ ++ if (*utf16len > 0 && *utf32len > 0 && (flag & UCONV_OUT_EMIT_BOM)) ++ u32s[u32l++] = (outendian) ? UCONV_BOM_NORMAL : ++ UCONV_BOM_SWAPPED_32; ++ ++ /* ++ * Do conversion; if encounter a surrogate pair, assemble high and ++ * low pair values to form a UTF-32 character. 
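++ * For example, the surrogate pair 0xd83d 0xde00 assembles to
++ * ((0xd83d - 0xd800) * 0x400 + (0xde00 - 0xdc00)) + 0x10000 = 0x1f600.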
If a half of a pair ++ * exists alone, then, either it is an illegal (EILSEQ) or ++ * invalid (EINVAL) value. ++ */ ++ for (; u16l < *utf16len; u16l++) { ++ if (u16s[u16l] == 0 && do_not_ignore_null) ++ break; ++ ++ lo = (uint32_t)((inendian) ? u16s[u16l] : BSWAP_16(u16s[u16l])); ++ ++ if (lo >= UCONV_U16_HI_MIN && lo <= UCONV_U16_HI_MAX) { ++ if (hi) ++ return (EILSEQ); ++ hi = lo; ++ continue; ++ } else if (lo >= UCONV_U16_LO_MIN && lo <= UCONV_U16_LO_MAX) { ++ if (! hi) ++ return (EILSEQ); ++ lo = (((hi - UCONV_U16_HI_MIN) * UCONV_U16_BIT_SHIFT + ++ lo - UCONV_U16_LO_MIN) & UCONV_U16_BIT_MASK) ++ + UCONV_U16_START; ++ hi = 0; ++ } else if (hi) { ++ return (EILSEQ); ++ } ++ ++ if (u32l >= *utf32len) ++ return (E2BIG); ++ ++ u32s[u32l++] = (outendian) ? lo : BSWAP_32(lo); ++ } ++ ++ /* ++ * If high half didn't see low half, then, it's most likely the input ++ * parameter is incomplete. ++ */ ++ if (hi) ++ return (EINVAL); ++ ++ /* ++ * Save the number of consumed and saved characters. They do not ++ * include terminating NULL character (U+0000) at the end of ++ * the input buffer (even when UCONV_IGNORE_NULL isn't specified and ++ * the input buffer length is big enough to include the terminating ++ * NULL character). ++ */ ++ *utf16len = u16l; ++ *utf32len = u32l; ++ ++ return (0); ++} ++ ++int ++uconv_u16tou8(const uint16_t *u16s, size_t *utf16len, ++ uchar_t *u8s, size_t *utf8len, int flag) ++{ ++ int inendian; ++ int outendian; ++ size_t u16l; ++ size_t u8l; ++ uint32_t hi; ++ uint32_t lo; ++ boolean_t do_not_ignore_null; ++ ++ if (u16s == NULL || utf16len == NULL) ++ return (EILSEQ); ++ ++ if (u8s == NULL || utf8len == NULL) ++ return (E2BIG); ++ ++ if (check_endian(flag, &inendian, &outendian) != 0) ++ return (EBADF); ++ ++ u16l = u8l = 0; ++ hi = 0; ++ do_not_ignore_null = ((flag & UCONV_IGNORE_NULL) == 0); ++ ++ if ((flag & UCONV_IN_ACCEPT_BOM) && ++ check_bom16(u16s, *utf16len, &inendian)) ++ u16l++; ++ ++ inendian &= UCONV_IN_NAT_ENDIAN; ++ ++ for (; u16l < *utf16len; u16l++) { ++ if (u16s[u16l] == 0 && do_not_ignore_null) ++ break; ++ ++ lo = (uint32_t)((inendian) ? u16s[u16l] : BSWAP_16(u16s[u16l])); ++ ++ if (lo >= UCONV_U16_HI_MIN && lo <= UCONV_U16_HI_MAX) { ++ if (hi) ++ return (EILSEQ); ++ hi = lo; ++ continue; ++ } else if (lo >= UCONV_U16_LO_MIN && lo <= UCONV_U16_LO_MAX) { ++ if (! hi) ++ return (EILSEQ); ++ lo = (((hi - UCONV_U16_HI_MIN) * UCONV_U16_BIT_SHIFT + ++ lo - UCONV_U16_LO_MIN) & UCONV_U16_BIT_MASK) ++ + UCONV_U16_START; ++ hi = 0; ++ } else if (hi) { ++ return (EILSEQ); ++ } ++ ++ /* ++ * Now we convert a UTF-32 character into a UTF-8 character. ++ * Unicode coding space is between U+0000 and U+10FFFF; ++ * anything bigger is an illegal character. 
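++ * For example, U+00E9 falls in the two-byte range below and encodes
++ * as 0xc0 | ((0xe9 & 0x07c0) >> 6) = 0xc3 and 0x80 | (0xe9 & 0x003f)
++ * = 0xa9, i.e. the byte sequence 0xc3 0xa9.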
++ */ ++ if (lo <= UCONV_U8_ONE_BYTE) { ++ if (u8l >= *utf8len) ++ return (E2BIG); ++ u8s[u8l++] = (uchar_t)lo; ++ } else if (lo <= UCONV_U8_TWO_BYTES) { ++ if ((u8l + 1) >= *utf8len) ++ return (E2BIG); ++ u8s[u8l++] = (uchar_t)(0xc0 | ((lo & 0x07c0) >> 6)); ++ u8s[u8l++] = (uchar_t)(0x80 | (lo & 0x003f)); ++ } else if (lo <= UCONV_U8_THREE_BYTES) { ++ if ((u8l + 2) >= *utf8len) ++ return (E2BIG); ++ u8s[u8l++] = (uchar_t)(0xe0 | ((lo & 0x0f000) >> 12)); ++ u8s[u8l++] = (uchar_t)(0x80 | ((lo & 0x00fc0) >> 6)); ++ u8s[u8l++] = (uchar_t)(0x80 | (lo & 0x0003f)); ++ } else if (lo <= UCONV_U8_FOUR_BYTES) { ++ if ((u8l + 3) >= *utf8len) ++ return (E2BIG); ++ u8s[u8l++] = (uchar_t)(0xf0 | ((lo & 0x01c0000) >> 18)); ++ u8s[u8l++] = (uchar_t)(0x80 | ((lo & 0x003f000) >> 12)); ++ u8s[u8l++] = (uchar_t)(0x80 | ((lo & 0x0000fc0) >> 6)); ++ u8s[u8l++] = (uchar_t)(0x80 | (lo & 0x000003f)); ++ } else { ++ return (EILSEQ); ++ } ++ } ++ ++ if (hi) ++ return (EINVAL); ++ ++ *utf16len = u16l; ++ *utf8len = u8l; ++ ++ return (0); ++} ++ ++int ++uconv_u32tou16(const uint32_t *u32s, size_t *utf32len, ++ uint16_t *u16s, size_t *utf16len, int flag) ++{ ++ int inendian; ++ int outendian; ++ size_t u16l; ++ size_t u32l; ++ uint32_t hi; ++ uint32_t lo; ++ boolean_t do_not_ignore_null; ++ ++ if (u32s == NULL || utf32len == NULL) ++ return (EILSEQ); ++ ++ if (u16s == NULL || utf16len == NULL) ++ return (E2BIG); ++ ++ if (check_endian(flag, &inendian, &outendian) != 0) ++ return (EBADF); ++ ++ u16l = u32l = 0; ++ do_not_ignore_null = ((flag & UCONV_IGNORE_NULL) == 0); ++ ++ if ((flag & UCONV_IN_ACCEPT_BOM) && ++ check_bom32(u32s, *utf32len, &inendian)) ++ u32l++; ++ ++ inendian &= UCONV_IN_NAT_ENDIAN; ++ outendian &= UCONV_OUT_NAT_ENDIAN; ++ ++ if (*utf32len > 0 && *utf16len > 0 && (flag & UCONV_OUT_EMIT_BOM)) ++ u16s[u16l++] = (outendian) ? UCONV_BOM_NORMAL : ++ UCONV_BOM_SWAPPED; ++ ++ for (; u32l < *utf32len; u32l++) { ++ if (u32s[u32l] == 0 && do_not_ignore_null) ++ break; ++ ++ hi = (inendian) ? u32s[u32l] : BSWAP_32(u32s[u32l]); ++ ++ /* ++ * Anything bigger than the Unicode coding space, i.e., ++ * Unicode scalar value bigger than U+10FFFF, is an illegal ++ * character. ++ */ ++ if (hi > UCONV_UNICODE_MAX) ++ return (EILSEQ); ++ ++ /* ++ * Anything bigger than U+FFFF must be converted into ++ * a surrogate pair in UTF-16. ++ */ ++ if (hi >= UCONV_U16_START) { ++ lo = ((hi - UCONV_U16_START) % UCONV_U16_BIT_SHIFT) + ++ UCONV_U16_LO_MIN; ++ hi = ((hi - UCONV_U16_START) / UCONV_U16_BIT_SHIFT) + ++ UCONV_U16_HI_MIN; ++ ++ if ((u16l + 1) >= *utf16len) ++ return (E2BIG); ++ ++ if (outendian) { ++ u16s[u16l++] = (uint16_t)hi; ++ u16s[u16l++] = (uint16_t)lo; ++ } else { ++ u16s[u16l++] = BSWAP_16(((uint16_t)hi)); ++ u16s[u16l++] = BSWAP_16(((uint16_t)lo)); ++ } ++ } else { ++ if (u16l >= *utf16len) ++ return (E2BIG); ++ u16s[u16l++] = (outendian) ? 
(uint16_t)hi : ++ BSWAP_16(((uint16_t)hi)); ++ } ++ } ++ ++ *utf16len = u16l; ++ *utf32len = u32l; ++ ++ return (0); ++} ++ ++int ++uconv_u32tou8(const uint32_t *u32s, size_t *utf32len, ++ uchar_t *u8s, size_t *utf8len, int flag) ++{ ++ int inendian; ++ int outendian; ++ size_t u32l; ++ size_t u8l; ++ uint32_t lo; ++ boolean_t do_not_ignore_null; ++ ++ if (u32s == NULL || utf32len == NULL) ++ return (EILSEQ); ++ ++ if (u8s == NULL || utf8len == NULL) ++ return (E2BIG); ++ ++ if (check_endian(flag, &inendian, &outendian) != 0) ++ return (EBADF); ++ ++ u32l = u8l = 0; ++ do_not_ignore_null = ((flag & UCONV_IGNORE_NULL) == 0); ++ ++ if ((flag & UCONV_IN_ACCEPT_BOM) && ++ check_bom32(u32s, *utf32len, &inendian)) ++ u32l++; ++ ++ inendian &= UCONV_IN_NAT_ENDIAN; ++ ++ for (; u32l < *utf32len; u32l++) { ++ if (u32s[u32l] == 0 && do_not_ignore_null) ++ break; ++ ++ lo = (inendian) ? u32s[u32l] : BSWAP_32(u32s[u32l]); ++ ++ if (lo <= UCONV_U8_ONE_BYTE) { ++ if (u8l >= *utf8len) ++ return (E2BIG); ++ u8s[u8l++] = (uchar_t)lo; ++ } else if (lo <= UCONV_U8_TWO_BYTES) { ++ if ((u8l + 1) >= *utf8len) ++ return (E2BIG); ++ u8s[u8l++] = (uchar_t)(0xc0 | ((lo & 0x07c0) >> 6)); ++ u8s[u8l++] = (uchar_t)(0x80 | (lo & 0x003f)); ++ } else if (lo <= UCONV_U8_THREE_BYTES) { ++ if ((u8l + 2) >= *utf8len) ++ return (E2BIG); ++ u8s[u8l++] = (uchar_t)(0xe0 | ((lo & 0x0f000) >> 12)); ++ u8s[u8l++] = (uchar_t)(0x80 | ((lo & 0x00fc0) >> 6)); ++ u8s[u8l++] = (uchar_t)(0x80 | (lo & 0x0003f)); ++ } else if (lo <= UCONV_U8_FOUR_BYTES) { ++ if ((u8l + 3) >= *utf8len) ++ return (E2BIG); ++ u8s[u8l++] = (uchar_t)(0xf0 | ((lo & 0x01c0000) >> 18)); ++ u8s[u8l++] = (uchar_t)(0x80 | ((lo & 0x003f000) >> 12)); ++ u8s[u8l++] = (uchar_t)(0x80 | ((lo & 0x0000fc0) >> 6)); ++ u8s[u8l++] = (uchar_t)(0x80 | (lo & 0x000003f)); ++ } else { ++ return (EILSEQ); ++ } ++ } ++ ++ *utf32len = u32l; ++ *utf8len = u8l; ++ ++ return (0); ++} ++ ++int ++uconv_u8tou16(const uchar_t *u8s, size_t *utf8len, ++ uint16_t *u16s, size_t *utf16len, int flag) ++{ ++ int inendian; ++ int outendian; ++ size_t u16l; ++ size_t u8l; ++ uint32_t hi; ++ uint32_t lo; ++ int remaining_bytes; ++ int first_b; ++ boolean_t do_not_ignore_null; ++ ++ if (u8s == NULL || utf8len == NULL) ++ return (EILSEQ); ++ ++ if (u16s == NULL || utf16len == NULL) ++ return (E2BIG); ++ ++ if (check_endian(flag, &inendian, &outendian) != 0) ++ return (EBADF); ++ ++ u16l = u8l = 0; ++ do_not_ignore_null = ((flag & UCONV_IGNORE_NULL) == 0); ++ ++ outendian &= UCONV_OUT_NAT_ENDIAN; ++ ++ if (*utf8len > 0 && *utf16len > 0 && (flag & UCONV_OUT_EMIT_BOM)) ++ u16s[u16l++] = (outendian) ? UCONV_BOM_NORMAL : ++ UCONV_BOM_SWAPPED; ++ ++ for (; u8l < *utf8len; ) { ++ if (u8s[u8l] == 0 && do_not_ignore_null) ++ break; ++ ++ /* ++ * Collect a UTF-8 character and convert it to a UTF-32 ++ * character. In doing so, we screen out illegally formed ++ * UTF-8 characters and treat such as illegal characters. ++ * The algorithm at below also screens out anything bigger ++ * than the U+10FFFF. ++ * ++ * See Unicode 3.1 UTF-8 Corrigendum and Unicode 3.2 for ++ * more details on the illegal values of UTF-8 character ++ * bytes. ++ */ ++ hi = (uint32_t)u8s[u8l++]; ++ ++ if (hi > UCONV_ASCII_MAX) { ++ if ((remaining_bytes = remaining_bytes_tbl[hi]) == 0) ++ return (EILSEQ); ++ ++ first_b = hi; ++ hi = hi & u8_masks_tbl[remaining_bytes]; ++ ++ for (; remaining_bytes > 0; remaining_bytes--) { ++ /* ++ * If we have no more bytes, the current ++ * UTF-8 character is incomplete. 
++ */ ++ if (u8l >= *utf8len) ++ return (EINVAL); ++ ++ lo = (uint32_t)u8s[u8l++]; ++ ++ if (first_b) { ++ if (lo < valid_min_2nd_byte[first_b] || ++ lo > valid_max_2nd_byte[first_b]) ++ return (EILSEQ); ++ first_b = 0; ++ } else if (lo < UCONV_U8_BYTE_MIN || ++ lo > UCONV_U8_BYTE_MAX) { ++ return (EILSEQ); ++ } ++ hi = (hi << UCONV_U8_BIT_SHIFT) | ++ (lo & UCONV_U8_BIT_MASK); ++ } ++ } ++ ++ if (hi >= UCONV_U16_START) { ++ lo = ((hi - UCONV_U16_START) % UCONV_U16_BIT_SHIFT) + ++ UCONV_U16_LO_MIN; ++ hi = ((hi - UCONV_U16_START) / UCONV_U16_BIT_SHIFT) + ++ UCONV_U16_HI_MIN; ++ ++ if ((u16l + 1) >= *utf16len) ++ return (E2BIG); ++ ++ if (outendian) { ++ u16s[u16l++] = (uint16_t)hi; ++ u16s[u16l++] = (uint16_t)lo; ++ } else { ++ u16s[u16l++] = BSWAP_16(((uint16_t)hi)); ++ u16s[u16l++] = BSWAP_16(((uint16_t)lo)); ++ } ++ } else { ++ if (u16l >= *utf16len) ++ return (E2BIG); ++ ++ u16s[u16l++] = (outendian) ? (uint16_t)hi : ++ BSWAP_16(((uint16_t)hi)); ++ } ++ } ++ ++ *utf16len = u16l; ++ *utf8len = u8l; ++ ++ return (0); ++} ++ ++int ++uconv_u8tou32(const uchar_t *u8s, size_t *utf8len, ++ uint32_t *u32s, size_t *utf32len, int flag) ++{ ++ int inendian; ++ int outendian; ++ size_t u32l; ++ size_t u8l; ++ uint32_t hi; ++ uint32_t c; ++ int remaining_bytes; ++ int first_b; ++ boolean_t do_not_ignore_null; ++ ++ if (u8s == NULL || utf8len == NULL) ++ return (EILSEQ); ++ ++ if (u32s == NULL || utf32len == NULL) ++ return (E2BIG); ++ ++ if (check_endian(flag, &inendian, &outendian) != 0) ++ return (EBADF); ++ ++ u32l = u8l = 0; ++ do_not_ignore_null = ((flag & UCONV_IGNORE_NULL) == 0); ++ ++ outendian &= UCONV_OUT_NAT_ENDIAN; ++ ++ if (*utf8len > 0 && *utf32len > 0 && (flag & UCONV_OUT_EMIT_BOM)) ++ u32s[u32l++] = (outendian) ? UCONV_BOM_NORMAL : ++ UCONV_BOM_SWAPPED_32; ++ ++ for (; u8l < *utf8len; ) { ++ if (u8s[u8l] == 0 && do_not_ignore_null) ++ break; ++ ++ hi = (uint32_t)u8s[u8l++]; ++ ++ if (hi > UCONV_ASCII_MAX) { ++ if ((remaining_bytes = remaining_bytes_tbl[hi]) == 0) ++ return (EILSEQ); ++ ++ first_b = hi; ++ hi = hi & u8_masks_tbl[remaining_bytes]; ++ ++ for (; remaining_bytes > 0; remaining_bytes--) { ++ if (u8l >= *utf8len) ++ return (EINVAL); ++ ++ c = (uint32_t)u8s[u8l++]; ++ ++ if (first_b) { ++ if (c < valid_min_2nd_byte[first_b] || ++ c > valid_max_2nd_byte[first_b]) ++ return (EILSEQ); ++ first_b = 0; ++ } else if (c < UCONV_U8_BYTE_MIN || ++ c > UCONV_U8_BYTE_MAX) { ++ return (EILSEQ); ++ } ++ hi = (hi << UCONV_U8_BIT_SHIFT) | ++ (c & UCONV_U8_BIT_MASK); ++ } ++ } ++ ++ if (u32l >= *utf32len) ++ return (E2BIG); ++ ++ u32s[u32l++] = (outendian) ? 
hi : BSWAP_32(hi); ++ } ++ ++ *utf32len = u32l; ++ *utf8len = u8l; ++ ++ return (0); ++} ++ ++#if defined(_KERNEL) && defined(HAVE_SPL) ++EXPORT_SYMBOL(uconv_u16tou32); ++EXPORT_SYMBOL(uconv_u16tou8); ++EXPORT_SYMBOL(uconv_u32tou16); ++EXPORT_SYMBOL(uconv_u32tou8); ++EXPORT_SYMBOL(uconv_u8tou16); ++EXPORT_SYMBOL(uconv_u8tou32); ++#endif +diff -uNr linux-3.2.33-go.orig/fs/zfs/zcommon/Makefile linux-3.2.33-go/fs/zfs/zcommon/Makefile +--- linux-3.2.33-go.orig/fs/zfs/zcommon/Makefile 1970-01-01 01:00:00.000000000 +0100 ++++ linux-3.2.33-go/fs/zfs/zcommon/Makefile 2012-11-16 23:25:34.364039163 +0100 +@@ -0,0 +1,14 @@ ++MODULE := zcommon ++ ++EXTRA_CFLAGS = $(ZFS_MODULE_CFLAGS) -Wno-unused-but-set-variable -DHAVE_SPL -D_KERNEL -DTEXT_DOMAIN=\"zfs-linux-kernel\" -DNDEBUG ++ ++obj-$(CONFIG_ZFS) := $(MODULE).o ++ ++$(MODULE)-objs += zfs_deleg.o ++$(MODULE)-objs += zfs_prop.o ++$(MODULE)-objs += zprop_common.o ++$(MODULE)-objs += zfs_namecheck.o ++$(MODULE)-objs += zfs_comutil.o ++$(MODULE)-objs += zfs_fletcher.o ++$(MODULE)-objs += zfs_uio.o ++$(MODULE)-objs += zpool_prop.o +diff -uNr linux-3.2.33-go.orig/fs/zfs/zcommon/Makefile.in linux-3.2.33-go/fs/zfs/zcommon/Makefile.in +--- linux-3.2.33-go.orig/fs/zfs/zcommon/Makefile.in 1970-01-01 01:00:00.000000000 +0100 ++++ linux-3.2.33-go/fs/zfs/zcommon/Makefile.in 2012-11-16 23:25:34.354039278 +0100 +@@ -0,0 +1,14 @@ ++MODULE := zcommon ++ ++EXTRA_CFLAGS = $(ZFS_MODULE_CFLAGS) @KERNELCPPFLAGS@ ++ ++obj-$(CONFIG_ZFS) := $(MODULE).o ++ ++$(MODULE)-objs += @top_srcdir@/module/zcommon/zfs_deleg.o ++$(MODULE)-objs += @top_srcdir@/module/zcommon/zfs_prop.o ++$(MODULE)-objs += @top_srcdir@/module/zcommon/zprop_common.o ++$(MODULE)-objs += @top_srcdir@/module/zcommon/zfs_namecheck.o ++$(MODULE)-objs += @top_srcdir@/module/zcommon/zfs_comutil.o ++$(MODULE)-objs += @top_srcdir@/module/zcommon/zfs_fletcher.o ++$(MODULE)-objs += @top_srcdir@/module/zcommon/zfs_uio.o ++$(MODULE)-objs += @top_srcdir@/module/zcommon/zpool_prop.o +diff -uNr linux-3.2.33-go.orig/fs/zfs/zcommon/zfs_comutil.c linux-3.2.33-go/fs/zfs/zcommon/zfs_comutil.c +--- linux-3.2.33-go.orig/fs/zfs/zcommon/zfs_comutil.c 1970-01-01 01:00:00.000000000 +0100 ++++ linux-3.2.33-go/fs/zfs/zcommon/zfs_comutil.c 2012-11-16 23:25:34.354039278 +0100 +@@ -0,0 +1,210 @@ ++/* ++ * CDDL HEADER START ++ * ++ * The contents of this file are subject to the terms of the ++ * Common Development and Distribution License (the "License"). ++ * You may not use this file except in compliance with the License. ++ * ++ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE ++ * or http://www.opensolaris.org/os/licensing. ++ * See the License for the specific language governing permissions ++ * and limitations under the License. ++ * ++ * When distributing Covered Code, include this CDDL HEADER in each ++ * file and include the License file at usr/src/OPENSOLARIS.LICENSE. ++ * If applicable, add the following below this CDDL HEADER, with the ++ * fields enclosed by brackets "[]" replaced with your own identifying ++ * information: Portions Copyright [yyyy] [name of copyright owner] ++ * ++ * CDDL HEADER END ++ */ ++/* ++ * Copyright (c) 2008, 2010, Oracle and/or its affiliates. All rights reserved. ++ */ ++ ++/* ++ * This file is intended for functions that ought to be common between user ++ * land (libzfs) and the kernel. When many common routines need to be shared ++ * then a separate file should to be created. 
++ */ ++ ++#if defined(_KERNEL) ++#include ++#else ++#include ++#endif ++ ++#include ++#include ++#include ++#include ++#include "zfs_comutil.h" ++ ++/* ++ * Are there allocatable vdevs? ++ */ ++boolean_t ++zfs_allocatable_devs(nvlist_t *nv) ++{ ++ uint64_t is_log; ++ uint_t c; ++ nvlist_t **child; ++ uint_t children; ++ ++ if (nvlist_lookup_nvlist_array(nv, ZPOOL_CONFIG_CHILDREN, ++ &child, &children) != 0) { ++ return (B_FALSE); ++ } ++ for (c = 0; c < children; c++) { ++ is_log = 0; ++ (void) nvlist_lookup_uint64(child[c], ZPOOL_CONFIG_IS_LOG, ++ &is_log); ++ if (!is_log) ++ return (B_TRUE); ++ } ++ return (B_FALSE); ++} ++ ++void ++zpool_get_rewind_policy(nvlist_t *nvl, zpool_rewind_policy_t *zrpp) ++{ ++ nvlist_t *policy; ++ nvpair_t *elem; ++ char *nm; ++ ++ /* Defaults */ ++ zrpp->zrp_request = ZPOOL_NO_REWIND; ++ zrpp->zrp_maxmeta = 0; ++ zrpp->zrp_maxdata = UINT64_MAX; ++ zrpp->zrp_txg = UINT64_MAX; ++ ++ if (nvl == NULL) ++ return; ++ ++ elem = NULL; ++ while ((elem = nvlist_next_nvpair(nvl, elem)) != NULL) { ++ nm = nvpair_name(elem); ++ if (strcmp(nm, ZPOOL_REWIND_POLICY) == 0) { ++ if (nvpair_value_nvlist(elem, &policy) == 0) ++ zpool_get_rewind_policy(policy, zrpp); ++ return; ++ } else if (strcmp(nm, ZPOOL_REWIND_REQUEST) == 0) { ++ if (nvpair_value_uint32(elem, &zrpp->zrp_request) == 0) ++ if (zrpp->zrp_request & ~ZPOOL_REWIND_POLICIES) ++ zrpp->zrp_request = ZPOOL_NO_REWIND; ++ } else if (strcmp(nm, ZPOOL_REWIND_REQUEST_TXG) == 0) { ++ (void) nvpair_value_uint64(elem, &zrpp->zrp_txg); ++ } else if (strcmp(nm, ZPOOL_REWIND_META_THRESH) == 0) { ++ (void) nvpair_value_uint64(elem, &zrpp->zrp_maxmeta); ++ } else if (strcmp(nm, ZPOOL_REWIND_DATA_THRESH) == 0) { ++ (void) nvpair_value_uint64(elem, &zrpp->zrp_maxdata); ++ } ++ } ++ if (zrpp->zrp_request == 0) ++ zrpp->zrp_request = ZPOOL_NO_REWIND; ++} ++ ++typedef struct zfs_version_spa_map { ++ int version_zpl; ++ int version_spa; ++} zfs_version_spa_map_t; ++ ++/* ++ * Keep this table in monotonically increasing version number order. ++ */ ++static zfs_version_spa_map_t zfs_version_table[] = { ++ {ZPL_VERSION_INITIAL, SPA_VERSION_INITIAL}, ++ {ZPL_VERSION_DIRENT_TYPE, SPA_VERSION_INITIAL}, ++ {ZPL_VERSION_FUID, SPA_VERSION_FUID}, ++ {ZPL_VERSION_USERSPACE, SPA_VERSION_USERSPACE}, ++ {ZPL_VERSION_SA, SPA_VERSION_SA}, ++ {0, 0} ++}; ++ ++/* ++ * Return the max zpl version for a corresponding spa version ++ * -1 is returned if no mapping exists. ++ */ ++int ++zfs_zpl_version_map(int spa_version) ++{ ++ int i; ++ int version = -1; ++ ++ for (i = 0; zfs_version_table[i].version_spa; i++) { ++ if (spa_version >= zfs_version_table[i].version_spa) ++ version = zfs_version_table[i].version_zpl; ++ } ++ ++ return (version); ++} ++ ++/* ++ * Return the min spa version for a corresponding spa version ++ * -1 is returned if no mapping exists. 
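++ * (That is, given a ZPL version, the lowest SPA version that supports
++ * it according to zfs_version_table above; for example ZPL_VERSION_SA
++ * maps to SPA_VERSION_SA.)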
++ */ ++int ++zfs_spa_version_map(int zpl_version) ++{ ++ int i; ++ int version = -1; ++ ++ for (i = 0; zfs_version_table[i].version_zpl; i++) { ++ if (zfs_version_table[i].version_zpl >= zpl_version) ++ return (zfs_version_table[i].version_spa); ++ } ++ ++ return (version); ++} ++ ++const char *zfs_history_event_names[LOG_END] = { ++ "invalid event", ++ "pool create", ++ "vdev add", ++ "pool remove", ++ "pool destroy", ++ "pool export", ++ "pool import", ++ "vdev attach", ++ "vdev replace", ++ "vdev detach", ++ "vdev online", ++ "vdev offline", ++ "vdev upgrade", ++ "pool clear", ++ "pool scrub", ++ "pool property set", ++ "create", ++ "clone", ++ "destroy", ++ "destroy_begin_sync", ++ "inherit", ++ "property set", ++ "quota set", ++ "permission update", ++ "permission remove", ++ "permission who remove", ++ "promote", ++ "receive", ++ "rename", ++ "reservation set", ++ "replay_inc_sync", ++ "replay_full_sync", ++ "rollback", ++ "snapshot", ++ "filesystem version upgrade", ++ "refquota set", ++ "refreservation set", ++ "pool scrub done", ++ "user hold", ++ "user release", ++ "pool split", ++}; ++ ++#if defined(_KERNEL) && defined(HAVE_SPL) ++EXPORT_SYMBOL(zfs_allocatable_devs); ++EXPORT_SYMBOL(zpool_get_rewind_policy); ++EXPORT_SYMBOL(zfs_zpl_version_map); ++EXPORT_SYMBOL(zfs_spa_version_map); ++EXPORT_SYMBOL(zfs_history_event_names); ++#endif +diff -uNr linux-3.2.33-go.orig/fs/zfs/zcommon/zfs_deleg.c linux-3.2.33-go/fs/zfs/zcommon/zfs_deleg.c +--- linux-3.2.33-go.orig/fs/zfs/zcommon/zfs_deleg.c 1970-01-01 01:00:00.000000000 +0100 ++++ linux-3.2.33-go/fs/zfs/zcommon/zfs_deleg.c 2012-11-16 23:25:34.354039278 +0100 +@@ -0,0 +1,244 @@ ++/* ++ * CDDL HEADER START ++ * ++ * The contents of this file are subject to the terms of the ++ * Common Development and Distribution License (the "License"). ++ * You may not use this file except in compliance with the License. ++ * ++ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE ++ * or http://www.opensolaris.org/os/licensing. ++ * See the License for the specific language governing permissions ++ * and limitations under the License. ++ * ++ * When distributing Covered Code, include this CDDL HEADER in each ++ * file and include the License file at usr/src/OPENSOLARIS.LICENSE. ++ * If applicable, add the following below this CDDL HEADER, with the ++ * fields enclosed by brackets "[]" replaced with your own identifying ++ * information: Portions Copyright [yyyy] [name of copyright owner] ++ * ++ * CDDL HEADER END ++ */ ++/* ++ * Copyright (c) 2007, 2010, Oracle and/or its affiliates. All rights reserved. ++ * Copyright 2010 Nexenta Systems, Inc. All rights reserved. ++ */ ++ ++#if defined(_KERNEL) ++#include ++#include ++#include ++#else ++#include ++#include ++#include ++#include ++#include ++#endif ++/* XXX includes zfs_context.h, so why bother with the above? 
*/ ++#include ++#include "zfs_prop.h" ++#include "zfs_deleg.h" ++#include "zfs_namecheck.h" ++ ++/* ++ * permission table ++ * ++ * Keep this table in sorted order ++ * ++ * This table is used for displaying all permissions for ++ * zfs allow ++ */ ++ ++zfs_deleg_perm_tab_t zfs_deleg_perm_tab[] = { ++ {ZFS_DELEG_PERM_ALLOW, ZFS_DELEG_NOTE_ALLOW}, ++ {ZFS_DELEG_PERM_CLONE, ZFS_DELEG_NOTE_CLONE }, ++ {ZFS_DELEG_PERM_CREATE, ZFS_DELEG_NOTE_CREATE }, ++ {ZFS_DELEG_PERM_DESTROY, ZFS_DELEG_NOTE_DESTROY }, ++ {ZFS_DELEG_PERM_MOUNT, ZFS_DELEG_NOTE_MOUNT }, ++ {ZFS_DELEG_PERM_PROMOTE, ZFS_DELEG_NOTE_PROMOTE }, ++ {ZFS_DELEG_PERM_RECEIVE, ZFS_DELEG_NOTE_RECEIVE }, ++ {ZFS_DELEG_PERM_RENAME, ZFS_DELEG_NOTE_RENAME }, ++ {ZFS_DELEG_PERM_ROLLBACK, ZFS_DELEG_NOTE_ROLLBACK }, ++ {ZFS_DELEG_PERM_SNAPSHOT, ZFS_DELEG_NOTE_SNAPSHOT }, ++ {ZFS_DELEG_PERM_SHARE, ZFS_DELEG_NOTE_SHARE }, ++ {ZFS_DELEG_PERM_SEND, ZFS_DELEG_NOTE_SEND }, ++ {ZFS_DELEG_PERM_USERPROP, ZFS_DELEG_NOTE_USERPROP }, ++ {ZFS_DELEG_PERM_USERQUOTA, ZFS_DELEG_NOTE_USERQUOTA }, ++ {ZFS_DELEG_PERM_GROUPQUOTA, ZFS_DELEG_NOTE_GROUPQUOTA }, ++ {ZFS_DELEG_PERM_USERUSED, ZFS_DELEG_NOTE_USERUSED }, ++ {ZFS_DELEG_PERM_GROUPUSED, ZFS_DELEG_NOTE_GROUPUSED }, ++ {ZFS_DELEG_PERM_HOLD, ZFS_DELEG_NOTE_HOLD }, ++ {ZFS_DELEG_PERM_RELEASE, ZFS_DELEG_NOTE_RELEASE }, ++ {ZFS_DELEG_PERM_DIFF, ZFS_DELEG_NOTE_DIFF}, ++ {NULL, ZFS_DELEG_NOTE_NONE } ++}; ++ ++static int ++zfs_valid_permission_name(const char *perm) ++{ ++ if (zfs_deleg_canonicalize_perm(perm)) ++ return (0); ++ ++ return (permset_namecheck(perm, NULL, NULL)); ++} ++ ++const char * ++zfs_deleg_canonicalize_perm(const char *perm) ++{ ++ int i; ++ zfs_prop_t prop; ++ ++ for (i = 0; zfs_deleg_perm_tab[i].z_perm != NULL; i++) { ++ if (strcmp(perm, zfs_deleg_perm_tab[i].z_perm) == 0) ++ return (perm); ++ } ++ ++ prop = zfs_name_to_prop(perm); ++ if (prop != ZPROP_INVAL && zfs_prop_delegatable(prop)) ++ return (zfs_prop_to_name(prop)); ++ return (NULL); ++ ++} ++ ++static int ++zfs_validate_who(char *who) ++{ ++ char *p; ++ ++ if (who[2] != ZFS_DELEG_FIELD_SEP_CHR) ++ return (-1); ++ ++ switch (who[0]) { ++ case ZFS_DELEG_USER: ++ case ZFS_DELEG_GROUP: ++ case ZFS_DELEG_USER_SETS: ++ case ZFS_DELEG_GROUP_SETS: ++ if (who[1] != ZFS_DELEG_LOCAL && who[1] != ZFS_DELEG_DESCENDENT) ++ return (-1); ++ for (p = &who[3]; *p; p++) ++ if (!isdigit(*p)) ++ return (-1); ++ break; ++ ++ case ZFS_DELEG_NAMED_SET: ++ case ZFS_DELEG_NAMED_SET_SETS: ++ if (who[1] != ZFS_DELEG_NA) ++ return (-1); ++ return (permset_namecheck(&who[3], NULL, NULL)); ++ ++ case ZFS_DELEG_CREATE: ++ case ZFS_DELEG_CREATE_SETS: ++ if (who[1] != ZFS_DELEG_NA) ++ return (-1); ++ if (who[3] != '\0') ++ return (-1); ++ break; ++ ++ case ZFS_DELEG_EVERYONE: ++ case ZFS_DELEG_EVERYONE_SETS: ++ if (who[1] != ZFS_DELEG_LOCAL && who[1] != ZFS_DELEG_DESCENDENT) ++ return (-1); ++ if (who[3] != '\0') ++ return (-1); ++ break; ++ ++ default: ++ return (-1); ++ } ++ ++ return (0); ++} ++ ++int ++zfs_deleg_verify_nvlist(nvlist_t *nvp) ++{ ++ nvpair_t *who, *perm_name; ++ nvlist_t *perms; ++ int error; ++ ++ if (nvp == NULL) ++ return (-1); ++ ++ who = nvlist_next_nvpair(nvp, NULL); ++ if (who == NULL) ++ return (-1); ++ ++ do { ++ if (zfs_validate_who(nvpair_name(who))) ++ return (-1); ++ ++ error = nvlist_lookup_nvlist(nvp, nvpair_name(who), &perms); ++ ++ if (error && error != ENOENT) ++ return (-1); ++ if (error == ENOENT) ++ continue; ++ ++ perm_name = nvlist_next_nvpair(perms, NULL); ++ if (perm_name == NULL) { ++ return (-1); ++ } ++ do { ++ error = 
zfs_valid_permission_name( ++ nvpair_name(perm_name)); ++ if (error) ++ return (-1); ++ } while ((perm_name = nvlist_next_nvpair(perms, perm_name))); ++ } while ((who = nvlist_next_nvpair(nvp, who))); ++ return (0); ++} ++ ++/* ++ * Construct the base attribute name. The base attribute names ++ * are the "key" to locate the jump objects which contain the actual ++ * permissions. The base attribute names are encoded based on ++ * type of entry and whether it is a local or descendent permission. ++ * ++ * Arguments: ++ * attr - attribute name return string, attribute is assumed to be ++ * ZFS_MAX_DELEG_NAME long. ++ * type - type of entry to construct ++ * inheritchr - inheritance type (local,descendent, or NA for create and ++ * permission set definitions ++ * data - is either a permission set name or a 64 bit uid/gid. ++ */ ++void ++zfs_deleg_whokey(char *attr, zfs_deleg_who_type_t type, ++ char inheritchr, void *data) ++{ ++ int len = ZFS_MAX_DELEG_NAME; ++ uint64_t *id = data; ++ ++ switch (type) { ++ case ZFS_DELEG_USER: ++ case ZFS_DELEG_GROUP: ++ case ZFS_DELEG_USER_SETS: ++ case ZFS_DELEG_GROUP_SETS: ++ (void) snprintf(attr, len, "%c%c%c%lld", type, inheritchr, ++ ZFS_DELEG_FIELD_SEP_CHR, (longlong_t)*id); ++ break; ++ case ZFS_DELEG_NAMED_SET_SETS: ++ case ZFS_DELEG_NAMED_SET: ++ (void) snprintf(attr, len, "%c-%c%s", type, ++ ZFS_DELEG_FIELD_SEP_CHR, (char *)data); ++ break; ++ case ZFS_DELEG_CREATE: ++ case ZFS_DELEG_CREATE_SETS: ++ (void) snprintf(attr, len, "%c-%c", type, ++ ZFS_DELEG_FIELD_SEP_CHR); ++ break; ++ case ZFS_DELEG_EVERYONE: ++ case ZFS_DELEG_EVERYONE_SETS: ++ (void) snprintf(attr, len, "%c%c%c", type, inheritchr, ++ ZFS_DELEG_FIELD_SEP_CHR); ++ break; ++ default: ++ ASSERT(!"bad zfs_deleg_who_type_t"); ++ } ++} ++ ++#if defined(_KERNEL) && defined(HAVE_SPL) ++EXPORT_SYMBOL(zfs_deleg_verify_nvlist); ++EXPORT_SYMBOL(zfs_deleg_whokey); ++EXPORT_SYMBOL(zfs_deleg_canonicalize_perm); ++#endif +diff -uNr linux-3.2.33-go.orig/fs/zfs/zcommon/zfs_fletcher.c linux-3.2.33-go/fs/zfs/zcommon/zfs_fletcher.c +--- linux-3.2.33-go.orig/fs/zfs/zcommon/zfs_fletcher.c 1970-01-01 01:00:00.000000000 +0100 ++++ linux-3.2.33-go/fs/zfs/zcommon/zfs_fletcher.c 2012-11-16 23:25:34.354039278 +0100 +@@ -0,0 +1,255 @@ ++/* ++ * CDDL HEADER START ++ * ++ * The contents of this file are subject to the terms of the ++ * Common Development and Distribution License (the "License"). ++ * You may not use this file except in compliance with the License. ++ * ++ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE ++ * or http://www.opensolaris.org/os/licensing. ++ * See the License for the specific language governing permissions ++ * and limitations under the License. ++ * ++ * When distributing Covered Code, include this CDDL HEADER in each ++ * file and include the License file at usr/src/OPENSOLARIS.LICENSE. ++ * If applicable, add the following below this CDDL HEADER, with the ++ * fields enclosed by brackets "[]" replaced with your own identifying ++ * information: Portions Copyright [yyyy] [name of copyright owner] ++ * ++ * CDDL HEADER END ++ */ ++/* ++ * Copyright 2009 Sun Microsystems, Inc. All rights reserved. ++ * Use is subject to license terms. 
++ */ ++ ++/* ++ * Fletcher Checksums ++ * ------------------ ++ * ++ * ZFS's 2nd and 4th order Fletcher checksums are defined by the following ++ * recurrence relations: ++ * ++ * a = a + f ++ * i i-1 i-1 ++ * ++ * b = b + a ++ * i i-1 i ++ * ++ * c = c + b (fletcher-4 only) ++ * i i-1 i ++ * ++ * d = d + c (fletcher-4 only) ++ * i i-1 i ++ * ++ * Where ++ * a_0 = b_0 = c_0 = d_0 = 0 ++ * and ++ * f_0 .. f_(n-1) are the input data. ++ * ++ * Using standard techniques, these translate into the following series: ++ * ++ * __n_ __n_ ++ * \ | \ | ++ * a = > f b = > i * f ++ * n /___| n - i n /___| n - i ++ * i = 1 i = 1 ++ * ++ * ++ * __n_ __n_ ++ * \ | i*(i+1) \ | i*(i+1)*(i+2) ++ * c = > ------- f d = > ------------- f ++ * n /___| 2 n - i n /___| 6 n - i ++ * i = 1 i = 1 ++ * ++ * For fletcher-2, the f_is are 64-bit, and [ab]_i are 64-bit accumulators. ++ * Since the additions are done mod (2^64), errors in the high bits may not ++ * be noticed. For this reason, fletcher-2 is deprecated. ++ * ++ * For fletcher-4, the f_is are 32-bit, and [abcd]_i are 64-bit accumulators. ++ * A conservative estimate of how big the buffer can get before we overflow ++ * can be estimated using f_i = 0xffffffff for all i: ++ * ++ * % bc ++ * f=2^32-1;d=0; for (i = 1; d<2^64; i++) { d += f*i*(i+1)*(i+2)/6 }; (i-1)*4 ++ * 2264 ++ * quit ++ * % ++ * ++ * So blocks of up to 2k will not overflow. Our largest block size is ++ * 128k, which has 32k 4-byte words, so we can compute the largest possible ++ * accumulators, then divide by 2^64 to figure the max amount of overflow: ++ * ++ * % bc ++ * a=b=c=d=0; f=2^32-1; for (i=1; i<=32*1024; i++) { a+=f; b+=a; c+=b; d+=c } ++ * a/2^64;b/2^64;c/2^64;d/2^64 ++ * 0 ++ * 0 ++ * 1365 ++ * 11186858 ++ * quit ++ * % ++ * ++ * So a and b cannot overflow. To make sure each bit of input has some ++ * effect on the contents of c and d, we can look at what the factors of ++ * the coefficients in the equations for c_n and d_n are. The number of 2s ++ * in the factors determines the lowest set bit in the multiplier. Running ++ * through the cases for n*(n+1)/2 reveals that the highest power of 2 is ++ * 2^14, and for n*(n+1)*(n+2)/6 it is 2^15. So while some data may overflow ++ * the 64-bit accumulators, every bit of every f_i effects every accumulator, ++ * even for 128k blocks. ++ * ++ * If we wanted to make a stronger version of fletcher4 (fletcher4c?), ++ * we could do our calculations mod (2^32 - 1) by adding in the carries ++ * periodically, and store the number of carries in the top 32-bits. ++ * ++ * -------------------- ++ * Checksum Performance ++ * -------------------- ++ * ++ * There are two interesting components to checksum performance: cached and ++ * uncached performance. With cached data, fletcher-2 is about four times ++ * faster than fletcher-4. With uncached data, the performance difference is ++ * negligible, since the cost of a cache fill dominates the processing time. ++ * Even though fletcher-4 is slower than fletcher-2, it is still a pretty ++ * efficient pass over the data. ++ * ++ * In normal operation, the data which is being checksummed is in a buffer ++ * which has been filled either by: ++ * ++ * 1. a compression step, which will be mostly cached, or ++ * 2. a bcopy() or copyin(), which will be uncached (because the ++ * copy is cache-bypassing). ++ * ++ * For both cached and uncached data, both fletcher checksums are much faster ++ * than sha-256, and slower than 'off', which doesn't touch the data at all. 
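++ *
++ * As a concrete check of the series above: after three input words
++ * f_0, f_1, f_2 the recurrences give
++ *
++ *     a_3 = f_0 + f_1 + f_2
++ *     b_3 = 3*f_0 + 2*f_1 + f_2
++ *
++ * so b (and, one order further, c and d) weight earlier words more
++ * heavily, which is what makes the higher-order sums sensitive to
++ * word position and not just word values.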
++ */ ++ ++#include ++#include ++#include ++#include ++#include ++ ++void ++fletcher_2_native(const void *buf, uint64_t size, zio_cksum_t *zcp) ++{ ++ const uint64_t *ip = buf; ++ const uint64_t *ipend = ip + (size / sizeof (uint64_t)); ++ uint64_t a0, b0, a1, b1; ++ ++ for (a0 = b0 = a1 = b1 = 0; ip < ipend; ip += 2) { ++ a0 += ip[0]; ++ a1 += ip[1]; ++ b0 += a0; ++ b1 += a1; ++ } ++ ++ ZIO_SET_CHECKSUM(zcp, a0, a1, b0, b1); ++} ++ ++void ++fletcher_2_byteswap(const void *buf, uint64_t size, zio_cksum_t *zcp) ++{ ++ const uint64_t *ip = buf; ++ const uint64_t *ipend = ip + (size / sizeof (uint64_t)); ++ uint64_t a0, b0, a1, b1; ++ ++ for (a0 = b0 = a1 = b1 = 0; ip < ipend; ip += 2) { ++ a0 += BSWAP_64(ip[0]); ++ a1 += BSWAP_64(ip[1]); ++ b0 += a0; ++ b1 += a1; ++ } ++ ++ ZIO_SET_CHECKSUM(zcp, a0, a1, b0, b1); ++} ++ ++void ++fletcher_4_native(const void *buf, uint64_t size, zio_cksum_t *zcp) ++{ ++ const uint32_t *ip = buf; ++ const uint32_t *ipend = ip + (size / sizeof (uint32_t)); ++ uint64_t a, b, c, d; ++ ++ for (a = b = c = d = 0; ip < ipend; ip++) { ++ a += ip[0]; ++ b += a; ++ c += b; ++ d += c; ++ } ++ ++ ZIO_SET_CHECKSUM(zcp, a, b, c, d); ++} ++ ++void ++fletcher_4_byteswap(const void *buf, uint64_t size, zio_cksum_t *zcp) ++{ ++ const uint32_t *ip = buf; ++ const uint32_t *ipend = ip + (size / sizeof (uint32_t)); ++ uint64_t a, b, c, d; ++ ++ for (a = b = c = d = 0; ip < ipend; ip++) { ++ a += BSWAP_32(ip[0]); ++ b += a; ++ c += b; ++ d += c; ++ } ++ ++ ZIO_SET_CHECKSUM(zcp, a, b, c, d); ++} ++ ++void ++fletcher_4_incremental_native(const void *buf, uint64_t size, ++ zio_cksum_t *zcp) ++{ ++ const uint32_t *ip = buf; ++ const uint32_t *ipend = ip + (size / sizeof (uint32_t)); ++ uint64_t a, b, c, d; ++ ++ a = zcp->zc_word[0]; ++ b = zcp->zc_word[1]; ++ c = zcp->zc_word[2]; ++ d = zcp->zc_word[3]; ++ ++ for (; ip < ipend; ip++) { ++ a += ip[0]; ++ b += a; ++ c += b; ++ d += c; ++ } ++ ++ ZIO_SET_CHECKSUM(zcp, a, b, c, d); ++} ++ ++void ++fletcher_4_incremental_byteswap(const void *buf, uint64_t size, ++ zio_cksum_t *zcp) ++{ ++ const uint32_t *ip = buf; ++ const uint32_t *ipend = ip + (size / sizeof (uint32_t)); ++ uint64_t a, b, c, d; ++ ++ a = zcp->zc_word[0]; ++ b = zcp->zc_word[1]; ++ c = zcp->zc_word[2]; ++ d = zcp->zc_word[3]; ++ ++ for (; ip < ipend; ip++) { ++ a += BSWAP_32(ip[0]); ++ b += a; ++ c += b; ++ d += c; ++ } ++ ++ ZIO_SET_CHECKSUM(zcp, a, b, c, d); ++} ++ ++#if defined(_KERNEL) && defined(HAVE_SPL) ++EXPORT_SYMBOL(fletcher_2_native); ++EXPORT_SYMBOL(fletcher_2_byteswap); ++EXPORT_SYMBOL(fletcher_4_native); ++EXPORT_SYMBOL(fletcher_4_byteswap); ++EXPORT_SYMBOL(fletcher_4_incremental_native); ++EXPORT_SYMBOL(fletcher_4_incremental_byteswap); ++#endif +diff -uNr linux-3.2.33-go.orig/fs/zfs/zcommon/zfs_namecheck.c linux-3.2.33-go/fs/zfs/zcommon/zfs_namecheck.c +--- linux-3.2.33-go.orig/fs/zfs/zcommon/zfs_namecheck.c 1970-01-01 01:00:00.000000000 +0100 ++++ linux-3.2.33-go/fs/zfs/zcommon/zfs_namecheck.c 2012-11-16 23:25:34.354039278 +0100 +@@ -0,0 +1,378 @@ ++/* ++ * CDDL HEADER START ++ * ++ * The contents of this file are subject to the terms of the ++ * Common Development and Distribution License (the "License"). ++ * You may not use this file except in compliance with the License. ++ * ++ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE ++ * or http://www.opensolaris.org/os/licensing. ++ * See the License for the specific language governing permissions ++ * and limitations under the License. 
++ * ++ * When distributing Covered Code, include this CDDL HEADER in each ++ * file and include the License file at usr/src/OPENSOLARIS.LICENSE. ++ * If applicable, add the following below this CDDL HEADER, with the ++ * fields enclosed by brackets "[]" replaced with your own identifying ++ * information: Portions Copyright [yyyy] [name of copyright owner] ++ * ++ * CDDL HEADER END ++ */ ++/* ++ * Copyright 2009 Sun Microsystems, Inc. All rights reserved. ++ * Use is subject to license terms. ++ */ ++ ++/* ++ * Common name validation routines for ZFS. These routines are shared by the ++ * userland code as well as the ioctl() layer to ensure that we don't ++ * inadvertently expose a hole through direct ioctl()s that never gets tested. ++ * In userland, however, we want significantly more information about _why_ the ++ * name is invalid. In the kernel, we only care whether it's valid or not. ++ * Each routine therefore takes a 'namecheck_err_t' which describes exactly why ++ * the name failed to validate. ++ * ++ * Each function returns 0 on success, -1 on error. ++ */ ++ ++#if defined(_KERNEL) ++#include ++#else ++#include ++#endif ++ ++#include ++#include ++#include "zfs_namecheck.h" ++#include "zfs_deleg.h" ++ ++static int ++valid_char(char c) ++{ ++ return ((c >= 'a' && c <= 'z') || ++ (c >= 'A' && c <= 'Z') || ++ (c >= '0' && c <= '9') || ++ c == '-' || c == '_' || c == '.' || c == ':' || c == ' '); ++} ++ ++/* ++ * Snapshot names must be made up of alphanumeric characters plus the following ++ * characters: ++ * ++ * [-_.: ] ++ */ ++int ++snapshot_namecheck(const char *path, namecheck_err_t *why, char *what) ++{ ++ const char *loc; ++ ++ if (strlen(path) >= MAXNAMELEN) { ++ if (why) ++ *why = NAME_ERR_TOOLONG; ++ return (-1); ++ } ++ ++ if (path[0] == '\0') { ++ if (why) ++ *why = NAME_ERR_EMPTY_COMPONENT; ++ return (-1); ++ } ++ ++ for (loc = path; *loc; loc++) { ++ if (!valid_char(*loc)) { ++ if (why) { ++ *why = NAME_ERR_INVALCHAR; ++ *what = *loc; ++ } ++ return (-1); ++ } ++ } ++ return (0); ++} ++ ++ ++/* ++ * Permissions set name must start with the letter '@' followed by the ++ * same character restrictions as snapshot names, except that the name ++ * cannot exceed 64 characters. ++ */ ++int ++permset_namecheck(const char *path, namecheck_err_t *why, char *what) ++{ ++ if (strlen(path) >= ZFS_PERMSET_MAXLEN) { ++ if (why) ++ *why = NAME_ERR_TOOLONG; ++ return (-1); ++ } ++ ++ if (path[0] != '@') { ++ if (why) { ++ *why = NAME_ERR_NO_AT; ++ *what = path[0]; ++ } ++ return (-1); ++ } ++ ++ return (snapshot_namecheck(&path[1], why, what)); ++} ++ ++/* ++ * Dataset names must be of the following form: ++ * ++ * [component][/]*[component][@component] ++ * ++ * Where each component is made up of alphanumeric characters plus the following ++ * characters: ++ * ++ * [-_.:%] ++ * ++ * We allow '%' here as we use that character internally to create unique ++ * names for temporary clones (for online recv). ++ */ ++int ++dataset_namecheck(const char *path, namecheck_err_t *why, char *what) ++{ ++ const char *loc, *end; ++ int found_snapshot; ++ ++ /* ++ * Make sure the name is not too long. ++ * ++ * ZFS_MAXNAMELEN is the maximum dataset length used in the userland ++ * which is the same as MAXNAMELEN used in the kernel. ++ * If ZFS_MAXNAMELEN value is changed, make sure to cleanup all ++ * places using MAXNAMELEN. ++ * ++ * When HAVE_KOBJ_NAME_LEN is defined the maximum safe kobject name ++ * length is 20 bytes. 
This 20 bytes is broken down as follows to ++ * provide a maximum safe /[@snapshot] length of only ++ * 18 bytes. To ensure bytes are left for [@snapshot] the ++ * portition is futher limited to 9 bytes. For 2.6.27 and ++ * newer kernels this limit is set to MAXNAMELEN. ++ * ++ * / + + ++ * (18) + (1) + (1) ++ */ ++#ifdef HAVE_KOBJ_NAME_LEN ++ if (strlen(path) > 18) { ++#else ++ if (strlen(path) >= MAXNAMELEN) { ++#endif /* HAVE_KOBJ_NAME_LEN */ ++ if (why) ++ *why = NAME_ERR_TOOLONG; ++ return (-1); ++ } ++ ++ /* Explicitly check for a leading slash. */ ++ if (path[0] == '/') { ++ if (why) ++ *why = NAME_ERR_LEADING_SLASH; ++ return (-1); ++ } ++ ++ if (path[0] == '\0') { ++ if (why) ++ *why = NAME_ERR_EMPTY_COMPONENT; ++ return (-1); ++ } ++ ++ loc = path; ++ found_snapshot = 0; ++ for (;;) { ++ /* Find the end of this component */ ++ end = loc; ++ while (*end != '/' && *end != '@' && *end != '\0') ++ end++; ++ ++ if (*end == '\0' && end[-1] == '/') { ++ /* trailing slashes are not allowed */ ++ if (why) ++ *why = NAME_ERR_TRAILING_SLASH; ++ return (-1); ++ } ++ ++ /* Zero-length components are not allowed */ ++ if (loc == end) { ++ if (why) { ++ /* ++ * Make sure this is really a zero-length ++ * component and not a '@@'. ++ */ ++ if (*end == '@' && found_snapshot) { ++ *why = NAME_ERR_MULTIPLE_AT; ++ } else { ++ *why = NAME_ERR_EMPTY_COMPONENT; ++ } ++ } ++ ++ return (-1); ++ } ++ ++ /* Validate the contents of this component */ ++ while (loc != end) { ++ if (!valid_char(*loc) && *loc != '%') { ++ if (why) { ++ *why = NAME_ERR_INVALCHAR; ++ *what = *loc; ++ } ++ return (-1); ++ } ++ loc++; ++ } ++ ++ /* If we've reached the end of the string, we're OK */ ++ if (*end == '\0') ++ return (0); ++ ++ if (*end == '@') { ++ /* ++ * If we've found an @ symbol, indicate that we're in ++ * the snapshot component, and report a second '@' ++ * character as an error. ++ */ ++ if (found_snapshot) { ++ if (why) ++ *why = NAME_ERR_MULTIPLE_AT; ++ return (-1); ++ } ++ ++ found_snapshot = 1; ++ } ++ ++ /* ++ * If there is a '/' in a snapshot name ++ * then report an error ++ */ ++ if (*end == '/' && found_snapshot) { ++ if (why) ++ *why = NAME_ERR_TRAILING_SLASH; ++ return (-1); ++ } ++ ++ /* Update to the next component */ ++ loc = end + 1; ++ } ++} ++ ++ ++/* ++ * mountpoint names must be of the following form: ++ * ++ * /[component][/]*[component][/] ++ */ ++int ++mountpoint_namecheck(const char *path, namecheck_err_t *why) ++{ ++ const char *start, *end; ++ ++ /* ++ * Make sure none of the mountpoint component names are too long. ++ * If a component name is too long then the mkdir of the mountpoint ++ * will fail but then the mountpoint property will be set to a value ++ * that can never be mounted. Better to fail before setting the prop. ++ * Extra slashes are OK, they will be tossed by the mountpoint mkdir. ++ */ ++ ++ if (path == NULL || *path != '/') { ++ if (why) ++ *why = NAME_ERR_LEADING_SLASH; ++ return (-1); ++ } ++ ++ /* Skip leading slash */ ++ start = &path[1]; ++ do { ++ end = start; ++ while (*end != '/' && *end != '\0') ++ end++; ++ ++ if (end - start >= MAXNAMELEN) { ++ if (why) ++ *why = NAME_ERR_TOOLONG; ++ return (-1); ++ } ++ start = end + 1; ++ ++ } while (*end != '\0'); ++ ++ return (0); ++} ++ ++/* ++ * For pool names, we have the same set of valid characters as described in ++ * dataset names, with the additional restriction that the pool name must begin ++ * with a letter. The pool names 'raidz' and 'mirror' are also reserved names ++ * that cannot be used. 
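++ *
++ * For example (per the checks below): "tank" and "data_01" pass,
++ * while "1pool" fails for not beginning with a letter, "mirror" is
++ * rejected as reserved, and "c1t0d0" is rejected for looking like a
++ * disk device name.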
++ */ ++int ++pool_namecheck(const char *pool, namecheck_err_t *why, char *what) ++{ ++ const char *c; ++ ++ /* ++ * Make sure the name is not too long. ++ * ++ * ZPOOL_MAXNAMELEN is the maximum pool length used in the userland ++ * which is the same as MAXNAMELEN used in the kernel. ++ * If ZPOOL_MAXNAMELEN value is changed, make sure to cleanup all ++ * places using MAXNAMELEN. ++ * ++ * When HAVE_KOBJ_NAME_LEN is defined the maximum safe kobject name ++ * length is 20 bytes. This 20 bytes is broken down as follows to ++ * provide a maximum safe /[@snapshot] length of only ++ * 18 bytes. To ensure bytes are left for [@snapshot] the ++ * portition is futher limited to 8 bytes. For 2.6.27 and ++ * newer kernels this limit is set to MAXNAMELEN. ++ * ++ * / + + ++ * (18) + (1) + (1) ++ */ ++#ifdef HAVE_KOBJ_NAME_LEN ++ if (strlen(pool) > 8) { ++#else ++ if (strlen(pool) >= MAXNAMELEN) { ++#endif /* HAVE_KOBJ_NAME_LEN */ ++ if (why) ++ *why = NAME_ERR_TOOLONG; ++ return (-1); ++ } ++ ++ c = pool; ++ while (*c != '\0') { ++ if (!valid_char(*c)) { ++ if (why) { ++ *why = NAME_ERR_INVALCHAR; ++ *what = *c; ++ } ++ return (-1); ++ } ++ c++; ++ } ++ ++ if (!(*pool >= 'a' && *pool <= 'z') && ++ !(*pool >= 'A' && *pool <= 'Z')) { ++ if (why) ++ *why = NAME_ERR_NOLETTER; ++ return (-1); ++ } ++ ++ if (strcmp(pool, "mirror") == 0 || strcmp(pool, "raidz") == 0) { ++ if (why) ++ *why = NAME_ERR_RESERVED; ++ return (-1); ++ } ++ ++ if (pool[0] == 'c' && (pool[1] >= '0' && pool[1] <= '9')) { ++ if (why) ++ *why = NAME_ERR_DISKLIKE; ++ return (-1); ++ } ++ ++ return (0); ++} ++ ++#if defined(_KERNEL) && defined(HAVE_SPL) ++EXPORT_SYMBOL(snapshot_namecheck); ++EXPORT_SYMBOL(pool_namecheck); ++EXPORT_SYMBOL(dataset_namecheck); ++#endif +diff -uNr linux-3.2.33-go.orig/fs/zfs/zcommon/zfs_prop.c linux-3.2.33-go/fs/zfs/zcommon/zfs_prop.c +--- linux-3.2.33-go.orig/fs/zfs/zcommon/zfs_prop.c 1970-01-01 01:00:00.000000000 +0100 ++++ linux-3.2.33-go/fs/zfs/zcommon/zfs_prop.c 2012-11-16 23:25:34.354039278 +0100 +@@ -0,0 +1,657 @@ ++/* ++ * CDDL HEADER START ++ * ++ * The contents of this file are subject to the terms of the ++ * Common Development and Distribution License (the "License"). ++ * You may not use this file except in compliance with the License. ++ * ++ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE ++ * or http://www.opensolaris.org/os/licensing. ++ * See the License for the specific language governing permissions ++ * and limitations under the License. ++ * ++ * When distributing Covered Code, include this CDDL HEADER in each ++ * file and include the License file at usr/src/OPENSOLARIS.LICENSE. ++ * If applicable, add the following below this CDDL HEADER, with the ++ * fields enclosed by brackets "[]" replaced with your own identifying ++ * information: Portions Copyright [yyyy] [name of copyright owner] ++ * ++ * CDDL HEADER END ++ */ ++/* ++ * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2011 by Delphix. All rights reserved. 
++ */ ++ ++/* Portions Copyright 2010 Robert Milkowski */ ++ ++#include ++#include ++#include ++#include ++#include ++#include ++ ++#include "zfs_prop.h" ++#include "zfs_deleg.h" ++ ++#if defined(_KERNEL) ++#include ++#else ++#include ++#include ++#include ++#endif ++ ++static zprop_desc_t zfs_prop_table[ZFS_NUM_PROPS]; ++ ++/* Note this is indexed by zfs_userquota_prop_t, keep the order the same */ ++const char *zfs_userquota_prop_prefixes[] = { ++ "userused@", ++ "userquota@", ++ "groupused@", ++ "groupquota@" ++}; ++ ++zprop_desc_t * ++zfs_prop_get_table(void) ++{ ++ return (zfs_prop_table); ++} ++ ++void ++zfs_prop_init(void) ++{ ++ static zprop_index_t checksum_table[] = { ++ { "on", ZIO_CHECKSUM_ON }, ++ { "off", ZIO_CHECKSUM_OFF }, ++ { "fletcher2", ZIO_CHECKSUM_FLETCHER_2 }, ++ { "fletcher4", ZIO_CHECKSUM_FLETCHER_4 }, ++ { "sha256", ZIO_CHECKSUM_SHA256 }, ++ { NULL } ++ }; ++ ++ static zprop_index_t dedup_table[] = { ++ { "on", ZIO_CHECKSUM_ON }, ++ { "off", ZIO_CHECKSUM_OFF }, ++ { "verify", ZIO_CHECKSUM_ON | ZIO_CHECKSUM_VERIFY }, ++ { "sha256", ZIO_CHECKSUM_SHA256 }, ++ { "sha256,verify", ++ ZIO_CHECKSUM_SHA256 | ZIO_CHECKSUM_VERIFY }, ++ { NULL } ++ }; ++ ++ static zprop_index_t compress_table[] = { ++ { "on", ZIO_COMPRESS_ON }, ++ { "off", ZIO_COMPRESS_OFF }, ++ { "lzjb", ZIO_COMPRESS_LZJB }, ++ { "gzip", ZIO_COMPRESS_GZIP_6 }, /* gzip default */ ++ { "gzip-1", ZIO_COMPRESS_GZIP_1 }, ++ { "gzip-2", ZIO_COMPRESS_GZIP_2 }, ++ { "gzip-3", ZIO_COMPRESS_GZIP_3 }, ++ { "gzip-4", ZIO_COMPRESS_GZIP_4 }, ++ { "gzip-5", ZIO_COMPRESS_GZIP_5 }, ++ { "gzip-6", ZIO_COMPRESS_GZIP_6 }, ++ { "gzip-7", ZIO_COMPRESS_GZIP_7 }, ++ { "gzip-8", ZIO_COMPRESS_GZIP_8 }, ++ { "gzip-9", ZIO_COMPRESS_GZIP_9 }, ++ { "zle", ZIO_COMPRESS_ZLE }, ++ { NULL } ++ }; ++ ++ static zprop_index_t snapdir_table[] = { ++ { "hidden", ZFS_SNAPDIR_HIDDEN }, ++ { "visible", ZFS_SNAPDIR_VISIBLE }, ++ { NULL } ++ }; ++ ++ static zprop_index_t acl_inherit_table[] = { ++ { "discard", ZFS_ACL_DISCARD }, ++ { "noallow", ZFS_ACL_NOALLOW }, ++ { "restricted", ZFS_ACL_RESTRICTED }, ++ { "passthrough", ZFS_ACL_PASSTHROUGH }, ++ { "secure", ZFS_ACL_RESTRICTED }, /* bkwrd compatability */ ++ { "passthrough-x", ZFS_ACL_PASSTHROUGH_X }, ++ { NULL } ++ }; ++ ++ static zprop_index_t case_table[] = { ++ { "sensitive", ZFS_CASE_SENSITIVE }, ++ { "insensitive", ZFS_CASE_INSENSITIVE }, ++ { "mixed", ZFS_CASE_MIXED }, ++ { NULL } ++ }; ++ ++ static zprop_index_t copies_table[] = { ++ { "1", 1 }, ++ { "2", 2 }, ++ { "3", 3 }, ++ { NULL } ++ }; ++ ++ /* ++ * Use the unique flags we have to send to u8_strcmp() and/or ++ * u8_textprep() to represent the various normalization property ++ * values. 
++ */ ++ static zprop_index_t normalize_table[] = { ++ { "none", 0 }, ++ { "formD", U8_TEXTPREP_NFD }, ++ { "formKC", U8_TEXTPREP_NFKC }, ++ { "formC", U8_TEXTPREP_NFC }, ++ { "formKD", U8_TEXTPREP_NFKD }, ++ { NULL } ++ }; ++ ++ static zprop_index_t version_table[] = { ++ { "1", 1 }, ++ { "2", 2 }, ++ { "3", 3 }, ++ { "4", 4 }, ++ { "5", 5 }, ++ { "current", ZPL_VERSION }, ++ { NULL } ++ }; ++ ++ static zprop_index_t boolean_table[] = { ++ { "off", 0 }, ++ { "on", 1 }, ++ { NULL } ++ }; ++ ++ static zprop_index_t logbias_table[] = { ++ { "latency", ZFS_LOGBIAS_LATENCY }, ++ { "throughput", ZFS_LOGBIAS_THROUGHPUT }, ++ { NULL } ++ }; ++ ++ static zprop_index_t canmount_table[] = { ++ { "off", ZFS_CANMOUNT_OFF }, ++ { "on", ZFS_CANMOUNT_ON }, ++ { "noauto", ZFS_CANMOUNT_NOAUTO }, ++ { NULL } ++ }; ++ ++ static zprop_index_t cache_table[] = { ++ { "none", ZFS_CACHE_NONE }, ++ { "metadata", ZFS_CACHE_METADATA }, ++ { "all", ZFS_CACHE_ALL }, ++ { NULL } ++ }; ++ ++ static zprop_index_t sync_table[] = { ++ { "standard", ZFS_SYNC_STANDARD }, ++ { "always", ZFS_SYNC_ALWAYS }, ++ { "disabled", ZFS_SYNC_DISABLED }, ++ { NULL } ++ }; ++ ++ static zprop_index_t xattr_table[] = { ++ { "off", ZFS_XATTR_OFF }, ++ { "on", ZFS_XATTR_DIR }, ++ { "sa", ZFS_XATTR_SA }, ++ { "dir", ZFS_XATTR_DIR }, ++ { NULL } ++ }; ++ ++ /* inherit index properties */ ++ zprop_register_index(ZFS_PROP_SYNC, "sync", ZFS_SYNC_STANDARD, ++ PROP_INHERIT, ZFS_TYPE_FILESYSTEM | ZFS_TYPE_VOLUME, ++ "standard | always | disabled", "SYNC", ++ sync_table); ++ zprop_register_index(ZFS_PROP_CHECKSUM, "checksum", ++ ZIO_CHECKSUM_DEFAULT, PROP_INHERIT, ZFS_TYPE_FILESYSTEM | ++ ZFS_TYPE_VOLUME, ++ "on | off | fletcher2 | fletcher4 | sha256", "CHECKSUM", ++ checksum_table); ++ zprop_register_index(ZFS_PROP_DEDUP, "dedup", ZIO_CHECKSUM_OFF, ++ PROP_INHERIT, ZFS_TYPE_FILESYSTEM | ZFS_TYPE_VOLUME, ++ "on | off | verify | sha256[,verify]", "DEDUP", ++ dedup_table); ++ zprop_register_index(ZFS_PROP_COMPRESSION, "compression", ++ ZIO_COMPRESS_DEFAULT, PROP_INHERIT, ++ ZFS_TYPE_FILESYSTEM | ZFS_TYPE_VOLUME, ++ "on | off | lzjb | gzip | gzip-[1-9] | zle", "COMPRESS", ++ compress_table); ++ zprop_register_index(ZFS_PROP_SNAPDIR, "snapdir", ZFS_SNAPDIR_HIDDEN, ++ PROP_INHERIT, ZFS_TYPE_FILESYSTEM, ++ "hidden | visible", "SNAPDIR", snapdir_table); ++ zprop_register_index(ZFS_PROP_ACLINHERIT, "aclinherit", ++ ZFS_ACL_RESTRICTED, PROP_INHERIT, ZFS_TYPE_FILESYSTEM, ++ "discard | noallow | restricted | passthrough | passthrough-x", ++ "ACLINHERIT", acl_inherit_table); ++ zprop_register_index(ZFS_PROP_COPIES, "copies", 1, PROP_INHERIT, ++ ZFS_TYPE_FILESYSTEM | ZFS_TYPE_VOLUME, ++ "1 | 2 | 3", "COPIES", copies_table); ++ zprop_register_index(ZFS_PROP_PRIMARYCACHE, "primarycache", ++ ZFS_CACHE_ALL, PROP_INHERIT, ++ ZFS_TYPE_FILESYSTEM | ZFS_TYPE_SNAPSHOT | ZFS_TYPE_VOLUME, ++ "all | none | metadata", "PRIMARYCACHE", cache_table); ++ zprop_register_index(ZFS_PROP_SECONDARYCACHE, "secondarycache", ++ ZFS_CACHE_ALL, PROP_INHERIT, ++ ZFS_TYPE_FILESYSTEM | ZFS_TYPE_SNAPSHOT | ZFS_TYPE_VOLUME, ++ "all | none | metadata", "SECONDARYCACHE", cache_table); ++ zprop_register_index(ZFS_PROP_LOGBIAS, "logbias", ZFS_LOGBIAS_LATENCY, ++ PROP_INHERIT, ZFS_TYPE_FILESYSTEM | ZFS_TYPE_VOLUME, ++ "latency | throughput", "LOGBIAS", logbias_table); ++ zprop_register_index(ZFS_PROP_XATTR, "xattr", ZFS_XATTR_DIR, ++ PROP_INHERIT, ZFS_TYPE_FILESYSTEM | ZFS_TYPE_SNAPSHOT, ++ "on | off | dir | sa", "XATTR", xattr_table); ++ ++ /* inherit index (boolean) properties */ ++ 
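The zprop_index_t tables above exist so that index-typed properties can be translated between their user-visible strings and the numeric values stored on disk. Below is a small stand-alone sketch of that lookup; the struct shape mirrors the initializers above, but the table contents, numeric values, and helper name are illustrative only.

        #include <stdio.h>
        #include <stdint.h>
        #include <string.h>

        /* Shape of one table entry, mirroring the zprop_index_t initializers above. */
        typedef struct {
                const char      *pi_name;
                uint64_t        pi_value;
        } sketch_index_t;

        static const sketch_index_t sketch_sync_table[] = {
                { "standard",   0 },    /* values are illustrative placeholders */
                { "always",     1 },
                { "disabled",   2 },
                { NULL,         0 }
        };

        /* Translate a user-visible string into its numeric value; 0 on success. */
        static int
        sketch_string_to_index(const sketch_index_t *tbl, const char *s,
            uint64_t *out)
        {
                for (int i = 0; tbl[i].pi_name != NULL; i++) {
                        if (strcmp(tbl[i].pi_name, s) == 0) {
                                *out = tbl[i].pi_value;
                                return (0);
                        }
                }
                return (-1);            /* unknown value; caller reports the error */
        }

        int
        main(void)
        {
                uint64_t v;

                if (sketch_string_to_index(sketch_sync_table, "disabled", &v) == 0)
                        printf("sync=disabled stored as %llu\n",
                            (unsigned long long)v);
                return (0);
        }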
zprop_register_index(ZFS_PROP_ATIME, "atime", 1, PROP_INHERIT, ++ ZFS_TYPE_FILESYSTEM, "on | off", "ATIME", boolean_table); ++ zprop_register_index(ZFS_PROP_DEVICES, "devices", 1, PROP_INHERIT, ++ ZFS_TYPE_FILESYSTEM | ZFS_TYPE_SNAPSHOT, "on | off", "DEVICES", ++ boolean_table); ++ zprop_register_index(ZFS_PROP_EXEC, "exec", 1, PROP_INHERIT, ++ ZFS_TYPE_FILESYSTEM | ZFS_TYPE_SNAPSHOT, "on | off", "EXEC", ++ boolean_table); ++ zprop_register_index(ZFS_PROP_SETUID, "setuid", 1, PROP_INHERIT, ++ ZFS_TYPE_FILESYSTEM | ZFS_TYPE_SNAPSHOT, "on | off", "SETUID", ++ boolean_table); ++ zprop_register_index(ZFS_PROP_READONLY, "readonly", 0, PROP_INHERIT, ++ ZFS_TYPE_FILESYSTEM | ZFS_TYPE_VOLUME, "on | off", "RDONLY", ++ boolean_table); ++ zprop_register_index(ZFS_PROP_ZONED, "zoned", 0, PROP_INHERIT, ++ ZFS_TYPE_FILESYSTEM, "on | off", "ZONED", boolean_table); ++ zprop_register_index(ZFS_PROP_VSCAN, "vscan", 0, PROP_INHERIT, ++ ZFS_TYPE_FILESYSTEM, "on | off", "VSCAN", boolean_table); ++ zprop_register_index(ZFS_PROP_NBMAND, "nbmand", 0, PROP_INHERIT, ++ ZFS_TYPE_FILESYSTEM | ZFS_TYPE_SNAPSHOT, "on | off", "NBMAND", ++ boolean_table); ++ ++ /* default index properties */ ++ zprop_register_index(ZFS_PROP_VERSION, "version", 0, PROP_DEFAULT, ++ ZFS_TYPE_FILESYSTEM | ZFS_TYPE_SNAPSHOT, ++ "1 | 2 | 3 | 4 | 5 | current", "VERSION", version_table); ++ zprop_register_index(ZFS_PROP_CANMOUNT, "canmount", ZFS_CANMOUNT_ON, ++ PROP_DEFAULT, ZFS_TYPE_FILESYSTEM, "on | off | noauto", ++ "CANMOUNT", canmount_table); ++ ++ /* readonly index (boolean) properties */ ++ zprop_register_index(ZFS_PROP_MOUNTED, "mounted", 0, PROP_READONLY, ++ ZFS_TYPE_FILESYSTEM, "yes | no", "MOUNTED", boolean_table); ++ zprop_register_index(ZFS_PROP_DEFER_DESTROY, "defer_destroy", 0, ++ PROP_READONLY, ZFS_TYPE_SNAPSHOT, "yes | no", "DEFER_DESTROY", ++ boolean_table); ++ ++ /* set once index properties */ ++ zprop_register_index(ZFS_PROP_NORMALIZE, "normalization", 0, ++ PROP_ONETIME, ZFS_TYPE_FILESYSTEM | ZFS_TYPE_SNAPSHOT, ++ "none | formC | formD | formKC | formKD", "NORMALIZATION", ++ normalize_table); ++ zprop_register_index(ZFS_PROP_CASE, "casesensitivity", ++ ZFS_CASE_SENSITIVE, PROP_ONETIME, ZFS_TYPE_FILESYSTEM | ++ ZFS_TYPE_SNAPSHOT, ++ "sensitive | insensitive | mixed", "CASE", case_table); ++ ++ /* set once index (boolean) properties */ ++ zprop_register_index(ZFS_PROP_UTF8ONLY, "utf8only", 0, PROP_ONETIME, ++ ZFS_TYPE_FILESYSTEM | ZFS_TYPE_SNAPSHOT, ++ "on | off", "UTF8ONLY", boolean_table); ++ ++ /* string properties */ ++ zprop_register_string(ZFS_PROP_ORIGIN, "origin", NULL, PROP_READONLY, ++ ZFS_TYPE_FILESYSTEM | ZFS_TYPE_VOLUME, "", "ORIGIN"); ++ zprop_register_string(ZFS_PROP_CLONES, "clones", NULL, PROP_READONLY, ++ ZFS_TYPE_SNAPSHOT, "[,...]", "CLONES"); ++ zprop_register_string(ZFS_PROP_MOUNTPOINT, "mountpoint", "/", ++ PROP_INHERIT, ZFS_TYPE_FILESYSTEM, " | legacy | none", ++ "MOUNTPOINT"); ++ zprop_register_string(ZFS_PROP_SHARENFS, "sharenfs", "off", ++ PROP_INHERIT, ZFS_TYPE_FILESYSTEM, "on | off | share(1M) options", ++ "SHARENFS"); ++ zprop_register_string(ZFS_PROP_TYPE, "type", NULL, PROP_READONLY, ++ ZFS_TYPE_DATASET, "filesystem | volume | snapshot", "TYPE"); ++ zprop_register_string(ZFS_PROP_SHARESMB, "sharesmb", "off", ++ PROP_INHERIT, ZFS_TYPE_FILESYSTEM, ++ "on | off | sharemgr(1M) options", "SHARESMB"); ++ zprop_register_string(ZFS_PROP_MLSLABEL, "mlslabel", ++ ZFS_MLSLABEL_DEFAULT, PROP_INHERIT, ZFS_TYPE_DATASET, ++ "", "MLSLABEL"); ++ ++ /* readonly number properties */ ++ 
zprop_register_number(ZFS_PROP_USED, "used", 0, PROP_READONLY, ++ ZFS_TYPE_DATASET, "", "USED"); ++ zprop_register_number(ZFS_PROP_AVAILABLE, "available", 0, PROP_READONLY, ++ ZFS_TYPE_FILESYSTEM | ZFS_TYPE_VOLUME, "", "AVAIL"); ++ zprop_register_number(ZFS_PROP_REFERENCED, "referenced", 0, ++ PROP_READONLY, ZFS_TYPE_DATASET, "", "REFER"); ++ zprop_register_number(ZFS_PROP_COMPRESSRATIO, "compressratio", 0, ++ PROP_READONLY, ZFS_TYPE_DATASET, ++ "<1.00x or higher if compressed>", "RATIO"); ++ zprop_register_number(ZFS_PROP_REFRATIO, "refcompressratio", 0, ++ PROP_READONLY, ZFS_TYPE_DATASET, ++ "<1.00x or higher if compressed>", "REFRATIO"); ++ zprop_register_number(ZFS_PROP_VOLBLOCKSIZE, "volblocksize", ++ ZVOL_DEFAULT_BLOCKSIZE, PROP_ONETIME, ++ ZFS_TYPE_VOLUME, "512 to 128k, power of 2", "VOLBLOCK"); ++ zprop_register_number(ZFS_PROP_USEDSNAP, "usedbysnapshots", 0, ++ PROP_READONLY, ZFS_TYPE_FILESYSTEM | ZFS_TYPE_VOLUME, "", ++ "USEDSNAP"); ++ zprop_register_number(ZFS_PROP_USEDDS, "usedbydataset", 0, ++ PROP_READONLY, ZFS_TYPE_FILESYSTEM | ZFS_TYPE_VOLUME, "", ++ "USEDDS"); ++ zprop_register_number(ZFS_PROP_USEDCHILD, "usedbychildren", 0, ++ PROP_READONLY, ZFS_TYPE_FILESYSTEM | ZFS_TYPE_VOLUME, "", ++ "USEDCHILD"); ++ zprop_register_number(ZFS_PROP_USEDREFRESERV, "usedbyrefreservation", 0, ++ PROP_READONLY, ++ ZFS_TYPE_FILESYSTEM | ZFS_TYPE_VOLUME, "", "USEDREFRESERV"); ++ zprop_register_number(ZFS_PROP_USERREFS, "userrefs", 0, PROP_READONLY, ++ ZFS_TYPE_SNAPSHOT, "", "USERREFS"); ++ zprop_register_number(ZFS_PROP_WRITTEN, "written", 0, PROP_READONLY, ++ ZFS_TYPE_DATASET, "", "WRITTEN"); ++ ++ /* default number properties */ ++ zprop_register_number(ZFS_PROP_QUOTA, "quota", 0, PROP_DEFAULT, ++ ZFS_TYPE_FILESYSTEM, " | none", "QUOTA"); ++ zprop_register_number(ZFS_PROP_RESERVATION, "reservation", 0, ++ PROP_DEFAULT, ZFS_TYPE_FILESYSTEM | ZFS_TYPE_VOLUME, ++ " | none", "RESERV"); ++ zprop_register_number(ZFS_PROP_VOLSIZE, "volsize", 0, PROP_DEFAULT, ++ ZFS_TYPE_VOLUME, "", "VOLSIZE"); ++ zprop_register_number(ZFS_PROP_REFQUOTA, "refquota", 0, PROP_DEFAULT, ++ ZFS_TYPE_FILESYSTEM, " | none", "REFQUOTA"); ++ zprop_register_number(ZFS_PROP_REFRESERVATION, "refreservation", 0, ++ PROP_DEFAULT, ZFS_TYPE_FILESYSTEM | ZFS_TYPE_VOLUME, ++ " | none", "REFRESERV"); ++ ++ /* inherit number properties */ ++ zprop_register_number(ZFS_PROP_RECORDSIZE, "recordsize", ++ SPA_MAXBLOCKSIZE, PROP_INHERIT, ++ ZFS_TYPE_FILESYSTEM, "512 to 128k, power of 2", "RECSIZE"); ++ ++ /* hidden properties */ ++ zprop_register_hidden(ZFS_PROP_CREATETXG, "createtxg", PROP_TYPE_NUMBER, ++ PROP_READONLY, ZFS_TYPE_DATASET, "CREATETXG"); ++ zprop_register_hidden(ZFS_PROP_NUMCLONES, "numclones", PROP_TYPE_NUMBER, ++ PROP_READONLY, ZFS_TYPE_SNAPSHOT, "NUMCLONES"); ++ zprop_register_hidden(ZFS_PROP_NAME, "name", PROP_TYPE_STRING, ++ PROP_READONLY, ZFS_TYPE_DATASET, "NAME"); ++ zprop_register_hidden(ZFS_PROP_ISCSIOPTIONS, "iscsioptions", ++ PROP_TYPE_STRING, PROP_INHERIT, ZFS_TYPE_VOLUME, "ISCSIOPTIONS"); ++ zprop_register_hidden(ZFS_PROP_STMF_SHAREINFO, "stmf_sbd_lu", ++ PROP_TYPE_STRING, PROP_INHERIT, ZFS_TYPE_VOLUME, ++ "STMF_SBD_LU"); ++ zprop_register_hidden(ZFS_PROP_GUID, "guid", PROP_TYPE_NUMBER, ++ PROP_READONLY, ZFS_TYPE_DATASET, "GUID"); ++ zprop_register_hidden(ZFS_PROP_USERACCOUNTING, "useraccounting", ++ PROP_TYPE_NUMBER, PROP_READONLY, ZFS_TYPE_DATASET, ++ "USERACCOUNTING"); ++ zprop_register_hidden(ZFS_PROP_UNIQUE, "unique", PROP_TYPE_NUMBER, ++ PROP_READONLY, ZFS_TYPE_DATASET, "UNIQUE"); ++ 
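All of the zprop_register_*() calls in this function follow one pattern: each call fills a slot, indexed by property id, in the static zfs_prop_table descriptor array, and later accessors simply index that array. Here is a reduced sketch of that pattern; the property ids, the three-field descriptor, and the sketch_ names are made up for illustration.

        #include <stdio.h>
        #include <stdint.h>

        /* Reduced descriptor; the real zprop_desc_t carries many more fields. */
        typedef struct {
                const char      *pd_name;
                uint64_t        pd_numdefault;
                const char      *pd_colname;
        } sketch_desc_t;

        enum { SK_PROP_QUOTA, SK_PROP_RECORDSIZE, SK_NUM_PROPS };

        static sketch_desc_t sketch_table[SK_NUM_PROPS];

        /* Analogue of the register calls above: fill one slot in the table. */
        static void
        sketch_register_number(int prop, const char *name, uint64_t def,
            const char *colname)
        {
                sketch_table[prop].pd_name = name;
                sketch_table[prop].pd_numdefault = def;
                sketch_table[prop].pd_colname = colname;
        }

        int
        main(void)
        {
                sketch_register_number(SK_PROP_QUOTA, "quota", 0, "QUOTA");
                sketch_register_number(SK_PROP_RECORDSIZE, "recordsize", 131072,
                    "RECSIZE");

                /* Later accessors just index the table by property id. */
                printf("%s default = %llu\n",
                    sketch_table[SK_PROP_RECORDSIZE].pd_name,
                    (unsigned long long)sketch_table[SK_PROP_RECORDSIZE].pd_numdefault);
                return (0);
        }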
zprop_register_hidden(ZFS_PROP_OBJSETID, "objsetid", PROP_TYPE_NUMBER, ++ PROP_READONLY, ZFS_TYPE_DATASET, "OBJSETID"); ++ ++ /* ++ * Property to be removed once libbe is integrated ++ */ ++ zprop_register_hidden(ZFS_PROP_PRIVATE, "priv_prop", ++ PROP_TYPE_NUMBER, PROP_READONLY, ZFS_TYPE_FILESYSTEM, ++ "PRIV_PROP"); ++ ++ /* oddball properties */ ++ zprop_register_impl(ZFS_PROP_CREATION, "creation", PROP_TYPE_NUMBER, 0, ++ NULL, PROP_READONLY, ZFS_TYPE_DATASET, ++ "", "CREATION", B_FALSE, B_TRUE, NULL); ++} ++ ++boolean_t ++zfs_prop_delegatable(zfs_prop_t prop) ++{ ++ zprop_desc_t *pd = &zfs_prop_table[prop]; ++ ++ /* The mlslabel property is never delegatable. */ ++ if (prop == ZFS_PROP_MLSLABEL) ++ return (B_FALSE); ++ ++ return (pd->pd_attr != PROP_READONLY); ++} ++ ++/* ++ * Given a zfs dataset property name, returns the corresponding property ID. ++ */ ++zfs_prop_t ++zfs_name_to_prop(const char *propname) ++{ ++ return (zprop_name_to_prop(propname, ZFS_TYPE_DATASET)); ++} ++ ++/* ++ * For user property names, we allow all lowercase alphanumeric characters, plus ++ * a few useful punctuation characters. ++ */ ++static int ++valid_char(char c) ++{ ++ return ((c >= 'a' && c <= 'z') || ++ (c >= '0' && c <= '9') || ++ c == '-' || c == '_' || c == '.' || c == ':'); ++} ++ ++/* ++ * Returns true if this is a valid user-defined property (one with a ':'). ++ */ ++boolean_t ++zfs_prop_user(const char *name) ++{ ++ int i; ++ char c; ++ boolean_t foundsep = B_FALSE; ++ ++ for (i = 0; i < strlen(name); i++) { ++ c = name[i]; ++ if (!valid_char(c)) ++ return (B_FALSE); ++ if (c == ':') ++ foundsep = B_TRUE; ++ } ++ ++ if (!foundsep) ++ return (B_FALSE); ++ ++ return (B_TRUE); ++} ++ ++/* ++ * Returns true if this is a valid userspace-type property (one with a '@'). ++ * Note that after the @, any character is valid (eg, another @, for SID ++ * user@domain). ++ */ ++boolean_t ++zfs_prop_userquota(const char *name) ++{ ++ zfs_userquota_prop_t prop; ++ ++ for (prop = 0; prop < ZFS_NUM_USERQUOTA_PROPS; prop++) { ++ if (strncmp(name, zfs_userquota_prop_prefixes[prop], ++ strlen(zfs_userquota_prop_prefixes[prop])) == 0) { ++ return (B_TRUE); ++ } ++ } ++ ++ return (B_FALSE); ++} ++ ++/* ++ * Returns true if this is a valid written@ property. ++ * Note that after the @, any character is valid (eg, another @, for ++ * written@pool/fs@origin). ++ */ ++boolean_t ++zfs_prop_written(const char *name) ++{ ++ static const char *prefix = "written@"; ++ return (strncmp(name, prefix, strlen(prefix)) == 0); ++} ++ ++/* ++ * Tables of index types, plus functions to convert between the user view ++ * (strings) and internal representation (uint64_t). ++ */ ++int ++zfs_prop_string_to_index(zfs_prop_t prop, const char *string, uint64_t *index) ++{ ++ return (zprop_string_to_index(prop, string, index, ZFS_TYPE_DATASET)); ++} ++ ++int ++zfs_prop_index_to_string(zfs_prop_t prop, uint64_t index, const char **string) ++{ ++ return (zprop_index_to_string(prop, index, string, ZFS_TYPE_DATASET)); ++} ++ ++uint64_t ++zfs_prop_random_value(zfs_prop_t prop, uint64_t seed) ++{ ++ return (zprop_random_value(prop, seed, ZFS_TYPE_DATASET)); ++} ++ ++/* ++ * Returns TRUE if the property applies to any of the given dataset types. ++ */ ++boolean_t ++zfs_prop_valid_for_type(int prop, zfs_type_t types) ++{ ++ return (zprop_valid_for_type(prop, types)); ++} ++ ++zprop_type_t ++zfs_prop_get_type(zfs_prop_t prop) ++{ ++ return (zfs_prop_table[prop].pd_proptype); ++} ++ ++/* ++ * Returns TRUE if the property is readonly. 
++ */ ++boolean_t ++zfs_prop_readonly(zfs_prop_t prop) ++{ ++ return (zfs_prop_table[prop].pd_attr == PROP_READONLY || ++ zfs_prop_table[prop].pd_attr == PROP_ONETIME); ++} ++ ++/* ++ * Returns TRUE if the property is only allowed to be set once. ++ */ ++boolean_t ++zfs_prop_setonce(zfs_prop_t prop) ++{ ++ return (zfs_prop_table[prop].pd_attr == PROP_ONETIME); ++} ++ ++const char * ++zfs_prop_default_string(zfs_prop_t prop) ++{ ++ return (zfs_prop_table[prop].pd_strdefault); ++} ++ ++uint64_t ++zfs_prop_default_numeric(zfs_prop_t prop) ++{ ++ return (zfs_prop_table[prop].pd_numdefault); ++} ++ ++/* ++ * Given a dataset property ID, returns the corresponding name. ++ * Assuming the zfs dataset property ID is valid. ++ */ ++const char * ++zfs_prop_to_name(zfs_prop_t prop) ++{ ++ return (zfs_prop_table[prop].pd_name); ++} ++ ++/* ++ * Returns TRUE if the property is inheritable. ++ */ ++boolean_t ++zfs_prop_inheritable(zfs_prop_t prop) ++{ ++ return (zfs_prop_table[prop].pd_attr == PROP_INHERIT || ++ zfs_prop_table[prop].pd_attr == PROP_ONETIME); ++} ++ ++#ifndef _KERNEL ++ ++/* ++ * Returns a string describing the set of acceptable values for the given ++ * zfs property, or NULL if it cannot be set. ++ */ ++const char * ++zfs_prop_values(zfs_prop_t prop) ++{ ++ return (zfs_prop_table[prop].pd_values); ++} ++ ++/* ++ * Returns TRUE if this property is a string type. Note that index types ++ * (compression, checksum) are treated as strings in userland, even though they ++ * are stored numerically on disk. ++ */ ++int ++zfs_prop_is_string(zfs_prop_t prop) ++{ ++ return (zfs_prop_table[prop].pd_proptype == PROP_TYPE_STRING || ++ zfs_prop_table[prop].pd_proptype == PROP_TYPE_INDEX); ++} ++ ++/* ++ * Returns the column header for the given property. Used only in ++ * 'zfs list -o', but centralized here with the other property information. ++ */ ++const char * ++zfs_prop_column_name(zfs_prop_t prop) ++{ ++ return (zfs_prop_table[prop].pd_colname); ++} ++ ++/* ++ * Returns whether the given property should be displayed right-justified for ++ * 'zfs list'. ++ */ ++boolean_t ++zfs_prop_align_right(zfs_prop_t prop) ++{ ++ return (zfs_prop_table[prop].pd_rightalign); ++} ++ ++#endif ++ ++#if defined(_KERNEL) && defined(HAVE_SPL) ++ ++static int zcommon_init(void) { return 0; } ++static int zcommon_fini(void) { return 0; } ++ ++spl_module_init(zcommon_init); ++spl_module_exit(zcommon_fini); ++ ++MODULE_DESCRIPTION("Generic ZFS support"); ++MODULE_AUTHOR(ZFS_META_AUTHOR); ++MODULE_LICENSE(ZFS_META_LICENSE); ++ ++/* zfs dataset property functions */ ++EXPORT_SYMBOL(zfs_userquota_prop_prefixes); ++EXPORT_SYMBOL(zfs_prop_init); ++EXPORT_SYMBOL(zfs_prop_get_type); ++EXPORT_SYMBOL(zfs_prop_get_table); ++EXPORT_SYMBOL(zfs_prop_delegatable); ++ ++/* Dataset property functions shared between libzfs and kernel. 
*/ ++EXPORT_SYMBOL(zfs_prop_default_string); ++EXPORT_SYMBOL(zfs_prop_default_numeric); ++EXPORT_SYMBOL(zfs_prop_readonly); ++EXPORT_SYMBOL(zfs_prop_inheritable); ++EXPORT_SYMBOL(zfs_prop_setonce); ++EXPORT_SYMBOL(zfs_prop_to_name); ++EXPORT_SYMBOL(zfs_name_to_prop); ++EXPORT_SYMBOL(zfs_prop_user); ++EXPORT_SYMBOL(zfs_prop_userquota); ++EXPORT_SYMBOL(zfs_prop_index_to_string); ++EXPORT_SYMBOL(zfs_prop_string_to_index); ++EXPORT_SYMBOL(zfs_prop_valid_for_type); ++ ++#endif +diff -uNr linux-3.2.33-go.orig/fs/zfs/zcommon/zfs_uio.c linux-3.2.33-go/fs/zfs/zcommon/zfs_uio.c +--- linux-3.2.33-go.orig/fs/zfs/zcommon/zfs_uio.c 1970-01-01 01:00:00.000000000 +0100 ++++ linux-3.2.33-go/fs/zfs/zcommon/zfs_uio.c 2012-11-16 23:25:34.354039278 +0100 +@@ -0,0 +1,255 @@ ++/* ++ * CDDL HEADER START ++ * ++ * The contents of this file are subject to the terms of the ++ * Common Development and Distribution License (the "License"). ++ * You may not use this file except in compliance with the License. ++ * ++ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE ++ * or http://www.opensolaris.org/os/licensing. ++ * See the License for the specific language governing permissions ++ * and limitations under the License. ++ * ++ * When distributing Covered Code, include this CDDL HEADER in each ++ * file and include the License file at usr/src/OPENSOLARIS.LICENSE. ++ * If applicable, add the following below this CDDL HEADER, with the ++ * fields enclosed by brackets "[]" replaced with your own identifying ++ * information: Portions Copyright [yyyy] [name of copyright owner] ++ * ++ * CDDL HEADER END ++ */ ++/* ++ * Copyright 2009 Sun Microsystems, Inc. All rights reserved. ++ * Use is subject to license terms. ++ */ ++ ++/* Copyright (c) 1984, 1986, 1987, 1988, 1989 AT&T */ ++/* All Rights Reserved */ ++ ++/* ++ * University Copyright- Copyright (c) 1982, 1986, 1988 ++ * The Regents of the University of California ++ * All Rights Reserved ++ * ++ * University Acknowledgment- Portions of this document are derived from ++ * software developed by the University of California, Berkeley, and its ++ * contributors. ++ */ ++ ++/* ++ * The uio support from OpenSolaris has been added as a short term ++ * work around. The hope is to adopt native Linux type and drop the ++ * use of uio's entirely. Under Linux they only add overhead and ++ * when possible we want to use native APIs for the ZPL layer. ++ */ ++#ifdef _KERNEL ++ ++#include ++#include ++ ++/* ++ * Move "n" bytes at byte address "p"; "rw" indicates the direction ++ * of the move, and the I/O parameters are provided in "uio", which is ++ * update to reflect the data which was moved. Returns 0 on success or ++ * a non-zero errno on failure. 
++ */ ++int ++uiomove(void *p, size_t n, enum uio_rw rw, struct uio *uio) ++{ ++ struct iovec *iov; ++ ulong_t cnt; ++ ++ while (n && uio->uio_resid) { ++ iov = uio->uio_iov; ++ cnt = MIN(iov->iov_len, n); ++ if (cnt == 0l) { ++ uio->uio_iov++; ++ uio->uio_iovcnt--; ++ continue; ++ } ++ switch (uio->uio_segflg) { ++ case UIO_USERSPACE: ++ case UIO_USERISPACE: ++ /* p = kernel data pointer ++ * iov->iov_base = user data pointer */ ++ ++ if (rw == UIO_READ) { ++ if (copy_to_user(iov->iov_base, p, cnt)) ++ return EFAULT; ++ /* error = xcopyout_nta(p, iov->iov_base, cnt, ++ * (uio->uio_extflg & UIO_COPY_CACHED)); */ ++ } else { ++ /* error = xcopyin_nta(iov->iov_base, p, cnt, ++ * (uio->uio_extflg & UIO_COPY_CACHED)); */ ++ if (copy_from_user(p, iov->iov_base, cnt)) ++ return EFAULT; ++ } ++ break; ++ case UIO_SYSSPACE: ++ if (rw == UIO_READ) ++ bcopy(p, iov->iov_base, cnt); ++ else ++ bcopy(iov->iov_base, p, cnt); ++ break; ++ } ++ iov->iov_base += cnt; ++ iov->iov_len -= cnt; ++ uio->uio_resid -= cnt; ++ uio->uio_loffset += cnt; ++ p = (caddr_t)p + cnt; ++ n -= cnt; ++ } ++ return (0); ++} ++EXPORT_SYMBOL(uiomove); ++ ++#define fuword8(uptr, vptr) get_user((*vptr), (uptr)) ++ ++/* ++ * Fault in the pages of the first n bytes specified by the uio structure. ++ * 1 byte in each page is touched and the uio struct is unmodified. Any ++ * error will terminate the process as this is only a best attempt to get ++ * the pages resident. ++ */ ++void ++uio_prefaultpages(ssize_t n, struct uio *uio) ++{ ++ struct iovec *iov; ++ ulong_t cnt, incr; ++ caddr_t p; ++ uint8_t tmp; ++ int iovcnt; ++ ++ iov = uio->uio_iov; ++ iovcnt = uio->uio_iovcnt; ++ ++ while ((n > 0) && (iovcnt > 0)) { ++ cnt = MIN(iov->iov_len, n); ++ if (cnt == 0) { ++ /* empty iov entry */ ++ iov++; ++ iovcnt--; ++ continue; ++ } ++ n -= cnt; ++ /* ++ * touch each page in this segment. ++ */ ++ p = iov->iov_base; ++ while (cnt) { ++ switch (uio->uio_segflg) { ++ case UIO_USERSPACE: ++ case UIO_USERISPACE: ++ if (fuword8((uint8_t *) p, &tmp)) ++ return; ++ break; ++ case UIO_SYSSPACE: ++ bcopy(p, &tmp, 1); ++ break; ++ } ++ incr = MIN(cnt, PAGESIZE); ++ p += incr; ++ cnt -= incr; ++ } ++ /* ++ * touch the last byte in case it straddles a page. ++ */ ++ p--; ++ switch (uio->uio_segflg) { ++ case UIO_USERSPACE: ++ case UIO_USERISPACE: ++ if (fuword8((uint8_t *) p, &tmp)) ++ return; ++ break; ++ case UIO_SYSSPACE: ++ bcopy(p, &tmp, 1); ++ break; ++ } ++ iov++; ++ iovcnt--; ++ } ++} ++EXPORT_SYMBOL(uio_prefaultpages); ++ ++/* ++ * same as uiomove() but doesn't modify uio structure. ++ * return in cbytes how many bytes were copied. 
++ */ ++int ++uiocopy(void *p, size_t n, enum uio_rw rw, struct uio *uio, size_t *cbytes) ++{ ++ struct iovec *iov; ++ ulong_t cnt; ++ int iovcnt; ++ ++ iovcnt = uio->uio_iovcnt; ++ *cbytes = 0; ++ ++ for (iov = uio->uio_iov; n && iovcnt; iov++, iovcnt--) { ++ cnt = MIN(iov->iov_len, n); ++ if (cnt == 0) ++ continue; ++ ++ switch (uio->uio_segflg) { ++ ++ case UIO_USERSPACE: ++ case UIO_USERISPACE: ++ /* p = kernel data pointer ++ * iov->iov_base = user data pointer */ ++ ++ if (rw == UIO_READ) { ++ /* * UIO_READ = copy data from kernel to user * */ ++ if (copy_to_user(iov->iov_base, p, cnt)) ++ return EFAULT; ++ /* error = xcopyout_nta(p, iov->iov_base, cnt, ++ * (uio->uio_extflg & UIO_COPY_CACHED)); */ ++ } else { ++ /* * UIO_WRITE = copy data from user to kernel * */ ++ /* error = xcopyin_nta(iov->iov_base, p, cnt, ++ * (uio->uio_extflg & UIO_COPY_CACHED)); */ ++ if (copy_from_user(p, iov->iov_base, cnt)) ++ return EFAULT; ++ } ++ break; ++ ++ case UIO_SYSSPACE: ++ if (rw == UIO_READ) ++ bcopy(p, iov->iov_base, cnt); ++ else ++ bcopy(iov->iov_base, p, cnt); ++ break; ++ } ++ p = (caddr_t)p + cnt; ++ n -= cnt; ++ *cbytes += cnt; ++ } ++ return (0); ++} ++EXPORT_SYMBOL(uiocopy); ++ ++/* ++ * Drop the next n chars out of *uiop. ++ */ ++void ++uioskip(uio_t *uiop, size_t n) ++{ ++ if (n > uiop->uio_resid) ++ return; ++ while (n != 0) { ++ iovec_t *iovp = uiop->uio_iov; ++ size_t niovb = MIN(iovp->iov_len, n); ++ ++ if (niovb == 0) { ++ uiop->uio_iov++; ++ uiop->uio_iovcnt--; ++ continue; ++ } ++ iovp->iov_base += niovb; ++ uiop->uio_loffset += niovb; ++ iovp->iov_len -= niovb; ++ uiop->uio_resid -= niovb; ++ n -= niovb; ++ } ++} ++EXPORT_SYMBOL(uioskip); ++#endif /* _KERNEL */ +diff -uNr linux-3.2.33-go.orig/fs/zfs/zcommon/zpool_prop.c linux-3.2.33-go/fs/zfs/zcommon/zpool_prop.c +--- linux-3.2.33-go.orig/fs/zfs/zcommon/zpool_prop.c 1970-01-01 01:00:00.000000000 +0100 ++++ linux-3.2.33-go/fs/zfs/zcommon/zpool_prop.c 2012-11-16 23:25:34.354039278 +0100 +@@ -0,0 +1,228 @@ ++/* ++ * CDDL HEADER START ++ * ++ * The contents of this file are subject to the terms of the ++ * Common Development and Distribution License (the "License"). ++ * You may not use this file except in compliance with the License. ++ * ++ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE ++ * or http://www.opensolaris.org/os/licensing. ++ * See the License for the specific language governing permissions ++ * and limitations under the License. ++ * ++ * When distributing Covered Code, include this CDDL HEADER in each ++ * file and include the License file at usr/src/OPENSOLARIS.LICENSE. ++ * If applicable, add the following below this CDDL HEADER, with the ++ * fields enclosed by brackets "[]" replaced with your own identifying ++ * information: Portions Copyright [yyyy] [name of copyright owner] ++ * ++ * CDDL HEADER END ++ */ ++/* ++ * Copyright (c) 2007, 2010, Oracle and/or its affiliates. All rights reserved. ++ * Copyright 2011 Nexenta Systems, Inc. All rights reserved. ++ * Copyright (c) 2012 by Delphix. All rights reserved. 
++ */ ++ ++#include ++#include ++#include ++#include ++#include ++ ++#include "zfs_prop.h" ++ ++#if defined(_KERNEL) ++#include ++#else ++#include ++#include ++#include ++#endif ++ ++static zprop_desc_t zpool_prop_table[ZPOOL_NUM_PROPS]; ++ ++zprop_desc_t * ++zpool_prop_get_table(void) ++{ ++ return (zpool_prop_table); ++} ++ ++void ++zpool_prop_init(void) ++{ ++ static zprop_index_t boolean_table[] = { ++ { "off", 0}, ++ { "on", 1}, ++ { NULL } ++ }; ++ ++ static zprop_index_t failuremode_table[] = { ++ { "wait", ZIO_FAILURE_MODE_WAIT }, ++ { "continue", ZIO_FAILURE_MODE_CONTINUE }, ++ { "panic", ZIO_FAILURE_MODE_PANIC }, ++ { NULL } ++ }; ++ ++ /* string properties */ ++ zprop_register_string(ZPOOL_PROP_ALTROOT, "altroot", NULL, PROP_DEFAULT, ++ ZFS_TYPE_POOL, "", "ALTROOT"); ++ zprop_register_string(ZPOOL_PROP_BOOTFS, "bootfs", NULL, PROP_DEFAULT, ++ ZFS_TYPE_POOL, "", "BOOTFS"); ++ zprop_register_string(ZPOOL_PROP_CACHEFILE, "cachefile", NULL, ++ PROP_DEFAULT, ZFS_TYPE_POOL, " | none", "CACHEFILE"); ++ zprop_register_string(ZPOOL_PROP_COMMENT, "comment", NULL, ++ PROP_DEFAULT, ZFS_TYPE_POOL, "", "COMMENT"); ++ ++ /* readonly number properties */ ++ zprop_register_number(ZPOOL_PROP_SIZE, "size", 0, PROP_READONLY, ++ ZFS_TYPE_POOL, "", "SIZE"); ++ zprop_register_number(ZPOOL_PROP_FREE, "free", 0, PROP_READONLY, ++ ZFS_TYPE_POOL, "", "FREE"); ++ zprop_register_number(ZPOOL_PROP_ALLOCATED, "allocated", 0, ++ PROP_READONLY, ZFS_TYPE_POOL, "", "ALLOC"); ++ zprop_register_number(ZPOOL_PROP_EXPANDSZ, "expandsize", 0, ++ PROP_READONLY, ZFS_TYPE_POOL, "", "EXPANDSZ"); ++ zprop_register_number(ZPOOL_PROP_CAPACITY, "capacity", 0, PROP_READONLY, ++ ZFS_TYPE_POOL, "", "CAP"); ++ zprop_register_number(ZPOOL_PROP_GUID, "guid", 0, PROP_READONLY, ++ ZFS_TYPE_POOL, "", "GUID"); ++ zprop_register_number(ZPOOL_PROP_HEALTH, "health", 0, PROP_READONLY, ++ ZFS_TYPE_POOL, "", "HEALTH"); ++ zprop_register_number(ZPOOL_PROP_DEDUPRATIO, "dedupratio", 0, ++ PROP_READONLY, ZFS_TYPE_POOL, "<1.00x or higher if deduped>", ++ "DEDUP"); ++ ++ /* readonly onetime number properties */ ++ zprop_register_number(ZPOOL_PROP_ASHIFT, "ashift", 0, PROP_ONETIME, ++ ZFS_TYPE_POOL, "", "ASHIFT"); ++ ++ /* default number properties */ ++ zprop_register_number(ZPOOL_PROP_VERSION, "version", SPA_VERSION, ++ PROP_DEFAULT, ZFS_TYPE_POOL, "", "VERSION"); ++ zprop_register_number(ZPOOL_PROP_DEDUPDITTO, "dedupditto", 0, ++ PROP_DEFAULT, ZFS_TYPE_POOL, "", "DEDUPDITTO"); ++ ++ /* default index (boolean) properties */ ++ zprop_register_index(ZPOOL_PROP_DELEGATION, "delegation", 1, ++ PROP_DEFAULT, ZFS_TYPE_POOL, "on | off", "DELEGATION", ++ boolean_table); ++ zprop_register_index(ZPOOL_PROP_AUTOREPLACE, "autoreplace", 0, ++ PROP_DEFAULT, ZFS_TYPE_POOL, "on | off", "REPLACE", boolean_table); ++ zprop_register_index(ZPOOL_PROP_LISTSNAPS, "listsnapshots", 0, ++ PROP_DEFAULT, ZFS_TYPE_POOL, "on | off", "LISTSNAPS", ++ boolean_table); ++ zprop_register_index(ZPOOL_PROP_AUTOEXPAND, "autoexpand", 0, ++ PROP_DEFAULT, ZFS_TYPE_POOL, "on | off", "EXPAND", boolean_table); ++ zprop_register_index(ZPOOL_PROP_READONLY, "readonly", 0, ++ PROP_DEFAULT, ZFS_TYPE_POOL, "on | off", "RDONLY", boolean_table); ++ ++ /* default index properties */ ++ zprop_register_index(ZPOOL_PROP_FAILUREMODE, "failmode", ++ ZIO_FAILURE_MODE_WAIT, PROP_DEFAULT, ZFS_TYPE_POOL, ++ "wait | continue | panic", "FAILMODE", failuremode_table); ++ ++ /* hidden properties */ ++ zprop_register_hidden(ZPOOL_PROP_NAME, "name", PROP_TYPE_STRING, ++ PROP_READONLY, ZFS_TYPE_POOL, "NAME"); ++} 
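The uiomove(), uiocopy() and uioskip() helpers added earlier in this hunk all share the same loop: walk the iovec array, move MIN(iov_len, n) bytes per segment, and advance both the data pointer and the uio bookkeeping. The following is a userland sketch of that walk, with simplified structs and memcpy() standing in for copy_to_user()/bcopy(); none of it is lifted from the patch.

        #include <stdio.h>
        #include <string.h>
        #include <stddef.h>

        /* Simplified stand-ins for iovec/uio; the kernel versions carry more state. */
        typedef struct { char *iov_base; size_t iov_len; } sk_iovec_t;
        typedef struct {
                sk_iovec_t      *uio_iov;
                int             uio_iovcnt;
                size_t          uio_resid;
        } sk_uio_t;

        /*
         * Copy "n" bytes from "p" into the uio's segments, advancing the uio
         * as it goes: the per-segment MIN(iov_len, n) walk described above.
         */
        static int
        sk_uiomove(const void *p, size_t n, sk_uio_t *uio)
        {
                while (n != 0 && uio->uio_resid != 0 && uio->uio_iovcnt != 0) {
                        sk_iovec_t *iov = uio->uio_iov;
                        size_t cnt = (iov->iov_len < n) ? iov->iov_len : n;

                        if (cnt == 0) {         /* exhausted segment: skip it */
                                uio->uio_iov++;
                                uio->uio_iovcnt--;
                                continue;
                        }
                        memcpy(iov->iov_base, p, cnt);
                        iov->iov_base += cnt;
                        iov->iov_len -= cnt;
                        uio->uio_resid -= cnt;
                        p = (const char *)p + cnt;
                        n -= cnt;
                }
                return (0);
        }

        int
        main(void)
        {
                char a[4] = "", b[8] = "";
                sk_iovec_t iov[2] = { { a, sizeof (a) }, { b, sizeof (b) } };
                sk_uio_t uio = { iov, 2, sizeof (a) + sizeof (b) };

                sk_uiomove("0123456789", 10, &uio);     /* fills a[0..3], b[0..5] */
                printf("%.4s %.6s\n", a, b);
                return (0);
        }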
++ ++/* ++ * Given a property name and its type, returns the corresponding property ID. ++ */ ++zpool_prop_t ++zpool_name_to_prop(const char *propname) ++{ ++ return (zprop_name_to_prop(propname, ZFS_TYPE_POOL)); ++} ++ ++/* ++ * Given a pool property ID, returns the corresponding name. ++ * Assuming the pool propety ID is valid. ++ */ ++const char * ++zpool_prop_to_name(zpool_prop_t prop) ++{ ++ return (zpool_prop_table[prop].pd_name); ++} ++ ++zprop_type_t ++zpool_prop_get_type(zpool_prop_t prop) ++{ ++ return (zpool_prop_table[prop].pd_proptype); ++} ++ ++boolean_t ++zpool_prop_readonly(zpool_prop_t prop) ++{ ++ return (zpool_prop_table[prop].pd_attr == PROP_READONLY); ++} ++ ++const char * ++zpool_prop_default_string(zpool_prop_t prop) ++{ ++ return (zpool_prop_table[prop].pd_strdefault); ++} ++ ++uint64_t ++zpool_prop_default_numeric(zpool_prop_t prop) ++{ ++ return (zpool_prop_table[prop].pd_numdefault); ++} ++ ++int ++zpool_prop_string_to_index(zpool_prop_t prop, const char *string, ++ uint64_t *index) ++{ ++ return (zprop_string_to_index(prop, string, index, ZFS_TYPE_POOL)); ++} ++ ++int ++zpool_prop_index_to_string(zpool_prop_t prop, uint64_t index, ++ const char **string) ++{ ++ return (zprop_index_to_string(prop, index, string, ZFS_TYPE_POOL)); ++} ++ ++uint64_t ++zpool_prop_random_value(zpool_prop_t prop, uint64_t seed) ++{ ++ return (zprop_random_value(prop, seed, ZFS_TYPE_POOL)); ++} ++ ++#ifndef _KERNEL ++ ++const char * ++zpool_prop_values(zpool_prop_t prop) ++{ ++ return (zpool_prop_table[prop].pd_values); ++} ++ ++const char * ++zpool_prop_column_name(zpool_prop_t prop) ++{ ++ return (zpool_prop_table[prop].pd_colname); ++} ++ ++boolean_t ++zpool_prop_align_right(zpool_prop_t prop) ++{ ++ return (zpool_prop_table[prop].pd_rightalign); ++} ++#endif ++ ++#if defined(_KERNEL) && defined(HAVE_SPL) ++/* zpool property functions */ ++EXPORT_SYMBOL(zpool_prop_init); ++EXPORT_SYMBOL(zpool_prop_get_type); ++EXPORT_SYMBOL(zpool_prop_get_table); ++ ++/* Pool property functions shared between libzfs and kernel. */ ++EXPORT_SYMBOL(zpool_name_to_prop); ++EXPORT_SYMBOL(zpool_prop_to_name); ++EXPORT_SYMBOL(zpool_prop_default_string); ++EXPORT_SYMBOL(zpool_prop_default_numeric); ++EXPORT_SYMBOL(zpool_prop_readonly); ++EXPORT_SYMBOL(zpool_prop_index_to_string); ++EXPORT_SYMBOL(zpool_prop_string_to_index); ++#endif +diff -uNr linux-3.2.33-go.orig/fs/zfs/zcommon/zprop_common.c linux-3.2.33-go/fs/zfs/zcommon/zprop_common.c +--- linux-3.2.33-go.orig/fs/zfs/zcommon/zprop_common.c 1970-01-01 01:00:00.000000000 +0100 ++++ linux-3.2.33-go/fs/zfs/zcommon/zprop_common.c 2012-11-16 23:25:34.354039278 +0100 +@@ -0,0 +1,444 @@ ++/* ++ * CDDL HEADER START ++ * ++ * The contents of this file are subject to the terms of the ++ * Common Development and Distribution License (the "License"). ++ * You may not use this file except in compliance with the License. ++ * ++ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE ++ * or http://www.opensolaris.org/os/licensing. ++ * See the License for the specific language governing permissions ++ * and limitations under the License. ++ * ++ * When distributing Covered Code, include this CDDL HEADER in each ++ * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 
++ * If applicable, add the following below this CDDL HEADER, with the ++ * fields enclosed by brackets "[]" replaced with your own identifying ++ * information: Portions Copyright [yyyy] [name of copyright owner] ++ * ++ * CDDL HEADER END ++ */ ++/* ++ * Copyright 2010 Sun Microsystems, Inc. All rights reserved. ++ * Use is subject to license terms. ++ */ ++ ++/* ++ * Common routines used by zfs and zpool property management. ++ */ ++ ++#include ++#include ++#include ++#include ++#include ++#include ++ ++#include "zfs_prop.h" ++#include "zfs_deleg.h" ++ ++#if defined(_KERNEL) ++#include ++#include ++#else ++#include ++#include ++#include ++#endif ++ ++static zprop_desc_t * ++zprop_get_proptable(zfs_type_t type) ++{ ++ if (type == ZFS_TYPE_POOL) ++ return (zpool_prop_get_table()); ++ else ++ return (zfs_prop_get_table()); ++} ++ ++static int ++zprop_get_numprops(zfs_type_t type) ++{ ++ if (type == ZFS_TYPE_POOL) ++ return (ZPOOL_NUM_PROPS); ++ else ++ return (ZFS_NUM_PROPS); ++} ++ ++void ++zprop_register_impl(int prop, const char *name, zprop_type_t type, ++ uint64_t numdefault, const char *strdefault, zprop_attr_t attr, ++ int objset_types, const char *values, const char *colname, ++ boolean_t rightalign, boolean_t visible, const zprop_index_t *idx_tbl) ++{ ++ zprop_desc_t *prop_tbl = zprop_get_proptable(objset_types); ++ zprop_desc_t *pd; ++ ++ pd = &prop_tbl[prop]; ++ ++ ASSERT(pd->pd_name == NULL || pd->pd_name == name); ++ ASSERT(name != NULL); ++ ASSERT(colname != NULL); ++ ++ pd->pd_name = name; ++ pd->pd_propnum = prop; ++ pd->pd_proptype = type; ++ pd->pd_numdefault = numdefault; ++ pd->pd_strdefault = strdefault; ++ pd->pd_attr = attr; ++ pd->pd_types = objset_types; ++ pd->pd_values = values; ++ pd->pd_colname = colname; ++ pd->pd_rightalign = rightalign; ++ pd->pd_visible = visible; ++ pd->pd_table = idx_tbl; ++ pd->pd_table_size = 0; ++ while (idx_tbl && (idx_tbl++)->pi_name != NULL) ++ pd->pd_table_size++; ++} ++ ++void ++zprop_register_string(int prop, const char *name, const char *def, ++ zprop_attr_t attr, int objset_types, const char *values, ++ const char *colname) ++{ ++ zprop_register_impl(prop, name, PROP_TYPE_STRING, 0, def, attr, ++ objset_types, values, colname, B_FALSE, B_TRUE, NULL); ++ ++} ++ ++void ++zprop_register_number(int prop, const char *name, uint64_t def, ++ zprop_attr_t attr, int objset_types, const char *values, ++ const char *colname) ++{ ++ zprop_register_impl(prop, name, PROP_TYPE_NUMBER, def, NULL, attr, ++ objset_types, values, colname, B_TRUE, B_TRUE, NULL); ++} ++ ++void ++zprop_register_index(int prop, const char *name, uint64_t def, ++ zprop_attr_t attr, int objset_types, const char *values, ++ const char *colname, const zprop_index_t *idx_tbl) ++{ ++ zprop_register_impl(prop, name, PROP_TYPE_INDEX, def, NULL, attr, ++ objset_types, values, colname, B_TRUE, B_TRUE, idx_tbl); ++} ++ ++void ++zprop_register_hidden(int prop, const char *name, zprop_type_t type, ++ zprop_attr_t attr, int objset_types, const char *colname) ++{ ++ zprop_register_impl(prop, name, type, 0, NULL, attr, ++ objset_types, NULL, colname, B_FALSE, B_FALSE, NULL); ++} ++ ++ ++/* ++ * A comparison function we can use to order indexes into property tables. 
++ */ ++static int ++zprop_compare(const void *arg1, const void *arg2) ++{ ++ const zprop_desc_t *p1 = *((zprop_desc_t **)arg1); ++ const zprop_desc_t *p2 = *((zprop_desc_t **)arg2); ++ boolean_t p1ro, p2ro; ++ ++ p1ro = (p1->pd_attr == PROP_READONLY); ++ p2ro = (p2->pd_attr == PROP_READONLY); ++ ++ if (p1ro == p2ro) ++ return (strcmp(p1->pd_name, p2->pd_name)); ++ ++ return (p1ro ? -1 : 1); ++} ++ ++/* ++ * Iterate over all properties in the given property table, calling back ++ * into the specified function for each property. We will continue to ++ * iterate until we either reach the end or the callback function returns ++ * something other than ZPROP_CONT. ++ */ ++int ++zprop_iter_common(zprop_func func, void *cb, boolean_t show_all, ++ boolean_t ordered, zfs_type_t type) ++{ ++ int i, j, num_props, size, prop; ++ zprop_desc_t *prop_tbl; ++ zprop_desc_t **order; ++ ++ prop_tbl = zprop_get_proptable(type); ++ num_props = zprop_get_numprops(type); ++ size = num_props * sizeof (zprop_desc_t *); ++ ++#if defined(_KERNEL) ++ order = kmem_alloc(size, KM_PUSHPAGE); ++#else ++ if ((order = malloc(size)) == NULL) ++ return (ZPROP_CONT); ++#endif ++ ++ for (j = 0; j < num_props; j++) ++ order[j] = &prop_tbl[j]; ++ ++ if (ordered) { ++ qsort((void *)order, num_props, sizeof (zprop_desc_t *), ++ zprop_compare); ++ } ++ ++ prop = ZPROP_CONT; ++ for (i = 0; i < num_props; i++) { ++ if ((order[i]->pd_visible || show_all) && ++ (func(order[i]->pd_propnum, cb) != ZPROP_CONT)) { ++ prop = order[i]->pd_propnum; ++ break; ++ } ++ } ++ ++#if defined(_KERNEL) ++ kmem_free(order, size); ++#else ++ free(order); ++#endif ++ return (prop); ++} ++ ++static boolean_t ++propname_match(const char *p, size_t len, zprop_desc_t *prop_entry) ++{ ++ const char *propname = prop_entry->pd_name; ++#ifndef _KERNEL ++ const char *colname = prop_entry->pd_colname; ++ int c; ++#endif ++ ++ if (len == strlen(propname) && ++ strncmp(p, propname, len) == 0) ++ return (B_TRUE); ++ ++#ifndef _KERNEL ++ if (colname == NULL || len != strlen(colname)) ++ return (B_FALSE); ++ ++ for (c = 0; c < len; c++) ++ if (p[c] != tolower(colname[c])) ++ break; ++ ++ return (colname[c] == '\0'); ++#else ++ return (B_FALSE); ++#endif ++} ++ ++typedef struct name_to_prop_cb { ++ const char *propname; ++ zprop_desc_t *prop_tbl; ++} name_to_prop_cb_t; ++ ++static int ++zprop_name_to_prop_cb(int prop, void *cb_data) ++{ ++ name_to_prop_cb_t *data = cb_data; ++ ++ if (propname_match(data->propname, strlen(data->propname), ++ &data->prop_tbl[prop])) ++ return (prop); ++ ++ return (ZPROP_CONT); ++} ++ ++int ++zprop_name_to_prop(const char *propname, zfs_type_t type) ++{ ++ int prop; ++ name_to_prop_cb_t cb_data; ++ ++ cb_data.propname = propname; ++ cb_data.prop_tbl = zprop_get_proptable(type); ++ ++ prop = zprop_iter_common(zprop_name_to_prop_cb, &cb_data, ++ B_TRUE, B_FALSE, type); ++ ++ return (prop == ZPROP_CONT ? 
ZPROP_INVAL : prop); ++} ++ ++int ++zprop_string_to_index(int prop, const char *string, uint64_t *index, ++ zfs_type_t type) ++{ ++ zprop_desc_t *prop_tbl; ++ const zprop_index_t *idx_tbl; ++ int i; ++ ++ if (prop == ZPROP_INVAL || prop == ZPROP_CONT) ++ return (-1); ++ ++ ASSERT(prop < zprop_get_numprops(type)); ++ prop_tbl = zprop_get_proptable(type); ++ if ((idx_tbl = prop_tbl[prop].pd_table) == NULL) ++ return (-1); ++ ++ for (i = 0; idx_tbl[i].pi_name != NULL; i++) { ++ if (strcmp(string, idx_tbl[i].pi_name) == 0) { ++ *index = idx_tbl[i].pi_value; ++ return (0); ++ } ++ } ++ ++ return (-1); ++} ++ ++int ++zprop_index_to_string(int prop, uint64_t index, const char **string, ++ zfs_type_t type) ++{ ++ zprop_desc_t *prop_tbl; ++ const zprop_index_t *idx_tbl; ++ int i; ++ ++ if (prop == ZPROP_INVAL || prop == ZPROP_CONT) ++ return (-1); ++ ++ ASSERT(prop < zprop_get_numprops(type)); ++ prop_tbl = zprop_get_proptable(type); ++ if ((idx_tbl = prop_tbl[prop].pd_table) == NULL) ++ return (-1); ++ ++ for (i = 0; idx_tbl[i].pi_name != NULL; i++) { ++ if (idx_tbl[i].pi_value == index) { ++ *string = idx_tbl[i].pi_name; ++ return (0); ++ } ++ } ++ ++ return (-1); ++} ++ ++/* ++ * Return a random valid property value. Used by ztest. ++ */ ++uint64_t ++zprop_random_value(int prop, uint64_t seed, zfs_type_t type) ++{ ++ zprop_desc_t *prop_tbl; ++ const zprop_index_t *idx_tbl; ++ ++ ASSERT((uint_t)prop < zprop_get_numprops(type)); ++ prop_tbl = zprop_get_proptable(type); ++ idx_tbl = prop_tbl[prop].pd_table; ++ ++ if (idx_tbl == NULL) ++ return (seed); ++ ++ return (idx_tbl[seed % prop_tbl[prop].pd_table_size].pi_value); ++} ++ ++const char * ++zprop_values(int prop, zfs_type_t type) ++{ ++ zprop_desc_t *prop_tbl; ++ ++ ASSERT(prop != ZPROP_INVAL && prop != ZPROP_CONT); ++ ASSERT(prop < zprop_get_numprops(type)); ++ ++ prop_tbl = zprop_get_proptable(type); ++ ++ return (prop_tbl[prop].pd_values); ++} ++ ++/* ++ * Returns TRUE if the property applies to any of the given dataset types. ++ */ ++boolean_t ++zprop_valid_for_type(int prop, zfs_type_t type) ++{ ++ zprop_desc_t *prop_tbl; ++ ++ if (prop == ZPROP_INVAL || prop == ZPROP_CONT) ++ return (B_FALSE); ++ ++ ASSERT(prop < zprop_get_numprops(type)); ++ prop_tbl = zprop_get_proptable(type); ++ return ((prop_tbl[prop].pd_types & type) != 0); ++} ++ ++#ifndef _KERNEL ++ ++/* ++ * Determines the minimum width for the column, and indicates whether it's fixed ++ * or not. Only string columns are non-fixed. ++ */ ++size_t ++zprop_width(int prop, boolean_t *fixed, zfs_type_t type) ++{ ++ zprop_desc_t *prop_tbl, *pd; ++ const zprop_index_t *idx; ++ size_t ret; ++ int i; ++ ++ ASSERT(prop != ZPROP_INVAL && prop != ZPROP_CONT); ++ ASSERT(prop < zprop_get_numprops(type)); ++ ++ prop_tbl = zprop_get_proptable(type); ++ pd = &prop_tbl[prop]; ++ ++ *fixed = B_TRUE; ++ ++ /* ++ * Start with the width of the column name. ++ */ ++ ret = strlen(pd->pd_colname); ++ ++ /* ++ * For fixed-width values, make sure the width is large enough to hold ++ * any possible value. ++ */ ++ switch (pd->pd_proptype) { ++ case PROP_TYPE_NUMBER: ++ /* ++ * The maximum length of a human-readable number is 5 characters ++ * ("20.4M", for example). ++ */ ++ if (ret < 5) ++ ret = 5; ++ /* ++ * 'creation' is handled specially because it's a number ++ * internally, but displayed as a date string. 
++ */ ++ if (prop == ZFS_PROP_CREATION) ++ *fixed = B_FALSE; ++ break; ++ case PROP_TYPE_INDEX: ++ idx = prop_tbl[prop].pd_table; ++ for (i = 0; idx[i].pi_name != NULL; i++) { ++ if (strlen(idx[i].pi_name) > ret) ++ ret = strlen(idx[i].pi_name); ++ } ++ break; ++ ++ case PROP_TYPE_STRING: ++ *fixed = B_FALSE; ++ break; ++ } ++ ++ return (ret); ++} ++ ++#endif ++ ++#if defined(_KERNEL) && defined(HAVE_SPL) ++/* Common routines to initialize property tables */ ++EXPORT_SYMBOL(zprop_register_impl); ++EXPORT_SYMBOL(zprop_register_string); ++EXPORT_SYMBOL(zprop_register_number); ++EXPORT_SYMBOL(zprop_register_index); ++EXPORT_SYMBOL(zprop_register_hidden); ++ ++/* Common routines for zfs and zpool property management */ ++EXPORT_SYMBOL(zprop_iter_common); ++EXPORT_SYMBOL(zprop_name_to_prop); ++EXPORT_SYMBOL(zprop_string_to_index); ++EXPORT_SYMBOL(zprop_index_to_string); ++EXPORT_SYMBOL(zprop_random_value); ++EXPORT_SYMBOL(zprop_values); ++EXPORT_SYMBOL(zprop_valid_for_type); ++#endif +diff -uNr linux-3.2.33-go.orig/fs/zfs/zfs/arc.c linux-3.2.33-go/fs/zfs/zfs/arc.c +--- linux-3.2.33-go.orig/fs/zfs/zfs/arc.c 1970-01-01 01:00:00.000000000 +0100 ++++ linux-3.2.33-go/fs/zfs/zfs/arc.c 2012-11-16 23:25:34.349039334 +0100 +@@ -0,0 +1,4985 @@ ++/* ++ * CDDL HEADER START ++ * ++ * The contents of this file are subject to the terms of the ++ * Common Development and Distribution License (the "License"). ++ * You may not use this file except in compliance with the License. ++ * ++ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE ++ * or http://www.opensolaris.org/os/licensing. ++ * See the License for the specific language governing permissions ++ * and limitations under the License. ++ * ++ * When distributing Covered Code, include this CDDL HEADER in each ++ * file and include the License file at usr/src/OPENSOLARIS.LICENSE. ++ * If applicable, add the following below this CDDL HEADER, with the ++ * fields enclosed by brackets "[]" replaced with your own identifying ++ * information: Portions Copyright [yyyy] [name of copyright owner] ++ * ++ * CDDL HEADER END ++ */ ++/* ++ * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved. ++ * Copyright 2011 Nexenta Systems, Inc. All rights reserved. ++ * Copyright (c) 2011 by Delphix. All rights reserved. ++ */ ++ ++/* ++ * DVA-based Adjustable Replacement Cache ++ * ++ * While much of the theory of operation used here is ++ * based on the self-tuning, low overhead replacement cache ++ * presented by Megiddo and Modha at FAST 2003, there are some ++ * significant differences: ++ * ++ * 1. The Megiddo and Modha model assumes any page is evictable. ++ * Pages in its cache cannot be "locked" into memory. This makes ++ * the eviction algorithm simple: evict the last page in the list. ++ * This also make the performance characteristics easy to reason ++ * about. Our cache is not so simple. At any given moment, some ++ * subset of the blocks in the cache are un-evictable because we ++ * have handed out a reference to them. Blocks are only evictable ++ * when there are no external references active. This makes ++ * eviction far more problematic: we choose to evict the evictable ++ * blocks that are the "lowest" in the list. ++ * ++ * There are times when it is not possible to evict the requested ++ * space. In these circumstances we are unable to adjust the cache ++ * size. 
To prevent the cache growing unbounded at these times we ++ * implement a "cache throttle" that slows the flow of new data ++ * into the cache until we can make space available. ++ * ++ * 2. The Megiddo and Modha model assumes a fixed cache size. ++ * Pages are evicted when the cache is full and there is a cache ++ * miss. Our model has a variable sized cache. It grows with ++ * high use, but also tries to react to memory pressure from the ++ * operating system: decreasing its size when system memory is ++ * tight. ++ * ++ * 3. The Megiddo and Modha model assumes a fixed page size. All ++ * elements of the cache are therefor exactly the same size. So ++ * when adjusting the cache size following a cache miss, its simply ++ * a matter of choosing a single page to evict. In our model, we ++ * have variable sized cache blocks (rangeing from 512 bytes to ++ * 128K bytes). We therefor choose a set of blocks to evict to make ++ * space for a cache miss that approximates as closely as possible ++ * the space used by the new block. ++ * ++ * See also: "ARC: A Self-Tuning, Low Overhead Replacement Cache" ++ * by N. Megiddo & D. Modha, FAST 2003 ++ */ ++ ++/* ++ * The locking model: ++ * ++ * A new reference to a cache buffer can be obtained in two ++ * ways: 1) via a hash table lookup using the DVA as a key, ++ * or 2) via one of the ARC lists. The arc_read() interface ++ * uses method 1, while the internal arc algorithms for ++ * adjusting the cache use method 2. We therefor provide two ++ * types of locks: 1) the hash table lock array, and 2) the ++ * arc list locks. ++ * ++ * Buffers do not have their own mutexs, rather they rely on the ++ * hash table mutexs for the bulk of their protection (i.e. most ++ * fields in the arc_buf_hdr_t are protected by these mutexs). ++ * ++ * buf_hash_find() returns the appropriate mutex (held) when it ++ * locates the requested buffer in the hash table. It returns ++ * NULL for the mutex if the buffer was not in the table. ++ * ++ * buf_hash_remove() expects the appropriate hash mutex to be ++ * already held before it is invoked. ++ * ++ * Each arc state also has a mutex which is used to protect the ++ * buffer list associated with the state. When attempting to ++ * obtain a hash table lock while holding an arc list lock you ++ * must use: mutex_tryenter() to avoid deadlock. Also note that ++ * the active state mutex must be held before the ghost state mutex. ++ * ++ * Arc buffers may have an associated eviction callback function. ++ * This function will be invoked prior to removing the buffer (e.g. ++ * in arc_do_user_evicts()). Note however that the data associated ++ * with the buffer may be evicted prior to the callback. The callback ++ * must be made with *no locks held* (to prevent deadlock). Additionally, ++ * the users of callbacks must ensure that their private data is ++ * protected from simultaneous callbacks from arc_buf_evict() ++ * and arc_do_user_evicts(). ++ * ++ * It as also possible to register a callback which is run when the ++ * arc_meta_limit is reached and no buffers can be safely evicted. In ++ * this case the arc user should drop a reference on some arc buffers so ++ * they can be reclaimed and the arc_meta_limit honored. For example, ++ * when using the ZPL each dentry holds a references on a znode. These ++ * dentries must be pruned before the arc buffer holding the znode can ++ * be safely evicted. ++ * ++ * Note that the majority of the performance stats are manipulated ++ * with atomic operations. 
++ * ++ * The L2ARC uses the l2arc_buflist_mtx global mutex for the following: ++ * ++ * - L2ARC buflist creation ++ * - L2ARC buflist eviction ++ * - L2ARC write completion, which walks L2ARC buflists ++ * - ARC header destruction, as it removes from L2ARC buflists ++ * - ARC header release, as it removes from L2ARC buflists ++ */ ++ ++#include ++#include ++#include ++#include ++#include ++#include ++#ifdef _KERNEL ++#include ++#include ++#include ++#include ++#endif ++#include ++#include ++#include ++#include ++ ++static kmutex_t arc_reclaim_thr_lock; ++static kcondvar_t arc_reclaim_thr_cv; /* used to signal reclaim thr */ ++static uint8_t arc_thread_exit; ++ ++/* number of bytes to prune from caches when at arc_meta_limit is reached */ ++uint_t arc_meta_prune = 1048576; ++ ++typedef enum arc_reclaim_strategy { ++ ARC_RECLAIM_AGGR, /* Aggressive reclaim strategy */ ++ ARC_RECLAIM_CONS /* Conservative reclaim strategy */ ++} arc_reclaim_strategy_t; ++ ++/* number of seconds before growing cache again */ ++static int arc_grow_retry = 5; ++ ++/* expiration time for arc_no_grow */ ++static clock_t arc_grow_time = 0; ++ ++/* shift of arc_c for calculating both min and max arc_p */ ++static int arc_p_min_shift = 4; ++ ++/* log2(fraction of arc to reclaim) */ ++static int arc_shrink_shift = 5; ++ ++/* ++ * minimum lifespan of a prefetch block in clock ticks ++ * (initialized in arc_init()) ++ */ ++static int arc_min_prefetch_lifespan; ++ ++static int arc_dead; ++ ++/* ++ * The arc has filled available memory and has now warmed up. ++ */ ++static boolean_t arc_warm; ++ ++/* ++ * These tunables are for performance analysis. ++ */ ++unsigned long zfs_arc_max = 0; ++unsigned long zfs_arc_min = 0; ++unsigned long zfs_arc_meta_limit = 0; ++int zfs_arc_grow_retry = 0; ++int zfs_arc_shrink_shift = 0; ++int zfs_arc_p_min_shift = 0; ++int zfs_arc_meta_prune = 0; ++ ++/* ++ * Note that buffers can be in one of 6 states: ++ * ARC_anon - anonymous (discussed below) ++ * ARC_mru - recently used, currently cached ++ * ARC_mru_ghost - recentely used, no longer in cache ++ * ARC_mfu - frequently used, currently cached ++ * ARC_mfu_ghost - frequently used, no longer in cache ++ * ARC_l2c_only - exists in L2ARC but not other states ++ * When there are no active references to the buffer, they are ++ * are linked onto a list in one of these arc states. These are ++ * the only buffers that can be evicted or deleted. Within each ++ * state there are multiple lists, one for meta-data and one for ++ * non-meta-data. Meta-data (indirect blocks, blocks of dnodes, ++ * etc.) is tracked separately so that it can be managed more ++ * explicitly: favored over data, limited explicitly. ++ * ++ * Anonymous buffers are buffers that are not associated with ++ * a DVA. These are buffers that hold dirty block copies ++ * before they are written to stable storage. By definition, ++ * they are "ref'd" and are considered part of arc_mru ++ * that cannot be freed. Generally, they will aquire a DVA ++ * as they are written and migrate onto the arc_mru list. ++ * ++ * The ARC_l2c_only state is for buffers that are in the second ++ * level ARC but no longer in any of the ARC_m* lists. The second ++ * level ARC itself may also contain buffers that are in any of ++ * the ARC_m* states - meaning that a buffer can exist in two ++ * places. The reason for the ARC_l2c_only state is to keep the ++ * buffer header in the hash table, so that reads that hit the ++ * second level ARC benefit from these fast lookups. 
++ */ ++ ++typedef struct arc_state { ++ list_t arcs_list[ARC_BUFC_NUMTYPES]; /* list of evictable buffers */ ++ uint64_t arcs_lsize[ARC_BUFC_NUMTYPES]; /* amount of evictable data */ ++ uint64_t arcs_size; /* total amount of data in this state */ ++ kmutex_t arcs_mtx; ++} arc_state_t; ++ ++/* The 6 states: */ ++static arc_state_t ARC_anon; ++static arc_state_t ARC_mru; ++static arc_state_t ARC_mru_ghost; ++static arc_state_t ARC_mfu; ++static arc_state_t ARC_mfu_ghost; ++static arc_state_t ARC_l2c_only; ++ ++typedef struct arc_stats { ++ kstat_named_t arcstat_hits; ++ kstat_named_t arcstat_misses; ++ kstat_named_t arcstat_demand_data_hits; ++ kstat_named_t arcstat_demand_data_misses; ++ kstat_named_t arcstat_demand_metadata_hits; ++ kstat_named_t arcstat_demand_metadata_misses; ++ kstat_named_t arcstat_prefetch_data_hits; ++ kstat_named_t arcstat_prefetch_data_misses; ++ kstat_named_t arcstat_prefetch_metadata_hits; ++ kstat_named_t arcstat_prefetch_metadata_misses; ++ kstat_named_t arcstat_mru_hits; ++ kstat_named_t arcstat_mru_ghost_hits; ++ kstat_named_t arcstat_mfu_hits; ++ kstat_named_t arcstat_mfu_ghost_hits; ++ kstat_named_t arcstat_deleted; ++ kstat_named_t arcstat_recycle_miss; ++ kstat_named_t arcstat_mutex_miss; ++ kstat_named_t arcstat_evict_skip; ++ kstat_named_t arcstat_evict_l2_cached; ++ kstat_named_t arcstat_evict_l2_eligible; ++ kstat_named_t arcstat_evict_l2_ineligible; ++ kstat_named_t arcstat_hash_elements; ++ kstat_named_t arcstat_hash_elements_max; ++ kstat_named_t arcstat_hash_collisions; ++ kstat_named_t arcstat_hash_chains; ++ kstat_named_t arcstat_hash_chain_max; ++ kstat_named_t arcstat_p; ++ kstat_named_t arcstat_c; ++ kstat_named_t arcstat_c_min; ++ kstat_named_t arcstat_c_max; ++ kstat_named_t arcstat_size; ++ kstat_named_t arcstat_hdr_size; ++ kstat_named_t arcstat_data_size; ++ kstat_named_t arcstat_other_size; ++ kstat_named_t arcstat_anon_size; ++ kstat_named_t arcstat_anon_evict_data; ++ kstat_named_t arcstat_anon_evict_metadata; ++ kstat_named_t arcstat_mru_size; ++ kstat_named_t arcstat_mru_evict_data; ++ kstat_named_t arcstat_mru_evict_metadata; ++ kstat_named_t arcstat_mru_ghost_size; ++ kstat_named_t arcstat_mru_ghost_evict_data; ++ kstat_named_t arcstat_mru_ghost_evict_metadata; ++ kstat_named_t arcstat_mfu_size; ++ kstat_named_t arcstat_mfu_evict_data; ++ kstat_named_t arcstat_mfu_evict_metadata; ++ kstat_named_t arcstat_mfu_ghost_size; ++ kstat_named_t arcstat_mfu_ghost_evict_data; ++ kstat_named_t arcstat_mfu_ghost_evict_metadata; ++ kstat_named_t arcstat_l2_hits; ++ kstat_named_t arcstat_l2_misses; ++ kstat_named_t arcstat_l2_feeds; ++ kstat_named_t arcstat_l2_rw_clash; ++ kstat_named_t arcstat_l2_read_bytes; ++ kstat_named_t arcstat_l2_write_bytes; ++ kstat_named_t arcstat_l2_writes_sent; ++ kstat_named_t arcstat_l2_writes_done; ++ kstat_named_t arcstat_l2_writes_error; ++ kstat_named_t arcstat_l2_writes_hdr_miss; ++ kstat_named_t arcstat_l2_evict_lock_retry; ++ kstat_named_t arcstat_l2_evict_reading; ++ kstat_named_t arcstat_l2_free_on_write; ++ kstat_named_t arcstat_l2_abort_lowmem; ++ kstat_named_t arcstat_l2_cksum_bad; ++ kstat_named_t arcstat_l2_io_error; ++ kstat_named_t arcstat_l2_size; ++ kstat_named_t arcstat_l2_hdr_size; ++ kstat_named_t arcstat_memory_throttle_count; ++ kstat_named_t arcstat_memory_direct_count; ++ kstat_named_t arcstat_memory_indirect_count; ++ kstat_named_t arcstat_no_grow; ++ kstat_named_t arcstat_tempreserve; ++ kstat_named_t arcstat_loaned_bytes; ++ kstat_named_t arcstat_prune; ++ kstat_named_t 
arcstat_meta_used; ++ kstat_named_t arcstat_meta_limit; ++ kstat_named_t arcstat_meta_max; ++} arc_stats_t; ++ ++static arc_stats_t arc_stats = { ++ { "hits", KSTAT_DATA_UINT64 }, ++ { "misses", KSTAT_DATA_UINT64 }, ++ { "demand_data_hits", KSTAT_DATA_UINT64 }, ++ { "demand_data_misses", KSTAT_DATA_UINT64 }, ++ { "demand_metadata_hits", KSTAT_DATA_UINT64 }, ++ { "demand_metadata_misses", KSTAT_DATA_UINT64 }, ++ { "prefetch_data_hits", KSTAT_DATA_UINT64 }, ++ { "prefetch_data_misses", KSTAT_DATA_UINT64 }, ++ { "prefetch_metadata_hits", KSTAT_DATA_UINT64 }, ++ { "prefetch_metadata_misses", KSTAT_DATA_UINT64 }, ++ { "mru_hits", KSTAT_DATA_UINT64 }, ++ { "mru_ghost_hits", KSTAT_DATA_UINT64 }, ++ { "mfu_hits", KSTAT_DATA_UINT64 }, ++ { "mfu_ghost_hits", KSTAT_DATA_UINT64 }, ++ { "deleted", KSTAT_DATA_UINT64 }, ++ { "recycle_miss", KSTAT_DATA_UINT64 }, ++ { "mutex_miss", KSTAT_DATA_UINT64 }, ++ { "evict_skip", KSTAT_DATA_UINT64 }, ++ { "evict_l2_cached", KSTAT_DATA_UINT64 }, ++ { "evict_l2_eligible", KSTAT_DATA_UINT64 }, ++ { "evict_l2_ineligible", KSTAT_DATA_UINT64 }, ++ { "hash_elements", KSTAT_DATA_UINT64 }, ++ { "hash_elements_max", KSTAT_DATA_UINT64 }, ++ { "hash_collisions", KSTAT_DATA_UINT64 }, ++ { "hash_chains", KSTAT_DATA_UINT64 }, ++ { "hash_chain_max", KSTAT_DATA_UINT64 }, ++ { "p", KSTAT_DATA_UINT64 }, ++ { "c", KSTAT_DATA_UINT64 }, ++ { "c_min", KSTAT_DATA_UINT64 }, ++ { "c_max", KSTAT_DATA_UINT64 }, ++ { "size", KSTAT_DATA_UINT64 }, ++ { "hdr_size", KSTAT_DATA_UINT64 }, ++ { "data_size", KSTAT_DATA_UINT64 }, ++ { "other_size", KSTAT_DATA_UINT64 }, ++ { "anon_size", KSTAT_DATA_UINT64 }, ++ { "anon_evict_data", KSTAT_DATA_UINT64 }, ++ { "anon_evict_metadata", KSTAT_DATA_UINT64 }, ++ { "mru_size", KSTAT_DATA_UINT64 }, ++ { "mru_evict_data", KSTAT_DATA_UINT64 }, ++ { "mru_evict_metadata", KSTAT_DATA_UINT64 }, ++ { "mru_ghost_size", KSTAT_DATA_UINT64 }, ++ { "mru_ghost_evict_data", KSTAT_DATA_UINT64 }, ++ { "mru_ghost_evict_metadata", KSTAT_DATA_UINT64 }, ++ { "mfu_size", KSTAT_DATA_UINT64 }, ++ { "mfu_evict_data", KSTAT_DATA_UINT64 }, ++ { "mfu_evict_metadata", KSTAT_DATA_UINT64 }, ++ { "mfu_ghost_size", KSTAT_DATA_UINT64 }, ++ { "mfu_ghost_evict_data", KSTAT_DATA_UINT64 }, ++ { "mfu_ghost_evict_metadata", KSTAT_DATA_UINT64 }, ++ { "l2_hits", KSTAT_DATA_UINT64 }, ++ { "l2_misses", KSTAT_DATA_UINT64 }, ++ { "l2_feeds", KSTAT_DATA_UINT64 }, ++ { "l2_rw_clash", KSTAT_DATA_UINT64 }, ++ { "l2_read_bytes", KSTAT_DATA_UINT64 }, ++ { "l2_write_bytes", KSTAT_DATA_UINT64 }, ++ { "l2_writes_sent", KSTAT_DATA_UINT64 }, ++ { "l2_writes_done", KSTAT_DATA_UINT64 }, ++ { "l2_writes_error", KSTAT_DATA_UINT64 }, ++ { "l2_writes_hdr_miss", KSTAT_DATA_UINT64 }, ++ { "l2_evict_lock_retry", KSTAT_DATA_UINT64 }, ++ { "l2_evict_reading", KSTAT_DATA_UINT64 }, ++ { "l2_free_on_write", KSTAT_DATA_UINT64 }, ++ { "l2_abort_lowmem", KSTAT_DATA_UINT64 }, ++ { "l2_cksum_bad", KSTAT_DATA_UINT64 }, ++ { "l2_io_error", KSTAT_DATA_UINT64 }, ++ { "l2_size", KSTAT_DATA_UINT64 }, ++ { "l2_hdr_size", KSTAT_DATA_UINT64 }, ++ { "memory_throttle_count", KSTAT_DATA_UINT64 }, ++ { "memory_direct_count", KSTAT_DATA_UINT64 }, ++ { "memory_indirect_count", KSTAT_DATA_UINT64 }, ++ { "arc_no_grow", KSTAT_DATA_UINT64 }, ++ { "arc_tempreserve", KSTAT_DATA_UINT64 }, ++ { "arc_loaned_bytes", KSTAT_DATA_UINT64 }, ++ { "arc_prune", KSTAT_DATA_UINT64 }, ++ { "arc_meta_used", KSTAT_DATA_UINT64 }, ++ { "arc_meta_limit", KSTAT_DATA_UINT64 }, ++ { "arc_meta_max", KSTAT_DATA_UINT64 }, ++}; ++ ++#define ARCSTAT(stat) (arc_stats.stat.value.ui64) 
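The arc_stats table above pairs every counter with a printable name, and the ARCSTAT*() macros that follow update those counters with nothing more than atomic adds plus a compare-and-swap retry loop for running maxima. The standalone userspace sketch below reproduces that pattern with C11 atomics rather than the SPL's kstat_named_t and atomic_cas_64() interfaces; the mystat_t type, the STAT*() macros and the stat_max() helper are illustrative names only and are not part of this patch.

#include <stdatomic.h>
#include <stdint.h>
#include <stdio.h>

/* Each statistic carries a human-readable name (as kstat_named_t does)
 * plus a 64-bit counter that is only ever touched with atomics. */
typedef struct mystat {
	const char *name;
	_Atomic uint64_t value;
} mystat_t;

static mystat_t stats[] = {
	{ "hits",		0 },
	{ "misses",		0 },
	{ "hash_chain_max",	0 },
};

/* Accessor and bump macros in the spirit of ARCSTAT()/ARCSTAT_BUMP();
 * the real code indexes by struct field name rather than array slot. */
#define	STAT(idx)		(stats[idx].value)
#define	STAT_INCR(idx, val)	atomic_fetch_add(&STAT(idx), (val))
#define	STAT_BUMP(idx)		STAT_INCR(idx, 1)

/* Lock-free running maximum: the same compare-and-swap retry loop that
 * ARCSTAT_MAX() uses. Only store the new value while it is still larger
 * than the most recently observed maximum. */
static void
stat_max(mystat_t *s, uint64_t val)
{
	uint64_t m = atomic_load(&s->value);

	while (val > m &&
	    !atomic_compare_exchange_weak(&s->value, &m, val))
		;	/* a failed CAS refreshed m; re-check and retry */
}

int
main(void)
{
	STAT_BUMP(0);			/* one cache hit */
	STAT_INCR(1, 3);		/* three misses */
	stat_max(&stats[2], 7);		/* longest hash chain seen so far */

	for (size_t i = 0; i < sizeof (stats) / sizeof (stats[0]); i++)
		printf("%s = %llu\n", stats[i].name,
		    (unsigned long long)atomic_load(&stats[i].value));
	return (0);
}

Keeping the counters in one flat array of named 64-bit values is what lets the module hand the whole block to the kstat framework unchanged, which on Linux builds of this code is how the statistics become readable under /proc/spl/kstat/zfs/arcstats.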
++ ++#define ARCSTAT_INCR(stat, val) \ ++ atomic_add_64(&arc_stats.stat.value.ui64, (val)); ++ ++#define ARCSTAT_BUMP(stat) ARCSTAT_INCR(stat, 1) ++#define ARCSTAT_BUMPDOWN(stat) ARCSTAT_INCR(stat, -1) ++ ++#define ARCSTAT_MAX(stat, val) { \ ++ uint64_t m; \ ++ while ((val) > (m = arc_stats.stat.value.ui64) && \ ++ (m != atomic_cas_64(&arc_stats.stat.value.ui64, m, (val)))) \ ++ continue; \ ++} ++ ++#define ARCSTAT_MAXSTAT(stat) \ ++ ARCSTAT_MAX(stat##_max, arc_stats.stat.value.ui64) ++ ++/* ++ * We define a macro to allow ARC hits/misses to be easily broken down by ++ * two separate conditions, giving a total of four different subtypes for ++ * each of hits and misses (so eight statistics total). ++ */ ++#define ARCSTAT_CONDSTAT(cond1, stat1, notstat1, cond2, stat2, notstat2, stat) \ ++ if (cond1) { \ ++ if (cond2) { \ ++ ARCSTAT_BUMP(arcstat_##stat1##_##stat2##_##stat); \ ++ } else { \ ++ ARCSTAT_BUMP(arcstat_##stat1##_##notstat2##_##stat); \ ++ } \ ++ } else { \ ++ if (cond2) { \ ++ ARCSTAT_BUMP(arcstat_##notstat1##_##stat2##_##stat); \ ++ } else { \ ++ ARCSTAT_BUMP(arcstat_##notstat1##_##notstat2##_##stat);\ ++ } \ ++ } ++ ++kstat_t *arc_ksp; ++static arc_state_t *arc_anon; ++static arc_state_t *arc_mru; ++static arc_state_t *arc_mru_ghost; ++static arc_state_t *arc_mfu; ++static arc_state_t *arc_mfu_ghost; ++static arc_state_t *arc_l2c_only; ++ ++/* ++ * There are several ARC variables that are critical to export as kstats -- ++ * but we don't want to have to grovel around in the kstat whenever we wish to ++ * manipulate them. For these variables, we therefore define them to be in ++ * terms of the statistic variable. This assures that we are not introducing ++ * the possibility of inconsistency by having shadow copies of the variables, ++ * while still allowing the code to be readable. 
++ */ ++#define arc_size ARCSTAT(arcstat_size) /* actual total arc size */ ++#define arc_p ARCSTAT(arcstat_p) /* target size of MRU */ ++#define arc_c ARCSTAT(arcstat_c) /* target size of cache */ ++#define arc_c_min ARCSTAT(arcstat_c_min) /* min target cache size */ ++#define arc_c_max ARCSTAT(arcstat_c_max) /* max target cache size */ ++#define arc_no_grow ARCSTAT(arcstat_no_grow) ++#define arc_tempreserve ARCSTAT(arcstat_tempreserve) ++#define arc_loaned_bytes ARCSTAT(arcstat_loaned_bytes) ++#define arc_meta_used ARCSTAT(arcstat_meta_used) ++#define arc_meta_limit ARCSTAT(arcstat_meta_limit) ++#define arc_meta_max ARCSTAT(arcstat_meta_max) ++ ++typedef struct l2arc_buf_hdr l2arc_buf_hdr_t; ++ ++typedef struct arc_callback arc_callback_t; ++ ++struct arc_callback { ++ void *acb_private; ++ arc_done_func_t *acb_done; ++ arc_buf_t *acb_buf; ++ zio_t *acb_zio_dummy; ++ arc_callback_t *acb_next; ++}; ++ ++typedef struct arc_write_callback arc_write_callback_t; ++ ++struct arc_write_callback { ++ void *awcb_private; ++ arc_done_func_t *awcb_ready; ++ arc_done_func_t *awcb_done; ++ arc_buf_t *awcb_buf; ++}; ++ ++struct arc_buf_hdr { ++ /* protected by hash lock */ ++ dva_t b_dva; ++ uint64_t b_birth; ++ uint64_t b_cksum0; ++ ++ kmutex_t b_freeze_lock; ++ zio_cksum_t *b_freeze_cksum; ++ void *b_thawed; ++ ++ arc_buf_hdr_t *b_hash_next; ++ arc_buf_t *b_buf; ++ uint32_t b_flags; ++ uint32_t b_datacnt; ++ ++ arc_callback_t *b_acb; ++ kcondvar_t b_cv; ++ ++ /* immutable */ ++ arc_buf_contents_t b_type; ++ uint64_t b_size; ++ uint64_t b_spa; ++ ++ /* protected by arc state mutex */ ++ arc_state_t *b_state; ++ list_node_t b_arc_node; ++ ++ /* updated atomically */ ++ clock_t b_arc_access; ++ ++ /* self protecting */ ++ refcount_t b_refcnt; ++ ++ l2arc_buf_hdr_t *b_l2hdr; ++ list_node_t b_l2node; ++}; ++ ++static list_t arc_prune_list; ++static kmutex_t arc_prune_mtx; ++static arc_buf_t *arc_eviction_list; ++static kmutex_t arc_eviction_mtx; ++static arc_buf_hdr_t arc_eviction_hdr; ++static void arc_get_data_buf(arc_buf_t *buf); ++static void arc_access(arc_buf_hdr_t *buf, kmutex_t *hash_lock); ++static int arc_evict_needed(arc_buf_contents_t type); ++static void arc_evict_ghost(arc_state_t *state, uint64_t spa, int64_t bytes); ++ ++static boolean_t l2arc_write_eligible(uint64_t spa_guid, arc_buf_hdr_t *ab); ++ ++#define GHOST_STATE(state) \ ++ ((state) == arc_mru_ghost || (state) == arc_mfu_ghost || \ ++ (state) == arc_l2c_only) ++ ++/* ++ * Private ARC flags. These flags are private ARC only flags that will show up ++ * in b_flags in the arc_hdr_buf_t. Some flags are publicly declared, and can ++ * be passed in as arc_flags in things like arc_read. However, these flags ++ * should never be passed and should only be set by ARC code. When adding new ++ * public flags, make sure not to smash the private ones. 
++ */ ++ ++#define ARC_IN_HASH_TABLE (1 << 9) /* this buffer is hashed */ ++#define ARC_IO_IN_PROGRESS (1 << 10) /* I/O in progress for buf */ ++#define ARC_IO_ERROR (1 << 11) /* I/O failed for buf */ ++#define ARC_FREED_IN_READ (1 << 12) /* buf freed while in read */ ++#define ARC_BUF_AVAILABLE (1 << 13) /* block not in active use */ ++#define ARC_INDIRECT (1 << 14) /* this is an indirect block */ ++#define ARC_FREE_IN_PROGRESS (1 << 15) /* hdr about to be freed */ ++#define ARC_L2_WRITING (1 << 16) /* L2ARC write in progress */ ++#define ARC_L2_EVICTED (1 << 17) /* evicted during I/O */ ++#define ARC_L2_WRITE_HEAD (1 << 18) /* head of write list */ ++ ++#define HDR_IN_HASH_TABLE(hdr) ((hdr)->b_flags & ARC_IN_HASH_TABLE) ++#define HDR_IO_IN_PROGRESS(hdr) ((hdr)->b_flags & ARC_IO_IN_PROGRESS) ++#define HDR_IO_ERROR(hdr) ((hdr)->b_flags & ARC_IO_ERROR) ++#define HDR_PREFETCH(hdr) ((hdr)->b_flags & ARC_PREFETCH) ++#define HDR_FREED_IN_READ(hdr) ((hdr)->b_flags & ARC_FREED_IN_READ) ++#define HDR_BUF_AVAILABLE(hdr) ((hdr)->b_flags & ARC_BUF_AVAILABLE) ++#define HDR_FREE_IN_PROGRESS(hdr) ((hdr)->b_flags & ARC_FREE_IN_PROGRESS) ++#define HDR_L2CACHE(hdr) ((hdr)->b_flags & ARC_L2CACHE) ++#define HDR_L2_READING(hdr) ((hdr)->b_flags & ARC_IO_IN_PROGRESS && \ ++ (hdr)->b_l2hdr != NULL) ++#define HDR_L2_WRITING(hdr) ((hdr)->b_flags & ARC_L2_WRITING) ++#define HDR_L2_EVICTED(hdr) ((hdr)->b_flags & ARC_L2_EVICTED) ++#define HDR_L2_WRITE_HEAD(hdr) ((hdr)->b_flags & ARC_L2_WRITE_HEAD) ++ ++/* ++ * Other sizes ++ */ ++ ++#define HDR_SIZE ((int64_t)sizeof (arc_buf_hdr_t)) ++#define L2HDR_SIZE ((int64_t)sizeof (l2arc_buf_hdr_t)) ++ ++/* ++ * Hash table routines ++ */ ++ ++#define HT_LOCK_ALIGN 64 ++#define HT_LOCK_PAD (P2NPHASE(sizeof (kmutex_t), (HT_LOCK_ALIGN))) ++ ++struct ht_lock { ++ kmutex_t ht_lock; ++#ifdef _KERNEL ++ unsigned char pad[HT_LOCK_PAD]; ++#endif ++}; ++ ++#define BUF_LOCKS 256 ++typedef struct buf_hash_table { ++ uint64_t ht_mask; ++ arc_buf_hdr_t **ht_table; ++ struct ht_lock ht_locks[BUF_LOCKS]; ++} buf_hash_table_t; ++ ++static buf_hash_table_t buf_hash_table; ++ ++#define BUF_HASH_INDEX(spa, dva, birth) \ ++ (buf_hash(spa, dva, birth) & buf_hash_table.ht_mask) ++#define BUF_HASH_LOCK_NTRY(idx) (buf_hash_table.ht_locks[idx & (BUF_LOCKS-1)]) ++#define BUF_HASH_LOCK(idx) (&(BUF_HASH_LOCK_NTRY(idx).ht_lock)) ++#define HDR_LOCK(hdr) \ ++ (BUF_HASH_LOCK(BUF_HASH_INDEX(hdr->b_spa, &hdr->b_dva, hdr->b_birth))) ++ ++uint64_t zfs_crc64_table[256]; ++ ++/* ++ * Level 2 ARC ++ */ ++ ++#define L2ARC_WRITE_SIZE (8 * 1024 * 1024) /* initial write max */ ++#define L2ARC_HEADROOM 2 /* num of writes */ ++#define L2ARC_FEED_SECS 1 /* caching interval secs */ ++#define L2ARC_FEED_MIN_MS 200 /* min caching interval ms */ ++ ++#define l2arc_writes_sent ARCSTAT(arcstat_l2_writes_sent) ++#define l2arc_writes_done ARCSTAT(arcstat_l2_writes_done) ++ ++/* ++ * L2ARC Performance Tunables ++ */ ++unsigned long l2arc_write_max = L2ARC_WRITE_SIZE; /* def max write size */ ++unsigned long l2arc_write_boost = L2ARC_WRITE_SIZE; /* extra warmup write */ ++unsigned long l2arc_headroom = L2ARC_HEADROOM; /* # of dev writes */ ++unsigned long l2arc_feed_secs = L2ARC_FEED_SECS; /* interval seconds */ ++unsigned long l2arc_feed_min_ms = L2ARC_FEED_MIN_MS; /* min interval msecs */ ++int l2arc_noprefetch = B_TRUE; /* don't cache prefetch bufs */ ++int l2arc_feed_again = B_TRUE; /* turbo warmup */ ++int l2arc_norw = B_TRUE; /* no reads during writes */ ++ ++/* ++ * L2ARC Internals ++ */ ++typedef struct l2arc_dev { ++ vdev_t 
*l2ad_vdev; /* vdev */ ++ spa_t *l2ad_spa; /* spa */ ++ uint64_t l2ad_hand; /* next write location */ ++ uint64_t l2ad_write; /* desired write size, bytes */ ++ uint64_t l2ad_boost; /* warmup write boost, bytes */ ++ uint64_t l2ad_start; /* first addr on device */ ++ uint64_t l2ad_end; /* last addr on device */ ++ uint64_t l2ad_evict; /* last addr eviction reached */ ++ boolean_t l2ad_first; /* first sweep through */ ++ boolean_t l2ad_writing; /* currently writing */ ++ list_t *l2ad_buflist; /* buffer list */ ++ list_node_t l2ad_node; /* device list node */ ++} l2arc_dev_t; ++ ++static list_t L2ARC_dev_list; /* device list */ ++static list_t *l2arc_dev_list; /* device list pointer */ ++static kmutex_t l2arc_dev_mtx; /* device list mutex */ ++static l2arc_dev_t *l2arc_dev_last; /* last device used */ ++static kmutex_t l2arc_buflist_mtx; /* mutex for all buflists */ ++static list_t L2ARC_free_on_write; /* free after write buf list */ ++static list_t *l2arc_free_on_write; /* free after write list ptr */ ++static kmutex_t l2arc_free_on_write_mtx; /* mutex for list */ ++static uint64_t l2arc_ndev; /* number of devices */ ++ ++typedef struct l2arc_read_callback { ++ arc_buf_t *l2rcb_buf; /* read buffer */ ++ spa_t *l2rcb_spa; /* spa */ ++ blkptr_t l2rcb_bp; /* original blkptr */ ++ zbookmark_t l2rcb_zb; /* original bookmark */ ++ int l2rcb_flags; /* original flags */ ++} l2arc_read_callback_t; ++ ++typedef struct l2arc_write_callback { ++ l2arc_dev_t *l2wcb_dev; /* device info */ ++ arc_buf_hdr_t *l2wcb_head; /* head of write buflist */ ++} l2arc_write_callback_t; ++ ++struct l2arc_buf_hdr { ++ /* protected by arc_buf_hdr mutex */ ++ l2arc_dev_t *b_dev; /* L2ARC device */ ++ uint64_t b_daddr; /* disk address, offset byte */ ++}; ++ ++typedef struct l2arc_data_free { ++ /* protected by l2arc_free_on_write_mtx */ ++ void *l2df_data; ++ size_t l2df_size; ++ void (*l2df_func)(void *, size_t); ++ list_node_t l2df_list_node; ++} l2arc_data_free_t; ++ ++static kmutex_t l2arc_feed_thr_lock; ++static kcondvar_t l2arc_feed_thr_cv; ++static uint8_t l2arc_thread_exit; ++ ++static void l2arc_read_done(zio_t *zio); ++static void l2arc_hdr_stat_add(void); ++static void l2arc_hdr_stat_remove(void); ++ ++static uint64_t ++buf_hash(uint64_t spa, const dva_t *dva, uint64_t birth) ++{ ++ uint8_t *vdva = (uint8_t *)dva; ++ uint64_t crc = -1ULL; ++ int i; ++ ++ ASSERT(zfs_crc64_table[128] == ZFS_CRC64_POLY); ++ ++ for (i = 0; i < sizeof (dva_t); i++) ++ crc = (crc >> 8) ^ zfs_crc64_table[(crc ^ vdva[i]) & 0xFF]; ++ ++ crc ^= (spa>>8) ^ birth; ++ ++ return (crc); ++} ++ ++#define BUF_EMPTY(buf) \ ++ ((buf)->b_dva.dva_word[0] == 0 && \ ++ (buf)->b_dva.dva_word[1] == 0 && \ ++ (buf)->b_birth == 0) ++ ++#define BUF_EQUAL(spa, dva, birth, buf) \ ++ ((buf)->b_dva.dva_word[0] == (dva)->dva_word[0]) && \ ++ ((buf)->b_dva.dva_word[1] == (dva)->dva_word[1]) && \ ++ ((buf)->b_birth == birth) && ((buf)->b_spa == spa) ++ ++static void ++buf_discard_identity(arc_buf_hdr_t *hdr) ++{ ++ hdr->b_dva.dva_word[0] = 0; ++ hdr->b_dva.dva_word[1] = 0; ++ hdr->b_birth = 0; ++ hdr->b_cksum0 = 0; ++} ++ ++static arc_buf_hdr_t * ++buf_hash_find(uint64_t spa, const dva_t *dva, uint64_t birth, kmutex_t **lockp) ++{ ++ uint64_t idx = BUF_HASH_INDEX(spa, dva, birth); ++ kmutex_t *hash_lock = BUF_HASH_LOCK(idx); ++ arc_buf_hdr_t *buf; ++ ++ mutex_enter(hash_lock); ++ for (buf = buf_hash_table.ht_table[idx]; buf != NULL; ++ buf = buf->b_hash_next) { ++ if (BUF_EQUAL(spa, dva, birth, buf)) { ++ *lockp = hash_lock; ++ return (buf); ++ } ++ } ++ 
mutex_exit(hash_lock); ++ *lockp = NULL; ++ return (NULL); ++} ++ ++/* ++ * Insert an entry into the hash table. If there is already an element ++ * equal to elem in the hash table, then the already existing element ++ * will be returned and the new element will not be inserted. ++ * Otherwise returns NULL. ++ */ ++static arc_buf_hdr_t * ++buf_hash_insert(arc_buf_hdr_t *buf, kmutex_t **lockp) ++{ ++ uint64_t idx = BUF_HASH_INDEX(buf->b_spa, &buf->b_dva, buf->b_birth); ++ kmutex_t *hash_lock = BUF_HASH_LOCK(idx); ++ arc_buf_hdr_t *fbuf; ++ uint32_t i; ++ ++ ASSERT(!HDR_IN_HASH_TABLE(buf)); ++ *lockp = hash_lock; ++ mutex_enter(hash_lock); ++ for (fbuf = buf_hash_table.ht_table[idx], i = 0; fbuf != NULL; ++ fbuf = fbuf->b_hash_next, i++) { ++ if (BUF_EQUAL(buf->b_spa, &buf->b_dva, buf->b_birth, fbuf)) ++ return (fbuf); ++ } ++ ++ buf->b_hash_next = buf_hash_table.ht_table[idx]; ++ buf_hash_table.ht_table[idx] = buf; ++ buf->b_flags |= ARC_IN_HASH_TABLE; ++ ++ /* collect some hash table performance data */ ++ if (i > 0) { ++ ARCSTAT_BUMP(arcstat_hash_collisions); ++ if (i == 1) ++ ARCSTAT_BUMP(arcstat_hash_chains); ++ ++ ARCSTAT_MAX(arcstat_hash_chain_max, i); ++ } ++ ++ ARCSTAT_BUMP(arcstat_hash_elements); ++ ARCSTAT_MAXSTAT(arcstat_hash_elements); ++ ++ return (NULL); ++} ++ ++static void ++buf_hash_remove(arc_buf_hdr_t *buf) ++{ ++ arc_buf_hdr_t *fbuf, **bufp; ++ uint64_t idx = BUF_HASH_INDEX(buf->b_spa, &buf->b_dva, buf->b_birth); ++ ++ ASSERT(MUTEX_HELD(BUF_HASH_LOCK(idx))); ++ ASSERT(HDR_IN_HASH_TABLE(buf)); ++ ++ bufp = &buf_hash_table.ht_table[idx]; ++ while ((fbuf = *bufp) != buf) { ++ ASSERT(fbuf != NULL); ++ bufp = &fbuf->b_hash_next; ++ } ++ *bufp = buf->b_hash_next; ++ buf->b_hash_next = NULL; ++ buf->b_flags &= ~ARC_IN_HASH_TABLE; ++ ++ /* collect some hash table performance data */ ++ ARCSTAT_BUMPDOWN(arcstat_hash_elements); ++ ++ if (buf_hash_table.ht_table[idx] && ++ buf_hash_table.ht_table[idx]->b_hash_next == NULL) ++ ARCSTAT_BUMPDOWN(arcstat_hash_chains); ++} ++ ++/* ++ * Global data structures and functions for the buf kmem cache. ++ */ ++static kmem_cache_t *hdr_cache; ++static kmem_cache_t *buf_cache; ++ ++static void ++buf_fini(void) ++{ ++ int i; ++ ++#if defined(_KERNEL) && defined(HAVE_SPL) ++ /* Large allocations which do not require contiguous pages ++ * should be using vmem_free() in the linux kernel */ ++ vmem_free(buf_hash_table.ht_table, ++ (buf_hash_table.ht_mask + 1) * sizeof (void *)); ++#else ++ kmem_free(buf_hash_table.ht_table, ++ (buf_hash_table.ht_mask + 1) * sizeof (void *)); ++#endif ++ for (i = 0; i < BUF_LOCKS; i++) ++ mutex_destroy(&buf_hash_table.ht_locks[i].ht_lock); ++ kmem_cache_destroy(hdr_cache); ++ kmem_cache_destroy(buf_cache); ++} ++ ++/* ++ * Constructor callback - called when the cache is empty ++ * and a new buf is requested. 
++ */ ++/* ARGSUSED */ ++static int ++hdr_cons(void *vbuf, void *unused, int kmflag) ++{ ++ arc_buf_hdr_t *buf = vbuf; ++ ++ bzero(buf, sizeof (arc_buf_hdr_t)); ++ refcount_create(&buf->b_refcnt); ++ cv_init(&buf->b_cv, NULL, CV_DEFAULT, NULL); ++ mutex_init(&buf->b_freeze_lock, NULL, MUTEX_DEFAULT, NULL); ++ list_link_init(&buf->b_arc_node); ++ list_link_init(&buf->b_l2node); ++ arc_space_consume(sizeof (arc_buf_hdr_t), ARC_SPACE_HDRS); ++ ++ return (0); ++} ++ ++/* ARGSUSED */ ++static int ++buf_cons(void *vbuf, void *unused, int kmflag) ++{ ++ arc_buf_t *buf = vbuf; ++ ++ bzero(buf, sizeof (arc_buf_t)); ++ mutex_init(&buf->b_evict_lock, NULL, MUTEX_DEFAULT, NULL); ++ rw_init(&buf->b_data_lock, NULL, RW_DEFAULT, NULL); ++ arc_space_consume(sizeof (arc_buf_t), ARC_SPACE_HDRS); ++ ++ return (0); ++} ++ ++/* ++ * Destructor callback - called when a cached buf is ++ * no longer required. ++ */ ++/* ARGSUSED */ ++static void ++hdr_dest(void *vbuf, void *unused) ++{ ++ arc_buf_hdr_t *buf = vbuf; ++ ++ ASSERT(BUF_EMPTY(buf)); ++ refcount_destroy(&buf->b_refcnt); ++ cv_destroy(&buf->b_cv); ++ mutex_destroy(&buf->b_freeze_lock); ++ arc_space_return(sizeof (arc_buf_hdr_t), ARC_SPACE_HDRS); ++} ++ ++/* ARGSUSED */ ++static void ++buf_dest(void *vbuf, void *unused) ++{ ++ arc_buf_t *buf = vbuf; ++ ++ mutex_destroy(&buf->b_evict_lock); ++ rw_destroy(&buf->b_data_lock); ++ arc_space_return(sizeof (arc_buf_t), ARC_SPACE_HDRS); ++} ++ ++static void ++buf_init(void) ++{ ++ uint64_t *ct; ++ uint64_t hsize = 1ULL << 12; ++ int i, j; ++ ++ /* ++ * The hash table is big enough to fill all of physical memory ++ * with an average 64K block size. The table will take up ++ * totalmem*sizeof(void*)/64K (eg. 128KB/GB with 8-byte pointers). ++ */ ++ while (hsize * 65536 < physmem * PAGESIZE) ++ hsize <<= 1; ++retry: ++ buf_hash_table.ht_mask = hsize - 1; ++#if defined(_KERNEL) && defined(HAVE_SPL) ++ /* Large allocations which do not require contiguous pages ++ * should be using vmem_alloc() in the linux kernel */ ++ buf_hash_table.ht_table = ++ vmem_zalloc(hsize * sizeof (void*), KM_SLEEP); ++#else ++ buf_hash_table.ht_table = ++ kmem_zalloc(hsize * sizeof (void*), KM_NOSLEEP); ++#endif ++ if (buf_hash_table.ht_table == NULL) { ++ ASSERT(hsize > (1ULL << 8)); ++ hsize >>= 1; ++ goto retry; ++ } ++ ++ hdr_cache = kmem_cache_create("arc_buf_hdr_t", sizeof (arc_buf_hdr_t), ++ 0, hdr_cons, hdr_dest, NULL, NULL, NULL, 0); ++ buf_cache = kmem_cache_create("arc_buf_t", sizeof (arc_buf_t), ++ 0, buf_cons, buf_dest, NULL, NULL, NULL, 0); ++ ++ for (i = 0; i < 256; i++) ++ for (ct = zfs_crc64_table + i, *ct = i, j = 8; j > 0; j--) ++ *ct = (*ct >> 1) ^ (-(*ct & 1) & ZFS_CRC64_POLY); ++ ++ for (i = 0; i < BUF_LOCKS; i++) { ++ mutex_init(&buf_hash_table.ht_locks[i].ht_lock, ++ NULL, MUTEX_DEFAULT, NULL); ++ } ++} ++ ++#define ARC_MINTIME (hz>>4) /* 62 ms */ ++ ++static void ++arc_cksum_verify(arc_buf_t *buf) ++{ ++ zio_cksum_t zc; ++ ++ if (!(zfs_flags & ZFS_DEBUG_MODIFY)) ++ return; ++ ++ mutex_enter(&buf->b_hdr->b_freeze_lock); ++ if (buf->b_hdr->b_freeze_cksum == NULL || ++ (buf->b_hdr->b_flags & ARC_IO_ERROR)) { ++ mutex_exit(&buf->b_hdr->b_freeze_lock); ++ return; ++ } ++ fletcher_2_native(buf->b_data, buf->b_hdr->b_size, &zc); ++ if (!ZIO_CHECKSUM_EQUAL(*buf->b_hdr->b_freeze_cksum, zc)) ++ panic("buffer modified while frozen!"); ++ mutex_exit(&buf->b_hdr->b_freeze_lock); ++} ++ ++static int ++arc_cksum_equal(arc_buf_t *buf) ++{ ++ zio_cksum_t zc; ++ int equal; ++ ++ mutex_enter(&buf->b_hdr->b_freeze_lock); ++ 
fletcher_2_native(buf->b_data, buf->b_hdr->b_size, &zc); ++ equal = ZIO_CHECKSUM_EQUAL(*buf->b_hdr->b_freeze_cksum, zc); ++ mutex_exit(&buf->b_hdr->b_freeze_lock); ++ ++ return (equal); ++} ++ ++static void ++arc_cksum_compute(arc_buf_t *buf, boolean_t force) ++{ ++ if (!force && !(zfs_flags & ZFS_DEBUG_MODIFY)) ++ return; ++ ++ mutex_enter(&buf->b_hdr->b_freeze_lock); ++ if (buf->b_hdr->b_freeze_cksum != NULL) { ++ mutex_exit(&buf->b_hdr->b_freeze_lock); ++ return; ++ } ++ buf->b_hdr->b_freeze_cksum = kmem_alloc(sizeof (zio_cksum_t), ++ KM_PUSHPAGE); ++ fletcher_2_native(buf->b_data, buf->b_hdr->b_size, ++ buf->b_hdr->b_freeze_cksum); ++ mutex_exit(&buf->b_hdr->b_freeze_lock); ++} ++ ++void ++arc_buf_thaw(arc_buf_t *buf) ++{ ++ if (zfs_flags & ZFS_DEBUG_MODIFY) { ++ if (buf->b_hdr->b_state != arc_anon) ++ panic("modifying non-anon buffer!"); ++ if (buf->b_hdr->b_flags & ARC_IO_IN_PROGRESS) ++ panic("modifying buffer while i/o in progress!"); ++ arc_cksum_verify(buf); ++ } ++ ++ mutex_enter(&buf->b_hdr->b_freeze_lock); ++ if (buf->b_hdr->b_freeze_cksum != NULL) { ++ kmem_free(buf->b_hdr->b_freeze_cksum, sizeof (zio_cksum_t)); ++ buf->b_hdr->b_freeze_cksum = NULL; ++ } ++ ++ if (zfs_flags & ZFS_DEBUG_MODIFY) { ++ if (buf->b_hdr->b_thawed) ++ kmem_free(buf->b_hdr->b_thawed, 1); ++ buf->b_hdr->b_thawed = kmem_alloc(1, KM_SLEEP); ++ } ++ ++ mutex_exit(&buf->b_hdr->b_freeze_lock); ++} ++ ++void ++arc_buf_freeze(arc_buf_t *buf) ++{ ++ kmutex_t *hash_lock; ++ ++ if (!(zfs_flags & ZFS_DEBUG_MODIFY)) ++ return; ++ ++ hash_lock = HDR_LOCK(buf->b_hdr); ++ mutex_enter(hash_lock); ++ ++ ASSERT(buf->b_hdr->b_freeze_cksum != NULL || ++ buf->b_hdr->b_state == arc_anon); ++ arc_cksum_compute(buf, B_FALSE); ++ mutex_exit(hash_lock); ++} ++ ++static void ++add_reference(arc_buf_hdr_t *ab, kmutex_t *hash_lock, void *tag) ++{ ++ ASSERT(MUTEX_HELD(hash_lock)); ++ ++ if ((refcount_add(&ab->b_refcnt, tag) == 1) && ++ (ab->b_state != arc_anon)) { ++ uint64_t delta = ab->b_size * ab->b_datacnt; ++ list_t *list = &ab->b_state->arcs_list[ab->b_type]; ++ uint64_t *size = &ab->b_state->arcs_lsize[ab->b_type]; ++ ++ ASSERT(!MUTEX_HELD(&ab->b_state->arcs_mtx)); ++ mutex_enter(&ab->b_state->arcs_mtx); ++ ASSERT(list_link_active(&ab->b_arc_node)); ++ list_remove(list, ab); ++ if (GHOST_STATE(ab->b_state)) { ++ ASSERT3U(ab->b_datacnt, ==, 0); ++ ASSERT3P(ab->b_buf, ==, NULL); ++ delta = ab->b_size; ++ } ++ ASSERT(delta > 0); ++ ASSERT3U(*size, >=, delta); ++ atomic_add_64(size, -delta); ++ mutex_exit(&ab->b_state->arcs_mtx); ++ /* remove the prefetch flag if we get a reference */ ++ if (ab->b_flags & ARC_PREFETCH) ++ ab->b_flags &= ~ARC_PREFETCH; ++ } ++} ++ ++static int ++remove_reference(arc_buf_hdr_t *ab, kmutex_t *hash_lock, void *tag) ++{ ++ int cnt; ++ arc_state_t *state = ab->b_state; ++ ++ ASSERT(state == arc_anon || MUTEX_HELD(hash_lock)); ++ ASSERT(!GHOST_STATE(state)); ++ ++ if (((cnt = refcount_remove(&ab->b_refcnt, tag)) == 0) && ++ (state != arc_anon)) { ++ uint64_t *size = &state->arcs_lsize[ab->b_type]; ++ ++ ASSERT(!MUTEX_HELD(&state->arcs_mtx)); ++ mutex_enter(&state->arcs_mtx); ++ ASSERT(!list_link_active(&ab->b_arc_node)); ++ list_insert_head(&state->arcs_list[ab->b_type], ab); ++ ASSERT(ab->b_datacnt > 0); ++ atomic_add_64(size, ab->b_size * ab->b_datacnt); ++ mutex_exit(&state->arcs_mtx); ++ } ++ return (cnt); ++} ++ ++/* ++ * Move the supplied buffer to the indicated state. The mutex ++ * for the buffer must be held by the caller. 
++ */ ++static void ++arc_change_state(arc_state_t *new_state, arc_buf_hdr_t *ab, kmutex_t *hash_lock) ++{ ++ arc_state_t *old_state = ab->b_state; ++ int64_t refcnt = refcount_count(&ab->b_refcnt); ++ uint64_t from_delta, to_delta; ++ ++ ASSERT(MUTEX_HELD(hash_lock)); ++ ASSERT(new_state != old_state); ++ ASSERT(refcnt == 0 || ab->b_datacnt > 0); ++ ASSERT(ab->b_datacnt == 0 || !GHOST_STATE(new_state)); ++ ASSERT(ab->b_datacnt <= 1 || old_state != arc_anon); ++ ++ from_delta = to_delta = ab->b_datacnt * ab->b_size; ++ ++ /* ++ * If this buffer is evictable, transfer it from the ++ * old state list to the new state list. ++ */ ++ if (refcnt == 0) { ++ if (old_state != arc_anon) { ++ int use_mutex = !MUTEX_HELD(&old_state->arcs_mtx); ++ uint64_t *size = &old_state->arcs_lsize[ab->b_type]; ++ ++ if (use_mutex) ++ mutex_enter(&old_state->arcs_mtx); ++ ++ ASSERT(list_link_active(&ab->b_arc_node)); ++ list_remove(&old_state->arcs_list[ab->b_type], ab); ++ ++ /* ++ * If prefetching out of the ghost cache, ++ * we will have a non-zero datacnt. ++ */ ++ if (GHOST_STATE(old_state) && ab->b_datacnt == 0) { ++ /* ghost elements have a ghost size */ ++ ASSERT(ab->b_buf == NULL); ++ from_delta = ab->b_size; ++ } ++ ASSERT3U(*size, >=, from_delta); ++ atomic_add_64(size, -from_delta); ++ ++ if (use_mutex) ++ mutex_exit(&old_state->arcs_mtx); ++ } ++ if (new_state != arc_anon) { ++ int use_mutex = !MUTEX_HELD(&new_state->arcs_mtx); ++ uint64_t *size = &new_state->arcs_lsize[ab->b_type]; ++ ++ if (use_mutex) ++ mutex_enter(&new_state->arcs_mtx); ++ ++ list_insert_head(&new_state->arcs_list[ab->b_type], ab); ++ ++ /* ghost elements have a ghost size */ ++ if (GHOST_STATE(new_state)) { ++ ASSERT(ab->b_datacnt == 0); ++ ASSERT(ab->b_buf == NULL); ++ to_delta = ab->b_size; ++ } ++ atomic_add_64(size, to_delta); ++ ++ if (use_mutex) ++ mutex_exit(&new_state->arcs_mtx); ++ } ++ } ++ ++ ASSERT(!BUF_EMPTY(ab)); ++ if (new_state == arc_anon && HDR_IN_HASH_TABLE(ab)) ++ buf_hash_remove(ab); ++ ++ /* adjust state sizes */ ++ if (to_delta) ++ atomic_add_64(&new_state->arcs_size, to_delta); ++ if (from_delta) { ++ ASSERT3U(old_state->arcs_size, >=, from_delta); ++ atomic_add_64(&old_state->arcs_size, -from_delta); ++ } ++ ab->b_state = new_state; ++ ++ /* adjust l2arc hdr stats */ ++ if (new_state == arc_l2c_only) ++ l2arc_hdr_stat_add(); ++ else if (old_state == arc_l2c_only) ++ l2arc_hdr_stat_remove(); ++} ++ ++void ++arc_space_consume(uint64_t space, arc_space_type_t type) ++{ ++ ASSERT(type >= 0 && type < ARC_SPACE_NUMTYPES); ++ ++ switch (type) { ++ default: ++ break; ++ case ARC_SPACE_DATA: ++ ARCSTAT_INCR(arcstat_data_size, space); ++ break; ++ case ARC_SPACE_OTHER: ++ ARCSTAT_INCR(arcstat_other_size, space); ++ break; ++ case ARC_SPACE_HDRS: ++ ARCSTAT_INCR(arcstat_hdr_size, space); ++ break; ++ case ARC_SPACE_L2HDRS: ++ ARCSTAT_INCR(arcstat_l2_hdr_size, space); ++ break; ++ } ++ ++ atomic_add_64(&arc_meta_used, space); ++ atomic_add_64(&arc_size, space); ++} ++ ++void ++arc_space_return(uint64_t space, arc_space_type_t type) ++{ ++ ASSERT(type >= 0 && type < ARC_SPACE_NUMTYPES); ++ ++ switch (type) { ++ default: ++ break; ++ case ARC_SPACE_DATA: ++ ARCSTAT_INCR(arcstat_data_size, -space); ++ break; ++ case ARC_SPACE_OTHER: ++ ARCSTAT_INCR(arcstat_other_size, -space); ++ break; ++ case ARC_SPACE_HDRS: ++ ARCSTAT_INCR(arcstat_hdr_size, -space); ++ break; ++ case ARC_SPACE_L2HDRS: ++ ARCSTAT_INCR(arcstat_l2_hdr_size, -space); ++ break; ++ } ++ ++ ASSERT(arc_meta_used >= space); ++ if (arc_meta_max < 
arc_meta_used) ++ arc_meta_max = arc_meta_used; ++ atomic_add_64(&arc_meta_used, -space); ++ ASSERT(arc_size >= space); ++ atomic_add_64(&arc_size, -space); ++} ++ ++void * ++arc_data_buf_alloc(uint64_t size) ++{ ++ if (arc_evict_needed(ARC_BUFC_DATA)) ++ cv_signal(&arc_reclaim_thr_cv); ++ atomic_add_64(&arc_size, size); ++ return (zio_data_buf_alloc(size)); ++} ++ ++void ++arc_data_buf_free(void *buf, uint64_t size) ++{ ++ zio_data_buf_free(buf, size); ++ ASSERT(arc_size >= size); ++ atomic_add_64(&arc_size, -size); ++} ++ ++arc_buf_t * ++arc_buf_alloc(spa_t *spa, int size, void *tag, arc_buf_contents_t type) ++{ ++ arc_buf_hdr_t *hdr; ++ arc_buf_t *buf; ++ ++ ASSERT3U(size, >, 0); ++ hdr = kmem_cache_alloc(hdr_cache, KM_PUSHPAGE); ++ ASSERT(BUF_EMPTY(hdr)); ++ hdr->b_size = size; ++ hdr->b_type = type; ++ hdr->b_spa = spa_load_guid(spa); ++ hdr->b_state = arc_anon; ++ hdr->b_arc_access = 0; ++ buf = kmem_cache_alloc(buf_cache, KM_PUSHPAGE); ++ buf->b_hdr = hdr; ++ buf->b_data = NULL; ++ buf->b_efunc = NULL; ++ buf->b_private = NULL; ++ buf->b_next = NULL; ++ hdr->b_buf = buf; ++ arc_get_data_buf(buf); ++ hdr->b_datacnt = 1; ++ hdr->b_flags = 0; ++ ASSERT(refcount_is_zero(&hdr->b_refcnt)); ++ (void) refcount_add(&hdr->b_refcnt, tag); ++ ++ return (buf); ++} ++ ++static char *arc_onloan_tag = "onloan"; ++ ++/* ++ * Loan out an anonymous arc buffer. Loaned buffers are not counted as in ++ * flight data by arc_tempreserve_space() until they are "returned". Loaned ++ * buffers must be returned to the arc before they can be used by the DMU or ++ * freed. ++ */ ++arc_buf_t * ++arc_loan_buf(spa_t *spa, int size) ++{ ++ arc_buf_t *buf; ++ ++ buf = arc_buf_alloc(spa, size, arc_onloan_tag, ARC_BUFC_DATA); ++ ++ atomic_add_64(&arc_loaned_bytes, size); ++ return (buf); ++} ++ ++/* ++ * Return a loaned arc buffer to the arc. ++ */ ++void ++arc_return_buf(arc_buf_t *buf, void *tag) ++{ ++ arc_buf_hdr_t *hdr = buf->b_hdr; ++ ++ ASSERT(buf->b_data != NULL); ++ (void) refcount_add(&hdr->b_refcnt, tag); ++ (void) refcount_remove(&hdr->b_refcnt, arc_onloan_tag); ++ ++ atomic_add_64(&arc_loaned_bytes, -hdr->b_size); ++} ++ ++/* Detach an arc_buf from a dbuf (tag) */ ++void ++arc_loan_inuse_buf(arc_buf_t *buf, void *tag) ++{ ++ arc_buf_hdr_t *hdr; ++ ++ ASSERT(buf->b_data != NULL); ++ hdr = buf->b_hdr; ++ (void) refcount_add(&hdr->b_refcnt, arc_onloan_tag); ++ (void) refcount_remove(&hdr->b_refcnt, tag); ++ buf->b_efunc = NULL; ++ buf->b_private = NULL; ++ ++ atomic_add_64(&arc_loaned_bytes, hdr->b_size); ++} ++ ++static arc_buf_t * ++arc_buf_clone(arc_buf_t *from) ++{ ++ arc_buf_t *buf; ++ arc_buf_hdr_t *hdr = from->b_hdr; ++ uint64_t size = hdr->b_size; ++ ++ ASSERT(hdr->b_state != arc_anon); ++ ++ buf = kmem_cache_alloc(buf_cache, KM_PUSHPAGE); ++ buf->b_hdr = hdr; ++ buf->b_data = NULL; ++ buf->b_efunc = NULL; ++ buf->b_private = NULL; ++ buf->b_next = hdr->b_buf; ++ hdr->b_buf = buf; ++ arc_get_data_buf(buf); ++ bcopy(from->b_data, buf->b_data, size); ++ hdr->b_datacnt += 1; ++ return (buf); ++} ++ ++void ++arc_buf_add_ref(arc_buf_t *buf, void* tag) ++{ ++ arc_buf_hdr_t *hdr; ++ kmutex_t *hash_lock; ++ ++ /* ++ * Check to see if this buffer is evicted. Callers ++ * must verify b_data != NULL to know if the add_ref ++ * was successful. 
++ */ ++ mutex_enter(&buf->b_evict_lock); ++ if (buf->b_data == NULL) { ++ mutex_exit(&buf->b_evict_lock); ++ return; ++ } ++ hash_lock = HDR_LOCK(buf->b_hdr); ++ mutex_enter(hash_lock); ++ hdr = buf->b_hdr; ++ ASSERT3P(hash_lock, ==, HDR_LOCK(hdr)); ++ mutex_exit(&buf->b_evict_lock); ++ ++ ASSERT(hdr->b_state == arc_mru || hdr->b_state == arc_mfu); ++ add_reference(hdr, hash_lock, tag); ++ DTRACE_PROBE1(arc__hit, arc_buf_hdr_t *, hdr); ++ arc_access(hdr, hash_lock); ++ mutex_exit(hash_lock); ++ ARCSTAT_BUMP(arcstat_hits); ++ ARCSTAT_CONDSTAT(!(hdr->b_flags & ARC_PREFETCH), ++ demand, prefetch, hdr->b_type != ARC_BUFC_METADATA, ++ data, metadata, hits); ++} ++ ++/* ++ * Free the arc data buffer. If it is an l2arc write in progress, ++ * the buffer is placed on l2arc_free_on_write to be freed later. ++ */ ++static void ++arc_buf_data_free(arc_buf_hdr_t *hdr, void (*free_func)(void *, size_t), ++ void *data, size_t size) ++{ ++ if (HDR_L2_WRITING(hdr)) { ++ l2arc_data_free_t *df; ++ df = kmem_alloc(sizeof (l2arc_data_free_t), KM_PUSHPAGE); ++ df->l2df_data = data; ++ df->l2df_size = size; ++ df->l2df_func = free_func; ++ mutex_enter(&l2arc_free_on_write_mtx); ++ list_insert_head(l2arc_free_on_write, df); ++ mutex_exit(&l2arc_free_on_write_mtx); ++ ARCSTAT_BUMP(arcstat_l2_free_on_write); ++ } else { ++ free_func(data, size); ++ } ++} ++ ++static void ++arc_buf_destroy(arc_buf_t *buf, boolean_t recycle, boolean_t all) ++{ ++ arc_buf_t **bufp; ++ ++ /* free up data associated with the buf */ ++ if (buf->b_data) { ++ arc_state_t *state = buf->b_hdr->b_state; ++ uint64_t size = buf->b_hdr->b_size; ++ arc_buf_contents_t type = buf->b_hdr->b_type; ++ ++ arc_cksum_verify(buf); ++ ++ if (!recycle) { ++ if (type == ARC_BUFC_METADATA) { ++ arc_buf_data_free(buf->b_hdr, zio_buf_free, ++ buf->b_data, size); ++ arc_space_return(size, ARC_SPACE_DATA); ++ } else { ++ ASSERT(type == ARC_BUFC_DATA); ++ arc_buf_data_free(buf->b_hdr, ++ zio_data_buf_free, buf->b_data, size); ++ ARCSTAT_INCR(arcstat_data_size, -size); ++ atomic_add_64(&arc_size, -size); ++ } ++ } ++ if (list_link_active(&buf->b_hdr->b_arc_node)) { ++ uint64_t *cnt = &state->arcs_lsize[type]; ++ ++ ASSERT(refcount_is_zero(&buf->b_hdr->b_refcnt)); ++ ASSERT(state != arc_anon); ++ ++ ASSERT3U(*cnt, >=, size); ++ atomic_add_64(cnt, -size); ++ } ++ ASSERT3U(state->arcs_size, >=, size); ++ atomic_add_64(&state->arcs_size, -size); ++ buf->b_data = NULL; ++ ASSERT(buf->b_hdr->b_datacnt > 0); ++ buf->b_hdr->b_datacnt -= 1; ++ } ++ ++ /* only remove the buf if requested */ ++ if (!all) ++ return; ++ ++ /* remove the buf from the hdr list */ ++ for (bufp = &buf->b_hdr->b_buf; *bufp != buf; bufp = &(*bufp)->b_next) ++ continue; ++ *bufp = buf->b_next; ++ buf->b_next = NULL; ++ ++ ASSERT(buf->b_efunc == NULL); ++ ++ /* clean up the buf */ ++ buf->b_hdr = NULL; ++ kmem_cache_free(buf_cache, buf); ++} ++ ++static void ++arc_hdr_destroy(arc_buf_hdr_t *hdr) ++{ ++ l2arc_buf_hdr_t *l2hdr = hdr->b_l2hdr; ++ ++ ASSERT(refcount_is_zero(&hdr->b_refcnt)); ++ ASSERT3P(hdr->b_state, ==, arc_anon); ++ ASSERT(!HDR_IO_IN_PROGRESS(hdr)); ++ ++ if (l2hdr != NULL) { ++ boolean_t buflist_held = MUTEX_HELD(&l2arc_buflist_mtx); ++ /* ++ * To prevent arc_free() and l2arc_evict() from ++ * attempting to free the same buffer at the same time, ++ * a FREE_IN_PROGRESS flag is given to arc_free() to ++ * give it priority. l2arc_evict() can't destroy this ++ * header while we are waiting on l2arc_buflist_mtx. 
++ * ++ * The hdr may be removed from l2ad_buflist before we ++ * grab l2arc_buflist_mtx, so b_l2hdr is rechecked. ++ */ ++ if (!buflist_held) { ++ mutex_enter(&l2arc_buflist_mtx); ++ l2hdr = hdr->b_l2hdr; ++ } ++ ++ if (l2hdr != NULL) { ++ list_remove(l2hdr->b_dev->l2ad_buflist, hdr); ++ ARCSTAT_INCR(arcstat_l2_size, -hdr->b_size); ++ kmem_free(l2hdr, sizeof (l2arc_buf_hdr_t)); ++ if (hdr->b_state == arc_l2c_only) ++ l2arc_hdr_stat_remove(); ++ hdr->b_l2hdr = NULL; ++ } ++ ++ if (!buflist_held) ++ mutex_exit(&l2arc_buflist_mtx); ++ } ++ ++ if (!BUF_EMPTY(hdr)) { ++ ASSERT(!HDR_IN_HASH_TABLE(hdr)); ++ buf_discard_identity(hdr); ++ } ++ while (hdr->b_buf) { ++ arc_buf_t *buf = hdr->b_buf; ++ ++ if (buf->b_efunc) { ++ mutex_enter(&arc_eviction_mtx); ++ mutex_enter(&buf->b_evict_lock); ++ ASSERT(buf->b_hdr != NULL); ++ arc_buf_destroy(hdr->b_buf, FALSE, FALSE); ++ hdr->b_buf = buf->b_next; ++ buf->b_hdr = &arc_eviction_hdr; ++ buf->b_next = arc_eviction_list; ++ arc_eviction_list = buf; ++ mutex_exit(&buf->b_evict_lock); ++ mutex_exit(&arc_eviction_mtx); ++ } else { ++ arc_buf_destroy(hdr->b_buf, FALSE, TRUE); ++ } ++ } ++ if (hdr->b_freeze_cksum != NULL) { ++ kmem_free(hdr->b_freeze_cksum, sizeof (zio_cksum_t)); ++ hdr->b_freeze_cksum = NULL; ++ } ++ if (hdr->b_thawed) { ++ kmem_free(hdr->b_thawed, 1); ++ hdr->b_thawed = NULL; ++ } ++ ++ ASSERT(!list_link_active(&hdr->b_arc_node)); ++ ASSERT3P(hdr->b_hash_next, ==, NULL); ++ ASSERT3P(hdr->b_acb, ==, NULL); ++ kmem_cache_free(hdr_cache, hdr); ++} ++ ++void ++arc_buf_free(arc_buf_t *buf, void *tag) ++{ ++ arc_buf_hdr_t *hdr = buf->b_hdr; ++ int hashed = hdr->b_state != arc_anon; ++ ++ ASSERT(buf->b_efunc == NULL); ++ ASSERT(buf->b_data != NULL); ++ ++ if (hashed) { ++ kmutex_t *hash_lock = HDR_LOCK(hdr); ++ ++ mutex_enter(hash_lock); ++ hdr = buf->b_hdr; ++ ASSERT3P(hash_lock, ==, HDR_LOCK(hdr)); ++ ++ (void) remove_reference(hdr, hash_lock, tag); ++ if (hdr->b_datacnt > 1) { ++ arc_buf_destroy(buf, FALSE, TRUE); ++ } else { ++ ASSERT(buf == hdr->b_buf); ++ ASSERT(buf->b_efunc == NULL); ++ hdr->b_flags |= ARC_BUF_AVAILABLE; ++ } ++ mutex_exit(hash_lock); ++ } else if (HDR_IO_IN_PROGRESS(hdr)) { ++ int destroy_hdr; ++ /* ++ * We are in the middle of an async write. Don't destroy ++ * this buffer unless the write completes before we finish ++ * decrementing the reference count. 
++ */ ++ mutex_enter(&arc_eviction_mtx); ++ (void) remove_reference(hdr, NULL, tag); ++ ASSERT(refcount_is_zero(&hdr->b_refcnt)); ++ destroy_hdr = !HDR_IO_IN_PROGRESS(hdr); ++ mutex_exit(&arc_eviction_mtx); ++ if (destroy_hdr) ++ arc_hdr_destroy(hdr); ++ } else { ++ if (remove_reference(hdr, NULL, tag) > 0) ++ arc_buf_destroy(buf, FALSE, TRUE); ++ else ++ arc_hdr_destroy(hdr); ++ } ++} ++ ++int ++arc_buf_remove_ref(arc_buf_t *buf, void* tag) ++{ ++ arc_buf_hdr_t *hdr = buf->b_hdr; ++ kmutex_t *hash_lock = HDR_LOCK(hdr); ++ int no_callback = (buf->b_efunc == NULL); ++ ++ if (hdr->b_state == arc_anon) { ++ ASSERT(hdr->b_datacnt == 1); ++ arc_buf_free(buf, tag); ++ return (no_callback); ++ } ++ ++ mutex_enter(hash_lock); ++ hdr = buf->b_hdr; ++ ASSERT3P(hash_lock, ==, HDR_LOCK(hdr)); ++ ASSERT(hdr->b_state != arc_anon); ++ ASSERT(buf->b_data != NULL); ++ ++ (void) remove_reference(hdr, hash_lock, tag); ++ if (hdr->b_datacnt > 1) { ++ if (no_callback) ++ arc_buf_destroy(buf, FALSE, TRUE); ++ } else if (no_callback) { ++ ASSERT(hdr->b_buf == buf && buf->b_next == NULL); ++ ASSERT(buf->b_efunc == NULL); ++ hdr->b_flags |= ARC_BUF_AVAILABLE; ++ } ++ ASSERT(no_callback || hdr->b_datacnt > 1 || ++ refcount_is_zero(&hdr->b_refcnt)); ++ mutex_exit(hash_lock); ++ return (no_callback); ++} ++ ++int ++arc_buf_size(arc_buf_t *buf) ++{ ++ return (buf->b_hdr->b_size); ++} ++ ++/* ++ * Evict buffers from list until we've removed the specified number of ++ * bytes. Move the removed buffers to the appropriate evict state. ++ * If the recycle flag is set, then attempt to "recycle" a buffer: ++ * - look for a buffer to evict that is `bytes' long. ++ * - return the data block from this buffer rather than freeing it. ++ * This flag is used by callers that are trying to make space for a ++ * new buffer in a full arc cache. ++ * ++ * This function makes a "best effort". It skips over any buffers ++ * it can't get a hash_lock on, and so may not catch all candidates. ++ * It may also return without evicting as much space as requested. ++ */ ++static void * ++arc_evict(arc_state_t *state, uint64_t spa, int64_t bytes, boolean_t recycle, ++ arc_buf_contents_t type) ++{ ++ arc_state_t *evicted_state; ++ uint64_t bytes_evicted = 0, skipped = 0, missed = 0; ++ arc_buf_hdr_t *ab, *ab_prev = NULL; ++ list_t *list = &state->arcs_list[type]; ++ kmutex_t *hash_lock; ++ boolean_t have_lock; ++ void *stolen = NULL; ++ ++ ASSERT(state == arc_mru || state == arc_mfu); ++ ++ evicted_state = (state == arc_mru) ? 
arc_mru_ghost : arc_mfu_ghost; ++ ++ mutex_enter(&state->arcs_mtx); ++ mutex_enter(&evicted_state->arcs_mtx); ++ ++ for (ab = list_tail(list); ab; ab = ab_prev) { ++ ab_prev = list_prev(list, ab); ++ /* prefetch buffers have a minimum lifespan */ ++ if (HDR_IO_IN_PROGRESS(ab) || ++ (spa && ab->b_spa != spa) || ++ (ab->b_flags & (ARC_PREFETCH|ARC_INDIRECT) && ++ ddi_get_lbolt() - ab->b_arc_access < ++ arc_min_prefetch_lifespan)) { ++ skipped++; ++ continue; ++ } ++ /* "lookahead" for better eviction candidate */ ++ if (recycle && ab->b_size != bytes && ++ ab_prev && ab_prev->b_size == bytes) ++ continue; ++ hash_lock = HDR_LOCK(ab); ++ have_lock = MUTEX_HELD(hash_lock); ++ if (have_lock || mutex_tryenter(hash_lock)) { ++ ASSERT3U(refcount_count(&ab->b_refcnt), ==, 0); ++ ASSERT(ab->b_datacnt > 0); ++ while (ab->b_buf) { ++ arc_buf_t *buf = ab->b_buf; ++ if (!mutex_tryenter(&buf->b_evict_lock)) { ++ missed += 1; ++ break; ++ } ++ if (buf->b_data) { ++ bytes_evicted += ab->b_size; ++ if (recycle && ab->b_type == type && ++ ab->b_size == bytes && ++ !HDR_L2_WRITING(ab)) { ++ stolen = buf->b_data; ++ recycle = FALSE; ++ } ++ } ++ if (buf->b_efunc) { ++ mutex_enter(&arc_eviction_mtx); ++ arc_buf_destroy(buf, ++ buf->b_data == stolen, FALSE); ++ ab->b_buf = buf->b_next; ++ buf->b_hdr = &arc_eviction_hdr; ++ buf->b_next = arc_eviction_list; ++ arc_eviction_list = buf; ++ mutex_exit(&arc_eviction_mtx); ++ mutex_exit(&buf->b_evict_lock); ++ } else { ++ mutex_exit(&buf->b_evict_lock); ++ arc_buf_destroy(buf, ++ buf->b_data == stolen, TRUE); ++ } ++ } ++ ++ if (ab->b_l2hdr) { ++ ARCSTAT_INCR(arcstat_evict_l2_cached, ++ ab->b_size); ++ } else { ++ if (l2arc_write_eligible(ab->b_spa, ab)) { ++ ARCSTAT_INCR(arcstat_evict_l2_eligible, ++ ab->b_size); ++ } else { ++ ARCSTAT_INCR( ++ arcstat_evict_l2_ineligible, ++ ab->b_size); ++ } ++ } ++ ++ if (ab->b_datacnt == 0) { ++ arc_change_state(evicted_state, ab, hash_lock); ++ ASSERT(HDR_IN_HASH_TABLE(ab)); ++ ab->b_flags |= ARC_IN_HASH_TABLE; ++ ab->b_flags &= ~ARC_BUF_AVAILABLE; ++ DTRACE_PROBE1(arc__evict, arc_buf_hdr_t *, ab); ++ } ++ if (!have_lock) ++ mutex_exit(hash_lock); ++ if (bytes >= 0 && bytes_evicted >= bytes) ++ break; ++ } else { ++ missed += 1; ++ } ++ } ++ ++ mutex_exit(&evicted_state->arcs_mtx); ++ mutex_exit(&state->arcs_mtx); ++ ++ if (bytes_evicted < bytes) ++ dprintf("only evicted %lld bytes from %x\n", ++ (longlong_t)bytes_evicted, state); ++ ++ if (skipped) ++ ARCSTAT_INCR(arcstat_evict_skip, skipped); ++ ++ if (missed) ++ ARCSTAT_INCR(arcstat_mutex_miss, missed); ++ ++ /* ++ * We have just evicted some date into the ghost state, make ++ * sure we also adjust the ghost state size if necessary. ++ */ ++ if (arc_no_grow && ++ arc_mru_ghost->arcs_size + arc_mfu_ghost->arcs_size > arc_c) { ++ int64_t mru_over = arc_anon->arcs_size + arc_mru->arcs_size + ++ arc_mru_ghost->arcs_size - arc_c; ++ ++ if (mru_over > 0 && arc_mru_ghost->arcs_lsize[type] > 0) { ++ int64_t todelete = ++ MIN(arc_mru_ghost->arcs_lsize[type], mru_over); ++ arc_evict_ghost(arc_mru_ghost, 0, todelete); ++ } else if (arc_mfu_ghost->arcs_lsize[type] > 0) { ++ int64_t todelete = MIN(arc_mfu_ghost->arcs_lsize[type], ++ arc_mru_ghost->arcs_size + ++ arc_mfu_ghost->arcs_size - arc_c); ++ arc_evict_ghost(arc_mfu_ghost, 0, todelete); ++ } ++ } ++ ++ return (stolen); ++} ++ ++/* ++ * Remove buffers from list until we've removed the specified number of ++ * bytes. Destroy the buffers that are removed. 
++ */ ++static void ++arc_evict_ghost(arc_state_t *state, uint64_t spa, int64_t bytes) ++{ ++ arc_buf_hdr_t *ab, *ab_prev; ++ arc_buf_hdr_t marker; ++ list_t *list = &state->arcs_list[ARC_BUFC_DATA]; ++ kmutex_t *hash_lock; ++ uint64_t bytes_deleted = 0; ++ uint64_t bufs_skipped = 0; ++ ++ ASSERT(GHOST_STATE(state)); ++ bzero(&marker, sizeof(marker)); ++top: ++ mutex_enter(&state->arcs_mtx); ++ for (ab = list_tail(list); ab; ab = ab_prev) { ++ ab_prev = list_prev(list, ab); ++ if (spa && ab->b_spa != spa) ++ continue; ++ ++ /* ignore markers */ ++ if (ab->b_spa == 0) ++ continue; ++ ++ hash_lock = HDR_LOCK(ab); ++ /* caller may be trying to modify this buffer, skip it */ ++ if (MUTEX_HELD(hash_lock)) ++ continue; ++ if (mutex_tryenter(hash_lock)) { ++ ASSERT(!HDR_IO_IN_PROGRESS(ab)); ++ ASSERT(ab->b_buf == NULL); ++ ARCSTAT_BUMP(arcstat_deleted); ++ bytes_deleted += ab->b_size; ++ ++ if (ab->b_l2hdr != NULL) { ++ /* ++ * This buffer is cached on the 2nd Level ARC; ++ * don't destroy the header. ++ */ ++ arc_change_state(arc_l2c_only, ab, hash_lock); ++ mutex_exit(hash_lock); ++ } else { ++ arc_change_state(arc_anon, ab, hash_lock); ++ mutex_exit(hash_lock); ++ arc_hdr_destroy(ab); ++ } ++ ++ DTRACE_PROBE1(arc__delete, arc_buf_hdr_t *, ab); ++ if (bytes >= 0 && bytes_deleted >= bytes) ++ break; ++ } else if (bytes < 0) { ++ /* ++ * Insert a list marker and then wait for the ++ * hash lock to become available. Once its ++ * available, restart from where we left off. ++ */ ++ list_insert_after(list, ab, &marker); ++ mutex_exit(&state->arcs_mtx); ++ mutex_enter(hash_lock); ++ mutex_exit(hash_lock); ++ mutex_enter(&state->arcs_mtx); ++ ab_prev = list_prev(list, &marker); ++ list_remove(list, &marker); ++ } else ++ bufs_skipped += 1; ++ } ++ mutex_exit(&state->arcs_mtx); ++ ++ if (list == &state->arcs_list[ARC_BUFC_DATA] && ++ (bytes < 0 || bytes_deleted < bytes)) { ++ list = &state->arcs_list[ARC_BUFC_METADATA]; ++ goto top; ++ } ++ ++ if (bufs_skipped) { ++ ARCSTAT_INCR(arcstat_mutex_miss, bufs_skipped); ++ ASSERT(bytes >= 0); ++ } ++ ++ if (bytes_deleted < bytes) ++ dprintf("only deleted %lld bytes from %p\n", ++ (longlong_t)bytes_deleted, state); ++} ++ ++static void ++arc_adjust(void) ++{ ++ int64_t adjustment, delta; ++ ++ /* ++ * Adjust MRU size ++ */ ++ ++ adjustment = MIN((int64_t)(arc_size - arc_c), ++ (int64_t)(arc_anon->arcs_size + arc_mru->arcs_size + arc_meta_used - ++ arc_p)); ++ ++ if (adjustment > 0 && arc_mru->arcs_lsize[ARC_BUFC_DATA] > 0) { ++ delta = MIN(arc_mru->arcs_lsize[ARC_BUFC_DATA], adjustment); ++ (void) arc_evict(arc_mru, 0, delta, FALSE, ARC_BUFC_DATA); ++ adjustment -= delta; ++ } ++ ++ if (adjustment > 0 && arc_mru->arcs_lsize[ARC_BUFC_METADATA] > 0) { ++ delta = MIN(arc_mru->arcs_lsize[ARC_BUFC_METADATA], adjustment); ++ (void) arc_evict(arc_mru, 0, delta, FALSE, ++ ARC_BUFC_METADATA); ++ } ++ ++ /* ++ * Adjust MFU size ++ */ ++ ++ adjustment = arc_size - arc_c; ++ ++ if (adjustment > 0 && arc_mfu->arcs_lsize[ARC_BUFC_DATA] > 0) { ++ delta = MIN(adjustment, arc_mfu->arcs_lsize[ARC_BUFC_DATA]); ++ (void) arc_evict(arc_mfu, 0, delta, FALSE, ARC_BUFC_DATA); ++ adjustment -= delta; ++ } ++ ++ if (adjustment > 0 && arc_mfu->arcs_lsize[ARC_BUFC_METADATA] > 0) { ++ int64_t delta = MIN(adjustment, ++ arc_mfu->arcs_lsize[ARC_BUFC_METADATA]); ++ (void) arc_evict(arc_mfu, 0, delta, FALSE, ++ ARC_BUFC_METADATA); ++ } ++ ++ /* ++ * Adjust ghost lists ++ */ ++ ++ adjustment = arc_mru->arcs_size + arc_mru_ghost->arcs_size - arc_c; ++ ++ if (adjustment > 0 && 
arc_mru_ghost->arcs_size > 0) { ++ delta = MIN(arc_mru_ghost->arcs_size, adjustment); ++ arc_evict_ghost(arc_mru_ghost, 0, delta); ++ } ++ ++ adjustment = ++ arc_mru_ghost->arcs_size + arc_mfu_ghost->arcs_size - arc_c; ++ ++ if (adjustment > 0 && arc_mfu_ghost->arcs_size > 0) { ++ delta = MIN(arc_mfu_ghost->arcs_size, adjustment); ++ arc_evict_ghost(arc_mfu_ghost, 0, delta); ++ } ++} ++ ++/* ++ * Request that arc user drop references so that N bytes can be released ++ * from the cache. This provides a mechanism to ensure the arc can honor ++ * the arc_meta_limit and reclaim buffers which are pinned in the cache ++ * by higher layers. (i.e. the zpl) ++ */ ++static void ++arc_do_user_prune(int64_t adjustment) ++{ ++ arc_prune_func_t *func; ++ void *private; ++ arc_prune_t *cp, *np; ++ ++ mutex_enter(&arc_prune_mtx); ++ ++ cp = list_head(&arc_prune_list); ++ while (cp != NULL) { ++ func = cp->p_pfunc; ++ private = cp->p_private; ++ np = list_next(&arc_prune_list, cp); ++ refcount_add(&cp->p_refcnt, func); ++ mutex_exit(&arc_prune_mtx); ++ ++ if (func != NULL) ++ func(adjustment, private); ++ ++ mutex_enter(&arc_prune_mtx); ++ ++ /* User removed prune callback concurrently with execution */ ++ if (refcount_remove(&cp->p_refcnt, func) == 0) { ++ ASSERT(!list_link_active(&cp->p_node)); ++ refcount_destroy(&cp->p_refcnt); ++ kmem_free(cp, sizeof (*cp)); ++ } ++ ++ cp = np; ++ } ++ ++ ARCSTAT_BUMP(arcstat_prune); ++ mutex_exit(&arc_prune_mtx); ++} ++ ++static void ++arc_do_user_evicts(void) ++{ ++ mutex_enter(&arc_eviction_mtx); ++ while (arc_eviction_list != NULL) { ++ arc_buf_t *buf = arc_eviction_list; ++ arc_eviction_list = buf->b_next; ++ mutex_enter(&buf->b_evict_lock); ++ buf->b_hdr = NULL; ++ mutex_exit(&buf->b_evict_lock); ++ mutex_exit(&arc_eviction_mtx); ++ ++ if (buf->b_efunc != NULL) ++ VERIFY(buf->b_efunc(buf) == 0); ++ ++ buf->b_efunc = NULL; ++ buf->b_private = NULL; ++ kmem_cache_free(buf_cache, buf); ++ mutex_enter(&arc_eviction_mtx); ++ } ++ mutex_exit(&arc_eviction_mtx); ++} ++ ++/* ++ * Evict only meta data objects from the cache leaving the data objects. ++ * This is only used to enforce the tunable arc_meta_limit, if we are ++ * unable to evict enough buffers notify the user via the prune callback. ++ */ ++void ++arc_adjust_meta(int64_t adjustment, boolean_t may_prune) ++{ ++ int64_t delta; ++ ++ if (adjustment > 0 && arc_mru->arcs_lsize[ARC_BUFC_METADATA] > 0) { ++ delta = MIN(arc_mru->arcs_lsize[ARC_BUFC_METADATA], adjustment); ++ arc_evict(arc_mru, 0, delta, FALSE, ARC_BUFC_METADATA); ++ adjustment -= delta; ++ } ++ ++ if (adjustment > 0 && arc_mfu->arcs_lsize[ARC_BUFC_METADATA] > 0) { ++ delta = MIN(arc_mfu->arcs_lsize[ARC_BUFC_METADATA], adjustment); ++ arc_evict(arc_mfu, 0, delta, FALSE, ARC_BUFC_METADATA); ++ adjustment -= delta; ++ } ++ ++ if (may_prune && (adjustment > 0) && (arc_meta_used > arc_meta_limit)) ++ arc_do_user_prune(arc_meta_prune); ++} ++ ++/* ++ * Flush all *evictable* data from the cache for the given spa. ++ * NOTE: this will not touch "active" (i.e. referenced) data. 
++ */ ++void ++arc_flush(spa_t *spa) ++{ ++ uint64_t guid = 0; ++ ++ if (spa) ++ guid = spa_load_guid(spa); ++ ++ while (list_head(&arc_mru->arcs_list[ARC_BUFC_DATA])) { ++ (void) arc_evict(arc_mru, guid, -1, FALSE, ARC_BUFC_DATA); ++ if (spa) ++ break; ++ } ++ while (list_head(&arc_mru->arcs_list[ARC_BUFC_METADATA])) { ++ (void) arc_evict(arc_mru, guid, -1, FALSE, ARC_BUFC_METADATA); ++ if (spa) ++ break; ++ } ++ while (list_head(&arc_mfu->arcs_list[ARC_BUFC_DATA])) { ++ (void) arc_evict(arc_mfu, guid, -1, FALSE, ARC_BUFC_DATA); ++ if (spa) ++ break; ++ } ++ while (list_head(&arc_mfu->arcs_list[ARC_BUFC_METADATA])) { ++ (void) arc_evict(arc_mfu, guid, -1, FALSE, ARC_BUFC_METADATA); ++ if (spa) ++ break; ++ } ++ ++ arc_evict_ghost(arc_mru_ghost, guid, -1); ++ arc_evict_ghost(arc_mfu_ghost, guid, -1); ++ ++ mutex_enter(&arc_reclaim_thr_lock); ++ arc_do_user_evicts(); ++ mutex_exit(&arc_reclaim_thr_lock); ++ ASSERT(spa || arc_eviction_list == NULL); ++} ++ ++void ++arc_shrink(uint64_t bytes) ++{ ++ if (arc_c > arc_c_min) { ++ uint64_t to_free; ++ ++ to_free = bytes ? bytes : arc_c >> arc_shrink_shift; ++ ++ if (arc_c > arc_c_min + to_free) ++ atomic_add_64(&arc_c, -to_free); ++ else ++ arc_c = arc_c_min; ++ ++ atomic_add_64(&arc_p, -(arc_p >> arc_shrink_shift)); ++ if (arc_c > arc_size) ++ arc_c = MAX(arc_size, arc_c_min); ++ if (arc_p > arc_c) ++ arc_p = (arc_c >> 1); ++ ASSERT(arc_c >= arc_c_min); ++ ASSERT((int64_t)arc_p >= 0); ++ } ++ ++ if (arc_size > arc_c) ++ arc_adjust(); ++} ++ ++static void ++arc_kmem_reap_now(arc_reclaim_strategy_t strat, uint64_t bytes) ++{ ++ size_t i; ++ kmem_cache_t *prev_cache = NULL; ++ kmem_cache_t *prev_data_cache = NULL; ++ extern kmem_cache_t *zio_buf_cache[]; ++ extern kmem_cache_t *zio_data_buf_cache[]; ++ ++ /* ++ * An aggressive reclamation will shrink the cache size as well as ++ * reap free buffers from the arc kmem caches. ++ */ ++ if (strat == ARC_RECLAIM_AGGR) ++ arc_shrink(bytes); ++ ++ for (i = 0; i < SPA_MAXBLOCKSIZE >> SPA_MINBLOCKSHIFT; i++) { ++ if (zio_buf_cache[i] != prev_cache) { ++ prev_cache = zio_buf_cache[i]; ++ kmem_cache_reap_now(zio_buf_cache[i]); ++ } ++ if (zio_data_buf_cache[i] != prev_data_cache) { ++ prev_data_cache = zio_data_buf_cache[i]; ++ kmem_cache_reap_now(zio_data_buf_cache[i]); ++ } ++ } ++ ++ kmem_cache_reap_now(buf_cache); ++ kmem_cache_reap_now(hdr_cache); ++} ++ ++/* ++ * Unlike other ZFS implementations this thread is only responsible for ++ * adapting the target ARC size on Linux. The responsibility for memory ++ * reclamation has been entirely delegated to the arc_shrinker_func() ++ * which is registered with the VM. To reflect this change in behavior ++ * the arc_reclaim thread has been renamed to arc_adapt. 
++ */ ++static void ++arc_adapt_thread(void) ++{ ++ callb_cpr_t cpr; ++ int64_t prune; ++ ++ CALLB_CPR_INIT(&cpr, &arc_reclaim_thr_lock, callb_generic_cpr, FTAG); ++ ++ mutex_enter(&arc_reclaim_thr_lock); ++ while (arc_thread_exit == 0) { ++#ifndef _KERNEL ++ arc_reclaim_strategy_t last_reclaim = ARC_RECLAIM_CONS; ++ ++ if (spa_get_random(100) == 0) { ++ ++ if (arc_no_grow) { ++ if (last_reclaim == ARC_RECLAIM_CONS) { ++ last_reclaim = ARC_RECLAIM_AGGR; ++ } else { ++ last_reclaim = ARC_RECLAIM_CONS; ++ } ++ } else { ++ arc_no_grow = TRUE; ++ last_reclaim = ARC_RECLAIM_AGGR; ++ membar_producer(); ++ } ++ ++ /* reset the growth delay for every reclaim */ ++ arc_grow_time = ddi_get_lbolt()+(arc_grow_retry * hz); ++ ++ arc_kmem_reap_now(last_reclaim, 0); ++ arc_warm = B_TRUE; ++ } ++#endif /* !_KERNEL */ ++ ++ /* No recent memory pressure allow the ARC to grow. */ ++ if (arc_no_grow && ddi_get_lbolt() >= arc_grow_time) ++ arc_no_grow = FALSE; ++ ++ /* ++ * Keep meta data usage within limits, arc_shrink() is not ++ * used to avoid collapsing the arc_c value when only the ++ * arc_meta_limit is being exceeded. ++ */ ++ prune = (int64_t)arc_meta_used - (int64_t)arc_meta_limit; ++ if (prune > 0) ++ arc_adjust_meta(prune, B_TRUE); ++ ++ arc_adjust(); ++ ++ if (arc_eviction_list != NULL) ++ arc_do_user_evicts(); ++ ++ /* block until needed, or one second, whichever is shorter */ ++ CALLB_CPR_SAFE_BEGIN(&cpr); ++ (void) cv_timedwait_interruptible(&arc_reclaim_thr_cv, ++ &arc_reclaim_thr_lock, (ddi_get_lbolt() + hz)); ++ CALLB_CPR_SAFE_END(&cpr, &arc_reclaim_thr_lock); ++ } ++ ++ arc_thread_exit = 0; ++ cv_broadcast(&arc_reclaim_thr_cv); ++ CALLB_CPR_EXIT(&cpr); /* drops arc_reclaim_thr_lock */ ++ thread_exit(); ++} ++ ++#ifdef _KERNEL ++/* ++ * Determine the amount of memory eligible for eviction contained in the ++ * ARC. All clean data reported by the ghost lists can always be safely ++ * evicted. Due to arc_c_min, the same does not hold for all clean data ++ * contained by the regular mru and mfu lists. ++ * ++ * In the case of the regular mru and mfu lists, we need to report as ++ * much clean data as possible, such that evicting that same reported ++ * data will not bring arc_size below arc_c_min. Thus, in certain ++ * circumstances, the total amount of clean data in the mru and mfu ++ * lists might not actually be evictable. ++ * ++ * The following two distinct cases are accounted for: ++ * ++ * 1. The sum of the amount of dirty data contained by both the mru and ++ * mfu lists, plus the ARC's other accounting (e.g. the anon list), ++ * is greater than or equal to arc_c_min. ++ * (i.e. amount of dirty data >= arc_c_min) ++ * ++ * This is the easy case; all clean data contained by the mru and mfu ++ * lists is evictable. Evicting all clean data can only drop arc_size ++ * to the amount of dirty data, which is greater than arc_c_min. ++ * ++ * 2. The sum of the amount of dirty data contained by both the mru and ++ * mfu lists, plus the ARC's other accounting (e.g. the anon list), ++ * is less than arc_c_min. ++ * (i.e. arc_c_min > amount of dirty data) ++ * ++ * 2.1. arc_size is greater than or equal arc_c_min. ++ * (i.e. arc_size >= arc_c_min > amount of dirty data) ++ * ++ * In this case, not all clean data from the regular mru and mfu ++ * lists is actually evictable; we must leave enough clean data ++ * to keep arc_size above arc_c_min. Thus, the maximum amount of ++ * evictable data from the two lists combined, is exactly the ++ * difference between arc_size and arc_c_min. ++ * ++ * 2.2. 
arc_size is less than arc_c_min ++ * (i.e. arc_c_min > arc_size > amount of dirty data) ++ * ++ * In this case, none of the data contained in the mru and mfu ++ * lists is evictable, even if it's clean. Since arc_size is ++ * already below arc_c_min, evicting any more would only ++ * increase this negative difference. ++ */ ++static uint64_t ++arc_evictable_memory(void) { ++ uint64_t arc_clean = ++ arc_mru->arcs_lsize[ARC_BUFC_DATA] + ++ arc_mru->arcs_lsize[ARC_BUFC_METADATA] + ++ arc_mfu->arcs_lsize[ARC_BUFC_DATA] + ++ arc_mfu->arcs_lsize[ARC_BUFC_METADATA]; ++ uint64_t ghost_clean = ++ arc_mru_ghost->arcs_lsize[ARC_BUFC_DATA] + ++ arc_mru_ghost->arcs_lsize[ARC_BUFC_METADATA] + ++ arc_mfu_ghost->arcs_lsize[ARC_BUFC_DATA] + ++ arc_mfu_ghost->arcs_lsize[ARC_BUFC_METADATA]; ++ uint64_t arc_dirty = MAX((int64_t)arc_size - (int64_t)arc_clean, 0); ++ ++ if (arc_dirty >= arc_c_min) ++ return (ghost_clean + arc_clean); ++ ++ return (ghost_clean + MAX((int64_t)arc_size - (int64_t)arc_c_min, 0)); ++} ++ ++static int ++__arc_shrinker_func(struct shrinker *shrink, struct shrink_control *sc) ++{ ++ uint64_t pages; ++ ++ /* The arc is considered warm once reclaim has occurred */ ++ if (unlikely(arc_warm == B_FALSE)) ++ arc_warm = B_TRUE; ++ ++ /* Return the potential number of reclaimable pages */ ++ pages = btop(arc_evictable_memory()); ++ if (sc->nr_to_scan == 0) ++ return (pages); ++ ++ /* Not allowed to perform filesystem reclaim */ ++ if (!(sc->gfp_mask & __GFP_FS)) ++ return (-1); ++ ++ /* Reclaim in progress */ ++ if (mutex_tryenter(&arc_reclaim_thr_lock) == 0) ++ return (-1); ++ ++ /* ++ * Evict the requested number of pages by shrinking arc_c the ++ * requested amount. If there is nothing left to evict just ++ * reap whatever we can from the various arc slabs. ++ */ ++ if (pages > 0) { ++ arc_kmem_reap_now(ARC_RECLAIM_AGGR, ptob(sc->nr_to_scan)); ++ pages = btop(arc_evictable_memory()); ++ } else { ++ arc_kmem_reap_now(ARC_RECLAIM_CONS, ptob(sc->nr_to_scan)); ++ pages = -1; ++ } ++ ++ /* ++ * When direct reclaim is observed it usually indicates a rapid ++ * increase in memory pressure. This occurs because the kswapd ++ * threads were unable to asynchronously keep enough free memory ++ * available. In this case set arc_no_grow to briefly pause arc ++ * growth to avoid compounding the memory pressure. ++ */ ++ if (current_is_kswapd()) { ++ ARCSTAT_BUMP(arcstat_memory_indirect_count); ++ } else { ++ arc_no_grow = B_TRUE; ++ arc_grow_time = ddi_get_lbolt() + (arc_grow_retry * hz); ++ ARCSTAT_BUMP(arcstat_memory_direct_count); ++ } ++ ++ mutex_exit(&arc_reclaim_thr_lock); ++ ++ return (pages); ++} ++SPL_SHRINKER_CALLBACK_WRAPPER(arc_shrinker_func); ++ ++SPL_SHRINKER_DECLARE(arc_shrinker, arc_shrinker_func, DEFAULT_SEEKS); ++#endif /* _KERNEL */ ++ ++/* ++ * Adapt arc info given the number of bytes we are trying to add and ++ * the state that we are comming from. This function is only called ++ * when we are adding new content to the cache. ++ */ ++static void ++arc_adapt(int bytes, arc_state_t *state) ++{ ++ int mult; ++ uint64_t arc_p_min = (arc_c >> arc_p_min_shift); ++ ++ if (state == arc_l2c_only) ++ return; ++ ++ ASSERT(bytes > 0); ++ /* ++ * Adapt the target size of the MRU list: ++ * - if we just hit in the MRU ghost list, then increase ++ * the target size of the MRU list. ++ * - if we just hit in the MFU ghost list, then increase ++ * the target size of the MFU list by decreasing the ++ * target size of the MRU list. 
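For illustration only (not part of the patch; all byte counts are invented), a stand-alone sketch that walks the two-case evictable-memory rule documented above, using the same arithmetic as arc_evictable_memory():

#include <stdio.h>
#include <stdint.h>

static uint64_t max_i64(int64_t a, int64_t b)
{
	return ((uint64_t)(a > b ? a : b));
}

int main(void)
{
	uint64_t arc_size    = 3ULL << 30;    /* total ARC size: 3 GiB (sample)  */
	uint64_t arc_c_min   = 1ULL << 30;    /* minimum target: 1 GiB (sample)  */
	uint64_t arc_clean   = 2ULL << 30;    /* clean mru+mfu data (sample)     */
	uint64_t ghost_clean = 512ULL << 20;  /* ghost-list bookkeeping (sample) */

	uint64_t arc_dirty = max_i64((int64_t)arc_size - (int64_t)arc_clean, 0);
	uint64_t evictable;

	if (arc_dirty >= arc_c_min)
		/* Case 1: evicting every clean byte cannot drop below arc_c_min. */
		evictable = ghost_clean + arc_clean;
	else
		/* Case 2: only report what keeps arc_size at or above arc_c_min. */
		evictable = ghost_clean +
		    max_i64((int64_t)arc_size - (int64_t)arc_c_min, 0);

	printf("evictable=%llu bytes\n", (unsigned long long)evictable);
	return 0;
}

With these numbers the dirty remainder exactly reaches arc_c_min, so all clean data plus the ghost bookkeeping is reported; raising arc_clean (and thereby lowering the dirty remainder) flips the computation into case 2.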
++ */ ++ if (state == arc_mru_ghost) { ++ mult = ((arc_mru_ghost->arcs_size >= arc_mfu_ghost->arcs_size) ? ++ 1 : (arc_mfu_ghost->arcs_size/arc_mru_ghost->arcs_size)); ++ mult = MIN(mult, 10); /* avoid wild arc_p adjustment */ ++ ++ arc_p = MIN(arc_c - arc_p_min, arc_p + bytes * mult); ++ } else if (state == arc_mfu_ghost) { ++ uint64_t delta; ++ ++ mult = ((arc_mfu_ghost->arcs_size >= arc_mru_ghost->arcs_size) ? ++ 1 : (arc_mru_ghost->arcs_size/arc_mfu_ghost->arcs_size)); ++ mult = MIN(mult, 10); ++ ++ delta = MIN(bytes * mult, arc_p); ++ arc_p = MAX(arc_p_min, arc_p - delta); ++ } ++ ASSERT((int64_t)arc_p >= 0); ++ ++ if (arc_no_grow) ++ return; ++ ++ if (arc_c >= arc_c_max) ++ return; ++ ++ /* ++ * If we're within (2 * maxblocksize) bytes of the target ++ * cache size, increment the target cache size ++ */ ++ if (arc_size > arc_c - (2ULL << SPA_MAXBLOCKSHIFT)) { ++ atomic_add_64(&arc_c, (int64_t)bytes); ++ if (arc_c > arc_c_max) ++ arc_c = arc_c_max; ++ else if (state == arc_anon) ++ atomic_add_64(&arc_p, (int64_t)bytes); ++ if (arc_p > arc_c) ++ arc_p = arc_c; ++ } ++ ASSERT((int64_t)arc_p >= 0); ++} ++ ++/* ++ * Check if the cache has reached its limits and eviction is required ++ * prior to insert. ++ */ ++static int ++arc_evict_needed(arc_buf_contents_t type) ++{ ++ if (type == ARC_BUFC_METADATA && arc_meta_used >= arc_meta_limit) ++ return (1); ++ ++ if (arc_no_grow) ++ return (1); ++ ++ return (arc_size > arc_c); ++} ++ ++/* ++ * The buffer, supplied as the first argument, needs a data block. ++ * So, if we are at cache max, determine which cache should be victimized. ++ * We have the following cases: ++ * ++ * 1. Insert for MRU, p > sizeof(arc_anon + arc_mru) -> ++ * In this situation if we're out of space, but the resident size of the MFU is ++ * under the limit, victimize the MFU cache to satisfy this insertion request. ++ * ++ * 2. Insert for MRU, p <= sizeof(arc_anon + arc_mru) -> ++ * Here, we've used up all of the available space for the MRU, so we need to ++ * evict from our own cache instead. Evict from the set of resident MRU ++ * entries. ++ * ++ * 3. Insert for MFU (c - p) > sizeof(arc_mfu) -> ++ * c minus p represents the MFU space in the cache, since p is the size of the ++ * cache that is dedicated to the MRU. In this situation there's still space on ++ * the MFU side, so the MRU side needs to be victimized. ++ * ++ * 4. Insert for MFU (c - p) < sizeof(arc_mfu) -> ++ * MFU's resident set is consuming more space than it has been allotted. In ++ * this situation, we must victimize our own cache, the MFU, for this insertion. ++ */ ++static void ++arc_get_data_buf(arc_buf_t *buf) ++{ ++ arc_state_t *state = buf->b_hdr->b_state; ++ uint64_t size = buf->b_hdr->b_size; ++ arc_buf_contents_t type = buf->b_hdr->b_type; ++ ++ arc_adapt(size, state); ++ ++ /* ++ * We have not yet reached cache maximum size, ++ * just allocate a new buffer. ++ */ ++ if (!arc_evict_needed(type)) { ++ if (type == ARC_BUFC_METADATA) { ++ buf->b_data = zio_buf_alloc(size); ++ arc_space_consume(size, ARC_SPACE_DATA); ++ } else { ++ ASSERT(type == ARC_BUFC_DATA); ++ buf->b_data = zio_data_buf_alloc(size); ++ ARCSTAT_INCR(arcstat_data_size, size); ++ atomic_add_64(&arc_size, size); ++ } ++ goto out; ++ } ++ ++ /* ++ * If we are prefetching from the mfu ghost list, this buffer ++ * will end up on the mru list; so steal space from there. ++ */ ++ if (state == arc_mfu_ghost) ++ state = buf->b_hdr->b_flags & ARC_PREFETCH ? 
arc_mru : arc_mfu; ++ else if (state == arc_mru_ghost) ++ state = arc_mru; ++ ++ if (state == arc_mru || state == arc_anon) { ++ uint64_t mru_used = arc_anon->arcs_size + arc_mru->arcs_size; ++ state = (arc_mfu->arcs_lsize[type] >= size && ++ arc_p > mru_used) ? arc_mfu : arc_mru; ++ } else { ++ /* MFU cases */ ++ uint64_t mfu_space = arc_c - arc_p; ++ state = (arc_mru->arcs_lsize[type] >= size && ++ mfu_space > arc_mfu->arcs_size) ? arc_mru : arc_mfu; ++ } ++ ++ if ((buf->b_data = arc_evict(state, 0, size, TRUE, type)) == NULL) { ++ if (type == ARC_BUFC_METADATA) { ++ buf->b_data = zio_buf_alloc(size); ++ arc_space_consume(size, ARC_SPACE_DATA); ++ ++ /* ++ * If we are unable to recycle an existing meta buffer ++ * signal the reclaim thread. It will notify users ++ * via the prune callback to drop references. The ++ * prune callback in run in the context of the reclaim ++ * thread to avoid deadlocking on the hash_lock. ++ */ ++ cv_signal(&arc_reclaim_thr_cv); ++ } else { ++ ASSERT(type == ARC_BUFC_DATA); ++ buf->b_data = zio_data_buf_alloc(size); ++ ARCSTAT_INCR(arcstat_data_size, size); ++ atomic_add_64(&arc_size, size); ++ } ++ ++ ARCSTAT_BUMP(arcstat_recycle_miss); ++ } ++ ASSERT(buf->b_data != NULL); ++out: ++ /* ++ * Update the state size. Note that ghost states have a ++ * "ghost size" and so don't need to be updated. ++ */ ++ if (!GHOST_STATE(buf->b_hdr->b_state)) { ++ arc_buf_hdr_t *hdr = buf->b_hdr; ++ ++ atomic_add_64(&hdr->b_state->arcs_size, size); ++ if (list_link_active(&hdr->b_arc_node)) { ++ ASSERT(refcount_is_zero(&hdr->b_refcnt)); ++ atomic_add_64(&hdr->b_state->arcs_lsize[type], size); ++ } ++ /* ++ * If we are growing the cache, and we are adding anonymous ++ * data, and we have outgrown arc_p, update arc_p ++ */ ++ if (arc_size < arc_c && hdr->b_state == arc_anon && ++ arc_anon->arcs_size + arc_mru->arcs_size > arc_p) ++ arc_p = MIN(arc_c, arc_p + size); ++ } ++} ++ ++/* ++ * This routine is called whenever a buffer is accessed. ++ * NOTE: the hash lock is dropped in this function. ++ */ ++static void ++arc_access(arc_buf_hdr_t *buf, kmutex_t *hash_lock) ++{ ++ clock_t now; ++ ++ ASSERT(MUTEX_HELD(hash_lock)); ++ ++ if (buf->b_state == arc_anon) { ++ /* ++ * This buffer is not in the cache, and does not ++ * appear in our "ghost" list. Add the new buffer ++ * to the MRU state. ++ */ ++ ++ ASSERT(buf->b_arc_access == 0); ++ buf->b_arc_access = ddi_get_lbolt(); ++ DTRACE_PROBE1(new_state__mru, arc_buf_hdr_t *, buf); ++ arc_change_state(arc_mru, buf, hash_lock); ++ ++ } else if (buf->b_state == arc_mru) { ++ now = ddi_get_lbolt(); ++ ++ /* ++ * If this buffer is here because of a prefetch, then either: ++ * - clear the flag if this is a "referencing" read ++ * (any subsequent access will bump this into the MFU state). ++ * or ++ * - move the buffer to the head of the list if this is ++ * another prefetch (to make it less likely to be evicted). ++ */ ++ if ((buf->b_flags & ARC_PREFETCH) != 0) { ++ if (refcount_count(&buf->b_refcnt) == 0) { ++ ASSERT(list_link_active(&buf->b_arc_node)); ++ } else { ++ buf->b_flags &= ~ARC_PREFETCH; ++ ARCSTAT_BUMP(arcstat_mru_hits); ++ } ++ buf->b_arc_access = now; ++ return; ++ } ++ ++ /* ++ * This buffer has been "accessed" only once so far, ++ * but it is still in the cache. Move it to the MFU ++ * state. ++ */ ++ if (now > buf->b_arc_access + ARC_MINTIME) { ++ /* ++ * More than 125ms have passed since we ++ * instantiated this buffer. Move it to the ++ * most frequently used state. 
++ */ ++ buf->b_arc_access = now; ++ DTRACE_PROBE1(new_state__mfu, arc_buf_hdr_t *, buf); ++ arc_change_state(arc_mfu, buf, hash_lock); ++ } ++ ARCSTAT_BUMP(arcstat_mru_hits); ++ } else if (buf->b_state == arc_mru_ghost) { ++ arc_state_t *new_state; ++ /* ++ * This buffer has been "accessed" recently, but ++ * was evicted from the cache. Move it to the ++ * MFU state. ++ */ ++ ++ if (buf->b_flags & ARC_PREFETCH) { ++ new_state = arc_mru; ++ if (refcount_count(&buf->b_refcnt) > 0) ++ buf->b_flags &= ~ARC_PREFETCH; ++ DTRACE_PROBE1(new_state__mru, arc_buf_hdr_t *, buf); ++ } else { ++ new_state = arc_mfu; ++ DTRACE_PROBE1(new_state__mfu, arc_buf_hdr_t *, buf); ++ } ++ ++ buf->b_arc_access = ddi_get_lbolt(); ++ arc_change_state(new_state, buf, hash_lock); ++ ++ ARCSTAT_BUMP(arcstat_mru_ghost_hits); ++ } else if (buf->b_state == arc_mfu) { ++ /* ++ * This buffer has been accessed more than once and is ++ * still in the cache. Keep it in the MFU state. ++ * ++ * NOTE: an add_reference() that occurred when we did ++ * the arc_read() will have kicked this off the list. ++ * If it was a prefetch, we will explicitly move it to ++ * the head of the list now. ++ */ ++ if ((buf->b_flags & ARC_PREFETCH) != 0) { ++ ASSERT(refcount_count(&buf->b_refcnt) == 0); ++ ASSERT(list_link_active(&buf->b_arc_node)); ++ } ++ ARCSTAT_BUMP(arcstat_mfu_hits); ++ buf->b_arc_access = ddi_get_lbolt(); ++ } else if (buf->b_state == arc_mfu_ghost) { ++ arc_state_t *new_state = arc_mfu; ++ /* ++ * This buffer has been accessed more than once but has ++ * been evicted from the cache. Move it back to the ++ * MFU state. ++ */ ++ ++ if (buf->b_flags & ARC_PREFETCH) { ++ /* ++ * This is a prefetch access... ++ * move this block back to the MRU state. ++ */ ++ ASSERT3U(refcount_count(&buf->b_refcnt), ==, 0); ++ new_state = arc_mru; ++ } ++ ++ buf->b_arc_access = ddi_get_lbolt(); ++ DTRACE_PROBE1(new_state__mfu, arc_buf_hdr_t *, buf); ++ arc_change_state(new_state, buf, hash_lock); ++ ++ ARCSTAT_BUMP(arcstat_mfu_ghost_hits); ++ } else if (buf->b_state == arc_l2c_only) { ++ /* ++ * This buffer is on the 2nd Level ARC. ++ */ ++ ++ buf->b_arc_access = ddi_get_lbolt(); ++ DTRACE_PROBE1(new_state__mfu, arc_buf_hdr_t *, buf); ++ arc_change_state(arc_mfu, buf, hash_lock); ++ } else { ++ ASSERT(!"invalid arc state"); ++ } ++} ++ ++/* a generic arc_done_func_t which you can use */ ++/* ARGSUSED */ ++void ++arc_bcopy_func(zio_t *zio, arc_buf_t *buf, void *arg) ++{ ++ if (zio == NULL || zio->io_error == 0) ++ bcopy(buf->b_data, arg, buf->b_hdr->b_size); ++ VERIFY(arc_buf_remove_ref(buf, arg) == 1); ++} ++ ++/* a generic arc_done_func_t */ ++void ++arc_getbuf_func(zio_t *zio, arc_buf_t *buf, void *arg) ++{ ++ arc_buf_t **bufp = arg; ++ if (zio && zio->io_error) { ++ VERIFY(arc_buf_remove_ref(buf, arg) == 1); ++ *bufp = NULL; ++ } else { ++ *bufp = buf; ++ ASSERT(buf->b_data); ++ } ++} ++ ++static void ++arc_read_done(zio_t *zio) ++{ ++ arc_buf_hdr_t *hdr, *found; ++ arc_buf_t *buf; ++ arc_buf_t *abuf; /* buffer we're assigning to callback */ ++ kmutex_t *hash_lock; ++ arc_callback_t *callback_list, *acb; ++ int freeable = FALSE; ++ ++ buf = zio->io_private; ++ hdr = buf->b_hdr; ++ ++ /* ++ * The hdr was inserted into hash-table and removed from lists ++ * prior to starting I/O. We should find this header, since ++ * it's in the hash table, and it should be legit since it's ++ * not possible to evict it during the I/O. The only possible ++ * reason for it not to be found is if we were freed during the ++ * read. 
++ */ ++ found = buf_hash_find(hdr->b_spa, &hdr->b_dva, hdr->b_birth, ++ &hash_lock); ++ ++ ASSERT((found == NULL && HDR_FREED_IN_READ(hdr) && hash_lock == NULL) || ++ (found == hdr && DVA_EQUAL(&hdr->b_dva, BP_IDENTITY(zio->io_bp))) || ++ (found == hdr && HDR_L2_READING(hdr))); ++ ++ hdr->b_flags &= ~ARC_L2_EVICTED; ++ if (l2arc_noprefetch && (hdr->b_flags & ARC_PREFETCH)) ++ hdr->b_flags &= ~ARC_L2CACHE; ++ ++ /* byteswap if necessary */ ++ callback_list = hdr->b_acb; ++ ASSERT(callback_list != NULL); ++ if (BP_SHOULD_BYTESWAP(zio->io_bp) && zio->io_error == 0) { ++ arc_byteswap_func_t *func = BP_GET_LEVEL(zio->io_bp) > 0 ? ++ byteswap_uint64_array : ++ dmu_ot[BP_GET_TYPE(zio->io_bp)].ot_byteswap; ++ func(buf->b_data, hdr->b_size); ++ } ++ ++ arc_cksum_compute(buf, B_FALSE); ++ ++ if (hash_lock && zio->io_error == 0 && hdr->b_state == arc_anon) { ++ /* ++ * Only call arc_access on anonymous buffers. This is because ++ * if we've issued an I/O for an evicted buffer, we've already ++ * called arc_access (to prevent any simultaneous readers from ++ * getting confused). ++ */ ++ arc_access(hdr, hash_lock); ++ } ++ ++ /* create copies of the data buffer for the callers */ ++ abuf = buf; ++ for (acb = callback_list; acb; acb = acb->acb_next) { ++ if (acb->acb_done) { ++ if (abuf == NULL) ++ abuf = arc_buf_clone(buf); ++ acb->acb_buf = abuf; ++ abuf = NULL; ++ } ++ } ++ hdr->b_acb = NULL; ++ hdr->b_flags &= ~ARC_IO_IN_PROGRESS; ++ ASSERT(!HDR_BUF_AVAILABLE(hdr)); ++ if (abuf == buf) { ++ ASSERT(buf->b_efunc == NULL); ++ ASSERT(hdr->b_datacnt == 1); ++ hdr->b_flags |= ARC_BUF_AVAILABLE; ++ } ++ ++ ASSERT(refcount_is_zero(&hdr->b_refcnt) || callback_list != NULL); ++ ++ if (zio->io_error != 0) { ++ hdr->b_flags |= ARC_IO_ERROR; ++ if (hdr->b_state != arc_anon) ++ arc_change_state(arc_anon, hdr, hash_lock); ++ if (HDR_IN_HASH_TABLE(hdr)) ++ buf_hash_remove(hdr); ++ freeable = refcount_is_zero(&hdr->b_refcnt); ++ } ++ ++ /* ++ * Broadcast before we drop the hash_lock to avoid the possibility ++ * that the hdr (and hence the cv) might be freed before we get to ++ * the cv_broadcast(). ++ */ ++ cv_broadcast(&hdr->b_cv); ++ ++ if (hash_lock) { ++ mutex_exit(hash_lock); ++ } else { ++ /* ++ * This block was freed while we waited for the read to ++ * complete. It has been removed from the hash table and ++ * moved to the anonymous state (so that it won't show up ++ * in the cache). ++ */ ++ ASSERT3P(hdr->b_state, ==, arc_anon); ++ freeable = refcount_is_zero(&hdr->b_refcnt); ++ } ++ ++ /* execute each callback and free its structure */ ++ while ((acb = callback_list) != NULL) { ++ if (acb->acb_done) ++ acb->acb_done(zio, acb->acb_buf, acb->acb_private); ++ ++ if (acb->acb_zio_dummy != NULL) { ++ acb->acb_zio_dummy->io_error = zio->io_error; ++ zio_nowait(acb->acb_zio_dummy); ++ } ++ ++ callback_list = acb->acb_next; ++ kmem_free(acb, sizeof (arc_callback_t)); ++ } ++ ++ if (freeable) ++ arc_hdr_destroy(hdr); ++} ++ ++/* ++ * "Read" the block block at the specified DVA (in bp) via the ++ * cache. If the block is found in the cache, invoke the provided ++ * callback immediately and return. Note that the `zio' parameter ++ * in the callback will be NULL in this case, since no IO was ++ * required. If the block is not in the cache pass the read request ++ * on to the spa with a substitute callback function, so that the ++ * requested block will be added to the cache. 
++ * ++ * If a read request arrives for a block that has a read in-progress, ++ * either wait for the in-progress read to complete (and return the ++ * results); or, if this is a read with a "done" func, add a record ++ * to the read to invoke the "done" func when the read completes, ++ * and return; or just return. ++ * ++ * arc_read_done() will invoke all the requested "done" functions ++ * for readers of this block. ++ * ++ * Normal callers should use arc_read and pass the arc buffer and offset ++ * for the bp. But if you know you don't need locking, you can use ++ * arc_read_bp. ++ */ ++int ++arc_read(zio_t *pio, spa_t *spa, const blkptr_t *bp, arc_buf_t *pbuf, ++ arc_done_func_t *done, void *private, int priority, int zio_flags, ++ uint32_t *arc_flags, const zbookmark_t *zb) ++{ ++ int err; ++ ++ if (pbuf == NULL) { ++ /* ++ * XXX This happens from traverse callback funcs, for ++ * the objset_phys_t block. ++ */ ++ return (arc_read_nolock(pio, spa, bp, done, private, priority, ++ zio_flags, arc_flags, zb)); ++ } ++ ++ ASSERT(!refcount_is_zero(&pbuf->b_hdr->b_refcnt)); ++ ASSERT3U((char *)bp - (char *)pbuf->b_data, <, pbuf->b_hdr->b_size); ++ rw_enter(&pbuf->b_data_lock, RW_READER); ++ ++ err = arc_read_nolock(pio, spa, bp, done, private, priority, ++ zio_flags, arc_flags, zb); ++ rw_exit(&pbuf->b_data_lock); ++ ++ return (err); ++} ++ ++int ++arc_read_nolock(zio_t *pio, spa_t *spa, const blkptr_t *bp, ++ arc_done_func_t *done, void *private, int priority, int zio_flags, ++ uint32_t *arc_flags, const zbookmark_t *zb) ++{ ++ arc_buf_hdr_t *hdr; ++ arc_buf_t *buf = NULL; ++ kmutex_t *hash_lock; ++ zio_t *rzio; ++ uint64_t guid = spa_load_guid(spa); ++ ++top: ++ hdr = buf_hash_find(guid, BP_IDENTITY(bp), BP_PHYSICAL_BIRTH(bp), ++ &hash_lock); ++ if (hdr && hdr->b_datacnt > 0) { ++ ++ *arc_flags |= ARC_CACHED; ++ ++ if (HDR_IO_IN_PROGRESS(hdr)) { ++ ++ if (*arc_flags & ARC_WAIT) { ++ cv_wait(&hdr->b_cv, hash_lock); ++ mutex_exit(hash_lock); ++ goto top; ++ } ++ ASSERT(*arc_flags & ARC_NOWAIT); ++ ++ if (done) { ++ arc_callback_t *acb = NULL; ++ ++ acb = kmem_zalloc(sizeof (arc_callback_t), ++ KM_PUSHPAGE); ++ acb->acb_done = done; ++ acb->acb_private = private; ++ if (pio != NULL) ++ acb->acb_zio_dummy = zio_null(pio, ++ spa, NULL, NULL, NULL, zio_flags); ++ ++ ASSERT(acb->acb_done != NULL); ++ acb->acb_next = hdr->b_acb; ++ hdr->b_acb = acb; ++ add_reference(hdr, hash_lock, private); ++ mutex_exit(hash_lock); ++ return (0); ++ } ++ mutex_exit(hash_lock); ++ return (0); ++ } ++ ++ ASSERT(hdr->b_state == arc_mru || hdr->b_state == arc_mfu); ++ ++ if (done) { ++ add_reference(hdr, hash_lock, private); ++ /* ++ * If this block is already in use, create a new ++ * copy of the data so that we will be guaranteed ++ * that arc_release() will always succeed. 
++ */ ++ buf = hdr->b_buf; ++ ASSERT(buf); ++ ASSERT(buf->b_data); ++ if (HDR_BUF_AVAILABLE(hdr)) { ++ ASSERT(buf->b_efunc == NULL); ++ hdr->b_flags &= ~ARC_BUF_AVAILABLE; ++ } else { ++ buf = arc_buf_clone(buf); ++ } ++ ++ } else if (*arc_flags & ARC_PREFETCH && ++ refcount_count(&hdr->b_refcnt) == 0) { ++ hdr->b_flags |= ARC_PREFETCH; ++ } ++ DTRACE_PROBE1(arc__hit, arc_buf_hdr_t *, hdr); ++ arc_access(hdr, hash_lock); ++ if (*arc_flags & ARC_L2CACHE) ++ hdr->b_flags |= ARC_L2CACHE; ++ mutex_exit(hash_lock); ++ ARCSTAT_BUMP(arcstat_hits); ++ ARCSTAT_CONDSTAT(!(hdr->b_flags & ARC_PREFETCH), ++ demand, prefetch, hdr->b_type != ARC_BUFC_METADATA, ++ data, metadata, hits); ++ ++ if (done) ++ done(NULL, buf, private); ++ } else { ++ uint64_t size = BP_GET_LSIZE(bp); ++ arc_callback_t *acb; ++ vdev_t *vd = NULL; ++ uint64_t addr = -1; ++ boolean_t devw = B_FALSE; ++ ++ if (hdr == NULL) { ++ /* this block is not in the cache */ ++ arc_buf_hdr_t *exists; ++ arc_buf_contents_t type = BP_GET_BUFC_TYPE(bp); ++ buf = arc_buf_alloc(spa, size, private, type); ++ hdr = buf->b_hdr; ++ hdr->b_dva = *BP_IDENTITY(bp); ++ hdr->b_birth = BP_PHYSICAL_BIRTH(bp); ++ hdr->b_cksum0 = bp->blk_cksum.zc_word[0]; ++ exists = buf_hash_insert(hdr, &hash_lock); ++ if (exists) { ++ /* somebody beat us to the hash insert */ ++ mutex_exit(hash_lock); ++ buf_discard_identity(hdr); ++ (void) arc_buf_remove_ref(buf, private); ++ goto top; /* restart the IO request */ ++ } ++ /* if this is a prefetch, we don't have a reference */ ++ if (*arc_flags & ARC_PREFETCH) { ++ (void) remove_reference(hdr, hash_lock, ++ private); ++ hdr->b_flags |= ARC_PREFETCH; ++ } ++ if (*arc_flags & ARC_L2CACHE) ++ hdr->b_flags |= ARC_L2CACHE; ++ if (BP_GET_LEVEL(bp) > 0) ++ hdr->b_flags |= ARC_INDIRECT; ++ } else { ++ /* this block is in the ghost cache */ ++ ASSERT(GHOST_STATE(hdr->b_state)); ++ ASSERT(!HDR_IO_IN_PROGRESS(hdr)); ++ ASSERT3U(refcount_count(&hdr->b_refcnt), ==, 0); ++ ASSERT(hdr->b_buf == NULL); ++ ++ /* if this is a prefetch, we don't have a reference */ ++ if (*arc_flags & ARC_PREFETCH) ++ hdr->b_flags |= ARC_PREFETCH; ++ else ++ add_reference(hdr, hash_lock, private); ++ if (*arc_flags & ARC_L2CACHE) ++ hdr->b_flags |= ARC_L2CACHE; ++ buf = kmem_cache_alloc(buf_cache, KM_PUSHPAGE); ++ buf->b_hdr = hdr; ++ buf->b_data = NULL; ++ buf->b_efunc = NULL; ++ buf->b_private = NULL; ++ buf->b_next = NULL; ++ hdr->b_buf = buf; ++ ASSERT(hdr->b_datacnt == 0); ++ hdr->b_datacnt = 1; ++ arc_get_data_buf(buf); ++ arc_access(hdr, hash_lock); ++ } ++ ++ ASSERT(!GHOST_STATE(hdr->b_state)); ++ ++ acb = kmem_zalloc(sizeof (arc_callback_t), KM_PUSHPAGE); ++ acb->acb_done = done; ++ acb->acb_private = private; ++ ++ ASSERT(hdr->b_acb == NULL); ++ hdr->b_acb = acb; ++ hdr->b_flags |= ARC_IO_IN_PROGRESS; ++ ++ if (HDR_L2CACHE(hdr) && hdr->b_l2hdr != NULL && ++ (vd = hdr->b_l2hdr->b_dev->l2ad_vdev) != NULL) { ++ devw = hdr->b_l2hdr->b_dev->l2ad_writing; ++ addr = hdr->b_l2hdr->b_daddr; ++ /* ++ * Lock out device removal. 
++ */ ++ if (vdev_is_dead(vd) || ++ !spa_config_tryenter(spa, SCL_L2ARC, vd, RW_READER)) ++ vd = NULL; ++ } ++ ++ mutex_exit(hash_lock); ++ ++ ASSERT3U(hdr->b_size, ==, size); ++ DTRACE_PROBE4(arc__miss, arc_buf_hdr_t *, hdr, blkptr_t *, bp, ++ uint64_t, size, zbookmark_t *, zb); ++ ARCSTAT_BUMP(arcstat_misses); ++ ARCSTAT_CONDSTAT(!(hdr->b_flags & ARC_PREFETCH), ++ demand, prefetch, hdr->b_type != ARC_BUFC_METADATA, ++ data, metadata, misses); ++ ++ if (vd != NULL && l2arc_ndev != 0 && !(l2arc_norw && devw)) { ++ /* ++ * Read from the L2ARC if the following are true: ++ * 1. The L2ARC vdev was previously cached. ++ * 2. This buffer still has L2ARC metadata. ++ * 3. This buffer isn't currently writing to the L2ARC. ++ * 4. The L2ARC entry wasn't evicted, which may ++ * also have invalidated the vdev. ++ * 5. This isn't prefetch and l2arc_noprefetch is set. ++ */ ++ if (hdr->b_l2hdr != NULL && ++ !HDR_L2_WRITING(hdr) && !HDR_L2_EVICTED(hdr) && ++ !(l2arc_noprefetch && HDR_PREFETCH(hdr))) { ++ l2arc_read_callback_t *cb; ++ ++ DTRACE_PROBE1(l2arc__hit, arc_buf_hdr_t *, hdr); ++ ARCSTAT_BUMP(arcstat_l2_hits); ++ ++ cb = kmem_zalloc(sizeof (l2arc_read_callback_t), ++ KM_PUSHPAGE); ++ cb->l2rcb_buf = buf; ++ cb->l2rcb_spa = spa; ++ cb->l2rcb_bp = *bp; ++ cb->l2rcb_zb = *zb; ++ cb->l2rcb_flags = zio_flags; ++ ++ /* ++ * l2arc read. The SCL_L2ARC lock will be ++ * released by l2arc_read_done(). ++ */ ++ rzio = zio_read_phys(pio, vd, addr, size, ++ buf->b_data, ZIO_CHECKSUM_OFF, ++ l2arc_read_done, cb, priority, zio_flags | ++ ZIO_FLAG_DONT_CACHE | ZIO_FLAG_CANFAIL | ++ ZIO_FLAG_DONT_PROPAGATE | ++ ZIO_FLAG_DONT_RETRY, B_FALSE); ++ DTRACE_PROBE2(l2arc__read, vdev_t *, vd, ++ zio_t *, rzio); ++ ARCSTAT_INCR(arcstat_l2_read_bytes, size); ++ ++ if (*arc_flags & ARC_NOWAIT) { ++ zio_nowait(rzio); ++ return (0); ++ } ++ ++ ASSERT(*arc_flags & ARC_WAIT); ++ if (zio_wait(rzio) == 0) ++ return (0); ++ ++ /* l2arc read error; goto zio_read() */ ++ } else { ++ DTRACE_PROBE1(l2arc__miss, ++ arc_buf_hdr_t *, hdr); ++ ARCSTAT_BUMP(arcstat_l2_misses); ++ if (HDR_L2_WRITING(hdr)) ++ ARCSTAT_BUMP(arcstat_l2_rw_clash); ++ spa_config_exit(spa, SCL_L2ARC, vd); ++ } ++ } else { ++ if (vd != NULL) ++ spa_config_exit(spa, SCL_L2ARC, vd); ++ if (l2arc_ndev != 0) { ++ DTRACE_PROBE1(l2arc__miss, ++ arc_buf_hdr_t *, hdr); ++ ARCSTAT_BUMP(arcstat_l2_misses); ++ } ++ } ++ ++ rzio = zio_read(pio, spa, bp, buf->b_data, size, ++ arc_read_done, buf, priority, zio_flags, zb); ++ ++ if (*arc_flags & ARC_WAIT) ++ return (zio_wait(rzio)); ++ ++ ASSERT(*arc_flags & ARC_NOWAIT); ++ zio_nowait(rzio); ++ } ++ return (0); ++} ++ ++arc_prune_t * ++arc_add_prune_callback(arc_prune_func_t *func, void *private) ++{ ++ arc_prune_t *p; ++ ++ p = kmem_alloc(sizeof(*p), KM_SLEEP); ++ p->p_pfunc = func; ++ p->p_private = private; ++ list_link_init(&p->p_node); ++ refcount_create(&p->p_refcnt); ++ ++ mutex_enter(&arc_prune_mtx); ++ refcount_add(&p->p_refcnt, &arc_prune_list); ++ list_insert_head(&arc_prune_list, p); ++ mutex_exit(&arc_prune_mtx); ++ ++ return (p); ++} ++ ++void ++arc_remove_prune_callback(arc_prune_t *p) ++{ ++ mutex_enter(&arc_prune_mtx); ++ list_remove(&arc_prune_list, p); ++ if (refcount_remove(&p->p_refcnt, &arc_prune_list) == 0) { ++ refcount_destroy(&p->p_refcnt); ++ kmem_free(p, sizeof (*p)); ++ } ++ mutex_exit(&arc_prune_mtx); ++} ++ ++void ++arc_set_callback(arc_buf_t *buf, arc_evict_func_t *func, void *private) ++{ ++ ASSERT(buf->b_hdr != NULL); ++ ASSERT(buf->b_hdr->b_state != arc_anon); ++ 
ASSERT(!refcount_is_zero(&buf->b_hdr->b_refcnt) || func == NULL); ++ ASSERT(buf->b_efunc == NULL); ++ ASSERT(!HDR_BUF_AVAILABLE(buf->b_hdr)); ++ ++ buf->b_efunc = func; ++ buf->b_private = private; ++} ++ ++/* ++ * This is used by the DMU to let the ARC know that a buffer is ++ * being evicted, so the ARC should clean up. If this arc buf ++ * is not yet in the evicted state, it will be put there. ++ */ ++int ++arc_buf_evict(arc_buf_t *buf) ++{ ++ arc_buf_hdr_t *hdr; ++ kmutex_t *hash_lock; ++ arc_buf_t **bufp; ++ ++ mutex_enter(&buf->b_evict_lock); ++ hdr = buf->b_hdr; ++ if (hdr == NULL) { ++ /* ++ * We are in arc_do_user_evicts(). ++ */ ++ ASSERT(buf->b_data == NULL); ++ mutex_exit(&buf->b_evict_lock); ++ return (0); ++ } else if (buf->b_data == NULL) { ++ arc_buf_t copy = *buf; /* structure assignment */ ++ /* ++ * We are on the eviction list; process this buffer now ++ * but let arc_do_user_evicts() do the reaping. ++ */ ++ buf->b_efunc = NULL; ++ mutex_exit(&buf->b_evict_lock); ++ VERIFY(copy.b_efunc(©) == 0); ++ return (1); ++ } ++ hash_lock = HDR_LOCK(hdr); ++ mutex_enter(hash_lock); ++ hdr = buf->b_hdr; ++ ASSERT3P(hash_lock, ==, HDR_LOCK(hdr)); ++ ++ ASSERT3U(refcount_count(&hdr->b_refcnt), <, hdr->b_datacnt); ++ ASSERT(hdr->b_state == arc_mru || hdr->b_state == arc_mfu); ++ ++ /* ++ * Pull this buffer off of the hdr ++ */ ++ bufp = &hdr->b_buf; ++ while (*bufp != buf) ++ bufp = &(*bufp)->b_next; ++ *bufp = buf->b_next; ++ ++ ASSERT(buf->b_data != NULL); ++ arc_buf_destroy(buf, FALSE, FALSE); ++ ++ if (hdr->b_datacnt == 0) { ++ arc_state_t *old_state = hdr->b_state; ++ arc_state_t *evicted_state; ++ ++ ASSERT(hdr->b_buf == NULL); ++ ASSERT(refcount_is_zero(&hdr->b_refcnt)); ++ ++ evicted_state = ++ (old_state == arc_mru) ? arc_mru_ghost : arc_mfu_ghost; ++ ++ mutex_enter(&old_state->arcs_mtx); ++ mutex_enter(&evicted_state->arcs_mtx); ++ ++ arc_change_state(evicted_state, hdr, hash_lock); ++ ASSERT(HDR_IN_HASH_TABLE(hdr)); ++ hdr->b_flags |= ARC_IN_HASH_TABLE; ++ hdr->b_flags &= ~ARC_BUF_AVAILABLE; ++ ++ mutex_exit(&evicted_state->arcs_mtx); ++ mutex_exit(&old_state->arcs_mtx); ++ } ++ mutex_exit(hash_lock); ++ mutex_exit(&buf->b_evict_lock); ++ ++ VERIFY(buf->b_efunc(buf) == 0); ++ buf->b_efunc = NULL; ++ buf->b_private = NULL; ++ buf->b_hdr = NULL; ++ buf->b_next = NULL; ++ kmem_cache_free(buf_cache, buf); ++ return (1); ++} ++ ++/* ++ * Release this buffer from the cache. This must be done ++ * after a read and prior to modifying the buffer contents. ++ * If the buffer has more than one reference, we must make ++ * a new hdr for the buffer. ++ */ ++void ++arc_release(arc_buf_t *buf, void *tag) ++{ ++ arc_buf_hdr_t *hdr; ++ kmutex_t *hash_lock = NULL; ++ l2arc_buf_hdr_t *l2hdr; ++ uint64_t buf_size = 0; ++ ++ /* ++ * It would be nice to assert that if it's DMU metadata (level > ++ * 0 || it's the dnode file), then it must be syncing context. ++ * But we don't know that information at this level. ++ */ ++ ++ mutex_enter(&buf->b_evict_lock); ++ hdr = buf->b_hdr; ++ ++ /* this buffer is not on any list */ ++ ASSERT(refcount_count(&hdr->b_refcnt) > 0); ++ ++ if (hdr->b_state == arc_anon) { ++ /* this buffer is already released */ ++ ASSERT(buf->b_efunc == NULL); ++ } else { ++ hash_lock = HDR_LOCK(hdr); ++ mutex_enter(hash_lock); ++ hdr = buf->b_hdr; ++ ASSERT3P(hash_lock, ==, HDR_LOCK(hdr)); ++ } ++ ++ l2hdr = hdr->b_l2hdr; ++ if (l2hdr) { ++ mutex_enter(&l2arc_buflist_mtx); ++ hdr->b_l2hdr = NULL; ++ buf_size = hdr->b_size; ++ } ++ ++ /* ++ * Do we have more than one buf? 
++ */ ++ if (hdr->b_datacnt > 1) { ++ arc_buf_hdr_t *nhdr; ++ arc_buf_t **bufp; ++ uint64_t blksz = hdr->b_size; ++ uint64_t spa = hdr->b_spa; ++ arc_buf_contents_t type = hdr->b_type; ++ uint32_t flags = hdr->b_flags; ++ ++ ASSERT(hdr->b_buf != buf || buf->b_next != NULL); ++ /* ++ * Pull the data off of this hdr and attach it to ++ * a new anonymous hdr. ++ */ ++ (void) remove_reference(hdr, hash_lock, tag); ++ bufp = &hdr->b_buf; ++ while (*bufp != buf) ++ bufp = &(*bufp)->b_next; ++ *bufp = buf->b_next; ++ buf->b_next = NULL; ++ ++ ASSERT3U(hdr->b_state->arcs_size, >=, hdr->b_size); ++ atomic_add_64(&hdr->b_state->arcs_size, -hdr->b_size); ++ if (refcount_is_zero(&hdr->b_refcnt)) { ++ uint64_t *size = &hdr->b_state->arcs_lsize[hdr->b_type]; ++ ASSERT3U(*size, >=, hdr->b_size); ++ atomic_add_64(size, -hdr->b_size); ++ } ++ hdr->b_datacnt -= 1; ++ arc_cksum_verify(buf); ++ ++ mutex_exit(hash_lock); ++ ++ nhdr = kmem_cache_alloc(hdr_cache, KM_PUSHPAGE); ++ nhdr->b_size = blksz; ++ nhdr->b_spa = spa; ++ nhdr->b_type = type; ++ nhdr->b_buf = buf; ++ nhdr->b_state = arc_anon; ++ nhdr->b_arc_access = 0; ++ nhdr->b_flags = flags & ARC_L2_WRITING; ++ nhdr->b_l2hdr = NULL; ++ nhdr->b_datacnt = 1; ++ nhdr->b_freeze_cksum = NULL; ++ (void) refcount_add(&nhdr->b_refcnt, tag); ++ buf->b_hdr = nhdr; ++ mutex_exit(&buf->b_evict_lock); ++ atomic_add_64(&arc_anon->arcs_size, blksz); ++ } else { ++ mutex_exit(&buf->b_evict_lock); ++ ASSERT(refcount_count(&hdr->b_refcnt) == 1); ++ ASSERT(!list_link_active(&hdr->b_arc_node)); ++ ASSERT(!HDR_IO_IN_PROGRESS(hdr)); ++ if (hdr->b_state != arc_anon) ++ arc_change_state(arc_anon, hdr, hash_lock); ++ hdr->b_arc_access = 0; ++ if (hash_lock) ++ mutex_exit(hash_lock); ++ ++ buf_discard_identity(hdr); ++ arc_buf_thaw(buf); ++ } ++ buf->b_efunc = NULL; ++ buf->b_private = NULL; ++ ++ if (l2hdr) { ++ list_remove(l2hdr->b_dev->l2ad_buflist, hdr); ++ kmem_free(l2hdr, sizeof (l2arc_buf_hdr_t)); ++ ARCSTAT_INCR(arcstat_l2_size, -buf_size); ++ mutex_exit(&l2arc_buflist_mtx); ++ } ++} ++ ++/* ++ * Release this buffer. If it does not match the provided BP, fill it ++ * with that block's contents. ++ */ ++/* ARGSUSED */ ++int ++arc_release_bp(arc_buf_t *buf, void *tag, blkptr_t *bp, spa_t *spa, ++ zbookmark_t *zb) ++{ ++ arc_release(buf, tag); ++ return (0); ++} ++ ++int ++arc_released(arc_buf_t *buf) ++{ ++ int released; ++ ++ mutex_enter(&buf->b_evict_lock); ++ released = (buf->b_data != NULL && buf->b_hdr->b_state == arc_anon); ++ mutex_exit(&buf->b_evict_lock); ++ return (released); ++} ++ ++int ++arc_has_callback(arc_buf_t *buf) ++{ ++ int callback; ++ ++ mutex_enter(&buf->b_evict_lock); ++ callback = (buf->b_efunc != NULL); ++ mutex_exit(&buf->b_evict_lock); ++ return (callback); ++} ++ ++#ifdef ZFS_DEBUG ++int ++arc_referenced(arc_buf_t *buf) ++{ ++ int referenced; ++ ++ mutex_enter(&buf->b_evict_lock); ++ referenced = (refcount_count(&buf->b_hdr->b_refcnt)); ++ mutex_exit(&buf->b_evict_lock); ++ return (referenced); ++} ++#endif ++ ++static void ++arc_write_ready(zio_t *zio) ++{ ++ arc_write_callback_t *callback = zio->io_private; ++ arc_buf_t *buf = callback->awcb_buf; ++ arc_buf_hdr_t *hdr = buf->b_hdr; ++ ++ ASSERT(!refcount_is_zero(&buf->b_hdr->b_refcnt)); ++ callback->awcb_ready(zio, buf, callback->awcb_private); ++ ++ /* ++ * If the IO is already in progress, then this is a re-write ++ * attempt, so we need to thaw and re-compute the cksum. ++ * It is the responsibility of the callback to handle the ++ * accounting for any re-write attempt. 
++ */ ++ if (HDR_IO_IN_PROGRESS(hdr)) { ++ mutex_enter(&hdr->b_freeze_lock); ++ if (hdr->b_freeze_cksum != NULL) { ++ kmem_free(hdr->b_freeze_cksum, sizeof (zio_cksum_t)); ++ hdr->b_freeze_cksum = NULL; ++ } ++ mutex_exit(&hdr->b_freeze_lock); ++ } ++ arc_cksum_compute(buf, B_FALSE); ++ hdr->b_flags |= ARC_IO_IN_PROGRESS; ++} ++ ++static void ++arc_write_done(zio_t *zio) ++{ ++ arc_write_callback_t *callback = zio->io_private; ++ arc_buf_t *buf = callback->awcb_buf; ++ arc_buf_hdr_t *hdr = buf->b_hdr; ++ ++ ASSERT(hdr->b_acb == NULL); ++ ++ if (zio->io_error == 0) { ++ hdr->b_dva = *BP_IDENTITY(zio->io_bp); ++ hdr->b_birth = BP_PHYSICAL_BIRTH(zio->io_bp); ++ hdr->b_cksum0 = zio->io_bp->blk_cksum.zc_word[0]; ++ } else { ++ ASSERT(BUF_EMPTY(hdr)); ++ } ++ ++ /* ++ * If the block to be written was all-zero, we may have ++ * compressed it away. In this case no write was performed ++ * so there will be no dva/birth/checksum. The buffer must ++ * therefore remain anonymous (and uncached). ++ */ ++ if (!BUF_EMPTY(hdr)) { ++ arc_buf_hdr_t *exists; ++ kmutex_t *hash_lock; ++ ++ ASSERT(zio->io_error == 0); ++ ++ arc_cksum_verify(buf); ++ ++ exists = buf_hash_insert(hdr, &hash_lock); ++ if (exists) { ++ /* ++ * This can only happen if we overwrite for ++ * sync-to-convergence, because we remove ++ * buffers from the hash table when we arc_free(). ++ */ ++ if (zio->io_flags & ZIO_FLAG_IO_REWRITE) { ++ if (!BP_EQUAL(&zio->io_bp_orig, zio->io_bp)) ++ panic("bad overwrite, hdr=%p exists=%p", ++ (void *)hdr, (void *)exists); ++ ASSERT(refcount_is_zero(&exists->b_refcnt)); ++ arc_change_state(arc_anon, exists, hash_lock); ++ mutex_exit(hash_lock); ++ arc_hdr_destroy(exists); ++ exists = buf_hash_insert(hdr, &hash_lock); ++ ASSERT3P(exists, ==, NULL); ++ } else { ++ /* Dedup */ ++ ASSERT(hdr->b_datacnt == 1); ++ ASSERT(hdr->b_state == arc_anon); ++ ASSERT(BP_GET_DEDUP(zio->io_bp)); ++ ASSERT(BP_GET_LEVEL(zio->io_bp) == 0); ++ } ++ } ++ hdr->b_flags &= ~ARC_IO_IN_PROGRESS; ++ /* if it's not anon, we are doing a scrub */ ++ if (!exists && hdr->b_state == arc_anon) ++ arc_access(hdr, hash_lock); ++ mutex_exit(hash_lock); ++ } else { ++ hdr->b_flags &= ~ARC_IO_IN_PROGRESS; ++ } ++ ++ ASSERT(!refcount_is_zero(&hdr->b_refcnt)); ++ callback->awcb_done(zio, buf, callback->awcb_private); ++ ++ kmem_free(callback, sizeof (arc_write_callback_t)); ++} ++ ++zio_t * ++arc_write(zio_t *pio, spa_t *spa, uint64_t txg, ++ blkptr_t *bp, arc_buf_t *buf, boolean_t l2arc, const zio_prop_t *zp, ++ arc_done_func_t *ready, arc_done_func_t *done, void *private, ++ int priority, int zio_flags, const zbookmark_t *zb) ++{ ++ arc_buf_hdr_t *hdr = buf->b_hdr; ++ arc_write_callback_t *callback; ++ zio_t *zio; ++ ++ ASSERT(ready != NULL); ++ ASSERT(done != NULL); ++ ASSERT(!HDR_IO_ERROR(hdr)); ++ ASSERT((hdr->b_flags & ARC_IO_IN_PROGRESS) == 0); ++ ASSERT(hdr->b_acb == NULL); ++ if (l2arc) ++ hdr->b_flags |= ARC_L2CACHE; ++ callback = kmem_zalloc(sizeof (arc_write_callback_t), KM_PUSHPAGE); ++ callback->awcb_ready = ready; ++ callback->awcb_done = done; ++ callback->awcb_private = private; ++ callback->awcb_buf = buf; ++ ++ zio = zio_write(pio, spa, txg, bp, buf->b_data, hdr->b_size, zp, ++ arc_write_ready, arc_write_done, callback, priority, zio_flags, zb); ++ ++ return (zio); ++} ++ ++static int ++arc_memory_throttle(uint64_t reserve, uint64_t inflight_data, uint64_t txg) ++{ ++#ifdef _KERNEL ++ uint64_t available_memory; ++ ++ /* Easily reclaimable memory (free + inactive + arc-evictable) */ ++ available_memory = 
ptob(spl_kmem_availrmem()) + arc_evictable_memory(); ++ ++ if (available_memory <= zfs_write_limit_max) { ++ ARCSTAT_INCR(arcstat_memory_throttle_count, 1); ++ DMU_TX_STAT_BUMP(dmu_tx_memory_reclaim); ++ return (EAGAIN); ++ } ++ ++ if (inflight_data > available_memory / 4) { ++ ARCSTAT_INCR(arcstat_memory_throttle_count, 1); ++ DMU_TX_STAT_BUMP(dmu_tx_memory_inflight); ++ return (ERESTART); ++ } ++#endif ++ return (0); ++} ++ ++void ++arc_tempreserve_clear(uint64_t reserve) ++{ ++ atomic_add_64(&arc_tempreserve, -reserve); ++ ASSERT((int64_t)arc_tempreserve >= 0); ++} ++ ++int ++arc_tempreserve_space(uint64_t reserve, uint64_t txg) ++{ ++ int error; ++ uint64_t anon_size; ++ ++#ifdef ZFS_DEBUG ++ /* ++ * Once in a while, fail for no reason. Everything should cope. ++ */ ++ if (spa_get_random(10000) == 0) { ++ dprintf("forcing random failure\n"); ++ return (ERESTART); ++ } ++#endif ++ if (reserve > arc_c/4 && !arc_no_grow) ++ arc_c = MIN(arc_c_max, reserve * 4); ++ if (reserve > arc_c) { ++ DMU_TX_STAT_BUMP(dmu_tx_memory_reserve); ++ return (ENOMEM); ++ } ++ ++ /* ++ * Don't count loaned bufs as in flight dirty data to prevent long ++ * network delays from blocking transactions that are ready to be ++ * assigned to a txg. ++ */ ++ anon_size = MAX((int64_t)(arc_anon->arcs_size - arc_loaned_bytes), 0); ++ ++ /* ++ * Writes will, almost always, require additional memory allocations ++ * in order to compress/encrypt/etc the data. We therefor need to ++ * make sure that there is sufficient available memory for this. ++ */ ++ if ((error = arc_memory_throttle(reserve, anon_size, txg))) ++ return (error); ++ ++ /* ++ * Throttle writes when the amount of dirty data in the cache ++ * gets too large. We try to keep the cache less than half full ++ * of dirty blocks so that our sync times don't grow too large. ++ * Note: if two requests come in concurrently, we might let them ++ * both succeed, when one of them should fail. Not a huge deal. 
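For illustration only (not part of the patch; values are invented), a stand-alone sketch of the dirty-data throttle described above, applying the same half-of-arc_c / quarter-of-arc_c test used by arc_tempreserve_space():

#include <stdio.h>
#include <stdint.h>

int main(void)
{
	uint64_t arc_c           = 4ULL << 30;    /* ARC target size (sample)         */
	uint64_t arc_tempreserve = 256ULL << 20;  /* already reserved bytes (sample)  */
	uint64_t anon_size       = 1536ULL << 20; /* anonymous/dirty bytes (sample)   */
	uint64_t reserve         = 512ULL << 20;  /* new reservation request (sample) */

	/* Refuse when the request would push dirty data past half of arc_c
	 * while anonymous data alone already exceeds a quarter of arc_c. */
	if (reserve + arc_tempreserve + anon_size > arc_c / 2 &&
	    anon_size > arc_c / 4)
		printf("throttled: caller should back off and retry\n");
	else
		printf("reservation accepted\n");
	return 0;
}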
++ */ ++ ++ if (reserve + arc_tempreserve + anon_size > arc_c / 2 && ++ anon_size > arc_c / 4) { ++ dprintf("failing, arc_tempreserve=%lluK anon_meta=%lluK " ++ "anon_data=%lluK tempreserve=%lluK arc_c=%lluK\n", ++ arc_tempreserve>>10, ++ arc_anon->arcs_lsize[ARC_BUFC_METADATA]>>10, ++ arc_anon->arcs_lsize[ARC_BUFC_DATA]>>10, ++ reserve>>10, arc_c>>10); ++ DMU_TX_STAT_BUMP(dmu_tx_dirty_throttle); ++ return (ERESTART); ++ } ++ atomic_add_64(&arc_tempreserve, reserve); ++ return (0); ++} ++ ++static void ++arc_kstat_update_state(arc_state_t *state, kstat_named_t *size, ++ kstat_named_t *evict_data, kstat_named_t *evict_metadata) ++{ ++ size->value.ui64 = state->arcs_size; ++ evict_data->value.ui64 = state->arcs_lsize[ARC_BUFC_DATA]; ++ evict_metadata->value.ui64 = state->arcs_lsize[ARC_BUFC_METADATA]; ++} ++ ++static int ++arc_kstat_update(kstat_t *ksp, int rw) ++{ ++ arc_stats_t *as = ksp->ks_data; ++ ++ if (rw == KSTAT_WRITE) { ++ return (EACCES); ++ } else { ++ arc_kstat_update_state(arc_anon, ++ &as->arcstat_anon_size, ++ &as->arcstat_anon_evict_data, ++ &as->arcstat_anon_evict_metadata); ++ arc_kstat_update_state(arc_mru, ++ &as->arcstat_mru_size, ++ &as->arcstat_mru_evict_data, ++ &as->arcstat_mru_evict_metadata); ++ arc_kstat_update_state(arc_mru_ghost, ++ &as->arcstat_mru_ghost_size, ++ &as->arcstat_mru_ghost_evict_data, ++ &as->arcstat_mru_ghost_evict_metadata); ++ arc_kstat_update_state(arc_mfu, ++ &as->arcstat_mfu_size, ++ &as->arcstat_mfu_evict_data, ++ &as->arcstat_mfu_evict_metadata); ++ arc_kstat_update_state(arc_mfu_ghost, ++ &as->arcstat_mfu_ghost_size, ++ &as->arcstat_mfu_ghost_evict_data, ++ &as->arcstat_mfu_ghost_evict_metadata); ++ } ++ ++ return (0); ++} ++ ++void ++arc_init(void) ++{ ++ mutex_init(&arc_reclaim_thr_lock, NULL, MUTEX_DEFAULT, NULL); ++ cv_init(&arc_reclaim_thr_cv, NULL, CV_DEFAULT, NULL); ++ ++ /* Convert seconds to clock ticks */ ++ arc_min_prefetch_lifespan = 1 * hz; ++ ++ /* Start out with 1/8 of all memory */ ++ arc_c = physmem * PAGESIZE / 8; ++ ++#ifdef _KERNEL ++ /* ++ * On architectures where the physical memory can be larger ++ * than the addressable space (intel in 32-bit mode), we may ++ * need to limit the cache to 1/8 of VM size. ++ */ ++ arc_c = MIN(arc_c, vmem_size(heap_arena, VMEM_ALLOC | VMEM_FREE) / 8); ++ /* ++ * Register a shrinker to support synchronous (direct) memory ++ * reclaim from the arc. This is done to prevent kswapd from ++ * swapping out pages when it is preferable to shrink the arc. ++ */ ++ spl_register_shrinker(&arc_shrinker); ++#endif ++ ++ /* set min cache to 1/32 of all memory, or 64MB, whichever is more */ ++ arc_c_min = MAX(arc_c / 4, 64<<20); ++ /* set max to 1/2 of all memory */ ++ arc_c_max = MAX(arc_c * 4, arc_c_max); ++ ++ /* ++ * Allow the tunables to override our calculations if they are ++ * reasonable (ie. 
over 64MB) ++ */ ++ if (zfs_arc_max > 64<<20 && zfs_arc_max < physmem * PAGESIZE) ++ arc_c_max = zfs_arc_max; ++ if (zfs_arc_min > 64<<20 && zfs_arc_min <= arc_c_max) ++ arc_c_min = zfs_arc_min; ++ ++ arc_c = arc_c_max; ++ arc_p = (arc_c >> 1); ++ ++ /* limit meta-data to 1/4 of the arc capacity */ ++ arc_meta_limit = arc_c_max / 4; ++ arc_meta_max = 0; ++ ++ /* Allow the tunable to override if it is reasonable */ ++ if (zfs_arc_meta_limit > 0 && zfs_arc_meta_limit <= arc_c_max) ++ arc_meta_limit = zfs_arc_meta_limit; ++ ++ if (arc_c_min < arc_meta_limit / 2 && zfs_arc_min == 0) ++ arc_c_min = arc_meta_limit / 2; ++ ++ if (zfs_arc_grow_retry > 0) ++ arc_grow_retry = zfs_arc_grow_retry; ++ ++ if (zfs_arc_shrink_shift > 0) ++ arc_shrink_shift = zfs_arc_shrink_shift; ++ ++ if (zfs_arc_p_min_shift > 0) ++ arc_p_min_shift = zfs_arc_p_min_shift; ++ ++ if (zfs_arc_meta_prune > 0) ++ arc_meta_prune = zfs_arc_meta_prune; ++ ++ /* if kmem_flags are set, lets try to use less memory */ ++ if (kmem_debugging()) ++ arc_c = arc_c / 2; ++ if (arc_c < arc_c_min) ++ arc_c = arc_c_min; ++ ++ arc_anon = &ARC_anon; ++ arc_mru = &ARC_mru; ++ arc_mru_ghost = &ARC_mru_ghost; ++ arc_mfu = &ARC_mfu; ++ arc_mfu_ghost = &ARC_mfu_ghost; ++ arc_l2c_only = &ARC_l2c_only; ++ arc_size = 0; ++ ++ mutex_init(&arc_anon->arcs_mtx, NULL, MUTEX_DEFAULT, NULL); ++ mutex_init(&arc_mru->arcs_mtx, NULL, MUTEX_DEFAULT, NULL); ++ mutex_init(&arc_mru_ghost->arcs_mtx, NULL, MUTEX_DEFAULT, NULL); ++ mutex_init(&arc_mfu->arcs_mtx, NULL, MUTEX_DEFAULT, NULL); ++ mutex_init(&arc_mfu_ghost->arcs_mtx, NULL, MUTEX_DEFAULT, NULL); ++ mutex_init(&arc_l2c_only->arcs_mtx, NULL, MUTEX_DEFAULT, NULL); ++ ++ list_create(&arc_mru->arcs_list[ARC_BUFC_METADATA], ++ sizeof (arc_buf_hdr_t), offsetof(arc_buf_hdr_t, b_arc_node)); ++ list_create(&arc_mru->arcs_list[ARC_BUFC_DATA], ++ sizeof (arc_buf_hdr_t), offsetof(arc_buf_hdr_t, b_arc_node)); ++ list_create(&arc_mru_ghost->arcs_list[ARC_BUFC_METADATA], ++ sizeof (arc_buf_hdr_t), offsetof(arc_buf_hdr_t, b_arc_node)); ++ list_create(&arc_mru_ghost->arcs_list[ARC_BUFC_DATA], ++ sizeof (arc_buf_hdr_t), offsetof(arc_buf_hdr_t, b_arc_node)); ++ list_create(&arc_mfu->arcs_list[ARC_BUFC_METADATA], ++ sizeof (arc_buf_hdr_t), offsetof(arc_buf_hdr_t, b_arc_node)); ++ list_create(&arc_mfu->arcs_list[ARC_BUFC_DATA], ++ sizeof (arc_buf_hdr_t), offsetof(arc_buf_hdr_t, b_arc_node)); ++ list_create(&arc_mfu_ghost->arcs_list[ARC_BUFC_METADATA], ++ sizeof (arc_buf_hdr_t), offsetof(arc_buf_hdr_t, b_arc_node)); ++ list_create(&arc_mfu_ghost->arcs_list[ARC_BUFC_DATA], ++ sizeof (arc_buf_hdr_t), offsetof(arc_buf_hdr_t, b_arc_node)); ++ list_create(&arc_l2c_only->arcs_list[ARC_BUFC_METADATA], ++ sizeof (arc_buf_hdr_t), offsetof(arc_buf_hdr_t, b_arc_node)); ++ list_create(&arc_l2c_only->arcs_list[ARC_BUFC_DATA], ++ sizeof (arc_buf_hdr_t), offsetof(arc_buf_hdr_t, b_arc_node)); ++ ++ buf_init(); ++ ++ arc_thread_exit = 0; ++ list_create(&arc_prune_list, sizeof (arc_prune_t), ++ offsetof(arc_prune_t, p_node)); ++ arc_eviction_list = NULL; ++ mutex_init(&arc_prune_mtx, NULL, MUTEX_DEFAULT, NULL); ++ mutex_init(&arc_eviction_mtx, NULL, MUTEX_DEFAULT, NULL); ++ bzero(&arc_eviction_hdr, sizeof (arc_buf_hdr_t)); ++ ++ arc_ksp = kstat_create("zfs", 0, "arcstats", "misc", KSTAT_TYPE_NAMED, ++ sizeof (arc_stats) / sizeof (kstat_named_t), KSTAT_FLAG_VIRTUAL); ++ ++ if (arc_ksp != NULL) { ++ arc_ksp->ks_data = &arc_stats; ++ arc_ksp->ks_update = arc_kstat_update; ++ kstat_install(arc_ksp); ++ } ++ ++ (void) thread_create(NULL, 0, 
arc_adapt_thread, NULL, 0, &p0, ++ TS_RUN, minclsyspri); ++ ++ arc_dead = FALSE; ++ arc_warm = B_FALSE; ++ ++ if (zfs_write_limit_max == 0) ++ zfs_write_limit_max = ptob(physmem) >> zfs_write_limit_shift; ++ else ++ zfs_write_limit_shift = 0; ++ mutex_init(&zfs_write_limit_lock, NULL, MUTEX_DEFAULT, NULL); ++} ++ ++void ++arc_fini(void) ++{ ++ arc_prune_t *p; ++ ++ mutex_enter(&arc_reclaim_thr_lock); ++#ifdef _KERNEL ++ spl_unregister_shrinker(&arc_shrinker); ++#endif /* _KERNEL */ ++ ++ arc_thread_exit = 1; ++ while (arc_thread_exit != 0) ++ cv_wait(&arc_reclaim_thr_cv, &arc_reclaim_thr_lock); ++ mutex_exit(&arc_reclaim_thr_lock); ++ ++ arc_flush(NULL); ++ ++ arc_dead = TRUE; ++ ++ if (arc_ksp != NULL) { ++ kstat_delete(arc_ksp); ++ arc_ksp = NULL; ++ } ++ ++ mutex_enter(&arc_prune_mtx); ++ while ((p = list_head(&arc_prune_list)) != NULL) { ++ list_remove(&arc_prune_list, p); ++ refcount_remove(&p->p_refcnt, &arc_prune_list); ++ refcount_destroy(&p->p_refcnt); ++ kmem_free(p, sizeof (*p)); ++ } ++ mutex_exit(&arc_prune_mtx); ++ ++ list_destroy(&arc_prune_list); ++ mutex_destroy(&arc_prune_mtx); ++ mutex_destroy(&arc_eviction_mtx); ++ mutex_destroy(&arc_reclaim_thr_lock); ++ cv_destroy(&arc_reclaim_thr_cv); ++ ++ list_destroy(&arc_mru->arcs_list[ARC_BUFC_METADATA]); ++ list_destroy(&arc_mru_ghost->arcs_list[ARC_BUFC_METADATA]); ++ list_destroy(&arc_mfu->arcs_list[ARC_BUFC_METADATA]); ++ list_destroy(&arc_mfu_ghost->arcs_list[ARC_BUFC_METADATA]); ++ list_destroy(&arc_mru->arcs_list[ARC_BUFC_DATA]); ++ list_destroy(&arc_mru_ghost->arcs_list[ARC_BUFC_DATA]); ++ list_destroy(&arc_mfu->arcs_list[ARC_BUFC_DATA]); ++ list_destroy(&arc_mfu_ghost->arcs_list[ARC_BUFC_DATA]); ++ ++ mutex_destroy(&arc_anon->arcs_mtx); ++ mutex_destroy(&arc_mru->arcs_mtx); ++ mutex_destroy(&arc_mru_ghost->arcs_mtx); ++ mutex_destroy(&arc_mfu->arcs_mtx); ++ mutex_destroy(&arc_mfu_ghost->arcs_mtx); ++ mutex_destroy(&arc_l2c_only->arcs_mtx); ++ ++ mutex_destroy(&zfs_write_limit_lock); ++ ++ buf_fini(); ++ ++ ASSERT(arc_loaned_bytes == 0); ++} ++ ++/* ++ * Level 2 ARC ++ * ++ * The level 2 ARC (L2ARC) is a cache layer in-between main memory and disk. ++ * It uses dedicated storage devices to hold cached data, which are populated ++ * using large infrequent writes. The main role of this cache is to boost ++ * the performance of random read workloads. The intended L2ARC devices ++ * include short-stroked disks, solid state disks, and other media with ++ * substantially faster read latency than disk. ++ * ++ * +-----------------------+ ++ * | ARC | ++ * +-----------------------+ ++ * | ^ ^ ++ * | | | ++ * l2arc_feed_thread() arc_read() ++ * | | | ++ * | l2arc read | ++ * V | | ++ * +---------------+ | ++ * | L2ARC | | ++ * +---------------+ | ++ * | ^ | ++ * l2arc_write() | | ++ * | | | ++ * V | | ++ * +-------+ +-------+ ++ * | vdev | | vdev | ++ * | cache | | cache | ++ * +-------+ +-------+ ++ * +=========+ .-----. ++ * : L2ARC : |-_____-| ++ * : devices : | Disks | ++ * +=========+ `-_____-' ++ * ++ * Read requests are satisfied from the following sources, in order: ++ * ++ * 1) ARC ++ * 2) vdev cache of L2ARC devices ++ * 3) L2ARC devices ++ * 4) vdev cache of disks ++ * 5) disks ++ * ++ * Some L2ARC device types exhibit extremely slow write performance. ++ * To accommodate for this there are some significant differences between ++ * the L2ARC and traditional cache design: ++ * ++ * 1. There is no eviction path from the ARC to the L2ARC. 
Evictions from ++ * the ARC behave as usual, freeing buffers and placing headers on ghost ++ * lists. The ARC does not send buffers to the L2ARC during eviction as ++ * this would add inflated write latencies for all ARC memory pressure. ++ * ++ * 2. The L2ARC attempts to cache data from the ARC before it is evicted. ++ * It does this by periodically scanning buffers from the eviction-end of ++ * the MFU and MRU ARC lists, copying them to the L2ARC devices if they are ++ * not already there. It scans until a headroom of buffers is satisfied, ++ * which itself is a buffer for ARC eviction. The thread that does this is ++ * l2arc_feed_thread(), illustrated below; example sizes are included to ++ * provide a better sense of ratio than this diagram: ++ * ++ * head --> tail ++ * +---------------------+----------+ ++ * ARC_mfu |:::::#:::::::::::::::|o#o###o###|-->. # already on L2ARC ++ * +---------------------+----------+ | o L2ARC eligible ++ * ARC_mru |:#:::::::::::::::::::|#o#ooo####|-->| : ARC buffer ++ * +---------------------+----------+ | ++ * 15.9 Gbytes ^ 32 Mbytes | ++ * headroom | ++ * l2arc_feed_thread() ++ * | ++ * l2arc write hand <--[oooo]--' ++ * | 8 Mbyte ++ * | write max ++ * V ++ * +==============================+ ++ * L2ARC dev |####|#|###|###| |####| ... | ++ * +==============================+ ++ * 32 Gbytes ++ * ++ * 3. If an ARC buffer is copied to the L2ARC but then hit instead of ++ * evicted, then the L2ARC has cached a buffer much sooner than it probably ++ * needed to, potentially wasting L2ARC device bandwidth and storage. It is ++ * safe to say that this is an uncommon case, since buffers at the end of ++ * the ARC lists have moved there due to inactivity. ++ * ++ * 4. If the ARC evicts faster than the L2ARC can maintain a headroom, ++ * then the L2ARC simply misses copying some buffers. This serves as a ++ * pressure valve to prevent heavy read workloads from both stalling the ARC ++ * with waits and clogging the L2ARC with writes. This also helps prevent ++ * the potential for the L2ARC to churn if it attempts to cache content too ++ * quickly, such as during backups of the entire pool. ++ * ++ * 5. After system boot and before the ARC has filled main memory, there are ++ * no evictions from the ARC and so the tails of the ARC_mfu and ARC_mru ++ * lists can remain mostly static. Instead of searching from tail of these ++ * lists as pictured, the l2arc_feed_thread() will search from the list heads ++ * for eligible buffers, greatly increasing its chance of finding them. ++ * ++ * The L2ARC device write speed is also boosted during this time so that ++ * the L2ARC warms up faster. Since there have been no ARC evictions yet, ++ * there are no L2ARC reads, and no fear of degrading read performance ++ * through increased writes. ++ * ++ * 6. Writes to the L2ARC devices are grouped and sent in-sequence, so that ++ * the vdev queue can aggregate them into larger and fewer writes. Each ++ * device is written to in a rotor fashion, sweeping writes through ++ * available space then repeating. ++ * ++ * 7. The L2ARC does not store dirty content. It never needs to flush ++ * write buffers back to disk based storage. ++ * ++ * 8. If an ARC buffer is written (and dirtied) which also exists in the ++ * L2ARC, the now stale L2ARC buffer is immediately dropped. 
++ * ++ * The performance of the L2ARC can be tweaked by a number of tunables, which ++ * may be necessary for different workloads: ++ * ++ * l2arc_write_max max write bytes per interval ++ * l2arc_write_boost extra write bytes during device warmup ++ * l2arc_noprefetch skip caching prefetched buffers ++ * l2arc_headroom number of max device writes to precache ++ * l2arc_feed_secs seconds between L2ARC writing ++ * ++ * Tunables may be removed or added as future performance improvements are ++ * integrated, and also may become zpool properties. ++ * ++ * There are three key functions that control how the L2ARC warms up: ++ * ++ * l2arc_write_eligible() check if a buffer is eligible to cache ++ * l2arc_write_size() calculate how much to write ++ * l2arc_write_interval() calculate sleep delay between writes ++ * ++ * These three functions determine what to write, how much, and how quickly ++ * to send writes. ++ */ ++ ++static boolean_t ++l2arc_write_eligible(uint64_t spa_guid, arc_buf_hdr_t *ab) ++{ ++ /* ++ * A buffer is *not* eligible for the L2ARC if it: ++ * 1. belongs to a different spa. ++ * 2. is already cached on the L2ARC. ++ * 3. has an I/O in progress (it may be an incomplete read). ++ * 4. is flagged not eligible (zfs property). ++ */ ++ if (ab->b_spa != spa_guid || ab->b_l2hdr != NULL || ++ HDR_IO_IN_PROGRESS(ab) || !HDR_L2CACHE(ab)) ++ return (B_FALSE); ++ ++ return (B_TRUE); ++} ++ ++static uint64_t ++l2arc_write_size(l2arc_dev_t *dev) ++{ ++ uint64_t size; ++ ++ size = dev->l2ad_write; ++ ++ if (arc_warm == B_FALSE) ++ size += dev->l2ad_boost; ++ ++ return (size); ++ ++} ++ ++static clock_t ++l2arc_write_interval(clock_t began, uint64_t wanted, uint64_t wrote) ++{ ++ clock_t interval, next, now; ++ ++ /* ++ * If the ARC lists are busy, increase our write rate; if the ++ * lists are stale, idle back. This is achieved by checking ++ * how much we previously wrote - if it was more than half of ++ * what we wanted, schedule the next write much sooner. ++ */ ++ if (l2arc_feed_again && wrote > (wanted / 2)) ++ interval = (hz * l2arc_feed_min_ms) / 1000; ++ else ++ interval = hz * l2arc_feed_secs; ++ ++ now = ddi_get_lbolt(); ++ next = MAX(now, MIN(now + interval, began + interval)); ++ ++ return (next); ++} ++ ++static void ++l2arc_hdr_stat_add(void) ++{ ++ ARCSTAT_INCR(arcstat_l2_hdr_size, HDR_SIZE + L2HDR_SIZE); ++ ARCSTAT_INCR(arcstat_hdr_size, -HDR_SIZE); ++} ++ ++static void ++l2arc_hdr_stat_remove(void) ++{ ++ ARCSTAT_INCR(arcstat_l2_hdr_size, -(HDR_SIZE + L2HDR_SIZE)); ++ ARCSTAT_INCR(arcstat_hdr_size, HDR_SIZE); ++} ++ ++/* ++ * Cycle through L2ARC devices. This is how L2ARC load balances. ++ * If a device is returned, this also returns holding the spa config lock. ++ */ ++static l2arc_dev_t * ++l2arc_dev_get_next(void) ++{ ++ l2arc_dev_t *first, *next = NULL; ++ ++ /* ++ * Lock out the removal of spas (spa_namespace_lock), then removal ++ * of cache devices (l2arc_dev_mtx). Once a device has been selected, ++ * both locks will be dropped and a spa config lock held instead. 
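For illustration only (not part of the patch; tunable values are invented), a stand-alone sketch of the decision made by l2arc_write_interval() above: if more than half of the previously wanted bytes were written, the ARC lists are busy and the feed thread sleeps for the short interval, otherwise for the long one.

#include <stdio.h>

int main(void)
{
	int hz = 100;                      /* clock ticks per second (sample)         */
	int l2arc_feed_secs = 1;           /* idle interval in seconds (sample)       */
	int l2arc_feed_min_ms = 200;       /* busy interval in milliseconds (sample)  */
	int l2arc_feed_again = 1;          /* tunable enabling the fast path (sample) */
	unsigned long wanted = 8UL << 20;  /* bytes the last pass tried to write      */
	unsigned long wrote  = 6UL << 20;  /* bytes the last pass actually wrote      */
	int interval;

	/* Busy ARC lists (more than half of 'wanted' written) -> feed sooner. */
	if (l2arc_feed_again && wrote > (wanted / 2))
		interval = (hz * l2arc_feed_min_ms) / 1000;
	else
		interval = hz * l2arc_feed_secs;

	printf("sleep for %d ticks before the next L2ARC feed\n", interval);
	return 0;
}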
++ */ ++ mutex_enter(&spa_namespace_lock); ++ mutex_enter(&l2arc_dev_mtx); ++ ++ /* if there are no vdevs, there is nothing to do */ ++ if (l2arc_ndev == 0) ++ goto out; ++ ++ first = NULL; ++ next = l2arc_dev_last; ++ do { ++ /* loop around the list looking for a non-faulted vdev */ ++ if (next == NULL) { ++ next = list_head(l2arc_dev_list); ++ } else { ++ next = list_next(l2arc_dev_list, next); ++ if (next == NULL) ++ next = list_head(l2arc_dev_list); ++ } ++ ++ /* if we have come back to the start, bail out */ ++ if (first == NULL) ++ first = next; ++ else if (next == first) ++ break; ++ ++ } while (vdev_is_dead(next->l2ad_vdev)); ++ ++ /* if we were unable to find any usable vdevs, return NULL */ ++ if (vdev_is_dead(next->l2ad_vdev)) ++ next = NULL; ++ ++ l2arc_dev_last = next; ++ ++out: ++ mutex_exit(&l2arc_dev_mtx); ++ ++ /* ++ * Grab the config lock to prevent the 'next' device from being ++ * removed while we are writing to it. ++ */ ++ if (next != NULL) ++ spa_config_enter(next->l2ad_spa, SCL_L2ARC, next, RW_READER); ++ mutex_exit(&spa_namespace_lock); ++ ++ return (next); ++} ++ ++/* ++ * Free buffers that were tagged for destruction. ++ */ ++static void ++l2arc_do_free_on_write(void) ++{ ++ list_t *buflist; ++ l2arc_data_free_t *df, *df_prev; ++ ++ mutex_enter(&l2arc_free_on_write_mtx); ++ buflist = l2arc_free_on_write; ++ ++ for (df = list_tail(buflist); df; df = df_prev) { ++ df_prev = list_prev(buflist, df); ++ ASSERT(df->l2df_data != NULL); ++ ASSERT(df->l2df_func != NULL); ++ df->l2df_func(df->l2df_data, df->l2df_size); ++ list_remove(buflist, df); ++ kmem_free(df, sizeof (l2arc_data_free_t)); ++ } ++ ++ mutex_exit(&l2arc_free_on_write_mtx); ++} ++ ++/* ++ * A write to a cache device has completed. Update all headers to allow ++ * reads from these buffers to begin. ++ */ ++static void ++l2arc_write_done(zio_t *zio) ++{ ++ l2arc_write_callback_t *cb; ++ l2arc_dev_t *dev; ++ list_t *buflist; ++ arc_buf_hdr_t *head, *ab, *ab_prev; ++ l2arc_buf_hdr_t *abl2; ++ kmutex_t *hash_lock; ++ ++ cb = zio->io_private; ++ ASSERT(cb != NULL); ++ dev = cb->l2wcb_dev; ++ ASSERT(dev != NULL); ++ head = cb->l2wcb_head; ++ ASSERT(head != NULL); ++ buflist = dev->l2ad_buflist; ++ ASSERT(buflist != NULL); ++ DTRACE_PROBE2(l2arc__iodone, zio_t *, zio, ++ l2arc_write_callback_t *, cb); ++ ++ if (zio->io_error != 0) ++ ARCSTAT_BUMP(arcstat_l2_writes_error); ++ ++ mutex_enter(&l2arc_buflist_mtx); ++ ++ /* ++ * All writes completed, or an error was hit. ++ */ ++ for (ab = list_prev(buflist, head); ab; ab = ab_prev) { ++ ab_prev = list_prev(buflist, ab); ++ ++ hash_lock = HDR_LOCK(ab); ++ if (!mutex_tryenter(hash_lock)) { ++ /* ++ * This buffer misses out. It may be in a stage ++ * of eviction. Its ARC_L2_WRITING flag will be ++ * left set, denying reads to this buffer. ++ */ ++ ARCSTAT_BUMP(arcstat_l2_writes_hdr_miss); ++ continue; ++ } ++ ++ if (zio->io_error != 0) { ++ /* ++ * Error - drop L2ARC entry. ++ */ ++ list_remove(buflist, ab); ++ abl2 = ab->b_l2hdr; ++ ab->b_l2hdr = NULL; ++ kmem_free(abl2, sizeof (l2arc_buf_hdr_t)); ++ ARCSTAT_INCR(arcstat_l2_size, -ab->b_size); ++ } ++ ++ /* ++ * Allow ARC to begin reads to this L2ARC entry. ++ */ ++ ab->b_flags &= ~ARC_L2_WRITING; ++ ++ mutex_exit(hash_lock); ++ } ++ ++ atomic_inc_64(&l2arc_writes_done); ++ list_remove(buflist, head); ++ kmem_cache_free(hdr_cache, head); ++ mutex_exit(&l2arc_buflist_mtx); ++ ++ l2arc_do_free_on_write(); ++ ++ kmem_free(cb, sizeof (l2arc_write_callback_t)); ++} ++ ++/* ++ * A read to a cache device completed. 
Validate buffer contents before ++ * handing over to the regular ARC routines. ++ */ ++static void ++l2arc_read_done(zio_t *zio) ++{ ++ l2arc_read_callback_t *cb; ++ arc_buf_hdr_t *hdr; ++ arc_buf_t *buf; ++ kmutex_t *hash_lock; ++ int equal; ++ ++ ASSERT(zio->io_vd != NULL); ++ ASSERT(zio->io_flags & ZIO_FLAG_DONT_PROPAGATE); ++ ++ spa_config_exit(zio->io_spa, SCL_L2ARC, zio->io_vd); ++ ++ cb = zio->io_private; ++ ASSERT(cb != NULL); ++ buf = cb->l2rcb_buf; ++ ASSERT(buf != NULL); ++ ++ hash_lock = HDR_LOCK(buf->b_hdr); ++ mutex_enter(hash_lock); ++ hdr = buf->b_hdr; ++ ASSERT3P(hash_lock, ==, HDR_LOCK(hdr)); ++ ++ /* ++ * Check this survived the L2ARC journey. ++ */ ++ equal = arc_cksum_equal(buf); ++ if (equal && zio->io_error == 0 && !HDR_L2_EVICTED(hdr)) { ++ mutex_exit(hash_lock); ++ zio->io_private = buf; ++ zio->io_bp_copy = cb->l2rcb_bp; /* XXX fix in L2ARC 2.0 */ ++ zio->io_bp = &zio->io_bp_copy; /* XXX fix in L2ARC 2.0 */ ++ arc_read_done(zio); ++ } else { ++ mutex_exit(hash_lock); ++ /* ++ * Buffer didn't survive caching. Increment stats and ++ * reissue to the original storage device. ++ */ ++ if (zio->io_error != 0) { ++ ARCSTAT_BUMP(arcstat_l2_io_error); ++ } else { ++ zio->io_error = EIO; ++ } ++ if (!equal) ++ ARCSTAT_BUMP(arcstat_l2_cksum_bad); ++ ++ /* ++ * If there's no waiter, issue an async i/o to the primary ++ * storage now. If there *is* a waiter, the caller must ++ * issue the i/o in a context where it's OK to block. ++ */ ++ if (zio->io_waiter == NULL) { ++ zio_t *pio = zio_unique_parent(zio); ++ ++ ASSERT(!pio || pio->io_child_type == ZIO_CHILD_LOGICAL); ++ ++ zio_nowait(zio_read(pio, cb->l2rcb_spa, &cb->l2rcb_bp, ++ buf->b_data, zio->io_size, arc_read_done, buf, ++ zio->io_priority, cb->l2rcb_flags, &cb->l2rcb_zb)); ++ } ++ } ++ ++ kmem_free(cb, sizeof (l2arc_read_callback_t)); ++} ++ ++/* ++ * This is the list priority from which the L2ARC will search for pages to ++ * cache. This is used within loops (0..3) to cycle through lists in the ++ * desired order. This order can have a significant effect on cache ++ * performance. ++ * ++ * Currently the metadata lists are hit first, MFU then MRU, followed by ++ * the data lists. This function returns a locked list, and also returns ++ * the lock pointer. ++ */ ++static list_t * ++l2arc_list_locked(int list_num, kmutex_t **lock) ++{ ++ list_t *list = NULL; ++ ++ ASSERT(list_num >= 0 && list_num <= 3); ++ ++ switch (list_num) { ++ case 0: ++ list = &arc_mfu->arcs_list[ARC_BUFC_METADATA]; ++ *lock = &arc_mfu->arcs_mtx; ++ break; ++ case 1: ++ list = &arc_mru->arcs_list[ARC_BUFC_METADATA]; ++ *lock = &arc_mru->arcs_mtx; ++ break; ++ case 2: ++ list = &arc_mfu->arcs_list[ARC_BUFC_DATA]; ++ *lock = &arc_mfu->arcs_mtx; ++ break; ++ case 3: ++ list = &arc_mru->arcs_list[ARC_BUFC_DATA]; ++ *lock = &arc_mru->arcs_mtx; ++ break; ++ } ++ ++ ASSERT(!(MUTEX_HELD(*lock))); ++ mutex_enter(*lock); ++ return (list); ++} ++ ++/* ++ * Evict buffers from the device write hand to the distance specified in ++ * bytes. This distance may span populated buffers, it may span nothing. ++ * This is clearing a region on the L2ARC device ready for writing. ++ * If the 'all' boolean is set, every buffer is evicted. 
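++ * As a rough illustration: with the write hand at offset H and a
++ * distance D, headers whose b_daddr falls in roughly [H, H + D] are
++ * dropped from the L2ARC; the data on the cache device is simply left
++ * to be overwritten, nothing is read back.  When the hand is within
++ * 2 * D of l2ad_end the window is extended to the end of the device
++ * instead, since the hand is about to wrap around to l2ad_start.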
++ */ ++static void ++l2arc_evict(l2arc_dev_t *dev, uint64_t distance, boolean_t all) ++{ ++ list_t *buflist; ++ l2arc_buf_hdr_t *abl2; ++ arc_buf_hdr_t *ab, *ab_prev; ++ kmutex_t *hash_lock; ++ uint64_t taddr; ++ ++ buflist = dev->l2ad_buflist; ++ ++ if (buflist == NULL) ++ return; ++ ++ if (!all && dev->l2ad_first) { ++ /* ++ * This is the first sweep through the device. There is ++ * nothing to evict. ++ */ ++ return; ++ } ++ ++ if (dev->l2ad_hand >= (dev->l2ad_end - (2 * distance))) { ++ /* ++ * When nearing the end of the device, evict to the end ++ * before the device write hand jumps to the start. ++ */ ++ taddr = dev->l2ad_end; ++ } else { ++ taddr = dev->l2ad_hand + distance; ++ } ++ DTRACE_PROBE4(l2arc__evict, l2arc_dev_t *, dev, list_t *, buflist, ++ uint64_t, taddr, boolean_t, all); ++ ++top: ++ mutex_enter(&l2arc_buflist_mtx); ++ for (ab = list_tail(buflist); ab; ab = ab_prev) { ++ ab_prev = list_prev(buflist, ab); ++ ++ hash_lock = HDR_LOCK(ab); ++ if (!mutex_tryenter(hash_lock)) { ++ /* ++ * Missed the hash lock. Retry. ++ */ ++ ARCSTAT_BUMP(arcstat_l2_evict_lock_retry); ++ mutex_exit(&l2arc_buflist_mtx); ++ mutex_enter(hash_lock); ++ mutex_exit(hash_lock); ++ goto top; ++ } ++ ++ if (HDR_L2_WRITE_HEAD(ab)) { ++ /* ++ * We hit a write head node. Leave it for ++ * l2arc_write_done(). ++ */ ++ list_remove(buflist, ab); ++ mutex_exit(hash_lock); ++ continue; ++ } ++ ++ if (!all && ab->b_l2hdr != NULL && ++ (ab->b_l2hdr->b_daddr > taddr || ++ ab->b_l2hdr->b_daddr < dev->l2ad_hand)) { ++ /* ++ * We've evicted to the target address, ++ * or the end of the device. ++ */ ++ mutex_exit(hash_lock); ++ break; ++ } ++ ++ if (HDR_FREE_IN_PROGRESS(ab)) { ++ /* ++ * Already on the path to destruction. ++ */ ++ mutex_exit(hash_lock); ++ continue; ++ } ++ ++ if (ab->b_state == arc_l2c_only) { ++ ASSERT(!HDR_L2_READING(ab)); ++ /* ++ * This doesn't exist in the ARC. Destroy. ++ * arc_hdr_destroy() will call list_remove() ++ * and decrement arcstat_l2_size. ++ */ ++ arc_change_state(arc_anon, ab, hash_lock); ++ arc_hdr_destroy(ab); ++ } else { ++ /* ++ * Invalidate issued or about to be issued ++ * reads, since we may be about to write ++ * over this location. ++ */ ++ if (HDR_L2_READING(ab)) { ++ ARCSTAT_BUMP(arcstat_l2_evict_reading); ++ ab->b_flags |= ARC_L2_EVICTED; ++ } ++ ++ /* ++ * Tell ARC this no longer exists in L2ARC. ++ */ ++ if (ab->b_l2hdr != NULL) { ++ abl2 = ab->b_l2hdr; ++ ab->b_l2hdr = NULL; ++ kmem_free(abl2, sizeof (l2arc_buf_hdr_t)); ++ ARCSTAT_INCR(arcstat_l2_size, -ab->b_size); ++ } ++ list_remove(buflist, ab); ++ ++ /* ++ * This may have been leftover after a ++ * failed write. ++ */ ++ ab->b_flags &= ~ARC_L2_WRITING; ++ } ++ mutex_exit(hash_lock); ++ } ++ mutex_exit(&l2arc_buflist_mtx); ++ ++ vdev_space_update(dev->l2ad_vdev, -(taddr - dev->l2ad_evict), 0, 0); ++ dev->l2ad_evict = taddr; ++} ++ ++/* ++ * Find and write ARC buffers to the L2ARC device. ++ * ++ * An ARC_L2_WRITING flag is set so that the L2ARC buffers are not valid ++ * for reading until they have completed writing. 
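++ * In outline (see the code below): each of the four ARC lists returned
++ * by l2arc_list_locked() is walked, buffers failing
++ * l2arc_write_eligible() are skipped, an l2arc_buf_hdr_t recording the
++ * current device hand address is attached to each chosen header, and a
++ * physical write is issued at that address with zio_write_phys().  The
++ * hand then advances by the device-aligned size of the buffer, and
++ * l2arc_write_done() clears ARC_L2_WRITING once the zio completes.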
++ */ ++static uint64_t ++l2arc_write_buffers(spa_t *spa, l2arc_dev_t *dev, uint64_t target_sz) ++{ ++ arc_buf_hdr_t *ab, *ab_prev, *head; ++ l2arc_buf_hdr_t *hdrl2; ++ list_t *list; ++ uint64_t passed_sz, write_sz, buf_sz, headroom; ++ void *buf_data; ++ kmutex_t *hash_lock, *list_lock = NULL; ++ boolean_t have_lock, full; ++ l2arc_write_callback_t *cb; ++ zio_t *pio, *wzio; ++ uint64_t guid = spa_load_guid(spa); ++ int try; ++ ++ ASSERT(dev->l2ad_vdev != NULL); ++ ++ pio = NULL; ++ write_sz = 0; ++ full = B_FALSE; ++ head = kmem_cache_alloc(hdr_cache, KM_PUSHPAGE); ++ head->b_flags |= ARC_L2_WRITE_HEAD; ++ ++ /* ++ * Copy buffers for L2ARC writing. ++ */ ++ mutex_enter(&l2arc_buflist_mtx); ++ for (try = 0; try <= 3; try++) { ++ list = l2arc_list_locked(try, &list_lock); ++ passed_sz = 0; ++ ++ /* ++ * L2ARC fast warmup. ++ * ++ * Until the ARC is warm and starts to evict, read from the ++ * head of the ARC lists rather than the tail. ++ */ ++ headroom = target_sz * l2arc_headroom; ++ if (arc_warm == B_FALSE) ++ ab = list_head(list); ++ else ++ ab = list_tail(list); ++ ++ for (; ab; ab = ab_prev) { ++ if (arc_warm == B_FALSE) ++ ab_prev = list_next(list, ab); ++ else ++ ab_prev = list_prev(list, ab); ++ ++ hash_lock = HDR_LOCK(ab); ++ have_lock = MUTEX_HELD(hash_lock); ++ if (!have_lock && !mutex_tryenter(hash_lock)) { ++ /* ++ * Skip this buffer rather than waiting. ++ */ ++ continue; ++ } ++ ++ passed_sz += ab->b_size; ++ if (passed_sz > headroom) { ++ /* ++ * Searched too far. ++ */ ++ mutex_exit(hash_lock); ++ break; ++ } ++ ++ if (!l2arc_write_eligible(guid, ab)) { ++ mutex_exit(hash_lock); ++ continue; ++ } ++ ++ if ((write_sz + ab->b_size) > target_sz) { ++ full = B_TRUE; ++ mutex_exit(hash_lock); ++ break; ++ } ++ ++ if (pio == NULL) { ++ /* ++ * Insert a dummy header on the buflist so ++ * l2arc_write_done() can find where the ++ * write buffers begin without searching. ++ */ ++ list_insert_head(dev->l2ad_buflist, head); ++ ++ cb = kmem_alloc(sizeof (l2arc_write_callback_t), ++ KM_PUSHPAGE); ++ cb->l2wcb_dev = dev; ++ cb->l2wcb_head = head; ++ pio = zio_root(spa, l2arc_write_done, cb, ++ ZIO_FLAG_CANFAIL); ++ } ++ ++ /* ++ * Create and add a new L2ARC header. ++ */ ++ hdrl2 = kmem_zalloc(sizeof (l2arc_buf_hdr_t), ++ KM_PUSHPAGE); ++ hdrl2->b_dev = dev; ++ hdrl2->b_daddr = dev->l2ad_hand; ++ ++ ab->b_flags |= ARC_L2_WRITING; ++ ab->b_l2hdr = hdrl2; ++ list_insert_head(dev->l2ad_buflist, ab); ++ buf_data = ab->b_buf->b_data; ++ buf_sz = ab->b_size; ++ ++ /* ++ * Compute and store the buffer cksum before ++ * writing. On debug the cksum is verified first. ++ */ ++ arc_cksum_verify(ab->b_buf); ++ arc_cksum_compute(ab->b_buf, B_TRUE); ++ ++ mutex_exit(hash_lock); ++ ++ wzio = zio_write_phys(pio, dev->l2ad_vdev, ++ dev->l2ad_hand, buf_sz, buf_data, ZIO_CHECKSUM_OFF, ++ NULL, NULL, ZIO_PRIORITY_ASYNC_WRITE, ++ ZIO_FLAG_CANFAIL, B_FALSE); ++ ++ DTRACE_PROBE2(l2arc__write, vdev_t *, dev->l2ad_vdev, ++ zio_t *, wzio); ++ (void) zio_nowait(wzio); ++ ++ /* ++ * Keep the clock hand suitably device-aligned. 
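++			 * For example, on a cache device with 4 KB sectors a
++			 * 512-byte buffer still occupies a full 4 KB on the
++			 * device, so the hand must advance by the asize from
++			 * vdev_psize_to_asize() rather than by the logical
++			 * buffer size (illustrative figures; the rounding
++			 * depends on the vdev's ashift).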
++ */ ++ buf_sz = vdev_psize_to_asize(dev->l2ad_vdev, buf_sz); ++ ++ write_sz += buf_sz; ++ dev->l2ad_hand += buf_sz; ++ } ++ ++ mutex_exit(list_lock); ++ ++ if (full == B_TRUE) ++ break; ++ } ++ mutex_exit(&l2arc_buflist_mtx); ++ ++ if (pio == NULL) { ++ ASSERT3U(write_sz, ==, 0); ++ kmem_cache_free(hdr_cache, head); ++ return (0); ++ } ++ ++ ASSERT3U(write_sz, <=, target_sz); ++ ARCSTAT_BUMP(arcstat_l2_writes_sent); ++ ARCSTAT_INCR(arcstat_l2_write_bytes, write_sz); ++ ARCSTAT_INCR(arcstat_l2_size, write_sz); ++ vdev_space_update(dev->l2ad_vdev, write_sz, 0, 0); ++ ++ /* ++ * Bump device hand to the device start if it is approaching the end. ++ * l2arc_evict() will already have evicted ahead for this case. ++ */ ++ if (dev->l2ad_hand >= (dev->l2ad_end - target_sz)) { ++ vdev_space_update(dev->l2ad_vdev, ++ dev->l2ad_end - dev->l2ad_hand, 0, 0); ++ dev->l2ad_hand = dev->l2ad_start; ++ dev->l2ad_evict = dev->l2ad_start; ++ dev->l2ad_first = B_FALSE; ++ } ++ ++ dev->l2ad_writing = B_TRUE; ++ (void) zio_wait(pio); ++ dev->l2ad_writing = B_FALSE; ++ ++ return (write_sz); ++} ++ ++/* ++ * This thread feeds the L2ARC at regular intervals. This is the beating ++ * heart of the L2ARC. ++ */ ++static void ++l2arc_feed_thread(void) ++{ ++ callb_cpr_t cpr; ++ l2arc_dev_t *dev; ++ spa_t *spa; ++ uint64_t size, wrote; ++ clock_t begin, next = ddi_get_lbolt(); ++ ++ CALLB_CPR_INIT(&cpr, &l2arc_feed_thr_lock, callb_generic_cpr, FTAG); ++ ++ mutex_enter(&l2arc_feed_thr_lock); ++ ++ while (l2arc_thread_exit == 0) { ++ CALLB_CPR_SAFE_BEGIN(&cpr); ++ (void) cv_timedwait_interruptible(&l2arc_feed_thr_cv, ++ &l2arc_feed_thr_lock, next); ++ CALLB_CPR_SAFE_END(&cpr, &l2arc_feed_thr_lock); ++ next = ddi_get_lbolt() + hz; ++ ++ /* ++ * Quick check for L2ARC devices. ++ */ ++ mutex_enter(&l2arc_dev_mtx); ++ if (l2arc_ndev == 0) { ++ mutex_exit(&l2arc_dev_mtx); ++ continue; ++ } ++ mutex_exit(&l2arc_dev_mtx); ++ begin = ddi_get_lbolt(); ++ ++ /* ++ * This selects the next l2arc device to write to, and in ++ * doing so the next spa to feed from: dev->l2ad_spa. This ++ * will return NULL if there are now no l2arc devices or if ++ * they are all faulted. ++ * ++ * If a device is returned, its spa's config lock is also ++ * held to prevent device removal. l2arc_dev_get_next() ++ * will grab and release l2arc_dev_mtx. ++ */ ++ if ((dev = l2arc_dev_get_next()) == NULL) ++ continue; ++ ++ spa = dev->l2ad_spa; ++ ASSERT(spa != NULL); ++ ++ /* ++ * If the pool is read-only then force the feed thread to ++ * sleep a little longer. ++ */ ++ if (!spa_writeable(spa)) { ++ next = ddi_get_lbolt() + 5 * l2arc_feed_secs * hz; ++ spa_config_exit(spa, SCL_L2ARC, dev); ++ continue; ++ } ++ ++ /* ++ * Avoid contributing to memory pressure. ++ */ ++ if (arc_no_grow) { ++ ARCSTAT_BUMP(arcstat_l2_abort_lowmem); ++ spa_config_exit(spa, SCL_L2ARC, dev); ++ continue; ++ } ++ ++ ARCSTAT_BUMP(arcstat_l2_feeds); ++ ++ size = l2arc_write_size(dev); ++ ++ /* ++ * Evict L2ARC buffers that will be overwritten. ++ */ ++ l2arc_evict(dev, size, B_FALSE); ++ ++ /* ++ * Write ARC buffers. ++ */ ++ wrote = l2arc_write_buffers(spa, dev, size); ++ ++ /* ++ * Calculate interval between writes. 
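++		 * As a rough example, assuming the usual defaults of
++		 * l2arc_feed_secs = 1 and l2arc_feed_min_ms = 200 (the values
++		 * in a given build may differ): if this pass wrote more than
++		 * half of what it wanted and l2arc_feed_again is set,
++		 * l2arc_write_interval() schedules the next wakeup about
++		 * 200 ms after 'begin'; otherwise it is about one second
++		 * later.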
++ */ ++ next = l2arc_write_interval(begin, size, wrote); ++ spa_config_exit(spa, SCL_L2ARC, dev); ++ } ++ ++ l2arc_thread_exit = 0; ++ cv_broadcast(&l2arc_feed_thr_cv); ++ CALLB_CPR_EXIT(&cpr); /* drops l2arc_feed_thr_lock */ ++ thread_exit(); ++} ++ ++boolean_t ++l2arc_vdev_present(vdev_t *vd) ++{ ++ l2arc_dev_t *dev; ++ ++ mutex_enter(&l2arc_dev_mtx); ++ for (dev = list_head(l2arc_dev_list); dev != NULL; ++ dev = list_next(l2arc_dev_list, dev)) { ++ if (dev->l2ad_vdev == vd) ++ break; ++ } ++ mutex_exit(&l2arc_dev_mtx); ++ ++ return (dev != NULL); ++} ++ ++/* ++ * Add a vdev for use by the L2ARC. By this point the spa has already ++ * validated the vdev and opened it. ++ */ ++void ++l2arc_add_vdev(spa_t *spa, vdev_t *vd) ++{ ++ l2arc_dev_t *adddev; ++ ++ ASSERT(!l2arc_vdev_present(vd)); ++ ++ /* ++ * Create a new l2arc device entry. ++ */ ++ adddev = kmem_zalloc(sizeof (l2arc_dev_t), KM_SLEEP); ++ adddev->l2ad_spa = spa; ++ adddev->l2ad_vdev = vd; ++ adddev->l2ad_write = l2arc_write_max; ++ adddev->l2ad_boost = l2arc_write_boost; ++ adddev->l2ad_start = VDEV_LABEL_START_SIZE; ++ adddev->l2ad_end = VDEV_LABEL_START_SIZE + vdev_get_min_asize(vd); ++ adddev->l2ad_hand = adddev->l2ad_start; ++ adddev->l2ad_evict = adddev->l2ad_start; ++ adddev->l2ad_first = B_TRUE; ++ adddev->l2ad_writing = B_FALSE; ++ list_link_init(&adddev->l2ad_node); ++ ASSERT3U(adddev->l2ad_write, >, 0); ++ ++ /* ++ * This is a list of all ARC buffers that are still valid on the ++ * device. ++ */ ++ adddev->l2ad_buflist = kmem_zalloc(sizeof (list_t), KM_SLEEP); ++ list_create(adddev->l2ad_buflist, sizeof (arc_buf_hdr_t), ++ offsetof(arc_buf_hdr_t, b_l2node)); ++ ++ vdev_space_update(vd, 0, 0, adddev->l2ad_end - adddev->l2ad_hand); ++ ++ /* ++ * Add device to global list ++ */ ++ mutex_enter(&l2arc_dev_mtx); ++ list_insert_head(l2arc_dev_list, adddev); ++ atomic_inc_64(&l2arc_ndev); ++ mutex_exit(&l2arc_dev_mtx); ++} ++ ++/* ++ * Remove a vdev from the L2ARC. ++ */ ++void ++l2arc_remove_vdev(vdev_t *vd) ++{ ++ l2arc_dev_t *dev, *nextdev, *remdev = NULL; ++ ++ /* ++ * Find the device by vdev ++ */ ++ mutex_enter(&l2arc_dev_mtx); ++ for (dev = list_head(l2arc_dev_list); dev; dev = nextdev) { ++ nextdev = list_next(l2arc_dev_list, dev); ++ if (vd == dev->l2ad_vdev) { ++ remdev = dev; ++ break; ++ } ++ } ++ ASSERT(remdev != NULL); ++ ++ /* ++ * Remove device from global list ++ */ ++ list_remove(l2arc_dev_list, remdev); ++ l2arc_dev_last = NULL; /* may have been invalidated */ ++ atomic_dec_64(&l2arc_ndev); ++ mutex_exit(&l2arc_dev_mtx); ++ ++ /* ++ * Clear all buflists and ARC references. L2ARC device flush. 
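++	 * The l2arc_evict(remdev, 0, B_TRUE) call below passes 'all', so the
++	 * address window is ignored and every header on the device's buflist
++	 * is dropped, which is what we want when the cache vdev is leaving
++	 * the pool.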
++ */ ++ l2arc_evict(remdev, 0, B_TRUE); ++ list_destroy(remdev->l2ad_buflist); ++ kmem_free(remdev->l2ad_buflist, sizeof (list_t)); ++ kmem_free(remdev, sizeof (l2arc_dev_t)); ++} ++ ++void ++l2arc_init(void) ++{ ++ l2arc_thread_exit = 0; ++ l2arc_ndev = 0; ++ l2arc_writes_sent = 0; ++ l2arc_writes_done = 0; ++ ++ mutex_init(&l2arc_feed_thr_lock, NULL, MUTEX_DEFAULT, NULL); ++ cv_init(&l2arc_feed_thr_cv, NULL, CV_DEFAULT, NULL); ++ mutex_init(&l2arc_dev_mtx, NULL, MUTEX_DEFAULT, NULL); ++ mutex_init(&l2arc_buflist_mtx, NULL, MUTEX_DEFAULT, NULL); ++ mutex_init(&l2arc_free_on_write_mtx, NULL, MUTEX_DEFAULT, NULL); ++ ++ l2arc_dev_list = &L2ARC_dev_list; ++ l2arc_free_on_write = &L2ARC_free_on_write; ++ list_create(l2arc_dev_list, sizeof (l2arc_dev_t), ++ offsetof(l2arc_dev_t, l2ad_node)); ++ list_create(l2arc_free_on_write, sizeof (l2arc_data_free_t), ++ offsetof(l2arc_data_free_t, l2df_list_node)); ++} ++ ++void ++l2arc_fini(void) ++{ ++ /* ++ * This is called from dmu_fini(), which is called from spa_fini(); ++ * Because of this, we can assume that all l2arc devices have ++ * already been removed when the pools themselves were removed. ++ */ ++ ++ l2arc_do_free_on_write(); ++ ++ mutex_destroy(&l2arc_feed_thr_lock); ++ cv_destroy(&l2arc_feed_thr_cv); ++ mutex_destroy(&l2arc_dev_mtx); ++ mutex_destroy(&l2arc_buflist_mtx); ++ mutex_destroy(&l2arc_free_on_write_mtx); ++ ++ list_destroy(l2arc_dev_list); ++ list_destroy(l2arc_free_on_write); ++} ++ ++void ++l2arc_start(void) ++{ ++ if (!(spa_mode_global & FWRITE)) ++ return; ++ ++ (void) thread_create(NULL, 0, l2arc_feed_thread, NULL, 0, &p0, ++ TS_RUN, minclsyspri); ++} ++ ++void ++l2arc_stop(void) ++{ ++ if (!(spa_mode_global & FWRITE)) ++ return; ++ ++ mutex_enter(&l2arc_feed_thr_lock); ++ cv_signal(&l2arc_feed_thr_cv); /* kick thread out of startup */ ++ l2arc_thread_exit = 1; ++ while (l2arc_thread_exit != 0) ++ cv_wait(&l2arc_feed_thr_cv, &l2arc_feed_thr_lock); ++ mutex_exit(&l2arc_feed_thr_lock); ++} ++ ++#if defined(_KERNEL) && defined(HAVE_SPL) ++EXPORT_SYMBOL(arc_read); ++EXPORT_SYMBOL(arc_buf_remove_ref); ++EXPORT_SYMBOL(arc_getbuf_func); ++EXPORT_SYMBOL(arc_add_prune_callback); ++EXPORT_SYMBOL(arc_remove_prune_callback); ++ ++module_param(zfs_arc_min, ulong, 0444); ++MODULE_PARM_DESC(zfs_arc_min, "Min arc size"); ++ ++module_param(zfs_arc_max, ulong, 0444); ++MODULE_PARM_DESC(zfs_arc_max, "Max arc size"); ++ ++module_param(zfs_arc_meta_limit, ulong, 0444); ++MODULE_PARM_DESC(zfs_arc_meta_limit, "Meta limit for arc size"); ++ ++module_param(zfs_arc_meta_prune, int, 0444); ++MODULE_PARM_DESC(zfs_arc_meta_prune, "Bytes of meta data to prune"); ++ ++module_param(zfs_arc_grow_retry, int, 0444); ++MODULE_PARM_DESC(zfs_arc_grow_retry, "Seconds before growing arc size"); ++ ++module_param(zfs_arc_shrink_shift, int, 0444); ++MODULE_PARM_DESC(zfs_arc_shrink_shift, "log2(fraction of arc to reclaim)"); ++ ++module_param(zfs_arc_p_min_shift, int, 0444); ++MODULE_PARM_DESC(zfs_arc_p_min_shift, "arc_c shift to calc min/max arc_p"); ++ ++module_param(l2arc_write_max, ulong, 0444); ++MODULE_PARM_DESC(l2arc_write_max, "Max write bytes per interval"); ++ ++module_param(l2arc_write_boost, ulong, 0444); ++MODULE_PARM_DESC(l2arc_write_boost, "Extra write bytes during device warmup"); ++ ++module_param(l2arc_headroom, ulong, 0444); ++MODULE_PARM_DESC(l2arc_headroom, "Number of max device writes to precache"); ++ ++module_param(l2arc_feed_secs, ulong, 0444); ++MODULE_PARM_DESC(l2arc_feed_secs, "Seconds between L2ARC writing"); ++ 
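++/*
++ * Usage sketch (hypothetical values, for illustration only): when ZFS is
++ * built as the 'zfs' module these read-only parameters are set at load
++ * time, e.g.
++ *
++ *	modprobe zfs l2arc_write_max=16777216 l2arc_feed_secs=1
++ *
++ * or, when built into the kernel, on the kernel command line as
++ * zfs.l2arc_write_max=16777216.  The current values can be read back
++ * from /sys/module/zfs/parameters/.
++ */
++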
++module_param(l2arc_feed_min_ms, ulong, 0444); ++MODULE_PARM_DESC(l2arc_feed_min_ms, "Min feed interval in milliseconds"); ++ ++module_param(l2arc_noprefetch, int, 0444); ++MODULE_PARM_DESC(l2arc_noprefetch, "Skip caching prefetched buffers"); ++ ++module_param(l2arc_feed_again, int, 0444); ++MODULE_PARM_DESC(l2arc_feed_again, "Turbo L2ARC warmup"); ++ ++module_param(l2arc_norw, int, 0444); ++MODULE_PARM_DESC(l2arc_norw, "No reads during writes"); ++ ++#endif +diff -uNr linux-3.2.33-go.orig/fs/zfs/zfs/bplist.c linux-3.2.33-go/fs/zfs/zfs/bplist.c +--- linux-3.2.33-go.orig/fs/zfs/zfs/bplist.c 1970-01-01 01:00:00.000000000 +0100 ++++ linux-3.2.33-go/fs/zfs/zfs/bplist.c 2012-11-16 23:25:34.351039311 +0100 +@@ -0,0 +1,69 @@ ++/* ++ * CDDL HEADER START ++ * ++ * The contents of this file are subject to the terms of the ++ * Common Development and Distribution License (the "License"). ++ * You may not use this file except in compliance with the License. ++ * ++ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE ++ * or http://www.opensolaris.org/os/licensing. ++ * See the License for the specific language governing permissions ++ * and limitations under the License. ++ * ++ * When distributing Covered Code, include this CDDL HEADER in each ++ * file and include the License file at usr/src/OPENSOLARIS.LICENSE. ++ * If applicable, add the following below this CDDL HEADER, with the ++ * fields enclosed by brackets "[]" replaced with your own identifying ++ * information: Portions Copyright [yyyy] [name of copyright owner] ++ * ++ * CDDL HEADER END ++ */ ++/* ++ * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved. ++ */ ++ ++#include ++#include ++ ++ ++void ++bplist_create(bplist_t *bpl) ++{ ++ mutex_init(&bpl->bpl_lock, NULL, MUTEX_DEFAULT, NULL); ++ list_create(&bpl->bpl_list, sizeof (bplist_entry_t), ++ offsetof(bplist_entry_t, bpe_node)); ++} ++ ++void ++bplist_destroy(bplist_t *bpl) ++{ ++ list_destroy(&bpl->bpl_list); ++ mutex_destroy(&bpl->bpl_lock); ++} ++ ++void ++bplist_append(bplist_t *bpl, const blkptr_t *bp) ++{ ++ bplist_entry_t *bpe = kmem_alloc(sizeof (*bpe), KM_PUSHPAGE); ++ ++ mutex_enter(&bpl->bpl_lock); ++ bpe->bpe_blk = *bp; ++ list_insert_tail(&bpl->bpl_list, bpe); ++ mutex_exit(&bpl->bpl_lock); ++} ++ ++void ++bplist_iterate(bplist_t *bpl, bplist_itor_t *func, void *arg, dmu_tx_t *tx) ++{ ++ bplist_entry_t *bpe; ++ ++ mutex_enter(&bpl->bpl_lock); ++ while ((bpe = list_head(&bpl->bpl_list))) { ++ list_remove(&bpl->bpl_list, bpe); ++ mutex_exit(&bpl->bpl_lock); ++ func(arg, &bpe->bpe_blk, tx); ++ kmem_free(bpe, sizeof (*bpe)); ++ mutex_enter(&bpl->bpl_lock); ++ } ++ mutex_exit(&bpl->bpl_lock); ++} +diff -uNr linux-3.2.33-go.orig/fs/zfs/zfs/bpobj.c linux-3.2.33-go/fs/zfs/zfs/bpobj.c +--- linux-3.2.33-go.orig/fs/zfs/zfs/bpobj.c 1970-01-01 01:00:00.000000000 +0100 ++++ linux-3.2.33-go/fs/zfs/zfs/bpobj.c 2012-11-16 23:25:34.350039322 +0100 +@@ -0,0 +1,500 @@ ++/* ++ * CDDL HEADER START ++ * ++ * The contents of this file are subject to the terms of the ++ * Common Development and Distribution License (the "License"). ++ * You may not use this file except in compliance with the License. ++ * ++ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE ++ * or http://www.opensolaris.org/os/licensing. ++ * See the License for the specific language governing permissions ++ * and limitations under the License. 
++ * ++ * When distributing Covered Code, include this CDDL HEADER in each ++ * file and include the License file at usr/src/OPENSOLARIS.LICENSE. ++ * If applicable, add the following below this CDDL HEADER, with the ++ * fields enclosed by brackets "[]" replaced with your own identifying ++ * information: Portions Copyright [yyyy] [name of copyright owner] ++ * ++ * CDDL HEADER END ++ */ ++/* ++ * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2011 by Delphix. All rights reserved. ++ */ ++ ++#include ++#include ++#include ++#include ++ ++uint64_t ++bpobj_alloc(objset_t *os, int blocksize, dmu_tx_t *tx) ++{ ++ int size; ++ ++ if (spa_version(dmu_objset_spa(os)) < SPA_VERSION_BPOBJ_ACCOUNT) ++ size = BPOBJ_SIZE_V0; ++ else if (spa_version(dmu_objset_spa(os)) < SPA_VERSION_DEADLISTS) ++ size = BPOBJ_SIZE_V1; ++ else ++ size = sizeof (bpobj_phys_t); ++ ++ return (dmu_object_alloc(os, DMU_OT_BPOBJ, blocksize, ++ DMU_OT_BPOBJ_HDR, size, tx)); ++} ++ ++void ++bpobj_free(objset_t *os, uint64_t obj, dmu_tx_t *tx) ++{ ++ int64_t i; ++ bpobj_t bpo; ++ dmu_object_info_t doi; ++ int epb; ++ dmu_buf_t *dbuf = NULL; ++ ++ VERIFY3U(0, ==, bpobj_open(&bpo, os, obj)); ++ ++ mutex_enter(&bpo.bpo_lock); ++ ++ if (!bpo.bpo_havesubobj || bpo.bpo_phys->bpo_subobjs == 0) ++ goto out; ++ ++ VERIFY3U(0, ==, dmu_object_info(os, bpo.bpo_phys->bpo_subobjs, &doi)); ++ epb = doi.doi_data_block_size / sizeof (uint64_t); ++ ++ for (i = bpo.bpo_phys->bpo_num_subobjs - 1; i >= 0; i--) { ++ uint64_t *objarray; ++ uint64_t offset, blkoff; ++ ++ offset = i * sizeof (uint64_t); ++ blkoff = P2PHASE(i, epb); ++ ++ if (dbuf == NULL || dbuf->db_offset > offset) { ++ if (dbuf) ++ dmu_buf_rele(dbuf, FTAG); ++ VERIFY3U(0, ==, dmu_buf_hold(os, ++ bpo.bpo_phys->bpo_subobjs, offset, FTAG, &dbuf, 0)); ++ } ++ ++ ASSERT3U(offset, >=, dbuf->db_offset); ++ ASSERT3U(offset, <, dbuf->db_offset + dbuf->db_size); ++ ++ objarray = dbuf->db_data; ++ bpobj_free(os, objarray[blkoff], tx); ++ } ++ if (dbuf) { ++ dmu_buf_rele(dbuf, FTAG); ++ dbuf = NULL; ++ } ++ VERIFY3U(0, ==, dmu_object_free(os, bpo.bpo_phys->bpo_subobjs, tx)); ++ ++out: ++ mutex_exit(&bpo.bpo_lock); ++ bpobj_close(&bpo); ++ ++ VERIFY3U(0, ==, dmu_object_free(os, obj, tx)); ++} ++ ++int ++bpobj_open(bpobj_t *bpo, objset_t *os, uint64_t object) ++{ ++ dmu_object_info_t doi; ++ int err; ++ ++ err = dmu_object_info(os, object, &doi); ++ if (err) ++ return (err); ++ ++ bzero(bpo, sizeof (*bpo)); ++ mutex_init(&bpo->bpo_lock, NULL, MUTEX_DEFAULT, NULL); ++ ++ ASSERT(bpo->bpo_dbuf == NULL); ++ ASSERT(bpo->bpo_phys == NULL); ++ ASSERT(object != 0); ++ ASSERT3U(doi.doi_type, ==, DMU_OT_BPOBJ); ++ ASSERT3U(doi.doi_bonus_type, ==, DMU_OT_BPOBJ_HDR); ++ ++ err = dmu_bonus_hold(os, object, bpo, &bpo->bpo_dbuf); ++ if (err) ++ return (err); ++ ++ bpo->bpo_os = os; ++ bpo->bpo_object = object; ++ bpo->bpo_epb = doi.doi_data_block_size >> SPA_BLKPTRSHIFT; ++ bpo->bpo_havecomp = (doi.doi_bonus_size > BPOBJ_SIZE_V0); ++ bpo->bpo_havesubobj = (doi.doi_bonus_size > BPOBJ_SIZE_V1); ++ bpo->bpo_phys = bpo->bpo_dbuf->db_data; ++ return (0); ++} ++ ++void ++bpobj_close(bpobj_t *bpo) ++{ ++ /* Lame workaround for closing a bpobj that was never opened. 
*/ ++ if (bpo->bpo_object == 0) ++ return; ++ ++ dmu_buf_rele(bpo->bpo_dbuf, bpo); ++ if (bpo->bpo_cached_dbuf != NULL) ++ dmu_buf_rele(bpo->bpo_cached_dbuf, bpo); ++ bpo->bpo_dbuf = NULL; ++ bpo->bpo_phys = NULL; ++ bpo->bpo_cached_dbuf = NULL; ++ bpo->bpo_object = 0; ++ ++ mutex_destroy(&bpo->bpo_lock); ++} ++ ++static int ++bpobj_iterate_impl(bpobj_t *bpo, bpobj_itor_t func, void *arg, dmu_tx_t *tx, ++ boolean_t free) ++{ ++ dmu_object_info_t doi; ++ int epb; ++ int64_t i; ++ int err = 0; ++ dmu_buf_t *dbuf = NULL; ++ ++ mutex_enter(&bpo->bpo_lock); ++ ++ if (free) ++ dmu_buf_will_dirty(bpo->bpo_dbuf, tx); ++ ++ for (i = bpo->bpo_phys->bpo_num_blkptrs - 1; i >= 0; i--) { ++ blkptr_t *bparray; ++ blkptr_t *bp; ++ uint64_t offset, blkoff; ++ ++ offset = i * sizeof (blkptr_t); ++ blkoff = P2PHASE(i, bpo->bpo_epb); ++ ++ if (dbuf == NULL || dbuf->db_offset > offset) { ++ if (dbuf) ++ dmu_buf_rele(dbuf, FTAG); ++ err = dmu_buf_hold(bpo->bpo_os, bpo->bpo_object, offset, ++ FTAG, &dbuf, 0); ++ if (err) ++ break; ++ } ++ ++ ASSERT3U(offset, >=, dbuf->db_offset); ++ ASSERT3U(offset, <, dbuf->db_offset + dbuf->db_size); ++ ++ bparray = dbuf->db_data; ++ bp = &bparray[blkoff]; ++ err = func(arg, bp, tx); ++ if (err) ++ break; ++ if (free) { ++ bpo->bpo_phys->bpo_bytes -= ++ bp_get_dsize_sync(dmu_objset_spa(bpo->bpo_os), bp); ++ ASSERT3S(bpo->bpo_phys->bpo_bytes, >=, 0); ++ if (bpo->bpo_havecomp) { ++ bpo->bpo_phys->bpo_comp -= BP_GET_PSIZE(bp); ++ bpo->bpo_phys->bpo_uncomp -= BP_GET_UCSIZE(bp); ++ } ++ bpo->bpo_phys->bpo_num_blkptrs--; ++ ASSERT3S(bpo->bpo_phys->bpo_num_blkptrs, >=, 0); ++ } ++ } ++ if (dbuf) { ++ dmu_buf_rele(dbuf, FTAG); ++ dbuf = NULL; ++ } ++ if (free) { ++ i++; ++ VERIFY3U(0, ==, dmu_free_range(bpo->bpo_os, bpo->bpo_object, ++ i * sizeof (blkptr_t), -1ULL, tx)); ++ } ++ if (err || !bpo->bpo_havesubobj || bpo->bpo_phys->bpo_subobjs == 0) ++ goto out; ++ ++ ASSERT(bpo->bpo_havecomp); ++ err = dmu_object_info(bpo->bpo_os, bpo->bpo_phys->bpo_subobjs, &doi); ++ if (err) { ++ mutex_exit(&bpo->bpo_lock); ++ return (err); ++ } ++ epb = doi.doi_data_block_size / sizeof (uint64_t); ++ ++ for (i = bpo->bpo_phys->bpo_num_subobjs - 1; i >= 0; i--) { ++ uint64_t *objarray; ++ uint64_t offset, blkoff; ++ bpobj_t sublist; ++ uint64_t used_before, comp_before, uncomp_before; ++ uint64_t used_after, comp_after, uncomp_after; ++ ++ offset = i * sizeof (uint64_t); ++ blkoff = P2PHASE(i, epb); ++ ++ if (dbuf == NULL || dbuf->db_offset > offset) { ++ if (dbuf) ++ dmu_buf_rele(dbuf, FTAG); ++ err = dmu_buf_hold(bpo->bpo_os, ++ bpo->bpo_phys->bpo_subobjs, offset, FTAG, &dbuf, 0); ++ if (err) ++ break; ++ } ++ ++ ASSERT3U(offset, >=, dbuf->db_offset); ++ ASSERT3U(offset, <, dbuf->db_offset + dbuf->db_size); ++ ++ objarray = dbuf->db_data; ++ err = bpobj_open(&sublist, bpo->bpo_os, objarray[blkoff]); ++ if (err) ++ break; ++ if (free) { ++ err = bpobj_space(&sublist, ++ &used_before, &comp_before, &uncomp_before); ++ if (err) ++ break; ++ } ++ err = bpobj_iterate_impl(&sublist, func, arg, tx, free); ++ if (free) { ++ VERIFY3U(0, ==, bpobj_space(&sublist, ++ &used_after, &comp_after, &uncomp_after)); ++ bpo->bpo_phys->bpo_bytes -= used_before - used_after; ++ ASSERT3S(bpo->bpo_phys->bpo_bytes, >=, 0); ++ bpo->bpo_phys->bpo_comp -= comp_before - comp_after; ++ bpo->bpo_phys->bpo_uncomp -= ++ uncomp_before - uncomp_after; ++ } ++ ++ bpobj_close(&sublist); ++ if (err) ++ break; ++ if (free) { ++ err = dmu_object_free(bpo->bpo_os, ++ objarray[blkoff], tx); ++ if (err) ++ break; ++ 
bpo->bpo_phys->bpo_num_subobjs--; ++ ASSERT3S(bpo->bpo_phys->bpo_num_subobjs, >=, 0); ++ } ++ } ++ if (dbuf) { ++ dmu_buf_rele(dbuf, FTAG); ++ dbuf = NULL; ++ } ++ if (free) { ++ VERIFY3U(0, ==, dmu_free_range(bpo->bpo_os, ++ bpo->bpo_phys->bpo_subobjs, ++ (i + 1) * sizeof (uint64_t), -1ULL, tx)); ++ } ++ ++out: ++ /* If there are no entries, there should be no bytes. */ ++ ASSERT(bpo->bpo_phys->bpo_num_blkptrs > 0 || ++ (bpo->bpo_havesubobj && bpo->bpo_phys->bpo_num_subobjs > 0) || ++ bpo->bpo_phys->bpo_bytes == 0); ++ ++ mutex_exit(&bpo->bpo_lock); ++ return (err); ++} ++ ++/* ++ * Iterate and remove the entries. If func returns nonzero, iteration ++ * will stop and that entry will not be removed. ++ */ ++int ++bpobj_iterate(bpobj_t *bpo, bpobj_itor_t func, void *arg, dmu_tx_t *tx) ++{ ++ return (bpobj_iterate_impl(bpo, func, arg, tx, B_TRUE)); ++} ++ ++/* ++ * Iterate the entries. If func returns nonzero, iteration will stop. ++ */ ++int ++bpobj_iterate_nofree(bpobj_t *bpo, bpobj_itor_t func, void *arg, dmu_tx_t *tx) ++{ ++ return (bpobj_iterate_impl(bpo, func, arg, tx, B_FALSE)); ++} ++ ++void ++bpobj_enqueue_subobj(bpobj_t *bpo, uint64_t subobj, dmu_tx_t *tx) ++{ ++ bpobj_t subbpo; ++ uint64_t used, comp, uncomp, subsubobjs; ++ ++ ASSERT(bpo->bpo_havesubobj); ++ ASSERT(bpo->bpo_havecomp); ++ ++ VERIFY3U(0, ==, bpobj_open(&subbpo, bpo->bpo_os, subobj)); ++ VERIFY3U(0, ==, bpobj_space(&subbpo, &used, &comp, &uncomp)); ++ ++ if (used == 0) { ++ /* No point in having an empty subobj. */ ++ bpobj_close(&subbpo); ++ bpobj_free(bpo->bpo_os, subobj, tx); ++ return; ++ } ++ ++ dmu_buf_will_dirty(bpo->bpo_dbuf, tx); ++ if (bpo->bpo_phys->bpo_subobjs == 0) { ++ bpo->bpo_phys->bpo_subobjs = dmu_object_alloc(bpo->bpo_os, ++ DMU_OT_BPOBJ_SUBOBJ, SPA_MAXBLOCKSIZE, DMU_OT_NONE, 0, tx); ++ } ++ ++ mutex_enter(&bpo->bpo_lock); ++ dmu_write(bpo->bpo_os, bpo->bpo_phys->bpo_subobjs, ++ bpo->bpo_phys->bpo_num_subobjs * sizeof (subobj), ++ sizeof (subobj), &subobj, tx); ++ bpo->bpo_phys->bpo_num_subobjs++; ++ ++ /* ++ * If subobj has only one block of subobjs, then move subobj's ++ * subobjs to bpo's subobj list directly. This reduces ++ * recursion in bpobj_iterate due to nested subobjs. ++ */ ++ subsubobjs = subbpo.bpo_phys->bpo_subobjs; ++ if (subsubobjs != 0) { ++ dmu_object_info_t doi; ++ ++ VERIFY3U(0, ==, dmu_object_info(bpo->bpo_os, subsubobjs, &doi)); ++ if (doi.doi_max_offset == doi.doi_data_block_size) { ++ dmu_buf_t *subdb; ++ uint64_t numsubsub = subbpo.bpo_phys->bpo_num_subobjs; ++ ++ VERIFY3U(0, ==, dmu_buf_hold(bpo->bpo_os, subsubobjs, ++ 0, FTAG, &subdb, 0)); ++ dmu_write(bpo->bpo_os, bpo->bpo_phys->bpo_subobjs, ++ bpo->bpo_phys->bpo_num_subobjs * sizeof (subobj), ++ numsubsub * sizeof (subobj), subdb->db_data, tx); ++ dmu_buf_rele(subdb, FTAG); ++ bpo->bpo_phys->bpo_num_subobjs += numsubsub; ++ ++ dmu_buf_will_dirty(subbpo.bpo_dbuf, tx); ++ subbpo.bpo_phys->bpo_subobjs = 0; ++ VERIFY3U(0, ==, dmu_object_free(bpo->bpo_os, ++ subsubobjs, tx)); ++ } ++ } ++ bpo->bpo_phys->bpo_bytes += used; ++ bpo->bpo_phys->bpo_comp += comp; ++ bpo->bpo_phys->bpo_uncomp += uncomp; ++ mutex_exit(&bpo->bpo_lock); ++ ++ bpobj_close(&subbpo); ++} ++ ++void ++bpobj_enqueue(bpobj_t *bpo, const blkptr_t *bp, dmu_tx_t *tx) ++{ ++ blkptr_t stored_bp = *bp; ++ uint64_t offset; ++ int blkoff; ++ blkptr_t *bparray; ++ ++ ASSERT(!BP_IS_HOLE(bp)); ++ ++ /* We never need the fill count. 
*/ ++ stored_bp.blk_fill = 0; ++ ++ /* The bpobj will compress better if we can leave off the checksum */ ++ if (!BP_GET_DEDUP(bp)) ++ bzero(&stored_bp.blk_cksum, sizeof (stored_bp.blk_cksum)); ++ ++ mutex_enter(&bpo->bpo_lock); ++ ++ offset = bpo->bpo_phys->bpo_num_blkptrs * sizeof (stored_bp); ++ blkoff = P2PHASE(bpo->bpo_phys->bpo_num_blkptrs, bpo->bpo_epb); ++ ++ if (bpo->bpo_cached_dbuf == NULL || ++ offset < bpo->bpo_cached_dbuf->db_offset || ++ offset >= bpo->bpo_cached_dbuf->db_offset + ++ bpo->bpo_cached_dbuf->db_size) { ++ if (bpo->bpo_cached_dbuf) ++ dmu_buf_rele(bpo->bpo_cached_dbuf, bpo); ++ VERIFY3U(0, ==, dmu_buf_hold(bpo->bpo_os, bpo->bpo_object, ++ offset, bpo, &bpo->bpo_cached_dbuf, 0)); ++ } ++ ++ dmu_buf_will_dirty(bpo->bpo_cached_dbuf, tx); ++ bparray = bpo->bpo_cached_dbuf->db_data; ++ bparray[blkoff] = stored_bp; ++ ++ dmu_buf_will_dirty(bpo->bpo_dbuf, tx); ++ bpo->bpo_phys->bpo_num_blkptrs++; ++ bpo->bpo_phys->bpo_bytes += ++ bp_get_dsize_sync(dmu_objset_spa(bpo->bpo_os), bp); ++ if (bpo->bpo_havecomp) { ++ bpo->bpo_phys->bpo_comp += BP_GET_PSIZE(bp); ++ bpo->bpo_phys->bpo_uncomp += BP_GET_UCSIZE(bp); ++ } ++ mutex_exit(&bpo->bpo_lock); ++} ++ ++struct space_range_arg { ++ spa_t *spa; ++ uint64_t mintxg; ++ uint64_t maxtxg; ++ uint64_t used; ++ uint64_t comp; ++ uint64_t uncomp; ++}; ++ ++/* ARGSUSED */ ++static int ++space_range_cb(void *arg, const blkptr_t *bp, dmu_tx_t *tx) ++{ ++ struct space_range_arg *sra = arg; ++ ++ if (bp->blk_birth > sra->mintxg && bp->blk_birth <= sra->maxtxg) { ++ if (dsl_pool_sync_context(spa_get_dsl(sra->spa))) ++ sra->used += bp_get_dsize_sync(sra->spa, bp); ++ else ++ sra->used += bp_get_dsize(sra->spa, bp); ++ sra->comp += BP_GET_PSIZE(bp); ++ sra->uncomp += BP_GET_UCSIZE(bp); ++ } ++ return (0); ++} ++ ++int ++bpobj_space(bpobj_t *bpo, uint64_t *usedp, uint64_t *compp, uint64_t *uncompp) ++{ ++ mutex_enter(&bpo->bpo_lock); ++ ++ *usedp = bpo->bpo_phys->bpo_bytes; ++ if (bpo->bpo_havecomp) { ++ *compp = bpo->bpo_phys->bpo_comp; ++ *uncompp = bpo->bpo_phys->bpo_uncomp; ++ mutex_exit(&bpo->bpo_lock); ++ return (0); ++ } else { ++ mutex_exit(&bpo->bpo_lock); ++ return (bpobj_space_range(bpo, 0, UINT64_MAX, ++ usedp, compp, uncompp)); ++ } ++} ++ ++/* ++ * Return the amount of space in the bpobj which is: ++ * mintxg < blk_birth <= maxtxg ++ */ ++int ++bpobj_space_range(bpobj_t *bpo, uint64_t mintxg, uint64_t maxtxg, ++ uint64_t *usedp, uint64_t *compp, uint64_t *uncompp) ++{ ++ struct space_range_arg sra = { 0 }; ++ int err; ++ ++ /* ++ * As an optimization, if they want the whole txg range, just ++ * get bpo_bytes rather than iterating over the bps. ++ */ ++ if (mintxg < TXG_INITIAL && maxtxg == UINT64_MAX && bpo->bpo_havecomp) ++ return (bpobj_space(bpo, usedp, compp, uncompp)); ++ ++ sra.spa = dmu_objset_spa(bpo->bpo_os); ++ sra.mintxg = mintxg; ++ sra.maxtxg = maxtxg; ++ ++ err = bpobj_iterate_nofree(bpo, space_range_cb, &sra, NULL); ++ *usedp = sra.used; ++ *compp = sra.comp; ++ *uncompp = sra.uncomp; ++ return (err); ++} +diff -uNr linux-3.2.33-go.orig/fs/zfs/zfs/dbuf.c linux-3.2.33-go/fs/zfs/zfs/dbuf.c +--- linux-3.2.33-go.orig/fs/zfs/zfs/dbuf.c 1970-01-01 01:00:00.000000000 +0100 ++++ linux-3.2.33-go/fs/zfs/zfs/dbuf.c 2012-11-16 23:25:34.353039289 +0100 +@@ -0,0 +1,2869 @@ ++/* ++ * CDDL HEADER START ++ * ++ * The contents of this file are subject to the terms of the ++ * Common Development and Distribution License (the "License"). ++ * You may not use this file except in compliance with the License. 
++ * ++ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE ++ * or http://www.opensolaris.org/os/licensing. ++ * See the License for the specific language governing permissions ++ * and limitations under the License. ++ * ++ * When distributing Covered Code, include this CDDL HEADER in each ++ * file and include the License file at usr/src/OPENSOLARIS.LICENSE. ++ * If applicable, add the following below this CDDL HEADER, with the ++ * fields enclosed by brackets "[]" replaced with your own identifying ++ * information: Portions Copyright [yyyy] [name of copyright owner] ++ * ++ * CDDL HEADER END ++ */ ++/* ++ * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved. ++ * Copyright 2011 Nexenta Systems, Inc. All rights reserved. ++ */ ++ ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++ ++struct dbuf_hold_impl_data { ++ /* Function arguments */ ++ dnode_t *dh_dn; ++ uint8_t dh_level; ++ uint64_t dh_blkid; ++ int dh_fail_sparse; ++ void *dh_tag; ++ dmu_buf_impl_t **dh_dbp; ++ /* Local variables */ ++ dmu_buf_impl_t *dh_db; ++ dmu_buf_impl_t *dh_parent; ++ blkptr_t *dh_bp; ++ int dh_err; ++ dbuf_dirty_record_t *dh_dr; ++ arc_buf_contents_t dh_type; ++ int dh_depth; ++}; ++ ++static void __dbuf_hold_impl_init(struct dbuf_hold_impl_data *dh, ++ dnode_t *dn, uint8_t level, uint64_t blkid, int fail_sparse, ++ void *tag, dmu_buf_impl_t **dbp, int depth); ++static int __dbuf_hold_impl(struct dbuf_hold_impl_data *dh); ++ ++static void dbuf_destroy(dmu_buf_impl_t *db); ++static int dbuf_undirty(dmu_buf_impl_t *db, dmu_tx_t *tx); ++static void dbuf_write(dbuf_dirty_record_t *dr, arc_buf_t *data, dmu_tx_t *tx); ++ ++/* ++ * Global data structures and functions for the dbuf cache. 
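++ * dbuf_cache below is a kmem cache of dmu_buf_impl_t structures,
++ * constructed by dbuf_cons() and torn down by dbuf_dest(); lookups go
++ * through the dbuf_hash_table defined further down.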
++ */ ++static kmem_cache_t *dbuf_cache; ++ ++/* ARGSUSED */ ++static int ++dbuf_cons(void *vdb, void *unused, int kmflag) ++{ ++ dmu_buf_impl_t *db = vdb; ++ bzero(db, sizeof (dmu_buf_impl_t)); ++ ++ mutex_init(&db->db_mtx, NULL, MUTEX_DEFAULT, NULL); ++ cv_init(&db->db_changed, NULL, CV_DEFAULT, NULL); ++ refcount_create(&db->db_holds); ++ list_link_init(&db->db_link); ++ return (0); ++} ++ ++/* ARGSUSED */ ++static void ++dbuf_dest(void *vdb, void *unused) ++{ ++ dmu_buf_impl_t *db = vdb; ++ mutex_destroy(&db->db_mtx); ++ cv_destroy(&db->db_changed); ++ refcount_destroy(&db->db_holds); ++} ++ ++/* ++ * dbuf hash table routines ++ */ ++static dbuf_hash_table_t dbuf_hash_table; ++ ++static uint64_t dbuf_hash_count; ++ ++static uint64_t ++dbuf_hash(void *os, uint64_t obj, uint8_t lvl, uint64_t blkid) ++{ ++ uintptr_t osv = (uintptr_t)os; ++ uint64_t crc = -1ULL; ++ ++ ASSERT(zfs_crc64_table[128] == ZFS_CRC64_POLY); ++ crc = (crc >> 8) ^ zfs_crc64_table[(crc ^ (lvl)) & 0xFF]; ++ crc = (crc >> 8) ^ zfs_crc64_table[(crc ^ (osv >> 6)) & 0xFF]; ++ crc = (crc >> 8) ^ zfs_crc64_table[(crc ^ (obj >> 0)) & 0xFF]; ++ crc = (crc >> 8) ^ zfs_crc64_table[(crc ^ (obj >> 8)) & 0xFF]; ++ crc = (crc >> 8) ^ zfs_crc64_table[(crc ^ (blkid >> 0)) & 0xFF]; ++ crc = (crc >> 8) ^ zfs_crc64_table[(crc ^ (blkid >> 8)) & 0xFF]; ++ ++ crc ^= (osv>>14) ^ (obj>>16) ^ (blkid>>16); ++ ++ return (crc); ++} ++ ++#define DBUF_HASH(os, obj, level, blkid) dbuf_hash(os, obj, level, blkid); ++ ++#define DBUF_EQUAL(dbuf, os, obj, level, blkid) \ ++ ((dbuf)->db.db_object == (obj) && \ ++ (dbuf)->db_objset == (os) && \ ++ (dbuf)->db_level == (level) && \ ++ (dbuf)->db_blkid == (blkid)) ++ ++dmu_buf_impl_t * ++dbuf_find(dnode_t *dn, uint8_t level, uint64_t blkid) ++{ ++ dbuf_hash_table_t *h = &dbuf_hash_table; ++ objset_t *os = dn->dn_objset; ++ uint64_t obj; ++ uint64_t hv; ++ uint64_t idx; ++ dmu_buf_impl_t *db; ++ ++ obj = dn->dn_object; ++ hv = DBUF_HASH(os, obj, level, blkid); ++ idx = hv & h->hash_table_mask; ++ ++ mutex_enter(DBUF_HASH_MUTEX(h, idx)); ++ for (db = h->hash_table[idx]; db != NULL; db = db->db_hash_next) { ++ if (DBUF_EQUAL(db, os, obj, level, blkid)) { ++ mutex_enter(&db->db_mtx); ++ if (db->db_state != DB_EVICTING) { ++ mutex_exit(DBUF_HASH_MUTEX(h, idx)); ++ return (db); ++ } ++ mutex_exit(&db->db_mtx); ++ } ++ } ++ mutex_exit(DBUF_HASH_MUTEX(h, idx)); ++ return (NULL); ++} ++ ++/* ++ * Insert an entry into the hash table. If there is already an element ++ * equal to elem in the hash table, then the already existing element ++ * will be returned and the new element will not be inserted. ++ * Otherwise returns NULL. 
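++ * A caller might therefore do something like (illustrative sketch only):
++ *
++ *	if ((odb = dbuf_hash_insert(db)) != NULL) {
++ *		... somebody else inserted it first: drop db
++ *		    and use odb instead ...
++ *	}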
++ */ ++static dmu_buf_impl_t * ++dbuf_hash_insert(dmu_buf_impl_t *db) ++{ ++ dbuf_hash_table_t *h = &dbuf_hash_table; ++ objset_t *os = db->db_objset; ++ uint64_t obj = db->db.db_object; ++ int level = db->db_level; ++ uint64_t blkid, hv, idx; ++ dmu_buf_impl_t *dbf; ++ ++ blkid = db->db_blkid; ++ hv = DBUF_HASH(os, obj, level, blkid); ++ idx = hv & h->hash_table_mask; ++ ++ mutex_enter(DBUF_HASH_MUTEX(h, idx)); ++ for (dbf = h->hash_table[idx]; dbf != NULL; dbf = dbf->db_hash_next) { ++ if (DBUF_EQUAL(dbf, os, obj, level, blkid)) { ++ mutex_enter(&dbf->db_mtx); ++ if (dbf->db_state != DB_EVICTING) { ++ mutex_exit(DBUF_HASH_MUTEX(h, idx)); ++ return (dbf); ++ } ++ mutex_exit(&dbf->db_mtx); ++ } ++ } ++ ++ mutex_enter(&db->db_mtx); ++ db->db_hash_next = h->hash_table[idx]; ++ h->hash_table[idx] = db; ++ mutex_exit(DBUF_HASH_MUTEX(h, idx)); ++ atomic_add_64(&dbuf_hash_count, 1); ++ ++ return (NULL); ++} ++ ++/* ++ * Remove an entry from the hash table. This operation will ++ * fail if there are any existing holds on the db. ++ */ ++static void ++dbuf_hash_remove(dmu_buf_impl_t *db) ++{ ++ dbuf_hash_table_t *h = &dbuf_hash_table; ++ uint64_t hv, idx; ++ dmu_buf_impl_t *dbf, **dbp; ++ ++ hv = DBUF_HASH(db->db_objset, db->db.db_object, ++ db->db_level, db->db_blkid); ++ idx = hv & h->hash_table_mask; ++ ++ /* ++ * We musn't hold db_mtx to maintin lock ordering: ++ * DBUF_HASH_MUTEX > db_mtx. ++ */ ++ ASSERT(refcount_is_zero(&db->db_holds)); ++ ASSERT(db->db_state == DB_EVICTING); ++ ASSERT(!MUTEX_HELD(&db->db_mtx)); ++ ++ mutex_enter(DBUF_HASH_MUTEX(h, idx)); ++ dbp = &h->hash_table[idx]; ++ while ((dbf = *dbp) != db) { ++ dbp = &dbf->db_hash_next; ++ ASSERT(dbf != NULL); ++ } ++ *dbp = db->db_hash_next; ++ db->db_hash_next = NULL; ++ mutex_exit(DBUF_HASH_MUTEX(h, idx)); ++ atomic_add_64(&dbuf_hash_count, -1); ++} ++ ++static arc_evict_func_t dbuf_do_evict; ++ ++static void ++dbuf_evict_user(dmu_buf_impl_t *db) ++{ ++ ASSERT(MUTEX_HELD(&db->db_mtx)); ++ ++ if (db->db_level != 0 || db->db_evict_func == NULL) ++ return; ++ ++ if (db->db_user_data_ptr_ptr) ++ *db->db_user_data_ptr_ptr = db->db.db_data; ++ db->db_evict_func(&db->db, db->db_user_ptr); ++ db->db_user_ptr = NULL; ++ db->db_user_data_ptr_ptr = NULL; ++ db->db_evict_func = NULL; ++} ++ ++boolean_t ++dbuf_is_metadata(dmu_buf_impl_t *db) ++{ ++ if (db->db_level > 0) { ++ return (B_TRUE); ++ } else { ++ boolean_t is_metadata; ++ ++ DB_DNODE_ENTER(db); ++ is_metadata = dmu_ot[DB_DNODE(db)->dn_type].ot_metadata; ++ DB_DNODE_EXIT(db); ++ ++ return (is_metadata); ++ } ++} ++ ++void ++dbuf_evict(dmu_buf_impl_t *db) ++{ ++ ASSERT(MUTEX_HELD(&db->db_mtx)); ++ ASSERT(db->db_buf == NULL); ++ ASSERT(db->db_data_pending == NULL); ++ ++ dbuf_clear(db); ++ dbuf_destroy(db); ++} ++ ++void ++dbuf_init(void) ++{ ++ uint64_t hsize = 1ULL << 16; ++ dbuf_hash_table_t *h = &dbuf_hash_table; ++ int i; ++ ++ /* ++ * The hash table is big enough to fill all of physical memory ++ * with an average 4K block size. The table will take up ++ * totalmem*sizeof(void*)/4K (i.e. 2MB/GB with 8-byte pointers). 
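++	 * For example, on a machine with 8 GB of physical memory the loop
++	 * below grows hsize from 2^16 until hsize * 4096 >= 8 GB, i.e. to
++	 * 2^21 buckets, and the table then occupies 2^21 * 8 bytes = 16 MB --
++	 * consistent with the 2 MB per GB figure above (illustrative numbers
++	 * only).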
++ */ ++ while (hsize * 4096 < physmem * PAGESIZE) ++ hsize <<= 1; ++ ++retry: ++ h->hash_table_mask = hsize - 1; ++#if defined(_KERNEL) && defined(HAVE_SPL) ++ /* Large allocations which do not require contiguous pages ++ * should be using vmem_alloc() in the linux kernel */ ++ h->hash_table = vmem_zalloc(hsize * sizeof (void *), KM_PUSHPAGE); ++#else ++ h->hash_table = kmem_zalloc(hsize * sizeof (void *), KM_NOSLEEP); ++#endif ++ if (h->hash_table == NULL) { ++ /* XXX - we should really return an error instead of assert */ ++ ASSERT(hsize > (1ULL << 10)); ++ hsize >>= 1; ++ goto retry; ++ } ++ ++ dbuf_cache = kmem_cache_create("dmu_buf_impl_t", ++ sizeof (dmu_buf_impl_t), ++ 0, dbuf_cons, dbuf_dest, NULL, NULL, NULL, 0); ++ ++ for (i = 0; i < DBUF_MUTEXES; i++) ++ mutex_init(&h->hash_mutexes[i], NULL, MUTEX_DEFAULT, NULL); ++} ++ ++void ++dbuf_fini(void) ++{ ++ dbuf_hash_table_t *h = &dbuf_hash_table; ++ int i; ++ ++ for (i = 0; i < DBUF_MUTEXES; i++) ++ mutex_destroy(&h->hash_mutexes[i]); ++#if defined(_KERNEL) && defined(HAVE_SPL) ++ /* Large allocations which do not require contiguous pages ++ * should be using vmem_free() in the linux kernel */ ++ vmem_free(h->hash_table, (h->hash_table_mask + 1) * sizeof (void *)); ++#else ++ kmem_free(h->hash_table, (h->hash_table_mask + 1) * sizeof (void *)); ++#endif ++ kmem_cache_destroy(dbuf_cache); ++} ++ ++/* ++ * Other stuff. ++ */ ++ ++#ifdef ZFS_DEBUG ++static void ++dbuf_verify(dmu_buf_impl_t *db) ++{ ++ dnode_t *dn; ++ dbuf_dirty_record_t *dr; ++ ++ ASSERT(MUTEX_HELD(&db->db_mtx)); ++ ++ if (!(zfs_flags & ZFS_DEBUG_DBUF_VERIFY)) ++ return; ++ ++ ASSERT(db->db_objset != NULL); ++ DB_DNODE_ENTER(db); ++ dn = DB_DNODE(db); ++ if (dn == NULL) { ++ ASSERT(db->db_parent == NULL); ++ ASSERT(db->db_blkptr == NULL); ++ } else { ++ ASSERT3U(db->db.db_object, ==, dn->dn_object); ++ ASSERT3P(db->db_objset, ==, dn->dn_objset); ++ ASSERT3U(db->db_level, <, dn->dn_nlevels); ++ ASSERT(db->db_blkid == DMU_BONUS_BLKID || ++ db->db_blkid == DMU_SPILL_BLKID || ++ !list_is_empty(&dn->dn_dbufs)); ++ } ++ if (db->db_blkid == DMU_BONUS_BLKID) { ++ ASSERT(dn != NULL); ++ ASSERT3U(db->db.db_size, >=, dn->dn_bonuslen); ++ ASSERT3U(db->db.db_offset, ==, DMU_BONUS_BLKID); ++ } else if (db->db_blkid == DMU_SPILL_BLKID) { ++ ASSERT(dn != NULL); ++ ASSERT3U(db->db.db_size, >=, dn->dn_bonuslen); ++ ASSERT3U(db->db.db_offset, ==, 0); ++ } else { ++ ASSERT3U(db->db.db_offset, ==, db->db_blkid * db->db.db_size); ++ } ++ ++ for (dr = db->db_data_pending; dr != NULL; dr = dr->dr_next) ++ ASSERT(dr->dr_dbuf == db); ++ ++ for (dr = db->db_last_dirty; dr != NULL; dr = dr->dr_next) ++ ASSERT(dr->dr_dbuf == db); ++ ++ /* ++ * We can't assert that db_size matches dn_datablksz because it ++ * can be momentarily different when another thread is doing ++ * dnode_set_blksz(). ++ */ ++ if (db->db_level == 0 && db->db.db_object == DMU_META_DNODE_OBJECT) { ++ dr = db->db_data_pending; ++ /* ++ * It should only be modified in syncing context, so ++ * make sure we only have one copy of the data. 
++ */ ++ ASSERT(dr == NULL || dr->dt.dl.dr_data == db->db_buf); ++ } ++ ++ /* verify db->db_blkptr */ ++ if (db->db_blkptr) { ++ if (db->db_parent == dn->dn_dbuf) { ++ /* db is pointed to by the dnode */ ++ /* ASSERT3U(db->db_blkid, <, dn->dn_nblkptr); */ ++ if (DMU_OBJECT_IS_SPECIAL(db->db.db_object)) ++ ASSERT(db->db_parent == NULL); ++ else ++ ASSERT(db->db_parent != NULL); ++ if (db->db_blkid != DMU_SPILL_BLKID) ++ ASSERT3P(db->db_blkptr, ==, ++ &dn->dn_phys->dn_blkptr[db->db_blkid]); ++ } else { ++ /* db is pointed to by an indirect block */ ++ ASSERTV(int epb = db->db_parent->db.db_size >> ++ SPA_BLKPTRSHIFT); ++ ASSERT3U(db->db_parent->db_level, ==, db->db_level+1); ++ ASSERT3U(db->db_parent->db.db_object, ==, ++ db->db.db_object); ++ /* ++ * dnode_grow_indblksz() can make this fail if we don't ++ * have the struct_rwlock. XXX indblksz no longer ++ * grows. safe to do this now? ++ */ ++ if (RW_WRITE_HELD(&dn->dn_struct_rwlock)) { ++ ASSERT3P(db->db_blkptr, ==, ++ ((blkptr_t *)db->db_parent->db.db_data + ++ db->db_blkid % epb)); ++ } ++ } ++ } ++ if ((db->db_blkptr == NULL || BP_IS_HOLE(db->db_blkptr)) && ++ (db->db_buf == NULL || db->db_buf->b_data) && ++ db->db.db_data && db->db_blkid != DMU_BONUS_BLKID && ++ db->db_state != DB_FILL && !dn->dn_free_txg) { ++ /* ++ * If the blkptr isn't set but they have nonzero data, ++ * it had better be dirty, otherwise we'll lose that ++ * data when we evict this buffer. ++ */ ++ if (db->db_dirtycnt == 0) { ++ ASSERTV(uint64_t *buf = db->db.db_data); ++ int i; ++ ++ for (i = 0; i < db->db.db_size >> 3; i++) { ++ ASSERT(buf[i] == 0); ++ } ++ } ++ } ++ DB_DNODE_EXIT(db); ++} ++#endif ++ ++static void ++dbuf_update_data(dmu_buf_impl_t *db) ++{ ++ ASSERT(MUTEX_HELD(&db->db_mtx)); ++ if (db->db_level == 0 && db->db_user_data_ptr_ptr) { ++ ASSERT(!refcount_is_zero(&db->db_holds)); ++ *db->db_user_data_ptr_ptr = db->db.db_data; ++ } ++} ++ ++static void ++dbuf_set_data(dmu_buf_impl_t *db, arc_buf_t *buf) ++{ ++ ASSERT(MUTEX_HELD(&db->db_mtx)); ++ ASSERT(db->db_buf == NULL || !arc_has_callback(db->db_buf)); ++ db->db_buf = buf; ++ if (buf != NULL) { ++ ASSERT(buf->b_data != NULL); ++ db->db.db_data = buf->b_data; ++ if (!arc_released(buf)) ++ arc_set_callback(buf, dbuf_do_evict, db); ++ dbuf_update_data(db); ++ } else { ++ dbuf_evict_user(db); ++ db->db.db_data = NULL; ++ if (db->db_state != DB_NOFILL) ++ db->db_state = DB_UNCACHED; ++ } ++} ++ ++/* ++ * Loan out an arc_buf for read. Return the loaned arc_buf. 
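++ * If the dbuf's buffer is shared (already released to the ARC, or held
++ * more than once) the caller receives a freshly allocated copy of the
++ * data; otherwise the dbuf's own arc_buf is handed out and the dbuf is
++ * left without a data pointer until it is filled again.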
++ */ ++arc_buf_t * ++dbuf_loan_arcbuf(dmu_buf_impl_t *db) ++{ ++ arc_buf_t *abuf; ++ ++ mutex_enter(&db->db_mtx); ++ if (arc_released(db->db_buf) || refcount_count(&db->db_holds) > 1) { ++ int blksz = db->db.db_size; ++ spa_t *spa; ++ ++ mutex_exit(&db->db_mtx); ++ DB_GET_SPA(&spa, db); ++ abuf = arc_loan_buf(spa, blksz); ++ bcopy(db->db.db_data, abuf->b_data, blksz); ++ } else { ++ abuf = db->db_buf; ++ arc_loan_inuse_buf(abuf, db); ++ dbuf_set_data(db, NULL); ++ mutex_exit(&db->db_mtx); ++ } ++ return (abuf); ++} ++ ++uint64_t ++dbuf_whichblock(dnode_t *dn, uint64_t offset) ++{ ++ if (dn->dn_datablkshift) { ++ return (offset >> dn->dn_datablkshift); ++ } else { ++ ASSERT3U(offset, <, dn->dn_datablksz); ++ return (0); ++ } ++} ++ ++static void ++dbuf_read_done(zio_t *zio, arc_buf_t *buf, void *vdb) ++{ ++ dmu_buf_impl_t *db = vdb; ++ ++ mutex_enter(&db->db_mtx); ++ ASSERT3U(db->db_state, ==, DB_READ); ++ /* ++ * All reads are synchronous, so we must have a hold on the dbuf ++ */ ++ ASSERT(refcount_count(&db->db_holds) > 0); ++ ASSERT(db->db_buf == NULL); ++ ASSERT(db->db.db_data == NULL); ++ if (db->db_level == 0 && db->db_freed_in_flight) { ++ /* we were freed in flight; disregard any error */ ++ arc_release(buf, db); ++ bzero(buf->b_data, db->db.db_size); ++ arc_buf_freeze(buf); ++ db->db_freed_in_flight = FALSE; ++ dbuf_set_data(db, buf); ++ db->db_state = DB_CACHED; ++ } else if (zio == NULL || zio->io_error == 0) { ++ dbuf_set_data(db, buf); ++ db->db_state = DB_CACHED; ++ } else { ++ ASSERT(db->db_blkid != DMU_BONUS_BLKID); ++ ASSERT3P(db->db_buf, ==, NULL); ++ VERIFY(arc_buf_remove_ref(buf, db) == 1); ++ db->db_state = DB_UNCACHED; ++ } ++ cv_broadcast(&db->db_changed); ++ dbuf_rele_and_unlock(db, NULL); ++} ++ ++static void ++dbuf_read_impl(dmu_buf_impl_t *db, zio_t *zio, uint32_t *flags) ++{ ++ dnode_t *dn; ++ spa_t *spa; ++ zbookmark_t zb; ++ uint32_t aflags = ARC_NOWAIT; ++ arc_buf_t *pbuf; ++ ++ DB_DNODE_ENTER(db); ++ dn = DB_DNODE(db); ++ ASSERT(!refcount_is_zero(&db->db_holds)); ++ /* We need the struct_rwlock to prevent db_blkptr from changing. */ ++ ASSERT(RW_LOCK_HELD(&dn->dn_struct_rwlock)); ++ ASSERT(MUTEX_HELD(&db->db_mtx)); ++ ASSERT(db->db_state == DB_UNCACHED); ++ ASSERT(db->db_buf == NULL); ++ ++ if (db->db_blkid == DMU_BONUS_BLKID) { ++ int bonuslen = MIN(dn->dn_bonuslen, dn->dn_phys->dn_bonuslen); ++ ++ ASSERT3U(bonuslen, <=, db->db.db_size); ++ db->db.db_data = zio_buf_alloc(DN_MAX_BONUSLEN); ++ arc_space_consume(DN_MAX_BONUSLEN, ARC_SPACE_OTHER); ++ if (bonuslen < DN_MAX_BONUSLEN) ++ bzero(db->db.db_data, DN_MAX_BONUSLEN); ++ if (bonuslen) ++ bcopy(DN_BONUS(dn->dn_phys), db->db.db_data, bonuslen); ++ DB_DNODE_EXIT(db); ++ dbuf_update_data(db); ++ db->db_state = DB_CACHED; ++ mutex_exit(&db->db_mtx); ++ return; ++ } ++ ++ /* ++ * Recheck BP_IS_HOLE() after dnode_block_freed() in case dnode_sync() ++ * processes the delete record and clears the bp while we are waiting ++ * for the dn_mtx (resulting in a "no" from block_freed). 
++ */ ++ if (db->db_blkptr == NULL || BP_IS_HOLE(db->db_blkptr) || ++ (db->db_level == 0 && (dnode_block_freed(dn, db->db_blkid) || ++ BP_IS_HOLE(db->db_blkptr)))) { ++ arc_buf_contents_t type = DBUF_GET_BUFC_TYPE(db); ++ ++ dbuf_set_data(db, arc_buf_alloc(dn->dn_objset->os_spa, ++ db->db.db_size, db, type)); ++ DB_DNODE_EXIT(db); ++ bzero(db->db.db_data, db->db.db_size); ++ db->db_state = DB_CACHED; ++ *flags |= DB_RF_CACHED; ++ mutex_exit(&db->db_mtx); ++ return; ++ } ++ ++ spa = dn->dn_objset->os_spa; ++ DB_DNODE_EXIT(db); ++ ++ db->db_state = DB_READ; ++ mutex_exit(&db->db_mtx); ++ ++ if (DBUF_IS_L2CACHEABLE(db)) ++ aflags |= ARC_L2CACHE; ++ ++ SET_BOOKMARK(&zb, db->db_objset->os_dsl_dataset ? ++ db->db_objset->os_dsl_dataset->ds_object : DMU_META_OBJSET, ++ db->db.db_object, db->db_level, db->db_blkid); ++ ++ dbuf_add_ref(db, NULL); ++ /* ZIO_FLAG_CANFAIL callers have to check the parent zio's error */ ++ ++ if (db->db_parent) ++ pbuf = db->db_parent->db_buf; ++ else ++ pbuf = db->db_objset->os_phys_buf; ++ ++ (void) dsl_read(zio, spa, db->db_blkptr, pbuf, ++ dbuf_read_done, db, ZIO_PRIORITY_SYNC_READ, ++ (*flags & DB_RF_CANFAIL) ? ZIO_FLAG_CANFAIL : ZIO_FLAG_MUSTSUCCEED, ++ &aflags, &zb); ++ if (aflags & ARC_CACHED) ++ *flags |= DB_RF_CACHED; ++} ++ ++int ++dbuf_read(dmu_buf_impl_t *db, zio_t *zio, uint32_t flags) ++{ ++ int err = 0; ++ int havepzio = (zio != NULL); ++ int prefetch; ++ dnode_t *dn; ++ ++ /* ++ * We don't have to hold the mutex to check db_state because it ++ * can't be freed while we have a hold on the buffer. ++ */ ++ ASSERT(!refcount_is_zero(&db->db_holds)); ++ ++ if (db->db_state == DB_NOFILL) ++ return (EIO); ++ ++ DB_DNODE_ENTER(db); ++ dn = DB_DNODE(db); ++ if ((flags & DB_RF_HAVESTRUCT) == 0) ++ rw_enter(&dn->dn_struct_rwlock, RW_READER); ++ ++ prefetch = db->db_level == 0 && db->db_blkid != DMU_BONUS_BLKID && ++ (flags & DB_RF_NOPREFETCH) == 0 && dn != NULL && ++ DBUF_IS_CACHEABLE(db); ++ ++ mutex_enter(&db->db_mtx); ++ if (db->db_state == DB_CACHED) { ++ mutex_exit(&db->db_mtx); ++ if (prefetch) ++ dmu_zfetch(&dn->dn_zfetch, db->db.db_offset, ++ db->db.db_size, TRUE); ++ if ((flags & DB_RF_HAVESTRUCT) == 0) ++ rw_exit(&dn->dn_struct_rwlock); ++ DB_DNODE_EXIT(db); ++ } else if (db->db_state == DB_UNCACHED) { ++ spa_t *spa = dn->dn_objset->os_spa; ++ ++ if (zio == NULL) ++ zio = zio_root(spa, NULL, NULL, ZIO_FLAG_CANFAIL); ++ dbuf_read_impl(db, zio, &flags); ++ ++ /* dbuf_read_impl has dropped db_mtx for us */ ++ ++ if (prefetch) ++ dmu_zfetch(&dn->dn_zfetch, db->db.db_offset, ++ db->db.db_size, flags & DB_RF_CACHED); ++ ++ if ((flags & DB_RF_HAVESTRUCT) == 0) ++ rw_exit(&dn->dn_struct_rwlock); ++ DB_DNODE_EXIT(db); ++ ++ if (!havepzio) ++ err = zio_wait(zio); ++ } else { ++ mutex_exit(&db->db_mtx); ++ if (prefetch) ++ dmu_zfetch(&dn->dn_zfetch, db->db.db_offset, ++ db->db.db_size, TRUE); ++ if ((flags & DB_RF_HAVESTRUCT) == 0) ++ rw_exit(&dn->dn_struct_rwlock); ++ DB_DNODE_EXIT(db); ++ ++ mutex_enter(&db->db_mtx); ++ if ((flags & DB_RF_NEVERWAIT) == 0) { ++ while (db->db_state == DB_READ || ++ db->db_state == DB_FILL) { ++ ASSERT(db->db_state == DB_READ || ++ (flags & DB_RF_HAVESTRUCT) == 0); ++ cv_wait(&db->db_changed, &db->db_mtx); ++ } ++ if (db->db_state == DB_UNCACHED) ++ err = EIO; ++ } ++ mutex_exit(&db->db_mtx); ++ } ++ ++ ASSERT(err || havepzio || db->db_state == DB_CACHED); ++ return (err); ++} ++ ++static void ++dbuf_noread(dmu_buf_impl_t *db) ++{ ++ ASSERT(!refcount_is_zero(&db->db_holds)); ++ ASSERT(db->db_blkid != DMU_BONUS_BLKID); ++ 
mutex_enter(&db->db_mtx); ++ while (db->db_state == DB_READ || db->db_state == DB_FILL) ++ cv_wait(&db->db_changed, &db->db_mtx); ++ if (db->db_state == DB_UNCACHED) { ++ arc_buf_contents_t type = DBUF_GET_BUFC_TYPE(db); ++ spa_t *spa; ++ ++ ASSERT(db->db_buf == NULL); ++ ASSERT(db->db.db_data == NULL); ++ DB_GET_SPA(&spa, db); ++ dbuf_set_data(db, arc_buf_alloc(spa, db->db.db_size, db, type)); ++ db->db_state = DB_FILL; ++ } else if (db->db_state == DB_NOFILL) { ++ dbuf_set_data(db, NULL); ++ } else { ++ ASSERT3U(db->db_state, ==, DB_CACHED); ++ } ++ mutex_exit(&db->db_mtx); ++} ++ ++/* ++ * This is our just-in-time copy function. It makes a copy of ++ * buffers, that have been modified in a previous transaction ++ * group, before we modify them in the current active group. ++ * ++ * This function is used in two places: when we are dirtying a ++ * buffer for the first time in a txg, and when we are freeing ++ * a range in a dnode that includes this buffer. ++ * ++ * Note that when we are called from dbuf_free_range() we do ++ * not put a hold on the buffer, we just traverse the active ++ * dbuf list for the dnode. ++ */ ++static void ++dbuf_fix_old_data(dmu_buf_impl_t *db, uint64_t txg) ++{ ++ dbuf_dirty_record_t *dr = db->db_last_dirty; ++ ++ ASSERT(MUTEX_HELD(&db->db_mtx)); ++ ASSERT(db->db.db_data != NULL); ++ ASSERT(db->db_level == 0); ++ ASSERT(db->db.db_object != DMU_META_DNODE_OBJECT); ++ ++ if (dr == NULL || ++ (dr->dt.dl.dr_data != ++ ((db->db_blkid == DMU_BONUS_BLKID) ? db->db.db_data : db->db_buf))) ++ return; ++ ++ /* ++ * If the last dirty record for this dbuf has not yet synced ++ * and its referencing the dbuf data, either: ++ * reset the reference to point to a new copy, ++ * or (if there a no active holders) ++ * just null out the current db_data pointer. ++ */ ++ ASSERT(dr->dr_txg >= txg - 2); ++ if (db->db_blkid == DMU_BONUS_BLKID) { ++ /* Note that the data bufs here are zio_bufs */ ++ dr->dt.dl.dr_data = zio_buf_alloc(DN_MAX_BONUSLEN); ++ arc_space_consume(DN_MAX_BONUSLEN, ARC_SPACE_OTHER); ++ bcopy(db->db.db_data, dr->dt.dl.dr_data, DN_MAX_BONUSLEN); ++ } else if (refcount_count(&db->db_holds) > db->db_dirtycnt) { ++ int size = db->db.db_size; ++ arc_buf_contents_t type = DBUF_GET_BUFC_TYPE(db); ++ spa_t *spa; ++ ++ DB_GET_SPA(&spa, db); ++ dr->dt.dl.dr_data = arc_buf_alloc(spa, size, db, type); ++ bcopy(db->db.db_data, dr->dt.dl.dr_data->b_data, size); ++ } else { ++ dbuf_set_data(db, NULL); ++ } ++} ++ ++void ++dbuf_unoverride(dbuf_dirty_record_t *dr) ++{ ++ dmu_buf_impl_t *db = dr->dr_dbuf; ++ blkptr_t *bp = &dr->dt.dl.dr_overridden_by; ++ uint64_t txg = dr->dr_txg; ++ ++ ASSERT(MUTEX_HELD(&db->db_mtx)); ++ ASSERT(dr->dt.dl.dr_override_state != DR_IN_DMU_SYNC); ++ ASSERT(db->db_level == 0); ++ ++ if (db->db_blkid == DMU_BONUS_BLKID || ++ dr->dt.dl.dr_override_state == DR_NOT_OVERRIDDEN) ++ return; ++ ++ ASSERT(db->db_data_pending != dr); ++ ++ /* free this block */ ++ if (!BP_IS_HOLE(bp)) { ++ spa_t *spa; ++ ++ DB_GET_SPA(&spa, db); ++ zio_free(spa, txg, bp); ++ } ++ dr->dt.dl.dr_override_state = DR_NOT_OVERRIDDEN; ++ /* ++ * Release the already-written buffer, so we leave it in ++ * a consistent dirty state. Note that all callers are ++ * modifying the buffer, so they will immediately do ++ * another (redundant) arc_release(). Therefore, leave ++ * the buf thawed to save the effort of freezing & ++ * immediately re-thawing it. 
++ */ ++ arc_release(dr->dt.dl.dr_data, db); ++} ++ ++/* ++ * Evict (if its unreferenced) or clear (if its referenced) any level-0 ++ * data blocks in the free range, so that any future readers will find ++ * empty blocks. Also, if we happen accross any level-1 dbufs in the ++ * range that have not already been marked dirty, mark them dirty so ++ * they stay in memory. ++ */ ++void ++dbuf_free_range(dnode_t *dn, uint64_t start, uint64_t end, dmu_tx_t *tx) ++{ ++ dmu_buf_impl_t *db, *db_next; ++ uint64_t txg = tx->tx_txg; ++ int epbs = dn->dn_indblkshift - SPA_BLKPTRSHIFT; ++ uint64_t first_l1 = start >> epbs; ++ uint64_t last_l1 = end >> epbs; ++ ++ if (end > dn->dn_maxblkid && (end != DMU_SPILL_BLKID)) { ++ end = dn->dn_maxblkid; ++ last_l1 = end >> epbs; ++ } ++ dprintf_dnode(dn, "start=%llu end=%llu\n", start, end); ++ mutex_enter(&dn->dn_dbufs_mtx); ++ for (db = list_head(&dn->dn_dbufs); db; db = db_next) { ++ db_next = list_next(&dn->dn_dbufs, db); ++ ASSERT(db->db_blkid != DMU_BONUS_BLKID); ++ ++ if (db->db_level == 1 && ++ db->db_blkid >= first_l1 && db->db_blkid <= last_l1) { ++ mutex_enter(&db->db_mtx); ++ if (db->db_last_dirty && ++ db->db_last_dirty->dr_txg < txg) { ++ dbuf_add_ref(db, FTAG); ++ mutex_exit(&db->db_mtx); ++ dbuf_will_dirty(db, tx); ++ dbuf_rele(db, FTAG); ++ } else { ++ mutex_exit(&db->db_mtx); ++ } ++ } ++ ++ if (db->db_level != 0) ++ continue; ++ dprintf_dbuf(db, "found buf %s\n", ""); ++ if (db->db_blkid < start || db->db_blkid > end) ++ continue; ++ ++ /* found a level 0 buffer in the range */ ++ if (dbuf_undirty(db, tx)) ++ continue; ++ ++ mutex_enter(&db->db_mtx); ++ if (db->db_state == DB_UNCACHED || ++ db->db_state == DB_NOFILL || ++ db->db_state == DB_EVICTING) { ++ ASSERT(db->db.db_data == NULL); ++ mutex_exit(&db->db_mtx); ++ continue; ++ } ++ if (db->db_state == DB_READ || db->db_state == DB_FILL) { ++ /* will be handled in dbuf_read_done or dbuf_rele */ ++ db->db_freed_in_flight = TRUE; ++ mutex_exit(&db->db_mtx); ++ continue; ++ } ++ if (refcount_count(&db->db_holds) == 0) { ++ ASSERT(db->db_buf); ++ dbuf_clear(db); ++ continue; ++ } ++ /* The dbuf is referenced */ ++ ++ if (db->db_last_dirty != NULL) { ++ dbuf_dirty_record_t *dr = db->db_last_dirty; ++ ++ if (dr->dr_txg == txg) { ++ /* ++ * This buffer is "in-use", re-adjust the file ++ * size to reflect that this buffer may ++ * contain new data when we sync. ++ */ ++ if (db->db_blkid != DMU_SPILL_BLKID && ++ db->db_blkid > dn->dn_maxblkid) ++ dn->dn_maxblkid = db->db_blkid; ++ dbuf_unoverride(dr); ++ } else { ++ /* ++ * This dbuf is not dirty in the open context. ++ * Either uncache it (if its not referenced in ++ * the open context) or reset its contents to ++ * empty. ++ */ ++ dbuf_fix_old_data(db, txg); ++ } ++ } ++ /* clear the contents if its cached */ ++ if (db->db_state == DB_CACHED) { ++ ASSERT(db->db.db_data != NULL); ++ arc_release(db->db_buf, db); ++ bzero(db->db.db_data, db->db.db_size); ++ arc_buf_freeze(db->db_buf); ++ } ++ ++ mutex_exit(&db->db_mtx); ++ } ++ mutex_exit(&dn->dn_dbufs_mtx); ++} ++ ++static int ++dbuf_block_freeable(dmu_buf_impl_t *db) ++{ ++ dsl_dataset_t *ds = db->db_objset->os_dsl_dataset; ++ uint64_t birth_txg = 0; ++ ++ /* ++ * We don't need any locking to protect db_blkptr: ++ * If it's syncing, then db_last_dirty will be set ++ * so we'll ignore db_blkptr. 
++ */ ++ ASSERT(MUTEX_HELD(&db->db_mtx)); ++ if (db->db_last_dirty) ++ birth_txg = db->db_last_dirty->dr_txg; ++ else if (db->db_blkptr) ++ birth_txg = db->db_blkptr->blk_birth; ++ ++ /* ++ * If we don't exist or are in a snapshot, we can't be freed. ++ * Don't pass the bp to dsl_dataset_block_freeable() since we ++ * are holding the db_mtx lock and might deadlock if we are ++ * prefetching a dedup-ed block. ++ */ ++ if (birth_txg) ++ return (ds == NULL || ++ dsl_dataset_block_freeable(ds, NULL, birth_txg)); ++ else ++ return (FALSE); ++} ++ ++void ++dbuf_new_size(dmu_buf_impl_t *db, int size, dmu_tx_t *tx) ++{ ++ arc_buf_t *buf, *obuf; ++ int osize = db->db.db_size; ++ arc_buf_contents_t type = DBUF_GET_BUFC_TYPE(db); ++ dnode_t *dn; ++ ++ ASSERT(db->db_blkid != DMU_BONUS_BLKID); ++ ++ DB_DNODE_ENTER(db); ++ dn = DB_DNODE(db); ++ ++ /* XXX does *this* func really need the lock? */ ++ ASSERT(RW_WRITE_HELD(&dn->dn_struct_rwlock)); ++ ++ /* ++ * This call to dbuf_will_dirty() with the dn_struct_rwlock held ++ * is OK, because there can be no other references to the db ++ * when we are changing its size, so no concurrent DB_FILL can ++ * be happening. ++ */ ++ /* ++ * XXX we should be doing a dbuf_read, checking the return ++ * value and returning that up to our callers ++ */ ++ dbuf_will_dirty(db, tx); ++ ++ /* create the data buffer for the new block */ ++ buf = arc_buf_alloc(dn->dn_objset->os_spa, size, db, type); ++ ++ /* copy old block data to the new block */ ++ obuf = db->db_buf; ++ bcopy(obuf->b_data, buf->b_data, MIN(osize, size)); ++ /* zero the remainder */ ++ if (size > osize) ++ bzero((uint8_t *)buf->b_data + osize, size - osize); ++ ++ mutex_enter(&db->db_mtx); ++ dbuf_set_data(db, buf); ++ VERIFY(arc_buf_remove_ref(obuf, db) == 1); ++ db->db.db_size = size; ++ ++ if (db->db_level == 0) { ++ ASSERT3U(db->db_last_dirty->dr_txg, ==, tx->tx_txg); ++ db->db_last_dirty->dt.dl.dr_data = buf; ++ } ++ mutex_exit(&db->db_mtx); ++ ++ dnode_willuse_space(dn, size-osize, tx); ++ DB_DNODE_EXIT(db); ++} ++ ++void ++dbuf_release_bp(dmu_buf_impl_t *db) ++{ ++ objset_t *os; ++ zbookmark_t zb; ++ ++ DB_GET_OBJSET(&os, db); ++ ASSERT(dsl_pool_sync_context(dmu_objset_pool(os))); ++ ASSERT(arc_released(os->os_phys_buf) || ++ list_link_active(&os->os_dsl_dataset->ds_synced_link)); ++ ASSERT(db->db_parent == NULL || arc_released(db->db_parent->db_buf)); ++ ++ zb.zb_objset = os->os_dsl_dataset ? ++ os->os_dsl_dataset->ds_object : 0; ++ zb.zb_object = db->db.db_object; ++ zb.zb_level = db->db_level; ++ zb.zb_blkid = db->db_blkid; ++ (void) arc_release_bp(db->db_buf, db, ++ db->db_blkptr, os->os_spa, &zb); ++} ++ ++dbuf_dirty_record_t * ++dbuf_dirty(dmu_buf_impl_t *db, dmu_tx_t *tx) ++{ ++ dnode_t *dn; ++ objset_t *os; ++ dbuf_dirty_record_t **drp, *dr; ++ int drop_struct_lock = FALSE; ++ boolean_t do_free_accounting = B_FALSE; ++ int txgoff = tx->tx_txg & TXG_MASK; ++ ++ ASSERT(tx->tx_txg != 0); ++ ASSERT(!refcount_is_zero(&db->db_holds)); ++ DMU_TX_DIRTY_BUF(tx, db); ++ ++ DB_DNODE_ENTER(db); ++ dn = DB_DNODE(db); ++ /* ++ * Shouldn't dirty a regular buffer in syncing context. Private ++ * objects may be dirtied in syncing context, but only if they ++ * were already pre-dirtied in open context. ++ */ ++ ASSERT(!dmu_tx_is_syncing(tx) || ++ BP_IS_HOLE(dn->dn_objset->os_rootbp) || ++ DMU_OBJECT_IS_SPECIAL(dn->dn_object) || ++ dn->dn_objset->os_dsl_dataset == NULL); ++ /* ++ * We make this assert for private objects as well, but after we ++ * check if we're already dirty. 
They are allowed to re-dirty ++ * in syncing context. ++ */ ++ ASSERT(dn->dn_object == DMU_META_DNODE_OBJECT || ++ dn->dn_dirtyctx == DN_UNDIRTIED || dn->dn_dirtyctx == ++ (dmu_tx_is_syncing(tx) ? DN_DIRTY_SYNC : DN_DIRTY_OPEN)); ++ ++ mutex_enter(&db->db_mtx); ++ /* ++ * XXX make this true for indirects too? The problem is that ++ * transactions created with dmu_tx_create_assigned() from ++ * syncing context don't bother holding ahead. ++ */ ++ ASSERT(db->db_level != 0 || ++ db->db_state == DB_CACHED || db->db_state == DB_FILL || ++ db->db_state == DB_NOFILL); ++ ++ mutex_enter(&dn->dn_mtx); ++ /* ++ * Don't set dirtyctx to SYNC if we're just modifying this as we ++ * initialize the objset. ++ */ ++ if (dn->dn_dirtyctx == DN_UNDIRTIED && ++ !BP_IS_HOLE(dn->dn_objset->os_rootbp)) { ++ dn->dn_dirtyctx = ++ (dmu_tx_is_syncing(tx) ? DN_DIRTY_SYNC : DN_DIRTY_OPEN); ++ ASSERT(dn->dn_dirtyctx_firstset == NULL); ++ dn->dn_dirtyctx_firstset = kmem_alloc(1, KM_PUSHPAGE); ++ } ++ mutex_exit(&dn->dn_mtx); ++ ++ if (db->db_blkid == DMU_SPILL_BLKID) ++ dn->dn_have_spill = B_TRUE; ++ ++ /* ++ * If this buffer is already dirty, we're done. ++ */ ++ drp = &db->db_last_dirty; ++ ASSERT(*drp == NULL || (*drp)->dr_txg <= tx->tx_txg || ++ db->db.db_object == DMU_META_DNODE_OBJECT); ++ while ((dr = *drp) != NULL && dr->dr_txg > tx->tx_txg) ++ drp = &dr->dr_next; ++ if (dr && dr->dr_txg == tx->tx_txg) { ++ DB_DNODE_EXIT(db); ++ ++ if (db->db_level == 0 && db->db_blkid != DMU_BONUS_BLKID) { ++ /* ++ * If this buffer has already been written out, ++ * we now need to reset its state. ++ */ ++ dbuf_unoverride(dr); ++ if (db->db.db_object != DMU_META_DNODE_OBJECT && ++ db->db_state != DB_NOFILL) ++ arc_buf_thaw(db->db_buf); ++ } ++ mutex_exit(&db->db_mtx); ++ return (dr); ++ } ++ ++ /* ++ * Only valid if not already dirty. ++ */ ++ ASSERT(dn->dn_object == 0 || ++ dn->dn_dirtyctx == DN_UNDIRTIED || dn->dn_dirtyctx == ++ (dmu_tx_is_syncing(tx) ? DN_DIRTY_SYNC : DN_DIRTY_OPEN)); ++ ++ ASSERT3U(dn->dn_nlevels, >, db->db_level); ++ ASSERT((dn->dn_phys->dn_nlevels == 0 && db->db_level == 0) || ++ dn->dn_phys->dn_nlevels > db->db_level || ++ dn->dn_next_nlevels[txgoff] > db->db_level || ++ dn->dn_next_nlevels[(tx->tx_txg-1) & TXG_MASK] > db->db_level || ++ dn->dn_next_nlevels[(tx->tx_txg-2) & TXG_MASK] > db->db_level); ++ ++ /* ++ * We should only be dirtying in syncing context if it's the ++ * mos or we're initializing the os or it's a special object. ++ * However, we are allowed to dirty in syncing context provided ++ * we already dirtied it in open context. Hence we must make ++ * this assertion only if we're not already dirty. ++ */ ++ os = dn->dn_objset; ++ ASSERT(!dmu_tx_is_syncing(tx) || DMU_OBJECT_IS_SPECIAL(dn->dn_object) || ++ os->os_dsl_dataset == NULL || BP_IS_HOLE(os->os_rootbp)); ++ ASSERT(db->db.db_size != 0); ++ ++ dprintf_dbuf(db, "size=%llx\n", (u_longlong_t)db->db.db_size); ++ ++ if (db->db_blkid != DMU_BONUS_BLKID) { ++ /* ++ * Update the accounting. ++ * Note: we delay "free accounting" until after we drop ++ * the db_mtx. This keeps us from grabbing other locks ++ * (and possibly deadlocking) in bp_get_dsize() while ++ * also holding the db_mtx. ++ */ ++ dnode_willuse_space(dn, db->db.db_size, tx); ++ do_free_accounting = dbuf_block_freeable(db); ++ } ++ ++ /* ++ * If this buffer is dirty in an old transaction group we need ++ * to make a copy of it so that the changes we make in this ++ * transaction group won't leak out when we sync the older txg. 
++ */ ++ dr = kmem_zalloc(sizeof (dbuf_dirty_record_t), KM_PUSHPAGE); ++ list_link_init(&dr->dr_dirty_node); ++ if (db->db_level == 0) { ++ void *data_old = db->db_buf; ++ ++ if (db->db_state != DB_NOFILL) { ++ if (db->db_blkid == DMU_BONUS_BLKID) { ++ dbuf_fix_old_data(db, tx->tx_txg); ++ data_old = db->db.db_data; ++ } else if (db->db.db_object != DMU_META_DNODE_OBJECT) { ++ /* ++ * Release the data buffer from the cache so ++ * that we can modify it without impacting ++ * possible other users of this cached data ++ * block. Note that indirect blocks and ++ * private objects are not released until the ++ * syncing state (since they are only modified ++ * then). ++ */ ++ arc_release(db->db_buf, db); ++ dbuf_fix_old_data(db, tx->tx_txg); ++ data_old = db->db_buf; ++ } ++ ASSERT(data_old != NULL); ++ } ++ dr->dt.dl.dr_data = data_old; ++ } else { ++ mutex_init(&dr->dt.di.dr_mtx, NULL, MUTEX_DEFAULT, NULL); ++ list_create(&dr->dt.di.dr_children, ++ sizeof (dbuf_dirty_record_t), ++ offsetof(dbuf_dirty_record_t, dr_dirty_node)); ++ } ++ dr->dr_dbuf = db; ++ dr->dr_txg = tx->tx_txg; ++ dr->dr_next = *drp; ++ *drp = dr; ++ ++ /* ++ * We could have been freed_in_flight between the dbuf_noread ++ * and dbuf_dirty. We win, as though the dbuf_noread() had ++ * happened after the free. ++ */ ++ if (db->db_level == 0 && db->db_blkid != DMU_BONUS_BLKID && ++ db->db_blkid != DMU_SPILL_BLKID) { ++ mutex_enter(&dn->dn_mtx); ++ dnode_clear_range(dn, db->db_blkid, 1, tx); ++ mutex_exit(&dn->dn_mtx); ++ db->db_freed_in_flight = FALSE; ++ } ++ ++ /* ++ * This buffer is now part of this txg ++ */ ++ dbuf_add_ref(db, (void *)(uintptr_t)tx->tx_txg); ++ db->db_dirtycnt += 1; ++ ASSERT3U(db->db_dirtycnt, <=, 3); ++ ++ mutex_exit(&db->db_mtx); ++ ++ if (db->db_blkid == DMU_BONUS_BLKID || ++ db->db_blkid == DMU_SPILL_BLKID) { ++ mutex_enter(&dn->dn_mtx); ++ ASSERT(!list_link_active(&dr->dr_dirty_node)); ++ list_insert_tail(&dn->dn_dirty_records[txgoff], dr); ++ mutex_exit(&dn->dn_mtx); ++ dnode_setdirty(dn, tx); ++ DB_DNODE_EXIT(db); ++ return (dr); ++ } else if (do_free_accounting) { ++ blkptr_t *bp = db->db_blkptr; ++ int64_t willfree = (bp && !BP_IS_HOLE(bp)) ? ++ bp_get_dsize(os->os_spa, bp) : db->db.db_size; ++ /* ++ * This is only a guess -- if the dbuf is dirty ++ * in a previous txg, we don't know how much ++ * space it will use on disk yet. We should ++ * really have the struct_rwlock to access ++ * db_blkptr, but since this is just a guess, ++ * it's OK if we get an odd answer. 
++ */ ++ ddt_prefetch(os->os_spa, bp); ++ dnode_willuse_space(dn, -willfree, tx); ++ } ++ ++ if (!RW_WRITE_HELD(&dn->dn_struct_rwlock)) { ++ rw_enter(&dn->dn_struct_rwlock, RW_READER); ++ drop_struct_lock = TRUE; ++ } ++ ++ if (db->db_level == 0) { ++ dnode_new_blkid(dn, db->db_blkid, tx, drop_struct_lock); ++ ASSERT(dn->dn_maxblkid >= db->db_blkid); ++ } ++ ++ if (db->db_level+1 < dn->dn_nlevels) { ++ dmu_buf_impl_t *parent = db->db_parent; ++ dbuf_dirty_record_t *di; ++ int parent_held = FALSE; ++ ++ if (db->db_parent == NULL || db->db_parent == dn->dn_dbuf) { ++ int epbs = dn->dn_indblkshift - SPA_BLKPTRSHIFT; ++ ++ parent = dbuf_hold_level(dn, db->db_level+1, ++ db->db_blkid >> epbs, FTAG); ++ ASSERT(parent != NULL); ++ parent_held = TRUE; ++ } ++ if (drop_struct_lock) ++ rw_exit(&dn->dn_struct_rwlock); ++ ASSERT3U(db->db_level+1, ==, parent->db_level); ++ di = dbuf_dirty(parent, tx); ++ if (parent_held) ++ dbuf_rele(parent, FTAG); ++ ++ mutex_enter(&db->db_mtx); ++ /* possible race with dbuf_undirty() */ ++ if (db->db_last_dirty == dr || ++ dn->dn_object == DMU_META_DNODE_OBJECT) { ++ mutex_enter(&di->dt.di.dr_mtx); ++ ASSERT3U(di->dr_txg, ==, tx->tx_txg); ++ ASSERT(!list_link_active(&dr->dr_dirty_node)); ++ list_insert_tail(&di->dt.di.dr_children, dr); ++ mutex_exit(&di->dt.di.dr_mtx); ++ dr->dr_parent = di; ++ } ++ mutex_exit(&db->db_mtx); ++ } else { ++ ASSERT(db->db_level+1 == dn->dn_nlevels); ++ ASSERT(db->db_blkid < dn->dn_nblkptr); ++ ASSERT(db->db_parent == NULL || db->db_parent == dn->dn_dbuf); ++ mutex_enter(&dn->dn_mtx); ++ ASSERT(!list_link_active(&dr->dr_dirty_node)); ++ list_insert_tail(&dn->dn_dirty_records[txgoff], dr); ++ mutex_exit(&dn->dn_mtx); ++ if (drop_struct_lock) ++ rw_exit(&dn->dn_struct_rwlock); ++ } ++ ++ dnode_setdirty(dn, tx); ++ DB_DNODE_EXIT(db); ++ return (dr); ++} ++ ++static int ++dbuf_undirty(dmu_buf_impl_t *db, dmu_tx_t *tx) ++{ ++ dnode_t *dn; ++ uint64_t txg = tx->tx_txg; ++ dbuf_dirty_record_t *dr, **drp; ++ ++ ASSERT(txg != 0); ++ ASSERT(db->db_blkid != DMU_BONUS_BLKID); ++ ++ mutex_enter(&db->db_mtx); ++ /* ++ * If this buffer is not dirty, we're done. ++ */ ++ for (drp = &db->db_last_dirty; (dr = *drp) != NULL; drp = &dr->dr_next) ++ if (dr->dr_txg <= txg) ++ break; ++ if (dr == NULL || dr->dr_txg < txg) { ++ mutex_exit(&db->db_mtx); ++ return (0); ++ } ++ ASSERT(dr->dr_txg == txg); ++ ASSERT(dr->dr_dbuf == db); ++ ++ DB_DNODE_ENTER(db); ++ dn = DB_DNODE(db); ++ ++ /* ++ * If this buffer is currently held, we cannot undirty ++ * it, since one of the current holders may be in the ++ * middle of an update. Note that users of dbuf_undirty() ++ * should not place a hold on the dbuf before the call. ++ * Also note: we can get here with a spill block, so ++ * test for that similar to how dbuf_dirty does. ++ */ ++ if (refcount_count(&db->db_holds) > db->db_dirtycnt) { ++ mutex_exit(&db->db_mtx); ++ /* Make sure we don't toss this buffer at sync phase */ ++ if (db->db_blkid != DMU_SPILL_BLKID) { ++ mutex_enter(&dn->dn_mtx); ++ dnode_clear_range(dn, db->db_blkid, 1, tx); ++ mutex_exit(&dn->dn_mtx); ++ } ++ DB_DNODE_EXIT(db); ++ return (0); ++ } ++ ++ dprintf_dbuf(db, "size=%llx\n", (u_longlong_t)db->db.db_size); ++ ++ ASSERT(db->db.db_size != 0); ++ ++ /* XXX would be nice to fix up dn_towrite_space[] */ ++ ++ *drp = dr->dr_next; ++ ++ /* ++ * Note that there are three places in dbuf_dirty() ++ * where this dirty record may be put on a list. ++ * Make sure to do a list_remove corresponding to ++ * every one of those list_insert calls. 
++ */ ++ if (dr->dr_parent) { ++ mutex_enter(&dr->dr_parent->dt.di.dr_mtx); ++ list_remove(&dr->dr_parent->dt.di.dr_children, dr); ++ mutex_exit(&dr->dr_parent->dt.di.dr_mtx); ++ } else if (db->db_blkid == DMU_SPILL_BLKID || ++ db->db_level+1 == dn->dn_nlevels) { ++ ASSERT(db->db_blkptr == NULL || db->db_parent == dn->dn_dbuf); ++ mutex_enter(&dn->dn_mtx); ++ list_remove(&dn->dn_dirty_records[txg & TXG_MASK], dr); ++ mutex_exit(&dn->dn_mtx); ++ } ++ DB_DNODE_EXIT(db); ++ ++ if (db->db_level == 0) { ++ if (db->db_state != DB_NOFILL) { ++ dbuf_unoverride(dr); ++ ++ ASSERT(db->db_buf != NULL); ++ ASSERT(dr->dt.dl.dr_data != NULL); ++ if (dr->dt.dl.dr_data != db->db_buf) ++ VERIFY(arc_buf_remove_ref(dr->dt.dl.dr_data, ++ db) == 1); ++ } ++ } else { ++ ASSERT(db->db_buf != NULL); ++ ASSERT(list_head(&dr->dt.di.dr_children) == NULL); ++ mutex_destroy(&dr->dt.di.dr_mtx); ++ list_destroy(&dr->dt.di.dr_children); ++ } ++ kmem_free(dr, sizeof (dbuf_dirty_record_t)); ++ ++ ASSERT(db->db_dirtycnt > 0); ++ db->db_dirtycnt -= 1; ++ ++ if (refcount_remove(&db->db_holds, (void *)(uintptr_t)txg) == 0) { ++ arc_buf_t *buf = db->db_buf; ++ ++ ASSERT(db->db_state == DB_NOFILL || arc_released(buf)); ++ dbuf_set_data(db, NULL); ++ VERIFY(arc_buf_remove_ref(buf, db) == 1); ++ dbuf_evict(db); ++ return (1); ++ } ++ ++ mutex_exit(&db->db_mtx); ++ return (0); ++} ++ ++#pragma weak dmu_buf_will_dirty = dbuf_will_dirty ++void ++dbuf_will_dirty(dmu_buf_impl_t *db, dmu_tx_t *tx) ++{ ++ int rf = DB_RF_MUST_SUCCEED | DB_RF_NOPREFETCH; ++ ++ ASSERT(tx->tx_txg != 0); ++ ASSERT(!refcount_is_zero(&db->db_holds)); ++ ++ DB_DNODE_ENTER(db); ++ if (RW_WRITE_HELD(&DB_DNODE(db)->dn_struct_rwlock)) ++ rf |= DB_RF_HAVESTRUCT; ++ DB_DNODE_EXIT(db); ++ (void) dbuf_read(db, NULL, rf); ++ (void) dbuf_dirty(db, tx); ++} ++ ++void ++dmu_buf_will_not_fill(dmu_buf_t *db_fake, dmu_tx_t *tx) ++{ ++ dmu_buf_impl_t *db = (dmu_buf_impl_t *)db_fake; ++ ++ db->db_state = DB_NOFILL; ++ ++ dmu_buf_will_fill(db_fake, tx); ++} ++ ++void ++dmu_buf_will_fill(dmu_buf_t *db_fake, dmu_tx_t *tx) ++{ ++ dmu_buf_impl_t *db = (dmu_buf_impl_t *)db_fake; ++ ++ ASSERT(db->db_blkid != DMU_BONUS_BLKID); ++ ASSERT(tx->tx_txg != 0); ++ ASSERT(db->db_level == 0); ++ ASSERT(!refcount_is_zero(&db->db_holds)); ++ ++ ASSERT(db->db.db_object != DMU_META_DNODE_OBJECT || ++ dmu_tx_private_ok(tx)); ++ ++ dbuf_noread(db); ++ (void) dbuf_dirty(db, tx); ++} ++ ++#pragma weak dmu_buf_fill_done = dbuf_fill_done ++/* ARGSUSED */ ++void ++dbuf_fill_done(dmu_buf_impl_t *db, dmu_tx_t *tx) ++{ ++ mutex_enter(&db->db_mtx); ++ DBUF_VERIFY(db); ++ ++ if (db->db_state == DB_FILL) { ++ if (db->db_level == 0 && db->db_freed_in_flight) { ++ ASSERT(db->db_blkid != DMU_BONUS_BLKID); ++ /* we were freed while filling */ ++ /* XXX dbuf_undirty? */ ++ bzero(db->db.db_data, db->db.db_size); ++ db->db_freed_in_flight = FALSE; ++ } ++ db->db_state = DB_CACHED; ++ cv_broadcast(&db->db_changed); ++ } ++ mutex_exit(&db->db_mtx); ++} ++ ++/* ++ * Directly assign a provided arc buf to a given dbuf if it's not referenced ++ * by anybody except our caller. Otherwise copy arcbuf's contents to dbuf. 
++ */ ++void ++dbuf_assign_arcbuf(dmu_buf_impl_t *db, arc_buf_t *buf, dmu_tx_t *tx) ++{ ++ ASSERT(!refcount_is_zero(&db->db_holds)); ++ ASSERT(db->db_blkid != DMU_BONUS_BLKID); ++ ASSERT(db->db_level == 0); ++ ASSERT(DBUF_GET_BUFC_TYPE(db) == ARC_BUFC_DATA); ++ ASSERT(buf != NULL); ++ ASSERT(arc_buf_size(buf) == db->db.db_size); ++ ASSERT(tx->tx_txg != 0); ++ ++ arc_return_buf(buf, db); ++ ASSERT(arc_released(buf)); ++ ++ mutex_enter(&db->db_mtx); ++ ++ while (db->db_state == DB_READ || db->db_state == DB_FILL) ++ cv_wait(&db->db_changed, &db->db_mtx); ++ ++ ASSERT(db->db_state == DB_CACHED || db->db_state == DB_UNCACHED); ++ ++ if (db->db_state == DB_CACHED && ++ refcount_count(&db->db_holds) - 1 > db->db_dirtycnt) { ++ mutex_exit(&db->db_mtx); ++ (void) dbuf_dirty(db, tx); ++ bcopy(buf->b_data, db->db.db_data, db->db.db_size); ++ VERIFY(arc_buf_remove_ref(buf, db) == 1); ++ xuio_stat_wbuf_copied(); ++ return; ++ } ++ ++ xuio_stat_wbuf_nocopy(); ++ if (db->db_state == DB_CACHED) { ++ dbuf_dirty_record_t *dr = db->db_last_dirty; ++ ++ ASSERT(db->db_buf != NULL); ++ if (dr != NULL && dr->dr_txg == tx->tx_txg) { ++ ASSERT(dr->dt.dl.dr_data == db->db_buf); ++ if (!arc_released(db->db_buf)) { ++ ASSERT(dr->dt.dl.dr_override_state == ++ DR_OVERRIDDEN); ++ arc_release(db->db_buf, db); ++ } ++ dr->dt.dl.dr_data = buf; ++ VERIFY(arc_buf_remove_ref(db->db_buf, db) == 1); ++ } else if (dr == NULL || dr->dt.dl.dr_data != db->db_buf) { ++ arc_release(db->db_buf, db); ++ VERIFY(arc_buf_remove_ref(db->db_buf, db) == 1); ++ } ++ db->db_buf = NULL; ++ } ++ ASSERT(db->db_buf == NULL); ++ dbuf_set_data(db, buf); ++ db->db_state = DB_FILL; ++ mutex_exit(&db->db_mtx); ++ (void) dbuf_dirty(db, tx); ++ dbuf_fill_done(db, tx); ++} ++ ++/* ++ * "Clear" the contents of this dbuf. This will mark the dbuf ++ * EVICTING and clear *most* of its references. Unfortunetely, ++ * when we are not holding the dn_dbufs_mtx, we can't clear the ++ * entry in the dn_dbufs list. We have to wait until dbuf_destroy() ++ * in this case. 
For callers from the DMU we will usually see: ++ * dbuf_clear()->arc_buf_evict()->dbuf_do_evict()->dbuf_destroy() ++ * For the arc callback, we will usually see: ++ * dbuf_do_evict()->dbuf_clear();dbuf_destroy() ++ * Sometimes, though, we will get a mix of these two: ++ * DMU: dbuf_clear()->arc_buf_evict() ++ * ARC: dbuf_do_evict()->dbuf_destroy() ++ */ ++void ++dbuf_clear(dmu_buf_impl_t *db) ++{ ++ dnode_t *dn; ++ dmu_buf_impl_t *parent = db->db_parent; ++ dmu_buf_impl_t *dndb; ++ int dbuf_gone = FALSE; ++ ++ ASSERT(MUTEX_HELD(&db->db_mtx)); ++ ASSERT(refcount_is_zero(&db->db_holds)); ++ ++ dbuf_evict_user(db); ++ ++ if (db->db_state == DB_CACHED) { ++ ASSERT(db->db.db_data != NULL); ++ if (db->db_blkid == DMU_BONUS_BLKID) { ++ zio_buf_free(db->db.db_data, DN_MAX_BONUSLEN); ++ arc_space_return(DN_MAX_BONUSLEN, ARC_SPACE_OTHER); ++ } ++ db->db.db_data = NULL; ++ db->db_state = DB_UNCACHED; ++ } ++ ++ ASSERT(db->db_state == DB_UNCACHED || db->db_state == DB_NOFILL); ++ ASSERT(db->db_data_pending == NULL); ++ ++ db->db_state = DB_EVICTING; ++ db->db_blkptr = NULL; ++ ++ DB_DNODE_ENTER(db); ++ dn = DB_DNODE(db); ++ dndb = dn->dn_dbuf; ++ if (db->db_blkid != DMU_BONUS_BLKID && MUTEX_HELD(&dn->dn_dbufs_mtx)) { ++ list_remove(&dn->dn_dbufs, db); ++ (void) atomic_dec_32_nv(&dn->dn_dbufs_count); ++ membar_producer(); ++ DB_DNODE_EXIT(db); ++ /* ++ * Decrementing the dbuf count means that the hold corresponding ++ * to the removed dbuf is no longer discounted in dnode_move(), ++ * so the dnode cannot be moved until after we release the hold. ++ * The membar_producer() ensures visibility of the decremented ++ * value in dnode_move(), since DB_DNODE_EXIT doesn't actually ++ * release any lock. ++ */ ++ dnode_rele(dn, db); ++ db->db_dnode_handle = NULL; ++ } else { ++ DB_DNODE_EXIT(db); ++ } ++ ++ if (db->db_buf) ++ dbuf_gone = arc_buf_evict(db->db_buf); ++ ++ if (!dbuf_gone) ++ mutex_exit(&db->db_mtx); ++ ++ /* ++ * If this dbuf is referenced from an indirect dbuf, ++ * decrement the ref count on the indirect dbuf. 
++ */ ++ if (parent && parent != dndb) ++ dbuf_rele(parent, db); ++} ++ ++__attribute__((always_inline)) ++static inline int ++dbuf_findbp(dnode_t *dn, int level, uint64_t blkid, int fail_sparse, ++ dmu_buf_impl_t **parentp, blkptr_t **bpp, struct dbuf_hold_impl_data *dh) ++{ ++ int nlevels, epbs; ++ ++ *parentp = NULL; ++ *bpp = NULL; ++ ++ ASSERT(blkid != DMU_BONUS_BLKID); ++ ++ if (blkid == DMU_SPILL_BLKID) { ++ mutex_enter(&dn->dn_mtx); ++ if (dn->dn_have_spill && ++ (dn->dn_phys->dn_flags & DNODE_FLAG_SPILL_BLKPTR)) ++ *bpp = &dn->dn_phys->dn_spill; ++ else ++ *bpp = NULL; ++ dbuf_add_ref(dn->dn_dbuf, NULL); ++ *parentp = dn->dn_dbuf; ++ mutex_exit(&dn->dn_mtx); ++ return (0); ++ } ++ ++ if (dn->dn_phys->dn_nlevels == 0) ++ nlevels = 1; ++ else ++ nlevels = dn->dn_phys->dn_nlevels; ++ ++ epbs = dn->dn_indblkshift - SPA_BLKPTRSHIFT; ++ ++ ASSERT3U(level * epbs, <, 64); ++ ASSERT(RW_LOCK_HELD(&dn->dn_struct_rwlock)); ++ if (level >= nlevels || ++ (blkid > (dn->dn_phys->dn_maxblkid >> (level * epbs)))) { ++ /* the buffer has no parent yet */ ++ return (ENOENT); ++ } else if (level < nlevels-1) { ++ /* this block is referenced from an indirect block */ ++ int err; ++ if (dh == NULL) { ++ err = dbuf_hold_impl(dn, level+1, blkid >> epbs, ++ fail_sparse, NULL, parentp); ++ } ++ else { ++ __dbuf_hold_impl_init(dh + 1, dn, dh->dh_level + 1, ++ blkid >> epbs, fail_sparse, NULL, ++ parentp, dh->dh_depth + 1); ++ err = __dbuf_hold_impl(dh + 1); ++ } ++ if (err) ++ return (err); ++ err = dbuf_read(*parentp, NULL, ++ (DB_RF_HAVESTRUCT | DB_RF_NOPREFETCH | DB_RF_CANFAIL)); ++ if (err) { ++ dbuf_rele(*parentp, NULL); ++ *parentp = NULL; ++ return (err); ++ } ++ *bpp = ((blkptr_t *)(*parentp)->db.db_data) + ++ (blkid & ((1ULL << epbs) - 1)); ++ return (0); ++ } else { ++ /* the block is referenced from the dnode */ ++ ASSERT3U(level, ==, nlevels-1); ++ ASSERT(dn->dn_phys->dn_nblkptr == 0 || ++ blkid < dn->dn_phys->dn_nblkptr); ++ if (dn->dn_dbuf) { ++ dbuf_add_ref(dn->dn_dbuf, NULL); ++ *parentp = dn->dn_dbuf; ++ } ++ *bpp = &dn->dn_phys->dn_blkptr[blkid]; ++ return (0); ++ } ++} ++ ++static dmu_buf_impl_t * ++dbuf_create(dnode_t *dn, uint8_t level, uint64_t blkid, ++ dmu_buf_impl_t *parent, blkptr_t *blkptr) ++{ ++ objset_t *os = dn->dn_objset; ++ dmu_buf_impl_t *db, *odb; ++ ++ ASSERT(RW_LOCK_HELD(&dn->dn_struct_rwlock)); ++ ASSERT(dn->dn_type != DMU_OT_NONE); ++ ++ db = kmem_cache_alloc(dbuf_cache, KM_PUSHPAGE); ++ ++ db->db_objset = os; ++ db->db.db_object = dn->dn_object; ++ db->db_level = level; ++ db->db_blkid = blkid; ++ db->db_last_dirty = NULL; ++ db->db_dirtycnt = 0; ++ db->db_dnode_handle = dn->dn_handle; ++ db->db_parent = parent; ++ db->db_blkptr = blkptr; ++ ++ db->db_user_ptr = NULL; ++ db->db_user_data_ptr_ptr = NULL; ++ db->db_evict_func = NULL; ++ db->db_immediate_evict = 0; ++ db->db_freed_in_flight = 0; ++ ++ if (blkid == DMU_BONUS_BLKID) { ++ ASSERT3P(parent, ==, dn->dn_dbuf); ++ db->db.db_size = DN_MAX_BONUSLEN - ++ (dn->dn_nblkptr-1) * sizeof (blkptr_t); ++ ASSERT3U(db->db.db_size, >=, dn->dn_bonuslen); ++ db->db.db_offset = DMU_BONUS_BLKID; ++ db->db_state = DB_UNCACHED; ++ /* the bonus dbuf is not placed in the hash table */ ++ arc_space_consume(sizeof (dmu_buf_impl_t), ARC_SPACE_OTHER); ++ return (db); ++ } else if (blkid == DMU_SPILL_BLKID) { ++ db->db.db_size = (blkptr != NULL) ? ++ BP_GET_LSIZE(blkptr) : SPA_MINBLOCKSIZE; ++ db->db.db_offset = 0; ++ } else { ++ int blocksize = ++ db->db_level ? 
1<dn_indblkshift : dn->dn_datablksz; ++ db->db.db_size = blocksize; ++ db->db.db_offset = db->db_blkid * blocksize; ++ } ++ ++ /* ++ * Hold the dn_dbufs_mtx while we get the new dbuf ++ * in the hash table *and* added to the dbufs list. ++ * This prevents a possible deadlock with someone ++ * trying to look up this dbuf before its added to the ++ * dn_dbufs list. ++ */ ++ mutex_enter(&dn->dn_dbufs_mtx); ++ db->db_state = DB_EVICTING; ++ if ((odb = dbuf_hash_insert(db)) != NULL) { ++ /* someone else inserted it first */ ++ kmem_cache_free(dbuf_cache, db); ++ mutex_exit(&dn->dn_dbufs_mtx); ++ return (odb); ++ } ++ list_insert_head(&dn->dn_dbufs, db); ++ db->db_state = DB_UNCACHED; ++ mutex_exit(&dn->dn_dbufs_mtx); ++ arc_space_consume(sizeof (dmu_buf_impl_t), ARC_SPACE_OTHER); ++ ++ if (parent && parent != dn->dn_dbuf) ++ dbuf_add_ref(parent, db); ++ ++ ASSERT(dn->dn_object == DMU_META_DNODE_OBJECT || ++ refcount_count(&dn->dn_holds) > 0); ++ (void) refcount_add(&dn->dn_holds, db); ++ (void) atomic_inc_32_nv(&dn->dn_dbufs_count); ++ ++ dprintf_dbuf(db, "db=%p\n", db); ++ ++ return (db); ++} ++ ++static int ++dbuf_do_evict(void *private) ++{ ++ arc_buf_t *buf = private; ++ dmu_buf_impl_t *db = buf->b_private; ++ ++ if (!MUTEX_HELD(&db->db_mtx)) ++ mutex_enter(&db->db_mtx); ++ ++ ASSERT(refcount_is_zero(&db->db_holds)); ++ ++ if (db->db_state != DB_EVICTING) { ++ ASSERT(db->db_state == DB_CACHED); ++ DBUF_VERIFY(db); ++ db->db_buf = NULL; ++ dbuf_evict(db); ++ } else { ++ mutex_exit(&db->db_mtx); ++ dbuf_destroy(db); ++ } ++ return (0); ++} ++ ++static void ++dbuf_destroy(dmu_buf_impl_t *db) ++{ ++ ASSERT(refcount_is_zero(&db->db_holds)); ++ ++ if (db->db_blkid != DMU_BONUS_BLKID) { ++ /* ++ * If this dbuf is still on the dn_dbufs list, ++ * remove it from that list. ++ */ ++ if (db->db_dnode_handle != NULL) { ++ dnode_t *dn; ++ ++ DB_DNODE_ENTER(db); ++ dn = DB_DNODE(db); ++ mutex_enter(&dn->dn_dbufs_mtx); ++ list_remove(&dn->dn_dbufs, db); ++ (void) atomic_dec_32_nv(&dn->dn_dbufs_count); ++ mutex_exit(&dn->dn_dbufs_mtx); ++ DB_DNODE_EXIT(db); ++ /* ++ * Decrementing the dbuf count means that the hold ++ * corresponding to the removed dbuf is no longer ++ * discounted in dnode_move(), so the dnode cannot be ++ * moved until after we release the hold. ++ */ ++ dnode_rele(dn, db); ++ db->db_dnode_handle = NULL; ++ } ++ dbuf_hash_remove(db); ++ } ++ db->db_parent = NULL; ++ db->db_buf = NULL; ++ ++ ASSERT(!list_link_active(&db->db_link)); ++ ASSERT(db->db.db_data == NULL); ++ ASSERT(db->db_hash_next == NULL); ++ ASSERT(db->db_blkptr == NULL); ++ ASSERT(db->db_data_pending == NULL); ++ ++ kmem_cache_free(dbuf_cache, db); ++ arc_space_return(sizeof (dmu_buf_impl_t), ARC_SPACE_OTHER); ++} ++ ++void ++dbuf_prefetch(dnode_t *dn, uint64_t blkid) ++{ ++ dmu_buf_impl_t *db = NULL; ++ blkptr_t *bp = NULL; ++ ++ ASSERT(blkid != DMU_BONUS_BLKID); ++ ASSERT(RW_LOCK_HELD(&dn->dn_struct_rwlock)); ++ ++ if (dnode_block_freed(dn, blkid)) ++ return; ++ ++ /* dbuf_find() returns with db_mtx held */ ++ if ((db = dbuf_find(dn, 0, blkid))) { ++ /* ++ * This dbuf is already in the cache. We assume that ++ * it is already CACHED, or else about to be either ++ * read or filled. ++ */ ++ mutex_exit(&db->db_mtx); ++ return; ++ } ++ ++ if (dbuf_findbp(dn, 0, blkid, TRUE, &db, &bp, NULL) == 0) { ++ if (bp && !BP_IS_HOLE(bp)) { ++ int priority = dn->dn_type == DMU_OT_DDT_ZAP ? 
++ ZIO_PRIORITY_DDT_PREFETCH : ZIO_PRIORITY_ASYNC_READ; ++ arc_buf_t *pbuf; ++ dsl_dataset_t *ds = dn->dn_objset->os_dsl_dataset; ++ uint32_t aflags = ARC_NOWAIT | ARC_PREFETCH; ++ zbookmark_t zb; ++ ++ SET_BOOKMARK(&zb, ds ? ds->ds_object : DMU_META_OBJSET, ++ dn->dn_object, 0, blkid); ++ ++ if (db) ++ pbuf = db->db_buf; ++ else ++ pbuf = dn->dn_objset->os_phys_buf; ++ ++ (void) dsl_read(NULL, dn->dn_objset->os_spa, ++ bp, pbuf, NULL, NULL, priority, ++ ZIO_FLAG_CANFAIL | ZIO_FLAG_SPECULATIVE, ++ &aflags, &zb); ++ } ++ if (db) ++ dbuf_rele(db, NULL); ++ } ++} ++ ++#define DBUF_HOLD_IMPL_MAX_DEPTH 20 ++ ++/* ++ * Returns with db_holds incremented, and db_mtx not held. ++ * Note: dn_struct_rwlock must be held. ++ */ ++static int ++__dbuf_hold_impl(struct dbuf_hold_impl_data *dh) ++{ ++ ASSERT3S(dh->dh_depth, <, DBUF_HOLD_IMPL_MAX_DEPTH); ++ dh->dh_parent = NULL; ++ ++ ASSERT(dh->dh_blkid != DMU_BONUS_BLKID); ++ ASSERT(RW_LOCK_HELD(&dh->dh_dn->dn_struct_rwlock)); ++ ASSERT3U(dh->dh_dn->dn_nlevels, >, dh->dh_level); ++ ++ *(dh->dh_dbp) = NULL; ++top: ++ /* dbuf_find() returns with db_mtx held */ ++ dh->dh_db = dbuf_find(dh->dh_dn, dh->dh_level, dh->dh_blkid); ++ ++ if (dh->dh_db == NULL) { ++ dh->dh_bp = NULL; ++ ++ ASSERT3P(dh->dh_parent, ==, NULL); ++ dh->dh_err = dbuf_findbp(dh->dh_dn, dh->dh_level, dh->dh_blkid, ++ dh->dh_fail_sparse, &dh->dh_parent, ++ &dh->dh_bp, dh); ++ if (dh->dh_fail_sparse) { ++ if (dh->dh_err == 0 && dh->dh_bp && BP_IS_HOLE(dh->dh_bp)) ++ dh->dh_err = ENOENT; ++ if (dh->dh_err) { ++ if (dh->dh_parent) ++ dbuf_rele(dh->dh_parent, NULL); ++ return (dh->dh_err); ++ } ++ } ++ if (dh->dh_err && dh->dh_err != ENOENT) ++ return (dh->dh_err); ++ dh->dh_db = dbuf_create(dh->dh_dn, dh->dh_level, dh->dh_blkid, ++ dh->dh_parent, dh->dh_bp); ++ } ++ ++ if (dh->dh_db->db_buf && refcount_is_zero(&dh->dh_db->db_holds)) { ++ arc_buf_add_ref(dh->dh_db->db_buf, dh->dh_db); ++ if (dh->dh_db->db_buf->b_data == NULL) { ++ dbuf_clear(dh->dh_db); ++ if (dh->dh_parent) { ++ dbuf_rele(dh->dh_parent, NULL); ++ dh->dh_parent = NULL; ++ } ++ goto top; ++ } ++ ASSERT3P(dh->dh_db->db.db_data, ==, dh->dh_db->db_buf->b_data); ++ } ++ ++ ASSERT(dh->dh_db->db_buf == NULL || arc_referenced(dh->dh_db->db_buf)); ++ ++ /* ++ * If this buffer is currently syncing out, and we are are ++ * still referencing it from db_data, we need to make a copy ++ * of it in case we decide we want to dirty it again in this txg. 
++ */ ++ if (dh->dh_db->db_level == 0 && ++ dh->dh_db->db_blkid != DMU_BONUS_BLKID && ++ dh->dh_dn->dn_object != DMU_META_DNODE_OBJECT && ++ dh->dh_db->db_state == DB_CACHED && dh->dh_db->db_data_pending) { ++ dh->dh_dr = dh->dh_db->db_data_pending; ++ ++ if (dh->dh_dr->dt.dl.dr_data == dh->dh_db->db_buf) { ++ dh->dh_type = DBUF_GET_BUFC_TYPE(dh->dh_db); ++ ++ dbuf_set_data(dh->dh_db, ++ arc_buf_alloc(dh->dh_dn->dn_objset->os_spa, ++ dh->dh_db->db.db_size, dh->dh_db, dh->dh_type)); ++ bcopy(dh->dh_dr->dt.dl.dr_data->b_data, ++ dh->dh_db->db.db_data, dh->dh_db->db.db_size); ++ } ++ } ++ ++ (void) refcount_add(&dh->dh_db->db_holds, dh->dh_tag); ++ dbuf_update_data(dh->dh_db); ++ DBUF_VERIFY(dh->dh_db); ++ mutex_exit(&dh->dh_db->db_mtx); ++ ++ /* NOTE: we can't rele the parent until after we drop the db_mtx */ ++ if (dh->dh_parent) ++ dbuf_rele(dh->dh_parent, NULL); ++ ++ ASSERT3P(DB_DNODE(dh->dh_db), ==, dh->dh_dn); ++ ASSERT3U(dh->dh_db->db_blkid, ==, dh->dh_blkid); ++ ASSERT3U(dh->dh_db->db_level, ==, dh->dh_level); ++ *(dh->dh_dbp) = dh->dh_db; ++ ++ return (0); ++} ++ ++/* ++ * The following code preserves the recursive function dbuf_hold_impl() ++ * but moves the local variables AND function arguments to the heap to ++ * minimize the stack frame size. Enough space is initially allocated ++ * on the stack for 20 levels of recursion. ++ */ ++int ++dbuf_hold_impl(dnode_t *dn, uint8_t level, uint64_t blkid, int fail_sparse, ++ void *tag, dmu_buf_impl_t **dbp) ++{ ++ struct dbuf_hold_impl_data *dh; ++ int error; ++ ++ dh = kmem_zalloc(sizeof(struct dbuf_hold_impl_data) * ++ DBUF_HOLD_IMPL_MAX_DEPTH, KM_PUSHPAGE); ++ __dbuf_hold_impl_init(dh, dn, level, blkid, fail_sparse, tag, dbp, 0); ++ ++ error = __dbuf_hold_impl(dh); ++ ++ kmem_free(dh, sizeof(struct dbuf_hold_impl_data) * ++ DBUF_HOLD_IMPL_MAX_DEPTH); ++ ++ return (error); ++} ++ ++static void ++__dbuf_hold_impl_init(struct dbuf_hold_impl_data *dh, ++ dnode_t *dn, uint8_t level, uint64_t blkid, int fail_sparse, ++ void *tag, dmu_buf_impl_t **dbp, int depth) ++{ ++ dh->dh_dn = dn; ++ dh->dh_level = level; ++ dh->dh_blkid = blkid; ++ dh->dh_fail_sparse = fail_sparse; ++ dh->dh_tag = tag; ++ dh->dh_dbp = dbp; ++ dh->dh_depth = depth; ++} ++ ++dmu_buf_impl_t * ++dbuf_hold(dnode_t *dn, uint64_t blkid, void *tag) ++{ ++ dmu_buf_impl_t *db; ++ int err = dbuf_hold_impl(dn, 0, blkid, FALSE, tag, &db); ++ return (err ? NULL : db); ++} ++ ++dmu_buf_impl_t * ++dbuf_hold_level(dnode_t *dn, int level, uint64_t blkid, void *tag) ++{ ++ dmu_buf_impl_t *db; ++ int err = dbuf_hold_impl(dn, level, blkid, FALSE, tag, &db); ++ return (err ? 
NULL : db); ++} ++ ++void ++dbuf_create_bonus(dnode_t *dn) ++{ ++ ASSERT(RW_WRITE_HELD(&dn->dn_struct_rwlock)); ++ ++ ASSERT(dn->dn_bonus == NULL); ++ dn->dn_bonus = dbuf_create(dn, 0, DMU_BONUS_BLKID, dn->dn_dbuf, NULL); ++} ++ ++int ++dbuf_spill_set_blksz(dmu_buf_t *db_fake, uint64_t blksz, dmu_tx_t *tx) ++{ ++ dmu_buf_impl_t *db = (dmu_buf_impl_t *)db_fake; ++ dnode_t *dn; ++ ++ if (db->db_blkid != DMU_SPILL_BLKID) ++ return (ENOTSUP); ++ if (blksz == 0) ++ blksz = SPA_MINBLOCKSIZE; ++ if (blksz > SPA_MAXBLOCKSIZE) ++ blksz = SPA_MAXBLOCKSIZE; ++ else ++ blksz = P2ROUNDUP(blksz, SPA_MINBLOCKSIZE); ++ ++ DB_DNODE_ENTER(db); ++ dn = DB_DNODE(db); ++ rw_enter(&dn->dn_struct_rwlock, RW_WRITER); ++ dbuf_new_size(db, blksz, tx); ++ rw_exit(&dn->dn_struct_rwlock); ++ DB_DNODE_EXIT(db); ++ ++ return (0); ++} ++ ++void ++dbuf_rm_spill(dnode_t *dn, dmu_tx_t *tx) ++{ ++ dbuf_free_range(dn, DMU_SPILL_BLKID, DMU_SPILL_BLKID, tx); ++} ++ ++#pragma weak dmu_buf_add_ref = dbuf_add_ref ++void ++dbuf_add_ref(dmu_buf_impl_t *db, void *tag) ++{ ++ VERIFY(refcount_add(&db->db_holds, tag) > 1); ++} ++ ++/* ++ * If you call dbuf_rele() you had better not be referencing the dnode handle ++ * unless you have some other direct or indirect hold on the dnode. (An indirect ++ * hold is a hold on one of the dnode's dbufs, including the bonus buffer.) ++ * Without that, the dbuf_rele() could lead to a dnode_rele() followed by the ++ * dnode's parent dbuf evicting its dnode handles. ++ */ ++#pragma weak dmu_buf_rele = dbuf_rele ++void ++dbuf_rele(dmu_buf_impl_t *db, void *tag) ++{ ++ mutex_enter(&db->db_mtx); ++ dbuf_rele_and_unlock(db, tag); ++} ++ ++/* ++ * dbuf_rele() for an already-locked dbuf. This is necessary to allow ++ * db_dirtycnt and db_holds to be updated atomically. ++ */ ++void ++dbuf_rele_and_unlock(dmu_buf_impl_t *db, void *tag) ++{ ++ int64_t holds; ++ ++ ASSERT(MUTEX_HELD(&db->db_mtx)); ++ DBUF_VERIFY(db); ++ ++ /* ++ * Remove the reference to the dbuf before removing its hold on the ++ * dnode so we can guarantee in dnode_move() that a referenced bonus ++ * buffer has a corresponding dnode hold. ++ */ ++ holds = refcount_remove(&db->db_holds, tag); ++ ASSERT(holds >= 0); ++ ++ /* ++ * We can't freeze indirects if there is a possibility that they ++ * may be modified in the current syncing context. ++ */ ++ if (db->db_buf && holds == (db->db_level == 0 ? db->db_dirtycnt : 0)) ++ arc_buf_freeze(db->db_buf); ++ ++ if (holds == db->db_dirtycnt && ++ db->db_level == 0 && db->db_immediate_evict) ++ dbuf_evict_user(db); ++ ++ if (holds == 0) { ++ if (db->db_blkid == DMU_BONUS_BLKID) { ++ mutex_exit(&db->db_mtx); ++ ++ /* ++ * If the dnode moves here, we cannot cross this barrier ++ * until the move completes. ++ */ ++ DB_DNODE_ENTER(db); ++ (void) atomic_dec_32_nv(&DB_DNODE(db)->dn_dbufs_count); ++ DB_DNODE_EXIT(db); ++ /* ++ * The bonus buffer's dnode hold is no longer discounted ++ * in dnode_move(). The dnode cannot move until after ++ * the dnode_rele(). ++ */ ++ dnode_rele(DB_DNODE(db), db); ++ } else if (db->db_buf == NULL) { ++ /* ++ * This is a special case: we never associated this ++ * dbuf with any data allocated from the ARC. ++ */ ++ ASSERT(db->db_state == DB_UNCACHED || ++ db->db_state == DB_NOFILL); ++ dbuf_evict(db); ++ } else if (arc_released(db->db_buf)) { ++ arc_buf_t *buf = db->db_buf; ++ /* ++ * This dbuf has anonymous data associated with it. 
++ */ ++ dbuf_set_data(db, NULL); ++ VERIFY(arc_buf_remove_ref(buf, db) == 1); ++ dbuf_evict(db); ++ } else { ++ VERIFY(arc_buf_remove_ref(db->db_buf, db) == 0); ++ if (!DBUF_IS_CACHEABLE(db)) ++ dbuf_clear(db); ++ else ++ mutex_exit(&db->db_mtx); ++ } ++ } else { ++ mutex_exit(&db->db_mtx); ++ } ++} ++ ++#pragma weak dmu_buf_refcount = dbuf_refcount ++uint64_t ++dbuf_refcount(dmu_buf_impl_t *db) ++{ ++ return (refcount_count(&db->db_holds)); ++} ++ ++void * ++dmu_buf_set_user(dmu_buf_t *db_fake, void *user_ptr, void *user_data_ptr_ptr, ++ dmu_buf_evict_func_t *evict_func) ++{ ++ return (dmu_buf_update_user(db_fake, NULL, user_ptr, ++ user_data_ptr_ptr, evict_func)); ++} ++ ++void * ++dmu_buf_set_user_ie(dmu_buf_t *db_fake, void *user_ptr, void *user_data_ptr_ptr, ++ dmu_buf_evict_func_t *evict_func) ++{ ++ dmu_buf_impl_t *db = (dmu_buf_impl_t *)db_fake; ++ ++ db->db_immediate_evict = TRUE; ++ return (dmu_buf_update_user(db_fake, NULL, user_ptr, ++ user_data_ptr_ptr, evict_func)); ++} ++ ++void * ++dmu_buf_update_user(dmu_buf_t *db_fake, void *old_user_ptr, void *user_ptr, ++ void *user_data_ptr_ptr, dmu_buf_evict_func_t *evict_func) ++{ ++ dmu_buf_impl_t *db = (dmu_buf_impl_t *)db_fake; ++ ASSERT(db->db_level == 0); ++ ++ ASSERT((user_ptr == NULL) == (evict_func == NULL)); ++ ++ mutex_enter(&db->db_mtx); ++ ++ if (db->db_user_ptr == old_user_ptr) { ++ db->db_user_ptr = user_ptr; ++ db->db_user_data_ptr_ptr = user_data_ptr_ptr; ++ db->db_evict_func = evict_func; ++ ++ dbuf_update_data(db); ++ } else { ++ old_user_ptr = db->db_user_ptr; ++ } ++ ++ mutex_exit(&db->db_mtx); ++ return (old_user_ptr); ++} ++ ++void * ++dmu_buf_get_user(dmu_buf_t *db_fake) ++{ ++ dmu_buf_impl_t *db = (dmu_buf_impl_t *)db_fake; ++ ASSERT(!refcount_is_zero(&db->db_holds)); ++ ++ return (db->db_user_ptr); ++} ++ ++boolean_t ++dmu_buf_freeable(dmu_buf_t *dbuf) ++{ ++ boolean_t res = B_FALSE; ++ dmu_buf_impl_t *db = (dmu_buf_impl_t *)dbuf; ++ ++ if (db->db_blkptr) ++ res = dsl_dataset_block_freeable(db->db_objset->os_dsl_dataset, ++ db->db_blkptr, db->db_blkptr->blk_birth); ++ ++ return (res); ++} ++ ++static void ++dbuf_check_blkptr(dnode_t *dn, dmu_buf_impl_t *db) ++{ ++ /* ASSERT(dmu_tx_is_syncing(tx) */ ++ ASSERT(MUTEX_HELD(&db->db_mtx)); ++ ++ if (db->db_blkptr != NULL) ++ return; ++ ++ if (db->db_blkid == DMU_SPILL_BLKID) { ++ db->db_blkptr = &dn->dn_phys->dn_spill; ++ BP_ZERO(db->db_blkptr); ++ return; ++ } ++ if (db->db_level == dn->dn_phys->dn_nlevels-1) { ++ /* ++ * This buffer was allocated at a time when there was ++ * no available blkptrs from the dnode, or it was ++ * inappropriate to hook it in (i.e., nlevels mis-match). 
++ */ ++ ASSERT(db->db_blkid < dn->dn_phys->dn_nblkptr); ++ ASSERT(db->db_parent == NULL); ++ db->db_parent = dn->dn_dbuf; ++ db->db_blkptr = &dn->dn_phys->dn_blkptr[db->db_blkid]; ++ DBUF_VERIFY(db); ++ } else { ++ dmu_buf_impl_t *parent = db->db_parent; ++ int epbs = dn->dn_phys->dn_indblkshift - SPA_BLKPTRSHIFT; ++ ++ ASSERT(dn->dn_phys->dn_nlevels > 1); ++ if (parent == NULL) { ++ mutex_exit(&db->db_mtx); ++ rw_enter(&dn->dn_struct_rwlock, RW_READER); ++ (void) dbuf_hold_impl(dn, db->db_level+1, ++ db->db_blkid >> epbs, FALSE, db, &parent); ++ rw_exit(&dn->dn_struct_rwlock); ++ mutex_enter(&db->db_mtx); ++ db->db_parent = parent; ++ } ++ db->db_blkptr = (blkptr_t *)parent->db.db_data + ++ (db->db_blkid & ((1ULL << epbs) - 1)); ++ DBUF_VERIFY(db); ++ } ++} ++ ++/* dbuf_sync_indirect() is called recursively from dbuf_sync_list() so it ++ * is critical the we not allow the compiler to inline this function in to ++ * dbuf_sync_list() thereby drastically bloating the stack usage. ++ */ ++noinline static void ++dbuf_sync_indirect(dbuf_dirty_record_t *dr, dmu_tx_t *tx) ++{ ++ dmu_buf_impl_t *db = dr->dr_dbuf; ++ dnode_t *dn; ++ zio_t *zio; ++ ++ ASSERT(dmu_tx_is_syncing(tx)); ++ ++ dprintf_dbuf_bp(db, db->db_blkptr, "blkptr=%p", db->db_blkptr); ++ ++ mutex_enter(&db->db_mtx); ++ ++ ASSERT(db->db_level > 0); ++ DBUF_VERIFY(db); ++ ++ if (db->db_buf == NULL) { ++ mutex_exit(&db->db_mtx); ++ (void) dbuf_read(db, NULL, DB_RF_MUST_SUCCEED); ++ mutex_enter(&db->db_mtx); ++ } ++ ASSERT3U(db->db_state, ==, DB_CACHED); ++ ASSERT(db->db_buf != NULL); ++ ++ DB_DNODE_ENTER(db); ++ dn = DB_DNODE(db); ++ ASSERT3U(db->db.db_size, ==, 1<dn_phys->dn_indblkshift); ++ dbuf_check_blkptr(dn, db); ++ DB_DNODE_EXIT(db); ++ ++ db->db_data_pending = dr; ++ ++ mutex_exit(&db->db_mtx); ++ dbuf_write(dr, db->db_buf, tx); ++ ++ zio = dr->dr_zio; ++ mutex_enter(&dr->dt.di.dr_mtx); ++ dbuf_sync_list(&dr->dt.di.dr_children, tx); ++ ASSERT(list_head(&dr->dt.di.dr_children) == NULL); ++ mutex_exit(&dr->dt.di.dr_mtx); ++ zio_nowait(zio); ++} ++ ++/* dbuf_sync_leaf() is called recursively from dbuf_sync_list() so it is ++ * critical the we not allow the compiler to inline this function in to ++ * dbuf_sync_list() thereby drastically bloating the stack usage. ++ */ ++noinline static void ++dbuf_sync_leaf(dbuf_dirty_record_t *dr, dmu_tx_t *tx) ++{ ++ arc_buf_t **datap = &dr->dt.dl.dr_data; ++ dmu_buf_impl_t *db = dr->dr_dbuf; ++ dnode_t *dn; ++ objset_t *os; ++ uint64_t txg = tx->tx_txg; ++ ++ ASSERT(dmu_tx_is_syncing(tx)); ++ ++ dprintf_dbuf_bp(db, db->db_blkptr, "blkptr=%p", db->db_blkptr); ++ ++ mutex_enter(&db->db_mtx); ++ /* ++ * To be synced, we must be dirtied. But we ++ * might have been freed after the dirty. ++ */ ++ if (db->db_state == DB_UNCACHED) { ++ /* This buffer has been freed since it was dirtied */ ++ ASSERT(db->db.db_data == NULL); ++ } else if (db->db_state == DB_FILL) { ++ /* This buffer was freed and is now being re-filled */ ++ ASSERT(db->db.db_data != dr->dt.dl.dr_data); ++ } else { ++ ASSERT(db->db_state == DB_CACHED || db->db_state == DB_NOFILL); ++ } ++ DBUF_VERIFY(db); ++ ++ DB_DNODE_ENTER(db); ++ dn = DB_DNODE(db); ++ ++ if (db->db_blkid == DMU_SPILL_BLKID) { ++ mutex_enter(&dn->dn_mtx); ++ dn->dn_phys->dn_flags |= DNODE_FLAG_SPILL_BLKPTR; ++ mutex_exit(&dn->dn_mtx); ++ } ++ ++ /* ++ * If this is a bonus buffer, simply copy the bonus data into the ++ * dnode. It will be written out when the dnode is synced (and it ++ * will be synced, since it must have been dirty for dbuf_sync to ++ * be called). 
++ */ ++ if (db->db_blkid == DMU_BONUS_BLKID) { ++ dbuf_dirty_record_t **drp; ++ ++ ASSERT(*datap != NULL); ++ ASSERT3U(db->db_level, ==, 0); ++ ASSERT3U(dn->dn_phys->dn_bonuslen, <=, DN_MAX_BONUSLEN); ++ bcopy(*datap, DN_BONUS(dn->dn_phys), dn->dn_phys->dn_bonuslen); ++ DB_DNODE_EXIT(db); ++ ++ if (*datap != db->db.db_data) { ++ zio_buf_free(*datap, DN_MAX_BONUSLEN); ++ arc_space_return(DN_MAX_BONUSLEN, ARC_SPACE_OTHER); ++ } ++ db->db_data_pending = NULL; ++ drp = &db->db_last_dirty; ++ while (*drp != dr) ++ drp = &(*drp)->dr_next; ++ ASSERT(dr->dr_next == NULL); ++ ASSERT(dr->dr_dbuf == db); ++ *drp = dr->dr_next; ++ if (dr->dr_dbuf->db_level != 0) { ++ mutex_destroy(&dr->dt.di.dr_mtx); ++ list_destroy(&dr->dt.di.dr_children); ++ } ++ kmem_free(dr, sizeof (dbuf_dirty_record_t)); ++ ASSERT(db->db_dirtycnt > 0); ++ db->db_dirtycnt -= 1; ++ dbuf_rele_and_unlock(db, (void *)(uintptr_t)txg); ++ return; ++ } ++ ++ os = dn->dn_objset; ++ ++ /* ++ * This function may have dropped the db_mtx lock allowing a dmu_sync ++ * operation to sneak in. As a result, we need to ensure that we ++ * don't check the dr_override_state until we have returned from ++ * dbuf_check_blkptr. ++ */ ++ dbuf_check_blkptr(dn, db); ++ ++ /* ++ * If this buffer is in the middle of an immediate write, ++ * wait for the synchronous IO to complete. ++ */ ++ while (dr->dt.dl.dr_override_state == DR_IN_DMU_SYNC) { ++ ASSERT(dn->dn_object != DMU_META_DNODE_OBJECT); ++ cv_wait(&db->db_changed, &db->db_mtx); ++ ASSERT(dr->dt.dl.dr_override_state != DR_NOT_OVERRIDDEN); ++ } ++ ++ if (db->db_state != DB_NOFILL && ++ dn->dn_object != DMU_META_DNODE_OBJECT && ++ refcount_count(&db->db_holds) > 1 && ++ dr->dt.dl.dr_override_state != DR_OVERRIDDEN && ++ *datap == db->db_buf) { ++ /* ++ * If this buffer is currently "in use" (i.e., there ++ * are active holds and db_data still references it), ++ * then make a copy before we start the write so that ++ * any modifications from the open txg will not leak ++ * into this write. ++ * ++ * NOTE: this copy does not need to be made for ++ * objects only modified in the syncing context (e.g. ++ * DNONE_DNODE blocks). ++ */ ++ int blksz = arc_buf_size(*datap); ++ arc_buf_contents_t type = DBUF_GET_BUFC_TYPE(db); ++ *datap = arc_buf_alloc(os->os_spa, blksz, db, type); ++ bcopy(db->db.db_data, (*datap)->b_data, blksz); ++ } ++ db->db_data_pending = dr; ++ ++ mutex_exit(&db->db_mtx); ++ ++ dbuf_write(dr, *datap, tx); ++ ++ ASSERT(!list_link_active(&dr->dr_dirty_node)); ++ if (dn->dn_object == DMU_META_DNODE_OBJECT) { ++ list_insert_tail(&dn->dn_dirty_records[txg&TXG_MASK], dr); ++ DB_DNODE_EXIT(db); ++ } else { ++ /* ++ * Although zio_nowait() does not "wait for an IO", it does ++ * initiate the IO. If this is an empty write it seems plausible ++ * that the IO could actually be completed before the nowait ++ * returns. We need to DB_DNODE_EXIT() first in case ++ * zio_nowait() invalidates the dbuf. ++ */ ++ DB_DNODE_EXIT(db); ++ zio_nowait(dr->dr_zio); ++ } ++} ++ ++void ++dbuf_sync_list(list_t *list, dmu_tx_t *tx) ++{ ++ dbuf_dirty_record_t *dr; ++ ++ while ((dr = list_head(list))) { ++ if (dr->dr_zio != NULL) { ++ /* ++ * If we find an already initialized zio then we ++ * are processing the meta-dnode, and we have finished. ++ * The dbufs for all dnodes are put back on the list ++ * during processing, so that we can zio_wait() ++ * these IOs after initiating all child IOs. 
++ */ ++ ASSERT3U(dr->dr_dbuf->db.db_object, ==, ++ DMU_META_DNODE_OBJECT); ++ break; ++ } ++ list_remove(list, dr); ++ if (dr->dr_dbuf->db_level > 0) ++ dbuf_sync_indirect(dr, tx); ++ else ++ dbuf_sync_leaf(dr, tx); ++ } ++} ++ ++/* ARGSUSED */ ++static void ++dbuf_write_ready(zio_t *zio, arc_buf_t *buf, void *vdb) ++{ ++ dmu_buf_impl_t *db = vdb; ++ dnode_t *dn; ++ blkptr_t *bp = zio->io_bp; ++ blkptr_t *bp_orig = &zio->io_bp_orig; ++ spa_t *spa = zio->io_spa; ++ int64_t delta; ++ uint64_t fill = 0; ++ int i; ++ ++ ASSERT(db->db_blkptr == bp); ++ ++ DB_DNODE_ENTER(db); ++ dn = DB_DNODE(db); ++ delta = bp_get_dsize_sync(spa, bp) - bp_get_dsize_sync(spa, bp_orig); ++ dnode_diduse_space(dn, delta - zio->io_prev_space_delta); ++ zio->io_prev_space_delta = delta; ++ ++ if (BP_IS_HOLE(bp)) { ++ ASSERT(bp->blk_fill == 0); ++ DB_DNODE_EXIT(db); ++ return; ++ } ++ ++ ASSERT((db->db_blkid != DMU_SPILL_BLKID && ++ BP_GET_TYPE(bp) == dn->dn_type) || ++ (db->db_blkid == DMU_SPILL_BLKID && ++ BP_GET_TYPE(bp) == dn->dn_bonustype)); ++ ASSERT(BP_GET_LEVEL(bp) == db->db_level); ++ ++ mutex_enter(&db->db_mtx); ++ ++#ifdef ZFS_DEBUG ++ if (db->db_blkid == DMU_SPILL_BLKID) { ++ ASSERT(dn->dn_phys->dn_flags & DNODE_FLAG_SPILL_BLKPTR); ++ ASSERT(!(BP_IS_HOLE(db->db_blkptr)) && ++ db->db_blkptr == &dn->dn_phys->dn_spill); ++ } ++#endif ++ ++ if (db->db_level == 0) { ++ mutex_enter(&dn->dn_mtx); ++ if (db->db_blkid > dn->dn_phys->dn_maxblkid && ++ db->db_blkid != DMU_SPILL_BLKID) ++ dn->dn_phys->dn_maxblkid = db->db_blkid; ++ mutex_exit(&dn->dn_mtx); ++ ++ if (dn->dn_type == DMU_OT_DNODE) { ++ dnode_phys_t *dnp = db->db.db_data; ++ for (i = db->db.db_size >> DNODE_SHIFT; i > 0; ++ i--, dnp++) { ++ if (dnp->dn_type != DMU_OT_NONE) ++ fill++; ++ } ++ } else { ++ fill = 1; ++ } ++ } else { ++ blkptr_t *ibp = db->db.db_data; ++ ASSERT3U(db->db.db_size, ==, 1<dn_phys->dn_indblkshift); ++ for (i = db->db.db_size >> SPA_BLKPTRSHIFT; i > 0; i--, ibp++) { ++ if (BP_IS_HOLE(ibp)) ++ continue; ++ fill += ibp->blk_fill; ++ } ++ } ++ DB_DNODE_EXIT(db); ++ ++ bp->blk_fill = fill; ++ ++ mutex_exit(&db->db_mtx); ++} ++ ++/* ARGSUSED */ ++static void ++dbuf_write_done(zio_t *zio, arc_buf_t *buf, void *vdb) ++{ ++ dmu_buf_impl_t *db = vdb; ++ blkptr_t *bp = zio->io_bp; ++ blkptr_t *bp_orig = &zio->io_bp_orig; ++ uint64_t txg = zio->io_txg; ++ dbuf_dirty_record_t **drp, *dr; ++ ++ ASSERT3U(zio->io_error, ==, 0); ++ ASSERT(db->db_blkptr == bp); ++ ++ if (zio->io_flags & ZIO_FLAG_IO_REWRITE) { ++ ASSERT(BP_EQUAL(bp, bp_orig)); ++ } else { ++ objset_t *os; ++ dsl_dataset_t *ds; ++ dmu_tx_t *tx; ++ ++ DB_GET_OBJSET(&os, db); ++ ds = os->os_dsl_dataset; ++ tx = os->os_synctx; ++ ++ (void) dsl_dataset_block_kill(ds, bp_orig, tx, B_TRUE); ++ dsl_dataset_block_born(ds, bp, tx); ++ } ++ ++ mutex_enter(&db->db_mtx); ++ ++ DBUF_VERIFY(db); ++ ++ drp = &db->db_last_dirty; ++ while ((dr = *drp) != db->db_data_pending) ++ drp = &dr->dr_next; ++ ASSERT(!list_link_active(&dr->dr_dirty_node)); ++ ASSERT(dr->dr_txg == txg); ++ ASSERT(dr->dr_dbuf == db); ++ ASSERT(dr->dr_next == NULL); ++ *drp = dr->dr_next; ++ ++#ifdef ZFS_DEBUG ++ if (db->db_blkid == DMU_SPILL_BLKID) { ++ dnode_t *dn; ++ ++ DB_DNODE_ENTER(db); ++ dn = DB_DNODE(db); ++ ASSERT(dn->dn_phys->dn_flags & DNODE_FLAG_SPILL_BLKPTR); ++ ASSERT(!(BP_IS_HOLE(db->db_blkptr)) && ++ db->db_blkptr == &dn->dn_phys->dn_spill); ++ DB_DNODE_EXIT(db); ++ } ++#endif ++ ++ if (db->db_level == 0) { ++ ASSERT(db->db_blkid != DMU_BONUS_BLKID); ++ ASSERT(dr->dt.dl.dr_override_state == DR_NOT_OVERRIDDEN); 
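/*
 * Illustrative sketch, not part of this patch: dbuf_write_ready() above
 * recomputes the written block's fill count -- for an indirect block it
 * sums the blk_fill of every non-hole child block pointer, and for a
 * level-0 dnode block it counts the allocated dnode entries.  The toy
 * function below shows only the indirect-block case; sk_blkptr_t and
 * SK_IS_HOLE are hypothetical stand-ins for blkptr_t and BP_IS_HOLE.
 */
#include <stdint.h>
#include <stddef.h>

typedef struct sk_blkptr {
	uint64_t	skb_birth;	/* 0 means this pointer is a hole */
	uint64_t	skb_fill;	/* leaf blocks referenced below it */
} sk_blkptr_t;

#define	SK_IS_HOLE(bp)	((bp)->skb_birth == 0)

/* Sum the fill counts of all non-hole children of one indirect block. */
static uint64_t
sk_indirect_fill(const sk_blkptr_t *ibp, size_t nptrs)
{
	uint64_t fill = 0;
	size_t i;

	for (i = 0; i < nptrs; i++) {
		if (SK_IS_HOLE(&ibp[i]))
			continue;
		fill += ibp[i].skb_fill;
	}
	return (fill);
}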
++ if (db->db_state != DB_NOFILL) { ++ if (dr->dt.dl.dr_data != db->db_buf) ++ VERIFY(arc_buf_remove_ref(dr->dt.dl.dr_data, ++ db) == 1); ++ else if (!arc_released(db->db_buf)) ++ arc_set_callback(db->db_buf, dbuf_do_evict, db); ++ } ++ } else { ++ dnode_t *dn; ++ ++ DB_DNODE_ENTER(db); ++ dn = DB_DNODE(db); ++ ASSERT(list_head(&dr->dt.di.dr_children) == NULL); ++ ASSERT3U(db->db.db_size, ==, 1<dn_phys->dn_indblkshift); ++ if (!BP_IS_HOLE(db->db_blkptr)) { ++ ASSERTV(int epbs = dn->dn_phys->dn_indblkshift - ++ SPA_BLKPTRSHIFT); ++ ASSERT3U(BP_GET_LSIZE(db->db_blkptr), ==, ++ db->db.db_size); ++ ASSERT3U(dn->dn_phys->dn_maxblkid ++ >> (db->db_level * epbs), >=, db->db_blkid); ++ arc_set_callback(db->db_buf, dbuf_do_evict, db); ++ } ++ DB_DNODE_EXIT(db); ++ mutex_destroy(&dr->dt.di.dr_mtx); ++ list_destroy(&dr->dt.di.dr_children); ++ } ++ kmem_free(dr, sizeof (dbuf_dirty_record_t)); ++ ++ cv_broadcast(&db->db_changed); ++ ASSERT(db->db_dirtycnt > 0); ++ db->db_dirtycnt -= 1; ++ db->db_data_pending = NULL; ++ dbuf_rele_and_unlock(db, (void *)(uintptr_t)txg); ++} ++ ++static void ++dbuf_write_nofill_ready(zio_t *zio) ++{ ++ dbuf_write_ready(zio, NULL, zio->io_private); ++} ++ ++static void ++dbuf_write_nofill_done(zio_t *zio) ++{ ++ dbuf_write_done(zio, NULL, zio->io_private); ++} ++ ++static void ++dbuf_write_override_ready(zio_t *zio) ++{ ++ dbuf_dirty_record_t *dr = zio->io_private; ++ dmu_buf_impl_t *db = dr->dr_dbuf; ++ ++ dbuf_write_ready(zio, NULL, db); ++} ++ ++static void ++dbuf_write_override_done(zio_t *zio) ++{ ++ dbuf_dirty_record_t *dr = zio->io_private; ++ dmu_buf_impl_t *db = dr->dr_dbuf; ++ blkptr_t *obp = &dr->dt.dl.dr_overridden_by; ++ ++ mutex_enter(&db->db_mtx); ++ if (!BP_EQUAL(zio->io_bp, obp)) { ++ if (!BP_IS_HOLE(obp)) ++ dsl_free(spa_get_dsl(zio->io_spa), zio->io_txg, obp); ++ arc_release(dr->dt.dl.dr_data, db); ++ } ++ mutex_exit(&db->db_mtx); ++ ++ dbuf_write_done(zio, NULL, db); ++} ++ ++static void ++dbuf_write(dbuf_dirty_record_t *dr, arc_buf_t *data, dmu_tx_t *tx) ++{ ++ dmu_buf_impl_t *db = dr->dr_dbuf; ++ dnode_t *dn; ++ objset_t *os; ++ dmu_buf_impl_t *parent = db->db_parent; ++ uint64_t txg = tx->tx_txg; ++ zbookmark_t zb; ++ zio_prop_t zp; ++ zio_t *zio; ++ int wp_flag = 0; ++ ++ DB_DNODE_ENTER(db); ++ dn = DB_DNODE(db); ++ os = dn->dn_objset; ++ ++ if (db->db_state != DB_NOFILL) { ++ if (db->db_level > 0 || dn->dn_type == DMU_OT_DNODE) { ++ /* ++ * Private object buffers are released here rather ++ * than in dbuf_dirty() since they are only modified ++ * in the syncing context and we don't want the ++ * overhead of making multiple copies of the data. ++ */ ++ if (BP_IS_HOLE(db->db_blkptr)) { ++ arc_buf_thaw(data); ++ } else { ++ dbuf_release_bp(db); ++ } ++ } ++ } ++ ++ if (parent != dn->dn_dbuf) { ++ ASSERT(parent && parent->db_data_pending); ++ ASSERT(db->db_level == parent->db_level-1); ++ ASSERT(arc_released(parent->db_buf)); ++ zio = parent->db_data_pending->dr_zio; ++ } else { ++ ASSERT((db->db_level == dn->dn_phys->dn_nlevels-1 && ++ db->db_blkid != DMU_SPILL_BLKID) || ++ (db->db_blkid == DMU_SPILL_BLKID && db->db_level == 0)); ++ if (db->db_blkid != DMU_SPILL_BLKID) ++ ASSERT3P(db->db_blkptr, ==, ++ &dn->dn_phys->dn_blkptr[db->db_blkid]); ++ zio = dn->dn_zio; ++ } ++ ++ ASSERT(db->db_level == 0 || data == db->db_buf); ++ ASSERT3U(db->db_blkptr->blk_birth, <=, txg); ++ ASSERT(zio); ++ ++ SET_BOOKMARK(&zb, os->os_dsl_dataset ? 
++ os->os_dsl_dataset->ds_object : DMU_META_OBJSET, ++ db->db.db_object, db->db_level, db->db_blkid); ++ ++ if (db->db_blkid == DMU_SPILL_BLKID) ++ wp_flag = WP_SPILL; ++ wp_flag |= (db->db_state == DB_NOFILL) ? WP_NOFILL : 0; ++ ++ dmu_write_policy(os, dn, db->db_level, wp_flag, &zp); ++ DB_DNODE_EXIT(db); ++ ++ if (db->db_level == 0 && dr->dt.dl.dr_override_state == DR_OVERRIDDEN) { ++ ASSERT(db->db_state != DB_NOFILL); ++ dr->dr_zio = zio_write(zio, os->os_spa, txg, ++ db->db_blkptr, data->b_data, arc_buf_size(data), &zp, ++ dbuf_write_override_ready, dbuf_write_override_done, dr, ++ ZIO_PRIORITY_ASYNC_WRITE, ZIO_FLAG_MUSTSUCCEED, &zb); ++ mutex_enter(&db->db_mtx); ++ dr->dt.dl.dr_override_state = DR_NOT_OVERRIDDEN; ++ zio_write_override(dr->dr_zio, &dr->dt.dl.dr_overridden_by, ++ dr->dt.dl.dr_copies); ++ mutex_exit(&db->db_mtx); ++ } else if (db->db_state == DB_NOFILL) { ++ ASSERT(zp.zp_checksum == ZIO_CHECKSUM_OFF); ++ dr->dr_zio = zio_write(zio, os->os_spa, txg, ++ db->db_blkptr, NULL, db->db.db_size, &zp, ++ dbuf_write_nofill_ready, dbuf_write_nofill_done, db, ++ ZIO_PRIORITY_ASYNC_WRITE, ++ ZIO_FLAG_MUSTSUCCEED | ZIO_FLAG_NODATA, &zb); ++ } else { ++ ASSERT(arc_released(data)); ++ dr->dr_zio = arc_write(zio, os->os_spa, txg, ++ db->db_blkptr, data, DBUF_IS_L2CACHEABLE(db), &zp, ++ dbuf_write_ready, dbuf_write_done, db, ++ ZIO_PRIORITY_ASYNC_WRITE, ZIO_FLAG_MUSTSUCCEED, &zb); ++ } ++} ++ ++#if defined(_KERNEL) && defined(HAVE_SPL) ++EXPORT_SYMBOL(dbuf_find); ++EXPORT_SYMBOL(dbuf_is_metadata); ++EXPORT_SYMBOL(dbuf_evict); ++EXPORT_SYMBOL(dbuf_loan_arcbuf); ++EXPORT_SYMBOL(dbuf_whichblock); ++EXPORT_SYMBOL(dbuf_read); ++EXPORT_SYMBOL(dbuf_unoverride); ++EXPORT_SYMBOL(dbuf_free_range); ++EXPORT_SYMBOL(dbuf_new_size); ++EXPORT_SYMBOL(dbuf_release_bp); ++EXPORT_SYMBOL(dbuf_dirty); ++EXPORT_SYMBOL(dmu_buf_will_dirty); ++EXPORT_SYMBOL(dmu_buf_will_not_fill); ++EXPORT_SYMBOL(dmu_buf_will_fill); ++EXPORT_SYMBOL(dmu_buf_fill_done); ++EXPORT_SYMBOL(dmu_buf_rele); ++EXPORT_SYMBOL(dbuf_assign_arcbuf); ++EXPORT_SYMBOL(dbuf_clear); ++EXPORT_SYMBOL(dbuf_prefetch); ++EXPORT_SYMBOL(dbuf_hold_impl); ++EXPORT_SYMBOL(dbuf_hold); ++EXPORT_SYMBOL(dbuf_hold_level); ++EXPORT_SYMBOL(dbuf_create_bonus); ++EXPORT_SYMBOL(dbuf_spill_set_blksz); ++EXPORT_SYMBOL(dbuf_rm_spill); ++EXPORT_SYMBOL(dbuf_add_ref); ++EXPORT_SYMBOL(dbuf_rele); ++EXPORT_SYMBOL(dbuf_rele_and_unlock); ++EXPORT_SYMBOL(dbuf_refcount); ++EXPORT_SYMBOL(dbuf_sync_list); ++EXPORT_SYMBOL(dmu_buf_set_user); ++EXPORT_SYMBOL(dmu_buf_set_user_ie); ++EXPORT_SYMBOL(dmu_buf_update_user); ++EXPORT_SYMBOL(dmu_buf_get_user); ++EXPORT_SYMBOL(dmu_buf_freeable); ++#endif +diff -uNr linux-3.2.33-go.orig/fs/zfs/zfs/ddt.c linux-3.2.33-go/fs/zfs/zfs/ddt.c +--- linux-3.2.33-go.orig/fs/zfs/zfs/ddt.c 1970-01-01 01:00:00.000000000 +0100 ++++ linux-3.2.33-go/fs/zfs/zfs/ddt.c 2012-11-16 23:25:34.349039334 +0100 +@@ -0,0 +1,1213 @@ ++/* ++ * CDDL HEADER START ++ * ++ * The contents of this file are subject to the terms of the ++ * Common Development and Distribution License (the "License"). ++ * You may not use this file except in compliance with the License. ++ * ++ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE ++ * or http://www.opensolaris.org/os/licensing. ++ * See the License for the specific language governing permissions ++ * and limitations under the License. ++ * ++ * When distributing Covered Code, include this CDDL HEADER in each ++ * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 
++ * If applicable, add the following below this CDDL HEADER, with the ++ * fields enclosed by brackets "[]" replaced with your own identifying ++ * information: Portions Copyright [yyyy] [name of copyright owner] ++ * ++ * CDDL HEADER END ++ */ ++ ++/* ++ * Copyright (c) 2009, 2010, Oracle and/or its affiliates. All rights reserved. ++ */ ++ ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++ ++/* ++ * Enable/disable prefetching of dedup-ed blocks which are going to be freed. ++ */ ++int zfs_dedup_prefetch = 1; ++ ++static const ddt_ops_t *ddt_ops[DDT_TYPES] = { ++ &ddt_zap_ops, ++}; ++ ++static const char *ddt_class_name[DDT_CLASSES] = { ++ "ditto", ++ "duplicate", ++ "unique", ++}; ++ ++static void ++ddt_object_create(ddt_t *ddt, enum ddt_type type, enum ddt_class class, ++ dmu_tx_t *tx) ++{ ++ spa_t *spa = ddt->ddt_spa; ++ objset_t *os = ddt->ddt_os; ++ uint64_t *objectp = &ddt->ddt_object[type][class]; ++ boolean_t prehash = zio_checksum_table[ddt->ddt_checksum].ci_dedup; ++ char name[DDT_NAMELEN]; ++ ++ ddt_object_name(ddt, type, class, name); ++ ++ ASSERT(*objectp == 0); ++ VERIFY(ddt_ops[type]->ddt_op_create(os, objectp, tx, prehash) == 0); ++ ASSERT(*objectp != 0); ++ ++ VERIFY(zap_add(os, DMU_POOL_DIRECTORY_OBJECT, name, ++ sizeof (uint64_t), 1, objectp, tx) == 0); ++ ++ VERIFY(zap_add(os, spa->spa_ddt_stat_object, name, ++ sizeof (uint64_t), sizeof (ddt_histogram_t) / sizeof (uint64_t), ++ &ddt->ddt_histogram[type][class], tx) == 0); ++} ++ ++static void ++ddt_object_destroy(ddt_t *ddt, enum ddt_type type, enum ddt_class class, ++ dmu_tx_t *tx) ++{ ++ spa_t *spa = ddt->ddt_spa; ++ objset_t *os = ddt->ddt_os; ++ uint64_t *objectp = &ddt->ddt_object[type][class]; ++ uint64_t count; ++ char name[DDT_NAMELEN]; ++ ++ ddt_object_name(ddt, type, class, name); ++ ++ ASSERT(*objectp != 0); ++ ASSERT(ddt_histogram_empty(&ddt->ddt_histogram[type][class])); ++ VERIFY(ddt_object_count(ddt, type, class, &count) == 0 && count == 0); ++ VERIFY(zap_remove(os, DMU_POOL_DIRECTORY_OBJECT, name, tx) == 0); ++ VERIFY(zap_remove(os, spa->spa_ddt_stat_object, name, tx) == 0); ++ VERIFY(ddt_ops[type]->ddt_op_destroy(os, *objectp, tx) == 0); ++ bzero(&ddt->ddt_object_stats[type][class], sizeof (ddt_object_t)); ++ ++ *objectp = 0; ++} ++ ++static int ++ddt_object_load(ddt_t *ddt, enum ddt_type type, enum ddt_class class) ++{ ++ ddt_object_t *ddo = &ddt->ddt_object_stats[type][class]; ++ dmu_object_info_t doi; ++ uint64_t count; ++ char name[DDT_NAMELEN]; ++ int error; ++ ++ ddt_object_name(ddt, type, class, name); ++ ++ error = zap_lookup(ddt->ddt_os, DMU_POOL_DIRECTORY_OBJECT, name, ++ sizeof (uint64_t), 1, &ddt->ddt_object[type][class]); ++ ++ if (error) ++ return (error); ++ ++ error = zap_lookup(ddt->ddt_os, ddt->ddt_spa->spa_ddt_stat_object, name, ++ sizeof (uint64_t), sizeof (ddt_histogram_t) / sizeof (uint64_t), ++ &ddt->ddt_histogram[type][class]); ++ ++ /* ++ * Seed the cached statistics. 
++ */ ++ error = ddt_object_info(ddt, type, class, &doi); ++ if (error) ++ return (error); ++ ++ error = ddt_object_count(ddt, type, class, &count); ++ if (error) ++ return (error); ++ ++ ddo->ddo_count = count; ++ ddo->ddo_dspace = doi.doi_physical_blocks_512 << 9; ++ ddo->ddo_mspace = doi.doi_fill_count * doi.doi_data_block_size; ++ ++ ASSERT(error == 0); ++ return (error); ++} ++ ++static void ++ddt_object_sync(ddt_t *ddt, enum ddt_type type, enum ddt_class class, ++ dmu_tx_t *tx) ++{ ++ ddt_object_t *ddo = &ddt->ddt_object_stats[type][class]; ++ dmu_object_info_t doi; ++ uint64_t count; ++ char name[DDT_NAMELEN]; ++ ++ ddt_object_name(ddt, type, class, name); ++ ++ VERIFY(zap_update(ddt->ddt_os, ddt->ddt_spa->spa_ddt_stat_object, name, ++ sizeof (uint64_t), sizeof (ddt_histogram_t) / sizeof (uint64_t), ++ &ddt->ddt_histogram[type][class], tx) == 0); ++ ++ /* ++ * Cache DDT statistics; this is the only time they'll change. ++ */ ++ VERIFY(ddt_object_info(ddt, type, class, &doi) == 0); ++ VERIFY(ddt_object_count(ddt, type, class, &count) == 0); ++ ++ ddo->ddo_count = count; ++ ddo->ddo_dspace = doi.doi_physical_blocks_512 << 9; ++ ddo->ddo_mspace = doi.doi_fill_count * doi.doi_data_block_size; ++} ++ ++static int ++ddt_object_lookup(ddt_t *ddt, enum ddt_type type, enum ddt_class class, ++ ddt_entry_t *dde) ++{ ++ if (!ddt_object_exists(ddt, type, class)) ++ return (ENOENT); ++ ++ return (ddt_ops[type]->ddt_op_lookup(ddt->ddt_os, ++ ddt->ddt_object[type][class], dde)); ++} ++ ++static void ++ddt_object_prefetch(ddt_t *ddt, enum ddt_type type, enum ddt_class class, ++ ddt_entry_t *dde) ++{ ++ if (!ddt_object_exists(ddt, type, class)) ++ return; ++ ++ ddt_ops[type]->ddt_op_prefetch(ddt->ddt_os, ++ ddt->ddt_object[type][class], dde); ++} ++ ++int ++ddt_object_update(ddt_t *ddt, enum ddt_type type, enum ddt_class class, ++ ddt_entry_t *dde, dmu_tx_t *tx) ++{ ++ ASSERT(ddt_object_exists(ddt, type, class)); ++ ++ return (ddt_ops[type]->ddt_op_update(ddt->ddt_os, ++ ddt->ddt_object[type][class], dde, tx)); ++} ++ ++static int ++ddt_object_remove(ddt_t *ddt, enum ddt_type type, enum ddt_class class, ++ ddt_entry_t *dde, dmu_tx_t *tx) ++{ ++ ASSERT(ddt_object_exists(ddt, type, class)); ++ ++ return (ddt_ops[type]->ddt_op_remove(ddt->ddt_os, ++ ddt->ddt_object[type][class], dde, tx)); ++} ++ ++int ++ddt_object_walk(ddt_t *ddt, enum ddt_type type, enum ddt_class class, ++ uint64_t *walk, ddt_entry_t *dde) ++{ ++ ASSERT(ddt_object_exists(ddt, type, class)); ++ ++ return (ddt_ops[type]->ddt_op_walk(ddt->ddt_os, ++ ddt->ddt_object[type][class], dde, walk)); ++} ++ ++int ++ddt_object_count(ddt_t *ddt, enum ddt_type type, enum ddt_class class, ++ uint64_t *count) ++{ ++ ASSERT(ddt_object_exists(ddt, type, class)); ++ ++ return (ddt_ops[type]->ddt_op_count(ddt->ddt_os, ++ ddt->ddt_object[type][class], count)); ++} ++ ++int ++ddt_object_info(ddt_t *ddt, enum ddt_type type, enum ddt_class class, ++ dmu_object_info_t *doi) ++{ ++ if (!ddt_object_exists(ddt, type, class)) ++ return (ENOENT); ++ ++ return (dmu_object_info(ddt->ddt_os, ddt->ddt_object[type][class], ++ doi)); ++} ++ ++boolean_t ++ddt_object_exists(ddt_t *ddt, enum ddt_type type, enum ddt_class class) ++{ ++ return (!!ddt->ddt_object[type][class]); ++} ++ ++void ++ddt_object_name(ddt_t *ddt, enum ddt_type type, enum ddt_class class, ++ char *name) ++{ ++ (void) sprintf(name, DMU_POOL_DDT, ++ zio_checksum_table[ddt->ddt_checksum].ci_name, ++ ddt_ops[type]->ddt_op_name, ddt_class_name[class]); ++} ++ ++void ++ddt_bp_fill(const ddt_phys_t *ddp, 
blkptr_t *bp, uint64_t txg) ++{ ++ int d; ++ ASSERT(txg != 0); ++ ++ for (d = 0; d < SPA_DVAS_PER_BP; d++) ++ bp->blk_dva[d] = ddp->ddp_dva[d]; ++ BP_SET_BIRTH(bp, txg, ddp->ddp_phys_birth); ++} ++ ++void ++ddt_bp_create(enum zio_checksum checksum, ++ const ddt_key_t *ddk, const ddt_phys_t *ddp, blkptr_t *bp) ++{ ++ BP_ZERO(bp); ++ ++ if (ddp != NULL) ++ ddt_bp_fill(ddp, bp, ddp->ddp_phys_birth); ++ ++ bp->blk_cksum = ddk->ddk_cksum; ++ bp->blk_fill = 1; ++ ++ BP_SET_LSIZE(bp, DDK_GET_LSIZE(ddk)); ++ BP_SET_PSIZE(bp, DDK_GET_PSIZE(ddk)); ++ BP_SET_COMPRESS(bp, DDK_GET_COMPRESS(ddk)); ++ BP_SET_CHECKSUM(bp, checksum); ++ BP_SET_TYPE(bp, DMU_OT_DEDUP); ++ BP_SET_LEVEL(bp, 0); ++ BP_SET_DEDUP(bp, 0); ++ BP_SET_BYTEORDER(bp, ZFS_HOST_BYTEORDER); ++} ++ ++void ++ddt_key_fill(ddt_key_t *ddk, const blkptr_t *bp) ++{ ++ ddk->ddk_cksum = bp->blk_cksum; ++ ddk->ddk_prop = 0; ++ ++ DDK_SET_LSIZE(ddk, BP_GET_LSIZE(bp)); ++ DDK_SET_PSIZE(ddk, BP_GET_PSIZE(bp)); ++ DDK_SET_COMPRESS(ddk, BP_GET_COMPRESS(bp)); ++} ++ ++void ++ddt_phys_fill(ddt_phys_t *ddp, const blkptr_t *bp) ++{ ++ int d; ++ ASSERT(ddp->ddp_phys_birth == 0); ++ ++ for (d = 0; d < SPA_DVAS_PER_BP; d++) ++ ddp->ddp_dva[d] = bp->blk_dva[d]; ++ ddp->ddp_phys_birth = BP_PHYSICAL_BIRTH(bp); ++} ++ ++void ++ddt_phys_clear(ddt_phys_t *ddp) ++{ ++ bzero(ddp, sizeof (*ddp)); ++} ++ ++void ++ddt_phys_addref(ddt_phys_t *ddp) ++{ ++ ddp->ddp_refcnt++; ++} ++ ++void ++ddt_phys_decref(ddt_phys_t *ddp) ++{ ++ ASSERT((int64_t)ddp->ddp_refcnt > 0); ++ ddp->ddp_refcnt--; ++} ++ ++void ++ddt_phys_free(ddt_t *ddt, ddt_key_t *ddk, ddt_phys_t *ddp, uint64_t txg) ++{ ++ blkptr_t blk; ++ ++ ddt_bp_create(ddt->ddt_checksum, ddk, ddp, &blk); ++ ddt_phys_clear(ddp); ++ zio_free(ddt->ddt_spa, txg, &blk); ++} ++ ++ddt_phys_t * ++ddt_phys_select(const ddt_entry_t *dde, const blkptr_t *bp) ++{ ++ ddt_phys_t *ddp = (ddt_phys_t *)dde->dde_phys; ++ int p; ++ ++ for (p = 0; p < DDT_PHYS_TYPES; p++, ddp++) { ++ if (DVA_EQUAL(BP_IDENTITY(bp), &ddp->ddp_dva[0]) && ++ BP_PHYSICAL_BIRTH(bp) == ddp->ddp_phys_birth) ++ return (ddp); ++ } ++ return (NULL); ++} ++ ++uint64_t ++ddt_phys_total_refcnt(const ddt_entry_t *dde) ++{ ++ uint64_t refcnt = 0; ++ int p; ++ ++ for (p = DDT_PHYS_SINGLE; p <= DDT_PHYS_TRIPLE; p++) ++ refcnt += dde->dde_phys[p].ddp_refcnt; ++ ++ return (refcnt); ++} ++ ++static void ++ddt_stat_generate(ddt_t *ddt, ddt_entry_t *dde, ddt_stat_t *dds) ++{ ++ spa_t *spa = ddt->ddt_spa; ++ ddt_phys_t *ddp = dde->dde_phys; ++ ddt_key_t *ddk = &dde->dde_key; ++ uint64_t lsize = DDK_GET_LSIZE(ddk); ++ uint64_t psize = DDK_GET_PSIZE(ddk); ++ int p, d; ++ ++ bzero(dds, sizeof (*dds)); ++ ++ for (p = 0; p < DDT_PHYS_TYPES; p++, ddp++) { ++ uint64_t dsize = 0; ++ uint64_t refcnt = ddp->ddp_refcnt; ++ ++ if (ddp->ddp_phys_birth == 0) ++ continue; ++ ++ for (d = 0; d < SPA_DVAS_PER_BP; d++) ++ dsize += dva_get_dsize_sync(spa, &ddp->ddp_dva[d]); ++ ++ dds->dds_blocks += 1; ++ dds->dds_lsize += lsize; ++ dds->dds_psize += psize; ++ dds->dds_dsize += dsize; ++ ++ dds->dds_ref_blocks += refcnt; ++ dds->dds_ref_lsize += lsize * refcnt; ++ dds->dds_ref_psize += psize * refcnt; ++ dds->dds_ref_dsize += dsize * refcnt; ++ } ++} ++ ++void ++ddt_stat_add(ddt_stat_t *dst, const ddt_stat_t *src, uint64_t neg) ++{ ++ const uint64_t *s = (const uint64_t *)src; ++ uint64_t *d = (uint64_t *)dst; ++ uint64_t *d_end = (uint64_t *)(dst + 1); ++ ++ ASSERT(neg == 0 || neg == -1ULL); /* add or subtract */ ++ ++ while (d < d_end) ++ *d++ += (*s++ ^ neg) - neg; ++} ++ ++static void 
++ddt_stat_update(ddt_t *ddt, ddt_entry_t *dde, uint64_t neg) ++{ ++ ddt_stat_t dds; ++ ddt_histogram_t *ddh; ++ int bucket; ++ ++ ddt_stat_generate(ddt, dde, &dds); ++ ++ bucket = highbit(dds.dds_ref_blocks) - 1; ++ ASSERT(bucket >= 0); ++ ++ ddh = &ddt->ddt_histogram[dde->dde_type][dde->dde_class]; ++ ++ ddt_stat_add(&ddh->ddh_stat[bucket], &dds, neg); ++} ++ ++void ++ddt_histogram_add(ddt_histogram_t *dst, const ddt_histogram_t *src) ++{ ++ int h; ++ ++ for (h = 0; h < 64; h++) ++ ddt_stat_add(&dst->ddh_stat[h], &src->ddh_stat[h], 0); ++} ++ ++void ++ddt_histogram_stat(ddt_stat_t *dds, const ddt_histogram_t *ddh) ++{ ++ int h; ++ ++ bzero(dds, sizeof (*dds)); ++ ++ for (h = 0; h < 64; h++) ++ ddt_stat_add(dds, &ddh->ddh_stat[h], 0); ++} ++ ++boolean_t ++ddt_histogram_empty(const ddt_histogram_t *ddh) ++{ ++ const uint64_t *s = (const uint64_t *)ddh; ++ const uint64_t *s_end = (const uint64_t *)(ddh + 1); ++ ++ while (s < s_end) ++ if (*s++ != 0) ++ return (B_FALSE); ++ ++ return (B_TRUE); ++} ++ ++void ++ddt_get_dedup_object_stats(spa_t *spa, ddt_object_t *ddo_total) ++{ ++ enum zio_checksum c; ++ enum ddt_type type; ++ enum ddt_class class; ++ ++ /* Sum the statistics we cached in ddt_object_sync(). */ ++ for (c = 0; c < ZIO_CHECKSUM_FUNCTIONS; c++) { ++ ddt_t *ddt = spa->spa_ddt[c]; ++ for (type = 0; type < DDT_TYPES; type++) { ++ for (class = 0; class < DDT_CLASSES; ++ class++) { ++ ddt_object_t *ddo = ++ &ddt->ddt_object_stats[type][class]; ++ ddo_total->ddo_count += ddo->ddo_count; ++ ddo_total->ddo_dspace += ddo->ddo_dspace; ++ ddo_total->ddo_mspace += ddo->ddo_mspace; ++ } ++ } ++ } ++ ++ /* ... and compute the averages. */ ++ if (ddo_total->ddo_count != 0) { ++ ddo_total->ddo_dspace /= ddo_total->ddo_count; ++ ddo_total->ddo_mspace /= ddo_total->ddo_count; ++ } ++} ++ ++void ++ddt_get_dedup_histogram(spa_t *spa, ddt_histogram_t *ddh) ++{ ++ enum zio_checksum c; ++ enum ddt_type type; ++ enum ddt_class class; ++ ++ for (c = 0; c < ZIO_CHECKSUM_FUNCTIONS; c++) { ++ ddt_t *ddt = spa->spa_ddt[c]; ++ for (type = 0; type < DDT_TYPES; type++) { ++ for (class = 0; class < DDT_CLASSES; ++ class++) { ++ ddt_histogram_add(ddh, ++ &ddt->ddt_histogram_cache[type][class]); ++ } ++ } ++ } ++} ++ ++void ++ddt_get_dedup_stats(spa_t *spa, ddt_stat_t *dds_total) ++{ ++ ddt_histogram_t *ddh_total; ++ ++ /* XXX: Move to a slab */ ++ ddh_total = kmem_zalloc(sizeof (ddt_histogram_t), KM_PUSHPAGE); ++ ddt_get_dedup_histogram(spa, ddh_total); ++ ddt_histogram_stat(dds_total, ddh_total); ++ kmem_free(ddh_total, sizeof (ddt_histogram_t)); ++} ++ ++uint64_t ++ddt_get_dedup_dspace(spa_t *spa) ++{ ++ ddt_stat_t dds_total = { 0 }; ++ ++ ddt_get_dedup_stats(spa, &dds_total); ++ return (dds_total.dds_ref_dsize - dds_total.dds_dsize); ++} ++ ++uint64_t ++ddt_get_pool_dedup_ratio(spa_t *spa) ++{ ++ ddt_stat_t dds_total = { 0 }; ++ ++ ddt_get_dedup_stats(spa, &dds_total); ++ if (dds_total.dds_dsize == 0) ++ return (100); ++ ++ return (dds_total.dds_ref_dsize * 100 / dds_total.dds_dsize); ++} ++ ++int ++ddt_ditto_copies_needed(ddt_t *ddt, ddt_entry_t *dde, ddt_phys_t *ddp_willref) ++{ ++ spa_t *spa = ddt->ddt_spa; ++ uint64_t total_refcnt = 0; ++ uint64_t ditto = spa->spa_dedup_ditto; ++ int total_copies = 0; ++ int desired_copies = 0; ++ int p; ++ ++ for (p = DDT_PHYS_SINGLE; p <= DDT_PHYS_TRIPLE; p++) { ++ ddt_phys_t *ddp = &dde->dde_phys[p]; ++ zio_t *zio = dde->dde_lead_zio[p]; ++ uint64_t refcnt = ddp->ddp_refcnt; /* committed refs */ ++ if (zio != NULL) ++ refcnt += zio->io_parent_count; /* pending refs */ ++ 
if (ddp == ddp_willref) ++ refcnt++; /* caller's ref */ ++ if (refcnt != 0) { ++ total_refcnt += refcnt; ++ total_copies += p; ++ } ++ } ++ ++ if (ditto == 0 || ditto > UINT32_MAX) ++ ditto = UINT32_MAX; ++ ++ if (total_refcnt >= 1) ++ desired_copies++; ++ if (total_refcnt >= ditto) ++ desired_copies++; ++ if (total_refcnt >= ditto * ditto) ++ desired_copies++; ++ ++ return (MAX(desired_copies, total_copies) - total_copies); ++} ++ ++int ++ddt_ditto_copies_present(ddt_entry_t *dde) ++{ ++ ddt_phys_t *ddp = &dde->dde_phys[DDT_PHYS_DITTO]; ++ dva_t *dva = ddp->ddp_dva; ++ int copies = 0 - DVA_GET_GANG(dva); ++ int d; ++ ++ for (d = 0; d < SPA_DVAS_PER_BP; d++, dva++) ++ if (DVA_IS_VALID(dva)) ++ copies++; ++ ++ ASSERT(copies >= 0 && copies < SPA_DVAS_PER_BP); ++ ++ return (copies); ++} ++ ++size_t ++ddt_compress(void *src, uchar_t *dst, size_t s_len, size_t d_len) ++{ ++ uchar_t *version = dst++; ++ int cpfunc = ZIO_COMPRESS_ZLE; ++ zio_compress_info_t *ci = &zio_compress_table[cpfunc]; ++ size_t c_len; ++ ++ ASSERT(d_len >= s_len + 1); /* no compression plus version byte */ ++ ++ c_len = ci->ci_compress(src, dst, s_len, d_len - 1, ci->ci_level); ++ ++ if (c_len == s_len) { ++ cpfunc = ZIO_COMPRESS_OFF; ++ bcopy(src, dst, s_len); ++ } ++ ++ *version = (ZFS_HOST_BYTEORDER & DDT_COMPRESS_BYTEORDER_MASK) | cpfunc; ++ ++ return (c_len + 1); ++} ++ ++void ++ddt_decompress(uchar_t *src, void *dst, size_t s_len, size_t d_len) ++{ ++ uchar_t version = *src++; ++ int cpfunc = version & DDT_COMPRESS_FUNCTION_MASK; ++ zio_compress_info_t *ci = &zio_compress_table[cpfunc]; ++ ++ if (ci->ci_decompress != NULL) ++ (void) ci->ci_decompress(src, dst, s_len, d_len, ci->ci_level); ++ else ++ bcopy(src, dst, d_len); ++ ++ if ((version ^ ZFS_HOST_BYTEORDER) & DDT_COMPRESS_BYTEORDER_MASK) ++ byteswap_uint64_array(dst, d_len); ++} ++ ++ddt_t * ++ddt_select_by_checksum(spa_t *spa, enum zio_checksum c) ++{ ++ return (spa->spa_ddt[c]); ++} ++ ++ddt_t * ++ddt_select(spa_t *spa, const blkptr_t *bp) ++{ ++ return (spa->spa_ddt[BP_GET_CHECKSUM(bp)]); ++} ++ ++void ++ddt_enter(ddt_t *ddt) ++{ ++ mutex_enter(&ddt->ddt_lock); ++} ++ ++void ++ddt_exit(ddt_t *ddt) ++{ ++ mutex_exit(&ddt->ddt_lock); ++} ++ ++static ddt_entry_t * ++ddt_alloc(const ddt_key_t *ddk) ++{ ++ ddt_entry_t *dde; ++ ++ /* XXX: Move to a slab */ ++ dde = kmem_zalloc(sizeof (ddt_entry_t), KM_PUSHPAGE); ++ cv_init(&dde->dde_cv, NULL, CV_DEFAULT, NULL); ++ ++ dde->dde_key = *ddk; ++ ++ return (dde); ++} ++ ++static void ++ddt_free(ddt_entry_t *dde) ++{ ++ int p; ++ ++ ASSERT(!dde->dde_loading); ++ ++ for (p = 0; p < DDT_PHYS_TYPES; p++) ++ ASSERT(dde->dde_lead_zio[p] == NULL); ++ ++ if (dde->dde_repair_data != NULL) ++ zio_buf_free(dde->dde_repair_data, ++ DDK_GET_PSIZE(&dde->dde_key)); ++ ++ cv_destroy(&dde->dde_cv); ++ kmem_free(dde, sizeof (*dde)); ++} ++ ++void ++ddt_remove(ddt_t *ddt, ddt_entry_t *dde) ++{ ++ ASSERT(MUTEX_HELD(&ddt->ddt_lock)); ++ ++ avl_remove(&ddt->ddt_tree, dde); ++ ddt_free(dde); ++} ++ ++ddt_entry_t * ++ddt_lookup(ddt_t *ddt, const blkptr_t *bp, boolean_t add) ++{ ++ ddt_entry_t *dde, dde_search; ++ enum ddt_type type; ++ enum ddt_class class; ++ avl_index_t where; ++ int error; ++ ++ ASSERT(MUTEX_HELD(&ddt->ddt_lock)); ++ ++ ddt_key_fill(&dde_search.dde_key, bp); ++ ++ dde = avl_find(&ddt->ddt_tree, &dde_search, &where); ++ if (dde == NULL) { ++ if (!add) ++ return (NULL); ++ dde = ddt_alloc(&dde_search.dde_key); ++ avl_insert(&ddt->ddt_tree, dde, where); ++ } ++ ++ while (dde->dde_loading) ++ cv_wait(&dde->dde_cv, 
&ddt->ddt_lock); ++ ++ if (dde->dde_loaded) ++ return (dde); ++ ++ dde->dde_loading = B_TRUE; ++ ++ ddt_exit(ddt); ++ ++ error = ENOENT; ++ ++ for (type = 0; type < DDT_TYPES; type++) { ++ for (class = 0; class < DDT_CLASSES; class++) { ++ error = ddt_object_lookup(ddt, type, class, dde); ++ if (error != ENOENT) ++ break; ++ } ++ if (error != ENOENT) ++ break; ++ } ++ ++ ASSERT(error == 0 || error == ENOENT); ++ ++ ddt_enter(ddt); ++ ++ ASSERT(dde->dde_loaded == B_FALSE); ++ ASSERT(dde->dde_loading == B_TRUE); ++ ++ dde->dde_type = type; /* will be DDT_TYPES if no entry found */ ++ dde->dde_class = class; /* will be DDT_CLASSES if no entry found */ ++ dde->dde_loaded = B_TRUE; ++ dde->dde_loading = B_FALSE; ++ ++ if (error == 0) ++ ddt_stat_update(ddt, dde, -1ULL); ++ ++ cv_broadcast(&dde->dde_cv); ++ ++ return (dde); ++} ++ ++void ++ddt_prefetch(spa_t *spa, const blkptr_t *bp) ++{ ++ ddt_t *ddt; ++ ddt_entry_t dde; ++ enum ddt_type type; ++ enum ddt_class class; ++ ++ if (!zfs_dedup_prefetch || bp == NULL || !BP_GET_DEDUP(bp)) ++ return; ++ ++ /* ++ * We only remove the DDT once all tables are empty and only ++ * prefetch dedup blocks when there are entries in the DDT. ++ * Thus no locking is required as the DDT can't disappear on us. ++ */ ++ ddt = ddt_select(spa, bp); ++ ddt_key_fill(&dde.dde_key, bp); ++ ++ for (type = 0; type < DDT_TYPES; type++) { ++ for (class = 0; class < DDT_CLASSES; class++) { ++ ddt_object_prefetch(ddt, type, class, &dde); ++ } ++ } ++} ++ ++int ++ddt_entry_compare(const void *x1, const void *x2) ++{ ++ const ddt_entry_t *dde1 = x1; ++ const ddt_entry_t *dde2 = x2; ++ const uint64_t *u1 = (const uint64_t *)&dde1->dde_key; ++ const uint64_t *u2 = (const uint64_t *)&dde2->dde_key; ++ int i; ++ ++ for (i = 0; i < DDT_KEY_WORDS; i++) { ++ if (u1[i] < u2[i]) ++ return (-1); ++ if (u1[i] > u2[i]) ++ return (1); ++ } ++ ++ return (0); ++} ++ ++static ddt_t * ++ddt_table_alloc(spa_t *spa, enum zio_checksum c) ++{ ++ ddt_t *ddt; ++ ++ /* XXX: Move to a slab */ ++ ddt = kmem_zalloc(sizeof (*ddt), KM_PUSHPAGE | KM_NODEBUG); ++ ++ mutex_init(&ddt->ddt_lock, NULL, MUTEX_DEFAULT, NULL); ++ avl_create(&ddt->ddt_tree, ddt_entry_compare, ++ sizeof (ddt_entry_t), offsetof(ddt_entry_t, dde_node)); ++ avl_create(&ddt->ddt_repair_tree, ddt_entry_compare, ++ sizeof (ddt_entry_t), offsetof(ddt_entry_t, dde_node)); ++ ddt->ddt_checksum = c; ++ ddt->ddt_spa = spa; ++ ddt->ddt_os = spa->spa_meta_objset; ++ ++ return (ddt); ++} ++ ++static void ++ddt_table_free(ddt_t *ddt) ++{ ++ ASSERT(avl_numnodes(&ddt->ddt_tree) == 0); ++ ASSERT(avl_numnodes(&ddt->ddt_repair_tree) == 0); ++ avl_destroy(&ddt->ddt_tree); ++ avl_destroy(&ddt->ddt_repair_tree); ++ mutex_destroy(&ddt->ddt_lock); ++ kmem_free(ddt, sizeof (*ddt)); ++} ++ ++void ++ddt_create(spa_t *spa) ++{ ++ enum zio_checksum c; ++ ++ spa->spa_dedup_checksum = ZIO_DEDUPCHECKSUM; ++ ++ for (c = 0; c < ZIO_CHECKSUM_FUNCTIONS; c++) ++ spa->spa_ddt[c] = ddt_table_alloc(spa, c); ++} ++ ++int ++ddt_load(spa_t *spa) ++{ ++ enum zio_checksum c; ++ enum ddt_type type; ++ enum ddt_class class; ++ int error; ++ ++ ddt_create(spa); ++ ++ error = zap_lookup(spa->spa_meta_objset, DMU_POOL_DIRECTORY_OBJECT, ++ DMU_POOL_DDT_STATS, sizeof (uint64_t), 1, ++ &spa->spa_ddt_stat_object); ++ ++ if (error) ++ return (error == ENOENT ? 
0 : error); ++ ++ for (c = 0; c < ZIO_CHECKSUM_FUNCTIONS; c++) { ++ ddt_t *ddt = spa->spa_ddt[c]; ++ for (type = 0; type < DDT_TYPES; type++) { ++ for (class = 0; class < DDT_CLASSES; ++ class++) { ++ error = ddt_object_load(ddt, type, class); ++ if (error != 0 && error != ENOENT) ++ return (error); ++ } ++ } ++ ++ /* ++ * Seed the cached histograms. ++ */ ++ bcopy(ddt->ddt_histogram, &ddt->ddt_histogram_cache, ++ sizeof (ddt->ddt_histogram)); ++ } ++ ++ return (0); ++} ++ ++void ++ddt_unload(spa_t *spa) ++{ ++ enum zio_checksum c; ++ ++ for (c = 0; c < ZIO_CHECKSUM_FUNCTIONS; c++) { ++ if (spa->spa_ddt[c]) { ++ ddt_table_free(spa->spa_ddt[c]); ++ spa->spa_ddt[c] = NULL; ++ } ++ } ++} ++ ++boolean_t ++ddt_class_contains(spa_t *spa, enum ddt_class max_class, const blkptr_t *bp) ++{ ++ ddt_t *ddt; ++ ddt_entry_t *dde; ++ enum ddt_type type; ++ enum ddt_class class; ++ ++ if (!BP_GET_DEDUP(bp)) ++ return (B_FALSE); ++ ++ if (max_class == DDT_CLASS_UNIQUE) ++ return (B_TRUE); ++ ++ ddt = spa->spa_ddt[BP_GET_CHECKSUM(bp)]; ++ dde = kmem_alloc(sizeof(ddt_entry_t), KM_PUSHPAGE); ++ ++ ddt_key_fill(&(dde->dde_key), bp); ++ ++ for (type = 0; type < DDT_TYPES; type++) { ++ for (class = 0; class <= max_class; class++) { ++ if (ddt_object_lookup(ddt, type, class, dde) == 0) { ++ kmem_free(dde, sizeof(ddt_entry_t)); ++ return (B_TRUE); ++ } ++ } ++ } ++ ++ kmem_free(dde, sizeof(ddt_entry_t)); ++ return (B_FALSE); ++} ++ ++ddt_entry_t * ++ddt_repair_start(ddt_t *ddt, const blkptr_t *bp) ++{ ++ ddt_key_t ddk; ++ ddt_entry_t *dde; ++ enum ddt_type type; ++ enum ddt_class class; ++ ++ ddt_key_fill(&ddk, bp); ++ ++ dde = ddt_alloc(&ddk); ++ ++ for (type = 0; type < DDT_TYPES; type++) { ++ for (class = 0; class < DDT_CLASSES; class++) { ++ /* ++ * We can only do repair if there are multiple copies ++ * of the block. For anything in the UNIQUE class, ++ * there's definitely only one copy, so don't even try. 
++ */ ++ if (class != DDT_CLASS_UNIQUE && ++ ddt_object_lookup(ddt, type, class, dde) == 0) ++ return (dde); ++ } ++ } ++ ++ bzero(dde->dde_phys, sizeof (dde->dde_phys)); ++ ++ return (dde); ++} ++ ++void ++ddt_repair_done(ddt_t *ddt, ddt_entry_t *dde) ++{ ++ avl_index_t where; ++ ++ ddt_enter(ddt); ++ ++ if (dde->dde_repair_data != NULL && spa_writeable(ddt->ddt_spa) && ++ avl_find(&ddt->ddt_repair_tree, dde, &where) == NULL) ++ avl_insert(&ddt->ddt_repair_tree, dde, where); ++ else ++ ddt_free(dde); ++ ++ ddt_exit(ddt); ++} ++ ++static void ++ddt_repair_entry_done(zio_t *zio) ++{ ++ ddt_entry_t *rdde = zio->io_private; ++ ++ ddt_free(rdde); ++} ++ ++static void ++ddt_repair_entry(ddt_t *ddt, ddt_entry_t *dde, ddt_entry_t *rdde, zio_t *rio) ++{ ++ ddt_phys_t *ddp = dde->dde_phys; ++ ddt_phys_t *rddp = rdde->dde_phys; ++ ddt_key_t *ddk = &dde->dde_key; ++ ddt_key_t *rddk = &rdde->dde_key; ++ zio_t *zio; ++ blkptr_t blk; ++ int p; ++ ++ zio = zio_null(rio, rio->io_spa, NULL, ++ ddt_repair_entry_done, rdde, rio->io_flags); ++ ++ for (p = 0; p < DDT_PHYS_TYPES; p++, ddp++, rddp++) { ++ if (ddp->ddp_phys_birth == 0 || ++ ddp->ddp_phys_birth != rddp->ddp_phys_birth || ++ bcmp(ddp->ddp_dva, rddp->ddp_dva, sizeof (ddp->ddp_dva))) ++ continue; ++ ddt_bp_create(ddt->ddt_checksum, ddk, ddp, &blk); ++ zio_nowait(zio_rewrite(zio, zio->io_spa, 0, &blk, ++ rdde->dde_repair_data, DDK_GET_PSIZE(rddk), NULL, NULL, ++ ZIO_PRIORITY_SYNC_WRITE, ZIO_DDT_CHILD_FLAGS(zio), NULL)); ++ } ++ ++ zio_nowait(zio); ++} ++ ++static void ++ddt_repair_table(ddt_t *ddt, zio_t *rio) ++{ ++ spa_t *spa = ddt->ddt_spa; ++ ddt_entry_t *dde, *rdde_next, *rdde; ++ avl_tree_t *t = &ddt->ddt_repair_tree; ++ blkptr_t blk; ++ ++ if (spa_sync_pass(spa) > 1) ++ return; ++ ++ ddt_enter(ddt); ++ for (rdde = avl_first(t); rdde != NULL; rdde = rdde_next) { ++ rdde_next = AVL_NEXT(t, rdde); ++ avl_remove(&ddt->ddt_repair_tree, rdde); ++ ddt_exit(ddt); ++ ddt_bp_create(ddt->ddt_checksum, &rdde->dde_key, NULL, &blk); ++ dde = ddt_repair_start(ddt, &blk); ++ ddt_repair_entry(ddt, dde, rdde, rio); ++ ddt_repair_done(ddt, dde); ++ ddt_enter(ddt); ++ } ++ ddt_exit(ddt); ++} ++ ++static void ++ddt_sync_entry(ddt_t *ddt, ddt_entry_t *dde, dmu_tx_t *tx, uint64_t txg) ++{ ++ dsl_pool_t *dp = ddt->ddt_spa->spa_dsl_pool; ++ ddt_phys_t *ddp = dde->dde_phys; ++ ddt_key_t *ddk = &dde->dde_key; ++ enum ddt_type otype = dde->dde_type; ++ enum ddt_type ntype = DDT_TYPE_CURRENT; ++ enum ddt_class oclass = dde->dde_class; ++ enum ddt_class nclass; ++ uint64_t total_refcnt = 0; ++ int p; ++ ++ ASSERT(dde->dde_loaded); ++ ASSERT(!dde->dde_loading); ++ ++ for (p = 0; p < DDT_PHYS_TYPES; p++, ddp++) { ++ ASSERT(dde->dde_lead_zio[p] == NULL); ++ ASSERT((int64_t)ddp->ddp_refcnt >= 0); ++ if (ddp->ddp_phys_birth == 0) { ++ ASSERT(ddp->ddp_refcnt == 0); ++ continue; ++ } ++ if (p == DDT_PHYS_DITTO) { ++ if (ddt_ditto_copies_needed(ddt, dde, NULL) == 0) ++ ddt_phys_free(ddt, ddk, ddp, txg); ++ continue; ++ } ++ if (ddp->ddp_refcnt == 0) ++ ddt_phys_free(ddt, ddk, ddp, txg); ++ total_refcnt += ddp->ddp_refcnt; ++ } ++ ++ if (dde->dde_phys[DDT_PHYS_DITTO].ddp_phys_birth != 0) ++ nclass = DDT_CLASS_DITTO; ++ else if (total_refcnt > 1) ++ nclass = DDT_CLASS_DUPLICATE; ++ else ++ nclass = DDT_CLASS_UNIQUE; ++ ++ if (otype != DDT_TYPES && ++ (otype != ntype || oclass != nclass || total_refcnt == 0)) { ++ VERIFY(ddt_object_remove(ddt, otype, oclass, dde, tx) == 0); ++ ASSERT(ddt_object_lookup(ddt, otype, oclass, dde) == ENOENT); ++ } ++ ++ if (total_refcnt != 0) { ++ 
dde->dde_type = ntype; ++ dde->dde_class = nclass; ++ ddt_stat_update(ddt, dde, 0); ++ if (!ddt_object_exists(ddt, ntype, nclass)) ++ ddt_object_create(ddt, ntype, nclass, tx); ++ VERIFY(ddt_object_update(ddt, ntype, nclass, dde, tx) == 0); ++ ++ /* ++ * If the class changes, the order that we scan this bp ++ * changes. If it decreases, we could miss it, so ++ * scan it right now. (This covers both class changing ++ * while we are doing ddt_walk(), and when we are ++ * traversing.) ++ */ ++ if (nclass < oclass) { ++ dsl_scan_ddt_entry(dp->dp_scan, ++ ddt->ddt_checksum, dde, tx); ++ } ++ } ++} ++ ++static void ++ddt_sync_table(ddt_t *ddt, dmu_tx_t *tx, uint64_t txg) ++{ ++ spa_t *spa = ddt->ddt_spa; ++ ddt_entry_t *dde; ++ void *cookie = NULL; ++ enum ddt_type type; ++ enum ddt_class class; ++ ++ if (avl_numnodes(&ddt->ddt_tree) == 0) ++ return; ++ ++ ASSERT(spa->spa_uberblock.ub_version >= SPA_VERSION_DEDUP); ++ ++ if (spa->spa_ddt_stat_object == 0) { ++ spa->spa_ddt_stat_object = zap_create(ddt->ddt_os, ++ DMU_OT_DDT_STATS, DMU_OT_NONE, 0, tx); ++ VERIFY(zap_add(ddt->ddt_os, DMU_POOL_DIRECTORY_OBJECT, ++ DMU_POOL_DDT_STATS, sizeof (uint64_t), 1, ++ &spa->spa_ddt_stat_object, tx) == 0); ++ } ++ ++ while ((dde = avl_destroy_nodes(&ddt->ddt_tree, &cookie)) != NULL) { ++ ddt_sync_entry(ddt, dde, tx, txg); ++ ddt_free(dde); ++ } ++ ++ for (type = 0; type < DDT_TYPES; type++) { ++ uint64_t add, count = 0; ++ for (class = 0; class < DDT_CLASSES; class++) { ++ if (ddt_object_exists(ddt, type, class)) { ++ ddt_object_sync(ddt, type, class, tx); ++ VERIFY(ddt_object_count(ddt, type, class, ++ &add) == 0); ++ count += add; ++ } ++ } ++ for (class = 0; class < DDT_CLASSES; class++) { ++ if (count == 0 && ddt_object_exists(ddt, type, class)) ++ ddt_object_destroy(ddt, type, class, tx); ++ } ++ } ++ ++ bcopy(ddt->ddt_histogram, &ddt->ddt_histogram_cache, ++ sizeof (ddt->ddt_histogram)); ++} ++ ++void ++ddt_sync(spa_t *spa, uint64_t txg) ++{ ++ dmu_tx_t *tx; ++ zio_t *rio = zio_root(spa, NULL, NULL, ++ ZIO_FLAG_CANFAIL | ZIO_FLAG_SPECULATIVE); ++ enum zio_checksum c; ++ ++ ASSERT(spa_syncing_txg(spa) == txg); ++ ++ tx = dmu_tx_create_assigned(spa->spa_dsl_pool, txg); ++ ++ for (c = 0; c < ZIO_CHECKSUM_FUNCTIONS; c++) { ++ ddt_t *ddt = spa->spa_ddt[c]; ++ if (ddt == NULL) ++ continue; ++ ddt_sync_table(ddt, tx, txg); ++ ddt_repair_table(ddt, rio); ++ } ++ ++ (void) zio_wait(rio); ++ ++ dmu_tx_commit(tx); ++} ++ ++int ++ddt_walk(spa_t *spa, ddt_bookmark_t *ddb, ddt_entry_t *dde) ++{ ++ do { ++ do { ++ do { ++ ddt_t *ddt = spa->spa_ddt[ddb->ddb_checksum]; ++ int error = ENOENT; ++ if (ddt_object_exists(ddt, ddb->ddb_type, ++ ddb->ddb_class)) { ++ error = ddt_object_walk(ddt, ++ ddb->ddb_type, ddb->ddb_class, ++ &ddb->ddb_cursor, dde); ++ } ++ dde->dde_type = ddb->ddb_type; ++ dde->dde_class = ddb->ddb_class; ++ if (error == 0) ++ return (0); ++ if (error != ENOENT) ++ return (error); ++ ddb->ddb_cursor = 0; ++ } while (++ddb->ddb_checksum < ZIO_CHECKSUM_FUNCTIONS); ++ ddb->ddb_checksum = 0; ++ } while (++ddb->ddb_type < DDT_TYPES); ++ ddb->ddb_type = 0; ++ } while (++ddb->ddb_class < DDT_CLASSES); ++ ++ return (ENOENT); ++} ++ ++#if defined(_KERNEL) && defined(HAVE_SPL) ++module_param(zfs_dedup_prefetch, int, 0644); ++MODULE_PARM_DESC(zfs_dedup_prefetch,"Enable prefetching dedup-ed blks"); ++#endif +diff -uNr linux-3.2.33-go.orig/fs/zfs/zfs/ddt_zap.c linux-3.2.33-go/fs/zfs/zfs/ddt_zap.c +--- linux-3.2.33-go.orig/fs/zfs/zfs/ddt_zap.c 1970-01-01 01:00:00.000000000 +0100 ++++ 
linux-3.2.33-go/fs/zfs/zfs/ddt_zap.c 2012-11-16 23:25:34.348039346 +0100 +@@ -0,0 +1,157 @@ ++/* ++ * CDDL HEADER START ++ * ++ * The contents of this file are subject to the terms of the ++ * Common Development and Distribution License (the "License"). ++ * You may not use this file except in compliance with the License. ++ * ++ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE ++ * or http://www.opensolaris.org/os/licensing. ++ * See the License for the specific language governing permissions ++ * and limitations under the License. ++ * ++ * When distributing Covered Code, include this CDDL HEADER in each ++ * file and include the License file at usr/src/OPENSOLARIS.LICENSE. ++ * If applicable, add the following below this CDDL HEADER, with the ++ * fields enclosed by brackets "[]" replaced with your own identifying ++ * information: Portions Copyright [yyyy] [name of copyright owner] ++ * ++ * CDDL HEADER END ++ */ ++ ++/* ++ * Copyright (c) 2009, 2010, Oracle and/or its affiliates. All rights reserved. ++ */ ++ ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++ ++int ddt_zap_leaf_blockshift = 12; ++int ddt_zap_indirect_blockshift = 12; ++ ++static int ++ddt_zap_create(objset_t *os, uint64_t *objectp, dmu_tx_t *tx, boolean_t prehash) ++{ ++ zap_flags_t flags = ZAP_FLAG_HASH64 | ZAP_FLAG_UINT64_KEY; ++ ++ if (prehash) ++ flags |= ZAP_FLAG_PRE_HASHED_KEY; ++ ++ *objectp = zap_create_flags(os, 0, flags, DMU_OT_DDT_ZAP, ++ ddt_zap_leaf_blockshift, ddt_zap_indirect_blockshift, ++ DMU_OT_NONE, 0, tx); ++ ++ return (*objectp == 0 ? ENOTSUP : 0); ++} ++ ++static int ++ddt_zap_destroy(objset_t *os, uint64_t object, dmu_tx_t *tx) ++{ ++ return (zap_destroy(os, object, tx)); ++} ++ ++static int ++ddt_zap_lookup(objset_t *os, uint64_t object, ddt_entry_t *dde) ++{ ++ uchar_t *cbuf; ++ uint64_t one, csize; ++ int error; ++ ++ cbuf = kmem_alloc(sizeof (dde->dde_phys) + 1, KM_PUSHPAGE); ++ ++ error = zap_length_uint64(os, object, (uint64_t *)&dde->dde_key, ++ DDT_KEY_WORDS, &one, &csize); ++ if (error) ++ goto out; ++ ++ ASSERT(one == 1); ++ ASSERT(csize <= (sizeof (dde->dde_phys) + 1)); ++ ++ error = zap_lookup_uint64(os, object, (uint64_t *)&dde->dde_key, ++ DDT_KEY_WORDS, 1, csize, cbuf); ++ if (error) ++ goto out; ++ ++ ddt_decompress(cbuf, dde->dde_phys, csize, sizeof (dde->dde_phys)); ++out: ++ kmem_free(cbuf, sizeof (dde->dde_phys) + 1); ++ ++ return (error); ++} ++ ++static void ++ddt_zap_prefetch(objset_t *os, uint64_t object, ddt_entry_t *dde) ++{ ++ (void) zap_prefetch_uint64(os, object, (uint64_t *)&dde->dde_key, ++ DDT_KEY_WORDS); ++} ++ ++static int ++ddt_zap_update(objset_t *os, uint64_t object, ddt_entry_t *dde, dmu_tx_t *tx) ++{ ++ uchar_t cbuf[sizeof (dde->dde_phys) + 1]; ++ uint64_t csize; ++ ++ csize = ddt_compress(dde->dde_phys, cbuf, ++ sizeof (dde->dde_phys), sizeof (cbuf)); ++ ++ return (zap_update_uint64(os, object, (uint64_t *)&dde->dde_key, ++ DDT_KEY_WORDS, 1, csize, cbuf, tx)); ++} ++ ++static int ++ddt_zap_remove(objset_t *os, uint64_t object, ddt_entry_t *dde, dmu_tx_t *tx) ++{ ++ return (zap_remove_uint64(os, object, (uint64_t *)&dde->dde_key, ++ DDT_KEY_WORDS, tx)); ++} ++ ++static int ++ddt_zap_walk(objset_t *os, uint64_t object, ddt_entry_t *dde, uint64_t *walk) ++{ ++ zap_cursor_t zc; ++ zap_attribute_t za; ++ int error; ++ ++ zap_cursor_init_serialized(&zc, os, object, *walk); ++ if ((error = zap_cursor_retrieve(&zc, &za)) == 0) { ++ uchar_t cbuf[sizeof (dde->dde_phys) + 1]; ++ uint64_t csize = za.za_num_integers; ++ 
ASSERT(za.za_integer_length == 1); ++ error = zap_lookup_uint64(os, object, (uint64_t *)za.za_name, ++ DDT_KEY_WORDS, 1, csize, cbuf); ++ ASSERT(error == 0); ++ if (error == 0) { ++ ddt_decompress(cbuf, dde->dde_phys, csize, ++ sizeof (dde->dde_phys)); ++ dde->dde_key = *(ddt_key_t *)za.za_name; ++ } ++ zap_cursor_advance(&zc); ++ *walk = zap_cursor_serialize(&zc); ++ } ++ zap_cursor_fini(&zc); ++ return (error); ++} ++ ++static int ++ddt_zap_count(objset_t *os, uint64_t object, uint64_t *count) ++{ ++ return zap_count(os, object, count); ++} ++ ++const ddt_ops_t ddt_zap_ops = { ++ "zap", ++ ddt_zap_create, ++ ddt_zap_destroy, ++ ddt_zap_lookup, ++ ddt_zap_prefetch, ++ ddt_zap_update, ++ ddt_zap_remove, ++ ddt_zap_walk, ++ ddt_zap_count, ++}; +diff -uNr linux-3.2.33-go.orig/fs/zfs/zfs/dmu.c linux-3.2.33-go/fs/zfs/zfs/dmu.c +--- linux-3.2.33-go.orig/fs/zfs/zfs/dmu.c 1970-01-01 01:00:00.000000000 +0100 ++++ linux-3.2.33-go/fs/zfs/zfs/dmu.c 2012-11-16 23:25:34.353039289 +0100 +@@ -0,0 +1,1983 @@ ++/* ++ * CDDL HEADER START ++ * ++ * The contents of this file are subject to the terms of the ++ * Common Development and Distribution License (the "License"). ++ * You may not use this file except in compliance with the License. ++ * ++ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE ++ * or http://www.opensolaris.org/os/licensing. ++ * See the License for the specific language governing permissions ++ * and limitations under the License. ++ * ++ * When distributing Covered Code, include this CDDL HEADER in each ++ * file and include the License file at usr/src/OPENSOLARIS.LICENSE. ++ * If applicable, add the following below this CDDL HEADER, with the ++ * fields enclosed by brackets "[]" replaced with your own identifying ++ * information: Portions Copyright [yyyy] [name of copyright owner] ++ * ++ * CDDL HEADER END ++ */ ++/* ++ * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved. 
++ */ ++ ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#ifdef _KERNEL ++#include ++#include ++#endif ++ ++const dmu_object_type_info_t dmu_ot[DMU_OT_NUMTYPES] = { ++ { byteswap_uint8_array, TRUE, "unallocated" }, ++ { zap_byteswap, TRUE, "object directory" }, ++ { byteswap_uint64_array, TRUE, "object array" }, ++ { byteswap_uint8_array, TRUE, "packed nvlist" }, ++ { byteswap_uint64_array, TRUE, "packed nvlist size" }, ++ { byteswap_uint64_array, TRUE, "bpobj" }, ++ { byteswap_uint64_array, TRUE, "bpobj header" }, ++ { byteswap_uint64_array, TRUE, "SPA space map header" }, ++ { byteswap_uint64_array, TRUE, "SPA space map" }, ++ { byteswap_uint64_array, TRUE, "ZIL intent log" }, ++ { dnode_buf_byteswap, TRUE, "DMU dnode" }, ++ { dmu_objset_byteswap, TRUE, "DMU objset" }, ++ { byteswap_uint64_array, TRUE, "DSL directory" }, ++ { zap_byteswap, TRUE, "DSL directory child map"}, ++ { zap_byteswap, TRUE, "DSL dataset snap map" }, ++ { zap_byteswap, TRUE, "DSL props" }, ++ { byteswap_uint64_array, TRUE, "DSL dataset" }, ++ { zfs_znode_byteswap, TRUE, "ZFS znode" }, ++ { zfs_oldacl_byteswap, TRUE, "ZFS V0 ACL" }, ++ { byteswap_uint8_array, FALSE, "ZFS plain file" }, ++ { zap_byteswap, TRUE, "ZFS directory" }, ++ { zap_byteswap, TRUE, "ZFS master node" }, ++ { zap_byteswap, TRUE, "ZFS delete queue" }, ++ { byteswap_uint8_array, FALSE, "zvol object" }, ++ { zap_byteswap, TRUE, "zvol prop" }, ++ { byteswap_uint8_array, FALSE, "other uint8[]" }, ++ { byteswap_uint64_array, FALSE, "other uint64[]" }, ++ { zap_byteswap, TRUE, "other ZAP" }, ++ { zap_byteswap, TRUE, "persistent error log" }, ++ { byteswap_uint8_array, TRUE, "SPA history" }, ++ { byteswap_uint64_array, TRUE, "SPA history offsets" }, ++ { zap_byteswap, TRUE, "Pool properties" }, ++ { zap_byteswap, TRUE, "DSL permissions" }, ++ { zfs_acl_byteswap, TRUE, "ZFS ACL" }, ++ { byteswap_uint8_array, TRUE, "ZFS SYSACL" }, ++ { byteswap_uint8_array, TRUE, "FUID table" }, ++ { byteswap_uint64_array, TRUE, "FUID table size" }, ++ { zap_byteswap, TRUE, "DSL dataset next clones"}, ++ { zap_byteswap, TRUE, "scan work queue" }, ++ { zap_byteswap, TRUE, "ZFS user/group used" }, ++ { zap_byteswap, TRUE, "ZFS user/group quota" }, ++ { zap_byteswap, TRUE, "snapshot refcount tags"}, ++ { zap_byteswap, TRUE, "DDT ZAP algorithm" }, ++ { zap_byteswap, TRUE, "DDT statistics" }, ++ { byteswap_uint8_array, TRUE, "System attributes" }, ++ { zap_byteswap, TRUE, "SA master node" }, ++ { zap_byteswap, TRUE, "SA attr registration" }, ++ { zap_byteswap, TRUE, "SA attr layouts" }, ++ { zap_byteswap, TRUE, "scan translations" }, ++ { byteswap_uint8_array, FALSE, "deduplicated block" }, ++ { zap_byteswap, TRUE, "DSL deadlist map" }, ++ { byteswap_uint64_array, TRUE, "DSL deadlist map hdr" }, ++ { zap_byteswap, TRUE, "DSL dir clones" }, ++ { byteswap_uint64_array, TRUE, "bpobj subobj" }, ++}; ++ ++int ++dmu_buf_hold(objset_t *os, uint64_t object, uint64_t offset, ++ void *tag, dmu_buf_t **dbp, int flags) ++{ ++ dnode_t *dn; ++ uint64_t blkid; ++ dmu_buf_impl_t *db; ++ int err; ++ int db_flags = DB_RF_CANFAIL; ++ ++ if (flags & DMU_READ_NO_PREFETCH) ++ db_flags |= DB_RF_NOPREFETCH; ++ ++ err = dnode_hold(os, object, FTAG, &dn); ++ if (err) ++ return (err); ++ blkid = dbuf_whichblock(dn, offset); ++ rw_enter(&dn->dn_struct_rwlock, RW_READER); ++ db = dbuf_hold(dn, blkid, tag); ++ rw_exit(&dn->dn_struct_rwlock); ++ if (db == 
NULL) { ++ err = EIO; ++ } else { ++ err = dbuf_read(db, NULL, db_flags); ++ if (err) { ++ dbuf_rele(db, tag); ++ db = NULL; ++ } ++ } ++ ++ dnode_rele(dn, FTAG); ++ *dbp = &db->db; /* NULL db plus first field offset is NULL */ ++ return (err); ++} ++ ++int ++dmu_bonus_max(void) ++{ ++ return (DN_MAX_BONUSLEN); ++} ++ ++int ++dmu_set_bonus(dmu_buf_t *db_fake, int newsize, dmu_tx_t *tx) ++{ ++ dmu_buf_impl_t *db = (dmu_buf_impl_t *)db_fake; ++ dnode_t *dn; ++ int error; ++ ++ DB_DNODE_ENTER(db); ++ dn = DB_DNODE(db); ++ ++ if (dn->dn_bonus != db) { ++ error = EINVAL; ++ } else if (newsize < 0 || newsize > db_fake->db_size) { ++ error = EINVAL; ++ } else { ++ dnode_setbonuslen(dn, newsize, tx); ++ error = 0; ++ } ++ ++ DB_DNODE_EXIT(db); ++ return (error); ++} ++ ++int ++dmu_set_bonustype(dmu_buf_t *db_fake, dmu_object_type_t type, dmu_tx_t *tx) ++{ ++ dmu_buf_impl_t *db = (dmu_buf_impl_t *)db_fake; ++ dnode_t *dn; ++ int error; ++ ++ DB_DNODE_ENTER(db); ++ dn = DB_DNODE(db); ++ ++ if (type > DMU_OT_NUMTYPES) { ++ error = EINVAL; ++ } else if (dn->dn_bonus != db) { ++ error = EINVAL; ++ } else { ++ dnode_setbonus_type(dn, type, tx); ++ error = 0; ++ } ++ ++ DB_DNODE_EXIT(db); ++ return (error); ++} ++ ++dmu_object_type_t ++dmu_get_bonustype(dmu_buf_t *db_fake) ++{ ++ dmu_buf_impl_t *db = (dmu_buf_impl_t *)db_fake; ++ dnode_t *dn; ++ dmu_object_type_t type; ++ ++ DB_DNODE_ENTER(db); ++ dn = DB_DNODE(db); ++ type = dn->dn_bonustype; ++ DB_DNODE_EXIT(db); ++ ++ return (type); ++} ++ ++int ++dmu_rm_spill(objset_t *os, uint64_t object, dmu_tx_t *tx) ++{ ++ dnode_t *dn; ++ int error; ++ ++ error = dnode_hold(os, object, FTAG, &dn); ++ dbuf_rm_spill(dn, tx); ++ rw_enter(&dn->dn_struct_rwlock, RW_WRITER); ++ dnode_rm_spill(dn, tx); ++ rw_exit(&dn->dn_struct_rwlock); ++ dnode_rele(dn, FTAG); ++ return (error); ++} ++ ++/* ++ * returns ENOENT, EIO, or 0. ++ */ ++int ++dmu_bonus_hold(objset_t *os, uint64_t object, void *tag, dmu_buf_t **dbp) ++{ ++ dnode_t *dn; ++ dmu_buf_impl_t *db; ++ int error; ++ ++ error = dnode_hold(os, object, FTAG, &dn); ++ if (error) ++ return (error); ++ ++ rw_enter(&dn->dn_struct_rwlock, RW_READER); ++ if (dn->dn_bonus == NULL) { ++ rw_exit(&dn->dn_struct_rwlock); ++ rw_enter(&dn->dn_struct_rwlock, RW_WRITER); ++ if (dn->dn_bonus == NULL) ++ dbuf_create_bonus(dn); ++ } ++ db = dn->dn_bonus; ++ ++ /* as long as the bonus buf is held, the dnode will be held */ ++ if (refcount_add(&db->db_holds, tag) == 1) { ++ VERIFY(dnode_add_ref(dn, db)); ++ (void) atomic_inc_32_nv(&dn->dn_dbufs_count); ++ } ++ ++ /* ++ * Wait to drop dn_struct_rwlock until after adding the bonus dbuf's ++ * hold and incrementing the dbuf count to ensure that dnode_move() sees ++ * a dnode hold for every dbuf. ++ */ ++ rw_exit(&dn->dn_struct_rwlock); ++ ++ dnode_rele(dn, FTAG); ++ ++ VERIFY(0 == dbuf_read(db, NULL, DB_RF_MUST_SUCCEED | DB_RF_NOPREFETCH)); ++ ++ *dbp = &db->db; ++ return (0); ++} ++ ++/* ++ * returns ENOENT, EIO, or 0. ++ * ++ * This interface will allocate a blank spill dbuf when a spill blk ++ * doesn't already exist on the dnode. ++ * ++ * if you only want to find an already existing spill db, then ++ * dmu_spill_hold_existing() should be used. 
++ */ ++int ++dmu_spill_hold_by_dnode(dnode_t *dn, uint32_t flags, void *tag, dmu_buf_t **dbp) ++{ ++ dmu_buf_impl_t *db = NULL; ++ int err; ++ ++ if ((flags & DB_RF_HAVESTRUCT) == 0) ++ rw_enter(&dn->dn_struct_rwlock, RW_READER); ++ ++ db = dbuf_hold(dn, DMU_SPILL_BLKID, tag); ++ ++ if ((flags & DB_RF_HAVESTRUCT) == 0) ++ rw_exit(&dn->dn_struct_rwlock); ++ ++ ASSERT(db != NULL); ++ err = dbuf_read(db, NULL, flags); ++ if (err == 0) ++ *dbp = &db->db; ++ else ++ dbuf_rele(db, tag); ++ return (err); ++} ++ ++int ++dmu_spill_hold_existing(dmu_buf_t *bonus, void *tag, dmu_buf_t **dbp) ++{ ++ dmu_buf_impl_t *db = (dmu_buf_impl_t *)bonus; ++ dnode_t *dn; ++ int err; ++ ++ DB_DNODE_ENTER(db); ++ dn = DB_DNODE(db); ++ ++ if (spa_version(dn->dn_objset->os_spa) < SPA_VERSION_SA) { ++ err = EINVAL; ++ } else { ++ rw_enter(&dn->dn_struct_rwlock, RW_READER); ++ ++ if (!dn->dn_have_spill) { ++ err = ENOENT; ++ } else { ++ err = dmu_spill_hold_by_dnode(dn, ++ DB_RF_HAVESTRUCT | DB_RF_CANFAIL, tag, dbp); ++ } ++ ++ rw_exit(&dn->dn_struct_rwlock); ++ } ++ ++ DB_DNODE_EXIT(db); ++ return (err); ++} ++ ++int ++dmu_spill_hold_by_bonus(dmu_buf_t *bonus, void *tag, dmu_buf_t **dbp) ++{ ++ dmu_buf_impl_t *db = (dmu_buf_impl_t *)bonus; ++ dnode_t *dn; ++ int err; ++ ++ DB_DNODE_ENTER(db); ++ dn = DB_DNODE(db); ++ err = dmu_spill_hold_by_dnode(dn, DB_RF_CANFAIL, tag, dbp); ++ DB_DNODE_EXIT(db); ++ ++ return (err); ++} ++ ++/* ++ * Note: longer-term, we should modify all of the dmu_buf_*() interfaces ++ * to take a held dnode rather than -- the lookup is wasteful, ++ * and can induce severe lock contention when writing to several files ++ * whose dnodes are in the same block. ++ */ ++static int ++dmu_buf_hold_array_by_dnode(dnode_t *dn, uint64_t offset, uint64_t length, ++ int read, void *tag, int *numbufsp, dmu_buf_t ***dbpp, uint32_t flags) ++{ ++ dsl_pool_t *dp = NULL; ++ dmu_buf_t **dbp; ++ uint64_t blkid, nblks, i; ++ uint32_t dbuf_flags; ++ int err; ++ zio_t *zio; ++ hrtime_t start = 0; ++ ++ ASSERT(length <= DMU_MAX_ACCESS); ++ ++ dbuf_flags = DB_RF_CANFAIL | DB_RF_NEVERWAIT | DB_RF_HAVESTRUCT; ++ if (flags & DMU_READ_NO_PREFETCH || length > zfetch_array_rd_sz) ++ dbuf_flags |= DB_RF_NOPREFETCH; ++ ++ rw_enter(&dn->dn_struct_rwlock, RW_READER); ++ if (dn->dn_datablkshift) { ++ int blkshift = dn->dn_datablkshift; ++ nblks = (P2ROUNDUP(offset+length, 1ULL<> blkshift; ++ } else { ++ if (offset + length > dn->dn_datablksz) { ++ zfs_panic_recover("zfs: accessing past end of object " ++ "%llx/%llx (size=%u access=%llu+%llu)", ++ (longlong_t)dn->dn_objset-> ++ os_dsl_dataset->ds_object, ++ (longlong_t)dn->dn_object, dn->dn_datablksz, ++ (longlong_t)offset, (longlong_t)length); ++ rw_exit(&dn->dn_struct_rwlock); ++ return (EIO); ++ } ++ nblks = 1; ++ } ++ dbp = kmem_zalloc(sizeof (dmu_buf_t *) * nblks, KM_PUSHPAGE | KM_NODEBUG); ++ ++ if (dn->dn_objset->os_dsl_dataset) ++ dp = dn->dn_objset->os_dsl_dataset->ds_dir->dd_pool; ++ if (dp && dsl_pool_sync_context(dp)) ++ start = gethrtime(); ++ zio = zio_root(dn->dn_objset->os_spa, NULL, NULL, ZIO_FLAG_CANFAIL); ++ blkid = dbuf_whichblock(dn, offset); ++ for (i = 0; i < nblks; i++) { ++ dmu_buf_impl_t *db = dbuf_hold(dn, blkid+i, tag); ++ if (db == NULL) { ++ rw_exit(&dn->dn_struct_rwlock); ++ dmu_buf_rele_array(dbp, nblks, tag); ++ zio_nowait(zio); ++ return (EIO); ++ } ++ /* initiate async i/o */ ++ if (read) { ++ (void) dbuf_read(db, zio, dbuf_flags); ++ } ++ dbp[i] = &db->db; ++ } ++ rw_exit(&dn->dn_struct_rwlock); ++ ++ /* wait for async i/o */ ++ err = 
zio_wait(zio); ++ /* track read overhead when we are in sync context */ ++ if (dp && dsl_pool_sync_context(dp)) ++ dp->dp_read_overhead += gethrtime() - start; ++ if (err) { ++ dmu_buf_rele_array(dbp, nblks, tag); ++ return (err); ++ } ++ ++ /* wait for other io to complete */ ++ if (read) { ++ for (i = 0; i < nblks; i++) { ++ dmu_buf_impl_t *db = (dmu_buf_impl_t *)dbp[i]; ++ mutex_enter(&db->db_mtx); ++ while (db->db_state == DB_READ || ++ db->db_state == DB_FILL) ++ cv_wait(&db->db_changed, &db->db_mtx); ++ if (db->db_state == DB_UNCACHED) ++ err = EIO; ++ mutex_exit(&db->db_mtx); ++ if (err) { ++ dmu_buf_rele_array(dbp, nblks, tag); ++ return (err); ++ } ++ } ++ } ++ ++ *numbufsp = nblks; ++ *dbpp = dbp; ++ return (0); ++} ++ ++static int ++dmu_buf_hold_array(objset_t *os, uint64_t object, uint64_t offset, ++ uint64_t length, int read, void *tag, int *numbufsp, dmu_buf_t ***dbpp) ++{ ++ dnode_t *dn; ++ int err; ++ ++ err = dnode_hold(os, object, FTAG, &dn); ++ if (err) ++ return (err); ++ ++ err = dmu_buf_hold_array_by_dnode(dn, offset, length, read, tag, ++ numbufsp, dbpp, DMU_READ_PREFETCH); ++ ++ dnode_rele(dn, FTAG); ++ ++ return (err); ++} ++ ++int ++dmu_buf_hold_array_by_bonus(dmu_buf_t *db_fake, uint64_t offset, ++ uint64_t length, int read, void *tag, int *numbufsp, dmu_buf_t ***dbpp) ++{ ++ dmu_buf_impl_t *db = (dmu_buf_impl_t *)db_fake; ++ dnode_t *dn; ++ int err; ++ ++ DB_DNODE_ENTER(db); ++ dn = DB_DNODE(db); ++ err = dmu_buf_hold_array_by_dnode(dn, offset, length, read, tag, ++ numbufsp, dbpp, DMU_READ_PREFETCH); ++ DB_DNODE_EXIT(db); ++ ++ return (err); ++} ++ ++void ++dmu_buf_rele_array(dmu_buf_t **dbp_fake, int numbufs, void *tag) ++{ ++ int i; ++ dmu_buf_impl_t **dbp = (dmu_buf_impl_t **)dbp_fake; ++ ++ if (numbufs == 0) ++ return; ++ ++ for (i = 0; i < numbufs; i++) { ++ if (dbp[i]) ++ dbuf_rele(dbp[i], tag); ++ } ++ ++ kmem_free(dbp, sizeof (dmu_buf_t *) * numbufs); ++} ++ ++void ++dmu_prefetch(objset_t *os, uint64_t object, uint64_t offset, uint64_t len) ++{ ++ dnode_t *dn; ++ uint64_t blkid; ++ int nblks, i, err; ++ ++ if (zfs_prefetch_disable) ++ return; ++ ++ if (len == 0) { /* they're interested in the bonus buffer */ ++ dn = DMU_META_DNODE(os); ++ ++ if (object == 0 || object >= DN_MAX_OBJECT) ++ return; ++ ++ rw_enter(&dn->dn_struct_rwlock, RW_READER); ++ blkid = dbuf_whichblock(dn, object * sizeof (dnode_phys_t)); ++ dbuf_prefetch(dn, blkid); ++ rw_exit(&dn->dn_struct_rwlock); ++ return; ++ } ++ ++ /* ++ * XXX - Note, if the dnode for the requested object is not ++ * already cached, we will do a *synchronous* read in the ++ * dnode_hold() call. The same is true for any indirects. ++ */ ++ err = dnode_hold(os, object, FTAG, &dn); ++ if (err != 0) ++ return; ++ ++ rw_enter(&dn->dn_struct_rwlock, RW_READER); ++ if (dn->dn_datablkshift) { ++ int blkshift = dn->dn_datablkshift; ++ nblks = (P2ROUNDUP(offset+len, 1<> blkshift; ++ } else { ++ nblks = (offset < dn->dn_datablksz); ++ } ++ ++ if (nblks != 0) { ++ blkid = dbuf_whichblock(dn, offset); ++ for (i = 0; i < nblks; i++) ++ dbuf_prefetch(dn, blkid+i); ++ } ++ ++ rw_exit(&dn->dn_struct_rwlock); ++ ++ dnode_rele(dn, FTAG); ++} ++ ++/* ++ * Get the next "chunk" of file data to free. We traverse the file from ++ * the end so that the file gets shorter over time (if we crashes in the ++ * middle, this will leave us in a better state). We find allocated file ++ * data by simply searching the allocated level 1 indirects. 
++ */ ++static int ++get_next_chunk(dnode_t *dn, uint64_t *start, uint64_t limit) ++{ ++ uint64_t len = *start - limit; ++ uint64_t blkcnt = 0; ++ uint64_t maxblks = DMU_MAX_ACCESS / (1ULL << (dn->dn_indblkshift + 1)); ++ uint64_t iblkrange = ++ dn->dn_datablksz * EPB(dn->dn_indblkshift, SPA_BLKPTRSHIFT); ++ ++ ASSERT(limit <= *start); ++ ++ if (len <= iblkrange * maxblks) { ++ *start = limit; ++ return (0); ++ } ++ ASSERT(ISP2(iblkrange)); ++ ++ while (*start > limit && blkcnt < maxblks) { ++ int err; ++ ++ /* find next allocated L1 indirect */ ++ err = dnode_next_offset(dn, ++ DNODE_FIND_BACKWARDS, start, 2, 1, 0); ++ ++ /* if there are no more, then we are done */ ++ if (err == ESRCH) { ++ *start = limit; ++ return (0); ++ } else if (err) { ++ return (err); ++ } ++ blkcnt += 1; ++ ++ /* reset offset to end of "next" block back */ ++ *start = P2ALIGN(*start, iblkrange); ++ if (*start <= limit) ++ *start = limit; ++ else ++ *start -= 1; ++ } ++ return (0); ++} ++ ++static int ++dmu_free_long_range_impl(objset_t *os, dnode_t *dn, uint64_t offset, ++ uint64_t length, boolean_t free_dnode) ++{ ++ dmu_tx_t *tx; ++ uint64_t object_size, start, end, len; ++ boolean_t trunc = (length == DMU_OBJECT_END); ++ int align, err; ++ ++ align = 1 << dn->dn_datablkshift; ++ ASSERT(align > 0); ++ object_size = align == 1 ? dn->dn_datablksz : ++ (dn->dn_maxblkid + 1) << dn->dn_datablkshift; ++ ++ end = offset + length; ++ if (trunc || end > object_size) ++ end = object_size; ++ if (end <= offset) ++ return (0); ++ length = end - offset; ++ ++ while (length) { ++ start = end; ++ /* assert(offset <= start) */ ++ err = get_next_chunk(dn, &start, offset); ++ if (err) ++ return (err); ++ len = trunc ? DMU_OBJECT_END : end - start; ++ ++ tx = dmu_tx_create(os); ++ dmu_tx_hold_free(tx, dn->dn_object, start, len); ++ err = dmu_tx_assign(tx, TXG_WAIT); ++ if (err) { ++ dmu_tx_abort(tx); ++ return (err); ++ } ++ ++ dnode_free_range(dn, start, trunc ? 
-1 : len, tx); ++ ++ if (start == 0 && free_dnode) { ++ ASSERT(trunc); ++ dnode_free(dn, tx); ++ } ++ ++ length -= end - start; ++ ++ dmu_tx_commit(tx); ++ end = start; ++ } ++ return (0); ++} ++ ++int ++dmu_free_long_range(objset_t *os, uint64_t object, ++ uint64_t offset, uint64_t length) ++{ ++ dnode_t *dn; ++ int err; ++ ++ err = dnode_hold(os, object, FTAG, &dn); ++ if (err != 0) ++ return (err); ++ err = dmu_free_long_range_impl(os, dn, offset, length, FALSE); ++ dnode_rele(dn, FTAG); ++ return (err); ++} ++ ++int ++dmu_free_object(objset_t *os, uint64_t object) ++{ ++ dnode_t *dn; ++ dmu_tx_t *tx; ++ int err; ++ ++ err = dnode_hold_impl(os, object, DNODE_MUST_BE_ALLOCATED, ++ FTAG, &dn); ++ if (err != 0) ++ return (err); ++ if (dn->dn_nlevels == 1) { ++ tx = dmu_tx_create(os); ++ dmu_tx_hold_bonus(tx, object); ++ dmu_tx_hold_free(tx, dn->dn_object, 0, DMU_OBJECT_END); ++ err = dmu_tx_assign(tx, TXG_WAIT); ++ if (err == 0) { ++ dnode_free_range(dn, 0, DMU_OBJECT_END, tx); ++ dnode_free(dn, tx); ++ dmu_tx_commit(tx); ++ } else { ++ dmu_tx_abort(tx); ++ } ++ } else { ++ err = dmu_free_long_range_impl(os, dn, 0, DMU_OBJECT_END, TRUE); ++ } ++ dnode_rele(dn, FTAG); ++ return (err); ++} ++ ++int ++dmu_free_range(objset_t *os, uint64_t object, uint64_t offset, ++ uint64_t size, dmu_tx_t *tx) ++{ ++ dnode_t *dn; ++ int err = dnode_hold(os, object, FTAG, &dn); ++ if (err) ++ return (err); ++ ASSERT(offset < UINT64_MAX); ++ ASSERT(size == -1ULL || size <= UINT64_MAX - offset); ++ dnode_free_range(dn, offset, size, tx); ++ dnode_rele(dn, FTAG); ++ return (0); ++} ++ ++int ++dmu_read(objset_t *os, uint64_t object, uint64_t offset, uint64_t size, ++ void *buf, uint32_t flags) ++{ ++ dnode_t *dn; ++ dmu_buf_t **dbp; ++ int numbufs, err; ++ ++ err = dnode_hold(os, object, FTAG, &dn); ++ if (err) ++ return (err); ++ ++ /* ++ * Deal with odd block sizes, where there can't be data past the first ++ * block. If we ever do the tail block optimization, we will need to ++ * handle that here as well. ++ */ ++ if (dn->dn_maxblkid == 0) { ++ int newsz = offset > dn->dn_datablksz ? 0 : ++ MIN(size, dn->dn_datablksz - offset); ++ bzero((char *)buf + newsz, size - newsz); ++ size = newsz; ++ } ++ ++ while (size > 0) { ++ uint64_t mylen = MIN(size, DMU_MAX_ACCESS / 2); ++ int i; ++ ++ /* ++ * NB: we could do this block-at-a-time, but it's nice ++ * to be reading in parallel. 
++ */ ++ err = dmu_buf_hold_array_by_dnode(dn, offset, mylen, ++ TRUE, FTAG, &numbufs, &dbp, flags); ++ if (err) ++ break; ++ ++ for (i = 0; i < numbufs; i++) { ++ int tocpy; ++ int bufoff; ++ dmu_buf_t *db = dbp[i]; ++ ++ ASSERT(size > 0); ++ ++ bufoff = offset - db->db_offset; ++ tocpy = (int)MIN(db->db_size - bufoff, size); ++ ++ bcopy((char *)db->db_data + bufoff, buf, tocpy); ++ ++ offset += tocpy; ++ size -= tocpy; ++ buf = (char *)buf + tocpy; ++ } ++ dmu_buf_rele_array(dbp, numbufs, FTAG); ++ } ++ dnode_rele(dn, FTAG); ++ return (err); ++} ++ ++void ++dmu_write(objset_t *os, uint64_t object, uint64_t offset, uint64_t size, ++ const void *buf, dmu_tx_t *tx) ++{ ++ dmu_buf_t **dbp; ++ int numbufs, i; ++ ++ if (size == 0) ++ return; ++ ++ VERIFY(0 == dmu_buf_hold_array(os, object, offset, size, ++ FALSE, FTAG, &numbufs, &dbp)); ++ ++ for (i = 0; i < numbufs; i++) { ++ int tocpy; ++ int bufoff; ++ dmu_buf_t *db = dbp[i]; ++ ++ ASSERT(size > 0); ++ ++ bufoff = offset - db->db_offset; ++ tocpy = (int)MIN(db->db_size - bufoff, size); ++ ++ ASSERT(i == 0 || i == numbufs-1 || tocpy == db->db_size); ++ ++ if (tocpy == db->db_size) ++ dmu_buf_will_fill(db, tx); ++ else ++ dmu_buf_will_dirty(db, tx); ++ ++ (void) memcpy((char *)db->db_data + bufoff, buf, tocpy); ++ ++ if (tocpy == db->db_size) ++ dmu_buf_fill_done(db, tx); ++ ++ offset += tocpy; ++ size -= tocpy; ++ buf = (char *)buf + tocpy; ++ } ++ dmu_buf_rele_array(dbp, numbufs, FTAG); ++} ++ ++void ++dmu_prealloc(objset_t *os, uint64_t object, uint64_t offset, uint64_t size, ++ dmu_tx_t *tx) ++{ ++ dmu_buf_t **dbp; ++ int numbufs, i; ++ ++ if (size == 0) ++ return; ++ ++ VERIFY(0 == dmu_buf_hold_array(os, object, offset, size, ++ FALSE, FTAG, &numbufs, &dbp)); ++ ++ for (i = 0; i < numbufs; i++) { ++ dmu_buf_t *db = dbp[i]; ++ ++ dmu_buf_will_not_fill(db, tx); ++ } ++ dmu_buf_rele_array(dbp, numbufs, FTAG); ++} ++ ++/* ++ * DMU support for xuio ++ */ ++kstat_t *xuio_ksp = NULL; ++ ++typedef struct xuio_stats { ++ /* loaned yet not returned arc_buf */ ++ kstat_named_t xuiostat_onloan_rbuf; ++ kstat_named_t xuiostat_onloan_wbuf; ++ /* whether a copy is made when loaning out a read buffer */ ++ kstat_named_t xuiostat_rbuf_copied; ++ kstat_named_t xuiostat_rbuf_nocopy; ++ /* whether a copy is made when assigning a write buffer */ ++ kstat_named_t xuiostat_wbuf_copied; ++ kstat_named_t xuiostat_wbuf_nocopy; ++} xuio_stats_t; ++ ++static xuio_stats_t xuio_stats = { ++ { "onloan_read_buf", KSTAT_DATA_UINT64 }, ++ { "onloan_write_buf", KSTAT_DATA_UINT64 }, ++ { "read_buf_copied", KSTAT_DATA_UINT64 }, ++ { "read_buf_nocopy", KSTAT_DATA_UINT64 }, ++ { "write_buf_copied", KSTAT_DATA_UINT64 }, ++ { "write_buf_nocopy", KSTAT_DATA_UINT64 } ++}; ++ ++#define XUIOSTAT_INCR(stat, val) \ ++ atomic_add_64(&xuio_stats.stat.value.ui64, (val)) ++#define XUIOSTAT_BUMP(stat) XUIOSTAT_INCR(stat, 1) ++ ++int ++dmu_xuio_init(xuio_t *xuio, int nblk) ++{ ++ dmu_xuio_t *priv; ++ uio_t *uio = &xuio->xu_uio; ++ ++ uio->uio_iovcnt = nblk; ++ uio->uio_iov = kmem_zalloc(nblk * sizeof (iovec_t), KM_PUSHPAGE); ++ ++ priv = kmem_zalloc(sizeof (dmu_xuio_t), KM_PUSHPAGE); ++ priv->cnt = nblk; ++ priv->bufs = kmem_zalloc(nblk * sizeof (arc_buf_t *), KM_PUSHPAGE); ++ priv->iovp = uio->uio_iov; ++ XUIO_XUZC_PRIV(xuio) = priv; ++ ++ if (XUIO_XUZC_RW(xuio) == UIO_READ) ++ XUIOSTAT_INCR(xuiostat_onloan_rbuf, nblk); ++ else ++ XUIOSTAT_INCR(xuiostat_onloan_wbuf, nblk); ++ ++ return (0); ++} ++ ++void ++dmu_xuio_fini(xuio_t *xuio) ++{ ++ dmu_xuio_t *priv = XUIO_XUZC_PRIV(xuio); ++ int 
nblk = priv->cnt; ++ ++ kmem_free(priv->iovp, nblk * sizeof (iovec_t)); ++ kmem_free(priv->bufs, nblk * sizeof (arc_buf_t *)); ++ kmem_free(priv, sizeof (dmu_xuio_t)); ++ ++ if (XUIO_XUZC_RW(xuio) == UIO_READ) ++ XUIOSTAT_INCR(xuiostat_onloan_rbuf, -nblk); ++ else ++ XUIOSTAT_INCR(xuiostat_onloan_wbuf, -nblk); ++} ++ ++/* ++ * Initialize iov[priv->next] and priv->bufs[priv->next] with { off, n, abuf } ++ * and increase priv->next by 1. ++ */ ++int ++dmu_xuio_add(xuio_t *xuio, arc_buf_t *abuf, offset_t off, size_t n) ++{ ++ struct iovec *iov; ++ uio_t *uio = &xuio->xu_uio; ++ dmu_xuio_t *priv = XUIO_XUZC_PRIV(xuio); ++ int i = priv->next++; ++ ++ ASSERT(i < priv->cnt); ++ ASSERT(off + n <= arc_buf_size(abuf)); ++ iov = uio->uio_iov + i; ++ iov->iov_base = (char *)abuf->b_data + off; ++ iov->iov_len = n; ++ priv->bufs[i] = abuf; ++ return (0); ++} ++ ++int ++dmu_xuio_cnt(xuio_t *xuio) ++{ ++ dmu_xuio_t *priv = XUIO_XUZC_PRIV(xuio); ++ return (priv->cnt); ++} ++ ++arc_buf_t * ++dmu_xuio_arcbuf(xuio_t *xuio, int i) ++{ ++ dmu_xuio_t *priv = XUIO_XUZC_PRIV(xuio); ++ ++ ASSERT(i < priv->cnt); ++ return (priv->bufs[i]); ++} ++ ++void ++dmu_xuio_clear(xuio_t *xuio, int i) ++{ ++ dmu_xuio_t *priv = XUIO_XUZC_PRIV(xuio); ++ ++ ASSERT(i < priv->cnt); ++ priv->bufs[i] = NULL; ++} ++ ++static void ++xuio_stat_init(void) ++{ ++ xuio_ksp = kstat_create("zfs", 0, "xuio_stats", "misc", ++ KSTAT_TYPE_NAMED, sizeof (xuio_stats) / sizeof (kstat_named_t), ++ KSTAT_FLAG_VIRTUAL); ++ if (xuio_ksp != NULL) { ++ xuio_ksp->ks_data = &xuio_stats; ++ kstat_install(xuio_ksp); ++ } ++} ++ ++static void ++xuio_stat_fini(void) ++{ ++ if (xuio_ksp != NULL) { ++ kstat_delete(xuio_ksp); ++ xuio_ksp = NULL; ++ } ++} ++ ++void ++xuio_stat_wbuf_copied() ++{ ++ XUIOSTAT_BUMP(xuiostat_wbuf_copied); ++} ++ ++void ++xuio_stat_wbuf_nocopy() ++{ ++ XUIOSTAT_BUMP(xuiostat_wbuf_nocopy); ++} ++ ++#ifdef _KERNEL ++ ++/* ++ * Copy up to size bytes between arg_buf and req based on the data direction ++ * described by the req. If an entire req's data cannot be transfered the ++ * req's is updated such that it's current index and bv offsets correctly ++ * reference any residual data which could not be copied. The return value ++ * is the number of bytes successfully copied to arg_buf. 
++ */ ++static int ++dmu_req_copy(void *arg_buf, int size, int *offset, struct request *req) ++{ ++ struct bio_vec *bv; ++ struct req_iterator iter; ++ char *bv_buf; ++ int tocpy; ++ ++ *offset = 0; ++ rq_for_each_segment(bv, req, iter) { ++ ++ /* Fully consumed the passed arg_buf */ ++ ASSERT3S(*offset, <=, size); ++ if (size == *offset) ++ break; ++ ++ /* Skip fully consumed bv's */ ++ if (bv->bv_len == 0) ++ continue; ++ ++ tocpy = MIN(bv->bv_len, size - *offset); ++ ASSERT3S(tocpy, >=, 0); ++ ++ bv_buf = page_address(bv->bv_page) + bv->bv_offset; ++ ASSERT3P(bv_buf, !=, NULL); ++ ++ if (rq_data_dir(req) == WRITE) ++ memcpy(arg_buf + *offset, bv_buf, tocpy); ++ else ++ memcpy(bv_buf, arg_buf + *offset, tocpy); ++ ++ *offset += tocpy; ++ bv->bv_offset += tocpy; ++ bv->bv_len -= tocpy; ++ } ++ ++ return 0; ++} ++ ++static void ++dmu_bio_put(struct bio *bio) ++{ ++ struct bio *bio_next; ++ ++ while (bio) { ++ bio_next = bio->bi_next; ++ bio_put(bio); ++ bio = bio_next; ++ } ++} ++ ++static int ++dmu_bio_clone(struct bio *bio, struct bio **bio_copy) ++{ ++ struct bio *bio_root = NULL; ++ struct bio *bio_last = NULL; ++ struct bio *bio_new; ++ ++ if (bio == NULL) ++ return EINVAL; ++ ++ while (bio) { ++ bio_new = bio_clone(bio, GFP_NOIO); ++ if (bio_new == NULL) { ++ dmu_bio_put(bio_root); ++ return ENOMEM; ++ } ++ ++ if (bio_last) { ++ bio_last->bi_next = bio_new; ++ bio_last = bio_new; ++ } else { ++ bio_root = bio_new; ++ bio_last = bio_new; ++ } ++ ++ bio = bio->bi_next; ++ } ++ ++ *bio_copy = bio_root; ++ ++ return 0; ++} ++ ++int ++dmu_read_req(objset_t *os, uint64_t object, struct request *req) ++{ ++ uint64_t size = blk_rq_bytes(req); ++ uint64_t offset = blk_rq_pos(req) << 9; ++ struct bio *bio_saved = req->bio; ++ dmu_buf_t **dbp; ++ int numbufs, i, err; ++ ++ /* ++ * NB: we could do this block-at-a-time, but it's nice ++ * to be reading in parallel. ++ */ ++ err = dmu_buf_hold_array(os, object, offset, size, TRUE, FTAG, ++ &numbufs, &dbp); ++ if (err) ++ return (err); ++ ++ /* ++ * Clone the bio list so the bv->bv_offset and bv->bv_len members ++ * can be safely modified. The original bio list is relinked in to ++ * the request when the function exits. This is required because ++ * some file systems blindly assume that these values will remain ++ * constant between bio_submit() and the IO completion callback. ++ */ ++ err = dmu_bio_clone(bio_saved, &req->bio); ++ if (err) ++ goto error; ++ ++ for (i = 0; i < numbufs; i++) { ++ int tocpy, didcpy, bufoff; ++ dmu_buf_t *db = dbp[i]; ++ ++ bufoff = offset - db->db_offset; ++ ASSERT3S(bufoff, >=, 0); ++ ++ tocpy = (int)MIN(db->db_size - bufoff, size); ++ if (tocpy == 0) ++ break; ++ ++ err = dmu_req_copy(db->db_data + bufoff, tocpy, &didcpy, req); ++ ++ if (didcpy < tocpy) ++ err = EIO; ++ ++ if (err) ++ break; ++ ++ size -= tocpy; ++ offset += didcpy; ++ err = 0; ++ } ++ ++ dmu_bio_put(req->bio); ++ req->bio = bio_saved; ++error: ++ dmu_buf_rele_array(dbp, numbufs, FTAG); ++ ++ return (err); ++} ++ ++int ++dmu_write_req(objset_t *os, uint64_t object, struct request *req, dmu_tx_t *tx) ++{ ++ uint64_t size = blk_rq_bytes(req); ++ uint64_t offset = blk_rq_pos(req) << 9; ++ struct bio *bio_saved = req->bio; ++ dmu_buf_t **dbp; ++ int numbufs; ++ int err = 0; ++ int i; ++ ++ if (size == 0) ++ return (0); ++ ++ err = dmu_buf_hold_array(os, object, offset, size, FALSE, FTAG, ++ &numbufs, &dbp); ++ if (err) ++ return (err); ++ ++ /* ++ * Clone the bio list so the bv->bv_offset and bv->bv_len members ++ * can be safely modified. 
The original bio list is relinked in to ++ * the request when the function exits. This is required because ++ * some file systems blindly assume that these values will remain ++ * constant between bio_submit() and the IO completion callback. ++ */ ++ err = dmu_bio_clone(bio_saved, &req->bio); ++ if (err) ++ goto error; ++ ++ for (i = 0; i < numbufs; i++) { ++ int tocpy, didcpy, bufoff; ++ dmu_buf_t *db = dbp[i]; ++ ++ bufoff = offset - db->db_offset; ++ ASSERT3S(bufoff, >=, 0); ++ ++ tocpy = (int)MIN(db->db_size - bufoff, size); ++ if (tocpy == 0) ++ break; ++ ++ ASSERT(i == 0 || i == numbufs-1 || tocpy == db->db_size); ++ ++ if (tocpy == db->db_size) ++ dmu_buf_will_fill(db, tx); ++ else ++ dmu_buf_will_dirty(db, tx); ++ ++ err = dmu_req_copy(db->db_data + bufoff, tocpy, &didcpy, req); ++ ++ if (tocpy == db->db_size) ++ dmu_buf_fill_done(db, tx); ++ ++ if (didcpy < tocpy) ++ err = EIO; ++ ++ if (err) ++ break; ++ ++ size -= tocpy; ++ offset += didcpy; ++ err = 0; ++ } ++ ++ dmu_bio_put(req->bio); ++ req->bio = bio_saved; ++error: ++ dmu_buf_rele_array(dbp, numbufs, FTAG); ++ ++ return (err); ++} ++ ++int ++dmu_read_uio(objset_t *os, uint64_t object, uio_t *uio, uint64_t size) ++{ ++ dmu_buf_t **dbp; ++ int numbufs, i, err; ++ xuio_t *xuio = NULL; ++ ++ /* ++ * NB: we could do this block-at-a-time, but it's nice ++ * to be reading in parallel. ++ */ ++ err = dmu_buf_hold_array(os, object, uio->uio_loffset, size, TRUE, FTAG, ++ &numbufs, &dbp); ++ if (err) ++ return (err); ++ ++ for (i = 0; i < numbufs; i++) { ++ int tocpy; ++ int bufoff; ++ dmu_buf_t *db = dbp[i]; ++ ++ ASSERT(size > 0); ++ ++ bufoff = uio->uio_loffset - db->db_offset; ++ tocpy = (int)MIN(db->db_size - bufoff, size); ++ ++ if (xuio) { ++ dmu_buf_impl_t *dbi = (dmu_buf_impl_t *)db; ++ arc_buf_t *dbuf_abuf = dbi->db_buf; ++ arc_buf_t *abuf = dbuf_loan_arcbuf(dbi); ++ err = dmu_xuio_add(xuio, abuf, bufoff, tocpy); ++ if (!err) { ++ uio->uio_resid -= tocpy; ++ uio->uio_loffset += tocpy; ++ } ++ ++ if (abuf == dbuf_abuf) ++ XUIOSTAT_BUMP(xuiostat_rbuf_nocopy); ++ else ++ XUIOSTAT_BUMP(xuiostat_rbuf_copied); ++ } else { ++ err = uiomove((char *)db->db_data + bufoff, tocpy, ++ UIO_READ, uio); ++ } ++ if (err) ++ break; ++ ++ size -= tocpy; ++ } ++ dmu_buf_rele_array(dbp, numbufs, FTAG); ++ ++ return (err); ++} ++ ++static int ++dmu_write_uio_dnode(dnode_t *dn, uio_t *uio, uint64_t size, dmu_tx_t *tx) ++{ ++ dmu_buf_t **dbp; ++ int numbufs; ++ int err = 0; ++ int i; ++ ++ err = dmu_buf_hold_array_by_dnode(dn, uio->uio_loffset, size, ++ FALSE, FTAG, &numbufs, &dbp, DMU_READ_PREFETCH); ++ if (err) ++ return (err); ++ ++ for (i = 0; i < numbufs; i++) { ++ int tocpy; ++ int bufoff; ++ dmu_buf_t *db = dbp[i]; ++ ++ ASSERT(size > 0); ++ ++ bufoff = uio->uio_loffset - db->db_offset; ++ tocpy = (int)MIN(db->db_size - bufoff, size); ++ ++ ASSERT(i == 0 || i == numbufs-1 || tocpy == db->db_size); ++ ++ if (tocpy == db->db_size) ++ dmu_buf_will_fill(db, tx); ++ else ++ dmu_buf_will_dirty(db, tx); ++ ++ /* ++ * XXX uiomove could block forever (eg.nfs-backed ++ * pages). There needs to be a uiolockdown() function ++ * to lock the pages in memory, so that uiomove won't ++ * block. 
++ */ ++ err = uiomove((char *)db->db_data + bufoff, tocpy, ++ UIO_WRITE, uio); ++ ++ if (tocpy == db->db_size) ++ dmu_buf_fill_done(db, tx); ++ ++ if (err) ++ break; ++ ++ size -= tocpy; ++ } ++ ++ dmu_buf_rele_array(dbp, numbufs, FTAG); ++ return (err); ++} ++ ++int ++dmu_write_uio_dbuf(dmu_buf_t *zdb, uio_t *uio, uint64_t size, ++ dmu_tx_t *tx) ++{ ++ dmu_buf_impl_t *db = (dmu_buf_impl_t *)zdb; ++ dnode_t *dn; ++ int err; ++ ++ if (size == 0) ++ return (0); ++ ++ DB_DNODE_ENTER(db); ++ dn = DB_DNODE(db); ++ err = dmu_write_uio_dnode(dn, uio, size, tx); ++ DB_DNODE_EXIT(db); ++ ++ return (err); ++} ++ ++int ++dmu_write_uio(objset_t *os, uint64_t object, uio_t *uio, uint64_t size, ++ dmu_tx_t *tx) ++{ ++ dnode_t *dn; ++ int err; ++ ++ if (size == 0) ++ return (0); ++ ++ err = dnode_hold(os, object, FTAG, &dn); ++ if (err) ++ return (err); ++ ++ err = dmu_write_uio_dnode(dn, uio, size, tx); ++ ++ dnode_rele(dn, FTAG); ++ ++ return (err); ++} ++#endif /* _KERNEL */ ++ ++/* ++ * Allocate a loaned anonymous arc buffer. ++ */ ++arc_buf_t * ++dmu_request_arcbuf(dmu_buf_t *handle, int size) ++{ ++ dmu_buf_impl_t *db = (dmu_buf_impl_t *)handle; ++ spa_t *spa; ++ ++ DB_GET_SPA(&spa, db); ++ return (arc_loan_buf(spa, size)); ++} ++ ++/* ++ * Free a loaned arc buffer. ++ */ ++void ++dmu_return_arcbuf(arc_buf_t *buf) ++{ ++ arc_return_buf(buf, FTAG); ++ VERIFY(arc_buf_remove_ref(buf, FTAG) == 1); ++} ++ ++/* ++ * When possible directly assign passed loaned arc buffer to a dbuf. ++ * If this is not possible copy the contents of passed arc buf via ++ * dmu_write(). ++ */ ++void ++dmu_assign_arcbuf(dmu_buf_t *handle, uint64_t offset, arc_buf_t *buf, ++ dmu_tx_t *tx) ++{ ++ dmu_buf_impl_t *dbuf = (dmu_buf_impl_t *)handle; ++ dnode_t *dn; ++ dmu_buf_impl_t *db; ++ uint32_t blksz = (uint32_t)arc_buf_size(buf); ++ uint64_t blkid; ++ ++ DB_DNODE_ENTER(dbuf); ++ dn = DB_DNODE(dbuf); ++ rw_enter(&dn->dn_struct_rwlock, RW_READER); ++ blkid = dbuf_whichblock(dn, offset); ++ VERIFY((db = dbuf_hold(dn, blkid, FTAG)) != NULL); ++ rw_exit(&dn->dn_struct_rwlock); ++ DB_DNODE_EXIT(dbuf); ++ ++ if (offset == db->db.db_offset && blksz == db->db.db_size) { ++ dbuf_assign_arcbuf(db, buf, tx); ++ dbuf_rele(db, FTAG); ++ } else { ++ objset_t *os; ++ uint64_t object; ++ ++ DB_DNODE_ENTER(dbuf); ++ dn = DB_DNODE(dbuf); ++ os = dn->dn_objset; ++ object = dn->dn_object; ++ DB_DNODE_EXIT(dbuf); ++ ++ dbuf_rele(db, FTAG); ++ dmu_write(os, object, offset, blksz, buf->b_data, tx); ++ dmu_return_arcbuf(buf); ++ XUIOSTAT_BUMP(xuiostat_wbuf_copied); ++ } ++} ++ ++typedef struct { ++ dbuf_dirty_record_t *dsa_dr; ++ dmu_sync_cb_t *dsa_done; ++ zgd_t *dsa_zgd; ++ dmu_tx_t *dsa_tx; ++} dmu_sync_arg_t; ++ ++/* ARGSUSED */ ++static void ++dmu_sync_ready(zio_t *zio, arc_buf_t *buf, void *varg) ++{ ++ dmu_sync_arg_t *dsa = varg; ++ dmu_buf_t *db = dsa->dsa_zgd->zgd_db; ++ blkptr_t *bp = zio->io_bp; ++ ++ if (zio->io_error == 0) { ++ if (BP_IS_HOLE(bp)) { ++ /* ++ * A block of zeros may compress to a hole, but the ++ * block size still needs to be known for replay. 
++ */ ++ BP_SET_LSIZE(bp, db->db_size); ++ } else { ++ ASSERT(BP_GET_LEVEL(bp) == 0); ++ bp->blk_fill = 1; ++ } ++ } ++} ++ ++static void ++dmu_sync_late_arrival_ready(zio_t *zio) ++{ ++ dmu_sync_ready(zio, NULL, zio->io_private); ++} ++ ++/* ARGSUSED */ ++static void ++dmu_sync_done(zio_t *zio, arc_buf_t *buf, void *varg) ++{ ++ dmu_sync_arg_t *dsa = varg; ++ dbuf_dirty_record_t *dr = dsa->dsa_dr; ++ dmu_buf_impl_t *db = dr->dr_dbuf; ++ ++ mutex_enter(&db->db_mtx); ++ ASSERT(dr->dt.dl.dr_override_state == DR_IN_DMU_SYNC); ++ if (zio->io_error == 0) { ++ dr->dt.dl.dr_overridden_by = *zio->io_bp; ++ dr->dt.dl.dr_override_state = DR_OVERRIDDEN; ++ dr->dt.dl.dr_copies = zio->io_prop.zp_copies; ++ if (BP_IS_HOLE(&dr->dt.dl.dr_overridden_by)) ++ BP_ZERO(&dr->dt.dl.dr_overridden_by); ++ } else { ++ dr->dt.dl.dr_override_state = DR_NOT_OVERRIDDEN; ++ } ++ cv_broadcast(&db->db_changed); ++ mutex_exit(&db->db_mtx); ++ ++ dsa->dsa_done(dsa->dsa_zgd, zio->io_error); ++ ++ kmem_free(dsa, sizeof (*dsa)); ++} ++ ++static void ++dmu_sync_late_arrival_done(zio_t *zio) ++{ ++ blkptr_t *bp = zio->io_bp; ++ dmu_sync_arg_t *dsa = zio->io_private; ++ ++ if (zio->io_error == 0 && !BP_IS_HOLE(bp)) { ++ ASSERT(zio->io_bp->blk_birth == zio->io_txg); ++ ASSERT(zio->io_txg > spa_syncing_txg(zio->io_spa)); ++ zio_free(zio->io_spa, zio->io_txg, zio->io_bp); ++ } ++ ++ dmu_tx_commit(dsa->dsa_tx); ++ ++ dsa->dsa_done(dsa->dsa_zgd, zio->io_error); ++ ++ kmem_free(dsa, sizeof (*dsa)); ++} ++ ++static int ++dmu_sync_late_arrival(zio_t *pio, objset_t *os, dmu_sync_cb_t *done, zgd_t *zgd, ++ zio_prop_t *zp, zbookmark_t *zb) ++{ ++ dmu_sync_arg_t *dsa; ++ dmu_tx_t *tx; ++ ++ tx = dmu_tx_create(os); ++ dmu_tx_hold_space(tx, zgd->zgd_db->db_size); ++ if (dmu_tx_assign(tx, TXG_WAIT) != 0) { ++ dmu_tx_abort(tx); ++ return (EIO); /* Make zl_get_data do txg_waited_synced() */ ++ } ++ ++ dsa = kmem_alloc(sizeof (dmu_sync_arg_t), KM_PUSHPAGE); ++ dsa->dsa_dr = NULL; ++ dsa->dsa_done = done; ++ dsa->dsa_zgd = zgd; ++ dsa->dsa_tx = tx; ++ ++ zio_nowait(zio_write(pio, os->os_spa, dmu_tx_get_txg(tx), zgd->zgd_bp, ++ zgd->zgd_db->db_data, zgd->zgd_db->db_size, zp, ++ dmu_sync_late_arrival_ready, dmu_sync_late_arrival_done, dsa, ++ ZIO_PRIORITY_SYNC_WRITE, ZIO_FLAG_CANFAIL | ZIO_FLAG_FASTWRITE, zb)); ++ ++ return (0); ++} ++ ++/* ++ * Intent log support: sync the block associated with db to disk. ++ * N.B. and XXX: the caller is responsible for making sure that the ++ * data isn't changing while dmu_sync() is writing it. ++ * ++ * Return values: ++ * ++ * EEXIST: this txg has already been synced, so there's nothing to to. ++ * The caller should not log the write. ++ * ++ * ENOENT: the block was dbuf_free_range()'d, so there's nothing to do. ++ * The caller should not log the write. ++ * ++ * EALREADY: this block is already in the process of being synced. ++ * The caller should track its progress (somehow). ++ * ++ * EIO: could not do the I/O. ++ * The caller should do a txg_wait_synced(). ++ * ++ * 0: the I/O has been initiated. ++ * The caller should log this blkptr in the done callback. ++ * It is possible that the I/O will fail, in which case ++ * the error will be reported to the done callback and ++ * propagated to pio from zio_done(). 
++ */ ++int ++dmu_sync(zio_t *pio, uint64_t txg, dmu_sync_cb_t *done, zgd_t *zgd) ++{ ++ blkptr_t *bp = zgd->zgd_bp; ++ dmu_buf_impl_t *db = (dmu_buf_impl_t *)zgd->zgd_db; ++ objset_t *os = db->db_objset; ++ dsl_dataset_t *ds = os->os_dsl_dataset; ++ dbuf_dirty_record_t *dr; ++ dmu_sync_arg_t *dsa; ++ zbookmark_t zb; ++ zio_prop_t zp; ++ dnode_t *dn; ++ ++ ASSERT(pio != NULL); ++ ASSERT(BP_IS_HOLE(bp)); ++ ASSERT(txg != 0); ++ ++ SET_BOOKMARK(&zb, ds->ds_object, ++ db->db.db_object, db->db_level, db->db_blkid); ++ ++ DB_DNODE_ENTER(db); ++ dn = DB_DNODE(db); ++ dmu_write_policy(os, dn, db->db_level, WP_DMU_SYNC, &zp); ++ DB_DNODE_EXIT(db); ++ ++ /* ++ * If we're frozen (running ziltest), we always need to generate a bp. ++ */ ++ if (txg > spa_freeze_txg(os->os_spa)) ++ return (dmu_sync_late_arrival(pio, os, done, zgd, &zp, &zb)); ++ ++ /* ++ * Grabbing db_mtx now provides a barrier between dbuf_sync_leaf() ++ * and us. If we determine that this txg is not yet syncing, ++ * but it begins to sync a moment later, that's OK because the ++ * sync thread will block in dbuf_sync_leaf() until we drop db_mtx. ++ */ ++ mutex_enter(&db->db_mtx); ++ ++ if (txg <= spa_last_synced_txg(os->os_spa)) { ++ /* ++ * This txg has already synced. There's nothing to do. ++ */ ++ mutex_exit(&db->db_mtx); ++ return (EEXIST); ++ } ++ ++ if (txg <= spa_syncing_txg(os->os_spa)) { ++ /* ++ * This txg is currently syncing, so we can't mess with ++ * the dirty record anymore; just write a new log block. ++ */ ++ mutex_exit(&db->db_mtx); ++ return (dmu_sync_late_arrival(pio, os, done, zgd, &zp, &zb)); ++ } ++ ++ dr = db->db_last_dirty; ++ while (dr && dr->dr_txg != txg) ++ dr = dr->dr_next; ++ ++ if (dr == NULL) { ++ /* ++ * There's no dr for this dbuf, so it must have been freed. ++ * There's no need to log writes to freed blocks, so we're done. ++ */ ++ mutex_exit(&db->db_mtx); ++ return (ENOENT); ++ } ++ ++ ASSERT(dr->dr_txg == txg); ++ if (dr->dt.dl.dr_override_state == DR_IN_DMU_SYNC || ++ dr->dt.dl.dr_override_state == DR_OVERRIDDEN) { ++ /* ++ * We have already issued a sync write for this buffer, ++ * or this buffer has already been synced. It could not ++ * have been dirtied since, or we would have cleared the state. 
++ */ ++ mutex_exit(&db->db_mtx); ++ return (EALREADY); ++ } ++ ++ ASSERT(dr->dt.dl.dr_override_state == DR_NOT_OVERRIDDEN); ++ dr->dt.dl.dr_override_state = DR_IN_DMU_SYNC; ++ mutex_exit(&db->db_mtx); ++ ++ dsa = kmem_alloc(sizeof (dmu_sync_arg_t), KM_PUSHPAGE); ++ dsa->dsa_dr = dr; ++ dsa->dsa_done = done; ++ dsa->dsa_zgd = zgd; ++ dsa->dsa_tx = NULL; ++ ++ zio_nowait(arc_write(pio, os->os_spa, txg, ++ bp, dr->dt.dl.dr_data, DBUF_IS_L2CACHEABLE(db), &zp, ++ dmu_sync_ready, dmu_sync_done, dsa, ++ ZIO_PRIORITY_SYNC_WRITE, ZIO_FLAG_CANFAIL | ZIO_FLAG_FASTWRITE, &zb)); ++ ++ return (0); ++} ++ ++int ++dmu_object_set_blocksize(objset_t *os, uint64_t object, uint64_t size, int ibs, ++ dmu_tx_t *tx) ++{ ++ dnode_t *dn; ++ int err; ++ ++ err = dnode_hold(os, object, FTAG, &dn); ++ if (err) ++ return (err); ++ err = dnode_set_blksz(dn, size, ibs, tx); ++ dnode_rele(dn, FTAG); ++ return (err); ++} ++ ++void ++dmu_object_set_checksum(objset_t *os, uint64_t object, uint8_t checksum, ++ dmu_tx_t *tx) ++{ ++ dnode_t *dn; ++ ++ /* XXX assumes dnode_hold will not get an i/o error */ ++ (void) dnode_hold(os, object, FTAG, &dn); ++ ASSERT(checksum < ZIO_CHECKSUM_FUNCTIONS); ++ dn->dn_checksum = checksum; ++ dnode_setdirty(dn, tx); ++ dnode_rele(dn, FTAG); ++} ++ ++void ++dmu_object_set_compress(objset_t *os, uint64_t object, uint8_t compress, ++ dmu_tx_t *tx) ++{ ++ dnode_t *dn; ++ ++ /* XXX assumes dnode_hold will not get an i/o error */ ++ (void) dnode_hold(os, object, FTAG, &dn); ++ ASSERT(compress < ZIO_COMPRESS_FUNCTIONS); ++ dn->dn_compress = compress; ++ dnode_setdirty(dn, tx); ++ dnode_rele(dn, FTAG); ++} ++ ++int zfs_mdcomp_disable = 0; ++ ++void ++dmu_write_policy(objset_t *os, dnode_t *dn, int level, int wp, zio_prop_t *zp) ++{ ++ dmu_object_type_t type = dn ? dn->dn_type : DMU_OT_OBJSET; ++ boolean_t ismd = (level > 0 || dmu_ot[type].ot_metadata || ++ (wp & WP_SPILL)); ++ enum zio_checksum checksum = os->os_checksum; ++ enum zio_compress compress = os->os_compress; ++ enum zio_checksum dedup_checksum = os->os_dedup_checksum; ++ boolean_t dedup; ++ boolean_t dedup_verify = os->os_dedup_verify; ++ int copies = os->os_copies; ++ ++ /* ++ * Determine checksum setting. ++ */ ++ if (ismd) { ++ /* ++ * Metadata always gets checksummed. If the data ++ * checksum is multi-bit correctable, and it's not a ++ * ZBT-style checksum, then it's suitable for metadata ++ * as well. Otherwise, the metadata checksum defaults ++ * to fletcher4. ++ */ ++ if (zio_checksum_table[checksum].ci_correctable < 1 || ++ zio_checksum_table[checksum].ci_eck) ++ checksum = ZIO_CHECKSUM_FLETCHER_4; ++ } else { ++ checksum = zio_checksum_select(dn->dn_checksum, checksum); ++ } ++ ++ /* ++ * Determine compression setting. ++ */ ++ if (ismd) { ++ /* ++ * XXX -- we should design a compression algorithm ++ * that specializes in arrays of bps. ++ */ ++ compress = zfs_mdcomp_disable ? ZIO_COMPRESS_EMPTY : ++ ZIO_COMPRESS_LZJB; ++ } else { ++ compress = zio_compress_select(dn->dn_compress, compress); ++ } ++ ++ /* ++ * Determine dedup setting. If we are in dmu_sync(), we won't ++ * actually dedup now because that's all done in syncing context; ++ * but we do want to use the dedup checkum. If the checksum is not ++ * strong enough to ensure unique signatures, force dedup_verify. 
++ */ ++ dedup = (!ismd && dedup_checksum != ZIO_CHECKSUM_OFF); ++ if (dedup) { ++ checksum = dedup_checksum; ++ if (!zio_checksum_table[checksum].ci_dedup) ++ dedup_verify = 1; ++ } ++ ++ if (wp & WP_DMU_SYNC) ++ dedup = 0; ++ ++ if (wp & WP_NOFILL) { ++ ASSERT(!ismd && level == 0); ++ checksum = ZIO_CHECKSUM_OFF; ++ compress = ZIO_COMPRESS_OFF; ++ dedup = B_FALSE; ++ } ++ ++ zp->zp_checksum = checksum; ++ zp->zp_compress = compress; ++ zp->zp_type = (wp & WP_SPILL) ? dn->dn_bonustype : type; ++ zp->zp_level = level; ++ zp->zp_copies = MIN(copies + ismd, spa_max_replication(os->os_spa)); ++ zp->zp_dedup = dedup; ++ zp->zp_dedup_verify = dedup && dedup_verify; ++} ++ ++int ++dmu_offset_next(objset_t *os, uint64_t object, boolean_t hole, uint64_t *off) ++{ ++ dnode_t *dn; ++ int i, err; ++ ++ err = dnode_hold(os, object, FTAG, &dn); ++ if (err) ++ return (err); ++ /* ++ * Sync any current changes before ++ * we go trundling through the block pointers. ++ */ ++ for (i = 0; i < TXG_SIZE; i++) { ++ if (list_link_active(&dn->dn_dirty_link[i])) ++ break; ++ } ++ if (i != TXG_SIZE) { ++ dnode_rele(dn, FTAG); ++ txg_wait_synced(dmu_objset_pool(os), 0); ++ err = dnode_hold(os, object, FTAG, &dn); ++ if (err) ++ return (err); ++ } ++ ++ err = dnode_next_offset(dn, (hole ? DNODE_FIND_HOLE : 0), off, 1, 1, 0); ++ dnode_rele(dn, FTAG); ++ ++ return (err); ++} ++ ++void ++dmu_object_info_from_dnode(dnode_t *dn, dmu_object_info_t *doi) ++{ ++ dnode_phys_t *dnp; ++ int i; ++ ++ rw_enter(&dn->dn_struct_rwlock, RW_READER); ++ mutex_enter(&dn->dn_mtx); ++ ++ dnp = dn->dn_phys; ++ ++ doi->doi_data_block_size = dn->dn_datablksz; ++ doi->doi_metadata_block_size = dn->dn_indblkshift ? ++ 1ULL << dn->dn_indblkshift : 0; ++ doi->doi_type = dn->dn_type; ++ doi->doi_bonus_type = dn->dn_bonustype; ++ doi->doi_bonus_size = dn->dn_bonuslen; ++ doi->doi_indirection = dn->dn_nlevels; ++ doi->doi_checksum = dn->dn_checksum; ++ doi->doi_compress = dn->dn_compress; ++ doi->doi_physical_blocks_512 = (DN_USED_BYTES(dnp) + 256) >> 9; ++ doi->doi_max_offset = (dnp->dn_maxblkid + 1) * dn->dn_datablksz; ++ doi->doi_fill_count = 0; ++ for (i = 0; i < dnp->dn_nblkptr; i++) ++ doi->doi_fill_count += dnp->dn_blkptr[i].blk_fill; ++ ++ mutex_exit(&dn->dn_mtx); ++ rw_exit(&dn->dn_struct_rwlock); ++} ++ ++/* ++ * Get information on a DMU object. ++ * If doi is NULL, just indicates whether the object exists. ++ */ ++int ++dmu_object_info(objset_t *os, uint64_t object, dmu_object_info_t *doi) ++{ ++ dnode_t *dn; ++ int err = dnode_hold(os, object, FTAG, &dn); ++ ++ if (err) ++ return (err); ++ ++ if (doi != NULL) ++ dmu_object_info_from_dnode(dn, doi); ++ ++ dnode_rele(dn, FTAG); ++ return (0); ++} ++ ++/* ++ * As above, but faster; can be used when you have a held dbuf in hand. ++ */ ++void ++dmu_object_info_from_db(dmu_buf_t *db_fake, dmu_object_info_t *doi) ++{ ++ dmu_buf_impl_t *db = (dmu_buf_impl_t *)db_fake; ++ ++ DB_DNODE_ENTER(db); ++ dmu_object_info_from_dnode(DB_DNODE(db), doi); ++ DB_DNODE_EXIT(db); ++} ++ ++/* ++ * Faster still when you only care about the size. ++ * This is specifically optimized for zfs_getattr(). 
++ */ ++void ++dmu_object_size_from_db(dmu_buf_t *db_fake, uint32_t *blksize, ++ u_longlong_t *nblk512) ++{ ++ dmu_buf_impl_t *db = (dmu_buf_impl_t *)db_fake; ++ dnode_t *dn; ++ ++ DB_DNODE_ENTER(db); ++ dn = DB_DNODE(db); ++ ++ *blksize = dn->dn_datablksz; ++ /* add 1 for dnode space */ ++ *nblk512 = ((DN_USED_BYTES(dn->dn_phys) + SPA_MINBLOCKSIZE/2) >> ++ SPA_MINBLOCKSHIFT) + 1; ++ DB_DNODE_EXIT(db); ++} ++ ++void ++byteswap_uint64_array(void *vbuf, size_t size) ++{ ++ uint64_t *buf = vbuf; ++ size_t count = size >> 3; ++ int i; ++ ++ ASSERT((size & 7) == 0); ++ ++ for (i = 0; i < count; i++) ++ buf[i] = BSWAP_64(buf[i]); ++} ++ ++void ++byteswap_uint32_array(void *vbuf, size_t size) ++{ ++ uint32_t *buf = vbuf; ++ size_t count = size >> 2; ++ int i; ++ ++ ASSERT((size & 3) == 0); ++ ++ for (i = 0; i < count; i++) ++ buf[i] = BSWAP_32(buf[i]); ++} ++ ++void ++byteswap_uint16_array(void *vbuf, size_t size) ++{ ++ uint16_t *buf = vbuf; ++ size_t count = size >> 1; ++ int i; ++ ++ ASSERT((size & 1) == 0); ++ ++ for (i = 0; i < count; i++) ++ buf[i] = BSWAP_16(buf[i]); ++} ++ ++/* ARGSUSED */ ++void ++byteswap_uint8_array(void *vbuf, size_t size) ++{ ++} ++ ++void ++dmu_init(void) ++{ ++ zfs_dbgmsg_init(); ++ sa_cache_init(); ++ xuio_stat_init(); ++ dmu_objset_init(); ++ dnode_init(); ++ dbuf_init(); ++ zfetch_init(); ++ dmu_tx_init(); ++ arc_init(); ++ l2arc_init(); ++} ++ ++void ++dmu_fini(void) ++{ ++ l2arc_fini(); ++ arc_fini(); ++ dmu_tx_fini(); ++ zfetch_fini(); ++ dbuf_fini(); ++ dnode_fini(); ++ dmu_objset_fini(); ++ xuio_stat_fini(); ++ sa_cache_fini(); ++ zfs_dbgmsg_fini(); ++} ++ ++#if defined(_KERNEL) && defined(HAVE_SPL) ++EXPORT_SYMBOL(dmu_bonus_hold); ++EXPORT_SYMBOL(dmu_buf_hold_array_by_bonus); ++EXPORT_SYMBOL(dmu_buf_rele_array); ++EXPORT_SYMBOL(dmu_free_range); ++EXPORT_SYMBOL(dmu_read); ++EXPORT_SYMBOL(dmu_write); ++EXPORT_SYMBOL(dmu_object_info); ++EXPORT_SYMBOL(dmu_object_info_from_dnode); ++EXPORT_SYMBOL(dmu_object_info_from_db); ++EXPORT_SYMBOL(dmu_object_size_from_db); ++EXPORT_SYMBOL(dmu_object_set_blocksize); ++EXPORT_SYMBOL(dmu_object_set_checksum); ++EXPORT_SYMBOL(dmu_object_set_compress); ++EXPORT_SYMBOL(dmu_request_arcbuf); ++EXPORT_SYMBOL(dmu_return_arcbuf); ++EXPORT_SYMBOL(dmu_assign_arcbuf); ++EXPORT_SYMBOL(dmu_buf_hold); ++EXPORT_SYMBOL(dmu_ot); ++ ++module_param(zfs_mdcomp_disable, int, 0644); ++MODULE_PARM_DESC(zfs_mdcomp_disable, "Disable meta data compression"); ++#endif +diff -uNr linux-3.2.33-go.orig/fs/zfs/zfs/dmu_diff.c linux-3.2.33-go/fs/zfs/zfs/dmu_diff.c +--- linux-3.2.33-go.orig/fs/zfs/zfs/dmu_diff.c 1970-01-01 01:00:00.000000000 +0100 ++++ linux-3.2.33-go/fs/zfs/zfs/dmu_diff.c 2012-11-16 23:25:34.348039346 +0100 +@@ -0,0 +1,221 @@ ++/* ++ * CDDL HEADER START ++ * ++ * The contents of this file are subject to the terms of the ++ * Common Development and Distribution License (the "License"). ++ * You may not use this file except in compliance with the License. ++ * ++ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE ++ * or http://www.opensolaris.org/os/licensing. ++ * See the License for the specific language governing permissions ++ * and limitations under the License. ++ * ++ * When distributing Covered Code, include this CDDL HEADER in each ++ * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 
++ * If applicable, add the following below this CDDL HEADER, with the ++ * fields enclosed by brackets "[]" replaced with your own identifying ++ * information: Portions Copyright [yyyy] [name of copyright owner] ++ * ++ * CDDL HEADER END ++ */ ++/* ++ * Copyright (c) 2010, Oracle and/or its affiliates. All rights reserved. ++ */ ++ ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++ ++struct diffarg { ++ struct vnode *da_vp; /* file to which we are reporting */ ++ offset_t *da_offp; ++ int da_err; /* error that stopped diff search */ ++ dmu_diff_record_t da_ddr; ++}; ++ ++static int ++write_record(struct diffarg *da) ++{ ++ ssize_t resid; /* have to get resid to get detailed errno */ ++ ++ if (da->da_ddr.ddr_type == DDR_NONE) { ++ da->da_err = 0; ++ return (0); ++ } ++ ++ da->da_err = vn_rdwr(UIO_WRITE, da->da_vp, (caddr_t)&da->da_ddr, ++ sizeof (da->da_ddr), 0, UIO_SYSSPACE, FAPPEND, ++ RLIM64_INFINITY, CRED(), &resid); ++ *da->da_offp += sizeof (da->da_ddr); ++ return (da->da_err); ++} ++ ++static int ++report_free_dnode_range(struct diffarg *da, uint64_t first, uint64_t last) ++{ ++ ASSERT(first <= last); ++ if (da->da_ddr.ddr_type != DDR_FREE || ++ first != da->da_ddr.ddr_last + 1) { ++ if (write_record(da) != 0) ++ return (da->da_err); ++ da->da_ddr.ddr_type = DDR_FREE; ++ da->da_ddr.ddr_first = first; ++ da->da_ddr.ddr_last = last; ++ return (0); ++ } ++ da->da_ddr.ddr_last = last; ++ return (0); ++} ++ ++static int ++report_dnode(struct diffarg *da, uint64_t object, dnode_phys_t *dnp) ++{ ++ ASSERT(dnp != NULL); ++ if (dnp->dn_type == DMU_OT_NONE) ++ return (report_free_dnode_range(da, object, object)); ++ ++ if (da->da_ddr.ddr_type != DDR_INUSE || ++ object != da->da_ddr.ddr_last + 1) { ++ if (write_record(da) != 0) ++ return (da->da_err); ++ da->da_ddr.ddr_type = DDR_INUSE; ++ da->da_ddr.ddr_first = da->da_ddr.ddr_last = object; ++ return (0); ++ } ++ da->da_ddr.ddr_last = object; ++ return (0); ++} ++ ++#define DBP_SPAN(dnp, level) \ ++ (((uint64_t)dnp->dn_datablkszsec) << (SPA_MINBLOCKSHIFT + \ ++ (level) * (dnp->dn_indblkshift - SPA_BLKPTRSHIFT))) ++ ++/* ARGSUSED */ ++static int ++diff_cb(spa_t *spa, zilog_t *zilog, const blkptr_t *bp, arc_buf_t *pbuf, ++ const zbookmark_t *zb, const dnode_phys_t *dnp, void *arg) ++{ ++ struct diffarg *da = arg; ++ int err = 0; ++ ++ if (issig(JUSTLOOKING) && issig(FORREAL)) ++ return (EINTR); ++ ++ if (zb->zb_object != DMU_META_DNODE_OBJECT) ++ return (0); ++ ++ if (bp == NULL) { ++ uint64_t span = DBP_SPAN(dnp, zb->zb_level); ++ uint64_t dnobj = (zb->zb_blkid * span) >> DNODE_SHIFT; ++ ++ err = report_free_dnode_range(da, dnobj, ++ dnobj + (span >> DNODE_SHIFT) - 1); ++ if (err) ++ return (err); ++ } else if (zb->zb_level == 0) { ++ dnode_phys_t *blk; ++ arc_buf_t *abuf; ++ uint32_t aflags = ARC_WAIT; ++ int blksz = BP_GET_LSIZE(bp); ++ int i; ++ ++ if (dsl_read(NULL, spa, bp, pbuf, ++ arc_getbuf_func, &abuf, ZIO_PRIORITY_ASYNC_READ, ++ ZIO_FLAG_CANFAIL, &aflags, zb) != 0) ++ return (EIO); ++ ++ blk = abuf->b_data; ++ for (i = 0; i < blksz >> DNODE_SHIFT; i++) { ++ uint64_t dnobj = (zb->zb_blkid << ++ (DNODE_BLOCK_SHIFT - DNODE_SHIFT)) + i; ++ err = report_dnode(da, dnobj, blk+i); ++ if (err) ++ break; ++ } ++ (void) arc_buf_remove_ref(abuf, &abuf); ++ if (err) ++ return (err); ++ /* Don't care about the data blocks */ ++ return (TRAVERSE_VISIT_NO_CHILDREN); ++ } ++ return (0); ++} ++ ++int ++dmu_diff(objset_t 
*tosnap, objset_t *fromsnap, struct vnode *vp, offset_t *offp) ++{ ++ struct diffarg da; ++ dsl_dataset_t *ds = tosnap->os_dsl_dataset; ++ dsl_dataset_t *fromds = fromsnap->os_dsl_dataset; ++ dsl_dataset_t *findds; ++ dsl_dataset_t *relds; ++ int err = 0; ++ ++ /* make certain we are looking at snapshots */ ++ if (!dsl_dataset_is_snapshot(ds) || !dsl_dataset_is_snapshot(fromds)) ++ return (EINVAL); ++ ++ /* fromsnap must be earlier and from the same lineage as tosnap */ ++ if (fromds->ds_phys->ds_creation_txg >= ds->ds_phys->ds_creation_txg) ++ return (EXDEV); ++ ++ relds = NULL; ++ findds = ds; ++ ++ while (fromds->ds_dir != findds->ds_dir) { ++ dsl_pool_t *dp = ds->ds_dir->dd_pool; ++ ++ if (!dsl_dir_is_clone(findds->ds_dir)) { ++ if (relds) ++ dsl_dataset_rele(relds, FTAG); ++ return (EXDEV); ++ } ++ ++ rw_enter(&dp->dp_config_rwlock, RW_READER); ++ err = dsl_dataset_hold_obj(dp, ++ findds->ds_dir->dd_phys->dd_origin_obj, FTAG, &findds); ++ rw_exit(&dp->dp_config_rwlock); ++ ++ if (relds) ++ dsl_dataset_rele(relds, FTAG); ++ ++ if (err) ++ return (EXDEV); ++ ++ relds = findds; ++ } ++ ++ if (relds) ++ dsl_dataset_rele(relds, FTAG); ++ ++ da.da_vp = vp; ++ da.da_offp = offp; ++ da.da_ddr.ddr_type = DDR_NONE; ++ da.da_ddr.ddr_first = da.da_ddr.ddr_last = 0; ++ da.da_err = 0; ++ ++ err = traverse_dataset(ds, fromds->ds_phys->ds_creation_txg, ++ TRAVERSE_PRE | TRAVERSE_PREFETCH_METADATA, diff_cb, &da); ++ ++ if (err) { ++ da.da_err = err; ++ } else { ++ /* we set the da.da_err we return as side-effect */ ++ (void) write_record(&da); ++ } ++ ++ return (da.da_err); ++} +diff -uNr linux-3.2.33-go.orig/fs/zfs/zfs/dmu_object.c linux-3.2.33-go/fs/zfs/zfs/dmu_object.c +--- linux-3.2.33-go.orig/fs/zfs/zfs/dmu_object.c 1970-01-01 01:00:00.000000000 +0100 ++++ linux-3.2.33-go/fs/zfs/zfs/dmu_object.c 2012-11-16 23:25:34.353039289 +0100 +@@ -0,0 +1,204 @@ ++/* ++ * CDDL HEADER START ++ * ++ * The contents of this file are subject to the terms of the ++ * Common Development and Distribution License (the "License"). ++ * You may not use this file except in compliance with the License. ++ * ++ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE ++ * or http://www.opensolaris.org/os/licensing. ++ * See the License for the specific language governing permissions ++ * and limitations under the License. ++ * ++ * When distributing Covered Code, include this CDDL HEADER in each ++ * file and include the License file at usr/src/OPENSOLARIS.LICENSE. ++ * If applicable, add the following below this CDDL HEADER, with the ++ * fields enclosed by brackets "[]" replaced with your own identifying ++ * information: Portions Copyright [yyyy] [name of copyright owner] ++ * ++ * CDDL HEADER END ++ */ ++/* ++ * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved. ++ */ ++ ++#include ++#include ++#include ++#include ++ ++uint64_t ++dmu_object_alloc(objset_t *os, dmu_object_type_t ot, int blocksize, ++ dmu_object_type_t bonustype, int bonuslen, dmu_tx_t *tx) ++{ ++ uint64_t object; ++ uint64_t L2_dnode_count = DNODES_PER_BLOCK << ++ (DMU_META_DNODE(os)->dn_indblkshift - SPA_BLKPTRSHIFT); ++ dnode_t *dn = NULL; ++ int restarted = B_FALSE; ++ ++ mutex_enter(&os->os_obj_lock); ++ for (;;) { ++ object = os->os_obj_next; ++ /* ++ * Each time we polish off an L2 bp worth of dnodes ++ * (2^13 objects), move to another L2 bp that's still ++ * reasonably sparse (at most 1/4 full). Look from the ++ * beginning once, but after that keep looking from here. 
++ * If we can't find one, just keep going from here. ++ */ ++ if (P2PHASE(object, L2_dnode_count) == 0) { ++ uint64_t offset = restarted ? object << DNODE_SHIFT : 0; ++ int error = dnode_next_offset(DMU_META_DNODE(os), ++ DNODE_FIND_HOLE, ++ &offset, 2, DNODES_PER_BLOCK >> 2, 0); ++ restarted = B_TRUE; ++ if (error == 0) ++ object = offset >> DNODE_SHIFT; ++ } ++ os->os_obj_next = ++object; ++ ++ /* ++ * XXX We should check for an i/o error here and return ++ * up to our caller. Actually we should pre-read it in ++ * dmu_tx_assign(), but there is currently no mechanism ++ * to do so. ++ */ ++ (void) dnode_hold_impl(os, object, DNODE_MUST_BE_FREE, ++ FTAG, &dn); ++ if (dn) ++ break; ++ ++ if (dmu_object_next(os, &object, B_TRUE, 0) == 0) ++ os->os_obj_next = object - 1; ++ } ++ ++ dnode_allocate(dn, ot, blocksize, 0, bonustype, bonuslen, tx); ++ dnode_rele(dn, FTAG); ++ ++ mutex_exit(&os->os_obj_lock); ++ ++ dmu_tx_add_new_object(tx, os, object); ++ return (object); ++} ++ ++int ++dmu_object_claim(objset_t *os, uint64_t object, dmu_object_type_t ot, ++ int blocksize, dmu_object_type_t bonustype, int bonuslen, dmu_tx_t *tx) ++{ ++ dnode_t *dn; ++ int err; ++ ++ if (object == DMU_META_DNODE_OBJECT && !dmu_tx_private_ok(tx)) ++ return (EBADF); ++ ++ err = dnode_hold_impl(os, object, DNODE_MUST_BE_FREE, FTAG, &dn); ++ if (err) ++ return (err); ++ dnode_allocate(dn, ot, blocksize, 0, bonustype, bonuslen, tx); ++ dnode_rele(dn, FTAG); ++ ++ dmu_tx_add_new_object(tx, os, object); ++ return (0); ++} ++ ++int ++dmu_object_reclaim(objset_t *os, uint64_t object, dmu_object_type_t ot, ++ int blocksize, dmu_object_type_t bonustype, int bonuslen) ++{ ++ dnode_t *dn; ++ dmu_tx_t *tx; ++ int nblkptr; ++ int err; ++ ++ if (object == DMU_META_DNODE_OBJECT) ++ return (EBADF); ++ ++ err = dnode_hold_impl(os, object, DNODE_MUST_BE_ALLOCATED, ++ FTAG, &dn); ++ if (err) ++ return (err); ++ ++ if (dn->dn_type == ot && dn->dn_datablksz == blocksize && ++ dn->dn_bonustype == bonustype && dn->dn_bonuslen == bonuslen) { ++ /* nothing is changing, this is a noop */ ++ dnode_rele(dn, FTAG); ++ return (0); ++ } ++ ++ if (bonustype == DMU_OT_SA) { ++ nblkptr = 1; ++ } else { ++ nblkptr = 1 + ((DN_MAX_BONUSLEN - bonuslen) >> SPA_BLKPTRSHIFT); ++ } ++ ++ /* ++ * If we are losing blkptrs or changing the block size this must ++ * be a new file instance. We must clear out the previous file ++ * contents before we can change this type of metadata in the dnode. 
++ */ ++ if (dn->dn_nblkptr > nblkptr || dn->dn_datablksz != blocksize) { ++ err = dmu_free_long_range(os, object, 0, DMU_OBJECT_END); ++ if (err) ++ goto out; ++ } ++ ++ tx = dmu_tx_create(os); ++ dmu_tx_hold_bonus(tx, object); ++ err = dmu_tx_assign(tx, TXG_WAIT); ++ if (err) { ++ dmu_tx_abort(tx); ++ goto out; ++ } ++ ++ dnode_reallocate(dn, ot, blocksize, bonustype, bonuslen, tx); ++ ++ dmu_tx_commit(tx); ++out: ++ dnode_rele(dn, FTAG); ++ ++ return (err); ++} ++ ++int ++dmu_object_free(objset_t *os, uint64_t object, dmu_tx_t *tx) ++{ ++ dnode_t *dn; ++ int err; ++ ++ ASSERT(object != DMU_META_DNODE_OBJECT || dmu_tx_private_ok(tx)); ++ ++ err = dnode_hold_impl(os, object, DNODE_MUST_BE_ALLOCATED, ++ FTAG, &dn); ++ if (err) ++ return (err); ++ ++ ASSERT(dn->dn_type != DMU_OT_NONE); ++ dnode_free_range(dn, 0, DMU_OBJECT_END, tx); ++ dnode_free(dn, tx); ++ dnode_rele(dn, FTAG); ++ ++ return (0); ++} ++ ++int ++dmu_object_next(objset_t *os, uint64_t *objectp, boolean_t hole, uint64_t txg) ++{ ++ uint64_t offset = (*objectp + 1) << DNODE_SHIFT; ++ int error; ++ ++ error = dnode_next_offset(DMU_META_DNODE(os), ++ (hole ? DNODE_FIND_HOLE : 0), &offset, 0, DNODES_PER_BLOCK, txg); ++ ++ *objectp = offset >> DNODE_SHIFT; ++ ++ return (error); ++} ++ ++#if defined(_KERNEL) && defined(HAVE_SPL) ++EXPORT_SYMBOL(dmu_object_alloc); ++EXPORT_SYMBOL(dmu_object_claim); ++EXPORT_SYMBOL(dmu_object_reclaim); ++EXPORT_SYMBOL(dmu_object_free); ++EXPORT_SYMBOL(dmu_object_next); ++#endif +diff -uNr linux-3.2.33-go.orig/fs/zfs/zfs/dmu_objset.c linux-3.2.33-go/fs/zfs/zfs/dmu_objset.c +--- linux-3.2.33-go.orig/fs/zfs/zfs/dmu_objset.c 1970-01-01 01:00:00.000000000 +0100 ++++ linux-3.2.33-go/fs/zfs/zfs/dmu_objset.c 2012-11-16 23:25:34.350039322 +0100 +@@ -0,0 +1,1862 @@ ++/* ++ * CDDL HEADER START ++ * ++ * The contents of this file are subject to the terms of the ++ * Common Development and Distribution License (the "License"). ++ * You may not use this file except in compliance with the License. ++ * ++ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE ++ * or http://www.opensolaris.org/os/licensing. ++ * See the License for the specific language governing permissions ++ * and limitations under the License. ++ * ++ * When distributing Covered Code, include this CDDL HEADER in each ++ * file and include the License file at usr/src/OPENSOLARIS.LICENSE. ++ * If applicable, add the following below this CDDL HEADER, with the ++ * fields enclosed by brackets "[]" replaced with your own identifying ++ * information: Portions Copyright [yyyy] [name of copyright owner] ++ * ++ * CDDL HEADER END ++ */ ++/* ++ * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved. ++ */ ++ ++/* Portions Copyright 2010 Robert Milkowski */ ++ ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++ ++/* ++ * Needed to close a window in dnode_move() that allows the objset to be freed ++ * before it can be safely accessed. 
++ */ ++krwlock_t os_lock; ++ ++void ++dmu_objset_init(void) ++{ ++ rw_init(&os_lock, NULL, RW_DEFAULT, NULL); ++} ++ ++void ++dmu_objset_fini(void) ++{ ++ rw_destroy(&os_lock); ++} ++ ++spa_t * ++dmu_objset_spa(objset_t *os) ++{ ++ return (os->os_spa); ++} ++ ++zilog_t * ++dmu_objset_zil(objset_t *os) ++{ ++ return (os->os_zil); ++} ++ ++dsl_pool_t * ++dmu_objset_pool(objset_t *os) ++{ ++ dsl_dataset_t *ds; ++ ++ if ((ds = os->os_dsl_dataset) != NULL && ds->ds_dir) ++ return (ds->ds_dir->dd_pool); ++ else ++ return (spa_get_dsl(os->os_spa)); ++} ++ ++dsl_dataset_t * ++dmu_objset_ds(objset_t *os) ++{ ++ return (os->os_dsl_dataset); ++} ++ ++dmu_objset_type_t ++dmu_objset_type(objset_t *os) ++{ ++ return (os->os_phys->os_type); ++} ++ ++void ++dmu_objset_name(objset_t *os, char *buf) ++{ ++ dsl_dataset_name(os->os_dsl_dataset, buf); ++} ++ ++uint64_t ++dmu_objset_id(objset_t *os) ++{ ++ dsl_dataset_t *ds = os->os_dsl_dataset; ++ ++ return (ds ? ds->ds_object : 0); ++} ++ ++uint64_t ++dmu_objset_syncprop(objset_t *os) ++{ ++ return (os->os_sync); ++} ++ ++uint64_t ++dmu_objset_logbias(objset_t *os) ++{ ++ return (os->os_logbias); ++} ++ ++static void ++checksum_changed_cb(void *arg, uint64_t newval) ++{ ++ objset_t *os = arg; ++ ++ /* ++ * Inheritance should have been done by now. ++ */ ++ ASSERT(newval != ZIO_CHECKSUM_INHERIT); ++ ++ os->os_checksum = zio_checksum_select(newval, ZIO_CHECKSUM_ON_VALUE); ++} ++ ++static void ++compression_changed_cb(void *arg, uint64_t newval) ++{ ++ objset_t *os = arg; ++ ++ /* ++ * Inheritance and range checking should have been done by now. ++ */ ++ ASSERT(newval != ZIO_COMPRESS_INHERIT); ++ ++ os->os_compress = zio_compress_select(newval, ZIO_COMPRESS_ON_VALUE); ++} ++ ++static void ++copies_changed_cb(void *arg, uint64_t newval) ++{ ++ objset_t *os = arg; ++ ++ /* ++ * Inheritance and range checking should have been done by now. ++ */ ++ ASSERT(newval > 0); ++ ASSERT(newval <= spa_max_replication(os->os_spa)); ++ ++ os->os_copies = newval; ++} ++ ++static void ++dedup_changed_cb(void *arg, uint64_t newval) ++{ ++ objset_t *os = arg; ++ spa_t *spa = os->os_spa; ++ enum zio_checksum checksum; ++ ++ /* ++ * Inheritance should have been done by now. ++ */ ++ ASSERT(newval != ZIO_CHECKSUM_INHERIT); ++ ++ checksum = zio_checksum_dedup_select(spa, newval, ZIO_CHECKSUM_OFF); ++ ++ os->os_dedup_checksum = checksum & ZIO_CHECKSUM_MASK; ++ os->os_dedup_verify = !!(checksum & ZIO_CHECKSUM_VERIFY); ++} ++ ++static void ++primary_cache_changed_cb(void *arg, uint64_t newval) ++{ ++ objset_t *os = arg; ++ ++ /* ++ * Inheritance and range checking should have been done by now. ++ */ ++ ASSERT(newval == ZFS_CACHE_ALL || newval == ZFS_CACHE_NONE || ++ newval == ZFS_CACHE_METADATA); ++ ++ os->os_primary_cache = newval; ++} ++ ++static void ++secondary_cache_changed_cb(void *arg, uint64_t newval) ++{ ++ objset_t *os = arg; ++ ++ /* ++ * Inheritance and range checking should have been done by now. ++ */ ++ ASSERT(newval == ZFS_CACHE_ALL || newval == ZFS_CACHE_NONE || ++ newval == ZFS_CACHE_METADATA); ++ ++ os->os_secondary_cache = newval; ++} ++ ++static void ++sync_changed_cb(void *arg, uint64_t newval) ++{ ++ objset_t *os = arg; ++ ++ /* ++ * Inheritance and range checking should have been done by now. 
++ */ ++ ASSERT(newval == ZFS_SYNC_STANDARD || newval == ZFS_SYNC_ALWAYS || ++ newval == ZFS_SYNC_DISABLED); ++ ++ os->os_sync = newval; ++ if (os->os_zil) ++ zil_set_sync(os->os_zil, newval); ++} ++ ++static void ++logbias_changed_cb(void *arg, uint64_t newval) ++{ ++ objset_t *os = arg; ++ ++ ASSERT(newval == ZFS_LOGBIAS_LATENCY || ++ newval == ZFS_LOGBIAS_THROUGHPUT); ++ os->os_logbias = newval; ++ if (os->os_zil) ++ zil_set_logbias(os->os_zil, newval); ++} ++ ++void ++dmu_objset_byteswap(void *buf, size_t size) ++{ ++ objset_phys_t *osp = buf; ++ ++ ASSERT(size == OBJSET_OLD_PHYS_SIZE || size == sizeof (objset_phys_t)); ++ dnode_byteswap(&osp->os_meta_dnode); ++ byteswap_uint64_array(&osp->os_zil_header, sizeof (zil_header_t)); ++ osp->os_type = BSWAP_64(osp->os_type); ++ osp->os_flags = BSWAP_64(osp->os_flags); ++ if (size == sizeof (objset_phys_t)) { ++ dnode_byteswap(&osp->os_userused_dnode); ++ dnode_byteswap(&osp->os_groupused_dnode); ++ } ++} ++ ++int ++dmu_objset_open_impl(spa_t *spa, dsl_dataset_t *ds, blkptr_t *bp, ++ objset_t **osp) ++{ ++ objset_t *os; ++ int i, err; ++ ++ ASSERT(ds == NULL || MUTEX_HELD(&ds->ds_opening_lock)); ++ ++ os = kmem_zalloc(sizeof (objset_t), KM_PUSHPAGE); ++ os->os_dsl_dataset = ds; ++ os->os_spa = spa; ++ os->os_rootbp = bp; ++ if (!BP_IS_HOLE(os->os_rootbp)) { ++ uint32_t aflags = ARC_WAIT; ++ zbookmark_t zb; ++ SET_BOOKMARK(&zb, ds ? ds->ds_object : DMU_META_OBJSET, ++ ZB_ROOT_OBJECT, ZB_ROOT_LEVEL, ZB_ROOT_BLKID); ++ ++ if (DMU_OS_IS_L2CACHEABLE(os)) ++ aflags |= ARC_L2CACHE; ++ ++ dprintf_bp(os->os_rootbp, "reading %s", ""); ++ /* ++ * XXX when bprewrite scrub can change the bp, ++ * and this is called from dmu_objset_open_ds_os, the bp ++ * could change, and we'll need a lock. ++ */ ++ err = dsl_read_nolock(NULL, spa, os->os_rootbp, ++ arc_getbuf_func, &os->os_phys_buf, ++ ZIO_PRIORITY_SYNC_READ, ZIO_FLAG_CANFAIL, &aflags, &zb); ++ if (err) { ++ kmem_free(os, sizeof (objset_t)); ++ /* convert checksum errors into IO errors */ ++ if (err == ECKSUM) ++ err = EIO; ++ return (err); ++ } ++ ++ /* Increase the blocksize if we are permitted. */ ++ if (spa_version(spa) >= SPA_VERSION_USERSPACE && ++ arc_buf_size(os->os_phys_buf) < sizeof (objset_phys_t)) { ++ arc_buf_t *buf = arc_buf_alloc(spa, ++ sizeof (objset_phys_t), &os->os_phys_buf, ++ ARC_BUFC_METADATA); ++ bzero(buf->b_data, sizeof (objset_phys_t)); ++ bcopy(os->os_phys_buf->b_data, buf->b_data, ++ arc_buf_size(os->os_phys_buf)); ++ (void) arc_buf_remove_ref(os->os_phys_buf, ++ &os->os_phys_buf); ++ os->os_phys_buf = buf; ++ } ++ ++ os->os_phys = os->os_phys_buf->b_data; ++ os->os_flags = os->os_phys->os_flags; ++ } else { ++ int size = spa_version(spa) >= SPA_VERSION_USERSPACE ? ++ sizeof (objset_phys_t) : OBJSET_OLD_PHYS_SIZE; ++ os->os_phys_buf = arc_buf_alloc(spa, size, ++ &os->os_phys_buf, ARC_BUFC_METADATA); ++ os->os_phys = os->os_phys_buf->b_data; ++ bzero(os->os_phys, size); ++ } ++ ++ /* ++ * Note: the changed_cb will be called once before the register ++ * func returns, thus changing the checksum/compression from the ++ * default (fletcher2/off). Snapshots don't need to know about ++ * checksum/compression/copies. 
++ */ ++ if (ds) { ++ err = dsl_prop_register(ds, "primarycache", ++ primary_cache_changed_cb, os); ++ if (err == 0) ++ err = dsl_prop_register(ds, "secondarycache", ++ secondary_cache_changed_cb, os); ++ if (!dsl_dataset_is_snapshot(ds)) { ++ if (err == 0) ++ err = dsl_prop_register(ds, "checksum", ++ checksum_changed_cb, os); ++ if (err == 0) ++ err = dsl_prop_register(ds, "compression", ++ compression_changed_cb, os); ++ if (err == 0) ++ err = dsl_prop_register(ds, "copies", ++ copies_changed_cb, os); ++ if (err == 0) ++ err = dsl_prop_register(ds, "dedup", ++ dedup_changed_cb, os); ++ if (err == 0) ++ err = dsl_prop_register(ds, "logbias", ++ logbias_changed_cb, os); ++ if (err == 0) ++ err = dsl_prop_register(ds, "sync", ++ sync_changed_cb, os); ++ } ++ if (err) { ++ VERIFY(arc_buf_remove_ref(os->os_phys_buf, ++ &os->os_phys_buf) == 1); ++ kmem_free(os, sizeof (objset_t)); ++ return (err); ++ } ++ } else if (ds == NULL) { ++ /* It's the meta-objset. */ ++ os->os_checksum = ZIO_CHECKSUM_FLETCHER_4; ++ os->os_compress = ZIO_COMPRESS_LZJB; ++ os->os_copies = spa_max_replication(spa); ++ os->os_dedup_checksum = ZIO_CHECKSUM_OFF; ++ os->os_dedup_verify = 0; ++ os->os_logbias = 0; ++ os->os_sync = 0; ++ os->os_primary_cache = ZFS_CACHE_ALL; ++ os->os_secondary_cache = ZFS_CACHE_ALL; ++ } ++ ++ if (ds == NULL || !dsl_dataset_is_snapshot(ds)) ++ os->os_zil_header = os->os_phys->os_zil_header; ++ os->os_zil = zil_alloc(os, &os->os_zil_header); ++ ++ for (i = 0; i < TXG_SIZE; i++) { ++ list_create(&os->os_dirty_dnodes[i], sizeof (dnode_t), ++ offsetof(dnode_t, dn_dirty_link[i])); ++ list_create(&os->os_free_dnodes[i], sizeof (dnode_t), ++ offsetof(dnode_t, dn_dirty_link[i])); ++ } ++ list_create(&os->os_dnodes, sizeof (dnode_t), ++ offsetof(dnode_t, dn_link)); ++ list_create(&os->os_downgraded_dbufs, sizeof (dmu_buf_impl_t), ++ offsetof(dmu_buf_impl_t, db_link)); ++ ++ mutex_init(&os->os_lock, NULL, MUTEX_DEFAULT, NULL); ++ mutex_init(&os->os_obj_lock, NULL, MUTEX_DEFAULT, NULL); ++ mutex_init(&os->os_user_ptr_lock, NULL, MUTEX_DEFAULT, NULL); ++ ++ DMU_META_DNODE(os) = dnode_special_open(os, ++ &os->os_phys->os_meta_dnode, DMU_META_DNODE_OBJECT, ++ &os->os_meta_dnode); ++ if (arc_buf_size(os->os_phys_buf) >= sizeof (objset_phys_t)) { ++ DMU_USERUSED_DNODE(os) = dnode_special_open(os, ++ &os->os_phys->os_userused_dnode, DMU_USERUSED_OBJECT, ++ &os->os_userused_dnode); ++ DMU_GROUPUSED_DNODE(os) = dnode_special_open(os, ++ &os->os_phys->os_groupused_dnode, DMU_GROUPUSED_OBJECT, ++ &os->os_groupused_dnode); ++ } ++ ++ /* ++ * We should be the only thread trying to do this because we ++ * have ds_opening_lock ++ */ ++ if (ds) { ++ mutex_enter(&ds->ds_lock); ++ ASSERT(ds->ds_objset == NULL); ++ ds->ds_objset = os; ++ mutex_exit(&ds->ds_lock); ++ } ++ ++ *osp = os; ++ return (0); ++} ++ ++int ++dmu_objset_from_ds(dsl_dataset_t *ds, objset_t **osp) ++{ ++ int err = 0; ++ ++ mutex_enter(&ds->ds_opening_lock); ++ *osp = ds->ds_objset; ++ if (*osp == NULL) { ++ err = dmu_objset_open_impl(dsl_dataset_get_spa(ds), ++ ds, dsl_dataset_get_blkptr(ds), osp); ++ } ++ mutex_exit(&ds->ds_opening_lock); ++ return (err); ++} ++ ++/* called from zpl */ ++int ++dmu_objset_hold(const char *name, void *tag, objset_t **osp) ++{ ++ dsl_dataset_t *ds; ++ int err; ++ ++ err = dsl_dataset_hold(name, tag, &ds); ++ if (err) ++ return (err); ++ ++ err = dmu_objset_from_ds(ds, osp); ++ if (err) ++ dsl_dataset_rele(ds, tag); ++ ++ return (err); ++} ++ ++/* called from zpl */ ++int ++dmu_objset_own(const char *name, 
dmu_objset_type_t type, ++ boolean_t readonly, void *tag, objset_t **osp) ++{ ++ dsl_dataset_t *ds; ++ int err; ++ ++ err = dsl_dataset_own(name, B_FALSE, tag, &ds); ++ if (err) ++ return (err); ++ ++ err = dmu_objset_from_ds(ds, osp); ++ if (err) { ++ dsl_dataset_disown(ds, tag); ++ } else if (type != DMU_OST_ANY && type != (*osp)->os_phys->os_type) { ++ dmu_objset_disown(*osp, tag); ++ return (EINVAL); ++ } else if (!readonly && dsl_dataset_is_snapshot(ds)) { ++ dmu_objset_disown(*osp, tag); ++ return (EROFS); ++ } ++ return (err); ++} ++ ++void ++dmu_objset_rele(objset_t *os, void *tag) ++{ ++ dsl_dataset_rele(os->os_dsl_dataset, tag); ++} ++ ++void ++dmu_objset_disown(objset_t *os, void *tag) ++{ ++ dsl_dataset_disown(os->os_dsl_dataset, tag); ++} ++ ++int ++dmu_objset_evict_dbufs(objset_t *os) ++{ ++ dnode_t *dn; ++ ++ mutex_enter(&os->os_lock); ++ ++ /* process the mdn last, since the other dnodes have holds on it */ ++ list_remove(&os->os_dnodes, DMU_META_DNODE(os)); ++ list_insert_tail(&os->os_dnodes, DMU_META_DNODE(os)); ++ ++ /* ++ * Find the first dnode with holds. We have to do this dance ++ * because dnode_add_ref() only works if you already have a ++ * hold. If there are no holds then it has no dbufs so OK to ++ * skip. ++ */ ++ for (dn = list_head(&os->os_dnodes); ++ dn && !dnode_add_ref(dn, FTAG); ++ dn = list_next(&os->os_dnodes, dn)) ++ continue; ++ ++ while (dn) { ++ dnode_t *next_dn = dn; ++ ++ do { ++ next_dn = list_next(&os->os_dnodes, next_dn); ++ } while (next_dn && !dnode_add_ref(next_dn, FTAG)); ++ ++ mutex_exit(&os->os_lock); ++ dnode_evict_dbufs(dn); ++ dnode_rele(dn, FTAG); ++ mutex_enter(&os->os_lock); ++ dn = next_dn; ++ } ++ dn = list_head(&os->os_dnodes); ++ mutex_exit(&os->os_lock); ++ return (dn != DMU_META_DNODE(os)); ++} ++ ++void ++dmu_objset_evict(objset_t *os) ++{ ++ dsl_dataset_t *ds = os->os_dsl_dataset; ++ int t; ++ ++ for (t = 0; t < TXG_SIZE; t++) ++ ASSERT(!dmu_objset_is_dirty(os, t)); ++ ++ if (ds) { ++ if (!dsl_dataset_is_snapshot(ds)) { ++ VERIFY(0 == dsl_prop_unregister(ds, "checksum", ++ checksum_changed_cb, os)); ++ VERIFY(0 == dsl_prop_unregister(ds, "compression", ++ compression_changed_cb, os)); ++ VERIFY(0 == dsl_prop_unregister(ds, "copies", ++ copies_changed_cb, os)); ++ VERIFY(0 == dsl_prop_unregister(ds, "dedup", ++ dedup_changed_cb, os)); ++ VERIFY(0 == dsl_prop_unregister(ds, "logbias", ++ logbias_changed_cb, os)); ++ VERIFY(0 == dsl_prop_unregister(ds, "sync", ++ sync_changed_cb, os)); ++ } ++ VERIFY(0 == dsl_prop_unregister(ds, "primarycache", ++ primary_cache_changed_cb, os)); ++ VERIFY(0 == dsl_prop_unregister(ds, "secondarycache", ++ secondary_cache_changed_cb, os)); ++ } ++ ++ if (os->os_sa) ++ sa_tear_down(os); ++ ++ /* ++ * We should need only a single pass over the dnode list, since ++ * nothing can be added to the list at this point. ++ */ ++ (void) dmu_objset_evict_dbufs(os); ++ ++ dnode_special_close(&os->os_meta_dnode); ++ if (DMU_USERUSED_DNODE(os)) { ++ dnode_special_close(&os->os_userused_dnode); ++ dnode_special_close(&os->os_groupused_dnode); ++ } ++ zil_free(os->os_zil); ++ ++ ASSERT3P(list_head(&os->os_dnodes), ==, NULL); ++ ++ VERIFY(arc_buf_remove_ref(os->os_phys_buf, &os->os_phys_buf) == 1); ++ ++ /* ++ * This is a barrier to prevent the objset from going away in ++ * dnode_move() until we can safely ensure that the objset is still in ++ * use. We consider the objset valid before the barrier and invalid ++ * after the barrier. 
++ */ ++ rw_enter(&os_lock, RW_READER); ++ rw_exit(&os_lock); ++ ++ mutex_destroy(&os->os_lock); ++ mutex_destroy(&os->os_obj_lock); ++ mutex_destroy(&os->os_user_ptr_lock); ++ kmem_free(os, sizeof (objset_t)); ++} ++ ++timestruc_t ++dmu_objset_snap_cmtime(objset_t *os) ++{ ++ return (dsl_dir_snap_cmtime(os->os_dsl_dataset->ds_dir)); ++} ++ ++/* called from dsl for meta-objset */ ++objset_t * ++dmu_objset_create_impl(spa_t *spa, dsl_dataset_t *ds, blkptr_t *bp, ++ dmu_objset_type_t type, dmu_tx_t *tx) ++{ ++ objset_t *os; ++ dnode_t *mdn; ++ ++ ASSERT(dmu_tx_is_syncing(tx)); ++ if (ds != NULL) ++ VERIFY(0 == dmu_objset_from_ds(ds, &os)); ++ else ++ VERIFY(0 == dmu_objset_open_impl(spa, NULL, bp, &os)); ++ ++ mdn = DMU_META_DNODE(os); ++ ++ dnode_allocate(mdn, DMU_OT_DNODE, 1 << DNODE_BLOCK_SHIFT, ++ DN_MAX_INDBLKSHIFT, DMU_OT_NONE, 0, tx); ++ ++ /* ++ * We don't want to have to increase the meta-dnode's nlevels ++ * later, because then we could do it in quescing context while ++ * we are also accessing it in open context. ++ * ++ * This precaution is not necessary for the MOS (ds == NULL), ++ * because the MOS is only updated in syncing context. ++ * This is most fortunate: the MOS is the only objset that ++ * needs to be synced multiple times as spa_sync() iterates ++ * to convergence, so minimizing its dn_nlevels matters. ++ */ ++ if (ds != NULL) { ++ int levels = 1; ++ ++ /* ++ * Determine the number of levels necessary for the meta-dnode ++ * to contain DN_MAX_OBJECT dnodes. ++ */ ++ while ((uint64_t)mdn->dn_nblkptr << (mdn->dn_datablkshift + ++ (levels - 1) * (mdn->dn_indblkshift - SPA_BLKPTRSHIFT)) < ++ DN_MAX_OBJECT * sizeof (dnode_phys_t)) ++ levels++; ++ ++ mdn->dn_next_nlevels[tx->tx_txg & TXG_MASK] = ++ mdn->dn_nlevels = levels; ++ } ++ ++ ASSERT(type != DMU_OST_NONE); ++ ASSERT(type != DMU_OST_ANY); ++ ASSERT(type < DMU_OST_NUMTYPES); ++ os->os_phys->os_type = type; ++ if (dmu_objset_userused_enabled(os)) { ++ os->os_phys->os_flags |= OBJSET_FLAG_USERACCOUNTING_COMPLETE; ++ os->os_flags = os->os_phys->os_flags; ++ } ++ ++ dsl_dataset_dirty(ds, tx); ++ ++ return (os); ++} ++ ++struct oscarg { ++ void (*userfunc)(objset_t *os, void *arg, cred_t *cr, dmu_tx_t *tx); ++ void *userarg; ++ dsl_dataset_t *clone_origin; ++ const char *lastname; ++ dmu_objset_type_t type; ++ uint64_t flags; ++ cred_t *cr; ++}; ++ ++/*ARGSUSED*/ ++static int ++dmu_objset_create_check(void *arg1, void *arg2, dmu_tx_t *tx) ++{ ++ dsl_dir_t *dd = arg1; ++ struct oscarg *oa = arg2; ++ objset_t *mos = dd->dd_pool->dp_meta_objset; ++ int err; ++ uint64_t ddobj; ++ ++ err = zap_lookup(mos, dd->dd_phys->dd_child_dir_zapobj, ++ oa->lastname, sizeof (uint64_t), 1, &ddobj); ++ if (err != ENOENT) ++ return (err ? err : EEXIST); ++ ++ if (oa->clone_origin != NULL) { ++ /* You can't clone across pools. */ ++ if (oa->clone_origin->ds_dir->dd_pool != dd->dd_pool) ++ return (EXDEV); ++ ++ /* You can only clone snapshots, not the head datasets. 
*/ ++ if (!dsl_dataset_is_snapshot(oa->clone_origin)) ++ return (EINVAL); ++ } ++ ++ return (0); ++} ++ ++static void ++dmu_objset_create_sync(void *arg1, void *arg2, dmu_tx_t *tx) ++{ ++ dsl_dir_t *dd = arg1; ++ spa_t *spa = dd->dd_pool->dp_spa; ++ struct oscarg *oa = arg2; ++ uint64_t obj; ++ ++ ASSERT(dmu_tx_is_syncing(tx)); ++ ++ obj = dsl_dataset_create_sync(dd, oa->lastname, ++ oa->clone_origin, oa->flags, oa->cr, tx); ++ ++ if (oa->clone_origin == NULL) { ++ dsl_pool_t *dp = dd->dd_pool; ++ dsl_dataset_t *ds; ++ blkptr_t *bp; ++ objset_t *os; ++ ++ VERIFY3U(0, ==, dsl_dataset_hold_obj(dp, obj, FTAG, &ds)); ++ bp = dsl_dataset_get_blkptr(ds); ++ ASSERT(BP_IS_HOLE(bp)); ++ ++ os = dmu_objset_create_impl(spa, ds, bp, oa->type, tx); ++ ++ if (oa->userfunc) ++ oa->userfunc(os, oa->userarg, oa->cr, tx); ++ dsl_dataset_rele(ds, FTAG); ++ } ++ ++ spa_history_log_internal(LOG_DS_CREATE, spa, tx, "dataset = %llu", obj); ++} ++ ++int ++dmu_objset_create(const char *name, dmu_objset_type_t type, uint64_t flags, ++ void (*func)(objset_t *os, void *arg, cred_t *cr, dmu_tx_t *tx), void *arg) ++{ ++ dsl_dir_t *pdd; ++ const char *tail; ++ int err = 0; ++ struct oscarg oa = { 0 }; ++ ++ ASSERT(strchr(name, '@') == NULL); ++ err = dsl_dir_open(name, FTAG, &pdd, &tail); ++ if (err) ++ return (err); ++ if (tail == NULL) { ++ dsl_dir_close(pdd, FTAG); ++ return (EEXIST); ++ } ++ ++ oa.userfunc = func; ++ oa.userarg = arg; ++ oa.lastname = tail; ++ oa.type = type; ++ oa.flags = flags; ++ oa.cr = CRED(); ++ ++ err = dsl_sync_task_do(pdd->dd_pool, dmu_objset_create_check, ++ dmu_objset_create_sync, pdd, &oa, 5); ++ dsl_dir_close(pdd, FTAG); ++ return (err); ++} ++ ++int ++dmu_objset_clone(const char *name, dsl_dataset_t *clone_origin, uint64_t flags) ++{ ++ dsl_dir_t *pdd; ++ const char *tail; ++ int err = 0; ++ struct oscarg oa = { 0 }; ++ ++ ASSERT(strchr(name, '@') == NULL); ++ err = dsl_dir_open(name, FTAG, &pdd, &tail); ++ if (err) ++ return (err); ++ if (tail == NULL) { ++ dsl_dir_close(pdd, FTAG); ++ return (EEXIST); ++ } ++ ++ oa.lastname = tail; ++ oa.clone_origin = clone_origin; ++ oa.flags = flags; ++ oa.cr = CRED(); ++ ++ err = dsl_sync_task_do(pdd->dd_pool, dmu_objset_create_check, ++ dmu_objset_create_sync, pdd, &oa, 5); ++ dsl_dir_close(pdd, FTAG); ++ return (err); ++} ++ ++int ++dmu_objset_destroy(const char *name, boolean_t defer) ++{ ++ dsl_dataset_t *ds; ++ int error; ++ ++ error = dsl_dataset_own(name, B_TRUE, FTAG, &ds); ++ if (error == 0) { ++ error = dsl_dataset_destroy(ds, FTAG, defer); ++ /* dsl_dataset_destroy() closes the ds. */ ++ } ++ ++ return (error); ++} ++ ++struct snaparg { ++ dsl_sync_task_group_t *dstg; ++ char *snapname; ++ char *htag; ++ char failed[MAXPATHLEN]; ++ boolean_t recursive; ++ boolean_t needsuspend; ++ boolean_t temporary; ++ nvlist_t *props; ++ struct dsl_ds_holdarg *ha; /* only needed in the temporary case */ ++ dsl_dataset_t *newds; ++}; ++ ++static int ++snapshot_check(void *arg1, void *arg2, dmu_tx_t *tx) ++{ ++ objset_t *os = arg1; ++ struct snaparg *sn = arg2; ++ int error; ++ ++ /* The props have already been checked by zfs_check_userprops(). */ ++ ++ error = dsl_dataset_snapshot_check(os->os_dsl_dataset, ++ sn->snapname, tx); ++ if (error) ++ return (error); ++ ++ if (sn->temporary) { ++ /* ++ * Ideally we would just call ++ * dsl_dataset_user_hold_check() and ++ * dsl_dataset_destroy_check() here. 
However the ++ * dataset we want to hold and destroy is the snapshot ++ * that we just confirmed we can create, but it won't ++ * exist until after these checks are run. Do any ++ * checks we can here and if more checks are added to ++ * those routines in the future, similar checks may be ++ * necessary here. ++ */ ++ if (spa_version(os->os_spa) < SPA_VERSION_USERREFS) ++ return (ENOTSUP); ++ /* ++ * Not checking number of tags because the tag will be ++ * unique, as it will be the only tag. ++ */ ++ if (strlen(sn->htag) + MAX_TAG_PREFIX_LEN >= MAXNAMELEN) ++ return (E2BIG); ++ ++ sn->ha = kmem_alloc(sizeof(struct dsl_ds_holdarg), KM_PUSHPAGE); ++ sn->ha->temphold = B_TRUE; ++ sn->ha->htag = sn->htag; ++ } ++ return (error); ++} ++ ++static void ++snapshot_sync(void *arg1, void *arg2, dmu_tx_t *tx) ++{ ++ objset_t *os = arg1; ++ dsl_dataset_t *ds = os->os_dsl_dataset; ++ struct snaparg *sn = arg2; ++ ++ dsl_dataset_snapshot_sync(ds, sn->snapname, tx); ++ ++ if (sn->props) { ++ dsl_props_arg_t pa; ++ pa.pa_props = sn->props; ++ pa.pa_source = ZPROP_SRC_LOCAL; ++ dsl_props_set_sync(ds->ds_prev, &pa, tx); ++ } ++ ++ if (sn->temporary) { ++ struct dsl_ds_destroyarg da; ++ ++ dsl_dataset_user_hold_sync(ds->ds_prev, sn->ha, tx); ++ kmem_free(sn->ha, sizeof (struct dsl_ds_holdarg)); ++ sn->ha = NULL; ++ sn->newds = ds->ds_prev; ++ ++ da.ds = ds->ds_prev; ++ da.defer = B_TRUE; ++ dsl_dataset_destroy_sync(&da, FTAG, tx); ++ } ++} ++ ++static int ++dmu_objset_snapshot_one(const char *name, void *arg) ++{ ++ struct snaparg *sn = arg; ++ objset_t *os; ++ int err; ++ char *cp; ++ ++ /* ++ * If the objset starts with a '%', then ignore it unless it was ++ * explicitly named (ie, not recursive). These hidden datasets ++ * are always inconsistent, and by not opening them here, we can ++ * avoid a race with dsl_dir_destroy_check(). ++ */ ++ cp = strrchr(name, '/'); ++ if (cp && cp[1] == '%' && sn->recursive) ++ return (0); ++ ++ (void) strcpy(sn->failed, name); ++ ++ /* ++ * Check permissions if we are doing a recursive snapshot. The ++ * permission checks for the starting dataset have already been ++ * performed in zfs_secpolicy_snapshot() ++ */ ++ if (sn->recursive && (err = zfs_secpolicy_snapshot_perms(name, CRED()))) ++ return (err); ++ ++ err = dmu_objset_hold(name, sn, &os); ++ if (err != 0) ++ return (err); ++ ++ /* ++ * If the objset is in an inconsistent state (eg, in the process ++ * of being destroyed), don't snapshot it. As with %hidden ++ * datasets, we return EBUSY if this name was explicitly ++ * requested (ie, not recursive), and otherwise ignore it. ++ */ ++ if (os->os_dsl_dataset->ds_phys->ds_flags & DS_FLAG_INCONSISTENT) { ++ dmu_objset_rele(os, sn); ++ return (sn->recursive ? 
0 : EBUSY); ++ } ++ ++ if (sn->needsuspend) { ++ err = zil_suspend(dmu_objset_zil(os)); ++ if (err) { ++ dmu_objset_rele(os, sn); ++ return (err); ++ } ++ } ++ dsl_sync_task_create(sn->dstg, snapshot_check, snapshot_sync, ++ os, sn, 3); ++ ++ return (0); ++} ++ ++int ++dmu_objset_snapshot(char *fsname, char *snapname, char *tag, ++ nvlist_t *props, boolean_t recursive, boolean_t temporary, int cleanup_fd) ++{ ++ dsl_sync_task_t *dst; ++ struct snaparg *sn; ++ spa_t *spa; ++ minor_t minor; ++ int err; ++ ++ sn = kmem_alloc(sizeof (struct snaparg), KM_SLEEP); ++ (void) strcpy(sn->failed, fsname); ++ ++ err = spa_open(fsname, &spa, FTAG); ++ if (err) { ++ kmem_free(sn, sizeof (struct snaparg)); ++ return (err); ++ } ++ ++ if (temporary) { ++ if (cleanup_fd < 0) { ++ spa_close(spa, FTAG); ++ return (EINVAL); ++ } ++ if ((err = zfs_onexit_fd_hold(cleanup_fd, &minor)) != 0) { ++ spa_close(spa, FTAG); ++ return (err); ++ } ++ } ++ ++ sn->dstg = dsl_sync_task_group_create(spa_get_dsl(spa)); ++ sn->snapname = snapname; ++ sn->htag = tag; ++ sn->props = props; ++ sn->recursive = recursive; ++ sn->needsuspend = (spa_version(spa) < SPA_VERSION_FAST_SNAP); ++ sn->temporary = temporary; ++ sn->ha = NULL; ++ sn->newds = NULL; ++ ++ if (recursive) { ++ err = dmu_objset_find(fsname, ++ dmu_objset_snapshot_one, sn, DS_FIND_CHILDREN); ++ } else { ++ err = dmu_objset_snapshot_one(fsname, sn); ++ } ++ ++ if (err == 0) ++ err = dsl_sync_task_group_wait(sn->dstg); ++ ++ for (dst = list_head(&sn->dstg->dstg_tasks); dst; ++ dst = list_next(&sn->dstg->dstg_tasks, dst)) { ++ objset_t *os = dst->dst_arg1; ++ dsl_dataset_t *ds = os->os_dsl_dataset; ++ if (dst->dst_err) { ++ dsl_dataset_name(ds, sn->failed); ++ } else if (temporary) { ++ dsl_register_onexit_hold_cleanup(sn->newds, tag, minor); ++ } ++ if (sn->needsuspend) ++ zil_resume(dmu_objset_zil(os)); ++ dmu_objset_rele(os, sn); ++ } ++ ++ if (err) ++ (void) strcpy(fsname, sn->failed); ++ if (temporary) ++ zfs_onexit_fd_rele(cleanup_fd); ++ dsl_sync_task_group_destroy(sn->dstg); ++ spa_close(spa, FTAG); ++ kmem_free(sn, sizeof (struct snaparg)); ++ return (err); ++} ++ ++static void ++dmu_objset_sync_dnodes(list_t *list, list_t *newlist, dmu_tx_t *tx) ++{ ++ dnode_t *dn; ++ ++ while ((dn = list_head(list))) { ++ ASSERT(dn->dn_object != DMU_META_DNODE_OBJECT); ++ ASSERT(dn->dn_dbuf->db_data_pending); ++ /* ++ * Initialize dn_zio outside dnode_sync() because the ++ * meta-dnode needs to set it ouside dnode_sync(). ++ */ ++ dn->dn_zio = dn->dn_dbuf->db_data_pending->dr_zio; ++ ASSERT(dn->dn_zio); ++ ++ ASSERT3U(dn->dn_nlevels, <=, DN_MAX_LEVELS); ++ list_remove(list, dn); ++ ++ if (newlist) { ++ (void) dnode_add_ref(dn, newlist); ++ list_insert_tail(newlist, dn); ++ } ++ ++ dnode_sync(dn, tx); ++ } ++} ++ ++/* ARGSUSED */ ++static void ++dmu_objset_write_ready(zio_t *zio, arc_buf_t *abuf, void *arg) ++{ ++ int i; ++ ++ blkptr_t *bp = zio->io_bp; ++ objset_t *os = arg; ++ dnode_phys_t *dnp = &os->os_phys->os_meta_dnode; ++ ++ ASSERT(bp == os->os_rootbp); ++ ASSERT(BP_GET_TYPE(bp) == DMU_OT_OBJSET); ++ ASSERT(BP_GET_LEVEL(bp) == 0); ++ ++ /* ++ * Update rootbp fill count: it should be the number of objects ++ * allocated in the object set (not counting the "special" ++ * objects that are stored in the objset_phys_t -- the meta ++ * dnode and user/group accounting objects). 
++ */ ++ bp->blk_fill = 0; ++ for (i = 0; i < dnp->dn_nblkptr; i++) ++ bp->blk_fill += dnp->dn_blkptr[i].blk_fill; ++} ++ ++/* ARGSUSED */ ++static void ++dmu_objset_write_done(zio_t *zio, arc_buf_t *abuf, void *arg) ++{ ++ blkptr_t *bp = zio->io_bp; ++ blkptr_t *bp_orig = &zio->io_bp_orig; ++ objset_t *os = arg; ++ ++ if (zio->io_flags & ZIO_FLAG_IO_REWRITE) { ++ ASSERT(BP_EQUAL(bp, bp_orig)); ++ } else { ++ dsl_dataset_t *ds = os->os_dsl_dataset; ++ dmu_tx_t *tx = os->os_synctx; ++ ++ (void) dsl_dataset_block_kill(ds, bp_orig, tx, B_TRUE); ++ dsl_dataset_block_born(ds, bp, tx); ++ } ++} ++ ++/* called from dsl */ ++void ++dmu_objset_sync(objset_t *os, zio_t *pio, dmu_tx_t *tx) ++{ ++ int txgoff; ++ zbookmark_t zb; ++ zio_prop_t zp; ++ zio_t *zio; ++ list_t *list; ++ list_t *newlist = NULL; ++ dbuf_dirty_record_t *dr; ++ ++ dprintf_ds(os->os_dsl_dataset, "txg=%llu\n", tx->tx_txg); ++ ++ ASSERT(dmu_tx_is_syncing(tx)); ++ /* XXX the write_done callback should really give us the tx... */ ++ os->os_synctx = tx; ++ ++ if (os->os_dsl_dataset == NULL) { ++ /* ++ * This is the MOS. If we have upgraded, ++ * spa_max_replication() could change, so reset ++ * os_copies here. ++ */ ++ os->os_copies = spa_max_replication(os->os_spa); ++ } ++ ++ /* ++ * Create the root block IO ++ */ ++ SET_BOOKMARK(&zb, os->os_dsl_dataset ? ++ os->os_dsl_dataset->ds_object : DMU_META_OBJSET, ++ ZB_ROOT_OBJECT, ZB_ROOT_LEVEL, ZB_ROOT_BLKID); ++ VERIFY3U(0, ==, arc_release_bp(os->os_phys_buf, &os->os_phys_buf, ++ os->os_rootbp, os->os_spa, &zb)); ++ ++ dmu_write_policy(os, NULL, 0, 0, &zp); ++ ++ zio = arc_write(pio, os->os_spa, tx->tx_txg, ++ os->os_rootbp, os->os_phys_buf, DMU_OS_IS_L2CACHEABLE(os), &zp, ++ dmu_objset_write_ready, dmu_objset_write_done, os, ++ ZIO_PRIORITY_ASYNC_WRITE, ZIO_FLAG_MUSTSUCCEED, &zb); ++ ++ /* ++ * Sync special dnodes - the parent IO for the sync is the root block ++ */ ++ DMU_META_DNODE(os)->dn_zio = zio; ++ dnode_sync(DMU_META_DNODE(os), tx); ++ ++ os->os_phys->os_flags = os->os_flags; ++ ++ if (DMU_USERUSED_DNODE(os) && ++ DMU_USERUSED_DNODE(os)->dn_type != DMU_OT_NONE) { ++ DMU_USERUSED_DNODE(os)->dn_zio = zio; ++ dnode_sync(DMU_USERUSED_DNODE(os), tx); ++ DMU_GROUPUSED_DNODE(os)->dn_zio = zio; ++ dnode_sync(DMU_GROUPUSED_DNODE(os), tx); ++ } ++ ++ txgoff = tx->tx_txg & TXG_MASK; ++ ++ if (dmu_objset_userused_enabled(os)) { ++ newlist = &os->os_synced_dnodes; ++ /* ++ * We must create the list here because it uses the ++ * dn_dirty_link[] of this txg. ++ */ ++ list_create(newlist, sizeof (dnode_t), ++ offsetof(dnode_t, dn_dirty_link[txgoff])); ++ } ++ ++ dmu_objset_sync_dnodes(&os->os_free_dnodes[txgoff], newlist, tx); ++ dmu_objset_sync_dnodes(&os->os_dirty_dnodes[txgoff], newlist, tx); ++ ++ list = &DMU_META_DNODE(os)->dn_dirty_records[txgoff]; ++ while ((dr = list_head(list)) != NULL) { ++ ASSERT(dr->dr_dbuf->db_level == 0); ++ list_remove(list, dr); ++ if (dr->dr_zio) ++ zio_nowait(dr->dr_zio); ++ } ++ /* ++ * Free intent log blocks up to this tx. 
++ */ ++ zil_sync(os->os_zil, tx); ++ os->os_phys->os_zil_header = os->os_zil_header; ++ zio_nowait(zio); ++} ++ ++boolean_t ++dmu_objset_is_dirty(objset_t *os, uint64_t txg) ++{ ++ return (!list_is_empty(&os->os_dirty_dnodes[txg & TXG_MASK]) || ++ !list_is_empty(&os->os_free_dnodes[txg & TXG_MASK])); ++} ++ ++static objset_used_cb_t *used_cbs[DMU_OST_NUMTYPES]; ++ ++void ++dmu_objset_register_type(dmu_objset_type_t ost, objset_used_cb_t *cb) ++{ ++ used_cbs[ost] = cb; ++} ++ ++boolean_t ++dmu_objset_userused_enabled(objset_t *os) ++{ ++ return (spa_version(os->os_spa) >= SPA_VERSION_USERSPACE && ++ used_cbs[os->os_phys->os_type] != NULL && ++ DMU_USERUSED_DNODE(os) != NULL); ++} ++ ++static void ++do_userquota_update(objset_t *os, uint64_t used, uint64_t flags, ++ uint64_t user, uint64_t group, boolean_t subtract, dmu_tx_t *tx) ++{ ++ if ((flags & DNODE_FLAG_USERUSED_ACCOUNTED)) { ++ int64_t delta = DNODE_SIZE + used; ++ if (subtract) ++ delta = -delta; ++ VERIFY3U(0, ==, zap_increment_int(os, DMU_USERUSED_OBJECT, ++ user, delta, tx)); ++ VERIFY3U(0, ==, zap_increment_int(os, DMU_GROUPUSED_OBJECT, ++ group, delta, tx)); ++ } ++} ++ ++void ++dmu_objset_do_userquota_updates(objset_t *os, dmu_tx_t *tx) ++{ ++ dnode_t *dn; ++ list_t *list = &os->os_synced_dnodes; ++ ++ ASSERT(list_head(list) == NULL || dmu_objset_userused_enabled(os)); ++ ++ while ((dn = list_head(list)) != NULL) { ++ int flags; ++ ASSERT(!DMU_OBJECT_IS_SPECIAL(dn->dn_object)); ++ ASSERT(dn->dn_phys->dn_type == DMU_OT_NONE || ++ dn->dn_phys->dn_flags & ++ DNODE_FLAG_USERUSED_ACCOUNTED); ++ ++ /* Allocate the user/groupused objects if necessary. */ ++ if (DMU_USERUSED_DNODE(os)->dn_type == DMU_OT_NONE) { ++ VERIFY(0 == zap_create_claim(os, ++ DMU_USERUSED_OBJECT, ++ DMU_OT_USERGROUP_USED, DMU_OT_NONE, 0, tx)); ++ VERIFY(0 == zap_create_claim(os, ++ DMU_GROUPUSED_OBJECT, ++ DMU_OT_USERGROUP_USED, DMU_OT_NONE, 0, tx)); ++ } ++ ++ /* ++ * We intentionally modify the zap object even if the ++ * net delta is zero. Otherwise ++ * the block of the zap obj could be shared between ++ * datasets but need to be different between them after ++ * a bprewrite. ++ */ ++ ++ flags = dn->dn_id_flags; ++ ASSERT(flags); ++ if (flags & DN_ID_OLD_EXIST) { ++ do_userquota_update(os, dn->dn_oldused, dn->dn_oldflags, ++ dn->dn_olduid, dn->dn_oldgid, B_TRUE, tx); ++ } ++ if (flags & DN_ID_NEW_EXIST) { ++ do_userquota_update(os, DN_USED_BYTES(dn->dn_phys), ++ dn->dn_phys->dn_flags, dn->dn_newuid, ++ dn->dn_newgid, B_FALSE, tx); ++ } ++ ++ mutex_enter(&dn->dn_mtx); ++ dn->dn_oldused = 0; ++ dn->dn_oldflags = 0; ++ if (dn->dn_id_flags & DN_ID_NEW_EXIST) { ++ dn->dn_olduid = dn->dn_newuid; ++ dn->dn_oldgid = dn->dn_newgid; ++ dn->dn_id_flags |= DN_ID_OLD_EXIST; ++ if (dn->dn_bonuslen == 0) ++ dn->dn_id_flags |= DN_ID_CHKED_SPILL; ++ else ++ dn->dn_id_flags |= DN_ID_CHKED_BONUS; ++ } ++ dn->dn_id_flags &= ~(DN_ID_NEW_EXIST); ++ mutex_exit(&dn->dn_mtx); ++ ++ list_remove(list, dn); ++ dnode_rele(dn, list); ++ } ++} ++ ++/* ++ * Returns a pointer to data to find uid/gid from ++ * ++ * If a dirty record for transaction group that is syncing can't ++ * be found then NULL is returned. In the NULL case it is assumed ++ * the uid/gid aren't changing. 
++ */ ++static void * ++dmu_objset_userquota_find_data(dmu_buf_impl_t *db, dmu_tx_t *tx) ++{ ++ dbuf_dirty_record_t *dr, **drp; ++ void *data; ++ ++ if (db->db_dirtycnt == 0) ++ return (db->db.db_data); /* Nothing is changing */ ++ ++ for (drp = &db->db_last_dirty; (dr = *drp) != NULL; drp = &dr->dr_next) ++ if (dr->dr_txg == tx->tx_txg) ++ break; ++ ++ if (dr == NULL) { ++ data = NULL; ++ } else { ++ dnode_t *dn; ++ ++ DB_DNODE_ENTER(dr->dr_dbuf); ++ dn = DB_DNODE(dr->dr_dbuf); ++ ++ if (dn->dn_bonuslen == 0 && ++ dr->dr_dbuf->db_blkid == DMU_SPILL_BLKID) ++ data = dr->dt.dl.dr_data->b_data; ++ else ++ data = dr->dt.dl.dr_data; ++ ++ DB_DNODE_EXIT(dr->dr_dbuf); ++ } ++ ++ return (data); ++} ++ ++void ++dmu_objset_userquota_get_ids(dnode_t *dn, boolean_t before, dmu_tx_t *tx) ++{ ++ objset_t *os = dn->dn_objset; ++ void *data = NULL; ++ dmu_buf_impl_t *db = NULL; ++ uint64_t *user = NULL, *group = NULL; ++ int flags = dn->dn_id_flags; ++ int error; ++ boolean_t have_spill = B_FALSE; ++ ++ if (!dmu_objset_userused_enabled(dn->dn_objset)) ++ return; ++ ++ if (before && (flags & (DN_ID_CHKED_BONUS|DN_ID_OLD_EXIST| ++ DN_ID_CHKED_SPILL))) ++ return; ++ ++ if (before && dn->dn_bonuslen != 0) ++ data = DN_BONUS(dn->dn_phys); ++ else if (!before && dn->dn_bonuslen != 0) { ++ if (dn->dn_bonus) { ++ db = dn->dn_bonus; ++ mutex_enter(&db->db_mtx); ++ data = dmu_objset_userquota_find_data(db, tx); ++ } else { ++ data = DN_BONUS(dn->dn_phys); ++ } ++ } else if (dn->dn_bonuslen == 0 && dn->dn_bonustype == DMU_OT_SA) { ++ int rf = 0; ++ ++ if (RW_WRITE_HELD(&dn->dn_struct_rwlock)) ++ rf |= DB_RF_HAVESTRUCT; ++ error = dmu_spill_hold_by_dnode(dn, ++ rf | DB_RF_MUST_SUCCEED, ++ FTAG, (dmu_buf_t **)&db); ++ ASSERT(error == 0); ++ mutex_enter(&db->db_mtx); ++ data = (before) ? db->db.db_data : ++ dmu_objset_userquota_find_data(db, tx); ++ have_spill = B_TRUE; ++ } else { ++ mutex_enter(&dn->dn_mtx); ++ dn->dn_id_flags |= DN_ID_CHKED_BONUS; ++ mutex_exit(&dn->dn_mtx); ++ return; ++ } ++ ++ if (before) { ++ ASSERT(data); ++ user = &dn->dn_olduid; ++ group = &dn->dn_oldgid; ++ } else if (data) { ++ user = &dn->dn_newuid; ++ group = &dn->dn_newgid; ++ } ++ ++ /* ++ * Must always call the callback in case the object ++ * type has changed and that type isn't an object type to track ++ */ ++ error = used_cbs[os->os_phys->os_type](dn->dn_bonustype, data, ++ user, group); ++ ++ /* ++ * Preserve existing uid/gid when the callback can't determine ++ * what the new uid/gid are and the callback returned EEXIST. ++ * The EEXIST error tells us to just use the existing uid/gid. ++ * If we don't know what the old values are then just assign ++ * them to 0, since that is a new file being created. 
++ */ ++ if (!before && data == NULL && error == EEXIST) { ++ if (flags & DN_ID_OLD_EXIST) { ++ dn->dn_newuid = dn->dn_olduid; ++ dn->dn_newgid = dn->dn_oldgid; ++ } else { ++ dn->dn_newuid = 0; ++ dn->dn_newgid = 0; ++ } ++ error = 0; ++ } ++ ++ if (db) ++ mutex_exit(&db->db_mtx); ++ ++ mutex_enter(&dn->dn_mtx); ++ if (error == 0 && before) ++ dn->dn_id_flags |= DN_ID_OLD_EXIST; ++ if (error == 0 && !before) ++ dn->dn_id_flags |= DN_ID_NEW_EXIST; ++ ++ if (have_spill) { ++ dn->dn_id_flags |= DN_ID_CHKED_SPILL; ++ } else { ++ dn->dn_id_flags |= DN_ID_CHKED_BONUS; ++ } ++ mutex_exit(&dn->dn_mtx); ++ if (have_spill) ++ dmu_buf_rele((dmu_buf_t *)db, FTAG); ++} ++ ++boolean_t ++dmu_objset_userspace_present(objset_t *os) ++{ ++ return (os->os_phys->os_flags & ++ OBJSET_FLAG_USERACCOUNTING_COMPLETE); ++} ++ ++int ++dmu_objset_userspace_upgrade(objset_t *os) ++{ ++ uint64_t obj; ++ int err = 0; ++ ++ if (dmu_objset_userspace_present(os)) ++ return (0); ++ if (!dmu_objset_userused_enabled(os)) ++ return (ENOTSUP); ++ if (dmu_objset_is_snapshot(os)) ++ return (EINVAL); ++ ++ /* ++ * We simply need to mark every object dirty, so that it will be ++ * synced out and now accounted. If this is called ++ * concurrently, or if we already did some work before crashing, ++ * that's fine, since we track each object's accounted state ++ * independently. ++ */ ++ ++ for (obj = 0; err == 0; err = dmu_object_next(os, &obj, FALSE, 0)) { ++ dmu_tx_t *tx; ++ dmu_buf_t *db; ++ int objerr; ++ ++ if (issig(JUSTLOOKING) && issig(FORREAL)) ++ return (EINTR); ++ ++ objerr = dmu_bonus_hold(os, obj, FTAG, &db); ++ if (objerr) ++ continue; ++ tx = dmu_tx_create(os); ++ dmu_tx_hold_bonus(tx, obj); ++ objerr = dmu_tx_assign(tx, TXG_WAIT); ++ if (objerr) { ++ dmu_tx_abort(tx); ++ continue; ++ } ++ dmu_buf_will_dirty(db, tx); ++ dmu_buf_rele(db, FTAG); ++ dmu_tx_commit(tx); ++ } ++ ++ os->os_flags |= OBJSET_FLAG_USERACCOUNTING_COMPLETE; ++ txg_wait_synced(dmu_objset_pool(os), 0); ++ return (0); ++} ++ ++void ++dmu_objset_space(objset_t *os, uint64_t *refdbytesp, uint64_t *availbytesp, ++ uint64_t *usedobjsp, uint64_t *availobjsp) ++{ ++ dsl_dataset_space(os->os_dsl_dataset, refdbytesp, availbytesp, ++ usedobjsp, availobjsp); ++} ++ ++uint64_t ++dmu_objset_fsid_guid(objset_t *os) ++{ ++ return (dsl_dataset_fsid_guid(os->os_dsl_dataset)); ++} ++ ++void ++dmu_objset_fast_stat(objset_t *os, dmu_objset_stats_t *stat) ++{ ++ stat->dds_type = os->os_phys->os_type; ++ if (os->os_dsl_dataset) ++ dsl_dataset_fast_stat(os->os_dsl_dataset, stat); ++} ++ ++void ++dmu_objset_stats(objset_t *os, nvlist_t *nv) ++{ ++ ASSERT(os->os_dsl_dataset || ++ os->os_phys->os_type == DMU_OST_META); ++ ++ if (os->os_dsl_dataset != NULL) ++ dsl_dataset_stats(os->os_dsl_dataset, nv); ++ ++ dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_TYPE, ++ os->os_phys->os_type); ++ dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_USERACCOUNTING, ++ dmu_objset_userspace_present(os)); ++} ++ ++int ++dmu_objset_is_snapshot(objset_t *os) ++{ ++ if (os->os_dsl_dataset != NULL) ++ return (dsl_dataset_is_snapshot(os->os_dsl_dataset)); ++ else ++ return (B_FALSE); ++} ++ ++int ++dmu_snapshot_realname(objset_t *os, char *name, char *real, int maxlen, ++ boolean_t *conflict) ++{ ++ dsl_dataset_t *ds = os->os_dsl_dataset; ++ uint64_t ignored; ++ ++ if (ds->ds_phys->ds_snapnames_zapobj == 0) ++ return (ENOENT); ++ ++ return (zap_lookup_norm(ds->ds_dir->dd_pool->dp_meta_objset, ++ ds->ds_phys->ds_snapnames_zapobj, name, 8, 1, &ignored, MT_FIRST, ++ real, maxlen, conflict)); ++} ++ ++int 
++dmu_snapshot_list_next(objset_t *os, int namelen, char *name, ++ uint64_t *idp, uint64_t *offp, boolean_t *case_conflict) ++{ ++ dsl_dataset_t *ds = os->os_dsl_dataset; ++ zap_cursor_t cursor; ++ zap_attribute_t attr; ++ ++ if (ds->ds_phys->ds_snapnames_zapobj == 0) ++ return (ENOENT); ++ ++ zap_cursor_init_serialized(&cursor, ++ ds->ds_dir->dd_pool->dp_meta_objset, ++ ds->ds_phys->ds_snapnames_zapobj, *offp); ++ ++ if (zap_cursor_retrieve(&cursor, &attr) != 0) { ++ zap_cursor_fini(&cursor); ++ return (ENOENT); ++ } ++ ++ if (strlen(attr.za_name) + 1 > namelen) { ++ zap_cursor_fini(&cursor); ++ return (ENAMETOOLONG); ++ } ++ ++ (void) strcpy(name, attr.za_name); ++ if (idp) ++ *idp = attr.za_first_integer; ++ if (case_conflict) ++ *case_conflict = attr.za_normalization_conflict; ++ zap_cursor_advance(&cursor); ++ *offp = zap_cursor_serialize(&cursor); ++ zap_cursor_fini(&cursor); ++ ++ return (0); ++} ++ ++/* ++ * Determine the objset id for a given snapshot name. ++ */ ++int ++dmu_snapshot_id(objset_t *os, const char *snapname, uint64_t *idp) ++{ ++ dsl_dataset_t *ds = os->os_dsl_dataset; ++ zap_cursor_t cursor; ++ zap_attribute_t attr; ++ int error; ++ ++ if (ds->ds_phys->ds_snapnames_zapobj == 0) ++ return (ENOENT); ++ ++ zap_cursor_init(&cursor, ds->ds_dir->dd_pool->dp_meta_objset, ++ ds->ds_phys->ds_snapnames_zapobj); ++ ++ error = zap_cursor_move_to_key(&cursor, snapname, MT_EXACT); ++ if (error) { ++ zap_cursor_fini(&cursor); ++ return (error); ++ } ++ ++ error = zap_cursor_retrieve(&cursor, &attr); ++ if (error) { ++ zap_cursor_fini(&cursor); ++ return (error); ++ } ++ ++ *idp = attr.za_first_integer; ++ zap_cursor_fini(&cursor); ++ ++ return (0); ++} ++ ++int ++dmu_dir_list_next(objset_t *os, int namelen, char *name, ++ uint64_t *idp, uint64_t *offp) ++{ ++ dsl_dir_t *dd = os->os_dsl_dataset->ds_dir; ++ zap_cursor_t cursor; ++ zap_attribute_t attr; ++ ++ /* there is no next dir on a snapshot! */ ++ if (os->os_dsl_dataset->ds_object != ++ dd->dd_phys->dd_head_dataset_obj) ++ return (ENOENT); ++ ++ zap_cursor_init_serialized(&cursor, ++ dd->dd_pool->dp_meta_objset, ++ dd->dd_phys->dd_child_dir_zapobj, *offp); ++ ++ if (zap_cursor_retrieve(&cursor, &attr) != 0) { ++ zap_cursor_fini(&cursor); ++ return (ENOENT); ++ } ++ ++ if (strlen(attr.za_name) + 1 > namelen) { ++ zap_cursor_fini(&cursor); ++ return (ENAMETOOLONG); ++ } ++ ++ (void) strcpy(name, attr.za_name); ++ if (idp) ++ *idp = attr.za_first_integer; ++ zap_cursor_advance(&cursor); ++ *offp = zap_cursor_serialize(&cursor); ++ zap_cursor_fini(&cursor); ++ ++ return (0); ++} ++ ++struct findarg { ++ int (*func)(const char *, void *); ++ void *arg; ++}; ++ ++/* ARGSUSED */ ++static int ++findfunc(spa_t *spa, uint64_t dsobj, const char *dsname, void *arg) ++{ ++ struct findarg *fa = arg; ++ return (fa->func(dsname, fa->arg)); ++} ++ ++/* ++ * Find all objsets under name, and for each, call 'func(child_name, arg)'. ++ * Perhaps change all callers to use dmu_objset_find_spa()? 
++ */ ++int ++dmu_objset_find(char *name, int func(const char *, void *), void *arg, ++ int flags) ++{ ++ struct findarg fa; ++ fa.func = func; ++ fa.arg = arg; ++ return (dmu_objset_find_spa(NULL, name, findfunc, &fa, flags)); ++} ++ ++/* ++ * Find all objsets under name, call func on each ++ */ ++int ++dmu_objset_find_spa(spa_t *spa, const char *name, ++ int func(spa_t *, uint64_t, const char *, void *), void *arg, int flags) ++{ ++ dsl_dir_t *dd; ++ dsl_pool_t *dp; ++ dsl_dataset_t *ds; ++ zap_cursor_t zc; ++ zap_attribute_t *attr; ++ char *child; ++ uint64_t thisobj; ++ int err; ++ ++ if (name == NULL) ++ name = spa_name(spa); ++ err = dsl_dir_open_spa(spa, name, FTAG, &dd, NULL); ++ if (err) ++ return (err); ++ ++ /* Don't visit hidden ($MOS & $ORIGIN) objsets. */ ++ if (dd->dd_myname[0] == '$') { ++ dsl_dir_close(dd, FTAG); ++ return (0); ++ } ++ ++ thisobj = dd->dd_phys->dd_head_dataset_obj; ++ attr = kmem_alloc(sizeof (zap_attribute_t), KM_SLEEP); ++ dp = dd->dd_pool; ++ ++ /* ++ * Iterate over all children. ++ */ ++ if (flags & DS_FIND_CHILDREN) { ++ for (zap_cursor_init(&zc, dp->dp_meta_objset, ++ dd->dd_phys->dd_child_dir_zapobj); ++ zap_cursor_retrieve(&zc, attr) == 0; ++ (void) zap_cursor_advance(&zc)) { ++ ASSERT(attr->za_integer_length == sizeof (uint64_t)); ++ ASSERT(attr->za_num_integers == 1); ++ ++ child = kmem_asprintf("%s/%s", name, attr->za_name); ++ err = dmu_objset_find_spa(spa, child, func, arg, flags); ++ strfree(child); ++ if (err) ++ break; ++ } ++ zap_cursor_fini(&zc); ++ ++ if (err) { ++ dsl_dir_close(dd, FTAG); ++ kmem_free(attr, sizeof (zap_attribute_t)); ++ return (err); ++ } ++ } ++ ++ /* ++ * Iterate over all snapshots. ++ */ ++ if (flags & DS_FIND_SNAPSHOTS) { ++ if (!dsl_pool_sync_context(dp)) ++ rw_enter(&dp->dp_config_rwlock, RW_READER); ++ err = dsl_dataset_hold_obj(dp, thisobj, FTAG, &ds); ++ if (!dsl_pool_sync_context(dp)) ++ rw_exit(&dp->dp_config_rwlock); ++ ++ if (err == 0) { ++ uint64_t snapobj = ds->ds_phys->ds_snapnames_zapobj; ++ dsl_dataset_rele(ds, FTAG); ++ ++ for (zap_cursor_init(&zc, dp->dp_meta_objset, snapobj); ++ zap_cursor_retrieve(&zc, attr) == 0; ++ (void) zap_cursor_advance(&zc)) { ++ ASSERT(attr->za_integer_length == ++ sizeof (uint64_t)); ++ ASSERT(attr->za_num_integers == 1); ++ ++ child = kmem_asprintf("%s@%s", ++ name, attr->za_name); ++ err = func(spa, attr->za_first_integer, ++ child, arg); ++ strfree(child); ++ if (err) ++ break; ++ } ++ zap_cursor_fini(&zc); ++ } ++ } ++ ++ dsl_dir_close(dd, FTAG); ++ kmem_free(attr, sizeof (zap_attribute_t)); ++ ++ if (err) ++ return (err); ++ ++ /* ++ * Apply to self if appropriate. 
++ */ ++ err = func(spa, thisobj, name, arg); ++ return (err); ++} ++ ++/* ARGSUSED */ ++int ++dmu_objset_prefetch(const char *name, void *arg) ++{ ++ dsl_dataset_t *ds; ++ ++ if (dsl_dataset_hold(name, FTAG, &ds)) ++ return (0); ++ ++ if (!BP_IS_HOLE(&ds->ds_phys->ds_bp)) { ++ mutex_enter(&ds->ds_opening_lock); ++ if (ds->ds_objset == NULL) { ++ uint32_t aflags = ARC_NOWAIT | ARC_PREFETCH; ++ zbookmark_t zb; ++ ++ SET_BOOKMARK(&zb, ds->ds_object, ZB_ROOT_OBJECT, ++ ZB_ROOT_LEVEL, ZB_ROOT_BLKID); ++ ++ (void) dsl_read_nolock(NULL, dsl_dataset_get_spa(ds), ++ &ds->ds_phys->ds_bp, NULL, NULL, ++ ZIO_PRIORITY_ASYNC_READ, ++ ZIO_FLAG_CANFAIL | ZIO_FLAG_SPECULATIVE, ++ &aflags, &zb); ++ } ++ mutex_exit(&ds->ds_opening_lock); ++ } ++ ++ dsl_dataset_rele(ds, FTAG); ++ return (0); ++} ++ ++void ++dmu_objset_set_user(objset_t *os, void *user_ptr) ++{ ++ ASSERT(MUTEX_HELD(&os->os_user_ptr_lock)); ++ os->os_user_ptr = user_ptr; ++} ++ ++void * ++dmu_objset_get_user(objset_t *os) ++{ ++ ASSERT(MUTEX_HELD(&os->os_user_ptr_lock)); ++ return (os->os_user_ptr); ++} ++ ++#if defined(_KERNEL) && defined(HAVE_SPL) ++EXPORT_SYMBOL(dmu_objset_zil); ++EXPORT_SYMBOL(dmu_objset_pool); ++EXPORT_SYMBOL(dmu_objset_ds); ++EXPORT_SYMBOL(dmu_objset_type); ++EXPORT_SYMBOL(dmu_objset_name); ++EXPORT_SYMBOL(dmu_objset_hold); ++EXPORT_SYMBOL(dmu_objset_own); ++EXPORT_SYMBOL(dmu_objset_rele); ++EXPORT_SYMBOL(dmu_objset_disown); ++EXPORT_SYMBOL(dmu_objset_from_ds); ++EXPORT_SYMBOL(dmu_objset_create); ++EXPORT_SYMBOL(dmu_objset_clone); ++EXPORT_SYMBOL(dmu_objset_destroy); ++EXPORT_SYMBOL(dmu_objset_snapshot); ++EXPORT_SYMBOL(dmu_objset_stats); ++EXPORT_SYMBOL(dmu_objset_fast_stat); ++EXPORT_SYMBOL(dmu_objset_spa); ++EXPORT_SYMBOL(dmu_objset_space); ++EXPORT_SYMBOL(dmu_objset_fsid_guid); ++EXPORT_SYMBOL(dmu_objset_find); ++EXPORT_SYMBOL(dmu_objset_find_spa); ++EXPORT_SYMBOL(dmu_objset_prefetch); ++EXPORT_SYMBOL(dmu_objset_byteswap); ++EXPORT_SYMBOL(dmu_objset_evict_dbufs); ++EXPORT_SYMBOL(dmu_objset_snap_cmtime); ++ ++EXPORT_SYMBOL(dmu_objset_sync); ++EXPORT_SYMBOL(dmu_objset_is_dirty); ++EXPORT_SYMBOL(dmu_objset_create_impl); ++EXPORT_SYMBOL(dmu_objset_open_impl); ++EXPORT_SYMBOL(dmu_objset_evict); ++EXPORT_SYMBOL(dmu_objset_register_type); ++EXPORT_SYMBOL(dmu_objset_do_userquota_updates); ++EXPORT_SYMBOL(dmu_objset_userquota_get_ids); ++EXPORT_SYMBOL(dmu_objset_userused_enabled); ++EXPORT_SYMBOL(dmu_objset_userspace_upgrade); ++EXPORT_SYMBOL(dmu_objset_userspace_present); ++#endif +diff -uNr linux-3.2.33-go.orig/fs/zfs/zfs/dmu_send.c linux-3.2.33-go/fs/zfs/zfs/dmu_send.c +--- linux-3.2.33-go.orig/fs/zfs/zfs/dmu_send.c 1970-01-01 01:00:00.000000000 +0100 ++++ linux-3.2.33-go/fs/zfs/zfs/dmu_send.c 2012-11-16 23:25:34.350039322 +0100 +@@ -0,0 +1,1687 @@ ++/* ++ * CDDL HEADER START ++ * ++ * The contents of this file are subject to the terms of the ++ * Common Development and Distribution License (the "License"). ++ * You may not use this file except in compliance with the License. ++ * ++ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE ++ * or http://www.opensolaris.org/os/licensing. ++ * See the License for the specific language governing permissions ++ * and limitations under the License. ++ * ++ * When distributing Covered Code, include this CDDL HEADER in each ++ * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 
++ * If applicable, add the following below this CDDL HEADER, with the ++ * fields enclosed by brackets "[]" replaced with your own identifying ++ * information: Portions Copyright [yyyy] [name of copyright owner] ++ * ++ * CDDL HEADER END ++ */ ++/* ++ * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2011 by Delphix. All rights reserved. ++ * Copyright 2011 Nexenta Systems, Inc. All rights reserved. ++ * Copyright (c) 2011 by Delphix. All rights reserved. ++ * Copyright (c) 2012, Joyent, Inc. All rights reserved. ++ */ ++ ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++ ++/* Set this tunable to TRUE to replace corrupt data with 0x2f5baddb10c */ ++int zfs_send_corrupt_data = B_FALSE; ++ ++static char *dmu_recv_tag = "dmu_recv_tag"; ++ ++static int ++dump_bytes(dmu_sendarg_t *dsp, void *buf, int len) ++{ ++ dsl_dataset_t *ds = dsp->dsa_os->os_dsl_dataset; ++ ssize_t resid; /* have to get resid to get detailed errno */ ++ ASSERT3U(len % 8, ==, 0); ++ ++ fletcher_4_incremental_native(buf, len, &dsp->dsa_zc); ++ dsp->dsa_err = vn_rdwr(UIO_WRITE, dsp->dsa_vp, ++ (caddr_t)buf, len, ++ 0, UIO_SYSSPACE, FAPPEND, RLIM64_INFINITY, CRED(), &resid); ++ ++ mutex_enter(&ds->ds_sendstream_lock); ++ *dsp->dsa_off += len; ++ mutex_exit(&ds->ds_sendstream_lock); ++ ++ return (dsp->dsa_err); ++} ++ ++static int ++dump_free(dmu_sendarg_t *dsp, uint64_t object, uint64_t offset, ++ uint64_t length) ++{ ++ struct drr_free *drrf = &(dsp->dsa_drr->drr_u.drr_free); ++ ++ if (length != -1ULL && offset + length < offset) ++ length = -1ULL; ++ ++ /* ++ * If there is a pending op, but it's not PENDING_FREE, push it out, ++ * since free block aggregation can only be done for blocks of the ++ * same type (i.e., DRR_FREE records can only be aggregated with ++ * other DRR_FREE records. DRR_FREEOBJECTS records can only be ++ * aggregated with other DRR_FREEOBJECTS records. ++ */ ++ if (dsp->dsa_pending_op != PENDING_NONE && ++ dsp->dsa_pending_op != PENDING_FREE) { ++ if (dump_bytes(dsp, dsp->dsa_drr, ++ sizeof (dmu_replay_record_t)) != 0) ++ return (EINTR); ++ dsp->dsa_pending_op = PENDING_NONE; ++ } ++ ++ if (dsp->dsa_pending_op == PENDING_FREE) { ++ /* ++ * There should never be a PENDING_FREE if length is -1 ++ * (because dump_dnode is the only place where this ++ * function is called with a -1, and only after flushing ++ * any pending record). ++ */ ++ ASSERT(length != -1ULL); ++ /* ++ * Check to see whether this free block can be aggregated ++ * with pending one. ++ */ ++ if (drrf->drr_object == object && drrf->drr_offset + ++ drrf->drr_length == offset) { ++ drrf->drr_length += length; ++ return (0); ++ } else { ++ /* not a continuation. 
Push out pending record */ ++ if (dump_bytes(dsp, dsp->dsa_drr, ++ sizeof (dmu_replay_record_t)) != 0) ++ return (EINTR); ++ dsp->dsa_pending_op = PENDING_NONE; ++ } ++ } ++ /* create a FREE record and make it pending */ ++ bzero(dsp->dsa_drr, sizeof (dmu_replay_record_t)); ++ dsp->dsa_drr->drr_type = DRR_FREE; ++ drrf->drr_object = object; ++ drrf->drr_offset = offset; ++ drrf->drr_length = length; ++ drrf->drr_toguid = dsp->dsa_toguid; ++ if (length == -1ULL) { ++ if (dump_bytes(dsp, dsp->dsa_drr, ++ sizeof (dmu_replay_record_t)) != 0) ++ return (EINTR); ++ } else { ++ dsp->dsa_pending_op = PENDING_FREE; ++ } ++ ++ return (0); ++} ++ ++static int ++dump_data(dmu_sendarg_t *dsp, dmu_object_type_t type, ++ uint64_t object, uint64_t offset, int blksz, const blkptr_t *bp, void *data) ++{ ++ struct drr_write *drrw = &(dsp->dsa_drr->drr_u.drr_write); ++ ++ ++ /* ++ * If there is any kind of pending aggregation (currently either ++ * a grouping of free objects or free blocks), push it out to ++ * the stream, since aggregation can't be done across operations ++ * of different types. ++ */ ++ if (dsp->dsa_pending_op != PENDING_NONE) { ++ if (dump_bytes(dsp, dsp->dsa_drr, ++ sizeof (dmu_replay_record_t)) != 0) ++ return (EINTR); ++ dsp->dsa_pending_op = PENDING_NONE; ++ } ++ /* write a DATA record */ ++ bzero(dsp->dsa_drr, sizeof (dmu_replay_record_t)); ++ dsp->dsa_drr->drr_type = DRR_WRITE; ++ drrw->drr_object = object; ++ drrw->drr_type = type; ++ drrw->drr_offset = offset; ++ drrw->drr_length = blksz; ++ drrw->drr_toguid = dsp->dsa_toguid; ++ drrw->drr_checksumtype = BP_GET_CHECKSUM(bp); ++ if (zio_checksum_table[drrw->drr_checksumtype].ci_dedup) ++ drrw->drr_checksumflags |= DRR_CHECKSUM_DEDUP; ++ DDK_SET_LSIZE(&drrw->drr_key, BP_GET_LSIZE(bp)); ++ DDK_SET_PSIZE(&drrw->drr_key, BP_GET_PSIZE(bp)); ++ DDK_SET_COMPRESS(&drrw->drr_key, BP_GET_COMPRESS(bp)); ++ drrw->drr_key.ddk_cksum = bp->blk_cksum; ++ ++ if (dump_bytes(dsp, dsp->dsa_drr, sizeof (dmu_replay_record_t)) != 0) ++ return (EINTR); ++ if (dump_bytes(dsp, data, blksz) != 0) ++ return (EINTR); ++ return (0); ++} ++ ++static int ++dump_spill(dmu_sendarg_t *dsp, uint64_t object, int blksz, void *data) ++{ ++ struct drr_spill *drrs = &(dsp->dsa_drr->drr_u.drr_spill); ++ ++ if (dsp->dsa_pending_op != PENDING_NONE) { ++ if (dump_bytes(dsp, dsp->dsa_drr, ++ sizeof (dmu_replay_record_t)) != 0) ++ return (EINTR); ++ dsp->dsa_pending_op = PENDING_NONE; ++ } ++ ++ /* write a SPILL record */ ++ bzero(dsp->dsa_drr, sizeof (dmu_replay_record_t)); ++ dsp->dsa_drr->drr_type = DRR_SPILL; ++ drrs->drr_object = object; ++ drrs->drr_length = blksz; ++ drrs->drr_toguid = dsp->dsa_toguid; ++ ++ if (dump_bytes(dsp, dsp->dsa_drr, sizeof (dmu_replay_record_t))) ++ return (EINTR); ++ if (dump_bytes(dsp, data, blksz)) ++ return (EINTR); ++ return (0); ++} ++ ++static int ++dump_freeobjects(dmu_sendarg_t *dsp, uint64_t firstobj, uint64_t numobjs) ++{ ++ struct drr_freeobjects *drrfo = &(dsp->dsa_drr->drr_u.drr_freeobjects); ++ ++ /* ++ * If there is a pending op, but it's not PENDING_FREEOBJECTS, ++ * push it out, since free block aggregation can only be done for ++ * blocks of the same type (i.e., DRR_FREE records can only be ++ * aggregated with other DRR_FREE records. DRR_FREEOBJECTS records ++ * can only be aggregated with other DRR_FREEOBJECTS records. 
++ */ ++ if (dsp->dsa_pending_op != PENDING_NONE && ++ dsp->dsa_pending_op != PENDING_FREEOBJECTS) { ++ if (dump_bytes(dsp, dsp->dsa_drr, ++ sizeof (dmu_replay_record_t)) != 0) ++ return (EINTR); ++ dsp->dsa_pending_op = PENDING_NONE; ++ } ++ if (dsp->dsa_pending_op == PENDING_FREEOBJECTS) { ++ /* ++ * See whether this free object array can be aggregated ++ * with pending one ++ */ ++ if (drrfo->drr_firstobj + drrfo->drr_numobjs == firstobj) { ++ drrfo->drr_numobjs += numobjs; ++ return (0); ++ } else { ++ /* can't be aggregated. Push out pending record */ ++ if (dump_bytes(dsp, dsp->dsa_drr, ++ sizeof (dmu_replay_record_t)) != 0) ++ return (EINTR); ++ dsp->dsa_pending_op = PENDING_NONE; ++ } ++ } ++ ++ /* write a FREEOBJECTS record */ ++ bzero(dsp->dsa_drr, sizeof (dmu_replay_record_t)); ++ dsp->dsa_drr->drr_type = DRR_FREEOBJECTS; ++ drrfo->drr_firstobj = firstobj; ++ drrfo->drr_numobjs = numobjs; ++ drrfo->drr_toguid = dsp->dsa_toguid; ++ ++ dsp->dsa_pending_op = PENDING_FREEOBJECTS; ++ ++ return (0); ++} ++ ++static int ++dump_dnode(dmu_sendarg_t *dsp, uint64_t object, dnode_phys_t *dnp) ++{ ++ struct drr_object *drro = &(dsp->dsa_drr->drr_u.drr_object); ++ ++ if (dnp == NULL || dnp->dn_type == DMU_OT_NONE) ++ return (dump_freeobjects(dsp, object, 1)); ++ ++ if (dsp->dsa_pending_op != PENDING_NONE) { ++ if (dump_bytes(dsp, dsp->dsa_drr, ++ sizeof (dmu_replay_record_t)) != 0) ++ return (EINTR); ++ dsp->dsa_pending_op = PENDING_NONE; ++ } ++ ++ /* write an OBJECT record */ ++ bzero(dsp->dsa_drr, sizeof (dmu_replay_record_t)); ++ dsp->dsa_drr->drr_type = DRR_OBJECT; ++ drro->drr_object = object; ++ drro->drr_type = dnp->dn_type; ++ drro->drr_bonustype = dnp->dn_bonustype; ++ drro->drr_blksz = dnp->dn_datablkszsec << SPA_MINBLOCKSHIFT; ++ drro->drr_bonuslen = dnp->dn_bonuslen; ++ drro->drr_checksumtype = dnp->dn_checksum; ++ drro->drr_compress = dnp->dn_compress; ++ drro->drr_toguid = dsp->dsa_toguid; ++ ++ if (dump_bytes(dsp, dsp->dsa_drr, sizeof (dmu_replay_record_t)) != 0) ++ return (EINTR); ++ ++ if (dump_bytes(dsp, DN_BONUS(dnp), P2ROUNDUP(dnp->dn_bonuslen, 8)) != 0) ++ return (EINTR); ++ ++ /* free anything past the end of the file */ ++ if (dump_free(dsp, object, (dnp->dn_maxblkid + 1) * ++ (dnp->dn_datablkszsec << SPA_MINBLOCKSHIFT), -1ULL)) ++ return (EINTR); ++ if (dsp->dsa_err) ++ return (EINTR); ++ return (0); ++} ++ ++#define BP_SPAN(dnp, level) \ ++ (((uint64_t)dnp->dn_datablkszsec) << (SPA_MINBLOCKSHIFT + \ ++ (level) * (dnp->dn_indblkshift - SPA_BLKPTRSHIFT))) ++ ++/* ARGSUSED */ ++static int ++backup_cb(spa_t *spa, zilog_t *zilog, const blkptr_t *bp, arc_buf_t *pbuf, ++ const zbookmark_t *zb, const dnode_phys_t *dnp, void *arg) ++{ ++ dmu_sendarg_t *dsp = arg; ++ dmu_object_type_t type = bp ? 
BP_GET_TYPE(bp) : DMU_OT_NONE; ++ int err = 0; ++ ++ if (issig(JUSTLOOKING) && issig(FORREAL)) ++ return (EINTR); ++ ++ if (zb->zb_object != DMU_META_DNODE_OBJECT && ++ DMU_OBJECT_IS_SPECIAL(zb->zb_object)) { ++ return (0); ++ } else if (bp == NULL && zb->zb_object == DMU_META_DNODE_OBJECT) { ++ uint64_t span = BP_SPAN(dnp, zb->zb_level); ++ uint64_t dnobj = (zb->zb_blkid * span) >> DNODE_SHIFT; ++ err = dump_freeobjects(dsp, dnobj, span >> DNODE_SHIFT); ++ } else if (bp == NULL) { ++ uint64_t span = BP_SPAN(dnp, zb->zb_level); ++ err = dump_free(dsp, zb->zb_object, zb->zb_blkid * span, span); ++ } else if (zb->zb_level > 0 || type == DMU_OT_OBJSET) { ++ return (0); ++ } else if (type == DMU_OT_DNODE) { ++ dnode_phys_t *blk; ++ int i; ++ int blksz = BP_GET_LSIZE(bp); ++ uint32_t aflags = ARC_WAIT; ++ arc_buf_t *abuf; ++ ++ if (dsl_read(NULL, spa, bp, pbuf, ++ arc_getbuf_func, &abuf, ZIO_PRIORITY_ASYNC_READ, ++ ZIO_FLAG_CANFAIL, &aflags, zb) != 0) ++ return (EIO); ++ ++ blk = abuf->b_data; ++ for (i = 0; i < blksz >> DNODE_SHIFT; i++) { ++ uint64_t dnobj = (zb->zb_blkid << ++ (DNODE_BLOCK_SHIFT - DNODE_SHIFT)) + i; ++ err = dump_dnode(dsp, dnobj, blk+i); ++ if (err) ++ break; ++ } ++ (void) arc_buf_remove_ref(abuf, &abuf); ++ } else if (type == DMU_OT_SA) { ++ uint32_t aflags = ARC_WAIT; ++ arc_buf_t *abuf; ++ int blksz = BP_GET_LSIZE(bp); ++ ++ if (arc_read_nolock(NULL, spa, bp, ++ arc_getbuf_func, &abuf, ZIO_PRIORITY_ASYNC_READ, ++ ZIO_FLAG_CANFAIL, &aflags, zb) != 0) ++ return (EIO); ++ ++ err = dump_spill(dsp, zb->zb_object, blksz, abuf->b_data); ++ (void) arc_buf_remove_ref(abuf, &abuf); ++ } else { /* it's a level-0 block of a regular object */ ++ uint32_t aflags = ARC_WAIT; ++ arc_buf_t *abuf; ++ int blksz = BP_GET_LSIZE(bp); ++ ++ if (dsl_read(NULL, spa, bp, pbuf, ++ arc_getbuf_func, &abuf, ZIO_PRIORITY_ASYNC_READ, ++ ZIO_FLAG_CANFAIL, &aflags, zb) != 0) { ++ if (zfs_send_corrupt_data) { ++ uint64_t *ptr; ++ /* Send a block filled with 0x"zfs badd bloc" */ ++ abuf = arc_buf_alloc(spa, blksz, &abuf, ++ ARC_BUFC_DATA); ++ for (ptr = abuf->b_data; ++ (char *)ptr < (char *)abuf->b_data + blksz; ++ ptr++) ++ *ptr = 0x2f5baddb10c; ++ } else { ++ return (EIO); ++ } ++ } ++ ++ err = dump_data(dsp, type, zb->zb_object, zb->zb_blkid * blksz, ++ blksz, bp, abuf->b_data); ++ (void) arc_buf_remove_ref(abuf, &abuf); ++ } ++ ++ ASSERT(err == 0 || err == EINTR); ++ return (err); ++} ++ ++int ++dmu_send(objset_t *tosnap, objset_t *fromsnap, boolean_t fromorigin, ++ int outfd, vnode_t *vp, offset_t *off) ++{ ++ dsl_dataset_t *ds = tosnap->os_dsl_dataset; ++ dsl_dataset_t *fromds = fromsnap ? 
fromsnap->os_dsl_dataset : NULL; ++ dmu_replay_record_t *drr; ++ dmu_sendarg_t *dsp; ++ int err; ++ uint64_t fromtxg = 0; ++ ++ /* tosnap must be a snapshot */ ++ if (ds->ds_phys->ds_next_snap_obj == 0) ++ return (EINVAL); ++ ++ /* fromsnap must be an earlier snapshot from the same fs as tosnap */ ++ if (fromds && (ds->ds_dir != fromds->ds_dir || ++ fromds->ds_phys->ds_creation_txg >= ds->ds_phys->ds_creation_txg)) ++ return (EXDEV); ++ ++ if (fromorigin) { ++ dsl_pool_t *dp = ds->ds_dir->dd_pool; ++ ++ if (fromsnap) ++ return (EINVAL); ++ ++ if (dsl_dir_is_clone(ds->ds_dir)) { ++ rw_enter(&dp->dp_config_rwlock, RW_READER); ++ err = dsl_dataset_hold_obj(dp, ++ ds->ds_dir->dd_phys->dd_origin_obj, FTAG, &fromds); ++ rw_exit(&dp->dp_config_rwlock); ++ if (err) ++ return (err); ++ } else { ++ fromorigin = B_FALSE; ++ } ++ } ++ ++ ++ drr = kmem_zalloc(sizeof (dmu_replay_record_t), KM_SLEEP); ++ drr->drr_type = DRR_BEGIN; ++ drr->drr_u.drr_begin.drr_magic = DMU_BACKUP_MAGIC; ++ DMU_SET_STREAM_HDRTYPE(drr->drr_u.drr_begin.drr_versioninfo, ++ DMU_SUBSTREAM); ++ ++#ifdef _KERNEL ++ if (dmu_objset_type(tosnap) == DMU_OST_ZFS) { ++ uint64_t version; ++ if (zfs_get_zplprop(tosnap, ZFS_PROP_VERSION, &version) != 0) { ++ kmem_free(drr, sizeof (dmu_replay_record_t)); ++ return (EINVAL); ++ } ++ if (version == ZPL_VERSION_SA) { ++ DMU_SET_FEATUREFLAGS( ++ drr->drr_u.drr_begin.drr_versioninfo, ++ DMU_BACKUP_FEATURE_SA_SPILL); ++ } ++ } ++#endif ++ ++ drr->drr_u.drr_begin.drr_creation_time = ++ ds->ds_phys->ds_creation_time; ++ drr->drr_u.drr_begin.drr_type = tosnap->os_phys->os_type; ++ if (fromorigin) ++ drr->drr_u.drr_begin.drr_flags |= DRR_FLAG_CLONE; ++ drr->drr_u.drr_begin.drr_toguid = ds->ds_phys->ds_guid; ++ if (ds->ds_phys->ds_flags & DS_FLAG_CI_DATASET) ++ drr->drr_u.drr_begin.drr_flags |= DRR_FLAG_CI_DATA; ++ ++ if (fromds) ++ drr->drr_u.drr_begin.drr_fromguid = fromds->ds_phys->ds_guid; ++ dsl_dataset_name(ds, drr->drr_u.drr_begin.drr_toname); ++ ++ if (fromds) ++ fromtxg = fromds->ds_phys->ds_creation_txg; ++ if (fromorigin) ++ dsl_dataset_rele(fromds, FTAG); ++ ++ dsp = kmem_zalloc(sizeof (dmu_sendarg_t), KM_SLEEP); ++ ++ dsp->dsa_drr = drr; ++ dsp->dsa_vp = vp; ++ dsp->dsa_outfd = outfd; ++ dsp->dsa_proc = curproc; ++ dsp->dsa_os = tosnap; ++ dsp->dsa_off = off; ++ dsp->dsa_toguid = ds->ds_phys->ds_guid; ++ ZIO_SET_CHECKSUM(&dsp->dsa_zc, 0, 0, 0, 0); ++ dsp->dsa_pending_op = PENDING_NONE; ++ ++ mutex_enter(&ds->ds_sendstream_lock); ++ list_insert_head(&ds->ds_sendstreams, dsp); ++ mutex_exit(&ds->ds_sendstream_lock); ++ ++ if (dump_bytes(dsp, drr, sizeof (dmu_replay_record_t)) != 0) { ++ err = dsp->dsa_err; ++ goto out; ++ } ++ ++ err = traverse_dataset(ds, fromtxg, TRAVERSE_PRE | TRAVERSE_PREFETCH, ++ backup_cb, dsp); ++ ++ if (dsp->dsa_pending_op != PENDING_NONE) ++ if (dump_bytes(dsp, drr, sizeof (dmu_replay_record_t)) != 0) ++ err = EINTR; ++ ++ if (err) { ++ if (err == EINTR && dsp->dsa_err) ++ err = dsp->dsa_err; ++ goto out; ++ } ++ ++ bzero(drr, sizeof (dmu_replay_record_t)); ++ drr->drr_type = DRR_END; ++ drr->drr_u.drr_end.drr_checksum = dsp->dsa_zc; ++ drr->drr_u.drr_end.drr_toguid = dsp->dsa_toguid; ++ ++ if (dump_bytes(dsp, drr, sizeof (dmu_replay_record_t)) != 0) { ++ err = dsp->dsa_err; ++ goto out; ++ } ++ ++out: ++ mutex_enter(&ds->ds_sendstream_lock); ++ list_remove(&ds->ds_sendstreams, dsp); ++ mutex_exit(&ds->ds_sendstream_lock); ++ ++ kmem_free(drr, sizeof (dmu_replay_record_t)); ++ kmem_free(dsp, sizeof (dmu_sendarg_t)); ++ ++ return (err); ++} ++ ++int 
++dmu_send_estimate(objset_t *tosnap, objset_t *fromsnap, boolean_t fromorigin, ++ uint64_t *sizep) ++{ ++ dsl_dataset_t *ds = tosnap->os_dsl_dataset; ++ dsl_dataset_t *fromds = fromsnap ? fromsnap->os_dsl_dataset : NULL; ++ dsl_pool_t *dp = ds->ds_dir->dd_pool; ++ int err; ++ uint64_t size, recordsize; ++ ++ /* tosnap must be a snapshot */ ++ if (ds->ds_phys->ds_next_snap_obj == 0) ++ return (EINVAL); ++ ++ /* fromsnap must be an earlier snapshot from the same fs as tosnap */ ++ if (fromds && (ds->ds_dir != fromds->ds_dir || ++ fromds->ds_phys->ds_creation_txg >= ds->ds_phys->ds_creation_txg)) ++ return (EXDEV); ++ ++ if (fromorigin) { ++ if (fromsnap) ++ return (EINVAL); ++ ++ if (dsl_dir_is_clone(ds->ds_dir)) { ++ rw_enter(&dp->dp_config_rwlock, RW_READER); ++ err = dsl_dataset_hold_obj(dp, ++ ds->ds_dir->dd_phys->dd_origin_obj, FTAG, &fromds); ++ rw_exit(&dp->dp_config_rwlock); ++ if (err) ++ return (err); ++ } else { ++ fromorigin = B_FALSE; ++ } ++ } ++ ++ /* Get uncompressed size estimate of changed data. */ ++ if (fromds == NULL) { ++ size = ds->ds_phys->ds_uncompressed_bytes; ++ } else { ++ uint64_t used, comp; ++ err = dsl_dataset_space_written(fromds, ds, ++ &used, &comp, &size); ++ if (fromorigin) ++ dsl_dataset_rele(fromds, FTAG); ++ if (err) ++ return (err); ++ } ++ ++ /* ++ * Assume that space (both on-disk and in-stream) is dominated by ++ * data. We will adjust for indirect blocks and the copies property, ++ * but ignore per-object space used (eg, dnodes and DRR_OBJECT records). ++ */ ++ ++ /* ++ * Subtract out approximate space used by indirect blocks. ++ * Assume most space is used by data blocks (non-indirect, non-dnode). ++ * Assume all blocks are recordsize. Assume ditto blocks and ++ * internal fragmentation counter out compression. ++ * ++ * Therefore, space used by indirect blocks is sizeof(blkptr_t) per ++ * block, which we observe in practice. ++ */ ++ rw_enter(&dp->dp_config_rwlock, RW_READER); ++ err = dsl_prop_get_ds(ds, "recordsize", ++ sizeof (recordsize), 1, &recordsize, NULL); ++ rw_exit(&dp->dp_config_rwlock); ++ if (err) ++ return (err); ++ size -= size / recordsize * sizeof (blkptr_t); ++ ++ /* Add in the space for the record associated with each block. */ ++ size += size / recordsize * sizeof (dmu_replay_record_t); ++ ++ *sizep = size; ++ ++ return (0); ++} ++ ++struct recvbeginsyncarg { ++ const char *tofs; ++ const char *tosnap; ++ dsl_dataset_t *origin; ++ uint64_t fromguid; ++ dmu_objset_type_t type; ++ void *tag; ++ boolean_t force; ++ uint64_t dsflags; ++ char clonelastname[MAXNAMELEN]; ++ dsl_dataset_t *ds; /* the ds to recv into; returned from the syncfunc */ ++ cred_t *cr; ++}; ++ ++/* ARGSUSED */ ++static int ++recv_new_check(void *arg1, void *arg2, dmu_tx_t *tx) ++{ ++ dsl_dir_t *dd = arg1; ++ struct recvbeginsyncarg *rbsa = arg2; ++ objset_t *mos = dd->dd_pool->dp_meta_objset; ++ uint64_t val; ++ int err; ++ ++ err = zap_lookup(mos, dd->dd_phys->dd_child_dir_zapobj, ++ strrchr(rbsa->tofs, '/') + 1, sizeof (uint64_t), 1, &val); ++ ++ if (err != ENOENT) ++ return (err ? 
err : EEXIST); ++ ++ if (rbsa->origin) { ++ /* make sure it's a snap in the same pool */ ++ if (rbsa->origin->ds_dir->dd_pool != dd->dd_pool) ++ return (EXDEV); ++ if (!dsl_dataset_is_snapshot(rbsa->origin)) ++ return (EINVAL); ++ if (rbsa->origin->ds_phys->ds_guid != rbsa->fromguid) ++ return (ENODEV); ++ } ++ ++ return (0); ++} ++ ++static void ++recv_new_sync(void *arg1, void *arg2, dmu_tx_t *tx) ++{ ++ dsl_dir_t *dd = arg1; ++ struct recvbeginsyncarg *rbsa = arg2; ++ uint64_t flags = DS_FLAG_INCONSISTENT | rbsa->dsflags; ++ uint64_t dsobj; ++ ++ /* Create and open new dataset. */ ++ dsobj = dsl_dataset_create_sync(dd, strrchr(rbsa->tofs, '/') + 1, ++ rbsa->origin, flags, rbsa->cr, tx); ++ VERIFY(0 == dsl_dataset_own_obj(dd->dd_pool, dsobj, ++ B_TRUE, dmu_recv_tag, &rbsa->ds)); ++ ++ if (rbsa->origin == NULL) { ++ (void) dmu_objset_create_impl(dd->dd_pool->dp_spa, ++ rbsa->ds, &rbsa->ds->ds_phys->ds_bp, rbsa->type, tx); ++ } ++ ++ spa_history_log_internal(LOG_DS_REPLAY_FULL_SYNC, ++ dd->dd_pool->dp_spa, tx, "dataset = %lld", dsobj); ++} ++ ++/* ARGSUSED */ ++static int ++recv_existing_check(void *arg1, void *arg2, dmu_tx_t *tx) ++{ ++ dsl_dataset_t *ds = arg1; ++ struct recvbeginsyncarg *rbsa = arg2; ++ int err; ++ uint64_t val; ++ ++ /* must not have any changes since most recent snapshot */ ++ if (!rbsa->force && dsl_dataset_modified_since_lastsnap(ds)) ++ return (ETXTBSY); ++ ++ /* new snapshot name must not exist */ ++ err = zap_lookup(ds->ds_dir->dd_pool->dp_meta_objset, ++ ds->ds_phys->ds_snapnames_zapobj, rbsa->tosnap, 8, 1, &val); ++ if (err == 0) ++ return (EEXIST); ++ if (err != ENOENT) ++ return (err); ++ ++ if (rbsa->fromguid) { ++ /* if incremental, most recent snapshot must match fromguid */ ++ if (ds->ds_prev == NULL) ++ return (ENODEV); ++ ++ /* ++ * most recent snapshot must match fromguid, or there are no ++ * changes since the fromguid one ++ */ ++ if (ds->ds_prev->ds_phys->ds_guid != rbsa->fromguid) { ++ uint64_t birth = ds->ds_prev->ds_phys->ds_bp.blk_birth; ++ uint64_t obj = ds->ds_prev->ds_phys->ds_prev_snap_obj; ++ while (obj != 0) { ++ dsl_dataset_t *snap; ++ err = dsl_dataset_hold_obj(ds->ds_dir->dd_pool, ++ obj, FTAG, &snap); ++ if (err) ++ return (ENODEV); ++ if (snap->ds_phys->ds_creation_txg < birth) { ++ dsl_dataset_rele(snap, FTAG); ++ return (ENODEV); ++ } ++ if (snap->ds_phys->ds_guid == rbsa->fromguid) { ++ dsl_dataset_rele(snap, FTAG); ++ break; /* it's ok */ ++ } ++ obj = snap->ds_phys->ds_prev_snap_obj; ++ dsl_dataset_rele(snap, FTAG); ++ } ++ if (obj == 0) ++ return (ENODEV); ++ } ++ } else { ++ /* if full, most recent snapshot must be $ORIGIN */ ++ if (ds->ds_phys->ds_prev_snap_txg >= TXG_INITIAL) ++ return (ENODEV); ++ } ++ ++ /* temporary clone name must not exist */ ++ err = zap_lookup(ds->ds_dir->dd_pool->dp_meta_objset, ++ ds->ds_dir->dd_phys->dd_child_dir_zapobj, ++ rbsa->clonelastname, 8, 1, &val); ++ if (err == 0) ++ return (EEXIST); ++ if (err != ENOENT) ++ return (err); ++ ++ return (0); ++} ++ ++/* ARGSUSED */ ++static void ++recv_existing_sync(void *arg1, void *arg2, dmu_tx_t *tx) ++{ ++ dsl_dataset_t *ohds = arg1; ++ struct recvbeginsyncarg *rbsa = arg2; ++ dsl_pool_t *dp = ohds->ds_dir->dd_pool; ++ dsl_dataset_t *cds; ++ uint64_t flags = DS_FLAG_INCONSISTENT | rbsa->dsflags; ++ uint64_t dsobj; ++ ++ /* create and open the temporary clone */ ++ dsobj = dsl_dataset_create_sync(ohds->ds_dir, rbsa->clonelastname, ++ ohds->ds_prev, flags, rbsa->cr, tx); ++ VERIFY(0 == dsl_dataset_own_obj(dp, dsobj, B_TRUE, dmu_recv_tag, &cds)); ++ ++ /* 
++ * If we actually created a non-clone, we need to create the ++ * objset in our new dataset. ++ */ ++ if (BP_IS_HOLE(dsl_dataset_get_blkptr(cds))) { ++ (void) dmu_objset_create_impl(dp->dp_spa, ++ cds, dsl_dataset_get_blkptr(cds), rbsa->type, tx); ++ } ++ ++ rbsa->ds = cds; ++ ++ spa_history_log_internal(LOG_DS_REPLAY_INC_SYNC, ++ dp->dp_spa, tx, "dataset = %lld", dsobj); ++} ++ ++static boolean_t ++dmu_recv_verify_features(dsl_dataset_t *ds, struct drr_begin *drrb) ++{ ++ int featureflags; ++ ++ featureflags = DMU_GET_FEATUREFLAGS(drrb->drr_versioninfo); ++ ++ /* Verify pool version supports SA if SA_SPILL feature set */ ++ return ((featureflags & DMU_BACKUP_FEATURE_SA_SPILL) && ++ (spa_version(dsl_dataset_get_spa(ds)) < SPA_VERSION_SA)); ++} ++ ++/* ++ * NB: callers *MUST* call dmu_recv_stream() if dmu_recv_begin() ++ * succeeds; otherwise we will leak the holds on the datasets. ++ */ ++int ++dmu_recv_begin(char *tofs, char *tosnap, char *top_ds, struct drr_begin *drrb, ++ boolean_t force, objset_t *origin, dmu_recv_cookie_t *drc) ++{ ++ int err = 0; ++ boolean_t byteswap; ++ struct recvbeginsyncarg rbsa = { 0 }; ++ uint64_t versioninfo; ++ int flags; ++ dsl_dataset_t *ds; ++ ++ if (drrb->drr_magic == DMU_BACKUP_MAGIC) ++ byteswap = FALSE; ++ else if (drrb->drr_magic == BSWAP_64(DMU_BACKUP_MAGIC)) ++ byteswap = TRUE; ++ else ++ return (EINVAL); ++ ++ rbsa.tofs = tofs; ++ rbsa.tosnap = tosnap; ++ rbsa.origin = origin ? origin->os_dsl_dataset : NULL; ++ rbsa.fromguid = drrb->drr_fromguid; ++ rbsa.type = drrb->drr_type; ++ rbsa.tag = FTAG; ++ rbsa.dsflags = 0; ++ rbsa.cr = CRED(); ++ versioninfo = drrb->drr_versioninfo; ++ flags = drrb->drr_flags; ++ ++ if (byteswap) { ++ rbsa.type = BSWAP_32(rbsa.type); ++ rbsa.fromguid = BSWAP_64(rbsa.fromguid); ++ versioninfo = BSWAP_64(versioninfo); ++ flags = BSWAP_32(flags); ++ } ++ ++ if (DMU_GET_STREAM_HDRTYPE(versioninfo) == DMU_COMPOUNDSTREAM || ++ rbsa.type >= DMU_OST_NUMTYPES || ++ ((flags & DRR_FLAG_CLONE) && origin == NULL)) ++ return (EINVAL); ++ ++ if (flags & DRR_FLAG_CI_DATA) ++ rbsa.dsflags = DS_FLAG_CI_DATASET; ++ ++ bzero(drc, sizeof (dmu_recv_cookie_t)); ++ drc->drc_drrb = drrb; ++ drc->drc_tosnap = tosnap; ++ drc->drc_top_ds = top_ds; ++ drc->drc_force = force; ++ ++ /* ++ * Process the begin in syncing context. 
++ */ ++ ++ /* open the dataset we are logically receiving into */ ++ err = dsl_dataset_hold(tofs, dmu_recv_tag, &ds); ++ if (err == 0) { ++ if (dmu_recv_verify_features(ds, drrb)) { ++ dsl_dataset_rele(ds, dmu_recv_tag); ++ return (ENOTSUP); ++ } ++ /* target fs already exists; recv into temp clone */ ++ ++ /* Can't recv a clone into an existing fs */ ++ if (flags & DRR_FLAG_CLONE) { ++ dsl_dataset_rele(ds, dmu_recv_tag); ++ return (EINVAL); ++ } ++ ++ /* must not have an incremental recv already in progress */ ++ if (!mutex_tryenter(&ds->ds_recvlock)) { ++ dsl_dataset_rele(ds, dmu_recv_tag); ++ return (EBUSY); ++ } ++ ++ /* tmp clone name is: tofs/%tosnap" */ ++ (void) snprintf(rbsa.clonelastname, sizeof (rbsa.clonelastname), ++ "%%%s", tosnap); ++ rbsa.force = force; ++ err = dsl_sync_task_do(ds->ds_dir->dd_pool, ++ recv_existing_check, recv_existing_sync, ds, &rbsa, 5); ++ if (err) { ++ mutex_exit(&ds->ds_recvlock); ++ dsl_dataset_rele(ds, dmu_recv_tag); ++ return (err); ++ } ++ drc->drc_logical_ds = ds; ++ drc->drc_real_ds = rbsa.ds; ++ } else if (err == ENOENT) { ++ /* target fs does not exist; must be a full backup or clone */ ++ char *cp; ++ ++ /* ++ * If it's a non-clone incremental, we are missing the ++ * target fs, so fail the recv. ++ */ ++ if (rbsa.fromguid && !(flags & DRR_FLAG_CLONE)) ++ return (ENOENT); ++ ++ /* Open the parent of tofs */ ++ cp = strrchr(tofs, '/'); ++ *cp = '\0'; ++ err = dsl_dataset_hold(tofs, FTAG, &ds); ++ *cp = '/'; ++ if (err) ++ return (err); ++ ++ if (dmu_recv_verify_features(ds, drrb)) { ++ dsl_dataset_rele(ds, FTAG); ++ return (ENOTSUP); ++ } ++ ++ err = dsl_sync_task_do(ds->ds_dir->dd_pool, ++ recv_new_check, recv_new_sync, ds->ds_dir, &rbsa, 5); ++ dsl_dataset_rele(ds, FTAG); ++ if (err) ++ return (err); ++ drc->drc_logical_ds = drc->drc_real_ds = rbsa.ds; ++ drc->drc_newfs = B_TRUE; ++ } ++ ++ return (err); ++} ++ ++struct restorearg { ++ int err; ++ int byteswap; ++ vnode_t *vp; ++ char *buf; ++ uint64_t voff; ++ int bufsize; /* amount of memory allocated for buf */ ++ zio_cksum_t cksum; ++ avl_tree_t *guid_to_ds_map; ++}; ++ ++typedef struct guid_map_entry { ++ uint64_t guid; ++ dsl_dataset_t *gme_ds; ++ avl_node_t avlnode; ++} guid_map_entry_t; ++ ++static int ++guid_compare(const void *arg1, const void *arg2) ++{ ++ const guid_map_entry_t *gmep1 = arg1; ++ const guid_map_entry_t *gmep2 = arg2; ++ ++ if (gmep1->guid < gmep2->guid) ++ return (-1); ++ else if (gmep1->guid > gmep2->guid) ++ return (1); ++ return (0); ++} ++ ++static void ++free_guid_map_onexit(void *arg) ++{ ++ avl_tree_t *ca = arg; ++ void *cookie = NULL; ++ guid_map_entry_t *gmep; ++ ++ while ((gmep = avl_destroy_nodes(ca, &cookie)) != NULL) { ++ dsl_dataset_rele(gmep->gme_ds, ca); ++ kmem_free(gmep, sizeof (guid_map_entry_t)); ++ } ++ avl_destroy(ca); ++ kmem_free(ca, sizeof (avl_tree_t)); ++} ++ ++static void * ++restore_read(struct restorearg *ra, int len) ++{ ++ void *rv; ++ int done = 0; ++ ++ /* some things will require 8-byte alignment, so everything must */ ++ ASSERT3U(len % 8, ==, 0); ++ ++ while (done < len) { ++ ssize_t resid; ++ ++ ra->err = vn_rdwr(UIO_READ, ra->vp, ++ (caddr_t)ra->buf + done, len - done, ++ ra->voff, UIO_SYSSPACE, FAPPEND, ++ RLIM64_INFINITY, CRED(), &resid); ++ ++ if (resid == len - done) ++ ra->err = EINVAL; ++ ra->voff += len - done - resid; ++ done = len - resid; ++ if (ra->err) ++ return (NULL); ++ } ++ ++ ASSERT3U(done, ==, len); ++ rv = ra->buf; ++ if (ra->byteswap) ++ fletcher_4_incremental_byteswap(rv, len, &ra->cksum); ++ else ++ 
fletcher_4_incremental_native(rv, len, &ra->cksum); ++ return (rv); ++} ++ ++noinline static void ++backup_byteswap(dmu_replay_record_t *drr) ++{ ++#define DO64(X) (drr->drr_u.X = BSWAP_64(drr->drr_u.X)) ++#define DO32(X) (drr->drr_u.X = BSWAP_32(drr->drr_u.X)) ++ drr->drr_type = BSWAP_32(drr->drr_type); ++ drr->drr_payloadlen = BSWAP_32(drr->drr_payloadlen); ++ switch (drr->drr_type) { ++ case DRR_BEGIN: ++ DO64(drr_begin.drr_magic); ++ DO64(drr_begin.drr_versioninfo); ++ DO64(drr_begin.drr_creation_time); ++ DO32(drr_begin.drr_type); ++ DO32(drr_begin.drr_flags); ++ DO64(drr_begin.drr_toguid); ++ DO64(drr_begin.drr_fromguid); ++ break; ++ case DRR_OBJECT: ++ DO64(drr_object.drr_object); ++ /* DO64(drr_object.drr_allocation_txg); */ ++ DO32(drr_object.drr_type); ++ DO32(drr_object.drr_bonustype); ++ DO32(drr_object.drr_blksz); ++ DO32(drr_object.drr_bonuslen); ++ DO64(drr_object.drr_toguid); ++ break; ++ case DRR_FREEOBJECTS: ++ DO64(drr_freeobjects.drr_firstobj); ++ DO64(drr_freeobjects.drr_numobjs); ++ DO64(drr_freeobjects.drr_toguid); ++ break; ++ case DRR_WRITE: ++ DO64(drr_write.drr_object); ++ DO32(drr_write.drr_type); ++ DO64(drr_write.drr_offset); ++ DO64(drr_write.drr_length); ++ DO64(drr_write.drr_toguid); ++ DO64(drr_write.drr_key.ddk_cksum.zc_word[0]); ++ DO64(drr_write.drr_key.ddk_cksum.zc_word[1]); ++ DO64(drr_write.drr_key.ddk_cksum.zc_word[2]); ++ DO64(drr_write.drr_key.ddk_cksum.zc_word[3]); ++ DO64(drr_write.drr_key.ddk_prop); ++ break; ++ case DRR_WRITE_BYREF: ++ DO64(drr_write_byref.drr_object); ++ DO64(drr_write_byref.drr_offset); ++ DO64(drr_write_byref.drr_length); ++ DO64(drr_write_byref.drr_toguid); ++ DO64(drr_write_byref.drr_refguid); ++ DO64(drr_write_byref.drr_refobject); ++ DO64(drr_write_byref.drr_refoffset); ++ DO64(drr_write_byref.drr_key.ddk_cksum.zc_word[0]); ++ DO64(drr_write_byref.drr_key.ddk_cksum.zc_word[1]); ++ DO64(drr_write_byref.drr_key.ddk_cksum.zc_word[2]); ++ DO64(drr_write_byref.drr_key.ddk_cksum.zc_word[3]); ++ DO64(drr_write_byref.drr_key.ddk_prop); ++ break; ++ case DRR_FREE: ++ DO64(drr_free.drr_object); ++ DO64(drr_free.drr_offset); ++ DO64(drr_free.drr_length); ++ DO64(drr_free.drr_toguid); ++ break; ++ case DRR_SPILL: ++ DO64(drr_spill.drr_object); ++ DO64(drr_spill.drr_length); ++ DO64(drr_spill.drr_toguid); ++ break; ++ case DRR_END: ++ DO64(drr_end.drr_checksum.zc_word[0]); ++ DO64(drr_end.drr_checksum.zc_word[1]); ++ DO64(drr_end.drr_checksum.zc_word[2]); ++ DO64(drr_end.drr_checksum.zc_word[3]); ++ DO64(drr_end.drr_toguid); ++ break; ++ default: ++ break; ++ } ++#undef DO64 ++#undef DO32 ++} ++ ++noinline static int ++restore_object(struct restorearg *ra, objset_t *os, struct drr_object *drro) ++{ ++ int err; ++ dmu_tx_t *tx; ++ void *data = NULL; ++ ++ if (drro->drr_type == DMU_OT_NONE || ++ drro->drr_type >= DMU_OT_NUMTYPES || ++ drro->drr_bonustype >= DMU_OT_NUMTYPES || ++ drro->drr_checksumtype >= ZIO_CHECKSUM_FUNCTIONS || ++ drro->drr_compress >= ZIO_COMPRESS_FUNCTIONS || ++ P2PHASE(drro->drr_blksz, SPA_MINBLOCKSIZE) || ++ drro->drr_blksz < SPA_MINBLOCKSIZE || ++ drro->drr_blksz > SPA_MAXBLOCKSIZE || ++ drro->drr_bonuslen > DN_MAX_BONUSLEN) { ++ return (EINVAL); ++ } ++ ++ err = dmu_object_info(os, drro->drr_object, NULL); ++ ++ if (err != 0 && err != ENOENT) ++ return (EINVAL); ++ ++ if (drro->drr_bonuslen) { ++ data = restore_read(ra, P2ROUNDUP(drro->drr_bonuslen, 8)); ++ if (ra->err) ++ return (ra->err); ++ } ++ ++ if (err == ENOENT) { ++ /* currently free, want to be allocated */ ++ tx = dmu_tx_create(os); ++ 
dmu_tx_hold_bonus(tx, DMU_NEW_OBJECT); ++ err = dmu_tx_assign(tx, TXG_WAIT); ++ if (err) { ++ dmu_tx_abort(tx); ++ return (err); ++ } ++ err = dmu_object_claim(os, drro->drr_object, ++ drro->drr_type, drro->drr_blksz, ++ drro->drr_bonustype, drro->drr_bonuslen, tx); ++ dmu_tx_commit(tx); ++ } else { ++ /* currently allocated, want to be allocated */ ++ err = dmu_object_reclaim(os, drro->drr_object, ++ drro->drr_type, drro->drr_blksz, ++ drro->drr_bonustype, drro->drr_bonuslen); ++ } ++ if (err) { ++ return (EINVAL); ++ } ++ ++ tx = dmu_tx_create(os); ++ dmu_tx_hold_bonus(tx, drro->drr_object); ++ err = dmu_tx_assign(tx, TXG_WAIT); ++ if (err) { ++ dmu_tx_abort(tx); ++ return (err); ++ } ++ ++ dmu_object_set_checksum(os, drro->drr_object, drro->drr_checksumtype, ++ tx); ++ dmu_object_set_compress(os, drro->drr_object, drro->drr_compress, tx); ++ ++ if (data != NULL) { ++ dmu_buf_t *db; ++ ++ VERIFY(0 == dmu_bonus_hold(os, drro->drr_object, FTAG, &db)); ++ dmu_buf_will_dirty(db, tx); ++ ++ ASSERT3U(db->db_size, >=, drro->drr_bonuslen); ++ bcopy(data, db->db_data, drro->drr_bonuslen); ++ if (ra->byteswap) { ++ dmu_ot[drro->drr_bonustype].ot_byteswap(db->db_data, ++ drro->drr_bonuslen); ++ } ++ dmu_buf_rele(db, FTAG); ++ } ++ dmu_tx_commit(tx); ++ return (0); ++} ++ ++/* ARGSUSED */ ++noinline static int ++restore_freeobjects(struct restorearg *ra, objset_t *os, ++ struct drr_freeobjects *drrfo) ++{ ++ uint64_t obj; ++ ++ if (drrfo->drr_firstobj + drrfo->drr_numobjs < drrfo->drr_firstobj) ++ return (EINVAL); ++ ++ for (obj = drrfo->drr_firstobj; ++ obj < drrfo->drr_firstobj + drrfo->drr_numobjs; ++ (void) dmu_object_next(os, &obj, FALSE, 0)) { ++ int err; ++ ++ if (dmu_object_info(os, obj, NULL) != 0) ++ continue; ++ ++ err = dmu_free_object(os, obj); ++ if (err) ++ return (err); ++ } ++ return (0); ++} ++ ++noinline static int ++restore_write(struct restorearg *ra, objset_t *os, ++ struct drr_write *drrw) ++{ ++ dmu_tx_t *tx; ++ void *data; ++ int err; ++ ++ if (drrw->drr_offset + drrw->drr_length < drrw->drr_offset || ++ drrw->drr_type >= DMU_OT_NUMTYPES) ++ return (EINVAL); ++ ++ data = restore_read(ra, drrw->drr_length); ++ if (data == NULL) ++ return (ra->err); ++ ++ if (dmu_object_info(os, drrw->drr_object, NULL) != 0) ++ return (EINVAL); ++ ++ tx = dmu_tx_create(os); ++ ++ dmu_tx_hold_write(tx, drrw->drr_object, ++ drrw->drr_offset, drrw->drr_length); ++ err = dmu_tx_assign(tx, TXG_WAIT); ++ if (err) { ++ dmu_tx_abort(tx); ++ return (err); ++ } ++ if (ra->byteswap) ++ dmu_ot[drrw->drr_type].ot_byteswap(data, drrw->drr_length); ++ dmu_write(os, drrw->drr_object, ++ drrw->drr_offset, drrw->drr_length, data, tx); ++ dmu_tx_commit(tx); ++ return (0); ++} ++ ++/* ++ * Handle a DRR_WRITE_BYREF record. This record is used in dedup'ed ++ * streams to refer to a copy of the data that is already on the ++ * system because it came in earlier in the stream. This function ++ * finds the earlier copy of the data, and uses that copy instead of ++ * data from the stream to fulfill this write. ++ */ ++static int ++restore_write_byref(struct restorearg *ra, objset_t *os, ++ struct drr_write_byref *drrwbr) ++{ ++ dmu_tx_t *tx; ++ int err; ++ guid_map_entry_t gmesrch; ++ guid_map_entry_t *gmep; ++ avl_index_t where; ++ objset_t *ref_os = NULL; ++ dmu_buf_t *dbp; ++ ++ if (drrwbr->drr_offset + drrwbr->drr_length < drrwbr->drr_offset) ++ return (EINVAL); ++ ++ /* ++ * If the GUID of the referenced dataset is different from the ++ * GUID of the target dataset, find the referenced dataset. 
++ */ ++ if (drrwbr->drr_toguid != drrwbr->drr_refguid) { ++ gmesrch.guid = drrwbr->drr_refguid; ++ if ((gmep = avl_find(ra->guid_to_ds_map, &gmesrch, ++ &where)) == NULL) { ++ return (EINVAL); ++ } ++ if (dmu_objset_from_ds(gmep->gme_ds, &ref_os)) ++ return (EINVAL); ++ } else { ++ ref_os = os; ++ } ++ ++ err = dmu_buf_hold(ref_os, drrwbr->drr_refobject, ++ drrwbr->drr_refoffset, FTAG, &dbp, DMU_READ_PREFETCH); ++ if (err) ++ return (err); ++ ++ tx = dmu_tx_create(os); ++ ++ dmu_tx_hold_write(tx, drrwbr->drr_object, ++ drrwbr->drr_offset, drrwbr->drr_length); ++ err = dmu_tx_assign(tx, TXG_WAIT); ++ if (err) { ++ dmu_tx_abort(tx); ++ return (err); ++ } ++ dmu_write(os, drrwbr->drr_object, ++ drrwbr->drr_offset, drrwbr->drr_length, dbp->db_data, tx); ++ dmu_buf_rele(dbp, FTAG); ++ dmu_tx_commit(tx); ++ return (0); ++} ++ ++static int ++restore_spill(struct restorearg *ra, objset_t *os, struct drr_spill *drrs) ++{ ++ dmu_tx_t *tx; ++ void *data; ++ dmu_buf_t *db, *db_spill; ++ int err; ++ ++ if (drrs->drr_length < SPA_MINBLOCKSIZE || ++ drrs->drr_length > SPA_MAXBLOCKSIZE) ++ return (EINVAL); ++ ++ data = restore_read(ra, drrs->drr_length); ++ if (data == NULL) ++ return (ra->err); ++ ++ if (dmu_object_info(os, drrs->drr_object, NULL) != 0) ++ return (EINVAL); ++ ++ VERIFY(0 == dmu_bonus_hold(os, drrs->drr_object, FTAG, &db)); ++ if ((err = dmu_spill_hold_by_bonus(db, FTAG, &db_spill)) != 0) { ++ dmu_buf_rele(db, FTAG); ++ return (err); ++ } ++ ++ tx = dmu_tx_create(os); ++ ++ dmu_tx_hold_spill(tx, db->db_object); ++ ++ err = dmu_tx_assign(tx, TXG_WAIT); ++ if (err) { ++ dmu_buf_rele(db, FTAG); ++ dmu_buf_rele(db_spill, FTAG); ++ dmu_tx_abort(tx); ++ return (err); ++ } ++ dmu_buf_will_dirty(db_spill, tx); ++ ++ if (db_spill->db_size < drrs->drr_length) ++ VERIFY(0 == dbuf_spill_set_blksz(db_spill, ++ drrs->drr_length, tx)); ++ bcopy(data, db_spill->db_data, drrs->drr_length); ++ ++ dmu_buf_rele(db, FTAG); ++ dmu_buf_rele(db_spill, FTAG); ++ ++ dmu_tx_commit(tx); ++ return (0); ++} ++ ++/* ARGSUSED */ ++noinline static int ++restore_free(struct restorearg *ra, objset_t *os, ++ struct drr_free *drrf) ++{ ++ int err; ++ ++ if (drrf->drr_length != -1ULL && ++ drrf->drr_offset + drrf->drr_length < drrf->drr_offset) ++ return (EINVAL); ++ ++ if (dmu_object_info(os, drrf->drr_object, NULL) != 0) ++ return (EINVAL); ++ ++ err = dmu_free_long_range(os, drrf->drr_object, ++ drrf->drr_offset, drrf->drr_length); ++ return (err); ++} ++ ++/* ++ * NB: callers *must* call dmu_recv_end() if this succeeds. 
++ */ ++int ++dmu_recv_stream(dmu_recv_cookie_t *drc, vnode_t *vp, offset_t *voffp, ++ int cleanup_fd, uint64_t *action_handlep) ++{ ++ struct restorearg ra = { 0 }; ++ dmu_replay_record_t *drr; ++ objset_t *os; ++ zio_cksum_t pcksum; ++ int featureflags; ++ ++ if (drc->drc_drrb->drr_magic == BSWAP_64(DMU_BACKUP_MAGIC)) ++ ra.byteswap = TRUE; ++ ++ { ++ /* compute checksum of drr_begin record */ ++ dmu_replay_record_t *drr; ++ drr = kmem_zalloc(sizeof (dmu_replay_record_t), KM_SLEEP); ++ ++ drr->drr_type = DRR_BEGIN; ++ drr->drr_u.drr_begin = *drc->drc_drrb; ++ if (ra.byteswap) { ++ fletcher_4_incremental_byteswap(drr, ++ sizeof (dmu_replay_record_t), &ra.cksum); ++ } else { ++ fletcher_4_incremental_native(drr, ++ sizeof (dmu_replay_record_t), &ra.cksum); ++ } ++ kmem_free(drr, sizeof (dmu_replay_record_t)); ++ } ++ ++ if (ra.byteswap) { ++ struct drr_begin *drrb = drc->drc_drrb; ++ drrb->drr_magic = BSWAP_64(drrb->drr_magic); ++ drrb->drr_versioninfo = BSWAP_64(drrb->drr_versioninfo); ++ drrb->drr_creation_time = BSWAP_64(drrb->drr_creation_time); ++ drrb->drr_type = BSWAP_32(drrb->drr_type); ++ drrb->drr_toguid = BSWAP_64(drrb->drr_toguid); ++ drrb->drr_fromguid = BSWAP_64(drrb->drr_fromguid); ++ } ++ ++ ra.vp = vp; ++ ra.voff = *voffp; ++ ra.bufsize = 1<<20; ++ ra.buf = vmem_alloc(ra.bufsize, KM_SLEEP); ++ ++ /* these were verified in dmu_recv_begin */ ++ ASSERT(DMU_GET_STREAM_HDRTYPE(drc->drc_drrb->drr_versioninfo) == ++ DMU_SUBSTREAM); ++ ASSERT(drc->drc_drrb->drr_type < DMU_OST_NUMTYPES); ++ ++ /* ++ * Open the objset we are modifying. ++ */ ++ VERIFY(dmu_objset_from_ds(drc->drc_real_ds, &os) == 0); ++ ++ ASSERT(drc->drc_real_ds->ds_phys->ds_flags & DS_FLAG_INCONSISTENT); ++ ++ featureflags = DMU_GET_FEATUREFLAGS(drc->drc_drrb->drr_versioninfo); ++ ++ /* if this stream is dedup'ed, set up the avl tree for guid mapping */ ++ if (featureflags & DMU_BACKUP_FEATURE_DEDUP) { ++ minor_t minor; ++ ++ if (cleanup_fd == -1) { ++ ra.err = EBADF; ++ goto out; ++ } ++ ra.err = zfs_onexit_fd_hold(cleanup_fd, &minor); ++ if (ra.err) { ++ cleanup_fd = -1; ++ goto out; ++ } ++ ++ if (*action_handlep == 0) { ++ ra.guid_to_ds_map = ++ kmem_alloc(sizeof (avl_tree_t), KM_SLEEP); ++ avl_create(ra.guid_to_ds_map, guid_compare, ++ sizeof (guid_map_entry_t), ++ offsetof(guid_map_entry_t, avlnode)); ++ ra.err = zfs_onexit_add_cb(minor, ++ free_guid_map_onexit, ra.guid_to_ds_map, ++ action_handlep); ++ if (ra.err) ++ goto out; ++ } else { ++ ra.err = zfs_onexit_cb_data(minor, *action_handlep, ++ (void **)&ra.guid_to_ds_map); ++ if (ra.err) ++ goto out; ++ } ++ ++ drc->drc_guid_to_ds_map = ra.guid_to_ds_map; ++ } ++ ++ /* ++ * Read records and process them. ++ */ ++ pcksum = ra.cksum; ++ while (ra.err == 0 && ++ NULL != (drr = restore_read(&ra, sizeof (*drr)))) { ++ if (issig(JUSTLOOKING) && issig(FORREAL)) { ++ ra.err = EINTR; ++ goto out; ++ } ++ ++ if (ra.byteswap) ++ backup_byteswap(drr); ++ ++ switch (drr->drr_type) { ++ case DRR_OBJECT: ++ { ++ /* ++ * We need to make a copy of the record header, ++ * because restore_{object,write} may need to ++ * restore_read(), which will invalidate drr. 
++ */ ++ struct drr_object drro = drr->drr_u.drr_object; ++ ra.err = restore_object(&ra, os, &drro); ++ break; ++ } ++ case DRR_FREEOBJECTS: ++ { ++ struct drr_freeobjects drrfo = ++ drr->drr_u.drr_freeobjects; ++ ra.err = restore_freeobjects(&ra, os, &drrfo); ++ break; ++ } ++ case DRR_WRITE: ++ { ++ struct drr_write drrw = drr->drr_u.drr_write; ++ ra.err = restore_write(&ra, os, &drrw); ++ break; ++ } ++ case DRR_WRITE_BYREF: ++ { ++ struct drr_write_byref drrwbr = ++ drr->drr_u.drr_write_byref; ++ ra.err = restore_write_byref(&ra, os, &drrwbr); ++ break; ++ } ++ case DRR_FREE: ++ { ++ struct drr_free drrf = drr->drr_u.drr_free; ++ ra.err = restore_free(&ra, os, &drrf); ++ break; ++ } ++ case DRR_END: ++ { ++ struct drr_end drre = drr->drr_u.drr_end; ++ /* ++ * We compare against the *previous* checksum ++ * value, because the stored checksum is of ++ * everything before the DRR_END record. ++ */ ++ if (!ZIO_CHECKSUM_EQUAL(drre.drr_checksum, pcksum)) ++ ra.err = ECKSUM; ++ goto out; ++ } ++ case DRR_SPILL: ++ { ++ struct drr_spill drrs = drr->drr_u.drr_spill; ++ ra.err = restore_spill(&ra, os, &drrs); ++ break; ++ } ++ default: ++ ra.err = EINVAL; ++ goto out; ++ } ++ pcksum = ra.cksum; ++ } ++ ASSERT(ra.err != 0); ++ ++out: ++ if ((featureflags & DMU_BACKUP_FEATURE_DEDUP) && (cleanup_fd != -1)) ++ zfs_onexit_fd_rele(cleanup_fd); ++ ++ if (ra.err != 0) { ++ /* ++ * destroy what we created, so we don't leave it in the ++ * inconsistent restoring state. ++ */ ++ txg_wait_synced(drc->drc_real_ds->ds_dir->dd_pool, 0); ++ ++ (void) dsl_dataset_destroy(drc->drc_real_ds, dmu_recv_tag, ++ B_FALSE); ++ if (drc->drc_real_ds != drc->drc_logical_ds) { ++ mutex_exit(&drc->drc_logical_ds->ds_recvlock); ++ dsl_dataset_rele(drc->drc_logical_ds, dmu_recv_tag); ++ } ++ } ++ ++ vmem_free(ra.buf, ra.bufsize); ++ *voffp = ra.voff; ++ return (ra.err); ++} ++ ++struct recvendsyncarg { ++ char *tosnap; ++ uint64_t creation_time; ++ uint64_t toguid; ++}; ++ ++static int ++recv_end_check(void *arg1, void *arg2, dmu_tx_t *tx) ++{ ++ dsl_dataset_t *ds = arg1; ++ struct recvendsyncarg *resa = arg2; ++ ++ return (dsl_dataset_snapshot_check(ds, resa->tosnap, tx)); ++} ++ ++static void ++recv_end_sync(void *arg1, void *arg2, dmu_tx_t *tx) ++{ ++ dsl_dataset_t *ds = arg1; ++ struct recvendsyncarg *resa = arg2; ++ ++ dsl_dataset_snapshot_sync(ds, resa->tosnap, tx); ++ ++ /* set snapshot's creation time and guid */ ++ dmu_buf_will_dirty(ds->ds_prev->ds_dbuf, tx); ++ ds->ds_prev->ds_phys->ds_creation_time = resa->creation_time; ++ ds->ds_prev->ds_phys->ds_guid = resa->toguid; ++ ds->ds_prev->ds_phys->ds_flags &= ~DS_FLAG_INCONSISTENT; ++ ++ dmu_buf_will_dirty(ds->ds_dbuf, tx); ++ ds->ds_phys->ds_flags &= ~DS_FLAG_INCONSISTENT; ++} ++ ++static int ++add_ds_to_guidmap(avl_tree_t *guid_map, dsl_dataset_t *ds) ++{ ++ dsl_pool_t *dp = ds->ds_dir->dd_pool; ++ uint64_t snapobj = ds->ds_phys->ds_prev_snap_obj; ++ dsl_dataset_t *snapds; ++ guid_map_entry_t *gmep; ++ int err; ++ ++ ASSERT(guid_map != NULL); ++ ++ rw_enter(&dp->dp_config_rwlock, RW_READER); ++ err = dsl_dataset_hold_obj(dp, snapobj, guid_map, &snapds); ++ if (err == 0) { ++ gmep = kmem_alloc(sizeof (guid_map_entry_t), KM_SLEEP); ++ gmep->guid = snapds->ds_phys->ds_guid; ++ gmep->gme_ds = snapds; ++ avl_add(guid_map, gmep); ++ } ++ ++ rw_exit(&dp->dp_config_rwlock); ++ return (err); ++} ++ ++static int ++dmu_recv_existing_end(dmu_recv_cookie_t *drc) ++{ ++ struct recvendsyncarg resa; ++ dsl_dataset_t *ds = drc->drc_logical_ds; ++ int err, myerr; ++ ++ /* ++ * XXX 
hack; seems the ds is still dirty and dsl_pool_zil_clean() ++ * expects it to have a ds_user_ptr (and zil), but clone_swap() ++ * can close it. ++ */ ++ txg_wait_synced(ds->ds_dir->dd_pool, 0); ++ ++ if (dsl_dataset_tryown(ds, FALSE, dmu_recv_tag)) { ++ err = dsl_dataset_clone_swap(drc->drc_real_ds, ds, ++ drc->drc_force); ++ if (err) ++ goto out; ++ } else { ++ mutex_exit(&ds->ds_recvlock); ++ dsl_dataset_rele(ds, dmu_recv_tag); ++ (void) dsl_dataset_destroy(drc->drc_real_ds, dmu_recv_tag, ++ B_FALSE); ++ return (EBUSY); ++ } ++ ++ resa.creation_time = drc->drc_drrb->drr_creation_time; ++ resa.toguid = drc->drc_drrb->drr_toguid; ++ resa.tosnap = drc->drc_tosnap; ++ ++ err = dsl_sync_task_do(ds->ds_dir->dd_pool, ++ recv_end_check, recv_end_sync, ds, &resa, 3); ++ if (err) { ++ /* swap back */ ++ (void) dsl_dataset_clone_swap(drc->drc_real_ds, ds, B_TRUE); ++ } ++ ++out: ++ mutex_exit(&ds->ds_recvlock); ++ if (err == 0 && drc->drc_guid_to_ds_map != NULL) ++ (void) add_ds_to_guidmap(drc->drc_guid_to_ds_map, ds); ++ dsl_dataset_disown(ds, dmu_recv_tag); ++ myerr = dsl_dataset_destroy(drc->drc_real_ds, dmu_recv_tag, B_FALSE); ++ ASSERT3U(myerr, ==, 0); ++ return (err); ++} ++ ++static int ++dmu_recv_new_end(dmu_recv_cookie_t *drc) ++{ ++ struct recvendsyncarg resa; ++ dsl_dataset_t *ds = drc->drc_logical_ds; ++ int err; ++ ++ /* ++ * XXX hack; seems the ds is still dirty and dsl_pool_zil_clean() ++ * expects it to have a ds_user_ptr (and zil), but clone_swap() ++ * can close it. ++ */ ++ txg_wait_synced(ds->ds_dir->dd_pool, 0); ++ ++ resa.creation_time = drc->drc_drrb->drr_creation_time; ++ resa.toguid = drc->drc_drrb->drr_toguid; ++ resa.tosnap = drc->drc_tosnap; ++ ++ err = dsl_sync_task_do(ds->ds_dir->dd_pool, ++ recv_end_check, recv_end_sync, ds, &resa, 3); ++ if (err) { ++ /* clean up the fs we just recv'd into */ ++ (void) dsl_dataset_destroy(ds, dmu_recv_tag, B_FALSE); ++ } else { ++ if (drc->drc_guid_to_ds_map != NULL) ++ (void) add_ds_to_guidmap(drc->drc_guid_to_ds_map, ds); ++ /* release the hold from dmu_recv_begin */ ++ dsl_dataset_disown(ds, dmu_recv_tag); ++ } ++ return (err); ++} ++ ++int ++dmu_recv_end(dmu_recv_cookie_t *drc) ++{ ++ if (drc->drc_logical_ds != drc->drc_real_ds) ++ return (dmu_recv_existing_end(drc)); ++ else ++ return (dmu_recv_new_end(drc)); ++} +diff -uNr linux-3.2.33-go.orig/fs/zfs/zfs/dmu_traverse.c linux-3.2.33-go/fs/zfs/zfs/dmu_traverse.c +--- linux-3.2.33-go.orig/fs/zfs/zfs/dmu_traverse.c 1970-01-01 01:00:00.000000000 +0100 ++++ linux-3.2.33-go/fs/zfs/zfs/dmu_traverse.c 2012-11-16 23:25:34.349039334 +0100 +@@ -0,0 +1,498 @@ ++/* ++ * CDDL HEADER START ++ * ++ * The contents of this file are subject to the terms of the ++ * Common Development and Distribution License (the "License"). ++ * You may not use this file except in compliance with the License. ++ * ++ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE ++ * or http://www.opensolaris.org/os/licensing. ++ * See the License for the specific language governing permissions ++ * and limitations under the License. ++ * ++ * When distributing Covered Code, include this CDDL HEADER in each ++ * file and include the License file at usr/src/OPENSOLARIS.LICENSE. ++ * If applicable, add the following below this CDDL HEADER, with the ++ * fields enclosed by brackets "[]" replaced with your own identifying ++ * information: Portions Copyright [yyyy] [name of copyright owner] ++ * ++ * CDDL HEADER END ++ */ ++/* ++ * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved. 
++ */ ++ ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++ ++int zfs_pd_blks_max = 100; ++ ++typedef struct prefetch_data { ++ kmutex_t pd_mtx; ++ kcondvar_t pd_cv; ++ int pd_blks_max; ++ int pd_blks_fetched; ++ int pd_flags; ++ boolean_t pd_cancel; ++ boolean_t pd_exited; ++} prefetch_data_t; ++ ++typedef struct traverse_data { ++ spa_t *td_spa; ++ uint64_t td_objset; ++ blkptr_t *td_rootbp; ++ uint64_t td_min_txg; ++ int td_flags; ++ prefetch_data_t *td_pfd; ++ blkptr_cb_t *td_func; ++ void *td_arg; ++} traverse_data_t; ++ ++static int traverse_dnode(traverse_data_t *td, const dnode_phys_t *dnp, ++ arc_buf_t *buf, uint64_t objset, uint64_t object); ++ ++static int ++traverse_zil_block(zilog_t *zilog, blkptr_t *bp, void *arg, uint64_t claim_txg) ++{ ++ traverse_data_t *td = arg; ++ zbookmark_t zb; ++ ++ if (bp->blk_birth == 0) ++ return (0); ++ ++ if (claim_txg == 0 && bp->blk_birth >= spa_first_txg(td->td_spa)) ++ return (0); ++ ++ SET_BOOKMARK(&zb, td->td_objset, ZB_ZIL_OBJECT, ZB_ZIL_LEVEL, ++ bp->blk_cksum.zc_word[ZIL_ZC_SEQ]); ++ ++ (void) td->td_func(td->td_spa, zilog, bp, NULL, &zb, NULL, td->td_arg); ++ ++ return (0); ++} ++ ++static int ++traverse_zil_record(zilog_t *zilog, lr_t *lrc, void *arg, uint64_t claim_txg) ++{ ++ traverse_data_t *td = arg; ++ ++ if (lrc->lrc_txtype == TX_WRITE) { ++ lr_write_t *lr = (lr_write_t *)lrc; ++ blkptr_t *bp = &lr->lr_blkptr; ++ zbookmark_t zb; ++ ++ if (bp->blk_birth == 0) ++ return (0); ++ ++ if (claim_txg == 0 || bp->blk_birth < claim_txg) ++ return (0); ++ ++ SET_BOOKMARK(&zb, td->td_objset, lr->lr_foid, ++ ZB_ZIL_LEVEL, lr->lr_offset / BP_GET_LSIZE(bp)); ++ ++ (void) td->td_func(td->td_spa, zilog, bp, NULL, &zb, NULL, ++ td->td_arg); ++ } ++ return (0); ++} ++ ++static void ++traverse_zil(traverse_data_t *td, zil_header_t *zh) ++{ ++ uint64_t claim_txg = zh->zh_claim_txg; ++ zilog_t *zilog; ++ ++ /* ++ * We only want to visit blocks that have been claimed but not yet ++ * replayed; plus, in read-only mode, blocks that are already stable. 
++ */ ++ if (claim_txg == 0 && spa_writeable(td->td_spa)) ++ return; ++ ++ zilog = zil_alloc(spa_get_dsl(td->td_spa)->dp_meta_objset, zh); ++ ++ (void) zil_parse(zilog, traverse_zil_block, traverse_zil_record, td, ++ claim_txg); ++ ++ zil_free(zilog); ++} ++ ++static int ++traverse_visitbp(traverse_data_t *td, const dnode_phys_t *dnp, ++ arc_buf_t *pbuf, blkptr_t *bp, const zbookmark_t *zb) ++{ ++ zbookmark_t czb; ++ int err = 0, lasterr = 0; ++ arc_buf_t *buf = NULL; ++ prefetch_data_t *pd = td->td_pfd; ++ boolean_t hard = td->td_flags & TRAVERSE_HARD; ++ ++ if (bp->blk_birth == 0) { ++ err = td->td_func(td->td_spa, NULL, NULL, pbuf, zb, dnp, ++ td->td_arg); ++ return (err); ++ } ++ ++ if (bp->blk_birth <= td->td_min_txg) ++ return (0); ++ ++ if (pd && !pd->pd_exited && ++ ((pd->pd_flags & TRAVERSE_PREFETCH_DATA) || ++ BP_GET_TYPE(bp) == DMU_OT_DNODE || BP_GET_LEVEL(bp) > 0)) { ++ mutex_enter(&pd->pd_mtx); ++ ASSERT(pd->pd_blks_fetched >= 0); ++ while (pd->pd_blks_fetched == 0 && !pd->pd_exited) ++ cv_wait(&pd->pd_cv, &pd->pd_mtx); ++ pd->pd_blks_fetched--; ++ cv_broadcast(&pd->pd_cv); ++ mutex_exit(&pd->pd_mtx); ++ } ++ ++ if (td->td_flags & TRAVERSE_PRE) { ++ err = td->td_func(td->td_spa, NULL, bp, pbuf, zb, dnp, ++ td->td_arg); ++ if (err == TRAVERSE_VISIT_NO_CHILDREN) ++ return (0); ++ if (err) ++ return (err); ++ } ++ ++ if (BP_GET_LEVEL(bp) > 0) { ++ uint32_t flags = ARC_WAIT; ++ int i; ++ blkptr_t *cbp; ++ int epb = BP_GET_LSIZE(bp) >> SPA_BLKPTRSHIFT; ++ ++ err = dsl_read(NULL, td->td_spa, bp, pbuf, ++ arc_getbuf_func, &buf, ++ ZIO_PRIORITY_ASYNC_READ, ZIO_FLAG_CANFAIL, &flags, zb); ++ if (err) ++ return (err); ++ ++ /* recursively visitbp() blocks below this */ ++ cbp = buf->b_data; ++ for (i = 0; i < epb; i++, cbp++) { ++ SET_BOOKMARK(&czb, zb->zb_objset, zb->zb_object, ++ zb->zb_level - 1, ++ zb->zb_blkid * epb + i); ++ err = traverse_visitbp(td, dnp, buf, cbp, &czb); ++ if (err) { ++ if (!hard) ++ break; ++ lasterr = err; ++ } ++ } ++ } else if (BP_GET_TYPE(bp) == DMU_OT_DNODE) { ++ uint32_t flags = ARC_WAIT; ++ int i; ++ int epb = BP_GET_LSIZE(bp) >> DNODE_SHIFT; ++ ++ err = dsl_read(NULL, td->td_spa, bp, pbuf, ++ arc_getbuf_func, &buf, ++ ZIO_PRIORITY_ASYNC_READ, ZIO_FLAG_CANFAIL, &flags, zb); ++ if (err) ++ return (err); ++ ++ /* recursively visitbp() blocks below this */ ++ dnp = buf->b_data; ++ for (i = 0; i < epb; i++, dnp++) { ++ err = traverse_dnode(td, dnp, buf, zb->zb_objset, ++ zb->zb_blkid * epb + i); ++ if (err) { ++ if (!hard) ++ break; ++ lasterr = err; ++ } ++ } ++ } else if (BP_GET_TYPE(bp) == DMU_OT_OBJSET) { ++ uint32_t flags = ARC_WAIT; ++ objset_phys_t *osp; ++ dnode_phys_t *dnp; ++ ++ err = dsl_read_nolock(NULL, td->td_spa, bp, ++ arc_getbuf_func, &buf, ++ ZIO_PRIORITY_ASYNC_READ, ZIO_FLAG_CANFAIL, &flags, zb); ++ if (err) ++ return (err); ++ ++ osp = buf->b_data; ++ dnp = &osp->os_meta_dnode; ++ err = traverse_dnode(td, dnp, buf, zb->zb_objset, ++ DMU_META_DNODE_OBJECT); ++ if (err && hard) { ++ lasterr = err; ++ err = 0; ++ } ++ if (err == 0 && arc_buf_size(buf) >= sizeof (objset_phys_t)) { ++ dnp = &osp->os_userused_dnode; ++ err = traverse_dnode(td, dnp, buf, zb->zb_objset, ++ DMU_USERUSED_OBJECT); ++ } ++ if (err && hard) { ++ lasterr = err; ++ err = 0; ++ } ++ if (err == 0 && arc_buf_size(buf) >= sizeof (objset_phys_t)) { ++ dnp = &osp->os_groupused_dnode; ++ err = traverse_dnode(td, dnp, buf, zb->zb_objset, ++ DMU_GROUPUSED_OBJECT); ++ } ++ } ++ ++ if (buf) ++ (void) arc_buf_remove_ref(buf, &buf); ++ ++ if (err == 0 && lasterr == 0 && (td->td_flags 
& TRAVERSE_POST)) { ++ err = td->td_func(td->td_spa, NULL, bp, pbuf, zb, dnp, ++ td->td_arg); ++ } ++ ++ return (err != 0 ? err : lasterr); ++} ++ ++static int ++traverse_dnode(traverse_data_t *td, const dnode_phys_t *dnp, ++ arc_buf_t *buf, uint64_t objset, uint64_t object) ++{ ++ int j, err = 0, lasterr = 0; ++ zbookmark_t czb; ++ boolean_t hard = (td->td_flags & TRAVERSE_HARD); ++ ++ for (j = 0; j < dnp->dn_nblkptr; j++) { ++ SET_BOOKMARK(&czb, objset, object, dnp->dn_nlevels - 1, j); ++ err = traverse_visitbp(td, dnp, buf, ++ (blkptr_t *)&dnp->dn_blkptr[j], &czb); ++ if (err) { ++ if (!hard) ++ break; ++ lasterr = err; ++ } ++ } ++ ++ if (dnp->dn_flags & DNODE_FLAG_SPILL_BLKPTR) { ++ SET_BOOKMARK(&czb, objset, ++ object, 0, DMU_SPILL_BLKID); ++ err = traverse_visitbp(td, dnp, buf, ++ (blkptr_t *)&dnp->dn_spill, &czb); ++ if (err) { ++ if (!hard) ++ return (err); ++ lasterr = err; ++ } ++ } ++ return (err != 0 ? err : lasterr); ++} ++ ++/* ARGSUSED */ ++static int ++traverse_prefetcher(spa_t *spa, zilog_t *zilog, const blkptr_t *bp, ++ arc_buf_t *pbuf, const zbookmark_t *zb, const dnode_phys_t *dnp, ++ void *arg) ++{ ++ prefetch_data_t *pfd = arg; ++ uint32_t aflags = ARC_NOWAIT | ARC_PREFETCH; ++ ++ ASSERT(pfd->pd_blks_fetched >= 0); ++ if (pfd->pd_cancel) ++ return (EINTR); ++ ++ if (bp == NULL || !((pfd->pd_flags & TRAVERSE_PREFETCH_DATA) || ++ BP_GET_TYPE(bp) == DMU_OT_DNODE || BP_GET_LEVEL(bp) > 0) || ++ BP_GET_TYPE(bp) == DMU_OT_INTENT_LOG) ++ return (0); ++ ++ mutex_enter(&pfd->pd_mtx); ++ while (!pfd->pd_cancel && pfd->pd_blks_fetched >= pfd->pd_blks_max) ++ cv_wait(&pfd->pd_cv, &pfd->pd_mtx); ++ pfd->pd_blks_fetched++; ++ cv_broadcast(&pfd->pd_cv); ++ mutex_exit(&pfd->pd_mtx); ++ ++ (void) dsl_read(NULL, spa, bp, pbuf, NULL, NULL, ++ ZIO_PRIORITY_ASYNC_READ, ++ ZIO_FLAG_CANFAIL | ZIO_FLAG_SPECULATIVE, ++ &aflags, zb); ++ ++ return (0); ++} ++ ++static void ++traverse_prefetch_thread(void *arg) ++{ ++ traverse_data_t *td_main = arg; ++ traverse_data_t td = *td_main; ++ zbookmark_t czb; ++ ++ td.td_func = traverse_prefetcher; ++ td.td_arg = td_main->td_pfd; ++ td.td_pfd = NULL; ++ ++ SET_BOOKMARK(&czb, td.td_objset, ++ ZB_ROOT_OBJECT, ZB_ROOT_LEVEL, ZB_ROOT_BLKID); ++ (void) traverse_visitbp(&td, NULL, NULL, td.td_rootbp, &czb); ++ ++ mutex_enter(&td_main->td_pfd->pd_mtx); ++ td_main->td_pfd->pd_exited = B_TRUE; ++ cv_broadcast(&td_main->td_pfd->pd_cv); ++ mutex_exit(&td_main->td_pfd->pd_mtx); ++} ++ ++/* ++ * NB: dataset must not be changing on-disk (eg, is a snapshot or we are ++ * in syncing context). ++ */ ++static int ++traverse_impl(spa_t *spa, dsl_dataset_t *ds, blkptr_t *rootbp, ++ uint64_t txg_start, int flags, blkptr_cb_t func, void *arg) ++{ ++ traverse_data_t *td; ++ prefetch_data_t *pd; ++ zbookmark_t *czb; ++ int err; ++ ++ td = kmem_alloc(sizeof(traverse_data_t), KM_PUSHPAGE); ++ pd = kmem_zalloc(sizeof(prefetch_data_t), KM_PUSHPAGE); ++ czb = kmem_alloc(sizeof(zbookmark_t), KM_PUSHPAGE); ++ ++ td->td_spa = spa; ++ td->td_objset = ds ? ds->ds_object : 0; ++ td->td_rootbp = rootbp; ++ td->td_min_txg = txg_start; ++ td->td_func = func; ++ td->td_arg = arg; ++ td->td_pfd = pd; ++ td->td_flags = flags; ++ ++ pd->pd_blks_max = zfs_pd_blks_max; ++ pd->pd_flags = flags; ++ mutex_init(&pd->pd_mtx, NULL, MUTEX_DEFAULT, NULL); ++ cv_init(&pd->pd_cv, NULL, CV_DEFAULT, NULL); ++ ++ /* See comment on ZIL traversal in dsl_scan_visitds. 
*/ ++ if (ds != NULL && !dsl_dataset_is_snapshot(ds)) { ++ objset_t *os; ++ ++ err = dmu_objset_from_ds(ds, &os); ++ if (err) ++ return (err); ++ ++ traverse_zil(td, &os->os_zil_header); ++ } ++ ++ if (!(flags & TRAVERSE_PREFETCH) || ++ 0 == taskq_dispatch(system_taskq, traverse_prefetch_thread, ++ td, TQ_NOQUEUE)) ++ pd->pd_exited = B_TRUE; ++ ++ SET_BOOKMARK(czb, td->td_objset, ++ ZB_ROOT_OBJECT, ZB_ROOT_LEVEL, ZB_ROOT_BLKID); ++ err = traverse_visitbp(td, NULL, NULL, rootbp, czb); ++ ++ mutex_enter(&pd->pd_mtx); ++ pd->pd_cancel = B_TRUE; ++ cv_broadcast(&pd->pd_cv); ++ while (!pd->pd_exited) ++ cv_wait(&pd->pd_cv, &pd->pd_mtx); ++ mutex_exit(&pd->pd_mtx); ++ ++ mutex_destroy(&pd->pd_mtx); ++ cv_destroy(&pd->pd_cv); ++ ++ kmem_free(czb, sizeof(zbookmark_t)); ++ kmem_free(pd, sizeof(struct prefetch_data)); ++ kmem_free(td, sizeof(struct traverse_data)); ++ ++ return (err); ++} ++ ++/* ++ * NB: dataset must not be changing on-disk (eg, is a snapshot or we are ++ * in syncing context). ++ */ ++int ++traverse_dataset(dsl_dataset_t *ds, uint64_t txg_start, int flags, ++ blkptr_cb_t func, void *arg) ++{ ++ return (traverse_impl(ds->ds_dir->dd_pool->dp_spa, ds, ++ &ds->ds_phys->ds_bp, txg_start, flags, func, arg)); ++} ++ ++/* ++ * NB: pool must not be changing on-disk (eg, from zdb or sync context). ++ */ ++int ++traverse_pool(spa_t *spa, uint64_t txg_start, int flags, ++ blkptr_cb_t func, void *arg) ++{ ++ int err, lasterr = 0; ++ uint64_t obj; ++ dsl_pool_t *dp = spa_get_dsl(spa); ++ objset_t *mos = dp->dp_meta_objset; ++ boolean_t hard = (flags & TRAVERSE_HARD); ++ ++ /* visit the MOS */ ++ err = traverse_impl(spa, NULL, spa_get_rootblkptr(spa), ++ txg_start, flags, func, arg); ++ if (err) ++ return (err); ++ ++ /* visit each dataset */ ++ for (obj = 1; err == 0 || (err != ESRCH && hard); ++ err = dmu_object_next(mos, &obj, FALSE, txg_start)) { ++ dmu_object_info_t doi; ++ ++ err = dmu_object_info(mos, obj, &doi); ++ if (err) { ++ if (!hard) ++ return (err); ++ lasterr = err; ++ continue; ++ } ++ ++ if (doi.doi_type == DMU_OT_DSL_DATASET) { ++ dsl_dataset_t *ds; ++ uint64_t txg = txg_start; ++ ++ rw_enter(&dp->dp_config_rwlock, RW_READER); ++ err = dsl_dataset_hold_obj(dp, obj, FTAG, &ds); ++ rw_exit(&dp->dp_config_rwlock); ++ if (err) { ++ if (!hard) ++ return (err); ++ lasterr = err; ++ continue; ++ } ++ if (ds->ds_phys->ds_prev_snap_txg > txg) ++ txg = ds->ds_phys->ds_prev_snap_txg; ++ err = traverse_dataset(ds, txg, flags, func, arg); ++ dsl_dataset_rele(ds, FTAG); ++ if (err) { ++ if (!hard) ++ return (err); ++ lasterr = err; ++ } ++ } ++ } ++ if (err == ESRCH) ++ err = 0; ++ return (err != 0 ? err : lasterr); ++} ++ ++#if defined(_KERNEL) && defined(HAVE_SPL) ++EXPORT_SYMBOL(traverse_dataset); ++EXPORT_SYMBOL(traverse_pool); ++ ++module_param(zfs_pd_blks_max, int, 0644); ++MODULE_PARM_DESC(zfs_pd_blks_max, "Max number of blocks to prefetch"); ++#endif +diff -uNr linux-3.2.33-go.orig/fs/zfs/zfs/dmu_tx.c linux-3.2.33-go/fs/zfs/zfs/dmu_tx.c +--- linux-3.2.33-go.orig/fs/zfs/zfs/dmu_tx.c 1970-01-01 01:00:00.000000000 +0100 ++++ linux-3.2.33-go/fs/zfs/zfs/dmu_tx.c 2012-11-16 23:25:34.348039346 +0100 +@@ -0,0 +1,1453 @@ ++/* ++ * CDDL HEADER START ++ * ++ * The contents of this file are subject to the terms of the ++ * Common Development and Distribution License (the "License"). ++ * You may not use this file except in compliance with the License. ++ * ++ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE ++ * or http://www.opensolaris.org/os/licensing. 
++ * See the License for the specific language governing permissions ++ * and limitations under the License. ++ * ++ * When distributing Covered Code, include this CDDL HEADER in each ++ * file and include the License file at usr/src/OPENSOLARIS.LICENSE. ++ * If applicable, add the following below this CDDL HEADER, with the ++ * fields enclosed by brackets "[]" replaced with your own identifying ++ * information: Portions Copyright [yyyy] [name of copyright owner] ++ * ++ * CDDL HEADER END ++ */ ++/* ++ * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved. ++ */ ++/* ++ * Copyright 2011 Nexenta Systems, Inc. All rights reserved. ++ */ ++ ++#include ++#include ++#include ++#include ++#include ++#include /* for dsl_dataset_block_freeable() */ ++#include /* for dsl_dir_tempreserve_*() */ ++#include ++#include /* for fzap_default_block_shift */ ++#include ++#include ++#include ++#include ++#include ++ ++typedef void (*dmu_tx_hold_func_t)(dmu_tx_t *tx, struct dnode *dn, ++ uint64_t arg1, uint64_t arg2); ++ ++dmu_tx_stats_t dmu_tx_stats = { ++ { "dmu_tx_assigned", KSTAT_DATA_UINT64 }, ++ { "dmu_tx_delay", KSTAT_DATA_UINT64 }, ++ { "dmu_tx_error", KSTAT_DATA_UINT64 }, ++ { "dmu_tx_suspended", KSTAT_DATA_UINT64 }, ++ { "dmu_tx_group", KSTAT_DATA_UINT64 }, ++ { "dmu_tx_how", KSTAT_DATA_UINT64 }, ++ { "dmu_tx_memory_reserve", KSTAT_DATA_UINT64 }, ++ { "dmu_tx_memory_reclaim", KSTAT_DATA_UINT64 }, ++ { "dmu_tx_memory_inflight", KSTAT_DATA_UINT64 }, ++ { "dmu_tx_dirty_throttle", KSTAT_DATA_UINT64 }, ++ { "dmu_tx_write_limit", KSTAT_DATA_UINT64 }, ++ { "dmu_tx_quota", KSTAT_DATA_UINT64 }, ++}; ++ ++static kstat_t *dmu_tx_ksp; ++ ++dmu_tx_t * ++dmu_tx_create_dd(dsl_dir_t *dd) ++{ ++ dmu_tx_t *tx = kmem_zalloc(sizeof (dmu_tx_t), KM_PUSHPAGE); ++ tx->tx_dir = dd; ++ if (dd) ++ tx->tx_pool = dd->dd_pool; ++ list_create(&tx->tx_holds, sizeof (dmu_tx_hold_t), ++ offsetof(dmu_tx_hold_t, txh_node)); ++ list_create(&tx->tx_callbacks, sizeof (dmu_tx_callback_t), ++ offsetof(dmu_tx_callback_t, dcb_node)); ++#ifdef DEBUG_DMU_TX ++ refcount_create(&tx->tx_space_written); ++ refcount_create(&tx->tx_space_freed); ++#endif ++ return (tx); ++} ++ ++dmu_tx_t * ++dmu_tx_create(objset_t *os) ++{ ++ dmu_tx_t *tx = dmu_tx_create_dd(os->os_dsl_dataset->ds_dir); ++ tx->tx_objset = os; ++ tx->tx_lastsnap_txg = dsl_dataset_prev_snap_txg(os->os_dsl_dataset); ++ return (tx); ++} ++ ++dmu_tx_t * ++dmu_tx_create_assigned(struct dsl_pool *dp, uint64_t txg) ++{ ++ dmu_tx_t *tx = dmu_tx_create_dd(NULL); ++ ++ ASSERT3U(txg, <=, dp->dp_tx.tx_open_txg); ++ tx->tx_pool = dp; ++ tx->tx_txg = txg; ++ tx->tx_anyobj = TRUE; ++ ++ return (tx); ++} ++ ++int ++dmu_tx_is_syncing(dmu_tx_t *tx) ++{ ++ return (tx->tx_anyobj); ++} ++ ++int ++dmu_tx_private_ok(dmu_tx_t *tx) ++{ ++ return (tx->tx_anyobj); ++} ++ ++static dmu_tx_hold_t * ++dmu_tx_hold_object_impl(dmu_tx_t *tx, objset_t *os, uint64_t object, ++ enum dmu_tx_hold_type type, uint64_t arg1, uint64_t arg2) ++{ ++ dmu_tx_hold_t *txh; ++ dnode_t *dn = NULL; ++ int err; ++ ++ if (object != DMU_NEW_OBJECT) { ++ err = dnode_hold(os, object, tx, &dn); ++ if (err) { ++ tx->tx_err = err; ++ return (NULL); ++ } ++ ++ if (err == 0 && tx->tx_txg != 0) { ++ mutex_enter(&dn->dn_mtx); ++ /* ++ * dn->dn_assigned_txg == tx->tx_txg doesn't pose a ++ * problem, but there's no way for it to happen (for ++ * now, at least). 
++ */ ++ ASSERT(dn->dn_assigned_txg == 0); ++ dn->dn_assigned_txg = tx->tx_txg; ++ (void) refcount_add(&dn->dn_tx_holds, tx); ++ mutex_exit(&dn->dn_mtx); ++ } ++ } ++ ++ txh = kmem_zalloc(sizeof (dmu_tx_hold_t), KM_PUSHPAGE); ++ txh->txh_tx = tx; ++ txh->txh_dnode = dn; ++#ifdef DEBUG_DMU_TX ++ txh->txh_type = type; ++ txh->txh_arg1 = arg1; ++ txh->txh_arg2 = arg2; ++#endif ++ list_insert_tail(&tx->tx_holds, txh); ++ ++ return (txh); ++} ++ ++void ++dmu_tx_add_new_object(dmu_tx_t *tx, objset_t *os, uint64_t object) ++{ ++ /* ++ * If we're syncing, they can manipulate any object anyhow, and ++ * the hold on the dnode_t can cause problems. ++ */ ++ if (!dmu_tx_is_syncing(tx)) { ++ (void) dmu_tx_hold_object_impl(tx, os, ++ object, THT_NEWOBJECT, 0, 0); ++ } ++} ++ ++static int ++dmu_tx_check_ioerr(zio_t *zio, dnode_t *dn, int level, uint64_t blkid) ++{ ++ int err; ++ dmu_buf_impl_t *db; ++ ++ rw_enter(&dn->dn_struct_rwlock, RW_READER); ++ db = dbuf_hold_level(dn, level, blkid, FTAG); ++ rw_exit(&dn->dn_struct_rwlock); ++ if (db == NULL) ++ return (EIO); ++ err = dbuf_read(db, zio, DB_RF_CANFAIL | DB_RF_NOPREFETCH); ++ dbuf_rele(db, FTAG); ++ return (err); ++} ++ ++static void ++dmu_tx_count_twig(dmu_tx_hold_t *txh, dnode_t *dn, dmu_buf_impl_t *db, ++ int level, uint64_t blkid, boolean_t freeable, uint64_t *history) ++{ ++ objset_t *os = dn->dn_objset; ++ dsl_dataset_t *ds = os->os_dsl_dataset; ++ int epbs = dn->dn_indblkshift - SPA_BLKPTRSHIFT; ++ dmu_buf_impl_t *parent = NULL; ++ blkptr_t *bp = NULL; ++ uint64_t space; ++ ++ if (level >= dn->dn_nlevels || history[level] == blkid) ++ return; ++ ++ history[level] = blkid; ++ ++ space = (level == 0) ? dn->dn_datablksz : (1ULL << dn->dn_indblkshift); ++ ++ if (db == NULL || db == dn->dn_dbuf) { ++ ASSERT(level != 0); ++ db = NULL; ++ } else { ++ ASSERT(DB_DNODE(db) == dn); ++ ASSERT(db->db_level == level); ++ ASSERT(db->db.db_size == space); ++ ASSERT(db->db_blkid == blkid); ++ bp = db->db_blkptr; ++ parent = db->db_parent; ++ } ++ ++ freeable = (bp && (freeable || ++ dsl_dataset_block_freeable(ds, bp, bp->blk_birth))); ++ ++ if (freeable) ++ txh->txh_space_tooverwrite += space; ++ else ++ txh->txh_space_towrite += space; ++ if (bp) ++ txh->txh_space_tounref += bp_get_dsize(os->os_spa, bp); ++ ++ dmu_tx_count_twig(txh, dn, parent, level + 1, ++ blkid >> epbs, freeable, history); ++} ++ ++/* ARGSUSED */ ++static void ++dmu_tx_count_write(dmu_tx_hold_t *txh, uint64_t off, uint64_t len) ++{ ++ dnode_t *dn = txh->txh_dnode; ++ uint64_t start, end, i; ++ int min_bs, max_bs, min_ibs, max_ibs, epbs, bits; ++ int err = 0; ++ int l; ++ ++ if (len == 0) ++ return; ++ ++ min_bs = SPA_MINBLOCKSHIFT; ++ max_bs = SPA_MAXBLOCKSHIFT; ++ min_ibs = DN_MIN_INDBLKSHIFT; ++ max_ibs = DN_MAX_INDBLKSHIFT; ++ ++ if (dn) { ++ uint64_t history[DN_MAX_LEVELS]; ++ int nlvls = dn->dn_nlevels; ++ int delta; ++ ++ /* ++ * For i/o error checking, read the first and last level-0 ++ * blocks (if they are not aligned), and all the level-1 blocks. ++ */ ++ if (dn->dn_maxblkid == 0) { ++ delta = dn->dn_datablksz; ++ start = (off < dn->dn_datablksz) ? 0 : 1; ++ end = (off+len <= dn->dn_datablksz) ? 
0 : 1; ++ if (start == 0 && (off > 0 || len < dn->dn_datablksz)) { ++ err = dmu_tx_check_ioerr(NULL, dn, 0, 0); ++ if (err) ++ goto out; ++ delta -= off; ++ } ++ } else { ++ zio_t *zio = zio_root(dn->dn_objset->os_spa, ++ NULL, NULL, ZIO_FLAG_CANFAIL); ++ ++ /* first level-0 block */ ++ start = off >> dn->dn_datablkshift; ++ if (P2PHASE(off, dn->dn_datablksz) || ++ len < dn->dn_datablksz) { ++ err = dmu_tx_check_ioerr(zio, dn, 0, start); ++ if (err) ++ goto out; ++ } ++ ++ /* last level-0 block */ ++ end = (off+len-1) >> dn->dn_datablkshift; ++ if (end != start && end <= dn->dn_maxblkid && ++ P2PHASE(off+len, dn->dn_datablksz)) { ++ err = dmu_tx_check_ioerr(zio, dn, 0, end); ++ if (err) ++ goto out; ++ } ++ ++ /* level-1 blocks */ ++ if (nlvls > 1) { ++ int shft = dn->dn_indblkshift - SPA_BLKPTRSHIFT; ++ for (i = (start>>shft)+1; i < end>>shft; i++) { ++ err = dmu_tx_check_ioerr(zio, dn, 1, i); ++ if (err) ++ goto out; ++ } ++ } ++ ++ err = zio_wait(zio); ++ if (err) ++ goto out; ++ delta = P2NPHASE(off, dn->dn_datablksz); ++ } ++ ++ if (dn->dn_maxblkid > 0) { ++ /* ++ * The blocksize can't change, ++ * so we can make a more precise estimate. ++ */ ++ ASSERT(dn->dn_datablkshift != 0); ++ min_bs = max_bs = dn->dn_datablkshift; ++ min_ibs = max_ibs = dn->dn_indblkshift; ++ } else if (dn->dn_indblkshift > max_ibs) { ++ /* ++ * This ensures that if we reduce DN_MAX_INDBLKSHIFT, ++ * the code will still work correctly on older pools. ++ */ ++ min_ibs = max_ibs = dn->dn_indblkshift; ++ } ++ ++ /* ++ * If this write is not off the end of the file ++ * we need to account for overwrites/unref. ++ */ ++ if (start <= dn->dn_maxblkid) { ++ for (l = 0; l < DN_MAX_LEVELS; l++) ++ history[l] = -1ULL; ++ } ++ while (start <= dn->dn_maxblkid) { ++ dmu_buf_impl_t *db; ++ ++ rw_enter(&dn->dn_struct_rwlock, RW_READER); ++ err = dbuf_hold_impl(dn, 0, start, FALSE, FTAG, &db); ++ rw_exit(&dn->dn_struct_rwlock); ++ ++ if (err) { ++ txh->txh_tx->tx_err = err; ++ return; ++ } ++ ++ dmu_tx_count_twig(txh, dn, db, 0, start, B_FALSE, ++ history); ++ dbuf_rele(db, FTAG); ++ if (++start > end) { ++ /* ++ * Account for new indirects appearing ++ * before this IO gets assigned into a txg. ++ */ ++ bits = 64 - min_bs; ++ epbs = min_ibs - SPA_BLKPTRSHIFT; ++ for (bits -= epbs * (nlvls - 1); ++ bits >= 0; bits -= epbs) ++ txh->txh_fudge += 1ULL << max_ibs; ++ goto out; ++ } ++ off += delta; ++ if (len >= delta) ++ len -= delta; ++ delta = dn->dn_datablksz; ++ } ++ } ++ ++ /* ++ * 'end' is the last thing we will access, not one past. ++ * This way we won't overflow when accessing the last byte. ++ */ ++ start = P2ALIGN(off, 1ULL << max_bs); ++ end = P2ROUNDUP(off + len, 1ULL << max_bs) - 1; ++ txh->txh_space_towrite += end - start + 1; ++ ++ start >>= min_bs; ++ end >>= min_bs; ++ ++ epbs = min_ibs - SPA_BLKPTRSHIFT; ++ ++ /* ++ * The object contains at most 2^(64 - min_bs) blocks, ++ * and each indirect level maps 2^epbs. ++ */ ++ for (bits = 64 - min_bs; bits >= 0; bits -= epbs) { ++ start >>= epbs; ++ end >>= epbs; ++ ASSERT3U(end, >=, start); ++ txh->txh_space_towrite += (end - start + 1) << max_ibs; ++ if (start != 0) { ++ /* ++ * We also need a new blkid=0 indirect block ++ * to reference any existing file data. 
++ */ ++ txh->txh_space_towrite += 1ULL << max_ibs; ++ } ++ } ++ ++out: ++ if (txh->txh_space_towrite + txh->txh_space_tooverwrite > ++ 2 * DMU_MAX_ACCESS) ++ err = EFBIG; ++ ++ if (err) ++ txh->txh_tx->tx_err = err; ++} ++ ++static void ++dmu_tx_count_dnode(dmu_tx_hold_t *txh) ++{ ++ dnode_t *dn = txh->txh_dnode; ++ dnode_t *mdn = DMU_META_DNODE(txh->txh_tx->tx_objset); ++ uint64_t space = mdn->dn_datablksz + ++ ((mdn->dn_nlevels-1) << mdn->dn_indblkshift); ++ ++ if (dn && dn->dn_dbuf->db_blkptr && ++ dsl_dataset_block_freeable(dn->dn_objset->os_dsl_dataset, ++ dn->dn_dbuf->db_blkptr, dn->dn_dbuf->db_blkptr->blk_birth)) { ++ txh->txh_space_tooverwrite += space; ++ txh->txh_space_tounref += space; ++ } else { ++ txh->txh_space_towrite += space; ++ if (dn && dn->dn_dbuf->db_blkptr) ++ txh->txh_space_tounref += space; ++ } ++} ++ ++void ++dmu_tx_hold_write(dmu_tx_t *tx, uint64_t object, uint64_t off, int len) ++{ ++ dmu_tx_hold_t *txh; ++ ++ ASSERT(tx->tx_txg == 0); ++ ASSERT(len < DMU_MAX_ACCESS); ++ ASSERT(len == 0 || UINT64_MAX - off >= len - 1); ++ ++ txh = dmu_tx_hold_object_impl(tx, tx->tx_objset, ++ object, THT_WRITE, off, len); ++ if (txh == NULL) ++ return; ++ ++ dmu_tx_count_write(txh, off, len); ++ dmu_tx_count_dnode(txh); ++} ++ ++static void ++dmu_tx_count_free(dmu_tx_hold_t *txh, uint64_t off, uint64_t len) ++{ ++ uint64_t blkid, nblks, lastblk; ++ uint64_t space = 0, unref = 0, skipped = 0; ++ dnode_t *dn = txh->txh_dnode; ++ dsl_dataset_t *ds = dn->dn_objset->os_dsl_dataset; ++ spa_t *spa = txh->txh_tx->tx_pool->dp_spa; ++ int epbs; ++ ++ if (dn->dn_nlevels == 0) ++ return; ++ ++ /* ++ * The struct_rwlock protects us against dn_nlevels ++ * changing, in case (against all odds) we manage to dirty & ++ * sync out the changes after we check for being dirty. ++ * Also, dbuf_hold_impl() wants us to have the struct_rwlock. ++ */ ++ rw_enter(&dn->dn_struct_rwlock, RW_READER); ++ epbs = dn->dn_indblkshift - SPA_BLKPTRSHIFT; ++ if (dn->dn_maxblkid == 0) { ++ if (off == 0 && len >= dn->dn_datablksz) { ++ blkid = 0; ++ nblks = 1; ++ } else { ++ rw_exit(&dn->dn_struct_rwlock); ++ return; ++ } ++ } else { ++ blkid = off >> dn->dn_datablkshift; ++ nblks = (len + dn->dn_datablksz - 1) >> dn->dn_datablkshift; ++ ++ if (blkid >= dn->dn_maxblkid) { ++ rw_exit(&dn->dn_struct_rwlock); ++ return; ++ } ++ if (blkid + nblks > dn->dn_maxblkid) ++ nblks = dn->dn_maxblkid - blkid; ++ ++ } ++ if (dn->dn_nlevels == 1) { ++ int i; ++ for (i = 0; i < nblks; i++) { ++ blkptr_t *bp = dn->dn_phys->dn_blkptr; ++ ASSERT3U(blkid + i, <, dn->dn_nblkptr); ++ bp += blkid + i; ++ if (dsl_dataset_block_freeable(ds, bp, bp->blk_birth)) { ++ dprintf_bp(bp, "can free old%s", ""); ++ space += bp_get_dsize(spa, bp); ++ } ++ unref += BP_GET_ASIZE(bp); ++ } ++ nblks = 0; ++ } ++ ++ /* ++ * Add in memory requirements of higher-level indirects. ++ * This assumes a worst-possible scenario for dn_nlevels. ++ */ ++ { ++ uint64_t blkcnt = 1 + ((nblks >> epbs) >> epbs); ++ int level = (dn->dn_nlevels > 1) ? 
2 : 1; ++ ++ while (level++ < DN_MAX_LEVELS) { ++ txh->txh_memory_tohold += blkcnt << dn->dn_indblkshift; ++ blkcnt = 1 + (blkcnt >> epbs); ++ } ++ ASSERT(blkcnt <= dn->dn_nblkptr); ++ } ++ ++ lastblk = blkid + nblks - 1; ++ while (nblks) { ++ dmu_buf_impl_t *dbuf; ++ uint64_t ibyte, new_blkid; ++ int epb = 1 << epbs; ++ int err, i, blkoff, tochk; ++ blkptr_t *bp; ++ ++ ibyte = blkid << dn->dn_datablkshift; ++ err = dnode_next_offset(dn, ++ DNODE_FIND_HAVELOCK, &ibyte, 2, 1, 0); ++ new_blkid = ibyte >> dn->dn_datablkshift; ++ if (err == ESRCH) { ++ skipped += (lastblk >> epbs) - (blkid >> epbs) + 1; ++ break; ++ } ++ if (err) { ++ txh->txh_tx->tx_err = err; ++ break; ++ } ++ if (new_blkid > lastblk) { ++ skipped += (lastblk >> epbs) - (blkid >> epbs) + 1; ++ break; ++ } ++ ++ if (new_blkid > blkid) { ++ ASSERT((new_blkid >> epbs) > (blkid >> epbs)); ++ skipped += (new_blkid >> epbs) - (blkid >> epbs) - 1; ++ nblks -= new_blkid - blkid; ++ blkid = new_blkid; ++ } ++ blkoff = P2PHASE(blkid, epb); ++ tochk = MIN(epb - blkoff, nblks); ++ ++ err = dbuf_hold_impl(dn, 1, blkid >> epbs, FALSE, FTAG, &dbuf); ++ if (err) { ++ txh->txh_tx->tx_err = err; ++ break; ++ } ++ ++ txh->txh_memory_tohold += dbuf->db.db_size; ++ ++ /* ++ * We don't check memory_tohold against DMU_MAX_ACCESS because ++ * memory_tohold is an over-estimation (especially the >L1 ++ * indirect blocks), so it could fail. Callers should have ++ * already verified that they will not be holding too much ++ * memory. ++ */ ++ ++ err = dbuf_read(dbuf, NULL, DB_RF_HAVESTRUCT | DB_RF_CANFAIL); ++ if (err != 0) { ++ txh->txh_tx->tx_err = err; ++ dbuf_rele(dbuf, FTAG); ++ break; ++ } ++ ++ bp = dbuf->db.db_data; ++ bp += blkoff; ++ ++ for (i = 0; i < tochk; i++) { ++ if (dsl_dataset_block_freeable(ds, &bp[i], ++ bp[i].blk_birth)) { ++ dprintf_bp(&bp[i], "can free old%s", ""); ++ space += bp_get_dsize(spa, &bp[i]); ++ } ++ unref += BP_GET_ASIZE(bp); ++ } ++ dbuf_rele(dbuf, FTAG); ++ ++ blkid += tochk; ++ nblks -= tochk; ++ } ++ rw_exit(&dn->dn_struct_rwlock); ++ ++ /* account for new level 1 indirect blocks that might show up */ ++ if (skipped > 0) { ++ txh->txh_fudge += skipped << dn->dn_indblkshift; ++ skipped = MIN(skipped, DMU_MAX_DELETEBLKCNT >> epbs); ++ txh->txh_memory_tohold += skipped << dn->dn_indblkshift; ++ } ++ txh->txh_space_tofree += space; ++ txh->txh_space_tounref += unref; ++} ++ ++void ++dmu_tx_hold_free(dmu_tx_t *tx, uint64_t object, uint64_t off, uint64_t len) ++{ ++ dmu_tx_hold_t *txh; ++ dnode_t *dn; ++ uint64_t start, end, i; ++ int err, shift; ++ zio_t *zio; ++ ++ ASSERT(tx->tx_txg == 0); ++ ++ txh = dmu_tx_hold_object_impl(tx, tx->tx_objset, ++ object, THT_FREE, off, len); ++ if (txh == NULL) ++ return; ++ dn = txh->txh_dnode; ++ ++ /* first block */ ++ if (off != 0) ++ dmu_tx_count_write(txh, off, 1); ++ /* last block */ ++ if (len != DMU_OBJECT_END) ++ dmu_tx_count_write(txh, off+len, 1); ++ ++ dmu_tx_count_dnode(txh); ++ ++ if (off >= (dn->dn_maxblkid+1) * dn->dn_datablksz) ++ return; ++ if (len == DMU_OBJECT_END) ++ len = (dn->dn_maxblkid+1) * dn->dn_datablksz - off; ++ ++ /* ++ * For i/o error checking, read the first and last level-0 ++ * blocks, and all the level-1 blocks. The above count_write's ++ * have already taken care of the level-0 blocks. ++ */ ++ if (dn->dn_nlevels > 1) { ++ shift = dn->dn_datablkshift + dn->dn_indblkshift - ++ SPA_BLKPTRSHIFT; ++ start = off >> shift; ++ end = dn->dn_datablkshift ? 
((off+len) >> shift) : 0; ++ ++ zio = zio_root(tx->tx_pool->dp_spa, ++ NULL, NULL, ZIO_FLAG_CANFAIL); ++ for (i = start; i <= end; i++) { ++ uint64_t ibyte = i << shift; ++ err = dnode_next_offset(dn, 0, &ibyte, 2, 1, 0); ++ i = ibyte >> shift; ++ if (err == ESRCH) ++ break; ++ if (err) { ++ tx->tx_err = err; ++ return; ++ } ++ ++ err = dmu_tx_check_ioerr(zio, dn, 1, i); ++ if (err) { ++ tx->tx_err = err; ++ return; ++ } ++ } ++ err = zio_wait(zio); ++ if (err) { ++ tx->tx_err = err; ++ return; ++ } ++ } ++ ++ dmu_tx_count_free(txh, off, len); ++} ++ ++void ++dmu_tx_hold_zap(dmu_tx_t *tx, uint64_t object, int add, const char *name) ++{ ++ dmu_tx_hold_t *txh; ++ dnode_t *dn; ++ uint64_t nblocks; ++ int epbs, err; ++ ++ ASSERT(tx->tx_txg == 0); ++ ++ txh = dmu_tx_hold_object_impl(tx, tx->tx_objset, ++ object, THT_ZAP, add, (uintptr_t)name); ++ if (txh == NULL) ++ return; ++ dn = txh->txh_dnode; ++ ++ dmu_tx_count_dnode(txh); ++ ++ if (dn == NULL) { ++ /* ++ * We will be able to fit a new object's entries into one leaf ++ * block. So there will be at most 2 blocks total, ++ * including the header block. ++ */ ++ dmu_tx_count_write(txh, 0, 2 << fzap_default_block_shift); ++ return; ++ } ++ ++ ASSERT3P(dmu_ot[dn->dn_type].ot_byteswap, ==, zap_byteswap); ++ ++ if (dn->dn_maxblkid == 0 && !add) { ++ blkptr_t *bp; ++ ++ /* ++ * If there is only one block (i.e. this is a micro-zap) ++ * and we are not adding anything, the accounting is simple. ++ */ ++ err = dmu_tx_check_ioerr(NULL, dn, 0, 0); ++ if (err) { ++ tx->tx_err = err; ++ return; ++ } ++ ++ /* ++ * Use max block size here, since we don't know how much ++ * the size will change between now and the dbuf dirty call. ++ */ ++ bp = &dn->dn_phys->dn_blkptr[0]; ++ if (dsl_dataset_block_freeable(dn->dn_objset->os_dsl_dataset, ++ bp, bp->blk_birth)) ++ txh->txh_space_tooverwrite += SPA_MAXBLOCKSIZE; ++ else ++ txh->txh_space_towrite += SPA_MAXBLOCKSIZE; ++ if (!BP_IS_HOLE(bp)) ++ txh->txh_space_tounref += SPA_MAXBLOCKSIZE; ++ return; ++ } ++ ++ if (dn->dn_maxblkid > 0 && name) { ++ /* ++ * access the name in this fat-zap so that we'll check ++ * for i/o errors to the leaf blocks, etc. ++ */ ++ err = zap_lookup(dn->dn_objset, dn->dn_object, name, ++ 8, 0, NULL); ++ if (err == EIO) { ++ tx->tx_err = err; ++ return; ++ } ++ } ++ ++ err = zap_count_write(dn->dn_objset, dn->dn_object, name, add, ++ &txh->txh_space_towrite, &txh->txh_space_tooverwrite); ++ ++ /* ++ * If the modified blocks are scattered to the four winds, ++ * we'll have to modify an indirect twig for each. 
++ */ ++ epbs = dn->dn_indblkshift - SPA_BLKPTRSHIFT; ++ for (nblocks = dn->dn_maxblkid >> epbs; nblocks != 0; nblocks >>= epbs) ++ if (dn->dn_objset->os_dsl_dataset->ds_phys->ds_prev_snap_obj) ++ txh->txh_space_towrite += 3 << dn->dn_indblkshift; ++ else ++ txh->txh_space_tooverwrite += 3 << dn->dn_indblkshift; ++} ++ ++void ++dmu_tx_hold_bonus(dmu_tx_t *tx, uint64_t object) ++{ ++ dmu_tx_hold_t *txh; ++ ++ ASSERT(tx->tx_txg == 0); ++ ++ txh = dmu_tx_hold_object_impl(tx, tx->tx_objset, ++ object, THT_BONUS, 0, 0); ++ if (txh) ++ dmu_tx_count_dnode(txh); ++} ++ ++void ++dmu_tx_hold_space(dmu_tx_t *tx, uint64_t space) ++{ ++ dmu_tx_hold_t *txh; ++ ASSERT(tx->tx_txg == 0); ++ ++ txh = dmu_tx_hold_object_impl(tx, tx->tx_objset, ++ DMU_NEW_OBJECT, THT_SPACE, space, 0); ++ ++ txh->txh_space_towrite += space; ++} ++ ++int ++dmu_tx_holds(dmu_tx_t *tx, uint64_t object) ++{ ++ dmu_tx_hold_t *txh; ++ int holds = 0; ++ ++ /* ++ * By asserting that the tx is assigned, we're counting the ++ * number of dn_tx_holds, which is the same as the number of ++ * dn_holds. Otherwise, we'd be counting dn_holds, but ++ * dn_tx_holds could be 0. ++ */ ++ ASSERT(tx->tx_txg != 0); ++ ++ /* if (tx->tx_anyobj == TRUE) */ ++ /* return (0); */ ++ ++ for (txh = list_head(&tx->tx_holds); txh; ++ txh = list_next(&tx->tx_holds, txh)) { ++ if (txh->txh_dnode && txh->txh_dnode->dn_object == object) ++ holds++; ++ } ++ ++ return (holds); ++} ++ ++#ifdef DEBUG_DMU_TX ++void ++dmu_tx_dirty_buf(dmu_tx_t *tx, dmu_buf_impl_t *db) ++{ ++ dmu_tx_hold_t *txh; ++ int match_object = FALSE, match_offset = FALSE; ++ dnode_t *dn; ++ ++ DB_DNODE_ENTER(db); ++ dn = DB_DNODE(db); ++ ASSERT(dn != NULL); ++ ASSERT(tx->tx_txg != 0); ++ ASSERT(tx->tx_objset == NULL || dn->dn_objset == tx->tx_objset); ++ ASSERT3U(dn->dn_object, ==, db->db.db_object); ++ ++ if (tx->tx_anyobj) { ++ DB_DNODE_EXIT(db); ++ return; ++ } ++ ++ /* XXX No checking on the meta dnode for now */ ++ if (db->db.db_object == DMU_META_DNODE_OBJECT) { ++ DB_DNODE_EXIT(db); ++ return; ++ } ++ ++ for (txh = list_head(&tx->tx_holds); txh; ++ txh = list_next(&tx->tx_holds, txh)) { ++ ASSERT3U(dn->dn_assigned_txg, ==, tx->tx_txg); ++ if (txh->txh_dnode == dn && txh->txh_type != THT_NEWOBJECT) ++ match_object = TRUE; ++ if (txh->txh_dnode == NULL || txh->txh_dnode == dn) { ++ int datablkshift = dn->dn_datablkshift ? ++ dn->dn_datablkshift : SPA_MAXBLOCKSHIFT; ++ int epbs = dn->dn_indblkshift - SPA_BLKPTRSHIFT; ++ int shift = datablkshift + epbs * db->db_level; ++ uint64_t beginblk = shift >= 64 ? 0 : ++ (txh->txh_arg1 >> shift); ++ uint64_t endblk = shift >= 64 ? 0 : ++ ((txh->txh_arg1 + txh->txh_arg2 - 1) >> shift); ++ uint64_t blkid = db->db_blkid; ++ ++ /* XXX txh_arg2 better not be zero... */ ++ ++ dprintf("found txh type %x beginblk=%llx endblk=%llx\n", ++ txh->txh_type, beginblk, endblk); ++ ++ switch (txh->txh_type) { ++ case THT_WRITE: ++ if (blkid >= beginblk && blkid <= endblk) ++ match_offset = TRUE; ++ /* ++ * We will let this hold work for the bonus ++ * or spill buffer so that we don't need to ++ * hold it when creating a new object. ++ */ ++ if (blkid == DMU_BONUS_BLKID || ++ blkid == DMU_SPILL_BLKID) ++ match_offset = TRUE; ++ /* ++ * They might have to increase nlevels, ++ * thus dirtying the new TLIBs. Or the ++ * might have to change the block size, ++ * thus dirying the new lvl=0 blk=0. 
++ */ ++ if (blkid == 0) ++ match_offset = TRUE; ++ break; ++ case THT_FREE: ++ /* ++ * We will dirty all the level 1 blocks in ++ * the free range and perhaps the first and ++ * last level 0 block. ++ */ ++ if (blkid >= beginblk && (blkid <= endblk || ++ txh->txh_arg2 == DMU_OBJECT_END)) ++ match_offset = TRUE; ++ break; ++ case THT_SPILL: ++ if (blkid == DMU_SPILL_BLKID) ++ match_offset = TRUE; ++ break; ++ case THT_BONUS: ++ if (blkid == DMU_BONUS_BLKID) ++ match_offset = TRUE; ++ break; ++ case THT_ZAP: ++ match_offset = TRUE; ++ break; ++ case THT_NEWOBJECT: ++ match_object = TRUE; ++ break; ++ default: ++ ASSERT(!"bad txh_type"); ++ } ++ } ++ if (match_object && match_offset) { ++ DB_DNODE_EXIT(db); ++ return; ++ } ++ } ++ DB_DNODE_EXIT(db); ++ panic("dirtying dbuf obj=%llx lvl=%u blkid=%llx but not tx_held\n", ++ (u_longlong_t)db->db.db_object, db->db_level, ++ (u_longlong_t)db->db_blkid); ++} ++#endif ++ ++static int ++dmu_tx_try_assign(dmu_tx_t *tx, uint64_t txg_how) ++{ ++ dmu_tx_hold_t *txh; ++ spa_t *spa = tx->tx_pool->dp_spa; ++ uint64_t memory, asize, fsize, usize; ++ uint64_t towrite, tofree, tooverwrite, tounref, tohold, fudge; ++ ++ ASSERT3U(tx->tx_txg, ==, 0); ++ ++ if (tx->tx_err) { ++ DMU_TX_STAT_BUMP(dmu_tx_error); ++ return (tx->tx_err); ++ } ++ ++ if (spa_suspended(spa)) { ++ DMU_TX_STAT_BUMP(dmu_tx_suspended); ++ ++ /* ++ * If the user has indicated a blocking failure mode ++ * then return ERESTART which will block in dmu_tx_wait(). ++ * Otherwise, return EIO so that an error can get ++ * propagated back to the VOP calls. ++ * ++ * Note that we always honor the txg_how flag regardless ++ * of the failuremode setting. ++ */ ++ if (spa_get_failmode(spa) == ZIO_FAILURE_MODE_CONTINUE && ++ txg_how != TXG_WAIT) ++ return (EIO); ++ ++ return (ERESTART); ++ } ++ ++ tx->tx_txg = txg_hold_open(tx->tx_pool, &tx->tx_txgh); ++ tx->tx_needassign_txh = NULL; ++ ++ /* ++ * NB: No error returns are allowed after txg_hold_open, but ++ * before processing the dnode holds, due to the ++ * dmu_tx_unassign() logic. ++ */ ++ ++ towrite = tofree = tooverwrite = tounref = tohold = fudge = 0; ++ for (txh = list_head(&tx->tx_holds); txh; ++ txh = list_next(&tx->tx_holds, txh)) { ++ dnode_t *dn = txh->txh_dnode; ++ if (dn != NULL) { ++ mutex_enter(&dn->dn_mtx); ++ if (dn->dn_assigned_txg == tx->tx_txg - 1) { ++ mutex_exit(&dn->dn_mtx); ++ tx->tx_needassign_txh = txh; ++ DMU_TX_STAT_BUMP(dmu_tx_group); ++ return (ERESTART); ++ } ++ if (dn->dn_assigned_txg == 0) ++ dn->dn_assigned_txg = tx->tx_txg; ++ ASSERT3U(dn->dn_assigned_txg, ==, tx->tx_txg); ++ (void) refcount_add(&dn->dn_tx_holds, tx); ++ mutex_exit(&dn->dn_mtx); ++ } ++ towrite += txh->txh_space_towrite; ++ tofree += txh->txh_space_tofree; ++ tooverwrite += txh->txh_space_tooverwrite; ++ tounref += txh->txh_space_tounref; ++ tohold += txh->txh_memory_tohold; ++ fudge += txh->txh_fudge; ++ } ++ ++ /* ++ * NB: This check must be after we've held the dnodes, so that ++ * the dmu_tx_unassign() logic will work properly ++ */ ++ if (txg_how >= TXG_INITIAL && txg_how != tx->tx_txg) { ++ DMU_TX_STAT_BUMP(dmu_tx_how); ++ return (ERESTART); ++ } ++ ++ /* ++ * If a snapshot has been taken since we made our estimates, ++ * assume that we won't be able to free or overwrite anything. 
++ */ ++ if (tx->tx_objset && ++ dsl_dataset_prev_snap_txg(tx->tx_objset->os_dsl_dataset) > ++ tx->tx_lastsnap_txg) { ++ towrite += tooverwrite; ++ tooverwrite = tofree = 0; ++ } ++ ++ /* needed allocation: worst-case estimate of write space */ ++ asize = spa_get_asize(tx->tx_pool->dp_spa, towrite + tooverwrite); ++ /* freed space estimate: worst-case overwrite + free estimate */ ++ fsize = spa_get_asize(tx->tx_pool->dp_spa, tooverwrite) + tofree; ++ /* convert unrefd space to worst-case estimate */ ++ usize = spa_get_asize(tx->tx_pool->dp_spa, tounref); ++ /* calculate memory footprint estimate */ ++ memory = towrite + tooverwrite + tohold; ++ ++#ifdef DEBUG_DMU_TX ++ /* ++ * Add in 'tohold' to account for our dirty holds on this memory ++ * XXX - the "fudge" factor is to account for skipped blocks that ++ * we missed because dnode_next_offset() misses in-core-only blocks. ++ */ ++ tx->tx_space_towrite = asize + ++ spa_get_asize(tx->tx_pool->dp_spa, tohold + fudge); ++ tx->tx_space_tofree = tofree; ++ tx->tx_space_tooverwrite = tooverwrite; ++ tx->tx_space_tounref = tounref; ++#endif ++ ++ if (tx->tx_dir && asize != 0) { ++ int err = dsl_dir_tempreserve_space(tx->tx_dir, memory, ++ asize, fsize, usize, &tx->tx_tempreserve_cookie, tx); ++ if (err) ++ return (err); ++ } ++ ++ DMU_TX_STAT_BUMP(dmu_tx_assigned); ++ ++ return (0); ++} ++ ++static void ++dmu_tx_unassign(dmu_tx_t *tx) ++{ ++ dmu_tx_hold_t *txh; ++ ++ if (tx->tx_txg == 0) ++ return; ++ ++ txg_rele_to_quiesce(&tx->tx_txgh); ++ ++ for (txh = list_head(&tx->tx_holds); txh != tx->tx_needassign_txh; ++ txh = list_next(&tx->tx_holds, txh)) { ++ dnode_t *dn = txh->txh_dnode; ++ ++ if (dn == NULL) ++ continue; ++ mutex_enter(&dn->dn_mtx); ++ ASSERT3U(dn->dn_assigned_txg, ==, tx->tx_txg); ++ ++ if (refcount_remove(&dn->dn_tx_holds, tx) == 0) { ++ dn->dn_assigned_txg = 0; ++ cv_broadcast(&dn->dn_notxholds); ++ } ++ mutex_exit(&dn->dn_mtx); ++ } ++ ++ txg_rele_to_sync(&tx->tx_txgh); ++ ++ tx->tx_lasttried_txg = tx->tx_txg; ++ tx->tx_txg = 0; ++} ++ ++/* ++ * Assign tx to a transaction group. txg_how can be one of: ++ * ++ * (1) TXG_WAIT. If the current open txg is full, waits until there's ++ * a new one. This should be used when you're not holding locks. ++ * If will only fail if we're truly out of space (or over quota). ++ * ++ * (2) TXG_NOWAIT. If we can't assign into the current open txg without ++ * blocking, returns immediately with ERESTART. This should be used ++ * whenever you're holding locks. On an ERESTART error, the caller ++ * should drop locks, do a dmu_tx_wait(tx), and try again. ++ * ++ * (3) A specific txg. Use this if you need to ensure that multiple ++ * transactions all sync in the same txg. Like TXG_NOWAIT, it ++ * returns ERESTART if it can't assign you into the requested txg. ++ */ ++int ++dmu_tx_assign(dmu_tx_t *tx, uint64_t txg_how) ++{ ++ int err; ++ ++ ASSERT(tx->tx_txg == 0); ++ ASSERT(txg_how != 0); ++ ASSERT(!dsl_pool_sync_context(tx->tx_pool)); ++ ++ while ((err = dmu_tx_try_assign(tx, txg_how)) != 0) { ++ dmu_tx_unassign(tx); ++ ++ if (err != ERESTART || txg_how != TXG_WAIT) ++ return (err); ++ ++ dmu_tx_wait(tx); ++ } ++ ++ txg_rele_to_quiesce(&tx->tx_txgh); ++ ++ return (0); ++} ++ ++void ++dmu_tx_wait(dmu_tx_t *tx) ++{ ++ spa_t *spa = tx->tx_pool->dp_spa; ++ ++ ASSERT(tx->tx_txg == 0); ++ ++ /* ++ * It's possible that the pool has become active after this thread ++ * has tried to obtain a tx. If that's the case then his ++ * tx_lasttried_txg would not have been assigned. 
++ */ ++ if (spa_suspended(spa) || tx->tx_lasttried_txg == 0) { ++ txg_wait_synced(tx->tx_pool, spa_last_synced_txg(spa) + 1); ++ } else if (tx->tx_needassign_txh) { ++ dnode_t *dn = tx->tx_needassign_txh->txh_dnode; ++ ++ mutex_enter(&dn->dn_mtx); ++ while (dn->dn_assigned_txg == tx->tx_lasttried_txg - 1) ++ cv_wait(&dn->dn_notxholds, &dn->dn_mtx); ++ mutex_exit(&dn->dn_mtx); ++ tx->tx_needassign_txh = NULL; ++ } else { ++ txg_wait_open(tx->tx_pool, tx->tx_lasttried_txg + 1); ++ } ++} ++ ++void ++dmu_tx_willuse_space(dmu_tx_t *tx, int64_t delta) ++{ ++#ifdef DEBUG_DMU_TX ++ if (tx->tx_dir == NULL || delta == 0) ++ return; ++ ++ if (delta > 0) { ++ ASSERT3U(refcount_count(&tx->tx_space_written) + delta, <=, ++ tx->tx_space_towrite); ++ (void) refcount_add_many(&tx->tx_space_written, delta, NULL); ++ } else { ++ (void) refcount_add_many(&tx->tx_space_freed, -delta, NULL); ++ } ++#endif ++} ++ ++void ++dmu_tx_commit(dmu_tx_t *tx) ++{ ++ dmu_tx_hold_t *txh; ++ ++ ASSERT(tx->tx_txg != 0); ++ ++ while ((txh = list_head(&tx->tx_holds))) { ++ dnode_t *dn = txh->txh_dnode; ++ ++ list_remove(&tx->tx_holds, txh); ++ kmem_free(txh, sizeof (dmu_tx_hold_t)); ++ if (dn == NULL) ++ continue; ++ mutex_enter(&dn->dn_mtx); ++ ASSERT3U(dn->dn_assigned_txg, ==, tx->tx_txg); ++ ++ if (refcount_remove(&dn->dn_tx_holds, tx) == 0) { ++ dn->dn_assigned_txg = 0; ++ cv_broadcast(&dn->dn_notxholds); ++ } ++ mutex_exit(&dn->dn_mtx); ++ dnode_rele(dn, tx); ++ } ++ ++ if (tx->tx_tempreserve_cookie) ++ dsl_dir_tempreserve_clear(tx->tx_tempreserve_cookie, tx); ++ ++ if (!list_is_empty(&tx->tx_callbacks)) ++ txg_register_callbacks(&tx->tx_txgh, &tx->tx_callbacks); ++ ++ if (tx->tx_anyobj == FALSE) ++ txg_rele_to_sync(&tx->tx_txgh); ++ ++ list_destroy(&tx->tx_callbacks); ++ list_destroy(&tx->tx_holds); ++#ifdef DEBUG_DMU_TX ++ dprintf("towrite=%llu written=%llu tofree=%llu freed=%llu\n", ++ tx->tx_space_towrite, refcount_count(&tx->tx_space_written), ++ tx->tx_space_tofree, refcount_count(&tx->tx_space_freed)); ++ refcount_destroy_many(&tx->tx_space_written, ++ refcount_count(&tx->tx_space_written)); ++ refcount_destroy_many(&tx->tx_space_freed, ++ refcount_count(&tx->tx_space_freed)); ++#endif ++ kmem_free(tx, sizeof (dmu_tx_t)); ++} ++ ++void ++dmu_tx_abort(dmu_tx_t *tx) ++{ ++ dmu_tx_hold_t *txh; ++ ++ ASSERT(tx->tx_txg == 0); ++ ++ while ((txh = list_head(&tx->tx_holds))) { ++ dnode_t *dn = txh->txh_dnode; ++ ++ list_remove(&tx->tx_holds, txh); ++ kmem_free(txh, sizeof (dmu_tx_hold_t)); ++ if (dn != NULL) ++ dnode_rele(dn, tx); ++ } ++ ++ /* ++ * Call any registered callbacks with an error code. ++ */ ++ if (!list_is_empty(&tx->tx_callbacks)) ++ dmu_tx_do_callbacks(&tx->tx_callbacks, ECANCELED); ++ ++ list_destroy(&tx->tx_callbacks); ++ list_destroy(&tx->tx_holds); ++#ifdef DEBUG_DMU_TX ++ refcount_destroy_many(&tx->tx_space_written, ++ refcount_count(&tx->tx_space_written)); ++ refcount_destroy_many(&tx->tx_space_freed, ++ refcount_count(&tx->tx_space_freed)); ++#endif ++ kmem_free(tx, sizeof (dmu_tx_t)); ++} ++ ++uint64_t ++dmu_tx_get_txg(dmu_tx_t *tx) ++{ ++ ASSERT(tx->tx_txg != 0); ++ return (tx->tx_txg); ++} ++ ++void ++dmu_tx_callback_register(dmu_tx_t *tx, dmu_tx_callback_func_t *func, void *data) ++{ ++ dmu_tx_callback_t *dcb; ++ ++ dcb = kmem_alloc(sizeof (dmu_tx_callback_t), KM_PUSHPAGE); ++ ++ dcb->dcb_func = func; ++ dcb->dcb_data = data; ++ ++ list_insert_tail(&tx->tx_callbacks, dcb); ++} ++ ++/* ++ * Call all the commit callbacks on a list, with a given error code. 
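dmu_tx_callback_register() above, paired with the callback handling in dmu_tx_commit() and dmu_tx_abort(), lets a caller learn asynchronously whether its transaction reached stable storage. A small sketch of such a callback, with hypothetical names; the error values follow the code above (an aborted tx fires its callbacks immediately with ECANCELED, a committed tx hands them to the txg, which fires them with 0 once that txg has synced):

static void
example_commit_cb(void *arg, int error)
{
	boolean_t *on_disk = arg;	/* hypothetical caller state */

	/* error == 0 once the txg has synced, ECANCELED if the tx aborted */
	if (error == 0)
		*on_disk = B_TRUE;
}

It would be registered between a successful dmu_tx_assign() and dmu_tx_commit() with dmu_tx_callback_register(tx, example_commit_cb, &on_disk).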
++ */ ++void ++dmu_tx_do_callbacks(list_t *cb_list, int error) ++{ ++ dmu_tx_callback_t *dcb; ++ ++ while ((dcb = list_head(cb_list))) { ++ list_remove(cb_list, dcb); ++ dcb->dcb_func(dcb->dcb_data, error); ++ kmem_free(dcb, sizeof (dmu_tx_callback_t)); ++ } ++} ++ ++/* ++ * Interface to hold a bunch of attributes. ++ * used for creating new files. ++ * attrsize is the total size of all attributes ++ * to be added during object creation ++ * ++ * For updating/adding a single attribute dmu_tx_hold_sa() should be used. ++ */ ++ ++/* ++ * hold necessary attribute name for attribute registration. ++ * should be a very rare case where this is needed. If it does ++ * happen it would only happen on the first write to the file system. ++ */ ++static void ++dmu_tx_sa_registration_hold(sa_os_t *sa, dmu_tx_t *tx) ++{ ++ int i; ++ ++ if (!sa->sa_need_attr_registration) ++ return; ++ ++ for (i = 0; i != sa->sa_num_attrs; i++) { ++ if (!sa->sa_attr_table[i].sa_registered) { ++ if (sa->sa_reg_attr_obj) ++ dmu_tx_hold_zap(tx, sa->sa_reg_attr_obj, ++ B_TRUE, sa->sa_attr_table[i].sa_name); ++ else ++ dmu_tx_hold_zap(tx, DMU_NEW_OBJECT, ++ B_TRUE, sa->sa_attr_table[i].sa_name); ++ } ++ } ++} ++ ++ ++void ++dmu_tx_hold_spill(dmu_tx_t *tx, uint64_t object) ++{ ++ dnode_t *dn; ++ dmu_tx_hold_t *txh; ++ ++ txh = dmu_tx_hold_object_impl(tx, tx->tx_objset, object, ++ THT_SPILL, 0, 0); ++ ++ dn = txh->txh_dnode; ++ ++ if (dn == NULL) ++ return; ++ ++ /* If blkptr doesn't exist then add space to towrite */ ++ if (!(dn->dn_phys->dn_flags & DNODE_FLAG_SPILL_BLKPTR)) { ++ txh->txh_space_towrite += SPA_MAXBLOCKSIZE; ++ } else { ++ blkptr_t *bp; ++ ++ bp = &dn->dn_phys->dn_spill; ++ if (dsl_dataset_block_freeable(dn->dn_objset->os_dsl_dataset, ++ bp, bp->blk_birth)) ++ txh->txh_space_tooverwrite += SPA_MAXBLOCKSIZE; ++ else ++ txh->txh_space_towrite += SPA_MAXBLOCKSIZE; ++ if (!BP_IS_HOLE(bp)) ++ txh->txh_space_tounref += SPA_MAXBLOCKSIZE; ++ } ++} ++ ++void ++dmu_tx_hold_sa_create(dmu_tx_t *tx, int attrsize) ++{ ++ sa_os_t *sa = tx->tx_objset->os_sa; ++ ++ dmu_tx_hold_bonus(tx, DMU_NEW_OBJECT); ++ ++ if (tx->tx_objset->os_sa->sa_master_obj == 0) ++ return; ++ ++ if (tx->tx_objset->os_sa->sa_layout_attr_obj) ++ dmu_tx_hold_zap(tx, sa->sa_layout_attr_obj, B_TRUE, NULL); ++ else { ++ dmu_tx_hold_zap(tx, sa->sa_master_obj, B_TRUE, SA_LAYOUTS); ++ dmu_tx_hold_zap(tx, sa->sa_master_obj, B_TRUE, SA_REGISTRY); ++ dmu_tx_hold_zap(tx, DMU_NEW_OBJECT, B_TRUE, NULL); ++ dmu_tx_hold_zap(tx, DMU_NEW_OBJECT, B_TRUE, NULL); ++ } ++ ++ dmu_tx_sa_registration_hold(sa, tx); ++ ++ if (attrsize <= DN_MAX_BONUSLEN && !sa->sa_force_spill) ++ return; ++ ++ (void) dmu_tx_hold_object_impl(tx, tx->tx_objset, DMU_NEW_OBJECT, ++ THT_SPILL, 0, 0); ++} ++ ++/* ++ * Hold SA attribute ++ * ++ * dmu_tx_hold_sa(dmu_tx_t *tx, sa_handle_t *, attribute, add, size) ++ * ++ * variable_size is the total size of all variable sized attributes ++ * passed to this function. It is not the total size of all ++ * variable size attributes that *may* exist on this object. 
++ */ ++void ++dmu_tx_hold_sa(dmu_tx_t *tx, sa_handle_t *hdl, boolean_t may_grow) ++{ ++ uint64_t object; ++ sa_os_t *sa = tx->tx_objset->os_sa; ++ ++ ASSERT(hdl != NULL); ++ ++ object = sa_handle_object(hdl); ++ ++ dmu_tx_hold_bonus(tx, object); ++ ++ if (tx->tx_objset->os_sa->sa_master_obj == 0) ++ return; ++ ++ if (tx->tx_objset->os_sa->sa_reg_attr_obj == 0 || ++ tx->tx_objset->os_sa->sa_layout_attr_obj == 0) { ++ dmu_tx_hold_zap(tx, sa->sa_master_obj, B_TRUE, SA_LAYOUTS); ++ dmu_tx_hold_zap(tx, sa->sa_master_obj, B_TRUE, SA_REGISTRY); ++ dmu_tx_hold_zap(tx, DMU_NEW_OBJECT, B_TRUE, NULL); ++ dmu_tx_hold_zap(tx, DMU_NEW_OBJECT, B_TRUE, NULL); ++ } ++ ++ dmu_tx_sa_registration_hold(sa, tx); ++ ++ if (may_grow && tx->tx_objset->os_sa->sa_layout_attr_obj) ++ dmu_tx_hold_zap(tx, sa->sa_layout_attr_obj, B_TRUE, NULL); ++ ++ if (sa->sa_force_spill || may_grow || hdl->sa_spill) { ++ ASSERT(tx->tx_txg == 0); ++ dmu_tx_hold_spill(tx, object); ++ } else { ++ dmu_buf_impl_t *db = (dmu_buf_impl_t *)hdl->sa_bonus; ++ dnode_t *dn; ++ ++ DB_DNODE_ENTER(db); ++ dn = DB_DNODE(db); ++ if (dn->dn_have_spill) { ++ ASSERT(tx->tx_txg == 0); ++ dmu_tx_hold_spill(tx, object); ++ } ++ DB_DNODE_EXIT(db); ++ } ++} ++ ++void ++dmu_tx_init(void) ++{ ++ dmu_tx_ksp = kstat_create("zfs", 0, "dmu_tx", "misc", ++ KSTAT_TYPE_NAMED, sizeof (dmu_tx_stats) / sizeof (kstat_named_t), ++ KSTAT_FLAG_VIRTUAL); ++ ++ if (dmu_tx_ksp != NULL) { ++ dmu_tx_ksp->ks_data = &dmu_tx_stats; ++ kstat_install(dmu_tx_ksp); ++ } ++} ++ ++void ++dmu_tx_fini(void) ++{ ++ if (dmu_tx_ksp != NULL) { ++ kstat_delete(dmu_tx_ksp); ++ dmu_tx_ksp = NULL; ++ } ++} ++ ++#if defined(_KERNEL) && defined(HAVE_SPL) ++EXPORT_SYMBOL(dmu_tx_create); ++EXPORT_SYMBOL(dmu_tx_hold_write); ++EXPORT_SYMBOL(dmu_tx_hold_free); ++EXPORT_SYMBOL(dmu_tx_hold_zap); ++EXPORT_SYMBOL(dmu_tx_hold_bonus); ++EXPORT_SYMBOL(dmu_tx_abort); ++EXPORT_SYMBOL(dmu_tx_assign); ++EXPORT_SYMBOL(dmu_tx_wait); ++EXPORT_SYMBOL(dmu_tx_commit); ++EXPORT_SYMBOL(dmu_tx_get_txg); ++EXPORT_SYMBOL(dmu_tx_callback_register); ++EXPORT_SYMBOL(dmu_tx_do_callbacks); ++EXPORT_SYMBOL(dmu_tx_hold_spill); ++EXPORT_SYMBOL(dmu_tx_hold_sa_create); ++EXPORT_SYMBOL(dmu_tx_hold_sa); ++#endif +diff -uNr linux-3.2.33-go.orig/fs/zfs/zfs/dmu_zfetch.c linux-3.2.33-go/fs/zfs/zfs/dmu_zfetch.c +--- linux-3.2.33-go.orig/fs/zfs/zfs/dmu_zfetch.c 1970-01-01 01:00:00.000000000 +0100 ++++ linux-3.2.33-go/fs/zfs/zfs/dmu_zfetch.c 2012-11-16 23:25:34.350039322 +0100 +@@ -0,0 +1,742 @@ ++/* ++ * CDDL HEADER START ++ * ++ * The contents of this file are subject to the terms of the ++ * Common Development and Distribution License (the "License"). ++ * You may not use this file except in compliance with the License. ++ * ++ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE ++ * or http://www.opensolaris.org/os/licensing. ++ * See the License for the specific language governing permissions ++ * and limitations under the License. ++ * ++ * When distributing Covered Code, include this CDDL HEADER in each ++ * file and include the License file at usr/src/OPENSOLARIS.LICENSE. ++ * If applicable, add the following below this CDDL HEADER, with the ++ * fields enclosed by brackets "[]" replaced with your own identifying ++ * information: Portions Copyright [yyyy] [name of copyright owner] ++ * ++ * CDDL HEADER END ++ */ ++/* ++ * Copyright 2009 Sun Microsystems, Inc. All rights reserved. ++ * Use is subject to license terms. 
++ */ ++ ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++ ++/* ++ * I'm against tune-ables, but these should probably exist as tweakable globals ++ * until we can get this working the way we want it to. ++ */ ++ ++int zfs_prefetch_disable = 0; ++ ++/* max # of streams per zfetch */ ++unsigned int zfetch_max_streams = 8; ++/* min time before stream reclaim */ ++unsigned int zfetch_min_sec_reap = 2; ++/* max number of blocks to fetch at a time */ ++unsigned int zfetch_block_cap = 256; ++/* number of bytes in a array_read at which we stop prefetching (1Mb) */ ++unsigned long zfetch_array_rd_sz = 1024 * 1024; ++ ++/* forward decls for static routines */ ++static int dmu_zfetch_colinear(zfetch_t *, zstream_t *); ++static void dmu_zfetch_dofetch(zfetch_t *, zstream_t *); ++static uint64_t dmu_zfetch_fetch(dnode_t *, uint64_t, uint64_t); ++static uint64_t dmu_zfetch_fetchsz(dnode_t *, uint64_t, uint64_t); ++static int dmu_zfetch_find(zfetch_t *, zstream_t *, int); ++static int dmu_zfetch_stream_insert(zfetch_t *, zstream_t *); ++static zstream_t *dmu_zfetch_stream_reclaim(zfetch_t *); ++static void dmu_zfetch_stream_remove(zfetch_t *, zstream_t *); ++static int dmu_zfetch_streams_equal(zstream_t *, zstream_t *); ++ ++typedef struct zfetch_stats { ++ kstat_named_t zfetchstat_hits; ++ kstat_named_t zfetchstat_misses; ++ kstat_named_t zfetchstat_colinear_hits; ++ kstat_named_t zfetchstat_colinear_misses; ++ kstat_named_t zfetchstat_stride_hits; ++ kstat_named_t zfetchstat_stride_misses; ++ kstat_named_t zfetchstat_reclaim_successes; ++ kstat_named_t zfetchstat_reclaim_failures; ++ kstat_named_t zfetchstat_stream_resets; ++ kstat_named_t zfetchstat_stream_noresets; ++ kstat_named_t zfetchstat_bogus_streams; ++} zfetch_stats_t; ++ ++static zfetch_stats_t zfetch_stats = { ++ { "hits", KSTAT_DATA_UINT64 }, ++ { "misses", KSTAT_DATA_UINT64 }, ++ { "colinear_hits", KSTAT_DATA_UINT64 }, ++ { "colinear_misses", KSTAT_DATA_UINT64 }, ++ { "stride_hits", KSTAT_DATA_UINT64 }, ++ { "stride_misses", KSTAT_DATA_UINT64 }, ++ { "reclaim_successes", KSTAT_DATA_UINT64 }, ++ { "reclaim_failures", KSTAT_DATA_UINT64 }, ++ { "streams_resets", KSTAT_DATA_UINT64 }, ++ { "streams_noresets", KSTAT_DATA_UINT64 }, ++ { "bogus_streams", KSTAT_DATA_UINT64 }, ++}; ++ ++#define ZFETCHSTAT_INCR(stat, val) \ ++ atomic_add_64(&zfetch_stats.stat.value.ui64, (val)); ++ ++#define ZFETCHSTAT_BUMP(stat) ZFETCHSTAT_INCR(stat, 1); ++ ++kstat_t *zfetch_ksp; ++ ++/* ++ * Given a zfetch structure and a zstream structure, determine whether the ++ * blocks to be read are part of a co-linear pair of existing prefetch ++ * streams. If a set is found, coalesce the streams, removing one, and ++ * configure the prefetch so it looks for a strided access pattern. ++ * ++ * In other words: if we find two sequential access streams that are ++ * the same length and distance N appart, and this read is N from the ++ * last stream, then we are probably in a strided access pattern. So ++ * combine the two sequential streams into a single strided stream. ++ * ++ * If no co-linear streams are found, return NULL. ++ */ ++static int ++dmu_zfetch_colinear(zfetch_t *zf, zstream_t *zh) ++{ ++ zstream_t *z_walk; ++ zstream_t *z_comp; ++ ++ if (! 
rw_tryenter(&zf->zf_rwlock, RW_WRITER)) ++ return (0); ++ ++ if (zh == NULL) { ++ rw_exit(&zf->zf_rwlock); ++ return (0); ++ } ++ ++ for (z_walk = list_head(&zf->zf_stream); z_walk; ++ z_walk = list_next(&zf->zf_stream, z_walk)) { ++ for (z_comp = list_next(&zf->zf_stream, z_walk); z_comp; ++ z_comp = list_next(&zf->zf_stream, z_comp)) { ++ int64_t diff; ++ ++ if (z_walk->zst_len != z_walk->zst_stride || ++ z_comp->zst_len != z_comp->zst_stride) { ++ continue; ++ } ++ ++ diff = z_comp->zst_offset - z_walk->zst_offset; ++ if (z_comp->zst_offset + diff == zh->zst_offset) { ++ z_walk->zst_offset = zh->zst_offset; ++ z_walk->zst_direction = diff < 0 ? -1 : 1; ++ z_walk->zst_stride = ++ diff * z_walk->zst_direction; ++ z_walk->zst_ph_offset = ++ zh->zst_offset + z_walk->zst_stride; ++ dmu_zfetch_stream_remove(zf, z_comp); ++ mutex_destroy(&z_comp->zst_lock); ++ kmem_free(z_comp, sizeof (zstream_t)); ++ ++ dmu_zfetch_dofetch(zf, z_walk); ++ ++ rw_exit(&zf->zf_rwlock); ++ return (1); ++ } ++ ++ diff = z_walk->zst_offset - z_comp->zst_offset; ++ if (z_walk->zst_offset + diff == zh->zst_offset) { ++ z_walk->zst_offset = zh->zst_offset; ++ z_walk->zst_direction = diff < 0 ? -1 : 1; ++ z_walk->zst_stride = ++ diff * z_walk->zst_direction; ++ z_walk->zst_ph_offset = ++ zh->zst_offset + z_walk->zst_stride; ++ dmu_zfetch_stream_remove(zf, z_comp); ++ mutex_destroy(&z_comp->zst_lock); ++ kmem_free(z_comp, sizeof (zstream_t)); ++ ++ dmu_zfetch_dofetch(zf, z_walk); ++ ++ rw_exit(&zf->zf_rwlock); ++ return (1); ++ } ++ } ++ } ++ ++ rw_exit(&zf->zf_rwlock); ++ return (0); ++} ++ ++/* ++ * Given a zstream_t, determine the bounds of the prefetch. Then call the ++ * routine that actually prefetches the individual blocks. ++ */ ++static void ++dmu_zfetch_dofetch(zfetch_t *zf, zstream_t *zs) ++{ ++ uint64_t prefetch_tail; ++ uint64_t prefetch_limit; ++ uint64_t prefetch_ofst; ++ uint64_t prefetch_len; ++ uint64_t blocks_fetched; ++ ++ zs->zst_stride = MAX((int64_t)zs->zst_stride, zs->zst_len); ++ zs->zst_cap = MIN(zfetch_block_cap, 2 * zs->zst_cap); ++ ++ prefetch_tail = MAX((int64_t)zs->zst_ph_offset, ++ (int64_t)(zs->zst_offset + zs->zst_stride)); ++ /* ++ * XXX: use a faster division method? ++ */ ++ prefetch_limit = zs->zst_offset + zs->zst_len + ++ (zs->zst_cap * zs->zst_stride) / zs->zst_len; ++ ++ while (prefetch_tail < prefetch_limit) { ++ prefetch_ofst = zs->zst_offset + zs->zst_direction * ++ (prefetch_tail - zs->zst_offset); ++ ++ prefetch_len = zs->zst_len; ++ ++ /* ++ * Don't prefetch beyond the end of the file, if working ++ * backwards. 
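The two symmetric branches of dmu_zfetch_colinear() above reduce to a single arithmetic test: the new read has to sit exactly one inter-stream distance beyond one of the two candidate streams. A standalone model of that test, offsets only; model_colinear() is my name, and the real code additionally requires both streams to be purely sequential (zst_len == zst_stride) before merging them:

#include <stdint.h>

/*
 * Two sequential streams at offsets a and b, and a new read at `next':
 * if the three are evenly spaced, treat them as one strided stream.
 * (Model only; the actual merge is done by dmu_zfetch_colinear() above.)
 */
static int
model_colinear(int64_t a, int64_t b, int64_t next)
{
	int64_t diff = b - a;

	return (diff != 0 && (b + diff == next || a - diff == next));
}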
++ */ ++ if ((zs->zst_direction == ZFETCH_BACKWARD) && ++ (prefetch_ofst > prefetch_tail)) { ++ prefetch_len += prefetch_ofst; ++ prefetch_ofst = 0; ++ } ++ ++ /* don't prefetch more than we're supposed to */ ++ if (prefetch_len > zs->zst_len) ++ break; ++ ++ blocks_fetched = dmu_zfetch_fetch(zf->zf_dnode, ++ prefetch_ofst, zs->zst_len); ++ ++ prefetch_tail += zs->zst_stride; ++ /* stop if we've run out of stuff to prefetch */ ++ if (blocks_fetched < zs->zst_len) ++ break; ++ } ++ zs->zst_ph_offset = prefetch_tail; ++ zs->zst_last = ddi_get_lbolt(); ++} ++ ++void ++zfetch_init(void) ++{ ++ ++ zfetch_ksp = kstat_create("zfs", 0, "zfetchstats", "misc", ++ KSTAT_TYPE_NAMED, sizeof (zfetch_stats) / sizeof (kstat_named_t), ++ KSTAT_FLAG_VIRTUAL); ++ ++ if (zfetch_ksp != NULL) { ++ zfetch_ksp->ks_data = &zfetch_stats; ++ kstat_install(zfetch_ksp); ++ } ++} ++ ++void ++zfetch_fini(void) ++{ ++ if (zfetch_ksp != NULL) { ++ kstat_delete(zfetch_ksp); ++ zfetch_ksp = NULL; ++ } ++} ++ ++/* ++ * This takes a pointer to a zfetch structure and a dnode. It performs the ++ * necessary setup for the zfetch structure, grokking data from the ++ * associated dnode. ++ */ ++void ++dmu_zfetch_init(zfetch_t *zf, dnode_t *dno) ++{ ++ if (zf == NULL) { ++ return; ++ } ++ ++ zf->zf_dnode = dno; ++ zf->zf_stream_cnt = 0; ++ zf->zf_alloc_fail = 0; ++ ++ list_create(&zf->zf_stream, sizeof (zstream_t), ++ offsetof(zstream_t, zst_node)); ++ ++ rw_init(&zf->zf_rwlock, NULL, RW_DEFAULT, NULL); ++} ++ ++/* ++ * This function computes the actual size, in blocks, that can be prefetched, ++ * and fetches it. ++ */ ++static uint64_t ++dmu_zfetch_fetch(dnode_t *dn, uint64_t blkid, uint64_t nblks) ++{ ++ uint64_t fetchsz; ++ uint64_t i; ++ ++ fetchsz = dmu_zfetch_fetchsz(dn, blkid, nblks); ++ ++ for (i = 0; i < fetchsz; i++) { ++ dbuf_prefetch(dn, blkid + i); ++ } ++ ++ return (fetchsz); ++} ++ ++/* ++ * this function returns the number of blocks that would be prefetched, based ++ * upon the supplied dnode, blockid, and nblks. This is used so that we can ++ * update streams in place, and then prefetch with their old value after the ++ * fact. This way, we can delay the prefetch, but subsequent accesses to the ++ * stream won't result in the same data being prefetched multiple times. ++ */ ++static uint64_t ++dmu_zfetch_fetchsz(dnode_t *dn, uint64_t blkid, uint64_t nblks) ++{ ++ uint64_t fetchsz; ++ ++ if (blkid > dn->dn_maxblkid) { ++ return (0); ++ } ++ ++ /* compute fetch size */ ++ if (blkid + nblks + 1 > dn->dn_maxblkid) { ++ fetchsz = (dn->dn_maxblkid - blkid) + 1; ++ ASSERT(blkid + fetchsz - 1 <= dn->dn_maxblkid); ++ } else { ++ fetchsz = nblks; ++ } ++ ++ ++ return (fetchsz); ++} ++ ++/* ++ * given a zfetch and a zstream structure, see if there is an associated zstream ++ * for this block read. If so, it starts a prefetch for the stream it ++ * located and returns true, otherwise it returns false ++ */ ++static int ++dmu_zfetch_find(zfetch_t *zf, zstream_t *zh, int prefetched) ++{ ++ zstream_t *zs; ++ int64_t diff; ++ int reset = !prefetched; ++ int rc = 0; ++ ++ if (zh == NULL) ++ return (0); ++ ++ /* ++ * XXX: This locking strategy is a bit coarse; however, it's impact has ++ * yet to be tested. If this turns out to be an issue, it can be ++ * modified in a number of different ways. ++ */ ++ ++ rw_enter(&zf->zf_rwlock, RW_READER); ++top: ++ ++ for (zs = list_head(&zf->zf_stream); zs; ++ zs = list_next(&zf->zf_stream, zs)) { ++ ++ /* ++ * XXX - should this be an assert? 
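dmu_zfetch_fetchsz() above clamps a prefetch request so it never runs past the last allocated block of the dnode. A self-contained model of that clamp in plain C, with no dnode_t and a name of my own choosing:

#include <stdint.h>

/* Never prefetch past maxblkid; mirrors dmu_zfetch_fetchsz() above. */
static uint64_t
model_fetchsz(uint64_t maxblkid, uint64_t blkid, uint64_t nblks)
{
	if (blkid > maxblkid)
		return (0);
	if (blkid + nblks + 1 > maxblkid)
		return ((maxblkid - blkid) + 1);
	return (nblks);
}

For example, with maxblkid = 10 a request for 8 blocks starting at block 5 is trimmed to 6 blocks (5 through 10).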
++ */ ++ if (zs->zst_len == 0) { ++ /* bogus stream */ ++ ZFETCHSTAT_BUMP(zfetchstat_bogus_streams); ++ continue; ++ } ++ ++ /* ++ * We hit this case when we are in a strided prefetch stream: ++ * we will read "len" blocks before "striding". ++ */ ++ if (zh->zst_offset >= zs->zst_offset && ++ zh->zst_offset < zs->zst_offset + zs->zst_len) { ++ if (prefetched) { ++ /* already fetched */ ++ ZFETCHSTAT_BUMP(zfetchstat_stride_hits); ++ rc = 1; ++ goto out; ++ } else { ++ ZFETCHSTAT_BUMP(zfetchstat_stride_misses); ++ } ++ } ++ ++ /* ++ * This is the forward sequential read case: we increment ++ * len by one each time we hit here, so we will enter this ++ * case on every read. ++ */ ++ if (zh->zst_offset == zs->zst_offset + zs->zst_len) { ++ ++ reset = !prefetched && zs->zst_len > 1; ++ ++ mutex_enter(&zs->zst_lock); ++ ++ if (zh->zst_offset != zs->zst_offset + zs->zst_len) { ++ mutex_exit(&zs->zst_lock); ++ goto top; ++ } ++ zs->zst_len += zh->zst_len; ++ diff = zs->zst_len - zfetch_block_cap; ++ if (diff > 0) { ++ zs->zst_offset += diff; ++ zs->zst_len = zs->zst_len > diff ? ++ zs->zst_len - diff : 0; ++ } ++ zs->zst_direction = ZFETCH_FORWARD; ++ ++ break; ++ ++ /* ++ * Same as above, but reading backwards through the file. ++ */ ++ } else if (zh->zst_offset == zs->zst_offset - zh->zst_len) { ++ /* backwards sequential access */ ++ ++ reset = !prefetched && zs->zst_len > 1; ++ ++ mutex_enter(&zs->zst_lock); ++ ++ if (zh->zst_offset != zs->zst_offset - zh->zst_len) { ++ mutex_exit(&zs->zst_lock); ++ goto top; ++ } ++ ++ zs->zst_offset = zs->zst_offset > zh->zst_len ? ++ zs->zst_offset - zh->zst_len : 0; ++ zs->zst_ph_offset = zs->zst_ph_offset > zh->zst_len ? ++ zs->zst_ph_offset - zh->zst_len : 0; ++ zs->zst_len += zh->zst_len; ++ ++ diff = zs->zst_len - zfetch_block_cap; ++ if (diff > 0) { ++ zs->zst_ph_offset = zs->zst_ph_offset > diff ? ++ zs->zst_ph_offset - diff : 0; ++ zs->zst_len = zs->zst_len > diff ? ++ zs->zst_len - diff : zs->zst_len; ++ } ++ zs->zst_direction = ZFETCH_BACKWARD; ++ ++ break; ++ ++ } else if ((zh->zst_offset - zs->zst_offset - zs->zst_stride < ++ zs->zst_len) && (zs->zst_len != zs->zst_stride)) { ++ /* strided forward access */ ++ ++ mutex_enter(&zs->zst_lock); ++ ++ if ((zh->zst_offset - zs->zst_offset - zs->zst_stride >= ++ zs->zst_len) || (zs->zst_len == zs->zst_stride)) { ++ mutex_exit(&zs->zst_lock); ++ goto top; ++ } ++ ++ zs->zst_offset += zs->zst_stride; ++ zs->zst_direction = ZFETCH_FORWARD; ++ ++ break; ++ ++ } else if ((zh->zst_offset - zs->zst_offset + zs->zst_stride < ++ zs->zst_len) && (zs->zst_len != zs->zst_stride)) { ++ /* strided reverse access */ ++ ++ mutex_enter(&zs->zst_lock); ++ ++ if ((zh->zst_offset - zs->zst_offset + zs->zst_stride >= ++ zs->zst_len) || (zs->zst_len == zs->zst_stride)) { ++ mutex_exit(&zs->zst_lock); ++ goto top; ++ } ++ ++ zs->zst_offset = zs->zst_offset > zs->zst_stride ? ++ zs->zst_offset - zs->zst_stride : 0; ++ zs->zst_ph_offset = (zs->zst_ph_offset > ++ (2 * zs->zst_stride)) ? ++ (zs->zst_ph_offset - (2 * zs->zst_stride)) : 0; ++ zs->zst_direction = ZFETCH_BACKWARD; ++ ++ break; ++ } ++ } ++ ++ if (zs) { ++ if (reset) { ++ zstream_t *remove = zs; ++ ++ ZFETCHSTAT_BUMP(zfetchstat_stream_resets); ++ rc = 0; ++ mutex_exit(&zs->zst_lock); ++ rw_exit(&zf->zf_rwlock); ++ rw_enter(&zf->zf_rwlock, RW_WRITER); ++ /* ++ * Relocate the stream, in case someone removes ++ * it while we were acquiring the WRITER lock. 
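The chain of cases above is easier to see as a pure classification of the incoming read (zh) against an existing stream (zs). A standalone model keeping the same unsigned comparisons (the strided tests rely on uint64 wrap-around to reject far-away offsets, exactly as in the original); the enum and function name are mine, and all locking, stream updates and cap/trim arithmetic are omitted:

#include <stdint.h>

enum zf_case { ZF_NONE, ZF_WITHIN, ZF_SEQ_FWD, ZF_SEQ_BWD,
    ZF_STRIDE_FWD, ZF_STRIDE_BWD };

static enum zf_case
model_classify(uint64_t h_off, uint64_t h_len,
    uint64_t s_off, uint64_t s_len, uint64_t s_stride)
{
	if (h_off >= s_off && h_off < s_off + s_len)
		return (ZF_WITHIN);	/* inside the current run */
	if (h_off == s_off + s_len)
		return (ZF_SEQ_FWD);	/* next block, reading forward */
	if (h_off == s_off - h_len)
		return (ZF_SEQ_BWD);	/* previous block, reading backward */
	if (s_len != s_stride && h_off - s_off - s_stride < s_len)
		return (ZF_STRIDE_FWD);	/* one stride ahead of the stream */
	if (s_len != s_stride && h_off - s_off + s_stride < s_len)
		return (ZF_STRIDE_BWD);	/* one stride behind the stream */
	return (ZF_NONE);
}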
++ */ ++ for (zs = list_head(&zf->zf_stream); zs; ++ zs = list_next(&zf->zf_stream, zs)) { ++ if (zs == remove) { ++ dmu_zfetch_stream_remove(zf, zs); ++ mutex_destroy(&zs->zst_lock); ++ kmem_free(zs, sizeof (zstream_t)); ++ break; ++ } ++ } ++ } else { ++ ZFETCHSTAT_BUMP(zfetchstat_stream_noresets); ++ rc = 1; ++ dmu_zfetch_dofetch(zf, zs); ++ mutex_exit(&zs->zst_lock); ++ } ++ } ++out: ++ rw_exit(&zf->zf_rwlock); ++ return (rc); ++} ++ ++/* ++ * Clean-up state associated with a zfetch structure. This frees allocated ++ * structure members, empties the zf_stream tree, and generally makes things ++ * nice. This doesn't free the zfetch_t itself, that's left to the caller. ++ */ ++void ++dmu_zfetch_rele(zfetch_t *zf) ++{ ++ zstream_t *zs; ++ zstream_t *zs_next; ++ ++ ASSERT(!RW_LOCK_HELD(&zf->zf_rwlock)); ++ ++ for (zs = list_head(&zf->zf_stream); zs; zs = zs_next) { ++ zs_next = list_next(&zf->zf_stream, zs); ++ ++ list_remove(&zf->zf_stream, zs); ++ mutex_destroy(&zs->zst_lock); ++ kmem_free(zs, sizeof (zstream_t)); ++ } ++ list_destroy(&zf->zf_stream); ++ rw_destroy(&zf->zf_rwlock); ++ ++ zf->zf_dnode = NULL; ++} ++ ++/* ++ * Given a zfetch and zstream structure, insert the zstream structure into the ++ * AVL tree contained within the zfetch structure. Peform the appropriate ++ * book-keeping. It is possible that another thread has inserted a stream which ++ * matches one that we are about to insert, so we must be sure to check for this ++ * case. If one is found, return failure, and let the caller cleanup the ++ * duplicates. ++ */ ++static int ++dmu_zfetch_stream_insert(zfetch_t *zf, zstream_t *zs) ++{ ++ zstream_t *zs_walk; ++ zstream_t *zs_next; ++ ++ ASSERT(RW_WRITE_HELD(&zf->zf_rwlock)); ++ ++ for (zs_walk = list_head(&zf->zf_stream); zs_walk; zs_walk = zs_next) { ++ zs_next = list_next(&zf->zf_stream, zs_walk); ++ ++ if (dmu_zfetch_streams_equal(zs_walk, zs)) { ++ return (0); ++ } ++ } ++ ++ list_insert_head(&zf->zf_stream, zs); ++ zf->zf_stream_cnt++; ++ return (1); ++} ++ ++ ++/* ++ * Walk the list of zstreams in the given zfetch, find an old one (by time), and ++ * reclaim it for use by the caller. ++ */ ++static zstream_t * ++dmu_zfetch_stream_reclaim(zfetch_t *zf) ++{ ++ zstream_t *zs; ++ ++ if (! rw_tryenter(&zf->zf_rwlock, RW_WRITER)) ++ return (0); ++ ++ for (zs = list_head(&zf->zf_stream); zs; ++ zs = list_next(&zf->zf_stream, zs)) { ++ ++ if (((ddi_get_lbolt() - zs->zst_last)/hz) > zfetch_min_sec_reap) ++ break; ++ } ++ ++ if (zs) { ++ dmu_zfetch_stream_remove(zf, zs); ++ mutex_destroy(&zs->zst_lock); ++ bzero(zs, sizeof (zstream_t)); ++ } else { ++ zf->zf_alloc_fail++; ++ } ++ rw_exit(&zf->zf_rwlock); ++ ++ return (zs); ++} ++ ++/* ++ * Given a zfetch and zstream structure, remove the zstream structure from its ++ * container in the zfetch structure. Perform the appropriate book-keeping. 
++ */ ++static void ++dmu_zfetch_stream_remove(zfetch_t *zf, zstream_t *zs) ++{ ++ ASSERT(RW_WRITE_HELD(&zf->zf_rwlock)); ++ ++ list_remove(&zf->zf_stream, zs); ++ zf->zf_stream_cnt--; ++} ++ ++static int ++dmu_zfetch_streams_equal(zstream_t *zs1, zstream_t *zs2) ++{ ++ if (zs1->zst_offset != zs2->zst_offset) ++ return (0); ++ ++ if (zs1->zst_len != zs2->zst_len) ++ return (0); ++ ++ if (zs1->zst_stride != zs2->zst_stride) ++ return (0); ++ ++ if (zs1->zst_ph_offset != zs2->zst_ph_offset) ++ return (0); ++ ++ if (zs1->zst_cap != zs2->zst_cap) ++ return (0); ++ ++ if (zs1->zst_direction != zs2->zst_direction) ++ return (0); ++ ++ return (1); ++} ++ ++/* ++ * This is the prefetch entry point. It calls all of the other dmu_zfetch ++ * routines to create, delete, find, or operate upon prefetch streams. ++ */ ++void ++dmu_zfetch(zfetch_t *zf, uint64_t offset, uint64_t size, int prefetched) ++{ ++ zstream_t zst; ++ zstream_t *newstream; ++ int fetched; ++ int inserted; ++ unsigned int blkshft; ++ uint64_t blksz; ++ ++ if (zfs_prefetch_disable) ++ return; ++ ++ /* files that aren't ln2 blocksz are only one block -- nothing to do */ ++ if (!zf->zf_dnode->dn_datablkshift) ++ return; ++ ++ /* convert offset and size, into blockid and nblocks */ ++ blkshft = zf->zf_dnode->dn_datablkshift; ++ blksz = (1 << blkshft); ++ ++ bzero(&zst, sizeof (zstream_t)); ++ zst.zst_offset = offset >> blkshft; ++ zst.zst_len = (P2ROUNDUP(offset + size, blksz) - ++ P2ALIGN(offset, blksz)) >> blkshft; ++ ++ fetched = dmu_zfetch_find(zf, &zst, prefetched); ++ if (fetched) { ++ ZFETCHSTAT_BUMP(zfetchstat_hits); ++ } else { ++ ZFETCHSTAT_BUMP(zfetchstat_misses); ++ if ((fetched = dmu_zfetch_colinear(zf, &zst))) { ++ ZFETCHSTAT_BUMP(zfetchstat_colinear_hits); ++ } else { ++ ZFETCHSTAT_BUMP(zfetchstat_colinear_misses); ++ } ++ } ++ ++ if (!fetched) { ++ newstream = dmu_zfetch_stream_reclaim(zf); ++ ++ /* ++ * we still couldn't find a stream, drop the lock, and allocate ++ * one if possible. Otherwise, give up and go home. 
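Before any stream matching happens, dmu_zfetch() above converts the byte-level (offset, size) of the access into a block id and block count. A self-contained model with P2ALIGN/P2ROUNDUP expanded by hand (power-of-two block size assumed, which the early return on a zero dn_datablkshift already guarantees); the names are mine:

#include <stdint.h>

static void
model_to_blocks(uint64_t offset, uint64_t size, unsigned blkshift,
    uint64_t *blkid, uint64_t *nblks)
{
	uint64_t blksz = 1ULL << blkshift;
	uint64_t start = offset & ~(blksz - 1);			/* P2ALIGN */
	uint64_t end = (offset + size + blksz - 1) & ~(blksz - 1);	/* P2ROUNDUP */

	*blkid = offset >> blkshift;		/* first block touched */
	*nblks = (end - start) >> blkshift;	/* number of blocks touched */
}

For example, a 4 KiB read at offset 5 KiB in a file with 4 KiB blocks yields blkid = 1 and nblks = 2.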
++ */ ++ if (newstream) { ++ ZFETCHSTAT_BUMP(zfetchstat_reclaim_successes); ++ } else { ++ uint64_t maxblocks; ++ uint32_t max_streams; ++ uint32_t cur_streams; ++ ++ ZFETCHSTAT_BUMP(zfetchstat_reclaim_failures); ++ cur_streams = zf->zf_stream_cnt; ++ maxblocks = zf->zf_dnode->dn_maxblkid; ++ ++ max_streams = MIN(zfetch_max_streams, ++ (maxblocks / zfetch_block_cap)); ++ if (max_streams == 0) { ++ max_streams++; ++ } ++ ++ if (cur_streams >= max_streams) { ++ return; ++ } ++ newstream = kmem_zalloc(sizeof (zstream_t), KM_PUSHPAGE); ++ } ++ ++ newstream->zst_offset = zst.zst_offset; ++ newstream->zst_len = zst.zst_len; ++ newstream->zst_stride = zst.zst_len; ++ newstream->zst_ph_offset = zst.zst_len + zst.zst_offset; ++ newstream->zst_cap = zst.zst_len; ++ newstream->zst_direction = ZFETCH_FORWARD; ++ newstream->zst_last = ddi_get_lbolt(); ++ ++ mutex_init(&newstream->zst_lock, NULL, MUTEX_DEFAULT, NULL); ++ ++ rw_enter(&zf->zf_rwlock, RW_WRITER); ++ inserted = dmu_zfetch_stream_insert(zf, newstream); ++ rw_exit(&zf->zf_rwlock); ++ ++ if (!inserted) { ++ mutex_destroy(&newstream->zst_lock); ++ kmem_free(newstream, sizeof (zstream_t)); ++ } ++ } ++} ++ ++#if defined(_KERNEL) && defined(HAVE_SPL) ++module_param(zfs_prefetch_disable, int, 0644); ++MODULE_PARM_DESC(zfs_prefetch_disable, "Disable all ZFS prefetching"); ++ ++module_param(zfetch_max_streams, uint, 0644); ++MODULE_PARM_DESC(zfetch_max_streams, "Max number of streams per zfetch"); ++ ++module_param(zfetch_min_sec_reap, uint, 0644); ++MODULE_PARM_DESC(zfetch_min_sec_reap, "Min time before stream reclaim"); ++ ++module_param(zfetch_block_cap, uint, 0644); ++MODULE_PARM_DESC(zfetch_block_cap, "Max number of blocks to fetch at a time"); ++ ++module_param(zfetch_array_rd_sz, ulong, 0644); ++MODULE_PARM_DESC(zfetch_array_rd_sz, "Number of bytes in a array_read"); ++#endif ++ +diff -uNr linux-3.2.33-go.orig/fs/zfs/zfs/dnode.c linux-3.2.33-go/fs/zfs/zfs/dnode.c +--- linux-3.2.33-go.orig/fs/zfs/zfs/dnode.c 1970-01-01 01:00:00.000000000 +0100 ++++ linux-3.2.33-go/fs/zfs/zfs/dnode.c 2012-11-16 23:25:34.351039311 +0100 +@@ -0,0 +1,1994 @@ ++/* ++ * CDDL HEADER START ++ * ++ * The contents of this file are subject to the terms of the ++ * Common Development and Distribution License (the "License"). ++ * You may not use this file except in compliance with the License. ++ * ++ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE ++ * or http://www.opensolaris.org/os/licensing. ++ * See the License for the specific language governing permissions ++ * and limitations under the License. ++ * ++ * When distributing Covered Code, include this CDDL HEADER in each ++ * file and include the License file at usr/src/OPENSOLARIS.LICENSE. ++ * If applicable, add the following below this CDDL HEADER, with the ++ * fields enclosed by brackets "[]" replaced with your own identifying ++ * information: Portions Copyright [yyyy] [name of copyright owner] ++ * ++ * CDDL HEADER END ++ */ ++/* ++ * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved. ++ */ ++ ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++ ++static int free_range_compar(const void *node1, const void *node2); ++ ++static kmem_cache_t *dnode_cache; ++/* ++ * Define DNODE_STATS to turn on statistic gathering. By default, it is only ++ * turned on when DEBUG is also defined. 
++ */ ++#ifdef DEBUG ++#define DNODE_STATS ++#endif /* DEBUG */ ++ ++#ifdef DNODE_STATS ++#define DNODE_STAT_ADD(stat) ((stat)++) ++#else ++#define DNODE_STAT_ADD(stat) /* nothing */ ++#endif /* DNODE_STATS */ ++ ++ASSERTV(static dnode_phys_t dnode_phys_zero); ++ ++int zfs_default_bs = SPA_MINBLOCKSHIFT; ++int zfs_default_ibs = DN_MAX_INDBLKSHIFT; ++ ++#ifdef _KERNEL ++static kmem_cbrc_t dnode_move(void *, void *, size_t, void *); ++#endif /* _KERNEL */ ++ ++/* ARGSUSED */ ++static int ++dnode_cons(void *arg, void *unused, int kmflag) ++{ ++ dnode_t *dn = arg; ++ int i; ++ ++ rw_init(&dn->dn_struct_rwlock, NULL, RW_DEFAULT, NULL); ++ mutex_init(&dn->dn_mtx, NULL, MUTEX_DEFAULT, NULL); ++ mutex_init(&dn->dn_dbufs_mtx, NULL, MUTEX_DEFAULT, NULL); ++ cv_init(&dn->dn_notxholds, NULL, CV_DEFAULT, NULL); ++ ++ refcount_create(&dn->dn_holds); ++ refcount_create(&dn->dn_tx_holds); ++ list_link_init(&dn->dn_link); ++ ++ bzero(&dn->dn_next_nblkptr[0], sizeof (dn->dn_next_nblkptr)); ++ bzero(&dn->dn_next_nlevels[0], sizeof (dn->dn_next_nlevels)); ++ bzero(&dn->dn_next_indblkshift[0], sizeof (dn->dn_next_indblkshift)); ++ bzero(&dn->dn_next_bonustype[0], sizeof (dn->dn_next_bonustype)); ++ bzero(&dn->dn_rm_spillblk[0], sizeof (dn->dn_rm_spillblk)); ++ bzero(&dn->dn_next_bonuslen[0], sizeof (dn->dn_next_bonuslen)); ++ bzero(&dn->dn_next_blksz[0], sizeof (dn->dn_next_blksz)); ++ ++ for (i = 0; i < TXG_SIZE; i++) { ++ list_link_init(&dn->dn_dirty_link[i]); ++ avl_create(&dn->dn_ranges[i], free_range_compar, ++ sizeof (free_range_t), ++ offsetof(struct free_range, fr_node)); ++ list_create(&dn->dn_dirty_records[i], ++ sizeof (dbuf_dirty_record_t), ++ offsetof(dbuf_dirty_record_t, dr_dirty_node)); ++ } ++ ++ dn->dn_allocated_txg = 0; ++ dn->dn_free_txg = 0; ++ dn->dn_assigned_txg = 0; ++ dn->dn_dirtyctx = 0; ++ dn->dn_dirtyctx_firstset = NULL; ++ dn->dn_bonus = NULL; ++ dn->dn_have_spill = B_FALSE; ++ dn->dn_zio = NULL; ++ dn->dn_oldused = 0; ++ dn->dn_oldflags = 0; ++ dn->dn_olduid = 0; ++ dn->dn_oldgid = 0; ++ dn->dn_newuid = 0; ++ dn->dn_newgid = 0; ++ dn->dn_id_flags = 0; ++ ++ dn->dn_dbufs_count = 0; ++ list_create(&dn->dn_dbufs, sizeof (dmu_buf_impl_t), ++ offsetof(dmu_buf_impl_t, db_link)); ++ ++ dn->dn_moved = 0; ++ return (0); ++} ++ ++/* ARGSUSED */ ++static void ++dnode_dest(void *arg, void *unused) ++{ ++ int i; ++ dnode_t *dn = arg; ++ ++ rw_destroy(&dn->dn_struct_rwlock); ++ mutex_destroy(&dn->dn_mtx); ++ mutex_destroy(&dn->dn_dbufs_mtx); ++ cv_destroy(&dn->dn_notxholds); ++ refcount_destroy(&dn->dn_holds); ++ refcount_destroy(&dn->dn_tx_holds); ++ ASSERT(!list_link_active(&dn->dn_link)); ++ ++ for (i = 0; i < TXG_SIZE; i++) { ++ ASSERT(!list_link_active(&dn->dn_dirty_link[i])); ++ avl_destroy(&dn->dn_ranges[i]); ++ list_destroy(&dn->dn_dirty_records[i]); ++ ASSERT3U(dn->dn_next_nblkptr[i], ==, 0); ++ ASSERT3U(dn->dn_next_nlevels[i], ==, 0); ++ ASSERT3U(dn->dn_next_indblkshift[i], ==, 0); ++ ASSERT3U(dn->dn_next_bonustype[i], ==, 0); ++ ASSERT3U(dn->dn_rm_spillblk[i], ==, 0); ++ ASSERT3U(dn->dn_next_bonuslen[i], ==, 0); ++ ASSERT3U(dn->dn_next_blksz[i], ==, 0); ++ } ++ ++ ASSERT3U(dn->dn_allocated_txg, ==, 0); ++ ASSERT3U(dn->dn_free_txg, ==, 0); ++ ASSERT3U(dn->dn_assigned_txg, ==, 0); ++ ASSERT3U(dn->dn_dirtyctx, ==, 0); ++ ASSERT3P(dn->dn_dirtyctx_firstset, ==, NULL); ++ ASSERT3P(dn->dn_bonus, ==, NULL); ++ ASSERT(!dn->dn_have_spill); ++ ASSERT3P(dn->dn_zio, ==, NULL); ++ ASSERT3U(dn->dn_oldused, ==, 0); ++ ASSERT3U(dn->dn_oldflags, ==, 0); ++ ASSERT3U(dn->dn_olduid, ==, 0); ++ 
ASSERT3U(dn->dn_oldgid, ==, 0); ++ ASSERT3U(dn->dn_newuid, ==, 0); ++ ASSERT3U(dn->dn_newgid, ==, 0); ++ ASSERT3U(dn->dn_id_flags, ==, 0); ++ ++ ASSERT3U(dn->dn_dbufs_count, ==, 0); ++ list_destroy(&dn->dn_dbufs); ++} ++ ++void ++dnode_init(void) ++{ ++ ASSERT(dnode_cache == NULL); ++ dnode_cache = kmem_cache_create("dnode_t", sizeof (dnode_t), ++ 0, dnode_cons, dnode_dest, NULL, NULL, NULL, KMC_KMEM); ++ kmem_cache_set_move(dnode_cache, dnode_move); ++} ++ ++void ++dnode_fini(void) ++{ ++ kmem_cache_destroy(dnode_cache); ++ dnode_cache = NULL; ++} ++ ++ ++#ifdef ZFS_DEBUG ++void ++dnode_verify(dnode_t *dn) ++{ ++ int drop_struct_lock = FALSE; ++ ++ ASSERT(dn->dn_phys); ++ ASSERT(dn->dn_objset); ++ ASSERT(dn->dn_handle->dnh_dnode == dn); ++ ++ ASSERT(dn->dn_phys->dn_type < DMU_OT_NUMTYPES); ++ ++ if (!(zfs_flags & ZFS_DEBUG_DNODE_VERIFY)) ++ return; ++ ++ if (!RW_WRITE_HELD(&dn->dn_struct_rwlock)) { ++ rw_enter(&dn->dn_struct_rwlock, RW_READER); ++ drop_struct_lock = TRUE; ++ } ++ if (dn->dn_phys->dn_type != DMU_OT_NONE || dn->dn_allocated_txg != 0) { ++ int i; ++ ASSERT3U(dn->dn_indblkshift, <=, SPA_MAXBLOCKSHIFT); ++ if (dn->dn_datablkshift) { ++ ASSERT3U(dn->dn_datablkshift, >=, SPA_MINBLOCKSHIFT); ++ ASSERT3U(dn->dn_datablkshift, <=, SPA_MAXBLOCKSHIFT); ++ ASSERT3U(1<dn_datablkshift, ==, dn->dn_datablksz); ++ } ++ ASSERT3U(dn->dn_nlevels, <=, 30); ++ ASSERT3U(dn->dn_type, <=, DMU_OT_NUMTYPES); ++ ASSERT3U(dn->dn_nblkptr, >=, 1); ++ ASSERT3U(dn->dn_nblkptr, <=, DN_MAX_NBLKPTR); ++ ASSERT3U(dn->dn_bonuslen, <=, DN_MAX_BONUSLEN); ++ ASSERT3U(dn->dn_datablksz, ==, ++ dn->dn_datablkszsec << SPA_MINBLOCKSHIFT); ++ ASSERT3U(ISP2(dn->dn_datablksz), ==, dn->dn_datablkshift != 0); ++ ASSERT3U((dn->dn_nblkptr - 1) * sizeof (blkptr_t) + ++ dn->dn_bonuslen, <=, DN_MAX_BONUSLEN); ++ for (i = 0; i < TXG_SIZE; i++) { ++ ASSERT3U(dn->dn_next_nlevels[i], <=, dn->dn_nlevels); ++ } ++ } ++ if (dn->dn_phys->dn_type != DMU_OT_NONE) ++ ASSERT3U(dn->dn_phys->dn_nlevels, <=, dn->dn_nlevels); ++ ASSERT(DMU_OBJECT_IS_SPECIAL(dn->dn_object) || dn->dn_dbuf != NULL); ++ if (dn->dn_dbuf != NULL) { ++ ASSERT3P(dn->dn_phys, ==, ++ (dnode_phys_t *)dn->dn_dbuf->db.db_data + ++ (dn->dn_object % (dn->dn_dbuf->db.db_size >> DNODE_SHIFT))); ++ } ++ if (drop_struct_lock) ++ rw_exit(&dn->dn_struct_rwlock); ++} ++#endif ++ ++void ++dnode_byteswap(dnode_phys_t *dnp) ++{ ++ uint64_t *buf64 = (void*)&dnp->dn_blkptr; ++ int i; ++ ++ if (dnp->dn_type == DMU_OT_NONE) { ++ bzero(dnp, sizeof (dnode_phys_t)); ++ return; ++ } ++ ++ dnp->dn_datablkszsec = BSWAP_16(dnp->dn_datablkszsec); ++ dnp->dn_bonuslen = BSWAP_16(dnp->dn_bonuslen); ++ dnp->dn_maxblkid = BSWAP_64(dnp->dn_maxblkid); ++ dnp->dn_used = BSWAP_64(dnp->dn_used); ++ ++ /* ++ * dn_nblkptr is only one byte, so it's OK to read it in either ++ * byte order. We can't read dn_bouslen. ++ */ ++ ASSERT(dnp->dn_indblkshift <= SPA_MAXBLOCKSHIFT); ++ ASSERT(dnp->dn_nblkptr <= DN_MAX_NBLKPTR); ++ for (i = 0; i < dnp->dn_nblkptr * sizeof (blkptr_t)/8; i++) ++ buf64[i] = BSWAP_64(buf64[i]); ++ ++ /* ++ * OK to check dn_bonuslen for zero, because it won't matter if ++ * we have the wrong byte order. This is necessary because the ++ * dnode dnode is smaller than a regular dnode. ++ */ ++ if (dnp->dn_bonuslen != 0) { ++ /* ++ * Note that the bonus length calculated here may be ++ * longer than the actual bonus buffer. This is because ++ * we always put the bonus buffer after the last block ++ * pointer (instead of packing it against the end of the ++ * dnode buffer). 
++		 */
++		int off = (dnp->dn_nblkptr-1) * sizeof (blkptr_t);
++		size_t len = DN_MAX_BONUSLEN - off;
++		ASSERT3U(dnp->dn_bonustype, <, DMU_OT_NUMTYPES);
++		dmu_ot[dnp->dn_bonustype].ot_byteswap(dnp->dn_bonus + off, len);
++	}
++
++	/* Swap SPILL block if we have one */
++	if (dnp->dn_flags & DNODE_FLAG_SPILL_BLKPTR)
++		byteswap_uint64_array(&dnp->dn_spill, sizeof (blkptr_t));
++
++}
++
++void
++dnode_buf_byteswap(void *vbuf, size_t size)
++{
++	dnode_phys_t *buf = vbuf;
++	int i;
++
++	ASSERT3U(sizeof (dnode_phys_t), ==, (1<<DNODE_SHIFT));
++
++	size >>= DNODE_SHIFT;
++	for (i = 0; i < size; i++) {
++		dnode_byteswap(buf);
++		buf++;
++	}
++}
++
++static int
++free_range_compar(const void *node1, const void *node2)
++{
++	const free_range_t *rp1 = node1;
++	const free_range_t *rp2 = node2;
++
++	if (rp1->fr_blkid < rp2->fr_blkid)
++		return (-1);
++	else if (rp1->fr_blkid > rp2->fr_blkid)
++		return (1);
++	else return (0);
++}
++
++void
++dnode_setbonuslen(dnode_t *dn, int newsize, dmu_tx_t *tx)
++{
++	ASSERT3U(refcount_count(&dn->dn_holds), >=, 1);
++
++	dnode_setdirty(dn, tx);
++	rw_enter(&dn->dn_struct_rwlock, RW_WRITER);
++	ASSERT3U(newsize, <=, DN_MAX_BONUSLEN -
++	    (dn->dn_nblkptr-1) * sizeof (blkptr_t));
++	dn->dn_bonuslen = newsize;
++	if (newsize == 0)
++		dn->dn_next_bonuslen[tx->tx_txg & TXG_MASK] = DN_ZERO_BONUSLEN;
++	else
++		dn->dn_next_bonuslen[tx->tx_txg & TXG_MASK] = dn->dn_bonuslen;
++	rw_exit(&dn->dn_struct_rwlock);
++}
++
++void
++dnode_setbonus_type(dnode_t *dn, dmu_object_type_t newtype, dmu_tx_t *tx)
++{
++	ASSERT3U(refcount_count(&dn->dn_holds), >=, 1);
++	dnode_setdirty(dn, tx);
++	rw_enter(&dn->dn_struct_rwlock, RW_WRITER);
++	dn->dn_bonustype = newtype;
++	dn->dn_next_bonustype[tx->tx_txg & TXG_MASK] = dn->dn_bonustype;
++	rw_exit(&dn->dn_struct_rwlock);
++}
++
++void
++dnode_rm_spill(dnode_t *dn, dmu_tx_t *tx)
++{
++	ASSERT3U(refcount_count(&dn->dn_holds), >=, 1);
++	ASSERT(RW_WRITE_HELD(&dn->dn_struct_rwlock));
++	dnode_setdirty(dn, tx);
++	dn->dn_rm_spillblk[tx->tx_txg&TXG_MASK] = DN_KILL_SPILLBLK;
++	dn->dn_have_spill = B_FALSE;
++}
++
++static void
++dnode_setdblksz(dnode_t *dn, int size)
++{
++	ASSERT3U(P2PHASE(size, SPA_MINBLOCKSIZE), ==, 0);
++	ASSERT3U(size, <=, SPA_MAXBLOCKSIZE);
++	ASSERT3U(size, >=, SPA_MINBLOCKSIZE);
++	ASSERT3U(size >> SPA_MINBLOCKSHIFT, <,
++	    1<<(sizeof (dn->dn_phys->dn_datablkszsec) * 8));
++	dn->dn_datablksz = size;
++	dn->dn_datablkszsec = size >> SPA_MINBLOCKSHIFT;
++	dn->dn_datablkshift = ISP2(size) ? highbit(size - 1) : 0;
++}
++
++static dnode_t *
++dnode_create(objset_t *os, dnode_phys_t *dnp, dmu_buf_impl_t *db,
++    uint64_t object, dnode_handle_t *dnh)
++{
++	dnode_t *dn = kmem_cache_alloc(dnode_cache, KM_PUSHPAGE);
++
++	ASSERT(!POINTER_IS_VALID(dn->dn_objset));
++	dn->dn_moved = 0;
++
++	/*
++	 * Defer setting dn_objset until the dnode is ready to be a candidate
++	 * for the dnode_move() callback.
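dnode_setdblksz() above only records a data block shift when the block size is a power of two; otherwise dn_datablkshift stays 0, which is what makes dmu_zfetch() skip such dnodes earlier in this patch. A standalone model of that computation with ISP2()/highbit() expanded by hand (a GCC/Clang builtin stands in for log2; the name is mine):

#include <stdint.h>

static int
model_datablkshift(uint64_t size)
{
	if (size == 0 || (size & (size - 1)) != 0)
		return (0);			/* not a power of two */
	return (__builtin_ctzll(size));		/* log2(size) */
}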
++ */ ++ dn->dn_object = object; ++ dn->dn_dbuf = db; ++ dn->dn_handle = dnh; ++ dn->dn_phys = dnp; ++ ++ if (dnp->dn_datablkszsec) { ++ dnode_setdblksz(dn, dnp->dn_datablkszsec << SPA_MINBLOCKSHIFT); ++ } else { ++ dn->dn_datablksz = 0; ++ dn->dn_datablkszsec = 0; ++ dn->dn_datablkshift = 0; ++ } ++ dn->dn_indblkshift = dnp->dn_indblkshift; ++ dn->dn_nlevels = dnp->dn_nlevels; ++ dn->dn_type = dnp->dn_type; ++ dn->dn_nblkptr = dnp->dn_nblkptr; ++ dn->dn_checksum = dnp->dn_checksum; ++ dn->dn_compress = dnp->dn_compress; ++ dn->dn_bonustype = dnp->dn_bonustype; ++ dn->dn_bonuslen = dnp->dn_bonuslen; ++ dn->dn_maxblkid = dnp->dn_maxblkid; ++ dn->dn_have_spill = ((dnp->dn_flags & DNODE_FLAG_SPILL_BLKPTR) != 0); ++ dn->dn_id_flags = 0; ++ ++ dmu_zfetch_init(&dn->dn_zfetch, dn); ++ ++ ASSERT(dn->dn_phys->dn_type < DMU_OT_NUMTYPES); ++ ++ mutex_enter(&os->os_lock); ++ list_insert_head(&os->os_dnodes, dn); ++ membar_producer(); ++ /* ++ * Everything else must be valid before assigning dn_objset makes the ++ * dnode eligible for dnode_move(). ++ */ ++ dn->dn_objset = os; ++ mutex_exit(&os->os_lock); ++ ++ arc_space_consume(sizeof (dnode_t), ARC_SPACE_OTHER); ++ return (dn); ++} ++ ++/* ++ * Caller must be holding the dnode handle, which is released upon return. ++ */ ++static void ++dnode_destroy(dnode_t *dn) ++{ ++ objset_t *os = dn->dn_objset; ++ ++ ASSERT((dn->dn_id_flags & DN_ID_NEW_EXIST) == 0); ++ ++ mutex_enter(&os->os_lock); ++ POINTER_INVALIDATE(&dn->dn_objset); ++ list_remove(&os->os_dnodes, dn); ++ mutex_exit(&os->os_lock); ++ ++ /* the dnode can no longer move, so we can release the handle */ ++ zrl_remove(&dn->dn_handle->dnh_zrlock); ++ ++ dn->dn_allocated_txg = 0; ++ dn->dn_free_txg = 0; ++ dn->dn_assigned_txg = 0; ++ ++ dn->dn_dirtyctx = 0; ++ if (dn->dn_dirtyctx_firstset != NULL) { ++ kmem_free(dn->dn_dirtyctx_firstset, 1); ++ dn->dn_dirtyctx_firstset = NULL; ++ } ++ if (dn->dn_bonus != NULL) { ++ mutex_enter(&dn->dn_bonus->db_mtx); ++ dbuf_evict(dn->dn_bonus); ++ dn->dn_bonus = NULL; ++ } ++ dn->dn_zio = NULL; ++ ++ dn->dn_have_spill = B_FALSE; ++ dn->dn_oldused = 0; ++ dn->dn_oldflags = 0; ++ dn->dn_olduid = 0; ++ dn->dn_oldgid = 0; ++ dn->dn_newuid = 0; ++ dn->dn_newgid = 0; ++ dn->dn_id_flags = 0; ++ ++ dmu_zfetch_rele(&dn->dn_zfetch); ++ kmem_cache_free(dnode_cache, dn); ++ arc_space_return(sizeof (dnode_t), ARC_SPACE_OTHER); ++} ++ ++void ++dnode_allocate(dnode_t *dn, dmu_object_type_t ot, int blocksize, int ibs, ++ dmu_object_type_t bonustype, int bonuslen, dmu_tx_t *tx) ++{ ++ int i; ++ ++ if (blocksize == 0) ++ blocksize = 1 << zfs_default_bs; ++ else if (blocksize > SPA_MAXBLOCKSIZE) ++ blocksize = SPA_MAXBLOCKSIZE; ++ else ++ blocksize = P2ROUNDUP(blocksize, SPA_MINBLOCKSIZE); ++ ++ if (ibs == 0) ++ ibs = zfs_default_ibs; ++ ++ ibs = MIN(MAX(ibs, DN_MIN_INDBLKSHIFT), DN_MAX_INDBLKSHIFT); ++ ++ dprintf("os=%p obj=%llu txg=%llu blocksize=%d ibs=%d\n", dn->dn_objset, ++ dn->dn_object, tx->tx_txg, blocksize, ibs); ++ ++ ASSERT(dn->dn_type == DMU_OT_NONE); ++ ASSERT(bcmp(dn->dn_phys, &dnode_phys_zero, sizeof (dnode_phys_t)) == 0); ++ ASSERT(dn->dn_phys->dn_type == DMU_OT_NONE); ++ ASSERT(ot != DMU_OT_NONE); ++ ASSERT3U(ot, <, DMU_OT_NUMTYPES); ++ ASSERT((bonustype == DMU_OT_NONE && bonuslen == 0) || ++ (bonustype == DMU_OT_SA && bonuslen == 0) || ++ (bonustype != DMU_OT_NONE && bonuslen != 0)); ++ ASSERT3U(bonustype, <, DMU_OT_NUMTYPES); ++ ASSERT3U(bonuslen, <=, DN_MAX_BONUSLEN); ++ ASSERT(dn->dn_type == DMU_OT_NONE); ++ ASSERT3U(dn->dn_maxblkid, ==, 0); ++ 
ASSERT3U(dn->dn_allocated_txg, ==, 0); ++ ASSERT3U(dn->dn_assigned_txg, ==, 0); ++ ASSERT(refcount_is_zero(&dn->dn_tx_holds)); ++ ASSERT3U(refcount_count(&dn->dn_holds), <=, 1); ++ ASSERT3P(list_head(&dn->dn_dbufs), ==, NULL); ++ ++ for (i = 0; i < TXG_SIZE; i++) { ++ ASSERT3U(dn->dn_next_nblkptr[i], ==, 0); ++ ASSERT3U(dn->dn_next_nlevels[i], ==, 0); ++ ASSERT3U(dn->dn_next_indblkshift[i], ==, 0); ++ ASSERT3U(dn->dn_next_bonuslen[i], ==, 0); ++ ASSERT3U(dn->dn_next_bonustype[i], ==, 0); ++ ASSERT3U(dn->dn_rm_spillblk[i], ==, 0); ++ ASSERT3U(dn->dn_next_blksz[i], ==, 0); ++ ASSERT(!list_link_active(&dn->dn_dirty_link[i])); ++ ASSERT3P(list_head(&dn->dn_dirty_records[i]), ==, NULL); ++ ASSERT3U(avl_numnodes(&dn->dn_ranges[i]), ==, 0); ++ } ++ ++ dn->dn_type = ot; ++ dnode_setdblksz(dn, blocksize); ++ dn->dn_indblkshift = ibs; ++ dn->dn_nlevels = 1; ++ if (bonustype == DMU_OT_SA) /* Maximize bonus space for SA */ ++ dn->dn_nblkptr = 1; ++ else ++ dn->dn_nblkptr = 1 + ++ ((DN_MAX_BONUSLEN - bonuslen) >> SPA_BLKPTRSHIFT); ++ dn->dn_bonustype = bonustype; ++ dn->dn_bonuslen = bonuslen; ++ dn->dn_checksum = ZIO_CHECKSUM_INHERIT; ++ dn->dn_compress = ZIO_COMPRESS_INHERIT; ++ dn->dn_dirtyctx = 0; ++ ++ dn->dn_free_txg = 0; ++ if (dn->dn_dirtyctx_firstset) { ++ kmem_free(dn->dn_dirtyctx_firstset, 1); ++ dn->dn_dirtyctx_firstset = NULL; ++ } ++ ++ dn->dn_allocated_txg = tx->tx_txg; ++ dn->dn_id_flags = 0; ++ ++ dnode_setdirty(dn, tx); ++ dn->dn_next_indblkshift[tx->tx_txg & TXG_MASK] = ibs; ++ dn->dn_next_bonuslen[tx->tx_txg & TXG_MASK] = dn->dn_bonuslen; ++ dn->dn_next_bonustype[tx->tx_txg & TXG_MASK] = dn->dn_bonustype; ++ dn->dn_next_blksz[tx->tx_txg & TXG_MASK] = dn->dn_datablksz; ++} ++ ++void ++dnode_reallocate(dnode_t *dn, dmu_object_type_t ot, int blocksize, ++ dmu_object_type_t bonustype, int bonuslen, dmu_tx_t *tx) ++{ ++ int nblkptr; ++ ++ ASSERT3U(blocksize, >=, SPA_MINBLOCKSIZE); ++ ASSERT3U(blocksize, <=, SPA_MAXBLOCKSIZE); ++ ASSERT3U(blocksize % SPA_MINBLOCKSIZE, ==, 0); ++ ASSERT(dn->dn_object != DMU_META_DNODE_OBJECT || dmu_tx_private_ok(tx)); ++ ASSERT(tx->tx_txg != 0); ++ ASSERT((bonustype == DMU_OT_NONE && bonuslen == 0) || ++ (bonustype != DMU_OT_NONE && bonuslen != 0) || ++ (bonustype == DMU_OT_SA && bonuslen == 0)); ++ ASSERT3U(bonustype, <, DMU_OT_NUMTYPES); ++ ASSERT3U(bonuslen, <=, DN_MAX_BONUSLEN); ++ ++ /* clean up any unreferenced dbufs */ ++ dnode_evict_dbufs(dn); ++ ++ dn->dn_id_flags = 0; ++ ++ rw_enter(&dn->dn_struct_rwlock, RW_WRITER); ++ dnode_setdirty(dn, tx); ++ if (dn->dn_datablksz != blocksize) { ++ /* change blocksize */ ++ ASSERT(dn->dn_maxblkid == 0 && ++ (BP_IS_HOLE(&dn->dn_phys->dn_blkptr[0]) || ++ dnode_block_freed(dn, 0))); ++ dnode_setdblksz(dn, blocksize); ++ dn->dn_next_blksz[tx->tx_txg&TXG_MASK] = blocksize; ++ } ++ if (dn->dn_bonuslen != bonuslen) ++ dn->dn_next_bonuslen[tx->tx_txg&TXG_MASK] = bonuslen; ++ ++ if (bonustype == DMU_OT_SA) /* Maximize bonus space for SA */ ++ nblkptr = 1; ++ else ++ nblkptr = 1 + ((DN_MAX_BONUSLEN - bonuslen) >> SPA_BLKPTRSHIFT); ++ if (dn->dn_bonustype != bonustype) ++ dn->dn_next_bonustype[tx->tx_txg&TXG_MASK] = bonustype; ++ if (dn->dn_nblkptr != nblkptr) ++ dn->dn_next_nblkptr[tx->tx_txg&TXG_MASK] = nblkptr; ++ if (dn->dn_phys->dn_flags & DNODE_FLAG_SPILL_BLKPTR) { ++ dbuf_rm_spill(dn, tx); ++ dnode_rm_spill(dn, tx); ++ } ++ rw_exit(&dn->dn_struct_rwlock); ++ ++ /* change type */ ++ dn->dn_type = ot; ++ ++ /* change bonus size and type */ ++ mutex_enter(&dn->dn_mtx); ++ dn->dn_bonustype = bonustype; ++ 
dn->dn_bonuslen = bonuslen; ++ dn->dn_nblkptr = nblkptr; ++ dn->dn_checksum = ZIO_CHECKSUM_INHERIT; ++ dn->dn_compress = ZIO_COMPRESS_INHERIT; ++ ASSERT3U(dn->dn_nblkptr, <=, DN_MAX_NBLKPTR); ++ ++ /* fix up the bonus db_size */ ++ if (dn->dn_bonus) { ++ dn->dn_bonus->db.db_size = ++ DN_MAX_BONUSLEN - (dn->dn_nblkptr-1) * sizeof (blkptr_t); ++ ASSERT(dn->dn_bonuslen <= dn->dn_bonus->db.db_size); ++ } ++ ++ dn->dn_allocated_txg = tx->tx_txg; ++ mutex_exit(&dn->dn_mtx); ++} ++ ++#ifdef _KERNEL ++#ifdef DNODE_STATS ++static struct { ++ uint64_t dms_dnode_invalid; ++ uint64_t dms_dnode_recheck1; ++ uint64_t dms_dnode_recheck2; ++ uint64_t dms_dnode_special; ++ uint64_t dms_dnode_handle; ++ uint64_t dms_dnode_rwlock; ++ uint64_t dms_dnode_active; ++} dnode_move_stats; ++#endif /* DNODE_STATS */ ++ ++static void ++dnode_move_impl(dnode_t *odn, dnode_t *ndn) ++{ ++ int i; ++ ++ ASSERT(!RW_LOCK_HELD(&odn->dn_struct_rwlock)); ++ ASSERT(MUTEX_NOT_HELD(&odn->dn_mtx)); ++ ASSERT(MUTEX_NOT_HELD(&odn->dn_dbufs_mtx)); ++ ASSERT(!RW_LOCK_HELD(&odn->dn_zfetch.zf_rwlock)); ++ ++ /* Copy fields. */ ++ ndn->dn_objset = odn->dn_objset; ++ ndn->dn_object = odn->dn_object; ++ ndn->dn_dbuf = odn->dn_dbuf; ++ ndn->dn_handle = odn->dn_handle; ++ ndn->dn_phys = odn->dn_phys; ++ ndn->dn_type = odn->dn_type; ++ ndn->dn_bonuslen = odn->dn_bonuslen; ++ ndn->dn_bonustype = odn->dn_bonustype; ++ ndn->dn_nblkptr = odn->dn_nblkptr; ++ ndn->dn_checksum = odn->dn_checksum; ++ ndn->dn_compress = odn->dn_compress; ++ ndn->dn_nlevels = odn->dn_nlevels; ++ ndn->dn_indblkshift = odn->dn_indblkshift; ++ ndn->dn_datablkshift = odn->dn_datablkshift; ++ ndn->dn_datablkszsec = odn->dn_datablkszsec; ++ ndn->dn_datablksz = odn->dn_datablksz; ++ ndn->dn_maxblkid = odn->dn_maxblkid; ++ bcopy(&odn->dn_next_nblkptr[0], &ndn->dn_next_nblkptr[0], ++ sizeof (odn->dn_next_nblkptr)); ++ bcopy(&odn->dn_next_nlevels[0], &ndn->dn_next_nlevels[0], ++ sizeof (odn->dn_next_nlevels)); ++ bcopy(&odn->dn_next_indblkshift[0], &ndn->dn_next_indblkshift[0], ++ sizeof (odn->dn_next_indblkshift)); ++ bcopy(&odn->dn_next_bonustype[0], &ndn->dn_next_bonustype[0], ++ sizeof (odn->dn_next_bonustype)); ++ bcopy(&odn->dn_rm_spillblk[0], &ndn->dn_rm_spillblk[0], ++ sizeof (odn->dn_rm_spillblk)); ++ bcopy(&odn->dn_next_bonuslen[0], &ndn->dn_next_bonuslen[0], ++ sizeof (odn->dn_next_bonuslen)); ++ bcopy(&odn->dn_next_blksz[0], &ndn->dn_next_blksz[0], ++ sizeof (odn->dn_next_blksz)); ++ for (i = 0; i < TXG_SIZE; i++) { ++ list_move_tail(&ndn->dn_dirty_records[i], ++ &odn->dn_dirty_records[i]); ++ } ++ bcopy(&odn->dn_ranges[0], &ndn->dn_ranges[0], sizeof (odn->dn_ranges)); ++ ndn->dn_allocated_txg = odn->dn_allocated_txg; ++ ndn->dn_free_txg = odn->dn_free_txg; ++ ndn->dn_assigned_txg = odn->dn_assigned_txg; ++ ndn->dn_dirtyctx = odn->dn_dirtyctx; ++ ndn->dn_dirtyctx_firstset = odn->dn_dirtyctx_firstset; ++ ASSERT(refcount_count(&odn->dn_tx_holds) == 0); ++ refcount_transfer(&ndn->dn_holds, &odn->dn_holds); ++ ASSERT(list_is_empty(&ndn->dn_dbufs)); ++ list_move_tail(&ndn->dn_dbufs, &odn->dn_dbufs); ++ ndn->dn_dbufs_count = odn->dn_dbufs_count; ++ ndn->dn_bonus = odn->dn_bonus; ++ ndn->dn_have_spill = odn->dn_have_spill; ++ ndn->dn_zio = odn->dn_zio; ++ ndn->dn_oldused = odn->dn_oldused; ++ ndn->dn_oldflags = odn->dn_oldflags; ++ ndn->dn_olduid = odn->dn_olduid; ++ ndn->dn_oldgid = odn->dn_oldgid; ++ ndn->dn_newuid = odn->dn_newuid; ++ ndn->dn_newgid = odn->dn_newgid; ++ ndn->dn_id_flags = odn->dn_id_flags; ++ dmu_zfetch_init(&ndn->dn_zfetch, NULL); ++ 
list_move_tail(&ndn->dn_zfetch.zf_stream, &odn->dn_zfetch.zf_stream); ++ ndn->dn_zfetch.zf_dnode = odn->dn_zfetch.zf_dnode; ++ ndn->dn_zfetch.zf_stream_cnt = odn->dn_zfetch.zf_stream_cnt; ++ ndn->dn_zfetch.zf_alloc_fail = odn->dn_zfetch.zf_alloc_fail; ++ ++ /* ++ * Update back pointers. Updating the handle fixes the back pointer of ++ * every descendant dbuf as well as the bonus dbuf. ++ */ ++ ASSERT(ndn->dn_handle->dnh_dnode == odn); ++ ndn->dn_handle->dnh_dnode = ndn; ++ if (ndn->dn_zfetch.zf_dnode == odn) { ++ ndn->dn_zfetch.zf_dnode = ndn; ++ } ++ ++ /* ++ * Invalidate the original dnode by clearing all of its back pointers. ++ */ ++ odn->dn_dbuf = NULL; ++ odn->dn_handle = NULL; ++ list_create(&odn->dn_dbufs, sizeof (dmu_buf_impl_t), ++ offsetof(dmu_buf_impl_t, db_link)); ++ odn->dn_dbufs_count = 0; ++ odn->dn_bonus = NULL; ++ odn->dn_zfetch.zf_dnode = NULL; ++ ++ /* ++ * Set the low bit of the objset pointer to ensure that dnode_move() ++ * recognizes the dnode as invalid in any subsequent callback. ++ */ ++ POINTER_INVALIDATE(&odn->dn_objset); ++ ++ /* ++ * Satisfy the destructor. ++ */ ++ for (i = 0; i < TXG_SIZE; i++) { ++ list_create(&odn->dn_dirty_records[i], ++ sizeof (dbuf_dirty_record_t), ++ offsetof(dbuf_dirty_record_t, dr_dirty_node)); ++ odn->dn_ranges[i].avl_root = NULL; ++ odn->dn_ranges[i].avl_numnodes = 0; ++ odn->dn_next_nlevels[i] = 0; ++ odn->dn_next_indblkshift[i] = 0; ++ odn->dn_next_bonustype[i] = 0; ++ odn->dn_rm_spillblk[i] = 0; ++ odn->dn_next_bonuslen[i] = 0; ++ odn->dn_next_blksz[i] = 0; ++ } ++ odn->dn_allocated_txg = 0; ++ odn->dn_free_txg = 0; ++ odn->dn_assigned_txg = 0; ++ odn->dn_dirtyctx = 0; ++ odn->dn_dirtyctx_firstset = NULL; ++ odn->dn_have_spill = B_FALSE; ++ odn->dn_zio = NULL; ++ odn->dn_oldused = 0; ++ odn->dn_oldflags = 0; ++ odn->dn_olduid = 0; ++ odn->dn_oldgid = 0; ++ odn->dn_newuid = 0; ++ odn->dn_newgid = 0; ++ odn->dn_id_flags = 0; ++ ++ /* ++ * Mark the dnode. ++ */ ++ ndn->dn_moved = 1; ++ odn->dn_moved = (uint8_t)-1; ++} ++ ++/*ARGSUSED*/ ++static kmem_cbrc_t ++dnode_move(void *buf, void *newbuf, size_t size, void *arg) ++{ ++ dnode_t *odn = buf, *ndn = newbuf; ++ objset_t *os; ++ int64_t refcount; ++ uint32_t dbufs; ++ ++ /* ++ * The dnode is on the objset's list of known dnodes if the objset ++ * pointer is valid. We set the low bit of the objset pointer when ++ * freeing the dnode to invalidate it, and the memory patterns written ++ * by kmem (baddcafe and deadbeef) set at least one of the two low bits. ++ * A newly created dnode sets the objset pointer last of all to indicate ++ * that the dnode is known and in a valid state to be moved by this ++ * function. ++ */ ++ os = odn->dn_objset; ++ if (!POINTER_IS_VALID(os)) { ++ DNODE_STAT_ADD(dnode_move_stats.dms_dnode_invalid); ++ return (KMEM_CBRC_DONT_KNOW); ++ } ++ ++ /* ++ * Ensure that the objset does not go away during the move. ++ */ ++ rw_enter(&os_lock, RW_WRITER); ++ if (os != odn->dn_objset) { ++ rw_exit(&os_lock); ++ DNODE_STAT_ADD(dnode_move_stats.dms_dnode_recheck1); ++ return (KMEM_CBRC_DONT_KNOW); ++ } ++ ++ /* ++ * If the dnode is still valid, then so is the objset. We know that no ++ * valid objset can be freed while we hold os_lock, so we can safely ++ * ensure that the objset remains in use. ++ */ ++ mutex_enter(&os->os_lock); ++ ++ /* ++ * Recheck the objset pointer in case the dnode was removed just before ++ * acquiring the lock. 
++ */ ++ if (os != odn->dn_objset) { ++ mutex_exit(&os->os_lock); ++ rw_exit(&os_lock); ++ DNODE_STAT_ADD(dnode_move_stats.dms_dnode_recheck2); ++ return (KMEM_CBRC_DONT_KNOW); ++ } ++ ++ /* ++ * At this point we know that as long as we hold os->os_lock, the dnode ++ * cannot be freed and fields within the dnode can be safely accessed. ++ * The objset listing this dnode cannot go away as long as this dnode is ++ * on its list. ++ */ ++ rw_exit(&os_lock); ++ if (DMU_OBJECT_IS_SPECIAL(odn->dn_object)) { ++ mutex_exit(&os->os_lock); ++ DNODE_STAT_ADD(dnode_move_stats.dms_dnode_special); ++ return (KMEM_CBRC_NO); ++ } ++ ASSERT(odn->dn_dbuf != NULL); /* only "special" dnodes have no parent */ ++ ++ /* ++ * Lock the dnode handle to prevent the dnode from obtaining any new ++ * holds. This also prevents the descendant dbufs and the bonus dbuf ++ * from accessing the dnode, so that we can discount their holds. The ++ * handle is safe to access because we know that while the dnode cannot ++ * go away, neither can its handle. Once we hold dnh_zrlock, we can ++ * safely move any dnode referenced only by dbufs. ++ */ ++ if (!zrl_tryenter(&odn->dn_handle->dnh_zrlock)) { ++ mutex_exit(&os->os_lock); ++ DNODE_STAT_ADD(dnode_move_stats.dms_dnode_handle); ++ return (KMEM_CBRC_LATER); ++ } ++ ++ /* ++ * Ensure a consistent view of the dnode's holds and the dnode's dbufs. ++ * We need to guarantee that there is a hold for every dbuf in order to ++ * determine whether the dnode is actively referenced. Falsely matching ++ * a dbuf to an active hold would lead to an unsafe move. It's possible ++ * that a thread already having an active dnode hold is about to add a ++ * dbuf, and we can't compare hold and dbuf counts while the add is in ++ * progress. ++ */ ++ if (!rw_tryenter(&odn->dn_struct_rwlock, RW_WRITER)) { ++ zrl_exit(&odn->dn_handle->dnh_zrlock); ++ mutex_exit(&os->os_lock); ++ DNODE_STAT_ADD(dnode_move_stats.dms_dnode_rwlock); ++ return (KMEM_CBRC_LATER); ++ } ++ ++ /* ++ * A dbuf may be removed (evicted) without an active dnode hold. In that ++ * case, the dbuf count is decremented under the handle lock before the ++ * dbuf's hold is released. This order ensures that if we count the hold ++ * after the dbuf is removed but before its hold is released, we will ++ * treat the unmatched hold as active and exit safely. If we count the ++ * hold before the dbuf is removed, the hold is discounted, and the ++ * removal is blocked until the move completes. ++ */ ++ refcount = refcount_count(&odn->dn_holds); ++ ASSERT(refcount >= 0); ++ dbufs = odn->dn_dbufs_count; ++ ++ /* We can't have more dbufs than dnode holds. */ ++ ASSERT3U(dbufs, <=, refcount); ++ DTRACE_PROBE3(dnode__move, dnode_t *, odn, int64_t, refcount, ++ uint32_t, dbufs); ++ ++ if (refcount > dbufs) { ++ rw_exit(&odn->dn_struct_rwlock); ++ zrl_exit(&odn->dn_handle->dnh_zrlock); ++ mutex_exit(&os->os_lock); ++ DNODE_STAT_ADD(dnode_move_stats.dms_dnode_active); ++ return (KMEM_CBRC_LATER); ++ } ++ ++ rw_exit(&odn->dn_struct_rwlock); ++ ++ /* ++ * At this point we know that anyone with a hold on the dnode is not ++ * actively referencing it. The dnode is known and in a valid state to ++ * move. We're holding the locks needed to execute the critical section. ++ */ ++ dnode_move_impl(odn, ndn); ++ ++ list_link_replace(&odn->dn_link, &ndn->dn_link); ++ /* If the dnode was safe to move, the refcount cannot have changed. 
*/ ++ ASSERT(refcount == refcount_count(&ndn->dn_holds)); ++ ASSERT(dbufs == ndn->dn_dbufs_count); ++ zrl_exit(&ndn->dn_handle->dnh_zrlock); /* handle has moved */ ++ mutex_exit(&os->os_lock); ++ ++ return (KMEM_CBRC_YES); ++} ++#endif /* _KERNEL */ ++ ++void ++dnode_special_close(dnode_handle_t *dnh) ++{ ++ dnode_t *dn = dnh->dnh_dnode; ++ ++ /* ++ * Wait for final references to the dnode to clear. This can ++ * only happen if the arc is asyncronously evicting state that ++ * has a hold on this dnode while we are trying to evict this ++ * dnode. ++ */ ++ while (refcount_count(&dn->dn_holds) > 0) ++ delay(1); ++ zrl_add(&dnh->dnh_zrlock); ++ dnode_destroy(dn); /* implicit zrl_remove() */ ++ zrl_destroy(&dnh->dnh_zrlock); ++ dnh->dnh_dnode = NULL; ++} ++ ++dnode_t * ++dnode_special_open(objset_t *os, dnode_phys_t *dnp, uint64_t object, ++ dnode_handle_t *dnh) ++{ ++ dnode_t *dn = dnode_create(os, dnp, NULL, object, dnh); ++ dnh->dnh_dnode = dn; ++ zrl_init(&dnh->dnh_zrlock); ++ DNODE_VERIFY(dn); ++ return (dn); ++} ++ ++static void ++dnode_buf_pageout(dmu_buf_t *db, void *arg) ++{ ++ dnode_children_t *children_dnodes = arg; ++ int i; ++ int epb = db->db_size >> DNODE_SHIFT; ++ ++ ASSERT(epb == children_dnodes->dnc_count); ++ ++ for (i = 0; i < epb; i++) { ++ dnode_handle_t *dnh = &children_dnodes->dnc_children[i]; ++ dnode_t *dn; ++ ++ /* ++ * The dnode handle lock guards against the dnode moving to ++ * another valid address, so there is no need here to guard ++ * against changes to or from NULL. ++ */ ++ if (dnh->dnh_dnode == NULL) { ++ zrl_destroy(&dnh->dnh_zrlock); ++ continue; ++ } ++ ++ zrl_add(&dnh->dnh_zrlock); ++ dn = dnh->dnh_dnode; ++ /* ++ * If there are holds on this dnode, then there should ++ * be holds on the dnode's containing dbuf as well; thus ++ * it wouldn't be eligible for eviction and this function ++ * would not have been called. ++ */ ++ ASSERT(refcount_is_zero(&dn->dn_holds)); ++ ASSERT(refcount_is_zero(&dn->dn_tx_holds)); ++ ++ dnode_destroy(dn); /* implicit zrl_remove() */ ++ zrl_destroy(&dnh->dnh_zrlock); ++ dnh->dnh_dnode = NULL; ++ } ++ kmem_free(children_dnodes, sizeof (dnode_children_t) + ++ (epb - 1) * sizeof (dnode_handle_t)); ++} ++ ++/* ++ * errors: ++ * EINVAL - invalid object number. ++ * EIO - i/o error. ++ * succeeds even for free dnodes. ++ */ ++int ++dnode_hold_impl(objset_t *os, uint64_t object, int flag, ++ void *tag, dnode_t **dnp) ++{ ++ int epb, idx, err; ++ int drop_struct_lock = FALSE; ++ int type; ++ uint64_t blk; ++ dnode_t *mdn, *dn; ++ dmu_buf_impl_t *db; ++ dnode_children_t *children_dnodes; ++ dnode_handle_t *dnh; ++ ++ /* ++ * If you are holding the spa config lock as writer, you shouldn't ++ * be asking the DMU to do *anything* unless it's the root pool ++ * which may require us to read from the root filesystem while ++ * holding some (not all) of the locks as writer. ++ */ ++ ASSERT(spa_config_held(os->os_spa, SCL_ALL, RW_WRITER) == 0 || ++ (spa_is_root(os->os_spa) && ++ spa_config_held(os->os_spa, SCL_STATE, RW_WRITER))); ++ ++ if (object == DMU_USERUSED_OBJECT || object == DMU_GROUPUSED_OBJECT) { ++ dn = (object == DMU_USERUSED_OBJECT) ? 
++ DMU_USERUSED_DNODE(os) : DMU_GROUPUSED_DNODE(os); ++ if (dn == NULL) ++ return (ENOENT); ++ type = dn->dn_type; ++ if ((flag & DNODE_MUST_BE_ALLOCATED) && type == DMU_OT_NONE) ++ return (ENOENT); ++ if ((flag & DNODE_MUST_BE_FREE) && type != DMU_OT_NONE) ++ return (EEXIST); ++ DNODE_VERIFY(dn); ++ (void) refcount_add(&dn->dn_holds, tag); ++ *dnp = dn; ++ return (0); ++ } ++ ++ if (object == 0 || object >= DN_MAX_OBJECT) ++ return (EINVAL); ++ ++ mdn = DMU_META_DNODE(os); ++ ASSERT(mdn->dn_object == DMU_META_DNODE_OBJECT); ++ ++ DNODE_VERIFY(mdn); ++ ++ if (!RW_WRITE_HELD(&mdn->dn_struct_rwlock)) { ++ rw_enter(&mdn->dn_struct_rwlock, RW_READER); ++ drop_struct_lock = TRUE; ++ } ++ ++ blk = dbuf_whichblock(mdn, object * sizeof (dnode_phys_t)); ++ ++ db = dbuf_hold(mdn, blk, FTAG); ++ if (drop_struct_lock) ++ rw_exit(&mdn->dn_struct_rwlock); ++ if (db == NULL) ++ return (EIO); ++ err = dbuf_read(db, NULL, DB_RF_CANFAIL); ++ if (err) { ++ dbuf_rele(db, FTAG); ++ return (err); ++ } ++ ++ ASSERT3U(db->db.db_size, >=, 1<db.db_size >> DNODE_SHIFT; ++ ++ idx = object & (epb-1); ++ ++ ASSERT(DB_DNODE(db)->dn_type == DMU_OT_DNODE); ++ children_dnodes = dmu_buf_get_user(&db->db); ++ if (children_dnodes == NULL) { ++ int i; ++ dnode_children_t *winner; ++ children_dnodes = kmem_alloc(sizeof (dnode_children_t) + ++ (epb - 1) * sizeof (dnode_handle_t), ++ KM_PUSHPAGE | KM_NODEBUG); ++ children_dnodes->dnc_count = epb; ++ dnh = &children_dnodes->dnc_children[0]; ++ for (i = 0; i < epb; i++) { ++ zrl_init(&dnh[i].dnh_zrlock); ++ dnh[i].dnh_dnode = NULL; ++ } ++ if ((winner = dmu_buf_set_user(&db->db, children_dnodes, NULL, ++ dnode_buf_pageout))) { ++ kmem_free(children_dnodes, sizeof (dnode_children_t) + ++ (epb - 1) * sizeof (dnode_handle_t)); ++ children_dnodes = winner; ++ } ++ } ++ ASSERT(children_dnodes->dnc_count == epb); ++ ++ dnh = &children_dnodes->dnc_children[idx]; ++ zrl_add(&dnh->dnh_zrlock); ++ if ((dn = dnh->dnh_dnode) == NULL) { ++ dnode_phys_t *phys = (dnode_phys_t *)db->db.db_data+idx; ++ dnode_t *winner; ++ ++ dn = dnode_create(os, phys, db, object, dnh); ++ winner = atomic_cas_ptr(&dnh->dnh_dnode, NULL, dn); ++ if (winner != NULL) { ++ zrl_add(&dnh->dnh_zrlock); ++ dnode_destroy(dn); /* implicit zrl_remove() */ ++ dn = winner; ++ } ++ } ++ ++ mutex_enter(&dn->dn_mtx); ++ type = dn->dn_type; ++ if (dn->dn_free_txg || ++ ((flag & DNODE_MUST_BE_ALLOCATED) && type == DMU_OT_NONE) || ++ ((flag & DNODE_MUST_BE_FREE) && ++ (type != DMU_OT_NONE || !refcount_is_zero(&dn->dn_holds)))) { ++ mutex_exit(&dn->dn_mtx); ++ zrl_remove(&dnh->dnh_zrlock); ++ dbuf_rele(db, FTAG); ++ return (type == DMU_OT_NONE ? ENOENT : EEXIST); ++ } ++ mutex_exit(&dn->dn_mtx); ++ ++ if (refcount_add(&dn->dn_holds, tag) == 1) ++ dbuf_add_ref(db, dnh); ++ /* Now we can rely on the hold to prevent the dnode from moving. */ ++ zrl_remove(&dnh->dnh_zrlock); ++ ++ DNODE_VERIFY(dn); ++ ASSERT3P(dn->dn_dbuf, ==, db); ++ ASSERT3U(dn->dn_object, ==, object); ++ dbuf_rele(db, FTAG); ++ ++ *dnp = dn; ++ return (0); ++} ++ ++/* ++ * Return held dnode if the object is allocated, NULL if not. ++ */ ++int ++dnode_hold(objset_t *os, uint64_t object, void *tag, dnode_t **dnp) ++{ ++ return (dnode_hold_impl(os, object, DNODE_MUST_BE_ALLOCATED, tag, dnp)); ++} ++ ++/* ++ * Can only add a reference if there is already at least one ++ * reference on the dnode. Returns FALSE if unable to add a ++ * new reference. 
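dnode_add_ref() below only takes a hold if at least one hold already exists, so a dnode whose count has reached zero (and may be mid-eviction) is never resurrected. The same "get-unless-zero" idiom, sketched with C11 atomics purely for illustration (the real code performs the check under dn_mtx rather than with a CAS loop):

#include <stdatomic.h>
#include <stdbool.h>

/* Take a reference only if at least one reference already exists. */
static bool
ref_get_unless_zero(atomic_long *refs)
{
        long old = atomic_load(refs);

        while (old != 0) {
                /* bump only if the count is still what we saw */
                if (atomic_compare_exchange_weak(refs, &old, old + 1))
                        return (true);
        }
        return (false);
}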
++ */ ++boolean_t ++dnode_add_ref(dnode_t *dn, void *tag) ++{ ++ mutex_enter(&dn->dn_mtx); ++ if (refcount_is_zero(&dn->dn_holds)) { ++ mutex_exit(&dn->dn_mtx); ++ return (FALSE); ++ } ++ VERIFY(1 < refcount_add(&dn->dn_holds, tag)); ++ mutex_exit(&dn->dn_mtx); ++ return (TRUE); ++} ++ ++void ++dnode_rele(dnode_t *dn, void *tag) ++{ ++ uint64_t refs; ++ /* Get while the hold prevents the dnode from moving. */ ++ dmu_buf_impl_t *db = dn->dn_dbuf; ++ dnode_handle_t *dnh = dn->dn_handle; ++ ++ mutex_enter(&dn->dn_mtx); ++ refs = refcount_remove(&dn->dn_holds, tag); ++ mutex_exit(&dn->dn_mtx); ++ ++ /* ++ * It's unsafe to release the last hold on a dnode by dnode_rele() or ++ * indirectly by dbuf_rele() while relying on the dnode handle to ++ * prevent the dnode from moving, since releasing the last hold could ++ * result in the dnode's parent dbuf evicting its dnode handles. For ++ * that reason anyone calling dnode_rele() or dbuf_rele() without some ++ * other direct or indirect hold on the dnode must first drop the dnode ++ * handle. ++ */ ++ ASSERT(refs > 0 || dnh->dnh_zrlock.zr_owner != curthread); ++ ++ /* NOTE: the DNODE_DNODE does not have a dn_dbuf */ ++ if (refs == 0 && db != NULL) { ++ /* ++ * Another thread could add a hold to the dnode handle in ++ * dnode_hold_impl() while holding the parent dbuf. Since the ++ * hold on the parent dbuf prevents the handle from being ++ * destroyed, the hold on the handle is OK. We can't yet assert ++ * that the handle has zero references, but that will be ++ * asserted anyway when the handle gets destroyed. ++ */ ++ dbuf_rele(db, dnh); ++ } ++} ++ ++void ++dnode_setdirty(dnode_t *dn, dmu_tx_t *tx) ++{ ++ objset_t *os = dn->dn_objset; ++ uint64_t txg = tx->tx_txg; ++ ++ if (DMU_OBJECT_IS_SPECIAL(dn->dn_object)) { ++ dsl_dataset_dirty(os->os_dsl_dataset, tx); ++ return; ++ } ++ ++ DNODE_VERIFY(dn); ++ ++#ifdef ZFS_DEBUG ++ mutex_enter(&dn->dn_mtx); ++ ASSERT(dn->dn_phys->dn_type || dn->dn_allocated_txg); ++ ASSERT(dn->dn_free_txg == 0 || dn->dn_free_txg >= txg); ++ mutex_exit(&dn->dn_mtx); ++#endif ++ ++ /* ++ * Determine old uid/gid when necessary ++ */ ++ dmu_objset_userquota_get_ids(dn, B_TRUE, tx); ++ ++ mutex_enter(&os->os_lock); ++ ++ /* ++ * If we are already marked dirty, we're done. ++ */ ++ if (list_link_active(&dn->dn_dirty_link[txg & TXG_MASK])) { ++ mutex_exit(&os->os_lock); ++ return; ++ } ++ ++ ASSERT(!refcount_is_zero(&dn->dn_holds) || list_head(&dn->dn_dbufs)); ++ ASSERT(dn->dn_datablksz != 0); ++ ASSERT3U(dn->dn_next_bonuslen[txg&TXG_MASK], ==, 0); ++ ASSERT3U(dn->dn_next_blksz[txg&TXG_MASK], ==, 0); ++ ASSERT3U(dn->dn_next_bonustype[txg&TXG_MASK], ==, 0); ++ ++ dprintf_ds(os->os_dsl_dataset, "obj=%llu txg=%llu\n", ++ dn->dn_object, txg); ++ ++ if (dn->dn_free_txg > 0 && dn->dn_free_txg <= txg) { ++ list_insert_tail(&os->os_free_dnodes[txg&TXG_MASK], dn); ++ } else { ++ list_insert_tail(&os->os_dirty_dnodes[txg&TXG_MASK], dn); ++ } ++ ++ mutex_exit(&os->os_lock); ++ ++ /* ++ * The dnode maintains a hold on its containing dbuf as ++ * long as there are holds on it. Each instantiated child ++ * dbuf maintains a hold on the dnode. When the last child ++ * drops its hold, the dnode will drop its hold on the ++ * containing dbuf. We add a "dirty hold" here so that the ++ * dnode will hang around after we finish processing its ++ * children. 
++ */ ++ VERIFY(dnode_add_ref(dn, (void *)(uintptr_t)tx->tx_txg)); ++ ++ (void) dbuf_dirty(dn->dn_dbuf, tx); ++ ++ dsl_dataset_dirty(os->os_dsl_dataset, tx); ++} ++ ++void ++dnode_free(dnode_t *dn, dmu_tx_t *tx) ++{ ++ int txgoff = tx->tx_txg & TXG_MASK; ++ ++ dprintf("dn=%p txg=%llu\n", dn, tx->tx_txg); ++ ++ /* we should be the only holder... hopefully */ ++ /* ASSERT3U(refcount_count(&dn->dn_holds), ==, 1); */ ++ ++ mutex_enter(&dn->dn_mtx); ++ if (dn->dn_type == DMU_OT_NONE || dn->dn_free_txg) { ++ mutex_exit(&dn->dn_mtx); ++ return; ++ } ++ dn->dn_free_txg = tx->tx_txg; ++ mutex_exit(&dn->dn_mtx); ++ ++ /* ++ * If the dnode is already dirty, it needs to be moved from ++ * the dirty list to the free list. ++ */ ++ mutex_enter(&dn->dn_objset->os_lock); ++ if (list_link_active(&dn->dn_dirty_link[txgoff])) { ++ list_remove(&dn->dn_objset->os_dirty_dnodes[txgoff], dn); ++ list_insert_tail(&dn->dn_objset->os_free_dnodes[txgoff], dn); ++ mutex_exit(&dn->dn_objset->os_lock); ++ } else { ++ mutex_exit(&dn->dn_objset->os_lock); ++ dnode_setdirty(dn, tx); ++ } ++} ++ ++/* ++ * Try to change the block size for the indicated dnode. This can only ++ * succeed if there are no blocks allocated or dirty beyond first block ++ */ ++int ++dnode_set_blksz(dnode_t *dn, uint64_t size, int ibs, dmu_tx_t *tx) ++{ ++ dmu_buf_impl_t *db, *db_next; ++ int err; ++ ++ if (size == 0) ++ size = SPA_MINBLOCKSIZE; ++ if (size > SPA_MAXBLOCKSIZE) ++ size = SPA_MAXBLOCKSIZE; ++ else ++ size = P2ROUNDUP(size, SPA_MINBLOCKSIZE); ++ ++ if (ibs == dn->dn_indblkshift) ++ ibs = 0; ++ ++ if (size >> SPA_MINBLOCKSHIFT == dn->dn_datablkszsec && ibs == 0) ++ return (0); ++ ++ rw_enter(&dn->dn_struct_rwlock, RW_WRITER); ++ ++ /* Check for any allocated blocks beyond the first */ ++ if (dn->dn_phys->dn_maxblkid != 0) ++ goto fail; ++ ++ mutex_enter(&dn->dn_dbufs_mtx); ++ for (db = list_head(&dn->dn_dbufs); db; db = db_next) { ++ db_next = list_next(&dn->dn_dbufs, db); ++ ++ if (db->db_blkid != 0 && db->db_blkid != DMU_BONUS_BLKID && ++ db->db_blkid != DMU_SPILL_BLKID) { ++ mutex_exit(&dn->dn_dbufs_mtx); ++ goto fail; ++ } ++ } ++ mutex_exit(&dn->dn_dbufs_mtx); ++ ++ if (ibs && dn->dn_nlevels != 1) ++ goto fail; ++ ++ /* resize the old block */ ++ err = dbuf_hold_impl(dn, 0, 0, TRUE, FTAG, &db); ++ if (err == 0) ++ dbuf_new_size(db, size, tx); ++ else if (err != ENOENT) ++ goto fail; ++ ++ dnode_setdblksz(dn, size); ++ dnode_setdirty(dn, tx); ++ dn->dn_next_blksz[tx->tx_txg&TXG_MASK] = size; ++ if (ibs) { ++ dn->dn_indblkshift = ibs; ++ dn->dn_next_indblkshift[tx->tx_txg&TXG_MASK] = ibs; ++ } ++ /* rele after we have fixed the blocksize in the dnode */ ++ if (db) ++ dbuf_rele(db, FTAG); ++ ++ rw_exit(&dn->dn_struct_rwlock); ++ return (0); ++ ++fail: ++ rw_exit(&dn->dn_struct_rwlock); ++ return (ENOTSUP); ++} ++ ++/* read-holding callers must not rely on the lock being continuously held */ ++void ++dnode_new_blkid(dnode_t *dn, uint64_t blkid, dmu_tx_t *tx, boolean_t have_read) ++{ ++ uint64_t txgoff = tx->tx_txg & TXG_MASK; ++ int epbs, new_nlevels; ++ uint64_t sz; ++ ++ ASSERT(blkid != DMU_BONUS_BLKID); ++ ++ ASSERT(have_read ? ++ RW_READ_HELD(&dn->dn_struct_rwlock) : ++ RW_WRITE_HELD(&dn->dn_struct_rwlock)); ++ ++ /* ++ * if we have a read-lock, check to see if we need to do any work ++ * before upgrading to a write-lock. 
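When dnode_new_blkid() (continued below) raises dn_maxblkid, it recomputes how many indirection levels the tree needs: level 1 covers dn_nblkptr blocks, and each additional level multiplies the reach by 2^epbs, where epbs = dn_indblkshift - SPA_BLKPTRSHIFT is the log2 of block pointers per indirect block. A standalone version of that sizing loop; the example values are illustrative only, and the overflow guard of the original is omitted:

#include <stdint.h>
#include <stdio.h>

/*
 * Levels needed so that block id 'blkid' is addressable, given
 * 'nblkptr' top-level block pointers and 2^epbs pointers per
 * indirect block.  Mirrors the loop in dnode_new_blkid().
 */
static int
levels_for_blkid(uint64_t blkid, uint64_t nblkptr, int epbs)
{
        int levels = 1;
        uint64_t span = nblkptr;      /* blocks addressable at this depth */

        while (span <= blkid) {
                span <<= epbs;
                levels++;
        }
        return (levels);
}

int
main(void)
{
        /* e.g. 3 block pointers, 128 pointers per indirect (epbs = 7) */
        printf("%d\n", levels_for_blkid(2, 3, 7));      /* 1 */
        printf("%d\n", levels_for_blkid(300, 3, 7));    /* 2 */
        printf("%d\n", levels_for_blkid(40000, 3, 7));  /* 3 */
        return (0);
}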
++ */ ++ if (have_read) { ++ if (blkid <= dn->dn_maxblkid) ++ return; ++ ++ if (!rw_tryupgrade(&dn->dn_struct_rwlock)) { ++ rw_exit(&dn->dn_struct_rwlock); ++ rw_enter(&dn->dn_struct_rwlock, RW_WRITER); ++ } ++ } ++ ++ if (blkid <= dn->dn_maxblkid) ++ goto out; ++ ++ dn->dn_maxblkid = blkid; ++ ++ /* ++ * Compute the number of levels necessary to support the new maxblkid. ++ */ ++ new_nlevels = 1; ++ epbs = dn->dn_indblkshift - SPA_BLKPTRSHIFT; ++ for (sz = dn->dn_nblkptr; ++ sz <= blkid && sz >= dn->dn_nblkptr; sz <<= epbs) ++ new_nlevels++; ++ ++ if (new_nlevels > dn->dn_nlevels) { ++ int old_nlevels = dn->dn_nlevels; ++ dmu_buf_impl_t *db; ++ list_t *list; ++ dbuf_dirty_record_t *new, *dr, *dr_next; ++ ++ dn->dn_nlevels = new_nlevels; ++ ++ ASSERT3U(new_nlevels, >, dn->dn_next_nlevels[txgoff]); ++ dn->dn_next_nlevels[txgoff] = new_nlevels; ++ ++ /* dirty the left indirects */ ++ db = dbuf_hold_level(dn, old_nlevels, 0, FTAG); ++ ASSERT(db != NULL); ++ new = dbuf_dirty(db, tx); ++ dbuf_rele(db, FTAG); ++ ++ /* transfer the dirty records to the new indirect */ ++ mutex_enter(&dn->dn_mtx); ++ mutex_enter(&new->dt.di.dr_mtx); ++ list = &dn->dn_dirty_records[txgoff]; ++ for (dr = list_head(list); dr; dr = dr_next) { ++ dr_next = list_next(&dn->dn_dirty_records[txgoff], dr); ++ if (dr->dr_dbuf->db_level != new_nlevels-1 && ++ dr->dr_dbuf->db_blkid != DMU_BONUS_BLKID && ++ dr->dr_dbuf->db_blkid != DMU_SPILL_BLKID) { ++ ASSERT(dr->dr_dbuf->db_level == old_nlevels-1); ++ list_remove(&dn->dn_dirty_records[txgoff], dr); ++ list_insert_tail(&new->dt.di.dr_children, dr); ++ dr->dr_parent = new; ++ } ++ } ++ mutex_exit(&new->dt.di.dr_mtx); ++ mutex_exit(&dn->dn_mtx); ++ } ++ ++out: ++ if (have_read) ++ rw_downgrade(&dn->dn_struct_rwlock); ++} ++ ++void ++dnode_clear_range(dnode_t *dn, uint64_t blkid, uint64_t nblks, dmu_tx_t *tx) ++{ ++ avl_tree_t *tree = &dn->dn_ranges[tx->tx_txg&TXG_MASK]; ++ avl_index_t where; ++ free_range_t *rp; ++ free_range_t rp_tofind; ++ uint64_t endblk = blkid + nblks; ++ ++ ASSERT(MUTEX_HELD(&dn->dn_mtx)); ++ ASSERT(nblks <= UINT64_MAX - blkid); /* no overflow */ ++ ++ dprintf_dnode(dn, "blkid=%llu nblks=%llu txg=%llu\n", ++ blkid, nblks, tx->tx_txg); ++ rp_tofind.fr_blkid = blkid; ++ rp = avl_find(tree, &rp_tofind, &where); ++ if (rp == NULL) ++ rp = avl_nearest(tree, where, AVL_BEFORE); ++ if (rp == NULL) ++ rp = avl_nearest(tree, where, AVL_AFTER); ++ ++ while (rp && (rp->fr_blkid <= blkid + nblks)) { ++ uint64_t fr_endblk = rp->fr_blkid + rp->fr_nblks; ++ free_range_t *nrp = AVL_NEXT(tree, rp); ++ ++ if (blkid <= rp->fr_blkid && endblk >= fr_endblk) { ++ /* clear this entire range */ ++ avl_remove(tree, rp); ++ kmem_free(rp, sizeof (free_range_t)); ++ } else if (blkid <= rp->fr_blkid && ++ endblk > rp->fr_blkid && endblk < fr_endblk) { ++ /* clear the beginning of this range */ ++ rp->fr_blkid = endblk; ++ rp->fr_nblks = fr_endblk - endblk; ++ } else if (blkid > rp->fr_blkid && blkid < fr_endblk && ++ endblk >= fr_endblk) { ++ /* clear the end of this range */ ++ rp->fr_nblks = blkid - rp->fr_blkid; ++ } else if (blkid > rp->fr_blkid && endblk < fr_endblk) { ++ /* clear a chunk out of this range */ ++ free_range_t *new_rp = ++ kmem_alloc(sizeof (free_range_t), KM_PUSHPAGE); ++ ++ new_rp->fr_blkid = endblk; ++ new_rp->fr_nblks = fr_endblk - endblk; ++ avl_insert_here(tree, new_rp, rp, AVL_AFTER); ++ rp->fr_nblks = blkid - rp->fr_blkid; ++ } ++ /* there may be no overlap */ ++ rp = nrp; ++ } ++} ++ ++void ++dnode_free_range(dnode_t *dn, uint64_t off, uint64_t len, 
dmu_tx_t *tx) ++{ ++ dmu_buf_impl_t *db; ++ uint64_t blkoff, blkid, nblks; ++ int blksz, blkshift, head, tail; ++ int trunc = FALSE; ++ int epbs; ++ ++ rw_enter(&dn->dn_struct_rwlock, RW_WRITER); ++ blksz = dn->dn_datablksz; ++ blkshift = dn->dn_datablkshift; ++ epbs = dn->dn_indblkshift - SPA_BLKPTRSHIFT; ++ ++ if (len == -1ULL) { ++ len = UINT64_MAX - off; ++ trunc = TRUE; ++ } ++ ++ /* ++ * First, block align the region to free: ++ */ ++ if (ISP2(blksz)) { ++ head = P2NPHASE(off, blksz); ++ blkoff = P2PHASE(off, blksz); ++ if ((off >> blkshift) > dn->dn_maxblkid) ++ goto out; ++ } else { ++ ASSERT(dn->dn_maxblkid == 0); ++ if (off == 0 && len >= blksz) { ++ /* Freeing the whole block; fast-track this request */ ++ blkid = 0; ++ nblks = 1; ++ goto done; ++ } else if (off >= blksz) { ++ /* Freeing past end-of-data */ ++ goto out; ++ } else { ++ /* Freeing part of the block. */ ++ head = blksz - off; ++ ASSERT3U(head, >, 0); ++ } ++ blkoff = off; ++ } ++ /* zero out any partial block data at the start of the range */ ++ if (head) { ++ ASSERT3U(blkoff + head, ==, blksz); ++ if (len < head) ++ head = len; ++ if (dbuf_hold_impl(dn, 0, dbuf_whichblock(dn, off), TRUE, ++ FTAG, &db) == 0) { ++ caddr_t data; ++ ++ /* don't dirty if it isn't on disk and isn't dirty */ ++ if (db->db_last_dirty || ++ (db->db_blkptr && !BP_IS_HOLE(db->db_blkptr))) { ++ rw_exit(&dn->dn_struct_rwlock); ++ dbuf_will_dirty(db, tx); ++ rw_enter(&dn->dn_struct_rwlock, RW_WRITER); ++ data = db->db.db_data; ++ bzero(data + blkoff, head); ++ } ++ dbuf_rele(db, FTAG); ++ } ++ off += head; ++ len -= head; ++ } ++ ++ /* If the range was less than one block, we're done */ ++ if (len == 0) ++ goto out; ++ ++ /* If the remaining range is past end of file, we're done */ ++ if ((off >> blkshift) > dn->dn_maxblkid) ++ goto out; ++ ++ ASSERT(ISP2(blksz)); ++ if (trunc) ++ tail = 0; ++ else ++ tail = P2PHASE(len, blksz); ++ ++ ASSERT3U(P2PHASE(off, blksz), ==, 0); ++ /* zero out any partial block data at the end of the range */ ++ if (tail) { ++ if (len < tail) ++ tail = len; ++ if (dbuf_hold_impl(dn, 0, dbuf_whichblock(dn, off+len), ++ TRUE, FTAG, &db) == 0) { ++ /* don't dirty if not on disk and not dirty */ ++ if (db->db_last_dirty || ++ (db->db_blkptr && !BP_IS_HOLE(db->db_blkptr))) { ++ rw_exit(&dn->dn_struct_rwlock); ++ dbuf_will_dirty(db, tx); ++ rw_enter(&dn->dn_struct_rwlock, RW_WRITER); ++ bzero(db->db.db_data, tail); ++ } ++ dbuf_rele(db, FTAG); ++ } ++ len -= tail; ++ } ++ ++ /* If the range did not include a full block, we are done */ ++ if (len == 0) ++ goto out; ++ ++ ASSERT(IS_P2ALIGNED(off, blksz)); ++ ASSERT(trunc || IS_P2ALIGNED(len, blksz)); ++ blkid = off >> blkshift; ++ nblks = len >> blkshift; ++ if (trunc) ++ nblks += 1; ++ ++ /* ++ * Read in and mark all the level-1 indirects dirty, ++ * so that they will stay in memory until syncing phase. ++ * Always dirty the first and last indirect to make sure ++ * we dirty all the partial indirects. 
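For power-of-two block sizes, dnode_free_range() above splits the request into an unaligned head, a run of whole blocks, and an unaligned tail: only the whole blocks become a free-range record, while head and tail bytes are merely zeroed in their dbufs (the original also clamps head when the whole range fits in the first block). The split arithmetic, as a self-contained sketch with the P2 macros expanded; the numbers in main() are arbitrary examples:

#include <stdint.h>
#include <stdio.h>

/* offset of x within its (power-of-two) aligned block */
#define P2PHASE(x, align)    ((x) & ((align) - 1))
/* bytes from x up to the next aligned boundary */
#define P2NPHASE(x, align)   (-(x) & ((align) - 1))

int
main(void)
{
        uint64_t off = 1000, len = 300000;
        uint64_t blksz = 131072;               /* 128K blocks */
        int blkshift = 17;

        uint64_t head  = P2NPHASE(off, blksz);            /* partial first block */
        uint64_t tail  = P2PHASE(len - head, blksz);      /* partial last block */
        uint64_t blkid = (off + head) >> blkshift;        /* first whole block */
        uint64_t nblks = (len - head - tail) >> blkshift; /* whole blocks freed */

        /* prints head=130072 tail=38856 blkid=1 nblks=1 */
        printf("head=%llu tail=%llu blkid=%llu nblks=%llu\n",
            (unsigned long long)head, (unsigned long long)tail,
            (unsigned long long)blkid, (unsigned long long)nblks);
        return (0);
}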
++ */ ++ if (dn->dn_nlevels > 1) { ++ uint64_t i, first, last; ++ int shift = epbs + dn->dn_datablkshift; ++ ++ first = blkid >> epbs; ++ if ((db = dbuf_hold_level(dn, 1, first, FTAG))) { ++ dbuf_will_dirty(db, tx); ++ dbuf_rele(db, FTAG); ++ } ++ if (trunc) ++ last = dn->dn_maxblkid >> epbs; ++ else ++ last = (blkid + nblks - 1) >> epbs; ++ if (last > first && (db = dbuf_hold_level(dn, 1, last, FTAG))) { ++ dbuf_will_dirty(db, tx); ++ dbuf_rele(db, FTAG); ++ } ++ for (i = first + 1; i < last; i++) { ++ uint64_t ibyte = i << shift; ++ int err; ++ ++ err = dnode_next_offset(dn, ++ DNODE_FIND_HAVELOCK, &ibyte, 1, 1, 0); ++ i = ibyte >> shift; ++ if (err == ESRCH || i >= last) ++ break; ++ ASSERT(err == 0); ++ db = dbuf_hold_level(dn, 1, i, FTAG); ++ if (db) { ++ dbuf_will_dirty(db, tx); ++ dbuf_rele(db, FTAG); ++ } ++ } ++ } ++done: ++ /* ++ * Add this range to the dnode range list. ++ * We will finish up this free operation in the syncing phase. ++ */ ++ mutex_enter(&dn->dn_mtx); ++ dnode_clear_range(dn, blkid, nblks, tx); ++ { ++ free_range_t *rp, *found; ++ avl_index_t where; ++ avl_tree_t *tree = &dn->dn_ranges[tx->tx_txg&TXG_MASK]; ++ ++ /* Add new range to dn_ranges */ ++ rp = kmem_alloc(sizeof (free_range_t), KM_PUSHPAGE); ++ rp->fr_blkid = blkid; ++ rp->fr_nblks = nblks; ++ found = avl_find(tree, rp, &where); ++ ASSERT(found == NULL); ++ avl_insert(tree, rp, where); ++ dprintf_dnode(dn, "blkid=%llu nblks=%llu txg=%llu\n", ++ blkid, nblks, tx->tx_txg); ++ } ++ mutex_exit(&dn->dn_mtx); ++ ++ dbuf_free_range(dn, blkid, blkid + nblks - 1, tx); ++ dnode_setdirty(dn, tx); ++out: ++ if (trunc && dn->dn_maxblkid >= (off >> blkshift)) ++ dn->dn_maxblkid = (off >> blkshift ? (off >> blkshift) - 1 : 0); ++ ++ rw_exit(&dn->dn_struct_rwlock); ++} ++ ++static boolean_t ++dnode_spill_freed(dnode_t *dn) ++{ ++ int i; ++ ++ mutex_enter(&dn->dn_mtx); ++ for (i = 0; i < TXG_SIZE; i++) { ++ if (dn->dn_rm_spillblk[i] == DN_KILL_SPILLBLK) ++ break; ++ } ++ mutex_exit(&dn->dn_mtx); ++ return (i < TXG_SIZE); ++} ++ ++/* return TRUE if this blkid was freed in a recent txg, or FALSE if it wasn't */ ++uint64_t ++dnode_block_freed(dnode_t *dn, uint64_t blkid) ++{ ++ free_range_t range_tofind; ++ void *dp = spa_get_dsl(dn->dn_objset->os_spa); ++ int i; ++ ++ if (blkid == DMU_BONUS_BLKID) ++ return (FALSE); ++ ++ /* ++ * If we're in the process of opening the pool, dp will not be ++ * set yet, but there shouldn't be anything dirty. 
++ */ ++ if (dp == NULL) ++ return (FALSE); ++ ++ if (dn->dn_free_txg) ++ return (TRUE); ++ ++ if (blkid == DMU_SPILL_BLKID) ++ return (dnode_spill_freed(dn)); ++ ++ range_tofind.fr_blkid = blkid; ++ mutex_enter(&dn->dn_mtx); ++ for (i = 0; i < TXG_SIZE; i++) { ++ free_range_t *range_found; ++ avl_index_t idx; ++ ++ range_found = avl_find(&dn->dn_ranges[i], &range_tofind, &idx); ++ if (range_found) { ++ ASSERT(range_found->fr_nblks > 0); ++ break; ++ } ++ range_found = avl_nearest(&dn->dn_ranges[i], idx, AVL_BEFORE); ++ if (range_found && ++ range_found->fr_blkid + range_found->fr_nblks > blkid) ++ break; ++ } ++ mutex_exit(&dn->dn_mtx); ++ return (i < TXG_SIZE); ++} ++ ++/* call from syncing context when we actually write/free space for this dnode */ ++void ++dnode_diduse_space(dnode_t *dn, int64_t delta) ++{ ++ uint64_t space; ++ dprintf_dnode(dn, "dn=%p dnp=%p used=%llu delta=%lld\n", ++ dn, dn->dn_phys, ++ (u_longlong_t)dn->dn_phys->dn_used, ++ (longlong_t)delta); ++ ++ mutex_enter(&dn->dn_mtx); ++ space = DN_USED_BYTES(dn->dn_phys); ++ if (delta > 0) { ++ ASSERT3U(space + delta, >=, space); /* no overflow */ ++ } else { ++ ASSERT3U(space, >=, -delta); /* no underflow */ ++ } ++ space += delta; ++ if (spa_version(dn->dn_objset->os_spa) < SPA_VERSION_DNODE_BYTES) { ++ ASSERT((dn->dn_phys->dn_flags & DNODE_FLAG_USED_BYTES) == 0); ++ ASSERT3U(P2PHASE(space, 1<dn_phys->dn_used = space >> DEV_BSHIFT; ++ } else { ++ dn->dn_phys->dn_used = space; ++ dn->dn_phys->dn_flags |= DNODE_FLAG_USED_BYTES; ++ } ++ mutex_exit(&dn->dn_mtx); ++} ++ ++/* ++ * Call when we think we're going to write/free space in open context. ++ * Be conservative (ie. OK to write less than this or free more than ++ * this, but don't write more or free less). ++ */ ++void ++dnode_willuse_space(dnode_t *dn, int64_t space, dmu_tx_t *tx) ++{ ++ objset_t *os = dn->dn_objset; ++ dsl_dataset_t *ds = os->os_dsl_dataset; ++ ++ if (space > 0) ++ space = spa_get_asize(os->os_spa, space); ++ ++ if (ds) ++ dsl_dir_willuse_space(ds->ds_dir, space, tx); ++ ++ dmu_tx_willuse_space(tx, space); ++} ++ ++/* ++ * This function scans a block at the indicated "level" looking for ++ * a hole or data (depending on 'flags'). If level > 0, then we are ++ * scanning an indirect block looking at its pointers. If level == 0, ++ * then we are looking at a block of dnodes. If we don't find what we ++ * are looking for in the block, we return ESRCH. Otherwise, return ++ * with *offset pointing to the beginning (if searching forwards) or ++ * end (if searching backwards) of the range covered by the block ++ * pointer we matched on (or dnode). ++ * ++ * The basic search algorithm used below by dnode_next_offset() is to ++ * use this function to search up the block tree (widen the search) until ++ * we find something (i.e., we don't return ESRCH) and then search back ++ * down the tree (narrow the search) until we reach our original search ++ * level. ++ */ ++static int ++dnode_next_offset_level(dnode_t *dn, int flags, uint64_t *offset, ++ int lvl, uint64_t blkfill, uint64_t txg) ++{ ++ dmu_buf_impl_t *db = NULL; ++ void *data = NULL; ++ uint64_t epbs = dn->dn_phys->dn_indblkshift - SPA_BLKPTRSHIFT; ++ uint64_t epb = 1ULL << epbs; ++ uint64_t minfill, maxfill; ++ boolean_t hole; ++ int i, inc, error, span; ++ ++ dprintf("probing object %llu offset %llx level %d of %u\n", ++ dn->dn_object, *offset, lvl, dn->dn_phys->dn_nlevels); ++ ++ hole = ((flags & DNODE_FIND_HOLE) != 0); ++ inc = (flags & DNODE_FIND_BACKWARDS) ? 
-1 : 1; ++ ASSERT(txg == 0 || !hole); ++ ++ if (lvl == dn->dn_phys->dn_nlevels) { ++ error = 0; ++ epb = dn->dn_phys->dn_nblkptr; ++ data = dn->dn_phys->dn_blkptr; ++ } else { ++ uint64_t blkid = dbuf_whichblock(dn, *offset) >> (epbs * lvl); ++ error = dbuf_hold_impl(dn, lvl, blkid, TRUE, FTAG, &db); ++ if (error) { ++ if (error != ENOENT) ++ return (error); ++ if (hole) ++ return (0); ++ /* ++ * This can only happen when we are searching up ++ * the block tree for data. We don't really need to ++ * adjust the offset, as we will just end up looking ++ * at the pointer to this block in its parent, and its ++ * going to be unallocated, so we will skip over it. ++ */ ++ return (ESRCH); ++ } ++ error = dbuf_read(db, NULL, DB_RF_CANFAIL | DB_RF_HAVESTRUCT); ++ if (error) { ++ dbuf_rele(db, FTAG); ++ return (error); ++ } ++ data = db->db.db_data; ++ } ++ ++ if (db && txg && ++ (db->db_blkptr == NULL || db->db_blkptr->blk_birth <= txg)) { ++ /* ++ * This can only happen when we are searching up the tree ++ * and these conditions mean that we need to keep climbing. ++ */ ++ error = ESRCH; ++ } else if (lvl == 0) { ++ dnode_phys_t *dnp = data; ++ span = DNODE_SHIFT; ++ ASSERT(dn->dn_type == DMU_OT_DNODE); ++ ++ for (i = (*offset >> span) & (blkfill - 1); ++ i >= 0 && i < blkfill; i += inc) { ++ if ((dnp[i].dn_type == DMU_OT_NONE) == hole) ++ break; ++ *offset += (1ULL << span) * inc; ++ } ++ if (i < 0 || i == blkfill) ++ error = ESRCH; ++ } else { ++ blkptr_t *bp = data; ++ uint64_t start = *offset; ++ span = (lvl - 1) * epbs + dn->dn_datablkshift; ++ minfill = 0; ++ maxfill = blkfill << ((lvl - 1) * epbs); ++ ++ if (hole) ++ maxfill--; ++ else ++ minfill++; ++ ++ *offset = *offset >> span; ++ for (i = BF64_GET(*offset, 0, epbs); ++ i >= 0 && i < epb; i += inc) { ++ if (bp[i].blk_fill >= minfill && ++ bp[i].blk_fill <= maxfill && ++ (hole || bp[i].blk_birth > txg)) ++ break; ++ if (inc > 0 || *offset > 0) ++ *offset += inc; ++ } ++ *offset = *offset << span; ++ if (inc < 0) { ++ /* traversing backwards; position offset at the end */ ++ ASSERT3U(*offset, <=, start); ++ *offset = MIN(*offset + (1ULL << span) - 1, start); ++ } else if (*offset < start) { ++ *offset = start; ++ } ++ if (i < 0 || i >= epb) ++ error = ESRCH; ++ } ++ ++ if (db) ++ dbuf_rele(db, FTAG); ++ ++ return (error); ++} ++ ++/* ++ * Find the next hole, data, or sparse region at or after *offset. ++ * The value 'blkfill' tells us how many items we expect to find ++ * in an L0 data block; this value is 1 for normal objects, ++ * DNODES_PER_BLOCK for the meta dnode, and some fraction of ++ * DNODES_PER_BLOCK when searching for sparse regions thereof. ++ * ++ * Examples: ++ * ++ * dnode_next_offset(dn, flags, offset, 1, 1, 0); ++ * Finds the next/previous hole/data in a file. ++ * Used in dmu_offset_next(). ++ * ++ * dnode_next_offset(mdn, flags, offset, 0, DNODES_PER_BLOCK, txg); ++ * Finds the next free/allocated dnode an objset's meta-dnode. ++ * Only finds objects that have new contents since txg (ie. ++ * bonus buffer changes and content removal are ignored). ++ * Used in dmu_object_next(). ++ * ++ * dnode_next_offset(mdn, DNODE_FIND_HOLE, offset, 2, DNODES_PER_BLOCK >> 2, 0); ++ * Finds the next L2 meta-dnode bp that's at most 1/4 full. ++ * Used in dmu_object_alloc(). 
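The search strategy described above for dnode_next_offset() is: probe the requested level; on ESRCH widen by climbing to higher levels until something matches, then narrow by walking back down, refining the offset at each level. The control flow reduces to the following self-contained model, where probe() is only a stand-in for dnode_next_offset_level():

#include <errno.h>
#include <stdint.h>

/* One probe per level: advance *off to the next candidate, or ESRCH. */
typedef int (*probe_fn)(int lvl, uint64_t *off);

static int
next_offset(probe_fn probe, int minlvl, int maxlvl, uint64_t *off)
{
        int lvl, error = ESRCH;

        /* widen: climb until some level matches (or we run out) */
        for (lvl = minlvl; lvl <= maxlvl; lvl++) {
                error = probe(lvl, off);
                if (error != ESRCH)
                        break;
        }

        /* narrow: walk back down, refining the offset at each level */
        while (error == 0 && --lvl >= minlvl)
                error = probe(lvl, off);

        return (error);
}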
++ */ ++int ++dnode_next_offset(dnode_t *dn, int flags, uint64_t *offset, ++ int minlvl, uint64_t blkfill, uint64_t txg) ++{ ++ uint64_t initial_offset = *offset; ++ int lvl, maxlvl; ++ int error = 0; ++ ++ if (!(flags & DNODE_FIND_HAVELOCK)) ++ rw_enter(&dn->dn_struct_rwlock, RW_READER); ++ ++ if (dn->dn_phys->dn_nlevels == 0) { ++ error = ESRCH; ++ goto out; ++ } ++ ++ if (dn->dn_datablkshift == 0) { ++ if (*offset < dn->dn_datablksz) { ++ if (flags & DNODE_FIND_HOLE) ++ *offset = dn->dn_datablksz; ++ } else { ++ error = ESRCH; ++ } ++ goto out; ++ } ++ ++ maxlvl = dn->dn_phys->dn_nlevels; ++ ++ for (lvl = minlvl; lvl <= maxlvl; lvl++) { ++ error = dnode_next_offset_level(dn, ++ flags, offset, lvl, blkfill, txg); ++ if (error != ESRCH) ++ break; ++ } ++ ++ while (error == 0 && --lvl >= minlvl) { ++ error = dnode_next_offset_level(dn, ++ flags, offset, lvl, blkfill, txg); ++ } ++ ++ if (error == 0 && (flags & DNODE_FIND_BACKWARDS ? ++ initial_offset < *offset : initial_offset > *offset)) ++ error = ESRCH; ++out: ++ if (!(flags & DNODE_FIND_HAVELOCK)) ++ rw_exit(&dn->dn_struct_rwlock); ++ ++ return (error); ++} +diff -uNr linux-3.2.33-go.orig/fs/zfs/zfs/dnode_sync.c linux-3.2.33-go/fs/zfs/zfs/dnode_sync.c +--- linux-3.2.33-go.orig/fs/zfs/zfs/dnode_sync.c 1970-01-01 01:00:00.000000000 +0100 ++++ linux-3.2.33-go/fs/zfs/zfs/dnode_sync.c 2012-11-16 23:25:34.351039311 +0100 +@@ -0,0 +1,697 @@ ++/* ++ * CDDL HEADER START ++ * ++ * The contents of this file are subject to the terms of the ++ * Common Development and Distribution License (the "License"). ++ * You may not use this file except in compliance with the License. ++ * ++ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE ++ * or http://www.opensolaris.org/os/licensing. ++ * See the License for the specific language governing permissions ++ * and limitations under the License. ++ * ++ * When distributing Covered Code, include this CDDL HEADER in each ++ * file and include the License file at usr/src/OPENSOLARIS.LICENSE. ++ * If applicable, add the following below this CDDL HEADER, with the ++ * fields enclosed by brackets "[]" replaced with your own identifying ++ * information: Portions Copyright [yyyy] [name of copyright owner] ++ * ++ * CDDL HEADER END ++ */ ++/* ++ * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved. 
++ */ ++ ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++ ++static void ++dnode_increase_indirection(dnode_t *dn, dmu_tx_t *tx) ++{ ++ dmu_buf_impl_t *db; ++ int txgoff = tx->tx_txg & TXG_MASK; ++ int nblkptr = dn->dn_phys->dn_nblkptr; ++ int old_toplvl = dn->dn_phys->dn_nlevels - 1; ++ int new_level = dn->dn_next_nlevels[txgoff]; ++ int i; ++ ++ rw_enter(&dn->dn_struct_rwlock, RW_WRITER); ++ ++ /* this dnode can't be paged out because it's dirty */ ++ ASSERT(dn->dn_phys->dn_type != DMU_OT_NONE); ++ ASSERT(RW_WRITE_HELD(&dn->dn_struct_rwlock)); ++ ASSERT(new_level > 1 && dn->dn_phys->dn_nlevels > 0); ++ ++ db = dbuf_hold_level(dn, dn->dn_phys->dn_nlevels, 0, FTAG); ++ ASSERT(db != NULL); ++ ++ dn->dn_phys->dn_nlevels = new_level; ++ dprintf("os=%p obj=%llu, increase to %d\n", dn->dn_objset, ++ dn->dn_object, dn->dn_phys->dn_nlevels); ++ ++ /* check for existing blkptrs in the dnode */ ++ for (i = 0; i < nblkptr; i++) ++ if (!BP_IS_HOLE(&dn->dn_phys->dn_blkptr[i])) ++ break; ++ if (i != nblkptr) { ++ /* transfer dnode's block pointers to new indirect block */ ++ (void) dbuf_read(db, NULL, DB_RF_MUST_SUCCEED|DB_RF_HAVESTRUCT); ++ ASSERT(db->db.db_data); ++ ASSERT(arc_released(db->db_buf)); ++ ASSERT3U(sizeof (blkptr_t) * nblkptr, <=, db->db.db_size); ++ bcopy(dn->dn_phys->dn_blkptr, db->db.db_data, ++ sizeof (blkptr_t) * nblkptr); ++ arc_buf_freeze(db->db_buf); ++ } ++ ++ /* set dbuf's parent pointers to new indirect buf */ ++ for (i = 0; i < nblkptr; i++) { ++ dmu_buf_impl_t *child = dbuf_find(dn, old_toplvl, i); ++ ++ if (child == NULL) ++ continue; ++#ifdef DEBUG ++ DB_DNODE_ENTER(child); ++ ASSERT3P(DB_DNODE(child), ==, dn); ++ DB_DNODE_EXIT(child); ++#endif /* DEBUG */ ++ if (child->db_parent && child->db_parent != dn->dn_dbuf) { ++ ASSERT(child->db_parent->db_level == db->db_level); ++ ASSERT(child->db_blkptr != ++ &dn->dn_phys->dn_blkptr[child->db_blkid]); ++ mutex_exit(&child->db_mtx); ++ continue; ++ } ++ ASSERT(child->db_parent == NULL || ++ child->db_parent == dn->dn_dbuf); ++ ++ child->db_parent = db; ++ dbuf_add_ref(db, child); ++ if (db->db.db_data) ++ child->db_blkptr = (blkptr_t *)db->db.db_data + i; ++ else ++ child->db_blkptr = NULL; ++ dprintf_dbuf_bp(child, child->db_blkptr, ++ "changed db_blkptr to new indirect %s", ""); ++ ++ mutex_exit(&child->db_mtx); ++ } ++ ++ bzero(dn->dn_phys->dn_blkptr, sizeof (blkptr_t) * nblkptr); ++ ++ dbuf_rele(db, FTAG); ++ ++ rw_exit(&dn->dn_struct_rwlock); ++} ++ ++static int ++free_blocks(dnode_t *dn, blkptr_t *bp, int num, dmu_tx_t *tx) ++{ ++ dsl_dataset_t *ds = dn->dn_objset->os_dsl_dataset; ++ uint64_t bytesfreed = 0; ++ int i, blocks_freed = 0; ++ ++ dprintf("ds=%p obj=%llx num=%d\n", ds, dn->dn_object, num); ++ ++ for (i = 0; i < num; i++, bp++) { ++ if (BP_IS_HOLE(bp)) ++ continue; ++ ++ bytesfreed += dsl_dataset_block_kill(ds, bp, tx, B_FALSE); ++ ASSERT3U(bytesfreed, <=, DN_USED_BYTES(dn->dn_phys)); ++ bzero(bp, sizeof (blkptr_t)); ++ blocks_freed += 1; ++ } ++ dnode_diduse_space(dn, -bytesfreed); ++ return (blocks_freed); ++} ++ ++#ifdef ZFS_DEBUG ++static void ++free_verify(dmu_buf_impl_t *db, uint64_t start, uint64_t end, dmu_tx_t *tx) ++{ ++ int off, num; ++ int i, err, epbs; ++ uint64_t txg = tx->tx_txg; ++ dnode_t *dn; ++ ++ DB_DNODE_ENTER(db); ++ dn = DB_DNODE(db); ++ epbs = dn->dn_phys->dn_indblkshift - SPA_BLKPTRSHIFT; ++ off = start - (db->db_blkid * 1<=, 0); ++ ASSERT3U(num, >=, 0); ++ ASSERT3U(db->db_level, >, 0); ++ ASSERT3U(db->db.db_size, ==, 1 << 
dn->dn_phys->dn_indblkshift); ++ ASSERT3U(off+num, <=, db->db.db_size >> SPA_BLKPTRSHIFT); ++ ASSERT(db->db_blkptr != NULL); ++ ++ for (i = off; i < off+num; i++) { ++ uint64_t *buf; ++ dmu_buf_impl_t *child; ++ dbuf_dirty_record_t *dr; ++ int j; ++ ++ ASSERT(db->db_level == 1); ++ ++ rw_enter(&dn->dn_struct_rwlock, RW_READER); ++ err = dbuf_hold_impl(dn, db->db_level-1, ++ (db->db_blkid << epbs) + i, TRUE, FTAG, &child); ++ rw_exit(&dn->dn_struct_rwlock); ++ if (err == ENOENT) ++ continue; ++ ASSERT(err == 0); ++ ASSERT(child->db_level == 0); ++ dr = child->db_last_dirty; ++ while (dr && dr->dr_txg > txg) ++ dr = dr->dr_next; ++ ASSERT(dr == NULL || dr->dr_txg == txg); ++ ++ /* data_old better be zeroed */ ++ if (dr) { ++ buf = dr->dt.dl.dr_data->b_data; ++ for (j = 0; j < child->db.db_size >> 3; j++) { ++ if (buf[j] != 0) { ++ panic("freed data not zero: " ++ "child=%p i=%d off=%d num=%d\n", ++ (void *)child, i, off, num); ++ } ++ } ++ } ++ ++ /* ++ * db_data better be zeroed unless it's dirty in a ++ * future txg. ++ */ ++ mutex_enter(&child->db_mtx); ++ buf = child->db.db_data; ++ if (buf != NULL && child->db_state != DB_FILL && ++ child->db_last_dirty == NULL) { ++ for (j = 0; j < child->db.db_size >> 3; j++) { ++ if (buf[j] != 0) { ++ panic("freed data not zero: " ++ "child=%p i=%d off=%d num=%d\n", ++ (void *)child, i, off, num); ++ } ++ } ++ } ++ mutex_exit(&child->db_mtx); ++ ++ dbuf_rele(child, FTAG); ++ } ++ DB_DNODE_EXIT(db); ++} ++#endif ++ ++#define ALL -1 ++ ++static int ++free_children(dmu_buf_impl_t *db, uint64_t blkid, uint64_t nblks, int trunc, ++ dmu_tx_t *tx) ++{ ++ dnode_t *dn; ++ blkptr_t *bp; ++ dmu_buf_impl_t *subdb; ++ uint64_t start, end, dbstart, dbend, i; ++ int epbs, shift, err; ++ int all = TRUE; ++ int blocks_freed = 0; ++ ++ /* ++ * There is a small possibility that this block will not be cached: ++ * 1 - if level > 1 and there are no children with level <= 1 ++ * 2 - if we didn't get a dirty hold (because this block had just ++ * finished being written -- and so had no holds), and then this ++ * block got evicted before we got here. ++ */ ++ if (db->db_state != DB_CACHED) ++ (void) dbuf_read(db, NULL, DB_RF_MUST_SUCCEED); ++ ++ dbuf_release_bp(db); ++ bp = (blkptr_t *)db->db.db_data; ++ ++ DB_DNODE_ENTER(db); ++ dn = DB_DNODE(db); ++ epbs = dn->dn_phys->dn_indblkshift - SPA_BLKPTRSHIFT; ++ shift = (db->db_level - 1) * epbs; ++ dbstart = db->db_blkid << epbs; ++ start = blkid >> shift; ++ if (dbstart < start) { ++ bp += start - dbstart; ++ all = FALSE; ++ } else { ++ start = dbstart; ++ } ++ dbend = ((db->db_blkid + 1) << epbs) - 1; ++ end = (blkid + nblks - 1) >> shift; ++ if (dbend <= end) ++ end = dbend; ++ else if (all) ++ all = trunc; ++ ASSERT3U(start, <=, end); ++ ++ if (db->db_level == 1) { ++ FREE_VERIFY(db, start, end, tx); ++ blocks_freed = free_blocks(dn, bp, end-start+1, tx); ++ arc_buf_freeze(db->db_buf); ++ ASSERT(all || blocks_freed == 0 || db->db_last_dirty); ++ DB_DNODE_EXIT(db); ++ return (all ? 
ALL : blocks_freed); ++ } ++ ++ for (i = start; i <= end; i++, bp++) { ++ if (BP_IS_HOLE(bp)) ++ continue; ++ rw_enter(&dn->dn_struct_rwlock, RW_READER); ++ err = dbuf_hold_impl(dn, db->db_level-1, i, TRUE, FTAG, &subdb); ++ ASSERT3U(err, ==, 0); ++ rw_exit(&dn->dn_struct_rwlock); ++ ++ if (free_children(subdb, blkid, nblks, trunc, tx) == ALL) { ++ ASSERT3P(subdb->db_blkptr, ==, bp); ++ blocks_freed += free_blocks(dn, bp, 1, tx); ++ } else { ++ all = FALSE; ++ } ++ dbuf_rele(subdb, FTAG); ++ } ++ DB_DNODE_EXIT(db); ++ arc_buf_freeze(db->db_buf); ++#ifdef ZFS_DEBUG ++ bp -= (end-start)+1; ++ for (i = start; i <= end; i++, bp++) { ++ if (i == start && blkid != 0) ++ continue; ++ else if (i == end && !trunc) ++ continue; ++ ASSERT3U(bp->blk_birth, ==, 0); ++ } ++#endif ++ ASSERT(all || blocks_freed == 0 || db->db_last_dirty); ++ return (all ? ALL : blocks_freed); ++} ++ ++/* ++ * free_range: Traverse the indicated range of the provided file ++ * and "free" all the blocks contained there. ++ */ ++static void ++dnode_sync_free_range(dnode_t *dn, uint64_t blkid, uint64_t nblks, dmu_tx_t *tx) ++{ ++ blkptr_t *bp = dn->dn_phys->dn_blkptr; ++ dmu_buf_impl_t *db; ++ int trunc, start, end, shift, i, err; ++ int dnlevel = dn->dn_phys->dn_nlevels; ++ ++ if (blkid > dn->dn_phys->dn_maxblkid) ++ return; ++ ++ ASSERT(dn->dn_phys->dn_maxblkid < UINT64_MAX); ++ trunc = blkid + nblks > dn->dn_phys->dn_maxblkid; ++ if (trunc) ++ nblks = dn->dn_phys->dn_maxblkid - blkid + 1; ++ ++ /* There are no indirect blocks in the object */ ++ if (dnlevel == 1) { ++ if (blkid >= dn->dn_phys->dn_nblkptr) { ++ /* this range was never made persistent */ ++ return; ++ } ++ ASSERT3U(blkid + nblks, <=, dn->dn_phys->dn_nblkptr); ++ (void) free_blocks(dn, bp + blkid, nblks, tx); ++ if (trunc) { ++ ASSERTV(uint64_t off = (dn->dn_phys->dn_maxblkid + 1) * ++ (dn->dn_phys->dn_datablkszsec<dn_phys->dn_maxblkid = (blkid ? blkid - 1 : 0); ++ ASSERT(off < dn->dn_phys->dn_maxblkid || ++ dn->dn_phys->dn_maxblkid == 0 || ++ dnode_next_offset(dn, 0, &off, 1, 1, 0) != 0); ++ } ++ return; ++ } ++ ++ shift = (dnlevel - 1) * (dn->dn_phys->dn_indblkshift - SPA_BLKPTRSHIFT); ++ start = blkid >> shift; ++ ASSERT(start < dn->dn_phys->dn_nblkptr); ++ end = (blkid + nblks - 1) >> shift; ++ bp += start; ++ for (i = start; i <= end; i++, bp++) { ++ if (BP_IS_HOLE(bp)) ++ continue; ++ rw_enter(&dn->dn_struct_rwlock, RW_READER); ++ err = dbuf_hold_impl(dn, dnlevel-1, i, TRUE, FTAG, &db); ++ ASSERT3U(err, ==, 0); ++ rw_exit(&dn->dn_struct_rwlock); ++ ++ if (free_children(db, blkid, nblks, trunc, tx) == ALL) { ++ ASSERT3P(db->db_blkptr, ==, bp); ++ (void) free_blocks(dn, bp, 1, tx); ++ } ++ dbuf_rele(db, FTAG); ++ } ++ if (trunc) { ++ ASSERTV(uint64_t off = (dn->dn_phys->dn_maxblkid + 1) * ++ (dn->dn_phys->dn_datablkszsec << SPA_MINBLOCKSHIFT)); ++ dn->dn_phys->dn_maxblkid = (blkid ? blkid - 1 : 0); ++ ASSERT(off < dn->dn_phys->dn_maxblkid || ++ dn->dn_phys->dn_maxblkid == 0 || ++ dnode_next_offset(dn, 0, &off, 1, 1, 0) != 0); ++ } ++} ++ ++/* ++ * Try to kick all the dnodes dbufs out of the cache... 
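dnode_evict_dbufs() below walks dn_dbufs while dbuf_clear() may be dropping entries out of the list, so it inserts a stack-allocated marker at the tail and rotates each visited element behind the marker; the pass is complete when the marker reaches the head. The rotation idiom on a plain circular doubly-linked list (illustrative list type, not the Solaris list_t API):

struct node { struct node *next, *prev; };

static void
list_remove(struct node *n)
{
        n->prev->next = n->next;
        n->next->prev = n->prev;
}

static void
list_insert_tail(struct node *head, struct node *n)
{
        n->prev = head->prev;
        n->next = head;
        head->prev->next = n;
        head->prev = n;
}

/* Visit each node once, even if visit() removes other nodes. */
static void
walk_with_marker(struct node *head, void (*visit)(struct node *))
{
        struct node marker;

        list_insert_tail(head, &marker);
        while (head->next != &marker) {
                struct node *n = head->next;

                /* rotate n behind the marker so it is not revisited */
                list_remove(n);
                list_insert_tail(head, n);
                visit(n);
        }
        list_remove(&marker);
}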
++ */ ++void ++dnode_evict_dbufs(dnode_t *dn) ++{ ++ int progress; ++ int pass = 0; ++ ++ do { ++ dmu_buf_impl_t *db, marker; ++ int evicting = FALSE; ++ ++ progress = FALSE; ++ mutex_enter(&dn->dn_dbufs_mtx); ++ list_insert_tail(&dn->dn_dbufs, &marker); ++ db = list_head(&dn->dn_dbufs); ++ for (; db != ▮ db = list_head(&dn->dn_dbufs)) { ++ list_remove(&dn->dn_dbufs, db); ++ list_insert_tail(&dn->dn_dbufs, db); ++#ifdef DEBUG ++ DB_DNODE_ENTER(db); ++ ASSERT3P(DB_DNODE(db), ==, dn); ++ DB_DNODE_EXIT(db); ++#endif /* DEBUG */ ++ ++ mutex_enter(&db->db_mtx); ++ if (db->db_state == DB_EVICTING) { ++ progress = TRUE; ++ evicting = TRUE; ++ mutex_exit(&db->db_mtx); ++ } else if (refcount_is_zero(&db->db_holds)) { ++ progress = TRUE; ++ dbuf_clear(db); /* exits db_mtx for us */ ++ } else { ++ mutex_exit(&db->db_mtx); ++ } ++ ++ } ++ list_remove(&dn->dn_dbufs, &marker); ++ /* ++ * NB: we need to drop dn_dbufs_mtx between passes so ++ * that any DB_EVICTING dbufs can make progress. ++ * Ideally, we would have some cv we could wait on, but ++ * since we don't, just wait a bit to give the other ++ * thread a chance to run. ++ */ ++ mutex_exit(&dn->dn_dbufs_mtx); ++ if (evicting) ++ delay(1); ++ pass++; ++ if ((pass % 100) == 0) ++ dprintf("Exceeded %d passes evicting dbufs\n", pass); ++ } while (progress); ++ ++ if (pass >= 100) ++ dprintf("Required %d passes to evict dbufs\n", pass); ++ ++ rw_enter(&dn->dn_struct_rwlock, RW_WRITER); ++ if (dn->dn_bonus && refcount_is_zero(&dn->dn_bonus->db_holds)) { ++ mutex_enter(&dn->dn_bonus->db_mtx); ++ dbuf_evict(dn->dn_bonus); ++ dn->dn_bonus = NULL; ++ } ++ rw_exit(&dn->dn_struct_rwlock); ++} ++ ++static void ++dnode_undirty_dbufs(list_t *list) ++{ ++ dbuf_dirty_record_t *dr; ++ ++ while ((dr = list_head(list))) { ++ dmu_buf_impl_t *db = dr->dr_dbuf; ++ uint64_t txg = dr->dr_txg; ++ ++ if (db->db_level != 0) ++ dnode_undirty_dbufs(&dr->dt.di.dr_children); ++ ++ mutex_enter(&db->db_mtx); ++ /* XXX - use dbuf_undirty()? */ ++ list_remove(list, dr); ++ ASSERT(db->db_last_dirty == dr); ++ db->db_last_dirty = NULL; ++ db->db_dirtycnt -= 1; ++ if (db->db_level == 0) { ++ ASSERT(db->db_blkid == DMU_BONUS_BLKID || ++ dr->dt.dl.dr_data == db->db_buf); ++ dbuf_unoverride(dr); ++ } ++ kmem_free(dr, sizeof (dbuf_dirty_record_t)); ++ dbuf_rele_and_unlock(db, (void *)(uintptr_t)txg); ++ } ++} ++ ++static void ++dnode_sync_free(dnode_t *dn, dmu_tx_t *tx) ++{ ++ int txgoff = tx->tx_txg & TXG_MASK; ++ ++ ASSERT(dmu_tx_is_syncing(tx)); ++ ++ /* ++ * Our contents should have been freed in dnode_sync() by the ++ * free range record inserted by the caller of dnode_free(). ++ */ ++ ASSERT3U(DN_USED_BYTES(dn->dn_phys), ==, 0); ++ ASSERT(BP_IS_HOLE(dn->dn_phys->dn_blkptr)); ++ ++ dnode_undirty_dbufs(&dn->dn_dirty_records[txgoff]); ++ dnode_evict_dbufs(dn); ++ ASSERT3P(list_head(&dn->dn_dbufs), ==, NULL); ++ ++ /* ++ * XXX - It would be nice to assert this, but we may still ++ * have residual holds from async evictions from the arc... ++ * ++ * zfs_obj_to_path() also depends on this being ++ * commented out. 
++ * ++ * ASSERT3U(refcount_count(&dn->dn_holds), ==, 1); ++ */ ++ ++ /* Undirty next bits */ ++ dn->dn_next_nlevels[txgoff] = 0; ++ dn->dn_next_indblkshift[txgoff] = 0; ++ dn->dn_next_blksz[txgoff] = 0; ++ ++ /* ASSERT(blkptrs are zero); */ ++ ASSERT(dn->dn_phys->dn_type != DMU_OT_NONE); ++ ASSERT(dn->dn_type != DMU_OT_NONE); ++ ++ ASSERT(dn->dn_free_txg > 0); ++ if (dn->dn_allocated_txg != dn->dn_free_txg) ++ dbuf_will_dirty(dn->dn_dbuf, tx); ++ bzero(dn->dn_phys, sizeof (dnode_phys_t)); ++ ++ mutex_enter(&dn->dn_mtx); ++ dn->dn_type = DMU_OT_NONE; ++ dn->dn_maxblkid = 0; ++ dn->dn_allocated_txg = 0; ++ dn->dn_free_txg = 0; ++ dn->dn_have_spill = B_FALSE; ++ mutex_exit(&dn->dn_mtx); ++ ++ ASSERT(dn->dn_object != DMU_META_DNODE_OBJECT); ++ ++ dnode_rele(dn, (void *)(uintptr_t)tx->tx_txg); ++ /* ++ * Now that we've released our hold, the dnode may ++ * be evicted, so we musn't access it. ++ */ ++} ++ ++/* ++ * Write out the dnode's dirty buffers. ++ */ ++void ++dnode_sync(dnode_t *dn, dmu_tx_t *tx) ++{ ++ free_range_t *rp; ++ dnode_phys_t *dnp = dn->dn_phys; ++ int txgoff = tx->tx_txg & TXG_MASK; ++ list_t *list = &dn->dn_dirty_records[txgoff]; ++ boolean_t kill_spill = B_FALSE; ++ ASSERTV(static const dnode_phys_t zerodn = { 0 }); ++ ++ ASSERT(dmu_tx_is_syncing(tx)); ++ ASSERT(dnp->dn_type != DMU_OT_NONE || dn->dn_allocated_txg); ++ ASSERT(dnp->dn_type != DMU_OT_NONE || ++ bcmp(dnp, &zerodn, DNODE_SIZE) == 0); ++ DNODE_VERIFY(dn); ++ ++ ASSERT(dn->dn_dbuf == NULL || arc_released(dn->dn_dbuf->db_buf)); ++ ++ if (dmu_objset_userused_enabled(dn->dn_objset) && ++ !DMU_OBJECT_IS_SPECIAL(dn->dn_object)) { ++ mutex_enter(&dn->dn_mtx); ++ dn->dn_oldused = DN_USED_BYTES(dn->dn_phys); ++ dn->dn_oldflags = dn->dn_phys->dn_flags; ++ dn->dn_phys->dn_flags |= DNODE_FLAG_USERUSED_ACCOUNTED; ++ mutex_exit(&dn->dn_mtx); ++ dmu_objset_userquota_get_ids(dn, B_FALSE, tx); ++ } else { ++ /* Once we account for it, we should always account for it. 
*/ ++ ASSERT(!(dn->dn_phys->dn_flags & ++ DNODE_FLAG_USERUSED_ACCOUNTED)); ++ } ++ ++ mutex_enter(&dn->dn_mtx); ++ if (dn->dn_allocated_txg == tx->tx_txg) { ++ /* The dnode is newly allocated or reallocated */ ++ if (dnp->dn_type == DMU_OT_NONE) { ++ /* this is a first alloc, not a realloc */ ++ dnp->dn_nlevels = 1; ++ dnp->dn_nblkptr = dn->dn_nblkptr; ++ } ++ ++ dnp->dn_type = dn->dn_type; ++ dnp->dn_bonustype = dn->dn_bonustype; ++ dnp->dn_bonuslen = dn->dn_bonuslen; ++ } ++ ++ ASSERT(dnp->dn_nlevels > 1 || ++ BP_IS_HOLE(&dnp->dn_blkptr[0]) || ++ BP_GET_LSIZE(&dnp->dn_blkptr[0]) == ++ dnp->dn_datablkszsec << SPA_MINBLOCKSHIFT); ++ ++ if (dn->dn_next_blksz[txgoff]) { ++ ASSERT(P2PHASE(dn->dn_next_blksz[txgoff], ++ SPA_MINBLOCKSIZE) == 0); ++ ASSERT(BP_IS_HOLE(&dnp->dn_blkptr[0]) || ++ dn->dn_maxblkid == 0 || list_head(list) != NULL || ++ avl_last(&dn->dn_ranges[txgoff]) || ++ dn->dn_next_blksz[txgoff] >> SPA_MINBLOCKSHIFT == ++ dnp->dn_datablkszsec); ++ dnp->dn_datablkszsec = ++ dn->dn_next_blksz[txgoff] >> SPA_MINBLOCKSHIFT; ++ dn->dn_next_blksz[txgoff] = 0; ++ } ++ ++ if (dn->dn_next_bonuslen[txgoff]) { ++ if (dn->dn_next_bonuslen[txgoff] == DN_ZERO_BONUSLEN) ++ dnp->dn_bonuslen = 0; ++ else ++ dnp->dn_bonuslen = dn->dn_next_bonuslen[txgoff]; ++ ASSERT(dnp->dn_bonuslen <= DN_MAX_BONUSLEN); ++ dn->dn_next_bonuslen[txgoff] = 0; ++ } ++ ++ if (dn->dn_next_bonustype[txgoff]) { ++ ASSERT(dn->dn_next_bonustype[txgoff] < DMU_OT_NUMTYPES); ++ dnp->dn_bonustype = dn->dn_next_bonustype[txgoff]; ++ dn->dn_next_bonustype[txgoff] = 0; ++ } ++ ++ /* ++ * We will either remove a spill block when a file is being removed ++ * or we have been asked to remove it. ++ */ ++ if (dn->dn_rm_spillblk[txgoff] || ++ ((dnp->dn_flags & DNODE_FLAG_SPILL_BLKPTR) && ++ dn->dn_free_txg > 0 && dn->dn_free_txg <= tx->tx_txg)) { ++ if ((dnp->dn_flags & DNODE_FLAG_SPILL_BLKPTR)) ++ kill_spill = B_TRUE; ++ dn->dn_rm_spillblk[txgoff] = 0; ++ } ++ ++ if (dn->dn_next_indblkshift[txgoff]) { ++ ASSERT(dnp->dn_nlevels == 1); ++ dnp->dn_indblkshift = dn->dn_next_indblkshift[txgoff]; ++ dn->dn_next_indblkshift[txgoff] = 0; ++ } ++ ++ /* ++ * Just take the live (open-context) values for checksum and compress. ++ * Strictly speaking it's a future leak, but nothing bad happens if we ++ * start using the new checksum or compress algorithm a little early. 
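Throughout dnode_sync() above, changes made in open context are not applied directly; they are staged in small per-txg arrays indexed by txg & TXG_MASK (dn_next_blksz, dn_next_bonuslen, dn_next_bonustype, ...), and syncing context applies and clears the slot for the txg it is writing. A reduced model of that staging pattern, assuming a TXG_SIZE of 4 and an illustrative property struct:

#include <stdint.h>

#define TXG_SIZE 4
#define TXG_MASK (TXG_SIZE - 1)

struct staged_prop {
        uint64_t live;                  /* value as it will appear on disk */
        uint64_t next[TXG_SIZE];        /* pending value per open txg, 0 = unset */
};

/* open context: record the new value against the transaction's txg */
static void
prop_set(struct staged_prop *p, uint64_t txg, uint64_t value)
{
        p->next[txg & TXG_MASK] = value;
}

/* syncing context: apply and clear whatever was staged for this txg */
static void
prop_sync(struct staged_prop *p, uint64_t txg)
{
        int slot = txg & TXG_MASK;

        if (p->next[slot] != 0) {
                p->live = p->next[slot];
                p->next[slot] = 0;
        }
}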
++ */ ++ dnp->dn_checksum = dn->dn_checksum; ++ dnp->dn_compress = dn->dn_compress; ++ ++ mutex_exit(&dn->dn_mtx); ++ ++ if (kill_spill) { ++ (void) free_blocks(dn, &dn->dn_phys->dn_spill, 1, tx); ++ mutex_enter(&dn->dn_mtx); ++ dnp->dn_flags &= ~DNODE_FLAG_SPILL_BLKPTR; ++ mutex_exit(&dn->dn_mtx); ++ } ++ ++ /* process all the "freed" ranges in the file */ ++ while ((rp = avl_last(&dn->dn_ranges[txgoff]))) { ++ dnode_sync_free_range(dn, rp->fr_blkid, rp->fr_nblks, tx); ++ /* grab the mutex so we don't race with dnode_block_freed() */ ++ mutex_enter(&dn->dn_mtx); ++ avl_remove(&dn->dn_ranges[txgoff], rp); ++ mutex_exit(&dn->dn_mtx); ++ kmem_free(rp, sizeof (free_range_t)); ++ } ++ ++ if (dn->dn_free_txg > 0 && dn->dn_free_txg <= tx->tx_txg) { ++ dnode_sync_free(dn, tx); ++ return; ++ } ++ ++ if (dn->dn_next_nblkptr[txgoff]) { ++ /* this should only happen on a realloc */ ++ ASSERT(dn->dn_allocated_txg == tx->tx_txg); ++ if (dn->dn_next_nblkptr[txgoff] > dnp->dn_nblkptr) { ++ /* zero the new blkptrs we are gaining */ ++ bzero(dnp->dn_blkptr + dnp->dn_nblkptr, ++ sizeof (blkptr_t) * ++ (dn->dn_next_nblkptr[txgoff] - dnp->dn_nblkptr)); ++#ifdef ZFS_DEBUG ++ } else { ++ int i; ++ ASSERT(dn->dn_next_nblkptr[txgoff] < dnp->dn_nblkptr); ++ /* the blkptrs we are losing better be unallocated */ ++ for (i = dn->dn_next_nblkptr[txgoff]; ++ i < dnp->dn_nblkptr; i++) ++ ASSERT(BP_IS_HOLE(&dnp->dn_blkptr[i])); ++#endif ++ } ++ mutex_enter(&dn->dn_mtx); ++ dnp->dn_nblkptr = dn->dn_next_nblkptr[txgoff]; ++ dn->dn_next_nblkptr[txgoff] = 0; ++ mutex_exit(&dn->dn_mtx); ++ } ++ ++ if (dn->dn_next_nlevels[txgoff]) { ++ dnode_increase_indirection(dn, tx); ++ dn->dn_next_nlevels[txgoff] = 0; ++ } ++ ++ dbuf_sync_list(list, tx); ++ ++ if (!DMU_OBJECT_IS_SPECIAL(dn->dn_object)) { ++ ASSERT3P(list_head(list), ==, NULL); ++ dnode_rele(dn, (void *)(uintptr_t)tx->tx_txg); ++ } ++ ++ /* ++ * Although we have dropped our reference to the dnode, it ++ * can't be evicted until its written, and we haven't yet ++ * initiated the IO for the dnode's dbuf. ++ */ ++} +diff -uNr linux-3.2.33-go.orig/fs/zfs/zfs/dsl_dataset.c linux-3.2.33-go/fs/zfs/zfs/dsl_dataset.c +--- linux-3.2.33-go.orig/fs/zfs/zfs/dsl_dataset.c 1970-01-01 01:00:00.000000000 +0100 ++++ linux-3.2.33-go/fs/zfs/zfs/dsl_dataset.c 2012-11-16 23:25:34.351039311 +0100 +@@ -0,0 +1,4314 @@ ++/* ++ * CDDL HEADER START ++ * ++ * The contents of this file are subject to the terms of the ++ * Common Development and Distribution License (the "License"). ++ * You may not use this file except in compliance with the License. ++ * ++ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE ++ * or http://www.opensolaris.org/os/licensing. ++ * See the License for the specific language governing permissions ++ * and limitations under the License. ++ * ++ * When distributing Covered Code, include this CDDL HEADER in each ++ * file and include the License file at usr/src/OPENSOLARIS.LICENSE. ++ * If applicable, add the following below this CDDL HEADER, with the ++ * fields enclosed by brackets "[]" replaced with your own identifying ++ * information: Portions Copyright [yyyy] [name of copyright owner] ++ * ++ * CDDL HEADER END ++ */ ++/* ++ * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2011 by Delphix. All rights reserved. ++ * Copyright (c) 2012, Joyent, Inc. All rights reserved. 
++ */ ++ ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++ ++static char *dsl_reaper = "the grim reaper"; ++ ++static dsl_checkfunc_t dsl_dataset_destroy_begin_check; ++static dsl_syncfunc_t dsl_dataset_destroy_begin_sync; ++static dsl_syncfunc_t dsl_dataset_set_reservation_sync; ++ ++#define SWITCH64(x, y) \ ++ { \ ++ uint64_t __tmp = (x); \ ++ (x) = (y); \ ++ (y) = __tmp; \ ++ } ++ ++#define DS_REF_MAX (1ULL << 62) ++ ++#define DSL_DEADLIST_BLOCKSIZE SPA_MAXBLOCKSIZE ++ ++#define DSL_DATASET_IS_DESTROYED(ds) ((ds)->ds_owner == dsl_reaper) ++ ++ ++/* ++ * Figure out how much of this delta should be propogated to the dsl_dir ++ * layer. If there's a refreservation, that space has already been ++ * partially accounted for in our ancestors. ++ */ ++static int64_t ++parent_delta(dsl_dataset_t *ds, int64_t delta) ++{ ++ uint64_t old_bytes, new_bytes; ++ ++ if (ds->ds_reserved == 0) ++ return (delta); ++ ++ old_bytes = MAX(ds->ds_phys->ds_unique_bytes, ds->ds_reserved); ++ new_bytes = MAX(ds->ds_phys->ds_unique_bytes + delta, ds->ds_reserved); ++ ++ ASSERT3U(ABS((int64_t)(new_bytes - old_bytes)), <=, ABS(delta)); ++ return (new_bytes - old_bytes); ++} ++ ++void ++dsl_dataset_block_born(dsl_dataset_t *ds, const blkptr_t *bp, dmu_tx_t *tx) ++{ ++ int used, compressed, uncompressed; ++ int64_t delta; ++ ++ used = bp_get_dsize_sync(tx->tx_pool->dp_spa, bp); ++ compressed = BP_GET_PSIZE(bp); ++ uncompressed = BP_GET_UCSIZE(bp); ++ ++ dprintf_bp(bp, "ds=%p", ds); ++ ++ ASSERT(dmu_tx_is_syncing(tx)); ++ /* It could have been compressed away to nothing */ ++ if (BP_IS_HOLE(bp)) ++ return; ++ ASSERT(BP_GET_TYPE(bp) != DMU_OT_NONE); ++ ASSERT3U(BP_GET_TYPE(bp), <, DMU_OT_NUMTYPES); ++ if (ds == NULL) { ++ /* ++ * Account for the meta-objset space in its placeholder ++ * dsl_dir. ++ */ ++ ASSERT3U(compressed, ==, uncompressed); /* it's all metadata */ ++ dsl_dir_diduse_space(tx->tx_pool->dp_mos_dir, DD_USED_HEAD, ++ used, compressed, uncompressed, tx); ++ dsl_dir_dirty(tx->tx_pool->dp_mos_dir, tx); ++ return; ++ } ++ dmu_buf_will_dirty(ds->ds_dbuf, tx); ++ ++ mutex_enter(&ds->ds_dir->dd_lock); ++ mutex_enter(&ds->ds_lock); ++ delta = parent_delta(ds, used); ++ ds->ds_phys->ds_used_bytes += used; ++ ds->ds_phys->ds_compressed_bytes += compressed; ++ ds->ds_phys->ds_uncompressed_bytes += uncompressed; ++ ds->ds_phys->ds_unique_bytes += used; ++ mutex_exit(&ds->ds_lock); ++ dsl_dir_diduse_space(ds->ds_dir, DD_USED_HEAD, delta, ++ compressed, uncompressed, tx); ++ dsl_dir_transfer_space(ds->ds_dir, used - delta, ++ DD_USED_REFRSRV, DD_USED_HEAD, tx); ++ mutex_exit(&ds->ds_dir->dd_lock); ++} ++ ++int ++dsl_dataset_block_kill(dsl_dataset_t *ds, const blkptr_t *bp, dmu_tx_t *tx, ++ boolean_t async) ++{ ++ int used, compressed, uncompressed; ++ ++ if (BP_IS_HOLE(bp)) ++ return (0); ++ ++ ASSERT(dmu_tx_is_syncing(tx)); ++ ASSERT(bp->blk_birth <= tx->tx_txg); ++ ++ used = bp_get_dsize_sync(tx->tx_pool->dp_spa, bp); ++ compressed = BP_GET_PSIZE(bp); ++ uncompressed = BP_GET_UCSIZE(bp); ++ ++ ASSERT(used > 0); ++ if (ds == NULL) { ++ /* ++ * Account for the meta-objset space in its placeholder ++ * dataset. 
++ */ ++ dsl_free(tx->tx_pool, tx->tx_txg, bp); ++ ++ dsl_dir_diduse_space(tx->tx_pool->dp_mos_dir, DD_USED_HEAD, ++ -used, -compressed, -uncompressed, tx); ++ dsl_dir_dirty(tx->tx_pool->dp_mos_dir, tx); ++ return (used); ++ } ++ ASSERT3P(tx->tx_pool, ==, ds->ds_dir->dd_pool); ++ ++ ASSERT(!dsl_dataset_is_snapshot(ds)); ++ dmu_buf_will_dirty(ds->ds_dbuf, tx); ++ ++ if (bp->blk_birth > ds->ds_phys->ds_prev_snap_txg) { ++ int64_t delta; ++ ++ dprintf_bp(bp, "freeing ds=%llu", ds->ds_object); ++ dsl_free(tx->tx_pool, tx->tx_txg, bp); ++ ++ mutex_enter(&ds->ds_dir->dd_lock); ++ mutex_enter(&ds->ds_lock); ++ ASSERT(ds->ds_phys->ds_unique_bytes >= used || ++ !DS_UNIQUE_IS_ACCURATE(ds)); ++ delta = parent_delta(ds, -used); ++ ds->ds_phys->ds_unique_bytes -= used; ++ mutex_exit(&ds->ds_lock); ++ dsl_dir_diduse_space(ds->ds_dir, DD_USED_HEAD, ++ delta, -compressed, -uncompressed, tx); ++ dsl_dir_transfer_space(ds->ds_dir, -used - delta, ++ DD_USED_REFRSRV, DD_USED_HEAD, tx); ++ mutex_exit(&ds->ds_dir->dd_lock); ++ } else { ++ dprintf_bp(bp, "putting on dead list: %s", ""); ++ if (async) { ++ /* ++ * We are here as part of zio's write done callback, ++ * which means we're a zio interrupt thread. We can't ++ * call dsl_deadlist_insert() now because it may block ++ * waiting for I/O. Instead, put bp on the deferred ++ * queue and let dsl_pool_sync() finish the job. ++ */ ++ bplist_append(&ds->ds_pending_deadlist, bp); ++ } else { ++ dsl_deadlist_insert(&ds->ds_deadlist, bp, tx); ++ } ++ ASSERT3U(ds->ds_prev->ds_object, ==, ++ ds->ds_phys->ds_prev_snap_obj); ++ ASSERT(ds->ds_prev->ds_phys->ds_num_children > 0); ++ /* if (bp->blk_birth > prev prev snap txg) prev unique += bs */ ++ if (ds->ds_prev->ds_phys->ds_next_snap_obj == ++ ds->ds_object && bp->blk_birth > ++ ds->ds_prev->ds_phys->ds_prev_snap_txg) { ++ dmu_buf_will_dirty(ds->ds_prev->ds_dbuf, tx); ++ mutex_enter(&ds->ds_prev->ds_lock); ++ ds->ds_prev->ds_phys->ds_unique_bytes += used; ++ mutex_exit(&ds->ds_prev->ds_lock); ++ } ++ if (bp->blk_birth > ds->ds_dir->dd_origin_txg) { ++ dsl_dir_transfer_space(ds->ds_dir, used, ++ DD_USED_HEAD, DD_USED_SNAP, tx); ++ } ++ } ++ mutex_enter(&ds->ds_lock); ++ ASSERT3U(ds->ds_phys->ds_used_bytes, >=, used); ++ ds->ds_phys->ds_used_bytes -= used; ++ ASSERT3U(ds->ds_phys->ds_compressed_bytes, >=, compressed); ++ ds->ds_phys->ds_compressed_bytes -= compressed; ++ ASSERT3U(ds->ds_phys->ds_uncompressed_bytes, >=, uncompressed); ++ ds->ds_phys->ds_uncompressed_bytes -= uncompressed; ++ mutex_exit(&ds->ds_lock); ++ ++ return (used); ++} ++ ++uint64_t ++dsl_dataset_prev_snap_txg(dsl_dataset_t *ds) ++{ ++ uint64_t trysnap = 0; ++ ++ if (ds == NULL) ++ return (0); ++ /* ++ * The snapshot creation could fail, but that would cause an ++ * incorrect FALSE return, which would only result in an ++ * overestimation of the amount of space that an operation would ++ * consume, which is OK. ++ * ++ * There's also a small window where we could miss a pending ++ * snapshot, because we could set the sync task in the quiescing ++ * phase. So this should only be used as a guess. 
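parent_delta() earlier in this file decides how much of a space delta actually reaches the dsl_dir: with a refreservation the dataset is already charged max(unique_bytes, reserved), so only the change in that maximum propagates, and writes that stay inside the reservation cost the parent nothing. The same arithmetic as a standalone sketch (the wrapper and example numbers are illustrative):

#include <stdint.h>
#include <stdio.h>

static uint64_t
max_u64(uint64_t a, uint64_t b)
{
        return (a > b ? a : b);
}

/* change in max(unique, reserved) when 'delta' bytes are added/removed */
static int64_t
parent_delta(uint64_t unique, uint64_t reserved, int64_t delta)
{
        if (reserved == 0)
                return (delta);
        return ((int64_t)(max_u64(unique + delta, reserved) -
            max_u64(unique, reserved)));
}

int
main(void)
{
        /*
         * 10M reservation, 4M unique: a 2M write stays inside the
         * reservation, so nothing extra is charged to the parent.
         */
        printf("%lld\n", (long long)parent_delta(4 << 20, 10 << 20, 2 << 20));
        /* 9M unique: the same 2M write overshoots the reservation by 1M */
        printf("%lld\n", (long long)parent_delta(9 << 20, 10 << 20, 2 << 20));
        return (0);
}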
++ */ ++ if (ds->ds_trysnap_txg > ++ spa_last_synced_txg(ds->ds_dir->dd_pool->dp_spa)) ++ trysnap = ds->ds_trysnap_txg; ++ return (MAX(ds->ds_phys->ds_prev_snap_txg, trysnap)); ++} ++ ++boolean_t ++dsl_dataset_block_freeable(dsl_dataset_t *ds, const blkptr_t *bp, ++ uint64_t blk_birth) ++{ ++ if (blk_birth <= dsl_dataset_prev_snap_txg(ds)) ++ return (B_FALSE); ++ ++ ddt_prefetch(dsl_dataset_get_spa(ds), bp); ++ ++ return (B_TRUE); ++} ++ ++/* ARGSUSED */ ++static void ++dsl_dataset_evict(dmu_buf_t *db, void *dsv) ++{ ++ dsl_dataset_t *ds = dsv; ++ ++ ASSERT(ds->ds_owner == NULL || DSL_DATASET_IS_DESTROYED(ds)); ++ ++ unique_remove(ds->ds_fsid_guid); ++ ++ if (ds->ds_objset != NULL) ++ dmu_objset_evict(ds->ds_objset); ++ ++ if (ds->ds_prev) { ++ dsl_dataset_drop_ref(ds->ds_prev, ds); ++ ds->ds_prev = NULL; ++ } ++ ++ bplist_destroy(&ds->ds_pending_deadlist); ++ if (db != NULL) { ++ dsl_deadlist_close(&ds->ds_deadlist); ++ } else { ++ ASSERT(ds->ds_deadlist.dl_dbuf == NULL); ++ ASSERT(!ds->ds_deadlist.dl_oldfmt); ++ } ++ if (ds->ds_dir) ++ dsl_dir_close(ds->ds_dir, ds); ++ ++ ASSERT(!list_link_active(&ds->ds_synced_link)); ++ ++ mutex_destroy(&ds->ds_lock); ++ mutex_destroy(&ds->ds_recvlock); ++ mutex_destroy(&ds->ds_opening_lock); ++ rw_destroy(&ds->ds_rwlock); ++ cv_destroy(&ds->ds_exclusive_cv); ++ ++ kmem_free(ds, sizeof (dsl_dataset_t)); ++} ++ ++static int ++dsl_dataset_get_snapname(dsl_dataset_t *ds) ++{ ++ dsl_dataset_phys_t *headphys; ++ int err; ++ dmu_buf_t *headdbuf; ++ dsl_pool_t *dp = ds->ds_dir->dd_pool; ++ objset_t *mos = dp->dp_meta_objset; ++ ++ if (ds->ds_snapname[0]) ++ return (0); ++ if (ds->ds_phys->ds_next_snap_obj == 0) ++ return (0); ++ ++ err = dmu_bonus_hold(mos, ds->ds_dir->dd_phys->dd_head_dataset_obj, ++ FTAG, &headdbuf); ++ if (err) ++ return (err); ++ headphys = headdbuf->db_data; ++ err = zap_value_search(dp->dp_meta_objset, ++ headphys->ds_snapnames_zapobj, ds->ds_object, 0, ds->ds_snapname); ++ dmu_buf_rele(headdbuf, FTAG); ++ return (err); ++} ++ ++static int ++dsl_dataset_snap_lookup(dsl_dataset_t *ds, const char *name, uint64_t *value) ++{ ++ objset_t *mos = ds->ds_dir->dd_pool->dp_meta_objset; ++ uint64_t snapobj = ds->ds_phys->ds_snapnames_zapobj; ++ matchtype_t mt; ++ int err; ++ ++ if (ds->ds_phys->ds_flags & DS_FLAG_CI_DATASET) ++ mt = MT_FIRST; ++ else ++ mt = MT_EXACT; ++ ++ err = zap_lookup_norm(mos, snapobj, name, 8, 1, ++ value, mt, NULL, 0, NULL); ++ if (err == ENOTSUP && mt == MT_FIRST) ++ err = zap_lookup(mos, snapobj, name, 8, 1, value); ++ return (err); ++} ++ ++static int ++dsl_dataset_snap_remove(dsl_dataset_t *ds, char *name, dmu_tx_t *tx) ++{ ++ objset_t *mos = ds->ds_dir->dd_pool->dp_meta_objset; ++ uint64_t snapobj = ds->ds_phys->ds_snapnames_zapobj; ++ matchtype_t mt; ++ int err; ++ ++ dsl_dir_snap_cmtime_update(ds->ds_dir); ++ ++ if (ds->ds_phys->ds_flags & DS_FLAG_CI_DATASET) ++ mt = MT_FIRST; ++ else ++ mt = MT_EXACT; ++ ++ err = zap_remove_norm(mos, snapobj, name, mt, tx); ++ if (err == ENOTSUP && mt == MT_FIRST) ++ err = zap_remove(mos, snapobj, name, tx); ++ return (err); ++} ++ ++static int ++dsl_dataset_get_ref(dsl_pool_t *dp, uint64_t dsobj, void *tag, ++ dsl_dataset_t **dsp) ++{ ++ objset_t *mos = dp->dp_meta_objset; ++ dmu_buf_t *dbuf; ++ dsl_dataset_t *ds; ++ int err; ++ dmu_object_info_t doi; ++ ++ ASSERT(RW_LOCK_HELD(&dp->dp_config_rwlock) || ++ dsl_pool_sync_context(dp)); ++ ++ err = dmu_bonus_hold(mos, dsobj, tag, &dbuf); ++ if (err) ++ return (err); ++ ++ /* Make sure dsobj has the correct object type. 
*/ ++ dmu_object_info_from_db(dbuf, &doi); ++ if (doi.doi_type != DMU_OT_DSL_DATASET) ++ return (EINVAL); ++ ++ ds = dmu_buf_get_user(dbuf); ++ if (ds == NULL) { ++ dsl_dataset_t *winner = NULL; ++ ++ ds = kmem_zalloc(sizeof (dsl_dataset_t), KM_PUSHPAGE); ++ ds->ds_dbuf = dbuf; ++ ds->ds_object = dsobj; ++ ds->ds_phys = dbuf->db_data; ++ list_link_init(&ds->ds_synced_link); ++ ++ mutex_init(&ds->ds_lock, NULL, MUTEX_DEFAULT, NULL); ++ mutex_init(&ds->ds_recvlock, NULL, MUTEX_DEFAULT, NULL); ++ mutex_init(&ds->ds_opening_lock, NULL, MUTEX_DEFAULT, NULL); ++ mutex_init(&ds->ds_sendstream_lock, NULL, MUTEX_DEFAULT, NULL); ++ ++ rw_init(&ds->ds_rwlock, NULL, RW_DEFAULT, NULL); ++ cv_init(&ds->ds_exclusive_cv, NULL, CV_DEFAULT, NULL); ++ ++ bplist_create(&ds->ds_pending_deadlist); ++ dsl_deadlist_open(&ds->ds_deadlist, ++ mos, ds->ds_phys->ds_deadlist_obj); ++ ++ list_create(&ds->ds_sendstreams, sizeof (dmu_sendarg_t), ++ offsetof(dmu_sendarg_t, dsa_link)); ++ ++ if (err == 0) { ++ err = dsl_dir_open_obj(dp, ++ ds->ds_phys->ds_dir_obj, NULL, ds, &ds->ds_dir); ++ } ++ if (err) { ++ mutex_destroy(&ds->ds_lock); ++ mutex_destroy(&ds->ds_recvlock); ++ mutex_destroy(&ds->ds_opening_lock); ++ rw_destroy(&ds->ds_rwlock); ++ cv_destroy(&ds->ds_exclusive_cv); ++ bplist_destroy(&ds->ds_pending_deadlist); ++ dsl_deadlist_close(&ds->ds_deadlist); ++ kmem_free(ds, sizeof (dsl_dataset_t)); ++ dmu_buf_rele(dbuf, tag); ++ return (err); ++ } ++ ++ if (!dsl_dataset_is_snapshot(ds)) { ++ ds->ds_snapname[0] = '\0'; ++ if (ds->ds_phys->ds_prev_snap_obj) { ++ err = dsl_dataset_get_ref(dp, ++ ds->ds_phys->ds_prev_snap_obj, ++ ds, &ds->ds_prev); ++ } ++ } else { ++ if (zfs_flags & ZFS_DEBUG_SNAPNAMES) ++ err = dsl_dataset_get_snapname(ds); ++ if (err == 0 && ds->ds_phys->ds_userrefs_obj != 0) { ++ err = zap_count( ++ ds->ds_dir->dd_pool->dp_meta_objset, ++ ds->ds_phys->ds_userrefs_obj, ++ &ds->ds_userrefs); ++ } ++ } ++ ++ if (err == 0 && !dsl_dataset_is_snapshot(ds)) { ++ /* ++ * In sync context, we're called with either no lock ++ * or with the write lock. If we're not syncing, ++ * we're always called with the read lock held. 
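++	 * Accordingly, the code below takes the read lock itself only when
++	 * we are syncing and the write lock is not already held.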
++ */ ++ boolean_t need_lock = ++ !RW_WRITE_HELD(&dp->dp_config_rwlock) && ++ dsl_pool_sync_context(dp); ++ ++ if (need_lock) ++ rw_enter(&dp->dp_config_rwlock, RW_READER); ++ ++ err = dsl_prop_get_ds(ds, ++ "refreservation", sizeof (uint64_t), 1, ++ &ds->ds_reserved, NULL); ++ if (err == 0) { ++ err = dsl_prop_get_ds(ds, ++ "refquota", sizeof (uint64_t), 1, ++ &ds->ds_quota, NULL); ++ } ++ ++ if (need_lock) ++ rw_exit(&dp->dp_config_rwlock); ++ } else { ++ ds->ds_reserved = ds->ds_quota = 0; ++ } ++ ++ if (err == 0) { ++ winner = dmu_buf_set_user_ie(dbuf, ds, &ds->ds_phys, ++ dsl_dataset_evict); ++ } ++ if (err || winner) { ++ bplist_destroy(&ds->ds_pending_deadlist); ++ dsl_deadlist_close(&ds->ds_deadlist); ++ if (ds->ds_prev) ++ dsl_dataset_drop_ref(ds->ds_prev, ds); ++ dsl_dir_close(ds->ds_dir, ds); ++ mutex_destroy(&ds->ds_lock); ++ mutex_destroy(&ds->ds_recvlock); ++ mutex_destroy(&ds->ds_opening_lock); ++ rw_destroy(&ds->ds_rwlock); ++ cv_destroy(&ds->ds_exclusive_cv); ++ kmem_free(ds, sizeof (dsl_dataset_t)); ++ if (err) { ++ dmu_buf_rele(dbuf, tag); ++ return (err); ++ } ++ ds = winner; ++ } else { ++ ds->ds_fsid_guid = ++ unique_insert(ds->ds_phys->ds_fsid_guid); ++ } ++ } ++ ASSERT3P(ds->ds_dbuf, ==, dbuf); ++ ASSERT3P(ds->ds_phys, ==, dbuf->db_data); ++ ASSERT(ds->ds_phys->ds_prev_snap_obj != 0 || ++ spa_version(dp->dp_spa) < SPA_VERSION_ORIGIN || ++ dp->dp_origin_snap == NULL || ds == dp->dp_origin_snap); ++ mutex_enter(&ds->ds_lock); ++ if (!dsl_pool_sync_context(dp) && DSL_DATASET_IS_DESTROYED(ds)) { ++ mutex_exit(&ds->ds_lock); ++ dmu_buf_rele(ds->ds_dbuf, tag); ++ return (ENOENT); ++ } ++ mutex_exit(&ds->ds_lock); ++ *dsp = ds; ++ return (0); ++} ++ ++static int ++dsl_dataset_hold_ref(dsl_dataset_t *ds, void *tag) ++{ ++ dsl_pool_t *dp = ds->ds_dir->dd_pool; ++ ++ /* ++ * In syncing context we don't want the rwlock lock: there ++ * may be an existing writer waiting for sync phase to ++ * finish. We don't need to worry about such writers, since ++ * sync phase is single-threaded, so the writer can't be ++ * doing anything while we are active. ++ */ ++ if (dsl_pool_sync_context(dp)) { ++ ASSERT(!DSL_DATASET_IS_DESTROYED(ds)); ++ return (0); ++ } ++ ++ /* ++ * Normal users will hold the ds_rwlock as a READER until they ++ * are finished (i.e., call dsl_dataset_rele()). "Owners" will ++ * drop their READER lock after they set the ds_owner field. ++ * ++ * If the dataset is being destroyed, the destroy thread will ++ * obtain a WRITER lock for exclusive access after it's done its ++ * open-context work and then change the ds_owner to ++ * dsl_reaper once destruction is assured. So threads ++ * may block here temporarily, until the "destructability" of ++ * the dataset is determined. ++ */ ++ ASSERT(!RW_WRITE_HELD(&dp->dp_config_rwlock)); ++ mutex_enter(&ds->ds_lock); ++ while (!rw_tryenter(&ds->ds_rwlock, RW_READER)) { ++ rw_exit(&dp->dp_config_rwlock); ++ cv_wait(&ds->ds_exclusive_cv, &ds->ds_lock); ++ if (DSL_DATASET_IS_DESTROYED(ds)) { ++ mutex_exit(&ds->ds_lock); ++ dsl_dataset_drop_ref(ds, tag); ++ rw_enter(&dp->dp_config_rwlock, RW_READER); ++ return (ENOENT); ++ } ++ /* ++ * The dp_config_rwlock lives above the ds_lock. And ++ * we need to check DSL_DATASET_IS_DESTROYED() while ++ * holding the ds_lock, so we have to drop and reacquire ++ * the ds_lock here. 
++ */ ++ mutex_exit(&ds->ds_lock); ++ rw_enter(&dp->dp_config_rwlock, RW_READER); ++ mutex_enter(&ds->ds_lock); ++ } ++ mutex_exit(&ds->ds_lock); ++ return (0); ++} ++ ++int ++dsl_dataset_hold_obj(dsl_pool_t *dp, uint64_t dsobj, void *tag, ++ dsl_dataset_t **dsp) ++{ ++ int err = dsl_dataset_get_ref(dp, dsobj, tag, dsp); ++ ++ if (err) ++ return (err); ++ return (dsl_dataset_hold_ref(*dsp, tag)); ++} ++ ++int ++dsl_dataset_own_obj(dsl_pool_t *dp, uint64_t dsobj, boolean_t inconsistentok, ++ void *tag, dsl_dataset_t **dsp) ++{ ++ int err = dsl_dataset_hold_obj(dp, dsobj, tag, dsp); ++ if (err) ++ return (err); ++ if (!dsl_dataset_tryown(*dsp, inconsistentok, tag)) { ++ dsl_dataset_rele(*dsp, tag); ++ *dsp = NULL; ++ return (EBUSY); ++ } ++ return (0); ++} ++ ++int ++dsl_dataset_hold(const char *name, void *tag, dsl_dataset_t **dsp) ++{ ++ dsl_dir_t *dd; ++ dsl_pool_t *dp; ++ const char *snapname; ++ uint64_t obj; ++ int err = 0; ++ ++ err = dsl_dir_open_spa(NULL, name, FTAG, &dd, &snapname); ++ if (err) ++ return (err); ++ ++ dp = dd->dd_pool; ++ obj = dd->dd_phys->dd_head_dataset_obj; ++ rw_enter(&dp->dp_config_rwlock, RW_READER); ++ if (obj) ++ err = dsl_dataset_get_ref(dp, obj, tag, dsp); ++ else ++ err = ENOENT; ++ if (err) ++ goto out; ++ ++ err = dsl_dataset_hold_ref(*dsp, tag); ++ ++ /* we may be looking for a snapshot */ ++ if (err == 0 && snapname != NULL) { ++ dsl_dataset_t *ds = NULL; ++ ++ if (*snapname++ != '@') { ++ dsl_dataset_rele(*dsp, tag); ++ err = ENOENT; ++ goto out; ++ } ++ ++ dprintf("looking for snapshot '%s'\n", snapname); ++ err = dsl_dataset_snap_lookup(*dsp, snapname, &obj); ++ if (err == 0) ++ err = dsl_dataset_get_ref(dp, obj, tag, &ds); ++ dsl_dataset_rele(*dsp, tag); ++ ++ ASSERT3U((err == 0), ==, (ds != NULL)); ++ ++ if (ds) { ++ mutex_enter(&ds->ds_lock); ++ if (ds->ds_snapname[0] == 0) ++ (void) strlcpy(ds->ds_snapname, snapname, ++ sizeof (ds->ds_snapname)); ++ mutex_exit(&ds->ds_lock); ++ err = dsl_dataset_hold_ref(ds, tag); ++ *dsp = err ? NULL : ds; ++ } ++ } ++out: ++ rw_exit(&dp->dp_config_rwlock); ++ dsl_dir_close(dd, FTAG); ++ return (err); ++} ++ ++int ++dsl_dataset_own(const char *name, boolean_t inconsistentok, ++ void *tag, dsl_dataset_t **dsp) ++{ ++ int err = dsl_dataset_hold(name, tag, dsp); ++ if (err) ++ return (err); ++ if (!dsl_dataset_tryown(*dsp, inconsistentok, tag)) { ++ dsl_dataset_rele(*dsp, tag); ++ return (EBUSY); ++ } ++ return (0); ++} ++ ++void ++dsl_dataset_name(dsl_dataset_t *ds, char *name) ++{ ++ if (ds == NULL) { ++ (void) strcpy(name, "mos"); ++ } else { ++ dsl_dir_name(ds->ds_dir, name); ++ VERIFY(0 == dsl_dataset_get_snapname(ds)); ++ if (ds->ds_snapname[0]) { ++ (void) strcat(name, "@"); ++ /* ++ * We use a "recursive" mutex so that we ++ * can call dprintf_ds() with ds_lock held. 
++ */ ++ if (!MUTEX_HELD(&ds->ds_lock)) { ++ mutex_enter(&ds->ds_lock); ++ (void) strcat(name, ds->ds_snapname); ++ mutex_exit(&ds->ds_lock); ++ } else { ++ (void) strcat(name, ds->ds_snapname); ++ } ++ } ++ } ++} ++ ++static int ++dsl_dataset_namelen(dsl_dataset_t *ds) ++{ ++ int result; ++ ++ if (ds == NULL) { ++ result = 3; /* "mos" */ ++ } else { ++ result = dsl_dir_namelen(ds->ds_dir); ++ VERIFY(0 == dsl_dataset_get_snapname(ds)); ++ if (ds->ds_snapname[0]) { ++ ++result; /* adding one for the @-sign */ ++ if (!MUTEX_HELD(&ds->ds_lock)) { ++ mutex_enter(&ds->ds_lock); ++ result += strlen(ds->ds_snapname); ++ mutex_exit(&ds->ds_lock); ++ } else { ++ result += strlen(ds->ds_snapname); ++ } ++ } ++ } ++ ++ return (result); ++} ++ ++void ++dsl_dataset_drop_ref(dsl_dataset_t *ds, void *tag) ++{ ++ dmu_buf_rele(ds->ds_dbuf, tag); ++} ++ ++void ++dsl_dataset_rele(dsl_dataset_t *ds, void *tag) ++{ ++ if (!dsl_pool_sync_context(ds->ds_dir->dd_pool)) { ++ rw_exit(&ds->ds_rwlock); ++ } ++ dsl_dataset_drop_ref(ds, tag); ++} ++ ++void ++dsl_dataset_disown(dsl_dataset_t *ds, void *tag) ++{ ++ ASSERT((ds->ds_owner == tag && ds->ds_dbuf) || ++ (DSL_DATASET_IS_DESTROYED(ds) && ds->ds_dbuf == NULL)); ++ ++ mutex_enter(&ds->ds_lock); ++ ds->ds_owner = NULL; ++ if (RW_WRITE_HELD(&ds->ds_rwlock)) { ++ rw_exit(&ds->ds_rwlock); ++ cv_broadcast(&ds->ds_exclusive_cv); ++ } ++ mutex_exit(&ds->ds_lock); ++ if (ds->ds_dbuf) ++ dsl_dataset_drop_ref(ds, tag); ++ else ++ dsl_dataset_evict(NULL, ds); ++} ++ ++boolean_t ++dsl_dataset_tryown(dsl_dataset_t *ds, boolean_t inconsistentok, void *tag) ++{ ++ boolean_t gotit = FALSE; ++ ++ mutex_enter(&ds->ds_lock); ++ if (ds->ds_owner == NULL && ++ (!DS_IS_INCONSISTENT(ds) || inconsistentok)) { ++ ds->ds_owner = tag; ++ if (!dsl_pool_sync_context(ds->ds_dir->dd_pool)) ++ rw_exit(&ds->ds_rwlock); ++ gotit = TRUE; ++ } ++ mutex_exit(&ds->ds_lock); ++ return (gotit); ++} ++ ++void ++dsl_dataset_make_exclusive(dsl_dataset_t *ds, void *owner) ++{ ++ ASSERT3P(owner, ==, ds->ds_owner); ++ if (!RW_WRITE_HELD(&ds->ds_rwlock)) ++ rw_enter(&ds->ds_rwlock, RW_WRITER); ++} ++ ++uint64_t ++dsl_dataset_create_sync_dd(dsl_dir_t *dd, dsl_dataset_t *origin, ++ uint64_t flags, dmu_tx_t *tx) ++{ ++ dsl_pool_t *dp = dd->dd_pool; ++ dmu_buf_t *dbuf; ++ dsl_dataset_phys_t *dsphys; ++ uint64_t dsobj; ++ objset_t *mos = dp->dp_meta_objset; ++ ++ if (origin == NULL) ++ origin = dp->dp_origin_snap; ++ ++ ASSERT(origin == NULL || origin->ds_dir->dd_pool == dp); ++ ASSERT(origin == NULL || origin->ds_phys->ds_num_children > 0); ++ ASSERT(dmu_tx_is_syncing(tx)); ++ ASSERT(dd->dd_phys->dd_head_dataset_obj == 0); ++ ++ dsobj = dmu_object_alloc(mos, DMU_OT_DSL_DATASET, 0, ++ DMU_OT_DSL_DATASET, sizeof (dsl_dataset_phys_t), tx); ++ VERIFY(0 == dmu_bonus_hold(mos, dsobj, FTAG, &dbuf)); ++ dmu_buf_will_dirty(dbuf, tx); ++ dsphys = dbuf->db_data; ++ bzero(dsphys, sizeof (dsl_dataset_phys_t)); ++ dsphys->ds_dir_obj = dd->dd_object; ++ dsphys->ds_flags = flags; ++ dsphys->ds_fsid_guid = unique_create(); ++ (void) random_get_pseudo_bytes((void*)&dsphys->ds_guid, ++ sizeof (dsphys->ds_guid)); ++ dsphys->ds_snapnames_zapobj = ++ zap_create_norm(mos, U8_TEXTPREP_TOUPPER, DMU_OT_DSL_DS_SNAP_MAP, ++ DMU_OT_NONE, 0, tx); ++ dsphys->ds_creation_time = gethrestime_sec(); ++ dsphys->ds_creation_txg = tx->tx_txg == TXG_INITIAL ? 
1 : tx->tx_txg; ++ ++ if (origin == NULL) { ++ dsphys->ds_deadlist_obj = dsl_deadlist_alloc(mos, tx); ++ } else { ++ dsl_dataset_t *ohds; ++ ++ dsphys->ds_prev_snap_obj = origin->ds_object; ++ dsphys->ds_prev_snap_txg = ++ origin->ds_phys->ds_creation_txg; ++ dsphys->ds_used_bytes = ++ origin->ds_phys->ds_used_bytes; ++ dsphys->ds_compressed_bytes = ++ origin->ds_phys->ds_compressed_bytes; ++ dsphys->ds_uncompressed_bytes = ++ origin->ds_phys->ds_uncompressed_bytes; ++ dsphys->ds_bp = origin->ds_phys->ds_bp; ++ dsphys->ds_flags |= origin->ds_phys->ds_flags; ++ ++ dmu_buf_will_dirty(origin->ds_dbuf, tx); ++ origin->ds_phys->ds_num_children++; ++ ++ VERIFY3U(0, ==, dsl_dataset_hold_obj(dp, ++ origin->ds_dir->dd_phys->dd_head_dataset_obj, FTAG, &ohds)); ++ dsphys->ds_deadlist_obj = dsl_deadlist_clone(&ohds->ds_deadlist, ++ dsphys->ds_prev_snap_txg, dsphys->ds_prev_snap_obj, tx); ++ dsl_dataset_rele(ohds, FTAG); ++ ++ if (spa_version(dp->dp_spa) >= SPA_VERSION_NEXT_CLONES) { ++ if (origin->ds_phys->ds_next_clones_obj == 0) { ++ origin->ds_phys->ds_next_clones_obj = ++ zap_create(mos, ++ DMU_OT_NEXT_CLONES, DMU_OT_NONE, 0, tx); ++ } ++ VERIFY(0 == zap_add_int(mos, ++ origin->ds_phys->ds_next_clones_obj, ++ dsobj, tx)); ++ } ++ ++ dmu_buf_will_dirty(dd->dd_dbuf, tx); ++ dd->dd_phys->dd_origin_obj = origin->ds_object; ++ if (spa_version(dp->dp_spa) >= SPA_VERSION_DIR_CLONES) { ++ if (origin->ds_dir->dd_phys->dd_clones == 0) { ++ dmu_buf_will_dirty(origin->ds_dir->dd_dbuf, tx); ++ origin->ds_dir->dd_phys->dd_clones = ++ zap_create(mos, ++ DMU_OT_DSL_CLONES, DMU_OT_NONE, 0, tx); ++ } ++ VERIFY3U(0, ==, zap_add_int(mos, ++ origin->ds_dir->dd_phys->dd_clones, dsobj, tx)); ++ } ++ } ++ ++ if (spa_version(dp->dp_spa) >= SPA_VERSION_UNIQUE_ACCURATE) ++ dsphys->ds_flags |= DS_FLAG_UNIQUE_ACCURATE; ++ ++ dmu_buf_rele(dbuf, FTAG); ++ ++ dmu_buf_will_dirty(dd->dd_dbuf, tx); ++ dd->dd_phys->dd_head_dataset_obj = dsobj; ++ ++ return (dsobj); ++} ++ ++uint64_t ++dsl_dataset_create_sync(dsl_dir_t *pdd, const char *lastname, ++ dsl_dataset_t *origin, uint64_t flags, cred_t *cr, dmu_tx_t *tx) ++{ ++ dsl_pool_t *dp = pdd->dd_pool; ++ uint64_t dsobj, ddobj; ++ dsl_dir_t *dd; ++ ++ ASSERT(lastname[0] != '@'); ++ ++ ddobj = dsl_dir_create_sync(dp, pdd, lastname, tx); ++ VERIFY(0 == dsl_dir_open_obj(dp, ddobj, lastname, FTAG, &dd)); ++ ++ dsobj = dsl_dataset_create_sync_dd(dd, origin, flags, tx); ++ ++ dsl_deleg_set_create_perms(dd, tx, cr); ++ ++ dsl_dir_close(dd, FTAG); ++ ++ /* ++ * If we are creating a clone, make sure we zero out any stale ++ * data from the origin snapshots zil header. ++ */ ++ if (origin != NULL) { ++ dsl_dataset_t *ds; ++ objset_t *os; ++ ++ VERIFY3U(0, ==, dsl_dataset_hold_obj(dp, dsobj, FTAG, &ds)); ++ VERIFY3U(0, ==, dmu_objset_from_ds(ds, &os)); ++ bzero(&os->os_zil_header, sizeof (os->os_zil_header)); ++ dsl_dataset_dirty(ds, tx); ++ dsl_dataset_rele(ds, FTAG); ++ } ++ ++ return (dsobj); ++} ++ ++/* ++ * The snapshots must all be in the same pool. 
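++ * On failure, the name of the snapshot that triggered the error is
++ * copied into 'failed'.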
++ */ ++int ++dmu_snapshots_destroy_nvl(nvlist_t *snaps, boolean_t defer, char *failed) ++{ ++ int err; ++ dsl_sync_task_t *dst; ++ spa_t *spa; ++ nvpair_t *pair; ++ dsl_sync_task_group_t *dstg; ++ ++ pair = nvlist_next_nvpair(snaps, NULL); ++ if (pair == NULL) ++ return (0); ++ ++ err = spa_open(nvpair_name(pair), &spa, FTAG); ++ if (err) ++ return (err); ++ dstg = dsl_sync_task_group_create(spa_get_dsl(spa)); ++ ++ for (pair = nvlist_next_nvpair(snaps, NULL); pair != NULL; ++ pair = nvlist_next_nvpair(snaps, pair)) { ++ dsl_dataset_t *ds; ++ int err; ++ ++ err = dsl_dataset_own(nvpair_name(pair), B_TRUE, dstg, &ds); ++ if (err == 0) { ++ struct dsl_ds_destroyarg *dsda; ++ ++ dsl_dataset_make_exclusive(ds, dstg); ++ dsda = kmem_zalloc(sizeof (struct dsl_ds_destroyarg), ++ KM_SLEEP); ++ dsda->ds = ds; ++ dsda->defer = defer; ++ dsl_sync_task_create(dstg, dsl_dataset_destroy_check, ++ dsl_dataset_destroy_sync, dsda, dstg, 0); ++ } else if (err == ENOENT) { ++ err = 0; ++ } else { ++ (void) strcpy(failed, nvpair_name(pair)); ++ break; ++ } ++ } ++ ++ if (err == 0) ++ err = dsl_sync_task_group_wait(dstg); ++ ++ for (dst = list_head(&dstg->dstg_tasks); dst; ++ dst = list_next(&dstg->dstg_tasks, dst)) { ++ struct dsl_ds_destroyarg *dsda = dst->dst_arg1; ++ dsl_dataset_t *ds = dsda->ds; ++ ++ /* ++ * Return the file system name that triggered the error ++ */ ++ if (dst->dst_err) { ++ dsl_dataset_name(ds, failed); ++ } ++ ASSERT3P(dsda->rm_origin, ==, NULL); ++ dsl_dataset_disown(ds, dstg); ++ kmem_free(dsda, sizeof (struct dsl_ds_destroyarg)); ++ } ++ ++ dsl_sync_task_group_destroy(dstg); ++ spa_close(spa, FTAG); ++ return (err); ++ ++} ++ ++static boolean_t ++dsl_dataset_might_destroy_origin(dsl_dataset_t *ds) ++{ ++ boolean_t might_destroy = B_FALSE; ++ ++ mutex_enter(&ds->ds_lock); ++ if (ds->ds_phys->ds_num_children == 2 && ds->ds_userrefs == 0 && ++ DS_IS_DEFER_DESTROY(ds)) ++ might_destroy = B_TRUE; ++ mutex_exit(&ds->ds_lock); ++ ++ return (might_destroy); ++} ++ ++/* ++ * If we're removing a clone, and these three conditions are true: ++ * 1) the clone's origin has no other children ++ * 2) the clone's origin has no user references ++ * 3) the clone's origin has been marked for deferred destruction ++ * Then, prepare to remove the origin as part of this sync task group. ++ */ ++static int ++dsl_dataset_origin_rm_prep(struct dsl_ds_destroyarg *dsda, void *tag) ++{ ++ dsl_dataset_t *ds = dsda->ds; ++ dsl_dataset_t *origin = ds->ds_prev; ++ ++ if (dsl_dataset_might_destroy_origin(origin)) { ++ char *name; ++ int namelen; ++ int error; ++ ++ namelen = dsl_dataset_namelen(origin) + 1; ++ name = kmem_alloc(namelen, KM_SLEEP); ++ dsl_dataset_name(origin, name); ++#ifdef _KERNEL ++ error = zfs_unmount_snap(name, NULL); ++ if (error) { ++ kmem_free(name, namelen); ++ return (error); ++ } ++#endif ++ error = dsl_dataset_own(name, B_TRUE, tag, &origin); ++ kmem_free(name, namelen); ++ if (error) ++ return (error); ++ dsda->rm_origin = origin; ++ dsl_dataset_make_exclusive(origin, tag); ++ } ++ ++ return (0); ++} ++ ++/* ++ * ds must be opened as OWNER. On return (whether successful or not), ++ * ds will be closed and caller can no longer dereference it. 
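++ * Snapshots are destroyed in a single sync task. For a head dataset,
++ * most objects are freed in open context first so that the final sync
++ * task has less to do.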
++ */ ++int ++dsl_dataset_destroy(dsl_dataset_t *ds, void *tag, boolean_t defer) ++{ ++ int err; ++ dsl_sync_task_group_t *dstg; ++ objset_t *os; ++ dsl_dir_t *dd; ++ uint64_t obj; ++ struct dsl_ds_destroyarg dsda = { 0 }; ++ dsl_dataset_t *dummy_ds; ++ ++ dsda.ds = ds; ++ ++ if (dsl_dataset_is_snapshot(ds)) { ++ /* Destroying a snapshot is simpler */ ++ dsl_dataset_make_exclusive(ds, tag); ++ ++ dsda.defer = defer; ++ err = dsl_sync_task_do(ds->ds_dir->dd_pool, ++ dsl_dataset_destroy_check, dsl_dataset_destroy_sync, ++ &dsda, tag, 0); ++ ASSERT3P(dsda.rm_origin, ==, NULL); ++ goto out; ++ } else if (defer) { ++ err = EINVAL; ++ goto out; ++ } ++ ++ dd = ds->ds_dir; ++ dummy_ds = kmem_zalloc(sizeof (dsl_dataset_t), KM_SLEEP); ++ dummy_ds->ds_dir = dd; ++ dummy_ds->ds_object = ds->ds_object; ++ ++ /* ++ * Check for errors and mark this ds as inconsistent, in ++ * case we crash while freeing the objects. ++ */ ++ err = dsl_sync_task_do(dd->dd_pool, dsl_dataset_destroy_begin_check, ++ dsl_dataset_destroy_begin_sync, ds, NULL, 0); ++ if (err) ++ goto out_free; ++ ++ err = dmu_objset_from_ds(ds, &os); ++ if (err) ++ goto out_free; ++ ++ /* ++ * remove the objects in open context, so that we won't ++ * have too much to do in syncing context. ++ */ ++ for (obj = 0; err == 0; err = dmu_object_next(os, &obj, FALSE, ++ ds->ds_phys->ds_prev_snap_txg)) { ++ /* ++ * Ignore errors, if there is not enough disk space ++ * we will deal with it in dsl_dataset_destroy_sync(). ++ */ ++ (void) dmu_free_object(os, obj); ++ } ++ if (err != ESRCH) ++ goto out_free; ++ ++ /* ++ * Only the ZIL knows how to free log blocks. ++ */ ++ zil_destroy(dmu_objset_zil(os), B_FALSE); ++ ++ /* ++ * Sync out all in-flight IO. ++ */ ++ txg_wait_synced(dd->dd_pool, 0); ++ ++ /* ++ * If we managed to free all the objects in open ++ * context, the user space accounting should be zero. ++ */ ++ if (ds->ds_phys->ds_bp.blk_fill == 0 && ++ dmu_objset_userused_enabled(os)) { ++ ASSERTV(uint64_t count); ++ ASSERT(zap_count(os, DMU_USERUSED_OBJECT, &count) != 0 || ++ count == 0); ++ ASSERT(zap_count(os, DMU_GROUPUSED_OBJECT, &count) != 0 || ++ count == 0); ++ } ++ ++ rw_enter(&dd->dd_pool->dp_config_rwlock, RW_READER); ++ err = dsl_dir_open_obj(dd->dd_pool, dd->dd_object, NULL, FTAG, &dd); ++ rw_exit(&dd->dd_pool->dp_config_rwlock); ++ ++ if (err) ++ goto out_free; ++ ++ /* ++ * Blow away the dsl_dir + head dataset. ++ */ ++ dsl_dataset_make_exclusive(ds, tag); ++ /* ++ * If we're removing a clone, we might also need to remove its ++ * origin. ++ */ ++ do { ++ dsda.need_prep = B_FALSE; ++ if (dsl_dir_is_clone(dd)) { ++ err = dsl_dataset_origin_rm_prep(&dsda, tag); ++ if (err) { ++ dsl_dir_close(dd, FTAG); ++ goto out_free; ++ } ++ } ++ ++ dstg = dsl_sync_task_group_create(ds->ds_dir->dd_pool); ++ dsl_sync_task_create(dstg, dsl_dataset_destroy_check, ++ dsl_dataset_destroy_sync, &dsda, tag, 0); ++ dsl_sync_task_create(dstg, dsl_dir_destroy_check, ++ dsl_dir_destroy_sync, dummy_ds, FTAG, 0); ++ err = dsl_sync_task_group_wait(dstg); ++ dsl_sync_task_group_destroy(dstg); ++ ++ /* ++ * We could be racing against 'zfs release' or 'zfs destroy -d' ++ * on the origin snap, in which case we can get EBUSY if we ++ * needed to destroy the origin snap but were not ready to ++ * do so. 
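++	 * When that happens, need_prep is set and the loop below retries
++	 * after preparing the origin for removal.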
++ */ ++ if (dsda.need_prep) { ++ ASSERT(err == EBUSY); ++ ASSERT(dsl_dir_is_clone(dd)); ++ ASSERT(dsda.rm_origin == NULL); ++ } ++ } while (dsda.need_prep); ++ ++ if (dsda.rm_origin != NULL) ++ dsl_dataset_disown(dsda.rm_origin, tag); ++ ++ /* if it is successful, dsl_dir_destroy_sync will close the dd */ ++ if (err) ++ dsl_dir_close(dd, FTAG); ++ ++out_free: ++ kmem_free(dummy_ds, sizeof (dsl_dataset_t)); ++out: ++ dsl_dataset_disown(ds, tag); ++ return (err); ++} ++ ++blkptr_t * ++dsl_dataset_get_blkptr(dsl_dataset_t *ds) ++{ ++ return (&ds->ds_phys->ds_bp); ++} ++ ++void ++dsl_dataset_set_blkptr(dsl_dataset_t *ds, blkptr_t *bp, dmu_tx_t *tx) ++{ ++ ASSERT(dmu_tx_is_syncing(tx)); ++ /* If it's the meta-objset, set dp_meta_rootbp */ ++ if (ds == NULL) { ++ tx->tx_pool->dp_meta_rootbp = *bp; ++ } else { ++ dmu_buf_will_dirty(ds->ds_dbuf, tx); ++ ds->ds_phys->ds_bp = *bp; ++ } ++} ++ ++spa_t * ++dsl_dataset_get_spa(dsl_dataset_t *ds) ++{ ++ return (ds->ds_dir->dd_pool->dp_spa); ++} ++ ++void ++dsl_dataset_dirty(dsl_dataset_t *ds, dmu_tx_t *tx) ++{ ++ dsl_pool_t *dp; ++ ++ if (ds == NULL) /* this is the meta-objset */ ++ return; ++ ++ ASSERT(ds->ds_objset != NULL); ++ ++ if (ds->ds_phys->ds_next_snap_obj != 0) ++ panic("dirtying snapshot!"); ++ ++ dp = ds->ds_dir->dd_pool; ++ ++ if (txg_list_add(&dp->dp_dirty_datasets, ds, tx->tx_txg) == 0) { ++ /* up the hold count until we can be written out */ ++ dmu_buf_add_ref(ds->ds_dbuf, ds); ++ } ++} ++ ++boolean_t ++dsl_dataset_is_dirty(dsl_dataset_t *ds) ++{ ++ int t; ++ ++ for (t = 0; t < TXG_SIZE; t++) { ++ if (txg_list_member(&ds->ds_dir->dd_pool->dp_dirty_datasets, ++ ds, t)) ++ return (B_TRUE); ++ } ++ return (B_FALSE); ++} ++ ++/* ++ * The unique space in the head dataset can be calculated by subtracting ++ * the space used in the most recent snapshot, that is still being used ++ * in this file system, from the space currently in use. To figure out ++ * the space in the most recent snapshot still in use, we need to take ++ * the total space used in the snapshot and subtract out the space that ++ * has been freed up since the snapshot was taken. ++ */ ++static void ++dsl_dataset_recalc_head_uniq(dsl_dataset_t *ds) ++{ ++ uint64_t mrs_used; ++ uint64_t dlused, dlcomp, dluncomp; ++ ++ ASSERT(!dsl_dataset_is_snapshot(ds)); ++ ++ if (ds->ds_phys->ds_prev_snap_obj != 0) ++ mrs_used = ds->ds_prev->ds_phys->ds_used_bytes; ++ else ++ mrs_used = 0; ++ ++ dsl_deadlist_space(&ds->ds_deadlist, &dlused, &dlcomp, &dluncomp); ++ ++ ASSERT3U(dlused, <=, mrs_used); ++ ds->ds_phys->ds_unique_bytes = ++ ds->ds_phys->ds_used_bytes - (mrs_used - dlused); ++ ++ if (spa_version(ds->ds_dir->dd_pool->dp_spa) >= ++ SPA_VERSION_UNIQUE_ACCURATE) ++ ds->ds_phys->ds_flags |= DS_FLAG_UNIQUE_ACCURATE; ++} ++ ++struct killarg { ++ dsl_dataset_t *ds; ++ dmu_tx_t *tx; ++}; ++ ++/* ARGSUSED */ ++static int ++kill_blkptr(spa_t *spa, zilog_t *zilog, const blkptr_t *bp, arc_buf_t *pbuf, ++ const zbookmark_t *zb, const dnode_phys_t *dnp, void *arg) ++{ ++ struct killarg *ka = arg; ++ dmu_tx_t *tx = ka->tx; ++ ++ if (bp == NULL) ++ return (0); ++ ++ if (zb->zb_level == ZB_ZIL_LEVEL) { ++ ASSERT(zilog != NULL); ++ /* ++ * It's a block in the intent log. It has no ++ * accounting, so just free it. 
++ */ ++ dsl_free(ka->tx->tx_pool, ka->tx->tx_txg, bp); ++ } else { ++ ASSERT(zilog == NULL); ++ ASSERT3U(bp->blk_birth, >, ka->ds->ds_phys->ds_prev_snap_txg); ++ (void) dsl_dataset_block_kill(ka->ds, bp, tx, B_FALSE); ++ } ++ ++ return (0); ++} ++ ++/* ARGSUSED */ ++static int ++dsl_dataset_destroy_begin_check(void *arg1, void *arg2, dmu_tx_t *tx) ++{ ++ dsl_dataset_t *ds = arg1; ++ objset_t *mos = ds->ds_dir->dd_pool->dp_meta_objset; ++ uint64_t count; ++ int err; ++ ++ /* ++ * Can't delete a head dataset if there are snapshots of it. ++ * (Except if the only snapshots are from the branch we cloned ++ * from.) ++ */ ++ if (ds->ds_prev != NULL && ++ ds->ds_prev->ds_phys->ds_next_snap_obj == ds->ds_object) ++ return (EBUSY); ++ ++ /* ++ * This is really a dsl_dir thing, but check it here so that ++ * we'll be less likely to leave this dataset inconsistent & ++ * nearly destroyed. ++ */ ++ err = zap_count(mos, ds->ds_dir->dd_phys->dd_child_dir_zapobj, &count); ++ if (err) ++ return (err); ++ if (count != 0) ++ return (EEXIST); ++ ++ return (0); ++} ++ ++/* ARGSUSED */ ++static void ++dsl_dataset_destroy_begin_sync(void *arg1, void *arg2, dmu_tx_t *tx) ++{ ++ dsl_dataset_t *ds = arg1; ++ dsl_pool_t *dp = ds->ds_dir->dd_pool; ++ ++ /* Mark it as inconsistent on-disk, in case we crash */ ++ dmu_buf_will_dirty(ds->ds_dbuf, tx); ++ ds->ds_phys->ds_flags |= DS_FLAG_INCONSISTENT; ++ ++ spa_history_log_internal(LOG_DS_DESTROY_BEGIN, dp->dp_spa, tx, ++ "dataset = %llu", ds->ds_object); ++} ++ ++static int ++dsl_dataset_origin_check(struct dsl_ds_destroyarg *dsda, void *tag, ++ dmu_tx_t *tx) ++{ ++ dsl_dataset_t *ds = dsda->ds; ++ dsl_dataset_t *ds_prev = ds->ds_prev; ++ ++ if (dsl_dataset_might_destroy_origin(ds_prev)) { ++ struct dsl_ds_destroyarg ndsda = {0}; ++ ++ /* ++ * If we're not prepared to remove the origin, don't remove ++ * the clone either. ++ */ ++ if (dsda->rm_origin == NULL) { ++ dsda->need_prep = B_TRUE; ++ return (EBUSY); ++ } ++ ++ ndsda.ds = ds_prev; ++ ndsda.is_origin_rm = B_TRUE; ++ return (dsl_dataset_destroy_check(&ndsda, tag, tx)); ++ } ++ ++ /* ++ * If we're not going to remove the origin after all, ++ * undo the open context setup. ++ */ ++ if (dsda->rm_origin != NULL) { ++ dsl_dataset_disown(dsda->rm_origin, tag); ++ dsda->rm_origin = NULL; ++ } ++ ++ return (0); ++} ++ ++/* ++ * If you add new checks here, you may need to add ++ * additional checks to the "temporary" case in ++ * snapshot_check() in dmu_objset.c. ++ */ ++/* ARGSUSED */ ++int ++dsl_dataset_destroy_check(void *arg1, void *arg2, dmu_tx_t *tx) ++{ ++ struct dsl_ds_destroyarg *dsda = arg1; ++ dsl_dataset_t *ds = dsda->ds; ++ ++ /* we have an owner hold, so noone else can destroy us */ ++ ASSERT(!DSL_DATASET_IS_DESTROYED(ds)); ++ ++ /* ++ * Only allow deferred destroy on pools that support it. ++ * NOTE: deferred destroy is only supported on snapshots. ++ */ ++ if (dsda->defer) { ++ if (spa_version(ds->ds_dir->dd_pool->dp_spa) < ++ SPA_VERSION_USERREFS) ++ return (ENOTSUP); ++ ASSERT(dsl_dataset_is_snapshot(ds)); ++ return (0); ++ } ++ ++ /* ++ * Can't delete a head dataset if there are snapshots of it. ++ * (Except if the only snapshots are from the branch we cloned ++ * from.) ++ */ ++ if (ds->ds_prev != NULL && ++ ds->ds_prev->ds_phys->ds_next_snap_obj == ds->ds_object) ++ return (EBUSY); ++ ++ /* ++ * If we made changes this txg, traverse_dsl_dataset won't find ++ * them. Try again. 
++ */ ++ if (ds->ds_phys->ds_bp.blk_birth >= tx->tx_txg) ++ return (EAGAIN); ++ ++ if (dsl_dataset_is_snapshot(ds)) { ++ /* ++ * If this snapshot has an elevated user reference count, ++ * we can't destroy it yet. ++ */ ++ if (ds->ds_userrefs > 0 && !dsda->releasing) ++ return (EBUSY); ++ ++ mutex_enter(&ds->ds_lock); ++ /* ++ * Can't delete a branch point. However, if we're destroying ++ * a clone and removing its origin due to it having a user ++ * hold count of 0 and having been marked for deferred destroy, ++ * it's OK for the origin to have a single clone. ++ */ ++ if (ds->ds_phys->ds_num_children > ++ (dsda->is_origin_rm ? 2 : 1)) { ++ mutex_exit(&ds->ds_lock); ++ return (EEXIST); ++ } ++ mutex_exit(&ds->ds_lock); ++ } else if (dsl_dir_is_clone(ds->ds_dir)) { ++ return (dsl_dataset_origin_check(dsda, arg2, tx)); ++ } ++ ++ /* XXX we should do some i/o error checking... */ ++ return (0); ++} ++ ++struct refsarg { ++ kmutex_t lock; ++ boolean_t gone; ++ kcondvar_t cv; ++}; ++ ++/* ARGSUSED */ ++static void ++dsl_dataset_refs_gone(dmu_buf_t *db, void *argv) ++{ ++ struct refsarg *arg = argv; ++ ++ mutex_enter(&arg->lock); ++ arg->gone = TRUE; ++ cv_signal(&arg->cv); ++ mutex_exit(&arg->lock); ++} ++ ++static void ++dsl_dataset_drain_refs(dsl_dataset_t *ds, void *tag) ++{ ++ struct refsarg arg; ++ ++ mutex_init(&arg.lock, NULL, MUTEX_DEFAULT, NULL); ++ cv_init(&arg.cv, NULL, CV_DEFAULT, NULL); ++ arg.gone = FALSE; ++ (void) dmu_buf_update_user(ds->ds_dbuf, ds, &arg, &ds->ds_phys, ++ dsl_dataset_refs_gone); ++ dmu_buf_rele(ds->ds_dbuf, tag); ++ mutex_enter(&arg.lock); ++ while (!arg.gone) ++ cv_wait(&arg.cv, &arg.lock); ++ ASSERT(arg.gone); ++ mutex_exit(&arg.lock); ++ ds->ds_dbuf = NULL; ++ ds->ds_phys = NULL; ++ mutex_destroy(&arg.lock); ++ cv_destroy(&arg.cv); ++} ++ ++static void ++remove_from_next_clones(dsl_dataset_t *ds, uint64_t obj, dmu_tx_t *tx) ++{ ++ objset_t *mos = ds->ds_dir->dd_pool->dp_meta_objset; ++ int err; ++ ASSERTV(uint64_t count); ++ ++ ASSERT(ds->ds_phys->ds_num_children >= 2); ++ err = zap_remove_int(mos, ds->ds_phys->ds_next_clones_obj, obj, tx); ++ /* ++ * The err should not be ENOENT, but a bug in a previous version ++ * of the code could cause upgrade_clones_cb() to not set ++ * ds_next_snap_obj when it should, leading to a missing entry. ++ * If we knew that the pool was created after ++ * SPA_VERSION_NEXT_CLONES, we could assert that it isn't ++ * ENOENT. However, at least we can check that we don't have ++ * too many entries in the next_clones_obj even after failing to ++ * remove this one. ++ */ ++ if (err != ENOENT) { ++ VERIFY3U(err, ==, 0); ++ } ++ ASSERT3U(0, ==, zap_count(mos, ds->ds_phys->ds_next_clones_obj, ++ &count)); ++ ASSERT3U(count, <=, ds->ds_phys->ds_num_children - 2); ++} ++ ++static void ++dsl_dataset_remove_clones_key(dsl_dataset_t *ds, uint64_t mintxg, dmu_tx_t *tx) ++{ ++ objset_t *mos = ds->ds_dir->dd_pool->dp_meta_objset; ++ zap_cursor_t zc; ++ zap_attribute_t za; ++ ++ /* ++ * If it is the old version, dd_clones doesn't exist so we can't ++ * find the clones, but deadlist_remove_key() is a no-op so it ++ * doesn't matter. 
++ */ ++ if (ds->ds_dir->dd_phys->dd_clones == 0) ++ return; ++ ++ for (zap_cursor_init(&zc, mos, ds->ds_dir->dd_phys->dd_clones); ++ zap_cursor_retrieve(&zc, &za) == 0; ++ zap_cursor_advance(&zc)) { ++ dsl_dataset_t *clone; ++ ++ VERIFY3U(0, ==, dsl_dataset_hold_obj(ds->ds_dir->dd_pool, ++ za.za_first_integer, FTAG, &clone)); ++ if (clone->ds_dir->dd_origin_txg > mintxg) { ++ dsl_deadlist_remove_key(&clone->ds_deadlist, ++ mintxg, tx); ++ dsl_dataset_remove_clones_key(clone, mintxg, tx); ++ } ++ dsl_dataset_rele(clone, FTAG); ++ } ++ zap_cursor_fini(&zc); ++} ++ ++struct process_old_arg { ++ dsl_dataset_t *ds; ++ dsl_dataset_t *ds_prev; ++ boolean_t after_branch_point; ++ zio_t *pio; ++ uint64_t used, comp, uncomp; ++}; ++ ++static int ++process_old_cb(void *arg, const blkptr_t *bp, dmu_tx_t *tx) ++{ ++ struct process_old_arg *poa = arg; ++ dsl_pool_t *dp = poa->ds->ds_dir->dd_pool; ++ ++ if (bp->blk_birth <= poa->ds->ds_phys->ds_prev_snap_txg) { ++ dsl_deadlist_insert(&poa->ds->ds_deadlist, bp, tx); ++ if (poa->ds_prev && !poa->after_branch_point && ++ bp->blk_birth > ++ poa->ds_prev->ds_phys->ds_prev_snap_txg) { ++ poa->ds_prev->ds_phys->ds_unique_bytes += ++ bp_get_dsize_sync(dp->dp_spa, bp); ++ } ++ } else { ++ poa->used += bp_get_dsize_sync(dp->dp_spa, bp); ++ poa->comp += BP_GET_PSIZE(bp); ++ poa->uncomp += BP_GET_UCSIZE(bp); ++ dsl_free_sync(poa->pio, dp, tx->tx_txg, bp); ++ } ++ return (0); ++} ++ ++static void ++process_old_deadlist(dsl_dataset_t *ds, dsl_dataset_t *ds_prev, ++ dsl_dataset_t *ds_next, boolean_t after_branch_point, dmu_tx_t *tx) ++{ ++ struct process_old_arg poa = { 0 }; ++ dsl_pool_t *dp = ds->ds_dir->dd_pool; ++ objset_t *mos = dp->dp_meta_objset; ++ ++ ASSERT(ds->ds_deadlist.dl_oldfmt); ++ ASSERT(ds_next->ds_deadlist.dl_oldfmt); ++ ++ poa.ds = ds; ++ poa.ds_prev = ds_prev; ++ poa.after_branch_point = after_branch_point; ++ poa.pio = zio_root(dp->dp_spa, NULL, NULL, ZIO_FLAG_MUSTSUCCEED); ++ VERIFY3U(0, ==, bpobj_iterate(&ds_next->ds_deadlist.dl_bpobj, ++ process_old_cb, &poa, tx)); ++ VERIFY3U(zio_wait(poa.pio), ==, 0); ++ ASSERT3U(poa.used, ==, ds->ds_phys->ds_unique_bytes); ++ ++ /* change snapused */ ++ dsl_dir_diduse_space(ds->ds_dir, DD_USED_SNAP, ++ -poa.used, -poa.comp, -poa.uncomp, tx); ++ ++ /* swap next's deadlist to our deadlist */ ++ dsl_deadlist_close(&ds->ds_deadlist); ++ dsl_deadlist_close(&ds_next->ds_deadlist); ++ SWITCH64(ds_next->ds_phys->ds_deadlist_obj, ++ ds->ds_phys->ds_deadlist_obj); ++ dsl_deadlist_open(&ds->ds_deadlist, mos, ds->ds_phys->ds_deadlist_obj); ++ dsl_deadlist_open(&ds_next->ds_deadlist, mos, ++ ds_next->ds_phys->ds_deadlist_obj); ++} ++ ++void ++dsl_dataset_destroy_sync(void *arg1, void *tag, dmu_tx_t *tx) ++{ ++ struct dsl_ds_destroyarg *dsda = arg1; ++ dsl_dataset_t *ds = dsda->ds; ++ int err; ++ int after_branch_point = FALSE; ++ dsl_pool_t *dp = ds->ds_dir->dd_pool; ++ objset_t *mos = dp->dp_meta_objset; ++ dsl_dataset_t *ds_prev = NULL; ++ boolean_t wont_destroy; ++ uint64_t obj; ++ ++ wont_destroy = (dsda->defer && ++ (ds->ds_userrefs > 0 || ds->ds_phys->ds_num_children > 1)); ++ ++ ASSERT(ds->ds_owner || wont_destroy); ++ ASSERT(dsda->defer || ds->ds_phys->ds_num_children <= 1); ++ ASSERT(ds->ds_prev == NULL || ++ ds->ds_prev->ds_phys->ds_next_snap_obj != ds->ds_object); ++ ASSERT3U(ds->ds_phys->ds_bp.blk_birth, <=, tx->tx_txg); ++ ++ if (wont_destroy) { ++ ASSERT(spa_version(dp->dp_spa) >= SPA_VERSION_USERREFS); ++ dmu_buf_will_dirty(ds->ds_dbuf, tx); ++ ds->ds_phys->ds_flags |= DS_FLAG_DEFER_DESTROY; ++ return; ++ 
} ++ ++ /* signal any waiters that this dataset is going away */ ++ mutex_enter(&ds->ds_lock); ++ ds->ds_owner = dsl_reaper; ++ cv_broadcast(&ds->ds_exclusive_cv); ++ mutex_exit(&ds->ds_lock); ++ ++ /* Remove our reservation */ ++ if (ds->ds_reserved != 0) { ++ dsl_prop_setarg_t psa; ++ uint64_t value = 0; ++ ++ dsl_prop_setarg_init_uint64(&psa, "refreservation", ++ (ZPROP_SRC_NONE | ZPROP_SRC_LOCAL | ZPROP_SRC_RECEIVED), ++ &value); ++ psa.psa_effective_value = 0; /* predict default value */ ++ ++ dsl_dataset_set_reservation_sync(ds, &psa, tx); ++ ASSERT3U(ds->ds_reserved, ==, 0); ++ } ++ ++ ASSERT(RW_WRITE_HELD(&dp->dp_config_rwlock)); ++ ++ dsl_scan_ds_destroyed(ds, tx); ++ ++ obj = ds->ds_object; ++ ++ if (ds->ds_phys->ds_prev_snap_obj != 0) { ++ if (ds->ds_prev) { ++ ds_prev = ds->ds_prev; ++ } else { ++ VERIFY(0 == dsl_dataset_hold_obj(dp, ++ ds->ds_phys->ds_prev_snap_obj, FTAG, &ds_prev)); ++ } ++ after_branch_point = ++ (ds_prev->ds_phys->ds_next_snap_obj != obj); ++ ++ dmu_buf_will_dirty(ds_prev->ds_dbuf, tx); ++ if (after_branch_point && ++ ds_prev->ds_phys->ds_next_clones_obj != 0) { ++ remove_from_next_clones(ds_prev, obj, tx); ++ if (ds->ds_phys->ds_next_snap_obj != 0) { ++ VERIFY(0 == zap_add_int(mos, ++ ds_prev->ds_phys->ds_next_clones_obj, ++ ds->ds_phys->ds_next_snap_obj, tx)); ++ } ++ } ++ if (after_branch_point && ++ ds->ds_phys->ds_next_snap_obj == 0) { ++ /* This clone is toast. */ ++ ASSERT(ds_prev->ds_phys->ds_num_children > 1); ++ ds_prev->ds_phys->ds_num_children--; ++ ++ /* ++ * If the clone's origin has no other clones, no ++ * user holds, and has been marked for deferred ++ * deletion, then we should have done the necessary ++ * destroy setup for it. ++ */ ++ if (ds_prev->ds_phys->ds_num_children == 1 && ++ ds_prev->ds_userrefs == 0 && ++ DS_IS_DEFER_DESTROY(ds_prev)) { ++ ASSERT3P(dsda->rm_origin, !=, NULL); ++ } else { ++ ASSERT3P(dsda->rm_origin, ==, NULL); ++ } ++ } else if (!after_branch_point) { ++ ds_prev->ds_phys->ds_next_snap_obj = ++ ds->ds_phys->ds_next_snap_obj; ++ } ++ } ++ ++ if (dsl_dataset_is_snapshot(ds)) { ++ dsl_dataset_t *ds_next; ++ uint64_t old_unique; ++ uint64_t used = 0, comp = 0, uncomp = 0; ++ ++ VERIFY(0 == dsl_dataset_hold_obj(dp, ++ ds->ds_phys->ds_next_snap_obj, FTAG, &ds_next)); ++ ASSERT3U(ds_next->ds_phys->ds_prev_snap_obj, ==, obj); ++ ++ old_unique = ds_next->ds_phys->ds_unique_bytes; ++ ++ dmu_buf_will_dirty(ds_next->ds_dbuf, tx); ++ ds_next->ds_phys->ds_prev_snap_obj = ++ ds->ds_phys->ds_prev_snap_obj; ++ ds_next->ds_phys->ds_prev_snap_txg = ++ ds->ds_phys->ds_prev_snap_txg; ++ ASSERT3U(ds->ds_phys->ds_prev_snap_txg, ==, ++ ds_prev ? ds_prev->ds_phys->ds_creation_txg : 0); ++ ++ ++ if (ds_next->ds_deadlist.dl_oldfmt) { ++ process_old_deadlist(ds, ds_prev, ds_next, ++ after_branch_point, tx); ++ } else { ++ /* Adjust prev's unique space. */ ++ if (ds_prev && !after_branch_point) { ++ dsl_deadlist_space_range(&ds_next->ds_deadlist, ++ ds_prev->ds_phys->ds_prev_snap_txg, ++ ds->ds_phys->ds_prev_snap_txg, ++ &used, &comp, &uncomp); ++ ds_prev->ds_phys->ds_unique_bytes += used; ++ } ++ ++ /* Adjust snapused. */ ++ dsl_deadlist_space_range(&ds_next->ds_deadlist, ++ ds->ds_phys->ds_prev_snap_txg, UINT64_MAX, ++ &used, &comp, &uncomp); ++ dsl_dir_diduse_space(ds->ds_dir, DD_USED_SNAP, ++ -used, -comp, -uncomp, tx); ++ ++ /* Move blocks to be freed to pool's free list. 
*/ ++ dsl_deadlist_move_bpobj(&ds_next->ds_deadlist, ++ &dp->dp_free_bpobj, ds->ds_phys->ds_prev_snap_txg, ++ tx); ++ dsl_dir_diduse_space(tx->tx_pool->dp_free_dir, ++ DD_USED_HEAD, used, comp, uncomp, tx); ++ dsl_dir_dirty(tx->tx_pool->dp_free_dir, tx); ++ ++ /* Merge our deadlist into next's and free it. */ ++ dsl_deadlist_merge(&ds_next->ds_deadlist, ++ ds->ds_phys->ds_deadlist_obj, tx); ++ } ++ dsl_deadlist_close(&ds->ds_deadlist); ++ dsl_deadlist_free(mos, ds->ds_phys->ds_deadlist_obj, tx); ++ ++ /* Collapse range in clone heads */ ++ dsl_dataset_remove_clones_key(ds, ++ ds->ds_phys->ds_creation_txg, tx); ++ ++ if (dsl_dataset_is_snapshot(ds_next)) { ++ dsl_dataset_t *ds_nextnext; ++ dsl_dataset_t *hds; ++ ++ /* ++ * Update next's unique to include blocks which ++ * were previously shared by only this snapshot ++ * and it. Those blocks will be born after the ++ * prev snap and before this snap, and will have ++ * died after the next snap and before the one ++ * after that (ie. be on the snap after next's ++ * deadlist). ++ */ ++ VERIFY(0 == dsl_dataset_hold_obj(dp, ++ ds_next->ds_phys->ds_next_snap_obj, ++ FTAG, &ds_nextnext)); ++ dsl_deadlist_space_range(&ds_nextnext->ds_deadlist, ++ ds->ds_phys->ds_prev_snap_txg, ++ ds->ds_phys->ds_creation_txg, ++ &used, &comp, &uncomp); ++ ds_next->ds_phys->ds_unique_bytes += used; ++ dsl_dataset_rele(ds_nextnext, FTAG); ++ ASSERT3P(ds_next->ds_prev, ==, NULL); ++ ++ /* Collapse range in this head. */ ++ VERIFY3U(0, ==, dsl_dataset_hold_obj(dp, ++ ds->ds_dir->dd_phys->dd_head_dataset_obj, ++ FTAG, &hds)); ++ dsl_deadlist_remove_key(&hds->ds_deadlist, ++ ds->ds_phys->ds_creation_txg, tx); ++ dsl_dataset_rele(hds, FTAG); ++ ++ } else { ++ ASSERT3P(ds_next->ds_prev, ==, ds); ++ dsl_dataset_drop_ref(ds_next->ds_prev, ds_next); ++ ds_next->ds_prev = NULL; ++ if (ds_prev) { ++ VERIFY(0 == dsl_dataset_get_ref(dp, ++ ds->ds_phys->ds_prev_snap_obj, ++ ds_next, &ds_next->ds_prev)); ++ } ++ ++ dsl_dataset_recalc_head_uniq(ds_next); ++ ++ /* ++ * Reduce the amount of our unconsmed refreservation ++ * being charged to our parent by the amount of ++ * new unique data we have gained. ++ */ ++ if (old_unique < ds_next->ds_reserved) { ++ int64_t mrsdelta; ++ uint64_t new_unique = ++ ds_next->ds_phys->ds_unique_bytes; ++ ++ ASSERT(old_unique <= new_unique); ++ mrsdelta = MIN(new_unique - old_unique, ++ ds_next->ds_reserved - old_unique); ++ dsl_dir_diduse_space(ds->ds_dir, ++ DD_USED_REFRSRV, -mrsdelta, 0, 0, tx); ++ } ++ } ++ dsl_dataset_rele(ds_next, FTAG); ++ } else { ++ /* ++ * There's no next snapshot, so this is a head dataset. ++ * Destroy the deadlist. Unless it's a clone, the ++ * deadlist should be empty. (If it's a clone, it's ++ * safe to ignore the deadlist contents.) ++ */ ++ struct killarg ka; ++ ++ dsl_deadlist_close(&ds->ds_deadlist); ++ dsl_deadlist_free(mos, ds->ds_phys->ds_deadlist_obj, tx); ++ ds->ds_phys->ds_deadlist_obj = 0; ++ ++ /* ++ * Free everything that we point to (that's born after ++ * the previous snapshot, if we are a clone) ++ * ++ * NB: this should be very quick, because we already ++ * freed all the objects in open context. 
++ */ ++ ka.ds = ds; ++ ka.tx = tx; ++ err = traverse_dataset(ds, ds->ds_phys->ds_prev_snap_txg, ++ TRAVERSE_POST, kill_blkptr, &ka); ++ ASSERT3U(err, ==, 0); ++ ASSERT(!DS_UNIQUE_IS_ACCURATE(ds) || ++ ds->ds_phys->ds_unique_bytes == 0); ++ ++ if (ds->ds_prev != NULL) { ++ if (spa_version(dp->dp_spa) >= SPA_VERSION_DIR_CLONES) { ++ VERIFY3U(0, ==, zap_remove_int(mos, ++ ds->ds_prev->ds_dir->dd_phys->dd_clones, ++ ds->ds_object, tx)); ++ } ++ dsl_dataset_rele(ds->ds_prev, ds); ++ ds->ds_prev = ds_prev = NULL; ++ } ++ } ++ ++ /* ++ * This must be done after the dsl_traverse(), because it will ++ * re-open the objset. ++ */ ++ if (ds->ds_objset) { ++ dmu_objset_evict(ds->ds_objset); ++ ds->ds_objset = NULL; ++ } ++ ++ if (ds->ds_dir->dd_phys->dd_head_dataset_obj == ds->ds_object) { ++ /* Erase the link in the dir */ ++ dmu_buf_will_dirty(ds->ds_dir->dd_dbuf, tx); ++ ds->ds_dir->dd_phys->dd_head_dataset_obj = 0; ++ ASSERT(ds->ds_phys->ds_snapnames_zapobj != 0); ++ err = zap_destroy(mos, ds->ds_phys->ds_snapnames_zapobj, tx); ++ ASSERT(err == 0); ++ } else { ++ /* remove from snapshot namespace */ ++ dsl_dataset_t *ds_head; ++ ASSERT(ds->ds_phys->ds_snapnames_zapobj == 0); ++ VERIFY(0 == dsl_dataset_hold_obj(dp, ++ ds->ds_dir->dd_phys->dd_head_dataset_obj, FTAG, &ds_head)); ++ VERIFY(0 == dsl_dataset_get_snapname(ds)); ++#ifdef ZFS_DEBUG ++ { ++ uint64_t val; ++ ++ err = dsl_dataset_snap_lookup(ds_head, ++ ds->ds_snapname, &val); ++ ASSERT3U(err, ==, 0); ++ ASSERT3U(val, ==, obj); ++ } ++#endif ++ err = dsl_dataset_snap_remove(ds_head, ds->ds_snapname, tx); ++ ASSERT(err == 0); ++ dsl_dataset_rele(ds_head, FTAG); ++ } ++ ++ if (ds_prev && ds->ds_prev != ds_prev) ++ dsl_dataset_rele(ds_prev, FTAG); ++ ++ spa_prop_clear_bootfs(dp->dp_spa, ds->ds_object, tx); ++ spa_history_log_internal(LOG_DS_DESTROY, dp->dp_spa, tx, ++ "dataset = %llu", ds->ds_object); ++ ++ if (ds->ds_phys->ds_next_clones_obj != 0) { ++ ASSERTV(uint64_t count); ++ ASSERT(0 == zap_count(mos, ++ ds->ds_phys->ds_next_clones_obj, &count) && count == 0); ++ VERIFY(0 == dmu_object_free(mos, ++ ds->ds_phys->ds_next_clones_obj, tx)); ++ } ++ if (ds->ds_phys->ds_props_obj != 0) ++ VERIFY(0 == zap_destroy(mos, ds->ds_phys->ds_props_obj, tx)); ++ if (ds->ds_phys->ds_userrefs_obj != 0) ++ VERIFY(0 == zap_destroy(mos, ds->ds_phys->ds_userrefs_obj, tx)); ++ dsl_dir_close(ds->ds_dir, ds); ++ ds->ds_dir = NULL; ++ dsl_dataset_drain_refs(ds, tag); ++ VERIFY(0 == dmu_object_free(mos, obj, tx)); ++ ++ if (dsda->rm_origin) { ++ /* ++ * Remove the origin of the clone we just destroyed. ++ */ ++ struct dsl_ds_destroyarg ndsda = {0}; ++ ++ ndsda.ds = dsda->rm_origin; ++ dsl_dataset_destroy_sync(&ndsda, tag, tx); ++ } ++} ++ ++static int ++dsl_dataset_snapshot_reserve_space(dsl_dataset_t *ds, dmu_tx_t *tx) ++{ ++ uint64_t asize; ++ ++ if (!dmu_tx_is_syncing(tx)) ++ return (0); ++ ++ /* ++ * If there's an fs-only reservation, any blocks that might become ++ * owned by the snapshot dataset must be accommodated by space ++ * outside of the reservation. ++ */ ++ ASSERT(ds->ds_reserved == 0 || DS_UNIQUE_IS_ACCURATE(ds)); ++ asize = MIN(ds->ds_phys->ds_unique_bytes, ds->ds_reserved); ++ if (asize > dsl_dir_space_available(ds->ds_dir, NULL, 0, TRUE)) ++ return (ENOSPC); ++ ++ /* ++ * Propogate any reserved space for this snapshot to other ++ * snapshot checks in this sync group. 
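++	 * dsl_dir_willuse_space() records the charge so that later space
++	 * checks in this txg account for it.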
++ */ ++ if (asize > 0) ++ dsl_dir_willuse_space(ds->ds_dir, asize, tx); ++ ++ return (0); ++} ++ ++int ++dsl_dataset_snapshot_check(void *arg1, void *arg2, dmu_tx_t *tx) ++{ ++ dsl_dataset_t *ds = arg1; ++ const char *snapname = arg2; ++ int err; ++ uint64_t value; ++ ++ /* ++ * We don't allow multiple snapshots of the same txg. If there ++ * is already one, try again. ++ */ ++ if (ds->ds_phys->ds_prev_snap_txg >= tx->tx_txg) ++ return (EAGAIN); ++ ++ /* ++ * Check for conflicting name snapshot name. ++ */ ++ err = dsl_dataset_snap_lookup(ds, snapname, &value); ++ if (err == 0) ++ return (EEXIST); ++ if (err != ENOENT) ++ return (err); ++ ++ /* ++ * Check that the dataset's name is not too long. Name consists ++ * of the dataset's length + 1 for the @-sign + snapshot name's length ++ */ ++ if (dsl_dataset_namelen(ds) + 1 + strlen(snapname) >= MAXNAMELEN) ++ return (ENAMETOOLONG); ++ ++ err = dsl_dataset_snapshot_reserve_space(ds, tx); ++ if (err) ++ return (err); ++ ++ ds->ds_trysnap_txg = tx->tx_txg; ++ return (0); ++} ++ ++void ++dsl_dataset_snapshot_sync(void *arg1, void *arg2, dmu_tx_t *tx) ++{ ++ dsl_dataset_t *ds = arg1; ++ const char *snapname = arg2; ++ dsl_pool_t *dp = ds->ds_dir->dd_pool; ++ dmu_buf_t *dbuf; ++ dsl_dataset_phys_t *dsphys; ++ uint64_t dsobj, crtxg; ++ objset_t *mos = dp->dp_meta_objset; ++ int err; ++ ++ ASSERT(RW_WRITE_HELD(&dp->dp_config_rwlock)); ++ ++ /* ++ * The origin's ds_creation_txg has to be < TXG_INITIAL ++ */ ++ if (strcmp(snapname, ORIGIN_DIR_NAME) == 0) ++ crtxg = 1; ++ else ++ crtxg = tx->tx_txg; ++ ++ dsobj = dmu_object_alloc(mos, DMU_OT_DSL_DATASET, 0, ++ DMU_OT_DSL_DATASET, sizeof (dsl_dataset_phys_t), tx); ++ VERIFY(0 == dmu_bonus_hold(mos, dsobj, FTAG, &dbuf)); ++ dmu_buf_will_dirty(dbuf, tx); ++ dsphys = dbuf->db_data; ++ bzero(dsphys, sizeof (dsl_dataset_phys_t)); ++ dsphys->ds_dir_obj = ds->ds_dir->dd_object; ++ dsphys->ds_fsid_guid = unique_create(); ++ (void) random_get_pseudo_bytes((void*)&dsphys->ds_guid, ++ sizeof (dsphys->ds_guid)); ++ dsphys->ds_prev_snap_obj = ds->ds_phys->ds_prev_snap_obj; ++ dsphys->ds_prev_snap_txg = ds->ds_phys->ds_prev_snap_txg; ++ dsphys->ds_next_snap_obj = ds->ds_object; ++ dsphys->ds_num_children = 1; ++ dsphys->ds_creation_time = gethrestime_sec(); ++ dsphys->ds_creation_txg = crtxg; ++ dsphys->ds_deadlist_obj = ds->ds_phys->ds_deadlist_obj; ++ dsphys->ds_used_bytes = ds->ds_phys->ds_used_bytes; ++ dsphys->ds_compressed_bytes = ds->ds_phys->ds_compressed_bytes; ++ dsphys->ds_uncompressed_bytes = ds->ds_phys->ds_uncompressed_bytes; ++ dsphys->ds_flags = ds->ds_phys->ds_flags; ++ dsphys->ds_bp = ds->ds_phys->ds_bp; ++ dmu_buf_rele(dbuf, FTAG); ++ ++ ASSERT3U(ds->ds_prev != 0, ==, ds->ds_phys->ds_prev_snap_obj != 0); ++ if (ds->ds_prev) { ++ uint64_t next_clones_obj = ++ ds->ds_prev->ds_phys->ds_next_clones_obj; ++ ASSERT(ds->ds_prev->ds_phys->ds_next_snap_obj == ++ ds->ds_object || ++ ds->ds_prev->ds_phys->ds_num_children > 1); ++ if (ds->ds_prev->ds_phys->ds_next_snap_obj == ds->ds_object) { ++ dmu_buf_will_dirty(ds->ds_prev->ds_dbuf, tx); ++ ASSERT3U(ds->ds_phys->ds_prev_snap_txg, ==, ++ ds->ds_prev->ds_phys->ds_creation_txg); ++ ds->ds_prev->ds_phys->ds_next_snap_obj = dsobj; ++ } else if (next_clones_obj != 0) { ++ remove_from_next_clones(ds->ds_prev, ++ dsphys->ds_next_snap_obj, tx); ++ VERIFY3U(0, ==, zap_add_int(mos, ++ next_clones_obj, dsobj, tx)); ++ } ++ } ++ ++ /* ++ * If we have a reference-reservation on this dataset, we will ++ * need to increase the amount of refreservation being charged ++ * 
since our unique space is going to zero. ++ */ ++ if (ds->ds_reserved) { ++ int64_t delta; ++ ASSERT(DS_UNIQUE_IS_ACCURATE(ds)); ++ delta = MIN(ds->ds_phys->ds_unique_bytes, ds->ds_reserved); ++ dsl_dir_diduse_space(ds->ds_dir, DD_USED_REFRSRV, ++ delta, 0, 0, tx); ++ } ++ ++ dmu_buf_will_dirty(ds->ds_dbuf, tx); ++ zfs_dbgmsg("taking snapshot %s@%s/%llu; newkey=%llu", ++ ds->ds_dir->dd_myname, snapname, dsobj, ++ ds->ds_phys->ds_prev_snap_txg); ++ ds->ds_phys->ds_deadlist_obj = dsl_deadlist_clone(&ds->ds_deadlist, ++ UINT64_MAX, ds->ds_phys->ds_prev_snap_obj, tx); ++ dsl_deadlist_close(&ds->ds_deadlist); ++ dsl_deadlist_open(&ds->ds_deadlist, mos, ds->ds_phys->ds_deadlist_obj); ++ dsl_deadlist_add_key(&ds->ds_deadlist, ++ ds->ds_phys->ds_prev_snap_txg, tx); ++ ++ ASSERT3U(ds->ds_phys->ds_prev_snap_txg, <, tx->tx_txg); ++ ds->ds_phys->ds_prev_snap_obj = dsobj; ++ ds->ds_phys->ds_prev_snap_txg = crtxg; ++ ds->ds_phys->ds_unique_bytes = 0; ++ if (spa_version(dp->dp_spa) >= SPA_VERSION_UNIQUE_ACCURATE) ++ ds->ds_phys->ds_flags |= DS_FLAG_UNIQUE_ACCURATE; ++ ++ err = zap_add(mos, ds->ds_phys->ds_snapnames_zapobj, ++ snapname, 8, 1, &dsobj, tx); ++ ASSERT(err == 0); ++ ++ if (ds->ds_prev) ++ dsl_dataset_drop_ref(ds->ds_prev, ds); ++ VERIFY(0 == dsl_dataset_get_ref(dp, ++ ds->ds_phys->ds_prev_snap_obj, ds, &ds->ds_prev)); ++ ++ dsl_scan_ds_snapshotted(ds, tx); ++ ++ dsl_dir_snap_cmtime_update(ds->ds_dir); ++ ++ spa_history_log_internal(LOG_DS_SNAPSHOT, dp->dp_spa, tx, ++ "dataset = %llu", dsobj); ++} ++ ++void ++dsl_dataset_sync(dsl_dataset_t *ds, zio_t *zio, dmu_tx_t *tx) ++{ ++ ASSERT(dmu_tx_is_syncing(tx)); ++ ASSERT(ds->ds_objset != NULL); ++ ASSERT(ds->ds_phys->ds_next_snap_obj == 0); ++ ++ /* ++ * in case we had to change ds_fsid_guid when we opened it, ++ * sync it out now. ++ */ ++ dmu_buf_will_dirty(ds->ds_dbuf, tx); ++ ds->ds_phys->ds_fsid_guid = ds->ds_fsid_guid; ++ ++ dsl_dir_dirty(ds->ds_dir, tx); ++ dmu_objset_sync(ds->ds_objset, zio, tx); ++} ++ ++static void ++get_clones_stat(dsl_dataset_t *ds, nvlist_t *nv) ++{ ++ uint64_t count = 0; ++ objset_t *mos = ds->ds_dir->dd_pool->dp_meta_objset; ++ zap_cursor_t zc; ++ zap_attribute_t za; ++ nvlist_t *propval; ++ nvlist_t *val; ++ ++ rw_enter(&ds->ds_dir->dd_pool->dp_config_rwlock, RW_READER); ++ VERIFY(nvlist_alloc(&propval, NV_UNIQUE_NAME, KM_SLEEP) == 0); ++ VERIFY(nvlist_alloc(&val, NV_UNIQUE_NAME, KM_SLEEP) == 0); ++ ++ /* ++ * There may me missing entries in ds_next_clones_obj ++ * due to a bug in a previous version of the code. ++ * Only trust it if it has the right number of entries. 
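++	 * If the count does not match, skip the property entirely rather
++	 * than report a partial clone list.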
++ */ ++ if (ds->ds_phys->ds_next_clones_obj != 0) { ++ ASSERT3U(0, ==, zap_count(mos, ds->ds_phys->ds_next_clones_obj, ++ &count)); ++ } ++ if (count != ds->ds_phys->ds_num_children - 1) { ++ goto fail; ++ } ++ for (zap_cursor_init(&zc, mos, ds->ds_phys->ds_next_clones_obj); ++ zap_cursor_retrieve(&zc, &za) == 0; ++ zap_cursor_advance(&zc)) { ++ dsl_dataset_t *clone; ++ char buf[ZFS_MAXNAMELEN]; ++ if (dsl_dataset_hold_obj(ds->ds_dir->dd_pool, ++ za.za_first_integer, FTAG, &clone) != 0) { ++ goto fail; ++ } ++ dsl_dir_name(clone->ds_dir, buf); ++ VERIFY(nvlist_add_boolean(val, buf) == 0); ++ dsl_dataset_rele(clone, FTAG); ++ } ++ zap_cursor_fini(&zc); ++ VERIFY(nvlist_add_nvlist(propval, ZPROP_VALUE, val) == 0); ++ VERIFY(nvlist_add_nvlist(nv, zfs_prop_to_name(ZFS_PROP_CLONES), ++ propval) == 0); ++fail: ++ nvlist_free(val); ++ nvlist_free(propval); ++ rw_exit(&ds->ds_dir->dd_pool->dp_config_rwlock); ++} ++ ++void ++dsl_dataset_stats(dsl_dataset_t *ds, nvlist_t *nv) ++{ ++ uint64_t refd, avail, uobjs, aobjs, ratio; ++ ++ dsl_dir_stats(ds->ds_dir, nv); ++ ++ dsl_dataset_space(ds, &refd, &avail, &uobjs, &aobjs); ++ dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_AVAILABLE, avail); ++ dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_REFERENCED, refd); ++ ++ dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_CREATION, ++ ds->ds_phys->ds_creation_time); ++ dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_CREATETXG, ++ ds->ds_phys->ds_creation_txg); ++ dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_REFQUOTA, ++ ds->ds_quota); ++ dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_REFRESERVATION, ++ ds->ds_reserved); ++ dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_GUID, ++ ds->ds_phys->ds_guid); ++ dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_UNIQUE, ++ ds->ds_phys->ds_unique_bytes); ++ dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_OBJSETID, ++ ds->ds_object); ++ dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_USERREFS, ++ ds->ds_userrefs); ++ dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_DEFER_DESTROY, ++ DS_IS_DEFER_DESTROY(ds) ? 1 : 0); ++ ++ if (ds->ds_phys->ds_prev_snap_obj != 0) { ++ uint64_t written, comp, uncomp; ++ dsl_pool_t *dp = ds->ds_dir->dd_pool; ++ dsl_dataset_t *prev; ++ int err; ++ ++ rw_enter(&dp->dp_config_rwlock, RW_READER); ++ err = dsl_dataset_hold_obj(dp, ++ ds->ds_phys->ds_prev_snap_obj, FTAG, &prev); ++ rw_exit(&dp->dp_config_rwlock); ++ if (err == 0) { ++ err = dsl_dataset_space_written(prev, ds, &written, ++ &comp, &uncomp); ++ dsl_dataset_rele(prev, FTAG); ++ if (err == 0) { ++ dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_WRITTEN, ++ written); ++ } ++ } ++ } ++ ++ ratio = ds->ds_phys->ds_compressed_bytes == 0 ? 100 : ++ (ds->ds_phys->ds_uncompressed_bytes * 100 / ++ ds->ds_phys->ds_compressed_bytes); ++ dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_REFRATIO, ratio); ++ ++ if (ds->ds_phys->ds_next_snap_obj) { ++ /* ++ * This is a snapshot; override the dd's space used with ++ * our unique space and compression ratio. 
++ */ ++ dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_USED, ++ ds->ds_phys->ds_unique_bytes); ++ dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_COMPRESSRATIO, ratio); ++ ++ get_clones_stat(ds, nv); ++ } ++} ++ ++void ++dsl_dataset_fast_stat(dsl_dataset_t *ds, dmu_objset_stats_t *stat) ++{ ++ stat->dds_creation_txg = ds->ds_phys->ds_creation_txg; ++ stat->dds_inconsistent = ds->ds_phys->ds_flags & DS_FLAG_INCONSISTENT; ++ stat->dds_guid = ds->ds_phys->ds_guid; ++ if (ds->ds_phys->ds_next_snap_obj) { ++ stat->dds_is_snapshot = B_TRUE; ++ stat->dds_num_clones = ds->ds_phys->ds_num_children - 1; ++ } else { ++ stat->dds_is_snapshot = B_FALSE; ++ stat->dds_num_clones = 0; ++ } ++ ++ /* clone origin is really a dsl_dir thing... */ ++ rw_enter(&ds->ds_dir->dd_pool->dp_config_rwlock, RW_READER); ++ if (dsl_dir_is_clone(ds->ds_dir)) { ++ dsl_dataset_t *ods; ++ ++ VERIFY(0 == dsl_dataset_get_ref(ds->ds_dir->dd_pool, ++ ds->ds_dir->dd_phys->dd_origin_obj, FTAG, &ods)); ++ dsl_dataset_name(ods, stat->dds_origin); ++ dsl_dataset_drop_ref(ods, FTAG); ++ } else { ++ stat->dds_origin[0] = '\0'; ++ } ++ rw_exit(&ds->ds_dir->dd_pool->dp_config_rwlock); ++} ++ ++uint64_t ++dsl_dataset_fsid_guid(dsl_dataset_t *ds) ++{ ++ return (ds->ds_fsid_guid); ++} ++ ++void ++dsl_dataset_space(dsl_dataset_t *ds, ++ uint64_t *refdbytesp, uint64_t *availbytesp, ++ uint64_t *usedobjsp, uint64_t *availobjsp) ++{ ++ *refdbytesp = ds->ds_phys->ds_used_bytes; ++ *availbytesp = dsl_dir_space_available(ds->ds_dir, NULL, 0, TRUE); ++ if (ds->ds_reserved > ds->ds_phys->ds_unique_bytes) ++ *availbytesp += ds->ds_reserved - ds->ds_phys->ds_unique_bytes; ++ if (ds->ds_quota != 0) { ++ /* ++ * Adjust available bytes according to refquota ++ */ ++ if (*refdbytesp < ds->ds_quota) ++ *availbytesp = MIN(*availbytesp, ++ ds->ds_quota - *refdbytesp); ++ else ++ *availbytesp = 0; ++ } ++ *usedobjsp = ds->ds_phys->ds_bp.blk_fill; ++ *availobjsp = DN_MAX_OBJECT - *usedobjsp; ++} ++ ++boolean_t ++dsl_dataset_modified_since_lastsnap(dsl_dataset_t *ds) ++{ ++ ASSERTV(dsl_pool_t *dp = ds->ds_dir->dd_pool); ++ ++ ASSERT(RW_LOCK_HELD(&dp->dp_config_rwlock) || ++ dsl_pool_sync_context(dp)); ++ if (ds->ds_prev == NULL) ++ return (B_FALSE); ++ if (ds->ds_phys->ds_bp.blk_birth > ++ ds->ds_prev->ds_phys->ds_creation_txg) { ++ objset_t *os, *os_prev; ++ /* ++ * It may be that only the ZIL differs, because it was ++ * reset in the head. Don't count that as being ++ * modified. 
++ */ ++ if (dmu_objset_from_ds(ds, &os) != 0) ++ return (B_TRUE); ++ if (dmu_objset_from_ds(ds->ds_prev, &os_prev) != 0) ++ return (B_TRUE); ++ return (bcmp(&os->os_phys->os_meta_dnode, ++ &os_prev->os_phys->os_meta_dnode, ++ sizeof (os->os_phys->os_meta_dnode)) != 0); ++ } ++ return (B_FALSE); ++} ++ ++/* ARGSUSED */ ++static int ++dsl_dataset_snapshot_rename_check(void *arg1, void *arg2, dmu_tx_t *tx) ++{ ++ dsl_dataset_t *ds = arg1; ++ char *newsnapname = arg2; ++ dsl_dir_t *dd = ds->ds_dir; ++ dsl_dataset_t *hds; ++ uint64_t val; ++ int err; ++ ++ err = dsl_dataset_hold_obj(dd->dd_pool, ++ dd->dd_phys->dd_head_dataset_obj, FTAG, &hds); ++ if (err) ++ return (err); ++ ++ /* new name better not be in use */ ++ err = dsl_dataset_snap_lookup(hds, newsnapname, &val); ++ dsl_dataset_rele(hds, FTAG); ++ ++ if (err == 0) ++ err = EEXIST; ++ else if (err == ENOENT) ++ err = 0; ++ ++ /* dataset name + 1 for the "@" + the new snapshot name must fit */ ++ if (dsl_dir_namelen(ds->ds_dir) + 1 + strlen(newsnapname) >= MAXNAMELEN) ++ err = ENAMETOOLONG; ++ ++ return (err); ++} ++ ++static void ++dsl_dataset_snapshot_rename_sync(void *arg1, void *arg2, dmu_tx_t *tx) ++{ ++ dsl_dataset_t *ds = arg1; ++ const char *newsnapname = arg2; ++ dsl_dir_t *dd = ds->ds_dir; ++ objset_t *mos = dd->dd_pool->dp_meta_objset; ++ dsl_dataset_t *hds; ++ int err; ++ ++ ASSERT(ds->ds_phys->ds_next_snap_obj != 0); ++ ++ VERIFY(0 == dsl_dataset_hold_obj(dd->dd_pool, ++ dd->dd_phys->dd_head_dataset_obj, FTAG, &hds)); ++ ++ VERIFY(0 == dsl_dataset_get_snapname(ds)); ++ err = dsl_dataset_snap_remove(hds, ds->ds_snapname, tx); ++ ASSERT3U(err, ==, 0); ++ mutex_enter(&ds->ds_lock); ++ (void) strcpy(ds->ds_snapname, newsnapname); ++ mutex_exit(&ds->ds_lock); ++ err = zap_add(mos, hds->ds_phys->ds_snapnames_zapobj, ++ ds->ds_snapname, 8, 1, &ds->ds_object, tx); ++ ASSERT3U(err, ==, 0); ++ ++ spa_history_log_internal(LOG_DS_RENAME, dd->dd_pool->dp_spa, tx, ++ "dataset = %llu", ds->ds_object); ++ dsl_dataset_rele(hds, FTAG); ++} ++ ++struct renamesnaparg { ++ dsl_sync_task_group_t *dstg; ++ char failed[MAXPATHLEN]; ++ char *oldsnap; ++ char *newsnap; ++}; ++ ++static int ++dsl_snapshot_rename_one(const char *name, void *arg) ++{ ++ struct renamesnaparg *ra = arg; ++ dsl_dataset_t *ds = NULL; ++ char *snapname; ++ int err; ++ ++ snapname = kmem_asprintf("%s@%s", name, ra->oldsnap); ++ (void) strlcpy(ra->failed, snapname, sizeof (ra->failed)); ++ ++ /* ++ * For recursive snapshot renames the parent won't be changing ++ * so we just pass name for both the to/from argument. ++ */ ++ err = zfs_secpolicy_rename_perms(snapname, snapname, CRED()); ++ if (err != 0) { ++ strfree(snapname); ++ return (err == ENOENT ? 0 : err); ++ } ++ ++#ifdef _KERNEL ++ /* ++ * For all filesystems undergoing rename, we'll need to unmount it. ++ */ ++ (void) zfs_unmount_snap(snapname, NULL); ++#endif ++ err = dsl_dataset_hold(snapname, ra->dstg, &ds); ++ strfree(snapname); ++ if (err != 0) ++ return (err == ENOENT ? 
0 : err); ++ ++ dsl_sync_task_create(ra->dstg, dsl_dataset_snapshot_rename_check, ++ dsl_dataset_snapshot_rename_sync, ds, ra->newsnap, 0); ++ ++ return (0); ++} ++ ++static int ++dsl_recursive_rename(char *oldname, const char *newname) ++{ ++ int err; ++ struct renamesnaparg *ra; ++ dsl_sync_task_t *dst; ++ spa_t *spa; ++ char *cp, *fsname = spa_strdup(oldname); ++ int len = strlen(oldname) + 1; ++ ++ /* truncate the snapshot name to get the fsname */ ++ cp = strchr(fsname, '@'); ++ *cp = '\0'; ++ ++ err = spa_open(fsname, &spa, FTAG); ++ if (err) { ++ kmem_free(fsname, len); ++ return (err); ++ } ++ ra = kmem_alloc(sizeof (struct renamesnaparg), KM_SLEEP); ++ ra->dstg = dsl_sync_task_group_create(spa_get_dsl(spa)); ++ ++ ra->oldsnap = strchr(oldname, '@') + 1; ++ ra->newsnap = strchr(newname, '@') + 1; ++ *ra->failed = '\0'; ++ ++ err = dmu_objset_find(fsname, dsl_snapshot_rename_one, ra, ++ DS_FIND_CHILDREN); ++ kmem_free(fsname, len); ++ ++ if (err == 0) { ++ err = dsl_sync_task_group_wait(ra->dstg); ++ } ++ ++ for (dst = list_head(&ra->dstg->dstg_tasks); dst; ++ dst = list_next(&ra->dstg->dstg_tasks, dst)) { ++ dsl_dataset_t *ds = dst->dst_arg1; ++ if (dst->dst_err) { ++ dsl_dir_name(ds->ds_dir, ra->failed); ++ (void) strlcat(ra->failed, "@", sizeof (ra->failed)); ++ (void) strlcat(ra->failed, ra->newsnap, ++ sizeof (ra->failed)); ++ } ++ dsl_dataset_rele(ds, ra->dstg); ++ } ++ ++ if (err) ++ (void) strlcpy(oldname, ra->failed, sizeof (ra->failed)); ++ ++ dsl_sync_task_group_destroy(ra->dstg); ++ kmem_free(ra, sizeof (struct renamesnaparg)); ++ spa_close(spa, FTAG); ++ return (err); ++} ++ ++static int ++dsl_valid_rename(const char *oldname, void *arg) ++{ ++ int delta = *(int *)arg; ++ ++ if (strlen(oldname) + delta >= MAXNAMELEN) ++ return (ENAMETOOLONG); ++ ++ return (0); ++} ++ ++#pragma weak dmu_objset_rename = dsl_dataset_rename ++int ++dsl_dataset_rename(char *oldname, const char *newname, boolean_t recursive) ++{ ++ dsl_dir_t *dd; ++ dsl_dataset_t *ds; ++ const char *tail; ++ int err; ++ ++ err = dsl_dir_open(oldname, FTAG, &dd, &tail); ++ if (err) ++ return (err); ++ ++ if (tail == NULL) { ++ int delta = strlen(newname) - strlen(oldname); ++ ++ /* if we're growing, validate child name lengths */ ++ if (delta > 0) ++ err = dmu_objset_find(oldname, dsl_valid_rename, ++ &delta, DS_FIND_CHILDREN | DS_FIND_SNAPSHOTS); ++ ++ if (err == 0) ++ err = dsl_dir_rename(dd, newname); ++ dsl_dir_close(dd, FTAG); ++ return (err); ++ } ++ ++ if (tail[0] != '@') { ++ /* the name ended in a nonexistent component */ ++ dsl_dir_close(dd, FTAG); ++ return (ENOENT); ++ } ++ ++ dsl_dir_close(dd, FTAG); ++ ++ /* new name must be snapshot in same filesystem */ ++ tail = strchr(newname, '@'); ++ if (tail == NULL) ++ return (EINVAL); ++ tail++; ++ if (strncmp(oldname, newname, tail - newname) != 0) ++ return (EXDEV); ++ ++ if (recursive) { ++ err = dsl_recursive_rename(oldname, newname); ++ } else { ++ err = dsl_dataset_hold(oldname, FTAG, &ds); ++ if (err) ++ return (err); ++ ++ err = dsl_sync_task_do(ds->ds_dir->dd_pool, ++ dsl_dataset_snapshot_rename_check, ++ dsl_dataset_snapshot_rename_sync, ds, (char *)tail, 1); ++ ++ dsl_dataset_rele(ds, FTAG); ++ } ++ ++ return (err); ++} ++ ++struct promotenode { ++ list_node_t link; ++ dsl_dataset_t *ds; ++}; ++ ++struct promotearg { ++ list_t shared_snaps, origin_snaps, clone_snaps; ++ dsl_dataset_t *origin_origin; ++ uint64_t used, comp, uncomp, unique, cloneusedsnap, originusedsnap; ++ char *err_ds; ++}; ++ ++static int snaplist_space(list_t *l, uint64_t 
mintxg, uint64_t *spacep); ++ ++static int ++dsl_dataset_promote_check(void *arg1, void *arg2, dmu_tx_t *tx) ++{ ++ dsl_dataset_t *hds = arg1; ++ struct promotearg *pa = arg2; ++ struct promotenode *snap = list_head(&pa->shared_snaps); ++ dsl_dataset_t *origin_ds = snap->ds; ++ int err; ++ uint64_t unused; ++ ++ /* Check that it is a real clone */ ++ if (!dsl_dir_is_clone(hds->ds_dir)) ++ return (EINVAL); ++ ++ /* Since this is so expensive, don't do the preliminary check */ ++ if (!dmu_tx_is_syncing(tx)) ++ return (0); ++ ++ if (hds->ds_phys->ds_flags & DS_FLAG_NOPROMOTE) ++ return (EXDEV); ++ ++ /* compute origin's new unique space */ ++ snap = list_tail(&pa->clone_snaps); ++ ASSERT3U(snap->ds->ds_phys->ds_prev_snap_obj, ==, origin_ds->ds_object); ++ dsl_deadlist_space_range(&snap->ds->ds_deadlist, ++ origin_ds->ds_phys->ds_prev_snap_txg, UINT64_MAX, ++ &pa->unique, &unused, &unused); ++ ++ /* ++ * Walk the snapshots that we are moving ++ * ++ * Compute space to transfer. Consider the incremental changes ++ * to used for each snapshot: ++ * (my used) = (prev's used) + (blocks born) - (blocks killed) ++ * So each snapshot gave birth to: ++ * (blocks born) = (my used) - (prev's used) + (blocks killed) ++ * So a sequence would look like: ++ * (uN - u(N-1) + kN) + ... + (u1 - u0 + k1) + (u0 - 0 + k0) ++ * Which simplifies to: ++ * uN + kN + kN-1 + ... + k1 + k0 ++ * Note however, if we stop before we reach the ORIGIN we get: ++ * uN + kN + kN-1 + ... + kM - uM-1 ++ */ ++ pa->used = origin_ds->ds_phys->ds_used_bytes; ++ pa->comp = origin_ds->ds_phys->ds_compressed_bytes; ++ pa->uncomp = origin_ds->ds_phys->ds_uncompressed_bytes; ++ for (snap = list_head(&pa->shared_snaps); snap; ++ snap = list_next(&pa->shared_snaps, snap)) { ++ uint64_t val, dlused, dlcomp, dluncomp; ++ dsl_dataset_t *ds = snap->ds; ++ ++ /* Check that the snapshot name does not conflict */ ++ VERIFY(0 == dsl_dataset_get_snapname(ds)); ++ err = dsl_dataset_snap_lookup(hds, ds->ds_snapname, &val); ++ if (err == 0) { ++ err = EEXIST; ++ goto out; ++ } ++ if (err != ENOENT) ++ goto out; ++ ++ /* The very first snapshot does not have a deadlist */ ++ if (ds->ds_phys->ds_prev_snap_obj == 0) ++ continue; ++ ++ dsl_deadlist_space(&ds->ds_deadlist, ++ &dlused, &dlcomp, &dluncomp); ++ pa->used += dlused; ++ pa->comp += dlcomp; ++ pa->uncomp += dluncomp; ++ } ++ ++ /* ++ * If we are a clone of a clone then we never reached ORIGIN, ++ * so we need to subtract out the clone origin's used space. ++ */ ++ if (pa->origin_origin) { ++ pa->used -= pa->origin_origin->ds_phys->ds_used_bytes; ++ pa->comp -= pa->origin_origin->ds_phys->ds_compressed_bytes; ++ pa->uncomp -= pa->origin_origin->ds_phys->ds_uncompressed_bytes; ++ } ++ ++ /* Check that there is enough space here */ ++ err = dsl_dir_transfer_possible(origin_ds->ds_dir, hds->ds_dir, ++ pa->used); ++ if (err) ++ return (err); ++ ++ /* ++ * Compute the amounts of space that will be used by snapshots ++ * after the promotion (for both origin and clone). For each, ++ * it is the amount of space that will be on all of their ++ * deadlists (that was not born before their new origin). ++ */ ++ if (hds->ds_dir->dd_phys->dd_flags & DD_FLAG_USED_BREAKDOWN) { ++ uint64_t space; ++ ++ /* ++ * Note, typically this will not be a clone of a clone, ++ * so dd_origin_txg will be < TXG_INITIAL, so ++ * these snaplist_space() -> dsl_deadlist_space_range() ++ * calls will be fast because they do not have to ++ * iterate over all bps. 
++ */ ++ snap = list_head(&pa->origin_snaps); ++ err = snaplist_space(&pa->shared_snaps, ++ snap->ds->ds_dir->dd_origin_txg, &pa->cloneusedsnap); ++ if (err) ++ return (err); ++ ++ err = snaplist_space(&pa->clone_snaps, ++ snap->ds->ds_dir->dd_origin_txg, &space); ++ if (err) ++ return (err); ++ pa->cloneusedsnap += space; ++ } ++ if (origin_ds->ds_dir->dd_phys->dd_flags & DD_FLAG_USED_BREAKDOWN) { ++ err = snaplist_space(&pa->origin_snaps, ++ origin_ds->ds_phys->ds_creation_txg, &pa->originusedsnap); ++ if (err) ++ return (err); ++ } ++ ++ return (0); ++out: ++ pa->err_ds = snap->ds->ds_snapname; ++ return (err); ++} ++ ++static void ++dsl_dataset_promote_sync(void *arg1, void *arg2, dmu_tx_t *tx) ++{ ++ dsl_dataset_t *hds = arg1; ++ struct promotearg *pa = arg2; ++ struct promotenode *snap = list_head(&pa->shared_snaps); ++ dsl_dataset_t *origin_ds = snap->ds; ++ dsl_dataset_t *origin_head; ++ dsl_dir_t *dd = hds->ds_dir; ++ dsl_pool_t *dp = hds->ds_dir->dd_pool; ++ dsl_dir_t *odd = NULL; ++ uint64_t oldnext_obj; ++ int64_t delta; ++ ++ ASSERT(0 == (hds->ds_phys->ds_flags & DS_FLAG_NOPROMOTE)); ++ ++ snap = list_head(&pa->origin_snaps); ++ origin_head = snap->ds; ++ ++ /* ++ * We need to explicitly open odd, since origin_ds's dd will be ++ * changing. ++ */ ++ VERIFY(0 == dsl_dir_open_obj(dp, origin_ds->ds_dir->dd_object, ++ NULL, FTAG, &odd)); ++ ++ /* change origin's next snap */ ++ dmu_buf_will_dirty(origin_ds->ds_dbuf, tx); ++ oldnext_obj = origin_ds->ds_phys->ds_next_snap_obj; ++ snap = list_tail(&pa->clone_snaps); ++ ASSERT3U(snap->ds->ds_phys->ds_prev_snap_obj, ==, origin_ds->ds_object); ++ origin_ds->ds_phys->ds_next_snap_obj = snap->ds->ds_object; ++ ++ /* change the origin's next clone */ ++ if (origin_ds->ds_phys->ds_next_clones_obj) { ++ remove_from_next_clones(origin_ds, snap->ds->ds_object, tx); ++ VERIFY3U(0, ==, zap_add_int(dp->dp_meta_objset, ++ origin_ds->ds_phys->ds_next_clones_obj, ++ oldnext_obj, tx)); ++ } ++ ++ /* change origin */ ++ dmu_buf_will_dirty(dd->dd_dbuf, tx); ++ ASSERT3U(dd->dd_phys->dd_origin_obj, ==, origin_ds->ds_object); ++ dd->dd_phys->dd_origin_obj = odd->dd_phys->dd_origin_obj; ++ dd->dd_origin_txg = origin_head->ds_dir->dd_origin_txg; ++ dmu_buf_will_dirty(odd->dd_dbuf, tx); ++ odd->dd_phys->dd_origin_obj = origin_ds->ds_object; ++ origin_head->ds_dir->dd_origin_txg = ++ origin_ds->ds_phys->ds_creation_txg; ++ ++ /* change dd_clone entries */ ++ if (spa_version(dp->dp_spa) >= SPA_VERSION_DIR_CLONES) { ++ VERIFY3U(0, ==, zap_remove_int(dp->dp_meta_objset, ++ odd->dd_phys->dd_clones, hds->ds_object, tx)); ++ VERIFY3U(0, ==, zap_add_int(dp->dp_meta_objset, ++ pa->origin_origin->ds_dir->dd_phys->dd_clones, ++ hds->ds_object, tx)); ++ ++ VERIFY3U(0, ==, zap_remove_int(dp->dp_meta_objset, ++ pa->origin_origin->ds_dir->dd_phys->dd_clones, ++ origin_head->ds_object, tx)); ++ if (dd->dd_phys->dd_clones == 0) { ++ dd->dd_phys->dd_clones = zap_create(dp->dp_meta_objset, ++ DMU_OT_DSL_CLONES, DMU_OT_NONE, 0, tx); ++ } ++ VERIFY3U(0, ==, zap_add_int(dp->dp_meta_objset, ++ dd->dd_phys->dd_clones, origin_head->ds_object, tx)); ++ ++ } ++ ++ /* move snapshots to this dir */ ++ for (snap = list_head(&pa->shared_snaps); snap; ++ snap = list_next(&pa->shared_snaps, snap)) { ++ dsl_dataset_t *ds = snap->ds; ++ ++ /* unregister props as dsl_dir is changing */ ++ if (ds->ds_objset) { ++ dmu_objset_evict(ds->ds_objset); ++ ds->ds_objset = NULL; ++ } ++ /* move snap name entry */ ++ VERIFY(0 == dsl_dataset_get_snapname(ds)); ++ VERIFY(0 == 
dsl_dataset_snap_remove(origin_head, ++ ds->ds_snapname, tx)); ++ VERIFY(0 == zap_add(dp->dp_meta_objset, ++ hds->ds_phys->ds_snapnames_zapobj, ds->ds_snapname, ++ 8, 1, &ds->ds_object, tx)); ++ ++ /* change containing dsl_dir */ ++ dmu_buf_will_dirty(ds->ds_dbuf, tx); ++ ASSERT3U(ds->ds_phys->ds_dir_obj, ==, odd->dd_object); ++ ds->ds_phys->ds_dir_obj = dd->dd_object; ++ ASSERT3P(ds->ds_dir, ==, odd); ++ dsl_dir_close(ds->ds_dir, ds); ++ VERIFY(0 == dsl_dir_open_obj(dp, dd->dd_object, ++ NULL, ds, &ds->ds_dir)); ++ ++ /* move any clone references */ ++ if (ds->ds_phys->ds_next_clones_obj && ++ spa_version(dp->dp_spa) >= SPA_VERSION_DIR_CLONES) { ++ zap_cursor_t zc; ++ zap_attribute_t za; ++ ++ for (zap_cursor_init(&zc, dp->dp_meta_objset, ++ ds->ds_phys->ds_next_clones_obj); ++ zap_cursor_retrieve(&zc, &za) == 0; ++ zap_cursor_advance(&zc)) { ++ dsl_dataset_t *cnds; ++ uint64_t o; ++ ++ if (za.za_first_integer == oldnext_obj) { ++ /* ++ * We've already moved the ++ * origin's reference. ++ */ ++ continue; ++ } ++ ++ VERIFY3U(0, ==, dsl_dataset_hold_obj(dp, ++ za.za_first_integer, FTAG, &cnds)); ++ o = cnds->ds_dir->dd_phys->dd_head_dataset_obj; ++ ++ VERIFY3U(zap_remove_int(dp->dp_meta_objset, ++ odd->dd_phys->dd_clones, o, tx), ==, 0); ++ VERIFY3U(zap_add_int(dp->dp_meta_objset, ++ dd->dd_phys->dd_clones, o, tx), ==, 0); ++ dsl_dataset_rele(cnds, FTAG); ++ } ++ zap_cursor_fini(&zc); ++ } ++ ++ ASSERT3U(dsl_prop_numcb(ds), ==, 0); ++ } ++ ++ /* ++ * Change space accounting. ++ * Note, pa->*usedsnap and dd_used_breakdown[SNAP] will either ++ * both be valid, or both be 0 (resulting in delta == 0). This ++ * is true for each of {clone,origin} independently. ++ */ ++ ++ delta = pa->cloneusedsnap - ++ dd->dd_phys->dd_used_breakdown[DD_USED_SNAP]; ++ ASSERT3S(delta, >=, 0); ++ ASSERT3U(pa->used, >=, delta); ++ dsl_dir_diduse_space(dd, DD_USED_SNAP, delta, 0, 0, tx); ++ dsl_dir_diduse_space(dd, DD_USED_HEAD, ++ pa->used - delta, pa->comp, pa->uncomp, tx); ++ ++ delta = pa->originusedsnap - ++ odd->dd_phys->dd_used_breakdown[DD_USED_SNAP]; ++ ASSERT3S(delta, <=, 0); ++ ASSERT3U(pa->used, >=, -delta); ++ dsl_dir_diduse_space(odd, DD_USED_SNAP, delta, 0, 0, tx); ++ dsl_dir_diduse_space(odd, DD_USED_HEAD, ++ -pa->used - delta, -pa->comp, -pa->uncomp, tx); ++ ++ origin_ds->ds_phys->ds_unique_bytes = pa->unique; ++ ++ /* log history record */ ++ spa_history_log_internal(LOG_DS_PROMOTE, dd->dd_pool->dp_spa, tx, ++ "dataset = %llu", hds->ds_object); ++ ++ dsl_dir_close(odd, FTAG); ++} ++ ++static char *snaplist_tag = "snaplist"; ++/* ++ * Make a list of dsl_dataset_t's for the snapshots between first_obj ++ * (exclusive) and last_obj (inclusive). The list will be in reverse ++ * order (last_obj will be the list_head()). If first_obj == 0, do all ++ * snapshots back to this dataset's origin. 
++ */ ++static int ++snaplist_make(dsl_pool_t *dp, boolean_t own, ++ uint64_t first_obj, uint64_t last_obj, list_t *l) ++{ ++ uint64_t obj = last_obj; ++ ++ ASSERT(RW_LOCK_HELD(&dp->dp_config_rwlock)); ++ ++ list_create(l, sizeof (struct promotenode), ++ offsetof(struct promotenode, link)); ++ ++ while (obj != first_obj) { ++ dsl_dataset_t *ds; ++ struct promotenode *snap; ++ int err; ++ ++ if (own) { ++ err = dsl_dataset_own_obj(dp, obj, ++ 0, snaplist_tag, &ds); ++ if (err == 0) ++ dsl_dataset_make_exclusive(ds, snaplist_tag); ++ } else { ++ err = dsl_dataset_hold_obj(dp, obj, snaplist_tag, &ds); ++ } ++ if (err == ENOENT) { ++ /* lost race with snapshot destroy */ ++ struct promotenode *last = list_tail(l); ++ ASSERT(obj != last->ds->ds_phys->ds_prev_snap_obj); ++ obj = last->ds->ds_phys->ds_prev_snap_obj; ++ continue; ++ } else if (err) { ++ return (err); ++ } ++ ++ if (first_obj == 0) ++ first_obj = ds->ds_dir->dd_phys->dd_origin_obj; ++ ++ snap = kmem_alloc(sizeof (struct promotenode), KM_SLEEP); ++ snap->ds = ds; ++ list_insert_tail(l, snap); ++ obj = ds->ds_phys->ds_prev_snap_obj; ++ } ++ ++ return (0); ++} ++ ++static int ++snaplist_space(list_t *l, uint64_t mintxg, uint64_t *spacep) ++{ ++ struct promotenode *snap; ++ ++ *spacep = 0; ++ for (snap = list_head(l); snap; snap = list_next(l, snap)) { ++ uint64_t used, comp, uncomp; ++ dsl_deadlist_space_range(&snap->ds->ds_deadlist, ++ mintxg, UINT64_MAX, &used, &comp, &uncomp); ++ *spacep += used; ++ } ++ return (0); ++} ++ ++static void ++snaplist_destroy(list_t *l, boolean_t own) ++{ ++ struct promotenode *snap; ++ ++ if (!l || !list_link_active(&l->list_head)) ++ return; ++ ++ while ((snap = list_tail(l)) != NULL) { ++ list_remove(l, snap); ++ if (own) ++ dsl_dataset_disown(snap->ds, snaplist_tag); ++ else ++ dsl_dataset_rele(snap->ds, snaplist_tag); ++ kmem_free(snap, sizeof (struct promotenode)); ++ } ++ list_destroy(l); ++} ++ ++/* ++ * Promote a clone. Nomenclature note: ++ * "clone" or "cds": the original clone which is being promoted ++ * "origin" or "ods": the snapshot which is originally clone's origin ++ * "origin head" or "ohds": the dataset which is the head ++ * (filesystem/volume) for the origin ++ * "origin origin": the origin of the origin's filesystem (typically ++ * NULL, indicating that the clone is not a clone of a clone). ++ */ ++int ++dsl_dataset_promote(const char *name, char *conflsnap) ++{ ++ dsl_dataset_t *ds; ++ dsl_dir_t *dd; ++ dsl_pool_t *dp; ++ dmu_object_info_t doi; ++ struct promotearg pa; ++ struct promotenode *snap; ++ int err; ++ ++ bzero(&pa, sizeof(struct promotearg)); ++ err = dsl_dataset_hold(name, FTAG, &ds); ++ if (err) ++ return (err); ++ dd = ds->ds_dir; ++ dp = dd->dd_pool; ++ ++ err = dmu_object_info(dp->dp_meta_objset, ++ ds->ds_phys->ds_snapnames_zapobj, &doi); ++ if (err) { ++ dsl_dataset_rele(ds, FTAG); ++ return (err); ++ } ++ ++ if (dsl_dataset_is_snapshot(ds) || dd->dd_phys->dd_origin_obj == 0) { ++ dsl_dataset_rele(ds, FTAG); ++ return (EINVAL); ++ } ++ ++ /* ++ * We are going to inherit all the snapshots taken before our ++ * origin (i.e., our new origin will be our parent's origin). ++ * Take ownership of them so that we can rename them into our ++ * namespace. 
++ */ ++ rw_enter(&dp->dp_config_rwlock, RW_READER); ++ ++ err = snaplist_make(dp, B_TRUE, 0, dd->dd_phys->dd_origin_obj, ++ &pa.shared_snaps); ++ if (err != 0) ++ goto out; ++ ++ err = snaplist_make(dp, B_FALSE, 0, ds->ds_object, &pa.clone_snaps); ++ if (err != 0) ++ goto out; ++ ++ snap = list_head(&pa.shared_snaps); ++ ASSERT3U(snap->ds->ds_object, ==, dd->dd_phys->dd_origin_obj); ++ err = snaplist_make(dp, B_FALSE, dd->dd_phys->dd_origin_obj, ++ snap->ds->ds_dir->dd_phys->dd_head_dataset_obj, &pa.origin_snaps); ++ if (err != 0) ++ goto out; ++ ++ if (snap->ds->ds_dir->dd_phys->dd_origin_obj != 0) { ++ err = dsl_dataset_hold_obj(dp, ++ snap->ds->ds_dir->dd_phys->dd_origin_obj, ++ FTAG, &pa.origin_origin); ++ if (err != 0) ++ goto out; ++ } ++ ++out: ++ rw_exit(&dp->dp_config_rwlock); ++ ++ /* ++ * Add in 128x the snapnames zapobj size, since we will be moving ++ * a bunch of snapnames to the promoted ds, and dirtying their ++ * bonus buffers. ++ */ ++ if (err == 0) { ++ err = dsl_sync_task_do(dp, dsl_dataset_promote_check, ++ dsl_dataset_promote_sync, ds, &pa, ++ 2 + 2 * doi.doi_physical_blocks_512); ++ if (err && pa.err_ds && conflsnap) ++ (void) strncpy(conflsnap, pa.err_ds, MAXNAMELEN); ++ } ++ ++ snaplist_destroy(&pa.shared_snaps, B_TRUE); ++ snaplist_destroy(&pa.clone_snaps, B_FALSE); ++ snaplist_destroy(&pa.origin_snaps, B_FALSE); ++ if (pa.origin_origin) ++ dsl_dataset_rele(pa.origin_origin, FTAG); ++ dsl_dataset_rele(ds, FTAG); ++ return (err); ++} ++ ++struct cloneswaparg { ++ dsl_dataset_t *cds; /* clone dataset */ ++ dsl_dataset_t *ohds; /* origin's head dataset */ ++ boolean_t force; ++ int64_t unused_refres_delta; /* change in unconsumed refreservation */ ++}; ++ ++/* ARGSUSED */ ++static int ++dsl_dataset_clone_swap_check(void *arg1, void *arg2, dmu_tx_t *tx) ++{ ++ struct cloneswaparg *csa = arg1; ++ ++ /* they should both be heads */ ++ if (dsl_dataset_is_snapshot(csa->cds) || ++ dsl_dataset_is_snapshot(csa->ohds)) ++ return (EINVAL); ++ ++ /* the branch point should be just before them */ ++ if (csa->cds->ds_prev != csa->ohds->ds_prev) ++ return (EINVAL); ++ ++ /* cds should be the clone (unless they are unrelated) */ ++ if (csa->cds->ds_prev != NULL && ++ csa->cds->ds_prev != csa->cds->ds_dir->dd_pool->dp_origin_snap && ++ csa->ohds->ds_object != ++ csa->cds->ds_prev->ds_phys->ds_next_snap_obj) ++ return (EINVAL); ++ ++ /* the clone should be a child of the origin */ ++ if (csa->cds->ds_dir->dd_parent != csa->ohds->ds_dir) ++ return (EINVAL); ++ ++ /* ohds shouldn't be modified unless 'force' */ ++ if (!csa->force && dsl_dataset_modified_since_lastsnap(csa->ohds)) ++ return (ETXTBSY); ++ ++ /* adjust amount of any unconsumed refreservation */ ++ csa->unused_refres_delta = ++ (int64_t)MIN(csa->ohds->ds_reserved, ++ csa->ohds->ds_phys->ds_unique_bytes) - ++ (int64_t)MIN(csa->ohds->ds_reserved, ++ csa->cds->ds_phys->ds_unique_bytes); ++ ++ if (csa->unused_refres_delta > 0 && ++ csa->unused_refres_delta > ++ dsl_dir_space_available(csa->ohds->ds_dir, NULL, 0, TRUE)) ++ return (ENOSPC); ++ ++ if (csa->ohds->ds_quota != 0 && ++ csa->cds->ds_phys->ds_unique_bytes > csa->ohds->ds_quota) ++ return (EDQUOT); ++ ++ return (0); ++} ++ ++/* ARGSUSED */ ++static void ++dsl_dataset_clone_swap_sync(void *arg1, void *arg2, dmu_tx_t *tx) ++{ ++ struct cloneswaparg *csa = arg1; ++ dsl_pool_t *dp = csa->cds->ds_dir->dd_pool; ++ ++ ASSERT(csa->cds->ds_reserved == 0); ++ ASSERT(csa->ohds->ds_quota == 0 || ++ csa->cds->ds_phys->ds_unique_bytes <= csa->ohds->ds_quota); ++ ++ 
dmu_buf_will_dirty(csa->cds->ds_dbuf, tx); ++ dmu_buf_will_dirty(csa->ohds->ds_dbuf, tx); ++ ++ if (csa->cds->ds_objset != NULL) { ++ dmu_objset_evict(csa->cds->ds_objset); ++ csa->cds->ds_objset = NULL; ++ } ++ ++ if (csa->ohds->ds_objset != NULL) { ++ dmu_objset_evict(csa->ohds->ds_objset); ++ csa->ohds->ds_objset = NULL; ++ } ++ ++ /* ++ * Reset origin's unique bytes, if it exists. ++ */ ++ if (csa->cds->ds_prev) { ++ dsl_dataset_t *origin = csa->cds->ds_prev; ++ uint64_t comp, uncomp; ++ ++ dmu_buf_will_dirty(origin->ds_dbuf, tx); ++ dsl_deadlist_space_range(&csa->cds->ds_deadlist, ++ origin->ds_phys->ds_prev_snap_txg, UINT64_MAX, ++ &origin->ds_phys->ds_unique_bytes, &comp, &uncomp); ++ } ++ ++ /* swap blkptrs */ ++ { ++ blkptr_t tmp; ++ tmp = csa->ohds->ds_phys->ds_bp; ++ csa->ohds->ds_phys->ds_bp = csa->cds->ds_phys->ds_bp; ++ csa->cds->ds_phys->ds_bp = tmp; ++ } ++ ++ /* set dd_*_bytes */ ++ { ++ int64_t dused, dcomp, duncomp; ++ uint64_t cdl_used, cdl_comp, cdl_uncomp; ++ uint64_t odl_used, odl_comp, odl_uncomp; ++ ++ ASSERT3U(csa->cds->ds_dir->dd_phys-> ++ dd_used_breakdown[DD_USED_SNAP], ==, 0); ++ ++ dsl_deadlist_space(&csa->cds->ds_deadlist, ++ &cdl_used, &cdl_comp, &cdl_uncomp); ++ dsl_deadlist_space(&csa->ohds->ds_deadlist, ++ &odl_used, &odl_comp, &odl_uncomp); ++ ++ dused = csa->cds->ds_phys->ds_used_bytes + cdl_used - ++ (csa->ohds->ds_phys->ds_used_bytes + odl_used); ++ dcomp = csa->cds->ds_phys->ds_compressed_bytes + cdl_comp - ++ (csa->ohds->ds_phys->ds_compressed_bytes + odl_comp); ++ duncomp = csa->cds->ds_phys->ds_uncompressed_bytes + ++ cdl_uncomp - ++ (csa->ohds->ds_phys->ds_uncompressed_bytes + odl_uncomp); ++ ++ dsl_dir_diduse_space(csa->ohds->ds_dir, DD_USED_HEAD, ++ dused, dcomp, duncomp, tx); ++ dsl_dir_diduse_space(csa->cds->ds_dir, DD_USED_HEAD, ++ -dused, -dcomp, -duncomp, tx); ++ ++ /* ++ * The difference in the space used by snapshots is the ++ * difference in snapshot space due to the head's ++ * deadlist (since that's the only thing that's ++ * changing that affects the snapused). ++ */ ++ dsl_deadlist_space_range(&csa->cds->ds_deadlist, ++ csa->ohds->ds_dir->dd_origin_txg, UINT64_MAX, ++ &cdl_used, &cdl_comp, &cdl_uncomp); ++ dsl_deadlist_space_range(&csa->ohds->ds_deadlist, ++ csa->ohds->ds_dir->dd_origin_txg, UINT64_MAX, ++ &odl_used, &odl_comp, &odl_uncomp); ++ dsl_dir_transfer_space(csa->ohds->ds_dir, cdl_used - odl_used, ++ DD_USED_HEAD, DD_USED_SNAP, tx); ++ } ++ ++ /* swap ds_*_bytes */ ++ SWITCH64(csa->ohds->ds_phys->ds_used_bytes, ++ csa->cds->ds_phys->ds_used_bytes); ++ SWITCH64(csa->ohds->ds_phys->ds_compressed_bytes, ++ csa->cds->ds_phys->ds_compressed_bytes); ++ SWITCH64(csa->ohds->ds_phys->ds_uncompressed_bytes, ++ csa->cds->ds_phys->ds_uncompressed_bytes); ++ SWITCH64(csa->ohds->ds_phys->ds_unique_bytes, ++ csa->cds->ds_phys->ds_unique_bytes); ++ ++ /* apply any parent delta for change in unconsumed refreservation */ ++ dsl_dir_diduse_space(csa->ohds->ds_dir, DD_USED_REFRSRV, ++ csa->unused_refres_delta, 0, 0, tx); ++ ++ /* ++ * Swap deadlists. 
++ */ ++ dsl_deadlist_close(&csa->cds->ds_deadlist); ++ dsl_deadlist_close(&csa->ohds->ds_deadlist); ++ SWITCH64(csa->ohds->ds_phys->ds_deadlist_obj, ++ csa->cds->ds_phys->ds_deadlist_obj); ++ dsl_deadlist_open(&csa->cds->ds_deadlist, dp->dp_meta_objset, ++ csa->cds->ds_phys->ds_deadlist_obj); ++ dsl_deadlist_open(&csa->ohds->ds_deadlist, dp->dp_meta_objset, ++ csa->ohds->ds_phys->ds_deadlist_obj); ++ ++ dsl_scan_ds_clone_swapped(csa->ohds, csa->cds, tx); ++} ++ ++/* ++ * Swap 'clone' with its origin head datasets. Used at the end of "zfs ++ * recv" into an existing fs to swizzle the file system to the new ++ * version, and by "zfs rollback". Can also be used to swap two ++ * independent head datasets if neither has any snapshots. ++ */ ++int ++dsl_dataset_clone_swap(dsl_dataset_t *clone, dsl_dataset_t *origin_head, ++ boolean_t force) ++{ ++ struct cloneswaparg csa; ++ int error; ++ ++ ASSERT(clone->ds_owner); ++ ASSERT(origin_head->ds_owner); ++retry: ++ /* ++ * Need exclusive access for the swap. If we're swapping these ++ * datasets back after an error, we already hold the locks. ++ */ ++ if (!RW_WRITE_HELD(&clone->ds_rwlock)) ++ rw_enter(&clone->ds_rwlock, RW_WRITER); ++ if (!RW_WRITE_HELD(&origin_head->ds_rwlock) && ++ !rw_tryenter(&origin_head->ds_rwlock, RW_WRITER)) { ++ rw_exit(&clone->ds_rwlock); ++ rw_enter(&origin_head->ds_rwlock, RW_WRITER); ++ if (!rw_tryenter(&clone->ds_rwlock, RW_WRITER)) { ++ rw_exit(&origin_head->ds_rwlock); ++ goto retry; ++ } ++ } ++ csa.cds = clone; ++ csa.ohds = origin_head; ++ csa.force = force; ++ error = dsl_sync_task_do(clone->ds_dir->dd_pool, ++ dsl_dataset_clone_swap_check, ++ dsl_dataset_clone_swap_sync, &csa, NULL, 9); ++ return (error); ++} ++ ++/* ++ * Given a pool name and a dataset object number in that pool, ++ * return the name of that dataset. ++ */ ++int ++dsl_dsobj_to_dsname(char *pname, uint64_t obj, char *buf) ++{ ++ spa_t *spa; ++ dsl_pool_t *dp; ++ dsl_dataset_t *ds; ++ int error; ++ ++ if ((error = spa_open(pname, &spa, FTAG)) != 0) ++ return (error); ++ dp = spa_get_dsl(spa); ++ rw_enter(&dp->dp_config_rwlock, RW_READER); ++ if ((error = dsl_dataset_hold_obj(dp, obj, FTAG, &ds)) == 0) { ++ dsl_dataset_name(ds, buf); ++ dsl_dataset_rele(ds, FTAG); ++ } ++ rw_exit(&dp->dp_config_rwlock); ++ spa_close(spa, FTAG); ++ ++ return (error); ++} ++ ++int ++dsl_dataset_check_quota(dsl_dataset_t *ds, boolean_t check_quota, ++ uint64_t asize, uint64_t inflight, uint64_t *used, uint64_t *ref_rsrv) ++{ ++ int error = 0; ++ ++ ASSERT3S(asize, >, 0); ++ ++ /* ++ * *ref_rsrv is the portion of asize that will come from any ++ * unconsumed refreservation space. ++ */ ++ *ref_rsrv = 0; ++ ++ mutex_enter(&ds->ds_lock); ++ /* ++ * Make a space adjustment for reserved bytes. ++ */ ++ if (ds->ds_reserved > ds->ds_phys->ds_unique_bytes) { ++ ASSERT3U(*used, >=, ++ ds->ds_reserved - ds->ds_phys->ds_unique_bytes); ++ *used -= (ds->ds_reserved - ds->ds_phys->ds_unique_bytes); ++ *ref_rsrv = ++ asize - MIN(asize, parent_delta(ds, asize + inflight)); ++ } ++ ++ if (!check_quota || ds->ds_quota == 0) { ++ mutex_exit(&ds->ds_lock); ++ return (0); ++ } ++ /* ++ * If they are requesting more space, and our current estimate ++ * is over quota, they get to try again unless the actual ++ * on-disk is over quota and there are no pending changes (which ++ * may free up space for us). 
++ */ ++ if (ds->ds_phys->ds_used_bytes + inflight >= ds->ds_quota) { ++ if (inflight > 0 || ds->ds_phys->ds_used_bytes < ds->ds_quota) ++ error = ERESTART; ++ else ++ error = EDQUOT; ++ ++ DMU_TX_STAT_BUMP(dmu_tx_quota); ++ } ++ mutex_exit(&ds->ds_lock); ++ ++ return (error); ++} ++ ++/* ARGSUSED */ ++static int ++dsl_dataset_set_quota_check(void *arg1, void *arg2, dmu_tx_t *tx) ++{ ++ dsl_dataset_t *ds = arg1; ++ dsl_prop_setarg_t *psa = arg2; ++ int err; ++ ++ if (spa_version(ds->ds_dir->dd_pool->dp_spa) < SPA_VERSION_REFQUOTA) ++ return (ENOTSUP); ++ ++ if ((err = dsl_prop_predict_sync(ds->ds_dir, psa)) != 0) ++ return (err); ++ ++ if (psa->psa_effective_value == 0) ++ return (0); ++ ++ if (psa->psa_effective_value < ds->ds_phys->ds_used_bytes || ++ psa->psa_effective_value < ds->ds_reserved) ++ return (ENOSPC); ++ ++ return (0); ++} ++ ++extern void dsl_prop_set_sync(void *, void *, dmu_tx_t *); ++ ++void ++dsl_dataset_set_quota_sync(void *arg1, void *arg2, dmu_tx_t *tx) ++{ ++ dsl_dataset_t *ds = arg1; ++ dsl_prop_setarg_t *psa = arg2; ++ uint64_t effective_value = psa->psa_effective_value; ++ ++ dsl_prop_set_sync(ds, psa, tx); ++ DSL_PROP_CHECK_PREDICTION(ds->ds_dir, psa); ++ ++ if (ds->ds_quota != effective_value) { ++ dmu_buf_will_dirty(ds->ds_dbuf, tx); ++ ds->ds_quota = effective_value; ++ } ++} ++ ++int ++dsl_dataset_set_quota(const char *dsname, zprop_source_t source, uint64_t quota) ++{ ++ dsl_dataset_t *ds; ++ dsl_prop_setarg_t psa; ++ int err; ++ ++ dsl_prop_setarg_init_uint64(&psa, "refquota", source, "a); ++ ++ err = dsl_dataset_hold(dsname, FTAG, &ds); ++ if (err) ++ return (err); ++ ++ /* ++ * If someone removes a file, then tries to set the quota, we ++ * want to make sure the file freeing takes effect. ++ */ ++ txg_wait_open(ds->ds_dir->dd_pool, 0); ++ ++ err = dsl_sync_task_do(ds->ds_dir->dd_pool, ++ dsl_dataset_set_quota_check, dsl_dataset_set_quota_sync, ++ ds, &psa, 0); ++ ++ dsl_dataset_rele(ds, FTAG); ++ return (err); ++} ++ ++static int ++dsl_dataset_set_reservation_check(void *arg1, void *arg2, dmu_tx_t *tx) ++{ ++ dsl_dataset_t *ds = arg1; ++ dsl_prop_setarg_t *psa = arg2; ++ uint64_t effective_value; ++ uint64_t unique; ++ int err; ++ ++ if (spa_version(ds->ds_dir->dd_pool->dp_spa) < ++ SPA_VERSION_REFRESERVATION) ++ return (ENOTSUP); ++ ++ if (dsl_dataset_is_snapshot(ds)) ++ return (EINVAL); ++ ++ if ((err = dsl_prop_predict_sync(ds->ds_dir, psa)) != 0) ++ return (err); ++ ++ effective_value = psa->psa_effective_value; ++ ++ /* ++ * If we are doing the preliminary check in open context, the ++ * space estimates may be inaccurate. 
++ */ ++ if (!dmu_tx_is_syncing(tx)) ++ return (0); ++ ++ mutex_enter(&ds->ds_lock); ++ if (!DS_UNIQUE_IS_ACCURATE(ds)) ++ dsl_dataset_recalc_head_uniq(ds); ++ unique = ds->ds_phys->ds_unique_bytes; ++ mutex_exit(&ds->ds_lock); ++ ++ if (MAX(unique, effective_value) > MAX(unique, ds->ds_reserved)) { ++ uint64_t delta = MAX(unique, effective_value) - ++ MAX(unique, ds->ds_reserved); ++ ++ if (delta > dsl_dir_space_available(ds->ds_dir, NULL, 0, TRUE)) ++ return (ENOSPC); ++ if (ds->ds_quota > 0 && ++ effective_value > ds->ds_quota) ++ return (ENOSPC); ++ } ++ ++ return (0); ++} ++ ++static void ++dsl_dataset_set_reservation_sync(void *arg1, void *arg2, dmu_tx_t *tx) ++{ ++ dsl_dataset_t *ds = arg1; ++ dsl_prop_setarg_t *psa = arg2; ++ uint64_t effective_value = psa->psa_effective_value; ++ uint64_t unique; ++ int64_t delta; ++ ++ dsl_prop_set_sync(ds, psa, tx); ++ DSL_PROP_CHECK_PREDICTION(ds->ds_dir, psa); ++ ++ dmu_buf_will_dirty(ds->ds_dbuf, tx); ++ ++ mutex_enter(&ds->ds_dir->dd_lock); ++ mutex_enter(&ds->ds_lock); ++ ASSERT(DS_UNIQUE_IS_ACCURATE(ds)); ++ unique = ds->ds_phys->ds_unique_bytes; ++ delta = MAX(0, (int64_t)(effective_value - unique)) - ++ MAX(0, (int64_t)(ds->ds_reserved - unique)); ++ ds->ds_reserved = effective_value; ++ mutex_exit(&ds->ds_lock); ++ ++ dsl_dir_diduse_space(ds->ds_dir, DD_USED_REFRSRV, delta, 0, 0, tx); ++ mutex_exit(&ds->ds_dir->dd_lock); ++} ++ ++int ++dsl_dataset_set_reservation(const char *dsname, zprop_source_t source, ++ uint64_t reservation) ++{ ++ dsl_dataset_t *ds; ++ dsl_prop_setarg_t psa; ++ int err; ++ ++ dsl_prop_setarg_init_uint64(&psa, "refreservation", source, ++ &reservation); ++ ++ err = dsl_dataset_hold(dsname, FTAG, &ds); ++ if (err) ++ return (err); ++ ++ err = dsl_sync_task_do(ds->ds_dir->dd_pool, ++ dsl_dataset_set_reservation_check, ++ dsl_dataset_set_reservation_sync, ds, &psa, 0); ++ ++ dsl_dataset_rele(ds, FTAG); ++ return (err); ++} ++ ++typedef struct zfs_hold_cleanup_arg { ++ dsl_pool_t *dp; ++ uint64_t dsobj; ++ char htag[MAXNAMELEN]; ++} zfs_hold_cleanup_arg_t; ++ ++static void ++dsl_dataset_user_release_onexit(void *arg) ++{ ++ zfs_hold_cleanup_arg_t *ca = arg; ++ ++ (void) dsl_dataset_user_release_tmp(ca->dp, ca->dsobj, ca->htag, ++ B_TRUE); ++ kmem_free(ca, sizeof (zfs_hold_cleanup_arg_t)); ++} ++ ++void ++dsl_register_onexit_hold_cleanup(dsl_dataset_t *ds, const char *htag, ++ minor_t minor) ++{ ++ zfs_hold_cleanup_arg_t *ca; ++ ++ ca = kmem_alloc(sizeof (zfs_hold_cleanup_arg_t), KM_SLEEP); ++ ca->dp = ds->ds_dir->dd_pool; ++ ca->dsobj = ds->ds_object; ++ (void) strlcpy(ca->htag, htag, sizeof (ca->htag)); ++ VERIFY3U(0, ==, zfs_onexit_add_cb(minor, ++ dsl_dataset_user_release_onexit, ca, NULL)); ++} ++ ++/* ++ * If you add new checks here, you may need to add ++ * additional checks to the "temporary" case in ++ * snapshot_check() in dmu_objset.c. 
++ */ ++static int ++dsl_dataset_user_hold_check(void *arg1, void *arg2, dmu_tx_t *tx) ++{ ++ dsl_dataset_t *ds = arg1; ++ struct dsl_ds_holdarg *ha = arg2; ++ char *htag = ha->htag; ++ objset_t *mos = ds->ds_dir->dd_pool->dp_meta_objset; ++ int error = 0; ++ ++ if (spa_version(ds->ds_dir->dd_pool->dp_spa) < SPA_VERSION_USERREFS) ++ return (ENOTSUP); ++ ++ if (!dsl_dataset_is_snapshot(ds)) ++ return (EINVAL); ++ ++ /* tags must be unique */ ++ mutex_enter(&ds->ds_lock); ++ if (ds->ds_phys->ds_userrefs_obj) { ++ error = zap_lookup(mos, ds->ds_phys->ds_userrefs_obj, htag, ++ 8, 1, tx); ++ if (error == 0) ++ error = EEXIST; ++ else if (error == ENOENT) ++ error = 0; ++ } ++ mutex_exit(&ds->ds_lock); ++ ++ if (error == 0 && ha->temphold && ++ strlen(htag) + MAX_TAG_PREFIX_LEN >= MAXNAMELEN) ++ error = E2BIG; ++ ++ return (error); ++} ++ ++void ++dsl_dataset_user_hold_sync(void *arg1, void *arg2, dmu_tx_t *tx) ++{ ++ dsl_dataset_t *ds = arg1; ++ struct dsl_ds_holdarg *ha = arg2; ++ char *htag = ha->htag; ++ dsl_pool_t *dp = ds->ds_dir->dd_pool; ++ objset_t *mos = dp->dp_meta_objset; ++ uint64_t now = gethrestime_sec(); ++ uint64_t zapobj; ++ ++ mutex_enter(&ds->ds_lock); ++ if (ds->ds_phys->ds_userrefs_obj == 0) { ++ /* ++ * This is the first user hold for this dataset. Create ++ * the userrefs zap object. ++ */ ++ dmu_buf_will_dirty(ds->ds_dbuf, tx); ++ zapobj = ds->ds_phys->ds_userrefs_obj = ++ zap_create(mos, DMU_OT_USERREFS, DMU_OT_NONE, 0, tx); ++ } else { ++ zapobj = ds->ds_phys->ds_userrefs_obj; ++ } ++ ds->ds_userrefs++; ++ mutex_exit(&ds->ds_lock); ++ ++ VERIFY(0 == zap_add(mos, zapobj, htag, 8, 1, &now, tx)); ++ ++ if (ha->temphold) { ++ VERIFY(0 == dsl_pool_user_hold(dp, ds->ds_object, ++ htag, &now, tx)); ++ } ++ ++ spa_history_log_internal(LOG_DS_USER_HOLD, ++ dp->dp_spa, tx, "<%s> temp = %d dataset = %llu", htag, ++ (int)ha->temphold, ds->ds_object); ++} ++ ++static int ++dsl_dataset_user_hold_one(const char *dsname, void *arg) ++{ ++ struct dsl_ds_holdarg *ha = arg; ++ dsl_dataset_t *ds; ++ int error; ++ char *name; ++ ++ /* alloc a buffer to hold dsname@snapname plus terminating NULL */ ++ name = kmem_asprintf("%s@%s", dsname, ha->snapname); ++ error = dsl_dataset_hold(name, ha->dstg, &ds); ++ strfree(name); ++ if (error == 0) { ++ ha->gotone = B_TRUE; ++ dsl_sync_task_create(ha->dstg, dsl_dataset_user_hold_check, ++ dsl_dataset_user_hold_sync, ds, ha, 0); ++ } else if (error == ENOENT && ha->recursive) { ++ error = 0; ++ } else { ++ (void) strlcpy(ha->failed, dsname, sizeof (ha->failed)); ++ } ++ return (error); ++} ++ ++int ++dsl_dataset_user_hold_for_send(dsl_dataset_t *ds, char *htag, ++ boolean_t temphold) ++{ ++ struct dsl_ds_holdarg *ha; ++ int error; ++ ++ ha = kmem_zalloc(sizeof (struct dsl_ds_holdarg), KM_SLEEP); ++ ha->htag = htag; ++ ha->temphold = temphold; ++ error = dsl_sync_task_do(ds->ds_dir->dd_pool, ++ dsl_dataset_user_hold_check, dsl_dataset_user_hold_sync, ++ ds, ha, 0); ++ kmem_free(ha, sizeof (struct dsl_ds_holdarg)); ++ ++ return (error); ++} ++ ++int ++dsl_dataset_user_hold(char *dsname, char *snapname, char *htag, ++ boolean_t recursive, boolean_t temphold, int cleanup_fd) ++{ ++ struct dsl_ds_holdarg *ha; ++ dsl_sync_task_t *dst; ++ spa_t *spa; ++ int error; ++ minor_t minor = 0; ++ ++ if (cleanup_fd != -1) { ++ /* Currently we only support cleanup-on-exit of tempholds. 
*/ ++ if (!temphold) ++ return (EINVAL); ++ error = zfs_onexit_fd_hold(cleanup_fd, &minor); ++ if (error) ++ return (error); ++ } ++ ++ ha = kmem_zalloc(sizeof (struct dsl_ds_holdarg), KM_SLEEP); ++ ++ (void) strlcpy(ha->failed, dsname, sizeof (ha->failed)); ++ ++ error = spa_open(dsname, &spa, FTAG); ++ if (error) { ++ kmem_free(ha, sizeof (struct dsl_ds_holdarg)); ++ if (cleanup_fd != -1) ++ zfs_onexit_fd_rele(cleanup_fd); ++ return (error); ++ } ++ ++ ha->dstg = dsl_sync_task_group_create(spa_get_dsl(spa)); ++ ha->htag = htag; ++ ha->snapname = snapname; ++ ha->recursive = recursive; ++ ha->temphold = temphold; ++ ++ if (recursive) { ++ error = dmu_objset_find(dsname, dsl_dataset_user_hold_one, ++ ha, DS_FIND_CHILDREN); ++ } else { ++ error = dsl_dataset_user_hold_one(dsname, ha); ++ } ++ if (error == 0) ++ error = dsl_sync_task_group_wait(ha->dstg); ++ ++ for (dst = list_head(&ha->dstg->dstg_tasks); dst; ++ dst = list_next(&ha->dstg->dstg_tasks, dst)) { ++ dsl_dataset_t *ds = dst->dst_arg1; ++ ++ if (dst->dst_err) { ++ dsl_dataset_name(ds, ha->failed); ++ *strchr(ha->failed, '@') = '\0'; ++ } else if (error == 0 && minor != 0 && temphold) { ++ /* ++ * If this hold is to be released upon process exit, ++ * register that action now. ++ */ ++ dsl_register_onexit_hold_cleanup(ds, htag, minor); ++ } ++ dsl_dataset_rele(ds, ha->dstg); ++ } ++ ++ if (error == 0 && recursive && !ha->gotone) ++ error = ENOENT; ++ ++ if (error) ++ (void) strlcpy(dsname, ha->failed, sizeof (ha->failed)); ++ ++ dsl_sync_task_group_destroy(ha->dstg); ++ ++ kmem_free(ha, sizeof (struct dsl_ds_holdarg)); ++ spa_close(spa, FTAG); ++ if (cleanup_fd != -1) ++ zfs_onexit_fd_rele(cleanup_fd); ++ return (error); ++} ++ ++struct dsl_ds_releasearg { ++ dsl_dataset_t *ds; ++ const char *htag; ++ boolean_t own; /* do we own or just hold ds? */ ++}; ++ ++static int ++dsl_dataset_release_might_destroy(dsl_dataset_t *ds, const char *htag, ++ boolean_t *might_destroy) ++{ ++ objset_t *mos = ds->ds_dir->dd_pool->dp_meta_objset; ++ uint64_t zapobj; ++ uint64_t tmp; ++ int error; ++ ++ *might_destroy = B_FALSE; ++ ++ mutex_enter(&ds->ds_lock); ++ zapobj = ds->ds_phys->ds_userrefs_obj; ++ if (zapobj == 0) { ++ /* The tag can't possibly exist */ ++ mutex_exit(&ds->ds_lock); ++ return (ESRCH); ++ } ++ ++ /* Make sure the tag exists */ ++ error = zap_lookup(mos, zapobj, htag, 8, 1, &tmp); ++ if (error) { ++ mutex_exit(&ds->ds_lock); ++ if (error == ENOENT) ++ error = ESRCH; ++ return (error); ++ } ++ ++ if (ds->ds_userrefs == 1 && ds->ds_phys->ds_num_children == 1 && ++ DS_IS_DEFER_DESTROY(ds)) ++ *might_destroy = B_TRUE; ++ ++ mutex_exit(&ds->ds_lock); ++ return (0); ++} ++ ++static int ++dsl_dataset_user_release_check(void *arg1, void *tag, dmu_tx_t *tx) ++{ ++ struct dsl_ds_releasearg *ra = arg1; ++ dsl_dataset_t *ds = ra->ds; ++ boolean_t might_destroy; ++ int error; ++ ++ if (spa_version(ds->ds_dir->dd_pool->dp_spa) < SPA_VERSION_USERREFS) ++ return (ENOTSUP); ++ ++ error = dsl_dataset_release_might_destroy(ds, ra->htag, &might_destroy); ++ if (error) ++ return (error); ++ ++ if (might_destroy) { ++ struct dsl_ds_destroyarg dsda = {0}; ++ ++ if (dmu_tx_is_syncing(tx)) { ++ /* ++ * If we're not prepared to remove the snapshot, ++ * we can't allow the release to happen right now. 
++ */ ++ if (!ra->own) ++ return (EBUSY); ++ } ++ dsda.ds = ds; ++ dsda.releasing = B_TRUE; ++ return (dsl_dataset_destroy_check(&dsda, tag, tx)); ++ } ++ ++ return (0); ++} ++ ++static void ++dsl_dataset_user_release_sync(void *arg1, void *tag, dmu_tx_t *tx) ++{ ++ struct dsl_ds_releasearg *ra = arg1; ++ dsl_dataset_t *ds = ra->ds; ++ dsl_pool_t *dp = ds->ds_dir->dd_pool; ++ objset_t *mos = dp->dp_meta_objset; ++ uint64_t zapobj; ++ uint64_t dsobj = ds->ds_object; ++ uint64_t refs; ++ int error; ++ ++ mutex_enter(&ds->ds_lock); ++ ds->ds_userrefs--; ++ refs = ds->ds_userrefs; ++ mutex_exit(&ds->ds_lock); ++ error = dsl_pool_user_release(dp, ds->ds_object, ra->htag, tx); ++ VERIFY(error == 0 || error == ENOENT); ++ zapobj = ds->ds_phys->ds_userrefs_obj; ++ VERIFY(0 == zap_remove(mos, zapobj, ra->htag, tx)); ++ ++ spa_history_log_internal(LOG_DS_USER_RELEASE, ++ dp->dp_spa, tx, "<%s> %lld dataset = %llu", ++ ra->htag, (longlong_t)refs, dsobj); ++ ++ if (ds->ds_userrefs == 0 && ds->ds_phys->ds_num_children == 1 && ++ DS_IS_DEFER_DESTROY(ds)) { ++ struct dsl_ds_destroyarg dsda = {0}; ++ ++ ASSERT(ra->own); ++ dsda.ds = ds; ++ dsda.releasing = B_TRUE; ++ /* We already did the destroy_check */ ++ dsl_dataset_destroy_sync(&dsda, tag, tx); ++ } ++} ++ ++static int ++dsl_dataset_user_release_one(const char *dsname, void *arg) ++{ ++ struct dsl_ds_holdarg *ha = arg; ++ struct dsl_ds_releasearg *ra; ++ dsl_dataset_t *ds; ++ int error; ++ void *dtag = ha->dstg; ++ char *name; ++ boolean_t own = B_FALSE; ++ boolean_t might_destroy; ++ ++ /* alloc a buffer to hold dsname@snapname, plus the terminating NULL */ ++ name = kmem_asprintf("%s@%s", dsname, ha->snapname); ++ error = dsl_dataset_hold(name, dtag, &ds); ++ strfree(name); ++ if (error == ENOENT && ha->recursive) ++ return (0); ++ (void) strlcpy(ha->failed, dsname, sizeof (ha->failed)); ++ if (error) ++ return (error); ++ ++ ha->gotone = B_TRUE; ++ ++ ASSERT(dsl_dataset_is_snapshot(ds)); ++ ++ error = dsl_dataset_release_might_destroy(ds, ha->htag, &might_destroy); ++ if (error) { ++ dsl_dataset_rele(ds, dtag); ++ return (error); ++ } ++ ++ if (might_destroy) { ++#ifdef _KERNEL ++ name = kmem_asprintf("%s@%s", dsname, ha->snapname); ++ error = zfs_unmount_snap(name, NULL); ++ strfree(name); ++ if (error) { ++ dsl_dataset_rele(ds, dtag); ++ return (error); ++ } ++#endif ++ if (!dsl_dataset_tryown(ds, B_TRUE, dtag)) { ++ dsl_dataset_rele(ds, dtag); ++ return (EBUSY); ++ } else { ++ own = B_TRUE; ++ dsl_dataset_make_exclusive(ds, dtag); ++ } ++ } ++ ++ ra = kmem_alloc(sizeof (struct dsl_ds_releasearg), KM_SLEEP); ++ ra->ds = ds; ++ ra->htag = ha->htag; ++ ra->own = own; ++ dsl_sync_task_create(ha->dstg, dsl_dataset_user_release_check, ++ dsl_dataset_user_release_sync, ra, dtag, 0); ++ ++ return (0); ++} ++ ++int ++dsl_dataset_user_release(char *dsname, char *snapname, char *htag, ++ boolean_t recursive) ++{ ++ struct dsl_ds_holdarg *ha; ++ dsl_sync_task_t *dst; ++ spa_t *spa; ++ int error; ++ ++top: ++ ha = kmem_zalloc(sizeof (struct dsl_ds_holdarg), KM_SLEEP); ++ ++ (void) strlcpy(ha->failed, dsname, sizeof (ha->failed)); ++ ++ error = spa_open(dsname, &spa, FTAG); ++ if (error) { ++ kmem_free(ha, sizeof (struct dsl_ds_holdarg)); ++ return (error); ++ } ++ ++ ha->dstg = dsl_sync_task_group_create(spa_get_dsl(spa)); ++ ha->htag = htag; ++ ha->snapname = snapname; ++ ha->recursive = recursive; ++ if (recursive) { ++ error = dmu_objset_find(dsname, dsl_dataset_user_release_one, ++ ha, DS_FIND_CHILDREN); ++ } else { ++ error = 
dsl_dataset_user_release_one(dsname, ha); ++ } ++ if (error == 0) ++ error = dsl_sync_task_group_wait(ha->dstg); ++ ++ for (dst = list_head(&ha->dstg->dstg_tasks); dst; ++ dst = list_next(&ha->dstg->dstg_tasks, dst)) { ++ struct dsl_ds_releasearg *ra = dst->dst_arg1; ++ dsl_dataset_t *ds = ra->ds; ++ ++ if (dst->dst_err) ++ dsl_dataset_name(ds, ha->failed); ++ ++ if (ra->own) ++ dsl_dataset_disown(ds, ha->dstg); ++ else ++ dsl_dataset_rele(ds, ha->dstg); ++ ++ kmem_free(ra, sizeof (struct dsl_ds_releasearg)); ++ } ++ ++ if (error == 0 && recursive && !ha->gotone) ++ error = ENOENT; ++ ++ if (error && error != EBUSY) ++ (void) strlcpy(dsname, ha->failed, sizeof (ha->failed)); ++ ++ dsl_sync_task_group_destroy(ha->dstg); ++ kmem_free(ha, sizeof (struct dsl_ds_holdarg)); ++ spa_close(spa, FTAG); ++ ++ /* ++ * We can get EBUSY if we were racing with deferred destroy and ++ * dsl_dataset_user_release_check() hadn't done the necessary ++ * open context setup. We can also get EBUSY if we're racing ++ * with destroy and that thread is the ds_owner. Either way ++ * the busy condition should be transient, and we should retry ++ * the release operation. ++ */ ++ if (error == EBUSY) ++ goto top; ++ ++ return (error); ++} ++ ++/* ++ * Called at spa_load time (with retry == B_FALSE) to release a stale ++ * temporary user hold. Also called by the onexit code (with retry == B_TRUE). ++ */ ++int ++dsl_dataset_user_release_tmp(dsl_pool_t *dp, uint64_t dsobj, char *htag, ++ boolean_t retry) ++{ ++ dsl_dataset_t *ds; ++ char *snap; ++ char *name; ++ int namelen; ++ int error; ++ ++ do { ++ rw_enter(&dp->dp_config_rwlock, RW_READER); ++ error = dsl_dataset_hold_obj(dp, dsobj, FTAG, &ds); ++ rw_exit(&dp->dp_config_rwlock); ++ if (error) ++ return (error); ++ namelen = dsl_dataset_namelen(ds)+1; ++ name = kmem_alloc(namelen, KM_SLEEP); ++ dsl_dataset_name(ds, name); ++ dsl_dataset_rele(ds, FTAG); ++ ++ snap = strchr(name, '@'); ++ *snap = '\0'; ++ ++snap; ++ error = dsl_dataset_user_release(name, snap, htag, B_FALSE); ++ kmem_free(name, namelen); ++ ++ /* ++ * The object can't have been destroyed because we have a hold, ++ * but it might have been renamed, resulting in ENOENT. Retry ++ * if we've been requested to do so. ++ * ++ * It would be nice if we could use the dsobj all the way ++ * through and avoid ENOENT entirely. But we might need to ++ * unmount the snapshot, and there's currently no way to lookup ++ * a vfsp using a ZFS object id. ++ */ ++ } while ((error == ENOENT) && retry); ++ ++ return (error); ++} ++ ++int ++dsl_dataset_get_holds(const char *dsname, nvlist_t **nvp) ++{ ++ dsl_dataset_t *ds; ++ int err; ++ ++ err = dsl_dataset_hold(dsname, FTAG, &ds); ++ if (err) ++ return (err); ++ ++ VERIFY(0 == nvlist_alloc(nvp, NV_UNIQUE_NAME, KM_SLEEP)); ++ if (ds->ds_phys->ds_userrefs_obj != 0) { ++ zap_attribute_t *za; ++ zap_cursor_t zc; ++ ++ za = kmem_alloc(sizeof (zap_attribute_t), KM_SLEEP); ++ for (zap_cursor_init(&zc, ds->ds_dir->dd_pool->dp_meta_objset, ++ ds->ds_phys->ds_userrefs_obj); ++ zap_cursor_retrieve(&zc, za) == 0; ++ zap_cursor_advance(&zc)) { ++ VERIFY(0 == nvlist_add_uint64(*nvp, za->za_name, ++ za->za_first_integer)); ++ } ++ zap_cursor_fini(&zc); ++ kmem_free(za, sizeof (zap_attribute_t)); ++ } ++ dsl_dataset_rele(ds, FTAG); ++ return (0); ++} ++ ++/* ++ * Note, this function is used as the callback for dmu_objset_find(). We ++ * always return 0 so that we will continue to find and process ++ * inconsistent datasets, even if we encounter an error trying to ++ * process one of them. 
++ */ ++/* ARGSUSED */ ++int ++dsl_destroy_inconsistent(const char *dsname, void *arg) ++{ ++ dsl_dataset_t *ds; ++ ++ if (dsl_dataset_own(dsname, B_TRUE, FTAG, &ds) == 0) { ++ if (DS_IS_INCONSISTENT(ds)) ++ (void) dsl_dataset_destroy(ds, FTAG, B_FALSE); ++ else ++ dsl_dataset_disown(ds, FTAG); ++ } ++ return (0); ++} ++ ++ ++/* ++ * Return (in *usedp) the amount of space written in new that is not ++ * present in oldsnap. New may be a snapshot or the head. Old must be ++ * a snapshot before new, in new's filesystem (or its origin). If not then ++ * fail and return EINVAL. ++ * ++ * The written space is calculated by considering two components: First, we ++ * ignore any freed space, and calculate the written as new's used space ++ * minus old's used space. Next, we add in the amount of space that was freed ++ * between the two snapshots, thus reducing new's used space relative to old's. ++ * Specifically, this is the space that was born before old->ds_creation_txg, ++ * and freed before new (ie. on new's deadlist or a previous deadlist). ++ * ++ * space freed [---------------------] ++ * snapshots ---O-------O--------O-------O------ ++ * oldsnap new ++ */ ++int ++dsl_dataset_space_written(dsl_dataset_t *oldsnap, dsl_dataset_t *new, ++ uint64_t *usedp, uint64_t *compp, uint64_t *uncompp) ++{ ++ int err = 0; ++ uint64_t snapobj; ++ dsl_pool_t *dp = new->ds_dir->dd_pool; ++ ++ *usedp = 0; ++ *usedp += new->ds_phys->ds_used_bytes; ++ *usedp -= oldsnap->ds_phys->ds_used_bytes; ++ ++ *compp = 0; ++ *compp += new->ds_phys->ds_compressed_bytes; ++ *compp -= oldsnap->ds_phys->ds_compressed_bytes; ++ ++ *uncompp = 0; ++ *uncompp += new->ds_phys->ds_uncompressed_bytes; ++ *uncompp -= oldsnap->ds_phys->ds_uncompressed_bytes; ++ ++ rw_enter(&dp->dp_config_rwlock, RW_READER); ++ snapobj = new->ds_object; ++ while (snapobj != oldsnap->ds_object) { ++ dsl_dataset_t *snap; ++ uint64_t used, comp, uncomp; ++ ++ err = dsl_dataset_hold_obj(dp, snapobj, FTAG, &snap); ++ if (err != 0) ++ break; ++ ++ if (snap->ds_phys->ds_prev_snap_txg == ++ oldsnap->ds_phys->ds_creation_txg) { ++ /* ++ * The blocks in the deadlist can not be born after ++ * ds_prev_snap_txg, so get the whole deadlist space, ++ * which is more efficient (especially for old-format ++ * deadlists). Unfortunately the deadlist code ++ * doesn't have enough information to make this ++ * optimization itself. ++ */ ++ dsl_deadlist_space(&snap->ds_deadlist, ++ &used, &comp, &uncomp); ++ } else { ++ dsl_deadlist_space_range(&snap->ds_deadlist, ++ 0, oldsnap->ds_phys->ds_creation_txg, ++ &used, &comp, &uncomp); ++ } ++ *usedp += used; ++ *compp += comp; ++ *uncompp += uncomp; ++ ++ /* ++ * If we get to the beginning of the chain of snapshots ++ * (ds_prev_snap_obj == 0) before oldsnap, then oldsnap ++ * was not a snapshot of/before new. ++ */ ++ snapobj = snap->ds_phys->ds_prev_snap_obj; ++ dsl_dataset_rele(snap, FTAG); ++ if (snapobj == 0) { ++ err = EINVAL; ++ break; ++ } ++ ++ } ++ rw_exit(&dp->dp_config_rwlock); ++ return (err); ++} ++ ++/* ++ * Return (in *usedp) the amount of space that will be reclaimed if firstsnap, ++ * lastsnap, and all snapshots in between are deleted. 
++ * ++ * blocks that would be freed [---------------------------] ++ * snapshots ---O-------O--------O-------O--------O ++ * firstsnap lastsnap ++ * ++ * This is the set of blocks that were born after the snap before firstsnap, ++ * (birth > firstsnap->prev_snap_txg) and died before the snap after the ++ * last snap (ie, is on lastsnap->ds_next->ds_deadlist or an earlier deadlist). ++ * We calculate this by iterating over the relevant deadlists (from the snap ++ * after lastsnap, backward to the snap after firstsnap), summing up the ++ * space on the deadlist that was born after the snap before firstsnap. ++ */ ++int ++dsl_dataset_space_wouldfree(dsl_dataset_t *firstsnap, ++ dsl_dataset_t *lastsnap, ++ uint64_t *usedp, uint64_t *compp, uint64_t *uncompp) ++{ ++ int err = 0; ++ uint64_t snapobj; ++ dsl_pool_t *dp = firstsnap->ds_dir->dd_pool; ++ ++ ASSERT(dsl_dataset_is_snapshot(firstsnap)); ++ ASSERT(dsl_dataset_is_snapshot(lastsnap)); ++ ++ /* ++ * Check that the snapshots are in the same dsl_dir, and firstsnap ++ * is before lastsnap. ++ */ ++ if (firstsnap->ds_dir != lastsnap->ds_dir || ++ firstsnap->ds_phys->ds_creation_txg > ++ lastsnap->ds_phys->ds_creation_txg) ++ return (EINVAL); ++ ++ *usedp = *compp = *uncompp = 0; ++ ++ rw_enter(&dp->dp_config_rwlock, RW_READER); ++ snapobj = lastsnap->ds_phys->ds_next_snap_obj; ++ while (snapobj != firstsnap->ds_object) { ++ dsl_dataset_t *ds; ++ uint64_t used, comp, uncomp; ++ ++ err = dsl_dataset_hold_obj(dp, snapobj, FTAG, &ds); ++ if (err != 0) ++ break; ++ ++ dsl_deadlist_space_range(&ds->ds_deadlist, ++ firstsnap->ds_phys->ds_prev_snap_txg, UINT64_MAX, ++ &used, &comp, &uncomp); ++ *usedp += used; ++ *compp += comp; ++ *uncompp += uncomp; ++ ++ snapobj = ds->ds_phys->ds_prev_snap_obj; ++ ASSERT3U(snapobj, !=, 0); ++ dsl_dataset_rele(ds, FTAG); ++ } ++ rw_exit(&dp->dp_config_rwlock); ++ return (err); ++} ++ ++#if defined(_KERNEL) && defined(HAVE_SPL) ++EXPORT_SYMBOL(dmu_snapshots_destroy_nvl); ++EXPORT_SYMBOL(dsl_dataset_hold); ++EXPORT_SYMBOL(dsl_dataset_hold_obj); ++EXPORT_SYMBOL(dsl_dataset_own); ++EXPORT_SYMBOL(dsl_dataset_own_obj); ++EXPORT_SYMBOL(dsl_dataset_name); ++EXPORT_SYMBOL(dsl_dataset_rele); ++EXPORT_SYMBOL(dsl_dataset_disown); ++EXPORT_SYMBOL(dsl_dataset_drop_ref); ++EXPORT_SYMBOL(dsl_dataset_tryown); ++EXPORT_SYMBOL(dsl_dataset_make_exclusive); ++EXPORT_SYMBOL(dsl_dataset_create_sync); ++EXPORT_SYMBOL(dsl_dataset_create_sync_dd); ++EXPORT_SYMBOL(dsl_dataset_destroy); ++EXPORT_SYMBOL(dsl_dataset_destroy_check); ++EXPORT_SYMBOL(dsl_dataset_destroy_sync); ++EXPORT_SYMBOL(dsl_dataset_snapshot_check); ++EXPORT_SYMBOL(dsl_dataset_snapshot_sync); ++EXPORT_SYMBOL(dsl_dataset_rename); ++EXPORT_SYMBOL(dsl_dataset_promote); ++EXPORT_SYMBOL(dsl_dataset_clone_swap); ++EXPORT_SYMBOL(dsl_dataset_user_hold); ++EXPORT_SYMBOL(dsl_dataset_user_release); ++EXPORT_SYMBOL(dsl_dataset_user_release_tmp); ++EXPORT_SYMBOL(dsl_dataset_get_holds); ++EXPORT_SYMBOL(dsl_dataset_get_blkptr); ++EXPORT_SYMBOL(dsl_dataset_set_blkptr); ++EXPORT_SYMBOL(dsl_dataset_get_spa); ++EXPORT_SYMBOL(dsl_dataset_modified_since_lastsnap); ++EXPORT_SYMBOL(dsl_dataset_space_written); ++EXPORT_SYMBOL(dsl_dataset_space_wouldfree); ++EXPORT_SYMBOL(dsl_dataset_sync); ++EXPORT_SYMBOL(dsl_dataset_block_born); ++EXPORT_SYMBOL(dsl_dataset_block_kill); ++EXPORT_SYMBOL(dsl_dataset_block_freeable); ++EXPORT_SYMBOL(dsl_dataset_prev_snap_txg); ++EXPORT_SYMBOL(dsl_dataset_dirty); ++EXPORT_SYMBOL(dsl_dataset_stats); ++EXPORT_SYMBOL(dsl_dataset_fast_stat); 
++EXPORT_SYMBOL(dsl_dataset_space); ++EXPORT_SYMBOL(dsl_dataset_fsid_guid); ++EXPORT_SYMBOL(dsl_dsobj_to_dsname); ++EXPORT_SYMBOL(dsl_dataset_check_quota); ++EXPORT_SYMBOL(dsl_dataset_set_quota); ++EXPORT_SYMBOL(dsl_dataset_set_quota_sync); ++EXPORT_SYMBOL(dsl_dataset_set_reservation); ++EXPORT_SYMBOL(dsl_destroy_inconsistent); ++#endif +diff -uNr linux-3.2.33-go.orig/fs/zfs/zfs/dsl_deadlist.c linux-3.2.33-go/fs/zfs/zfs/dsl_deadlist.c +--- linux-3.2.33-go.orig/fs/zfs/zfs/dsl_deadlist.c 1970-01-01 01:00:00.000000000 +0100 ++++ linux-3.2.33-go/fs/zfs/zfs/dsl_deadlist.c 2012-11-16 23:25:34.347039358 +0100 +@@ -0,0 +1,500 @@ ++/* ++ * CDDL HEADER START ++ * ++ * The contents of this file are subject to the terms of the ++ * Common Development and Distribution License (the "License"). ++ * You may not use this file except in compliance with the License. ++ * ++ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE ++ * or http://www.opensolaris.org/os/licensing. ++ * See the License for the specific language governing permissions ++ * and limitations under the License. ++ * ++ * When distributing Covered Code, include this CDDL HEADER in each ++ * file and include the License file at usr/src/OPENSOLARIS.LICENSE. ++ * If applicable, add the following below this CDDL HEADER, with the ++ * fields enclosed by brackets "[]" replaced with your own identifying ++ * information: Portions Copyright [yyyy] [name of copyright owner] ++ * ++ * CDDL HEADER END ++ */ ++/* ++ * Copyright (c) 2010, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2011 by Delphix. All rights reserved. ++ */ ++ ++#include ++#include ++#include ++#include ++#include ++#include ++ ++/* ++ * Deadlist concurrency: ++ * ++ * Deadlists can only be modified from the syncing thread. ++ * ++ * Except for dsl_deadlist_insert(), it can only be modified with the ++ * dp_config_rwlock held with RW_WRITER. ++ * ++ * The accessors (dsl_deadlist_space() and dsl_deadlist_space_range()) can ++ * be called concurrently, from open context, with the dl_config_rwlock held ++ * with RW_READER. ++ * ++ * Therefore, we only need to provide locking between dsl_deadlist_insert() and ++ * the accessors, protecting: ++ * dl_phys->dl_used,comp,uncomp ++ * and protecting the dl_tree from being loaded. ++ * The locking is provided by dl_lock. Note that locking on the bpobj_t ++ * provides its own locking, and dl_oldfmt is immutable. 
++ */ ++ ++static int ++dsl_deadlist_compare(const void *arg1, const void *arg2) ++{ ++ const dsl_deadlist_entry_t *dle1 = arg1; ++ const dsl_deadlist_entry_t *dle2 = arg2; ++ ++ if (dle1->dle_mintxg < dle2->dle_mintxg) ++ return (-1); ++ else if (dle1->dle_mintxg > dle2->dle_mintxg) ++ return (+1); ++ else ++ return (0); ++} ++ ++static void ++dsl_deadlist_load_tree(dsl_deadlist_t *dl) ++{ ++ zap_cursor_t zc; ++ zap_attribute_t za; ++ ++ ASSERT(!dl->dl_oldfmt); ++ if (dl->dl_havetree) ++ return; ++ ++ avl_create(&dl->dl_tree, dsl_deadlist_compare, ++ sizeof (dsl_deadlist_entry_t), ++ offsetof(dsl_deadlist_entry_t, dle_node)); ++ for (zap_cursor_init(&zc, dl->dl_os, dl->dl_object); ++ zap_cursor_retrieve(&zc, &za) == 0; ++ zap_cursor_advance(&zc)) { ++ dsl_deadlist_entry_t *dle; ++ ++ dle = kmem_alloc(sizeof (*dle), KM_PUSHPAGE); ++ dle->dle_mintxg = strtonum(za.za_name, NULL); ++ VERIFY3U(0, ==, bpobj_open(&dle->dle_bpobj, dl->dl_os, ++ za.za_first_integer)); ++ avl_add(&dl->dl_tree, dle); ++ } ++ zap_cursor_fini(&zc); ++ dl->dl_havetree = B_TRUE; ++} ++ ++void ++dsl_deadlist_open(dsl_deadlist_t *dl, objset_t *os, uint64_t object) ++{ ++ dmu_object_info_t doi; ++ ++ mutex_init(&dl->dl_lock, NULL, MUTEX_DEFAULT, NULL); ++ dl->dl_os = os; ++ dl->dl_object = object; ++ VERIFY3U(0, ==, dmu_bonus_hold(os, object, dl, &dl->dl_dbuf)); ++ dmu_object_info_from_db(dl->dl_dbuf, &doi); ++ if (doi.doi_type == DMU_OT_BPOBJ) { ++ dmu_buf_rele(dl->dl_dbuf, dl); ++ dl->dl_dbuf = NULL; ++ dl->dl_oldfmt = B_TRUE; ++ VERIFY3U(0, ==, bpobj_open(&dl->dl_bpobj, os, object)); ++ return; ++ } ++ ++ dl->dl_oldfmt = B_FALSE; ++ dl->dl_phys = dl->dl_dbuf->db_data; ++ dl->dl_havetree = B_FALSE; ++} ++ ++void ++dsl_deadlist_close(dsl_deadlist_t *dl) ++{ ++ void *cookie = NULL; ++ dsl_deadlist_entry_t *dle; ++ ++ if (dl->dl_oldfmt) { ++ dl->dl_oldfmt = B_FALSE; ++ bpobj_close(&dl->dl_bpobj); ++ return; ++ } ++ ++ if (dl->dl_havetree) { ++ while ((dle = avl_destroy_nodes(&dl->dl_tree, &cookie)) ++ != NULL) { ++ bpobj_close(&dle->dle_bpobj); ++ kmem_free(dle, sizeof (*dle)); ++ } ++ avl_destroy(&dl->dl_tree); ++ } ++ dmu_buf_rele(dl->dl_dbuf, dl); ++ mutex_destroy(&dl->dl_lock); ++ dl->dl_dbuf = NULL; ++ dl->dl_phys = NULL; ++} ++ ++uint64_t ++dsl_deadlist_alloc(objset_t *os, dmu_tx_t *tx) ++{ ++ if (spa_version(dmu_objset_spa(os)) < SPA_VERSION_DEADLISTS) ++ return (bpobj_alloc(os, SPA_MAXBLOCKSIZE, tx)); ++ return (zap_create(os, DMU_OT_DEADLIST, DMU_OT_DEADLIST_HDR, ++ sizeof (dsl_deadlist_phys_t), tx)); ++} ++ ++void ++dsl_deadlist_free(objset_t *os, uint64_t dlobj, dmu_tx_t *tx) ++{ ++ dmu_object_info_t doi; ++ zap_cursor_t zc; ++ zap_attribute_t za; ++ ++ VERIFY3U(0, ==, dmu_object_info(os, dlobj, &doi)); ++ if (doi.doi_type == DMU_OT_BPOBJ) { ++ bpobj_free(os, dlobj, tx); ++ return; ++ } ++ ++ for (zap_cursor_init(&zc, os, dlobj); ++ zap_cursor_retrieve(&zc, &za) == 0; ++ zap_cursor_advance(&zc)) ++ bpobj_free(os, za.za_first_integer, tx); ++ zap_cursor_fini(&zc); ++ VERIFY3U(0, ==, dmu_object_free(os, dlobj, tx)); ++} ++ ++void ++dsl_deadlist_insert(dsl_deadlist_t *dl, const blkptr_t *bp, dmu_tx_t *tx) ++{ ++ dsl_deadlist_entry_t dle_tofind; ++ dsl_deadlist_entry_t *dle; ++ avl_index_t where; ++ ++ if (dl->dl_oldfmt) { ++ bpobj_enqueue(&dl->dl_bpobj, bp, tx); ++ return; ++ } ++ ++ dsl_deadlist_load_tree(dl); ++ ++ dmu_buf_will_dirty(dl->dl_dbuf, tx); ++ mutex_enter(&dl->dl_lock); ++ dl->dl_phys->dl_used += ++ bp_get_dsize_sync(dmu_objset_spa(dl->dl_os), bp); ++ dl->dl_phys->dl_comp += BP_GET_PSIZE(bp); ++ 
dl->dl_phys->dl_uncomp += BP_GET_UCSIZE(bp); ++ mutex_exit(&dl->dl_lock); ++ ++ dle_tofind.dle_mintxg = bp->blk_birth; ++ dle = avl_find(&dl->dl_tree, &dle_tofind, &where); ++ if (dle == NULL) ++ dle = avl_nearest(&dl->dl_tree, where, AVL_BEFORE); ++ else ++ dle = AVL_PREV(&dl->dl_tree, dle); ++ bpobj_enqueue(&dle->dle_bpobj, bp, tx); ++} ++ ++/* ++ * Insert new key in deadlist, which must be > all current entries. ++ * mintxg is not inclusive. ++ */ ++void ++dsl_deadlist_add_key(dsl_deadlist_t *dl, uint64_t mintxg, dmu_tx_t *tx) ++{ ++ uint64_t obj; ++ dsl_deadlist_entry_t *dle; ++ ++ if (dl->dl_oldfmt) ++ return; ++ ++ dsl_deadlist_load_tree(dl); ++ ++ dle = kmem_alloc(sizeof (*dle), KM_PUSHPAGE); ++ dle->dle_mintxg = mintxg; ++ obj = bpobj_alloc(dl->dl_os, SPA_MAXBLOCKSIZE, tx); ++ VERIFY3U(0, ==, bpobj_open(&dle->dle_bpobj, dl->dl_os, obj)); ++ avl_add(&dl->dl_tree, dle); ++ ++ VERIFY3U(0, ==, zap_add_int_key(dl->dl_os, dl->dl_object, ++ mintxg, obj, tx)); ++} ++ ++/* ++ * Remove this key, merging its entries into the previous key. ++ */ ++void ++dsl_deadlist_remove_key(dsl_deadlist_t *dl, uint64_t mintxg, dmu_tx_t *tx) ++{ ++ dsl_deadlist_entry_t dle_tofind; ++ dsl_deadlist_entry_t *dle, *dle_prev; ++ ++ if (dl->dl_oldfmt) ++ return; ++ ++ dsl_deadlist_load_tree(dl); ++ ++ dle_tofind.dle_mintxg = mintxg; ++ dle = avl_find(&dl->dl_tree, &dle_tofind, NULL); ++ dle_prev = AVL_PREV(&dl->dl_tree, dle); ++ ++ bpobj_enqueue_subobj(&dle_prev->dle_bpobj, ++ dle->dle_bpobj.bpo_object, tx); ++ ++ avl_remove(&dl->dl_tree, dle); ++ bpobj_close(&dle->dle_bpobj); ++ kmem_free(dle, sizeof (*dle)); ++ ++ VERIFY3U(0, ==, zap_remove_int(dl->dl_os, dl->dl_object, mintxg, tx)); ++} ++ ++/* ++ * Walk ds's snapshots to regenerate generate ZAP & AVL. ++ */ ++static void ++dsl_deadlist_regenerate(objset_t *os, uint64_t dlobj, ++ uint64_t mrs_obj, dmu_tx_t *tx) ++{ ++ dsl_deadlist_t dl; ++ dsl_pool_t *dp = dmu_objset_pool(os); ++ ++ dsl_deadlist_open(&dl, os, dlobj); ++ if (dl.dl_oldfmt) { ++ dsl_deadlist_close(&dl); ++ return; ++ } ++ ++ while (mrs_obj != 0) { ++ dsl_dataset_t *ds; ++ VERIFY3U(0, ==, dsl_dataset_hold_obj(dp, mrs_obj, FTAG, &ds)); ++ dsl_deadlist_add_key(&dl, ds->ds_phys->ds_prev_snap_txg, tx); ++ mrs_obj = ds->ds_phys->ds_prev_snap_obj; ++ dsl_dataset_rele(ds, FTAG); ++ } ++ dsl_deadlist_close(&dl); ++} ++ ++uint64_t ++dsl_deadlist_clone(dsl_deadlist_t *dl, uint64_t maxtxg, ++ uint64_t mrs_obj, dmu_tx_t *tx) ++{ ++ dsl_deadlist_entry_t *dle; ++ uint64_t newobj; ++ ++ newobj = dsl_deadlist_alloc(dl->dl_os, tx); ++ ++ if (dl->dl_oldfmt) { ++ dsl_deadlist_regenerate(dl->dl_os, newobj, mrs_obj, tx); ++ return (newobj); ++ } ++ ++ dsl_deadlist_load_tree(dl); ++ ++ for (dle = avl_first(&dl->dl_tree); dle; ++ dle = AVL_NEXT(&dl->dl_tree, dle)) { ++ uint64_t obj; ++ ++ if (dle->dle_mintxg >= maxtxg) ++ break; ++ ++ obj = bpobj_alloc(dl->dl_os, SPA_MAXBLOCKSIZE, tx); ++ VERIFY3U(0, ==, zap_add_int_key(dl->dl_os, newobj, ++ dle->dle_mintxg, obj, tx)); ++ } ++ return (newobj); ++} ++ ++void ++dsl_deadlist_space(dsl_deadlist_t *dl, ++ uint64_t *usedp, uint64_t *compp, uint64_t *uncompp) ++{ ++ if (dl->dl_oldfmt) { ++ VERIFY3U(0, ==, bpobj_space(&dl->dl_bpobj, ++ usedp, compp, uncompp)); ++ return; ++ } ++ ++ mutex_enter(&dl->dl_lock); ++ *usedp = dl->dl_phys->dl_used; ++ *compp = dl->dl_phys->dl_comp; ++ *uncompp = dl->dl_phys->dl_uncomp; ++ mutex_exit(&dl->dl_lock); ++} ++ ++/* ++ * return space used in the range (mintxg, maxtxg]. ++ * Includes maxtxg, does not include mintxg. 
++ * mintxg and maxtxg must both be keys in the deadlist (unless maxtxg is ++ * larger than any bp in the deadlist (eg. UINT64_MAX)). ++ */ ++void ++dsl_deadlist_space_range(dsl_deadlist_t *dl, uint64_t mintxg, uint64_t maxtxg, ++ uint64_t *usedp, uint64_t *compp, uint64_t *uncompp) ++{ ++ dsl_deadlist_entry_t *dle; ++ dsl_deadlist_entry_t dle_tofind; ++ avl_index_t where; ++ ++ if (dl->dl_oldfmt) { ++ VERIFY3U(0, ==, bpobj_space_range(&dl->dl_bpobj, ++ mintxg, maxtxg, usedp, compp, uncompp)); ++ return; ++ } ++ ++ *usedp = *compp = *uncompp = 0; ++ ++ mutex_enter(&dl->dl_lock); ++ dsl_deadlist_load_tree(dl); ++ dle_tofind.dle_mintxg = mintxg; ++ dle = avl_find(&dl->dl_tree, &dle_tofind, &where); ++ /* ++ * If we don't find this mintxg, there shouldn't be anything ++ * after it either. ++ */ ++ ASSERT(dle != NULL || ++ avl_nearest(&dl->dl_tree, where, AVL_AFTER) == NULL); ++ ++ for (; dle && dle->dle_mintxg < maxtxg; ++ dle = AVL_NEXT(&dl->dl_tree, dle)) { ++ uint64_t used, comp, uncomp; ++ ++ VERIFY3U(0, ==, bpobj_space(&dle->dle_bpobj, ++ &used, &comp, &uncomp)); ++ ++ *usedp += used; ++ *compp += comp; ++ *uncompp += uncomp; ++ } ++ mutex_exit(&dl->dl_lock); ++} ++ ++static void ++dsl_deadlist_insert_bpobj(dsl_deadlist_t *dl, uint64_t obj, uint64_t birth, ++ dmu_tx_t *tx) ++{ ++ dsl_deadlist_entry_t dle_tofind; ++ dsl_deadlist_entry_t *dle; ++ avl_index_t where; ++ uint64_t used, comp, uncomp; ++ bpobj_t bpo; ++ ++ VERIFY3U(0, ==, bpobj_open(&bpo, dl->dl_os, obj)); ++ VERIFY3U(0, ==, bpobj_space(&bpo, &used, &comp, &uncomp)); ++ bpobj_close(&bpo); ++ ++ dsl_deadlist_load_tree(dl); ++ ++ dmu_buf_will_dirty(dl->dl_dbuf, tx); ++ mutex_enter(&dl->dl_lock); ++ dl->dl_phys->dl_used += used; ++ dl->dl_phys->dl_comp += comp; ++ dl->dl_phys->dl_uncomp += uncomp; ++ mutex_exit(&dl->dl_lock); ++ ++ dle_tofind.dle_mintxg = birth; ++ dle = avl_find(&dl->dl_tree, &dle_tofind, &where); ++ if (dle == NULL) ++ dle = avl_nearest(&dl->dl_tree, where, AVL_BEFORE); ++ bpobj_enqueue_subobj(&dle->dle_bpobj, obj, tx); ++} ++ ++static int ++dsl_deadlist_insert_cb(void *arg, const blkptr_t *bp, dmu_tx_t *tx) ++{ ++ dsl_deadlist_t *dl = arg; ++ dsl_deadlist_insert(dl, bp, tx); ++ return (0); ++} ++ ++/* ++ * Merge the deadlist pointed to by 'obj' into dl. obj will be left as ++ * an empty deadlist. ++ */ ++void ++dsl_deadlist_merge(dsl_deadlist_t *dl, uint64_t obj, dmu_tx_t *tx) ++{ ++ zap_cursor_t zc; ++ zap_attribute_t za; ++ dmu_buf_t *bonus; ++ dsl_deadlist_phys_t *dlp; ++ dmu_object_info_t doi; ++ ++ VERIFY3U(0, ==, dmu_object_info(dl->dl_os, obj, &doi)); ++ if (doi.doi_type == DMU_OT_BPOBJ) { ++ bpobj_t bpo; ++ VERIFY3U(0, ==, bpobj_open(&bpo, dl->dl_os, obj)); ++ VERIFY3U(0, ==, bpobj_iterate(&bpo, ++ dsl_deadlist_insert_cb, dl, tx)); ++ bpobj_close(&bpo); ++ return; ++ } ++ ++ for (zap_cursor_init(&zc, dl->dl_os, obj); ++ zap_cursor_retrieve(&zc, &za) == 0; ++ zap_cursor_advance(&zc)) { ++ uint64_t mintxg = strtonum(za.za_name, NULL); ++ dsl_deadlist_insert_bpobj(dl, za.za_first_integer, mintxg, tx); ++ VERIFY3U(0, ==, zap_remove_int(dl->dl_os, obj, mintxg, tx)); ++ } ++ zap_cursor_fini(&zc); ++ ++ VERIFY3U(0, ==, dmu_bonus_hold(dl->dl_os, obj, FTAG, &bonus)); ++ dlp = bonus->db_data; ++ dmu_buf_will_dirty(bonus, tx); ++ bzero(dlp, sizeof (*dlp)); ++ dmu_buf_rele(bonus, FTAG); ++} ++ ++/* ++ * Remove entries on dl that are >= mintxg, and put them on the bpobj. 
++ */ ++void ++dsl_deadlist_move_bpobj(dsl_deadlist_t *dl, bpobj_t *bpo, uint64_t mintxg, ++ dmu_tx_t *tx) ++{ ++ dsl_deadlist_entry_t dle_tofind; ++ dsl_deadlist_entry_t *dle; ++ avl_index_t where; ++ ++ ASSERT(!dl->dl_oldfmt); ++ dmu_buf_will_dirty(dl->dl_dbuf, tx); ++ dsl_deadlist_load_tree(dl); ++ ++ dle_tofind.dle_mintxg = mintxg; ++ dle = avl_find(&dl->dl_tree, &dle_tofind, &where); ++ if (dle == NULL) ++ dle = avl_nearest(&dl->dl_tree, where, AVL_AFTER); ++ while (dle) { ++ uint64_t used, comp, uncomp; ++ dsl_deadlist_entry_t *dle_next; ++ ++ bpobj_enqueue_subobj(bpo, dle->dle_bpobj.bpo_object, tx); ++ ++ VERIFY3U(0, ==, bpobj_space(&dle->dle_bpobj, ++ &used, &comp, &uncomp)); ++ mutex_enter(&dl->dl_lock); ++ ASSERT3U(dl->dl_phys->dl_used, >=, used); ++ ASSERT3U(dl->dl_phys->dl_comp, >=, comp); ++ ASSERT3U(dl->dl_phys->dl_uncomp, >=, uncomp); ++ dl->dl_phys->dl_used -= used; ++ dl->dl_phys->dl_comp -= comp; ++ dl->dl_phys->dl_uncomp -= uncomp; ++ mutex_exit(&dl->dl_lock); ++ ++ VERIFY3U(0, ==, zap_remove_int(dl->dl_os, dl->dl_object, ++ dle->dle_mintxg, tx)); ++ ++ dle_next = AVL_NEXT(&dl->dl_tree, dle); ++ avl_remove(&dl->dl_tree, dle); ++ bpobj_close(&dle->dle_bpobj); ++ kmem_free(dle, sizeof (*dle)); ++ dle = dle_next; ++ } ++} +diff -uNr linux-3.2.33-go.orig/fs/zfs/zfs/dsl_deleg.c linux-3.2.33-go/fs/zfs/zfs/dsl_deleg.c +--- linux-3.2.33-go.orig/fs/zfs/zfs/dsl_deleg.c 1970-01-01 01:00:00.000000000 +0100 ++++ linux-3.2.33-go/fs/zfs/zfs/dsl_deleg.c 2012-11-16 23:25:34.351039311 +0100 +@@ -0,0 +1,763 @@ ++/* ++ * CDDL HEADER START ++ * ++ * The contents of this file are subject to the terms of the ++ * Common Development and Distribution License (the "License"). ++ * You may not use this file except in compliance with the License. ++ * ++ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE ++ * or http://www.opensolaris.org/os/licensing. ++ * See the License for the specific language governing permissions ++ * and limitations under the License. ++ * ++ * When distributing Covered Code, include this CDDL HEADER in each ++ * file and include the License file at usr/src/OPENSOLARIS.LICENSE. ++ * If applicable, add the following below this CDDL HEADER, with the ++ * fields enclosed by brackets "[]" replaced with your own identifying ++ * information: Portions Copyright [yyyy] [name of copyright owner] ++ * ++ * CDDL HEADER END ++ */ ++/* ++ * Copyright (c) 2007, 2010, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2011 by Delphix. All rights reserved. ++ */ ++ ++/* ++ * DSL permissions are stored in a two level zap attribute ++ * mechanism. The first level identifies the "class" of ++ * entry. The class is identified by the first 2 letters of ++ * the attribute. The second letter "l" or "d" identifies whether ++ * it is a local or descendent permission. The first letter ++ * identifies the type of entry. ++ * ++ * ul$ identifies permissions granted locally for this userid. ++ * ud$ identifies permissions granted on descendent datasets for ++ * this userid. ++ * Ul$ identifies permission sets granted locally for this userid. ++ * Ud$ identifies permission sets granted on descendent datasets for ++ * this userid. ++ * gl$ identifies permissions granted locally for this groupid. ++ * gd$ identifies permissions granted on descendent datasets for ++ * this groupid. ++ * Gl$ identifies permission sets granted locally for this groupid. ++ * Gd$ identifies permission sets granted on descendent datasets for ++ * this groupid. 
++ * el$ identifies permissions granted locally for everyone. ++ * ed$ identifies permissions granted on descendent datasets ++ * for everyone. ++ * El$ identifies permission sets granted locally for everyone. ++ * Ed$ identifies permission sets granted to descendent datasets for ++ * everyone. ++ * c-$ identifies permission to create at dataset creation time. ++ * C-$ identifies permission sets to grant locally at dataset creation ++ * time. ++ * s-$@ permissions defined in specified set @ ++ * S-$@ Sets defined in named set @ ++ * ++ * Each of the above entities points to another zap attribute that contains one ++ * attribute for each allowed permission, such as create, destroy,... ++ * All of the "upper" case class types will specify permission set names ++ * rather than permissions. ++ * ++ * Basically it looks something like this: ++ * ul$12 -> ZAP OBJ -> permissions... ++ * ++ * The ZAP OBJ is referred to as the jump object. ++ */ ++ ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++ ++#include "zfs_deleg.h" ++ ++/* ++ * Validate that user is allowed to delegate specified permissions. ++ * ++ * In order to delegate "create" you must have "create" ++ * and "allow". ++ */ ++int ++dsl_deleg_can_allow(char *ddname, nvlist_t *nvp, cred_t *cr) ++{ ++ nvpair_t *whopair = NULL; ++ int error; ++ ++ if ((error = dsl_deleg_access(ddname, ZFS_DELEG_PERM_ALLOW, cr)) != 0) ++ return (error); ++ ++ while ((whopair = nvlist_next_nvpair(nvp, whopair))) { ++ nvlist_t *perms; ++ nvpair_t *permpair = NULL; ++ ++ VERIFY(nvpair_value_nvlist(whopair, &perms) == 0); ++ ++ while ((permpair = nvlist_next_nvpair(perms, permpair))) { ++ const char *perm = nvpair_name(permpair); ++ ++ if (strcmp(perm, ZFS_DELEG_PERM_ALLOW) == 0) ++ return (EPERM); ++ ++ if ((error = dsl_deleg_access(ddname, perm, cr)) != 0) ++ return (error); ++ } ++ } ++ return (0); ++} ++ ++/* ++ * Validate that user is allowed to unallow specified permissions. They ++ * must have the 'allow' permission, and even then can only unallow ++ * perms for their uid. 
++ */ ++int ++dsl_deleg_can_unallow(char *ddname, nvlist_t *nvp, cred_t *cr) ++{ ++ nvpair_t *whopair = NULL; ++ int error; ++ char idstr[32]; ++ ++ if ((error = dsl_deleg_access(ddname, ZFS_DELEG_PERM_ALLOW, cr)) != 0) ++ return (error); ++ ++ (void) snprintf(idstr, sizeof (idstr), "%lld", ++ (longlong_t)crgetuid(cr)); ++ ++ while ((whopair = nvlist_next_nvpair(nvp, whopair))) { ++ zfs_deleg_who_type_t type = nvpair_name(whopair)[0]; ++ ++ if (type != ZFS_DELEG_USER && ++ type != ZFS_DELEG_USER_SETS) ++ return (EPERM); ++ ++ if (strcmp(idstr, &nvpair_name(whopair)[3]) != 0) ++ return (EPERM); ++ } ++ return (0); ++} ++ ++static void ++dsl_deleg_set_sync(void *arg1, void *arg2, dmu_tx_t *tx) ++{ ++ dsl_dir_t *dd = arg1; ++ nvlist_t *nvp = arg2; ++ objset_t *mos = dd->dd_pool->dp_meta_objset; ++ nvpair_t *whopair = NULL; ++ uint64_t zapobj = dd->dd_phys->dd_deleg_zapobj; ++ ++ if (zapobj == 0) { ++ dmu_buf_will_dirty(dd->dd_dbuf, tx); ++ zapobj = dd->dd_phys->dd_deleg_zapobj = zap_create(mos, ++ DMU_OT_DSL_PERMS, DMU_OT_NONE, 0, tx); ++ } ++ ++ while ((whopair = nvlist_next_nvpair(nvp, whopair))) { ++ const char *whokey = nvpair_name(whopair); ++ nvlist_t *perms; ++ nvpair_t *permpair = NULL; ++ uint64_t jumpobj; ++ ++ VERIFY(nvpair_value_nvlist(whopair, &perms) == 0); ++ ++ if (zap_lookup(mos, zapobj, whokey, 8, 1, &jumpobj) != 0) { ++ jumpobj = zap_create(mos, DMU_OT_DSL_PERMS, ++ DMU_OT_NONE, 0, tx); ++ VERIFY(zap_update(mos, zapobj, ++ whokey, 8, 1, &jumpobj, tx) == 0); ++ } ++ ++ while ((permpair = nvlist_next_nvpair(perms, permpair))) { ++ const char *perm = nvpair_name(permpair); ++ uint64_t n = 0; ++ ++ VERIFY(zap_update(mos, jumpobj, ++ perm, 8, 1, &n, tx) == 0); ++ spa_history_log_internal(LOG_DS_PERM_UPDATE, ++ dd->dd_pool->dp_spa, tx, ++ "%s %s dataset = %llu", whokey, perm, ++ dd->dd_phys->dd_head_dataset_obj); ++ } ++ } ++} ++ ++static void ++dsl_deleg_unset_sync(void *arg1, void *arg2, dmu_tx_t *tx) ++{ ++ dsl_dir_t *dd = arg1; ++ nvlist_t *nvp = arg2; ++ objset_t *mos = dd->dd_pool->dp_meta_objset; ++ nvpair_t *whopair = NULL; ++ uint64_t zapobj = dd->dd_phys->dd_deleg_zapobj; ++ ++ if (zapobj == 0) ++ return; ++ ++ while ((whopair = nvlist_next_nvpair(nvp, whopair))) { ++ const char *whokey = nvpair_name(whopair); ++ nvlist_t *perms; ++ nvpair_t *permpair = NULL; ++ uint64_t jumpobj; ++ ++ if (nvpair_value_nvlist(whopair, &perms) != 0) { ++ if (zap_lookup(mos, zapobj, whokey, 8, ++ 1, &jumpobj) == 0) { ++ (void) zap_remove(mos, zapobj, whokey, tx); ++ VERIFY(0 == zap_destroy(mos, jumpobj, tx)); ++ } ++ spa_history_log_internal(LOG_DS_PERM_WHO_REMOVE, ++ dd->dd_pool->dp_spa, tx, ++ "%s dataset = %llu", whokey, ++ dd->dd_phys->dd_head_dataset_obj); ++ continue; ++ } ++ ++ if (zap_lookup(mos, zapobj, whokey, 8, 1, &jumpobj) != 0) ++ continue; ++ ++ while ((permpair = nvlist_next_nvpair(perms, permpair))) { ++ const char *perm = nvpair_name(permpair); ++ uint64_t n = 0; ++ ++ (void) zap_remove(mos, jumpobj, perm, tx); ++ if (zap_count(mos, jumpobj, &n) == 0 && n == 0) { ++ (void) zap_remove(mos, zapobj, ++ whokey, tx); ++ VERIFY(0 == zap_destroy(mos, ++ jumpobj, tx)); ++ } ++ spa_history_log_internal(LOG_DS_PERM_REMOVE, ++ dd->dd_pool->dp_spa, tx, ++ "%s %s dataset = %llu", whokey, perm, ++ dd->dd_phys->dd_head_dataset_obj); ++ } ++ } ++} ++ ++int ++dsl_deleg_set(const char *ddname, nvlist_t *nvp, boolean_t unset) ++{ ++ dsl_dir_t *dd; ++ int error; ++ nvpair_t *whopair = NULL; ++ int blocks_modified = 0; ++ ++ error = dsl_dir_open(ddname, FTAG, &dd, NULL); ++ if (error) ++ 
return (error); ++ ++ if (spa_version(dmu_objset_spa(dd->dd_pool->dp_meta_objset)) < ++ SPA_VERSION_DELEGATED_PERMS) { ++ dsl_dir_close(dd, FTAG); ++ return (ENOTSUP); ++ } ++ ++ while ((whopair = nvlist_next_nvpair(nvp, whopair))) ++ blocks_modified++; ++ ++ error = dsl_sync_task_do(dd->dd_pool, NULL, ++ unset ? dsl_deleg_unset_sync : dsl_deleg_set_sync, ++ dd, nvp, blocks_modified); ++ dsl_dir_close(dd, FTAG); ++ ++ return (error); ++} ++ ++/* ++ * Find all 'allow' permissions from a given point and then continue ++ * traversing up to the root. ++ * ++ * This function constructs an nvlist of nvlists. ++ * each setpoint is an nvlist composed of an nvlist of an nvlist ++ * of the individual * users/groups/everyone/create ++ * permissions. ++ * ++ * The nvlist will look like this. ++ * ++ * { source fsname -> { whokeys { permissions,...}, ...}} ++ * ++ * The fsname nvpairs will be arranged in a bottom up order. For example, ++ * if we have the following structure a/b/c then the nvpairs for the fsnames ++ * will be ordered a/b/c, a/b, a. ++ */ ++int ++dsl_deleg_get(const char *ddname, nvlist_t **nvp) ++{ ++ dsl_dir_t *dd, *startdd; ++ dsl_pool_t *dp; ++ int error; ++ objset_t *mos; ++ zap_cursor_t *basezc, *zc; ++ zap_attribute_t *baseza, *za; ++ char *source; ++ ++ error = dsl_dir_open(ddname, FTAG, &startdd, NULL); ++ if (error) ++ return (error); ++ ++ dp = startdd->dd_pool; ++ mos = dp->dp_meta_objset; ++ ++ zc = kmem_alloc(sizeof(zap_cursor_t), KM_SLEEP); ++ za = kmem_alloc(sizeof(zap_attribute_t), KM_SLEEP); ++ basezc = kmem_alloc(sizeof(zap_cursor_t), KM_SLEEP); ++ baseza = kmem_alloc(sizeof(zap_attribute_t), KM_SLEEP); ++ source = kmem_alloc(MAXNAMELEN + strlen(MOS_DIR_NAME) + 1, KM_SLEEP); ++ VERIFY(nvlist_alloc(nvp, NV_UNIQUE_NAME, KM_SLEEP) == 0); ++ ++ rw_enter(&dp->dp_config_rwlock, RW_READER); ++ for (dd = startdd; dd != NULL; dd = dd->dd_parent) { ++ nvlist_t *sp_nvp; ++ uint64_t n; ++ ++ if (dd->dd_phys->dd_deleg_zapobj && ++ (zap_count(mos, dd->dd_phys->dd_deleg_zapobj, ++ &n) == 0) && n) { ++ VERIFY(nvlist_alloc(&sp_nvp, ++ NV_UNIQUE_NAME, KM_SLEEP) == 0); ++ } else { ++ continue; ++ } ++ ++ for (zap_cursor_init(basezc, mos, ++ dd->dd_phys->dd_deleg_zapobj); ++ zap_cursor_retrieve(basezc, baseza) == 0; ++ zap_cursor_advance(basezc)) { ++ nvlist_t *perms_nvp; ++ ++ ASSERT(baseza->za_integer_length == 8); ++ ASSERT(baseza->za_num_integers == 1); ++ ++ VERIFY(nvlist_alloc(&perms_nvp, ++ NV_UNIQUE_NAME, KM_SLEEP) == 0); ++ for (zap_cursor_init(zc, mos, baseza->za_first_integer); ++ zap_cursor_retrieve(zc, za) == 0; ++ zap_cursor_advance(zc)) { ++ VERIFY(nvlist_add_boolean(perms_nvp, ++ za->za_name) == 0); ++ } ++ zap_cursor_fini(zc); ++ VERIFY(nvlist_add_nvlist(sp_nvp, baseza->za_name, ++ perms_nvp) == 0); ++ nvlist_free(perms_nvp); ++ } ++ ++ zap_cursor_fini(basezc); ++ ++ dsl_dir_name(dd, source); ++ VERIFY(nvlist_add_nvlist(*nvp, source, sp_nvp) == 0); ++ nvlist_free(sp_nvp); ++ } ++ rw_exit(&dp->dp_config_rwlock); ++ ++ kmem_free(source, MAXNAMELEN + strlen(MOS_DIR_NAME) + 1); ++ kmem_free(baseza, sizeof(zap_attribute_t)); ++ kmem_free(basezc, sizeof(zap_cursor_t)); ++ kmem_free(za, sizeof(zap_attribute_t)); ++ kmem_free(zc, sizeof(zap_cursor_t)); ++ ++ dsl_dir_close(startdd, FTAG); ++ return (0); ++} ++ ++/* ++ * Routines for dsl_deleg_access() -- access checking. 
++ */ ++typedef struct perm_set { ++ avl_node_t p_node; ++ boolean_t p_matched; ++ char p_setname[ZFS_MAX_DELEG_NAME]; ++} perm_set_t; ++ ++static int ++perm_set_compare(const void *arg1, const void *arg2) ++{ ++ const perm_set_t *node1 = arg1; ++ const perm_set_t *node2 = arg2; ++ int val; ++ ++ val = strcmp(node1->p_setname, node2->p_setname); ++ if (val == 0) ++ return (0); ++ return (val > 0 ? 1 : -1); ++} ++ ++/* ++ * Determine whether a specified permission exists. ++ * ++ * First the base attribute has to be retrieved. i.e. ul$12 ++ * Once the base object has been retrieved the actual permission ++ * is lookup up in the zap object the base object points to. ++ * ++ * Return 0 if permission exists, ENOENT if there is no whokey, EPERM if ++ * there is no perm in that jumpobj. ++ */ ++static int ++dsl_check_access(objset_t *mos, uint64_t zapobj, ++ char type, char checkflag, void *valp, const char *perm) ++{ ++ int error; ++ uint64_t jumpobj, zero; ++ char whokey[ZFS_MAX_DELEG_NAME]; ++ ++ zfs_deleg_whokey(whokey, type, checkflag, valp); ++ error = zap_lookup(mos, zapobj, whokey, 8, 1, &jumpobj); ++ if (error == 0) { ++ error = zap_lookup(mos, jumpobj, perm, 8, 1, &zero); ++ if (error == ENOENT) ++ error = EPERM; ++ } ++ return (error); ++} ++ ++/* ++ * check a specified user/group for a requested permission ++ */ ++static int ++dsl_check_user_access(objset_t *mos, uint64_t zapobj, const char *perm, ++ int checkflag, cred_t *cr) ++{ ++ const gid_t *gids; ++ int ngids; ++ int i; ++ uint64_t id; ++ ++ /* check for user */ ++ id = crgetuid(cr); ++ if (dsl_check_access(mos, zapobj, ++ ZFS_DELEG_USER, checkflag, &id, perm) == 0) ++ return (0); ++ ++ /* check for users primary group */ ++ id = crgetgid(cr); ++ if (dsl_check_access(mos, zapobj, ++ ZFS_DELEG_GROUP, checkflag, &id, perm) == 0) ++ return (0); ++ ++ /* check for everyone entry */ ++ id = -1; ++ if (dsl_check_access(mos, zapobj, ++ ZFS_DELEG_EVERYONE, checkflag, &id, perm) == 0) ++ return (0); ++ ++ /* check each supplemental group user is a member of */ ++ ngids = crgetngroups(cr); ++ gids = crgetgroups(cr); ++ for (i = 0; i != ngids; i++) { ++ id = gids[i]; ++ if (dsl_check_access(mos, zapobj, ++ ZFS_DELEG_GROUP, checkflag, &id, perm) == 0) ++ return (0); ++ } ++ ++ return (EPERM); ++} ++ ++/* ++ * Iterate over the sets specified in the specified zapobj ++ * and load them into the permsets avl tree. ++ */ ++static int ++dsl_load_sets(objset_t *mos, uint64_t zapobj, ++ char type, char checkflag, void *valp, avl_tree_t *avl) ++{ ++ zap_cursor_t zc; ++ zap_attribute_t za; ++ perm_set_t *permnode; ++ avl_index_t idx; ++ uint64_t jumpobj; ++ int error; ++ char whokey[ZFS_MAX_DELEG_NAME]; ++ ++ zfs_deleg_whokey(whokey, type, checkflag, valp); ++ ++ error = zap_lookup(mos, zapobj, whokey, 8, 1, &jumpobj); ++ if (error != 0) ++ return (error); ++ ++ for (zap_cursor_init(&zc, mos, jumpobj); ++ zap_cursor_retrieve(&zc, &za) == 0; ++ zap_cursor_advance(&zc)) { ++ permnode = kmem_alloc(sizeof (perm_set_t), KM_SLEEP); ++ (void) strlcpy(permnode->p_setname, za.za_name, ++ sizeof (permnode->p_setname)); ++ permnode->p_matched = B_FALSE; ++ ++ if (avl_find(avl, permnode, &idx) == NULL) { ++ avl_insert(avl, permnode, idx); ++ } else { ++ kmem_free(permnode, sizeof (perm_set_t)); ++ } ++ } ++ zap_cursor_fini(&zc); ++ return (0); ++} ++ ++/* ++ * Load all permissions user based on cred belongs to. 
++ */ ++static void ++dsl_load_user_sets(objset_t *mos, uint64_t zapobj, avl_tree_t *avl, ++ char checkflag, cred_t *cr) ++{ ++ const gid_t *gids; ++ int ngids, i; ++ uint64_t id; ++ ++ id = crgetuid(cr); ++ (void) dsl_load_sets(mos, zapobj, ++ ZFS_DELEG_USER_SETS, checkflag, &id, avl); ++ ++ id = crgetgid(cr); ++ (void) dsl_load_sets(mos, zapobj, ++ ZFS_DELEG_GROUP_SETS, checkflag, &id, avl); ++ ++ (void) dsl_load_sets(mos, zapobj, ++ ZFS_DELEG_EVERYONE_SETS, checkflag, NULL, avl); ++ ++ ngids = crgetngroups(cr); ++ gids = crgetgroups(cr); ++ for (i = 0; i != ngids; i++) { ++ id = gids[i]; ++ (void) dsl_load_sets(mos, zapobj, ++ ZFS_DELEG_GROUP_SETS, checkflag, &id, avl); ++ } ++} ++ ++/* ++ * Check if user has requested permission. If descendent is set, must have ++ * descendent perms. ++ */ ++int ++dsl_deleg_access_impl(dsl_dataset_t *ds, boolean_t descendent, const char *perm, ++ cred_t *cr) ++{ ++ dsl_dir_t *dd; ++ dsl_pool_t *dp; ++ void *cookie; ++ int error; ++ char checkflag; ++ objset_t *mos; ++ avl_tree_t permsets; ++ perm_set_t *setnode; ++ ++ dp = ds->ds_dir->dd_pool; ++ mos = dp->dp_meta_objset; ++ ++ if (dsl_delegation_on(mos) == B_FALSE) ++ return (ECANCELED); ++ ++ if (spa_version(dmu_objset_spa(dp->dp_meta_objset)) < ++ SPA_VERSION_DELEGATED_PERMS) ++ return (EPERM); ++ ++ if (dsl_dataset_is_snapshot(ds) || descendent) { ++ /* ++ * Snapshots are treated as descendents only, ++ * local permissions do not apply. ++ */ ++ checkflag = ZFS_DELEG_DESCENDENT; ++ } else { ++ checkflag = ZFS_DELEG_LOCAL; ++ } ++ ++ avl_create(&permsets, perm_set_compare, sizeof (perm_set_t), ++ offsetof(perm_set_t, p_node)); ++ ++ rw_enter(&dp->dp_config_rwlock, RW_READER); ++ for (dd = ds->ds_dir; dd != NULL; dd = dd->dd_parent, ++ checkflag = ZFS_DELEG_DESCENDENT) { ++ uint64_t zapobj; ++ boolean_t expanded; ++ ++ /* ++ * If not in global zone then make sure ++ * the zoned property is set ++ */ ++ if (!INGLOBALZONE(curproc)) { ++ uint64_t zoned; ++ ++ if (dsl_prop_get_dd(dd, ++ zfs_prop_to_name(ZFS_PROP_ZONED), ++ 8, 1, &zoned, NULL, B_FALSE) != 0) ++ break; ++ if (!zoned) ++ break; ++ } ++ zapobj = dd->dd_phys->dd_deleg_zapobj; ++ ++ if (zapobj == 0) ++ continue; ++ ++ dsl_load_user_sets(mos, zapobj, &permsets, checkflag, cr); ++again: ++ expanded = B_FALSE; ++ for (setnode = avl_first(&permsets); setnode; ++ setnode = AVL_NEXT(&permsets, setnode)) { ++ if (setnode->p_matched == B_TRUE) ++ continue; ++ ++ /* See if this set directly grants this permission */ ++ error = dsl_check_access(mos, zapobj, ++ ZFS_DELEG_NAMED_SET, 0, setnode->p_setname, perm); ++ if (error == 0) ++ goto success; ++ if (error == EPERM) ++ setnode->p_matched = B_TRUE; ++ ++ /* See if this set includes other sets */ ++ error = dsl_load_sets(mos, zapobj, ++ ZFS_DELEG_NAMED_SET_SETS, 0, ++ setnode->p_setname, &permsets); ++ if (error == 0) ++ setnode->p_matched = expanded = B_TRUE; ++ } ++ /* ++ * If we expanded any sets, that will define more sets, ++ * which we need to check. 
++ */ ++ if (expanded) ++ goto again; ++ ++ error = dsl_check_user_access(mos, zapobj, perm, checkflag, cr); ++ if (error == 0) ++ goto success; ++ } ++ error = EPERM; ++success: ++ rw_exit(&dp->dp_config_rwlock); ++ ++ cookie = NULL; ++ while ((setnode = avl_destroy_nodes(&permsets, &cookie)) != NULL) ++ kmem_free(setnode, sizeof (perm_set_t)); ++ ++ return (error); ++} ++ ++int ++dsl_deleg_access(const char *dsname, const char *perm, cred_t *cr) ++{ ++ dsl_dataset_t *ds; ++ int error; ++ ++ error = dsl_dataset_hold(dsname, FTAG, &ds); ++ if (error) ++ return (error); ++ ++ error = dsl_deleg_access_impl(ds, B_FALSE, perm, cr); ++ dsl_dataset_rele(ds, FTAG); ++ ++ return (error); ++} ++ ++/* ++ * Other routines. ++ */ ++ ++static void ++copy_create_perms(dsl_dir_t *dd, uint64_t pzapobj, ++ boolean_t dosets, uint64_t uid, dmu_tx_t *tx) ++{ ++ objset_t *mos = dd->dd_pool->dp_meta_objset; ++ uint64_t jumpobj, pjumpobj; ++ uint64_t zapobj = dd->dd_phys->dd_deleg_zapobj; ++ zap_cursor_t zc; ++ zap_attribute_t za; ++ char whokey[ZFS_MAX_DELEG_NAME]; ++ ++ zfs_deleg_whokey(whokey, ++ dosets ? ZFS_DELEG_CREATE_SETS : ZFS_DELEG_CREATE, ++ ZFS_DELEG_LOCAL, NULL); ++ if (zap_lookup(mos, pzapobj, whokey, 8, 1, &pjumpobj) != 0) ++ return; ++ ++ if (zapobj == 0) { ++ dmu_buf_will_dirty(dd->dd_dbuf, tx); ++ zapobj = dd->dd_phys->dd_deleg_zapobj = zap_create(mos, ++ DMU_OT_DSL_PERMS, DMU_OT_NONE, 0, tx); ++ } ++ ++ zfs_deleg_whokey(whokey, ++ dosets ? ZFS_DELEG_USER_SETS : ZFS_DELEG_USER, ++ ZFS_DELEG_LOCAL, &uid); ++ if (zap_lookup(mos, zapobj, whokey, 8, 1, &jumpobj) == ENOENT) { ++ jumpobj = zap_create(mos, DMU_OT_DSL_PERMS, DMU_OT_NONE, 0, tx); ++ VERIFY(zap_add(mos, zapobj, whokey, 8, 1, &jumpobj, tx) == 0); ++ } ++ ++ for (zap_cursor_init(&zc, mos, pjumpobj); ++ zap_cursor_retrieve(&zc, &za) == 0; ++ zap_cursor_advance(&zc)) { ++ uint64_t zero = 0; ++ ASSERT(za.za_integer_length == 8 && za.za_num_integers == 1); ++ ++ VERIFY(zap_update(mos, jumpobj, za.za_name, ++ 8, 1, &zero, tx) == 0); ++ } ++ zap_cursor_fini(&zc); ++} ++ ++/* ++ * set all create time permission on new dataset. 
++ */ ++void ++dsl_deleg_set_create_perms(dsl_dir_t *sdd, dmu_tx_t *tx, cred_t *cr) ++{ ++ dsl_dir_t *dd; ++ uint64_t uid = crgetuid(cr); ++ ++ if (spa_version(dmu_objset_spa(sdd->dd_pool->dp_meta_objset)) < ++ SPA_VERSION_DELEGATED_PERMS) ++ return; ++ ++ for (dd = sdd->dd_parent; dd != NULL; dd = dd->dd_parent) { ++ uint64_t pzapobj = dd->dd_phys->dd_deleg_zapobj; ++ ++ if (pzapobj == 0) ++ continue; ++ ++ copy_create_perms(sdd, pzapobj, B_FALSE, uid, tx); ++ copy_create_perms(sdd, pzapobj, B_TRUE, uid, tx); ++ } ++} ++ ++int ++dsl_deleg_destroy(objset_t *mos, uint64_t zapobj, dmu_tx_t *tx) ++{ ++ zap_cursor_t zc; ++ zap_attribute_t za; ++ ++ if (zapobj == 0) ++ return (0); ++ ++ for (zap_cursor_init(&zc, mos, zapobj); ++ zap_cursor_retrieve(&zc, &za) == 0; ++ zap_cursor_advance(&zc)) { ++ ASSERT(za.za_integer_length == 8 && za.za_num_integers == 1); ++ VERIFY(0 == zap_destroy(mos, za.za_first_integer, tx)); ++ } ++ zap_cursor_fini(&zc); ++ VERIFY(0 == zap_destroy(mos, zapobj, tx)); ++ return (0); ++} ++ ++boolean_t ++dsl_delegation_on(objset_t *os) ++{ ++ return (!!spa_delegation(os->os_spa)); ++} ++ ++#if defined(_KERNEL) && defined(HAVE_SPL) ++EXPORT_SYMBOL(dsl_deleg_get); ++EXPORT_SYMBOL(dsl_deleg_set); ++#endif +diff -uNr linux-3.2.33-go.orig/fs/zfs/zfs/dsl_dir.c linux-3.2.33-go/fs/zfs/zfs/dsl_dir.c +--- linux-3.2.33-go.orig/fs/zfs/zfs/dsl_dir.c 1970-01-01 01:00:00.000000000 +0100 ++++ linux-3.2.33-go/fs/zfs/zfs/dsl_dir.c 2012-11-16 23:25:34.347039358 +0100 +@@ -0,0 +1,1422 @@ ++/* ++ * CDDL HEADER START ++ * ++ * The contents of this file are subject to the terms of the ++ * Common Development and Distribution License (the "License"). ++ * You may not use this file except in compliance with the License. ++ * ++ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE ++ * or http://www.opensolaris.org/os/licensing. ++ * See the License for the specific language governing permissions ++ * and limitations under the License. ++ * ++ * When distributing Covered Code, include this CDDL HEADER in each ++ * file and include the License file at usr/src/OPENSOLARIS.LICENSE. ++ * If applicable, add the following below this CDDL HEADER, with the ++ * fields enclosed by brackets "[]" replaced with your own identifying ++ * information: Portions Copyright [yyyy] [name of copyright owner] ++ * ++ * CDDL HEADER END ++ */ ++/* ++ * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved. ++ */ ++ ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include "zfs_namecheck.h" ++ ++static uint64_t dsl_dir_space_towrite(dsl_dir_t *dd); ++static void dsl_dir_set_reservation_sync(void *arg1, void *arg2, dmu_tx_t *tx); ++ ++ ++/* ARGSUSED */ ++static void ++dsl_dir_evict(dmu_buf_t *db, void *arg) ++{ ++ dsl_dir_t *dd = arg; ++ ASSERTV(dsl_pool_t *dp = dd->dd_pool;) ++ int t; ++ ++ for (t = 0; t < TXG_SIZE; t++) { ++ ASSERT(!txg_list_member(&dp->dp_dirty_dirs, dd, t)); ++ ASSERT(dd->dd_tempreserved[t] == 0); ++ ASSERT(dd->dd_space_towrite[t] == 0); ++ } ++ ++ if (dd->dd_parent) ++ dsl_dir_close(dd->dd_parent, dd); ++ ++ spa_close(dd->dd_pool->dp_spa, dd); ++ ++ /* ++ * The props callback list should have been cleaned up by ++ * objset_evict(). 
++ */ ++ list_destroy(&dd->dd_prop_cbs); ++ mutex_destroy(&dd->dd_lock); ++ kmem_free(dd, sizeof (dsl_dir_t)); ++} ++ ++int ++dsl_dir_open_obj(dsl_pool_t *dp, uint64_t ddobj, ++ const char *tail, void *tag, dsl_dir_t **ddp) ++{ ++ dmu_buf_t *dbuf; ++ dsl_dir_t *dd; ++ int err; ++ ++ ASSERT(RW_LOCK_HELD(&dp->dp_config_rwlock) || ++ dsl_pool_sync_context(dp)); ++ ++ err = dmu_bonus_hold(dp->dp_meta_objset, ddobj, tag, &dbuf); ++ if (err) ++ return (err); ++ dd = dmu_buf_get_user(dbuf); ++#ifdef ZFS_DEBUG ++ { ++ dmu_object_info_t doi; ++ dmu_object_info_from_db(dbuf, &doi); ++ ASSERT3U(doi.doi_type, ==, DMU_OT_DSL_DIR); ++ ASSERT3U(doi.doi_bonus_size, >=, sizeof (dsl_dir_phys_t)); ++ } ++#endif ++ if (dd == NULL) { ++ dsl_dir_t *winner; ++ ++ dd = kmem_zalloc(sizeof (dsl_dir_t), KM_PUSHPAGE); ++ dd->dd_object = ddobj; ++ dd->dd_dbuf = dbuf; ++ dd->dd_pool = dp; ++ dd->dd_phys = dbuf->db_data; ++ mutex_init(&dd->dd_lock, NULL, MUTEX_DEFAULT, NULL); ++ ++ list_create(&dd->dd_prop_cbs, sizeof (dsl_prop_cb_record_t), ++ offsetof(dsl_prop_cb_record_t, cbr_node)); ++ ++ dsl_dir_snap_cmtime_update(dd); ++ ++ if (dd->dd_phys->dd_parent_obj) { ++ err = dsl_dir_open_obj(dp, dd->dd_phys->dd_parent_obj, ++ NULL, dd, &dd->dd_parent); ++ if (err) ++ goto errout; ++ if (tail) { ++#ifdef ZFS_DEBUG ++ uint64_t foundobj; ++ ++ err = zap_lookup(dp->dp_meta_objset, ++ dd->dd_parent->dd_phys->dd_child_dir_zapobj, ++ tail, sizeof (foundobj), 1, &foundobj); ++ ASSERT(err || foundobj == ddobj); ++#endif ++ (void) strcpy(dd->dd_myname, tail); ++ } else { ++ err = zap_value_search(dp->dp_meta_objset, ++ dd->dd_parent->dd_phys->dd_child_dir_zapobj, ++ ddobj, 0, dd->dd_myname); ++ } ++ if (err) ++ goto errout; ++ } else { ++ (void) strcpy(dd->dd_myname, spa_name(dp->dp_spa)); ++ } ++ ++ if (dsl_dir_is_clone(dd)) { ++ dmu_buf_t *origin_bonus; ++ dsl_dataset_phys_t *origin_phys; ++ ++ /* ++ * We can't open the origin dataset, because ++ * that would require opening this dsl_dir. ++ * Just look at its phys directly instead. ++ */ ++ err = dmu_bonus_hold(dp->dp_meta_objset, ++ dd->dd_phys->dd_origin_obj, FTAG, &origin_bonus); ++ if (err) ++ goto errout; ++ origin_phys = origin_bonus->db_data; ++ dd->dd_origin_txg = ++ origin_phys->ds_creation_txg; ++ dmu_buf_rele(origin_bonus, FTAG); ++ } ++ ++ winner = dmu_buf_set_user_ie(dbuf, dd, &dd->dd_phys, ++ dsl_dir_evict); ++ if (winner) { ++ if (dd->dd_parent) ++ dsl_dir_close(dd->dd_parent, dd); ++ mutex_destroy(&dd->dd_lock); ++ kmem_free(dd, sizeof (dsl_dir_t)); ++ dd = winner; ++ } else { ++ spa_open_ref(dp->dp_spa, dd); ++ } ++ } ++ ++ /* ++ * The dsl_dir_t has both open-to-close and instantiate-to-evict ++ * holds on the spa. We need the open-to-close holds because ++ * otherwise the spa_refcnt wouldn't change when we open a ++ * dir which the spa also has open, so we could incorrectly ++ * think it was OK to unload/export/destroy the pool. We need ++ * the instantiate-to-evict hold because the dsl_dir_t has a ++ * pointer to the dd_pool, which has a pointer to the spa_t. 
++ */ ++ spa_open_ref(dp->dp_spa, tag); ++ ASSERT3P(dd->dd_pool, ==, dp); ++ ASSERT3U(dd->dd_object, ==, ddobj); ++ ASSERT3P(dd->dd_dbuf, ==, dbuf); ++ *ddp = dd; ++ return (0); ++ ++errout: ++ if (dd->dd_parent) ++ dsl_dir_close(dd->dd_parent, dd); ++ mutex_destroy(&dd->dd_lock); ++ kmem_free(dd, sizeof (dsl_dir_t)); ++ dmu_buf_rele(dbuf, tag); ++ return (err); ++ ++} ++ ++void ++dsl_dir_close(dsl_dir_t *dd, void *tag) ++{ ++ dprintf_dd(dd, "%s\n", ""); ++ spa_close(dd->dd_pool->dp_spa, tag); ++ dmu_buf_rele(dd->dd_dbuf, tag); ++} ++ ++/* buf must be long enough (MAXNAMELEN + strlen(MOS_DIR_NAME) + 1 should do) */ ++void ++dsl_dir_name(dsl_dir_t *dd, char *buf) ++{ ++ if (dd->dd_parent) { ++ dsl_dir_name(dd->dd_parent, buf); ++ (void) strcat(buf, "/"); ++ } else { ++ buf[0] = '\0'; ++ } ++ if (!MUTEX_HELD(&dd->dd_lock)) { ++ /* ++ * recursive mutex so that we can use ++ * dprintf_dd() with dd_lock held ++ */ ++ mutex_enter(&dd->dd_lock); ++ (void) strcat(buf, dd->dd_myname); ++ mutex_exit(&dd->dd_lock); ++ } else { ++ (void) strcat(buf, dd->dd_myname); ++ } ++} ++ ++/* Calculate name legnth, avoiding all the strcat calls of dsl_dir_name */ ++int ++dsl_dir_namelen(dsl_dir_t *dd) ++{ ++ int result = 0; ++ ++ if (dd->dd_parent) { ++ /* parent's name + 1 for the "/" */ ++ result = dsl_dir_namelen(dd->dd_parent) + 1; ++ } ++ ++ if (!MUTEX_HELD(&dd->dd_lock)) { ++ /* see dsl_dir_name */ ++ mutex_enter(&dd->dd_lock); ++ result += strlen(dd->dd_myname); ++ mutex_exit(&dd->dd_lock); ++ } else { ++ result += strlen(dd->dd_myname); ++ } ++ ++ return (result); ++} ++ ++static int ++getcomponent(const char *path, char *component, const char **nextp) ++{ ++ char *p; ++ if ((path == NULL) || (path[0] == '\0')) ++ return (ENOENT); ++ /* This would be a good place to reserve some namespace... */ ++ p = strpbrk(path, "/@"); ++ if (p && (p[1] == '/' || p[1] == '@')) { ++ /* two separators in a row */ ++ return (EINVAL); ++ } ++ if (p == NULL || p == path) { ++ /* ++ * if the first thing is an @ or /, it had better be an ++ * @ and it had better not have any more ats or slashes, ++ * and it had better have something after the @. ++ */ ++ if (p != NULL && ++ (p[0] != '@' || strpbrk(path+1, "/@") || p[1] == '\0')) ++ return (EINVAL); ++ if (strlen(path) >= MAXNAMELEN) ++ return (ENAMETOOLONG); ++ (void) strcpy(component, path); ++ p = NULL; ++ } else if (p[0] == '/') { ++ if (p-path >= MAXNAMELEN) ++ return (ENAMETOOLONG); ++ (void) strncpy(component, path, p - path); ++ component[p-path] = '\0'; ++ p++; ++ } else if (p[0] == '@') { ++ /* ++ * if the next separator is an @, there better not be ++ * any more slashes. 
++ */ ++ if (strchr(path, '/')) ++ return (EINVAL); ++ if (p-path >= MAXNAMELEN) ++ return (ENAMETOOLONG); ++ (void) strncpy(component, path, p - path); ++ component[p-path] = '\0'; ++ } else { ++ ASSERT(!"invalid p"); ++ } ++ *nextp = p; ++ return (0); ++} ++ ++/* ++ * same as dsl_open_dir, ignore the first component of name and use the ++ * spa instead ++ */ ++int ++dsl_dir_open_spa(spa_t *spa, const char *name, void *tag, ++ dsl_dir_t **ddp, const char **tailp) ++{ ++ char *buf; ++ const char *next, *nextnext = NULL; ++ int err; ++ dsl_dir_t *dd; ++ dsl_pool_t *dp; ++ uint64_t ddobj; ++ int openedspa = FALSE; ++ ++ dprintf("%s\n", name); ++ ++ buf = kmem_alloc(MAXNAMELEN, KM_SLEEP); ++ err = getcomponent(name, buf, &next); ++ if (err) ++ goto error; ++ if (spa == NULL) { ++ err = spa_open(buf, &spa, FTAG); ++ if (err) { ++ dprintf("spa_open(%s) failed\n", buf); ++ goto error; ++ } ++ openedspa = TRUE; ++ ++ /* XXX this assertion belongs in spa_open */ ++ ASSERT(!dsl_pool_sync_context(spa_get_dsl(spa))); ++ } ++ ++ dp = spa_get_dsl(spa); ++ ++ rw_enter(&dp->dp_config_rwlock, RW_READER); ++ err = dsl_dir_open_obj(dp, dp->dp_root_dir_obj, NULL, tag, &dd); ++ if (err) { ++ rw_exit(&dp->dp_config_rwlock); ++ if (openedspa) ++ spa_close(spa, FTAG); ++ goto error; ++ } ++ ++ while (next != NULL) { ++ dsl_dir_t *child_ds; ++ err = getcomponent(next, buf, &nextnext); ++ if (err) ++ break; ++ ASSERT(next[0] != '\0'); ++ if (next[0] == '@') ++ break; ++ dprintf("looking up %s in obj%lld\n", ++ buf, dd->dd_phys->dd_child_dir_zapobj); ++ ++ err = zap_lookup(dp->dp_meta_objset, ++ dd->dd_phys->dd_child_dir_zapobj, ++ buf, sizeof (ddobj), 1, &ddobj); ++ if (err) { ++ if (err == ENOENT) ++ err = 0; ++ break; ++ } ++ ++ err = dsl_dir_open_obj(dp, ddobj, buf, tag, &child_ds); ++ if (err) ++ break; ++ dsl_dir_close(dd, tag); ++ dd = child_ds; ++ next = nextnext; ++ } ++ rw_exit(&dp->dp_config_rwlock); ++ ++ if (err) { ++ dsl_dir_close(dd, tag); ++ if (openedspa) ++ spa_close(spa, FTAG); ++ goto error; ++ } ++ ++ /* ++ * It's an error if there's more than one component left, or ++ * tailp==NULL and there's any component left. ++ */ ++ if (next != NULL && ++ (tailp == NULL || (nextnext && nextnext[0] != '\0'))) { ++ /* bad path name */ ++ dsl_dir_close(dd, tag); ++ dprintf("next=%p (%s) tail=%p\n", next, next?next:"", tailp); ++ err = ENOENT; ++ } ++ if (tailp) ++ *tailp = next; ++ if (openedspa) ++ spa_close(spa, FTAG); ++ *ddp = dd; ++error: ++ kmem_free(buf, MAXNAMELEN); ++ return (err); ++} ++ ++/* ++ * Return the dsl_dir_t, and possibly the last component which couldn't ++ * be found in *tail. Return NULL if the path is bogus, or if ++ * tail==NULL and we couldn't parse the whole name. (*tail)[0] == '@' ++ * means that the last component is a snapshot. 
++ */ ++int ++dsl_dir_open(const char *name, void *tag, dsl_dir_t **ddp, const char **tailp) ++{ ++ return (dsl_dir_open_spa(NULL, name, tag, ddp, tailp)); ++} ++ ++uint64_t ++dsl_dir_create_sync(dsl_pool_t *dp, dsl_dir_t *pds, const char *name, ++ dmu_tx_t *tx) ++{ ++ objset_t *mos = dp->dp_meta_objset; ++ uint64_t ddobj; ++ dsl_dir_phys_t *ddphys; ++ dmu_buf_t *dbuf; ++ ++ ddobj = dmu_object_alloc(mos, DMU_OT_DSL_DIR, 0, ++ DMU_OT_DSL_DIR, sizeof (dsl_dir_phys_t), tx); ++ if (pds) { ++ VERIFY(0 == zap_add(mos, pds->dd_phys->dd_child_dir_zapobj, ++ name, sizeof (uint64_t), 1, &ddobj, tx)); ++ } else { ++ /* it's the root dir */ ++ VERIFY(0 == zap_add(mos, DMU_POOL_DIRECTORY_OBJECT, ++ DMU_POOL_ROOT_DATASET, sizeof (uint64_t), 1, &ddobj, tx)); ++ } ++ VERIFY(0 == dmu_bonus_hold(mos, ddobj, FTAG, &dbuf)); ++ dmu_buf_will_dirty(dbuf, tx); ++ ddphys = dbuf->db_data; ++ ++ ddphys->dd_creation_time = gethrestime_sec(); ++ if (pds) ++ ddphys->dd_parent_obj = pds->dd_object; ++ ddphys->dd_props_zapobj = zap_create(mos, ++ DMU_OT_DSL_PROPS, DMU_OT_NONE, 0, tx); ++ ddphys->dd_child_dir_zapobj = zap_create(mos, ++ DMU_OT_DSL_DIR_CHILD_MAP, DMU_OT_NONE, 0, tx); ++ if (spa_version(dp->dp_spa) >= SPA_VERSION_USED_BREAKDOWN) ++ ddphys->dd_flags |= DD_FLAG_USED_BREAKDOWN; ++ dmu_buf_rele(dbuf, FTAG); ++ ++ return (ddobj); ++} ++ ++/* ARGSUSED */ ++int ++dsl_dir_destroy_check(void *arg1, void *arg2, dmu_tx_t *tx) ++{ ++ dsl_dataset_t *ds = arg1; ++ dsl_dir_t *dd = ds->ds_dir; ++ dsl_pool_t *dp = dd->dd_pool; ++ objset_t *mos = dp->dp_meta_objset; ++ int err; ++ uint64_t count; ++ ++ /* ++ * There should be exactly two holds, both from ++ * dsl_dataset_destroy: one on the dd directory, and one on its ++ * head ds. If there are more holds, then a concurrent thread is ++ * performing a lookup inside this dir while we're trying to destroy ++ * it. To minimize this possibility, we perform this check only ++ * in syncing context and fail the operation if we encounter ++ * additional holds. The dp_config_rwlock ensures that nobody else ++ * opens it after we check. ++ */ ++ if (dmu_tx_is_syncing(tx) && dmu_buf_refcount(dd->dd_dbuf) > 2) ++ return (EBUSY); ++ ++ err = zap_count(mos, dd->dd_phys->dd_child_dir_zapobj, &count); ++ if (err) ++ return (err); ++ if (count != 0) ++ return (EEXIST); ++ ++ return (0); ++} ++ ++void ++dsl_dir_destroy_sync(void *arg1, void *tag, dmu_tx_t *tx) ++{ ++ dsl_dataset_t *ds = arg1; ++ dsl_dir_t *dd = ds->ds_dir; ++ objset_t *mos = dd->dd_pool->dp_meta_objset; ++ dsl_prop_setarg_t psa; ++ uint64_t value = 0; ++ uint64_t obj; ++ dd_used_t t; ++ ++ ASSERT(RW_WRITE_HELD(&dd->dd_pool->dp_config_rwlock)); ++ ASSERT(dd->dd_phys->dd_head_dataset_obj == 0); ++ ++ /* Remove our reservation. 
*/ ++ dsl_prop_setarg_init_uint64(&psa, "reservation", ++ (ZPROP_SRC_NONE | ZPROP_SRC_LOCAL | ZPROP_SRC_RECEIVED), ++ &value); ++ psa.psa_effective_value = 0; /* predict default value */ ++ ++ dsl_dir_set_reservation_sync(ds, &psa, tx); ++ ++ ASSERT3U(dd->dd_phys->dd_used_bytes, ==, 0); ++ ASSERT3U(dd->dd_phys->dd_reserved, ==, 0); ++ for (t = 0; t < DD_USED_NUM; t++) ++ ASSERT3U(dd->dd_phys->dd_used_breakdown[t], ==, 0); ++ ++ VERIFY(0 == zap_destroy(mos, dd->dd_phys->dd_child_dir_zapobj, tx)); ++ VERIFY(0 == zap_destroy(mos, dd->dd_phys->dd_props_zapobj, tx)); ++ VERIFY(0 == dsl_deleg_destroy(mos, dd->dd_phys->dd_deleg_zapobj, tx)); ++ VERIFY(0 == zap_remove(mos, ++ dd->dd_parent->dd_phys->dd_child_dir_zapobj, dd->dd_myname, tx)); ++ ++ obj = dd->dd_object; ++ dsl_dir_close(dd, tag); ++ VERIFY(0 == dmu_object_free(mos, obj, tx)); ++} ++ ++boolean_t ++dsl_dir_is_clone(dsl_dir_t *dd) ++{ ++ return (dd->dd_phys->dd_origin_obj && ++ (dd->dd_pool->dp_origin_snap == NULL || ++ dd->dd_phys->dd_origin_obj != ++ dd->dd_pool->dp_origin_snap->ds_object)); ++} ++ ++void ++dsl_dir_stats(dsl_dir_t *dd, nvlist_t *nv) ++{ ++ mutex_enter(&dd->dd_lock); ++ dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_USED, ++ dd->dd_phys->dd_used_bytes); ++ dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_QUOTA, dd->dd_phys->dd_quota); ++ dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_RESERVATION, ++ dd->dd_phys->dd_reserved); ++ dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_COMPRESSRATIO, ++ dd->dd_phys->dd_compressed_bytes == 0 ? 100 : ++ (dd->dd_phys->dd_uncompressed_bytes * 100 / ++ dd->dd_phys->dd_compressed_bytes)); ++ if (dd->dd_phys->dd_flags & DD_FLAG_USED_BREAKDOWN) { ++ dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_USEDSNAP, ++ dd->dd_phys->dd_used_breakdown[DD_USED_SNAP]); ++ dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_USEDDS, ++ dd->dd_phys->dd_used_breakdown[DD_USED_HEAD]); ++ dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_USEDREFRESERV, ++ dd->dd_phys->dd_used_breakdown[DD_USED_REFRSRV]); ++ dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_USEDCHILD, ++ dd->dd_phys->dd_used_breakdown[DD_USED_CHILD] + ++ dd->dd_phys->dd_used_breakdown[DD_USED_CHILD_RSRV]); ++ } ++ mutex_exit(&dd->dd_lock); ++ ++ rw_enter(&dd->dd_pool->dp_config_rwlock, RW_READER); ++ if (dsl_dir_is_clone(dd)) { ++ dsl_dataset_t *ds; ++ char buf[MAXNAMELEN]; ++ ++ VERIFY(0 == dsl_dataset_hold_obj(dd->dd_pool, ++ dd->dd_phys->dd_origin_obj, FTAG, &ds)); ++ dsl_dataset_name(ds, buf); ++ dsl_dataset_rele(ds, FTAG); ++ dsl_prop_nvlist_add_string(nv, ZFS_PROP_ORIGIN, buf); ++ } ++ rw_exit(&dd->dd_pool->dp_config_rwlock); ++} ++ ++void ++dsl_dir_dirty(dsl_dir_t *dd, dmu_tx_t *tx) ++{ ++ dsl_pool_t *dp = dd->dd_pool; ++ ++ ASSERT(dd->dd_phys); ++ ++ if (txg_list_add(&dp->dp_dirty_dirs, dd, tx->tx_txg) == 0) { ++ /* up the hold count until we can be written out */ ++ dmu_buf_add_ref(dd->dd_dbuf, dd); ++ } ++} ++ ++static int64_t ++parent_delta(dsl_dir_t *dd, uint64_t used, int64_t delta) ++{ ++ uint64_t old_accounted = MAX(used, dd->dd_phys->dd_reserved); ++ uint64_t new_accounted = MAX(used + delta, dd->dd_phys->dd_reserved); ++ return (new_accounted - old_accounted); ++} ++ ++void ++dsl_dir_sync(dsl_dir_t *dd, dmu_tx_t *tx) ++{ ++ ASSERT(dmu_tx_is_syncing(tx)); ++ ++ dmu_buf_will_dirty(dd->dd_dbuf, tx); ++ ++ mutex_enter(&dd->dd_lock); ++ ASSERT3U(dd->dd_tempreserved[tx->tx_txg&TXG_MASK], ==, 0); ++ dprintf_dd(dd, "txg=%llu towrite=%lluK\n", tx->tx_txg, ++ dd->dd_space_towrite[tx->tx_txg&TXG_MASK] / 1024); ++ dd->dd_space_towrite[tx->tx_txg&TXG_MASK] = 0; ++ mutex_exit(&dd->dd_lock); ++ ++ /* release 
the hold from dsl_dir_dirty */ ++ dmu_buf_rele(dd->dd_dbuf, dd); ++} ++ ++static uint64_t ++dsl_dir_space_towrite(dsl_dir_t *dd) ++{ ++ uint64_t space = 0; ++ int i; ++ ++ ASSERT(MUTEX_HELD(&dd->dd_lock)); ++ ++ for (i = 0; i < TXG_SIZE; i++) { ++ space += dd->dd_space_towrite[i&TXG_MASK]; ++ ASSERT3U(dd->dd_space_towrite[i&TXG_MASK], >=, 0); ++ } ++ return (space); ++} ++ ++/* ++ * How much space would dd have available if ancestor had delta applied ++ * to it? If ondiskonly is set, we're only interested in what's ++ * on-disk, not estimated pending changes. ++ */ ++uint64_t ++dsl_dir_space_available(dsl_dir_t *dd, ++ dsl_dir_t *ancestor, int64_t delta, int ondiskonly) ++{ ++ uint64_t parentspace, myspace, quota, used; ++ ++ /* ++ * If there are no restrictions otherwise, assume we have ++ * unlimited space available. ++ */ ++ quota = UINT64_MAX; ++ parentspace = UINT64_MAX; ++ ++ if (dd->dd_parent != NULL) { ++ parentspace = dsl_dir_space_available(dd->dd_parent, ++ ancestor, delta, ondiskonly); ++ } ++ ++ mutex_enter(&dd->dd_lock); ++ if (dd->dd_phys->dd_quota != 0) ++ quota = dd->dd_phys->dd_quota; ++ used = dd->dd_phys->dd_used_bytes; ++ if (!ondiskonly) ++ used += dsl_dir_space_towrite(dd); ++ ++ if (dd->dd_parent == NULL) { ++ uint64_t poolsize = dsl_pool_adjustedsize(dd->dd_pool, FALSE); ++ quota = MIN(quota, poolsize); ++ } ++ ++ if (dd->dd_phys->dd_reserved > used && parentspace != UINT64_MAX) { ++ /* ++ * We have some space reserved, in addition to what our ++ * parent gave us. ++ */ ++ parentspace += dd->dd_phys->dd_reserved - used; ++ } ++ ++ if (dd == ancestor) { ++ ASSERT(delta <= 0); ++ ASSERT(used >= -delta); ++ used += delta; ++ if (parentspace != UINT64_MAX) ++ parentspace -= delta; ++ } ++ ++ if (used > quota) { ++ /* over quota */ ++ myspace = 0; ++ } else { ++ /* ++ * the lesser of the space provided by our parent and ++ * the space left in our quota ++ */ ++ myspace = MIN(parentspace, quota - used); ++ } ++ ++ mutex_exit(&dd->dd_lock); ++ ++ return (myspace); ++} ++ ++struct tempreserve { ++ list_node_t tr_node; ++ dsl_pool_t *tr_dp; ++ dsl_dir_t *tr_ds; ++ uint64_t tr_size; ++}; ++ ++static int ++dsl_dir_tempreserve_impl(dsl_dir_t *dd, uint64_t asize, boolean_t netfree, ++ boolean_t ignorequota, boolean_t checkrefquota, list_t *tr_list, ++ dmu_tx_t *tx, boolean_t first) ++{ ++ uint64_t txg = tx->tx_txg; ++ uint64_t est_inflight, used_on_disk, quota, parent_rsrv; ++ uint64_t deferred = 0; ++ struct tempreserve *tr; ++ int retval = EDQUOT; ++ int txgidx = txg & TXG_MASK; ++ int i; ++ uint64_t ref_rsrv = 0; ++ ++ ASSERT3U(txg, !=, 0); ++ ASSERT3S(asize, >, 0); ++ ++ mutex_enter(&dd->dd_lock); ++ ++ /* ++ * Check against the dsl_dir's quota. We don't add in the delta ++ * when checking for over-quota because they get one free hit. ++ */ ++ est_inflight = dsl_dir_space_towrite(dd); ++ for (i = 0; i < TXG_SIZE; i++) ++ est_inflight += dd->dd_tempreserved[i]; ++ used_on_disk = dd->dd_phys->dd_used_bytes; ++ ++ /* ++ * On the first iteration, fetch the dataset's used-on-disk and ++ * refreservation values. Also, if checkrefquota is set, test if ++ * allocating this space would exceed the dataset's refquota. 
++ */ ++ if (first && tx->tx_objset) { ++ int error; ++ dsl_dataset_t *ds = tx->tx_objset->os_dsl_dataset; ++ ++ error = dsl_dataset_check_quota(ds, checkrefquota, ++ asize, est_inflight, &used_on_disk, &ref_rsrv); ++ if (error) { ++ mutex_exit(&dd->dd_lock); ++ return (error); ++ } ++ } ++ ++ /* ++ * If this transaction will result in a net free of space, ++ * we want to let it through. ++ */ ++ if (ignorequota || netfree || dd->dd_phys->dd_quota == 0) ++ quota = UINT64_MAX; ++ else ++ quota = dd->dd_phys->dd_quota; ++ ++ /* ++ * Adjust the quota against the actual pool size at the root ++ * minus any outstanding deferred frees. ++ * To ensure that it's possible to remove files from a full ++ * pool without inducing transient overcommits, we throttle ++ * netfree transactions against a quota that is slightly larger, ++ * but still within the pool's allocation slop. In cases where ++ * we're very close to full, this will allow a steady trickle of ++ * removes to get through. ++ */ ++ if (dd->dd_parent == NULL) { ++ spa_t *spa = dd->dd_pool->dp_spa; ++ uint64_t poolsize = dsl_pool_adjustedsize(dd->dd_pool, netfree); ++ deferred = metaslab_class_get_deferred(spa_normal_class(spa)); ++ if (poolsize - deferred < quota) { ++ quota = poolsize - deferred; ++ retval = ENOSPC; ++ } ++ } ++ ++ /* ++ * If they are requesting more space, and our current estimate ++ * is over quota, they get to try again unless the actual ++ * on-disk is over quota and there are no pending changes (which ++ * may free up space for us). ++ */ ++ if (used_on_disk + est_inflight >= quota) { ++ if (est_inflight > 0 || used_on_disk < quota || ++ (retval == ENOSPC && used_on_disk < quota + deferred)) ++ retval = ERESTART; ++ dprintf_dd(dd, "failing: used=%lluK inflight = %lluK " ++ "quota=%lluK tr=%lluK err=%d\n", ++ used_on_disk>>10, est_inflight>>10, ++ quota>>10, asize>>10, retval); ++ mutex_exit(&dd->dd_lock); ++ return (retval); ++ } ++ ++ /* We need to up our estimated delta before dropping dd_lock */ ++ dd->dd_tempreserved[txgidx] += asize; ++ ++ parent_rsrv = parent_delta(dd, used_on_disk + est_inflight, ++ asize - ref_rsrv); ++ mutex_exit(&dd->dd_lock); ++ ++ tr = kmem_zalloc(sizeof (struct tempreserve), KM_PUSHPAGE); ++ tr->tr_ds = dd; ++ tr->tr_size = asize; ++ list_insert_tail(tr_list, tr); ++ ++ /* see if it's OK with our parent */ ++ if (dd->dd_parent && parent_rsrv) { ++ boolean_t ismos = (dd->dd_phys->dd_head_dataset_obj == 0); ++ ++ return (dsl_dir_tempreserve_impl(dd->dd_parent, ++ parent_rsrv, netfree, ismos, TRUE, tr_list, tx, FALSE)); ++ } else { ++ return (0); ++ } ++} ++ ++/* ++ * Reserve space in this dsl_dir, to be used in this tx's txg. ++ * After the space has been dirtied (and dsl_dir_willuse_space() ++ * has been called), the reservation should be canceled, using ++ * dsl_dir_tempreserve_clear(). 
++ */ ++int ++dsl_dir_tempreserve_space(dsl_dir_t *dd, uint64_t lsize, uint64_t asize, ++ uint64_t fsize, uint64_t usize, void **tr_cookiep, dmu_tx_t *tx) ++{ ++ int err; ++ list_t *tr_list; ++ ++ if (asize == 0) { ++ *tr_cookiep = NULL; ++ return (0); ++ } ++ ++ tr_list = kmem_alloc(sizeof (list_t), KM_PUSHPAGE); ++ list_create(tr_list, sizeof (struct tempreserve), ++ offsetof(struct tempreserve, tr_node)); ++ ASSERT3S(asize, >, 0); ++ ASSERT3S(fsize, >=, 0); ++ ++ err = arc_tempreserve_space(lsize, tx->tx_txg); ++ if (err == 0) { ++ struct tempreserve *tr; ++ ++ tr = kmem_zalloc(sizeof (struct tempreserve), KM_PUSHPAGE); ++ tr->tr_size = lsize; ++ list_insert_tail(tr_list, tr); ++ ++ err = dsl_pool_tempreserve_space(dd->dd_pool, asize, tx); ++ } else { ++ if (err == EAGAIN) { ++ txg_delay(dd->dd_pool, tx->tx_txg, 1); ++ err = ERESTART; ++ } ++ dsl_pool_memory_pressure(dd->dd_pool); ++ } ++ ++ if (err == 0) { ++ struct tempreserve *tr; ++ ++ tr = kmem_zalloc(sizeof (struct tempreserve), KM_PUSHPAGE); ++ tr->tr_dp = dd->dd_pool; ++ tr->tr_size = asize; ++ list_insert_tail(tr_list, tr); ++ ++ err = dsl_dir_tempreserve_impl(dd, asize, fsize >= asize, ++ FALSE, asize > usize, tr_list, tx, TRUE); ++ } ++ ++ if (err) ++ dsl_dir_tempreserve_clear(tr_list, tx); ++ else ++ *tr_cookiep = tr_list; ++ ++ return (err); ++} ++ ++/* ++ * Clear a temporary reservation that we previously made with ++ * dsl_dir_tempreserve_space(). ++ */ ++void ++dsl_dir_tempreserve_clear(void *tr_cookie, dmu_tx_t *tx) ++{ ++ int txgidx = tx->tx_txg & TXG_MASK; ++ list_t *tr_list = tr_cookie; ++ struct tempreserve *tr; ++ ++ ASSERT3U(tx->tx_txg, !=, 0); ++ ++ if (tr_cookie == NULL) ++ return; ++ ++ while ((tr = list_head(tr_list))) { ++ if (tr->tr_dp) { ++ dsl_pool_tempreserve_clear(tr->tr_dp, tr->tr_size, tx); ++ } else if (tr->tr_ds) { ++ mutex_enter(&tr->tr_ds->dd_lock); ++ ASSERT3U(tr->tr_ds->dd_tempreserved[txgidx], >=, ++ tr->tr_size); ++ tr->tr_ds->dd_tempreserved[txgidx] -= tr->tr_size; ++ mutex_exit(&tr->tr_ds->dd_lock); ++ } else { ++ arc_tempreserve_clear(tr->tr_size); ++ } ++ list_remove(tr_list, tr); ++ kmem_free(tr, sizeof (struct tempreserve)); ++ } ++ ++ kmem_free(tr_list, sizeof (list_t)); ++} ++ ++static void ++dsl_dir_willuse_space_impl(dsl_dir_t *dd, int64_t space, dmu_tx_t *tx) ++{ ++ int64_t parent_space; ++ uint64_t est_used; ++ ++ mutex_enter(&dd->dd_lock); ++ if (space > 0) ++ dd->dd_space_towrite[tx->tx_txg & TXG_MASK] += space; ++ ++ est_used = dsl_dir_space_towrite(dd) + dd->dd_phys->dd_used_bytes; ++ parent_space = parent_delta(dd, est_used, space); ++ mutex_exit(&dd->dd_lock); ++ ++ /* Make sure that we clean up dd_space_to* */ ++ dsl_dir_dirty(dd, tx); ++ ++ /* XXX this is potentially expensive and unnecessary... */ ++ if (parent_space && dd->dd_parent) ++ dsl_dir_willuse_space_impl(dd->dd_parent, parent_space, tx); ++} ++ ++/* ++ * Call in open context when we think we're going to write/free space, ++ * eg. when dirtying data. Be conservative (ie. OK to write less than ++ * this or free more than this, but don't write more or free less). 
++ */ ++void ++dsl_dir_willuse_space(dsl_dir_t *dd, int64_t space, dmu_tx_t *tx) ++{ ++ dsl_pool_willuse_space(dd->dd_pool, space, tx); ++ dsl_dir_willuse_space_impl(dd, space, tx); ++} ++ ++/* call from syncing context when we actually write/free space for this dd */ ++void ++dsl_dir_diduse_space(dsl_dir_t *dd, dd_used_t type, ++ int64_t used, int64_t compressed, int64_t uncompressed, dmu_tx_t *tx) ++{ ++ int64_t accounted_delta; ++ boolean_t needlock = !MUTEX_HELD(&dd->dd_lock); ++ ++ ASSERT(dmu_tx_is_syncing(tx)); ++ ASSERT(type < DD_USED_NUM); ++ ++ dsl_dir_dirty(dd, tx); ++ ++ if (needlock) ++ mutex_enter(&dd->dd_lock); ++ accounted_delta = parent_delta(dd, dd->dd_phys->dd_used_bytes, used); ++ ASSERT(used >= 0 || dd->dd_phys->dd_used_bytes >= -used); ++ ASSERT(compressed >= 0 || ++ dd->dd_phys->dd_compressed_bytes >= -compressed); ++ ASSERT(uncompressed >= 0 || ++ dd->dd_phys->dd_uncompressed_bytes >= -uncompressed); ++ dd->dd_phys->dd_used_bytes += used; ++ dd->dd_phys->dd_uncompressed_bytes += uncompressed; ++ dd->dd_phys->dd_compressed_bytes += compressed; ++ ++ if (dd->dd_phys->dd_flags & DD_FLAG_USED_BREAKDOWN) { ++ ASSERT(used > 0 || ++ dd->dd_phys->dd_used_breakdown[type] >= -used); ++ dd->dd_phys->dd_used_breakdown[type] += used; ++#ifdef DEBUG ++ { ++ dd_used_t t; ++ uint64_t u = 0; ++ for (t = 0; t < DD_USED_NUM; t++) ++ u += dd->dd_phys->dd_used_breakdown[t]; ++ ASSERT3U(u, ==, dd->dd_phys->dd_used_bytes); ++ } ++#endif ++ } ++ if (needlock) ++ mutex_exit(&dd->dd_lock); ++ ++ if (dd->dd_parent != NULL) { ++ dsl_dir_diduse_space(dd->dd_parent, DD_USED_CHILD, ++ accounted_delta, compressed, uncompressed, tx); ++ dsl_dir_transfer_space(dd->dd_parent, ++ used - accounted_delta, ++ DD_USED_CHILD_RSRV, DD_USED_CHILD, tx); ++ } ++} ++ ++void ++dsl_dir_transfer_space(dsl_dir_t *dd, int64_t delta, ++ dd_used_t oldtype, dd_used_t newtype, dmu_tx_t *tx) ++{ ++ boolean_t needlock = !MUTEX_HELD(&dd->dd_lock); ++ ++ ASSERT(dmu_tx_is_syncing(tx)); ++ ASSERT(oldtype < DD_USED_NUM); ++ ASSERT(newtype < DD_USED_NUM); ++ ++ if (delta == 0 || !(dd->dd_phys->dd_flags & DD_FLAG_USED_BREAKDOWN)) ++ return; ++ ++ dsl_dir_dirty(dd, tx); ++ if (needlock) ++ mutex_enter(&dd->dd_lock); ++ ASSERT(delta > 0 ? ++ dd->dd_phys->dd_used_breakdown[oldtype] >= delta : ++ dd->dd_phys->dd_used_breakdown[newtype] >= -delta); ++ ASSERT(dd->dd_phys->dd_used_bytes >= ABS(delta)); ++ dd->dd_phys->dd_used_breakdown[oldtype] -= delta; ++ dd->dd_phys->dd_used_breakdown[newtype] += delta; ++ if (needlock) ++ mutex_exit(&dd->dd_lock); ++} ++ ++static int ++dsl_dir_set_quota_check(void *arg1, void *arg2, dmu_tx_t *tx) ++{ ++ dsl_dataset_t *ds = arg1; ++ dsl_dir_t *dd = ds->ds_dir; ++ dsl_prop_setarg_t *psa = arg2; ++ int err; ++ uint64_t towrite; ++ ++ if ((err = dsl_prop_predict_sync(ds->ds_dir, psa)) != 0) ++ return (err); ++ ++ if (psa->psa_effective_value == 0) ++ return (0); ++ ++ mutex_enter(&dd->dd_lock); ++ /* ++ * If we are doing the preliminary check in open context, and ++ * there are pending changes, then don't fail it, since the ++ * pending changes could under-estimate the amount of space to be ++ * freed up. 
++ */ ++ towrite = dsl_dir_space_towrite(dd); ++ if ((dmu_tx_is_syncing(tx) || towrite == 0) && ++ (psa->psa_effective_value < dd->dd_phys->dd_reserved || ++ psa->psa_effective_value < dd->dd_phys->dd_used_bytes + towrite)) { ++ err = ENOSPC; ++ } ++ mutex_exit(&dd->dd_lock); ++ return (err); ++} ++ ++extern dsl_syncfunc_t dsl_prop_set_sync; ++ ++static void ++dsl_dir_set_quota_sync(void *arg1, void *arg2, dmu_tx_t *tx) ++{ ++ dsl_dataset_t *ds = arg1; ++ dsl_dir_t *dd = ds->ds_dir; ++ dsl_prop_setarg_t *psa = arg2; ++ uint64_t effective_value = psa->psa_effective_value; ++ ++ dsl_prop_set_sync(ds, psa, tx); ++ DSL_PROP_CHECK_PREDICTION(dd, psa); ++ ++ dmu_buf_will_dirty(dd->dd_dbuf, tx); ++ ++ mutex_enter(&dd->dd_lock); ++ dd->dd_phys->dd_quota = effective_value; ++ mutex_exit(&dd->dd_lock); ++} ++ ++int ++dsl_dir_set_quota(const char *ddname, zprop_source_t source, uint64_t quota) ++{ ++ dsl_dir_t *dd; ++ dsl_dataset_t *ds; ++ dsl_prop_setarg_t psa; ++ int err; ++ ++ dsl_prop_setarg_init_uint64(&psa, "quota", source, &quota); ++ ++ err = dsl_dataset_hold(ddname, FTAG, &ds); ++ if (err) ++ return (err); ++ ++ err = dsl_dir_open(ddname, FTAG, &dd, NULL); ++ if (err) { ++ dsl_dataset_rele(ds, FTAG); ++ return (err); ++ } ++ ++ ASSERT(ds->ds_dir == dd); ++ ++ /* ++ * If someone removes a file, then tries to set the quota, we want to ++ * make sure the file freeing takes effect. ++ */ ++ txg_wait_open(dd->dd_pool, 0); ++ ++ err = dsl_sync_task_do(dd->dd_pool, dsl_dir_set_quota_check, ++ dsl_dir_set_quota_sync, ds, &psa, 0); ++ ++ dsl_dir_close(dd, FTAG); ++ dsl_dataset_rele(ds, FTAG); ++ return (err); ++} ++ ++int ++dsl_dir_set_reservation_check(void *arg1, void *arg2, dmu_tx_t *tx) ++{ ++ dsl_dataset_t *ds = arg1; ++ dsl_dir_t *dd = ds->ds_dir; ++ dsl_prop_setarg_t *psa = arg2; ++ uint64_t effective_value; ++ uint64_t used, avail; ++ int err; ++ ++ if ((err = dsl_prop_predict_sync(ds->ds_dir, psa)) != 0) ++ return (err); ++ ++ effective_value = psa->psa_effective_value; ++ ++ /* ++ * If we are doing the preliminary check in open context, the ++ * space estimates may be inaccurate.
++ */ ++ if (!dmu_tx_is_syncing(tx)) ++ return (0); ++ ++ mutex_enter(&dd->dd_lock); ++ used = dd->dd_phys->dd_used_bytes; ++ mutex_exit(&dd->dd_lock); ++ ++ if (dd->dd_parent) { ++ avail = dsl_dir_space_available(dd->dd_parent, ++ NULL, 0, FALSE); ++ } else { ++ avail = dsl_pool_adjustedsize(dd->dd_pool, B_FALSE) - used; ++ } ++ ++ if (MAX(used, effective_value) > MAX(used, dd->dd_phys->dd_reserved)) { ++ uint64_t delta = MAX(used, effective_value) - ++ MAX(used, dd->dd_phys->dd_reserved); ++ ++ if (delta > avail) ++ return (ENOSPC); ++ if (dd->dd_phys->dd_quota > 0 && ++ effective_value > dd->dd_phys->dd_quota) ++ return (ENOSPC); ++ } ++ ++ return (0); ++} ++ ++static void ++dsl_dir_set_reservation_sync(void *arg1, void *arg2, dmu_tx_t *tx) ++{ ++ dsl_dataset_t *ds = arg1; ++ dsl_dir_t *dd = ds->ds_dir; ++ dsl_prop_setarg_t *psa = arg2; ++ uint64_t effective_value = psa->psa_effective_value; ++ uint64_t used; ++ int64_t delta; ++ ++ dsl_prop_set_sync(ds, psa, tx); ++ DSL_PROP_CHECK_PREDICTION(dd, psa); ++ ++ dmu_buf_will_dirty(dd->dd_dbuf, tx); ++ ++ mutex_enter(&dd->dd_lock); ++ used = dd->dd_phys->dd_used_bytes; ++ delta = MAX(used, effective_value) - ++ MAX(used, dd->dd_phys->dd_reserved); ++ dd->dd_phys->dd_reserved = effective_value; ++ ++ if (dd->dd_parent != NULL) { ++ /* Roll up this additional usage into our ancestors */ ++ dsl_dir_diduse_space(dd->dd_parent, DD_USED_CHILD_RSRV, ++ delta, 0, 0, tx); ++ } ++ mutex_exit(&dd->dd_lock); ++} ++ ++int ++dsl_dir_set_reservation(const char *ddname, zprop_source_t source, ++ uint64_t reservation) ++{ ++ dsl_dir_t *dd; ++ dsl_dataset_t *ds; ++ dsl_prop_setarg_t psa; ++ int err; ++ ++ dsl_prop_setarg_init_uint64(&psa, "reservation", source, &reservation); ++ ++ err = dsl_dataset_hold(ddname, FTAG, &ds); ++ if (err) ++ return (err); ++ ++ err = dsl_dir_open(ddname, FTAG, &dd, NULL); ++ if (err) { ++ dsl_dataset_rele(ds, FTAG); ++ return (err); ++ } ++ ++ ASSERT(ds->ds_dir == dd); ++ ++ err = dsl_sync_task_do(dd->dd_pool, dsl_dir_set_reservation_check, ++ dsl_dir_set_reservation_sync, ds, &psa, 0); ++ ++ dsl_dir_close(dd, FTAG); ++ dsl_dataset_rele(ds, FTAG); ++ return (err); ++} ++ ++static dsl_dir_t * ++closest_common_ancestor(dsl_dir_t *ds1, dsl_dir_t *ds2) ++{ ++ for (; ds1; ds1 = ds1->dd_parent) { ++ dsl_dir_t *dd; ++ for (dd = ds2; dd; dd = dd->dd_parent) { ++ if (ds1 == dd) ++ return (dd); ++ } ++ } ++ return (NULL); ++} ++ ++/* ++ * If delta is applied to dd, how much of that delta would be applied to ++ * ancestor? Syncing context only. ++ */ ++static int64_t ++would_change(dsl_dir_t *dd, int64_t delta, dsl_dir_t *ancestor) ++{ ++ if (dd == ancestor) ++ return (delta); ++ ++ mutex_enter(&dd->dd_lock); ++ delta = parent_delta(dd, dd->dd_phys->dd_used_bytes, delta); ++ mutex_exit(&dd->dd_lock); ++ return (would_change(dd->dd_parent, delta, ancestor)); ++} ++ ++struct renamearg { ++ dsl_dir_t *newparent; ++ const char *mynewname; ++}; ++ ++static int ++dsl_dir_rename_check(void *arg1, void *arg2, dmu_tx_t *tx) ++{ ++ dsl_dir_t *dd = arg1; ++ struct renamearg *ra = arg2; ++ dsl_pool_t *dp = dd->dd_pool; ++ objset_t *mos = dp->dp_meta_objset; ++ int err; ++ uint64_t val; ++ ++ /* ++ * There should only be one reference, from dmu_objset_rename(). ++ * Fleeting holds are also possible (eg, from "zfs list" getting ++ * stats), but any that are present in open context will likely ++ * be gone by syncing context, so only fail from syncing ++ * context. 
++ */ ++ if (dmu_tx_is_syncing(tx) && dmu_buf_refcount(dd->dd_dbuf) > 1) ++ return (EBUSY); ++ ++ /* check for existing name */ ++ err = zap_lookup(mos, ra->newparent->dd_phys->dd_child_dir_zapobj, ++ ra->mynewname, 8, 1, &val); ++ if (err == 0) ++ return (EEXIST); ++ if (err != ENOENT) ++ return (err); ++ ++ if (ra->newparent != dd->dd_parent) { ++ /* is there enough space? */ ++ uint64_t myspace = ++ MAX(dd->dd_phys->dd_used_bytes, dd->dd_phys->dd_reserved); ++ ++ /* no rename into our descendant */ ++ if (closest_common_ancestor(dd, ra->newparent) == dd) ++ return (EINVAL); ++ ++ if ((err = dsl_dir_transfer_possible(dd->dd_parent, ++ ra->newparent, myspace))) ++ return (err); ++ } ++ ++ return (0); ++} ++ ++static void ++dsl_dir_rename_sync(void *arg1, void *arg2, dmu_tx_t *tx) ++{ ++ dsl_dir_t *dd = arg1; ++ struct renamearg *ra = arg2; ++ dsl_pool_t *dp = dd->dd_pool; ++ objset_t *mos = dp->dp_meta_objset; ++ int err; ++ ++ ASSERT(dmu_buf_refcount(dd->dd_dbuf) <= 2); ++ ++ if (ra->newparent != dd->dd_parent) { ++ dsl_dir_diduse_space(dd->dd_parent, DD_USED_CHILD, ++ -dd->dd_phys->dd_used_bytes, ++ -dd->dd_phys->dd_compressed_bytes, ++ -dd->dd_phys->dd_uncompressed_bytes, tx); ++ dsl_dir_diduse_space(ra->newparent, DD_USED_CHILD, ++ dd->dd_phys->dd_used_bytes, ++ dd->dd_phys->dd_compressed_bytes, ++ dd->dd_phys->dd_uncompressed_bytes, tx); ++ ++ if (dd->dd_phys->dd_reserved > dd->dd_phys->dd_used_bytes) { ++ uint64_t unused_rsrv = dd->dd_phys->dd_reserved - ++ dd->dd_phys->dd_used_bytes; ++ ++ dsl_dir_diduse_space(dd->dd_parent, DD_USED_CHILD_RSRV, ++ -unused_rsrv, 0, 0, tx); ++ dsl_dir_diduse_space(ra->newparent, DD_USED_CHILD_RSRV, ++ unused_rsrv, 0, 0, tx); ++ } ++ } ++ ++ dmu_buf_will_dirty(dd->dd_dbuf, tx); ++ ++ /* remove from old parent zapobj */ ++ err = zap_remove(mos, dd->dd_parent->dd_phys->dd_child_dir_zapobj, ++ dd->dd_myname, tx); ++ ASSERT3U(err, ==, 0); ++ ++ (void) strcpy(dd->dd_myname, ra->mynewname); ++ dsl_dir_close(dd->dd_parent, dd); ++ dd->dd_phys->dd_parent_obj = ra->newparent->dd_object; ++ VERIFY(0 == dsl_dir_open_obj(dd->dd_pool, ++ ra->newparent->dd_object, NULL, dd, &dd->dd_parent)); ++ ++ /* add to new parent zapobj */ ++ err = zap_add(mos, ra->newparent->dd_phys->dd_child_dir_zapobj, ++ dd->dd_myname, 8, 1, &dd->dd_object, tx); ++ ASSERT3U(err, ==, 0); ++ ++ spa_history_log_internal(LOG_DS_RENAME, dd->dd_pool->dp_spa, ++ tx, "dataset = %llu", dd->dd_phys->dd_head_dataset_obj); ++} ++ ++int ++dsl_dir_rename(dsl_dir_t *dd, const char *newname) ++{ ++ struct renamearg ra; ++ int err; ++ ++ /* new parent should exist */ ++ err = dsl_dir_open(newname, FTAG, &ra.newparent, &ra.mynewname); ++ if (err) ++ return (err); ++ ++ /* can't rename to different pool */ ++ if (dd->dd_pool != ra.newparent->dd_pool) { ++ err = ENXIO; ++ goto out; ++ } ++ ++ /* new name should not already exist */ ++ if (ra.mynewname == NULL) { ++ err = EEXIST; ++ goto out; ++ } ++ ++ err = dsl_sync_task_do(dd->dd_pool, ++ dsl_dir_rename_check, dsl_dir_rename_sync, dd, &ra, 3); ++ ++out: ++ dsl_dir_close(ra.newparent, FTAG); ++ return (err); ++} ++ ++int ++dsl_dir_transfer_possible(dsl_dir_t *sdd, dsl_dir_t *tdd, uint64_t space) ++{ ++ dsl_dir_t *ancestor; ++ int64_t adelta; ++ uint64_t avail; ++ ++ ancestor = closest_common_ancestor(sdd, tdd); ++ adelta = would_change(sdd, -space, ancestor); ++ avail = dsl_dir_space_available(tdd, ancestor, adelta, FALSE); ++ if (avail < space) ++ return (ENOSPC); ++ ++ return (0); ++} ++ ++timestruc_t ++dsl_dir_snap_cmtime(dsl_dir_t *dd) ++{ ++ timestruc_t 
t; ++ ++ mutex_enter(&dd->dd_lock); ++ t = dd->dd_snap_cmtime; ++ mutex_exit(&dd->dd_lock); ++ ++ return (t); ++} ++ ++void ++dsl_dir_snap_cmtime_update(dsl_dir_t *dd) ++{ ++ timestruc_t t; ++ ++ gethrestime(&t); ++ mutex_enter(&dd->dd_lock); ++ dd->dd_snap_cmtime = t; ++ mutex_exit(&dd->dd_lock); ++} ++ ++#if defined(_KERNEL) && defined(HAVE_SPL) ++EXPORT_SYMBOL(dsl_dir_set_quota); ++EXPORT_SYMBOL(dsl_dir_set_reservation); ++EXPORT_SYMBOL(dsl_dir_open); ++EXPORT_SYMBOL(dsl_dir_close); ++#endif +diff -uNr linux-3.2.33-go.orig/fs/zfs/zfs/dsl_pool.c linux-3.2.33-go/fs/zfs/zfs/dsl_pool.c +--- linux-3.2.33-go.orig/fs/zfs/zfs/dsl_pool.c 1970-01-01 01:00:00.000000000 +0100 ++++ linux-3.2.33-go/fs/zfs/zfs/dsl_pool.c 2012-11-16 23:25:34.348039346 +0100 +@@ -0,0 +1,1022 @@ ++/* ++ * CDDL HEADER START ++ * ++ * The contents of this file are subject to the terms of the ++ * Common Development and Distribution License (the "License"). ++ * You may not use this file except in compliance with the License. ++ * ++ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE ++ * or http://www.opensolaris.org/os/licensing. ++ * See the License for the specific language governing permissions ++ * and limitations under the License. ++ * ++ * When distributing Covered Code, include this CDDL HEADER in each ++ * file and include the License file at usr/src/OPENSOLARIS.LICENSE. ++ * If applicable, add the following below this CDDL HEADER, with the ++ * fields enclosed by brackets "[]" replaced with your own identifying ++ * information: Portions Copyright [yyyy] [name of copyright owner] ++ * ++ * CDDL HEADER END ++ */ ++/* ++ * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2011 by Delphix. All rights reserved. 
++ */ ++ ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++ ++int zfs_no_write_throttle = 0; ++int zfs_write_limit_shift = 3; /* 1/8th of physical memory */ ++int zfs_txg_synctime_ms = 1000; /* target millisecs to sync a txg */ ++int zfs_txg_history = 60; /* statistics for the last N txgs */ ++ ++unsigned long zfs_write_limit_min = 32 << 20; /* min write limit is 32MB */ ++unsigned long zfs_write_limit_max = 0; /* max data payload per txg */ ++unsigned long zfs_write_limit_inflated = 0; ++unsigned long zfs_write_limit_override = 0; ++ ++kmutex_t zfs_write_limit_lock; ++ ++static pgcnt_t old_physmem = 0; ++ ++static int ++dsl_pool_txg_history_update(kstat_t *ksp, int rw) ++{ ++ dsl_pool_t *dp = ksp->ks_private; ++ txg_history_t *th; ++ int i = 0; ++ ++ if (rw == KSTAT_WRITE) ++ return (EACCES); ++ ++ if (ksp->ks_data) ++ kmem_free(ksp->ks_data, ksp->ks_data_size); ++ ++ mutex_enter(&dp->dp_lock); ++ ++ ksp->ks_ndata = dp->dp_txg_history_size; ++ ksp->ks_data_size = dp->dp_txg_history_size * sizeof(kstat_txg_t); ++ if (ksp->ks_data_size > 0) ++ ksp->ks_data = kmem_alloc(ksp->ks_data_size, KM_PUSHPAGE); ++ ++ /* Traversed oldest to youngest for the most readable kstat output */ ++ for (th = list_tail(&dp->dp_txg_history); th != NULL; ++ th = list_prev(&dp->dp_txg_history, th)) { ++ mutex_enter(&th->th_lock); ++ ASSERT3S(i + sizeof(kstat_txg_t), <=, ksp->ks_data_size); ++ memcpy(ksp->ks_data + i, &th->th_kstat, sizeof(kstat_txg_t)); ++ i += sizeof(kstat_txg_t); ++ mutex_exit(&th->th_lock); ++ } ++ ++ mutex_exit(&dp->dp_lock); ++ ++ return (0); ++} ++ ++static void ++dsl_pool_txg_history_init(dsl_pool_t *dp, uint64_t txg) ++{ ++ char name[KSTAT_STRLEN]; ++ ++ list_create(&dp->dp_txg_history, sizeof (txg_history_t), ++ offsetof(txg_history_t, th_link)); ++ dsl_pool_txg_history_add(dp, txg); ++ ++ (void) snprintf(name, KSTAT_STRLEN, "txgs-%s", spa_name(dp->dp_spa)); ++ dp->dp_txg_kstat = kstat_create("zfs", 0, name, "misc", ++ KSTAT_TYPE_TXG, 0, KSTAT_FLAG_VIRTUAL); ++ if (dp->dp_txg_kstat) { ++ dp->dp_txg_kstat->ks_data = NULL; ++ dp->dp_txg_kstat->ks_private = dp; ++ dp->dp_txg_kstat->ks_update = dsl_pool_txg_history_update; ++ kstat_install(dp->dp_txg_kstat); ++ } ++} ++ ++static void ++dsl_pool_txg_history_destroy(dsl_pool_t *dp) ++{ ++ txg_history_t *th; ++ ++ if (dp->dp_txg_kstat) { ++ if (dp->dp_txg_kstat->ks_data) ++ kmem_free(dp->dp_txg_kstat->ks_data, ++ dp->dp_txg_kstat->ks_data_size); ++ ++ kstat_delete(dp->dp_txg_kstat); ++ } ++ ++ mutex_enter(&dp->dp_lock); ++ while ((th = list_remove_head(&dp->dp_txg_history))) { ++ dp->dp_txg_history_size--; ++ mutex_destroy(&th->th_lock); ++ kmem_free(th, sizeof(txg_history_t)); ++ } ++ ++ ASSERT3U(dp->dp_txg_history_size, ==, 0); ++ list_destroy(&dp->dp_txg_history); ++ mutex_exit(&dp->dp_lock); ++} ++ ++txg_history_t * ++dsl_pool_txg_history_add(dsl_pool_t *dp, uint64_t txg) ++{ ++ txg_history_t *th, *rm; ++ ++ th = kmem_zalloc(sizeof(txg_history_t), KM_SLEEP); ++ mutex_init(&th->th_lock, NULL, MUTEX_DEFAULT, NULL); ++ th->th_kstat.txg = txg; ++ th->th_kstat.state = TXG_STATE_OPEN; ++ th->th_kstat.birth = gethrtime(); ++ ++ mutex_enter(&dp->dp_lock); ++ ++ list_insert_head(&dp->dp_txg_history, th); ++ dp->dp_txg_history_size++; ++ ++ while (dp->dp_txg_history_size > zfs_txg_history) { ++ dp->dp_txg_history_size--; ++ rm = list_remove_tail(&dp->dp_txg_history); ++ mutex_destroy(&rm->th_lock); ++ kmem_free(rm, 
sizeof(txg_history_t)); ++ } ++ ++ mutex_exit(&dp->dp_lock); ++ ++ return (th); ++} ++ ++/* ++ * Traversed youngest to oldest because lookups are only done for open ++ * or syncing txgs which are guaranteed to be at the head of the list. ++ * The txg_history_t structure will be returned locked. ++ */ ++txg_history_t * ++dsl_pool_txg_history_get(dsl_pool_t *dp, uint64_t txg) ++{ ++ txg_history_t *th; ++ ++ mutex_enter(&dp->dp_lock); ++ for (th = list_head(&dp->dp_txg_history); th != NULL; ++ th = list_next(&dp->dp_txg_history, th)) { ++ if (th->th_kstat.txg == txg) { ++ mutex_enter(&th->th_lock); ++ break; ++ } ++ } ++ mutex_exit(&dp->dp_lock); ++ ++ return (th); ++} ++ ++void ++dsl_pool_txg_history_put(txg_history_t *th) ++{ ++ mutex_exit(&th->th_lock); ++} ++ ++int ++dsl_pool_open_special_dir(dsl_pool_t *dp, const char *name, dsl_dir_t **ddp) ++{ ++ uint64_t obj; ++ int err; ++ ++ err = zap_lookup(dp->dp_meta_objset, ++ dp->dp_root_dir->dd_phys->dd_child_dir_zapobj, ++ name, sizeof (obj), 1, &obj); ++ if (err) ++ return (err); ++ ++ return (dsl_dir_open_obj(dp, obj, name, dp, ddp)); ++} ++ ++static dsl_pool_t * ++dsl_pool_open_impl(spa_t *spa, uint64_t txg) ++{ ++ dsl_pool_t *dp; ++ blkptr_t *bp = spa_get_rootblkptr(spa); ++ ++ dp = kmem_zalloc(sizeof (dsl_pool_t), KM_SLEEP); ++ dp->dp_spa = spa; ++ dp->dp_meta_rootbp = *bp; ++ rw_init(&dp->dp_config_rwlock, NULL, RW_DEFAULT, NULL); ++ dp->dp_write_limit = zfs_write_limit_min; ++ txg_init(dp, txg); ++ ++ txg_list_create(&dp->dp_dirty_datasets, ++ offsetof(dsl_dataset_t, ds_dirty_link)); ++ txg_list_create(&dp->dp_dirty_dirs, ++ offsetof(dsl_dir_t, dd_dirty_link)); ++ txg_list_create(&dp->dp_sync_tasks, ++ offsetof(dsl_sync_task_group_t, dstg_node)); ++ list_create(&dp->dp_synced_datasets, sizeof (dsl_dataset_t), ++ offsetof(dsl_dataset_t, ds_synced_link)); ++ ++ mutex_init(&dp->dp_lock, NULL, MUTEX_DEFAULT, NULL); ++ ++ dp->dp_iput_taskq = taskq_create("zfs_iput_taskq", 1, minclsyspri, ++ 1, 4, 0); ++ ++ dsl_pool_txg_history_init(dp, txg); ++ ++ return (dp); ++} ++ ++int ++dsl_pool_open(spa_t *spa, uint64_t txg, dsl_pool_t **dpp) ++{ ++ int err; ++ dsl_pool_t *dp = dsl_pool_open_impl(spa, txg); ++ dsl_dir_t *dd; ++ dsl_dataset_t *ds; ++ uint64_t obj; ++ ++ rw_enter(&dp->dp_config_rwlock, RW_WRITER); ++ err = dmu_objset_open_impl(spa, NULL, &dp->dp_meta_rootbp, ++ &dp->dp_meta_objset); ++ if (err) ++ goto out; ++ ++ err = zap_lookup(dp->dp_meta_objset, DMU_POOL_DIRECTORY_OBJECT, ++ DMU_POOL_ROOT_DATASET, sizeof (uint64_t), 1, ++ &dp->dp_root_dir_obj); ++ if (err) ++ goto out; ++ ++ err = dsl_dir_open_obj(dp, dp->dp_root_dir_obj, ++ NULL, dp, &dp->dp_root_dir); ++ if (err) ++ goto out; ++ ++ err = dsl_pool_open_special_dir(dp, MOS_DIR_NAME, &dp->dp_mos_dir); ++ if (err) ++ goto out; ++ ++ if (spa_version(spa) >= SPA_VERSION_ORIGIN) { ++ err = dsl_pool_open_special_dir(dp, ORIGIN_DIR_NAME, &dd); ++ if (err) ++ goto out; ++ err = dsl_dataset_hold_obj(dp, dd->dd_phys->dd_head_dataset_obj, ++ FTAG, &ds); ++ if (err == 0) { ++ err = dsl_dataset_hold_obj(dp, ++ ds->ds_phys->ds_prev_snap_obj, dp, ++ &dp->dp_origin_snap); ++ dsl_dataset_rele(ds, FTAG); ++ } ++ dsl_dir_close(dd, dp); ++ if (err) ++ goto out; ++ } ++ ++ if (spa_version(spa) >= SPA_VERSION_DEADLISTS) { ++ err = dsl_pool_open_special_dir(dp, FREE_DIR_NAME, ++ &dp->dp_free_dir); ++ if (err) ++ goto out; ++ ++ err = zap_lookup(dp->dp_meta_objset, DMU_POOL_DIRECTORY_OBJECT, ++ DMU_POOL_FREE_BPOBJ, sizeof (uint64_t), 1, &obj); ++ if (err) ++ goto out; ++ VERIFY3U(0, ==, 
bpobj_open(&dp->dp_free_bpobj, ++ dp->dp_meta_objset, obj)); ++ } ++ ++ err = zap_lookup(dp->dp_meta_objset, DMU_POOL_DIRECTORY_OBJECT, ++ DMU_POOL_TMP_USERREFS, sizeof (uint64_t), 1, ++ &dp->dp_tmp_userrefs_obj); ++ if (err == ENOENT) ++ err = 0; ++ if (err) ++ goto out; ++ ++ err = dsl_scan_init(dp, txg); ++ ++out: ++ rw_exit(&dp->dp_config_rwlock); ++ if (err) ++ dsl_pool_close(dp); ++ else ++ *dpp = dp; ++ ++ return (err); ++} ++ ++void ++dsl_pool_close(dsl_pool_t *dp) ++{ ++ /* drop our references from dsl_pool_open() */ ++ ++ /* ++ * Since we held the origin_snap from "syncing" context (which ++ * includes pool-opening context), it actually only got a "ref" ++ * and not a hold, so just drop that here. ++ */ ++ if (dp->dp_origin_snap) ++ dsl_dataset_drop_ref(dp->dp_origin_snap, dp); ++ if (dp->dp_mos_dir) ++ dsl_dir_close(dp->dp_mos_dir, dp); ++ if (dp->dp_free_dir) ++ dsl_dir_close(dp->dp_free_dir, dp); ++ if (dp->dp_root_dir) ++ dsl_dir_close(dp->dp_root_dir, dp); ++ ++ bpobj_close(&dp->dp_free_bpobj); ++ ++ /* undo the dmu_objset_open_impl(mos) from dsl_pool_open() */ ++ if (dp->dp_meta_objset) ++ dmu_objset_evict(dp->dp_meta_objset); ++ ++ txg_list_destroy(&dp->dp_dirty_datasets); ++ txg_list_destroy(&dp->dp_sync_tasks); ++ txg_list_destroy(&dp->dp_dirty_dirs); ++ list_destroy(&dp->dp_synced_datasets); ++ ++ arc_flush(dp->dp_spa); ++ txg_fini(dp); ++ dsl_scan_fini(dp); ++ dsl_pool_txg_history_destroy(dp); ++ rw_destroy(&dp->dp_config_rwlock); ++ mutex_destroy(&dp->dp_lock); ++ taskq_destroy(dp->dp_iput_taskq); ++ if (dp->dp_blkstats) ++ kmem_free(dp->dp_blkstats, sizeof (zfs_all_blkstats_t)); ++ kmem_free(dp, sizeof (dsl_pool_t)); ++} ++ ++dsl_pool_t * ++dsl_pool_create(spa_t *spa, nvlist_t *zplprops, uint64_t txg) ++{ ++ int err; ++ dsl_pool_t *dp = dsl_pool_open_impl(spa, txg); ++ dmu_tx_t *tx = dmu_tx_create_assigned(dp, txg); ++ objset_t *os; ++ dsl_dataset_t *ds; ++ uint64_t obj; ++ ++ /* create and open the MOS (meta-objset) */ ++ dp->dp_meta_objset = dmu_objset_create_impl(spa, ++ NULL, &dp->dp_meta_rootbp, DMU_OST_META, tx); ++ ++ /* create the pool directory */ ++ err = zap_create_claim(dp->dp_meta_objset, DMU_POOL_DIRECTORY_OBJECT, ++ DMU_OT_OBJECT_DIRECTORY, DMU_OT_NONE, 0, tx); ++ ASSERT3U(err, ==, 0); ++ ++ /* Initialize scan structures */ ++ VERIFY3U(0, ==, dsl_scan_init(dp, txg)); ++ ++ /* create and open the root dir */ ++ dp->dp_root_dir_obj = dsl_dir_create_sync(dp, NULL, NULL, tx); ++ VERIFY(0 == dsl_dir_open_obj(dp, dp->dp_root_dir_obj, ++ NULL, dp, &dp->dp_root_dir)); ++ ++ /* create and open the meta-objset dir */ ++ (void) dsl_dir_create_sync(dp, dp->dp_root_dir, MOS_DIR_NAME, tx); ++ VERIFY(0 == dsl_pool_open_special_dir(dp, ++ MOS_DIR_NAME, &dp->dp_mos_dir)); ++ ++ if (spa_version(spa) >= SPA_VERSION_DEADLISTS) { ++ /* create and open the free dir */ ++ (void) dsl_dir_create_sync(dp, dp->dp_root_dir, ++ FREE_DIR_NAME, tx); ++ VERIFY(0 == dsl_pool_open_special_dir(dp, ++ FREE_DIR_NAME, &dp->dp_free_dir)); ++ ++ /* create and open the free_bplist */ ++ obj = bpobj_alloc(dp->dp_meta_objset, SPA_MAXBLOCKSIZE, tx); ++ VERIFY(zap_add(dp->dp_meta_objset, DMU_POOL_DIRECTORY_OBJECT, ++ DMU_POOL_FREE_BPOBJ, sizeof (uint64_t), 1, &obj, tx) == 0); ++ VERIFY3U(0, ==, bpobj_open(&dp->dp_free_bpobj, ++ dp->dp_meta_objset, obj)); ++ } ++ ++ if (spa_version(spa) >= SPA_VERSION_DSL_SCRUB) ++ dsl_pool_create_origin(dp, tx); ++ ++ /* create the root dataset */ ++ obj = dsl_dataset_create_sync_dd(dp->dp_root_dir, NULL, 0, tx); ++ ++ /* create the root objset */ ++ VERIFY(0 == 
dsl_dataset_hold_obj(dp, obj, FTAG, &ds)); ++ VERIFY(NULL != (os = dmu_objset_create_impl(dp->dp_spa, ds, ++ dsl_dataset_get_blkptr(ds), DMU_OST_ZFS, tx))); ++#ifdef _KERNEL ++ zfs_create_fs(os, kcred, zplprops, tx); ++#endif ++ dsl_dataset_rele(ds, FTAG); ++ ++ dmu_tx_commit(tx); ++ ++ return (dp); ++} ++ ++static int ++deadlist_enqueue_cb(void *arg, const blkptr_t *bp, dmu_tx_t *tx) ++{ ++ dsl_deadlist_t *dl = arg; ++ dsl_pool_t *dp = dmu_objset_pool(dl->dl_os); ++ rw_enter(&dp->dp_config_rwlock, RW_READER); ++ dsl_deadlist_insert(dl, bp, tx); ++ rw_exit(&dp->dp_config_rwlock); ++ return (0); ++} ++ ++void ++dsl_pool_sync(dsl_pool_t *dp, uint64_t txg) ++{ ++ zio_t *zio; ++ dmu_tx_t *tx; ++ dsl_dir_t *dd; ++ dsl_dataset_t *ds; ++ dsl_sync_task_group_t *dstg; ++ objset_t *mos = dp->dp_meta_objset; ++ hrtime_t start, write_time; ++ uint64_t data_written; ++ int err; ++ ++ /* ++ * We need to copy dp_space_towrite() before doing ++ * dsl_sync_task_group_sync(), because ++ * dsl_dataset_snapshot_reserve_space() will increase ++ * dp_space_towrite but not actually write anything. ++ */ ++ data_written = dp->dp_space_towrite[txg & TXG_MASK]; ++ ++ tx = dmu_tx_create_assigned(dp, txg); ++ ++ dp->dp_read_overhead = 0; ++ start = gethrtime(); ++ ++ zio = zio_root(dp->dp_spa, NULL, NULL, ZIO_FLAG_MUSTSUCCEED); ++ while ((ds = txg_list_remove(&dp->dp_dirty_datasets, txg))) { ++ /* ++ * We must not sync any non-MOS datasets twice, because ++ * we may have taken a snapshot of them. However, we ++ * may sync newly-created datasets on pass 2. ++ */ ++ ASSERT(!list_link_active(&ds->ds_synced_link)); ++ list_insert_tail(&dp->dp_synced_datasets, ds); ++ dsl_dataset_sync(ds, zio, tx); ++ } ++ DTRACE_PROBE(pool_sync__1setup); ++ err = zio_wait(zio); ++ ++ write_time = gethrtime() - start; ++ ASSERT(err == 0); ++ DTRACE_PROBE(pool_sync__2rootzio); ++ ++ for (ds = list_head(&dp->dp_synced_datasets); ds; ++ ds = list_next(&dp->dp_synced_datasets, ds)) ++ dmu_objset_do_userquota_updates(ds->ds_objset, tx); ++ ++ /* ++ * Sync the datasets again to push out the changes due to ++ * userspace updates. This must be done before we process the ++ * sync tasks, because that could cause a snapshot of a dataset ++ * whose ds_bp will be rewritten when we do this 2nd sync. ++ */ ++ zio = zio_root(dp->dp_spa, NULL, NULL, ZIO_FLAG_MUSTSUCCEED); ++ while ((ds = txg_list_remove(&dp->dp_dirty_datasets, txg))) { ++ ASSERT(list_link_active(&ds->ds_synced_link)); ++ dmu_buf_rele(ds->ds_dbuf, ds); ++ dsl_dataset_sync(ds, zio, tx); ++ } ++ err = zio_wait(zio); ++ ++ /* ++ * Move dead blocks from the pending deadlist to the on-disk ++ * deadlist. ++ */ ++ for (ds = list_head(&dp->dp_synced_datasets); ds; ++ ds = list_next(&dp->dp_synced_datasets, ds)) { ++ bplist_iterate(&ds->ds_pending_deadlist, ++ deadlist_enqueue_cb, &ds->ds_deadlist, tx); ++ } ++ ++ while ((dstg = txg_list_remove(&dp->dp_sync_tasks, txg))) { ++ /* ++ * No more sync tasks should have been added while we ++ * were syncing. 
++ */ ++ ASSERT(spa_sync_pass(dp->dp_spa) == 1); ++ dsl_sync_task_group_sync(dstg, tx); ++ } ++ DTRACE_PROBE(pool_sync__3task); ++ ++ start = gethrtime(); ++ while ((dd = txg_list_remove(&dp->dp_dirty_dirs, txg))) ++ dsl_dir_sync(dd, tx); ++ write_time += gethrtime() - start; ++ ++ start = gethrtime(); ++ if (list_head(&mos->os_dirty_dnodes[txg & TXG_MASK]) != NULL || ++ list_head(&mos->os_free_dnodes[txg & TXG_MASK]) != NULL) { ++ zio = zio_root(dp->dp_spa, NULL, NULL, ZIO_FLAG_MUSTSUCCEED); ++ dmu_objset_sync(mos, zio, tx); ++ err = zio_wait(zio); ++ ASSERT(err == 0); ++ dprintf_bp(&dp->dp_meta_rootbp, "meta objset rootbp is %s", ""); ++ spa_set_rootblkptr(dp->dp_spa, &dp->dp_meta_rootbp); ++ } ++ write_time += gethrtime() - start; ++ DTRACE_PROBE2(pool_sync__4io, hrtime_t, write_time, ++ hrtime_t, dp->dp_read_overhead); ++ write_time -= dp->dp_read_overhead; ++ ++ dmu_tx_commit(tx); ++ ++ dp->dp_space_towrite[txg & TXG_MASK] = 0; ++ ASSERT(dp->dp_tempreserved[txg & TXG_MASK] == 0); ++ ++ /* ++ * If the write limit max has not been explicitly set, set it ++ * to a fraction of available physical memory (default 1/8th). ++ * Note that we must inflate the limit because the spa ++ * inflates write sizes to account for data replication. ++ * Check this each sync phase to catch changing memory size. ++ */ ++ if (physmem != old_physmem && zfs_write_limit_shift) { ++ mutex_enter(&zfs_write_limit_lock); ++ old_physmem = physmem; ++ zfs_write_limit_max = ptob(physmem) >> zfs_write_limit_shift; ++ zfs_write_limit_inflated = MAX(zfs_write_limit_min, ++ spa_get_asize(dp->dp_spa, zfs_write_limit_max)); ++ mutex_exit(&zfs_write_limit_lock); ++ } ++ ++ /* ++ * Attempt to keep the sync time consistent by adjusting the ++ * amount of write traffic allowed into each transaction group. ++ * Weight the throughput calculation towards the current value: ++ * thru = 3/4 old_thru + 1/4 new_thru ++ * ++ * Note: write_time is in nanosecs, so write_time/MICROSEC ++ * yields millisecs ++ */ ++ ASSERT(zfs_write_limit_min > 0); ++ if (data_written > zfs_write_limit_min / 8 && write_time > MICROSEC) { ++ uint64_t throughput = data_written / (write_time / MICROSEC); ++ ++ if (dp->dp_throughput) ++ dp->dp_throughput = throughput / 4 + ++ 3 * dp->dp_throughput / 4; ++ else ++ dp->dp_throughput = throughput; ++ dp->dp_write_limit = MIN(zfs_write_limit_inflated, ++ MAX(zfs_write_limit_min, ++ dp->dp_throughput * zfs_txg_synctime_ms)); ++ } ++} ++ ++void ++dsl_pool_sync_done(dsl_pool_t *dp, uint64_t txg) ++{ ++ dsl_dataset_t *ds; ++ objset_t *os; ++ ++ while ((ds = list_head(&dp->dp_synced_datasets))) { ++ list_remove(&dp->dp_synced_datasets, ds); ++ os = ds->ds_objset; ++ zil_clean(os->os_zil, txg); ++ ASSERT(!dmu_objset_is_dirty(os, txg)); ++ dmu_buf_rele(ds->ds_dbuf, ds); ++ } ++ ASSERT(!dmu_objset_is_dirty(dp->dp_meta_objset, txg)); ++} ++ ++/* ++ * TRUE if the current thread is the tx_sync_thread or if we ++ * are being called from SPA context during pool initialization. ++ */ ++int ++dsl_pool_sync_context(dsl_pool_t *dp) ++{ ++ return (curthread == dp->dp_tx.tx_sync_thread || ++ spa_get_dsl(dp->dp_spa) == NULL); ++} ++ ++uint64_t ++dsl_pool_adjustedsize(dsl_pool_t *dp, boolean_t netfree) ++{ ++ uint64_t space, resv; ++ ++ /* ++ * Reserve about 1.6% (1/64), or at least 32MB, for allocation ++ * efficiency. ++ * XXX The intent log is not accounted for, so it must fit ++ * within this slop. 
++ * ++ * If we're trying to assess whether it's OK to do a free, ++ * cut the reservation in half to allow forward progress ++ * (e.g. make it possible to rm(1) files from a full pool). ++ */ ++ space = spa_get_dspace(dp->dp_spa); ++ resv = MAX(space >> 6, SPA_MINDEVSIZE >> 1); ++ if (netfree) ++ resv >>= 1; ++ ++ return (space - resv); ++} ++ ++int ++dsl_pool_tempreserve_space(dsl_pool_t *dp, uint64_t space, dmu_tx_t *tx) ++{ ++ uint64_t reserved = 0; ++ uint64_t write_limit = (zfs_write_limit_override ? ++ zfs_write_limit_override : dp->dp_write_limit); ++ ++ if (zfs_no_write_throttle) { ++ atomic_add_64(&dp->dp_tempreserved[tx->tx_txg & TXG_MASK], ++ space); ++ return (0); ++ } ++ ++ /* ++ * Check to see if we have exceeded the maximum allowed IO for ++ * this transaction group. We can do this without locks since ++ * a little slop here is ok. Note that we do the reserved check ++ * with only half the requested reserve: this is because the ++ * reserve requests are worst-case, and we really don't want to ++ * throttle based off of worst-case estimates. ++ */ ++ if (write_limit > 0) { ++ reserved = dp->dp_space_towrite[tx->tx_txg & TXG_MASK] ++ + dp->dp_tempreserved[tx->tx_txg & TXG_MASK] / 2; ++ ++ if (reserved && reserved > write_limit) { ++ DMU_TX_STAT_BUMP(dmu_tx_write_limit); ++ return (ERESTART); ++ } ++ } ++ ++ atomic_add_64(&dp->dp_tempreserved[tx->tx_txg & TXG_MASK], space); ++ ++ /* ++ * If this transaction group is over 7/8ths capacity, delay ++ * the caller 1 clock tick. This will slow down the "fill" ++ * rate until the sync process can catch up with us. ++ */ ++ if (reserved && reserved > (write_limit - (write_limit >> 3))) ++ txg_delay(dp, tx->tx_txg, 1); ++ ++ return (0); ++} ++ ++void ++dsl_pool_tempreserve_clear(dsl_pool_t *dp, int64_t space, dmu_tx_t *tx) ++{ ++ ASSERT(dp->dp_tempreserved[tx->tx_txg & TXG_MASK] >= space); ++ atomic_add_64(&dp->dp_tempreserved[tx->tx_txg & TXG_MASK], -space); ++} ++ ++void ++dsl_pool_memory_pressure(dsl_pool_t *dp) ++{ ++ uint64_t space_inuse = 0; ++ int i; ++ ++ if (dp->dp_write_limit == zfs_write_limit_min) ++ return; ++ ++ for (i = 0; i < TXG_SIZE; i++) { ++ space_inuse += dp->dp_space_towrite[i]; ++ space_inuse += dp->dp_tempreserved[i]; ++ } ++ dp->dp_write_limit = MAX(zfs_write_limit_min, ++ MIN(dp->dp_write_limit, space_inuse / 4)); ++} ++ ++void ++dsl_pool_willuse_space(dsl_pool_t *dp, int64_t space, dmu_tx_t *tx) ++{ ++ if (space > 0) { ++ mutex_enter(&dp->dp_lock); ++ dp->dp_space_towrite[tx->tx_txg & TXG_MASK] += space; ++ mutex_exit(&dp->dp_lock); ++ } ++} ++ ++/* ARGSUSED */ ++static int ++upgrade_clones_cb(spa_t *spa, uint64_t dsobj, const char *dsname, void *arg) ++{ ++ dmu_tx_t *tx = arg; ++ dsl_dataset_t *ds, *prev = NULL; ++ int err; ++ dsl_pool_t *dp = spa_get_dsl(spa); ++ ++ err = dsl_dataset_hold_obj(dp, dsobj, FTAG, &ds); ++ if (err) ++ return (err); ++ ++ while (ds->ds_phys->ds_prev_snap_obj != 0) { ++ err = dsl_dataset_hold_obj(dp, ds->ds_phys->ds_prev_snap_obj, ++ FTAG, &prev); ++ if (err) { ++ dsl_dataset_rele(ds, FTAG); ++ return (err); ++ } ++ ++ if (prev->ds_phys->ds_next_snap_obj != ds->ds_object) ++ break; ++ dsl_dataset_rele(ds, FTAG); ++ ds = prev; ++ prev = NULL; ++ } ++ ++ if (prev == NULL) { ++ prev = dp->dp_origin_snap; ++ ++ /* ++ * The $ORIGIN can't have any data, or the accounting ++ * will be wrong. 
++ */ ++ ASSERT(prev->ds_phys->ds_bp.blk_birth == 0); ++ ++ /* The origin doesn't get attached to itself */ ++ if (ds->ds_object == prev->ds_object) { ++ dsl_dataset_rele(ds, FTAG); ++ return (0); ++ } ++ ++ dmu_buf_will_dirty(ds->ds_dbuf, tx); ++ ds->ds_phys->ds_prev_snap_obj = prev->ds_object; ++ ds->ds_phys->ds_prev_snap_txg = prev->ds_phys->ds_creation_txg; ++ ++ dmu_buf_will_dirty(ds->ds_dir->dd_dbuf, tx); ++ ds->ds_dir->dd_phys->dd_origin_obj = prev->ds_object; ++ ++ dmu_buf_will_dirty(prev->ds_dbuf, tx); ++ prev->ds_phys->ds_num_children++; ++ ++ if (ds->ds_phys->ds_next_snap_obj == 0) { ++ ASSERT(ds->ds_prev == NULL); ++ VERIFY(0 == dsl_dataset_hold_obj(dp, ++ ds->ds_phys->ds_prev_snap_obj, ds, &ds->ds_prev)); ++ } ++ } ++ ++ ASSERT(ds->ds_dir->dd_phys->dd_origin_obj == prev->ds_object); ++ ASSERT(ds->ds_phys->ds_prev_snap_obj == prev->ds_object); ++ ++ if (prev->ds_phys->ds_next_clones_obj == 0) { ++ dmu_buf_will_dirty(prev->ds_dbuf, tx); ++ prev->ds_phys->ds_next_clones_obj = ++ zap_create(dp->dp_meta_objset, ++ DMU_OT_NEXT_CLONES, DMU_OT_NONE, 0, tx); ++ } ++ VERIFY(0 == zap_add_int(dp->dp_meta_objset, ++ prev->ds_phys->ds_next_clones_obj, ds->ds_object, tx)); ++ ++ dsl_dataset_rele(ds, FTAG); ++ if (prev != dp->dp_origin_snap) ++ dsl_dataset_rele(prev, FTAG); ++ return (0); ++} ++ ++void ++dsl_pool_upgrade_clones(dsl_pool_t *dp, dmu_tx_t *tx) ++{ ++ ASSERT(dmu_tx_is_syncing(tx)); ++ ASSERT(dp->dp_origin_snap != NULL); ++ ++ VERIFY3U(0, ==, dmu_objset_find_spa(dp->dp_spa, NULL, upgrade_clones_cb, ++ tx, DS_FIND_CHILDREN)); ++} ++ ++/* ARGSUSED */ ++static int ++upgrade_dir_clones_cb(spa_t *spa, uint64_t dsobj, const char *dsname, void *arg) ++{ ++ dmu_tx_t *tx = arg; ++ dsl_dataset_t *ds; ++ dsl_pool_t *dp = spa_get_dsl(spa); ++ objset_t *mos = dp->dp_meta_objset; ++ ++ VERIFY3U(0, ==, dsl_dataset_hold_obj(dp, dsobj, FTAG, &ds)); ++ ++ if (ds->ds_dir->dd_phys->dd_origin_obj) { ++ dsl_dataset_t *origin; ++ ++ VERIFY3U(0, ==, dsl_dataset_hold_obj(dp, ++ ds->ds_dir->dd_phys->dd_origin_obj, FTAG, &origin)); ++ ++ if (origin->ds_dir->dd_phys->dd_clones == 0) { ++ dmu_buf_will_dirty(origin->ds_dir->dd_dbuf, tx); ++ origin->ds_dir->dd_phys->dd_clones = zap_create(mos, ++ DMU_OT_DSL_CLONES, DMU_OT_NONE, 0, tx); ++ } ++ ++ VERIFY3U(0, ==, zap_add_int(dp->dp_meta_objset, ++ origin->ds_dir->dd_phys->dd_clones, dsobj, tx)); ++ ++ dsl_dataset_rele(origin, FTAG); ++ } ++ ++ dsl_dataset_rele(ds, FTAG); ++ return (0); ++} ++ ++void ++dsl_pool_upgrade_dir_clones(dsl_pool_t *dp, dmu_tx_t *tx) ++{ ++ uint64_t obj; ++ ++ ASSERT(dmu_tx_is_syncing(tx)); ++ ++ (void) dsl_dir_create_sync(dp, dp->dp_root_dir, FREE_DIR_NAME, tx); ++ VERIFY(0 == dsl_pool_open_special_dir(dp, ++ FREE_DIR_NAME, &dp->dp_free_dir)); ++ ++ /* ++ * We can't use bpobj_alloc(), because spa_version() still ++ * returns the old version, and we need a new-version bpobj with ++ * subobj support. So call dmu_object_alloc() directly. 
++ */ ++ obj = dmu_object_alloc(dp->dp_meta_objset, DMU_OT_BPOBJ, ++ SPA_MAXBLOCKSIZE, DMU_OT_BPOBJ_HDR, sizeof (bpobj_phys_t), tx); ++ VERIFY3U(0, ==, zap_add(dp->dp_meta_objset, DMU_POOL_DIRECTORY_OBJECT, ++ DMU_POOL_FREE_BPOBJ, sizeof (uint64_t), 1, &obj, tx)); ++ VERIFY3U(0, ==, bpobj_open(&dp->dp_free_bpobj, ++ dp->dp_meta_objset, obj)); ++ ++ VERIFY3U(0, ==, dmu_objset_find_spa(dp->dp_spa, NULL, ++ upgrade_dir_clones_cb, tx, DS_FIND_CHILDREN)); ++} ++ ++void ++dsl_pool_create_origin(dsl_pool_t *dp, dmu_tx_t *tx) ++{ ++ uint64_t dsobj; ++ dsl_dataset_t *ds; ++ ++ ASSERT(dmu_tx_is_syncing(tx)); ++ ASSERT(dp->dp_origin_snap == NULL); ++ ++ /* create the origin dir, ds, & snap-ds */ ++ rw_enter(&dp->dp_config_rwlock, RW_WRITER); ++ dsobj = dsl_dataset_create_sync(dp->dp_root_dir, ORIGIN_DIR_NAME, ++ NULL, 0, kcred, tx); ++ VERIFY(0 == dsl_dataset_hold_obj(dp, dsobj, FTAG, &ds)); ++ dsl_dataset_snapshot_sync(ds, ORIGIN_DIR_NAME, tx); ++ VERIFY(0 == dsl_dataset_hold_obj(dp, ds->ds_phys->ds_prev_snap_obj, ++ dp, &dp->dp_origin_snap)); ++ dsl_dataset_rele(ds, FTAG); ++ rw_exit(&dp->dp_config_rwlock); ++} ++ ++taskq_t * ++dsl_pool_iput_taskq(dsl_pool_t *dp) ++{ ++ return (dp->dp_iput_taskq); ++} ++ ++/* ++ * Walk through the pool-wide zap object of temporary snapshot user holds ++ * and release them. ++ */ ++void ++dsl_pool_clean_tmp_userrefs(dsl_pool_t *dp) ++{ ++ zap_attribute_t za; ++ zap_cursor_t zc; ++ objset_t *mos = dp->dp_meta_objset; ++ uint64_t zapobj = dp->dp_tmp_userrefs_obj; ++ ++ if (zapobj == 0) ++ return; ++ ASSERT(spa_version(dp->dp_spa) >= SPA_VERSION_USERREFS); ++ ++ for (zap_cursor_init(&zc, mos, zapobj); ++ zap_cursor_retrieve(&zc, &za) == 0; ++ zap_cursor_advance(&zc)) { ++ char *htag; ++ uint64_t dsobj; ++ ++ htag = strchr(za.za_name, '-'); ++ *htag = '\0'; ++ ++htag; ++ dsobj = strtonum(za.za_name, NULL); ++ (void) dsl_dataset_user_release_tmp(dp, dsobj, htag, B_FALSE); ++ } ++ zap_cursor_fini(&zc); ++} ++ ++/* ++ * Create the pool-wide zap object for storing temporary snapshot holds. ++ */ ++void ++dsl_pool_user_hold_create_obj(dsl_pool_t *dp, dmu_tx_t *tx) ++{ ++ objset_t *mos = dp->dp_meta_objset; ++ ++ ASSERT(dp->dp_tmp_userrefs_obj == 0); ++ ASSERT(dmu_tx_is_syncing(tx)); ++ ++ dp->dp_tmp_userrefs_obj = zap_create(mos, DMU_OT_USERREFS, ++ DMU_OT_NONE, 0, tx); ++ ++ VERIFY(zap_add(mos, DMU_POOL_DIRECTORY_OBJECT, DMU_POOL_TMP_USERREFS, ++ sizeof (uint64_t), 1, &dp->dp_tmp_userrefs_obj, tx) == 0); ++} ++ ++static int ++dsl_pool_user_hold_rele_impl(dsl_pool_t *dp, uint64_t dsobj, ++ const char *tag, uint64_t *now, dmu_tx_t *tx, boolean_t holding) ++{ ++ objset_t *mos = dp->dp_meta_objset; ++ uint64_t zapobj = dp->dp_tmp_userrefs_obj; ++ char *name; ++ int error; ++ ++ ASSERT(spa_version(dp->dp_spa) >= SPA_VERSION_USERREFS); ++ ASSERT(dmu_tx_is_syncing(tx)); ++ ++ /* ++ * If the pool was created prior to SPA_VERSION_USERREFS, the ++ * zap object for temporary holds might not exist yet. ++ */ ++ if (zapobj == 0) { ++ if (holding) { ++ dsl_pool_user_hold_create_obj(dp, tx); ++ zapobj = dp->dp_tmp_userrefs_obj; ++ } else { ++ return (ENOENT); ++ } ++ } ++ ++ name = kmem_asprintf("%llx-%s", (u_longlong_t)dsobj, tag); ++ if (holding) ++ error = zap_add(mos, zapobj, name, 8, 1, now, tx); ++ else ++ error = zap_remove(mos, zapobj, name, tx); ++ strfree(name); ++ ++ return (error); ++} ++ ++/* ++ * Add a temporary hold for the given dataset object and tag. 
++ */ ++int ++dsl_pool_user_hold(dsl_pool_t *dp, uint64_t dsobj, const char *tag, ++ uint64_t *now, dmu_tx_t *tx) ++{ ++ return (dsl_pool_user_hold_rele_impl(dp, dsobj, tag, now, tx, B_TRUE)); ++} ++ ++/* ++ * Release a temporary hold for the given dataset object and tag. ++ */ ++int ++dsl_pool_user_release(dsl_pool_t *dp, uint64_t dsobj, const char *tag, ++ dmu_tx_t *tx) ++{ ++ return (dsl_pool_user_hold_rele_impl(dp, dsobj, tag, NULL, ++ tx, B_FALSE)); ++} ++ ++#if defined(_KERNEL) && defined(HAVE_SPL) ++module_param(zfs_no_write_throttle, int, 0644); ++MODULE_PARM_DESC(zfs_no_write_throttle, "Disable write throttling"); ++ ++module_param(zfs_write_limit_shift, int, 0444); ++MODULE_PARM_DESC(zfs_write_limit_shift, "log2(fraction of memory) per txg"); ++ ++module_param(zfs_txg_synctime_ms, int, 0644); ++MODULE_PARM_DESC(zfs_txg_synctime_ms, "Target milliseconds between txg sync"); ++ ++module_param(zfs_txg_history, int, 0644); ++MODULE_PARM_DESC(zfs_txg_history, "Historic statistics for the last N txgs"); ++ ++module_param(zfs_write_limit_min, ulong, 0444); ++MODULE_PARM_DESC(zfs_write_limit_min, "Min txg write limit"); ++ ++module_param(zfs_write_limit_max, ulong, 0444); ++MODULE_PARM_DESC(zfs_write_limit_max, "Max txg write limit"); ++ ++module_param(zfs_write_limit_inflated, ulong, 0444); ++MODULE_PARM_DESC(zfs_write_limit_inflated, "Inflated txg write limit"); ++ ++module_param(zfs_write_limit_override, ulong, 0444); ++MODULE_PARM_DESC(zfs_write_limit_override, "Override txg write limit"); ++#endif +diff -uNr linux-3.2.33-go.orig/fs/zfs/zfs/dsl_prop.c linux-3.2.33-go/fs/zfs/zfs/dsl_prop.c +--- linux-3.2.33-go.orig/fs/zfs/zfs/dsl_prop.c 1970-01-01 01:00:00.000000000 +0100 ++++ linux-3.2.33-go/fs/zfs/zfs/dsl_prop.c 2012-11-16 23:25:34.353039289 +0100 +@@ -0,0 +1,1170 @@ ++/* ++ * CDDL HEADER START ++ * ++ * The contents of this file are subject to the terms of the ++ * Common Development and Distribution License (the "License"). ++ * You may not use this file except in compliance with the License. ++ * ++ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE ++ * or http://www.opensolaris.org/os/licensing. ++ * See the License for the specific language governing permissions ++ * and limitations under the License. ++ * ++ * When distributing Covered Code, include this CDDL HEADER in each ++ * file and include the License file at usr/src/OPENSOLARIS.LICENSE. ++ * If applicable, add the following below this CDDL HEADER, with the ++ * fields enclosed by brackets "[]" replaced with your own identifying ++ * information: Portions Copyright [yyyy] [name of copyright owner] ++ * ++ * CDDL HEADER END ++ */ ++/* ++ * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved. ++ */ ++ ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++ ++#include "zfs_prop.h" ++ ++#define ZPROP_INHERIT_SUFFIX "$inherit" ++#define ZPROP_RECVD_SUFFIX "$recvd" ++ ++static int ++dodefault(const char *propname, int intsz, int numints, void *buf) ++{ ++ zfs_prop_t prop; ++ ++ /* ++ * The setonce properties are read-only, BUT they still ++ * have a default value that can be used as the initial ++ * value. 
++ */ ++ if ((prop = zfs_name_to_prop(propname)) == ZPROP_INVAL || ++ (zfs_prop_readonly(prop) && !zfs_prop_setonce(prop))) ++ return (ENOENT); ++ ++ if (zfs_prop_get_type(prop) == PROP_TYPE_STRING) { ++ if (intsz != 1) ++ return (EOVERFLOW); ++ (void) strncpy(buf, zfs_prop_default_string(prop), ++ numints); ++ } else { ++ if (intsz != 8 || numints < 1) ++ return (EOVERFLOW); ++ ++ *(uint64_t *)buf = zfs_prop_default_numeric(prop); ++ } ++ ++ return (0); ++} ++ ++int ++dsl_prop_get_dd(dsl_dir_t *dd, const char *propname, ++ int intsz, int numints, void *buf, char *setpoint, boolean_t snapshot) ++{ ++ int err = ENOENT; ++ dsl_dir_t *target = dd; ++ objset_t *mos = dd->dd_pool->dp_meta_objset; ++ zfs_prop_t prop; ++ boolean_t inheritable; ++ boolean_t inheriting = B_FALSE; ++ char *inheritstr; ++ char *recvdstr; ++ ++ ASSERT(RW_LOCK_HELD(&dd->dd_pool->dp_config_rwlock)); ++ ++ if (setpoint) ++ setpoint[0] = '\0'; ++ ++ prop = zfs_name_to_prop(propname); ++ inheritable = (prop == ZPROP_INVAL || zfs_prop_inheritable(prop)); ++ inheritstr = kmem_asprintf("%s%s", propname, ZPROP_INHERIT_SUFFIX); ++ recvdstr = kmem_asprintf("%s%s", propname, ZPROP_RECVD_SUFFIX); ++ ++ /* ++ * Note: dd may become NULL, therefore we shouldn't dereference it ++ * after this loop. ++ */ ++ for (; dd != NULL; dd = dd->dd_parent) { ++ ASSERT(RW_LOCK_HELD(&dd->dd_pool->dp_config_rwlock)); ++ ++ if (dd != target || snapshot) { ++ if (!inheritable) ++ break; ++ inheriting = B_TRUE; ++ } ++ ++ /* Check for a local value. */ ++ err = zap_lookup(mos, dd->dd_phys->dd_props_zapobj, propname, ++ intsz, numints, buf); ++ if (err != ENOENT) { ++ if (setpoint != NULL && err == 0) ++ dsl_dir_name(dd, setpoint); ++ break; ++ } ++ ++ /* ++ * Skip the check for a received value if there is an explicit ++ * inheritance entry. ++ */ ++ err = zap_contains(mos, dd->dd_phys->dd_props_zapobj, ++ inheritstr); ++ if (err != 0 && err != ENOENT) ++ break; ++ ++ if (err == ENOENT) { ++ /* Check for a received value. */ ++ err = zap_lookup(mos, dd->dd_phys->dd_props_zapobj, ++ recvdstr, intsz, numints, buf); ++ if (err != ENOENT) { ++ if (setpoint != NULL && err == 0) { ++ if (inheriting) { ++ dsl_dir_name(dd, setpoint); ++ } else { ++ (void) strcpy(setpoint, ++ ZPROP_SOURCE_VAL_RECVD); ++ } ++ } ++ break; ++ } ++ } ++ ++ /* ++ * If we found an explicit inheritance entry, err is zero even ++ * though we haven't yet found the value, so reinitializing err ++ * at the end of the loop (instead of at the beginning) ensures ++ * that err has a valid post-loop value. ++ */ ++ err = ENOENT; ++ } ++ ++ if (err == ENOENT) ++ err = dodefault(propname, intsz, numints, buf); ++ ++ strfree(inheritstr); ++ strfree(recvdstr); ++ ++ return (err); ++} ++ ++int ++dsl_prop_get_ds(dsl_dataset_t *ds, const char *propname, ++ int intsz, int numints, void *buf, char *setpoint) ++{ ++ zfs_prop_t prop = zfs_name_to_prop(propname); ++ boolean_t inheritable; ++ boolean_t snapshot; ++ uint64_t zapobj; ++ ++ ASSERT(RW_LOCK_HELD(&ds->ds_dir->dd_pool->dp_config_rwlock)); ++ inheritable = (prop == ZPROP_INVAL || zfs_prop_inheritable(prop)); ++ snapshot = (ds->ds_phys != NULL && dsl_dataset_is_snapshot(ds)); ++ zapobj = (ds->ds_phys == NULL ? 0 : ds->ds_phys->ds_props_obj); ++ ++ if (zapobj != 0) { ++ objset_t *mos = ds->ds_dir->dd_pool->dp_meta_objset; ++ int err; ++ ++ ASSERT(snapshot); ++ ++ /* Check for a local value. 
*/ ++ err = zap_lookup(mos, zapobj, propname, intsz, numints, buf); ++ if (err != ENOENT) { ++ if (setpoint != NULL && err == 0) ++ dsl_dataset_name(ds, setpoint); ++ return (err); ++ } ++ ++ /* ++ * Skip the check for a received value if there is an explicit ++ * inheritance entry. ++ */ ++ if (inheritable) { ++ char *inheritstr = kmem_asprintf("%s%s", propname, ++ ZPROP_INHERIT_SUFFIX); ++ err = zap_contains(mos, zapobj, inheritstr); ++ strfree(inheritstr); ++ if (err != 0 && err != ENOENT) ++ return (err); ++ } ++ ++ if (err == ENOENT) { ++ /* Check for a received value. */ ++ char *recvdstr = kmem_asprintf("%s%s", propname, ++ ZPROP_RECVD_SUFFIX); ++ err = zap_lookup(mos, zapobj, recvdstr, ++ intsz, numints, buf); ++ strfree(recvdstr); ++ if (err != ENOENT) { ++ if (setpoint != NULL && err == 0) ++ (void) strcpy(setpoint, ++ ZPROP_SOURCE_VAL_RECVD); ++ return (err); ++ } ++ } ++ } ++ ++ return (dsl_prop_get_dd(ds->ds_dir, propname, ++ intsz, numints, buf, setpoint, snapshot)); ++} ++ ++/* ++ * Register interest in the named property. We'll call the callback ++ * once to notify it of the current property value, and again each time ++ * the property changes, until this callback is unregistered. ++ * ++ * Return 0 on success, errno if the prop is not an integer value. ++ */ ++int ++dsl_prop_register(dsl_dataset_t *ds, const char *propname, ++ dsl_prop_changed_cb_t *callback, void *cbarg) ++{ ++ dsl_dir_t *dd = ds->ds_dir; ++ dsl_pool_t *dp = dd->dd_pool; ++ uint64_t value; ++ dsl_prop_cb_record_t *cbr; ++ int err; ++ int need_rwlock; ++ ++ need_rwlock = !RW_WRITE_HELD(&dp->dp_config_rwlock); ++ if (need_rwlock) ++ rw_enter(&dp->dp_config_rwlock, RW_READER); ++ ++ err = dsl_prop_get_ds(ds, propname, 8, 1, &value, NULL); ++ if (err != 0) { ++ if (need_rwlock) ++ rw_exit(&dp->dp_config_rwlock); ++ return (err); ++ } ++ ++ cbr = kmem_alloc(sizeof (dsl_prop_cb_record_t), KM_PUSHPAGE); ++ cbr->cbr_ds = ds; ++ cbr->cbr_propname = kmem_alloc(strlen(propname)+1, KM_PUSHPAGE); ++ (void) strcpy((char *)cbr->cbr_propname, propname); ++ cbr->cbr_func = callback; ++ cbr->cbr_arg = cbarg; ++ mutex_enter(&dd->dd_lock); ++ list_insert_head(&dd->dd_prop_cbs, cbr); ++ mutex_exit(&dd->dd_lock); ++ ++ cbr->cbr_func(cbr->cbr_arg, value); ++ ++ if (need_rwlock) ++ rw_exit(&dp->dp_config_rwlock); ++ return (0); ++} ++ ++int ++dsl_prop_get(const char *dsname, const char *propname, ++ int intsz, int numints, void *buf, char *setpoint) ++{ ++ dsl_dataset_t *ds; ++ int err; ++ ++ err = dsl_dataset_hold(dsname, FTAG, &ds); ++ if (err) ++ return (err); ++ ++ rw_enter(&ds->ds_dir->dd_pool->dp_config_rwlock, RW_READER); ++ err = dsl_prop_get_ds(ds, propname, intsz, numints, buf, setpoint); ++ rw_exit(&ds->ds_dir->dd_pool->dp_config_rwlock); ++ ++ dsl_dataset_rele(ds, FTAG); ++ return (err); ++} ++ ++/* ++ * Get the current property value. It may have changed by the time this ++ * function returns, so it is NOT safe to follow up with ++ * dsl_prop_register() and assume that the value has not changed in ++ * between. ++ * ++ * Return 0 on success, ENOENT if ddname is invalid. 
++ */ ++int ++dsl_prop_get_integer(const char *ddname, const char *propname, ++ uint64_t *valuep, char *setpoint) ++{ ++ return (dsl_prop_get(ddname, propname, 8, 1, valuep, setpoint)); ++} ++ ++void ++dsl_prop_setarg_init_uint64(dsl_prop_setarg_t *psa, const char *propname, ++ zprop_source_t source, uint64_t *value) ++{ ++ psa->psa_name = propname; ++ psa->psa_source = source; ++ psa->psa_intsz = 8; ++ psa->psa_numints = 1; ++ psa->psa_value = value; ++ ++ psa->psa_effective_value = -1ULL; ++} ++ ++/* ++ * Predict the effective value of the given special property if it were set with ++ * the given value and source. This is not a general purpose function. It exists ++ * only to handle the special requirements of the quota and reservation ++ * properties. The fact that these properties are non-inheritable greatly ++ * simplifies the prediction logic. ++ * ++ * Returns 0 on success, a positive error code on failure, or -1 if called with ++ * a property not handled by this function. ++ */ ++int ++dsl_prop_predict_sync(dsl_dir_t *dd, dsl_prop_setarg_t *psa) ++{ ++ const char *propname = psa->psa_name; ++ zfs_prop_t prop = zfs_name_to_prop(propname); ++ zprop_source_t source = psa->psa_source; ++ objset_t *mos; ++ uint64_t zapobj; ++ uint64_t version; ++ char *recvdstr; ++ int err = 0; ++ ++ switch (prop) { ++ case ZFS_PROP_QUOTA: ++ case ZFS_PROP_RESERVATION: ++ case ZFS_PROP_REFQUOTA: ++ case ZFS_PROP_REFRESERVATION: ++ break; ++ default: ++ return (-1); ++ } ++ ++ mos = dd->dd_pool->dp_meta_objset; ++ zapobj = dd->dd_phys->dd_props_zapobj; ++ recvdstr = kmem_asprintf("%s%s", propname, ZPROP_RECVD_SUFFIX); ++ ++ version = spa_version(dd->dd_pool->dp_spa); ++ if (version < SPA_VERSION_RECVD_PROPS) { ++ if (source & ZPROP_SRC_NONE) ++ source = ZPROP_SRC_NONE; ++ else if (source & ZPROP_SRC_RECEIVED) ++ source = ZPROP_SRC_LOCAL; ++ } ++ ++ switch ((int)source) { ++ case ZPROP_SRC_NONE: ++ /* Revert to the received value, if any. */ ++ err = zap_lookup(mos, zapobj, recvdstr, 8, 1, ++ &psa->psa_effective_value); ++ if (err == ENOENT) ++ psa->psa_effective_value = 0; ++ break; ++ case ZPROP_SRC_LOCAL: ++ psa->psa_effective_value = *(uint64_t *)psa->psa_value; ++ break; ++ case ZPROP_SRC_RECEIVED: ++ /* ++ * If there's no local setting, then the new received value will ++ * be the effective value. ++ */ ++ err = zap_lookup(mos, zapobj, propname, 8, 1, ++ &psa->psa_effective_value); ++ if (err == ENOENT) ++ psa->psa_effective_value = *(uint64_t *)psa->psa_value; ++ break; ++ case (ZPROP_SRC_NONE | ZPROP_SRC_RECEIVED): ++ /* ++ * We're clearing the received value, so the local setting (if ++ * it exists) remains the effective value. 
++ */ ++ err = zap_lookup(mos, zapobj, propname, 8, 1, ++ &psa->psa_effective_value); ++ if (err == ENOENT) ++ psa->psa_effective_value = 0; ++ break; ++ default: ++ cmn_err(CE_PANIC, "unexpected property source: %d", source); ++ } ++ ++ strfree(recvdstr); ++ ++ if (err == ENOENT) ++ return (0); ++ ++ return (err); ++} ++ ++#ifdef ZFS_DEBUG ++void ++dsl_prop_check_prediction(dsl_dir_t *dd, dsl_prop_setarg_t *psa) ++{ ++ zfs_prop_t prop = zfs_name_to_prop(psa->psa_name); ++ uint64_t intval; ++ char setpoint[MAXNAMELEN]; ++ uint64_t version = spa_version(dd->dd_pool->dp_spa); ++ int err; ++ ++ if (version < SPA_VERSION_RECVD_PROPS) { ++ switch (prop) { ++ case ZFS_PROP_QUOTA: ++ case ZFS_PROP_RESERVATION: ++ return; ++ default: ++ break; ++ } ++ } ++ ++ err = dsl_prop_get_dd(dd, psa->psa_name, 8, 1, &intval, ++ setpoint, B_FALSE); ++ if (err == 0 && intval != psa->psa_effective_value) { ++ cmn_err(CE_PANIC, "%s property, source: %x, " ++ "predicted effective value: %llu, " ++ "actual effective value: %llu (setpoint: %s)", ++ psa->psa_name, psa->psa_source, ++ (unsigned long long)psa->psa_effective_value, ++ (unsigned long long)intval, setpoint); ++ } ++} ++#endif ++ ++/* ++ * Unregister this callback. Return 0 on success, ENOENT if ddname is ++ * invalid, ENOMSG if no matching callback registered. ++ */ ++int ++dsl_prop_unregister(dsl_dataset_t *ds, const char *propname, ++ dsl_prop_changed_cb_t *callback, void *cbarg) ++{ ++ dsl_dir_t *dd = ds->ds_dir; ++ dsl_prop_cb_record_t *cbr; ++ ++ mutex_enter(&dd->dd_lock); ++ for (cbr = list_head(&dd->dd_prop_cbs); ++ cbr; cbr = list_next(&dd->dd_prop_cbs, cbr)) { ++ if (cbr->cbr_ds == ds && ++ cbr->cbr_func == callback && ++ cbr->cbr_arg == cbarg && ++ strcmp(cbr->cbr_propname, propname) == 0) ++ break; ++ } ++ ++ if (cbr == NULL) { ++ mutex_exit(&dd->dd_lock); ++ return (ENOMSG); ++ } ++ ++ list_remove(&dd->dd_prop_cbs, cbr); ++ mutex_exit(&dd->dd_lock); ++ kmem_free((void*)cbr->cbr_propname, strlen(cbr->cbr_propname)+1); ++ kmem_free(cbr, sizeof (dsl_prop_cb_record_t)); ++ ++ return (0); ++} ++ ++/* ++ * Return the number of callbacks that are registered for this dataset. ++ */ ++int ++dsl_prop_numcb(dsl_dataset_t *ds) ++{ ++ dsl_dir_t *dd = ds->ds_dir; ++ dsl_prop_cb_record_t *cbr; ++ int num = 0; ++ ++ mutex_enter(&dd->dd_lock); ++ for (cbr = list_head(&dd->dd_prop_cbs); ++ cbr; cbr = list_next(&dd->dd_prop_cbs, cbr)) { ++ if (cbr->cbr_ds == ds) ++ num++; ++ } ++ mutex_exit(&dd->dd_lock); ++ ++ return (num); ++} ++ ++static void ++dsl_prop_changed_notify(dsl_pool_t *dp, uint64_t ddobj, ++ const char *propname, uint64_t value, int first) ++{ ++ dsl_dir_t *dd; ++ dsl_prop_cb_record_t *cbr; ++ objset_t *mos = dp->dp_meta_objset; ++ zap_cursor_t zc; ++ zap_attribute_t *za; ++ int err; ++ ++ ASSERT(RW_WRITE_HELD(&dp->dp_config_rwlock)); ++ err = dsl_dir_open_obj(dp, ddobj, NULL, FTAG, &dd); ++ if (err) ++ return; ++ ++ if (!first) { ++ /* ++ * If the prop is set here, then this change is not ++ * being inherited here or below; stop the recursion. 
++ */ ++ err = zap_contains(mos, dd->dd_phys->dd_props_zapobj, propname); ++ if (err == 0) { ++ dsl_dir_close(dd, FTAG); ++ return; ++ } ++ ASSERT3U(err, ==, ENOENT); ++ } ++ ++ mutex_enter(&dd->dd_lock); ++ for (cbr = list_head(&dd->dd_prop_cbs); cbr; ++ cbr = list_next(&dd->dd_prop_cbs, cbr)) { ++ uint64_t propobj = cbr->cbr_ds->ds_phys->ds_props_obj; ++ ++ if (strcmp(cbr->cbr_propname, propname) != 0) ++ continue; ++ ++ /* ++ * If the property is set on this ds, then it is not ++ * inherited here; don't call the callback. ++ */ ++ if (propobj && 0 == zap_contains(mos, propobj, propname)) ++ continue; ++ ++ cbr->cbr_func(cbr->cbr_arg, value); ++ } ++ mutex_exit(&dd->dd_lock); ++ ++ za = kmem_alloc(sizeof (zap_attribute_t), KM_PUSHPAGE); ++ for (zap_cursor_init(&zc, mos, ++ dd->dd_phys->dd_child_dir_zapobj); ++ zap_cursor_retrieve(&zc, za) == 0; ++ zap_cursor_advance(&zc)) { ++ dsl_prop_changed_notify(dp, za->za_first_integer, ++ propname, value, FALSE); ++ } ++ kmem_free(za, sizeof (zap_attribute_t)); ++ zap_cursor_fini(&zc); ++ dsl_dir_close(dd, FTAG); ++} ++ ++void ++dsl_prop_set_sync(void *arg1, void *arg2, dmu_tx_t *tx) ++{ ++ dsl_dataset_t *ds = arg1; ++ dsl_prop_setarg_t *psa = arg2; ++ objset_t *mos = ds->ds_dir->dd_pool->dp_meta_objset; ++ uint64_t zapobj, intval, dummy; ++ int isint; ++ char valbuf[32]; ++ char *valstr = NULL; ++ char *inheritstr; ++ char *recvdstr; ++ char *tbuf = NULL; ++ int err; ++ uint64_t version = spa_version(ds->ds_dir->dd_pool->dp_spa); ++ const char *propname = psa->psa_name; ++ zprop_source_t source = psa->psa_source; ++ ++ isint = (dodefault(propname, 8, 1, &intval) == 0); ++ ++ if (ds->ds_phys != NULL && dsl_dataset_is_snapshot(ds)) { ++ ASSERT(version >= SPA_VERSION_SNAP_PROPS); ++ if (ds->ds_phys->ds_props_obj == 0) { ++ dmu_buf_will_dirty(ds->ds_dbuf, tx); ++ ds->ds_phys->ds_props_obj = ++ zap_create(mos, ++ DMU_OT_DSL_PROPS, DMU_OT_NONE, 0, tx); ++ } ++ zapobj = ds->ds_phys->ds_props_obj; ++ } else { ++ zapobj = ds->ds_dir->dd_phys->dd_props_zapobj; ++ } ++ ++ if (version < SPA_VERSION_RECVD_PROPS) { ++ zfs_prop_t prop = zfs_name_to_prop(propname); ++ if (prop == ZFS_PROP_QUOTA || prop == ZFS_PROP_RESERVATION) ++ return; ++ ++ if (source & ZPROP_SRC_NONE) ++ source = ZPROP_SRC_NONE; ++ else if (source & ZPROP_SRC_RECEIVED) ++ source = ZPROP_SRC_LOCAL; ++ } ++ ++ inheritstr = kmem_asprintf("%s%s", propname, ZPROP_INHERIT_SUFFIX); ++ recvdstr = kmem_asprintf("%s%s", propname, ZPROP_RECVD_SUFFIX); ++ ++ switch ((int)source) { ++ case ZPROP_SRC_NONE: ++ /* ++ * revert to received value, if any (inherit -S) ++ * - remove propname ++ * - remove propname$inherit ++ */ ++ err = zap_remove(mos, zapobj, propname, tx); ++ ASSERT(err == 0 || err == ENOENT); ++ err = zap_remove(mos, zapobj, inheritstr, tx); ++ ASSERT(err == 0 || err == ENOENT); ++ break; ++ case ZPROP_SRC_LOCAL: ++ /* ++ * remove propname$inherit ++ * set propname -> value ++ */ ++ err = zap_remove(mos, zapobj, inheritstr, tx); ++ ASSERT(err == 0 || err == ENOENT); ++ VERIFY(0 == zap_update(mos, zapobj, propname, ++ psa->psa_intsz, psa->psa_numints, psa->psa_value, tx)); ++ break; ++ case ZPROP_SRC_INHERITED: ++ /* ++ * explicitly inherit ++ * - remove propname ++ * - set propname$inherit ++ */ ++ err = zap_remove(mos, zapobj, propname, tx); ++ ASSERT(err == 0 || err == ENOENT); ++ if (version >= SPA_VERSION_RECVD_PROPS && ++ dsl_prop_get_ds(ds, ZPROP_HAS_RECVD, 8, 1, &dummy, ++ NULL) == 0) { ++ dummy = 0; ++ err = zap_update(mos, zapobj, inheritstr, ++ 8, 1, &dummy, tx); ++ ASSERT(err == 
0); ++ } ++ break; ++ case ZPROP_SRC_RECEIVED: ++ /* ++ * set propname$recvd -> value ++ */ ++ err = zap_update(mos, zapobj, recvdstr, ++ psa->psa_intsz, psa->psa_numints, psa->psa_value, tx); ++ ASSERT(err == 0); ++ break; ++ case (ZPROP_SRC_NONE | ZPROP_SRC_LOCAL | ZPROP_SRC_RECEIVED): ++ /* ++ * clear local and received settings ++ * - remove propname ++ * - remove propname$inherit ++ * - remove propname$recvd ++ */ ++ err = zap_remove(mos, zapobj, propname, tx); ++ ASSERT(err == 0 || err == ENOENT); ++ err = zap_remove(mos, zapobj, inheritstr, tx); ++ ASSERT(err == 0 || err == ENOENT); ++ /* FALLTHRU */ ++ case (ZPROP_SRC_NONE | ZPROP_SRC_RECEIVED): ++ /* ++ * remove propname$recvd ++ */ ++ err = zap_remove(mos, zapobj, recvdstr, tx); ++ ASSERT(err == 0 || err == ENOENT); ++ break; ++ default: ++ cmn_err(CE_PANIC, "unexpected property source: %d", source); ++ } ++ ++ strfree(inheritstr); ++ strfree(recvdstr); ++ ++ if (isint) { ++ VERIFY(0 == dsl_prop_get_ds(ds, propname, 8, 1, &intval, NULL)); ++ ++ if (ds->ds_phys != NULL && dsl_dataset_is_snapshot(ds)) { ++ dsl_prop_cb_record_t *cbr; ++ /* ++ * It's a snapshot; nothing can inherit this ++ * property, so just look for callbacks on this ++ * ds here. ++ */ ++ mutex_enter(&ds->ds_dir->dd_lock); ++ for (cbr = list_head(&ds->ds_dir->dd_prop_cbs); cbr; ++ cbr = list_next(&ds->ds_dir->dd_prop_cbs, cbr)) { ++ if (cbr->cbr_ds == ds && ++ strcmp(cbr->cbr_propname, propname) == 0) ++ cbr->cbr_func(cbr->cbr_arg, intval); ++ } ++ mutex_exit(&ds->ds_dir->dd_lock); ++ } else { ++ dsl_prop_changed_notify(ds->ds_dir->dd_pool, ++ ds->ds_dir->dd_object, propname, intval, TRUE); ++ } ++ ++ (void) snprintf(valbuf, sizeof (valbuf), ++ "%lld", (longlong_t)intval); ++ valstr = valbuf; ++ } else { ++ if (source == ZPROP_SRC_LOCAL) { ++ valstr = (char *)psa->psa_value; ++ } else { ++ tbuf = kmem_alloc(ZAP_MAXVALUELEN, KM_PUSHPAGE); ++ if (dsl_prop_get_ds(ds, propname, 1, ++ ZAP_MAXVALUELEN, tbuf, NULL) == 0) ++ valstr = tbuf; ++ } ++ } ++ ++ spa_history_log_internal((source == ZPROP_SRC_NONE || ++ source == ZPROP_SRC_INHERITED) ? LOG_DS_INHERIT : ++ LOG_DS_PROPSET, ds->ds_dir->dd_pool->dp_spa, tx, ++ "%s=%s dataset = %llu", propname, ++ (valstr == NULL ? "" : valstr), ds->ds_object); ++ ++ if (tbuf != NULL) ++ kmem_free(tbuf, ZAP_MAXVALUELEN); ++} ++ ++void ++dsl_props_set_sync(void *arg1, void *arg2, dmu_tx_t *tx) ++{ ++ dsl_dataset_t *ds = arg1; ++ dsl_props_arg_t *pa = arg2; ++ nvlist_t *props = pa->pa_props; ++ dsl_prop_setarg_t psa; ++ nvpair_t *elem = NULL; ++ ++ psa.psa_source = pa->pa_source; ++ ++ while ((elem = nvlist_next_nvpair(props, elem)) != NULL) { ++ nvpair_t *pair = elem; ++ ++ psa.psa_name = nvpair_name(pair); ++ ++ if (nvpair_type(pair) == DATA_TYPE_NVLIST) { ++ /* ++ * dsl_prop_get_all_impl() returns properties in this ++ * format. 
++ */ ++ nvlist_t *attrs; ++ VERIFY(nvpair_value_nvlist(pair, &attrs) == 0); ++ VERIFY(nvlist_lookup_nvpair(attrs, ZPROP_VALUE, ++ &pair) == 0); ++ } ++ ++ if (nvpair_type(pair) == DATA_TYPE_STRING) { ++ VERIFY(nvpair_value_string(pair, ++ (char **)&psa.psa_value) == 0); ++ psa.psa_intsz = 1; ++ psa.psa_numints = strlen(psa.psa_value) + 1; ++ } else { ++ uint64_t intval; ++ VERIFY(nvpair_value_uint64(pair, &intval) == 0); ++ psa.psa_intsz = sizeof (intval); ++ psa.psa_numints = 1; ++ psa.psa_value = &intval; ++ } ++ dsl_prop_set_sync(ds, &psa, tx); ++ } ++} ++ ++void ++dsl_dir_prop_set_uint64_sync(dsl_dir_t *dd, const char *name, uint64_t val, ++ dmu_tx_t *tx) ++{ ++ objset_t *mos = dd->dd_pool->dp_meta_objset; ++ uint64_t zapobj = dd->dd_phys->dd_props_zapobj; ++ ++ ASSERT(dmu_tx_is_syncing(tx)); ++ ++ VERIFY(0 == zap_update(mos, zapobj, name, sizeof (val), 1, &val, tx)); ++ ++ dsl_prop_changed_notify(dd->dd_pool, dd->dd_object, name, val, TRUE); ++ ++ spa_history_log_internal(LOG_DS_PROPSET, dd->dd_pool->dp_spa, tx, ++ "%s=%llu dataset = %llu", name, (u_longlong_t)val, ++ dd->dd_phys->dd_head_dataset_obj); ++} ++ ++int ++dsl_prop_set(const char *dsname, const char *propname, zprop_source_t source, ++ int intsz, int numints, const void *buf) ++{ ++ dsl_dataset_t *ds; ++ uint64_t version; ++ int err; ++ dsl_prop_setarg_t psa; ++ ++ /* ++ * We must do these checks before we get to the syncfunc, since ++ * it can't fail. ++ */ ++ if (strlen(propname) >= ZAP_MAXNAMELEN) ++ return (ENAMETOOLONG); ++ ++ err = dsl_dataset_hold(dsname, FTAG, &ds); ++ if (err) ++ return (err); ++ ++ version = spa_version(ds->ds_dir->dd_pool->dp_spa); ++ if (intsz * numints >= (version < SPA_VERSION_STMF_PROP ? ++ ZAP_OLDMAXVALUELEN : ZAP_MAXVALUELEN)) { ++ dsl_dataset_rele(ds, FTAG); ++ return (E2BIG); ++ } ++ if (dsl_dataset_is_snapshot(ds) && ++ version < SPA_VERSION_SNAP_PROPS) { ++ dsl_dataset_rele(ds, FTAG); ++ return (ENOTSUP); ++ } ++ ++ psa.psa_name = propname; ++ psa.psa_source = source; ++ psa.psa_intsz = intsz; ++ psa.psa_numints = numints; ++ psa.psa_value = buf; ++ psa.psa_effective_value = -1ULL; ++ ++ err = dsl_sync_task_do(ds->ds_dir->dd_pool, ++ NULL, dsl_prop_set_sync, ds, &psa, 2); ++ ++ dsl_dataset_rele(ds, FTAG); ++ return (err); ++} ++ ++int ++dsl_props_set(const char *dsname, zprop_source_t source, nvlist_t *props) ++{ ++ dsl_dataset_t *ds; ++ uint64_t version; ++ nvpair_t *elem = NULL; ++ dsl_props_arg_t pa; ++ int err; ++ ++ if ((err = dsl_dataset_hold(dsname, FTAG, &ds))) ++ return (err); ++ /* ++ * Do these checks before the syncfunc, since it can't fail. ++ */ ++ version = spa_version(ds->ds_dir->dd_pool->dp_spa); ++ while ((elem = nvlist_next_nvpair(props, elem)) != NULL) { ++ if (strlen(nvpair_name(elem)) >= ZAP_MAXNAMELEN) { ++ dsl_dataset_rele(ds, FTAG); ++ return (ENAMETOOLONG); ++ } ++ if (nvpair_type(elem) == DATA_TYPE_STRING) { ++ char *valstr; ++ VERIFY(nvpair_value_string(elem, &valstr) == 0); ++ if (strlen(valstr) >= (version < ++ SPA_VERSION_STMF_PROP ? 
++ ZAP_OLDMAXVALUELEN : ZAP_MAXVALUELEN)) { ++ dsl_dataset_rele(ds, FTAG); ++ return (E2BIG); ++ } ++ } ++ } ++ ++ if (dsl_dataset_is_snapshot(ds) && ++ version < SPA_VERSION_SNAP_PROPS) { ++ dsl_dataset_rele(ds, FTAG); ++ return (ENOTSUP); ++ } ++ ++ pa.pa_props = props; ++ pa.pa_source = source; ++ ++ err = dsl_sync_task_do(ds->ds_dir->dd_pool, ++ NULL, dsl_props_set_sync, ds, &pa, 2); ++ ++ dsl_dataset_rele(ds, FTAG); ++ return (err); ++} ++ ++typedef enum dsl_prop_getflags { ++ DSL_PROP_GET_INHERITING = 0x1, /* searching parent of target ds */ ++ DSL_PROP_GET_SNAPSHOT = 0x2, /* snapshot dataset */ ++ DSL_PROP_GET_LOCAL = 0x4, /* local properties */ ++ DSL_PROP_GET_RECEIVED = 0x8 /* received properties */ ++} dsl_prop_getflags_t; ++ ++static int ++dsl_prop_get_all_impl(objset_t *mos, uint64_t propobj, ++ const char *setpoint, dsl_prop_getflags_t flags, nvlist_t *nv) ++{ ++ zap_cursor_t zc; ++ zap_attribute_t za; ++ int err = 0; ++ ++ for (zap_cursor_init(&zc, mos, propobj); ++ (err = zap_cursor_retrieve(&zc, &za)) == 0; ++ zap_cursor_advance(&zc)) { ++ nvlist_t *propval; ++ zfs_prop_t prop; ++ char buf[ZAP_MAXNAMELEN]; ++ char *valstr; ++ const char *suffix; ++ const char *propname; ++ const char *source; ++ ++ suffix = strchr(za.za_name, '$'); ++ ++ if (suffix == NULL) { ++ /* ++ * Skip local properties if we only want received ++ * properties. ++ */ ++ if (flags & DSL_PROP_GET_RECEIVED) ++ continue; ++ ++ propname = za.za_name; ++ source = setpoint; ++ } else if (strcmp(suffix, ZPROP_INHERIT_SUFFIX) == 0) { ++ /* Skip explicitly inherited entries. */ ++ continue; ++ } else if (strcmp(suffix, ZPROP_RECVD_SUFFIX) == 0) { ++ if (flags & DSL_PROP_GET_LOCAL) ++ continue; ++ ++ (void) strncpy(buf, za.za_name, (suffix - za.za_name)); ++ buf[suffix - za.za_name] = '\0'; ++ propname = buf; ++ ++ if (!(flags & DSL_PROP_GET_RECEIVED)) { ++ /* Skip if locally overridden. */ ++ err = zap_contains(mos, propobj, propname); ++ if (err == 0) ++ continue; ++ if (err != ENOENT) ++ break; ++ ++ /* Skip if explicitly inherited. */ ++ valstr = kmem_asprintf("%s%s", propname, ++ ZPROP_INHERIT_SUFFIX); ++ err = zap_contains(mos, propobj, valstr); ++ strfree(valstr); ++ if (err == 0) ++ continue; ++ if (err != ENOENT) ++ break; ++ } ++ ++ source = ((flags & DSL_PROP_GET_INHERITING) ? ++ setpoint : ZPROP_SOURCE_VAL_RECVD); ++ } else { ++ /* ++ * For backward compatibility, skip suffixes we don't ++ * recognize. ++ */ ++ continue; ++ } ++ ++ prop = zfs_name_to_prop(propname); ++ ++ /* Skip non-inheritable properties. */ ++ if ((flags & DSL_PROP_GET_INHERITING) && prop != ZPROP_INVAL && ++ !zfs_prop_inheritable(prop)) ++ continue; ++ ++ /* Skip properties not valid for this type. */ ++ if ((flags & DSL_PROP_GET_SNAPSHOT) && prop != ZPROP_INVAL && ++ !zfs_prop_valid_for_type(prop, ZFS_TYPE_SNAPSHOT)) ++ continue; ++ ++ /* Skip properties already defined. 
*/ ++ if (nvlist_exists(nv, propname)) ++ continue; ++ ++ VERIFY(nvlist_alloc(&propval, NV_UNIQUE_NAME, KM_SLEEP) == 0); ++ if (za.za_integer_length == 1) { ++ /* ++ * String property ++ */ ++ char *tmp = kmem_alloc(za.za_num_integers, ++ KM_SLEEP); ++ err = zap_lookup(mos, propobj, ++ za.za_name, 1, za.za_num_integers, tmp); ++ if (err != 0) { ++ kmem_free(tmp, za.za_num_integers); ++ break; ++ } ++ VERIFY(nvlist_add_string(propval, ZPROP_VALUE, ++ tmp) == 0); ++ kmem_free(tmp, za.za_num_integers); ++ } else { ++ /* ++ * Integer property ++ */ ++ ASSERT(za.za_integer_length == 8); ++ (void) nvlist_add_uint64(propval, ZPROP_VALUE, ++ za.za_first_integer); ++ } ++ ++ VERIFY(nvlist_add_string(propval, ZPROP_SOURCE, source) == 0); ++ VERIFY(nvlist_add_nvlist(nv, propname, propval) == 0); ++ nvlist_free(propval); ++ } ++ zap_cursor_fini(&zc); ++ if (err == ENOENT) ++ err = 0; ++ return (err); ++} ++ ++/* ++ * Iterate over all properties for this dataset and return them in an nvlist. ++ */ ++static int ++dsl_prop_get_all_ds(dsl_dataset_t *ds, nvlist_t **nvp, ++ dsl_prop_getflags_t flags) ++{ ++ dsl_dir_t *dd = ds->ds_dir; ++ dsl_pool_t *dp = dd->dd_pool; ++ objset_t *mos = dp->dp_meta_objset; ++ int err = 0; ++ char setpoint[MAXNAMELEN]; ++ ++ VERIFY(nvlist_alloc(nvp, NV_UNIQUE_NAME, KM_SLEEP) == 0); ++ ++ if (dsl_dataset_is_snapshot(ds)) ++ flags |= DSL_PROP_GET_SNAPSHOT; ++ ++ rw_enter(&dp->dp_config_rwlock, RW_READER); ++ ++ if (ds->ds_phys->ds_props_obj != 0) { ++ ASSERT(flags & DSL_PROP_GET_SNAPSHOT); ++ dsl_dataset_name(ds, setpoint); ++ err = dsl_prop_get_all_impl(mos, ds->ds_phys->ds_props_obj, ++ setpoint, flags, *nvp); ++ if (err) ++ goto out; ++ } ++ ++ for (; dd != NULL; dd = dd->dd_parent) { ++ if (dd != ds->ds_dir || (flags & DSL_PROP_GET_SNAPSHOT)) { ++ if (flags & (DSL_PROP_GET_LOCAL | ++ DSL_PROP_GET_RECEIVED)) ++ break; ++ flags |= DSL_PROP_GET_INHERITING; ++ } ++ dsl_dir_name(dd, setpoint); ++ err = dsl_prop_get_all_impl(mos, dd->dd_phys->dd_props_zapobj, ++ setpoint, flags, *nvp); ++ if (err) ++ break; ++ } ++out: ++ rw_exit(&dp->dp_config_rwlock); ++ return (err); ++} ++ ++boolean_t ++dsl_prop_get_hasrecvd(objset_t *os) ++{ ++ dsl_dataset_t *ds = os->os_dsl_dataset; ++ int rc; ++ uint64_t dummy; ++ ++ rw_enter(&ds->ds_dir->dd_pool->dp_config_rwlock, RW_READER); ++ rc = dsl_prop_get_ds(ds, ZPROP_HAS_RECVD, 8, 1, &dummy, NULL); ++ rw_exit(&ds->ds_dir->dd_pool->dp_config_rwlock); ++ ASSERT(rc != 0 || spa_version(os->os_spa) >= SPA_VERSION_RECVD_PROPS); ++ return (rc == 0); ++} ++ ++static void ++dsl_prop_set_hasrecvd_impl(objset_t *os, zprop_source_t source) ++{ ++ dsl_dataset_t *ds = os->os_dsl_dataset; ++ uint64_t dummy = 0; ++ dsl_prop_setarg_t psa; ++ ++ if (spa_version(os->os_spa) < SPA_VERSION_RECVD_PROPS) ++ return; ++ ++ dsl_prop_setarg_init_uint64(&psa, ZPROP_HAS_RECVD, source, &dummy); ++ ++ (void) dsl_sync_task_do(ds->ds_dir->dd_pool, NULL, ++ dsl_prop_set_sync, ds, &psa, 2); ++} ++ ++/* ++ * Call after successfully receiving properties to ensure that only the first ++ * receive on or after SPA_VERSION_RECVD_PROPS blows away local properties. 
++ */ ++void ++dsl_prop_set_hasrecvd(objset_t *os) ++{ ++ if (dsl_prop_get_hasrecvd(os)) { ++ ASSERT(spa_version(os->os_spa) >= SPA_VERSION_RECVD_PROPS); ++ return; ++ } ++ dsl_prop_set_hasrecvd_impl(os, ZPROP_SRC_LOCAL); ++} ++ ++void ++dsl_prop_unset_hasrecvd(objset_t *os) ++{ ++ dsl_prop_set_hasrecvd_impl(os, ZPROP_SRC_NONE); ++} ++ ++int ++dsl_prop_get_all(objset_t *os, nvlist_t **nvp) ++{ ++ return (dsl_prop_get_all_ds(os->os_dsl_dataset, nvp, 0)); ++} ++ ++int ++dsl_prop_get_received(objset_t *os, nvlist_t **nvp) ++{ ++ /* ++ * Received properties are not distinguishable from local properties ++ * until the dataset has received properties on or after ++ * SPA_VERSION_RECVD_PROPS. ++ */ ++ dsl_prop_getflags_t flags = (dsl_prop_get_hasrecvd(os) ? ++ DSL_PROP_GET_RECEIVED : DSL_PROP_GET_LOCAL); ++ return (dsl_prop_get_all_ds(os->os_dsl_dataset, nvp, flags)); ++} ++ ++void ++dsl_prop_nvlist_add_uint64(nvlist_t *nv, zfs_prop_t prop, uint64_t value) ++{ ++ nvlist_t *propval; ++ const char *propname = zfs_prop_to_name(prop); ++ uint64_t default_value; ++ ++ if (nvlist_lookup_nvlist(nv, propname, &propval) == 0) { ++ VERIFY(nvlist_add_uint64(propval, ZPROP_VALUE, value) == 0); ++ return; ++ } ++ ++ VERIFY(nvlist_alloc(&propval, NV_UNIQUE_NAME, KM_SLEEP) == 0); ++ VERIFY(nvlist_add_uint64(propval, ZPROP_VALUE, value) == 0); ++ /* Indicate the default source if we can. */ ++ if (dodefault(propname, 8, 1, &default_value) == 0 && ++ value == default_value) { ++ VERIFY(nvlist_add_string(propval, ZPROP_SOURCE, "") == 0); ++ } ++ VERIFY(nvlist_add_nvlist(nv, propname, propval) == 0); ++ nvlist_free(propval); ++} ++ ++void ++dsl_prop_nvlist_add_string(nvlist_t *nv, zfs_prop_t prop, const char *value) ++{ ++ nvlist_t *propval; ++ const char *propname = zfs_prop_to_name(prop); ++ ++ if (nvlist_lookup_nvlist(nv, propname, &propval) == 0) { ++ VERIFY(nvlist_add_string(propval, ZPROP_VALUE, value) == 0); ++ return; ++ } ++ ++ VERIFY(nvlist_alloc(&propval, NV_UNIQUE_NAME, KM_SLEEP) == 0); ++ VERIFY(nvlist_add_string(propval, ZPROP_VALUE, value) == 0); ++ VERIFY(nvlist_add_nvlist(nv, propname, propval) == 0); ++ nvlist_free(propval); ++} ++ ++#if defined(_KERNEL) && defined(HAVE_SPL) ++EXPORT_SYMBOL(dsl_prop_register); ++EXPORT_SYMBOL(dsl_prop_unregister); ++EXPORT_SYMBOL(dsl_prop_numcb); ++EXPORT_SYMBOL(dsl_prop_set); ++EXPORT_SYMBOL(dsl_prop_get); ++EXPORT_SYMBOL(dsl_prop_get_integer); ++EXPORT_SYMBOL(dsl_prop_get_all); ++EXPORT_SYMBOL(dsl_prop_get_received); ++EXPORT_SYMBOL(dsl_prop_get_ds); ++EXPORT_SYMBOL(dsl_prop_get_dd); ++EXPORT_SYMBOL(dsl_prop_nvlist_add_uint64); ++EXPORT_SYMBOL(dsl_prop_nvlist_add_string); ++#endif +diff -uNr linux-3.2.33-go.orig/fs/zfs/zfs/dsl_scan.c linux-3.2.33-go/fs/zfs/zfs/dsl_scan.c +--- linux-3.2.33-go.orig/fs/zfs/zfs/dsl_scan.c 1970-01-01 01:00:00.000000000 +0100 ++++ linux-3.2.33-go/fs/zfs/zfs/dsl_scan.c 2012-11-16 23:25:34.353039289 +0100 +@@ -0,0 +1,1814 @@ ++/* ++ * CDDL HEADER START ++ * ++ * The contents of this file are subject to the terms of the ++ * Common Development and Distribution License (the "License"). ++ * You may not use this file except in compliance with the License. ++ * ++ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE ++ * or http://www.opensolaris.org/os/licensing. ++ * See the License for the specific language governing permissions ++ * and limitations under the License. ++ * ++ * When distributing Covered Code, include this CDDL HEADER in each ++ * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 
++ * If applicable, add the following below this CDDL HEADER, with the ++ * fields enclosed by brackets "[]" replaced with your own identifying ++ * information: Portions Copyright [yyyy] [name of copyright owner] ++ * ++ * CDDL HEADER END ++ */ ++/* ++ * Copyright (c) 2008, 2010, Oracle and/or its affiliates. All rights reserved. ++ */ ++ ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#ifdef _KERNEL ++#include ++#endif ++ ++typedef int (scan_cb_t)(dsl_pool_t *, const blkptr_t *, const zbookmark_t *); ++ ++static scan_cb_t dsl_scan_scrub_cb; ++static dsl_syncfunc_t dsl_scan_cancel_sync; ++static void dsl_scan_sync_state(dsl_scan_t *, dmu_tx_t *tx); ++ ++int zfs_top_maxinflight = 32; /* maximum I/Os per top-level */ ++int zfs_resilver_delay = 2; /* number of ticks to delay resilver */ ++int zfs_scrub_delay = 4; /* number of ticks to delay scrub */ ++int zfs_scan_idle = 50; /* idle window in clock ticks */ ++ ++int zfs_scan_min_time_ms = 1000; /* min millisecs to scrub per txg */ ++int zfs_free_min_time_ms = 1000; /* min millisecs to free per txg */ ++int zfs_resilver_min_time_ms = 3000; /* min millisecs to resilver per txg */ ++int zfs_no_scrub_io = B_FALSE; /* set to disable scrub i/o */ ++int zfs_no_scrub_prefetch = B_FALSE; /* set to disable srub prefetching */ ++enum ddt_class zfs_scrub_ddt_class_max = DDT_CLASS_DUPLICATE; ++int dsl_scan_delay_completion = B_FALSE; /* set to delay scan completion */ ++ ++#define DSL_SCAN_IS_SCRUB_RESILVER(scn) \ ++ ((scn)->scn_phys.scn_func == POOL_SCAN_SCRUB || \ ++ (scn)->scn_phys.scn_func == POOL_SCAN_RESILVER) ++ ++/* the order has to match pool_scan_type */ ++static scan_cb_t *scan_funcs[POOL_SCAN_FUNCS] = { ++ NULL, ++ dsl_scan_scrub_cb, /* POOL_SCAN_SCRUB */ ++ dsl_scan_scrub_cb, /* POOL_SCAN_RESILVER */ ++}; ++ ++int ++dsl_scan_init(dsl_pool_t *dp, uint64_t txg) ++{ ++ int err; ++ dsl_scan_t *scn; ++ spa_t *spa = dp->dp_spa; ++ uint64_t f; ++ ++ scn = dp->dp_scan = kmem_zalloc(sizeof (dsl_scan_t), KM_SLEEP); ++ scn->scn_dp = dp; ++ ++ err = zap_lookup(dp->dp_meta_objset, DMU_POOL_DIRECTORY_OBJECT, ++ "scrub_func", sizeof (uint64_t), 1, &f); ++ if (err == 0) { ++ /* ++ * There was an old-style scrub in progress. Restart a ++ * new-style scrub from the beginning. ++ */ ++ scn->scn_restart_txg = txg; ++ zfs_dbgmsg("old-style scrub was in progress; " ++ "restarting new-style scrub in txg %llu", ++ scn->scn_restart_txg); ++ ++ /* ++ * Load the queue obj from the old location so that it ++ * can be freed by dsl_scan_done(). ++ */ ++ (void) zap_lookup(dp->dp_meta_objset, DMU_POOL_DIRECTORY_OBJECT, ++ "scrub_queue", sizeof (uint64_t), 1, ++ &scn->scn_phys.scn_queue_obj); ++ } else { ++ err = zap_lookup(dp->dp_meta_objset, DMU_POOL_DIRECTORY_OBJECT, ++ DMU_POOL_SCAN, sizeof (uint64_t), SCAN_PHYS_NUMINTS, ++ &scn->scn_phys); ++ if (err == ENOENT) ++ return (0); ++ else if (err) ++ return (err); ++ ++ if (scn->scn_phys.scn_state == DSS_SCANNING && ++ spa_prev_software_version(dp->dp_spa) < SPA_VERSION_SCAN) { ++ /* ++ * A new-type scrub was in progress on an old ++ * pool, and the pool was accessed by old ++ * software. Restart from the beginning, since ++ * the old software may have changed the pool in ++ * the meantime. 
++ */ ++ scn->scn_restart_txg = txg; ++ zfs_dbgmsg("new-style scrub was modified " ++ "by old software; restarting in txg %llu", ++ scn->scn_restart_txg); ++ } ++ } ++ ++ spa_scan_stat_init(spa); ++ return (0); ++} ++ ++void ++dsl_scan_fini(dsl_pool_t *dp) ++{ ++ if (dp->dp_scan) { ++ kmem_free(dp->dp_scan, sizeof (dsl_scan_t)); ++ dp->dp_scan = NULL; ++ } ++} ++ ++/* ARGSUSED */ ++static int ++dsl_scan_setup_check(void *arg1, void *arg2, dmu_tx_t *tx) ++{ ++ dsl_scan_t *scn = arg1; ++ ++ if (scn->scn_phys.scn_state == DSS_SCANNING) ++ return (EBUSY); ++ ++ return (0); ++} ++ ++/* ARGSUSED */ ++static void ++dsl_scan_setup_sync(void *arg1, void *arg2, dmu_tx_t *tx) ++{ ++ dsl_scan_t *scn = arg1; ++ pool_scan_func_t *funcp = arg2; ++ dmu_object_type_t ot = 0; ++ dsl_pool_t *dp = scn->scn_dp; ++ spa_t *spa = dp->dp_spa; ++ ++ ASSERT(scn->scn_phys.scn_state != DSS_SCANNING); ++ ASSERT(*funcp > POOL_SCAN_NONE && *funcp < POOL_SCAN_FUNCS); ++ bzero(&scn->scn_phys, sizeof (scn->scn_phys)); ++ scn->scn_phys.scn_func = *funcp; ++ scn->scn_phys.scn_state = DSS_SCANNING; ++ scn->scn_phys.scn_min_txg = 0; ++ scn->scn_phys.scn_max_txg = tx->tx_txg; ++ scn->scn_phys.scn_ddt_class_max = DDT_CLASSES - 1; /* the entire DDT */ ++ scn->scn_phys.scn_start_time = gethrestime_sec(); ++ scn->scn_phys.scn_errors = 0; ++ scn->scn_phys.scn_to_examine = spa->spa_root_vdev->vdev_stat.vs_alloc; ++ scn->scn_restart_txg = 0; ++ spa_scan_stat_init(spa); ++ ++ if (DSL_SCAN_IS_SCRUB_RESILVER(scn)) { ++ scn->scn_phys.scn_ddt_class_max = zfs_scrub_ddt_class_max; ++ ++ /* rewrite all disk labels */ ++ vdev_config_dirty(spa->spa_root_vdev); ++ ++ if (vdev_resilver_needed(spa->spa_root_vdev, ++ &scn->scn_phys.scn_min_txg, &scn->scn_phys.scn_max_txg)) { ++ spa_event_notify(spa, NULL, FM_EREPORT_ZFS_RESILVER_START); ++ } else { ++ spa_event_notify(spa, NULL, FM_EREPORT_ZFS_SCRUB_START); ++ } ++ ++ spa->spa_scrub_started = B_TRUE; ++ /* ++ * If this is an incremental scrub, limit the DDT scrub phase ++ * to just the auto-ditto class (for correctness); the rest ++ * of the scrub should go faster using top-down pruning. ++ */ ++ if (scn->scn_phys.scn_min_txg > TXG_INITIAL) ++ scn->scn_phys.scn_ddt_class_max = DDT_CLASS_DITTO; ++ ++ } ++ ++ /* back to the generic stuff */ ++ ++ if (dp->dp_blkstats == NULL) { ++ dp->dp_blkstats = kmem_alloc(sizeof (zfs_all_blkstats_t), ++ KM_PUSHPAGE | KM_NODEBUG); ++ } ++ bzero(dp->dp_blkstats, sizeof (zfs_all_blkstats_t)); ++ ++ if (spa_version(spa) < SPA_VERSION_DSL_SCRUB) ++ ot = DMU_OT_ZAP_OTHER; ++ ++ scn->scn_phys.scn_queue_obj = zap_create(dp->dp_meta_objset, ++ ot ? ot : DMU_OT_SCAN_QUEUE, DMU_OT_NONE, 0, tx); ++ ++ dsl_scan_sync_state(scn, tx); ++ ++ spa_history_log_internal(LOG_POOL_SCAN, spa, tx, ++ "func=%u mintxg=%llu maxtxg=%llu", ++ *funcp, scn->scn_phys.scn_min_txg, scn->scn_phys.scn_max_txg); ++} ++ ++/* ARGSUSED */ ++static void ++dsl_scan_done(dsl_scan_t *scn, boolean_t complete, dmu_tx_t *tx) ++{ ++ static const char *old_names[] = { ++ "scrub_bookmark", ++ "scrub_ddt_bookmark", ++ "scrub_ddt_class_max", ++ "scrub_queue", ++ "scrub_min_txg", ++ "scrub_max_txg", ++ "scrub_func", ++ "scrub_errors", ++ NULL ++ }; ++ ++ dsl_pool_t *dp = scn->scn_dp; ++ spa_t *spa = dp->dp_spa; ++ int i; ++ ++ /* Remove any remnants of an old-style scrub. 
*/ ++ for (i = 0; old_names[i]; i++) { ++ (void) zap_remove(dp->dp_meta_objset, ++ DMU_POOL_DIRECTORY_OBJECT, old_names[i], tx); ++ } ++ ++ if (scn->scn_phys.scn_queue_obj != 0) { ++ VERIFY(0 == dmu_object_free(dp->dp_meta_objset, ++ scn->scn_phys.scn_queue_obj, tx)); ++ scn->scn_phys.scn_queue_obj = 0; ++ } ++ ++ /* ++ * If we were "restarted" from a stopped state, don't bother ++ * with anything else. ++ */ ++ if (scn->scn_phys.scn_state != DSS_SCANNING) ++ return; ++ ++ if (complete) ++ scn->scn_phys.scn_state = DSS_FINISHED; ++ else ++ scn->scn_phys.scn_state = DSS_CANCELED; ++ ++ spa_history_log_internal(LOG_POOL_SCAN_DONE, spa, tx, ++ "complete=%u", complete); ++ ++ if (DSL_SCAN_IS_SCRUB_RESILVER(scn)) { ++ mutex_enter(&spa->spa_scrub_lock); ++ while (spa->spa_scrub_inflight > 0) { ++ cv_wait(&spa->spa_scrub_io_cv, ++ &spa->spa_scrub_lock); ++ } ++ mutex_exit(&spa->spa_scrub_lock); ++ spa->spa_scrub_started = B_FALSE; ++ spa->spa_scrub_active = B_FALSE; ++ ++ /* ++ * If the scrub/resilver completed, update all DTLs to ++ * reflect this. Whether it succeeded or not, vacate ++ * all temporary scrub DTLs. ++ */ ++ vdev_dtl_reassess(spa->spa_root_vdev, tx->tx_txg, ++ complete ? scn->scn_phys.scn_max_txg : 0, B_TRUE); ++ if (complete) { ++ spa_event_notify(spa, NULL, scn->scn_phys.scn_min_txg ? ++ FM_EREPORT_ZFS_RESILVER_FINISH : ++ FM_EREPORT_ZFS_SCRUB_FINISH); ++ } ++ spa_errlog_rotate(spa); ++ ++ /* ++ * We may have finished replacing a device. ++ * Let the async thread assess this and handle the detach. ++ */ ++ spa_async_request(spa, SPA_ASYNC_RESILVER_DONE); ++ } ++ ++ scn->scn_phys.scn_end_time = gethrestime_sec(); ++} ++ ++/* ARGSUSED */ ++static int ++dsl_scan_cancel_check(void *arg1, void *arg2, dmu_tx_t *tx) ++{ ++ dsl_scan_t *scn = arg1; ++ ++ if (scn->scn_phys.scn_state != DSS_SCANNING) ++ return (ENOENT); ++ return (0); ++} ++ ++/* ARGSUSED */ ++static void ++dsl_scan_cancel_sync(void *arg1, void *arg2, dmu_tx_t *tx) ++{ ++ dsl_scan_t *scn = arg1; ++ ++ dsl_scan_done(scn, B_FALSE, tx); ++ dsl_scan_sync_state(scn, tx); ++} ++ ++int ++dsl_scan_cancel(dsl_pool_t *dp) ++{ ++ boolean_t complete = B_FALSE; ++ int err; ++ ++ err = dsl_sync_task_do(dp, dsl_scan_cancel_check, ++ dsl_scan_cancel_sync, dp->dp_scan, &complete, 3); ++ return (err); ++} ++ ++static void dsl_scan_visitbp(blkptr_t *bp, ++ const zbookmark_t *zb, dnode_phys_t *dnp, arc_buf_t *pbuf, ++ dsl_dataset_t *ds, dsl_scan_t *scn, dmu_objset_type_t ostype, ++ dmu_tx_t *tx); ++inline __attribute__((always_inline)) static void dsl_scan_visitdnode( ++ dsl_scan_t *, dsl_dataset_t *ds, dmu_objset_type_t ostype, ++ dnode_phys_t *dnp, arc_buf_t *buf, uint64_t object, dmu_tx_t *tx); ++ ++void ++dsl_free(dsl_pool_t *dp, uint64_t txg, const blkptr_t *bp) ++{ ++ zio_free(dp->dp_spa, txg, bp); ++} ++ ++void ++dsl_free_sync(zio_t *pio, dsl_pool_t *dp, uint64_t txg, const blkptr_t *bpp) ++{ ++ ASSERT(dsl_pool_sync_context(dp)); ++ zio_nowait(zio_free_sync(pio, dp->dp_spa, txg, bpp, pio->io_flags)); ++} ++ ++int ++dsl_read(zio_t *pio, spa_t *spa, const blkptr_t *bpp, arc_buf_t *pbuf, ++ arc_done_func_t *done, void *private, int priority, int zio_flags, ++ uint32_t *arc_flags, const zbookmark_t *zb) ++{ ++ return (arc_read(pio, spa, bpp, pbuf, done, private, ++ priority, zio_flags, arc_flags, zb)); ++} ++ ++int ++dsl_read_nolock(zio_t *pio, spa_t *spa, const blkptr_t *bpp, ++ arc_done_func_t *done, void *private, int priority, int zio_flags, ++ uint32_t *arc_flags, const zbookmark_t *zb) ++{ ++ return (arc_read_nolock(pio, spa, bpp, 
done, private, ++ priority, zio_flags, arc_flags, zb)); ++} ++ ++static boolean_t ++bookmark_is_zero(const zbookmark_t *zb) ++{ ++ return (zb->zb_objset == 0 && zb->zb_object == 0 && ++ zb->zb_level == 0 && zb->zb_blkid == 0); ++} ++ ++/* dnp is the dnode for zb1->zb_object */ ++static boolean_t ++bookmark_is_before(const dnode_phys_t *dnp, const zbookmark_t *zb1, ++ const zbookmark_t *zb2) ++{ ++ uint64_t zb1nextL0, zb2thisobj; ++ ++ ASSERT(zb1->zb_objset == zb2->zb_objset); ++ ASSERT(zb2->zb_level == 0); ++ ++ /* ++ * A bookmark in the deadlist is considered to be after ++ * everything else. ++ */ ++ if (zb2->zb_object == DMU_DEADLIST_OBJECT) ++ return (B_TRUE); ++ ++ /* The objset_phys_t isn't before anything. */ ++ if (dnp == NULL) ++ return (B_FALSE); ++ ++ zb1nextL0 = (zb1->zb_blkid + 1) << ++ ((zb1->zb_level) * (dnp->dn_indblkshift - SPA_BLKPTRSHIFT)); ++ ++ zb2thisobj = zb2->zb_object ? zb2->zb_object : ++ zb2->zb_blkid << (DNODE_BLOCK_SHIFT - DNODE_SHIFT); ++ ++ if (zb1->zb_object == DMU_META_DNODE_OBJECT) { ++ uint64_t nextobj = zb1nextL0 * ++ (dnp->dn_datablkszsec << SPA_MINBLOCKSHIFT) >> DNODE_SHIFT; ++ return (nextobj <= zb2thisobj); ++ } ++ ++ if (zb1->zb_object < zb2thisobj) ++ return (B_TRUE); ++ if (zb1->zb_object > zb2thisobj) ++ return (B_FALSE); ++ if (zb2->zb_object == DMU_META_DNODE_OBJECT) ++ return (B_FALSE); ++ return (zb1nextL0 <= zb2->zb_blkid); ++} ++ ++static uint64_t ++dsl_scan_ds_maxtxg(dsl_dataset_t *ds) ++{ ++ uint64_t smt = ds->ds_dir->dd_pool->dp_scan->scn_phys.scn_max_txg; ++ if (dsl_dataset_is_snapshot(ds)) ++ return (MIN(smt, ds->ds_phys->ds_creation_txg)); ++ return (smt); ++} ++ ++static void ++dsl_scan_sync_state(dsl_scan_t *scn, dmu_tx_t *tx) ++{ ++ VERIFY(0 == zap_update(scn->scn_dp->dp_meta_objset, ++ DMU_POOL_DIRECTORY_OBJECT, ++ DMU_POOL_SCAN, sizeof (uint64_t), SCAN_PHYS_NUMINTS, ++ &scn->scn_phys, tx)); ++} ++ ++static boolean_t ++dsl_scan_check_pause(dsl_scan_t *scn, const zbookmark_t *zb) ++{ ++ uint64_t elapsed_nanosecs; ++ int mintime; ++ ++ /* we never skip user/group accounting objects */ ++ if (zb && (int64_t)zb->zb_object < 0) ++ return (B_FALSE); ++ ++ if (scn->scn_pausing) ++ return (B_TRUE); /* we're already pausing */ ++ ++ if (!bookmark_is_zero(&scn->scn_phys.scn_bookmark)) ++ return (B_FALSE); /* we're resuming */ ++ ++ /* We only know how to resume from level-0 blocks. */ ++ if (zb && zb->zb_level != 0) ++ return (B_FALSE); ++ ++ mintime = (scn->scn_phys.scn_func == POOL_SCAN_RESILVER) ? 
++ zfs_resilver_min_time_ms : zfs_scan_min_time_ms; ++ elapsed_nanosecs = gethrtime() - scn->scn_sync_start_time; ++ if (elapsed_nanosecs / NANOSEC > zfs_txg_timeout || ++ (elapsed_nanosecs / MICROSEC > mintime && ++ txg_sync_waiting(scn->scn_dp)) || ++ spa_shutting_down(scn->scn_dp->dp_spa)) { ++ if (zb) { ++ dprintf("pausing at bookmark %llx/%llx/%llx/%llx\n", ++ (longlong_t)zb->zb_objset, ++ (longlong_t)zb->zb_object, ++ (longlong_t)zb->zb_level, ++ (longlong_t)zb->zb_blkid); ++ scn->scn_phys.scn_bookmark = *zb; ++ } ++ dprintf("pausing at DDT bookmark %llx/%llx/%llx/%llx\n", ++ (longlong_t)scn->scn_phys.scn_ddt_bookmark.ddb_class, ++ (longlong_t)scn->scn_phys.scn_ddt_bookmark.ddb_type, ++ (longlong_t)scn->scn_phys.scn_ddt_bookmark.ddb_checksum, ++ (longlong_t)scn->scn_phys.scn_ddt_bookmark.ddb_cursor); ++ scn->scn_pausing = B_TRUE; ++ return (B_TRUE); ++ } ++ return (B_FALSE); ++} ++ ++typedef struct zil_scan_arg { ++ dsl_pool_t *zsa_dp; ++ zil_header_t *zsa_zh; ++} zil_scan_arg_t; ++ ++/* ARGSUSED */ ++static int ++dsl_scan_zil_block(zilog_t *zilog, blkptr_t *bp, void *arg, uint64_t claim_txg) ++{ ++ zil_scan_arg_t *zsa = arg; ++ dsl_pool_t *dp = zsa->zsa_dp; ++ dsl_scan_t *scn = dp->dp_scan; ++ zil_header_t *zh = zsa->zsa_zh; ++ zbookmark_t zb; ++ ++ if (bp->blk_birth <= scn->scn_phys.scn_cur_min_txg) ++ return (0); ++ ++ /* ++ * One block ("stubby") can be allocated a long time ago; we ++ * want to visit that one because it has been allocated ++ * (on-disk) even if it hasn't been claimed (even though for ++ * scrub there's nothing to do to it). ++ */ ++ if (claim_txg == 0 && bp->blk_birth >= spa_first_txg(dp->dp_spa)) ++ return (0); ++ ++ SET_BOOKMARK(&zb, zh->zh_log.blk_cksum.zc_word[ZIL_ZC_OBJSET], ++ ZB_ZIL_OBJECT, ZB_ZIL_LEVEL, bp->blk_cksum.zc_word[ZIL_ZC_SEQ]); ++ ++ VERIFY(0 == scan_funcs[scn->scn_phys.scn_func](dp, bp, &zb)); ++ return (0); ++} ++ ++/* ARGSUSED */ ++static int ++dsl_scan_zil_record(zilog_t *zilog, lr_t *lrc, void *arg, uint64_t claim_txg) ++{ ++ if (lrc->lrc_txtype == TX_WRITE) { ++ zil_scan_arg_t *zsa = arg; ++ dsl_pool_t *dp = zsa->zsa_dp; ++ dsl_scan_t *scn = dp->dp_scan; ++ zil_header_t *zh = zsa->zsa_zh; ++ lr_write_t *lr = (lr_write_t *)lrc; ++ blkptr_t *bp = &lr->lr_blkptr; ++ zbookmark_t zb; ++ ++ if (bp->blk_birth <= scn->scn_phys.scn_cur_min_txg) ++ return (0); ++ ++ /* ++ * birth can be < claim_txg if this record's txg is ++ * already txg sync'ed (but this log block contains ++ * other records that are not synced) ++ */ ++ if (claim_txg == 0 || bp->blk_birth < claim_txg) ++ return (0); ++ ++ SET_BOOKMARK(&zb, zh->zh_log.blk_cksum.zc_word[ZIL_ZC_OBJSET], ++ lr->lr_foid, ZB_ZIL_LEVEL, ++ lr->lr_offset / BP_GET_LSIZE(bp)); ++ ++ VERIFY(0 == scan_funcs[scn->scn_phys.scn_func](dp, bp, &zb)); ++ } ++ return (0); ++} ++ ++static void ++dsl_scan_zil(dsl_pool_t *dp, zil_header_t *zh) ++{ ++ uint64_t claim_txg = zh->zh_claim_txg; ++ zil_scan_arg_t zsa = { dp, zh }; ++ zilog_t *zilog; ++ ++ /* ++ * We only want to visit blocks that have been claimed but not yet ++ * replayed (or, in read-only mode, blocks that *would* be claimed). 
++ */ ++ if (claim_txg == 0 && spa_writeable(dp->dp_spa)) ++ return; ++ ++ zilog = zil_alloc(dp->dp_meta_objset, zh); ++ ++ (void) zil_parse(zilog, dsl_scan_zil_block, dsl_scan_zil_record, &zsa, ++ claim_txg); ++ ++ zil_free(zilog); ++} ++ ++/* ARGSUSED */ ++static void ++dsl_scan_prefetch(dsl_scan_t *scn, arc_buf_t *buf, blkptr_t *bp, ++ uint64_t objset, uint64_t object, uint64_t blkid) ++{ ++ zbookmark_t czb; ++ uint32_t flags = ARC_NOWAIT | ARC_PREFETCH; ++ ++ if (zfs_no_scrub_prefetch) ++ return; ++ ++ if (BP_IS_HOLE(bp) || bp->blk_birth <= scn->scn_phys.scn_min_txg || ++ (BP_GET_LEVEL(bp) == 0 && BP_GET_TYPE(bp) != DMU_OT_DNODE)) ++ return; ++ ++ SET_BOOKMARK(&czb, objset, object, BP_GET_LEVEL(bp), blkid); ++ ++ /* ++ * XXX need to make sure all of these arc_read() prefetches are ++ * done before setting xlateall (similar to dsl_read()) ++ */ ++ (void) arc_read(scn->scn_zio_root, scn->scn_dp->dp_spa, bp, ++ buf, NULL, NULL, ZIO_PRIORITY_ASYNC_READ, ++ ZIO_FLAG_CANFAIL | ZIO_FLAG_SCAN_THREAD, &flags, &czb); ++} ++ ++static boolean_t ++dsl_scan_check_resume(dsl_scan_t *scn, const dnode_phys_t *dnp, ++ const zbookmark_t *zb) ++{ ++ /* ++ * We never skip over user/group accounting objects (obj<0) ++ */ ++ if (!bookmark_is_zero(&scn->scn_phys.scn_bookmark) && ++ (int64_t)zb->zb_object >= 0) { ++ /* ++ * If we already visited this bp & everything below (in ++ * a prior txg sync), don't bother doing it again. ++ */ ++ if (bookmark_is_before(dnp, zb, &scn->scn_phys.scn_bookmark)) ++ return (B_TRUE); ++ ++ /* ++ * If we found the block we're trying to resume from, or ++ * we went past it to a different object, zero it out to ++ * indicate that it's OK to start checking for pausing ++ * again. ++ */ ++ if (bcmp(zb, &scn->scn_phys.scn_bookmark, sizeof (*zb)) == 0 || ++ zb->zb_object > scn->scn_phys.scn_bookmark.zb_object) { ++ dprintf("resuming at %llx/%llx/%llx/%llx\n", ++ (longlong_t)zb->zb_objset, ++ (longlong_t)zb->zb_object, ++ (longlong_t)zb->zb_level, ++ (longlong_t)zb->zb_blkid); ++ bzero(&scn->scn_phys.scn_bookmark, sizeof (*zb)); ++ } ++ } ++ return (B_FALSE); ++} ++ ++/* ++ * Return nonzero on i/o error. ++ * Return new buf to write out in *bufp. 
++ */ ++inline __attribute__((always_inline)) static int ++dsl_scan_recurse(dsl_scan_t *scn, dsl_dataset_t *ds, dmu_objset_type_t ostype, ++ dnode_phys_t *dnp, const blkptr_t *bp, ++ const zbookmark_t *zb, dmu_tx_t *tx, arc_buf_t **bufp) ++{ ++ dsl_pool_t *dp = scn->scn_dp; ++ int zio_flags = ZIO_FLAG_CANFAIL | ZIO_FLAG_SCAN_THREAD; ++ int err; ++ ++ if (BP_GET_LEVEL(bp) > 0) { ++ uint32_t flags = ARC_WAIT; ++ int i; ++ blkptr_t *cbp; ++ int epb = BP_GET_LSIZE(bp) >> SPA_BLKPTRSHIFT; ++ ++ err = arc_read_nolock(NULL, dp->dp_spa, bp, ++ arc_getbuf_func, bufp, ++ ZIO_PRIORITY_ASYNC_READ, zio_flags, &flags, zb); ++ if (err) { ++ scn->scn_phys.scn_errors++; ++ return (err); ++ } ++ for (i = 0, cbp = (*bufp)->b_data; i < epb; i++, cbp++) { ++ dsl_scan_prefetch(scn, *bufp, cbp, zb->zb_objset, ++ zb->zb_object, zb->zb_blkid * epb + i); ++ } ++ for (i = 0, cbp = (*bufp)->b_data; i < epb; i++, cbp++) { ++ zbookmark_t czb; ++ ++ SET_BOOKMARK(&czb, zb->zb_objset, zb->zb_object, ++ zb->zb_level - 1, ++ zb->zb_blkid * epb + i); ++ dsl_scan_visitbp(cbp, &czb, dnp, ++ *bufp, ds, scn, ostype, tx); ++ } ++ } else if (BP_GET_TYPE(bp) == DMU_OT_USERGROUP_USED) { ++ uint32_t flags = ARC_WAIT; ++ ++ err = arc_read_nolock(NULL, dp->dp_spa, bp, ++ arc_getbuf_func, bufp, ++ ZIO_PRIORITY_ASYNC_READ, zio_flags, &flags, zb); ++ if (err) { ++ scn->scn_phys.scn_errors++; ++ return (err); ++ } ++ } else if (BP_GET_TYPE(bp) == DMU_OT_DNODE) { ++ uint32_t flags = ARC_WAIT; ++ dnode_phys_t *cdnp; ++ int i, j; ++ int epb = BP_GET_LSIZE(bp) >> DNODE_SHIFT; ++ ++ err = arc_read_nolock(NULL, dp->dp_spa, bp, ++ arc_getbuf_func, bufp, ++ ZIO_PRIORITY_ASYNC_READ, zio_flags, &flags, zb); ++ if (err) { ++ scn->scn_phys.scn_errors++; ++ return (err); ++ } ++ for (i = 0, cdnp = (*bufp)->b_data; i < epb; i++, cdnp++) { ++ for (j = 0; j < cdnp->dn_nblkptr; j++) { ++ blkptr_t *cbp = &cdnp->dn_blkptr[j]; ++ dsl_scan_prefetch(scn, *bufp, cbp, ++ zb->zb_objset, zb->zb_blkid * epb + i, j); ++ } ++ } ++ for (i = 0, cdnp = (*bufp)->b_data; i < epb; i++, cdnp++) { ++ dsl_scan_visitdnode(scn, ds, ostype, ++ cdnp, *bufp, zb->zb_blkid * epb + i, tx); ++ } ++ ++ } else if (BP_GET_TYPE(bp) == DMU_OT_OBJSET) { ++ uint32_t flags = ARC_WAIT; ++ objset_phys_t *osp; ++ ++ err = arc_read_nolock(NULL, dp->dp_spa, bp, ++ arc_getbuf_func, bufp, ++ ZIO_PRIORITY_ASYNC_READ, zio_flags, &flags, zb); ++ if (err) { ++ scn->scn_phys.scn_errors++; ++ return (err); ++ } ++ ++ osp = (*bufp)->b_data; ++ ++ dsl_scan_visitdnode(scn, ds, osp->os_type, ++ &osp->os_meta_dnode, *bufp, DMU_META_DNODE_OBJECT, tx); ++ ++ if (OBJSET_BUF_HAS_USERUSED(*bufp)) { ++ /* ++ * We also always visit user/group accounting ++ * objects, and never skip them, even if we are ++ * pausing. This is necessary so that the space ++ * deltas from this txg get integrated. ++ */ ++ dsl_scan_visitdnode(scn, ds, osp->os_type, ++ &osp->os_groupused_dnode, *bufp, ++ DMU_GROUPUSED_OBJECT, tx); ++ dsl_scan_visitdnode(scn, ds, osp->os_type, ++ &osp->os_userused_dnode, *bufp, ++ DMU_USERUSED_OBJECT, tx); ++ } ++ } ++ ++ return (0); ++} ++ ++inline __attribute__((always_inline)) static void ++dsl_scan_visitdnode(dsl_scan_t *scn, dsl_dataset_t *ds, ++ dmu_objset_type_t ostype, dnode_phys_t *dnp, arc_buf_t *buf, ++ uint64_t object, dmu_tx_t *tx) ++{ ++ int j; ++ ++ for (j = 0; j < dnp->dn_nblkptr; j++) { ++ zbookmark_t czb; ++ ++ SET_BOOKMARK(&czb, ds ? 
ds->ds_object : 0, object, ++ dnp->dn_nlevels - 1, j); ++ dsl_scan_visitbp(&dnp->dn_blkptr[j], ++ &czb, dnp, buf, ds, scn, ostype, tx); ++ } ++ ++ if (dnp->dn_flags & DNODE_FLAG_SPILL_BLKPTR) { ++ zbookmark_t czb; ++ SET_BOOKMARK(&czb, ds ? ds->ds_object : 0, object, ++ 0, DMU_SPILL_BLKID); ++ dsl_scan_visitbp(&dnp->dn_spill, ++ &czb, dnp, buf, ds, scn, ostype, tx); ++ } ++} ++ ++/* ++ * The arguments are in this order because mdb can only print the ++ * first 5; we want them to be useful. ++ */ ++static void ++dsl_scan_visitbp(blkptr_t *bp, const zbookmark_t *zb, ++ dnode_phys_t *dnp, arc_buf_t *pbuf, ++ dsl_dataset_t *ds, dsl_scan_t *scn, dmu_objset_type_t ostype, ++ dmu_tx_t *tx) ++{ ++ dsl_pool_t *dp = scn->scn_dp; ++ arc_buf_t *buf = NULL; ++ blkptr_t *bp_toread; ++ ++ bp_toread = kmem_alloc(sizeof (blkptr_t), KM_PUSHPAGE); ++ *bp_toread = *bp; ++ ++ /* ASSERT(pbuf == NULL || arc_released(pbuf)); */ ++ ++ if (dsl_scan_check_pause(scn, zb)) ++ goto out; ++ ++ if (dsl_scan_check_resume(scn, dnp, zb)) ++ goto out; ++ ++ if (bp->blk_birth == 0) ++ goto out; ++ ++ scn->scn_visited_this_txg++; ++ ++ /* ++ * This debugging is commented out to conserve stack space. This ++ * function is called recursively and the debugging addes several ++ * bytes to the stack for each call. It can be commented back in ++ * if required to debug an issue in dsl_scan_visitbp(). ++ * ++ * dprintf_bp(bp, ++ * "visiting ds=%p/%llu zb=%llx/%llx/%llx/%llx buf=%p bp=%p", ++ * ds, ds ? ds->ds_object : 0, ++ * zb->zb_objset, zb->zb_object, zb->zb_level, zb->zb_blkid, ++ * pbuf, bp); ++ */ ++ ++ if (bp->blk_birth <= scn->scn_phys.scn_cur_min_txg) ++ goto out; ++ ++ if (BP_GET_TYPE(bp) != DMU_OT_USERGROUP_USED) { ++ /* ++ * For non-user-accounting blocks, we need to read the ++ * new bp (from a deleted snapshot, found in ++ * check_existing_xlation). If we used the old bp, ++ * pointers inside this block from before we resumed ++ * would be untranslated. ++ * ++ * For user-accounting blocks, we need to read the old ++ * bp, because we will apply the entire space delta to ++ * it (original untranslated -> translations from ++ * deleted snap -> now). ++ */ ++ *bp_toread = *bp; ++ } ++ ++ if (dsl_scan_recurse(scn, ds, ostype, dnp, bp_toread, zb, tx, ++ &buf) != 0) ++ goto out; ++ ++ /* ++ * If dsl_scan_ddt() has aready visited this block, it will have ++ * already done any translations or scrubbing, so don't call the ++ * callback again. ++ */ ++ if (ddt_class_contains(dp->dp_spa, ++ scn->scn_phys.scn_ddt_class_max, bp)) { ++ ASSERT(buf == NULL); ++ goto out; ++ } ++ ++ /* ++ * If this block is from the future (after cur_max_txg), then we ++ * are doing this on behalf of a deleted snapshot, and we will ++ * revisit the future block on the next pass of this dataset. ++ * Don't scan it now unless we need to because something ++ * under it was modified. ++ */ ++ if (bp->blk_birth <= scn->scn_phys.scn_cur_max_txg) { ++ scan_funcs[scn->scn_phys.scn_func](dp, bp, zb); ++ } ++ if (buf) ++ (void) arc_buf_remove_ref(buf, &buf); ++out: ++ kmem_free(bp_toread, sizeof(blkptr_t)); ++} ++ ++static void ++dsl_scan_visit_rootbp(dsl_scan_t *scn, dsl_dataset_t *ds, blkptr_t *bp, ++ dmu_tx_t *tx) ++{ ++ zbookmark_t zb; ++ ++ SET_BOOKMARK(&zb, ds ? 
ds->ds_object : DMU_META_OBJSET, ++ ZB_ROOT_OBJECT, ZB_ROOT_LEVEL, ZB_ROOT_BLKID); ++ dsl_scan_visitbp(bp, &zb, NULL, NULL, ++ ds, scn, DMU_OST_NONE, tx); ++ ++ dprintf_ds(ds, "finished scan%s", ""); ++} ++ ++void ++dsl_scan_ds_destroyed(dsl_dataset_t *ds, dmu_tx_t *tx) ++{ ++ dsl_pool_t *dp = ds->ds_dir->dd_pool; ++ dsl_scan_t *scn = dp->dp_scan; ++ uint64_t mintxg; ++ ++ if (scn->scn_phys.scn_state != DSS_SCANNING) ++ return; ++ ++ if (scn->scn_phys.scn_bookmark.zb_objset == ds->ds_object) { ++ if (dsl_dataset_is_snapshot(ds)) { ++ /* Note, scn_cur_{min,max}_txg stays the same. */ ++ scn->scn_phys.scn_bookmark.zb_objset = ++ ds->ds_phys->ds_next_snap_obj; ++ zfs_dbgmsg("destroying ds %llu; currently traversing; " ++ "reset zb_objset to %llu", ++ (u_longlong_t)ds->ds_object, ++ (u_longlong_t)ds->ds_phys->ds_next_snap_obj); ++ scn->scn_phys.scn_flags |= DSF_VISIT_DS_AGAIN; ++ } else { ++ SET_BOOKMARK(&scn->scn_phys.scn_bookmark, ++ ZB_DESTROYED_OBJSET, 0, 0, 0); ++ zfs_dbgmsg("destroying ds %llu; currently traversing; " ++ "reset bookmark to -1,0,0,0", ++ (u_longlong_t)ds->ds_object); ++ } ++ } else if (zap_lookup_int_key(dp->dp_meta_objset, ++ scn->scn_phys.scn_queue_obj, ds->ds_object, &mintxg) == 0) { ++ ASSERT3U(ds->ds_phys->ds_num_children, <=, 1); ++ VERIFY3U(0, ==, zap_remove_int(dp->dp_meta_objset, ++ scn->scn_phys.scn_queue_obj, ds->ds_object, tx)); ++ if (dsl_dataset_is_snapshot(ds)) { ++ /* ++ * We keep the same mintxg; it could be > ++ * ds_creation_txg if the previous snapshot was ++ * deleted too. ++ */ ++ VERIFY(zap_add_int_key(dp->dp_meta_objset, ++ scn->scn_phys.scn_queue_obj, ++ ds->ds_phys->ds_next_snap_obj, mintxg, tx) == 0); ++ zfs_dbgmsg("destroying ds %llu; in queue; " ++ "replacing with %llu", ++ (u_longlong_t)ds->ds_object, ++ (u_longlong_t)ds->ds_phys->ds_next_snap_obj); ++ } else { ++ zfs_dbgmsg("destroying ds %llu; in queue; removing", ++ (u_longlong_t)ds->ds_object); ++ } ++ } else { ++ zfs_dbgmsg("destroying ds %llu; ignoring", ++ (u_longlong_t)ds->ds_object); ++ } ++ ++ /* ++ * dsl_scan_sync() should be called after this, and should sync ++ * out our changed state, but just to be safe, do it here. 
++ */ ++ dsl_scan_sync_state(scn, tx); ++} ++ ++void ++dsl_scan_ds_snapshotted(dsl_dataset_t *ds, dmu_tx_t *tx) ++{ ++ dsl_pool_t *dp = ds->ds_dir->dd_pool; ++ dsl_scan_t *scn = dp->dp_scan; ++ uint64_t mintxg; ++ ++ if (scn->scn_phys.scn_state != DSS_SCANNING) ++ return; ++ ++ ASSERT(ds->ds_phys->ds_prev_snap_obj != 0); ++ ++ if (scn->scn_phys.scn_bookmark.zb_objset == ds->ds_object) { ++ scn->scn_phys.scn_bookmark.zb_objset = ++ ds->ds_phys->ds_prev_snap_obj; ++ zfs_dbgmsg("snapshotting ds %llu; currently traversing; " ++ "reset zb_objset to %llu", ++ (u_longlong_t)ds->ds_object, ++ (u_longlong_t)ds->ds_phys->ds_prev_snap_obj); ++ } else if (zap_lookup_int_key(dp->dp_meta_objset, ++ scn->scn_phys.scn_queue_obj, ds->ds_object, &mintxg) == 0) { ++ VERIFY3U(0, ==, zap_remove_int(dp->dp_meta_objset, ++ scn->scn_phys.scn_queue_obj, ds->ds_object, tx)); ++ VERIFY(zap_add_int_key(dp->dp_meta_objset, ++ scn->scn_phys.scn_queue_obj, ++ ds->ds_phys->ds_prev_snap_obj, mintxg, tx) == 0); ++ zfs_dbgmsg("snapshotting ds %llu; in queue; " ++ "replacing with %llu", ++ (u_longlong_t)ds->ds_object, ++ (u_longlong_t)ds->ds_phys->ds_prev_snap_obj); ++ } ++ dsl_scan_sync_state(scn, tx); ++} ++ ++void ++dsl_scan_ds_clone_swapped(dsl_dataset_t *ds1, dsl_dataset_t *ds2, dmu_tx_t *tx) ++{ ++ dsl_pool_t *dp = ds1->ds_dir->dd_pool; ++ dsl_scan_t *scn = dp->dp_scan; ++ uint64_t mintxg; ++ ++ if (scn->scn_phys.scn_state != DSS_SCANNING) ++ return; ++ ++ if (scn->scn_phys.scn_bookmark.zb_objset == ds1->ds_object) { ++ scn->scn_phys.scn_bookmark.zb_objset = ds2->ds_object; ++ zfs_dbgmsg("clone_swap ds %llu; currently traversing; " ++ "reset zb_objset to %llu", ++ (u_longlong_t)ds1->ds_object, ++ (u_longlong_t)ds2->ds_object); ++ } else if (scn->scn_phys.scn_bookmark.zb_objset == ds2->ds_object) { ++ scn->scn_phys.scn_bookmark.zb_objset = ds1->ds_object; ++ zfs_dbgmsg("clone_swap ds %llu; currently traversing; " ++ "reset zb_objset to %llu", ++ (u_longlong_t)ds2->ds_object, ++ (u_longlong_t)ds1->ds_object); ++ } ++ ++ if (zap_lookup_int_key(dp->dp_meta_objset, scn->scn_phys.scn_queue_obj, ++ ds1->ds_object, &mintxg) == 0) { ++ int err; ++ ++ ASSERT3U(mintxg, ==, ds1->ds_phys->ds_prev_snap_txg); ++ ASSERT3U(mintxg, ==, ds2->ds_phys->ds_prev_snap_txg); ++ VERIFY3U(0, ==, zap_remove_int(dp->dp_meta_objset, ++ scn->scn_phys.scn_queue_obj, ds1->ds_object, tx)); ++ err = zap_add_int_key(dp->dp_meta_objset, ++ scn->scn_phys.scn_queue_obj, ds2->ds_object, mintxg, tx); ++ VERIFY(err == 0 || err == EEXIST); ++ if (err == EEXIST) { ++ /* Both were there to begin with */ ++ VERIFY(0 == zap_add_int_key(dp->dp_meta_objset, ++ scn->scn_phys.scn_queue_obj, ++ ds1->ds_object, mintxg, tx)); ++ } ++ zfs_dbgmsg("clone_swap ds %llu; in queue; " ++ "replacing with %llu", ++ (u_longlong_t)ds1->ds_object, ++ (u_longlong_t)ds2->ds_object); ++ } else if (zap_lookup_int_key(dp->dp_meta_objset, ++ scn->scn_phys.scn_queue_obj, ds2->ds_object, &mintxg) == 0) { ++ ASSERT3U(mintxg, ==, ds1->ds_phys->ds_prev_snap_txg); ++ ASSERT3U(mintxg, ==, ds2->ds_phys->ds_prev_snap_txg); ++ VERIFY3U(0, ==, zap_remove_int(dp->dp_meta_objset, ++ scn->scn_phys.scn_queue_obj, ds2->ds_object, tx)); ++ VERIFY(0 == zap_add_int_key(dp->dp_meta_objset, ++ scn->scn_phys.scn_queue_obj, ds1->ds_object, mintxg, tx)); ++ zfs_dbgmsg("clone_swap ds %llu; in queue; " ++ "replacing with %llu", ++ (u_longlong_t)ds2->ds_object, ++ (u_longlong_t)ds1->ds_object); ++ } ++ ++ dsl_scan_sync_state(scn, tx); ++} ++ ++struct enqueue_clones_arg { ++ dmu_tx_t *tx; ++ uint64_t originobj; ++}; 
++ ++/* ARGSUSED */ ++static int ++enqueue_clones_cb(spa_t *spa, uint64_t dsobj, const char *dsname, void *arg) ++{ ++ struct enqueue_clones_arg *eca = arg; ++ dsl_dataset_t *ds; ++ int err; ++ dsl_pool_t *dp = spa->spa_dsl_pool; ++ dsl_scan_t *scn = dp->dp_scan; ++ ++ err = dsl_dataset_hold_obj(dp, dsobj, FTAG, &ds); ++ if (err) ++ return (err); ++ ++ if (ds->ds_dir->dd_phys->dd_origin_obj == eca->originobj) { ++ while (ds->ds_phys->ds_prev_snap_obj != eca->originobj) { ++ dsl_dataset_t *prev; ++ err = dsl_dataset_hold_obj(dp, ++ ds->ds_phys->ds_prev_snap_obj, FTAG, &prev); ++ ++ dsl_dataset_rele(ds, FTAG); ++ if (err) ++ return (err); ++ ds = prev; ++ } ++ VERIFY(zap_add_int_key(dp->dp_meta_objset, ++ scn->scn_phys.scn_queue_obj, ds->ds_object, ++ ds->ds_phys->ds_prev_snap_txg, eca->tx) == 0); ++ } ++ dsl_dataset_rele(ds, FTAG); ++ return (0); ++} ++ ++static void ++dsl_scan_visitds(dsl_scan_t *scn, uint64_t dsobj, dmu_tx_t *tx) ++{ ++ dsl_pool_t *dp = scn->scn_dp; ++ dsl_dataset_t *ds; ++ objset_t *os; ++ char *dsname; ++ ++ VERIFY3U(0, ==, dsl_dataset_hold_obj(dp, dsobj, FTAG, &ds)); ++ ++ if (dmu_objset_from_ds(ds, &os)) ++ goto out; ++ ++ /* ++ * Only the ZIL in the head (non-snapshot) is valid. Even though ++ * snapshots can have ZIL block pointers (which may be the same ++ * BP as in the head), they must be ignored. So we traverse the ++ * ZIL here, rather than in scan_recurse(), because the regular ++ * snapshot block-sharing rules don't apply to it. ++ */ ++ if (DSL_SCAN_IS_SCRUB_RESILVER(scn) && !dsl_dataset_is_snapshot(ds)) ++ dsl_scan_zil(dp, &os->os_zil_header); ++ ++ /* ++ * Iterate over the bps in this ds. ++ */ ++ dmu_buf_will_dirty(ds->ds_dbuf, tx); ++ dsl_scan_visit_rootbp(scn, ds, &ds->ds_phys->ds_bp, tx); ++ ++ dsname = kmem_alloc(ZFS_MAXNAMELEN, KM_PUSHPAGE); ++ dsl_dataset_name(ds, dsname); ++ zfs_dbgmsg("scanned dataset %llu (%s) with min=%llu max=%llu; " ++ "pausing=%u", ++ (longlong_t)dsobj, dsname, ++ (longlong_t)scn->scn_phys.scn_cur_min_txg, ++ (longlong_t)scn->scn_phys.scn_cur_max_txg, ++ (int)scn->scn_pausing); ++ kmem_free(dsname, ZFS_MAXNAMELEN); ++ ++ if (scn->scn_pausing) ++ goto out; ++ ++ /* ++ * We've finished this pass over this dataset. ++ */ ++ ++ /* ++ * If we did not completely visit this dataset, do another pass. ++ */ ++ if (scn->scn_phys.scn_flags & DSF_VISIT_DS_AGAIN) { ++ zfs_dbgmsg("incomplete pass; visiting again"); ++ scn->scn_phys.scn_flags &= ~DSF_VISIT_DS_AGAIN; ++ VERIFY(zap_add_int_key(dp->dp_meta_objset, ++ scn->scn_phys.scn_queue_obj, ds->ds_object, ++ scn->scn_phys.scn_cur_max_txg, tx) == 0); ++ goto out; ++ } ++ ++ /* ++ * Add descendent datasets to work queue. ++ */ ++ if (ds->ds_phys->ds_next_snap_obj != 0) { ++ VERIFY(zap_add_int_key(dp->dp_meta_objset, ++ scn->scn_phys.scn_queue_obj, ds->ds_phys->ds_next_snap_obj, ++ ds->ds_phys->ds_creation_txg, tx) == 0); ++ } ++ if (ds->ds_phys->ds_num_children > 1) { ++ boolean_t usenext = B_FALSE; ++ if (ds->ds_phys->ds_next_clones_obj != 0) { ++ uint64_t count; ++ /* ++ * A bug in a previous version of the code could ++ * cause upgrade_clones_cb() to not set ++ * ds_next_snap_obj when it should, leading to a ++ * missing entry. Therefore we can only use the ++ * next_clones_obj when its count is correct. 
++ */ ++ int err = zap_count(dp->dp_meta_objset, ++ ds->ds_phys->ds_next_clones_obj, &count); ++ if (err == 0 && ++ count == ds->ds_phys->ds_num_children - 1) ++ usenext = B_TRUE; ++ } ++ ++ if (usenext) { ++ VERIFY(zap_join_key(dp->dp_meta_objset, ++ ds->ds_phys->ds_next_clones_obj, ++ scn->scn_phys.scn_queue_obj, ++ ds->ds_phys->ds_creation_txg, tx) == 0); ++ } else { ++ struct enqueue_clones_arg eca; ++ eca.tx = tx; ++ eca.originobj = ds->ds_object; ++ ++ (void) dmu_objset_find_spa(ds->ds_dir->dd_pool->dp_spa, ++ NULL, enqueue_clones_cb, &eca, DS_FIND_CHILDREN); ++ } ++ } ++ ++out: ++ dsl_dataset_rele(ds, FTAG); ++} ++ ++/* ARGSUSED */ ++static int ++enqueue_cb(spa_t *spa, uint64_t dsobj, const char *dsname, void *arg) ++{ ++ dmu_tx_t *tx = arg; ++ dsl_dataset_t *ds; ++ int err; ++ dsl_pool_t *dp = spa->spa_dsl_pool; ++ dsl_scan_t *scn = dp->dp_scan; ++ ++ err = dsl_dataset_hold_obj(dp, dsobj, FTAG, &ds); ++ if (err) ++ return (err); ++ ++ while (ds->ds_phys->ds_prev_snap_obj != 0) { ++ dsl_dataset_t *prev; ++ err = dsl_dataset_hold_obj(dp, ds->ds_phys->ds_prev_snap_obj, ++ FTAG, &prev); ++ if (err) { ++ dsl_dataset_rele(ds, FTAG); ++ return (err); ++ } ++ ++ /* ++ * If this is a clone, we don't need to worry about it for now. ++ */ ++ if (prev->ds_phys->ds_next_snap_obj != ds->ds_object) { ++ dsl_dataset_rele(ds, FTAG); ++ dsl_dataset_rele(prev, FTAG); ++ return (0); ++ } ++ dsl_dataset_rele(ds, FTAG); ++ ds = prev; ++ } ++ ++ VERIFY(zap_add_int_key(dp->dp_meta_objset, scn->scn_phys.scn_queue_obj, ++ ds->ds_object, ds->ds_phys->ds_prev_snap_txg, tx) == 0); ++ dsl_dataset_rele(ds, FTAG); ++ return (0); ++} ++ ++/* ++ * Scrub/dedup interaction. ++ * ++ * If there are N references to a deduped block, we don't want to scrub it ++ * N times -- ideally, we should scrub it exactly once. ++ * ++ * We leverage the fact that the dde's replication class (enum ddt_class) ++ * is ordered from highest replication class (DDT_CLASS_DITTO) to lowest ++ * (DDT_CLASS_UNIQUE) so that we may walk the DDT in that order. ++ * ++ * To prevent excess scrubbing, the scrub begins by walking the DDT ++ * to find all blocks with refcnt > 1, and scrubs each of these once. ++ * Since there are two replication classes which contain blocks with ++ * refcnt > 1, we scrub the highest replication class (DDT_CLASS_DITTO) first. ++ * Finally the top-down scrub begins, only visiting blocks with refcnt == 1. ++ * ++ * There would be nothing more to say if a block's refcnt couldn't change ++ * during a scrub, but of course it can so we must account for changes ++ * in a block's replication class. ++ * ++ * Here's an example of what can occur: ++ * ++ * If a block has refcnt > 1 during the DDT scrub phase, but has refcnt == 1 ++ * when visited during the top-down scrub phase, it will be scrubbed twice. ++ * This negates our scrub optimization, but is otherwise harmless. ++ * ++ * If a block has refcnt == 1 during the DDT scrub phase, but has refcnt > 1 ++ * on each visit during the top-down scrub phase, it will never be scrubbed. ++ * To catch this, ddt_sync_entry() notifies the scrub code whenever a block's ++ * reference class transitions to a higher level (i.e DDT_CLASS_UNIQUE to ++ * DDT_CLASS_DUPLICATE); if it transitions from refcnt == 1 to refcnt > 1 ++ * while a scrub is in progress, it scrubs the block right then. 
++ */ ++static void ++dsl_scan_ddt(dsl_scan_t *scn, dmu_tx_t *tx) ++{ ++ ddt_bookmark_t *ddb = &scn->scn_phys.scn_ddt_bookmark; ++ ddt_entry_t dde; ++ int error; ++ uint64_t n = 0; ++ ++ bzero(&dde, sizeof (ddt_entry_t)); ++ ++ while ((error = ddt_walk(scn->scn_dp->dp_spa, ddb, &dde)) == 0) { ++ ddt_t *ddt; ++ ++ if (ddb->ddb_class > scn->scn_phys.scn_ddt_class_max) ++ break; ++ dprintf("visiting ddb=%llu/%llu/%llu/%llx\n", ++ (longlong_t)ddb->ddb_class, ++ (longlong_t)ddb->ddb_type, ++ (longlong_t)ddb->ddb_checksum, ++ (longlong_t)ddb->ddb_cursor); ++ ++ /* There should be no pending changes to the dedup table */ ++ ddt = scn->scn_dp->dp_spa->spa_ddt[ddb->ddb_checksum]; ++ ASSERT(avl_first(&ddt->ddt_tree) == NULL); ++ ++ dsl_scan_ddt_entry(scn, ddb->ddb_checksum, &dde, tx); ++ n++; ++ ++ if (dsl_scan_check_pause(scn, NULL)) ++ break; ++ } ++ ++ zfs_dbgmsg("scanned %llu ddt entries with class_max = %u; pausing=%u", ++ (longlong_t)n, (int)scn->scn_phys.scn_ddt_class_max, ++ (int)scn->scn_pausing); ++ ++ ASSERT(error == 0 || error == ENOENT); ++ ASSERT(error != ENOENT || ++ ddb->ddb_class > scn->scn_phys.scn_ddt_class_max); ++} ++ ++/* ARGSUSED */ ++void ++dsl_scan_ddt_entry(dsl_scan_t *scn, enum zio_checksum checksum, ++ ddt_entry_t *dde, dmu_tx_t *tx) ++{ ++ const ddt_key_t *ddk = &dde->dde_key; ++ ddt_phys_t *ddp = dde->dde_phys; ++ blkptr_t bp; ++ zbookmark_t zb = { 0 }; ++ int p; ++ ++ if (scn->scn_phys.scn_state != DSS_SCANNING) ++ return; ++ ++ for (p = 0; p < DDT_PHYS_TYPES; p++, ddp++) { ++ if (ddp->ddp_phys_birth == 0 || ++ ddp->ddp_phys_birth > scn->scn_phys.scn_cur_max_txg) ++ continue; ++ ddt_bp_create(checksum, ddk, ddp, &bp); ++ ++ scn->scn_visited_this_txg++; ++ scan_funcs[scn->scn_phys.scn_func](scn->scn_dp, &bp, &zb); ++ } ++} ++ ++static void ++dsl_scan_visit(dsl_scan_t *scn, dmu_tx_t *tx) ++{ ++ dsl_pool_t *dp = scn->scn_dp; ++ zap_cursor_t *zc; ++ zap_attribute_t *za; ++ ++ if (scn->scn_phys.scn_ddt_bookmark.ddb_class <= ++ scn->scn_phys.scn_ddt_class_max) { ++ scn->scn_phys.scn_cur_min_txg = scn->scn_phys.scn_min_txg; ++ scn->scn_phys.scn_cur_max_txg = scn->scn_phys.scn_max_txg; ++ dsl_scan_ddt(scn, tx); ++ if (scn->scn_pausing) ++ return; ++ } ++ ++ if (scn->scn_phys.scn_bookmark.zb_objset == DMU_META_OBJSET) { ++ /* First do the MOS & ORIGIN */ ++ ++ scn->scn_phys.scn_cur_min_txg = scn->scn_phys.scn_min_txg; ++ scn->scn_phys.scn_cur_max_txg = scn->scn_phys.scn_max_txg; ++ dsl_scan_visit_rootbp(scn, NULL, ++ &dp->dp_meta_rootbp, tx); ++ spa_set_rootblkptr(dp->dp_spa, &dp->dp_meta_rootbp); ++ if (scn->scn_pausing) ++ return; ++ ++ if (spa_version(dp->dp_spa) < SPA_VERSION_DSL_SCRUB) { ++ VERIFY(0 == dmu_objset_find_spa(dp->dp_spa, ++ NULL, enqueue_cb, tx, DS_FIND_CHILDREN)); ++ } else { ++ dsl_scan_visitds(scn, ++ dp->dp_origin_snap->ds_object, tx); ++ } ++ ASSERT(!scn->scn_pausing); ++ } else if (scn->scn_phys.scn_bookmark.zb_objset != ++ ZB_DESTROYED_OBJSET) { ++ /* ++ * If we were paused, continue from here. Note if the ++ * ds we were paused on was deleted, the zb_objset may ++ * be -1, so we will skip this and find a new objset ++ * below. ++ */ ++ dsl_scan_visitds(scn, scn->scn_phys.scn_bookmark.zb_objset, tx); ++ if (scn->scn_pausing) ++ return; ++ } ++ ++ /* ++ * In case we were paused right at the end of the ds, zero the ++ * bookmark so we don't think that we're still trying to resume. 
++ */ ++ bzero(&scn->scn_phys.scn_bookmark, sizeof (zbookmark_t)); ++ zc = kmem_alloc(sizeof(zap_cursor_t), KM_PUSHPAGE); ++ za = kmem_alloc(sizeof(zap_attribute_t), KM_PUSHPAGE); ++ ++ /* keep pulling things out of the zap-object-as-queue */ ++ while (zap_cursor_init(zc, dp->dp_meta_objset, ++ scn->scn_phys.scn_queue_obj), ++ zap_cursor_retrieve(zc, za) == 0) { ++ dsl_dataset_t *ds; ++ uint64_t dsobj; ++ ++ dsobj = strtonum(za->za_name, NULL); ++ VERIFY3U(0, ==, zap_remove_int(dp->dp_meta_objset, ++ scn->scn_phys.scn_queue_obj, dsobj, tx)); ++ ++ /* Set up min/max txg */ ++ VERIFY3U(0, ==, dsl_dataset_hold_obj(dp, dsobj, FTAG, &ds)); ++ if (za->za_first_integer != 0) { ++ scn->scn_phys.scn_cur_min_txg = ++ MAX(scn->scn_phys.scn_min_txg, ++ za->za_first_integer); ++ } else { ++ scn->scn_phys.scn_cur_min_txg = ++ MAX(scn->scn_phys.scn_min_txg, ++ ds->ds_phys->ds_prev_snap_txg); ++ } ++ scn->scn_phys.scn_cur_max_txg = dsl_scan_ds_maxtxg(ds); ++ dsl_dataset_rele(ds, FTAG); ++ ++ dsl_scan_visitds(scn, dsobj, tx); ++ zap_cursor_fini(zc); ++ if (scn->scn_pausing) ++ goto out; ++ } ++ zap_cursor_fini(zc); ++out: ++ kmem_free(za, sizeof(zap_attribute_t)); ++ kmem_free(zc, sizeof(zap_cursor_t)); ++} ++ ++static int ++dsl_scan_free_cb(void *arg, const blkptr_t *bp, dmu_tx_t *tx) ++{ ++ dsl_scan_t *scn = arg; ++ uint64_t elapsed_nanosecs; ++ ++ elapsed_nanosecs = gethrtime() - scn->scn_sync_start_time; ++ ++ if (elapsed_nanosecs / NANOSEC > zfs_txg_timeout || ++ (elapsed_nanosecs / MICROSEC > zfs_free_min_time_ms && ++ txg_sync_waiting(scn->scn_dp)) || ++ spa_shutting_down(scn->scn_dp->dp_spa)) ++ return (ERESTART); ++ ++ zio_nowait(zio_free_sync(scn->scn_zio_root, scn->scn_dp->dp_spa, ++ dmu_tx_get_txg(tx), bp, 0)); ++ dsl_dir_diduse_space(tx->tx_pool->dp_free_dir, DD_USED_HEAD, ++ -bp_get_dsize_sync(scn->scn_dp->dp_spa, bp), ++ -BP_GET_PSIZE(bp), -BP_GET_UCSIZE(bp), tx); ++ scn->scn_visited_this_txg++; ++ return (0); ++} ++ ++boolean_t ++dsl_scan_active(dsl_scan_t *scn) ++{ ++ spa_t *spa = scn->scn_dp->dp_spa; ++ uint64_t used = 0, comp, uncomp; ++ ++ if (spa->spa_load_state != SPA_LOAD_NONE) ++ return (B_FALSE); ++ if (spa_shutting_down(spa)) ++ return (B_FALSE); ++ ++ if (scn->scn_phys.scn_state == DSS_SCANNING) ++ return (B_TRUE); ++ ++ if (spa_version(scn->scn_dp->dp_spa) >= SPA_VERSION_DEADLISTS) { ++ (void) bpobj_space(&scn->scn_dp->dp_free_bpobj, ++ &used, &comp, &uncomp); ++ } ++ return (used != 0); ++} ++ ++void ++dsl_scan_sync(dsl_pool_t *dp, dmu_tx_t *tx) ++{ ++ dsl_scan_t *scn = dp->dp_scan; ++ spa_t *spa = dp->dp_spa; ++ int err; ++ ++ /* ++ * Check for scn_restart_txg before checking spa_load_state, so ++ * that we can restart an old-style scan while the pool is being ++ * imported (see dsl_scan_init). ++ */ ++ if (scn->scn_restart_txg != 0 && ++ scn->scn_restart_txg <= tx->tx_txg) { ++ pool_scan_func_t func = POOL_SCAN_SCRUB; ++ dsl_scan_done(scn, B_FALSE, tx); ++ if (vdev_resilver_needed(spa->spa_root_vdev, NULL, NULL)) ++ func = POOL_SCAN_RESILVER; ++ zfs_dbgmsg("restarting scan func=%u txg=%llu", ++ func, tx->tx_txg); ++ dsl_scan_setup_sync(scn, &func, tx); ++ } ++ ++ if (!dsl_scan_active(scn) || ++ spa_sync_pass(dp->dp_spa) > 1) ++ return; ++ ++ scn->scn_visited_this_txg = 0; ++ scn->scn_pausing = B_FALSE; ++ scn->scn_sync_start_time = gethrtime(); ++ spa->spa_scrub_active = B_TRUE; ++ ++ /* ++ * First process the free list. If we pause the free, don't do ++ * any scanning. 
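dsl_scan_free_cb() above stops early by returning ERESTART once the pass has used up its time budget, and the unfinished work simply resumes on the next sync pass. A minimal standalone sketch of that budget-and-resume pattern follows, assuming POSIX clock_gettime() in place of gethrtime() and a plain counter in place of the bpobj cursor; all names and constants are hypothetical.

/* Illustrative sketch only: per-pass time budget with a resume point,
 * modelled on the ERESTART convention but with no ZFS machinery. */
#include <stdio.h>
#include <time.h>

#define BUDGET_NS       (5LL * 1000 * 1000)     /* 5 ms of work per pass */
#define NITEMS          100000

static volatile unsigned long sink;

static long long
now_ns(void)
{
        struct timespec ts;

        clock_gettime(CLOCK_MONOTONIC, &ts);
        return ((long long)ts.tv_sec * 1000000000LL + ts.tv_nsec);
}

static void
free_one_block(void)
{
        unsigned long j;

        /* Stand-in for issuing one free: burn a little CPU per item. */
        for (j = 0; j < 1000; j++)
                sink += j;
}

int
main(void)
{
        int next = 0;   /* resume point carried between passes (the "bookmark") */
        int pass;

        for (pass = 0; next < NITEMS; pass++) {
                long long start = now_ns();
                int done = 0;

                /* Do work until this pass's time budget is exhausted. */
                while (next < NITEMS && now_ns() - start <= BUDGET_NS) {
                        free_one_block();
                        next++;
                        done++;
                }
                printf("pass %d: processed %d items, %d remaining\n",
                    pass, done, NITEMS - next);
        }
        return (0);
}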
This ensures that there is no free list when ++ * we are scanning, so the scan code doesn't have to worry about ++ * traversing it. ++ */ ++ if (spa_version(dp->dp_spa) >= SPA_VERSION_DEADLISTS) { ++ scn->scn_zio_root = zio_root(dp->dp_spa, NULL, ++ NULL, ZIO_FLAG_MUSTSUCCEED); ++ err = bpobj_iterate(&dp->dp_free_bpobj, ++ dsl_scan_free_cb, scn, tx); ++ VERIFY3U(0, ==, zio_wait(scn->scn_zio_root)); ++ if (scn->scn_visited_this_txg) { ++ zfs_dbgmsg("freed %llu blocks in %llums from " ++ "free_bpobj txg %llu", ++ (longlong_t)scn->scn_visited_this_txg, ++ (longlong_t) ++ (gethrtime() - scn->scn_sync_start_time) / MICROSEC, ++ (longlong_t)tx->tx_txg); ++ scn->scn_visited_this_txg = 0; ++ /* ++ * Re-sync the ddt so that we can further modify ++ * it when doing bprewrite. ++ */ ++ ddt_sync(spa, tx->tx_txg); ++ } ++ if (err == ERESTART) ++ return; ++ } ++ ++ if (scn->scn_phys.scn_state != DSS_SCANNING) ++ return; ++ ++ if (scn->scn_phys.scn_ddt_bookmark.ddb_class <= ++ scn->scn_phys.scn_ddt_class_max) { ++ zfs_dbgmsg("doing scan sync txg %llu; " ++ "ddt bm=%llu/%llu/%llu/%llx", ++ (longlong_t)tx->tx_txg, ++ (longlong_t)scn->scn_phys.scn_ddt_bookmark.ddb_class, ++ (longlong_t)scn->scn_phys.scn_ddt_bookmark.ddb_type, ++ (longlong_t)scn->scn_phys.scn_ddt_bookmark.ddb_checksum, ++ (longlong_t)scn->scn_phys.scn_ddt_bookmark.ddb_cursor); ++ ASSERT(scn->scn_phys.scn_bookmark.zb_objset == 0); ++ ASSERT(scn->scn_phys.scn_bookmark.zb_object == 0); ++ ASSERT(scn->scn_phys.scn_bookmark.zb_level == 0); ++ ASSERT(scn->scn_phys.scn_bookmark.zb_blkid == 0); ++ } else { ++ zfs_dbgmsg("doing scan sync txg %llu; bm=%llu/%llu/%llu/%llu", ++ (longlong_t)tx->tx_txg, ++ (longlong_t)scn->scn_phys.scn_bookmark.zb_objset, ++ (longlong_t)scn->scn_phys.scn_bookmark.zb_object, ++ (longlong_t)scn->scn_phys.scn_bookmark.zb_level, ++ (longlong_t)scn->scn_phys.scn_bookmark.zb_blkid); ++ } ++ ++ scn->scn_zio_root = zio_root(dp->dp_spa, NULL, ++ NULL, ZIO_FLAG_CANFAIL); ++ dsl_scan_visit(scn, tx); ++ (void) zio_wait(scn->scn_zio_root); ++ scn->scn_zio_root = NULL; ++ ++ zfs_dbgmsg("visited %llu blocks in %llums", ++ (longlong_t)scn->scn_visited_this_txg, ++ (longlong_t)(gethrtime() - scn->scn_sync_start_time) / MICROSEC); ++ ++ if (!scn->scn_pausing) { ++ /* finished with scan. */ ++ zfs_dbgmsg("finished scan txg %llu", (longlong_t)tx->tx_txg); ++ dsl_scan_done(scn, B_TRUE, tx); ++ } ++ ++ if (DSL_SCAN_IS_SCRUB_RESILVER(scn)) { ++ mutex_enter(&spa->spa_scrub_lock); ++ while (spa->spa_scrub_inflight > 0) { ++ cv_wait(&spa->spa_scrub_io_cv, ++ &spa->spa_scrub_lock); ++ } ++ mutex_exit(&spa->spa_scrub_lock); ++ } ++ ++ dsl_scan_sync_state(scn, tx); ++} ++ ++/* ++ * This will start a new scan, or restart an existing one. ++ */ ++void ++dsl_resilver_restart(dsl_pool_t *dp, uint64_t txg) ++{ ++ if (txg == 0) { ++ dmu_tx_t *tx; ++ tx = dmu_tx_create_dd(dp->dp_mos_dir); ++ VERIFY(0 == dmu_tx_assign(tx, TXG_WAIT)); ++ ++ txg = dmu_tx_get_txg(tx); ++ dp->dp_scan->scn_restart_txg = txg; ++ dmu_tx_commit(tx); ++ } else { ++ dp->dp_scan->scn_restart_txg = txg; ++ } ++ zfs_dbgmsg("restarting resilver txg=%llu", txg); ++} ++ ++boolean_t ++dsl_scan_resilvering(dsl_pool_t *dp) ++{ ++ return (dp->dp_scan->scn_phys.scn_state == DSS_SCANNING && ++ dp->dp_scan->scn_phys.scn_func == POOL_SCAN_RESILVER); ++} ++ ++/* ++ * scrub consumers ++ */ ++ ++static void ++count_block(zfs_all_blkstats_t *zab, const blkptr_t *bp) ++{ ++ int i; ++ ++ /* ++ * If we resume after a reboot, zab will be NULL; don't record ++ * incomplete stats in that case. 
++ */ ++ if (zab == NULL) ++ return; ++ ++ for (i = 0; i < 4; i++) { ++ int l = (i < 2) ? BP_GET_LEVEL(bp) : DN_MAX_LEVELS; ++ int t = (i & 1) ? BP_GET_TYPE(bp) : DMU_OT_TOTAL; ++ zfs_blkstat_t *zb = &zab->zab_type[l][t]; ++ int equal; ++ ++ zb->zb_count++; ++ zb->zb_asize += BP_GET_ASIZE(bp); ++ zb->zb_lsize += BP_GET_LSIZE(bp); ++ zb->zb_psize += BP_GET_PSIZE(bp); ++ zb->zb_gangs += BP_COUNT_GANG(bp); ++ ++ switch (BP_GET_NDVAS(bp)) { ++ case 2: ++ if (DVA_GET_VDEV(&bp->blk_dva[0]) == ++ DVA_GET_VDEV(&bp->blk_dva[1])) ++ zb->zb_ditto_2_of_2_samevdev++; ++ break; ++ case 3: ++ equal = (DVA_GET_VDEV(&bp->blk_dva[0]) == ++ DVA_GET_VDEV(&bp->blk_dva[1])) + ++ (DVA_GET_VDEV(&bp->blk_dva[0]) == ++ DVA_GET_VDEV(&bp->blk_dva[2])) + ++ (DVA_GET_VDEV(&bp->blk_dva[1]) == ++ DVA_GET_VDEV(&bp->blk_dva[2])); ++ if (equal == 1) ++ zb->zb_ditto_2_of_3_samevdev++; ++ else if (equal == 3) ++ zb->zb_ditto_3_of_3_samevdev++; ++ break; ++ } ++ } ++} ++ ++static void ++dsl_scan_scrub_done(zio_t *zio) ++{ ++ spa_t *spa = zio->io_spa; ++ ++ zio_data_buf_free(zio->io_data, zio->io_size); ++ ++ mutex_enter(&spa->spa_scrub_lock); ++ spa->spa_scrub_inflight--; ++ cv_broadcast(&spa->spa_scrub_io_cv); ++ ++ if (zio->io_error && (zio->io_error != ECKSUM || ++ !(zio->io_flags & ZIO_FLAG_SPECULATIVE))) { ++ spa->spa_dsl_pool->dp_scan->scn_phys.scn_errors++; ++ } ++ mutex_exit(&spa->spa_scrub_lock); ++} ++ ++static int ++dsl_scan_scrub_cb(dsl_pool_t *dp, ++ const blkptr_t *bp, const zbookmark_t *zb) ++{ ++ dsl_scan_t *scn = dp->dp_scan; ++ size_t size = BP_GET_PSIZE(bp); ++ spa_t *spa = dp->dp_spa; ++ uint64_t phys_birth = BP_PHYSICAL_BIRTH(bp); ++ boolean_t needs_io = B_FALSE; ++ int zio_flags = ZIO_FLAG_SCAN_THREAD | ZIO_FLAG_RAW | ZIO_FLAG_CANFAIL; ++ int zio_priority = 0; ++ int scan_delay = 0; ++ int d; ++ ++ if (phys_birth <= scn->scn_phys.scn_min_txg || ++ phys_birth >= scn->scn_phys.scn_max_txg) ++ return (0); ++ ++ count_block(dp->dp_blkstats, bp); ++ ++ ASSERT(DSL_SCAN_IS_SCRUB_RESILVER(scn)); ++ if (scn->scn_phys.scn_func == POOL_SCAN_SCRUB) { ++ zio_flags |= ZIO_FLAG_SCRUB; ++ zio_priority = ZIO_PRIORITY_SCRUB; ++ needs_io = B_TRUE; ++ scan_delay = zfs_scrub_delay; ++ } else if (scn->scn_phys.scn_func == POOL_SCAN_RESILVER) { ++ zio_flags |= ZIO_FLAG_RESILVER; ++ zio_priority = ZIO_PRIORITY_RESILVER; ++ needs_io = B_FALSE; ++ scan_delay = zfs_resilver_delay; ++ } ++ ++ /* If it's an intent log block, failure is expected. */ ++ if (zb->zb_level == ZB_ZIL_LEVEL) ++ zio_flags |= ZIO_FLAG_SPECULATIVE; ++ ++ for (d = 0; d < BP_GET_NDVAS(bp); d++) { ++ vdev_t *vd = vdev_lookup_top(spa, ++ DVA_GET_VDEV(&bp->blk_dva[d])); ++ ++ /* ++ * Keep track of how much data we've examined so that ++ * zpool(1M) status can make useful progress reports. ++ */ ++ scn->scn_phys.scn_examined += DVA_GET_ASIZE(&bp->blk_dva[d]); ++ spa->spa_scan_pass_exam += DVA_GET_ASIZE(&bp->blk_dva[d]); ++ ++ /* if it's a resilver, this may not be in the target range */ ++ if (!needs_io) { ++ if (DVA_GET_GANG(&bp->blk_dva[d])) { ++ /* ++ * Gang members may be spread across multiple ++ * vdevs, so the best estimate we have is the ++ * scrub range, which has already been checked. ++ * XXX -- it would be better to change our ++ * allocation policy to ensure that all ++ * gang members reside on the same vdev. 
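count_block() above updates four histogram cells per block -- (level, type), (level, all-types), (all-levels, type) and (all-levels, all-types) -- by letting the loop index select either the real coordinate or the totals row/column. Here is that accumulation idiom in isolation, with made-up dimensions standing in for DN_MAX_LEVELS and DMU_OT_TOTAL.

/* Illustrative sketch only: the four-way marginal-totals update used by
 * count_block(), with hypothetical sizes instead of the ZFS constants. */
#include <stdio.h>

#define NLEVELS         4       /* per-level rows */
#define NTYPES          8       /* per-type columns */
#define LEVEL_TOTAL     NLEVELS /* extra row for "all levels" */
#define TYPE_TOTAL      NTYPES  /* extra column for "all types" */

static unsigned long long counts[NLEVELS + 1][NTYPES + 1];

static void
count_one(int level, int type)
{
        int i;

        for (i = 0; i < 4; i++) {
                int l = (i < 2) ? level : LEVEL_TOTAL;
                int t = (i & 1) ? type : TYPE_TOTAL;

                counts[l][t]++;
        }
}

int
main(void)
{
        count_one(0, 3);
        count_one(1, 3);
        count_one(0, 5);

        printf("level 0, all types: %llu\n", counts[0][TYPE_TOTAL]);
        printf("all levels, type 3: %llu\n", counts[LEVEL_TOTAL][3]);
        printf("grand total:        %llu\n", counts[LEVEL_TOTAL][TYPE_TOTAL]);
        return (0);
}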
++ */ ++ needs_io = B_TRUE; ++ } else { ++ needs_io = vdev_dtl_contains(vd, DTL_PARTIAL, ++ phys_birth, 1); ++ } ++ } ++ } ++ ++ if (needs_io && !zfs_no_scrub_io) { ++ vdev_t *rvd = spa->spa_root_vdev; ++ uint64_t maxinflight = rvd->vdev_children * zfs_top_maxinflight; ++ void *data = zio_data_buf_alloc(size); ++ ++ mutex_enter(&spa->spa_scrub_lock); ++ while (spa->spa_scrub_inflight >= maxinflight) ++ cv_wait(&spa->spa_scrub_io_cv, &spa->spa_scrub_lock); ++ spa->spa_scrub_inflight++; ++ mutex_exit(&spa->spa_scrub_lock); ++ ++ /* ++ * If we're seeing recent (zfs_scan_idle) "important" I/Os ++ * then throttle our workload to limit the impact of a scan. ++ */ ++ if (ddi_get_lbolt64() - spa->spa_last_io <= zfs_scan_idle) ++ delay(scan_delay); ++ ++ zio_nowait(zio_read(NULL, spa, bp, data, size, ++ dsl_scan_scrub_done, NULL, zio_priority, ++ zio_flags, zb)); ++ } ++ ++ /* do not relocate this block */ ++ return (0); ++} ++ ++int ++dsl_scan(dsl_pool_t *dp, pool_scan_func_t func) ++{ ++ spa_t *spa = dp->dp_spa; ++ ++ /* ++ * Purge all vdev caches and probe all devices. We do this here ++ * rather than in sync context because this requires a writer lock ++ * on the spa_config lock, which we can't do from sync context. The ++ * spa_scrub_reopen flag indicates that vdev_open() should not ++ * attempt to start another scrub. ++ */ ++ spa_vdev_state_enter(spa, SCL_NONE); ++ spa->spa_scrub_reopen = B_TRUE; ++ vdev_reopen(spa->spa_root_vdev); ++ spa->spa_scrub_reopen = B_FALSE; ++ (void) spa_vdev_state_exit(spa, NULL, 0); ++ ++ return (dsl_sync_task_do(dp, dsl_scan_setup_check, ++ dsl_scan_setup_sync, dp->dp_scan, &func, 0)); ++} ++ ++#if defined(_KERNEL) && defined(HAVE_SPL) ++module_param(zfs_top_maxinflight, int, 0644); ++MODULE_PARM_DESC(zfs_top_maxinflight, "Max I/Os per top-level"); ++ ++module_param(zfs_resilver_delay, int, 0644); ++MODULE_PARM_DESC(zfs_resilver_delay, "Number of ticks to delay resilver"); ++ ++module_param(zfs_scrub_delay, int, 0644); ++MODULE_PARM_DESC(zfs_scrub_delay, "Number of ticks to delay scrub"); ++ ++module_param(zfs_scan_idle, int, 0644); ++MODULE_PARM_DESC(zfs_scan_idle, "Idle window in clock ticks"); ++ ++module_param(zfs_scan_min_time_ms, int, 0644); ++MODULE_PARM_DESC(zfs_scan_min_time_ms, "Min millisecs to scrub per txg"); ++ ++module_param(zfs_free_min_time_ms, int, 0644); ++MODULE_PARM_DESC(zfs_free_min_time_ms, "Min millisecs to free per txg"); ++ ++module_param(zfs_resilver_min_time_ms, int, 0644); ++MODULE_PARM_DESC(zfs_resilver_min_time_ms, "Min millisecs to resilver per txg"); ++ ++module_param(zfs_no_scrub_io, int, 0644); ++MODULE_PARM_DESC(zfs_no_scrub_io, "Set to disable scrub I/O"); ++ ++module_param(zfs_no_scrub_prefetch, int, 0644); ++MODULE_PARM_DESC(zfs_no_scrub_prefetch, "Set to disable scrub prefetching"); ++#endif +diff -uNr linux-3.2.33-go.orig/fs/zfs/zfs/dsl_synctask.c linux-3.2.33-go/fs/zfs/zfs/dsl_synctask.c +--- linux-3.2.33-go.orig/fs/zfs/zfs/dsl_synctask.c 1970-01-01 01:00:00.000000000 +0100 ++++ linux-3.2.33-go/fs/zfs/zfs/dsl_synctask.c 2012-11-16 23:25:34.351039311 +0100 +@@ -0,0 +1,245 @@ ++/* ++ * CDDL HEADER START ++ * ++ * The contents of this file are subject to the terms of the ++ * Common Development and Distribution License (the "License"). ++ * You may not use this file except in compliance with the License. ++ * ++ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE ++ * or http://www.opensolaris.org/os/licensing. 
++ * See the License for the specific language governing permissions ++ * and limitations under the License. ++ * ++ * When distributing Covered Code, include this CDDL HEADER in each ++ * file and include the License file at usr/src/OPENSOLARIS.LICENSE. ++ * If applicable, add the following below this CDDL HEADER, with the ++ * fields enclosed by brackets "[]" replaced with your own identifying ++ * information: Portions Copyright [yyyy] [name of copyright owner] ++ * ++ * CDDL HEADER END ++ */ ++/* ++ * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved. ++ */ ++ ++#include ++#include ++#include ++#include ++#include ++#include ++ ++#define DST_AVG_BLKSHIFT 14 ++ ++/* ARGSUSED */ ++static int ++dsl_null_checkfunc(void *arg1, void *arg2, dmu_tx_t *tx) ++{ ++ return (0); ++} ++ ++dsl_sync_task_group_t * ++dsl_sync_task_group_create(dsl_pool_t *dp) ++{ ++ dsl_sync_task_group_t *dstg; ++ ++ dstg = kmem_zalloc(sizeof (dsl_sync_task_group_t), KM_SLEEP); ++ list_create(&dstg->dstg_tasks, sizeof (dsl_sync_task_t), ++ offsetof(dsl_sync_task_t, dst_node)); ++ dstg->dstg_pool = dp; ++ ++ return (dstg); ++} ++ ++void ++dsl_sync_task_create(dsl_sync_task_group_t *dstg, ++ dsl_checkfunc_t *checkfunc, dsl_syncfunc_t *syncfunc, ++ void *arg1, void *arg2, int blocks_modified) ++{ ++ dsl_sync_task_t *dst; ++ ++ if (checkfunc == NULL) ++ checkfunc = dsl_null_checkfunc; ++ dst = kmem_zalloc(sizeof (dsl_sync_task_t), KM_SLEEP); ++ dst->dst_checkfunc = checkfunc; ++ dst->dst_syncfunc = syncfunc; ++ dst->dst_arg1 = arg1; ++ dst->dst_arg2 = arg2; ++ list_insert_tail(&dstg->dstg_tasks, dst); ++ ++ dstg->dstg_space += blocks_modified << DST_AVG_BLKSHIFT; ++} ++ ++int ++dsl_sync_task_group_wait(dsl_sync_task_group_t *dstg) ++{ ++ dmu_tx_t *tx; ++ uint64_t txg; ++ dsl_sync_task_t *dst; ++ ++top: ++ tx = dmu_tx_create_dd(dstg->dstg_pool->dp_mos_dir); ++ VERIFY(0 == dmu_tx_assign(tx, TXG_WAIT)); ++ ++ txg = dmu_tx_get_txg(tx); ++ ++ /* Do a preliminary error check. */ ++ dstg->dstg_err = 0; ++ rw_enter(&dstg->dstg_pool->dp_config_rwlock, RW_READER); ++ for (dst = list_head(&dstg->dstg_tasks); dst; ++ dst = list_next(&dstg->dstg_tasks, dst)) { ++#ifdef ZFS_DEBUG ++ /* ++ * Only check half the time, otherwise, the sync-context ++ * check will almost never fail. ++ */ ++ if (spa_get_random(2) == 0) ++ continue; ++#endif ++ dst->dst_err = ++ dst->dst_checkfunc(dst->dst_arg1, dst->dst_arg2, tx); ++ if (dst->dst_err) ++ dstg->dstg_err = dst->dst_err; ++ } ++ rw_exit(&dstg->dstg_pool->dp_config_rwlock); ++ ++ if (dstg->dstg_err) { ++ dmu_tx_commit(tx); ++ return (dstg->dstg_err); ++ } ++ ++ /* ++ * We don't generally have many sync tasks, so pay the price of ++ * add_tail to get the tasks executed in the right order. ++ */ ++ VERIFY(0 == txg_list_add_tail(&dstg->dstg_pool->dp_sync_tasks, ++ dstg, txg)); ++ ++ dmu_tx_commit(tx); ++ ++ txg_wait_synced(dstg->dstg_pool, txg); ++ ++ if (dstg->dstg_err == EAGAIN) { ++ txg_wait_synced(dstg->dstg_pool, txg + TXG_DEFER_SIZE); ++ goto top; ++ } ++ ++ return (dstg->dstg_err); ++} ++ ++void ++dsl_sync_task_group_nowait(dsl_sync_task_group_t *dstg, dmu_tx_t *tx) ++{ ++ uint64_t txg; ++ ++ dstg->dstg_nowaiter = B_TRUE; ++ txg = dmu_tx_get_txg(tx); ++ /* ++ * We don't generally have many sync tasks, so pay the price of ++ * add_tail to get the tasks executed in the right order. 
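dsl_sync_task_group_wait() above (together with dsl_sync_task_group_sync() below) follows a two-phase shape: the check function runs once optimistically when the task is queued, runs again authoritatively just before the sync function is applied, and an EAGAIN result asks the caller to wait for another txg and retry from the top. The single-threaded sketch below traces that control flow with hypothetical callback names and no real transaction machinery.

/* Illustrative sketch only: check-then-apply with an EAGAIN retry loop,
 * loosely modelled on dsl_sync_task_do() but with no txg engine. */
#include <errno.h>
#include <stdio.h>

typedef int (*checkfunc_t)(void *arg);
typedef void (*syncfunc_t)(void *arg);

static int
task_do(checkfunc_t check, syncfunc_t sync, void *arg)
{
        int err;

top:
        /* Preliminary check ("open context"): fail fast if clearly hopeless. */
        if ((err = check(arg)) != 0)
                return (err);

        /* Authoritative check immediately before applying ("sync context"). */
        if ((err = check(arg)) == 0)
                sync(arg);

        /* EAGAIN means "wait for another pass and try the whole thing again". */
        if (err == EAGAIN)
                goto top;

        return (err);
}

static int
check_space(void *arg)
{
        int *free_blocks = arg;

        return (*free_blocks > 0 ? 0 : ENOSPC);
}

static void
consume_space(void *arg)
{
        int *free_blocks = arg;

        (*free_blocks)--;
        printf("applied; %d blocks left\n", *free_blocks);
}

int
main(void)
{
        int free_blocks = 2;

        while (task_do(check_space, consume_space, &free_blocks) == 0)
                ;
        return (0);
}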
++ */ ++ VERIFY(0 == txg_list_add_tail(&dstg->dstg_pool->dp_sync_tasks, ++ dstg, txg)); ++} ++ ++void ++dsl_sync_task_group_destroy(dsl_sync_task_group_t *dstg) ++{ ++ dsl_sync_task_t *dst; ++ ++ while ((dst = list_head(&dstg->dstg_tasks))) { ++ list_remove(&dstg->dstg_tasks, dst); ++ kmem_free(dst, sizeof (dsl_sync_task_t)); ++ } ++ kmem_free(dstg, sizeof (dsl_sync_task_group_t)); ++} ++ ++void ++dsl_sync_task_group_sync(dsl_sync_task_group_t *dstg, dmu_tx_t *tx) ++{ ++ dsl_sync_task_t *dst; ++ dsl_pool_t *dp = dstg->dstg_pool; ++ uint64_t quota, used; ++ ++ ASSERT3U(dstg->dstg_err, ==, 0); ++ ++ /* ++ * Check for sufficient space. We just check against what's ++ * on-disk; we don't want any in-flight accounting to get in our ++ * way, because open context may have already used up various ++ * in-core limits (arc_tempreserve, dsl_pool_tempreserve). ++ */ ++ quota = dsl_pool_adjustedsize(dp, B_FALSE) - ++ metaslab_class_get_deferred(spa_normal_class(dp->dp_spa)); ++ used = dp->dp_root_dir->dd_phys->dd_used_bytes; ++ /* MOS space is triple-dittoed, so we multiply by 3. */ ++ if (dstg->dstg_space > 0 && used + dstg->dstg_space * 3 > quota) { ++ dstg->dstg_err = ENOSPC; ++ return; ++ } ++ ++ /* ++ * Check for errors by calling checkfuncs. ++ */ ++ rw_enter(&dp->dp_config_rwlock, RW_WRITER); ++ for (dst = list_head(&dstg->dstg_tasks); dst; ++ dst = list_next(&dstg->dstg_tasks, dst)) { ++ dst->dst_err = ++ dst->dst_checkfunc(dst->dst_arg1, dst->dst_arg2, tx); ++ if (dst->dst_err) ++ dstg->dstg_err = dst->dst_err; ++ } ++ ++ if (dstg->dstg_err == 0) { ++ /* ++ * Execute sync tasks. ++ */ ++ for (dst = list_head(&dstg->dstg_tasks); dst; ++ dst = list_next(&dstg->dstg_tasks, dst)) { ++ dst->dst_syncfunc(dst->dst_arg1, dst->dst_arg2, tx); ++ } ++ } ++ rw_exit(&dp->dp_config_rwlock); ++ ++ if (dstg->dstg_nowaiter) ++ dsl_sync_task_group_destroy(dstg); ++} ++ ++int ++dsl_sync_task_do(dsl_pool_t *dp, ++ dsl_checkfunc_t *checkfunc, dsl_syncfunc_t *syncfunc, ++ void *arg1, void *arg2, int blocks_modified) ++{ ++ dsl_sync_task_group_t *dstg; ++ int err; ++ ++ ASSERT(spa_writeable(dp->dp_spa)); ++ ++ dstg = dsl_sync_task_group_create(dp); ++ dsl_sync_task_create(dstg, checkfunc, syncfunc, ++ arg1, arg2, blocks_modified); ++ err = dsl_sync_task_group_wait(dstg); ++ dsl_sync_task_group_destroy(dstg); ++ return (err); ++} ++ ++void ++dsl_sync_task_do_nowait(dsl_pool_t *dp, ++ dsl_checkfunc_t *checkfunc, dsl_syncfunc_t *syncfunc, ++ void *arg1, void *arg2, int blocks_modified, dmu_tx_t *tx) ++{ ++ dsl_sync_task_group_t *dstg; ++ ++ if (!spa_writeable(dp->dp_spa)) ++ return; ++ ++ dstg = dsl_sync_task_group_create(dp); ++ dsl_sync_task_create(dstg, checkfunc, syncfunc, ++ arg1, arg2, blocks_modified); ++ dsl_sync_task_group_nowait(dstg, tx); ++} ++ ++#if defined(_KERNEL) && defined(HAVE_SPL) ++EXPORT_SYMBOL(dsl_sync_task_do); ++EXPORT_SYMBOL(dsl_sync_task_do_nowait); ++#endif +diff -uNr linux-3.2.33-go.orig/fs/zfs/zfs/fm.c linux-3.2.33-go/fs/zfs/zfs/fm.c +--- linux-3.2.33-go.orig/fs/zfs/zfs/fm.c 1970-01-01 01:00:00.000000000 +0100 ++++ linux-3.2.33-go/fs/zfs/zfs/fm.c 2012-11-16 23:25:34.353039289 +0100 +@@ -0,0 +1,1556 @@ ++/* ++ * CDDL HEADER START ++ * ++ * The contents of this file are subject to the terms of the ++ * Common Development and Distribution License (the "License"). ++ * You may not use this file except in compliance with the License. ++ * ++ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE ++ * or http://www.opensolaris.org/os/licensing. 
++ * See the License for the specific language governing permissions ++ * and limitations under the License. ++ * ++ * When distributing Covered Code, include this CDDL HEADER in each ++ * file and include the License file at usr/src/OPENSOLARIS.LICENSE. ++ * If applicable, add the following below this CDDL HEADER, with the ++ * fields enclosed by brackets "[]" replaced with your own identifying ++ * information: Portions Copyright [yyyy] [name of copyright owner] ++ * ++ * CDDL HEADER END ++ */ ++/* ++ * Copyright (c) 2004, 2010, Oracle and/or its affiliates. All rights reserved. ++ */ ++ ++/* ++ * Fault Management Architecture (FMA) Resource and Protocol Support ++ * ++ * The routines contained herein provide services to support kernel subsystems ++ * in publishing fault management telemetry (see PSARC 2002/412 and 2003/089). ++ * ++ * Name-Value Pair Lists ++ * ++ * The embodiment of an FMA protocol element (event, fmri or authority) is a ++ * name-value pair list (nvlist_t). FMA-specific nvlist construtor and ++ * destructor functions, fm_nvlist_create() and fm_nvlist_destroy(), are used ++ * to create an nvpair list using custom allocators. Callers may choose to ++ * allocate either from the kernel memory allocator, or from a preallocated ++ * buffer, useful in constrained contexts like high-level interrupt routines. ++ * ++ * Protocol Event and FMRI Construction ++ * ++ * Convenience routines are provided to construct nvlist events according to ++ * the FMA Event Protocol and Naming Schema specification for ereports and ++ * FMRIs for the dev, cpu, hc, mem, legacy hc and de schemes. ++ * ++ * ENA Manipulation ++ * ++ * Routines to generate ENA formats 0, 1 and 2 are available as well as ++ * routines to increment formats 1 and 2. Individual fields within the ++ * ENA are extractable via fm_ena_time_get(), fm_ena_id_get(), ++ * fm_ena_format_get() and fm_ena_gen_get(). ++ */ ++ ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#ifdef _KERNEL ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++ ++int zfs_zevent_len_max = 0; ++int zfs_zevent_cols = 80; ++int zfs_zevent_console = 0; ++ ++static int zevent_len_cur = 0; ++static int zevent_waiters = 0; ++static int zevent_flags = 0; ++ ++static kmutex_t zevent_lock; ++static list_t zevent_list; ++static kcondvar_t zevent_cv; ++#endif /* _KERNEL */ ++ ++extern void fastreboot_disable_highpil(void); ++ ++/* ++ * Common fault management kstats to record event generation failures ++ */ ++ ++struct erpt_kstat { ++ kstat_named_t erpt_dropped; /* num erpts dropped on post */ ++ kstat_named_t erpt_set_failed; /* num erpt set failures */ ++ kstat_named_t fmri_set_failed; /* num fmri set failures */ ++ kstat_named_t payload_set_failed; /* num payload set failures */ ++}; ++ ++static struct erpt_kstat erpt_kstat_data = { ++ { "erpt-dropped", KSTAT_DATA_UINT64 }, ++ { "erpt-set-failed", KSTAT_DATA_UINT64 }, ++ { "fmri-set-failed", KSTAT_DATA_UINT64 }, ++ { "payload-set-failed", KSTAT_DATA_UINT64 } ++}; ++ ++kstat_t *fm_ksp; ++ ++#ifdef _KERNEL ++ ++/* ++ * Formatting utility function for fm_nvprintr. We attempt to wrap chunks of ++ * output so they aren't split across console lines, and return the end column. ++ */ ++/*PRINTFLIKE4*/ ++static int ++fm_printf(int depth, int c, int cols, const char *format, ...) 
++{ ++ va_list ap; ++ int width; ++ char c1; ++ ++ va_start(ap, format); ++ width = vsnprintf(&c1, sizeof (c1), format, ap); ++ va_end(ap); ++ ++ if (c + width >= cols) { ++ console_printf("\n"); ++ c = 0; ++ if (format[0] != ' ' && depth > 0) { ++ console_printf(" "); ++ c++; ++ } ++ } ++ ++ va_start(ap, format); ++ console_vprintf(format, ap); ++ va_end(ap); ++ ++ return ((c + width) % cols); ++} ++ ++/* ++ * Recursively print a nvlist in the specified column width and return the ++ * column we end up in. This function is called recursively by fm_nvprint(), ++ * below. We generically format the entire nvpair using hexadecimal ++ * integers and strings, and elide any integer arrays. Arrays are basically ++ * used for cache dumps right now, so we suppress them so as not to overwhelm ++ * the amount of console output we produce at panic time. This can be further ++ * enhanced as FMA technology grows based upon the needs of consumers. All ++ * FMA telemetry is logged using the dump device transport, so the console ++ * output serves only as a fallback in case this procedure is unsuccessful. ++ */ ++static int ++fm_nvprintr(nvlist_t *nvl, int d, int c, int cols) ++{ ++ nvpair_t *nvp; ++ ++ for (nvp = nvlist_next_nvpair(nvl, NULL); ++ nvp != NULL; nvp = nvlist_next_nvpair(nvl, nvp)) { ++ ++ data_type_t type = nvpair_type(nvp); ++ const char *name = nvpair_name(nvp); ++ ++ boolean_t b; ++ uint8_t i8; ++ uint16_t i16; ++ uint32_t i32; ++ uint64_t i64; ++ char *str; ++ nvlist_t *cnv; ++ ++ if (strcmp(name, FM_CLASS) == 0) ++ continue; /* already printed by caller */ ++ ++ c = fm_printf(d, c, cols, " %s=", name); ++ ++ switch (type) { ++ case DATA_TYPE_BOOLEAN: ++ c = fm_printf(d + 1, c, cols, " 1"); ++ break; ++ ++ case DATA_TYPE_BOOLEAN_VALUE: ++ (void) nvpair_value_boolean_value(nvp, &b); ++ c = fm_printf(d + 1, c, cols, b ? "1" : "0"); ++ break; ++ ++ case DATA_TYPE_BYTE: ++ (void) nvpair_value_byte(nvp, &i8); ++ c = fm_printf(d + 1, c, cols, "0x%x", i8); ++ break; ++ ++ case DATA_TYPE_INT8: ++ (void) nvpair_value_int8(nvp, (void *)&i8); ++ c = fm_printf(d + 1, c, cols, "0x%x", i8); ++ break; ++ ++ case DATA_TYPE_UINT8: ++ (void) nvpair_value_uint8(nvp, &i8); ++ c = fm_printf(d + 1, c, cols, "0x%x", i8); ++ break; ++ ++ case DATA_TYPE_INT16: ++ (void) nvpair_value_int16(nvp, (void *)&i16); ++ c = fm_printf(d + 1, c, cols, "0x%x", i16); ++ break; ++ ++ case DATA_TYPE_UINT16: ++ (void) nvpair_value_uint16(nvp, &i16); ++ c = fm_printf(d + 1, c, cols, "0x%x", i16); ++ break; ++ ++ case DATA_TYPE_INT32: ++ (void) nvpair_value_int32(nvp, (void *)&i32); ++ c = fm_printf(d + 1, c, cols, "0x%x", i32); ++ break; ++ ++ case DATA_TYPE_UINT32: ++ (void) nvpair_value_uint32(nvp, &i32); ++ c = fm_printf(d + 1, c, cols, "0x%x", i32); ++ break; ++ ++ case DATA_TYPE_INT64: ++ (void) nvpair_value_int64(nvp, (void *)&i64); ++ c = fm_printf(d + 1, c, cols, "0x%llx", ++ (u_longlong_t)i64); ++ break; ++ ++ case DATA_TYPE_UINT64: ++ (void) nvpair_value_uint64(nvp, &i64); ++ c = fm_printf(d + 1, c, cols, "0x%llx", ++ (u_longlong_t)i64); ++ break; ++ ++ case DATA_TYPE_HRTIME: ++ (void) nvpair_value_hrtime(nvp, (void *)&i64); ++ c = fm_printf(d + 1, c, cols, "0x%llx", ++ (u_longlong_t)i64); ++ break; ++ ++ case DATA_TYPE_STRING: ++ (void) nvpair_value_string(nvp, &str); ++ c = fm_printf(d + 1, c, cols, "\"%s\"", ++ str ? 
str : ""); ++ break; ++ ++ case DATA_TYPE_NVLIST: ++ c = fm_printf(d + 1, c, cols, "["); ++ (void) nvpair_value_nvlist(nvp, &cnv); ++ c = fm_nvprintr(cnv, d + 1, c, cols); ++ c = fm_printf(d + 1, c, cols, " ]"); ++ break; ++ ++ case DATA_TYPE_NVLIST_ARRAY: { ++ nvlist_t **val; ++ uint_t i, nelem; ++ ++ c = fm_printf(d + 1, c, cols, "["); ++ (void) nvpair_value_nvlist_array(nvp, &val, &nelem); ++ for (i = 0; i < nelem; i++) { ++ c = fm_nvprintr(val[i], d + 1, c, cols); ++ } ++ c = fm_printf(d + 1, c, cols, " ]"); ++ } ++ break; ++ ++ case DATA_TYPE_INT8_ARRAY: { ++ int8_t *val; ++ uint_t i, nelem; ++ ++ c = fm_printf(d + 1, c, cols, "[ "); ++ (void) nvpair_value_int8_array(nvp, &val, &nelem); ++ for (i = 0; i < nelem; i++) ++ c = fm_printf(d + 1, c, cols, "0x%llx ", ++ (u_longlong_t)val[i]); ++ ++ c = fm_printf(d + 1, c, cols, "]"); ++ break; ++ } ++ ++ case DATA_TYPE_UINT8_ARRAY: { ++ uint8_t *val; ++ uint_t i, nelem; ++ ++ c = fm_printf(d + 1, c, cols, "[ "); ++ (void) nvpair_value_uint8_array(nvp, &val, &nelem); ++ for (i = 0; i < nelem; i++) ++ c = fm_printf(d + 1, c, cols, "0x%llx ", ++ (u_longlong_t)val[i]); ++ ++ c = fm_printf(d + 1, c, cols, "]"); ++ break; ++ } ++ ++ case DATA_TYPE_INT16_ARRAY: { ++ int16_t *val; ++ uint_t i, nelem; ++ ++ c = fm_printf(d + 1, c, cols, "[ "); ++ (void) nvpair_value_int16_array(nvp, &val, &nelem); ++ for (i = 0; i < nelem; i++) ++ c = fm_printf(d + 1, c, cols, "0x%llx ", ++ (u_longlong_t)val[i]); ++ ++ c = fm_printf(d + 1, c, cols, "]"); ++ break; ++ } ++ ++ case DATA_TYPE_UINT16_ARRAY: { ++ uint16_t *val; ++ uint_t i, nelem; ++ ++ c = fm_printf(d + 1, c, cols, "[ "); ++ (void) nvpair_value_uint16_array(nvp, &val, &nelem); ++ for (i = 0; i < nelem; i++) ++ c = fm_printf(d + 1, c, cols, "0x%llx ", ++ (u_longlong_t)val[i]); ++ ++ c = fm_printf(d + 1, c, cols, "]"); ++ break; ++ } ++ ++ case DATA_TYPE_INT32_ARRAY: { ++ int32_t *val; ++ uint_t i, nelem; ++ ++ c = fm_printf(d + 1, c, cols, "[ "); ++ (void) nvpair_value_int32_array(nvp, &val, &nelem); ++ for (i = 0; i < nelem; i++) ++ c = fm_printf(d + 1, c, cols, "0x%llx ", ++ (u_longlong_t)val[i]); ++ ++ c = fm_printf(d + 1, c, cols, "]"); ++ break; ++ } ++ ++ case DATA_TYPE_UINT32_ARRAY: { ++ uint32_t *val; ++ uint_t i, nelem; ++ ++ c = fm_printf(d + 1, c, cols, "[ "); ++ (void) nvpair_value_uint32_array(nvp, &val, &nelem); ++ for (i = 0; i < nelem; i++) ++ c = fm_printf(d + 1, c, cols, "0x%llx ", ++ (u_longlong_t)val[i]); ++ ++ c = fm_printf(d + 1, c, cols, "]"); ++ break; ++ } ++ ++ case DATA_TYPE_INT64_ARRAY: { ++ int64_t *val; ++ uint_t i, nelem; ++ ++ c = fm_printf(d + 1, c, cols, "[ "); ++ (void) nvpair_value_int64_array(nvp, &val, &nelem); ++ for (i = 0; i < nelem; i++) ++ c = fm_printf(d + 1, c, cols, "0x%llx ", ++ (u_longlong_t)val[i]); ++ ++ c = fm_printf(d + 1, c, cols, "]"); ++ break; ++ } ++ ++ case DATA_TYPE_UINT64_ARRAY: { ++ uint64_t *val; ++ uint_t i, nelem; ++ ++ c = fm_printf(d + 1, c, cols, "[ "); ++ (void) nvpair_value_uint64_array(nvp, &val, &nelem); ++ for (i = 0; i < nelem; i++) ++ c = fm_printf(d + 1, c, cols, "0x%llx ", ++ (u_longlong_t)val[i]); ++ ++ c = fm_printf(d + 1, c, cols, "]"); ++ break; ++ } ++ ++ case DATA_TYPE_STRING_ARRAY: ++ case DATA_TYPE_BOOLEAN_ARRAY: ++ case DATA_TYPE_BYTE_ARRAY: ++ c = fm_printf(d + 1, c, cols, "[...]"); ++ break; ++ ++ case DATA_TYPE_UNKNOWN: ++ c = fm_printf(d + 1, c, cols, ""); ++ break; ++ } ++ } ++ ++ return (c); ++} ++ ++void ++fm_nvprint(nvlist_t *nvl) ++{ ++ char *class; ++ int c = 0; ++ ++ console_printf("\n"); ++ ++ if 
(nvlist_lookup_string(nvl, FM_CLASS, &class) == 0) ++ c = fm_printf(0, c, zfs_zevent_cols, "%s", class); ++ ++ if (fm_nvprintr(nvl, 0, c, zfs_zevent_cols) != 0) ++ console_printf("\n"); ++ ++ console_printf("\n"); ++} ++ ++static zevent_t * ++zfs_zevent_alloc(void) ++{ ++ zevent_t *ev; ++ ++ ev = kmem_zalloc(sizeof(zevent_t), KM_PUSHPAGE); ++ if (ev == NULL) ++ return NULL; ++ ++ list_create(&ev->ev_ze_list, sizeof(zfs_zevent_t), ++ offsetof(zfs_zevent_t, ze_node)); ++ list_link_init(&ev->ev_node); ++ ++ return ev; ++} ++ ++static void ++zfs_zevent_free(zevent_t *ev) ++{ ++ /* Run provided cleanup callback */ ++ ev->ev_cb(ev->ev_nvl, ev->ev_detector); ++ ++ list_destroy(&ev->ev_ze_list); ++ kmem_free(ev, sizeof(zevent_t)); ++} ++ ++static void ++zfs_zevent_drain(zevent_t *ev) ++{ ++ zfs_zevent_t *ze; ++ ++ ASSERT(MUTEX_HELD(&zevent_lock)); ++ list_remove(&zevent_list, ev); ++ ++ /* Remove references to this event in all private file data */ ++ while ((ze = list_head(&ev->ev_ze_list)) != NULL) { ++ list_remove(&ev->ev_ze_list, ze); ++ ze->ze_zevent = NULL; ++ ze->ze_dropped++; ++ } ++ ++ zfs_zevent_free(ev); ++} ++ ++void ++zfs_zevent_drain_all(int *count) ++{ ++ zevent_t *ev; ++ ++ mutex_enter(&zevent_lock); ++ while ((ev = list_head(&zevent_list)) != NULL) ++ zfs_zevent_drain(ev); ++ ++ *count = zevent_len_cur; ++ zevent_len_cur = 0; ++ mutex_exit(&zevent_lock); ++} ++ ++/* ++ * New zevents are inserted at the head. If the maximum queue ++ * length is exceeded a zevent will be drained from the tail. ++ * As part of this any user space processes which currently have ++ * a reference to this zevent_t in their private data will have ++ * this reference set to NULL. ++ */ ++static void ++zfs_zevent_insert(zevent_t *ev) ++{ ++ ASSERT(MUTEX_HELD(&zevent_lock)); ++ list_insert_head(&zevent_list, ev); ++ ++ if (zevent_len_cur >= zfs_zevent_len_max) ++ zfs_zevent_drain(list_tail(&zevent_list)); ++ else ++ zevent_len_cur++; ++} ++ ++/* ++ * Post a zevent ++ */ ++void ++zfs_zevent_post(nvlist_t *nvl, nvlist_t *detector, zevent_cb_t *cb) ++{ ++ int64_t tv_array[2]; ++ timestruc_t tv; ++ size_t nvl_size = 0; ++ zevent_t *ev; ++ ++ gethrestime(&tv); ++ tv_array[0] = tv.tv_sec; ++ tv_array[1] = tv.tv_nsec; ++ if (nvlist_add_int64_array(nvl, FM_EREPORT_TIME, tv_array, 2)) { ++ atomic_add_64(&erpt_kstat_data.erpt_set_failed.value.ui64, 1); ++ return; ++ } ++ ++ (void) nvlist_size(nvl, &nvl_size, NV_ENCODE_NATIVE); ++ if (nvl_size > ERPT_DATA_SZ || nvl_size == 0) { ++ atomic_add_64(&erpt_kstat_data.erpt_dropped.value.ui64, 1); ++ return; ++ } ++ ++ if (zfs_zevent_console) ++ fm_nvprint(nvl); ++ ++ ev = zfs_zevent_alloc(); ++ if (ev == NULL) { ++ atomic_add_64(&erpt_kstat_data.erpt_dropped.value.ui64, 1); ++ return; ++ } ++ ++ ev->ev_nvl = nvl; ++ ev->ev_detector = detector; ++ ev->ev_cb = cb; ++ ++ mutex_enter(&zevent_lock); ++ zfs_zevent_insert(ev); ++ cv_broadcast(&zevent_cv); ++ mutex_exit(&zevent_lock); ++} ++ ++static int ++zfs_zevent_minor_to_state(minor_t minor, zfs_zevent_t **ze) ++{ ++ *ze = zfsdev_get_state(minor, ZST_ZEVENT); ++ if (*ze == NULL) ++ return (EBADF); ++ ++ return (0); ++} ++ ++int ++zfs_zevent_fd_hold(int fd, minor_t *minorp, zfs_zevent_t **ze) ++{ ++ file_t *fp; ++ int error; ++ ++ fp = getf(fd); ++ if (fp == NULL) ++ return (EBADF); ++ ++ *minorp = zfsdev_getminor(fp->f_file); ++ error = zfs_zevent_minor_to_state(*minorp, ze); ++ ++ if (error) ++ zfs_zevent_fd_rele(fd); ++ ++ return (error); ++} ++ ++void ++zfs_zevent_fd_rele(int fd) ++{ ++ releasef(fd); ++} ++ ++/* ++ * Get the 
next zevent in the stream and place a copy in 'event'. This ++ * may fail with ENOMEM if the encoded nvlist size exceeds the passed ++ * 'event_size'. In this case the stream pointer is not advanced and ++ * and 'event_size' is set to the minimum required buffer size. ++ */ ++int ++zfs_zevent_next(zfs_zevent_t *ze, nvlist_t **event, uint64_t *event_size, ++ uint64_t *dropped) ++{ ++ zevent_t *ev; ++ size_t size; ++ int error = 0; ++ ++ mutex_enter(&zevent_lock); ++ if (ze->ze_zevent == NULL) { ++ /* New stream start at the beginning/tail */ ++ ev = list_tail(&zevent_list); ++ if (ev == NULL) { ++ error = ENOENT; ++ goto out; ++ } ++ } else { ++ /* Existing stream continue with the next element and remove ++ * ourselves from the wait queue for the previous element */ ++ ev = list_prev(&zevent_list, ze->ze_zevent); ++ if (ev == NULL) { ++ error = ENOENT; ++ goto out; ++ } ++ } ++ ++ VERIFY(nvlist_size(ev->ev_nvl, &size, NV_ENCODE_NATIVE) == 0); ++ if (size > *event_size) { ++ *event_size = size; ++ error = ENOMEM; ++ goto out; ++ } ++ ++ if (ze->ze_zevent) ++ list_remove(&ze->ze_zevent->ev_ze_list, ze); ++ ++ ze->ze_zevent = ev; ++ list_insert_head(&ev->ev_ze_list, ze); ++ nvlist_dup(ev->ev_nvl, event, KM_SLEEP); ++ *dropped = ze->ze_dropped; ++ ze->ze_dropped = 0; ++out: ++ mutex_exit(&zevent_lock); ++ ++ return error; ++} ++ ++int ++zfs_zevent_wait(zfs_zevent_t *ze) ++{ ++ int error = 0; ++ ++ mutex_enter(&zevent_lock); ++ ++ if (zevent_flags & ZEVENT_SHUTDOWN) { ++ error = ESHUTDOWN; ++ goto out; ++ } ++ ++ zevent_waiters++; ++ cv_wait_interruptible(&zevent_cv, &zevent_lock); ++ if (issig(JUSTLOOKING)) ++ error = EINTR; ++ ++ zevent_waiters--; ++out: ++ mutex_exit(&zevent_lock); ++ ++ return error; ++} ++ ++void ++zfs_zevent_init(zfs_zevent_t **zep) ++{ ++ zfs_zevent_t *ze; ++ ++ ze = *zep = kmem_zalloc(sizeof (zfs_zevent_t), KM_SLEEP); ++ list_link_init(&ze->ze_node); ++} ++ ++void ++zfs_zevent_destroy(zfs_zevent_t *ze) ++{ ++ mutex_enter(&zevent_lock); ++ if (ze->ze_zevent) ++ list_remove(&ze->ze_zevent->ev_ze_list, ze); ++ mutex_exit(&zevent_lock); ++ ++ kmem_free(ze, sizeof (zfs_zevent_t)); ++} ++#endif /* _KERNEL */ ++ ++/* ++ * Wrapppers for FM nvlist allocators ++ */ ++/* ARGSUSED */ ++static void * ++i_fm_alloc(nv_alloc_t *nva, size_t size) ++{ ++ return (kmem_zalloc(size, KM_PUSHPAGE)); ++} ++ ++/* ARGSUSED */ ++static void ++i_fm_free(nv_alloc_t *nva, void *buf, size_t size) ++{ ++ kmem_free(buf, size); ++} ++ ++const nv_alloc_ops_t fm_mem_alloc_ops = { ++ NULL, ++ NULL, ++ i_fm_alloc, ++ i_fm_free, ++ NULL ++}; ++ ++/* ++ * Create and initialize a new nv_alloc_t for a fixed buffer, buf. A pointer ++ * to the newly allocated nv_alloc_t structure is returned upon success or NULL ++ * is returned to indicate that the nv_alloc structure could not be created. ++ */ ++nv_alloc_t * ++fm_nva_xcreate(char *buf, size_t bufsz) ++{ ++ nv_alloc_t *nvhdl = kmem_zalloc(sizeof (nv_alloc_t), KM_SLEEP); ++ ++ if (bufsz == 0 || nv_alloc_init(nvhdl, nv_fixed_ops, buf, bufsz) != 0) { ++ kmem_free(nvhdl, sizeof (nv_alloc_t)); ++ return (NULL); ++ } ++ ++ return (nvhdl); ++} ++ ++/* ++ * Destroy a previously allocated nv_alloc structure. The fixed buffer ++ * associated with nva must be freed by the caller. ++ */ ++void ++fm_nva_xdestroy(nv_alloc_t *nva) ++{ ++ nv_alloc_fini(nva); ++ kmem_free(nva, sizeof (nv_alloc_t)); ++} ++ ++/* ++ * Create a new nv list. A pointer to a new nv list structure is returned ++ * upon success or NULL is returned to indicate that the structure could ++ * not be created. 
The newly created nv list is created and managed by the ++ * operations installed in nva. If nva is NULL, the default FMA nva ++ * operations are installed and used. ++ * ++ * When called from the kernel and nva == NULL, this function must be called ++ * from passive kernel context with no locks held that can prevent a ++ * sleeping memory allocation from occurring. Otherwise, this function may ++ * be called from other kernel contexts as long a valid nva created via ++ * fm_nva_create() is supplied. ++ */ ++nvlist_t * ++fm_nvlist_create(nv_alloc_t *nva) ++{ ++ int hdl_alloced = 0; ++ nvlist_t *nvl; ++ nv_alloc_t *nvhdl; ++ ++ if (nva == NULL) { ++ nvhdl = kmem_zalloc(sizeof (nv_alloc_t), KM_PUSHPAGE); ++ ++ if (nv_alloc_init(nvhdl, &fm_mem_alloc_ops, NULL, 0) != 0) { ++ kmem_free(nvhdl, sizeof (nv_alloc_t)); ++ return (NULL); ++ } ++ hdl_alloced = 1; ++ } else { ++ nvhdl = nva; ++ } ++ ++ if (nvlist_xalloc(&nvl, NV_UNIQUE_NAME, nvhdl) != 0) { ++ if (hdl_alloced) { ++ nv_alloc_fini(nvhdl); ++ kmem_free(nvhdl, sizeof (nv_alloc_t)); ++ } ++ return (NULL); ++ } ++ ++ return (nvl); ++} ++ ++/* ++ * Destroy a previously allocated nvlist structure. flag indicates whether ++ * or not the associated nva structure should be freed (FM_NVA_FREE) or ++ * retained (FM_NVA_RETAIN). Retaining the nv alloc structure allows ++ * it to be re-used for future nvlist creation operations. ++ */ ++void ++fm_nvlist_destroy(nvlist_t *nvl, int flag) ++{ ++ nv_alloc_t *nva = nvlist_lookup_nv_alloc(nvl); ++ ++ nvlist_free(nvl); ++ ++ if (nva != NULL) { ++ if (flag == FM_NVA_FREE) ++ fm_nva_xdestroy(nva); ++ } ++} ++ ++int ++i_fm_payload_set(nvlist_t *payload, const char *name, va_list ap) ++{ ++ int nelem, ret = 0; ++ data_type_t type; ++ ++ while (ret == 0 && name != NULL) { ++ type = va_arg(ap, data_type_t); ++ switch (type) { ++ case DATA_TYPE_BYTE: ++ ret = nvlist_add_byte(payload, name, ++ va_arg(ap, uint_t)); ++ break; ++ case DATA_TYPE_BYTE_ARRAY: ++ nelem = va_arg(ap, int); ++ ret = nvlist_add_byte_array(payload, name, ++ va_arg(ap, uchar_t *), nelem); ++ break; ++ case DATA_TYPE_BOOLEAN_VALUE: ++ ret = nvlist_add_boolean_value(payload, name, ++ va_arg(ap, boolean_t)); ++ break; ++ case DATA_TYPE_BOOLEAN_ARRAY: ++ nelem = va_arg(ap, int); ++ ret = nvlist_add_boolean_array(payload, name, ++ va_arg(ap, boolean_t *), nelem); ++ break; ++ case DATA_TYPE_INT8: ++ ret = nvlist_add_int8(payload, name, ++ va_arg(ap, int)); ++ break; ++ case DATA_TYPE_INT8_ARRAY: ++ nelem = va_arg(ap, int); ++ ret = nvlist_add_int8_array(payload, name, ++ va_arg(ap, int8_t *), nelem); ++ break; ++ case DATA_TYPE_UINT8: ++ ret = nvlist_add_uint8(payload, name, ++ va_arg(ap, uint_t)); ++ break; ++ case DATA_TYPE_UINT8_ARRAY: ++ nelem = va_arg(ap, int); ++ ret = nvlist_add_uint8_array(payload, name, ++ va_arg(ap, uint8_t *), nelem); ++ break; ++ case DATA_TYPE_INT16: ++ ret = nvlist_add_int16(payload, name, ++ va_arg(ap, int)); ++ break; ++ case DATA_TYPE_INT16_ARRAY: ++ nelem = va_arg(ap, int); ++ ret = nvlist_add_int16_array(payload, name, ++ va_arg(ap, int16_t *), nelem); ++ break; ++ case DATA_TYPE_UINT16: ++ ret = nvlist_add_uint16(payload, name, ++ va_arg(ap, uint_t)); ++ break; ++ case DATA_TYPE_UINT16_ARRAY: ++ nelem = va_arg(ap, int); ++ ret = nvlist_add_uint16_array(payload, name, ++ va_arg(ap, uint16_t *), nelem); ++ break; ++ case DATA_TYPE_INT32: ++ ret = nvlist_add_int32(payload, name, ++ va_arg(ap, int32_t)); ++ break; ++ case DATA_TYPE_INT32_ARRAY: ++ nelem = va_arg(ap, int); ++ ret = nvlist_add_int32_array(payload, name, 
++ va_arg(ap, int32_t *), nelem); ++ break; ++ case DATA_TYPE_UINT32: ++ ret = nvlist_add_uint32(payload, name, ++ va_arg(ap, uint32_t)); ++ break; ++ case DATA_TYPE_UINT32_ARRAY: ++ nelem = va_arg(ap, int); ++ ret = nvlist_add_uint32_array(payload, name, ++ va_arg(ap, uint32_t *), nelem); ++ break; ++ case DATA_TYPE_INT64: ++ ret = nvlist_add_int64(payload, name, ++ va_arg(ap, int64_t)); ++ break; ++ case DATA_TYPE_INT64_ARRAY: ++ nelem = va_arg(ap, int); ++ ret = nvlist_add_int64_array(payload, name, ++ va_arg(ap, int64_t *), nelem); ++ break; ++ case DATA_TYPE_UINT64: ++ ret = nvlist_add_uint64(payload, name, ++ va_arg(ap, uint64_t)); ++ break; ++ case DATA_TYPE_UINT64_ARRAY: ++ nelem = va_arg(ap, int); ++ ret = nvlist_add_uint64_array(payload, name, ++ va_arg(ap, uint64_t *), nelem); ++ break; ++ case DATA_TYPE_STRING: ++ ret = nvlist_add_string(payload, name, ++ va_arg(ap, char *)); ++ break; ++ case DATA_TYPE_STRING_ARRAY: ++ nelem = va_arg(ap, int); ++ ret = nvlist_add_string_array(payload, name, ++ va_arg(ap, char **), nelem); ++ break; ++ case DATA_TYPE_NVLIST: ++ ret = nvlist_add_nvlist(payload, name, ++ va_arg(ap, nvlist_t *)); ++ break; ++ case DATA_TYPE_NVLIST_ARRAY: ++ nelem = va_arg(ap, int); ++ ret = nvlist_add_nvlist_array(payload, name, ++ va_arg(ap, nvlist_t **), nelem); ++ break; ++ default: ++ ret = EINVAL; ++ } ++ ++ name = va_arg(ap, char *); ++ } ++ return (ret); ++} ++ ++void ++fm_payload_set(nvlist_t *payload, ...) ++{ ++ int ret; ++ const char *name; ++ va_list ap; ++ ++ va_start(ap, payload); ++ name = va_arg(ap, char *); ++ ret = i_fm_payload_set(payload, name, ap); ++ va_end(ap); ++ ++ if (ret) ++ atomic_add_64( ++ &erpt_kstat_data.payload_set_failed.value.ui64, 1); ++} ++ ++/* ++ * Set-up and validate the members of an ereport event according to: ++ * ++ * Member name Type Value ++ * ==================================================== ++ * class string ereport ++ * version uint8_t 0 ++ * ena uint64_t ++ * detector nvlist_t ++ * ereport-payload nvlist_t ++ * ++ * We don't actually add a 'version' member to the payload. Really, ++ * the version quoted to us by our caller is that of the category 1 ++ * "ereport" event class (and we require FM_EREPORT_VERS0) but ++ * the payload version of the actual leaf class event under construction ++ * may be something else. Callers should supply a version in the varargs, ++ * or (better) we could take two version arguments - one for the ++ * ereport category 1 classification (expect FM_EREPORT_VERS0) and one ++ * for the leaf class. ++ */ ++void ++fm_ereport_set(nvlist_t *ereport, int version, const char *erpt_class, ++ uint64_t ena, const nvlist_t *detector, ...) 
++{ ++ char ereport_class[FM_MAX_CLASS]; ++ const char *name; ++ va_list ap; ++ int ret; ++ ++ if (version != FM_EREPORT_VERS0) { ++ atomic_add_64(&erpt_kstat_data.erpt_set_failed.value.ui64, 1); ++ return; ++ } ++ ++ (void) snprintf(ereport_class, FM_MAX_CLASS, "%s.%s", ++ FM_EREPORT_CLASS, erpt_class); ++ if (nvlist_add_string(ereport, FM_CLASS, ereport_class) != 0) { ++ atomic_add_64(&erpt_kstat_data.erpt_set_failed.value.ui64, 1); ++ return; ++ } ++ ++ if (nvlist_add_uint64(ereport, FM_EREPORT_ENA, ena)) { ++ atomic_add_64(&erpt_kstat_data.erpt_set_failed.value.ui64, 1); ++ } ++ ++ if (nvlist_add_nvlist(ereport, FM_EREPORT_DETECTOR, ++ (nvlist_t *)detector) != 0) { ++ atomic_add_64(&erpt_kstat_data.erpt_set_failed.value.ui64, 1); ++ } ++ ++ va_start(ap, detector); ++ name = va_arg(ap, const char *); ++ ret = i_fm_payload_set(ereport, name, ap); ++ va_end(ap); ++ ++ if (ret) ++ atomic_add_64(&erpt_kstat_data.erpt_set_failed.value.ui64, 1); ++} ++ ++/* ++ * Set-up and validate the members of an hc fmri according to; ++ * ++ * Member name Type Value ++ * =================================================== ++ * version uint8_t 0 ++ * auth nvlist_t ++ * hc-name string ++ * hc-id string ++ * ++ * Note that auth and hc-id are optional members. ++ */ ++ ++#define HC_MAXPAIRS 20 ++#define HC_MAXNAMELEN 50 ++ ++static int ++fm_fmri_hc_set_common(nvlist_t *fmri, int version, const nvlist_t *auth) ++{ ++ if (version != FM_HC_SCHEME_VERSION) { ++ atomic_add_64(&erpt_kstat_data.fmri_set_failed.value.ui64, 1); ++ return (0); ++ } ++ ++ if (nvlist_add_uint8(fmri, FM_VERSION, version) != 0 || ++ nvlist_add_string(fmri, FM_FMRI_SCHEME, FM_FMRI_SCHEME_HC) != 0) { ++ atomic_add_64(&erpt_kstat_data.fmri_set_failed.value.ui64, 1); ++ return (0); ++ } ++ ++ if (auth != NULL && nvlist_add_nvlist(fmri, FM_FMRI_AUTHORITY, ++ (nvlist_t *)auth) != 0) { ++ atomic_add_64(&erpt_kstat_data.fmri_set_failed.value.ui64, 1); ++ return (0); ++ } ++ ++ return (1); ++} ++ ++void ++fm_fmri_hc_set(nvlist_t *fmri, int version, const nvlist_t *auth, ++ nvlist_t *snvl, int npairs, ...) ++{ ++ nv_alloc_t *nva = nvlist_lookup_nv_alloc(fmri); ++ nvlist_t *pairs[HC_MAXPAIRS]; ++ va_list ap; ++ int i; ++ ++ if (!fm_fmri_hc_set_common(fmri, version, auth)) ++ return; ++ ++ npairs = MIN(npairs, HC_MAXPAIRS); ++ ++ va_start(ap, npairs); ++ for (i = 0; i < npairs; i++) { ++ const char *name = va_arg(ap, const char *); ++ uint32_t id = va_arg(ap, uint32_t); ++ char idstr[11]; ++ ++ (void) snprintf(idstr, sizeof (idstr), "%u", id); ++ ++ pairs[i] = fm_nvlist_create(nva); ++ if (nvlist_add_string(pairs[i], FM_FMRI_HC_NAME, name) != 0 || ++ nvlist_add_string(pairs[i], FM_FMRI_HC_ID, idstr) != 0) { ++ atomic_add_64( ++ &erpt_kstat_data.fmri_set_failed.value.ui64, 1); ++ } ++ } ++ va_end(ap); ++ ++ if (nvlist_add_nvlist_array(fmri, FM_FMRI_HC_LIST, pairs, npairs) != 0) ++ atomic_add_64(&erpt_kstat_data.fmri_set_failed.value.ui64, 1); ++ ++ for (i = 0; i < npairs; i++) ++ fm_nvlist_destroy(pairs[i], FM_NVA_RETAIN); ++ ++ if (snvl != NULL) { ++ if (nvlist_add_nvlist(fmri, FM_FMRI_HC_SPECIFIC, snvl) != 0) { ++ atomic_add_64( ++ &erpt_kstat_data.fmri_set_failed.value.ui64, 1); ++ } ++ } ++} ++ ++void ++fm_fmri_hc_create(nvlist_t *fmri, int version, const nvlist_t *auth, ++ nvlist_t *snvl, nvlist_t *bboard, int npairs, ...) 
++{ ++ nv_alloc_t *nva = nvlist_lookup_nv_alloc(fmri); ++ nvlist_t *pairs[HC_MAXPAIRS]; ++ nvlist_t **hcl; ++ uint_t n; ++ int i, j; ++ va_list ap; ++ char *hcname, *hcid; ++ ++ if (!fm_fmri_hc_set_common(fmri, version, auth)) ++ return; ++ ++ /* ++ * copy the bboard nvpairs to the pairs array ++ */ ++ if (nvlist_lookup_nvlist_array(bboard, FM_FMRI_HC_LIST, &hcl, &n) ++ != 0) { ++ atomic_add_64(&erpt_kstat_data.fmri_set_failed.value.ui64, 1); ++ return; ++ } ++ ++ for (i = 0; i < n; i++) { ++ if (nvlist_lookup_string(hcl[i], FM_FMRI_HC_NAME, ++ &hcname) != 0) { ++ atomic_add_64( ++ &erpt_kstat_data.fmri_set_failed.value.ui64, 1); ++ return; ++ } ++ if (nvlist_lookup_string(hcl[i], FM_FMRI_HC_ID, &hcid) != 0) { ++ atomic_add_64( ++ &erpt_kstat_data.fmri_set_failed.value.ui64, 1); ++ return; ++ } ++ ++ pairs[i] = fm_nvlist_create(nva); ++ if (nvlist_add_string(pairs[i], FM_FMRI_HC_NAME, hcname) != 0 || ++ nvlist_add_string(pairs[i], FM_FMRI_HC_ID, hcid) != 0) { ++ for (j = 0; j <= i; j++) { ++ if (pairs[j] != NULL) ++ fm_nvlist_destroy(pairs[j], ++ FM_NVA_RETAIN); ++ } ++ atomic_add_64( ++ &erpt_kstat_data.fmri_set_failed.value.ui64, 1); ++ return; ++ } ++ } ++ ++ /* ++ * create the pairs from passed in pairs ++ */ ++ npairs = MIN(npairs, HC_MAXPAIRS); ++ ++ va_start(ap, npairs); ++ for (i = n; i < npairs + n; i++) { ++ const char *name = va_arg(ap, const char *); ++ uint32_t id = va_arg(ap, uint32_t); ++ char idstr[11]; ++ (void) snprintf(idstr, sizeof (idstr), "%u", id); ++ pairs[i] = fm_nvlist_create(nva); ++ if (nvlist_add_string(pairs[i], FM_FMRI_HC_NAME, name) != 0 || ++ nvlist_add_string(pairs[i], FM_FMRI_HC_ID, idstr) != 0) { ++ for (j = 0; j <= i; j++) { ++ if (pairs[j] != NULL) ++ fm_nvlist_destroy(pairs[j], ++ FM_NVA_RETAIN); ++ } ++ atomic_add_64( ++ &erpt_kstat_data.fmri_set_failed.value.ui64, 1); ++ return; ++ } ++ } ++ va_end(ap); ++ ++ /* ++ * Create the fmri hc list ++ */ ++ if (nvlist_add_nvlist_array(fmri, FM_FMRI_HC_LIST, pairs, ++ npairs + n) != 0) { ++ atomic_add_64(&erpt_kstat_data.fmri_set_failed.value.ui64, 1); ++ return; ++ } ++ ++ for (i = 0; i < npairs + n; i++) { ++ fm_nvlist_destroy(pairs[i], FM_NVA_RETAIN); ++ } ++ ++ if (snvl != NULL) { ++ if (nvlist_add_nvlist(fmri, FM_FMRI_HC_SPECIFIC, snvl) != 0) { ++ atomic_add_64( ++ &erpt_kstat_data.fmri_set_failed.value.ui64, 1); ++ return; ++ } ++ } ++} ++ ++/* ++ * Set-up and validate the members of an dev fmri according to: ++ * ++ * Member name Type Value ++ * ==================================================== ++ * version uint8_t 0 ++ * auth nvlist_t ++ * devpath string ++ * [devid] string ++ * [target-port-l0id] string ++ * ++ * Note that auth and devid are optional members. 
++ */ ++void ++fm_fmri_dev_set(nvlist_t *fmri_dev, int version, const nvlist_t *auth, ++ const char *devpath, const char *devid, const char *tpl0) ++{ ++ int err = 0; ++ ++ if (version != DEV_SCHEME_VERSION0) { ++ atomic_add_64(&erpt_kstat_data.fmri_set_failed.value.ui64, 1); ++ return; ++ } ++ ++ err |= nvlist_add_uint8(fmri_dev, FM_VERSION, version); ++ err |= nvlist_add_string(fmri_dev, FM_FMRI_SCHEME, FM_FMRI_SCHEME_DEV); ++ ++ if (auth != NULL) { ++ err |= nvlist_add_nvlist(fmri_dev, FM_FMRI_AUTHORITY, ++ (nvlist_t *)auth); ++ } ++ ++ err |= nvlist_add_string(fmri_dev, FM_FMRI_DEV_PATH, devpath); ++ ++ if (devid != NULL) ++ err |= nvlist_add_string(fmri_dev, FM_FMRI_DEV_ID, devid); ++ ++ if (tpl0 != NULL) ++ err |= nvlist_add_string(fmri_dev, FM_FMRI_DEV_TGTPTLUN0, tpl0); ++ ++ if (err) ++ atomic_add_64(&erpt_kstat_data.fmri_set_failed.value.ui64, 1); ++ ++} ++ ++/* ++ * Set-up and validate the members of an cpu fmri according to: ++ * ++ * Member name Type Value ++ * ==================================================== ++ * version uint8_t 0 ++ * auth nvlist_t ++ * cpuid uint32_t ++ * cpumask uint8_t ++ * serial uint64_t ++ * ++ * Note that auth, cpumask, serial are optional members. ++ * ++ */ ++void ++fm_fmri_cpu_set(nvlist_t *fmri_cpu, int version, const nvlist_t *auth, ++ uint32_t cpu_id, uint8_t *cpu_maskp, const char *serial_idp) ++{ ++ uint64_t *failedp = &erpt_kstat_data.fmri_set_failed.value.ui64; ++ ++ if (version < CPU_SCHEME_VERSION1) { ++ atomic_add_64(failedp, 1); ++ return; ++ } ++ ++ if (nvlist_add_uint8(fmri_cpu, FM_VERSION, version) != 0) { ++ atomic_add_64(failedp, 1); ++ return; ++ } ++ ++ if (nvlist_add_string(fmri_cpu, FM_FMRI_SCHEME, ++ FM_FMRI_SCHEME_CPU) != 0) { ++ atomic_add_64(failedp, 1); ++ return; ++ } ++ ++ if (auth != NULL && nvlist_add_nvlist(fmri_cpu, FM_FMRI_AUTHORITY, ++ (nvlist_t *)auth) != 0) ++ atomic_add_64(failedp, 1); ++ ++ if (nvlist_add_uint32(fmri_cpu, FM_FMRI_CPU_ID, cpu_id) != 0) ++ atomic_add_64(failedp, 1); ++ ++ if (cpu_maskp != NULL && nvlist_add_uint8(fmri_cpu, FM_FMRI_CPU_MASK, ++ *cpu_maskp) != 0) ++ atomic_add_64(failedp, 1); ++ ++ if (serial_idp == NULL || nvlist_add_string(fmri_cpu, ++ FM_FMRI_CPU_SERIAL_ID, (char *)serial_idp) != 0) ++ atomic_add_64(failedp, 1); ++} ++ ++/* ++ * Set-up and validate the members of a mem according to: ++ * ++ * Member name Type Value ++ * ==================================================== ++ * version uint8_t 0 ++ * auth nvlist_t [optional] ++ * unum string ++ * serial string [optional*] ++ * offset uint64_t [optional] ++ * ++ * * serial is required if offset is present ++ */ ++void ++fm_fmri_mem_set(nvlist_t *fmri, int version, const nvlist_t *auth, ++ const char *unum, const char *serial, uint64_t offset) ++{ ++ if (version != MEM_SCHEME_VERSION0) { ++ atomic_add_64(&erpt_kstat_data.fmri_set_failed.value.ui64, 1); ++ return; ++ } ++ ++ if (!serial && (offset != (uint64_t)-1)) { ++ atomic_add_64(&erpt_kstat_data.fmri_set_failed.value.ui64, 1); ++ return; ++ } ++ ++ if (nvlist_add_uint8(fmri, FM_VERSION, version) != 0) { ++ atomic_add_64(&erpt_kstat_data.fmri_set_failed.value.ui64, 1); ++ return; ++ } ++ ++ if (nvlist_add_string(fmri, FM_FMRI_SCHEME, FM_FMRI_SCHEME_MEM) != 0) { ++ atomic_add_64(&erpt_kstat_data.fmri_set_failed.value.ui64, 1); ++ return; ++ } ++ ++ if (auth != NULL) { ++ if (nvlist_add_nvlist(fmri, FM_FMRI_AUTHORITY, ++ (nvlist_t *)auth) != 0) { ++ atomic_add_64( ++ &erpt_kstat_data.fmri_set_failed.value.ui64, 1); ++ } ++ } ++ ++ if (nvlist_add_string(fmri, FM_FMRI_MEM_UNUM, 
unum) != 0) { ++ atomic_add_64(&erpt_kstat_data.fmri_set_failed.value.ui64, 1); ++ } ++ ++ if (serial != NULL) { ++ if (nvlist_add_string_array(fmri, FM_FMRI_MEM_SERIAL_ID, ++ (char **)&serial, 1) != 0) { ++ atomic_add_64( ++ &erpt_kstat_data.fmri_set_failed.value.ui64, 1); ++ } ++ if (offset != (uint64_t)-1) { ++ if (nvlist_add_uint64(fmri, FM_FMRI_MEM_OFFSET, ++ offset) != 0) { ++ atomic_add_64(&erpt_kstat_data. ++ fmri_set_failed.value.ui64, 1); ++ } ++ } ++ } ++} ++ ++void ++fm_fmri_zfs_set(nvlist_t *fmri, int version, uint64_t pool_guid, ++ uint64_t vdev_guid) ++{ ++ if (version != ZFS_SCHEME_VERSION0) { ++ atomic_add_64(&erpt_kstat_data.fmri_set_failed.value.ui64, 1); ++ return; ++ } ++ ++ if (nvlist_add_uint8(fmri, FM_VERSION, version) != 0) { ++ atomic_add_64(&erpt_kstat_data.fmri_set_failed.value.ui64, 1); ++ return; ++ } ++ ++ if (nvlist_add_string(fmri, FM_FMRI_SCHEME, FM_FMRI_SCHEME_ZFS) != 0) { ++ atomic_add_64(&erpt_kstat_data.fmri_set_failed.value.ui64, 1); ++ return; ++ } ++ ++ if (nvlist_add_uint64(fmri, FM_FMRI_ZFS_POOL, pool_guid) != 0) { ++ atomic_add_64(&erpt_kstat_data.fmri_set_failed.value.ui64, 1); ++ } ++ ++ if (vdev_guid != 0) { ++ if (nvlist_add_uint64(fmri, FM_FMRI_ZFS_VDEV, vdev_guid) != 0) { ++ atomic_add_64( ++ &erpt_kstat_data.fmri_set_failed.value.ui64, 1); ++ } ++ } ++} ++ ++uint64_t ++fm_ena_increment(uint64_t ena) ++{ ++ uint64_t new_ena; ++ ++ switch (ENA_FORMAT(ena)) { ++ case FM_ENA_FMT1: ++ new_ena = ena + (1 << ENA_FMT1_GEN_SHFT); ++ break; ++ case FM_ENA_FMT2: ++ new_ena = ena + (1 << ENA_FMT2_GEN_SHFT); ++ break; ++ default: ++ new_ena = 0; ++ } ++ ++ return (new_ena); ++} ++ ++uint64_t ++fm_ena_generate_cpu(uint64_t timestamp, processorid_t cpuid, uchar_t format) ++{ ++ uint64_t ena = 0; ++ ++ switch (format) { ++ case FM_ENA_FMT1: ++ if (timestamp) { ++ ena = (uint64_t)((format & ENA_FORMAT_MASK) | ++ ((cpuid << ENA_FMT1_CPUID_SHFT) & ++ ENA_FMT1_CPUID_MASK) | ++ ((timestamp << ENA_FMT1_TIME_SHFT) & ++ ENA_FMT1_TIME_MASK)); ++ } else { ++ ena = (uint64_t)((format & ENA_FORMAT_MASK) | ++ ((cpuid << ENA_FMT1_CPUID_SHFT) & ++ ENA_FMT1_CPUID_MASK) | ++ ((gethrtime() << ENA_FMT1_TIME_SHFT) & ++ ENA_FMT1_TIME_MASK)); ++ } ++ break; ++ case FM_ENA_FMT2: ++ ena = (uint64_t)((format & ENA_FORMAT_MASK) | ++ ((timestamp << ENA_FMT2_TIME_SHFT) & ENA_FMT2_TIME_MASK)); ++ break; ++ default: ++ break; ++ } ++ ++ return (ena); ++} ++ ++uint64_t ++fm_ena_generate(uint64_t timestamp, uchar_t format) ++{ ++ uint64_t ena; ++ ++ kpreempt_disable(); ++ ena = fm_ena_generate_cpu(timestamp, getcpuid(), format); ++ kpreempt_enable(); ++ ++ return (ena); ++} ++ ++uint64_t ++fm_ena_generation_get(uint64_t ena) ++{ ++ uint64_t gen; ++ ++ switch (ENA_FORMAT(ena)) { ++ case FM_ENA_FMT1: ++ gen = (ena & ENA_FMT1_GEN_MASK) >> ENA_FMT1_GEN_SHFT; ++ break; ++ case FM_ENA_FMT2: ++ gen = (ena & ENA_FMT2_GEN_MASK) >> ENA_FMT2_GEN_SHFT; ++ break; ++ default: ++ gen = 0; ++ break; ++ } ++ ++ return (gen); ++} ++ ++uchar_t ++fm_ena_format_get(uint64_t ena) ++{ ++ ++ return (ENA_FORMAT(ena)); ++} ++ ++uint64_t ++fm_ena_id_get(uint64_t ena) ++{ ++ uint64_t id; ++ ++ switch (ENA_FORMAT(ena)) { ++ case FM_ENA_FMT1: ++ id = (ena & ENA_FMT1_ID_MASK) >> ENA_FMT1_ID_SHFT; ++ break; ++ case FM_ENA_FMT2: ++ id = (ena & ENA_FMT2_ID_MASK) >> ENA_FMT2_ID_SHFT; ++ break; ++ default: ++ id = 0; ++ } ++ ++ return (id); ++} ++ ++uint64_t ++fm_ena_time_get(uint64_t ena) ++{ ++ uint64_t time; ++ ++ switch (ENA_FORMAT(ena)) { ++ case FM_ENA_FMT1: ++ time = (ena & ENA_FMT1_TIME_MASK) >> 
ENA_FMT1_TIME_SHFT; ++ break; ++ case FM_ENA_FMT2: ++ time = (ena & ENA_FMT2_TIME_MASK) >> ENA_FMT2_TIME_SHFT; ++ break; ++ default: ++ time = 0; ++ } ++ ++ return (time); ++} ++ ++#ifdef _KERNEL ++void ++fm_init(void) ++{ ++ zevent_len_cur = 0; ++ zevent_flags = 0; ++ ++ if (zfs_zevent_len_max == 0) ++ zfs_zevent_len_max = ERPT_MAX_ERRS * MAX(max_ncpus, 4); ++ ++ /* Initialize zevent allocation and generation kstats */ ++ fm_ksp = kstat_create("zfs", 0, "fm", "misc", KSTAT_TYPE_NAMED, ++ sizeof (struct erpt_kstat) / sizeof (kstat_named_t), ++ KSTAT_FLAG_VIRTUAL); ++ ++ if (fm_ksp != NULL) { ++ fm_ksp->ks_data = &erpt_kstat_data; ++ kstat_install(fm_ksp); ++ } else { ++ cmn_err(CE_NOTE, "failed to create fm/misc kstat\n"); ++ } ++ ++ mutex_init(&zevent_lock, NULL, MUTEX_DEFAULT, NULL); ++ list_create(&zevent_list, sizeof(zevent_t), offsetof(zevent_t, ev_node)); ++ cv_init(&zevent_cv, NULL, CV_DEFAULT, NULL); ++} ++ ++void ++fm_fini(void) ++{ ++ int count; ++ ++ zfs_zevent_drain_all(&count); ++ ++ mutex_enter(&zevent_lock); ++ cv_broadcast(&zevent_cv); ++ ++ zevent_flags |= ZEVENT_SHUTDOWN; ++ while (zevent_waiters > 0) { ++ mutex_exit(&zevent_lock); ++ schedule(); ++ mutex_enter(&zevent_lock); ++ } ++ mutex_exit(&zevent_lock); ++ ++ cv_destroy(&zevent_cv); ++ list_destroy(&zevent_list); ++ mutex_destroy(&zevent_lock); ++ ++ if (fm_ksp != NULL) { ++ kstat_delete(fm_ksp); ++ fm_ksp = NULL; ++ } ++} ++ ++module_param(zfs_zevent_len_max, int, 0644); ++MODULE_PARM_DESC(zfs_zevent_len_max, "Max event queue length"); ++ ++module_param(zfs_zevent_cols, int, 0644); ++MODULE_PARM_DESC(zfs_zevent_cols, "Max event column width"); ++ ++module_param(zfs_zevent_console, int, 0644); ++MODULE_PARM_DESC(zfs_zevent_console, "Log events to the console"); ++ ++#endif /* _KERNEL */ +diff -uNr linux-3.2.33-go.orig/fs/zfs/zfs/gzip.c linux-3.2.33-go/fs/zfs/zfs/gzip.c +--- linux-3.2.33-go.orig/fs/zfs/zfs/gzip.c 1970-01-01 01:00:00.000000000 +0100 ++++ linux-3.2.33-go/fs/zfs/zfs/gzip.c 2012-11-16 23:25:34.353039289 +0100 +@@ -0,0 +1,82 @@ ++/* ++ * CDDL HEADER START ++ * ++ * The contents of this file are subject to the terms of the ++ * Common Development and Distribution License (the "License"). ++ * You may not use this file except in compliance with the License. ++ * ++ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE ++ * or http://www.opensolaris.org/os/licensing. ++ * See the License for the specific language governing permissions ++ * and limitations under the License. ++ * ++ * When distributing Covered Code, include this CDDL HEADER in each ++ * file and include the License file at usr/src/OPENSOLARIS.LICENSE. ++ * If applicable, add the following below this CDDL HEADER, with the ++ * fields enclosed by brackets "[]" replaced with your own identifying ++ * information: Portions Copyright [yyyy] [name of copyright owner] ++ * ++ * CDDL HEADER END ++ */ ++ ++/* ++ * Copyright 2007 Sun Microsystems, Inc. All rights reserved. ++ * Use is subject to license terms. 
++ */ ++ ++ ++ ++#include ++#include ++ ++#ifdef _KERNEL ++ ++#include ++#include ++ ++typedef size_t zlen_t; ++#define compress_func z_compress_level ++#define uncompress_func z_uncompress ++ ++#else /* _KERNEL */ ++ ++#include ++#include ++ ++typedef uLongf zlen_t; ++#define compress_func compress2 ++#define uncompress_func uncompress ++ ++#endif ++ ++size_t ++gzip_compress(void *s_start, void *d_start, size_t s_len, size_t d_len, int n) ++{ ++ zlen_t dstlen = d_len; ++ ++ ASSERT(d_len <= s_len); ++ ++ if (compress_func(d_start, &dstlen, s_start, s_len, n) != Z_OK) { ++ if (d_len != s_len) ++ return (s_len); ++ ++ bcopy(s_start, d_start, s_len); ++ return (s_len); ++ } ++ ++ return ((size_t) dstlen); ++} ++ ++/*ARGSUSED*/ ++int ++gzip_decompress(void *s_start, void *d_start, size_t s_len, size_t d_len, int n) ++{ ++ zlen_t dstlen = d_len; ++ ++ ASSERT(d_len >= s_len); ++ ++ if (uncompress_func(d_start, &dstlen, s_start, s_len) != Z_OK) ++ return (-1); ++ ++ return (0); ++} +diff -uNr linux-3.2.33-go.orig/fs/zfs/zfs/lzjb.c linux-3.2.33-go/fs/zfs/zfs/lzjb.c +--- linux-3.2.33-go.orig/fs/zfs/zfs/lzjb.c 1970-01-01 01:00:00.000000000 +0100 ++++ linux-3.2.33-go/fs/zfs/zfs/lzjb.c 2012-11-16 23:25:34.348039346 +0100 +@@ -0,0 +1,128 @@ ++/* ++ * CDDL HEADER START ++ * ++ * The contents of this file are subject to the terms of the ++ * Common Development and Distribution License (the "License"). ++ * You may not use this file except in compliance with the License. ++ * ++ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE ++ * or http://www.opensolaris.org/os/licensing. ++ * See the License for the specific language governing permissions ++ * and limitations under the License. ++ * ++ * When distributing Covered Code, include this CDDL HEADER in each ++ * file and include the License file at usr/src/OPENSOLARIS.LICENSE. ++ * If applicable, add the following below this CDDL HEADER, with the ++ * fields enclosed by brackets "[]" replaced with your own identifying ++ * information: Portions Copyright [yyyy] [name of copyright owner] ++ * ++ * CDDL HEADER END ++ */ ++ ++/* ++ * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved. ++ */ ++ ++/* ++ * We keep our own copy of this algorithm for 3 main reasons: ++ * 1. If we didn't, anyone modifying common/os/compress.c would ++ * directly break our on disk format ++ * 2. Our version of lzjb does not have a number of checks that the ++ * common/os version needs and uses ++ * 3. We initialize the lempel to ensure deterministic results, ++ * so that identical blocks can always be deduplicated. ++ * In particular, we are adding the "feature" that compress() can ++ * take a destination buffer size and returns the compressed length, or the ++ * source length if compression would overflow the destination buffer. 
++ */ ++ ++#include ++ ++#define MATCH_BITS 6 ++#define MATCH_MIN 3 ++#define MATCH_MAX ((1 << MATCH_BITS) + (MATCH_MIN - 1)) ++#define OFFSET_MASK ((1 << (16 - MATCH_BITS)) - 1) ++#define LEMPEL_SIZE 1024 ++ ++/*ARGSUSED*/ ++size_t ++lzjb_compress(void *s_start, void *d_start, size_t s_len, size_t d_len, int n) ++{ ++ uchar_t *src = s_start; ++ uchar_t *dst = d_start; ++ uchar_t *cpy, *copymap = NULL; ++ int copymask = 1 << (NBBY - 1); ++ int mlen, offset, hash; ++ uint16_t *hp; ++ uint16_t *lempel; ++ ++ lempel = kmem_zalloc(LEMPEL_SIZE * sizeof (uint16_t), KM_PUSHPAGE); ++ while (src < (uchar_t *)s_start + s_len) { ++ if ((copymask <<= 1) == (1 << NBBY)) { ++ if (dst >= (uchar_t *)d_start + d_len - 1 - 2 * NBBY) { ++ kmem_free(lempel, LEMPEL_SIZE*sizeof(uint16_t)); ++ return (s_len); ++ } ++ copymask = 1; ++ copymap = dst; ++ *dst++ = 0; ++ } ++ if (src > (uchar_t *)s_start + s_len - MATCH_MAX) { ++ *dst++ = *src++; ++ continue; ++ } ++ hash = (src[0] << 16) + (src[1] << 8) + src[2]; ++ hash += hash >> 9; ++ hash += hash >> 5; ++ hp = &lempel[hash & (LEMPEL_SIZE - 1)]; ++ offset = (intptr_t)(src - *hp) & OFFSET_MASK; ++ *hp = (uint16_t)(uintptr_t)src; ++ cpy = src - offset; ++ if (cpy >= (uchar_t *)s_start && cpy != src && ++ src[0] == cpy[0] && src[1] == cpy[1] && src[2] == cpy[2]) { ++ *copymap |= copymask; ++ for (mlen = MATCH_MIN; mlen < MATCH_MAX; mlen++) ++ if (src[mlen] != cpy[mlen]) ++ break; ++ *dst++ = ((mlen - MATCH_MIN) << (NBBY - MATCH_BITS)) | ++ (offset >> NBBY); ++ *dst++ = (uchar_t)offset; ++ src += mlen; ++ } else { ++ *dst++ = *src++; ++ } ++ } ++ ++ kmem_free(lempel, LEMPEL_SIZE * sizeof (uint16_t)); ++ return (dst - (uchar_t *)d_start); ++} ++ ++/*ARGSUSED*/ ++int ++lzjb_decompress(void *s_start, void *d_start, size_t s_len, size_t d_len, int n) ++{ ++ uchar_t *src = s_start; ++ uchar_t *dst = d_start; ++ uchar_t *d_end = (uchar_t *)d_start + d_len; ++ uchar_t *cpy, copymap = 0; ++ int copymask = 1 << (NBBY - 1); ++ ++ while (dst < d_end) { ++ if ((copymask <<= 1) == (1 << NBBY)) { ++ copymask = 1; ++ copymap = *src++; ++ } ++ if (copymap & copymask) { ++ int mlen = (src[0] >> (NBBY - MATCH_BITS)) + MATCH_MIN; ++ int offset = ((src[0] << NBBY) | src[1]) & OFFSET_MASK; ++ src += 2; ++ if ((cpy = dst - offset) < (uchar_t *)d_start) ++ return (-1); ++ while (--mlen >= 0 && dst < d_end) ++ *dst++ = *cpy++; ++ } else { ++ *dst++ = *src++; ++ } ++ } ++ return (0); ++} +diff -uNr linux-3.2.33-go.orig/fs/zfs/zfs/Makefile linux-3.2.33-go/fs/zfs/zfs/Makefile +--- linux-3.2.33-go.orig/fs/zfs/zfs/Makefile 1970-01-01 01:00:00.000000000 +0100 ++++ linux-3.2.33-go/fs/zfs/zfs/Makefile 2012-11-16 23:25:34.374039048 +0100 +@@ -0,0 +1,91 @@ ++MODULE := zfs ++ ++EXTRA_CFLAGS = $(ZFS_MODULE_CFLAGS) -Wno-unused-but-set-variable -DHAVE_SPL -D_KERNEL -DTEXT_DOMAIN=\"zfs-linux-kernel\" -DNDEBUG ++ ++obj-$(CONFIG_ZFS) := $(MODULE).o ++ ++$(MODULE)-objs += arc.o ++$(MODULE)-objs += bplist.o ++$(MODULE)-objs += bpobj.o ++$(MODULE)-objs += dbuf.o ++$(MODULE)-objs += ddt.o ++$(MODULE)-objs += ddt_zap.o ++$(MODULE)-objs += dmu.o ++$(MODULE)-objs += dmu_diff.o ++$(MODULE)-objs += dmu_object.o ++$(MODULE)-objs += dmu_objset.o ++$(MODULE)-objs += dmu_send.o ++$(MODULE)-objs += dmu_traverse.o ++$(MODULE)-objs += dmu_tx.o ++$(MODULE)-objs += dmu_zfetch.o ++$(MODULE)-objs += dnode.o ++$(MODULE)-objs += dnode_sync.o ++$(MODULE)-objs += dsl_dataset.o ++$(MODULE)-objs += dsl_deadlist.o ++$(MODULE)-objs += dsl_deleg.o ++$(MODULE)-objs += dsl_dir.o ++$(MODULE)-objs += dsl_pool.o ++$(MODULE)-objs += 
dsl_prop.o ++$(MODULE)-objs += dsl_scan.o ++$(MODULE)-objs += dsl_synctask.o ++$(MODULE)-objs += fm.o ++$(MODULE)-objs += gzip.o ++$(MODULE)-objs += lzjb.o ++$(MODULE)-objs += metaslab.o ++$(MODULE)-objs += refcount.o ++$(MODULE)-objs += rrwlock.o ++$(MODULE)-objs += sa.o ++$(MODULE)-objs += sha256.o ++$(MODULE)-objs += spa.o ++$(MODULE)-objs += spa_boot.o ++$(MODULE)-objs += spa_config.o ++$(MODULE)-objs += spa_errlog.o ++$(MODULE)-objs += spa_history.o ++$(MODULE)-objs += spa_misc.o ++$(MODULE)-objs += space_map.o ++$(MODULE)-objs += txg.o ++$(MODULE)-objs += uberblock.o ++$(MODULE)-objs += unique.o ++$(MODULE)-objs += vdev.o ++$(MODULE)-objs += vdev_cache.o ++$(MODULE)-objs += vdev_disk.o ++$(MODULE)-objs += vdev_file.o ++$(MODULE)-objs += vdev_label.o ++$(MODULE)-objs += vdev_mirror.o ++$(MODULE)-objs += vdev_missing.o ++$(MODULE)-objs += vdev_queue.o ++$(MODULE)-objs += vdev_raidz.o ++$(MODULE)-objs += vdev_root.o ++$(MODULE)-objs += zap.o ++$(MODULE)-objs += zap_leaf.o ++$(MODULE)-objs += zap_micro.o ++$(MODULE)-objs += zfs_acl.o ++$(MODULE)-objs += zfs_byteswap.o ++$(MODULE)-objs += zfs_ctldir.o ++$(MODULE)-objs += zfs_debug.o ++$(MODULE)-objs += zfs_dir.o ++$(MODULE)-objs += zfs_fm.o ++$(MODULE)-objs += zfs_fuid.o ++$(MODULE)-objs += zfs_ioctl.o ++$(MODULE)-objs += zfs_log.o ++$(MODULE)-objs += zfs_onexit.o ++$(MODULE)-objs += zfs_replay.o ++$(MODULE)-objs += zfs_rlock.o ++$(MODULE)-objs += zfs_sa.o ++$(MODULE)-objs += zfs_vfsops.o ++$(MODULE)-objs += zfs_vnops.o ++$(MODULE)-objs += zfs_znode.o ++$(MODULE)-objs += zil.o ++$(MODULE)-objs += zio.o ++$(MODULE)-objs += zio_checksum.o ++$(MODULE)-objs += zio_compress.o ++$(MODULE)-objs += zio_inject.o ++$(MODULE)-objs += zle.o ++$(MODULE)-objs += zpl_ctldir.o ++$(MODULE)-objs += zpl_export.o ++$(MODULE)-objs += zpl_file.o ++$(MODULE)-objs += zpl_inode.o ++$(MODULE)-objs += zpl_super.o ++$(MODULE)-objs += zpl_xattr.o ++$(MODULE)-objs += zrlock.o ++$(MODULE)-objs += zvol.o +diff -uNr linux-3.2.33-go.orig/fs/zfs/zfs/Makefile.in linux-3.2.33-go/fs/zfs/zfs/Makefile.in +--- linux-3.2.33-go.orig/fs/zfs/zfs/Makefile.in 1970-01-01 01:00:00.000000000 +0100 ++++ linux-3.2.33-go/fs/zfs/zfs/Makefile.in 2012-11-16 23:25:34.352039300 +0100 +@@ -0,0 +1,91 @@ ++MODULE := zfs ++ ++EXTRA_CFLAGS = $(ZFS_MODULE_CFLAGS) @KERNELCPPFLAGS@ ++ ++obj-$(CONFIG_ZFS) := $(MODULE).o ++ ++$(MODULE)-objs += @top_srcdir@/module/zfs/arc.o ++$(MODULE)-objs += @top_srcdir@/module/zfs/bplist.o ++$(MODULE)-objs += @top_srcdir@/module/zfs/bpobj.o ++$(MODULE)-objs += @top_srcdir@/module/zfs/dbuf.o ++$(MODULE)-objs += @top_srcdir@/module/zfs/ddt.o ++$(MODULE)-objs += @top_srcdir@/module/zfs/ddt_zap.o ++$(MODULE)-objs += @top_srcdir@/module/zfs/dmu.o ++$(MODULE)-objs += @top_srcdir@/module/zfs/dmu_diff.o ++$(MODULE)-objs += @top_srcdir@/module/zfs/dmu_object.o ++$(MODULE)-objs += @top_srcdir@/module/zfs/dmu_objset.o ++$(MODULE)-objs += @top_srcdir@/module/zfs/dmu_send.o ++$(MODULE)-objs += @top_srcdir@/module/zfs/dmu_traverse.o ++$(MODULE)-objs += @top_srcdir@/module/zfs/dmu_tx.o ++$(MODULE)-objs += @top_srcdir@/module/zfs/dmu_zfetch.o ++$(MODULE)-objs += @top_srcdir@/module/zfs/dnode.o ++$(MODULE)-objs += @top_srcdir@/module/zfs/dnode_sync.o ++$(MODULE)-objs += @top_srcdir@/module/zfs/dsl_dataset.o ++$(MODULE)-objs += @top_srcdir@/module/zfs/dsl_deadlist.o ++$(MODULE)-objs += @top_srcdir@/module/zfs/dsl_deleg.o ++$(MODULE)-objs += @top_srcdir@/module/zfs/dsl_dir.o ++$(MODULE)-objs += @top_srcdir@/module/zfs/dsl_pool.o ++$(MODULE)-objs += @top_srcdir@/module/zfs/dsl_prop.o 
++$(MODULE)-objs += @top_srcdir@/module/zfs/dsl_scan.o ++$(MODULE)-objs += @top_srcdir@/module/zfs/dsl_synctask.o ++$(MODULE)-objs += @top_srcdir@/module/zfs/fm.o ++$(MODULE)-objs += @top_srcdir@/module/zfs/gzip.o ++$(MODULE)-objs += @top_srcdir@/module/zfs/lzjb.o ++$(MODULE)-objs += @top_srcdir@/module/zfs/metaslab.o ++$(MODULE)-objs += @top_srcdir@/module/zfs/refcount.o ++$(MODULE)-objs += @top_srcdir@/module/zfs/rrwlock.o ++$(MODULE)-objs += @top_srcdir@/module/zfs/sa.o ++$(MODULE)-objs += @top_srcdir@/module/zfs/sha256.o ++$(MODULE)-objs += @top_srcdir@/module/zfs/spa.o ++$(MODULE)-objs += @top_srcdir@/module/zfs/spa_boot.o ++$(MODULE)-objs += @top_srcdir@/module/zfs/spa_config.o ++$(MODULE)-objs += @top_srcdir@/module/zfs/spa_errlog.o ++$(MODULE)-objs += @top_srcdir@/module/zfs/spa_history.o ++$(MODULE)-objs += @top_srcdir@/module/zfs/spa_misc.o ++$(MODULE)-objs += @top_srcdir@/module/zfs/space_map.o ++$(MODULE)-objs += @top_srcdir@/module/zfs/txg.o ++$(MODULE)-objs += @top_srcdir@/module/zfs/uberblock.o ++$(MODULE)-objs += @top_srcdir@/module/zfs/unique.o ++$(MODULE)-objs += @top_srcdir@/module/zfs/vdev.o ++$(MODULE)-objs += @top_srcdir@/module/zfs/vdev_cache.o ++$(MODULE)-objs += @top_srcdir@/module/zfs/vdev_disk.o ++$(MODULE)-objs += @top_srcdir@/module/zfs/vdev_file.o ++$(MODULE)-objs += @top_srcdir@/module/zfs/vdev_label.o ++$(MODULE)-objs += @top_srcdir@/module/zfs/vdev_mirror.o ++$(MODULE)-objs += @top_srcdir@/module/zfs/vdev_missing.o ++$(MODULE)-objs += @top_srcdir@/module/zfs/vdev_queue.o ++$(MODULE)-objs += @top_srcdir@/module/zfs/vdev_raidz.o ++$(MODULE)-objs += @top_srcdir@/module/zfs/vdev_root.o ++$(MODULE)-objs += @top_srcdir@/module/zfs/zap.o ++$(MODULE)-objs += @top_srcdir@/module/zfs/zap_leaf.o ++$(MODULE)-objs += @top_srcdir@/module/zfs/zap_micro.o ++$(MODULE)-objs += @top_srcdir@/module/zfs/zfs_acl.o ++$(MODULE)-objs += @top_srcdir@/module/zfs/zfs_byteswap.o ++$(MODULE)-objs += @top_srcdir@/module/zfs/zfs_ctldir.o ++$(MODULE)-objs += @top_srcdir@/module/zfs/zfs_debug.o ++$(MODULE)-objs += @top_srcdir@/module/zfs/zfs_dir.o ++$(MODULE)-objs += @top_srcdir@/module/zfs/zfs_fm.o ++$(MODULE)-objs += @top_srcdir@/module/zfs/zfs_fuid.o ++$(MODULE)-objs += @top_srcdir@/module/zfs/zfs_ioctl.o ++$(MODULE)-objs += @top_srcdir@/module/zfs/zfs_log.o ++$(MODULE)-objs += @top_srcdir@/module/zfs/zfs_onexit.o ++$(MODULE)-objs += @top_srcdir@/module/zfs/zfs_replay.o ++$(MODULE)-objs += @top_srcdir@/module/zfs/zfs_rlock.o ++$(MODULE)-objs += @top_srcdir@/module/zfs/zfs_sa.o ++$(MODULE)-objs += @top_srcdir@/module/zfs/zfs_vfsops.o ++$(MODULE)-objs += @top_srcdir@/module/zfs/zfs_vnops.o ++$(MODULE)-objs += @top_srcdir@/module/zfs/zfs_znode.o ++$(MODULE)-objs += @top_srcdir@/module/zfs/zil.o ++$(MODULE)-objs += @top_srcdir@/module/zfs/zio.o ++$(MODULE)-objs += @top_srcdir@/module/zfs/zio_checksum.o ++$(MODULE)-objs += @top_srcdir@/module/zfs/zio_compress.o ++$(MODULE)-objs += @top_srcdir@/module/zfs/zio_inject.o ++$(MODULE)-objs += @top_srcdir@/module/zfs/zle.o ++$(MODULE)-objs += @top_srcdir@/module/zfs/zpl_ctldir.o ++$(MODULE)-objs += @top_srcdir@/module/zfs/zpl_export.o ++$(MODULE)-objs += @top_srcdir@/module/zfs/zpl_file.o ++$(MODULE)-objs += @top_srcdir@/module/zfs/zpl_inode.o ++$(MODULE)-objs += @top_srcdir@/module/zfs/zpl_super.o ++$(MODULE)-objs += @top_srcdir@/module/zfs/zpl_xattr.o ++$(MODULE)-objs += @top_srcdir@/module/zfs/zrlock.o ++$(MODULE)-objs += @top_srcdir@/module/zfs/zvol.o +diff -uNr linux-3.2.33-go.orig/fs/zfs/zfs/metaslab.c linux-3.2.33-go/fs/zfs/zfs/metaslab.c 
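The lzjb.c hunk above encodes each back-reference as a two-byte copy tuple, with MATCH_BITS, MATCH_MIN, NBBY and OFFSET_MASK deciding how match length and offset are packed. The standalone sketch below (illustrative only, not part of the patch; the values mlen = 17 and offset = 300 are hypothetical) round-trips one tuple through the same encode/decode arithmetic used by lzjb_compress() and lzjb_decompress():

/*
 * Illustrative sketch of the LZJB copy-tuple layout, using the same
 * constants as the lzjb.c hunk above.  Not part of the patch.
 */
#include <stdint.h>
#include <stdio.h>

#define	MATCH_BITS	6
#define	MATCH_MIN	3
#define	NBBY		8
#define	OFFSET_MASK	((1 << (16 - MATCH_BITS)) - 1)

int
main(void)
{
	int mlen = 17, offset = 300;	/* hypothetical match */
	uint8_t b0, b1;

	/* encode, as in lzjb_compress(): length in the top 6 bits of b0 */
	b0 = ((mlen - MATCH_MIN) << (NBBY - MATCH_BITS)) | (offset >> NBBY);
	b1 = (uint8_t)offset;

	/* decode, as in lzjb_decompress(): recovers mlen=17, offset=300 */
	printf("mlen=%d offset=%d\n",
	    (b0 >> (NBBY - MATCH_BITS)) + MATCH_MIN,
	    ((b0 << NBBY) | b1) & OFFSET_MASK);
	return (0);
}

With MATCH_BITS = 6 the match length is capped at 66 bytes (MATCH_MAX) and the remaining 10 bits cap the back-reference offset at 1023 bytes (OFFSET_MASK), which is why shorter or farther matches are emitted as literal bytes.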
+--- linux-3.2.33-go.orig/fs/zfs/zfs/metaslab.c 1970-01-01 01:00:00.000000000 +0100 ++++ linux-3.2.33-go/fs/zfs/zfs/metaslab.c 2012-11-16 23:25:34.350039322 +0100 +@@ -0,0 +1,1748 @@ ++/* ++ * CDDL HEADER START ++ * ++ * The contents of this file are subject to the terms of the ++ * Common Development and Distribution License (the "License"). ++ * You may not use this file except in compliance with the License. ++ * ++ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE ++ * or http://www.opensolaris.org/os/licensing. ++ * See the License for the specific language governing permissions ++ * and limitations under the License. ++ * ++ * When distributing Covered Code, include this CDDL HEADER in each ++ * file and include the License file at usr/src/OPENSOLARIS.LICENSE. ++ * If applicable, add the following below this CDDL HEADER, with the ++ * fields enclosed by brackets "[]" replaced with your own identifying ++ * information: Portions Copyright [yyyy] [name of copyright owner] ++ * ++ * CDDL HEADER END ++ */ ++/* ++ * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2012 by Delphix. All rights reserved. ++ */ ++ ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++ ++#define WITH_DF_BLOCK_ALLOCATOR ++ ++/* ++ * Allow allocations to switch to gang blocks quickly. We do this to ++ * avoid having to load lots of space_maps in a given txg. There are, ++ * however, some cases where we want to avoid "fast" ganging and instead ++ * we want to do an exhaustive search of all metaslabs on this device. ++ * Currently we don't allow any gang, zil, or dump device related allocations ++ * to "fast" gang. ++ */ ++#define CAN_FASTGANG(flags) \ ++ (!((flags) & (METASLAB_GANG_CHILD | METASLAB_GANG_HEADER | \ ++ METASLAB_GANG_AVOID))) ++ ++uint64_t metaslab_aliquot = 512ULL << 10; ++uint64_t metaslab_gang_bang = SPA_MAXBLOCKSIZE + 1; /* force gang blocks */ ++ ++/* ++ * This value defines the number of allowed allocation failures per vdev. ++ * If a device reaches this threshold in a given txg then we consider skipping ++ * allocations on that device. ++ */ ++int zfs_mg_alloc_failures; ++ ++/* ++ * Metaslab debugging: when set, keeps all space maps in core to verify frees. ++ */ ++static int metaslab_debug = 0; ++ ++/* ++ * Minimum size which forces the dynamic allocator to change ++ * it's allocation strategy. Once the space map cannot satisfy ++ * an allocation of this size then it switches to using more ++ * aggressive strategy (i.e search by size rather than offset). ++ */ ++uint64_t metaslab_df_alloc_threshold = SPA_MAXBLOCKSIZE; ++ ++/* ++ * The minimum free space, in percent, which must be available ++ * in a space map to continue allocations in a first-fit fashion. ++ * Once the space_map's free space drops below this level we dynamically ++ * switch to using best-fit allocations. ++ */ ++int metaslab_df_free_pct = 4; ++ ++/* ++ * A metaslab is considered "free" if it contains a contiguous ++ * segment which is greater than metaslab_min_alloc_size. ++ */ ++uint64_t metaslab_min_alloc_size = DMU_MAX_ACCESS; ++ ++/* ++ * Max number of space_maps to prefetch. ++ */ ++int metaslab_prefetch_limit = SPA_DVAS_PER_BP; ++ ++/* ++ * Percentage bonus multiplier for metaslabs that are in the bonus area. 
++ */ ++int metaslab_smo_bonus_pct = 150; ++ ++/* ++ * ========================================================================== ++ * Metaslab classes ++ * ========================================================================== ++ */ ++metaslab_class_t * ++metaslab_class_create(spa_t *spa, space_map_ops_t *ops) ++{ ++ metaslab_class_t *mc; ++ ++ mc = kmem_zalloc(sizeof (metaslab_class_t), KM_PUSHPAGE); ++ ++ mc->mc_spa = spa; ++ mc->mc_rotor = NULL; ++ mc->mc_ops = ops; ++ mutex_init(&mc->mc_fastwrite_lock, NULL, MUTEX_DEFAULT, NULL); ++ ++ return (mc); ++} ++ ++void ++metaslab_class_destroy(metaslab_class_t *mc) ++{ ++ ASSERT(mc->mc_rotor == NULL); ++ ASSERT(mc->mc_alloc == 0); ++ ASSERT(mc->mc_deferred == 0); ++ ASSERT(mc->mc_space == 0); ++ ASSERT(mc->mc_dspace == 0); ++ ++ mutex_destroy(&mc->mc_fastwrite_lock); ++ kmem_free(mc, sizeof (metaslab_class_t)); ++} ++ ++int ++metaslab_class_validate(metaslab_class_t *mc) ++{ ++ metaslab_group_t *mg; ++ vdev_t *vd; ++ ++ /* ++ * Must hold one of the spa_config locks. ++ */ ++ ASSERT(spa_config_held(mc->mc_spa, SCL_ALL, RW_READER) || ++ spa_config_held(mc->mc_spa, SCL_ALL, RW_WRITER)); ++ ++ if ((mg = mc->mc_rotor) == NULL) ++ return (0); ++ ++ do { ++ vd = mg->mg_vd; ++ ASSERT(vd->vdev_mg != NULL); ++ ASSERT3P(vd->vdev_top, ==, vd); ++ ASSERT3P(mg->mg_class, ==, mc); ++ ASSERT3P(vd->vdev_ops, !=, &vdev_hole_ops); ++ } while ((mg = mg->mg_next) != mc->mc_rotor); ++ ++ return (0); ++} ++ ++void ++metaslab_class_space_update(metaslab_class_t *mc, int64_t alloc_delta, ++ int64_t defer_delta, int64_t space_delta, int64_t dspace_delta) ++{ ++ atomic_add_64(&mc->mc_alloc, alloc_delta); ++ atomic_add_64(&mc->mc_deferred, defer_delta); ++ atomic_add_64(&mc->mc_space, space_delta); ++ atomic_add_64(&mc->mc_dspace, dspace_delta); ++} ++ ++uint64_t ++metaslab_class_get_alloc(metaslab_class_t *mc) ++{ ++ return (mc->mc_alloc); ++} ++ ++uint64_t ++metaslab_class_get_deferred(metaslab_class_t *mc) ++{ ++ return (mc->mc_deferred); ++} ++ ++uint64_t ++metaslab_class_get_space(metaslab_class_t *mc) ++{ ++ return (mc->mc_space); ++} ++ ++uint64_t ++metaslab_class_get_dspace(metaslab_class_t *mc) ++{ ++ return (spa_deflate(mc->mc_spa) ? mc->mc_dspace : mc->mc_space); ++} ++ ++/* ++ * ========================================================================== ++ * Metaslab groups ++ * ========================================================================== ++ */ ++static int ++metaslab_compare(const void *x1, const void *x2) ++{ ++ const metaslab_t *m1 = x1; ++ const metaslab_t *m2 = x2; ++ ++ if (m1->ms_weight < m2->ms_weight) ++ return (1); ++ if (m1->ms_weight > m2->ms_weight) ++ return (-1); ++ ++ /* ++ * If the weights are identical, use the offset to force uniqueness. 
++ */ ++ if (m1->ms_map.sm_start < m2->ms_map.sm_start) ++ return (-1); ++ if (m1->ms_map.sm_start > m2->ms_map.sm_start) ++ return (1); ++ ++ ASSERT3P(m1, ==, m2); ++ ++ return (0); ++} ++ ++metaslab_group_t * ++metaslab_group_create(metaslab_class_t *mc, vdev_t *vd) ++{ ++ metaslab_group_t *mg; ++ ++ mg = kmem_zalloc(sizeof (metaslab_group_t), KM_PUSHPAGE); ++ mutex_init(&mg->mg_lock, NULL, MUTEX_DEFAULT, NULL); ++ avl_create(&mg->mg_metaslab_tree, metaslab_compare, ++ sizeof (metaslab_t), offsetof(struct metaslab, ms_group_node)); ++ mg->mg_vd = vd; ++ mg->mg_class = mc; ++ mg->mg_activation_count = 0; ++ ++ return (mg); ++} ++ ++void ++metaslab_group_destroy(metaslab_group_t *mg) ++{ ++ ASSERT(mg->mg_prev == NULL); ++ ASSERT(mg->mg_next == NULL); ++ /* ++ * We may have gone below zero with the activation count ++ * either because we never activated in the first place or ++ * because we're done, and possibly removing the vdev. ++ */ ++ ASSERT(mg->mg_activation_count <= 0); ++ ++ avl_destroy(&mg->mg_metaslab_tree); ++ mutex_destroy(&mg->mg_lock); ++ kmem_free(mg, sizeof (metaslab_group_t)); ++} ++ ++void ++metaslab_group_activate(metaslab_group_t *mg) ++{ ++ metaslab_class_t *mc = mg->mg_class; ++ metaslab_group_t *mgprev, *mgnext; ++ ++ ASSERT(spa_config_held(mc->mc_spa, SCL_ALLOC, RW_WRITER)); ++ ++ ASSERT(mc->mc_rotor != mg); ++ ASSERT(mg->mg_prev == NULL); ++ ASSERT(mg->mg_next == NULL); ++ ASSERT(mg->mg_activation_count <= 0); ++ ++ if (++mg->mg_activation_count <= 0) ++ return; ++ ++ mg->mg_aliquot = metaslab_aliquot * MAX(1, mg->mg_vd->vdev_children); ++ ++ if ((mgprev = mc->mc_rotor) == NULL) { ++ mg->mg_prev = mg; ++ mg->mg_next = mg; ++ } else { ++ mgnext = mgprev->mg_next; ++ mg->mg_prev = mgprev; ++ mg->mg_next = mgnext; ++ mgprev->mg_next = mg; ++ mgnext->mg_prev = mg; ++ } ++ mc->mc_rotor = mg; ++} ++ ++void ++metaslab_group_passivate(metaslab_group_t *mg) ++{ ++ metaslab_class_t *mc = mg->mg_class; ++ metaslab_group_t *mgprev, *mgnext; ++ ++ ASSERT(spa_config_held(mc->mc_spa, SCL_ALLOC, RW_WRITER)); ++ ++ if (--mg->mg_activation_count != 0) { ++ ASSERT(mc->mc_rotor != mg); ++ ASSERT(mg->mg_prev == NULL); ++ ASSERT(mg->mg_next == NULL); ++ ASSERT(mg->mg_activation_count < 0); ++ return; ++ } ++ ++ mgprev = mg->mg_prev; ++ mgnext = mg->mg_next; ++ ++ if (mg == mgnext) { ++ mc->mc_rotor = NULL; ++ } else { ++ mc->mc_rotor = mgnext; ++ mgprev->mg_next = mgnext; ++ mgnext->mg_prev = mgprev; ++ } ++ ++ mg->mg_prev = NULL; ++ mg->mg_next = NULL; ++} ++ ++static void ++metaslab_group_add(metaslab_group_t *mg, metaslab_t *msp) ++{ ++ mutex_enter(&mg->mg_lock); ++ ASSERT(msp->ms_group == NULL); ++ msp->ms_group = mg; ++ msp->ms_weight = 0; ++ avl_add(&mg->mg_metaslab_tree, msp); ++ mutex_exit(&mg->mg_lock); ++} ++ ++static void ++metaslab_group_remove(metaslab_group_t *mg, metaslab_t *msp) ++{ ++ mutex_enter(&mg->mg_lock); ++ ASSERT(msp->ms_group == mg); ++ avl_remove(&mg->mg_metaslab_tree, msp); ++ msp->ms_group = NULL; ++ mutex_exit(&mg->mg_lock); ++} ++ ++static void ++metaslab_group_sort(metaslab_group_t *mg, metaslab_t *msp, uint64_t weight) ++{ ++ /* ++ * Although in principle the weight can be any value, in ++ * practice we do not use values in the range [1, 510]. 
++ */ ++ ASSERT(weight >= SPA_MINBLOCKSIZE-1 || weight == 0); ++ ASSERT(MUTEX_HELD(&msp->ms_lock)); ++ ++ mutex_enter(&mg->mg_lock); ++ ASSERT(msp->ms_group == mg); ++ avl_remove(&mg->mg_metaslab_tree, msp); ++ msp->ms_weight = weight; ++ avl_add(&mg->mg_metaslab_tree, msp); ++ mutex_exit(&mg->mg_lock); ++} ++ ++/* ++ * ========================================================================== ++ * Common allocator routines ++ * ========================================================================== ++ */ ++static int ++metaslab_segsize_compare(const void *x1, const void *x2) ++{ ++ const space_seg_t *s1 = x1; ++ const space_seg_t *s2 = x2; ++ uint64_t ss_size1 = s1->ss_end - s1->ss_start; ++ uint64_t ss_size2 = s2->ss_end - s2->ss_start; ++ ++ if (ss_size1 < ss_size2) ++ return (-1); ++ if (ss_size1 > ss_size2) ++ return (1); ++ ++ if (s1->ss_start < s2->ss_start) ++ return (-1); ++ if (s1->ss_start > s2->ss_start) ++ return (1); ++ ++ return (0); ++} ++ ++#if defined(WITH_FF_BLOCK_ALLOCATOR) || \ ++ defined(WITH_DF_BLOCK_ALLOCATOR) || \ ++ defined(WITH_CDF_BLOCK_ALLOCATOR) ++/* ++ * This is a helper function that can be used by the allocator to find ++ * a suitable block to allocate. This will search the specified AVL ++ * tree looking for a block that matches the specified criteria. ++ */ ++static uint64_t ++metaslab_block_picker(avl_tree_t *t, uint64_t *cursor, uint64_t size, ++ uint64_t align) ++{ ++ space_seg_t *ss, ssearch; ++ avl_index_t where; ++ ++ ssearch.ss_start = *cursor; ++ ssearch.ss_end = *cursor + size; ++ ++ ss = avl_find(t, &ssearch, &where); ++ if (ss == NULL) ++ ss = avl_nearest(t, where, AVL_AFTER); ++ ++ while (ss != NULL) { ++ uint64_t offset = P2ROUNDUP(ss->ss_start, align); ++ ++ if (offset + size <= ss->ss_end) { ++ *cursor = offset + size; ++ return (offset); ++ } ++ ss = AVL_NEXT(t, ss); ++ } ++ ++ /* ++ * If we know we've searched the whole map (*cursor == 0), give up. ++ * Otherwise, reset the cursor to the beginning and try again. ++ */ ++ if (*cursor == 0) ++ return (-1ULL); ++ ++ *cursor = 0; ++ return (metaslab_block_picker(t, cursor, size, align)); ++} ++#endif /* WITH_FF/DF/CDF_BLOCK_ALLOCATOR */ ++ ++static void ++metaslab_pp_load(space_map_t *sm) ++{ ++ space_seg_t *ss; ++ ++ ASSERT(sm->sm_ppd == NULL); ++ sm->sm_ppd = kmem_zalloc(64 * sizeof (uint64_t), KM_PUSHPAGE); ++ ++ sm->sm_pp_root = kmem_alloc(sizeof (avl_tree_t), KM_PUSHPAGE); ++ avl_create(sm->sm_pp_root, metaslab_segsize_compare, ++ sizeof (space_seg_t), offsetof(struct space_seg, ss_pp_node)); ++ ++ for (ss = avl_first(&sm->sm_root); ss; ss = AVL_NEXT(&sm->sm_root, ss)) ++ avl_add(sm->sm_pp_root, ss); ++} ++ ++static void ++metaslab_pp_unload(space_map_t *sm) ++{ ++ void *cookie = NULL; ++ ++ kmem_free(sm->sm_ppd, 64 * sizeof (uint64_t)); ++ sm->sm_ppd = NULL; ++ ++ while (avl_destroy_nodes(sm->sm_pp_root, &cookie) != NULL) { ++ /* tear down the tree */ ++ } ++ ++ avl_destroy(sm->sm_pp_root); ++ kmem_free(sm->sm_pp_root, sizeof (avl_tree_t)); ++ sm->sm_pp_root = NULL; ++} ++ ++/* ARGSUSED */ ++static void ++metaslab_pp_claim(space_map_t *sm, uint64_t start, uint64_t size) ++{ ++ /* No need to update cursor */ ++} ++ ++/* ARGSUSED */ ++static void ++metaslab_pp_free(space_map_t *sm, uint64_t start, uint64_t size) ++{ ++ /* No need to update cursor */ ++} ++ ++/* ++ * Return the maximum contiguous segment within the metaslab. 
++ */ ++uint64_t ++metaslab_pp_maxsize(space_map_t *sm) ++{ ++ avl_tree_t *t = sm->sm_pp_root; ++ space_seg_t *ss; ++ ++ if (t == NULL || (ss = avl_last(t)) == NULL) ++ return (0ULL); ++ ++ return (ss->ss_end - ss->ss_start); ++} ++ ++#if defined(WITH_FF_BLOCK_ALLOCATOR) ++/* ++ * ========================================================================== ++ * The first-fit block allocator ++ * ========================================================================== ++ */ ++static uint64_t ++metaslab_ff_alloc(space_map_t *sm, uint64_t size) ++{ ++ avl_tree_t *t = &sm->sm_root; ++ uint64_t align = size & -size; ++ uint64_t *cursor = (uint64_t *)sm->sm_ppd + highbit(align) - 1; ++ ++ return (metaslab_block_picker(t, cursor, size, align)); ++} ++ ++/* ARGSUSED */ ++boolean_t ++metaslab_ff_fragmented(space_map_t *sm) ++{ ++ return (B_TRUE); ++} ++ ++static space_map_ops_t metaslab_ff_ops = { ++ metaslab_pp_load, ++ metaslab_pp_unload, ++ metaslab_ff_alloc, ++ metaslab_pp_claim, ++ metaslab_pp_free, ++ metaslab_pp_maxsize, ++ metaslab_ff_fragmented ++}; ++ ++space_map_ops_t *zfs_metaslab_ops = &metaslab_ff_ops; ++#endif /* WITH_FF_BLOCK_ALLOCATOR */ ++ ++#if defined(WITH_DF_BLOCK_ALLOCATOR) ++/* ++ * ========================================================================== ++ * Dynamic block allocator - ++ * Uses the first fit allocation scheme until space get low and then ++ * adjusts to a best fit allocation method. Uses metaslab_df_alloc_threshold ++ * and metaslab_df_free_pct to determine when to switch the allocation scheme. ++ * ========================================================================== ++ */ ++static uint64_t ++metaslab_df_alloc(space_map_t *sm, uint64_t size) ++{ ++ avl_tree_t *t = &sm->sm_root; ++ uint64_t align = size & -size; ++ uint64_t *cursor = (uint64_t *)sm->sm_ppd + highbit(align) - 1; ++ uint64_t max_size = metaslab_pp_maxsize(sm); ++ int free_pct = sm->sm_space * 100 / sm->sm_size; ++ ++ ASSERT(MUTEX_HELD(sm->sm_lock)); ++ ASSERT3U(avl_numnodes(&sm->sm_root), ==, avl_numnodes(sm->sm_pp_root)); ++ ++ if (max_size < size) ++ return (-1ULL); ++ ++ /* ++ * If we're running low on space switch to using the size ++ * sorted AVL tree (best-fit). 
++ */ ++ if (max_size < metaslab_df_alloc_threshold || ++ free_pct < metaslab_df_free_pct) { ++ t = sm->sm_pp_root; ++ *cursor = 0; ++ } ++ ++ return (metaslab_block_picker(t, cursor, size, 1ULL)); ++} ++ ++static boolean_t ++metaslab_df_fragmented(space_map_t *sm) ++{ ++ uint64_t max_size = metaslab_pp_maxsize(sm); ++ int free_pct = sm->sm_space * 100 / sm->sm_size; ++ ++ if (max_size >= metaslab_df_alloc_threshold && ++ free_pct >= metaslab_df_free_pct) ++ return (B_FALSE); ++ ++ return (B_TRUE); ++} ++ ++static space_map_ops_t metaslab_df_ops = { ++ metaslab_pp_load, ++ metaslab_pp_unload, ++ metaslab_df_alloc, ++ metaslab_pp_claim, ++ metaslab_pp_free, ++ metaslab_pp_maxsize, ++ metaslab_df_fragmented ++}; ++ ++space_map_ops_t *zfs_metaslab_ops = &metaslab_df_ops; ++#endif /* WITH_DF_BLOCK_ALLOCATOR */ ++ ++/* ++ * ========================================================================== ++ * Other experimental allocators ++ * ========================================================================== ++ */ ++#if defined(WITH_CDF_BLOCK_ALLOCATOR) ++static uint64_t ++metaslab_cdf_alloc(space_map_t *sm, uint64_t size) ++{ ++ avl_tree_t *t = &sm->sm_root; ++ uint64_t *cursor = (uint64_t *)sm->sm_ppd; ++ uint64_t *extent_end = (uint64_t *)sm->sm_ppd + 1; ++ uint64_t max_size = metaslab_pp_maxsize(sm); ++ uint64_t rsize = size; ++ uint64_t offset = 0; ++ ++ ASSERT(MUTEX_HELD(sm->sm_lock)); ++ ASSERT3U(avl_numnodes(&sm->sm_root), ==, avl_numnodes(sm->sm_pp_root)); ++ ++ if (max_size < size) ++ return (-1ULL); ++ ++ ASSERT3U(*extent_end, >=, *cursor); ++ ++ /* ++ * If we're running low on space switch to using the size ++ * sorted AVL tree (best-fit). ++ */ ++ if ((*cursor + size) > *extent_end) { ++ ++ t = sm->sm_pp_root; ++ *cursor = *extent_end = 0; ++ ++ if (max_size > 2 * SPA_MAXBLOCKSIZE) ++ rsize = MIN(metaslab_min_alloc_size, max_size); ++ offset = metaslab_block_picker(t, extent_end, rsize, 1ULL); ++ if (offset != -1) ++ *cursor = offset + size; ++ } else { ++ offset = metaslab_block_picker(t, cursor, rsize, 1ULL); ++ } ++ ASSERT3U(*cursor, <=, *extent_end); ++ return (offset); ++} ++ ++static boolean_t ++metaslab_cdf_fragmented(space_map_t *sm) ++{ ++ uint64_t max_size = metaslab_pp_maxsize(sm); ++ ++ if (max_size > (metaslab_min_alloc_size * 10)) ++ return (B_FALSE); ++ return (B_TRUE); ++} ++ ++static space_map_ops_t metaslab_cdf_ops = { ++ metaslab_pp_load, ++ metaslab_pp_unload, ++ metaslab_cdf_alloc, ++ metaslab_pp_claim, ++ metaslab_pp_free, ++ metaslab_pp_maxsize, ++ metaslab_cdf_fragmented ++}; ++ ++space_map_ops_t *zfs_metaslab_ops = &metaslab_cdf_ops; ++#endif /* WITH_CDF_BLOCK_ALLOCATOR */ ++ ++#if defined(WITH_NDF_BLOCK_ALLOCATOR) ++uint64_t metaslab_ndf_clump_shift = 4; ++ ++static uint64_t ++metaslab_ndf_alloc(space_map_t *sm, uint64_t size) ++{ ++ avl_tree_t *t = &sm->sm_root; ++ avl_index_t where; ++ space_seg_t *ss, ssearch; ++ uint64_t hbit = highbit(size); ++ uint64_t *cursor = (uint64_t *)sm->sm_ppd + hbit - 1; ++ uint64_t max_size = metaslab_pp_maxsize(sm); ++ ++ ASSERT(MUTEX_HELD(sm->sm_lock)); ++ ASSERT3U(avl_numnodes(&sm->sm_root), ==, avl_numnodes(sm->sm_pp_root)); ++ ++ if (max_size < size) ++ return (-1ULL); ++ ++ ssearch.ss_start = *cursor; ++ ssearch.ss_end = *cursor + size; ++ ++ ss = avl_find(t, &ssearch, &where); ++ if (ss == NULL || (ss->ss_start + size > ss->ss_end)) { ++ t = sm->sm_pp_root; ++ ++ ssearch.ss_start = 0; ++ ssearch.ss_end = MIN(max_size, ++ 1ULL << (hbit + metaslab_ndf_clump_shift)); ++ ss = avl_find(t, &ssearch, &where); ++ if (ss == 
NULL) ++ ss = avl_nearest(t, where, AVL_AFTER); ++ ASSERT(ss != NULL); ++ } ++ ++ if (ss != NULL) { ++ if (ss->ss_start + size <= ss->ss_end) { ++ *cursor = ss->ss_start + size; ++ return (ss->ss_start); ++ } ++ } ++ return (-1ULL); ++} ++ ++static boolean_t ++metaslab_ndf_fragmented(space_map_t *sm) ++{ ++ uint64_t max_size = metaslab_pp_maxsize(sm); ++ ++ if (max_size > (metaslab_min_alloc_size << metaslab_ndf_clump_shift)) ++ return (B_FALSE); ++ return (B_TRUE); ++} ++ ++ ++static space_map_ops_t metaslab_ndf_ops = { ++ metaslab_pp_load, ++ metaslab_pp_unload, ++ metaslab_ndf_alloc, ++ metaslab_pp_claim, ++ metaslab_pp_free, ++ metaslab_pp_maxsize, ++ metaslab_ndf_fragmented ++}; ++ ++space_map_ops_t *zfs_metaslab_ops = &metaslab_ndf_ops; ++#endif /* WITH_NDF_BLOCK_ALLOCATOR */ ++ ++/* ++ * ========================================================================== ++ * Metaslabs ++ * ========================================================================== ++ */ ++metaslab_t * ++metaslab_init(metaslab_group_t *mg, space_map_obj_t *smo, ++ uint64_t start, uint64_t size, uint64_t txg) ++{ ++ vdev_t *vd = mg->mg_vd; ++ metaslab_t *msp; ++ ++ msp = kmem_zalloc(sizeof (metaslab_t), KM_PUSHPAGE); ++ mutex_init(&msp->ms_lock, NULL, MUTEX_DEFAULT, NULL); ++ ++ msp->ms_smo_syncing = *smo; ++ ++ /* ++ * We create the main space map here, but we don't create the ++ * allocmaps and freemaps until metaslab_sync_done(). This serves ++ * two purposes: it allows metaslab_sync_done() to detect the ++ * addition of new space; and for debugging, it ensures that we'd ++ * data fault on any attempt to use this metaslab before it's ready. ++ */ ++ space_map_create(&msp->ms_map, start, size, ++ vd->vdev_ashift, &msp->ms_lock); ++ ++ metaslab_group_add(mg, msp); ++ ++ if (metaslab_debug && smo->smo_object != 0) { ++ mutex_enter(&msp->ms_lock); ++ VERIFY(space_map_load(&msp->ms_map, mg->mg_class->mc_ops, ++ SM_FREE, smo, spa_meta_objset(vd->vdev_spa)) == 0); ++ mutex_exit(&msp->ms_lock); ++ } ++ ++ /* ++ * If we're opening an existing pool (txg == 0) or creating ++ * a new one (txg == TXG_INITIAL), all space is available now. ++ * If we're adding space to an existing pool, the new space ++ * does not become available until after this txg has synced. 
++ */ ++ if (txg <= TXG_INITIAL) ++ metaslab_sync_done(msp, 0); ++ ++ if (txg != 0) { ++ vdev_dirty(vd, 0, NULL, txg); ++ vdev_dirty(vd, VDD_METASLAB, msp, txg); ++ } ++ ++ return (msp); ++} ++ ++void ++metaslab_fini(metaslab_t *msp) ++{ ++ metaslab_group_t *mg = msp->ms_group; ++ int t; ++ ++ vdev_space_update(mg->mg_vd, ++ -msp->ms_smo.smo_alloc, 0, -msp->ms_map.sm_size); ++ ++ metaslab_group_remove(mg, msp); ++ ++ mutex_enter(&msp->ms_lock); ++ ++ space_map_unload(&msp->ms_map); ++ space_map_destroy(&msp->ms_map); ++ ++ for (t = 0; t < TXG_SIZE; t++) { ++ space_map_destroy(&msp->ms_allocmap[t]); ++ space_map_destroy(&msp->ms_freemap[t]); ++ } ++ ++ for (t = 0; t < TXG_DEFER_SIZE; t++) ++ space_map_destroy(&msp->ms_defermap[t]); ++ ++ ASSERT3S(msp->ms_deferspace, ==, 0); ++ ++ mutex_exit(&msp->ms_lock); ++ mutex_destroy(&msp->ms_lock); ++ ++ kmem_free(msp, sizeof (metaslab_t)); ++} ++ ++#define METASLAB_WEIGHT_PRIMARY (1ULL << 63) ++#define METASLAB_WEIGHT_SECONDARY (1ULL << 62) ++#define METASLAB_ACTIVE_MASK \ ++ (METASLAB_WEIGHT_PRIMARY | METASLAB_WEIGHT_SECONDARY) ++ ++static uint64_t ++metaslab_weight(metaslab_t *msp) ++{ ++ metaslab_group_t *mg = msp->ms_group; ++ space_map_t *sm = &msp->ms_map; ++ space_map_obj_t *smo = &msp->ms_smo; ++ vdev_t *vd = mg->mg_vd; ++ uint64_t weight, space; ++ ++ ASSERT(MUTEX_HELD(&msp->ms_lock)); ++ ++ /* ++ * The baseline weight is the metaslab's free space. ++ */ ++ space = sm->sm_size - smo->smo_alloc; ++ weight = space; ++ ++ /* ++ * Modern disks have uniform bit density and constant angular velocity. ++ * Therefore, the outer recording zones are faster (higher bandwidth) ++ * than the inner zones by the ratio of outer to inner track diameter, ++ * which is typically around 2:1. We account for this by assigning ++ * higher weight to lower metaslabs (multiplier ranging from 2x to 1x). ++ * In effect, this means that we'll select the metaslab with the most ++ * free bandwidth rather than simply the one with the most free space. ++ */ ++ weight = 2 * weight - ++ ((sm->sm_start >> vd->vdev_ms_shift) * weight) / vd->vdev_ms_count; ++ ASSERT(weight >= space && weight <= 2 * space); ++ ++ /* ++ * For locality, assign higher weight to metaslabs which have ++ * a lower offset than what we've already activated. ++ */ ++ if (sm->sm_start <= mg->mg_bonus_area) ++ weight *= (metaslab_smo_bonus_pct / 100); ++ ASSERT(weight >= space && ++ weight <= 2 * (metaslab_smo_bonus_pct / 100) * space); ++ ++ if (sm->sm_loaded && !sm->sm_ops->smop_fragmented(sm)) { ++ /* ++ * If this metaslab is one we're actively using, adjust its ++ * weight to make it preferable to any inactive metaslab so ++ * we'll polish it off. 
++ */ ++ weight |= (msp->ms_weight & METASLAB_ACTIVE_MASK); ++ } ++ return (weight); ++} ++ ++static void ++metaslab_prefetch(metaslab_group_t *mg) ++{ ++ spa_t *spa = mg->mg_vd->vdev_spa; ++ metaslab_t *msp; ++ avl_tree_t *t = &mg->mg_metaslab_tree; ++ int m; ++ ++ mutex_enter(&mg->mg_lock); ++ ++ /* ++ * Prefetch the next potential metaslabs ++ */ ++ for (msp = avl_first(t), m = 0; msp; msp = AVL_NEXT(t, msp), m++) { ++ space_map_t *sm = &msp->ms_map; ++ space_map_obj_t *smo = &msp->ms_smo; ++ ++ /* If we have reached our prefetch limit then we're done */ ++ if (m >= metaslab_prefetch_limit) ++ break; ++ ++ if (!sm->sm_loaded && smo->smo_object != 0) { ++ mutex_exit(&mg->mg_lock); ++ dmu_prefetch(spa_meta_objset(spa), smo->smo_object, ++ 0ULL, smo->smo_objsize); ++ mutex_enter(&mg->mg_lock); ++ } ++ } ++ mutex_exit(&mg->mg_lock); ++} ++ ++static int ++metaslab_activate(metaslab_t *msp, uint64_t activation_weight) ++{ ++ metaslab_group_t *mg = msp->ms_group; ++ space_map_t *sm = &msp->ms_map; ++ space_map_ops_t *sm_ops = msp->ms_group->mg_class->mc_ops; ++ int t; ++ ++ ASSERT(MUTEX_HELD(&msp->ms_lock)); ++ ++ if ((msp->ms_weight & METASLAB_ACTIVE_MASK) == 0) { ++ space_map_load_wait(sm); ++ if (!sm->sm_loaded) { ++ int error = space_map_load(sm, sm_ops, SM_FREE, ++ &msp->ms_smo, ++ spa_meta_objset(msp->ms_group->mg_vd->vdev_spa)); ++ if (error) { ++ metaslab_group_sort(msp->ms_group, msp, 0); ++ return (error); ++ } ++ for (t = 0; t < TXG_DEFER_SIZE; t++) ++ space_map_walk(&msp->ms_defermap[t], ++ space_map_claim, sm); ++ ++ } ++ ++ /* ++ * Track the bonus area as we activate new metaslabs. ++ */ ++ if (sm->sm_start > mg->mg_bonus_area) { ++ mutex_enter(&mg->mg_lock); ++ mg->mg_bonus_area = sm->sm_start; ++ mutex_exit(&mg->mg_lock); ++ } ++ ++ metaslab_group_sort(msp->ms_group, msp, ++ msp->ms_weight | activation_weight); ++ } ++ ASSERT(sm->sm_loaded); ++ ASSERT(msp->ms_weight & METASLAB_ACTIVE_MASK); ++ ++ return (0); ++} ++ ++static void ++metaslab_passivate(metaslab_t *msp, uint64_t size) ++{ ++ /* ++ * If size < SPA_MINBLOCKSIZE, then we will not allocate from ++ * this metaslab again. In that case, it had better be empty, ++ * or we would be leaving space on the table. ++ */ ++ ASSERT(size >= SPA_MINBLOCKSIZE || msp->ms_map.sm_space == 0); ++ metaslab_group_sort(msp->ms_group, msp, MIN(msp->ms_weight, size)); ++ ASSERT((msp->ms_weight & METASLAB_ACTIVE_MASK) == 0); ++} ++ ++/* ++ * Write a metaslab to disk in the context of the specified transaction group. ++ */ ++void ++metaslab_sync(metaslab_t *msp, uint64_t txg) ++{ ++ vdev_t *vd = msp->ms_group->mg_vd; ++ spa_t *spa = vd->vdev_spa; ++ objset_t *mos = spa_meta_objset(spa); ++ space_map_t *allocmap = &msp->ms_allocmap[txg & TXG_MASK]; ++ space_map_t *freemap = &msp->ms_freemap[txg & TXG_MASK]; ++ space_map_t *freed_map = &msp->ms_freemap[TXG_CLEAN(txg) & TXG_MASK]; ++ space_map_t *sm = &msp->ms_map; ++ space_map_obj_t *smo = &msp->ms_smo_syncing; ++ dmu_buf_t *db; ++ dmu_tx_t *tx; ++ int t; ++ ++ ASSERT(!vd->vdev_ishole); ++ ++ if (allocmap->sm_space == 0 && freemap->sm_space == 0) ++ return; ++ ++ /* ++ * The only state that can actually be changing concurrently with ++ * metaslab_sync() is the metaslab's ms_map. No other thread can ++ * be modifying this txg's allocmap, freemap, freed_map, or smo. ++ * Therefore, we only hold ms_lock to satify space_map ASSERTs. ++ * We drop it whenever we call into the DMU, because the DMU ++ * can call down to us (e.g. via zio_free()) at any time. 
++ */ ++ ++ tx = dmu_tx_create_assigned(spa_get_dsl(spa), txg); ++ ++ if (smo->smo_object == 0) { ++ ASSERT(smo->smo_objsize == 0); ++ ASSERT(smo->smo_alloc == 0); ++ smo->smo_object = dmu_object_alloc(mos, ++ DMU_OT_SPACE_MAP, 1 << SPACE_MAP_BLOCKSHIFT, ++ DMU_OT_SPACE_MAP_HEADER, sizeof (*smo), tx); ++ ASSERT(smo->smo_object != 0); ++ dmu_write(mos, vd->vdev_ms_array, sizeof (uint64_t) * ++ (sm->sm_start >> vd->vdev_ms_shift), ++ sizeof (uint64_t), &smo->smo_object, tx); ++ } ++ ++ mutex_enter(&msp->ms_lock); ++ ++ space_map_walk(freemap, space_map_add, freed_map); ++ ++ if (sm->sm_loaded && spa_sync_pass(spa) == 1 && smo->smo_objsize >= ++ 2 * sizeof (uint64_t) * avl_numnodes(&sm->sm_root)) { ++ /* ++ * The in-core space map representation is twice as compact ++ * as the on-disk one, so it's time to condense the latter ++ * by generating a pure allocmap from first principles. ++ * ++ * This metaslab is 100% allocated, ++ * minus the content of the in-core map (sm), ++ * minus what's been freed this txg (freed_map), ++ * minus deferred frees (ms_defermap[]), ++ * minus allocations from txgs in the future ++ * (because they haven't been committed yet). ++ */ ++ space_map_vacate(allocmap, NULL, NULL); ++ space_map_vacate(freemap, NULL, NULL); ++ ++ space_map_add(allocmap, allocmap->sm_start, allocmap->sm_size); ++ ++ space_map_walk(sm, space_map_remove, allocmap); ++ space_map_walk(freed_map, space_map_remove, allocmap); ++ ++ for (t = 0; t < TXG_DEFER_SIZE; t++) ++ space_map_walk(&msp->ms_defermap[t], ++ space_map_remove, allocmap); ++ ++ for (t = 1; t < TXG_CONCURRENT_STATES; t++) ++ space_map_walk(&msp->ms_allocmap[(txg + t) & TXG_MASK], ++ space_map_remove, allocmap); ++ ++ mutex_exit(&msp->ms_lock); ++ space_map_truncate(smo, mos, tx); ++ mutex_enter(&msp->ms_lock); ++ } ++ ++ space_map_sync(allocmap, SM_ALLOC, smo, mos, tx); ++ space_map_sync(freemap, SM_FREE, smo, mos, tx); ++ ++ mutex_exit(&msp->ms_lock); ++ ++ VERIFY(0 == dmu_bonus_hold(mos, smo->smo_object, FTAG, &db)); ++ dmu_buf_will_dirty(db, tx); ++ ASSERT3U(db->db_size, >=, sizeof (*smo)); ++ bcopy(smo, db->db_data, sizeof (*smo)); ++ dmu_buf_rele(db, FTAG); ++ ++ dmu_tx_commit(tx); ++} ++ ++/* ++ * Called after a transaction group has completely synced to mark ++ * all of the metaslab's free space as usable. ++ */ ++void ++metaslab_sync_done(metaslab_t *msp, uint64_t txg) ++{ ++ space_map_obj_t *smo = &msp->ms_smo; ++ space_map_obj_t *smosync = &msp->ms_smo_syncing; ++ space_map_t *sm = &msp->ms_map; ++ space_map_t *freed_map = &msp->ms_freemap[TXG_CLEAN(txg) & TXG_MASK]; ++ space_map_t *defer_map = &msp->ms_defermap[txg % TXG_DEFER_SIZE]; ++ metaslab_group_t *mg = msp->ms_group; ++ vdev_t *vd = mg->mg_vd; ++ int64_t alloc_delta, defer_delta; ++ int t; ++ ++ ASSERT(!vd->vdev_ishole); ++ ++ mutex_enter(&msp->ms_lock); ++ ++ /* ++ * If this metaslab is just becoming available, initialize its ++ * allocmaps and freemaps and add its capacity to the vdev. 
++ */ ++ if (freed_map->sm_size == 0) { ++ for (t = 0; t < TXG_SIZE; t++) { ++ space_map_create(&msp->ms_allocmap[t], sm->sm_start, ++ sm->sm_size, sm->sm_shift, sm->sm_lock); ++ space_map_create(&msp->ms_freemap[t], sm->sm_start, ++ sm->sm_size, sm->sm_shift, sm->sm_lock); ++ } ++ ++ for (t = 0; t < TXG_DEFER_SIZE; t++) ++ space_map_create(&msp->ms_defermap[t], sm->sm_start, ++ sm->sm_size, sm->sm_shift, sm->sm_lock); ++ ++ vdev_space_update(vd, 0, 0, sm->sm_size); ++ } ++ ++ alloc_delta = smosync->smo_alloc - smo->smo_alloc; ++ defer_delta = freed_map->sm_space - defer_map->sm_space; ++ ++ vdev_space_update(vd, alloc_delta + defer_delta, defer_delta, 0); ++ ++ ASSERT(msp->ms_allocmap[txg & TXG_MASK].sm_space == 0); ++ ASSERT(msp->ms_freemap[txg & TXG_MASK].sm_space == 0); ++ ++ /* ++ * If there's a space_map_load() in progress, wait for it to complete ++ * so that we have a consistent view of the in-core space map. ++ * Then, add defer_map (oldest deferred frees) to this map and ++ * transfer freed_map (this txg's frees) to defer_map. ++ */ ++ space_map_load_wait(sm); ++ space_map_vacate(defer_map, sm->sm_loaded ? space_map_free : NULL, sm); ++ space_map_vacate(freed_map, space_map_add, defer_map); ++ ++ *smo = *smosync; ++ ++ msp->ms_deferspace += defer_delta; ++ ASSERT3S(msp->ms_deferspace, >=, 0); ++ ASSERT3S(msp->ms_deferspace, <=, sm->sm_size); ++ if (msp->ms_deferspace != 0) { ++ /* ++ * Keep syncing this metaslab until all deferred frees ++ * are back in circulation. ++ */ ++ vdev_dirty(vd, VDD_METASLAB, msp, txg + 1); ++ } ++ ++ /* ++ * If the map is loaded but no longer active, evict it as soon as all ++ * future allocations have synced. (If we unloaded it now and then ++ * loaded a moment later, the map wouldn't reflect those allocations.) ++ */ ++ if (sm->sm_loaded && (msp->ms_weight & METASLAB_ACTIVE_MASK) == 0) { ++ int evictable = 1; ++ ++ for (t = 1; t < TXG_CONCURRENT_STATES; t++) ++ if (msp->ms_allocmap[(txg + t) & TXG_MASK].sm_space) ++ evictable = 0; ++ ++ if (evictable && !metaslab_debug) ++ space_map_unload(sm); ++ } ++ ++ metaslab_group_sort(mg, msp, metaslab_weight(msp)); ++ ++ mutex_exit(&msp->ms_lock); ++} ++ ++void ++metaslab_sync_reassess(metaslab_group_t *mg) ++{ ++ vdev_t *vd = mg->mg_vd; ++ int64_t failures = mg->mg_alloc_failures; ++ int m; ++ ++ /* ++ * Re-evaluate all metaslabs which have lower offsets than the ++ * bonus area. 
++ */ ++ for (m = 0; m < vd->vdev_ms_count; m++) { ++ metaslab_t *msp = vd->vdev_ms[m]; ++ ++ if (msp->ms_map.sm_start > mg->mg_bonus_area) ++ break; ++ ++ mutex_enter(&msp->ms_lock); ++ metaslab_group_sort(mg, msp, metaslab_weight(msp)); ++ mutex_exit(&msp->ms_lock); ++ } ++ ++ atomic_add_64(&mg->mg_alloc_failures, -failures); ++ ++ /* ++ * Prefetch the next potential metaslabs ++ */ ++ metaslab_prefetch(mg); ++} ++ ++static uint64_t ++metaslab_distance(metaslab_t *msp, dva_t *dva) ++{ ++ uint64_t ms_shift = msp->ms_group->mg_vd->vdev_ms_shift; ++ uint64_t offset = DVA_GET_OFFSET(dva) >> ms_shift; ++ uint64_t start = msp->ms_map.sm_start >> ms_shift; ++ ++ if (msp->ms_group->mg_vd->vdev_id != DVA_GET_VDEV(dva)) ++ return (1ULL << 63); ++ ++ if (offset < start) ++ return ((start - offset) << ms_shift); ++ if (offset > start) ++ return ((offset - start) << ms_shift); ++ return (0); ++} ++ ++static uint64_t ++metaslab_group_alloc(metaslab_group_t *mg, uint64_t psize, uint64_t asize, ++ uint64_t txg, uint64_t min_distance, dva_t *dva, int d, int flags) ++{ ++ spa_t *spa = mg->mg_vd->vdev_spa; ++ metaslab_t *msp = NULL; ++ uint64_t offset = -1ULL; ++ avl_tree_t *t = &mg->mg_metaslab_tree; ++ uint64_t activation_weight; ++ uint64_t target_distance; ++ int i; ++ ++ activation_weight = METASLAB_WEIGHT_PRIMARY; ++ for (i = 0; i < d; i++) { ++ if (DVA_GET_VDEV(&dva[i]) == mg->mg_vd->vdev_id) { ++ activation_weight = METASLAB_WEIGHT_SECONDARY; ++ break; ++ } ++ } ++ ++ for (;;) { ++ boolean_t was_active; ++ ++ mutex_enter(&mg->mg_lock); ++ for (msp = avl_first(t); msp; msp = AVL_NEXT(t, msp)) { ++ if (msp->ms_weight < asize) { ++ spa_dbgmsg(spa, "%s: failed to meet weight " ++ "requirement: vdev %llu, txg %llu, mg %p, " ++ "msp %p, psize %llu, asize %llu, " ++ "failures %llu, weight %llu", ++ spa_name(spa), mg->mg_vd->vdev_id, txg, ++ mg, msp, psize, asize, ++ mg->mg_alloc_failures, msp->ms_weight); ++ mutex_exit(&mg->mg_lock); ++ return (-1ULL); ++ } ++ was_active = msp->ms_weight & METASLAB_ACTIVE_MASK; ++ if (activation_weight == METASLAB_WEIGHT_PRIMARY) ++ break; ++ ++ target_distance = min_distance + ++ (msp->ms_smo.smo_alloc ? 0 : min_distance >> 1); ++ ++ for (i = 0; i < d; i++) ++ if (metaslab_distance(msp, &dva[i]) < ++ target_distance) ++ break; ++ if (i == d) ++ break; ++ } ++ mutex_exit(&mg->mg_lock); ++ if (msp == NULL) ++ return (-1ULL); ++ ++ /* ++ * If we've already reached the allowable number of failed ++ * allocation attempts on this metaslab group then we ++ * consider skipping it. We skip it only if we're allowed ++ * to "fast" gang, the physical size is larger than ++ * a gang block, and we're attempting to allocate from ++ * the primary metaslab. ++ */ ++ if (mg->mg_alloc_failures > zfs_mg_alloc_failures && ++ CAN_FASTGANG(flags) && psize > SPA_GANGBLOCKSIZE && ++ activation_weight == METASLAB_WEIGHT_PRIMARY) { ++ spa_dbgmsg(spa, "%s: skipping metaslab group: " ++ "vdev %llu, txg %llu, mg %p, psize %llu, " ++ "asize %llu, failures %llu", spa_name(spa), ++ mg->mg_vd->vdev_id, txg, mg, psize, asize, ++ mg->mg_alloc_failures); ++ return (-1ULL); ++ } ++ ++ mutex_enter(&msp->ms_lock); ++ ++ /* ++ * Ensure that the metaslab we have selected is still ++ * capable of handling our request. It's possible that ++ * another thread may have changed the weight while we ++ * were blocked on the metaslab lock. 
++ */ ++ if (msp->ms_weight < asize || (was_active && ++ !(msp->ms_weight & METASLAB_ACTIVE_MASK) && ++ activation_weight == METASLAB_WEIGHT_PRIMARY)) { ++ mutex_exit(&msp->ms_lock); ++ continue; ++ } ++ ++ if ((msp->ms_weight & METASLAB_WEIGHT_SECONDARY) && ++ activation_weight == METASLAB_WEIGHT_PRIMARY) { ++ metaslab_passivate(msp, ++ msp->ms_weight & ~METASLAB_ACTIVE_MASK); ++ mutex_exit(&msp->ms_lock); ++ continue; ++ } ++ ++ if (metaslab_activate(msp, activation_weight) != 0) { ++ mutex_exit(&msp->ms_lock); ++ continue; ++ } ++ ++ if ((offset = space_map_alloc(&msp->ms_map, asize)) != -1ULL) ++ break; ++ ++ atomic_inc_64(&mg->mg_alloc_failures); ++ ++ metaslab_passivate(msp, space_map_maxsize(&msp->ms_map)); ++ ++ mutex_exit(&msp->ms_lock); ++ } ++ ++ if (msp->ms_allocmap[txg & TXG_MASK].sm_space == 0) ++ vdev_dirty(mg->mg_vd, VDD_METASLAB, msp, txg); ++ ++ space_map_add(&msp->ms_allocmap[txg & TXG_MASK], offset, asize); ++ ++ mutex_exit(&msp->ms_lock); ++ ++ return (offset); ++} ++ ++/* ++ * Allocate a block for the specified i/o. ++ */ ++static int ++metaslab_alloc_dva(spa_t *spa, metaslab_class_t *mc, uint64_t psize, ++ dva_t *dva, int d, dva_t *hintdva, uint64_t txg, int flags) ++{ ++ metaslab_group_t *mg, *fast_mg, *rotor; ++ vdev_t *vd; ++ int dshift = 3; ++ int all_zero; ++ int zio_lock = B_FALSE; ++ boolean_t allocatable; ++ uint64_t offset = -1ULL; ++ uint64_t asize; ++ uint64_t distance; ++ ++ ASSERT(!DVA_IS_VALID(&dva[d])); ++ ++ /* ++ * For testing, make some blocks above a certain size be gang blocks. ++ */ ++ if (psize >= metaslab_gang_bang && (ddi_get_lbolt() & 3) == 0) ++ return (ENOSPC); ++ ++ if (flags & METASLAB_FASTWRITE) ++ mutex_enter(&mc->mc_fastwrite_lock); ++ ++ /* ++ * Start at the rotor and loop through all mgs until we find something. ++ * Note that there's no locking on mc_rotor or mc_aliquot because ++ * nothing actually breaks if we miss a few updates -- we just won't ++ * allocate quite as evenly. It all balances out over time. ++ * ++ * If we are doing ditto or log blocks, try to spread them across ++ * consecutive vdevs. If we're forced to reuse a vdev before we've ++ * allocated all of our ditto blocks, then try and spread them out on ++ * that vdev as much as possible. If it turns out to not be possible, ++ * gradually lower our standards until anything becomes acceptable. ++ * Also, allocating on consecutive vdevs (as opposed to random vdevs) ++ * gives us hope of containing our fault domains to something we're ++ * able to reason about. Otherwise, any two top-level vdev failures ++ * will guarantee the loss of data. With consecutive allocation, ++ * only two adjacent top-level vdev failures will result in data loss. ++ * ++ * If we are doing gang blocks (hintdva is non-NULL), try to keep ++ * ourselves on the same vdev as our gang block header. That ++ * way, we can hope for locality in vdev_cache, plus it makes our ++ * fault domains something tractable. ++ */ ++ if (hintdva) { ++ vd = vdev_lookup_top(spa, DVA_GET_VDEV(&hintdva[d])); ++ ++ /* ++ * It's possible the vdev we're using as the hint no ++ * longer exists (i.e. removed). Consult the rotor when ++ * all else fails. 
++ */ ++ if (vd != NULL) { ++ mg = vd->vdev_mg; ++ ++ if (flags & METASLAB_HINTBP_AVOID && ++ mg->mg_next != NULL) ++ mg = mg->mg_next; ++ } else { ++ mg = mc->mc_rotor; ++ } ++ } else if (d != 0) { ++ vd = vdev_lookup_top(spa, DVA_GET_VDEV(&dva[d - 1])); ++ mg = vd->vdev_mg->mg_next; ++ } else if (flags & METASLAB_FASTWRITE) { ++ mg = fast_mg = mc->mc_rotor; ++ ++ do { ++ if (fast_mg->mg_vd->vdev_pending_fastwrite < ++ mg->mg_vd->vdev_pending_fastwrite) ++ mg = fast_mg; ++ } while ((fast_mg = fast_mg->mg_next) != mc->mc_rotor); ++ ++ } else { ++ mg = mc->mc_rotor; ++ } ++ ++ /* ++ * If the hint put us into the wrong metaslab class, or into a ++ * metaslab group that has been passivated, just follow the rotor. ++ */ ++ if (mg->mg_class != mc || mg->mg_activation_count <= 0) ++ mg = mc->mc_rotor; ++ ++ rotor = mg; ++top: ++ all_zero = B_TRUE; ++ do { ++ ASSERT(mg->mg_activation_count == 1); ++ ++ vd = mg->mg_vd; ++ ++ /* ++ * Don't allocate from faulted devices. ++ */ ++ if (zio_lock) { ++ spa_config_enter(spa, SCL_ZIO, FTAG, RW_READER); ++ allocatable = vdev_allocatable(vd); ++ spa_config_exit(spa, SCL_ZIO, FTAG); ++ } else { ++ allocatable = vdev_allocatable(vd); ++ } ++ if (!allocatable) ++ goto next; ++ ++ /* ++ * Avoid writing single-copy data to a failing vdev ++ */ ++ if ((vd->vdev_stat.vs_write_errors > 0 || ++ vd->vdev_state < VDEV_STATE_HEALTHY) && ++ d == 0 && dshift == 3) { ++ all_zero = B_FALSE; ++ goto next; ++ } ++ ++ ASSERT(mg->mg_class == mc); ++ ++ distance = vd->vdev_asize >> dshift; ++ if (distance <= (1ULL << vd->vdev_ms_shift)) ++ distance = 0; ++ else ++ all_zero = B_FALSE; ++ ++ asize = vdev_psize_to_asize(vd, psize); ++ ASSERT(P2PHASE(asize, 1ULL << vd->vdev_ashift) == 0); ++ ++ offset = metaslab_group_alloc(mg, psize, asize, txg, distance, ++ dva, d, flags); ++ if (offset != -1ULL) { ++ /* ++ * If we've just selected this metaslab group, ++ * figure out whether the corresponding vdev is ++ * over- or under-used relative to the pool, ++ * and set an allocation bias to even it out. ++ */ ++ if (mc->mc_aliquot == 0) { ++ vdev_stat_t *vs = &vd->vdev_stat; ++ int64_t vu, cu; ++ ++ vu = (vs->vs_alloc * 100) / (vs->vs_space + 1); ++ cu = (mc->mc_alloc * 100) / (mc->mc_space + 1); ++ ++ /* ++ * Calculate how much more or less we should ++ * try to allocate from this device during ++ * this iteration around the rotor. ++ * For example, if a device is 80% full ++ * and the pool is 20% full then we should ++ * reduce allocations by 60% on this device. ++ * ++ * mg_bias = (20 - 80) * 512K / 100 = -307K ++ * ++ * This reduces allocations by 307K for this ++ * iteration. 
++ */ ++ mg->mg_bias = ((cu - vu) * ++ (int64_t)mg->mg_aliquot) / 100; ++ } ++ ++ if ((flags & METASLAB_FASTWRITE) || ++ atomic_add_64_nv(&mc->mc_aliquot, asize) >= ++ mg->mg_aliquot + mg->mg_bias) { ++ mc->mc_rotor = mg->mg_next; ++ mc->mc_aliquot = 0; ++ } ++ ++ DVA_SET_VDEV(&dva[d], vd->vdev_id); ++ DVA_SET_OFFSET(&dva[d], offset); ++ DVA_SET_GANG(&dva[d], !!(flags & METASLAB_GANG_HEADER)); ++ DVA_SET_ASIZE(&dva[d], asize); ++ ++ if (flags & METASLAB_FASTWRITE) { ++ atomic_add_64(&vd->vdev_pending_fastwrite, ++ psize); ++ mutex_exit(&mc->mc_fastwrite_lock); ++ } ++ ++ return (0); ++ } ++next: ++ mc->mc_rotor = mg->mg_next; ++ mc->mc_aliquot = 0; ++ } while ((mg = mg->mg_next) != rotor); ++ ++ if (!all_zero) { ++ dshift++; ++ ASSERT(dshift < 64); ++ goto top; ++ } ++ ++ if (!allocatable && !zio_lock) { ++ dshift = 3; ++ zio_lock = B_TRUE; ++ goto top; ++ } ++ ++ bzero(&dva[d], sizeof (dva_t)); ++ ++ if (flags & METASLAB_FASTWRITE) ++ mutex_exit(&mc->mc_fastwrite_lock); ++ return (ENOSPC); ++} ++ ++/* ++ * Free the block represented by DVA in the context of the specified ++ * transaction group. ++ */ ++static void ++metaslab_free_dva(spa_t *spa, const dva_t *dva, uint64_t txg, boolean_t now) ++{ ++ uint64_t vdev = DVA_GET_VDEV(dva); ++ uint64_t offset = DVA_GET_OFFSET(dva); ++ uint64_t size = DVA_GET_ASIZE(dva); ++ vdev_t *vd; ++ metaslab_t *msp; ++ ++ ASSERT(DVA_IS_VALID(dva)); ++ ++ if (txg > spa_freeze_txg(spa)) ++ return; ++ ++ if ((vd = vdev_lookup_top(spa, vdev)) == NULL || ++ (offset >> vd->vdev_ms_shift) >= vd->vdev_ms_count) { ++ cmn_err(CE_WARN, "metaslab_free_dva(): bad DVA %llu:%llu", ++ (u_longlong_t)vdev, (u_longlong_t)offset); ++ ASSERT(0); ++ return; ++ } ++ ++ msp = vd->vdev_ms[offset >> vd->vdev_ms_shift]; ++ ++ if (DVA_GET_GANG(dva)) ++ size = vdev_psize_to_asize(vd, SPA_GANGBLOCKSIZE); ++ ++ mutex_enter(&msp->ms_lock); ++ ++ if (now) { ++ space_map_remove(&msp->ms_allocmap[txg & TXG_MASK], ++ offset, size); ++ space_map_free(&msp->ms_map, offset, size); ++ } else { ++ if (msp->ms_freemap[txg & TXG_MASK].sm_space == 0) ++ vdev_dirty(vd, VDD_METASLAB, msp, txg); ++ space_map_add(&msp->ms_freemap[txg & TXG_MASK], offset, size); ++ } ++ ++ mutex_exit(&msp->ms_lock); ++} ++ ++/* ++ * Intent log support: upon opening the pool after a crash, notify the SPA ++ * of blocks that the intent log has allocated for immediate write, but ++ * which are still considered free by the SPA because the last transaction ++ * group didn't commit yet. 
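++ *
++ * A minimal sketch of the expected call during pool open (the txg value
++ * shown is illustrative; passing zero instead requests a dry run):
++ *
++ *	error = metaslab_claim(spa, bp, spa_first_txg(spa));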
++ */ ++static int ++metaslab_claim_dva(spa_t *spa, const dva_t *dva, uint64_t txg) ++{ ++ uint64_t vdev = DVA_GET_VDEV(dva); ++ uint64_t offset = DVA_GET_OFFSET(dva); ++ uint64_t size = DVA_GET_ASIZE(dva); ++ vdev_t *vd; ++ metaslab_t *msp; ++ int error = 0; ++ ++ ASSERT(DVA_IS_VALID(dva)); ++ ++ if ((vd = vdev_lookup_top(spa, vdev)) == NULL || ++ (offset >> vd->vdev_ms_shift) >= vd->vdev_ms_count) ++ return (ENXIO); ++ ++ msp = vd->vdev_ms[offset >> vd->vdev_ms_shift]; ++ ++ if (DVA_GET_GANG(dva)) ++ size = vdev_psize_to_asize(vd, SPA_GANGBLOCKSIZE); ++ ++ mutex_enter(&msp->ms_lock); ++ ++ if ((txg != 0 && spa_writeable(spa)) || !msp->ms_map.sm_loaded) ++ error = metaslab_activate(msp, METASLAB_WEIGHT_SECONDARY); ++ ++ if (error == 0 && !space_map_contains(&msp->ms_map, offset, size)) ++ error = ENOENT; ++ ++ if (error || txg == 0) { /* txg == 0 indicates dry run */ ++ mutex_exit(&msp->ms_lock); ++ return (error); ++ } ++ ++ space_map_claim(&msp->ms_map, offset, size); ++ ++ if (spa_writeable(spa)) { /* don't dirty if we're zdb(1M) */ ++ if (msp->ms_allocmap[txg & TXG_MASK].sm_space == 0) ++ vdev_dirty(vd, VDD_METASLAB, msp, txg); ++ space_map_add(&msp->ms_allocmap[txg & TXG_MASK], offset, size); ++ } ++ ++ mutex_exit(&msp->ms_lock); ++ ++ return (0); ++} ++ ++int ++metaslab_alloc(spa_t *spa, metaslab_class_t *mc, uint64_t psize, blkptr_t *bp, ++ int ndvas, uint64_t txg, blkptr_t *hintbp, int flags) ++{ ++ dva_t *dva = bp->blk_dva; ++ dva_t *hintdva = hintbp->blk_dva; ++ int d, error = 0; ++ ++ ASSERT(bp->blk_birth == 0); ++ ASSERT(BP_PHYSICAL_BIRTH(bp) == 0); ++ ++ spa_config_enter(spa, SCL_ALLOC, FTAG, RW_READER); ++ ++ if (mc->mc_rotor == NULL) { /* no vdevs in this class */ ++ spa_config_exit(spa, SCL_ALLOC, FTAG); ++ return (ENOSPC); ++ } ++ ++ ASSERT(ndvas > 0 && ndvas <= spa_max_replication(spa)); ++ ASSERT(BP_GET_NDVAS(bp) == 0); ++ ASSERT(hintbp == NULL || ndvas <= BP_GET_NDVAS(hintbp)); ++ ++ for (d = 0; d < ndvas; d++) { ++ error = metaslab_alloc_dva(spa, mc, psize, dva, d, hintdva, ++ txg, flags); ++ if (error) { ++ for (d--; d >= 0; d--) { ++ metaslab_free_dva(spa, &dva[d], txg, B_TRUE); ++ bzero(&dva[d], sizeof (dva_t)); ++ } ++ spa_config_exit(spa, SCL_ALLOC, FTAG); ++ return (error); ++ } ++ } ++ ASSERT(error == 0); ++ ASSERT(BP_GET_NDVAS(bp) == ndvas); ++ ++ spa_config_exit(spa, SCL_ALLOC, FTAG); ++ ++ BP_SET_BIRTH(bp, txg, txg); ++ ++ return (0); ++} ++ ++void ++metaslab_free(spa_t *spa, const blkptr_t *bp, uint64_t txg, boolean_t now) ++{ ++ const dva_t *dva = bp->blk_dva; ++ int d, ndvas = BP_GET_NDVAS(bp); ++ ++ ASSERT(!BP_IS_HOLE(bp)); ++ ASSERT(!now || bp->blk_birth >= spa_syncing_txg(spa)); ++ ++ spa_config_enter(spa, SCL_FREE, FTAG, RW_READER); ++ ++ for (d = 0; d < ndvas; d++) ++ metaslab_free_dva(spa, &dva[d], txg, now); ++ ++ spa_config_exit(spa, SCL_FREE, FTAG); ++} ++ ++int ++metaslab_claim(spa_t *spa, const blkptr_t *bp, uint64_t txg) ++{ ++ const dva_t *dva = bp->blk_dva; ++ int ndvas = BP_GET_NDVAS(bp); ++ int d, error = 0; ++ ++ ASSERT(!BP_IS_HOLE(bp)); ++ ++ if (txg != 0) { ++ /* ++ * First do a dry run to make sure all DVAs are claimable, ++ * so we don't have to unwind from partial failures below. 
++ */ ++ if ((error = metaslab_claim(spa, bp, 0)) != 0) ++ return (error); ++ } ++ ++ spa_config_enter(spa, SCL_ALLOC, FTAG, RW_READER); ++ ++ for (d = 0; d < ndvas; d++) ++ if ((error = metaslab_claim_dva(spa, &dva[d], txg)) != 0) ++ break; ++ ++ spa_config_exit(spa, SCL_ALLOC, FTAG); ++ ++ ASSERT(error == 0 || txg == 0); ++ ++ return (error); ++} ++ ++void metaslab_fastwrite_mark(spa_t *spa, const blkptr_t *bp) ++{ ++ const dva_t *dva = bp->blk_dva; ++ int ndvas = BP_GET_NDVAS(bp); ++ uint64_t psize = BP_GET_PSIZE(bp); ++ int d; ++ vdev_t *vd; ++ ++ ASSERT(!BP_IS_HOLE(bp)); ++ ASSERT(psize > 0); ++ ++ spa_config_enter(spa, SCL_VDEV, FTAG, RW_READER); ++ ++ for (d = 0; d < ndvas; d++) { ++ if ((vd = vdev_lookup_top(spa, DVA_GET_VDEV(&dva[d]))) == NULL) ++ continue; ++ atomic_add_64(&vd->vdev_pending_fastwrite, psize); ++ } ++ ++ spa_config_exit(spa, SCL_VDEV, FTAG); ++} ++ ++void metaslab_fastwrite_unmark(spa_t *spa, const blkptr_t *bp) ++{ ++ const dva_t *dva = bp->blk_dva; ++ int ndvas = BP_GET_NDVAS(bp); ++ uint64_t psize = BP_GET_PSIZE(bp); ++ int d; ++ vdev_t *vd; ++ ++ ASSERT(!BP_IS_HOLE(bp)); ++ ASSERT(psize > 0); ++ ++ spa_config_enter(spa, SCL_VDEV, FTAG, RW_READER); ++ ++ for (d = 0; d < ndvas; d++) { ++ if ((vd = vdev_lookup_top(spa, DVA_GET_VDEV(&dva[d]))) == NULL) ++ continue; ++ ASSERT3U(vd->vdev_pending_fastwrite, >=, psize); ++ atomic_sub_64(&vd->vdev_pending_fastwrite, psize); ++ } ++ ++ spa_config_exit(spa, SCL_VDEV, FTAG); ++} +diff -uNr linux-3.2.33-go.orig/fs/zfs/zfs/refcount.c linux-3.2.33-go/fs/zfs/zfs/refcount.c +--- linux-3.2.33-go.orig/fs/zfs/zfs/refcount.c 1970-01-01 01:00:00.000000000 +0100 ++++ linux-3.2.33-go/fs/zfs/zfs/refcount.c 2012-11-16 23:25:34.349039334 +0100 +@@ -0,0 +1,223 @@ ++/* ++ * CDDL HEADER START ++ * ++ * The contents of this file are subject to the terms of the ++ * Common Development and Distribution License (the "License"). ++ * You may not use this file except in compliance with the License. ++ * ++ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE ++ * or http://www.opensolaris.org/os/licensing. ++ * See the License for the specific language governing permissions ++ * and limitations under the License. ++ * ++ * When distributing Covered Code, include this CDDL HEADER in each ++ * file and include the License file at usr/src/OPENSOLARIS.LICENSE. ++ * If applicable, add the following below this CDDL HEADER, with the ++ * fields enclosed by brackets "[]" replaced with your own identifying ++ * information: Portions Copyright [yyyy] [name of copyright owner] ++ * ++ * CDDL HEADER END ++ */ ++/* ++ * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved. 
++ */ ++ ++#include ++#include ++ ++#ifdef ZFS_DEBUG ++ ++#ifdef _KERNEL ++int reference_tracking_enable = FALSE; /* runs out of memory too easily */ ++#else ++int reference_tracking_enable = TRUE; ++#endif ++int reference_history = 4; /* tunable */ ++ ++static kmem_cache_t *reference_cache; ++static kmem_cache_t *reference_history_cache; ++ ++void ++refcount_init(void) ++{ ++ reference_cache = kmem_cache_create("reference_cache", ++ sizeof (reference_t), 0, NULL, NULL, NULL, NULL, NULL, 0); ++ ++ reference_history_cache = kmem_cache_create("reference_history_cache", ++ sizeof (uint64_t), 0, NULL, NULL, NULL, NULL, NULL, 0); ++} ++ ++void ++refcount_fini(void) ++{ ++ kmem_cache_destroy(reference_cache); ++ kmem_cache_destroy(reference_history_cache); ++} ++ ++void ++refcount_create(refcount_t *rc) ++{ ++ mutex_init(&rc->rc_mtx, NULL, MUTEX_DEFAULT, NULL); ++ list_create(&rc->rc_list, sizeof (reference_t), ++ offsetof(reference_t, ref_link)); ++ list_create(&rc->rc_removed, sizeof (reference_t), ++ offsetof(reference_t, ref_link)); ++ rc->rc_count = 0; ++ rc->rc_removed_count = 0; ++} ++ ++void ++refcount_destroy_many(refcount_t *rc, uint64_t number) ++{ ++ reference_t *ref; ++ ++ ASSERT(rc->rc_count == number); ++ while ((ref = list_head(&rc->rc_list))) { ++ list_remove(&rc->rc_list, ref); ++ kmem_cache_free(reference_cache, ref); ++ } ++ list_destroy(&rc->rc_list); ++ ++ while ((ref = list_head(&rc->rc_removed))) { ++ list_remove(&rc->rc_removed, ref); ++ kmem_cache_free(reference_history_cache, ref->ref_removed); ++ kmem_cache_free(reference_cache, ref); ++ } ++ list_destroy(&rc->rc_removed); ++ mutex_destroy(&rc->rc_mtx); ++} ++ ++void ++refcount_destroy(refcount_t *rc) ++{ ++ refcount_destroy_many(rc, 0); ++} ++ ++int ++refcount_is_zero(refcount_t *rc) ++{ ++ ASSERT(rc->rc_count >= 0); ++ return (rc->rc_count == 0); ++} ++ ++int64_t ++refcount_count(refcount_t *rc) ++{ ++ ASSERT(rc->rc_count >= 0); ++ return (rc->rc_count); ++} ++ ++int64_t ++refcount_add_many(refcount_t *rc, uint64_t number, void *holder) ++{ ++ reference_t *ref = NULL; ++ int64_t count; ++ ++ if (reference_tracking_enable) { ++ ref = kmem_cache_alloc(reference_cache, KM_PUSHPAGE); ++ ref->ref_holder = holder; ++ ref->ref_number = number; ++ } ++ mutex_enter(&rc->rc_mtx); ++ ASSERT(rc->rc_count >= 0); ++ if (reference_tracking_enable) ++ list_insert_head(&rc->rc_list, ref); ++ rc->rc_count += number; ++ count = rc->rc_count; ++ mutex_exit(&rc->rc_mtx); ++ ++ return (count); ++} ++ ++int64_t ++refcount_add(refcount_t *rc, void *holder) ++{ ++ return (refcount_add_many(rc, 1, holder)); ++} ++ ++int64_t ++refcount_remove_many(refcount_t *rc, uint64_t number, void *holder) ++{ ++ reference_t *ref; ++ int64_t count; ++ ++ mutex_enter(&rc->rc_mtx); ++ ASSERT(rc->rc_count >= number); ++ ++ if (!reference_tracking_enable) { ++ rc->rc_count -= number; ++ count = rc->rc_count; ++ mutex_exit(&rc->rc_mtx); ++ return (count); ++ } ++ ++ for (ref = list_head(&rc->rc_list); ref; ++ ref = list_next(&rc->rc_list, ref)) { ++ if (ref->ref_holder == holder && ref->ref_number == number) { ++ list_remove(&rc->rc_list, ref); ++ if (reference_history > 0) { ++ ref->ref_removed = ++ kmem_cache_alloc(reference_history_cache, ++ KM_PUSHPAGE); ++ list_insert_head(&rc->rc_removed, ref); ++ rc->rc_removed_count++; ++ if (rc->rc_removed_count >= reference_history) { ++ ref = list_tail(&rc->rc_removed); ++ list_remove(&rc->rc_removed, ref); ++ kmem_cache_free(reference_history_cache, ++ ref->ref_removed); ++ kmem_cache_free(reference_cache, ref); ++ 
rc->rc_removed_count--; ++ } ++ } else { ++ kmem_cache_free(reference_cache, ref); ++ } ++ rc->rc_count -= number; ++ count = rc->rc_count; ++ mutex_exit(&rc->rc_mtx); ++ return (count); ++ } ++ } ++ panic("No such hold %p on refcount %llx", holder, ++ (u_longlong_t)(uintptr_t)rc); ++ return (-1); ++} ++ ++int64_t ++refcount_remove(refcount_t *rc, void *holder) ++{ ++ return (refcount_remove_many(rc, 1, holder)); ++} ++ ++void ++refcount_transfer(refcount_t *dst, refcount_t *src) ++{ ++ int64_t count, removed_count; ++ list_t list, removed; ++ ++ list_create(&list, sizeof (reference_t), ++ offsetof(reference_t, ref_link)); ++ list_create(&removed, sizeof (reference_t), ++ offsetof(reference_t, ref_link)); ++ ++ mutex_enter(&src->rc_mtx); ++ count = src->rc_count; ++ removed_count = src->rc_removed_count; ++ src->rc_count = 0; ++ src->rc_removed_count = 0; ++ list_move_tail(&list, &src->rc_list); ++ list_move_tail(&removed, &src->rc_removed); ++ mutex_exit(&src->rc_mtx); ++ ++ mutex_enter(&dst->rc_mtx); ++ dst->rc_count += count; ++ dst->rc_removed_count += removed_count; ++ list_move_tail(&dst->rc_list, &list); ++ list_move_tail(&dst->rc_removed, &removed); ++ mutex_exit(&dst->rc_mtx); ++ ++ list_destroy(&list); ++ list_destroy(&removed); ++} ++ ++#endif /* ZFS_DEBUG */ +diff -uNr linux-3.2.33-go.orig/fs/zfs/zfs/rrwlock.c linux-3.2.33-go/fs/zfs/zfs/rrwlock.c +--- linux-3.2.33-go.orig/fs/zfs/zfs/rrwlock.c 1970-01-01 01:00:00.000000000 +0100 ++++ linux-3.2.33-go/fs/zfs/zfs/rrwlock.c 2012-11-16 23:25:34.351039311 +0100 +@@ -0,0 +1,264 @@ ++/* ++ * CDDL HEADER START ++ * ++ * The contents of this file are subject to the terms of the ++ * Common Development and Distribution License (the "License"). ++ * You may not use this file except in compliance with the License. ++ * ++ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE ++ * or http://www.opensolaris.org/os/licensing. ++ * See the License for the specific language governing permissions ++ * and limitations under the License. ++ * ++ * When distributing Covered Code, include this CDDL HEADER in each ++ * file and include the License file at usr/src/OPENSOLARIS.LICENSE. ++ * If applicable, add the following below this CDDL HEADER, with the ++ * fields enclosed by brackets "[]" replaced with your own identifying ++ * information: Portions Copyright [yyyy] [name of copyright owner] ++ * ++ * CDDL HEADER END ++ */ ++/* ++ * Copyright 2009 Sun Microsystems, Inc. All rights reserved. ++ * Use is subject to license terms. ++ */ ++ ++#include ++#include ++ ++/* ++ * This file contains the implementation of a re-entrant read ++ * reader/writer lock (aka "rrwlock"). ++ * ++ * This is a normal reader/writer lock with the additional feature ++ * of allowing threads who have already obtained a read lock to ++ * re-enter another read lock (re-entrant read) - even if there are ++ * waiting writers. ++ * ++ * Callers who have not obtained a read lock give waiting writers priority. ++ * ++ * The rrwlock_t lock does not allow re-entrant writers, nor does it ++ * allow a re-entrant mix of reads and writes (that is, it does not ++ * allow a caller who has already obtained a read lock to be able to ++ * then grab a write lock without first dropping all read locks, and ++ * vice versa). ++ * ++ * The rrwlock_t uses tsd (thread specific data) to keep a list of ++ * nodes (rrw_node_t), where each node keeps track of which specific ++ * lock (rrw_node_t::rn_rrl) the thread has grabbed. 
Since re-entering ++ * should be rare, a thread that grabs multiple reads on the same rrwlock_t ++ * will store multiple rrw_node_ts of the same 'rrn_rrl'. Nodes on the ++ * tsd list can represent a different rrwlock_t. This allows a thread ++ * to enter multiple and unique rrwlock_ts for read locks at the same time. ++ * ++ * Since using tsd exposes some overhead, the rrwlock_t only needs to ++ * keep tsd data when writers are waiting. If no writers are waiting, then ++ * a reader just bumps the anonymous read count (rr_anon_rcount) - no tsd ++ * is needed. Once a writer attempts to grab the lock, readers then ++ * keep tsd data and bump the linked readers count (rr_linked_rcount). ++ * ++ * If there are waiting writers and there are anonymous readers, then a ++ * reader doesn't know if it is a re-entrant lock. But since it may be one, ++ * we allow the read to proceed (otherwise it could deadlock). Since once ++ * waiting writers are active, readers no longer bump the anonymous count, ++ * the anonymous readers will eventually flush themselves out. At this point, ++ * readers will be able to tell if they are a re-entrant lock (have a ++ * rrw_node_t entry for the lock) or not. If they are a re-entrant lock, then ++ * we must let the proceed. If they are not, then the reader blocks for the ++ * waiting writers. Hence, we do not starve writers. ++ */ ++ ++/* global key for TSD */ ++uint_t rrw_tsd_key; ++ ++typedef struct rrw_node { ++ struct rrw_node *rn_next; ++ rrwlock_t *rn_rrl; ++} rrw_node_t; ++ ++static rrw_node_t * ++rrn_find(rrwlock_t *rrl) ++{ ++ rrw_node_t *rn; ++ ++ if (refcount_count(&rrl->rr_linked_rcount) == 0) ++ return (NULL); ++ ++ for (rn = tsd_get(rrw_tsd_key); rn != NULL; rn = rn->rn_next) { ++ if (rn->rn_rrl == rrl) ++ return (rn); ++ } ++ return (NULL); ++} ++ ++/* ++ * Add a node to the head of the singly linked list. ++ */ ++static void ++rrn_add(rrwlock_t *rrl) ++{ ++ rrw_node_t *rn; ++ ++ rn = kmem_alloc(sizeof (*rn), KM_SLEEP); ++ rn->rn_rrl = rrl; ++ rn->rn_next = tsd_get(rrw_tsd_key); ++ VERIFY(tsd_set(rrw_tsd_key, rn) == 0); ++} ++ ++/* ++ * If a node is found for 'rrl', then remove the node from this ++ * thread's list and return TRUE; otherwise return FALSE. 
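++ *
++ * For context, the lock these helpers support is typically used like the
++ * following sketch (FTAG as the holder tag; the second read enter from the
++ * same thread is the re-entrant case described in the comment above):
++ *
++ *	rrw_enter(&rrl, RW_READER, FTAG);
++ *	rrw_enter(&rrl, RW_READER, FTAG);	/* re-entrant read */
++ *	rrw_exit(&rrl, FTAG);
++ *	rrw_exit(&rrl, FTAG);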
++ */ ++static boolean_t ++rrn_find_and_remove(rrwlock_t *rrl) ++{ ++ rrw_node_t *rn; ++ rrw_node_t *prev = NULL; ++ ++ if (refcount_count(&rrl->rr_linked_rcount) == 0) ++ return (B_FALSE); ++ ++ for (rn = tsd_get(rrw_tsd_key); rn != NULL; rn = rn->rn_next) { ++ if (rn->rn_rrl == rrl) { ++ if (prev) ++ prev->rn_next = rn->rn_next; ++ else ++ VERIFY(tsd_set(rrw_tsd_key, rn->rn_next) == 0); ++ kmem_free(rn, sizeof (*rn)); ++ return (B_TRUE); ++ } ++ prev = rn; ++ } ++ return (B_FALSE); ++} ++ ++void ++rrw_init(rrwlock_t *rrl) ++{ ++ mutex_init(&rrl->rr_lock, NULL, MUTEX_DEFAULT, NULL); ++ cv_init(&rrl->rr_cv, NULL, CV_DEFAULT, NULL); ++ rrl->rr_writer = NULL; ++ refcount_create(&rrl->rr_anon_rcount); ++ refcount_create(&rrl->rr_linked_rcount); ++ rrl->rr_writer_wanted = B_FALSE; ++} ++ ++void ++rrw_destroy(rrwlock_t *rrl) ++{ ++ mutex_destroy(&rrl->rr_lock); ++ cv_destroy(&rrl->rr_cv); ++ ASSERT(rrl->rr_writer == NULL); ++ refcount_destroy(&rrl->rr_anon_rcount); ++ refcount_destroy(&rrl->rr_linked_rcount); ++} ++ ++static void ++rrw_enter_read(rrwlock_t *rrl, void *tag) ++{ ++ mutex_enter(&rrl->rr_lock); ++#if !defined(DEBUG) && defined(_KERNEL) ++ if (!rrl->rr_writer && !rrl->rr_writer_wanted) { ++ rrl->rr_anon_rcount.rc_count++; ++ mutex_exit(&rrl->rr_lock); ++ return; ++ } ++ DTRACE_PROBE(zfs__rrwfastpath__rdmiss); ++#endif ++ ASSERT(rrl->rr_writer != curthread); ++ ASSERT(refcount_count(&rrl->rr_anon_rcount) >= 0); ++ ++ while (rrl->rr_writer || (rrl->rr_writer_wanted && ++ refcount_is_zero(&rrl->rr_anon_rcount) && ++ rrn_find(rrl) == NULL)) ++ cv_wait(&rrl->rr_cv, &rrl->rr_lock); ++ ++ if (rrl->rr_writer_wanted) { ++ /* may or may not be a re-entrant enter */ ++ rrn_add(rrl); ++ (void) refcount_add(&rrl->rr_linked_rcount, tag); ++ } else { ++ (void) refcount_add(&rrl->rr_anon_rcount, tag); ++ } ++ ASSERT(rrl->rr_writer == NULL); ++ mutex_exit(&rrl->rr_lock); ++} ++ ++static void ++rrw_enter_write(rrwlock_t *rrl) ++{ ++ mutex_enter(&rrl->rr_lock); ++ ASSERT(rrl->rr_writer != curthread); ++ ++ while (refcount_count(&rrl->rr_anon_rcount) > 0 || ++ refcount_count(&rrl->rr_linked_rcount) > 0 || ++ rrl->rr_writer != NULL) { ++ rrl->rr_writer_wanted = B_TRUE; ++ cv_wait(&rrl->rr_cv, &rrl->rr_lock); ++ } ++ rrl->rr_writer_wanted = B_FALSE; ++ rrl->rr_writer = curthread; ++ mutex_exit(&rrl->rr_lock); ++} ++ ++void ++rrw_enter(rrwlock_t *rrl, krw_t rw, void *tag) ++{ ++ if (rw == RW_READER) ++ rrw_enter_read(rrl, tag); ++ else ++ rrw_enter_write(rrl); ++} ++ ++void ++rrw_exit(rrwlock_t *rrl, void *tag) ++{ ++ mutex_enter(&rrl->rr_lock); ++#if !defined(DEBUG) && defined(_KERNEL) ++ if (!rrl->rr_writer && rrl->rr_linked_rcount.rc_count == 0) { ++ rrl->rr_anon_rcount.rc_count--; ++ if (rrl->rr_anon_rcount.rc_count == 0) ++ cv_broadcast(&rrl->rr_cv); ++ mutex_exit(&rrl->rr_lock); ++ return; ++ } ++ DTRACE_PROBE(zfs__rrwfastpath__exitmiss); ++#endif ++ ASSERT(!refcount_is_zero(&rrl->rr_anon_rcount) || ++ !refcount_is_zero(&rrl->rr_linked_rcount) || ++ rrl->rr_writer != NULL); ++ ++ if (rrl->rr_writer == NULL) { ++ int64_t count; ++ if (rrn_find_and_remove(rrl)) ++ count = refcount_remove(&rrl->rr_linked_rcount, tag); ++ else ++ count = refcount_remove(&rrl->rr_anon_rcount, tag); ++ if (count == 0) ++ cv_broadcast(&rrl->rr_cv); ++ } else { ++ ASSERT(rrl->rr_writer == curthread); ++ ASSERT(refcount_is_zero(&rrl->rr_anon_rcount) && ++ refcount_is_zero(&rrl->rr_linked_rcount)); ++ rrl->rr_writer = NULL; ++ cv_broadcast(&rrl->rr_cv); ++ } ++ mutex_exit(&rrl->rr_lock); ++} ++ ++boolean_t 
++rrw_held(rrwlock_t *rrl, krw_t rw) ++{ ++ boolean_t held; ++ ++ mutex_enter(&rrl->rr_lock); ++ if (rw == RW_WRITER) { ++ held = (rrl->rr_writer == curthread); ++ } else { ++ held = (!refcount_is_zero(&rrl->rr_anon_rcount) || ++ !refcount_is_zero(&rrl->rr_linked_rcount)); ++ } ++ mutex_exit(&rrl->rr_lock); ++ ++ return (held); ++} +diff -uNr linux-3.2.33-go.orig/fs/zfs/zfs/sa.c linux-3.2.33-go/fs/zfs/zfs/sa.c +--- linux-3.2.33-go.orig/fs/zfs/zfs/sa.c 1970-01-01 01:00:00.000000000 +0100 ++++ linux-3.2.33-go/fs/zfs/zfs/sa.c 2012-11-16 23:25:34.352039300 +0100 +@@ -0,0 +1,2060 @@ ++/* ++ * CDDL HEADER START ++ * ++ * The contents of this file are subject to the terms of the ++ * Common Development and Distribution License (the "License"). ++ * You may not use this file except in compliance with the License. ++ * ++ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE ++ * or http://www.opensolaris.org/os/licensing. ++ * See the License for the specific language governing permissions ++ * and limitations under the License. ++ * ++ * When distributing Covered Code, include this CDDL HEADER in each ++ * file and include the License file at usr/src/OPENSOLARIS.LICENSE. ++ * If applicable, add the following below this CDDL HEADER, with the ++ * fields enclosed by brackets "[]" replaced with your own identifying ++ * information: Portions Copyright [yyyy] [name of copyright owner] ++ * ++ * CDDL HEADER END ++ */ ++/* ++ * Copyright (c) 2010, Oracle and/or its affiliates. All rights reserved. ++ */ ++ ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++ ++/* ++ * ZFS System attributes: ++ * ++ * A generic mechanism to allow for arbitrary attributes ++ * to be stored in a dnode. The data will be stored in the bonus buffer of ++ * the dnode and if necessary a special "spill" block will be used to handle ++ * overflow situations. The spill block will be sized to fit the data ++ * from 512 - 128K. When a spill block is used the BP (blkptr_t) for the ++ * spill block is stored at the end of the current bonus buffer. Any ++ * attributes that would be in the way of the blkptr_t will be relocated ++ * into the spill block. ++ * ++ * Attribute registration: ++ * ++ * Stored persistently on a per dataset basis ++ * a mapping between attribute "string" names and their actual attribute ++ * numeric values, length, and byteswap function. The names are only used ++ * during registration. All attributes are known by their unique attribute ++ * id value. If an attribute can have a variable size then the value ++ * 0 will be used to indicate this. ++ * ++ * Attribute Layout: ++ * ++ * Attribute layouts are a way to compactly store multiple attributes, but ++ * without taking the overhead associated with managing each attribute ++ * individually. Since you will typically have the same set of attributes ++ * stored in the same order a single table will be used to represent that ++ * layout. The ZPL for example will usually have only about 10 different ++ * layouts (regular files, device files, symlinks, ++ * regular files + scanstamp, files/dir with extended attributes, and then ++ * you have the possibility of all of those minus ACL, because it would ++ * be kicked out into the spill block) ++ * ++ * Layouts are simply an array of the attributes and their ++ * ordering i.e. 
[0, 1, 4, 5, 2]
++ *
++ * Each distinct layout is given a unique layout number and that is what's
++ * stored in the header at the beginning of the SA data buffer.
++ *
++ * A layout only covers a single dbuf (bonus or spill). If a set of
++ * attributes is split up between the bonus buffer and a spill buffer then
++ * two different layouts will be used. This allows us to byteswap the
++ * spill without looking at the bonus buffer and keeps the on disk format of
++ * the bonus and spill buffer the same.
++ *
++ * Adding a single attribute will cause the entire set of attributes to
++ * be rewritten and could result in a new layout number being constructed
++ * as part of the rewrite if no such layout exists for the new set of
++ * attributes. The new attribute will be appended to the end of the already
++ * existing attributes.
++ *
++ * Both the attribute registration and attribute layout information are
++ * stored in normal ZAP attributes. There should be a small number of
++ * known layouts and the set of attributes is assumed to typically be quite
++ * small.
++ *
++ * The registered attributes and layout "table" information is maintained
++ * in core and a special "sa_os_t" is attached to the objset_t.
++ *
++ * A special interface is provided to allow for quickly applying
++ * a large set of attributes at once. sa_replace_all_by_template() is
++ * used to set an array of attributes. This is used by the ZPL when
++ * creating a brand new file. The template that is passed into the function
++ * specifies the attribute, size for variable length attributes, location of
++ * data and special "data locator" function if the data isn't in a contiguous
++ * location.
++ *
++ * Byteswap implications:
++ * Since the SA attributes are not entirely self describing we can't do
++ * the normal byteswap processing. The special ZAP layout attribute and
++ * attribute registration attributes define the byteswap function and the
++ * size of the attributes, unless it is variable sized.
++ * The normal ZFS byteswapping infrastructure assumes you don't need
++ * to read any objects in order to do the necessary byteswapping. Whereas
++ * SA attributes can only be properly byteswapped if the dataset is opened
++ * and the layout/attribute ZAP attributes are available. Because of this
++ * the SA attributes will be byteswapped when they are first accessed by
++ * the SA code that will read the SA data.
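++ *
++ * Once a handle is held, a consumer reads an attribute through this
++ * machinery with a single call, e.g. (sketch; the ZPL's SA_ZPL_SIZE
++ * attribute id macro and its zsb argument are assumed):
++ *
++ *	uint64_t size;
++ *	error = sa_lookup(hdl, SA_ZPL_SIZE(zsb), &size, sizeof (size));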
++ */ ++ ++typedef void (sa_iterfunc_t)(void *hdr, void *addr, sa_attr_type_t, ++ uint16_t length, int length_idx, boolean_t, void *userp); ++ ++static int sa_build_index(sa_handle_t *hdl, sa_buf_type_t buftype); ++static void sa_idx_tab_hold(objset_t *os, sa_idx_tab_t *idx_tab); ++static void *sa_find_idx_tab(objset_t *os, dmu_object_type_t bonustype, ++ void *data); ++static void sa_idx_tab_rele(objset_t *os, void *arg); ++static void sa_copy_data(sa_data_locator_t *func, void *start, void *target, ++ int buflen); ++static int sa_modify_attrs(sa_handle_t *hdl, sa_attr_type_t newattr, ++ sa_data_op_t action, sa_data_locator_t *locator, void *datastart, ++ uint16_t buflen, dmu_tx_t *tx); ++ ++arc_byteswap_func_t *sa_bswap_table[] = { ++ byteswap_uint64_array, ++ byteswap_uint32_array, ++ byteswap_uint16_array, ++ byteswap_uint8_array, ++ zfs_acl_byteswap, ++}; ++ ++#define SA_COPY_DATA(f, s, t, l) \ ++ { \ ++ if (f == NULL) { \ ++ if (l == 8) { \ ++ *(uint64_t *)t = *(uint64_t *)s; \ ++ } else if (l == 16) { \ ++ *(uint64_t *)t = *(uint64_t *)s; \ ++ *(uint64_t *)((uintptr_t)t + 8) = \ ++ *(uint64_t *)((uintptr_t)s + 8); \ ++ } else { \ ++ bcopy(s, t, l); \ ++ } \ ++ } else \ ++ sa_copy_data(f, s, t, l); \ ++ } ++ ++/* ++ * This table is fixed and cannot be changed. Its purpose is to ++ * allow the SA code to work with both old/new ZPL file systems. ++ * It contains the list of legacy attributes. These attributes aren't ++ * stored in the "attribute" registry zap objects, since older ZPL file systems ++ * won't have the registry. Only objsets of type ZFS_TYPE_FILESYSTEM will ++ * use this static table. ++ */ ++sa_attr_reg_t sa_legacy_attrs[] = { ++ {"ZPL_ATIME", sizeof (uint64_t) * 2, SA_UINT64_ARRAY, 0}, ++ {"ZPL_MTIME", sizeof (uint64_t) * 2, SA_UINT64_ARRAY, 1}, ++ {"ZPL_CTIME", sizeof (uint64_t) * 2, SA_UINT64_ARRAY, 2}, ++ {"ZPL_CRTIME", sizeof (uint64_t) * 2, SA_UINT64_ARRAY, 3}, ++ {"ZPL_GEN", sizeof (uint64_t), SA_UINT64_ARRAY, 4}, ++ {"ZPL_MODE", sizeof (uint64_t), SA_UINT64_ARRAY, 5}, ++ {"ZPL_SIZE", sizeof (uint64_t), SA_UINT64_ARRAY, 6}, ++ {"ZPL_PARENT", sizeof (uint64_t), SA_UINT64_ARRAY, 7}, ++ {"ZPL_LINKS", sizeof (uint64_t), SA_UINT64_ARRAY, 8}, ++ {"ZPL_XATTR", sizeof (uint64_t), SA_UINT64_ARRAY, 9}, ++ {"ZPL_RDEV", sizeof (uint64_t), SA_UINT64_ARRAY, 10}, ++ {"ZPL_FLAGS", sizeof (uint64_t), SA_UINT64_ARRAY, 11}, ++ {"ZPL_UID", sizeof (uint64_t), SA_UINT64_ARRAY, 12}, ++ {"ZPL_GID", sizeof (uint64_t), SA_UINT64_ARRAY, 13}, ++ {"ZPL_PAD", sizeof (uint64_t) * 4, SA_UINT64_ARRAY, 14}, ++ {"ZPL_ZNODE_ACL", 88, SA_UINT8_ARRAY, 15}, ++}; ++ ++/* ++ * ZPL legacy layout ++ * This is only used for objects of type DMU_OT_ZNODE ++ */ ++sa_attr_type_t sa_legacy_zpl_layout[] = { ++ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15 ++}; ++ ++/* ++ * Special dummy layout used for buffers with no attributes. 
++ */ ++ ++sa_attr_type_t sa_dummy_zpl_layout[] = { 0 }; ++ ++static int sa_legacy_attr_count = 16; ++static kmem_cache_t *sa_cache = NULL; ++static kmem_cache_t *spill_cache = NULL; ++ ++/*ARGSUSED*/ ++static int ++sa_cache_constructor(void *buf, void *unused, int kmflag) ++{ ++ sa_handle_t *hdl = buf; ++ ++ hdl->sa_bonus_tab = NULL; ++ hdl->sa_spill_tab = NULL; ++ hdl->sa_os = NULL; ++ hdl->sa_userp = NULL; ++ hdl->sa_bonus = NULL; ++ hdl->sa_spill = NULL; ++ mutex_init(&hdl->sa_lock, NULL, MUTEX_DEFAULT, NULL); ++ return (0); ++} ++ ++/*ARGSUSED*/ ++static void ++sa_cache_destructor(void *buf, void *unused) ++{ ++ sa_handle_t *hdl = buf; ++ mutex_destroy(&hdl->sa_lock); ++} ++ ++void ++sa_cache_init(void) ++{ ++ sa_cache = kmem_cache_create("sa_cache", ++ sizeof (sa_handle_t), 0, sa_cache_constructor, ++ sa_cache_destructor, NULL, NULL, NULL, 0); ++ spill_cache = kmem_cache_create("spill_cache", ++ SPA_MAXBLOCKSIZE, 0, NULL, NULL, NULL, NULL, NULL, 0); ++} ++ ++void ++sa_cache_fini(void) ++{ ++ if (sa_cache) ++ kmem_cache_destroy(sa_cache); ++ ++ if (spill_cache) ++ kmem_cache_destroy(spill_cache); ++} ++ ++void * ++sa_spill_alloc(int flags) ++{ ++ return kmem_cache_alloc(spill_cache, flags); ++} ++ ++void ++sa_spill_free(void *obj) ++{ ++ kmem_cache_free(spill_cache, obj); ++} ++ ++static int ++layout_num_compare(const void *arg1, const void *arg2) ++{ ++ const sa_lot_t *node1 = arg1; ++ const sa_lot_t *node2 = arg2; ++ ++ if (node1->lot_num > node2->lot_num) ++ return (1); ++ else if (node1->lot_num < node2->lot_num) ++ return (-1); ++ return (0); ++} ++ ++static int ++layout_hash_compare(const void *arg1, const void *arg2) ++{ ++ const sa_lot_t *node1 = arg1; ++ const sa_lot_t *node2 = arg2; ++ ++ if (node1->lot_hash > node2->lot_hash) ++ return (1); ++ if (node1->lot_hash < node2->lot_hash) ++ return (-1); ++ if (node1->lot_instance > node2->lot_instance) ++ return (1); ++ if (node1->lot_instance < node2->lot_instance) ++ return (-1); ++ return (0); ++} ++ ++boolean_t ++sa_layout_equal(sa_lot_t *tbf, sa_attr_type_t *attrs, int count) ++{ ++ int i; ++ ++ if (count != tbf->lot_attr_count) ++ return (1); ++ ++ for (i = 0; i != count; i++) { ++ if (attrs[i] != tbf->lot_attrs[i]) ++ return (1); ++ } ++ return (0); ++} ++ ++#define SA_ATTR_HASH(attr) (zfs_crc64_table[(-1ULL ^ attr) & 0xFF]) ++ ++static uint64_t ++sa_layout_info_hash(sa_attr_type_t *attrs, int attr_count) ++{ ++ int i; ++ uint64_t crc = -1ULL; ++ ++ for (i = 0; i != attr_count; i++) ++ crc ^= SA_ATTR_HASH(attrs[i]); ++ ++ return (crc); ++} ++ ++static int ++sa_get_spill(sa_handle_t *hdl) ++{ ++ int rc; ++ if (hdl->sa_spill == NULL) { ++ if ((rc = dmu_spill_hold_existing(hdl->sa_bonus, NULL, ++ &hdl->sa_spill)) == 0) ++ VERIFY(0 == sa_build_index(hdl, SA_SPILL)); ++ } else { ++ rc = 0; ++ } ++ ++ return (rc); ++} ++ ++/* ++ * Main attribute lookup/update function ++ * returns 0 for success or non zero for failures ++ * ++ * Operates on bulk array, first failure will abort further processing ++ */ ++int ++sa_attr_op(sa_handle_t *hdl, sa_bulk_attr_t *bulk, int count, ++ sa_data_op_t data_op, dmu_tx_t *tx) ++{ ++ sa_os_t *sa = hdl->sa_os->os_sa; ++ int i; ++ int error = 0; ++ sa_buf_type_t buftypes; ++ ++ buftypes = 0; ++ ++ ASSERT(count > 0); ++ for (i = 0; i != count; i++) { ++ ASSERT(bulk[i].sa_attr <= hdl->sa_os->os_sa->sa_num_attrs); ++ ++ bulk[i].sa_addr = NULL; ++ /* First check the bonus buffer */ ++ ++ if (hdl->sa_bonus_tab && TOC_ATTR_PRESENT( ++ hdl->sa_bonus_tab->sa_idx_tab[bulk[i].sa_attr])) { ++ SA_ATTR_INFO(sa, 
hdl->sa_bonus_tab, ++ SA_GET_HDR(hdl, SA_BONUS), ++ bulk[i].sa_attr, bulk[i], SA_BONUS, hdl); ++ if (tx && !(buftypes & SA_BONUS)) { ++ dmu_buf_will_dirty(hdl->sa_bonus, tx); ++ buftypes |= SA_BONUS; ++ } ++ } ++ if (bulk[i].sa_addr == NULL && ++ ((error = sa_get_spill(hdl)) == 0)) { ++ if (TOC_ATTR_PRESENT( ++ hdl->sa_spill_tab->sa_idx_tab[bulk[i].sa_attr])) { ++ SA_ATTR_INFO(sa, hdl->sa_spill_tab, ++ SA_GET_HDR(hdl, SA_SPILL), ++ bulk[i].sa_attr, bulk[i], SA_SPILL, hdl); ++ if (tx && !(buftypes & SA_SPILL) && ++ bulk[i].sa_size == bulk[i].sa_length) { ++ dmu_buf_will_dirty(hdl->sa_spill, tx); ++ buftypes |= SA_SPILL; ++ } ++ } ++ } ++ if (error && error != ENOENT) { ++ return ((error == ECKSUM) ? EIO : error); ++ } ++ ++ switch (data_op) { ++ case SA_LOOKUP: ++ if (bulk[i].sa_addr == NULL) ++ return (ENOENT); ++ if (bulk[i].sa_data) { ++ SA_COPY_DATA(bulk[i].sa_data_func, ++ bulk[i].sa_addr, bulk[i].sa_data, ++ bulk[i].sa_size); ++ } ++ continue; ++ ++ case SA_UPDATE: ++ /* existing rewrite of attr */ ++ if (bulk[i].sa_addr && ++ bulk[i].sa_size == bulk[i].sa_length) { ++ SA_COPY_DATA(bulk[i].sa_data_func, ++ bulk[i].sa_data, bulk[i].sa_addr, ++ bulk[i].sa_length); ++ continue; ++ } else if (bulk[i].sa_addr) { /* attr size change */ ++ error = sa_modify_attrs(hdl, bulk[i].sa_attr, ++ SA_REPLACE, bulk[i].sa_data_func, ++ bulk[i].sa_data, bulk[i].sa_length, tx); ++ } else { /* adding new attribute */ ++ error = sa_modify_attrs(hdl, bulk[i].sa_attr, ++ SA_ADD, bulk[i].sa_data_func, ++ bulk[i].sa_data, bulk[i].sa_length, tx); ++ } ++ if (error) ++ return (error); ++ break; ++ default: ++ break; ++ } ++ } ++ return (error); ++} ++ ++static sa_lot_t * ++sa_add_layout_entry(objset_t *os, sa_attr_type_t *attrs, int attr_count, ++ uint64_t lot_num, uint64_t hash, boolean_t zapadd, dmu_tx_t *tx) ++{ ++ sa_os_t *sa = os->os_sa; ++ sa_lot_t *tb, *findtb; ++ int i; ++ avl_index_t loc; ++ ++ ASSERT(MUTEX_HELD(&sa->sa_lock)); ++ tb = kmem_zalloc(sizeof (sa_lot_t), KM_PUSHPAGE); ++ tb->lot_attr_count = attr_count; ++ tb->lot_attrs = kmem_alloc(sizeof (sa_attr_type_t) * attr_count, ++ KM_PUSHPAGE); ++ bcopy(attrs, tb->lot_attrs, sizeof (sa_attr_type_t) * attr_count); ++ tb->lot_num = lot_num; ++ tb->lot_hash = hash; ++ tb->lot_instance = 0; ++ ++ if (zapadd) { ++ char attr_name[8]; ++ ++ if (sa->sa_layout_attr_obj == 0) { ++ sa->sa_layout_attr_obj = zap_create(os, ++ DMU_OT_SA_ATTR_LAYOUTS, DMU_OT_NONE, 0, tx); ++ VERIFY(zap_add(os, sa->sa_master_obj, SA_LAYOUTS, 8, 1, ++ &sa->sa_layout_attr_obj, tx) == 0); ++ } ++ ++ (void) snprintf(attr_name, sizeof (attr_name), ++ "%d", (int)lot_num); ++ VERIFY(0 == zap_update(os, os->os_sa->sa_layout_attr_obj, ++ attr_name, 2, attr_count, attrs, tx)); ++ } ++ ++ list_create(&tb->lot_idx_tab, sizeof (sa_idx_tab_t), ++ offsetof(sa_idx_tab_t, sa_next)); ++ ++ for (i = 0; i != attr_count; i++) { ++ if (sa->sa_attr_table[tb->lot_attrs[i]].sa_length == 0) ++ tb->lot_var_sizes++; ++ } ++ ++ avl_add(&sa->sa_layout_num_tree, tb); ++ ++ /* verify we don't have a hash collision */ ++ if ((findtb = avl_find(&sa->sa_layout_hash_tree, tb, &loc)) != NULL) { ++ for (; findtb && findtb->lot_hash == hash; ++ findtb = AVL_NEXT(&sa->sa_layout_hash_tree, findtb)) { ++ if (findtb->lot_instance != tb->lot_instance) ++ break; ++ tb->lot_instance++; ++ } ++ } ++ avl_add(&sa->sa_layout_hash_tree, tb); ++ return (tb); ++} ++ ++static void ++sa_find_layout(objset_t *os, uint64_t hash, sa_attr_type_t *attrs, ++ int count, dmu_tx_t *tx, sa_lot_t **lot) ++{ ++ sa_lot_t *tb, tbsearch; ++ 
avl_index_t loc; ++ sa_os_t *sa = os->os_sa; ++ boolean_t found = B_FALSE; ++ ++ mutex_enter(&sa->sa_lock); ++ tbsearch.lot_hash = hash; ++ tbsearch.lot_instance = 0; ++ tb = avl_find(&sa->sa_layout_hash_tree, &tbsearch, &loc); ++ if (tb) { ++ for (; tb && tb->lot_hash == hash; ++ tb = AVL_NEXT(&sa->sa_layout_hash_tree, tb)) { ++ if (sa_layout_equal(tb, attrs, count) == 0) { ++ found = B_TRUE; ++ break; ++ } ++ } ++ } ++ if (!found) { ++ tb = sa_add_layout_entry(os, attrs, count, ++ avl_numnodes(&sa->sa_layout_num_tree), hash, B_TRUE, tx); ++ } ++ mutex_exit(&sa->sa_lock); ++ *lot = tb; ++} ++ ++static int ++sa_resize_spill(sa_handle_t *hdl, uint32_t size, dmu_tx_t *tx) ++{ ++ int error; ++ uint32_t blocksize; ++ ++ if (size == 0) { ++ blocksize = SPA_MINBLOCKSIZE; ++ } else if (size > SPA_MAXBLOCKSIZE) { ++ ASSERT(0); ++ return (EFBIG); ++ } else { ++ blocksize = P2ROUNDUP_TYPED(size, SPA_MINBLOCKSIZE, uint32_t); ++ } ++ ++ error = dbuf_spill_set_blksz(hdl->sa_spill, blocksize, tx); ++ ASSERT(error == 0); ++ return (error); ++} ++ ++static void ++sa_copy_data(sa_data_locator_t *func, void *datastart, void *target, int buflen) ++{ ++ if (func == NULL) { ++ bcopy(datastart, target, buflen); ++ } else { ++ boolean_t start; ++ int bytes; ++ void *dataptr; ++ void *saptr = target; ++ uint32_t length; ++ ++ start = B_TRUE; ++ bytes = 0; ++ while (bytes < buflen) { ++ func(&dataptr, &length, buflen, start, datastart); ++ bcopy(dataptr, saptr, length); ++ saptr = (void *)((caddr_t)saptr + length); ++ bytes += length; ++ start = B_FALSE; ++ } ++ } ++} ++ ++/* ++ * Determine several different sizes ++ * first the sa header size ++ * the number of bytes to be stored ++ * if spill would occur the index in the attribute array is returned ++ * ++ * the boolean will_spill will be set when spilling is necessary. It ++ * is only set when the buftype is SA_BONUS ++ */ ++static int ++sa_find_sizes(sa_os_t *sa, sa_bulk_attr_t *attr_desc, int attr_count, ++ dmu_buf_t *db, sa_buf_type_t buftype, int *index, int *total, ++ boolean_t *will_spill) ++{ ++ int var_size = 0; ++ int i; ++ int full_space; ++ int hdrsize; ++ boolean_t done = B_FALSE; ++ ++ if (buftype == SA_BONUS && sa->sa_force_spill) { ++ *total = 0; ++ *index = 0; ++ *will_spill = B_TRUE; ++ return (0); ++ } ++ ++ *index = -1; ++ *total = 0; ++ ++ if (buftype == SA_BONUS) ++ *will_spill = B_FALSE; ++ ++ hdrsize = (SA_BONUSTYPE_FROM_DB(db) == DMU_OT_ZNODE) ? 0 : ++ sizeof (sa_hdr_phys_t); ++ ++ full_space = (buftype == SA_BONUS) ? DN_MAX_BONUSLEN : db->db_size; ++ ++ for (i = 0; i != attr_count; i++) { ++ boolean_t is_var_sz; ++ ++ *total += attr_desc[i].sa_length; ++ if (done) ++ goto next; ++ ++ is_var_sz = (SA_REGISTERED_LEN(sa, attr_desc[i].sa_attr) == 0); ++ if (is_var_sz) { ++ var_size++; ++ } ++ ++ if (is_var_sz && var_size > 1) { ++ if (P2ROUNDUP(hdrsize + sizeof (uint16_t), 8) + ++ *total < full_space) { ++ hdrsize += sizeof (uint16_t); ++ } else { ++ done = B_TRUE; ++ *index = i; ++ if (buftype == SA_BONUS) ++ *will_spill = B_TRUE; ++ continue; ++ } ++ } ++ ++ /* ++ * find index of where spill *could* occur. ++ * Then continue to count of remainder attribute ++ * space. The sum is used later for sizing bonus ++ * and spill buffer. 
++ */ ++ if (buftype == SA_BONUS && *index == -1 && ++ (*total + P2ROUNDUP(hdrsize, 8)) > ++ (full_space - sizeof (blkptr_t))) { ++ *index = i; ++ done = B_TRUE; ++ } ++ ++next: ++ if ((*total + P2ROUNDUP(hdrsize, 8)) > full_space && ++ buftype == SA_BONUS) ++ *will_spill = B_TRUE; ++ } ++ ++ hdrsize = P2ROUNDUP(hdrsize, 8); ++ return (hdrsize); ++} ++ ++#define BUF_SPACE_NEEDED(total, header) (total + header) ++ ++/* ++ * Find layout that corresponds to ordering of attributes ++ * If not found a new layout number is created and added to ++ * persistent layout tables. ++ */ ++static int ++sa_build_layouts(sa_handle_t *hdl, sa_bulk_attr_t *attr_desc, int attr_count, ++ dmu_tx_t *tx) ++{ ++ sa_os_t *sa = hdl->sa_os->os_sa; ++ uint64_t hash; ++ sa_buf_type_t buftype; ++ sa_hdr_phys_t *sahdr; ++ void *data_start; ++ int buf_space; ++ sa_attr_type_t *attrs, *attrs_start; ++ int i, lot_count; ++ int hdrsize, spillhdrsize = 0; ++ int used; ++ dmu_object_type_t bonustype; ++ sa_lot_t *lot; ++ int len_idx; ++ int spill_used; ++ boolean_t spilling; ++ ++ dmu_buf_will_dirty(hdl->sa_bonus, tx); ++ bonustype = SA_BONUSTYPE_FROM_DB(hdl->sa_bonus); ++ ++ /* first determine bonus header size and sum of all attributes */ ++ hdrsize = sa_find_sizes(sa, attr_desc, attr_count, hdl->sa_bonus, ++ SA_BONUS, &i, &used, &spilling); ++ ++ if (used > SPA_MAXBLOCKSIZE) ++ return (EFBIG); ++ ++ VERIFY(0 == dmu_set_bonus(hdl->sa_bonus, spilling ? ++ MIN(DN_MAX_BONUSLEN - sizeof (blkptr_t), used + hdrsize) : ++ used + hdrsize, tx)); ++ ++ ASSERT((bonustype == DMU_OT_ZNODE && spilling == 0) || ++ bonustype == DMU_OT_SA); ++ ++ /* setup and size spill buffer when needed */ ++ if (spilling) { ++ boolean_t dummy; ++ ++ if (hdl->sa_spill == NULL) { ++ VERIFY(dmu_spill_hold_by_bonus(hdl->sa_bonus, NULL, ++ &hdl->sa_spill) == 0); ++ } ++ dmu_buf_will_dirty(hdl->sa_spill, tx); ++ ++ spillhdrsize = sa_find_sizes(sa, &attr_desc[i], ++ attr_count - i, hdl->sa_spill, SA_SPILL, &i, ++ &spill_used, &dummy); ++ ++ if (spill_used > SPA_MAXBLOCKSIZE) ++ return (EFBIG); ++ ++ buf_space = hdl->sa_spill->db_size - spillhdrsize; ++ if (BUF_SPACE_NEEDED(spill_used, spillhdrsize) > ++ hdl->sa_spill->db_size) ++ VERIFY(0 == sa_resize_spill(hdl, ++ BUF_SPACE_NEEDED(spill_used, spillhdrsize), tx)); ++ } ++ ++ /* setup starting pointers to lay down data */ ++ data_start = (void *)((uintptr_t)hdl->sa_bonus->db_data + hdrsize); ++ sahdr = (sa_hdr_phys_t *)hdl->sa_bonus->db_data; ++ buftype = SA_BONUS; ++ ++ if (spilling) ++ buf_space = (sa->sa_force_spill) ? 
++ 0 : SA_BLKPTR_SPACE - hdrsize; ++ else ++ buf_space = hdl->sa_bonus->db_size - hdrsize; ++ ++ attrs_start = attrs = kmem_alloc(sizeof (sa_attr_type_t) * attr_count, ++ KM_PUSHPAGE); ++ lot_count = 0; ++ ++ for (i = 0, len_idx = 0, hash = -1ULL; i != attr_count; i++) { ++ uint16_t length; ++ ++ attrs[i] = attr_desc[i].sa_attr; ++ length = SA_REGISTERED_LEN(sa, attrs[i]); ++ if (length == 0) ++ length = attr_desc[i].sa_length; ++ ++ if (buf_space < length) { /* switch to spill buffer */ ++ VERIFY(bonustype == DMU_OT_SA); ++ if (buftype == SA_BONUS && !sa->sa_force_spill) { ++ sa_find_layout(hdl->sa_os, hash, attrs_start, ++ lot_count, tx, &lot); ++ SA_SET_HDR(sahdr, lot->lot_num, hdrsize); ++ } ++ ++ buftype = SA_SPILL; ++ hash = -1ULL; ++ len_idx = 0; ++ ++ sahdr = (sa_hdr_phys_t *)hdl->sa_spill->db_data; ++ sahdr->sa_magic = SA_MAGIC; ++ data_start = (void *)((uintptr_t)sahdr + ++ spillhdrsize); ++ attrs_start = &attrs[i]; ++ buf_space = hdl->sa_spill->db_size - spillhdrsize; ++ lot_count = 0; ++ } ++ hash ^= SA_ATTR_HASH(attrs[i]); ++ attr_desc[i].sa_addr = data_start; ++ attr_desc[i].sa_size = length; ++ SA_COPY_DATA(attr_desc[i].sa_data_func, attr_desc[i].sa_data, ++ data_start, length); ++ if (sa->sa_attr_table[attrs[i]].sa_length == 0) { ++ sahdr->sa_lengths[len_idx++] = length; ++ } ++ data_start = (void *)P2ROUNDUP(((uintptr_t)data_start + ++ length), 8); ++ buf_space -= P2ROUNDUP(length, 8); ++ lot_count++; ++ } ++ ++ sa_find_layout(hdl->sa_os, hash, attrs_start, lot_count, tx, &lot); ++ ++ /* ++ * Verify that old znodes always have layout number 0. ++ * Must be DMU_OT_SA for arbitrary layouts ++ */ ++ VERIFY((bonustype == DMU_OT_ZNODE && lot->lot_num == 0) || ++ (bonustype == DMU_OT_SA && lot->lot_num > 1)); ++ ++ if (bonustype == DMU_OT_SA) { ++ SA_SET_HDR(sahdr, lot->lot_num, ++ buftype == SA_BONUS ? hdrsize : spillhdrsize); ++ } ++ ++ kmem_free(attrs, sizeof (sa_attr_type_t) * attr_count); ++ if (hdl->sa_bonus_tab) { ++ sa_idx_tab_rele(hdl->sa_os, hdl->sa_bonus_tab); ++ hdl->sa_bonus_tab = NULL; ++ } ++ if (!sa->sa_force_spill) ++ VERIFY(0 == sa_build_index(hdl, SA_BONUS)); ++ if (hdl->sa_spill) { ++ sa_idx_tab_rele(hdl->sa_os, hdl->sa_spill_tab); ++ if (!spilling) { ++ /* ++ * remove spill block that is no longer needed. 
++ */ ++ dmu_buf_rele(hdl->sa_spill, NULL); ++ hdl->sa_spill = NULL; ++ hdl->sa_spill_tab = NULL; ++ VERIFY(0 == dmu_rm_spill(hdl->sa_os, ++ sa_handle_object(hdl), tx)); ++ } else { ++ VERIFY(0 == sa_build_index(hdl, SA_SPILL)); ++ } ++ } ++ ++ return (0); ++} ++ ++static void ++sa_free_attr_table(sa_os_t *sa) ++{ ++ int i; ++ ++ if (sa->sa_attr_table == NULL) ++ return; ++ ++ for (i = 0; i != sa->sa_num_attrs; i++) { ++ if (sa->sa_attr_table[i].sa_name) ++ kmem_free(sa->sa_attr_table[i].sa_name, ++ strlen(sa->sa_attr_table[i].sa_name) + 1); ++ } ++ ++ kmem_free(sa->sa_attr_table, ++ sizeof (sa_attr_table_t) * sa->sa_num_attrs); ++ ++ sa->sa_attr_table = NULL; ++} ++ ++static int ++sa_attr_table_setup(objset_t *os, sa_attr_reg_t *reg_attrs, int count) ++{ ++ sa_os_t *sa = os->os_sa; ++ uint64_t sa_attr_count = 0; ++ uint64_t sa_reg_count = 0; ++ int error = 0; ++ uint64_t attr_value; ++ sa_attr_table_t *tb; ++ zap_cursor_t zc; ++ zap_attribute_t za; ++ int registered_count = 0; ++ int i; ++ dmu_objset_type_t ostype = dmu_objset_type(os); ++ ++ sa->sa_user_table = ++ kmem_zalloc(count * sizeof (sa_attr_type_t), KM_PUSHPAGE); ++ sa->sa_user_table_sz = count * sizeof (sa_attr_type_t); ++ ++ if (sa->sa_reg_attr_obj != 0) { ++ error = zap_count(os, sa->sa_reg_attr_obj, ++ &sa_attr_count); ++ ++ /* ++ * Make sure we retrieved a count and that it isn't zero ++ */ ++ if (error || (error == 0 && sa_attr_count == 0)) { ++ if (error == 0) ++ error = EINVAL; ++ goto bail; ++ } ++ sa_reg_count = sa_attr_count; ++ } ++ ++ if (ostype == DMU_OST_ZFS && sa_attr_count == 0) ++ sa_attr_count += sa_legacy_attr_count; ++ ++ /* Allocate attribute numbers for attributes that aren't registered */ ++ for (i = 0; i != count; i++) { ++ boolean_t found = B_FALSE; ++ int j; ++ ++ if (ostype == DMU_OST_ZFS) { ++ for (j = 0; j != sa_legacy_attr_count; j++) { ++ if (strcmp(reg_attrs[i].sa_name, ++ sa_legacy_attrs[j].sa_name) == 0) { ++ sa->sa_user_table[i] = ++ sa_legacy_attrs[j].sa_attr; ++ found = B_TRUE; ++ } ++ } ++ } ++ if (found) ++ continue; ++ ++ if (sa->sa_reg_attr_obj) ++ error = zap_lookup(os, sa->sa_reg_attr_obj, ++ reg_attrs[i].sa_name, 8, 1, &attr_value); ++ else ++ error = ENOENT; ++ switch (error) { ++ case ENOENT: ++ sa->sa_user_table[i] = (sa_attr_type_t)sa_attr_count; ++ sa_attr_count++; ++ break; ++ case 0: ++ sa->sa_user_table[i] = ATTR_NUM(attr_value); ++ break; ++ default: ++ goto bail; ++ } ++ } ++ ++ sa->sa_num_attrs = sa_attr_count; ++ tb = sa->sa_attr_table = ++ kmem_zalloc(sizeof (sa_attr_table_t) * sa_attr_count, KM_PUSHPAGE); ++ ++ /* ++ * Attribute table is constructed from requested attribute list, ++ * previously foreign registered attributes, and also the legacy ++ * ZPL set of attributes. 
++ */ ++ ++ if (sa->sa_reg_attr_obj) { ++ for (zap_cursor_init(&zc, os, sa->sa_reg_attr_obj); ++ (error = zap_cursor_retrieve(&zc, &za)) == 0; ++ zap_cursor_advance(&zc)) { ++ uint64_t value; ++ value = za.za_first_integer; ++ ++ registered_count++; ++ tb[ATTR_NUM(value)].sa_attr = ATTR_NUM(value); ++ tb[ATTR_NUM(value)].sa_length = ATTR_LENGTH(value); ++ tb[ATTR_NUM(value)].sa_byteswap = ATTR_BSWAP(value); ++ tb[ATTR_NUM(value)].sa_registered = B_TRUE; ++ ++ if (tb[ATTR_NUM(value)].sa_name) { ++ continue; ++ } ++ tb[ATTR_NUM(value)].sa_name = ++ kmem_zalloc(strlen(za.za_name) +1, KM_PUSHPAGE); ++ (void) strlcpy(tb[ATTR_NUM(value)].sa_name, za.za_name, ++ strlen(za.za_name) +1); ++ } ++ zap_cursor_fini(&zc); ++ /* ++ * Make sure we processed the correct number of registered ++ * attributes ++ */ ++ if (registered_count != sa_reg_count) { ++ ASSERT(error != 0); ++ goto bail; ++ } ++ ++ } ++ ++ if (ostype == DMU_OST_ZFS) { ++ for (i = 0; i != sa_legacy_attr_count; i++) { ++ if (tb[i].sa_name) ++ continue; ++ tb[i].sa_attr = sa_legacy_attrs[i].sa_attr; ++ tb[i].sa_length = sa_legacy_attrs[i].sa_length; ++ tb[i].sa_byteswap = sa_legacy_attrs[i].sa_byteswap; ++ tb[i].sa_registered = B_FALSE; ++ tb[i].sa_name = ++ kmem_zalloc(strlen(sa_legacy_attrs[i].sa_name) +1, ++ KM_PUSHPAGE); ++ (void) strlcpy(tb[i].sa_name, ++ sa_legacy_attrs[i].sa_name, ++ strlen(sa_legacy_attrs[i].sa_name) + 1); ++ } ++ } ++ ++ for (i = 0; i != count; i++) { ++ sa_attr_type_t attr_id; ++ ++ attr_id = sa->sa_user_table[i]; ++ if (tb[attr_id].sa_name) ++ continue; ++ ++ tb[attr_id].sa_length = reg_attrs[i].sa_length; ++ tb[attr_id].sa_byteswap = reg_attrs[i].sa_byteswap; ++ tb[attr_id].sa_attr = attr_id; ++ tb[attr_id].sa_name = ++ kmem_zalloc(strlen(reg_attrs[i].sa_name) + 1, KM_PUSHPAGE); ++ (void) strlcpy(tb[attr_id].sa_name, reg_attrs[i].sa_name, ++ strlen(reg_attrs[i].sa_name) + 1); ++ } ++ ++ sa->sa_need_attr_registration = ++ (sa_attr_count != registered_count); ++ ++ return (0); ++bail: ++ kmem_free(sa->sa_user_table, count * sizeof (sa_attr_type_t)); ++ sa->sa_user_table = NULL; ++ sa_free_attr_table(sa); ++ return ((error != 0) ? 
error : EINVAL); ++} ++ ++int ++sa_setup(objset_t *os, uint64_t sa_obj, sa_attr_reg_t *reg_attrs, int count, ++ sa_attr_type_t **user_table) ++{ ++ zap_cursor_t zc; ++ zap_attribute_t za; ++ sa_os_t *sa; ++ dmu_objset_type_t ostype = dmu_objset_type(os); ++ sa_attr_type_t *tb; ++ int error; ++ ++ mutex_enter(&os->os_lock); ++ if (os->os_sa) { ++ mutex_enter(&os->os_sa->sa_lock); ++ mutex_exit(&os->os_lock); ++ tb = os->os_sa->sa_user_table; ++ mutex_exit(&os->os_sa->sa_lock); ++ *user_table = tb; ++ return (0); ++ } ++ ++ sa = kmem_zalloc(sizeof (sa_os_t), KM_PUSHPAGE); ++ mutex_init(&sa->sa_lock, NULL, MUTEX_DEFAULT, NULL); ++ sa->sa_master_obj = sa_obj; ++ ++ os->os_sa = sa; ++ mutex_enter(&sa->sa_lock); ++ mutex_exit(&os->os_lock); ++ avl_create(&sa->sa_layout_num_tree, layout_num_compare, ++ sizeof (sa_lot_t), offsetof(sa_lot_t, lot_num_node)); ++ avl_create(&sa->sa_layout_hash_tree, layout_hash_compare, ++ sizeof (sa_lot_t), offsetof(sa_lot_t, lot_hash_node)); ++ ++ if (sa_obj) { ++ error = zap_lookup(os, sa_obj, SA_LAYOUTS, ++ 8, 1, &sa->sa_layout_attr_obj); ++ if (error != 0 && error != ENOENT) ++ goto fail; ++ error = zap_lookup(os, sa_obj, SA_REGISTRY, ++ 8, 1, &sa->sa_reg_attr_obj); ++ if (error != 0 && error != ENOENT) ++ goto fail; ++ } ++ ++ if ((error = sa_attr_table_setup(os, reg_attrs, count)) != 0) ++ goto fail; ++ ++ if (sa->sa_layout_attr_obj != 0) { ++ uint64_t layout_count; ++ ++ error = zap_count(os, sa->sa_layout_attr_obj, ++ &layout_count); ++ ++ /* ++ * Layout number count should be > 0 ++ */ ++ if (error || (error == 0 && layout_count == 0)) { ++ if (error == 0) ++ error = EINVAL; ++ goto fail; ++ } ++ ++ for (zap_cursor_init(&zc, os, sa->sa_layout_attr_obj); ++ (error = zap_cursor_retrieve(&zc, &za)) == 0; ++ zap_cursor_advance(&zc)) { ++ sa_attr_type_t *lot_attrs; ++ uint64_t lot_num; ++ ++ lot_attrs = kmem_zalloc(sizeof (sa_attr_type_t) * ++ za.za_num_integers, KM_PUSHPAGE); ++ ++ if ((error = (zap_lookup(os, sa->sa_layout_attr_obj, ++ za.za_name, 2, za.za_num_integers, ++ lot_attrs))) != 0) { ++ kmem_free(lot_attrs, sizeof (sa_attr_type_t) * ++ za.za_num_integers); ++ break; ++ } ++ VERIFY(ddi_strtoull(za.za_name, NULL, 10, ++ (unsigned long long *)&lot_num) == 0); ++ ++ (void) sa_add_layout_entry(os, lot_attrs, ++ za.za_num_integers, lot_num, ++ sa_layout_info_hash(lot_attrs, ++ za.za_num_integers), B_FALSE, NULL); ++ kmem_free(lot_attrs, sizeof (sa_attr_type_t) * ++ za.za_num_integers); ++ } ++ zap_cursor_fini(&zc); ++ ++ /* ++ * Make sure layout count matches number of entries added ++ * to AVL tree ++ */ ++ if (avl_numnodes(&sa->sa_layout_num_tree) != layout_count) { ++ ASSERT(error != 0); ++ goto fail; ++ } ++ } ++ ++ /* Add special layout number for old ZNODES */ ++ if (ostype == DMU_OST_ZFS) { ++ (void) sa_add_layout_entry(os, sa_legacy_zpl_layout, ++ sa_legacy_attr_count, 0, ++ sa_layout_info_hash(sa_legacy_zpl_layout, ++ sa_legacy_attr_count), B_FALSE, NULL); ++ ++ (void) sa_add_layout_entry(os, sa_dummy_zpl_layout, 0, 1, ++ 0, B_FALSE, NULL); ++ } ++ *user_table = os->os_sa->sa_user_table; ++ mutex_exit(&sa->sa_lock); ++ return (0); ++fail: ++ os->os_sa = NULL; ++ sa_free_attr_table(sa); ++ if (sa->sa_user_table) ++ kmem_free(sa->sa_user_table, sa->sa_user_table_sz); ++ mutex_exit(&sa->sa_lock); ++ kmem_free(sa, sizeof (sa_os_t)); ++ return ((error == ECKSUM) ? 
EIO : error); ++} ++ ++void ++sa_tear_down(objset_t *os) ++{ ++ sa_os_t *sa = os->os_sa; ++ sa_lot_t *layout; ++ void *cookie; ++ ++ kmem_free(sa->sa_user_table, sa->sa_user_table_sz); ++ ++ /* Free up attr table */ ++ ++ sa_free_attr_table(sa); ++ ++ cookie = NULL; ++ while ((layout = avl_destroy_nodes(&sa->sa_layout_hash_tree, &cookie))){ ++ sa_idx_tab_t *tab; ++ while ((tab = list_head(&layout->lot_idx_tab))) { ++ ASSERT(refcount_count(&tab->sa_refcount)); ++ sa_idx_tab_rele(os, tab); ++ } ++ } ++ ++ cookie = NULL; ++ while ((layout = avl_destroy_nodes(&sa->sa_layout_num_tree, &cookie))){ ++ kmem_free(layout->lot_attrs, ++ sizeof (sa_attr_type_t) * layout->lot_attr_count); ++ kmem_free(layout, sizeof (sa_lot_t)); ++ } ++ ++ avl_destroy(&sa->sa_layout_hash_tree); ++ avl_destroy(&sa->sa_layout_num_tree); ++ ++ kmem_free(sa, sizeof (sa_os_t)); ++ os->os_sa = NULL; ++} ++ ++void ++sa_build_idx_tab(void *hdr, void *attr_addr, sa_attr_type_t attr, ++ uint16_t length, int length_idx, boolean_t var_length, void *userp) ++{ ++ sa_idx_tab_t *idx_tab = userp; ++ ++ if (var_length) { ++ ASSERT(idx_tab->sa_variable_lengths); ++ idx_tab->sa_variable_lengths[length_idx] = length; ++ } ++ TOC_ATTR_ENCODE(idx_tab->sa_idx_tab[attr], length_idx, ++ (uint32_t)((uintptr_t)attr_addr - (uintptr_t)hdr)); ++} ++ ++static void ++sa_attr_iter(objset_t *os, sa_hdr_phys_t *hdr, dmu_object_type_t type, ++ sa_iterfunc_t func, sa_lot_t *tab, void *userp) ++{ ++ void *data_start; ++ sa_lot_t *tb = tab; ++ sa_lot_t search; ++ avl_index_t loc; ++ sa_os_t *sa = os->os_sa; ++ int i; ++ uint16_t *length_start = NULL; ++ uint8_t length_idx = 0; ++ ++ if (tab == NULL) { ++ search.lot_num = SA_LAYOUT_NUM(hdr, type); ++ tb = avl_find(&sa->sa_layout_num_tree, &search, &loc); ++ ASSERT(tb); ++ } ++ ++ if (IS_SA_BONUSTYPE(type)) { ++ data_start = (void *)P2ROUNDUP(((uintptr_t)hdr + ++ offsetof(sa_hdr_phys_t, sa_lengths) + ++ (sizeof (uint16_t) * tb->lot_var_sizes)), 8); ++ length_start = hdr->sa_lengths; ++ } else { ++ data_start = hdr; ++ } ++ ++ for (i = 0; i != tb->lot_attr_count; i++) { ++ int attr_length, reg_length; ++ uint8_t idx_len; ++ ++ reg_length = sa->sa_attr_table[tb->lot_attrs[i]].sa_length; ++ if (reg_length) { ++ attr_length = reg_length; ++ idx_len = 0; ++ } else { ++ attr_length = length_start[length_idx]; ++ idx_len = length_idx++; ++ } ++ ++ func(hdr, data_start, tb->lot_attrs[i], attr_length, ++ idx_len, reg_length == 0 ? 
B_TRUE : B_FALSE, userp); ++ ++ data_start = (void *)P2ROUNDUP(((uintptr_t)data_start + ++ attr_length), 8); ++ } ++} ++ ++/*ARGSUSED*/ ++void ++sa_byteswap_cb(void *hdr, void *attr_addr, sa_attr_type_t attr, ++ uint16_t length, int length_idx, boolean_t variable_length, void *userp) ++{ ++ sa_handle_t *hdl = userp; ++ sa_os_t *sa = hdl->sa_os->os_sa; ++ ++ sa_bswap_table[sa->sa_attr_table[attr].sa_byteswap](attr_addr, length); ++} ++ ++void ++sa_byteswap(sa_handle_t *hdl, sa_buf_type_t buftype) ++{ ++ sa_hdr_phys_t *sa_hdr_phys = SA_GET_HDR(hdl, buftype); ++ dmu_buf_impl_t *db; ++ int num_lengths = 1; ++ int i; ++ ASSERTV(sa_os_t *sa = hdl->sa_os->os_sa); ++ ++ ASSERT(MUTEX_HELD(&sa->sa_lock)); ++ if (sa_hdr_phys->sa_magic == SA_MAGIC) ++ return; ++ ++ db = SA_GET_DB(hdl, buftype); ++ ++ if (buftype == SA_SPILL) { ++ arc_release(db->db_buf, NULL); ++ arc_buf_thaw(db->db_buf); ++ } ++ ++ sa_hdr_phys->sa_magic = BSWAP_32(sa_hdr_phys->sa_magic); ++ sa_hdr_phys->sa_layout_info = BSWAP_16(sa_hdr_phys->sa_layout_info); ++ ++ /* ++ * Determine number of variable lenghts in header ++ * The standard 8 byte header has one for free and a ++ * 16 byte header would have 4 + 1; ++ */ ++ if (SA_HDR_SIZE(sa_hdr_phys) > 8) ++ num_lengths += (SA_HDR_SIZE(sa_hdr_phys) - 8) >> 1; ++ for (i = 0; i != num_lengths; i++) ++ sa_hdr_phys->sa_lengths[i] = ++ BSWAP_16(sa_hdr_phys->sa_lengths[i]); ++ ++ sa_attr_iter(hdl->sa_os, sa_hdr_phys, DMU_OT_SA, ++ sa_byteswap_cb, NULL, hdl); ++ ++ if (buftype == SA_SPILL) ++ arc_buf_freeze(((dmu_buf_impl_t *)hdl->sa_spill)->db_buf); ++} ++ ++static int ++sa_build_index(sa_handle_t *hdl, sa_buf_type_t buftype) ++{ ++ sa_hdr_phys_t *sa_hdr_phys; ++ dmu_buf_impl_t *db = SA_GET_DB(hdl, buftype); ++ dmu_object_type_t bonustype = SA_BONUSTYPE_FROM_DB(db); ++ sa_os_t *sa = hdl->sa_os->os_sa; ++ sa_idx_tab_t *idx_tab; ++ ++ sa_hdr_phys = SA_GET_HDR(hdl, buftype); ++ ++ mutex_enter(&sa->sa_lock); ++ ++ /* Do we need to byteswap? 
*/ ++ ++ /* only check if not old znode */ ++ if (IS_SA_BONUSTYPE(bonustype) && sa_hdr_phys->sa_magic != SA_MAGIC && ++ sa_hdr_phys->sa_magic != 0) { ++ VERIFY(BSWAP_32(sa_hdr_phys->sa_magic) == SA_MAGIC); ++ sa_byteswap(hdl, buftype); ++ } ++ ++ idx_tab = sa_find_idx_tab(hdl->sa_os, bonustype, sa_hdr_phys); ++ ++ if (buftype == SA_BONUS) ++ hdl->sa_bonus_tab = idx_tab; ++ else ++ hdl->sa_spill_tab = idx_tab; ++ ++ mutex_exit(&sa->sa_lock); ++ return (0); ++} ++ ++/*ARGSUSED*/ ++void ++sa_evict(dmu_buf_t *db, void *sap) ++{ ++ panic("evicting sa dbuf %p\n", (void *)db); ++} ++ ++static void ++sa_idx_tab_rele(objset_t *os, void *arg) ++{ ++ sa_os_t *sa = os->os_sa; ++ sa_idx_tab_t *idx_tab = arg; ++ ++ if (idx_tab == NULL) ++ return; ++ ++ mutex_enter(&sa->sa_lock); ++ if (refcount_remove(&idx_tab->sa_refcount, NULL) == 0) { ++ list_remove(&idx_tab->sa_layout->lot_idx_tab, idx_tab); ++ if (idx_tab->sa_variable_lengths) ++ kmem_free(idx_tab->sa_variable_lengths, ++ sizeof (uint16_t) * ++ idx_tab->sa_layout->lot_var_sizes); ++ refcount_destroy(&idx_tab->sa_refcount); ++ kmem_free(idx_tab->sa_idx_tab, ++ sizeof (uint32_t) * sa->sa_num_attrs); ++ kmem_free(idx_tab, sizeof (sa_idx_tab_t)); ++ } ++ mutex_exit(&sa->sa_lock); ++} ++ ++static void ++sa_idx_tab_hold(objset_t *os, sa_idx_tab_t *idx_tab) ++{ ++ ASSERTV(sa_os_t *sa = os->os_sa); ++ ++ ASSERT(MUTEX_HELD(&sa->sa_lock)); ++ (void) refcount_add(&idx_tab->sa_refcount, NULL); ++} ++ ++void ++sa_spill_rele(sa_handle_t *hdl) ++{ ++ mutex_enter(&hdl->sa_lock); ++ if (hdl->sa_spill) { ++ sa_idx_tab_rele(hdl->sa_os, hdl->sa_spill_tab); ++ dmu_buf_rele(hdl->sa_spill, NULL); ++ hdl->sa_spill = NULL; ++ hdl->sa_spill_tab = NULL; ++ } ++ mutex_exit(&hdl->sa_lock); ++} ++ ++void ++sa_handle_destroy(sa_handle_t *hdl) ++{ ++ mutex_enter(&hdl->sa_lock); ++ (void) dmu_buf_update_user((dmu_buf_t *)hdl->sa_bonus, hdl, ++ NULL, NULL, NULL); ++ ++ if (hdl->sa_bonus_tab) { ++ sa_idx_tab_rele(hdl->sa_os, hdl->sa_bonus_tab); ++ hdl->sa_bonus_tab = NULL; ++ } ++ if (hdl->sa_spill_tab) { ++ sa_idx_tab_rele(hdl->sa_os, hdl->sa_spill_tab); ++ hdl->sa_spill_tab = NULL; ++ } ++ ++ dmu_buf_rele(hdl->sa_bonus, NULL); ++ ++ if (hdl->sa_spill) ++ dmu_buf_rele((dmu_buf_t *)hdl->sa_spill, NULL); ++ mutex_exit(&hdl->sa_lock); ++ ++ kmem_cache_free(sa_cache, hdl); ++} ++ ++int ++sa_handle_get_from_db(objset_t *os, dmu_buf_t *db, void *userp, ++ sa_handle_type_t hdl_type, sa_handle_t **handlepp) ++{ ++ int error = 0; ++ sa_handle_t *handle; ++#ifdef ZFS_DEBUG ++ dmu_object_info_t doi; ++ ++ dmu_object_info_from_db(db, &doi); ++ ASSERT(doi.doi_bonus_type == DMU_OT_SA || ++ doi.doi_bonus_type == DMU_OT_ZNODE); ++#endif ++ /* find handle, if it exists */ ++ /* if one doesn't exist then create a new one, and initialize it */ ++ ++ handle = (hdl_type == SA_HDL_SHARED) ? dmu_buf_get_user(db) : NULL; ++ if (handle == NULL) { ++ sa_handle_t *newhandle; ++ handle = kmem_cache_alloc(sa_cache, KM_SLEEP); ++ handle->sa_userp = userp; ++ handle->sa_bonus = db; ++ handle->sa_os = os; ++ handle->sa_spill = NULL; ++ ++ error = sa_build_index(handle, SA_BONUS); ++ newhandle = (hdl_type == SA_HDL_SHARED) ? 
++ dmu_buf_set_user_ie(db, handle, ++ NULL, sa_evict) : NULL; ++ ++ if (newhandle != NULL) { ++ kmem_cache_free(sa_cache, handle); ++ handle = newhandle; ++ } ++ } ++ *handlepp = handle; ++ ++ return (error); ++} ++ ++int ++sa_handle_get(objset_t *objset, uint64_t objid, void *userp, ++ sa_handle_type_t hdl_type, sa_handle_t **handlepp) ++{ ++ dmu_buf_t *db; ++ int error; ++ ++ if ((error = dmu_bonus_hold(objset, objid, NULL, &db))) ++ return (error); ++ ++ return (sa_handle_get_from_db(objset, db, userp, hdl_type, ++ handlepp)); ++} ++ ++int ++sa_buf_hold(objset_t *objset, uint64_t obj_num, void *tag, dmu_buf_t **db) ++{ ++ return (dmu_bonus_hold(objset, obj_num, tag, db)); ++} ++ ++void ++sa_buf_rele(dmu_buf_t *db, void *tag) ++{ ++ dmu_buf_rele(db, tag); ++} ++ ++int ++sa_lookup_impl(sa_handle_t *hdl, sa_bulk_attr_t *bulk, int count) ++{ ++ ASSERT(hdl); ++ ASSERT(MUTEX_HELD(&hdl->sa_lock)); ++ return (sa_attr_op(hdl, bulk, count, SA_LOOKUP, NULL)); ++} ++ ++int ++sa_lookup(sa_handle_t *hdl, sa_attr_type_t attr, void *buf, uint32_t buflen) ++{ ++ int error; ++ sa_bulk_attr_t bulk; ++ ++ bulk.sa_attr = attr; ++ bulk.sa_data = buf; ++ bulk.sa_length = buflen; ++ bulk.sa_data_func = NULL; ++ ++ ASSERT(hdl); ++ mutex_enter(&hdl->sa_lock); ++ error = sa_lookup_impl(hdl, &bulk, 1); ++ mutex_exit(&hdl->sa_lock); ++ return (error); ++} ++ ++#ifdef _KERNEL ++int ++sa_lookup_uio(sa_handle_t *hdl, sa_attr_type_t attr, uio_t *uio) ++{ ++ int error; ++ sa_bulk_attr_t bulk; ++ ++ bulk.sa_data = NULL; ++ bulk.sa_attr = attr; ++ bulk.sa_data_func = NULL; ++ ++ ASSERT(hdl); ++ ++ mutex_enter(&hdl->sa_lock); ++ if ((error = sa_attr_op(hdl, &bulk, 1, SA_LOOKUP, NULL)) == 0) { ++ error = uiomove((void *)bulk.sa_addr, MIN(bulk.sa_size, ++ uio->uio_resid), UIO_READ, uio); ++ } ++ mutex_exit(&hdl->sa_lock); ++ return (error); ++} ++#endif ++ ++void * ++sa_find_idx_tab(objset_t *os, dmu_object_type_t bonustype, void *data) ++{ ++ sa_idx_tab_t *idx_tab; ++ sa_hdr_phys_t *hdr = (sa_hdr_phys_t *)data; ++ sa_os_t *sa = os->os_sa; ++ sa_lot_t *tb, search; ++ avl_index_t loc; ++ ++ /* ++ * Deterimine layout number. If SA node and header == 0 then ++ * force the index table to the dummy "1" empty layout. ++ * ++ * The layout number would only be zero for a newly created file ++ * that has not added any attributes yet, or with crypto enabled which ++ * doesn't write any attributes to the bonus buffer. ++ */ ++ ++ search.lot_num = SA_LAYOUT_NUM(hdr, bonustype); ++ ++ tb = avl_find(&sa->sa_layout_num_tree, &search, &loc); ++ ++ /* Verify header size is consistent with layout information */ ++ ASSERT(tb); ++ ASSERT((IS_SA_BONUSTYPE(bonustype) && ++ SA_HDR_SIZE_MATCH_LAYOUT(hdr, tb)) || !IS_SA_BONUSTYPE(bonustype) || ++ (IS_SA_BONUSTYPE(bonustype) && hdr->sa_layout_info == 0)); ++ ++ /* ++ * See if any of the already existing TOC entries can be reused? 
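++ * A cached sa_idx_tab_t may only be shared when every variable-length
++ * attribute recorded in the header matches the lengths the table was
++ * built with; otherwise the encoded attribute offsets would be wrong.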
++ */ ++ ++ for (idx_tab = list_head(&tb->lot_idx_tab); idx_tab; ++ idx_tab = list_next(&tb->lot_idx_tab, idx_tab)) { ++ boolean_t valid_idx = B_TRUE; ++ int i; ++ ++ if (tb->lot_var_sizes != 0 && ++ idx_tab->sa_variable_lengths != NULL) { ++ for (i = 0; i != tb->lot_var_sizes; i++) { ++ if (hdr->sa_lengths[i] != ++ idx_tab->sa_variable_lengths[i]) { ++ valid_idx = B_FALSE; ++ break; ++ } ++ } ++ } ++ if (valid_idx) { ++ sa_idx_tab_hold(os, idx_tab); ++ return (idx_tab); ++ } ++ } ++ ++ /* No such luck, create a new entry */ ++ idx_tab = kmem_zalloc(sizeof (sa_idx_tab_t), KM_PUSHPAGE); ++ idx_tab->sa_idx_tab = ++ kmem_zalloc(sizeof (uint32_t) * sa->sa_num_attrs, KM_PUSHPAGE); ++ idx_tab->sa_layout = tb; ++ refcount_create(&idx_tab->sa_refcount); ++ if (tb->lot_var_sizes) ++ idx_tab->sa_variable_lengths = kmem_alloc(sizeof (uint16_t) * ++ tb->lot_var_sizes, KM_PUSHPAGE); ++ ++ sa_attr_iter(os, hdr, bonustype, sa_build_idx_tab, ++ tb, idx_tab); ++ sa_idx_tab_hold(os, idx_tab); /* one hold for consumer */ ++ sa_idx_tab_hold(os, idx_tab); /* one for layout */ ++ list_insert_tail(&tb->lot_idx_tab, idx_tab); ++ return (idx_tab); ++} ++ ++void ++sa_default_locator(void **dataptr, uint32_t *len, uint32_t total_len, ++ boolean_t start, void *userdata) ++{ ++ ASSERT(start); ++ ++ *dataptr = userdata; ++ *len = total_len; ++} ++ ++static void ++sa_attr_register_sync(sa_handle_t *hdl, dmu_tx_t *tx) ++{ ++ uint64_t attr_value = 0; ++ sa_os_t *sa = hdl->sa_os->os_sa; ++ sa_attr_table_t *tb = sa->sa_attr_table; ++ int i; ++ ++ mutex_enter(&sa->sa_lock); ++ ++ if (!sa->sa_need_attr_registration || sa->sa_master_obj == 0) { ++ mutex_exit(&sa->sa_lock); ++ return; ++ } ++ ++ if (sa->sa_reg_attr_obj == 0) { ++ sa->sa_reg_attr_obj = zap_create(hdl->sa_os, ++ DMU_OT_SA_ATTR_REGISTRATION, DMU_OT_NONE, 0, tx); ++ VERIFY(zap_add(hdl->sa_os, sa->sa_master_obj, ++ SA_REGISTRY, 8, 1, &sa->sa_reg_attr_obj, tx) == 0); ++ } ++ for (i = 0; i != sa->sa_num_attrs; i++) { ++ if (sa->sa_attr_table[i].sa_registered) ++ continue; ++ ATTR_ENCODE(attr_value, tb[i].sa_attr, tb[i].sa_length, ++ tb[i].sa_byteswap); ++ VERIFY(0 == zap_update(hdl->sa_os, sa->sa_reg_attr_obj, ++ tb[i].sa_name, 8, 1, &attr_value, tx)); ++ tb[i].sa_registered = B_TRUE; ++ } ++ sa->sa_need_attr_registration = B_FALSE; ++ mutex_exit(&sa->sa_lock); ++} ++ ++/* ++ * Replace all attributes with attributes specified in template. ++ * If dnode had a spill buffer then those attributes will be ++ * also be replaced, possibly with just an empty spill block ++ * ++ * This interface is intended to only be used for bulk adding of ++ * attributes for a new file. It will also be used by the ZPL ++ * when converting and old formatted znode to native SA support. ++ */ ++int ++sa_replace_all_by_template_locked(sa_handle_t *hdl, sa_bulk_attr_t *attr_desc, ++ int attr_count, dmu_tx_t *tx) ++{ ++ sa_os_t *sa = hdl->sa_os->os_sa; ++ ++ if (sa->sa_need_attr_registration) ++ sa_attr_register_sync(hdl, tx); ++ return (sa_build_layouts(hdl, attr_desc, attr_count, tx)); ++} ++ ++int ++sa_replace_all_by_template(sa_handle_t *hdl, sa_bulk_attr_t *attr_desc, ++ int attr_count, dmu_tx_t *tx) ++{ ++ int error; ++ ++ mutex_enter(&hdl->sa_lock); ++ error = sa_replace_all_by_template_locked(hdl, attr_desc, ++ attr_count, tx); ++ mutex_exit(&hdl->sa_lock); ++ return (error); ++} ++ ++/* ++ * add/remove/replace a single attribute and then rewrite the entire set ++ * of attributes. 
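++ * The rewrite works by taking private copies of the current bonus (and,
++ * if present, spill) data, building a new bulk attribute descriptor with
++ * the requested add/remove/replace applied, and passing that descriptor
++ * to sa_build_layouts() to lay the attributes out again.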
++ */ ++static int ++sa_modify_attrs(sa_handle_t *hdl, sa_attr_type_t newattr, ++ sa_data_op_t action, sa_data_locator_t *locator, void *datastart, ++ uint16_t buflen, dmu_tx_t *tx) ++{ ++ sa_os_t *sa = hdl->sa_os->os_sa; ++ dmu_buf_impl_t *db = (dmu_buf_impl_t *)hdl->sa_bonus; ++ dnode_t *dn; ++ sa_bulk_attr_t *attr_desc; ++ void *old_data[2]; ++ int bonus_attr_count = 0; ++ int bonus_data_size = 0; ++ int spill_attr_count = 0; ++ int error; ++ uint16_t length; ++ int i, j, k, length_idx; ++ sa_hdr_phys_t *hdr; ++ sa_idx_tab_t *idx_tab; ++ int attr_count; ++ int count; ++ ++ ASSERT(MUTEX_HELD(&hdl->sa_lock)); ++ ++ /* First make of copy of the old data */ ++ ++ DB_DNODE_ENTER(db); ++ dn = DB_DNODE(db); ++ if (dn->dn_bonuslen != 0) { ++ bonus_data_size = hdl->sa_bonus->db_size; ++ old_data[0] = kmem_alloc(bonus_data_size, KM_SLEEP); ++ bcopy(hdl->sa_bonus->db_data, old_data[0], ++ hdl->sa_bonus->db_size); ++ bonus_attr_count = hdl->sa_bonus_tab->sa_layout->lot_attr_count; ++ } else { ++ old_data[0] = NULL; ++ } ++ DB_DNODE_EXIT(db); ++ ++ /* Bring spill buffer online if it isn't currently */ ++ ++ if ((error = sa_get_spill(hdl)) == 0) { ++ ASSERT3U(hdl->sa_spill->db_size, <=, SPA_MAXBLOCKSIZE); ++ old_data[1] = sa_spill_alloc(KM_SLEEP); ++ bcopy(hdl->sa_spill->db_data, old_data[1], ++ hdl->sa_spill->db_size); ++ spill_attr_count = ++ hdl->sa_spill_tab->sa_layout->lot_attr_count; ++ } else if (error && error != ENOENT) { ++ if (old_data[0]) ++ kmem_free(old_data[0], bonus_data_size); ++ return (error); ++ } else { ++ old_data[1] = NULL; ++ } ++ ++ /* build descriptor of all attributes */ ++ ++ attr_count = bonus_attr_count + spill_attr_count; ++ if (action == SA_ADD) ++ attr_count++; ++ else if (action == SA_REMOVE) ++ attr_count--; ++ ++ attr_desc = kmem_zalloc(sizeof (sa_bulk_attr_t) * attr_count, KM_SLEEP); ++ ++ /* ++ * loop through bonus and spill buffer if it exists, and ++ * build up new attr_descriptor to reset the attributes ++ */ ++ k = j = 0; ++ count = bonus_attr_count; ++ hdr = SA_GET_HDR(hdl, SA_BONUS); ++ idx_tab = SA_IDX_TAB_GET(hdl, SA_BONUS); ++ for (; k != 2; k++) { ++ /* iterate over each attribute in layout */ ++ for (i = 0, length_idx = 0; i != count; i++) { ++ sa_attr_type_t attr; ++ ++ attr = idx_tab->sa_layout->lot_attrs[i]; ++ if (attr == newattr) { ++ if (action == SA_REMOVE) { ++ j++; ++ continue; ++ } ++ ASSERT(SA_REGISTERED_LEN(sa, attr) == 0); ++ ASSERT(action == SA_REPLACE); ++ SA_ADD_BULK_ATTR(attr_desc, j, attr, ++ locator, datastart, buflen); ++ } else { ++ length = SA_REGISTERED_LEN(sa, attr); ++ if (length == 0) { ++ length = hdr->sa_lengths[length_idx++]; ++ } ++ ++ SA_ADD_BULK_ATTR(attr_desc, j, attr, ++ NULL, (void *) ++ (TOC_OFF(idx_tab->sa_idx_tab[attr]) + ++ (uintptr_t)old_data[k]), length); ++ } ++ } ++ if (k == 0 && hdl->sa_spill) { ++ hdr = SA_GET_HDR(hdl, SA_SPILL); ++ idx_tab = SA_IDX_TAB_GET(hdl, SA_SPILL); ++ count = spill_attr_count; ++ } else { ++ break; ++ } ++ } ++ if (action == SA_ADD) { ++ length = SA_REGISTERED_LEN(sa, newattr); ++ if (length == 0) { ++ length = buflen; ++ } ++ SA_ADD_BULK_ATTR(attr_desc, j, newattr, locator, ++ datastart, buflen); ++ } ++ ++ error = sa_build_layouts(hdl, attr_desc, attr_count, tx); ++ ++ if (old_data[0]) ++ kmem_free(old_data[0], bonus_data_size); ++ if (old_data[1]) ++ sa_spill_free(old_data[1]); ++ kmem_free(attr_desc, sizeof (sa_bulk_attr_t) * attr_count); ++ ++ return (error); ++} ++ ++static int ++sa_bulk_update_impl(sa_handle_t *hdl, sa_bulk_attr_t *bulk, int count, ++ dmu_tx_t *tx) ++{ ++ int 
error; ++ sa_os_t *sa = hdl->sa_os->os_sa; ++ dmu_object_type_t bonustype; ++ dmu_buf_t *saved_spill; ++ ++ ASSERT(hdl); ++ ASSERT(MUTEX_HELD(&hdl->sa_lock)); ++ ++ bonustype = SA_BONUSTYPE_FROM_DB(SA_GET_DB(hdl, SA_BONUS)); ++ saved_spill = hdl->sa_spill; ++ ++ /* sync out registration table if necessary */ ++ if (sa->sa_need_attr_registration) ++ sa_attr_register_sync(hdl, tx); ++ ++ error = sa_attr_op(hdl, bulk, count, SA_UPDATE, tx); ++ if (error == 0 && !IS_SA_BONUSTYPE(bonustype) && sa->sa_update_cb) ++ sa->sa_update_cb(hdl, tx); ++ ++ /* ++ * If saved_spill is NULL and current sa_spill is not NULL that ++ * means we increased the refcount of the spill buffer through ++ * sa_get_spill() or dmu_spill_hold_by_dnode(). Therefore we ++ * must release the hold before calling dmu_tx_commit() to avoid ++ * making a copy of this buffer in dbuf_sync_leaf() due to the ++ * reference count now being greater than 1. ++ */ ++ if (!saved_spill && hdl->sa_spill) { ++ if (hdl->sa_spill_tab) { ++ sa_idx_tab_rele(hdl->sa_os, hdl->sa_spill_tab); ++ hdl->sa_spill_tab = NULL; ++ } ++ ++ dmu_buf_rele((dmu_buf_t *)hdl->sa_spill, NULL); ++ hdl->sa_spill = NULL; ++ } ++ ++ return (error); ++} ++ ++/* ++ * update or add new attribute ++ */ ++int ++sa_update(sa_handle_t *hdl, sa_attr_type_t type, ++ void *buf, uint32_t buflen, dmu_tx_t *tx) ++{ ++ int error; ++ sa_bulk_attr_t bulk; ++ ++ bulk.sa_attr = type; ++ bulk.sa_data_func = NULL; ++ bulk.sa_length = buflen; ++ bulk.sa_data = buf; ++ ++ mutex_enter(&hdl->sa_lock); ++ error = sa_bulk_update_impl(hdl, &bulk, 1, tx); ++ mutex_exit(&hdl->sa_lock); ++ return (error); ++} ++ ++int ++sa_update_from_cb(sa_handle_t *hdl, sa_attr_type_t attr, ++ uint32_t buflen, sa_data_locator_t *locator, void *userdata, dmu_tx_t *tx) ++{ ++ int error; ++ sa_bulk_attr_t bulk; ++ ++ bulk.sa_attr = attr; ++ bulk.sa_data = userdata; ++ bulk.sa_data_func = locator; ++ bulk.sa_length = buflen; ++ ++ mutex_enter(&hdl->sa_lock); ++ error = sa_bulk_update_impl(hdl, &bulk, 1, tx); ++ mutex_exit(&hdl->sa_lock); ++ return (error); ++} ++ ++/* ++ * Return size of an attribute ++ */ ++ ++int ++sa_size(sa_handle_t *hdl, sa_attr_type_t attr, int *size) ++{ ++ sa_bulk_attr_t bulk; ++ int error; ++ ++ bulk.sa_data = NULL; ++ bulk.sa_attr = attr; ++ bulk.sa_data_func = NULL; ++ ++ ASSERT(hdl); ++ mutex_enter(&hdl->sa_lock); ++ if ((error = sa_attr_op(hdl, &bulk, 1, SA_LOOKUP, NULL)) != 0) { ++ mutex_exit(&hdl->sa_lock); ++ return (error); ++ } ++ *size = bulk.sa_size; ++ ++ mutex_exit(&hdl->sa_lock); ++ return (0); ++} ++ ++int ++sa_bulk_lookup_locked(sa_handle_t *hdl, sa_bulk_attr_t *attrs, int count) ++{ ++ ASSERT(hdl); ++ ASSERT(MUTEX_HELD(&hdl->sa_lock)); ++ return (sa_lookup_impl(hdl, attrs, count)); ++} ++ ++int ++sa_bulk_lookup(sa_handle_t *hdl, sa_bulk_attr_t *attrs, int count) ++{ ++ int error; ++ ++ ASSERT(hdl); ++ mutex_enter(&hdl->sa_lock); ++ error = sa_bulk_lookup_locked(hdl, attrs, count); ++ mutex_exit(&hdl->sa_lock); ++ return (error); ++} ++ ++int ++sa_bulk_update(sa_handle_t *hdl, sa_bulk_attr_t *attrs, int count, dmu_tx_t *tx) ++{ ++ int error; ++ ++ ASSERT(hdl); ++ mutex_enter(&hdl->sa_lock); ++ error = sa_bulk_update_impl(hdl, attrs, count, tx); ++ mutex_exit(&hdl->sa_lock); ++ return (error); ++} ++ ++int ++sa_remove(sa_handle_t *hdl, sa_attr_type_t attr, dmu_tx_t *tx) ++{ ++ int error; ++ ++ mutex_enter(&hdl->sa_lock); ++ error = sa_modify_attrs(hdl, attr, SA_REMOVE, NULL, ++ NULL, 0, tx); ++ mutex_exit(&hdl->sa_lock); ++ return (error); ++} ++ ++void 
++sa_object_info(sa_handle_t *hdl, dmu_object_info_t *doi) ++{ ++ dmu_object_info_from_db((dmu_buf_t *)hdl->sa_bonus, doi); ++} ++ ++void ++sa_object_size(sa_handle_t *hdl, uint32_t *blksize, u_longlong_t *nblocks) ++{ ++ dmu_object_size_from_db((dmu_buf_t *)hdl->sa_bonus, ++ blksize, nblocks); ++} ++ ++void ++sa_update_user(sa_handle_t *newhdl, sa_handle_t *oldhdl) ++{ ++ (void) dmu_buf_update_user((dmu_buf_t *)newhdl->sa_bonus, ++ oldhdl, newhdl, NULL, sa_evict); ++ oldhdl->sa_bonus = NULL; ++} ++ ++void ++sa_set_userp(sa_handle_t *hdl, void *ptr) ++{ ++ hdl->sa_userp = ptr; ++} ++ ++dmu_buf_t * ++sa_get_db(sa_handle_t *hdl) ++{ ++ return ((dmu_buf_t *)hdl->sa_bonus); ++} ++ ++void * ++sa_get_userdata(sa_handle_t *hdl) ++{ ++ return (hdl->sa_userp); ++} ++ ++void ++sa_register_update_callback_locked(objset_t *os, sa_update_cb_t *func) ++{ ++ ASSERT(MUTEX_HELD(&os->os_sa->sa_lock)); ++ os->os_sa->sa_update_cb = func; ++} ++ ++void ++sa_register_update_callback(objset_t *os, sa_update_cb_t *func) ++{ ++ ++ mutex_enter(&os->os_sa->sa_lock); ++ sa_register_update_callback_locked(os, func); ++ mutex_exit(&os->os_sa->sa_lock); ++} ++ ++uint64_t ++sa_handle_object(sa_handle_t *hdl) ++{ ++ return (hdl->sa_bonus->db_object); ++} ++ ++boolean_t ++sa_enabled(objset_t *os) ++{ ++ return (os->os_sa == NULL); ++} ++ ++int ++sa_set_sa_object(objset_t *os, uint64_t sa_object) ++{ ++ sa_os_t *sa = os->os_sa; ++ ++ if (sa->sa_master_obj) ++ return (1); ++ ++ sa->sa_master_obj = sa_object; ++ ++ return (0); ++} ++ ++int ++sa_hdrsize(void *arg) ++{ ++ sa_hdr_phys_t *hdr = arg; ++ ++ return (SA_HDR_SIZE(hdr)); ++} ++ ++void ++sa_handle_lock(sa_handle_t *hdl) ++{ ++ ASSERT(hdl); ++ mutex_enter(&hdl->sa_lock); ++} ++ ++void ++sa_handle_unlock(sa_handle_t *hdl) ++{ ++ ASSERT(hdl); ++ mutex_exit(&hdl->sa_lock); ++} ++ ++#ifdef _KERNEL ++EXPORT_SYMBOL(sa_handle_get); ++EXPORT_SYMBOL(sa_handle_get_from_db); ++EXPORT_SYMBOL(sa_handle_destroy); ++EXPORT_SYMBOL(sa_buf_hold); ++EXPORT_SYMBOL(sa_buf_rele); ++EXPORT_SYMBOL(sa_spill_rele); ++EXPORT_SYMBOL(sa_lookup); ++EXPORT_SYMBOL(sa_update); ++EXPORT_SYMBOL(sa_remove); ++EXPORT_SYMBOL(sa_bulk_lookup); ++EXPORT_SYMBOL(sa_bulk_lookup_locked); ++EXPORT_SYMBOL(sa_bulk_update); ++EXPORT_SYMBOL(sa_size); ++EXPORT_SYMBOL(sa_update_from_cb); ++EXPORT_SYMBOL(sa_object_info); ++EXPORT_SYMBOL(sa_object_size); ++EXPORT_SYMBOL(sa_update_user); ++EXPORT_SYMBOL(sa_get_userdata); ++EXPORT_SYMBOL(sa_set_userp); ++EXPORT_SYMBOL(sa_get_db); ++EXPORT_SYMBOL(sa_handle_object); ++EXPORT_SYMBOL(sa_register_update_callback); ++EXPORT_SYMBOL(sa_setup); ++EXPORT_SYMBOL(sa_replace_all_by_template); ++EXPORT_SYMBOL(sa_replace_all_by_template_locked); ++EXPORT_SYMBOL(sa_enabled); ++EXPORT_SYMBOL(sa_cache_init); ++EXPORT_SYMBOL(sa_cache_fini); ++EXPORT_SYMBOL(sa_spill_alloc); ++EXPORT_SYMBOL(sa_spill_free); ++EXPORT_SYMBOL(sa_set_sa_object); ++EXPORT_SYMBOL(sa_hdrsize); ++EXPORT_SYMBOL(sa_handle_lock); ++EXPORT_SYMBOL(sa_handle_unlock); ++EXPORT_SYMBOL(sa_lookup_uio); ++#endif /* _KERNEL */ +diff -uNr linux-3.2.33-go.orig/fs/zfs/zfs/sha256.c linux-3.2.33-go/fs/zfs/zfs/sha256.c +--- linux-3.2.33-go.orig/fs/zfs/zfs/sha256.c 1970-01-01 01:00:00.000000000 +0100 ++++ linux-3.2.33-go/fs/zfs/zfs/sha256.c 2012-11-16 23:25:34.352039300 +0100 +@@ -0,0 +1,127 @@ ++/* ++ * CDDL HEADER START ++ * ++ * The contents of this file are subject to the terms of the ++ * Common Development and Distribution License (the "License"). ++ * You may not use this file except in compliance with the License. 
++ * ++ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE ++ * or http://www.opensolaris.org/os/licensing. ++ * See the License for the specific language governing permissions ++ * and limitations under the License. ++ * ++ * When distributing Covered Code, include this CDDL HEADER in each ++ * file and include the License file at usr/src/OPENSOLARIS.LICENSE. ++ * If applicable, add the following below this CDDL HEADER, with the ++ * fields enclosed by brackets "[]" replaced with your own identifying ++ * information: Portions Copyright [yyyy] [name of copyright owner] ++ * ++ * CDDL HEADER END ++ */ ++/* ++ * Copyright 2007 Sun Microsystems, Inc. All rights reserved. ++ * Use is subject to license terms. ++ */ ++ ++#include ++#include ++#include ++ ++/* ++ * SHA-256 checksum, as specified in FIPS 180-3, available at: ++ * http://csrc.nist.gov/publications/PubsFIPS.html ++ * ++ * This is a very compact implementation of SHA-256. ++ * It is designed to be simple and portable, not to be fast. ++ */ ++ ++/* ++ * The literal definitions of Ch() and Maj() according to FIPS 180-3 are: ++ * ++ * Ch(x, y, z) (x & y) ^ (~x & z) ++ * Maj(x, y, z) (x & y) ^ (x & z) ^ (y & z) ++ * ++ * We use equivalent logical reductions here that require one less op. ++ */ ++#define Ch(x, y, z) ((z) ^ ((x) & ((y) ^ (z)))) ++#define Maj(x, y, z) (((x) & (y)) ^ ((z) & ((x) ^ (y)))) ++#define Rot32(x, s) (((x) >> s) | ((x) << (32 - s))) ++#define SIGMA0(x) (Rot32(x, 2) ^ Rot32(x, 13) ^ Rot32(x, 22)) ++#define SIGMA1(x) (Rot32(x, 6) ^ Rot32(x, 11) ^ Rot32(x, 25)) ++#define sigma0(x) (Rot32(x, 7) ^ Rot32(x, 18) ^ ((x) >> 3)) ++#define sigma1(x) (Rot32(x, 17) ^ Rot32(x, 19) ^ ((x) >> 10)) ++ ++static const uint32_t SHA256_K[64] = { ++ 0x428a2f98, 0x71374491, 0xb5c0fbcf, 0xe9b5dba5, ++ 0x3956c25b, 0x59f111f1, 0x923f82a4, 0xab1c5ed5, ++ 0xd807aa98, 0x12835b01, 0x243185be, 0x550c7dc3, ++ 0x72be5d74, 0x80deb1fe, 0x9bdc06a7, 0xc19bf174, ++ 0xe49b69c1, 0xefbe4786, 0x0fc19dc6, 0x240ca1cc, ++ 0x2de92c6f, 0x4a7484aa, 0x5cb0a9dc, 0x76f988da, ++ 0x983e5152, 0xa831c66d, 0xb00327c8, 0xbf597fc7, ++ 0xc6e00bf3, 0xd5a79147, 0x06ca6351, 0x14292967, ++ 0x27b70a85, 0x2e1b2138, 0x4d2c6dfc, 0x53380d13, ++ 0x650a7354, 0x766a0abb, 0x81c2c92e, 0x92722c85, ++ 0xa2bfe8a1, 0xa81a664b, 0xc24b8b70, 0xc76c51a3, ++ 0xd192e819, 0xd6990624, 0xf40e3585, 0x106aa070, ++ 0x19a4c116, 0x1e376c08, 0x2748774c, 0x34b0bcb5, ++ 0x391c0cb3, 0x4ed8aa4a, 0x5b9cca4f, 0x682e6ff3, ++ 0x748f82ee, 0x78a5636f, 0x84c87814, 0x8cc70208, ++ 0x90befffa, 0xa4506ceb, 0xbef9a3f7, 0xc67178f2 ++}; ++ ++static void ++SHA256Transform(uint32_t *H, const uint8_t *cp) ++{ ++ uint32_t a, b, c, d, e, f, g, h, t, T1, T2, W[64]; ++ ++ for (t = 0; t < 16; t++, cp += 4) ++ W[t] = (cp[0] << 24) | (cp[1] << 16) | (cp[2] << 8) | cp[3]; ++ ++ for (t = 16; t < 64; t++) ++ W[t] = sigma1(W[t - 2]) + W[t - 7] + ++ sigma0(W[t - 15]) + W[t - 16]; ++ ++ a = H[0]; b = H[1]; c = H[2]; d = H[3]; ++ e = H[4]; f = H[5]; g = H[6]; h = H[7]; ++ ++ for (t = 0; t < 64; t++) { ++ T1 = h + SIGMA1(e) + Ch(e, f, g) + SHA256_K[t] + W[t]; ++ T2 = SIGMA0(a) + Maj(a, b, c); ++ h = g; g = f; f = e; e = d + T1; ++ d = c; c = b; b = a; a = T1 + T2; ++ } ++ ++ H[0] += a; H[1] += b; H[2] += c; H[3] += d; ++ H[4] += e; H[5] += f; H[6] += g; H[7] += h; ++} ++ ++void ++zio_checksum_SHA256(const void *buf, uint64_t size, zio_cksum_t *zcp) ++{ ++ uint32_t H[8] = { 0x6a09e667, 0xbb67ae85, 0x3c6ef372, 0xa54ff53a, ++ 0x510e527f, 0x9b05688c, 0x1f83d9ab, 0x5be0cd19 }; ++ uint8_t pad[128]; ++ int i, padsize; ++ ++ for 
(i = 0; i < (size & ~63ULL); i += 64) ++ SHA256Transform(H, (uint8_t *)buf + i); ++ ++ for (padsize = 0; i < size; i++) ++ pad[padsize++] = *((uint8_t *)buf + i); ++ ++ for (pad[padsize++] = 0x80; (padsize & 63) != 56; padsize++) ++ pad[padsize] = 0; ++ ++ for (i = 56; i >= 0; i -= 8) ++ pad[padsize++] = (size << 3) >> i; ++ ++ for (i = 0; i < padsize; i += 64) ++ SHA256Transform(H, pad + i); ++ ++ ZIO_SET_CHECKSUM(zcp, ++ (uint64_t)H[0] << 32 | H[1], ++ (uint64_t)H[2] << 32 | H[3], ++ (uint64_t)H[4] << 32 | H[5], ++ (uint64_t)H[6] << 32 | H[7]); ++} +diff -uNr linux-3.2.33-go.orig/fs/zfs/zfs/spa_boot.c linux-3.2.33-go/fs/zfs/zfs/spa_boot.c +--- linux-3.2.33-go.orig/fs/zfs/zfs/spa_boot.c 1970-01-01 01:00:00.000000000 +0100 ++++ linux-3.2.33-go/fs/zfs/zfs/spa_boot.c 2012-11-16 23:25:34.353039289 +0100 +@@ -0,0 +1,50 @@ ++/* ++ * CDDL HEADER START ++ * ++ * The contents of this file are subject to the terms of the ++ * Common Development and Distribution License (the "License"). ++ * You may not use this file except in compliance with the License. ++ * ++ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE ++ * or http://www.opensolaris.org/os/licensing. ++ * See the License for the specific language governing permissions ++ * and limitations under the License. ++ * ++ * When distributing Covered Code, include this CDDL HEADER in each ++ * file and include the License file at usr/src/OPENSOLARIS.LICENSE. ++ * If applicable, add the following below this CDDL HEADER, with the ++ * fields enclosed by brackets "[]" replaced with your own identifying ++ * information: Portions Copyright [yyyy] [name of copyright owner] ++ * ++ * CDDL HEADER END ++ */ ++ ++/* ++ * Copyright 2009 Sun Microsystems, Inc. All rights reserved. ++ * Use is subject to license terms. ++ */ ++ ++#ifdef _KERNEL ++ ++#include ++#include ++#include ++ ++char * ++spa_get_bootprop(char *propname) ++{ ++ char *value; ++ ++ if (ddi_prop_lookup_string(DDI_DEV_T_ANY, ddi_root_node(), ++ DDI_PROP_DONTPASS, propname, &value) != DDI_SUCCESS) ++ return (NULL); ++ return (value); ++} ++ ++void ++spa_free_bootprop(char *value) ++{ ++ ddi_prop_free(value); ++} ++ ++#endif /* _KERNEL */ +diff -uNr linux-3.2.33-go.orig/fs/zfs/zfs/spa.c linux-3.2.33-go/fs/zfs/zfs/spa.c +--- linux-3.2.33-go.orig/fs/zfs/zfs/spa.c 1970-01-01 01:00:00.000000000 +0100 ++++ linux-3.2.33-go/fs/zfs/zfs/spa.c 2012-11-16 23:25:34.350039322 +0100 +@@ -0,0 +1,6019 @@ ++/* ++ * CDDL HEADER START ++ * ++ * The contents of this file are subject to the terms of the ++ * Common Development and Distribution License (the "License"). ++ * You may not use this file except in compliance with the License. ++ * ++ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE ++ * or http://www.opensolaris.org/os/licensing. ++ * See the License for the specific language governing permissions ++ * and limitations under the License. ++ * ++ * When distributing Covered Code, include this CDDL HEADER in each ++ * file and include the License file at usr/src/OPENSOLARIS.LICENSE. ++ * If applicable, add the following below this CDDL HEADER, with the ++ * fields enclosed by brackets "[]" replaced with your own identifying ++ * information: Portions Copyright [yyyy] [name of copyright owner] ++ * ++ * CDDL HEADER END ++ */ ++ ++/* ++ * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved. ++ * Copyright 2011 Nexenta Systems, Inc. All rights reserved. ++ * Copyright (c) 2012 by Delphix. All rights reserved. 
++ */ ++ ++/* ++ * This file contains all the routines used when modifying on-disk SPA state. ++ * This includes opening, importing, destroying, exporting a pool, and syncing a ++ * pool. ++ */ ++ ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++ ++#ifdef _KERNEL ++#include ++#include ++#include ++#include ++#include ++#include ++#endif /* _KERNEL */ ++ ++#include "zfs_prop.h" ++#include "zfs_comutil.h" ++ ++typedef enum zti_modes { ++ zti_mode_fixed, /* value is # of threads (min 1) */ ++ zti_mode_online_percent, /* value is % of online CPUs */ ++ zti_mode_batch, /* cpu-intensive; value is ignored */ ++ zti_mode_null, /* don't create a taskq */ ++ zti_nmodes ++} zti_modes_t; ++ ++#define ZTI_FIX(n) { zti_mode_fixed, (n) } ++#define ZTI_PCT(n) { zti_mode_online_percent, (n) } ++#define ZTI_BATCH { zti_mode_batch, 0 } ++#define ZTI_NULL { zti_mode_null, 0 } ++ ++#define ZTI_ONE ZTI_FIX(1) ++ ++typedef struct zio_taskq_info { ++ enum zti_modes zti_mode; ++ uint_t zti_value; ++} zio_taskq_info_t; ++ ++static const char *const zio_taskq_types[ZIO_TASKQ_TYPES] = { ++ "iss", "iss_h", "int", "int_h" ++}; ++ ++/* ++ * Define the taskq threads for the following I/O types: ++ * NULL, READ, WRITE, FREE, CLAIM, and IOCTL ++ */ ++const zio_taskq_info_t zio_taskqs[ZIO_TYPES][ZIO_TASKQ_TYPES] = { ++ /* ISSUE ISSUE_HIGH INTR INTR_HIGH */ ++ { ZTI_ONE, ZTI_NULL, ZTI_ONE, ZTI_NULL }, ++ { ZTI_FIX(8), ZTI_NULL, ZTI_BATCH, ZTI_NULL }, ++ { ZTI_BATCH, ZTI_FIX(5), ZTI_FIX(16), ZTI_FIX(5) }, ++ { ZTI_PCT(100), ZTI_NULL, ZTI_ONE, ZTI_NULL }, ++ { ZTI_ONE, ZTI_NULL, ZTI_ONE, ZTI_NULL }, ++ { ZTI_ONE, ZTI_NULL, ZTI_ONE, ZTI_NULL }, ++}; ++ ++static dsl_syncfunc_t spa_sync_props; ++static boolean_t spa_has_active_shared_spare(spa_t *spa); ++static inline int spa_load_impl(spa_t *spa, uint64_t, nvlist_t *config, ++ spa_load_state_t state, spa_import_type_t type, boolean_t mosconfig, ++ char **ereport); ++static void spa_vdev_resilver_done(spa_t *spa); ++ ++uint_t zio_taskq_batch_pct = 100; /* 1 thread per cpu in pset */ ++id_t zio_taskq_psrset_bind = PS_NONE; ++boolean_t zio_taskq_sysdc = B_TRUE; /* use SDC scheduling class */ ++uint_t zio_taskq_basedc = 80; /* base duty cycle */ ++ ++boolean_t spa_create_process = B_TRUE; /* no process ==> no sysdc */ ++ ++/* ++ * This (illegal) pool name is used when temporarily importing a spa_t in order ++ * to get the vdev stats associated with the imported devices. ++ */ ++#define TRYIMPORT_NAME "$import" ++ ++/* ++ * ========================================================================== ++ * SPA properties routines ++ * ========================================================================== ++ */ ++ ++/* ++ * Add a (source=src, propname=propval) list to an nvlist. 
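++ * Each property is packed as a nested nvlist holding ZPROP_SOURCE and
++ * ZPROP_VALUE; the value is added as a string when strval is non-NULL
++ * and as a 64-bit integer otherwise.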
++ */ ++static void ++spa_prop_add_list(nvlist_t *nvl, zpool_prop_t prop, char *strval, ++ uint64_t intval, zprop_source_t src) ++{ ++ const char *propname = zpool_prop_to_name(prop); ++ nvlist_t *propval; ++ ++ VERIFY(nvlist_alloc(&propval, NV_UNIQUE_NAME, KM_PUSHPAGE) == 0); ++ VERIFY(nvlist_add_uint64(propval, ZPROP_SOURCE, src) == 0); ++ ++ if (strval != NULL) ++ VERIFY(nvlist_add_string(propval, ZPROP_VALUE, strval) == 0); ++ else ++ VERIFY(nvlist_add_uint64(propval, ZPROP_VALUE, intval) == 0); ++ ++ VERIFY(nvlist_add_nvlist(nvl, propname, propval) == 0); ++ nvlist_free(propval); ++} ++ ++/* ++ * Get property values from the spa configuration. ++ */ ++static void ++spa_prop_get_config(spa_t *spa, nvlist_t **nvp) ++{ ++ vdev_t *rvd = spa->spa_root_vdev; ++ uint64_t size; ++ uint64_t alloc; ++ uint64_t space; ++ uint64_t cap, version; ++ zprop_source_t src = ZPROP_SRC_NONE; ++ spa_config_dirent_t *dp; ++ int c; ++ ++ ASSERT(MUTEX_HELD(&spa->spa_props_lock)); ++ ++ if (rvd != NULL) { ++ alloc = metaslab_class_get_alloc(spa_normal_class(spa)); ++ size = metaslab_class_get_space(spa_normal_class(spa)); ++ spa_prop_add_list(*nvp, ZPOOL_PROP_NAME, spa_name(spa), 0, src); ++ spa_prop_add_list(*nvp, ZPOOL_PROP_SIZE, NULL, size, src); ++ spa_prop_add_list(*nvp, ZPOOL_PROP_ALLOCATED, NULL, alloc, src); ++ spa_prop_add_list(*nvp, ZPOOL_PROP_FREE, NULL, ++ size - alloc, src); ++ ++ space = 0; ++ for (c = 0; c < rvd->vdev_children; c++) { ++ vdev_t *tvd = rvd->vdev_child[c]; ++ space += tvd->vdev_max_asize - tvd->vdev_asize; ++ } ++ spa_prop_add_list(*nvp, ZPOOL_PROP_EXPANDSZ, NULL, space, ++ src); ++ ++ spa_prop_add_list(*nvp, ZPOOL_PROP_READONLY, NULL, ++ (spa_mode(spa) == FREAD), src); ++ ++ cap = (size == 0) ? 0 : (alloc * 100 / size); ++ spa_prop_add_list(*nvp, ZPOOL_PROP_CAPACITY, NULL, cap, src); ++ ++ spa_prop_add_list(*nvp, ZPOOL_PROP_DEDUPRATIO, NULL, ++ ddt_get_pool_dedup_ratio(spa), src); ++ ++ spa_prop_add_list(*nvp, ZPOOL_PROP_HEALTH, NULL, ++ rvd->vdev_state, src); ++ ++ version = spa_version(spa); ++ if (version == zpool_prop_default_numeric(ZPOOL_PROP_VERSION)) ++ src = ZPROP_SRC_DEFAULT; ++ else ++ src = ZPROP_SRC_LOCAL; ++ spa_prop_add_list(*nvp, ZPOOL_PROP_VERSION, NULL, version, src); ++ } ++ ++ spa_prop_add_list(*nvp, ZPOOL_PROP_GUID, NULL, spa_guid(spa), src); ++ ++ if (spa->spa_comment != NULL) { ++ spa_prop_add_list(*nvp, ZPOOL_PROP_COMMENT, spa->spa_comment, ++ 0, ZPROP_SRC_LOCAL); ++ } ++ ++ if (spa->spa_root != NULL) ++ spa_prop_add_list(*nvp, ZPOOL_PROP_ALTROOT, spa->spa_root, ++ 0, ZPROP_SRC_LOCAL); ++ ++ if ((dp = list_head(&spa->spa_config_list)) != NULL) { ++ if (dp->scd_path == NULL) { ++ spa_prop_add_list(*nvp, ZPOOL_PROP_CACHEFILE, ++ "none", 0, ZPROP_SRC_LOCAL); ++ } else if (strcmp(dp->scd_path, spa_config_path) != 0) { ++ spa_prop_add_list(*nvp, ZPOOL_PROP_CACHEFILE, ++ dp->scd_path, 0, ZPROP_SRC_LOCAL); ++ } ++ } ++} ++ ++/* ++ * Get zpool property values. ++ */ ++int ++spa_prop_get(spa_t *spa, nvlist_t **nvp) ++{ ++ objset_t *mos = spa->spa_meta_objset; ++ zap_cursor_t zc; ++ zap_attribute_t za; ++ int err; ++ ++ err = nvlist_alloc(nvp, NV_UNIQUE_NAME, KM_PUSHPAGE); ++ if (err) ++ return err; ++ ++ mutex_enter(&spa->spa_props_lock); ++ ++ /* ++ * Get properties from the spa config. ++ */ ++ spa_prop_get_config(spa, nvp); ++ ++ /* If no pool property object, no more prop to get. */ ++ if (mos == NULL || spa->spa_pool_props_object == 0) { ++ mutex_exit(&spa->spa_props_lock); ++ goto out; ++ } ++ ++ /* ++ * Get properties from the MOS pool property object. 
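++ * ZAP entries with an 8-byte integer width are numeric properties (the
++ * bootfs object number is translated back into a dataset name); entries
++ * with a 1-byte width are string properties fetched via zap_lookup().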
++ */ ++ for (zap_cursor_init(&zc, mos, spa->spa_pool_props_object); ++ (err = zap_cursor_retrieve(&zc, &za)) == 0; ++ zap_cursor_advance(&zc)) { ++ uint64_t intval = 0; ++ char *strval = NULL; ++ zprop_source_t src = ZPROP_SRC_DEFAULT; ++ zpool_prop_t prop; ++ ++ if ((prop = zpool_name_to_prop(za.za_name)) == ZPROP_INVAL) ++ continue; ++ ++ switch (za.za_integer_length) { ++ case 8: ++ /* integer property */ ++ if (za.za_first_integer != ++ zpool_prop_default_numeric(prop)) ++ src = ZPROP_SRC_LOCAL; ++ ++ if (prop == ZPOOL_PROP_BOOTFS) { ++ dsl_pool_t *dp; ++ dsl_dataset_t *ds = NULL; ++ ++ dp = spa_get_dsl(spa); ++ rw_enter(&dp->dp_config_rwlock, RW_READER); ++ if ((err = dsl_dataset_hold_obj(dp, ++ za.za_first_integer, FTAG, &ds))) { ++ rw_exit(&dp->dp_config_rwlock); ++ break; ++ } ++ ++ strval = kmem_alloc( ++ MAXNAMELEN + strlen(MOS_DIR_NAME) + 1, ++ KM_PUSHPAGE); ++ dsl_dataset_name(ds, strval); ++ dsl_dataset_rele(ds, FTAG); ++ rw_exit(&dp->dp_config_rwlock); ++ } else { ++ strval = NULL; ++ intval = za.za_first_integer; ++ } ++ ++ spa_prop_add_list(*nvp, prop, strval, intval, src); ++ ++ if (strval != NULL) ++ kmem_free(strval, ++ MAXNAMELEN + strlen(MOS_DIR_NAME) + 1); ++ ++ break; ++ ++ case 1: ++ /* string property */ ++ strval = kmem_alloc(za.za_num_integers, KM_PUSHPAGE); ++ err = zap_lookup(mos, spa->spa_pool_props_object, ++ za.za_name, 1, za.za_num_integers, strval); ++ if (err) { ++ kmem_free(strval, za.za_num_integers); ++ break; ++ } ++ spa_prop_add_list(*nvp, prop, strval, 0, src); ++ kmem_free(strval, za.za_num_integers); ++ break; ++ ++ default: ++ break; ++ } ++ } ++ zap_cursor_fini(&zc); ++ mutex_exit(&spa->spa_props_lock); ++out: ++ if (err && err != ENOENT) { ++ nvlist_free(*nvp); ++ *nvp = NULL; ++ return (err); ++ } ++ ++ return (0); ++} ++ ++/* ++ * Validate the given pool properties nvlist and modify the list ++ * for the property values to be set. ++ */ ++static int ++spa_prop_validate(spa_t *spa, nvlist_t *props) ++{ ++ nvpair_t *elem; ++ int error = 0, reset_bootfs = 0; ++ uint64_t objnum = 0; ++ ++ elem = NULL; ++ while ((elem = nvlist_next_nvpair(props, elem)) != NULL) { ++ zpool_prop_t prop; ++ char *propname, *strval; ++ uint64_t intval; ++ objset_t *os; ++ char *slash, *check; ++ ++ propname = nvpair_name(elem); ++ ++ if ((prop = zpool_name_to_prop(propname)) == ZPROP_INVAL) ++ return (EINVAL); ++ ++ switch (prop) { ++ case ZPOOL_PROP_VERSION: ++ error = nvpair_value_uint64(elem, &intval); ++ if (!error && ++ (intval < spa_version(spa) || intval > SPA_VERSION)) ++ error = EINVAL; ++ break; ++ ++ case ZPOOL_PROP_DELEGATION: ++ case ZPOOL_PROP_AUTOREPLACE: ++ case ZPOOL_PROP_LISTSNAPS: ++ case ZPOOL_PROP_AUTOEXPAND: ++ error = nvpair_value_uint64(elem, &intval); ++ if (!error && intval > 1) ++ error = EINVAL; ++ break; ++ ++ case ZPOOL_PROP_BOOTFS: ++ /* ++ * If the pool version is less than SPA_VERSION_BOOTFS, ++ * or the pool is still being created (version == 0), ++ * the bootfs property cannot be set. 
++ */ ++ if (spa_version(spa) < SPA_VERSION_BOOTFS) { ++ error = ENOTSUP; ++ break; ++ } ++ ++ /* ++ * Make sure the vdev config is bootable ++ */ ++ if (!vdev_is_bootable(spa->spa_root_vdev)) { ++ error = ENOTSUP; ++ break; ++ } ++ ++ reset_bootfs = 1; ++ ++ error = nvpair_value_string(elem, &strval); ++ ++ if (!error) { ++ uint64_t compress; ++ ++ if (strval == NULL || strval[0] == '\0') { ++ objnum = zpool_prop_default_numeric( ++ ZPOOL_PROP_BOOTFS); ++ break; ++ } ++ ++ if ((error = dmu_objset_hold(strval,FTAG,&os))) ++ break; ++ ++ /* Must be ZPL and not gzip compressed. */ ++ ++ if (dmu_objset_type(os) != DMU_OST_ZFS) { ++ error = ENOTSUP; ++ } else if ((error = dsl_prop_get_integer(strval, ++ zfs_prop_to_name(ZFS_PROP_COMPRESSION), ++ &compress, NULL)) == 0 && ++ !BOOTFS_COMPRESS_VALID(compress)) { ++ error = ENOTSUP; ++ } else { ++ objnum = dmu_objset_id(os); ++ } ++ dmu_objset_rele(os, FTAG); ++ } ++ break; ++ ++ case ZPOOL_PROP_FAILUREMODE: ++ error = nvpair_value_uint64(elem, &intval); ++ if (!error && (intval < ZIO_FAILURE_MODE_WAIT || ++ intval > ZIO_FAILURE_MODE_PANIC)) ++ error = EINVAL; ++ ++ /* ++ * This is a special case which only occurs when ++ * the pool has completely failed. This allows ++ * the user to change the in-core failmode property ++ * without syncing it out to disk (I/Os might ++ * currently be blocked). We do this by returning ++ * EIO to the caller (spa_prop_set) to trick it ++ * into thinking we encountered a property validation ++ * error. ++ */ ++ if (!error && spa_suspended(spa)) { ++ spa->spa_failmode = intval; ++ error = EIO; ++ } ++ break; ++ ++ case ZPOOL_PROP_CACHEFILE: ++ if ((error = nvpair_value_string(elem, &strval)) != 0) ++ break; ++ ++ if (strval[0] == '\0') ++ break; ++ ++ if (strcmp(strval, "none") == 0) ++ break; ++ ++ if (strval[0] != '/') { ++ error = EINVAL; ++ break; ++ } ++ ++ slash = strrchr(strval, '/'); ++ ASSERT(slash != NULL); ++ ++ if (slash[1] == '\0' || strcmp(slash, "/.") == 0 || ++ strcmp(slash, "/..") == 0) ++ error = EINVAL; ++ break; ++ ++ case ZPOOL_PROP_COMMENT: ++ if ((error = nvpair_value_string(elem, &strval)) != 0) ++ break; ++ for (check = strval; *check != '\0'; check++) { ++ if (!isprint(*check)) { ++ error = EINVAL; ++ break; ++ } ++ check++; ++ } ++ if (strlen(strval) > ZPROP_MAX_COMMENT) ++ error = E2BIG; ++ break; ++ ++ case ZPOOL_PROP_DEDUPDITTO: ++ if (spa_version(spa) < SPA_VERSION_DEDUP) ++ error = ENOTSUP; ++ else ++ error = nvpair_value_uint64(elem, &intval); ++ if (error == 0 && ++ intval != 0 && intval < ZIO_DEDUPDITTO_MIN) ++ error = EINVAL; ++ break; ++ ++ default: ++ break; ++ } ++ ++ if (error) ++ break; ++ } ++ ++ if (!error && reset_bootfs) { ++ error = nvlist_remove(props, ++ zpool_prop_to_name(ZPOOL_PROP_BOOTFS), DATA_TYPE_STRING); ++ ++ if (!error) { ++ error = nvlist_add_uint64(props, ++ zpool_prop_to_name(ZPOOL_PROP_BOOTFS), objnum); ++ } ++ } ++ ++ return (error); ++} ++ ++void ++spa_configfile_set(spa_t *spa, nvlist_t *nvp, boolean_t need_sync) ++{ ++ char *cachefile; ++ spa_config_dirent_t *dp; ++ ++ if (nvlist_lookup_string(nvp, zpool_prop_to_name(ZPOOL_PROP_CACHEFILE), ++ &cachefile) != 0) ++ return; ++ ++ dp = kmem_alloc(sizeof (spa_config_dirent_t), ++ KM_PUSHPAGE); ++ ++ if (cachefile[0] == '\0') ++ dp->scd_path = spa_strdup(spa_config_path); ++ else if (strcmp(cachefile, "none") == 0) ++ dp->scd_path = NULL; ++ else ++ dp->scd_path = spa_strdup(cachefile); ++ ++ list_insert_head(&spa->spa_config_list, dp); ++ if (need_sync) ++ spa_async_request(spa, SPA_ASYNC_CONFIG_UPDATE); ++} 
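++
++/*
++ * A minimal usage sketch, assuming only the nvlist and zpool_prop
++ * interfaces already used in this file: a caller builds an nvlist of
++ * pool properties and hands it to spa_prop_set() below.  The cachefile,
++ * altroot and readonly properties never require a sync task; any other
++ * property sets need_sync and is written out via spa_sync_props().
++ *
++ *     nvlist_t *props;
++ *     int error;
++ *
++ *     VERIFY(nvlist_alloc(&props, NV_UNIQUE_NAME, KM_SLEEP) == 0);
++ *     VERIFY(nvlist_add_uint64(props,
++ *         zpool_prop_to_name(ZPOOL_PROP_AUTOEXPAND), 1) == 0);
++ *     error = spa_prop_set(spa, props);
++ *     nvlist_free(props);
++ */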
++ ++int ++spa_prop_set(spa_t *spa, nvlist_t *nvp) ++{ ++ int error; ++ nvpair_t *elem; ++ boolean_t need_sync = B_FALSE; ++ zpool_prop_t prop; ++ ++ if ((error = spa_prop_validate(spa, nvp)) != 0) ++ return (error); ++ ++ elem = NULL; ++ while ((elem = nvlist_next_nvpair(nvp, elem)) != NULL) { ++ if ((prop = zpool_name_to_prop( ++ nvpair_name(elem))) == ZPROP_INVAL) ++ return (EINVAL); ++ ++ if (prop == ZPOOL_PROP_CACHEFILE || ++ prop == ZPOOL_PROP_ALTROOT || ++ prop == ZPOOL_PROP_READONLY) ++ continue; ++ ++ need_sync = B_TRUE; ++ break; ++ } ++ ++ if (need_sync) ++ return (dsl_sync_task_do(spa_get_dsl(spa), NULL, spa_sync_props, ++ spa, nvp, 3)); ++ else ++ return (0); ++} ++ ++/* ++ * If the bootfs property value is dsobj, clear it. ++ */ ++void ++spa_prop_clear_bootfs(spa_t *spa, uint64_t dsobj, dmu_tx_t *tx) ++{ ++ if (spa->spa_bootfs == dsobj && spa->spa_pool_props_object != 0) { ++ VERIFY(zap_remove(spa->spa_meta_objset, ++ spa->spa_pool_props_object, ++ zpool_prop_to_name(ZPOOL_PROP_BOOTFS), tx) == 0); ++ spa->spa_bootfs = 0; ++ } ++} ++ ++/* ++ * Change the GUID for the pool. This is done so that we can later ++ * re-import a pool built from a clone of our own vdevs. We will modify ++ * the root vdev's guid, our own pool guid, and then mark all of our ++ * vdevs dirty. Note that we must make sure that all our vdevs are ++ * online when we do this, or else any vdevs that weren't present ++ * would be orphaned from our pool. We are also going to issue a ++ * sysevent to update any watchers. ++ */ ++int ++spa_change_guid(spa_t *spa) ++{ ++ uint64_t oldguid, newguid; ++ uint64_t txg; ++ ++ if (!(spa_mode_global & FWRITE)) ++ return (EROFS); ++ ++ txg = spa_vdev_enter(spa); ++ ++ if (spa->spa_root_vdev->vdev_state != VDEV_STATE_HEALTHY) ++ return (spa_vdev_exit(spa, NULL, txg, ENXIO)); ++ ++ oldguid = spa_guid(spa); ++ newguid = spa_generate_guid(NULL); ++ ASSERT3U(oldguid, !=, newguid); ++ ++ spa->spa_root_vdev->vdev_guid = newguid; ++ spa->spa_root_vdev->vdev_guid_sum += (newguid - oldguid); ++ ++ vdev_config_dirty(spa->spa_root_vdev); ++ ++ spa_event_notify(spa, NULL, FM_EREPORT_ZFS_POOL_REGUID); ++ ++ return (spa_vdev_exit(spa, NULL, txg, 0)); ++} ++ ++/* ++ * ========================================================================== ++ * SPA state manipulation (open/create/destroy/import/export) ++ * ========================================================================== ++ */ ++ ++static int ++spa_error_entry_compare(const void *a, const void *b) ++{ ++ spa_error_entry_t *sa = (spa_error_entry_t *)a; ++ spa_error_entry_t *sb = (spa_error_entry_t *)b; ++ int ret; ++ ++ ret = bcmp(&sa->se_bookmark, &sb->se_bookmark, ++ sizeof (zbookmark_t)); ++ ++ if (ret < 0) ++ return (-1); ++ else if (ret > 0) ++ return (1); ++ else ++ return (0); ++} ++ ++/* ++ * Utility function which retrieves copies of the current logs and ++ * re-initializes them in the process. 
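++ * The caller must hold spa_errlist_lock; the current "last" and "scrub"
++ * AVL trees are copied into the caller-supplied structures and fresh,
++ * empty trees are created in their place.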
++ */ ++void ++spa_get_errlists(spa_t *spa, avl_tree_t *last, avl_tree_t *scrub) ++{ ++ ASSERT(MUTEX_HELD(&spa->spa_errlist_lock)); ++ ++ bcopy(&spa->spa_errlist_last, last, sizeof (avl_tree_t)); ++ bcopy(&spa->spa_errlist_scrub, scrub, sizeof (avl_tree_t)); ++ ++ avl_create(&spa->spa_errlist_scrub, ++ spa_error_entry_compare, sizeof (spa_error_entry_t), ++ offsetof(spa_error_entry_t, se_avl)); ++ avl_create(&spa->spa_errlist_last, ++ spa_error_entry_compare, sizeof (spa_error_entry_t), ++ offsetof(spa_error_entry_t, se_avl)); ++} ++ ++static taskq_t * ++spa_taskq_create(spa_t *spa, const char *name, enum zti_modes mode, ++ uint_t value) ++{ ++ uint_t flags = TASKQ_PREPOPULATE; ++ boolean_t batch = B_FALSE; ++ ++ switch (mode) { ++ case zti_mode_null: ++ return (NULL); /* no taskq needed */ ++ ++ case zti_mode_fixed: ++ ASSERT3U(value, >=, 1); ++ value = MAX(value, 1); ++ break; ++ ++ case zti_mode_batch: ++ batch = B_TRUE; ++ flags |= TASKQ_THREADS_CPU_PCT; ++ value = zio_taskq_batch_pct; ++ break; ++ ++ case zti_mode_online_percent: ++ flags |= TASKQ_THREADS_CPU_PCT; ++ break; ++ ++ default: ++ panic("unrecognized mode for %s taskq (%u:%u) in " ++ "spa_activate()", ++ name, mode, value); ++ break; ++ } ++ ++ if (zio_taskq_sysdc && spa->spa_proc != &p0) { ++ if (batch) ++ flags |= TASKQ_DC_BATCH; ++ ++ return (taskq_create_sysdc(name, value, 50, INT_MAX, ++ spa->spa_proc, zio_taskq_basedc, flags)); ++ } ++ return (taskq_create_proc(name, value, maxclsyspri, 50, INT_MAX, ++ spa->spa_proc, flags)); ++} ++ ++static void ++spa_create_zio_taskqs(spa_t *spa) ++{ ++ int t, q; ++ ++ for (t = 0; t < ZIO_TYPES; t++) { ++ for (q = 0; q < ZIO_TASKQ_TYPES; q++) { ++ const zio_taskq_info_t *ztip = &zio_taskqs[t][q]; ++ enum zti_modes mode = ztip->zti_mode; ++ uint_t value = ztip->zti_value; ++ char name[32]; ++ ++ (void) snprintf(name, sizeof (name), ++ "%s_%s", zio_type_name[t], zio_taskq_types[q]); ++ ++ spa->spa_zio_taskq[t][q] = ++ spa_taskq_create(spa, name, mode, value); ++ } ++ } ++} ++ ++#if defined(_KERNEL) && defined(HAVE_SPA_THREAD) ++static void ++spa_thread(void *arg) ++{ ++ callb_cpr_t cprinfo; ++ ++ spa_t *spa = arg; ++ user_t *pu = PTOU(curproc); ++ ++ CALLB_CPR_INIT(&cprinfo, &spa->spa_proc_lock, callb_generic_cpr, ++ spa->spa_name); ++ ++ ASSERT(curproc != &p0); ++ (void) snprintf(pu->u_psargs, sizeof (pu->u_psargs), ++ "zpool-%s", spa->spa_name); ++ (void) strlcpy(pu->u_comm, pu->u_psargs, sizeof (pu->u_comm)); ++ ++ /* bind this thread to the requested psrset */ ++ if (zio_taskq_psrset_bind != PS_NONE) { ++ pool_lock(); ++ mutex_enter(&cpu_lock); ++ mutex_enter(&pidlock); ++ mutex_enter(&curproc->p_lock); ++ ++ if (cpupart_bind_thread(curthread, zio_taskq_psrset_bind, ++ 0, NULL, NULL) == 0) { ++ curthread->t_bind_pset = zio_taskq_psrset_bind; ++ } else { ++ cmn_err(CE_WARN, ++ "Couldn't bind process for zfs pool \"%s\" to " ++ "pset %d\n", spa->spa_name, zio_taskq_psrset_bind); ++ } ++ ++ mutex_exit(&curproc->p_lock); ++ mutex_exit(&pidlock); ++ mutex_exit(&cpu_lock); ++ pool_unlock(); ++ } ++ ++ if (zio_taskq_sysdc) { ++ sysdc_thread_enter(curthread, 100, 0); ++ } ++ ++ spa->spa_proc = curproc; ++ spa->spa_did = curthread->t_did; ++ ++ spa_create_zio_taskqs(spa); ++ ++ mutex_enter(&spa->spa_proc_lock); ++ ASSERT(spa->spa_proc_state == SPA_PROC_CREATED); ++ ++ spa->spa_proc_state = SPA_PROC_ACTIVE; ++ cv_broadcast(&spa->spa_proc_cv); ++ ++ CALLB_CPR_SAFE_BEGIN(&cprinfo); ++ while (spa->spa_proc_state == SPA_PROC_ACTIVE) ++ cv_wait(&spa->spa_proc_cv, &spa->spa_proc_lock); ++ 
CALLB_CPR_SAFE_END(&cprinfo, &spa->spa_proc_lock); ++ ++ ASSERT(spa->spa_proc_state == SPA_PROC_DEACTIVATE); ++ spa->spa_proc_state = SPA_PROC_GONE; ++ spa->spa_proc = &p0; ++ cv_broadcast(&spa->spa_proc_cv); ++ CALLB_CPR_EXIT(&cprinfo); /* drops spa_proc_lock */ ++ ++ mutex_enter(&curproc->p_lock); ++ lwp_exit(); ++} ++#endif ++ ++/* ++ * Activate an uninitialized pool. ++ */ ++static void ++spa_activate(spa_t *spa, int mode) ++{ ++ ASSERT(spa->spa_state == POOL_STATE_UNINITIALIZED); ++ ++ spa->spa_state = POOL_STATE_ACTIVE; ++ spa->spa_mode = mode; ++ ++ spa->spa_normal_class = metaslab_class_create(spa, zfs_metaslab_ops); ++ spa->spa_log_class = metaslab_class_create(spa, zfs_metaslab_ops); ++ ++ /* Try to create a covering process */ ++ mutex_enter(&spa->spa_proc_lock); ++ ASSERT(spa->spa_proc_state == SPA_PROC_NONE); ++ ASSERT(spa->spa_proc == &p0); ++ spa->spa_did = 0; ++ ++#ifdef HAVE_SPA_THREAD ++ /* Only create a process if we're going to be around a while. */ ++ if (spa_create_process && strcmp(spa->spa_name, TRYIMPORT_NAME) != 0) { ++ if (newproc(spa_thread, (caddr_t)spa, syscid, maxclsyspri, ++ NULL, 0) == 0) { ++ spa->spa_proc_state = SPA_PROC_CREATED; ++ while (spa->spa_proc_state == SPA_PROC_CREATED) { ++ cv_wait(&spa->spa_proc_cv, ++ &spa->spa_proc_lock); ++ } ++ ASSERT(spa->spa_proc_state == SPA_PROC_ACTIVE); ++ ASSERT(spa->spa_proc != &p0); ++ ASSERT(spa->spa_did != 0); ++ } else { ++#ifdef _KERNEL ++ cmn_err(CE_WARN, ++ "Couldn't create process for zfs pool \"%s\"\n", ++ spa->spa_name); ++#endif ++ } ++ } ++#endif /* HAVE_SPA_THREAD */ ++ mutex_exit(&spa->spa_proc_lock); ++ ++ /* If we didn't create a process, we need to create our taskqs. */ ++ if (spa->spa_proc == &p0) { ++ spa_create_zio_taskqs(spa); ++ } ++ ++ list_create(&spa->spa_config_dirty_list, sizeof (vdev_t), ++ offsetof(vdev_t, vdev_config_dirty_node)); ++ list_create(&spa->spa_state_dirty_list, sizeof (vdev_t), ++ offsetof(vdev_t, vdev_state_dirty_node)); ++ ++ txg_list_create(&spa->spa_vdev_txg_list, ++ offsetof(struct vdev, vdev_txg_node)); ++ ++ avl_create(&spa->spa_errlist_scrub, ++ spa_error_entry_compare, sizeof (spa_error_entry_t), ++ offsetof(spa_error_entry_t, se_avl)); ++ avl_create(&spa->spa_errlist_last, ++ spa_error_entry_compare, sizeof (spa_error_entry_t), ++ offsetof(spa_error_entry_t, se_avl)); ++} ++ ++/* ++ * Opposite of spa_activate(). ++ */ ++static void ++spa_deactivate(spa_t *spa) ++{ ++ int t, q; ++ ++ ASSERT(spa->spa_sync_on == B_FALSE); ++ ASSERT(spa->spa_dsl_pool == NULL); ++ ASSERT(spa->spa_root_vdev == NULL); ++ ASSERT(spa->spa_async_zio_root == NULL); ++ ASSERT(spa->spa_state != POOL_STATE_UNINITIALIZED); ++ ++ txg_list_destroy(&spa->spa_vdev_txg_list); ++ ++ list_destroy(&spa->spa_config_dirty_list); ++ list_destroy(&spa->spa_state_dirty_list); ++ ++ for (t = 0; t < ZIO_TYPES; t++) { ++ for (q = 0; q < ZIO_TASKQ_TYPES; q++) { ++ if (spa->spa_zio_taskq[t][q] != NULL) ++ taskq_destroy(spa->spa_zio_taskq[t][q]); ++ spa->spa_zio_taskq[t][q] = NULL; ++ } ++ } ++ ++ metaslab_class_destroy(spa->spa_normal_class); ++ spa->spa_normal_class = NULL; ++ ++ metaslab_class_destroy(spa->spa_log_class); ++ spa->spa_log_class = NULL; ++ ++ /* ++ * If this was part of an import or the open otherwise failed, we may ++ * still have errors left in the queues. Empty them just in case. 
++ */ ++ spa_errlog_drain(spa); ++ ++ avl_destroy(&spa->spa_errlist_scrub); ++ avl_destroy(&spa->spa_errlist_last); ++ ++ spa->spa_state = POOL_STATE_UNINITIALIZED; ++ ++ mutex_enter(&spa->spa_proc_lock); ++ if (spa->spa_proc_state != SPA_PROC_NONE) { ++ ASSERT(spa->spa_proc_state == SPA_PROC_ACTIVE); ++ spa->spa_proc_state = SPA_PROC_DEACTIVATE; ++ cv_broadcast(&spa->spa_proc_cv); ++ while (spa->spa_proc_state == SPA_PROC_DEACTIVATE) { ++ ASSERT(spa->spa_proc != &p0); ++ cv_wait(&spa->spa_proc_cv, &spa->spa_proc_lock); ++ } ++ ASSERT(spa->spa_proc_state == SPA_PROC_GONE); ++ spa->spa_proc_state = SPA_PROC_NONE; ++ } ++ ASSERT(spa->spa_proc == &p0); ++ mutex_exit(&spa->spa_proc_lock); ++ ++ /* ++ * We want to make sure spa_thread() has actually exited the ZFS ++ * module, so that the module can't be unloaded out from underneath ++ * it. ++ */ ++ if (spa->spa_did != 0) { ++ thread_join(spa->spa_did); ++ spa->spa_did = 0; ++ } ++} ++ ++/* ++ * Verify a pool configuration, and construct the vdev tree appropriately. This ++ * will create all the necessary vdevs in the appropriate layout, with each vdev ++ * in the CLOSED state. This will prep the pool before open/creation/import. ++ * All vdev validation is done by the vdev_alloc() routine. ++ */ ++static int ++spa_config_parse(spa_t *spa, vdev_t **vdp, nvlist_t *nv, vdev_t *parent, ++ uint_t id, int atype) ++{ ++ nvlist_t **child; ++ uint_t children; ++ int error; ++ int c; ++ ++ if ((error = vdev_alloc(spa, vdp, nv, parent, id, atype)) != 0) ++ return (error); ++ ++ if ((*vdp)->vdev_ops->vdev_op_leaf) ++ return (0); ++ ++ error = nvlist_lookup_nvlist_array(nv, ZPOOL_CONFIG_CHILDREN, ++ &child, &children); ++ ++ if (error == ENOENT) ++ return (0); ++ ++ if (error) { ++ vdev_free(*vdp); ++ *vdp = NULL; ++ return (EINVAL); ++ } ++ ++ for (c = 0; c < children; c++) { ++ vdev_t *vd; ++ if ((error = spa_config_parse(spa, &vd, child[c], *vdp, c, ++ atype)) != 0) { ++ vdev_free(*vdp); ++ *vdp = NULL; ++ return (error); ++ } ++ } ++ ++ ASSERT(*vdp != NULL); ++ ++ return (0); ++} ++ ++/* ++ * Opposite of spa_load(). ++ */ ++static void ++spa_unload(spa_t *spa) ++{ ++ int i; ++ ++ ASSERT(MUTEX_HELD(&spa_namespace_lock)); ++ ++ /* ++ * Stop async tasks. ++ */ ++ spa_async_suspend(spa); ++ ++ /* ++ * Stop syncing. ++ */ ++ if (spa->spa_sync_on) { ++ txg_sync_stop(spa->spa_dsl_pool); ++ spa->spa_sync_on = B_FALSE; ++ } ++ ++ /* ++ * Wait for any outstanding async I/O to complete. ++ */ ++ if (spa->spa_async_zio_root != NULL) { ++ (void) zio_wait(spa->spa_async_zio_root); ++ spa->spa_async_zio_root = NULL; ++ } ++ ++ bpobj_close(&spa->spa_deferred_bpobj); ++ ++ /* ++ * Close the dsl pool. ++ */ ++ if (spa->spa_dsl_pool) { ++ dsl_pool_close(spa->spa_dsl_pool); ++ spa->spa_dsl_pool = NULL; ++ spa->spa_meta_objset = NULL; ++ } ++ ++ ddt_unload(spa); ++ ++ spa_config_enter(spa, SCL_ALL, FTAG, RW_WRITER); ++ ++ /* ++ * Drop and purge level 2 cache ++ */ ++ spa_l2cache_drop(spa); ++ ++ /* ++ * Close all vdevs. 
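++ * Freeing the root vdev tears down the whole vdev tree; the spare and
++ * l2cache auxiliary vdev arrays are released separately below.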
++ */ ++ if (spa->spa_root_vdev) ++ vdev_free(spa->spa_root_vdev); ++ ASSERT(spa->spa_root_vdev == NULL); ++ ++ for (i = 0; i < spa->spa_spares.sav_count; i++) ++ vdev_free(spa->spa_spares.sav_vdevs[i]); ++ if (spa->spa_spares.sav_vdevs) { ++ kmem_free(spa->spa_spares.sav_vdevs, ++ spa->spa_spares.sav_count * sizeof (void *)); ++ spa->spa_spares.sav_vdevs = NULL; ++ } ++ if (spa->spa_spares.sav_config) { ++ nvlist_free(spa->spa_spares.sav_config); ++ spa->spa_spares.sav_config = NULL; ++ } ++ spa->spa_spares.sav_count = 0; ++ ++ for (i = 0; i < spa->spa_l2cache.sav_count; i++) { ++ vdev_clear_stats(spa->spa_l2cache.sav_vdevs[i]); ++ vdev_free(spa->spa_l2cache.sav_vdevs[i]); ++ } ++ if (spa->spa_l2cache.sav_vdevs) { ++ kmem_free(spa->spa_l2cache.sav_vdevs, ++ spa->spa_l2cache.sav_count * sizeof (void *)); ++ spa->spa_l2cache.sav_vdevs = NULL; ++ } ++ if (spa->spa_l2cache.sav_config) { ++ nvlist_free(spa->spa_l2cache.sav_config); ++ spa->spa_l2cache.sav_config = NULL; ++ } ++ spa->spa_l2cache.sav_count = 0; ++ ++ spa->spa_async_suspended = 0; ++ ++ if (spa->spa_comment != NULL) { ++ spa_strfree(spa->spa_comment); ++ spa->spa_comment = NULL; ++ } ++ ++ spa_config_exit(spa, SCL_ALL, FTAG); ++} ++ ++/* ++ * Load (or re-load) the current list of vdevs describing the active spares for ++ * this pool. When this is called, we have some form of basic information in ++ * 'spa_spares.sav_config'. We parse this into vdevs, try to open them, and ++ * then re-generate a more complete list including status information. ++ */ ++static void ++spa_load_spares(spa_t *spa) ++{ ++ nvlist_t **spares; ++ uint_t nspares; ++ int i; ++ vdev_t *vd, *tvd; ++ ++ ASSERT(spa_config_held(spa, SCL_ALL, RW_WRITER) == SCL_ALL); ++ ++ /* ++ * First, close and free any existing spare vdevs. ++ */ ++ for (i = 0; i < spa->spa_spares.sav_count; i++) { ++ vd = spa->spa_spares.sav_vdevs[i]; ++ ++ /* Undo the call to spa_activate() below */ ++ if ((tvd = spa_lookup_by_guid(spa, vd->vdev_guid, ++ B_FALSE)) != NULL && tvd->vdev_isspare) ++ spa_spare_remove(tvd); ++ vdev_close(vd); ++ vdev_free(vd); ++ } ++ ++ if (spa->spa_spares.sav_vdevs) ++ kmem_free(spa->spa_spares.sav_vdevs, ++ spa->spa_spares.sav_count * sizeof (void *)); ++ ++ if (spa->spa_spares.sav_config == NULL) ++ nspares = 0; ++ else ++ VERIFY(nvlist_lookup_nvlist_array(spa->spa_spares.sav_config, ++ ZPOOL_CONFIG_SPARES, &spares, &nspares) == 0); ++ ++ spa->spa_spares.sav_count = (int)nspares; ++ spa->spa_spares.sav_vdevs = NULL; ++ ++ if (nspares == 0) ++ return; ++ ++ /* ++ * Construct the array of vdevs, opening them to get status in the ++ * process. For each spare, there is potentially two different vdev_t ++ * structures associated with it: one in the list of spares (used only ++ * for basic validation purposes) and one in the active vdev ++ * configuration (if it's spared in). During this phase we open and ++ * validate each vdev on the spare list. If the vdev also exists in the ++ * active configuration, then we also mark this vdev as an active spare. ++ */ ++ spa->spa_spares.sav_vdevs = kmem_alloc(nspares * sizeof (void *), ++ KM_PUSHPAGE); ++ for (i = 0; i < spa->spa_spares.sav_count; i++) { ++ VERIFY(spa_config_parse(spa, &vd, spares[i], NULL, 0, ++ VDEV_ALLOC_SPARE) == 0); ++ ASSERT(vd != NULL); ++ ++ spa->spa_spares.sav_vdevs[i] = vd; ++ ++ if ((tvd = spa_lookup_by_guid(spa, vd->vdev_guid, ++ B_FALSE)) != NULL) { ++ if (!tvd->vdev_isspare) ++ spa_spare_add(tvd); ++ ++ /* ++ * We only mark the spare active if we were successfully ++ * able to load the vdev. 
Otherwise, importing a pool ++ * with a bad active spare would result in strange ++ * behavior, because multiple pool would think the spare ++ * is actively in use. ++ * ++ * There is a vulnerability here to an equally bizarre ++ * circumstance, where a dead active spare is later ++ * brought back to life (onlined or otherwise). Given ++ * the rarity of this scenario, and the extra complexity ++ * it adds, we ignore the possibility. ++ */ ++ if (!vdev_is_dead(tvd)) ++ spa_spare_activate(tvd); ++ } ++ ++ vd->vdev_top = vd; ++ vd->vdev_aux = &spa->spa_spares; ++ ++ if (vdev_open(vd) != 0) ++ continue; ++ ++ if (vdev_validate_aux(vd) == 0) ++ spa_spare_add(vd); ++ } ++ ++ /* ++ * Recompute the stashed list of spares, with status information ++ * this time. ++ */ ++ VERIFY(nvlist_remove(spa->spa_spares.sav_config, ZPOOL_CONFIG_SPARES, ++ DATA_TYPE_NVLIST_ARRAY) == 0); ++ ++ spares = kmem_alloc(spa->spa_spares.sav_count * sizeof (void *), ++ KM_PUSHPAGE); ++ for (i = 0; i < spa->spa_spares.sav_count; i++) ++ spares[i] = vdev_config_generate(spa, ++ spa->spa_spares.sav_vdevs[i], B_TRUE, VDEV_CONFIG_SPARE); ++ VERIFY(nvlist_add_nvlist_array(spa->spa_spares.sav_config, ++ ZPOOL_CONFIG_SPARES, spares, spa->spa_spares.sav_count) == 0); ++ for (i = 0; i < spa->spa_spares.sav_count; i++) ++ nvlist_free(spares[i]); ++ kmem_free(spares, spa->spa_spares.sav_count * sizeof (void *)); ++} ++ ++/* ++ * Load (or re-load) the current list of vdevs describing the active l2cache for ++ * this pool. When this is called, we have some form of basic information in ++ * 'spa_l2cache.sav_config'. We parse this into vdevs, try to open them, and ++ * then re-generate a more complete list including status information. ++ * Devices which are already active have their details maintained, and are ++ * not re-opened. ++ */ ++static void ++spa_load_l2cache(spa_t *spa) ++{ ++ nvlist_t **l2cache; ++ uint_t nl2cache; ++ int i, j, oldnvdevs; ++ uint64_t guid; ++ vdev_t *vd, **oldvdevs, **newvdevs = NULL; ++ spa_aux_vdev_t *sav = &spa->spa_l2cache; ++ ++ ASSERT(spa_config_held(spa, SCL_ALL, RW_WRITER) == SCL_ALL); ++ ++ if (sav->sav_config != NULL) { ++ VERIFY(nvlist_lookup_nvlist_array(sav->sav_config, ++ ZPOOL_CONFIG_L2CACHE, &l2cache, &nl2cache) == 0); ++ newvdevs = kmem_alloc(nl2cache * sizeof (void *), KM_PUSHPAGE); ++ } else { ++ nl2cache = 0; ++ } ++ ++ oldvdevs = sav->sav_vdevs; ++ oldnvdevs = sav->sav_count; ++ sav->sav_vdevs = NULL; ++ sav->sav_count = 0; ++ ++ /* ++ * Process new nvlist of vdevs. ++ */ ++ for (i = 0; i < nl2cache; i++) { ++ VERIFY(nvlist_lookup_uint64(l2cache[i], ZPOOL_CONFIG_GUID, ++ &guid) == 0); ++ ++ newvdevs[i] = NULL; ++ for (j = 0; j < oldnvdevs; j++) { ++ vd = oldvdevs[j]; ++ if (vd != NULL && guid == vd->vdev_guid) { ++ /* ++ * Retain previous vdev for add/remove ops. ++ */ ++ newvdevs[i] = vd; ++ oldvdevs[j] = NULL; ++ break; ++ } ++ } ++ ++ if (newvdevs[i] == NULL) { ++ /* ++ * Create new vdev ++ */ ++ VERIFY(spa_config_parse(spa, &vd, l2cache[i], NULL, 0, ++ VDEV_ALLOC_L2CACHE) == 0); ++ ASSERT(vd != NULL); ++ newvdevs[i] = vd; ++ ++ /* ++ * Commit this vdev as an l2cache device, ++ * even if it fails to open. 
++ */ ++ spa_l2cache_add(vd); ++ ++ vd->vdev_top = vd; ++ vd->vdev_aux = sav; ++ ++ spa_l2cache_activate(vd); ++ ++ if (vdev_open(vd) != 0) ++ continue; ++ ++ (void) vdev_validate_aux(vd); ++ ++ if (!vdev_is_dead(vd)) ++ l2arc_add_vdev(spa, vd); ++ } ++ } ++ ++ /* ++ * Purge vdevs that were dropped ++ */ ++ for (i = 0; i < oldnvdevs; i++) { ++ uint64_t pool; ++ ++ vd = oldvdevs[i]; ++ if (vd != NULL) { ++ ASSERT(vd->vdev_isl2cache); ++ ++ if (spa_l2cache_exists(vd->vdev_guid, &pool) && ++ pool != 0ULL && l2arc_vdev_present(vd)) ++ l2arc_remove_vdev(vd); ++ vdev_clear_stats(vd); ++ vdev_free(vd); ++ } ++ } ++ ++ if (oldvdevs) ++ kmem_free(oldvdevs, oldnvdevs * sizeof (void *)); ++ ++ if (sav->sav_config == NULL) ++ goto out; ++ ++ sav->sav_vdevs = newvdevs; ++ sav->sav_count = (int)nl2cache; ++ ++ /* ++ * Recompute the stashed list of l2cache devices, with status ++ * information this time. ++ */ ++ VERIFY(nvlist_remove(sav->sav_config, ZPOOL_CONFIG_L2CACHE, ++ DATA_TYPE_NVLIST_ARRAY) == 0); ++ ++ l2cache = kmem_alloc(sav->sav_count * sizeof (void *), KM_PUSHPAGE); ++ for (i = 0; i < sav->sav_count; i++) ++ l2cache[i] = vdev_config_generate(spa, ++ sav->sav_vdevs[i], B_TRUE, VDEV_CONFIG_L2CACHE); ++ VERIFY(nvlist_add_nvlist_array(sav->sav_config, ++ ZPOOL_CONFIG_L2CACHE, l2cache, sav->sav_count) == 0); ++out: ++ for (i = 0; i < sav->sav_count; i++) ++ nvlist_free(l2cache[i]); ++ if (sav->sav_count) ++ kmem_free(l2cache, sav->sav_count * sizeof (void *)); ++} ++ ++static int ++load_nvlist(spa_t *spa, uint64_t obj, nvlist_t **value) ++{ ++ dmu_buf_t *db; ++ char *packed = NULL; ++ size_t nvsize = 0; ++ int error; ++ *value = NULL; ++ ++ VERIFY(0 == dmu_bonus_hold(spa->spa_meta_objset, obj, FTAG, &db)); ++ nvsize = *(uint64_t *)db->db_data; ++ dmu_buf_rele(db, FTAG); ++ ++ packed = kmem_alloc(nvsize, KM_PUSHPAGE | KM_NODEBUG); ++ error = dmu_read(spa->spa_meta_objset, obj, 0, nvsize, packed, ++ DMU_READ_PREFETCH); ++ if (error == 0) ++ error = nvlist_unpack(packed, nvsize, value, 0); ++ kmem_free(packed, nvsize); ++ ++ return (error); ++} ++ ++/* ++ * Checks to see if the given vdev could not be opened, in which case we post a ++ * sysevent to notify the autoreplace code that the device has been removed. ++ */ ++static void ++spa_check_removed(vdev_t *vd) ++{ ++ int c; ++ ++ for (c = 0; c < vd->vdev_children; c++) ++ spa_check_removed(vd->vdev_child[c]); ++ ++ if (vd->vdev_ops->vdev_op_leaf && vdev_is_dead(vd)) { ++ zfs_ereport_post(FM_EREPORT_RESOURCE_AUTOREPLACE, ++ vd->vdev_spa, vd, NULL, 0, 0); ++ spa_event_notify(vd->vdev_spa, vd, FM_EREPORT_ZFS_DEVICE_CHECK); ++ } ++} ++ ++/* ++ * Validate the current config against the MOS config ++ */ ++static boolean_t ++spa_config_valid(spa_t *spa, nvlist_t *config) ++{ ++ vdev_t *mrvd, *rvd = spa->spa_root_vdev; ++ nvlist_t *nv; ++ int c, i; ++ ++ VERIFY(nvlist_lookup_nvlist(config, ZPOOL_CONFIG_VDEV_TREE, &nv) == 0); ++ ++ spa_config_enter(spa, SCL_ALL, FTAG, RW_WRITER); ++ VERIFY(spa_config_parse(spa, &mrvd, nv, NULL, 0, VDEV_ALLOC_LOAD) == 0); ++ ++ ASSERT3U(rvd->vdev_children, ==, mrvd->vdev_children); ++ ++ /* ++ * If we're doing a normal import, then build up any additional ++ * diagnostic information about missing devices in this config. ++ * We'll pass this up to the user for further processing. 
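 * [Editorial note, added in review -- not part of the original diff:
 * the "additional diagnostic information" assembled below is an nvlist of
 * the missing top-level log vdevs (taken from the MOS copy of the config),
 * attached to spa->spa_load_info under ZPOOL_CONFIG_MISSING_DEVICES so that
 * userland import code can report exactly which devices could not be
 * found.]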
++ */ ++ if (!(spa->spa_import_flags & ZFS_IMPORT_MISSING_LOG)) { ++ nvlist_t **child, *nv; ++ uint64_t idx = 0; ++ ++ child = kmem_alloc(rvd->vdev_children * sizeof (nvlist_t **), ++ KM_PUSHPAGE); ++ VERIFY(nvlist_alloc(&nv, NV_UNIQUE_NAME, KM_PUSHPAGE) == 0); ++ ++ for (c = 0; c < rvd->vdev_children; c++) { ++ vdev_t *tvd = rvd->vdev_child[c]; ++ vdev_t *mtvd = mrvd->vdev_child[c]; ++ ++ if (tvd->vdev_ops == &vdev_missing_ops && ++ mtvd->vdev_ops != &vdev_missing_ops && ++ mtvd->vdev_islog) ++ child[idx++] = vdev_config_generate(spa, mtvd, ++ B_FALSE, 0); ++ } ++ ++ if (idx) { ++ VERIFY(nvlist_add_nvlist_array(nv, ++ ZPOOL_CONFIG_CHILDREN, child, idx) == 0); ++ VERIFY(nvlist_add_nvlist(spa->spa_load_info, ++ ZPOOL_CONFIG_MISSING_DEVICES, nv) == 0); ++ ++ for (i = 0; i < idx; i++) ++ nvlist_free(child[i]); ++ } ++ nvlist_free(nv); ++ kmem_free(child, rvd->vdev_children * sizeof (char **)); ++ } ++ ++ /* ++ * Compare the root vdev tree with the information we have ++ * from the MOS config (mrvd). Check each top-level vdev ++ * with the corresponding MOS config top-level (mtvd). ++ */ ++ for (c = 0; c < rvd->vdev_children; c++) { ++ vdev_t *tvd = rvd->vdev_child[c]; ++ vdev_t *mtvd = mrvd->vdev_child[c]; ++ ++ /* ++ * Resolve any "missing" vdevs in the current configuration. ++ * If we find that the MOS config has more accurate information ++ * about the top-level vdev then use that vdev instead. ++ */ ++ if (tvd->vdev_ops == &vdev_missing_ops && ++ mtvd->vdev_ops != &vdev_missing_ops) { ++ ++ if (!(spa->spa_import_flags & ZFS_IMPORT_MISSING_LOG)) ++ continue; ++ ++ /* ++ * Device specific actions. ++ */ ++ if (mtvd->vdev_islog) { ++ spa_set_log_state(spa, SPA_LOG_CLEAR); ++ } else { ++ /* ++ * XXX - once we have 'readonly' pool ++ * support we should be able to handle ++ * missing data devices by transitioning ++ * the pool to readonly. ++ */ ++ continue; ++ } ++ ++ /* ++ * Swap the missing vdev with the data we were ++ * able to obtain from the MOS config. ++ */ ++ vdev_remove_child(rvd, tvd); ++ vdev_remove_child(mrvd, mtvd); ++ ++ vdev_add_child(rvd, mtvd); ++ vdev_add_child(mrvd, tvd); ++ ++ spa_config_exit(spa, SCL_ALL, FTAG); ++ vdev_load(mtvd); ++ spa_config_enter(spa, SCL_ALL, FTAG, RW_WRITER); ++ ++ vdev_reopen(rvd); ++ } else if (mtvd->vdev_islog) { ++ /* ++ * Load the slog device's state from the MOS config ++ * since it's possible that the label does not ++ * contain the most up-to-date information. ++ */ ++ vdev_load_log_state(tvd, mtvd); ++ vdev_reopen(tvd); ++ } ++ } ++ vdev_free(mrvd); ++ spa_config_exit(spa, SCL_ALL, FTAG); ++ ++ /* ++ * Ensure we were able to validate the config. 
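 * [Editorial note, added in review -- not part of the original diff:
 * "validated" here reduces to the guid-sum identity returned below: the
 * vdev_guid_sum maintained for the assembled tree must equal the value
 * recorded in the uberblock when it was written.  As I read the vdev code,
 * that sum accumulates the guid of a vdev and all of its descendants, so a
 * rough, illustrative equivalent (not how the kernel actually computes it;
 * the real bookkeeping is maintained incrementally in vdev.c) would be:
 *
 *	static uint64_t
 *	guid_sum(vdev_t *vd)
 *	{
 *		uint64_t sum = vd->vdev_guid;
 *		int c;
 *
 *		for (c = 0; c < vd->vdev_children; c++)
 *			sum += guid_sum(vd->vdev_child[c]);
 *		return (sum);
 *	}
 *
 * Any device that went missing or was swapped since the uberblock was
 * written makes the sums diverge, and the caller then reports
 * VDEV_AUX_BAD_GUID_SUM.]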
++ */ ++ return (rvd->vdev_guid_sum == spa->spa_uberblock.ub_guid_sum); ++} ++ ++/* ++ * Check for missing log devices ++ */ ++static int ++spa_check_logs(spa_t *spa) ++{ ++ switch (spa->spa_log_state) { ++ default: ++ break; ++ case SPA_LOG_MISSING: ++ /* need to recheck in case slog has been restored */ ++ case SPA_LOG_UNKNOWN: ++ if (dmu_objset_find(spa->spa_name, zil_check_log_chain, NULL, ++ DS_FIND_CHILDREN)) { ++ spa_set_log_state(spa, SPA_LOG_MISSING); ++ return (1); ++ } ++ break; ++ } ++ return (0); ++} ++ ++static boolean_t ++spa_passivate_log(spa_t *spa) ++{ ++ vdev_t *rvd = spa->spa_root_vdev; ++ boolean_t slog_found = B_FALSE; ++ int c; ++ ++ ASSERT(spa_config_held(spa, SCL_ALLOC, RW_WRITER)); ++ ++ if (!spa_has_slogs(spa)) ++ return (B_FALSE); ++ ++ for (c = 0; c < rvd->vdev_children; c++) { ++ vdev_t *tvd = rvd->vdev_child[c]; ++ metaslab_group_t *mg = tvd->vdev_mg; ++ ++ if (tvd->vdev_islog) { ++ metaslab_group_passivate(mg); ++ slog_found = B_TRUE; ++ } ++ } ++ ++ return (slog_found); ++} ++ ++static void ++spa_activate_log(spa_t *spa) ++{ ++ vdev_t *rvd = spa->spa_root_vdev; ++ int c; ++ ++ ASSERT(spa_config_held(spa, SCL_ALLOC, RW_WRITER)); ++ ++ for (c = 0; c < rvd->vdev_children; c++) { ++ vdev_t *tvd = rvd->vdev_child[c]; ++ metaslab_group_t *mg = tvd->vdev_mg; ++ ++ if (tvd->vdev_islog) ++ metaslab_group_activate(mg); ++ } ++} ++ ++int ++spa_offline_log(spa_t *spa) ++{ ++ int error = 0; ++ ++ if ((error = dmu_objset_find(spa_name(spa), zil_vdev_offline, ++ NULL, DS_FIND_CHILDREN)) == 0) { ++ ++ /* ++ * We successfully offlined the log device, sync out the ++ * current txg so that the "stubby" block can be removed ++ * by zil_sync(). ++ */ ++ txg_wait_synced(spa->spa_dsl_pool, 0); ++ } ++ return (error); ++} ++ ++static void ++spa_aux_check_removed(spa_aux_vdev_t *sav) ++{ ++ int i; ++ ++ for (i = 0; i < sav->sav_count; i++) ++ spa_check_removed(sav->sav_vdevs[i]); ++} ++ ++void ++spa_claim_notify(zio_t *zio) ++{ ++ spa_t *spa = zio->io_spa; ++ ++ if (zio->io_error) ++ return; ++ ++ mutex_enter(&spa->spa_props_lock); /* any mutex will do */ ++ if (spa->spa_claim_max_txg < zio->io_bp->blk_birth) ++ spa->spa_claim_max_txg = zio->io_bp->blk_birth; ++ mutex_exit(&spa->spa_props_lock); ++} ++ ++typedef struct spa_load_error { ++ uint64_t sle_meta_count; ++ uint64_t sle_data_count; ++} spa_load_error_t; ++ ++static void ++spa_load_verify_done(zio_t *zio) ++{ ++ blkptr_t *bp = zio->io_bp; ++ spa_load_error_t *sle = zio->io_private; ++ dmu_object_type_t type = BP_GET_TYPE(bp); ++ int error = zio->io_error; ++ ++ if (error) { ++ if ((BP_GET_LEVEL(bp) != 0 || dmu_ot[type].ot_metadata) && ++ type != DMU_OT_INTENT_LOG) ++ atomic_add_64(&sle->sle_meta_count, 1); ++ else ++ atomic_add_64(&sle->sle_data_count, 1); ++ } ++ zio_data_buf_free(zio->io_data, zio->io_size); ++} ++ ++/*ARGSUSED*/ ++static int ++spa_load_verify_cb(spa_t *spa, zilog_t *zilog, const blkptr_t *bp, ++ arc_buf_t *pbuf, const zbookmark_t *zb, const dnode_phys_t *dnp, void *arg) ++{ ++ if (bp != NULL) { ++ zio_t *rio = arg; ++ size_t size = BP_GET_PSIZE(bp); ++ void *data = zio_data_buf_alloc(size); ++ ++ zio_nowait(zio_read(rio, spa, bp, data, size, ++ spa_load_verify_done, rio->io_private, ZIO_PRIORITY_SCRUB, ++ ZIO_FLAG_SPECULATIVE | ZIO_FLAG_CANFAIL | ++ ZIO_FLAG_SCRUB | ZIO_FLAG_RAW, zb)); ++ } ++ return (0); ++} ++ ++static int ++spa_load_verify(spa_t *spa) ++{ ++ zio_t *rio; ++ spa_load_error_t sle = { 0 }; ++ zpool_rewind_policy_t policy; ++ boolean_t verify_ok = B_FALSE; ++ int error; ++ ++ 
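/*
 * [Editorial note, added in review -- not part of the original diff.]
 * spa_load_verify() carries no header comment in this hunk, so a summary of
 * what the code below does: it consults the pool's rewind policy (returning
 * immediately, i.e. skipping verification, when rewind is disallowed), then
 * walks the entire pool with traverse_pool(), issuing speculative
 * scrub-style reads through spa_load_verify_cb().  Each failed read is
 * tallied by spa_load_verify_done() as either a metadata or a data error.
 * The load is considered verified only if both counts stay within the
 * policy's zrp_maxmeta / zrp_maxdata limits, in which case the load
 * timestamp, rewind loss, and data-error count are recorded in
 * spa->spa_load_info; otherwise the function returns an error so the caller
 * can fail the load or rewind further back.
 */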
zpool_get_rewind_policy(spa->spa_config, &policy); ++ ++ if (policy.zrp_request & ZPOOL_NEVER_REWIND) ++ return (0); ++ ++ rio = zio_root(spa, NULL, &sle, ++ ZIO_FLAG_CANFAIL | ZIO_FLAG_SPECULATIVE); ++ ++ error = traverse_pool(spa, spa->spa_verify_min_txg, ++ TRAVERSE_PRE | TRAVERSE_PREFETCH, spa_load_verify_cb, rio); ++ ++ (void) zio_wait(rio); ++ ++ spa->spa_load_meta_errors = sle.sle_meta_count; ++ spa->spa_load_data_errors = sle.sle_data_count; ++ ++ if (!error && sle.sle_meta_count <= policy.zrp_maxmeta && ++ sle.sle_data_count <= policy.zrp_maxdata) { ++ int64_t loss = 0; ++ ++ verify_ok = B_TRUE; ++ spa->spa_load_txg = spa->spa_uberblock.ub_txg; ++ spa->spa_load_txg_ts = spa->spa_uberblock.ub_timestamp; ++ ++ loss = spa->spa_last_ubsync_txg_ts - spa->spa_load_txg_ts; ++ VERIFY(nvlist_add_uint64(spa->spa_load_info, ++ ZPOOL_CONFIG_LOAD_TIME, spa->spa_load_txg_ts) == 0); ++ VERIFY(nvlist_add_int64(spa->spa_load_info, ++ ZPOOL_CONFIG_REWIND_TIME, loss) == 0); ++ VERIFY(nvlist_add_uint64(spa->spa_load_info, ++ ZPOOL_CONFIG_LOAD_DATA_ERRORS, sle.sle_data_count) == 0); ++ } else { ++ spa->spa_load_max_txg = spa->spa_uberblock.ub_txg; ++ } ++ ++ if (error) { ++ if (error != ENXIO && error != EIO) ++ error = EIO; ++ return (error); ++ } ++ ++ return (verify_ok ? 0 : EIO); ++} ++ ++/* ++ * Find a value in the pool props object. ++ */ ++static void ++spa_prop_find(spa_t *spa, zpool_prop_t prop, uint64_t *val) ++{ ++ (void) zap_lookup(spa->spa_meta_objset, spa->spa_pool_props_object, ++ zpool_prop_to_name(prop), sizeof (uint64_t), 1, val); ++} ++ ++/* ++ * Find a value in the pool directory object. ++ */ ++static int ++spa_dir_prop(spa_t *spa, const char *name, uint64_t *val) ++{ ++ return (zap_lookup(spa->spa_meta_objset, DMU_POOL_DIRECTORY_OBJECT, ++ name, sizeof (uint64_t), 1, val)); ++} ++ ++static int ++spa_vdev_err(vdev_t *vdev, vdev_aux_t aux, int err) ++{ ++ vdev_set_state(vdev, B_TRUE, VDEV_STATE_CANT_OPEN, aux); ++ return (err); ++} ++ ++/* ++ * Fix up config after a partly-completed split. This is done with the ++ * ZPOOL_CONFIG_SPLIT nvlist. Both the splitting pool and the split-off ++ * pool have that entry in their config, but only the splitting one contains ++ * a list of all the guids of the vdevs that are being split off. ++ * ++ * This function determines what to do with that list: either rejoin ++ * all the disks to the pool, or complete the splitting process. To attempt ++ * the rejoin, each disk that is offlined is marked online again, and ++ * we do a reopen() call. If the vdev label for every disk that was ++ * marked online indicates it was successfully split off (VDEV_AUX_SPLIT_POOL) ++ * then we call vdev_split() on each disk, and complete the split. ++ * ++ * Otherwise we leave the config alone, with all the vdevs in place in ++ * the original pool. 
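 * [Editorial note, added in review -- not part of the original diff:
 * concretely, the code below first onlines every listed vdev it can still
 * look up by guid and reopens the root vdev.  If some of the listed guids
 * can no longer be found in this pool, the reopen is skipped and the split
 * is simply completed; otherwise the split is completed (vdev_split() on
 * each disk) only when every vdev that was found reports
 * VDEV_AUX_SPLIT_POOL in its status, and any other outcome leaves the
 * configuration untouched so the disks rejoin the original pool.]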
++ */ ++static void ++spa_try_repair(spa_t *spa, nvlist_t *config) ++{ ++ uint_t extracted; ++ uint64_t *glist; ++ uint_t i, gcount; ++ nvlist_t *nvl; ++ vdev_t **vd; ++ boolean_t attempt_reopen; ++ ++ if (nvlist_lookup_nvlist(config, ZPOOL_CONFIG_SPLIT, &nvl) != 0) ++ return; ++ ++ /* check that the config is complete */ ++ if (nvlist_lookup_uint64_array(nvl, ZPOOL_CONFIG_SPLIT_LIST, ++ &glist, &gcount) != 0) ++ return; ++ ++ vd = kmem_zalloc(gcount * sizeof (vdev_t *), KM_PUSHPAGE); ++ ++ /* attempt to online all the vdevs & validate */ ++ attempt_reopen = B_TRUE; ++ for (i = 0; i < gcount; i++) { ++ if (glist[i] == 0) /* vdev is hole */ ++ continue; ++ ++ vd[i] = spa_lookup_by_guid(spa, glist[i], B_FALSE); ++ if (vd[i] == NULL) { ++ /* ++ * Don't bother attempting to reopen the disks; ++ * just do the split. ++ */ ++ attempt_reopen = B_FALSE; ++ } else { ++ /* attempt to re-online it */ ++ vd[i]->vdev_offline = B_FALSE; ++ } ++ } ++ ++ if (attempt_reopen) { ++ vdev_reopen(spa->spa_root_vdev); ++ ++ /* check each device to see what state it's in */ ++ for (extracted = 0, i = 0; i < gcount; i++) { ++ if (vd[i] != NULL && ++ vd[i]->vdev_stat.vs_aux != VDEV_AUX_SPLIT_POOL) ++ break; ++ ++extracted; ++ } ++ } ++ ++ /* ++ * If every disk has been moved to the new pool, or if we never ++ * even attempted to look at them, then we split them off for ++ * good. ++ */ ++ if (!attempt_reopen || gcount == extracted) { ++ for (i = 0; i < gcount; i++) ++ if (vd[i] != NULL) ++ vdev_split(vd[i]); ++ vdev_reopen(spa->spa_root_vdev); ++ } ++ ++ kmem_free(vd, gcount * sizeof (vdev_t *)); ++} ++ ++static int ++spa_load(spa_t *spa, spa_load_state_t state, spa_import_type_t type, ++ boolean_t mosconfig) ++{ ++ nvlist_t *config = spa->spa_config; ++ char *ereport = FM_EREPORT_ZFS_POOL; ++ char *comment; ++ int error; ++ uint64_t pool_guid; ++ nvlist_t *nvl; ++ ++ if (nvlist_lookup_uint64(config, ZPOOL_CONFIG_POOL_GUID, &pool_guid)) ++ return (EINVAL); ++ ++ ASSERT(spa->spa_comment == NULL); ++ if (nvlist_lookup_string(config, ZPOOL_CONFIG_COMMENT, &comment) == 0) ++ spa->spa_comment = spa_strdup(comment); ++ ++ /* ++ * Versioning wasn't explicitly added to the label until later, so if ++ * it's not present treat it as the initial version. ++ */ ++ if (nvlist_lookup_uint64(config, ZPOOL_CONFIG_VERSION, ++ &spa->spa_ubsync.ub_version) != 0) ++ spa->spa_ubsync.ub_version = SPA_VERSION_INITIAL; ++ ++ (void) nvlist_lookup_uint64(config, ZPOOL_CONFIG_POOL_TXG, ++ &spa->spa_config_txg); ++ ++ if ((state == SPA_LOAD_IMPORT || state == SPA_LOAD_TRYIMPORT) && ++ spa_guid_exists(pool_guid, 0)) { ++ error = EEXIST; ++ } else { ++ spa->spa_config_guid = pool_guid; ++ ++ if (nvlist_lookup_nvlist(config, ZPOOL_CONFIG_SPLIT, ++ &nvl) == 0) { ++ VERIFY(nvlist_dup(nvl, &spa->spa_config_splitting, ++ KM_PUSHPAGE) == 0); ++ } ++ ++ gethrestime(&spa->spa_loaded_ts); ++ error = spa_load_impl(spa, pool_guid, config, state, type, ++ mosconfig, &ereport); ++ } ++ ++ spa->spa_minref = refcount_count(&spa->spa_refcount); ++ if (error) { ++ if (error != EEXIST) { ++ spa->spa_loaded_ts.tv_sec = 0; ++ spa->spa_loaded_ts.tv_nsec = 0; ++ } ++ if (error != EBADF) { ++ zfs_ereport_post(ereport, spa, NULL, NULL, 0, 0); ++ } ++ } ++ spa->spa_load_state = error ? SPA_LOAD_ERROR : SPA_LOAD_NONE; ++ spa->spa_ena = 0; ++ ++ return (error); ++} ++ ++/* ++ * Load an existing storage pool, using the pool's builtin spa_config as a ++ * source of configuration information. 
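 * [Editorial note, added in review -- not part of the original diff:
 * a roadmap of the major phases below, for readers navigating this long
 * function: (1) parse the config into a vdev tree, open the vdevs, and
 * validate their labels; (2) pick the best uberblock and sanity-check the
 * pool version and vdev guid sum (attempting split repair if needed);
 * (3) open the DSL pool and the MOS; (4) if we were called with an
 * untrusted cached config, read the real config from the MOS and restart
 * the load with it; (5) load spares, l2cache devices, and pool properties;
 * (6) cross-check the config against the MOS copy, check the logs, and run
 * spa_load_verify(); (7) for writable pools, claim outstanding ZIL blocks,
 * start txg sync, and queue async config-update / resilver work as
 * needed.]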
++ */ ++__attribute__((always_inline)) ++static inline int ++spa_load_impl(spa_t *spa, uint64_t pool_guid, nvlist_t *config, ++ spa_load_state_t state, spa_import_type_t type, boolean_t mosconfig, ++ char **ereport) ++{ ++ int error = 0; ++ nvlist_t *nvroot = NULL; ++ vdev_t *rvd; ++ uberblock_t *ub = &spa->spa_uberblock; ++ uint64_t children, config_cache_txg = spa->spa_config_txg; ++ int orig_mode = spa->spa_mode; ++ int parse; ++ uint64_t obj; ++ ++ /* ++ * If this is an untrusted config, access the pool in read-only mode. ++ * This prevents things like resilvering recently removed devices. ++ */ ++ if (!mosconfig) ++ spa->spa_mode = FREAD; ++ ++ ASSERT(MUTEX_HELD(&spa_namespace_lock)); ++ ++ spa->spa_load_state = state; ++ ++ if (nvlist_lookup_nvlist(config, ZPOOL_CONFIG_VDEV_TREE, &nvroot)) ++ return (EINVAL); ++ ++ parse = (type == SPA_IMPORT_EXISTING ? ++ VDEV_ALLOC_LOAD : VDEV_ALLOC_SPLIT); ++ ++ /* ++ * Create "The Godfather" zio to hold all async IOs ++ */ ++ spa->spa_async_zio_root = zio_root(spa, NULL, NULL, ++ ZIO_FLAG_CANFAIL | ZIO_FLAG_SPECULATIVE | ZIO_FLAG_GODFATHER); ++ ++ /* ++ * Parse the configuration into a vdev tree. We explicitly set the ++ * value that will be returned by spa_version() since parsing the ++ * configuration requires knowing the version number. ++ */ ++ spa_config_enter(spa, SCL_ALL, FTAG, RW_WRITER); ++ error = spa_config_parse(spa, &rvd, nvroot, NULL, 0, parse); ++ spa_config_exit(spa, SCL_ALL, FTAG); ++ ++ if (error != 0) ++ return (error); ++ ++ ASSERT(spa->spa_root_vdev == rvd); ++ ++ if (type != SPA_IMPORT_ASSEMBLE) { ++ ASSERT(spa_guid(spa) == pool_guid); ++ } ++ ++ /* ++ * Try to open all vdevs, loading each label in the process. ++ */ ++ spa_config_enter(spa, SCL_ALL, FTAG, RW_WRITER); ++ error = vdev_open(rvd); ++ spa_config_exit(spa, SCL_ALL, FTAG); ++ if (error != 0) ++ return (error); ++ ++ /* ++ * We need to validate the vdev labels against the configuration that ++ * we have in hand, which is dependent on the setting of mosconfig. If ++ * mosconfig is true then we're validating the vdev labels based on ++ * that config. Otherwise, we're validating against the cached config ++ * (zpool.cache) that was read when we loaded the zfs module, and then ++ * later we will recursively call spa_load() and validate against ++ * the vdev config. ++ * ++ * If we're assembling a new pool that's been split off from an ++ * existing pool, the labels haven't yet been updated so we skip ++ * validation for now. ++ */ ++ if (type != SPA_IMPORT_ASSEMBLE) { ++ spa_config_enter(spa, SCL_ALL, FTAG, RW_WRITER); ++ error = vdev_validate(rvd, mosconfig); ++ spa_config_exit(spa, SCL_ALL, FTAG); ++ ++ if (error != 0) ++ return (error); ++ ++ if (rvd->vdev_state <= VDEV_STATE_CANT_OPEN) ++ return (ENXIO); ++ } ++ ++ /* ++ * Find the best uberblock. ++ */ ++ vdev_uberblock_load(NULL, rvd, ub); ++ ++ /* ++ * If we weren't able to find a single valid uberblock, return failure. ++ */ ++ if (ub->ub_txg == 0) ++ return (spa_vdev_err(rvd, VDEV_AUX_CORRUPT_DATA, ENXIO)); ++ ++ /* ++ * If the pool is newer than the code, we can't open it. ++ */ ++ if (ub->ub_version > SPA_VERSION) ++ return (spa_vdev_err(rvd, VDEV_AUX_VERSION_NEWER, ENOTSUP)); ++ ++ /* ++ * If the vdev guid sum doesn't match the uberblock, we have an ++ * incomplete configuration. We first check to see if the pool ++ * is aware of the complete config (i.e ZPOOL_CONFIG_VDEV_CHILDREN). ++ * If it is, defer the vdev_guid_sum check till later so we ++ * can handle missing vdevs. 
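 * [Editorial note, added in review -- not part of the original diff:
 * in other words, the immediate VDEV_AUX_BAD_GUID_SUM failure below is only
 * taken when the config does not carry ZPOOL_CONFIG_VDEV_CHILDREN, we are
 * loading from the trusted MOS config, and this is not a split assembly;
 * otherwise the guid-sum check is picked up later by spa_config_valid()
 * for non-split loads.]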
++ */ ++ if (nvlist_lookup_uint64(config, ZPOOL_CONFIG_VDEV_CHILDREN, ++ &children) != 0 && mosconfig && type != SPA_IMPORT_ASSEMBLE && ++ rvd->vdev_guid_sum != ub->ub_guid_sum) ++ return (spa_vdev_err(rvd, VDEV_AUX_BAD_GUID_SUM, ENXIO)); ++ ++ if (type != SPA_IMPORT_ASSEMBLE && spa->spa_config_splitting) { ++ spa_config_enter(spa, SCL_ALL, FTAG, RW_WRITER); ++ spa_try_repair(spa, config); ++ spa_config_exit(spa, SCL_ALL, FTAG); ++ nvlist_free(spa->spa_config_splitting); ++ spa->spa_config_splitting = NULL; ++ } ++ ++ /* ++ * Initialize internal SPA structures. ++ */ ++ spa->spa_state = POOL_STATE_ACTIVE; ++ spa->spa_ubsync = spa->spa_uberblock; ++ spa->spa_verify_min_txg = spa->spa_extreme_rewind ? ++ TXG_INITIAL - 1 : spa_last_synced_txg(spa) - TXG_DEFER_SIZE - 1; ++ spa->spa_first_txg = spa->spa_last_ubsync_txg ? ++ spa->spa_last_ubsync_txg : spa_last_synced_txg(spa) + 1; ++ spa->spa_claim_max_txg = spa->spa_first_txg; ++ spa->spa_prev_software_version = ub->ub_software_version; ++ ++ error = dsl_pool_open(spa, spa->spa_first_txg, &spa->spa_dsl_pool); ++ if (error) ++ return (spa_vdev_err(rvd, VDEV_AUX_CORRUPT_DATA, EIO)); ++ spa->spa_meta_objset = spa->spa_dsl_pool->dp_meta_objset; ++ ++ if (spa_dir_prop(spa, DMU_POOL_CONFIG, &spa->spa_config_object) != 0) ++ return (spa_vdev_err(rvd, VDEV_AUX_CORRUPT_DATA, EIO)); ++ ++ if (!mosconfig) { ++ uint64_t hostid; ++ nvlist_t *policy = NULL, *nvconfig; ++ ++ if (load_nvlist(spa, spa->spa_config_object, &nvconfig) != 0) ++ return (spa_vdev_err(rvd, VDEV_AUX_CORRUPT_DATA, EIO)); ++ ++ if (!spa_is_root(spa) && nvlist_lookup_uint64(nvconfig, ++ ZPOOL_CONFIG_HOSTID, &hostid) == 0) { ++ char *hostname; ++ unsigned long myhostid = 0; ++ ++ VERIFY(nvlist_lookup_string(nvconfig, ++ ZPOOL_CONFIG_HOSTNAME, &hostname) == 0); ++ ++#ifdef _KERNEL ++ myhostid = zone_get_hostid(NULL); ++#else /* _KERNEL */ ++ /* ++ * We're emulating the system's hostid in userland, so ++ * we can't use zone_get_hostid(). ++ */ ++ (void) ddi_strtoul(hw_serial, NULL, 10, &myhostid); ++#endif /* _KERNEL */ ++ if (hostid != 0 && myhostid != 0 && ++ hostid != myhostid) { ++ nvlist_free(nvconfig); ++ cmn_err(CE_WARN, "pool '%s' could not be " ++ "loaded as it was last accessed by " ++ "another system (host: %s hostid: 0x%lx). " ++ "See: http://zfsonlinux.org/msg/ZFS-8000-EY", ++ spa_name(spa), hostname, ++ (unsigned long)hostid); ++ return (EBADF); ++ } ++ } ++ if (nvlist_lookup_nvlist(spa->spa_config, ++ ZPOOL_REWIND_POLICY, &policy) == 0) ++ VERIFY(nvlist_add_nvlist(nvconfig, ++ ZPOOL_REWIND_POLICY, policy) == 0); ++ ++ spa_config_set(spa, nvconfig); ++ spa_unload(spa); ++ spa_deactivate(spa); ++ spa_activate(spa, orig_mode); ++ ++ return (spa_load(spa, state, SPA_IMPORT_EXISTING, B_TRUE)); ++ } ++ ++ if (spa_dir_prop(spa, DMU_POOL_SYNC_BPOBJ, &obj) != 0) ++ return (spa_vdev_err(rvd, VDEV_AUX_CORRUPT_DATA, EIO)); ++ error = bpobj_open(&spa->spa_deferred_bpobj, spa->spa_meta_objset, obj); ++ if (error != 0) ++ return (spa_vdev_err(rvd, VDEV_AUX_CORRUPT_DATA, EIO)); ++ ++ /* ++ * Load the bit that tells us to use the new accounting function ++ * (raid-z deflation). If we have an older pool, this will not ++ * be present. 
++ */ ++ error = spa_dir_prop(spa, DMU_POOL_DEFLATE, &spa->spa_deflate); ++ if (error != 0 && error != ENOENT) ++ return (spa_vdev_err(rvd, VDEV_AUX_CORRUPT_DATA, EIO)); ++ ++ error = spa_dir_prop(spa, DMU_POOL_CREATION_VERSION, ++ &spa->spa_creation_version); ++ if (error != 0 && error != ENOENT) ++ return (spa_vdev_err(rvd, VDEV_AUX_CORRUPT_DATA, EIO)); ++ ++ /* ++ * Load the persistent error log. If we have an older pool, this will ++ * not be present. ++ */ ++ error = spa_dir_prop(spa, DMU_POOL_ERRLOG_LAST, &spa->spa_errlog_last); ++ if (error != 0 && error != ENOENT) ++ return (spa_vdev_err(rvd, VDEV_AUX_CORRUPT_DATA, EIO)); ++ ++ error = spa_dir_prop(spa, DMU_POOL_ERRLOG_SCRUB, ++ &spa->spa_errlog_scrub); ++ if (error != 0 && error != ENOENT) ++ return (spa_vdev_err(rvd, VDEV_AUX_CORRUPT_DATA, EIO)); ++ ++ /* ++ * Load the history object. If we have an older pool, this ++ * will not be present. ++ */ ++ error = spa_dir_prop(spa, DMU_POOL_HISTORY, &spa->spa_history); ++ if (error != 0 && error != ENOENT) ++ return (spa_vdev_err(rvd, VDEV_AUX_CORRUPT_DATA, EIO)); ++ ++ /* ++ * If we're assembling the pool from the split-off vdevs of ++ * an existing pool, we don't want to attach the spares & cache ++ * devices. ++ */ ++ ++ /* ++ * Load any hot spares for this pool. ++ */ ++ error = spa_dir_prop(spa, DMU_POOL_SPARES, &spa->spa_spares.sav_object); ++ if (error != 0 && error != ENOENT) ++ return (spa_vdev_err(rvd, VDEV_AUX_CORRUPT_DATA, EIO)); ++ if (error == 0 && type != SPA_IMPORT_ASSEMBLE) { ++ ASSERT(spa_version(spa) >= SPA_VERSION_SPARES); ++ if (load_nvlist(spa, spa->spa_spares.sav_object, ++ &spa->spa_spares.sav_config) != 0) ++ return (spa_vdev_err(rvd, VDEV_AUX_CORRUPT_DATA, EIO)); ++ ++ spa_config_enter(spa, SCL_ALL, FTAG, RW_WRITER); ++ spa_load_spares(spa); ++ spa_config_exit(spa, SCL_ALL, FTAG); ++ } else if (error == 0) { ++ spa->spa_spares.sav_sync = B_TRUE; ++ } ++ ++ /* ++ * Load any level 2 ARC devices for this pool. ++ */ ++ error = spa_dir_prop(spa, DMU_POOL_L2CACHE, ++ &spa->spa_l2cache.sav_object); ++ if (error != 0 && error != ENOENT) ++ return (spa_vdev_err(rvd, VDEV_AUX_CORRUPT_DATA, EIO)); ++ if (error == 0 && type != SPA_IMPORT_ASSEMBLE) { ++ ASSERT(spa_version(spa) >= SPA_VERSION_L2CACHE); ++ if (load_nvlist(spa, spa->spa_l2cache.sav_object, ++ &spa->spa_l2cache.sav_config) != 0) ++ return (spa_vdev_err(rvd, VDEV_AUX_CORRUPT_DATA, EIO)); ++ ++ spa_config_enter(spa, SCL_ALL, FTAG, RW_WRITER); ++ spa_load_l2cache(spa); ++ spa_config_exit(spa, SCL_ALL, FTAG); ++ } else if (error == 0) { ++ spa->spa_l2cache.sav_sync = B_TRUE; ++ } ++ ++ spa->spa_delegation = zpool_prop_default_numeric(ZPOOL_PROP_DELEGATION); ++ ++ error = spa_dir_prop(spa, DMU_POOL_PROPS, &spa->spa_pool_props_object); ++ if (error && error != ENOENT) ++ return (spa_vdev_err(rvd, VDEV_AUX_CORRUPT_DATA, EIO)); ++ ++ if (error == 0) { ++ uint64_t autoreplace; ++ ++ spa_prop_find(spa, ZPOOL_PROP_BOOTFS, &spa->spa_bootfs); ++ spa_prop_find(spa, ZPOOL_PROP_AUTOREPLACE, &autoreplace); ++ spa_prop_find(spa, ZPOOL_PROP_DELEGATION, &spa->spa_delegation); ++ spa_prop_find(spa, ZPOOL_PROP_FAILUREMODE, &spa->spa_failmode); ++ spa_prop_find(spa, ZPOOL_PROP_AUTOEXPAND, &spa->spa_autoexpand); ++ spa_prop_find(spa, ZPOOL_PROP_DEDUPDITTO, ++ &spa->spa_dedup_ditto); ++ ++ spa->spa_autoreplace = (autoreplace != 0); ++ } ++ ++ /* ++ * If the 'autoreplace' property is set, then post a resource notifying ++ * the ZFS DE that it should not issue any faults for unopenable ++ * devices. 
We also iterate over the vdevs, and post a sysevent for any ++ * unopenable vdevs so that the normal autoreplace handler can take ++ * over. ++ */ ++ if (spa->spa_autoreplace && state != SPA_LOAD_TRYIMPORT) { ++ spa_check_removed(spa->spa_root_vdev); ++ /* ++ * For the import case, this is done in spa_import(), because ++ * at this point we're using the spare definitions from ++ * the MOS config, not necessarily from the userland config. ++ */ ++ if (state != SPA_LOAD_IMPORT) { ++ spa_aux_check_removed(&spa->spa_spares); ++ spa_aux_check_removed(&spa->spa_l2cache); ++ } ++ } ++ ++ /* ++ * Load the vdev state for all toplevel vdevs. ++ */ ++ vdev_load(rvd); ++ ++ /* ++ * Propagate the leaf DTLs we just loaded all the way up the tree. ++ */ ++ spa_config_enter(spa, SCL_ALL, FTAG, RW_WRITER); ++ vdev_dtl_reassess(rvd, 0, 0, B_FALSE); ++ spa_config_exit(spa, SCL_ALL, FTAG); ++ ++ /* ++ * Load the DDTs (dedup tables). ++ */ ++ error = ddt_load(spa); ++ if (error != 0) ++ return (spa_vdev_err(rvd, VDEV_AUX_CORRUPT_DATA, EIO)); ++ ++ spa_update_dspace(spa); ++ ++ /* ++ * Validate the config, using the MOS config to fill in any ++ * information which might be missing. If we fail to validate ++ * the config then declare the pool unfit for use. If we're ++ * assembling a pool from a split, the log is not transferred ++ * over. ++ */ ++ if (type != SPA_IMPORT_ASSEMBLE) { ++ nvlist_t *nvconfig; ++ ++ if (load_nvlist(spa, spa->spa_config_object, &nvconfig) != 0) ++ return (spa_vdev_err(rvd, VDEV_AUX_CORRUPT_DATA, EIO)); ++ ++ if (!spa_config_valid(spa, nvconfig)) { ++ nvlist_free(nvconfig); ++ return (spa_vdev_err(rvd, VDEV_AUX_BAD_GUID_SUM, ++ ENXIO)); ++ } ++ nvlist_free(nvconfig); ++ ++ /* ++ * Now that we've validate the config, check the state of the ++ * root vdev. If it can't be opened, it indicates one or ++ * more toplevel vdevs are faulted. ++ */ ++ if (rvd->vdev_state <= VDEV_STATE_CANT_OPEN) ++ return (ENXIO); ++ ++ if (spa_check_logs(spa)) { ++ *ereport = FM_EREPORT_ZFS_LOG_REPLAY; ++ return (spa_vdev_err(rvd, VDEV_AUX_BAD_LOG, ENXIO)); ++ } ++ } ++ ++ /* ++ * We've successfully opened the pool, verify that we're ready ++ * to start pushing transactions. ++ */ ++ if (state != SPA_LOAD_TRYIMPORT) { ++ if ((error = spa_load_verify(spa))) ++ return (spa_vdev_err(rvd, VDEV_AUX_CORRUPT_DATA, ++ error)); ++ } ++ ++ if (spa_writeable(spa) && (state == SPA_LOAD_RECOVER || ++ spa->spa_load_max_txg == UINT64_MAX)) { ++ dmu_tx_t *tx; ++ int need_update = B_FALSE; ++ int c; ++ ++ ASSERT(state != SPA_LOAD_TRYIMPORT); ++ ++ /* ++ * Claim log blocks that haven't been committed yet. ++ * This must all happen in a single txg. ++ * Note: spa_claim_max_txg is updated by spa_claim_notify(), ++ * invoked from zil_claim_log_block()'s i/o done callback. ++ * Price of rollback is that we abandon the log. ++ */ ++ spa->spa_claiming = B_TRUE; ++ ++ tx = dmu_tx_create_assigned(spa_get_dsl(spa), ++ spa_first_txg(spa)); ++ (void) dmu_objset_find(spa_name(spa), ++ zil_claim, tx, DS_FIND_CHILDREN); ++ dmu_tx_commit(tx); ++ ++ spa->spa_claiming = B_FALSE; ++ ++ spa_set_log_state(spa, SPA_LOG_GOOD); ++ spa->spa_sync_on = B_TRUE; ++ txg_sync_start(spa->spa_dsl_pool); ++ ++ /* ++ * Wait for all claims to sync. We sync up to the highest ++ * claimed log block birth time so that claimed log blocks ++ * don't appear to be from the future. spa_claim_max_txg ++ * will have been set for us by either zil_check_log_chain() ++ * (invoked from spa_check_logs()) or zil_claim() above. 
++ */ ++ txg_wait_synced(spa->spa_dsl_pool, spa->spa_claim_max_txg); ++ ++ /* ++ * If the config cache is stale, or we have uninitialized ++ * metaslabs (see spa_vdev_add()), then update the config. ++ * ++ * If this is a verbatim import, trust the current ++ * in-core spa_config and update the disk labels. ++ */ ++ if (config_cache_txg != spa->spa_config_txg || ++ state == SPA_LOAD_IMPORT || ++ state == SPA_LOAD_RECOVER || ++ (spa->spa_import_flags & ZFS_IMPORT_VERBATIM)) ++ need_update = B_TRUE; ++ ++ for (c = 0; c < rvd->vdev_children; c++) ++ if (rvd->vdev_child[c]->vdev_ms_array == 0) ++ need_update = B_TRUE; ++ ++ /* ++ * Update the config cache asychronously in case we're the ++ * root pool, in which case the config cache isn't writable yet. ++ */ ++ if (need_update) ++ spa_async_request(spa, SPA_ASYNC_CONFIG_UPDATE); ++ ++ /* ++ * Check all DTLs to see if anything needs resilvering. ++ */ ++ if (!dsl_scan_resilvering(spa->spa_dsl_pool) && ++ vdev_resilver_needed(rvd, NULL, NULL)) ++ spa_async_request(spa, SPA_ASYNC_RESILVER); ++ ++ /* ++ * Delete any inconsistent datasets. ++ */ ++ (void) dmu_objset_find(spa_name(spa), ++ dsl_destroy_inconsistent, NULL, DS_FIND_CHILDREN); ++ ++ /* ++ * Clean up any stale temporary dataset userrefs. ++ */ ++ dsl_pool_clean_tmp_userrefs(spa->spa_dsl_pool); ++ } ++ ++ return (0); ++} ++ ++static int ++spa_load_retry(spa_t *spa, spa_load_state_t state, int mosconfig) ++{ ++ int mode = spa->spa_mode; ++ ++ spa_unload(spa); ++ spa_deactivate(spa); ++ ++ spa->spa_load_max_txg--; ++ ++ spa_activate(spa, mode); ++ spa_async_suspend(spa); ++ ++ return (spa_load(spa, state, SPA_IMPORT_EXISTING, mosconfig)); ++} ++ ++static int ++spa_load_best(spa_t *spa, spa_load_state_t state, int mosconfig, ++ uint64_t max_request, int rewind_flags) ++{ ++ nvlist_t *config = NULL; ++ int load_error, rewind_error; ++ uint64_t safe_rewind_txg; ++ uint64_t min_txg; ++ ++ if (spa->spa_load_txg && state == SPA_LOAD_RECOVER) { ++ spa->spa_load_max_txg = spa->spa_load_txg; ++ spa_set_log_state(spa, SPA_LOG_CLEAR); ++ } else { ++ spa->spa_load_max_txg = max_request; ++ } ++ ++ load_error = rewind_error = spa_load(spa, state, SPA_IMPORT_EXISTING, ++ mosconfig); ++ if (load_error == 0) ++ return (0); ++ ++ if (spa->spa_root_vdev != NULL) ++ config = spa_config_generate(spa, NULL, -1ULL, B_TRUE); ++ ++ spa->spa_last_ubsync_txg = spa->spa_uberblock.ub_txg; ++ spa->spa_last_ubsync_txg_ts = spa->spa_uberblock.ub_timestamp; ++ ++ if (rewind_flags & ZPOOL_NEVER_REWIND) { ++ nvlist_free(config); ++ return (load_error); ++ } ++ ++ /* Price of rolling back is discarding txgs, including log */ ++ if (state == SPA_LOAD_RECOVER) ++ spa_set_log_state(spa, SPA_LOG_CLEAR); ++ ++ spa->spa_load_max_txg = spa->spa_last_ubsync_txg; ++ safe_rewind_txg = spa->spa_last_ubsync_txg - TXG_DEFER_SIZE; ++ min_txg = (rewind_flags & ZPOOL_EXTREME_REWIND) ? 
++ TXG_INITIAL : safe_rewind_txg; ++ ++ /* ++ * Continue as long as we're finding errors, we're still within ++ * the acceptable rewind range, and we're still finding uberblocks ++ */ ++ while (rewind_error && spa->spa_uberblock.ub_txg >= min_txg && ++ spa->spa_uberblock.ub_txg <= spa->spa_load_max_txg) { ++ if (spa->spa_load_max_txg < safe_rewind_txg) ++ spa->spa_extreme_rewind = B_TRUE; ++ rewind_error = spa_load_retry(spa, state, mosconfig); ++ } ++ ++ spa->spa_extreme_rewind = B_FALSE; ++ spa->spa_load_max_txg = UINT64_MAX; ++ ++ if (config && (rewind_error || state != SPA_LOAD_RECOVER)) ++ spa_config_set(spa, config); ++ ++ return (state == SPA_LOAD_RECOVER ? rewind_error : load_error); ++} ++ ++/* ++ * Pool Open/Import ++ * ++ * The import case is identical to an open except that the configuration is sent ++ * down from userland, instead of grabbed from the configuration cache. For the ++ * case of an open, the pool configuration will exist in the ++ * POOL_STATE_UNINITIALIZED state. ++ * ++ * The stats information (gen/count/ustats) is used to gather vdev statistics at ++ * the same time open the pool, without having to keep around the spa_t in some ++ * ambiguous state. ++ */ ++static int ++spa_open_common(const char *pool, spa_t **spapp, void *tag, nvlist_t *nvpolicy, ++ nvlist_t **config) ++{ ++ spa_t *spa; ++ spa_load_state_t state = SPA_LOAD_OPEN; ++ int error; ++ int locked = B_FALSE; ++ ++ *spapp = NULL; ++ ++ /* ++ * As disgusting as this is, we need to support recursive calls to this ++ * function because dsl_dir_open() is called during spa_load(), and ends ++ * up calling spa_open() again. The real fix is to figure out how to ++ * avoid dsl_dir_open() calling this in the first place. ++ */ ++ if (mutex_owner(&spa_namespace_lock) != curthread) { ++ mutex_enter(&spa_namespace_lock); ++ locked = B_TRUE; ++ } ++ ++ if ((spa = spa_lookup(pool)) == NULL) { ++ if (locked) ++ mutex_exit(&spa_namespace_lock); ++ return (ENOENT); ++ } ++ ++ if (spa->spa_state == POOL_STATE_UNINITIALIZED) { ++ zpool_rewind_policy_t policy; ++ ++ zpool_get_rewind_policy(nvpolicy ? nvpolicy : spa->spa_config, ++ &policy); ++ if (policy.zrp_request & ZPOOL_DO_REWIND) ++ state = SPA_LOAD_RECOVER; ++ ++ spa_activate(spa, spa_mode_global); ++ ++ if (state != SPA_LOAD_RECOVER) ++ spa->spa_last_ubsync_txg = spa->spa_load_txg = 0; ++ ++ error = spa_load_best(spa, state, B_FALSE, policy.zrp_txg, ++ policy.zrp_request); ++ ++ if (error == EBADF) { ++ /* ++ * If vdev_validate() returns failure (indicated by ++ * EBADF), it indicates that one of the vdevs indicates ++ * that the pool has been exported or destroyed. If ++ * this is the case, the config cache is out of sync and ++ * we should remove the pool from the namespace. ++ */ ++ spa_unload(spa); ++ spa_deactivate(spa); ++ spa_config_sync(spa, B_TRUE, B_TRUE); ++ spa_remove(spa); ++ if (locked) ++ mutex_exit(&spa_namespace_lock); ++ return (ENOENT); ++ } ++ ++ if (error) { ++ /* ++ * We can't open the pool, but we still have useful ++ * information: the state of each vdev after the ++ * attempted vdev_open(). Return this to the user. 
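 * [Editorial note, added in review -- not part of the original diff:
 * as the code below shows, this is done by duplicating spa->spa_config into
 * *config and attaching spa->spa_load_info under ZPOOL_CONFIG_LOAD_INFO,
 * so userland tools can display per-vdev state and any rewind or
 * missing-device details even though the open failed.]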
++ */ ++ if (config != NULL && spa->spa_config) { ++ VERIFY(nvlist_dup(spa->spa_config, config, ++ KM_PUSHPAGE) == 0); ++ VERIFY(nvlist_add_nvlist(*config, ++ ZPOOL_CONFIG_LOAD_INFO, ++ spa->spa_load_info) == 0); ++ } ++ spa_unload(spa); ++ spa_deactivate(spa); ++ spa->spa_last_open_failed = error; ++ if (locked) ++ mutex_exit(&spa_namespace_lock); ++ *spapp = NULL; ++ return (error); ++ } ++ } ++ ++ spa_open_ref(spa, tag); ++ ++ if (config != NULL) ++ *config = spa_config_generate(spa, NULL, -1ULL, B_TRUE); ++ ++ /* ++ * If we've recovered the pool, pass back any information we ++ * gathered while doing the load. ++ */ ++ if (state == SPA_LOAD_RECOVER) { ++ VERIFY(nvlist_add_nvlist(*config, ZPOOL_CONFIG_LOAD_INFO, ++ spa->spa_load_info) == 0); ++ } ++ ++ if (locked) { ++ spa->spa_last_open_failed = 0; ++ spa->spa_last_ubsync_txg = 0; ++ spa->spa_load_txg = 0; ++ mutex_exit(&spa_namespace_lock); ++ } ++ ++ *spapp = spa; ++ ++ return (0); ++} ++ ++int ++spa_open_rewind(const char *name, spa_t **spapp, void *tag, nvlist_t *policy, ++ nvlist_t **config) ++{ ++ return (spa_open_common(name, spapp, tag, policy, config)); ++} ++ ++int ++spa_open(const char *name, spa_t **spapp, void *tag) ++{ ++ return (spa_open_common(name, spapp, tag, NULL, NULL)); ++} ++ ++/* ++ * Lookup the given spa_t, incrementing the inject count in the process, ++ * preventing it from being exported or destroyed. ++ */ ++spa_t * ++spa_inject_addref(char *name) ++{ ++ spa_t *spa; ++ ++ mutex_enter(&spa_namespace_lock); ++ if ((spa = spa_lookup(name)) == NULL) { ++ mutex_exit(&spa_namespace_lock); ++ return (NULL); ++ } ++ spa->spa_inject_ref++; ++ mutex_exit(&spa_namespace_lock); ++ ++ return (spa); ++} ++ ++void ++spa_inject_delref(spa_t *spa) ++{ ++ mutex_enter(&spa_namespace_lock); ++ spa->spa_inject_ref--; ++ mutex_exit(&spa_namespace_lock); ++} ++ ++/* ++ * Add spares device information to the nvlist. ++ */ ++static void ++spa_add_spares(spa_t *spa, nvlist_t *config) ++{ ++ nvlist_t **spares; ++ uint_t i, nspares; ++ nvlist_t *nvroot; ++ uint64_t guid; ++ vdev_stat_t *vs; ++ uint_t vsc; ++ uint64_t pool; ++ ++ ASSERT(spa_config_held(spa, SCL_CONFIG, RW_READER)); ++ ++ if (spa->spa_spares.sav_count == 0) ++ return; ++ ++ VERIFY(nvlist_lookup_nvlist(config, ++ ZPOOL_CONFIG_VDEV_TREE, &nvroot) == 0); ++ VERIFY(nvlist_lookup_nvlist_array(spa->spa_spares.sav_config, ++ ZPOOL_CONFIG_SPARES, &spares, &nspares) == 0); ++ if (nspares != 0) { ++ VERIFY(nvlist_add_nvlist_array(nvroot, ++ ZPOOL_CONFIG_SPARES, spares, nspares) == 0); ++ VERIFY(nvlist_lookup_nvlist_array(nvroot, ++ ZPOOL_CONFIG_SPARES, &spares, &nspares) == 0); ++ ++ /* ++ * Go through and find any spares which have since been ++ * repurposed as an active spare. If this is the case, update ++ * their status appropriately. ++ */ ++ for (i = 0; i < nspares; i++) { ++ VERIFY(nvlist_lookup_uint64(spares[i], ++ ZPOOL_CONFIG_GUID, &guid) == 0); ++ if (spa_spare_exists(guid, &pool, NULL) && ++ pool != 0ULL) { ++ VERIFY(nvlist_lookup_uint64_array( ++ spares[i], ZPOOL_CONFIG_VDEV_STATS, ++ (uint64_t **)&vs, &vsc) == 0); ++ vs->vs_state = VDEV_STATE_CANT_OPEN; ++ vs->vs_aux = VDEV_AUX_SPARED; ++ } ++ } ++ } ++} ++ ++/* ++ * Add l2cache device information to the nvlist, including vdev stats. 
++ */ ++static void ++spa_add_l2cache(spa_t *spa, nvlist_t *config) ++{ ++ nvlist_t **l2cache; ++ uint_t i, j, nl2cache; ++ nvlist_t *nvroot; ++ uint64_t guid; ++ vdev_t *vd; ++ vdev_stat_t *vs; ++ uint_t vsc; ++ ++ ASSERT(spa_config_held(spa, SCL_CONFIG, RW_READER)); ++ ++ if (spa->spa_l2cache.sav_count == 0) ++ return; ++ ++ VERIFY(nvlist_lookup_nvlist(config, ++ ZPOOL_CONFIG_VDEV_TREE, &nvroot) == 0); ++ VERIFY(nvlist_lookup_nvlist_array(spa->spa_l2cache.sav_config, ++ ZPOOL_CONFIG_L2CACHE, &l2cache, &nl2cache) == 0); ++ if (nl2cache != 0) { ++ VERIFY(nvlist_add_nvlist_array(nvroot, ++ ZPOOL_CONFIG_L2CACHE, l2cache, nl2cache) == 0); ++ VERIFY(nvlist_lookup_nvlist_array(nvroot, ++ ZPOOL_CONFIG_L2CACHE, &l2cache, &nl2cache) == 0); ++ ++ /* ++ * Update level 2 cache device stats. ++ */ ++ ++ for (i = 0; i < nl2cache; i++) { ++ VERIFY(nvlist_lookup_uint64(l2cache[i], ++ ZPOOL_CONFIG_GUID, &guid) == 0); ++ ++ vd = NULL; ++ for (j = 0; j < spa->spa_l2cache.sav_count; j++) { ++ if (guid == ++ spa->spa_l2cache.sav_vdevs[j]->vdev_guid) { ++ vd = spa->spa_l2cache.sav_vdevs[j]; ++ break; ++ } ++ } ++ ASSERT(vd != NULL); ++ ++ VERIFY(nvlist_lookup_uint64_array(l2cache[i], ++ ZPOOL_CONFIG_VDEV_STATS, (uint64_t **)&vs, &vsc) ++ == 0); ++ vdev_get_stats(vd, vs); ++ } ++ } ++} ++ ++int ++spa_get_stats(const char *name, nvlist_t **config, char *altroot, size_t buflen) ++{ ++ int error; ++ spa_t *spa; ++ ++ *config = NULL; ++ error = spa_open_common(name, &spa, FTAG, NULL, config); ++ ++ if (spa != NULL) { ++ /* ++ * This still leaves a window of inconsistency where the spares ++ * or l2cache devices could change and the config would be ++ * self-inconsistent. ++ */ ++ spa_config_enter(spa, SCL_CONFIG, FTAG, RW_READER); ++ ++ if (*config != NULL) { ++ uint64_t loadtimes[2]; ++ ++ loadtimes[0] = spa->spa_loaded_ts.tv_sec; ++ loadtimes[1] = spa->spa_loaded_ts.tv_nsec; ++ VERIFY(nvlist_add_uint64_array(*config, ++ ZPOOL_CONFIG_LOADED_TIME, loadtimes, 2) == 0); ++ ++ VERIFY(nvlist_add_uint64(*config, ++ ZPOOL_CONFIG_ERRCOUNT, ++ spa_get_errlog_size(spa)) == 0); ++ ++ if (spa_suspended(spa)) ++ VERIFY(nvlist_add_uint64(*config, ++ ZPOOL_CONFIG_SUSPENDED, ++ spa->spa_failmode) == 0); ++ ++ spa_add_spares(spa, *config); ++ spa_add_l2cache(spa, *config); ++ } ++ } ++ ++ /* ++ * We want to get the alternate root even for faulted pools, so we cheat ++ * and call spa_lookup() directly. ++ */ ++ if (altroot) { ++ if (spa == NULL) { ++ mutex_enter(&spa_namespace_lock); ++ spa = spa_lookup(name); ++ if (spa) ++ spa_altroot(spa, altroot, buflen); ++ else ++ altroot[0] = '\0'; ++ spa = NULL; ++ mutex_exit(&spa_namespace_lock); ++ } else { ++ spa_altroot(spa, altroot, buflen); ++ } ++ } ++ ++ if (spa != NULL) { ++ spa_config_exit(spa, SCL_CONFIG, FTAG); ++ spa_close(spa, FTAG); ++ } ++ ++ return (error); ++} ++ ++/* ++ * Validate that the auxiliary device array is well formed. We must have an ++ * array of nvlists, each which describes a valid leaf vdev. If this is an ++ * import (mode is VDEV_ALLOC_SPARE), then we allow corrupted spares to be ++ * specified, as long as they are well-formed. ++ */ ++static int ++spa_validate_aux_devs(spa_t *spa, nvlist_t *nvroot, uint64_t crtxg, int mode, ++ spa_aux_vdev_t *sav, const char *config, uint64_t version, ++ vdev_labeltype_t label) ++{ ++ nvlist_t **dev; ++ uint_t i, ndev; ++ vdev_t *vd; ++ int error; ++ ++ ASSERT(spa_config_held(spa, SCL_ALL, RW_WRITER) == SCL_ALL); ++ ++ /* ++ * It's acceptable to have no devs specified. 
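 * [Editorial note, added in review -- not part of the original diff:
 * "no devs" means the array is absent entirely (return 0 below); an array
 * that is present but empty is rejected with EINVAL.  For reference, a
 * minimal sketch of the shape the caller is expected to pass in -- the
 * path is a placeholder and error handling is omitted:
 *
 *	nvlist_t *spare, *nvroot;
 *
 *	VERIFY(nvlist_alloc(&spare, NV_UNIQUE_NAME, KM_SLEEP) == 0);
 *	VERIFY(nvlist_add_string(spare, ZPOOL_CONFIG_TYPE,
 *	    VDEV_TYPE_DISK) == 0);
 *	VERIFY(nvlist_add_string(spare, ZPOOL_CONFIG_PATH, "/dev/sdX") == 0);
 *	VERIFY(nvlist_alloc(&nvroot, NV_UNIQUE_NAME, KM_SLEEP) == 0);
 *	VERIFY(nvlist_add_nvlist_array(nvroot, ZPOOL_CONFIG_SPARES,
 *	    &spare, 1) == 0);
 *
 * i.e. an nvlist array under ZPOOL_CONFIG_SPARES (or ZPOOL_CONFIG_L2CACHE),
 * each element describing one leaf vdev.]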
++ */ ++ if (nvlist_lookup_nvlist_array(nvroot, config, &dev, &ndev) != 0) ++ return (0); ++ ++ if (ndev == 0) ++ return (EINVAL); ++ ++ /* ++ * Make sure the pool is formatted with a version that supports this ++ * device type. ++ */ ++ if (spa_version(spa) < version) ++ return (ENOTSUP); ++ ++ /* ++ * Set the pending device list so we correctly handle device in-use ++ * checking. ++ */ ++ sav->sav_pending = dev; ++ sav->sav_npending = ndev; ++ ++ for (i = 0; i < ndev; i++) { ++ if ((error = spa_config_parse(spa, &vd, dev[i], NULL, 0, ++ mode)) != 0) ++ goto out; ++ ++ if (!vd->vdev_ops->vdev_op_leaf) { ++ vdev_free(vd); ++ error = EINVAL; ++ goto out; ++ } ++ ++ /* ++ * The L2ARC currently only supports disk devices in ++ * kernel context. For user-level testing, we allow it. ++ */ ++#ifdef _KERNEL ++ if ((strcmp(config, ZPOOL_CONFIG_L2CACHE) == 0) && ++ strcmp(vd->vdev_ops->vdev_op_type, VDEV_TYPE_DISK) != 0) { ++ error = ENOTBLK; ++ vdev_free(vd); ++ goto out; ++ } ++#endif ++ vd->vdev_top = vd; ++ ++ if ((error = vdev_open(vd)) == 0 && ++ (error = vdev_label_init(vd, crtxg, label)) == 0) { ++ VERIFY(nvlist_add_uint64(dev[i], ZPOOL_CONFIG_GUID, ++ vd->vdev_guid) == 0); ++ } ++ ++ vdev_free(vd); ++ ++ if (error && ++ (mode != VDEV_ALLOC_SPARE && mode != VDEV_ALLOC_L2CACHE)) ++ goto out; ++ else ++ error = 0; ++ } ++ ++out: ++ sav->sav_pending = NULL; ++ sav->sav_npending = 0; ++ return (error); ++} ++ ++static int ++spa_validate_aux(spa_t *spa, nvlist_t *nvroot, uint64_t crtxg, int mode) ++{ ++ int error; ++ ++ ASSERT(spa_config_held(spa, SCL_ALL, RW_WRITER) == SCL_ALL); ++ ++ if ((error = spa_validate_aux_devs(spa, nvroot, crtxg, mode, ++ &spa->spa_spares, ZPOOL_CONFIG_SPARES, SPA_VERSION_SPARES, ++ VDEV_LABEL_SPARE)) != 0) { ++ return (error); ++ } ++ ++ return (spa_validate_aux_devs(spa, nvroot, crtxg, mode, ++ &spa->spa_l2cache, ZPOOL_CONFIG_L2CACHE, SPA_VERSION_L2CACHE, ++ VDEV_LABEL_L2CACHE)); ++} ++ ++static void ++spa_set_aux_vdevs(spa_aux_vdev_t *sav, nvlist_t **devs, int ndevs, ++ const char *config) ++{ ++ int i; ++ ++ if (sav->sav_config != NULL) { ++ nvlist_t **olddevs; ++ uint_t oldndevs; ++ nvlist_t **newdevs; ++ ++ /* ++ * Generate new dev list by concatentating with the ++ * current dev list. ++ */ ++ VERIFY(nvlist_lookup_nvlist_array(sav->sav_config, config, ++ &olddevs, &oldndevs) == 0); ++ ++ newdevs = kmem_alloc(sizeof (void *) * ++ (ndevs + oldndevs), KM_PUSHPAGE); ++ for (i = 0; i < oldndevs; i++) ++ VERIFY(nvlist_dup(olddevs[i], &newdevs[i], ++ KM_PUSHPAGE) == 0); ++ for (i = 0; i < ndevs; i++) ++ VERIFY(nvlist_dup(devs[i], &newdevs[i + oldndevs], ++ KM_PUSHPAGE) == 0); ++ ++ VERIFY(nvlist_remove(sav->sav_config, config, ++ DATA_TYPE_NVLIST_ARRAY) == 0); ++ ++ VERIFY(nvlist_add_nvlist_array(sav->sav_config, ++ config, newdevs, ndevs + oldndevs) == 0); ++ for (i = 0; i < oldndevs + ndevs; i++) ++ nvlist_free(newdevs[i]); ++ kmem_free(newdevs, (oldndevs + ndevs) * sizeof (void *)); ++ } else { ++ /* ++ * Generate a new dev list. 
++ */ ++ VERIFY(nvlist_alloc(&sav->sav_config, NV_UNIQUE_NAME, ++ KM_PUSHPAGE) == 0); ++ VERIFY(nvlist_add_nvlist_array(sav->sav_config, config, ++ devs, ndevs) == 0); ++ } ++} ++ ++/* ++ * Stop and drop level 2 ARC devices ++ */ ++void ++spa_l2cache_drop(spa_t *spa) ++{ ++ vdev_t *vd; ++ int i; ++ spa_aux_vdev_t *sav = &spa->spa_l2cache; ++ ++ for (i = 0; i < sav->sav_count; i++) { ++ uint64_t pool; ++ ++ vd = sav->sav_vdevs[i]; ++ ASSERT(vd != NULL); ++ ++ if (spa_l2cache_exists(vd->vdev_guid, &pool) && ++ pool != 0ULL && l2arc_vdev_present(vd)) ++ l2arc_remove_vdev(vd); ++ } ++} ++ ++/* ++ * Pool Creation ++ */ ++int ++spa_create(const char *pool, nvlist_t *nvroot, nvlist_t *props, ++ const char *history_str, nvlist_t *zplprops) ++{ ++ spa_t *spa; ++ char *altroot = NULL; ++ vdev_t *rvd; ++ dsl_pool_t *dp; ++ dmu_tx_t *tx; ++ int error = 0; ++ uint64_t txg = TXG_INITIAL; ++ nvlist_t **spares, **l2cache; ++ uint_t nspares, nl2cache; ++ uint64_t version, obj; ++ int c; ++ ++ /* ++ * If this pool already exists, return failure. ++ */ ++ mutex_enter(&spa_namespace_lock); ++ if (spa_lookup(pool) != NULL) { ++ mutex_exit(&spa_namespace_lock); ++ return (EEXIST); ++ } ++ ++ /* ++ * Allocate a new spa_t structure. ++ */ ++ (void) nvlist_lookup_string(props, ++ zpool_prop_to_name(ZPOOL_PROP_ALTROOT), &altroot); ++ spa = spa_add(pool, NULL, altroot); ++ spa_activate(spa, spa_mode_global); ++ ++ if (props && (error = spa_prop_validate(spa, props))) { ++ spa_deactivate(spa); ++ spa_remove(spa); ++ mutex_exit(&spa_namespace_lock); ++ return (error); ++ } ++ ++ if (nvlist_lookup_uint64(props, zpool_prop_to_name(ZPOOL_PROP_VERSION), ++ &version) != 0) ++ version = SPA_VERSION; ++ ASSERT(version <= SPA_VERSION); ++ ++ spa->spa_first_txg = txg; ++ spa->spa_uberblock.ub_txg = txg - 1; ++ spa->spa_uberblock.ub_version = version; ++ spa->spa_ubsync = spa->spa_uberblock; ++ ++ /* ++ * Create "The Godfather" zio to hold all async IOs ++ */ ++ spa->spa_async_zio_root = zio_root(spa, NULL, NULL, ++ ZIO_FLAG_CANFAIL | ZIO_FLAG_SPECULATIVE | ZIO_FLAG_GODFATHER); ++ ++ /* ++ * Create the root vdev. ++ */ ++ spa_config_enter(spa, SCL_ALL, FTAG, RW_WRITER); ++ ++ error = spa_config_parse(spa, &rvd, nvroot, NULL, 0, VDEV_ALLOC_ADD); ++ ++ ASSERT(error != 0 || rvd != NULL); ++ ASSERT(error != 0 || spa->spa_root_vdev == rvd); ++ ++ if (error == 0 && !zfs_allocatable_devs(nvroot)) ++ error = EINVAL; ++ ++ if (error == 0 && ++ (error = vdev_create(rvd, txg, B_FALSE)) == 0 && ++ (error = spa_validate_aux(spa, nvroot, txg, ++ VDEV_ALLOC_ADD)) == 0) { ++ for (c = 0; c < rvd->vdev_children; c++) { ++ vdev_metaslab_set_size(rvd->vdev_child[c]); ++ vdev_expand(rvd->vdev_child[c], txg); ++ } ++ } ++ ++ spa_config_exit(spa, SCL_ALL, FTAG); ++ ++ if (error != 0) { ++ spa_unload(spa); ++ spa_deactivate(spa); ++ spa_remove(spa); ++ mutex_exit(&spa_namespace_lock); ++ return (error); ++ } ++ ++ /* ++ * Get the list of spares, if specified. ++ */ ++ if (nvlist_lookup_nvlist_array(nvroot, ZPOOL_CONFIG_SPARES, ++ &spares, &nspares) == 0) { ++ VERIFY(nvlist_alloc(&spa->spa_spares.sav_config, NV_UNIQUE_NAME, ++ KM_PUSHPAGE) == 0); ++ VERIFY(nvlist_add_nvlist_array(spa->spa_spares.sav_config, ++ ZPOOL_CONFIG_SPARES, spares, nspares) == 0); ++ spa_config_enter(spa, SCL_ALL, FTAG, RW_WRITER); ++ spa_load_spares(spa); ++ spa_config_exit(spa, SCL_ALL, FTAG); ++ spa->spa_spares.sav_sync = B_TRUE; ++ } ++ ++ /* ++ * Get the list of level 2 cache devices, if specified. 
++ */ ++ if (nvlist_lookup_nvlist_array(nvroot, ZPOOL_CONFIG_L2CACHE, ++ &l2cache, &nl2cache) == 0) { ++ VERIFY(nvlist_alloc(&spa->spa_l2cache.sav_config, ++ NV_UNIQUE_NAME, KM_PUSHPAGE) == 0); ++ VERIFY(nvlist_add_nvlist_array(spa->spa_l2cache.sav_config, ++ ZPOOL_CONFIG_L2CACHE, l2cache, nl2cache) == 0); ++ spa_config_enter(spa, SCL_ALL, FTAG, RW_WRITER); ++ spa_load_l2cache(spa); ++ spa_config_exit(spa, SCL_ALL, FTAG); ++ spa->spa_l2cache.sav_sync = B_TRUE; ++ } ++ ++ spa->spa_dsl_pool = dp = dsl_pool_create(spa, zplprops, txg); ++ spa->spa_meta_objset = dp->dp_meta_objset; ++ ++ /* ++ * Create DDTs (dedup tables). ++ */ ++ ddt_create(spa); ++ ++ spa_update_dspace(spa); ++ ++ tx = dmu_tx_create_assigned(dp, txg); ++ ++ /* ++ * Create the pool config object. ++ */ ++ spa->spa_config_object = dmu_object_alloc(spa->spa_meta_objset, ++ DMU_OT_PACKED_NVLIST, SPA_CONFIG_BLOCKSIZE, ++ DMU_OT_PACKED_NVLIST_SIZE, sizeof (uint64_t), tx); ++ ++ if (zap_add(spa->spa_meta_objset, ++ DMU_POOL_DIRECTORY_OBJECT, DMU_POOL_CONFIG, ++ sizeof (uint64_t), 1, &spa->spa_config_object, tx) != 0) { ++ cmn_err(CE_PANIC, "failed to add pool config"); ++ } ++ ++ if (zap_add(spa->spa_meta_objset, ++ DMU_POOL_DIRECTORY_OBJECT, DMU_POOL_CREATION_VERSION, ++ sizeof (uint64_t), 1, &version, tx) != 0) { ++ cmn_err(CE_PANIC, "failed to add pool version"); ++ } ++ ++ /* Newly created pools with the right version are always deflated. */ ++ if (version >= SPA_VERSION_RAIDZ_DEFLATE) { ++ spa->spa_deflate = TRUE; ++ if (zap_add(spa->spa_meta_objset, ++ DMU_POOL_DIRECTORY_OBJECT, DMU_POOL_DEFLATE, ++ sizeof (uint64_t), 1, &spa->spa_deflate, tx) != 0) { ++ cmn_err(CE_PANIC, "failed to add deflate"); ++ } ++ } ++ ++ /* ++ * Create the deferred-free bpobj. Turn off compression ++ * because sync-to-convergence takes longer if the blocksize ++ * keeps changing. ++ */ ++ obj = bpobj_alloc(spa->spa_meta_objset, 1 << 14, tx); ++ dmu_object_set_compress(spa->spa_meta_objset, obj, ++ ZIO_COMPRESS_OFF, tx); ++ if (zap_add(spa->spa_meta_objset, ++ DMU_POOL_DIRECTORY_OBJECT, DMU_POOL_SYNC_BPOBJ, ++ sizeof (uint64_t), 1, &obj, tx) != 0) { ++ cmn_err(CE_PANIC, "failed to add bpobj"); ++ } ++ VERIFY3U(0, ==, bpobj_open(&spa->spa_deferred_bpobj, ++ spa->spa_meta_objset, obj)); ++ ++ /* ++ * Create the pool's history object. ++ */ ++ if (version >= SPA_VERSION_ZPOOL_HISTORY) ++ spa_history_create_obj(spa, tx); ++ ++ /* ++ * Set pool properties. ++ */ ++ spa->spa_bootfs = zpool_prop_default_numeric(ZPOOL_PROP_BOOTFS); ++ spa->spa_delegation = zpool_prop_default_numeric(ZPOOL_PROP_DELEGATION); ++ spa->spa_failmode = zpool_prop_default_numeric(ZPOOL_PROP_FAILUREMODE); ++ spa->spa_autoexpand = zpool_prop_default_numeric(ZPOOL_PROP_AUTOEXPAND); ++ ++ if (props != NULL) { ++ spa_configfile_set(spa, props, B_FALSE); ++ spa_sync_props(spa, props, tx); ++ } ++ ++ dmu_tx_commit(tx); ++ ++ spa->spa_sync_on = B_TRUE; ++ txg_sync_start(spa->spa_dsl_pool); ++ ++ /* ++ * We explicitly wait for the first transaction to complete so that our ++ * bean counters are appropriately updated. 
++ */ ++ txg_wait_synced(spa->spa_dsl_pool, txg); ++ ++ spa_config_sync(spa, B_FALSE, B_TRUE); ++ ++ if (version >= SPA_VERSION_ZPOOL_HISTORY && history_str != NULL) ++ (void) spa_history_log(spa, history_str, LOG_CMD_POOL_CREATE); ++ spa_history_log_version(spa, LOG_POOL_CREATE); ++ ++ spa->spa_minref = refcount_count(&spa->spa_refcount); ++ ++ mutex_exit(&spa_namespace_lock); ++ ++ return (0); ++} ++ ++#ifdef _KERNEL ++/* ++ * Get the root pool information from the root disk, then import the root pool ++ * during the system boot up time. ++ */ ++extern int vdev_disk_read_rootlabel(char *, char *, nvlist_t **); ++ ++static nvlist_t * ++spa_generate_rootconf(char *devpath, char *devid, uint64_t *guid) ++{ ++ nvlist_t *config; ++ nvlist_t *nvtop, *nvroot; ++ uint64_t pgid; ++ ++ if (vdev_disk_read_rootlabel(devpath, devid, &config) != 0) ++ return (NULL); ++ ++ /* ++ * Add this top-level vdev to the child array. ++ */ ++ VERIFY(nvlist_lookup_nvlist(config, ZPOOL_CONFIG_VDEV_TREE, ++ &nvtop) == 0); ++ VERIFY(nvlist_lookup_uint64(config, ZPOOL_CONFIG_POOL_GUID, ++ &pgid) == 0); ++ VERIFY(nvlist_lookup_uint64(config, ZPOOL_CONFIG_GUID, guid) == 0); ++ ++ /* ++ * Put this pool's top-level vdevs into a root vdev. ++ */ ++ VERIFY(nvlist_alloc(&nvroot, NV_UNIQUE_NAME, KM_PUSHPAGE) == 0); ++ VERIFY(nvlist_add_string(nvroot, ZPOOL_CONFIG_TYPE, ++ VDEV_TYPE_ROOT) == 0); ++ VERIFY(nvlist_add_uint64(nvroot, ZPOOL_CONFIG_ID, 0ULL) == 0); ++ VERIFY(nvlist_add_uint64(nvroot, ZPOOL_CONFIG_GUID, pgid) == 0); ++ VERIFY(nvlist_add_nvlist_array(nvroot, ZPOOL_CONFIG_CHILDREN, ++ &nvtop, 1) == 0); ++ ++ /* ++ * Replace the existing vdev_tree with the new root vdev in ++ * this pool's configuration (remove the old, add the new). ++ */ ++ VERIFY(nvlist_add_nvlist(config, ZPOOL_CONFIG_VDEV_TREE, nvroot) == 0); ++ nvlist_free(nvroot); ++ return (config); ++} ++ ++/* ++ * Walk the vdev tree and see if we can find a device with "better" ++ * configuration. A configuration is "better" if the label on that ++ * device has a more recent txg. ++ */ ++static void ++spa_alt_rootvdev(vdev_t *vd, vdev_t **avd, uint64_t *txg) ++{ ++ int c; ++ ++ for (c = 0; c < vd->vdev_children; c++) ++ spa_alt_rootvdev(vd->vdev_child[c], avd, txg); ++ ++ if (vd->vdev_ops->vdev_op_leaf) { ++ nvlist_t *label; ++ uint64_t label_txg; ++ ++ if (vdev_disk_read_rootlabel(vd->vdev_physpath, vd->vdev_devid, ++ &label) != 0) ++ return; ++ ++ VERIFY(nvlist_lookup_uint64(label, ZPOOL_CONFIG_POOL_TXG, ++ &label_txg) == 0); ++ ++ /* ++ * Do we have a better boot device? ++ */ ++ if (label_txg > *txg) { ++ *txg = label_txg; ++ *avd = vd; ++ } ++ nvlist_free(label); ++ } ++} ++ ++/* ++ * Import a root pool. ++ * ++ * For x86. devpath_list will consist of devid and/or physpath name of ++ * the vdev (e.g. "id1,sd@SSEAGATE..." or "/pci@1f,0/ide@d/disk@0,0:a"). ++ * The GRUB "findroot" command will return the vdev we should boot. ++ * ++ * For Sparc, devpath_list consists the physpath name of the booting device ++ * no matter the rootpool is a single device pool or a mirrored pool. ++ * e.g. ++ * "/pci@1f,0/ide@d/disk@0,0:a" ++ */ ++int ++spa_import_rootpool(char *devpath, char *devid) ++{ ++ spa_t *spa; ++ vdev_t *rvd, *bvd, *avd = NULL; ++ nvlist_t *config, *nvtop; ++ uint64_t guid, txg; ++ char *pname; ++ int error; ++ ++ /* ++ * Read the label from the boot device and generate a configuration. 
++ */ ++ config = spa_generate_rootconf(devpath, devid, &guid); ++#if defined(_OBP) && defined(_KERNEL) ++ if (config == NULL) { ++ if (strstr(devpath, "/iscsi/ssd") != NULL) { ++ /* iscsi boot */ ++ get_iscsi_bootpath_phy(devpath); ++ config = spa_generate_rootconf(devpath, devid, &guid); ++ } ++ } ++#endif ++ if (config == NULL) { ++ cmn_err(CE_NOTE, "Can not read the pool label from '%s'", ++ devpath); ++ return (EIO); ++ } ++ ++ VERIFY(nvlist_lookup_string(config, ZPOOL_CONFIG_POOL_NAME, ++ &pname) == 0); ++ VERIFY(nvlist_lookup_uint64(config, ZPOOL_CONFIG_POOL_TXG, &txg) == 0); ++ ++ mutex_enter(&spa_namespace_lock); ++ if ((spa = spa_lookup(pname)) != NULL) { ++ /* ++ * Remove the existing root pool from the namespace so that we ++ * can replace it with the correct config we just read in. ++ */ ++ spa_remove(spa); ++ } ++ ++ spa = spa_add(pname, config, NULL); ++ spa->spa_is_root = B_TRUE; ++ spa->spa_import_flags = ZFS_IMPORT_VERBATIM; ++ ++ /* ++ * Build up a vdev tree based on the boot device's label config. ++ */ ++ VERIFY(nvlist_lookup_nvlist(config, ZPOOL_CONFIG_VDEV_TREE, ++ &nvtop) == 0); ++ spa_config_enter(spa, SCL_ALL, FTAG, RW_WRITER); ++ error = spa_config_parse(spa, &rvd, nvtop, NULL, 0, ++ VDEV_ALLOC_ROOTPOOL); ++ spa_config_exit(spa, SCL_ALL, FTAG); ++ if (error) { ++ mutex_exit(&spa_namespace_lock); ++ nvlist_free(config); ++ cmn_err(CE_NOTE, "Can not parse the config for pool '%s'", ++ pname); ++ return (error); ++ } ++ ++ /* ++ * Get the boot vdev. ++ */ ++ if ((bvd = vdev_lookup_by_guid(rvd, guid)) == NULL) { ++ cmn_err(CE_NOTE, "Can not find the boot vdev for guid %llu", ++ (u_longlong_t)guid); ++ error = ENOENT; ++ goto out; ++ } ++ ++ /* ++ * Determine if there is a better boot device. ++ */ ++ avd = bvd; ++ spa_alt_rootvdev(rvd, &avd, &txg); ++ if (avd != bvd) { ++ cmn_err(CE_NOTE, "The boot device is 'degraded'. Please " ++ "try booting from '%s'", avd->vdev_path); ++ error = EINVAL; ++ goto out; ++ } ++ ++ /* ++ * If the boot device is part of a spare vdev then ensure that ++ * we're booting off the active spare. ++ */ ++ if (bvd->vdev_parent->vdev_ops == &vdev_spare_ops && ++ !bvd->vdev_isspare) { ++ cmn_err(CE_NOTE, "The boot device is currently spared. Please " ++ "try booting from '%s'", ++ bvd->vdev_parent-> ++ vdev_child[bvd->vdev_parent->vdev_children - 1]->vdev_path); ++ error = EINVAL; ++ goto out; ++ } ++ ++ error = 0; ++ spa_history_log_version(spa, LOG_POOL_IMPORT); ++out: ++ spa_config_enter(spa, SCL_ALL, FTAG, RW_WRITER); ++ vdev_free(rvd); ++ spa_config_exit(spa, SCL_ALL, FTAG); ++ mutex_exit(&spa_namespace_lock); ++ ++ nvlist_free(config); ++ return (error); ++} ++ ++#endif ++ ++/* ++ * Import a non-root pool into the system. ++ */ ++int ++spa_import(const char *pool, nvlist_t *config, nvlist_t *props, uint64_t flags) ++{ ++ spa_t *spa; ++ char *altroot = NULL; ++ spa_load_state_t state = SPA_LOAD_IMPORT; ++ zpool_rewind_policy_t policy; ++ uint64_t mode = spa_mode_global; ++ uint64_t readonly = B_FALSE; ++ int error; ++ nvlist_t *nvroot; ++ nvlist_t **spares, **l2cache; ++ uint_t nspares, nl2cache; ++ ++ /* ++ * If a pool with this name exists, return failure. ++ */ ++ mutex_enter(&spa_namespace_lock); ++ if (spa_lookup(pool) != NULL) { ++ mutex_exit(&spa_namespace_lock); ++ return (EEXIST); ++ } ++ ++ /* ++ * Create and initialize the spa structure. 
++ */ ++ (void) nvlist_lookup_string(props, ++ zpool_prop_to_name(ZPOOL_PROP_ALTROOT), &altroot); ++ (void) nvlist_lookup_uint64(props, ++ zpool_prop_to_name(ZPOOL_PROP_READONLY), &readonly); ++ if (readonly) ++ mode = FREAD; ++ spa = spa_add(pool, config, altroot); ++ spa->spa_import_flags = flags; ++ ++ /* ++ * Verbatim import - Take a pool and insert it into the namespace ++ * as if it had been loaded at boot. ++ */ ++ if (spa->spa_import_flags & ZFS_IMPORT_VERBATIM) { ++ if (props != NULL) ++ spa_configfile_set(spa, props, B_FALSE); ++ ++ spa_config_sync(spa, B_FALSE, B_TRUE); ++ ++ mutex_exit(&spa_namespace_lock); ++ spa_history_log_version(spa, LOG_POOL_IMPORT); ++ ++ return (0); ++ } ++ ++ spa_activate(spa, mode); ++ ++ /* ++ * Don't start async tasks until we know everything is healthy. ++ */ ++ spa_async_suspend(spa); ++ ++ zpool_get_rewind_policy(config, &policy); ++ if (policy.zrp_request & ZPOOL_DO_REWIND) ++ state = SPA_LOAD_RECOVER; ++ ++ /* ++ * Pass off the heavy lifting to spa_load(). Pass TRUE for mosconfig ++ * because the user-supplied config is actually the one to trust when ++ * doing an import. ++ */ ++ if (state != SPA_LOAD_RECOVER) ++ spa->spa_last_ubsync_txg = spa->spa_load_txg = 0; ++ ++ error = spa_load_best(spa, state, B_TRUE, policy.zrp_txg, ++ policy.zrp_request); ++ ++ /* ++ * Propagate anything learned while loading the pool and pass it ++ * back to caller (i.e. rewind info, missing devices, etc). ++ */ ++ VERIFY(nvlist_add_nvlist(config, ZPOOL_CONFIG_LOAD_INFO, ++ spa->spa_load_info) == 0); ++ ++ spa_config_enter(spa, SCL_ALL, FTAG, RW_WRITER); ++ /* ++ * Toss any existing sparelist, as it doesn't have any validity ++ * anymore, and conflicts with spa_has_spare(). ++ */ ++ if (spa->spa_spares.sav_config) { ++ nvlist_free(spa->spa_spares.sav_config); ++ spa->spa_spares.sav_config = NULL; ++ spa_load_spares(spa); ++ } ++ if (spa->spa_l2cache.sav_config) { ++ nvlist_free(spa->spa_l2cache.sav_config); ++ spa->spa_l2cache.sav_config = NULL; ++ spa_load_l2cache(spa); ++ } ++ ++ VERIFY(nvlist_lookup_nvlist(config, ZPOOL_CONFIG_VDEV_TREE, ++ &nvroot) == 0); ++ if (error == 0) ++ error = spa_validate_aux(spa, nvroot, -1ULL, ++ VDEV_ALLOC_SPARE); ++ if (error == 0) ++ error = spa_validate_aux(spa, nvroot, -1ULL, ++ VDEV_ALLOC_L2CACHE); ++ spa_config_exit(spa, SCL_ALL, FTAG); ++ ++ if (props != NULL) ++ spa_configfile_set(spa, props, B_FALSE); ++ ++ if (error != 0 || (props && spa_writeable(spa) && ++ (error = spa_prop_set(spa, props)))) { ++ spa_unload(spa); ++ spa_deactivate(spa); ++ spa_remove(spa); ++ mutex_exit(&spa_namespace_lock); ++ return (error); ++ } ++ ++ spa_async_resume(spa); ++ ++ /* ++ * Override any spares and level 2 cache devices as specified by ++ * the user, as these may have correct device names/devids, etc. 
++ */ ++ if (nvlist_lookup_nvlist_array(nvroot, ZPOOL_CONFIG_SPARES, ++ &spares, &nspares) == 0) { ++ if (spa->spa_spares.sav_config) ++ VERIFY(nvlist_remove(spa->spa_spares.sav_config, ++ ZPOOL_CONFIG_SPARES, DATA_TYPE_NVLIST_ARRAY) == 0); ++ else ++ VERIFY(nvlist_alloc(&spa->spa_spares.sav_config, ++ NV_UNIQUE_NAME, KM_PUSHPAGE) == 0); ++ VERIFY(nvlist_add_nvlist_array(spa->spa_spares.sav_config, ++ ZPOOL_CONFIG_SPARES, spares, nspares) == 0); ++ spa_config_enter(spa, SCL_ALL, FTAG, RW_WRITER); ++ spa_load_spares(spa); ++ spa_config_exit(spa, SCL_ALL, FTAG); ++ spa->spa_spares.sav_sync = B_TRUE; ++ } ++ if (nvlist_lookup_nvlist_array(nvroot, ZPOOL_CONFIG_L2CACHE, ++ &l2cache, &nl2cache) == 0) { ++ if (spa->spa_l2cache.sav_config) ++ VERIFY(nvlist_remove(spa->spa_l2cache.sav_config, ++ ZPOOL_CONFIG_L2CACHE, DATA_TYPE_NVLIST_ARRAY) == 0); ++ else ++ VERIFY(nvlist_alloc(&spa->spa_l2cache.sav_config, ++ NV_UNIQUE_NAME, KM_PUSHPAGE) == 0); ++ VERIFY(nvlist_add_nvlist_array(spa->spa_l2cache.sav_config, ++ ZPOOL_CONFIG_L2CACHE, l2cache, nl2cache) == 0); ++ spa_config_enter(spa, SCL_ALL, FTAG, RW_WRITER); ++ spa_load_l2cache(spa); ++ spa_config_exit(spa, SCL_ALL, FTAG); ++ spa->spa_l2cache.sav_sync = B_TRUE; ++ } ++ ++ /* ++ * Check for any removed devices. ++ */ ++ if (spa->spa_autoreplace) { ++ spa_aux_check_removed(&spa->spa_spares); ++ spa_aux_check_removed(&spa->spa_l2cache); ++ } ++ ++ if (spa_writeable(spa)) { ++ /* ++ * Update the config cache to include the newly-imported pool. ++ */ ++ spa_config_update(spa, SPA_CONFIG_UPDATE_POOL); ++ } ++ ++ /* ++ * It's possible that the pool was expanded while it was exported. ++ * We kick off an async task to handle this for us. ++ */ ++ spa_async_request(spa, SPA_ASYNC_AUTOEXPAND); ++ ++ mutex_exit(&spa_namespace_lock); ++ spa_history_log_version(spa, LOG_POOL_IMPORT); ++ ++ return (0); ++} ++ ++nvlist_t * ++spa_tryimport(nvlist_t *tryconfig) ++{ ++ nvlist_t *config = NULL; ++ char *poolname; ++ spa_t *spa; ++ uint64_t state; ++ int error; ++ ++ if (nvlist_lookup_string(tryconfig, ZPOOL_CONFIG_POOL_NAME, &poolname)) ++ return (NULL); ++ ++ if (nvlist_lookup_uint64(tryconfig, ZPOOL_CONFIG_POOL_STATE, &state)) ++ return (NULL); ++ ++ /* ++ * Create and initialize the spa structure. ++ */ ++ mutex_enter(&spa_namespace_lock); ++ spa = spa_add(TRYIMPORT_NAME, tryconfig, NULL); ++ spa_activate(spa, FREAD); ++ ++ /* ++ * Pass off the heavy lifting to spa_load(). ++ * Pass TRUE for mosconfig because the user-supplied config ++ * is actually the one to trust when doing an import. ++ */ ++ error = spa_load(spa, SPA_LOAD_TRYIMPORT, SPA_IMPORT_EXISTING, B_TRUE); ++ ++ /* ++ * If 'tryconfig' was at least parsable, return the current config. ++ */ ++ if (spa->spa_root_vdev != NULL) { ++ config = spa_config_generate(spa, NULL, -1ULL, B_TRUE); ++ VERIFY(nvlist_add_string(config, ZPOOL_CONFIG_POOL_NAME, ++ poolname) == 0); ++ VERIFY(nvlist_add_uint64(config, ZPOOL_CONFIG_POOL_STATE, ++ state) == 0); ++ VERIFY(nvlist_add_uint64(config, ZPOOL_CONFIG_TIMESTAMP, ++ spa->spa_uberblock.ub_timestamp) == 0); ++ ++ /* ++ * If the bootfs property exists on this pool then we ++ * copy it out so that external consumers can tell which ++ * pools are bootable. ++ */ ++ if ((!error || error == EEXIST) && spa->spa_bootfs) { ++ char *tmpname = kmem_alloc(MAXPATHLEN, KM_PUSHPAGE); ++ ++ /* ++ * We have to play games with the name since the ++ * pool was opened as TRYIMPORT_NAME. 
++ */ ++ if (dsl_dsobj_to_dsname(spa_name(spa), ++ spa->spa_bootfs, tmpname) == 0) { ++ char *cp; ++ char *dsname = kmem_alloc(MAXPATHLEN, KM_PUSHPAGE); ++ ++ cp = strchr(tmpname, '/'); ++ if (cp == NULL) { ++ (void) strlcpy(dsname, tmpname, ++ MAXPATHLEN); ++ } else { ++ (void) snprintf(dsname, MAXPATHLEN, ++ "%s/%s", poolname, ++cp); ++ } ++ VERIFY(nvlist_add_string(config, ++ ZPOOL_CONFIG_BOOTFS, dsname) == 0); ++ kmem_free(dsname, MAXPATHLEN); ++ } ++ kmem_free(tmpname, MAXPATHLEN); ++ } ++ ++ /* ++ * Add the list of hot spares and level 2 cache devices. ++ */ ++ spa_config_enter(spa, SCL_CONFIG, FTAG, RW_READER); ++ spa_add_spares(spa, config); ++ spa_add_l2cache(spa, config); ++ spa_config_exit(spa, SCL_CONFIG, FTAG); ++ } ++ ++ spa_unload(spa); ++ spa_deactivate(spa); ++ spa_remove(spa); ++ mutex_exit(&spa_namespace_lock); ++ ++ return (config); ++} ++ ++/* ++ * Pool export/destroy ++ * ++ * The act of destroying or exporting a pool is very simple. We make sure there ++ * is no more pending I/O and any references to the pool are gone. Then, we ++ * update the pool state and sync all the labels to disk, removing the ++ * configuration from the cache afterwards. If the 'hardforce' flag is set, then ++ * we don't sync the labels or remove the configuration cache. ++ */ ++static int ++spa_export_common(char *pool, int new_state, nvlist_t **oldconfig, ++ boolean_t force, boolean_t hardforce) ++{ ++ spa_t *spa; ++ ++ if (oldconfig) ++ *oldconfig = NULL; ++ ++ if (!(spa_mode_global & FWRITE)) ++ return (EROFS); ++ ++ mutex_enter(&spa_namespace_lock); ++ if ((spa = spa_lookup(pool)) == NULL) { ++ mutex_exit(&spa_namespace_lock); ++ return (ENOENT); ++ } ++ ++ /* ++ * Put a hold on the pool, drop the namespace lock, stop async tasks, ++ * reacquire the namespace lock, and see if we can export. ++ */ ++ spa_open_ref(spa, FTAG); ++ mutex_exit(&spa_namespace_lock); ++ spa_async_suspend(spa); ++ mutex_enter(&spa_namespace_lock); ++ spa_close(spa, FTAG); ++ ++ /* ++ * The pool will be in core if it's openable, ++ * in which case we can modify its state. ++ */ ++ if (spa->spa_state != POOL_STATE_UNINITIALIZED && spa->spa_sync_on) { ++ /* ++ * Objsets may be open only because they're dirty, so we ++ * have to force it to sync before checking spa_refcnt. ++ */ ++ txg_wait_synced(spa->spa_dsl_pool, 0); ++ ++ /* ++ * A pool cannot be exported or destroyed if there are active ++ * references. If we are resetting a pool, allow references by ++ * fault injection handlers. ++ */ ++ if (!spa_refcount_zero(spa) || ++ (spa->spa_inject_ref != 0 && ++ new_state != POOL_STATE_UNINITIALIZED)) { ++ spa_async_resume(spa); ++ mutex_exit(&spa_namespace_lock); ++ return (EBUSY); ++ } ++ ++ /* ++ * A pool cannot be exported if it has an active shared spare. ++ * This is to prevent other pools stealing the active spare ++ * from an exported pool. At user's own will, such pool can ++ * be forcedly exported. ++ */ ++ if (!force && new_state == POOL_STATE_EXPORTED && ++ spa_has_active_shared_spare(spa)) { ++ spa_async_resume(spa); ++ mutex_exit(&spa_namespace_lock); ++ return (EXDEV); ++ } ++ ++ /* ++ * We want this to be reflected on every label, ++ * so mark them all dirty. spa_unload() will do the ++ * final sync that pushes these changes out. 
++ */ ++ if (new_state != POOL_STATE_UNINITIALIZED && !hardforce) { ++ spa_config_enter(spa, SCL_ALL, FTAG, RW_WRITER); ++ spa->spa_state = new_state; ++ spa->spa_final_txg = spa_last_synced_txg(spa) + ++ TXG_DEFER_SIZE + 1; ++ vdev_config_dirty(spa->spa_root_vdev); ++ spa_config_exit(spa, SCL_ALL, FTAG); ++ } ++ } ++ ++ spa_event_notify(spa, NULL, FM_EREPORT_ZFS_POOL_DESTROY); ++ ++ if (spa->spa_state != POOL_STATE_UNINITIALIZED) { ++ spa_unload(spa); ++ spa_deactivate(spa); ++ } ++ ++ if (oldconfig && spa->spa_config) ++ VERIFY(nvlist_dup(spa->spa_config, oldconfig, 0) == 0); ++ ++ if (new_state != POOL_STATE_UNINITIALIZED) { ++ if (!hardforce) ++ spa_config_sync(spa, B_TRUE, B_TRUE); ++ spa_remove(spa); ++ } ++ mutex_exit(&spa_namespace_lock); ++ ++ return (0); ++} ++ ++/* ++ * Destroy a storage pool. ++ */ ++int ++spa_destroy(char *pool) ++{ ++ return (spa_export_common(pool, POOL_STATE_DESTROYED, NULL, ++ B_FALSE, B_FALSE)); ++} ++ ++/* ++ * Export a storage pool. ++ */ ++int ++spa_export(char *pool, nvlist_t **oldconfig, boolean_t force, ++ boolean_t hardforce) ++{ ++ return (spa_export_common(pool, POOL_STATE_EXPORTED, oldconfig, ++ force, hardforce)); ++} ++ ++/* ++ * Similar to spa_export(), this unloads the spa_t without actually removing it ++ * from the namespace in any way. ++ */ ++int ++spa_reset(char *pool) ++{ ++ return (spa_export_common(pool, POOL_STATE_UNINITIALIZED, NULL, ++ B_FALSE, B_FALSE)); ++} ++ ++/* ++ * ========================================================================== ++ * Device manipulation ++ * ========================================================================== ++ */ ++ ++/* ++ * Add a device to a storage pool. ++ */ ++int ++spa_vdev_add(spa_t *spa, nvlist_t *nvroot) ++{ ++ uint64_t txg, id; ++ int error; ++ vdev_t *rvd = spa->spa_root_vdev; ++ vdev_t *vd, *tvd; ++ nvlist_t **spares, **l2cache; ++ uint_t nspares, nl2cache; ++ int c; ++ ++ ASSERT(spa_writeable(spa)); ++ ++ txg = spa_vdev_enter(spa); ++ ++ if ((error = spa_config_parse(spa, &vd, nvroot, NULL, 0, ++ VDEV_ALLOC_ADD)) != 0) ++ return (spa_vdev_exit(spa, NULL, txg, error)); ++ ++ spa->spa_pending_vdev = vd; /* spa_vdev_exit() will clear this */ ++ ++ if (nvlist_lookup_nvlist_array(nvroot, ZPOOL_CONFIG_SPARES, &spares, ++ &nspares) != 0) ++ nspares = 0; ++ ++ if (nvlist_lookup_nvlist_array(nvroot, ZPOOL_CONFIG_L2CACHE, &l2cache, ++ &nl2cache) != 0) ++ nl2cache = 0; ++ ++ if (vd->vdev_children == 0 && nspares == 0 && nl2cache == 0) ++ return (spa_vdev_exit(spa, vd, txg, EINVAL)); ++ ++ if (vd->vdev_children != 0 && ++ (error = vdev_create(vd, txg, B_FALSE)) != 0) ++ return (spa_vdev_exit(spa, vd, txg, error)); ++ ++ /* ++ * We must validate the spares and l2cache devices after checking the ++ * children. Otherwise, vdev_inuse() will blindly overwrite the spare. ++ */ ++ if ((error = spa_validate_aux(spa, nvroot, txg, VDEV_ALLOC_ADD)) != 0) ++ return (spa_vdev_exit(spa, vd, txg, error)); ++ ++ /* ++ * Transfer each new top-level vdev from vd to rvd. ++ */ ++ for (c = 0; c < vd->vdev_children; c++) { ++ ++ /* ++ * Set the vdev id to the first hole, if one exists. 
++ */ ++ for (id = 0; id < rvd->vdev_children; id++) { ++ if (rvd->vdev_child[id]->vdev_ishole) { ++ vdev_free(rvd->vdev_child[id]); ++ break; ++ } ++ } ++ tvd = vd->vdev_child[c]; ++ vdev_remove_child(vd, tvd); ++ tvd->vdev_id = id; ++ vdev_add_child(rvd, tvd); ++ vdev_config_dirty(tvd); ++ } ++ ++ if (nspares != 0) { ++ spa_set_aux_vdevs(&spa->spa_spares, spares, nspares, ++ ZPOOL_CONFIG_SPARES); ++ spa_load_spares(spa); ++ spa->spa_spares.sav_sync = B_TRUE; ++ } ++ ++ if (nl2cache != 0) { ++ spa_set_aux_vdevs(&spa->spa_l2cache, l2cache, nl2cache, ++ ZPOOL_CONFIG_L2CACHE); ++ spa_load_l2cache(spa); ++ spa->spa_l2cache.sav_sync = B_TRUE; ++ } ++ ++ /* ++ * We have to be careful when adding new vdevs to an existing pool. ++ * If other threads start allocating from these vdevs before we ++ * sync the config cache, and we lose power, then upon reboot we may ++ * fail to open the pool because there are DVAs that the config cache ++ * can't translate. Therefore, we first add the vdevs without ++ * initializing metaslabs; sync the config cache (via spa_vdev_exit()); ++ * and then let spa_config_update() initialize the new metaslabs. ++ * ++ * spa_load() checks for added-but-not-initialized vdevs, so that ++ * if we lose power at any point in this sequence, the remaining ++ * steps will be completed the next time we load the pool. ++ */ ++ (void) spa_vdev_exit(spa, vd, txg, 0); ++ ++ mutex_enter(&spa_namespace_lock); ++ spa_config_update(spa, SPA_CONFIG_UPDATE_POOL); ++ mutex_exit(&spa_namespace_lock); ++ ++ return (0); ++} ++ ++/* ++ * Attach a device to a mirror. The arguments are the path to any device ++ * in the mirror, and the nvroot for the new device. If the path specifies ++ * a device that is not mirrored, we automatically insert the mirror vdev. ++ * ++ * If 'replacing' is specified, the new device is intended to replace the ++ * existing device; in this case the two devices are made into their own ++ * mirror using the 'replacing' vdev, which is functionally identical to ++ * the mirror vdev (it actually reuses all the same ops) but has a few ++ * extra rules: you can't attach to it after it's been created, and upon ++ * completion of resilvering, the first disk (the one being replaced) ++ * is automatically detached. 
++ */ ++int ++spa_vdev_attach(spa_t *spa, uint64_t guid, nvlist_t *nvroot, int replacing) ++{ ++ uint64_t txg, dtl_max_txg; ++ ASSERTV(vdev_t *rvd = spa->spa_root_vdev;) ++ vdev_t *oldvd, *newvd, *newrootvd, *pvd, *tvd; ++ vdev_ops_t *pvops; ++ char *oldvdpath, *newvdpath; ++ int newvd_isspare; ++ int error; ++ ++ ASSERT(spa_writeable(spa)); ++ ++ txg = spa_vdev_enter(spa); ++ ++ oldvd = spa_lookup_by_guid(spa, guid, B_FALSE); ++ ++ if (oldvd == NULL) ++ return (spa_vdev_exit(spa, NULL, txg, ENODEV)); ++ ++ if (!oldvd->vdev_ops->vdev_op_leaf) ++ return (spa_vdev_exit(spa, NULL, txg, ENOTSUP)); ++ ++ pvd = oldvd->vdev_parent; ++ ++ if ((error = spa_config_parse(spa, &newrootvd, nvroot, NULL, 0, ++ VDEV_ALLOC_ATTACH)) != 0) ++ return (spa_vdev_exit(spa, NULL, txg, EINVAL)); ++ ++ if (newrootvd->vdev_children != 1) ++ return (spa_vdev_exit(spa, newrootvd, txg, EINVAL)); ++ ++ newvd = newrootvd->vdev_child[0]; ++ ++ if (!newvd->vdev_ops->vdev_op_leaf) ++ return (spa_vdev_exit(spa, newrootvd, txg, EINVAL)); ++ ++ if ((error = vdev_create(newrootvd, txg, replacing)) != 0) ++ return (spa_vdev_exit(spa, newrootvd, txg, error)); ++ ++ /* ++ * Spares can't replace logs ++ */ ++ if (oldvd->vdev_top->vdev_islog && newvd->vdev_isspare) ++ return (spa_vdev_exit(spa, newrootvd, txg, ENOTSUP)); ++ ++ if (!replacing) { ++ /* ++ * For attach, the only allowable parent is a mirror or the root ++ * vdev. ++ */ ++ if (pvd->vdev_ops != &vdev_mirror_ops && ++ pvd->vdev_ops != &vdev_root_ops) ++ return (spa_vdev_exit(spa, newrootvd, txg, ENOTSUP)); ++ ++ pvops = &vdev_mirror_ops; ++ } else { ++ /* ++ * Active hot spares can only be replaced by inactive hot ++ * spares. ++ */ ++ if (pvd->vdev_ops == &vdev_spare_ops && ++ oldvd->vdev_isspare && ++ !spa_has_spare(spa, newvd->vdev_guid)) ++ return (spa_vdev_exit(spa, newrootvd, txg, ENOTSUP)); ++ ++ /* ++ * If the source is a hot spare, and the parent isn't already a ++ * spare, then we want to create a new hot spare. Otherwise, we ++ * want to create a replacing vdev. The user is not allowed to ++ * attach to a spared vdev child unless the 'isspare' state is ++ * the same (spare replaces spare, non-spare replaces ++ * non-spare). ++ */ ++ if (pvd->vdev_ops == &vdev_replacing_ops && ++ spa_version(spa) < SPA_VERSION_MULTI_REPLACE) { ++ return (spa_vdev_exit(spa, newrootvd, txg, ENOTSUP)); ++ } else if (pvd->vdev_ops == &vdev_spare_ops && ++ newvd->vdev_isspare != oldvd->vdev_isspare) { ++ return (spa_vdev_exit(spa, newrootvd, txg, ENOTSUP)); ++ } ++ ++ if (newvd->vdev_isspare) ++ pvops = &vdev_spare_ops; ++ else ++ pvops = &vdev_replacing_ops; ++ } ++ ++ /* ++ * Make sure the new device is big enough. ++ */ ++ if (newvd->vdev_asize < vdev_get_min_asize(oldvd)) ++ return (spa_vdev_exit(spa, newrootvd, txg, EOVERFLOW)); ++ ++ /* ++ * The new device cannot have a higher alignment requirement ++ * than the top-level vdev. ++ */ ++ if (newvd->vdev_ashift > oldvd->vdev_top->vdev_ashift) ++ return (spa_vdev_exit(spa, newrootvd, txg, EDOM)); ++ ++ /* ++ * If this is an in-place replacement, update oldvd's path and devid ++ * to make it distinguishable from newvd, and unopenable from now on. 
++ */ ++ if (strcmp(oldvd->vdev_path, newvd->vdev_path) == 0) { ++ spa_strfree(oldvd->vdev_path); ++ oldvd->vdev_path = kmem_alloc(strlen(newvd->vdev_path) + 5, ++ KM_PUSHPAGE); ++ (void) sprintf(oldvd->vdev_path, "%s/%s", ++ newvd->vdev_path, "old"); ++ if (oldvd->vdev_devid != NULL) { ++ spa_strfree(oldvd->vdev_devid); ++ oldvd->vdev_devid = NULL; ++ } ++ } ++ ++ /* mark the device being resilvered */ ++ newvd->vdev_resilvering = B_TRUE; ++ ++ /* ++ * If the parent is not a mirror, or if we're replacing, insert the new ++ * mirror/replacing/spare vdev above oldvd. ++ */ ++ if (pvd->vdev_ops != pvops) ++ pvd = vdev_add_parent(oldvd, pvops); ++ ++ ASSERT(pvd->vdev_top->vdev_parent == rvd); ++ ASSERT(pvd->vdev_ops == pvops); ++ ASSERT(oldvd->vdev_parent == pvd); ++ ++ /* ++ * Extract the new device from its root and add it to pvd. ++ */ ++ vdev_remove_child(newrootvd, newvd); ++ newvd->vdev_id = pvd->vdev_children; ++ newvd->vdev_crtxg = oldvd->vdev_crtxg; ++ vdev_add_child(pvd, newvd); ++ ++ tvd = newvd->vdev_top; ++ ASSERT(pvd->vdev_top == tvd); ++ ASSERT(tvd->vdev_parent == rvd); ++ ++ vdev_config_dirty(tvd); ++ ++ /* ++ * Set newvd's DTL to [TXG_INITIAL, dtl_max_txg) so that we account ++ * for any dmu_sync-ed blocks. It will propagate upward when ++ * spa_vdev_exit() calls vdev_dtl_reassess(). ++ */ ++ dtl_max_txg = txg + TXG_CONCURRENT_STATES; ++ ++ vdev_dtl_dirty(newvd, DTL_MISSING, TXG_INITIAL, ++ dtl_max_txg - TXG_INITIAL); ++ ++ if (newvd->vdev_isspare) { ++ spa_spare_activate(newvd); ++ spa_event_notify(spa, newvd, FM_EREPORT_ZFS_DEVICE_SPARE); ++ } ++ ++ oldvdpath = spa_strdup(oldvd->vdev_path); ++ newvdpath = spa_strdup(newvd->vdev_path); ++ newvd_isspare = newvd->vdev_isspare; ++ ++ /* ++ * Mark newvd's DTL dirty in this txg. ++ */ ++ vdev_dirty(tvd, VDD_DTL, newvd, txg); ++ ++ /* ++ * Restart the resilver ++ */ ++ dsl_resilver_restart(spa->spa_dsl_pool, dtl_max_txg); ++ ++ /* ++ * Commit the config ++ */ ++ (void) spa_vdev_exit(spa, newrootvd, dtl_max_txg, 0); ++ ++ spa_history_log_internal(LOG_POOL_VDEV_ATTACH, spa, NULL, ++ "%s vdev=%s %s vdev=%s", ++ replacing && newvd_isspare ? "spare in" : ++ replacing ? "replace" : "attach", newvdpath, ++ replacing ? "for" : "to", oldvdpath); ++ ++ spa_strfree(oldvdpath); ++ spa_strfree(newvdpath); ++ ++ if (spa->spa_bootfs) ++ spa_event_notify(spa, newvd, FM_EREPORT_ZFS_BOOTFS_VDEV_ATTACH); ++ ++ return (0); ++} ++ ++/* ++ * Detach a device from a mirror or replacing vdev. ++ * If 'replace_done' is specified, only detach if the parent ++ * is a replacing vdev. ++ */ ++int ++spa_vdev_detach(spa_t *spa, uint64_t guid, uint64_t pguid, int replace_done) ++{ ++ uint64_t txg; ++ int error; ++ ASSERTV(vdev_t *rvd = spa->spa_root_vdev;) ++ vdev_t *vd, *pvd, *cvd, *tvd; ++ boolean_t unspare = B_FALSE; ++ uint64_t unspare_guid = 0; ++ char *vdpath; ++ int c, t; ++ ++ ASSERT(spa_writeable(spa)); ++ ++ txg = spa_vdev_enter(spa); ++ ++ vd = spa_lookup_by_guid(spa, guid, B_FALSE); ++ ++ if (vd == NULL) ++ return (spa_vdev_exit(spa, NULL, txg, ENODEV)); ++ ++ if (!vd->vdev_ops->vdev_op_leaf) ++ return (spa_vdev_exit(spa, NULL, txg, ENOTSUP)); ++ ++ pvd = vd->vdev_parent; ++ ++ /* ++ * If the parent/child relationship is not as expected, don't do it. ++ * Consider M(A,R(B,C)) -- that is, a mirror of A with a replacing ++ * vdev that's replacing B with C. The user's intent in replacing ++ * is to go from M(A,B) to M(A,C). If the user decides to cancel ++ * the replace by detaching C, the expected behavior is to end up ++ * M(A,B). 
But suppose that right after deciding to detach C, ++ * the replacement of B completes. We would have M(A,C), and then ++ * ask to detach C, which would leave us with just A -- not what ++ * the user wanted. To prevent this, we make sure that the ++ * parent/child relationship hasn't changed -- in this example, ++ * that C's parent is still the replacing vdev R. ++ */ ++ if (pvd->vdev_guid != pguid && pguid != 0) ++ return (spa_vdev_exit(spa, NULL, txg, EBUSY)); ++ ++ /* ++ * Only 'replacing' or 'spare' vdevs can be replaced. ++ */ ++ if (replace_done && pvd->vdev_ops != &vdev_replacing_ops && ++ pvd->vdev_ops != &vdev_spare_ops) ++ return (spa_vdev_exit(spa, NULL, txg, ENOTSUP)); ++ ++ ASSERT(pvd->vdev_ops != &vdev_spare_ops || ++ spa_version(spa) >= SPA_VERSION_SPARES); ++ ++ /* ++ * Only mirror, replacing, and spare vdevs support detach. ++ */ ++ if (pvd->vdev_ops != &vdev_replacing_ops && ++ pvd->vdev_ops != &vdev_mirror_ops && ++ pvd->vdev_ops != &vdev_spare_ops) ++ return (spa_vdev_exit(spa, NULL, txg, ENOTSUP)); ++ ++ /* ++ * If this device has the only valid copy of some data, ++ * we cannot safely detach it. ++ */ ++ if (vdev_dtl_required(vd)) ++ return (spa_vdev_exit(spa, NULL, txg, EBUSY)); ++ ++ ASSERT(pvd->vdev_children >= 2); ++ ++ /* ++ * If we are detaching the second disk from a replacing vdev, then ++ * check to see if we changed the original vdev's path to have "/old" ++ * at the end in spa_vdev_attach(). If so, undo that change now. ++ */ ++ if (pvd->vdev_ops == &vdev_replacing_ops && vd->vdev_id > 0 && ++ vd->vdev_path != NULL) { ++ size_t len = strlen(vd->vdev_path); ++ ++ for (c = 0; c < pvd->vdev_children; c++) { ++ cvd = pvd->vdev_child[c]; ++ ++ if (cvd == vd || cvd->vdev_path == NULL) ++ continue; ++ ++ if (strncmp(cvd->vdev_path, vd->vdev_path, len) == 0 && ++ strcmp(cvd->vdev_path + len, "/old") == 0) { ++ spa_strfree(cvd->vdev_path); ++ cvd->vdev_path = spa_strdup(vd->vdev_path); ++ break; ++ } ++ } ++ } ++ ++ /* ++ * If we are detaching the original disk from a spare, then it implies ++ * that the spare should become a real disk, and be removed from the ++ * active spare list for the pool. ++ */ ++ if (pvd->vdev_ops == &vdev_spare_ops && ++ vd->vdev_id == 0 && ++ pvd->vdev_child[pvd->vdev_children - 1]->vdev_isspare) ++ unspare = B_TRUE; ++ ++ /* ++ * Erase the disk labels so the disk can be used for other things. ++ * This must be done after all other error cases are handled, ++ * but before we disembowel vd (so we can still do I/O to it). ++ * But if we can't do it, don't treat the error as fatal -- ++ * it may be that the unwritability of the disk is the reason ++ * it's being detached! ++ */ ++ error = vdev_label_init(vd, 0, VDEV_LABEL_REMOVE); ++ ++ /* ++ * Remove vd from its parent and compact the parent's children. ++ */ ++ vdev_remove_child(pvd, vd); ++ vdev_compact_children(pvd); ++ ++ /* ++ * Remember one of the remaining children so we can get tvd below. ++ */ ++ cvd = pvd->vdev_child[pvd->vdev_children - 1]; ++ ++ /* ++ * If we need to remove the remaining child from the list of hot spares, ++ * do it now, marking the vdev as no longer a spare in the process. ++ * We must do this before vdev_remove_parent(), because that can ++ * change the GUID if it creates a new toplevel GUID. For a similar ++ * reason, we must remove the spare now, in the same txg as the detach; ++ * otherwise someone could attach a new sibling, change the GUID, and ++ * the subsequent attempt to spa_vdev_remove(unspare_guid) would fail. 
++ */ ++ if (unspare) { ++ ASSERT(cvd->vdev_isspare); ++ spa_spare_remove(cvd); ++ unspare_guid = cvd->vdev_guid; ++ (void) spa_vdev_remove(spa, unspare_guid, B_TRUE); ++ cvd->vdev_unspare = B_TRUE; ++ } ++ ++ /* ++ * If the parent mirror/replacing vdev only has one child, ++ * the parent is no longer needed. Remove it from the tree. ++ */ ++ if (pvd->vdev_children == 1) { ++ if (pvd->vdev_ops == &vdev_spare_ops) ++ cvd->vdev_unspare = B_FALSE; ++ vdev_remove_parent(cvd); ++ cvd->vdev_resilvering = B_FALSE; ++ } ++ ++ ++ /* ++ * We don't set tvd until now because the parent we just removed ++ * may have been the previous top-level vdev. ++ */ ++ tvd = cvd->vdev_top; ++ ASSERT(tvd->vdev_parent == rvd); ++ ++ /* ++ * Reevaluate the parent vdev state. ++ */ ++ vdev_propagate_state(cvd); ++ ++ /* ++ * If the 'autoexpand' property is set on the pool then automatically ++ * try to expand the size of the pool. For example if the device we ++ * just detached was smaller than the others, it may be possible to ++ * add metaslabs (i.e. grow the pool). We need to reopen the vdev ++ * first so that we can obtain the updated sizes of the leaf vdevs. ++ */ ++ if (spa->spa_autoexpand) { ++ vdev_reopen(tvd); ++ vdev_expand(tvd, txg); ++ } ++ ++ vdev_config_dirty(tvd); ++ ++ /* ++ * Mark vd's DTL as dirty in this txg. vdev_dtl_sync() will see that ++ * vd->vdev_detached is set and free vd's DTL object in syncing context. ++ * But first make sure we're not on any *other* txg's DTL list, to ++ * prevent vd from being accessed after it's freed. ++ */ ++ vdpath = spa_strdup(vd->vdev_path); ++ for (t = 0; t < TXG_SIZE; t++) ++ (void) txg_list_remove_this(&tvd->vdev_dtl_list, vd, t); ++ vd->vdev_detached = B_TRUE; ++ vdev_dirty(tvd, VDD_DTL, vd, txg); ++ ++ spa_event_notify(spa, vd, FM_EREPORT_ZFS_DEVICE_REMOVE); ++ ++ /* hang on to the spa before we release the lock */ ++ spa_open_ref(spa, FTAG); ++ ++ error = spa_vdev_exit(spa, vd, txg, 0); ++ ++ spa_history_log_internal(LOG_POOL_VDEV_DETACH, spa, NULL, ++ "vdev=%s", vdpath); ++ spa_strfree(vdpath); ++ ++ /* ++ * If this was the removal of the original device in a hot spare vdev, ++ * then we want to go through and remove the device from the hot spare ++ * list of every other pool. ++ */ ++ if (unspare) { ++ spa_t *altspa = NULL; ++ ++ mutex_enter(&spa_namespace_lock); ++ while ((altspa = spa_next(altspa)) != NULL) { ++ if (altspa->spa_state != POOL_STATE_ACTIVE || ++ altspa == spa) ++ continue; ++ ++ spa_open_ref(altspa, FTAG); ++ mutex_exit(&spa_namespace_lock); ++ (void) spa_vdev_remove(altspa, unspare_guid, B_TRUE); ++ mutex_enter(&spa_namespace_lock); ++ spa_close(altspa, FTAG); ++ } ++ mutex_exit(&spa_namespace_lock); ++ ++ /* search the rest of the vdevs for spares to remove */ ++ spa_vdev_resilver_done(spa); ++ } ++ ++ /* all done with the spa; OK to release */ ++ mutex_enter(&spa_namespace_lock); ++ spa_close(spa, FTAG); ++ mutex_exit(&spa_namespace_lock); ++ ++ return (error); ++} ++ ++/* ++ * Split a set of devices from their mirrors, and create a new pool from them. 
++ */ ++int ++spa_vdev_split_mirror(spa_t *spa, char *newname, nvlist_t *config, ++ nvlist_t *props, boolean_t exp) ++{ ++ int error = 0; ++ uint64_t txg, *glist; ++ spa_t *newspa; ++ uint_t c, children, lastlog; ++ nvlist_t **child, *nvl, *tmp; ++ dmu_tx_t *tx; ++ char *altroot = NULL; ++ vdev_t *rvd, **vml = NULL; /* vdev modify list */ ++ boolean_t activate_slog; ++ ++ ASSERT(spa_writeable(spa)); ++ ++ txg = spa_vdev_enter(spa); ++ ++ /* clear the log and flush everything up to now */ ++ activate_slog = spa_passivate_log(spa); ++ (void) spa_vdev_config_exit(spa, NULL, txg, 0, FTAG); ++ error = spa_offline_log(spa); ++ txg = spa_vdev_config_enter(spa); ++ ++ if (activate_slog) ++ spa_activate_log(spa); ++ ++ if (error != 0) ++ return (spa_vdev_exit(spa, NULL, txg, error)); ++ ++ /* check new spa name before going any further */ ++ if (spa_lookup(newname) != NULL) ++ return (spa_vdev_exit(spa, NULL, txg, EEXIST)); ++ ++ /* ++ * scan through all the children to ensure they're all mirrors ++ */ ++ if (nvlist_lookup_nvlist(config, ZPOOL_CONFIG_VDEV_TREE, &nvl) != 0 || ++ nvlist_lookup_nvlist_array(nvl, ZPOOL_CONFIG_CHILDREN, &child, ++ &children) != 0) ++ return (spa_vdev_exit(spa, NULL, txg, EINVAL)); ++ ++ /* first, check to ensure we've got the right child count */ ++ rvd = spa->spa_root_vdev; ++ lastlog = 0; ++ for (c = 0; c < rvd->vdev_children; c++) { ++ vdev_t *vd = rvd->vdev_child[c]; ++ ++ /* don't count the holes & logs as children */ ++ if (vd->vdev_islog || vd->vdev_ishole) { ++ if (lastlog == 0) ++ lastlog = c; ++ continue; ++ } ++ ++ lastlog = 0; ++ } ++ if (children != (lastlog != 0 ? lastlog : rvd->vdev_children)) ++ return (spa_vdev_exit(spa, NULL, txg, EINVAL)); ++ ++ /* next, ensure no spare or cache devices are part of the split */ ++ if (nvlist_lookup_nvlist(nvl, ZPOOL_CONFIG_SPARES, &tmp) == 0 || ++ nvlist_lookup_nvlist(nvl, ZPOOL_CONFIG_L2CACHE, &tmp) == 0) ++ return (spa_vdev_exit(spa, NULL, txg, EINVAL)); ++ ++ vml = kmem_zalloc(children * sizeof (vdev_t *), KM_PUSHPAGE); ++ glist = kmem_zalloc(children * sizeof (uint64_t), KM_PUSHPAGE); ++ ++ /* then, loop over each vdev and validate it */ ++ for (c = 0; c < children; c++) { ++ uint64_t is_hole = 0; ++ ++ (void) nvlist_lookup_uint64(child[c], ZPOOL_CONFIG_IS_HOLE, ++ &is_hole); ++ ++ if (is_hole != 0) { ++ if (spa->spa_root_vdev->vdev_child[c]->vdev_ishole || ++ spa->spa_root_vdev->vdev_child[c]->vdev_islog) { ++ continue; ++ } else { ++ error = EINVAL; ++ break; ++ } ++ } ++ ++ /* which disk is going to be split? 
*/ ++ if (nvlist_lookup_uint64(child[c], ZPOOL_CONFIG_GUID, ++ &glist[c]) != 0) { ++ error = EINVAL; ++ break; ++ } ++ ++ /* look it up in the spa */ ++ vml[c] = spa_lookup_by_guid(spa, glist[c], B_FALSE); ++ if (vml[c] == NULL) { ++ error = ENODEV; ++ break; ++ } ++ ++ /* make sure there's nothing stopping the split */ ++ if (vml[c]->vdev_parent->vdev_ops != &vdev_mirror_ops || ++ vml[c]->vdev_islog || ++ vml[c]->vdev_ishole || ++ vml[c]->vdev_isspare || ++ vml[c]->vdev_isl2cache || ++ !vdev_writeable(vml[c]) || ++ vml[c]->vdev_children != 0 || ++ vml[c]->vdev_state != VDEV_STATE_HEALTHY || ++ c != spa->spa_root_vdev->vdev_child[c]->vdev_id) { ++ error = EINVAL; ++ break; ++ } ++ ++ if (vdev_dtl_required(vml[c])) { ++ error = EBUSY; ++ break; ++ } ++ ++ /* we need certain info from the top level */ ++ VERIFY(nvlist_add_uint64(child[c], ZPOOL_CONFIG_METASLAB_ARRAY, ++ vml[c]->vdev_top->vdev_ms_array) == 0); ++ VERIFY(nvlist_add_uint64(child[c], ZPOOL_CONFIG_METASLAB_SHIFT, ++ vml[c]->vdev_top->vdev_ms_shift) == 0); ++ VERIFY(nvlist_add_uint64(child[c], ZPOOL_CONFIG_ASIZE, ++ vml[c]->vdev_top->vdev_asize) == 0); ++ VERIFY(nvlist_add_uint64(child[c], ZPOOL_CONFIG_ASHIFT, ++ vml[c]->vdev_top->vdev_ashift) == 0); ++ } ++ ++ if (error != 0) { ++ kmem_free(vml, children * sizeof (vdev_t *)); ++ kmem_free(glist, children * sizeof (uint64_t)); ++ return (spa_vdev_exit(spa, NULL, txg, error)); ++ } ++ ++ /* stop writers from using the disks */ ++ for (c = 0; c < children; c++) { ++ if (vml[c] != NULL) ++ vml[c]->vdev_offline = B_TRUE; ++ } ++ vdev_reopen(spa->spa_root_vdev); ++ ++ /* ++ * Temporarily record the splitting vdevs in the spa config. This ++ * will disappear once the config is regenerated. ++ */ ++ VERIFY(nvlist_alloc(&nvl, NV_UNIQUE_NAME, KM_PUSHPAGE) == 0); ++ VERIFY(nvlist_add_uint64_array(nvl, ZPOOL_CONFIG_SPLIT_LIST, ++ glist, children) == 0); ++ kmem_free(glist, children * sizeof (uint64_t)); ++ ++ mutex_enter(&spa->spa_props_lock); ++ VERIFY(nvlist_add_nvlist(spa->spa_config, ZPOOL_CONFIG_SPLIT, ++ nvl) == 0); ++ mutex_exit(&spa->spa_props_lock); ++ spa->spa_config_splitting = nvl; ++ vdev_config_dirty(spa->spa_root_vdev); ++ ++ /* configure and create the new pool */ ++ VERIFY(nvlist_add_string(config, ZPOOL_CONFIG_POOL_NAME, newname) == 0); ++ VERIFY(nvlist_add_uint64(config, ZPOOL_CONFIG_POOL_STATE, ++ exp ? 
POOL_STATE_EXPORTED : POOL_STATE_ACTIVE) == 0); ++ VERIFY(nvlist_add_uint64(config, ZPOOL_CONFIG_VERSION, ++ spa_version(spa)) == 0); ++ VERIFY(nvlist_add_uint64(config, ZPOOL_CONFIG_POOL_TXG, ++ spa->spa_config_txg) == 0); ++ VERIFY(nvlist_add_uint64(config, ZPOOL_CONFIG_POOL_GUID, ++ spa_generate_guid(NULL)) == 0); ++ (void) nvlist_lookup_string(props, ++ zpool_prop_to_name(ZPOOL_PROP_ALTROOT), &altroot); ++ ++ /* add the new pool to the namespace */ ++ newspa = spa_add(newname, config, altroot); ++ newspa->spa_config_txg = spa->spa_config_txg; ++ spa_set_log_state(newspa, SPA_LOG_CLEAR); ++ ++ /* release the spa config lock, retaining the namespace lock */ ++ spa_vdev_config_exit(spa, NULL, txg, 0, FTAG); ++ ++ if (zio_injection_enabled) ++ zio_handle_panic_injection(spa, FTAG, 1); ++ ++ spa_activate(newspa, spa_mode_global); ++ spa_async_suspend(newspa); ++ ++ /* create the new pool from the disks of the original pool */ ++ error = spa_load(newspa, SPA_LOAD_IMPORT, SPA_IMPORT_ASSEMBLE, B_TRUE); ++ if (error) ++ goto out; ++ ++ /* if that worked, generate a real config for the new pool */ ++ if (newspa->spa_root_vdev != NULL) { ++ VERIFY(nvlist_alloc(&newspa->spa_config_splitting, ++ NV_UNIQUE_NAME, KM_PUSHPAGE) == 0); ++ VERIFY(nvlist_add_uint64(newspa->spa_config_splitting, ++ ZPOOL_CONFIG_SPLIT_GUID, spa_guid(spa)) == 0); ++ spa_config_set(newspa, spa_config_generate(newspa, NULL, -1ULL, ++ B_TRUE)); ++ } ++ ++ /* set the props */ ++ if (props != NULL) { ++ spa_configfile_set(newspa, props, B_FALSE); ++ error = spa_prop_set(newspa, props); ++ if (error) ++ goto out; ++ } ++ ++ /* flush everything */ ++ txg = spa_vdev_config_enter(newspa); ++ vdev_config_dirty(newspa->spa_root_vdev); ++ (void) spa_vdev_config_exit(newspa, NULL, txg, 0, FTAG); ++ ++ if (zio_injection_enabled) ++ zio_handle_panic_injection(spa, FTAG, 2); ++ ++ spa_async_resume(newspa); ++ ++ /* finally, update the original pool's config */ ++ txg = spa_vdev_config_enter(spa); ++ tx = dmu_tx_create_dd(spa_get_dsl(spa)->dp_mos_dir); ++ error = dmu_tx_assign(tx, TXG_WAIT); ++ if (error != 0) ++ dmu_tx_abort(tx); ++ for (c = 0; c < children; c++) { ++ if (vml[c] != NULL) { ++ vdev_split(vml[c]); ++ if (error == 0) ++ spa_history_log_internal(LOG_POOL_VDEV_DETACH, ++ spa, tx, "vdev=%s", ++ vml[c]->vdev_path); ++ vdev_free(vml[c]); ++ } ++ } ++ vdev_config_dirty(spa->spa_root_vdev); ++ spa->spa_config_splitting = NULL; ++ nvlist_free(nvl); ++ if (error == 0) ++ dmu_tx_commit(tx); ++ (void) spa_vdev_exit(spa, NULL, txg, 0); ++ ++ if (zio_injection_enabled) ++ zio_handle_panic_injection(spa, FTAG, 3); ++ ++ /* split is complete; log a history record */ ++ spa_history_log_internal(LOG_POOL_SPLIT, newspa, NULL, ++ "split new pool %s from pool %s", newname, spa_name(spa)); ++ ++ kmem_free(vml, children * sizeof (vdev_t *)); ++ ++ /* if we're not going to mount the filesystems in userland, export */ ++ if (exp) ++ error = spa_export_common(newname, POOL_STATE_EXPORTED, NULL, ++ B_FALSE, B_FALSE); ++ ++ return (error); ++ ++out: ++ spa_unload(newspa); ++ spa_deactivate(newspa); ++ spa_remove(newspa); ++ ++ txg = spa_vdev_config_enter(spa); ++ ++ /* re-online all offlined disks */ ++ for (c = 0; c < children; c++) { ++ if (vml[c] != NULL) ++ vml[c]->vdev_offline = B_FALSE; ++ } ++ vdev_reopen(spa->spa_root_vdev); ++ ++ nvlist_free(spa->spa_config_splitting); ++ spa->spa_config_splitting = NULL; ++ (void) spa_vdev_exit(spa, NULL, txg, error); ++ ++ kmem_free(vml, children * sizeof (vdev_t *)); ++ return (error); ++} ++ ++static 
nvlist_t * ++spa_nvlist_lookup_by_guid(nvlist_t **nvpp, int count, uint64_t target_guid) ++{ ++ int i; ++ ++ for (i = 0; i < count; i++) { ++ uint64_t guid; ++ ++ VERIFY(nvlist_lookup_uint64(nvpp[i], ZPOOL_CONFIG_GUID, ++ &guid) == 0); ++ ++ if (guid == target_guid) ++ return (nvpp[i]); ++ } ++ ++ return (NULL); ++} ++ ++static void ++spa_vdev_remove_aux(nvlist_t *config, char *name, nvlist_t **dev, int count, ++ nvlist_t *dev_to_remove) ++{ ++ nvlist_t **newdev = NULL; ++ int i, j; ++ ++ if (count > 1) ++ newdev = kmem_alloc((count - 1) * sizeof (void *), KM_PUSHPAGE); ++ ++ for (i = 0, j = 0; i < count; i++) { ++ if (dev[i] == dev_to_remove) ++ continue; ++ VERIFY(nvlist_dup(dev[i], &newdev[j++], KM_PUSHPAGE) == 0); ++ } ++ ++ VERIFY(nvlist_remove(config, name, DATA_TYPE_NVLIST_ARRAY) == 0); ++ VERIFY(nvlist_add_nvlist_array(config, name, newdev, count - 1) == 0); ++ ++ for (i = 0; i < count - 1; i++) ++ nvlist_free(newdev[i]); ++ ++ if (count > 1) ++ kmem_free(newdev, (count - 1) * sizeof (void *)); ++} ++ ++/* ++ * Evacuate the device. ++ */ ++static int ++spa_vdev_remove_evacuate(spa_t *spa, vdev_t *vd) ++{ ++ uint64_t txg; ++ int error = 0; ++ ++ ASSERT(MUTEX_HELD(&spa_namespace_lock)); ++ ASSERT(spa_config_held(spa, SCL_ALL, RW_WRITER) == 0); ++ ASSERT(vd == vd->vdev_top); ++ ++ /* ++ * Evacuate the device. We don't hold the config lock as writer ++ * since we need to do I/O but we do keep the ++ * spa_namespace_lock held. Once this completes the device ++ * should no longer have any blocks allocated on it. ++ */ ++ if (vd->vdev_islog) { ++ if (vd->vdev_stat.vs_alloc != 0) ++ error = spa_offline_log(spa); ++ } else { ++ error = ENOTSUP; ++ } ++ ++ if (error) ++ return (error); ++ ++ /* ++ * The evacuation succeeded. Remove any remaining MOS metadata ++ * associated with this vdev, and wait for these changes to sync. ++ */ ++ ASSERT3U(vd->vdev_stat.vs_alloc, ==, 0); ++ txg = spa_vdev_config_enter(spa); ++ vd->vdev_removing = B_TRUE; ++ vdev_dirty(vd, 0, NULL, txg); ++ vdev_config_dirty(vd); ++ spa_vdev_config_exit(spa, NULL, txg, 0, FTAG); ++ ++ return (0); ++} ++ ++/* ++ * Complete the removal by cleaning up the namespace. ++ */ ++static void ++spa_vdev_remove_from_namespace(spa_t *spa, vdev_t *vd) ++{ ++ vdev_t *rvd = spa->spa_root_vdev; ++ uint64_t id = vd->vdev_id; ++ boolean_t last_vdev = (id == (rvd->vdev_children - 1)); ++ ++ ASSERT(MUTEX_HELD(&spa_namespace_lock)); ++ ASSERT(spa_config_held(spa, SCL_ALL, RW_WRITER) == SCL_ALL); ++ ASSERT(vd == vd->vdev_top); ++ ++ /* ++ * Only remove any devices which are empty. ++ */ ++ if (vd->vdev_stat.vs_alloc != 0) ++ return; ++ ++ (void) vdev_label_init(vd, 0, VDEV_LABEL_REMOVE); ++ ++ if (list_link_active(&vd->vdev_state_dirty_node)) ++ vdev_state_clean(vd); ++ if (list_link_active(&vd->vdev_config_dirty_node)) ++ vdev_config_clean(vd); ++ ++ vdev_free(vd); ++ ++ if (last_vdev) { ++ vdev_compact_children(rvd); ++ } else { ++ vd = vdev_alloc_common(spa, id, 0, &vdev_hole_ops); ++ vdev_add_child(rvd, vd); ++ } ++ vdev_config_dirty(rvd); ++ ++ /* ++ * Reassess the health of our root vdev. ++ */ ++ vdev_reopen(rvd); ++} ++ ++/* ++ * Remove a device from the pool - ++ * ++ * Removing a device from the vdev namespace requires several steps ++ * and can take a significant amount of time. As a result we use ++ * the spa_vdev_config_[enter/exit] functions which allow us to ++ * grab and release the spa_config_lock while still holding the namespace ++ * lock. During each step the configuration is synced out. 
++ */ ++ ++/* ++ * Remove a device from the pool. Currently, this supports removing only hot ++ * spares, slogs, and level 2 ARC devices. ++ */ ++int ++spa_vdev_remove(spa_t *spa, uint64_t guid, boolean_t unspare) ++{ ++ vdev_t *vd; ++ metaslab_group_t *mg; ++ nvlist_t **spares, **l2cache, *nv; ++ uint64_t txg = 0; ++ uint_t nspares, nl2cache; ++ int error = 0; ++ boolean_t locked = MUTEX_HELD(&spa_namespace_lock); ++ ++ ASSERT(spa_writeable(spa)); ++ ++ if (!locked) ++ txg = spa_vdev_enter(spa); ++ ++ vd = spa_lookup_by_guid(spa, guid, B_FALSE); ++ ++ if (spa->spa_spares.sav_vdevs != NULL && ++ nvlist_lookup_nvlist_array(spa->spa_spares.sav_config, ++ ZPOOL_CONFIG_SPARES, &spares, &nspares) == 0 && ++ (nv = spa_nvlist_lookup_by_guid(spares, nspares, guid)) != NULL) { ++ /* ++ * Only remove the hot spare if it's not currently in use ++ * in this pool. ++ */ ++ if (vd == NULL || unspare) { ++ spa_vdev_remove_aux(spa->spa_spares.sav_config, ++ ZPOOL_CONFIG_SPARES, spares, nspares, nv); ++ spa_load_spares(spa); ++ spa->spa_spares.sav_sync = B_TRUE; ++ } else { ++ error = EBUSY; ++ } ++ } else if (spa->spa_l2cache.sav_vdevs != NULL && ++ nvlist_lookup_nvlist_array(spa->spa_l2cache.sav_config, ++ ZPOOL_CONFIG_L2CACHE, &l2cache, &nl2cache) == 0 && ++ (nv = spa_nvlist_lookup_by_guid(l2cache, nl2cache, guid)) != NULL) { ++ /* ++ * Cache devices can always be removed. ++ */ ++ spa_vdev_remove_aux(spa->spa_l2cache.sav_config, ++ ZPOOL_CONFIG_L2CACHE, l2cache, nl2cache, nv); ++ spa_load_l2cache(spa); ++ spa->spa_l2cache.sav_sync = B_TRUE; ++ } else if (vd != NULL && vd->vdev_islog) { ++ ASSERT(!locked); ++ ASSERT(vd == vd->vdev_top); ++ ++ /* ++ * XXX - Once we have bp-rewrite this should ++ * become the common case. ++ */ ++ ++ mg = vd->vdev_mg; ++ ++ /* ++ * Stop allocating from this vdev. ++ */ ++ metaslab_group_passivate(mg); ++ ++ /* ++ * Wait for the youngest allocations and frees to sync, ++ * and then wait for the deferral of those frees to finish. ++ */ ++ spa_vdev_config_exit(spa, NULL, ++ txg + TXG_CONCURRENT_STATES + TXG_DEFER_SIZE, 0, FTAG); ++ ++ /* ++ * Attempt to evacuate the vdev. ++ */ ++ error = spa_vdev_remove_evacuate(spa, vd); ++ ++ txg = spa_vdev_config_enter(spa); ++ ++ /* ++ * If we couldn't evacuate the vdev, unwind. ++ */ ++ if (error) { ++ metaslab_group_activate(mg); ++ return (spa_vdev_exit(spa, NULL, txg, error)); ++ } ++ ++ /* ++ * Clean up the vdev namespace. ++ */ ++ spa_vdev_remove_from_namespace(spa, vd); ++ ++ } else if (vd != NULL) { ++ /* ++ * Normal vdevs cannot be removed (yet). ++ */ ++ error = ENOTSUP; ++ } else { ++ /* ++ * There is no vdev of any kind with the specified guid. ++ */ ++ error = ENOENT; ++ } ++ ++ if (!locked) ++ return (spa_vdev_exit(spa, NULL, txg, error)); ++ ++ return (error); ++} ++ ++/* ++ * Find any device that's done replacing, or a vdev marked 'unspare' that's ++ * current spared, so we can detach it. ++ */ ++static vdev_t * ++spa_vdev_resilver_done_hunt(vdev_t *vd) ++{ ++ vdev_t *newvd, *oldvd; ++ int c; ++ ++ for (c = 0; c < vd->vdev_children; c++) { ++ oldvd = spa_vdev_resilver_done_hunt(vd->vdev_child[c]); ++ if (oldvd != NULL) ++ return (oldvd); ++ } ++ ++ /* ++ * Check for a completed replacement. We always consider the first ++ * vdev in the list to be the oldest vdev, and the last one to be ++ * the newest (see spa_vdev_attach() for how that works). In ++ * the case where the newest vdev is faulted, we will not automatically ++ * remove it after a resilver completes. 
This is OK as it will require ++ * user intervention to determine which disk the admin wishes to keep. ++ */ ++ if (vd->vdev_ops == &vdev_replacing_ops) { ++ ASSERT(vd->vdev_children > 1); ++ ++ newvd = vd->vdev_child[vd->vdev_children - 1]; ++ oldvd = vd->vdev_child[0]; ++ ++ if (vdev_dtl_empty(newvd, DTL_MISSING) && ++ vdev_dtl_empty(newvd, DTL_OUTAGE) && ++ !vdev_dtl_required(oldvd)) ++ return (oldvd); ++ } ++ ++ /* ++ * Check for a completed resilver with the 'unspare' flag set. ++ */ ++ if (vd->vdev_ops == &vdev_spare_ops) { ++ vdev_t *first = vd->vdev_child[0]; ++ vdev_t *last = vd->vdev_child[vd->vdev_children - 1]; ++ ++ if (last->vdev_unspare) { ++ oldvd = first; ++ newvd = last; ++ } else if (first->vdev_unspare) { ++ oldvd = last; ++ newvd = first; ++ } else { ++ oldvd = NULL; ++ } ++ ++ if (oldvd != NULL && ++ vdev_dtl_empty(newvd, DTL_MISSING) && ++ vdev_dtl_empty(newvd, DTL_OUTAGE) && ++ !vdev_dtl_required(oldvd)) ++ return (oldvd); ++ ++ /* ++ * If there are more than two spares attached to a disk, ++ * and those spares are not required, then we want to ++ * attempt to free them up now so that they can be used ++ * by other pools. Once we're back down to a single ++ * disk+spare, we stop removing them. ++ */ ++ if (vd->vdev_children > 2) { ++ newvd = vd->vdev_child[1]; ++ ++ if (newvd->vdev_isspare && last->vdev_isspare && ++ vdev_dtl_empty(last, DTL_MISSING) && ++ vdev_dtl_empty(last, DTL_OUTAGE) && ++ !vdev_dtl_required(newvd)) ++ return (newvd); ++ } ++ } ++ ++ return (NULL); ++} ++ ++static void ++spa_vdev_resilver_done(spa_t *spa) ++{ ++ vdev_t *vd, *pvd, *ppvd; ++ uint64_t guid, sguid, pguid, ppguid; ++ ++ spa_config_enter(spa, SCL_ALL, FTAG, RW_WRITER); ++ ++ while ((vd = spa_vdev_resilver_done_hunt(spa->spa_root_vdev)) != NULL) { ++ pvd = vd->vdev_parent; ++ ppvd = pvd->vdev_parent; ++ guid = vd->vdev_guid; ++ pguid = pvd->vdev_guid; ++ ppguid = ppvd->vdev_guid; ++ sguid = 0; ++ /* ++ * If we have just finished replacing a hot spared device, then ++ * we need to detach the parent's first child (the original hot ++ * spare) as well. ++ */ ++ if (ppvd->vdev_ops == &vdev_spare_ops && pvd->vdev_id == 0 && ++ ppvd->vdev_children == 2) { ++ ASSERT(pvd->vdev_ops == &vdev_replacing_ops); ++ sguid = ppvd->vdev_child[1]->vdev_guid; ++ } ++ spa_config_exit(spa, SCL_ALL, FTAG); ++ if (spa_vdev_detach(spa, guid, pguid, B_TRUE) != 0) ++ return; ++ if (sguid && spa_vdev_detach(spa, sguid, ppguid, B_TRUE) != 0) ++ return; ++ spa_config_enter(spa, SCL_ALL, FTAG, RW_WRITER); ++ } ++ ++ spa_config_exit(spa, SCL_ALL, FTAG); ++} ++ ++/* ++ * Update the stored path or FRU for this vdev. ++ */ ++int ++spa_vdev_set_common(spa_t *spa, uint64_t guid, const char *value, ++ boolean_t ispath) ++{ ++ vdev_t *vd; ++ boolean_t sync = B_FALSE; ++ ++ ASSERT(spa_writeable(spa)); ++ ++ spa_vdev_state_enter(spa, SCL_ALL); ++ ++ if ((vd = spa_lookup_by_guid(spa, guid, B_TRUE)) == NULL) ++ return (spa_vdev_state_exit(spa, NULL, ENOENT)); ++ ++ if (!vd->vdev_ops->vdev_op_leaf) ++ return (spa_vdev_state_exit(spa, NULL, ENOTSUP)); ++ ++ if (ispath) { ++ if (strcmp(value, vd->vdev_path) != 0) { ++ spa_strfree(vd->vdev_path); ++ vd->vdev_path = spa_strdup(value); ++ sync = B_TRUE; ++ } ++ } else { ++ if (vd->vdev_fru == NULL) { ++ vd->vdev_fru = spa_strdup(value); ++ sync = B_TRUE; ++ } else if (strcmp(value, vd->vdev_fru) != 0) { ++ spa_strfree(vd->vdev_fru); ++ vd->vdev_fru = spa_strdup(value); ++ sync = B_TRUE; ++ } ++ } ++ ++ return (spa_vdev_state_exit(spa, sync ? 
vd : NULL, 0)); ++} ++ ++int ++spa_vdev_setpath(spa_t *spa, uint64_t guid, const char *newpath) ++{ ++ return (spa_vdev_set_common(spa, guid, newpath, B_TRUE)); ++} ++ ++int ++spa_vdev_setfru(spa_t *spa, uint64_t guid, const char *newfru) ++{ ++ return (spa_vdev_set_common(spa, guid, newfru, B_FALSE)); ++} ++ ++/* ++ * ========================================================================== ++ * SPA Scanning ++ * ========================================================================== ++ */ ++ ++int ++spa_scan_stop(spa_t *spa) ++{ ++ ASSERT(spa_config_held(spa, SCL_ALL, RW_WRITER) == 0); ++ if (dsl_scan_resilvering(spa->spa_dsl_pool)) ++ return (EBUSY); ++ return (dsl_scan_cancel(spa->spa_dsl_pool)); ++} ++ ++int ++spa_scan(spa_t *spa, pool_scan_func_t func) ++{ ++ ASSERT(spa_config_held(spa, SCL_ALL, RW_WRITER) == 0); ++ ++ if (func >= POOL_SCAN_FUNCS || func == POOL_SCAN_NONE) ++ return (ENOTSUP); ++ ++ /* ++ * If a resilver was requested, but there is no DTL on a ++ * writeable leaf device, we have nothing to do. ++ */ ++ if (func == POOL_SCAN_RESILVER && ++ !vdev_resilver_needed(spa->spa_root_vdev, NULL, NULL)) { ++ spa_async_request(spa, SPA_ASYNC_RESILVER_DONE); ++ return (0); ++ } ++ ++ return (dsl_scan(spa->spa_dsl_pool, func)); ++} ++ ++/* ++ * ========================================================================== ++ * SPA async task processing ++ * ========================================================================== ++ */ ++ ++static void ++spa_async_remove(spa_t *spa, vdev_t *vd) ++{ ++ int c; ++ ++ if (vd->vdev_remove_wanted) { ++ vd->vdev_remove_wanted = B_FALSE; ++ vd->vdev_delayed_close = B_FALSE; ++ vdev_set_state(vd, B_FALSE, VDEV_STATE_REMOVED, VDEV_AUX_NONE); ++ ++ /* ++ * We want to clear the stats, but we don't want to do a full ++ * vdev_clear() as that will cause us to throw away ++ * degraded/faulted state as well as attempt to reopen the ++ * device, all of which is a waste. ++ */ ++ vd->vdev_stat.vs_read_errors = 0; ++ vd->vdev_stat.vs_write_errors = 0; ++ vd->vdev_stat.vs_checksum_errors = 0; ++ ++ vdev_state_dirty(vd->vdev_top); ++ } ++ ++ for (c = 0; c < vd->vdev_children; c++) ++ spa_async_remove(spa, vd->vdev_child[c]); ++} ++ ++static void ++spa_async_probe(spa_t *spa, vdev_t *vd) ++{ ++ int c; ++ ++ if (vd->vdev_probe_wanted) { ++ vd->vdev_probe_wanted = B_FALSE; ++ vdev_reopen(vd); /* vdev_open() does the actual probe */ ++ } ++ ++ for (c = 0; c < vd->vdev_children; c++) ++ spa_async_probe(spa, vd->vdev_child[c]); ++} ++ ++static void ++spa_async_autoexpand(spa_t *spa, vdev_t *vd) ++{ ++ int c; ++ ++ if (!spa->spa_autoexpand) ++ return; ++ ++ for (c = 0; c < vd->vdev_children; c++) { ++ vdev_t *cvd = vd->vdev_child[c]; ++ spa_async_autoexpand(spa, cvd); ++ } ++ ++ if (!vd->vdev_ops->vdev_op_leaf || vd->vdev_physpath == NULL) ++ return; ++ ++ spa_event_notify(vd->vdev_spa, vd, FM_EREPORT_ZFS_DEVICE_AUTOEXPAND); ++} ++ ++static void ++spa_async_thread(spa_t *spa) ++{ ++ int tasks, i; ++ ++ ASSERT(spa->spa_sync_on); ++ ++ mutex_enter(&spa->spa_async_lock); ++ tasks = spa->spa_async_tasks; ++ spa->spa_async_tasks = 0; ++ mutex_exit(&spa->spa_async_lock); ++ ++ /* ++ * See if the config needs to be updated. 
++ */ ++ if (tasks & SPA_ASYNC_CONFIG_UPDATE) { ++ uint64_t old_space, new_space; ++ ++ mutex_enter(&spa_namespace_lock); ++ old_space = metaslab_class_get_space(spa_normal_class(spa)); ++ spa_config_update(spa, SPA_CONFIG_UPDATE_POOL); ++ new_space = metaslab_class_get_space(spa_normal_class(spa)); ++ mutex_exit(&spa_namespace_lock); ++ ++ /* ++ * If the pool grew as a result of the config update, ++ * then log an internal history event. ++ */ ++ if (new_space != old_space) { ++ spa_history_log_internal(LOG_POOL_VDEV_ONLINE, ++ spa, NULL, ++ "pool '%s' size: %llu(+%llu)", ++ spa_name(spa), new_space, new_space - old_space); ++ } ++ } ++ ++ /* ++ * See if any devices need to be marked REMOVED. ++ */ ++ if (tasks & SPA_ASYNC_REMOVE) { ++ spa_vdev_state_enter(spa, SCL_NONE); ++ spa_async_remove(spa, spa->spa_root_vdev); ++ for (i = 0; i < spa->spa_l2cache.sav_count; i++) ++ spa_async_remove(spa, spa->spa_l2cache.sav_vdevs[i]); ++ for (i = 0; i < spa->spa_spares.sav_count; i++) ++ spa_async_remove(spa, spa->spa_spares.sav_vdevs[i]); ++ (void) spa_vdev_state_exit(spa, NULL, 0); ++ } ++ ++ if ((tasks & SPA_ASYNC_AUTOEXPAND) && !spa_suspended(spa)) { ++ spa_config_enter(spa, SCL_CONFIG, FTAG, RW_READER); ++ spa_async_autoexpand(spa, spa->spa_root_vdev); ++ spa_config_exit(spa, SCL_CONFIG, FTAG); ++ } ++ ++ /* ++ * See if any devices need to be probed. ++ */ ++ if (tasks & SPA_ASYNC_PROBE) { ++ spa_vdev_state_enter(spa, SCL_NONE); ++ spa_async_probe(spa, spa->spa_root_vdev); ++ (void) spa_vdev_state_exit(spa, NULL, 0); ++ } ++ ++ /* ++ * If any devices are done replacing, detach them. ++ */ ++ if (tasks & SPA_ASYNC_RESILVER_DONE) ++ spa_vdev_resilver_done(spa); ++ ++ /* ++ * Kick off a resilver. ++ */ ++ if (tasks & SPA_ASYNC_RESILVER) ++ dsl_resilver_restart(spa->spa_dsl_pool, 0); ++ ++ /* ++ * Let the world know that we're done. 
++ */ ++ mutex_enter(&spa->spa_async_lock); ++ spa->spa_async_thread = NULL; ++ cv_broadcast(&spa->spa_async_cv); ++ mutex_exit(&spa->spa_async_lock); ++ thread_exit(); ++} ++ ++void ++spa_async_suspend(spa_t *spa) ++{ ++ mutex_enter(&spa->spa_async_lock); ++ spa->spa_async_suspended++; ++ while (spa->spa_async_thread != NULL) ++ cv_wait(&spa->spa_async_cv, &spa->spa_async_lock); ++ mutex_exit(&spa->spa_async_lock); ++} ++ ++void ++spa_async_resume(spa_t *spa) ++{ ++ mutex_enter(&spa->spa_async_lock); ++ ASSERT(spa->spa_async_suspended != 0); ++ spa->spa_async_suspended--; ++ mutex_exit(&spa->spa_async_lock); ++} ++ ++static void ++spa_async_dispatch(spa_t *spa) ++{ ++ mutex_enter(&spa->spa_async_lock); ++ if (spa->spa_async_tasks && !spa->spa_async_suspended && ++ spa->spa_async_thread == NULL && ++ rootdir != NULL && !vn_is_readonly(rootdir)) ++ spa->spa_async_thread = thread_create(NULL, 0, ++ spa_async_thread, spa, 0, &p0, TS_RUN, maxclsyspri); ++ mutex_exit(&spa->spa_async_lock); ++} ++ ++void ++spa_async_request(spa_t *spa, int task) ++{ ++ zfs_dbgmsg("spa=%s async request task=%u", spa->spa_name, task); ++ mutex_enter(&spa->spa_async_lock); ++ spa->spa_async_tasks |= task; ++ mutex_exit(&spa->spa_async_lock); ++} ++ ++/* ++ * ========================================================================== ++ * SPA syncing routines ++ * ========================================================================== ++ */ ++ ++static int ++bpobj_enqueue_cb(void *arg, const blkptr_t *bp, dmu_tx_t *tx) ++{ ++ bpobj_t *bpo = arg; ++ bpobj_enqueue(bpo, bp, tx); ++ return (0); ++} ++ ++static int ++spa_free_sync_cb(void *arg, const blkptr_t *bp, dmu_tx_t *tx) ++{ ++ zio_t *zio = arg; ++ ++ zio_nowait(zio_free_sync(zio, zio->io_spa, dmu_tx_get_txg(tx), bp, ++ zio->io_flags)); ++ return (0); ++} ++ ++static void ++spa_sync_nvlist(spa_t *spa, uint64_t obj, nvlist_t *nv, dmu_tx_t *tx) ++{ ++ char *packed = NULL; ++ size_t bufsize; ++ size_t nvsize = 0; ++ dmu_buf_t *db; ++ ++ VERIFY(nvlist_size(nv, &nvsize, NV_ENCODE_XDR) == 0); ++ ++ /* ++ * Write full (SPA_CONFIG_BLOCKSIZE) blocks of configuration ++ * information. This avoids the dbuf_will_dirty() path and ++ * saves us a pre-read to get data we don't actually care about. ++ */ ++ bufsize = P2ROUNDUP(nvsize, SPA_CONFIG_BLOCKSIZE); ++ packed = vmem_alloc(bufsize, KM_PUSHPAGE); ++ ++ VERIFY(nvlist_pack(nv, &packed, &nvsize, NV_ENCODE_XDR, ++ KM_PUSHPAGE) == 0); ++ bzero(packed + nvsize, bufsize - nvsize); ++ ++ dmu_write(spa->spa_meta_objset, obj, 0, bufsize, packed, tx); ++ ++ vmem_free(packed, bufsize); ++ ++ VERIFY(0 == dmu_bonus_hold(spa->spa_meta_objset, obj, FTAG, &db)); ++ dmu_buf_will_dirty(db, tx); ++ *(uint64_t *)db->db_data = nvsize; ++ dmu_buf_rele(db, FTAG); ++} ++ ++static void ++spa_sync_aux_dev(spa_t *spa, spa_aux_vdev_t *sav, dmu_tx_t *tx, ++ const char *config, const char *entry) ++{ ++ nvlist_t *nvroot; ++ nvlist_t **list; ++ int i; ++ ++ if (!sav->sav_sync) ++ return; ++ ++ /* ++ * Update the MOS nvlist describing the list of available devices. ++ * spa_validate_aux() will have already made sure this nvlist is ++ * valid and the vdevs are labeled appropriately. 
++ */ ++ if (sav->sav_object == 0) { ++ sav->sav_object = dmu_object_alloc(spa->spa_meta_objset, ++ DMU_OT_PACKED_NVLIST, 1 << 14, DMU_OT_PACKED_NVLIST_SIZE, ++ sizeof (uint64_t), tx); ++ VERIFY(zap_update(spa->spa_meta_objset, ++ DMU_POOL_DIRECTORY_OBJECT, entry, sizeof (uint64_t), 1, ++ &sav->sav_object, tx) == 0); ++ } ++ ++ VERIFY(nvlist_alloc(&nvroot, NV_UNIQUE_NAME, KM_PUSHPAGE) == 0); ++ if (sav->sav_count == 0) { ++ VERIFY(nvlist_add_nvlist_array(nvroot, config, NULL, 0) == 0); ++ } else { ++ list = kmem_alloc(sav->sav_count * sizeof (void *), KM_PUSHPAGE); ++ for (i = 0; i < sav->sav_count; i++) ++ list[i] = vdev_config_generate(spa, sav->sav_vdevs[i], ++ B_FALSE, VDEV_CONFIG_L2CACHE); ++ VERIFY(nvlist_add_nvlist_array(nvroot, config, list, ++ sav->sav_count) == 0); ++ for (i = 0; i < sav->sav_count; i++) ++ nvlist_free(list[i]); ++ kmem_free(list, sav->sav_count * sizeof (void *)); ++ } ++ ++ spa_sync_nvlist(spa, sav->sav_object, nvroot, tx); ++ nvlist_free(nvroot); ++ ++ sav->sav_sync = B_FALSE; ++} ++ ++static void ++spa_sync_config_object(spa_t *spa, dmu_tx_t *tx) ++{ ++ nvlist_t *config; ++ ++ if (list_is_empty(&spa->spa_config_dirty_list)) ++ return; ++ ++ spa_config_enter(spa, SCL_STATE, FTAG, RW_READER); ++ ++ config = spa_config_generate(spa, spa->spa_root_vdev, ++ dmu_tx_get_txg(tx), B_FALSE); ++ ++ spa_config_exit(spa, SCL_STATE, FTAG); ++ ++ if (spa->spa_config_syncing) ++ nvlist_free(spa->spa_config_syncing); ++ spa->spa_config_syncing = config; ++ ++ spa_sync_nvlist(spa, spa->spa_config_object, config, tx); ++} ++ ++/* ++ * Set zpool properties. ++ */ ++static void ++spa_sync_props(void *arg1, void *arg2, dmu_tx_t *tx) ++{ ++ spa_t *spa = arg1; ++ objset_t *mos = spa->spa_meta_objset; ++ nvlist_t *nvp = arg2; ++ nvpair_t *elem; ++ uint64_t intval; ++ char *strval; ++ zpool_prop_t prop; ++ const char *propname; ++ zprop_type_t proptype; ++ ++ mutex_enter(&spa->spa_props_lock); ++ ++ elem = NULL; ++ while ((elem = nvlist_next_nvpair(nvp, elem))) { ++ switch (prop = zpool_name_to_prop(nvpair_name(elem))) { ++ case ZPOOL_PROP_VERSION: ++ /* ++ * Only set version for non-zpool-creation cases ++ * (set/import). spa_create() needs special care ++ * for version setting. ++ */ ++ if (tx->tx_txg != TXG_INITIAL) { ++ VERIFY(nvpair_value_uint64(elem, ++ &intval) == 0); ++ ASSERT(intval <= SPA_VERSION); ++ ASSERT(intval >= spa_version(spa)); ++ spa->spa_uberblock.ub_version = intval; ++ vdev_config_dirty(spa->spa_root_vdev); ++ } ++ break; ++ ++ case ZPOOL_PROP_ALTROOT: ++ /* ++ * 'altroot' is a non-persistent property. It should ++ * have been set temporarily at creation or import time. ++ */ ++ ASSERT(spa->spa_root != NULL); ++ break; ++ ++ case ZPOOL_PROP_READONLY: ++ case ZPOOL_PROP_CACHEFILE: ++ /* ++ * 'readonly' and 'cachefile' are also non-persisitent ++ * properties. ++ */ ++ break; ++ case ZPOOL_PROP_COMMENT: ++ VERIFY(nvpair_value_string(elem, &strval) == 0); ++ if (spa->spa_comment != NULL) ++ spa_strfree(spa->spa_comment); ++ spa->spa_comment = spa_strdup(strval); ++ /* ++ * We need to dirty the configuration on all the vdevs ++ * so that their labels get updated. It's unnecessary ++ * to do this for pool creation since the vdev's ++ * configuratoin has already been dirtied. ++ */ ++ if (tx->tx_txg != TXG_INITIAL) ++ vdev_config_dirty(spa->spa_root_vdev); ++ break; ++ default: ++ /* ++ * Set pool property values in the poolprops mos object. 
++ */ ++ if (spa->spa_pool_props_object == 0) { ++ VERIFY((spa->spa_pool_props_object = ++ zap_create(mos, DMU_OT_POOL_PROPS, ++ DMU_OT_NONE, 0, tx)) > 0); ++ ++ VERIFY(zap_update(mos, ++ DMU_POOL_DIRECTORY_OBJECT, DMU_POOL_PROPS, ++ 8, 1, &spa->spa_pool_props_object, tx) ++ == 0); ++ } ++ ++ /* normalize the property name */ ++ propname = zpool_prop_to_name(prop); ++ proptype = zpool_prop_get_type(prop); ++ ++ if (nvpair_type(elem) == DATA_TYPE_STRING) { ++ ASSERT(proptype == PROP_TYPE_STRING); ++ VERIFY(nvpair_value_string(elem, &strval) == 0); ++ VERIFY(zap_update(mos, ++ spa->spa_pool_props_object, propname, ++ 1, strlen(strval) + 1, strval, tx) == 0); ++ ++ } else if (nvpair_type(elem) == DATA_TYPE_UINT64) { ++ VERIFY(nvpair_value_uint64(elem, &intval) == 0); ++ ++ if (proptype == PROP_TYPE_INDEX) { ++ const char *unused; ++ VERIFY(zpool_prop_index_to_string( ++ prop, intval, &unused) == 0); ++ } ++ VERIFY(zap_update(mos, ++ spa->spa_pool_props_object, propname, ++ 8, 1, &intval, tx) == 0); ++ } else { ++ ASSERT(0); /* not allowed */ ++ } ++ ++ switch (prop) { ++ case ZPOOL_PROP_DELEGATION: ++ spa->spa_delegation = intval; ++ break; ++ case ZPOOL_PROP_BOOTFS: ++ spa->spa_bootfs = intval; ++ break; ++ case ZPOOL_PROP_FAILUREMODE: ++ spa->spa_failmode = intval; ++ break; ++ case ZPOOL_PROP_AUTOEXPAND: ++ spa->spa_autoexpand = intval; ++ if (tx->tx_txg != TXG_INITIAL) ++ spa_async_request(spa, ++ SPA_ASYNC_AUTOEXPAND); ++ break; ++ case ZPOOL_PROP_DEDUPDITTO: ++ spa->spa_dedup_ditto = intval; ++ break; ++ default: ++ break; ++ } ++ } ++ ++ /* log internal history if this is not a zpool create */ ++ if (spa_version(spa) >= SPA_VERSION_ZPOOL_HISTORY && ++ tx->tx_txg != TXG_INITIAL) { ++ spa_history_log_internal(LOG_POOL_PROPSET, ++ spa, tx, "%s %lld %s", ++ nvpair_name(elem), intval, spa_name(spa)); ++ } ++ } ++ ++ mutex_exit(&spa->spa_props_lock); ++} ++ ++/* ++ * Perform one-time upgrade on-disk changes. spa_version() does not ++ * reflect the new version this txg, so there must be no changes this ++ * txg to anything that the upgrade code depends on after it executes. ++ * Therefore this must be called after dsl_pool_sync() does the sync ++ * tasks. ++ */ ++static void ++spa_sync_upgrades(spa_t *spa, dmu_tx_t *tx) ++{ ++ dsl_pool_t *dp = spa->spa_dsl_pool; ++ ++ ASSERT(spa->spa_sync_pass == 1); ++ ++ if (spa->spa_ubsync.ub_version < SPA_VERSION_ORIGIN && ++ spa->spa_uberblock.ub_version >= SPA_VERSION_ORIGIN) { ++ dsl_pool_create_origin(dp, tx); ++ ++ /* Keeping the origin open increases spa_minref */ ++ spa->spa_minref += 3; ++ } ++ ++ if (spa->spa_ubsync.ub_version < SPA_VERSION_NEXT_CLONES && ++ spa->spa_uberblock.ub_version >= SPA_VERSION_NEXT_CLONES) { ++ dsl_pool_upgrade_clones(dp, tx); ++ } ++ ++ if (spa->spa_ubsync.ub_version < SPA_VERSION_DIR_CLONES && ++ spa->spa_uberblock.ub_version >= SPA_VERSION_DIR_CLONES) { ++ dsl_pool_upgrade_dir_clones(dp, tx); ++ ++ /* Keeping the freedir open increases spa_minref */ ++ spa->spa_minref += 3; ++ } ++} ++ ++/* ++ * Sync the specified transaction group. New blocks may be dirtied as ++ * part of the process, so we iterate until it converges. 
++ */ ++void ++spa_sync(spa_t *spa, uint64_t txg) ++{ ++ dsl_pool_t *dp = spa->spa_dsl_pool; ++ objset_t *mos = spa->spa_meta_objset; ++ bpobj_t *defer_bpo = &spa->spa_deferred_bpobj; ++ bplist_t *free_bpl = &spa->spa_free_bplist[txg & TXG_MASK]; ++ vdev_t *rvd = spa->spa_root_vdev; ++ vdev_t *vd; ++ dmu_tx_t *tx; ++ int error; ++ int c; ++ ++ VERIFY(spa_writeable(spa)); ++ ++ /* ++ * Lock out configuration changes. ++ */ ++ spa_config_enter(spa, SCL_CONFIG, FTAG, RW_READER); ++ ++ spa->spa_syncing_txg = txg; ++ spa->spa_sync_pass = 0; ++ ++ /* ++ * If there are any pending vdev state changes, convert them ++ * into config changes that go out with this transaction group. ++ */ ++ spa_config_enter(spa, SCL_STATE, FTAG, RW_READER); ++ while (list_head(&spa->spa_state_dirty_list) != NULL) { ++ /* ++ * We need the write lock here because, for aux vdevs, ++ * calling vdev_config_dirty() modifies sav_config. ++ * This is ugly and will become unnecessary when we ++ * eliminate the aux vdev wart by integrating all vdevs ++ * into the root vdev tree. ++ */ ++ spa_config_exit(spa, SCL_CONFIG | SCL_STATE, FTAG); ++ spa_config_enter(spa, SCL_CONFIG | SCL_STATE, FTAG, RW_WRITER); ++ while ((vd = list_head(&spa->spa_state_dirty_list)) != NULL) { ++ vdev_state_clean(vd); ++ vdev_config_dirty(vd); ++ } ++ spa_config_exit(spa, SCL_CONFIG | SCL_STATE, FTAG); ++ spa_config_enter(spa, SCL_CONFIG | SCL_STATE, FTAG, RW_READER); ++ } ++ spa_config_exit(spa, SCL_STATE, FTAG); ++ ++ tx = dmu_tx_create_assigned(dp, txg); ++ ++ /* ++ * If we are upgrading to SPA_VERSION_RAIDZ_DEFLATE this txg, ++ * set spa_deflate if we have no raid-z vdevs. ++ */ ++ if (spa->spa_ubsync.ub_version < SPA_VERSION_RAIDZ_DEFLATE && ++ spa->spa_uberblock.ub_version >= SPA_VERSION_RAIDZ_DEFLATE) { ++ int i; ++ ++ for (i = 0; i < rvd->vdev_children; i++) { ++ vd = rvd->vdev_child[i]; ++ if (vd->vdev_deflate_ratio != SPA_MINBLOCKSIZE) ++ break; ++ } ++ if (i == rvd->vdev_children) { ++ spa->spa_deflate = TRUE; ++ VERIFY(0 == zap_add(spa->spa_meta_objset, ++ DMU_POOL_DIRECTORY_OBJECT, DMU_POOL_DEFLATE, ++ sizeof (uint64_t), 1, &spa->spa_deflate, tx)); ++ } ++ } ++ ++ /* ++ * If anything has changed in this txg, or if someone is waiting ++ * for this txg to sync (eg, spa_vdev_remove()), push the ++ * deferred frees from the previous txg. If not, leave them ++ * alone so that we don't generate work on an otherwise idle ++ * system. ++ */ ++ if (!txg_list_empty(&dp->dp_dirty_datasets, txg) || ++ !txg_list_empty(&dp->dp_dirty_dirs, txg) || ++ !txg_list_empty(&dp->dp_sync_tasks, txg) || ++ ((dsl_scan_active(dp->dp_scan) || ++ txg_sync_waiting(dp)) && !spa_shutting_down(spa))) { ++ zio_t *zio = zio_root(spa, NULL, NULL, 0); ++ VERIFY3U(bpobj_iterate(defer_bpo, ++ spa_free_sync_cb, zio, tx), ==, 0); ++ VERIFY3U(zio_wait(zio), ==, 0); ++ } ++ ++ /* ++ * Iterate to convergence. 
++ */ ++ do { ++ int pass = ++spa->spa_sync_pass; ++ ++ spa_sync_config_object(spa, tx); ++ spa_sync_aux_dev(spa, &spa->spa_spares, tx, ++ ZPOOL_CONFIG_SPARES, DMU_POOL_SPARES); ++ spa_sync_aux_dev(spa, &spa->spa_l2cache, tx, ++ ZPOOL_CONFIG_L2CACHE, DMU_POOL_L2CACHE); ++ spa_errlog_sync(spa, txg); ++ dsl_pool_sync(dp, txg); ++ ++ if (pass <= SYNC_PASS_DEFERRED_FREE) { ++ zio_t *zio = zio_root(spa, NULL, NULL, 0); ++ bplist_iterate(free_bpl, spa_free_sync_cb, ++ zio, tx); ++ VERIFY(zio_wait(zio) == 0); ++ } else { ++ bplist_iterate(free_bpl, bpobj_enqueue_cb, ++ defer_bpo, tx); ++ } ++ ++ ddt_sync(spa, txg); ++ dsl_scan_sync(dp, tx); ++ ++ while ((vd = txg_list_remove(&spa->spa_vdev_txg_list, txg))) ++ vdev_sync(vd, txg); ++ ++ if (pass == 1) ++ spa_sync_upgrades(spa, tx); ++ ++ } while (dmu_objset_is_dirty(mos, txg)); ++ ++ /* ++ * Rewrite the vdev configuration (which includes the uberblock) ++ * to commit the transaction group. ++ * ++ * If there are no dirty vdevs, we sync the uberblock to a few ++ * random top-level vdevs that are known to be visible in the ++ * config cache (see spa_vdev_add() for a complete description). ++ * If there *are* dirty vdevs, sync the uberblock to all vdevs. ++ */ ++ for (;;) { ++ /* ++ * We hold SCL_STATE to prevent vdev open/close/etc. ++ * while we're attempting to write the vdev labels. ++ */ ++ spa_config_enter(spa, SCL_STATE, FTAG, RW_READER); ++ ++ if (list_is_empty(&spa->spa_config_dirty_list)) { ++ vdev_t *svd[SPA_DVAS_PER_BP]; ++ int svdcount = 0; ++ int children = rvd->vdev_children; ++ int c0 = spa_get_random(children); ++ ++ for (c = 0; c < children; c++) { ++ vd = rvd->vdev_child[(c0 + c) % children]; ++ if (vd->vdev_ms_array == 0 || vd->vdev_islog) ++ continue; ++ svd[svdcount++] = vd; ++ if (svdcount == SPA_DVAS_PER_BP) ++ break; ++ } ++ error = vdev_config_sync(svd, svdcount, txg, B_FALSE); ++ if (error != 0) ++ error = vdev_config_sync(svd, svdcount, txg, ++ B_TRUE); ++ } else { ++ error = vdev_config_sync(rvd->vdev_child, ++ rvd->vdev_children, txg, B_FALSE); ++ if (error != 0) ++ error = vdev_config_sync(rvd->vdev_child, ++ rvd->vdev_children, txg, B_TRUE); ++ } ++ ++ spa_config_exit(spa, SCL_STATE, FTAG); ++ ++ if (error == 0) ++ break; ++ zio_suspend(spa, NULL); ++ zio_resume_wait(spa); ++ } ++ dmu_tx_commit(tx); ++ ++ /* ++ * Clear the dirty config list. ++ */ ++ while ((vd = list_head(&spa->spa_config_dirty_list)) != NULL) ++ vdev_config_clean(vd); ++ ++ /* ++ * Now that the new config has synced transactionally, ++ * let it become visible to the config cache. ++ */ ++ if (spa->spa_config_syncing != NULL) { ++ spa_config_set(spa, spa->spa_config_syncing); ++ spa->spa_config_txg = txg; ++ spa->spa_config_syncing = NULL; ++ } ++ ++ spa->spa_ubsync = spa->spa_uberblock; ++ ++ dsl_pool_sync_done(dp, txg); ++ ++ /* ++ * Update usable space statistics. ++ */ ++ while ((vd = txg_list_remove(&spa->spa_vdev_txg_list, TXG_CLEAN(txg)))) ++ vdev_sync_done(vd, txg); ++ ++ spa_update_dspace(spa); ++ ++ /* ++ * It had better be the case that we didn't dirty anything ++ * since vdev_config_sync(). ++ */ ++ ASSERT(txg_list_empty(&dp->dp_dirty_datasets, txg)); ++ ASSERT(txg_list_empty(&dp->dp_dirty_dirs, txg)); ++ ASSERT(txg_list_empty(&spa->spa_vdev_txg_list, txg)); ++ ++ spa->spa_sync_pass = 0; ++ ++ spa_config_exit(spa, SCL_CONFIG, FTAG); ++ ++ spa_handle_ignored_writes(spa); ++ ++ /* ++ * If any async tasks have been requested, kick them off. ++ */ ++ spa_async_dispatch(spa); ++} ++ ++/* ++ * Sync all pools. 
We don't want to hold the namespace lock across these ++ * operations, so we take a reference on the spa_t and drop the lock during the ++ * sync. ++ */ ++void ++spa_sync_allpools(void) ++{ ++ spa_t *spa = NULL; ++ mutex_enter(&spa_namespace_lock); ++ while ((spa = spa_next(spa)) != NULL) { ++ if (spa_state(spa) != POOL_STATE_ACTIVE || ++ !spa_writeable(spa) || spa_suspended(spa)) ++ continue; ++ spa_open_ref(spa, FTAG); ++ mutex_exit(&spa_namespace_lock); ++ txg_wait_synced(spa_get_dsl(spa), 0); ++ mutex_enter(&spa_namespace_lock); ++ spa_close(spa, FTAG); ++ } ++ mutex_exit(&spa_namespace_lock); ++} ++ ++/* ++ * ========================================================================== ++ * Miscellaneous routines ++ * ========================================================================== ++ */ ++ ++/* ++ * Remove all pools in the system. ++ */ ++void ++spa_evict_all(void) ++{ ++ spa_t *spa; ++ ++ /* ++ * Remove all cached state. All pools should be closed now, ++ * so every spa in the AVL tree should be unreferenced. ++ */ ++ mutex_enter(&spa_namespace_lock); ++ while ((spa = spa_next(NULL)) != NULL) { ++ /* ++ * Stop async tasks. The async thread may need to detach ++ * a device that's been replaced, which requires grabbing ++ * spa_namespace_lock, so we must drop it here. ++ */ ++ spa_open_ref(spa, FTAG); ++ mutex_exit(&spa_namespace_lock); ++ spa_async_suspend(spa); ++ mutex_enter(&spa_namespace_lock); ++ spa_close(spa, FTAG); ++ ++ if (spa->spa_state != POOL_STATE_UNINITIALIZED) { ++ spa_unload(spa); ++ spa_deactivate(spa); ++ } ++ spa_remove(spa); ++ } ++ mutex_exit(&spa_namespace_lock); ++} ++ ++vdev_t * ++spa_lookup_by_guid(spa_t *spa, uint64_t guid, boolean_t aux) ++{ ++ vdev_t *vd; ++ int i; ++ ++ if ((vd = vdev_lookup_by_guid(spa->spa_root_vdev, guid)) != NULL) ++ return (vd); ++ ++ if (aux) { ++ for (i = 0; i < spa->spa_l2cache.sav_count; i++) { ++ vd = spa->spa_l2cache.sav_vdevs[i]; ++ if (vd->vdev_guid == guid) ++ return (vd); ++ } ++ ++ for (i = 0; i < spa->spa_spares.sav_count; i++) { ++ vd = spa->spa_spares.sav_vdevs[i]; ++ if (vd->vdev_guid == guid) ++ return (vd); ++ } ++ } ++ ++ return (NULL); ++} ++ ++void ++spa_upgrade(spa_t *spa, uint64_t version) ++{ ++ ASSERT(spa_writeable(spa)); ++ ++ spa_config_enter(spa, SCL_ALL, FTAG, RW_WRITER); ++ ++ /* ++ * This should only be called for a non-faulted pool, and since a ++ * future version would result in an unopenable pool, this shouldn't be ++ * possible. ++ */ ++ ASSERT(spa->spa_uberblock.ub_version <= SPA_VERSION); ++ ASSERT(version >= spa->spa_uberblock.ub_version); ++ ++ spa->spa_uberblock.ub_version = version; ++ vdev_config_dirty(spa->spa_root_vdev); ++ ++ spa_config_exit(spa, SCL_ALL, FTAG); ++ ++ txg_wait_synced(spa_get_dsl(spa), 0); ++} ++ ++boolean_t ++spa_has_spare(spa_t *spa, uint64_t guid) ++{ ++ int i; ++ uint64_t spareguid; ++ spa_aux_vdev_t *sav = &spa->spa_spares; ++ ++ for (i = 0; i < sav->sav_count; i++) ++ if (sav->sav_vdevs[i]->vdev_guid == guid) ++ return (B_TRUE); ++ ++ for (i = 0; i < sav->sav_npending; i++) { ++ if (nvlist_lookup_uint64(sav->sav_pending[i], ZPOOL_CONFIG_GUID, ++ &spareguid) == 0 && spareguid == guid) ++ return (B_TRUE); ++ } ++ ++ return (B_FALSE); ++} ++ ++/* ++ * Check if a pool has an active shared spare device. 
++ * Note: reference count of an active spare is 2, as a spare and as a replacement
++ */
++static boolean_t
++spa_has_active_shared_spare(spa_t *spa)
++{
++ int i, refcnt;
++ uint64_t pool;
++ spa_aux_vdev_t *sav = &spa->spa_spares;
++
++ for (i = 0; i < sav->sav_count; i++) {
++ if (spa_spare_exists(sav->sav_vdevs[i]->vdev_guid, &pool,
++ &refcnt) && pool != 0ULL && pool == spa_guid(spa) &&
++ refcnt > 2)
++ return (B_TRUE);
++ }
++
++ return (B_FALSE);
++}
++
++/*
++ * Post a FM_EREPORT_ZFS_* event from sys/fm/fs/zfs.h. The payload will be
++ * filled in from the spa and (optionally) the vdev. This doesn't do anything
++ * in the userland libzpool, as we don't want consumers to misinterpret ztest
++ * or zdb as real changes.
++ */
++void
++spa_event_notify(spa_t *spa, vdev_t *vd, const char *name)
++{
++#ifdef _KERNEL
++ zfs_ereport_post(name, spa, vd, NULL, 0, 0);
++#endif
++}
++
++#if defined(_KERNEL) && defined(HAVE_SPL)
++/* state manipulation functions */
++EXPORT_SYMBOL(spa_open);
++EXPORT_SYMBOL(spa_open_rewind);
++EXPORT_SYMBOL(spa_get_stats);
++EXPORT_SYMBOL(spa_create);
++EXPORT_SYMBOL(spa_import_rootpool);
++EXPORT_SYMBOL(spa_import);
++EXPORT_SYMBOL(spa_tryimport);
++EXPORT_SYMBOL(spa_destroy);
++EXPORT_SYMBOL(spa_export);
++EXPORT_SYMBOL(spa_reset);
++EXPORT_SYMBOL(spa_async_request);
++EXPORT_SYMBOL(spa_async_suspend);
++EXPORT_SYMBOL(spa_async_resume);
++EXPORT_SYMBOL(spa_inject_addref);
++EXPORT_SYMBOL(spa_inject_delref);
++EXPORT_SYMBOL(spa_scan_stat_init);
++EXPORT_SYMBOL(spa_scan_get_stats);
++
++/* device manipulation */
++EXPORT_SYMBOL(spa_vdev_add);
++EXPORT_SYMBOL(spa_vdev_attach);
++EXPORT_SYMBOL(spa_vdev_detach);
++EXPORT_SYMBOL(spa_vdev_remove);
++EXPORT_SYMBOL(spa_vdev_setpath);
++EXPORT_SYMBOL(spa_vdev_setfru);
++EXPORT_SYMBOL(spa_vdev_split_mirror);
++
++/* spare state (which is global across all pools) */
++EXPORT_SYMBOL(spa_spare_add);
++EXPORT_SYMBOL(spa_spare_remove);
++EXPORT_SYMBOL(spa_spare_exists);
++EXPORT_SYMBOL(spa_spare_activate);
++
++/* L2ARC state (which is global across all pools) */
++EXPORT_SYMBOL(spa_l2cache_add);
++EXPORT_SYMBOL(spa_l2cache_remove);
++EXPORT_SYMBOL(spa_l2cache_exists);
++EXPORT_SYMBOL(spa_l2cache_activate);
++EXPORT_SYMBOL(spa_l2cache_drop);
++
++/* scanning */
++EXPORT_SYMBOL(spa_scan);
++EXPORT_SYMBOL(spa_scan_stop);
++
++/* spa syncing */
++EXPORT_SYMBOL(spa_sync); /* only for DMU use */
++EXPORT_SYMBOL(spa_sync_allpools);
++
++/* properties */
++EXPORT_SYMBOL(spa_prop_set);
++EXPORT_SYMBOL(spa_prop_get);
++EXPORT_SYMBOL(spa_prop_clear_bootfs);
++
++/* asynchronous event notification */
++EXPORT_SYMBOL(spa_event_notify);
++#endif
+diff -uNr linux-3.2.33-go.orig/fs/zfs/zfs/space_map.c linux-3.2.33-go/fs/zfs/zfs/space_map.c
+--- linux-3.2.33-go.orig/fs/zfs/zfs/space_map.c 1970-01-01 01:00:00.000000000 +0100
++++ linux-3.2.33-go/fs/zfs/zfs/space_map.c 2012-11-16 23:25:34.348039346 +0100
+@@ -0,0 +1,616 @@
++/*
++ * CDDL HEADER START
++ *
++ * The contents of this file are subject to the terms of the
++ * Common Development and Distribution License (the "License").
++ * You may not use this file except in compliance with the License.
++ *
++ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
++ * or http://www.opensolaris.org/os/licensing.
++ * See the License for the specific language governing permissions
++ * and limitations under the License.
++ *
++ * When distributing Covered Code, include this CDDL HEADER in each
++ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
++ * If applicable, add the following below this CDDL HEADER, with the ++ * fields enclosed by brackets "[]" replaced with your own identifying ++ * information: Portions Copyright [yyyy] [name of copyright owner] ++ * ++ * CDDL HEADER END ++ */ ++/* ++ * Copyright 2009 Sun Microsystems, Inc. All rights reserved. ++ * Use is subject to license terms. ++ */ ++ ++#include ++#include ++#include ++#include ++#include ++ ++/* ++ * Space map routines. ++ * NOTE: caller is responsible for all locking. ++ */ ++static int ++space_map_seg_compare(const void *x1, const void *x2) ++{ ++ const space_seg_t *s1 = x1; ++ const space_seg_t *s2 = x2; ++ ++ if (s1->ss_start < s2->ss_start) { ++ if (s1->ss_end > s2->ss_start) ++ return (0); ++ return (-1); ++ } ++ if (s1->ss_start > s2->ss_start) { ++ if (s1->ss_start < s2->ss_end) ++ return (0); ++ return (1); ++ } ++ return (0); ++} ++ ++void ++space_map_create(space_map_t *sm, uint64_t start, uint64_t size, uint8_t shift, ++ kmutex_t *lp) ++{ ++ bzero(sm, sizeof (*sm)); ++ ++ cv_init(&sm->sm_load_cv, NULL, CV_DEFAULT, NULL); ++ ++ avl_create(&sm->sm_root, space_map_seg_compare, ++ sizeof (space_seg_t), offsetof(struct space_seg, ss_node)); ++ ++ sm->sm_start = start; ++ sm->sm_size = size; ++ sm->sm_shift = shift; ++ sm->sm_lock = lp; ++} ++ ++void ++space_map_destroy(space_map_t *sm) ++{ ++ ASSERT(!sm->sm_loaded && !sm->sm_loading); ++ VERIFY3U(sm->sm_space, ==, 0); ++ avl_destroy(&sm->sm_root); ++ cv_destroy(&sm->sm_load_cv); ++} ++ ++void ++space_map_add(space_map_t *sm, uint64_t start, uint64_t size) ++{ ++ avl_index_t where; ++ space_seg_t ssearch, *ss_before, *ss_after, *ss; ++ uint64_t end = start + size; ++ int merge_before, merge_after; ++ ++ ASSERT(MUTEX_HELD(sm->sm_lock)); ++ VERIFY(size != 0); ++ VERIFY3U(start, >=, sm->sm_start); ++ VERIFY3U(end, <=, sm->sm_start + sm->sm_size); ++ VERIFY(sm->sm_space + size <= sm->sm_size); ++ VERIFY(P2PHASE(start, 1ULL << sm->sm_shift) == 0); ++ VERIFY(P2PHASE(size, 1ULL << sm->sm_shift) == 0); ++ ++ ssearch.ss_start = start; ++ ssearch.ss_end = end; ++ ss = avl_find(&sm->sm_root, &ssearch, &where); ++ ++ if (ss != NULL && ss->ss_start <= start && ss->ss_end >= end) { ++ zfs_panic_recover("zfs: allocating allocated segment" ++ "(offset=%llu size=%llu)\n", ++ (longlong_t)start, (longlong_t)size); ++ return; ++ } ++ ++ /* Make sure we don't overlap with either of our neighbors */ ++ VERIFY(ss == NULL); ++ ++ ss_before = avl_nearest(&sm->sm_root, where, AVL_BEFORE); ++ ss_after = avl_nearest(&sm->sm_root, where, AVL_AFTER); ++ ++ merge_before = (ss_before != NULL && ss_before->ss_end == start); ++ merge_after = (ss_after != NULL && ss_after->ss_start == end); ++ ++ if (merge_before && merge_after) { ++ avl_remove(&sm->sm_root, ss_before); ++ if (sm->sm_pp_root) { ++ avl_remove(sm->sm_pp_root, ss_before); ++ avl_remove(sm->sm_pp_root, ss_after); ++ } ++ ss_after->ss_start = ss_before->ss_start; ++ kmem_free(ss_before, sizeof (*ss_before)); ++ ss = ss_after; ++ } else if (merge_before) { ++ ss_before->ss_end = end; ++ if (sm->sm_pp_root) ++ avl_remove(sm->sm_pp_root, ss_before); ++ ss = ss_before; ++ } else if (merge_after) { ++ ss_after->ss_start = start; ++ if (sm->sm_pp_root) ++ avl_remove(sm->sm_pp_root, ss_after); ++ ss = ss_after; ++ } else { ++ ss = kmem_alloc(sizeof (*ss), KM_PUSHPAGE); ++ ss->ss_start = start; ++ ss->ss_end = end; ++ avl_insert(&sm->sm_root, ss, where); ++ } ++ ++ if (sm->sm_pp_root) ++ avl_add(sm->sm_pp_root, ss); ++ ++ sm->sm_space += size; ++} ++ ++void ++space_map_remove(space_map_t 
*sm, uint64_t start, uint64_t size) ++{ ++ avl_index_t where; ++ space_seg_t ssearch, *ss, *newseg; ++ uint64_t end = start + size; ++ int left_over, right_over; ++ ++ ASSERT(MUTEX_HELD(sm->sm_lock)); ++ VERIFY(size != 0); ++ VERIFY(P2PHASE(start, 1ULL << sm->sm_shift) == 0); ++ VERIFY(P2PHASE(size, 1ULL << sm->sm_shift) == 0); ++ ++ ssearch.ss_start = start; ++ ssearch.ss_end = end; ++ ss = avl_find(&sm->sm_root, &ssearch, &where); ++ ++ /* Make sure we completely overlap with someone */ ++ if (ss == NULL) { ++ zfs_panic_recover("zfs: freeing free segment " ++ "(offset=%llu size=%llu)", ++ (longlong_t)start, (longlong_t)size); ++ return; ++ } ++ VERIFY3U(ss->ss_start, <=, start); ++ VERIFY3U(ss->ss_end, >=, end); ++ VERIFY(sm->sm_space - size <= sm->sm_size); ++ ++ left_over = (ss->ss_start != start); ++ right_over = (ss->ss_end != end); ++ ++ if (sm->sm_pp_root) ++ avl_remove(sm->sm_pp_root, ss); ++ ++ if (left_over && right_over) { ++ newseg = kmem_alloc(sizeof (*newseg), KM_PUSHPAGE); ++ newseg->ss_start = end; ++ newseg->ss_end = ss->ss_end; ++ ss->ss_end = start; ++ avl_insert_here(&sm->sm_root, newseg, ss, AVL_AFTER); ++ if (sm->sm_pp_root) ++ avl_add(sm->sm_pp_root, newseg); ++ } else if (left_over) { ++ ss->ss_end = start; ++ } else if (right_over) { ++ ss->ss_start = end; ++ } else { ++ avl_remove(&sm->sm_root, ss); ++ kmem_free(ss, sizeof (*ss)); ++ ss = NULL; ++ } ++ ++ if (sm->sm_pp_root && ss != NULL) ++ avl_add(sm->sm_pp_root, ss); ++ ++ sm->sm_space -= size; ++} ++ ++boolean_t ++space_map_contains(space_map_t *sm, uint64_t start, uint64_t size) ++{ ++ avl_index_t where; ++ space_seg_t ssearch, *ss; ++ uint64_t end = start + size; ++ ++ ASSERT(MUTEX_HELD(sm->sm_lock)); ++ VERIFY(size != 0); ++ VERIFY(P2PHASE(start, 1ULL << sm->sm_shift) == 0); ++ VERIFY(P2PHASE(size, 1ULL << sm->sm_shift) == 0); ++ ++ ssearch.ss_start = start; ++ ssearch.ss_end = end; ++ ss = avl_find(&sm->sm_root, &ssearch, &where); ++ ++ return (ss != NULL && ss->ss_start <= start && ss->ss_end >= end); ++} ++ ++void ++space_map_vacate(space_map_t *sm, space_map_func_t *func, space_map_t *mdest) ++{ ++ space_seg_t *ss; ++ void *cookie = NULL; ++ ++ ASSERT(MUTEX_HELD(sm->sm_lock)); ++ ++ while ((ss = avl_destroy_nodes(&sm->sm_root, &cookie)) != NULL) { ++ if (func != NULL) ++ func(mdest, ss->ss_start, ss->ss_end - ss->ss_start); ++ kmem_free(ss, sizeof (*ss)); ++ } ++ sm->sm_space = 0; ++} ++ ++void ++space_map_walk(space_map_t *sm, space_map_func_t *func, space_map_t *mdest) ++{ ++ space_seg_t *ss; ++ ++ ASSERT(MUTEX_HELD(sm->sm_lock)); ++ ++ for (ss = avl_first(&sm->sm_root); ss; ss = AVL_NEXT(&sm->sm_root, ss)) ++ func(mdest, ss->ss_start, ss->ss_end - ss->ss_start); ++} ++ ++/* ++ * Wait for any in-progress space_map_load() to complete. ++ */ ++void ++space_map_load_wait(space_map_t *sm) ++{ ++ ASSERT(MUTEX_HELD(sm->sm_lock)); ++ ++ while (sm->sm_loading) { ++ ASSERT(!sm->sm_loaded); ++ cv_wait(&sm->sm_load_cv, sm->sm_lock); ++ } ++} ++ ++/* ++ * Note: space_map_load() will drop sm_lock across dmu_read() calls. ++ * The caller must be OK with this. 
++ */ ++int ++space_map_load(space_map_t *sm, space_map_ops_t *ops, uint8_t maptype, ++ space_map_obj_t *smo, objset_t *os) ++{ ++ uint64_t *entry, *entry_map, *entry_map_end; ++ uint64_t bufsize, size, offset, end, space; ++ uint64_t mapstart = sm->sm_start; ++ int error = 0; ++ ++ ASSERT(MUTEX_HELD(sm->sm_lock)); ++ ASSERT(!sm->sm_loaded); ++ ASSERT(!sm->sm_loading); ++ ++ sm->sm_loading = B_TRUE; ++ end = smo->smo_objsize; ++ space = smo->smo_alloc; ++ ++ ASSERT(sm->sm_ops == NULL); ++ VERIFY3U(sm->sm_space, ==, 0); ++ ++ if (maptype == SM_FREE) { ++ space_map_add(sm, sm->sm_start, sm->sm_size); ++ space = sm->sm_size - space; ++ } ++ ++ bufsize = 1ULL << SPACE_MAP_BLOCKSHIFT; ++ entry_map = zio_buf_alloc(bufsize); ++ ++ mutex_exit(sm->sm_lock); ++ if (end > bufsize) ++ dmu_prefetch(os, smo->smo_object, bufsize, end - bufsize); ++ mutex_enter(sm->sm_lock); ++ ++ for (offset = 0; offset < end; offset += bufsize) { ++ size = MIN(end - offset, bufsize); ++ VERIFY(P2PHASE(size, sizeof (uint64_t)) == 0); ++ VERIFY(size != 0); ++ ++ dprintf("object=%llu offset=%llx size=%llx\n", ++ smo->smo_object, offset, size); ++ ++ mutex_exit(sm->sm_lock); ++ error = dmu_read(os, smo->smo_object, offset, size, entry_map, ++ DMU_READ_PREFETCH); ++ mutex_enter(sm->sm_lock); ++ if (error != 0) ++ break; ++ ++ entry_map_end = entry_map + (size / sizeof (uint64_t)); ++ for (entry = entry_map; entry < entry_map_end; entry++) { ++ uint64_t e = *entry; ++ ++ if (SM_DEBUG_DECODE(e)) /* Skip debug entries */ ++ continue; ++ ++ (SM_TYPE_DECODE(e) == maptype ? ++ space_map_add : space_map_remove)(sm, ++ (SM_OFFSET_DECODE(e) << sm->sm_shift) + mapstart, ++ SM_RUN_DECODE(e) << sm->sm_shift); ++ } ++ } ++ ++ if (error == 0) { ++ VERIFY3U(sm->sm_space, ==, space); ++ ++ sm->sm_loaded = B_TRUE; ++ sm->sm_ops = ops; ++ if (ops != NULL) ++ ops->smop_load(sm); ++ } else { ++ space_map_vacate(sm, NULL, NULL); ++ } ++ ++ zio_buf_free(entry_map, bufsize); ++ ++ sm->sm_loading = B_FALSE; ++ ++ cv_broadcast(&sm->sm_load_cv); ++ ++ return (error); ++} ++ ++void ++space_map_unload(space_map_t *sm) ++{ ++ ASSERT(MUTEX_HELD(sm->sm_lock)); ++ ++ if (sm->sm_loaded && sm->sm_ops != NULL) ++ sm->sm_ops->smop_unload(sm); ++ ++ sm->sm_loaded = B_FALSE; ++ sm->sm_ops = NULL; ++ ++ space_map_vacate(sm, NULL, NULL); ++} ++ ++uint64_t ++space_map_maxsize(space_map_t *sm) ++{ ++ ASSERT(sm->sm_ops != NULL); ++ return (sm->sm_ops->smop_max(sm)); ++} ++ ++uint64_t ++space_map_alloc(space_map_t *sm, uint64_t size) ++{ ++ uint64_t start; ++ ++ start = sm->sm_ops->smop_alloc(sm, size); ++ if (start != -1ULL) ++ space_map_remove(sm, start, size); ++ return (start); ++} ++ ++void ++space_map_claim(space_map_t *sm, uint64_t start, uint64_t size) ++{ ++ sm->sm_ops->smop_claim(sm, start, size); ++ space_map_remove(sm, start, size); ++} ++ ++void ++space_map_free(space_map_t *sm, uint64_t start, uint64_t size) ++{ ++ space_map_add(sm, start, size); ++ sm->sm_ops->smop_free(sm, start, size); ++} ++ ++/* ++ * Note: space_map_sync() will drop sm_lock across dmu_write() calls. 
++ */ ++void ++space_map_sync(space_map_t *sm, uint8_t maptype, ++ space_map_obj_t *smo, objset_t *os, dmu_tx_t *tx) ++{ ++ spa_t *spa = dmu_objset_spa(os); ++ void *cookie = NULL; ++ space_seg_t *ss; ++ uint64_t bufsize, start, size, run_len; ++ uint64_t *entry, *entry_map, *entry_map_end; ++ ++ ASSERT(MUTEX_HELD(sm->sm_lock)); ++ ++ if (sm->sm_space == 0) ++ return; ++ ++ dprintf("object %4llu, txg %llu, pass %d, %c, count %lu, space %llx\n", ++ smo->smo_object, dmu_tx_get_txg(tx), spa_sync_pass(spa), ++ maptype == SM_ALLOC ? 'A' : 'F', avl_numnodes(&sm->sm_root), ++ sm->sm_space); ++ ++ if (maptype == SM_ALLOC) ++ smo->smo_alloc += sm->sm_space; ++ else ++ smo->smo_alloc -= sm->sm_space; ++ ++ bufsize = (8 + avl_numnodes(&sm->sm_root)) * sizeof (uint64_t); ++ bufsize = MIN(bufsize, 1ULL << SPACE_MAP_BLOCKSHIFT); ++ entry_map = zio_buf_alloc(bufsize); ++ entry_map_end = entry_map + (bufsize / sizeof (uint64_t)); ++ entry = entry_map; ++ ++ *entry++ = SM_DEBUG_ENCODE(1) | ++ SM_DEBUG_ACTION_ENCODE(maptype) | ++ SM_DEBUG_SYNCPASS_ENCODE(spa_sync_pass(spa)) | ++ SM_DEBUG_TXG_ENCODE(dmu_tx_get_txg(tx)); ++ ++ while ((ss = avl_destroy_nodes(&sm->sm_root, &cookie)) != NULL) { ++ size = ss->ss_end - ss->ss_start; ++ start = (ss->ss_start - sm->sm_start) >> sm->sm_shift; ++ ++ sm->sm_space -= size; ++ size >>= sm->sm_shift; ++ ++ while (size) { ++ run_len = MIN(size, SM_RUN_MAX); ++ ++ if (entry == entry_map_end) { ++ mutex_exit(sm->sm_lock); ++ dmu_write(os, smo->smo_object, smo->smo_objsize, ++ bufsize, entry_map, tx); ++ mutex_enter(sm->sm_lock); ++ smo->smo_objsize += bufsize; ++ entry = entry_map; ++ } ++ ++ *entry++ = SM_OFFSET_ENCODE(start) | ++ SM_TYPE_ENCODE(maptype) | ++ SM_RUN_ENCODE(run_len); ++ ++ start += run_len; ++ size -= run_len; ++ } ++ kmem_free(ss, sizeof (*ss)); ++ } ++ ++ if (entry != entry_map) { ++ size = (entry - entry_map) * sizeof (uint64_t); ++ mutex_exit(sm->sm_lock); ++ dmu_write(os, smo->smo_object, smo->smo_objsize, ++ size, entry_map, tx); ++ mutex_enter(sm->sm_lock); ++ smo->smo_objsize += size; ++ } ++ ++ zio_buf_free(entry_map, bufsize); ++ ++ VERIFY3U(sm->sm_space, ==, 0); ++} ++ ++void ++space_map_truncate(space_map_obj_t *smo, objset_t *os, dmu_tx_t *tx) ++{ ++ VERIFY(dmu_free_range(os, smo->smo_object, 0, -1ULL, tx) == 0); ++ ++ smo->smo_objsize = 0; ++ smo->smo_alloc = 0; ++} ++ ++/* ++ * Space map reference trees. ++ * ++ * A space map is a collection of integers. Every integer is either ++ * in the map, or it's not. A space map reference tree generalizes ++ * the idea: it allows its members to have arbitrary reference counts, ++ * as opposed to the implicit reference count of 0 or 1 in a space map. ++ * This representation comes in handy when computing the union or ++ * intersection of multiple space maps. For example, the union of ++ * N space maps is the subset of the reference tree with refcnt >= 1. ++ * The intersection of N space maps is the subset with refcnt >= N. ++ * ++ * [It's very much like a Fourier transform. Unions and intersections ++ * are hard to perform in the 'space map domain', so we convert the maps ++ * into the 'reference count domain', where it's trivial, then invert.] ++ * ++ * vdev_dtl_reassess() uses computations of this form to determine ++ * DTL_MISSING and DTL_OUTAGE for interior vdevs -- e.g. a RAID-Z vdev ++ * has an outage wherever refcnt >= vdev_nparity + 1, and a mirror vdev ++ * has an outage wherever refcnt >= vdev_children. 
++ */ ++static int ++space_map_ref_compare(const void *x1, const void *x2) ++{ ++ const space_ref_t *sr1 = x1; ++ const space_ref_t *sr2 = x2; ++ ++ if (sr1->sr_offset < sr2->sr_offset) ++ return (-1); ++ if (sr1->sr_offset > sr2->sr_offset) ++ return (1); ++ ++ if (sr1 < sr2) ++ return (-1); ++ if (sr1 > sr2) ++ return (1); ++ ++ return (0); ++} ++ ++void ++space_map_ref_create(avl_tree_t *t) ++{ ++ avl_create(t, space_map_ref_compare, ++ sizeof (space_ref_t), offsetof(space_ref_t, sr_node)); ++} ++ ++void ++space_map_ref_destroy(avl_tree_t *t) ++{ ++ space_ref_t *sr; ++ void *cookie = NULL; ++ ++ while ((sr = avl_destroy_nodes(t, &cookie)) != NULL) ++ kmem_free(sr, sizeof (*sr)); ++ ++ avl_destroy(t); ++} ++ ++static void ++space_map_ref_add_node(avl_tree_t *t, uint64_t offset, int64_t refcnt) ++{ ++ space_ref_t *sr; ++ ++ sr = kmem_alloc(sizeof (*sr), KM_PUSHPAGE); ++ sr->sr_offset = offset; ++ sr->sr_refcnt = refcnt; ++ ++ avl_add(t, sr); ++} ++ ++void ++space_map_ref_add_seg(avl_tree_t *t, uint64_t start, uint64_t end, ++ int64_t refcnt) ++{ ++ space_map_ref_add_node(t, start, refcnt); ++ space_map_ref_add_node(t, end, -refcnt); ++} ++ ++/* ++ * Convert (or add) a space map into a reference tree. ++ */ ++void ++space_map_ref_add_map(avl_tree_t *t, space_map_t *sm, int64_t refcnt) ++{ ++ space_seg_t *ss; ++ ++ ASSERT(MUTEX_HELD(sm->sm_lock)); ++ ++ for (ss = avl_first(&sm->sm_root); ss; ss = AVL_NEXT(&sm->sm_root, ss)) ++ space_map_ref_add_seg(t, ss->ss_start, ss->ss_end, refcnt); ++} ++ ++/* ++ * Convert a reference tree into a space map. The space map will contain ++ * all members of the reference tree for which refcnt >= minref. ++ */ ++void ++space_map_ref_generate_map(avl_tree_t *t, space_map_t *sm, int64_t minref) ++{ ++ uint64_t start = -1ULL; ++ int64_t refcnt = 0; ++ space_ref_t *sr; ++ ++ ASSERT(MUTEX_HELD(sm->sm_lock)); ++ ++ space_map_vacate(sm, NULL, NULL); ++ ++ for (sr = avl_first(t); sr != NULL; sr = AVL_NEXT(t, sr)) { ++ refcnt += sr->sr_refcnt; ++ if (refcnt >= minref) { ++ if (start == -1ULL) { ++ start = sr->sr_offset; ++ } ++ } else { ++ if (start != -1ULL) { ++ uint64_t end = sr->sr_offset; ++ ASSERT(start <= end); ++ if (end > start) ++ space_map_add(sm, start, end - start); ++ start = -1ULL; ++ } ++ } ++ } ++ ASSERT(refcnt == 0); ++ ASSERT(start == -1ULL); ++} +diff -uNr linux-3.2.33-go.orig/fs/zfs/zfs/spa_config.c linux-3.2.33-go/fs/zfs/zfs/spa_config.c +--- linux-3.2.33-go.orig/fs/zfs/zfs/spa_config.c 1970-01-01 01:00:00.000000000 +0100 ++++ linux-3.2.33-go/fs/zfs/zfs/spa_config.c 2012-11-16 23:25:34.353039289 +0100 +@@ -0,0 +1,504 @@ ++/* ++ * CDDL HEADER START ++ * ++ * The contents of this file are subject to the terms of the ++ * Common Development and Distribution License (the "License"). ++ * You may not use this file except in compliance with the License. ++ * ++ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE ++ * or http://www.opensolaris.org/os/licensing. ++ * See the License for the specific language governing permissions ++ * and limitations under the License. ++ * ++ * When distributing Covered Code, include this CDDL HEADER in each ++ * file and include the License file at usr/src/OPENSOLARIS.LICENSE. ++ * If applicable, add the following below this CDDL HEADER, with the ++ * fields enclosed by brackets "[]" replaced with your own identifying ++ * information: Portions Copyright [yyyy] [name of copyright owner] ++ * ++ * CDDL HEADER END ++ */ ++ ++/* ++ * Copyright (c) 2005, 2010, Oracle and/or its affiliates. 
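The reference-tree code above reduces space-map unions and intersections to a sweep over signed refcount deltas: +refcnt at each segment's start, -refcnt at its end, emitting a range while the running count stays at or above minref. Below is a minimal user-space sketch of that sweep, using a plain sorted array and made-up segments in place of the kernel AVL tree; all names and values here are illustrative, not part of the ZFS sources or of this patch.

    /* Standalone illustration of the reference-tree sweep. */
    #include <stdio.h>
    #include <stdlib.h>
    #include <stdint.h>

    typedef struct { uint64_t offset; int64_t refcnt; } ref_t;

    static int ref_cmp(const void *a, const void *b)
    {
        const ref_t *ra = a, *rb = b;
        if (ra->offset < rb->offset) return (-1);
        if (ra->offset > rb->offset) return (1);
        return (0);
    }

    /* Record one segment [start, end) with the given reference count. */
    static void ref_add_seg(ref_t *t, int *n, uint64_t start, uint64_t end,
        int64_t refcnt)
    {
        t[(*n)++] = (ref_t){ start, refcnt };
        t[(*n)++] = (ref_t){ end, -refcnt };
    }

    int main(void)
    {
        ref_t t[16];
        int n = 0, i;
        int64_t refcnt = 0;
        int64_t minref = 2;          /* 1 = union, 2 = intersection */
        uint64_t start = UINT64_MAX;

        /* Two "space maps": map A = [0,100), map B = [50,150). */
        ref_add_seg(t, &n, 0, 100, 1);
        ref_add_seg(t, &n, 50, 150, 1);
        qsort(t, n, sizeof (ref_t), ref_cmp);

        /* Emit every range whose running refcount is >= minref. */
        for (i = 0; i < n; i++) {
            refcnt += t[i].refcnt;
            if (refcnt >= minref) {
                if (start == UINT64_MAX)
                    start = t[i].offset;
            } else if (start != UINT64_MAX) {
                printf("segment [%llu, %llu)\n",
                    (unsigned long long)start,
                    (unsigned long long)t[i].offset);
                start = UINT64_MAX;
            }
        }
        return (0);
    }

With minref = 2 this prints the intersection [50, 100); with minref = 1 it would print the union [0, 150), mirroring what space_map_ref_generate_map() does over its AVL tree.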
All rights reserved. ++ * Copyright 2011 Nexenta Systems, Inc. All rights reserved. ++ * Copyright (c) 2011 by Delphix. All rights reserved. ++ */ ++ ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#ifdef _KERNEL ++#include ++#include ++#endif ++ ++/* ++ * Pool configuration repository. ++ * ++ * Pool configuration is stored as a packed nvlist on the filesystem. By ++ * default, all pools are stored in /etc/zfs/zpool.cache and loaded on boot ++ * (when the ZFS module is loaded). Pools can also have the 'cachefile' ++ * property set that allows them to be stored in an alternate location until ++ * the control of external software. ++ * ++ * For each cache file, we have a single nvlist which holds all the ++ * configuration information. When the module loads, we read this information ++ * from /etc/zfs/zpool.cache and populate the SPA namespace. This namespace is ++ * maintained independently in spa.c. Whenever the namespace is modified, or ++ * the configuration of a pool is changed, we call spa_config_sync(), which ++ * walks through all the active pools and writes the configuration to disk. ++ */ ++ ++static uint64_t spa_config_generation = 1; ++ ++/* ++ * This can be overridden in userland to preserve an alternate namespace for ++ * userland pools when doing testing. ++ */ ++char *spa_config_path = ZPOOL_CACHE; ++ ++/* ++ * Called when the module is first loaded, this routine loads the configuration ++ * file into the SPA namespace. It does not actually open or load the pools; it ++ * only populates the namespace. ++ */ ++void ++spa_config_load(void) ++{ ++ void *buf = NULL; ++ nvlist_t *nvlist, *child; ++ nvpair_t *nvpair; ++ char *pathname; ++ struct _buf *file; ++ uint64_t fsize; ++ ++ /* ++ * Open the configuration file. ++ */ ++ pathname = kmem_alloc(MAXPATHLEN, KM_PUSHPAGE); ++ ++ (void) snprintf(pathname, MAXPATHLEN, "%s%s", ++ (rootdir != NULL) ? "./" : "", spa_config_path); ++ ++ file = kobj_open_file(pathname); ++ ++ kmem_free(pathname, MAXPATHLEN); ++ ++ if (file == (struct _buf *)-1) ++ return; ++ ++ if (kobj_get_filesize(file, &fsize) != 0) ++ goto out; ++ ++ buf = kmem_alloc(fsize, KM_PUSHPAGE | KM_NODEBUG); ++ ++ /* ++ * Read the nvlist from the file. ++ */ ++ if (kobj_read_file(file, buf, fsize, 0) < 0) ++ goto out; ++ ++ /* ++ * Unpack the nvlist. ++ */ ++ if (nvlist_unpack(buf, fsize, &nvlist, KM_PUSHPAGE) != 0) ++ goto out; ++ ++ /* ++ * Iterate over all elements in the nvlist, creating a new spa_t for ++ * each one with the specified configuration. ++ */ ++ mutex_enter(&spa_namespace_lock); ++ nvpair = NULL; ++ while ((nvpair = nvlist_next_nvpair(nvlist, nvpair)) != NULL) { ++ if (nvpair_type(nvpair) != DATA_TYPE_NVLIST) ++ continue; ++ ++ VERIFY(nvpair_value_nvlist(nvpair, &child) == 0); ++ ++ if (spa_lookup(nvpair_name(nvpair)) != NULL) ++ continue; ++ (void) spa_add(nvpair_name(nvpair), child, NULL); ++ } ++ mutex_exit(&spa_namespace_lock); ++ ++ nvlist_free(nvlist); ++ ++out: ++ if (buf != NULL) ++ kmem_free(buf, fsize); ++ ++ kobj_close_file(file); ++} ++ ++static void ++spa_config_write(spa_config_dirent_t *dp, nvlist_t *nvl) ++{ ++ size_t buflen; ++ char *buf; ++ vnode_t *vp; ++ int oflags = FWRITE | FTRUNC | FCREAT | FOFFMAX; ++ char *temp; ++ ++ /* ++ * If the nvlist is empty (NULL), then remove the old cachefile. ++ */ ++ if (nvl == NULL) { ++ (void) vn_remove(dp->scd_path, UIO_SYSSPACE, RMFILE); ++ return; ++ } ++ ++ /* ++ * Pack the configuration into a buffer. 
++ */ ++ VERIFY(nvlist_size(nvl, &buflen, NV_ENCODE_XDR) == 0); ++ ++ buf = kmem_alloc(buflen, KM_PUSHPAGE | KM_NODEBUG); ++ temp = kmem_zalloc(MAXPATHLEN, KM_PUSHPAGE); ++ ++ VERIFY(nvlist_pack(nvl, &buf, &buflen, NV_ENCODE_XDR, ++ KM_PUSHPAGE) == 0); ++ ++ /* ++ * Write the configuration to disk. We need to do the traditional ++ * 'write to temporary file, sync, move over original' to make sure we ++ * always have a consistent view of the data. ++ */ ++ (void) snprintf(temp, MAXPATHLEN, "%s.tmp", dp->scd_path); ++ ++ if (vn_open(temp, UIO_SYSSPACE, oflags, 0644, &vp, CRCREAT, 0) == 0) { ++ if (vn_rdwr(UIO_WRITE, vp, buf, buflen, 0, UIO_SYSSPACE, ++ 0, RLIM64_INFINITY, kcred, NULL) == 0 && ++ VOP_FSYNC(vp, FSYNC, kcred, NULL) == 0) { ++ (void) vn_rename(temp, dp->scd_path, UIO_SYSSPACE); ++ } ++ (void) VOP_CLOSE(vp, oflags, 1, 0, kcred, NULL); ++ } ++ ++ (void) vn_remove(temp, UIO_SYSSPACE, RMFILE); ++ ++ kmem_free(buf, buflen); ++ kmem_free(temp, MAXPATHLEN); ++} ++ ++/* ++ * Synchronize pool configuration to disk. This must be called with the ++ * namespace lock held. ++ */ ++void ++spa_config_sync(spa_t *target, boolean_t removing, boolean_t postsysevent) ++{ ++ spa_config_dirent_t *dp, *tdp; ++ nvlist_t *nvl; ++ ++ ASSERT(MUTEX_HELD(&spa_namespace_lock)); ++ ++ if (rootdir == NULL || !(spa_mode_global & FWRITE)) ++ return; ++ ++ /* ++ * Iterate over all cachefiles for the pool, past or present. When the ++ * cachefile is changed, the new one is pushed onto this list, allowing ++ * us to update previous cachefiles that no longer contain this pool. ++ */ ++ for (dp = list_head(&target->spa_config_list); dp != NULL; ++ dp = list_next(&target->spa_config_list, dp)) { ++ spa_t *spa = NULL; ++ if (dp->scd_path == NULL) ++ continue; ++ ++ /* ++ * Iterate over all pools, adding any matching pools to 'nvl'. ++ */ ++ nvl = NULL; ++ while ((spa = spa_next(spa)) != NULL) { ++ if (spa == target && removing) ++ continue; ++ ++ mutex_enter(&spa->spa_props_lock); ++ tdp = list_head(&spa->spa_config_list); ++ if (spa->spa_config == NULL || ++ tdp->scd_path == NULL || ++ strcmp(tdp->scd_path, dp->scd_path) != 0) { ++ mutex_exit(&spa->spa_props_lock); ++ continue; ++ } ++ ++ if (nvl == NULL) ++ VERIFY(nvlist_alloc(&nvl, NV_UNIQUE_NAME, ++ KM_PUSHPAGE) == 0); ++ ++ VERIFY(nvlist_add_nvlist(nvl, spa->spa_name, ++ spa->spa_config) == 0); ++ mutex_exit(&spa->spa_props_lock); ++ } ++ ++ spa_config_write(dp, nvl); ++ nvlist_free(nvl); ++ } ++ ++ /* ++ * Remove any config entries older than the current one. ++ */ ++ dp = list_head(&target->spa_config_list); ++ while ((tdp = list_next(&target->spa_config_list, dp)) != NULL) { ++ list_remove(&target->spa_config_list, tdp); ++ if (tdp->scd_path != NULL) ++ spa_strfree(tdp->scd_path); ++ kmem_free(tdp, sizeof (spa_config_dirent_t)); ++ } ++ ++ spa_config_generation++; ++ ++ if (postsysevent) ++ spa_event_notify(target, NULL, FM_EREPORT_ZFS_CONFIG_SYNC); ++} ++ ++/* ++ * Sigh. Inside a local zone, we don't have access to /etc/zfs/zpool.cache, ++ * and we don't want to allow the local zone to see all the pools anyway. ++ * So we have to invent the ZFS_IOC_CONFIG ioctl to grab the configuration ++ * information for all pool visible within the zone. 
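spa_config_write() above relies on the classic write-to-a-temporary-file, fsync, rename-over-the-original sequence, so a crash can never leave a half-written cachefile behind. The following self-contained user-space sketch shows the same pattern with plain POSIX calls; the file name and buffer are illustrative, not the real zpool.cache handling.

    #include <fcntl.h>
    #include <stdio.h>
    #include <string.h>
    #include <sys/types.h>
    #include <unistd.h>

    /* Atomically replace 'path' with 'len' bytes from 'buf'. */
    static int write_config_atomic(const char *path, const void *buf, size_t len)
    {
        char temp[4096];
        int fd;

        /* Same idea as spa_config_write(): "<path>.tmp", sync, rename. */
        snprintf(temp, sizeof (temp), "%s.tmp", path);

        fd = open(temp, O_WRONLY | O_CREAT | O_TRUNC, 0644);
        if (fd < 0)
            return (-1);

        if (write(fd, buf, len) != (ssize_t)len || fsync(fd) != 0) {
            close(fd);
            unlink(temp);
            return (-1);
        }
        close(fd);

        /* rename() is atomic: readers see the old or the new file, never a mix. */
        if (rename(temp, path) != 0) {
            unlink(temp);
            return (-1);
        }
        return (0);
    }

    int main(void)
    {
        const char *cfg = "example cachefile contents\n";
        return (write_config_atomic("zpool.cache.example", cfg, strlen(cfg)));
    }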
++ */ ++nvlist_t * ++spa_all_configs(uint64_t *generation) ++{ ++ nvlist_t *pools; ++ spa_t *spa = NULL; ++ ++ if (*generation == spa_config_generation) ++ return (NULL); ++ ++ VERIFY(nvlist_alloc(&pools, NV_UNIQUE_NAME, KM_PUSHPAGE) == 0); ++ ++ mutex_enter(&spa_namespace_lock); ++ while ((spa = spa_next(spa)) != NULL) { ++ if (INGLOBALZONE(curproc) || ++ zone_dataset_visible(spa_name(spa), NULL)) { ++ mutex_enter(&spa->spa_props_lock); ++ VERIFY(nvlist_add_nvlist(pools, spa_name(spa), ++ spa->spa_config) == 0); ++ mutex_exit(&spa->spa_props_lock); ++ } ++ } ++ *generation = spa_config_generation; ++ mutex_exit(&spa_namespace_lock); ++ ++ return (pools); ++} ++ ++void ++spa_config_set(spa_t *spa, nvlist_t *config) ++{ ++ mutex_enter(&spa->spa_props_lock); ++ if (spa->spa_config != NULL) ++ nvlist_free(spa->spa_config); ++ spa->spa_config = config; ++ mutex_exit(&spa->spa_props_lock); ++} ++ ++/* ++ * Generate the pool's configuration based on the current in-core state. ++ * We infer whether to generate a complete config or just one top-level config ++ * based on whether vd is the root vdev. ++ */ ++nvlist_t * ++spa_config_generate(spa_t *spa, vdev_t *vd, uint64_t txg, int getstats) ++{ ++ nvlist_t *config, *nvroot; ++ vdev_t *rvd = spa->spa_root_vdev; ++ unsigned long hostid = 0; ++ boolean_t locked = B_FALSE; ++ uint64_t split_guid; ++ ++ if (vd == NULL) { ++ vd = rvd; ++ locked = B_TRUE; ++ spa_config_enter(spa, SCL_CONFIG | SCL_STATE, FTAG, RW_READER); ++ } ++ ++ ASSERT(spa_config_held(spa, SCL_CONFIG | SCL_STATE, RW_READER) == ++ (SCL_CONFIG | SCL_STATE)); ++ ++ /* ++ * If txg is -1, report the current value of spa->spa_config_txg. ++ */ ++ if (txg == -1ULL) ++ txg = spa->spa_config_txg; ++ ++ VERIFY(nvlist_alloc(&config, NV_UNIQUE_NAME, KM_PUSHPAGE) == 0); ++ ++ VERIFY(nvlist_add_uint64(config, ZPOOL_CONFIG_VERSION, ++ spa_version(spa)) == 0); ++ VERIFY(nvlist_add_string(config, ZPOOL_CONFIG_POOL_NAME, ++ spa_name(spa)) == 0); ++ VERIFY(nvlist_add_uint64(config, ZPOOL_CONFIG_POOL_STATE, ++ spa_state(spa)) == 0); ++ VERIFY(nvlist_add_uint64(config, ZPOOL_CONFIG_POOL_TXG, ++ txg) == 0); ++ VERIFY(nvlist_add_uint64(config, ZPOOL_CONFIG_POOL_GUID, ++ spa_guid(spa)) == 0); ++ VERIFY(spa->spa_comment == NULL || nvlist_add_string(config, ++ ZPOOL_CONFIG_COMMENT, spa->spa_comment) == 0); ++ ++ ++#ifdef _KERNEL ++ hostid = zone_get_hostid(NULL); ++#else /* _KERNEL */ ++ /* ++ * We're emulating the system's hostid in userland, so we can't use ++ * zone_get_hostid(). ++ */ ++ (void) ddi_strtoul(hw_serial, NULL, 10, &hostid); ++#endif /* _KERNEL */ ++ if (hostid != 0) { ++ VERIFY(nvlist_add_uint64(config, ZPOOL_CONFIG_HOSTID, ++ hostid) == 0); ++ } ++ VERIFY(nvlist_add_string(config, ZPOOL_CONFIG_HOSTNAME, ++ utsname.nodename) == 0); ++ ++ if (vd != rvd) { ++ VERIFY(nvlist_add_uint64(config, ZPOOL_CONFIG_TOP_GUID, ++ vd->vdev_top->vdev_guid) == 0); ++ VERIFY(nvlist_add_uint64(config, ZPOOL_CONFIG_GUID, ++ vd->vdev_guid) == 0); ++ if (vd->vdev_isspare) ++ VERIFY(nvlist_add_uint64(config, ZPOOL_CONFIG_IS_SPARE, ++ 1ULL) == 0); ++ if (vd->vdev_islog) ++ VERIFY(nvlist_add_uint64(config, ZPOOL_CONFIG_IS_LOG, ++ 1ULL) == 0); ++ vd = vd->vdev_top; /* label contains top config */ ++ } else { ++ /* ++ * Only add the (potentially large) split information ++ * in the mos config, and not in the vdev labels ++ */ ++ if (spa->spa_config_splitting != NULL) ++ VERIFY(nvlist_add_nvlist(config, ZPOOL_CONFIG_SPLIT, ++ spa->spa_config_splitting) == 0); ++ } ++ ++ /* ++ * Add the top-level config. 
We even add this on pools which ++ * don't support holes in the namespace. ++ */ ++ vdev_top_config_generate(spa, config); ++ ++ /* ++ * If we're splitting, record the original pool's guid. ++ */ ++ if (spa->spa_config_splitting != NULL && ++ nvlist_lookup_uint64(spa->spa_config_splitting, ++ ZPOOL_CONFIG_SPLIT_GUID, &split_guid) == 0) { ++ VERIFY(nvlist_add_uint64(config, ZPOOL_CONFIG_SPLIT_GUID, ++ split_guid) == 0); ++ } ++ ++ nvroot = vdev_config_generate(spa, vd, getstats, 0); ++ VERIFY(nvlist_add_nvlist(config, ZPOOL_CONFIG_VDEV_TREE, nvroot) == 0); ++ nvlist_free(nvroot); ++ ++ if (getstats && spa_load_state(spa) == SPA_LOAD_NONE) { ++ ddt_histogram_t *ddh; ++ ddt_stat_t *dds; ++ ddt_object_t *ddo; ++ ++ ddh = kmem_zalloc(sizeof (ddt_histogram_t), KM_PUSHPAGE); ++ ddt_get_dedup_histogram(spa, ddh); ++ VERIFY(nvlist_add_uint64_array(config, ++ ZPOOL_CONFIG_DDT_HISTOGRAM, ++ (uint64_t *)ddh, sizeof (*ddh) / sizeof (uint64_t)) == 0); ++ kmem_free(ddh, sizeof (ddt_histogram_t)); ++ ++ ddo = kmem_zalloc(sizeof (ddt_object_t), KM_PUSHPAGE); ++ ddt_get_dedup_object_stats(spa, ddo); ++ VERIFY(nvlist_add_uint64_array(config, ++ ZPOOL_CONFIG_DDT_OBJ_STATS, ++ (uint64_t *)ddo, sizeof (*ddo) / sizeof (uint64_t)) == 0); ++ kmem_free(ddo, sizeof (ddt_object_t)); ++ ++ dds = kmem_zalloc(sizeof (ddt_stat_t), KM_PUSHPAGE); ++ ddt_get_dedup_stats(spa, dds); ++ VERIFY(nvlist_add_uint64_array(config, ++ ZPOOL_CONFIG_DDT_STATS, ++ (uint64_t *)dds, sizeof (*dds) / sizeof (uint64_t)) == 0); ++ kmem_free(dds, sizeof (ddt_stat_t)); ++ } ++ ++ if (locked) ++ spa_config_exit(spa, SCL_CONFIG | SCL_STATE, FTAG); ++ ++ return (config); ++} ++ ++/* ++ * Update all disk labels, generate a fresh config based on the current ++ * in-core state, and sync the global config cache (do not sync the config ++ * cache if this is a booting rootpool). ++ */ ++void ++spa_config_update(spa_t *spa, int what) ++{ ++ vdev_t *rvd = spa->spa_root_vdev; ++ uint64_t txg; ++ int c; ++ ++ ASSERT(MUTEX_HELD(&spa_namespace_lock)); ++ ++ spa_config_enter(spa, SCL_ALL, FTAG, RW_WRITER); ++ txg = spa_last_synced_txg(spa) + 1; ++ if (what == SPA_CONFIG_UPDATE_POOL) { ++ vdev_config_dirty(rvd); ++ } else { ++ /* ++ * If we have top-level vdevs that were added but have ++ * not yet been prepared for allocation, do that now. ++ * (It's safe now because the config cache is up to date, ++ * so it will be able to translate the new DVAs.) ++ * See comments in spa_vdev_add() for full details. ++ */ ++ for (c = 0; c < rvd->vdev_children; c++) { ++ vdev_t *tvd = rvd->vdev_child[c]; ++ if (tvd->vdev_ms_array == 0) ++ vdev_metaslab_set_size(tvd); ++ vdev_expand(tvd, txg); ++ } ++ } ++ spa_config_exit(spa, SCL_ALL, FTAG); ++ ++ /* ++ * Wait for the mosconfig to be regenerated and synced. ++ */ ++ txg_wait_synced(spa->spa_dsl_pool, txg); ++ ++ /* ++ * Update the global config cache to reflect the new mosconfig. 
++ */ ++ if (!spa->spa_is_root) ++ spa_config_sync(spa, B_FALSE, what != SPA_CONFIG_UPDATE_POOL); ++ ++ if (what == SPA_CONFIG_UPDATE_POOL) ++ spa_config_update(spa, SPA_CONFIG_UPDATE_VDEVS); ++} ++ ++#if defined(_KERNEL) && defined(HAVE_SPL) ++EXPORT_SYMBOL(spa_config_sync); ++EXPORT_SYMBOL(spa_config_load); ++EXPORT_SYMBOL(spa_all_configs); ++EXPORT_SYMBOL(spa_config_set); ++EXPORT_SYMBOL(spa_config_generate); ++EXPORT_SYMBOL(spa_config_update); ++ ++module_param(spa_config_path, charp, 0444); ++MODULE_PARM_DESC(spa_config_path, "SPA config file (/etc/zfs/zpool.cache)"); ++#endif +diff -uNr linux-3.2.33-go.orig/fs/zfs/zfs/spa_errlog.c linux-3.2.33-go/fs/zfs/zfs/spa_errlog.c +--- linux-3.2.33-go.orig/fs/zfs/zfs/spa_errlog.c 1970-01-01 01:00:00.000000000 +0100 ++++ linux-3.2.33-go/fs/zfs/zfs/spa_errlog.c 2012-11-16 23:25:34.348039346 +0100 +@@ -0,0 +1,414 @@ ++/* ++ * CDDL HEADER START ++ * ++ * The contents of this file are subject to the terms of the ++ * Common Development and Distribution License (the "License"). ++ * You may not use this file except in compliance with the License. ++ * ++ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE ++ * or http://www.opensolaris.org/os/licensing. ++ * See the License for the specific language governing permissions ++ * and limitations under the License. ++ * ++ * When distributing Covered Code, include this CDDL HEADER in each ++ * file and include the License file at usr/src/OPENSOLARIS.LICENSE. ++ * If applicable, add the following below this CDDL HEADER, with the ++ * fields enclosed by brackets "[]" replaced with your own identifying ++ * information: Portions Copyright [yyyy] [name of copyright owner] ++ * ++ * CDDL HEADER END ++ */ ++/* ++ * Copyright (c) 2006, 2010, Oracle and/or its affiliates. All rights reserved. ++ */ ++ ++/* ++ * Routines to manage the on-disk persistent error log. ++ * ++ * Each pool stores a log of all logical data errors seen during normal ++ * operation. This is actually the union of two distinct logs: the last log, ++ * and the current log. All errors seen are logged to the current log. When a ++ * scrub completes, the current log becomes the last log, the last log is thrown ++ * out, and the current log is reinitialized. This way, if an error is somehow ++ * corrected, a new scrub will show that that it no longer exists, and will be ++ * deleted from the log when the scrub completes. ++ * ++ * The log is stored using a ZAP object whose key is a string form of the ++ * zbookmark tuple (objset, object, level, blkid), and whose contents is an ++ * optional 'objset:object' human-readable string describing the data. When an ++ * error is first logged, this string will be empty, indicating that no name is ++ * known. This prevents us from having to issue a potentially large amount of ++ * I/O to discover the object name during an error path. Instead, we do the ++ * calculation when the data is requested, storing the result so future queries ++ * will be faster. ++ * ++ * This log is then shipped into an nvlist where the key is the dataset name and ++ * the value is the object name. Userland is then responsible for uniquifying ++ * this list and displaying it to the user. ++ */ ++ ++#include ++#include ++#include ++#include ++#include ++ ++ ++/* ++ * Convert a bookmark to a string. 
++ */ ++static void ++bookmark_to_name(zbookmark_t *zb, char *buf, size_t len) ++{ ++ (void) snprintf(buf, len, "%llx:%llx:%llx:%llx", ++ (u_longlong_t)zb->zb_objset, (u_longlong_t)zb->zb_object, ++ (u_longlong_t)zb->zb_level, (u_longlong_t)zb->zb_blkid); ++} ++ ++/* ++ * Convert a string to a bookmark ++ */ ++#ifdef _KERNEL ++static void ++name_to_bookmark(char *buf, zbookmark_t *zb) ++{ ++ zb->zb_objset = strtonum(buf, &buf); ++ ASSERT(*buf == ':'); ++ zb->zb_object = strtonum(buf + 1, &buf); ++ ASSERT(*buf == ':'); ++ zb->zb_level = (int)strtonum(buf + 1, &buf); ++ ASSERT(*buf == ':'); ++ zb->zb_blkid = strtonum(buf + 1, &buf); ++ ASSERT(*buf == '\0'); ++} ++#endif ++ ++/* ++ * Log an uncorrectable error to the persistent error log. We add it to the ++ * spa's list of pending errors. The changes are actually synced out to disk ++ * during spa_errlog_sync(). ++ */ ++void ++spa_log_error(spa_t *spa, zio_t *zio) ++{ ++ zbookmark_t *zb = &zio->io_logical->io_bookmark; ++ spa_error_entry_t search; ++ spa_error_entry_t *new; ++ avl_tree_t *tree; ++ avl_index_t where; ++ ++ /* ++ * If we are trying to import a pool, ignore any errors, as we won't be ++ * writing to the pool any time soon. ++ */ ++ if (spa_load_state(spa) == SPA_LOAD_TRYIMPORT) ++ return; ++ ++ mutex_enter(&spa->spa_errlist_lock); ++ ++ /* ++ * If we have had a request to rotate the log, log it to the next list ++ * instead of the current one. ++ */ ++ if (spa->spa_scrub_active || spa->spa_scrub_finished) ++ tree = &spa->spa_errlist_scrub; ++ else ++ tree = &spa->spa_errlist_last; ++ ++ search.se_bookmark = *zb; ++ if (avl_find(tree, &search, &where) != NULL) { ++ mutex_exit(&spa->spa_errlist_lock); ++ return; ++ } ++ ++ new = kmem_zalloc(sizeof (spa_error_entry_t), KM_SLEEP); ++ new->se_bookmark = *zb; ++ avl_insert(tree, new, where); ++ ++ mutex_exit(&spa->spa_errlist_lock); ++} ++ ++/* ++ * Return the number of errors currently in the error log. This is actually the ++ * sum of both the last log and the current log, since we don't know the union ++ * of these logs until we reach userland. 
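bookmark_to_name() and name_to_bookmark() above round-trip the error-log ZAP key through an "objset:object:level:blkid" hex string. A standalone sketch of the same round trip follows, with strtoull() standing in for the kernel's strtonum() and a simplified stand-in struct rather than the real zbookmark_t.

    #include <assert.h>
    #include <inttypes.h>
    #include <stdio.h>
    #include <stdlib.h>

    /* Stand-in for zbookmark_t: (objset, object, level, blkid). */
    typedef struct {
        uint64_t zb_objset;
        uint64_t zb_object;
        int64_t  zb_level;
        uint64_t zb_blkid;
    } bookmark_t;

    static void bookmark_to_name(const bookmark_t *zb, char *buf, size_t len)
    {
        /* Same colon-separated hex layout the error log uses as its ZAP key. */
        snprintf(buf, len, "%" PRIx64 ":%" PRIx64 ":%" PRIx64 ":%" PRIx64,
            zb->zb_objset, zb->zb_object, (uint64_t)zb->zb_level, zb->zb_blkid);
    }

    static void name_to_bookmark(const char *buf, bookmark_t *zb)
    {
        char *end;

        zb->zb_objset = strtoull(buf, &end, 16);
        assert(*end == ':');
        zb->zb_object = strtoull(end + 1, &end, 16);
        assert(*end == ':');
        zb->zb_level = (int64_t)strtoull(end + 1, &end, 16);
        assert(*end == ':');
        zb->zb_blkid = strtoull(end + 1, &end, 16);
        assert(*end == '\0');
    }

    int main(void)
    {
        bookmark_t in = { 21, 7, 0, 4096 }, out;
        char name[64];

        bookmark_to_name(&in, name, sizeof (name));
        name_to_bookmark(name, &out);
        printf("%s -> objset=%" PRIu64 " object=%" PRIu64 "\n",
            name, out.zb_objset, out.zb_object);
        return (0);
    }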
++ */ ++uint64_t ++spa_get_errlog_size(spa_t *spa) ++{ ++ uint64_t total = 0, count; ++ ++ mutex_enter(&spa->spa_errlog_lock); ++ if (spa->spa_errlog_scrub != 0 && ++ zap_count(spa->spa_meta_objset, spa->spa_errlog_scrub, ++ &count) == 0) ++ total += count; ++ ++ if (spa->spa_errlog_last != 0 && !spa->spa_scrub_finished && ++ zap_count(spa->spa_meta_objset, spa->spa_errlog_last, ++ &count) == 0) ++ total += count; ++ mutex_exit(&spa->spa_errlog_lock); ++ ++ mutex_enter(&spa->spa_errlist_lock); ++ total += avl_numnodes(&spa->spa_errlist_last); ++ total += avl_numnodes(&spa->spa_errlist_scrub); ++ mutex_exit(&spa->spa_errlist_lock); ++ ++ return (total); ++} ++ ++#ifdef _KERNEL ++static int ++process_error_log(spa_t *spa, uint64_t obj, void *addr, size_t *count) ++{ ++ zap_cursor_t zc; ++ zap_attribute_t za; ++ zbookmark_t zb; ++ ++ if (obj == 0) ++ return (0); ++ ++ for (zap_cursor_init(&zc, spa->spa_meta_objset, obj); ++ zap_cursor_retrieve(&zc, &za) == 0; ++ zap_cursor_advance(&zc)) { ++ ++ if (*count == 0) { ++ zap_cursor_fini(&zc); ++ return (ENOMEM); ++ } ++ ++ name_to_bookmark(za.za_name, &zb); ++ ++ if (copyout(&zb, (char *)addr + ++ (*count - 1) * sizeof (zbookmark_t), ++ sizeof (zbookmark_t)) != 0) ++ return (EFAULT); ++ ++ *count -= 1; ++ } ++ ++ zap_cursor_fini(&zc); ++ ++ return (0); ++} ++ ++static int ++process_error_list(avl_tree_t *list, void *addr, size_t *count) ++{ ++ spa_error_entry_t *se; ++ ++ for (se = avl_first(list); se != NULL; se = AVL_NEXT(list, se)) { ++ ++ if (*count == 0) ++ return (ENOMEM); ++ ++ if (copyout(&se->se_bookmark, (char *)addr + ++ (*count - 1) * sizeof (zbookmark_t), ++ sizeof (zbookmark_t)) != 0) ++ return (EFAULT); ++ ++ *count -= 1; ++ } ++ ++ return (0); ++} ++#endif ++ ++/* ++ * Copy all known errors to userland as an array of bookmarks. This is ++ * actually a union of the on-disk last log and current log, as well as any ++ * pending error requests. ++ * ++ * Because the act of reading the on-disk log could cause errors to be ++ * generated, we have two separate locks: one for the error log and one for the ++ * in-core error lists. We only need the error list lock to log and error, so ++ * we grab the error log lock while we read the on-disk logs, and only pick up ++ * the error list lock when we are finished. ++ */ ++int ++spa_get_errlog(spa_t *spa, void *uaddr, size_t *count) ++{ ++ int ret = 0; ++ ++#ifdef _KERNEL ++ mutex_enter(&spa->spa_errlog_lock); ++ ++ ret = process_error_log(spa, spa->spa_errlog_scrub, uaddr, count); ++ ++ if (!ret && !spa->spa_scrub_finished) ++ ret = process_error_log(spa, spa->spa_errlog_last, uaddr, ++ count); ++ ++ mutex_enter(&spa->spa_errlist_lock); ++ if (!ret) ++ ret = process_error_list(&spa->spa_errlist_scrub, uaddr, ++ count); ++ if (!ret) ++ ret = process_error_list(&spa->spa_errlist_last, uaddr, ++ count); ++ mutex_exit(&spa->spa_errlist_lock); ++ ++ mutex_exit(&spa->spa_errlog_lock); ++#endif ++ ++ return (ret); ++} ++ ++/* ++ * Called when a scrub completes. This simply set a bit which tells which AVL ++ * tree to add new errors. spa_errlog_sync() is responsible for actually ++ * syncing the changes to the underlying objects. ++ */ ++void ++spa_errlog_rotate(spa_t *spa) ++{ ++ mutex_enter(&spa->spa_errlist_lock); ++ spa->spa_scrub_finished = B_TRUE; ++ mutex_exit(&spa->spa_errlist_lock); ++} ++ ++/* ++ * Discard any pending errors from the spa_t. Called when unloading a faulted ++ * pool, as the errors encountered during the open cannot be synced to disk. 
++ */ ++void ++spa_errlog_drain(spa_t *spa) ++{ ++ spa_error_entry_t *se; ++ void *cookie; ++ ++ mutex_enter(&spa->spa_errlist_lock); ++ ++ cookie = NULL; ++ while ((se = avl_destroy_nodes(&spa->spa_errlist_last, ++ &cookie)) != NULL) ++ kmem_free(se, sizeof (spa_error_entry_t)); ++ cookie = NULL; ++ while ((se = avl_destroy_nodes(&spa->spa_errlist_scrub, ++ &cookie)) != NULL) ++ kmem_free(se, sizeof (spa_error_entry_t)); ++ ++ mutex_exit(&spa->spa_errlist_lock); ++} ++ ++/* ++ * Process a list of errors into the current on-disk log. ++ */ ++static void ++sync_error_list(spa_t *spa, avl_tree_t *t, uint64_t *obj, dmu_tx_t *tx) ++{ ++ spa_error_entry_t *se; ++ char buf[64]; ++ void *cookie; ++ ++ if (avl_numnodes(t) != 0) { ++ /* create log if necessary */ ++ if (*obj == 0) ++ *obj = zap_create(spa->spa_meta_objset, ++ DMU_OT_ERROR_LOG, DMU_OT_NONE, ++ 0, tx); ++ ++ /* add errors to the current log */ ++ for (se = avl_first(t); se != NULL; se = AVL_NEXT(t, se)) { ++ char *name = se->se_name ? se->se_name : ""; ++ ++ bookmark_to_name(&se->se_bookmark, buf, sizeof (buf)); ++ ++ (void) zap_update(spa->spa_meta_objset, ++ *obj, buf, 1, strlen(name) + 1, name, tx); ++ } ++ ++ /* purge the error list */ ++ cookie = NULL; ++ while ((se = avl_destroy_nodes(t, &cookie)) != NULL) ++ kmem_free(se, sizeof (spa_error_entry_t)); ++ } ++} ++ ++/* ++ * Sync the error log out to disk. This is a little tricky because the act of ++ * writing the error log requires the spa_errlist_lock. So, we need to lock the ++ * error lists, take a copy of the lists, and then reinitialize them. Then, we ++ * drop the error list lock and take the error log lock, at which point we ++ * do the errlog processing. Then, if we encounter an I/O error during this ++ * process, we can successfully add the error to the list. Note that this will ++ * result in the perpetual recycling of errors, but it is an unlikely situation ++ * and not a performance critical operation. ++ */ ++void ++spa_errlog_sync(spa_t *spa, uint64_t txg) ++{ ++ dmu_tx_t *tx; ++ avl_tree_t scrub, last; ++ int scrub_finished; ++ ++ mutex_enter(&spa->spa_errlist_lock); ++ ++ /* ++ * Bail out early under normal circumstances. ++ */ ++ if (avl_numnodes(&spa->spa_errlist_scrub) == 0 && ++ avl_numnodes(&spa->spa_errlist_last) == 0 && ++ !spa->spa_scrub_finished) { ++ mutex_exit(&spa->spa_errlist_lock); ++ return; ++ } ++ ++ spa_get_errlists(spa, &last, &scrub); ++ scrub_finished = spa->spa_scrub_finished; ++ spa->spa_scrub_finished = B_FALSE; ++ ++ mutex_exit(&spa->spa_errlist_lock); ++ mutex_enter(&spa->spa_errlog_lock); ++ ++ tx = dmu_tx_create_assigned(spa->spa_dsl_pool, txg); ++ ++ /* ++ * Sync out the current list of errors. ++ */ ++ sync_error_list(spa, &last, &spa->spa_errlog_last, tx); ++ ++ /* ++ * Rotate the log if necessary. ++ */ ++ if (scrub_finished) { ++ if (spa->spa_errlog_last != 0) ++ VERIFY(dmu_object_free(spa->spa_meta_objset, ++ spa->spa_errlog_last, tx) == 0); ++ spa->spa_errlog_last = spa->spa_errlog_scrub; ++ spa->spa_errlog_scrub = 0; ++ ++ sync_error_list(spa, &scrub, &spa->spa_errlog_last, tx); ++ } ++ ++ /* ++ * Sync out any pending scrub errors. ++ */ ++ sync_error_list(spa, &scrub, &spa->spa_errlog_scrub, tx); ++ ++ /* ++ * Update the MOS to reflect the new values. 
++ */ ++ (void) zap_update(spa->spa_meta_objset, DMU_POOL_DIRECTORY_OBJECT, ++ DMU_POOL_ERRLOG_LAST, sizeof (uint64_t), 1, ++ &spa->spa_errlog_last, tx); ++ (void) zap_update(spa->spa_meta_objset, DMU_POOL_DIRECTORY_OBJECT, ++ DMU_POOL_ERRLOG_SCRUB, sizeof (uint64_t), 1, ++ &spa->spa_errlog_scrub, tx); ++ ++ dmu_tx_commit(tx); ++ ++ mutex_exit(&spa->spa_errlog_lock); ++} ++ ++#if defined(_KERNEL) && defined(HAVE_SPL) ++/* error handling */ ++EXPORT_SYMBOL(spa_log_error); ++EXPORT_SYMBOL(spa_get_errlog_size); ++EXPORT_SYMBOL(spa_get_errlog); ++EXPORT_SYMBOL(spa_errlog_rotate); ++EXPORT_SYMBOL(spa_errlog_drain); ++EXPORT_SYMBOL(spa_errlog_sync); ++EXPORT_SYMBOL(spa_get_errlists); ++#endif +diff -uNr linux-3.2.33-go.orig/fs/zfs/zfs/spa_history.c linux-3.2.33-go/fs/zfs/zfs/spa_history.c +--- linux-3.2.33-go.orig/fs/zfs/zfs/spa_history.c 1970-01-01 01:00:00.000000000 +0100 ++++ linux-3.2.33-go/fs/zfs/zfs/spa_history.c 2012-11-16 23:25:34.348039346 +0100 +@@ -0,0 +1,514 @@ ++/* ++ * CDDL HEADER START ++ * ++ * The contents of this file are subject to the terms of the ++ * Common Development and Distribution License (the "License"). ++ * You may not use this file except in compliance with the License. ++ * ++ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE ++ * or http://www.opensolaris.org/os/licensing. ++ * See the License for the specific language governing permissions ++ * and limitations under the License. ++ * ++ * When distributing Covered Code, include this CDDL HEADER in each ++ * file and include the License file at usr/src/OPENSOLARIS.LICENSE. ++ * If applicable, add the following below this CDDL HEADER, with the ++ * fields enclosed by brackets "[]" replaced with your own identifying ++ * information: Portions Copyright [yyyy] [name of copyright owner] ++ * ++ * CDDL HEADER END ++ */ ++ ++/* ++ * Copyright (c) 2006, 2010, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2011 by Delphix. All rights reserved. ++ */ ++ ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include "zfs_comutil.h" ++#ifdef _KERNEL ++#include ++#endif ++ ++/* ++ * Routines to manage the on-disk history log. ++ * ++ * The history log is stored as a dmu object containing ++ * tuples. ++ * ++ * Where "record nvlist" is a nvlist containing uint64_ts and strings, and ++ * "packed record length" is the packed length of the "record nvlist" stored ++ * as a little endian uint64_t. ++ * ++ * The log is implemented as a ring buffer, though the original creation ++ * of the pool ('zpool create') is never overwritten. ++ * ++ * The history log is tracked as object 'spa_t::spa_history'. The bonus buffer ++ * of 'spa_history' stores the offsets for logging/retrieving history as ++ * 'spa_history_phys_t'. 'sh_pool_create_len' is the ending offset in bytes of ++ * where the 'zpool create' record is stored. This allows us to never ++ * overwrite the original creation of the pool. 'sh_phys_max_off' is the ++ * physical ending offset in bytes of the log. This tells you the length of ++ * the buffer. 'sh_eof' is the logical EOF (in bytes). Whenever a record ++ * is added, 'sh_eof' is incremented by the the size of the record. ++ * 'sh_eof' is never decremented. 'sh_bof' is the logical BOF (in bytes). ++ * This is where the consumer should start reading from after reading in ++ * the 'zpool create' portion of the log. ++ * ++ * 'sh_records_lost' keeps track of how many records have been overwritten ++ * and permanently lost. 
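++ *
++ * As a purely illustrative example of the logical-to-physical mapping
++ * performed by spa_history_log_to_phys() below (the numbers are made up):
++ * with sh_pool_create_len = 1000 and sh_phys_max_off = 11000 the wrappable
++ * region is 10000 bytes, so logical offset 25500 maps to physical offset
++ * ((25500 - 1000) % 10000) + 1000 = 5500, i.e. the log has wrapped around
++ * the physical end of the object twice.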
++ */ ++ ++/* convert a logical offset to physical */ ++static uint64_t ++spa_history_log_to_phys(uint64_t log_off, spa_history_phys_t *shpp) ++{ ++ uint64_t phys_len; ++ ++ phys_len = shpp->sh_phys_max_off - shpp->sh_pool_create_len; ++ return ((log_off - shpp->sh_pool_create_len) % phys_len ++ + shpp->sh_pool_create_len); ++} ++ ++void ++spa_history_create_obj(spa_t *spa, dmu_tx_t *tx) ++{ ++ dmu_buf_t *dbp; ++ spa_history_phys_t *shpp; ++ objset_t *mos = spa->spa_meta_objset; ++ ++ ASSERT(spa->spa_history == 0); ++ spa->spa_history = dmu_object_alloc(mos, DMU_OT_SPA_HISTORY, ++ SPA_MAXBLOCKSIZE, DMU_OT_SPA_HISTORY_OFFSETS, ++ sizeof (spa_history_phys_t), tx); ++ ++ VERIFY(zap_add(mos, DMU_POOL_DIRECTORY_OBJECT, ++ DMU_POOL_HISTORY, sizeof (uint64_t), 1, ++ &spa->spa_history, tx) == 0); ++ ++ VERIFY(0 == dmu_bonus_hold(mos, spa->spa_history, FTAG, &dbp)); ++ ASSERT(dbp->db_size >= sizeof (spa_history_phys_t)); ++ ++ shpp = dbp->db_data; ++ dmu_buf_will_dirty(dbp, tx); ++ ++ /* ++ * Figure out maximum size of history log. We set it at ++ * 0.1% of pool size, with a max of 1G and min of 128KB. ++ */ ++ shpp->sh_phys_max_off = ++ metaslab_class_get_dspace(spa_normal_class(spa)) / 1000; ++ shpp->sh_phys_max_off = MIN(shpp->sh_phys_max_off, 1<<30); ++ shpp->sh_phys_max_off = MAX(shpp->sh_phys_max_off, 128<<10); ++ ++ dmu_buf_rele(dbp, FTAG); ++} ++ ++/* ++ * Change 'sh_bof' to the beginning of the next record. ++ */ ++static int ++spa_history_advance_bof(spa_t *spa, spa_history_phys_t *shpp) ++{ ++ objset_t *mos = spa->spa_meta_objset; ++ uint64_t firstread, reclen, phys_bof; ++ char buf[sizeof (reclen)]; ++ int err; ++ ++ phys_bof = spa_history_log_to_phys(shpp->sh_bof, shpp); ++ firstread = MIN(sizeof (reclen), shpp->sh_phys_max_off - phys_bof); ++ ++ if ((err = dmu_read(mos, spa->spa_history, phys_bof, firstread, ++ buf, DMU_READ_PREFETCH)) != 0) ++ return (err); ++ if (firstread != sizeof (reclen)) { ++ if ((err = dmu_read(mos, spa->spa_history, ++ shpp->sh_pool_create_len, sizeof (reclen) - firstread, ++ buf + firstread, DMU_READ_PREFETCH)) != 0) ++ return (err); ++ } ++ ++ reclen = LE_64(*((uint64_t *)buf)); ++ shpp->sh_bof += reclen + sizeof (reclen); ++ shpp->sh_records_lost++; ++ return (0); ++} ++ ++static int ++spa_history_write(spa_t *spa, void *buf, uint64_t len, spa_history_phys_t *shpp, ++ dmu_tx_t *tx) ++{ ++ uint64_t firstwrite, phys_eof; ++ objset_t *mos = spa->spa_meta_objset; ++ int err; ++ ++ ASSERT(MUTEX_HELD(&spa->spa_history_lock)); ++ ++ /* see if we need to reset logical BOF */ ++ while (shpp->sh_phys_max_off - shpp->sh_pool_create_len - ++ (shpp->sh_eof - shpp->sh_bof) <= len) { ++ if ((err = spa_history_advance_bof(spa, shpp)) != 0) { ++ return (err); ++ } ++ } ++ ++ phys_eof = spa_history_log_to_phys(shpp->sh_eof, shpp); ++ firstwrite = MIN(len, shpp->sh_phys_max_off - phys_eof); ++ shpp->sh_eof += len; ++ dmu_write(mos, spa->spa_history, phys_eof, firstwrite, buf, tx); ++ ++ len -= firstwrite; ++ if (len > 0) { ++ /* write out the rest at the beginning of physical file */ ++ dmu_write(mos, spa->spa_history, shpp->sh_pool_create_len, ++ len, (char *)buf + firstwrite, tx); ++ } ++ ++ return (0); ++} ++ ++static char * ++spa_history_zone(void) ++{ ++#ifdef _KERNEL ++#ifdef HAVE_SPL ++ return ("linux"); ++#else ++ return (curproc->p_zone->zone_name); ++#endif ++#else ++ return ("global"); ++#endif ++} ++ ++/* ++ * Write out a history event. 
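++ * This routine runs in syncing context (it is dispatched as a DSL sync
++ * task): it packs the event into an XDR-encoded nvlist, then appends the
++ * little-endian packed record length followed by the packed record to the
++ * ring buffer via spa_history_write().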
++ */ ++/*ARGSUSED*/ ++static void ++spa_history_log_sync(void *arg1, void *arg2, dmu_tx_t *tx) ++{ ++ spa_t *spa = arg1; ++ history_arg_t *hap = arg2; ++ const char *history_str = hap->ha_history_str; ++ objset_t *mos = spa->spa_meta_objset; ++ dmu_buf_t *dbp; ++ spa_history_phys_t *shpp; ++ size_t reclen; ++ uint64_t le_len; ++ nvlist_t *nvrecord; ++ char *record_packed = NULL; ++ int ret; ++ ++ /* ++ * If we have an older pool that doesn't have a command ++ * history object, create it now. ++ */ ++ mutex_enter(&spa->spa_history_lock); ++ if (!spa->spa_history) ++ spa_history_create_obj(spa, tx); ++ mutex_exit(&spa->spa_history_lock); ++ ++ /* ++ * Get the offset of where we need to write via the bonus buffer. ++ * Update the offset when the write completes. ++ */ ++ VERIFY(0 == dmu_bonus_hold(mos, spa->spa_history, FTAG, &dbp)); ++ shpp = dbp->db_data; ++ ++ dmu_buf_will_dirty(dbp, tx); ++ ++#ifdef ZFS_DEBUG ++ { ++ dmu_object_info_t doi; ++ dmu_object_info_from_db(dbp, &doi); ++ ASSERT3U(doi.doi_bonus_type, ==, DMU_OT_SPA_HISTORY_OFFSETS); ++ } ++#endif ++ ++ VERIFY(nvlist_alloc(&nvrecord, NV_UNIQUE_NAME, KM_PUSHPAGE) == 0); ++ VERIFY(nvlist_add_uint64(nvrecord, ZPOOL_HIST_TIME, ++ gethrestime_sec()) == 0); ++ VERIFY(nvlist_add_uint64(nvrecord, ZPOOL_HIST_WHO, hap->ha_uid) == 0); ++ if (hap->ha_zone != NULL) ++ VERIFY(nvlist_add_string(nvrecord, ZPOOL_HIST_ZONE, ++ hap->ha_zone) == 0); ++#ifdef _KERNEL ++ VERIFY(nvlist_add_string(nvrecord, ZPOOL_HIST_HOST, ++ utsname.nodename) == 0); ++#endif ++ if (hap->ha_log_type == LOG_CMD_POOL_CREATE || ++ hap->ha_log_type == LOG_CMD_NORMAL) { ++ VERIFY(nvlist_add_string(nvrecord, ZPOOL_HIST_CMD, ++ history_str) == 0); ++ ++ zfs_dbgmsg("command: %s", history_str); ++ } else { ++ VERIFY(nvlist_add_uint64(nvrecord, ZPOOL_HIST_INT_EVENT, ++ hap->ha_event) == 0); ++ VERIFY(nvlist_add_uint64(nvrecord, ZPOOL_HIST_TXG, ++ tx->tx_txg) == 0); ++ VERIFY(nvlist_add_string(nvrecord, ZPOOL_HIST_INT_STR, ++ history_str) == 0); ++ ++ zfs_dbgmsg("internal %s pool:%s txg:%llu %s", ++ zfs_history_event_names[hap->ha_event], spa_name(spa), ++ (longlong_t)tx->tx_txg, history_str); ++ ++ } ++ ++ VERIFY(nvlist_size(nvrecord, &reclen, NV_ENCODE_XDR) == 0); ++ record_packed = kmem_alloc(reclen, KM_PUSHPAGE); ++ ++ VERIFY(nvlist_pack(nvrecord, &record_packed, &reclen, ++ NV_ENCODE_XDR, KM_PUSHPAGE) == 0); ++ ++ mutex_enter(&spa->spa_history_lock); ++ if (hap->ha_log_type == LOG_CMD_POOL_CREATE) ++ VERIFY(shpp->sh_eof == shpp->sh_pool_create_len); ++ ++ /* write out the packed length as little endian */ ++ le_len = LE_64((uint64_t)reclen); ++ ret = spa_history_write(spa, &le_len, sizeof (le_len), shpp, tx); ++ if (!ret) ++ ret = spa_history_write(spa, record_packed, reclen, shpp, tx); ++ ++ if (!ret && hap->ha_log_type == LOG_CMD_POOL_CREATE) { ++ shpp->sh_pool_create_len += sizeof (le_len) + reclen; ++ shpp->sh_bof = shpp->sh_pool_create_len; ++ } ++ ++ mutex_exit(&spa->spa_history_lock); ++ nvlist_free(nvrecord); ++ kmem_free(record_packed, reclen); ++ dmu_buf_rele(dbp, FTAG); ++ ++ strfree(hap->ha_history_str); ++ if (hap->ha_zone != NULL) ++ strfree(hap->ha_zone); ++ kmem_free(hap, sizeof (history_arg_t)); ++} ++ ++/* ++ * Write out a history event. 
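++ * This is the asynchronous entry point for command history (the LOG_CMD_*
++ * event types): it copies the strings into a freshly allocated
++ * history_arg_t and defers the real work to spa_history_log_sync() via
++ * dsl_sync_task_do_nowait(); errors from the deferred sync task are
++ * ignored.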
++ */ ++int ++spa_history_log(spa_t *spa, const char *history_str, history_log_type_t what) ++{ ++ history_arg_t *ha; ++ int err = 0; ++ dmu_tx_t *tx; ++ ++ ASSERT(what != LOG_INTERNAL); ++ ++ tx = dmu_tx_create_dd(spa_get_dsl(spa)->dp_mos_dir); ++ err = dmu_tx_assign(tx, TXG_WAIT); ++ if (err) { ++ dmu_tx_abort(tx); ++ return (err); ++ } ++ ++ ha = kmem_alloc(sizeof (history_arg_t), KM_PUSHPAGE); ++ ha->ha_history_str = strdup(history_str); ++ ha->ha_zone = strdup(spa_history_zone()); ++ ha->ha_log_type = what; ++ ha->ha_uid = crgetuid(CRED()); ++ ++ /* Kick this off asynchronously; errors are ignored. */ ++ dsl_sync_task_do_nowait(spa_get_dsl(spa), NULL, ++ spa_history_log_sync, spa, ha, 0, tx); ++ dmu_tx_commit(tx); ++ ++ /* spa_history_log_sync will free ha and strings */ ++ return (err); ++} ++ ++/* ++ * Read out the command history. ++ */ ++int ++spa_history_get(spa_t *spa, uint64_t *offp, uint64_t *len, char *buf) ++{ ++ objset_t *mos = spa->spa_meta_objset; ++ dmu_buf_t *dbp; ++ uint64_t read_len, phys_read_off, phys_eof; ++ uint64_t leftover = 0; ++ spa_history_phys_t *shpp; ++ int err; ++ ++ /* ++ * If the command history doesn't exist (older pool), ++ * that's ok, just return ENOENT. ++ */ ++ if (!spa->spa_history) ++ return (ENOENT); ++ ++ /* ++ * The history is logged asynchronously, so when they request ++ * the first chunk of history, make sure everything has been ++ * synced to disk so that we get it. ++ */ ++ if (*offp == 0 && spa_writeable(spa)) ++ txg_wait_synced(spa_get_dsl(spa), 0); ++ ++ if ((err = dmu_bonus_hold(mos, spa->spa_history, FTAG, &dbp)) != 0) ++ return (err); ++ shpp = dbp->db_data; ++ ++#ifdef ZFS_DEBUG ++ { ++ dmu_object_info_t doi; ++ dmu_object_info_from_db(dbp, &doi); ++ ASSERT3U(doi.doi_bonus_type, ==, DMU_OT_SPA_HISTORY_OFFSETS); ++ } ++#endif ++ ++ mutex_enter(&spa->spa_history_lock); ++ phys_eof = spa_history_log_to_phys(shpp->sh_eof, shpp); ++ ++ if (*offp < shpp->sh_pool_create_len) { ++ /* read in just the zpool create history */ ++ phys_read_off = *offp; ++ read_len = MIN(*len, shpp->sh_pool_create_len - ++ phys_read_off); ++ } else { ++ /* ++ * Need to reset passed in offset to BOF if the passed in ++ * offset has since been overwritten. ++ */ ++ *offp = MAX(*offp, shpp->sh_bof); ++ phys_read_off = spa_history_log_to_phys(*offp, shpp); ++ ++ /* ++ * Read up to the minimum of what the user passed down or ++ * the EOF (physical or logical). If we hit physical EOF, ++ * use 'leftover' to read from the physical BOF. 
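++ *
++ * For example (with made-up numbers): if sh_pool_create_len is 1000,
++ * sh_phys_max_off is 11000, phys_eof is 2000, phys_read_off is 10500
++ * and the caller asked for 800 bytes, then read_len is 500 (the tail
++ * of the physical buffer) and leftover is 300, read starting at
++ * physical offset 1000.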
++ */ ++ if (phys_read_off <= phys_eof) { ++ read_len = MIN(*len, phys_eof - phys_read_off); ++ } else { ++ read_len = MIN(*len, ++ shpp->sh_phys_max_off - phys_read_off); ++ if (phys_read_off + *len > shpp->sh_phys_max_off) { ++ leftover = MIN(*len - read_len, ++ phys_eof - shpp->sh_pool_create_len); ++ } ++ } ++ } ++ ++ /* offset for consumer to use next */ ++ *offp += read_len + leftover; ++ ++ /* tell the consumer how much you actually read */ ++ *len = read_len + leftover; ++ ++ if (read_len == 0) { ++ mutex_exit(&spa->spa_history_lock); ++ dmu_buf_rele(dbp, FTAG); ++ return (0); ++ } ++ ++ err = dmu_read(mos, spa->spa_history, phys_read_off, read_len, buf, ++ DMU_READ_PREFETCH); ++ if (leftover && err == 0) { ++ err = dmu_read(mos, spa->spa_history, shpp->sh_pool_create_len, ++ leftover, buf + read_len, DMU_READ_PREFETCH); ++ } ++ mutex_exit(&spa->spa_history_lock); ++ ++ dmu_buf_rele(dbp, FTAG); ++ return (err); ++} ++ ++static void ++log_internal(history_internal_events_t event, spa_t *spa, ++ dmu_tx_t *tx, const char *fmt, va_list adx) ++{ ++ history_arg_t *ha; ++ va_list adx_copy; ++ ++ /* ++ * If this is part of creating a pool, not everything is ++ * initialized yet, so don't bother logging the internal events. ++ */ ++ if (tx->tx_txg == TXG_INITIAL) ++ return; ++ ++ ha = kmem_alloc(sizeof (history_arg_t), KM_PUSHPAGE); ++ va_copy(adx_copy, adx); ++ ha->ha_history_str = kmem_vasprintf(fmt, adx_copy); ++ va_end(adx_copy); ++ ha->ha_log_type = LOG_INTERNAL; ++ ha->ha_event = event; ++ ha->ha_zone = NULL; ++ ha->ha_uid = 0; ++ ++ if (dmu_tx_is_syncing(tx)) { ++ spa_history_log_sync(spa, ha, tx); ++ } else { ++ dsl_sync_task_do_nowait(spa_get_dsl(spa), NULL, ++ spa_history_log_sync, spa, ha, 0, tx); ++ } ++ /* spa_history_log_sync() will free ha and strings */ ++} ++ ++void ++spa_history_log_internal(history_internal_events_t event, spa_t *spa, ++ dmu_tx_t *tx, const char *fmt, ...) ++{ ++ dmu_tx_t *htx = tx; ++ va_list adx; ++ ++ /* create a tx if we didn't get one */ ++ if (tx == NULL) { ++ htx = dmu_tx_create_dd(spa_get_dsl(spa)->dp_mos_dir); ++ if (dmu_tx_assign(htx, TXG_WAIT) != 0) { ++ dmu_tx_abort(htx); ++ return; ++ } ++ } ++ ++ va_start(adx, fmt); ++ log_internal(event, spa, htx, fmt, adx); ++ va_end(adx); ++ ++ /* if we didn't get a tx from the caller, commit the one we made */ ++ if (tx == NULL) ++ dmu_tx_commit(htx); ++} ++ ++void ++spa_history_log_version(spa_t *spa, history_internal_events_t event) ++{ ++#ifdef _KERNEL ++ uint64_t current_vers = spa_version(spa); ++ ++ if (current_vers >= SPA_VERSION_ZPOOL_HISTORY) { ++ spa_history_log_internal(event, spa, NULL, ++ "pool spa %llu; zfs spa %llu; zpl %d; uts %s %s %s %s", ++ (u_longlong_t)current_vers, SPA_VERSION, ZPL_VERSION, ++ utsname.nodename, utsname.release, utsname.version, ++ utsname.machine); ++ } ++ cmn_err(CE_CONT, "!%s version %llu pool %s using %llu", ++ event == LOG_POOL_IMPORT ? "imported" : ++ event == LOG_POOL_CREATE ? 
"created" : "accessed", ++ (u_longlong_t)current_vers, spa_name(spa), SPA_VERSION); ++#endif ++} ++ ++#if defined(_KERNEL) && defined(HAVE_SPL) ++EXPORT_SYMBOL(spa_history_create_obj); ++EXPORT_SYMBOL(spa_history_get); ++EXPORT_SYMBOL(spa_history_log); ++EXPORT_SYMBOL(spa_history_log_internal); ++EXPORT_SYMBOL(spa_history_log_version); ++#endif +diff -uNr linux-3.2.33-go.orig/fs/zfs/zfs/spa_misc.c linux-3.2.33-go/fs/zfs/zfs/spa_misc.c +--- linux-3.2.33-go.orig/fs/zfs/zfs/spa_misc.c 1970-01-01 01:00:00.000000000 +0100 ++++ linux-3.2.33-go/fs/zfs/zfs/spa_misc.c 2012-11-16 23:25:34.352039300 +0100 +@@ -0,0 +1,1755 @@ ++/* ++ * CDDL HEADER START ++ * ++ * The contents of this file are subject to the terms of the ++ * Common Development and Distribution License (the "License"). ++ * You may not use this file except in compliance with the License. ++ * ++ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE ++ * or http://www.opensolaris.org/os/licensing. ++ * See the License for the specific language governing permissions ++ * and limitations under the License. ++ * ++ * When distributing Covered Code, include this CDDL HEADER in each ++ * file and include the License file at usr/src/OPENSOLARIS.LICENSE. ++ * If applicable, add the following below this CDDL HEADER, with the ++ * fields enclosed by brackets "[]" replaced with your own identifying ++ * information: Portions Copyright [yyyy] [name of copyright owner] ++ * ++ * CDDL HEADER END ++ */ ++/* ++ * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2011 by Delphix. All rights reserved. ++ * Copyright 2011 Nexenta Systems, Inc. All rights reserved. ++ */ ++ ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include "zfs_prop.h" ++ ++/* ++ * SPA locking ++ * ++ * There are four basic locks for managing spa_t structures: ++ * ++ * spa_namespace_lock (global mutex) ++ * ++ * This lock must be acquired to do any of the following: ++ * ++ * - Lookup a spa_t by name ++ * - Add or remove a spa_t from the namespace ++ * - Increase spa_refcount from non-zero ++ * - Check if spa_refcount is zero ++ * - Rename a spa_t ++ * - add/remove/attach/detach devices ++ * - Held for the duration of create/destroy/import/export ++ * ++ * It does not need to handle recursion. A create or destroy may ++ * reference objects (files or zvols) in other pools, but by ++ * definition they must have an existing reference, and will never need ++ * to lookup a spa_t by name. ++ * ++ * spa_refcount (per-spa refcount_t protected by mutex) ++ * ++ * This reference count keep track of any active users of the spa_t. The ++ * spa_t cannot be destroyed or freed while this is non-zero. Internally, ++ * the refcount is never really 'zero' - opening a pool implicitly keeps ++ * some references in the DMU. Internally we check against spa_minref, but ++ * present the image of a zero/non-zero value to consumers. 
++ * ++ * spa_config_lock[] (per-spa array of rwlocks) ++ * ++ * This protects the spa_t from config changes, and must be held in ++ * the following circumstances: ++ * ++ * - RW_READER to perform I/O to the spa ++ * - RW_WRITER to change the vdev config ++ * ++ * The locking order is fairly straightforward: ++ * ++ * spa_namespace_lock -> spa_refcount ++ * ++ * The namespace lock must be acquired to increase the refcount from 0 ++ * or to check if it is zero. ++ * ++ * spa_refcount -> spa_config_lock[] ++ * ++ * There must be at least one valid reference on the spa_t to acquire ++ * the config lock. ++ * ++ * spa_namespace_lock -> spa_config_lock[] ++ * ++ * The namespace lock must always be taken before the config lock. ++ * ++ * ++ * The spa_namespace_lock can be acquired directly and is globally visible. ++ * ++ * The namespace is manipulated using the following functions, all of which ++ * require the spa_namespace_lock to be held. ++ * ++ * spa_lookup() Lookup a spa_t by name. ++ * ++ * spa_add() Create a new spa_t in the namespace. ++ * ++ * spa_remove() Remove a spa_t from the namespace. This also ++ * frees up any memory associated with the spa_t. ++ * ++ * spa_next() Returns the next spa_t in the system, or the ++ * first if NULL is passed. ++ * ++ * spa_evict_all() Shutdown and remove all spa_t structures in ++ * the system. ++ * ++ * spa_guid_exists() Determine whether a pool/device guid exists. ++ * ++ * The spa_refcount is manipulated using the following functions: ++ * ++ * spa_open_ref() Adds a reference to the given spa_t. Must be ++ * called with spa_namespace_lock held if the ++ * refcount is currently zero. ++ * ++ * spa_close() Remove a reference from the spa_t. This will ++ * not free the spa_t or remove it from the ++ * namespace. No locking is required. ++ * ++ * spa_refcount_zero() Returns true if the refcount is currently ++ * zero. Must be called with spa_namespace_lock ++ * held. ++ * ++ * The spa_config_lock[] is an array of rwlocks, ordered as follows: ++ * SCL_CONFIG > SCL_STATE > SCL_ALLOC > SCL_ZIO > SCL_FREE > SCL_VDEV. ++ * spa_config_lock[] is manipulated with spa_config_{enter,exit,held}(). ++ * ++ * To read the configuration, it suffices to hold one of these locks as reader. ++ * To modify the configuration, you must hold all locks as writer. To modify ++ * vdev state without altering the vdev tree's topology (e.g. online/offline), ++ * you must hold SCL_STATE and SCL_ZIO as writer. ++ * ++ * We use these distinct config locks to avoid recursive lock entry. ++ * For example, spa_sync() (which holds SCL_CONFIG as reader) induces ++ * block allocations (SCL_ALLOC), which may require reading space maps ++ * from disk (dmu_read() -> zio_read() -> SCL_ZIO). ++ * ++ * The spa config locks cannot be normal rwlocks because we need the ++ * ability to hand off ownership. For example, SCL_ZIO is acquired ++ * by the issuing thread and later released by an interrupt thread. ++ * They do, however, obey the usual write-wanted semantics to prevent ++ * writer (i.e. system administrator) starvation. ++ * ++ * The lock acquisition rules are as follows: ++ * ++ * SCL_CONFIG ++ * Protects changes to the vdev tree topology, such as vdev ++ * add/remove/attach/detach. Protects the dirty config list ++ * (spa_config_dirty_list) and the set of spares and l2arc devices. ++ * ++ * SCL_STATE ++ * Protects changes to pool state and vdev state, such as vdev ++ * online/offline/fault/degrade/clear. 
Protects the dirty state list ++ * (spa_state_dirty_list) and global pool state (spa_state). ++ * ++ * SCL_ALLOC ++ * Protects changes to metaslab groups and classes. ++ * Held as reader by metaslab_alloc() and metaslab_claim(). ++ * ++ * SCL_ZIO ++ * Held by bp-level zios (those which have no io_vd upon entry) ++ * to prevent changes to the vdev tree. The bp-level zio implicitly ++ * protects all of its vdev child zios, which do not hold SCL_ZIO. ++ * ++ * SCL_FREE ++ * Protects changes to metaslab groups and classes. ++ * Held as reader by metaslab_free(). SCL_FREE is distinct from ++ * SCL_ALLOC, and lower than SCL_ZIO, so that we can safely free ++ * blocks in zio_done() while another i/o that holds either ++ * SCL_ALLOC or SCL_ZIO is waiting for this i/o to complete. ++ * ++ * SCL_VDEV ++ * Held as reader to prevent changes to the vdev tree during trivial ++ * inquiries such as bp_get_dsize(). SCL_VDEV is distinct from the ++ * other locks, and lower than all of them, to ensure that it's safe ++ * to acquire regardless of caller context. ++ * ++ * In addition, the following rules apply: ++ * ++ * (a) spa_props_lock protects pool properties, spa_config and spa_config_list. ++ * The lock ordering is SCL_CONFIG > spa_props_lock. ++ * ++ * (b) I/O operations on leaf vdevs. For any zio operation that takes ++ * an explicit vdev_t argument -- such as zio_ioctl(), zio_read_phys(), ++ * or zio_write_phys() -- the caller must ensure that the config cannot ++ * cannot change in the interim, and that the vdev cannot be reopened. ++ * SCL_STATE as reader suffices for both. ++ * ++ * The vdev configuration is protected by spa_vdev_enter() / spa_vdev_exit(). ++ * ++ * spa_vdev_enter() Acquire the namespace lock and the config lock ++ * for writing. ++ * ++ * spa_vdev_exit() Release the config lock, wait for all I/O ++ * to complete, sync the updated configs to the ++ * cache, and release the namespace lock. ++ * ++ * vdev state is protected by spa_vdev_state_enter() / spa_vdev_state_exit(). ++ * Like spa_vdev_enter/exit, these are convenience wrappers -- the actual ++ * locking is, always, based on spa_namespace_lock and spa_config_lock[]. ++ * ++ * spa_rename() is also implemented within this file since is requires ++ * manipulation of the namespace. 
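++ *
++ * As a rough usage sketch (illustrative only, not a verbatim caller from
++ * this patch): a consumer that only needs read access to the vdev tree
++ * would typically do something like
++ *
++ *	mutex_enter(&spa_namespace_lock);
++ *	if ((spa = spa_lookup(name)) != NULL)
++ *		spa_open_ref(spa, FTAG);
++ *	mutex_exit(&spa_namespace_lock);
++ *
++ *	spa_config_enter(spa, SCL_VDEV, FTAG, RW_READER);
++ *	(trivial vdev-tree inquiries go here)
++ *	spa_config_exit(spa, SCL_VDEV, FTAG);
++ *	spa_close(spa, FTAG);
++ *
++ * which respects the namespace -> refcount -> config lock ordering
++ * described above.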
++ */ ++ ++static avl_tree_t spa_namespace_avl; ++kmutex_t spa_namespace_lock; ++static kcondvar_t spa_namespace_cv; ++static int spa_active_count; ++int spa_max_replication_override = SPA_DVAS_PER_BP; ++ ++static kmutex_t spa_spare_lock; ++static avl_tree_t spa_spare_avl; ++static kmutex_t spa_l2cache_lock; ++static avl_tree_t spa_l2cache_avl; ++ ++kmem_cache_t *spa_buffer_pool; ++int spa_mode_global; ++ ++/* ++ * ========================================================================== ++ * SPA config locking ++ * ========================================================================== ++ */ ++static void ++spa_config_lock_init(spa_t *spa) ++{ ++ int i; ++ ++ for (i = 0; i < SCL_LOCKS; i++) { ++ spa_config_lock_t *scl = &spa->spa_config_lock[i]; ++ mutex_init(&scl->scl_lock, NULL, MUTEX_DEFAULT, NULL); ++ cv_init(&scl->scl_cv, NULL, CV_DEFAULT, NULL); ++ refcount_create(&scl->scl_count); ++ scl->scl_writer = NULL; ++ scl->scl_write_wanted = 0; ++ } ++} ++ ++static void ++spa_config_lock_destroy(spa_t *spa) ++{ ++ int i; ++ ++ for (i = 0; i < SCL_LOCKS; i++) { ++ spa_config_lock_t *scl = &spa->spa_config_lock[i]; ++ mutex_destroy(&scl->scl_lock); ++ cv_destroy(&scl->scl_cv); ++ refcount_destroy(&scl->scl_count); ++ ASSERT(scl->scl_writer == NULL); ++ ASSERT(scl->scl_write_wanted == 0); ++ } ++} ++ ++int ++spa_config_tryenter(spa_t *spa, int locks, void *tag, krw_t rw) ++{ ++ int i; ++ ++ for (i = 0; i < SCL_LOCKS; i++) { ++ spa_config_lock_t *scl = &spa->spa_config_lock[i]; ++ if (!(locks & (1 << i))) ++ continue; ++ mutex_enter(&scl->scl_lock); ++ if (rw == RW_READER) { ++ if (scl->scl_writer || scl->scl_write_wanted) { ++ mutex_exit(&scl->scl_lock); ++ spa_config_exit(spa, locks ^ (1 << i), tag); ++ return (0); ++ } ++ } else { ++ ASSERT(scl->scl_writer != curthread); ++ if (!refcount_is_zero(&scl->scl_count)) { ++ mutex_exit(&scl->scl_lock); ++ spa_config_exit(spa, locks ^ (1 << i), tag); ++ return (0); ++ } ++ scl->scl_writer = curthread; ++ } ++ (void) refcount_add(&scl->scl_count, tag); ++ mutex_exit(&scl->scl_lock); ++ } ++ return (1); ++} ++ ++void ++spa_config_enter(spa_t *spa, int locks, void *tag, krw_t rw) ++{ ++ int wlocks_held = 0; ++ int i; ++ ++ for (i = 0; i < SCL_LOCKS; i++) { ++ spa_config_lock_t *scl = &spa->spa_config_lock[i]; ++ if (scl->scl_writer == curthread) ++ wlocks_held |= (1 << i); ++ if (!(locks & (1 << i))) ++ continue; ++ mutex_enter(&scl->scl_lock); ++ if (rw == RW_READER) { ++ while (scl->scl_writer || scl->scl_write_wanted) { ++ cv_wait(&scl->scl_cv, &scl->scl_lock); ++ } ++ } else { ++ ASSERT(scl->scl_writer != curthread); ++ while (!refcount_is_zero(&scl->scl_count)) { ++ scl->scl_write_wanted++; ++ cv_wait(&scl->scl_cv, &scl->scl_lock); ++ scl->scl_write_wanted--; ++ } ++ scl->scl_writer = curthread; ++ } ++ (void) refcount_add(&scl->scl_count, tag); ++ mutex_exit(&scl->scl_lock); ++ } ++ ASSERT(wlocks_held <= locks); ++} ++ ++void ++spa_config_exit(spa_t *spa, int locks, void *tag) ++{ ++ int i; ++ ++ for (i = SCL_LOCKS - 1; i >= 0; i--) { ++ spa_config_lock_t *scl = &spa->spa_config_lock[i]; ++ if (!(locks & (1 << i))) ++ continue; ++ mutex_enter(&scl->scl_lock); ++ ASSERT(!refcount_is_zero(&scl->scl_count)); ++ if (refcount_remove(&scl->scl_count, tag) == 0) { ++ ASSERT(scl->scl_writer == NULL || ++ scl->scl_writer == curthread); ++ scl->scl_writer = NULL; /* OK in either case */ ++ cv_broadcast(&scl->scl_cv); ++ } ++ mutex_exit(&scl->scl_lock); ++ } ++} ++ ++int ++spa_config_held(spa_t *spa, int locks, krw_t rw) ++{ ++ int i, locks_held = 0; 
++ ++ for (i = 0; i < SCL_LOCKS; i++) { ++ spa_config_lock_t *scl = &spa->spa_config_lock[i]; ++ if (!(locks & (1 << i))) ++ continue; ++ if ((rw == RW_READER && !refcount_is_zero(&scl->scl_count)) || ++ (rw == RW_WRITER && scl->scl_writer == curthread)) ++ locks_held |= 1 << i; ++ } ++ ++ return (locks_held); ++} ++ ++/* ++ * ========================================================================== ++ * SPA namespace functions ++ * ========================================================================== ++ */ ++ ++/* ++ * Lookup the named spa_t in the AVL tree. The spa_namespace_lock must be held. ++ * Returns NULL if no matching spa_t is found. ++ */ ++spa_t * ++spa_lookup(const char *name) ++{ ++ static spa_t search; /* spa_t is large; don't allocate on stack */ ++ spa_t *spa; ++ avl_index_t where; ++ char c = 0; ++ char *cp; ++ ++ ASSERT(MUTEX_HELD(&spa_namespace_lock)); ++ ++ /* ++ * If it's a full dataset name, figure out the pool name and ++ * just use that. ++ */ ++ cp = strpbrk(name, "/@"); ++ if (cp) { ++ c = *cp; ++ *cp = '\0'; ++ } ++ ++ (void) strlcpy(search.spa_name, name, sizeof (search.spa_name)); ++ spa = avl_find(&spa_namespace_avl, &search, &where); ++ ++ if (cp) ++ *cp = c; ++ ++ return (spa); ++} ++ ++/* ++ * Create an uninitialized spa_t with the given name. Requires ++ * spa_namespace_lock. The caller must ensure that the spa_t doesn't already ++ * exist by calling spa_lookup() first. ++ */ ++spa_t * ++spa_add(const char *name, nvlist_t *config, const char *altroot) ++{ ++ spa_t *spa; ++ spa_config_dirent_t *dp; ++ int t; ++ ++ ASSERT(MUTEX_HELD(&spa_namespace_lock)); ++ ++ spa = kmem_zalloc(sizeof (spa_t), KM_PUSHPAGE | KM_NODEBUG); ++ ++ mutex_init(&spa->spa_async_lock, NULL, MUTEX_DEFAULT, NULL); ++ mutex_init(&spa->spa_errlist_lock, NULL, MUTEX_DEFAULT, NULL); ++ mutex_init(&spa->spa_errlog_lock, NULL, MUTEX_DEFAULT, NULL); ++ mutex_init(&spa->spa_history_lock, NULL, MUTEX_DEFAULT, NULL); ++ mutex_init(&spa->spa_proc_lock, NULL, MUTEX_DEFAULT, NULL); ++ mutex_init(&spa->spa_props_lock, NULL, MUTEX_DEFAULT, NULL); ++ mutex_init(&spa->spa_scrub_lock, NULL, MUTEX_DEFAULT, NULL); ++ mutex_init(&spa->spa_suspend_lock, NULL, MUTEX_DEFAULT, NULL); ++ mutex_init(&spa->spa_vdev_top_lock, NULL, MUTEX_DEFAULT, NULL); ++ ++ cv_init(&spa->spa_async_cv, NULL, CV_DEFAULT, NULL); ++ cv_init(&spa->spa_proc_cv, NULL, CV_DEFAULT, NULL); ++ cv_init(&spa->spa_scrub_io_cv, NULL, CV_DEFAULT, NULL); ++ cv_init(&spa->spa_suspend_cv, NULL, CV_DEFAULT, NULL); ++ ++ for (t = 0; t < TXG_SIZE; t++) ++ bplist_create(&spa->spa_free_bplist[t]); ++ ++ (void) strlcpy(spa->spa_name, name, sizeof (spa->spa_name)); ++ spa->spa_state = POOL_STATE_UNINITIALIZED; ++ spa->spa_freeze_txg = UINT64_MAX; ++ spa->spa_final_txg = UINT64_MAX; ++ spa->spa_load_max_txg = UINT64_MAX; ++ spa->spa_proc = &p0; ++ spa->spa_proc_state = SPA_PROC_NONE; ++ ++ refcount_create(&spa->spa_refcount); ++ spa_config_lock_init(spa); ++ ++ avl_add(&spa_namespace_avl, spa); ++ ++ /* ++ * Set the alternate root, if there is one. ++ */ ++ if (altroot) { ++ spa->spa_root = spa_strdup(altroot); ++ spa_active_count++; ++ } ++ ++ /* ++ * Every pool starts with the default cachefile ++ */ ++ list_create(&spa->spa_config_list, sizeof (spa_config_dirent_t), ++ offsetof(spa_config_dirent_t, scd_link)); ++ ++ dp = kmem_zalloc(sizeof (spa_config_dirent_t), KM_PUSHPAGE); ++ dp->scd_path = altroot ? 
NULL : spa_strdup(spa_config_path); ++ list_insert_head(&spa->spa_config_list, dp); ++ ++ VERIFY(nvlist_alloc(&spa->spa_load_info, NV_UNIQUE_NAME, ++ KM_PUSHPAGE) == 0); ++ ++ if (config != NULL) ++ VERIFY(nvlist_dup(config, &spa->spa_config, 0) == 0); ++ ++ return (spa); ++} ++ ++/* ++ * Removes a spa_t from the namespace, freeing up any memory used. Requires ++ * spa_namespace_lock. This is called only after the spa_t has been closed and ++ * deactivated. ++ */ ++void ++spa_remove(spa_t *spa) ++{ ++ spa_config_dirent_t *dp; ++ int t; ++ ++ ASSERT(MUTEX_HELD(&spa_namespace_lock)); ++ ASSERT(spa->spa_state == POOL_STATE_UNINITIALIZED); ++ ++ nvlist_free(spa->spa_config_splitting); ++ ++ avl_remove(&spa_namespace_avl, spa); ++ cv_broadcast(&spa_namespace_cv); ++ ++ if (spa->spa_root) { ++ spa_strfree(spa->spa_root); ++ spa_active_count--; ++ } ++ ++ while ((dp = list_head(&spa->spa_config_list)) != NULL) { ++ list_remove(&spa->spa_config_list, dp); ++ if (dp->scd_path != NULL) ++ spa_strfree(dp->scd_path); ++ kmem_free(dp, sizeof (spa_config_dirent_t)); ++ } ++ ++ list_destroy(&spa->spa_config_list); ++ ++ nvlist_free(spa->spa_load_info); ++ spa_config_set(spa, NULL); ++ ++ refcount_destroy(&spa->spa_refcount); ++ ++ spa_config_lock_destroy(spa); ++ ++ for (t = 0; t < TXG_SIZE; t++) ++ bplist_destroy(&spa->spa_free_bplist[t]); ++ ++ cv_destroy(&spa->spa_async_cv); ++ cv_destroy(&spa->spa_proc_cv); ++ cv_destroy(&spa->spa_scrub_io_cv); ++ cv_destroy(&spa->spa_suspend_cv); ++ ++ mutex_destroy(&spa->spa_async_lock); ++ mutex_destroy(&spa->spa_errlist_lock); ++ mutex_destroy(&spa->spa_errlog_lock); ++ mutex_destroy(&spa->spa_history_lock); ++ mutex_destroy(&spa->spa_proc_lock); ++ mutex_destroy(&spa->spa_props_lock); ++ mutex_destroy(&spa->spa_scrub_lock); ++ mutex_destroy(&spa->spa_suspend_lock); ++ mutex_destroy(&spa->spa_vdev_top_lock); ++ ++ kmem_free(spa, sizeof (spa_t)); ++} ++ ++/* ++ * Given a pool, return the next pool in the namespace, or NULL if there is ++ * none. If 'prev' is NULL, return the first pool. ++ */ ++spa_t * ++spa_next(spa_t *prev) ++{ ++ ASSERT(MUTEX_HELD(&spa_namespace_lock)); ++ ++ if (prev) ++ return (AVL_NEXT(&spa_namespace_avl, prev)); ++ else ++ return (avl_first(&spa_namespace_avl)); ++} ++ ++/* ++ * ========================================================================== ++ * SPA refcount functions ++ * ========================================================================== ++ */ ++ ++/* ++ * Add a reference to the given spa_t. Must have at least one reference, or ++ * have the namespace lock held. ++ */ ++void ++spa_open_ref(spa_t *spa, void *tag) ++{ ++ ASSERT(refcount_count(&spa->spa_refcount) >= spa->spa_minref || ++ MUTEX_HELD(&spa_namespace_lock)); ++ (void) refcount_add(&spa->spa_refcount, tag); ++} ++ ++/* ++ * Remove a reference to the given spa_t. Must have at least one reference, or ++ * have the namespace lock held. ++ */ ++void ++spa_close(spa_t *spa, void *tag) ++{ ++ ASSERT(refcount_count(&spa->spa_refcount) > spa->spa_minref || ++ MUTEX_HELD(&spa_namespace_lock)); ++ (void) refcount_remove(&spa->spa_refcount, tag); ++} ++ ++/* ++ * Check to see if the spa refcount is zero. Must be called with ++ * spa_namespace_lock held. 
We really compare against spa_minref, which is the ++ * number of references acquired when opening a pool ++ */ ++boolean_t ++spa_refcount_zero(spa_t *spa) ++{ ++ ASSERT(MUTEX_HELD(&spa_namespace_lock)); ++ ++ return (refcount_count(&spa->spa_refcount) == spa->spa_minref); ++} ++ ++/* ++ * ========================================================================== ++ * SPA spare and l2cache tracking ++ * ========================================================================== ++ */ ++ ++/* ++ * Hot spares and cache devices are tracked using the same code below, ++ * for 'auxiliary' devices. ++ */ ++ ++typedef struct spa_aux { ++ uint64_t aux_guid; ++ uint64_t aux_pool; ++ avl_node_t aux_avl; ++ int aux_count; ++} spa_aux_t; ++ ++static int ++spa_aux_compare(const void *a, const void *b) ++{ ++ const spa_aux_t *sa = a; ++ const spa_aux_t *sb = b; ++ ++ if (sa->aux_guid < sb->aux_guid) ++ return (-1); ++ else if (sa->aux_guid > sb->aux_guid) ++ return (1); ++ else ++ return (0); ++} ++ ++void ++spa_aux_add(vdev_t *vd, avl_tree_t *avl) ++{ ++ avl_index_t where; ++ spa_aux_t search; ++ spa_aux_t *aux; ++ ++ search.aux_guid = vd->vdev_guid; ++ if ((aux = avl_find(avl, &search, &where)) != NULL) { ++ aux->aux_count++; ++ } else { ++ aux = kmem_zalloc(sizeof (spa_aux_t), KM_PUSHPAGE); ++ aux->aux_guid = vd->vdev_guid; ++ aux->aux_count = 1; ++ avl_insert(avl, aux, where); ++ } ++} ++ ++void ++spa_aux_remove(vdev_t *vd, avl_tree_t *avl) ++{ ++ spa_aux_t search; ++ spa_aux_t *aux; ++ avl_index_t where; ++ ++ search.aux_guid = vd->vdev_guid; ++ aux = avl_find(avl, &search, &where); ++ ++ ASSERT(aux != NULL); ++ ++ if (--aux->aux_count == 0) { ++ avl_remove(avl, aux); ++ kmem_free(aux, sizeof (spa_aux_t)); ++ } else if (aux->aux_pool == spa_guid(vd->vdev_spa)) { ++ aux->aux_pool = 0ULL; ++ } ++} ++ ++boolean_t ++spa_aux_exists(uint64_t guid, uint64_t *pool, int *refcnt, avl_tree_t *avl) ++{ ++ spa_aux_t search, *found; ++ ++ search.aux_guid = guid; ++ found = avl_find(avl, &search, NULL); ++ ++ if (pool) { ++ if (found) ++ *pool = found->aux_pool; ++ else ++ *pool = 0ULL; ++ } ++ ++ if (refcnt) { ++ if (found) ++ *refcnt = found->aux_count; ++ else ++ *refcnt = 0; ++ } ++ ++ return (found != NULL); ++} ++ ++void ++spa_aux_activate(vdev_t *vd, avl_tree_t *avl) ++{ ++ spa_aux_t search, *found; ++ avl_index_t where; ++ ++ search.aux_guid = vd->vdev_guid; ++ found = avl_find(avl, &search, &where); ++ ASSERT(found != NULL); ++ ASSERT(found->aux_pool == 0ULL); ++ ++ found->aux_pool = spa_guid(vd->vdev_spa); ++} ++ ++/* ++ * Spares are tracked globally due to the following constraints: ++ * ++ * - A spare may be part of multiple pools. ++ * - A spare may be added to a pool even if it's actively in use within ++ * another pool. ++ * - A spare in use in any pool can only be the source of a replacement if ++ * the target is a spare in the same pool. ++ * ++ * We keep track of all spares on the system through the use of a reference ++ * counted AVL tree. When a vdev is added as a spare, or used as a replacement ++ * spare, then we bump the reference count in the AVL tree. In addition, we set ++ * the 'vdev_isspare' member to indicate that the device is a spare (active or ++ * inactive). When a spare is made active (used to replace a device in the ++ * pool), we also keep track of which pool its been made a part of. ++ * ++ * The 'spa_spare_lock' protects the AVL tree. These functions are normally ++ * called under the spa_namespace lock as part of vdev reconfiguration. 
The ++ * separate spare lock exists for the status query path, which does not need to ++ * be completely consistent with respect to other vdev configuration changes. ++ */ ++ ++static int ++spa_spare_compare(const void *a, const void *b) ++{ ++ return (spa_aux_compare(a, b)); ++} ++ ++void ++spa_spare_add(vdev_t *vd) ++{ ++ mutex_enter(&spa_spare_lock); ++ ASSERT(!vd->vdev_isspare); ++ spa_aux_add(vd, &spa_spare_avl); ++ vd->vdev_isspare = B_TRUE; ++ mutex_exit(&spa_spare_lock); ++} ++ ++void ++spa_spare_remove(vdev_t *vd) ++{ ++ mutex_enter(&spa_spare_lock); ++ ASSERT(vd->vdev_isspare); ++ spa_aux_remove(vd, &spa_spare_avl); ++ vd->vdev_isspare = B_FALSE; ++ mutex_exit(&spa_spare_lock); ++} ++ ++boolean_t ++spa_spare_exists(uint64_t guid, uint64_t *pool, int *refcnt) ++{ ++ boolean_t found; ++ ++ mutex_enter(&spa_spare_lock); ++ found = spa_aux_exists(guid, pool, refcnt, &spa_spare_avl); ++ mutex_exit(&spa_spare_lock); ++ ++ return (found); ++} ++ ++void ++spa_spare_activate(vdev_t *vd) ++{ ++ mutex_enter(&spa_spare_lock); ++ ASSERT(vd->vdev_isspare); ++ spa_aux_activate(vd, &spa_spare_avl); ++ mutex_exit(&spa_spare_lock); ++} ++ ++/* ++ * Level 2 ARC devices are tracked globally for the same reasons as spares. ++ * Cache devices currently only support one pool per cache device, and so ++ * for these devices the aux reference count is currently unused beyond 1. ++ */ ++ ++static int ++spa_l2cache_compare(const void *a, const void *b) ++{ ++ return (spa_aux_compare(a, b)); ++} ++ ++void ++spa_l2cache_add(vdev_t *vd) ++{ ++ mutex_enter(&spa_l2cache_lock); ++ ASSERT(!vd->vdev_isl2cache); ++ spa_aux_add(vd, &spa_l2cache_avl); ++ vd->vdev_isl2cache = B_TRUE; ++ mutex_exit(&spa_l2cache_lock); ++} ++ ++void ++spa_l2cache_remove(vdev_t *vd) ++{ ++ mutex_enter(&spa_l2cache_lock); ++ ASSERT(vd->vdev_isl2cache); ++ spa_aux_remove(vd, &spa_l2cache_avl); ++ vd->vdev_isl2cache = B_FALSE; ++ mutex_exit(&spa_l2cache_lock); ++} ++ ++boolean_t ++spa_l2cache_exists(uint64_t guid, uint64_t *pool) ++{ ++ boolean_t found; ++ ++ mutex_enter(&spa_l2cache_lock); ++ found = spa_aux_exists(guid, pool, NULL, &spa_l2cache_avl); ++ mutex_exit(&spa_l2cache_lock); ++ ++ return (found); ++} ++ ++void ++spa_l2cache_activate(vdev_t *vd) ++{ ++ mutex_enter(&spa_l2cache_lock); ++ ASSERT(vd->vdev_isl2cache); ++ spa_aux_activate(vd, &spa_l2cache_avl); ++ mutex_exit(&spa_l2cache_lock); ++} ++ ++/* ++ * ========================================================================== ++ * SPA vdev locking ++ * ========================================================================== ++ */ ++ ++/* ++ * Lock the given spa_t for the purpose of adding or removing a vdev. ++ * Grabs the global spa_namespace_lock plus the spa config lock for writing. ++ * It returns the next transaction group for the spa_t. ++ */ ++uint64_t ++spa_vdev_enter(spa_t *spa) ++{ ++ mutex_enter(&spa->spa_vdev_top_lock); ++ mutex_enter(&spa_namespace_lock); ++ return (spa_vdev_config_enter(spa)); ++} ++ ++/* ++ * Internal implementation for spa_vdev_enter(). Used when a vdev ++ * operation requires multiple syncs (i.e. removing a device) while ++ * keeping the spa_namespace_lock held. ++ */ ++uint64_t ++spa_vdev_config_enter(spa_t *spa) ++{ ++ ASSERT(MUTEX_HELD(&spa_namespace_lock)); ++ ++ spa_config_enter(spa, SCL_ALL, spa, RW_WRITER); ++ ++ return (spa_last_synced_txg(spa) + 1); ++} ++ ++/* ++ * Used in combination with spa_vdev_config_enter() to allow the syncing ++ * of multiple transactions without releasing the spa_namespace_lock. 
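++ *
++ * For reference, callers of the single-sync variant typically follow the
++ * pattern (sketch only):
++ *
++ *	txg = spa_vdev_enter(spa);
++ *	(modify the vdev tree)
++ *	return (spa_vdev_exit(spa, vd, txg, error));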
++ */ ++void ++spa_vdev_config_exit(spa_t *spa, vdev_t *vd, uint64_t txg, int error, char *tag) ++{ ++ int config_changed = B_FALSE; ++ ++ ASSERT(MUTEX_HELD(&spa_namespace_lock)); ++ ASSERT(txg > spa_last_synced_txg(spa)); ++ ++ spa->spa_pending_vdev = NULL; ++ ++ /* ++ * Reassess the DTLs. ++ */ ++ vdev_dtl_reassess(spa->spa_root_vdev, 0, 0, B_FALSE); ++ ++ if (error == 0 && !list_is_empty(&spa->spa_config_dirty_list)) { ++ config_changed = B_TRUE; ++ spa->spa_config_generation++; ++ } ++ ++ /* ++ * Verify the metaslab classes. ++ */ ++ ASSERT(metaslab_class_validate(spa_normal_class(spa)) == 0); ++ ASSERT(metaslab_class_validate(spa_log_class(spa)) == 0); ++ ++ spa_config_exit(spa, SCL_ALL, spa); ++ ++ /* ++ * Panic the system if the specified tag requires it. This ++ * is useful for ensuring that configurations are updated ++ * transactionally. ++ */ ++ if (zio_injection_enabled) ++ zio_handle_panic_injection(spa, tag, 0); ++ ++ /* ++ * Note: this txg_wait_synced() is important because it ensures ++ * that there won't be more than one config change per txg. ++ * This allows us to use the txg as the generation number. ++ */ ++ if (error == 0) ++ txg_wait_synced(spa->spa_dsl_pool, txg); ++ ++ if (vd != NULL) { ++ ASSERT(!vd->vdev_detached || vd->vdev_dtl_smo.smo_object == 0); ++ spa_config_enter(spa, SCL_ALL, spa, RW_WRITER); ++ vdev_free(vd); ++ spa_config_exit(spa, SCL_ALL, spa); ++ } ++ ++ /* ++ * If the config changed, update the config cache. ++ */ ++ if (config_changed) ++ spa_config_sync(spa, B_FALSE, B_TRUE); ++} ++ ++/* ++ * Unlock the spa_t after adding or removing a vdev. Besides undoing the ++ * locking of spa_vdev_enter(), we also want make sure the transactions have ++ * synced to disk, and then update the global configuration cache with the new ++ * information. ++ */ ++int ++spa_vdev_exit(spa_t *spa, vdev_t *vd, uint64_t txg, int error) ++{ ++ spa_vdev_config_exit(spa, vd, txg, error, FTAG); ++ mutex_exit(&spa_namespace_lock); ++ mutex_exit(&spa->spa_vdev_top_lock); ++ ++ return (error); ++} ++ ++/* ++ * Lock the given spa_t for the purpose of changing vdev state. ++ */ ++void ++spa_vdev_state_enter(spa_t *spa, int oplocks) ++{ ++ int locks = SCL_STATE_ALL | oplocks; ++ ++ /* ++ * Root pools may need to read of the underlying devfs filesystem ++ * when opening up a vdev. Unfortunately if we're holding the ++ * SCL_ZIO lock it will result in a deadlock when we try to issue ++ * the read from the root filesystem. Instead we "prefetch" ++ * the associated vnodes that we need prior to opening the ++ * underlying devices and cache them so that we can prevent ++ * any I/O when we are doing the actual open. ++ */ ++ if (spa_is_root(spa)) { ++ int low = locks & ~(SCL_ZIO - 1); ++ int high = locks & ~low; ++ ++ spa_config_enter(spa, high, spa, RW_WRITER); ++ vdev_hold(spa->spa_root_vdev); ++ spa_config_enter(spa, low, spa, RW_WRITER); ++ } else { ++ spa_config_enter(spa, locks, spa, RW_WRITER); ++ } ++ spa->spa_vdev_locks = locks; ++} ++ ++int ++spa_vdev_state_exit(spa_t *spa, vdev_t *vd, int error) ++{ ++ boolean_t config_changed = B_FALSE; ++ ++ if (vd != NULL || error == 0) ++ vdev_dtl_reassess(vd ? 
vd->vdev_top : spa->spa_root_vdev, ++ 0, 0, B_FALSE); ++ ++ if (vd != NULL) { ++ vdev_state_dirty(vd->vdev_top); ++ config_changed = B_TRUE; ++ spa->spa_config_generation++; ++ } ++ ++ if (spa_is_root(spa)) ++ vdev_rele(spa->spa_root_vdev); ++ ++ ASSERT3U(spa->spa_vdev_locks, >=, SCL_STATE_ALL); ++ spa_config_exit(spa, spa->spa_vdev_locks, spa); ++ ++ /* ++ * If anything changed, wait for it to sync. This ensures that, ++ * from the system administrator's perspective, zpool(1M) commands ++ * are synchronous. This is important for things like zpool offline: ++ * when the command completes, you expect no further I/O from ZFS. ++ */ ++ if (vd != NULL) ++ txg_wait_synced(spa->spa_dsl_pool, 0); ++ ++ /* ++ * If the config changed, update the config cache. ++ */ ++ if (config_changed) { ++ mutex_enter(&spa_namespace_lock); ++ spa_config_sync(spa, B_FALSE, B_TRUE); ++ mutex_exit(&spa_namespace_lock); ++ } ++ ++ return (error); ++} ++ ++/* ++ * ========================================================================== ++ * Miscellaneous functions ++ * ========================================================================== ++ */ ++ ++/* ++ * Rename a spa_t. ++ */ ++int ++spa_rename(const char *name, const char *newname) ++{ ++ spa_t *spa; ++ int err; ++ ++ /* ++ * Lookup the spa_t and grab the config lock for writing. We need to ++ * actually open the pool so that we can sync out the necessary labels. ++ * It's OK to call spa_open() with the namespace lock held because we ++ * allow recursive calls for other reasons. ++ */ ++ mutex_enter(&spa_namespace_lock); ++ if ((err = spa_open(name, &spa, FTAG)) != 0) { ++ mutex_exit(&spa_namespace_lock); ++ return (err); ++ } ++ ++ spa_config_enter(spa, SCL_ALL, FTAG, RW_WRITER); ++ ++ avl_remove(&spa_namespace_avl, spa); ++ (void) strlcpy(spa->spa_name, newname, sizeof (spa->spa_name)); ++ avl_add(&spa_namespace_avl, spa); ++ ++ /* ++ * Sync all labels to disk with the new names by marking the root vdev ++ * dirty and waiting for it to sync. It will pick up the new pool name ++ * during the sync. ++ */ ++ vdev_config_dirty(spa->spa_root_vdev); ++ ++ spa_config_exit(spa, SCL_ALL, FTAG); ++ ++ txg_wait_synced(spa->spa_dsl_pool, 0); ++ ++ /* ++ * Sync the updated config cache. ++ */ ++ spa_config_sync(spa, B_FALSE, B_TRUE); ++ ++ spa_close(spa, FTAG); ++ ++ mutex_exit(&spa_namespace_lock); ++ ++ return (0); ++} ++ ++/* ++ * Return the spa_t associated with given pool_guid, if it exists. If ++ * device_guid is non-zero, determine whether the pool exists *and* contains ++ * a device with the specified device_guid. ++ */ ++spa_t * ++spa_by_guid(uint64_t pool_guid, uint64_t device_guid) ++{ ++ spa_t *spa; ++ avl_tree_t *t = &spa_namespace_avl; ++ ++ ASSERT(MUTEX_HELD(&spa_namespace_lock)); ++ ++ for (spa = avl_first(t); spa != NULL; spa = AVL_NEXT(t, spa)) { ++ if (spa->spa_state == POOL_STATE_UNINITIALIZED) ++ continue; ++ if (spa->spa_root_vdev == NULL) ++ continue; ++ if (spa_guid(spa) == pool_guid) { ++ if (device_guid == 0) ++ break; ++ ++ if (vdev_lookup_by_guid(spa->spa_root_vdev, ++ device_guid) != NULL) ++ break; ++ ++ /* ++ * Check any devices we may be in the process of adding. ++ */ ++ if (spa->spa_pending_vdev) { ++ if (vdev_lookup_by_guid(spa->spa_pending_vdev, ++ device_guid) != NULL) ++ break; ++ } ++ } ++ } ++ ++ return (spa); ++} ++ ++/* ++ * Determine whether a pool with the given pool_guid exists. 
++ */ ++boolean_t ++spa_guid_exists(uint64_t pool_guid, uint64_t device_guid) ++{ ++ return (spa_by_guid(pool_guid, device_guid) != NULL); ++} ++ ++char * ++spa_strdup(const char *s) ++{ ++ size_t len; ++ char *new; ++ ++ len = strlen(s); ++ new = kmem_alloc(len + 1, KM_PUSHPAGE); ++ bcopy(s, new, len); ++ new[len] = '\0'; ++ ++ return (new); ++} ++ ++void ++spa_strfree(char *s) ++{ ++ kmem_free(s, strlen(s) + 1); ++} ++ ++uint64_t ++spa_get_random(uint64_t range) ++{ ++ uint64_t r; ++ ++ ASSERT(range != 0); ++ ++ (void) random_get_pseudo_bytes((void *)&r, sizeof (uint64_t)); ++ ++ return (r % range); ++} ++ ++uint64_t ++spa_generate_guid(spa_t *spa) ++{ ++ uint64_t guid = spa_get_random(-1ULL); ++ ++ if (spa != NULL) { ++ while (guid == 0 || spa_guid_exists(spa_guid(spa), guid)) ++ guid = spa_get_random(-1ULL); ++ } else { ++ while (guid == 0 || spa_guid_exists(guid, 0)) ++ guid = spa_get_random(-1ULL); ++ } ++ ++ return (guid); ++} ++ ++void ++sprintf_blkptr(char *buf, const blkptr_t *bp) ++{ ++ char *type = NULL; ++ char *checksum = NULL; ++ char *compress = NULL; ++ ++ if (bp != NULL) { ++ type = dmu_ot[BP_GET_TYPE(bp)].ot_name; ++ checksum = zio_checksum_table[BP_GET_CHECKSUM(bp)].ci_name; ++ compress = zio_compress_table[BP_GET_COMPRESS(bp)].ci_name; ++ } ++ ++ SPRINTF_BLKPTR(snprintf, ' ', buf, bp, type, checksum, compress); ++} ++ ++void ++spa_freeze(spa_t *spa) ++{ ++ uint64_t freeze_txg = 0; ++ ++ spa_config_enter(spa, SCL_ALL, FTAG, RW_WRITER); ++ if (spa->spa_freeze_txg == UINT64_MAX) { ++ freeze_txg = spa_last_synced_txg(spa) + TXG_SIZE; ++ spa->spa_freeze_txg = freeze_txg; ++ } ++ spa_config_exit(spa, SCL_ALL, FTAG); ++ if (freeze_txg != 0) ++ txg_wait_synced(spa_get_dsl(spa), freeze_txg); ++} ++ ++/* ++ * This is a stripped-down version of strtoull, suitable only for converting ++ * lowercase hexidecimal numbers that don't overflow. ++ */ ++uint64_t ++strtonum(const char *str, char **nptr) ++{ ++ uint64_t val = 0; ++ char c; ++ int digit; ++ ++ while ((c = *str) != '\0') { ++ if (c >= '0' && c <= '9') ++ digit = c - '0'; ++ else if (c >= 'a' && c <= 'f') ++ digit = 10 + c - 'a'; ++ else ++ break; ++ ++ val *= 16; ++ val += digit; ++ ++ str++; ++ } ++ ++ if (nptr) ++ *nptr = (char *)str; ++ ++ return (val); ++} ++ ++/* ++ * ========================================================================== ++ * Accessor functions ++ * ========================================================================== ++ */ ++ ++boolean_t ++spa_shutting_down(spa_t *spa) ++{ ++ return (spa->spa_async_suspended); ++} ++ ++dsl_pool_t * ++spa_get_dsl(spa_t *spa) ++{ ++ return (spa->spa_dsl_pool); ++} ++ ++blkptr_t * ++spa_get_rootblkptr(spa_t *spa) ++{ ++ return (&spa->spa_ubsync.ub_rootbp); ++} ++ ++void ++spa_set_rootblkptr(spa_t *spa, const blkptr_t *bp) ++{ ++ spa->spa_uberblock.ub_rootbp = *bp; ++} ++ ++void ++spa_altroot(spa_t *spa, char *buf, size_t buflen) ++{ ++ if (spa->spa_root == NULL) ++ buf[0] = '\0'; ++ else ++ (void) strncpy(buf, spa->spa_root, buflen); ++} ++ ++int ++spa_sync_pass(spa_t *spa) ++{ ++ return (spa->spa_sync_pass); ++} ++ ++char * ++spa_name(spa_t *spa) ++{ ++ return (spa->spa_name); ++} ++ ++uint64_t ++spa_guid(spa_t *spa) ++{ ++ /* ++ * If we fail to parse the config during spa_load(), we can go through ++ * the error path (which posts an ereport) and end up here with no root ++ * vdev. We stash the original pool guid in 'spa_config_guid' to handle ++ * this case. 
++ */ ++ if (spa->spa_root_vdev != NULL) ++ return (spa->spa_root_vdev->vdev_guid); ++ else ++ return (spa->spa_config_guid); ++} ++ ++uint64_t ++spa_load_guid(spa_t *spa) ++{ ++ /* ++ * This is a GUID that exists solely as a reference for the ++ * purposes of the arc. It is generated at load time, and ++ * is never written to persistent storage. ++ */ ++ return (spa->spa_load_guid); ++} ++ ++uint64_t ++spa_last_synced_txg(spa_t *spa) ++{ ++ return (spa->spa_ubsync.ub_txg); ++} ++ ++uint64_t ++spa_first_txg(spa_t *spa) ++{ ++ return (spa->spa_first_txg); ++} ++ ++uint64_t ++spa_syncing_txg(spa_t *spa) ++{ ++ return (spa->spa_syncing_txg); ++} ++ ++pool_state_t ++spa_state(spa_t *spa) ++{ ++ return (spa->spa_state); ++} ++ ++spa_load_state_t ++spa_load_state(spa_t *spa) ++{ ++ return (spa->spa_load_state); ++} ++ ++uint64_t ++spa_freeze_txg(spa_t *spa) ++{ ++ return (spa->spa_freeze_txg); ++} ++ ++/* ARGSUSED */ ++uint64_t ++spa_get_asize(spa_t *spa, uint64_t lsize) ++{ ++ /* ++ * The worst case is single-sector max-parity RAID-Z blocks, in which ++ * case the space requirement is exactly (VDEV_RAIDZ_MAXPARITY + 1) ++ * times the size; so just assume that. Add to this the fact that ++ * we can have up to 3 DVAs per bp, and one more factor of 2 because ++ * the block may be dittoed with up to 3 DVAs by ddt_sync(). ++ */ ++ return (lsize * (VDEV_RAIDZ_MAXPARITY + 1) * SPA_DVAS_PER_BP * 2); ++} ++ ++uint64_t ++spa_get_dspace(spa_t *spa) ++{ ++ return (spa->spa_dspace); ++} ++ ++void ++spa_update_dspace(spa_t *spa) ++{ ++ spa->spa_dspace = metaslab_class_get_dspace(spa_normal_class(spa)) + ++ ddt_get_dedup_dspace(spa); ++} ++ ++/* ++ * Return the failure mode that has been set to this pool. The default ++ * behavior will be to block all I/Os when a complete failure occurs. ++ */ ++uint8_t ++spa_get_failmode(spa_t *spa) ++{ ++ return (spa->spa_failmode); ++} ++ ++boolean_t ++spa_suspended(spa_t *spa) ++{ ++ return (spa->spa_suspended); ++} ++ ++uint64_t ++spa_version(spa_t *spa) ++{ ++ return (spa->spa_ubsync.ub_version); ++} ++ ++boolean_t ++spa_deflate(spa_t *spa) ++{ ++ return (spa->spa_deflate); ++} ++ ++metaslab_class_t * ++spa_normal_class(spa_t *spa) ++{ ++ return (spa->spa_normal_class); ++} ++ ++metaslab_class_t * ++spa_log_class(spa_t *spa) ++{ ++ return (spa->spa_log_class); ++} ++ ++int ++spa_max_replication(spa_t *spa) ++{ ++ /* ++ * As of SPA_VERSION == SPA_VERSION_DITTO_BLOCKS, we are able to ++ * handle BPs with more than one DVA allocated. Set our max ++ * replication level accordingly. 
++ */ ++ if (spa_version(spa) < SPA_VERSION_DITTO_BLOCKS) ++ return (1); ++ return (MIN(SPA_DVAS_PER_BP, spa_max_replication_override)); ++} ++ ++int ++spa_prev_software_version(spa_t *spa) ++{ ++ return (spa->spa_prev_software_version); ++} ++ ++uint64_t ++dva_get_dsize_sync(spa_t *spa, const dva_t *dva) ++{ ++ uint64_t asize = DVA_GET_ASIZE(dva); ++ uint64_t dsize = asize; ++ ++ ASSERT(spa_config_held(spa, SCL_ALL, RW_READER) != 0); ++ ++ if (asize != 0 && spa->spa_deflate) { ++ vdev_t *vd = vdev_lookup_top(spa, DVA_GET_VDEV(dva)); ++ dsize = (asize >> SPA_MINBLOCKSHIFT) * vd->vdev_deflate_ratio; ++ } ++ ++ return (dsize); ++} ++ ++uint64_t ++bp_get_dsize_sync(spa_t *spa, const blkptr_t *bp) ++{ ++ uint64_t dsize = 0; ++ int d; ++ ++ for (d = 0; d < SPA_DVAS_PER_BP; d++) ++ dsize += dva_get_dsize_sync(spa, &bp->blk_dva[d]); ++ ++ return (dsize); ++} ++ ++uint64_t ++bp_get_dsize(spa_t *spa, const blkptr_t *bp) ++{ ++ uint64_t dsize = 0; ++ int d; ++ ++ spa_config_enter(spa, SCL_VDEV, FTAG, RW_READER); ++ ++ for (d = 0; d < SPA_DVAS_PER_BP; d++) ++ dsize += dva_get_dsize_sync(spa, &bp->blk_dva[d]); ++ ++ spa_config_exit(spa, SCL_VDEV, FTAG); ++ ++ return (dsize); ++} ++ ++/* ++ * ========================================================================== ++ * Initialization and Termination ++ * ========================================================================== ++ */ ++ ++static int ++spa_name_compare(const void *a1, const void *a2) ++{ ++ const spa_t *s1 = a1; ++ const spa_t *s2 = a2; ++ int s; ++ ++ s = strcmp(s1->spa_name, s2->spa_name); ++ if (s > 0) ++ return (1); ++ if (s < 0) ++ return (-1); ++ return (0); ++} ++ ++void ++spa_boot_init(void) ++{ ++ spa_config_load(); ++} ++ ++void ++spa_init(int mode) ++{ ++ mutex_init(&spa_namespace_lock, NULL, MUTEX_DEFAULT, NULL); ++ mutex_init(&spa_spare_lock, NULL, MUTEX_DEFAULT, NULL); ++ mutex_init(&spa_l2cache_lock, NULL, MUTEX_DEFAULT, NULL); ++ cv_init(&spa_namespace_cv, NULL, CV_DEFAULT, NULL); ++ ++ avl_create(&spa_namespace_avl, spa_name_compare, sizeof (spa_t), ++ offsetof(spa_t, spa_avl)); ++ ++ avl_create(&spa_spare_avl, spa_spare_compare, sizeof (spa_aux_t), ++ offsetof(spa_aux_t, aux_avl)); ++ ++ avl_create(&spa_l2cache_avl, spa_l2cache_compare, sizeof (spa_aux_t), ++ offsetof(spa_aux_t, aux_avl)); ++ ++ spa_mode_global = mode; ++ ++ fm_init(); ++ refcount_init(); ++ unique_init(); ++ zio_init(); ++ dmu_init(); ++ zil_init(); ++ vdev_cache_stat_init(); ++ zfs_prop_init(); ++ zpool_prop_init(); ++ spa_config_load(); ++ l2arc_start(); ++} ++ ++void ++spa_fini(void) ++{ ++ l2arc_stop(); ++ ++ spa_evict_all(); ++ ++ vdev_cache_stat_fini(); ++ zil_fini(); ++ dmu_fini(); ++ zio_fini(); ++ unique_fini(); ++ refcount_fini(); ++ fm_fini(); ++ ++ avl_destroy(&spa_namespace_avl); ++ avl_destroy(&spa_spare_avl); ++ avl_destroy(&spa_l2cache_avl); ++ ++ cv_destroy(&spa_namespace_cv); ++ mutex_destroy(&spa_namespace_lock); ++ mutex_destroy(&spa_spare_lock); ++ mutex_destroy(&spa_l2cache_lock); ++} ++ ++/* ++ * Return whether this pool has slogs. No locking needed. 
++ * It's not a problem if the wrong answer is returned as it's only for ++ * performance and not correctness ++ */ ++boolean_t ++spa_has_slogs(spa_t *spa) ++{ ++ return (spa->spa_log_class->mc_rotor != NULL); ++} ++ ++spa_log_state_t ++spa_get_log_state(spa_t *spa) ++{ ++ return (spa->spa_log_state); ++} ++ ++void ++spa_set_log_state(spa_t *spa, spa_log_state_t state) ++{ ++ spa->spa_log_state = state; ++} ++ ++boolean_t ++spa_is_root(spa_t *spa) ++{ ++ return (spa->spa_is_root); ++} ++ ++boolean_t ++spa_writeable(spa_t *spa) ++{ ++ return (!!(spa->spa_mode & FWRITE)); ++} ++ ++int ++spa_mode(spa_t *spa) ++{ ++ return (spa->spa_mode); ++} ++ ++uint64_t ++spa_bootfs(spa_t *spa) ++{ ++ return (spa->spa_bootfs); ++} ++ ++uint64_t ++spa_delegation(spa_t *spa) ++{ ++ return (spa->spa_delegation); ++} ++ ++objset_t * ++spa_meta_objset(spa_t *spa) ++{ ++ return (spa->spa_meta_objset); ++} ++ ++enum zio_checksum ++spa_dedup_checksum(spa_t *spa) ++{ ++ return (spa->spa_dedup_checksum); ++} ++ ++/* ++ * Reset pool scan stat per scan pass (or reboot). ++ */ ++void ++spa_scan_stat_init(spa_t *spa) ++{ ++ /* data not stored on disk */ ++ spa->spa_scan_pass_start = gethrestime_sec(); ++ spa->spa_scan_pass_exam = 0; ++ vdev_scan_stat_init(spa->spa_root_vdev); ++} ++ ++/* ++ * Get scan stats for zpool status reports ++ */ ++int ++spa_scan_get_stats(spa_t *spa, pool_scan_stat_t *ps) ++{ ++ dsl_scan_t *scn = spa->spa_dsl_pool ? spa->spa_dsl_pool->dp_scan : NULL; ++ ++ if (scn == NULL || scn->scn_phys.scn_func == POOL_SCAN_NONE) ++ return (ENOENT); ++ bzero(ps, sizeof (pool_scan_stat_t)); ++ ++ /* data stored on disk */ ++ ps->pss_func = scn->scn_phys.scn_func; ++ ps->pss_start_time = scn->scn_phys.scn_start_time; ++ ps->pss_end_time = scn->scn_phys.scn_end_time; ++ ps->pss_to_examine = scn->scn_phys.scn_to_examine; ++ ps->pss_examined = scn->scn_phys.scn_examined; ++ ps->pss_to_process = scn->scn_phys.scn_to_process; ++ ps->pss_processed = scn->scn_phys.scn_processed; ++ ps->pss_errors = scn->scn_phys.scn_errors; ++ ps->pss_state = scn->scn_phys.scn_state; ++ ++ /* data not stored on disk */ ++ ps->pss_pass_start = spa->spa_scan_pass_start; ++ ps->pss_pass_exam = spa->spa_scan_pass_exam; ++ ++ return (0); ++} ++ ++boolean_t ++spa_debug_enabled(spa_t *spa) ++{ ++ return (spa->spa_debug); ++} ++ ++#if defined(_KERNEL) && defined(HAVE_SPL) ++/* Namespace manipulation */ ++EXPORT_SYMBOL(spa_lookup); ++EXPORT_SYMBOL(spa_add); ++EXPORT_SYMBOL(spa_remove); ++EXPORT_SYMBOL(spa_next); ++ ++/* Refcount functions */ ++EXPORT_SYMBOL(spa_open_ref); ++EXPORT_SYMBOL(spa_close); ++EXPORT_SYMBOL(spa_refcount_zero); ++ ++/* Pool configuration lock */ ++EXPORT_SYMBOL(spa_config_tryenter); ++EXPORT_SYMBOL(spa_config_enter); ++EXPORT_SYMBOL(spa_config_exit); ++EXPORT_SYMBOL(spa_config_held); ++ ++/* Pool vdev add/remove lock */ ++EXPORT_SYMBOL(spa_vdev_enter); ++EXPORT_SYMBOL(spa_vdev_exit); ++ ++/* Pool vdev state change lock */ ++EXPORT_SYMBOL(spa_vdev_state_enter); ++EXPORT_SYMBOL(spa_vdev_state_exit); ++ ++/* Accessor functions */ ++EXPORT_SYMBOL(spa_shutting_down); ++EXPORT_SYMBOL(spa_get_dsl); ++EXPORT_SYMBOL(spa_get_rootblkptr); ++EXPORT_SYMBOL(spa_set_rootblkptr); ++EXPORT_SYMBOL(spa_altroot); ++EXPORT_SYMBOL(spa_sync_pass); ++EXPORT_SYMBOL(spa_name); ++EXPORT_SYMBOL(spa_guid); ++EXPORT_SYMBOL(spa_last_synced_txg); ++EXPORT_SYMBOL(spa_first_txg); ++EXPORT_SYMBOL(spa_syncing_txg); ++EXPORT_SYMBOL(spa_version); ++EXPORT_SYMBOL(spa_state); ++EXPORT_SYMBOL(spa_load_state); ++EXPORT_SYMBOL(spa_freeze_txg); 
++EXPORT_SYMBOL(spa_get_asize); ++EXPORT_SYMBOL(spa_get_dspace); ++EXPORT_SYMBOL(spa_update_dspace); ++EXPORT_SYMBOL(spa_deflate); ++EXPORT_SYMBOL(spa_normal_class); ++EXPORT_SYMBOL(spa_log_class); ++EXPORT_SYMBOL(spa_max_replication); ++EXPORT_SYMBOL(spa_prev_software_version); ++EXPORT_SYMBOL(spa_get_failmode); ++EXPORT_SYMBOL(spa_suspended); ++EXPORT_SYMBOL(spa_bootfs); ++EXPORT_SYMBOL(spa_delegation); ++EXPORT_SYMBOL(spa_meta_objset); ++ ++/* Miscellaneous support routines */ ++EXPORT_SYMBOL(spa_rename); ++EXPORT_SYMBOL(spa_guid_exists); ++EXPORT_SYMBOL(spa_strdup); ++EXPORT_SYMBOL(spa_strfree); ++EXPORT_SYMBOL(spa_get_random); ++EXPORT_SYMBOL(spa_generate_guid); ++EXPORT_SYMBOL(sprintf_blkptr); ++EXPORT_SYMBOL(spa_freeze); ++EXPORT_SYMBOL(spa_upgrade); ++EXPORT_SYMBOL(spa_evict_all); ++EXPORT_SYMBOL(spa_lookup_by_guid); ++EXPORT_SYMBOL(spa_has_spare); ++EXPORT_SYMBOL(dva_get_dsize_sync); ++EXPORT_SYMBOL(bp_get_dsize_sync); ++EXPORT_SYMBOL(bp_get_dsize); ++EXPORT_SYMBOL(spa_has_slogs); ++EXPORT_SYMBOL(spa_is_root); ++EXPORT_SYMBOL(spa_writeable); ++EXPORT_SYMBOL(spa_mode); ++ ++EXPORT_SYMBOL(spa_namespace_lock); ++#endif +diff -uNr linux-3.2.33-go.orig/fs/zfs/zfs/txg.c linux-3.2.33-go/fs/zfs/zfs/txg.c +--- linux-3.2.33-go.orig/fs/zfs/zfs/txg.c 1970-01-01 01:00:00.000000000 +0100 ++++ linux-3.2.33-go/fs/zfs/zfs/txg.c 2012-11-16 23:25:34.348039346 +0100 +@@ -0,0 +1,827 @@ ++/* ++ * CDDL HEADER START ++ * ++ * The contents of this file are subject to the terms of the ++ * Common Development and Distribution License (the "License"). ++ * You may not use this file except in compliance with the License. ++ * ++ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE ++ * or http://www.opensolaris.org/os/licensing. ++ * See the License for the specific language governing permissions ++ * and limitations under the License. ++ * ++ * When distributing Covered Code, include this CDDL HEADER in each ++ * file and include the License file at usr/src/OPENSOLARIS.LICENSE. ++ * If applicable, add the following below this CDDL HEADER, with the ++ * fields enclosed by brackets "[]" replaced with your own identifying ++ * information: Portions Copyright [yyyy] [name of copyright owner] ++ * ++ * CDDL HEADER END ++ */ ++/* ++ * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved. ++ */ ++ ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++ ++/* ++ * Pool-wide transaction groups. ++ */ ++ ++static void txg_sync_thread(dsl_pool_t *dp); ++static void txg_quiesce_thread(dsl_pool_t *dp); ++ ++int zfs_txg_timeout = 5; /* max seconds worth of delta per txg */ ++ ++/* ++ * Prepare the txg subsystem. 
++ */ ++void ++txg_init(dsl_pool_t *dp, uint64_t txg) ++{ ++ tx_state_t *tx = &dp->dp_tx; ++ int c; ++ bzero(tx, sizeof (tx_state_t)); ++ ++ tx->tx_cpu = vmem_zalloc(max_ncpus * sizeof (tx_cpu_t), KM_SLEEP); ++ ++ for (c = 0; c < max_ncpus; c++) { ++ int i; ++ ++ mutex_init(&tx->tx_cpu[c].tc_lock, NULL, MUTEX_DEFAULT, NULL); ++ for (i = 0; i < TXG_SIZE; i++) { ++ cv_init(&tx->tx_cpu[c].tc_cv[i], NULL, CV_DEFAULT, ++ NULL); ++ list_create(&tx->tx_cpu[c].tc_callbacks[i], ++ sizeof (dmu_tx_callback_t), ++ offsetof(dmu_tx_callback_t, dcb_node)); ++ } ++ } ++ ++ mutex_init(&tx->tx_sync_lock, NULL, MUTEX_DEFAULT, NULL); ++ ++ cv_init(&tx->tx_sync_more_cv, NULL, CV_DEFAULT, NULL); ++ cv_init(&tx->tx_sync_done_cv, NULL, CV_DEFAULT, NULL); ++ cv_init(&tx->tx_quiesce_more_cv, NULL, CV_DEFAULT, NULL); ++ cv_init(&tx->tx_quiesce_done_cv, NULL, CV_DEFAULT, NULL); ++ cv_init(&tx->tx_exit_cv, NULL, CV_DEFAULT, NULL); ++ ++ tx->tx_open_txg = txg; ++} ++ ++/* ++ * Close down the txg subsystem. ++ */ ++void ++txg_fini(dsl_pool_t *dp) ++{ ++ tx_state_t *tx = &dp->dp_tx; ++ int c; ++ ++ ASSERT(tx->tx_threads == 0); ++ ++ mutex_destroy(&tx->tx_sync_lock); ++ ++ cv_destroy(&tx->tx_sync_more_cv); ++ cv_destroy(&tx->tx_sync_done_cv); ++ cv_destroy(&tx->tx_quiesce_more_cv); ++ cv_destroy(&tx->tx_quiesce_done_cv); ++ cv_destroy(&tx->tx_exit_cv); ++ ++ for (c = 0; c < max_ncpus; c++) { ++ int i; ++ ++ mutex_destroy(&tx->tx_cpu[c].tc_lock); ++ for (i = 0; i < TXG_SIZE; i++) { ++ cv_destroy(&tx->tx_cpu[c].tc_cv[i]); ++ list_destroy(&tx->tx_cpu[c].tc_callbacks[i]); ++ } ++ } ++ ++ if (tx->tx_commit_cb_taskq != NULL) ++ taskq_destroy(tx->tx_commit_cb_taskq); ++ ++ vmem_free(tx->tx_cpu, max_ncpus * sizeof (tx_cpu_t)); ++ ++ bzero(tx, sizeof (tx_state_t)); ++} ++ ++/* ++ * Start syncing transaction groups. ++ */ ++void ++txg_sync_start(dsl_pool_t *dp) ++{ ++ tx_state_t *tx = &dp->dp_tx; ++ ++ mutex_enter(&tx->tx_sync_lock); ++ ++ dprintf("pool %p\n", dp); ++ ++ ASSERT(tx->tx_threads == 0); ++ ++ tx->tx_threads = 2; ++ ++ tx->tx_quiesce_thread = thread_create(NULL, 0, txg_quiesce_thread, ++ dp, 0, &p0, TS_RUN, minclsyspri); ++ ++ /* ++ * The sync thread can need a larger-than-default stack size on ++ * 32-bit x86. This is due in part to nested pools and ++ * scrub_visitbp() recursion. ++ */ ++ tx->tx_sync_thread = thread_create(NULL, 32<<10, txg_sync_thread, ++ dp, 0, &p0, TS_RUN, minclsyspri); ++ ++ mutex_exit(&tx->tx_sync_lock); ++} ++ ++static void ++txg_thread_enter(tx_state_t *tx, callb_cpr_t *cpr) ++{ ++ CALLB_CPR_INIT(cpr, &tx->tx_sync_lock, callb_generic_cpr, FTAG); ++ mutex_enter(&tx->tx_sync_lock); ++} ++ ++static void ++txg_thread_exit(tx_state_t *tx, callb_cpr_t *cpr, kthread_t **tpp) ++{ ++ ASSERT(*tpp != NULL); ++ *tpp = NULL; ++ tx->tx_threads--; ++ cv_broadcast(&tx->tx_exit_cv); ++ CALLB_CPR_EXIT(cpr); /* drops &tx->tx_sync_lock */ ++ thread_exit(); ++} ++ ++static void ++txg_thread_wait(tx_state_t *tx, callb_cpr_t *cpr, kcondvar_t *cv, uint64_t time) ++{ ++ CALLB_CPR_SAFE_BEGIN(cpr); ++ ++ if (time) ++ (void) cv_timedwait_interruptible(cv, &tx->tx_sync_lock, ++ ddi_get_lbolt() + time); ++ else ++ cv_wait_interruptible(cv, &tx->tx_sync_lock); ++ ++ CALLB_CPR_SAFE_END(cpr, &tx->tx_sync_lock); ++} ++ ++/* ++ * Stop syncing transaction groups. ++ */ ++void ++txg_sync_stop(dsl_pool_t *dp) ++{ ++ tx_state_t *tx = &dp->dp_tx; ++ ++ dprintf("pool %p\n", dp); ++ /* ++ * Finish off any work in progress. ++ */ ++ ASSERT(tx->tx_threads == 2); ++ ++ /* ++ * We need to ensure that we've vacated the deferred space_maps. 
++ */ ++ txg_wait_synced(dp, tx->tx_open_txg + TXG_DEFER_SIZE); ++ ++ /* ++ * Wake all sync threads and wait for them to die. ++ */ ++ mutex_enter(&tx->tx_sync_lock); ++ ++ ASSERT(tx->tx_threads == 2); ++ ++ tx->tx_exiting = 1; ++ ++ cv_broadcast(&tx->tx_quiesce_more_cv); ++ cv_broadcast(&tx->tx_quiesce_done_cv); ++ cv_broadcast(&tx->tx_sync_more_cv); ++ ++ while (tx->tx_threads != 0) ++ cv_wait(&tx->tx_exit_cv, &tx->tx_sync_lock); ++ ++ tx->tx_exiting = 0; ++ ++ mutex_exit(&tx->tx_sync_lock); ++} ++ ++uint64_t ++txg_hold_open(dsl_pool_t *dp, txg_handle_t *th) ++{ ++ tx_state_t *tx = &dp->dp_tx; ++ tx_cpu_t *tc; ++ uint64_t txg; ++ ++ /* ++ * It appears the processor id is simply used as a "random" ++ * number to index into the array, and there isn't any other ++ * significance to the chosen tx_cpu. Because.. Why not use ++ * the current cpu to index into the array? ++ */ ++ kpreempt_disable(); ++ tc = &tx->tx_cpu[CPU_SEQID]; ++ kpreempt_enable(); ++ ++ mutex_enter(&tc->tc_lock); ++ ++ txg = tx->tx_open_txg; ++ tc->tc_count[txg & TXG_MASK]++; ++ ++ th->th_cpu = tc; ++ th->th_txg = txg; ++ ++ return (txg); ++} ++ ++void ++txg_rele_to_quiesce(txg_handle_t *th) ++{ ++ tx_cpu_t *tc = th->th_cpu; ++ ++ mutex_exit(&tc->tc_lock); ++} ++ ++void ++txg_register_callbacks(txg_handle_t *th, list_t *tx_callbacks) ++{ ++ tx_cpu_t *tc = th->th_cpu; ++ int g = th->th_txg & TXG_MASK; ++ ++ mutex_enter(&tc->tc_lock); ++ list_move_tail(&tc->tc_callbacks[g], tx_callbacks); ++ mutex_exit(&tc->tc_lock); ++} ++ ++void ++txg_rele_to_sync(txg_handle_t *th) ++{ ++ tx_cpu_t *tc = th->th_cpu; ++ int g = th->th_txg & TXG_MASK; ++ ++ mutex_enter(&tc->tc_lock); ++ ASSERT(tc->tc_count[g] != 0); ++ if (--tc->tc_count[g] == 0) ++ cv_broadcast(&tc->tc_cv[g]); ++ mutex_exit(&tc->tc_lock); ++ ++ th->th_cpu = NULL; /* defensive */ ++} ++ ++static void ++txg_quiesce(dsl_pool_t *dp, uint64_t txg) ++{ ++ hrtime_t start; ++ txg_history_t *th; ++ tx_state_t *tx = &dp->dp_tx; ++ int g = txg & TXG_MASK; ++ int c; ++ ++ /* ++ * Grab all tx_cpu locks so nobody else can get into this txg. ++ */ ++ for (c = 0; c < max_ncpus; c++) ++ mutex_enter(&tx->tx_cpu[c].tc_lock); ++ ++ ASSERT(txg == tx->tx_open_txg); ++ tx->tx_open_txg++; ++ ++ /* ++ * Measure how long the txg was open and replace the kstat. ++ */ ++ th = dsl_pool_txg_history_get(dp, txg); ++ th->th_kstat.open_time = gethrtime() - th->th_kstat.birth; ++ th->th_kstat.state = TXG_STATE_QUIESCING; ++ dsl_pool_txg_history_put(th); ++ dsl_pool_txg_history_add(dp, tx->tx_open_txg); ++ ++ /* ++ * Now that we've incremented tx_open_txg, we can let threads ++ * enter the next transaction group. ++ */ ++ for (c = 0; c < max_ncpus; c++) ++ mutex_exit(&tx->tx_cpu[c].tc_lock); ++ ++ /* ++ * Quiesce the transaction group by waiting for everyone to txg_exit(). ++ */ ++ start = gethrtime(); ++ ++ for (c = 0; c < max_ncpus; c++) { ++ tx_cpu_t *tc = &tx->tx_cpu[c]; ++ mutex_enter(&tc->tc_lock); ++ while (tc->tc_count[g] != 0) ++ cv_wait(&tc->tc_cv[g], &tc->tc_lock); ++ mutex_exit(&tc->tc_lock); ++ } ++ ++ /* ++ * Measure how long the txg took to quiesce. ++ */ ++ th = dsl_pool_txg_history_get(dp, txg); ++ th->th_kstat.quiesce_time = gethrtime() - start; ++ dsl_pool_txg_history_put(th); ++} ++ ++static void ++txg_do_callbacks(list_t *cb_list) ++{ ++ dmu_tx_do_callbacks(cb_list, 0); ++ ++ list_destroy(cb_list); ++ ++ kmem_free(cb_list, sizeof (list_t)); ++} ++ ++/* ++ * Dispatch the commit callbacks registered on this txg to worker threads. 
++ */ ++static void ++txg_dispatch_callbacks(dsl_pool_t *dp, uint64_t txg) ++{ ++ int c; ++ tx_state_t *tx = &dp->dp_tx; ++ list_t *cb_list; ++ ++ for (c = 0; c < max_ncpus; c++) { ++ tx_cpu_t *tc = &tx->tx_cpu[c]; ++ /* No need to lock tx_cpu_t at this point */ ++ ++ int g = txg & TXG_MASK; ++ ++ if (list_is_empty(&tc->tc_callbacks[g])) ++ continue; ++ ++ if (tx->tx_commit_cb_taskq == NULL) { ++ /* ++ * Commit callback taskq hasn't been created yet. ++ */ ++ tx->tx_commit_cb_taskq = taskq_create("tx_commit_cb", ++ 100, minclsyspri, max_ncpus, INT_MAX, ++ TASKQ_THREADS_CPU_PCT | TASKQ_PREPOPULATE); ++ } ++ ++ cb_list = kmem_alloc(sizeof (list_t), KM_PUSHPAGE); ++ list_create(cb_list, sizeof (dmu_tx_callback_t), ++ offsetof(dmu_tx_callback_t, dcb_node)); ++ ++ list_move_tail(cb_list, &tc->tc_callbacks[g]); ++ ++ (void) taskq_dispatch(tx->tx_commit_cb_taskq, (task_func_t *) ++ txg_do_callbacks, cb_list, TQ_SLEEP); ++ } ++} ++ ++/* ++ * Wait for pending commit callbacks of already-synced transactions to finish ++ * processing. ++ * Calling this function from within a commit callback will deadlock. ++ */ ++void ++txg_wait_callbacks(dsl_pool_t *dp) ++{ ++ tx_state_t *tx = &dp->dp_tx; ++ ++ if (tx->tx_commit_cb_taskq != NULL) ++ taskq_wait(tx->tx_commit_cb_taskq); ++} ++ ++static void ++txg_sync_thread(dsl_pool_t *dp) ++{ ++ spa_t *spa = dp->dp_spa; ++ tx_state_t *tx = &dp->dp_tx; ++ callb_cpr_t cpr; ++ uint64_t start, delta; ++ ++#ifdef _KERNEL ++ /* ++ * Annotate this process with a flag that indicates that it is ++ * unsafe to use KM_SLEEP during memory allocations due to the ++ * potential for a deadlock. KM_PUSHPAGE should be used instead. ++ */ ++ current->flags |= PF_NOFS; ++#endif /* _KERNEL */ ++ ++ txg_thread_enter(tx, &cpr); ++ ++ start = delta = 0; ++ for (;;) { ++ hrtime_t hrstart; ++ txg_history_t *th; ++ uint64_t timer, timeout; ++ uint64_t txg; ++ ++ timeout = zfs_txg_timeout * hz; ++ ++ /* ++ * We sync when we're scanning, there's someone waiting ++ * on us, or the quiesce thread has handed off a txg to ++ * us, or we have reached our timeout. ++ */ ++ timer = (delta >= timeout ? 0 : timeout - delta); ++ while (!dsl_scan_active(dp->dp_scan) && ++ !tx->tx_exiting && timer > 0 && ++ tx->tx_synced_txg >= tx->tx_sync_txg_waiting && ++ tx->tx_quiesced_txg == 0) { ++ dprintf("waiting; tx_synced=%llu waiting=%llu dp=%p\n", ++ tx->tx_synced_txg, tx->tx_sync_txg_waiting, dp); ++ txg_thread_wait(tx, &cpr, &tx->tx_sync_more_cv, timer); ++ delta = ddi_get_lbolt() - start; ++ timer = (delta > timeout ? 0 : timeout - delta); ++ } ++ ++ /* ++ * Wait until the quiesce thread hands off a txg to us, ++ * prompting it to do so if necessary. ++ */ ++ while (!tx->tx_exiting && tx->tx_quiesced_txg == 0) { ++ if (tx->tx_quiesce_txg_waiting < tx->tx_open_txg+1) ++ tx->tx_quiesce_txg_waiting = tx->tx_open_txg+1; ++ cv_broadcast(&tx->tx_quiesce_more_cv); ++ txg_thread_wait(tx, &cpr, &tx->tx_quiesce_done_cv, 0); ++ } ++ ++ if (tx->tx_exiting) ++ txg_thread_exit(tx, &cpr, &tx->tx_sync_thread); ++ ++ /* ++ * Consume the quiesced txg which has been handed off to ++ * us. This may cause the quiescing thread to now be ++ * able to quiesce another txg, so we must signal it. 
++ */ ++ txg = tx->tx_quiesced_txg; ++ tx->tx_quiesced_txg = 0; ++ tx->tx_syncing_txg = txg; ++ cv_broadcast(&tx->tx_quiesce_more_cv); ++ ++ th = dsl_pool_txg_history_get(dp, txg); ++ th->th_kstat.state = TXG_STATE_SYNCING; ++ vdev_get_stats(spa->spa_root_vdev, &th->th_vs1); ++ dsl_pool_txg_history_put(th); ++ ++ dprintf("txg=%llu quiesce_txg=%llu sync_txg=%llu\n", ++ txg, tx->tx_quiesce_txg_waiting, tx->tx_sync_txg_waiting); ++ mutex_exit(&tx->tx_sync_lock); ++ ++ start = ddi_get_lbolt(); ++ hrstart = gethrtime(); ++ spa_sync(spa, txg); ++ delta = ddi_get_lbolt() - start; ++ ++ mutex_enter(&tx->tx_sync_lock); ++ tx->tx_synced_txg = txg; ++ tx->tx_syncing_txg = 0; ++ cv_broadcast(&tx->tx_sync_done_cv); ++ ++ /* ++ * Dispatch commit callbacks to worker threads. ++ */ ++ txg_dispatch_callbacks(dp, txg); ++ ++ /* ++ * Measure the txg sync time determine the amount of I/O done. ++ */ ++ th = dsl_pool_txg_history_get(dp, txg); ++ vdev_get_stats(spa->spa_root_vdev, &th->th_vs2); ++ th->th_kstat.sync_time = gethrtime() - hrstart; ++ th->th_kstat.nread = th->th_vs2.vs_bytes[ZIO_TYPE_READ] - ++ th->th_vs1.vs_bytes[ZIO_TYPE_READ]; ++ th->th_kstat.nwritten = th->th_vs2.vs_bytes[ZIO_TYPE_WRITE] - ++ th->th_vs1.vs_bytes[ZIO_TYPE_WRITE]; ++ th->th_kstat.reads = th->th_vs2.vs_ops[ZIO_TYPE_READ] - ++ th->th_vs1.vs_ops[ZIO_TYPE_READ]; ++ th->th_kstat.writes = th->th_vs2.vs_ops[ZIO_TYPE_WRITE] - ++ th->th_vs1.vs_ops[ZIO_TYPE_WRITE]; ++ th->th_kstat.state = TXG_STATE_COMMITTED; ++ dsl_pool_txg_history_put(th); ++ } ++} ++ ++static void ++txg_quiesce_thread(dsl_pool_t *dp) ++{ ++ tx_state_t *tx = &dp->dp_tx; ++ callb_cpr_t cpr; ++ ++ txg_thread_enter(tx, &cpr); ++ ++ for (;;) { ++ uint64_t txg; ++ ++ /* ++ * We quiesce when there's someone waiting on us. ++ * However, we can only have one txg in "quiescing" or ++ * "quiesced, waiting to sync" state. So we wait until ++ * the "quiesced, waiting to sync" txg has been consumed ++ * by the sync thread. ++ */ ++ while (!tx->tx_exiting && ++ (tx->tx_open_txg >= tx->tx_quiesce_txg_waiting || ++ tx->tx_quiesced_txg != 0)) ++ txg_thread_wait(tx, &cpr, &tx->tx_quiesce_more_cv, 0); ++ ++ if (tx->tx_exiting) ++ txg_thread_exit(tx, &cpr, &tx->tx_quiesce_thread); ++ ++ txg = tx->tx_open_txg; ++ dprintf("txg=%llu quiesce_txg=%llu sync_txg=%llu\n", ++ txg, tx->tx_quiesce_txg_waiting, ++ tx->tx_sync_txg_waiting); ++ mutex_exit(&tx->tx_sync_lock); ++ txg_quiesce(dp, txg); ++ mutex_enter(&tx->tx_sync_lock); ++ ++ /* ++ * Hand this txg off to the sync thread. ++ */ ++ dprintf("quiesce done, handing off txg %llu\n", txg); ++ tx->tx_quiesced_txg = txg; ++ cv_broadcast(&tx->tx_sync_more_cv); ++ cv_broadcast(&tx->tx_quiesce_done_cv); ++ } ++} ++ ++/* ++ * Delay this thread by 'ticks' if we are still in the open transaction ++ * group and there is already a waiting txg quiesing or quiesced. Abort ++ * the delay if this txg stalls or enters the quiesing state. 
++ */ ++void ++txg_delay(dsl_pool_t *dp, uint64_t txg, int ticks) ++{ ++ tx_state_t *tx = &dp->dp_tx; ++ clock_t timeout = ddi_get_lbolt() + ticks; ++ ++ /* don't delay if this txg could transition to quiesing immediately */ ++ if (tx->tx_open_txg > txg || ++ tx->tx_syncing_txg == txg-1 || tx->tx_synced_txg == txg-1) ++ return; ++ ++ mutex_enter(&tx->tx_sync_lock); ++ if (tx->tx_open_txg > txg || tx->tx_synced_txg == txg-1) { ++ mutex_exit(&tx->tx_sync_lock); ++ return; ++ } ++ ++ while (ddi_get_lbolt() < timeout && ++ tx->tx_syncing_txg < txg-1 && !txg_stalled(dp)) ++ (void) cv_timedwait(&tx->tx_quiesce_more_cv, &tx->tx_sync_lock, ++ timeout); ++ ++ DMU_TX_STAT_BUMP(dmu_tx_delay); ++ ++ mutex_exit(&tx->tx_sync_lock); ++} ++ ++void ++txg_wait_synced(dsl_pool_t *dp, uint64_t txg) ++{ ++ tx_state_t *tx = &dp->dp_tx; ++ ++ mutex_enter(&tx->tx_sync_lock); ++ ASSERT(tx->tx_threads == 2); ++ if (txg == 0) ++ txg = tx->tx_open_txg + TXG_DEFER_SIZE; ++ if (tx->tx_sync_txg_waiting < txg) ++ tx->tx_sync_txg_waiting = txg; ++ dprintf("txg=%llu quiesce_txg=%llu sync_txg=%llu\n", ++ txg, tx->tx_quiesce_txg_waiting, tx->tx_sync_txg_waiting); ++ while (tx->tx_synced_txg < txg) { ++ dprintf("broadcasting sync more " ++ "tx_synced=%llu waiting=%llu dp=%p\n", ++ tx->tx_synced_txg, tx->tx_sync_txg_waiting, dp); ++ cv_broadcast(&tx->tx_sync_more_cv); ++ cv_wait(&tx->tx_sync_done_cv, &tx->tx_sync_lock); ++ } ++ mutex_exit(&tx->tx_sync_lock); ++} ++ ++void ++txg_wait_open(dsl_pool_t *dp, uint64_t txg) ++{ ++ tx_state_t *tx = &dp->dp_tx; ++ ++ mutex_enter(&tx->tx_sync_lock); ++ ASSERT(tx->tx_threads == 2); ++ if (txg == 0) ++ txg = tx->tx_open_txg + 1; ++ if (tx->tx_quiesce_txg_waiting < txg) ++ tx->tx_quiesce_txg_waiting = txg; ++ dprintf("txg=%llu quiesce_txg=%llu sync_txg=%llu\n", ++ txg, tx->tx_quiesce_txg_waiting, tx->tx_sync_txg_waiting); ++ while (tx->tx_open_txg < txg) { ++ cv_broadcast(&tx->tx_quiesce_more_cv); ++ cv_wait(&tx->tx_quiesce_done_cv, &tx->tx_sync_lock); ++ } ++ mutex_exit(&tx->tx_sync_lock); ++} ++ ++boolean_t ++txg_stalled(dsl_pool_t *dp) ++{ ++ tx_state_t *tx = &dp->dp_tx; ++ return (tx->tx_quiesce_txg_waiting > tx->tx_open_txg); ++} ++ ++boolean_t ++txg_sync_waiting(dsl_pool_t *dp) ++{ ++ tx_state_t *tx = &dp->dp_tx; ++ ++ return (tx->tx_syncing_txg <= tx->tx_sync_txg_waiting || ++ tx->tx_quiesced_txg != 0); ++} ++ ++/* ++ * Per-txg object lists. ++ */ ++void ++txg_list_create(txg_list_t *tl, size_t offset) ++{ ++ int t; ++ ++ mutex_init(&tl->tl_lock, NULL, MUTEX_DEFAULT, NULL); ++ ++ tl->tl_offset = offset; ++ ++ for (t = 0; t < TXG_SIZE; t++) ++ tl->tl_head[t] = NULL; ++} ++ ++void ++txg_list_destroy(txg_list_t *tl) ++{ ++ int t; ++ ++ for (t = 0; t < TXG_SIZE; t++) ++ ASSERT(txg_list_empty(tl, t)); ++ ++ mutex_destroy(&tl->tl_lock); ++} ++ ++int ++txg_list_empty(txg_list_t *tl, uint64_t txg) ++{ ++ return (tl->tl_head[txg & TXG_MASK] == NULL); ++} ++ ++/* ++ * Add an entry to the list. ++ * Returns 0 if it's a new entry, 1 if it's already there. ++ */ ++int ++txg_list_add(txg_list_t *tl, void *p, uint64_t txg) ++{ ++ int t = txg & TXG_MASK; ++ txg_node_t *tn = (txg_node_t *)((char *)p + tl->tl_offset); ++ int already_on_list; ++ ++ mutex_enter(&tl->tl_lock); ++ already_on_list = tn->tn_member[t]; ++ if (!already_on_list) { ++ tn->tn_member[t] = 1; ++ tn->tn_next[t] = tl->tl_head[t]; ++ tl->tl_head[t] = tn; ++ } ++ mutex_exit(&tl->tl_lock); ++ ++ return (already_on_list); ++} ++ ++/* ++ * Add an entry to the end of the list (walks list to find end). 
++ * Returns 0 if it's a new entry, 1 if it's already there. ++ */ ++int ++txg_list_add_tail(txg_list_t *tl, void *p, uint64_t txg) ++{ ++ int t = txg & TXG_MASK; ++ txg_node_t *tn = (txg_node_t *)((char *)p + tl->tl_offset); ++ int already_on_list; ++ ++ mutex_enter(&tl->tl_lock); ++ already_on_list = tn->tn_member[t]; ++ if (!already_on_list) { ++ txg_node_t **tp; ++ ++ for (tp = &tl->tl_head[t]; *tp != NULL; tp = &(*tp)->tn_next[t]) ++ continue; ++ ++ tn->tn_member[t] = 1; ++ tn->tn_next[t] = NULL; ++ *tp = tn; ++ } ++ mutex_exit(&tl->tl_lock); ++ ++ return (already_on_list); ++} ++ ++/* ++ * Remove the head of the list and return it. ++ */ ++void * ++txg_list_remove(txg_list_t *tl, uint64_t txg) ++{ ++ int t = txg & TXG_MASK; ++ txg_node_t *tn; ++ void *p = NULL; ++ ++ mutex_enter(&tl->tl_lock); ++ if ((tn = tl->tl_head[t]) != NULL) { ++ p = (char *)tn - tl->tl_offset; ++ tl->tl_head[t] = tn->tn_next[t]; ++ tn->tn_next[t] = NULL; ++ tn->tn_member[t] = 0; ++ } ++ mutex_exit(&tl->tl_lock); ++ ++ return (p); ++} ++ ++/* ++ * Remove a specific item from the list and return it. ++ */ ++void * ++txg_list_remove_this(txg_list_t *tl, void *p, uint64_t txg) ++{ ++ int t = txg & TXG_MASK; ++ txg_node_t *tn, **tp; ++ ++ mutex_enter(&tl->tl_lock); ++ ++ for (tp = &tl->tl_head[t]; (tn = *tp) != NULL; tp = &tn->tn_next[t]) { ++ if ((char *)tn - tl->tl_offset == p) { ++ *tp = tn->tn_next[t]; ++ tn->tn_next[t] = NULL; ++ tn->tn_member[t] = 0; ++ mutex_exit(&tl->tl_lock); ++ return (p); ++ } ++ } ++ ++ mutex_exit(&tl->tl_lock); ++ ++ return (NULL); ++} ++ ++int ++txg_list_member(txg_list_t *tl, void *p, uint64_t txg) ++{ ++ int t = txg & TXG_MASK; ++ txg_node_t *tn = (txg_node_t *)((char *)p + tl->tl_offset); ++ ++ return (tn->tn_member[t]); ++} ++ ++/* ++ * Walk a txg list -- only safe if you know it's not changing. ++ */ ++void * ++txg_list_head(txg_list_t *tl, uint64_t txg) ++{ ++ int t = txg & TXG_MASK; ++ txg_node_t *tn = tl->tl_head[t]; ++ ++ return (tn == NULL ? NULL : (char *)tn - tl->tl_offset); ++} ++ ++void * ++txg_list_next(txg_list_t *tl, void *p, uint64_t txg) ++{ ++ int t = txg & TXG_MASK; ++ txg_node_t *tn = (txg_node_t *)((char *)p + tl->tl_offset); ++ ++ tn = tn->tn_next[t]; ++ ++ return (tn == NULL ? NULL : (char *)tn - tl->tl_offset); ++} ++ ++#if defined(_KERNEL) && defined(HAVE_SPL) ++EXPORT_SYMBOL(txg_init); ++EXPORT_SYMBOL(txg_fini); ++EXPORT_SYMBOL(txg_sync_start); ++EXPORT_SYMBOL(txg_sync_stop); ++EXPORT_SYMBOL(txg_hold_open); ++EXPORT_SYMBOL(txg_rele_to_quiesce); ++EXPORT_SYMBOL(txg_rele_to_sync); ++EXPORT_SYMBOL(txg_register_callbacks); ++EXPORT_SYMBOL(txg_delay); ++EXPORT_SYMBOL(txg_wait_synced); ++EXPORT_SYMBOL(txg_wait_open); ++EXPORT_SYMBOL(txg_wait_callbacks); ++EXPORT_SYMBOL(txg_stalled); ++EXPORT_SYMBOL(txg_sync_waiting); ++ ++module_param(zfs_txg_timeout, int, 0644); ++MODULE_PARM_DESC(zfs_txg_timeout, "Max seconds worth of delta per txg"); ++#endif +diff -uNr linux-3.2.33-go.orig/fs/zfs/zfs/uberblock.c linux-3.2.33-go/fs/zfs/zfs/uberblock.c +--- linux-3.2.33-go.orig/fs/zfs/zfs/uberblock.c 1970-01-01 01:00:00.000000000 +0100 ++++ linux-3.2.33-go/fs/zfs/zfs/uberblock.c 2012-11-16 23:25:34.349039334 +0100 +@@ -0,0 +1,61 @@ ++/* ++ * CDDL HEADER START ++ * ++ * The contents of this file are subject to the terms of the ++ * Common Development and Distribution License (the "License"). ++ * You may not use this file except in compliance with the License. 
++ * ++ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE ++ * or http://www.opensolaris.org/os/licensing. ++ * See the License for the specific language governing permissions ++ * and limitations under the License. ++ * ++ * When distributing Covered Code, include this CDDL HEADER in each ++ * file and include the License file at usr/src/OPENSOLARIS.LICENSE. ++ * If applicable, add the following below this CDDL HEADER, with the ++ * fields enclosed by brackets "[]" replaced with your own identifying ++ * information: Portions Copyright [yyyy] [name of copyright owner] ++ * ++ * CDDL HEADER END ++ */ ++/* ++ * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved. ++ */ ++ ++#include ++#include ++#include ++ ++int ++uberblock_verify(uberblock_t *ub) ++{ ++ if (ub->ub_magic == BSWAP_64((uint64_t)UBERBLOCK_MAGIC)) ++ byteswap_uint64_array(ub, sizeof (uberblock_t)); ++ ++ if (ub->ub_magic != UBERBLOCK_MAGIC) ++ return (EINVAL); ++ ++ return (0); ++} ++ ++/* ++ * Update the uberblock and return a boolean value indicating whether ++ * anything changed in this transaction group. ++ */ ++int ++uberblock_update(uberblock_t *ub, vdev_t *rvd, uint64_t txg) ++{ ++ ASSERT(ub->ub_txg < txg); ++ ++ /* ++ * We explicitly do not set ub_version here, so that older versions ++ * continue to be written with the previous uberblock version. ++ */ ++ ub->ub_magic = UBERBLOCK_MAGIC; ++ ub->ub_txg = txg; ++ ub->ub_guid_sum = rvd->vdev_guid_sum; ++ ub->ub_timestamp = gethrestime_sec(); ++ ub->ub_software_version = SPA_VERSION; ++ ++ return (ub->ub_rootbp.blk_birth == txg); ++} +diff -uNr linux-3.2.33-go.orig/fs/zfs/zfs/unique.c linux-3.2.33-go/fs/zfs/zfs/unique.c +--- linux-3.2.33-go.orig/fs/zfs/zfs/unique.c 1970-01-01 01:00:00.000000000 +0100 ++++ linux-3.2.33-go/fs/zfs/zfs/unique.c 2012-11-16 23:25:34.352039300 +0100 +@@ -0,0 +1,116 @@ ++/* ++ * CDDL HEADER START ++ * ++ * The contents of this file are subject to the terms of the ++ * Common Development and Distribution License (the "License"). ++ * You may not use this file except in compliance with the License. ++ * ++ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE ++ * or http://www.opensolaris.org/os/licensing. ++ * See the License for the specific language governing permissions ++ * and limitations under the License. ++ * ++ * When distributing Covered Code, include this CDDL HEADER in each ++ * file and include the License file at usr/src/OPENSOLARIS.LICENSE. ++ * If applicable, add the following below this CDDL HEADER, with the ++ * fields enclosed by brackets "[]" replaced with your own identifying ++ * information: Portions Copyright [yyyy] [name of copyright owner] ++ * ++ * CDDL HEADER END ++ */ ++/* ++ * Copyright 2007 Sun Microsystems, Inc. All rights reserved. ++ * Use is subject to license terms. 
++ */ ++ ++ ++ ++#include ++#include ++#include ++ ++static avl_tree_t unique_avl; ++static kmutex_t unique_mtx; ++ ++typedef struct unique { ++ avl_node_t un_link; ++ uint64_t un_value; ++} unique_t; ++ ++#define UNIQUE_MASK ((1ULL << UNIQUE_BITS) - 1) ++ ++static int ++unique_compare(const void *a, const void *b) ++{ ++ const unique_t *una = a; ++ const unique_t *unb = b; ++ ++ if (una->un_value < unb->un_value) ++ return (-1); ++ if (una->un_value > unb->un_value) ++ return (+1); ++ return (0); ++} ++ ++void ++unique_init(void) ++{ ++ avl_create(&unique_avl, unique_compare, ++ sizeof (unique_t), offsetof(unique_t, un_link)); ++ mutex_init(&unique_mtx, NULL, MUTEX_DEFAULT, NULL); ++} ++ ++void ++unique_fini(void) ++{ ++ avl_destroy(&unique_avl); ++ mutex_destroy(&unique_mtx); ++} ++ ++uint64_t ++unique_create(void) ++{ ++ uint64_t value = unique_insert(0); ++ unique_remove(value); ++ return (value); ++} ++ ++uint64_t ++unique_insert(uint64_t value) ++{ ++ avl_index_t idx; ++ unique_t *un = kmem_alloc(sizeof (unique_t), KM_PUSHPAGE); ++ ++ un->un_value = value; ++ ++ mutex_enter(&unique_mtx); ++ while (un->un_value == 0 || un->un_value & ~UNIQUE_MASK || ++ avl_find(&unique_avl, un, &idx)) { ++ mutex_exit(&unique_mtx); ++ (void) random_get_pseudo_bytes((void*)&un->un_value, ++ sizeof (un->un_value)); ++ un->un_value &= UNIQUE_MASK; ++ mutex_enter(&unique_mtx); ++ } ++ ++ avl_insert(&unique_avl, un, idx); ++ mutex_exit(&unique_mtx); ++ ++ return (un->un_value); ++} ++ ++void ++unique_remove(uint64_t value) ++{ ++ unique_t un_tofind; ++ unique_t *un; ++ ++ un_tofind.un_value = value; ++ mutex_enter(&unique_mtx); ++ un = avl_find(&unique_avl, &un_tofind, NULL); ++ if (un != NULL) { ++ avl_remove(&unique_avl, un); ++ kmem_free(un, sizeof (unique_t)); ++ } ++ mutex_exit(&unique_mtx); ++} +diff -uNr linux-3.2.33-go.orig/fs/zfs/zfs/vdev.c linux-3.2.33-go/fs/zfs/zfs/vdev.c +--- linux-3.2.33-go.orig/fs/zfs/zfs/vdev.c 1970-01-01 01:00:00.000000000 +0100 ++++ linux-3.2.33-go/fs/zfs/zfs/vdev.c 2012-11-16 23:25:34.349039334 +0100 +@@ -0,0 +1,3207 @@ ++/* ++ * CDDL HEADER START ++ * ++ * The contents of this file are subject to the terms of the ++ * Common Development and Distribution License (the "License"). ++ * You may not use this file except in compliance with the License. ++ * ++ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE ++ * or http://www.opensolaris.org/os/licensing. ++ * See the License for the specific language governing permissions ++ * and limitations under the License. ++ * ++ * When distributing Covered Code, include this CDDL HEADER in each ++ * file and include the License file at usr/src/OPENSOLARIS.LICENSE. ++ * If applicable, add the following below this CDDL HEADER, with the ++ * fields enclosed by brackets "[]" replaced with your own identifying ++ * information: Portions Copyright [yyyy] [name of copyright owner] ++ * ++ * CDDL HEADER END ++ */ ++ ++/* ++ * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved. ++ * Copyright 2011 Nexenta Systems, Inc. All rights reserved. ++ * Copyright (c) 2012 by Delphix. All rights reserved. ++ */ ++ ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++ ++/* ++ * Virtual device management. 
++ */ ++ ++static vdev_ops_t *vdev_ops_table[] = { ++ &vdev_root_ops, ++ &vdev_raidz_ops, ++ &vdev_mirror_ops, ++ &vdev_replacing_ops, ++ &vdev_spare_ops, ++ &vdev_disk_ops, ++ &vdev_file_ops, ++ &vdev_missing_ops, ++ &vdev_hole_ops, ++ NULL ++}; ++ ++/* maximum scrub/resilver I/O queue per leaf vdev */ ++int zfs_scrub_limit = 10; ++ ++/* ++ * Given a vdev type, return the appropriate ops vector. ++ */ ++static vdev_ops_t * ++vdev_getops(const char *type) ++{ ++ vdev_ops_t *ops, **opspp; ++ ++ for (opspp = vdev_ops_table; (ops = *opspp) != NULL; opspp++) ++ if (strcmp(ops->vdev_op_type, type) == 0) ++ break; ++ ++ return (ops); ++} ++ ++/* ++ * Default asize function: return the MAX of psize with the asize of ++ * all children. This is what's used by anything other than RAID-Z. ++ */ ++uint64_t ++vdev_default_asize(vdev_t *vd, uint64_t psize) ++{ ++ uint64_t asize = P2ROUNDUP(psize, 1ULL << vd->vdev_top->vdev_ashift); ++ uint64_t csize; ++ int c; ++ ++ for (c = 0; c < vd->vdev_children; c++) { ++ csize = vdev_psize_to_asize(vd->vdev_child[c], psize); ++ asize = MAX(asize, csize); ++ } ++ ++ return (asize); ++} ++ ++/* ++ * Get the minimum allocatable size. We define the allocatable size as ++ * the vdev's asize rounded to the nearest metaslab. This allows us to ++ * replace or attach devices which don't have the same physical size but ++ * can still satisfy the same number of allocations. ++ */ ++uint64_t ++vdev_get_min_asize(vdev_t *vd) ++{ ++ vdev_t *pvd = vd->vdev_parent; ++ ++ /* ++ * If our parent is NULL (inactive spare or cache) or is the root, ++ * just return our own asize. ++ */ ++ if (pvd == NULL) ++ return (vd->vdev_asize); ++ ++ /* ++ * The top-level vdev just returns the allocatable size rounded ++ * to the nearest metaslab. ++ */ ++ if (vd == vd->vdev_top) ++ return (P2ALIGN(vd->vdev_asize, 1ULL << vd->vdev_ms_shift)); ++ ++ /* ++ * The allocatable space for a raidz vdev is N * sizeof(smallest child), ++ * so each child must provide at least 1/Nth of its asize. 
++ */ ++ if (pvd->vdev_ops == &vdev_raidz_ops) ++ return (pvd->vdev_min_asize / pvd->vdev_children); ++ ++ return (pvd->vdev_min_asize); ++} ++ ++void ++vdev_set_min_asize(vdev_t *vd) ++{ ++ int c; ++ vd->vdev_min_asize = vdev_get_min_asize(vd); ++ ++ for (c = 0; c < vd->vdev_children; c++) ++ vdev_set_min_asize(vd->vdev_child[c]); ++} ++ ++vdev_t * ++vdev_lookup_top(spa_t *spa, uint64_t vdev) ++{ ++ vdev_t *rvd = spa->spa_root_vdev; ++ ++ ASSERT(spa_config_held(spa, SCL_ALL, RW_READER) != 0); ++ ++ if (vdev < rvd->vdev_children) { ++ ASSERT(rvd->vdev_child[vdev] != NULL); ++ return (rvd->vdev_child[vdev]); ++ } ++ ++ return (NULL); ++} ++ ++vdev_t * ++vdev_lookup_by_guid(vdev_t *vd, uint64_t guid) ++{ ++ vdev_t *mvd; ++ int c; ++ ++ if (vd->vdev_guid == guid) ++ return (vd); ++ ++ for (c = 0; c < vd->vdev_children; c++) ++ if ((mvd = vdev_lookup_by_guid(vd->vdev_child[c], guid)) != ++ NULL) ++ return (mvd); ++ ++ return (NULL); ++} ++ ++void ++vdev_add_child(vdev_t *pvd, vdev_t *cvd) ++{ ++ size_t oldsize, newsize; ++ uint64_t id = cvd->vdev_id; ++ vdev_t **newchild; ++ ++ ASSERT(spa_config_held(cvd->vdev_spa, SCL_ALL, RW_WRITER) == SCL_ALL); ++ ASSERT(cvd->vdev_parent == NULL); ++ ++ cvd->vdev_parent = pvd; ++ ++ if (pvd == NULL) ++ return; ++ ++ ASSERT(id >= pvd->vdev_children || pvd->vdev_child[id] == NULL); ++ ++ oldsize = pvd->vdev_children * sizeof (vdev_t *); ++ pvd->vdev_children = MAX(pvd->vdev_children, id + 1); ++ newsize = pvd->vdev_children * sizeof (vdev_t *); ++ ++ newchild = kmem_zalloc(newsize, KM_PUSHPAGE); ++ if (pvd->vdev_child != NULL) { ++ bcopy(pvd->vdev_child, newchild, oldsize); ++ kmem_free(pvd->vdev_child, oldsize); ++ } ++ ++ pvd->vdev_child = newchild; ++ pvd->vdev_child[id] = cvd; ++ ++ cvd->vdev_top = (pvd->vdev_top ? pvd->vdev_top: cvd); ++ ASSERT(cvd->vdev_top->vdev_parent->vdev_parent == NULL); ++ ++ /* ++ * Walk up all ancestors to update guid sum. ++ */ ++ for (; pvd != NULL; pvd = pvd->vdev_parent) ++ pvd->vdev_guid_sum += cvd->vdev_guid_sum; ++} ++ ++void ++vdev_remove_child(vdev_t *pvd, vdev_t *cvd) ++{ ++ int c; ++ uint_t id = cvd->vdev_id; ++ ++ ASSERT(cvd->vdev_parent == pvd); ++ ++ if (pvd == NULL) ++ return; ++ ++ ASSERT(id < pvd->vdev_children); ++ ASSERT(pvd->vdev_child[id] == cvd); ++ ++ pvd->vdev_child[id] = NULL; ++ cvd->vdev_parent = NULL; ++ ++ for (c = 0; c < pvd->vdev_children; c++) ++ if (pvd->vdev_child[c]) ++ break; ++ ++ if (c == pvd->vdev_children) { ++ kmem_free(pvd->vdev_child, c * sizeof (vdev_t *)); ++ pvd->vdev_child = NULL; ++ pvd->vdev_children = 0; ++ } ++ ++ /* ++ * Walk up all ancestors to update guid sum. ++ */ ++ for (; pvd != NULL; pvd = pvd->vdev_parent) ++ pvd->vdev_guid_sum -= cvd->vdev_guid_sum; ++} ++ ++/* ++ * Remove any holes in the child array. ++ */ ++void ++vdev_compact_children(vdev_t *pvd) ++{ ++ vdev_t **newchild, *cvd; ++ int oldc = pvd->vdev_children; ++ int newc; ++ int c; ++ ++ ASSERT(spa_config_held(pvd->vdev_spa, SCL_ALL, RW_WRITER) == SCL_ALL); ++ ++ for (c = newc = 0; c < oldc; c++) ++ if (pvd->vdev_child[c]) ++ newc++; ++ ++ newchild = kmem_alloc(newc * sizeof (vdev_t *), KM_PUSHPAGE); ++ ++ for (c = newc = 0; c < oldc; c++) { ++ if ((cvd = pvd->vdev_child[c]) != NULL) { ++ newchild[newc] = cvd; ++ cvd->vdev_id = newc++; ++ } ++ } ++ ++ kmem_free(pvd->vdev_child, oldc * sizeof (vdev_t *)); ++ pvd->vdev_child = newchild; ++ pvd->vdev_children = newc; ++} ++ ++/* ++ * Allocate and minimally initialize a vdev_t. 
++ */ ++vdev_t * ++vdev_alloc_common(spa_t *spa, uint_t id, uint64_t guid, vdev_ops_t *ops) ++{ ++ vdev_t *vd; ++ int t; ++ ++ vd = kmem_zalloc(sizeof (vdev_t), KM_PUSHPAGE); ++ ++ if (spa->spa_root_vdev == NULL) { ++ ASSERT(ops == &vdev_root_ops); ++ spa->spa_root_vdev = vd; ++ spa->spa_load_guid = spa_generate_guid(NULL); ++ } ++ ++ if (guid == 0 && ops != &vdev_hole_ops) { ++ if (spa->spa_root_vdev == vd) { ++ /* ++ * The root vdev's guid will also be the pool guid, ++ * which must be unique among all pools. ++ */ ++ guid = spa_generate_guid(NULL); ++ } else { ++ /* ++ * Any other vdev's guid must be unique within the pool. ++ */ ++ guid = spa_generate_guid(spa); ++ } ++ ASSERT(!spa_guid_exists(spa_guid(spa), guid)); ++ } ++ ++ vd->vdev_spa = spa; ++ vd->vdev_id = id; ++ vd->vdev_guid = guid; ++ vd->vdev_guid_sum = guid; ++ vd->vdev_ops = ops; ++ vd->vdev_state = VDEV_STATE_CLOSED; ++ vd->vdev_ishole = (ops == &vdev_hole_ops); ++ ++ list_link_init(&vd->vdev_config_dirty_node); ++ list_link_init(&vd->vdev_state_dirty_node); ++ mutex_init(&vd->vdev_dtl_lock, NULL, MUTEX_DEFAULT, NULL); ++ mutex_init(&vd->vdev_stat_lock, NULL, MUTEX_DEFAULT, NULL); ++ mutex_init(&vd->vdev_probe_lock, NULL, MUTEX_DEFAULT, NULL); ++ for (t = 0; t < DTL_TYPES; t++) { ++ space_map_create(&vd->vdev_dtl[t], 0, -1ULL, 0, ++ &vd->vdev_dtl_lock); ++ } ++ txg_list_create(&vd->vdev_ms_list, ++ offsetof(struct metaslab, ms_txg_node)); ++ txg_list_create(&vd->vdev_dtl_list, ++ offsetof(struct vdev, vdev_dtl_node)); ++ vd->vdev_stat.vs_timestamp = gethrtime(); ++ vdev_queue_init(vd); ++ vdev_cache_init(vd); ++ ++ return (vd); ++} ++ ++/* ++ * Allocate a new vdev. The 'alloctype' is used to control whether we are ++ * creating a new vdev or loading an existing one - the behavior is slightly ++ * different for each case. ++ */ ++int ++vdev_alloc(spa_t *spa, vdev_t **vdp, nvlist_t *nv, vdev_t *parent, uint_t id, ++ int alloctype) ++{ ++ vdev_ops_t *ops; ++ char *type; ++ uint64_t guid = 0, islog, nparity; ++ vdev_t *vd; ++ ++ ASSERT(spa_config_held(spa, SCL_ALL, RW_WRITER) == SCL_ALL); ++ ++ if (nvlist_lookup_string(nv, ZPOOL_CONFIG_TYPE, &type) != 0) ++ return (EINVAL); ++ ++ if ((ops = vdev_getops(type)) == NULL) ++ return (EINVAL); ++ ++ /* ++ * If this is a load, get the vdev guid from the nvlist. ++ * Otherwise, vdev_alloc_common() will generate one for us. ++ */ ++ if (alloctype == VDEV_ALLOC_LOAD) { ++ uint64_t label_id; ++ ++ if (nvlist_lookup_uint64(nv, ZPOOL_CONFIG_ID, &label_id) || ++ label_id != id) ++ return (EINVAL); ++ ++ if (nvlist_lookup_uint64(nv, ZPOOL_CONFIG_GUID, &guid) != 0) ++ return (EINVAL); ++ } else if (alloctype == VDEV_ALLOC_SPARE) { ++ if (nvlist_lookup_uint64(nv, ZPOOL_CONFIG_GUID, &guid) != 0) ++ return (EINVAL); ++ } else if (alloctype == VDEV_ALLOC_L2CACHE) { ++ if (nvlist_lookup_uint64(nv, ZPOOL_CONFIG_GUID, &guid) != 0) ++ return (EINVAL); ++ } else if (alloctype == VDEV_ALLOC_ROOTPOOL) { ++ if (nvlist_lookup_uint64(nv, ZPOOL_CONFIG_GUID, &guid) != 0) ++ return (EINVAL); ++ } ++ ++ /* ++ * The first allocated vdev must be of type 'root'. ++ */ ++ if (ops != &vdev_root_ops && spa->spa_root_vdev == NULL) ++ return (EINVAL); ++ ++ /* ++ * Determine whether we're a log vdev. ++ */ ++ islog = 0; ++ (void) nvlist_lookup_uint64(nv, ZPOOL_CONFIG_IS_LOG, &islog); ++ if (islog && spa_version(spa) < SPA_VERSION_SLOGS) ++ return (ENOTSUP); ++ ++ if (ops == &vdev_hole_ops && spa_version(spa) < SPA_VERSION_HOLES) ++ return (ENOTSUP); ++ ++ /* ++ * Set the nparity property for RAID-Z vdevs. 
++ */ ++ nparity = -1ULL; ++ if (ops == &vdev_raidz_ops) { ++ if (nvlist_lookup_uint64(nv, ZPOOL_CONFIG_NPARITY, ++ &nparity) == 0) { ++ if (nparity == 0 || nparity > VDEV_RAIDZ_MAXPARITY) ++ return (EINVAL); ++ /* ++ * Previous versions could only support 1 or 2 parity ++ * device. ++ */ ++ if (nparity > 1 && ++ spa_version(spa) < SPA_VERSION_RAIDZ2) ++ return (ENOTSUP); ++ if (nparity > 2 && ++ spa_version(spa) < SPA_VERSION_RAIDZ3) ++ return (ENOTSUP); ++ } else { ++ /* ++ * We require the parity to be specified for SPAs that ++ * support multiple parity levels. ++ */ ++ if (spa_version(spa) >= SPA_VERSION_RAIDZ2) ++ return (EINVAL); ++ /* ++ * Otherwise, we default to 1 parity device for RAID-Z. ++ */ ++ nparity = 1; ++ } ++ } else { ++ nparity = 0; ++ } ++ ASSERT(nparity != -1ULL); ++ ++ vd = vdev_alloc_common(spa, id, guid, ops); ++ ++ vd->vdev_islog = islog; ++ vd->vdev_nparity = nparity; ++ ++ if (nvlist_lookup_string(nv, ZPOOL_CONFIG_PATH, &vd->vdev_path) == 0) ++ vd->vdev_path = spa_strdup(vd->vdev_path); ++ if (nvlist_lookup_string(nv, ZPOOL_CONFIG_DEVID, &vd->vdev_devid) == 0) ++ vd->vdev_devid = spa_strdup(vd->vdev_devid); ++ if (nvlist_lookup_string(nv, ZPOOL_CONFIG_PHYS_PATH, ++ &vd->vdev_physpath) == 0) ++ vd->vdev_physpath = spa_strdup(vd->vdev_physpath); ++ if (nvlist_lookup_string(nv, ZPOOL_CONFIG_FRU, &vd->vdev_fru) == 0) ++ vd->vdev_fru = spa_strdup(vd->vdev_fru); ++ ++ /* ++ * Set the whole_disk property. If it's not specified, leave the value ++ * as -1. ++ */ ++ if (nvlist_lookup_uint64(nv, ZPOOL_CONFIG_WHOLE_DISK, ++ &vd->vdev_wholedisk) != 0) ++ vd->vdev_wholedisk = -1ULL; ++ ++ /* ++ * Look for the 'not present' flag. This will only be set if the device ++ * was not present at the time of import. ++ */ ++ (void) nvlist_lookup_uint64(nv, ZPOOL_CONFIG_NOT_PRESENT, ++ &vd->vdev_not_present); ++ ++ /* ++ * Get the alignment requirement. ++ */ ++ (void) nvlist_lookup_uint64(nv, ZPOOL_CONFIG_ASHIFT, &vd->vdev_ashift); ++ ++ /* ++ * Retrieve the vdev creation time. ++ */ ++ (void) nvlist_lookup_uint64(nv, ZPOOL_CONFIG_CREATE_TXG, ++ &vd->vdev_crtxg); ++ ++ /* ++ * If we're a top-level vdev, try to load the allocation parameters. ++ */ ++ if (parent && !parent->vdev_parent && ++ (alloctype == VDEV_ALLOC_LOAD || alloctype == VDEV_ALLOC_SPLIT)) { ++ (void) nvlist_lookup_uint64(nv, ZPOOL_CONFIG_METASLAB_ARRAY, ++ &vd->vdev_ms_array); ++ (void) nvlist_lookup_uint64(nv, ZPOOL_CONFIG_METASLAB_SHIFT, ++ &vd->vdev_ms_shift); ++ (void) nvlist_lookup_uint64(nv, ZPOOL_CONFIG_ASIZE, ++ &vd->vdev_asize); ++ (void) nvlist_lookup_uint64(nv, ZPOOL_CONFIG_REMOVING, ++ &vd->vdev_removing); ++ } ++ ++ if (parent && !parent->vdev_parent && alloctype != VDEV_ALLOC_ATTACH) { ++ ASSERT(alloctype == VDEV_ALLOC_LOAD || ++ alloctype == VDEV_ALLOC_ADD || ++ alloctype == VDEV_ALLOC_SPLIT || ++ alloctype == VDEV_ALLOC_ROOTPOOL); ++ vd->vdev_mg = metaslab_group_create(islog ? ++ spa_log_class(spa) : spa_normal_class(spa), vd); ++ } ++ ++ /* ++ * If we're a leaf vdev, try to load the DTL object and other state. 
++ */ ++ if (vd->vdev_ops->vdev_op_leaf && ++ (alloctype == VDEV_ALLOC_LOAD || alloctype == VDEV_ALLOC_L2CACHE || ++ alloctype == VDEV_ALLOC_ROOTPOOL)) { ++ if (alloctype == VDEV_ALLOC_LOAD) { ++ (void) nvlist_lookup_uint64(nv, ZPOOL_CONFIG_DTL, ++ &vd->vdev_dtl_smo.smo_object); ++ (void) nvlist_lookup_uint64(nv, ZPOOL_CONFIG_UNSPARE, ++ &vd->vdev_unspare); ++ } ++ ++ if (alloctype == VDEV_ALLOC_ROOTPOOL) { ++ uint64_t spare = 0; ++ ++ if (nvlist_lookup_uint64(nv, ZPOOL_CONFIG_IS_SPARE, ++ &spare) == 0 && spare) ++ spa_spare_add(vd); ++ } ++ ++ (void) nvlist_lookup_uint64(nv, ZPOOL_CONFIG_OFFLINE, ++ &vd->vdev_offline); ++ ++ (void) nvlist_lookup_uint64(nv, ZPOOL_CONFIG_RESILVERING, ++ &vd->vdev_resilvering); ++ ++ /* ++ * When importing a pool, we want to ignore the persistent fault ++ * state, as the diagnosis made on another system may not be ++ * valid in the current context. Local vdevs will ++ * remain in the faulted state. ++ */ ++ if (spa_load_state(spa) == SPA_LOAD_OPEN) { ++ (void) nvlist_lookup_uint64(nv, ZPOOL_CONFIG_FAULTED, ++ &vd->vdev_faulted); ++ (void) nvlist_lookup_uint64(nv, ZPOOL_CONFIG_DEGRADED, ++ &vd->vdev_degraded); ++ (void) nvlist_lookup_uint64(nv, ZPOOL_CONFIG_REMOVED, ++ &vd->vdev_removed); ++ ++ if (vd->vdev_faulted || vd->vdev_degraded) { ++ char *aux; ++ ++ vd->vdev_label_aux = ++ VDEV_AUX_ERR_EXCEEDED; ++ if (nvlist_lookup_string(nv, ++ ZPOOL_CONFIG_AUX_STATE, &aux) == 0 && ++ strcmp(aux, "external") == 0) ++ vd->vdev_label_aux = VDEV_AUX_EXTERNAL; ++ } ++ } ++ } ++ ++ /* ++ * Add ourselves to the parent's list of children. ++ */ ++ vdev_add_child(parent, vd); ++ ++ *vdp = vd; ++ ++ return (0); ++} ++ ++void ++vdev_free(vdev_t *vd) ++{ ++ int c, t; ++ spa_t *spa = vd->vdev_spa; ++ ++ /* ++ * vdev_free() implies closing the vdev first. This is simpler than ++ * trying to ensure complicated semantics for all callers. ++ */ ++ vdev_close(vd); ++ ++ ASSERT(!list_link_active(&vd->vdev_config_dirty_node)); ++ ASSERT(!list_link_active(&vd->vdev_state_dirty_node)); ++ ++ /* ++ * Free all children. ++ */ ++ for (c = 0; c < vd->vdev_children; c++) ++ vdev_free(vd->vdev_child[c]); ++ ++ ASSERT(vd->vdev_child == NULL); ++ ASSERT(vd->vdev_guid_sum == vd->vdev_guid); ++ ++ /* ++ * Discard allocation state. ++ */ ++ if (vd->vdev_mg != NULL) { ++ vdev_metaslab_fini(vd); ++ metaslab_group_destroy(vd->vdev_mg); ++ } ++ ++ ASSERT3U(vd->vdev_stat.vs_space, ==, 0); ++ ASSERT3U(vd->vdev_stat.vs_dspace, ==, 0); ++ ASSERT3U(vd->vdev_stat.vs_alloc, ==, 0); ++ ++ /* ++ * Remove this vdev from its parent's child list. ++ */ ++ vdev_remove_child(vd->vdev_parent, vd); ++ ++ ASSERT(vd->vdev_parent == NULL); ++ ++ /* ++ * Clean up vdev structure. 
++ */ ++ vdev_queue_fini(vd); ++ vdev_cache_fini(vd); ++ ++ if (vd->vdev_path) ++ spa_strfree(vd->vdev_path); ++ if (vd->vdev_devid) ++ spa_strfree(vd->vdev_devid); ++ if (vd->vdev_physpath) ++ spa_strfree(vd->vdev_physpath); ++ if (vd->vdev_fru) ++ spa_strfree(vd->vdev_fru); ++ ++ if (vd->vdev_isspare) ++ spa_spare_remove(vd); ++ if (vd->vdev_isl2cache) ++ spa_l2cache_remove(vd); ++ ++ txg_list_destroy(&vd->vdev_ms_list); ++ txg_list_destroy(&vd->vdev_dtl_list); ++ ++ mutex_enter(&vd->vdev_dtl_lock); ++ for (t = 0; t < DTL_TYPES; t++) { ++ space_map_unload(&vd->vdev_dtl[t]); ++ space_map_destroy(&vd->vdev_dtl[t]); ++ } ++ mutex_exit(&vd->vdev_dtl_lock); ++ ++ mutex_destroy(&vd->vdev_dtl_lock); ++ mutex_destroy(&vd->vdev_stat_lock); ++ mutex_destroy(&vd->vdev_probe_lock); ++ ++ if (vd == spa->spa_root_vdev) ++ spa->spa_root_vdev = NULL; ++ ++ kmem_free(vd, sizeof (vdev_t)); ++} ++ ++/* ++ * Transfer top-level vdev state from svd to tvd. ++ */ ++static void ++vdev_top_transfer(vdev_t *svd, vdev_t *tvd) ++{ ++ spa_t *spa = svd->vdev_spa; ++ metaslab_t *msp; ++ vdev_t *vd; ++ int t; ++ ++ ASSERT(tvd == tvd->vdev_top); ++ ++ tvd->vdev_ms_array = svd->vdev_ms_array; ++ tvd->vdev_ms_shift = svd->vdev_ms_shift; ++ tvd->vdev_ms_count = svd->vdev_ms_count; ++ ++ svd->vdev_ms_array = 0; ++ svd->vdev_ms_shift = 0; ++ svd->vdev_ms_count = 0; ++ ++ if (tvd->vdev_mg) ++ ASSERT3P(tvd->vdev_mg, ==, svd->vdev_mg); ++ tvd->vdev_mg = svd->vdev_mg; ++ tvd->vdev_ms = svd->vdev_ms; ++ ++ svd->vdev_mg = NULL; ++ svd->vdev_ms = NULL; ++ ++ if (tvd->vdev_mg != NULL) ++ tvd->vdev_mg->mg_vd = tvd; ++ ++ tvd->vdev_stat.vs_alloc = svd->vdev_stat.vs_alloc; ++ tvd->vdev_stat.vs_space = svd->vdev_stat.vs_space; ++ tvd->vdev_stat.vs_dspace = svd->vdev_stat.vs_dspace; ++ ++ svd->vdev_stat.vs_alloc = 0; ++ svd->vdev_stat.vs_space = 0; ++ svd->vdev_stat.vs_dspace = 0; ++ ++ for (t = 0; t < TXG_SIZE; t++) { ++ while ((msp = txg_list_remove(&svd->vdev_ms_list, t)) != NULL) ++ (void) txg_list_add(&tvd->vdev_ms_list, msp, t); ++ while ((vd = txg_list_remove(&svd->vdev_dtl_list, t)) != NULL) ++ (void) txg_list_add(&tvd->vdev_dtl_list, vd, t); ++ if (txg_list_remove_this(&spa->spa_vdev_txg_list, svd, t)) ++ (void) txg_list_add(&spa->spa_vdev_txg_list, tvd, t); ++ } ++ ++ if (list_link_active(&svd->vdev_config_dirty_node)) { ++ vdev_config_clean(svd); ++ vdev_config_dirty(tvd); ++ } ++ ++ if (list_link_active(&svd->vdev_state_dirty_node)) { ++ vdev_state_clean(svd); ++ vdev_state_dirty(tvd); ++ } ++ ++ tvd->vdev_deflate_ratio = svd->vdev_deflate_ratio; ++ svd->vdev_deflate_ratio = 0; ++ ++ tvd->vdev_islog = svd->vdev_islog; ++ svd->vdev_islog = 0; ++} ++ ++static void ++vdev_top_update(vdev_t *tvd, vdev_t *vd) ++{ ++ int c; ++ ++ if (vd == NULL) ++ return; ++ ++ vd->vdev_top = tvd; ++ ++ for (c = 0; c < vd->vdev_children; c++) ++ vdev_top_update(tvd, vd->vdev_child[c]); ++} ++ ++/* ++ * Add a mirror/replacing vdev above an existing vdev. 
++ */ ++vdev_t * ++vdev_add_parent(vdev_t *cvd, vdev_ops_t *ops) ++{ ++ spa_t *spa = cvd->vdev_spa; ++ vdev_t *pvd = cvd->vdev_parent; ++ vdev_t *mvd; ++ ++ ASSERT(spa_config_held(spa, SCL_ALL, RW_WRITER) == SCL_ALL); ++ ++ mvd = vdev_alloc_common(spa, cvd->vdev_id, 0, ops); ++ ++ mvd->vdev_asize = cvd->vdev_asize; ++ mvd->vdev_min_asize = cvd->vdev_min_asize; ++ mvd->vdev_max_asize = cvd->vdev_max_asize; ++ mvd->vdev_ashift = cvd->vdev_ashift; ++ mvd->vdev_state = cvd->vdev_state; ++ mvd->vdev_crtxg = cvd->vdev_crtxg; ++ ++ vdev_remove_child(pvd, cvd); ++ vdev_add_child(pvd, mvd); ++ cvd->vdev_id = mvd->vdev_children; ++ vdev_add_child(mvd, cvd); ++ vdev_top_update(cvd->vdev_top, cvd->vdev_top); ++ ++ if (mvd == mvd->vdev_top) ++ vdev_top_transfer(cvd, mvd); ++ ++ return (mvd); ++} ++ ++/* ++ * Remove a 1-way mirror/replacing vdev from the tree. ++ */ ++void ++vdev_remove_parent(vdev_t *cvd) ++{ ++ vdev_t *mvd = cvd->vdev_parent; ++ vdev_t *pvd = mvd->vdev_parent; ++ ++ ASSERT(spa_config_held(cvd->vdev_spa, SCL_ALL, RW_WRITER) == SCL_ALL); ++ ++ ASSERT(mvd->vdev_children == 1); ++ ASSERT(mvd->vdev_ops == &vdev_mirror_ops || ++ mvd->vdev_ops == &vdev_replacing_ops || ++ mvd->vdev_ops == &vdev_spare_ops); ++ cvd->vdev_ashift = mvd->vdev_ashift; ++ ++ vdev_remove_child(mvd, cvd); ++ vdev_remove_child(pvd, mvd); ++ ++ /* ++ * If cvd will replace mvd as a top-level vdev, preserve mvd's guid. ++ * Otherwise, we could have detached an offline device, and when we ++ * go to import the pool we'll think we have two top-level vdevs, ++ * instead of a different version of the same top-level vdev. ++ */ ++ if (mvd->vdev_top == mvd) { ++ uint64_t guid_delta = mvd->vdev_guid - cvd->vdev_guid; ++ cvd->vdev_orig_guid = cvd->vdev_guid; ++ cvd->vdev_guid += guid_delta; ++ cvd->vdev_guid_sum += guid_delta; ++ } ++ cvd->vdev_id = mvd->vdev_id; ++ vdev_add_child(pvd, cvd); ++ vdev_top_update(cvd->vdev_top, cvd->vdev_top); ++ ++ if (cvd == cvd->vdev_top) ++ vdev_top_transfer(mvd, cvd); ++ ++ ASSERT(mvd->vdev_children == 0); ++ vdev_free(mvd); ++} ++ ++int ++vdev_metaslab_init(vdev_t *vd, uint64_t txg) ++{ ++ spa_t *spa = vd->vdev_spa; ++ objset_t *mos = spa->spa_meta_objset; ++ uint64_t m; ++ uint64_t oldc = vd->vdev_ms_count; ++ uint64_t newc = vd->vdev_asize >> vd->vdev_ms_shift; ++ metaslab_t **mspp; ++ int error; ++ ++ ASSERT(txg == 0 || spa_config_held(spa, SCL_ALLOC, RW_WRITER)); ++ ++ /* ++ * This vdev is not being allocated from yet or is a hole. ++ */ ++ if (vd->vdev_ms_shift == 0) ++ return (0); ++ ++ ASSERT(!vd->vdev_ishole); ++ ++ /* ++ * Compute the raidz-deflation ratio. Note, we hard-code ++ * in 128k (1 << 17) because it is the current "typical" blocksize. ++ * Even if SPA_MAXBLOCKSIZE changes, this algorithm must never change, ++ * or we will inconsistently account for existing bp's. 
++ */ ++ vd->vdev_deflate_ratio = (1 << 17) / ++ (vdev_psize_to_asize(vd, 1 << 17) >> SPA_MINBLOCKSHIFT); ++ ++ ASSERT(oldc <= newc); ++ ++ mspp = kmem_zalloc(newc * sizeof (*mspp), KM_PUSHPAGE | KM_NODEBUG); ++ ++ if (oldc != 0) { ++ bcopy(vd->vdev_ms, mspp, oldc * sizeof (*mspp)); ++ kmem_free(vd->vdev_ms, oldc * sizeof (*mspp)); ++ } ++ ++ vd->vdev_ms = mspp; ++ vd->vdev_ms_count = newc; ++ ++ for (m = oldc; m < newc; m++) { ++ space_map_obj_t smo = { 0, 0, 0 }; ++ if (txg == 0) { ++ uint64_t object = 0; ++ error = dmu_read(mos, vd->vdev_ms_array, ++ m * sizeof (uint64_t), sizeof (uint64_t), &object, ++ DMU_READ_PREFETCH); ++ if (error) ++ return (error); ++ if (object != 0) { ++ dmu_buf_t *db; ++ error = dmu_bonus_hold(mos, object, FTAG, &db); ++ if (error) ++ return (error); ++ ASSERT3U(db->db_size, >=, sizeof (smo)); ++ bcopy(db->db_data, &smo, sizeof (smo)); ++ ASSERT3U(smo.smo_object, ==, object); ++ dmu_buf_rele(db, FTAG); ++ } ++ } ++ vd->vdev_ms[m] = metaslab_init(vd->vdev_mg, &smo, ++ m << vd->vdev_ms_shift, 1ULL << vd->vdev_ms_shift, txg); ++ } ++ ++ if (txg == 0) ++ spa_config_enter(spa, SCL_ALLOC, FTAG, RW_WRITER); ++ ++ /* ++ * If the vdev is being removed we don't activate ++ * the metaslabs since we want to ensure that no new ++ * allocations are performed on this device. ++ */ ++ if (oldc == 0 && !vd->vdev_removing) ++ metaslab_group_activate(vd->vdev_mg); ++ ++ if (txg == 0) ++ spa_config_exit(spa, SCL_ALLOC, FTAG); ++ ++ return (0); ++} ++ ++void ++vdev_metaslab_fini(vdev_t *vd) ++{ ++ uint64_t m; ++ uint64_t count = vd->vdev_ms_count; ++ ++ if (vd->vdev_ms != NULL) { ++ metaslab_group_passivate(vd->vdev_mg); ++ for (m = 0; m < count; m++) ++ if (vd->vdev_ms[m] != NULL) ++ metaslab_fini(vd->vdev_ms[m]); ++ kmem_free(vd->vdev_ms, count * sizeof (metaslab_t *)); ++ vd->vdev_ms = NULL; ++ } ++ ++ ASSERT3U(vd->vdev_pending_fastwrite, ==, 0); ++} ++ ++typedef struct vdev_probe_stats { ++ boolean_t vps_readable; ++ boolean_t vps_writeable; ++ int vps_flags; ++} vdev_probe_stats_t; ++ ++static void ++vdev_probe_done(zio_t *zio) ++{ ++ spa_t *spa = zio->io_spa; ++ vdev_t *vd = zio->io_vd; ++ vdev_probe_stats_t *vps = zio->io_private; ++ ++ ASSERT(vd->vdev_probe_zio != NULL); ++ ++ if (zio->io_type == ZIO_TYPE_READ) { ++ if (zio->io_error == 0) ++ vps->vps_readable = 1; ++ if (zio->io_error == 0 && spa_writeable(spa)) { ++ zio_nowait(zio_write_phys(vd->vdev_probe_zio, vd, ++ zio->io_offset, zio->io_size, zio->io_data, ++ ZIO_CHECKSUM_OFF, vdev_probe_done, vps, ++ ZIO_PRIORITY_SYNC_WRITE, vps->vps_flags, B_TRUE)); ++ } else { ++ zio_buf_free(zio->io_data, zio->io_size); ++ } ++ } else if (zio->io_type == ZIO_TYPE_WRITE) { ++ if (zio->io_error == 0) ++ vps->vps_writeable = 1; ++ zio_buf_free(zio->io_data, zio->io_size); ++ } else if (zio->io_type == ZIO_TYPE_NULL) { ++ zio_t *pio; ++ ++ vd->vdev_cant_read |= !vps->vps_readable; ++ vd->vdev_cant_write |= !vps->vps_writeable; ++ ++ if (vdev_readable(vd) && ++ (vdev_writeable(vd) || !spa_writeable(spa))) { ++ zio->io_error = 0; ++ } else { ++ ASSERT(zio->io_error != 0); ++ zfs_ereport_post(FM_EREPORT_ZFS_PROBE_FAILURE, ++ spa, vd, NULL, 0, 0); ++ zio->io_error = ENXIO; ++ } ++ ++ mutex_enter(&vd->vdev_probe_lock); ++ ASSERT(vd->vdev_probe_zio == zio); ++ vd->vdev_probe_zio = NULL; ++ mutex_exit(&vd->vdev_probe_lock); ++ ++ while ((pio = zio_walk_parents(zio)) != NULL) ++ if (!vdev_accessible(vd, pio)) ++ pio->io_error = ENXIO; ++ ++ kmem_free(vps, sizeof (*vps)); ++ } ++} ++ ++/* ++ * Determine whether this device is accessible by 
reading and writing ++ * to several known locations: the pad regions of each vdev label ++ * but the first (which we leave alone in case it contains a VTOC). ++ */ ++zio_t * ++vdev_probe(vdev_t *vd, zio_t *zio) ++{ ++ spa_t *spa = vd->vdev_spa; ++ vdev_probe_stats_t *vps = NULL; ++ zio_t *pio; ++ int l; ++ ++ ASSERT(vd->vdev_ops->vdev_op_leaf); ++ ++ /* ++ * Don't probe the probe. ++ */ ++ if (zio && (zio->io_flags & ZIO_FLAG_PROBE)) ++ return (NULL); ++ ++ /* ++ * To prevent 'probe storms' when a device fails, we create ++ * just one probe i/o at a time. All zios that want to probe ++ * this vdev will become parents of the probe io. ++ */ ++ mutex_enter(&vd->vdev_probe_lock); ++ ++ if ((pio = vd->vdev_probe_zio) == NULL) { ++ vps = kmem_zalloc(sizeof (*vps), KM_PUSHPAGE); ++ ++ vps->vps_flags = ZIO_FLAG_CANFAIL | ZIO_FLAG_PROBE | ++ ZIO_FLAG_DONT_CACHE | ZIO_FLAG_DONT_AGGREGATE | ++ ZIO_FLAG_TRYHARD; ++ ++ if (spa_config_held(spa, SCL_ZIO, RW_WRITER)) { ++ /* ++ * vdev_cant_read and vdev_cant_write can only ++ * transition from TRUE to FALSE when we have the ++ * SCL_ZIO lock as writer; otherwise they can only ++ * transition from FALSE to TRUE. This ensures that ++ * any zio looking at these values can assume that ++ * failures persist for the life of the I/O. That's ++ * important because when a device has intermittent ++ * connectivity problems, we want to ensure that ++ * they're ascribed to the device (ENXIO) and not ++ * the zio (EIO). ++ * ++ * Since we hold SCL_ZIO as writer here, clear both ++ * values so the probe can reevaluate from first ++ * principles. ++ */ ++ vps->vps_flags |= ZIO_FLAG_CONFIG_WRITER; ++ vd->vdev_cant_read = B_FALSE; ++ vd->vdev_cant_write = B_FALSE; ++ } ++ ++ vd->vdev_probe_zio = pio = zio_null(NULL, spa, vd, ++ vdev_probe_done, vps, ++ vps->vps_flags | ZIO_FLAG_DONT_PROPAGATE); ++ ++ /* ++ * We can't change the vdev state in this context, so we ++ * kick off an async task to do it on our behalf. ++ */ ++ if (zio != NULL) { ++ vd->vdev_probe_wanted = B_TRUE; ++ spa_async_request(spa, SPA_ASYNC_PROBE); ++ } ++ } ++ ++ if (zio != NULL) ++ zio_add_child(zio, pio); ++ ++ mutex_exit(&vd->vdev_probe_lock); ++ ++ if (vps == NULL) { ++ ASSERT(zio != NULL); ++ return (NULL); ++ } ++ ++ for (l = 1; l < VDEV_LABELS; l++) { ++ zio_nowait(zio_read_phys(pio, vd, ++ vdev_label_offset(vd->vdev_psize, l, ++ offsetof(vdev_label_t, vl_pad2)), ++ VDEV_PAD_SIZE, zio_buf_alloc(VDEV_PAD_SIZE), ++ ZIO_CHECKSUM_OFF, vdev_probe_done, vps, ++ ZIO_PRIORITY_SYNC_READ, vps->vps_flags, B_TRUE)); ++ } ++ ++ if (zio == NULL) ++ return (pio); ++ ++ zio_nowait(pio); ++ return (NULL); ++} ++ ++static void ++vdev_open_child(void *arg) ++{ ++ vdev_t *vd = arg; ++ ++ vd->vdev_open_thread = curthread; ++ vd->vdev_open_error = vdev_open(vd); ++ vd->vdev_open_thread = NULL; ++} ++ ++boolean_t ++vdev_uses_zvols(vdev_t *vd) ++{ ++/* ++ * Stacking zpools on top of zvols is unsupported until we implement a method ++ * for determining if an arbitrary block device is a zvol without using the ++ * path. Solaris would check the 'zvol' path component but this does not ++ * exist in the Linux port, so we really should do something like stat the ++ * file and check the major number. This is complicated by the fact that ++ * we need to do this portably in user or kernel space. 
++ */ ++#if 0 ++ int c; ++ ++ if (vd->vdev_path && strncmp(vd->vdev_path, ZVOL_DIR, ++ strlen(ZVOL_DIR)) == 0) ++ return (B_TRUE); ++ for (c = 0; c < vd->vdev_children; c++) ++ if (vdev_uses_zvols(vd->vdev_child[c])) ++ return (B_TRUE); ++#endif ++ return (B_FALSE); ++} ++ ++void ++vdev_open_children(vdev_t *vd) ++{ ++ taskq_t *tq; ++ int children = vd->vdev_children; ++ int c; ++ ++ /* ++ * in order to handle pools on top of zvols, do the opens ++ * in a single thread so that the same thread holds the ++ * spa_namespace_lock ++ */ ++ if (vdev_uses_zvols(vd)) { ++ for (c = 0; c < children; c++) ++ vd->vdev_child[c]->vdev_open_error = ++ vdev_open(vd->vdev_child[c]); ++ return; ++ } ++ tq = taskq_create("vdev_open", children, minclsyspri, ++ children, children, TASKQ_PREPOPULATE); ++ ++ for (c = 0; c < children; c++) ++ VERIFY(taskq_dispatch(tq, vdev_open_child, vd->vdev_child[c], ++ TQ_SLEEP) != 0); ++ ++ taskq_destroy(tq); ++} ++ ++/* ++ * Prepare a virtual device for access. ++ */ ++int ++vdev_open(vdev_t *vd) ++{ ++ spa_t *spa = vd->vdev_spa; ++ int error; ++ uint64_t osize = 0; ++ uint64_t max_osize = 0; ++ uint64_t asize, max_asize, psize; ++ uint64_t ashift = 0; ++ int c; ++ ++ ASSERT(vd->vdev_open_thread == curthread || ++ spa_config_held(spa, SCL_STATE_ALL, RW_WRITER) == SCL_STATE_ALL); ++ ASSERT(vd->vdev_state == VDEV_STATE_CLOSED || ++ vd->vdev_state == VDEV_STATE_CANT_OPEN || ++ vd->vdev_state == VDEV_STATE_OFFLINE); ++ ++ vd->vdev_stat.vs_aux = VDEV_AUX_NONE; ++ vd->vdev_cant_read = B_FALSE; ++ vd->vdev_cant_write = B_FALSE; ++ vd->vdev_min_asize = vdev_get_min_asize(vd); ++ ++ /* ++ * If this vdev is not removed, check its fault status. If it's ++ * faulted, bail out of the open. ++ */ ++ if (!vd->vdev_removed && vd->vdev_faulted) { ++ ASSERT(vd->vdev_children == 0); ++ ASSERT(vd->vdev_label_aux == VDEV_AUX_ERR_EXCEEDED || ++ vd->vdev_label_aux == VDEV_AUX_EXTERNAL); ++ vdev_set_state(vd, B_TRUE, VDEV_STATE_FAULTED, ++ vd->vdev_label_aux); ++ return (ENXIO); ++ } else if (vd->vdev_offline) { ++ ASSERT(vd->vdev_children == 0); ++ vdev_set_state(vd, B_TRUE, VDEV_STATE_OFFLINE, VDEV_AUX_NONE); ++ return (ENXIO); ++ } ++ ++ error = vd->vdev_ops->vdev_op_open(vd, &osize, &max_osize, &ashift); ++ ++ /* ++ * Reset the vdev_reopening flag so that we actually close ++ * the vdev on error. ++ */ ++ vd->vdev_reopening = B_FALSE; ++ if (zio_injection_enabled && error == 0) ++ error = zio_handle_device_injection(vd, NULL, ENXIO); ++ ++ if (error) { ++ if (vd->vdev_removed && ++ vd->vdev_stat.vs_aux != VDEV_AUX_OPEN_FAILED) ++ vd->vdev_removed = B_FALSE; ++ ++ vdev_set_state(vd, B_TRUE, VDEV_STATE_CANT_OPEN, ++ vd->vdev_stat.vs_aux); ++ return (error); ++ } ++ ++ vd->vdev_removed = B_FALSE; ++ ++ /* ++ * Recheck the faulted flag now that we have confirmed that ++ * the vdev is accessible. If we're faulted, bail. ++ */ ++ if (vd->vdev_faulted) { ++ ASSERT(vd->vdev_children == 0); ++ ASSERT(vd->vdev_label_aux == VDEV_AUX_ERR_EXCEEDED || ++ vd->vdev_label_aux == VDEV_AUX_EXTERNAL); ++ vdev_set_state(vd, B_TRUE, VDEV_STATE_FAULTED, ++ vd->vdev_label_aux); ++ return (ENXIO); ++ } ++ ++ if (vd->vdev_degraded) { ++ ASSERT(vd->vdev_children == 0); ++ vdev_set_state(vd, B_TRUE, VDEV_STATE_DEGRADED, ++ VDEV_AUX_ERR_EXCEEDED); ++ } else { ++ vdev_set_state(vd, B_TRUE, VDEV_STATE_HEALTHY, 0); ++ } ++ ++ /* ++ * For hole or missing vdevs we just return success. 
++ */ ++ if (vd->vdev_ishole || vd->vdev_ops == &vdev_missing_ops) ++ return (0); ++ ++ for (c = 0; c < vd->vdev_children; c++) { ++ if (vd->vdev_child[c]->vdev_state != VDEV_STATE_HEALTHY) { ++ vdev_set_state(vd, B_TRUE, VDEV_STATE_DEGRADED, ++ VDEV_AUX_NONE); ++ break; ++ } ++ } ++ ++ osize = P2ALIGN(osize, (uint64_t)sizeof (vdev_label_t)); ++ max_osize = P2ALIGN(max_osize, (uint64_t)sizeof (vdev_label_t)); ++ ++ if (vd->vdev_children == 0) { ++ if (osize < SPA_MINDEVSIZE) { ++ vdev_set_state(vd, B_TRUE, VDEV_STATE_CANT_OPEN, ++ VDEV_AUX_TOO_SMALL); ++ return (EOVERFLOW); ++ } ++ psize = osize; ++ asize = osize - (VDEV_LABEL_START_SIZE + VDEV_LABEL_END_SIZE); ++ max_asize = max_osize - (VDEV_LABEL_START_SIZE + ++ VDEV_LABEL_END_SIZE); ++ } else { ++ if (vd->vdev_parent != NULL && osize < SPA_MINDEVSIZE - ++ (VDEV_LABEL_START_SIZE + VDEV_LABEL_END_SIZE)) { ++ vdev_set_state(vd, B_TRUE, VDEV_STATE_CANT_OPEN, ++ VDEV_AUX_TOO_SMALL); ++ return (EOVERFLOW); ++ } ++ psize = 0; ++ asize = osize; ++ max_asize = max_osize; ++ } ++ ++ vd->vdev_psize = psize; ++ ++ /* ++ * Make sure the allocatable size hasn't shrunk. ++ */ ++ if (asize < vd->vdev_min_asize) { ++ vdev_set_state(vd, B_TRUE, VDEV_STATE_CANT_OPEN, ++ VDEV_AUX_BAD_LABEL); ++ return (EINVAL); ++ } ++ ++ if (vd->vdev_asize == 0) { ++ /* ++ * This is the first-ever open, so use the computed values. ++ * For testing purposes, a higher ashift can be requested. ++ */ ++ vd->vdev_asize = asize; ++ vd->vdev_max_asize = max_asize; ++ vd->vdev_ashift = MAX(ashift, vd->vdev_ashift); ++ } else { ++ /* ++ * Make sure the alignment requirement hasn't increased. ++ */ ++ if (ashift > vd->vdev_top->vdev_ashift) { ++ vdev_set_state(vd, B_TRUE, VDEV_STATE_CANT_OPEN, ++ VDEV_AUX_BAD_LABEL); ++ return (EINVAL); ++ } ++ vd->vdev_max_asize = max_asize; ++ } ++ ++ /* ++ * If all children are healthy and the asize has increased, ++ * then we've experienced dynamic LUN growth. If automatic ++ * expansion is enabled then use the additional space. ++ */ ++ if (vd->vdev_state == VDEV_STATE_HEALTHY && asize > vd->vdev_asize && ++ (vd->vdev_expanding || spa->spa_autoexpand)) ++ vd->vdev_asize = asize; ++ ++ vdev_set_min_asize(vd); ++ ++ /* ++ * Ensure we can issue some IO before declaring the ++ * vdev open for business. ++ */ ++ if (vd->vdev_ops->vdev_op_leaf && ++ (error = zio_wait(vdev_probe(vd, NULL))) != 0) { ++ vdev_set_state(vd, B_TRUE, VDEV_STATE_FAULTED, ++ VDEV_AUX_ERR_EXCEEDED); ++ return (error); ++ } ++ ++ /* ++ * If a leaf vdev has a DTL, and seems healthy, then kick off a ++ * resilver. But don't do this if we are doing a reopen for a scrub, ++ * since this would just restart the scrub we are already doing. ++ */ ++ if (vd->vdev_ops->vdev_op_leaf && !spa->spa_scrub_reopen && ++ vdev_resilver_needed(vd, NULL, NULL)) ++ spa_async_request(spa, SPA_ASYNC_RESILVER); ++ ++ return (0); ++} ++ ++/* ++ * Called once the vdevs are all opened, this routine validates the label ++ * contents. This needs to be done before vdev_load() so that we don't ++ * inadvertently do repair I/Os to the wrong device. ++ * ++ * If 'strict' is false ignore the spa guid check. This is necessary because ++ * if the machine crashed during a re-guid the new guid might have been written ++ * to all of the vdev labels, but not the cached config. The strict check ++ * will be performed when the pool is opened again using the mos config. ++ * ++ * This function will only return failure if one of the vdevs indicates that it ++ * has since been destroyed or exported. 
This is only possible if ++ * /etc/zfs/zpool.cache was readonly at the time. Otherwise, the vdev state ++ * will be updated but the function will return 0. ++ */ ++int ++vdev_validate(vdev_t *vd, boolean_t strict) ++{ ++ spa_t *spa = vd->vdev_spa; ++ nvlist_t *label; ++ uint64_t guid = 0, top_guid; ++ uint64_t state; ++ int c; ++ ++ for (c = 0; c < vd->vdev_children; c++) ++ if (vdev_validate(vd->vdev_child[c], strict) != 0) ++ return (EBADF); ++ ++ /* ++ * If the device has already failed, or was marked offline, don't do ++ * any further validation. Otherwise, label I/O will fail and we will ++ * overwrite the previous state. ++ */ ++ if (vd->vdev_ops->vdev_op_leaf && vdev_readable(vd)) { ++ uint64_t aux_guid = 0; ++ nvlist_t *nvl; ++ ++ if ((label = vdev_label_read_config(vd)) == NULL) { ++ vdev_set_state(vd, B_TRUE, VDEV_STATE_CANT_OPEN, ++ VDEV_AUX_BAD_LABEL); ++ return (0); ++ } ++ ++ /* ++ * Determine if this vdev has been split off into another ++ * pool. If so, then refuse to open it. ++ */ ++ if (nvlist_lookup_uint64(label, ZPOOL_CONFIG_SPLIT_GUID, ++ &aux_guid) == 0 && aux_guid == spa_guid(spa)) { ++ vdev_set_state(vd, B_FALSE, VDEV_STATE_CANT_OPEN, ++ VDEV_AUX_SPLIT_POOL); ++ nvlist_free(label); ++ return (0); ++ } ++ ++ if (strict && (nvlist_lookup_uint64(label, ++ ZPOOL_CONFIG_POOL_GUID, &guid) != 0 || ++ guid != spa_guid(spa))) { ++ vdev_set_state(vd, B_FALSE, VDEV_STATE_CANT_OPEN, ++ VDEV_AUX_CORRUPT_DATA); ++ nvlist_free(label); ++ return (0); ++ } ++ ++ if (nvlist_lookup_nvlist(label, ZPOOL_CONFIG_VDEV_TREE, &nvl) ++ != 0 || nvlist_lookup_uint64(nvl, ZPOOL_CONFIG_ORIG_GUID, ++ &aux_guid) != 0) ++ aux_guid = 0; ++ ++ /* ++ * If this vdev just became a top-level vdev because its ++ * sibling was detached, it will have adopted the parent's ++ * vdev guid -- but the label may or may not be on disk yet. ++ * Fortunately, either version of the label will have the ++ * same top guid, so if we're a top-level vdev, we can ++ * safely compare to that instead. ++ * ++ * If we split this vdev off instead, then we also check the ++ * original pool's guid. We don't want to consider the vdev ++ * corrupt if it is partway through a split operation. ++ */ ++ if (nvlist_lookup_uint64(label, ZPOOL_CONFIG_GUID, ++ &guid) != 0 || ++ nvlist_lookup_uint64(label, ZPOOL_CONFIG_TOP_GUID, ++ &top_guid) != 0 || ++ ((vd->vdev_guid != guid && vd->vdev_guid != aux_guid) && ++ (vd->vdev_guid != top_guid || vd != vd->vdev_top))) { ++ vdev_set_state(vd, B_FALSE, VDEV_STATE_CANT_OPEN, ++ VDEV_AUX_CORRUPT_DATA); ++ nvlist_free(label); ++ return (0); ++ } ++ ++ if (nvlist_lookup_uint64(label, ZPOOL_CONFIG_POOL_STATE, ++ &state) != 0) { ++ vdev_set_state(vd, B_FALSE, VDEV_STATE_CANT_OPEN, ++ VDEV_AUX_CORRUPT_DATA); ++ nvlist_free(label); ++ return (0); ++ } ++ ++ nvlist_free(label); ++ ++ /* ++ * If this is a verbatim import, no need to check the ++ * state of the pool. ++ */ ++ if (!(spa->spa_import_flags & ZFS_IMPORT_VERBATIM) && ++ spa_load_state(spa) == SPA_LOAD_OPEN && ++ state != POOL_STATE_ACTIVE) ++ return (EBADF); ++ ++ /* ++ * If we were able to open and validate a vdev that was ++ * previously marked permanently unavailable, clear that state ++ * now. ++ */ ++ if (vd->vdev_not_present) ++ vd->vdev_not_present = 0; ++ } ++ ++ return (0); ++} ++ ++/* ++ * Close a virtual device. 
++ */ ++void ++vdev_close(vdev_t *vd) ++{ ++ vdev_t *pvd = vd->vdev_parent; ++ ASSERTV(spa_t *spa = vd->vdev_spa); ++ ++ ASSERT(spa_config_held(spa, SCL_STATE_ALL, RW_WRITER) == SCL_STATE_ALL); ++ ++ /* ++ * If our parent is reopening, then we are as well, unless we are ++ * going offline. ++ */ ++ if (pvd != NULL && pvd->vdev_reopening) ++ vd->vdev_reopening = (pvd->vdev_reopening && !vd->vdev_offline); ++ ++ vd->vdev_ops->vdev_op_close(vd); ++ ++ vdev_cache_purge(vd); ++ ++ /* ++ * We record the previous state before we close it, so that if we are ++ * doing a reopen(), we don't generate FMA ereports if we notice that ++ * it's still faulted. ++ */ ++ vd->vdev_prevstate = vd->vdev_state; ++ ++ if (vd->vdev_offline) ++ vd->vdev_state = VDEV_STATE_OFFLINE; ++ else ++ vd->vdev_state = VDEV_STATE_CLOSED; ++ vd->vdev_stat.vs_aux = VDEV_AUX_NONE; ++} ++ ++void ++vdev_hold(vdev_t *vd) ++{ ++ spa_t *spa = vd->vdev_spa; ++ int c; ++ ++ ASSERT(spa_is_root(spa)); ++ if (spa->spa_state == POOL_STATE_UNINITIALIZED) ++ return; ++ ++ for (c = 0; c < vd->vdev_children; c++) ++ vdev_hold(vd->vdev_child[c]); ++ ++ if (vd->vdev_ops->vdev_op_leaf) ++ vd->vdev_ops->vdev_op_hold(vd); ++} ++ ++void ++vdev_rele(vdev_t *vd) ++{ ++ int c; ++ ++ ASSERT(spa_is_root(vd->vdev_spa)); ++ for (c = 0; c < vd->vdev_children; c++) ++ vdev_rele(vd->vdev_child[c]); ++ ++ if (vd->vdev_ops->vdev_op_leaf) ++ vd->vdev_ops->vdev_op_rele(vd); ++} ++ ++/* ++ * Reopen all interior vdevs and any unopened leaves. We don't actually ++ * reopen leaf vdevs which had previously been opened as they might deadlock ++ * on the spa_config_lock. Instead we only obtain the leaf's physical size. ++ * If the leaf has never been opened then open it, as usual. ++ */ ++void ++vdev_reopen(vdev_t *vd) ++{ ++ spa_t *spa = vd->vdev_spa; ++ ++ ASSERT(spa_config_held(spa, SCL_STATE_ALL, RW_WRITER) == SCL_STATE_ALL); ++ ++ /* set the reopening flag unless we're taking the vdev offline */ ++ vd->vdev_reopening = !vd->vdev_offline; ++ vdev_close(vd); ++ (void) vdev_open(vd); ++ ++ /* ++ * Call vdev_validate() here to make sure we have the same device. ++ * Otherwise, a device with an invalid label could be successfully ++ * opened in response to vdev_reopen(). ++ */ ++ if (vd->vdev_aux) { ++ (void) vdev_validate_aux(vd); ++ if (vdev_readable(vd) && vdev_writeable(vd) && ++ vd->vdev_aux == &spa->spa_l2cache && ++ !l2arc_vdev_present(vd)) ++ l2arc_add_vdev(spa, vd); ++ } else { ++ (void) vdev_validate(vd, B_TRUE); ++ } ++ ++ /* ++ * Reassess parent vdev's health. ++ */ ++ vdev_propagate_state(vd); ++} ++ ++int ++vdev_create(vdev_t *vd, uint64_t txg, boolean_t isreplacing) ++{ ++ int error; ++ ++ /* ++ * Normally, partial opens (e.g. of a mirror) are allowed. ++ * For a create, however, we want to fail the request if ++ * there are any components we can't open. ++ */ ++ error = vdev_open(vd); ++ ++ if (error || vd->vdev_state != VDEV_STATE_HEALTHY) { ++ vdev_close(vd); ++ return (error ? error : ENXIO); ++ } ++ ++ /* ++ * Recursively initialize all labels. ++ */ ++ if ((error = vdev_label_init(vd, txg, isreplacing ? ++ VDEV_LABEL_REPLACE : VDEV_LABEL_CREATE)) != 0) { ++ vdev_close(vd); ++ return (error); ++ } ++ ++ return (0); ++} ++ ++void ++vdev_metaslab_set_size(vdev_t *vd) ++{ ++ /* ++ * Aim for roughly 200 metaslabs per vdev. 
++ */ ++ vd->vdev_ms_shift = highbit(vd->vdev_asize / 200); ++ vd->vdev_ms_shift = MAX(vd->vdev_ms_shift, SPA_MAXBLOCKSHIFT); ++} ++ ++void ++vdev_dirty(vdev_t *vd, int flags, void *arg, uint64_t txg) ++{ ++ ASSERT(vd == vd->vdev_top); ++ ASSERT(!vd->vdev_ishole); ++ ASSERT(ISP2(flags)); ++ ASSERT(spa_writeable(vd->vdev_spa)); ++ ++ if (flags & VDD_METASLAB) ++ (void) txg_list_add(&vd->vdev_ms_list, arg, txg); ++ ++ if (flags & VDD_DTL) ++ (void) txg_list_add(&vd->vdev_dtl_list, arg, txg); ++ ++ (void) txg_list_add(&vd->vdev_spa->spa_vdev_txg_list, vd, txg); ++} ++ ++/* ++ * DTLs. ++ * ++ * A vdev's DTL (dirty time log) is the set of transaction groups for which ++ * the vdev has less than perfect replication. There are four kinds of DTL: ++ * ++ * DTL_MISSING: txgs for which the vdev has no valid copies of the data ++ * ++ * DTL_PARTIAL: txgs for which data is available, but not fully replicated ++ * ++ * DTL_SCRUB: the txgs that could not be repaired by the last scrub; upon ++ * scrub completion, DTL_SCRUB replaces DTL_MISSING in the range of ++ * txgs that was scrubbed. ++ * ++ * DTL_OUTAGE: txgs which cannot currently be read, whether due to ++ * persistent errors or just some device being offline. ++ * Unlike the other three, the DTL_OUTAGE map is not generally ++ * maintained; it's only computed when needed, typically to ++ * determine whether a device can be detached. ++ * ++ * For leaf vdevs, DTL_MISSING and DTL_PARTIAL are identical: the device ++ * either has the data or it doesn't. ++ * ++ * For interior vdevs such as mirror and RAID-Z the picture is more complex. ++ * A vdev's DTL_PARTIAL is the union of its children's DTL_PARTIALs, because ++ * if any child is less than fully replicated, then so is its parent. ++ * A vdev's DTL_MISSING is a modified union of its children's DTL_MISSINGs, ++ * comprising only those txgs which appear in 'maxfaults' or more children; ++ * those are the txgs we don't have enough replication to read. For example, ++ * double-parity RAID-Z can tolerate up to two missing devices (maxfaults == 2); ++ * thus, its DTL_MISSING consists of the set of txgs that appear in more than ++ * two child DTL_MISSING maps. ++ * ++ * It should be clear from the above that to compute the DTLs and outage maps ++ * for all vdevs, it suffices to know just the leaf vdevs' DTL_MISSING maps. ++ * Therefore, that is all we keep on disk. When loading the pool, or after ++ * a configuration change, we generate all other DTLs from first principles. 
++ */ ++void ++vdev_dtl_dirty(vdev_t *vd, vdev_dtl_type_t t, uint64_t txg, uint64_t size) ++{ ++ space_map_t *sm = &vd->vdev_dtl[t]; ++ ++ ASSERT(t < DTL_TYPES); ++ ASSERT(vd != vd->vdev_spa->spa_root_vdev); ++ ASSERT(spa_writeable(vd->vdev_spa)); ++ ++ mutex_enter(sm->sm_lock); ++ if (!space_map_contains(sm, txg, size)) ++ space_map_add(sm, txg, size); ++ mutex_exit(sm->sm_lock); ++} ++ ++boolean_t ++vdev_dtl_contains(vdev_t *vd, vdev_dtl_type_t t, uint64_t txg, uint64_t size) ++{ ++ space_map_t *sm = &vd->vdev_dtl[t]; ++ boolean_t dirty = B_FALSE; ++ ++ ASSERT(t < DTL_TYPES); ++ ASSERT(vd != vd->vdev_spa->spa_root_vdev); ++ ++ mutex_enter(sm->sm_lock); ++ if (sm->sm_space != 0) ++ dirty = space_map_contains(sm, txg, size); ++ mutex_exit(sm->sm_lock); ++ ++ return (dirty); ++} ++ ++boolean_t ++vdev_dtl_empty(vdev_t *vd, vdev_dtl_type_t t) ++{ ++ space_map_t *sm = &vd->vdev_dtl[t]; ++ boolean_t empty; ++ ++ mutex_enter(sm->sm_lock); ++ empty = (sm->sm_space == 0); ++ mutex_exit(sm->sm_lock); ++ ++ return (empty); ++} ++ ++/* ++ * Reassess DTLs after a config change or scrub completion. ++ */ ++void ++vdev_dtl_reassess(vdev_t *vd, uint64_t txg, uint64_t scrub_txg, int scrub_done) ++{ ++ spa_t *spa = vd->vdev_spa; ++ avl_tree_t reftree; ++ int c, t, minref; ++ ++ ASSERT(spa_config_held(spa, SCL_ALL, RW_READER) != 0); ++ ++ for (c = 0; c < vd->vdev_children; c++) ++ vdev_dtl_reassess(vd->vdev_child[c], txg, ++ scrub_txg, scrub_done); ++ ++ if (vd == spa->spa_root_vdev || vd->vdev_ishole || vd->vdev_aux) ++ return; ++ ++ if (vd->vdev_ops->vdev_op_leaf) { ++ dsl_scan_t *scn = spa->spa_dsl_pool->dp_scan; ++ ++ mutex_enter(&vd->vdev_dtl_lock); ++ if (scrub_txg != 0 && ++ (spa->spa_scrub_started || ++ (scn && scn->scn_phys.scn_errors == 0))) { ++ /* ++ * We completed a scrub up to scrub_txg. If we ++ * did it without rebooting, then the scrub dtl ++ * will be valid, so excise the old region and ++ * fold in the scrub dtl. Otherwise, leave the ++ * dtl as-is if there was an error. ++ * ++ * There's little trick here: to excise the beginning ++ * of the DTL_MISSING map, we put it into a reference ++ * tree and then add a segment with refcnt -1 that ++ * covers the range [0, scrub_txg). This means ++ * that each txg in that range has refcnt -1 or 0. ++ * We then add DTL_SCRUB with a refcnt of 2, so that ++ * entries in the range [0, scrub_txg) will have a ++ * positive refcnt -- either 1 or 2. We then convert ++ * the reference tree into the new DTL_MISSING map. 
++ */ ++ space_map_ref_create(&reftree); ++ space_map_ref_add_map(&reftree, ++ &vd->vdev_dtl[DTL_MISSING], 1); ++ space_map_ref_add_seg(&reftree, 0, scrub_txg, -1); ++ space_map_ref_add_map(&reftree, ++ &vd->vdev_dtl[DTL_SCRUB], 2); ++ space_map_ref_generate_map(&reftree, ++ &vd->vdev_dtl[DTL_MISSING], 1); ++ space_map_ref_destroy(&reftree); ++ } ++ space_map_vacate(&vd->vdev_dtl[DTL_PARTIAL], NULL, NULL); ++ space_map_walk(&vd->vdev_dtl[DTL_MISSING], ++ space_map_add, &vd->vdev_dtl[DTL_PARTIAL]); ++ if (scrub_done) ++ space_map_vacate(&vd->vdev_dtl[DTL_SCRUB], NULL, NULL); ++ space_map_vacate(&vd->vdev_dtl[DTL_OUTAGE], NULL, NULL); ++ if (!vdev_readable(vd)) ++ space_map_add(&vd->vdev_dtl[DTL_OUTAGE], 0, -1ULL); ++ else ++ space_map_walk(&vd->vdev_dtl[DTL_MISSING], ++ space_map_add, &vd->vdev_dtl[DTL_OUTAGE]); ++ mutex_exit(&vd->vdev_dtl_lock); ++ ++ if (txg != 0) ++ vdev_dirty(vd->vdev_top, VDD_DTL, vd, txg); ++ return; ++ } ++ ++ mutex_enter(&vd->vdev_dtl_lock); ++ for (t = 0; t < DTL_TYPES; t++) { ++ /* account for child's outage in parent's missing map */ ++ int s = (t == DTL_MISSING) ? DTL_OUTAGE: t; ++ if (t == DTL_SCRUB) ++ continue; /* leaf vdevs only */ ++ if (t == DTL_PARTIAL) ++ minref = 1; /* i.e. non-zero */ ++ else if (vd->vdev_nparity != 0) ++ minref = vd->vdev_nparity + 1; /* RAID-Z */ ++ else ++ minref = vd->vdev_children; /* any kind of mirror */ ++ space_map_ref_create(&reftree); ++ for (c = 0; c < vd->vdev_children; c++) { ++ vdev_t *cvd = vd->vdev_child[c]; ++ mutex_enter(&cvd->vdev_dtl_lock); ++ space_map_ref_add_map(&reftree, &cvd->vdev_dtl[s], 1); ++ mutex_exit(&cvd->vdev_dtl_lock); ++ } ++ space_map_ref_generate_map(&reftree, &vd->vdev_dtl[t], minref); ++ space_map_ref_destroy(&reftree); ++ } ++ mutex_exit(&vd->vdev_dtl_lock); ++} ++ ++static int ++vdev_dtl_load(vdev_t *vd) ++{ ++ spa_t *spa = vd->vdev_spa; ++ space_map_obj_t *smo = &vd->vdev_dtl_smo; ++ objset_t *mos = spa->spa_meta_objset; ++ dmu_buf_t *db; ++ int error; ++ ++ ASSERT(vd->vdev_children == 0); ++ ++ if (smo->smo_object == 0) ++ return (0); ++ ++ ASSERT(!vd->vdev_ishole); ++ ++ if ((error = dmu_bonus_hold(mos, smo->smo_object, FTAG, &db)) != 0) ++ return (error); ++ ++ ASSERT3U(db->db_size, >=, sizeof (*smo)); ++ bcopy(db->db_data, smo, sizeof (*smo)); ++ dmu_buf_rele(db, FTAG); ++ ++ mutex_enter(&vd->vdev_dtl_lock); ++ error = space_map_load(&vd->vdev_dtl[DTL_MISSING], ++ NULL, SM_ALLOC, smo, mos); ++ mutex_exit(&vd->vdev_dtl_lock); ++ ++ return (error); ++} ++ ++void ++vdev_dtl_sync(vdev_t *vd, uint64_t txg) ++{ ++ spa_t *spa = vd->vdev_spa; ++ space_map_obj_t *smo = &vd->vdev_dtl_smo; ++ space_map_t *sm = &vd->vdev_dtl[DTL_MISSING]; ++ objset_t *mos = spa->spa_meta_objset; ++ space_map_t smsync; ++ kmutex_t smlock; ++ dmu_buf_t *db; ++ dmu_tx_t *tx; ++ ++ ASSERT(!vd->vdev_ishole); ++ ++ tx = dmu_tx_create_assigned(spa->spa_dsl_pool, txg); ++ ++ if (vd->vdev_detached) { ++ if (smo->smo_object != 0) { ++ VERIFY(0 == dmu_object_free(mos, smo->smo_object, tx)); ++ smo->smo_object = 0; ++ } ++ dmu_tx_commit(tx); ++ return; ++ } ++ ++ if (smo->smo_object == 0) { ++ ASSERT(smo->smo_objsize == 0); ++ ASSERT(smo->smo_alloc == 0); ++ smo->smo_object = dmu_object_alloc(mos, ++ DMU_OT_SPACE_MAP, 1 << SPACE_MAP_BLOCKSHIFT, ++ DMU_OT_SPACE_MAP_HEADER, sizeof (*smo), tx); ++ ASSERT(smo->smo_object != 0); ++ vdev_config_dirty(vd->vdev_top); ++ } ++ ++ mutex_init(&smlock, NULL, MUTEX_DEFAULT, NULL); ++ ++ space_map_create(&smsync, sm->sm_start, sm->sm_size, sm->sm_shift, ++ &smlock); ++ ++ mutex_enter(&smlock); 
++ ++ mutex_enter(&vd->vdev_dtl_lock); ++ space_map_walk(sm, space_map_add, &smsync); ++ mutex_exit(&vd->vdev_dtl_lock); ++ ++ space_map_truncate(smo, mos, tx); ++ space_map_sync(&smsync, SM_ALLOC, smo, mos, tx); ++ ++ space_map_destroy(&smsync); ++ ++ mutex_exit(&smlock); ++ mutex_destroy(&smlock); ++ ++ VERIFY(0 == dmu_bonus_hold(mos, smo->smo_object, FTAG, &db)); ++ dmu_buf_will_dirty(db, tx); ++ ASSERT3U(db->db_size, >=, sizeof (*smo)); ++ bcopy(smo, db->db_data, sizeof (*smo)); ++ dmu_buf_rele(db, FTAG); ++ ++ dmu_tx_commit(tx); ++} ++ ++/* ++ * Determine whether the specified vdev can be offlined/detached/removed ++ * without losing data. ++ */ ++boolean_t ++vdev_dtl_required(vdev_t *vd) ++{ ++ spa_t *spa = vd->vdev_spa; ++ vdev_t *tvd = vd->vdev_top; ++ uint8_t cant_read = vd->vdev_cant_read; ++ boolean_t required; ++ ++ ASSERT(spa_config_held(spa, SCL_STATE_ALL, RW_WRITER) == SCL_STATE_ALL); ++ ++ if (vd == spa->spa_root_vdev || vd == tvd) ++ return (B_TRUE); ++ ++ /* ++ * Temporarily mark the device as unreadable, and then determine ++ * whether this results in any DTL outages in the top-level vdev. ++ * If not, we can safely offline/detach/remove the device. ++ */ ++ vd->vdev_cant_read = B_TRUE; ++ vdev_dtl_reassess(tvd, 0, 0, B_FALSE); ++ required = !vdev_dtl_empty(tvd, DTL_OUTAGE); ++ vd->vdev_cant_read = cant_read; ++ vdev_dtl_reassess(tvd, 0, 0, B_FALSE); ++ ++ if (!required && zio_injection_enabled) ++ required = !!zio_handle_device_injection(vd, NULL, ECHILD); ++ ++ return (required); ++} ++ ++/* ++ * Determine if resilver is needed, and if so the txg range. ++ */ ++boolean_t ++vdev_resilver_needed(vdev_t *vd, uint64_t *minp, uint64_t *maxp) ++{ ++ boolean_t needed = B_FALSE; ++ uint64_t thismin = UINT64_MAX; ++ uint64_t thismax = 0; ++ int c; ++ ++ if (vd->vdev_children == 0) { ++ mutex_enter(&vd->vdev_dtl_lock); ++ if (vd->vdev_dtl[DTL_MISSING].sm_space != 0 && ++ vdev_writeable(vd)) { ++ space_seg_t *ss; ++ ++ ss = avl_first(&vd->vdev_dtl[DTL_MISSING].sm_root); ++ thismin = ss->ss_start - 1; ++ ss = avl_last(&vd->vdev_dtl[DTL_MISSING].sm_root); ++ thismax = ss->ss_end; ++ needed = B_TRUE; ++ } ++ mutex_exit(&vd->vdev_dtl_lock); ++ } else { ++ for (c = 0; c < vd->vdev_children; c++) { ++ vdev_t *cvd = vd->vdev_child[c]; ++ uint64_t cmin, cmax; ++ ++ if (vdev_resilver_needed(cvd, &cmin, &cmax)) { ++ thismin = MIN(thismin, cmin); ++ thismax = MAX(thismax, cmax); ++ needed = B_TRUE; ++ } ++ } ++ } ++ ++ if (needed && minp) { ++ *minp = thismin; ++ *maxp = thismax; ++ } ++ return (needed); ++} ++ ++void ++vdev_load(vdev_t *vd) ++{ ++ int c; ++ ++ /* ++ * Recursively load all children. ++ */ ++ for (c = 0; c < vd->vdev_children; c++) ++ vdev_load(vd->vdev_child[c]); ++ ++ /* ++ * If this is a top-level vdev, initialize its metaslabs. ++ */ ++ if (vd == vd->vdev_top && !vd->vdev_ishole && ++ (vd->vdev_ashift == 0 || vd->vdev_asize == 0 || ++ vdev_metaslab_init(vd, 0) != 0)) ++ vdev_set_state(vd, B_FALSE, VDEV_STATE_CANT_OPEN, ++ VDEV_AUX_CORRUPT_DATA); ++ ++ /* ++ * If this is a leaf vdev, load its DTL. ++ */ ++ if (vd->vdev_ops->vdev_op_leaf && vdev_dtl_load(vd) != 0) ++ vdev_set_state(vd, B_FALSE, VDEV_STATE_CANT_OPEN, ++ VDEV_AUX_CORRUPT_DATA); ++} ++ ++/* ++ * The special vdev case is used for hot spares and l2cache devices. Its ++ * sole purpose it to set the vdev state for the associated vdev. 
To do this, ++ * we make sure that we can open the underlying device, then try to read the ++ * label, and make sure that the label is sane and that it hasn't been ++ * repurposed to another pool. ++ */ ++int ++vdev_validate_aux(vdev_t *vd) ++{ ++ nvlist_t *label; ++ uint64_t guid, version; ++ uint64_t state; ++ ++ if (!vdev_readable(vd)) ++ return (0); ++ ++ if ((label = vdev_label_read_config(vd)) == NULL) { ++ vdev_set_state(vd, B_TRUE, VDEV_STATE_CANT_OPEN, ++ VDEV_AUX_CORRUPT_DATA); ++ return (-1); ++ } ++ ++ if (nvlist_lookup_uint64(label, ZPOOL_CONFIG_VERSION, &version) != 0 || ++ version > SPA_VERSION || ++ nvlist_lookup_uint64(label, ZPOOL_CONFIG_GUID, &guid) != 0 || ++ guid != vd->vdev_guid || ++ nvlist_lookup_uint64(label, ZPOOL_CONFIG_POOL_STATE, &state) != 0) { ++ vdev_set_state(vd, B_TRUE, VDEV_STATE_CANT_OPEN, ++ VDEV_AUX_CORRUPT_DATA); ++ nvlist_free(label); ++ return (-1); ++ } ++ ++ /* ++ * We don't actually check the pool state here. If it's in fact in ++ * use by another pool, we update this fact on the fly when requested. ++ */ ++ nvlist_free(label); ++ return (0); ++} ++ ++void ++vdev_remove(vdev_t *vd, uint64_t txg) ++{ ++ spa_t *spa = vd->vdev_spa; ++ objset_t *mos = spa->spa_meta_objset; ++ dmu_tx_t *tx; ++ int m; ++ ++ tx = dmu_tx_create_assigned(spa_get_dsl(spa), txg); ++ ++ if (vd->vdev_dtl_smo.smo_object) { ++ ASSERT3U(vd->vdev_dtl_smo.smo_alloc, ==, 0); ++ (void) dmu_object_free(mos, vd->vdev_dtl_smo.smo_object, tx); ++ vd->vdev_dtl_smo.smo_object = 0; ++ } ++ ++ if (vd->vdev_ms != NULL) { ++ for (m = 0; m < vd->vdev_ms_count; m++) { ++ metaslab_t *msp = vd->vdev_ms[m]; ++ ++ if (msp == NULL || msp->ms_smo.smo_object == 0) ++ continue; ++ ++ ASSERT3U(msp->ms_smo.smo_alloc, ==, 0); ++ (void) dmu_object_free(mos, msp->ms_smo.smo_object, tx); ++ msp->ms_smo.smo_object = 0; ++ } ++ } ++ ++ if (vd->vdev_ms_array) { ++ (void) dmu_object_free(mos, vd->vdev_ms_array, tx); ++ vd->vdev_ms_array = 0; ++ vd->vdev_ms_shift = 0; ++ } ++ dmu_tx_commit(tx); ++} ++ ++void ++vdev_sync_done(vdev_t *vd, uint64_t txg) ++{ ++ metaslab_t *msp; ++ boolean_t reassess = !txg_list_empty(&vd->vdev_ms_list, TXG_CLEAN(txg)); ++ ++ ASSERT(!vd->vdev_ishole); ++ ++ while ((msp = txg_list_remove(&vd->vdev_ms_list, TXG_CLEAN(txg)))) ++ metaslab_sync_done(msp, txg); ++ ++ if (reassess) ++ metaslab_sync_reassess(vd->vdev_mg); ++} ++ ++void ++vdev_sync(vdev_t *vd, uint64_t txg) ++{ ++ spa_t *spa = vd->vdev_spa; ++ vdev_t *lvd; ++ metaslab_t *msp; ++ dmu_tx_t *tx; ++ ++ ASSERT(!vd->vdev_ishole); ++ ++ if (vd->vdev_ms_array == 0 && vd->vdev_ms_shift != 0) { ++ ASSERT(vd == vd->vdev_top); ++ tx = dmu_tx_create_assigned(spa->spa_dsl_pool, txg); ++ vd->vdev_ms_array = dmu_object_alloc(spa->spa_meta_objset, ++ DMU_OT_OBJECT_ARRAY, 0, DMU_OT_NONE, 0, tx); ++ ASSERT(vd->vdev_ms_array != 0); ++ vdev_config_dirty(vd); ++ dmu_tx_commit(tx); ++ } ++ ++ /* ++ * Remove the metadata associated with this vdev once it's empty. ++ */ ++ if (vd->vdev_stat.vs_alloc == 0 && vd->vdev_removing) ++ vdev_remove(vd, txg); ++ ++ while ((msp = txg_list_remove(&vd->vdev_ms_list, txg)) != NULL) { ++ metaslab_sync(msp, txg); ++ (void) txg_list_add(&vd->vdev_ms_list, msp, TXG_CLEAN(txg)); ++ } ++ ++ while ((lvd = txg_list_remove(&vd->vdev_dtl_list, txg)) != NULL) ++ vdev_dtl_sync(lvd, txg); ++ ++ (void) txg_list_add(&spa->spa_vdev_txg_list, vd, TXG_CLEAN(txg)); ++} ++ ++uint64_t ++vdev_psize_to_asize(vdev_t *vd, uint64_t psize) ++{ ++ return (vd->vdev_ops->vdev_op_asize(vd, psize)); ++} ++ ++/* ++ * Mark the given vdev faulted. 
A faulted vdev behaves as if the device could ++ * not be opened, and no I/O is attempted. ++ */ ++int ++vdev_fault(spa_t *spa, uint64_t guid, vdev_aux_t aux) ++{ ++ vdev_t *vd, *tvd; ++ ++ spa_vdev_state_enter(spa, SCL_NONE); ++ ++ if ((vd = spa_lookup_by_guid(spa, guid, B_TRUE)) == NULL) ++ return (spa_vdev_state_exit(spa, NULL, ENODEV)); ++ ++ if (!vd->vdev_ops->vdev_op_leaf) ++ return (spa_vdev_state_exit(spa, NULL, ENOTSUP)); ++ ++ tvd = vd->vdev_top; ++ ++ /* ++ * We don't directly use the aux state here, but if we do a ++ * vdev_reopen(), we need this value to be present to remember why we ++ * were faulted. ++ */ ++ vd->vdev_label_aux = aux; ++ ++ /* ++ * Faulted state takes precedence over degraded. ++ */ ++ vd->vdev_delayed_close = B_FALSE; ++ vd->vdev_faulted = 1ULL; ++ vd->vdev_degraded = 0ULL; ++ vdev_set_state(vd, B_FALSE, VDEV_STATE_FAULTED, aux); ++ ++ /* ++ * If this device has the only valid copy of the data, then ++ * back off and simply mark the vdev as degraded instead. ++ */ ++ if (!tvd->vdev_islog && vd->vdev_aux == NULL && vdev_dtl_required(vd)) { ++ vd->vdev_degraded = 1ULL; ++ vd->vdev_faulted = 0ULL; ++ ++ /* ++ * If we reopen the device and it's not dead, only then do we ++ * mark it degraded. ++ */ ++ vdev_reopen(tvd); ++ ++ if (vdev_readable(vd)) ++ vdev_set_state(vd, B_FALSE, VDEV_STATE_DEGRADED, aux); ++ } ++ ++ return (spa_vdev_state_exit(spa, vd, 0)); ++} ++ ++/* ++ * Mark the given vdev degraded. A degraded vdev is purely an indication to the ++ * user that something is wrong. The vdev continues to operate as normal as far ++ * as I/O is concerned. ++ */ ++int ++vdev_degrade(spa_t *spa, uint64_t guid, vdev_aux_t aux) ++{ ++ vdev_t *vd; ++ ++ spa_vdev_state_enter(spa, SCL_NONE); ++ ++ if ((vd = spa_lookup_by_guid(spa, guid, B_TRUE)) == NULL) ++ return (spa_vdev_state_exit(spa, NULL, ENODEV)); ++ ++ if (!vd->vdev_ops->vdev_op_leaf) ++ return (spa_vdev_state_exit(spa, NULL, ENOTSUP)); ++ ++ /* ++ * If the vdev is already faulted, then don't do anything. ++ */ ++ if (vd->vdev_faulted || vd->vdev_degraded) ++ return (spa_vdev_state_exit(spa, NULL, 0)); ++ ++ vd->vdev_degraded = 1ULL; ++ if (!vdev_is_dead(vd)) ++ vdev_set_state(vd, B_FALSE, VDEV_STATE_DEGRADED, ++ aux); ++ ++ return (spa_vdev_state_exit(spa, vd, 0)); ++} ++ ++/* ++ * Online the given vdev. If 'unspare' is set, it implies two things. First, ++ * any attached spare device should be detached when the device finishes ++ * resilvering. Second, the online should be treated like a 'test' online case, ++ * so no FMA events are generated if the device fails to open. 
++ */ ++int ++vdev_online(spa_t *spa, uint64_t guid, uint64_t flags, vdev_state_t *newstate) ++{ ++ vdev_t *vd, *tvd, *pvd, *rvd = spa->spa_root_vdev; ++ ++ spa_vdev_state_enter(spa, SCL_NONE); ++ ++ if ((vd = spa_lookup_by_guid(spa, guid, B_TRUE)) == NULL) ++ return (spa_vdev_state_exit(spa, NULL, ENODEV)); ++ ++ if (!vd->vdev_ops->vdev_op_leaf) ++ return (spa_vdev_state_exit(spa, NULL, ENOTSUP)); ++ ++ tvd = vd->vdev_top; ++ vd->vdev_offline = B_FALSE; ++ vd->vdev_tmpoffline = B_FALSE; ++ vd->vdev_checkremove = !!(flags & ZFS_ONLINE_CHECKREMOVE); ++ vd->vdev_forcefault = !!(flags & ZFS_ONLINE_FORCEFAULT); ++ ++ /* XXX - L2ARC 1.0 does not support expansion */ ++ if (!vd->vdev_aux) { ++ for (pvd = vd; pvd != rvd; pvd = pvd->vdev_parent) ++ pvd->vdev_expanding = !!(flags & ZFS_ONLINE_EXPAND); ++ } ++ ++ vdev_reopen(tvd); ++ vd->vdev_checkremove = vd->vdev_forcefault = B_FALSE; ++ ++ if (!vd->vdev_aux) { ++ for (pvd = vd; pvd != rvd; pvd = pvd->vdev_parent) ++ pvd->vdev_expanding = B_FALSE; ++ } ++ ++ if (newstate) ++ *newstate = vd->vdev_state; ++ if ((flags & ZFS_ONLINE_UNSPARE) && ++ !vdev_is_dead(vd) && vd->vdev_parent && ++ vd->vdev_parent->vdev_ops == &vdev_spare_ops && ++ vd->vdev_parent->vdev_child[0] == vd) ++ vd->vdev_unspare = B_TRUE; ++ ++ if ((flags & ZFS_ONLINE_EXPAND) || spa->spa_autoexpand) { ++ ++ /* XXX - L2ARC 1.0 does not support expansion */ ++ if (vd->vdev_aux) ++ return (spa_vdev_state_exit(spa, vd, ENOTSUP)); ++ spa_async_request(spa, SPA_ASYNC_CONFIG_UPDATE); ++ } ++ return (spa_vdev_state_exit(spa, vd, 0)); ++} ++ ++static int ++vdev_offline_locked(spa_t *spa, uint64_t guid, uint64_t flags) ++{ ++ vdev_t *vd, *tvd; ++ int error = 0; ++ uint64_t generation; ++ metaslab_group_t *mg; ++ ++top: ++ spa_vdev_state_enter(spa, SCL_ALLOC); ++ ++ if ((vd = spa_lookup_by_guid(spa, guid, B_TRUE)) == NULL) ++ return (spa_vdev_state_exit(spa, NULL, ENODEV)); ++ ++ if (!vd->vdev_ops->vdev_op_leaf) ++ return (spa_vdev_state_exit(spa, NULL, ENOTSUP)); ++ ++ tvd = vd->vdev_top; ++ mg = tvd->vdev_mg; ++ generation = spa->spa_config_generation + 1; ++ ++ /* ++ * If the device isn't already offline, try to offline it. ++ */ ++ if (!vd->vdev_offline) { ++ /* ++ * If this device has the only valid copy of some data, ++ * don't allow it to be offlined. Log devices are always ++ * expendable. ++ */ ++ if (!tvd->vdev_islog && vd->vdev_aux == NULL && ++ vdev_dtl_required(vd)) ++ return (spa_vdev_state_exit(spa, NULL, EBUSY)); ++ ++ /* ++ * If the top-level is a slog and it has had allocations ++ * then proceed. We check that the vdev's metaslab group ++ * is not NULL since it's possible that we may have just ++ * added this vdev but not yet initialized its metaslabs. ++ */ ++ if (tvd->vdev_islog && mg != NULL) { ++ /* ++ * Prevent any future allocations. ++ */ ++ metaslab_group_passivate(mg); ++ (void) spa_vdev_state_exit(spa, vd, 0); ++ ++ error = spa_offline_log(spa); ++ ++ spa_vdev_state_enter(spa, SCL_ALLOC); ++ ++ /* ++ * Check to see if the config has changed. ++ */ ++ if (error || generation != spa->spa_config_generation) { ++ metaslab_group_activate(mg); ++ if (error) ++ return (spa_vdev_state_exit(spa, ++ vd, error)); ++ (void) spa_vdev_state_exit(spa, vd, 0); ++ goto top; ++ } ++ ASSERT3U(tvd->vdev_stat.vs_alloc, ==, 0); ++ } ++ ++ /* ++ * Offline this device and reopen its top-level vdev. ++ * If the top-level vdev is a log device then just offline ++ * it. Otherwise, if this action results in the top-level ++ * vdev becoming unusable, undo it and fail the request. 
++ */ ++ vd->vdev_offline = B_TRUE; ++ vdev_reopen(tvd); ++ ++ if (!tvd->vdev_islog && vd->vdev_aux == NULL && ++ vdev_is_dead(tvd)) { ++ vd->vdev_offline = B_FALSE; ++ vdev_reopen(tvd); ++ return (spa_vdev_state_exit(spa, NULL, EBUSY)); ++ } ++ ++ /* ++ * Add the device back into the metaslab rotor so that ++ * once we online the device it's open for business. ++ */ ++ if (tvd->vdev_islog && mg != NULL) ++ metaslab_group_activate(mg); ++ } ++ ++ vd->vdev_tmpoffline = !!(flags & ZFS_OFFLINE_TEMPORARY); ++ ++ return (spa_vdev_state_exit(spa, vd, 0)); ++} ++ ++int ++vdev_offline(spa_t *spa, uint64_t guid, uint64_t flags) ++{ ++ int error; ++ ++ mutex_enter(&spa->spa_vdev_top_lock); ++ error = vdev_offline_locked(spa, guid, flags); ++ mutex_exit(&spa->spa_vdev_top_lock); ++ ++ return (error); ++} ++ ++/* ++ * Clear the error counts associated with this vdev. Unlike vdev_online() and ++ * vdev_offline(), we assume the spa config is locked. We also clear all ++ * children. If 'vd' is NULL, then the user wants to clear all vdevs. ++ */ ++void ++vdev_clear(spa_t *spa, vdev_t *vd) ++{ ++ vdev_t *rvd = spa->spa_root_vdev; ++ int c; ++ ++ ASSERT(spa_config_held(spa, SCL_STATE_ALL, RW_WRITER) == SCL_STATE_ALL); ++ ++ if (vd == NULL) ++ vd = rvd; ++ ++ vd->vdev_stat.vs_read_errors = 0; ++ vd->vdev_stat.vs_write_errors = 0; ++ vd->vdev_stat.vs_checksum_errors = 0; ++ ++ for (c = 0; c < vd->vdev_children; c++) ++ vdev_clear(spa, vd->vdev_child[c]); ++ ++ /* ++ * If we're in the FAULTED state or have experienced failed I/O, then ++ * clear the persistent state and attempt to reopen the device. We ++ * also mark the vdev config dirty, so that the new faulted state is ++ * written out to disk. ++ */ ++ if (vd->vdev_faulted || vd->vdev_degraded || ++ !vdev_readable(vd) || !vdev_writeable(vd)) { ++ ++ /* ++ * When reopening in reponse to a clear event, it may be due to ++ * a fmadm repair request. In this case, if the device is ++ * still broken, we want to still post the ereport again. ++ */ ++ vd->vdev_forcefault = B_TRUE; ++ ++ vd->vdev_faulted = vd->vdev_degraded = 0ULL; ++ vd->vdev_cant_read = B_FALSE; ++ vd->vdev_cant_write = B_FALSE; ++ ++ vdev_reopen(vd == rvd ? rvd : vd->vdev_top); ++ ++ vd->vdev_forcefault = B_FALSE; ++ ++ if (vd != rvd && vdev_writeable(vd->vdev_top)) ++ vdev_state_dirty(vd->vdev_top); ++ ++ if (vd->vdev_aux == NULL && !vdev_is_dead(vd)) ++ spa_async_request(spa, SPA_ASYNC_RESILVER); ++ ++ spa_event_notify(spa, vd, FM_EREPORT_ZFS_DEVICE_CLEAR); ++ } ++ ++ /* ++ * When clearing a FMA-diagnosed fault, we always want to ++ * unspare the device, as we assume that the original spare was ++ * done in response to the FMA fault. ++ */ ++ if (!vdev_is_dead(vd) && vd->vdev_parent != NULL && ++ vd->vdev_parent->vdev_ops == &vdev_spare_ops && ++ vd->vdev_parent->vdev_child[0] == vd) ++ vd->vdev_unspare = B_TRUE; ++} ++ ++boolean_t ++vdev_is_dead(vdev_t *vd) ++{ ++ /* ++ * Holes and missing devices are always considered "dead". ++ * This simplifies the code since we don't have to check for ++ * these types of devices in the various code paths. ++ * Instead we rely on the fact that we skip over dead devices ++ * before issuing I/O to them. 
++ */ ++ return (vd->vdev_state < VDEV_STATE_DEGRADED || vd->vdev_ishole || ++ vd->vdev_ops == &vdev_missing_ops); ++} ++ ++boolean_t ++vdev_readable(vdev_t *vd) ++{ ++ return (!vdev_is_dead(vd) && !vd->vdev_cant_read); ++} ++ ++boolean_t ++vdev_writeable(vdev_t *vd) ++{ ++ return (!vdev_is_dead(vd) && !vd->vdev_cant_write); ++} ++ ++boolean_t ++vdev_allocatable(vdev_t *vd) ++{ ++ uint64_t state = vd->vdev_state; ++ ++ /* ++ * We currently allow allocations from vdevs which may be in the ++ * process of reopening (i.e. VDEV_STATE_CLOSED). If the device ++ * fails to reopen then we'll catch it later when we're holding ++ * the proper locks. Note that we have to get the vdev state ++ * in a local variable because although it changes atomically, ++ * we're asking two separate questions about it. ++ */ ++ return (!(state < VDEV_STATE_DEGRADED && state != VDEV_STATE_CLOSED) && ++ !vd->vdev_cant_write && !vd->vdev_ishole); ++} ++ ++boolean_t ++vdev_accessible(vdev_t *vd, zio_t *zio) ++{ ++ ASSERT(zio->io_vd == vd); ++ ++ if (vdev_is_dead(vd) || vd->vdev_remove_wanted) ++ return (B_FALSE); ++ ++ if (zio->io_type == ZIO_TYPE_READ) ++ return (!vd->vdev_cant_read); ++ ++ if (zio->io_type == ZIO_TYPE_WRITE) ++ return (!vd->vdev_cant_write); ++ ++ return (B_TRUE); ++} ++ ++/* ++ * Get statistics for the given vdev. ++ */ ++void ++vdev_get_stats(vdev_t *vd, vdev_stat_t *vs) ++{ ++ vdev_t *rvd = vd->vdev_spa->spa_root_vdev; ++ int c, t; ++ ++ mutex_enter(&vd->vdev_stat_lock); ++ bcopy(&vd->vdev_stat, vs, sizeof (*vs)); ++ vs->vs_timestamp = gethrtime() - vs->vs_timestamp; ++ vs->vs_state = vd->vdev_state; ++ vs->vs_rsize = vdev_get_min_asize(vd); ++ if (vd->vdev_ops->vdev_op_leaf) ++ vs->vs_rsize += VDEV_LABEL_START_SIZE + VDEV_LABEL_END_SIZE; ++ vs->vs_esize = vd->vdev_max_asize - vd->vdev_asize; ++ mutex_exit(&vd->vdev_stat_lock); ++ ++ /* ++ * If we're getting stats on the root vdev, aggregate the I/O counts ++ * over all top-level vdevs (i.e. the direct children of the root). ++ */ ++ if (vd == rvd) { ++ for (c = 0; c < rvd->vdev_children; c++) { ++ vdev_t *cvd = rvd->vdev_child[c]; ++ vdev_stat_t *cvs = &cvd->vdev_stat; ++ ++ mutex_enter(&vd->vdev_stat_lock); ++ for (t = 0; t < ZIO_TYPES; t++) { ++ vs->vs_ops[t] += cvs->vs_ops[t]; ++ vs->vs_bytes[t] += cvs->vs_bytes[t]; ++ } ++ cvs->vs_scan_removing = cvd->vdev_removing; ++ mutex_exit(&vd->vdev_stat_lock); ++ } ++ } ++} ++ ++void ++vdev_clear_stats(vdev_t *vd) ++{ ++ mutex_enter(&vd->vdev_stat_lock); ++ vd->vdev_stat.vs_space = 0; ++ vd->vdev_stat.vs_dspace = 0; ++ vd->vdev_stat.vs_alloc = 0; ++ mutex_exit(&vd->vdev_stat_lock); ++} ++ ++void ++vdev_scan_stat_init(vdev_t *vd) ++{ ++ vdev_stat_t *vs = &vd->vdev_stat; ++ int c; ++ ++ for (c = 0; c < vd->vdev_children; c++) ++ vdev_scan_stat_init(vd->vdev_child[c]); ++ ++ mutex_enter(&vd->vdev_stat_lock); ++ vs->vs_scan_processed = 0; ++ mutex_exit(&vd->vdev_stat_lock); ++} ++ ++void ++vdev_stat_update(zio_t *zio, uint64_t psize) ++{ ++ spa_t *spa = zio->io_spa; ++ vdev_t *rvd = spa->spa_root_vdev; ++ vdev_t *vd = zio->io_vd ? zio->io_vd : rvd; ++ vdev_t *pvd; ++ uint64_t txg = zio->io_txg; ++ vdev_stat_t *vs = &vd->vdev_stat; ++ zio_type_t type = zio->io_type; ++ int flags = zio->io_flags; ++ ++ /* ++ * If this i/o is a gang leader, it didn't do any actual work. ++ */ ++ if (zio->io_gang_tree) ++ return; ++ ++ if (zio->io_error == 0) { ++ /* ++ * If this is a root i/o, don't count it -- we've already ++ * counted the top-level vdevs, and vdev_get_stats() will ++ * aggregate them when asked. 
This reduces contention on ++ * the root vdev_stat_lock and implicitly handles blocks ++ * that compress away to holes, for which there is no i/o. ++ * (Holes never create vdev children, so all the counters ++ * remain zero, which is what we want.) ++ * ++ * Note: this only applies to successful i/o (io_error == 0) ++ * because unlike i/o counts, errors are not additive. ++ * When reading a ditto block, for example, failure of ++ * one top-level vdev does not imply a root-level error. ++ */ ++ if (vd == rvd) ++ return; ++ ++ ASSERT(vd == zio->io_vd); ++ ++ if (flags & ZIO_FLAG_IO_BYPASS) ++ return; ++ ++ mutex_enter(&vd->vdev_stat_lock); ++ ++ if (flags & ZIO_FLAG_IO_REPAIR) { ++ if (flags & ZIO_FLAG_SCAN_THREAD) { ++ dsl_scan_phys_t *scn_phys = ++ &spa->spa_dsl_pool->dp_scan->scn_phys; ++ uint64_t *processed = &scn_phys->scn_processed; ++ ++ /* XXX cleanup? */ ++ if (vd->vdev_ops->vdev_op_leaf) ++ atomic_add_64(processed, psize); ++ vs->vs_scan_processed += psize; ++ } ++ ++ if (flags & ZIO_FLAG_SELF_HEAL) ++ vs->vs_self_healed += psize; ++ } ++ ++ vs->vs_ops[type]++; ++ vs->vs_bytes[type] += psize; ++ ++ mutex_exit(&vd->vdev_stat_lock); ++ return; ++ } ++ ++ if (flags & ZIO_FLAG_SPECULATIVE) ++ return; ++ ++ /* ++ * If this is an I/O error that is going to be retried, then ignore the ++ * error. Otherwise, the user may interpret B_FAILFAST I/O errors as ++ * hard errors, when in reality they can happen for any number of ++ * innocuous reasons (bus resets, MPxIO link failure, etc). ++ */ ++ if (zio->io_error == EIO && ++ !(zio->io_flags & ZIO_FLAG_IO_RETRY)) ++ return; ++ ++ /* ++ * Intent logs writes won't propagate their error to the root ++ * I/O so don't mark these types of failures as pool-level ++ * errors. ++ */ ++ if (zio->io_vd == NULL && (zio->io_flags & ZIO_FLAG_DONT_PROPAGATE)) ++ return; ++ ++ mutex_enter(&vd->vdev_stat_lock); ++ if (type == ZIO_TYPE_READ && !vdev_is_dead(vd)) { ++ if (zio->io_error == ECKSUM) ++ vs->vs_checksum_errors++; ++ else ++ vs->vs_read_errors++; ++ } ++ if (type == ZIO_TYPE_WRITE && !vdev_is_dead(vd)) ++ vs->vs_write_errors++; ++ mutex_exit(&vd->vdev_stat_lock); ++ ++ if (type == ZIO_TYPE_WRITE && txg != 0 && ++ (!(flags & ZIO_FLAG_IO_REPAIR) || ++ (flags & ZIO_FLAG_SCAN_THREAD) || ++ spa->spa_claiming)) { ++ /* ++ * This is either a normal write (not a repair), or it's ++ * a repair induced by the scrub thread, or it's a repair ++ * made by zil_claim() during spa_load() in the first txg. ++ * In the normal case, we commit the DTL change in the same ++ * txg as the block was born. In the scrub-induced repair ++ * case, we know that scrubs run in first-pass syncing context, ++ * so we commit the DTL change in spa_syncing_txg(spa). ++ * In the zil_claim() case, we commit in spa_first_txg(spa). ++ * ++ * We currently do not make DTL entries for failed spontaneous ++ * self-healing writes triggered by normal (non-scrubbing) ++ * reads, because we have no transactional context in which to ++ * do so -- and it's not clear that it'd be desirable anyway. 
++ */ ++ if (vd->vdev_ops->vdev_op_leaf) { ++ uint64_t commit_txg = txg; ++ if (flags & ZIO_FLAG_SCAN_THREAD) { ++ ASSERT(flags & ZIO_FLAG_IO_REPAIR); ++ ASSERT(spa_sync_pass(spa) == 1); ++ vdev_dtl_dirty(vd, DTL_SCRUB, txg, 1); ++ commit_txg = spa_syncing_txg(spa); ++ } else if (spa->spa_claiming) { ++ ASSERT(flags & ZIO_FLAG_IO_REPAIR); ++ commit_txg = spa_first_txg(spa); ++ } ++ ASSERT(commit_txg >= spa_syncing_txg(spa)); ++ if (vdev_dtl_contains(vd, DTL_MISSING, txg, 1)) ++ return; ++ for (pvd = vd; pvd != rvd; pvd = pvd->vdev_parent) ++ vdev_dtl_dirty(pvd, DTL_PARTIAL, txg, 1); ++ vdev_dirty(vd->vdev_top, VDD_DTL, vd, commit_txg); ++ } ++ if (vd != rvd) ++ vdev_dtl_dirty(vd, DTL_MISSING, txg, 1); ++ } ++} ++ ++/* ++ * Update the in-core space usage stats for this vdev, its metaslab class, ++ * and the root vdev. ++ */ ++void ++vdev_space_update(vdev_t *vd, int64_t alloc_delta, int64_t defer_delta, ++ int64_t space_delta) ++{ ++ int64_t dspace_delta = space_delta; ++ spa_t *spa = vd->vdev_spa; ++ vdev_t *rvd = spa->spa_root_vdev; ++ metaslab_group_t *mg = vd->vdev_mg; ++ metaslab_class_t *mc = mg ? mg->mg_class : NULL; ++ ++ ASSERT(vd == vd->vdev_top); ++ ++ /* ++ * Apply the inverse of the psize-to-asize (ie. RAID-Z) space-expansion ++ * factor. We must calculate this here and not at the root vdev ++ * because the root vdev's psize-to-asize is simply the max of its ++ * childrens', thus not accurate enough for us. ++ */ ++ ASSERT((dspace_delta & (SPA_MINBLOCKSIZE-1)) == 0); ++ ASSERT(vd->vdev_deflate_ratio != 0 || vd->vdev_isl2cache); ++ dspace_delta = (dspace_delta >> SPA_MINBLOCKSHIFT) * ++ vd->vdev_deflate_ratio; ++ ++ mutex_enter(&vd->vdev_stat_lock); ++ vd->vdev_stat.vs_alloc += alloc_delta; ++ vd->vdev_stat.vs_space += space_delta; ++ vd->vdev_stat.vs_dspace += dspace_delta; ++ mutex_exit(&vd->vdev_stat_lock); ++ ++ if (mc == spa_normal_class(spa)) { ++ mutex_enter(&rvd->vdev_stat_lock); ++ rvd->vdev_stat.vs_alloc += alloc_delta; ++ rvd->vdev_stat.vs_space += space_delta; ++ rvd->vdev_stat.vs_dspace += dspace_delta; ++ mutex_exit(&rvd->vdev_stat_lock); ++ } ++ ++ if (mc != NULL) { ++ ASSERT(rvd == vd->vdev_parent); ++ ASSERT(vd->vdev_ms_count != 0); ++ ++ metaslab_class_space_update(mc, ++ alloc_delta, defer_delta, space_delta, dspace_delta); ++ } ++} ++ ++/* ++ * Mark a top-level vdev's config as dirty, placing it on the dirty list ++ * so that it will be written out next time the vdev configuration is synced. ++ * If the root vdev is specified (vdev_top == NULL), dirty all top-level vdevs. ++ */ ++void ++vdev_config_dirty(vdev_t *vd) ++{ ++ spa_t *spa = vd->vdev_spa; ++ vdev_t *rvd = spa->spa_root_vdev; ++ int c; ++ ++ ASSERT(spa_writeable(spa)); ++ ++ /* ++ * If this is an aux vdev (as with l2cache and spare devices), then we ++ * update the vdev config manually and set the sync flag. ++ */ ++ if (vd->vdev_aux != NULL) { ++ spa_aux_vdev_t *sav = vd->vdev_aux; ++ nvlist_t **aux; ++ uint_t naux; ++ ++ for (c = 0; c < sav->sav_count; c++) { ++ if (sav->sav_vdevs[c] == vd) ++ break; ++ } ++ ++ if (c == sav->sav_count) { ++ /* ++ * We're being removed. There's nothing more to do. 
++ */ ++ ASSERT(sav->sav_sync == B_TRUE); ++ return; ++ } ++ ++ sav->sav_sync = B_TRUE; ++ ++ if (nvlist_lookup_nvlist_array(sav->sav_config, ++ ZPOOL_CONFIG_L2CACHE, &aux, &naux) != 0) { ++ VERIFY(nvlist_lookup_nvlist_array(sav->sav_config, ++ ZPOOL_CONFIG_SPARES, &aux, &naux) == 0); ++ } ++ ++ ASSERT(c < naux); ++ ++ /* ++ * Setting the nvlist in the middle if the array is a little ++ * sketchy, but it will work. ++ */ ++ nvlist_free(aux[c]); ++ aux[c] = vdev_config_generate(spa, vd, B_TRUE, 0); ++ ++ return; ++ } ++ ++ /* ++ * The dirty list is protected by the SCL_CONFIG lock. The caller ++ * must either hold SCL_CONFIG as writer, or must be the sync thread ++ * (which holds SCL_CONFIG as reader). There's only one sync thread, ++ * so this is sufficient to ensure mutual exclusion. ++ */ ++ ASSERT(spa_config_held(spa, SCL_CONFIG, RW_WRITER) || ++ (dsl_pool_sync_context(spa_get_dsl(spa)) && ++ spa_config_held(spa, SCL_CONFIG, RW_READER))); ++ ++ if (vd == rvd) { ++ for (c = 0; c < rvd->vdev_children; c++) ++ vdev_config_dirty(rvd->vdev_child[c]); ++ } else { ++ ASSERT(vd == vd->vdev_top); ++ ++ if (!list_link_active(&vd->vdev_config_dirty_node) && ++ !vd->vdev_ishole) ++ list_insert_head(&spa->spa_config_dirty_list, vd); ++ } ++} ++ ++void ++vdev_config_clean(vdev_t *vd) ++{ ++ spa_t *spa = vd->vdev_spa; ++ ++ ASSERT(spa_config_held(spa, SCL_CONFIG, RW_WRITER) || ++ (dsl_pool_sync_context(spa_get_dsl(spa)) && ++ spa_config_held(spa, SCL_CONFIG, RW_READER))); ++ ++ ASSERT(list_link_active(&vd->vdev_config_dirty_node)); ++ list_remove(&spa->spa_config_dirty_list, vd); ++} ++ ++/* ++ * Mark a top-level vdev's state as dirty, so that the next pass of ++ * spa_sync() can convert this into vdev_config_dirty(). We distinguish ++ * the state changes from larger config changes because they require ++ * much less locking, and are often needed for administrative actions. ++ */ ++void ++vdev_state_dirty(vdev_t *vd) ++{ ++ spa_t *spa = vd->vdev_spa; ++ ++ ASSERT(spa_writeable(spa)); ++ ASSERT(vd == vd->vdev_top); ++ ++ /* ++ * The state list is protected by the SCL_STATE lock. The caller ++ * must either hold SCL_STATE as writer, or must be the sync thread ++ * (which holds SCL_STATE as reader). There's only one sync thread, ++ * so this is sufficient to ensure mutual exclusion. ++ */ ++ ASSERT(spa_config_held(spa, SCL_STATE, RW_WRITER) || ++ (dsl_pool_sync_context(spa_get_dsl(spa)) && ++ spa_config_held(spa, SCL_STATE, RW_READER))); ++ ++ if (!list_link_active(&vd->vdev_state_dirty_node) && !vd->vdev_ishole) ++ list_insert_head(&spa->spa_state_dirty_list, vd); ++} ++ ++void ++vdev_state_clean(vdev_t *vd) ++{ ++ spa_t *spa = vd->vdev_spa; ++ ++ ASSERT(spa_config_held(spa, SCL_STATE, RW_WRITER) || ++ (dsl_pool_sync_context(spa_get_dsl(spa)) && ++ spa_config_held(spa, SCL_STATE, RW_READER))); ++ ++ ASSERT(list_link_active(&vd->vdev_state_dirty_node)); ++ list_remove(&spa->spa_state_dirty_list, vd); ++} ++ ++/* ++ * Propagate vdev state up from children to parent. ++ */ ++void ++vdev_propagate_state(vdev_t *vd) ++{ ++ spa_t *spa = vd->vdev_spa; ++ vdev_t *rvd = spa->spa_root_vdev; ++ int degraded = 0, faulted = 0; ++ int corrupted = 0; ++ vdev_t *child; ++ int c; ++ ++ if (vd->vdev_children > 0) { ++ for (c = 0; c < vd->vdev_children; c++) { ++ child = vd->vdev_child[c]; ++ ++ /* ++ * Don't factor holes into the decision. 
++ */ ++ if (child->vdev_ishole) ++ continue; ++ ++ if (!vdev_readable(child) || ++ (!vdev_writeable(child) && spa_writeable(spa))) { ++ /* ++ * Root special: if there is a top-level log ++ * device, treat the root vdev as if it were ++ * degraded. ++ */ ++ if (child->vdev_islog && vd == rvd) ++ degraded++; ++ else ++ faulted++; ++ } else if (child->vdev_state <= VDEV_STATE_DEGRADED) { ++ degraded++; ++ } ++ ++ if (child->vdev_stat.vs_aux == VDEV_AUX_CORRUPT_DATA) ++ corrupted++; ++ } ++ ++ vd->vdev_ops->vdev_op_state_change(vd, faulted, degraded); ++ ++ /* ++ * Root special: if there is a top-level vdev that cannot be ++ * opened due to corrupted metadata, then propagate the root ++ * vdev's aux state as 'corrupt' rather than 'insufficient ++ * replicas'. ++ */ ++ if (corrupted && vd == rvd && ++ rvd->vdev_state == VDEV_STATE_CANT_OPEN) ++ vdev_set_state(rvd, B_FALSE, VDEV_STATE_CANT_OPEN, ++ VDEV_AUX_CORRUPT_DATA); ++ } ++ ++ if (vd->vdev_parent) ++ vdev_propagate_state(vd->vdev_parent); ++} ++ ++/* ++ * Set a vdev's state. If this is during an open, we don't update the parent ++ * state, because we're in the process of opening children depth-first. ++ * Otherwise, we propagate the change to the parent. ++ * ++ * If this routine places a device in a faulted state, an appropriate ereport is ++ * generated. ++ */ ++void ++vdev_set_state(vdev_t *vd, boolean_t isopen, vdev_state_t state, vdev_aux_t aux) ++{ ++ uint64_t save_state; ++ spa_t *spa = vd->vdev_spa; ++ ++ if (state == vd->vdev_state) { ++ vd->vdev_stat.vs_aux = aux; ++ return; ++ } ++ ++ save_state = vd->vdev_state; ++ ++ vd->vdev_state = state; ++ vd->vdev_stat.vs_aux = aux; ++ ++ /* ++ * If we are setting the vdev state to anything but an open state, then ++ * always close the underlying device unless the device has requested ++ * a delayed close (i.e. we're about to remove or fault the device). ++ * Otherwise, we keep accessible but invalid devices open forever. ++ * We don't call vdev_close() itself, because that implies some extra ++ * checks (offline, etc) that we don't want here. This is limited to ++ * leaf devices, because otherwise closing the device will affect other ++ * children. ++ */ ++ if (!vd->vdev_delayed_close && vdev_is_dead(vd) && ++ vd->vdev_ops->vdev_op_leaf) ++ vd->vdev_ops->vdev_op_close(vd); ++ ++ /* ++ * If we have brought this vdev back into service, we need ++ * to notify fmd so that it can gracefully repair any outstanding ++ * cases due to a missing device. We do this in all cases, even those ++ * that probably don't correlate to a repaired fault. This is sure to ++ * catch all cases, and we let the zfs-retire agent sort it out. If ++ * this is a transient state it's OK, as the retire agent will ++ * double-check the state of the vdev before repairing it. ++ */ ++ if (state == VDEV_STATE_HEALTHY && vd->vdev_ops->vdev_op_leaf && ++ vd->vdev_prevstate != state) ++ zfs_post_state_change(spa, vd); ++ ++ if (vd->vdev_removed && ++ state == VDEV_STATE_CANT_OPEN && ++ (aux == VDEV_AUX_OPEN_FAILED || vd->vdev_checkremove)) { ++ /* ++ * If the previous state is set to VDEV_STATE_REMOVED, then this ++ * device was previously marked removed and someone attempted to ++ * reopen it. If this failed due to a nonexistent device, then ++ * keep the device in the REMOVED state. We also let this be if ++ * it is one of our special test online cases, which is only ++ * attempting to online the device and shouldn't generate an FMA ++ * fault. 
++ */ ++ vd->vdev_state = VDEV_STATE_REMOVED; ++ vd->vdev_stat.vs_aux = VDEV_AUX_NONE; ++ } else if (state == VDEV_STATE_REMOVED) { ++ vd->vdev_removed = B_TRUE; ++ } else if (state == VDEV_STATE_CANT_OPEN) { ++ /* ++ * If we fail to open a vdev during an import or recovery, we ++ * mark it as "not available", which signifies that it was ++ * never there to begin with. Failure to open such a device ++ * is not considered an error. ++ */ ++ if ((spa_load_state(spa) == SPA_LOAD_IMPORT || ++ spa_load_state(spa) == SPA_LOAD_RECOVER) && ++ vd->vdev_ops->vdev_op_leaf) ++ vd->vdev_not_present = 1; ++ ++ /* ++ * Post the appropriate ereport. If the 'prevstate' field is ++ * set to something other than VDEV_STATE_UNKNOWN, it indicates ++ * that this is part of a vdev_reopen(). In this case, we don't ++ * want to post the ereport if the device was already in the ++ * CANT_OPEN state beforehand. ++ * ++ * If the 'checkremove' flag is set, then this is an attempt to ++ * online the device in response to an insertion event. If we ++ * hit this case, then we have detected an insertion event for a ++ * faulted or offline device that wasn't in the removed state. ++ * In this scenario, we don't post an ereport because we are ++ * about to replace the device, or attempt an online with ++ * vdev_forcefault, which will generate the fault for us. ++ */ ++ if ((vd->vdev_prevstate != state || vd->vdev_forcefault) && ++ !vd->vdev_not_present && !vd->vdev_checkremove && ++ vd != spa->spa_root_vdev) { ++ const char *class; ++ ++ switch (aux) { ++ case VDEV_AUX_OPEN_FAILED: ++ class = FM_EREPORT_ZFS_DEVICE_OPEN_FAILED; ++ break; ++ case VDEV_AUX_CORRUPT_DATA: ++ class = FM_EREPORT_ZFS_DEVICE_CORRUPT_DATA; ++ break; ++ case VDEV_AUX_NO_REPLICAS: ++ class = FM_EREPORT_ZFS_DEVICE_NO_REPLICAS; ++ break; ++ case VDEV_AUX_BAD_GUID_SUM: ++ class = FM_EREPORT_ZFS_DEVICE_BAD_GUID_SUM; ++ break; ++ case VDEV_AUX_TOO_SMALL: ++ class = FM_EREPORT_ZFS_DEVICE_TOO_SMALL; ++ break; ++ case VDEV_AUX_BAD_LABEL: ++ class = FM_EREPORT_ZFS_DEVICE_BAD_LABEL; ++ break; ++ default: ++ class = FM_EREPORT_ZFS_DEVICE_UNKNOWN; ++ } ++ ++ zfs_ereport_post(class, spa, vd, NULL, save_state, 0); ++ } ++ ++ /* Erase any notion of persistent removed state */ ++ vd->vdev_removed = B_FALSE; ++ } else { ++ vd->vdev_removed = B_FALSE; ++ } ++ ++ if (!isopen && vd->vdev_parent) ++ vdev_propagate_state(vd->vdev_parent); ++} ++ ++/* ++ * Check the vdev configuration to ensure that it's capable of supporting ++ * a root pool. ++ */ ++boolean_t ++vdev_is_bootable(vdev_t *vd) ++{ ++#if defined(__sun__) || defined(__sun) ++ /* ++ * Currently, we do not support RAID-Z or partial configuration. ++ * In addition, only a single top-level vdev is allowed and none of the ++ * leaves can be wholedisks. ++ */ ++ int c; ++ ++ if (!vd->vdev_ops->vdev_op_leaf) { ++ char *vdev_type = vd->vdev_ops->vdev_op_type; ++ ++ if (strcmp(vdev_type, VDEV_TYPE_ROOT) == 0 && ++ vd->vdev_children > 1) { ++ return (B_FALSE); ++ } else if (strcmp(vdev_type, VDEV_TYPE_RAIDZ) == 0 || ++ strcmp(vdev_type, VDEV_TYPE_MISSING) == 0) { ++ return (B_FALSE); ++ } ++ } else if (vd->vdev_wholedisk == 1) { ++ return (B_FALSE); ++ } ++ ++ for (c = 0; c < vd->vdev_children; c++) { ++ if (!vdev_is_bootable(vd->vdev_child[c])) ++ return (B_FALSE); ++ } ++#endif /* __sun__ || __sun */ ++ return (B_TRUE); ++} ++ ++/* ++ * Load the state from the original vdev tree (ovd) which ++ * we've retrieved from the MOS config object. 
If the original ++ * vdev was offline or faulted then we transfer that state to the ++ * device in the current vdev tree (nvd). ++ */ ++void ++vdev_load_log_state(vdev_t *nvd, vdev_t *ovd) ++{ ++ int c; ++ ++ ASSERT(nvd->vdev_top->vdev_islog); ++ ASSERT(spa_config_held(nvd->vdev_spa, ++ SCL_STATE_ALL, RW_WRITER) == SCL_STATE_ALL); ++ ASSERT3U(nvd->vdev_guid, ==, ovd->vdev_guid); ++ ++ for (c = 0; c < nvd->vdev_children; c++) ++ vdev_load_log_state(nvd->vdev_child[c], ovd->vdev_child[c]); ++ ++ if (nvd->vdev_ops->vdev_op_leaf) { ++ /* ++ * Restore the persistent vdev state ++ */ ++ nvd->vdev_offline = ovd->vdev_offline; ++ nvd->vdev_faulted = ovd->vdev_faulted; ++ nvd->vdev_degraded = ovd->vdev_degraded; ++ nvd->vdev_removed = ovd->vdev_removed; ++ } ++} ++ ++/* ++ * Determine if a log device has valid content. If the vdev was ++ * removed or faulted in the MOS config then we know that ++ * the content on the log device has already been written to the pool. ++ */ ++boolean_t ++vdev_log_state_valid(vdev_t *vd) ++{ ++ int c; ++ ++ if (vd->vdev_ops->vdev_op_leaf && !vd->vdev_faulted && ++ !vd->vdev_removed) ++ return (B_TRUE); ++ ++ for (c = 0; c < vd->vdev_children; c++) ++ if (vdev_log_state_valid(vd->vdev_child[c])) ++ return (B_TRUE); ++ ++ return (B_FALSE); ++} ++ ++/* ++ * Expand a vdev if possible. ++ */ ++void ++vdev_expand(vdev_t *vd, uint64_t txg) ++{ ++ ASSERT(vd->vdev_top == vd); ++ ASSERT(spa_config_held(vd->vdev_spa, SCL_ALL, RW_WRITER) == SCL_ALL); ++ ++ if ((vd->vdev_asize >> vd->vdev_ms_shift) > vd->vdev_ms_count) { ++ VERIFY(vdev_metaslab_init(vd, txg) == 0); ++ vdev_config_dirty(vd); ++ } ++} ++ ++/* ++ * Split a vdev. ++ */ ++void ++vdev_split(vdev_t *vd) ++{ ++ vdev_t *cvd, *pvd = vd->vdev_parent; ++ ++ vdev_remove_child(pvd, vd); ++ vdev_compact_children(pvd); ++ ++ cvd = pvd->vdev_child[0]; ++ if (pvd->vdev_children == 1) { ++ vdev_remove_parent(cvd); ++ cvd->vdev_splitting = B_TRUE; ++ } ++ vdev_propagate_state(cvd); ++} ++ ++#if defined(_KERNEL) && defined(HAVE_SPL) ++EXPORT_SYMBOL(vdev_fault); ++EXPORT_SYMBOL(vdev_degrade); ++EXPORT_SYMBOL(vdev_online); ++EXPORT_SYMBOL(vdev_offline); ++EXPORT_SYMBOL(vdev_clear); ++ ++module_param(zfs_scrub_limit, int, 0644); ++MODULE_PARM_DESC(zfs_scrub_limit, "Max scrub/resilver I/O per leaf vdev"); ++#endif +diff -uNr linux-3.2.33-go.orig/fs/zfs/zfs/vdev_cache.c linux-3.2.33-go/fs/zfs/zfs/vdev_cache.c +--- linux-3.2.33-go.orig/fs/zfs/zfs/vdev_cache.c 1970-01-01 01:00:00.000000000 +0100 ++++ linux-3.2.33-go/fs/zfs/zfs/vdev_cache.c 2012-11-16 23:25:34.347039358 +0100 +@@ -0,0 +1,436 @@ ++/* ++ * CDDL HEADER START ++ * ++ * The contents of this file are subject to the terms of the ++ * Common Development and Distribution License (the "License"). ++ * You may not use this file except in compliance with the License. ++ * ++ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE ++ * or http://www.opensolaris.org/os/licensing. ++ * See the License for the specific language governing permissions ++ * and limitations under the License. ++ * ++ * When distributing Covered Code, include this CDDL HEADER in each ++ * file and include the License file at usr/src/OPENSOLARIS.LICENSE. ++ * If applicable, add the following below this CDDL HEADER, with the ++ * fields enclosed by brackets "[]" replaced with your own identifying ++ * information: Portions Copyright [yyyy] [name of copyright owner] ++ * ++ * CDDL HEADER END ++ */ ++/* ++ * Copyright 2009 Sun Microsystems, Inc. All rights reserved. ++ * Use is subject to license terms. 
++ */ ++ ++#include ++#include ++#include ++#include ++#include ++ ++/* ++ * Virtual device read-ahead caching. ++ * ++ * This file implements a simple LRU read-ahead cache. When the DMU reads ++ * a given block, it will often want other, nearby blocks soon thereafter. ++ * We take advantage of this by reading a larger disk region and caching ++ * the result. In the best case, this can turn 128 back-to-back 512-byte ++ * reads into a single 64k read followed by 127 cache hits; this reduces ++ * latency dramatically. In the worst case, it can turn an isolated 512-byte ++ * read into a 64k read, which doesn't affect latency all that much but is ++ * terribly wasteful of bandwidth. A more intelligent version of the cache ++ * could keep track of access patterns and not do read-ahead unless it sees ++ * at least two temporally close I/Os to the same region. Currently, only ++ * metadata I/O is inflated. A futher enhancement could take advantage of ++ * more semantic information about the I/O. And it could use something ++ * faster than an AVL tree; that was chosen solely for convenience. ++ * ++ * There are five cache operations: allocate, fill, read, write, evict. ++ * ++ * (1) Allocate. This reserves a cache entry for the specified region. ++ * We separate the allocate and fill operations so that multiple threads ++ * don't generate I/O for the same cache miss. ++ * ++ * (2) Fill. When the I/O for a cache miss completes, the fill routine ++ * places the data in the previously allocated cache entry. ++ * ++ * (3) Read. Read data from the cache. ++ * ++ * (4) Write. Update cache contents after write completion. ++ * ++ * (5) Evict. When allocating a new entry, we evict the oldest (LRU) entry ++ * if the total cache size exceeds zfs_vdev_cache_size. ++ */ ++ ++/* ++ * These tunables are for performance analysis. ++ */ ++/* ++ * All i/os smaller than zfs_vdev_cache_max will be turned into ++ * 1<ve_offset < ve2->ve_offset) ++ return (-1); ++ if (ve1->ve_offset > ve2->ve_offset) ++ return (1); ++ return (0); ++} ++ ++static int ++vdev_cache_lastused_compare(const void *a1, const void *a2) ++{ ++ const vdev_cache_entry_t *ve1 = a1; ++ const vdev_cache_entry_t *ve2 = a2; ++ ++ if (ve1->ve_lastused < ve2->ve_lastused) ++ return (-1); ++ if (ve1->ve_lastused > ve2->ve_lastused) ++ return (1); ++ ++ /* ++ * Among equally old entries, sort by offset to ensure uniqueness. ++ */ ++ return (vdev_cache_offset_compare(a1, a2)); ++} ++ ++/* ++ * Evict the specified entry from the cache. ++ */ ++static void ++vdev_cache_evict(vdev_cache_t *vc, vdev_cache_entry_t *ve) ++{ ++ ASSERT(MUTEX_HELD(&vc->vc_lock)); ++ ASSERT(ve->ve_fill_io == NULL); ++ ASSERT(ve->ve_data != NULL); ++ ++ avl_remove(&vc->vc_lastused_tree, ve); ++ avl_remove(&vc->vc_offset_tree, ve); ++ zio_buf_free(ve->ve_data, VCBS); ++ kmem_free(ve, sizeof (vdev_cache_entry_t)); ++} ++ ++/* ++ * Allocate an entry in the cache. At the point we don't have the data, ++ * we're just creating a placeholder so that multiple threads don't all ++ * go off and read the same blocks. ++ */ ++static vdev_cache_entry_t * ++vdev_cache_allocate(zio_t *zio) ++{ ++ vdev_cache_t *vc = &zio->io_vd->vdev_cache; ++ uint64_t offset = P2ALIGN(zio->io_offset, VCBS); ++ vdev_cache_entry_t *ve; ++ ++ ASSERT(MUTEX_HELD(&vc->vc_lock)); ++ ++ if (zfs_vdev_cache_size == 0) ++ return (NULL); ++ ++ /* ++ * If adding a new entry would exceed the cache size, ++ * evict the oldest entry (LRU). 
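++ * Illustrative example (not part of the upstream file; assumes the
++ * default 64 KiB cache line, i.e. a cache block shift of 16): a
++ * 512-byte read at offset 0x12345200 is served from the line whose
++ * ve_offset is P2ALIGN(0x12345200, 0x10000) = 0x12340000, and the data
++ * is copied out of ve_data at phase P2PHASE(0x12345200, 0x10000) =
++ * 0x5200, which is exactly how vdev_cache_allocate() and
++ * vdev_cache_hit() compute it below.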
++ */ ++ if ((avl_numnodes(&vc->vc_lastused_tree) << zfs_vdev_cache_bshift) > ++ zfs_vdev_cache_size) { ++ ve = avl_first(&vc->vc_lastused_tree); ++ if (ve->ve_fill_io != NULL) ++ return (NULL); ++ ASSERT(ve->ve_hits != 0); ++ vdev_cache_evict(vc, ve); ++ } ++ ++ ve = kmem_zalloc(sizeof (vdev_cache_entry_t), KM_PUSHPAGE); ++ ve->ve_offset = offset; ++ ve->ve_lastused = ddi_get_lbolt(); ++ ve->ve_data = zio_buf_alloc(VCBS); ++ ++ avl_add(&vc->vc_offset_tree, ve); ++ avl_add(&vc->vc_lastused_tree, ve); ++ ++ return (ve); ++} ++ ++static void ++vdev_cache_hit(vdev_cache_t *vc, vdev_cache_entry_t *ve, zio_t *zio) ++{ ++ uint64_t cache_phase = P2PHASE(zio->io_offset, VCBS); ++ ++ ASSERT(MUTEX_HELD(&vc->vc_lock)); ++ ASSERT(ve->ve_fill_io == NULL); ++ ++ if (ve->ve_lastused != ddi_get_lbolt()) { ++ avl_remove(&vc->vc_lastused_tree, ve); ++ ve->ve_lastused = ddi_get_lbolt(); ++ avl_add(&vc->vc_lastused_tree, ve); ++ } ++ ++ ve->ve_hits++; ++ bcopy(ve->ve_data + cache_phase, zio->io_data, zio->io_size); ++} ++ ++/* ++ * Fill a previously allocated cache entry with data. ++ */ ++static void ++vdev_cache_fill(zio_t *fio) ++{ ++ vdev_t *vd = fio->io_vd; ++ vdev_cache_t *vc = &vd->vdev_cache; ++ vdev_cache_entry_t *ve = fio->io_private; ++ zio_t *pio; ++ ++ ASSERT(fio->io_size == VCBS); ++ ++ /* ++ * Add data to the cache. ++ */ ++ mutex_enter(&vc->vc_lock); ++ ++ ASSERT(ve->ve_fill_io == fio); ++ ASSERT(ve->ve_offset == fio->io_offset); ++ ASSERT(ve->ve_data == fio->io_data); ++ ++ ve->ve_fill_io = NULL; ++ ++ /* ++ * Even if this cache line was invalidated by a missed write update, ++ * any reads that were queued up before the missed update are still ++ * valid, so we can satisfy them from this line before we evict it. ++ */ ++ while ((pio = zio_walk_parents(fio)) != NULL) ++ vdev_cache_hit(vc, ve, pio); ++ ++ if (fio->io_error || ve->ve_missed_update) ++ vdev_cache_evict(vc, ve); ++ ++ mutex_exit(&vc->vc_lock); ++} ++ ++/* ++ * Read data from the cache. Returns 0 on cache hit, errno on a miss. ++ */ ++int ++vdev_cache_read(zio_t *zio) ++{ ++ vdev_cache_t *vc = &zio->io_vd->vdev_cache; ++ vdev_cache_entry_t *ve, *ve_search; ++ uint64_t cache_offset = P2ALIGN(zio->io_offset, VCBS); ++ ASSERTV(uint64_t cache_phase = P2PHASE(zio->io_offset, VCBS);) ++ zio_t *fio; ++ ++ ASSERT(zio->io_type == ZIO_TYPE_READ); ++ ++ if (zio->io_flags & ZIO_FLAG_DONT_CACHE) ++ return (EINVAL); ++ ++ if (zio->io_size > zfs_vdev_cache_max) ++ return (EOVERFLOW); ++ ++ /* ++ * If the I/O straddles two or more cache blocks, don't cache it. 
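++ * For example (illustrative, assuming the default 64 KiB cache line):
++ * a 16 KiB read starting at offset 60 KiB covers 60K-76K and therefore
++ * crosses the 64 KiB line boundary, so P2BOUNDARY() below is non-zero
++ * and the read bypasses the cache with EXDEV.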
++ */ ++ if (P2BOUNDARY(zio->io_offset, zio->io_size, VCBS)) ++ return (EXDEV); ++ ++ ASSERT(cache_phase + zio->io_size <= VCBS); ++ ++ mutex_enter(&vc->vc_lock); ++ ++ ve_search = kmem_alloc(sizeof(vdev_cache_entry_t), KM_PUSHPAGE); ++ ve_search->ve_offset = cache_offset; ++ ve = avl_find(&vc->vc_offset_tree, ve_search, NULL); ++ kmem_free(ve_search, sizeof(vdev_cache_entry_t)); ++ ++ if (ve != NULL) { ++ if (ve->ve_missed_update) { ++ mutex_exit(&vc->vc_lock); ++ return (ESTALE); ++ } ++ ++ if ((fio = ve->ve_fill_io) != NULL) { ++ zio_vdev_io_bypass(zio); ++ zio_add_child(zio, fio); ++ mutex_exit(&vc->vc_lock); ++ VDCSTAT_BUMP(vdc_stat_delegations); ++ return (0); ++ } ++ ++ vdev_cache_hit(vc, ve, zio); ++ zio_vdev_io_bypass(zio); ++ ++ mutex_exit(&vc->vc_lock); ++ VDCSTAT_BUMP(vdc_stat_hits); ++ return (0); ++ } ++ ++ ve = vdev_cache_allocate(zio); ++ ++ if (ve == NULL) { ++ mutex_exit(&vc->vc_lock); ++ return (ENOMEM); ++ } ++ ++ fio = zio_vdev_delegated_io(zio->io_vd, cache_offset, ++ ve->ve_data, VCBS, ZIO_TYPE_READ, ZIO_PRIORITY_CACHE_FILL, ++ ZIO_FLAG_DONT_CACHE, vdev_cache_fill, ve); ++ ++ ve->ve_fill_io = fio; ++ zio_vdev_io_bypass(zio); ++ zio_add_child(zio, fio); ++ ++ mutex_exit(&vc->vc_lock); ++ zio_nowait(fio); ++ VDCSTAT_BUMP(vdc_stat_misses); ++ ++ return (0); ++} ++ ++/* ++ * Update cache contents upon write completion. ++ */ ++void ++vdev_cache_write(zio_t *zio) ++{ ++ vdev_cache_t *vc = &zio->io_vd->vdev_cache; ++ vdev_cache_entry_t *ve, ve_search; ++ uint64_t io_start = zio->io_offset; ++ uint64_t io_end = io_start + zio->io_size; ++ uint64_t min_offset = P2ALIGN(io_start, VCBS); ++ uint64_t max_offset = P2ROUNDUP(io_end, VCBS); ++ avl_index_t where; ++ ++ ASSERT(zio->io_type == ZIO_TYPE_WRITE); ++ ++ mutex_enter(&vc->vc_lock); ++ ++ ve_search.ve_offset = min_offset; ++ ve = avl_find(&vc->vc_offset_tree, &ve_search, &where); ++ ++ if (ve == NULL) ++ ve = avl_nearest(&vc->vc_offset_tree, where, AVL_AFTER); ++ ++ while (ve != NULL && ve->ve_offset < max_offset) { ++ uint64_t start = MAX(ve->ve_offset, io_start); ++ uint64_t end = MIN(ve->ve_offset + VCBS, io_end); ++ ++ if (ve->ve_fill_io != NULL) { ++ ve->ve_missed_update = 1; ++ } else { ++ bcopy((char *)zio->io_data + start - io_start, ++ ve->ve_data + start - ve->ve_offset, end - start); ++ } ++ ve = AVL_NEXT(&vc->vc_offset_tree, ve); ++ } ++ mutex_exit(&vc->vc_lock); ++} ++ ++void ++vdev_cache_purge(vdev_t *vd) ++{ ++ vdev_cache_t *vc = &vd->vdev_cache; ++ vdev_cache_entry_t *ve; ++ ++ mutex_enter(&vc->vc_lock); ++ while ((ve = avl_first(&vc->vc_offset_tree)) != NULL) ++ vdev_cache_evict(vc, ve); ++ mutex_exit(&vc->vc_lock); ++} ++ ++void ++vdev_cache_init(vdev_t *vd) ++{ ++ vdev_cache_t *vc = &vd->vdev_cache; ++ ++ mutex_init(&vc->vc_lock, NULL, MUTEX_DEFAULT, NULL); ++ ++ avl_create(&vc->vc_offset_tree, vdev_cache_offset_compare, ++ sizeof (vdev_cache_entry_t), ++ offsetof(struct vdev_cache_entry, ve_offset_node)); ++ ++ avl_create(&vc->vc_lastused_tree, vdev_cache_lastused_compare, ++ sizeof (vdev_cache_entry_t), ++ offsetof(struct vdev_cache_entry, ve_lastused_node)); ++} ++ ++void ++vdev_cache_fini(vdev_t *vd) ++{ ++ vdev_cache_t *vc = &vd->vdev_cache; ++ ++ vdev_cache_purge(vd); ++ ++ avl_destroy(&vc->vc_offset_tree); ++ avl_destroy(&vc->vc_lastused_tree); ++ ++ mutex_destroy(&vc->vc_lock); ++} ++ ++void ++vdev_cache_stat_init(void) ++{ ++ vdc_ksp = kstat_create("zfs", 0, "vdev_cache_stats", "misc", ++ KSTAT_TYPE_NAMED, sizeof (vdc_stats) / sizeof (kstat_named_t), ++ KSTAT_FLAG_VIRTUAL); ++ if (vdc_ksp != 
NULL) { ++ vdc_ksp->ks_data = &vdc_stats; ++ kstat_install(vdc_ksp); ++ } ++} ++ ++void ++vdev_cache_stat_fini(void) ++{ ++ if (vdc_ksp != NULL) { ++ kstat_delete(vdc_ksp); ++ vdc_ksp = NULL; ++ } ++} ++ ++#if defined(_KERNEL) && defined(HAVE_SPL) ++module_param(zfs_vdev_cache_max, int, 0644); ++MODULE_PARM_DESC(zfs_vdev_cache_max, "Inflate reads small than max"); ++ ++module_param(zfs_vdev_cache_size, int, 0444); ++MODULE_PARM_DESC(zfs_vdev_cache_size, "Total size of the per-disk cache"); ++ ++module_param(zfs_vdev_cache_bshift, int, 0644); ++MODULE_PARM_DESC(zfs_vdev_cache_bshift, "Shift size to inflate reads too"); ++#endif +diff -uNr linux-3.2.33-go.orig/fs/zfs/zfs/vdev_disk.c linux-3.2.33-go/fs/zfs/zfs/vdev_disk.c +--- linux-3.2.33-go.orig/fs/zfs/zfs/vdev_disk.c 1970-01-01 01:00:00.000000000 +0100 ++++ linux-3.2.33-go/fs/zfs/zfs/vdev_disk.c 2012-11-16 23:25:34.353039289 +0100 +@@ -0,0 +1,841 @@ ++/* ++ * CDDL HEADER START ++ * ++ * The contents of this file are subject to the terms of the ++ * Common Development and Distribution License (the "License"). ++ * You may not use this file except in compliance with the License. ++ * ++ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE ++ * or http://www.opensolaris.org/os/licensing. ++ * See the License for the specific language governing permissions ++ * and limitations under the License. ++ * ++ * When distributing Covered Code, include this CDDL HEADER in each ++ * file and include the License file at usr/src/OPENSOLARIS.LICENSE. ++ * If applicable, add the following below this CDDL HEADER, with the ++ * fields enclosed by brackets "[]" replaced with your own identifying ++ * information: Portions Copyright [yyyy] [name of copyright owner] ++ * ++ * CDDL HEADER END ++ */ ++/* ++ * Copyright (C) 2008-2010 Lawrence Livermore National Security, LLC. ++ * Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER). ++ * Rewritten for Linux by Brian Behlendorf . ++ * LLNL-CODE-403049. ++ */ ++ ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++ ++char *zfs_vdev_scheduler = VDEV_SCHEDULER; ++ ++/* ++ * Virtual device vector for disks. 
++ */ ++typedef struct dio_request { ++ struct completion dr_comp; /* Completion for sync IO */ ++ atomic_t dr_ref; /* References */ ++ zio_t *dr_zio; /* Parent ZIO */ ++ int dr_rw; /* Read/Write */ ++ int dr_error; /* Bio error */ ++ int dr_bio_count; /* Count of bio's */ ++ struct bio *dr_bio[0]; /* Attached bio's */ ++} dio_request_t; ++ ++ ++#ifdef HAVE_OPEN_BDEV_EXCLUSIVE ++static fmode_t ++vdev_bdev_mode(int smode) ++{ ++ fmode_t mode = 0; ++ ++ ASSERT3S(smode & (FREAD | FWRITE), !=, 0); ++ ++ if (smode & FREAD) ++ mode |= FMODE_READ; ++ ++ if (smode & FWRITE) ++ mode |= FMODE_WRITE; ++ ++ return mode; ++} ++#else ++static int ++vdev_bdev_mode(int smode) ++{ ++ int mode = 0; ++ ++ ASSERT3S(smode & (FREAD | FWRITE), !=, 0); ++ ++ if ((smode & FREAD) && !(smode & FWRITE)) ++ mode = MS_RDONLY; ++ ++ return mode; ++} ++#endif /* HAVE_OPEN_BDEV_EXCLUSIVE */ ++ ++static uint64_t ++bdev_capacity(struct block_device *bdev) ++{ ++ struct hd_struct *part = bdev->bd_part; ++ ++ /* The partition capacity referenced by the block device */ ++ if (part) ++ return (part->nr_sects << 9); ++ ++ /* Otherwise assume the full device capacity */ ++ return (get_capacity(bdev->bd_disk) << 9); ++} ++ ++static void ++vdev_disk_error(zio_t *zio) ++{ ++#ifdef ZFS_DEBUG ++ printk("ZFS: zio error=%d type=%d offset=%llu size=%llu " ++ "flags=%x delay=%llu\n", zio->io_error, zio->io_type, ++ (u_longlong_t)zio->io_offset, (u_longlong_t)zio->io_size, ++ zio->io_flags, (u_longlong_t)zio->io_delay); ++#endif ++} ++ ++/* ++ * Use the Linux 'noop' elevator for zfs managed block devices. This ++ * strikes the ideal balance by allowing the zfs elevator to do all ++ * request ordering and prioritization. While allowing the Linux ++ * elevator to do the maximum front/back merging allowed by the ++ * physical device. This yields the largest possible requests for ++ * the device with the lowest total overhead. ++ */ ++static int ++vdev_elevator_switch(vdev_t *v, char *elevator) ++{ ++ vdev_disk_t *vd = v->vdev_tsd; ++ struct block_device *bdev = vd->vd_bdev; ++ struct request_queue *q = bdev_get_queue(bdev); ++ char *device = bdev->bd_disk->disk_name; ++ int error; ++ ++ /* Skip devices which are not whole disks (partitions) */ ++ if (!v->vdev_wholedisk) ++ return (0); ++ ++ /* Skip devices without schedulers (loop, ram, dm, etc) */ ++ if (!q->elevator || !blk_queue_stackable(q)) ++ return (0); ++ ++ /* Leave existing scheduler when set to "none" */ ++ if (!strncmp(elevator, "none", 4) && (strlen(elevator) == 4)) ++ return (0); ++ ++#ifdef HAVE_ELEVATOR_CHANGE ++ error = elevator_change(q, elevator); ++#else ++ /* For pre-2.6.36 kernels elevator_change() is not available. ++ * Therefore we fall back to using a usermodehelper to echo the ++ * elevator into sysfs; This requires /bin/echo and sysfs to be ++ * mounted which may not be true early in the boot process. ++ */ ++# define SET_SCHEDULER_CMD \ ++ "exec 0/sys/block/%s/queue/scheduler " \ ++ " 2>/dev/null; " \ ++ "echo %s" ++ ++ { ++ char *argv[] = { "/bin/sh", "-c", NULL, NULL }; ++ char *envp[] = { NULL }; ++ ++ argv[2] = kmem_asprintf(SET_SCHEDULER_CMD, device, elevator); ++ error = call_usermodehelper(argv[0], argv, envp, 1); ++ strfree(argv[2]); ++ } ++#endif /* HAVE_ELEVATOR_CHANGE */ ++ if (error) ++ printk("ZFS: Unable to set \"%s\" scheduler for %s (%s): %d\n", ++ elevator, v->vdev_path, device, error); ++ ++ return (error); ++} ++ ++/* ++ * Expanding a whole disk vdev involves invoking BLKRRPART on the ++ * whole disk device. 
This poses a problem, because BLKRRPART will ++ * return EBUSY if one of the disk's partitions is open. That's why ++ * we have to do it here, just before opening the data partition. ++ * Unfortunately, BLKRRPART works by dropping all partitions and ++ * recreating them, which means that for a short time window, all ++ * /dev/sdxN device files disappear (until udev recreates them). ++ * This means two things: ++ * - When we open the data partition just after a BLKRRPART, we ++ * can't do it using the normal device file path because of the ++ * obvious race condition with udev. Instead, we use reliable ++ * kernel APIs to get a handle to the new partition device from ++ * the whole disk device. ++ * - Because vdev_disk_open() initially needs to find the device ++ * using its path, multiple vdev_disk_open() invocations in ++ * short succession on the same disk with BLKRRPARTs in the ++ * middle have a high probability of failure (because of the ++ * race condition with udev). A typical situation where this ++ * might happen is when the zpool userspace tool does a ++ * TRYIMPORT immediately followed by an IMPORT. For this ++ * reason, we only invoke BLKRRPART in the module when strictly ++ * necessary (zpool online -e case), and rely on userspace to ++ * do it when possible. ++ */ ++static struct block_device * ++vdev_disk_rrpart(const char *path, int mode, vdev_disk_t *vd) ++{ ++#if defined(HAVE_3ARG_BLKDEV_GET) && defined(HAVE_GET_GENDISK) ++ struct block_device *bdev, *result = ERR_PTR(-ENXIO); ++ struct gendisk *disk; ++ int error, partno; ++ ++ bdev = vdev_bdev_open(path, vdev_bdev_mode(mode), vd); ++ if (IS_ERR(bdev)) ++ return bdev; ++ ++ disk = get_gendisk(bdev->bd_dev, &partno); ++ vdev_bdev_close(bdev, vdev_bdev_mode(mode)); ++ ++ if (disk) { ++ bdev = bdget(disk_devt(disk)); ++ if (bdev) { ++ error = blkdev_get(bdev, vdev_bdev_mode(mode), vd); ++ if (error == 0) ++ error = ioctl_by_bdev(bdev, BLKRRPART, 0); ++ vdev_bdev_close(bdev, vdev_bdev_mode(mode)); ++ } ++ ++ bdev = bdget_disk(disk, partno); ++ if (bdev) { ++ error = blkdev_get(bdev, ++ vdev_bdev_mode(mode) | FMODE_EXCL, vd); ++ if (error == 0) ++ result = bdev; ++ } ++ put_disk(disk); ++ } ++ ++ return result; ++#else ++ return ERR_PTR(-EOPNOTSUPP); ++#endif /* defined(HAVE_3ARG_BLKDEV_GET) && defined(HAVE_GET_GENDISK) */ ++} ++ ++static int ++vdev_disk_open(vdev_t *v, uint64_t *psize, uint64_t *max_psize, ++ uint64_t *ashift) ++{ ++ struct block_device *bdev = ERR_PTR(-ENXIO); ++ vdev_disk_t *vd; ++ int mode, block_size; ++ ++ /* Must have a pathname and it must be absolute. */ ++ if (v->vdev_path == NULL || v->vdev_path[0] != '/') { ++ v->vdev_stat.vs_aux = VDEV_AUX_BAD_LABEL; ++ return EINVAL; ++ } ++ ++ vd = kmem_zalloc(sizeof(vdev_disk_t), KM_PUSHPAGE); ++ if (vd == NULL) ++ return ENOMEM; ++ ++ /* ++ * Devices are always opened by the path provided at configuration ++ * time. This means that if the provided path is a udev by-id path ++ * then drives may be recabled without an issue. If the provided ++ * path is a udev by-path path then the physical location information ++ * will be preserved. This can be critical for more complicated ++ * configurations where drives are located in specific physical ++ * locations to maximize the systems tolerence to component failure. ++ * Alternately you can provide your own udev rule to flexibly map ++ * the drives as you see fit. It is not advised that you use the ++ * /dev/[hd]d devices which may be reorder due to probing order. 
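++ * As an illustration (hypothetical paths, not taken from this patch):
++ * a by-id name such as /dev/disk/by-id/ata-SAMPLE_DISK_SERIAL-part1
++ * keeps following the same drive if it is recabled, whereas a by-path
++ * name such as /dev/disk/by-path/pci-0000:00:1f.2-ata-1 identifies the
++ * physical slot instead.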
++ * Devices in the wrong locations will be detected by the higher ++ * level vdev validation. ++ */ ++ mode = spa_mode(v->vdev_spa); ++ if (v->vdev_wholedisk && v->vdev_expanding) ++ bdev = vdev_disk_rrpart(v->vdev_path, mode, vd); ++ if (IS_ERR(bdev)) ++ bdev = vdev_bdev_open(v->vdev_path, vdev_bdev_mode(mode), vd); ++ if (IS_ERR(bdev)) { ++ kmem_free(vd, sizeof(vdev_disk_t)); ++ return -PTR_ERR(bdev); ++ } ++ ++ v->vdev_tsd = vd; ++ vd->vd_bdev = bdev; ++ block_size = vdev_bdev_block_size(bdev); ++ ++ /* We think the wholedisk property should always be set when this ++ * function is called. ASSERT here so if any legitimate cases exist ++ * where it's not set, we'll find them during debugging. If we never ++ * hit the ASSERT, this and the following conditional statement can be ++ * removed. */ ++ ASSERT3S(v->vdev_wholedisk, !=, -1ULL); ++ ++ /* The wholedisk property was initialized to -1 in vdev_alloc() if it ++ * was unspecified. In that case, check if this is a whole device. ++ * When bdev->bd_contains == bdev we have a whole device and not simply ++ * a partition. */ ++ if (v->vdev_wholedisk == -1ULL) ++ v->vdev_wholedisk = (bdev->bd_contains == bdev); ++ ++ /* Clear the nowritecache bit, causes vdev_reopen() to try again. */ ++ v->vdev_nowritecache = B_FALSE; ++ ++ /* Physical volume size in bytes */ ++ *psize = bdev_capacity(bdev); ++ ++ /* TODO: report possible expansion size */ ++ *max_psize = *psize; ++ ++ /* Based on the minimum sector size set the block size */ ++ *ashift = highbit(MAX(block_size, SPA_MINBLOCKSIZE)) - 1; ++ ++ /* Try to set the io scheduler elevator algorithm */ ++ (void) vdev_elevator_switch(v, zfs_vdev_scheduler); ++ ++ return 0; ++} ++ ++static void ++vdev_disk_close(vdev_t *v) ++{ ++ vdev_disk_t *vd = v->vdev_tsd; ++ ++ if (vd == NULL) ++ return; ++ ++ if (vd->vd_bdev != NULL) ++ vdev_bdev_close(vd->vd_bdev, ++ vdev_bdev_mode(spa_mode(v->vdev_spa))); ++ ++ kmem_free(vd, sizeof(vdev_disk_t)); ++ v->vdev_tsd = NULL; ++} ++ ++static dio_request_t * ++vdev_disk_dio_alloc(int bio_count) ++{ ++ dio_request_t *dr; ++ int i; ++ ++ dr = kmem_zalloc(sizeof(dio_request_t) + ++ sizeof(struct bio *) * bio_count, KM_PUSHPAGE); ++ if (dr) { ++ init_completion(&dr->dr_comp); ++ atomic_set(&dr->dr_ref, 0); ++ dr->dr_bio_count = bio_count; ++ dr->dr_error = 0; ++ ++ for (i = 0; i < dr->dr_bio_count; i++) ++ dr->dr_bio[i] = NULL; ++ } ++ ++ return dr; ++} ++ ++static void ++vdev_disk_dio_free(dio_request_t *dr) ++{ ++ int i; ++ ++ for (i = 0; i < dr->dr_bio_count; i++) ++ if (dr->dr_bio[i]) ++ bio_put(dr->dr_bio[i]); ++ ++ kmem_free(dr, sizeof(dio_request_t) + ++ sizeof(struct bio *) * dr->dr_bio_count); ++} ++ ++static int ++vdev_disk_dio_is_sync(dio_request_t *dr) ++{ ++#ifdef HAVE_BIO_RW_SYNC ++ /* BIO_RW_SYNC preferred interface from 2.6.12-2.6.29 */ ++ return (dr->dr_rw & (1 << BIO_RW_SYNC)); ++#else ++# ifdef HAVE_BIO_RW_SYNCIO ++ /* BIO_RW_SYNCIO preferred interface from 2.6.30-2.6.35 */ ++ return (dr->dr_rw & (1 << BIO_RW_SYNCIO)); ++# else ++# ifdef HAVE_REQ_SYNC ++ /* REQ_SYNC preferred interface from 2.6.36-2.6.xx */ ++ return (dr->dr_rw & REQ_SYNC); ++# else ++# error "Unable to determine bio sync flag" ++# endif /* HAVE_REQ_SYNC */ ++# endif /* HAVE_BIO_RW_SYNC */ ++#endif /* HAVE_BIO_RW_SYNCIO */ ++} ++ ++static void ++vdev_disk_dio_get(dio_request_t *dr) ++{ ++ atomic_inc(&dr->dr_ref); ++} ++ ++static int ++vdev_disk_dio_put(dio_request_t *dr) ++{ ++ int rc = atomic_dec_return(&dr->dr_ref); ++ ++ /* ++ * Free the dio_request when the last reference is dropped 
and ++ * ensure zio_interpret is called only once with the correct zio ++ */ ++ if (rc == 0) { ++ zio_t *zio = dr->dr_zio; ++ int error = dr->dr_error; ++ ++ vdev_disk_dio_free(dr); ++ ++ if (zio) { ++ zio->io_delay = jiffies_to_msecs( ++ jiffies_64 - zio->io_delay); ++ zio->io_error = error; ++ ASSERT3S(zio->io_error, >=, 0); ++ if (zio->io_error) ++ vdev_disk_error(zio); ++ zio_interrupt(zio); ++ } ++ } ++ ++ return rc; ++} ++ ++BIO_END_IO_PROTO(vdev_disk_physio_completion, bio, size, error) ++{ ++ dio_request_t *dr = bio->bi_private; ++ int rc; ++ ++ /* Fatal error but print some useful debugging before asserting */ ++ if (dr == NULL) ++ PANIC("dr == NULL, bio->bi_private == NULL\n" ++ "bi_next: %p, bi_flags: %lx, bi_rw: %lu, bi_vcnt: %d\n" ++ "bi_idx: %d, bi_size: %d, bi_end_io: %p, bi_cnt: %d\n", ++ bio->bi_next, bio->bi_flags, bio->bi_rw, bio->bi_vcnt, ++ bio->bi_idx, bio->bi_size, bio->bi_end_io, ++ atomic_read(&bio->bi_cnt)); ++ ++#ifndef HAVE_2ARGS_BIO_END_IO_T ++ if (bio->bi_size) ++ return 1; ++#endif /* HAVE_2ARGS_BIO_END_IO_T */ ++ ++ if (error == 0 && !test_bit(BIO_UPTODATE, &bio->bi_flags)) ++ error = -EIO; ++ ++ if (dr->dr_error == 0) ++ dr->dr_error = -error; ++ ++ /* Drop reference aquired by __vdev_disk_physio */ ++ rc = vdev_disk_dio_put(dr); ++ ++ /* Wake up synchronous waiter this is the last outstanding bio */ ++ if ((rc == 1) && vdev_disk_dio_is_sync(dr)) ++ complete(&dr->dr_comp); ++ ++ BIO_END_IO_RETURN(0); ++} ++ ++static inline unsigned long ++bio_nr_pages(void *bio_ptr, unsigned int bio_size) ++{ ++ return ((((unsigned long)bio_ptr + bio_size + PAGE_SIZE - 1) >> ++ PAGE_SHIFT) - ((unsigned long)bio_ptr >> PAGE_SHIFT)); ++} ++ ++static unsigned int ++bio_map(struct bio *bio, void *bio_ptr, unsigned int bio_size) ++{ ++ unsigned int offset, size, i; ++ struct page *page; ++ ++ offset = offset_in_page(bio_ptr); ++ for (i = 0; i < bio->bi_max_vecs; i++) { ++ size = PAGE_SIZE - offset; ++ ++ if (bio_size <= 0) ++ break; ++ ++ if (size > bio_size) ++ size = bio_size; ++ ++ if (kmem_virt(bio_ptr)) ++ page = vmalloc_to_page(bio_ptr); ++ else ++ page = virt_to_page(bio_ptr); ++ ++ if (bio_add_page(bio, page, size, offset) != size) ++ break; ++ ++ bio_ptr += size; ++ bio_size -= size; ++ offset = 0; ++ } ++ ++ return bio_size; ++} ++ ++static int ++__vdev_disk_physio(struct block_device *bdev, zio_t *zio, caddr_t kbuf_ptr, ++ size_t kbuf_size, uint64_t kbuf_offset, int flags) ++{ ++ dio_request_t *dr; ++ caddr_t bio_ptr; ++ uint64_t bio_offset; ++ int bio_size, bio_count = 16; ++ int i = 0, error = 0; ++ ++ ASSERT3U(kbuf_offset + kbuf_size, <=, bdev->bd_inode->i_size); ++ ++retry: ++ dr = vdev_disk_dio_alloc(bio_count); ++ if (dr == NULL) ++ return ENOMEM; ++ ++ if (zio && !(zio->io_flags & (ZIO_FLAG_IO_RETRY | ZIO_FLAG_TRYHARD))) ++ bio_set_flags_failfast(bdev, &flags); ++ ++ dr->dr_zio = zio; ++ dr->dr_rw = flags; ++ ++ /* ++ * When the IO size exceeds the maximum bio size for the request ++ * queue we are forced to break the IO in multiple bio's and wait ++ * for them all to complete. Ideally, all pool users will set ++ * their volume block size to match the maximum request size and ++ * the common case will be one bio per vdev IO request. ++ */ ++ bio_ptr = kbuf_ptr; ++ bio_offset = kbuf_offset; ++ bio_size = kbuf_size; ++ for (i = 0; i <= dr->dr_bio_count; i++) { ++ ++ /* Finished constructing bio's for given buffer */ ++ if (bio_size <= 0) ++ break; ++ ++ /* ++ * By default only 'bio_count' bio's per dio are allowed. 
++ * However, if we find ourselves in a situation where more ++ * are needed we allocate a larger dio and warn the user. ++ */ ++ if (dr->dr_bio_count == i) { ++ vdev_disk_dio_free(dr); ++ bio_count *= 2; ++ goto retry; ++ } ++ ++ dr->dr_bio[i] = bio_alloc(GFP_NOIO, ++ bio_nr_pages(bio_ptr, bio_size)); ++ if (dr->dr_bio[i] == NULL) { ++ vdev_disk_dio_free(dr); ++ return ENOMEM; ++ } ++ ++ /* Matching put called by vdev_disk_physio_completion */ ++ vdev_disk_dio_get(dr); ++ ++ dr->dr_bio[i]->bi_bdev = bdev; ++ dr->dr_bio[i]->bi_sector = bio_offset >> 9; ++ dr->dr_bio[i]->bi_rw = dr->dr_rw; ++ dr->dr_bio[i]->bi_end_io = vdev_disk_physio_completion; ++ dr->dr_bio[i]->bi_private = dr; ++ ++ /* Remaining size is returned to become the new size */ ++ bio_size = bio_map(dr->dr_bio[i], bio_ptr, bio_size); ++ ++ /* Advance in buffer and construct another bio if needed */ ++ bio_ptr += dr->dr_bio[i]->bi_size; ++ bio_offset += dr->dr_bio[i]->bi_size; ++ } ++ ++ /* Extra reference to protect dio_request during submit_bio */ ++ vdev_disk_dio_get(dr); ++ if (zio) ++ zio->io_delay = jiffies_64; ++ ++ /* Submit all bio's associated with this dio */ ++ for (i = 0; i < dr->dr_bio_count; i++) ++ if (dr->dr_bio[i]) ++ submit_bio(dr->dr_rw, dr->dr_bio[i]); ++ ++ /* ++ * On synchronous blocking requests we wait for all bio the completion ++ * callbacks to run. We will be woken when the last callback runs ++ * for this dio. We are responsible for putting the last dio_request ++ * reference will in turn put back the last bio references. The ++ * only synchronous consumer is vdev_disk_read_rootlabel() all other ++ * IO originating from vdev_disk_io_start() is asynchronous. ++ */ ++ if (vdev_disk_dio_is_sync(dr)) { ++ wait_for_completion(&dr->dr_comp); ++ error = dr->dr_error; ++ ASSERT3S(atomic_read(&dr->dr_ref), ==, 1); ++ } ++ ++ (void)vdev_disk_dio_put(dr); ++ ++ return error; ++} ++ ++int ++vdev_disk_physio(struct block_device *bdev, caddr_t kbuf, ++ size_t size, uint64_t offset, int flags) ++{ ++ bio_set_flags_failfast(bdev, &flags); ++ return __vdev_disk_physio(bdev, NULL, kbuf, size, offset, flags); ++} ++ ++/* 2.6.24 API change */ ++#ifdef HAVE_BIO_EMPTY_BARRIER ++BIO_END_IO_PROTO(vdev_disk_io_flush_completion, bio, size, rc) ++{ ++ zio_t *zio = bio->bi_private; ++ ++ zio->io_delay = jiffies_to_msecs(jiffies_64 - zio->io_delay); ++ zio->io_error = -rc; ++ if (rc && (rc == -EOPNOTSUPP)) ++ zio->io_vd->vdev_nowritecache = B_TRUE; ++ ++ bio_put(bio); ++ ASSERT3S(zio->io_error, >=, 0); ++ if (zio->io_error) ++ vdev_disk_error(zio); ++ zio_interrupt(zio); ++ ++ BIO_END_IO_RETURN(0); ++} ++ ++static int ++vdev_disk_io_flush(struct block_device *bdev, zio_t *zio) ++{ ++ struct request_queue *q; ++ struct bio *bio; ++ ++ q = bdev_get_queue(bdev); ++ if (!q) ++ return ENXIO; ++ ++ bio = bio_alloc(GFP_KERNEL, 0); ++ if (!bio) ++ return ENOMEM; ++ ++ bio->bi_end_io = vdev_disk_io_flush_completion; ++ bio->bi_private = zio; ++ bio->bi_bdev = bdev; ++ zio->io_delay = jiffies_64; ++ submit_bio(VDEV_WRITE_FLUSH_FUA, bio); ++ ++ return 0; ++} ++#else ++static int ++vdev_disk_io_flush(struct block_device *bdev, zio_t *zio) ++{ ++ return ENOTSUP; ++} ++#endif /* HAVE_BIO_EMPTY_BARRIER */ ++ ++static int ++vdev_disk_io_start(zio_t *zio) ++{ ++ vdev_t *v = zio->io_vd; ++ vdev_disk_t *vd = v->vdev_tsd; ++ int flags, error; ++ ++ switch (zio->io_type) { ++ case ZIO_TYPE_IOCTL: ++ ++ if (!vdev_readable(v)) { ++ zio->io_error = ENXIO; ++ return ZIO_PIPELINE_CONTINUE; ++ } ++ ++ switch (zio->io_cmd) { ++ case DKIOCFLUSHWRITECACHE: 
++ ++ if (zfs_nocacheflush) ++ break; ++ ++ if (v->vdev_nowritecache) { ++ zio->io_error = ENOTSUP; ++ break; ++ } ++ ++ error = vdev_disk_io_flush(vd->vd_bdev, zio); ++ if (error == 0) ++ return ZIO_PIPELINE_STOP; ++ ++ zio->io_error = error; ++ if (error == ENOTSUP) ++ v->vdev_nowritecache = B_TRUE; ++ ++ break; ++ ++ default: ++ zio->io_error = ENOTSUP; ++ } ++ ++ return ZIO_PIPELINE_CONTINUE; ++ ++ case ZIO_TYPE_WRITE: ++ flags = WRITE; ++ break; ++ ++ case ZIO_TYPE_READ: ++ flags = READ; ++ break; ++ ++ default: ++ zio->io_error = ENOTSUP; ++ return ZIO_PIPELINE_CONTINUE; ++ } ++ ++ error = __vdev_disk_physio(vd->vd_bdev, zio, zio->io_data, ++ zio->io_size, zio->io_offset, flags); ++ if (error) { ++ zio->io_error = error; ++ return ZIO_PIPELINE_CONTINUE; ++ } ++ ++ return ZIO_PIPELINE_STOP; ++} ++ ++static void ++vdev_disk_io_done(zio_t *zio) ++{ ++ /* ++ * If the device returned EIO, we revalidate the media. If it is ++ * determined the media has changed this triggers the asynchronous ++ * removal of the device from the configuration. ++ */ ++ if (zio->io_error == EIO) { ++ vdev_t *v = zio->io_vd; ++ vdev_disk_t *vd = v->vdev_tsd; ++ ++ if (check_disk_change(vd->vd_bdev)) { ++ vdev_bdev_invalidate(vd->vd_bdev); ++ v->vdev_remove_wanted = B_TRUE; ++ spa_async_request(zio->io_spa, SPA_ASYNC_REMOVE); ++ } ++ } ++} ++ ++static void ++vdev_disk_hold(vdev_t *vd) ++{ ++ ASSERT(spa_config_held(vd->vdev_spa, SCL_STATE, RW_WRITER)); ++ ++ /* We must have a pathname, and it must be absolute. */ ++ if (vd->vdev_path == NULL || vd->vdev_path[0] != '/') ++ return; ++ ++ /* ++ * Only prefetch path and devid info if the device has ++ * never been opened. ++ */ ++ if (vd->vdev_tsd != NULL) ++ return; ++ ++ /* XXX: Implement me as a vnode lookup for the device */ ++ vd->vdev_name_vp = NULL; ++ vd->vdev_devid_vp = NULL; ++} ++ ++static void ++vdev_disk_rele(vdev_t *vd) ++{ ++ ASSERT(spa_config_held(vd->vdev_spa, SCL_STATE, RW_WRITER)); ++ ++ /* XXX: Implement me as a vnode rele for the device */ ++} ++ ++vdev_ops_t vdev_disk_ops = { ++ vdev_disk_open, ++ vdev_disk_close, ++ vdev_default_asize, ++ vdev_disk_io_start, ++ vdev_disk_io_done, ++ NULL, ++ vdev_disk_hold, ++ vdev_disk_rele, ++ VDEV_TYPE_DISK, /* name of this vdev type */ ++ B_TRUE /* leaf vdev */ ++}; ++ ++/* ++ * Given the root disk device devid or pathname, read the label from ++ * the device, and construct a configuration nvlist. 
++ */ ++int ++vdev_disk_read_rootlabel(char *devpath, char *devid, nvlist_t **config) ++{ ++ struct block_device *bdev; ++ vdev_label_t *label; ++ uint64_t s, size; ++ int i; ++ ++ bdev = vdev_bdev_open(devpath, vdev_bdev_mode(FREAD), NULL); ++ if (IS_ERR(bdev)) ++ return -PTR_ERR(bdev); ++ ++ s = bdev_capacity(bdev); ++ if (s == 0) { ++ vdev_bdev_close(bdev, vdev_bdev_mode(FREAD)); ++ return EIO; ++ } ++ ++ size = P2ALIGN_TYPED(s, sizeof(vdev_label_t), uint64_t); ++ label = vmem_alloc(sizeof(vdev_label_t), KM_PUSHPAGE); ++ ++ for (i = 0; i < VDEV_LABELS; i++) { ++ uint64_t offset, state, txg = 0; ++ ++ /* read vdev label */ ++ offset = vdev_label_offset(size, i, 0); ++ if (vdev_disk_physio(bdev, (caddr_t)label, ++ VDEV_SKIP_SIZE + VDEV_PHYS_SIZE, offset, READ_SYNC) != 0) ++ continue; ++ ++ if (nvlist_unpack(label->vl_vdev_phys.vp_nvlist, ++ sizeof (label->vl_vdev_phys.vp_nvlist), config, 0) != 0) { ++ *config = NULL; ++ continue; ++ } ++ ++ if (nvlist_lookup_uint64(*config, ZPOOL_CONFIG_POOL_STATE, ++ &state) != 0 || state >= POOL_STATE_DESTROYED) { ++ nvlist_free(*config); ++ *config = NULL; ++ continue; ++ } ++ ++ if (nvlist_lookup_uint64(*config, ZPOOL_CONFIG_POOL_TXG, ++ &txg) != 0 || txg == 0) { ++ nvlist_free(*config); ++ *config = NULL; ++ continue; ++ } ++ ++ break; ++ } ++ ++ vmem_free(label, sizeof(vdev_label_t)); ++ vdev_bdev_close(bdev, vdev_bdev_mode(FREAD)); ++ ++ return 0; ++} ++ ++module_param(zfs_vdev_scheduler, charp, 0644); ++MODULE_PARM_DESC(zfs_vdev_scheduler, "I/O scheduler"); +diff -uNr linux-3.2.33-go.orig/fs/zfs/zfs/vdev_file.c linux-3.2.33-go/fs/zfs/zfs/vdev_file.c +--- linux-3.2.33-go.orig/fs/zfs/zfs/vdev_file.c 1970-01-01 01:00:00.000000000 +0100 ++++ linux-3.2.33-go/fs/zfs/zfs/vdev_file.c 2012-11-16 23:25:34.352039300 +0100 +@@ -0,0 +1,219 @@ ++/* ++ * CDDL HEADER START ++ * ++ * The contents of this file are subject to the terms of the ++ * Common Development and Distribution License (the "License"). ++ * You may not use this file except in compliance with the License. ++ * ++ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE ++ * or http://www.opensolaris.org/os/licensing. ++ * See the License for the specific language governing permissions ++ * and limitations under the License. ++ * ++ * When distributing Covered Code, include this CDDL HEADER in each ++ * file and include the License file at usr/src/OPENSOLARIS.LICENSE. ++ * If applicable, add the following below this CDDL HEADER, with the ++ * fields enclosed by brackets "[]" replaced with your own identifying ++ * information: Portions Copyright [yyyy] [name of copyright owner] ++ * ++ * CDDL HEADER END ++ */ ++/* ++ * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2012 by Delphix. All rights reserved. ++ */ ++ ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++ ++/* ++ * Virtual device vector for files. ++ */ ++ ++static void ++vdev_file_hold(vdev_t *vd) ++{ ++ ASSERT(vd->vdev_path != NULL); ++} ++ ++static void ++vdev_file_rele(vdev_t *vd) ++{ ++ ASSERT(vd->vdev_path != NULL); ++} ++ ++static int ++vdev_file_open(vdev_t *vd, uint64_t *psize, uint64_t *max_psize, ++ uint64_t *ashift) ++{ ++ vdev_file_t *vf; ++ vnode_t *vp; ++ vattr_t vattr; ++ int error; ++ ++ /* ++ * We must have a pathname, and it must be absolute. 
++ */ ++ if (vd->vdev_path == NULL || vd->vdev_path[0] != '/') { ++ vd->vdev_stat.vs_aux = VDEV_AUX_BAD_LABEL; ++ return (EINVAL); ++ } ++ ++ /* ++ * Reopen the device if it's not currently open. Otherwise, ++ * just update the physical size of the device. ++ */ ++ if (vd->vdev_tsd != NULL) { ++ ASSERT(vd->vdev_reopening); ++ vf = vd->vdev_tsd; ++ goto skip_open; ++ } ++ ++ vf = vd->vdev_tsd = kmem_zalloc(sizeof (vdev_file_t), KM_PUSHPAGE); ++ ++ /* ++ * We always open the files from the root of the global zone, even if ++ * we're in a local zone. If the user has gotten to this point, the ++ * administrator has already decided that the pool should be available ++ * to local zone users, so the underlying devices should be as well. ++ */ ++ ASSERT(vd->vdev_path != NULL && vd->vdev_path[0] == '/'); ++ error = vn_openat(vd->vdev_path + 1, UIO_SYSSPACE, ++ spa_mode(vd->vdev_spa) | FOFFMAX, 0, &vp, 0, 0, rootdir, -1); ++ ++ if (error) { ++ vd->vdev_stat.vs_aux = VDEV_AUX_OPEN_FAILED; ++ return (error); ++ } ++ ++ vf->vf_vnode = vp; ++ ++#ifdef _KERNEL ++ /* ++ * Make sure it's a regular file. ++ */ ++ if (vp->v_type != VREG) { ++ vd->vdev_stat.vs_aux = VDEV_AUX_OPEN_FAILED; ++ return (ENODEV); ++ } ++#endif ++ ++skip_open: ++ /* ++ * Determine the physical size of the file. ++ */ ++ vattr.va_mask = AT_SIZE; ++ error = VOP_GETATTR(vf->vf_vnode, &vattr, 0, kcred, NULL); ++ if (error) { ++ vd->vdev_stat.vs_aux = VDEV_AUX_OPEN_FAILED; ++ return (error); ++ } ++ ++ *max_psize = *psize = vattr.va_size; ++ *ashift = SPA_MINBLOCKSHIFT; ++ ++ return (0); ++} ++ ++static void ++vdev_file_close(vdev_t *vd) ++{ ++ vdev_file_t *vf = vd->vdev_tsd; ++ ++ if (vd->vdev_reopening || vf == NULL) ++ return; ++ ++ if (vf->vf_vnode != NULL) { ++ (void) VOP_PUTPAGE(vf->vf_vnode, 0, 0, B_INVAL, kcred, NULL); ++ (void) VOP_CLOSE(vf->vf_vnode, spa_mode(vd->vdev_spa), 1, 0, ++ kcred, NULL); ++ } ++ ++ vd->vdev_delayed_close = B_FALSE; ++ kmem_free(vf, sizeof (vdev_file_t)); ++ vd->vdev_tsd = NULL; ++} ++ ++static int ++vdev_file_io_start(zio_t *zio) ++{ ++ vdev_t *vd = zio->io_vd; ++ vdev_file_t *vf; ++ ssize_t resid = 0; ++ ++ if (!vdev_readable(vd)) { ++ zio->io_error = ENXIO; ++ return (ZIO_PIPELINE_CONTINUE); ++ } ++ ++ vf = vd->vdev_tsd; ++ ++ if (zio->io_type == ZIO_TYPE_IOCTL) { ++ switch (zio->io_cmd) { ++ case DKIOCFLUSHWRITECACHE: ++ zio->io_error = VOP_FSYNC(vf->vf_vnode, FSYNC | FDSYNC, ++ kcred, NULL); ++ break; ++ default: ++ zio->io_error = ENOTSUP; ++ } ++ ++ return (ZIO_PIPELINE_CONTINUE); ++ } ++ ++ zio->io_error = vn_rdwr(zio->io_type == ZIO_TYPE_READ ? ++ UIO_READ : UIO_WRITE, vf->vf_vnode, zio->io_data, ++ zio->io_size, zio->io_offset, UIO_SYSSPACE, ++ 0, RLIM64_INFINITY, kcred, &resid); ++ ++ if (resid != 0 && zio->io_error == 0) ++ zio->io_error = ENOSPC; ++ ++ zio_interrupt(zio); ++ ++ return (ZIO_PIPELINE_STOP); ++} ++ ++/* ARGSUSED */ ++static void ++vdev_file_io_done(zio_t *zio) ++{ ++} ++ ++vdev_ops_t vdev_file_ops = { ++ vdev_file_open, ++ vdev_file_close, ++ vdev_default_asize, ++ vdev_file_io_start, ++ vdev_file_io_done, ++ NULL, ++ vdev_file_hold, ++ vdev_file_rele, ++ VDEV_TYPE_FILE, /* name of this vdev type */ ++ B_TRUE /* leaf vdev */ ++}; ++ ++/* ++ * From userland we access disks just like files. 
++ */ ++#ifndef _KERNEL ++ ++vdev_ops_t vdev_disk_ops = { ++ vdev_file_open, ++ vdev_file_close, ++ vdev_default_asize, ++ vdev_file_io_start, ++ vdev_file_io_done, ++ NULL, ++ vdev_file_hold, ++ vdev_file_rele, ++ VDEV_TYPE_DISK, /* name of this vdev type */ ++ B_TRUE /* leaf vdev */ ++}; ++ ++#endif +diff -uNr linux-3.2.33-go.orig/fs/zfs/zfs/vdev_label.c linux-3.2.33-go/fs/zfs/zfs/vdev_label.c +--- linux-3.2.33-go.orig/fs/zfs/zfs/vdev_label.c 1970-01-01 01:00:00.000000000 +0100 ++++ linux-3.2.33-go/fs/zfs/zfs/vdev_label.c 2012-11-16 23:25:34.352039300 +0100 +@@ -0,0 +1,1225 @@ ++/* ++ * CDDL HEADER START ++ * ++ * The contents of this file are subject to the terms of the ++ * Common Development and Distribution License (the "License"). ++ * You may not use this file except in compliance with the License. ++ * ++ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE ++ * or http://www.opensolaris.org/os/licensing. ++ * See the License for the specific language governing permissions ++ * and limitations under the License. ++ * ++ * When distributing Covered Code, include this CDDL HEADER in each ++ * file and include the License file at usr/src/OPENSOLARIS.LICENSE. ++ * If applicable, add the following below this CDDL HEADER, with the ++ * fields enclosed by brackets "[]" replaced with your own identifying ++ * information: Portions Copyright [yyyy] [name of copyright owner] ++ * ++ * CDDL HEADER END ++ */ ++/* ++ * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved. ++ */ ++ ++/* ++ * Virtual Device Labels ++ * --------------------- ++ * ++ * The vdev label serves several distinct purposes: ++ * ++ * 1. Uniquely identify this device as part of a ZFS pool and confirm its ++ * identity within the pool. ++ * ++ * 2. Verify that all the devices given in a configuration are present ++ * within the pool. ++ * ++ * 3. Determine the uberblock for the pool. ++ * ++ * 4. In case of an import operation, determine the configuration of the ++ * toplevel vdev of which it is a part. ++ * ++ * 5. If an import operation cannot find all the devices in the pool, ++ * provide enough information to the administrator to determine which ++ * devices are missing. ++ * ++ * It is important to note that while the kernel is responsible for writing the ++ * label, it only consumes the information in the first three cases. The ++ * latter information is only consumed in userland when determining the ++ * configuration to import a pool. ++ * ++ * ++ * Label Organization ++ * ------------------ ++ * ++ * Before describing the contents of the label, it's important to understand how ++ * the labels are written and updated with respect to the uberblock. ++ * ++ * When the pool configuration is altered, either because it was newly created ++ * or a device was added, we want to update all the labels such that we can deal ++ * with fatal failure at any point. To this end, each disk has two labels which ++ * are updated before and after the uberblock is synced. Assuming we have ++ * labels and an uberblock with the following transaction groups: ++ * ++ * L1 UB L2 ++ * +------+ +------+ +------+ ++ * | | | | | | ++ * | t10 | | t10 | | t10 | ++ * | | | | | | ++ * +------+ +------+ +------+ ++ * ++ * In this stable state, the labels and the uberblock were all updated within ++ * the same transaction group (10). Each label is mirrored and checksummed, so ++ * that we can detect when we fail partway through writing the label. 
++ * ++ * In order to identify which labels are valid, the labels are written in the ++ * following manner: ++ * ++ * 1. For each vdev, update 'L1' to the new label ++ * 2. Update the uberblock ++ * 3. For each vdev, update 'L2' to the new label ++ * ++ * Given arbitrary failure, we can determine the correct label to use based on ++ * the transaction group. If we fail after updating L1 but before updating the ++ * UB, we will notice that L1's transaction group is greater than the uberblock, ++ * so L2 must be valid. If we fail after writing the uberblock but before ++ * writing L2, we will notice that L2's transaction group is less than L1, and ++ * therefore L1 is valid. ++ * ++ * Another added complexity is that not every label is updated when the config ++ * is synced. If we add a single device, we do not want to have to re-write ++ * every label for every device in the pool. This means that both L1 and L2 may ++ * be older than the pool uberblock, because the necessary information is stored ++ * on another vdev. ++ * ++ * ++ * On-disk Format ++ * -------------- ++ * ++ * The vdev label consists of two distinct parts, and is wrapped within the ++ * vdev_label_t structure. The label includes 8k of padding to permit legacy ++ * VTOC disk labels, but is otherwise ignored. ++ * ++ * The first half of the label is a packed nvlist which contains pool wide ++ * properties, per-vdev properties, and configuration information. It is ++ * described in more detail below. ++ * ++ * The latter half of the label consists of a redundant array of uberblocks. ++ * These uberblocks are updated whenever a transaction group is committed, ++ * or when the configuration is updated. When a pool is loaded, we scan each ++ * vdev for the 'best' uberblock. ++ * ++ * ++ * Configuration Information ++ * ------------------------- ++ * ++ * The nvlist describing the pool and vdev contains the following elements: ++ * ++ * version ZFS on-disk version ++ * name Pool name ++ * state Pool state ++ * txg Transaction group in which this label was written ++ * pool_guid Unique identifier for this pool ++ * vdev_tree An nvlist describing vdev tree. ++ * ++ * Each leaf device label also contains the following: ++ * ++ * top_guid Unique ID for top-level vdev in which this is contained ++ * guid Unique ID for the leaf vdev ++ * ++ * The 'vs' configuration follows the format described in 'spa_config.c'. ++ */ ++ ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++ ++/* ++ * Basic routines to read and write from a vdev label. ++ * Used throughout the rest of this file. ++ */ ++uint64_t ++vdev_label_offset(uint64_t psize, int l, uint64_t offset) ++{ ++ ASSERT(offset < sizeof (vdev_label_t)); ++ ASSERT(P2PHASE_TYPED(psize, sizeof (vdev_label_t), uint64_t) == 0); ++ ++ return (offset + l * sizeof (vdev_label_t) + (l < VDEV_LABELS / 2 ? ++ 0 : psize - VDEV_LABELS * sizeof (vdev_label_t))); ++} ++ ++/* ++ * Returns back the vdev label associated with the passed in offset. ++ */ ++int ++vdev_label_number(uint64_t psize, uint64_t offset) ++{ ++ int l; ++ ++ if (offset >= psize - VDEV_LABEL_END_SIZE) { ++ offset -= psize - VDEV_LABEL_END_SIZE; ++ offset += (VDEV_LABELS / 2) * sizeof (vdev_label_t); ++ } ++ l = offset / sizeof (vdev_label_t); ++ return (l < VDEV_LABELS ? 
l : -1); ++} ++ ++static void ++vdev_label_read(zio_t *zio, vdev_t *vd, int l, void *buf, uint64_t offset, ++ uint64_t size, zio_done_func_t *done, void *private, int flags) ++{ ++ ASSERT(spa_config_held(zio->io_spa, SCL_STATE_ALL, RW_WRITER) == ++ SCL_STATE_ALL); ++ ASSERT(flags & ZIO_FLAG_CONFIG_WRITER); ++ ++ zio_nowait(zio_read_phys(zio, vd, ++ vdev_label_offset(vd->vdev_psize, l, offset), ++ size, buf, ZIO_CHECKSUM_LABEL, done, private, ++ ZIO_PRIORITY_SYNC_READ, flags, B_TRUE)); ++} ++ ++static void ++vdev_label_write(zio_t *zio, vdev_t *vd, int l, void *buf, uint64_t offset, ++ uint64_t size, zio_done_func_t *done, void *private, int flags) ++{ ++ ASSERT(spa_config_held(zio->io_spa, SCL_ALL, RW_WRITER) == SCL_ALL || ++ (spa_config_held(zio->io_spa, SCL_CONFIG | SCL_STATE, RW_READER) == ++ (SCL_CONFIG | SCL_STATE) && ++ dsl_pool_sync_context(spa_get_dsl(zio->io_spa)))); ++ ASSERT(flags & ZIO_FLAG_CONFIG_WRITER); ++ ++ zio_nowait(zio_write_phys(zio, vd, ++ vdev_label_offset(vd->vdev_psize, l, offset), ++ size, buf, ZIO_CHECKSUM_LABEL, done, private, ++ ZIO_PRIORITY_SYNC_WRITE, flags, B_TRUE)); ++} ++ ++/* ++ * Generate the nvlist representing this vdev's config. ++ */ ++nvlist_t * ++vdev_config_generate(spa_t *spa, vdev_t *vd, boolean_t getstats, ++ vdev_config_flag_t flags) ++{ ++ nvlist_t *nv = NULL; ++ ++ VERIFY(nvlist_alloc(&nv, NV_UNIQUE_NAME, KM_PUSHPAGE) == 0); ++ ++ VERIFY(nvlist_add_string(nv, ZPOOL_CONFIG_TYPE, ++ vd->vdev_ops->vdev_op_type) == 0); ++ if (!(flags & (VDEV_CONFIG_SPARE | VDEV_CONFIG_L2CACHE))) ++ VERIFY(nvlist_add_uint64(nv, ZPOOL_CONFIG_ID, vd->vdev_id) ++ == 0); ++ VERIFY(nvlist_add_uint64(nv, ZPOOL_CONFIG_GUID, vd->vdev_guid) == 0); ++ ++ if (vd->vdev_path != NULL) ++ VERIFY(nvlist_add_string(nv, ZPOOL_CONFIG_PATH, ++ vd->vdev_path) == 0); ++ ++ if (vd->vdev_devid != NULL) ++ VERIFY(nvlist_add_string(nv, ZPOOL_CONFIG_DEVID, ++ vd->vdev_devid) == 0); ++ ++ if (vd->vdev_physpath != NULL) ++ VERIFY(nvlist_add_string(nv, ZPOOL_CONFIG_PHYS_PATH, ++ vd->vdev_physpath) == 0); ++ ++ if (vd->vdev_fru != NULL) ++ VERIFY(nvlist_add_string(nv, ZPOOL_CONFIG_FRU, ++ vd->vdev_fru) == 0); ++ ++ if (vd->vdev_nparity != 0) { ++ ASSERT(strcmp(vd->vdev_ops->vdev_op_type, ++ VDEV_TYPE_RAIDZ) == 0); ++ ++ /* ++ * Make sure someone hasn't managed to sneak a fancy new vdev ++ * into a crufty old storage pool. ++ */ ++ ASSERT(vd->vdev_nparity == 1 || ++ (vd->vdev_nparity <= 2 && ++ spa_version(spa) >= SPA_VERSION_RAIDZ2) || ++ (vd->vdev_nparity <= 3 && ++ spa_version(spa) >= SPA_VERSION_RAIDZ3)); ++ ++ /* ++ * Note that we'll add the nparity tag even on storage pools ++ * that only support a single parity device -- older software ++ * will just ignore it. 
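++ * (Illustrative: a raidz2 top-level vdev stores ZPOOL_CONFIG_NPARITY = 2
++ * here, which the ASSERT above only permits once the pool is at
++ * SPA_VERSION_RAIDZ2 or newer.)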
++ */ ++ VERIFY(nvlist_add_uint64(nv, ZPOOL_CONFIG_NPARITY, ++ vd->vdev_nparity) == 0); ++ } ++ ++ if (vd->vdev_wholedisk != -1ULL) ++ VERIFY(nvlist_add_uint64(nv, ZPOOL_CONFIG_WHOLE_DISK, ++ vd->vdev_wholedisk) == 0); ++ ++ if (vd->vdev_not_present) ++ VERIFY(nvlist_add_uint64(nv, ZPOOL_CONFIG_NOT_PRESENT, 1) == 0); ++ ++ if (vd->vdev_isspare) ++ VERIFY(nvlist_add_uint64(nv, ZPOOL_CONFIG_IS_SPARE, 1) == 0); ++ ++ if (!(flags & (VDEV_CONFIG_SPARE | VDEV_CONFIG_L2CACHE)) && ++ vd == vd->vdev_top) { ++ VERIFY(nvlist_add_uint64(nv, ZPOOL_CONFIG_METASLAB_ARRAY, ++ vd->vdev_ms_array) == 0); ++ VERIFY(nvlist_add_uint64(nv, ZPOOL_CONFIG_METASLAB_SHIFT, ++ vd->vdev_ms_shift) == 0); ++ VERIFY(nvlist_add_uint64(nv, ZPOOL_CONFIG_ASHIFT, ++ vd->vdev_ashift) == 0); ++ VERIFY(nvlist_add_uint64(nv, ZPOOL_CONFIG_ASIZE, ++ vd->vdev_asize) == 0); ++ VERIFY(nvlist_add_uint64(nv, ZPOOL_CONFIG_IS_LOG, ++ vd->vdev_islog) == 0); ++ if (vd->vdev_removing) ++ VERIFY(nvlist_add_uint64(nv, ZPOOL_CONFIG_REMOVING, ++ vd->vdev_removing) == 0); ++ } ++ ++ if (vd->vdev_dtl_smo.smo_object != 0) ++ VERIFY(nvlist_add_uint64(nv, ZPOOL_CONFIG_DTL, ++ vd->vdev_dtl_smo.smo_object) == 0); ++ ++ if (vd->vdev_crtxg) ++ VERIFY(nvlist_add_uint64(nv, ZPOOL_CONFIG_CREATE_TXG, ++ vd->vdev_crtxg) == 0); ++ ++ if (getstats) { ++ vdev_stat_t vs; ++ pool_scan_stat_t ps; ++ ++ vdev_get_stats(vd, &vs); ++ VERIFY(nvlist_add_uint64_array(nv, ZPOOL_CONFIG_VDEV_STATS, ++ (uint64_t *)&vs, sizeof (vs) / sizeof (uint64_t)) == 0); ++ ++ /* provide either current or previous scan information */ ++ if (spa_scan_get_stats(spa, &ps) == 0) { ++ VERIFY(nvlist_add_uint64_array(nv, ++ ZPOOL_CONFIG_SCAN_STATS, (uint64_t *)&ps, ++ sizeof (pool_scan_stat_t) / sizeof (uint64_t)) ++ == 0); ++ } ++ } ++ ++ if (!vd->vdev_ops->vdev_op_leaf) { ++ nvlist_t **child; ++ int c, idx; ++ ++ ASSERT(!vd->vdev_ishole); ++ ++ child = kmem_alloc(vd->vdev_children * sizeof (nvlist_t *), ++ KM_PUSHPAGE); ++ ++ for (c = 0, idx = 0; c < vd->vdev_children; c++) { ++ vdev_t *cvd = vd->vdev_child[c]; ++ ++ /* ++ * If we're generating an nvlist of removing ++ * vdevs then skip over any device which is ++ * not being removed. 
++ */ ++ if ((flags & VDEV_CONFIG_REMOVING) && ++ !cvd->vdev_removing) ++ continue; ++ ++ child[idx++] = vdev_config_generate(spa, cvd, ++ getstats, flags); ++ } ++ ++ if (idx) { ++ VERIFY(nvlist_add_nvlist_array(nv, ++ ZPOOL_CONFIG_CHILDREN, child, idx) == 0); ++ } ++ ++ for (c = 0; c < idx; c++) ++ nvlist_free(child[c]); ++ ++ kmem_free(child, vd->vdev_children * sizeof (nvlist_t *)); ++ ++ } else { ++ const char *aux = NULL; ++ ++ if (vd->vdev_offline && !vd->vdev_tmpoffline) ++ VERIFY(nvlist_add_uint64(nv, ZPOOL_CONFIG_OFFLINE, ++ B_TRUE) == 0); ++ if (vd->vdev_resilvering) ++ VERIFY(nvlist_add_uint64(nv, ZPOOL_CONFIG_RESILVERING, ++ B_TRUE) == 0); ++ if (vd->vdev_faulted) ++ VERIFY(nvlist_add_uint64(nv, ZPOOL_CONFIG_FAULTED, ++ B_TRUE) == 0); ++ if (vd->vdev_degraded) ++ VERIFY(nvlist_add_uint64(nv, ZPOOL_CONFIG_DEGRADED, ++ B_TRUE) == 0); ++ if (vd->vdev_removed) ++ VERIFY(nvlist_add_uint64(nv, ZPOOL_CONFIG_REMOVED, ++ B_TRUE) == 0); ++ if (vd->vdev_unspare) ++ VERIFY(nvlist_add_uint64(nv, ZPOOL_CONFIG_UNSPARE, ++ B_TRUE) == 0); ++ if (vd->vdev_ishole) ++ VERIFY(nvlist_add_uint64(nv, ZPOOL_CONFIG_IS_HOLE, ++ B_TRUE) == 0); ++ ++ switch (vd->vdev_stat.vs_aux) { ++ case VDEV_AUX_ERR_EXCEEDED: ++ aux = "err_exceeded"; ++ break; ++ ++ case VDEV_AUX_EXTERNAL: ++ aux = "external"; ++ break; ++ } ++ ++ if (aux != NULL) ++ VERIFY(nvlist_add_string(nv, ZPOOL_CONFIG_AUX_STATE, ++ aux) == 0); ++ ++ if (vd->vdev_splitting && vd->vdev_orig_guid != 0LL) { ++ VERIFY(nvlist_add_uint64(nv, ZPOOL_CONFIG_ORIG_GUID, ++ vd->vdev_orig_guid) == 0); ++ } ++ } ++ ++ return (nv); ++} ++ ++/* ++ * Generate a view of the top-level vdevs. If we currently have holes ++ * in the namespace, then generate an array which contains a list of holey ++ * vdevs. Additionally, add the number of top-level children that currently ++ * exist. 
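++ * For example (illustrative): in a pool with four top-level slots where
++ * only index 2 is a hole, the generated nvlist carries
++ * ZPOOL_CONFIG_HOLE_ARRAY = { 2 } and ZPOOL_CONFIG_VDEV_CHILDREN = 4.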
++ */ ++void ++vdev_top_config_generate(spa_t *spa, nvlist_t *config) ++{ ++ vdev_t *rvd = spa->spa_root_vdev; ++ uint64_t *array; ++ uint_t c, idx; ++ ++ array = kmem_alloc(rvd->vdev_children * sizeof (uint64_t), KM_PUSHPAGE); ++ ++ for (c = 0, idx = 0; c < rvd->vdev_children; c++) { ++ vdev_t *tvd = rvd->vdev_child[c]; ++ ++ if (tvd->vdev_ishole) ++ array[idx++] = c; ++ } ++ ++ if (idx) { ++ VERIFY(nvlist_add_uint64_array(config, ZPOOL_CONFIG_HOLE_ARRAY, ++ array, idx) == 0); ++ } ++ ++ VERIFY(nvlist_add_uint64(config, ZPOOL_CONFIG_VDEV_CHILDREN, ++ rvd->vdev_children) == 0); ++ ++ kmem_free(array, rvd->vdev_children * sizeof (uint64_t)); ++} ++ ++nvlist_t * ++vdev_label_read_config(vdev_t *vd) ++{ ++ spa_t *spa = vd->vdev_spa; ++ nvlist_t *config = NULL; ++ vdev_phys_t *vp; ++ zio_t *zio; ++ int flags = ZIO_FLAG_CONFIG_WRITER | ZIO_FLAG_CANFAIL | ++ ZIO_FLAG_SPECULATIVE; ++ int l; ++ ++ ASSERT(spa_config_held(spa, SCL_STATE_ALL, RW_WRITER) == SCL_STATE_ALL); ++ ++ if (!vdev_readable(vd)) ++ return (NULL); ++ ++ vp = zio_buf_alloc(sizeof (vdev_phys_t)); ++ ++retry: ++ for (l = 0; l < VDEV_LABELS; l++) { ++ ++ zio = zio_root(spa, NULL, NULL, flags); ++ ++ vdev_label_read(zio, vd, l, vp, ++ offsetof(vdev_label_t, vl_vdev_phys), ++ sizeof (vdev_phys_t), NULL, NULL, flags); ++ ++ if (zio_wait(zio) == 0 && ++ nvlist_unpack(vp->vp_nvlist, sizeof (vp->vp_nvlist), ++ &config, 0) == 0) ++ break; ++ ++ if (config != NULL) { ++ nvlist_free(config); ++ config = NULL; ++ } ++ } ++ ++ if (config == NULL && !(flags & ZIO_FLAG_TRYHARD)) { ++ flags |= ZIO_FLAG_TRYHARD; ++ goto retry; ++ } ++ ++ zio_buf_free(vp, sizeof (vdev_phys_t)); ++ ++ return (config); ++} ++ ++/* ++ * Determine if a device is in use. The 'spare_guid' parameter will be filled ++ * in with the device guid if this spare is active elsewhere on the system. ++ */ ++static boolean_t ++vdev_inuse(vdev_t *vd, uint64_t crtxg, vdev_labeltype_t reason, ++ uint64_t *spare_guid, uint64_t *l2cache_guid) ++{ ++ spa_t *spa = vd->vdev_spa; ++ uint64_t state, pool_guid, device_guid, txg, spare_pool; ++ uint64_t vdtxg = 0; ++ nvlist_t *label; ++ ++ if (spare_guid) ++ *spare_guid = 0ULL; ++ if (l2cache_guid) ++ *l2cache_guid = 0ULL; ++ ++ /* ++ * Read the label, if any, and perform some basic sanity checks. ++ */ ++ if ((label = vdev_label_read_config(vd)) == NULL) ++ return (B_FALSE); ++ ++ (void) nvlist_lookup_uint64(label, ZPOOL_CONFIG_CREATE_TXG, ++ &vdtxg); ++ ++ if (nvlist_lookup_uint64(label, ZPOOL_CONFIG_POOL_STATE, ++ &state) != 0 || ++ nvlist_lookup_uint64(label, ZPOOL_CONFIG_GUID, ++ &device_guid) != 0) { ++ nvlist_free(label); ++ return (B_FALSE); ++ } ++ ++ if (state != POOL_STATE_SPARE && state != POOL_STATE_L2CACHE && ++ (nvlist_lookup_uint64(label, ZPOOL_CONFIG_POOL_GUID, ++ &pool_guid) != 0 || ++ nvlist_lookup_uint64(label, ZPOOL_CONFIG_POOL_TXG, ++ &txg) != 0)) { ++ nvlist_free(label); ++ return (B_FALSE); ++ } ++ ++ nvlist_free(label); ++ ++ /* ++ * Check to see if this device indeed belongs to the pool it claims to ++ * be a part of. The only way this is allowed is if the device is a hot ++ * spare (which we check for later on). ++ */ ++ if (state != POOL_STATE_SPARE && state != POOL_STATE_L2CACHE && ++ !spa_guid_exists(pool_guid, device_guid) && ++ !spa_spare_exists(device_guid, NULL, NULL) && ++ !spa_l2cache_exists(device_guid, NULL)) ++ return (B_FALSE); ++ ++ /* ++ * If the transaction group is zero, then this an initialized (but ++ * unused) label. 
This is only an error if the create transaction ++ * on-disk is the same as the one we're using now, in which case the ++ * user has attempted to add the same vdev multiple times in the same ++ * transaction. ++ */ ++ if (state != POOL_STATE_SPARE && state != POOL_STATE_L2CACHE && ++ txg == 0 && vdtxg == crtxg) ++ return (B_TRUE); ++ ++ /* ++ * Check to see if this is a spare device. We do an explicit check for ++ * spa_has_spare() here because it may be on our pending list of spares ++ * to add. We also check if it is an l2cache device. ++ */ ++ if (spa_spare_exists(device_guid, &spare_pool, NULL) || ++ spa_has_spare(spa, device_guid)) { ++ if (spare_guid) ++ *spare_guid = device_guid; ++ ++ switch (reason) { ++ case VDEV_LABEL_CREATE: ++ case VDEV_LABEL_L2CACHE: ++ return (B_TRUE); ++ ++ case VDEV_LABEL_REPLACE: ++ return (!spa_has_spare(spa, device_guid) || ++ spare_pool != 0ULL); ++ ++ case VDEV_LABEL_SPARE: ++ return (spa_has_spare(spa, device_guid)); ++ default: ++ break; ++ } ++ } ++ ++ /* ++ * Check to see if this is an l2cache device. ++ */ ++ if (spa_l2cache_exists(device_guid, NULL)) ++ return (B_TRUE); ++ ++ /* ++ * We can't rely on a pool's state if it's been imported ++ * read-only. Instead we look to see if the pools is marked ++ * read-only in the namespace and set the state to active. ++ */ ++ if ((spa = spa_by_guid(pool_guid, device_guid)) != NULL && ++ spa_mode(spa) == FREAD) ++ state = POOL_STATE_ACTIVE; ++ ++ /* ++ * If the device is marked ACTIVE, then this device is in use by another ++ * pool on the system. ++ */ ++ return (state == POOL_STATE_ACTIVE); ++} ++ ++/* ++ * Initialize a vdev label. We check to make sure each leaf device is not in ++ * use, and writable. We put down an initial label which we will later ++ * overwrite with a complete label. Note that it's important to do this ++ * sequentially, not in parallel, so that we catch cases of multiple use of the ++ * same leaf vdev in the vdev we're creating -- e.g. mirroring a disk with ++ * itself. ++ */ ++int ++vdev_label_init(vdev_t *vd, uint64_t crtxg, vdev_labeltype_t reason) ++{ ++ spa_t *spa = vd->vdev_spa; ++ nvlist_t *label; ++ vdev_phys_t *vp; ++ char *pad2; ++ uberblock_t *ub; ++ zio_t *zio; ++ char *buf; ++ size_t buflen; ++ int error; ++ uint64_t spare_guid = 0, l2cache_guid = 0; ++ int flags = ZIO_FLAG_CONFIG_WRITER | ZIO_FLAG_CANFAIL; ++ int c, l; ++ vdev_t *pvd; ++ ++ ASSERT(spa_config_held(spa, SCL_ALL, RW_WRITER) == SCL_ALL); ++ ++ for (c = 0; c < vd->vdev_children; c++) ++ if ((error = vdev_label_init(vd->vdev_child[c], ++ crtxg, reason)) != 0) ++ return (error); ++ ++ /* Track the creation time for this vdev */ ++ vd->vdev_crtxg = crtxg; ++ ++ if (!vd->vdev_ops->vdev_op_leaf) ++ return (0); ++ ++ /* ++ * Dead vdevs cannot be initialized. ++ */ ++ if (vdev_is_dead(vd)) ++ return (EIO); ++ ++ /* ++ * Determine if the vdev is in use. ++ */ ++ if (reason != VDEV_LABEL_REMOVE && reason != VDEV_LABEL_SPLIT && ++ vdev_inuse(vd, crtxg, reason, &spare_guid, &l2cache_guid)) ++ return (EBUSY); ++ ++ /* ++ * If this is a request to add or replace a spare or l2cache device ++ * that is in use elsewhere on the system, then we must update the ++ * guid (which was initialized to a random value) to reflect the ++ * actual GUID (which is shared between multiple pools). 
++ */ ++ if (reason != VDEV_LABEL_REMOVE && reason != VDEV_LABEL_L2CACHE && ++ spare_guid != 0ULL) { ++ uint64_t guid_delta = spare_guid - vd->vdev_guid; ++ ++ vd->vdev_guid += guid_delta; ++ ++ for (pvd = vd; pvd != NULL; pvd = pvd->vdev_parent) ++ pvd->vdev_guid_sum += guid_delta; ++ ++ /* ++ * If this is a replacement, then we want to fallthrough to the ++ * rest of the code. If we're adding a spare, then it's already ++ * labeled appropriately and we can just return. ++ */ ++ if (reason == VDEV_LABEL_SPARE) ++ return (0); ++ ASSERT(reason == VDEV_LABEL_REPLACE || ++ reason == VDEV_LABEL_SPLIT); ++ } ++ ++ if (reason != VDEV_LABEL_REMOVE && reason != VDEV_LABEL_SPARE && ++ l2cache_guid != 0ULL) { ++ uint64_t guid_delta = l2cache_guid - vd->vdev_guid; ++ ++ vd->vdev_guid += guid_delta; ++ ++ for (pvd = vd; pvd != NULL; pvd = pvd->vdev_parent) ++ pvd->vdev_guid_sum += guid_delta; ++ ++ /* ++ * If this is a replacement, then we want to fallthrough to the ++ * rest of the code. If we're adding an l2cache, then it's ++ * already labeled appropriately and we can just return. ++ */ ++ if (reason == VDEV_LABEL_L2CACHE) ++ return (0); ++ ASSERT(reason == VDEV_LABEL_REPLACE); ++ } ++ ++ /* ++ * Initialize its label. ++ */ ++ vp = zio_buf_alloc(sizeof (vdev_phys_t)); ++ bzero(vp, sizeof (vdev_phys_t)); ++ ++ /* ++ * Generate a label describing the pool and our top-level vdev. ++ * We mark it as being from txg 0 to indicate that it's not ++ * really part of an active pool just yet. The labels will ++ * be written again with a meaningful txg by spa_sync(). ++ */ ++ if (reason == VDEV_LABEL_SPARE || ++ (reason == VDEV_LABEL_REMOVE && vd->vdev_isspare)) { ++ /* ++ * For inactive hot spares, we generate a special label that ++ * identifies as a mutually shared hot spare. We write the ++ * label if we are adding a hot spare, or if we are removing an ++ * active hot spare (in which case we want to revert the ++ * labels). ++ */ ++ VERIFY(nvlist_alloc(&label, NV_UNIQUE_NAME, KM_PUSHPAGE) == 0); ++ ++ VERIFY(nvlist_add_uint64(label, ZPOOL_CONFIG_VERSION, ++ spa_version(spa)) == 0); ++ VERIFY(nvlist_add_uint64(label, ZPOOL_CONFIG_POOL_STATE, ++ POOL_STATE_SPARE) == 0); ++ VERIFY(nvlist_add_uint64(label, ZPOOL_CONFIG_GUID, ++ vd->vdev_guid) == 0); ++ } else if (reason == VDEV_LABEL_L2CACHE || ++ (reason == VDEV_LABEL_REMOVE && vd->vdev_isl2cache)) { ++ /* ++ * For level 2 ARC devices, add a special label. ++ */ ++ VERIFY(nvlist_alloc(&label, NV_UNIQUE_NAME, KM_PUSHPAGE) == 0); ++ ++ VERIFY(nvlist_add_uint64(label, ZPOOL_CONFIG_VERSION, ++ spa_version(spa)) == 0); ++ VERIFY(nvlist_add_uint64(label, ZPOOL_CONFIG_POOL_STATE, ++ POOL_STATE_L2CACHE) == 0); ++ VERIFY(nvlist_add_uint64(label, ZPOOL_CONFIG_GUID, ++ vd->vdev_guid) == 0); ++ } else { ++ uint64_t txg = 0ULL; ++ ++ if (reason == VDEV_LABEL_SPLIT) ++ txg = spa->spa_uberblock.ub_txg; ++ label = spa_config_generate(spa, vd, txg, B_FALSE); ++ ++ /* ++ * Add our creation time. This allows us to detect multiple ++ * vdev uses as described above, and automatically expires if we ++ * fail. ++ */ ++ VERIFY(nvlist_add_uint64(label, ZPOOL_CONFIG_CREATE_TXG, ++ crtxg) == 0); ++ } ++ ++ buf = vp->vp_nvlist; ++ buflen = sizeof (vp->vp_nvlist); ++ ++ error = nvlist_pack(label, &buf, &buflen, NV_ENCODE_XDR, KM_PUSHPAGE); ++ if (error != 0) { ++ nvlist_free(label); ++ zio_buf_free(vp, sizeof (vdev_phys_t)); ++ /* EFAULT means nvlist_pack ran out of room */ ++ return (error == EFAULT ? ENAMETOOLONG : EINVAL); ++ } ++ ++ /* ++ * Initialize uberblock template. 
++ */
++ ub = zio_buf_alloc(VDEV_UBERBLOCK_RING);
++ bzero(ub, VDEV_UBERBLOCK_RING);
++ *ub = spa->spa_uberblock;
++ ub->ub_txg = 0;
++
++ /* Initialize the 2nd padding area. */
++ pad2 = zio_buf_alloc(VDEV_PAD_SIZE);
++ bzero(pad2, VDEV_PAD_SIZE);
++
++ /*
++ * Write everything in parallel.
++ */
++retry:
++ zio = zio_root(spa, NULL, NULL, flags);
++
++ for (l = 0; l < VDEV_LABELS; l++) {
++
++ vdev_label_write(zio, vd, l, vp,
++ offsetof(vdev_label_t, vl_vdev_phys),
++ sizeof (vdev_phys_t), NULL, NULL, flags);
++
++ /*
++ * Skip the 1st padding area.
++ * Zero out the 2nd padding area where it might have
++ * left over data from previous filesystem format.
++ */
++ vdev_label_write(zio, vd, l, pad2,
++ offsetof(vdev_label_t, vl_pad2),
++ VDEV_PAD_SIZE, NULL, NULL, flags);
++
++ vdev_label_write(zio, vd, l, ub,
++ offsetof(vdev_label_t, vl_uberblock),
++ VDEV_UBERBLOCK_RING, NULL, NULL, flags);
++ }
++
++ error = zio_wait(zio);
++
++ if (error != 0 && !(flags & ZIO_FLAG_TRYHARD)) {
++ flags |= ZIO_FLAG_TRYHARD;
++ goto retry;
++ }
++
++ nvlist_free(label);
++ zio_buf_free(pad2, VDEV_PAD_SIZE);
++ zio_buf_free(ub, VDEV_UBERBLOCK_RING);
++ zio_buf_free(vp, sizeof (vdev_phys_t));
++
++ /*
++ * If this vdev hasn't been previously identified as a spare, then we
++ * mark it as such only if a) we are labeling it as a spare, or b) it
++ * exists as a spare elsewhere in the system. Do the same for
++ * level 2 ARC devices.
++ */
++ if (error == 0 && !vd->vdev_isspare &&
++ (reason == VDEV_LABEL_SPARE ||
++ spa_spare_exists(vd->vdev_guid, NULL, NULL)))
++ spa_spare_add(vd);
++
++ if (error == 0 && !vd->vdev_isl2cache &&
++ (reason == VDEV_LABEL_L2CACHE ||
++ spa_l2cache_exists(vd->vdev_guid, NULL)))
++ spa_l2cache_add(vd);
++
++ return (error);
++}
++
++/*
++ * ==========================================================================
++ * uberblock load/sync
++ * ==========================================================================
++ */
++
++/*
++ * Consider the following situation: txg is safely synced to disk. We've
++ * written the first uberblock for txg + 1, and then we lose power. When we
++ * come back up, we fail to see the uberblock for txg + 1 because, say,
++ * it was on a mirrored device and the replica to which we wrote txg + 1
++ * is now offline. If we then make some changes and sync txg + 1, and then
++ * the missing replica comes back, then for a few seconds we'll have two
++ * conflicting uberblocks on disk with the same txg. The solution is simple:
++ * among uberblocks with equal txg, choose the one with the latest timestamp.
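To make the tie-break above concrete, here is a tiny stand-alone editor's sketch (not part of the patch): the uberblock_t is reduced to the two fields that matter here, the struct name ub and the sample txg/timestamp values are invented, and the comparator simply mirrors the rule implemented by the function that follows. Of two uberblocks with the same txg, the one with the later timestamp is preferred.

#include <stdio.h>
#include <stdint.h>

struct ub { uint64_t txg, timestamp; };    /* reduced stand-in for uberblock_t */

static int
ub_compare(const struct ub *a, const struct ub *b)
{
    if (a->txg != b->txg)
        return (a->txg < b->txg ? -1 : 1);
    if (a->timestamp != b->timestamp)
        return (a->timestamp < b->timestamp ? -1 : 1);
    return (0);
}

int
main(void)
{
    struct ub before = { 1234, 5000 };    /* written before the replica went offline */
    struct ub after  = { 1234, 5060 };    /* rewritten for the same txg a minute later */

    /* Same txg, later timestamp wins: prints "keep the later one". */
    printf("%s\n", ub_compare(&after, &before) > 0 ?
        "keep the later one" : "keep the earlier one");
    return (0);
}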
++ */ ++static int ++vdev_uberblock_compare(uberblock_t *ub1, uberblock_t *ub2) ++{ ++ if (ub1->ub_txg < ub2->ub_txg) ++ return (-1); ++ if (ub1->ub_txg > ub2->ub_txg) ++ return (1); ++ ++ if (ub1->ub_timestamp < ub2->ub_timestamp) ++ return (-1); ++ if (ub1->ub_timestamp > ub2->ub_timestamp) ++ return (1); ++ ++ return (0); ++} ++ ++static void ++vdev_uberblock_load_done(zio_t *zio) ++{ ++ spa_t *spa = zio->io_spa; ++ zio_t *rio = zio->io_private; ++ uberblock_t *ub = zio->io_data; ++ uberblock_t *ubbest = rio->io_private; ++ ++ ASSERT3U(zio->io_size, ==, VDEV_UBERBLOCK_SIZE(zio->io_vd)); ++ ++ if (zio->io_error == 0 && uberblock_verify(ub) == 0) { ++ mutex_enter(&rio->io_lock); ++ if (ub->ub_txg <= spa->spa_load_max_txg && ++ vdev_uberblock_compare(ub, ubbest) > 0) ++ *ubbest = *ub; ++ mutex_exit(&rio->io_lock); ++ } ++ ++ zio_buf_free(zio->io_data, zio->io_size); ++} ++ ++void ++vdev_uberblock_load(zio_t *zio, vdev_t *vd, uberblock_t *ubbest) ++{ ++ spa_t *spa = vd->vdev_spa; ++ vdev_t *rvd = spa->spa_root_vdev; ++ int flags = ZIO_FLAG_CONFIG_WRITER | ZIO_FLAG_CANFAIL | ++ ZIO_FLAG_SPECULATIVE | ZIO_FLAG_TRYHARD; ++ int c, l, n; ++ ++ if (vd == rvd) { ++ ASSERT(zio == NULL); ++ spa_config_enter(spa, SCL_ALL, FTAG, RW_WRITER); ++ zio = zio_root(spa, NULL, ubbest, flags); ++ bzero(ubbest, sizeof (uberblock_t)); ++ } ++ ++ ASSERT(zio != NULL); ++ ++ for (c = 0; c < vd->vdev_children; c++) ++ vdev_uberblock_load(zio, vd->vdev_child[c], ubbest); ++ ++ if (vd->vdev_ops->vdev_op_leaf && vdev_readable(vd)) { ++ for (l = 0; l < VDEV_LABELS; l++) { ++ for (n = 0; n < VDEV_UBERBLOCK_COUNT(vd); n++) { ++ vdev_label_read(zio, vd, l, ++ zio_buf_alloc(VDEV_UBERBLOCK_SIZE(vd)), ++ VDEV_UBERBLOCK_OFFSET(vd, n), ++ VDEV_UBERBLOCK_SIZE(vd), ++ vdev_uberblock_load_done, zio, flags); ++ } ++ } ++ } ++ ++ if (vd == rvd) { ++ (void) zio_wait(zio); ++ spa_config_exit(spa, SCL_ALL, FTAG); ++ } ++} ++ ++/* ++ * On success, increment root zio's count of good writes. ++ * We only get credit for writes to known-visible vdevs; see spa_vdev_add(). ++ */ ++static void ++vdev_uberblock_sync_done(zio_t *zio) ++{ ++ uint64_t *good_writes = zio->io_private; ++ ++ if (zio->io_error == 0 && zio->io_vd->vdev_top->vdev_ms_array != 0) ++ atomic_add_64(good_writes, 1); ++} ++ ++/* ++ * Write the uberblock to all labels of all leaves of the specified vdev. ++ */ ++static void ++vdev_uberblock_sync(zio_t *zio, uberblock_t *ub, vdev_t *vd, int flags) ++{ ++ uberblock_t *ubbuf; ++ int c, l, n; ++ ++ for (c = 0; c < vd->vdev_children; c++) ++ vdev_uberblock_sync(zio, ub, vd->vdev_child[c], flags); ++ ++ if (!vd->vdev_ops->vdev_op_leaf) ++ return; ++ ++ if (!vdev_writeable(vd)) ++ return; ++ ++ n = ub->ub_txg & (VDEV_UBERBLOCK_COUNT(vd) - 1); ++ ++ ubbuf = zio_buf_alloc(VDEV_UBERBLOCK_SIZE(vd)); ++ bzero(ubbuf, VDEV_UBERBLOCK_SIZE(vd)); ++ *ubbuf = *ub; ++ ++ for (l = 0; l < VDEV_LABELS; l++) ++ vdev_label_write(zio, vd, l, ubbuf, ++ VDEV_UBERBLOCK_OFFSET(vd, n), VDEV_UBERBLOCK_SIZE(vd), ++ vdev_uberblock_sync_done, zio->io_private, ++ flags | ZIO_FLAG_DONT_PROPAGATE); ++ ++ zio_buf_free(ubbuf, VDEV_UBERBLOCK_SIZE(vd)); ++} ++ ++int ++vdev_uberblock_sync_list(vdev_t **svd, int svdcount, uberblock_t *ub, int flags) ++{ ++ spa_t *spa = svd[0]->vdev_spa; ++ zio_t *zio; ++ uint64_t good_writes = 0; ++ int v; ++ ++ zio = zio_root(spa, NULL, &good_writes, flags); ++ ++ for (v = 0; v < svdcount; v++) ++ vdev_uberblock_sync(zio, ub, svd[v], flags); ++ ++ (void) zio_wait(zio); ++ ++ /* ++ * Flush the uberblocks to disk. 
This ensures that the odd labels ++ * are no longer needed (because the new uberblocks and the even ++ * labels are safely on disk), so it is safe to overwrite them. ++ */ ++ zio = zio_root(spa, NULL, NULL, flags); ++ ++ for (v = 0; v < svdcount; v++) ++ zio_flush(zio, svd[v]); ++ ++ (void) zio_wait(zio); ++ ++ return (good_writes >= 1 ? 0 : EIO); ++} ++ ++/* ++ * On success, increment the count of good writes for our top-level vdev. ++ */ ++static void ++vdev_label_sync_done(zio_t *zio) ++{ ++ uint64_t *good_writes = zio->io_private; ++ ++ if (zio->io_error == 0) ++ atomic_add_64(good_writes, 1); ++} ++ ++/* ++ * If there weren't enough good writes, indicate failure to the parent. ++ */ ++static void ++vdev_label_sync_top_done(zio_t *zio) ++{ ++ uint64_t *good_writes = zio->io_private; ++ ++ if (*good_writes == 0) ++ zio->io_error = EIO; ++ ++ kmem_free(good_writes, sizeof (uint64_t)); ++} ++ ++/* ++ * We ignore errors for log and cache devices, simply free the private data. ++ */ ++static void ++vdev_label_sync_ignore_done(zio_t *zio) ++{ ++ kmem_free(zio->io_private, sizeof (uint64_t)); ++} ++ ++/* ++ * Write all even or odd labels to all leaves of the specified vdev. ++ */ ++static void ++vdev_label_sync(zio_t *zio, vdev_t *vd, int l, uint64_t txg, int flags) ++{ ++ nvlist_t *label; ++ vdev_phys_t *vp; ++ char *buf; ++ size_t buflen; ++ int c; ++ ++ for (c = 0; c < vd->vdev_children; c++) ++ vdev_label_sync(zio, vd->vdev_child[c], l, txg, flags); ++ ++ if (!vd->vdev_ops->vdev_op_leaf) ++ return; ++ ++ if (!vdev_writeable(vd)) ++ return; ++ ++ /* ++ * Generate a label describing the top-level config to which we belong. ++ */ ++ label = spa_config_generate(vd->vdev_spa, vd, txg, B_FALSE); ++ ++ vp = zio_buf_alloc(sizeof (vdev_phys_t)); ++ bzero(vp, sizeof (vdev_phys_t)); ++ ++ buf = vp->vp_nvlist; ++ buflen = sizeof (vp->vp_nvlist); ++ ++ if (nvlist_pack(label, &buf, &buflen, NV_ENCODE_XDR, KM_PUSHPAGE) == 0) { ++ for (; l < VDEV_LABELS; l += 2) { ++ vdev_label_write(zio, vd, l, vp, ++ offsetof(vdev_label_t, vl_vdev_phys), ++ sizeof (vdev_phys_t), ++ vdev_label_sync_done, zio->io_private, ++ flags | ZIO_FLAG_DONT_PROPAGATE); ++ } ++ } ++ ++ zio_buf_free(vp, sizeof (vdev_phys_t)); ++ nvlist_free(label); ++} ++ ++int ++vdev_label_sync_list(spa_t *spa, int l, uint64_t txg, int flags) ++{ ++ list_t *dl = &spa->spa_config_dirty_list; ++ vdev_t *vd; ++ zio_t *zio; ++ int error; ++ ++ /* ++ * Write the new labels to disk. ++ */ ++ zio = zio_root(spa, NULL, NULL, flags); ++ ++ for (vd = list_head(dl); vd != NULL; vd = list_next(dl, vd)) { ++ uint64_t *good_writes; ++ zio_t *vio; ++ ++ ASSERT(!vd->vdev_ishole); ++ ++ good_writes = kmem_zalloc(sizeof (uint64_t), KM_PUSHPAGE); ++ vio = zio_null(zio, spa, NULL, ++ (vd->vdev_islog || vd->vdev_aux != NULL) ? ++ vdev_label_sync_ignore_done : vdev_label_sync_top_done, ++ good_writes, flags); ++ vdev_label_sync(vio, vd, l, txg, flags); ++ zio_nowait(vio); ++ } ++ ++ error = zio_wait(zio); ++ ++ /* ++ * Flush the new labels to disk. ++ */ ++ zio = zio_root(spa, NULL, NULL, flags); ++ ++ for (vd = list_head(dl); vd != NULL; vd = list_next(dl, vd)) ++ zio_flush(zio, vd); ++ ++ (void) zio_wait(zio); ++ ++ return (error); ++} ++ ++/* ++ * Sync the uberblock and any changes to the vdev configuration. ++ * ++ * The order of operations is carefully crafted to ensure that ++ * if the system panics or loses power at any time, the state on disk ++ * is still transactionally consistent. The in-line comments below ++ * describe the failure semantics at each stage. 
++ * ++ * Moreover, vdev_config_sync() is designed to be idempotent: if it fails ++ * at any time, you can just call it again, and it will resume its work. ++ */ ++int ++vdev_config_sync(vdev_t **svd, int svdcount, uint64_t txg, boolean_t tryhard) ++{ ++ spa_t *spa = svd[0]->vdev_spa; ++ uberblock_t *ub = &spa->spa_uberblock; ++ vdev_t *vd; ++ zio_t *zio; ++ int error; ++ int flags = ZIO_FLAG_CONFIG_WRITER | ZIO_FLAG_CANFAIL; ++ ++ /* ++ * Normally, we don't want to try too hard to write every label and ++ * uberblock. If there is a flaky disk, we don't want the rest of the ++ * sync process to block while we retry. But if we can't write a ++ * single label out, we should retry with ZIO_FLAG_TRYHARD before ++ * bailing out and declaring the pool faulted. ++ */ ++ if (tryhard) ++ flags |= ZIO_FLAG_TRYHARD; ++ ++ ASSERT(ub->ub_txg <= txg); ++ ++ /* ++ * If this isn't a resync due to I/O errors, ++ * and nothing changed in this transaction group, ++ * and the vdev configuration hasn't changed, ++ * then there's nothing to do. ++ */ ++ if (ub->ub_txg < txg && ++ uberblock_update(ub, spa->spa_root_vdev, txg) == B_FALSE && ++ list_is_empty(&spa->spa_config_dirty_list)) ++ return (0); ++ ++ if (txg > spa_freeze_txg(spa)) ++ return (0); ++ ++ ASSERT(txg <= spa->spa_final_txg); ++ ++ /* ++ * Flush the write cache of every disk that's been written to ++ * in this transaction group. This ensures that all blocks ++ * written in this txg will be committed to stable storage ++ * before any uberblock that references them. ++ */ ++ zio = zio_root(spa, NULL, NULL, flags); ++ ++ for (vd = txg_list_head(&spa->spa_vdev_txg_list, TXG_CLEAN(txg)); vd; ++ vd = txg_list_next(&spa->spa_vdev_txg_list, vd, TXG_CLEAN(txg))) ++ zio_flush(zio, vd); ++ ++ (void) zio_wait(zio); ++ ++ /* ++ * Sync out the even labels (L0, L2) for every dirty vdev. If the ++ * system dies in the middle of this process, that's OK: all of the ++ * even labels that made it to disk will be newer than any uberblock, ++ * and will therefore be considered invalid. The odd labels (L1, L3), ++ * which have not yet been touched, will still be valid. We flush ++ * the new labels to disk to ensure that all even-label updates ++ * are committed to stable storage before the uberblock update. ++ */ ++ if ((error = vdev_label_sync_list(spa, 0, txg, flags)) != 0) ++ return (error); ++ ++ /* ++ * Sync the uberblocks to all vdevs in svd[]. ++ * If the system dies in the middle of this step, there are two cases ++ * to consider, and the on-disk state is consistent either way: ++ * ++ * (1) If none of the new uberblocks made it to disk, then the ++ * previous uberblock will be the newest, and the odd labels ++ * (which had not yet been touched) will be valid with respect ++ * to that uberblock. ++ * ++ * (2) If one or more new uberblocks made it to disk, then they ++ * will be the newest, and the even labels (which had all ++ * been successfully committed) will be valid with respect ++ * to the new uberblocks. ++ */ ++ if ((error = vdev_uberblock_sync_list(svd, svdcount, ub, flags)) != 0) ++ return (error); ++ ++ /* ++ * Sync out odd labels for every dirty vdev. If the system dies ++ * in the middle of this process, the even labels and the new ++ * uberblocks will suffice to open the pool. The next time ++ * the pool is opened, the first thing we'll do -- before any ++ * user data is modified -- is mark every vdev dirty so that ++ * all labels will be brought up to date. 
We flush the new labels ++ * to disk to ensure that all odd-label updates are committed to ++ * stable storage before the next transaction group begins. ++ */ ++ return (vdev_label_sync_list(spa, 1, txg, flags)); ++} +diff -uNr linux-3.2.33-go.orig/fs/zfs/zfs/vdev_mirror.c linux-3.2.33-go/fs/zfs/zfs/vdev_mirror.c +--- linux-3.2.33-go.orig/fs/zfs/zfs/vdev_mirror.c 1970-01-01 01:00:00.000000000 +0100 ++++ linux-3.2.33-go/fs/zfs/zfs/vdev_mirror.c 2012-11-16 23:25:34.349039334 +0100 +@@ -0,0 +1,494 @@ ++/* ++ * CDDL HEADER START ++ * ++ * The contents of this file are subject to the terms of the ++ * Common Development and Distribution License (the "License"). ++ * You may not use this file except in compliance with the License. ++ * ++ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE ++ * or http://www.opensolaris.org/os/licensing. ++ * See the License for the specific language governing permissions ++ * and limitations under the License. ++ * ++ * When distributing Covered Code, include this CDDL HEADER in each ++ * file and include the License file at usr/src/OPENSOLARIS.LICENSE. ++ * If applicable, add the following below this CDDL HEADER, with the ++ * fields enclosed by brackets "[]" replaced with your own identifying ++ * information: Portions Copyright [yyyy] [name of copyright owner] ++ * ++ * CDDL HEADER END ++ */ ++/* ++ * Copyright 2010 Sun Microsystems, Inc. All rights reserved. ++ * Use is subject to license terms. ++ */ ++ ++/* ++ * Copyright (c) 2012 by Delphix. All rights reserved. ++ */ ++ ++#include ++#include ++#include ++#include ++#include ++ ++/* ++ * Virtual device vector for mirroring. ++ */ ++ ++typedef struct mirror_child { ++ vdev_t *mc_vd; ++ uint64_t mc_offset; ++ int mc_error; ++ uint8_t mc_tried; ++ uint8_t mc_skipped; ++ uint8_t mc_speculative; ++} mirror_child_t; ++ ++typedef struct mirror_map { ++ int mm_children; ++ int mm_replacing; ++ int mm_preferred; ++ int mm_root; ++ mirror_child_t mm_child[1]; ++} mirror_map_t; ++ ++int vdev_mirror_shift = 21; ++ ++static void ++vdev_mirror_map_free(zio_t *zio) ++{ ++ mirror_map_t *mm = zio->io_vsd; ++ ++ kmem_free(mm, offsetof(mirror_map_t, mm_child[mm->mm_children])); ++} ++ ++static const zio_vsd_ops_t vdev_mirror_vsd_ops = { ++ vdev_mirror_map_free, ++ zio_vsd_default_cksum_report ++}; ++ ++static mirror_map_t * ++vdev_mirror_map_alloc(zio_t *zio) ++{ ++ mirror_map_t *mm = NULL; ++ mirror_child_t *mc; ++ vdev_t *vd = zio->io_vd; ++ int c, d; ++ ++ if (vd == NULL) { ++ dva_t *dva = zio->io_bp->blk_dva; ++ spa_t *spa = zio->io_spa; ++ ++ c = BP_GET_NDVAS(zio->io_bp); ++ ++ mm = kmem_zalloc(offsetof(mirror_map_t, mm_child[c]), KM_PUSHPAGE); ++ mm->mm_children = c; ++ mm->mm_replacing = B_FALSE; ++ mm->mm_preferred = spa_get_random(c); ++ mm->mm_root = B_TRUE; ++ ++ /* ++ * Check the other, lower-index DVAs to see if they're on ++ * the same vdev as the child we picked. If they are, use ++ * them since they are likely to have been allocated from ++ * the primary metaslab in use at the time, and hence are ++ * more likely to have locality with single-copy data. 
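A small worked instance of the lower-index DVA check described just above (editor's sketch, not from the patch): DVA_GET_VDEV is replaced by a plain array of invented vdev numbers, and the loop mirrors the one that follows in this function. The initial pick is DVA 2 on vdev 7; DVA 0 also lives on vdev 7, so the walk over the lower indexes settles on DVA 0.

#include <stdio.h>
#include <stdint.h>

int
main(void)
{
    /* Which top-level vdev each DVA of a 3-copy block lives on. */
    const uint64_t dva_vdev[] = { 7, 3, 7 };
    int preferred = 2;    /* initial (e.g. random) pick: DVA 2, on vdev 7 */
    int c = preferred, d;

    /* Prefer the lowest-index DVA that sits on the same vdev as the pick. */
    for (d = c - 1; d >= 0; d--) {
        if (dva_vdev[d] == dva_vdev[c])
            preferred = d;
    }

    printf("preferred DVA: %d\n", preferred);    /* prints 0 */
    return (0);
}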
++ */ ++ for (c = mm->mm_preferred, d = c - 1; d >= 0; d--) { ++ if (DVA_GET_VDEV(&dva[d]) == DVA_GET_VDEV(&dva[c])) ++ mm->mm_preferred = d; ++ } ++ ++ for (c = 0; c < mm->mm_children; c++) { ++ mc = &mm->mm_child[c]; ++ ++ mc->mc_vd = vdev_lookup_top(spa, DVA_GET_VDEV(&dva[c])); ++ mc->mc_offset = DVA_GET_OFFSET(&dva[c]); ++ } ++ } else { ++ c = vd->vdev_children; ++ ++ mm = kmem_zalloc(offsetof(mirror_map_t, mm_child[c]), KM_PUSHPAGE); ++ mm->mm_children = c; ++ mm->mm_replacing = (vd->vdev_ops == &vdev_replacing_ops || ++ vd->vdev_ops == &vdev_spare_ops); ++ mm->mm_preferred = mm->mm_replacing ? 0 : ++ (zio->io_offset >> vdev_mirror_shift) % c; ++ mm->mm_root = B_FALSE; ++ ++ for (c = 0; c < mm->mm_children; c++) { ++ mc = &mm->mm_child[c]; ++ mc->mc_vd = vd->vdev_child[c]; ++ mc->mc_offset = zio->io_offset; ++ } ++ } ++ ++ zio->io_vsd = mm; ++ zio->io_vsd_ops = &vdev_mirror_vsd_ops; ++ return (mm); ++} ++ ++static int ++vdev_mirror_open(vdev_t *vd, uint64_t *asize, uint64_t *max_asize, ++ uint64_t *ashift) ++{ ++ int numerrors = 0; ++ int lasterror = 0; ++ int c; ++ ++ if (vd->vdev_children == 0) { ++ vd->vdev_stat.vs_aux = VDEV_AUX_BAD_LABEL; ++ return (EINVAL); ++ } ++ ++ vdev_open_children(vd); ++ ++ for (c = 0; c < vd->vdev_children; c++) { ++ vdev_t *cvd = vd->vdev_child[c]; ++ ++ if (cvd->vdev_open_error) { ++ lasterror = cvd->vdev_open_error; ++ numerrors++; ++ continue; ++ } ++ ++ *asize = MIN(*asize - 1, cvd->vdev_asize - 1) + 1; ++ *max_asize = MIN(*max_asize - 1, cvd->vdev_max_asize - 1) + 1; ++ *ashift = MAX(*ashift, cvd->vdev_ashift); ++ } ++ ++ if (numerrors == vd->vdev_children) { ++ vd->vdev_stat.vs_aux = VDEV_AUX_NO_REPLICAS; ++ return (lasterror); ++ } ++ ++ return (0); ++} ++ ++static void ++vdev_mirror_close(vdev_t *vd) ++{ ++ int c; ++ ++ for (c = 0; c < vd->vdev_children; c++) ++ vdev_close(vd->vdev_child[c]); ++} ++ ++static void ++vdev_mirror_child_done(zio_t *zio) ++{ ++ mirror_child_t *mc = zio->io_private; ++ ++ mc->mc_error = zio->io_error; ++ mc->mc_tried = 1; ++ mc->mc_skipped = 0; ++} ++ ++static void ++vdev_mirror_scrub_done(zio_t *zio) ++{ ++ mirror_child_t *mc = zio->io_private; ++ ++ if (zio->io_error == 0) { ++ zio_t *pio; ++ ++ mutex_enter(&zio->io_lock); ++ while ((pio = zio_walk_parents(zio)) != NULL) { ++ mutex_enter(&pio->io_lock); ++ ASSERT3U(zio->io_size, >=, pio->io_size); ++ bcopy(zio->io_data, pio->io_data, pio->io_size); ++ mutex_exit(&pio->io_lock); ++ } ++ mutex_exit(&zio->io_lock); ++ } ++ ++ zio_buf_free(zio->io_data, zio->io_size); ++ ++ mc->mc_error = zio->io_error; ++ mc->mc_tried = 1; ++ mc->mc_skipped = 0; ++} ++ ++/* ++ * Try to find a child whose DTL doesn't contain the block we want to read. ++ * If we can't, try the read on any vdev we haven't already tried. ++ */ ++static int ++vdev_mirror_child_select(zio_t *zio) ++{ ++ mirror_map_t *mm = zio->io_vsd; ++ mirror_child_t *mc; ++ uint64_t txg = zio->io_txg; ++ int i, c; ++ ++ ASSERT(zio->io_bp == NULL || BP_PHYSICAL_BIRTH(zio->io_bp) == txg); ++ ++ /* ++ * Try to find a child whose DTL doesn't contain the block to read. ++ * If a child is known to be completely inaccessible (indicated by ++ * vdev_readable() returning B_FALSE), don't even try. 
++ */ ++ for (i = 0, c = mm->mm_preferred; i < mm->mm_children; i++, c++) { ++ if (c >= mm->mm_children) ++ c = 0; ++ mc = &mm->mm_child[c]; ++ if (mc->mc_tried || mc->mc_skipped) ++ continue; ++ if (!vdev_readable(mc->mc_vd)) { ++ mc->mc_error = ENXIO; ++ mc->mc_tried = 1; /* don't even try */ ++ mc->mc_skipped = 1; ++ continue; ++ } ++ if (!vdev_dtl_contains(mc->mc_vd, DTL_MISSING, txg, 1)) ++ return (c); ++ mc->mc_error = ESTALE; ++ mc->mc_skipped = 1; ++ mc->mc_speculative = 1; ++ } ++ ++ /* ++ * Every device is either missing or has this txg in its DTL. ++ * Look for any child we haven't already tried before giving up. ++ */ ++ for (c = 0; c < mm->mm_children; c++) ++ if (!mm->mm_child[c].mc_tried) ++ return (c); ++ ++ /* ++ * Every child failed. There's no place left to look. ++ */ ++ return (-1); ++} ++ ++static int ++vdev_mirror_io_start(zio_t *zio) ++{ ++ mirror_map_t *mm; ++ mirror_child_t *mc; ++ int c, children; ++ ++ mm = vdev_mirror_map_alloc(zio); ++ ++ if (zio->io_type == ZIO_TYPE_READ) { ++ if ((zio->io_flags & ZIO_FLAG_SCRUB) && !mm->mm_replacing) { ++ /* ++ * For scrubbing reads we need to allocate a read ++ * buffer for each child and issue reads to all ++ * children. If any child succeeds, it will copy its ++ * data into zio->io_data in vdev_mirror_scrub_done. ++ */ ++ for (c = 0; c < mm->mm_children; c++) { ++ mc = &mm->mm_child[c]; ++ zio_nowait(zio_vdev_child_io(zio, zio->io_bp, ++ mc->mc_vd, mc->mc_offset, ++ zio_buf_alloc(zio->io_size), zio->io_size, ++ zio->io_type, zio->io_priority, 0, ++ vdev_mirror_scrub_done, mc)); ++ } ++ return (ZIO_PIPELINE_CONTINUE); ++ } ++ /* ++ * For normal reads just pick one child. ++ */ ++ c = vdev_mirror_child_select(zio); ++ children = (c >= 0); ++ } else { ++ ASSERT(zio->io_type == ZIO_TYPE_WRITE); ++ ++ /* ++ * Writes go to all children. ++ */ ++ c = 0; ++ children = mm->mm_children; ++ } ++ ++ while (children--) { ++ mc = &mm->mm_child[c]; ++ zio_nowait(zio_vdev_child_io(zio, zio->io_bp, ++ mc->mc_vd, mc->mc_offset, zio->io_data, zio->io_size, ++ zio->io_type, zio->io_priority, 0, ++ vdev_mirror_child_done, mc)); ++ c++; ++ } ++ ++ return (ZIO_PIPELINE_CONTINUE); ++} ++ ++static int ++vdev_mirror_worst_error(mirror_map_t *mm) ++{ ++ int c, error[2] = { 0, 0 }; ++ ++ for (c = 0; c < mm->mm_children; c++) { ++ mirror_child_t *mc = &mm->mm_child[c]; ++ int s = mc->mc_speculative; ++ error[s] = zio_worst_error(error[s], mc->mc_error); ++ } ++ ++ return (error[0] ? error[0] : error[1]); ++} ++ ++static void ++vdev_mirror_io_done(zio_t *zio) ++{ ++ mirror_map_t *mm = zio->io_vsd; ++ mirror_child_t *mc; ++ int c; ++ int good_copies = 0; ++ int unexpected_errors = 0; ++ ++ for (c = 0; c < mm->mm_children; c++) { ++ mc = &mm->mm_child[c]; ++ ++ if (mc->mc_error) { ++ if (!mc->mc_skipped) ++ unexpected_errors++; ++ } else if (mc->mc_tried) { ++ good_copies++; ++ } ++ } ++ ++ if (zio->io_type == ZIO_TYPE_WRITE) { ++ /* ++ * XXX -- for now, treat partial writes as success. ++ * ++ * Now that we support write reallocation, it would be better ++ * to treat partial failure as real failure unless there are ++ * no non-degraded top-level vdevs left, and not update DTLs ++ * if we intend to reallocate. ++ */ ++ /* XXPOLICY */ ++ if (good_copies != mm->mm_children) { ++ /* ++ * Always require at least one good copy. ++ * ++ * For ditto blocks (io_vd == NULL), require ++ * all copies to be good. ++ * ++ * XXX -- for replacing vdevs, there's no great answer. 
++ * If the old device is really dead, we may not even ++ * be able to access it -- so we only want to ++ * require good writes to the new device. But if ++ * the new device turns out to be flaky, we want ++ * to be able to detach it -- which requires all ++ * writes to the old device to have succeeded. ++ */ ++ if (good_copies == 0 || zio->io_vd == NULL) ++ zio->io_error = vdev_mirror_worst_error(mm); ++ } ++ return; ++ } ++ ++ ASSERT(zio->io_type == ZIO_TYPE_READ); ++ ++ /* ++ * If we don't have a good copy yet, keep trying other children. ++ */ ++ /* XXPOLICY */ ++ if (good_copies == 0 && (c = vdev_mirror_child_select(zio)) != -1) { ++ ASSERT(c >= 0 && c < mm->mm_children); ++ mc = &mm->mm_child[c]; ++ zio_vdev_io_redone(zio); ++ zio_nowait(zio_vdev_child_io(zio, zio->io_bp, ++ mc->mc_vd, mc->mc_offset, zio->io_data, zio->io_size, ++ ZIO_TYPE_READ, zio->io_priority, 0, ++ vdev_mirror_child_done, mc)); ++ return; ++ } ++ ++ /* XXPOLICY */ ++ if (good_copies == 0) { ++ zio->io_error = vdev_mirror_worst_error(mm); ++ ASSERT(zio->io_error != 0); ++ } ++ ++ if (good_copies && spa_writeable(zio->io_spa) && ++ (unexpected_errors || ++ (zio->io_flags & ZIO_FLAG_RESILVER) || ++ ((zio->io_flags & ZIO_FLAG_SCRUB) && mm->mm_replacing))) { ++ /* ++ * Use the good data we have in hand to repair damaged children. ++ */ ++ for (c = 0; c < mm->mm_children; c++) { ++ /* ++ * Don't rewrite known good children. ++ * Not only is it unnecessary, it could ++ * actually be harmful: if the system lost ++ * power while rewriting the only good copy, ++ * there would be no good copies left! ++ */ ++ mc = &mm->mm_child[c]; ++ ++ if (mc->mc_error == 0) { ++ if (mc->mc_tried) ++ continue; ++ if (!(zio->io_flags & ZIO_FLAG_SCRUB) && ++ !vdev_dtl_contains(mc->mc_vd, DTL_PARTIAL, ++ zio->io_txg, 1)) ++ continue; ++ mc->mc_error = ESTALE; ++ } ++ ++ zio_nowait(zio_vdev_child_io(zio, zio->io_bp, ++ mc->mc_vd, mc->mc_offset, ++ zio->io_data, zio->io_size, ++ ZIO_TYPE_WRITE, zio->io_priority, ++ ZIO_FLAG_IO_REPAIR | (unexpected_errors ? 
++ ZIO_FLAG_SELF_HEAL : 0), NULL, NULL)); ++ } ++ } ++} ++ ++static void ++vdev_mirror_state_change(vdev_t *vd, int faulted, int degraded) ++{ ++ if (faulted == vd->vdev_children) ++ vdev_set_state(vd, B_FALSE, VDEV_STATE_CANT_OPEN, ++ VDEV_AUX_NO_REPLICAS); ++ else if (degraded + faulted != 0) ++ vdev_set_state(vd, B_FALSE, VDEV_STATE_DEGRADED, VDEV_AUX_NONE); ++ else ++ vdev_set_state(vd, B_FALSE, VDEV_STATE_HEALTHY, VDEV_AUX_NONE); ++} ++ ++vdev_ops_t vdev_mirror_ops = { ++ vdev_mirror_open, ++ vdev_mirror_close, ++ vdev_default_asize, ++ vdev_mirror_io_start, ++ vdev_mirror_io_done, ++ vdev_mirror_state_change, ++ NULL, ++ NULL, ++ VDEV_TYPE_MIRROR, /* name of this vdev type */ ++ B_FALSE /* not a leaf vdev */ ++}; ++ ++vdev_ops_t vdev_replacing_ops = { ++ vdev_mirror_open, ++ vdev_mirror_close, ++ vdev_default_asize, ++ vdev_mirror_io_start, ++ vdev_mirror_io_done, ++ vdev_mirror_state_change, ++ NULL, ++ NULL, ++ VDEV_TYPE_REPLACING, /* name of this vdev type */ ++ B_FALSE /* not a leaf vdev */ ++}; ++ ++vdev_ops_t vdev_spare_ops = { ++ vdev_mirror_open, ++ vdev_mirror_close, ++ vdev_default_asize, ++ vdev_mirror_io_start, ++ vdev_mirror_io_done, ++ vdev_mirror_state_change, ++ NULL, ++ NULL, ++ VDEV_TYPE_SPARE, /* name of this vdev type */ ++ B_FALSE /* not a leaf vdev */ ++}; +diff -uNr linux-3.2.33-go.orig/fs/zfs/zfs/vdev_missing.c linux-3.2.33-go/fs/zfs/zfs/vdev_missing.c +--- linux-3.2.33-go.orig/fs/zfs/zfs/vdev_missing.c 1970-01-01 01:00:00.000000000 +0100 ++++ linux-3.2.33-go/fs/zfs/zfs/vdev_missing.c 2012-11-16 23:25:34.351039311 +0100 +@@ -0,0 +1,106 @@ ++/* ++ * CDDL HEADER START ++ * ++ * The contents of this file are subject to the terms of the ++ * Common Development and Distribution License (the "License"). ++ * You may not use this file except in compliance with the License. ++ * ++ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE ++ * or http://www.opensolaris.org/os/licensing. ++ * See the License for the specific language governing permissions ++ * and limitations under the License. ++ * ++ * When distributing Covered Code, include this CDDL HEADER in each ++ * file and include the License file at usr/src/OPENSOLARIS.LICENSE. ++ * If applicable, add the following below this CDDL HEADER, with the ++ * fields enclosed by brackets "[]" replaced with your own identifying ++ * information: Portions Copyright [yyyy] [name of copyright owner] ++ * ++ * CDDL HEADER END ++ */ ++/* ++ * Copyright 2010 Sun Microsystems, Inc. All rights reserved. ++ * Use is subject to license terms. ++ */ ++ ++/* ++ * Copyright (c) 2012 by Delphix. All rights reserved. ++ */ ++ ++/* ++ * The 'missing' vdev is a special vdev type used only during import. It ++ * signifies a placeholder in the root vdev for some vdev that we know is ++ * missing. We pass it down to the kernel to allow the rest of the ++ * configuration to parsed and an attempt made to open all available devices. ++ * Because its GUID is always 0, we know that the guid sum will mismatch and we ++ * won't be able to open the pool anyway. ++ */ ++ ++#include ++#include ++#include ++#include ++#include ++ ++/* ARGSUSED */ ++static int ++vdev_missing_open(vdev_t *vd, uint64_t *psize, uint64_t *max_psize, ++ uint64_t *ashift) ++{ ++ /* ++ * Really this should just fail. But then the root vdev will be in the ++ * faulted state with VDEV_AUX_NO_REPLICAS, when what we really want is ++ * VDEV_AUX_BAD_GUID_SUM. 
So we pretend to succeed, knowing that we ++ * will fail the GUID sum check before ever trying to open the pool. ++ */ ++ *psize = 0; ++ *max_psize = 0; ++ *ashift = 0; ++ return (0); ++} ++ ++/* ARGSUSED */ ++static void ++vdev_missing_close(vdev_t *vd) ++{ ++} ++ ++/* ARGSUSED */ ++static int ++vdev_missing_io_start(zio_t *zio) ++{ ++ zio->io_error = ENOTSUP; ++ return (ZIO_PIPELINE_CONTINUE); ++} ++ ++/* ARGSUSED */ ++static void ++vdev_missing_io_done(zio_t *zio) ++{ ++} ++ ++vdev_ops_t vdev_missing_ops = { ++ vdev_missing_open, ++ vdev_missing_close, ++ vdev_default_asize, ++ vdev_missing_io_start, ++ vdev_missing_io_done, ++ NULL, ++ NULL, ++ NULL, ++ VDEV_TYPE_MISSING, /* name of this vdev type */ ++ B_TRUE /* leaf vdev */ ++}; ++ ++vdev_ops_t vdev_hole_ops = { ++ vdev_missing_open, ++ vdev_missing_close, ++ vdev_default_asize, ++ vdev_missing_io_start, ++ vdev_missing_io_done, ++ NULL, ++ NULL, ++ NULL, ++ VDEV_TYPE_HOLE, /* name of this vdev type */ ++ B_TRUE /* leaf vdev */ ++}; +diff -uNr linux-3.2.33-go.orig/fs/zfs/zfs/vdev_queue.c linux-3.2.33-go/fs/zfs/zfs/vdev_queue.c +--- linux-3.2.33-go.orig/fs/zfs/zfs/vdev_queue.c 1970-01-01 01:00:00.000000000 +0100 ++++ linux-3.2.33-go/fs/zfs/zfs/vdev_queue.c 2012-11-16 23:25:34.353039289 +0100 +@@ -0,0 +1,462 @@ ++/* ++ * CDDL HEADER START ++ * ++ * The contents of this file are subject to the terms of the ++ * Common Development and Distribution License (the "License"). ++ * You may not use this file except in compliance with the License. ++ * ++ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE ++ * or http://www.opensolaris.org/os/licensing. ++ * See the License for the specific language governing permissions ++ * and limitations under the License. ++ * ++ * When distributing Covered Code, include this CDDL HEADER in each ++ * file and include the License file at usr/src/OPENSOLARIS.LICENSE. ++ * If applicable, add the following below this CDDL HEADER, with the ++ * fields enclosed by brackets "[]" replaced with your own identifying ++ * information: Portions Copyright [yyyy] [name of copyright owner] ++ * ++ * CDDL HEADER END ++ */ ++/* ++ * Copyright 2009 Sun Microsystems, Inc. All rights reserved. ++ * Use is subject to license terms. ++ */ ++ ++#include ++#include ++#include ++#include ++ ++/* ++ * These tunables are for performance analysis. ++ */ ++/* ++ * zfs_vdev_max_pending is the maximum number of i/os concurrently ++ * pending to each device. zfs_vdev_min_pending is the initial number ++ * of i/os pending to each device (before it starts ramping up to ++ * max_pending). ++ */ ++int zfs_vdev_max_pending = 10; ++int zfs_vdev_min_pending = 4; ++ ++/* deadline = pri + ddi_get_lbolt64() >> time_shift) */ ++int zfs_vdev_time_shift = 6; ++ ++/* exponential I/O issue ramp-up rate */ ++int zfs_vdev_ramp_rate = 2; ++ ++/* ++ * To reduce IOPs, we aggregate small adjacent I/Os into one large I/O. ++ * For read I/Os, we also aggregate across small adjacency gaps; for writes ++ * we include spans of optional I/Os to aid aggregation at the disk even when ++ * they aren't able to help us aggregate at this level. ++ */ ++int zfs_vdev_aggregation_limit = SPA_MAXBLOCKSIZE; ++int zfs_vdev_read_gap_limit = 32 << 10; ++int zfs_vdev_write_gap_limit = 4 << 10; ++ ++/* ++ * Virtual device vector for disk I/O scheduling. 
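To see what the deadline formula noted in the tunables comment above (deadline = pri + (lbolt >> time_shift)) actually does, here is a stand-alone arithmetic sketch (editor's example, not from the patch): the tick value and the priority numbers are invented, and ddi_get_lbolt64() is replaced by a constant. With a time shift of 6, ticks fall into 64-tick buckets; within one bucket a smaller priority value sorts first, while an I/O that has already waited a few buckets eventually sorts ahead of newer I/O with a smaller priority value, which keeps low-priority work from starving.

#include <stdio.h>
#include <stdint.h>

#define TIME_SHIFT    6    /* mirrors zfs_vdev_time_shift */

static int64_t
deadline(int64_t lbolt, int priority)
{
    return ((lbolt >> TIME_SHIFT) + priority);
}

int
main(void)
{
    int64_t now = 100000;    /* pretend ddi_get_lbolt64() value, in ticks */

    /* Same 64-tick bucket: the smaller priority value sorts first. */
    printf("prio 0, now      : %lld\n", (long long)deadline(now, 0));
    printf("prio 4, now      : %lld\n", (long long)deadline(now, 4));

    /* A prio-4 I/O issued 5 buckets (320 ticks) ago beats a brand-new prio-0 I/O. */
    printf("prio 4, 5 buckets: %lld\n", (long long)deadline(now - 5 * 64, 4));
    return (0);
}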
++ */ ++int ++vdev_queue_deadline_compare(const void *x1, const void *x2) ++{ ++ const zio_t *z1 = x1; ++ const zio_t *z2 = x2; ++ ++ if (z1->io_deadline < z2->io_deadline) ++ return (-1); ++ if (z1->io_deadline > z2->io_deadline) ++ return (1); ++ ++ if (z1->io_offset < z2->io_offset) ++ return (-1); ++ if (z1->io_offset > z2->io_offset) ++ return (1); ++ ++ if (z1 < z2) ++ return (-1); ++ if (z1 > z2) ++ return (1); ++ ++ return (0); ++} ++ ++int ++vdev_queue_offset_compare(const void *x1, const void *x2) ++{ ++ const zio_t *z1 = x1; ++ const zio_t *z2 = x2; ++ ++ if (z1->io_offset < z2->io_offset) ++ return (-1); ++ if (z1->io_offset > z2->io_offset) ++ return (1); ++ ++ if (z1 < z2) ++ return (-1); ++ if (z1 > z2) ++ return (1); ++ ++ return (0); ++} ++ ++void ++vdev_queue_init(vdev_t *vd) ++{ ++ vdev_queue_t *vq = &vd->vdev_queue; ++ int i; ++ ++ mutex_init(&vq->vq_lock, NULL, MUTEX_DEFAULT, NULL); ++ ++ avl_create(&vq->vq_deadline_tree, vdev_queue_deadline_compare, ++ sizeof (zio_t), offsetof(struct zio, io_deadline_node)); ++ ++ avl_create(&vq->vq_read_tree, vdev_queue_offset_compare, ++ sizeof (zio_t), offsetof(struct zio, io_offset_node)); ++ ++ avl_create(&vq->vq_write_tree, vdev_queue_offset_compare, ++ sizeof (zio_t), offsetof(struct zio, io_offset_node)); ++ ++ avl_create(&vq->vq_pending_tree, vdev_queue_offset_compare, ++ sizeof (zio_t), offsetof(struct zio, io_offset_node)); ++ ++ /* ++ * A list of buffers which can be used for aggregate I/O, this ++ * avoids the need to allocate them on demand when memory is low. ++ */ ++ list_create(&vq->vq_io_list, sizeof (vdev_io_t), ++ offsetof(vdev_io_t, vi_node)); ++ ++ for (i = 0; i < zfs_vdev_max_pending; i++) ++ list_insert_tail(&vq->vq_io_list, zio_vdev_alloc()); ++} ++ ++void ++vdev_queue_fini(vdev_t *vd) ++{ ++ vdev_queue_t *vq = &vd->vdev_queue; ++ vdev_io_t *vi; ++ ++ avl_destroy(&vq->vq_deadline_tree); ++ avl_destroy(&vq->vq_read_tree); ++ avl_destroy(&vq->vq_write_tree); ++ avl_destroy(&vq->vq_pending_tree); ++ ++ while ((vi = list_head(&vq->vq_io_list)) != NULL) { ++ list_remove(&vq->vq_io_list, vi); ++ zio_vdev_free(vi); ++ } ++ ++ list_destroy(&vq->vq_io_list); ++ ++ mutex_destroy(&vq->vq_lock); ++} ++ ++static void ++vdev_queue_io_add(vdev_queue_t *vq, zio_t *zio) ++{ ++ avl_add(&vq->vq_deadline_tree, zio); ++ avl_add(zio->io_vdev_tree, zio); ++} ++ ++static void ++vdev_queue_io_remove(vdev_queue_t *vq, zio_t *zio) ++{ ++ avl_remove(&vq->vq_deadline_tree, zio); ++ avl_remove(zio->io_vdev_tree, zio); ++} ++ ++static void ++vdev_queue_agg_io_done(zio_t *aio) ++{ ++ vdev_queue_t *vq = &aio->io_vd->vdev_queue; ++ vdev_io_t *vi = aio->io_data; ++ zio_t *pio; ++ ++ while ((pio = zio_walk_parents(aio)) != NULL) ++ if (aio->io_type == ZIO_TYPE_READ) ++ bcopy((char *)aio->io_data + (pio->io_offset - ++ aio->io_offset), pio->io_data, pio->io_size); ++ ++ mutex_enter(&vq->vq_lock); ++ list_insert_tail(&vq->vq_io_list, vi); ++ mutex_exit(&vq->vq_lock); ++} ++ ++/* ++ * Compute the range spanned by two i/os, which is the endpoint of the last ++ * (lio->io_offset + lio->io_size) minus start of the first (fio->io_offset). ++ * Conveniently, the gap between fio and lio is given by -IO_SPAN(lio, fio); ++ * thus fio and lio are adjacent if and only if IO_SPAN(lio, fio) == 0. 
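A quick numeric check of the span/gap arithmetic described above (editor's sketch, not from the patch): the zio_t is reduced to the two fields the macros touch, the struct name io and the offsets are invented, and the macro bodies are copied from the definitions that follow. Two 4 KiB I/Os at offsets 0 and 4096 are adjacent (gap 0, span 8192), while moving the second one to offset 12288 opens an 8192-byte gap and a 16384-byte span.

#include <stdio.h>
#include <stdint.h>

struct io { int64_t io_offset, io_size; };    /* reduced stand-in for zio_t */

#define IO_SPAN(fio, lio) ((lio)->io_offset + (lio)->io_size - (fio)->io_offset)
#define IO_GAP(fio, lio)  (-IO_SPAN(lio, fio))

int
main(void)
{
    struct io fio = { 0, 4096 };
    struct io adj = { 4096, 4096 };     /* immediately follows fio */
    struct io far = { 12288, 4096 };    /* 8 KiB past the end of fio */

    printf("adjacent: span=%lld gap=%lld\n",
        (long long)IO_SPAN(&fio, &adj), (long long)IO_GAP(&fio, &adj));
    printf("with gap: span=%lld gap=%lld\n",
        (long long)IO_SPAN(&fio, &far), (long long)IO_GAP(&fio, &far));
    return (0);
}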
++ */ ++#define IO_SPAN(fio, lio) ((lio)->io_offset + (lio)->io_size - (fio)->io_offset) ++#define IO_GAP(fio, lio) (-IO_SPAN(lio, fio)) ++ ++static zio_t * ++vdev_queue_io_to_issue(vdev_queue_t *vq, uint64_t pending_limit) ++{ ++ zio_t *fio, *lio, *aio, *dio, *nio, *mio; ++ avl_tree_t *t; ++ vdev_io_t *vi; ++ int flags; ++ uint64_t maxspan = MIN(zfs_vdev_aggregation_limit, SPA_MAXBLOCKSIZE); ++ uint64_t maxgap; ++ int stretch; ++ ++again: ++ ASSERT(MUTEX_HELD(&vq->vq_lock)); ++ ++ if (avl_numnodes(&vq->vq_pending_tree) >= pending_limit || ++ avl_numnodes(&vq->vq_deadline_tree) == 0) ++ return (NULL); ++ ++ fio = lio = avl_first(&vq->vq_deadline_tree); ++ ++ t = fio->io_vdev_tree; ++ flags = fio->io_flags & ZIO_FLAG_AGG_INHERIT; ++ maxgap = (t == &vq->vq_read_tree) ? zfs_vdev_read_gap_limit : 0; ++ ++ vi = list_head(&vq->vq_io_list); ++ if (vi == NULL) { ++ vi = zio_vdev_alloc(); ++ list_insert_head(&vq->vq_io_list, vi); ++ } ++ ++ if (!(flags & ZIO_FLAG_DONT_AGGREGATE)) { ++ /* ++ * We can aggregate I/Os that are sufficiently adjacent and of ++ * the same flavor, as expressed by the AGG_INHERIT flags. ++ * The latter requirement is necessary so that certain ++ * attributes of the I/O, such as whether it's a normal I/O ++ * or a scrub/resilver, can be preserved in the aggregate. ++ * We can include optional I/Os, but don't allow them ++ * to begin a range as they add no benefit in that situation. ++ */ ++ ++ /* ++ * We keep track of the last non-optional I/O. ++ */ ++ mio = (fio->io_flags & ZIO_FLAG_OPTIONAL) ? NULL : fio; ++ ++ /* ++ * Walk backwards through sufficiently contiguous I/Os ++ * recording the last non-option I/O. ++ */ ++ while ((dio = AVL_PREV(t, fio)) != NULL && ++ (dio->io_flags & ZIO_FLAG_AGG_INHERIT) == flags && ++ IO_SPAN(dio, lio) <= maxspan && ++ IO_GAP(dio, fio) <= maxgap) { ++ fio = dio; ++ if (mio == NULL && !(fio->io_flags & ZIO_FLAG_OPTIONAL)) ++ mio = fio; ++ } ++ ++ /* ++ * Skip any initial optional I/Os. ++ */ ++ while ((fio->io_flags & ZIO_FLAG_OPTIONAL) && fio != lio) { ++ fio = AVL_NEXT(t, fio); ++ ASSERT(fio != NULL); ++ } ++ ++ /* ++ * Walk forward through sufficiently contiguous I/Os. ++ */ ++ while ((dio = AVL_NEXT(t, lio)) != NULL && ++ (dio->io_flags & ZIO_FLAG_AGG_INHERIT) == flags && ++ IO_SPAN(fio, dio) <= maxspan && ++ IO_GAP(lio, dio) <= maxgap) { ++ lio = dio; ++ if (!(lio->io_flags & ZIO_FLAG_OPTIONAL)) ++ mio = lio; ++ } ++ ++ /* ++ * Now that we've established the range of the I/O aggregation ++ * we must decide what to do with trailing optional I/Os. ++ * For reads, there's nothing to do. While we are unable to ++ * aggregate further, it's possible that a trailing optional ++ * I/O would allow the underlying device to aggregate with ++ * subsequent I/Os. We must therefore determine if the next ++ * non-optional I/O is close enough to make aggregation ++ * worthwhile. ++ */ ++ stretch = B_FALSE; ++ if (t != &vq->vq_read_tree && mio != NULL) { ++ nio = lio; ++ while ((dio = AVL_NEXT(t, nio)) != NULL && ++ IO_GAP(nio, dio) == 0 && ++ IO_GAP(mio, dio) <= zfs_vdev_write_gap_limit) { ++ nio = dio; ++ if (!(nio->io_flags & ZIO_FLAG_OPTIONAL)) { ++ stretch = B_TRUE; ++ break; ++ } ++ } ++ } ++ ++ if (stretch) { ++ /* This may be a no-op. 
*/ ++ VERIFY((dio = AVL_NEXT(t, lio)) != NULL); ++ dio->io_flags &= ~ZIO_FLAG_OPTIONAL; ++ } else { ++ while (lio != mio && lio != fio) { ++ ASSERT(lio->io_flags & ZIO_FLAG_OPTIONAL); ++ lio = AVL_PREV(t, lio); ++ ASSERT(lio != NULL); ++ } ++ } ++ } ++ ++ if (fio != lio) { ++ uint64_t size = IO_SPAN(fio, lio); ++ ASSERT(size <= maxspan); ++ ASSERT(vi != NULL); ++ ++ aio = zio_vdev_delegated_io(fio->io_vd, fio->io_offset, ++ vi, size, fio->io_type, ZIO_PRIORITY_AGG, ++ flags | ZIO_FLAG_DONT_CACHE | ZIO_FLAG_DONT_QUEUE, ++ vdev_queue_agg_io_done, NULL); ++ ++ nio = fio; ++ do { ++ dio = nio; ++ nio = AVL_NEXT(t, dio); ++ ASSERT(dio->io_type == aio->io_type); ++ ASSERT(dio->io_vdev_tree == t); ++ ++ if (dio->io_flags & ZIO_FLAG_NODATA) { ++ ASSERT(dio->io_type == ZIO_TYPE_WRITE); ++ bzero((char *)aio->io_data + (dio->io_offset - ++ aio->io_offset), dio->io_size); ++ } else if (dio->io_type == ZIO_TYPE_WRITE) { ++ bcopy(dio->io_data, (char *)aio->io_data + ++ (dio->io_offset - aio->io_offset), ++ dio->io_size); ++ } ++ ++ zio_add_child(dio, aio); ++ vdev_queue_io_remove(vq, dio); ++ zio_vdev_io_bypass(dio); ++ zio_execute(dio); ++ } while (dio != lio); ++ ++ avl_add(&vq->vq_pending_tree, aio); ++ list_remove(&vq->vq_io_list, vi); ++ ++ return (aio); ++ } ++ ++ ASSERT(fio->io_vdev_tree == t); ++ vdev_queue_io_remove(vq, fio); ++ ++ /* ++ * If the I/O is or was optional and therefore has no data, we need to ++ * simply discard it. We need to drop the vdev queue's lock to avoid a ++ * deadlock that we could encounter since this I/O will complete ++ * immediately. ++ */ ++ if (fio->io_flags & ZIO_FLAG_NODATA) { ++ mutex_exit(&vq->vq_lock); ++ zio_vdev_io_bypass(fio); ++ zio_execute(fio); ++ mutex_enter(&vq->vq_lock); ++ goto again; ++ } ++ ++ avl_add(&vq->vq_pending_tree, fio); ++ ++ return (fio); ++} ++ ++zio_t * ++vdev_queue_io(zio_t *zio) ++{ ++ vdev_queue_t *vq = &zio->io_vd->vdev_queue; ++ zio_t *nio; ++ ++ ASSERT(zio->io_type == ZIO_TYPE_READ || zio->io_type == ZIO_TYPE_WRITE); ++ ++ if (zio->io_flags & ZIO_FLAG_DONT_QUEUE) ++ return (zio); ++ ++ zio->io_flags |= ZIO_FLAG_DONT_CACHE | ZIO_FLAG_DONT_QUEUE; ++ ++ if (zio->io_type == ZIO_TYPE_READ) ++ zio->io_vdev_tree = &vq->vq_read_tree; ++ else ++ zio->io_vdev_tree = &vq->vq_write_tree; ++ ++ mutex_enter(&vq->vq_lock); ++ ++ zio->io_deadline = (ddi_get_lbolt64() >> zfs_vdev_time_shift) + ++ zio->io_priority; ++ ++ vdev_queue_io_add(vq, zio); ++ ++ nio = vdev_queue_io_to_issue(vq, zfs_vdev_min_pending); ++ ++ mutex_exit(&vq->vq_lock); ++ ++ if (nio == NULL) ++ return (NULL); ++ ++ if (nio->io_done == vdev_queue_agg_io_done) { ++ zio_nowait(nio); ++ return (NULL); ++ } ++ ++ return (nio); ++} ++ ++void ++vdev_queue_io_done(zio_t *zio) ++{ ++ vdev_queue_t *vq = &zio->io_vd->vdev_queue; ++ int i; ++ ++ mutex_enter(&vq->vq_lock); ++ ++ avl_remove(&vq->vq_pending_tree, zio); ++ ++ for (i = 0; i < zfs_vdev_ramp_rate; i++) { ++ zio_t *nio = vdev_queue_io_to_issue(vq, zfs_vdev_max_pending); ++ if (nio == NULL) ++ break; ++ mutex_exit(&vq->vq_lock); ++ if (nio->io_done == vdev_queue_agg_io_done) { ++ zio_nowait(nio); ++ } else { ++ zio_vdev_io_reissue(nio); ++ zio_execute(nio); ++ } ++ mutex_enter(&vq->vq_lock); ++ } ++ ++ mutex_exit(&vq->vq_lock); ++} ++ ++#if defined(_KERNEL) && defined(HAVE_SPL) ++module_param(zfs_vdev_max_pending, int, 0644); ++MODULE_PARM_DESC(zfs_vdev_max_pending, "Max pending per-vdev I/Os"); ++ ++module_param(zfs_vdev_min_pending, int, 0644); ++MODULE_PARM_DESC(zfs_vdev_min_pending, "Min pending per-vdev I/Os"); ++ 
++module_param(zfs_vdev_aggregation_limit, int, 0644);
++MODULE_PARM_DESC(zfs_vdev_aggregation_limit, "Max vdev I/O aggregation size");
++
++module_param(zfs_vdev_time_shift, int, 0644);
++MODULE_PARM_DESC(zfs_vdev_time_shift, "Deadline time shift for vdev I/O");
++
++module_param(zfs_vdev_ramp_rate, int, 0644);
++MODULE_PARM_DESC(zfs_vdev_ramp_rate, "Exponential I/O issue ramp-up rate");
++
++module_param(zfs_vdev_read_gap_limit, int, 0644);
++MODULE_PARM_DESC(zfs_vdev_read_gap_limit, "Aggregate read I/O over gap");
++
++module_param(zfs_vdev_write_gap_limit, int, 0644);
++MODULE_PARM_DESC(zfs_vdev_write_gap_limit, "Aggregate write I/O over gap");
++#endif
+diff -uNr linux-3.2.33-go.orig/fs/zfs/zfs/vdev_raidz.c linux-3.2.33-go/fs/zfs/zfs/vdev_raidz.c
+--- linux-3.2.33-go.orig/fs/zfs/zfs/vdev_raidz.c 1970-01-01 01:00:00.000000000 +0100
++++ linux-3.2.33-go/fs/zfs/zfs/vdev_raidz.c 2012-11-16 23:25:34.348039346 +0100
+@@ -0,0 +1,2153 @@
++/*
++ * CDDL HEADER START
++ *
++ * The contents of this file are subject to the terms of the
++ * Common Development and Distribution License (the "License").
++ * You may not use this file except in compliance with the License.
++ *
++ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
++ * or http://www.opensolaris.org/os/licensing.
++ * See the License for the specific language governing permissions
++ * and limitations under the License.
++ *
++ * When distributing Covered Code, include this CDDL HEADER in each
++ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
++ * If applicable, add the following below this CDDL HEADER, with the
++ * fields enclosed by brackets "[]" replaced with your own identifying
++ * information: Portions Copyright [yyyy] [name of copyright owner]
++ *
++ * CDDL HEADER END
++ */
++
++/*
++ * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
++ * Copyright (c) 2012 by Delphix. All rights reserved.
++ */
++
++#include
++#include
++#include
++#include
++#include
++#include
++#include
++
++/*
++ * Virtual device vector for RAID-Z.
++ *
++ * This vdev supports single, double, and triple parity. For single parity,
++ * we use a simple XOR of all the data columns. For double or triple parity,
++ * we use a special case of Reed-Solomon coding. This extends the
++ * technique described in "The mathematics of RAID-6" by H. Peter Anvin by
++ * drawing on the system described in "A Tutorial on Reed-Solomon Coding for
++ * Fault-Tolerance in RAID-like Systems" by James S. Plank on which the
++ * former is also based. The latter is designed to provide higher performance
++ * for writes.
++ *
++ * Note that the Plank paper claimed to support arbitrary N+M, but was then
++ * amended six years later identifying a critical flaw that invalidates its
++ * claims. Nevertheless, the technique can be adapted to work for up to
++ * triple parity. For additional parity, the amendment "Note: Correction to
++ * the 1997 Tutorial on Reed-Solomon Coding" by James S. Plank and Ying Ding
++ * is viable, but the additional complexity means that write performance will
++ * suffer.
++ *
++ * All of the methods above operate on a Galois field, defined over the
++ * integers mod 2^N. In our case we choose N=8 for GF(2^8) so that all elements
++ * can be expressed with a single byte.
Briefly, the operations on the ++ * field are defined as follows: ++ * ++ * o addition (+) is represented by a bitwise XOR ++ * o subtraction (-) is therefore identical to addition: A + B = A - B ++ * o multiplication of A by 2 is defined by the following bitwise expression: ++ * (A * 2)_7 = A_6 ++ * (A * 2)_6 = A_5 ++ * (A * 2)_5 = A_4 ++ * (A * 2)_4 = A_3 + A_7 ++ * (A * 2)_3 = A_2 + A_7 ++ * (A * 2)_2 = A_1 + A_7 ++ * (A * 2)_1 = A_0 ++ * (A * 2)_0 = A_7 ++ * ++ * In C, multiplying by 2 is therefore ((a << 1) ^ ((a & 0x80) ? 0x1d : 0)). ++ * As an aside, this multiplication is derived from the error correcting ++ * primitive polynomial x^8 + x^4 + x^3 + x^2 + 1. ++ * ++ * Observe that any number in the field (except for 0) can be expressed as a ++ * power of 2 -- a generator for the field. We store a table of the powers of ++ * 2 and logs base 2 for quick look ups, and exploit the fact that A * B can ++ * be rewritten as 2^(log_2(A) + log_2(B)) (where '+' is normal addition rather ++ * than field addition). The inverse of a field element A (A^-1) is therefore ++ * A ^ (255 - 1) = A^254. ++ * ++ * The up-to-three parity columns, P, Q, R over several data columns, ++ * D_0, ... D_n-1, can be expressed by field operations: ++ * ++ * P = D_0 + D_1 + ... + D_n-2 + D_n-1 ++ * Q = 2^n-1 * D_0 + 2^n-2 * D_1 + ... + 2^1 * D_n-2 + 2^0 * D_n-1 ++ * = ((...((D_0) * 2 + D_1) * 2 + ...) * 2 + D_n-2) * 2 + D_n-1 ++ * R = 4^n-1 * D_0 + 4^n-2 * D_1 + ... + 4^1 * D_n-2 + 4^0 * D_n-1 ++ * = ((...((D_0) * 4 + D_1) * 4 + ...) * 4 + D_n-2) * 4 + D_n-1 ++ * ++ * We chose 1, 2, and 4 as our generators because 1 corresponds to the trival ++ * XOR operation, and 2 and 4 can be computed quickly and generate linearly- ++ * independent coefficients. (There are no additional coefficients that have ++ * this property which is why the uncorrected Plank method breaks down.) ++ * ++ * See the reconstruction code below for how P, Q and R can used individually ++ * or in concert to recover missing data columns. ++ */ ++ ++typedef struct raidz_col { ++ uint64_t rc_devidx; /* child device index for I/O */ ++ uint64_t rc_offset; /* device offset */ ++ uint64_t rc_size; /* I/O size */ ++ void *rc_data; /* I/O data */ ++ void *rc_gdata; /* used to store the "good" version */ ++ int rc_error; /* I/O error for this device */ ++ uint8_t rc_tried; /* Did we attempt this I/O column? */ ++ uint8_t rc_skipped; /* Did we skip this I/O column? */ ++} raidz_col_t; ++ ++typedef struct raidz_map { ++ uint64_t rm_cols; /* Regular column count */ ++ uint64_t rm_scols; /* Count including skipped columns */ ++ uint64_t rm_bigcols; /* Number of oversized columns */ ++ uint64_t rm_asize; /* Actual total I/O size */ ++ uint64_t rm_missingdata; /* Count of missing data devices */ ++ uint64_t rm_missingparity; /* Count of missing parity devices */ ++ uint64_t rm_firstdatacol; /* First data column/parity count */ ++ uint64_t rm_nskip; /* Skipped sectors for padding */ ++ uint64_t rm_skipstart; /* Column index of padding start */ ++ void *rm_datacopy; /* rm_asize-buffer of copied data */ ++ uintptr_t rm_reports; /* # of referencing checksum reports */ ++ uint8_t rm_freed; /* map no longer has referencing ZIO */ ++ uint8_t rm_ecksuminjected; /* checksum error was injected */ ++ raidz_col_t rm_col[1]; /* Flexible array of I/O columns */ ++} raidz_map_t; ++ ++#define VDEV_RAIDZ_P 0 ++#define VDEV_RAIDZ_Q 1 ++#define VDEV_RAIDZ_R 2 ++ ++#define VDEV_RAIDZ_MUL_2(x) (((x) << 1) ^ (((x) & 0x80) ? 
0x1d : 0)) ++#define VDEV_RAIDZ_MUL_4(x) (VDEV_RAIDZ_MUL_2(VDEV_RAIDZ_MUL_2(x))) ++ ++/* ++ * We provide a mechanism to perform the field multiplication operation on a ++ * 64-bit value all at once rather than a byte at a time. This works by ++ * creating a mask from the top bit in each byte and using that to ++ * conditionally apply the XOR of 0x1d. ++ */ ++#define VDEV_RAIDZ_64MUL_2(x, mask) \ ++{ \ ++ (mask) = (x) & 0x8080808080808080ULL; \ ++ (mask) = ((mask) << 1) - ((mask) >> 7); \ ++ (x) = (((x) << 1) & 0xfefefefefefefefeULL) ^ \ ++ ((mask) & 0x1d1d1d1d1d1d1d1dULL); \ ++} ++ ++#define VDEV_RAIDZ_64MUL_4(x, mask) \ ++{ \ ++ VDEV_RAIDZ_64MUL_2((x), mask); \ ++ VDEV_RAIDZ_64MUL_2((x), mask); \ ++} ++ ++/* ++ * Force reconstruction to use the general purpose method. ++ */ ++int vdev_raidz_default_to_general; ++ ++/* ++ * These two tables represent powers and logs of 2 in the Galois field defined ++ * above. These values were computed by repeatedly multiplying by 2 as above. ++ */ ++static const uint8_t vdev_raidz_pow2[256] = { ++ 0x01, 0x02, 0x04, 0x08, 0x10, 0x20, 0x40, 0x80, ++ 0x1d, 0x3a, 0x74, 0xe8, 0xcd, 0x87, 0x13, 0x26, ++ 0x4c, 0x98, 0x2d, 0x5a, 0xb4, 0x75, 0xea, 0xc9, ++ 0x8f, 0x03, 0x06, 0x0c, 0x18, 0x30, 0x60, 0xc0, ++ 0x9d, 0x27, 0x4e, 0x9c, 0x25, 0x4a, 0x94, 0x35, ++ 0x6a, 0xd4, 0xb5, 0x77, 0xee, 0xc1, 0x9f, 0x23, ++ 0x46, 0x8c, 0x05, 0x0a, 0x14, 0x28, 0x50, 0xa0, ++ 0x5d, 0xba, 0x69, 0xd2, 0xb9, 0x6f, 0xde, 0xa1, ++ 0x5f, 0xbe, 0x61, 0xc2, 0x99, 0x2f, 0x5e, 0xbc, ++ 0x65, 0xca, 0x89, 0x0f, 0x1e, 0x3c, 0x78, 0xf0, ++ 0xfd, 0xe7, 0xd3, 0xbb, 0x6b, 0xd6, 0xb1, 0x7f, ++ 0xfe, 0xe1, 0xdf, 0xa3, 0x5b, 0xb6, 0x71, 0xe2, ++ 0xd9, 0xaf, 0x43, 0x86, 0x11, 0x22, 0x44, 0x88, ++ 0x0d, 0x1a, 0x34, 0x68, 0xd0, 0xbd, 0x67, 0xce, ++ 0x81, 0x1f, 0x3e, 0x7c, 0xf8, 0xed, 0xc7, 0x93, ++ 0x3b, 0x76, 0xec, 0xc5, 0x97, 0x33, 0x66, 0xcc, ++ 0x85, 0x17, 0x2e, 0x5c, 0xb8, 0x6d, 0xda, 0xa9, ++ 0x4f, 0x9e, 0x21, 0x42, 0x84, 0x15, 0x2a, 0x54, ++ 0xa8, 0x4d, 0x9a, 0x29, 0x52, 0xa4, 0x55, 0xaa, ++ 0x49, 0x92, 0x39, 0x72, 0xe4, 0xd5, 0xb7, 0x73, ++ 0xe6, 0xd1, 0xbf, 0x63, 0xc6, 0x91, 0x3f, 0x7e, ++ 0xfc, 0xe5, 0xd7, 0xb3, 0x7b, 0xf6, 0xf1, 0xff, ++ 0xe3, 0xdb, 0xab, 0x4b, 0x96, 0x31, 0x62, 0xc4, ++ 0x95, 0x37, 0x6e, 0xdc, 0xa5, 0x57, 0xae, 0x41, ++ 0x82, 0x19, 0x32, 0x64, 0xc8, 0x8d, 0x07, 0x0e, ++ 0x1c, 0x38, 0x70, 0xe0, 0xdd, 0xa7, 0x53, 0xa6, ++ 0x51, 0xa2, 0x59, 0xb2, 0x79, 0xf2, 0xf9, 0xef, ++ 0xc3, 0x9b, 0x2b, 0x56, 0xac, 0x45, 0x8a, 0x09, ++ 0x12, 0x24, 0x48, 0x90, 0x3d, 0x7a, 0xf4, 0xf5, ++ 0xf7, 0xf3, 0xfb, 0xeb, 0xcb, 0x8b, 0x0b, 0x16, ++ 0x2c, 0x58, 0xb0, 0x7d, 0xfa, 0xe9, 0xcf, 0x83, ++ 0x1b, 0x36, 0x6c, 0xd8, 0xad, 0x47, 0x8e, 0x01 ++}; ++static const uint8_t vdev_raidz_log2[256] = { ++ 0x00, 0x00, 0x01, 0x19, 0x02, 0x32, 0x1a, 0xc6, ++ 0x03, 0xdf, 0x33, 0xee, 0x1b, 0x68, 0xc7, 0x4b, ++ 0x04, 0x64, 0xe0, 0x0e, 0x34, 0x8d, 0xef, 0x81, ++ 0x1c, 0xc1, 0x69, 0xf8, 0xc8, 0x08, 0x4c, 0x71, ++ 0x05, 0x8a, 0x65, 0x2f, 0xe1, 0x24, 0x0f, 0x21, ++ 0x35, 0x93, 0x8e, 0xda, 0xf0, 0x12, 0x82, 0x45, ++ 0x1d, 0xb5, 0xc2, 0x7d, 0x6a, 0x27, 0xf9, 0xb9, ++ 0xc9, 0x9a, 0x09, 0x78, 0x4d, 0xe4, 0x72, 0xa6, ++ 0x06, 0xbf, 0x8b, 0x62, 0x66, 0xdd, 0x30, 0xfd, ++ 0xe2, 0x98, 0x25, 0xb3, 0x10, 0x91, 0x22, 0x88, ++ 0x36, 0xd0, 0x94, 0xce, 0x8f, 0x96, 0xdb, 0xbd, ++ 0xf1, 0xd2, 0x13, 0x5c, 0x83, 0x38, 0x46, 0x40, ++ 0x1e, 0x42, 0xb6, 0xa3, 0xc3, 0x48, 0x7e, 0x6e, ++ 0x6b, 0x3a, 0x28, 0x54, 0xfa, 0x85, 0xba, 0x3d, ++ 0xca, 0x5e, 0x9b, 0x9f, 0x0a, 0x15, 0x79, 0x2b, ++ 0x4e, 0xd4, 0xe5, 0xac, 0x73, 0xf3, 0xa7, 0x57, ++ 0x07, 
0x70, 0xc0, 0xf7, 0x8c, 0x80, 0x63, 0x0d, ++ 0x67, 0x4a, 0xde, 0xed, 0x31, 0xc5, 0xfe, 0x18, ++ 0xe3, 0xa5, 0x99, 0x77, 0x26, 0xb8, 0xb4, 0x7c, ++ 0x11, 0x44, 0x92, 0xd9, 0x23, 0x20, 0x89, 0x2e, ++ 0x37, 0x3f, 0xd1, 0x5b, 0x95, 0xbc, 0xcf, 0xcd, ++ 0x90, 0x87, 0x97, 0xb2, 0xdc, 0xfc, 0xbe, 0x61, ++ 0xf2, 0x56, 0xd3, 0xab, 0x14, 0x2a, 0x5d, 0x9e, ++ 0x84, 0x3c, 0x39, 0x53, 0x47, 0x6d, 0x41, 0xa2, ++ 0x1f, 0x2d, 0x43, 0xd8, 0xb7, 0x7b, 0xa4, 0x76, ++ 0xc4, 0x17, 0x49, 0xec, 0x7f, 0x0c, 0x6f, 0xf6, ++ 0x6c, 0xa1, 0x3b, 0x52, 0x29, 0x9d, 0x55, 0xaa, ++ 0xfb, 0x60, 0x86, 0xb1, 0xbb, 0xcc, 0x3e, 0x5a, ++ 0xcb, 0x59, 0x5f, 0xb0, 0x9c, 0xa9, 0xa0, 0x51, ++ 0x0b, 0xf5, 0x16, 0xeb, 0x7a, 0x75, 0x2c, 0xd7, ++ 0x4f, 0xae, 0xd5, 0xe9, 0xe6, 0xe7, 0xad, 0xe8, ++ 0x74, 0xd6, 0xf4, 0xea, 0xa8, 0x50, 0x58, 0xaf, ++}; ++ ++static void vdev_raidz_generate_parity(raidz_map_t *rm); ++ ++/* ++ * Multiply a given number by 2 raised to the given power. ++ */ ++static uint8_t ++vdev_raidz_exp2(uint_t a, int exp) ++{ ++ if (a == 0) ++ return (0); ++ ++ ASSERT(exp >= 0); ++ ASSERT(vdev_raidz_log2[a] > 0 || a == 1); ++ ++ exp += vdev_raidz_log2[a]; ++ if (exp > 255) ++ exp -= 255; ++ ++ return (vdev_raidz_pow2[exp]); ++} ++ ++static void ++vdev_raidz_map_free(raidz_map_t *rm) ++{ ++ int c; ++ size_t size; ++ ++ for (c = 0; c < rm->rm_firstdatacol; c++) { ++ zio_buf_free(rm->rm_col[c].rc_data, rm->rm_col[c].rc_size); ++ ++ if (rm->rm_col[c].rc_gdata != NULL) ++ zio_buf_free(rm->rm_col[c].rc_gdata, ++ rm->rm_col[c].rc_size); ++ } ++ ++ size = 0; ++ for (c = rm->rm_firstdatacol; c < rm->rm_cols; c++) ++ size += rm->rm_col[c].rc_size; ++ ++ if (rm->rm_datacopy != NULL) ++ zio_buf_free(rm->rm_datacopy, size); ++ ++ kmem_free(rm, offsetof(raidz_map_t, rm_col[rm->rm_scols])); ++} ++ ++static void ++vdev_raidz_map_free_vsd(zio_t *zio) ++{ ++ raidz_map_t *rm = zio->io_vsd; ++ ++ ASSERT3U(rm->rm_freed, ==, 0); ++ rm->rm_freed = 1; ++ ++ if (rm->rm_reports == 0) ++ vdev_raidz_map_free(rm); ++} ++ ++/*ARGSUSED*/ ++static void ++vdev_raidz_cksum_free(void *arg, size_t ignored) ++{ ++ raidz_map_t *rm = arg; ++ ++ ASSERT3U(rm->rm_reports, >, 0); ++ ++ if (--rm->rm_reports == 0 && rm->rm_freed != 0) ++ vdev_raidz_map_free(rm); ++} ++ ++static void ++vdev_raidz_cksum_finish(zio_cksum_report_t *zcr, const void *good_data) ++{ ++ raidz_map_t *rm = zcr->zcr_cbdata; ++ size_t c = zcr->zcr_cbinfo; ++ size_t x; ++ ++ const char *good = NULL; ++ const char *bad = rm->rm_col[c].rc_data; ++ ++ if (good_data == NULL) { ++ zfs_ereport_finish_checksum(zcr, NULL, NULL, B_FALSE); ++ return; ++ } ++ ++ if (c < rm->rm_firstdatacol) { ++ /* ++ * The first time through, calculate the parity blocks for ++ * the good data (this relies on the fact that the good ++ * data never changes for a given logical ZIO) ++ */ ++ if (rm->rm_col[0].rc_gdata == NULL) { ++ char *bad_parity[VDEV_RAIDZ_MAXPARITY]; ++ char *buf; ++ ++ /* ++ * Set up the rm_col[]s to generate the parity for ++ * good_data, first saving the parity bufs and ++ * replacing them with buffers to hold the result. ++ */ ++ for (x = 0; x < rm->rm_firstdatacol; x++) { ++ bad_parity[x] = rm->rm_col[x].rc_data; ++ rm->rm_col[x].rc_data = rm->rm_col[x].rc_gdata = ++ zio_buf_alloc(rm->rm_col[x].rc_size); ++ } ++ ++ /* fill in the data columns from good_data */ ++ buf = (char *)good_data; ++ for (; x < rm->rm_cols; x++) { ++ rm->rm_col[x].rc_data = buf; ++ buf += rm->rm_col[x].rc_size; ++ } ++ ++ /* ++ * Construct the parity from the good data. 
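A stand-alone sketch (editorial illustration, not part of this patch; gf_mul2(), gf_init() and mul2_64() are hypothetical helper names) of how the vdev_raidz_pow2/vdev_raidz_log2 tables above can be regenerated by repeated multiplication by 2, and a check that the 64-bit VDEV_RAIDZ_64MUL_2() mask trick matches byte-wise multiplication:

#include <stdint.h>
#include <stdio.h>
#include <string.h>

static uint8_t pow2[256], log2t[256];

/* multiply by 2 in GF(2^8) with the 0x1d reduction used above */
static uint8_t
gf_mul2(uint8_t x)
{
	return ((x << 1) ^ ((x & 0x80) ? 0x1d : 0));
}

static void
gf_init(void)
{
	uint8_t x = 1;
	int i;

	for (i = 0; i < 255; i++) {
		pow2[i] = x;		/* pow2[i] == 2^i */
		log2t[x] = i;		/* log2t[2^i] == i */
		x = gf_mul2(x);
	}
	pow2[255] = pow2[0];		/* 2^255 == 1, as in the table above */
}

/* apply the VDEV_RAIDZ_64MUL_2() trick to eight packed bytes at once */
static uint64_t
mul2_64(uint64_t x)
{
	uint64_t mask = x & 0x8080808080808080ULL;

	mask = (mask << 1) - (mask >> 7);
	return (((x << 1) & 0xfefefefefefefefeULL) ^
	    (mask & 0x1d1d1d1d1d1d1d1dULL));
}

int
main(void)
{
	uint64_t v = 0x0123456789abcdefULL, w = mul2_64(v);
	uint8_t b[8], i;

	gf_init();
	memcpy(b, &v, 8);
	for (i = 0; i < 8; i++)
		b[i] = gf_mul2(b[i]);	/* byte-wise reference result */
	printf("2^8 = 0x%02x, log2(0x1d) = %u, 64-bit trick matches: %d\n",
	    pow2[8], log2t[0x1d], memcmp(b, &w, 8) == 0);
	return (0);
}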
++ */ ++ vdev_raidz_generate_parity(rm); ++ ++ /* restore everything back to its original state */ ++ for (x = 0; x < rm->rm_firstdatacol; x++) ++ rm->rm_col[x].rc_data = bad_parity[x]; ++ ++ buf = rm->rm_datacopy; ++ for (x = rm->rm_firstdatacol; x < rm->rm_cols; x++) { ++ rm->rm_col[x].rc_data = buf; ++ buf += rm->rm_col[x].rc_size; ++ } ++ } ++ ++ ASSERT3P(rm->rm_col[c].rc_gdata, !=, NULL); ++ good = rm->rm_col[c].rc_gdata; ++ } else { ++ /* adjust good_data to point at the start of our column */ ++ good = good_data; ++ ++ for (x = rm->rm_firstdatacol; x < c; x++) ++ good += rm->rm_col[x].rc_size; ++ } ++ ++ /* we drop the ereport if it ends up that the data was good */ ++ zfs_ereport_finish_checksum(zcr, good, bad, B_TRUE); ++} ++ ++/* ++ * Invoked indirectly by zfs_ereport_start_checksum(), called ++ * below when our read operation fails completely. The main point ++ * is to keep a copy of everything we read from disk, so that at ++ * vdev_raidz_cksum_finish() time we can compare it with the good data. ++ */ ++static void ++vdev_raidz_cksum_report(zio_t *zio, zio_cksum_report_t *zcr, void *arg) ++{ ++ size_t c = (size_t)(uintptr_t)arg; ++ caddr_t buf; ++ ++ raidz_map_t *rm = zio->io_vsd; ++ size_t size; ++ ++ /* set up the report and bump the refcount */ ++ zcr->zcr_cbdata = rm; ++ zcr->zcr_cbinfo = c; ++ zcr->zcr_finish = vdev_raidz_cksum_finish; ++ zcr->zcr_free = vdev_raidz_cksum_free; ++ ++ rm->rm_reports++; ++ ASSERT3U(rm->rm_reports, >, 0); ++ ++ if (rm->rm_datacopy != NULL) ++ return; ++ ++ /* ++ * It's the first time we're called for this raidz_map_t, so we need ++ * to copy the data aside; there's no guarantee that our zio's buffer ++ * won't be re-used for something else. ++ * ++ * Our parity data is already in separate buffers, so there's no need ++ * to copy them. ++ */ ++ ++ size = 0; ++ for (c = rm->rm_firstdatacol; c < rm->rm_cols; c++) ++ size += rm->rm_col[c].rc_size; ++ ++ buf = rm->rm_datacopy = zio_buf_alloc(size); ++ ++ for (c = rm->rm_firstdatacol; c < rm->rm_cols; c++) { ++ raidz_col_t *col = &rm->rm_col[c]; ++ ++ bcopy(col->rc_data, buf, col->rc_size); ++ col->rc_data = buf; ++ ++ buf += col->rc_size; ++ } ++ ASSERT3P(buf - (caddr_t)rm->rm_datacopy, ==, size); ++} ++ ++static const zio_vsd_ops_t vdev_raidz_vsd_ops = { ++ vdev_raidz_map_free_vsd, ++ vdev_raidz_cksum_report ++}; ++ ++static raidz_map_t * ++vdev_raidz_map_alloc(zio_t *zio, uint64_t unit_shift, uint64_t dcols, ++ uint64_t nparity) ++{ ++ raidz_map_t *rm; ++ uint64_t b = zio->io_offset >> unit_shift; ++ uint64_t s = zio->io_size >> unit_shift; ++ uint64_t f = b % dcols; ++ uint64_t o = (b / dcols) << unit_shift; ++ uint64_t q, r, c, bc, col, acols, scols, coff, devidx, asize, tot; ++ ++ q = s / (dcols - nparity); ++ r = s - q * (dcols - nparity); ++ bc = (r == 0 ? 0 : r + nparity); ++ tot = s + nparity * (q + (r == 0 ? 
0 : 1)); ++ ++ if (q == 0) { ++ acols = bc; ++ scols = MIN(dcols, roundup(bc, nparity + 1)); ++ } else { ++ acols = dcols; ++ scols = dcols; ++ } ++ ++ ASSERT3U(acols, <=, scols); ++ ++ rm = kmem_alloc(offsetof(raidz_map_t, rm_col[scols]), KM_PUSHPAGE); ++ ++ rm->rm_cols = acols; ++ rm->rm_scols = scols; ++ rm->rm_bigcols = bc; ++ rm->rm_skipstart = bc; ++ rm->rm_missingdata = 0; ++ rm->rm_missingparity = 0; ++ rm->rm_firstdatacol = nparity; ++ rm->rm_datacopy = NULL; ++ rm->rm_reports = 0; ++ rm->rm_freed = 0; ++ rm->rm_ecksuminjected = 0; ++ ++ asize = 0; ++ ++ for (c = 0; c < scols; c++) { ++ col = f + c; ++ coff = o; ++ if (col >= dcols) { ++ col -= dcols; ++ coff += 1ULL << unit_shift; ++ } ++ rm->rm_col[c].rc_devidx = col; ++ rm->rm_col[c].rc_offset = coff; ++ rm->rm_col[c].rc_data = NULL; ++ rm->rm_col[c].rc_gdata = NULL; ++ rm->rm_col[c].rc_error = 0; ++ rm->rm_col[c].rc_tried = 0; ++ rm->rm_col[c].rc_skipped = 0; ++ ++ if (c >= acols) ++ rm->rm_col[c].rc_size = 0; ++ else if (c < bc) ++ rm->rm_col[c].rc_size = (q + 1) << unit_shift; ++ else ++ rm->rm_col[c].rc_size = q << unit_shift; ++ ++ asize += rm->rm_col[c].rc_size; ++ } ++ ++ ASSERT3U(asize, ==, tot << unit_shift); ++ rm->rm_asize = roundup(asize, (nparity + 1) << unit_shift); ++ rm->rm_nskip = roundup(tot, nparity + 1) - tot; ++ ASSERT3U(rm->rm_asize - asize, ==, rm->rm_nskip << unit_shift); ++ ASSERT3U(rm->rm_nskip, <=, nparity); ++ ++ for (c = 0; c < rm->rm_firstdatacol; c++) ++ rm->rm_col[c].rc_data = zio_buf_alloc(rm->rm_col[c].rc_size); ++ ++ rm->rm_col[c].rc_data = zio->io_data; ++ ++ for (c = c + 1; c < acols; c++) ++ rm->rm_col[c].rc_data = (char *)rm->rm_col[c - 1].rc_data + ++ rm->rm_col[c - 1].rc_size; ++ ++ /* ++ * If all data stored spans all columns, there's a danger that parity ++ * will always be on the same device and, since parity isn't read ++ * during normal operation, that that device's I/O bandwidth won't be ++ * used effectively. We therefore switch the parity every 1MB. ++ * ++ * ... at least that was, ostensibly, the theory. As a practical ++ * matter unless we juggle the parity between all devices evenly, we ++ * won't see any benefit. Further, occasional writes that aren't a ++ * multiple of the LCM of the number of children and the minimum ++ * stripe width are sufficient to avoid pessimal behavior. ++ * Unfortunately, this decision created an implicit on-disk format ++ * requirement that we need to support for all eternity, but only ++ * for single-parity RAID-Z. ++ * ++ * If we intend to skip a sector in the zeroth column for padding ++ * we must make sure to note this swap. We will never intend to ++ * skip the first column since at least one data and one parity ++ * column must appear in each row. 
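A minimal sketch (stand-alone example values, not taken from the patch) of the column geometry computed above by vdev_raidz_map_alloc(): an I/O of s sectors over dcols children with nparity parity columns gives q full rows, a remainder r, and bc "big" columns that carry one extra sector:

#include <stdint.h>
#include <stdio.h>

int
main(void)
{
	uint64_t s = 10, dcols = 6, nparity = 2;	/* example values */
	uint64_t q = s / (dcols - nparity);
	uint64_t r = s - q * (dcols - nparity);
	uint64_t bc = (r == 0 ? 0 : r + nparity);
	uint64_t tot = s + nparity * (q + (r == 0 ? 0 : 1));
	uint64_t c, acols = (q == 0 ? bc : dcols);

	for (c = 0; c < acols; c++)
		printf("col %llu: %llu sectors\n", (unsigned long long)c,
		    (unsigned long long)(c < bc ? q + 1 : q));
	printf("total sectors incl. parity: %llu\n", (unsigned long long)tot);
	return (0);
}

With s = 10, dcols = 6 and nparity = 2 this prints four 3-sector columns and two 2-sector columns, 16 sectors in total, which is exactly the asize asserted against tot above.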
++ */ ++ ASSERT(rm->rm_cols >= 2); ++ ASSERT(rm->rm_col[0].rc_size == rm->rm_col[1].rc_size); ++ ++ if (rm->rm_firstdatacol == 1 && (zio->io_offset & (1ULL << 20))) { ++ devidx = rm->rm_col[0].rc_devidx; ++ o = rm->rm_col[0].rc_offset; ++ rm->rm_col[0].rc_devidx = rm->rm_col[1].rc_devidx; ++ rm->rm_col[0].rc_offset = rm->rm_col[1].rc_offset; ++ rm->rm_col[1].rc_devidx = devidx; ++ rm->rm_col[1].rc_offset = o; ++ ++ if (rm->rm_skipstart == 0) ++ rm->rm_skipstart = 1; ++ } ++ ++ zio->io_vsd = rm; ++ zio->io_vsd_ops = &vdev_raidz_vsd_ops; ++ return (rm); ++} ++ ++static void ++vdev_raidz_generate_parity_p(raidz_map_t *rm) ++{ ++ uint64_t *p, *src, pcount, ccount, i; ++ int c; ++ ++ pcount = rm->rm_col[VDEV_RAIDZ_P].rc_size / sizeof (src[0]); ++ ++ for (c = rm->rm_firstdatacol; c < rm->rm_cols; c++) { ++ src = rm->rm_col[c].rc_data; ++ p = rm->rm_col[VDEV_RAIDZ_P].rc_data; ++ ccount = rm->rm_col[c].rc_size / sizeof (src[0]); ++ ++ if (c == rm->rm_firstdatacol) { ++ ASSERT(ccount == pcount); ++ for (i = 0; i < ccount; i++, src++, p++) { ++ *p = *src; ++ } ++ } else { ++ ASSERT(ccount <= pcount); ++ for (i = 0; i < ccount; i++, src++, p++) { ++ *p ^= *src; ++ } ++ } ++ } ++} ++ ++static void ++vdev_raidz_generate_parity_pq(raidz_map_t *rm) ++{ ++ uint64_t *p, *q, *src, pcnt, ccnt, mask, i; ++ int c; ++ ++ pcnt = rm->rm_col[VDEV_RAIDZ_P].rc_size / sizeof (src[0]); ++ ASSERT(rm->rm_col[VDEV_RAIDZ_P].rc_size == ++ rm->rm_col[VDEV_RAIDZ_Q].rc_size); ++ ++ for (c = rm->rm_firstdatacol; c < rm->rm_cols; c++) { ++ src = rm->rm_col[c].rc_data; ++ p = rm->rm_col[VDEV_RAIDZ_P].rc_data; ++ q = rm->rm_col[VDEV_RAIDZ_Q].rc_data; ++ ++ ccnt = rm->rm_col[c].rc_size / sizeof (src[0]); ++ ++ if (c == rm->rm_firstdatacol) { ++ ASSERT(ccnt == pcnt || ccnt == 0); ++ for (i = 0; i < ccnt; i++, src++, p++, q++) { ++ *p = *src; ++ *q = *src; ++ } ++ for (; i < pcnt; i++, src++, p++, q++) { ++ *p = 0; ++ *q = 0; ++ } ++ } else { ++ ASSERT(ccnt <= pcnt); ++ ++ /* ++ * Apply the algorithm described above by multiplying ++ * the previous result and adding in the new value. ++ */ ++ for (i = 0; i < ccnt; i++, src++, p++, q++) { ++ *p ^= *src; ++ ++ VDEV_RAIDZ_64MUL_2(*q, mask); ++ *q ^= *src; ++ } ++ ++ /* ++ * Treat short columns as though they are full of 0s. ++ * Note that there's therefore nothing needed for P. ++ */ ++ for (; i < pcnt; i++, q++) { ++ VDEV_RAIDZ_64MUL_2(*q, mask); ++ } ++ } ++ } ++} ++ ++static void ++vdev_raidz_generate_parity_pqr(raidz_map_t *rm) ++{ ++ uint64_t *p, *q, *r, *src, pcnt, ccnt, mask, i; ++ int c; ++ ++ pcnt = rm->rm_col[VDEV_RAIDZ_P].rc_size / sizeof (src[0]); ++ ASSERT(rm->rm_col[VDEV_RAIDZ_P].rc_size == ++ rm->rm_col[VDEV_RAIDZ_Q].rc_size); ++ ASSERT(rm->rm_col[VDEV_RAIDZ_P].rc_size == ++ rm->rm_col[VDEV_RAIDZ_R].rc_size); ++ ++ for (c = rm->rm_firstdatacol; c < rm->rm_cols; c++) { ++ src = rm->rm_col[c].rc_data; ++ p = rm->rm_col[VDEV_RAIDZ_P].rc_data; ++ q = rm->rm_col[VDEV_RAIDZ_Q].rc_data; ++ r = rm->rm_col[VDEV_RAIDZ_R].rc_data; ++ ++ ccnt = rm->rm_col[c].rc_size / sizeof (src[0]); ++ ++ if (c == rm->rm_firstdatacol) { ++ ASSERT(ccnt == pcnt || ccnt == 0); ++ for (i = 0; i < ccnt; i++, src++, p++, q++, r++) { ++ *p = *src; ++ *q = *src; ++ *r = *src; ++ } ++ for (; i < pcnt; i++, src++, p++, q++, r++) { ++ *p = 0; ++ *q = 0; ++ *r = 0; ++ } ++ } else { ++ ASSERT(ccnt <= pcnt); ++ ++ /* ++ * Apply the algorithm described above by multiplying ++ * the previous result and adding in the new value. 
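A byte-at-a-time sketch of the P/Q/R update used in the loops above and below (gf_mul2() and pqr_parity() are illustrative names, not part of the patch): processing data columns in order, Q is accumulated Horner style, multiply the running value by 2 and XOR in the next column, which yields Q = 2^(n-1)*d_0 ^ ... ^ 2*d_(n-2) ^ d_(n-1); R does the same with 4, and P is the plain XOR:

#include <stdint.h>
#include <stdio.h>

static uint8_t
gf_mul2(uint8_t x)
{
	return ((x << 1) ^ ((x & 0x80) ? 0x1d : 0));
}

static void
pqr_parity(const uint8_t *d, int ncols, uint8_t *p, uint8_t *q, uint8_t *r)
{
	int c;

	*p = *q = *r = 0;
	for (c = 0; c < ncols; c++) {
		*p ^= d[c];				/* P: plain XOR */
		*q = gf_mul2(*q) ^ d[c];		/* Q: times 2, then add */
		*r = gf_mul2(gf_mul2(*r)) ^ d[c];	/* R: times 4, then add */
	}
}

int
main(void)
{
	uint8_t d[4] = { 0x11, 0x22, 0x33, 0x44 }, p, q, r;

	pqr_parity(d, 4, &p, &q, &r);
	printf("P=%02x Q=%02x R=%02x\n", p, q, r);
	return (0);
}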
++ */ ++ for (i = 0; i < ccnt; i++, src++, p++, q++, r++) { ++ *p ^= *src; ++ ++ VDEV_RAIDZ_64MUL_2(*q, mask); ++ *q ^= *src; ++ ++ VDEV_RAIDZ_64MUL_4(*r, mask); ++ *r ^= *src; ++ } ++ ++ /* ++ * Treat short columns as though they are full of 0s. ++ * Note that there's therefore nothing needed for P. ++ */ ++ for (; i < pcnt; i++, q++, r++) { ++ VDEV_RAIDZ_64MUL_2(*q, mask); ++ VDEV_RAIDZ_64MUL_4(*r, mask); ++ } ++ } ++ } ++} ++ ++/* ++ * Generate RAID parity in the first virtual columns according to the number of ++ * parity columns available. ++ */ ++static void ++vdev_raidz_generate_parity(raidz_map_t *rm) ++{ ++ switch (rm->rm_firstdatacol) { ++ case 1: ++ vdev_raidz_generate_parity_p(rm); ++ break; ++ case 2: ++ vdev_raidz_generate_parity_pq(rm); ++ break; ++ case 3: ++ vdev_raidz_generate_parity_pqr(rm); ++ break; ++ default: ++ cmn_err(CE_PANIC, "invalid RAID-Z configuration"); ++ } ++} ++ ++static int ++vdev_raidz_reconstruct_p(raidz_map_t *rm, int *tgts, int ntgts) ++{ ++ uint64_t *dst, *src, xcount, ccount, count, i; ++ int x = tgts[0]; ++ int c; ++ ++ ASSERT(ntgts == 1); ++ ASSERT(x >= rm->rm_firstdatacol); ++ ASSERT(x < rm->rm_cols); ++ ++ xcount = rm->rm_col[x].rc_size / sizeof (src[0]); ++ ASSERT(xcount <= rm->rm_col[VDEV_RAIDZ_P].rc_size / sizeof (src[0])); ++ ASSERT(xcount > 0); ++ ++ src = rm->rm_col[VDEV_RAIDZ_P].rc_data; ++ dst = rm->rm_col[x].rc_data; ++ for (i = 0; i < xcount; i++, dst++, src++) { ++ *dst = *src; ++ } ++ ++ for (c = rm->rm_firstdatacol; c < rm->rm_cols; c++) { ++ src = rm->rm_col[c].rc_data; ++ dst = rm->rm_col[x].rc_data; ++ ++ if (c == x) ++ continue; ++ ++ ccount = rm->rm_col[c].rc_size / sizeof (src[0]); ++ count = MIN(ccount, xcount); ++ ++ for (i = 0; i < count; i++, dst++, src++) { ++ *dst ^= *src; ++ } ++ } ++ ++ return (1 << VDEV_RAIDZ_P); ++} ++ ++static int ++vdev_raidz_reconstruct_q(raidz_map_t *rm, int *tgts, int ntgts) ++{ ++ uint64_t *dst, *src, xcount, ccount, count, mask, i; ++ uint8_t *b; ++ int x = tgts[0]; ++ int c, j, exp; ++ ++ ASSERT(ntgts == 1); ++ ++ xcount = rm->rm_col[x].rc_size / sizeof (src[0]); ++ ASSERT(xcount <= rm->rm_col[VDEV_RAIDZ_Q].rc_size / sizeof (src[0])); ++ ++ for (c = rm->rm_firstdatacol; c < rm->rm_cols; c++) { ++ src = rm->rm_col[c].rc_data; ++ dst = rm->rm_col[x].rc_data; ++ ++ if (c == x) ++ ccount = 0; ++ else ++ ccount = rm->rm_col[c].rc_size / sizeof (src[0]); ++ ++ count = MIN(ccount, xcount); ++ ++ if (c == rm->rm_firstdatacol) { ++ for (i = 0; i < count; i++, dst++, src++) { ++ *dst = *src; ++ } ++ for (; i < xcount; i++, dst++) { ++ *dst = 0; ++ } ++ ++ } else { ++ for (i = 0; i < count; i++, dst++, src++) { ++ VDEV_RAIDZ_64MUL_2(*dst, mask); ++ *dst ^= *src; ++ } ++ ++ for (; i < xcount; i++, dst++) { ++ VDEV_RAIDZ_64MUL_2(*dst, mask); ++ } ++ } ++ } ++ ++ src = rm->rm_col[VDEV_RAIDZ_Q].rc_data; ++ dst = rm->rm_col[x].rc_data; ++ exp = 255 - (rm->rm_cols - 1 - x); ++ ++ for (i = 0; i < xcount; i++, dst++, src++) { ++ *dst ^= *src; ++ for (j = 0, b = (uint8_t *)dst; j < 8; j++, b++) { ++ *b = vdev_raidz_exp2(*b, exp); ++ } ++ } ++ ++ return (1 << VDEV_RAIDZ_Q); ++} ++ ++static int ++vdev_raidz_reconstruct_pq(raidz_map_t *rm, int *tgts, int ntgts) ++{ ++ uint8_t *p, *q, *pxy, *qxy, *xd, *yd, tmp, a, b, aexp, bexp; ++ void *pdata, *qdata; ++ uint64_t xsize, ysize, i; ++ int x = tgts[0]; ++ int y = tgts[1]; ++ ++ ASSERT(ntgts == 2); ++ ASSERT(x < y); ++ ASSERT(x >= rm->rm_firstdatacol); ++ ASSERT(y < rm->rm_cols); ++ ++ ASSERT(rm->rm_col[x].rc_size >= rm->rm_col[y].rc_size); ++ ++ /* ++ * Move the 
parity data aside -- we're going to compute parity as ++ * though columns x and y were full of zeros -- Pxy and Qxy. We want to ++ * reuse the parity generation mechanism without trashing the actual ++ * parity so we make those columns appear to be full of zeros by ++ * setting their lengths to zero. ++ */ ++ pdata = rm->rm_col[VDEV_RAIDZ_P].rc_data; ++ qdata = rm->rm_col[VDEV_RAIDZ_Q].rc_data; ++ xsize = rm->rm_col[x].rc_size; ++ ysize = rm->rm_col[y].rc_size; ++ ++ rm->rm_col[VDEV_RAIDZ_P].rc_data = ++ zio_buf_alloc(rm->rm_col[VDEV_RAIDZ_P].rc_size); ++ rm->rm_col[VDEV_RAIDZ_Q].rc_data = ++ zio_buf_alloc(rm->rm_col[VDEV_RAIDZ_Q].rc_size); ++ rm->rm_col[x].rc_size = 0; ++ rm->rm_col[y].rc_size = 0; ++ ++ vdev_raidz_generate_parity_pq(rm); ++ ++ rm->rm_col[x].rc_size = xsize; ++ rm->rm_col[y].rc_size = ysize; ++ ++ p = pdata; ++ q = qdata; ++ pxy = rm->rm_col[VDEV_RAIDZ_P].rc_data; ++ qxy = rm->rm_col[VDEV_RAIDZ_Q].rc_data; ++ xd = rm->rm_col[x].rc_data; ++ yd = rm->rm_col[y].rc_data; ++ ++ /* ++ * We now have: ++ * Pxy = P + D_x + D_y ++ * Qxy = Q + 2^(ndevs - 1 - x) * D_x + 2^(ndevs - 1 - y) * D_y ++ * ++ * We can then solve for D_x: ++ * D_x = A * (P + Pxy) + B * (Q + Qxy) ++ * where ++ * A = 2^(x - y) * (2^(x - y) + 1)^-1 ++ * B = 2^(ndevs - 1 - x) * (2^(x - y) + 1)^-1 ++ * ++ * With D_x in hand, we can easily solve for D_y: ++ * D_y = P + Pxy + D_x ++ */ ++ ++ a = vdev_raidz_pow2[255 + x - y]; ++ b = vdev_raidz_pow2[255 - (rm->rm_cols - 1 - x)]; ++ tmp = 255 - vdev_raidz_log2[a ^ 1]; ++ ++ aexp = vdev_raidz_log2[vdev_raidz_exp2(a, tmp)]; ++ bexp = vdev_raidz_log2[vdev_raidz_exp2(b, tmp)]; ++ ++ for (i = 0; i < xsize; i++, p++, q++, pxy++, qxy++, xd++, yd++) { ++ *xd = vdev_raidz_exp2(*p ^ *pxy, aexp) ^ ++ vdev_raidz_exp2(*q ^ *qxy, bexp); ++ ++ if (i < ysize) ++ *yd = *p ^ *pxy ^ *xd; ++ } ++ ++ zio_buf_free(rm->rm_col[VDEV_RAIDZ_P].rc_data, ++ rm->rm_col[VDEV_RAIDZ_P].rc_size); ++ zio_buf_free(rm->rm_col[VDEV_RAIDZ_Q].rc_data, ++ rm->rm_col[VDEV_RAIDZ_Q].rc_size); ++ ++ /* ++ * Restore the saved parity data. ++ */ ++ rm->rm_col[VDEV_RAIDZ_P].rc_data = pdata; ++ rm->rm_col[VDEV_RAIDZ_Q].rc_data = qdata; ++ ++ return ((1 << VDEV_RAIDZ_P) | (1 << VDEV_RAIDZ_Q)); ++} ++ ++/* BEGIN CSTYLED */ ++/* ++ * In the general case of reconstruction, we must solve the system of linear ++ * equations defined by the coeffecients used to generate parity as well as ++ * the contents of the data and parity disks. This can be expressed with ++ * vectors for the original data (D) and the actual data (d) and parity (p) ++ * and a matrix composed of the identity matrix (I) and a dispersal matrix (V): ++ * ++ * __ __ __ __ ++ * | | __ __ | p_0 | ++ * | V | | D_0 | | p_m-1 | ++ * | | x | : | = | d_0 | ++ * | I | | D_n-1 | | : | ++ * | | ~~ ~~ | d_n-1 | ++ * ~~ ~~ ~~ ~~ ++ * ++ * I is simply a square identity matrix of size n, and V is a vandermonde ++ * matrix defined by the coeffecients we chose for the various parity columns ++ * (1, 2, 4). Note that these values were chosen both for simplicity, speedy ++ * computation as well as linear separability. ++ * ++ * __ __ __ __ ++ * | 1 .. 1 1 1 | | p_0 | ++ * | 2^n-1 .. 4 2 1 | __ __ | : | ++ * | 4^n-1 .. 16 4 1 | | D_0 | | p_m-1 | ++ * | 1 .. 0 0 0 | | D_1 | | d_0 | ++ * | 0 .. 0 0 0 | x | D_2 | = | d_1 | ++ * | : : : : | | : | | d_2 | ++ * | 0 .. 1 0 0 | | D_n-1 | | : | ++ * | 0 .. 0 1 0 | ~~ ~~ | : | ++ * | 0 .. 0 0 1 | | d_n-1 | ++ * ~~ ~~ ~~ ~~ ++ * ++ * Note that I, V, d, and p are known. 
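A stand-alone, byte-at-a-time illustration of the two-erasure solve performed by vdev_raidz_reconstruct_pq() above, written with the straightforward algebra rather than the precomputed A/B log coefficients the driver uses (the two are equivalent); gf_mul2(), gf_exp2(), gf_mul() and gf_inv() are local helper names for this sketch only:

#include <stdint.h>
#include <stdio.h>

static uint8_t
gf_mul2(uint8_t x)
{
	return ((x << 1) ^ ((x & 0x80) ? 0x1d : 0));
}

static uint8_t
gf_exp2(int e)			/* 2^e for e >= 0 */
{
	uint8_t v = 1;

	while (e-- > 0)
		v = gf_mul2(v);
	return (v);
}

static uint8_t
gf_mul(uint8_t a, uint8_t b)	/* generic multiply by shift-and-add */
{
	uint8_t v = 0;

	while (b != 0) {
		if (b & 1)
			v ^= a;
		a = gf_mul2(a);
		b >>= 1;
	}
	return (v);
}

static uint8_t
gf_inv(uint8_t a)		/* a^-1 == a^254, since a^255 == 1 */
{
	uint8_t v = 1;
	int i;

	for (i = 0; i < 254; i++)
		v = gf_mul(v, a);
	return (v);
}

int
main(void)
{
	uint8_t d[4] = { 0x11, 0x22, 0x33, 0x44 };	/* one byte per column */
	int n = 4, x = 1, y = 3, c;			/* columns x and y lost */
	uint8_t p = 0, q = 0, pxy = 0, qxy = 0, dx, dy;

	for (c = 0; c < n; c++) {			/* original parity */
		p ^= d[c];
		q = gf_mul2(q) ^ d[c];
	}
	for (c = 0; c < n; c++) {			/* parity with x, y zeroed */
		pxy ^= (c == x || c == y) ? 0 : d[c];
		qxy = gf_mul2(qxy) ^ ((c == x || c == y) ? 0 : d[c]);
	}
	/*
	 * P ^ Pxy = D_x ^ D_y
	 * Q ^ Qxy = 2^(n-1-x)*D_x ^ 2^(n-1-y)*D_y
	 * so D_x = (Q^Qxy ^ 2^(n-1-y)*(P^Pxy)) / (2^(n-1-x) ^ 2^(n-1-y))
	 */
	dx = gf_mul((q ^ qxy) ^ gf_mul(gf_exp2(n - 1 - y), p ^ pxy),
	    gf_inv(gf_exp2(n - 1 - x) ^ gf_exp2(n - 1 - y)));
	dy = (p ^ pxy) ^ dx;
	printf("recovered %02x %02x (expected %02x %02x)\n", dx, dy, d[x], d[y]);
	return (0);
}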
To compute D, we must invert the ++ * matrix and use the known data and parity values to reconstruct the unknown ++ * data values. We begin by removing the rows in V|I and d|p that correspond ++ * to failed or missing columns; we then make V|I square (n x n) and d|p ++ * sized n by removing rows corresponding to unused parity from the bottom up ++ * to generate (V|I)' and (d|p)'. We can then generate the inverse of (V|I)' ++ * using Gauss-Jordan elimination. In the example below we use m=3 parity ++ * columns, n=8 data columns, with errors in d_1, d_2, and p_1: ++ * __ __ ++ * | 1 1 1 1 1 1 1 1 | ++ * | 128 64 32 16 8 4 2 1 | <-----+-+-- missing disks ++ * | 19 205 116 29 64 16 4 1 | / / ++ * | 1 0 0 0 0 0 0 0 | / / ++ * | 0 1 0 0 0 0 0 0 | <--' / ++ * (V|I) = | 0 0 1 0 0 0 0 0 | <---' ++ * | 0 0 0 1 0 0 0 0 | ++ * | 0 0 0 0 1 0 0 0 | ++ * | 0 0 0 0 0 1 0 0 | ++ * | 0 0 0 0 0 0 1 0 | ++ * | 0 0 0 0 0 0 0 1 | ++ * ~~ ~~ ++ * __ __ ++ * | 1 1 1 1 1 1 1 1 | ++ * | 128 64 32 16 8 4 2 1 | ++ * | 19 205 116 29 64 16 4 1 | ++ * | 1 0 0 0 0 0 0 0 | ++ * | 0 1 0 0 0 0 0 0 | ++ * (V|I)' = | 0 0 1 0 0 0 0 0 | ++ * | 0 0 0 1 0 0 0 0 | ++ * | 0 0 0 0 1 0 0 0 | ++ * | 0 0 0 0 0 1 0 0 | ++ * | 0 0 0 0 0 0 1 0 | ++ * | 0 0 0 0 0 0 0 1 | ++ * ~~ ~~ ++ * ++ * Here we employ Gauss-Jordan elimination to find the inverse of (V|I)'. We ++ * have carefully chosen the seed values 1, 2, and 4 to ensure that this ++ * matrix is not singular. ++ * __ __ ++ * | 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 | ++ * | 19 205 116 29 64 16 4 1 0 1 0 0 0 0 0 0 | ++ * | 1 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 | ++ * | 0 0 0 1 0 0 0 0 0 0 0 1 0 0 0 0 | ++ * | 0 0 0 0 1 0 0 0 0 0 0 0 1 0 0 0 | ++ * | 0 0 0 0 0 1 0 0 0 0 0 0 0 1 0 0 | ++ * | 0 0 0 0 0 0 1 0 0 0 0 0 0 0 1 0 | ++ * | 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 1 | ++ * ~~ ~~ ++ * __ __ ++ * | 1 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 | ++ * | 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 | ++ * | 19 205 116 29 64 16 4 1 0 1 0 0 0 0 0 0 | ++ * | 0 0 0 1 0 0 0 0 0 0 0 1 0 0 0 0 | ++ * | 0 0 0 0 1 0 0 0 0 0 0 0 1 0 0 0 | ++ * | 0 0 0 0 0 1 0 0 0 0 0 0 0 1 0 0 | ++ * | 0 0 0 0 0 0 1 0 0 0 0 0 0 0 1 0 | ++ * | 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 1 | ++ * ~~ ~~ ++ * __ __ ++ * | 1 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 | ++ * | 0 1 1 0 0 0 0 0 1 0 1 1 1 1 1 1 | ++ * | 0 205 116 0 0 0 0 0 0 1 19 29 64 16 4 1 | ++ * | 0 0 0 1 0 0 0 0 0 0 0 1 0 0 0 0 | ++ * | 0 0 0 0 1 0 0 0 0 0 0 0 1 0 0 0 | ++ * | 0 0 0 0 0 1 0 0 0 0 0 0 0 1 0 0 | ++ * | 0 0 0 0 0 0 1 0 0 0 0 0 0 0 1 0 | ++ * | 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 1 | ++ * ~~ ~~ ++ * __ __ ++ * | 1 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 | ++ * | 0 1 1 0 0 0 0 0 1 0 1 1 1 1 1 1 | ++ * | 0 0 185 0 0 0 0 0 205 1 222 208 141 221 201 204 | ++ * | 0 0 0 1 0 0 0 0 0 0 0 1 0 0 0 0 | ++ * | 0 0 0 0 1 0 0 0 0 0 0 0 1 0 0 0 | ++ * | 0 0 0 0 0 1 0 0 0 0 0 0 0 1 0 0 | ++ * | 0 0 0 0 0 0 1 0 0 0 0 0 0 0 1 0 | ++ * | 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 1 | ++ * ~~ ~~ ++ * __ __ ++ * | 1 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 | ++ * | 0 1 1 0 0 0 0 0 1 0 1 1 1 1 1 1 | ++ * | 0 0 1 0 0 0 0 0 166 100 4 40 158 168 216 209 | ++ * | 0 0 0 1 0 0 0 0 0 0 0 1 0 0 0 0 | ++ * | 0 0 0 0 1 0 0 0 0 0 0 0 1 0 0 0 | ++ * | 0 0 0 0 0 1 0 0 0 0 0 0 0 1 0 0 | ++ * | 0 0 0 0 0 0 1 0 0 0 0 0 0 0 1 0 | ++ * | 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 1 | ++ * ~~ ~~ ++ * __ __ ++ * | 1 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 | ++ * | 0 1 0 0 0 0 0 0 167 100 5 41 159 169 217 208 | ++ * | 0 0 1 0 0 0 0 0 166 100 4 40 158 168 216 209 | ++ * | 0 0 0 1 0 0 0 0 0 0 0 1 0 0 0 0 | ++ * | 0 0 0 0 1 0 0 0 0 0 0 0 1 0 0 0 | ++ * | 0 0 0 0 0 1 0 0 0 0 0 0 0 1 0 0 | ++ * | 0 0 0 0 0 0 1 0 0 0 0 
0 0 0 1 0 | ++ * | 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 1 | ++ * ~~ ~~ ++ * __ __ ++ * | 0 0 1 0 0 0 0 0 | ++ * | 167 100 5 41 159 169 217 208 | ++ * | 166 100 4 40 158 168 216 209 | ++ * (V|I)'^-1 = | 0 0 0 1 0 0 0 0 | ++ * | 0 0 0 0 1 0 0 0 | ++ * | 0 0 0 0 0 1 0 0 | ++ * | 0 0 0 0 0 0 1 0 | ++ * | 0 0 0 0 0 0 0 1 | ++ * ~~ ~~ ++ * ++ * We can then simply compute D = (V|I)'^-1 x (d|p)' to discover the values ++ * of the missing data. ++ * ++ * As is apparent from the example above, the only non-trivial rows in the ++ * inverse matrix correspond to the data disks that we're trying to ++ * reconstruct. Indeed, those are the only rows we need as the others would ++ * only be useful for reconstructing data known or assumed to be valid. For ++ * that reason, we only build the coefficients in the rows that correspond to ++ * targeted columns. ++ */ ++/* END CSTYLED */ ++ ++static void ++vdev_raidz_matrix_init(raidz_map_t *rm, int n, int nmap, int *map, ++ uint8_t **rows) ++{ ++ int i, j; ++ int pow; ++ ++ ASSERT(n == rm->rm_cols - rm->rm_firstdatacol); ++ ++ /* ++ * Fill in the missing rows of interest. ++ */ ++ for (i = 0; i < nmap; i++) { ++ ASSERT3S(0, <=, map[i]); ++ ASSERT3S(map[i], <=, 2); ++ ++ pow = map[i] * n; ++ if (pow > 255) ++ pow -= 255; ++ ASSERT(pow <= 255); ++ ++ for (j = 0; j < n; j++) { ++ pow -= map[i]; ++ if (pow < 0) ++ pow += 255; ++ rows[i][j] = vdev_raidz_pow2[pow]; ++ } ++ } ++} ++ ++static void ++vdev_raidz_matrix_invert(raidz_map_t *rm, int n, int nmissing, int *missing, ++ uint8_t **rows, uint8_t **invrows, const uint8_t *used) ++{ ++ int i, j, ii, jj; ++ uint8_t log; ++ ++ /* ++ * Assert that the first nmissing entries from the array of used ++ * columns correspond to parity columns and that subsequent entries ++ * correspond to data columns. ++ */ ++ for (i = 0; i < nmissing; i++) { ++ ASSERT3S(used[i], <, rm->rm_firstdatacol); ++ } ++ for (; i < n; i++) { ++ ASSERT3S(used[i], >=, rm->rm_firstdatacol); ++ } ++ ++ /* ++ * First initialize the storage where we'll compute the inverse rows. ++ */ ++ for (i = 0; i < nmissing; i++) { ++ for (j = 0; j < n; j++) { ++ invrows[i][j] = (i == j) ? 1 : 0; ++ } ++ } ++ ++ /* ++ * Subtract all trivial rows from the rows of consequence. ++ */ ++ for (i = 0; i < nmissing; i++) { ++ for (j = nmissing; j < n; j++) { ++ ASSERT3U(used[j], >=, rm->rm_firstdatacol); ++ jj = used[j] - rm->rm_firstdatacol; ++ ASSERT3S(jj, <, n); ++ invrows[i][j] = rows[i][jj]; ++ rows[i][jj] = 0; ++ } ++ } ++ ++ /* ++ * For each of the rows of interest, we must normalize it and subtract ++ * a multiple of it from the other rows. ++ */ ++ for (i = 0; i < nmissing; i++) { ++ for (j = 0; j < missing[i]; j++) { ++ ASSERT3U(rows[i][j], ==, 0); ++ } ++ ASSERT3U(rows[i][missing[i]], !=, 0); ++ ++ /* ++ * Compute the inverse of the first element and multiply each ++ * element in the row by that value. ++ */ ++ log = 255 - vdev_raidz_log2[rows[i][missing[i]]]; ++ ++ for (j = 0; j < n; j++) { ++ rows[i][j] = vdev_raidz_exp2(rows[i][j], log); ++ invrows[i][j] = vdev_raidz_exp2(invrows[i][j], log); ++ } ++ ++ for (ii = 0; ii < nmissing; ii++) { ++ if (i == ii) ++ continue; ++ ++ ASSERT3U(rows[ii][missing[i]], !=, 0); ++ ++ log = vdev_raidz_log2[rows[ii][missing[i]]]; ++ ++ for (j = 0; j < n; j++) { ++ rows[ii][j] ^= ++ vdev_raidz_exp2(rows[i][j], log); ++ invrows[ii][j] ^= ++ vdev_raidz_exp2(invrows[i][j], log); ++ } ++ } ++ } ++ ++ /* ++ * Verify that the data that is left in the rows are properly part of ++ * an identity matrix. 
++ */ ++ for (i = 0; i < nmissing; i++) { ++ for (j = 0; j < n; j++) { ++ if (j == missing[i]) { ++ ASSERT3U(rows[i][j], ==, 1); ++ } else { ++ ASSERT3U(rows[i][j], ==, 0); ++ } ++ } ++ } ++} ++ ++static void ++vdev_raidz_matrix_reconstruct(raidz_map_t *rm, int n, int nmissing, ++ int *missing, uint8_t **invrows, const uint8_t *used) ++{ ++ int i, j, x, cc, c; ++ uint8_t *src; ++ uint64_t ccount; ++ uint8_t *dst[VDEV_RAIDZ_MAXPARITY]; ++ uint64_t dcount[VDEV_RAIDZ_MAXPARITY]; ++ uint8_t log = 0, val; ++ int ll; ++ uint8_t *invlog[VDEV_RAIDZ_MAXPARITY]; ++ uint8_t *p, *pp; ++ size_t psize; ++ ++ psize = sizeof (invlog[0][0]) * n * nmissing; ++ p = kmem_alloc(psize, KM_PUSHPAGE); ++ ++ for (pp = p, i = 0; i < nmissing; i++) { ++ invlog[i] = pp; ++ pp += n; ++ } ++ ++ for (i = 0; i < nmissing; i++) { ++ for (j = 0; j < n; j++) { ++ ASSERT3U(invrows[i][j], !=, 0); ++ invlog[i][j] = vdev_raidz_log2[invrows[i][j]]; ++ } ++ } ++ ++ for (i = 0; i < n; i++) { ++ c = used[i]; ++ ASSERT3U(c, <, rm->rm_cols); ++ ++ src = rm->rm_col[c].rc_data; ++ ccount = rm->rm_col[c].rc_size; ++ for (j = 0; j < nmissing; j++) { ++ cc = missing[j] + rm->rm_firstdatacol; ++ ASSERT3U(cc, >=, rm->rm_firstdatacol); ++ ASSERT3U(cc, <, rm->rm_cols); ++ ASSERT3U(cc, !=, c); ++ ++ dst[j] = rm->rm_col[cc].rc_data; ++ dcount[j] = rm->rm_col[cc].rc_size; ++ } ++ ++ ASSERT(ccount >= rm->rm_col[missing[0]].rc_size || i > 0); ++ ++ for (x = 0; x < ccount; x++, src++) { ++ if (*src != 0) ++ log = vdev_raidz_log2[*src]; ++ ++ for (cc = 0; cc < nmissing; cc++) { ++ if (x >= dcount[cc]) ++ continue; ++ ++ if (*src == 0) { ++ val = 0; ++ } else { ++ if ((ll = log + invlog[cc][i]) >= 255) ++ ll -= 255; ++ val = vdev_raidz_pow2[ll]; ++ } ++ ++ if (i == 0) ++ dst[cc][x] = val; ++ else ++ dst[cc][x] ^= val; ++ } ++ } ++ } ++ ++ kmem_free(p, psize); ++} ++ ++static int ++vdev_raidz_reconstruct_general(raidz_map_t *rm, int *tgts, int ntgts) ++{ ++ int n, i, c, t, tt; ++ int nmissing_rows; ++ int missing_rows[VDEV_RAIDZ_MAXPARITY]; ++ int parity_map[VDEV_RAIDZ_MAXPARITY]; ++ ++ uint8_t *p, *pp; ++ size_t psize; ++ ++ uint8_t *rows[VDEV_RAIDZ_MAXPARITY]; ++ uint8_t *invrows[VDEV_RAIDZ_MAXPARITY]; ++ uint8_t *used; ++ ++ int code = 0; ++ ++ ++ n = rm->rm_cols - rm->rm_firstdatacol; ++ ++ /* ++ * Figure out which data columns are missing. ++ */ ++ nmissing_rows = 0; ++ for (t = 0; t < ntgts; t++) { ++ if (tgts[t] >= rm->rm_firstdatacol) { ++ missing_rows[nmissing_rows++] = ++ tgts[t] - rm->rm_firstdatacol; ++ } ++ } ++ ++ /* ++ * Figure out which parity columns to use to help generate the missing ++ * data columns. ++ */ ++ for (tt = 0, c = 0, i = 0; i < nmissing_rows; c++) { ++ ASSERT(tt < ntgts); ++ ASSERT(c < rm->rm_firstdatacol); ++ ++ /* ++ * Skip any targeted parity columns. 
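The rows that vdev_raidz_matrix_init() fills in are just the parity-generation coefficients restated: parity index m contributes the row (2^m)^(n-1), ..., (2^m)^1, 1. A small sketch (coeff_row() is a hypothetical helper, not from the patch) that builds those rows the same way, walking a power of 2 downward modulo 255, and reproduces the first three rows of the example matrix in the comment above:

#include <stdint.h>
#include <stdio.h>

static uint8_t pow2[256];

static void
gf_init(void)
{
	uint8_t x = 1;
	int i;

	for (i = 0; i < 256; i++) {
		pow2[i] = x;
		x = (x << 1) ^ ((x & 0x80) ? 0x1d : 0);
	}
}

/* row for parity index m (0 = P, 1 = Q, 2 = R) over n data columns */
static void
coeff_row(int m, int n, uint8_t *row)
{
	int j, pow = (m * n) % 255;

	for (j = 0; j < n; j++) {
		pow -= m;
		if (pow < 0)
			pow += 255;
		row[j] = pow2[pow];	/* == (2^m)^(n-1-j) */
	}
}

int
main(void)
{
	uint8_t row[8];
	int m, j, n = 8;

	gf_init();
	for (m = 0; m < 3; m++) {
		coeff_row(m, n, row);
		for (j = 0; j < n; j++)
			printf("%4u", row[j]);
		printf("\n");
	}
	return (0);
}

For n = 8 this prints the rows 1..1, 128 64 32 16 8 4 2 1, and 19 205 116 29 64 16 4 1, matching the (V|I) example above.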
++ */ ++ if (c == tgts[tt]) { ++ tt++; ++ continue; ++ } ++ ++ code |= 1 << c; ++ ++ parity_map[i] = c; ++ i++; ++ } ++ ++ ASSERT(code != 0); ++ ASSERT3U(code, <, 1 << VDEV_RAIDZ_MAXPARITY); ++ ++ psize = (sizeof (rows[0][0]) + sizeof (invrows[0][0])) * ++ nmissing_rows * n + sizeof (used[0]) * n; ++ p = kmem_alloc(psize, KM_PUSHPAGE); ++ ++ for (pp = p, i = 0; i < nmissing_rows; i++) { ++ rows[i] = pp; ++ pp += n; ++ invrows[i] = pp; ++ pp += n; ++ } ++ used = pp; ++ ++ for (i = 0; i < nmissing_rows; i++) { ++ used[i] = parity_map[i]; ++ } ++ ++ for (tt = 0, c = rm->rm_firstdatacol; c < rm->rm_cols; c++) { ++ if (tt < nmissing_rows && ++ c == missing_rows[tt] + rm->rm_firstdatacol) { ++ tt++; ++ continue; ++ } ++ ++ ASSERT3S(i, <, n); ++ used[i] = c; ++ i++; ++ } ++ ++ /* ++ * Initialize the interesting rows of the matrix. ++ */ ++ vdev_raidz_matrix_init(rm, n, nmissing_rows, parity_map, rows); ++ ++ /* ++ * Invert the matrix. ++ */ ++ vdev_raidz_matrix_invert(rm, n, nmissing_rows, missing_rows, rows, ++ invrows, used); ++ ++ /* ++ * Reconstruct the missing data using the generated matrix. ++ */ ++ vdev_raidz_matrix_reconstruct(rm, n, nmissing_rows, missing_rows, ++ invrows, used); ++ ++ kmem_free(p, psize); ++ ++ return (code); ++} ++ ++static int ++vdev_raidz_reconstruct(raidz_map_t *rm, int *t, int nt) ++{ ++ int tgts[VDEV_RAIDZ_MAXPARITY], *dt; ++ int ntgts; ++ int i, c; ++ int code; ++ int nbadparity, nbaddata; ++ int parity_valid[VDEV_RAIDZ_MAXPARITY]; ++ ++ /* ++ * The tgts list must already be sorted. ++ */ ++ for (i = 1; i < nt; i++) { ++ ASSERT(t[i] > t[i - 1]); ++ } ++ ++ nbadparity = rm->rm_firstdatacol; ++ nbaddata = rm->rm_cols - nbadparity; ++ ntgts = 0; ++ for (i = 0, c = 0; c < rm->rm_cols; c++) { ++ if (c < rm->rm_firstdatacol) ++ parity_valid[c] = B_FALSE; ++ ++ if (i < nt && c == t[i]) { ++ tgts[ntgts++] = c; ++ i++; ++ } else if (rm->rm_col[c].rc_error != 0) { ++ tgts[ntgts++] = c; ++ } else if (c >= rm->rm_firstdatacol) { ++ nbaddata--; ++ } else { ++ parity_valid[c] = B_TRUE; ++ nbadparity--; ++ } ++ } ++ ++ ASSERT(ntgts >= nt); ++ ASSERT(nbaddata >= 0); ++ ASSERT(nbaddata + nbadparity == ntgts); ++ ++ dt = &tgts[nbadparity]; ++ ++ /* ++ * See if we can use any of our optimized reconstruction routines. 
++ */ ++ if (!vdev_raidz_default_to_general) { ++ switch (nbaddata) { ++ case 1: ++ if (parity_valid[VDEV_RAIDZ_P]) ++ return (vdev_raidz_reconstruct_p(rm, dt, 1)); ++ ++ ASSERT(rm->rm_firstdatacol > 1); ++ ++ if (parity_valid[VDEV_RAIDZ_Q]) ++ return (vdev_raidz_reconstruct_q(rm, dt, 1)); ++ ++ ASSERT(rm->rm_firstdatacol > 2); ++ break; ++ ++ case 2: ++ ASSERT(rm->rm_firstdatacol > 1); ++ ++ if (parity_valid[VDEV_RAIDZ_P] && ++ parity_valid[VDEV_RAIDZ_Q]) ++ return (vdev_raidz_reconstruct_pq(rm, dt, 2)); ++ ++ ASSERT(rm->rm_firstdatacol > 2); ++ ++ break; ++ } ++ } ++ ++ code = vdev_raidz_reconstruct_general(rm, tgts, ntgts); ++ ASSERT(code < (1 << VDEV_RAIDZ_MAXPARITY)); ++ ASSERT(code > 0); ++ return (code); ++} ++ ++static int ++vdev_raidz_open(vdev_t *vd, uint64_t *asize, uint64_t *max_asize, ++ uint64_t *ashift) ++{ ++ vdev_t *cvd; ++ uint64_t nparity = vd->vdev_nparity; ++ int c; ++ int lasterror = 0; ++ int numerrors = 0; ++ ++ ASSERT(nparity > 0); ++ ++ if (nparity > VDEV_RAIDZ_MAXPARITY || ++ vd->vdev_children < nparity + 1) { ++ vd->vdev_stat.vs_aux = VDEV_AUX_BAD_LABEL; ++ return (EINVAL); ++ } ++ ++ vdev_open_children(vd); ++ ++ for (c = 0; c < vd->vdev_children; c++) { ++ cvd = vd->vdev_child[c]; ++ ++ if (cvd->vdev_open_error != 0) { ++ lasterror = cvd->vdev_open_error; ++ numerrors++; ++ continue; ++ } ++ ++ *asize = MIN(*asize - 1, cvd->vdev_asize - 1) + 1; ++ *max_asize = MIN(*max_asize - 1, cvd->vdev_max_asize - 1) + 1; ++ *ashift = MAX(*ashift, cvd->vdev_ashift); ++ } ++ ++ *asize *= vd->vdev_children; ++ *max_asize *= vd->vdev_children; ++ ++ if (numerrors > nparity) { ++ vd->vdev_stat.vs_aux = VDEV_AUX_NO_REPLICAS; ++ return (lasterror); ++ } ++ ++ return (0); ++} ++ ++static void ++vdev_raidz_close(vdev_t *vd) ++{ ++ int c; ++ ++ for (c = 0; c < vd->vdev_children; c++) ++ vdev_close(vd->vdev_child[c]); ++} ++ ++static uint64_t ++vdev_raidz_asize(vdev_t *vd, uint64_t psize) ++{ ++ uint64_t asize; ++ uint64_t ashift = vd->vdev_top->vdev_ashift; ++ uint64_t cols = vd->vdev_children; ++ uint64_t nparity = vd->vdev_nparity; ++ ++ asize = ((psize - 1) >> ashift) + 1; ++ asize += nparity * ((asize + cols - nparity - 1) / (cols - nparity)); ++ asize = roundup(asize, nparity + 1) << ashift; ++ ++ return (asize); ++} ++ ++static void ++vdev_raidz_child_done(zio_t *zio) ++{ ++ raidz_col_t *rc = zio->io_private; ++ ++ rc->rc_error = zio->io_error; ++ rc->rc_tried = 1; ++ rc->rc_skipped = 0; ++} ++ ++static int ++vdev_raidz_io_start(zio_t *zio) ++{ ++ vdev_t *vd = zio->io_vd; ++ vdev_t *tvd = vd->vdev_top; ++ vdev_t *cvd; ++ raidz_map_t *rm; ++ raidz_col_t *rc; ++ int c, i; ++ ++ rm = vdev_raidz_map_alloc(zio, tvd->vdev_ashift, vd->vdev_children, ++ vd->vdev_nparity); ++ ++ ASSERT3U(rm->rm_asize, ==, vdev_psize_to_asize(vd, zio->io_size)); ++ ++ if (zio->io_type == ZIO_TYPE_WRITE) { ++ vdev_raidz_generate_parity(rm); ++ ++ for (c = 0; c < rm->rm_cols; c++) { ++ rc = &rm->rm_col[c]; ++ cvd = vd->vdev_child[rc->rc_devidx]; ++ zio_nowait(zio_vdev_child_io(zio, NULL, cvd, ++ rc->rc_offset, rc->rc_data, rc->rc_size, ++ zio->io_type, zio->io_priority, 0, ++ vdev_raidz_child_done, rc)); ++ } ++ ++ /* ++ * Generate optional I/Os for any skipped sectors to improve ++ * aggregation contiguity. 
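A worked example (stand-alone sketch, example numbers only) of the vdev_raidz_asize() calculation above: round the payload up to whole sectors, add one parity sector per (cols - nparity) data sectors, then pad to a multiple of the (nparity + 1)-sector stripe quantum:

#include <stdint.h>
#include <stdio.h>

#define	ROUNDUP(x, y)	((((x) + (y) - 1) / (y)) * (y))

int
main(void)
{
	uint64_t psize = 32768, ashift = 9, cols = 6, nparity = 2;
	uint64_t asize;

	asize = ((psize - 1) >> ashift) + 1;		/* 64 data sectors */
	/* one parity sector per (cols - nparity) data sectors, rounded up */
	asize += nparity * ((asize + cols - nparity - 1) / (cols - nparity));
	/* pad to a multiple of the stripe quantum (nparity + 1) */
	asize = ROUNDUP(asize, nparity + 1) << ashift;
	printf("psize %llu -> asize %llu bytes\n",
	    (unsigned long long)psize, (unsigned long long)asize);
	return (0);
}

With a 32 KiB block on a 6-wide raidz2 with 512-byte sectors this gives 64 + 32 = 96 sectors, already a multiple of 3, so 49152 allocated bytes.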
++ */ ++ for (c = rm->rm_skipstart, i = 0; i < rm->rm_nskip; c++, i++) { ++ ASSERT(c <= rm->rm_scols); ++ if (c == rm->rm_scols) ++ c = 0; ++ rc = &rm->rm_col[c]; ++ cvd = vd->vdev_child[rc->rc_devidx]; ++ zio_nowait(zio_vdev_child_io(zio, NULL, cvd, ++ rc->rc_offset + rc->rc_size, NULL, ++ 1 << tvd->vdev_ashift, ++ zio->io_type, zio->io_priority, ++ ZIO_FLAG_NODATA | ZIO_FLAG_OPTIONAL, NULL, NULL)); ++ } ++ ++ return (ZIO_PIPELINE_CONTINUE); ++ } ++ ++ ASSERT(zio->io_type == ZIO_TYPE_READ); ++ ++ /* ++ * Iterate over the columns in reverse order so that we hit the parity ++ * last -- any errors along the way will force us to read the parity. ++ */ ++ for (c = rm->rm_cols - 1; c >= 0; c--) { ++ rc = &rm->rm_col[c]; ++ cvd = vd->vdev_child[rc->rc_devidx]; ++ if (!vdev_readable(cvd)) { ++ if (c >= rm->rm_firstdatacol) ++ rm->rm_missingdata++; ++ else ++ rm->rm_missingparity++; ++ rc->rc_error = ENXIO; ++ rc->rc_tried = 1; /* don't even try */ ++ rc->rc_skipped = 1; ++ continue; ++ } ++ if (vdev_dtl_contains(cvd, DTL_MISSING, zio->io_txg, 1)) { ++ if (c >= rm->rm_firstdatacol) ++ rm->rm_missingdata++; ++ else ++ rm->rm_missingparity++; ++ rc->rc_error = ESTALE; ++ rc->rc_skipped = 1; ++ continue; ++ } ++ if (c >= rm->rm_firstdatacol || rm->rm_missingdata > 0 || ++ (zio->io_flags & (ZIO_FLAG_SCRUB | ZIO_FLAG_RESILVER))) { ++ zio_nowait(zio_vdev_child_io(zio, NULL, cvd, ++ rc->rc_offset, rc->rc_data, rc->rc_size, ++ zio->io_type, zio->io_priority, 0, ++ vdev_raidz_child_done, rc)); ++ } ++ } ++ ++ return (ZIO_PIPELINE_CONTINUE); ++} ++ ++ ++/* ++ * Report a checksum error for a child of a RAID-Z device. ++ */ ++static void ++raidz_checksum_error(zio_t *zio, raidz_col_t *rc, void *bad_data) ++{ ++ vdev_t *vd = zio->io_vd->vdev_child[rc->rc_devidx]; ++ ++ if (!(zio->io_flags & ZIO_FLAG_SPECULATIVE)) { ++ zio_bad_cksum_t zbc; ++ raidz_map_t *rm = zio->io_vsd; ++ ++ mutex_enter(&vd->vdev_stat_lock); ++ vd->vdev_stat.vs_checksum_errors++; ++ mutex_exit(&vd->vdev_stat_lock); ++ ++ zbc.zbc_has_cksum = 0; ++ zbc.zbc_injected = rm->rm_ecksuminjected; ++ ++ zfs_ereport_post_checksum(zio->io_spa, vd, zio, ++ rc->rc_offset, rc->rc_size, rc->rc_data, bad_data, ++ &zbc); ++ } ++} ++ ++/* ++ * We keep track of whether or not there were any injected errors, so that ++ * any ereports we generate can note it. ++ */ ++static int ++raidz_checksum_verify(zio_t *zio) ++{ ++ zio_bad_cksum_t zbc; ++ raidz_map_t *rm = zio->io_vsd; ++ int ret; ++ ++ bzero(&zbc, sizeof (zio_bad_cksum_t)); ++ ++ ret = zio_checksum_error(zio, &zbc); ++ if (ret != 0 && zbc.zbc_injected != 0) ++ rm->rm_ecksuminjected = 1; ++ ++ return (ret); ++} ++ ++/* ++ * Generate the parity from the data columns. If we tried and were able to ++ * read the parity without error, verify that the generated parity matches the ++ * data we read. If it doesn't, we fire off a checksum error. Return the ++ * number such failures. 
++ */ ++static int ++raidz_parity_verify(zio_t *zio, raidz_map_t *rm) ++{ ++ void *orig[VDEV_RAIDZ_MAXPARITY]; ++ int c, ret = 0; ++ raidz_col_t *rc; ++ ++ for (c = 0; c < rm->rm_firstdatacol; c++) { ++ rc = &rm->rm_col[c]; ++ if (!rc->rc_tried || rc->rc_error != 0) ++ continue; ++ orig[c] = zio_buf_alloc(rc->rc_size); ++ bcopy(rc->rc_data, orig[c], rc->rc_size); ++ } ++ ++ vdev_raidz_generate_parity(rm); ++ ++ for (c = 0; c < rm->rm_firstdatacol; c++) { ++ rc = &rm->rm_col[c]; ++ if (!rc->rc_tried || rc->rc_error != 0) ++ continue; ++ if (bcmp(orig[c], rc->rc_data, rc->rc_size) != 0) { ++ raidz_checksum_error(zio, rc, orig[c]); ++ rc->rc_error = ECKSUM; ++ ret++; ++ } ++ zio_buf_free(orig[c], rc->rc_size); ++ } ++ ++ return (ret); ++} ++ ++/* ++ * Keep statistics on all the ways that we used parity to correct data. ++ */ ++static uint64_t raidz_corrected[1 << VDEV_RAIDZ_MAXPARITY]; ++ ++static int ++vdev_raidz_worst_error(raidz_map_t *rm) ++{ ++ int c, error = 0; ++ ++ for (c = 0; c < rm->rm_cols; c++) ++ error = zio_worst_error(error, rm->rm_col[c].rc_error); ++ ++ return (error); ++} ++ ++/* ++ * Iterate over all combinations of bad data and attempt a reconstruction. ++ * Note that the algorithm below is non-optimal because it doesn't take into ++ * account how reconstruction is actually performed. For example, with ++ * triple-parity RAID-Z the reconstruction procedure is the same if column 4 ++ * is targeted as invalid as if columns 1 and 4 are targeted since in both ++ * cases we'd only use parity information in column 0. ++ */ ++static int ++vdev_raidz_combrec(zio_t *zio, int total_errors, int data_errors) ++{ ++ raidz_map_t *rm = zio->io_vsd; ++ raidz_col_t *rc; ++ void *orig[VDEV_RAIDZ_MAXPARITY]; ++ int tstore[VDEV_RAIDZ_MAXPARITY + 2]; ++ int *tgts = &tstore[1]; ++ int curr, next, i, c, n; ++ int code, ret = 0; ++ ++ ASSERT(total_errors < rm->rm_firstdatacol); ++ ++ /* ++ * This simplifies one edge condition. ++ */ ++ tgts[-1] = -1; ++ ++ for (n = 1; n <= rm->rm_firstdatacol - total_errors; n++) { ++ /* ++ * Initialize the targets array by finding the first n columns ++ * that contain no error. ++ * ++ * If there were no data errors, we need to ensure that we're ++ * always explicitly attempting to reconstruct at least one ++ * data column. To do this, we simply push the highest target ++ * up into the data columns. ++ */ ++ for (c = 0, i = 0; i < n; i++) { ++ if (i == n - 1 && data_errors == 0 && ++ c < rm->rm_firstdatacol) { ++ c = rm->rm_firstdatacol; ++ } ++ ++ while (rm->rm_col[c].rc_error != 0) { ++ c++; ++ ASSERT3S(c, <, rm->rm_cols); ++ } ++ ++ tgts[i] = c++; ++ } ++ ++ /* ++ * Setting tgts[n] simplifies the other edge condition. ++ */ ++ tgts[n] = rm->rm_cols; ++ ++ /* ++ * These buffers were allocated in previous iterations. ++ */ ++ for (i = 0; i < n - 1; i++) { ++ ASSERT(orig[i] != NULL); ++ } ++ ++ orig[n - 1] = zio_buf_alloc(rm->rm_col[0].rc_size); ++ ++ curr = 0; ++ next = tgts[curr]; ++ ++ while (curr != n) { ++ tgts[curr] = next; ++ curr = 0; ++ ++ /* ++ * Save off the original data that we're going to ++ * attempt to reconstruct. ++ */ ++ for (i = 0; i < n; i++) { ++ ASSERT(orig[i] != NULL); ++ c = tgts[i]; ++ ASSERT3S(c, >=, 0); ++ ASSERT3S(c, <, rm->rm_cols); ++ rc = &rm->rm_col[c]; ++ bcopy(rc->rc_data, orig[i], rc->rc_size); ++ } ++ ++ /* ++ * Attempt a reconstruction and exit the outer loop on ++ * success. 
++ */ ++ code = vdev_raidz_reconstruct(rm, tgts, n); ++ if (raidz_checksum_verify(zio) == 0) { ++ atomic_inc_64(&raidz_corrected[code]); ++ ++ for (i = 0; i < n; i++) { ++ c = tgts[i]; ++ rc = &rm->rm_col[c]; ++ ASSERT(rc->rc_error == 0); ++ if (rc->rc_tried) ++ raidz_checksum_error(zio, rc, ++ orig[i]); ++ rc->rc_error = ECKSUM; ++ } ++ ++ ret = code; ++ goto done; ++ } ++ ++ /* ++ * Restore the original data. ++ */ ++ for (i = 0; i < n; i++) { ++ c = tgts[i]; ++ rc = &rm->rm_col[c]; ++ bcopy(orig[i], rc->rc_data, rc->rc_size); ++ } ++ ++ do { ++ /* ++ * Find the next valid column after the curr ++ * position.. ++ */ ++ for (next = tgts[curr] + 1; ++ next < rm->rm_cols && ++ rm->rm_col[next].rc_error != 0; next++) ++ continue; ++ ++ ASSERT(next <= tgts[curr + 1]); ++ ++ /* ++ * If that spot is available, we're done here. ++ */ ++ if (next != tgts[curr + 1]) ++ break; ++ ++ /* ++ * Otherwise, find the next valid column after ++ * the previous position. ++ */ ++ for (c = tgts[curr - 1] + 1; ++ rm->rm_col[c].rc_error != 0; c++) ++ continue; ++ ++ tgts[curr] = c; ++ curr++; ++ ++ } while (curr != n); ++ } ++ } ++ n--; ++done: ++ for (i = 0; i < n; i++) { ++ zio_buf_free(orig[i], rm->rm_col[0].rc_size); ++ } ++ ++ return (ret); ++} ++ ++static void ++vdev_raidz_io_done(zio_t *zio) ++{ ++ vdev_t *vd = zio->io_vd; ++ vdev_t *cvd; ++ raidz_map_t *rm = zio->io_vsd; ++ raidz_col_t *rc = NULL; ++ int unexpected_errors = 0; ++ int parity_errors = 0; ++ int parity_untried = 0; ++ int data_errors = 0; ++ int total_errors = 0; ++ int n, c; ++ int tgts[VDEV_RAIDZ_MAXPARITY]; ++ int code; ++ ++ ASSERT(zio->io_bp != NULL); /* XXX need to add code to enforce this */ ++ ++ ASSERT(rm->rm_missingparity <= rm->rm_firstdatacol); ++ ASSERT(rm->rm_missingdata <= rm->rm_cols - rm->rm_firstdatacol); ++ ++ for (c = 0; c < rm->rm_cols; c++) { ++ rc = &rm->rm_col[c]; ++ ++ if (rc->rc_error) { ++ ASSERT(rc->rc_error != ECKSUM); /* child has no bp */ ++ ++ if (c < rm->rm_firstdatacol) ++ parity_errors++; ++ else ++ data_errors++; ++ ++ if (!rc->rc_skipped) ++ unexpected_errors++; ++ ++ total_errors++; ++ } else if (c < rm->rm_firstdatacol && !rc->rc_tried) { ++ parity_untried++; ++ } ++ } ++ ++ if (zio->io_type == ZIO_TYPE_WRITE) { ++ /* ++ * XXX -- for now, treat partial writes as a success. ++ * (If we couldn't write enough columns to reconstruct ++ * the data, the I/O failed. Otherwise, good enough.) ++ * ++ * Now that we support write reallocation, it would be better ++ * to treat partial failure as real failure unless there are ++ * no non-degraded top-level vdevs left, and not update DTLs ++ * if we intend to reallocate. ++ */ ++ /* XXPOLICY */ ++ if (total_errors > rm->rm_firstdatacol) ++ zio->io_error = vdev_raidz_worst_error(rm); ++ ++ return; ++ } ++ ++ ASSERT(zio->io_type == ZIO_TYPE_READ); ++ /* ++ * There are three potential phases for a read: ++ * 1. produce valid data from the columns read ++ * 2. read all disks and try again ++ * 3. perform combinatorial reconstruction ++ * ++ * Each phase is progressively both more expensive and less likely to ++ * occur. If we encounter more errors than we can repair or all phases ++ * fail, we have no choice but to return an error. ++ */ ++ ++ /* ++ * If the number of errors we saw was correctable -- less than or equal ++ * to the number of parity disks read -- attempt to produce data that ++ * has a valid checksum. Naturally, this case applies in the absence of ++ * any errors. 
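A simplified analogue (not the driver code; enumerate() is an illustrative helper, and it ignores the already-failed-column skipping that vdev_raidz_combrec() also performs) of the odometer-style enumeration above: advance the lowest target slot that still has room, and reset the slots below it, so every n-element subset of candidate columns is tried exactly once:

#include <stdio.h>

static void
enumerate(int m, int n)		/* all n-element subsets of {0, ..., m-1}, n <= 8 */
{
	int tgts[8], i;

	for (i = 0; i < n; i++)		/* first combination: 0, 1, ..., n-1 */
		tgts[i] = i;
	for (;;) {
		for (i = 0; i < n; i++)
			printf("%d ", tgts[i]);
		printf("\n");
		for (i = 0; i < n; i++) {	/* find a slot that can advance */
			if (tgts[i] + 1 < (i + 1 == n ? m : tgts[i + 1]))
				break;
		}
		if (i == n)
			return;			/* no slot can move: done */
		tgts[i]++;
		while (i-- > 0)			/* reset the slots below it */
			tgts[i] = i;
	}
}

int
main(void)
{
	enumerate(5, 2);	/* e.g. all pairs of candidate bad columns */
	return (0);
}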
++ */ ++ if (total_errors <= rm->rm_firstdatacol - parity_untried) { ++ if (data_errors == 0) { ++ if (raidz_checksum_verify(zio) == 0) { ++ /* ++ * If we read parity information (unnecessarily ++ * as it happens since no reconstruction was ++ * needed) regenerate and verify the parity. ++ * We also regenerate parity when resilvering ++ * so we can write it out to the failed device ++ * later. ++ */ ++ if (parity_errors + parity_untried < ++ rm->rm_firstdatacol || ++ (zio->io_flags & ZIO_FLAG_RESILVER)) { ++ n = raidz_parity_verify(zio, rm); ++ unexpected_errors += n; ++ ASSERT(parity_errors + n <= ++ rm->rm_firstdatacol); ++ } ++ goto done; ++ } ++ } else { ++ /* ++ * We either attempt to read all the parity columns or ++ * none of them. If we didn't try to read parity, we ++ * wouldn't be here in the correctable case. There must ++ * also have been fewer parity errors than parity ++ * columns or, again, we wouldn't be in this code path. ++ */ ++ ASSERT(parity_untried == 0); ++ ASSERT(parity_errors < rm->rm_firstdatacol); ++ ++ /* ++ * Identify the data columns that reported an error. ++ */ ++ n = 0; ++ for (c = rm->rm_firstdatacol; c < rm->rm_cols; c++) { ++ rc = &rm->rm_col[c]; ++ if (rc->rc_error != 0) { ++ ASSERT(n < VDEV_RAIDZ_MAXPARITY); ++ tgts[n++] = c; ++ } ++ } ++ ++ ASSERT(rm->rm_firstdatacol >= n); ++ ++ code = vdev_raidz_reconstruct(rm, tgts, n); ++ ++ if (raidz_checksum_verify(zio) == 0) { ++ atomic_inc_64(&raidz_corrected[code]); ++ ++ /* ++ * If we read more parity disks than were used ++ * for reconstruction, confirm that the other ++ * parity disks produced correct data. This ++ * routine is suboptimal in that it regenerates ++ * the parity that we already used in addition ++ * to the parity that we're attempting to ++ * verify, but this should be a relatively ++ * uncommon case, and can be optimized if it ++ * becomes a problem. Note that we regenerate ++ * parity when resilvering so we can write it ++ * out to failed devices later. ++ */ ++ if (parity_errors < rm->rm_firstdatacol - n || ++ (zio->io_flags & ZIO_FLAG_RESILVER)) { ++ n = raidz_parity_verify(zio, rm); ++ unexpected_errors += n; ++ ASSERT(parity_errors + n <= ++ rm->rm_firstdatacol); ++ } ++ ++ goto done; ++ } ++ } ++ } ++ ++ /* ++ * This isn't a typical situation -- either we got a read error or ++ * a child silently returned bad data. Read every block so we can ++ * try again with as much data and parity as we can track down. If ++ * we've already been through once before, all children will be marked ++ * as tried so we'll proceed to combinatorial reconstruction. ++ */ ++ unexpected_errors = 1; ++ rm->rm_missingdata = 0; ++ rm->rm_missingparity = 0; ++ ++ for (c = 0; c < rm->rm_cols; c++) { ++ if (rm->rm_col[c].rc_tried) ++ continue; ++ ++ zio_vdev_io_redone(zio); ++ do { ++ rc = &rm->rm_col[c]; ++ if (rc->rc_tried) ++ continue; ++ zio_nowait(zio_vdev_child_io(zio, NULL, ++ vd->vdev_child[rc->rc_devidx], ++ rc->rc_offset, rc->rc_data, rc->rc_size, ++ zio->io_type, zio->io_priority, 0, ++ vdev_raidz_child_done, rc)); ++ } while (++c < rm->rm_cols); ++ ++ return; ++ } ++ ++ /* ++ * At this point we've attempted to reconstruct the data given the ++ * errors we detected, and we've attempted to read all columns. There ++ * must, therefore, be one or more additional problems -- silent errors ++ * resulting in invalid data rather than explicit I/O errors resulting ++ * in absent data. 
We check if there is enough additional data to ++ * possibly reconstruct the data and then perform combinatorial ++ * reconstruction over all possible combinations. If that fails, ++ * we're cooked. ++ */ ++ if (total_errors > rm->rm_firstdatacol) { ++ zio->io_error = vdev_raidz_worst_error(rm); ++ ++ } else if (total_errors < rm->rm_firstdatacol && ++ (code = vdev_raidz_combrec(zio, total_errors, data_errors)) != 0) { ++ /* ++ * If we didn't use all the available parity for the ++ * combinatorial reconstruction, verify that the remaining ++ * parity is correct. ++ */ ++ if (code != (1 << rm->rm_firstdatacol) - 1) ++ (void) raidz_parity_verify(zio, rm); ++ } else { ++ /* ++ * We're here because either: ++ * ++ * total_errors == rm_first_datacol, or ++ * vdev_raidz_combrec() failed ++ * ++ * In either case, there is enough bad data to prevent ++ * reconstruction. ++ * ++ * Start checksum ereports for all children which haven't ++ * failed, and the IO wasn't speculative. ++ */ ++ zio->io_error = ECKSUM; ++ ++ if (!(zio->io_flags & ZIO_FLAG_SPECULATIVE)) { ++ for (c = 0; c < rm->rm_cols; c++) { ++ rc = &rm->rm_col[c]; ++ if (rc->rc_error == 0) { ++ zio_bad_cksum_t zbc; ++ zbc.zbc_has_cksum = 0; ++ zbc.zbc_injected = ++ rm->rm_ecksuminjected; ++ ++ zfs_ereport_start_checksum( ++ zio->io_spa, ++ vd->vdev_child[rc->rc_devidx], ++ zio, rc->rc_offset, rc->rc_size, ++ (void *)(uintptr_t)c, &zbc); ++ } ++ } ++ } ++ } ++ ++done: ++ zio_checksum_verified(zio); ++ ++ if (zio->io_error == 0 && spa_writeable(zio->io_spa) && ++ (unexpected_errors || (zio->io_flags & ZIO_FLAG_RESILVER))) { ++ /* ++ * Use the good data we have in hand to repair damaged children. ++ */ ++ for (c = 0; c < rm->rm_cols; c++) { ++ rc = &rm->rm_col[c]; ++ cvd = vd->vdev_child[rc->rc_devidx]; ++ ++ if (rc->rc_error == 0) ++ continue; ++ ++ zio_nowait(zio_vdev_child_io(zio, NULL, cvd, ++ rc->rc_offset, rc->rc_data, rc->rc_size, ++ ZIO_TYPE_WRITE, zio->io_priority, ++ ZIO_FLAG_IO_REPAIR | (unexpected_errors ? ++ ZIO_FLAG_SELF_HEAL : 0), NULL, NULL)); ++ } ++ } ++} ++ ++static void ++vdev_raidz_state_change(vdev_t *vd, int faulted, int degraded) ++{ ++ if (faulted > vd->vdev_nparity) ++ vdev_set_state(vd, B_FALSE, VDEV_STATE_CANT_OPEN, ++ VDEV_AUX_NO_REPLICAS); ++ else if (degraded + faulted != 0) ++ vdev_set_state(vd, B_FALSE, VDEV_STATE_DEGRADED, VDEV_AUX_NONE); ++ else ++ vdev_set_state(vd, B_FALSE, VDEV_STATE_HEALTHY, VDEV_AUX_NONE); ++} ++ ++vdev_ops_t vdev_raidz_ops = { ++ vdev_raidz_open, ++ vdev_raidz_close, ++ vdev_raidz_asize, ++ vdev_raidz_io_start, ++ vdev_raidz_io_done, ++ vdev_raidz_state_change, ++ NULL, ++ NULL, ++ VDEV_TYPE_RAIDZ, /* name of this vdev type */ ++ B_FALSE /* not a leaf vdev */ ++}; +diff -uNr linux-3.2.33-go.orig/fs/zfs/zfs/vdev_root.c linux-3.2.33-go/fs/zfs/zfs/vdev_root.c +--- linux-3.2.33-go.orig/fs/zfs/zfs/vdev_root.c 1970-01-01 01:00:00.000000000 +0100 ++++ linux-3.2.33-go/fs/zfs/zfs/vdev_root.c 2012-11-16 23:25:34.351039311 +0100 +@@ -0,0 +1,125 @@ ++/* ++ * CDDL HEADER START ++ * ++ * The contents of this file are subject to the terms of the ++ * Common Development and Distribution License (the "License"). ++ * You may not use this file except in compliance with the License. ++ * ++ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE ++ * or http://www.opensolaris.org/os/licensing. ++ * See the License for the specific language governing permissions ++ * and limitations under the License. 
++ * ++ * When distributing Covered Code, include this CDDL HEADER in each ++ * file and include the License file at usr/src/OPENSOLARIS.LICENSE. ++ * If applicable, add the following below this CDDL HEADER, with the ++ * fields enclosed by brackets "[]" replaced with your own identifying ++ * information: Portions Copyright [yyyy] [name of copyright owner] ++ * ++ * CDDL HEADER END ++ */ ++/* ++ * Copyright 2010 Sun Microsystems, Inc. All rights reserved. ++ * Use is subject to license terms. ++ */ ++ ++/* ++ * Copyright (c) 2012 by Delphix. All rights reserved. ++ */ ++ ++#include ++#include ++#include ++#include ++#include ++ ++/* ++ * Virtual device vector for the pool's root vdev. ++ */ ++ ++/* ++ * We should be able to tolerate one failure with absolutely no damage ++ * to our metadata. Two failures will take out space maps, a bunch of ++ * indirect block trees, meta dnodes, dnodes, etc. Probably not a happy ++ * place to live. When we get smarter, we can liberalize this policy. ++ * e.g. If we haven't lost two consecutive top-level vdevs, then we are ++ * probably fine. Adding bean counters during alloc/free can make this ++ * future guesswork more accurate. ++ */ ++static int ++too_many_errors(vdev_t *vd, int numerrors) ++{ ++ ASSERT3U(numerrors, <=, vd->vdev_children); ++ return (numerrors > 0); ++} ++ ++static int ++vdev_root_open(vdev_t *vd, uint64_t *asize, uint64_t *max_asize, ++ uint64_t *ashift) ++{ ++ int lasterror = 0; ++ int numerrors = 0; ++ int c; ++ ++ if (vd->vdev_children == 0) { ++ vd->vdev_stat.vs_aux = VDEV_AUX_BAD_LABEL; ++ return (EINVAL); ++ } ++ ++ vdev_open_children(vd); ++ ++ for (c = 0; c < vd->vdev_children; c++) { ++ vdev_t *cvd = vd->vdev_child[c]; ++ ++ if (cvd->vdev_open_error && !cvd->vdev_islog) { ++ lasterror = cvd->vdev_open_error; ++ numerrors++; ++ } ++ } ++ ++ if (too_many_errors(vd, numerrors)) { ++ vd->vdev_stat.vs_aux = VDEV_AUX_NO_REPLICAS; ++ return (lasterror); ++ } ++ ++ *asize = 0; ++ *max_asize = 0; ++ *ashift = 0; ++ ++ return (0); ++} ++ ++static void ++vdev_root_close(vdev_t *vd) ++{ ++ int c; ++ ++ for (c = 0; c < vd->vdev_children; c++) ++ vdev_close(vd->vdev_child[c]); ++} ++ ++static void ++vdev_root_state_change(vdev_t *vd, int faulted, int degraded) ++{ ++ if (too_many_errors(vd, faulted)) { ++ vdev_set_state(vd, B_FALSE, VDEV_STATE_CANT_OPEN, ++ VDEV_AUX_NO_REPLICAS); ++ } else if (degraded) { ++ vdev_set_state(vd, B_FALSE, VDEV_STATE_DEGRADED, VDEV_AUX_NONE); ++ } else { ++ vdev_set_state(vd, B_FALSE, VDEV_STATE_HEALTHY, VDEV_AUX_NONE); ++ } ++} ++ ++vdev_ops_t vdev_root_ops = { ++ vdev_root_open, ++ vdev_root_close, ++ vdev_default_asize, ++ NULL, /* io_start - not applicable to the root */ ++ NULL, /* io_done - not applicable to the root */ ++ vdev_root_state_change, ++ NULL, ++ NULL, ++ VDEV_TYPE_ROOT, /* name of this vdev type */ ++ B_FALSE /* not a leaf vdev */ ++}; +diff -uNr linux-3.2.33-go.orig/fs/zfs/zfs/zap.c linux-3.2.33-go/fs/zfs/zfs/zap.c +--- linux-3.2.33-go.orig/fs/zfs/zfs/zap.c 1970-01-01 01:00:00.000000000 +0100 ++++ linux-3.2.33-go/fs/zfs/zfs/zap.c 2012-11-16 23:25:34.348039346 +0100 +@@ -0,0 +1,1354 @@ ++/* ++ * CDDL HEADER START ++ * ++ * The contents of this file are subject to the terms of the ++ * Common Development and Distribution License (the "License"). ++ * You may not use this file except in compliance with the License. ++ * ++ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE ++ * or http://www.opensolaris.org/os/licensing. 
++ * See the License for the specific language governing permissions ++ * and limitations under the License. ++ * ++ * When distributing Covered Code, include this CDDL HEADER in each ++ * file and include the License file at usr/src/OPENSOLARIS.LICENSE. ++ * If applicable, add the following below this CDDL HEADER, with the ++ * fields enclosed by brackets "[]" replaced with your own identifying ++ * information: Portions Copyright [yyyy] [name of copyright owner] ++ * ++ * CDDL HEADER END ++ */ ++/* ++ * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved. ++ */ ++ ++/* ++ * This file contains the top half of the zfs directory structure ++ * implementation. The bottom half is in zap_leaf.c. ++ * ++ * The zdir is an extendable hash data structure. There is a table of ++ * pointers to buckets (zap_t->zd_data->zd_leafs). The buckets are ++ * each a constant size and hold a variable number of directory entries. ++ * The buckets (aka "leaf nodes") are implemented in zap_leaf.c. ++ * ++ * The pointer table holds a power of 2 number of pointers. ++ * (1<zd_data->zd_phys->zd_prefix_len). The bucket pointed to ++ * by the pointer at index i in the table holds entries whose hash value ++ * has a zd_prefix_len - bit prefix ++ */ ++ ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++ ++int fzap_default_block_shift = 14; /* 16k blocksize */ ++ ++static void zap_leaf_pageout(dmu_buf_t *db, void *vl); ++static uint64_t zap_allocate_blocks(zap_t *zap, int nblocks); ++ ++ ++void ++fzap_byteswap(void *vbuf, size_t size) ++{ ++ uint64_t block_type; ++ ++ block_type = *(uint64_t *)vbuf; ++ ++ if (block_type == ZBT_LEAF || block_type == BSWAP_64(ZBT_LEAF)) ++ zap_leaf_byteswap(vbuf, size); ++ else { ++ /* it's a ptrtbl block */ ++ byteswap_uint64_array(vbuf, size); ++ } ++} ++ ++void ++fzap_upgrade(zap_t *zap, dmu_tx_t *tx, zap_flags_t flags) ++{ ++ dmu_buf_t *db; ++ zap_leaf_t *l; ++ int i; ++ zap_phys_t *zp; ++ ++ ASSERT(RW_WRITE_HELD(&zap->zap_rwlock)); ++ zap->zap_ismicro = FALSE; ++ ++ (void) dmu_buf_update_user(zap->zap_dbuf, zap, zap, ++ &zap->zap_f.zap_phys, zap_evict); ++ ++ mutex_init(&zap->zap_f.zap_num_entries_mtx, 0, 0, 0); ++ zap->zap_f.zap_block_shift = highbit(zap->zap_dbuf->db_size) - 1; ++ ++ zp = zap->zap_f.zap_phys; ++ /* ++ * explicitly zero it since it might be coming from an ++ * initialized microzap ++ */ ++ bzero(zap->zap_dbuf->db_data, zap->zap_dbuf->db_size); ++ zp->zap_block_type = ZBT_HEADER; ++ zp->zap_magic = ZAP_MAGIC; ++ ++ zp->zap_ptrtbl.zt_shift = ZAP_EMBEDDED_PTRTBL_SHIFT(zap); ++ ++ zp->zap_freeblk = 2; /* block 1 will be the first leaf */ ++ zp->zap_num_leafs = 1; ++ zp->zap_num_entries = 0; ++ zp->zap_salt = zap->zap_salt; ++ zp->zap_normflags = zap->zap_normflags; ++ zp->zap_flags = flags; ++ ++ /* block 1 will be the first leaf */ ++ for (i = 0; i < (1<zap_ptrtbl.zt_shift); i++) ++ ZAP_EMBEDDED_PTRTBL_ENT(zap, i) = 1; ++ ++ /* ++ * set up block 1 - the first leaf ++ */ ++ VERIFY(0 == dmu_buf_hold(zap->zap_objset, zap->zap_object, ++ 1<l_dbuf = db; ++ l->l_phys = db->db_data; ++ ++ zap_leaf_init(l, zp->zap_normflags != 0); ++ ++ kmem_free(l, sizeof (zap_leaf_t)); ++ dmu_buf_rele(db, FTAG); ++} ++ ++static int ++zap_tryupgradedir(zap_t *zap, dmu_tx_t *tx) ++{ ++ if (RW_WRITE_HELD(&zap->zap_rwlock)) ++ return (1); ++ if (rw_tryupgrade(&zap->zap_rwlock)) { ++ dmu_buf_will_dirty(zap->zap_dbuf, tx); ++ return (1); ++ } ++ return (0); ++} ++ ++/* ++ * Generic routines for dealing with the pointer & 
cookie tables. ++ */ ++ ++static int ++zap_table_grow(zap_t *zap, zap_table_phys_t *tbl, ++ void (*transfer_func)(const uint64_t *src, uint64_t *dst, int n), ++ dmu_tx_t *tx) ++{ ++ uint64_t b, newblk; ++ dmu_buf_t *db_old, *db_new; ++ int err; ++ int bs = FZAP_BLOCK_SHIFT(zap); ++ int hepb = 1<<(bs-4); ++ /* hepb = half the number of entries in a block */ ++ ++ ASSERT(RW_WRITE_HELD(&zap->zap_rwlock)); ++ ASSERT(tbl->zt_blk != 0); ++ ASSERT(tbl->zt_numblks > 0); ++ ++ if (tbl->zt_nextblk != 0) { ++ newblk = tbl->zt_nextblk; ++ } else { ++ newblk = zap_allocate_blocks(zap, tbl->zt_numblks * 2); ++ tbl->zt_nextblk = newblk; ++ ASSERT3U(tbl->zt_blks_copied, ==, 0); ++ dmu_prefetch(zap->zap_objset, zap->zap_object, ++ tbl->zt_blk << bs, tbl->zt_numblks << bs); ++ } ++ ++ /* ++ * Copy the ptrtbl from the old to new location. ++ */ ++ ++ b = tbl->zt_blks_copied; ++ err = dmu_buf_hold(zap->zap_objset, zap->zap_object, ++ (tbl->zt_blk + b) << bs, FTAG, &db_old, DMU_READ_NO_PREFETCH); ++ if (err) ++ return (err); ++ ++ /* first half of entries in old[b] go to new[2*b+0] */ ++ VERIFY(0 == dmu_buf_hold(zap->zap_objset, zap->zap_object, ++ (newblk + 2*b+0) << bs, FTAG, &db_new, DMU_READ_NO_PREFETCH)); ++ dmu_buf_will_dirty(db_new, tx); ++ transfer_func(db_old->db_data, db_new->db_data, hepb); ++ dmu_buf_rele(db_new, FTAG); ++ ++ /* second half of entries in old[b] go to new[2*b+1] */ ++ VERIFY(0 == dmu_buf_hold(zap->zap_objset, zap->zap_object, ++ (newblk + 2*b+1) << bs, FTAG, &db_new, DMU_READ_NO_PREFETCH)); ++ dmu_buf_will_dirty(db_new, tx); ++ transfer_func((uint64_t *)db_old->db_data + hepb, ++ db_new->db_data, hepb); ++ dmu_buf_rele(db_new, FTAG); ++ ++ dmu_buf_rele(db_old, FTAG); ++ ++ tbl->zt_blks_copied++; ++ ++ dprintf("copied block %llu of %llu\n", ++ tbl->zt_blks_copied, tbl->zt_numblks); ++ ++ if (tbl->zt_blks_copied == tbl->zt_numblks) { ++ (void) dmu_free_range(zap->zap_objset, zap->zap_object, ++ tbl->zt_blk << bs, tbl->zt_numblks << bs, tx); ++ ++ tbl->zt_blk = newblk; ++ tbl->zt_numblks *= 2; ++ tbl->zt_shift++; ++ tbl->zt_nextblk = 0; ++ tbl->zt_blks_copied = 0; ++ ++ dprintf("finished; numblocks now %llu (%lluk entries)\n", ++ tbl->zt_numblks, 1<<(tbl->zt_shift-10)); ++ } ++ ++ return (0); ++} ++ ++static int ++zap_table_store(zap_t *zap, zap_table_phys_t *tbl, uint64_t idx, uint64_t val, ++ dmu_tx_t *tx) ++{ ++ int err; ++ uint64_t blk, off; ++ int bs = FZAP_BLOCK_SHIFT(zap); ++ dmu_buf_t *db; ++ ++ ASSERT(RW_LOCK_HELD(&zap->zap_rwlock)); ++ ASSERT(tbl->zt_blk != 0); ++ ++ dprintf("storing %llx at index %llx\n", val, idx); ++ ++ blk = idx >> (bs-3); ++ off = idx & ((1<<(bs-3))-1); ++ ++ err = dmu_buf_hold(zap->zap_objset, zap->zap_object, ++ (tbl->zt_blk + blk) << bs, FTAG, &db, DMU_READ_NO_PREFETCH); ++ if (err) ++ return (err); ++ dmu_buf_will_dirty(db, tx); ++ ++ if (tbl->zt_nextblk != 0) { ++ uint64_t idx2 = idx * 2; ++ uint64_t blk2 = idx2 >> (bs-3); ++ uint64_t off2 = idx2 & ((1<<(bs-3))-1); ++ dmu_buf_t *db2; ++ ++ err = dmu_buf_hold(zap->zap_objset, zap->zap_object, ++ (tbl->zt_nextblk + blk2) << bs, FTAG, &db2, ++ DMU_READ_NO_PREFETCH); ++ if (err) { ++ dmu_buf_rele(db, FTAG); ++ return (err); ++ } ++ dmu_buf_will_dirty(db2, tx); ++ ((uint64_t *)db2->db_data)[off2] = val; ++ ((uint64_t *)db2->db_data)[off2+1] = val; ++ dmu_buf_rele(db2, FTAG); ++ } ++ ++ ((uint64_t *)db->db_data)[off] = val; ++ dmu_buf_rele(db, FTAG); ++ ++ return (0); ++} ++ ++static int ++zap_table_load(zap_t *zap, zap_table_phys_t *tbl, uint64_t idx, uint64_t *valp) ++{ ++ uint64_t blk, off; ++ int err; 
++ dmu_buf_t *db; ++ int bs = FZAP_BLOCK_SHIFT(zap); ++ ++ ASSERT(RW_LOCK_HELD(&zap->zap_rwlock)); ++ ++ blk = idx >> (bs-3); ++ off = idx & ((1<<(bs-3))-1); ++ ++ err = dmu_buf_hold(zap->zap_objset, zap->zap_object, ++ (tbl->zt_blk + blk) << bs, FTAG, &db, DMU_READ_NO_PREFETCH); ++ if (err) ++ return (err); ++ *valp = ((uint64_t *)db->db_data)[off]; ++ dmu_buf_rele(db, FTAG); ++ ++ if (tbl->zt_nextblk != 0) { ++ /* ++ * read the nextblk for the sake of i/o error checking, ++ * so that zap_table_load() will catch errors for ++ * zap_table_store. ++ */ ++ blk = (idx*2) >> (bs-3); ++ ++ err = dmu_buf_hold(zap->zap_objset, zap->zap_object, ++ (tbl->zt_nextblk + blk) << bs, FTAG, &db, ++ DMU_READ_NO_PREFETCH); ++ dmu_buf_rele(db, FTAG); ++ } ++ return (err); ++} ++ ++/* ++ * Routines for growing the ptrtbl. ++ */ ++ ++static void ++zap_ptrtbl_transfer(const uint64_t *src, uint64_t *dst, int n) ++{ ++ int i; ++ for (i = 0; i < n; i++) { ++ uint64_t lb = src[i]; ++ dst[2*i+0] = lb; ++ dst[2*i+1] = lb; ++ } ++} ++ ++static int ++zap_grow_ptrtbl(zap_t *zap, dmu_tx_t *tx) ++{ ++ /* ++ * The pointer table should never use more hash bits than we ++ * have (otherwise we'd be using useless zero bits to index it). ++ * If we are within 2 bits of running out, stop growing, since ++ * this is already an aberrant condition. ++ */ ++ if (zap->zap_f.zap_phys->zap_ptrtbl.zt_shift >= zap_hashbits(zap) - 2) ++ return (ENOSPC); ++ ++ if (zap->zap_f.zap_phys->zap_ptrtbl.zt_numblks == 0) { ++ /* ++ * We are outgrowing the "embedded" ptrtbl (the one ++ * stored in the header block). Give it its own entire ++ * block, which will double the size of the ptrtbl. ++ */ ++ uint64_t newblk; ++ dmu_buf_t *db_new; ++ int err; ++ ++ ASSERT3U(zap->zap_f.zap_phys->zap_ptrtbl.zt_shift, ==, ++ ZAP_EMBEDDED_PTRTBL_SHIFT(zap)); ++ ASSERT3U(zap->zap_f.zap_phys->zap_ptrtbl.zt_blk, ==, 0); ++ ++ newblk = zap_allocate_blocks(zap, 1); ++ err = dmu_buf_hold(zap->zap_objset, zap->zap_object, ++ newblk << FZAP_BLOCK_SHIFT(zap), FTAG, &db_new, ++ DMU_READ_NO_PREFETCH); ++ if (err) ++ return (err); ++ dmu_buf_will_dirty(db_new, tx); ++ zap_ptrtbl_transfer(&ZAP_EMBEDDED_PTRTBL_ENT(zap, 0), ++ db_new->db_data, 1 << ZAP_EMBEDDED_PTRTBL_SHIFT(zap)); ++ dmu_buf_rele(db_new, FTAG); ++ ++ zap->zap_f.zap_phys->zap_ptrtbl.zt_blk = newblk; ++ zap->zap_f.zap_phys->zap_ptrtbl.zt_numblks = 1; ++ zap->zap_f.zap_phys->zap_ptrtbl.zt_shift++; ++ ++ ASSERT3U(1ULL << zap->zap_f.zap_phys->zap_ptrtbl.zt_shift, ==, ++ zap->zap_f.zap_phys->zap_ptrtbl.zt_numblks << ++ (FZAP_BLOCK_SHIFT(zap)-3)); ++ ++ return (0); ++ } else { ++ return (zap_table_grow(zap, &zap->zap_f.zap_phys->zap_ptrtbl, ++ zap_ptrtbl_transfer, tx)); ++ } ++} ++ ++static void ++zap_increment_num_entries(zap_t *zap, int delta, dmu_tx_t *tx) ++{ ++ dmu_buf_will_dirty(zap->zap_dbuf, tx); ++ mutex_enter(&zap->zap_f.zap_num_entries_mtx); ++ ASSERT(delta > 0 || zap->zap_f.zap_phys->zap_num_entries >= -delta); ++ zap->zap_f.zap_phys->zap_num_entries += delta; ++ mutex_exit(&zap->zap_f.zap_num_entries_mtx); ++} ++ ++static uint64_t ++zap_allocate_blocks(zap_t *zap, int nblocks) ++{ ++ uint64_t newblk; ++ ASSERT(RW_WRITE_HELD(&zap->zap_rwlock)); ++ newblk = zap->zap_f.zap_phys->zap_freeblk; ++ zap->zap_f.zap_phys->zap_freeblk += nblocks; ++ return (newblk); ++} ++ ++static zap_leaf_t * ++zap_create_leaf(zap_t *zap, dmu_tx_t *tx) ++{ ++ void *winner; ++ zap_leaf_t *l = kmem_alloc(sizeof (zap_leaf_t), KM_PUSHPAGE); ++ ++ ASSERT(RW_WRITE_HELD(&zap->zap_rwlock)); ++ ++ rw_init(&l->l_rwlock, NULL, RW_DEFAULT, 
NULL); ++ rw_enter(&l->l_rwlock, RW_WRITER); ++ l->l_blkid = zap_allocate_blocks(zap, 1); ++ l->l_dbuf = NULL; ++ l->l_phys = NULL; ++ ++ VERIFY(0 == dmu_buf_hold(zap->zap_objset, zap->zap_object, ++ l->l_blkid << FZAP_BLOCK_SHIFT(zap), NULL, &l->l_dbuf, ++ DMU_READ_NO_PREFETCH)); ++ winner = dmu_buf_set_user(l->l_dbuf, l, &l->l_phys, zap_leaf_pageout); ++ ASSERT(winner == NULL); ++ dmu_buf_will_dirty(l->l_dbuf, tx); ++ ++ zap_leaf_init(l, zap->zap_normflags != 0); ++ ++ zap->zap_f.zap_phys->zap_num_leafs++; ++ ++ return (l); ++} ++ ++int ++fzap_count(zap_t *zap, uint64_t *count) ++{ ++ ASSERT(!zap->zap_ismicro); ++ mutex_enter(&zap->zap_f.zap_num_entries_mtx); /* unnecessary */ ++ *count = zap->zap_f.zap_phys->zap_num_entries; ++ mutex_exit(&zap->zap_f.zap_num_entries_mtx); ++ return (0); ++} ++ ++/* ++ * Routines for obtaining zap_leaf_t's ++ */ ++ ++void ++zap_put_leaf(zap_leaf_t *l) ++{ ++ rw_exit(&l->l_rwlock); ++ dmu_buf_rele(l->l_dbuf, NULL); ++} ++ ++_NOTE(ARGSUSED(0)) ++static void ++zap_leaf_pageout(dmu_buf_t *db, void *vl) ++{ ++ zap_leaf_t *l = vl; ++ ++ rw_destroy(&l->l_rwlock); ++ kmem_free(l, sizeof (zap_leaf_t)); ++} ++ ++static zap_leaf_t * ++zap_open_leaf(uint64_t blkid, dmu_buf_t *db) ++{ ++ zap_leaf_t *l, *winner; ++ ++ ASSERT(blkid != 0); ++ ++ l = kmem_alloc(sizeof (zap_leaf_t), KM_PUSHPAGE); ++ rw_init(&l->l_rwlock, NULL, RW_DEFAULT, NULL); ++ rw_enter(&l->l_rwlock, RW_WRITER); ++ l->l_blkid = blkid; ++ l->l_bs = highbit(db->db_size)-1; ++ l->l_dbuf = db; ++ l->l_phys = NULL; ++ ++ winner = dmu_buf_set_user(db, l, &l->l_phys, zap_leaf_pageout); ++ ++ rw_exit(&l->l_rwlock); ++ if (winner != NULL) { ++ /* someone else set it first */ ++ zap_leaf_pageout(NULL, l); ++ l = winner; ++ } ++ ++ /* ++ * lhr_pad was previously used for the next leaf in the leaf ++ * chain. There should be no chained leafs (as we have removed ++ * support for them). ++ */ ++ ASSERT3U(l->l_phys->l_hdr.lh_pad1, ==, 0); ++ ++ /* ++ * There should be more hash entries than there can be ++ * chunks to put in the hash table ++ */ ++ ASSERT3U(ZAP_LEAF_HASH_NUMENTRIES(l), >, ZAP_LEAF_NUMCHUNKS(l) / 3); ++ ++ /* The chunks should begin at the end of the hash table */ ++ ASSERT3P(&ZAP_LEAF_CHUNK(l, 0), ==, (zap_leaf_chunk_t *) ++ &l->l_phys->l_hash[ZAP_LEAF_HASH_NUMENTRIES(l)]); ++ ++ /* The chunks should end at the end of the block */ ++ ASSERT3U((uintptr_t)&ZAP_LEAF_CHUNK(l, ZAP_LEAF_NUMCHUNKS(l)) - ++ (uintptr_t)l->l_phys, ==, l->l_dbuf->db_size); ++ ++ return (l); ++} ++ ++static int ++zap_get_leaf_byblk(zap_t *zap, uint64_t blkid, dmu_tx_t *tx, krw_t lt, ++ zap_leaf_t **lp) ++{ ++ dmu_buf_t *db; ++ zap_leaf_t *l; ++ int bs = FZAP_BLOCK_SHIFT(zap); ++ int err; ++ ++ ASSERT(RW_LOCK_HELD(&zap->zap_rwlock)); ++ ++ err = dmu_buf_hold(zap->zap_objset, zap->zap_object, ++ blkid << bs, NULL, &db, DMU_READ_NO_PREFETCH); ++ if (err) ++ return (err); ++ ++ ASSERT3U(db->db_object, ==, zap->zap_object); ++ ASSERT3U(db->db_offset, ==, blkid << bs); ++ ASSERT3U(db->db_size, ==, 1 << bs); ++ ASSERT(blkid != 0); ++ ++ l = dmu_buf_get_user(db); ++ ++ if (l == NULL) ++ l = zap_open_leaf(blkid, db); ++ ++ rw_enter(&l->l_rwlock, lt); ++ /* ++ * Must lock before dirtying, otherwise l->l_phys could change, ++ * causing ASSERT below to fail. 
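zap_create_leaf() and zap_open_leaf() above cache the in-core zap_leaf_t on the dbuf and resolve the race where two threads build the cache at once by checking which pointer "won"; the loser frees its copy and adopts the winner's. A stand-alone sketch of the same idiom, reduced to a plain compare-and-swap (struct cached_leaf, dbuf_user and open_leaf_cached are simplified stand-ins, not the real DMU interfaces):

#include <stdlib.h>
#include <stdatomic.h>

struct cached_leaf {
        unsigned long long blkid;
};

/* stand-in for the per-dbuf user pointer that dmu_buf_set_user() manages */
static _Atomic(struct cached_leaf *) dbuf_user;

static struct cached_leaf *
open_leaf_cached(unsigned long long blkid)
{
        struct cached_leaf *l = calloc(1, sizeof (*l));
        struct cached_leaf *winner = NULL;

        if (l == NULL)
                return (NULL);
        l->blkid = blkid;
        /* try to install our copy; on failure 'winner' holds the existing one */
        if (!atomic_compare_exchange_strong(&dbuf_user, &winner, l)) {
                free(l);        /* someone else set it first; use theirs */
                return (winner);
        }
        return (l);
}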
++ */ ++ if (lt == RW_WRITER) ++ dmu_buf_will_dirty(db, tx); ++ ASSERT3U(l->l_blkid, ==, blkid); ++ ASSERT3P(l->l_dbuf, ==, db); ++ ASSERT3P(l->l_phys, ==, l->l_dbuf->db_data); ++ ASSERT3U(l->l_phys->l_hdr.lh_block_type, ==, ZBT_LEAF); ++ ASSERT3U(l->l_phys->l_hdr.lh_magic, ==, ZAP_LEAF_MAGIC); ++ ++ *lp = l; ++ return (0); ++} ++ ++static int ++zap_idx_to_blk(zap_t *zap, uint64_t idx, uint64_t *valp) ++{ ++ ASSERT(RW_LOCK_HELD(&zap->zap_rwlock)); ++ ++ if (zap->zap_f.zap_phys->zap_ptrtbl.zt_numblks == 0) { ++ ASSERT3U(idx, <, ++ (1ULL << zap->zap_f.zap_phys->zap_ptrtbl.zt_shift)); ++ *valp = ZAP_EMBEDDED_PTRTBL_ENT(zap, idx); ++ return (0); ++ } else { ++ return (zap_table_load(zap, &zap->zap_f.zap_phys->zap_ptrtbl, ++ idx, valp)); ++ } ++} ++ ++static int ++zap_set_idx_to_blk(zap_t *zap, uint64_t idx, uint64_t blk, dmu_tx_t *tx) ++{ ++ ASSERT(tx != NULL); ++ ASSERT(RW_WRITE_HELD(&zap->zap_rwlock)); ++ ++ if (zap->zap_f.zap_phys->zap_ptrtbl.zt_blk == 0) { ++ ZAP_EMBEDDED_PTRTBL_ENT(zap, idx) = blk; ++ return (0); ++ } else { ++ return (zap_table_store(zap, &zap->zap_f.zap_phys->zap_ptrtbl, ++ idx, blk, tx)); ++ } ++} ++ ++static int ++zap_deref_leaf(zap_t *zap, uint64_t h, dmu_tx_t *tx, krw_t lt, zap_leaf_t **lp) ++{ ++ uint64_t idx, blk; ++ int err; ++ ++ ASSERT(zap->zap_dbuf == NULL || ++ zap->zap_f.zap_phys == zap->zap_dbuf->db_data); ++ ASSERT3U(zap->zap_f.zap_phys->zap_magic, ==, ZAP_MAGIC); ++ idx = ZAP_HASH_IDX(h, zap->zap_f.zap_phys->zap_ptrtbl.zt_shift); ++ err = zap_idx_to_blk(zap, idx, &blk); ++ if (err != 0) ++ return (err); ++ err = zap_get_leaf_byblk(zap, blk, tx, lt, lp); ++ ++ ASSERT(err || ZAP_HASH_IDX(h, (*lp)->l_phys->l_hdr.lh_prefix_len) == ++ (*lp)->l_phys->l_hdr.lh_prefix); ++ return (err); ++} ++ ++static int ++zap_expand_leaf(zap_name_t *zn, zap_leaf_t *l, dmu_tx_t *tx, zap_leaf_t **lp) ++{ ++ zap_t *zap = zn->zn_zap; ++ uint64_t hash = zn->zn_hash; ++ zap_leaf_t *nl; ++ int prefix_diff, i, err; ++ uint64_t sibling; ++ int old_prefix_len = l->l_phys->l_hdr.lh_prefix_len; ++ ++ ASSERT3U(old_prefix_len, <=, zap->zap_f.zap_phys->zap_ptrtbl.zt_shift); ++ ASSERT(RW_LOCK_HELD(&zap->zap_rwlock)); ++ ++ ASSERT3U(ZAP_HASH_IDX(hash, old_prefix_len), ==, ++ l->l_phys->l_hdr.lh_prefix); ++ ++ if (zap_tryupgradedir(zap, tx) == 0 || ++ old_prefix_len == zap->zap_f.zap_phys->zap_ptrtbl.zt_shift) { ++ /* We failed to upgrade, or need to grow the pointer table */ ++ objset_t *os = zap->zap_objset; ++ uint64_t object = zap->zap_object; ++ ++ zap_put_leaf(l); ++ zap_unlockdir(zap); ++ err = zap_lockdir(os, object, tx, RW_WRITER, ++ FALSE, FALSE, &zn->zn_zap); ++ zap = zn->zn_zap; ++ if (err) ++ return (err); ++ ASSERT(!zap->zap_ismicro); ++ ++ while (old_prefix_len == ++ zap->zap_f.zap_phys->zap_ptrtbl.zt_shift) { ++ err = zap_grow_ptrtbl(zap, tx); ++ if (err) ++ return (err); ++ } ++ ++ err = zap_deref_leaf(zap, hash, tx, RW_WRITER, &l); ++ if (err) ++ return (err); ++ ++ if (l->l_phys->l_hdr.lh_prefix_len != old_prefix_len) { ++ /* it split while our locks were down */ ++ *lp = l; ++ return (0); ++ } ++ } ++ ASSERT(RW_WRITE_HELD(&zap->zap_rwlock)); ++ ASSERT3U(old_prefix_len, <, zap->zap_f.zap_phys->zap_ptrtbl.zt_shift); ++ ASSERT3U(ZAP_HASH_IDX(hash, old_prefix_len), ==, ++ l->l_phys->l_hdr.lh_prefix); ++ ++ prefix_diff = zap->zap_f.zap_phys->zap_ptrtbl.zt_shift - ++ (old_prefix_len + 1); ++ sibling = (ZAP_HASH_IDX(hash, old_prefix_len + 1) | 1) << prefix_diff; ++ ++ /* check for i/o errors before doing zap_leaf_split */ ++ for (i = 0; i < (1ULL<l_blkid); ++ } ++ ++ nl = 
zap_create_leaf(zap, tx); ++ zap_leaf_split(l, nl, zap->zap_normflags != 0); ++ ++ /* set sibling pointers */ ++ for (i = 0; i < (1ULL<l_blkid, tx); ++ ASSERT3U(err, ==, 0); /* we checked for i/o errors above */ ++ } ++ ++ if (hash & (1ULL << (64 - l->l_phys->l_hdr.lh_prefix_len))) { ++ /* we want the sibling */ ++ zap_put_leaf(l); ++ *lp = nl; ++ } else { ++ zap_put_leaf(nl); ++ *lp = l; ++ } ++ ++ return (0); ++} ++ ++static void ++zap_put_leaf_maybe_grow_ptrtbl(zap_name_t *zn, zap_leaf_t *l, dmu_tx_t *tx) ++{ ++ zap_t *zap = zn->zn_zap; ++ int shift = zap->zap_f.zap_phys->zap_ptrtbl.zt_shift; ++ int leaffull = (l->l_phys->l_hdr.lh_prefix_len == shift && ++ l->l_phys->l_hdr.lh_nfree < ZAP_LEAF_LOW_WATER); ++ ++ zap_put_leaf(l); ++ ++ if (leaffull || zap->zap_f.zap_phys->zap_ptrtbl.zt_nextblk) { ++ int err; ++ ++ /* ++ * We are in the middle of growing the pointer table, or ++ * this leaf will soon make us grow it. ++ */ ++ if (zap_tryupgradedir(zap, tx) == 0) { ++ objset_t *os = zap->zap_objset; ++ uint64_t zapobj = zap->zap_object; ++ ++ zap_unlockdir(zap); ++ err = zap_lockdir(os, zapobj, tx, ++ RW_WRITER, FALSE, FALSE, &zn->zn_zap); ++ zap = zn->zn_zap; ++ if (err) ++ return; ++ } ++ ++ /* could have finished growing while our locks were down */ ++ if (zap->zap_f.zap_phys->zap_ptrtbl.zt_shift == shift) ++ (void) zap_grow_ptrtbl(zap, tx); ++ } ++} ++ ++static int ++fzap_checkname(zap_name_t *zn) ++{ ++ if (zn->zn_key_orig_numints * zn->zn_key_intlen > ZAP_MAXNAMELEN) ++ return (ENAMETOOLONG); ++ return (0); ++} ++ ++static int ++fzap_checksize(uint64_t integer_size, uint64_t num_integers) ++{ ++ /* Only integer sizes supported by C */ ++ switch (integer_size) { ++ case 1: ++ case 2: ++ case 4: ++ case 8: ++ break; ++ default: ++ return (EINVAL); ++ } ++ ++ if (integer_size * num_integers > ZAP_MAXVALUELEN) ++ return (E2BIG); ++ ++ return (0); ++} ++ ++static int ++fzap_check(zap_name_t *zn, uint64_t integer_size, uint64_t num_integers) ++{ ++ int err; ++ ++ if ((err = fzap_checkname(zn)) != 0) ++ return (err); ++ return (fzap_checksize(integer_size, num_integers)); ++} ++ ++/* ++ * Routines for manipulating attributes. 
++ */ ++int ++fzap_lookup(zap_name_t *zn, ++ uint64_t integer_size, uint64_t num_integers, void *buf, ++ char *realname, int rn_len, boolean_t *ncp) ++{ ++ zap_leaf_t *l; ++ int err; ++ zap_entry_handle_t zeh; ++ ++ if ((err = fzap_checkname(zn)) != 0) ++ return (err); ++ ++ err = zap_deref_leaf(zn->zn_zap, zn->zn_hash, NULL, RW_READER, &l); ++ if (err != 0) ++ return (err); ++ err = zap_leaf_lookup(l, zn, &zeh); ++ if (err == 0) { ++ if ((err = fzap_checksize(integer_size, num_integers)) != 0) { ++ zap_put_leaf(l); ++ return (err); ++ } ++ ++ err = zap_entry_read(&zeh, integer_size, num_integers, buf); ++ (void) zap_entry_read_name(zn->zn_zap, &zeh, rn_len, realname); ++ if (ncp) { ++ *ncp = zap_entry_normalization_conflict(&zeh, ++ zn, NULL, zn->zn_zap); ++ } ++ } ++ ++ zap_put_leaf(l); ++ return (err); ++} ++ ++int ++fzap_add_cd(zap_name_t *zn, ++ uint64_t integer_size, uint64_t num_integers, ++ const void *val, uint32_t cd, dmu_tx_t *tx) ++{ ++ zap_leaf_t *l; ++ int err; ++ zap_entry_handle_t zeh; ++ zap_t *zap = zn->zn_zap; ++ ++ ASSERT(RW_LOCK_HELD(&zap->zap_rwlock)); ++ ASSERT(!zap->zap_ismicro); ++ ASSERT(fzap_check(zn, integer_size, num_integers) == 0); ++ ++ err = zap_deref_leaf(zap, zn->zn_hash, tx, RW_WRITER, &l); ++ if (err != 0) ++ return (err); ++retry: ++ err = zap_leaf_lookup(l, zn, &zeh); ++ if (err == 0) { ++ err = EEXIST; ++ goto out; ++ } ++ if (err != ENOENT) ++ goto out; ++ ++ err = zap_entry_create(l, zn, cd, ++ integer_size, num_integers, val, &zeh); ++ ++ if (err == 0) { ++ zap_increment_num_entries(zap, 1, tx); ++ } else if (err == EAGAIN) { ++ err = zap_expand_leaf(zn, l, tx, &l); ++ zap = zn->zn_zap; /* zap_expand_leaf() may change zap */ ++ if (err == 0) ++ goto retry; ++ } ++ ++out: ++ if (zap != NULL) ++ zap_put_leaf_maybe_grow_ptrtbl(zn, l, tx); ++ return (err); ++} ++ ++int ++fzap_add(zap_name_t *zn, ++ uint64_t integer_size, uint64_t num_integers, ++ const void *val, dmu_tx_t *tx) ++{ ++ int err = fzap_check(zn, integer_size, num_integers); ++ if (err != 0) ++ return (err); ++ ++ return (fzap_add_cd(zn, integer_size, num_integers, ++ val, ZAP_NEED_CD, tx)); ++} ++ ++int ++fzap_update(zap_name_t *zn, ++ int integer_size, uint64_t num_integers, const void *val, dmu_tx_t *tx) ++{ ++ zap_leaf_t *l; ++ int err, create; ++ zap_entry_handle_t zeh; ++ zap_t *zap = zn->zn_zap; ++ ++ ASSERT(RW_LOCK_HELD(&zap->zap_rwlock)); ++ err = fzap_check(zn, integer_size, num_integers); ++ if (err != 0) ++ return (err); ++ ++ err = zap_deref_leaf(zap, zn->zn_hash, tx, RW_WRITER, &l); ++ if (err != 0) ++ return (err); ++retry: ++ err = zap_leaf_lookup(l, zn, &zeh); ++ create = (err == ENOENT); ++ ASSERT(err == 0 || err == ENOENT); ++ ++ if (create) { ++ err = zap_entry_create(l, zn, ZAP_NEED_CD, ++ integer_size, num_integers, val, &zeh); ++ if (err == 0) ++ zap_increment_num_entries(zap, 1, tx); ++ } else { ++ err = zap_entry_update(&zeh, integer_size, num_integers, val); ++ } ++ ++ if (err == EAGAIN) { ++ err = zap_expand_leaf(zn, l, tx, &l); ++ zap = zn->zn_zap; /* zap_expand_leaf() may change zap */ ++ if (err == 0) ++ goto retry; ++ } ++ ++ if (zap != NULL) ++ zap_put_leaf_maybe_grow_ptrtbl(zn, l, tx); ++ return (err); ++} ++ ++int ++fzap_length(zap_name_t *zn, ++ uint64_t *integer_size, uint64_t *num_integers) ++{ ++ zap_leaf_t *l; ++ int err; ++ zap_entry_handle_t zeh; ++ ++ err = zap_deref_leaf(zn->zn_zap, zn->zn_hash, NULL, RW_READER, &l); ++ if (err != 0) ++ return (err); ++ err = zap_leaf_lookup(l, zn, &zeh); ++ if (err != 0) ++ goto out; ++ ++ if (integer_size) ++ 
*integer_size = zeh.zeh_integer_size; ++ if (num_integers) ++ *num_integers = zeh.zeh_num_integers; ++out: ++ zap_put_leaf(l); ++ return (err); ++} ++ ++int ++fzap_remove(zap_name_t *zn, dmu_tx_t *tx) ++{ ++ zap_leaf_t *l; ++ int err; ++ zap_entry_handle_t zeh; ++ ++ err = zap_deref_leaf(zn->zn_zap, zn->zn_hash, tx, RW_WRITER, &l); ++ if (err != 0) ++ return (err); ++ err = zap_leaf_lookup(l, zn, &zeh); ++ if (err == 0) { ++ zap_entry_remove(&zeh); ++ zap_increment_num_entries(zn->zn_zap, -1, tx); ++ } ++ zap_put_leaf(l); ++ return (err); ++} ++ ++void ++fzap_prefetch(zap_name_t *zn) ++{ ++ uint64_t idx, blk; ++ zap_t *zap = zn->zn_zap; ++ int bs; ++ ++ idx = ZAP_HASH_IDX(zn->zn_hash, ++ zap->zap_f.zap_phys->zap_ptrtbl.zt_shift); ++ if (zap_idx_to_blk(zap, idx, &blk) != 0) ++ return; ++ bs = FZAP_BLOCK_SHIFT(zap); ++ dmu_prefetch(zap->zap_objset, zap->zap_object, blk << bs, 1 << bs); ++} ++ ++/* ++ * Helper functions for consumers. ++ */ ++ ++int ++zap_value_search(objset_t *os, uint64_t zapobj, uint64_t value, uint64_t mask, ++ char *name) ++{ ++ zap_cursor_t zc; ++ zap_attribute_t *za; ++ int err; ++ ++ if (mask == 0) ++ mask = -1ULL; ++ ++ za = kmem_alloc(sizeof (zap_attribute_t), KM_PUSHPAGE); ++ for (zap_cursor_init(&zc, os, zapobj); ++ (err = zap_cursor_retrieve(&zc, za)) == 0; ++ zap_cursor_advance(&zc)) { ++ if ((za->za_first_integer & mask) == (value & mask)) { ++ (void) strcpy(name, za->za_name); ++ break; ++ } ++ } ++ zap_cursor_fini(&zc); ++ kmem_free(za, sizeof (zap_attribute_t)); ++ return (err); ++} ++ ++int ++zap_join(objset_t *os, uint64_t fromobj, uint64_t intoobj, dmu_tx_t *tx) ++{ ++ zap_cursor_t zc; ++ zap_attribute_t za; ++ int err; ++ ++ for (zap_cursor_init(&zc, os, fromobj); ++ zap_cursor_retrieve(&zc, &za) == 0; ++ (void) zap_cursor_advance(&zc)) { ++ if (za.za_integer_length != 8 || za.za_num_integers != 1) ++ return (EINVAL); ++ err = zap_add(os, intoobj, za.za_name, ++ 8, 1, &za.za_first_integer, tx); ++ if (err) ++ return (err); ++ } ++ zap_cursor_fini(&zc); ++ return (0); ++} ++ ++int ++zap_join_key(objset_t *os, uint64_t fromobj, uint64_t intoobj, ++ uint64_t value, dmu_tx_t *tx) ++{ ++ zap_cursor_t zc; ++ zap_attribute_t za; ++ int err; ++ ++ for (zap_cursor_init(&zc, os, fromobj); ++ zap_cursor_retrieve(&zc, &za) == 0; ++ (void) zap_cursor_advance(&zc)) { ++ if (za.za_integer_length != 8 || za.za_num_integers != 1) ++ return (EINVAL); ++ err = zap_add(os, intoobj, za.za_name, ++ 8, 1, &value, tx); ++ if (err) ++ return (err); ++ } ++ zap_cursor_fini(&zc); ++ return (0); ++} ++ ++int ++zap_join_increment(objset_t *os, uint64_t fromobj, uint64_t intoobj, ++ dmu_tx_t *tx) ++{ ++ zap_cursor_t zc; ++ zap_attribute_t za; ++ int err; ++ ++ for (zap_cursor_init(&zc, os, fromobj); ++ zap_cursor_retrieve(&zc, &za) == 0; ++ (void) zap_cursor_advance(&zc)) { ++ uint64_t delta = 0; ++ ++ if (za.za_integer_length != 8 || za.za_num_integers != 1) ++ return (EINVAL); ++ ++ err = zap_lookup(os, intoobj, za.za_name, 8, 1, &delta); ++ if (err != 0 && err != ENOENT) ++ return (err); ++ delta += za.za_first_integer; ++ err = zap_update(os, intoobj, za.za_name, 8, 1, &delta, tx); ++ if (err) ++ return (err); ++ } ++ zap_cursor_fini(&zc); ++ return (0); ++} ++ ++int ++zap_add_int(objset_t *os, uint64_t obj, uint64_t value, dmu_tx_t *tx) ++{ ++ char name[20]; ++ ++ (void) snprintf(name, sizeof (name), "%llx", (longlong_t)value); ++ return (zap_add(os, obj, name, 8, 1, &value, tx)); ++} ++ ++int ++zap_remove_int(objset_t *os, uint64_t obj, uint64_t value, dmu_tx_t *tx) ++{ ++ char 
name[20]; ++ ++ (void) snprintf(name, sizeof (name), "%llx", (longlong_t)value); ++ return (zap_remove(os, obj, name, tx)); ++} ++ ++int ++zap_lookup_int(objset_t *os, uint64_t obj, uint64_t value) ++{ ++ char name[20]; ++ ++ (void) snprintf(name, sizeof (name), "%llx", (longlong_t)value); ++ return (zap_lookup(os, obj, name, 8, 1, &value)); ++} ++ ++int ++zap_add_int_key(objset_t *os, uint64_t obj, ++ uint64_t key, uint64_t value, dmu_tx_t *tx) ++{ ++ char name[20]; ++ ++ (void) snprintf(name, sizeof (name), "%llx", (longlong_t)key); ++ return (zap_add(os, obj, name, 8, 1, &value, tx)); ++} ++ ++int ++zap_lookup_int_key(objset_t *os, uint64_t obj, uint64_t key, uint64_t *valuep) ++{ ++ char name[20]; ++ ++ (void) snprintf(name, sizeof (name), "%llx", (longlong_t)key); ++ return (zap_lookup(os, obj, name, 8, 1, valuep)); ++} ++ ++int ++zap_increment(objset_t *os, uint64_t obj, const char *name, int64_t delta, ++ dmu_tx_t *tx) ++{ ++ uint64_t value = 0; ++ int err; ++ ++ if (delta == 0) ++ return (0); ++ ++ err = zap_lookup(os, obj, name, 8, 1, &value); ++ if (err != 0 && err != ENOENT) ++ return (err); ++ value += delta; ++ if (value == 0) ++ err = zap_remove(os, obj, name, tx); ++ else ++ err = zap_update(os, obj, name, 8, 1, &value, tx); ++ return (err); ++} ++ ++int ++zap_increment_int(objset_t *os, uint64_t obj, uint64_t key, int64_t delta, ++ dmu_tx_t *tx) ++{ ++ char name[20]; ++ ++ (void) snprintf(name, sizeof (name), "%llx", (longlong_t)key); ++ return (zap_increment(os, obj, name, delta, tx)); ++} ++ ++/* ++ * Routines for iterating over the attributes. ++ */ ++ ++int ++fzap_cursor_retrieve(zap_t *zap, zap_cursor_t *zc, zap_attribute_t *za) ++{ ++ int err = ENOENT; ++ zap_entry_handle_t zeh; ++ zap_leaf_t *l; ++ ++ /* retrieve the next entry at or after zc_hash/zc_cd */ ++ /* if no entry, return ENOENT */ ++ ++ if (zc->zc_leaf && ++ (ZAP_HASH_IDX(zc->zc_hash, ++ zc->zc_leaf->l_phys->l_hdr.lh_prefix_len) != ++ zc->zc_leaf->l_phys->l_hdr.lh_prefix)) { ++ rw_enter(&zc->zc_leaf->l_rwlock, RW_READER); ++ zap_put_leaf(zc->zc_leaf); ++ zc->zc_leaf = NULL; ++ } ++ ++again: ++ if (zc->zc_leaf == NULL) { ++ err = zap_deref_leaf(zap, zc->zc_hash, NULL, RW_READER, ++ &zc->zc_leaf); ++ if (err != 0) ++ return (err); ++ } else { ++ rw_enter(&zc->zc_leaf->l_rwlock, RW_READER); ++ } ++ l = zc->zc_leaf; ++ ++ err = zap_leaf_lookup_closest(l, zc->zc_hash, zc->zc_cd, &zeh); ++ ++ if (err == ENOENT) { ++ uint64_t nocare = ++ (1ULL << (64 - l->l_phys->l_hdr.lh_prefix_len)) - 1; ++ zc->zc_hash = (zc->zc_hash & ~nocare) + nocare + 1; ++ zc->zc_cd = 0; ++ if (l->l_phys->l_hdr.lh_prefix_len == 0 || zc->zc_hash == 0) { ++ zc->zc_hash = -1ULL; ++ } else { ++ zap_put_leaf(zc->zc_leaf); ++ zc->zc_leaf = NULL; ++ goto again; ++ } ++ } ++ ++ if (err == 0) { ++ zc->zc_hash = zeh.zeh_hash; ++ zc->zc_cd = zeh.zeh_cd; ++ za->za_integer_length = zeh.zeh_integer_size; ++ za->za_num_integers = zeh.zeh_num_integers; ++ if (zeh.zeh_num_integers == 0) { ++ za->za_first_integer = 0; ++ } else { ++ err = zap_entry_read(&zeh, 8, 1, &za->za_first_integer); ++ ASSERT(err == 0 || err == EOVERFLOW); ++ } ++ err = zap_entry_read_name(zap, &zeh, ++ sizeof (za->za_name), za->za_name); ++ ASSERT(err == 0); ++ ++ za->za_normalization_conflict = ++ zap_entry_normalization_conflict(&zeh, ++ NULL, za->za_name, zap); ++ } ++ rw_exit(&zc->zc_leaf->l_rwlock); ++ return (err); ++} ++ ++static void ++zap_stats_ptrtbl(zap_t *zap, uint64_t *tbl, int len, zap_stats_t *zs) ++{ ++ int i, err; ++ uint64_t lastblk = 0; ++ ++ /* ++ * NB: if a leaf 
has more pointers than an entire ptrtbl block ++ * can hold, then it'll be accounted for more than once, since ++ * we won't have lastblk. ++ */ ++ for (i = 0; i < len; i++) { ++ zap_leaf_t *l; ++ ++ if (tbl[i] == lastblk) ++ continue; ++ lastblk = tbl[i]; ++ ++ err = zap_get_leaf_byblk(zap, tbl[i], NULL, RW_READER, &l); ++ if (err == 0) { ++ zap_leaf_stats(zap, l, zs); ++ zap_put_leaf(l); ++ } ++ } ++} ++ ++int ++fzap_cursor_move_to_key(zap_cursor_t *zc, zap_name_t *zn) ++{ ++ int err; ++ zap_leaf_t *l; ++ zap_entry_handle_t zeh; ++ ++ if (zn->zn_key_orig_numints * zn->zn_key_intlen > ZAP_MAXNAMELEN) ++ return (ENAMETOOLONG); ++ ++ err = zap_deref_leaf(zc->zc_zap, zn->zn_hash, NULL, RW_READER, &l); ++ if (err != 0) ++ return (err); ++ ++ err = zap_leaf_lookup(l, zn, &zeh); ++ if (err != 0) ++ return (err); ++ ++ zc->zc_leaf = l; ++ zc->zc_hash = zeh.zeh_hash; ++ zc->zc_cd = zeh.zeh_cd; ++ ++ return (err); ++} ++ ++void ++fzap_get_stats(zap_t *zap, zap_stats_t *zs) ++{ ++ int bs = FZAP_BLOCK_SHIFT(zap); ++ zs->zs_blocksize = 1ULL << bs; ++ ++ /* ++ * Set zap_phys_t fields ++ */ ++ zs->zs_num_leafs = zap->zap_f.zap_phys->zap_num_leafs; ++ zs->zs_num_entries = zap->zap_f.zap_phys->zap_num_entries; ++ zs->zs_num_blocks = zap->zap_f.zap_phys->zap_freeblk; ++ zs->zs_block_type = zap->zap_f.zap_phys->zap_block_type; ++ zs->zs_magic = zap->zap_f.zap_phys->zap_magic; ++ zs->zs_salt = zap->zap_f.zap_phys->zap_salt; ++ ++ /* ++ * Set zap_ptrtbl fields ++ */ ++ zs->zs_ptrtbl_len = 1ULL << zap->zap_f.zap_phys->zap_ptrtbl.zt_shift; ++ zs->zs_ptrtbl_nextblk = zap->zap_f.zap_phys->zap_ptrtbl.zt_nextblk; ++ zs->zs_ptrtbl_blks_copied = ++ zap->zap_f.zap_phys->zap_ptrtbl.zt_blks_copied; ++ zs->zs_ptrtbl_zt_blk = zap->zap_f.zap_phys->zap_ptrtbl.zt_blk; ++ zs->zs_ptrtbl_zt_numblks = zap->zap_f.zap_phys->zap_ptrtbl.zt_numblks; ++ zs->zs_ptrtbl_zt_shift = zap->zap_f.zap_phys->zap_ptrtbl.zt_shift; ++ ++ if (zap->zap_f.zap_phys->zap_ptrtbl.zt_numblks == 0) { ++ /* the ptrtbl is entirely in the header block. */ ++ zap_stats_ptrtbl(zap, &ZAP_EMBEDDED_PTRTBL_ENT(zap, 0), ++ 1 << ZAP_EMBEDDED_PTRTBL_SHIFT(zap), zs); ++ } else { ++ int b; ++ ++ dmu_prefetch(zap->zap_objset, zap->zap_object, ++ zap->zap_f.zap_phys->zap_ptrtbl.zt_blk << bs, ++ zap->zap_f.zap_phys->zap_ptrtbl.zt_numblks << bs); ++ ++ for (b = 0; b < zap->zap_f.zap_phys->zap_ptrtbl.zt_numblks; ++ b++) { ++ dmu_buf_t *db; ++ int err; ++ ++ err = dmu_buf_hold(zap->zap_objset, zap->zap_object, ++ (zap->zap_f.zap_phys->zap_ptrtbl.zt_blk + b) << bs, ++ FTAG, &db, DMU_READ_NO_PREFETCH); ++ if (err == 0) { ++ zap_stats_ptrtbl(zap, db->db_data, ++ 1<<(bs-3), zs); ++ dmu_buf_rele(db, FTAG); ++ } ++ } ++ } ++} ++ ++int ++fzap_count_write(zap_name_t *zn, int add, uint64_t *towrite, ++ uint64_t *tooverwrite) ++{ ++ zap_t *zap = zn->zn_zap; ++ zap_leaf_t *l; ++ int err; ++ ++ /* ++ * Account for the header block of the fatzap. ++ */ ++ if (!add && dmu_buf_freeable(zap->zap_dbuf)) { ++ *tooverwrite += zap->zap_dbuf->db_size; ++ } else { ++ *towrite += zap->zap_dbuf->db_size; ++ } ++ ++ /* ++ * Account for the pointer table blocks. ++ * If we are adding we need to account for the following cases : ++ * - If the pointer table is embedded, this operation could force an ++ * external pointer table. ++ * - If this already has an external pointer table this operation ++ * could extend the table. 
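fzap_count_write() below only produces a worst-case estimate of the space an operation may dirty: the header block, the pointer-table blocks (charged only for adds, and more generously once the table is external), and the leaf, doubled when an add could split it. A compact restatement of that arithmetic, illustrative only, using a single block size for both the header and the leaf dbuf:

#include <stdint.h>
#include <stdbool.h>
#include <stdio.h>

static uint64_t
fzap_write_estimate(uint64_t blocksize, bool add, bool ptrtbl_embedded)
{
        uint64_t towrite = 0;

        towrite += blocksize;                           /* fatzap header block */
        if (add)                                        /* pointer-table blocks */
                towrite += ptrtbl_embedded ? blocksize : 3 * blocksize;
        towrite += (add ? 2 : 1) * blocksize;           /* leaf, plus one for a split */
        return (towrite);
}

int
main(void)
{
        /* 16k blocks, adding while the pointer table is still embedded */
        printf("%llu\n", (unsigned long long)fzap_write_estimate(16384, true, true));
        return (0);
}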
++ */ ++ if (add) { ++ if (zap->zap_f.zap_phys->zap_ptrtbl.zt_blk == 0) ++ *towrite += zap->zap_dbuf->db_size; ++ else ++ *towrite += (zap->zap_dbuf->db_size * 3); ++ } ++ ++ /* ++ * Now, check if the block containing leaf is freeable ++ * and account accordingly. ++ */ ++ err = zap_deref_leaf(zap, zn->zn_hash, NULL, RW_READER, &l); ++ if (err != 0) { ++ return (err); ++ } ++ ++ if (!add && dmu_buf_freeable(l->l_dbuf)) { ++ *tooverwrite += l->l_dbuf->db_size; ++ } else { ++ /* ++ * If this an add operation, the leaf block could split. ++ * Hence, we need to account for an additional leaf block. ++ */ ++ *towrite += (add ? 2 : 1) * l->l_dbuf->db_size; ++ } ++ ++ zap_put_leaf(l); ++ return (0); ++} +diff -uNr linux-3.2.33-go.orig/fs/zfs/zfs/zap_leaf.c linux-3.2.33-go/fs/zfs/zfs/zap_leaf.c +--- linux-3.2.33-go.orig/fs/zfs/zfs/zap_leaf.c 1970-01-01 01:00:00.000000000 +0100 ++++ linux-3.2.33-go/fs/zfs/zfs/zap_leaf.c 2012-11-16 23:25:34.352039300 +0100 +@@ -0,0 +1,872 @@ ++/* ++ * CDDL HEADER START ++ * ++ * The contents of this file are subject to the terms of the ++ * Common Development and Distribution License (the "License"). ++ * You may not use this file except in compliance with the License. ++ * ++ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE ++ * or http://www.opensolaris.org/os/licensing. ++ * See the License for the specific language governing permissions ++ * and limitations under the License. ++ * ++ * When distributing Covered Code, include this CDDL HEADER in each ++ * file and include the License file at usr/src/OPENSOLARIS.LICENSE. ++ * If applicable, add the following below this CDDL HEADER, with the ++ * fields enclosed by brackets "[]" replaced with your own identifying ++ * information: Portions Copyright [yyyy] [name of copyright owner] ++ * ++ * CDDL HEADER END ++ */ ++/* ++ * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved. ++ */ ++ ++/* ++ * The 512-byte leaf is broken into 32 16-byte chunks. ++ * chunk number n means l_chunk[n], even though the header precedes it. ++ * the names are stored null-terminated. 
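Within a leaf, names and values are stored as chains of fixed-size array chunks linked by 16-bit chunk numbers and terminated by CHAIN_END, which is what zap_leaf_array_create() and zap_leaf_array_read() below implement. A stand-alone sketch of that encoding; the 21-byte payload, the trivial allocator and the missing exhaustion check are simplifications for illustration, not the real ZAP_LEAF_ARRAY_BYTES layout:

#include <stdint.h>
#include <stdio.h>
#include <string.h>

#define NCHUNKS         32
#define PAYLOAD         21              /* illustrative chunk payload size */
#define CHAIN_END       0xffff

struct chunk {
        uint8_t  payload[PAYLOAD];
        uint16_t next;                  /* index of the next chunk, or CHAIN_END */
};

static struct chunk chunks[NCHUNKS];

static uint16_t
chunk_alloc(void)
{
        static uint16_t next_free;      /* trivial allocator, no exhaustion check */
        return (next_free++);
}

/* store 'len' bytes of 'buf' as a chunk chain; returns the head chunk number */
static uint16_t
array_create(const uint8_t *buf, size_t len)
{
        uint16_t head = CHAIN_END;
        uint16_t *linkp = &head;

        while (len > 0) {
                uint16_t c = chunk_alloc();
                size_t n = len < PAYLOAD ? len : PAYLOAD;

                memcpy(chunks[c].payload, buf, n);
                chunks[c].next = CHAIN_END;
                *linkp = c;             /* link into the chain */
                linkp = &chunks[c].next;
                buf += n;
                len -= n;
        }
        return (head);
}

int
main(void)
{
        const char *name = "a zap entry name, stored null-terminated";
        uint16_t head = array_create((const uint8_t *)name, strlen(name) + 1);

        printf("head chunk %u, next %u\n", head, chunks[head].next);
        return (0);
}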
++ */ ++ ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++ ++static uint16_t *zap_leaf_rehash_entry(zap_leaf_t *l, uint16_t entry); ++ ++#define CHAIN_END 0xffff /* end of the chunk chain */ ++ ++/* half the (current) minimum block size */ ++#define MAX_ARRAY_BYTES (8<<10) ++ ++#define LEAF_HASH(l, h) \ ++ ((ZAP_LEAF_HASH_NUMENTRIES(l)-1) & \ ++ ((h) >> (64 - ZAP_LEAF_HASH_SHIFT(l)-(l)->l_phys->l_hdr.lh_prefix_len))) ++ ++#define LEAF_HASH_ENTPTR(l, h) (&(l)->l_phys->l_hash[LEAF_HASH(l, h)]) ++ ++ ++static void ++zap_memset(void *a, int c, size_t n) ++{ ++ char *cp = a; ++ char *cpend = cp + n; ++ ++ while (cp < cpend) ++ *cp++ = c; ++} ++ ++static void ++stv(int len, void *addr, uint64_t value) ++{ ++ switch (len) { ++ case 1: ++ *(uint8_t *)addr = value; ++ return; ++ case 2: ++ *(uint16_t *)addr = value; ++ return; ++ case 4: ++ *(uint32_t *)addr = value; ++ return; ++ case 8: ++ *(uint64_t *)addr = value; ++ return; ++ } ++ ASSERT(!"bad int len"); ++} ++ ++static uint64_t ++ldv(int len, const void *addr) ++{ ++ switch (len) { ++ case 1: ++ return (*(uint8_t *)addr); ++ case 2: ++ return (*(uint16_t *)addr); ++ case 4: ++ return (*(uint32_t *)addr); ++ case 8: ++ return (*(uint64_t *)addr); ++ } ++ ASSERT(!"bad int len"); ++ return (0xFEEDFACEDEADBEEFULL); ++} ++ ++void ++zap_leaf_byteswap(zap_leaf_phys_t *buf, int size) ++{ ++ int i; ++ zap_leaf_t l; ++ l.l_bs = highbit(size)-1; ++ l.l_phys = buf; ++ ++ buf->l_hdr.lh_block_type = BSWAP_64(buf->l_hdr.lh_block_type); ++ buf->l_hdr.lh_prefix = BSWAP_64(buf->l_hdr.lh_prefix); ++ buf->l_hdr.lh_magic = BSWAP_32(buf->l_hdr.lh_magic); ++ buf->l_hdr.lh_nfree = BSWAP_16(buf->l_hdr.lh_nfree); ++ buf->l_hdr.lh_nentries = BSWAP_16(buf->l_hdr.lh_nentries); ++ buf->l_hdr.lh_prefix_len = BSWAP_16(buf->l_hdr.lh_prefix_len); ++ buf->l_hdr.lh_freelist = BSWAP_16(buf->l_hdr.lh_freelist); ++ ++ for (i = 0; i < ZAP_LEAF_HASH_NUMENTRIES(&l); i++) ++ buf->l_hash[i] = BSWAP_16(buf->l_hash[i]); ++ ++ for (i = 0; i < ZAP_LEAF_NUMCHUNKS(&l); i++) { ++ zap_leaf_chunk_t *lc = &ZAP_LEAF_CHUNK(&l, i); ++ struct zap_leaf_entry *le; ++ ++ switch (lc->l_free.lf_type) { ++ case ZAP_CHUNK_ENTRY: ++ le = &lc->l_entry; ++ ++ le->le_type = BSWAP_8(le->le_type); ++ le->le_value_intlen = BSWAP_8(le->le_value_intlen); ++ le->le_next = BSWAP_16(le->le_next); ++ le->le_name_chunk = BSWAP_16(le->le_name_chunk); ++ le->le_name_numints = BSWAP_16(le->le_name_numints); ++ le->le_value_chunk = BSWAP_16(le->le_value_chunk); ++ le->le_value_numints = BSWAP_16(le->le_value_numints); ++ le->le_cd = BSWAP_32(le->le_cd); ++ le->le_hash = BSWAP_64(le->le_hash); ++ break; ++ case ZAP_CHUNK_FREE: ++ lc->l_free.lf_type = BSWAP_8(lc->l_free.lf_type); ++ lc->l_free.lf_next = BSWAP_16(lc->l_free.lf_next); ++ break; ++ case ZAP_CHUNK_ARRAY: ++ lc->l_array.la_type = BSWAP_8(lc->l_array.la_type); ++ lc->l_array.la_next = BSWAP_16(lc->l_array.la_next); ++ /* la_array doesn't need swapping */ ++ break; ++ default: ++ ASSERT(!"bad leaf type"); ++ } ++ } ++} ++ ++void ++zap_leaf_init(zap_leaf_t *l, boolean_t sort) ++{ ++ int i; ++ ++ l->l_bs = highbit(l->l_dbuf->db_size)-1; ++ zap_memset(&l->l_phys->l_hdr, 0, sizeof (struct zap_leaf_header)); ++ zap_memset(l->l_phys->l_hash, CHAIN_END, 2*ZAP_LEAF_HASH_NUMENTRIES(l)); ++ for (i = 0; i < ZAP_LEAF_NUMCHUNKS(l); i++) { ++ ZAP_LEAF_CHUNK(l, i).l_free.lf_type = ZAP_CHUNK_FREE; ++ ZAP_LEAF_CHUNK(l, i).l_free.lf_next = i+1; ++ } ++ ZAP_LEAF_CHUNK(l, ZAP_LEAF_NUMCHUNKS(l)-1).l_free.lf_next = CHAIN_END; ++ 
l->l_phys->l_hdr.lh_block_type = ZBT_LEAF; ++ l->l_phys->l_hdr.lh_magic = ZAP_LEAF_MAGIC; ++ l->l_phys->l_hdr.lh_nfree = ZAP_LEAF_NUMCHUNKS(l); ++ if (sort) ++ l->l_phys->l_hdr.lh_flags |= ZLF_ENTRIES_CDSORTED; ++} ++ ++/* ++ * Routines which manipulate leaf chunks (l_chunk[]). ++ */ ++ ++static uint16_t ++zap_leaf_chunk_alloc(zap_leaf_t *l) ++{ ++ int chunk; ++ ++ ASSERT(l->l_phys->l_hdr.lh_nfree > 0); ++ ++ chunk = l->l_phys->l_hdr.lh_freelist; ++ ASSERT3U(chunk, <, ZAP_LEAF_NUMCHUNKS(l)); ++ ASSERT3U(ZAP_LEAF_CHUNK(l, chunk).l_free.lf_type, ==, ZAP_CHUNK_FREE); ++ ++ l->l_phys->l_hdr.lh_freelist = ZAP_LEAF_CHUNK(l, chunk).l_free.lf_next; ++ ++ l->l_phys->l_hdr.lh_nfree--; ++ ++ return (chunk); ++} ++ ++static void ++zap_leaf_chunk_free(zap_leaf_t *l, uint16_t chunk) ++{ ++ struct zap_leaf_free *zlf = &ZAP_LEAF_CHUNK(l, chunk).l_free; ++ ASSERT3U(l->l_phys->l_hdr.lh_nfree, <, ZAP_LEAF_NUMCHUNKS(l)); ++ ASSERT3U(chunk, <, ZAP_LEAF_NUMCHUNKS(l)); ++ ASSERT(zlf->lf_type != ZAP_CHUNK_FREE); ++ ++ zlf->lf_type = ZAP_CHUNK_FREE; ++ zlf->lf_next = l->l_phys->l_hdr.lh_freelist; ++ bzero(zlf->lf_pad, sizeof (zlf->lf_pad)); /* help it to compress */ ++ l->l_phys->l_hdr.lh_freelist = chunk; ++ ++ l->l_phys->l_hdr.lh_nfree++; ++} ++ ++/* ++ * Routines which manipulate leaf arrays (zap_leaf_array type chunks). ++ */ ++ ++static uint16_t ++zap_leaf_array_create(zap_leaf_t *l, const char *buf, ++ int integer_size, int num_integers) ++{ ++ uint16_t chunk_head; ++ uint16_t *chunkp = &chunk_head; ++ int byten = 0; ++ uint64_t value = 0; ++ int shift = (integer_size-1)*8; ++ int len = num_integers; ++ ++ ASSERT3U(num_integers * integer_size, <, MAX_ARRAY_BYTES); ++ ++ while (len > 0) { ++ uint16_t chunk = zap_leaf_chunk_alloc(l); ++ struct zap_leaf_array *la = &ZAP_LEAF_CHUNK(l, chunk).l_array; ++ int i; ++ ++ la->la_type = ZAP_CHUNK_ARRAY; ++ for (i = 0; i < ZAP_LEAF_ARRAY_BYTES; i++) { ++ if (byten == 0) ++ value = ldv(integer_size, buf); ++ la->la_array[i] = value >> shift; ++ value <<= 8; ++ if (++byten == integer_size) { ++ byten = 0; ++ buf += integer_size; ++ if (--len == 0) ++ break; ++ } ++ } ++ ++ *chunkp = chunk; ++ chunkp = &la->la_next; ++ } ++ *chunkp = CHAIN_END; ++ ++ return (chunk_head); ++} ++ ++static void ++zap_leaf_array_free(zap_leaf_t *l, uint16_t *chunkp) ++{ ++ uint16_t chunk = *chunkp; ++ ++ *chunkp = CHAIN_END; ++ ++ while (chunk != CHAIN_END) { ++ int nextchunk = ZAP_LEAF_CHUNK(l, chunk).l_array.la_next; ++ ASSERT3U(ZAP_LEAF_CHUNK(l, chunk).l_array.la_type, ==, ++ ZAP_CHUNK_ARRAY); ++ zap_leaf_chunk_free(l, chunk); ++ chunk = nextchunk; ++ } ++} ++ ++/* array_len and buf_len are in integers, not bytes */ ++static void ++zap_leaf_array_read(zap_leaf_t *l, uint16_t chunk, ++ int array_int_len, int array_len, int buf_int_len, uint64_t buf_len, ++ void *buf) ++{ ++ int len = MIN(array_len, buf_len); ++ int byten = 0; ++ uint64_t value = 0; ++ char *p = buf; ++ ++ ASSERT3U(array_int_len, <=, buf_int_len); ++ ++ /* Fast path for one 8-byte integer */ ++ if (array_int_len == 8 && buf_int_len == 8 && len == 1) { ++ struct zap_leaf_array *la = &ZAP_LEAF_CHUNK(l, chunk).l_array; ++ uint8_t *ip = la->la_array; ++ uint64_t *buf64 = buf; ++ ++ *buf64 = (uint64_t)ip[0] << 56 | (uint64_t)ip[1] << 48 | ++ (uint64_t)ip[2] << 40 | (uint64_t)ip[3] << 32 | ++ (uint64_t)ip[4] << 24 | (uint64_t)ip[5] << 16 | ++ (uint64_t)ip[6] << 8 | (uint64_t)ip[7]; ++ return; ++ } ++ ++ /* Fast path for an array of 1-byte integers (eg. 
the entry name) */ ++ if (array_int_len == 1 && buf_int_len == 1 && ++ buf_len > array_len + ZAP_LEAF_ARRAY_BYTES) { ++ while (chunk != CHAIN_END) { ++ struct zap_leaf_array *la = ++ &ZAP_LEAF_CHUNK(l, chunk).l_array; ++ bcopy(la->la_array, p, ZAP_LEAF_ARRAY_BYTES); ++ p += ZAP_LEAF_ARRAY_BYTES; ++ chunk = la->la_next; ++ } ++ return; ++ } ++ ++ while (len > 0) { ++ struct zap_leaf_array *la = &ZAP_LEAF_CHUNK(l, chunk).l_array; ++ int i; ++ ++ ASSERT3U(chunk, <, ZAP_LEAF_NUMCHUNKS(l)); ++ for (i = 0; i < ZAP_LEAF_ARRAY_BYTES && len > 0; i++) { ++ value = (value << 8) | la->la_array[i]; ++ byten++; ++ if (byten == array_int_len) { ++ stv(buf_int_len, p, value); ++ byten = 0; ++ len--; ++ if (len == 0) ++ return; ++ p += buf_int_len; ++ } ++ } ++ chunk = la->la_next; ++ } ++} ++ ++static boolean_t ++zap_leaf_array_match(zap_leaf_t *l, zap_name_t *zn, ++ int chunk, int array_numints) ++{ ++ int bseen = 0; ++ ++ if (zap_getflags(zn->zn_zap) & ZAP_FLAG_UINT64_KEY) { ++ uint64_t *thiskey; ++ boolean_t match; ++ ++ ASSERT(zn->zn_key_intlen == sizeof (*thiskey)); ++ thiskey = kmem_alloc(array_numints * sizeof (*thiskey), ++ KM_PUSHPAGE); ++ ++ zap_leaf_array_read(l, chunk, sizeof (*thiskey), array_numints, ++ sizeof (*thiskey), array_numints, thiskey); ++ match = bcmp(thiskey, zn->zn_key_orig, ++ array_numints * sizeof (*thiskey)) == 0; ++ kmem_free(thiskey, array_numints * sizeof (*thiskey)); ++ return (match); ++ } ++ ++ ASSERT(zn->zn_key_intlen == 1); ++ if (zn->zn_matchtype == MT_FIRST) { ++ char *thisname = kmem_alloc(array_numints, KM_PUSHPAGE); ++ boolean_t match; ++ ++ zap_leaf_array_read(l, chunk, sizeof (char), array_numints, ++ sizeof (char), array_numints, thisname); ++ match = zap_match(zn, thisname); ++ kmem_free(thisname, array_numints); ++ return (match); ++ } ++ ++ /* ++ * Fast path for exact matching. ++ * First check that the lengths match, so that we don't read ++ * past the end of the zn_key_orig array. ++ */ ++ if (array_numints != zn->zn_key_orig_numints) ++ return (B_FALSE); ++ while (bseen < array_numints) { ++ struct zap_leaf_array *la = &ZAP_LEAF_CHUNK(l, chunk).l_array; ++ int toread = MIN(array_numints - bseen, ZAP_LEAF_ARRAY_BYTES); ++ ASSERT3U(chunk, <, ZAP_LEAF_NUMCHUNKS(l)); ++ if (bcmp(la->la_array, (char *)zn->zn_key_orig + bseen, toread)) ++ break; ++ chunk = la->la_next; ++ bseen += toread; ++ } ++ return (bseen == array_numints); ++} ++ ++/* ++ * Routines which manipulate leaf entries. ++ */ ++ ++int ++zap_leaf_lookup(zap_leaf_t *l, zap_name_t *zn, zap_entry_handle_t *zeh) ++{ ++ uint16_t *chunkp; ++ struct zap_leaf_entry *le; ++ ++ ASSERT3U(l->l_phys->l_hdr.lh_magic, ==, ZAP_LEAF_MAGIC); ++ ++again: ++ for (chunkp = LEAF_HASH_ENTPTR(l, zn->zn_hash); ++ *chunkp != CHAIN_END; chunkp = &le->le_next) { ++ uint16_t chunk = *chunkp; ++ le = ZAP_LEAF_ENTRY(l, chunk); ++ ++ ASSERT3U(chunk, <, ZAP_LEAF_NUMCHUNKS(l)); ++ ASSERT3U(le->le_type, ==, ZAP_CHUNK_ENTRY); ++ ++ if (le->le_hash != zn->zn_hash) ++ continue; ++ ++ /* ++ * NB: the entry chain is always sorted by cd on ++ * normalized zap objects, so this will find the ++ * lowest-cd match for MT_FIRST. 
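As the comment above notes, entries that share the same 64-bit hash are distinguished by a collision differentiator (cd), and on normalized objects the per-bucket chain is kept sorted by cd. That ordering is what lets zap_entry_create() further down assign the lowest unused cd in a single pass. A stand-alone illustration of that pass, using a plain sorted array in place of the chunk chain:

#include <stdint.h>
#include <stdio.h>

/*
 * Given the cds already used by entries with a particular hash, in
 * ascending order, return the lowest cd not yet in use.
 */
static uint32_t
lowest_unused_cd(const uint32_t *sorted_cds, int n)
{
        uint32_t cd = 0;
        int i;

        for (i = 0; i < n; i++) {
                if (sorted_cds[i] > cd)
                        break;                  /* found a gap */
                if (sorted_cds[i] == cd)
                        cd++;
        }
        return (cd);
}

int
main(void)
{
        uint32_t cds[] = { 0, 1, 3 };

        printf("next cd = %u\n", lowest_unused_cd(cds, 3));     /* prints 2 */
        return (0);
}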
++ */ ++ ASSERT(zn->zn_matchtype == MT_EXACT || ++ (l->l_phys->l_hdr.lh_flags & ZLF_ENTRIES_CDSORTED)); ++ if (zap_leaf_array_match(l, zn, le->le_name_chunk, ++ le->le_name_numints)) { ++ zeh->zeh_num_integers = le->le_value_numints; ++ zeh->zeh_integer_size = le->le_value_intlen; ++ zeh->zeh_cd = le->le_cd; ++ zeh->zeh_hash = le->le_hash; ++ zeh->zeh_chunkp = chunkp; ++ zeh->zeh_leaf = l; ++ return (0); ++ } ++ } ++ ++ /* ++ * NB: we could of course do this in one pass, but that would be ++ * a pain. We'll see if MT_BEST is even used much. ++ */ ++ if (zn->zn_matchtype == MT_BEST) { ++ zn->zn_matchtype = MT_FIRST; ++ goto again; ++ } ++ ++ return (ENOENT); ++} ++ ++/* Return (h1,cd1 >= h2,cd2) */ ++#define HCD_GTEQ(h1, cd1, h2, cd2) \ ++ ((h1 > h2) ? TRUE : ((h1 == h2 && cd1 >= cd2) ? TRUE : FALSE)) ++ ++int ++zap_leaf_lookup_closest(zap_leaf_t *l, ++ uint64_t h, uint32_t cd, zap_entry_handle_t *zeh) ++{ ++ uint16_t chunk; ++ uint64_t besth = -1ULL; ++ uint32_t bestcd = -1U; ++ uint16_t bestlh = ZAP_LEAF_HASH_NUMENTRIES(l)-1; ++ uint16_t lh; ++ struct zap_leaf_entry *le; ++ ++ ASSERT3U(l->l_phys->l_hdr.lh_magic, ==, ZAP_LEAF_MAGIC); ++ ++ for (lh = LEAF_HASH(l, h); lh <= bestlh; lh++) { ++ for (chunk = l->l_phys->l_hash[lh]; ++ chunk != CHAIN_END; chunk = le->le_next) { ++ le = ZAP_LEAF_ENTRY(l, chunk); ++ ++ ASSERT3U(chunk, <, ZAP_LEAF_NUMCHUNKS(l)); ++ ASSERT3U(le->le_type, ==, ZAP_CHUNK_ENTRY); ++ ++ if (HCD_GTEQ(le->le_hash, le->le_cd, h, cd) && ++ HCD_GTEQ(besth, bestcd, le->le_hash, le->le_cd)) { ++ ASSERT3U(bestlh, >=, lh); ++ bestlh = lh; ++ besth = le->le_hash; ++ bestcd = le->le_cd; ++ ++ zeh->zeh_num_integers = le->le_value_numints; ++ zeh->zeh_integer_size = le->le_value_intlen; ++ zeh->zeh_cd = le->le_cd; ++ zeh->zeh_hash = le->le_hash; ++ zeh->zeh_fakechunk = chunk; ++ zeh->zeh_chunkp = &zeh->zeh_fakechunk; ++ zeh->zeh_leaf = l; ++ } ++ } ++ } ++ ++ return (bestcd == -1U ? 
ENOENT : 0); ++} ++ ++int ++zap_entry_read(const zap_entry_handle_t *zeh, ++ uint8_t integer_size, uint64_t num_integers, void *buf) ++{ ++ struct zap_leaf_entry *le = ++ ZAP_LEAF_ENTRY(zeh->zeh_leaf, *zeh->zeh_chunkp); ++ ASSERT3U(le->le_type, ==, ZAP_CHUNK_ENTRY); ++ ++ if (le->le_value_intlen > integer_size) ++ return (EINVAL); ++ ++ zap_leaf_array_read(zeh->zeh_leaf, le->le_value_chunk, ++ le->le_value_intlen, le->le_value_numints, ++ integer_size, num_integers, buf); ++ ++ if (zeh->zeh_num_integers > num_integers) ++ return (EOVERFLOW); ++ return (0); ++ ++} ++ ++int ++zap_entry_read_name(zap_t *zap, const zap_entry_handle_t *zeh, uint16_t buflen, ++ char *buf) ++{ ++ struct zap_leaf_entry *le = ++ ZAP_LEAF_ENTRY(zeh->zeh_leaf, *zeh->zeh_chunkp); ++ ASSERT3U(le->le_type, ==, ZAP_CHUNK_ENTRY); ++ ++ if (zap_getflags(zap) & ZAP_FLAG_UINT64_KEY) { ++ zap_leaf_array_read(zeh->zeh_leaf, le->le_name_chunk, 8, ++ le->le_name_numints, 8, buflen / 8, buf); ++ } else { ++ zap_leaf_array_read(zeh->zeh_leaf, le->le_name_chunk, 1, ++ le->le_name_numints, 1, buflen, buf); ++ } ++ if (le->le_name_numints > buflen) ++ return (EOVERFLOW); ++ return (0); ++} ++ ++int ++zap_entry_update(zap_entry_handle_t *zeh, ++ uint8_t integer_size, uint64_t num_integers, const void *buf) ++{ ++ int delta_chunks; ++ zap_leaf_t *l = zeh->zeh_leaf; ++ struct zap_leaf_entry *le = ZAP_LEAF_ENTRY(l, *zeh->zeh_chunkp); ++ ++ delta_chunks = ZAP_LEAF_ARRAY_NCHUNKS(num_integers * integer_size) - ++ ZAP_LEAF_ARRAY_NCHUNKS(le->le_value_numints * le->le_value_intlen); ++ ++ if ((int)l->l_phys->l_hdr.lh_nfree < delta_chunks) ++ return (EAGAIN); ++ ++ zap_leaf_array_free(l, &le->le_value_chunk); ++ le->le_value_chunk = ++ zap_leaf_array_create(l, buf, integer_size, num_integers); ++ le->le_value_numints = num_integers; ++ le->le_value_intlen = integer_size; ++ return (0); ++} ++ ++void ++zap_entry_remove(zap_entry_handle_t *zeh) ++{ ++ uint16_t entry_chunk; ++ struct zap_leaf_entry *le; ++ zap_leaf_t *l = zeh->zeh_leaf; ++ ++ ASSERT3P(zeh->zeh_chunkp, !=, &zeh->zeh_fakechunk); ++ ++ entry_chunk = *zeh->zeh_chunkp; ++ le = ZAP_LEAF_ENTRY(l, entry_chunk); ++ ASSERT3U(le->le_type, ==, ZAP_CHUNK_ENTRY); ++ ++ zap_leaf_array_free(l, &le->le_name_chunk); ++ zap_leaf_array_free(l, &le->le_value_chunk); ++ ++ *zeh->zeh_chunkp = le->le_next; ++ zap_leaf_chunk_free(l, entry_chunk); ++ ++ l->l_phys->l_hdr.lh_nentries--; ++} ++ ++int ++zap_entry_create(zap_leaf_t *l, zap_name_t *zn, uint32_t cd, ++ uint8_t integer_size, uint64_t num_integers, const void *buf, ++ zap_entry_handle_t *zeh) ++{ ++ uint16_t chunk; ++ uint16_t *chunkp; ++ struct zap_leaf_entry *le; ++ uint64_t valuelen; ++ int numchunks; ++ uint64_t h = zn->zn_hash; ++ ++ valuelen = integer_size * num_integers; ++ ++ numchunks = 1 + ZAP_LEAF_ARRAY_NCHUNKS(zn->zn_key_orig_numints * ++ zn->zn_key_intlen) + ZAP_LEAF_ARRAY_NCHUNKS(valuelen); ++ if (numchunks > ZAP_LEAF_NUMCHUNKS(l)) ++ return (E2BIG); ++ ++ if (cd == ZAP_NEED_CD) { ++ /* find the lowest unused cd */ ++ if (l->l_phys->l_hdr.lh_flags & ZLF_ENTRIES_CDSORTED) { ++ cd = 0; ++ ++ for (chunk = *LEAF_HASH_ENTPTR(l, h); ++ chunk != CHAIN_END; chunk = le->le_next) { ++ le = ZAP_LEAF_ENTRY(l, chunk); ++ if (le->le_cd > cd) ++ break; ++ if (le->le_hash == h) { ++ ASSERT3U(cd, ==, le->le_cd); ++ cd++; ++ } ++ } ++ } else { ++ /* old unsorted format; do it the O(n^2) way */ ++ for (cd = 0; ; cd++) { ++ for (chunk = *LEAF_HASH_ENTPTR(l, h); ++ chunk != CHAIN_END; chunk = le->le_next) { ++ le = ZAP_LEAF_ENTRY(l, chunk); ++ if 
(le->le_hash == h && ++ le->le_cd == cd) { ++ break; ++ } ++ } ++ /* If this cd is not in use, we are good. */ ++ if (chunk == CHAIN_END) ++ break; ++ } ++ } ++ /* ++ * We would run out of space in a block before we could ++ * store enough entries to run out of CD values. ++ */ ++ ASSERT3U(cd, <, zap_maxcd(zn->zn_zap)); ++ } ++ ++ if (l->l_phys->l_hdr.lh_nfree < numchunks) ++ return (EAGAIN); ++ ++ /* make the entry */ ++ chunk = zap_leaf_chunk_alloc(l); ++ le = ZAP_LEAF_ENTRY(l, chunk); ++ le->le_type = ZAP_CHUNK_ENTRY; ++ le->le_name_chunk = zap_leaf_array_create(l, zn->zn_key_orig, ++ zn->zn_key_intlen, zn->zn_key_orig_numints); ++ le->le_name_numints = zn->zn_key_orig_numints; ++ le->le_value_chunk = ++ zap_leaf_array_create(l, buf, integer_size, num_integers); ++ le->le_value_numints = num_integers; ++ le->le_value_intlen = integer_size; ++ le->le_hash = h; ++ le->le_cd = cd; ++ ++ /* link it into the hash chain */ ++ /* XXX if we did the search above, we could just use that */ ++ chunkp = zap_leaf_rehash_entry(l, chunk); ++ ++ l->l_phys->l_hdr.lh_nentries++; ++ ++ zeh->zeh_leaf = l; ++ zeh->zeh_num_integers = num_integers; ++ zeh->zeh_integer_size = le->le_value_intlen; ++ zeh->zeh_cd = le->le_cd; ++ zeh->zeh_hash = le->le_hash; ++ zeh->zeh_chunkp = chunkp; ++ ++ return (0); ++} ++ ++/* ++ * Determine if there is another entry with the same normalized form. ++ * For performance purposes, either zn or name must be provided (the ++ * other can be NULL). Note, there usually won't be any hash ++ * conflicts, in which case we don't need the concatenated/normalized ++ * form of the name. But all callers have one of these on hand anyway, ++ * so might as well take advantage. A cleaner but slower interface ++ * would accept neither argument, and compute the normalized name as ++ * needed (using zap_name_alloc(zap_entry_read_name(zeh))). ++ */ ++boolean_t ++zap_entry_normalization_conflict(zap_entry_handle_t *zeh, zap_name_t *zn, ++ const char *name, zap_t *zap) ++{ ++ uint64_t chunk; ++ struct zap_leaf_entry *le; ++ boolean_t allocdzn = B_FALSE; ++ ++ if (zap->zap_normflags == 0) ++ return (B_FALSE); ++ ++ for (chunk = *LEAF_HASH_ENTPTR(zeh->zeh_leaf, zeh->zeh_hash); ++ chunk != CHAIN_END; chunk = le->le_next) { ++ le = ZAP_LEAF_ENTRY(zeh->zeh_leaf, chunk); ++ if (le->le_hash != zeh->zeh_hash) ++ continue; ++ if (le->le_cd == zeh->zeh_cd) ++ continue; ++ ++ if (zn == NULL) { ++ zn = zap_name_alloc(zap, name, MT_FIRST); ++ allocdzn = B_TRUE; ++ } ++ if (zap_leaf_array_match(zeh->zeh_leaf, zn, ++ le->le_name_chunk, le->le_name_numints)) { ++ if (allocdzn) ++ zap_name_free(zn); ++ return (B_TRUE); ++ } ++ } ++ if (allocdzn) ++ zap_name_free(zn); ++ return (B_FALSE); ++} ++ ++/* ++ * Routines for transferring entries between leafs. ++ */ ++ ++static uint16_t * ++zap_leaf_rehash_entry(zap_leaf_t *l, uint16_t entry) ++{ ++ struct zap_leaf_entry *le = ZAP_LEAF_ENTRY(l, entry); ++ struct zap_leaf_entry *le2; ++ uint16_t *chunkp; ++ ++ /* ++ * keep the entry chain sorted by cd ++ * NB: this will not cause problems for unsorted leafs, though ++ * it is unnecessary there. 
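zap_leaf_rehash_entry() below keeps each hash-bucket chain sorted by cd by walking the chain through a pointer to the previous link until it finds the first entry with a larger cd. The same walk as a stand-alone sketch; it uses C pointers where the real code chains 16-bit chunk numbers:

#include <stdint.h>
#include <stdio.h>

struct ent {
        uint32_t cd;
        struct ent *next;
};

/* insert 'e' into the chain at *headp, keeping the chain sorted by cd */
static void
sorted_insert(struct ent **headp, struct ent *e)
{
        struct ent **linkp;

        for (linkp = headp; *linkp != NULL; linkp = &(*linkp)->next) {
                if ((*linkp)->cd > e->cd)
                        break;
        }
        e->next = *linkp;
        *linkp = e;
}

int
main(void)
{
        struct ent a = { 0, NULL }, b = { 2, NULL }, c = { 1, NULL };
        struct ent *head = NULL, *e;

        sorted_insert(&head, &a);
        sorted_insert(&head, &b);
        sorted_insert(&head, &c);
        for (e = head; e != NULL; e = e->next)
                printf("%u ", e->cd);           /* prints 0 1 2 */
        printf("\n");
        return (0);
}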
++ */ ++ for (chunkp = LEAF_HASH_ENTPTR(l, le->le_hash); ++ *chunkp != CHAIN_END; chunkp = &le2->le_next) { ++ le2 = ZAP_LEAF_ENTRY(l, *chunkp); ++ if (le2->le_cd > le->le_cd) ++ break; ++ } ++ ++ le->le_next = *chunkp; ++ *chunkp = entry; ++ return (chunkp); ++} ++ ++static uint16_t ++zap_leaf_transfer_array(zap_leaf_t *l, uint16_t chunk, zap_leaf_t *nl) ++{ ++ uint16_t new_chunk; ++ uint16_t *nchunkp = &new_chunk; ++ ++ while (chunk != CHAIN_END) { ++ uint16_t nchunk = zap_leaf_chunk_alloc(nl); ++ struct zap_leaf_array *nla = ++ &ZAP_LEAF_CHUNK(nl, nchunk).l_array; ++ struct zap_leaf_array *la = ++ &ZAP_LEAF_CHUNK(l, chunk).l_array; ++ int nextchunk = la->la_next; ++ ++ ASSERT3U(chunk, <, ZAP_LEAF_NUMCHUNKS(l)); ++ ASSERT3U(nchunk, <, ZAP_LEAF_NUMCHUNKS(l)); ++ ++ *nla = *la; /* structure assignment */ ++ ++ zap_leaf_chunk_free(l, chunk); ++ chunk = nextchunk; ++ *nchunkp = nchunk; ++ nchunkp = &nla->la_next; ++ } ++ *nchunkp = CHAIN_END; ++ return (new_chunk); ++} ++ ++static void ++zap_leaf_transfer_entry(zap_leaf_t *l, int entry, zap_leaf_t *nl) ++{ ++ struct zap_leaf_entry *le, *nle; ++ uint16_t chunk; ++ ++ le = ZAP_LEAF_ENTRY(l, entry); ++ ASSERT3U(le->le_type, ==, ZAP_CHUNK_ENTRY); ++ ++ chunk = zap_leaf_chunk_alloc(nl); ++ nle = ZAP_LEAF_ENTRY(nl, chunk); ++ *nle = *le; /* structure assignment */ ++ ++ (void) zap_leaf_rehash_entry(nl, chunk); ++ ++ nle->le_name_chunk = zap_leaf_transfer_array(l, le->le_name_chunk, nl); ++ nle->le_value_chunk = ++ zap_leaf_transfer_array(l, le->le_value_chunk, nl); ++ ++ zap_leaf_chunk_free(l, entry); ++ ++ l->l_phys->l_hdr.lh_nentries--; ++ nl->l_phys->l_hdr.lh_nentries++; ++} ++ ++/* ++ * Transfer the entries whose hash prefix ends in 1 to the new leaf. ++ */ ++void ++zap_leaf_split(zap_leaf_t *l, zap_leaf_t *nl, boolean_t sort) ++{ ++ int i; ++ int bit = 64 - 1 - l->l_phys->l_hdr.lh_prefix_len; ++ ++ /* set new prefix and prefix_len */ ++ l->l_phys->l_hdr.lh_prefix <<= 1; ++ l->l_phys->l_hdr.lh_prefix_len++; ++ nl->l_phys->l_hdr.lh_prefix = l->l_phys->l_hdr.lh_prefix | 1; ++ nl->l_phys->l_hdr.lh_prefix_len = l->l_phys->l_hdr.lh_prefix_len; ++ ++ /* break existing hash chains */ ++ zap_memset(l->l_phys->l_hash, CHAIN_END, 2*ZAP_LEAF_HASH_NUMENTRIES(l)); ++ ++ if (sort) ++ l->l_phys->l_hdr.lh_flags |= ZLF_ENTRIES_CDSORTED; ++ ++ /* ++ * Transfer entries whose hash bit 'bit' is set to nl; rehash ++ * the remaining entries ++ * ++ * NB: We could find entries via the hashtable instead. That ++ * would be O(hashents+numents) rather than O(numblks+numents), ++ * but this accesses memory more sequentially, and when we're ++ * called, the block is usually pretty full. 
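zap_leaf_split() below lengthens the leaf's hash prefix by one bit: entries whose next hash bit is 0 stay in the old leaf, and entries whose bit is 1 move to the new sibling. A minimal stand-alone illustration of that partition, using the same 64 - 1 - prefix_len bit position as the code:

#include <stdint.h>
#include <stdio.h>

/*
 * Nonzero if an entry with this hash moves to the new (bit == 1) sibling
 * when a leaf whose prefix is old_prefix_len bits long is split.
 */
static int
moves_to_sibling(uint64_t hash, int old_prefix_len)
{
        int bit = 64 - 1 - old_prefix_len;      /* the newly decided hash bit */

        return ((int)((hash >> bit) & 1));
}

int
main(void)
{
        /* with one prefix bit already fixed, bit 62 decides the split */
        printf("%d %d\n",
            moves_to_sibling(0xC000000000000000ULL, 1),         /* bit 62 set: 1 */
            moves_to_sibling(0x8000000000000000ULL, 1));        /* bit 62 clear: 0 */
        return (0);
}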
++ */ ++ for (i = 0; i < ZAP_LEAF_NUMCHUNKS(l); i++) { ++ struct zap_leaf_entry *le = ZAP_LEAF_ENTRY(l, i); ++ if (le->le_type != ZAP_CHUNK_ENTRY) ++ continue; ++ ++ if (le->le_hash & (1ULL << bit)) ++ zap_leaf_transfer_entry(l, i, nl); ++ else ++ (void) zap_leaf_rehash_entry(l, i); ++ } ++} ++ ++void ++zap_leaf_stats(zap_t *zap, zap_leaf_t *l, zap_stats_t *zs) ++{ ++ int i, n; ++ ++ n = zap->zap_f.zap_phys->zap_ptrtbl.zt_shift - ++ l->l_phys->l_hdr.lh_prefix_len; ++ n = MIN(n, ZAP_HISTOGRAM_SIZE-1); ++ zs->zs_leafs_with_2n_pointers[n]++; ++ ++ ++ n = l->l_phys->l_hdr.lh_nentries/5; ++ n = MIN(n, ZAP_HISTOGRAM_SIZE-1); ++ zs->zs_blocks_with_n5_entries[n]++; ++ ++ n = ((1<l_phys->l_hdr.lh_nfree * (ZAP_LEAF_ARRAY_BYTES+1))*10 / ++ (1<zs_blocks_n_tenths_full[n]++; ++ ++ for (i = 0; i < ZAP_LEAF_HASH_NUMENTRIES(l); i++) { ++ int nentries = 0; ++ int chunk = l->l_phys->l_hash[i]; ++ ++ while (chunk != CHAIN_END) { ++ struct zap_leaf_entry *le = ++ ZAP_LEAF_ENTRY(l, chunk); ++ ++ n = 1 + ZAP_LEAF_ARRAY_NCHUNKS(le->le_name_numints) + ++ ZAP_LEAF_ARRAY_NCHUNKS(le->le_value_numints * ++ le->le_value_intlen); ++ n = MIN(n, ZAP_HISTOGRAM_SIZE-1); ++ zs->zs_entries_using_n_chunks[n]++; ++ ++ chunk = le->le_next; ++ nentries++; ++ } ++ ++ n = nentries; ++ n = MIN(n, ZAP_HISTOGRAM_SIZE-1); ++ zs->zs_buckets_with_n_entries[n]++; ++ } ++} +diff -uNr linux-3.2.33-go.orig/fs/zfs/zfs/zap_micro.c linux-3.2.33-go/fs/zfs/zfs/zap_micro.c +--- linux-3.2.33-go.orig/fs/zfs/zfs/zap_micro.c 1970-01-01 01:00:00.000000000 +0100 ++++ linux-3.2.33-go/fs/zfs/zfs/zap_micro.c 2012-11-16 23:25:34.353039289 +0100 +@@ -0,0 +1,1500 @@ ++/* ++ * CDDL HEADER START ++ * ++ * The contents of this file are subject to the terms of the ++ * Common Development and Distribution License (the "License"). ++ * You may not use this file except in compliance with the License. ++ * ++ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE ++ * or http://www.opensolaris.org/os/licensing. ++ * See the License for the specific language governing permissions ++ * and limitations under the License. ++ * ++ * When distributing Covered Code, include this CDDL HEADER in each ++ * file and include the License file at usr/src/OPENSOLARIS.LICENSE. ++ * If applicable, add the following below this CDDL HEADER, with the ++ * fields enclosed by brackets "[]" replaced with your own identifying ++ * information: Portions Copyright [yyyy] [name of copyright owner] ++ * ++ * CDDL HEADER END ++ */ ++/* ++ * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2011 by Delphix. All rights reserved. 
++ */ ++ ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++ ++#ifdef _KERNEL ++#include ++#endif ++ ++static int mzap_upgrade(zap_t **zapp, dmu_tx_t *tx, zap_flags_t flags); ++ ++uint64_t ++zap_getflags(zap_t *zap) ++{ ++ if (zap->zap_ismicro) ++ return (0); ++ return (zap->zap_u.zap_fat.zap_phys->zap_flags); ++} ++ ++int ++zap_hashbits(zap_t *zap) ++{ ++ if (zap_getflags(zap) & ZAP_FLAG_HASH64) ++ return (48); ++ else ++ return (28); ++} ++ ++uint32_t ++zap_maxcd(zap_t *zap) ++{ ++ if (zap_getflags(zap) & ZAP_FLAG_HASH64) ++ return ((1<<16)-1); ++ else ++ return (-1U); ++} ++ ++static uint64_t ++zap_hash(zap_name_t *zn) ++{ ++ zap_t *zap = zn->zn_zap; ++ uint64_t h = 0; ++ ++ if (zap_getflags(zap) & ZAP_FLAG_PRE_HASHED_KEY) { ++ ASSERT(zap_getflags(zap) & ZAP_FLAG_UINT64_KEY); ++ h = *(uint64_t *)zn->zn_key_orig; ++ } else { ++ h = zap->zap_salt; ++ ASSERT(h != 0); ++ ASSERT(zfs_crc64_table[128] == ZFS_CRC64_POLY); ++ ++ if (zap_getflags(zap) & ZAP_FLAG_UINT64_KEY) { ++ int i; ++ const uint64_t *wp = zn->zn_key_norm; ++ ++ ASSERT(zn->zn_key_intlen == 8); ++ for (i = 0; i < zn->zn_key_norm_numints; wp++, i++) { ++ int j; ++ uint64_t word = *wp; ++ ++ for (j = 0; j < zn->zn_key_intlen; j++) { ++ h = (h >> 8) ^ ++ zfs_crc64_table[(h ^ word) & 0xFF]; ++ word >>= NBBY; ++ } ++ } ++ } else { ++ int i, len; ++ const uint8_t *cp = zn->zn_key_norm; ++ ++ /* ++ * We previously stored the terminating null on ++ * disk, but didn't hash it, so we need to ++ * continue to not hash it. (The ++ * zn_key_*_numints includes the terminating ++ * null for non-binary keys.) ++ */ ++ len = zn->zn_key_norm_numints - 1; ++ ++ ASSERT(zn->zn_key_intlen == 1); ++ for (i = 0; i < len; cp++, i++) { ++ h = (h >> 8) ^ ++ zfs_crc64_table[(h ^ *cp) & 0xFF]; ++ } ++ } ++ } ++ /* ++ * Don't use all 64 bits, since we need some in the cookie for ++ * the collision differentiator. We MUST use the high bits, ++ * since those are the ones that we first pay attention to when ++ * chosing the bucket. 
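zap_hash() above keeps only the high-order bits of the hash (28 by default, 48 when the HASH64 flag is set, per zap_hashbits()), leaving the low bits free for the collision differentiator; the pointer-table index is then simply the top zt_shift bits of the hash. A small stand-alone illustration of that masking and indexing, where hash_idx is intended to mirror the ZAP_HASH_IDX computation used throughout these files:

#include <stdint.h>
#include <stdio.h>

/* keep only the top 'hashbits' bits, as the final mask in zap_hash() does */
static uint64_t
mask_high_bits(uint64_t h, int hashbits)
{
        return (h & ~((1ULL << (64 - hashbits)) - 1));
}

/* pointer-table index: the top 'shift' bits of the hash */
static uint64_t
hash_idx(uint64_t h, int shift)
{
        return (shift == 0 ? 0 : h >> (64 - shift));
}

int
main(void)
{
        uint64_t h = mask_high_bits(0x123456789abcdef0ULL, 28);

        printf("masked hash %016llx, ptrtbl index %llu\n",
            (unsigned long long)h, (unsigned long long)hash_idx(h, 10));
        return (0);
}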
++ */ ++ h &= ~((1ULL << (64 - zap_hashbits(zap))) - 1); ++ ++ return (h); ++} ++ ++static int ++zap_normalize(zap_t *zap, const char *name, char *namenorm) ++{ ++ size_t inlen, outlen; ++ int err; ++ ++ ASSERT(!(zap_getflags(zap) & ZAP_FLAG_UINT64_KEY)); ++ ++ inlen = strlen(name) + 1; ++ outlen = ZAP_MAXNAMELEN; ++ ++ err = 0; ++ (void) u8_textprep_str((char *)name, &inlen, namenorm, &outlen, ++ zap->zap_normflags | U8_TEXTPREP_IGNORE_NULL | ++ U8_TEXTPREP_IGNORE_INVALID, U8_UNICODE_LATEST, &err); ++ ++ return (err); ++} ++ ++boolean_t ++zap_match(zap_name_t *zn, const char *matchname) ++{ ++ ASSERT(!(zap_getflags(zn->zn_zap) & ZAP_FLAG_UINT64_KEY)); ++ ++ if (zn->zn_matchtype == MT_FIRST) { ++ char norm[ZAP_MAXNAMELEN]; ++ ++ if (zap_normalize(zn->zn_zap, matchname, norm) != 0) ++ return (B_FALSE); ++ ++ return (strcmp(zn->zn_key_norm, norm) == 0); ++ } else { ++ /* MT_BEST or MT_EXACT */ ++ return (strcmp(zn->zn_key_orig, matchname) == 0); ++ } ++} ++ ++void ++zap_name_free(zap_name_t *zn) ++{ ++ kmem_free(zn, sizeof (zap_name_t)); ++} ++ ++zap_name_t * ++zap_name_alloc(zap_t *zap, const char *key, matchtype_t mt) ++{ ++ zap_name_t *zn = kmem_alloc(sizeof (zap_name_t), KM_PUSHPAGE); ++ ++ zn->zn_zap = zap; ++ zn->zn_key_intlen = sizeof (*key); ++ zn->zn_key_orig = key; ++ zn->zn_key_orig_numints = strlen(zn->zn_key_orig) + 1; ++ zn->zn_matchtype = mt; ++ if (zap->zap_normflags) { ++ if (zap_normalize(zap, key, zn->zn_normbuf) != 0) { ++ zap_name_free(zn); ++ return (NULL); ++ } ++ zn->zn_key_norm = zn->zn_normbuf; ++ zn->zn_key_norm_numints = strlen(zn->zn_key_norm) + 1; ++ } else { ++ if (mt != MT_EXACT) { ++ zap_name_free(zn); ++ return (NULL); ++ } ++ zn->zn_key_norm = zn->zn_key_orig; ++ zn->zn_key_norm_numints = zn->zn_key_orig_numints; ++ } ++ ++ zn->zn_hash = zap_hash(zn); ++ return (zn); ++} ++ ++zap_name_t * ++zap_name_alloc_uint64(zap_t *zap, const uint64_t *key, int numints) ++{ ++ zap_name_t *zn = kmem_alloc(sizeof (zap_name_t), KM_PUSHPAGE); ++ ++ ASSERT(zap->zap_normflags == 0); ++ zn->zn_zap = zap; ++ zn->zn_key_intlen = sizeof (*key); ++ zn->zn_key_orig = zn->zn_key_norm = key; ++ zn->zn_key_orig_numints = zn->zn_key_norm_numints = numints; ++ zn->zn_matchtype = MT_EXACT; ++ ++ zn->zn_hash = zap_hash(zn); ++ return (zn); ++} ++ ++static void ++mzap_byteswap(mzap_phys_t *buf, size_t size) ++{ ++ int i, max; ++ buf->mz_block_type = BSWAP_64(buf->mz_block_type); ++ buf->mz_salt = BSWAP_64(buf->mz_salt); ++ buf->mz_normflags = BSWAP_64(buf->mz_normflags); ++ max = (size / MZAP_ENT_LEN) - 1; ++ for (i = 0; i < max; i++) { ++ buf->mz_chunk[i].mze_value = ++ BSWAP_64(buf->mz_chunk[i].mze_value); ++ buf->mz_chunk[i].mze_cd = ++ BSWAP_32(buf->mz_chunk[i].mze_cd); ++ } ++} ++ ++void ++zap_byteswap(void *buf, size_t size) ++{ ++ uint64_t block_type; ++ ++ block_type = *(uint64_t *)buf; ++ ++ if (block_type == ZBT_MICRO || block_type == BSWAP_64(ZBT_MICRO)) { ++ /* ASSERT(magic == ZAP_LEAF_MAGIC); */ ++ mzap_byteswap(buf, size); ++ } else { ++ fzap_byteswap(buf, size); ++ } ++} ++ ++static int ++mze_compare(const void *arg1, const void *arg2) ++{ ++ const mzap_ent_t *mze1 = arg1; ++ const mzap_ent_t *mze2 = arg2; ++ ++ if (mze1->mze_hash > mze2->mze_hash) ++ return (+1); ++ if (mze1->mze_hash < mze2->mze_hash) ++ return (-1); ++ if (mze1->mze_cd > mze2->mze_cd) ++ return (+1); ++ if (mze1->mze_cd < mze2->mze_cd) ++ return (-1); ++ return (0); ++} ++ ++static void ++mze_insert(zap_t *zap, int chunkid, uint64_t hash) ++{ ++ mzap_ent_t *mze; ++ ++ ASSERT(zap->zap_ismicro); ++ 
ASSERT(RW_WRITE_HELD(&zap->zap_rwlock)); ++ ++ mze = kmem_alloc(sizeof (mzap_ent_t), KM_PUSHPAGE); ++ mze->mze_chunkid = chunkid; ++ mze->mze_hash = hash; ++ mze->mze_cd = MZE_PHYS(zap, mze)->mze_cd; ++ ASSERT(MZE_PHYS(zap, mze)->mze_name[0] != 0); ++ avl_add(&zap->zap_m.zap_avl, mze); ++} ++ ++static mzap_ent_t * ++mze_find(zap_name_t *zn) ++{ ++ mzap_ent_t mze_tofind; ++ mzap_ent_t *mze; ++ avl_index_t idx; ++ avl_tree_t *avl = &zn->zn_zap->zap_m.zap_avl; ++ ++ ASSERT(zn->zn_zap->zap_ismicro); ++ ASSERT(RW_LOCK_HELD(&zn->zn_zap->zap_rwlock)); ++ ++ mze_tofind.mze_hash = zn->zn_hash; ++ mze_tofind.mze_cd = 0; ++ ++again: ++ mze = avl_find(avl, &mze_tofind, &idx); ++ if (mze == NULL) ++ mze = avl_nearest(avl, idx, AVL_AFTER); ++ for (; mze && mze->mze_hash == zn->zn_hash; mze = AVL_NEXT(avl, mze)) { ++ ASSERT3U(mze->mze_cd, ==, MZE_PHYS(zn->zn_zap, mze)->mze_cd); ++ if (zap_match(zn, MZE_PHYS(zn->zn_zap, mze)->mze_name)) ++ return (mze); ++ } ++ if (zn->zn_matchtype == MT_BEST) { ++ zn->zn_matchtype = MT_FIRST; ++ goto again; ++ } ++ return (NULL); ++} ++ ++static uint32_t ++mze_find_unused_cd(zap_t *zap, uint64_t hash) ++{ ++ mzap_ent_t mze_tofind; ++ mzap_ent_t *mze; ++ avl_index_t idx; ++ avl_tree_t *avl = &zap->zap_m.zap_avl; ++ uint32_t cd; ++ ++ ASSERT(zap->zap_ismicro); ++ ASSERT(RW_LOCK_HELD(&zap->zap_rwlock)); ++ ++ mze_tofind.mze_hash = hash; ++ mze_tofind.mze_cd = 0; ++ ++ cd = 0; ++ for (mze = avl_find(avl, &mze_tofind, &idx); ++ mze && mze->mze_hash == hash; mze = AVL_NEXT(avl, mze)) { ++ if (mze->mze_cd != cd) ++ break; ++ cd++; ++ } ++ ++ return (cd); ++} ++ ++static void ++mze_remove(zap_t *zap, mzap_ent_t *mze) ++{ ++ ASSERT(zap->zap_ismicro); ++ ASSERT(RW_WRITE_HELD(&zap->zap_rwlock)); ++ ++ avl_remove(&zap->zap_m.zap_avl, mze); ++ kmem_free(mze, sizeof (mzap_ent_t)); ++} ++ ++static void ++mze_destroy(zap_t *zap) ++{ ++ mzap_ent_t *mze; ++ void *avlcookie = NULL; ++ ++ while ((mze = avl_destroy_nodes(&zap->zap_m.zap_avl, &avlcookie))) ++ kmem_free(mze, sizeof (mzap_ent_t)); ++ avl_destroy(&zap->zap_m.zap_avl); ++} ++ ++static zap_t * ++mzap_open(objset_t *os, uint64_t obj, dmu_buf_t *db) ++{ ++ zap_t *winner; ++ zap_t *zap; ++ int i; ++ ++ ASSERT3U(MZAP_ENT_LEN, ==, sizeof (mzap_ent_phys_t)); ++ ++ zap = kmem_zalloc(sizeof (zap_t), KM_PUSHPAGE); ++ rw_init(&zap->zap_rwlock, NULL, RW_DEFAULT, NULL); ++ rw_enter(&zap->zap_rwlock, RW_WRITER); ++ zap->zap_objset = os; ++ zap->zap_object = obj; ++ zap->zap_dbuf = db; ++ ++ if (*(uint64_t *)db->db_data != ZBT_MICRO) { ++ mutex_init(&zap->zap_f.zap_num_entries_mtx, 0, 0, 0); ++ zap->zap_f.zap_block_shift = highbit(db->db_size) - 1; ++ } else { ++ zap->zap_ismicro = TRUE; ++ } ++ ++ /* ++ * Make sure that zap_ismicro is set before we let others see ++ * it, because zap_lockdir() checks zap_ismicro without the lock ++ * held. 
++ */
++ winner = dmu_buf_set_user(db, zap, &zap->zap_m.zap_phys, zap_evict);
++
++ if (winner != NULL) {
++ rw_exit(&zap->zap_rwlock);
++ rw_destroy(&zap->zap_rwlock);
++ if (!zap->zap_ismicro)
++ mutex_destroy(&zap->zap_f.zap_num_entries_mtx);
++ kmem_free(zap, sizeof (zap_t));
++ return (winner);
++ }
++
++ if (zap->zap_ismicro) {
++ zap->zap_salt = zap->zap_m.zap_phys->mz_salt;
++ zap->zap_normflags = zap->zap_m.zap_phys->mz_normflags;
++ zap->zap_m.zap_num_chunks = db->db_size / MZAP_ENT_LEN - 1;
++ avl_create(&zap->zap_m.zap_avl, mze_compare,
++ sizeof (mzap_ent_t), offsetof(mzap_ent_t, mze_node));
++
++ for (i = 0; i < zap->zap_m.zap_num_chunks; i++) {
++ mzap_ent_phys_t *mze =
++ &zap->zap_m.zap_phys->mz_chunk[i];
++ if (mze->mze_name[0]) {
++ zap_name_t *zn;
++
++ zap->zap_m.zap_num_entries++;
++ zn = zap_name_alloc(zap, mze->mze_name,
++ MT_EXACT);
++ mze_insert(zap, i, zn->zn_hash);
++ zap_name_free(zn);
++ }
++ }
++ } else {
++ zap->zap_salt = zap->zap_f.zap_phys->zap_salt;
++ zap->zap_normflags = zap->zap_f.zap_phys->zap_normflags;
++
++ ASSERT3U(sizeof (struct zap_leaf_header), ==,
++ 2*ZAP_LEAF_CHUNKSIZE);
++
++ /*
++ * The embedded pointer table should not overlap the
++ * other members.
++ */
++ ASSERT3P(&ZAP_EMBEDDED_PTRTBL_ENT(zap, 0), >,
++ &zap->zap_f.zap_phys->zap_salt);
++
++ /*
++ * The embedded pointer table should end at the end of
++ * the block
++ */
++ ASSERT3U((uintptr_t)&ZAP_EMBEDDED_PTRTBL_ENT(zap,
++ 1<<ZAP_EMBEDDED_PTRTBL_SHIFT(zap)) -
++ (uintptr_t)zap->zap_f.zap_phys, ==,
++ zap->zap_dbuf->db_size);
++ }
++ rw_exit(&zap->zap_rwlock);
++ return (zap);
++}
++
++int
++zap_lockdir(objset_t *os, uint64_t obj, dmu_tx_t *tx,
++ krw_t lti, boolean_t fatreader, boolean_t adding, zap_t **zapp)
++{
++ zap_t *zap;
++ dmu_buf_t *db;
++ krw_t lt;
++ int err;
++
++ *zapp = NULL;
++
++ err = dmu_buf_hold(os, obj, 0, NULL, &db, DMU_READ_NO_PREFETCH);
++ if (err)
++ return (err);
++
++#ifdef ZFS_DEBUG
++ {
++ dmu_object_info_t doi;
++ dmu_object_info_from_db(db, &doi);
++ ASSERT(dmu_ot[doi.doi_type].ot_byteswap == zap_byteswap);
++ }
++#endif
++
++ zap = dmu_buf_get_user(db);
++ if (zap == NULL)
++ zap = mzap_open(os, obj, db);
++
++ /*
++ * We're checking zap_ismicro without the lock held, in order to
++ * tell what type of lock we want. Once we have some sort of
++ * lock, see if it really is the right type. In practice this
++ * can only be different if it was upgraded from micro to fat,
++ * and micro wanted WRITER but fat only needs READER.
++ */
++ lt = (!zap->zap_ismicro && fatreader) ? RW_READER : lti;
++ rw_enter(&zap->zap_rwlock, lt);
++ if (lt != ((!zap->zap_ismicro && fatreader) ? RW_READER : lti)) {
++ /* it was upgraded, now we only need reader */
++ ASSERT(lt == RW_WRITER);
++ ASSERT(RW_READER ==
++ (!zap->zap_ismicro && fatreader) ?
RW_READER : lti); ++ rw_downgrade(&zap->zap_rwlock); ++ lt = RW_READER; ++ } ++ ++ zap->zap_objset = os; ++ ++ if (lt == RW_WRITER) ++ dmu_buf_will_dirty(db, tx); ++ ++ ASSERT3P(zap->zap_dbuf, ==, db); ++ ++ ASSERT(!zap->zap_ismicro || ++ zap->zap_m.zap_num_entries <= zap->zap_m.zap_num_chunks); ++ if (zap->zap_ismicro && tx && adding && ++ zap->zap_m.zap_num_entries == zap->zap_m.zap_num_chunks) { ++ uint64_t newsz = db->db_size + SPA_MINBLOCKSIZE; ++ if (newsz > MZAP_MAX_BLKSZ) { ++ dprintf("upgrading obj %llu: num_entries=%u\n", ++ obj, zap->zap_m.zap_num_entries); ++ *zapp = zap; ++ return (mzap_upgrade(zapp, tx, 0)); ++ } ++ err = dmu_object_set_blocksize(os, obj, newsz, 0, tx); ++ ASSERT3U(err, ==, 0); ++ zap->zap_m.zap_num_chunks = ++ db->db_size / MZAP_ENT_LEN - 1; ++ } ++ ++ *zapp = zap; ++ return (0); ++} ++ ++void ++zap_unlockdir(zap_t *zap) ++{ ++ rw_exit(&zap->zap_rwlock); ++ dmu_buf_rele(zap->zap_dbuf, NULL); ++} ++ ++static int ++mzap_upgrade(zap_t **zapp, dmu_tx_t *tx, zap_flags_t flags) ++{ ++ mzap_phys_t *mzp; ++ int i, sz, nchunks; ++ int err = 0; ++ zap_t *zap = *zapp; ++ ++ ASSERT(RW_WRITE_HELD(&zap->zap_rwlock)); ++ ++ sz = zap->zap_dbuf->db_size; ++ mzp = kmem_alloc(sz, KM_PUSHPAGE | KM_NODEBUG); ++ bcopy(zap->zap_dbuf->db_data, mzp, sz); ++ nchunks = zap->zap_m.zap_num_chunks; ++ ++ if (!flags) { ++ err = dmu_object_set_blocksize(zap->zap_objset, zap->zap_object, ++ 1ULL << fzap_default_block_shift, 0, tx); ++ if (err) { ++ kmem_free(mzp, sz); ++ return (err); ++ } ++ } ++ ++ dprintf("upgrading obj=%llu with %u chunks\n", ++ zap->zap_object, nchunks); ++ /* XXX destroy the avl later, so we can use the stored hash value */ ++ mze_destroy(zap); ++ ++ fzap_upgrade(zap, tx, flags); ++ ++ for (i = 0; i < nchunks; i++) { ++ mzap_ent_phys_t *mze = &mzp->mz_chunk[i]; ++ zap_name_t *zn; ++ if (mze->mze_name[0] == 0) ++ continue; ++ dprintf("adding %s=%llu\n", ++ mze->mze_name, mze->mze_value); ++ zn = zap_name_alloc(zap, mze->mze_name, MT_EXACT); ++ err = fzap_add_cd(zn, 8, 1, &mze->mze_value, mze->mze_cd, tx); ++ zap = zn->zn_zap; /* fzap_add_cd() may change zap */ ++ zap_name_free(zn); ++ if (err) ++ break; ++ } ++ kmem_free(mzp, sz); ++ *zapp = zap; ++ return (err); ++} ++ ++static void ++mzap_create_impl(objset_t *os, uint64_t obj, int normflags, zap_flags_t flags, ++ dmu_tx_t *tx) ++{ ++ dmu_buf_t *db; ++ mzap_phys_t *zp; ++ ++ VERIFY(0 == dmu_buf_hold(os, obj, 0, FTAG, &db, DMU_READ_NO_PREFETCH)); ++ ++#ifdef ZFS_DEBUG ++ { ++ dmu_object_info_t doi; ++ dmu_object_info_from_db(db, &doi); ++ ASSERT(dmu_ot[doi.doi_type].ot_byteswap == zap_byteswap); ++ } ++#endif ++ ++ dmu_buf_will_dirty(db, tx); ++ zp = db->db_data; ++ zp->mz_block_type = ZBT_MICRO; ++ zp->mz_salt = ((uintptr_t)db ^ (uintptr_t)tx ^ (obj << 1)) | 1ULL; ++ zp->mz_normflags = normflags; ++ dmu_buf_rele(db, FTAG); ++ ++ if (flags != 0) { ++ zap_t *zap; ++ /* Only fat zap supports flags; upgrade immediately. 
*/ ++ VERIFY(0 == zap_lockdir(os, obj, tx, RW_WRITER, ++ B_FALSE, B_FALSE, &zap)); ++ VERIFY3U(0, ==, mzap_upgrade(&zap, tx, flags)); ++ zap_unlockdir(zap); ++ } ++} ++ ++int ++zap_create_claim(objset_t *os, uint64_t obj, dmu_object_type_t ot, ++ dmu_object_type_t bonustype, int bonuslen, dmu_tx_t *tx) ++{ ++ return (zap_create_claim_norm(os, obj, ++ 0, ot, bonustype, bonuslen, tx)); ++} ++ ++int ++zap_create_claim_norm(objset_t *os, uint64_t obj, int normflags, ++ dmu_object_type_t ot, ++ dmu_object_type_t bonustype, int bonuslen, dmu_tx_t *tx) ++{ ++ int err; ++ ++ err = dmu_object_claim(os, obj, ot, 0, bonustype, bonuslen, tx); ++ if (err != 0) ++ return (err); ++ mzap_create_impl(os, obj, normflags, 0, tx); ++ return (0); ++} ++ ++uint64_t ++zap_create(objset_t *os, dmu_object_type_t ot, ++ dmu_object_type_t bonustype, int bonuslen, dmu_tx_t *tx) ++{ ++ return (zap_create_norm(os, 0, ot, bonustype, bonuslen, tx)); ++} ++ ++uint64_t ++zap_create_norm(objset_t *os, int normflags, dmu_object_type_t ot, ++ dmu_object_type_t bonustype, int bonuslen, dmu_tx_t *tx) ++{ ++ uint64_t obj = dmu_object_alloc(os, ot, 0, bonustype, bonuslen, tx); ++ ++ mzap_create_impl(os, obj, normflags, 0, tx); ++ return (obj); ++} ++ ++uint64_t ++zap_create_flags(objset_t *os, int normflags, zap_flags_t flags, ++ dmu_object_type_t ot, int leaf_blockshift, int indirect_blockshift, ++ dmu_object_type_t bonustype, int bonuslen, dmu_tx_t *tx) ++{ ++ uint64_t obj = dmu_object_alloc(os, ot, 0, bonustype, bonuslen, tx); ++ ++ ASSERT(leaf_blockshift >= SPA_MINBLOCKSHIFT && ++ leaf_blockshift <= SPA_MAXBLOCKSHIFT && ++ indirect_blockshift >= SPA_MINBLOCKSHIFT && ++ indirect_blockshift <= SPA_MAXBLOCKSHIFT); ++ ++ VERIFY(dmu_object_set_blocksize(os, obj, ++ 1ULL << leaf_blockshift, indirect_blockshift, tx) == 0); ++ ++ mzap_create_impl(os, obj, normflags, flags, tx); ++ return (obj); ++} ++ ++int ++zap_destroy(objset_t *os, uint64_t zapobj, dmu_tx_t *tx) ++{ ++ /* ++ * dmu_object_free will free the object number and free the ++ * data. Freeing the data will cause our pageout function to be ++ * called, which will destroy our data (zap_leaf_t's and zap_t). ++ */ ++ ++ return (dmu_object_free(os, zapobj, tx)); ++} ++ ++_NOTE(ARGSUSED(0)) ++void ++zap_evict(dmu_buf_t *db, void *vzap) ++{ ++ zap_t *zap = vzap; ++ ++ rw_destroy(&zap->zap_rwlock); ++ ++ if (zap->zap_ismicro) ++ mze_destroy(zap); ++ else ++ mutex_destroy(&zap->zap_f.zap_num_entries_mtx); ++ ++ kmem_free(zap, sizeof (zap_t)); ++} ++ ++int ++zap_count(objset_t *os, uint64_t zapobj, uint64_t *count) ++{ ++ zap_t *zap; ++ int err; ++ ++ err = zap_lockdir(os, zapobj, NULL, RW_READER, TRUE, FALSE, &zap); ++ if (err) ++ return (err); ++ if (!zap->zap_ismicro) { ++ err = fzap_count(zap, count); ++ } else { ++ *count = zap->zap_m.zap_num_entries; ++ } ++ zap_unlockdir(zap); ++ return (err); ++} ++ ++/* ++ * zn may be NULL; if not specified, it will be computed if needed. ++ * See also the comment above zap_entry_normalization_conflict(). 
++ */ ++static boolean_t ++mzap_normalization_conflict(zap_t *zap, zap_name_t *zn, mzap_ent_t *mze) ++{ ++ mzap_ent_t *other; ++ int direction = AVL_BEFORE; ++ boolean_t allocdzn = B_FALSE; ++ ++ if (zap->zap_normflags == 0) ++ return (B_FALSE); ++ ++again: ++ for (other = avl_walk(&zap->zap_m.zap_avl, mze, direction); ++ other && other->mze_hash == mze->mze_hash; ++ other = avl_walk(&zap->zap_m.zap_avl, other, direction)) { ++ ++ if (zn == NULL) { ++ zn = zap_name_alloc(zap, MZE_PHYS(zap, mze)->mze_name, ++ MT_FIRST); ++ allocdzn = B_TRUE; ++ } ++ if (zap_match(zn, MZE_PHYS(zap, other)->mze_name)) { ++ if (allocdzn) ++ zap_name_free(zn); ++ return (B_TRUE); ++ } ++ } ++ ++ if (direction == AVL_BEFORE) { ++ direction = AVL_AFTER; ++ goto again; ++ } ++ ++ if (allocdzn) ++ zap_name_free(zn); ++ return (B_FALSE); ++} ++ ++/* ++ * Routines for manipulating attributes. ++ */ ++ ++int ++zap_lookup(objset_t *os, uint64_t zapobj, const char *name, ++ uint64_t integer_size, uint64_t num_integers, void *buf) ++{ ++ return (zap_lookup_norm(os, zapobj, name, integer_size, ++ num_integers, buf, MT_EXACT, NULL, 0, NULL)); ++} ++ ++int ++zap_lookup_norm(objset_t *os, uint64_t zapobj, const char *name, ++ uint64_t integer_size, uint64_t num_integers, void *buf, ++ matchtype_t mt, char *realname, int rn_len, ++ boolean_t *ncp) ++{ ++ zap_t *zap; ++ int err; ++ mzap_ent_t *mze; ++ zap_name_t *zn; ++ ++ err = zap_lockdir(os, zapobj, NULL, RW_READER, TRUE, FALSE, &zap); ++ if (err) ++ return (err); ++ zn = zap_name_alloc(zap, name, mt); ++ if (zn == NULL) { ++ zap_unlockdir(zap); ++ return (ENOTSUP); ++ } ++ ++ if (!zap->zap_ismicro) { ++ err = fzap_lookup(zn, integer_size, num_integers, buf, ++ realname, rn_len, ncp); ++ } else { ++ mze = mze_find(zn); ++ if (mze == NULL) { ++ err = ENOENT; ++ } else { ++ if (num_integers < 1) { ++ err = EOVERFLOW; ++ } else if (integer_size != 8) { ++ err = EINVAL; ++ } else { ++ *(uint64_t *)buf = ++ MZE_PHYS(zap, mze)->mze_value; ++ (void) strlcpy(realname, ++ MZE_PHYS(zap, mze)->mze_name, rn_len); ++ if (ncp) { ++ *ncp = mzap_normalization_conflict(zap, ++ zn, mze); ++ } ++ } ++ } ++ } ++ zap_name_free(zn); ++ zap_unlockdir(zap); ++ return (err); ++} ++ ++int ++zap_prefetch_uint64(objset_t *os, uint64_t zapobj, const uint64_t *key, ++ int key_numints) ++{ ++ zap_t *zap; ++ int err; ++ zap_name_t *zn; ++ ++ err = zap_lockdir(os, zapobj, NULL, RW_READER, TRUE, FALSE, &zap); ++ if (err) ++ return (err); ++ zn = zap_name_alloc_uint64(zap, key, key_numints); ++ if (zn == NULL) { ++ zap_unlockdir(zap); ++ return (ENOTSUP); ++ } ++ ++ fzap_prefetch(zn); ++ zap_name_free(zn); ++ zap_unlockdir(zap); ++ return (err); ++} ++ ++int ++zap_lookup_uint64(objset_t *os, uint64_t zapobj, const uint64_t *key, ++ int key_numints, uint64_t integer_size, uint64_t num_integers, void *buf) ++{ ++ zap_t *zap; ++ int err; ++ zap_name_t *zn; ++ ++ err = zap_lockdir(os, zapobj, NULL, RW_READER, TRUE, FALSE, &zap); ++ if (err) ++ return (err); ++ zn = zap_name_alloc_uint64(zap, key, key_numints); ++ if (zn == NULL) { ++ zap_unlockdir(zap); ++ return (ENOTSUP); ++ } ++ ++ err = fzap_lookup(zn, integer_size, num_integers, buf, ++ NULL, 0, NULL); ++ zap_name_free(zn); ++ zap_unlockdir(zap); ++ return (err); ++} ++ ++int ++zap_contains(objset_t *os, uint64_t zapobj, const char *name) ++{ ++ int err = (zap_lookup_norm(os, zapobj, name, 0, ++ 0, NULL, MT_EXACT, NULL, 0, NULL)); ++ if (err == EOVERFLOW || err == EINVAL) ++ err = 0; /* found, but skipped reading the value */ ++ return (err); ++} ++ ++int 
++zap_length(objset_t *os, uint64_t zapobj, const char *name, ++ uint64_t *integer_size, uint64_t *num_integers) ++{ ++ zap_t *zap; ++ int err; ++ mzap_ent_t *mze; ++ zap_name_t *zn; ++ ++ err = zap_lockdir(os, zapobj, NULL, RW_READER, TRUE, FALSE, &zap); ++ if (err) ++ return (err); ++ zn = zap_name_alloc(zap, name, MT_EXACT); ++ if (zn == NULL) { ++ zap_unlockdir(zap); ++ return (ENOTSUP); ++ } ++ if (!zap->zap_ismicro) { ++ err = fzap_length(zn, integer_size, num_integers); ++ } else { ++ mze = mze_find(zn); ++ if (mze == NULL) { ++ err = ENOENT; ++ } else { ++ if (integer_size) ++ *integer_size = 8; ++ if (num_integers) ++ *num_integers = 1; ++ } ++ } ++ zap_name_free(zn); ++ zap_unlockdir(zap); ++ return (err); ++} ++ ++int ++zap_length_uint64(objset_t *os, uint64_t zapobj, const uint64_t *key, ++ int key_numints, uint64_t *integer_size, uint64_t *num_integers) ++{ ++ zap_t *zap; ++ int err; ++ zap_name_t *zn; ++ ++ err = zap_lockdir(os, zapobj, NULL, RW_READER, TRUE, FALSE, &zap); ++ if (err) ++ return (err); ++ zn = zap_name_alloc_uint64(zap, key, key_numints); ++ if (zn == NULL) { ++ zap_unlockdir(zap); ++ return (ENOTSUP); ++ } ++ err = fzap_length(zn, integer_size, num_integers); ++ zap_name_free(zn); ++ zap_unlockdir(zap); ++ return (err); ++} ++ ++static void ++mzap_addent(zap_name_t *zn, uint64_t value) ++{ ++ int i; ++ zap_t *zap = zn->zn_zap; ++ int start = zap->zap_m.zap_alloc_next; ++ uint32_t cd; ++ ++ ASSERT(RW_WRITE_HELD(&zap->zap_rwlock)); ++ ++#ifdef ZFS_DEBUG ++ for (i = 0; i < zap->zap_m.zap_num_chunks; i++) { ++ ASSERTV(mzap_ent_phys_t *mze=&zap->zap_m.zap_phys->mz_chunk[i]); ++ ASSERT(strcmp(zn->zn_key_orig, mze->mze_name) != 0); ++ } ++#endif ++ ++ cd = mze_find_unused_cd(zap, zn->zn_hash); ++ /* given the limited size of the microzap, this can't happen */ ++ ASSERT(cd < zap_maxcd(zap)); ++ ++again: ++ for (i = start; i < zap->zap_m.zap_num_chunks; i++) { ++ mzap_ent_phys_t *mze = &zap->zap_m.zap_phys->mz_chunk[i]; ++ if (mze->mze_name[0] == 0) { ++ mze->mze_value = value; ++ mze->mze_cd = cd; ++ (void) strcpy(mze->mze_name, zn->zn_key_orig); ++ zap->zap_m.zap_num_entries++; ++ zap->zap_m.zap_alloc_next = i+1; ++ if (zap->zap_m.zap_alloc_next == ++ zap->zap_m.zap_num_chunks) ++ zap->zap_m.zap_alloc_next = 0; ++ mze_insert(zap, i, zn->zn_hash); ++ return; ++ } ++ } ++ if (start != 0) { ++ start = 0; ++ goto again; ++ } ++ ASSERT(!"out of entries!"); ++} ++ ++int ++zap_add(objset_t *os, uint64_t zapobj, const char *key, ++ int integer_size, uint64_t num_integers, ++ const void *val, dmu_tx_t *tx) ++{ ++ zap_t *zap; ++ int err; ++ mzap_ent_t *mze; ++ const uint64_t *intval = val; ++ zap_name_t *zn; ++ ++ err = zap_lockdir(os, zapobj, tx, RW_WRITER, TRUE, TRUE, &zap); ++ if (err) ++ return (err); ++ zn = zap_name_alloc(zap, key, MT_EXACT); ++ if (zn == NULL) { ++ zap_unlockdir(zap); ++ return (ENOTSUP); ++ } ++ if (!zap->zap_ismicro) { ++ err = fzap_add(zn, integer_size, num_integers, val, tx); ++ zap = zn->zn_zap; /* fzap_add() may change zap */ ++ } else if (integer_size != 8 || num_integers != 1 || ++ strlen(key) >= MZAP_NAME_LEN) { ++ err = mzap_upgrade(&zn->zn_zap, tx, 0); ++ if (err == 0) ++ err = fzap_add(zn, integer_size, num_integers, val, tx); ++ zap = zn->zn_zap; /* fzap_add() may change zap */ ++ } else { ++ mze = mze_find(zn); ++ if (mze != NULL) { ++ err = EEXIST; ++ } else { ++ mzap_addent(zn, *intval); ++ } ++ } ++ ASSERT(zap == zn->zn_zap); ++ zap_name_free(zn); ++ if (zap != NULL) /* may be NULL if fzap_add() failed */ ++ zap_unlockdir(zap); ++ 
return (err); ++} ++ ++int ++zap_add_uint64(objset_t *os, uint64_t zapobj, const uint64_t *key, ++ int key_numints, int integer_size, uint64_t num_integers, ++ const void *val, dmu_tx_t *tx) ++{ ++ zap_t *zap; ++ int err; ++ zap_name_t *zn; ++ ++ err = zap_lockdir(os, zapobj, tx, RW_WRITER, TRUE, TRUE, &zap); ++ if (err) ++ return (err); ++ zn = zap_name_alloc_uint64(zap, key, key_numints); ++ if (zn == NULL) { ++ zap_unlockdir(zap); ++ return (ENOTSUP); ++ } ++ err = fzap_add(zn, integer_size, num_integers, val, tx); ++ zap = zn->zn_zap; /* fzap_add() may change zap */ ++ zap_name_free(zn); ++ if (zap != NULL) /* may be NULL if fzap_add() failed */ ++ zap_unlockdir(zap); ++ return (err); ++} ++ ++int ++zap_update(objset_t *os, uint64_t zapobj, const char *name, ++ int integer_size, uint64_t num_integers, const void *val, dmu_tx_t *tx) ++{ ++ zap_t *zap; ++ mzap_ent_t *mze; ++ const uint64_t *intval = val; ++ zap_name_t *zn; ++ int err; ++ ++#ifdef ZFS_DEBUG ++ uint64_t oldval; ++ ++ /* ++ * If there is an old value, it shouldn't change across the ++ * lockdir (eg, due to bprewrite's xlation). ++ */ ++ if (integer_size == 8 && num_integers == 1) ++ (void) zap_lookup(os, zapobj, name, 8, 1, &oldval); ++#endif ++ ++ err = zap_lockdir(os, zapobj, tx, RW_WRITER, TRUE, TRUE, &zap); ++ if (err) ++ return (err); ++ zn = zap_name_alloc(zap, name, MT_EXACT); ++ if (zn == NULL) { ++ zap_unlockdir(zap); ++ return (ENOTSUP); ++ } ++ if (!zap->zap_ismicro) { ++ err = fzap_update(zn, integer_size, num_integers, val, tx); ++ zap = zn->zn_zap; /* fzap_update() may change zap */ ++ } else if (integer_size != 8 || num_integers != 1 || ++ strlen(name) >= MZAP_NAME_LEN) { ++ dprintf("upgrading obj %llu: intsz=%u numint=%llu name=%s\n", ++ zapobj, integer_size, num_integers, name); ++ err = mzap_upgrade(&zn->zn_zap, tx, 0); ++ if (err == 0) ++ err = fzap_update(zn, integer_size, num_integers, ++ val, tx); ++ zap = zn->zn_zap; /* fzap_update() may change zap */ ++ } else { ++ mze = mze_find(zn); ++ if (mze != NULL) { ++ ASSERT3U(MZE_PHYS(zap, mze)->mze_value, ==, oldval); ++ MZE_PHYS(zap, mze)->mze_value = *intval; ++ } else { ++ mzap_addent(zn, *intval); ++ } ++ } ++ ASSERT(zap == zn->zn_zap); ++ zap_name_free(zn); ++ if (zap != NULL) /* may be NULL if fzap_upgrade() failed */ ++ zap_unlockdir(zap); ++ return (err); ++} ++ ++int ++zap_update_uint64(objset_t *os, uint64_t zapobj, const uint64_t *key, ++ int key_numints, ++ int integer_size, uint64_t num_integers, const void *val, dmu_tx_t *tx) ++{ ++ zap_t *zap; ++ zap_name_t *zn; ++ int err; ++ ++ err = zap_lockdir(os, zapobj, tx, RW_WRITER, TRUE, TRUE, &zap); ++ if (err) ++ return (err); ++ zn = zap_name_alloc_uint64(zap, key, key_numints); ++ if (zn == NULL) { ++ zap_unlockdir(zap); ++ return (ENOTSUP); ++ } ++ err = fzap_update(zn, integer_size, num_integers, val, tx); ++ zap = zn->zn_zap; /* fzap_update() may change zap */ ++ zap_name_free(zn); ++ if (zap != NULL) /* may be NULL if fzap_upgrade() failed */ ++ zap_unlockdir(zap); ++ return (err); ++} ++ ++int ++zap_remove(objset_t *os, uint64_t zapobj, const char *name, dmu_tx_t *tx) ++{ ++ return (zap_remove_norm(os, zapobj, name, MT_EXACT, tx)); ++} ++ ++int ++zap_remove_norm(objset_t *os, uint64_t zapobj, const char *name, ++ matchtype_t mt, dmu_tx_t *tx) ++{ ++ zap_t *zap; ++ int err; ++ mzap_ent_t *mze; ++ zap_name_t *zn; ++ ++ err = zap_lockdir(os, zapobj, tx, RW_WRITER, TRUE, FALSE, &zap); ++ if (err) ++ return (err); ++ zn = zap_name_alloc(zap, name, mt); ++ if (zn == NULL) { ++ zap_unlockdir(zap); 
++ return (ENOTSUP); ++ } ++ if (!zap->zap_ismicro) { ++ err = fzap_remove(zn, tx); ++ } else { ++ mze = mze_find(zn); ++ if (mze == NULL) { ++ err = ENOENT; ++ } else { ++ zap->zap_m.zap_num_entries--; ++ bzero(&zap->zap_m.zap_phys->mz_chunk[mze->mze_chunkid], ++ sizeof (mzap_ent_phys_t)); ++ mze_remove(zap, mze); ++ } ++ } ++ zap_name_free(zn); ++ zap_unlockdir(zap); ++ return (err); ++} ++ ++int ++zap_remove_uint64(objset_t *os, uint64_t zapobj, const uint64_t *key, ++ int key_numints, dmu_tx_t *tx) ++{ ++ zap_t *zap; ++ int err; ++ zap_name_t *zn; ++ ++ err = zap_lockdir(os, zapobj, tx, RW_WRITER, TRUE, FALSE, &zap); ++ if (err) ++ return (err); ++ zn = zap_name_alloc_uint64(zap, key, key_numints); ++ if (zn == NULL) { ++ zap_unlockdir(zap); ++ return (ENOTSUP); ++ } ++ err = fzap_remove(zn, tx); ++ zap_name_free(zn); ++ zap_unlockdir(zap); ++ return (err); ++} ++ ++/* ++ * Routines for iterating over the attributes. ++ */ ++ ++void ++zap_cursor_init_serialized(zap_cursor_t *zc, objset_t *os, uint64_t zapobj, ++ uint64_t serialized) ++{ ++ zc->zc_objset = os; ++ zc->zc_zap = NULL; ++ zc->zc_leaf = NULL; ++ zc->zc_zapobj = zapobj; ++ zc->zc_serialized = serialized; ++ zc->zc_hash = 0; ++ zc->zc_cd = 0; ++} ++ ++void ++zap_cursor_init(zap_cursor_t *zc, objset_t *os, uint64_t zapobj) ++{ ++ zap_cursor_init_serialized(zc, os, zapobj, 0); ++} ++ ++void ++zap_cursor_fini(zap_cursor_t *zc) ++{ ++ if (zc->zc_zap) { ++ rw_enter(&zc->zc_zap->zap_rwlock, RW_READER); ++ zap_unlockdir(zc->zc_zap); ++ zc->zc_zap = NULL; ++ } ++ if (zc->zc_leaf) { ++ rw_enter(&zc->zc_leaf->l_rwlock, RW_READER); ++ zap_put_leaf(zc->zc_leaf); ++ zc->zc_leaf = NULL; ++ } ++ zc->zc_objset = NULL; ++} ++ ++uint64_t ++zap_cursor_serialize(zap_cursor_t *zc) ++{ ++ if (zc->zc_hash == -1ULL) ++ return (-1ULL); ++ if (zc->zc_zap == NULL) ++ return (zc->zc_serialized); ++ ASSERT((zc->zc_hash & zap_maxcd(zc->zc_zap)) == 0); ++ ASSERT(zc->zc_cd < zap_maxcd(zc->zc_zap)); ++ ++ /* ++ * We want to keep the high 32 bits of the cursor zero if we can, so ++ * that 32-bit programs can access this. So usually use a small ++ * (28-bit) hash value so we can fit 4 bits of cd into the low 32-bits ++ * of the cursor. ++ * ++ * [ collision differentiator | zap_hashbits()-bit hash value ] ++ */ ++ return ((zc->zc_hash >> (64 - zap_hashbits(zc->zc_zap))) | ++ ((uint64_t)zc->zc_cd << zap_hashbits(zc->zc_zap))); ++} ++ ++int ++zap_cursor_retrieve(zap_cursor_t *zc, zap_attribute_t *za) ++{ ++ int err; ++ avl_index_t idx; ++ mzap_ent_t mze_tofind; ++ mzap_ent_t *mze; ++ ++ if (zc->zc_hash == -1ULL) ++ return (ENOENT); ++ ++ if (zc->zc_zap == NULL) { ++ int hb; ++ err = zap_lockdir(zc->zc_objset, zc->zc_zapobj, NULL, ++ RW_READER, TRUE, FALSE, &zc->zc_zap); ++ if (err) ++ return (err); ++ ++ /* ++ * To support zap_cursor_init_serialized, advance, retrieve, ++ * we must add to the existing zc_cd, which may already ++ * be 1 due to the zap_cursor_advance. 
++ */ ++ ASSERT(zc->zc_hash == 0); ++ hb = zap_hashbits(zc->zc_zap); ++ zc->zc_hash = zc->zc_serialized << (64 - hb); ++ zc->zc_cd += zc->zc_serialized >> hb; ++ if (zc->zc_cd >= zap_maxcd(zc->zc_zap)) /* corrupt serialized */ ++ zc->zc_cd = 0; ++ } else { ++ rw_enter(&zc->zc_zap->zap_rwlock, RW_READER); ++ } ++ if (!zc->zc_zap->zap_ismicro) { ++ err = fzap_cursor_retrieve(zc->zc_zap, zc, za); ++ } else { ++ err = ENOENT; ++ ++ mze_tofind.mze_hash = zc->zc_hash; ++ mze_tofind.mze_cd = zc->zc_cd; ++ ++ mze = avl_find(&zc->zc_zap->zap_m.zap_avl, &mze_tofind, &idx); ++ if (mze == NULL) { ++ mze = avl_nearest(&zc->zc_zap->zap_m.zap_avl, ++ idx, AVL_AFTER); ++ } ++ if (mze) { ++ mzap_ent_phys_t *mzep = MZE_PHYS(zc->zc_zap, mze); ++ ASSERT3U(mze->mze_cd, ==, mzep->mze_cd); ++ za->za_normalization_conflict = ++ mzap_normalization_conflict(zc->zc_zap, NULL, mze); ++ za->za_integer_length = 8; ++ za->za_num_integers = 1; ++ za->za_first_integer = mzep->mze_value; ++ (void) strcpy(za->za_name, mzep->mze_name); ++ zc->zc_hash = mze->mze_hash; ++ zc->zc_cd = mze->mze_cd; ++ err = 0; ++ } else { ++ zc->zc_hash = -1ULL; ++ } ++ } ++ rw_exit(&zc->zc_zap->zap_rwlock); ++ return (err); ++} ++ ++void ++zap_cursor_advance(zap_cursor_t *zc) ++{ ++ if (zc->zc_hash == -1ULL) ++ return; ++ zc->zc_cd++; ++} ++ ++int ++zap_cursor_move_to_key(zap_cursor_t *zc, const char *name, matchtype_t mt) ++{ ++ int err = 0; ++ mzap_ent_t *mze; ++ zap_name_t *zn; ++ ++ if (zc->zc_zap == NULL) { ++ err = zap_lockdir(zc->zc_objset, zc->zc_zapobj, NULL, ++ RW_READER, TRUE, FALSE, &zc->zc_zap); ++ if (err) ++ return (err); ++ } else { ++ rw_enter(&zc->zc_zap->zap_rwlock, RW_READER); ++ } ++ ++ zn = zap_name_alloc(zc->zc_zap, name, mt); ++ if (zn == NULL) { ++ rw_exit(&zc->zc_zap->zap_rwlock); ++ return (ENOTSUP); ++ } ++ ++ if (!zc->zc_zap->zap_ismicro) { ++ err = fzap_cursor_move_to_key(zc, zn); ++ } else { ++ mze = mze_find(zn); ++ if (mze == NULL) { ++ err = ENOENT; ++ goto out; ++ } ++ zc->zc_hash = mze->mze_hash; ++ zc->zc_cd = mze->mze_cd; ++ } ++ ++out: ++ zap_name_free(zn); ++ rw_exit(&zc->zc_zap->zap_rwlock); ++ return (err); ++} ++ ++int ++zap_get_stats(objset_t *os, uint64_t zapobj, zap_stats_t *zs) ++{ ++ int err; ++ zap_t *zap; ++ ++ err = zap_lockdir(os, zapobj, NULL, RW_READER, TRUE, FALSE, &zap); ++ if (err) ++ return (err); ++ ++ bzero(zs, sizeof (zap_stats_t)); ++ ++ if (zap->zap_ismicro) { ++ zs->zs_blocksize = zap->zap_dbuf->db_size; ++ zs->zs_num_entries = zap->zap_m.zap_num_entries; ++ zs->zs_num_blocks = 1; ++ } else { ++ fzap_get_stats(zap, zs); ++ } ++ zap_unlockdir(zap); ++ return (0); ++} ++ ++int ++zap_count_write(objset_t *os, uint64_t zapobj, const char *name, int add, ++ uint64_t *towrite, uint64_t *tooverwrite) ++{ ++ zap_t *zap; ++ int err = 0; ++ ++ ++ /* ++ * Since, we don't have a name, we cannot figure out which blocks will ++ * be affected in this operation. So, account for the worst case : ++ * - 3 blocks overwritten: target leaf, ptrtbl block, header block ++ * - 4 new blocks written if adding: ++ * - 2 blocks for possibly split leaves, ++ * - 2 grown ptrtbl blocks ++ * ++ * This also accomodates the case where an add operation to a fairly ++ * large microzap results in a promotion to fatzap. ++ */ ++ if (name == NULL) { ++ *towrite += (3 + (add ? 4 : 0)) * SPA_MAXBLOCKSIZE; ++ return (err); ++ } ++ ++ /* ++ * We lock the zap with adding == FALSE. Because, if we pass ++ * the actual value of add, it could trigger a mzap_upgrade(). 
++ * At present we are just evaluating the possibility of this operation ++ * and hence we donot want to trigger an upgrade. ++ */ ++ err = zap_lockdir(os, zapobj, NULL, RW_READER, TRUE, FALSE, &zap); ++ if (err) ++ return (err); ++ ++ if (!zap->zap_ismicro) { ++ zap_name_t *zn = zap_name_alloc(zap, name, MT_EXACT); ++ if (zn) { ++ err = fzap_count_write(zn, add, towrite, ++ tooverwrite); ++ zap_name_free(zn); ++ } else { ++ /* ++ * We treat this case as similar to (name == NULL) ++ */ ++ *towrite += (3 + (add ? 4 : 0)) * SPA_MAXBLOCKSIZE; ++ } ++ } else { ++ /* ++ * We are here if (name != NULL) and this is a micro-zap. ++ * We account for the header block depending on whether it ++ * is freeable. ++ * ++ * Incase of an add-operation it is hard to find out ++ * if this add will promote this microzap to fatzap. ++ * Hence, we consider the worst case and account for the ++ * blocks assuming this microzap would be promoted to a ++ * fatzap. ++ * ++ * 1 block overwritten : header block ++ * 4 new blocks written : 2 new split leaf, 2 grown ++ * ptrtbl blocks ++ */ ++ if (dmu_buf_freeable(zap->zap_dbuf)) ++ *tooverwrite += SPA_MAXBLOCKSIZE; ++ else ++ *towrite += SPA_MAXBLOCKSIZE; ++ ++ if (add) { ++ *towrite += 4 * SPA_MAXBLOCKSIZE; ++ } ++ } ++ ++ zap_unlockdir(zap); ++ return (err); ++} ++ ++#if defined(_KERNEL) && defined(HAVE_SPL) ++EXPORT_SYMBOL(zap_create); ++EXPORT_SYMBOL(zap_create_norm); ++EXPORT_SYMBOL(zap_create_flags); ++EXPORT_SYMBOL(zap_create_claim); ++EXPORT_SYMBOL(zap_create_claim_norm); ++EXPORT_SYMBOL(zap_destroy); ++EXPORT_SYMBOL(zap_lookup); ++EXPORT_SYMBOL(zap_lookup_norm); ++EXPORT_SYMBOL(zap_lookup_uint64); ++EXPORT_SYMBOL(zap_contains); ++EXPORT_SYMBOL(zap_prefetch_uint64); ++EXPORT_SYMBOL(zap_count_write); ++EXPORT_SYMBOL(zap_add); ++EXPORT_SYMBOL(zap_add_uint64); ++EXPORT_SYMBOL(zap_update); ++EXPORT_SYMBOL(zap_update_uint64); ++EXPORT_SYMBOL(zap_length); ++EXPORT_SYMBOL(zap_length_uint64); ++EXPORT_SYMBOL(zap_remove); ++EXPORT_SYMBOL(zap_remove_norm); ++EXPORT_SYMBOL(zap_remove_uint64); ++EXPORT_SYMBOL(zap_count); ++EXPORT_SYMBOL(zap_value_search); ++EXPORT_SYMBOL(zap_join); ++EXPORT_SYMBOL(zap_join_increment); ++EXPORT_SYMBOL(zap_add_int); ++EXPORT_SYMBOL(zap_remove_int); ++EXPORT_SYMBOL(zap_lookup_int); ++EXPORT_SYMBOL(zap_increment_int); ++EXPORT_SYMBOL(zap_add_int_key); ++EXPORT_SYMBOL(zap_lookup_int_key); ++EXPORT_SYMBOL(zap_increment); ++EXPORT_SYMBOL(zap_cursor_init); ++EXPORT_SYMBOL(zap_cursor_fini); ++EXPORT_SYMBOL(zap_cursor_retrieve); ++EXPORT_SYMBOL(zap_cursor_advance); ++EXPORT_SYMBOL(zap_cursor_serialize); ++EXPORT_SYMBOL(zap_cursor_move_to_key); ++EXPORT_SYMBOL(zap_cursor_init_serialized); ++EXPORT_SYMBOL(zap_get_stats); ++#endif +diff -uNr linux-3.2.33-go.orig/fs/zfs/zfs/zfs_acl.c linux-3.2.33-go/fs/zfs/zfs/zfs_acl.c +--- linux-3.2.33-go.orig/fs/zfs/zfs/zfs_acl.c 1970-01-01 01:00:00.000000000 +0100 ++++ linux-3.2.33-go/fs/zfs/zfs/zfs_acl.c 2012-11-16 23:25:34.350039322 +0100 +@@ -0,0 +1,2799 @@ ++/* ++ * CDDL HEADER START ++ * ++ * The contents of this file are subject to the terms of the ++ * Common Development and Distribution License (the "License"). ++ * You may not use this file except in compliance with the License. ++ * ++ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE ++ * or http://www.opensolaris.org/os/licensing. ++ * See the License for the specific language governing permissions ++ * and limitations under the License. 
++ * ++ * When distributing Covered Code, include this CDDL HEADER in each ++ * file and include the License file at usr/src/OPENSOLARIS.LICENSE. ++ * If applicable, add the following below this CDDL HEADER, with the ++ * fields enclosed by brackets "[]" replaced with your own identifying ++ * information: Portions Copyright [yyyy] [name of copyright owner] ++ * ++ * CDDL HEADER END ++ */ ++/* ++ * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved. ++ */ ++ ++ ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include "fs/fs_subr.h" ++ ++#define ALLOW ACE_ACCESS_ALLOWED_ACE_TYPE ++#define DENY ACE_ACCESS_DENIED_ACE_TYPE ++#define MAX_ACE_TYPE ACE_SYSTEM_ALARM_CALLBACK_OBJECT_ACE_TYPE ++#define MIN_ACE_TYPE ALLOW ++ ++#define OWNING_GROUP (ACE_GROUP|ACE_IDENTIFIER_GROUP) ++#define EVERYONE_ALLOW_MASK (ACE_READ_ACL|ACE_READ_ATTRIBUTES | \ ++ ACE_READ_NAMED_ATTRS|ACE_SYNCHRONIZE) ++#define EVERYONE_DENY_MASK (ACE_WRITE_ACL|ACE_WRITE_OWNER | \ ++ ACE_WRITE_ATTRIBUTES|ACE_WRITE_NAMED_ATTRS) ++#define OWNER_ALLOW_MASK (ACE_WRITE_ACL | ACE_WRITE_OWNER | \ ++ ACE_WRITE_ATTRIBUTES|ACE_WRITE_NAMED_ATTRS) ++ ++#define ZFS_CHECKED_MASKS (ACE_READ_ACL|ACE_READ_ATTRIBUTES|ACE_READ_DATA| \ ++ ACE_READ_NAMED_ATTRS|ACE_WRITE_DATA|ACE_WRITE_ATTRIBUTES| \ ++ ACE_WRITE_NAMED_ATTRS|ACE_APPEND_DATA|ACE_EXECUTE|ACE_WRITE_OWNER| \ ++ ACE_WRITE_ACL|ACE_DELETE|ACE_DELETE_CHILD|ACE_SYNCHRONIZE) ++ ++#define WRITE_MASK_DATA (ACE_WRITE_DATA|ACE_APPEND_DATA|ACE_WRITE_NAMED_ATTRS) ++#define WRITE_MASK_ATTRS (ACE_WRITE_ACL|ACE_WRITE_OWNER|ACE_WRITE_ATTRIBUTES| \ ++ ACE_DELETE|ACE_DELETE_CHILD) ++#define WRITE_MASK (WRITE_MASK_DATA|WRITE_MASK_ATTRS) ++ ++#define OGE_CLEAR (ACE_READ_DATA|ACE_LIST_DIRECTORY|ACE_WRITE_DATA| \ ++ ACE_ADD_FILE|ACE_APPEND_DATA|ACE_ADD_SUBDIRECTORY|ACE_EXECUTE) ++ ++#define OKAY_MASK_BITS (ACE_READ_DATA|ACE_LIST_DIRECTORY|ACE_WRITE_DATA| \ ++ ACE_ADD_FILE|ACE_APPEND_DATA|ACE_ADD_SUBDIRECTORY|ACE_EXECUTE) ++ ++#define ALL_INHERIT (ACE_FILE_INHERIT_ACE|ACE_DIRECTORY_INHERIT_ACE | \ ++ ACE_NO_PROPAGATE_INHERIT_ACE|ACE_INHERIT_ONLY_ACE|ACE_INHERITED_ACE) ++ ++#define RESTRICTED_CLEAR (ACE_WRITE_ACL|ACE_WRITE_OWNER) ++ ++#define V4_ACL_WIDE_FLAGS (ZFS_ACL_AUTO_INHERIT|ZFS_ACL_DEFAULTED|\ ++ ZFS_ACL_PROTECTED) ++ ++#define ZFS_ACL_WIDE_FLAGS (V4_ACL_WIDE_FLAGS|ZFS_ACL_TRIVIAL|ZFS_INHERIT_ACE|\ ++ ZFS_ACL_OBJ_ACE) ++ ++#define ALL_MODE_EXECS (S_IXUSR | S_IXGRP | S_IXOTH) ++ ++static uint16_t ++zfs_ace_v0_get_type(void *acep) ++{ ++ return (((zfs_oldace_t *)acep)->z_type); ++} ++ ++static uint16_t ++zfs_ace_v0_get_flags(void *acep) ++{ ++ return (((zfs_oldace_t *)acep)->z_flags); ++} ++ ++static uint32_t ++zfs_ace_v0_get_mask(void *acep) ++{ ++ return (((zfs_oldace_t *)acep)->z_access_mask); ++} ++ ++static uint64_t ++zfs_ace_v0_get_who(void *acep) ++{ ++ return (((zfs_oldace_t *)acep)->z_fuid); ++} ++ ++static void ++zfs_ace_v0_set_type(void *acep, uint16_t type) ++{ ++ ((zfs_oldace_t *)acep)->z_type = type; ++} ++ ++static void ++zfs_ace_v0_set_flags(void *acep, uint16_t flags) ++{ ++ ((zfs_oldace_t *)acep)->z_flags = flags; ++} ++ ++static void ++zfs_ace_v0_set_mask(void *acep, uint32_t mask) ++{ ++ ((zfs_oldace_t *)acep)->z_access_mask = mask; ++} ++ ++static void ++zfs_ace_v0_set_who(void *acep, uint64_t who) ++{ ++ 
((zfs_oldace_t *)acep)->z_fuid = who; ++} ++ ++/*ARGSUSED*/ ++static size_t ++zfs_ace_v0_size(void *acep) ++{ ++ return (sizeof (zfs_oldace_t)); ++} ++ ++static size_t ++zfs_ace_v0_abstract_size(void) ++{ ++ return (sizeof (zfs_oldace_t)); ++} ++ ++static int ++zfs_ace_v0_mask_off(void) ++{ ++ return (offsetof(zfs_oldace_t, z_access_mask)); ++} ++ ++/*ARGSUSED*/ ++static int ++zfs_ace_v0_data(void *acep, void **datap) ++{ ++ *datap = NULL; ++ return (0); ++} ++ ++static acl_ops_t zfs_acl_v0_ops = { ++ zfs_ace_v0_get_mask, ++ zfs_ace_v0_set_mask, ++ zfs_ace_v0_get_flags, ++ zfs_ace_v0_set_flags, ++ zfs_ace_v0_get_type, ++ zfs_ace_v0_set_type, ++ zfs_ace_v0_get_who, ++ zfs_ace_v0_set_who, ++ zfs_ace_v0_size, ++ zfs_ace_v0_abstract_size, ++ zfs_ace_v0_mask_off, ++ zfs_ace_v0_data ++}; ++ ++static uint16_t ++zfs_ace_fuid_get_type(void *acep) ++{ ++ return (((zfs_ace_hdr_t *)acep)->z_type); ++} ++ ++static uint16_t ++zfs_ace_fuid_get_flags(void *acep) ++{ ++ return (((zfs_ace_hdr_t *)acep)->z_flags); ++} ++ ++static uint32_t ++zfs_ace_fuid_get_mask(void *acep) ++{ ++ return (((zfs_ace_hdr_t *)acep)->z_access_mask); ++} ++ ++static uint64_t ++zfs_ace_fuid_get_who(void *args) ++{ ++ uint16_t entry_type; ++ zfs_ace_t *acep = args; ++ ++ entry_type = acep->z_hdr.z_flags & ACE_TYPE_FLAGS; ++ ++ if (entry_type == ACE_OWNER || entry_type == OWNING_GROUP || ++ entry_type == ACE_EVERYONE) ++ return (-1); ++ return (((zfs_ace_t *)acep)->z_fuid); ++} ++ ++static void ++zfs_ace_fuid_set_type(void *acep, uint16_t type) ++{ ++ ((zfs_ace_hdr_t *)acep)->z_type = type; ++} ++ ++static void ++zfs_ace_fuid_set_flags(void *acep, uint16_t flags) ++{ ++ ((zfs_ace_hdr_t *)acep)->z_flags = flags; ++} ++ ++static void ++zfs_ace_fuid_set_mask(void *acep, uint32_t mask) ++{ ++ ((zfs_ace_hdr_t *)acep)->z_access_mask = mask; ++} ++ ++static void ++zfs_ace_fuid_set_who(void *arg, uint64_t who) ++{ ++ zfs_ace_t *acep = arg; ++ ++ uint16_t entry_type = acep->z_hdr.z_flags & ACE_TYPE_FLAGS; ++ ++ if (entry_type == ACE_OWNER || entry_type == OWNING_GROUP || ++ entry_type == ACE_EVERYONE) ++ return; ++ acep->z_fuid = who; ++} ++ ++static size_t ++zfs_ace_fuid_size(void *acep) ++{ ++ zfs_ace_hdr_t *zacep = acep; ++ uint16_t entry_type; ++ ++ switch (zacep->z_type) { ++ case ACE_ACCESS_ALLOWED_OBJECT_ACE_TYPE: ++ case ACE_ACCESS_DENIED_OBJECT_ACE_TYPE: ++ case ACE_SYSTEM_AUDIT_OBJECT_ACE_TYPE: ++ case ACE_SYSTEM_ALARM_OBJECT_ACE_TYPE: ++ return (sizeof (zfs_object_ace_t)); ++ case ALLOW: ++ case DENY: ++ entry_type = ++ (((zfs_ace_hdr_t *)acep)->z_flags & ACE_TYPE_FLAGS); ++ if (entry_type == ACE_OWNER || ++ entry_type == OWNING_GROUP || ++ entry_type == ACE_EVERYONE) ++ return (sizeof (zfs_ace_hdr_t)); ++ /*FALLTHROUGH*/ ++ default: ++ return (sizeof (zfs_ace_t)); ++ } ++} ++ ++static size_t ++zfs_ace_fuid_abstract_size(void) ++{ ++ return (sizeof (zfs_ace_hdr_t)); ++} ++ ++static int ++zfs_ace_fuid_mask_off(void) ++{ ++ return (offsetof(zfs_ace_hdr_t, z_access_mask)); ++} ++ ++static int ++zfs_ace_fuid_data(void *acep, void **datap) ++{ ++ zfs_ace_t *zacep = acep; ++ zfs_object_ace_t *zobjp; ++ ++ switch (zacep->z_hdr.z_type) { ++ case ACE_ACCESS_ALLOWED_OBJECT_ACE_TYPE: ++ case ACE_ACCESS_DENIED_OBJECT_ACE_TYPE: ++ case ACE_SYSTEM_AUDIT_OBJECT_ACE_TYPE: ++ case ACE_SYSTEM_ALARM_OBJECT_ACE_TYPE: ++ zobjp = acep; ++ *datap = (caddr_t)zobjp + sizeof (zfs_ace_t); ++ return (sizeof (zfs_object_ace_t) - sizeof (zfs_ace_t)); ++ default: ++ *datap = NULL; ++ return (0); ++ } ++} ++ ++static acl_ops_t zfs_acl_fuid_ops = { ++ 
zfs_ace_fuid_get_mask, ++ zfs_ace_fuid_set_mask, ++ zfs_ace_fuid_get_flags, ++ zfs_ace_fuid_set_flags, ++ zfs_ace_fuid_get_type, ++ zfs_ace_fuid_set_type, ++ zfs_ace_fuid_get_who, ++ zfs_ace_fuid_set_who, ++ zfs_ace_fuid_size, ++ zfs_ace_fuid_abstract_size, ++ zfs_ace_fuid_mask_off, ++ zfs_ace_fuid_data ++}; ++ ++/* ++ * The following three functions are provided for compatibility with ++ * older ZPL version in order to determine if the file use to have ++ * an external ACL and what version of ACL previously existed on the ++ * file. Would really be nice to not need this, sigh. ++ */ ++uint64_t ++zfs_external_acl(znode_t *zp) ++{ ++ zfs_acl_phys_t acl_phys; ++ int error; ++ ++ if (zp->z_is_sa) ++ return (0); ++ ++ /* ++ * Need to deal with a potential ++ * race where zfs_sa_upgrade could cause ++ * z_isa_sa to change. ++ * ++ * If the lookup fails then the state of z_is_sa should have ++ * changed. ++ */ ++ ++ if ((error = sa_lookup(zp->z_sa_hdl, SA_ZPL_ZNODE_ACL(ZTOZSB(zp)), ++ &acl_phys, sizeof (acl_phys))) == 0) ++ return (acl_phys.z_acl_extern_obj); ++ else { ++ /* ++ * after upgrade the SA_ZPL_ZNODE_ACL should have been ++ * removed ++ */ ++ VERIFY(zp->z_is_sa && error == ENOENT); ++ return (0); ++ } ++} ++ ++/* ++ * Determine size of ACL in bytes ++ * ++ * This is more complicated than it should be since we have to deal ++ * with old external ACLs. ++ */ ++static int ++zfs_acl_znode_info(znode_t *zp, int *aclsize, int *aclcount, ++ zfs_acl_phys_t *aclphys) ++{ ++ zfs_sb_t *zsb = ZTOZSB(zp); ++ uint64_t acl_count; ++ int size; ++ int error; ++ ++ ASSERT(MUTEX_HELD(&zp->z_acl_lock)); ++ if (zp->z_is_sa) { ++ if ((error = sa_size(zp->z_sa_hdl, SA_ZPL_DACL_ACES(zsb), ++ &size)) != 0) ++ return (error); ++ *aclsize = size; ++ if ((error = sa_lookup(zp->z_sa_hdl, SA_ZPL_DACL_COUNT(zsb), ++ &acl_count, sizeof (acl_count))) != 0) ++ return (error); ++ *aclcount = acl_count; ++ } else { ++ if ((error = sa_lookup(zp->z_sa_hdl, SA_ZPL_ZNODE_ACL(zsb), ++ aclphys, sizeof (*aclphys))) != 0) ++ return (error); ++ ++ if (aclphys->z_acl_version == ZFS_ACL_VERSION_INITIAL) { ++ *aclsize = ZFS_ACL_SIZE(aclphys->z_acl_size); ++ *aclcount = aclphys->z_acl_size; ++ } else { ++ *aclsize = aclphys->z_acl_size; ++ *aclcount = aclphys->z_acl_count; ++ } ++ } ++ return (0); ++} ++ ++int ++zfs_znode_acl_version(znode_t *zp) ++{ ++ zfs_acl_phys_t acl_phys; ++ ++ if (zp->z_is_sa) ++ return (ZFS_ACL_VERSION_FUID); ++ else { ++ int error; ++ ++ /* ++ * Need to deal with a potential ++ * race where zfs_sa_upgrade could cause ++ * z_isa_sa to change. ++ * ++ * If the lookup fails then the state of z_is_sa should have ++ * changed. ++ */ ++ if ((error = sa_lookup(zp->z_sa_hdl, ++ SA_ZPL_ZNODE_ACL(ZTOZSB(zp)), ++ &acl_phys, sizeof (acl_phys))) == 0) ++ return (acl_phys.z_acl_version); ++ else { ++ /* ++ * After upgrade SA_ZPL_ZNODE_ACL should have ++ * been removed. 
++ */ ++ VERIFY(zp->z_is_sa && error == ENOENT); ++ return (ZFS_ACL_VERSION_FUID); ++ } ++ } ++} ++ ++static int ++zfs_acl_version(int version) ++{ ++ if (version < ZPL_VERSION_FUID) ++ return (ZFS_ACL_VERSION_INITIAL); ++ else ++ return (ZFS_ACL_VERSION_FUID); ++} ++ ++static int ++zfs_acl_version_zp(znode_t *zp) ++{ ++ return (zfs_acl_version(ZTOZSB(zp)->z_version)); ++} ++ ++zfs_acl_t * ++zfs_acl_alloc(int vers) ++{ ++ zfs_acl_t *aclp; ++ ++ aclp = kmem_zalloc(sizeof (zfs_acl_t), KM_PUSHPAGE); ++ list_create(&aclp->z_acl, sizeof (zfs_acl_node_t), ++ offsetof(zfs_acl_node_t, z_next)); ++ aclp->z_version = vers; ++ if (vers == ZFS_ACL_VERSION_FUID) ++ aclp->z_ops = &zfs_acl_fuid_ops; ++ else ++ aclp->z_ops = &zfs_acl_v0_ops; ++ return (aclp); ++} ++ ++zfs_acl_node_t * ++zfs_acl_node_alloc(size_t bytes) ++{ ++ zfs_acl_node_t *aclnode; ++ ++ aclnode = kmem_zalloc(sizeof (zfs_acl_node_t), KM_PUSHPAGE); ++ if (bytes) { ++ aclnode->z_acldata = kmem_alloc(bytes, KM_PUSHPAGE); ++ aclnode->z_allocdata = aclnode->z_acldata; ++ aclnode->z_allocsize = bytes; ++ aclnode->z_size = bytes; ++ } ++ ++ return (aclnode); ++} ++ ++static void ++zfs_acl_node_free(zfs_acl_node_t *aclnode) ++{ ++ if (aclnode->z_allocsize) ++ kmem_free(aclnode->z_allocdata, aclnode->z_allocsize); ++ kmem_free(aclnode, sizeof (zfs_acl_node_t)); ++} ++ ++static void ++zfs_acl_release_nodes(zfs_acl_t *aclp) ++{ ++ zfs_acl_node_t *aclnode; ++ ++ while ((aclnode = list_head(&aclp->z_acl))) { ++ list_remove(&aclp->z_acl, aclnode); ++ zfs_acl_node_free(aclnode); ++ } ++ aclp->z_acl_count = 0; ++ aclp->z_acl_bytes = 0; ++} ++ ++void ++zfs_acl_free(zfs_acl_t *aclp) ++{ ++ zfs_acl_release_nodes(aclp); ++ list_destroy(&aclp->z_acl); ++ kmem_free(aclp, sizeof (zfs_acl_t)); ++} ++ ++static boolean_t ++zfs_acl_valid_ace_type(uint_t type, uint_t flags) ++{ ++ uint16_t entry_type; ++ ++ switch (type) { ++ case ALLOW: ++ case DENY: ++ case ACE_SYSTEM_AUDIT_ACE_TYPE: ++ case ACE_SYSTEM_ALARM_ACE_TYPE: ++ entry_type = flags & ACE_TYPE_FLAGS; ++ return (entry_type == ACE_OWNER || ++ entry_type == OWNING_GROUP || ++ entry_type == ACE_EVERYONE || entry_type == 0 || ++ entry_type == ACE_IDENTIFIER_GROUP); ++ default: ++ if (type >= MIN_ACE_TYPE && type <= MAX_ACE_TYPE) ++ return (B_TRUE); ++ } ++ return (B_FALSE); ++} ++ ++static boolean_t ++zfs_ace_valid(umode_t obj_mode, zfs_acl_t *aclp, uint16_t type, uint16_t iflags) ++{ ++ /* ++ * first check type of entry ++ */ ++ ++ if (!zfs_acl_valid_ace_type(type, iflags)) ++ return (B_FALSE); ++ ++ switch (type) { ++ case ACE_ACCESS_ALLOWED_OBJECT_ACE_TYPE: ++ case ACE_ACCESS_DENIED_OBJECT_ACE_TYPE: ++ case ACE_SYSTEM_AUDIT_OBJECT_ACE_TYPE: ++ case ACE_SYSTEM_ALARM_OBJECT_ACE_TYPE: ++ if (aclp->z_version < ZFS_ACL_VERSION_FUID) ++ return (B_FALSE); ++ aclp->z_hints |= ZFS_ACL_OBJ_ACE; ++ } ++ ++ /* ++ * next check inheritance level flags ++ */ ++ ++ if (S_ISDIR(obj_mode) && ++ (iflags & (ACE_FILE_INHERIT_ACE|ACE_DIRECTORY_INHERIT_ACE))) ++ aclp->z_hints |= ZFS_INHERIT_ACE; ++ ++ if (iflags & (ACE_INHERIT_ONLY_ACE|ACE_NO_PROPAGATE_INHERIT_ACE)) { ++ if ((iflags & (ACE_FILE_INHERIT_ACE| ++ ACE_DIRECTORY_INHERIT_ACE)) == 0) { ++ return (B_FALSE); ++ } ++ } ++ ++ return (B_TRUE); ++} ++ ++static void * ++zfs_acl_next_ace(zfs_acl_t *aclp, void *start, uint64_t *who, ++ uint32_t *access_mask, uint16_t *iflags, uint16_t *type) ++{ ++ zfs_acl_node_t *aclnode; ++ ++ ASSERT(aclp); ++ ++ if (start == NULL) { ++ aclnode = list_head(&aclp->z_acl); ++ if (aclnode == NULL) ++ return (NULL); ++ ++ aclp->z_next_ace = 
aclnode->z_acldata; ++ aclp->z_curr_node = aclnode; ++ aclnode->z_ace_idx = 0; ++ } ++ ++ aclnode = aclp->z_curr_node; ++ ++ if (aclnode == NULL) ++ return (NULL); ++ ++ if (aclnode->z_ace_idx >= aclnode->z_ace_count) { ++ aclnode = list_next(&aclp->z_acl, aclnode); ++ if (aclnode == NULL) ++ return (NULL); ++ else { ++ aclp->z_curr_node = aclnode; ++ aclnode->z_ace_idx = 0; ++ aclp->z_next_ace = aclnode->z_acldata; ++ } ++ } ++ ++ if (aclnode->z_ace_idx < aclnode->z_ace_count) { ++ void *acep = aclp->z_next_ace; ++ size_t ace_size; ++ ++ /* ++ * Make sure we don't overstep our bounds ++ */ ++ ace_size = aclp->z_ops->ace_size(acep); ++ ++ if (((caddr_t)acep + ace_size) > ++ ((caddr_t)aclnode->z_acldata + aclnode->z_size)) { ++ return (NULL); ++ } ++ ++ *iflags = aclp->z_ops->ace_flags_get(acep); ++ *type = aclp->z_ops->ace_type_get(acep); ++ *access_mask = aclp->z_ops->ace_mask_get(acep); ++ *who = aclp->z_ops->ace_who_get(acep); ++ aclp->z_next_ace = (caddr_t)aclp->z_next_ace + ace_size; ++ aclnode->z_ace_idx++; ++ ++ return ((void *)acep); ++ } ++ return (NULL); ++} ++ ++/*ARGSUSED*/ ++static uint64_t ++zfs_ace_walk(void *datap, uint64_t cookie, int aclcnt, ++ uint16_t *flags, uint16_t *type, uint32_t *mask) ++{ ++ zfs_acl_t *aclp = datap; ++ zfs_ace_hdr_t *acep = (zfs_ace_hdr_t *)(uintptr_t)cookie; ++ uint64_t who; ++ ++ acep = zfs_acl_next_ace(aclp, acep, &who, mask, ++ flags, type); ++ return ((uint64_t)(uintptr_t)acep); ++} ++ ++/* ++ * Copy ACE to internal ZFS format. ++ * While processing the ACL each ACE will be validated for correctness. ++ * ACE FUIDs will be created later. ++ */ ++int ++zfs_copy_ace_2_fuid(zfs_sb_t *zsb, umode_t obj_mode, zfs_acl_t *aclp, ++ void *datap, zfs_ace_t *z_acl, uint64_t aclcnt, size_t *size, ++ zfs_fuid_info_t **fuidp, cred_t *cr) ++{ ++ int i; ++ uint16_t entry_type; ++ zfs_ace_t *aceptr = z_acl; ++ ace_t *acep = datap; ++ zfs_object_ace_t *zobjacep; ++ ace_object_t *aceobjp; ++ ++ for (i = 0; i != aclcnt; i++) { ++ aceptr->z_hdr.z_access_mask = acep->a_access_mask; ++ aceptr->z_hdr.z_flags = acep->a_flags; ++ aceptr->z_hdr.z_type = acep->a_type; ++ entry_type = aceptr->z_hdr.z_flags & ACE_TYPE_FLAGS; ++ if (entry_type != ACE_OWNER && entry_type != OWNING_GROUP && ++ entry_type != ACE_EVERYONE) { ++ aceptr->z_fuid = zfs_fuid_create(zsb, acep->a_who, ++ cr, (entry_type == 0) ? 
++ ZFS_ACE_USER : ZFS_ACE_GROUP, fuidp); ++ } ++ ++ /* ++ * Make sure ACE is valid ++ */ ++ if (zfs_ace_valid(obj_mode, aclp, aceptr->z_hdr.z_type, ++ aceptr->z_hdr.z_flags) != B_TRUE) ++ return (EINVAL); ++ ++ switch (acep->a_type) { ++ case ACE_ACCESS_ALLOWED_OBJECT_ACE_TYPE: ++ case ACE_ACCESS_DENIED_OBJECT_ACE_TYPE: ++ case ACE_SYSTEM_AUDIT_OBJECT_ACE_TYPE: ++ case ACE_SYSTEM_ALARM_OBJECT_ACE_TYPE: ++ zobjacep = (zfs_object_ace_t *)aceptr; ++ aceobjp = (ace_object_t *)acep; ++ ++ bcopy(aceobjp->a_obj_type, zobjacep->z_object_type, ++ sizeof (aceobjp->a_obj_type)); ++ bcopy(aceobjp->a_inherit_obj_type, ++ zobjacep->z_inherit_type, ++ sizeof (aceobjp->a_inherit_obj_type)); ++ acep = (ace_t *)((caddr_t)acep + sizeof (ace_object_t)); ++ break; ++ default: ++ acep = (ace_t *)((caddr_t)acep + sizeof (ace_t)); ++ } ++ ++ aceptr = (zfs_ace_t *)((caddr_t)aceptr + ++ aclp->z_ops->ace_size(aceptr)); ++ } ++ ++ *size = (caddr_t)aceptr - (caddr_t)z_acl; ++ ++ return (0); ++} ++ ++/* ++ * Copy ZFS ACEs to fixed size ace_t layout ++ */ ++static void ++zfs_copy_fuid_2_ace(zfs_sb_t *zsb, zfs_acl_t *aclp, cred_t *cr, ++ void *datap, int filter) ++{ ++ uint64_t who; ++ uint32_t access_mask; ++ uint16_t iflags, type; ++ zfs_ace_hdr_t *zacep = NULL; ++ ace_t *acep = datap; ++ ace_object_t *objacep; ++ zfs_object_ace_t *zobjacep; ++ size_t ace_size; ++ uint16_t entry_type; ++ ++ while ((zacep = zfs_acl_next_ace(aclp, zacep, ++ &who, &access_mask, &iflags, &type))) { ++ ++ switch (type) { ++ case ACE_ACCESS_ALLOWED_OBJECT_ACE_TYPE: ++ case ACE_ACCESS_DENIED_OBJECT_ACE_TYPE: ++ case ACE_SYSTEM_AUDIT_OBJECT_ACE_TYPE: ++ case ACE_SYSTEM_ALARM_OBJECT_ACE_TYPE: ++ if (filter) { ++ continue; ++ } ++ zobjacep = (zfs_object_ace_t *)zacep; ++ objacep = (ace_object_t *)acep; ++ bcopy(zobjacep->z_object_type, ++ objacep->a_obj_type, ++ sizeof (zobjacep->z_object_type)); ++ bcopy(zobjacep->z_inherit_type, ++ objacep->a_inherit_obj_type, ++ sizeof (zobjacep->z_inherit_type)); ++ ace_size = sizeof (ace_object_t); ++ break; ++ default: ++ ace_size = sizeof (ace_t); ++ break; ++ } ++ ++ entry_type = (iflags & ACE_TYPE_FLAGS); ++ if ((entry_type != ACE_OWNER && ++ entry_type != OWNING_GROUP && ++ entry_type != ACE_EVERYONE)) { ++ acep->a_who = zfs_fuid_map_id(zsb, who, ++ cr, (entry_type & ACE_IDENTIFIER_GROUP) ? 
++ ZFS_ACE_GROUP : ZFS_ACE_USER); ++ } else { ++ acep->a_who = (uid_t)(int64_t)who; ++ } ++ acep->a_access_mask = access_mask; ++ acep->a_flags = iflags; ++ acep->a_type = type; ++ acep = (ace_t *)((caddr_t)acep + ace_size); ++ } ++} ++ ++static int ++zfs_copy_ace_2_oldace(umode_t obj_mode, zfs_acl_t *aclp, ace_t *acep, ++ zfs_oldace_t *z_acl, int aclcnt, size_t *size) ++{ ++ int i; ++ zfs_oldace_t *aceptr = z_acl; ++ ++ for (i = 0; i != aclcnt; i++, aceptr++) { ++ aceptr->z_access_mask = acep[i].a_access_mask; ++ aceptr->z_type = acep[i].a_type; ++ aceptr->z_flags = acep[i].a_flags; ++ aceptr->z_fuid = acep[i].a_who; ++ /* ++ * Make sure ACE is valid ++ */ ++ if (zfs_ace_valid(obj_mode, aclp, aceptr->z_type, ++ aceptr->z_flags) != B_TRUE) ++ return (EINVAL); ++ } ++ *size = (caddr_t)aceptr - (caddr_t)z_acl; ++ return (0); ++} ++ ++/* ++ * convert old ACL format to new ++ */ ++void ++zfs_acl_xform(znode_t *zp, zfs_acl_t *aclp, cred_t *cr) ++{ ++ zfs_oldace_t *oldaclp; ++ int i; ++ uint16_t type, iflags; ++ uint32_t access_mask; ++ uint64_t who; ++ void *cookie = NULL; ++ zfs_acl_node_t *newaclnode; ++ ++ ASSERT(aclp->z_version == ZFS_ACL_VERSION_INITIAL); ++ /* ++ * First create the ACE in a contiguous piece of memory ++ * for zfs_copy_ace_2_fuid(). ++ * ++ * We only convert an ACL once, so this won't happen ++ * everytime. ++ */ ++ oldaclp = kmem_alloc(sizeof (zfs_oldace_t) * aclp->z_acl_count, ++ KM_SLEEP); ++ i = 0; ++ while ((cookie = zfs_acl_next_ace(aclp, cookie, &who, ++ &access_mask, &iflags, &type))) { ++ oldaclp[i].z_flags = iflags; ++ oldaclp[i].z_type = type; ++ oldaclp[i].z_fuid = who; ++ oldaclp[i++].z_access_mask = access_mask; ++ } ++ ++ newaclnode = zfs_acl_node_alloc(aclp->z_acl_count * ++ sizeof (zfs_object_ace_t)); ++ aclp->z_ops = &zfs_acl_fuid_ops; ++ VERIFY(zfs_copy_ace_2_fuid(ZTOZSB(zp), ZTOI(zp)->i_mode, ++ aclp, oldaclp, newaclnode->z_acldata, aclp->z_acl_count, ++ &newaclnode->z_size, NULL, cr) == 0); ++ newaclnode->z_ace_count = aclp->z_acl_count; ++ aclp->z_version = ZFS_ACL_VERSION; ++ kmem_free(oldaclp, aclp->z_acl_count * sizeof (zfs_oldace_t)); ++ ++ /* ++ * Release all previous ACL nodes ++ */ ++ ++ zfs_acl_release_nodes(aclp); ++ ++ list_insert_head(&aclp->z_acl, newaclnode); ++ ++ aclp->z_acl_bytes = newaclnode->z_size; ++ aclp->z_acl_count = newaclnode->z_ace_count; ++ ++} ++ ++/* ++ * Convert unix access mask to v4 access mask ++ */ ++static uint32_t ++zfs_unix_to_v4(uint32_t access_mask) ++{ ++ uint32_t new_mask = 0; ++ ++ if (access_mask & S_IXOTH) ++ new_mask |= ACE_EXECUTE; ++ if (access_mask & S_IWOTH) ++ new_mask |= ACE_WRITE_DATA; ++ if (access_mask & S_IROTH) ++ new_mask |= ACE_READ_DATA; ++ return (new_mask); ++} ++ ++static void ++zfs_set_ace(zfs_acl_t *aclp, void *acep, uint32_t access_mask, ++ uint16_t access_type, uint64_t fuid, uint16_t entry_type) ++{ ++ uint16_t type = entry_type & ACE_TYPE_FLAGS; ++ ++ aclp->z_ops->ace_mask_set(acep, access_mask); ++ aclp->z_ops->ace_type_set(acep, access_type); ++ aclp->z_ops->ace_flags_set(acep, entry_type); ++ if ((type != ACE_OWNER && type != OWNING_GROUP && ++ type != ACE_EVERYONE)) ++ aclp->z_ops->ace_who_set(acep, fuid); ++} ++ ++/* ++ * Determine mode of file based on ACL. 
++ * Also, create FUIDs for any User/Group ACEs ++ */ ++uint64_t ++zfs_mode_compute(uint64_t fmode, zfs_acl_t *aclp, ++ uint64_t *pflags, uint64_t fuid, uint64_t fgid) ++{ ++ int entry_type; ++ mode_t mode; ++ mode_t seen = 0; ++ zfs_ace_hdr_t *acep = NULL; ++ uint64_t who; ++ uint16_t iflags, type; ++ uint32_t access_mask; ++ boolean_t an_exec_denied = B_FALSE; ++ ++ mode = (fmode & (S_IFMT | S_ISUID | S_ISGID | S_ISVTX)); ++ ++ while ((acep = zfs_acl_next_ace(aclp, acep, &who, ++ &access_mask, &iflags, &type))) { ++ ++ if (!zfs_acl_valid_ace_type(type, iflags)) ++ continue; ++ ++ entry_type = (iflags & ACE_TYPE_FLAGS); ++ ++ /* ++ * Skip over owner@, group@ or everyone@ inherit only ACEs ++ */ ++ if ((iflags & ACE_INHERIT_ONLY_ACE) && ++ (entry_type == ACE_OWNER || entry_type == ACE_EVERYONE || ++ entry_type == OWNING_GROUP)) ++ continue; ++ ++ if (entry_type == ACE_OWNER || (entry_type == 0 && ++ who == fuid)) { ++ if ((access_mask & ACE_READ_DATA) && ++ (!(seen & S_IRUSR))) { ++ seen |= S_IRUSR; ++ if (type == ALLOW) { ++ mode |= S_IRUSR; ++ } ++ } ++ if ((access_mask & ACE_WRITE_DATA) && ++ (!(seen & S_IWUSR))) { ++ seen |= S_IWUSR; ++ if (type == ALLOW) { ++ mode |= S_IWUSR; ++ } ++ } ++ if ((access_mask & ACE_EXECUTE) && ++ (!(seen & S_IXUSR))) { ++ seen |= S_IXUSR; ++ if (type == ALLOW) { ++ mode |= S_IXUSR; ++ } ++ } ++ } else if (entry_type == OWNING_GROUP || ++ (entry_type == ACE_IDENTIFIER_GROUP && who == fgid)) { ++ if ((access_mask & ACE_READ_DATA) && ++ (!(seen & S_IRGRP))) { ++ seen |= S_IRGRP; ++ if (type == ALLOW) { ++ mode |= S_IRGRP; ++ } ++ } ++ if ((access_mask & ACE_WRITE_DATA) && ++ (!(seen & S_IWGRP))) { ++ seen |= S_IWGRP; ++ if (type == ALLOW) { ++ mode |= S_IWGRP; ++ } ++ } ++ if ((access_mask & ACE_EXECUTE) && ++ (!(seen & S_IXGRP))) { ++ seen |= S_IXGRP; ++ if (type == ALLOW) { ++ mode |= S_IXGRP; ++ } ++ } ++ } else if (entry_type == ACE_EVERYONE) { ++ if ((access_mask & ACE_READ_DATA)) { ++ if (!(seen & S_IRUSR)) { ++ seen |= S_IRUSR; ++ if (type == ALLOW) { ++ mode |= S_IRUSR; ++ } ++ } ++ if (!(seen & S_IRGRP)) { ++ seen |= S_IRGRP; ++ if (type == ALLOW) { ++ mode |= S_IRGRP; ++ } ++ } ++ if (!(seen & S_IROTH)) { ++ seen |= S_IROTH; ++ if (type == ALLOW) { ++ mode |= S_IROTH; ++ } ++ } ++ } ++ if ((access_mask & ACE_WRITE_DATA)) { ++ if (!(seen & S_IWUSR)) { ++ seen |= S_IWUSR; ++ if (type == ALLOW) { ++ mode |= S_IWUSR; ++ } ++ } ++ if (!(seen & S_IWGRP)) { ++ seen |= S_IWGRP; ++ if (type == ALLOW) { ++ mode |= S_IWGRP; ++ } ++ } ++ if (!(seen & S_IWOTH)) { ++ seen |= S_IWOTH; ++ if (type == ALLOW) { ++ mode |= S_IWOTH; ++ } ++ } ++ } ++ if ((access_mask & ACE_EXECUTE)) { ++ if (!(seen & S_IXUSR)) { ++ seen |= S_IXUSR; ++ if (type == ALLOW) { ++ mode |= S_IXUSR; ++ } ++ } ++ if (!(seen & S_IXGRP)) { ++ seen |= S_IXGRP; ++ if (type == ALLOW) { ++ mode |= S_IXGRP; ++ } ++ } ++ if (!(seen & S_IXOTH)) { ++ seen |= S_IXOTH; ++ if (type == ALLOW) { ++ mode |= S_IXOTH; ++ } ++ } ++ } ++ } else { ++ /* ++ * Only care if this IDENTIFIER_GROUP or ++ * USER ACE denies execute access to someone, ++ * mode is not affected ++ */ ++ if ((access_mask & ACE_EXECUTE) && type == DENY) ++ an_exec_denied = B_TRUE; ++ } ++ } ++ ++ /* ++ * Failure to allow is effectively a deny, so execute permission ++ * is denied if it was never mentioned or if we explicitly ++ * weren't allowed it. 
++ */ ++ if (!an_exec_denied && ++ ((seen & ALL_MODE_EXECS) != ALL_MODE_EXECS || ++ (mode & ALL_MODE_EXECS) != ALL_MODE_EXECS)) ++ an_exec_denied = B_TRUE; ++ ++ if (an_exec_denied) ++ *pflags &= ~ZFS_NO_EXECS_DENIED; ++ else ++ *pflags |= ZFS_NO_EXECS_DENIED; ++ ++ return (mode); ++} ++ ++/* ++ * Read an external acl object. If the intent is to modify, always ++ * create a new acl and leave any cached acl in place. ++ */ ++static int ++zfs_acl_node_read(znode_t *zp, boolean_t have_lock, zfs_acl_t **aclpp, ++ boolean_t will_modify) ++{ ++ zfs_acl_t *aclp; ++ int aclsize; ++ int acl_count; ++ zfs_acl_node_t *aclnode; ++ zfs_acl_phys_t znode_acl; ++ int version; ++ int error; ++ boolean_t drop_lock = B_FALSE; ++ ++ ASSERT(MUTEX_HELD(&zp->z_acl_lock)); ++ ++ if (zp->z_acl_cached && !will_modify) { ++ *aclpp = zp->z_acl_cached; ++ return (0); ++ } ++ ++ /* ++ * close race where znode could be upgrade while trying to ++ * read the znode attributes. ++ * ++ * But this could only happen if the file isn't already an SA ++ * znode ++ */ ++ if (!zp->z_is_sa && !have_lock) { ++ mutex_enter(&zp->z_lock); ++ drop_lock = B_TRUE; ++ } ++ version = zfs_znode_acl_version(zp); ++ ++ if ((error = zfs_acl_znode_info(zp, &aclsize, ++ &acl_count, &znode_acl)) != 0) { ++ goto done; ++ } ++ ++ aclp = zfs_acl_alloc(version); ++ ++ aclp->z_acl_count = acl_count; ++ aclp->z_acl_bytes = aclsize; ++ ++ aclnode = zfs_acl_node_alloc(aclsize); ++ aclnode->z_ace_count = aclp->z_acl_count; ++ aclnode->z_size = aclsize; ++ ++ if (!zp->z_is_sa) { ++ if (znode_acl.z_acl_extern_obj) { ++ error = dmu_read(ZTOZSB(zp)->z_os, ++ znode_acl.z_acl_extern_obj, 0, aclnode->z_size, ++ aclnode->z_acldata, DMU_READ_PREFETCH); ++ } else { ++ bcopy(znode_acl.z_ace_data, aclnode->z_acldata, ++ aclnode->z_size); ++ } ++ } else { ++ error = sa_lookup(zp->z_sa_hdl, SA_ZPL_DACL_ACES(ZTOZSB(zp)), ++ aclnode->z_acldata, aclnode->z_size); ++ } ++ ++ if (error != 0) { ++ zfs_acl_free(aclp); ++ zfs_acl_node_free(aclnode); ++ /* convert checksum errors into IO errors */ ++ if (error == ECKSUM) ++ error = EIO; ++ goto done; ++ } ++ ++ list_insert_head(&aclp->z_acl, aclnode); ++ ++ *aclpp = aclp; ++ if (!will_modify) ++ zp->z_acl_cached = aclp; ++done: ++ if (drop_lock) ++ mutex_exit(&zp->z_lock); ++ return (error); ++} ++ ++/*ARGSUSED*/ ++void ++zfs_acl_data_locator(void **dataptr, uint32_t *length, uint32_t buflen, ++ boolean_t start, void *userdata) ++{ ++ zfs_acl_locator_cb_t *cb = (zfs_acl_locator_cb_t *)userdata; ++ ++ if (start) { ++ cb->cb_acl_node = list_head(&cb->cb_aclp->z_acl); ++ } else { ++ cb->cb_acl_node = list_next(&cb->cb_aclp->z_acl, ++ cb->cb_acl_node); ++ } ++ *dataptr = cb->cb_acl_node->z_acldata; ++ *length = cb->cb_acl_node->z_size; ++} ++ ++int ++zfs_acl_chown_setattr(znode_t *zp) ++{ ++ int error; ++ zfs_acl_t *aclp; ++ ++ ASSERT(MUTEX_HELD(&zp->z_lock)); ++ ASSERT(MUTEX_HELD(&zp->z_acl_lock)); ++ ++ if ((error = zfs_acl_node_read(zp, B_TRUE, &aclp, B_FALSE)) == 0) ++ zp->z_mode = zfs_mode_compute(zp->z_mode, aclp, ++ &zp->z_pflags, zp->z_uid, zp->z_gid); ++ return (error); ++} ++ ++static void ++acl_trivial_access_masks(mode_t mode, uint32_t *allow0, uint32_t *deny1, ++ uint32_t *deny2, uint32_t *owner, uint32_t *group, uint32_t *everyone) ++{ ++ *deny1 = *deny2 = *allow0 = *group = 0; ++ ++ if (!(mode & S_IRUSR) && (mode & (S_IRGRP|S_IROTH))) ++ *deny1 |= ACE_READ_DATA; ++ if (!(mode & S_IWUSR) && (mode & (S_IWGRP|S_IWOTH))) ++ *deny1 |= ACE_WRITE_DATA; ++ if (!(mode & S_IXUSR) && (mode & (S_IXGRP|S_IXOTH))) ++ *deny1 |= 
ACE_EXECUTE; ++ ++ if (!(mode & S_IRGRP) && (mode & S_IROTH)) ++ *deny2 = ACE_READ_DATA; ++ if (!(mode & S_IWGRP) && (mode & S_IWOTH)) ++ *deny2 |= ACE_WRITE_DATA; ++ if (!(mode & S_IXGRP) && (mode & S_IXOTH)) ++ *deny2 |= ACE_EXECUTE; ++ ++ if ((mode & S_IRUSR) && (!(mode & S_IRGRP) && (mode & S_IROTH))) ++ *allow0 |= ACE_READ_DATA; ++ if ((mode & S_IWUSR) && (!(mode & S_IWGRP) && (mode & S_IWOTH))) ++ *allow0 |= ACE_WRITE_DATA; ++ if ((mode & S_IXUSR) && (!(mode & S_IXGRP) && (mode & S_IXOTH))) ++ *allow0 |= ACE_EXECUTE; ++ ++ *owner = ACE_WRITE_ATTRIBUTES|ACE_WRITE_OWNER|ACE_WRITE_ACL| ++ ACE_WRITE_NAMED_ATTRS|ACE_READ_ACL|ACE_READ_ATTRIBUTES| ++ ACE_READ_NAMED_ATTRS|ACE_SYNCHRONIZE; ++ if (mode & S_IRUSR) ++ *owner |= ACE_READ_DATA; ++ if (mode & S_IWUSR) ++ *owner |= ACE_WRITE_DATA|ACE_APPEND_DATA; ++ if (mode & S_IXUSR) ++ *owner |= ACE_EXECUTE; ++ ++ *group = ACE_READ_ACL|ACE_READ_ATTRIBUTES| ACE_READ_NAMED_ATTRS| ++ ACE_SYNCHRONIZE; ++ if (mode & S_IRGRP) ++ *group |= ACE_READ_DATA; ++ if (mode & S_IWGRP) ++ *group |= ACE_WRITE_DATA|ACE_APPEND_DATA; ++ if (mode & S_IXGRP) ++ *group |= ACE_EXECUTE; ++ ++ *everyone = ACE_READ_ACL|ACE_READ_ATTRIBUTES| ACE_READ_NAMED_ATTRS| ++ ACE_SYNCHRONIZE; ++ if (mode & S_IROTH) ++ *everyone |= ACE_READ_DATA; ++ if (mode & S_IWOTH) ++ *everyone |= ACE_WRITE_DATA|ACE_APPEND_DATA; ++ if (mode & S_IXOTH) ++ *everyone |= ACE_EXECUTE; ++} ++ ++/* ++ * ace_trivial: ++ * determine whether an ace_t acl is trivial ++ * ++ * Trivialness implies that the acl is composed of only ++ * owner, group, everyone entries. ACL can't ++ * have read_acl denied, and write_owner/write_acl/write_attributes ++ * can only be owner@ entry. ++ */ ++static int ++ace_trivial_common(void *acep, int aclcnt, ++ uint64_t (*walk)(void *, uint64_t, int aclcnt, ++ uint16_t *, uint16_t *, uint32_t *)) ++{ ++ uint16_t flags; ++ uint32_t mask; ++ uint16_t type; ++ uint64_t cookie = 0; ++ ++ while ((cookie = walk(acep, cookie, aclcnt, &flags, &type, &mask))) { ++ switch (flags & ACE_TYPE_FLAGS) { ++ case ACE_OWNER: ++ case ACE_GROUP|ACE_IDENTIFIER_GROUP: ++ case ACE_EVERYONE: ++ break; ++ default: ++ return (1); ++ } ++ ++ if (flags & (ACE_FILE_INHERIT_ACE| ++ ACE_DIRECTORY_INHERIT_ACE|ACE_NO_PROPAGATE_INHERIT_ACE| ++ ACE_INHERIT_ONLY_ACE)) ++ return (1); ++ ++ /* ++ * Special check for some special bits ++ * ++ * Don't allow anybody to deny reading basic ++ * attributes or a files ACL. ++ */ ++ if ((mask & (ACE_READ_ACL|ACE_READ_ATTRIBUTES)) && ++ (type == ACE_ACCESS_DENIED_ACE_TYPE)) ++ return (1); ++ ++ /* ++ * Delete permissions are never set by default ++ */ ++ if (mask & (ACE_DELETE|ACE_DELETE_CHILD)) ++ return (1); ++ /* ++ * only allow owner@ to have ++ * write_acl/write_owner/write_attributes/write_xattr/ ++ */ ++ if (type == ACE_ACCESS_ALLOWED_ACE_TYPE && ++ (!(flags & ACE_OWNER) && (mask & ++ (ACE_WRITE_OWNER|ACE_WRITE_ACL| ACE_WRITE_ATTRIBUTES| ++ ACE_WRITE_NAMED_ATTRS)))) ++ return (1); ++ ++ } ++ ++ return (0); ++} ++ ++/* ++ * common code for setting ACLs. ++ * ++ * This function is called from zfs_mode_update, zfs_perm_init, and zfs_setacl. ++ * zfs_setacl passes a non-NULL inherit pointer (ihp) to indicate that it's ++ * already checked the acl and knows whether to inherit. 
++ */ ++int ++zfs_aclset_common(znode_t *zp, zfs_acl_t *aclp, cred_t *cr, dmu_tx_t *tx) ++{ ++ int error; ++ zfs_sb_t *zsb = ZTOZSB(zp); ++ dmu_object_type_t otype; ++ zfs_acl_locator_cb_t locate = { 0 }; ++ uint64_t mode; ++ sa_bulk_attr_t bulk[5]; ++ uint64_t ctime[2]; ++ int count = 0; ++ ++ mode = zp->z_mode; ++ ++ mode = zfs_mode_compute(mode, aclp, &zp->z_pflags, ++ zp->z_uid, zp->z_gid); ++ ++ zp->z_mode = mode; ++ SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_MODE(zsb), NULL, ++ &mode, sizeof (mode)); ++ SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_FLAGS(zsb), NULL, ++ &zp->z_pflags, sizeof (zp->z_pflags)); ++ SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_CTIME(zsb), NULL, ++ &ctime, sizeof (ctime)); ++ ++ if (zp->z_acl_cached) { ++ zfs_acl_free(zp->z_acl_cached); ++ zp->z_acl_cached = NULL; ++ } ++ ++ /* ++ * Upgrade needed? ++ */ ++ if (!zsb->z_use_fuids) { ++ otype = DMU_OT_OLDACL; ++ } else { ++ if ((aclp->z_version == ZFS_ACL_VERSION_INITIAL) && ++ (zsb->z_version >= ZPL_VERSION_FUID)) ++ zfs_acl_xform(zp, aclp, cr); ++ ASSERT(aclp->z_version >= ZFS_ACL_VERSION_FUID); ++ otype = DMU_OT_ACL; ++ } ++ ++ /* ++ * Arrgh, we have to handle old on disk format ++ * as well as newer (preferred) SA format. ++ */ ++ ++ if (zp->z_is_sa) { /* the easy case, just update the ACL attribute */ ++ locate.cb_aclp = aclp; ++ SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_DACL_ACES(zsb), ++ zfs_acl_data_locator, &locate, aclp->z_acl_bytes); ++ SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_DACL_COUNT(zsb), ++ NULL, &aclp->z_acl_count, sizeof (uint64_t)); ++ } else { /* Painful legacy way */ ++ zfs_acl_node_t *aclnode; ++ uint64_t off = 0; ++ zfs_acl_phys_t acl_phys; ++ uint64_t aoid; ++ ++ if ((error = sa_lookup(zp->z_sa_hdl, SA_ZPL_ZNODE_ACL(zsb), ++ &acl_phys, sizeof (acl_phys))) != 0) ++ return (error); ++ ++ aoid = acl_phys.z_acl_extern_obj; ++ ++ if (aclp->z_acl_bytes > ZFS_ACE_SPACE) { ++ /* ++ * If ACL was previously external and we are now ++ * converting to new ACL format then release old ++ * ACL object and create a new one. ++ */ ++ if (aoid && ++ aclp->z_version != acl_phys.z_acl_version) { ++ error = dmu_object_free(zsb->z_os, aoid, tx); ++ if (error) ++ return (error); ++ aoid = 0; ++ } ++ if (aoid == 0) { ++ aoid = dmu_object_alloc(zsb->z_os, ++ otype, aclp->z_acl_bytes, ++ otype == DMU_OT_ACL ? ++ DMU_OT_SYSACL : DMU_OT_NONE, ++ otype == DMU_OT_ACL ? ++ DN_MAX_BONUSLEN : 0, tx); ++ } else { ++ (void) dmu_object_set_blocksize(zsb->z_os, ++ aoid, aclp->z_acl_bytes, 0, tx); ++ } ++ acl_phys.z_acl_extern_obj = aoid; ++ for (aclnode = list_head(&aclp->z_acl); aclnode; ++ aclnode = list_next(&aclp->z_acl, aclnode)) { ++ if (aclnode->z_ace_count == 0) ++ continue; ++ dmu_write(zsb->z_os, aoid, off, ++ aclnode->z_size, aclnode->z_acldata, tx); ++ off += aclnode->z_size; ++ } ++ } else { ++ void *start = acl_phys.z_ace_data; ++ /* ++ * Migrating back embedded? ++ */ ++ if (acl_phys.z_acl_extern_obj) { ++ error = dmu_object_free(zsb->z_os, ++ acl_phys.z_acl_extern_obj, tx); ++ if (error) ++ return (error); ++ acl_phys.z_acl_extern_obj = 0; ++ } ++ ++ for (aclnode = list_head(&aclp->z_acl); aclnode; ++ aclnode = list_next(&aclp->z_acl, aclnode)) { ++ if (aclnode->z_ace_count == 0) ++ continue; ++ bcopy(aclnode->z_acldata, start, ++ aclnode->z_size); ++ start = (caddr_t)start + aclnode->z_size; ++ } ++ } ++ /* ++ * If Old version then swap count/bytes to match old ++ * layout of znode_acl_phys_t. 
++ */ ++ if (aclp->z_version == ZFS_ACL_VERSION_INITIAL) { ++ acl_phys.z_acl_size = aclp->z_acl_count; ++ acl_phys.z_acl_count = aclp->z_acl_bytes; ++ } else { ++ acl_phys.z_acl_size = aclp->z_acl_bytes; ++ acl_phys.z_acl_count = aclp->z_acl_count; ++ } ++ acl_phys.z_acl_version = aclp->z_version; ++ ++ SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_ZNODE_ACL(zsb), NULL, ++ &acl_phys, sizeof (acl_phys)); ++ } ++ ++ /* ++ * Replace ACL wide bits, but first clear them. ++ */ ++ zp->z_pflags &= ~ZFS_ACL_WIDE_FLAGS; ++ ++ zp->z_pflags |= aclp->z_hints; ++ ++ if (ace_trivial_common(aclp, 0, zfs_ace_walk) == 0) ++ zp->z_pflags |= ZFS_ACL_TRIVIAL; ++ ++ zfs_tstamp_update_setup(zp, STATE_CHANGED, NULL, ctime, B_TRUE); ++ return (sa_bulk_update(zp->z_sa_hdl, bulk, count, tx)); ++} ++ ++static void ++zfs_acl_chmod(zfs_sb_t *zsb, uint64_t mode, zfs_acl_t *aclp) ++{ ++ void *acep = NULL; ++ uint64_t who; ++ int new_count, new_bytes; ++ int ace_size; ++ int entry_type; ++ uint16_t iflags, type; ++ uint32_t access_mask; ++ zfs_acl_node_t *newnode; ++ size_t abstract_size = aclp->z_ops->ace_abstract_size(); ++ void *zacep; ++ uint32_t owner, group, everyone; ++ uint32_t deny1, deny2, allow0; ++ ++ new_count = new_bytes = 0; ++ ++ acl_trivial_access_masks((mode_t)mode, &allow0, &deny1, &deny2, ++ &owner, &group, &everyone); ++ ++ newnode = zfs_acl_node_alloc((abstract_size * 6) + aclp->z_acl_bytes); ++ ++ zacep = newnode->z_acldata; ++ if (allow0) { ++ zfs_set_ace(aclp, zacep, allow0, ALLOW, -1, ACE_OWNER); ++ zacep = (void *)((uintptr_t)zacep + abstract_size); ++ new_count++; ++ new_bytes += abstract_size; ++ } if (deny1) { ++ zfs_set_ace(aclp, zacep, deny1, DENY, -1, ACE_OWNER); ++ zacep = (void *)((uintptr_t)zacep + abstract_size); ++ new_count++; ++ new_bytes += abstract_size; ++ } ++ if (deny2) { ++ zfs_set_ace(aclp, zacep, deny2, DENY, -1, OWNING_GROUP); ++ zacep = (void *)((uintptr_t)zacep + abstract_size); ++ new_count++; ++ new_bytes += abstract_size; ++ } ++ ++ while ((acep = zfs_acl_next_ace(aclp, acep, &who, &access_mask, ++ &iflags, &type))) { ++ uint16_t inherit_flags; ++ ++ entry_type = (iflags & ACE_TYPE_FLAGS); ++ inherit_flags = (iflags & ALL_INHERIT); ++ ++ if ((entry_type == ACE_OWNER || entry_type == ACE_EVERYONE || ++ (entry_type == OWNING_GROUP)) && ++ ((inherit_flags & ACE_INHERIT_ONLY_ACE) == 0)) { ++ continue; ++ } ++ ++ if ((type != ALLOW && type != DENY) || ++ (inherit_flags & ACE_INHERIT_ONLY_ACE)) { ++ if (inherit_flags) ++ aclp->z_hints |= ZFS_INHERIT_ACE; ++ switch (type) { ++ case ACE_ACCESS_ALLOWED_OBJECT_ACE_TYPE: ++ case ACE_ACCESS_DENIED_OBJECT_ACE_TYPE: ++ case ACE_SYSTEM_AUDIT_OBJECT_ACE_TYPE: ++ case ACE_SYSTEM_ALARM_OBJECT_ACE_TYPE: ++ aclp->z_hints |= ZFS_ACL_OBJ_ACE; ++ break; ++ } ++ } else { ++ ++ /* ++ * Limit permissions to be no greater than ++ * group permissions ++ */ ++ if (zsb->z_acl_inherit == ZFS_ACL_RESTRICTED) { ++ if (!(mode & S_IRGRP)) ++ access_mask &= ~ACE_READ_DATA; ++ if (!(mode & S_IWGRP)) ++ access_mask &= ++ ~(ACE_WRITE_DATA|ACE_APPEND_DATA); ++ if (!(mode & S_IXGRP)) ++ access_mask &= ~ACE_EXECUTE; ++ access_mask &= ++ ~(ACE_WRITE_OWNER|ACE_WRITE_ACL| ++ ACE_WRITE_ATTRIBUTES|ACE_WRITE_NAMED_ATTRS); ++ } ++ } ++ zfs_set_ace(aclp, zacep, access_mask, type, who, iflags); ++ ace_size = aclp->z_ops->ace_size(acep); ++ zacep = (void *)((uintptr_t)zacep + ace_size); ++ new_count++; ++ new_bytes += ace_size; ++ } ++ zfs_set_ace(aclp, zacep, owner, 0, -1, ACE_OWNER); ++ zacep = (void *)((uintptr_t)zacep + abstract_size); ++ zfs_set_ace(aclp, zacep, group, 0, 
-1, OWNING_GROUP); ++ zacep = (void *)((uintptr_t)zacep + abstract_size); ++ zfs_set_ace(aclp, zacep, everyone, 0, -1, ACE_EVERYONE); ++ ++ new_count += 3; ++ new_bytes += abstract_size * 3; ++ zfs_acl_release_nodes(aclp); ++ aclp->z_acl_count = new_count; ++ aclp->z_acl_bytes = new_bytes; ++ newnode->z_ace_count = new_count; ++ newnode->z_size = new_bytes; ++ list_insert_tail(&aclp->z_acl, newnode); ++} ++ ++void ++zfs_acl_chmod_setattr(znode_t *zp, zfs_acl_t **aclp, uint64_t mode) ++{ ++ mutex_enter(&zp->z_acl_lock); ++ mutex_enter(&zp->z_lock); ++ *aclp = zfs_acl_alloc(zfs_acl_version_zp(zp)); ++ (*aclp)->z_hints = zp->z_pflags & V4_ACL_WIDE_FLAGS; ++ zfs_acl_chmod(ZTOZSB(zp), mode, *aclp); ++ mutex_exit(&zp->z_lock); ++ mutex_exit(&zp->z_acl_lock); ++ ASSERT(*aclp); ++} ++ ++/* ++ * strip off write_owner and write_acl ++ */ ++static void ++zfs_restricted_update(zfs_sb_t *zsb, zfs_acl_t *aclp, void *acep) ++{ ++ uint32_t mask = aclp->z_ops->ace_mask_get(acep); ++ ++ if ((zsb->z_acl_inherit == ZFS_ACL_RESTRICTED) && ++ (aclp->z_ops->ace_type_get(acep) == ALLOW)) { ++ mask &= ~RESTRICTED_CLEAR; ++ aclp->z_ops->ace_mask_set(acep, mask); ++ } ++} ++ ++/* ++ * Should ACE be inherited? ++ */ ++static int ++zfs_ace_can_use(umode_t obj_mode, uint16_t acep_flags) ++{ ++ int iflags = (acep_flags & 0xf); ++ ++ if (S_ISDIR(obj_mode) && (iflags & ACE_DIRECTORY_INHERIT_ACE)) ++ return (1); ++ else if (iflags & ACE_FILE_INHERIT_ACE) ++ return (!(S_ISDIR(obj_mode) && ++ (iflags & ACE_NO_PROPAGATE_INHERIT_ACE))); ++ return (0); ++} ++ ++/* ++ * inherit inheritable ACEs from parent ++ */ ++static zfs_acl_t * ++zfs_acl_inherit(zfs_sb_t *zsb, umode_t obj_mode, zfs_acl_t *paclp, ++ uint64_t mode, boolean_t *need_chmod) ++{ ++ void *pacep; ++ void *acep; ++ zfs_acl_node_t *aclnode; ++ zfs_acl_t *aclp = NULL; ++ uint64_t who; ++ uint32_t access_mask; ++ uint16_t iflags, newflags, type; ++ size_t ace_size; ++ void *data1, *data2; ++ size_t data1sz, data2sz; ++ boolean_t vdir = S_ISDIR(obj_mode); ++ boolean_t vreg = S_ISREG(obj_mode); ++ boolean_t passthrough, passthrough_x, noallow; ++ ++ passthrough_x = ++ zsb->z_acl_inherit == ZFS_ACL_PASSTHROUGH_X; ++ passthrough = passthrough_x || ++ zsb->z_acl_inherit == ZFS_ACL_PASSTHROUGH; ++ noallow = ++ zsb->z_acl_inherit == ZFS_ACL_NOALLOW; ++ ++ *need_chmod = B_TRUE; ++ pacep = NULL; ++ aclp = zfs_acl_alloc(paclp->z_version); ++ if (zsb->z_acl_inherit == ZFS_ACL_DISCARD || S_ISLNK(obj_mode)) ++ return (aclp); ++ while ((pacep = zfs_acl_next_ace(paclp, pacep, &who, ++ &access_mask, &iflags, &type))) { ++ ++ /* ++ * don't inherit bogus ACEs ++ */ ++ if (!zfs_acl_valid_ace_type(type, iflags)) ++ continue; ++ ++ if (noallow && type == ALLOW) ++ continue; ++ ++ ace_size = aclp->z_ops->ace_size(pacep); ++ ++ if (!zfs_ace_can_use(obj_mode, iflags)) ++ continue; ++ ++ /* ++ * If owner@, group@, or everyone@ inheritable ++ * then zfs_acl_chmod() isn't needed. 
++ */ ++ if (passthrough && ++ ((iflags & (ACE_OWNER|ACE_EVERYONE)) || ++ ((iflags & OWNING_GROUP) == ++ OWNING_GROUP)) && (vreg || (vdir && (iflags & ++ ACE_DIRECTORY_INHERIT_ACE)))) { ++ *need_chmod = B_FALSE; ++ } ++ ++ if (!vdir && passthrough_x && ++ ((mode & (S_IXUSR | S_IXGRP | S_IXOTH)) == 0)) { ++ access_mask &= ~ACE_EXECUTE; ++ } ++ ++ aclnode = zfs_acl_node_alloc(ace_size); ++ list_insert_tail(&aclp->z_acl, aclnode); ++ acep = aclnode->z_acldata; ++ ++ zfs_set_ace(aclp, acep, access_mask, type, ++ who, iflags|ACE_INHERITED_ACE); ++ ++ /* ++ * Copy special opaque data if any ++ */ ++ if ((data1sz = paclp->z_ops->ace_data(pacep, &data1)) != 0) { ++ VERIFY((data2sz = aclp->z_ops->ace_data(acep, ++ &data2)) == data1sz); ++ bcopy(data1, data2, data2sz); ++ } ++ ++ aclp->z_acl_count++; ++ aclnode->z_ace_count++; ++ aclp->z_acl_bytes += aclnode->z_size; ++ newflags = aclp->z_ops->ace_flags_get(acep); ++ ++ if (vdir) ++ aclp->z_hints |= ZFS_INHERIT_ACE; ++ ++ if ((iflags & ACE_NO_PROPAGATE_INHERIT_ACE) || !vdir) { ++ newflags &= ~ALL_INHERIT; ++ aclp->z_ops->ace_flags_set(acep, ++ newflags|ACE_INHERITED_ACE); ++ zfs_restricted_update(zsb, aclp, acep); ++ continue; ++ } ++ ++ ASSERT(vdir); ++ ++ /* ++ * If only FILE_INHERIT is set then turn on ++ * inherit_only ++ */ ++ if ((iflags & (ACE_FILE_INHERIT_ACE | ++ ACE_DIRECTORY_INHERIT_ACE)) == ACE_FILE_INHERIT_ACE) { ++ newflags |= ACE_INHERIT_ONLY_ACE; ++ aclp->z_ops->ace_flags_set(acep, ++ newflags|ACE_INHERITED_ACE); ++ } else { ++ newflags &= ~ACE_INHERIT_ONLY_ACE; ++ aclp->z_ops->ace_flags_set(acep, ++ newflags|ACE_INHERITED_ACE); ++ } ++ } ++ return (aclp); ++} ++ ++/* ++ * Create file system object initial permissions ++ * including inheritable ACEs. ++ */ ++int ++zfs_acl_ids_create(znode_t *dzp, int flag, vattr_t *vap, cred_t *cr, ++ vsecattr_t *vsecp, zfs_acl_ids_t *acl_ids) ++{ ++ int error; ++ zfs_sb_t *zsb = ZTOZSB(dzp); ++ zfs_acl_t *paclp; ++#ifdef HAVE_KSID ++ gid_t gid; ++#endif /* HAVE_KSID */ ++ boolean_t need_chmod = B_TRUE; ++ boolean_t inherited = B_FALSE; ++ ++ bzero(acl_ids, sizeof (zfs_acl_ids_t)); ++ acl_ids->z_mode = vap->va_mode; ++ ++ if (vsecp) ++ if ((error = zfs_vsec_2_aclp(zsb, vap->va_mode, vsecp, ++ cr, &acl_ids->z_fuidp, &acl_ids->z_aclp)) != 0) ++ return (error); ++ ++ acl_ids->z_fuid = vap->va_uid; ++ acl_ids->z_fgid = vap->va_gid; ++#ifdef HAVE_KSID ++ /* ++ * Determine uid and gid. 
++ */ ++ if ((flag & IS_ROOT_NODE) || zsb->z_replay || ++ ((flag & IS_XATTR) && (S_ISDIR(vap->va_mode)))) { ++ acl_ids->z_fuid = zfs_fuid_create(zsb, (uint64_t)vap->va_uid, ++ cr, ZFS_OWNER, &acl_ids->z_fuidp); ++ acl_ids->z_fgid = zfs_fuid_create(zsb, (uint64_t)vap->va_gid, ++ cr, ZFS_GROUP, &acl_ids->z_fuidp); ++ gid = vap->va_gid; ++ } else { ++ acl_ids->z_fuid = zfs_fuid_create_cred(zsb, ZFS_OWNER, ++ cr, &acl_ids->z_fuidp); ++ acl_ids->z_fgid = 0; ++ if (vap->va_mask & AT_GID) { ++ acl_ids->z_fgid = zfs_fuid_create(zsb, ++ (uint64_t)vap->va_gid, ++ cr, ZFS_GROUP, &acl_ids->z_fuidp); ++ gid = vap->va_gid; ++ if (acl_ids->z_fgid != dzp->z_gid && ++ !groupmember(vap->va_gid, cr) && ++ secpolicy_vnode_create_gid(cr) != 0) ++ acl_ids->z_fgid = 0; ++ } ++ if (acl_ids->z_fgid == 0) { ++ if (dzp->z_mode & S_ISGID) { ++ char *domain; ++ uint32_t rid; ++ ++ acl_ids->z_fgid = dzp->z_gid; ++ gid = zfs_fuid_map_id(zsb, acl_ids->z_fgid, ++ cr, ZFS_GROUP); ++ ++ if (zsb->z_use_fuids && ++ IS_EPHEMERAL(acl_ids->z_fgid)) { ++ domain = zfs_fuid_idx_domain( ++ &zsb->z_fuid_idx, ++ FUID_INDEX(acl_ids->z_fgid)); ++ rid = FUID_RID(acl_ids->z_fgid); ++ zfs_fuid_node_add(&acl_ids->z_fuidp, ++ domain, rid, ++ FUID_INDEX(acl_ids->z_fgid), ++ acl_ids->z_fgid, ZFS_GROUP); ++ } ++ } else { ++ acl_ids->z_fgid = zfs_fuid_create_cred(zsb, ++ ZFS_GROUP, cr, &acl_ids->z_fuidp); ++ gid = crgetgid(cr); ++ } ++ } ++ } ++#endif /* HAVE_KSID */ ++ ++ /* ++ * If we're creating a directory, and the parent directory has the ++ * set-GID bit set, set in on the new directory. ++ * Otherwise, if the user is neither privileged nor a member of the ++ * file's new group, clear the file's set-GID bit. ++ */ ++ ++ if (!(flag & IS_ROOT_NODE) && (dzp->z_mode & S_ISGID) && ++ (S_ISDIR(vap->va_mode))) { ++ acl_ids->z_mode |= S_ISGID; ++ } else { ++ if ((acl_ids->z_mode & S_ISGID) && ++ secpolicy_vnode_setids_setgids(cr, gid) != 0) ++ acl_ids->z_mode &= ~S_ISGID; ++ } ++ ++ if (acl_ids->z_aclp == NULL) { ++ mutex_enter(&dzp->z_acl_lock); ++ mutex_enter(&dzp->z_lock); ++ if (!(flag & IS_ROOT_NODE) && (S_ISDIR(ZTOI(dzp)->i_mode) && ++ (dzp->z_pflags & ZFS_INHERIT_ACE)) && ++ !(dzp->z_pflags & ZFS_XATTR)) { ++ VERIFY(0 == zfs_acl_node_read(dzp, B_TRUE, ++ &paclp, B_FALSE)); ++ acl_ids->z_aclp = zfs_acl_inherit(zsb, ++ vap->va_mode, paclp, acl_ids->z_mode, &need_chmod); ++ inherited = B_TRUE; ++ } else { ++ acl_ids->z_aclp = ++ zfs_acl_alloc(zfs_acl_version_zp(dzp)); ++ acl_ids->z_aclp->z_hints |= ZFS_ACL_TRIVIAL; ++ } ++ mutex_exit(&dzp->z_lock); ++ mutex_exit(&dzp->z_acl_lock); ++ if (need_chmod) { ++ acl_ids->z_aclp->z_hints |= S_ISDIR(vap->va_mode) ? 
++ ZFS_ACL_AUTO_INHERIT : 0; ++ zfs_acl_chmod(zsb, acl_ids->z_mode, acl_ids->z_aclp); ++ } ++ } ++ ++ if (inherited || vsecp) { ++ acl_ids->z_mode = zfs_mode_compute(acl_ids->z_mode, ++ acl_ids->z_aclp, &acl_ids->z_aclp->z_hints, ++ acl_ids->z_fuid, acl_ids->z_fgid); ++ if (ace_trivial_common(acl_ids->z_aclp, 0, zfs_ace_walk) == 0) ++ acl_ids->z_aclp->z_hints |= ZFS_ACL_TRIVIAL; ++ } ++ ++ return (0); ++} ++ ++/* ++ * Free ACL and fuid_infop, but not the acl_ids structure ++ */ ++void ++zfs_acl_ids_free(zfs_acl_ids_t *acl_ids) ++{ ++ if (acl_ids->z_aclp) ++ zfs_acl_free(acl_ids->z_aclp); ++ if (acl_ids->z_fuidp) ++ zfs_fuid_info_free(acl_ids->z_fuidp); ++ acl_ids->z_aclp = NULL; ++ acl_ids->z_fuidp = NULL; ++} ++ ++boolean_t ++zfs_acl_ids_overquota(zfs_sb_t *zsb, zfs_acl_ids_t *acl_ids) ++{ ++ return (zfs_fuid_overquota(zsb, B_FALSE, acl_ids->z_fuid) || ++ zfs_fuid_overquota(zsb, B_TRUE, acl_ids->z_fgid)); ++} ++ ++/* ++ * Retrieve a files ACL ++ */ ++int ++zfs_getacl(znode_t *zp, vsecattr_t *vsecp, boolean_t skipaclchk, cred_t *cr) ++{ ++ zfs_acl_t *aclp; ++ ulong_t mask; ++ int error; ++ int count = 0; ++ int largeace = 0; ++ ++ mask = vsecp->vsa_mask & (VSA_ACE | VSA_ACECNT | ++ VSA_ACE_ACLFLAGS | VSA_ACE_ALLTYPES); ++ ++ if (mask == 0) ++ return (ENOSYS); ++ ++ if ((error = zfs_zaccess(zp, ACE_READ_ACL, 0, skipaclchk, cr))) ++ return (error); ++ ++ mutex_enter(&zp->z_acl_lock); ++ ++ error = zfs_acl_node_read(zp, B_FALSE, &aclp, B_FALSE); ++ if (error != 0) { ++ mutex_exit(&zp->z_acl_lock); ++ return (error); ++ } ++ ++ /* ++ * Scan ACL to determine number of ACEs ++ */ ++ if ((zp->z_pflags & ZFS_ACL_OBJ_ACE) && !(mask & VSA_ACE_ALLTYPES)) { ++ void *zacep = NULL; ++ uint64_t who; ++ uint32_t access_mask; ++ uint16_t type, iflags; ++ ++ while ((zacep = zfs_acl_next_ace(aclp, zacep, ++ &who, &access_mask, &iflags, &type))) { ++ switch (type) { ++ case ACE_ACCESS_ALLOWED_OBJECT_ACE_TYPE: ++ case ACE_ACCESS_DENIED_OBJECT_ACE_TYPE: ++ case ACE_SYSTEM_AUDIT_OBJECT_ACE_TYPE: ++ case ACE_SYSTEM_ALARM_OBJECT_ACE_TYPE: ++ largeace++; ++ continue; ++ default: ++ count++; ++ } ++ } ++ vsecp->vsa_aclcnt = count; ++ } else ++ count = (int)aclp->z_acl_count; ++ ++ if (mask & VSA_ACECNT) { ++ vsecp->vsa_aclcnt = count; ++ } ++ ++ if (mask & VSA_ACE) { ++ size_t aclsz; ++ ++ aclsz = count * sizeof (ace_t) + ++ sizeof (ace_object_t) * largeace; ++ ++ vsecp->vsa_aclentp = kmem_alloc(aclsz, KM_SLEEP); ++ vsecp->vsa_aclentsz = aclsz; ++ ++ if (aclp->z_version == ZFS_ACL_VERSION_FUID) ++ zfs_copy_fuid_2_ace(ZTOZSB(zp), aclp, cr, ++ vsecp->vsa_aclentp, !(mask & VSA_ACE_ALLTYPES)); ++ else { ++ zfs_acl_node_t *aclnode; ++ void *start = vsecp->vsa_aclentp; ++ ++ for (aclnode = list_head(&aclp->z_acl); aclnode; ++ aclnode = list_next(&aclp->z_acl, aclnode)) { ++ bcopy(aclnode->z_acldata, start, ++ aclnode->z_size); ++ start = (caddr_t)start + aclnode->z_size; ++ } ++ ASSERT((caddr_t)start - (caddr_t)vsecp->vsa_aclentp == ++ aclp->z_acl_bytes); ++ } ++ } ++ if (mask & VSA_ACE_ACLFLAGS) { ++ vsecp->vsa_aclflags = 0; ++ if (zp->z_pflags & ZFS_ACL_DEFAULTED) ++ vsecp->vsa_aclflags |= ACL_DEFAULTED; ++ if (zp->z_pflags & ZFS_ACL_PROTECTED) ++ vsecp->vsa_aclflags |= ACL_PROTECTED; ++ if (zp->z_pflags & ZFS_ACL_AUTO_INHERIT) ++ vsecp->vsa_aclflags |= ACL_AUTO_INHERIT; ++ } ++ ++ mutex_exit(&zp->z_acl_lock); ++ ++ return (0); ++} ++ ++int ++zfs_vsec_2_aclp(zfs_sb_t *zsb, umode_t obj_mode, ++ vsecattr_t *vsecp, cred_t *cr, zfs_fuid_info_t **fuidp, zfs_acl_t **zaclp) ++{ ++ zfs_acl_t *aclp; ++ zfs_acl_node_t *aclnode; 
++ int aclcnt = vsecp->vsa_aclcnt; ++ int error; ++ ++ if (vsecp->vsa_aclcnt > MAX_ACL_ENTRIES || vsecp->vsa_aclcnt <= 0) ++ return (EINVAL); ++ ++ aclp = zfs_acl_alloc(zfs_acl_version(zsb->z_version)); ++ ++ aclp->z_hints = 0; ++ aclnode = zfs_acl_node_alloc(aclcnt * sizeof (zfs_object_ace_t)); ++ if (aclp->z_version == ZFS_ACL_VERSION_INITIAL) { ++ if ((error = zfs_copy_ace_2_oldace(obj_mode, aclp, ++ (ace_t *)vsecp->vsa_aclentp, aclnode->z_acldata, ++ aclcnt, &aclnode->z_size)) != 0) { ++ zfs_acl_free(aclp); ++ zfs_acl_node_free(aclnode); ++ return (error); ++ } ++ } else { ++ if ((error = zfs_copy_ace_2_fuid(zsb, obj_mode, aclp, ++ vsecp->vsa_aclentp, aclnode->z_acldata, aclcnt, ++ &aclnode->z_size, fuidp, cr)) != 0) { ++ zfs_acl_free(aclp); ++ zfs_acl_node_free(aclnode); ++ return (error); ++ } ++ } ++ aclp->z_acl_bytes = aclnode->z_size; ++ aclnode->z_ace_count = aclcnt; ++ aclp->z_acl_count = aclcnt; ++ list_insert_head(&aclp->z_acl, aclnode); ++ ++ /* ++ * If flags are being set then add them to z_hints ++ */ ++ if (vsecp->vsa_mask & VSA_ACE_ACLFLAGS) { ++ if (vsecp->vsa_aclflags & ACL_PROTECTED) ++ aclp->z_hints |= ZFS_ACL_PROTECTED; ++ if (vsecp->vsa_aclflags & ACL_DEFAULTED) ++ aclp->z_hints |= ZFS_ACL_DEFAULTED; ++ if (vsecp->vsa_aclflags & ACL_AUTO_INHERIT) ++ aclp->z_hints |= ZFS_ACL_AUTO_INHERIT; ++ } ++ ++ *zaclp = aclp; ++ ++ return (0); ++} ++ ++/* ++ * Set a files ACL ++ */ ++int ++zfs_setacl(znode_t *zp, vsecattr_t *vsecp, boolean_t skipaclchk, cred_t *cr) ++{ ++ zfs_sb_t *zsb = ZTOZSB(zp); ++ zilog_t *zilog = zsb->z_log; ++ ulong_t mask = vsecp->vsa_mask & (VSA_ACE | VSA_ACECNT); ++ dmu_tx_t *tx; ++ int error; ++ zfs_acl_t *aclp; ++ zfs_fuid_info_t *fuidp = NULL; ++ boolean_t fuid_dirtied; ++ uint64_t acl_obj; ++ ++ if (mask == 0) ++ return (ENOSYS); ++ ++ if (zp->z_pflags & ZFS_IMMUTABLE) ++ return (EPERM); ++ ++ if ((error = zfs_zaccess(zp, ACE_WRITE_ACL, 0, skipaclchk, cr))) ++ return (error); ++ ++ error = zfs_vsec_2_aclp(zsb, ZTOI(zp)->i_mode, vsecp, cr, &fuidp, ++ &aclp); ++ if (error) ++ return (error); ++ ++ /* ++ * If ACL wide flags aren't being set then preserve any ++ * existing flags. 
++ */ ++ if (!(vsecp->vsa_mask & VSA_ACE_ACLFLAGS)) { ++ aclp->z_hints |= ++ (zp->z_pflags & V4_ACL_WIDE_FLAGS); ++ } ++top: ++ mutex_enter(&zp->z_acl_lock); ++ mutex_enter(&zp->z_lock); ++ ++ tx = dmu_tx_create(zsb->z_os); ++ ++ dmu_tx_hold_sa(tx, zp->z_sa_hdl, B_TRUE); ++ ++ fuid_dirtied = zsb->z_fuid_dirty; ++ if (fuid_dirtied) ++ zfs_fuid_txhold(zsb, tx); ++ ++ /* ++ * If old version and ACL won't fit in bonus and we aren't ++ * upgrading then take out necessary DMU holds ++ */ ++ ++ if ((acl_obj = zfs_external_acl(zp)) != 0) { ++ if (zsb->z_version >= ZPL_VERSION_FUID && ++ zfs_znode_acl_version(zp) <= ZFS_ACL_VERSION_INITIAL) { ++ dmu_tx_hold_free(tx, acl_obj, 0, ++ DMU_OBJECT_END); ++ dmu_tx_hold_write(tx, DMU_NEW_OBJECT, 0, ++ aclp->z_acl_bytes); ++ } else { ++ dmu_tx_hold_write(tx, acl_obj, 0, aclp->z_acl_bytes); ++ } ++ } else if (!zp->z_is_sa && aclp->z_acl_bytes > ZFS_ACE_SPACE) { ++ dmu_tx_hold_write(tx, DMU_NEW_OBJECT, 0, aclp->z_acl_bytes); ++ } ++ ++ zfs_sa_upgrade_txholds(tx, zp); ++ error = dmu_tx_assign(tx, TXG_NOWAIT); ++ if (error) { ++ mutex_exit(&zp->z_acl_lock); ++ mutex_exit(&zp->z_lock); ++ ++ if (error == ERESTART) { ++ dmu_tx_wait(tx); ++ dmu_tx_abort(tx); ++ goto top; ++ } ++ dmu_tx_abort(tx); ++ zfs_acl_free(aclp); ++ return (error); ++ } ++ ++ error = zfs_aclset_common(zp, aclp, cr, tx); ++ ASSERT(error == 0); ++ ASSERT(zp->z_acl_cached == NULL); ++ zp->z_acl_cached = aclp; ++ ++ if (fuid_dirtied) ++ zfs_fuid_sync(zsb, tx); ++ ++ zfs_log_acl(zilog, tx, zp, vsecp, fuidp); ++ ++ if (fuidp) ++ zfs_fuid_info_free(fuidp); ++ dmu_tx_commit(tx); ++ ++ mutex_exit(&zp->z_lock); ++ mutex_exit(&zp->z_acl_lock); ++ ++ return (error); ++} ++ ++/* ++ * Check accesses of interest (AoI) against attributes of the dataset ++ * such as read-only. Returns zero if no AoI conflict with dataset ++ * attributes, otherwise an appropriate errno is returned. ++ */ ++static int ++zfs_zaccess_dataset_check(znode_t *zp, uint32_t v4_mode) ++{ ++ if ((v4_mode & WRITE_MASK) && (zfs_is_readonly(ZTOZSB(zp))) && ++ (!S_ISDEV(ZTOI(zp)->i_mode) || ++ (S_ISDEV(ZTOI(zp)->i_mode) && (v4_mode & WRITE_MASK_ATTRS)))) { ++ return (EROFS); ++ } ++ ++ /* ++ * Only check for READONLY on non-directories. ++ */ ++ if ((v4_mode & WRITE_MASK_DATA) && ++ ((!S_ISDIR(ZTOI(zp)->i_mode) && ++ (zp->z_pflags & (ZFS_READONLY | ZFS_IMMUTABLE))) || ++ (S_ISDIR(ZTOI(zp)->i_mode) && ++ (zp->z_pflags & ZFS_IMMUTABLE)))) { ++ return (EPERM); ++ } ++ ++ if ((v4_mode & (ACE_DELETE | ACE_DELETE_CHILD)) && ++ (zp->z_pflags & ZFS_NOUNLINK)) { ++ return (EPERM); ++ } ++ ++ if (((v4_mode & (ACE_READ_DATA|ACE_EXECUTE)) && ++ (zp->z_pflags & ZFS_AV_QUARANTINED))) { ++ return (EACCES); ++ } ++ ++ return (0); ++} ++ ++/* ++ * The primary usage of this function is to loop through all of the ++ * ACEs in the znode, determining what accesses of interest (AoI) to ++ * the caller are allowed or denied. The AoI are expressed as bits in ++ * the working_mode parameter. As each ACE is processed, bits covered ++ * by that ACE are removed from the working_mode. This removal ++ * facilitates two things. The first is that when the working mode is ++ * empty (= 0), we know we've looked at all the AoI. The second is ++ * that the ACE interpretation rules don't allow a later ACE to undo ++ * something granted or denied by an earlier ACE. Removing the ++ * discovered access or denial enforces this rule. 
At the end of ++ * processing the ACEs, all AoI that were found to be denied are ++ * placed into the working_mode, giving the caller a mask of denied ++ * accesses. Returns: ++ * 0 if all AoI granted ++ * EACCESS if the denied mask is non-zero ++ * other error if abnormal failure (e.g., IO error) ++ * ++ * A secondary usage of the function is to determine if any of the ++ * AoI are granted. If an ACE grants any access in ++ * the working_mode, we immediately short circuit out of the function. ++ * This mode is chosen by setting anyaccess to B_TRUE. The ++ * working_mode is not a denied access mask upon exit if the function ++ * is used in this manner. ++ */ ++static int ++zfs_zaccess_aces_check(znode_t *zp, uint32_t *working_mode, ++ boolean_t anyaccess, cred_t *cr) ++{ ++ zfs_sb_t *zsb = ZTOZSB(zp); ++ zfs_acl_t *aclp; ++ int error; ++ uid_t uid = crgetuid(cr); ++ uint64_t who; ++ uint16_t type, iflags; ++ uint16_t entry_type; ++ uint32_t access_mask; ++ uint32_t deny_mask = 0; ++ zfs_ace_hdr_t *acep = NULL; ++ boolean_t checkit; ++ uid_t gowner; ++ uid_t fowner; ++ ++ zfs_fuid_map_ids(zp, cr, &fowner, &gowner); ++ ++ mutex_enter(&zp->z_acl_lock); ++ ++ error = zfs_acl_node_read(zp, B_FALSE, &aclp, B_FALSE); ++ if (error != 0) { ++ mutex_exit(&zp->z_acl_lock); ++ return (error); ++ } ++ ++ ASSERT(zp->z_acl_cached); ++ ++ while ((acep = zfs_acl_next_ace(aclp, acep, &who, &access_mask, ++ &iflags, &type))) { ++ uint32_t mask_matched; ++ ++ if (!zfs_acl_valid_ace_type(type, iflags)) ++ continue; ++ ++ if (S_ISDIR(ZTOI(zp)->i_mode) && ++ (iflags & ACE_INHERIT_ONLY_ACE)) ++ continue; ++ ++ /* Skip ACE if it does not affect any AoI */ ++ mask_matched = (access_mask & *working_mode); ++ if (!mask_matched) ++ continue; ++ ++ entry_type = (iflags & ACE_TYPE_FLAGS); ++ ++ checkit = B_FALSE; ++ ++ switch (entry_type) { ++ case ACE_OWNER: ++ if (uid == fowner) ++ checkit = B_TRUE; ++ break; ++ case OWNING_GROUP: ++ who = gowner; ++ /*FALLTHROUGH*/ ++ case ACE_IDENTIFIER_GROUP: ++ checkit = zfs_groupmember(zsb, who, cr); ++ break; ++ case ACE_EVERYONE: ++ checkit = B_TRUE; ++ break; ++ ++ /* USER Entry */ ++ default: ++ if (entry_type == 0) { ++ uid_t newid; ++ ++ newid = zfs_fuid_map_id(zsb, who, cr, ++ ZFS_ACE_USER); ++ if (newid != IDMAP_WK_CREATOR_OWNER_UID && ++ uid == newid) ++ checkit = B_TRUE; ++ break; ++ } else { ++ mutex_exit(&zp->z_acl_lock); ++ return (EIO); ++ } ++ } ++ ++ if (checkit) { ++ if (type == DENY) { ++ DTRACE_PROBE3(zfs__ace__denies, ++ znode_t *, zp, ++ zfs_ace_hdr_t *, acep, ++ uint32_t, mask_matched); ++ deny_mask |= mask_matched; ++ } else { ++ DTRACE_PROBE3(zfs__ace__allows, ++ znode_t *, zp, ++ zfs_ace_hdr_t *, acep, ++ uint32_t, mask_matched); ++ if (anyaccess) { ++ mutex_exit(&zp->z_acl_lock); ++ return (0); ++ } ++ } ++ *working_mode &= ~mask_matched; ++ } ++ ++ /* Are we done? */ ++ if (*working_mode == 0) ++ break; ++ } ++ ++ mutex_exit(&zp->z_acl_lock); ++ ++ /* Put the found 'denies' back on the working mode */ ++ if (deny_mask) { ++ *working_mode |= deny_mask; ++ return (EACCES); ++ } else if (*working_mode) { ++ return (-1); ++ } ++ ++ return (0); ++} ++ ++/* ++ * Return true if any access whatsoever granted, we don't actually ++ * care what access is granted. 
++ */ ++boolean_t ++zfs_has_access(znode_t *zp, cred_t *cr) ++{ ++ uint32_t have = ACE_ALL_PERMS; ++ ++ if (zfs_zaccess_aces_check(zp, &have, B_TRUE, cr) != 0) { ++ uid_t owner; ++ ++ owner = zfs_fuid_map_id(ZTOZSB(zp), zp->z_uid, cr, ZFS_OWNER); ++ return (secpolicy_vnode_any_access(cr, ZTOI(zp), owner) == 0); ++ } ++ return (B_TRUE); ++} ++ ++static int ++zfs_zaccess_common(znode_t *zp, uint32_t v4_mode, uint32_t *working_mode, ++ boolean_t *check_privs, boolean_t skipaclchk, cred_t *cr) ++{ ++ zfs_sb_t *zsb = ZTOZSB(zp); ++ int err; ++ ++ *working_mode = v4_mode; ++ *check_privs = B_TRUE; ++ ++ /* ++ * Short circuit empty requests ++ */ ++ if (v4_mode == 0 || zsb->z_replay) { ++ *working_mode = 0; ++ return (0); ++ } ++ ++ if ((err = zfs_zaccess_dataset_check(zp, v4_mode)) != 0) { ++ *check_privs = B_FALSE; ++ return (err); ++ } ++ ++ /* ++ * The caller requested that the ACL check be skipped. This ++ * would only happen if the caller checked VOP_ACCESS() with a ++ * 32 bit ACE mask and already had the appropriate permissions. ++ */ ++ if (skipaclchk) { ++ *working_mode = 0; ++ return (0); ++ } ++ ++ return (zfs_zaccess_aces_check(zp, working_mode, B_FALSE, cr)); ++} ++ ++static int ++zfs_zaccess_append(znode_t *zp, uint32_t *working_mode, boolean_t *check_privs, ++ cred_t *cr) ++{ ++ if (*working_mode != ACE_WRITE_DATA) ++ return (EACCES); ++ ++ return (zfs_zaccess_common(zp, ACE_APPEND_DATA, working_mode, ++ check_privs, B_FALSE, cr)); ++} ++ ++int ++zfs_fastaccesschk_execute(znode_t *zdp, cred_t *cr) ++{ ++ boolean_t owner = B_FALSE; ++ boolean_t groupmbr = B_FALSE; ++ boolean_t is_attr; ++ uid_t uid = crgetuid(cr); ++ int error; ++ ++ if (zdp->z_pflags & ZFS_AV_QUARANTINED) ++ return (EACCES); ++ ++ is_attr = ((zdp->z_pflags & ZFS_XATTR) && ++ (S_ISDIR(ZTOI(zdp)->i_mode))); ++ if (is_attr) ++ goto slow; ++ ++ ++ mutex_enter(&zdp->z_acl_lock); ++ ++ if (zdp->z_pflags & ZFS_NO_EXECS_DENIED) { ++ mutex_exit(&zdp->z_acl_lock); ++ return (0); ++ } ++ ++ if (FUID_INDEX(zdp->z_uid) != 0 || FUID_INDEX(zdp->z_gid) != 0) { ++ mutex_exit(&zdp->z_acl_lock); ++ goto slow; ++ } ++ ++ if (uid == zdp->z_uid) { ++ owner = B_TRUE; ++ if (zdp->z_mode & S_IXUSR) { ++ mutex_exit(&zdp->z_acl_lock); ++ return (0); ++ } else { ++ mutex_exit(&zdp->z_acl_lock); ++ goto slow; ++ } ++ } ++ if (groupmember(zdp->z_gid, cr)) { ++ groupmbr = B_TRUE; ++ if (zdp->z_mode & S_IXGRP) { ++ mutex_exit(&zdp->z_acl_lock); ++ return (0); ++ } else { ++ mutex_exit(&zdp->z_acl_lock); ++ goto slow; ++ } ++ } ++ if (!owner && !groupmbr) { ++ if (zdp->z_mode & S_IXOTH) { ++ mutex_exit(&zdp->z_acl_lock); ++ return (0); ++ } ++ } ++ ++ mutex_exit(&zdp->z_acl_lock); ++ ++slow: ++ DTRACE_PROBE(zfs__fastpath__execute__access__miss); ++ ZFS_ENTER(ZTOZSB(zdp)); ++ error = zfs_zaccess(zdp, ACE_EXECUTE, 0, B_FALSE, cr); ++ ZFS_EXIT(ZTOZSB(zdp)); ++ return (error); ++} ++ ++/* ++ * Determine whether Access should be granted/denied. ++ * The least priv subsytem is always consulted as a basic privilege ++ * can define any form of access. 
++ */ ++int ++zfs_zaccess(znode_t *zp, int mode, int flags, boolean_t skipaclchk, cred_t *cr) ++{ ++ uint32_t working_mode; ++ int error; ++ int is_attr; ++ boolean_t check_privs; ++ znode_t *xzp; ++ znode_t *check_zp = zp; ++ mode_t needed_bits; ++ uid_t owner; ++ ++ is_attr = ((zp->z_pflags & ZFS_XATTR) && S_ISDIR(ZTOI(zp)->i_mode)); ++ ++ /* ++ * If attribute then validate against base file ++ */ ++ if (is_attr) { ++ uint64_t parent; ++ ++ if ((error = sa_lookup(zp->z_sa_hdl, ++ SA_ZPL_PARENT(ZTOZSB(zp)), &parent, ++ sizeof (parent))) != 0) ++ return (error); ++ ++ if ((error = zfs_zget(ZTOZSB(zp), ++ parent, &xzp)) != 0) { ++ return (error); ++ } ++ ++ check_zp = xzp; ++ ++ /* ++ * fixup mode to map to xattr perms ++ */ ++ ++ if (mode & (ACE_WRITE_DATA|ACE_APPEND_DATA)) { ++ mode &= ~(ACE_WRITE_DATA|ACE_APPEND_DATA); ++ mode |= ACE_WRITE_NAMED_ATTRS; ++ } ++ ++ if (mode & (ACE_READ_DATA|ACE_EXECUTE)) { ++ mode &= ~(ACE_READ_DATA|ACE_EXECUTE); ++ mode |= ACE_READ_NAMED_ATTRS; ++ } ++ } ++ ++ owner = zfs_fuid_map_id(ZTOZSB(zp), zp->z_uid, cr, ZFS_OWNER); ++ /* ++ * Map the bits required to the standard inode flags ++ * S_IRUSR|S_IWUSR|S_IXUSR in the needed_bits. Map the bits ++ * mapped by working_mode (currently missing) in missing_bits. ++ * Call secpolicy_vnode_access2() with (needed_bits & ~checkmode), ++ * needed_bits. ++ */ ++ needed_bits = 0; ++ ++ working_mode = mode; ++ if ((working_mode & (ACE_READ_ACL|ACE_READ_ATTRIBUTES)) && ++ owner == crgetuid(cr)) ++ working_mode &= ~(ACE_READ_ACL|ACE_READ_ATTRIBUTES); ++ ++ if (working_mode & (ACE_READ_DATA|ACE_READ_NAMED_ATTRS| ++ ACE_READ_ACL|ACE_READ_ATTRIBUTES|ACE_SYNCHRONIZE)) ++ needed_bits |= S_IRUSR; ++ if (working_mode & (ACE_WRITE_DATA|ACE_WRITE_NAMED_ATTRS| ++ ACE_APPEND_DATA|ACE_WRITE_ATTRIBUTES|ACE_SYNCHRONIZE)) ++ needed_bits |= S_IWUSR; ++ if (working_mode & ACE_EXECUTE) ++ needed_bits |= S_IXUSR; ++ ++ if ((error = zfs_zaccess_common(check_zp, mode, &working_mode, ++ &check_privs, skipaclchk, cr)) == 0) { ++ if (is_attr) ++ iput(ZTOI(xzp)); ++ return (secpolicy_vnode_access2(cr, ZTOI(zp), owner, ++ needed_bits, needed_bits)); ++ } ++ ++ if (error && !check_privs) { ++ if (is_attr) ++ iput(ZTOI(xzp)); ++ return (error); ++ } ++ ++ if (error && (flags & V_APPEND)) { ++ error = zfs_zaccess_append(zp, &working_mode, &check_privs, cr); ++ } ++ ++ if (error && check_privs) { ++ mode_t checkmode = 0; ++ ++ /* ++ * First check for implicit owner permission on ++ * read_acl/read_attributes ++ */ ++ ++ error = 0; ++ ASSERT(working_mode != 0); ++ ++ if ((working_mode & (ACE_READ_ACL|ACE_READ_ATTRIBUTES) && ++ owner == crgetuid(cr))) ++ working_mode &= ~(ACE_READ_ACL|ACE_READ_ATTRIBUTES); ++ ++ if (working_mode & (ACE_READ_DATA|ACE_READ_NAMED_ATTRS| ++ ACE_READ_ACL|ACE_READ_ATTRIBUTES|ACE_SYNCHRONIZE)) ++ checkmode |= S_IRUSR; ++ if (working_mode & (ACE_WRITE_DATA|ACE_WRITE_NAMED_ATTRS| ++ ACE_APPEND_DATA|ACE_WRITE_ATTRIBUTES|ACE_SYNCHRONIZE)) ++ checkmode |= S_IWUSR; ++ if (working_mode & ACE_EXECUTE) ++ checkmode |= S_IXUSR; ++ ++ error = secpolicy_vnode_access2(cr, ZTOI(check_zp), owner, ++ needed_bits & ~checkmode, needed_bits); ++ ++ if (error == 0 && (working_mode & ACE_WRITE_OWNER)) ++ error = secpolicy_vnode_chown(cr, owner); ++ if (error == 0 && (working_mode & ACE_WRITE_ACL)) ++ error = secpolicy_vnode_setdac(cr, owner); ++ ++ if (error == 0 && (working_mode & ++ (ACE_DELETE|ACE_DELETE_CHILD))) ++ error = secpolicy_vnode_remove(cr); ++ ++ if (error == 0 && (working_mode & ACE_SYNCHRONIZE)) { ++ error = 
secpolicy_vnode_chown(cr, owner); ++ } ++ if (error == 0) { ++ /* ++ * See if any bits other than those already checked ++ * for are still present. If so then return EACCES ++ */ ++ if (working_mode & ~(ZFS_CHECKED_MASKS)) { ++ error = EACCES; ++ } ++ } ++ } else if (error == 0) { ++ error = secpolicy_vnode_access2(cr, ZTOI(zp), owner, ++ needed_bits, needed_bits); ++ } ++ ++ ++ if (is_attr) ++ iput(ZTOI(xzp)); ++ ++ return (error); ++} ++ ++/* ++ * Translate traditional unix S_IRUSR/S_IWUSR/S_IXUSR mode into ++ * native ACL format and call zfs_zaccess() ++ */ ++int ++zfs_zaccess_rwx(znode_t *zp, mode_t mode, int flags, cred_t *cr) ++{ ++ return (zfs_zaccess(zp, zfs_unix_to_v4(mode >> 6), flags, B_FALSE, cr)); ++} ++ ++/* ++ * Access function for secpolicy_vnode_setattr ++ */ ++int ++zfs_zaccess_unix(znode_t *zp, mode_t mode, cred_t *cr) ++{ ++ int v4_mode = zfs_unix_to_v4(mode >> 6); ++ ++ return (zfs_zaccess(zp, v4_mode, 0, B_FALSE, cr)); ++} ++ ++static int ++zfs_delete_final_check(znode_t *zp, znode_t *dzp, ++ mode_t available_perms, cred_t *cr) ++{ ++ int error; ++ uid_t downer; ++ ++ downer = zfs_fuid_map_id(ZTOZSB(dzp), dzp->z_uid, cr, ZFS_OWNER); ++ ++ error = secpolicy_vnode_access2(cr, ZTOI(dzp), ++ downer, available_perms, S_IWUSR|S_IXUSR); ++ ++ if (error == 0) ++ error = zfs_sticky_remove_access(dzp, zp, cr); ++ ++ return (error); ++} ++ ++/* ++ * Determine whether Access should be granted/deny, without ++ * consulting least priv subsystem. ++ * ++ * ++ * The following chart is the recommended NFSv4 enforcement for ++ * ability to delete an object. ++ * ++ * ------------------------------------------------------- ++ * | Parent Dir | Target Object Permissions | ++ * | permissions | | ++ * ------------------------------------------------------- ++ * | | ACL Allows | ACL Denies| Delete | ++ * | | Delete | Delete | unspecified| ++ * ------------------------------------------------------- ++ * | ACL Allows | Permit | Permit | Permit | ++ * | DELETE_CHILD | | ++ * ------------------------------------------------------- ++ * | ACL Denies | Permit | Deny | Deny | ++ * | DELETE_CHILD | | | | ++ * ------------------------------------------------------- ++ * | ACL specifies | | | | ++ * | only allow | Permit | Permit | Permit | ++ * | write and | | | | ++ * | execute | | | | ++ * ------------------------------------------------------- ++ * | ACL denies | | | | ++ * | write and | Permit | Deny | Deny | ++ * | execute | | | | ++ * ------------------------------------------------------- ++ * ^ ++ * | ++ * No search privilege, can't even look up file? ++ * ++ */ ++int ++zfs_zaccess_delete(znode_t *dzp, znode_t *zp, cred_t *cr) ++{ ++ uint32_t dzp_working_mode = 0; ++ uint32_t zp_working_mode = 0; ++ int dzp_error, zp_error; ++ mode_t available_perms; ++ boolean_t dzpcheck_privs = B_TRUE; ++ boolean_t zpcheck_privs = B_TRUE; ++ ++ /* ++ * We want specific DELETE permissions to ++ * take precedence over WRITE/EXECUTE. We don't ++ * want an ACL such as this to mess us up. ++ * user:joe:write_data:deny,user:joe:delete:allow ++ * ++ * However, deny permissions may ultimately be overridden ++ * by secpolicy_vnode_access(). ++ * ++ * We will ask for all of the necessary permissions and then ++ * look at the working modes from the directory and target object ++ * to determine what was found. ++ */ ++ ++ if (zp->z_pflags & (ZFS_IMMUTABLE | ZFS_NOUNLINK)) ++ return (EPERM); ++ ++ /* ++ * First row ++ * If the directory permissions allow the delete, we are done. 
++ */ ++ if ((dzp_error = zfs_zaccess_common(dzp, ACE_DELETE_CHILD, ++ &dzp_working_mode, &dzpcheck_privs, B_FALSE, cr)) == 0) ++ return (0); ++ ++ /* ++ * If target object has delete permission then we are done ++ */ ++ if ((zp_error = zfs_zaccess_common(zp, ACE_DELETE, &zp_working_mode, ++ &zpcheck_privs, B_FALSE, cr)) == 0) ++ return (0); ++ ++ ASSERT(dzp_error && zp_error); ++ ++ if (!dzpcheck_privs) ++ return (dzp_error); ++ if (!zpcheck_privs) ++ return (zp_error); ++ ++ /* ++ * Second row ++ * ++ * If directory returns EACCES then delete_child was denied ++ * due to deny delete_child. In this case send the request through ++ * secpolicy_vnode_remove(). We don't use zfs_delete_final_check() ++ * since that *could* allow the delete based on write/execute permission ++ * and we want delete permissions to override write/execute. ++ */ ++ ++ if (dzp_error == EACCES) ++ return (secpolicy_vnode_remove(cr)); ++ ++ /* ++ * Third Row ++ * only need to see if we have write/execute on directory. ++ */ ++ ++ dzp_error = zfs_zaccess_common(dzp, ACE_EXECUTE|ACE_WRITE_DATA, ++ &dzp_working_mode, &dzpcheck_privs, B_FALSE, cr); ++ ++ if (dzp_error != 0 && !dzpcheck_privs) ++ return (dzp_error); ++ ++ /* ++ * Fourth row ++ */ ++ ++ available_perms = (dzp_working_mode & ACE_WRITE_DATA) ? 0 : S_IWUSR; ++ available_perms |= (dzp_working_mode & ACE_EXECUTE) ? 0 : S_IXUSR; ++ ++ return (zfs_delete_final_check(zp, dzp, available_perms, cr)); ++ ++} ++ ++int ++zfs_zaccess_rename(znode_t *sdzp, znode_t *szp, znode_t *tdzp, ++ znode_t *tzp, cred_t *cr) ++{ ++ int add_perm; ++ int error; ++ ++ if (szp->z_pflags & ZFS_AV_QUARANTINED) ++ return (EACCES); ++ ++ add_perm = S_ISDIR(ZTOI(szp)->i_mode) ? ++ ACE_ADD_SUBDIRECTORY : ACE_ADD_FILE; ++ ++ /* ++ * Rename permissions are combination of delete permission + ++ * add file/subdir permission. ++ */ ++ ++ /* ++ * first make sure we do the delete portion. ++ * ++ * If that succeeds then check for add_file/add_subdir permissions ++ */ ++ ++ if ((error = zfs_zaccess_delete(sdzp, szp, cr))) ++ return (error); ++ ++ /* ++ * If we have a tzp, see if we can delete it? ++ */ ++ if (tzp) { ++ if ((error = zfs_zaccess_delete(tdzp, tzp, cr))) ++ return (error); ++ } ++ ++ /* ++ * Now check for add permissions ++ */ ++ error = zfs_zaccess(tdzp, add_perm, 0, B_FALSE, cr); ++ ++ return (error); ++} +diff -uNr linux-3.2.33-go.orig/fs/zfs/zfs/zfs_byteswap.c linux-3.2.33-go/fs/zfs/zfs/zfs_byteswap.c +--- linux-3.2.33-go.orig/fs/zfs/zfs/zfs_byteswap.c 1970-01-01 01:00:00.000000000 +0100 ++++ linux-3.2.33-go/fs/zfs/zfs/zfs_byteswap.c 2012-11-16 23:25:34.349039334 +0100 +@@ -0,0 +1,205 @@ ++/* ++ * CDDL HEADER START ++ * ++ * The contents of this file are subject to the terms of the ++ * Common Development and Distribution License (the "License"). ++ * You may not use this file except in compliance with the License. ++ * ++ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE ++ * or http://www.opensolaris.org/os/licensing. ++ * See the License for the specific language governing permissions ++ * and limitations under the License. ++ * ++ * When distributing Covered Code, include this CDDL HEADER in each ++ * file and include the License file at usr/src/OPENSOLARIS.LICENSE. ++ * If applicable, add the following below this CDDL HEADER, with the ++ * fields enclosed by brackets "[]" replaced with your own identifying ++ * information: Portions Copyright [yyyy] [name of copyright owner] ++ * ++ * CDDL HEADER END ++ */ ++/* ++ * Copyright 2010 Sun Microsystems, Inc. 
All rights reserved. ++ * Use is subject to license terms. ++ */ ++ ++#include ++#include ++#include ++#include ++#include ++#include ++ ++void ++zfs_oldace_byteswap(ace_t *ace, int ace_cnt) ++{ ++ int i; ++ ++ for (i = 0; i != ace_cnt; i++, ace++) { ++ ace->a_who = BSWAP_32(ace->a_who); ++ ace->a_access_mask = BSWAP_32(ace->a_access_mask); ++ ace->a_flags = BSWAP_16(ace->a_flags); ++ ace->a_type = BSWAP_16(ace->a_type); ++ } ++} ++ ++/* ++ * swap ace_t and ace_oject_t ++ */ ++void ++zfs_ace_byteswap(void *buf, size_t size, boolean_t zfs_layout) ++{ ++ caddr_t end; ++ caddr_t ptr; ++ zfs_ace_t *zacep = NULL; ++ ace_t *acep; ++ uint16_t entry_type; ++ size_t entry_size; ++ int ace_type; ++ ++ end = (caddr_t)buf + size; ++ ptr = buf; ++ ++ while (ptr < end) { ++ if (zfs_layout) { ++ /* ++ * Avoid overrun. Embedded aces can have one ++ * of several sizes. We don't know exactly ++ * how many our present, only the size of the ++ * buffer containing them. That size may be ++ * larger than needed to hold the aces ++ * present. As long as we do not do any ++ * swapping beyond the end of our block we are ++ * okay. It it safe to swap any non-ace data ++ * within the block since it is just zeros. ++ */ ++ if (ptr + sizeof (zfs_ace_hdr_t) > end) { ++ break; ++ } ++ zacep = (zfs_ace_t *)ptr; ++ zacep->z_hdr.z_access_mask = ++ BSWAP_32(zacep->z_hdr.z_access_mask); ++ zacep->z_hdr.z_flags = BSWAP_16(zacep->z_hdr.z_flags); ++ ace_type = zacep->z_hdr.z_type = ++ BSWAP_16(zacep->z_hdr.z_type); ++ entry_type = zacep->z_hdr.z_flags & ACE_TYPE_FLAGS; ++ } else { ++ /* Overrun avoidance */ ++ if (ptr + sizeof (ace_t) > end) { ++ break; ++ } ++ acep = (ace_t *)ptr; ++ acep->a_access_mask = BSWAP_32(acep->a_access_mask); ++ acep->a_flags = BSWAP_16(acep->a_flags); ++ ace_type = acep->a_type = BSWAP_16(acep->a_type); ++ acep->a_who = BSWAP_32(acep->a_who); ++ entry_type = acep->a_flags & ACE_TYPE_FLAGS; ++ } ++ switch (entry_type) { ++ case ACE_OWNER: ++ case ACE_EVERYONE: ++ case (ACE_IDENTIFIER_GROUP | ACE_GROUP): ++ entry_size = zfs_layout ? ++ sizeof (zfs_ace_hdr_t) : sizeof (ace_t); ++ break; ++ case ACE_IDENTIFIER_GROUP: ++ default: ++ /* Overrun avoidance */ ++ if (zfs_layout) { ++ if (ptr + sizeof (zfs_ace_t) <= end) { ++ zacep->z_fuid = BSWAP_64(zacep->z_fuid); ++ } else { ++ entry_size = sizeof (zfs_ace_t); ++ break; ++ } ++ } ++ switch (ace_type) { ++ case ACE_ACCESS_ALLOWED_OBJECT_ACE_TYPE: ++ case ACE_ACCESS_DENIED_OBJECT_ACE_TYPE: ++ case ACE_SYSTEM_AUDIT_OBJECT_ACE_TYPE: ++ case ACE_SYSTEM_ALARM_OBJECT_ACE_TYPE: ++ entry_size = zfs_layout ? ++ sizeof (zfs_object_ace_t) : ++ sizeof (ace_object_t); ++ break; ++ default: ++ entry_size = zfs_layout ? 
sizeof (zfs_ace_t) : ++ sizeof (ace_t); ++ break; ++ } ++ } ++ ptr = ptr + entry_size; ++ } ++} ++ ++/* ARGSUSED */ ++void ++zfs_oldacl_byteswap(void *buf, size_t size) ++{ ++ int cnt; ++ ++ /* ++ * Arggh, since we don't know how many ACEs are in ++ * the array, we have to swap the entire block ++ */ ++ ++ cnt = size / sizeof (ace_t); ++ ++ zfs_oldace_byteswap((ace_t *)buf, cnt); ++} ++ ++/* ARGSUSED */ ++void ++zfs_acl_byteswap(void *buf, size_t size) ++{ ++ zfs_ace_byteswap(buf, size, B_TRUE); ++} ++ ++void ++zfs_znode_byteswap(void *buf, size_t size) ++{ ++ znode_phys_t *zp = buf; ++ ++ ASSERT(size >= sizeof (znode_phys_t)); ++ ++ zp->zp_crtime[0] = BSWAP_64(zp->zp_crtime[0]); ++ zp->zp_crtime[1] = BSWAP_64(zp->zp_crtime[1]); ++ zp->zp_atime[0] = BSWAP_64(zp->zp_atime[0]); ++ zp->zp_atime[1] = BSWAP_64(zp->zp_atime[1]); ++ zp->zp_mtime[0] = BSWAP_64(zp->zp_mtime[0]); ++ zp->zp_mtime[1] = BSWAP_64(zp->zp_mtime[1]); ++ zp->zp_ctime[0] = BSWAP_64(zp->zp_ctime[0]); ++ zp->zp_ctime[1] = BSWAP_64(zp->zp_ctime[1]); ++ zp->zp_gen = BSWAP_64(zp->zp_gen); ++ zp->zp_mode = BSWAP_64(zp->zp_mode); ++ zp->zp_size = BSWAP_64(zp->zp_size); ++ zp->zp_parent = BSWAP_64(zp->zp_parent); ++ zp->zp_links = BSWAP_64(zp->zp_links); ++ zp->zp_xattr = BSWAP_64(zp->zp_xattr); ++ zp->zp_rdev = BSWAP_64(zp->zp_rdev); ++ zp->zp_flags = BSWAP_64(zp->zp_flags); ++ zp->zp_uid = BSWAP_64(zp->zp_uid); ++ zp->zp_gid = BSWAP_64(zp->zp_gid); ++ zp->zp_zap = BSWAP_64(zp->zp_zap); ++ zp->zp_pad[0] = BSWAP_64(zp->zp_pad[0]); ++ zp->zp_pad[1] = BSWAP_64(zp->zp_pad[1]); ++ zp->zp_pad[2] = BSWAP_64(zp->zp_pad[2]); ++ ++ zp->zp_acl.z_acl_extern_obj = BSWAP_64(zp->zp_acl.z_acl_extern_obj); ++ zp->zp_acl.z_acl_size = BSWAP_32(zp->zp_acl.z_acl_size); ++ zp->zp_acl.z_acl_version = BSWAP_16(zp->zp_acl.z_acl_version); ++ zp->zp_acl.z_acl_count = BSWAP_16(zp->zp_acl.z_acl_count); ++ if (zp->zp_acl.z_acl_version == ZFS_ACL_VERSION) { ++ zfs_acl_byteswap((void *)&zp->zp_acl.z_ace_data[0], ++ ZFS_ACE_SPACE); ++ } else { ++ zfs_oldace_byteswap((ace_t *)&zp->zp_acl.z_ace_data[0], ++ ACE_SLOT_CNT); ++ } ++} ++ ++#if defined(_KERNEL) && defined(HAVE_SPL) ++EXPORT_SYMBOL(zfs_oldacl_byteswap); ++EXPORT_SYMBOL(zfs_acl_byteswap); ++EXPORT_SYMBOL(zfs_znode_byteswap); ++#endif +diff -uNr linux-3.2.33-go.orig/fs/zfs/zfs/zfs_ctldir.c linux-3.2.33-go/fs/zfs/zfs/zfs_ctldir.c +--- linux-3.2.33-go.orig/fs/zfs/zfs/zfs_ctldir.c 1970-01-01 01:00:00.000000000 +0100 ++++ linux-3.2.33-go/fs/zfs/zfs/zfs_ctldir.c 2012-11-16 23:25:34.351039311 +0100 +@@ -0,0 +1,992 @@ ++/* ++ * CDDL HEADER START ++ * ++ * The contents of this file are subject to the terms of the ++ * Common Development and Distribution License (the "License"). ++ * You may not use this file except in compliance with the License. ++ * ++ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE ++ * or http://www.opensolaris.org/os/licensing. ++ * See the License for the specific language governing permissions ++ * and limitations under the License. ++ * ++ * When distributing Covered Code, include this CDDL HEADER in each ++ * file and include the License file at usr/src/OPENSOLARIS.LICENSE. ++ * If applicable, add the following below this CDDL HEADER, with the ++ * fields enclosed by brackets "[]" replaced with your own identifying ++ * information: Portions Copyright [yyyy] [name of copyright owner] ++ * ++ * CDDL HEADER END ++ */ ++/* ++ * ++ * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved. 
++ * Copyright (C) 2011 Lawrence Livermore National Security, LLC. ++ * Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER). ++ * LLNL-CODE-403049. ++ * Rewritten for Linux by: ++ * Rohan Puri ++ * Brian Behlendorf ++ */ ++ ++/* ++ * ZFS control directory (a.k.a. ".zfs") ++ * ++ * This directory provides a common location for all ZFS meta-objects. ++ * Currently, this is only the 'snapshot' and 'shares' directory, but this may ++ * expand in the future. The elements are built dynamically, as the hierarchy ++ * does not actually exist on disk. ++ * ++ * For 'snapshot', we don't want to have all snapshots always mounted, because ++ * this would take up a huge amount of space in /etc/mnttab. We have three ++ * types of objects: ++ * ++ * ctldir ------> snapshotdir -------> snapshot ++ * | ++ * | ++ * V ++ * mounted fs ++ * ++ * The 'snapshot' node contains just enough information to lookup '..' and act ++ * as a mountpoint for the snapshot. Whenever we lookup a specific snapshot, we ++ * perform an automount of the underlying filesystem and return the ++ * corresponding inode. ++ * ++ * All mounts are handled automatically by an user mode helper which invokes ++ * the mount mount procedure. Unmounts are handled by allowing the mount ++ * point to expire so the kernel may automatically unmount it. ++ * ++ * The '.zfs', '.zfs/snapshot', and all directories created under ++ * '.zfs/snapshot' (ie: '.zfs/snapshot/') all share the same ++ * share the same zfs_sb_t as the head filesystem (what '.zfs' lives under). ++ * ++ * File systems mounted on top of the '.zfs/snapshot/' paths ++ * (ie: snapshots) are complete ZFS filesystems and have their own unique ++ * zfs_sb_t. However, the fsid reported by these mounts will be the same ++ * as that used by the parent zfs_sb_t to make NFS happy. ++ */ ++ ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include "zfs_namecheck.h" ++ ++/* ++ * Control Directory Tunables (.zfs) ++ */ ++int zfs_expire_snapshot = ZFSCTL_EXPIRE_SNAPSHOT; ++ ++static zfs_snapentry_t * ++zfsctl_sep_alloc(void) ++{ ++ return kmem_zalloc(sizeof (zfs_snapentry_t), KM_SLEEP); ++} ++ ++void ++zfsctl_sep_free(zfs_snapentry_t *sep) ++{ ++ kmem_free(sep->se_name, MAXNAMELEN); ++ kmem_free(sep->se_path, PATH_MAX); ++ kmem_free(sep, sizeof (zfs_snapentry_t)); ++} ++ ++/* ++ * Attempt to expire an automounted snapshot, unmounts are attempted every ++ * 'zfs_expire_snapshot' seconds until they succeed. The work request is ++ * responsible for rescheduling itself and freeing the zfs_expire_snapshot_t. 
++ */ ++static void ++zfsctl_expire_snapshot(void *data) ++{ ++ zfs_snapentry_t *sep; ++ zfs_sb_t *zsb; ++ int error; ++ ++ sep = spl_get_work_data(data, zfs_snapentry_t, se_work.work); ++ zsb = ITOZSB(sep->se_inode); ++ ++ error = zfsctl_unmount_snapshot(zsb, sep->se_name, MNT_EXPIRE); ++ if (error == EBUSY) ++ schedule_delayed_work(&sep->se_work, zfs_expire_snapshot * HZ); ++} ++ ++int ++snapentry_compare(const void *a, const void *b) ++{ ++ const zfs_snapentry_t *sa = a; ++ const zfs_snapentry_t *sb = b; ++ int ret = strcmp(sa->se_name, sb->se_name); ++ ++ if (ret < 0) ++ return (-1); ++ else if (ret > 0) ++ return (1); ++ else ++ return (0); ++} ++ ++boolean_t ++zfsctl_is_node(struct inode *ip) ++{ ++ return (ITOZ(ip)->z_is_ctldir); ++} ++ ++boolean_t ++zfsctl_is_snapdir(struct inode *ip) ++{ ++ return (zfsctl_is_node(ip) && (ip->i_ino <= ZFSCTL_INO_SNAPDIRS)); ++} ++ ++/* ++ * Allocate a new inode with the passed id and ops. ++ */ ++static struct inode * ++zfsctl_inode_alloc(zfs_sb_t *zsb, uint64_t id, ++ const struct file_operations *fops, const struct inode_operations *ops) ++{ ++ struct timespec now = current_fs_time(zsb->z_sb); ++ struct inode *ip; ++ znode_t *zp; ++ ++ ip = new_inode(zsb->z_sb); ++ if (ip == NULL) ++ return (NULL); ++ ++ zp = ITOZ(ip); ++ ASSERT3P(zp->z_dirlocks, ==, NULL); ++ ASSERT3P(zp->z_acl_cached, ==, NULL); ++ ASSERT3P(zp->z_xattr_cached, ==, NULL); ++ zp->z_id = id; ++ zp->z_unlinked = 0; ++ zp->z_atime_dirty = 0; ++ zp->z_zn_prefetch = 0; ++ zp->z_moved = 0; ++ zp->z_sa_hdl = NULL; ++ zp->z_blksz = 0; ++ zp->z_seq = 0; ++ zp->z_mapcnt = 0; ++ zp->z_gen = 0; ++ zp->z_size = 0; ++ zp->z_atime[0] = 0; ++ zp->z_atime[1] = 0; ++ zp->z_links = 0; ++ zp->z_pflags = 0; ++ zp->z_uid = 0; ++ zp->z_gid = 0; ++ zp->z_mode = 0; ++ zp->z_sync_cnt = 0; ++ zp->z_is_zvol = B_FALSE; ++ zp->z_is_mapped = B_FALSE; ++ zp->z_is_ctldir = B_TRUE; ++ zp->z_is_sa = B_FALSE; ++ ip->i_ino = id; ++ ip->i_mode = (S_IFDIR | S_IRUGO | S_IXUGO); ++ ip->i_uid = 0; ++ ip->i_gid = 0; ++ ip->i_blkbits = SPA_MINBLOCKSHIFT; ++ ip->i_atime = now; ++ ip->i_mtime = now; ++ ip->i_ctime = now; ++ ip->i_fop = fops; ++ ip->i_op = ops; ++ ++ if (insert_inode_locked(ip)) { ++ unlock_new_inode(ip); ++ iput(ip); ++ return (NULL); ++ } ++ ++ mutex_enter(&zsb->z_znodes_lock); ++ list_insert_tail(&zsb->z_all_znodes, zp); ++ zsb->z_nr_znodes++; ++ membar_producer(); ++ mutex_exit(&zsb->z_znodes_lock); ++ ++ unlock_new_inode(ip); ++ ++ return (ip); ++} ++ ++/* ++ * Lookup the inode with given id, it will be allocated if needed. ++ */ ++static struct inode * ++zfsctl_inode_lookup(zfs_sb_t *zsb, uint64_t id, ++ const struct file_operations *fops, const struct inode_operations *ops) ++{ ++ struct inode *ip = NULL; ++ ++ while (ip == NULL) { ++ ip = ilookup(zsb->z_sb, (unsigned long)id); ++ if (ip) ++ break; ++ ++ /* May fail due to concurrent zfsctl_inode_alloc() */ ++ ip = zfsctl_inode_alloc(zsb, id, fops, ops); ++ } ++ ++ return (ip); ++} ++ ++/* ++ * Free zfsctl inode specific structures, currently there are none. ++ */ ++void ++zfsctl_inode_destroy(struct inode *ip) ++{ ++ return; ++} ++ ++/* ++ * An inode is being evicted from the cache. ++ */ ++void ++zfsctl_inode_inactive(struct inode *ip) ++{ ++ if (zfsctl_is_snapdir(ip)) ++ zfsctl_snapdir_inactive(ip); ++} ++ ++/* ++ * Create the '.zfs' directory. This directory is cached as part of the VFS ++ * structure. This results in a hold on the zfs_sb_t. The code in zfs_umount() ++ * therefore checks against a vfs_count of 2 instead of 1. 
This reference ++ * is removed when the ctldir is destroyed in the unmount. All other entities ++ * under the '.zfs' directory are created dynamically as needed. ++ * ++ * Because the dynamically created '.zfs' directory entries assume the use ++ * of 64-bit inode numbers this support must be disabled on 32-bit systems. ++ */ ++int ++zfsctl_create(zfs_sb_t *zsb) ++{ ++#if defined(CONFIG_64BIT) ++ ASSERT(zsb->z_ctldir == NULL); ++ ++ zsb->z_ctldir = zfsctl_inode_alloc(zsb, ZFSCTL_INO_ROOT, ++ &zpl_fops_root, &zpl_ops_root); ++ if (zsb->z_ctldir == NULL) ++ return (ENOENT); ++ ++ return (0); ++#else ++ return (EOPNOTSUPP); ++#endif /* CONFIG_64BIT */ ++} ++ ++/* ++ * Destroy the '.zfs' directory. Only called when the filesystem is unmounted. ++ */ ++void ++zfsctl_destroy(zfs_sb_t *zsb) ++{ ++ iput(zsb->z_ctldir); ++ zsb->z_ctldir = NULL; ++} ++ ++/* ++ * Given a root znode, retrieve the associated .zfs directory. ++ * Add a hold to the vnode and return it. ++ */ ++struct inode * ++zfsctl_root(znode_t *zp) ++{ ++ ASSERT(zfs_has_ctldir(zp)); ++ igrab(ZTOZSB(zp)->z_ctldir); ++ return (ZTOZSB(zp)->z_ctldir); ++} ++ ++/*ARGSUSED*/ ++int ++zfsctl_fid(struct inode *ip, fid_t *fidp) ++{ ++ znode_t *zp = ITOZ(ip); ++ zfs_sb_t *zsb = ITOZSB(ip); ++ uint64_t object = zp->z_id; ++ zfid_short_t *zfid; ++ int i; ++ ++ ZFS_ENTER(zsb); ++ ++ if (fidp->fid_len < SHORT_FID_LEN) { ++ fidp->fid_len = SHORT_FID_LEN; ++ ZFS_EXIT(zsb); ++ return (ENOSPC); ++ } ++ ++ zfid = (zfid_short_t *)fidp; ++ ++ zfid->zf_len = SHORT_FID_LEN; ++ ++ for (i = 0; i < sizeof (zfid->zf_object); i++) ++ zfid->zf_object[i] = (uint8_t)(object >> (8 * i)); ++ ++ /* .zfs znodes always have a generation number of 0 */ ++ for (i = 0; i < sizeof (zfid->zf_gen); i++) ++ zfid->zf_gen[i] = 0; ++ ++ ZFS_EXIT(zsb); ++ return (0); ++} ++ ++static int ++zfsctl_snapshot_zname(struct inode *ip, const char *name, int len, char *zname) ++{ ++ objset_t *os = ITOZSB(ip)->z_os; ++ ++ if (snapshot_namecheck(name, NULL, NULL) != 0) ++ return (EILSEQ); ++ ++ dmu_objset_name(os, zname); ++ if ((strlen(zname) + 1 + strlen(name)) >= len) ++ return (ENAMETOOLONG); ++ ++ (void) strcat(zname, "@"); ++ (void) strcat(zname, name); ++ ++ return (0); ++} ++ ++static int ++zfsctl_snapshot_zpath(struct path *path, int len, char *zpath) ++{ ++ char *path_buffer, *path_ptr; ++ int path_len, error = 0; ++ ++ path_buffer = kmem_alloc(len, KM_SLEEP); ++ ++ path_ptr = d_path(path, path_buffer, len); ++ if (IS_ERR(path_ptr)) { ++ error = -PTR_ERR(path_ptr); ++ goto out; ++ } ++ ++ path_len = path_buffer + len - 1 - path_ptr; ++ if (path_len > len) { ++ error = EFAULT; ++ goto out; ++ } ++ ++ memcpy(zpath, path_ptr, path_len); ++ zpath[path_len] = '\0'; ++out: ++ kmem_free(path_buffer, len); ++ ++ return (error); ++} ++ ++/* ++ * Special case the handling of "..". 
++ */ ++/* ARGSUSED */ ++int ++zfsctl_root_lookup(struct inode *dip, char *name, struct inode **ipp, ++ int flags, cred_t *cr, int *direntflags, pathname_t *realpnp) ++{ ++ zfs_sb_t *zsb = ITOZSB(dip); ++ int error = 0; ++ ++ ZFS_ENTER(zsb); ++ ++ if (strcmp(name, "..") == 0) { ++ *ipp = dip->i_sb->s_root->d_inode; ++ } else if (strcmp(name, ZFS_SNAPDIR_NAME) == 0) { ++ *ipp = zfsctl_inode_lookup(zsb, ZFSCTL_INO_SNAPDIR, ++ &zpl_fops_snapdir, &zpl_ops_snapdir); ++ } else if (strcmp(name, ZFS_SHAREDIR_NAME) == 0) { ++ *ipp = zfsctl_inode_lookup(zsb, ZFSCTL_INO_SHARES, ++ &zpl_fops_shares, &zpl_ops_shares); ++ } else { ++ *ipp = NULL; ++ } ++ ++ if (*ipp == NULL) ++ error = ENOENT; ++ ++ ZFS_EXIT(zsb); ++ ++ return (error); ++} ++ ++/* ++ * Lookup entry point for the 'snapshot' directory. Try to open the ++ * snapshot if it exist, creating the pseudo filesystem inode as necessary. ++ * Perform a mount of the associated dataset on top of the inode. ++ */ ++/* ARGSUSED */ ++int ++zfsctl_snapdir_lookup(struct inode *dip, char *name, struct inode **ipp, ++ int flags, cred_t *cr, int *direntflags, pathname_t *realpnp) ++{ ++ zfs_sb_t *zsb = ITOZSB(dip); ++ uint64_t id; ++ int error; ++ ++ ZFS_ENTER(zsb); ++ ++ error = dmu_snapshot_id(zsb->z_os, name, &id); ++ if (error) { ++ ZFS_EXIT(zsb); ++ return (error); ++ } ++ ++ *ipp = zfsctl_inode_lookup(zsb, ZFSCTL_INO_SNAPDIRS - id, ++ &simple_dir_operations, &simple_dir_inode_operations); ++ if (*ipp) { ++#ifdef HAVE_AUTOMOUNT ++ (*ipp)->i_flags |= S_AUTOMOUNT; ++#endif /* HAVE_AUTOMOUNT */ ++ } else { ++ error = ENOENT; ++ } ++ ++ ZFS_EXIT(zsb); ++ ++ return (error); ++} ++ ++static void ++zfsctl_rename_snap(zfs_sb_t *zsb, zfs_snapentry_t *sep, const char *name) ++{ ++ avl_index_t where; ++ ++ ASSERT(MUTEX_HELD(&zsb->z_ctldir_lock)); ++ ASSERT(sep != NULL); ++ ++ /* ++ * Change the name in the AVL tree. ++ */ ++ avl_remove(&zsb->z_ctldir_snaps, sep); ++ (void) strcpy(sep->se_name, name); ++ VERIFY(avl_find(&zsb->z_ctldir_snaps, sep, &where) == NULL); ++ avl_insert(&zsb->z_ctldir_snaps, sep, where); ++} ++ ++/* ++ * Renaming a directory under '.zfs/snapshot' will automatically trigger ++ * a rename of the snapshot to the new given name. The rename is confined ++ * to the '.zfs/snapshot' directory snapshots cannot be moved elsewhere. ++ */ ++/*ARGSUSED*/ ++int ++zfsctl_snapdir_rename(struct inode *sdip, char *sname, ++ struct inode *tdip, char *tname, cred_t *cr, int flags) ++{ ++ zfs_sb_t *zsb = ITOZSB(sdip); ++ zfs_snapentry_t search, *sep; ++ avl_index_t where; ++ char *to, *from, *real; ++ int error; ++ ++ ZFS_ENTER(zsb); ++ ++ to = kmem_alloc(MAXNAMELEN, KM_SLEEP); ++ from = kmem_alloc(MAXNAMELEN, KM_SLEEP); ++ real = kmem_alloc(MAXNAMELEN, KM_SLEEP); ++ ++ if (zsb->z_case == ZFS_CASE_INSENSITIVE) { ++ error = dmu_snapshot_realname(zsb->z_os, sname, real, ++ MAXNAMELEN, NULL); ++ if (error == 0) { ++ sname = real; ++ } else if (error != ENOTSUP) { ++ goto out; ++ } ++ } ++ ++ error = zfsctl_snapshot_zname(sdip, sname, MAXNAMELEN, from); ++ if (!error) ++ error = zfsctl_snapshot_zname(tdip, tname, MAXNAMELEN, to); ++ if (!error) ++ error = zfs_secpolicy_rename_perms(from, to, cr); ++ if (error) ++ goto out; ++ ++ /* ++ * Cannot move snapshots out of the snapdir. ++ */ ++ if (sdip != tdip) { ++ error = EINVAL; ++ goto out; ++ } ++ ++ /* ++ * No-op when names are identical. 
++ */ ++ if (strcmp(sname, tname) == 0) { ++ error = 0; ++ goto out; ++ } ++ ++ mutex_enter(&zsb->z_ctldir_lock); ++ ++ error = dmu_objset_rename(from, to, B_FALSE); ++ if (error) ++ goto out_unlock; ++ ++ search.se_name = (char *)sname; ++ sep = avl_find(&zsb->z_ctldir_snaps, &search, &where); ++ if (sep) ++ zfsctl_rename_snap(zsb, sep, tname); ++ ++out_unlock: ++ mutex_exit(&zsb->z_ctldir_lock); ++out: ++ kmem_free(from, MAXNAMELEN); ++ kmem_free(to, MAXNAMELEN); ++ kmem_free(real, MAXNAMELEN); ++ ++ ZFS_EXIT(zsb); ++ ++ return (error); ++} ++ ++/* ++ * Removing a directory under '.zfs/snapshot' will automatically trigger ++ * the removal of the snapshot with the given name. ++ */ ++/* ARGSUSED */ ++int ++zfsctl_snapdir_remove(struct inode *dip, char *name, cred_t *cr, int flags) ++{ ++ zfs_sb_t *zsb = ITOZSB(dip); ++ char *snapname, *real; ++ int error; ++ ++ ZFS_ENTER(zsb); ++ ++ snapname = kmem_alloc(MAXNAMELEN, KM_SLEEP); ++ real = kmem_alloc(MAXNAMELEN, KM_SLEEP); ++ ++ if (zsb->z_case == ZFS_CASE_INSENSITIVE) { ++ error = dmu_snapshot_realname(zsb->z_os, name, real, ++ MAXNAMELEN, NULL); ++ if (error == 0) { ++ name = real; ++ } else if (error != ENOTSUP) { ++ goto out; ++ } ++ } ++ ++ error = zfsctl_snapshot_zname(dip, name, MAXNAMELEN, snapname); ++ if (!error) ++ error = zfs_secpolicy_destroy_perms(snapname, cr); ++ if (error) ++ goto out; ++ ++ error = zfsctl_unmount_snapshot(zsb, name, MNT_FORCE); ++ if ((error == 0) || (error == ENOENT)) ++ error = dmu_objset_destroy(snapname, B_FALSE); ++out: ++ kmem_free(snapname, MAXNAMELEN); ++ kmem_free(real, MAXNAMELEN); ++ ++ ZFS_EXIT(zsb); ++ ++ return (error); ++} ++ ++/* ++ * Creating a directory under '.zfs/snapshot' will automatically trigger ++ * the creation of a new snapshot with the given name. ++ */ ++/* ARGSUSED */ ++int ++zfsctl_snapdir_mkdir(struct inode *dip, char *dirname, vattr_t *vap, ++ struct inode **ipp, cred_t *cr, int flags) ++{ ++ zfs_sb_t *zsb = ITOZSB(dip); ++ char *dsname; ++ int error; ++ ++ dsname = kmem_alloc(MAXNAMELEN, KM_SLEEP); ++ ++ if (snapshot_namecheck(dirname, NULL, NULL) != 0) { ++ error = EILSEQ; ++ goto out; ++ } ++ ++ dmu_objset_name(zsb->z_os, dsname); ++ ++ error = zfs_secpolicy_snapshot_perms(dsname, cr); ++ if (error) ++ goto out; ++ ++ if (error == 0) { ++ error = dmu_objset_snapshot(dsname, dirname, ++ NULL, NULL, B_FALSE, B_FALSE, -1); ++ if (error) ++ goto out; ++ ++ error = zfsctl_snapdir_lookup(dip, dirname, ipp, ++ 0, cr, NULL, NULL); ++ } ++out: ++ kmem_free(dsname, MAXNAMELEN); ++ ++ return (error); ++} ++ ++/* ++ * When a .zfs/snapshot/ inode is evicted they must be removed ++ * from the snapshot list. This will normally happen as part of the auto ++ * unmount, however in the case of a manual snapshot unmount this will be ++ * the only notification we receive. ++ */ ++void ++zfsctl_snapdir_inactive(struct inode *ip) ++{ ++ zfs_sb_t *zsb = ITOZSB(ip); ++ zfs_snapentry_t *sep, *next; ++ ++ mutex_enter(&zsb->z_ctldir_lock); ++ ++ sep = avl_first(&zsb->z_ctldir_snaps); ++ while (sep != NULL) { ++ next = AVL_NEXT(&zsb->z_ctldir_snaps, sep); ++ ++ if (sep->se_inode == ip) { ++ avl_remove(&zsb->z_ctldir_snaps, sep); ++ cancel_delayed_work_sync(&sep->se_work); ++ zfsctl_sep_free(sep); ++ break; ++ } ++ sep = next; ++ } ++ ++ mutex_exit(&zsb->z_ctldir_lock); ++} ++ ++/* ++ * Attempt to unmount a snapshot by making a call to user space. ++ * There is no assurance that this can or will succeed, is just a ++ * best effort. 
In the case where it does fail, perhaps because ++ * it's in use, the unmount will fail harmlessly. ++ */ ++#define SET_UNMOUNT_CMD \ ++ "exec 0/dev/null " \ ++ " 2>/dev/null; " \ ++ "umount -t zfs -n '%s%s'" ++ ++static int ++__zfsctl_unmount_snapshot(zfs_snapentry_t *sep, int flags) ++{ ++ char *argv[] = { "/bin/sh", "-c", NULL, NULL }; ++ char *envp[] = { NULL }; ++ int error; ++ ++ argv[2] = kmem_asprintf(SET_UNMOUNT_CMD, ++ flags & MNT_FORCE ? "-f " : "", sep->se_path); ++ error = call_usermodehelper(argv[0], argv, envp, 1); ++ strfree(argv[2]); ++ ++ /* ++ * The umount system utility will return 256 on error. We must ++ * assume this error is because the file system is busy so it is ++ * converted to the more sensible EBUSY. ++ */ ++ if (error) ++ error = EBUSY; ++ ++ /* ++ * This was the result of a manual unmount, cancel the delayed work ++ * to prevent zfsctl_expire_snapshot() from attempting a unmount. ++ */ ++ if ((error == 0) && !(flags & MNT_EXPIRE)) ++ cancel_delayed_work(&sep->se_work); ++ ++ return (error); ++} ++ ++int ++zfsctl_unmount_snapshot(zfs_sb_t *zsb, char *name, int flags) ++{ ++ zfs_snapentry_t search; ++ zfs_snapentry_t *sep; ++ int error = 0; ++ ++ mutex_enter(&zsb->z_ctldir_lock); ++ ++ search.se_name = name; ++ sep = avl_find(&zsb->z_ctldir_snaps, &search, NULL); ++ if (sep) { ++ avl_remove(&zsb->z_ctldir_snaps, sep); ++ error = __zfsctl_unmount_snapshot(sep, flags); ++ if (error == EBUSY) ++ avl_add(&zsb->z_ctldir_snaps, sep); ++ else ++ zfsctl_sep_free(sep); ++ } else { ++ error = ENOENT; ++ } ++ ++ mutex_exit(&zsb->z_ctldir_lock); ++ ASSERT3S(error, >=, 0); ++ ++ return (error); ++} ++ ++/* ++ * Traverse all mounted snapshots and attempt to unmount them. This ++ * is best effort, on failure EEXIST is returned and count will be set ++ * to the number of file snapshots which could not be unmounted. ++ */ ++int ++zfsctl_unmount_snapshots(zfs_sb_t *zsb, int flags, int *count) ++{ ++ zfs_snapentry_t *sep, *next; ++ int error = 0; ++ ++ *count = 0; ++ ++ ASSERT(zsb->z_ctldir != NULL); ++ mutex_enter(&zsb->z_ctldir_lock); ++ ++ sep = avl_first(&zsb->z_ctldir_snaps); ++ while (sep != NULL) { ++ next = AVL_NEXT(&zsb->z_ctldir_snaps, sep); ++ avl_remove(&zsb->z_ctldir_snaps, sep); ++ error = __zfsctl_unmount_snapshot(sep, flags); ++ if (error == EBUSY) { ++ avl_add(&zsb->z_ctldir_snaps, sep); ++ (*count)++; ++ } else { ++ zfsctl_sep_free(sep); ++ } ++ ++ sep = next; ++ } ++ ++ mutex_exit(&zsb->z_ctldir_lock); ++ ++ return ((*count > 0) ? EEXIST : 0); ++} ++ ++#define SET_MOUNT_CMD \ ++ "exec 0/dev/null " \ ++ " 2>/dev/null; " \ ++ "mount -t zfs -n '%s' '%s'" ++ ++int ++zfsctl_mount_snapshot(struct path *path, int flags) ++{ ++ struct dentry *dentry = path->dentry; ++ struct inode *ip = dentry->d_inode; ++ zfs_sb_t *zsb = ITOZSB(ip); ++ char *full_name, *full_path; ++ zfs_snapentry_t *sep; ++ zfs_snapentry_t search; ++ char *argv[] = { "/bin/sh", "-c", NULL, NULL }; ++ char *envp[] = { NULL }; ++ int error; ++ ++ ZFS_ENTER(zsb); ++ ++ full_name = kmem_zalloc(MAXNAMELEN, KM_SLEEP); ++ full_path = kmem_zalloc(PATH_MAX, KM_SLEEP); ++ ++ error = zfsctl_snapshot_zname(ip, dname(dentry), MAXNAMELEN, full_name); ++ if (error) ++ goto error; ++ ++ error = zfsctl_snapshot_zpath(path, PATH_MAX, full_path); ++ if (error) ++ goto error; ++ ++ /* ++ * Attempt to mount the snapshot from user space. Normally this ++ * would be done using the vfs_kern_mount() function, however that ++ * function is marked GPL-only and cannot be used. 
On error we ++ * careful to log the real error to the console and return EISDIR ++ * to safely abort the automount. This should be very rare. ++ */ ++ argv[2] = kmem_asprintf(SET_MOUNT_CMD, full_name, full_path); ++ error = call_usermodehelper(argv[0], argv, envp, 1); ++ strfree(argv[2]); ++ if (error) { ++ printk("ZFS: Unable to automount %s at %s: %d\n", ++ full_name, full_path, error); ++ error = EISDIR; ++ goto error; ++ } ++ ++ mutex_enter(&zsb->z_ctldir_lock); ++ ++ /* ++ * Ensure a previous entry does not exist, if it does safely remove ++ * it any cancel the outstanding expiration. This can occur when a ++ * snapshot is manually unmounted and then an automount is triggered. ++ */ ++ search.se_name = full_name; ++ sep = avl_find(&zsb->z_ctldir_snaps, &search, NULL); ++ if (sep) { ++ avl_remove(&zsb->z_ctldir_snaps, sep); ++ cancel_delayed_work_sync(&sep->se_work); ++ zfsctl_sep_free(sep); ++ } ++ ++ sep = zfsctl_sep_alloc(); ++ sep->se_name = full_name; ++ sep->se_path = full_path; ++ sep->se_inode = ip; ++ avl_add(&zsb->z_ctldir_snaps, sep); ++ ++ spl_init_delayed_work(&sep->se_work, zfsctl_expire_snapshot, sep); ++ schedule_delayed_work(&sep->se_work, zfs_expire_snapshot * HZ); ++ ++ mutex_exit(&zsb->z_ctldir_lock); ++error: ++ if (error) { ++ kmem_free(full_name, MAXNAMELEN); ++ kmem_free(full_path, PATH_MAX); ++ } ++ ++ ZFS_EXIT(zsb); ++ ++ return (error); ++} ++ ++/* ++ * Check if this super block has a matching objset id. ++ */ ++static int ++zfsctl_test_super(struct super_block *sb, void *objsetidp) ++{ ++ zfs_sb_t *zsb = sb->s_fs_info; ++ uint64_t objsetid = *(uint64_t *)objsetidp; ++ ++ return (dmu_objset_id(zsb->z_os) == objsetid); ++} ++ ++/* ++ * Prevent a new super block from being allocated if an existing one ++ * could not be located. We only want to preform a lookup operation. ++ */ ++static int ++zfsctl_set_super(struct super_block *sb, void *objsetidp) ++{ ++ return (-EEXIST); ++} ++ ++int ++zfsctl_lookup_objset(struct super_block *sb, uint64_t objsetid, zfs_sb_t **zsbp) ++{ ++ zfs_sb_t *zsb = sb->s_fs_info; ++ struct super_block *sbp; ++ zfs_snapentry_t *sep; ++ uint64_t id; ++ int error; ++ ++ ASSERT(zsb->z_ctldir != NULL); ++ ++ mutex_enter(&zsb->z_ctldir_lock); ++ ++ /* ++ * Verify that the snapshot is mounted. ++ */ ++ sep = avl_first(&zsb->z_ctldir_snaps); ++ while (sep != NULL) { ++ error = dmu_snapshot_id(zsb->z_os, sep->se_name, &id); ++ if (error) ++ goto out; ++ ++ if (id == objsetid) ++ break; ++ ++ sep = AVL_NEXT(&zsb->z_ctldir_snaps, sep); ++ } ++ ++ if (sep != NULL) { ++ /* ++ * Lookup the mounted root rather than the covered mount ++ * point. This may fail if the snapshot has just been ++ * unmounted by an unrelated user space process. This ++ * race cannot occur to an expired mount point because ++ * we hold the zsb->z_ctldir_lock to prevent the race. 
++ */ ++ sbp = zpl_sget(&zpl_fs_type, zfsctl_test_super, ++ zfsctl_set_super, 0, &id); ++ if (IS_ERR(sbp)) { ++ error = -PTR_ERR(sbp); ++ } else { ++ *zsbp = sbp->s_fs_info; ++ deactivate_super(sbp); ++ } ++ } else { ++ error = EINVAL; ++ } ++out: ++ mutex_exit(&zsb->z_ctldir_lock); ++ ASSERT3S(error, >=, 0); ++ ++ return (error); ++} ++ ++/* ARGSUSED */ ++int ++zfsctl_shares_lookup(struct inode *dip, char *name, struct inode **ipp, ++ int flags, cred_t *cr, int *direntflags, pathname_t *realpnp) ++{ ++ zfs_sb_t *zsb = ITOZSB(dip); ++ struct inode *ip; ++ znode_t *dzp; ++ int error; ++ ++ ZFS_ENTER(zsb); ++ ++ if (zsb->z_shares_dir == 0) { ++ ZFS_EXIT(zsb); ++ return (ENOTSUP); ++ } ++ ++ error = zfs_zget(zsb, zsb->z_shares_dir, &dzp); ++ if (error) { ++ ZFS_EXIT(zsb); ++ return (error); ++ } ++ ++ error = zfs_lookup(ZTOI(dzp), name, &ip, 0, cr, NULL, NULL); ++ ++ iput(ZTOI(dzp)); ++ ZFS_EXIT(zsb); ++ ++ return (error); ++} ++ ++ ++/* ++ * Initialize the various pieces we'll need to create and manipulate .zfs ++ * directories. Currently this is unused but available. ++ */ ++void ++zfsctl_init(void) ++{ ++} ++ ++/* ++ * Cleanup the various pieces we needed for .zfs directories. In particular ++ * ensure the expiry timer is canceled safely. ++ */ ++void ++zfsctl_fini(void) ++{ ++} ++ ++module_param(zfs_expire_snapshot, int, 0644); ++MODULE_PARM_DESC(zfs_expire_snapshot, "Seconds to expire .zfs/snapshot"); +diff -uNr linux-3.2.33-go.orig/fs/zfs/zfs/zfs_debug.c linux-3.2.33-go/fs/zfs/zfs/zfs_debug.c +--- linux-3.2.33-go.orig/fs/zfs/zfs/zfs_debug.c 1970-01-01 01:00:00.000000000 +0100 ++++ linux-3.2.33-go/fs/zfs/zfs/zfs_debug.c 2012-11-16 23:25:34.348039346 +0100 +@@ -0,0 +1,83 @@ ++/* ++ * CDDL HEADER START ++ * ++ * The contents of this file are subject to the terms of the ++ * Common Development and Distribution License (the "License"). ++ * You may not use this file except in compliance with the License. ++ * ++ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE ++ * or http://www.opensolaris.org/os/licensing. ++ * See the License for the specific language governing permissions ++ * and limitations under the License. ++ * ++ * When distributing Covered Code, include this CDDL HEADER in each ++ * file and include the License file at usr/src/OPENSOLARIS.LICENSE. ++ * If applicable, add the following below this CDDL HEADER, with the ++ * fields enclosed by brackets "[]" replaced with your own identifying ++ * information: Portions Copyright [yyyy] [name of copyright owner] ++ * ++ * CDDL HEADER END ++ */ ++/* ++ * Copyright (c) 2010, Oracle and/or its affiliates. All rights reserved. ++ */ ++ ++#include ++ ++/* ++ * Enable various debugging features. ++ */ ++int zfs_flags = 0; ++ ++/* ++ * zfs_recover can be set to nonzero to attempt to recover from ++ * otherwise-fatal errors, typically caused by on-disk corruption. When ++ * set, calls to zfs_panic_recover() will turn into warning messages. ++ */ ++int zfs_recover = 0; ++ ++ ++void ++zfs_panic_recover(const char *fmt, ...) ++{ ++ va_list adx; ++ ++ va_start(adx, fmt); ++ vcmn_err(zfs_recover ? CE_WARN : CE_PANIC, fmt, adx); ++ va_end(adx); ++} ++ ++/* ++ * Debug logging is enabled by default for production kernel builds. ++ * The overhead for this is negligible and the logs can be valuable when ++ * debugging. For non-production user space builds all debugging except ++ * logging is enabled since performance is no longer a concern. 
++ */ ++void ++zfs_dbgmsg_init(void) ++{ ++ if (zfs_flags == 0) { ++#if defined(_KERNEL) ++ zfs_flags = ZFS_DEBUG_DPRINTF; ++ spl_debug_set_mask(spl_debug_get_mask() | SD_DPRINTF); ++ spl_debug_set_subsys(spl_debug_get_subsys() | SS_USER1); ++#else ++ zfs_flags = ~ZFS_DEBUG_DPRINTF; ++#endif /* _KERNEL */ ++ } ++} ++ ++void ++zfs_dbgmsg_fini(void) ++{ ++ return; ++} ++ ++ ++#if defined(_KERNEL) ++module_param(zfs_flags, int, 0644); ++MODULE_PARM_DESC(zfs_flags, "Set additional debugging flags"); ++ ++module_param(zfs_recover, int, 0644); ++MODULE_PARM_DESC(zfs_recover, "Set to attempt to recover from fatal errors"); ++#endif /* _KERNEL */ +diff -uNr linux-3.2.33-go.orig/fs/zfs/zfs/zfs_dir.c linux-3.2.33-go/fs/zfs/zfs/zfs_dir.c +--- linux-3.2.33-go.orig/fs/zfs/zfs/zfs_dir.c 1970-01-01 01:00:00.000000000 +0100 ++++ linux-3.2.33-go/fs/zfs/zfs/zfs_dir.c 2012-11-16 23:25:34.352039300 +0100 +@@ -0,0 +1,1124 @@ ++/* ++ * CDDL HEADER START ++ * ++ * The contents of this file are subject to the terms of the ++ * Common Development and Distribution License (the "License"). ++ * You may not use this file except in compliance with the License. ++ * ++ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE ++ * or http://www.opensolaris.org/os/licensing. ++ * See the License for the specific language governing permissions ++ * and limitations under the License. ++ * ++ * When distributing Covered Code, include this CDDL HEADER in each ++ * file and include the License file at usr/src/OPENSOLARIS.LICENSE. ++ * If applicable, add the following below this CDDL HEADER, with the ++ * fields enclosed by brackets "[]" replaced with your own identifying ++ * information: Portions Copyright [yyyy] [name of copyright owner] ++ * ++ * CDDL HEADER END ++ */ ++/* ++ * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved. ++ */ ++ ++ ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include "fs/fs_subr.h" ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++ ++/* ++ * zfs_match_find() is used by zfs_dirent_lock() to peform zap lookups ++ * of names after deciding which is the appropriate lookup interface. ++ */ ++static int ++zfs_match_find(zfs_sb_t *zsb, znode_t *dzp, char *name, boolean_t exact, ++ boolean_t update, int *deflags, pathname_t *rpnp, uint64_t *zoid) ++{ ++ boolean_t conflict = B_FALSE; ++ int error; ++ ++ if (zsb->z_norm) { ++ matchtype_t mt = MT_FIRST; ++ size_t bufsz = 0; ++ char *buf = NULL; ++ ++ if (rpnp) { ++ buf = rpnp->pn_buf; ++ bufsz = rpnp->pn_bufsize; ++ } ++ if (exact) ++ mt = MT_EXACT; ++ /* ++ * In the non-mixed case we only expect there would ever ++ * be one match, but we need to use the normalizing lookup. ++ */ ++ error = zap_lookup_norm(zsb->z_os, dzp->z_id, name, 8, 1, ++ zoid, mt, buf, bufsz, &conflict); ++ } else { ++ error = zap_lookup(zsb->z_os, dzp->z_id, name, 8, 1, zoid); ++ } ++ ++ /* ++ * Allow multiple entries provided the first entry is ++ * the object id. Non-zpl consumers may safely make ++ * use of the additional space. ++ * ++ * XXX: This should be a feature flag for compatibility ++ */ ++ if (error == EOVERFLOW) ++ error = 0; ++ ++ if (zsb->z_norm && !error && deflags) ++ *deflags = conflict ? 
ED_CASE_CONFLICT : 0; ++ ++ *zoid = ZFS_DIRENT_OBJ(*zoid); ++ ++#ifdef HAVE_DNLC ++ if (error == ENOENT && update) ++ dnlc_update(ZTOI(dzp), name, DNLC_NO_VNODE); ++#endif /* HAVE_DNLC */ ++ ++ return (error); ++} ++ ++/* ++ * Lock a directory entry. A dirlock on protects that name ++ * in dzp's directory zap object. As long as you hold a dirlock, you can ++ * assume two things: (1) dzp cannot be reaped, and (2) no other thread ++ * can change the zap entry for (i.e. link or unlink) this name. ++ * ++ * Input arguments: ++ * dzp - znode for directory ++ * name - name of entry to lock ++ * flag - ZNEW: if the entry already exists, fail with EEXIST. ++ * ZEXISTS: if the entry does not exist, fail with ENOENT. ++ * ZSHARED: allow concurrent access with other ZSHARED callers. ++ * ZXATTR: we want dzp's xattr directory ++ * ZCILOOK: On a mixed sensitivity file system, ++ * this lookup should be case-insensitive. ++ * ZCIEXACT: On a purely case-insensitive file system, ++ * this lookup should be case-sensitive. ++ * ZRENAMING: we are locking for renaming, force narrow locks ++ * ZHAVELOCK: Don't grab the z_name_lock for this call. The ++ * current thread already holds it. ++ * ++ * Output arguments: ++ * zpp - pointer to the znode for the entry (NULL if there isn't one) ++ * dlpp - pointer to the dirlock for this entry (NULL on error) ++ * direntflags - (case-insensitive lookup only) ++ * flags if multiple case-sensitive matches exist in directory ++ * realpnp - (case-insensitive lookup only) ++ * actual name matched within the directory ++ * ++ * Return value: 0 on success or errno on failure. ++ * ++ * NOTE: Always checks for, and rejects, '.' and '..'. ++ * NOTE: For case-insensitive file systems we take wide locks (see below), ++ * but return znode pointers to a single match. ++ */ ++int ++zfs_dirent_lock(zfs_dirlock_t **dlpp, znode_t *dzp, char *name, znode_t **zpp, ++ int flag, int *direntflags, pathname_t *realpnp) ++{ ++ zfs_sb_t *zsb = ZTOZSB(dzp); ++ zfs_dirlock_t *dl; ++ boolean_t update; ++ boolean_t exact; ++ uint64_t zoid; ++#ifdef HAVE_DNLC ++ vnode_t *vp = NULL; ++#endif /* HAVE_DNLC */ ++ int error = 0; ++ int cmpflags; ++ ++ *zpp = NULL; ++ *dlpp = NULL; ++ ++ /* ++ * Verify that we are not trying to lock '.', '..', or '.zfs' ++ */ ++ if ((name[0] == '.' && ++ (name[1] == '\0' || (name[1] == '.' && name[2] == '\0'))) || ++ (zfs_has_ctldir(dzp) && strcmp(name, ZFS_CTLDIR_NAME) == 0)) ++ return (EEXIST); ++ ++ /* ++ * Case sensitivity and normalization preferences are set when ++ * the file system is created. These are stored in the ++ * zsb->z_case and zsb->z_norm fields. These choices ++ * affect what vnodes can be cached in the DNLC, how we ++ * perform zap lookups, and the "width" of our dirlocks. ++ * ++ * A normal dirlock locks a single name. Note that with ++ * normalization a name can be composed multiple ways, but ++ * when normalized, these names all compare equal. A wide ++ * dirlock locks multiple names. We need these when the file ++ * system is supporting mixed-mode access. It is sometimes ++ * necessary to lock all case permutations of file name at ++ * once so that simultaneous case-insensitive/case-sensitive ++ * behaves as rationally as possible. ++ */ ++ ++ /* ++ * Decide if exact matches should be requested when performing ++ * a zap lookup on file systems supporting case-insensitive ++ * access. 
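The zfs_dirent_lock() contract documented above is easiest to read alongside its typical calling pattern; the sketch below (illustrative only, not part of the patch hunks, helper name hypothetical) mirrors how zfs_dirlook() further down in this file resolves an existing entry:

/*
 * Illustrative sketch only -- not part of the patch.  Typical use of
 * zfs_dirent_lock()/zfs_dirent_unlock() to look up an existing entry,
 * modeled on zfs_dirlook() below; error handling is abbreviated.
 */
static int
example_dir_lookup(znode_t *dzp, char *name, struct inode **ipp)
{
	zfs_dirlock_t *dl;
	znode_t *zp;
	int error;

	/* ZEXISTS: fail with ENOENT if absent; ZSHARED: allow other readers. */
	error = zfs_dirent_lock(&dl, dzp, name, &zp,
	    ZEXISTS | ZSHARED, NULL, NULL);
	if (error)
		return (error);

	*ipp = ZTOI(zp);	/* zp is returned held */
	zfs_dirent_unlock(dl);	/* drop the dirlock, keep the inode hold */

	return (0);
}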
++ */ ++ exact = ++ ((zsb->z_case == ZFS_CASE_INSENSITIVE) && (flag & ZCIEXACT)) || ++ ((zsb->z_case == ZFS_CASE_MIXED) && !(flag & ZCILOOK)); ++ ++ /* ++ * Only look in or update the DNLC if we are looking for the ++ * name on a file system that does not require normalization ++ * or case folding. We can also look there if we happen to be ++ * on a non-normalizing, mixed sensitivity file system IF we ++ * are looking for the exact name. ++ * ++ * Maybe can add TO-UPPERed version of name to dnlc in ci-only ++ * case for performance improvement? ++ */ ++ update = !zsb->z_norm || ++ ((zsb->z_case == ZFS_CASE_MIXED) && ++ !(zsb->z_norm & ~U8_TEXTPREP_TOUPPER) && !(flag & ZCILOOK)); ++ ++ /* ++ * ZRENAMING indicates we are in a situation where we should ++ * take narrow locks regardless of the file system's ++ * preferences for normalizing and case folding. This will ++ * prevent us deadlocking trying to grab the same wide lock ++ * twice if the two names happen to be case-insensitive ++ * matches. ++ */ ++ if (flag & ZRENAMING) ++ cmpflags = 0; ++ else ++ cmpflags = zsb->z_norm; ++ ++ /* ++ * Wait until there are no locks on this name. ++ * ++ * Don't grab the the lock if it is already held. However, cannot ++ * have both ZSHARED and ZHAVELOCK together. ++ */ ++ ASSERT(!(flag & ZSHARED) || !(flag & ZHAVELOCK)); ++ if (!(flag & ZHAVELOCK)) ++ rw_enter(&dzp->z_name_lock, RW_READER); ++ ++ mutex_enter(&dzp->z_lock); ++ for (;;) { ++ if (dzp->z_unlinked) { ++ mutex_exit(&dzp->z_lock); ++ if (!(flag & ZHAVELOCK)) ++ rw_exit(&dzp->z_name_lock); ++ return (ENOENT); ++ } ++ for (dl = dzp->z_dirlocks; dl != NULL; dl = dl->dl_next) { ++ if ((u8_strcmp(name, dl->dl_name, 0, cmpflags, ++ U8_UNICODE_LATEST, &error) == 0) || error != 0) ++ break; ++ } ++ if (error != 0) { ++ mutex_exit(&dzp->z_lock); ++ if (!(flag & ZHAVELOCK)) ++ rw_exit(&dzp->z_name_lock); ++ return (ENOENT); ++ } ++ if (dl == NULL) { ++ /* ++ * Allocate a new dirlock and add it to the list. ++ */ ++ dl = kmem_alloc(sizeof (zfs_dirlock_t), KM_SLEEP); ++ cv_init(&dl->dl_cv, NULL, CV_DEFAULT, NULL); ++ dl->dl_name = name; ++ dl->dl_sharecnt = 0; ++ dl->dl_namelock = 0; ++ dl->dl_namesize = 0; ++ dl->dl_dzp = dzp; ++ dl->dl_next = dzp->z_dirlocks; ++ dzp->z_dirlocks = dl; ++ break; ++ } ++ if ((flag & ZSHARED) && dl->dl_sharecnt != 0) ++ break; ++ cv_wait(&dl->dl_cv, &dzp->z_lock); ++ } ++ ++ /* ++ * If the z_name_lock was NOT held for this dirlock record it. ++ */ ++ if (flag & ZHAVELOCK) ++ dl->dl_namelock = 1; ++ ++ if ((flag & ZSHARED) && ++dl->dl_sharecnt > 1 && dl->dl_namesize == 0) { ++ /* ++ * We're the second shared reference to dl. Make a copy of ++ * dl_name in case the first thread goes away before we do. ++ * Note that we initialize the new name before storing its ++ * pointer into dl_name, because the first thread may load ++ * dl->dl_name at any time. He'll either see the old value, ++ * which is his, or the new shared copy; either is OK. ++ */ ++ dl->dl_namesize = strlen(dl->dl_name) + 1; ++ name = kmem_alloc(dl->dl_namesize, KM_SLEEP); ++ bcopy(dl->dl_name, name, dl->dl_namesize); ++ dl->dl_name = name; ++ } ++ ++ mutex_exit(&dzp->z_lock); ++ ++ /* ++ * We have a dirlock on the name. (Note that it is the dirlock, ++ * not the dzp's z_lock, that protects the name in the zap object.) ++ * See if there's an object by this name; if so, put a hold on it. ++ */ ++ if (flag & ZXATTR) { ++ error = sa_lookup(dzp->z_sa_hdl, SA_ZPL_XATTR(zsb), &zoid, ++ sizeof (zoid)); ++ if (error == 0) ++ error = (zoid == 0 ? 
ENOENT : 0); ++ } else { ++#ifdef HAVE_DNLC ++ if (update) ++ vp = dnlc_lookup(ZTOI(dzp), name); ++ if (vp == DNLC_NO_VNODE) { ++ iput(vp); ++ error = ENOENT; ++ } else if (vp) { ++ if (flag & ZNEW) { ++ zfs_dirent_unlock(dl); ++ iput(vp); ++ return (EEXIST); ++ } ++ *dlpp = dl; ++ *zpp = VTOZ(vp); ++ return (0); ++ } else { ++ error = zfs_match_find(zsb, dzp, name, exact, ++ update, direntflags, realpnp, &zoid); ++ } ++#else ++ error = zfs_match_find(zsb, dzp, name, exact, ++ update, direntflags, realpnp, &zoid); ++#endif /* HAVE_DNLC */ ++ } ++ if (error) { ++ if (error != ENOENT || (flag & ZEXISTS)) { ++ zfs_dirent_unlock(dl); ++ return (error); ++ } ++ } else { ++ if (flag & ZNEW) { ++ zfs_dirent_unlock(dl); ++ return (EEXIST); ++ } ++ error = zfs_zget(zsb, zoid, zpp); ++ if (error) { ++ zfs_dirent_unlock(dl); ++ return (error); ++ } ++#ifdef HAVE_DNLC ++ if (!(flag & ZXATTR) && update) ++ dnlc_update(ZTOI(dzp), name, ZTOI(*zpp)); ++#endif /* HAVE_DNLC */ ++ } ++ ++ *dlpp = dl; ++ ++ return (0); ++} ++ ++/* ++ * Unlock this directory entry and wake anyone who was waiting for it. ++ */ ++void ++zfs_dirent_unlock(zfs_dirlock_t *dl) ++{ ++ znode_t *dzp = dl->dl_dzp; ++ zfs_dirlock_t **prev_dl, *cur_dl; ++ ++ mutex_enter(&dzp->z_lock); ++ ++ if (!dl->dl_namelock) ++ rw_exit(&dzp->z_name_lock); ++ ++ if (dl->dl_sharecnt > 1) { ++ dl->dl_sharecnt--; ++ mutex_exit(&dzp->z_lock); ++ return; ++ } ++ prev_dl = &dzp->z_dirlocks; ++ while ((cur_dl = *prev_dl) != dl) ++ prev_dl = &cur_dl->dl_next; ++ *prev_dl = dl->dl_next; ++ cv_broadcast(&dl->dl_cv); ++ mutex_exit(&dzp->z_lock); ++ ++ if (dl->dl_namesize != 0) ++ kmem_free(dl->dl_name, dl->dl_namesize); ++ cv_destroy(&dl->dl_cv); ++ kmem_free(dl, sizeof (*dl)); ++} ++ ++/* ++ * Look up an entry in a directory. ++ * ++ * NOTE: '.' and '..' are handled as special cases because ++ * no directory entries are actually stored for them. If this is ++ * the root of a filesystem, then '.zfs' is also treated as a ++ * special pseudo-directory. ++ */ ++int ++zfs_dirlook(znode_t *dzp, char *name, struct inode **ipp, int flags, ++ int *deflg, pathname_t *rpnp) ++{ ++ zfs_dirlock_t *dl; ++ znode_t *zp; ++ int error = 0; ++ uint64_t parent; ++ ++ if (name[0] == 0 || (name[0] == '.' && name[1] == 0)) { ++ *ipp = ZTOI(dzp); ++ igrab(*ipp); ++ } else if (name[0] == '.' && name[1] == '.' && name[2] == 0) { ++ zfs_sb_t *zsb = ZTOZSB(dzp); ++ ++ /* ++ * If we are a snapshot mounted under .zfs, return ++ * the inode pointer for the snapshot directory. 
++ */ ++ if ((error = sa_lookup(dzp->z_sa_hdl, ++ SA_ZPL_PARENT(zsb), &parent, sizeof (parent))) != 0) ++ return (error); ++ ++ if (parent == dzp->z_id && zsb->z_parent != zsb) { ++ error = zfsctl_root_lookup(zsb->z_parent->z_ctldir, ++ "snapshot", ipp, 0, kcred, NULL, NULL); ++ return (error); ++ } ++ rw_enter(&dzp->z_parent_lock, RW_READER); ++ error = zfs_zget(zsb, parent, &zp); ++ if (error == 0) ++ *ipp = ZTOI(zp); ++ rw_exit(&dzp->z_parent_lock); ++ } else if (zfs_has_ctldir(dzp) && strcmp(name, ZFS_CTLDIR_NAME) == 0) { ++ *ipp = zfsctl_root(dzp); ++ } else { ++ int zf; ++ ++ zf = ZEXISTS | ZSHARED; ++ if (flags & FIGNORECASE) ++ zf |= ZCILOOK; ++ ++ error = zfs_dirent_lock(&dl, dzp, name, &zp, zf, deflg, rpnp); ++ if (error == 0) { ++ *ipp = ZTOI(zp); ++ zfs_dirent_unlock(dl); ++ dzp->z_zn_prefetch = B_TRUE; /* enable prefetching */ ++ } ++ rpnp = NULL; ++ } ++ ++ if ((flags & FIGNORECASE) && rpnp && !error) ++ (void) strlcpy(rpnp->pn_buf, name, rpnp->pn_bufsize); ++ ++ return (error); ++} ++ ++/* ++ * unlinked Set (formerly known as the "delete queue") Error Handling ++ * ++ * When dealing with the unlinked set, we dmu_tx_hold_zap(), but we ++ * don't specify the name of the entry that we will be manipulating. We ++ * also fib and say that we won't be adding any new entries to the ++ * unlinked set, even though we might (this is to lower the minimum file ++ * size that can be deleted in a full filesystem). So on the small ++ * chance that the nlink list is using a fat zap (ie. has more than ++ * 2000 entries), we *may* not pre-read a block that's needed. ++ * Therefore it is remotely possible for some of the assertions ++ * regarding the unlinked set below to fail due to i/o error. On a ++ * nondebug system, this will result in the space being leaked. ++ */ ++void ++zfs_unlinked_add(znode_t *zp, dmu_tx_t *tx) ++{ ++ zfs_sb_t *zsb = ZTOZSB(zp); ++ ++ ASSERT(zp->z_unlinked); ++ ASSERT(zp->z_links == 0); ++ ++ VERIFY3U(0, ==, ++ zap_add_int(zsb->z_os, zsb->z_unlinkedobj, zp->z_id, tx)); ++} ++ ++/* ++ * Delete the entire contents of a directory. Return a count ++ * of the number of entries that could not be deleted. If we encounter ++ * an error, return a count of at least one so that the directory stays ++ * in the unlinked set. ++ * ++ * NOTE: this function assumes that the directory is inactive, ++ * so there is no need to lock its entries before deletion. ++ * Also, it assumes the directory contents is *only* regular ++ * files. ++ */ ++static int ++zfs_purgedir(znode_t *dzp) ++{ ++ zap_cursor_t zc; ++ zap_attribute_t zap; ++ znode_t *xzp; ++ dmu_tx_t *tx; ++ zfs_sb_t *zsb = ZTOZSB(dzp); ++ zfs_dirlock_t dl; ++ int skipped = 0; ++ int error; ++ ++ for (zap_cursor_init(&zc, zsb->z_os, dzp->z_id); ++ (error = zap_cursor_retrieve(&zc, &zap)) == 0; ++ zap_cursor_advance(&zc)) { ++ error = zfs_zget(zsb, ++ ZFS_DIRENT_OBJ(zap.za_first_integer), &xzp); ++ if (error) { ++ skipped += 1; ++ continue; ++ } ++ ++ ASSERT(S_ISREG(ZTOI(xzp)->i_mode)||S_ISLNK(ZTOI(xzp)->i_mode)); ++ ++ tx = dmu_tx_create(zsb->z_os); ++ dmu_tx_hold_sa(tx, dzp->z_sa_hdl, B_FALSE); ++ dmu_tx_hold_zap(tx, dzp->z_id, FALSE, zap.za_name); ++ dmu_tx_hold_sa(tx, xzp->z_sa_hdl, B_FALSE); ++ dmu_tx_hold_zap(tx, zsb->z_unlinkedobj, FALSE, NULL); ++ /* Is this really needed ? 
*/ ++ zfs_sa_upgrade_txholds(tx, xzp); ++ error = dmu_tx_assign(tx, TXG_WAIT); ++ if (error) { ++ dmu_tx_abort(tx); ++ iput(ZTOI(xzp)); ++ skipped += 1; ++ continue; ++ } ++ bzero(&dl, sizeof (dl)); ++ dl.dl_dzp = dzp; ++ dl.dl_name = zap.za_name; ++ ++ error = zfs_link_destroy(&dl, xzp, tx, 0, NULL); ++ if (error) ++ skipped += 1; ++ dmu_tx_commit(tx); ++ ++ iput(ZTOI(xzp)); ++ } ++ zap_cursor_fini(&zc); ++ if (error != ENOENT) ++ skipped += 1; ++ return (skipped); ++} ++ ++/* ++ * Clean up any znodes that had no links when we either crashed or ++ * (force) umounted the file system. ++ */ ++void ++zfs_unlinked_drain(zfs_sb_t *zsb) ++{ ++ zap_cursor_t zc; ++ zap_attribute_t zap; ++ dmu_object_info_t doi; ++ znode_t *zp; ++ int error; ++ ++ /* ++ * Interate over the contents of the unlinked set. ++ */ ++ for (zap_cursor_init(&zc, zsb->z_os, zsb->z_unlinkedobj); ++ zap_cursor_retrieve(&zc, &zap) == 0; ++ zap_cursor_advance(&zc)) { ++ ++ /* ++ * See what kind of object we have in list ++ */ ++ ++ error = dmu_object_info(zsb->z_os, zap.za_first_integer, &doi); ++ if (error != 0) ++ continue; ++ ++ ASSERT((doi.doi_type == DMU_OT_PLAIN_FILE_CONTENTS) || ++ (doi.doi_type == DMU_OT_DIRECTORY_CONTENTS)); ++ /* ++ * We need to re-mark these list entries for deletion, ++ * so we pull them back into core and set zp->z_unlinked. ++ */ ++ error = zfs_zget(zsb, zap.za_first_integer, &zp); ++ ++ /* ++ * We may pick up znodes that are already marked for deletion. ++ * This could happen during the purge of an extended attribute ++ * directory. All we need to do is skip over them, since they ++ * are already in the system marked z_unlinked. ++ */ ++ if (error != 0) ++ continue; ++ ++ zp->z_unlinked = B_TRUE; ++ ++ /* ++ * If this is an attribute directory, purge its contents. ++ */ ++ if (S_ISDIR(ZTOI(zp)->i_mode) && (zp->z_pflags & ZFS_XATTR)) { ++ /* ++ * We don't need to check the return value of ++ * zfs_purgedir here, because zfs_rmnode will just ++ * return this xattr directory to the unlinked set ++ * until all of its xattrs are gone. ++ */ ++ (void) zfs_purgedir(zp); ++ } ++ ++ iput(ZTOI(zp)); ++ } ++ zap_cursor_fini(&zc); ++} ++ ++void ++zfs_rmnode(znode_t *zp) ++{ ++ zfs_sb_t *zsb = ZTOZSB(zp); ++ objset_t *os = zsb->z_os; ++ znode_t *xzp = NULL; ++ dmu_tx_t *tx; ++ uint64_t acl_obj; ++ uint64_t xattr_obj; ++ uint64_t count; ++ int error; ++ ++ ASSERT(zp->z_links == 0); ++ ASSERT(atomic_read(&ZTOI(zp)->i_count) == 0); ++ ++ /* ++ * If this is an attribute directory, purge its contents. ++ */ ++ if (S_ISDIR(ZTOI(zp)->i_mode) && (zp->z_pflags & ZFS_XATTR)) { ++ error = zap_count(os, zp->z_id, &count); ++ if (error) { ++ zfs_znode_dmu_fini(zp); ++ return; ++ } ++ ++ if (count > 0) { ++ taskq_t *taskq; ++ ++ /* ++ * There are still directory entries in this xattr ++ * directory. Let zfs_unlinked_drain() deal with ++ * them to avoid deadlocking this process in the ++ * zfs_purgedir()->zfs_zget()->ilookup() callpath ++ * on the xattr inode's I_FREEING bit. ++ */ ++ taskq = dsl_pool_iput_taskq(dmu_objset_pool(os)); ++ taskq_dispatch(taskq, (task_func_t *) ++ zfs_unlinked_drain, zsb, TQ_SLEEP); ++ ++ zfs_znode_dmu_fini(zp); ++ return; ++ } ++ } ++ ++ /* ++ * Free up all the data in the file. ++ */ ++ error = dmu_free_long_range(os, zp->z_id, 0, DMU_OBJECT_END); ++ if (error) { ++ /* ++ * Not enough space. Leave the file in the unlinked set. ++ */ ++ zfs_znode_dmu_fini(zp); ++ return; ++ } ++ ++ /* ++ * If the file has extended attributes, we're going to unlink ++ * the xattr dir. 
++ */ ++ error = sa_lookup(zp->z_sa_hdl, SA_ZPL_XATTR(zsb), ++ &xattr_obj, sizeof (xattr_obj)); ++ if (error == 0 && xattr_obj) { ++ error = zfs_zget(zsb, xattr_obj, &xzp); ++ ASSERT(error == 0); ++ } ++ ++ acl_obj = zfs_external_acl(zp); ++ ++ /* ++ * Set up the final transaction. ++ */ ++ tx = dmu_tx_create(os); ++ dmu_tx_hold_free(tx, zp->z_id, 0, DMU_OBJECT_END); ++ dmu_tx_hold_zap(tx, zsb->z_unlinkedobj, FALSE, NULL); ++ if (xzp) { ++ dmu_tx_hold_zap(tx, zsb->z_unlinkedobj, TRUE, NULL); ++ dmu_tx_hold_sa(tx, xzp->z_sa_hdl, B_FALSE); ++ } ++ if (acl_obj) ++ dmu_tx_hold_free(tx, acl_obj, 0, DMU_OBJECT_END); ++ ++ zfs_sa_upgrade_txholds(tx, zp); ++ error = dmu_tx_assign(tx, TXG_WAIT); ++ if (error) { ++ /* ++ * Not enough space to delete the file. Leave it in the ++ * unlinked set, leaking it until the fs is remounted (at ++ * which point we'll call zfs_unlinked_drain() to process it). ++ */ ++ dmu_tx_abort(tx); ++ zfs_znode_dmu_fini(zp); ++ goto out; ++ } ++ ++ if (xzp) { ++ ASSERT(error == 0); ++ mutex_enter(&xzp->z_lock); ++ xzp->z_unlinked = B_TRUE; /* mark xzp for deletion */ ++ xzp->z_links = 0; /* no more links to it */ ++ VERIFY(0 == sa_update(xzp->z_sa_hdl, SA_ZPL_LINKS(zsb), ++ &xzp->z_links, sizeof (xzp->z_links), tx)); ++ mutex_exit(&xzp->z_lock); ++ zfs_unlinked_add(xzp, tx); ++ } ++ ++ /* Remove this znode from the unlinked set */ ++ VERIFY3U(0, ==, ++ zap_remove_int(zsb->z_os, zsb->z_unlinkedobj, zp->z_id, tx)); ++ ++ zfs_znode_delete(zp, tx); ++ ++ dmu_tx_commit(tx); ++out: ++ if (xzp) ++ iput(ZTOI(xzp)); ++} ++ ++static uint64_t ++zfs_dirent(znode_t *zp, uint64_t mode) ++{ ++ uint64_t de = zp->z_id; ++ ++ if (ZTOZSB(zp)->z_version >= ZPL_VERSION_DIRENT_TYPE) ++ de |= IFTODT(mode) << 60; ++ return (de); ++} ++ ++/* ++ * Link zp into dl. Can only fail if zp has been unlinked. 
++ */ ++int ++zfs_link_create(zfs_dirlock_t *dl, znode_t *zp, dmu_tx_t *tx, int flag) ++{ ++ znode_t *dzp = dl->dl_dzp; ++ zfs_sb_t *zsb = ZTOZSB(zp); ++ uint64_t value; ++ int zp_is_dir = S_ISDIR(ZTOI(zp)->i_mode); ++ sa_bulk_attr_t bulk[5]; ++ uint64_t mtime[2], ctime[2]; ++ int count = 0; ++ int error; ++ ++ mutex_enter(&zp->z_lock); ++ ++ if (!(flag & ZRENAMING)) { ++ if (zp->z_unlinked) { /* no new links to unlinked zp */ ++ ASSERT(!(flag & (ZNEW | ZEXISTS))); ++ mutex_exit(&zp->z_lock); ++ return (ENOENT); ++ } ++ zp->z_links++; ++ SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_LINKS(zsb), NULL, ++ &zp->z_links, sizeof (zp->z_links)); ++ ++ } ++ SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_PARENT(zsb), NULL, ++ &dzp->z_id, sizeof (dzp->z_id)); ++ SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_FLAGS(zsb), NULL, ++ &zp->z_pflags, sizeof (zp->z_pflags)); ++ ++ if (!(flag & ZNEW)) { ++ SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_CTIME(zsb), NULL, ++ ctime, sizeof (ctime)); ++ zfs_tstamp_update_setup(zp, STATE_CHANGED, mtime, ++ ctime, B_TRUE); ++ } ++ error = sa_bulk_update(zp->z_sa_hdl, bulk, count, tx); ++ ASSERT(error == 0); ++ ++ mutex_exit(&zp->z_lock); ++ ++ mutex_enter(&dzp->z_lock); ++ dzp->z_size++; ++ dzp->z_links += zp_is_dir; ++ count = 0; ++ SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_SIZE(zsb), NULL, ++ &dzp->z_size, sizeof (dzp->z_size)); ++ SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_LINKS(zsb), NULL, ++ &dzp->z_links, sizeof (dzp->z_links)); ++ SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_MTIME(zsb), NULL, ++ mtime, sizeof (mtime)); ++ SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_CTIME(zsb), NULL, ++ ctime, sizeof (ctime)); ++ SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_FLAGS(zsb), NULL, ++ &dzp->z_pflags, sizeof (dzp->z_pflags)); ++ zfs_tstamp_update_setup(dzp, CONTENT_MODIFIED, mtime, ctime, B_TRUE); ++ error = sa_bulk_update(dzp->z_sa_hdl, bulk, count, tx); ++ ASSERT(error == 0); ++ mutex_exit(&dzp->z_lock); ++ ++ value = zfs_dirent(zp, zp->z_mode); ++ error = zap_add(ZTOZSB(zp)->z_os, dzp->z_id, dl->dl_name, ++ 8, 1, &value, tx); ++ ASSERT(error == 0); ++ ++ return (0); ++} ++ ++static int ++zfs_dropname(zfs_dirlock_t *dl, znode_t *zp, znode_t *dzp, dmu_tx_t *tx, ++ int flag) ++{ ++ int error; ++ ++ if (ZTOZSB(zp)->z_norm) { ++ if (((ZTOZSB(zp)->z_case == ZFS_CASE_INSENSITIVE) && ++ (flag & ZCIEXACT)) || ++ ((ZTOZSB(zp)->z_case == ZFS_CASE_MIXED) && ++ !(flag & ZCILOOK))) ++ error = zap_remove_norm(ZTOZSB(zp)->z_os, ++ dzp->z_id, dl->dl_name, MT_EXACT, tx); ++ else ++ error = zap_remove_norm(ZTOZSB(zp)->z_os, ++ dzp->z_id, dl->dl_name, MT_FIRST, tx); ++ } else { ++ error = zap_remove(ZTOZSB(zp)->z_os, ++ dzp->z_id, dl->dl_name, tx); ++ } ++ ++ return (error); ++} ++ ++/* ++ * Unlink zp from dl, and mark zp for deletion if this was the last link. Can ++ * fail if zp is a mount point (EBUSY) or a non-empty directory (ENOTEMPTY). ++ * If 'unlinkedp' is NULL, we put unlinked znodes on the unlinked list. ++ * If it's non-NULL, we use it to indicate whether the znode needs deletion, ++ * and it's the caller's job to do it. 
++ */ ++int ++zfs_link_destroy(zfs_dirlock_t *dl, znode_t *zp, dmu_tx_t *tx, int flag, ++ boolean_t *unlinkedp) ++{ ++ znode_t *dzp = dl->dl_dzp; ++ zfs_sb_t *zsb = ZTOZSB(dzp); ++ int zp_is_dir = S_ISDIR(ZTOI(zp)->i_mode); ++ boolean_t unlinked = B_FALSE; ++ sa_bulk_attr_t bulk[5]; ++ uint64_t mtime[2], ctime[2]; ++ int count = 0; ++ int error; ++ ++#ifdef HAVE_DNLC ++ dnlc_remove(ZTOI(dzp), dl->dl_name); ++#endif /* HAVE_DNLC */ ++ ++ if (!(flag & ZRENAMING)) { ++ mutex_enter(&zp->z_lock); ++ ++ if (zp_is_dir && !zfs_dirempty(zp)) { ++ mutex_exit(&zp->z_lock); ++ return (ENOTEMPTY); ++ } ++ ++ /* ++ * If we get here, we are going to try to remove the object. ++ * First try removing the name from the directory; if that ++ * fails, return the error. ++ */ ++ error = zfs_dropname(dl, zp, dzp, tx, flag); ++ if (error != 0) { ++ mutex_exit(&zp->z_lock); ++ return (error); ++ } ++ ++ if (zp->z_links <= zp_is_dir) { ++ zfs_panic_recover("zfs: link count on %lu is %u, " ++ "should be at least %u", zp->z_id, ++ (int)zp->z_links, zp_is_dir + 1); ++ zp->z_links = zp_is_dir + 1; ++ } ++ if (--zp->z_links == zp_is_dir) { ++ zp->z_unlinked = B_TRUE; ++ zp->z_links = 0; ++ unlinked = B_TRUE; ++ } else { ++ SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_CTIME(zsb), ++ NULL, &ctime, sizeof (ctime)); ++ SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_FLAGS(zsb), ++ NULL, &zp->z_pflags, sizeof (zp->z_pflags)); ++ zfs_tstamp_update_setup(zp, STATE_CHANGED, mtime, ctime, ++ B_TRUE); ++ } ++ SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_LINKS(zsb), ++ NULL, &zp->z_links, sizeof (zp->z_links)); ++ error = sa_bulk_update(zp->z_sa_hdl, bulk, count, tx); ++ count = 0; ++ ASSERT(error == 0); ++ mutex_exit(&zp->z_lock); ++ } else { ++ error = zfs_dropname(dl, zp, dzp, tx, flag); ++ if (error != 0) ++ return (error); ++ } ++ ++ mutex_enter(&dzp->z_lock); ++ dzp->z_size--; /* one dirent removed */ ++ dzp->z_links -= zp_is_dir; /* ".." link from zp */ ++ SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_LINKS(zsb), ++ NULL, &dzp->z_links, sizeof (dzp->z_links)); ++ SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_SIZE(zsb), ++ NULL, &dzp->z_size, sizeof (dzp->z_size)); ++ SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_CTIME(zsb), ++ NULL, ctime, sizeof (ctime)); ++ SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_MTIME(zsb), ++ NULL, mtime, sizeof (mtime)); ++ SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_FLAGS(zsb), ++ NULL, &dzp->z_pflags, sizeof (dzp->z_pflags)); ++ zfs_tstamp_update_setup(dzp, CONTENT_MODIFIED, mtime, ctime, B_TRUE); ++ error = sa_bulk_update(dzp->z_sa_hdl, bulk, count, tx); ++ ASSERT(error == 0); ++ mutex_exit(&dzp->z_lock); ++ ++ if (unlinkedp != NULL) ++ *unlinkedp = unlinked; ++ else if (unlinked) ++ zfs_unlinked_add(zp, tx); ++ ++ return (0); ++} ++ ++/* ++ * Indicate whether the directory is empty. Works with or without z_lock ++ * held, but can only be consider a hint in the latter case. Returns true ++ * if only "." and ".." remain and there's no work in progress. 
++ */ ++boolean_t ++zfs_dirempty(znode_t *dzp) ++{ ++ return (dzp->z_size == 2 && dzp->z_dirlocks == 0); ++} ++ ++int ++zfs_make_xattrdir(znode_t *zp, vattr_t *vap, struct inode **xipp, cred_t *cr) ++{ ++ zfs_sb_t *zsb = ZTOZSB(zp); ++ znode_t *xzp; ++ dmu_tx_t *tx; ++ int error; ++ zfs_acl_ids_t acl_ids; ++ boolean_t fuid_dirtied; ++#ifdef DEBUG ++ uint64_t parent; ++#endif ++ ++ *xipp = NULL; ++ ++ if ((error = zfs_zaccess(zp, ACE_WRITE_NAMED_ATTRS, 0, B_FALSE, cr))) ++ return (error); ++ ++ if ((error = zfs_acl_ids_create(zp, IS_XATTR, vap, cr, NULL, ++ &acl_ids)) != 0) ++ return (error); ++ if (zfs_acl_ids_overquota(zsb, &acl_ids)) { ++ zfs_acl_ids_free(&acl_ids); ++ return (EDQUOT); ++ } ++ ++top: ++ tx = dmu_tx_create(zsb->z_os); ++ dmu_tx_hold_sa_create(tx, acl_ids.z_aclp->z_acl_bytes + ++ ZFS_SA_BASE_ATTR_SIZE); ++ dmu_tx_hold_sa(tx, zp->z_sa_hdl, B_TRUE); ++ dmu_tx_hold_zap(tx, DMU_NEW_OBJECT, FALSE, NULL); ++ fuid_dirtied = zsb->z_fuid_dirty; ++ if (fuid_dirtied) ++ zfs_fuid_txhold(zsb, tx); ++ error = dmu_tx_assign(tx, TXG_NOWAIT); ++ if (error) { ++ if (error == ERESTART) { ++ dmu_tx_wait(tx); ++ dmu_tx_abort(tx); ++ goto top; ++ } ++ zfs_acl_ids_free(&acl_ids); ++ dmu_tx_abort(tx); ++ return (error); ++ } ++ zfs_mknode(zp, vap, tx, cr, IS_XATTR, &xzp, &acl_ids); ++ ++ if (fuid_dirtied) ++ zfs_fuid_sync(zsb, tx); ++ ++#ifdef DEBUG ++ error = sa_lookup(xzp->z_sa_hdl, SA_ZPL_PARENT(zsb), ++ &parent, sizeof (parent)); ++ ASSERT(error == 0 && parent == zp->z_id); ++#endif ++ ++ VERIFY(0 == sa_update(zp->z_sa_hdl, SA_ZPL_XATTR(zsb), &xzp->z_id, ++ sizeof (xzp->z_id), tx)); ++ ++ (void) zfs_log_create(zsb->z_log, tx, TX_MKXATTR, zp, ++ xzp, "", NULL, acl_ids.z_fuidp, vap); ++ ++ zfs_acl_ids_free(&acl_ids); ++ dmu_tx_commit(tx); ++ ++ *xipp = ZTOI(xzp); ++ ++ return (0); ++} ++ ++/* ++ * Return a znode for the extended attribute directory for zp. ++ * ** If the directory does not already exist, it is created ** ++ * ++ * IN: zp - znode to obtain attribute directory from ++ * cr - credentials of caller ++ * flags - flags from the VOP_LOOKUP call ++ * ++ * OUT: xipp - pointer to extended attribute znode ++ * ++ * RETURN: 0 on success ++ * error number on failure ++ */ ++int ++zfs_get_xattrdir(znode_t *zp, struct inode **xipp, cred_t *cr, int flags) ++{ ++ zfs_sb_t *zsb = ZTOZSB(zp); ++ znode_t *xzp; ++ zfs_dirlock_t *dl; ++ vattr_t va; ++ int error; ++top: ++ error = zfs_dirent_lock(&dl, zp, "", &xzp, ZXATTR, NULL, NULL); ++ if (error) ++ return (error); ++ ++ if (xzp != NULL) { ++ *xipp = ZTOI(xzp); ++ zfs_dirent_unlock(dl); ++ return (0); ++ } ++ ++ if (!(flags & CREATE_XATTR_DIR)) { ++ zfs_dirent_unlock(dl); ++ return (ENOENT); ++ } ++ ++ if (zfs_is_readonly(zsb)) { ++ zfs_dirent_unlock(dl); ++ return (EROFS); ++ } ++ ++ /* ++ * The ability to 'create' files in an attribute ++ * directory comes from the write_xattr permission on the base file. ++ * ++ * The ability to 'search' an attribute directory requires ++ * read_xattr permission on the base file. ++ * ++ * Once in a directory the ability to read/write attributes ++ * is controlled by the permissions on the attribute file. 
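To make the zfs_get_xattrdir() contract above concrete: a caller that wants the extended attribute directory created on demand passes CREATE_XATTR_DIR in flags; without it the call returns ENOENT when no xattr directory exists, and EROFS on a read-only filesystem. A minimal sketch (illustrative only, not part of the patch hunks, helper name hypothetical):

/*
 * Illustrative sketch only -- not part of the patch.  Obtain the xattr
 * directory for a znode, creating it if necessary, per the
 * zfs_get_xattrdir() contract documented above.
 */
static int
example_open_xattrdir(znode_t *zp, struct inode **xipp, cred_t *cr)
{
	/* CREATE_XATTR_DIR requests creation when the directory is missing. */
	return (zfs_get_xattrdir(zp, xipp, cr, CREATE_XATTR_DIR));
}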
++ */ ++ va.va_mask = ATTR_MODE | ATTR_UID | ATTR_GID; ++ va.va_mode = S_IFDIR | S_ISVTX | 0777; ++ zfs_fuid_map_ids(zp, cr, &va.va_uid, &va.va_gid); ++ ++ va.va_dentry = NULL; ++ error = zfs_make_xattrdir(zp, &va, xipp, cr); ++ zfs_dirent_unlock(dl); ++ ++ if (error == ERESTART) { ++ /* NB: we already did dmu_tx_wait() if necessary */ ++ goto top; ++ } ++ ++ return (error); ++} ++ ++/* ++ * Decide whether it is okay to remove within a sticky directory. ++ * ++ * In sticky directories, write access is not sufficient; ++ * you can remove entries from a directory only if: ++ * ++ * you own the directory, ++ * you own the entry, ++ * the entry is a plain file and you have write access, ++ * or you are privileged (checked in secpolicy...). ++ * ++ * The function returns 0 if remove access is granted. ++ */ ++int ++zfs_sticky_remove_access(znode_t *zdp, znode_t *zp, cred_t *cr) ++{ ++ uid_t uid; ++ uid_t downer; ++ uid_t fowner; ++ zfs_sb_t *zsb = ZTOZSB(zdp); ++ ++ if (zsb->z_replay) ++ return (0); ++ ++ if ((zdp->z_mode & S_ISVTX) == 0) ++ return (0); ++ ++ downer = zfs_fuid_map_id(zsb, zdp->z_uid, cr, ZFS_OWNER); ++ fowner = zfs_fuid_map_id(zsb, zp->z_uid, cr, ZFS_OWNER); ++ ++ if ((uid = crgetuid(cr)) == downer || uid == fowner || ++ (S_ISDIR(ZTOI(zp)->i_mode) && ++ zfs_zaccess(zp, ACE_WRITE_DATA, 0, B_FALSE, cr) == 0)) ++ return (0); ++ else ++ return (secpolicy_vnode_remove(cr)); ++} +diff -uNr linux-3.2.33-go.orig/fs/zfs/zfs/zfs_fm.c linux-3.2.33-go/fs/zfs/zfs/zfs_fm.c +--- linux-3.2.33-go.orig/fs/zfs/zfs/zfs_fm.c 1970-01-01 01:00:00.000000000 +0100 ++++ linux-3.2.33-go/fs/zfs/zfs/zfs_fm.c 2012-11-16 23:25:34.349039334 +0100 +@@ -0,0 +1,875 @@ ++/* ++ * CDDL HEADER START ++ * ++ * The contents of this file are subject to the terms of the ++ * Common Development and Distribution License (the "License"). ++ * You may not use this file except in compliance with the License. ++ * ++ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE ++ * or http://www.opensolaris.org/os/licensing. ++ * See the License for the specific language governing permissions ++ * and limitations under the License. ++ * ++ * When distributing Covered Code, include this CDDL HEADER in each ++ * file and include the License file at usr/src/OPENSOLARIS.LICENSE. ++ * If applicable, add the following below this CDDL HEADER, with the ++ * fields enclosed by brackets "[]" replaced with your own identifying ++ * information: Portions Copyright [yyyy] [name of copyright owner] ++ * ++ * CDDL HEADER END ++ */ ++/* ++ * Copyright 2009 Sun Microsystems, Inc. All rights reserved. ++ * Use is subject to license terms. ++ */ ++ ++/* ++ * Copyright (c) 2012 by Delphix. All rights reserved. ++ */ ++ ++#include ++#include ++#include ++#include ++#include ++#include ++ ++#include ++#include ++#include ++#include ++ ++/* ++ * This general routine is responsible for generating all the different ZFS ++ * ereports. The payload is dependent on the class, and which arguments are ++ * supplied to the function: ++ * ++ * EREPORT POOL VDEV IO ++ * block X X X ++ * data X X ++ * device X X ++ * pool X ++ * ++ * If we are in a loading state, all errors are chained together by the same ++ * SPA-wide ENA (Error Numeric Association). ++ * ++ * For isolated I/O requests, we get the ENA from the zio_t. The propagation ++ * gets very complicated due to RAID-Z, gang blocks, and vdev caching. We want ++ * to chain together all ereports associated with a logical piece of data. 
For ++ * read I/Os, there are basically three 'types' of I/O, which form a roughly ++ * layered diagram: ++ * ++ * +---------------+ ++ * | Aggregate I/O | No associated logical data or device ++ * +---------------+ ++ * | ++ * V ++ * +---------------+ Reads associated with a piece of logical data. ++ * | Read I/O | This includes reads on behalf of RAID-Z, ++ * +---------------+ mirrors, gang blocks, retries, etc. ++ * | ++ * V ++ * +---------------+ Reads associated with a particular device, but ++ * | Physical I/O | no logical data. Issued as part of vdev caching ++ * +---------------+ and I/O aggregation. ++ * ++ * Note that 'physical I/O' here is not the same terminology as used in the rest ++ * of ZIO. Typically, 'physical I/O' simply means that there is no attached ++ * blockpointer. But I/O with no associated block pointer can still be related ++ * to a logical piece of data (i.e. RAID-Z requests). ++ * ++ * Purely physical I/O always have unique ENAs. They are not related to a ++ * particular piece of logical data, and therefore cannot be chained together. ++ * We still generate an ereport, but the DE doesn't correlate it with any ++ * logical piece of data. When such an I/O fails, the delegated I/O requests ++ * will issue a retry, which will trigger the 'real' ereport with the correct ++ * ENA. ++ * ++ * We keep track of the ENA for a ZIO chain through the 'io_logical' member. ++ * When a new logical I/O is issued, we set this to point to itself. Child I/Os ++ * then inherit this pointer, so that when it is first set subsequent failures ++ * will use the same ENA. For vdev cache fill and queue aggregation I/O, ++ * this pointer is set to NULL, and no ereport will be generated (since it ++ * doesn't actually correspond to any particular device or piece of data, ++ * and the caller will always retry without caching or queueing anyway). ++ * ++ * For checksum errors, we want to include more information about the actual ++ * error which occurs. Accordingly, we build an ereport when the error is ++ * noticed, but instead of sending it in immediately, we hang it off of the ++ * io_cksum_report field of the logical IO. When the logical IO completes ++ * (successfully or not), zfs_ereport_finish_checksum() is called with the ++ * good and bad versions of the buffer (if available), and we annotate the ++ * ereport with information about the differences. ++ */ ++#ifdef _KERNEL ++static void ++zfs_zevent_post_cb(nvlist_t *nvl, nvlist_t *detector) ++{ ++ if (nvl) ++ fm_nvlist_destroy(nvl, FM_NVA_FREE); ++ ++ if (detector) ++ fm_nvlist_destroy(detector, FM_NVA_FREE); ++} ++ ++static void ++zfs_ereport_start(nvlist_t **ereport_out, nvlist_t **detector_out, ++ const char *subclass, spa_t *spa, vdev_t *vd, zio_t *zio, ++ uint64_t stateoroffset, uint64_t size) ++{ ++ nvlist_t *ereport, *detector; ++ ++ uint64_t ena; ++ char class[64]; ++ ++ /* ++ * If we are doing a spa_tryimport() or in recovery mode, ++ * ignore errors. ++ */ ++ if (spa_load_state(spa) == SPA_LOAD_TRYIMPORT || ++ spa_load_state(spa) == SPA_LOAD_RECOVER) ++ return; ++ ++ /* ++ * If we are in the middle of opening a pool, and the previous attempt ++ * failed, don't bother logging any new ereports - we're just going to ++ * get the same diagnosis anyway. ++ */ ++ if (spa_load_state(spa) != SPA_LOAD_NONE && ++ spa->spa_last_open_failed) ++ return; ++ ++ if (zio != NULL) { ++ /* ++ * If this is not a read or write zio, ignore the error. This ++ * can occur if the DKIOCFLUSHWRITECACHE ioctl fails. 
++ */ ++ if (zio->io_type != ZIO_TYPE_READ && ++ zio->io_type != ZIO_TYPE_WRITE) ++ return; ++ ++ if (vd != NULL) { ++ /* ++ * If the vdev has already been marked as failing due ++ * to a failed probe, then ignore any subsequent I/O ++ * errors, as the DE will automatically fault the vdev ++ * on the first such failure. This also catches cases ++ * where vdev_remove_wanted is set and the device has ++ * not yet been asynchronously placed into the REMOVED ++ * state. ++ */ ++ if (zio->io_vd == vd && !vdev_accessible(vd, zio)) ++ return; ++ ++ /* ++ * Ignore checksum errors for reads from DTL regions of ++ * leaf vdevs. ++ */ ++ if (zio->io_type == ZIO_TYPE_READ && ++ zio->io_error == ECKSUM && ++ vd->vdev_ops->vdev_op_leaf && ++ vdev_dtl_contains(vd, DTL_MISSING, zio->io_txg, 1)) ++ return; ++ } ++ } ++ ++ /* ++ * For probe failure, we want to avoid posting ereports if we've ++ * already removed the device in the meantime. ++ */ ++ if (vd != NULL && ++ strcmp(subclass, FM_EREPORT_ZFS_PROBE_FAILURE) == 0 && ++ (vd->vdev_remove_wanted || vd->vdev_state == VDEV_STATE_REMOVED)) ++ return; ++ ++ if ((ereport = fm_nvlist_create(NULL)) == NULL) ++ return; ++ ++ if ((detector = fm_nvlist_create(NULL)) == NULL) { ++ fm_nvlist_destroy(ereport, FM_NVA_FREE); ++ return; ++ } ++ ++ /* ++ * Serialize ereport generation ++ */ ++ mutex_enter(&spa->spa_errlist_lock); ++ ++ /* ++ * Determine the ENA to use for this event. If we are in a loading ++ * state, use a SPA-wide ENA. Otherwise, if we are in an I/O state, use ++ * a root zio-wide ENA. Otherwise, simply use a unique ENA. ++ */ ++ if (spa_load_state(spa) != SPA_LOAD_NONE) { ++ if (spa->spa_ena == 0) ++ spa->spa_ena = fm_ena_generate(0, FM_ENA_FMT1); ++ ena = spa->spa_ena; ++ } else if (zio != NULL && zio->io_logical != NULL) { ++ if (zio->io_logical->io_ena == 0) ++ zio->io_logical->io_ena = ++ fm_ena_generate(0, FM_ENA_FMT1); ++ ena = zio->io_logical->io_ena; ++ } else { ++ ena = fm_ena_generate(0, FM_ENA_FMT1); ++ } ++ ++ /* ++ * Construct the full class, detector, and other standard FMA fields. ++ */ ++ (void) snprintf(class, sizeof (class), "%s.%s", ++ ZFS_ERROR_CLASS, subclass); ++ ++ fm_fmri_zfs_set(detector, FM_ZFS_SCHEME_VERSION, spa_guid(spa), ++ vd != NULL ? vd->vdev_guid : 0); ++ ++ fm_ereport_set(ereport, FM_EREPORT_VERSION, class, ena, detector, NULL); ++ ++ /* ++ * Construct the per-ereport payload, depending on which parameters are ++ * passed in. ++ */ ++ ++ /* ++ * Generic payload members common to all ereports. ++ */ ++ fm_payload_set(ereport, FM_EREPORT_PAYLOAD_ZFS_POOL, ++ DATA_TYPE_STRING, spa_name(spa), FM_EREPORT_PAYLOAD_ZFS_POOL_GUID, ++ DATA_TYPE_UINT64, spa_guid(spa), ++ FM_EREPORT_PAYLOAD_ZFS_POOL_CONTEXT, DATA_TYPE_INT32, ++ spa_load_state(spa), NULL); ++ ++ if (spa != NULL) { ++ fm_payload_set(ereport, FM_EREPORT_PAYLOAD_ZFS_POOL_FAILMODE, ++ DATA_TYPE_STRING, ++ spa_get_failmode(spa) == ZIO_FAILURE_MODE_WAIT ? ++ FM_EREPORT_FAILMODE_WAIT : ++ spa_get_failmode(spa) == ZIO_FAILURE_MODE_CONTINUE ? 
++ FM_EREPORT_FAILMODE_CONTINUE : FM_EREPORT_FAILMODE_PANIC, ++ NULL); ++ } ++ ++ if (vd != NULL) { ++ vdev_t *pvd = vd->vdev_parent; ++ ++ fm_payload_set(ereport, FM_EREPORT_PAYLOAD_ZFS_VDEV_GUID, ++ DATA_TYPE_UINT64, vd->vdev_guid, ++ FM_EREPORT_PAYLOAD_ZFS_VDEV_TYPE, ++ DATA_TYPE_STRING, vd->vdev_ops->vdev_op_type, NULL); ++ if (vd->vdev_path != NULL) ++ fm_payload_set(ereport, ++ FM_EREPORT_PAYLOAD_ZFS_VDEV_PATH, ++ DATA_TYPE_STRING, vd->vdev_path, NULL); ++ if (vd->vdev_devid != NULL) ++ fm_payload_set(ereport, ++ FM_EREPORT_PAYLOAD_ZFS_VDEV_DEVID, ++ DATA_TYPE_STRING, vd->vdev_devid, NULL); ++ if (vd->vdev_fru != NULL) ++ fm_payload_set(ereport, ++ FM_EREPORT_PAYLOAD_ZFS_VDEV_FRU, ++ DATA_TYPE_STRING, vd->vdev_fru, NULL); ++ ++ if (pvd != NULL) { ++ fm_payload_set(ereport, ++ FM_EREPORT_PAYLOAD_ZFS_PARENT_GUID, ++ DATA_TYPE_UINT64, pvd->vdev_guid, ++ FM_EREPORT_PAYLOAD_ZFS_PARENT_TYPE, ++ DATA_TYPE_STRING, pvd->vdev_ops->vdev_op_type, ++ NULL); ++ if (pvd->vdev_path) ++ fm_payload_set(ereport, ++ FM_EREPORT_PAYLOAD_ZFS_PARENT_PATH, ++ DATA_TYPE_STRING, pvd->vdev_path, NULL); ++ if (pvd->vdev_devid) ++ fm_payload_set(ereport, ++ FM_EREPORT_PAYLOAD_ZFS_PARENT_DEVID, ++ DATA_TYPE_STRING, pvd->vdev_devid, NULL); ++ } ++ } ++ ++ if (zio != NULL) { ++ /* ++ * Payload common to all I/Os. ++ */ ++ fm_payload_set(ereport, FM_EREPORT_PAYLOAD_ZFS_ZIO_ERR, ++ DATA_TYPE_INT32, zio->io_error, NULL); ++ fm_payload_set(ereport, FM_EREPORT_PAYLOAD_ZFS_ZIO_FLAGS, ++ DATA_TYPE_INT32, zio->io_flags, NULL); ++ fm_payload_set(ereport, FM_EREPORT_PAYLOAD_ZFS_ZIO_STAGE, ++ DATA_TYPE_UINT32, zio->io_stage, NULL); ++ fm_payload_set(ereport, FM_EREPORT_PAYLOAD_ZFS_ZIO_PIPELINE, ++ DATA_TYPE_UINT32, zio->io_pipeline, NULL); ++ fm_payload_set(ereport, FM_EREPORT_PAYLOAD_ZFS_ZIO_DELAY, ++ DATA_TYPE_UINT64, zio->io_delay, NULL); ++ ++ /* ++ * If the 'size' parameter is non-zero, it indicates this is a ++ * RAID-Z or other I/O where the physical offset and length are ++ * provided for us, instead of within the zio_t. ++ */ ++ if (vd != NULL) { ++ if (size) ++ fm_payload_set(ereport, ++ FM_EREPORT_PAYLOAD_ZFS_ZIO_OFFSET, ++ DATA_TYPE_UINT64, stateoroffset, ++ FM_EREPORT_PAYLOAD_ZFS_ZIO_SIZE, ++ DATA_TYPE_UINT64, size, NULL); ++ else ++ fm_payload_set(ereport, ++ FM_EREPORT_PAYLOAD_ZFS_ZIO_OFFSET, ++ DATA_TYPE_UINT64, zio->io_offset, ++ FM_EREPORT_PAYLOAD_ZFS_ZIO_SIZE, ++ DATA_TYPE_UINT64, zio->io_size, NULL); ++ } ++ ++ /* ++ * Payload for I/Os with corresponding logical information. ++ */ ++ if (zio->io_logical != NULL) ++ fm_payload_set(ereport, ++ FM_EREPORT_PAYLOAD_ZFS_ZIO_OBJSET, ++ DATA_TYPE_UINT64, ++ zio->io_logical->io_bookmark.zb_objset, ++ FM_EREPORT_PAYLOAD_ZFS_ZIO_OBJECT, ++ DATA_TYPE_UINT64, ++ zio->io_logical->io_bookmark.zb_object, ++ FM_EREPORT_PAYLOAD_ZFS_ZIO_LEVEL, ++ DATA_TYPE_INT64, ++ zio->io_logical->io_bookmark.zb_level, ++ FM_EREPORT_PAYLOAD_ZFS_ZIO_BLKID, ++ DATA_TYPE_UINT64, ++ zio->io_logical->io_bookmark.zb_blkid, NULL); ++ } else if (vd != NULL) { ++ /* ++ * If we have a vdev but no zio, this is a device fault, and the ++ * 'stateoroffset' parameter indicates the previous state of the ++ * vdev. 
++ */ ++ fm_payload_set(ereport, ++ FM_EREPORT_PAYLOAD_ZFS_PREV_STATE, ++ DATA_TYPE_UINT64, stateoroffset, NULL); ++ } ++ ++ mutex_exit(&spa->spa_errlist_lock); ++ ++ *ereport_out = ereport; ++ *detector_out = detector; ++} ++ ++/* if it's <= 128 bytes, save the corruption directly */ ++#define ZFM_MAX_INLINE (128 / sizeof (uint64_t)) ++ ++#define MAX_RANGES 16 ++ ++typedef struct zfs_ecksum_info { ++ /* histograms of set and cleared bits by bit number in a 64-bit word */ ++ uint16_t zei_histogram_set[sizeof (uint64_t) * NBBY]; ++ uint16_t zei_histogram_cleared[sizeof (uint64_t) * NBBY]; ++ ++ /* inline arrays of bits set and cleared. */ ++ uint64_t zei_bits_set[ZFM_MAX_INLINE]; ++ uint64_t zei_bits_cleared[ZFM_MAX_INLINE]; ++ ++ /* ++ * for each range, the number of bits set and cleared. The Hamming ++ * distance between the good and bad buffers is the sum of them all. ++ */ ++ uint32_t zei_range_sets[MAX_RANGES]; ++ uint32_t zei_range_clears[MAX_RANGES]; ++ ++ struct zei_ranges { ++ uint32_t zr_start; ++ uint32_t zr_end; ++ } zei_ranges[MAX_RANGES]; ++ ++ size_t zei_range_count; ++ uint32_t zei_mingap; ++ uint32_t zei_allowed_mingap; ++ ++} zfs_ecksum_info_t; ++ ++static void ++update_histogram(uint64_t value_arg, uint16_t *hist, uint32_t *count) ++{ ++ size_t i; ++ size_t bits = 0; ++ uint64_t value = BE_64(value_arg); ++ ++ /* We store the bits in big-endian (largest-first) order */ ++ for (i = 0; i < 64; i++) { ++ if (value & (1ull << i)) { ++ hist[63 - i]++; ++ ++bits; ++ } ++ } ++ /* update the count of bits changed */ ++ *count += bits; ++} ++ ++/* ++ * We've now filled up the range array, and need to increase "mingap" and ++ * shrink the range list accordingly. zei_mingap is always the smallest ++ * distance between array entries, so we set the new_allowed_gap to be ++ * one greater than that. We then go through the list, joining together ++ * any ranges which are closer than the new_allowed_gap. ++ * ++ * By construction, there will be at least one. We also update zei_mingap ++ * to the new smallest gap, to prepare for our next invocation. 
++ */ ++static void ++zei_shrink_ranges(zfs_ecksum_info_t *eip) ++{ ++ uint32_t mingap = UINT32_MAX; ++ uint32_t new_allowed_gap = eip->zei_mingap + 1; ++ ++ size_t idx, output; ++ size_t max = eip->zei_range_count; ++ ++ struct zei_ranges *r = eip->zei_ranges; ++ ++ ASSERT3U(eip->zei_range_count, >, 0); ++ ASSERT3U(eip->zei_range_count, <=, MAX_RANGES); ++ ++ output = idx = 0; ++ while (idx < max - 1) { ++ uint32_t start = r[idx].zr_start; ++ uint32_t end = r[idx].zr_end; ++ ++ while (idx < max - 1) { ++ uint32_t nstart, nend, gap; ++ ++ idx++; ++ nstart = r[idx].zr_start; ++ nend = r[idx].zr_end; ++ ++ gap = nstart - end; ++ if (gap < new_allowed_gap) { ++ end = nend; ++ continue; ++ } ++ if (gap < mingap) ++ mingap = gap; ++ break; ++ } ++ r[output].zr_start = start; ++ r[output].zr_end = end; ++ output++; ++ } ++ ASSERT3U(output, <, eip->zei_range_count); ++ eip->zei_range_count = output; ++ eip->zei_mingap = mingap; ++ eip->zei_allowed_mingap = new_allowed_gap; ++} ++ ++static void ++zei_add_range(zfs_ecksum_info_t *eip, int start, int end) ++{ ++ struct zei_ranges *r = eip->zei_ranges; ++ size_t count = eip->zei_range_count; ++ ++ if (count >= MAX_RANGES) { ++ zei_shrink_ranges(eip); ++ count = eip->zei_range_count; ++ } ++ if (count == 0) { ++ eip->zei_mingap = UINT32_MAX; ++ eip->zei_allowed_mingap = 1; ++ } else { ++ int gap = start - r[count - 1].zr_end; ++ ++ if (gap < eip->zei_allowed_mingap) { ++ r[count - 1].zr_end = end; ++ return; ++ } ++ if (gap < eip->zei_mingap) ++ eip->zei_mingap = gap; ++ } ++ r[count].zr_start = start; ++ r[count].zr_end = end; ++ eip->zei_range_count++; ++} ++ ++static size_t ++zei_range_total_size(zfs_ecksum_info_t *eip) ++{ ++ struct zei_ranges *r = eip->zei_ranges; ++ size_t count = eip->zei_range_count; ++ size_t result = 0; ++ size_t idx; ++ ++ for (idx = 0; idx < count; idx++) ++ result += (r[idx].zr_end - r[idx].zr_start); ++ ++ return (result); ++} ++ ++static zfs_ecksum_info_t * ++annotate_ecksum(nvlist_t *ereport, zio_bad_cksum_t *info, ++ const uint8_t *goodbuf, const uint8_t *badbuf, size_t size, ++ boolean_t drop_if_identical) ++{ ++ const uint64_t *good = (const uint64_t *)goodbuf; ++ const uint64_t *bad = (const uint64_t *)badbuf; ++ ++ uint64_t allset = 0; ++ uint64_t allcleared = 0; ++ ++ size_t nui64s = size / sizeof (uint64_t); ++ ++ size_t inline_size; ++ int no_inline = 0; ++ size_t idx; ++ size_t range; ++ ++ size_t offset = 0; ++ ssize_t start = -1; ++ ++ zfs_ecksum_info_t *eip = kmem_zalloc(sizeof (*eip), KM_PUSHPAGE); ++ ++ /* don't do any annotation for injected checksum errors */ ++ if (info != NULL && info->zbc_injected) ++ return (eip); ++ ++ if (info != NULL && info->zbc_has_cksum) { ++ fm_payload_set(ereport, ++ FM_EREPORT_PAYLOAD_ZFS_CKSUM_EXPECTED, ++ DATA_TYPE_UINT64_ARRAY, ++ sizeof (info->zbc_expected) / sizeof (uint64_t), ++ (uint64_t *)&info->zbc_expected, ++ FM_EREPORT_PAYLOAD_ZFS_CKSUM_ACTUAL, ++ DATA_TYPE_UINT64_ARRAY, ++ sizeof (info->zbc_actual) / sizeof (uint64_t), ++ (uint64_t *)&info->zbc_actual, ++ FM_EREPORT_PAYLOAD_ZFS_CKSUM_ALGO, ++ DATA_TYPE_STRING, ++ info->zbc_checksum_name, ++ NULL); ++ ++ if (info->zbc_byteswapped) { ++ fm_payload_set(ereport, ++ FM_EREPORT_PAYLOAD_ZFS_CKSUM_BYTESWAP, ++ DATA_TYPE_BOOLEAN, 1, ++ NULL); ++ } ++ } ++ ++ if (badbuf == NULL || goodbuf == NULL) ++ return (eip); ++ ++ ASSERT3U(nui64s, <=, UINT16_MAX); ++ ASSERT3U(size, ==, nui64s * sizeof (uint64_t)); ++ ASSERT3U(size, <=, SPA_MAXBLOCKSIZE); ++ ASSERT3U(size, <=, UINT32_MAX); ++ ++ /* build up the range list by 
comparing the two buffers. */ ++ for (idx = 0; idx < nui64s; idx++) { ++ if (good[idx] == bad[idx]) { ++ if (start == -1) ++ continue; ++ ++ zei_add_range(eip, start, idx); ++ start = -1; ++ } else { ++ if (start != -1) ++ continue; ++ ++ start = idx; ++ } ++ } ++ if (start != -1) ++ zei_add_range(eip, start, idx); ++ ++ /* See if it will fit in our inline buffers */ ++ inline_size = zei_range_total_size(eip); ++ if (inline_size > ZFM_MAX_INLINE) ++ no_inline = 1; ++ ++ /* ++ * If there is no change and we want to drop if the buffers are ++ * identical, do so. ++ */ ++ if (inline_size == 0 && drop_if_identical) { ++ kmem_free(eip, sizeof (*eip)); ++ return (NULL); ++ } ++ ++ /* ++ * Now walk through the ranges, filling in the details of the ++ * differences. Also convert our uint64_t-array offsets to byte ++ * offsets. ++ */ ++ for (range = 0; range < eip->zei_range_count; range++) { ++ size_t start = eip->zei_ranges[range].zr_start; ++ size_t end = eip->zei_ranges[range].zr_end; ++ ++ for (idx = start; idx < end; idx++) { ++ uint64_t set, cleared; ++ ++ // bits set in bad, but not in good ++ set = ((~good[idx]) & bad[idx]); ++ // bits set in good, but not in bad ++ cleared = (good[idx] & (~bad[idx])); ++ ++ allset |= set; ++ allcleared |= cleared; ++ ++ if (!no_inline) { ++ ASSERT3U(offset, <, inline_size); ++ eip->zei_bits_set[offset] = set; ++ eip->zei_bits_cleared[offset] = cleared; ++ offset++; ++ } ++ ++ update_histogram(set, eip->zei_histogram_set, ++ &eip->zei_range_sets[range]); ++ update_histogram(cleared, eip->zei_histogram_cleared, ++ &eip->zei_range_clears[range]); ++ } ++ ++ /* convert to byte offsets */ ++ eip->zei_ranges[range].zr_start *= sizeof (uint64_t); ++ eip->zei_ranges[range].zr_end *= sizeof (uint64_t); ++ } ++ eip->zei_allowed_mingap *= sizeof (uint64_t); ++ inline_size *= sizeof (uint64_t); ++ ++ /* fill in ereport */ ++ fm_payload_set(ereport, ++ FM_EREPORT_PAYLOAD_ZFS_BAD_OFFSET_RANGES, ++ DATA_TYPE_UINT32_ARRAY, 2 * eip->zei_range_count, ++ (uint32_t *)eip->zei_ranges, ++ FM_EREPORT_PAYLOAD_ZFS_BAD_RANGE_MIN_GAP, ++ DATA_TYPE_UINT32, eip->zei_allowed_mingap, ++ FM_EREPORT_PAYLOAD_ZFS_BAD_RANGE_SETS, ++ DATA_TYPE_UINT32_ARRAY, eip->zei_range_count, eip->zei_range_sets, ++ FM_EREPORT_PAYLOAD_ZFS_BAD_RANGE_CLEARS, ++ DATA_TYPE_UINT32_ARRAY, eip->zei_range_count, eip->zei_range_clears, ++ NULL); ++ ++ if (!no_inline) { ++ fm_payload_set(ereport, ++ FM_EREPORT_PAYLOAD_ZFS_BAD_SET_BITS, ++ DATA_TYPE_UINT8_ARRAY, ++ inline_size, (uint8_t *)eip->zei_bits_set, ++ FM_EREPORT_PAYLOAD_ZFS_BAD_CLEARED_BITS, ++ DATA_TYPE_UINT8_ARRAY, ++ inline_size, (uint8_t *)eip->zei_bits_cleared, ++ NULL); ++ } else { ++ fm_payload_set(ereport, ++ FM_EREPORT_PAYLOAD_ZFS_BAD_SET_HISTOGRAM, ++ DATA_TYPE_UINT16_ARRAY, ++ NBBY * sizeof (uint64_t), eip->zei_histogram_set, ++ FM_EREPORT_PAYLOAD_ZFS_BAD_CLEARED_HISTOGRAM, ++ DATA_TYPE_UINT16_ARRAY, ++ NBBY * sizeof (uint64_t), eip->zei_histogram_cleared, ++ NULL); ++ } ++ return (eip); ++} ++#endif ++ ++void ++zfs_ereport_post(const char *subclass, spa_t *spa, vdev_t *vd, zio_t *zio, ++ uint64_t stateoroffset, uint64_t size) ++{ ++#ifdef _KERNEL ++ nvlist_t *ereport = NULL; ++ nvlist_t *detector = NULL; ++ ++ zfs_ereport_start(&ereport, &detector, ++ subclass, spa, vd, zio, stateoroffset, size); ++ ++ if (ereport == NULL) ++ return; ++ ++ /* Cleanup is handled by the callback function */ ++ zfs_zevent_post(ereport, detector, zfs_zevent_post_cb); ++#endif ++} ++ ++void ++zfs_ereport_start_checksum(spa_t *spa, vdev_t *vd, ++ struct zio *zio, 
uint64_t offset, uint64_t length, void *arg, ++ zio_bad_cksum_t *info) ++{ ++ zio_cksum_report_t *report = kmem_zalloc(sizeof (*report), KM_PUSHPAGE); ++ ++ if (zio->io_vsd != NULL) ++ zio->io_vsd_ops->vsd_cksum_report(zio, report, arg); ++ else ++ zio_vsd_default_cksum_report(zio, report, arg); ++ ++ /* copy the checksum failure information if it was provided */ ++ if (info != NULL) { ++ report->zcr_ckinfo = kmem_zalloc(sizeof (*info), KM_PUSHPAGE); ++ bcopy(info, report->zcr_ckinfo, sizeof (*info)); ++ } ++ ++ report->zcr_align = 1ULL << vd->vdev_top->vdev_ashift; ++ report->zcr_length = length; ++ ++#ifdef _KERNEL ++ zfs_ereport_start(&report->zcr_ereport, &report->zcr_detector, ++ FM_EREPORT_ZFS_CHECKSUM, spa, vd, zio, offset, length); ++ ++ if (report->zcr_ereport == NULL) { ++ report->zcr_free(report->zcr_cbdata, report->zcr_cbinfo); ++ if (report->zcr_ckinfo != NULL) { ++ kmem_free(report->zcr_ckinfo, ++ sizeof (*report->zcr_ckinfo)); ++ } ++ kmem_free(report, sizeof (*report)); ++ return; ++ } ++#endif ++ ++ mutex_enter(&spa->spa_errlist_lock); ++ report->zcr_next = zio->io_logical->io_cksum_report; ++ zio->io_logical->io_cksum_report = report; ++ mutex_exit(&spa->spa_errlist_lock); ++} ++ ++void ++zfs_ereport_finish_checksum(zio_cksum_report_t *report, ++ const void *good_data, const void *bad_data, boolean_t drop_if_identical) ++{ ++#ifdef _KERNEL ++ zfs_ecksum_info_t *info = NULL; ++ info = annotate_ecksum(report->zcr_ereport, report->zcr_ckinfo, ++ good_data, bad_data, report->zcr_length, drop_if_identical); ++ ++ if (info != NULL) ++ zfs_zevent_post(report->zcr_ereport, ++ report->zcr_detector, zfs_zevent_post_cb); ++ ++ report->zcr_ereport = report->zcr_detector = NULL; ++ if (info != NULL) ++ kmem_free(info, sizeof (*info)); ++#endif ++} ++ ++void ++zfs_ereport_free_checksum(zio_cksum_report_t *rpt) ++{ ++#ifdef _KERNEL ++ if (rpt->zcr_ereport != NULL) { ++ fm_nvlist_destroy(rpt->zcr_ereport, ++ FM_NVA_FREE); ++ fm_nvlist_destroy(rpt->zcr_detector, ++ FM_NVA_FREE); ++ } ++#endif ++ rpt->zcr_free(rpt->zcr_cbdata, rpt->zcr_cbinfo); ++ ++ if (rpt->zcr_ckinfo != NULL) ++ kmem_free(rpt->zcr_ckinfo, sizeof (*rpt->zcr_ckinfo)); ++ ++ kmem_free(rpt, sizeof (*rpt)); ++} ++ ++void ++zfs_ereport_send_interim_checksum(zio_cksum_report_t *report) ++{ ++#ifdef _KERNEL ++ zfs_zevent_post(report->zcr_ereport, report->zcr_detector, NULL); ++#endif ++} ++ ++void ++zfs_ereport_post_checksum(spa_t *spa, vdev_t *vd, ++ struct zio *zio, uint64_t offset, uint64_t length, ++ const void *good_data, const void *bad_data, zio_bad_cksum_t *zbc) ++{ ++#ifdef _KERNEL ++ nvlist_t *ereport = NULL; ++ nvlist_t *detector = NULL; ++ zfs_ecksum_info_t *info; ++ ++ zfs_ereport_start(&ereport, &detector, ++ FM_EREPORT_ZFS_CHECKSUM, spa, vd, zio, offset, length); ++ ++ if (ereport == NULL) ++ return; ++ ++ info = annotate_ecksum(ereport, zbc, good_data, bad_data, length, ++ B_FALSE); ++ ++ if (info != NULL) { ++ zfs_zevent_post(ereport, detector, zfs_zevent_post_cb); ++ kmem_free(info, sizeof (*info)); ++ } ++#endif ++} ++ ++static void ++zfs_post_common(spa_t *spa, vdev_t *vd, const char *name) ++{ ++#ifdef _KERNEL ++ nvlist_t *resource; ++ char class[64]; ++ ++ if (spa_load_state(spa) == SPA_LOAD_TRYIMPORT) ++ return; ++ ++ if ((resource = fm_nvlist_create(NULL)) == NULL) ++ return; ++ ++ (void) snprintf(class, sizeof (class), "%s.%s.%s", FM_RSRC_RESOURCE, ++ ZFS_ERROR_CLASS, name); ++ VERIFY(nvlist_add_uint8(resource, FM_VERSION, FM_RSRC_VERSION) == 0); ++ VERIFY(nvlist_add_string(resource, FM_CLASS, class) == 
0); ++ VERIFY(nvlist_add_uint64(resource, ++ FM_EREPORT_PAYLOAD_ZFS_POOL_GUID, spa_guid(spa)) == 0); ++ if (vd) { ++ VERIFY(nvlist_add_uint64(resource, ++ FM_EREPORT_PAYLOAD_ZFS_VDEV_GUID, vd->vdev_guid) == 0); ++ VERIFY(nvlist_add_uint64(resource, ++ FM_EREPORT_PAYLOAD_ZFS_VDEV_STATE, vd->vdev_state) == 0); ++ } ++ ++ zfs_zevent_post(resource, NULL, zfs_zevent_post_cb); ++#endif ++} ++ ++/* ++ * The 'resource.fs.zfs.removed' event is an internal signal that the given vdev ++ * has been removed from the system. This will cause the DE to ignore any ++ * recent I/O errors, inferring that they are due to the asynchronous device ++ * removal. ++ */ ++void ++zfs_post_remove(spa_t *spa, vdev_t *vd) ++{ ++ zfs_post_common(spa, vd, FM_EREPORT_RESOURCE_REMOVED); ++} ++ ++/* ++ * The 'resource.fs.zfs.autoreplace' event is an internal signal that the pool ++ * has the 'autoreplace' property set, and therefore any broken vdevs will be ++ * handled by higher level logic, and no vdev fault should be generated. ++ */ ++void ++zfs_post_autoreplace(spa_t *spa, vdev_t *vd) ++{ ++ zfs_post_common(spa, vd, FM_EREPORT_RESOURCE_AUTOREPLACE); ++} ++ ++/* ++ * The 'resource.fs.zfs.statechange' event is an internal signal that the ++ * given vdev has transitioned its state to DEGRADED or HEALTHY. This will ++ * cause the retire agent to repair any outstanding fault management cases ++ * open because the device was not found (fault.fs.zfs.device). ++ */ ++void ++zfs_post_state_change(spa_t *spa, vdev_t *vd) ++{ ++ zfs_post_common(spa, vd, FM_EREPORT_RESOURCE_STATECHANGE); ++} ++ ++#if defined(_KERNEL) && defined(HAVE_SPL) ++EXPORT_SYMBOL(zfs_ereport_post); ++EXPORT_SYMBOL(zfs_ereport_post_checksum); ++EXPORT_SYMBOL(zfs_post_remove); ++EXPORT_SYMBOL(zfs_post_autoreplace); ++EXPORT_SYMBOL(zfs_post_state_change); ++#endif /* _KERNEL */ +diff -uNr linux-3.2.33-go.orig/fs/zfs/zfs/zfs_fuid.c linux-3.2.33-go/fs/zfs/zfs/zfs_fuid.c +--- linux-3.2.33-go.orig/fs/zfs/zfs/zfs_fuid.c 1970-01-01 01:00:00.000000000 +0100 ++++ linux-3.2.33-go/fs/zfs/zfs/zfs_fuid.c 2012-11-16 23:25:34.351039311 +0100 +@@ -0,0 +1,775 @@ ++/* ++ * CDDL HEADER START ++ * ++ * The contents of this file are subject to the terms of the ++ * Common Development and Distribution License (the "License"). ++ * You may not use this file except in compliance with the License. ++ * ++ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE ++ * or http://www.opensolaris.org/os/licensing. ++ * See the License for the specific language governing permissions ++ * and limitations under the License. ++ * ++ * When distributing Covered Code, include this CDDL HEADER in each ++ * file and include the License file at usr/src/OPENSOLARIS.LICENSE. ++ * If applicable, add the following below this CDDL HEADER, with the ++ * fields enclosed by brackets "[]" replaced with your own identifying ++ * information: Portions Copyright [yyyy] [name of copyright owner] ++ * ++ * CDDL HEADER END ++ */ ++/* ++ * Copyright (c) 2007, 2010, Oracle and/or its affiliates. All rights reserved. ++ */ ++ ++#include ++#include ++#include ++#include ++#include ++#include ++#ifdef _KERNEL ++#include ++#include ++#include ++#include ++#endif ++#include ++ ++/* ++ * FUID Domain table(s). ++ * ++ * The FUID table is stored as a packed nvlist of an array ++ * of nvlists which contain an index, domain string and offset ++ * ++ * During file system initialization the nvlist(s) are read and ++ * two AVL trees are created. 
One tree is keyed by the index number ++ * and the other by the domain string. Nodes are never removed from ++ * trees, but new entries may be added. If a new entry is added then ++ * the zsb->z_fuid_dirty flag is set to true and the caller will then ++ * be responsible for calling zfs_fuid_sync() to sync the changes to disk. ++ * ++ */ ++ ++#define FUID_IDX "fuid_idx" ++#define FUID_DOMAIN "fuid_domain" ++#define FUID_OFFSET "fuid_offset" ++#define FUID_NVP_ARRAY "fuid_nvlist" ++ ++typedef struct fuid_domain { ++ avl_node_t f_domnode; ++ avl_node_t f_idxnode; ++ ksiddomain_t *f_ksid; ++ uint64_t f_idx; ++} fuid_domain_t; ++ ++static char *nulldomain = ""; ++ ++/* ++ * Compare two indexes. ++ */ ++static int ++idx_compare(const void *arg1, const void *arg2) ++{ ++ const fuid_domain_t *node1 = arg1; ++ const fuid_domain_t *node2 = arg2; ++ ++ if (node1->f_idx < node2->f_idx) ++ return (-1); ++ else if (node1->f_idx > node2->f_idx) ++ return (1); ++ return (0); ++} ++ ++/* ++ * Compare two domain strings. ++ */ ++static int ++domain_compare(const void *arg1, const void *arg2) ++{ ++ const fuid_domain_t *node1 = arg1; ++ const fuid_domain_t *node2 = arg2; ++ int val; ++ ++ val = strcmp(node1->f_ksid->kd_name, node2->f_ksid->kd_name); ++ if (val == 0) ++ return (0); ++ return (val > 0 ? 1 : -1); ++} ++ ++void ++zfs_fuid_avl_tree_create(avl_tree_t *idx_tree, avl_tree_t *domain_tree) ++{ ++ avl_create(idx_tree, idx_compare, ++ sizeof (fuid_domain_t), offsetof(fuid_domain_t, f_idxnode)); ++ avl_create(domain_tree, domain_compare, ++ sizeof (fuid_domain_t), offsetof(fuid_domain_t, f_domnode)); ++} ++ ++/* ++ * load initial fuid domain and idx trees. This function is used by ++ * both the kernel and zdb. ++ */ ++uint64_t ++zfs_fuid_table_load(objset_t *os, uint64_t fuid_obj, avl_tree_t *idx_tree, ++ avl_tree_t *domain_tree) ++{ ++ dmu_buf_t *db; ++ uint64_t fuid_size; ++ ++ ASSERT(fuid_obj != 0); ++ VERIFY(0 == dmu_bonus_hold(os, fuid_obj, ++ FTAG, &db)); ++ fuid_size = *(uint64_t *)db->db_data; ++ dmu_buf_rele(db, FTAG); ++ ++ if (fuid_size) { ++ nvlist_t **fuidnvp; ++ nvlist_t *nvp = NULL; ++ uint_t count; ++ char *packed; ++ int i; ++ ++ packed = kmem_alloc(fuid_size, KM_SLEEP); ++ VERIFY(dmu_read(os, fuid_obj, 0, ++ fuid_size, packed, DMU_READ_PREFETCH) == 0); ++ VERIFY(nvlist_unpack(packed, fuid_size, ++ &nvp, 0) == 0); ++ VERIFY(nvlist_lookup_nvlist_array(nvp, FUID_NVP_ARRAY, ++ &fuidnvp, &count) == 0); ++ ++ for (i = 0; i != count; i++) { ++ fuid_domain_t *domnode; ++ char *domain; ++ uint64_t idx; ++ ++ VERIFY(nvlist_lookup_string(fuidnvp[i], FUID_DOMAIN, ++ &domain) == 0); ++ VERIFY(nvlist_lookup_uint64(fuidnvp[i], FUID_IDX, ++ &idx) == 0); ++ ++ domnode = kmem_alloc(sizeof (fuid_domain_t), KM_SLEEP); ++ ++ domnode->f_idx = idx; ++ domnode->f_ksid = ksid_lookupdomain(domain); ++ avl_add(idx_tree, domnode); ++ avl_add(domain_tree, domnode); ++ } ++ nvlist_free(nvp); ++ kmem_free(packed, fuid_size); ++ } ++ return (fuid_size); ++} ++ ++void ++zfs_fuid_table_destroy(avl_tree_t *idx_tree, avl_tree_t *domain_tree) ++{ ++ fuid_domain_t *domnode; ++ void *cookie; ++ ++ cookie = NULL; ++ while ((domnode = avl_destroy_nodes(domain_tree, &cookie))) ++ ksiddomain_rele(domnode->f_ksid); ++ ++ avl_destroy(domain_tree); ++ cookie = NULL; ++ while ((domnode = avl_destroy_nodes(idx_tree, &cookie))) ++ kmem_free(domnode, sizeof (fuid_domain_t)); ++ avl_destroy(idx_tree); ++} ++ ++char * ++zfs_fuid_idx_domain(avl_tree_t *idx_tree, uint32_t idx) ++{ ++ fuid_domain_t searchnode, *findnode; ++ avl_index_t loc; 
++ ++ searchnode.f_idx = idx; ++ ++ findnode = avl_find(idx_tree, &searchnode, &loc); ++ ++ return (findnode ? findnode->f_ksid->kd_name : nulldomain); ++} ++ ++#ifdef _KERNEL ++/* ++ * Load the fuid table(s) into memory. ++ */ ++static void ++zfs_fuid_init(zfs_sb_t *zsb) ++{ ++ rw_enter(&zsb->z_fuid_lock, RW_WRITER); ++ ++ if (zsb->z_fuid_loaded) { ++ rw_exit(&zsb->z_fuid_lock); ++ return; ++ } ++ ++ zfs_fuid_avl_tree_create(&zsb->z_fuid_idx, &zsb->z_fuid_domain); ++ ++ (void) zap_lookup(zsb->z_os, MASTER_NODE_OBJ, ++ ZFS_FUID_TABLES, 8, 1, &zsb->z_fuid_obj); ++ if (zsb->z_fuid_obj != 0) { ++ zsb->z_fuid_size = zfs_fuid_table_load(zsb->z_os, ++ zsb->z_fuid_obj, &zsb->z_fuid_idx, ++ &zsb->z_fuid_domain); ++ } ++ ++ zsb->z_fuid_loaded = B_TRUE; ++ rw_exit(&zsb->z_fuid_lock); ++} ++ ++/* ++ * sync out AVL trees to persistent storage. ++ */ ++void ++zfs_fuid_sync(zfs_sb_t *zsb, dmu_tx_t *tx) ++{ ++ nvlist_t *nvp; ++ nvlist_t **fuids; ++ size_t nvsize = 0; ++ char *packed; ++ dmu_buf_t *db; ++ fuid_domain_t *domnode; ++ int numnodes; ++ int i; ++ ++ if (!zsb->z_fuid_dirty) { ++ return; ++ } ++ ++ rw_enter(&zsb->z_fuid_lock, RW_WRITER); ++ ++ /* ++ * First see if table needs to be created? ++ */ ++ if (zsb->z_fuid_obj == 0) { ++ zsb->z_fuid_obj = dmu_object_alloc(zsb->z_os, ++ DMU_OT_FUID, 1 << 14, DMU_OT_FUID_SIZE, ++ sizeof (uint64_t), tx); ++ VERIFY(zap_add(zsb->z_os, MASTER_NODE_OBJ, ++ ZFS_FUID_TABLES, sizeof (uint64_t), 1, ++ &zsb->z_fuid_obj, tx) == 0); ++ } ++ ++ VERIFY(nvlist_alloc(&nvp, NV_UNIQUE_NAME, KM_SLEEP) == 0); ++ ++ numnodes = avl_numnodes(&zsb->z_fuid_idx); ++ fuids = kmem_alloc(numnodes * sizeof (void *), KM_SLEEP); ++ for (i = 0, domnode = avl_first(&zsb->z_fuid_domain); domnode; i++, ++ domnode = AVL_NEXT(&zsb->z_fuid_domain, domnode)) { ++ VERIFY(nvlist_alloc(&fuids[i], NV_UNIQUE_NAME, KM_SLEEP) == 0); ++ VERIFY(nvlist_add_uint64(fuids[i], FUID_IDX, ++ domnode->f_idx) == 0); ++ VERIFY(nvlist_add_uint64(fuids[i], FUID_OFFSET, 0) == 0); ++ VERIFY(nvlist_add_string(fuids[i], FUID_DOMAIN, ++ domnode->f_ksid->kd_name) == 0); ++ } ++ VERIFY(nvlist_add_nvlist_array(nvp, FUID_NVP_ARRAY, ++ fuids, numnodes) == 0); ++ for (i = 0; i != numnodes; i++) ++ nvlist_free(fuids[i]); ++ kmem_free(fuids, numnodes * sizeof (void *)); ++ VERIFY(nvlist_size(nvp, &nvsize, NV_ENCODE_XDR) == 0); ++ packed = kmem_alloc(nvsize, KM_SLEEP); ++ VERIFY(nvlist_pack(nvp, &packed, &nvsize, ++ NV_ENCODE_XDR, KM_SLEEP) == 0); ++ nvlist_free(nvp); ++ zsb->z_fuid_size = nvsize; ++ dmu_write(zsb->z_os, zsb->z_fuid_obj, 0, zsb->z_fuid_size, packed, tx); ++ kmem_free(packed, zsb->z_fuid_size); ++ VERIFY(0 == dmu_bonus_hold(zsb->z_os, zsb->z_fuid_obj, ++ FTAG, &db)); ++ dmu_buf_will_dirty(db, tx); ++ *(uint64_t *)db->db_data = zsb->z_fuid_size; ++ dmu_buf_rele(db, FTAG); ++ ++ zsb->z_fuid_dirty = B_FALSE; ++ rw_exit(&zsb->z_fuid_lock); ++} ++ ++/* ++ * Query domain table for a given domain. ++ * ++ * If domain isn't found and addok is set, it is added to AVL trees and ++ * the zsb->z_fuid_dirty flag will be set to TRUE. It will then be ++ * necessary for the caller or another thread to detect the dirty table ++ * and sync out the changes. ++ */ ++int ++zfs_fuid_find_by_domain(zfs_sb_t *zsb, const char *domain, ++ char **retdomain, boolean_t addok) ++{ ++ fuid_domain_t searchnode, *findnode; ++ avl_index_t loc; ++ krw_t rw = RW_READER; ++ ++ /* ++ * If the dummy "nobody" domain then return an index of 0 ++ * to cause the created FUID to be a standard POSIX id ++ * for the user nobody. 
++ */ ++ if (domain[0] == '\0') { ++ if (retdomain) ++ *retdomain = nulldomain; ++ return (0); ++ } ++ ++ searchnode.f_ksid = ksid_lookupdomain(domain); ++ if (retdomain) ++ *retdomain = searchnode.f_ksid->kd_name; ++ if (!zsb->z_fuid_loaded) ++ zfs_fuid_init(zsb); ++ ++retry: ++ rw_enter(&zsb->z_fuid_lock, rw); ++ findnode = avl_find(&zsb->z_fuid_domain, &searchnode, &loc); ++ ++ if (findnode) { ++ rw_exit(&zsb->z_fuid_lock); ++ ksiddomain_rele(searchnode.f_ksid); ++ return (findnode->f_idx); ++ } else if (addok) { ++ fuid_domain_t *domnode; ++ uint64_t retidx; ++ ++ if (rw == RW_READER && !rw_tryupgrade(&zsb->z_fuid_lock)) { ++ rw_exit(&zsb->z_fuid_lock); ++ rw = RW_WRITER; ++ goto retry; ++ } ++ ++ domnode = kmem_alloc(sizeof (fuid_domain_t), KM_SLEEP); ++ domnode->f_ksid = searchnode.f_ksid; ++ ++ retidx = domnode->f_idx = avl_numnodes(&zsb->z_fuid_idx) + 1; ++ ++ avl_add(&zsb->z_fuid_domain, domnode); ++ avl_add(&zsb->z_fuid_idx, domnode); ++ zsb->z_fuid_dirty = B_TRUE; ++ rw_exit(&zsb->z_fuid_lock); ++ return (retidx); ++ } else { ++ rw_exit(&zsb->z_fuid_lock); ++ return (-1); ++ } ++} ++ ++/* ++ * Query domain table by index, returning domain string ++ * ++ * Returns a pointer from an avl node of the domain string. ++ * ++ */ ++const char * ++zfs_fuid_find_by_idx(zfs_sb_t *zsb, uint32_t idx) ++{ ++ char *domain; ++ ++ if (idx == 0 || !zsb->z_use_fuids) ++ return (NULL); ++ ++ if (!zsb->z_fuid_loaded) ++ zfs_fuid_init(zsb); ++ ++ rw_enter(&zsb->z_fuid_lock, RW_READER); ++ ++ if (zsb->z_fuid_obj || zsb->z_fuid_dirty) ++ domain = zfs_fuid_idx_domain(&zsb->z_fuid_idx, idx); ++ else ++ domain = nulldomain; ++ rw_exit(&zsb->z_fuid_lock); ++ ++ ASSERT(domain); ++ return (domain); ++} ++ ++void ++zfs_fuid_map_ids(znode_t *zp, cred_t *cr, uid_t *uidp, uid_t *gidp) ++{ ++ *uidp = zfs_fuid_map_id(ZTOZSB(zp), zp->z_uid, cr, ZFS_OWNER); ++ *gidp = zfs_fuid_map_id(ZTOZSB(zp), zp->z_gid, cr, ZFS_GROUP); ++} ++ ++uid_t ++zfs_fuid_map_id(zfs_sb_t *zsb, uint64_t fuid, ++ cred_t *cr, zfs_fuid_type_t type) ++{ ++#ifdef HAVE_KSID ++ uint32_t index = FUID_INDEX(fuid); ++ const char *domain; ++ uid_t id; ++ ++ if (index == 0) ++ return (fuid); ++ ++ domain = zfs_fuid_find_by_idx(zsb, index); ++ ASSERT(domain != NULL); ++ ++ if (type == ZFS_OWNER || type == ZFS_ACE_USER) { ++ (void) kidmap_getuidbysid(crgetzone(cr), domain, ++ FUID_RID(fuid), &id); ++ } else { ++ (void) kidmap_getgidbysid(crgetzone(cr), domain, ++ FUID_RID(fuid), &id); ++ } ++ return (id); ++#else ++ /* ++ * The Linux port only supports POSIX IDs, use the passed id. ++ */ ++ return (fuid); ++#endif /* HAVE_KSID */ ++} ++ ++/* ++ * Add a FUID node to the list of fuid's being created for this ++ * ACL ++ * ++ * If ACL has multiple domains, then keep only one copy of each unique ++ * domain. ++ */ ++void ++zfs_fuid_node_add(zfs_fuid_info_t **fuidpp, const char *domain, uint32_t rid, ++ uint64_t idx, uint64_t id, zfs_fuid_type_t type) ++{ ++ zfs_fuid_t *fuid; ++ zfs_fuid_domain_t *fuid_domain; ++ zfs_fuid_info_t *fuidp; ++ uint64_t fuididx; ++ boolean_t found = B_FALSE; ++ ++ if (*fuidpp == NULL) ++ *fuidpp = zfs_fuid_info_alloc(); ++ ++ fuidp = *fuidpp; ++ /* ++ * First find fuid domain index in linked list ++ * ++ * If one isn't found then create an entry. 
++ */ ++ ++ for (fuididx = 1, fuid_domain = list_head(&fuidp->z_domains); ++ fuid_domain; fuid_domain = list_next(&fuidp->z_domains, ++ fuid_domain), fuididx++) { ++ if (idx == fuid_domain->z_domidx) { ++ found = B_TRUE; ++ break; ++ } ++ } ++ ++ if (!found) { ++ fuid_domain = kmem_alloc(sizeof (zfs_fuid_domain_t), KM_SLEEP); ++ fuid_domain->z_domain = domain; ++ fuid_domain->z_domidx = idx; ++ list_insert_tail(&fuidp->z_domains, fuid_domain); ++ fuidp->z_domain_str_sz += strlen(domain) + 1; ++ fuidp->z_domain_cnt++; ++ } ++ ++ if (type == ZFS_ACE_USER || type == ZFS_ACE_GROUP) { ++ ++ /* ++ * Now allocate fuid entry and add it on the end of the list ++ */ ++ ++ fuid = kmem_alloc(sizeof (zfs_fuid_t), KM_SLEEP); ++ fuid->z_id = id; ++ fuid->z_domidx = idx; ++ fuid->z_logfuid = FUID_ENCODE(fuididx, rid); ++ ++ list_insert_tail(&fuidp->z_fuids, fuid); ++ fuidp->z_fuid_cnt++; ++ } else { ++ if (type == ZFS_OWNER) ++ fuidp->z_fuid_owner = FUID_ENCODE(fuididx, rid); ++ else ++ fuidp->z_fuid_group = FUID_ENCODE(fuididx, rid); ++ } ++} ++ ++#ifdef HAVE_KSID ++/* ++ * Create a file system FUID, based on information in the users cred ++ * ++ * If cred contains KSID_OWNER then it should be used to determine ++ * the uid otherwise cred's uid will be used. By default cred's gid ++ * is used unless it's an ephemeral ID in which case KSID_GROUP will ++ * be used if it exists. ++ */ ++uint64_t ++zfs_fuid_create_cred(zfs_sb_t *zsb, zfs_fuid_type_t type, ++ cred_t *cr, zfs_fuid_info_t **fuidp) ++{ ++ uint64_t idx; ++ ksid_t *ksid; ++ uint32_t rid; ++ char *kdomain; ++ const char *domain; ++ uid_t id; ++ ++ VERIFY(type == ZFS_OWNER || type == ZFS_GROUP); ++ ++ ksid = crgetsid(cr, (type == ZFS_OWNER) ? KSID_OWNER : KSID_GROUP); ++ ++ if (!zsb->z_use_fuids || (ksid == NULL)) { ++ id = (type == ZFS_OWNER) ? crgetuid(cr) : crgetgid(cr); ++ ++ if (IS_EPHEMERAL(id)) ++ return ((type == ZFS_OWNER) ? UID_NOBODY : GID_NOBODY); ++ ++ return ((uint64_t)id); ++ } ++ ++ /* ++ * ksid is present and FUID is supported ++ */ ++ id = (type == ZFS_OWNER) ? ksid_getid(ksid) : crgetgid(cr); ++ ++ if (!IS_EPHEMERAL(id)) ++ return ((uint64_t)id); ++ ++ if (type == ZFS_GROUP) ++ id = ksid_getid(ksid); ++ ++ rid = ksid_getrid(ksid); ++ domain = ksid_getdomain(ksid); ++ ++ idx = zfs_fuid_find_by_domain(zsb, domain, &kdomain, B_TRUE); ++ ++ zfs_fuid_node_add(fuidp, kdomain, rid, idx, id, type); ++ ++ return (FUID_ENCODE(idx, rid)); ++} ++#endif /* HAVE_KSID */ ++ ++/* ++ * Create a file system FUID for an ACL ace ++ * or a chown/chgrp of the file. ++ * This is similar to zfs_fuid_create_cred, except that ++ * we can't find the domain + rid information in the ++ * cred. Instead we have to query Winchester for the ++ * domain and rid. ++ * ++ * During replay operations the domain+rid information is ++ * found in the zfs_fuid_info_t that the replay code has ++ * attached to the zsb of the file system. ++ */ ++uint64_t ++zfs_fuid_create(zfs_sb_t *zsb, uint64_t id, cred_t *cr, ++ zfs_fuid_type_t type, zfs_fuid_info_t **fuidpp) ++{ ++#ifdef HAVE_KSID ++ const char *domain; ++ char *kdomain; ++ uint32_t fuid_idx = FUID_INDEX(id); ++ uint32_t rid; ++ idmap_stat status; ++ uint64_t idx; ++ zfs_fuid_t *zfuid = NULL; ++ zfs_fuid_info_t *fuidp; ++ ++ /* ++ * If POSIX ID, or entry is already a FUID then ++ * just return the id ++ * ++ * We may also be handed an already FUID'ized id via ++ * chmod. 
++ */ ++ ++ if (!zsb->z_use_fuids || !IS_EPHEMERAL(id) || fuid_idx != 0) ++ return (id); ++ ++ if (zsb->z_replay) { ++ fuidp = zsb->z_fuid_replay; ++ ++ /* ++ * If we are passed an ephemeral id, but no ++ * fuid_info was logged then return NOBODY. ++ * This is most likely a result of idmap service ++ * not being available. ++ */ ++ if (fuidp == NULL) ++ return (UID_NOBODY); ++ ++ switch (type) { ++ case ZFS_ACE_USER: ++ case ZFS_ACE_GROUP: ++ zfuid = list_head(&fuidp->z_fuids); ++ rid = FUID_RID(zfuid->z_logfuid); ++ idx = FUID_INDEX(zfuid->z_logfuid); ++ break; ++ case ZFS_OWNER: ++ rid = FUID_RID(fuidp->z_fuid_owner); ++ idx = FUID_INDEX(fuidp->z_fuid_owner); ++ break; ++ case ZFS_GROUP: ++ rid = FUID_RID(fuidp->z_fuid_group); ++ idx = FUID_INDEX(fuidp->z_fuid_group); ++ break; ++ }; ++ domain = fuidp->z_domain_table[idx -1]; ++ } else { ++ if (type == ZFS_OWNER || type == ZFS_ACE_USER) ++ status = kidmap_getsidbyuid(crgetzone(cr), id, ++ &domain, &rid); ++ else ++ status = kidmap_getsidbygid(crgetzone(cr), id, ++ &domain, &rid); ++ ++ if (status != 0) { ++ /* ++ * When returning nobody we will need to ++ * make a dummy fuid table entry for logging ++ * purposes. ++ */ ++ rid = UID_NOBODY; ++ domain = nulldomain; ++ } ++ } ++ ++ idx = zfs_fuid_find_by_domain(zsb, domain, &kdomain, B_TRUE); ++ ++ if (!zsb->z_replay) ++ zfs_fuid_node_add(fuidpp, kdomain, ++ rid, idx, id, type); ++ else if (zfuid != NULL) { ++ list_remove(&fuidp->z_fuids, zfuid); ++ kmem_free(zfuid, sizeof (zfs_fuid_t)); ++ } ++ return (FUID_ENCODE(idx, rid)); ++#else ++ /* ++ * The Linux port only supports POSIX IDs, use the passed id. ++ */ ++ return (id); ++#endif ++} ++ ++void ++zfs_fuid_destroy(zfs_sb_t *zsb) ++{ ++ rw_enter(&zsb->z_fuid_lock, RW_WRITER); ++ if (!zsb->z_fuid_loaded) { ++ rw_exit(&zsb->z_fuid_lock); ++ return; ++ } ++ zfs_fuid_table_destroy(&zsb->z_fuid_idx, &zsb->z_fuid_domain); ++ rw_exit(&zsb->z_fuid_lock); ++} ++ ++/* ++ * Allocate zfs_fuid_info for tracking FUIDs created during ++ * zfs_mknode, VOP_SETATTR() or VOP_SETSECATTR() ++ */ ++zfs_fuid_info_t * ++zfs_fuid_info_alloc(void) ++{ ++ zfs_fuid_info_t *fuidp; ++ ++ fuidp = kmem_zalloc(sizeof (zfs_fuid_info_t), KM_SLEEP); ++ list_create(&fuidp->z_domains, sizeof (zfs_fuid_domain_t), ++ offsetof(zfs_fuid_domain_t, z_next)); ++ list_create(&fuidp->z_fuids, sizeof (zfs_fuid_t), ++ offsetof(zfs_fuid_t, z_next)); ++ return (fuidp); ++} ++ ++/* ++ * Release all memory associated with zfs_fuid_info_t ++ */ ++void ++zfs_fuid_info_free(zfs_fuid_info_t *fuidp) ++{ ++ zfs_fuid_t *zfuid; ++ zfs_fuid_domain_t *zdomain; ++ ++ while ((zfuid = list_head(&fuidp->z_fuids)) != NULL) { ++ list_remove(&fuidp->z_fuids, zfuid); ++ kmem_free(zfuid, sizeof (zfs_fuid_t)); ++ } ++ ++ if (fuidp->z_domain_table != NULL) ++ kmem_free(fuidp->z_domain_table, ++ (sizeof (char **)) * fuidp->z_domain_cnt); ++ ++ while ((zdomain = list_head(&fuidp->z_domains)) != NULL) { ++ list_remove(&fuidp->z_domains, zdomain); ++ kmem_free(zdomain, sizeof (zfs_fuid_domain_t)); ++ } ++ ++ kmem_free(fuidp, sizeof (zfs_fuid_info_t)); ++} ++ ++/* ++ * Check to see if id is a groupmember. If cred ++ * has ksid info then sidlist is checked first ++ * and if still not found then POSIX groups are checked ++ * ++ * Will use a straight FUID compare when possible. 
++ */ ++boolean_t ++zfs_groupmember(zfs_sb_t *zsb, uint64_t id, cred_t *cr) ++{ ++#ifdef HAVE_KSID ++ ksid_t *ksid = crgetsid(cr, KSID_GROUP); ++ ksidlist_t *ksidlist = crgetsidlist(cr); ++ uid_t gid; ++ ++ if (ksid && ksidlist) { ++ int i; ++ ksid_t *ksid_groups; ++ uint32_t idx = FUID_INDEX(id); ++ uint32_t rid = FUID_RID(id); ++ ++ ksid_groups = ksidlist->ksl_sids; ++ ++ for (i = 0; i != ksidlist->ksl_nsid; i++) { ++ if (idx == 0) { ++ if (id != IDMAP_WK_CREATOR_GROUP_GID && ++ id == ksid_groups[i].ks_id) { ++ return (B_TRUE); ++ } ++ } else { ++ const char *domain; ++ ++ domain = zfs_fuid_find_by_idx(zsb, idx); ++ ASSERT(domain != NULL); ++ ++ if (strcmp(domain, ++ IDMAP_WK_CREATOR_SID_AUTHORITY) == 0) ++ return (B_FALSE); ++ ++ if ((strcmp(domain, ++ ksid_groups[i].ks_domain->kd_name) == 0) && ++ rid == ksid_groups[i].ks_rid) ++ return (B_TRUE); ++ } ++ } ++ } ++ ++ /* ++ * Not found in ksidlist, check posix groups ++ */ ++ gid = zfs_fuid_map_id(zsb, id, cr, ZFS_GROUP); ++ return (groupmember(gid, cr)); ++#else ++ return (B_TRUE); ++#endif ++} ++ ++void ++zfs_fuid_txhold(zfs_sb_t *zsb, dmu_tx_t *tx) ++{ ++ if (zsb->z_fuid_obj == 0) { ++ dmu_tx_hold_bonus(tx, DMU_NEW_OBJECT); ++ dmu_tx_hold_write(tx, DMU_NEW_OBJECT, 0, ++ FUID_SIZE_ESTIMATE(zsb)); ++ dmu_tx_hold_zap(tx, MASTER_NODE_OBJ, FALSE, NULL); ++ } else { ++ dmu_tx_hold_bonus(tx, zsb->z_fuid_obj); ++ dmu_tx_hold_write(tx, zsb->z_fuid_obj, 0, ++ FUID_SIZE_ESTIMATE(zsb)); ++ } ++} ++#endif +diff -uNr linux-3.2.33-go.orig/fs/zfs/zfs/zfs_ioctl.c linux-3.2.33-go/fs/zfs/zfs/zfs_ioctl.c +--- linux-3.2.33-go.orig/fs/zfs/zfs/zfs_ioctl.c 1970-01-01 01:00:00.000000000 +0100 ++++ linux-3.2.33-go/fs/zfs/zfs/zfs_ioctl.c 2012-11-16 23:25:34.347039358 +0100 +@@ -0,0 +1,5259 @@ ++/* ++ * CDDL HEADER START ++ * ++ * The contents of this file are subject to the terms of the ++ * Common Development and Distribution License (the "License"). ++ * You may not use this file except in compliance with the License. ++ * ++ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE ++ * or http://www.opensolaris.org/os/licensing. ++ * See the License for the specific language governing permissions ++ * and limitations under the License. ++ * ++ * When distributing Covered Code, include this CDDL HEADER in each ++ * file and include the License file at usr/src/OPENSOLARIS.LICENSE. ++ * If applicable, add the following below this CDDL HEADER, with the ++ * fields enclosed by brackets "[]" replaced with your own identifying ++ * information: Portions Copyright [yyyy] [name of copyright owner] ++ * ++ * CDDL HEADER END ++ */ ++/* ++ * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved. ++ * Portions Copyright 2011 Martin Matuska ++ * Portions Copyright 2012 Pawel Jakub Dawidek ++ * Copyright (c) 2012, Joyent, Inc. All rights reserved. ++ * Copyright 2011 Nexenta Systems, Inc. All rights reserved. ++ * Copyright (c) 2012 by Delphix. All rights reserved. ++ * Copyright (c) 2012, Joyent, Inc. All rights reserved. 
++ */ ++ ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++ ++#include ++ ++#include "zfs_namecheck.h" ++#include "zfs_prop.h" ++#include "zfs_deleg.h" ++#include "zfs_comutil.h" ++ ++kmutex_t zfsdev_state_lock; ++list_t zfsdev_state_list; ++ ++extern void zfs_init(void); ++extern void zfs_fini(void); ++ ++typedef int zfs_ioc_func_t(zfs_cmd_t *); ++typedef int zfs_secpolicy_func_t(zfs_cmd_t *, cred_t *); ++ ++typedef enum { ++ NO_NAME, ++ POOL_NAME, ++ DATASET_NAME ++} zfs_ioc_namecheck_t; ++ ++typedef enum { ++ POOL_CHECK_NONE = 1 << 0, ++ POOL_CHECK_SUSPENDED = 1 << 1, ++ POOL_CHECK_READONLY = 1 << 2 ++} zfs_ioc_poolcheck_t; ++ ++typedef struct zfs_ioc_vec { ++ zfs_ioc_func_t *zvec_func; ++ zfs_secpolicy_func_t *zvec_secpolicy; ++ zfs_ioc_namecheck_t zvec_namecheck; ++ boolean_t zvec_his_log; ++ zfs_ioc_poolcheck_t zvec_pool_check; ++} zfs_ioc_vec_t; ++ ++/* This array is indexed by zfs_userquota_prop_t */ ++static const char *userquota_perms[] = { ++ ZFS_DELEG_PERM_USERUSED, ++ ZFS_DELEG_PERM_USERQUOTA, ++ ZFS_DELEG_PERM_GROUPUSED, ++ ZFS_DELEG_PERM_GROUPQUOTA, ++}; ++ ++static int zfs_ioc_userspace_upgrade(zfs_cmd_t *zc); ++static int zfs_check_settable(const char *name, nvpair_t *property, ++ cred_t *cr); ++static int zfs_check_clearable(char *dataset, nvlist_t *props, ++ nvlist_t **errors); ++static int zfs_fill_zplprops_root(uint64_t, nvlist_t *, nvlist_t *, ++ boolean_t *); ++int zfs_set_prop_nvlist(const char *, zprop_source_t, nvlist_t *, nvlist_t **); ++ ++static void ++history_str_free(char *buf) ++{ ++ kmem_free(buf, HIS_MAX_RECORD_LEN); ++} ++ ++static char * ++history_str_get(zfs_cmd_t *zc) ++{ ++ char *buf; ++ ++ if (zc->zc_history == 0) ++ return (NULL); ++ ++ buf = kmem_alloc(HIS_MAX_RECORD_LEN, KM_SLEEP | KM_NODEBUG); ++ if (copyinstr((void *)(uintptr_t)zc->zc_history, ++ buf, HIS_MAX_RECORD_LEN, NULL) != 0) { ++ history_str_free(buf); ++ return (NULL); ++ } ++ ++ buf[HIS_MAX_RECORD_LEN -1] = '\0'; ++ ++ return (buf); ++} ++ ++/* ++ * Check to see if the named dataset is currently defined as bootable ++ */ ++static boolean_t ++zfs_is_bootfs(const char *name) ++{ ++ objset_t *os; ++ ++ if (dmu_objset_hold(name, FTAG, &os) == 0) { ++ boolean_t ret; ++ ret = (dmu_objset_id(os) == spa_bootfs(dmu_objset_spa(os))); ++ dmu_objset_rele(os, FTAG); ++ return (ret); ++ } ++ return (B_FALSE); ++} ++ ++/* ++ * zfs_earlier_version ++ * ++ * Return non-zero if the spa version is less than requested version. ++ */ ++static int ++zfs_earlier_version(const char *name, int version) ++{ ++ spa_t *spa; ++ ++ if (spa_open(name, &spa, FTAG) == 0) { ++ if (spa_version(spa) < version) { ++ spa_close(spa, FTAG); ++ return (1); ++ } ++ spa_close(spa, FTAG); ++ } ++ return (0); ++} ++ ++/* ++ * zpl_earlier_version ++ * ++ * Return TRUE if the ZPL version is less than requested version. 
++ */ ++static boolean_t ++zpl_earlier_version(const char *name, int version) ++{ ++ objset_t *os; ++ boolean_t rc = B_TRUE; ++ ++ if (dmu_objset_hold(name, FTAG, &os) == 0) { ++ uint64_t zplversion; ++ ++ if (dmu_objset_type(os) != DMU_OST_ZFS) { ++ dmu_objset_rele(os, FTAG); ++ return (B_TRUE); ++ } ++ /* XXX reading from non-owned objset */ ++ if (zfs_get_zplprop(os, ZFS_PROP_VERSION, &zplversion) == 0) ++ rc = zplversion < version; ++ dmu_objset_rele(os, FTAG); ++ } ++ return (rc); ++} ++ ++static void ++zfs_log_history(zfs_cmd_t *zc) ++{ ++ spa_t *spa; ++ char *buf; ++ ++ if ((buf = history_str_get(zc)) == NULL) ++ return; ++ ++ if (spa_open(zc->zc_name, &spa, FTAG) == 0) { ++ if (spa_version(spa) >= SPA_VERSION_ZPOOL_HISTORY) ++ (void) spa_history_log(spa, buf, LOG_CMD_NORMAL); ++ spa_close(spa, FTAG); ++ } ++ history_str_free(buf); ++} ++ ++/* ++ * Policy for top-level read operations (list pools). Requires no privileges, ++ * and can be used in the local zone, as there is no associated dataset. ++ */ ++/* ARGSUSED */ ++static int ++zfs_secpolicy_none(zfs_cmd_t *zc, cred_t *cr) ++{ ++ return (0); ++} ++ ++/* ++ * Policy for dataset read operations (list children, get statistics). Requires ++ * no privileges, but must be visible in the local zone. ++ */ ++/* ARGSUSED */ ++static int ++zfs_secpolicy_read(zfs_cmd_t *zc, cred_t *cr) ++{ ++ if (INGLOBALZONE(curproc) || ++ zone_dataset_visible(zc->zc_name, NULL)) ++ return (0); ++ ++ return (ENOENT); ++} ++ ++static int ++zfs_dozonecheck_impl(const char *dataset, uint64_t zoned, cred_t *cr) ++{ ++ int writable = 1; ++ ++ /* ++ * The dataset must be visible by this zone -- check this first ++ * so they don't see EPERM on something they shouldn't know about. ++ */ ++ if (!INGLOBALZONE(curproc) && ++ !zone_dataset_visible(dataset, &writable)) ++ return (ENOENT); ++ ++ if (INGLOBALZONE(curproc)) { ++ /* ++ * If the fs is zoned, only root can access it from the ++ * global zone. ++ */ ++ if (secpolicy_zfs(cr) && zoned) ++ return (EPERM); ++ } else { ++ /* ++ * If we are in a local zone, the 'zoned' property must be set. ++ */ ++ if (!zoned) ++ return (EPERM); ++ ++ /* must be writable by this zone */ ++ if (!writable) ++ return (EPERM); ++ } ++ return (0); ++} ++ ++static int ++zfs_dozonecheck(const char *dataset, cred_t *cr) ++{ ++ uint64_t zoned; ++ ++ if (dsl_prop_get_integer(dataset, "zoned", &zoned, NULL)) ++ return (ENOENT); ++ ++ return (zfs_dozonecheck_impl(dataset, zoned, cr)); ++} ++ ++static int ++zfs_dozonecheck_ds(const char *dataset, dsl_dataset_t *ds, cred_t *cr) ++{ ++ uint64_t zoned; ++ ++ rw_enter(&ds->ds_dir->dd_pool->dp_config_rwlock, RW_READER); ++ if (dsl_prop_get_ds(ds, "zoned", 8, 1, &zoned, NULL)) { ++ rw_exit(&ds->ds_dir->dd_pool->dp_config_rwlock); ++ return (ENOENT); ++ } ++ rw_exit(&ds->ds_dir->dd_pool->dp_config_rwlock); ++ ++ return (zfs_dozonecheck_impl(dataset, zoned, cr)); ++} ++ ++/* ++ * If name ends in a '@', then require recursive permissions. 
++ */ ++int ++zfs_secpolicy_write_perms(const char *name, const char *perm, cred_t *cr) ++{ ++ int error; ++ boolean_t descendent = B_FALSE; ++ dsl_dataset_t *ds; ++ char *at; ++ ++ at = strchr(name, '@'); ++ if (at != NULL && at[1] == '\0') { ++ *at = '\0'; ++ descendent = B_TRUE; ++ } ++ ++ error = dsl_dataset_hold(name, FTAG, &ds); ++ if (at != NULL) ++ *at = '@'; ++ if (error != 0) ++ return (error); ++ ++ error = zfs_dozonecheck_ds(name, ds, cr); ++ if (error == 0) { ++ error = secpolicy_zfs(cr); ++ if (error) ++ error = dsl_deleg_access_impl(ds, descendent, perm, cr); ++ } ++ ++ dsl_dataset_rele(ds, FTAG); ++ return (error); ++} ++ ++int ++zfs_secpolicy_write_perms_ds(const char *name, dsl_dataset_t *ds, ++ const char *perm, cred_t *cr) ++{ ++ int error; ++ ++ error = zfs_dozonecheck_ds(name, ds, cr); ++ if (error == 0) { ++ error = secpolicy_zfs(cr); ++ if (error) ++ error = dsl_deleg_access_impl(ds, B_FALSE, perm, cr); ++ } ++ return (error); ++} ++ ++/* ++ * Policy for setting the security label property. ++ * ++ * Returns 0 for success, non-zero for access and other errors. ++ */ ++static int ++zfs_set_slabel_policy(const char *name, char *strval, cred_t *cr) ++{ ++#ifdef HAVE_MLSLABEL ++ char ds_hexsl[MAXNAMELEN]; ++ bslabel_t ds_sl, new_sl; ++ boolean_t new_default = FALSE; ++ uint64_t zoned; ++ int needed_priv = -1; ++ int error; ++ ++ /* First get the existing dataset label. */ ++ error = dsl_prop_get(name, zfs_prop_to_name(ZFS_PROP_MLSLABEL), ++ 1, sizeof (ds_hexsl), &ds_hexsl, NULL); ++ if (error) ++ return (EPERM); ++ ++ if (strcasecmp(strval, ZFS_MLSLABEL_DEFAULT) == 0) ++ new_default = TRUE; ++ ++ /* The label must be translatable */ ++ if (!new_default && (hexstr_to_label(strval, &new_sl) != 0)) ++ return (EINVAL); ++ ++ /* ++ * In a non-global zone, disallow attempts to set a label that ++ * doesn't match that of the zone; otherwise no other checks ++ * are needed. ++ */ ++ if (!INGLOBALZONE(curproc)) { ++ if (new_default || !blequal(&new_sl, CR_SL(CRED()))) ++ return (EPERM); ++ return (0); ++ } ++ ++ /* ++ * For global-zone datasets (i.e., those whose zoned property is ++ * "off", verify that the specified new label is valid for the ++ * global zone. ++ */ ++ if (dsl_prop_get_integer(name, ++ zfs_prop_to_name(ZFS_PROP_ZONED), &zoned, NULL)) ++ return (EPERM); ++ if (!zoned) { ++ if (zfs_check_global_label(name, strval) != 0) ++ return (EPERM); ++ } ++ ++ /* ++ * If the existing dataset label is nondefault, check if the ++ * dataset is mounted (label cannot be changed while mounted). ++ * Get the zfs_sb_t; if there isn't one, then the dataset isn't ++ * mounted (or isn't a dataset, doesn't exist, ...). ++ */ ++ if (strcasecmp(ds_hexsl, ZFS_MLSLABEL_DEFAULT) != 0) { ++ objset_t *os; ++ static char *setsl_tag = "setsl_tag"; ++ ++ /* ++ * Try to own the dataset; abort if there is any error, ++ * (e.g., already mounted, in use, or other error). 
++ */ ++ error = dmu_objset_own(name, DMU_OST_ZFS, B_TRUE, ++ setsl_tag, &os); ++ if (error) ++ return (EPERM); ++ ++ dmu_objset_disown(os, setsl_tag); ++ ++ if (new_default) { ++ needed_priv = PRIV_FILE_DOWNGRADE_SL; ++ goto out_check; ++ } ++ ++ if (hexstr_to_label(strval, &new_sl) != 0) ++ return (EPERM); ++ ++ if (blstrictdom(&ds_sl, &new_sl)) ++ needed_priv = PRIV_FILE_DOWNGRADE_SL; ++ else if (blstrictdom(&new_sl, &ds_sl)) ++ needed_priv = PRIV_FILE_UPGRADE_SL; ++ } else { ++ /* dataset currently has a default label */ ++ if (!new_default) ++ needed_priv = PRIV_FILE_UPGRADE_SL; ++ } ++ ++out_check: ++ if (needed_priv != -1) ++ return (PRIV_POLICY(cr, needed_priv, B_FALSE, EPERM, NULL)); ++ return (0); ++#else ++ return ENOTSUP; ++#endif /* HAVE_MLSLABEL */ ++} ++ ++static int ++zfs_secpolicy_setprop(const char *dsname, zfs_prop_t prop, nvpair_t *propval, ++ cred_t *cr) ++{ ++ char *strval; ++ ++ /* ++ * Check permissions for special properties. ++ */ ++ switch (prop) { ++ default: ++ break; ++ case ZFS_PROP_ZONED: ++ /* ++ * Disallow setting of 'zoned' from within a local zone. ++ */ ++ if (!INGLOBALZONE(curproc)) ++ return (EPERM); ++ break; ++ ++ case ZFS_PROP_QUOTA: ++ if (!INGLOBALZONE(curproc)) { ++ uint64_t zoned; ++ char setpoint[MAXNAMELEN]; ++ /* ++ * Unprivileged users are allowed to modify the ++ * quota on things *under* (ie. contained by) ++ * the thing they own. ++ */ ++ if (dsl_prop_get_integer(dsname, "zoned", &zoned, ++ setpoint)) ++ return (EPERM); ++ if (!zoned || strlen(dsname) <= strlen(setpoint)) ++ return (EPERM); ++ } ++ break; ++ ++ case ZFS_PROP_MLSLABEL: ++ if (!is_system_labeled()) ++ return (EPERM); ++ ++ if (nvpair_value_string(propval, &strval) == 0) { ++ int err; ++ ++ err = zfs_set_slabel_policy(dsname, strval, CRED()); ++ if (err != 0) ++ return (err); ++ } ++ break; ++ } ++ ++ return (zfs_secpolicy_write_perms(dsname, zfs_prop_to_name(prop), cr)); ++} ++ ++int ++zfs_secpolicy_fsacl(zfs_cmd_t *zc, cred_t *cr) ++{ ++ int error; ++ ++ error = zfs_dozonecheck(zc->zc_name, cr); ++ if (error) ++ return (error); ++ ++ /* ++ * permission to set permissions will be evaluated later in ++ * dsl_deleg_can_allow() ++ */ ++ return (0); ++} ++ ++int ++zfs_secpolicy_rollback(zfs_cmd_t *zc, cred_t *cr) ++{ ++ return (zfs_secpolicy_write_perms(zc->zc_name, ++ ZFS_DELEG_PERM_ROLLBACK, cr)); ++} ++ ++int ++zfs_secpolicy_send(zfs_cmd_t *zc, cred_t *cr) ++{ ++ spa_t *spa; ++ dsl_pool_t *dp; ++ dsl_dataset_t *ds; ++ char *cp; ++ int error; ++ ++ /* ++ * Generate the current snapshot name from the given objsetid, then ++ * use that name for the secpolicy/zone checks. 
++ */ ++ cp = strchr(zc->zc_name, '@'); ++ if (cp == NULL) ++ return (EINVAL); ++ error = spa_open(zc->zc_name, &spa, FTAG); ++ if (error) ++ return (error); ++ ++ dp = spa_get_dsl(spa); ++ rw_enter(&dp->dp_config_rwlock, RW_READER); ++ error = dsl_dataset_hold_obj(dp, zc->zc_sendobj, FTAG, &ds); ++ rw_exit(&dp->dp_config_rwlock); ++ spa_close(spa, FTAG); ++ if (error) ++ return (error); ++ ++ dsl_dataset_name(ds, zc->zc_name); ++ ++ error = zfs_secpolicy_write_perms_ds(zc->zc_name, ds, ++ ZFS_DELEG_PERM_SEND, cr); ++ dsl_dataset_rele(ds, FTAG); ++ ++ return (error); ++} ++ ++#ifdef HAVE_SMB_SHARE ++static int ++zfs_secpolicy_deleg_share(zfs_cmd_t *zc, cred_t *cr) ++{ ++ vnode_t *vp; ++ int error; ++ ++ if ((error = lookupname(zc->zc_value, UIO_SYSSPACE, ++ NO_FOLLOW, NULL, &vp)) != 0) ++ return (error); ++ ++ /* Now make sure mntpnt and dataset are ZFS */ ++ ++ if (vp->v_vfsp->vfs_fstype != zfsfstype || ++ (strcmp((char *)refstr_value(vp->v_vfsp->vfs_resource), ++ zc->zc_name) != 0)) { ++ VN_RELE(vp); ++ return (EPERM); ++ } ++ ++ VN_RELE(vp); ++ return (dsl_deleg_access(zc->zc_name, ++ ZFS_DELEG_PERM_SHARE, cr)); ++} ++#endif /* HAVE_SMB_SHARE */ ++ ++int ++zfs_secpolicy_share(zfs_cmd_t *zc, cred_t *cr) ++{ ++#ifdef HAVE_SMB_SHARE ++ if (!INGLOBALZONE(curproc)) ++ return (EPERM); ++ ++ if (secpolicy_nfs(cr) == 0) { ++ return (0); ++ } else { ++ return (zfs_secpolicy_deleg_share(zc, cr)); ++ } ++#else ++ return (ENOTSUP); ++#endif /* HAVE_SMB_SHARE */ ++} ++ ++int ++zfs_secpolicy_smb_acl(zfs_cmd_t *zc, cred_t *cr) ++{ ++#ifdef HAVE_SMB_SHARE ++ if (!INGLOBALZONE(curproc)) ++ return (EPERM); ++ ++ if (secpolicy_smb(cr) == 0) { ++ return (0); ++ } else { ++ return (zfs_secpolicy_deleg_share(zc, cr)); ++ } ++#else ++ return (ENOTSUP); ++#endif /* HAVE_SMB_SHARE */ ++} ++ ++static int ++zfs_get_parent(const char *datasetname, char *parent, int parentsize) ++{ ++ char *cp; ++ ++ /* ++ * Remove the @bla or /bla from the end of the name to get the parent. ++ */ ++ (void) strncpy(parent, datasetname, parentsize); ++ cp = strrchr(parent, '@'); ++ if (cp != NULL) { ++ cp[0] = '\0'; ++ } else { ++ cp = strrchr(parent, '/'); ++ if (cp == NULL) ++ return (ENOENT); ++ cp[0] = '\0'; ++ } ++ ++ return (0); ++} ++ ++int ++zfs_secpolicy_destroy_perms(const char *name, cred_t *cr) ++{ ++ int error; ++ ++ if ((error = zfs_secpolicy_write_perms(name, ++ ZFS_DELEG_PERM_MOUNT, cr)) != 0) ++ return (error); ++ ++ return (zfs_secpolicy_write_perms(name, ZFS_DELEG_PERM_DESTROY, cr)); ++} ++ ++static int ++zfs_secpolicy_destroy(zfs_cmd_t *zc, cred_t *cr) ++{ ++ return (zfs_secpolicy_destroy_perms(zc->zc_name, cr)); ++} ++ ++/* ++ * Destroying snapshots with delegated permissions requires ++ * descendent mount and destroy permissions. 
++ */ ++static int ++zfs_secpolicy_destroy_recursive(zfs_cmd_t *zc, cred_t *cr) ++{ ++ int error; ++ char *dsname; ++ ++ dsname = kmem_asprintf("%s@", zc->zc_name); ++ ++ error = zfs_secpolicy_destroy_perms(dsname, cr); ++ if (error == ENOENT) ++ error = zfs_secpolicy_destroy_perms(zc->zc_name, cr); ++ ++ strfree(dsname); ++ return (error); ++} ++ ++int ++zfs_secpolicy_rename_perms(const char *from, const char *to, cred_t *cr) ++{ ++ char parentname[MAXNAMELEN]; ++ int error; ++ ++ if ((error = zfs_secpolicy_write_perms(from, ++ ZFS_DELEG_PERM_RENAME, cr)) != 0) ++ return (error); ++ ++ if ((error = zfs_secpolicy_write_perms(from, ++ ZFS_DELEG_PERM_MOUNT, cr)) != 0) ++ return (error); ++ ++ if ((error = zfs_get_parent(to, parentname, ++ sizeof (parentname))) != 0) ++ return (error); ++ ++ if ((error = zfs_secpolicy_write_perms(parentname, ++ ZFS_DELEG_PERM_CREATE, cr)) != 0) ++ return (error); ++ ++ if ((error = zfs_secpolicy_write_perms(parentname, ++ ZFS_DELEG_PERM_MOUNT, cr)) != 0) ++ return (error); ++ ++ return (error); ++} ++ ++static int ++zfs_secpolicy_rename(zfs_cmd_t *zc, cred_t *cr) ++{ ++ return (zfs_secpolicy_rename_perms(zc->zc_name, zc->zc_value, cr)); ++} ++ ++static int ++zfs_secpolicy_promote(zfs_cmd_t *zc, cred_t *cr) ++{ ++ char parentname[MAXNAMELEN]; ++ objset_t *clone; ++ int error; ++ ++ error = zfs_secpolicy_write_perms(zc->zc_name, ++ ZFS_DELEG_PERM_PROMOTE, cr); ++ if (error) ++ return (error); ++ ++ error = dmu_objset_hold(zc->zc_name, FTAG, &clone); ++ ++ if (error == 0) { ++ dsl_dataset_t *pclone = NULL; ++ dsl_dir_t *dd; ++ dd = clone->os_dsl_dataset->ds_dir; ++ ++ rw_enter(&dd->dd_pool->dp_config_rwlock, RW_READER); ++ error = dsl_dataset_hold_obj(dd->dd_pool, ++ dd->dd_phys->dd_origin_obj, FTAG, &pclone); ++ rw_exit(&dd->dd_pool->dp_config_rwlock); ++ if (error) { ++ dmu_objset_rele(clone, FTAG); ++ return (error); ++ } ++ ++ error = zfs_secpolicy_write_perms(zc->zc_name, ++ ZFS_DELEG_PERM_MOUNT, cr); ++ ++ dsl_dataset_name(pclone, parentname); ++ dmu_objset_rele(clone, FTAG); ++ dsl_dataset_rele(pclone, FTAG); ++ if (error == 0) ++ error = zfs_secpolicy_write_perms(parentname, ++ ZFS_DELEG_PERM_PROMOTE, cr); ++ } ++ return (error); ++} ++ ++static int ++zfs_secpolicy_receive(zfs_cmd_t *zc, cred_t *cr) ++{ ++ int error; ++ ++ if ((error = zfs_secpolicy_write_perms(zc->zc_name, ++ ZFS_DELEG_PERM_RECEIVE, cr)) != 0) ++ return (error); ++ ++ if ((error = zfs_secpolicy_write_perms(zc->zc_name, ++ ZFS_DELEG_PERM_MOUNT, cr)) != 0) ++ return (error); ++ ++ return (zfs_secpolicy_write_perms(zc->zc_name, ++ ZFS_DELEG_PERM_CREATE, cr)); ++} ++ ++int ++zfs_secpolicy_snapshot_perms(const char *name, cred_t *cr) ++{ ++ return (zfs_secpolicy_write_perms(name, ++ ZFS_DELEG_PERM_SNAPSHOT, cr)); ++} ++ ++static int ++zfs_secpolicy_snapshot(zfs_cmd_t *zc, cred_t *cr) ++{ ++ ++ return (zfs_secpolicy_snapshot_perms(zc->zc_name, cr)); ++} ++ ++static int ++zfs_secpolicy_create(zfs_cmd_t *zc, cred_t *cr) ++{ ++ char parentname[MAXNAMELEN]; ++ int error; ++ ++ if ((error = zfs_get_parent(zc->zc_name, parentname, ++ sizeof (parentname))) != 0) ++ return (error); ++ ++ if (zc->zc_value[0] != '\0') { ++ if ((error = zfs_secpolicy_write_perms(zc->zc_value, ++ ZFS_DELEG_PERM_CLONE, cr)) != 0) ++ return (error); ++ } ++ ++ if ((error = zfs_secpolicy_write_perms(parentname, ++ ZFS_DELEG_PERM_CREATE, cr)) != 0) ++ return (error); ++ ++ error = zfs_secpolicy_write_perms(parentname, ++ ZFS_DELEG_PERM_MOUNT, cr); ++ ++ return (error); ++} ++ ++/* ++ * Policy for pool operations - 
create/destroy pools, add vdevs, etc. Requires ++ * SYS_CONFIG privilege, which is not available in a local zone. ++ */ ++/* ARGSUSED */ ++static int ++zfs_secpolicy_config(zfs_cmd_t *zc, cred_t *cr) ++{ ++ if (secpolicy_sys_config(cr, B_FALSE) != 0) ++ return (EPERM); ++ ++ return (0); ++} ++ ++/* ++ * Policy for object to name lookups. ++ */ ++/* ARGSUSED */ ++static int ++zfs_secpolicy_diff(zfs_cmd_t *zc, cred_t *cr) ++{ ++ int error; ++ ++ if ((error = secpolicy_sys_config(cr, B_FALSE)) == 0) ++ return (0); ++ ++ error = zfs_secpolicy_write_perms(zc->zc_name, ZFS_DELEG_PERM_DIFF, cr); ++ return (error); ++} ++ ++/* ++ * Policy for fault injection. Requires all privileges. ++ */ ++/* ARGSUSED */ ++static int ++zfs_secpolicy_inject(zfs_cmd_t *zc, cred_t *cr) ++{ ++ return (secpolicy_zinject(cr)); ++} ++ ++static int ++zfs_secpolicy_inherit(zfs_cmd_t *zc, cred_t *cr) ++{ ++ zfs_prop_t prop = zfs_name_to_prop(zc->zc_value); ++ ++ if (prop == ZPROP_INVAL) { ++ if (!zfs_prop_user(zc->zc_value)) ++ return (EINVAL); ++ return (zfs_secpolicy_write_perms(zc->zc_name, ++ ZFS_DELEG_PERM_USERPROP, cr)); ++ } else { ++ return (zfs_secpolicy_setprop(zc->zc_name, prop, ++ NULL, cr)); ++ } ++} ++ ++static int ++zfs_secpolicy_userspace_one(zfs_cmd_t *zc, cred_t *cr) ++{ ++ int err = zfs_secpolicy_read(zc, cr); ++ if (err) ++ return (err); ++ ++ if (zc->zc_objset_type >= ZFS_NUM_USERQUOTA_PROPS) ++ return (EINVAL); ++ ++ if (zc->zc_value[0] == 0) { ++ /* ++ * They are asking about a posix uid/gid. If it's ++ * themself, allow it. ++ */ ++ if (zc->zc_objset_type == ZFS_PROP_USERUSED || ++ zc->zc_objset_type == ZFS_PROP_USERQUOTA) { ++ if (zc->zc_guid == crgetuid(cr)) ++ return (0); ++ } else { ++ if (groupmember(zc->zc_guid, cr)) ++ return (0); ++ } ++ } ++ ++ return (zfs_secpolicy_write_perms(zc->zc_name, ++ userquota_perms[zc->zc_objset_type], cr)); ++} ++ ++static int ++zfs_secpolicy_userspace_many(zfs_cmd_t *zc, cred_t *cr) ++{ ++ int err = zfs_secpolicy_read(zc, cr); ++ if (err) ++ return (err); ++ ++ if (zc->zc_objset_type >= ZFS_NUM_USERQUOTA_PROPS) ++ return (EINVAL); ++ ++ return (zfs_secpolicy_write_perms(zc->zc_name, ++ userquota_perms[zc->zc_objset_type], cr)); ++} ++ ++static int ++zfs_secpolicy_userspace_upgrade(zfs_cmd_t *zc, cred_t *cr) ++{ ++ return (zfs_secpolicy_setprop(zc->zc_name, ZFS_PROP_VERSION, ++ NULL, cr)); ++} ++ ++static int ++zfs_secpolicy_hold(zfs_cmd_t *zc, cred_t *cr) ++{ ++ return (zfs_secpolicy_write_perms(zc->zc_name, ++ ZFS_DELEG_PERM_HOLD, cr)); ++} ++ ++static int ++zfs_secpolicy_release(zfs_cmd_t *zc, cred_t *cr) ++{ ++ return (zfs_secpolicy_write_perms(zc->zc_name, ++ ZFS_DELEG_PERM_RELEASE, cr)); ++} ++ ++/* ++ * Policy for allowing temporary snapshots to be taken or released ++ */ ++static int ++zfs_secpolicy_tmp_snapshot(zfs_cmd_t *zc, cred_t *cr) ++{ ++ /* ++ * A temporary snapshot is the same as a snapshot, ++ * hold, destroy and release all rolled into one. ++ * Delegated diff alone is sufficient that we allow this. ++ */ ++ int error; ++ ++ if ((error = zfs_secpolicy_write_perms(zc->zc_name, ++ ZFS_DELEG_PERM_DIFF, cr)) == 0) ++ return (0); ++ ++ error = zfs_secpolicy_snapshot(zc, cr); ++ if (!error) ++ error = zfs_secpolicy_hold(zc, cr); ++ if (!error) ++ error = zfs_secpolicy_release(zc, cr); ++ if (!error) ++ error = zfs_secpolicy_destroy(zc, cr); ++ return (error); ++} ++ ++/* ++ * Returns the nvlist as specified by the user in the zfs_cmd_t. 
++ */ ++static int ++get_nvlist(uint64_t nvl, uint64_t size, int iflag, nvlist_t **nvp) ++{ ++ char *packed; ++ int error; ++ nvlist_t *list = NULL; ++ ++ /* ++ * Read in and unpack the user-supplied nvlist. ++ */ ++ if (size == 0) ++ return (EINVAL); ++ ++ packed = kmem_alloc(size, KM_SLEEP | KM_NODEBUG); ++ ++ if ((error = ddi_copyin((void *)(uintptr_t)nvl, packed, size, ++ iflag)) != 0) { ++ kmem_free(packed, size); ++ return (error); ++ } ++ ++ if ((error = nvlist_unpack(packed, size, &list, 0)) != 0) { ++ kmem_free(packed, size); ++ return (error); ++ } ++ ++ kmem_free(packed, size); ++ ++ *nvp = list; ++ return (0); ++} ++ ++static int ++fit_error_list(zfs_cmd_t *zc, nvlist_t **errors) ++{ ++ size_t size; ++ ++ VERIFY(nvlist_size(*errors, &size, NV_ENCODE_NATIVE) == 0); ++ ++ if (size > zc->zc_nvlist_dst_size) { ++ nvpair_t *more_errors; ++ int n = 0; ++ ++ if (zc->zc_nvlist_dst_size < 1024) ++ return (ENOMEM); ++ ++ VERIFY(nvlist_add_int32(*errors, ZPROP_N_MORE_ERRORS, 0) == 0); ++ more_errors = nvlist_prev_nvpair(*errors, NULL); ++ ++ do { ++ nvpair_t *pair = nvlist_prev_nvpair(*errors, ++ more_errors); ++ VERIFY(nvlist_remove_nvpair(*errors, pair) == 0); ++ n++; ++ VERIFY(nvlist_size(*errors, &size, ++ NV_ENCODE_NATIVE) == 0); ++ } while (size > zc->zc_nvlist_dst_size); ++ ++ VERIFY(nvlist_remove_nvpair(*errors, more_errors) == 0); ++ VERIFY(nvlist_add_int32(*errors, ZPROP_N_MORE_ERRORS, n) == 0); ++ ASSERT(nvlist_size(*errors, &size, NV_ENCODE_NATIVE) == 0); ++ ASSERT(size <= zc->zc_nvlist_dst_size); ++ } ++ ++ return (0); ++} ++ ++static int ++put_nvlist(zfs_cmd_t *zc, nvlist_t *nvl) ++{ ++ char *packed = NULL; ++ int error = 0; ++ size_t size; ++ ++ VERIFY(nvlist_size(nvl, &size, NV_ENCODE_NATIVE) == 0); ++ ++ if (size > zc->zc_nvlist_dst_size) { ++ error = ENOMEM; ++ } else { ++ packed = kmem_alloc(size, KM_SLEEP | KM_NODEBUG); ++ VERIFY(nvlist_pack(nvl, &packed, &size, NV_ENCODE_NATIVE, ++ KM_SLEEP) == 0); ++ if (ddi_copyout(packed, (void *)(uintptr_t)zc->zc_nvlist_dst, ++ size, zc->zc_iflags) != 0) ++ error = EFAULT; ++ kmem_free(packed, size); ++ } ++ ++ zc->zc_nvlist_dst_size = size; ++ return (error); ++} ++ ++static int ++get_zfs_sb(const char *dsname, zfs_sb_t **zsbp) ++{ ++ objset_t *os; ++ int error; ++ ++ error = dmu_objset_hold(dsname, FTAG, &os); ++ if (error) ++ return (error); ++ if (dmu_objset_type(os) != DMU_OST_ZFS) { ++ dmu_objset_rele(os, FTAG); ++ return (EINVAL); ++ } ++ ++ mutex_enter(&os->os_user_ptr_lock); ++ *zsbp = dmu_objset_get_user(os); ++ if (*zsbp && (*zsbp)->z_sb) { ++ atomic_inc(&((*zsbp)->z_sb->s_active)); ++ } else { ++ error = ESRCH; ++ } ++ mutex_exit(&os->os_user_ptr_lock); ++ dmu_objset_rele(os, FTAG); ++ return (error); ++} ++ ++/* ++ * Find a zfs_sb_t for a mounted filesystem, or create our own, in which ++ * case its z_sb will be NULL, and it will be opened as the owner. ++ */ ++static int ++zfs_sb_hold(const char *name, void *tag, zfs_sb_t **zsbp, boolean_t writer) ++{ ++ int error = 0; ++ ++ if (get_zfs_sb(name, zsbp) != 0) ++ error = zfs_sb_create(name, zsbp); ++ if (error == 0) { ++ rrw_enter(&(*zsbp)->z_teardown_lock, (writer) ? RW_WRITER : ++ RW_READER, tag); ++ if ((*zsbp)->z_unmounted) { ++ /* ++ * XXX we could probably try again, since the unmounting ++ * thread should be just about to disassociate the ++ * objset from the zfsvfs. 
++ */ ++ rrw_exit(&(*zsbp)->z_teardown_lock, tag); ++ return (EBUSY); ++ } ++ } ++ return (error); ++} ++ ++static void ++zfs_sb_rele(zfs_sb_t *zsb, void *tag) ++{ ++ rrw_exit(&zsb->z_teardown_lock, tag); ++ ++ if (zsb->z_sb) { ++ deactivate_super(zsb->z_sb); ++ } else { ++ dmu_objset_disown(zsb->z_os, zsb); ++ zfs_sb_free(zsb); ++ } ++} ++ ++static int ++zfs_ioc_pool_create(zfs_cmd_t *zc) ++{ ++ int error; ++ nvlist_t *config, *props = NULL; ++ nvlist_t *rootprops = NULL; ++ nvlist_t *zplprops = NULL; ++ char *buf; ++ ++ if ((error = get_nvlist(zc->zc_nvlist_conf, zc->zc_nvlist_conf_size, ++ zc->zc_iflags, &config))) ++ return (error); ++ ++ if (zc->zc_nvlist_src_size != 0 && (error = ++ get_nvlist(zc->zc_nvlist_src, zc->zc_nvlist_src_size, ++ zc->zc_iflags, &props))) { ++ nvlist_free(config); ++ return (error); ++ } ++ ++ if (props) { ++ nvlist_t *nvl = NULL; ++ uint64_t version = SPA_VERSION; ++ ++ (void) nvlist_lookup_uint64(props, ++ zpool_prop_to_name(ZPOOL_PROP_VERSION), &version); ++ if (version < SPA_VERSION_INITIAL || version > SPA_VERSION) { ++ error = EINVAL; ++ goto pool_props_bad; ++ } ++ (void) nvlist_lookup_nvlist(props, ZPOOL_ROOTFS_PROPS, &nvl); ++ if (nvl) { ++ error = nvlist_dup(nvl, &rootprops, KM_SLEEP); ++ if (error != 0) { ++ nvlist_free(config); ++ nvlist_free(props); ++ return (error); ++ } ++ (void) nvlist_remove_all(props, ZPOOL_ROOTFS_PROPS); ++ } ++ VERIFY(nvlist_alloc(&zplprops, NV_UNIQUE_NAME, KM_SLEEP) == 0); ++ error = zfs_fill_zplprops_root(version, rootprops, ++ zplprops, NULL); ++ if (error) ++ goto pool_props_bad; ++ } ++ ++ buf = history_str_get(zc); ++ ++ error = spa_create(zc->zc_name, config, props, buf, zplprops); ++ ++ /* ++ * Set the remaining root properties ++ */ ++ if (!error && (error = zfs_set_prop_nvlist(zc->zc_name, ++ ZPROP_SRC_LOCAL, rootprops, NULL)) != 0) ++ (void) spa_destroy(zc->zc_name); ++ ++ if (buf != NULL) ++ history_str_free(buf); ++ ++pool_props_bad: ++ nvlist_free(rootprops); ++ nvlist_free(zplprops); ++ nvlist_free(config); ++ nvlist_free(props); ++ ++ return (error); ++} ++ ++static int ++zfs_ioc_pool_destroy(zfs_cmd_t *zc) ++{ ++ int error; ++ zfs_log_history(zc); ++ error = spa_destroy(zc->zc_name); ++ if (error == 0) ++ zvol_remove_minors(zc->zc_name); ++ return (error); ++} ++ ++static int ++zfs_ioc_pool_import(zfs_cmd_t *zc) ++{ ++ nvlist_t *config, *props = NULL; ++ uint64_t guid; ++ int error; ++ ++ if ((error = get_nvlist(zc->zc_nvlist_conf, zc->zc_nvlist_conf_size, ++ zc->zc_iflags, &config)) != 0) ++ return (error); ++ ++ if (zc->zc_nvlist_src_size != 0 && (error = ++ get_nvlist(zc->zc_nvlist_src, zc->zc_nvlist_src_size, ++ zc->zc_iflags, &props))) { ++ nvlist_free(config); ++ return (error); ++ } ++ ++ if (nvlist_lookup_uint64(config, ZPOOL_CONFIG_POOL_GUID, &guid) != 0 || ++ guid != zc->zc_guid) ++ error = EINVAL; ++ else ++ error = spa_import(zc->zc_name, config, props, zc->zc_cookie); ++ ++ if (zc->zc_nvlist_dst != 0) { ++ int err; ++ ++ if ((err = put_nvlist(zc, config)) != 0) ++ error = err; ++ } ++ ++ if (error == 0) ++ zvol_create_minors(zc->zc_name); ++ ++ nvlist_free(config); ++ ++ if (props) ++ nvlist_free(props); ++ ++ return (error); ++} ++ ++static int ++zfs_ioc_pool_export(zfs_cmd_t *zc) ++{ ++ int error; ++ boolean_t force = (boolean_t)zc->zc_cookie; ++ boolean_t hardforce = (boolean_t)zc->zc_guid; ++ ++ zfs_log_history(zc); ++ error = spa_export(zc->zc_name, NULL, force, hardforce); ++ if (error == 0) ++ zvol_remove_minors(zc->zc_name); ++ return (error); ++} ++ ++static int 
++zfs_ioc_pool_configs(zfs_cmd_t *zc) ++{ ++ nvlist_t *configs; ++ int error; ++ ++ if ((configs = spa_all_configs(&zc->zc_cookie)) == NULL) ++ return (EEXIST); ++ ++ error = put_nvlist(zc, configs); ++ ++ nvlist_free(configs); ++ ++ return (error); ++} ++ ++static int ++zfs_ioc_pool_stats(zfs_cmd_t *zc) ++{ ++ nvlist_t *config; ++ int error; ++ int ret = 0; ++ ++ error = spa_get_stats(zc->zc_name, &config, zc->zc_value, ++ sizeof (zc->zc_value)); ++ ++ if (config != NULL) { ++ ret = put_nvlist(zc, config); ++ nvlist_free(config); ++ ++ /* ++ * The config may be present even if 'error' is non-zero. ++ * In this case we return success, and preserve the real errno ++ * in 'zc_cookie'. ++ */ ++ zc->zc_cookie = error; ++ } else { ++ ret = error; ++ } ++ ++ return (ret); ++} ++ ++/* ++ * Try to import the given pool, returning pool stats as appropriate so that ++ * user land knows which devices are available and overall pool health. ++ */ ++static int ++zfs_ioc_pool_tryimport(zfs_cmd_t *zc) ++{ ++ nvlist_t *tryconfig, *config; ++ int error; ++ ++ if ((error = get_nvlist(zc->zc_nvlist_conf, zc->zc_nvlist_conf_size, ++ zc->zc_iflags, &tryconfig)) != 0) ++ return (error); ++ ++ config = spa_tryimport(tryconfig); ++ ++ nvlist_free(tryconfig); ++ ++ if (config == NULL) ++ return (EINVAL); ++ ++ error = put_nvlist(zc, config); ++ nvlist_free(config); ++ ++ return (error); ++} ++ ++/* ++ * inputs: ++ * zc_name name of the pool ++ * zc_cookie scan func (pool_scan_func_t) ++ */ ++static int ++zfs_ioc_pool_scan(zfs_cmd_t *zc) ++{ ++ spa_t *spa; ++ int error; ++ ++ if ((error = spa_open(zc->zc_name, &spa, FTAG)) != 0) ++ return (error); ++ ++ if (zc->zc_cookie == POOL_SCAN_NONE) ++ error = spa_scan_stop(spa); ++ else ++ error = spa_scan(spa, zc->zc_cookie); ++ ++ spa_close(spa, FTAG); ++ ++ return (error); ++} ++ ++static int ++zfs_ioc_pool_freeze(zfs_cmd_t *zc) ++{ ++ spa_t *spa; ++ int error; ++ ++ error = spa_open(zc->zc_name, &spa, FTAG); ++ if (error == 0) { ++ spa_freeze(spa); ++ spa_close(spa, FTAG); ++ } ++ return (error); ++} ++ ++static int ++zfs_ioc_pool_upgrade(zfs_cmd_t *zc) ++{ ++ spa_t *spa; ++ int error; ++ ++ if ((error = spa_open(zc->zc_name, &spa, FTAG)) != 0) ++ return (error); ++ ++ if (zc->zc_cookie < spa_version(spa) || zc->zc_cookie > SPA_VERSION) { ++ spa_close(spa, FTAG); ++ return (EINVAL); ++ } ++ ++ spa_upgrade(spa, zc->zc_cookie); ++ spa_close(spa, FTAG); ++ ++ return (error); ++} ++ ++static int ++zfs_ioc_pool_get_history(zfs_cmd_t *zc) ++{ ++ spa_t *spa; ++ char *hist_buf; ++ uint64_t size; ++ int error; ++ ++ if ((size = zc->zc_history_len) == 0) ++ return (EINVAL); ++ ++ if ((error = spa_open(zc->zc_name, &spa, FTAG)) != 0) ++ return (error); ++ ++ if (spa_version(spa) < SPA_VERSION_ZPOOL_HISTORY) { ++ spa_close(spa, FTAG); ++ return (ENOTSUP); ++ } ++ ++ hist_buf = vmem_alloc(size, KM_SLEEP); ++ if ((error = spa_history_get(spa, &zc->zc_history_offset, ++ &zc->zc_history_len, hist_buf)) == 0) { ++ error = ddi_copyout(hist_buf, ++ (void *)(uintptr_t)zc->zc_history, ++ zc->zc_history_len, zc->zc_iflags); ++ } ++ ++ spa_close(spa, FTAG); ++ vmem_free(hist_buf, size); ++ return (error); ++} ++ ++static int ++zfs_ioc_pool_reguid(zfs_cmd_t *zc) ++{ ++ spa_t *spa; ++ int error; ++ ++ error = spa_open(zc->zc_name, &spa, FTAG); ++ if (error == 0) { ++ error = spa_change_guid(spa); ++ spa_close(spa, FTAG); ++ } ++ return (error); ++} ++ ++static int ++zfs_ioc_dsobj_to_dsname(zfs_cmd_t *zc) ++{ ++ int error; ++ ++ if ((error = 
dsl_dsobj_to_dsname(zc->zc_name,zc->zc_obj,zc->zc_value))) ++ return (error); ++ ++ return (0); ++} ++ ++/* ++ * inputs: ++ * zc_name name of filesystem ++ * zc_obj object to find ++ * ++ * outputs: ++ * zc_value name of object ++ */ ++static int ++zfs_ioc_obj_to_path(zfs_cmd_t *zc) ++{ ++ objset_t *os; ++ int error; ++ ++ /* XXX reading from objset not owned */ ++ if ((error = dmu_objset_hold(zc->zc_name, FTAG, &os)) != 0) ++ return (error); ++ if (dmu_objset_type(os) != DMU_OST_ZFS) { ++ dmu_objset_rele(os, FTAG); ++ return (EINVAL); ++ } ++ error = zfs_obj_to_path(os, zc->zc_obj, zc->zc_value, ++ sizeof (zc->zc_value)); ++ dmu_objset_rele(os, FTAG); ++ ++ return (error); ++} ++ ++/* ++ * inputs: ++ * zc_name name of filesystem ++ * zc_obj object to find ++ * ++ * outputs: ++ * zc_stat stats on object ++ * zc_value path to object ++ */ ++static int ++zfs_ioc_obj_to_stats(zfs_cmd_t *zc) ++{ ++ objset_t *os; ++ int error; ++ ++ /* XXX reading from objset not owned */ ++ if ((error = dmu_objset_hold(zc->zc_name, FTAG, &os)) != 0) ++ return (error); ++ if (dmu_objset_type(os) != DMU_OST_ZFS) { ++ dmu_objset_rele(os, FTAG); ++ return (EINVAL); ++ } ++ error = zfs_obj_to_stats(os, zc->zc_obj, &zc->zc_stat, zc->zc_value, ++ sizeof (zc->zc_value)); ++ dmu_objset_rele(os, FTAG); ++ ++ return (error); ++} ++ ++static int ++zfs_ioc_vdev_add(zfs_cmd_t *zc) ++{ ++ spa_t *spa; ++ int error; ++ nvlist_t *config, **l2cache, **spares; ++ uint_t nl2cache = 0, nspares = 0; ++ ++ error = spa_open(zc->zc_name, &spa, FTAG); ++ if (error != 0) ++ return (error); ++ ++ error = get_nvlist(zc->zc_nvlist_conf, zc->zc_nvlist_conf_size, ++ zc->zc_iflags, &config); ++ (void) nvlist_lookup_nvlist_array(config, ZPOOL_CONFIG_L2CACHE, ++ &l2cache, &nl2cache); ++ ++ (void) nvlist_lookup_nvlist_array(config, ZPOOL_CONFIG_SPARES, ++ &spares, &nspares); ++ ++ /* ++ * A root pool with concatenated devices is not supported. ++ * Thus, a device cannot be added to a root pool. ++ * ++ * An intent log device cannot be added to a root pool because ++ * the ZIL is replayed during mountroot, and a separate log ++ * device cannot be accessed at that time. ++ * ++ * l2cache and spare devices are OK to add to a root pool. ++ */ ++ if (spa_bootfs(spa) != 0 && nl2cache == 0 && nspares == 0) { ++ nvlist_free(config); ++ spa_close(spa, FTAG); ++ return (EDOM); ++ } ++ ++ if (error == 0) { ++ error = spa_vdev_add(spa, config); ++ nvlist_free(config); ++ } ++ spa_close(spa, FTAG); ++ return (error); ++} ++ ++/* ++ * inputs: ++ * zc_name name of the pool ++ * zc_nvlist_conf nvlist of devices to remove ++ * zc_cookie to stop the remove? 
++ */ ++static int ++zfs_ioc_vdev_remove(zfs_cmd_t *zc) ++{ ++ spa_t *spa; ++ int error; ++ ++ error = spa_open(zc->zc_name, &spa, FTAG); ++ if (error != 0) ++ return (error); ++ error = spa_vdev_remove(spa, zc->zc_guid, B_FALSE); ++ spa_close(spa, FTAG); ++ return (error); ++} ++ ++static int ++zfs_ioc_vdev_set_state(zfs_cmd_t *zc) ++{ ++ spa_t *spa; ++ int error; ++ vdev_state_t newstate = VDEV_STATE_UNKNOWN; ++ ++ if ((error = spa_open(zc->zc_name, &spa, FTAG)) != 0) ++ return (error); ++ switch (zc->zc_cookie) { ++ case VDEV_STATE_ONLINE: ++ error = vdev_online(spa, zc->zc_guid, zc->zc_obj, &newstate); ++ break; ++ ++ case VDEV_STATE_OFFLINE: ++ error = vdev_offline(spa, zc->zc_guid, zc->zc_obj); ++ break; ++ ++ case VDEV_STATE_FAULTED: ++ if (zc->zc_obj != VDEV_AUX_ERR_EXCEEDED && ++ zc->zc_obj != VDEV_AUX_EXTERNAL) ++ zc->zc_obj = VDEV_AUX_ERR_EXCEEDED; ++ ++ error = vdev_fault(spa, zc->zc_guid, zc->zc_obj); ++ break; ++ ++ case VDEV_STATE_DEGRADED: ++ if (zc->zc_obj != VDEV_AUX_ERR_EXCEEDED && ++ zc->zc_obj != VDEV_AUX_EXTERNAL) ++ zc->zc_obj = VDEV_AUX_ERR_EXCEEDED; ++ ++ error = vdev_degrade(spa, zc->zc_guid, zc->zc_obj); ++ break; ++ ++ default: ++ error = EINVAL; ++ } ++ zc->zc_cookie = newstate; ++ spa_close(spa, FTAG); ++ return (error); ++} ++ ++static int ++zfs_ioc_vdev_attach(zfs_cmd_t *zc) ++{ ++ spa_t *spa; ++ int replacing = zc->zc_cookie; ++ nvlist_t *config; ++ int error; ++ ++ if ((error = spa_open(zc->zc_name, &spa, FTAG)) != 0) ++ return (error); ++ ++ if ((error = get_nvlist(zc->zc_nvlist_conf, zc->zc_nvlist_conf_size, ++ zc->zc_iflags, &config)) == 0) { ++ error = spa_vdev_attach(spa, zc->zc_guid, config, replacing); ++ nvlist_free(config); ++ } ++ ++ spa_close(spa, FTAG); ++ return (error); ++} ++ ++static int ++zfs_ioc_vdev_detach(zfs_cmd_t *zc) ++{ ++ spa_t *spa; ++ int error; ++ ++ if ((error = spa_open(zc->zc_name, &spa, FTAG)) != 0) ++ return (error); ++ ++ error = spa_vdev_detach(spa, zc->zc_guid, 0, B_FALSE); ++ ++ spa_close(spa, FTAG); ++ return (error); ++} ++ ++static int ++zfs_ioc_vdev_split(zfs_cmd_t *zc) ++{ ++ spa_t *spa; ++ nvlist_t *config, *props = NULL; ++ int error; ++ boolean_t exp = !!(zc->zc_cookie & ZPOOL_EXPORT_AFTER_SPLIT); ++ ++ if ((error = spa_open(zc->zc_name, &spa, FTAG)) != 0) ++ return (error); ++ ++ if ((error = get_nvlist(zc->zc_nvlist_conf, zc->zc_nvlist_conf_size, ++ zc->zc_iflags, &config))) { ++ spa_close(spa, FTAG); ++ return (error); ++ } ++ ++ if (zc->zc_nvlist_src_size != 0 && (error = ++ get_nvlist(zc->zc_nvlist_src, zc->zc_nvlist_src_size, ++ zc->zc_iflags, &props))) { ++ spa_close(spa, FTAG); ++ nvlist_free(config); ++ return (error); ++ } ++ ++ error = spa_vdev_split_mirror(spa, zc->zc_string, config, props, exp); ++ ++ spa_close(spa, FTAG); ++ ++ nvlist_free(config); ++ nvlist_free(props); ++ ++ return (error); ++} ++ ++static int ++zfs_ioc_vdev_setpath(zfs_cmd_t *zc) ++{ ++ spa_t *spa; ++ char *path = zc->zc_value; ++ uint64_t guid = zc->zc_guid; ++ int error; ++ ++ error = spa_open(zc->zc_name, &spa, FTAG); ++ if (error != 0) ++ return (error); ++ ++ error = spa_vdev_setpath(spa, guid, path); ++ spa_close(spa, FTAG); ++ return (error); ++} ++ ++static int ++zfs_ioc_vdev_setfru(zfs_cmd_t *zc) ++{ ++ spa_t *spa; ++ char *fru = zc->zc_value; ++ uint64_t guid = zc->zc_guid; ++ int error; ++ ++ error = spa_open(zc->zc_name, &spa, FTAG); ++ if (error != 0) ++ return (error); ++ ++ error = spa_vdev_setfru(spa, guid, fru); ++ spa_close(spa, FTAG); ++ return (error); ++} ++ ++static int ++zfs_ioc_objset_stats_impl(zfs_cmd_t 
*zc, objset_t *os) ++{ ++ int error = 0; ++ nvlist_t *nv; ++ ++ dmu_objset_fast_stat(os, &zc->zc_objset_stats); ++ ++ if (zc->zc_nvlist_dst != 0 && ++ (error = dsl_prop_get_all(os, &nv)) == 0) { ++ dmu_objset_stats(os, nv); ++ /* ++ * NB: zvol_get_stats() will read the objset contents, ++ * which we aren't supposed to do with a ++ * DS_MODE_USER hold, because it could be ++ * inconsistent. So this is a bit of a workaround... ++ * XXX reading with out owning ++ */ ++ if (!zc->zc_objset_stats.dds_inconsistent && ++ dmu_objset_type(os) == DMU_OST_ZVOL) { ++ error = zvol_get_stats(os, nv); ++ if (error == EIO) ++ return (error); ++ VERIFY3S(error, ==, 0); ++ } ++ if (error == 0) ++ error = put_nvlist(zc, nv); ++ nvlist_free(nv); ++ } ++ ++ return (error); ++} ++ ++/* ++ * inputs: ++ * zc_name name of filesystem ++ * zc_nvlist_dst_size size of buffer for property nvlist ++ * ++ * outputs: ++ * zc_objset_stats stats ++ * zc_nvlist_dst property nvlist ++ * zc_nvlist_dst_size size of property nvlist ++ */ ++static int ++zfs_ioc_objset_stats(zfs_cmd_t *zc) ++{ ++ objset_t *os = NULL; ++ int error; ++ ++ if ((error = dmu_objset_hold(zc->zc_name, FTAG, &os))) ++ return (error); ++ ++ error = zfs_ioc_objset_stats_impl(zc, os); ++ ++ dmu_objset_rele(os, FTAG); ++ ++ return (error); ++} ++ ++/* ++ * inputs: ++ * zc_name name of filesystem ++ * zc_nvlist_dst_size size of buffer for property nvlist ++ * ++ * outputs: ++ * zc_nvlist_dst received property nvlist ++ * zc_nvlist_dst_size size of received property nvlist ++ * ++ * Gets received properties (distinct from local properties on or after ++ * SPA_VERSION_RECVD_PROPS) for callers who want to differentiate received from ++ * local property values. ++ */ ++static int ++zfs_ioc_objset_recvd_props(zfs_cmd_t *zc) ++{ ++ objset_t *os = NULL; ++ int error; ++ nvlist_t *nv; ++ ++ if ((error = dmu_objset_hold(zc->zc_name, FTAG, &os))) ++ return (error); ++ ++ /* ++ * Without this check, we would return local property values if the ++ * caller has not already received properties on or after ++ * SPA_VERSION_RECVD_PROPS. ++ */ ++ if (!dsl_prop_get_hasrecvd(os)) { ++ dmu_objset_rele(os, FTAG); ++ return (ENOTSUP); ++ } ++ ++ if (zc->zc_nvlist_dst != 0 && ++ (error = dsl_prop_get_received(os, &nv)) == 0) { ++ error = put_nvlist(zc, nv); ++ nvlist_free(nv); ++ } ++ ++ dmu_objset_rele(os, FTAG); ++ return (error); ++} ++ ++static int ++nvl_add_zplprop(objset_t *os, nvlist_t *props, zfs_prop_t prop) ++{ ++ uint64_t value; ++ int error; ++ ++ /* ++ * zfs_get_zplprop() will either find a value or give us ++ * the default value (if there is one). ++ */ ++ if ((error = zfs_get_zplprop(os, prop, &value)) != 0) ++ return (error); ++ VERIFY(nvlist_add_uint64(props, zfs_prop_to_name(prop), value) == 0); ++ return (0); ++} ++ ++/* ++ * inputs: ++ * zc_name name of filesystem ++ * zc_nvlist_dst_size size of buffer for zpl property nvlist ++ * ++ * outputs: ++ * zc_nvlist_dst zpl property nvlist ++ * zc_nvlist_dst_size size of zpl property nvlist ++ */ ++static int ++zfs_ioc_objset_zplprops(zfs_cmd_t *zc) ++{ ++ objset_t *os; ++ int err; ++ ++ /* XXX reading without owning */ ++ if ((err = dmu_objset_hold(zc->zc_name, FTAG, &os))) ++ return (err); ++ ++ dmu_objset_fast_stat(os, &zc->zc_objset_stats); ++ ++ /* ++ * NB: nvl_add_zplprop() will read the objset contents, ++ * which we aren't supposed to do with a DS_MODE_USER ++ * hold, because it could be inconsistent. 
++ */ ++ if (zc->zc_nvlist_dst != 0 && ++ !zc->zc_objset_stats.dds_inconsistent && ++ dmu_objset_type(os) == DMU_OST_ZFS) { ++ nvlist_t *nv; ++ ++ VERIFY(nvlist_alloc(&nv, NV_UNIQUE_NAME, KM_SLEEP) == 0); ++ if ((err = nvl_add_zplprop(os, nv, ZFS_PROP_VERSION)) == 0 && ++ (err = nvl_add_zplprop(os, nv, ZFS_PROP_NORMALIZE)) == 0 && ++ (err = nvl_add_zplprop(os, nv, ZFS_PROP_UTF8ONLY)) == 0 && ++ (err = nvl_add_zplprop(os, nv, ZFS_PROP_CASE)) == 0) ++ err = put_nvlist(zc, nv); ++ nvlist_free(nv); ++ } else { ++ err = ENOENT; ++ } ++ dmu_objset_rele(os, FTAG); ++ return (err); ++} ++ ++static boolean_t ++dataset_name_hidden(const char *name) ++{ ++ /* ++ * Skip over datasets that are not visible in this zone, ++ * internal datasets (which have a $ in their name), and ++ * temporary datasets (which have a % in their name). ++ */ ++ if (strchr(name, '$') != NULL) ++ return (B_TRUE); ++ if (strchr(name, '%') != NULL) ++ return (B_TRUE); ++ if (!INGLOBALZONE(curproc) && !zone_dataset_visible(name, NULL)) ++ return (B_TRUE); ++ return (B_FALSE); ++} ++ ++/* ++ * inputs: ++ * zc_name name of filesystem ++ * zc_cookie zap cursor ++ * zc_nvlist_dst_size size of buffer for property nvlist ++ * ++ * outputs: ++ * zc_name name of next filesystem ++ * zc_cookie zap cursor ++ * zc_objset_stats stats ++ * zc_nvlist_dst property nvlist ++ * zc_nvlist_dst_size size of property nvlist ++ */ ++static int ++zfs_ioc_dataset_list_next(zfs_cmd_t *zc) ++{ ++ objset_t *os; ++ int error; ++ char *p; ++ size_t orig_len = strlen(zc->zc_name); ++ ++top: ++ if ((error = dmu_objset_hold(zc->zc_name, FTAG, &os))) { ++ if (error == ENOENT) ++ error = ESRCH; ++ return (error); ++ } ++ ++ p = strrchr(zc->zc_name, '/'); ++ if (p == NULL || p[1] != '\0') ++ (void) strlcat(zc->zc_name, "/", sizeof (zc->zc_name)); ++ p = zc->zc_name + strlen(zc->zc_name); ++ ++ /* ++ * Pre-fetch the datasets. dmu_objset_prefetch() always returns 0 ++ * but is not declared void because it's called by dmu_objset_find(). ++ */ ++ if (zc->zc_cookie == 0) { ++ uint64_t cookie = 0; ++ int len = sizeof (zc->zc_name) - (p - zc->zc_name); ++ ++ while (dmu_dir_list_next(os, len, p, NULL, &cookie) == 0) { ++ if (!dataset_name_hidden(zc->zc_name)) ++ (void) dmu_objset_prefetch(zc->zc_name, NULL); ++ } ++ } ++ ++ do { ++ error = dmu_dir_list_next(os, ++ sizeof (zc->zc_name) - (p - zc->zc_name), p, ++ NULL, &zc->zc_cookie); ++ if (error == ENOENT) ++ error = ESRCH; ++ } while (error == 0 && dataset_name_hidden(zc->zc_name)); ++ dmu_objset_rele(os, FTAG); ++ ++ /* ++ * If it's an internal dataset (i.e. with a '$' in its name), ++ * don't try to get stats for it, otherwise we'll return ENOENT. ++ */ ++ if (error == 0 && strchr(zc->zc_name, '$') == NULL) { ++ error = zfs_ioc_objset_stats(zc); /* fill in the stats */ ++ if (error == ENOENT) { ++ /* We lost a race with destroy, get the next one. 
*/ ++ zc->zc_name[orig_len] = '\0'; ++ goto top; ++ } ++ } ++ return (error); ++} ++ ++/* ++ * inputs: ++ * zc_name name of filesystem ++ * zc_cookie zap cursor ++ * zc_nvlist_dst_size size of buffer for property nvlist ++ * ++ * outputs: ++ * zc_name name of next snapshot ++ * zc_objset_stats stats ++ * zc_nvlist_dst property nvlist ++ * zc_nvlist_dst_size size of property nvlist ++ */ ++static int ++zfs_ioc_snapshot_list_next(zfs_cmd_t *zc) ++{ ++ objset_t *os; ++ int error; ++ ++top: ++ if (zc->zc_cookie == 0 && !zc->zc_simple) ++ (void) dmu_objset_find(zc->zc_name, dmu_objset_prefetch, ++ NULL, DS_FIND_SNAPSHOTS); ++ ++ error = dmu_objset_hold(zc->zc_name, FTAG, &os); ++ if (error) ++ return (error == ENOENT ? ESRCH : error); ++ ++ /* ++ * A dataset name of maximum length cannot have any snapshots, ++ * so exit immediately. ++ */ ++ if (strlcat(zc->zc_name, "@", sizeof (zc->zc_name)) >= MAXNAMELEN) { ++ dmu_objset_rele(os, FTAG); ++ return (ESRCH); ++ } ++ ++ error = dmu_snapshot_list_next(os, ++ sizeof (zc->zc_name) - strlen(zc->zc_name), ++ zc->zc_name + strlen(zc->zc_name), &zc->zc_obj, &zc->zc_cookie, ++ NULL); ++ ++ if (error == 0 && !zc->zc_simple) { ++ dsl_dataset_t *ds; ++ dsl_pool_t *dp = os->os_dsl_dataset->ds_dir->dd_pool; ++ ++ /* ++ * Since we probably don't have a hold on this snapshot, ++ * it's possible that the objsetid could have been destroyed ++ * and reused for a new objset. It's OK if this happens during ++ * a zfs send operation, since the new createtxg will be ++ * beyond the range we're interested in. ++ */ ++ rw_enter(&dp->dp_config_rwlock, RW_READER); ++ error = dsl_dataset_hold_obj(dp, zc->zc_obj, FTAG, &ds); ++ rw_exit(&dp->dp_config_rwlock); ++ if (error) { ++ if (error == ENOENT) { ++ /* Racing with destroy, get the next one. */ ++ *strchr(zc->zc_name, '@') = '\0'; ++ dmu_objset_rele(os, FTAG); ++ goto top; ++ } ++ } else { ++ objset_t *ossnap; ++ ++ error = dmu_objset_from_ds(ds, &ossnap); ++ if (error == 0) ++ error = zfs_ioc_objset_stats_impl(zc, ossnap); ++ dsl_dataset_rele(ds, FTAG); ++ } ++ } else if (error == ENOENT) { ++ error = ESRCH; ++ } ++ ++ dmu_objset_rele(os, FTAG); ++ /* if we failed, undo the @ that we tacked on to zc_name */ ++ if (error) ++ *strchr(zc->zc_name, '@') = '\0'; ++ return (error); ++} ++ ++static int ++zfs_prop_set_userquota(const char *dsname, nvpair_t *pair) ++{ ++ const char *propname = nvpair_name(pair); ++ uint64_t *valary; ++ unsigned int vallen; ++ const char *domain; ++ char *dash; ++ zfs_userquota_prop_t type; ++ uint64_t rid; ++ uint64_t quota; ++ zfs_sb_t *zsb; ++ int err; ++ ++ if (nvpair_type(pair) == DATA_TYPE_NVLIST) { ++ nvlist_t *attrs; ++ VERIFY(nvpair_value_nvlist(pair, &attrs) == 0); ++ if (nvlist_lookup_nvpair(attrs, ZPROP_VALUE, ++ &pair) != 0) ++ return (EINVAL); ++ } ++ ++ /* ++ * A correctly constructed propname is encoded as ++ * userquota@<rid>-<domain>. 
++ */ ++ if ((dash = strchr(propname, '-')) == NULL || ++ nvpair_value_uint64_array(pair, &valary, &vallen) != 0 || ++ vallen != 3) ++ return (EINVAL); ++ ++ domain = dash + 1; ++ type = valary[0]; ++ rid = valary[1]; ++ quota = valary[2]; ++ ++ err = zfs_sb_hold(dsname, FTAG, &zsb, B_FALSE); ++ if (err == 0) { ++ err = zfs_set_userquota(zsb, type, domain, rid, quota); ++ zfs_sb_rele(zsb, FTAG); ++ } ++ ++ return (err); ++} ++ ++/* ++ * If the named property is one that has a special function to set its value, ++ * return 0 on success and a positive error code on failure; otherwise if it is ++ * not one of the special properties handled by this function, return -1. ++ * ++ * XXX: It would be better for callers of the property interface if we handled ++ * these special cases in dsl_prop.c (in the dsl layer). ++ */ ++static int ++zfs_prop_set_special(const char *dsname, zprop_source_t source, ++ nvpair_t *pair) ++{ ++ const char *propname = nvpair_name(pair); ++ zfs_prop_t prop = zfs_name_to_prop(propname); ++ uint64_t intval; ++ int err; ++ ++ if (prop == ZPROP_INVAL) { ++ if (zfs_prop_userquota(propname)) ++ return (zfs_prop_set_userquota(dsname, pair)); ++ return (-1); ++ } ++ ++ if (nvpair_type(pair) == DATA_TYPE_NVLIST) { ++ nvlist_t *attrs; ++ VERIFY(nvpair_value_nvlist(pair, &attrs) == 0); ++ VERIFY(nvlist_lookup_nvpair(attrs, ZPROP_VALUE, ++ &pair) == 0); ++ } ++ ++ if (zfs_prop_get_type(prop) == PROP_TYPE_STRING) ++ return (-1); ++ ++ VERIFY(0 == nvpair_value_uint64(pair, &intval)); ++ ++ switch (prop) { ++ case ZFS_PROP_QUOTA: ++ err = dsl_dir_set_quota(dsname, source, intval); ++ break; ++ case ZFS_PROP_REFQUOTA: ++ err = dsl_dataset_set_quota(dsname, source, intval); ++ break; ++ case ZFS_PROP_RESERVATION: ++ err = dsl_dir_set_reservation(dsname, source, intval); ++ break; ++ case ZFS_PROP_REFRESERVATION: ++ err = dsl_dataset_set_reservation(dsname, source, intval); ++ break; ++ case ZFS_PROP_VOLSIZE: ++ err = zvol_set_volsize(dsname, intval); ++ break; ++ case ZFS_PROP_VERSION: ++ { ++ zfs_sb_t *zsb; ++ ++ if ((err = zfs_sb_hold(dsname, FTAG, &zsb, B_TRUE)) != 0) ++ break; ++ ++ err = zfs_set_version(zsb, intval); ++ zfs_sb_rele(zsb, FTAG); ++ ++ if (err == 0 && intval >= ZPL_VERSION_USERSPACE) { ++ zfs_cmd_t *zc; ++ ++ zc = kmem_zalloc(sizeof (zfs_cmd_t), ++ KM_SLEEP | KM_NODEBUG); ++ (void) strcpy(zc->zc_name, dsname); ++ (void) zfs_ioc_userspace_upgrade(zc); ++ kmem_free(zc, sizeof (zfs_cmd_t)); ++ } ++ break; ++ } ++ ++ default: ++ err = -1; ++ } ++ ++ return (err); ++} ++ ++/* ++ * This function is best effort. If it fails to set any of the given properties, ++ * it continues to set as many as it can and returns the first error ++ * encountered. If the caller provides a non-NULL errlist, it also gives the ++ * complete list of names of all the properties it failed to set along with the ++ * corresponding error numbers. The caller is responsible for freeing the ++ * returned errlist. ++ * ++ * If every property is set successfully, zero is returned and the list pointed ++ * at by errlist is NULL. 
++ */ ++int ++zfs_set_prop_nvlist(const char *dsname, zprop_source_t source, nvlist_t *nvl, ++ nvlist_t **errlist) ++{ ++ nvpair_t *pair; ++ nvpair_t *propval; ++ int rv = 0; ++ uint64_t intval; ++ char *strval; ++ nvlist_t *genericnvl; ++ nvlist_t *errors; ++ nvlist_t *retrynvl; ++ ++ VERIFY(nvlist_alloc(&genericnvl, NV_UNIQUE_NAME, KM_SLEEP) == 0); ++ VERIFY(nvlist_alloc(&errors, NV_UNIQUE_NAME, KM_SLEEP) == 0); ++ VERIFY(nvlist_alloc(&retrynvl, NV_UNIQUE_NAME, KM_SLEEP) == 0); ++ ++retry: ++ pair = NULL; ++ while ((pair = nvlist_next_nvpair(nvl, pair)) != NULL) { ++ const char *propname = nvpair_name(pair); ++ zfs_prop_t prop = zfs_name_to_prop(propname); ++ int err = 0; ++ ++ /* decode the property value */ ++ propval = pair; ++ if (nvpair_type(pair) == DATA_TYPE_NVLIST) { ++ nvlist_t *attrs; ++ VERIFY(nvpair_value_nvlist(pair, &attrs) == 0); ++ if (nvlist_lookup_nvpair(attrs, ZPROP_VALUE, ++ &propval) != 0) ++ err = EINVAL; ++ } ++ ++ /* Validate value type */ ++ if (err == 0 && prop == ZPROP_INVAL) { ++ if (zfs_prop_user(propname)) { ++ if (nvpair_type(propval) != DATA_TYPE_STRING) ++ err = EINVAL; ++ } else if (zfs_prop_userquota(propname)) { ++ if (nvpair_type(propval) != ++ DATA_TYPE_UINT64_ARRAY) ++ err = EINVAL; ++ } else { ++ err = EINVAL; ++ } ++ } else if (err == 0) { ++ if (nvpair_type(propval) == DATA_TYPE_STRING) { ++ if (zfs_prop_get_type(prop) != PROP_TYPE_STRING) ++ err = EINVAL; ++ } else if (nvpair_type(propval) == DATA_TYPE_UINT64) { ++ const char *unused; ++ ++ VERIFY(nvpair_value_uint64(propval, ++ &intval) == 0); ++ ++ switch (zfs_prop_get_type(prop)) { ++ case PROP_TYPE_NUMBER: ++ break; ++ case PROP_TYPE_STRING: ++ err = EINVAL; ++ break; ++ case PROP_TYPE_INDEX: ++ if (zfs_prop_index_to_string(prop, ++ intval, &unused) != 0) ++ err = EINVAL; ++ break; ++ default: ++ cmn_err(CE_PANIC, ++ "unknown property type"); ++ } ++ } else { ++ err = EINVAL; ++ } ++ } ++ ++ /* Validate permissions */ ++ if (err == 0) ++ err = zfs_check_settable(dsname, pair, CRED()); ++ ++ if (err == 0) { ++ err = zfs_prop_set_special(dsname, source, pair); ++ if (err == -1) { ++ /* ++ * For better performance we build up a list of ++ * properties to set in a single transaction. ++ */ ++ err = nvlist_add_nvpair(genericnvl, pair); ++ } else if (err != 0 && nvl != retrynvl) { ++ /* ++ * This may be a spurious error caused by ++ * receiving quota and reservation out of order. ++ * Try again in a second pass. ++ */ ++ err = nvlist_add_nvpair(retrynvl, pair); ++ } ++ } ++ ++ if (err != 0) ++ VERIFY(nvlist_add_int32(errors, propname, err) == 0); ++ } ++ ++ if (nvl != retrynvl && !nvlist_empty(retrynvl)) { ++ nvl = retrynvl; ++ goto retry; ++ } ++ ++ if (!nvlist_empty(genericnvl) && ++ dsl_props_set(dsname, source, genericnvl) != 0) { ++ /* ++ * If this fails, we still want to set as many properties as we ++ * can, so try setting them individually. 
++ */ ++ pair = NULL; ++ while ((pair = nvlist_next_nvpair(genericnvl, pair)) != NULL) { ++ const char *propname = nvpair_name(pair); ++ int err = 0; ++ ++ propval = pair; ++ if (nvpair_type(pair) == DATA_TYPE_NVLIST) { ++ nvlist_t *attrs; ++ VERIFY(nvpair_value_nvlist(pair, &attrs) == 0); ++ VERIFY(nvlist_lookup_nvpair(attrs, ZPROP_VALUE, ++ &propval) == 0); ++ } ++ ++ if (nvpair_type(propval) == DATA_TYPE_STRING) { ++ VERIFY(nvpair_value_string(propval, ++ &strval) == 0); ++ err = dsl_prop_set(dsname, propname, source, 1, ++ strlen(strval) + 1, strval); ++ } else { ++ VERIFY(nvpair_value_uint64(propval, ++ &intval) == 0); ++ err = dsl_prop_set(dsname, propname, source, 8, ++ 1, &intval); ++ } ++ ++ if (err != 0) { ++ VERIFY(nvlist_add_int32(errors, propname, ++ err) == 0); ++ } ++ } ++ } ++ nvlist_free(genericnvl); ++ nvlist_free(retrynvl); ++ ++ if ((pair = nvlist_next_nvpair(errors, NULL)) == NULL) { ++ nvlist_free(errors); ++ errors = NULL; ++ } else { ++ VERIFY(nvpair_value_int32(pair, &rv) == 0); ++ } ++ ++ if (errlist == NULL) ++ nvlist_free(errors); ++ else ++ *errlist = errors; ++ ++ return (rv); ++} ++ ++/* ++ * Check that all the properties are valid user properties. ++ */ ++static int ++zfs_check_userprops(char *fsname, nvlist_t *nvl) ++{ ++ nvpair_t *pair = NULL; ++ int error = 0; ++ ++ while ((pair = nvlist_next_nvpair(nvl, pair)) != NULL) { ++ const char *propname = nvpair_name(pair); ++ char *valstr; ++ ++ if (!zfs_prop_user(propname) || ++ nvpair_type(pair) != DATA_TYPE_STRING) ++ return (EINVAL); ++ ++ if ((error = zfs_secpolicy_write_perms(fsname, ++ ZFS_DELEG_PERM_USERPROP, CRED()))) ++ return (error); ++ ++ if (strlen(propname) >= ZAP_MAXNAMELEN) ++ return (ENAMETOOLONG); ++ ++ VERIFY(nvpair_value_string(pair, &valstr) == 0); ++ if (strlen(valstr) >= ZAP_MAXVALUELEN) ++ return (E2BIG); ++ } ++ return (0); ++} ++ ++static void ++props_skip(nvlist_t *props, nvlist_t *skipped, nvlist_t **newprops) ++{ ++ nvpair_t *pair; ++ ++ VERIFY(nvlist_alloc(newprops, NV_UNIQUE_NAME, KM_SLEEP) == 0); ++ ++ pair = NULL; ++ while ((pair = nvlist_next_nvpair(props, pair)) != NULL) { ++ if (nvlist_exists(skipped, nvpair_name(pair))) ++ continue; ++ ++ VERIFY(nvlist_add_nvpair(*newprops, pair) == 0); ++ } ++} ++ ++static int ++clear_received_props(objset_t *os, const char *fs, nvlist_t *props, ++ nvlist_t *skipped) ++{ ++ int err = 0; ++ nvlist_t *cleared_props = NULL; ++ props_skip(props, skipped, &cleared_props); ++ if (!nvlist_empty(cleared_props)) { ++ /* ++ * Acts on local properties until the dataset has received ++ * properties at least once on or after SPA_VERSION_RECVD_PROPS. ++ */ ++ zprop_source_t flags = (ZPROP_SRC_NONE | ++ (dsl_prop_get_hasrecvd(os) ? ZPROP_SRC_RECEIVED : 0)); ++ err = zfs_set_prop_nvlist(fs, flags, cleared_props, NULL); ++ } ++ nvlist_free(cleared_props); ++ return (err); ++} ++ ++/* ++ * inputs: ++ * zc_name name of filesystem ++ * zc_value name of property to set ++ * zc_nvlist_src{_size} nvlist of properties to apply ++ * zc_cookie received properties flag ++ * ++ * outputs: ++ * zc_nvlist_dst{_size} error for each unapplied received property ++ */ ++static int ++zfs_ioc_set_prop(zfs_cmd_t *zc) ++{ ++ nvlist_t *nvl; ++ boolean_t received = zc->zc_cookie; ++ zprop_source_t source = (received ? 
ZPROP_SRC_RECEIVED : ++ ZPROP_SRC_LOCAL); ++ nvlist_t *errors = NULL; ++ int error; ++ ++ if ((error = get_nvlist(zc->zc_nvlist_src, zc->zc_nvlist_src_size, ++ zc->zc_iflags, &nvl)) != 0) ++ return (error); ++ ++ if (received) { ++ nvlist_t *origprops; ++ objset_t *os; ++ ++ if (dmu_objset_hold(zc->zc_name, FTAG, &os) == 0) { ++ if (dsl_prop_get_received(os, &origprops) == 0) { ++ (void) clear_received_props(os, ++ zc->zc_name, origprops, nvl); ++ nvlist_free(origprops); ++ } ++ ++ dsl_prop_set_hasrecvd(os); ++ dmu_objset_rele(os, FTAG); ++ } ++ } ++ ++ error = zfs_set_prop_nvlist(zc->zc_name, source, nvl, &errors); ++ ++ if (zc->zc_nvlist_dst != 0 && errors != NULL) { ++ (void) put_nvlist(zc, errors); ++ } ++ ++ nvlist_free(errors); ++ nvlist_free(nvl); ++ return (error); ++} ++ ++/* ++ * inputs: ++ * zc_name name of filesystem ++ * zc_value name of property to inherit ++ * zc_cookie revert to received value if TRUE ++ * ++ * outputs: none ++ */ ++static int ++zfs_ioc_inherit_prop(zfs_cmd_t *zc) ++{ ++ const char *propname = zc->zc_value; ++ zfs_prop_t prop = zfs_name_to_prop(propname); ++ boolean_t received = zc->zc_cookie; ++ zprop_source_t source = (received ++ ? ZPROP_SRC_NONE /* revert to received value, if any */ ++ : ZPROP_SRC_INHERITED); /* explicitly inherit */ ++ ++ if (received) { ++ nvlist_t *dummy; ++ nvpair_t *pair; ++ zprop_type_t type; ++ int err; ++ ++ /* ++ * zfs_prop_set_special() expects properties in the form of an ++ * nvpair with type info. ++ */ ++ if (prop == ZPROP_INVAL) { ++ if (!zfs_prop_user(propname)) ++ return (EINVAL); ++ ++ type = PROP_TYPE_STRING; ++ } else if (prop == ZFS_PROP_VOLSIZE || ++ prop == ZFS_PROP_VERSION) { ++ return (EINVAL); ++ } else { ++ type = zfs_prop_get_type(prop); ++ } ++ ++ VERIFY(nvlist_alloc(&dummy, NV_UNIQUE_NAME, KM_SLEEP) == 0); ++ ++ switch (type) { ++ case PROP_TYPE_STRING: ++ VERIFY(0 == nvlist_add_string(dummy, propname, "")); ++ break; ++ case PROP_TYPE_NUMBER: ++ case PROP_TYPE_INDEX: ++ VERIFY(0 == nvlist_add_uint64(dummy, propname, 0)); ++ break; ++ default: ++ nvlist_free(dummy); ++ return (EINVAL); ++ } ++ ++ pair = nvlist_next_nvpair(dummy, NULL); ++ err = zfs_prop_set_special(zc->zc_name, source, pair); ++ nvlist_free(dummy); ++ if (err != -1) ++ return (err); /* special property already handled */ ++ } else { ++ /* ++ * Only check this in the non-received case. We want to allow ++ * 'inherit -S' to revert non-inheritable properties like quota ++ * and reservation to the received or default values even though ++ * they are not considered inheritable. ++ */ ++ if (prop != ZPROP_INVAL && !zfs_prop_inheritable(prop)) ++ return (EINVAL); ++ } ++ ++ /* the property name has been validated by zfs_secpolicy_inherit() */ ++ return (dsl_prop_set(zc->zc_name, zc->zc_value, source, 0, 0, NULL)); ++} ++ ++static int ++zfs_ioc_pool_set_props(zfs_cmd_t *zc) ++{ ++ nvlist_t *props; ++ spa_t *spa; ++ int error; ++ nvpair_t *pair; ++ ++ if ((error = get_nvlist(zc->zc_nvlist_src, zc->zc_nvlist_src_size, ++ zc->zc_iflags, &props))) ++ return (error); ++ ++ /* ++ * If the only property is the configfile, then just do a spa_lookup() ++ * to handle the faulted case. 
++ */ ++ pair = nvlist_next_nvpair(props, NULL); ++ if (pair != NULL && strcmp(nvpair_name(pair), ++ zpool_prop_to_name(ZPOOL_PROP_CACHEFILE)) == 0 && ++ nvlist_next_nvpair(props, pair) == NULL) { ++ mutex_enter(&spa_namespace_lock); ++ if ((spa = spa_lookup(zc->zc_name)) != NULL) { ++ spa_configfile_set(spa, props, B_FALSE); ++ spa_config_sync(spa, B_FALSE, B_TRUE); ++ } ++ mutex_exit(&spa_namespace_lock); ++ if (spa != NULL) { ++ nvlist_free(props); ++ return (0); ++ } ++ } ++ ++ if ((error = spa_open(zc->zc_name, &spa, FTAG)) != 0) { ++ nvlist_free(props); ++ return (error); ++ } ++ ++ error = spa_prop_set(spa, props); ++ ++ nvlist_free(props); ++ spa_close(spa, FTAG); ++ ++ return (error); ++} ++ ++static int ++zfs_ioc_pool_get_props(zfs_cmd_t *zc) ++{ ++ spa_t *spa; ++ int error; ++ nvlist_t *nvp = NULL; ++ ++ if ((error = spa_open(zc->zc_name, &spa, FTAG)) != 0) { ++ /* ++ * If the pool is faulted, there may be properties we can still ++ * get (such as altroot and cachefile), so attempt to get them ++ * anyway. ++ */ ++ mutex_enter(&spa_namespace_lock); ++ if ((spa = spa_lookup(zc->zc_name)) != NULL) ++ error = spa_prop_get(spa, &nvp); ++ mutex_exit(&spa_namespace_lock); ++ } else { ++ error = spa_prop_get(spa, &nvp); ++ spa_close(spa, FTAG); ++ } ++ ++ if (error == 0 && zc->zc_nvlist_dst != 0) ++ error = put_nvlist(zc, nvp); ++ else ++ error = EFAULT; ++ ++ nvlist_free(nvp); ++ return (error); ++} ++ ++/* ++ * inputs: ++ * zc_name name of volume ++ * ++ * outputs: none ++ */ ++static int ++zfs_ioc_create_minor(zfs_cmd_t *zc) ++{ ++ return (zvol_create_minor(zc->zc_name)); ++} ++ ++/* ++ * inputs: ++ * zc_name name of volume ++ * ++ * outputs: none ++ */ ++static int ++zfs_ioc_remove_minor(zfs_cmd_t *zc) ++{ ++ return (zvol_remove_minor(zc->zc_name)); ++} ++ ++/* ++ * inputs: ++ * zc_name name of filesystem ++ * zc_nvlist_src{_size} nvlist of delegated permissions ++ * zc_perm_action allow/unallow flag ++ * ++ * outputs: none ++ */ ++static int ++zfs_ioc_set_fsacl(zfs_cmd_t *zc) ++{ ++ int error; ++ nvlist_t *fsaclnv = NULL; ++ ++ if ((error = get_nvlist(zc->zc_nvlist_src, zc->zc_nvlist_src_size, ++ zc->zc_iflags, &fsaclnv)) != 0) ++ return (error); ++ ++ /* ++ * Verify nvlist is constructed correctly ++ */ ++ if ((error = zfs_deleg_verify_nvlist(fsaclnv)) != 0) { ++ nvlist_free(fsaclnv); ++ return (EINVAL); ++ } ++ ++ /* ++ * If we don't have PRIV_SYS_MOUNT, then validate ++ * that user is allowed to hand out each permission in ++ * the nvlist(s) ++ */ ++ ++ error = secpolicy_zfs(CRED()); ++ if (error) { ++ if (zc->zc_perm_action == B_FALSE) { ++ error = dsl_deleg_can_allow(zc->zc_name, ++ fsaclnv, CRED()); ++ } else { ++ error = dsl_deleg_can_unallow(zc->zc_name, ++ fsaclnv, CRED()); ++ } ++ } ++ ++ if (error == 0) ++ error = dsl_deleg_set(zc->zc_name, fsaclnv, zc->zc_perm_action); ++ ++ nvlist_free(fsaclnv); ++ return (error); ++} ++ ++/* ++ * inputs: ++ * zc_name name of filesystem ++ * ++ * outputs: ++ * zc_nvlist_src{_size} nvlist of delegated permissions ++ */ ++static int ++zfs_ioc_get_fsacl(zfs_cmd_t *zc) ++{ ++ nvlist_t *nvp; ++ int error; ++ ++ if ((error = dsl_deleg_get(zc->zc_name, &nvp)) == 0) { ++ error = put_nvlist(zc, nvp); ++ nvlist_free(nvp); ++ } ++ ++ return (error); ++} ++ ++/* ARGSUSED */ ++static void ++zfs_create_cb(objset_t *os, void *arg, cred_t *cr, dmu_tx_t *tx) ++{ ++ zfs_creat_t *zct = arg; ++ ++ zfs_create_fs(os, cr, zct->zct_zplprops, tx); ++} ++ ++#define ZFS_PROP_UNDEFINED ((uint64_t)-1) ++ ++/* ++ * inputs: ++ * createprops list of properties 
requested by creator ++ * default_zplver zpl version to use if unspecified in createprops ++ * fuids_ok fuids allowed in this version of the spa? ++ * os parent objset pointer (NULL if root fs) ++ * ++ * outputs: ++ * zplprops values for the zplprops we attach to the master node object ++ * is_ci true if requested file system will be purely case-insensitive ++ * ++ * Determine the settings for utf8only, normalization and ++ * casesensitivity. Specific values may have been requested by the ++ * creator and/or we can inherit values from the parent dataset. If ++ * the file system is of too early a vintage, a creator cannot ++ * request settings for these properties, even if the requested ++ * setting is the default value. We don't actually want to create dsl ++ * properties for these, so remove them from the source nvlist after ++ * processing. ++ */ ++static int ++zfs_fill_zplprops_impl(objset_t *os, uint64_t zplver, ++ boolean_t fuids_ok, boolean_t sa_ok, nvlist_t *createprops, ++ nvlist_t *zplprops, boolean_t *is_ci) ++{ ++ uint64_t sense = ZFS_PROP_UNDEFINED; ++ uint64_t norm = ZFS_PROP_UNDEFINED; ++ uint64_t u8 = ZFS_PROP_UNDEFINED; ++ int error; ++ ++ ASSERT(zplprops != NULL); ++ ++ /* ++ * Pull out creator prop choices, if any. ++ */ ++ if (createprops) { ++ (void) nvlist_lookup_uint64(createprops, ++ zfs_prop_to_name(ZFS_PROP_VERSION), &zplver); ++ (void) nvlist_lookup_uint64(createprops, ++ zfs_prop_to_name(ZFS_PROP_NORMALIZE), &norm); ++ (void) nvlist_remove_all(createprops, ++ zfs_prop_to_name(ZFS_PROP_NORMALIZE)); ++ (void) nvlist_lookup_uint64(createprops, ++ zfs_prop_to_name(ZFS_PROP_UTF8ONLY), &u8); ++ (void) nvlist_remove_all(createprops, ++ zfs_prop_to_name(ZFS_PROP_UTF8ONLY)); ++ (void) nvlist_lookup_uint64(createprops, ++ zfs_prop_to_name(ZFS_PROP_CASE), &sense); ++ (void) nvlist_remove_all(createprops, ++ zfs_prop_to_name(ZFS_PROP_CASE)); ++ } ++ ++ /* ++ * If the zpl version requested is whacky or the file system ++ * or pool version is too "young" to support normalization ++ * and the creator tried to set a value for one of the props, ++ * error out. ++ */ ++ if ((zplver < ZPL_VERSION_INITIAL || zplver > ZPL_VERSION) || ++ (zplver >= ZPL_VERSION_FUID && !fuids_ok) || ++ (zplver >= ZPL_VERSION_SA && !sa_ok) || ++ (zplver < ZPL_VERSION_NORMALIZATION && ++ (norm != ZFS_PROP_UNDEFINED || u8 != ZFS_PROP_UNDEFINED || ++ sense != ZFS_PROP_UNDEFINED))) ++ return (ENOTSUP); ++ ++ /* ++ * Put the version in the zplprops ++ */ ++ VERIFY(nvlist_add_uint64(zplprops, ++ zfs_prop_to_name(ZFS_PROP_VERSION), zplver) == 0); ++ ++ if (norm == ZFS_PROP_UNDEFINED && ++ (error = zfs_get_zplprop(os, ZFS_PROP_NORMALIZE, &norm)) != 0) ++ return (error); ++ VERIFY(nvlist_add_uint64(zplprops, ++ zfs_prop_to_name(ZFS_PROP_NORMALIZE), norm) == 0); ++ ++ /* ++ * If we're normalizing, names must always be valid UTF-8 strings. 
++ */ ++ if (norm) ++ u8 = 1; ++ if (u8 == ZFS_PROP_UNDEFINED && ++ (error = zfs_get_zplprop(os, ZFS_PROP_UTF8ONLY, &u8)) != 0) ++ return (error); ++ VERIFY(nvlist_add_uint64(zplprops, ++ zfs_prop_to_name(ZFS_PROP_UTF8ONLY), u8) == 0); ++ ++ if (sense == ZFS_PROP_UNDEFINED && ++ (error = zfs_get_zplprop(os, ZFS_PROP_CASE, &sense)) != 0) ++ return (error); ++ VERIFY(nvlist_add_uint64(zplprops, ++ zfs_prop_to_name(ZFS_PROP_CASE), sense) == 0); ++ ++ if (is_ci) ++ *is_ci = (sense == ZFS_CASE_INSENSITIVE); ++ ++ return (0); ++} ++ ++static int ++zfs_fill_zplprops(const char *dataset, nvlist_t *createprops, ++ nvlist_t *zplprops, boolean_t *is_ci) ++{ ++ boolean_t fuids_ok, sa_ok; ++ uint64_t zplver = ZPL_VERSION; ++ objset_t *os = NULL; ++ char parentname[MAXNAMELEN]; ++ char *cp; ++ spa_t *spa; ++ uint64_t spa_vers; ++ int error; ++ ++ (void) strlcpy(parentname, dataset, sizeof (parentname)); ++ cp = strrchr(parentname, '/'); ++ ASSERT(cp != NULL); ++ cp[0] = '\0'; ++ ++ if ((error = spa_open(dataset, &spa, FTAG)) != 0) ++ return (error); ++ ++ spa_vers = spa_version(spa); ++ spa_close(spa, FTAG); ++ ++ zplver = zfs_zpl_version_map(spa_vers); ++ fuids_ok = (zplver >= ZPL_VERSION_FUID); ++ sa_ok = (zplver >= ZPL_VERSION_SA); ++ ++ /* ++ * Open parent object set so we can inherit zplprop values. ++ */ ++ if ((error = dmu_objset_hold(parentname, FTAG, &os)) != 0) ++ return (error); ++ ++ error = zfs_fill_zplprops_impl(os, zplver, fuids_ok, sa_ok, createprops, ++ zplprops, is_ci); ++ dmu_objset_rele(os, FTAG); ++ return (error); ++} ++ ++static int ++zfs_fill_zplprops_root(uint64_t spa_vers, nvlist_t *createprops, ++ nvlist_t *zplprops, boolean_t *is_ci) ++{ ++ boolean_t fuids_ok; ++ boolean_t sa_ok; ++ uint64_t zplver = ZPL_VERSION; ++ int error; ++ ++ zplver = zfs_zpl_version_map(spa_vers); ++ fuids_ok = (zplver >= ZPL_VERSION_FUID); ++ sa_ok = (zplver >= ZPL_VERSION_SA); ++ ++ error = zfs_fill_zplprops_impl(NULL, zplver, fuids_ok, sa_ok, ++ createprops, zplprops, is_ci); ++ return (error); ++} ++ ++/* ++ * inputs: ++ * zc_objset_type type of objset to create (fs vs zvol) ++ * zc_name name of new objset ++ * zc_value name of snapshot to clone from (may be empty) ++ * zc_nvlist_src{_size} nvlist of properties to apply ++ * ++ * outputs: none ++ */ ++static int ++zfs_ioc_create(zfs_cmd_t *zc) ++{ ++ objset_t *clone; ++ int error = 0; ++ zfs_creat_t zct; ++ nvlist_t *nvprops = NULL; ++ void (*cbfunc)(objset_t *os, void *arg, cred_t *cr, dmu_tx_t *tx); ++ dmu_objset_type_t type = zc->zc_objset_type; ++ ++ switch (type) { ++ ++ case DMU_OST_ZFS: ++ cbfunc = zfs_create_cb; ++ break; ++ ++ case DMU_OST_ZVOL: ++ cbfunc = zvol_create_cb; ++ break; ++ ++ default: ++ cbfunc = NULL; ++ break; ++ } ++ if (strchr(zc->zc_name, '@') || ++ strchr(zc->zc_name, '%')) ++ return (EINVAL); ++ ++ if (zc->zc_nvlist_src != 0 && ++ (error = get_nvlist(zc->zc_nvlist_src, zc->zc_nvlist_src_size, ++ zc->zc_iflags, &nvprops)) != 0) ++ return (error); ++ ++ zct.zct_zplprops = NULL; ++ zct.zct_props = nvprops; ++ ++ if (zc->zc_value[0] != '\0') { ++ /* ++ * We're creating a clone of an existing snapshot. 
++ */ ++ zc->zc_value[sizeof (zc->zc_value) - 1] = '\0'; ++ if (dataset_namecheck(zc->zc_value, NULL, NULL) != 0) { ++ nvlist_free(nvprops); ++ return (EINVAL); ++ } ++ ++ error = dmu_objset_hold(zc->zc_value, FTAG, &clone); ++ if (error) { ++ nvlist_free(nvprops); ++ return (error); ++ } ++ ++ error = dmu_objset_clone(zc->zc_name, dmu_objset_ds(clone), 0); ++ dmu_objset_rele(clone, FTAG); ++ if (error) { ++ nvlist_free(nvprops); ++ return (error); ++ } ++ } else { ++ boolean_t is_insensitive = B_FALSE; ++ ++ if (cbfunc == NULL) { ++ nvlist_free(nvprops); ++ return (EINVAL); ++ } ++ ++ if (type == DMU_OST_ZVOL) { ++ uint64_t volsize, volblocksize; ++ ++ if (nvprops == NULL || ++ nvlist_lookup_uint64(nvprops, ++ zfs_prop_to_name(ZFS_PROP_VOLSIZE), ++ &volsize) != 0) { ++ nvlist_free(nvprops); ++ return (EINVAL); ++ } ++ ++ if ((error = nvlist_lookup_uint64(nvprops, ++ zfs_prop_to_name(ZFS_PROP_VOLBLOCKSIZE), ++ &volblocksize)) != 0 && error != ENOENT) { ++ nvlist_free(nvprops); ++ return (EINVAL); ++ } ++ ++ if (error != 0) ++ volblocksize = zfs_prop_default_numeric( ++ ZFS_PROP_VOLBLOCKSIZE); ++ ++ if ((error = zvol_check_volblocksize( ++ volblocksize)) != 0 || ++ (error = zvol_check_volsize(volsize, ++ volblocksize)) != 0) { ++ nvlist_free(nvprops); ++ return (error); ++ } ++ } else if (type == DMU_OST_ZFS) { ++ int error; ++ ++ /* ++ * We have to have normalization and ++ * case-folding flags correct when we do the ++ * file system creation, so go figure them out ++ * now. ++ */ ++ VERIFY(nvlist_alloc(&zct.zct_zplprops, ++ NV_UNIQUE_NAME, KM_SLEEP) == 0); ++ error = zfs_fill_zplprops(zc->zc_name, nvprops, ++ zct.zct_zplprops, &is_insensitive); ++ if (error != 0) { ++ nvlist_free(nvprops); ++ nvlist_free(zct.zct_zplprops); ++ return (error); ++ } ++ } ++ error = dmu_objset_create(zc->zc_name, type, ++ is_insensitive ? DS_FLAG_CI_DATASET : 0, cbfunc, &zct); ++ nvlist_free(zct.zct_zplprops); ++ } ++ ++ /* ++ * It would be nice to do this atomically. ++ */ ++ if (error == 0) { ++ error = zfs_set_prop_nvlist(zc->zc_name, ZPROP_SRC_LOCAL, ++ nvprops, NULL); ++ if (error != 0) ++ (void) dmu_objset_destroy(zc->zc_name, B_FALSE); ++ } ++ nvlist_free(nvprops); ++ return (error); ++} ++ ++/* ++ * inputs: ++ * zc_name name of filesystem ++ * zc_value short name of snapshot ++ * zc_cookie recursive flag ++ * zc_nvlist_src[_size] property list ++ * ++ * outputs: ++ * zc_value short snapname (i.e. part after the '@') ++ */ ++static int ++zfs_ioc_snapshot(zfs_cmd_t *zc) ++{ ++ nvlist_t *nvprops = NULL; ++ int error; ++ boolean_t recursive = zc->zc_cookie; ++ ++ if (snapshot_namecheck(zc->zc_value, NULL, NULL) != 0) ++ return (EINVAL); ++ ++ if (zc->zc_nvlist_src != 0 && ++ (error = get_nvlist(zc->zc_nvlist_src, zc->zc_nvlist_src_size, ++ zc->zc_iflags, &nvprops)) != 0) ++ return (error); ++ ++ error = zfs_check_userprops(zc->zc_name, nvprops); ++ if (error) ++ goto out; ++ ++ if (!nvlist_empty(nvprops) && ++ zfs_earlier_version(zc->zc_name, SPA_VERSION_SNAP_PROPS)) { ++ error = ENOTSUP; ++ goto out; ++ } ++ ++ error = dmu_objset_snapshot(zc->zc_name, zc->zc_value, NULL, ++ nvprops, recursive, B_FALSE, -1); ++ ++out: ++ nvlist_free(nvprops); ++ return (error); ++} ++ ++/* ++ * inputs: ++ * name dataset name, or when 'arg == NULL' the full snapshot name ++ * arg short snapshot name (i.e. 
part after the '@') ++ */ ++int ++zfs_unmount_snap(const char *name, void *arg) ++{ ++ zfs_sb_t *zsb = NULL; ++ char *dsname; ++ char *snapname; ++ char *fullname; ++ char *ptr; ++ int error; ++ ++ if (arg) { ++ dsname = strdup(name); ++ snapname = strdup(arg); ++ } else { ++ ptr = strchr(name, '@'); ++ if (ptr) { ++ dsname = strdup(name); ++ dsname[ptr - name] = '\0'; ++ snapname = strdup(ptr + 1); ++ } else { ++ return (0); ++ } ++ } ++ ++ fullname = kmem_asprintf("%s@%s", dsname, snapname); ++ ++ error = zfs_sb_hold(dsname, FTAG, &zsb, B_FALSE); ++ if (error == 0) { ++ error = zfsctl_unmount_snapshot(zsb, fullname, MNT_FORCE); ++ zfs_sb_rele(zsb, FTAG); ++ ++ /* Allow ENOENT for consistency with upstream */ ++ if (error == ENOENT) ++ error = 0; ++ } ++ ++ strfree(dsname); ++ strfree(snapname); ++ strfree(fullname); ++ ++ return (error); ++} ++ ++/* ++ * inputs: ++ * zc_name name of filesystem, snaps must be under it ++ * zc_nvlist_src[_size] full names of snapshots to destroy ++ * zc_defer_destroy mark for deferred destroy ++ * ++ * outputs: ++ * zc_name on failure, name of failed snapshot ++ */ ++static int ++zfs_ioc_destroy_snaps_nvl(zfs_cmd_t *zc) ++{ ++ int err, len; ++ nvlist_t *nvl; ++ nvpair_t *pair; ++ ++ if ((err = get_nvlist(zc->zc_nvlist_src, zc->zc_nvlist_src_size, ++ zc->zc_iflags, &nvl)) != 0) ++ return (err); ++ ++ len = strlen(zc->zc_name); ++ for (pair = nvlist_next_nvpair(nvl, NULL); pair != NULL; ++ pair = nvlist_next_nvpair(nvl, pair)) { ++ const char *name = nvpair_name(pair); ++ /* ++ * The snap name must be underneath the zc_name. This ensures ++ * that our permission checks were legitimate. ++ */ ++ if (strncmp(zc->zc_name, name, len) != 0 || ++ (name[len] != '@' && name[len] != '/')) { ++ nvlist_free(nvl); ++ return (EINVAL); ++ } ++ ++ (void) zfs_unmount_snap(name, NULL); ++ (void) zvol_remove_minor(name); ++ } ++ ++ err = dmu_snapshots_destroy_nvl(nvl, zc->zc_defer_destroy, ++ zc->zc_name); ++ nvlist_free(nvl); ++ return (err); ++} ++ ++/* ++ * inputs: ++ * zc_name name of dataset to destroy ++ * zc_objset_type type of objset ++ * zc_defer_destroy mark for deferred destroy ++ * ++ * outputs: none ++ */ ++static int ++zfs_ioc_destroy(zfs_cmd_t *zc) ++{ ++ int err; ++ if (strchr(zc->zc_name, '@') && zc->zc_objset_type == DMU_OST_ZFS) { ++ err = zfs_unmount_snap(zc->zc_name, NULL); ++ if (err) ++ return (err); ++ } ++ ++ err = dmu_objset_destroy(zc->zc_name, zc->zc_defer_destroy); ++ if (zc->zc_objset_type == DMU_OST_ZVOL && err == 0) ++ (void) zvol_remove_minor(zc->zc_name); ++ return (err); ++} ++ ++/* ++ * inputs: ++ * zc_name name of dataset to rollback (to most recent snapshot) ++ * ++ * outputs: none ++ */ ++static int ++zfs_ioc_rollback(zfs_cmd_t *zc) ++{ ++ dsl_dataset_t *ds, *clone; ++ int error; ++ zfs_sb_t *zsb; ++ char *clone_name; ++ ++ error = dsl_dataset_hold(zc->zc_name, FTAG, &ds); ++ if (error) ++ return (error); ++ ++ /* must not be a snapshot */ ++ if (dsl_dataset_is_snapshot(ds)) { ++ dsl_dataset_rele(ds, FTAG); ++ return (EINVAL); ++ } ++ ++ /* must have a most recent snapshot */ ++ if (ds->ds_phys->ds_prev_snap_txg < TXG_INITIAL) { ++ dsl_dataset_rele(ds, FTAG); ++ return (EINVAL); ++ } ++ ++ /* ++ * Create clone of most recent snapshot. ++ */ ++ clone_name = kmem_asprintf("%s/%%rollback", zc->zc_name); ++ error = dmu_objset_clone(clone_name, ds->ds_prev, DS_FLAG_INCONSISTENT); ++ if (error) ++ goto out; ++ ++ error = dsl_dataset_own(clone_name, B_TRUE, FTAG, &clone); ++ if (error) ++ goto out; ++ ++ /* ++ * Do clone swap. 
++ */ ++ if (get_zfs_sb(zc->zc_name, &zsb) == 0) { ++ error = zfs_suspend_fs(zsb); ++ if (error == 0) { ++ int resume_err; ++ ++ if (dsl_dataset_tryown(ds, B_FALSE, FTAG)) { ++ error = dsl_dataset_clone_swap(clone, ds, ++ B_TRUE); ++ dsl_dataset_disown(ds, FTAG); ++ ds = NULL; ++ } else { ++ error = EBUSY; ++ } ++ resume_err = zfs_resume_fs(zsb, zc->zc_name); ++ error = error ? error : resume_err; ++ } ++ deactivate_super(zsb->z_sb); ++ } else { ++ if (dsl_dataset_tryown(ds, B_FALSE, FTAG)) { ++ error = dsl_dataset_clone_swap(clone, ds, B_TRUE); ++ dsl_dataset_disown(ds, FTAG); ++ ds = NULL; ++ } else { ++ error = EBUSY; ++ } ++ } ++ ++ /* ++ * Destroy clone (which also closes it). ++ */ ++ (void) dsl_dataset_destroy(clone, FTAG, B_FALSE); ++ ++out: ++ strfree(clone_name); ++ if (ds) ++ dsl_dataset_rele(ds, FTAG); ++ return (error); ++} ++ ++/* ++ * inputs: ++ * zc_name old name of dataset ++ * zc_value new name of dataset ++ * zc_cookie recursive flag (only valid for snapshots) ++ * ++ * outputs: none ++ */ ++static int ++zfs_ioc_rename(zfs_cmd_t *zc) ++{ ++ boolean_t recursive = zc->zc_cookie & 1; ++ int err; ++ ++ zc->zc_value[sizeof (zc->zc_value) - 1] = '\0'; ++ if (dataset_namecheck(zc->zc_value, NULL, NULL) != 0 || ++ strchr(zc->zc_value, '%')) ++ return (EINVAL); ++ ++ /* ++ * Unmount snapshot unless we're doing a recursive rename, ++ * in which case the dataset code figures out which snapshots ++ * to unmount. ++ */ ++ if (!recursive && strchr(zc->zc_name, '@') != NULL && ++ zc->zc_objset_type == DMU_OST_ZFS) { ++ err = zfs_unmount_snap(zc->zc_name, NULL); ++ if (err) ++ return (err); ++ } ++ ++ err = dmu_objset_rename(zc->zc_name, zc->zc_value, recursive); ++ if ((err == 0) && (zc->zc_objset_type == DMU_OST_ZVOL)) { ++ (void) zvol_remove_minor(zc->zc_name); ++ (void) zvol_create_minor(zc->zc_value); ++ } ++ ++ return (err); ++} ++ ++static int ++zfs_check_settable(const char *dsname, nvpair_t *pair, cred_t *cr) ++{ ++ const char *propname = nvpair_name(pair); ++ boolean_t issnap = (strchr(dsname, '@') != NULL); ++ zfs_prop_t prop = zfs_name_to_prop(propname); ++ uint64_t intval; ++ int err; ++ ++ if (prop == ZPROP_INVAL) { ++ if (zfs_prop_user(propname)) { ++ if ((err = zfs_secpolicy_write_perms(dsname, ++ ZFS_DELEG_PERM_USERPROP, cr))) ++ return (err); ++ return (0); ++ } ++ ++ if (!issnap && zfs_prop_userquota(propname)) { ++ const char *perm = NULL; ++ const char *uq_prefix = ++ zfs_userquota_prop_prefixes[ZFS_PROP_USERQUOTA]; ++ const char *gq_prefix = ++ zfs_userquota_prop_prefixes[ZFS_PROP_GROUPQUOTA]; ++ ++ if (strncmp(propname, uq_prefix, ++ strlen(uq_prefix)) == 0) { ++ perm = ZFS_DELEG_PERM_USERQUOTA; ++ } else if (strncmp(propname, gq_prefix, ++ strlen(gq_prefix)) == 0) { ++ perm = ZFS_DELEG_PERM_GROUPQUOTA; ++ } else { ++ /* USERUSED and GROUPUSED are read-only */ ++ return (EINVAL); ++ } ++ ++ if ((err = zfs_secpolicy_write_perms(dsname, perm, cr))) ++ return (err); ++ return (0); ++ } ++ ++ return (EINVAL); ++ } ++ ++ if (issnap) ++ return (EINVAL); ++ ++ if (nvpair_type(pair) == DATA_TYPE_NVLIST) { ++ /* ++ * dsl_prop_get_all_impl() returns properties in this ++ * format. ++ */ ++ nvlist_t *attrs; ++ VERIFY(nvpair_value_nvlist(pair, &attrs) == 0); ++ VERIFY(nvlist_lookup_nvpair(attrs, ZPROP_VALUE, ++ &pair) == 0); ++ } ++ ++ /* ++ * Check that this value is valid for this pool version ++ */ ++ switch (prop) { ++ case ZFS_PROP_COMPRESSION: ++ /* ++ * If the user specified gzip compression, make sure ++ * the SPA supports it. 
We ignore any errors here since ++ * we'll catch them later. ++ */ ++ if (nvpair_type(pair) == DATA_TYPE_UINT64 && ++ nvpair_value_uint64(pair, &intval) == 0) { ++ if (intval >= ZIO_COMPRESS_GZIP_1 && ++ intval <= ZIO_COMPRESS_GZIP_9 && ++ zfs_earlier_version(dsname, ++ SPA_VERSION_GZIP_COMPRESSION)) { ++ return (ENOTSUP); ++ } ++ ++ if (intval == ZIO_COMPRESS_ZLE && ++ zfs_earlier_version(dsname, ++ SPA_VERSION_ZLE_COMPRESSION)) ++ return (ENOTSUP); ++ ++ /* ++ * If this is a bootable dataset then ++ * verify that the compression algorithm ++ * is supported for booting. We must return ++ * something other than ENOTSUP since it ++ * implies a downrev pool version. ++ */ ++ if (zfs_is_bootfs(dsname) && ++ !BOOTFS_COMPRESS_VALID(intval)) { ++ return (ERANGE); ++ } ++ } ++ break; ++ ++ case ZFS_PROP_COPIES: ++ if (zfs_earlier_version(dsname, SPA_VERSION_DITTO_BLOCKS)) ++ return (ENOTSUP); ++ break; ++ ++ case ZFS_PROP_DEDUP: ++ if (zfs_earlier_version(dsname, SPA_VERSION_DEDUP)) ++ return (ENOTSUP); ++ break; ++ ++ case ZFS_PROP_SHARESMB: ++ if (zpl_earlier_version(dsname, ZPL_VERSION_FUID)) ++ return (ENOTSUP); ++ break; ++ ++ case ZFS_PROP_ACLINHERIT: ++ if (nvpair_type(pair) == DATA_TYPE_UINT64 && ++ nvpair_value_uint64(pair, &intval) == 0) { ++ if (intval == ZFS_ACL_PASSTHROUGH_X && ++ zfs_earlier_version(dsname, ++ SPA_VERSION_PASSTHROUGH_X)) ++ return (ENOTSUP); ++ } ++ break; ++ default: ++ break; ++ } ++ ++ return (zfs_secpolicy_setprop(dsname, prop, pair, CRED())); ++} ++ ++/* ++ * Removes properties from the given props list that fail permission checks ++ * needed to clear them and to restore them in case of a receive error. For each ++ * property, make sure we have both set and inherit permissions. ++ * ++ * Returns the first error encountered if any permission checks fail. If the ++ * caller provides a non-NULL errlist, it also gives the complete list of names ++ * of all the properties that failed a permission check along with the ++ * corresponding error numbers. The caller is responsible for freeing the ++ * returned errlist. ++ * ++ * If every property checks out successfully, zero is returned and the list ++ * pointed at by errlist is NULL. 
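++ *
++ * As a rough illustration (property names and error values are
++ * hypothetical, not taken from a real pool), a returned errlist is a
++ * flat nvlist of int32 entries keyed by property name, e.g.:
++ *
++ *	errlist = {
++ *		"compression"	= EPERM,
++ *		"quota"		= EPERM
++ *	}
++ *
++ * and the int returned to the caller is the first of those errors.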
++ */ ++static int ++zfs_check_clearable(char *dataset, nvlist_t *props, nvlist_t **errlist) ++{ ++ zfs_cmd_t *zc; ++ nvpair_t *pair, *next_pair; ++ nvlist_t *errors; ++ int err, rv = 0; ++ ++ if (props == NULL) ++ return (0); ++ ++ VERIFY(nvlist_alloc(&errors, NV_UNIQUE_NAME, KM_SLEEP) == 0); ++ ++ zc = kmem_alloc(sizeof (zfs_cmd_t), KM_SLEEP | KM_NODEBUG); ++ (void) strcpy(zc->zc_name, dataset); ++ pair = nvlist_next_nvpair(props, NULL); ++ while (pair != NULL) { ++ next_pair = nvlist_next_nvpair(props, pair); ++ ++ (void) strcpy(zc->zc_value, nvpair_name(pair)); ++ if ((err = zfs_check_settable(dataset, pair, CRED())) != 0 || ++ (err = zfs_secpolicy_inherit(zc, CRED())) != 0) { ++ VERIFY(nvlist_remove_nvpair(props, pair) == 0); ++ VERIFY(nvlist_add_int32(errors, ++ zc->zc_value, err) == 0); ++ } ++ pair = next_pair; ++ } ++ kmem_free(zc, sizeof (zfs_cmd_t)); ++ ++ if ((pair = nvlist_next_nvpair(errors, NULL)) == NULL) { ++ nvlist_free(errors); ++ errors = NULL; ++ } else { ++ VERIFY(nvpair_value_int32(pair, &rv) == 0); ++ } ++ ++ if (errlist == NULL) ++ nvlist_free(errors); ++ else ++ *errlist = errors; ++ ++ return (rv); ++} ++ ++static boolean_t ++propval_equals(nvpair_t *p1, nvpair_t *p2) ++{ ++ if (nvpair_type(p1) == DATA_TYPE_NVLIST) { ++ /* dsl_prop_get_all_impl() format */ ++ nvlist_t *attrs; ++ VERIFY(nvpair_value_nvlist(p1, &attrs) == 0); ++ VERIFY(nvlist_lookup_nvpair(attrs, ZPROP_VALUE, ++ &p1) == 0); ++ } ++ ++ if (nvpair_type(p2) == DATA_TYPE_NVLIST) { ++ nvlist_t *attrs; ++ VERIFY(nvpair_value_nvlist(p2, &attrs) == 0); ++ VERIFY(nvlist_lookup_nvpair(attrs, ZPROP_VALUE, ++ &p2) == 0); ++ } ++ ++ if (nvpair_type(p1) != nvpair_type(p2)) ++ return (B_FALSE); ++ ++ if (nvpair_type(p1) == DATA_TYPE_STRING) { ++ char *valstr1, *valstr2; ++ ++ VERIFY(nvpair_value_string(p1, (char **)&valstr1) == 0); ++ VERIFY(nvpair_value_string(p2, (char **)&valstr2) == 0); ++ return (strcmp(valstr1, valstr2) == 0); ++ } else { ++ uint64_t intval1, intval2; ++ ++ VERIFY(nvpair_value_uint64(p1, &intval1) == 0); ++ VERIFY(nvpair_value_uint64(p2, &intval2) == 0); ++ return (intval1 == intval2); ++ } ++} ++ ++/* ++ * Remove properties from props if they are not going to change (as determined ++ * by comparison with origprops). Remove them from origprops as well, since we ++ * do not need to clear or restore properties that won't change. 
++ */ ++static void ++props_reduce(nvlist_t *props, nvlist_t *origprops) ++{ ++ nvpair_t *pair, *next_pair; ++ ++ if (origprops == NULL) ++ return; /* all props need to be received */ ++ ++ pair = nvlist_next_nvpair(props, NULL); ++ while (pair != NULL) { ++ const char *propname = nvpair_name(pair); ++ nvpair_t *match; ++ ++ next_pair = nvlist_next_nvpair(props, pair); ++ ++ if ((nvlist_lookup_nvpair(origprops, propname, ++ &match) != 0) || !propval_equals(pair, match)) ++ goto next; /* need to set received value */ ++ ++ /* don't clear the existing received value */ ++ (void) nvlist_remove_nvpair(origprops, match); ++ /* don't bother receiving the property */ ++ (void) nvlist_remove_nvpair(props, pair); ++next: ++ pair = next_pair; ++ } ++} ++ ++#ifdef DEBUG ++static boolean_t zfs_ioc_recv_inject_err; ++#endif ++ ++/* ++ * inputs: ++ * zc_name name of containing filesystem ++ * zc_nvlist_src{_size} nvlist of properties to apply ++ * zc_value name of snapshot to create ++ * zc_string name of clone origin (if DRR_FLAG_CLONE) ++ * zc_cookie file descriptor to recv from ++ * zc_begin_record the BEGIN record of the stream (not byteswapped) ++ * zc_guid force flag ++ * zc_cleanup_fd cleanup-on-exit file descriptor ++ * zc_action_handle handle for this guid/ds mapping (or zero on first call) ++ * ++ * outputs: ++ * zc_cookie number of bytes read ++ * zc_nvlist_dst{_size} error for each unapplied received property ++ * zc_obj zprop_errflags_t ++ * zc_action_handle handle for this guid/ds mapping ++ */ ++static int ++zfs_ioc_recv(zfs_cmd_t *zc) ++{ ++ file_t *fp; ++ objset_t *os; ++ dmu_recv_cookie_t drc; ++ boolean_t force = (boolean_t)zc->zc_guid; ++ int fd; ++ int error = 0; ++ int props_error = 0; ++ nvlist_t *errors; ++ offset_t off; ++ nvlist_t *props = NULL; /* sent properties */ ++ nvlist_t *origprops = NULL; /* existing properties */ ++ objset_t *origin = NULL; ++ char *tosnap; ++ char tofs[ZFS_MAXNAMELEN]; ++ boolean_t first_recvd_props = B_FALSE; ++ ++ if (dataset_namecheck(zc->zc_value, NULL, NULL) != 0 || ++ strchr(zc->zc_value, '@') == NULL || ++ strchr(zc->zc_value, '%')) ++ return (EINVAL); ++ ++ (void) strcpy(tofs, zc->zc_value); ++ tosnap = strchr(tofs, '@'); ++ *tosnap++ = '\0'; ++ ++ if (zc->zc_nvlist_src != 0 && ++ (error = get_nvlist(zc->zc_nvlist_src, zc->zc_nvlist_src_size, ++ zc->zc_iflags, &props)) != 0) ++ return (error); ++ ++ fd = zc->zc_cookie; ++ fp = getf(fd); ++ if (fp == NULL) { ++ nvlist_free(props); ++ return (EBADF); ++ } ++ ++ VERIFY(nvlist_alloc(&errors, NV_UNIQUE_NAME, KM_SLEEP) == 0); ++ ++ if (props && dmu_objset_hold(tofs, FTAG, &os) == 0) { ++ if ((spa_version(os->os_spa) >= SPA_VERSION_RECVD_PROPS) && ++ !dsl_prop_get_hasrecvd(os)) { ++ first_recvd_props = B_TRUE; ++ } ++ ++ /* ++ * If new received properties are supplied, they are to ++ * completely replace the existing received properties, so stash ++ * away the existing ones. ++ */ ++ if (dsl_prop_get_received(os, &origprops) == 0) { ++ nvlist_t *errlist = NULL; ++ /* ++ * Don't bother writing a property if its value won't ++ * change (and avoid the unnecessary security checks). ++ * ++ * The first receive after SPA_VERSION_RECVD_PROPS is a ++ * special case where we blow away all local properties ++ * regardless. 
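++			 *
++			 * For example (hypothetical values): with
++			 *   props     = { compression=gzip, atime=off }
++			 *   origprops = { compression=gzip }
++			 * props_reduce() drops the matching compression pair
++			 * from both lists, so only atime is received and the
++			 * existing received compression value is left alone.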
++ */ ++ if (!first_recvd_props) ++ props_reduce(props, origprops); ++ if (zfs_check_clearable(tofs, origprops, ++ &errlist) != 0) ++ (void) nvlist_merge(errors, errlist, 0); ++ nvlist_free(errlist); ++ } ++ ++ dmu_objset_rele(os, FTAG); ++ } ++ ++ if (zc->zc_string[0]) { ++ error = dmu_objset_hold(zc->zc_string, FTAG, &origin); ++ if (error) ++ goto out; ++ } ++ ++ error = dmu_recv_begin(tofs, tosnap, zc->zc_top_ds, ++ &zc->zc_begin_record, force, origin, &drc); ++ if (origin) ++ dmu_objset_rele(origin, FTAG); ++ if (error) ++ goto out; ++ ++ /* ++ * Set properties before we receive the stream so that they are applied ++ * to the new data. Note that we must call dmu_recv_stream() if ++ * dmu_recv_begin() succeeds. ++ */ ++ if (props) { ++ nvlist_t *errlist; ++ ++ if (dmu_objset_from_ds(drc.drc_logical_ds, &os) == 0) { ++ if (drc.drc_newfs) { ++ if (spa_version(os->os_spa) >= ++ SPA_VERSION_RECVD_PROPS) ++ first_recvd_props = B_TRUE; ++ } else if (origprops != NULL) { ++ if (clear_received_props(os, tofs, origprops, ++ first_recvd_props ? NULL : props) != 0) ++ zc->zc_obj |= ZPROP_ERR_NOCLEAR; ++ } else { ++ zc->zc_obj |= ZPROP_ERR_NOCLEAR; ++ } ++ dsl_prop_set_hasrecvd(os); ++ } else if (!drc.drc_newfs) { ++ zc->zc_obj |= ZPROP_ERR_NOCLEAR; ++ } ++ ++ (void) zfs_set_prop_nvlist(tofs, ZPROP_SRC_RECEIVED, ++ props, &errlist); ++ (void) nvlist_merge(errors, errlist, 0); ++ nvlist_free(errlist); ++ } ++ ++ if (fit_error_list(zc, &errors) != 0 || put_nvlist(zc, errors) != 0) { ++ /* ++ * Caller made zc->zc_nvlist_dst less than the minimum expected ++ * size or supplied an invalid address. ++ */ ++ props_error = EINVAL; ++ } ++ ++ off = fp->f_offset; ++ error = dmu_recv_stream(&drc, fp->f_vnode, &off, zc->zc_cleanup_fd, ++ &zc->zc_action_handle); ++ ++ if (error == 0) { ++ zfs_sb_t *zsb = NULL; ++ ++ if (get_zfs_sb(tofs, &zsb) == 0) { ++ /* online recv */ ++ int end_err; ++ ++ error = zfs_suspend_fs(zsb); ++ /* ++ * If the suspend fails, then the recv_end will ++ * likely also fail, and clean up after itself. ++ */ ++ end_err = dmu_recv_end(&drc); ++ if (error == 0) ++ error = zfs_resume_fs(zsb, tofs); ++ error = error ? error : end_err; ++ deactivate_super(zsb->z_sb); ++ } else { ++ error = dmu_recv_end(&drc); ++ } ++ } ++ ++ zc->zc_cookie = off - fp->f_offset; ++ if (VOP_SEEK(fp->f_vnode, fp->f_offset, &off, NULL) == 0) ++ fp->f_offset = off; ++ ++#ifdef DEBUG ++ if (zfs_ioc_recv_inject_err) { ++ zfs_ioc_recv_inject_err = B_FALSE; ++ error = 1; ++ } ++#endif ++ /* ++ * On error, restore the original props. ++ */ ++ if (error && props) { ++ if (dmu_objset_hold(tofs, FTAG, &os) == 0) { ++ if (clear_received_props(os, tofs, props, NULL) != 0) { ++ /* ++ * We failed to clear the received properties. ++ * Since we may have left a $recvd value on the ++ * system, we can't clear the $hasrecvd flag. ++ */ ++ zc->zc_obj |= ZPROP_ERR_NORESTORE; ++ } else if (first_recvd_props) { ++ dsl_prop_unset_hasrecvd(os); ++ } ++ dmu_objset_rele(os, FTAG); ++ } else if (!drc.drc_newfs) { ++ /* We failed to clear the received properties. */ ++ zc->zc_obj |= ZPROP_ERR_NORESTORE; ++ } ++ ++ if (origprops == NULL && !drc.drc_newfs) { ++ /* We failed to stash the original properties. */ ++ zc->zc_obj |= ZPROP_ERR_NORESTORE; ++ } ++ ++ /* ++ * dsl_props_set() will not convert RECEIVED to LOCAL on or ++ * after SPA_VERSION_RECVD_PROPS, so we need to specify LOCAL ++ * explictly if we're restoring local properties cleared in the ++ * first new-style receive. 
++ */ ++ if (origprops != NULL && ++ zfs_set_prop_nvlist(tofs, (first_recvd_props ? ++ ZPROP_SRC_LOCAL : ZPROP_SRC_RECEIVED), ++ origprops, NULL) != 0) { ++ /* ++ * We stashed the original properties but failed to ++ * restore them. ++ */ ++ zc->zc_obj |= ZPROP_ERR_NORESTORE; ++ } ++ } ++out: ++ nvlist_free(props); ++ nvlist_free(origprops); ++ nvlist_free(errors); ++ releasef(fd); ++ ++ if (error == 0) ++ error = props_error; ++ ++ return (error); ++} ++ ++/* ++ * inputs: ++ * zc_name name of snapshot to send ++ * zc_cookie file descriptor to send stream to ++ * zc_obj fromorigin flag (mutually exclusive with zc_fromobj) ++ * zc_sendobj objsetid of snapshot to send ++ * zc_fromobj objsetid of incremental fromsnap (may be zero) ++ * zc_guid if set, estimate size of stream only. zc_cookie is ignored. ++ * output size in zc_objset_type. ++ * ++ * outputs: none ++ */ ++static int ++zfs_ioc_send(zfs_cmd_t *zc) ++{ ++ objset_t *fromsnap = NULL; ++ objset_t *tosnap; ++ int error; ++ offset_t off; ++ dsl_dataset_t *ds; ++ dsl_dataset_t *dsfrom = NULL; ++ spa_t *spa; ++ dsl_pool_t *dp; ++ boolean_t estimate = (zc->zc_guid != 0); ++ ++ error = spa_open(zc->zc_name, &spa, FTAG); ++ if (error) ++ return (error); ++ ++ dp = spa_get_dsl(spa); ++ rw_enter(&dp->dp_config_rwlock, RW_READER); ++ error = dsl_dataset_hold_obj(dp, zc->zc_sendobj, FTAG, &ds); ++ rw_exit(&dp->dp_config_rwlock); ++ if (error) { ++ spa_close(spa, FTAG); ++ return (error); ++ } ++ ++ error = dmu_objset_from_ds(ds, &tosnap); ++ if (error) { ++ dsl_dataset_rele(ds, FTAG); ++ spa_close(spa, FTAG); ++ return (error); ++ } ++ ++ if (zc->zc_fromobj != 0) { ++ rw_enter(&dp->dp_config_rwlock, RW_READER); ++ error = dsl_dataset_hold_obj(dp, zc->zc_fromobj, FTAG, &dsfrom); ++ rw_exit(&dp->dp_config_rwlock); ++ spa_close(spa, FTAG); ++ if (error) { ++ dsl_dataset_rele(ds, FTAG); ++ return (error); ++ } ++ error = dmu_objset_from_ds(dsfrom, &fromsnap); ++ if (error) { ++ dsl_dataset_rele(dsfrom, FTAG); ++ dsl_dataset_rele(ds, FTAG); ++ return (error); ++ } ++ } else { ++ spa_close(spa, FTAG); ++ } ++ ++ if (estimate) { ++ error = dmu_send_estimate(tosnap, fromsnap, zc->zc_obj, ++ &zc->zc_objset_type); ++ } else { ++ file_t *fp = getf(zc->zc_cookie); ++ if (fp == NULL) { ++ dsl_dataset_rele(ds, FTAG); ++ if (dsfrom) ++ dsl_dataset_rele(dsfrom, FTAG); ++ return (EBADF); ++ } ++ ++ off = fp->f_offset; ++ error = dmu_send(tosnap, fromsnap, zc->zc_obj, ++ zc->zc_cookie, fp->f_vnode, &off); ++ ++ if (VOP_SEEK(fp->f_vnode, fp->f_offset, &off, NULL) == 0) ++ fp->f_offset = off; ++ releasef(zc->zc_cookie); ++ } ++ if (dsfrom) ++ dsl_dataset_rele(dsfrom, FTAG); ++ dsl_dataset_rele(ds, FTAG); ++ return (error); ++} ++ ++/* ++ * inputs: ++ * zc_name name of snapshot on which to report progress ++ * zc_cookie file descriptor of send stream ++ * ++ * outputs: ++ * zc_cookie number of bytes written in send stream thus far ++ */ ++static int ++zfs_ioc_send_progress(zfs_cmd_t *zc) ++{ ++ dsl_dataset_t *ds; ++ dmu_sendarg_t *dsp = NULL; ++ int error; ++ ++ if ((error = dsl_dataset_hold(zc->zc_name, FTAG, &ds)) != 0) ++ return (error); ++ ++ mutex_enter(&ds->ds_sendstream_lock); ++ ++ /* ++ * Iterate over all the send streams currently active on this dataset. ++ * If there's one which matches the specified file descriptor _and_ the ++ * stream was started by the current process, return the progress of ++ * that stream. 
++ */ ++ ++ for (dsp = list_head(&ds->ds_sendstreams); dsp != NULL; ++ dsp = list_next(&ds->ds_sendstreams, dsp)) { ++ if (dsp->dsa_outfd == zc->zc_cookie && ++ dsp->dsa_proc->group_leader == curproc->group_leader) ++ break; ++ } ++ ++ if (dsp != NULL) ++ zc->zc_cookie = *(dsp->dsa_off); ++ else ++ error = ENOENT; ++ ++ mutex_exit(&ds->ds_sendstream_lock); ++ dsl_dataset_rele(ds, FTAG); ++ return (error); ++} ++ ++static int ++zfs_ioc_inject_fault(zfs_cmd_t *zc) ++{ ++ int id, error; ++ ++ error = zio_inject_fault(zc->zc_name, (int)zc->zc_guid, &id, ++ &zc->zc_inject_record); ++ ++ if (error == 0) ++ zc->zc_guid = (uint64_t)id; ++ ++ return (error); ++} ++ ++static int ++zfs_ioc_clear_fault(zfs_cmd_t *zc) ++{ ++ return (zio_clear_fault((int)zc->zc_guid)); ++} ++ ++static int ++zfs_ioc_inject_list_next(zfs_cmd_t *zc) ++{ ++ int id = (int)zc->zc_guid; ++ int error; ++ ++ error = zio_inject_list_next(&id, zc->zc_name, sizeof (zc->zc_name), ++ &zc->zc_inject_record); ++ ++ zc->zc_guid = id; ++ ++ return (error); ++} ++ ++static int ++zfs_ioc_error_log(zfs_cmd_t *zc) ++{ ++ spa_t *spa; ++ int error; ++ size_t count = (size_t)zc->zc_nvlist_dst_size; ++ ++ if ((error = spa_open(zc->zc_name, &spa, FTAG)) != 0) ++ return (error); ++ ++ error = spa_get_errlog(spa, (void *)(uintptr_t)zc->zc_nvlist_dst, ++ &count); ++ if (error == 0) ++ zc->zc_nvlist_dst_size = count; ++ else ++ zc->zc_nvlist_dst_size = spa_get_errlog_size(spa); ++ ++ spa_close(spa, FTAG); ++ ++ return (error); ++} ++ ++static int ++zfs_ioc_clear(zfs_cmd_t *zc) ++{ ++ spa_t *spa; ++ vdev_t *vd; ++ int error; ++ ++ /* ++ * On zpool clear we also fix up missing slogs ++ */ ++ mutex_enter(&spa_namespace_lock); ++ spa = spa_lookup(zc->zc_name); ++ if (spa == NULL) { ++ mutex_exit(&spa_namespace_lock); ++ return (EIO); ++ } ++ if (spa_get_log_state(spa) == SPA_LOG_MISSING) { ++ /* we need to let spa_open/spa_load clear the chains */ ++ spa_set_log_state(spa, SPA_LOG_CLEAR); ++ } ++ spa->spa_last_open_failed = 0; ++ mutex_exit(&spa_namespace_lock); ++ ++ if (zc->zc_cookie & ZPOOL_NO_REWIND) { ++ error = spa_open(zc->zc_name, &spa, FTAG); ++ } else { ++ nvlist_t *policy; ++ nvlist_t *config = NULL; ++ ++ if (zc->zc_nvlist_src == 0) ++ return (EINVAL); ++ ++ if ((error = get_nvlist(zc->zc_nvlist_src, ++ zc->zc_nvlist_src_size, zc->zc_iflags, &policy)) == 0) { ++ error = spa_open_rewind(zc->zc_name, &spa, FTAG, ++ policy, &config); ++ if (config != NULL) { ++ int err; ++ ++ if ((err = put_nvlist(zc, config)) != 0) ++ error = err; ++ nvlist_free(config); ++ } ++ nvlist_free(policy); ++ } ++ } ++ ++ if (error) ++ return (error); ++ ++ spa_vdev_state_enter(spa, SCL_NONE); ++ ++ if (zc->zc_guid == 0) { ++ vd = NULL; ++ } else { ++ vd = spa_lookup_by_guid(spa, zc->zc_guid, B_TRUE); ++ if (vd == NULL) { ++ (void) spa_vdev_state_exit(spa, NULL, ENODEV); ++ spa_close(spa, FTAG); ++ return (ENODEV); ++ } ++ } ++ ++ vdev_clear(spa, vd); ++ ++ (void) spa_vdev_state_exit(spa, NULL, 0); ++ ++ /* ++ * Resume any suspended I/Os. ++ */ ++ if (zio_resume(spa) != 0) ++ error = EIO; ++ ++ spa_close(spa, FTAG); ++ ++ return (error); ++} ++ ++static int ++zfs_ioc_pool_reopen(zfs_cmd_t *zc) ++{ ++ spa_t *spa; ++ int error; ++ ++ error = spa_open(zc->zc_name, &spa, FTAG); ++ if (error) ++ return (error); ++ ++ spa_vdev_state_enter(spa, SCL_NONE); ++ ++ /* ++ * If a resilver is already in progress then set the ++ * spa_scrub_reopen flag to B_TRUE so that we don't restart ++ * the scan as a side effect of the reopen. 
Otherwise, let ++ * vdev_open() decided if a resilver is required. ++ */ ++ spa->spa_scrub_reopen = dsl_scan_resilvering(spa->spa_dsl_pool); ++ vdev_reopen(spa->spa_root_vdev); ++ spa->spa_scrub_reopen = B_FALSE; ++ ++ (void) spa_vdev_state_exit(spa, NULL, 0); ++ spa_close(spa, FTAG); ++ return (0); ++} ++/* ++ * inputs: ++ * zc_name name of filesystem ++ * zc_value name of origin snapshot ++ * ++ * outputs: ++ * zc_string name of conflicting snapshot, if there is one ++ */ ++static int ++zfs_ioc_promote(zfs_cmd_t *zc) ++{ ++ char *cp; ++ ++ /* ++ * We don't need to unmount *all* the origin fs's snapshots, but ++ * it's easier. ++ */ ++ cp = strchr(zc->zc_value, '@'); ++ if (cp) ++ *cp = '\0'; ++ (void) dmu_objset_find(zc->zc_value, ++ zfs_unmount_snap, NULL, DS_FIND_SNAPSHOTS); ++ return (dsl_dataset_promote(zc->zc_name, zc->zc_string)); ++} ++ ++/* ++ * Retrieve a single {user|group}{used|quota}@... property. ++ * ++ * inputs: ++ * zc_name name of filesystem ++ * zc_objset_type zfs_userquota_prop_t ++ * zc_value domain name (eg. "S-1-234-567-89") ++ * zc_guid RID/UID/GID ++ * ++ * outputs: ++ * zc_cookie property value ++ */ ++static int ++zfs_ioc_userspace_one(zfs_cmd_t *zc) ++{ ++ zfs_sb_t *zsb; ++ int error; ++ ++ if (zc->zc_objset_type >= ZFS_NUM_USERQUOTA_PROPS) ++ return (EINVAL); ++ ++ error = zfs_sb_hold(zc->zc_name, FTAG, &zsb, B_FALSE); ++ if (error) ++ return (error); ++ ++ error = zfs_userspace_one(zsb, ++ zc->zc_objset_type, zc->zc_value, zc->zc_guid, &zc->zc_cookie); ++ zfs_sb_rele(zsb, FTAG); ++ ++ return (error); ++} ++ ++/* ++ * inputs: ++ * zc_name name of filesystem ++ * zc_cookie zap cursor ++ * zc_objset_type zfs_userquota_prop_t ++ * zc_nvlist_dst[_size] buffer to fill (not really an nvlist) ++ * ++ * outputs: ++ * zc_nvlist_dst[_size] data buffer (array of zfs_useracct_t) ++ * zc_cookie zap cursor ++ */ ++static int ++zfs_ioc_userspace_many(zfs_cmd_t *zc) ++{ ++ zfs_sb_t *zsb; ++ int bufsize = zc->zc_nvlist_dst_size; ++ int error; ++ void *buf; ++ ++ if (bufsize <= 0) ++ return (ENOMEM); ++ ++ error = zfs_sb_hold(zc->zc_name, FTAG, &zsb, B_FALSE); ++ if (error) ++ return (error); ++ ++ buf = vmem_alloc(bufsize, KM_SLEEP); ++ ++ error = zfs_userspace_many(zsb, zc->zc_objset_type, &zc->zc_cookie, ++ buf, &zc->zc_nvlist_dst_size); ++ ++ if (error == 0) { ++ error = xcopyout(buf, ++ (void *)(uintptr_t)zc->zc_nvlist_dst, ++ zc->zc_nvlist_dst_size); ++ } ++ vmem_free(buf, bufsize); ++ zfs_sb_rele(zsb, FTAG); ++ ++ return (error); ++} ++ ++/* ++ * inputs: ++ * zc_name name of filesystem ++ * ++ * outputs: ++ * none ++ */ ++static int ++zfs_ioc_userspace_upgrade(zfs_cmd_t *zc) ++{ ++ objset_t *os; ++ int error = 0; ++ zfs_sb_t *zsb; ++ ++ if (get_zfs_sb(zc->zc_name, &zsb) == 0) { ++ if (!dmu_objset_userused_enabled(zsb->z_os)) { ++ /* ++ * If userused is not enabled, it may be because the ++ * objset needs to be closed & reopened (to grow the ++ * objset_phys_t). Suspend/resume the fs will do that. 
++ */ ++ error = zfs_suspend_fs(zsb); ++ if (error == 0) ++ error = zfs_resume_fs(zsb, zc->zc_name); ++ } ++ if (error == 0) ++ error = dmu_objset_userspace_upgrade(zsb->z_os); ++ deactivate_super(zsb->z_sb); ++ } else { ++ /* XXX kind of reading contents without owning */ ++ error = dmu_objset_hold(zc->zc_name, FTAG, &os); ++ if (error) ++ return (error); ++ ++ error = dmu_objset_userspace_upgrade(os); ++ dmu_objset_rele(os, FTAG); ++ } ++ ++ return (error); ++} ++ ++static int ++zfs_ioc_share(zfs_cmd_t *zc) ++{ ++ return (ENOSYS); ++} ++ ++ace_t full_access[] = { ++ {(uid_t)-1, ACE_ALL_PERMS, ACE_EVERYONE, 0} ++}; ++ ++/* ++ * inputs: ++ * zc_name name of containing filesystem ++ * zc_obj object # beyond which we want next in-use object # ++ * ++ * outputs: ++ * zc_obj next in-use object # ++ */ ++static int ++zfs_ioc_next_obj(zfs_cmd_t *zc) ++{ ++ objset_t *os = NULL; ++ int error; ++ ++ error = dmu_objset_hold(zc->zc_name, FTAG, &os); ++ if (error) ++ return (error); ++ ++ error = dmu_object_next(os, &zc->zc_obj, B_FALSE, ++ os->os_dsl_dataset->ds_phys->ds_prev_snap_txg); ++ ++ dmu_objset_rele(os, FTAG); ++ return (error); ++} ++ ++/* ++ * inputs: ++ * zc_name name of filesystem ++ * zc_value prefix name for snapshot ++ * zc_cleanup_fd cleanup-on-exit file descriptor for calling process ++ * ++ * outputs: ++ */ ++static int ++zfs_ioc_tmp_snapshot(zfs_cmd_t *zc) ++{ ++ char *snap_name; ++ int error; ++ ++ snap_name = kmem_asprintf("%s-%016llx", zc->zc_value, ++ (u_longlong_t)ddi_get_lbolt64()); ++ ++ if (strlen(snap_name) >= MAXNAMELEN) { ++ strfree(snap_name); ++ return (E2BIG); ++ } ++ ++ error = dmu_objset_snapshot(zc->zc_name, snap_name, snap_name, ++ NULL, B_FALSE, B_TRUE, zc->zc_cleanup_fd); ++ if (error != 0) { ++ strfree(snap_name); ++ return (error); ++ } ++ ++ (void) strcpy(zc->zc_value, snap_name); ++ strfree(snap_name); ++ return (0); ++} ++ ++/* ++ * inputs: ++ * zc_name name of "to" snapshot ++ * zc_value name of "from" snapshot ++ * zc_cookie file descriptor to write diff data on ++ * ++ * outputs: ++ * dmu_diff_record_t's to the file descriptor ++ */ ++static int ++zfs_ioc_diff(zfs_cmd_t *zc) ++{ ++ objset_t *fromsnap; ++ objset_t *tosnap; ++ file_t *fp; ++ offset_t off; ++ int error; ++ ++ error = dmu_objset_hold(zc->zc_name, FTAG, &tosnap); ++ if (error) ++ return (error); ++ ++ error = dmu_objset_hold(zc->zc_value, FTAG, &fromsnap); ++ if (error) { ++ dmu_objset_rele(tosnap, FTAG); ++ return (error); ++ } ++ ++ fp = getf(zc->zc_cookie); ++ if (fp == NULL) { ++ dmu_objset_rele(fromsnap, FTAG); ++ dmu_objset_rele(tosnap, FTAG); ++ return (EBADF); ++ } ++ ++ off = fp->f_offset; ++ ++ error = dmu_diff(tosnap, fromsnap, fp->f_vnode, &off); ++ ++ if (VOP_SEEK(fp->f_vnode, fp->f_offset, &off, NULL) == 0) ++ fp->f_offset = off; ++ releasef(zc->zc_cookie); ++ ++ dmu_objset_rele(fromsnap, FTAG); ++ dmu_objset_rele(tosnap, FTAG); ++ return (error); ++} ++ ++/* ++ * Remove all ACL files in shares dir ++ */ ++#ifdef HAVE_SMB_SHARE ++static int ++zfs_smb_acl_purge(znode_t *dzp) ++{ ++ zap_cursor_t zc; ++ zap_attribute_t zap; ++ zfs_sb_t *zsb = ZTOZSB(dzp); ++ int error; ++ ++ for (zap_cursor_init(&zc, zsb->z_os, dzp->z_id); ++ (error = zap_cursor_retrieve(&zc, &zap)) == 0; ++ zap_cursor_advance(&zc)) { ++ if ((error = VOP_REMOVE(ZTOV(dzp), zap.za_name, kcred, ++ NULL, 0)) != 0) ++ break; ++ } ++ zap_cursor_fini(&zc); ++ return (error); ++} ++#endif /* HAVE_SMB_SHARE */ ++ ++static int ++zfs_ioc_smb_acl(zfs_cmd_t *zc) ++{ ++#ifdef HAVE_SMB_SHARE ++ vnode_t *vp; ++ znode_t *dzp; ++ 
vnode_t *resourcevp = NULL; ++ znode_t *sharedir; ++ zfs_sb_t *zsb; ++ nvlist_t *nvlist; ++ char *src, *target; ++ vattr_t vattr; ++ vsecattr_t vsec; ++ int error = 0; ++ ++ if ((error = lookupname(zc->zc_value, UIO_SYSSPACE, ++ NO_FOLLOW, NULL, &vp)) != 0) ++ return (error); ++ ++ /* Now make sure mntpnt and dataset are ZFS */ ++ ++ if (vp->v_vfsp->vfs_fstype != zfsfstype || ++ (strcmp((char *)refstr_value(vp->v_vfsp->vfs_resource), ++ zc->zc_name) != 0)) { ++ VN_RELE(vp); ++ return (EINVAL); ++ } ++ ++ dzp = VTOZ(vp); ++ zsb = ZTOZSB(dzp); ++ ZFS_ENTER(zsb); ++ ++ /* ++ * Create share dir if its missing. ++ */ ++ mutex_enter(&zsb->z_lock); ++ if (zsb->z_shares_dir == 0) { ++ dmu_tx_t *tx; ++ ++ tx = dmu_tx_create(zsb->z_os); ++ dmu_tx_hold_zap(tx, MASTER_NODE_OBJ, TRUE, ++ ZFS_SHARES_DIR); ++ dmu_tx_hold_zap(tx, DMU_NEW_OBJECT, FALSE, NULL); ++ error = dmu_tx_assign(tx, TXG_WAIT); ++ if (error) { ++ dmu_tx_abort(tx); ++ } else { ++ error = zfs_create_share_dir(zsb, tx); ++ dmu_tx_commit(tx); ++ } ++ if (error) { ++ mutex_exit(&zsb->z_lock); ++ VN_RELE(vp); ++ ZFS_EXIT(zsb); ++ return (error); ++ } ++ } ++ mutex_exit(&zsb->z_lock); ++ ++ ASSERT(zsb->z_shares_dir); ++ if ((error = zfs_zget(zsb, zsb->z_shares_dir, &sharedir)) != 0) { ++ VN_RELE(vp); ++ ZFS_EXIT(zsb); ++ return (error); ++ } ++ ++ switch (zc->zc_cookie) { ++ case ZFS_SMB_ACL_ADD: ++ vattr.va_mask = AT_MODE|AT_UID|AT_GID|AT_TYPE; ++ vattr.va_mode = S_IFREG|0777; ++ vattr.va_uid = 0; ++ vattr.va_gid = 0; ++ ++ vsec.vsa_mask = VSA_ACE; ++ vsec.vsa_aclentp = &full_access; ++ vsec.vsa_aclentsz = sizeof (full_access); ++ vsec.vsa_aclcnt = 1; ++ ++ error = VOP_CREATE(ZTOV(sharedir), zc->zc_string, ++ &vattr, EXCL, 0, &resourcevp, kcred, 0, NULL, &vsec); ++ if (resourcevp) ++ VN_RELE(resourcevp); ++ break; ++ ++ case ZFS_SMB_ACL_REMOVE: ++ error = VOP_REMOVE(ZTOV(sharedir), zc->zc_string, kcred, ++ NULL, 0); ++ break; ++ ++ case ZFS_SMB_ACL_RENAME: ++ if ((error = get_nvlist(zc->zc_nvlist_src, ++ zc->zc_nvlist_src_size, zc->zc_iflags, &nvlist)) != 0) { ++ VN_RELE(vp); ++ ZFS_EXIT(zsb); ++ return (error); ++ } ++ if (nvlist_lookup_string(nvlist, ZFS_SMB_ACL_SRC, &src) || ++ nvlist_lookup_string(nvlist, ZFS_SMB_ACL_TARGET, ++ &target)) { ++ VN_RELE(vp); ++ VN_RELE(ZTOV(sharedir)); ++ ZFS_EXIT(zsb); ++ nvlist_free(nvlist); ++ return (error); ++ } ++ error = VOP_RENAME(ZTOV(sharedir), src, ZTOV(sharedir), target, ++ kcred, NULL, 0); ++ nvlist_free(nvlist); ++ break; ++ ++ case ZFS_SMB_ACL_PURGE: ++ error = zfs_smb_acl_purge(sharedir); ++ break; ++ ++ default: ++ error = EINVAL; ++ break; ++ } ++ ++ VN_RELE(vp); ++ VN_RELE(ZTOV(sharedir)); ++ ++ ZFS_EXIT(zsb); ++ ++ return (error); ++#else ++ return (ENOTSUP); ++#endif /* HAVE_SMB_SHARE */ ++} ++ ++/* ++ * inputs: ++ * zc_name name of filesystem ++ * zc_value short name of snap ++ * zc_string user-supplied tag for this hold ++ * zc_cookie recursive flag ++ * zc_temphold set if hold is temporary ++ * zc_cleanup_fd cleanup-on-exit file descriptor for calling process ++ * zc_sendobj if non-zero, the objid for zc_name@zc_value ++ * zc_createtxg if zc_sendobj is non-zero, snap must have zc_createtxg ++ * ++ * outputs: none ++ */ ++static int ++zfs_ioc_hold(zfs_cmd_t *zc) ++{ ++ boolean_t recursive = zc->zc_cookie; ++ spa_t *spa; ++ dsl_pool_t *dp; ++ dsl_dataset_t *ds; ++ int error; ++ minor_t minor = 0; ++ ++ if (snapshot_namecheck(zc->zc_value, NULL, NULL) != 0) ++ return (EINVAL); ++ ++ if (zc->zc_sendobj == 0) { ++ return (dsl_dataset_user_hold(zc->zc_name, zc->zc_value, ++ zc->zc_string, 
recursive, zc->zc_temphold, ++ zc->zc_cleanup_fd)); ++ } ++ ++ if (recursive) ++ return (EINVAL); ++ ++ error = spa_open(zc->zc_name, &spa, FTAG); ++ if (error) ++ return (error); ++ ++ dp = spa_get_dsl(spa); ++ rw_enter(&dp->dp_config_rwlock, RW_READER); ++ error = dsl_dataset_hold_obj(dp, zc->zc_sendobj, FTAG, &ds); ++ rw_exit(&dp->dp_config_rwlock); ++ spa_close(spa, FTAG); ++ if (error) ++ return (error); ++ ++ /* ++ * Until we have a hold on this snapshot, it's possible that ++ * zc_sendobj could've been destroyed and reused as part ++ * of a later txg. Make sure we're looking at the right object. ++ */ ++ if (zc->zc_createtxg != ds->ds_phys->ds_creation_txg) { ++ dsl_dataset_rele(ds, FTAG); ++ return (ENOENT); ++ } ++ ++ if (zc->zc_cleanup_fd != -1 && zc->zc_temphold) { ++ error = zfs_onexit_fd_hold(zc->zc_cleanup_fd, &minor); ++ if (error) { ++ dsl_dataset_rele(ds, FTAG); ++ return (error); ++ } ++ } ++ ++ error = dsl_dataset_user_hold_for_send(ds, zc->zc_string, ++ zc->zc_temphold); ++ if (minor != 0) { ++ if (error == 0) { ++ dsl_register_onexit_hold_cleanup(ds, zc->zc_string, ++ minor); ++ } ++ zfs_onexit_fd_rele(zc->zc_cleanup_fd); ++ } ++ dsl_dataset_rele(ds, FTAG); ++ ++ return (error); ++} ++ ++/* ++ * inputs: ++ * zc_name name of dataset from which we're releasing a user hold ++ * zc_value short name of snap ++ * zc_string user-supplied tag for this hold ++ * zc_cookie recursive flag ++ * ++ * outputs: none ++ */ ++static int ++zfs_ioc_release(zfs_cmd_t *zc) ++{ ++ boolean_t recursive = zc->zc_cookie; ++ ++ if (snapshot_namecheck(zc->zc_value, NULL, NULL) != 0) ++ return (EINVAL); ++ ++ return (dsl_dataset_user_release(zc->zc_name, zc->zc_value, ++ zc->zc_string, recursive)); ++} ++ ++/* ++ * inputs: ++ * zc_name name of filesystem ++ * ++ * outputs: ++ * zc_nvlist_src{_size} nvlist of snapshot holds ++ */ ++static int ++zfs_ioc_get_holds(zfs_cmd_t *zc) ++{ ++ nvlist_t *nvp; ++ int error; ++ ++ if ((error = dsl_dataset_get_holds(zc->zc_name, &nvp)) == 0) { ++ error = put_nvlist(zc, nvp); ++ nvlist_free(nvp); ++ } ++ ++ return (error); ++} ++ ++/* ++ * inputs: ++ * zc_guid flags (ZEVENT_NONBLOCK) ++ * ++ * outputs: ++ * zc_nvlist_dst next nvlist event ++ * zc_cookie dropped events since last get ++ * zc_cleanup_fd cleanup-on-exit file descriptor ++ */ ++static int ++zfs_ioc_events_next(zfs_cmd_t *zc) ++{ ++ zfs_zevent_t *ze; ++ nvlist_t *event = NULL; ++ minor_t minor; ++ uint64_t dropped = 0; ++ int error; ++ ++ error = zfs_zevent_fd_hold(zc->zc_cleanup_fd, &minor, &ze); ++ if (error != 0) ++ return (error); ++ ++ do { ++ error = zfs_zevent_next(ze, &event, ++ &zc->zc_nvlist_dst_size, &dropped); ++ if (event != NULL) { ++ zc->zc_cookie = dropped; ++ error = put_nvlist(zc, event); ++ nvlist_free(event); ++ } ++ ++ if (zc->zc_guid & ZEVENT_NONBLOCK) ++ break; ++ ++ if ((error == 0) || (error != ENOENT)) ++ break; ++ ++ error = zfs_zevent_wait(ze); ++ if (error) ++ break; ++ } while (1); ++ ++ zfs_zevent_fd_rele(zc->zc_cleanup_fd); ++ ++ return (error); ++} ++ ++/* ++ * outputs: ++ * zc_cookie cleared events count ++ */ ++static int ++zfs_ioc_events_clear(zfs_cmd_t *zc) ++{ ++ int count; ++ ++ zfs_zevent_drain_all(&count); ++ zc->zc_cookie = count; ++ ++ return 0; ++} ++ ++/* ++ * inputs: ++ * zc_name name of new filesystem or snapshot ++ * zc_value full name of old snapshot ++ * ++ * outputs: ++ * zc_cookie space in bytes ++ * zc_objset_type compressed space in bytes ++ * zc_perm_action uncompressed space in bytes ++ */ ++static int ++zfs_ioc_space_written(zfs_cmd_t *zc) ++{ 
++ int error; ++ dsl_dataset_t *new, *old; ++ ++ error = dsl_dataset_hold(zc->zc_name, FTAG, &new); ++ if (error != 0) ++ return (error); ++ error = dsl_dataset_hold(zc->zc_value, FTAG, &old); ++ if (error != 0) { ++ dsl_dataset_rele(new, FTAG); ++ return (error); ++ } ++ ++ error = dsl_dataset_space_written(old, new, &zc->zc_cookie, ++ &zc->zc_objset_type, &zc->zc_perm_action); ++ dsl_dataset_rele(old, FTAG); ++ dsl_dataset_rele(new, FTAG); ++ return (error); ++} ++ ++/* ++ * inputs: ++ * zc_name full name of last snapshot ++ * zc_value full name of first snapshot ++ * ++ * outputs: ++ * zc_cookie space in bytes ++ * zc_objset_type compressed space in bytes ++ * zc_perm_action uncompressed space in bytes ++ */ ++static int ++zfs_ioc_space_snaps(zfs_cmd_t *zc) ++{ ++ int error; ++ dsl_dataset_t *new, *old; ++ ++ error = dsl_dataset_hold(zc->zc_name, FTAG, &new); ++ if (error != 0) ++ return (error); ++ error = dsl_dataset_hold(zc->zc_value, FTAG, &old); ++ if (error != 0) { ++ dsl_dataset_rele(new, FTAG); ++ return (error); ++ } ++ ++ error = dsl_dataset_space_wouldfree(old, new, &zc->zc_cookie, ++ &zc->zc_objset_type, &zc->zc_perm_action); ++ dsl_dataset_rele(old, FTAG); ++ dsl_dataset_rele(new, FTAG); ++ return (error); ++} ++ ++/* ++ * pool create, destroy, and export don't log the history as part of ++ * zfsdev_ioctl, but rather zfs_ioc_pool_create, and zfs_ioc_pool_export ++ * do the logging of those commands. ++ */ ++static zfs_ioc_vec_t zfs_ioc_vec[] = { ++ { zfs_ioc_pool_create, zfs_secpolicy_config, POOL_NAME, B_FALSE, ++ POOL_CHECK_NONE }, ++ { zfs_ioc_pool_destroy, zfs_secpolicy_config, POOL_NAME, B_FALSE, ++ POOL_CHECK_NONE }, ++ { zfs_ioc_pool_import, zfs_secpolicy_config, POOL_NAME, B_TRUE, ++ POOL_CHECK_NONE }, ++ { zfs_ioc_pool_export, zfs_secpolicy_config, POOL_NAME, B_FALSE, ++ POOL_CHECK_NONE }, ++ { zfs_ioc_pool_configs, zfs_secpolicy_none, NO_NAME, B_FALSE, ++ POOL_CHECK_NONE }, ++ { zfs_ioc_pool_stats, zfs_secpolicy_read, POOL_NAME, B_FALSE, ++ POOL_CHECK_NONE }, ++ { zfs_ioc_pool_tryimport, zfs_secpolicy_config, NO_NAME, B_FALSE, ++ POOL_CHECK_NONE }, ++ { zfs_ioc_pool_scan, zfs_secpolicy_config, POOL_NAME, B_TRUE, ++ POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY }, ++ { zfs_ioc_pool_freeze, zfs_secpolicy_config, NO_NAME, B_FALSE, ++ POOL_CHECK_READONLY }, ++ { zfs_ioc_pool_upgrade, zfs_secpolicy_config, POOL_NAME, B_TRUE, ++ POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY }, ++ { zfs_ioc_pool_get_history, zfs_secpolicy_config, POOL_NAME, B_FALSE, ++ POOL_CHECK_NONE }, ++ { zfs_ioc_vdev_add, zfs_secpolicy_config, POOL_NAME, B_TRUE, ++ POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY }, ++ { zfs_ioc_vdev_remove, zfs_secpolicy_config, POOL_NAME, B_TRUE, ++ POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY }, ++ { zfs_ioc_vdev_set_state, zfs_secpolicy_config, POOL_NAME, B_TRUE, ++ POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY }, ++ { zfs_ioc_vdev_attach, zfs_secpolicy_config, POOL_NAME, B_TRUE, ++ POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY }, ++ { zfs_ioc_vdev_detach, zfs_secpolicy_config, POOL_NAME, B_TRUE, ++ POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY }, ++ { zfs_ioc_vdev_setpath, zfs_secpolicy_config, POOL_NAME, B_FALSE, ++ POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY }, ++ { zfs_ioc_vdev_setfru, zfs_secpolicy_config, POOL_NAME, B_FALSE, ++ POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY }, ++ { zfs_ioc_objset_stats, zfs_secpolicy_read, DATASET_NAME, B_FALSE, ++ POOL_CHECK_SUSPENDED }, ++ { zfs_ioc_objset_zplprops, zfs_secpolicy_read, DATASET_NAME, B_FALSE, ++ POOL_CHECK_NONE }, ++ { 
zfs_ioc_dataset_list_next, zfs_secpolicy_read, DATASET_NAME, B_FALSE, ++ POOL_CHECK_SUSPENDED }, ++ { zfs_ioc_snapshot_list_next, zfs_secpolicy_read, DATASET_NAME, B_FALSE, ++ POOL_CHECK_SUSPENDED }, ++ { zfs_ioc_set_prop, zfs_secpolicy_none, DATASET_NAME, B_TRUE, ++ POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY }, ++ { zfs_ioc_create_minor, zfs_secpolicy_config, DATASET_NAME, B_FALSE, ++ POOL_CHECK_NONE }, ++ { zfs_ioc_remove_minor, zfs_secpolicy_config, DATASET_NAME, B_FALSE, ++ POOL_CHECK_NONE }, ++ { zfs_ioc_create, zfs_secpolicy_create, DATASET_NAME, B_TRUE, ++ POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY }, ++ { zfs_ioc_destroy, zfs_secpolicy_destroy, DATASET_NAME, B_TRUE, ++ POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY }, ++ { zfs_ioc_rollback, zfs_secpolicy_rollback, DATASET_NAME, B_TRUE, ++ POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY }, ++ { zfs_ioc_rename, zfs_secpolicy_rename, DATASET_NAME, B_TRUE, ++ POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY }, ++ { zfs_ioc_recv, zfs_secpolicy_receive, DATASET_NAME, B_TRUE, ++ POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY }, ++ { zfs_ioc_send, zfs_secpolicy_send, DATASET_NAME, B_FALSE, ++ POOL_CHECK_NONE }, ++ { zfs_ioc_inject_fault, zfs_secpolicy_inject, NO_NAME, B_FALSE, ++ POOL_CHECK_NONE }, ++ { zfs_ioc_clear_fault, zfs_secpolicy_inject, NO_NAME, B_FALSE, ++ POOL_CHECK_NONE }, ++ { zfs_ioc_inject_list_next, zfs_secpolicy_inject, NO_NAME, B_FALSE, ++ POOL_CHECK_NONE }, ++ { zfs_ioc_error_log, zfs_secpolicy_inject, POOL_NAME, B_FALSE, ++ POOL_CHECK_NONE }, ++ { zfs_ioc_clear, zfs_secpolicy_config, POOL_NAME, B_TRUE, ++ POOL_CHECK_NONE }, ++ { zfs_ioc_promote, zfs_secpolicy_promote, DATASET_NAME, B_TRUE, ++ POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY }, ++ { zfs_ioc_destroy_snaps_nvl, zfs_secpolicy_destroy_recursive, ++ DATASET_NAME, B_TRUE, POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY }, ++ { zfs_ioc_snapshot, zfs_secpolicy_snapshot, DATASET_NAME, B_TRUE, ++ POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY }, ++ { zfs_ioc_dsobj_to_dsname, zfs_secpolicy_diff, POOL_NAME, B_FALSE, ++ POOL_CHECK_NONE }, ++ { zfs_ioc_obj_to_path, zfs_secpolicy_diff, DATASET_NAME, B_FALSE, ++ POOL_CHECK_SUSPENDED }, ++ { zfs_ioc_pool_set_props, zfs_secpolicy_config, POOL_NAME, B_TRUE, ++ POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY }, ++ { zfs_ioc_pool_get_props, zfs_secpolicy_read, POOL_NAME, B_FALSE, ++ POOL_CHECK_NONE }, ++ { zfs_ioc_set_fsacl, zfs_secpolicy_fsacl, DATASET_NAME, B_TRUE, ++ POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY }, ++ { zfs_ioc_get_fsacl, zfs_secpolicy_read, DATASET_NAME, B_FALSE, ++ POOL_CHECK_NONE }, ++ { zfs_ioc_share, zfs_secpolicy_share, DATASET_NAME, B_FALSE, ++ POOL_CHECK_NONE }, ++ { zfs_ioc_inherit_prop, zfs_secpolicy_inherit, DATASET_NAME, B_TRUE, ++ POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY }, ++ { zfs_ioc_smb_acl, zfs_secpolicy_smb_acl, DATASET_NAME, B_FALSE, ++ POOL_CHECK_NONE }, ++ { zfs_ioc_userspace_one, zfs_secpolicy_userspace_one, DATASET_NAME, ++ B_FALSE, POOL_CHECK_NONE }, ++ { zfs_ioc_userspace_many, zfs_secpolicy_userspace_many, DATASET_NAME, ++ B_FALSE, POOL_CHECK_NONE }, ++ { zfs_ioc_userspace_upgrade, zfs_secpolicy_userspace_upgrade, ++ DATASET_NAME, B_FALSE, POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY }, ++ { zfs_ioc_hold, zfs_secpolicy_hold, DATASET_NAME, B_TRUE, ++ POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY }, ++ { zfs_ioc_release, zfs_secpolicy_release, DATASET_NAME, B_TRUE, ++ POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY }, ++ { zfs_ioc_get_holds, zfs_secpolicy_read, DATASET_NAME, B_FALSE, ++ POOL_CHECK_SUSPENDED }, ++ { 
zfs_ioc_objset_recvd_props, zfs_secpolicy_read, DATASET_NAME, B_FALSE, ++ POOL_CHECK_NONE }, ++ { zfs_ioc_vdev_split, zfs_secpolicy_config, POOL_NAME, B_TRUE, ++ POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY }, ++ { zfs_ioc_next_obj, zfs_secpolicy_read, DATASET_NAME, B_FALSE, ++ POOL_CHECK_NONE }, ++ { zfs_ioc_diff, zfs_secpolicy_diff, DATASET_NAME, B_FALSE, ++ POOL_CHECK_NONE }, ++ { zfs_ioc_tmp_snapshot, zfs_secpolicy_tmp_snapshot, DATASET_NAME, ++ B_FALSE, POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY }, ++ { zfs_ioc_obj_to_stats, zfs_secpolicy_diff, DATASET_NAME, B_FALSE, ++ POOL_CHECK_SUSPENDED }, ++ { zfs_ioc_events_next, zfs_secpolicy_config, NO_NAME, B_FALSE, ++ POOL_CHECK_NONE }, ++ { zfs_ioc_events_clear, zfs_secpolicy_config, NO_NAME, B_FALSE, ++ POOL_CHECK_NONE }, ++ { zfs_ioc_pool_reguid, zfs_secpolicy_config, POOL_NAME, B_TRUE, ++ POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY }, ++ { zfs_ioc_space_written, zfs_secpolicy_read, DATASET_NAME, B_FALSE, ++ POOL_CHECK_SUSPENDED }, ++ { zfs_ioc_space_snaps, zfs_secpolicy_read, DATASET_NAME, B_FALSE, ++ POOL_CHECK_SUSPENDED }, ++ { zfs_ioc_pool_reopen, zfs_secpolicy_config, POOL_NAME, B_TRUE, ++ POOL_CHECK_SUSPENDED }, ++ { zfs_ioc_send_progress, zfs_secpolicy_read, DATASET_NAME, B_FALSE, ++ POOL_CHECK_NONE } ++}; ++ ++int ++pool_status_check(const char *name, zfs_ioc_namecheck_t type, ++ zfs_ioc_poolcheck_t check) ++{ ++ spa_t *spa; ++ int error; ++ ++ ASSERT(type == POOL_NAME || type == DATASET_NAME); ++ ++ if (check & POOL_CHECK_NONE) ++ return (0); ++ ++ error = spa_open(name, &spa, FTAG); ++ if (error == 0) { ++ if ((check & POOL_CHECK_SUSPENDED) && spa_suspended(spa)) ++ error = EAGAIN; ++ else if ((check & POOL_CHECK_READONLY) && !spa_writeable(spa)) ++ error = EROFS; ++ spa_close(spa, FTAG); ++ } ++ return (error); ++} ++ ++static void * ++zfsdev_get_state_impl(minor_t minor, enum zfsdev_state_type which) ++{ ++ zfsdev_state_t *zs; ++ ++ ASSERT(MUTEX_HELD(&zfsdev_state_lock)); ++ ++ for (zs = list_head(&zfsdev_state_list); zs != NULL; ++ zs = list_next(&zfsdev_state_list, zs)) { ++ if (zs->zs_minor == minor) { ++ switch (which) { ++ case ZST_ONEXIT: return (zs->zs_onexit); ++ case ZST_ZEVENT: return (zs->zs_zevent); ++ case ZST_ALL: return (zs); ++ } ++ } ++ } ++ ++ return NULL; ++} ++ ++void * ++zfsdev_get_state(minor_t minor, enum zfsdev_state_type which) ++{ ++ void *ptr; ++ ++ mutex_enter(&zfsdev_state_lock); ++ ptr = zfsdev_get_state_impl(minor, which); ++ mutex_exit(&zfsdev_state_lock); ++ ++ return ptr; ++} ++ ++minor_t ++zfsdev_getminor(struct file *filp) ++{ ++ ASSERT(filp != NULL); ++ ASSERT(filp->private_data != NULL); ++ ++ return (((zfsdev_state_t *)filp->private_data)->zs_minor); ++} ++ ++/* ++ * Find a free minor number. The zfsdev_state_list is expected to ++ * be short since it is only a list of currently open file handles. 
++ */ ++minor_t ++zfsdev_minor_alloc(void) ++{ ++ static minor_t last_minor = 0; ++ minor_t m; ++ ++ ASSERT(MUTEX_HELD(&zfsdev_state_lock)); ++ ++ for (m = last_minor + 1; m != last_minor; m++) { ++ if (m > ZFSDEV_MAX_MINOR) ++ m = 1; ++ if (zfsdev_get_state_impl(m, ZST_ALL) == NULL) { ++ last_minor = m; ++ return (m); ++ } ++ } ++ ++ return (0); ++} ++ ++static int ++zfsdev_state_init(struct file *filp) ++{ ++ zfsdev_state_t *zs; ++ minor_t minor; ++ ++ ASSERT(MUTEX_HELD(&zfsdev_state_lock)); ++ ++ minor = zfsdev_minor_alloc(); ++ if (minor == 0) ++ return (ENXIO); ++ ++ zs = kmem_zalloc( sizeof(zfsdev_state_t), KM_SLEEP); ++ if (zs == NULL) ++ return (ENOMEM); ++ ++ zs->zs_file = filp; ++ zs->zs_minor = minor; ++ filp->private_data = zs; ++ ++ zfs_onexit_init((zfs_onexit_t **)&zs->zs_onexit); ++ zfs_zevent_init((zfs_zevent_t **)&zs->zs_zevent); ++ ++ list_insert_tail(&zfsdev_state_list, zs); ++ ++ return (0); ++} ++ ++static int ++zfsdev_state_destroy(struct file *filp) ++{ ++ zfsdev_state_t *zs; ++ ++ ASSERT(MUTEX_HELD(&zfsdev_state_lock)); ++ ASSERT(filp->private_data != NULL); ++ ++ zs = filp->private_data; ++ zfs_onexit_destroy(zs->zs_onexit); ++ zfs_zevent_destroy(zs->zs_zevent); ++ ++ list_remove(&zfsdev_state_list, zs); ++ kmem_free(zs, sizeof(zfsdev_state_t)); ++ ++ return 0; ++} ++ ++static int ++zfsdev_open(struct inode *ino, struct file *filp) ++{ ++ int error; ++ ++ mutex_enter(&zfsdev_state_lock); ++ error = zfsdev_state_init(filp); ++ mutex_exit(&zfsdev_state_lock); ++ ++ return (-error); ++} ++ ++static int ++zfsdev_release(struct inode *ino, struct file *filp) ++{ ++ int error; ++ ++ mutex_enter(&zfsdev_state_lock); ++ error = zfsdev_state_destroy(filp); ++ mutex_exit(&zfsdev_state_lock); ++ ++ return (-error); ++} ++ ++static long ++zfsdev_ioctl(struct file *filp, unsigned cmd, unsigned long arg) ++{ ++ zfs_cmd_t *zc; ++ uint_t vec; ++ int error, rc, flag = 0; ++ ++ vec = cmd - ZFS_IOC; ++ if (vec >= sizeof (zfs_ioc_vec) / sizeof (zfs_ioc_vec[0])) ++ return (-EINVAL); ++ ++ zc = kmem_zalloc(sizeof (zfs_cmd_t), KM_SLEEP | KM_NODEBUG); ++ ++ error = ddi_copyin((void *)arg, zc, sizeof (zfs_cmd_t), flag); ++ if (error != 0) ++ error = EFAULT; ++ ++ if ((error == 0) && !(flag & FKIOCTL)) ++ error = zfs_ioc_vec[vec].zvec_secpolicy(zc, CRED()); ++ ++ /* ++ * Ensure that all pool/dataset names are valid before we pass down to ++ * the lower layers. 
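++	 *
++	 * As a sketch of the dispatch path (illustrative, not exhaustive):
++	 * a caller opens the control node (typically /dev/zfs), fills in a
++	 * zfs_cmd_t and issues an ioctl whose command number is ZFS_IOC + n;
++	 * the handler, security policy, name type and pool-state checks are
++	 * then all taken from zfs_ioc_vec[n].  For instance slot 0 pairs
++	 * zfs_ioc_pool_create with zfs_secpolicy_config, POOL_NAME and
++	 * POOL_CHECK_NONE.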
++ */ ++ if (error == 0) { ++ zc->zc_name[sizeof (zc->zc_name) - 1] = '\0'; ++ zc->zc_iflags = flag & FKIOCTL; ++ switch (zfs_ioc_vec[vec].zvec_namecheck) { ++ case POOL_NAME: ++ if (pool_namecheck(zc->zc_name, NULL, NULL) != 0) ++ error = EINVAL; ++ error = pool_status_check(zc->zc_name, ++ zfs_ioc_vec[vec].zvec_namecheck, ++ zfs_ioc_vec[vec].zvec_pool_check); ++ break; ++ ++ case DATASET_NAME: ++ if (dataset_namecheck(zc->zc_name, NULL, NULL) != 0) ++ error = EINVAL; ++ error = pool_status_check(zc->zc_name, ++ zfs_ioc_vec[vec].zvec_namecheck, ++ zfs_ioc_vec[vec].zvec_pool_check); ++ break; ++ ++ case NO_NAME: ++ break; ++ } ++ } ++ ++ if (error == 0) ++ error = zfs_ioc_vec[vec].zvec_func(zc); ++ ++ rc = ddi_copyout(zc, (void *)arg, sizeof (zfs_cmd_t), flag); ++ if (error == 0) { ++ if (rc != 0) ++ error = EFAULT; ++ if (zfs_ioc_vec[vec].zvec_his_log) ++ zfs_log_history(zc); ++ } ++ ++ kmem_free(zc, sizeof (zfs_cmd_t)); ++ return (-error); ++} ++ ++#ifdef CONFIG_COMPAT ++static long ++zfsdev_compat_ioctl(struct file *filp, unsigned cmd, unsigned long arg) ++{ ++ return zfsdev_ioctl(filp, cmd, arg); ++} ++#else ++#define zfsdev_compat_ioctl NULL ++#endif ++ ++static const struct file_operations zfsdev_fops = { ++ .open = zfsdev_open, ++ .release = zfsdev_release, ++ .unlocked_ioctl = zfsdev_ioctl, ++ .compat_ioctl = zfsdev_compat_ioctl, ++ .owner = THIS_MODULE, ++}; ++ ++static struct miscdevice zfs_misc = { ++ .minor = MISC_DYNAMIC_MINOR, ++ .name = ZFS_DRIVER, ++ .fops = &zfsdev_fops, ++}; ++ ++static int ++zfs_attach(void) ++{ ++ int error; ++ ++ mutex_init(&zfsdev_state_lock, NULL, MUTEX_DEFAULT, NULL); ++ list_create(&zfsdev_state_list, sizeof (zfsdev_state_t), ++ offsetof(zfsdev_state_t, zs_next)); ++ ++ error = misc_register(&zfs_misc); ++ if (error) { ++ printk(KERN_INFO "ZFS: misc_register() failed %d\n", error); ++ return (error); ++ } ++ ++ return (0); ++} ++ ++static void ++zfs_detach(void) ++{ ++ int error; ++ ++ error = misc_deregister(&zfs_misc); ++ if (error) ++ printk(KERN_INFO "ZFS: misc_deregister() failed %d\n", error); ++ ++ mutex_destroy(&zfsdev_state_lock); ++ list_destroy(&zfsdev_state_list); ++} ++ ++uint_t zfs_fsyncer_key; ++extern uint_t rrw_tsd_key; ++ ++#ifdef DEBUG ++#define ZFS_DEBUG_STR " (DEBUG mode)" ++#else ++#define ZFS_DEBUG_STR "" ++#endif ++ ++int ++_init(void) ++{ ++ int error; ++ ++ spa_init(FREAD | FWRITE); ++ zfs_init(); ++ ++ if ((error = zvol_init()) != 0) ++ goto out1; ++ ++ if ((error = zfs_attach()) != 0) ++ goto out2; ++ ++ tsd_create(&zfs_fsyncer_key, NULL); ++ tsd_create(&rrw_tsd_key, NULL); ++ ++ printk(KERN_NOTICE "ZFS: Loaded module v%s-%s%s, " ++ "ZFS pool version %s, ZFS filesystem version %s\n", ++ ZFS_META_VERSION, ZFS_META_RELEASE, ZFS_DEBUG_STR, ++ SPA_VERSION_STRING, ZPL_VERSION_STRING); ++ ++ return (0); ++ ++out2: ++ (void) zvol_fini(); ++out1: ++ zfs_fini(); ++ spa_fini(); ++ printk(KERN_NOTICE "ZFS: Failed to Load ZFS Filesystem v%s-%s%s" ++ ", rc = %d\n", ZFS_META_VERSION, ZFS_META_RELEASE, ++ ZFS_DEBUG_STR, error); ++ ++ return (error); ++} ++ ++int ++_fini(void) ++{ ++ zfs_detach(); ++ zvol_fini(); ++ zfs_fini(); ++ spa_fini(); ++ ++ tsd_destroy(&zfs_fsyncer_key); ++ tsd_destroy(&rrw_tsd_key); ++ ++ printk(KERN_NOTICE "ZFS: Unloaded module v%s-%s%s\n", ++ ZFS_META_VERSION, ZFS_META_RELEASE, ZFS_DEBUG_STR); ++ ++ return (0); ++} ++ ++#ifdef HAVE_SPL ++spl_module_init(_init); ++spl_module_exit(_fini); ++ ++MODULE_DESCRIPTION("ZFS"); ++MODULE_AUTHOR(ZFS_META_AUTHOR); ++MODULE_LICENSE(ZFS_META_LICENSE); ++#endif /* HAVE_SPL 
*/ +diff -uNr linux-3.2.33-go.orig/fs/zfs/zfs/zfs_log.c linux-3.2.33-go/fs/zfs/zfs/zfs_log.c +--- linux-3.2.33-go.orig/fs/zfs/zfs/zfs_log.c 1970-01-01 01:00:00.000000000 +0100 ++++ linux-3.2.33-go/fs/zfs/zfs/zfs_log.c 2012-11-16 23:25:34.348039346 +0100 +@@ -0,0 +1,682 @@ ++/* ++ * CDDL HEADER START ++ * ++ * The contents of this file are subject to the terms of the ++ * Common Development and Distribution License (the "License"). ++ * You may not use this file except in compliance with the License. ++ * ++ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE ++ * or http://www.opensolaris.org/os/licensing. ++ * See the License for the specific language governing permissions ++ * and limitations under the License. ++ * ++ * When distributing Covered Code, include this CDDL HEADER in each ++ * file and include the License file at usr/src/OPENSOLARIS.LICENSE. ++ * If applicable, add the following below this CDDL HEADER, with the ++ * fields enclosed by brackets "[]" replaced with your own identifying ++ * information: Portions Copyright [yyyy] [name of copyright owner] ++ * ++ * CDDL HEADER END ++ */ ++/* ++ * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved. ++ */ ++ ++ ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++ ++/* ++ * These zfs_log_* functions must be called within a dmu tx, in one ++ * of 2 contexts depending on zilog->z_replay: ++ * ++ * Non replay mode ++ * --------------- ++ * We need to record the transaction so that if it is committed to ++ * the Intent Log then it can be replayed. An intent log transaction ++ * structure (itx_t) is allocated and all the information necessary to ++ * possibly replay the transaction is saved in it. The itx is then assigned ++ * a sequence number and inserted in the in-memory list anchored in the zilog. ++ * ++ * Replay mode ++ * ----------- ++ * We need to mark the intent log record as replayed in the log header. ++ * This is done in the same transaction as the replay so that they ++ * commit atomically. ++ */ ++ ++int ++zfs_log_create_txtype(zil_create_t type, vsecattr_t *vsecp, vattr_t *vap) ++{ ++ int isxvattr = (vap->va_mask & ATTR_XVATTR); ++ switch (type) { ++ case Z_FILE: ++ if (vsecp == NULL && !isxvattr) ++ return (TX_CREATE); ++ if (vsecp && isxvattr) ++ return (TX_CREATE_ACL_ATTR); ++ if (vsecp) ++ return (TX_CREATE_ACL); ++ else ++ return (TX_CREATE_ATTR); ++ /*NOTREACHED*/ ++ case Z_DIR: ++ if (vsecp == NULL && !isxvattr) ++ return (TX_MKDIR); ++ if (vsecp && isxvattr) ++ return (TX_MKDIR_ACL_ATTR); ++ if (vsecp) ++ return (TX_MKDIR_ACL); ++ else ++ return (TX_MKDIR_ATTR); ++ case Z_XATTRDIR: ++ return (TX_MKXATTR); ++ } ++ ASSERT(0); ++ return (TX_MAX_TYPE); ++} ++ ++/* ++ * build up the log data necessary for logging xvattr_t ++ * First lr_attr_t is initialized. following the lr_attr_t ++ * is the mapsize and attribute bitmap copied from the xvattr_t. ++ * Following the bitmap and bitmapsize two 64 bit words are reserved ++ * for the create time which may be set. Following the create time ++ * records a single 64 bit integer which has the bits to set on ++ * replay for the xvattr. 
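To make the record layout just described concrete, the toy program below builds and walks such a variable-length region in the same order zfs_log_xvattr() does: the mask size, that many 32-bit bitmap words, then a 64-bit word of packed attribute bits followed by two 64-bit create-time words (the real record also reserves room for an AV scan stamp after these, which this sketch omits). The buffer and values are invented; only the pointer arithmetic mirrors the code that follows.

#include <stdio.h>
#include <stdint.h>
#include <stdlib.h>

int
main(void)
{
    uint32_t masksize = 3;    /* stand-in for xva_mapsize */
    size_t sz = sizeof (uint32_t) * (1 + masksize) + 3 * sizeof (uint64_t);
    uint32_t *rec = calloc(1, sz);
    uint32_t *bitmap;
    uint64_t *attrs, *crtime;
    uint32_t i;

    rec[0] = masksize;
    bitmap = &rec[1];
    for (i = 0; i != masksize; i++, bitmap++)
        *bitmap = 0xf0 | i;          /* stand-in for xva_reqattrmap[] */

    attrs = (uint64_t *)bitmap;      /* packed attribute bits */
    crtime = attrs + 1;              /* two words of create time */
    *attrs = 0x5;
    crtime[0] = 1353104734;          /* seconds */
    crtime[1] = 0;                   /* nanoseconds */

    printf("record is %zu bytes, attrs=0x%llx\n",
        sz, (unsigned long long)*attrs);
    free(rec);
    return (0);
}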
++ */ ++static void ++zfs_log_xvattr(lr_attr_t *lrattr, xvattr_t *xvap) ++{ ++ uint32_t *bitmap; ++ uint64_t *attrs; ++ uint64_t *crtime; ++ xoptattr_t *xoap; ++ void *scanstamp; ++ int i; ++ ++ xoap = xva_getxoptattr(xvap); ++ ASSERT(xoap); ++ ++ lrattr->lr_attr_masksize = xvap->xva_mapsize; ++ bitmap = &lrattr->lr_attr_bitmap; ++ for (i = 0; i != xvap->xva_mapsize; i++, bitmap++) { ++ *bitmap = xvap->xva_reqattrmap[i]; ++ } ++ ++ /* Now pack the attributes up in a single uint64_t */ ++ attrs = (uint64_t *)bitmap; ++ crtime = attrs + 1; ++ scanstamp = (caddr_t)(crtime + 2); ++ *attrs = 0; ++ if (XVA_ISSET_REQ(xvap, XAT_READONLY)) ++ *attrs |= (xoap->xoa_readonly == 0) ? 0 : ++ XAT0_READONLY; ++ if (XVA_ISSET_REQ(xvap, XAT_HIDDEN)) ++ *attrs |= (xoap->xoa_hidden == 0) ? 0 : ++ XAT0_HIDDEN; ++ if (XVA_ISSET_REQ(xvap, XAT_SYSTEM)) ++ *attrs |= (xoap->xoa_system == 0) ? 0 : ++ XAT0_SYSTEM; ++ if (XVA_ISSET_REQ(xvap, XAT_ARCHIVE)) ++ *attrs |= (xoap->xoa_archive == 0) ? 0 : ++ XAT0_ARCHIVE; ++ if (XVA_ISSET_REQ(xvap, XAT_IMMUTABLE)) ++ *attrs |= (xoap->xoa_immutable == 0) ? 0 : ++ XAT0_IMMUTABLE; ++ if (XVA_ISSET_REQ(xvap, XAT_NOUNLINK)) ++ *attrs |= (xoap->xoa_nounlink == 0) ? 0 : ++ XAT0_NOUNLINK; ++ if (XVA_ISSET_REQ(xvap, XAT_APPENDONLY)) ++ *attrs |= (xoap->xoa_appendonly == 0) ? 0 : ++ XAT0_APPENDONLY; ++ if (XVA_ISSET_REQ(xvap, XAT_OPAQUE)) ++ *attrs |= (xoap->xoa_opaque == 0) ? 0 : ++ XAT0_APPENDONLY; ++ if (XVA_ISSET_REQ(xvap, XAT_NODUMP)) ++ *attrs |= (xoap->xoa_nodump == 0) ? 0 : ++ XAT0_NODUMP; ++ if (XVA_ISSET_REQ(xvap, XAT_AV_QUARANTINED)) ++ *attrs |= (xoap->xoa_av_quarantined == 0) ? 0 : ++ XAT0_AV_QUARANTINED; ++ if (XVA_ISSET_REQ(xvap, XAT_AV_MODIFIED)) ++ *attrs |= (xoap->xoa_av_modified == 0) ? 0 : ++ XAT0_AV_MODIFIED; ++ if (XVA_ISSET_REQ(xvap, XAT_CREATETIME)) ++ ZFS_TIME_ENCODE(&xoap->xoa_createtime, crtime); ++ if (XVA_ISSET_REQ(xvap, XAT_AV_SCANSTAMP)) ++ bcopy(xoap->xoa_av_scanstamp, scanstamp, AV_SCANSTAMP_SZ); ++ if (XVA_ISSET_REQ(xvap, XAT_REPARSE)) ++ *attrs |= (xoap->xoa_reparse == 0) ? 0 : ++ XAT0_REPARSE; ++ if (XVA_ISSET_REQ(xvap, XAT_OFFLINE)) ++ *attrs |= (xoap->xoa_offline == 0) ? 0 : ++ XAT0_OFFLINE; ++ if (XVA_ISSET_REQ(xvap, XAT_SPARSE)) ++ *attrs |= (xoap->xoa_sparse == 0) ? 0 : ++ XAT0_SPARSE; ++} ++ ++static void * ++zfs_log_fuid_ids(zfs_fuid_info_t *fuidp, void *start) ++{ ++ zfs_fuid_t *zfuid; ++ uint64_t *fuidloc = start; ++ ++ /* First copy in the ACE FUIDs */ ++ for (zfuid = list_head(&fuidp->z_fuids); zfuid; ++ zfuid = list_next(&fuidp->z_fuids, zfuid)) { ++ *fuidloc++ = zfuid->z_logfuid; ++ } ++ return (fuidloc); ++} ++ ++ ++static void * ++zfs_log_fuid_domains(zfs_fuid_info_t *fuidp, void *start) ++{ ++ zfs_fuid_domain_t *zdomain; ++ ++ /* now copy in the domain info, if any */ ++ if (fuidp->z_domain_str_sz != 0) { ++ for (zdomain = list_head(&fuidp->z_domains); zdomain; ++ zdomain = list_next(&fuidp->z_domains, zdomain)) { ++ bcopy((void *)zdomain->z_domain, start, ++ strlen(zdomain->z_domain) + 1); ++ start = (caddr_t)start + ++ strlen(zdomain->z_domain) + 1; ++ } ++ } ++ return (start); ++} ++ ++/* ++ * zfs_log_create() is used to handle TX_CREATE, TX_CREATE_ATTR, TX_MKDIR, ++ * TX_MKDIR_ATTR and TX_MKXATTR ++ * transactions. ++ * ++ * TX_CREATE and TX_MKDIR are standard creates, but they may have FUID ++ * domain information appended prior to the name. In this case the ++ * uid/gid in the log record will be a log centric FUID. 
++ * ++ * TX_CREATE_ACL_ATTR and TX_MKDIR_ACL_ATTR handle special creates that ++ * may contain attributes, ACL and optional fuid information. ++ * ++ * TX_CREATE_ACL and TX_MKDIR_ACL handle special creates that specify ++ * and ACL and normal users/groups in the ACEs. ++ * ++ * There may be an optional xvattr attribute information similar ++ * to zfs_log_setattr. ++ * ++ * Also, after the file name "domain" strings may be appended. ++ */ ++void ++zfs_log_create(zilog_t *zilog, dmu_tx_t *tx, uint64_t txtype, ++ znode_t *dzp, znode_t *zp, char *name, vsecattr_t *vsecp, ++ zfs_fuid_info_t *fuidp, vattr_t *vap) ++{ ++ itx_t *itx; ++ lr_create_t *lr; ++ lr_acl_create_t *lracl; ++ xvattr_t *xvap = (xvattr_t *)vap; ++ size_t aclsize = 0; ++ size_t xvatsize = 0; ++ size_t txsize; ++ void *end; ++ size_t lrsize; ++ size_t namesize = strlen(name) + 1; ++ size_t fuidsz = 0; ++ ++ if (zil_replaying(zilog, tx)) ++ return; ++ ++ /* ++ * If we have FUIDs present then add in space for ++ * domains and ACE fuid's if any. ++ */ ++ if (fuidp) { ++ fuidsz += fuidp->z_domain_str_sz; ++ fuidsz += fuidp->z_fuid_cnt * sizeof (uint64_t); ++ } ++ ++ if (vap->va_mask & ATTR_XVATTR) ++ xvatsize = ZIL_XVAT_SIZE(xvap->xva_mapsize); ++ ++ if ((int)txtype == TX_CREATE_ATTR || (int)txtype == TX_MKDIR_ATTR || ++ (int)txtype == TX_CREATE || (int)txtype == TX_MKDIR || ++ (int)txtype == TX_MKXATTR) { ++ txsize = sizeof (*lr) + namesize + fuidsz + xvatsize; ++ lrsize = sizeof (*lr); ++ } else { ++ aclsize = (vsecp) ? vsecp->vsa_aclentsz : 0; ++ txsize = ++ sizeof (lr_acl_create_t) + namesize + fuidsz + ++ ZIL_ACE_LENGTH(aclsize) + xvatsize; ++ lrsize = sizeof (lr_acl_create_t); ++ } ++ ++ itx = zil_itx_create(txtype, txsize); ++ ++ lr = (lr_create_t *)&itx->itx_lr; ++ lr->lr_doid = dzp->z_id; ++ lr->lr_foid = zp->z_id; ++ lr->lr_mode = zp->z_mode; ++ if (!IS_EPHEMERAL(zp->z_uid)) { ++ lr->lr_uid = (uint64_t)zp->z_uid; ++ } else { ++ lr->lr_uid = fuidp->z_fuid_owner; ++ } ++ if (!IS_EPHEMERAL(zp->z_gid)) { ++ lr->lr_gid = (uint64_t)zp->z_gid; ++ } else { ++ lr->lr_gid = fuidp->z_fuid_group; ++ } ++ (void) sa_lookup(zp->z_sa_hdl, SA_ZPL_GEN(ZTOZSB(zp)), &lr->lr_gen, ++ sizeof (uint64_t)); ++ (void) sa_lookup(zp->z_sa_hdl, SA_ZPL_CRTIME(ZTOZSB(zp)), ++ lr->lr_crtime, sizeof (uint64_t) * 2); ++ ++ if (sa_lookup(zp->z_sa_hdl, SA_ZPL_RDEV(ZTOZSB(zp)), &lr->lr_rdev, ++ sizeof (lr->lr_rdev)) != 0) ++ lr->lr_rdev = 0; ++ ++ /* ++ * Fill in xvattr info if any ++ */ ++ if (vap->va_mask & ATTR_XVATTR) { ++ zfs_log_xvattr((lr_attr_t *)((caddr_t)lr + lrsize), xvap); ++ end = (caddr_t)lr + lrsize + xvatsize; ++ } else { ++ end = (caddr_t)lr + lrsize; ++ } ++ ++ /* Now fill in any ACL info */ ++ ++ if (vsecp) { ++ lracl = (lr_acl_create_t *)&itx->itx_lr; ++ lracl->lr_aclcnt = vsecp->vsa_aclcnt; ++ lracl->lr_acl_bytes = aclsize; ++ lracl->lr_domcnt = fuidp ? fuidp->z_domain_cnt : 0; ++ lracl->lr_fuidcnt = fuidp ? fuidp->z_fuid_cnt : 0; ++ if (vsecp->vsa_aclflags & VSA_ACE_ACLFLAGS) ++ lracl->lr_acl_flags = (uint64_t)vsecp->vsa_aclflags; ++ else ++ lracl->lr_acl_flags = 0; ++ ++ bcopy(vsecp->vsa_aclentp, end, aclsize); ++ end = (caddr_t)end + ZIL_ACE_LENGTH(aclsize); ++ } ++ ++ /* drop in FUID info */ ++ if (fuidp) { ++ end = zfs_log_fuid_ids(fuidp, end); ++ end = zfs_log_fuid_domains(fuidp, end); ++ } ++ /* ++ * Now place file name in log record ++ */ ++ bcopy(name, end, namesize); ++ ++ zil_itx_assign(zilog, itx, tx); ++} ++ ++/* ++ * zfs_log_remove() handles both TX_REMOVE and TX_RMDIR transactions. 
++ */ ++void ++zfs_log_remove(zilog_t *zilog, dmu_tx_t *tx, uint64_t txtype, ++ znode_t *dzp, char *name, uint64_t foid) ++{ ++ itx_t *itx; ++ lr_remove_t *lr; ++ size_t namesize = strlen(name) + 1; ++ ++ if (zil_replaying(zilog, tx)) ++ return; ++ ++ itx = zil_itx_create(txtype, sizeof (*lr) + namesize); ++ lr = (lr_remove_t *)&itx->itx_lr; ++ lr->lr_doid = dzp->z_id; ++ bcopy(name, (char *)(lr + 1), namesize); ++ ++ itx->itx_oid = foid; ++ ++ zil_itx_assign(zilog, itx, tx); ++} ++ ++/* ++ * zfs_log_link() handles TX_LINK transactions. ++ */ ++void ++zfs_log_link(zilog_t *zilog, dmu_tx_t *tx, uint64_t txtype, ++ znode_t *dzp, znode_t *zp, char *name) ++{ ++ itx_t *itx; ++ lr_link_t *lr; ++ size_t namesize = strlen(name) + 1; ++ ++ if (zil_replaying(zilog, tx)) ++ return; ++ ++ itx = zil_itx_create(txtype, sizeof (*lr) + namesize); ++ lr = (lr_link_t *)&itx->itx_lr; ++ lr->lr_doid = dzp->z_id; ++ lr->lr_link_obj = zp->z_id; ++ bcopy(name, (char *)(lr + 1), namesize); ++ ++ zil_itx_assign(zilog, itx, tx); ++} ++ ++/* ++ * zfs_log_symlink() handles TX_SYMLINK transactions. ++ */ ++void ++zfs_log_symlink(zilog_t *zilog, dmu_tx_t *tx, uint64_t txtype, ++ znode_t *dzp, znode_t *zp, char *name, char *link) ++{ ++ itx_t *itx; ++ lr_create_t *lr; ++ size_t namesize = strlen(name) + 1; ++ size_t linksize = strlen(link) + 1; ++ ++ if (zil_replaying(zilog, tx)) ++ return; ++ ++ itx = zil_itx_create(txtype, sizeof (*lr) + namesize + linksize); ++ lr = (lr_create_t *)&itx->itx_lr; ++ lr->lr_doid = dzp->z_id; ++ lr->lr_foid = zp->z_id; ++ lr->lr_uid = zp->z_uid; ++ lr->lr_gid = zp->z_gid; ++ lr->lr_mode = zp->z_mode; ++ (void) sa_lookup(zp->z_sa_hdl, SA_ZPL_GEN(ZTOZSB(zp)), &lr->lr_gen, ++ sizeof (uint64_t)); ++ (void) sa_lookup(zp->z_sa_hdl, SA_ZPL_CRTIME(ZTOZSB(zp)), ++ lr->lr_crtime, sizeof (uint64_t) * 2); ++ bcopy(name, (char *)(lr + 1), namesize); ++ bcopy(link, (char *)(lr + 1) + namesize, linksize); ++ ++ zil_itx_assign(zilog, itx, tx); ++} ++ ++/* ++ * zfs_log_rename() handles TX_RENAME transactions. ++ */ ++void ++zfs_log_rename(zilog_t *zilog, dmu_tx_t *tx, uint64_t txtype, ++ znode_t *sdzp, char *sname, znode_t *tdzp, char *dname, znode_t *szp) ++{ ++ itx_t *itx; ++ lr_rename_t *lr; ++ size_t snamesize = strlen(sname) + 1; ++ size_t dnamesize = strlen(dname) + 1; ++ ++ if (zil_replaying(zilog, tx)) ++ return; ++ ++ itx = zil_itx_create(txtype, sizeof (*lr) + snamesize + dnamesize); ++ lr = (lr_rename_t *)&itx->itx_lr; ++ lr->lr_sdoid = sdzp->z_id; ++ lr->lr_tdoid = tdzp->z_id; ++ bcopy(sname, (char *)(lr + 1), snamesize); ++ bcopy(dname, (char *)(lr + 1) + snamesize, dnamesize); ++ itx->itx_oid = szp->z_id; ++ ++ zil_itx_assign(zilog, itx, tx); ++} ++ ++/* ++ * zfs_log_write() handles TX_WRITE transactions. ++ */ ++long zfs_immediate_write_sz = 32768; ++ ++void ++zfs_log_write(zilog_t *zilog, dmu_tx_t *tx, int txtype, ++ znode_t *zp, offset_t off, ssize_t resid, int ioflag) ++{ ++ itx_wr_state_t write_state; ++ boolean_t slogging; ++ uintptr_t fsync_cnt; ++ ssize_t immediate_write_sz; ++ ++ if (zil_replaying(zilog, tx) || zp->z_unlinked) ++ return; ++ ++ immediate_write_sz = (zilog->zl_logbias == ZFS_LOGBIAS_THROUGHPUT) ++ ? 
0 : (ssize_t)zfs_immediate_write_sz; ++ ++ slogging = spa_has_slogs(zilog->zl_spa) && ++ (zilog->zl_logbias == ZFS_LOGBIAS_LATENCY); ++ if (resid > immediate_write_sz && !slogging && resid <= zp->z_blksz) ++ write_state = WR_INDIRECT; ++ else if (ioflag & (FSYNC | FDSYNC)) ++ write_state = WR_COPIED; ++ else ++ write_state = WR_NEED_COPY; ++ ++ if ((fsync_cnt = (uintptr_t)tsd_get(zfs_fsyncer_key)) != 0) { ++ (void) tsd_set(zfs_fsyncer_key, (void *)(fsync_cnt - 1)); ++ } ++ ++ while (resid) { ++ itx_t *itx; ++ lr_write_t *lr; ++ ssize_t len; ++ ++ /* ++ * If the write would overflow the largest block then split it. ++ */ ++ if (write_state != WR_INDIRECT && resid > ZIL_MAX_LOG_DATA) ++ len = SPA_MAXBLOCKSIZE >> 1; ++ else ++ len = resid; ++ ++ itx = zil_itx_create(txtype, sizeof (*lr) + ++ (write_state == WR_COPIED ? len : 0)); ++ lr = (lr_write_t *)&itx->itx_lr; ++ if (write_state == WR_COPIED && dmu_read(ZTOZSB(zp)->z_os, ++ zp->z_id, off, len, lr + 1, DMU_READ_NO_PREFETCH) != 0) { ++ zil_itx_destroy(itx); ++ itx = zil_itx_create(txtype, sizeof (*lr)); ++ lr = (lr_write_t *)&itx->itx_lr; ++ write_state = WR_NEED_COPY; ++ } ++ ++ itx->itx_wr_state = write_state; ++ if (write_state == WR_NEED_COPY) ++ itx->itx_sod += len; ++ lr->lr_foid = zp->z_id; ++ lr->lr_offset = off; ++ lr->lr_length = len; ++ lr->lr_blkoff = 0; ++ BP_ZERO(&lr->lr_blkptr); ++ ++ itx->itx_private = ZTOZSB(zp); ++ ++ if (!(ioflag & (FSYNC | FDSYNC)) && (zp->z_sync_cnt == 0) && ++ (fsync_cnt == 0)) ++ itx->itx_sync = B_FALSE; ++ ++ zil_itx_assign(zilog, itx, tx); ++ ++ off += len; ++ resid -= len; ++ } ++} ++ ++/* ++ * zfs_log_truncate() handles TX_TRUNCATE transactions. ++ */ ++void ++zfs_log_truncate(zilog_t *zilog, dmu_tx_t *tx, int txtype, ++ znode_t *zp, uint64_t off, uint64_t len) ++{ ++ itx_t *itx; ++ lr_truncate_t *lr; ++ ++ if (zil_replaying(zilog, tx) || zp->z_unlinked) ++ return; ++ ++ itx = zil_itx_create(txtype, sizeof (*lr)); ++ lr = (lr_truncate_t *)&itx->itx_lr; ++ lr->lr_foid = zp->z_id; ++ lr->lr_offset = off; ++ lr->lr_length = len; ++ ++ itx->itx_sync = (zp->z_sync_cnt != 0); ++ zil_itx_assign(zilog, itx, tx); ++} ++ ++/* ++ * zfs_log_setattr() handles TX_SETATTR transactions. 
++ */ ++void ++zfs_log_setattr(zilog_t *zilog, dmu_tx_t *tx, int txtype, ++ znode_t *zp, vattr_t *vap, uint_t mask_applied, zfs_fuid_info_t *fuidp) ++{ ++ itx_t *itx; ++ lr_setattr_t *lr; ++ xvattr_t *xvap = (xvattr_t *)vap; ++ size_t recsize = sizeof (lr_setattr_t); ++ void *start; ++ ++ if (zil_replaying(zilog, tx) || zp->z_unlinked) ++ return; ++ ++ /* ++ * If XVATTR set, then log record size needs to allow ++ * for lr_attr_t + xvattr mask, mapsize and create time ++ * plus actual attribute values ++ */ ++ if (vap->va_mask & ATTR_XVATTR) ++ recsize = sizeof (*lr) + ZIL_XVAT_SIZE(xvap->xva_mapsize); ++ ++ if (fuidp) ++ recsize += fuidp->z_domain_str_sz; ++ ++ itx = zil_itx_create(txtype, recsize); ++ lr = (lr_setattr_t *)&itx->itx_lr; ++ lr->lr_foid = zp->z_id; ++ lr->lr_mask = (uint64_t)mask_applied; ++ lr->lr_mode = (uint64_t)vap->va_mode; ++ if ((mask_applied & ATTR_UID) && IS_EPHEMERAL(vap->va_uid)) ++ lr->lr_uid = fuidp->z_fuid_owner; ++ else ++ lr->lr_uid = (uint64_t)vap->va_uid; ++ ++ if ((mask_applied & ATTR_GID) && IS_EPHEMERAL(vap->va_gid)) ++ lr->lr_gid = fuidp->z_fuid_group; ++ else ++ lr->lr_gid = (uint64_t)vap->va_gid; ++ ++ lr->lr_size = (uint64_t)vap->va_size; ++ ZFS_TIME_ENCODE(&vap->va_atime, lr->lr_atime); ++ ZFS_TIME_ENCODE(&vap->va_mtime, lr->lr_mtime); ++ start = (lr_setattr_t *)(lr + 1); ++ if (vap->va_mask & ATTR_XVATTR) { ++ zfs_log_xvattr((lr_attr_t *)start, xvap); ++ start = (caddr_t)start + ZIL_XVAT_SIZE(xvap->xva_mapsize); ++ } ++ ++ /* ++ * Now stick on domain information if any on end ++ */ ++ ++ if (fuidp) ++ (void) zfs_log_fuid_domains(fuidp, start); ++ ++ itx->itx_sync = (zp->z_sync_cnt != 0); ++ zil_itx_assign(zilog, itx, tx); ++} ++ ++/* ++ * zfs_log_acl() handles TX_ACL transactions. ++ */ ++void ++zfs_log_acl(zilog_t *zilog, dmu_tx_t *tx, znode_t *zp, ++ vsecattr_t *vsecp, zfs_fuid_info_t *fuidp) ++{ ++ itx_t *itx; ++ lr_acl_v0_t *lrv0; ++ lr_acl_t *lr; ++ int txtype; ++ int lrsize; ++ size_t txsize; ++ size_t aclbytes = vsecp->vsa_aclentsz; ++ ++ if (zil_replaying(zilog, tx) || zp->z_unlinked) ++ return; ++ ++ txtype = (ZTOZSB(zp)->z_version < ZPL_VERSION_FUID) ? ++ TX_ACL_V0 : TX_ACL; ++ ++ if (txtype == TX_ACL) ++ lrsize = sizeof (*lr); ++ else ++ lrsize = sizeof (*lrv0); ++ ++ txsize = lrsize + ++ ((txtype == TX_ACL) ? ZIL_ACE_LENGTH(aclbytes) : aclbytes) + ++ (fuidp ? fuidp->z_domain_str_sz : 0) + ++ sizeof (uint64_t) * (fuidp ? fuidp->z_fuid_cnt : 0); ++ ++ itx = zil_itx_create(txtype, txsize); ++ ++ lr = (lr_acl_t *)&itx->itx_lr; ++ lr->lr_foid = zp->z_id; ++ if (txtype == TX_ACL) { ++ lr->lr_acl_bytes = aclbytes; ++ lr->lr_domcnt = fuidp ? fuidp->z_domain_cnt : 0; ++ lr->lr_fuidcnt = fuidp ? 
fuidp->z_fuid_cnt : 0; ++ if (vsecp->vsa_mask & VSA_ACE_ACLFLAGS) ++ lr->lr_acl_flags = (uint64_t)vsecp->vsa_aclflags; ++ else ++ lr->lr_acl_flags = 0; ++ } ++ lr->lr_aclcnt = (uint64_t)vsecp->vsa_aclcnt; ++ ++ if (txtype == TX_ACL_V0) { ++ lrv0 = (lr_acl_v0_t *)lr; ++ bcopy(vsecp->vsa_aclentp, (ace_t *)(lrv0 + 1), aclbytes); ++ } else { ++ void *start = (ace_t *)(lr + 1); ++ ++ bcopy(vsecp->vsa_aclentp, start, aclbytes); ++ ++ start = (caddr_t)start + ZIL_ACE_LENGTH(aclbytes); ++ ++ if (fuidp) { ++ start = zfs_log_fuid_ids(fuidp, start); ++ (void) zfs_log_fuid_domains(fuidp, start); ++ } ++ } ++ ++ itx->itx_sync = (zp->z_sync_cnt != 0); ++ zil_itx_assign(zilog, itx, tx); ++} ++ ++#if defined(_KERNEL) && defined(HAVE_SPL) ++module_param(zfs_immediate_write_sz, long, 0644); ++MODULE_PARM_DESC(zfs_immediate_write_sz, "Largest data block to write to zil"); ++#endif +diff -uNr linux-3.2.33-go.orig/fs/zfs/zfs/zfs_onexit.c linux-3.2.33-go/fs/zfs/zfs/zfs_onexit.c +--- linux-3.2.33-go.orig/fs/zfs/zfs/zfs_onexit.c 1970-01-01 01:00:00.000000000 +0100 ++++ linux-3.2.33-go/fs/zfs/zfs/zfs_onexit.c 2012-11-16 23:25:34.347039358 +0100 +@@ -0,0 +1,247 @@ ++/* ++ * CDDL HEADER START ++ * ++ * The contents of this file are subject to the terms of the ++ * Common Development and Distribution License (the "License"). ++ * You may not use this file except in compliance with the License. ++ * ++ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE ++ * or http://www.opensolaris.org/os/licensing. ++ * See the License for the specific language governing permissions ++ * and limitations under the License. ++ * ++ * When distributing Covered Code, include this CDDL HEADER in each ++ * file and include the License file at usr/src/OPENSOLARIS.LICENSE. ++ * If applicable, add the following below this CDDL HEADER, with the ++ * fields enclosed by brackets "[]" replaced with your own identifying ++ * information: Portions Copyright [yyyy] [name of copyright owner] ++ * ++ * CDDL HEADER END ++ */ ++/* ++ * Copyright (c) 2010, Oracle and/or its affiliates. All rights reserved. ++ */ ++ ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++ ++/* ++ * ZFS kernel routines may add/delete callback routines to be invoked ++ * upon process exit (triggered via the close operation from the /dev/zfs ++ * driver). ++ * ++ * These cleanup callbacks are intended to allow for the accumulation ++ * of kernel state across multiple ioctls. User processes participate ++ * simply by opening ZFS_DEV. This causes the ZFS driver to do create ++ * some private data for the file descriptor and generating a unique ++ * minor number. The process then passes along that file descriptor to ++ * each ioctl that might have a cleanup operation. ++ * ++ * Consumers of the onexit routines should call zfs_onexit_fd_hold() early ++ * on to validate the given fd and add a reference to its file table entry. ++ * This allows the consumer to do its work and then add a callback, knowing ++ * that zfs_onexit_add_cb() won't fail with EBADF. When finished, consumers ++ * should call zfs_onexit_fd_rele(). ++ * ++ * A simple example is zfs_ioc_recv(), where we might create an AVL tree ++ * with dataset/GUID mappings and then reuse that tree on subsequent ++ * zfs_ioc_recv() calls. ++ * ++ * On the first zfs_ioc_recv() call, dmu_recv_stream() will kmem_alloc() ++ * the AVL tree and pass it along with a callback function to ++ * zfs_onexit_add_cb(). 
The zfs_onexit_add_cb() routine will register the ++ * callback and return an action handle. ++ * ++ * The action handle is then passed from user space to subsequent ++ * zfs_ioc_recv() calls, so that dmu_recv_stream() can fetch its AVL tree ++ * by calling zfs_onexit_cb_data() with the device minor number and ++ * action handle. ++ * ++ * If the user process exits abnormally, the callback is invoked implicitly ++ * as part of the driver close operation. Once the user space process is ++ * finished with the accumulated kernel state, it can also just call close(2) ++ * on the cleanup fd to trigger the cleanup callback. ++ */ ++ ++void ++zfs_onexit_init(zfs_onexit_t **zop) ++{ ++ zfs_onexit_t *zo; ++ ++ zo = *zop = kmem_zalloc(sizeof (zfs_onexit_t), KM_SLEEP); ++ mutex_init(&zo->zo_lock, NULL, MUTEX_DEFAULT, NULL); ++ list_create(&zo->zo_actions, sizeof (zfs_onexit_action_node_t), ++ offsetof(zfs_onexit_action_node_t, za_link)); ++} ++ ++void ++zfs_onexit_destroy(zfs_onexit_t *zo) ++{ ++ zfs_onexit_action_node_t *ap; ++ ++ mutex_enter(&zo->zo_lock); ++ while ((ap = list_head(&zo->zo_actions)) != NULL) { ++ list_remove(&zo->zo_actions, ap); ++ mutex_exit(&zo->zo_lock); ++ ap->za_func(ap->za_data); ++ kmem_free(ap, sizeof (zfs_onexit_action_node_t)); ++ mutex_enter(&zo->zo_lock); ++ } ++ mutex_exit(&zo->zo_lock); ++ ++ list_destroy(&zo->zo_actions); ++ mutex_destroy(&zo->zo_lock); ++ kmem_free(zo, sizeof (zfs_onexit_t)); ++} ++ ++static int ++zfs_onexit_minor_to_state(minor_t minor, zfs_onexit_t **zo) ++{ ++ *zo = zfsdev_get_state(minor, ZST_ONEXIT); ++ if (*zo == NULL) ++ return (EBADF); ++ ++ return (0); ++} ++ ++/* ++ * Consumers might need to operate by minor number instead of fd, since ++ * they might be running in another thread (e.g. txg_sync_thread). Callers ++ * of this function must call zfs_onexit_fd_rele() when they're finished ++ * using the minor number. ++ */ ++int ++zfs_onexit_fd_hold(int fd, minor_t *minorp) ++{ ++ file_t *fp; ++ zfs_onexit_t *zo; ++ ++ fp = getf(fd); ++ if (fp == NULL) ++ return (EBADF); ++ ++ *minorp = zfsdev_getminor(fp->f_file); ++ return (zfs_onexit_minor_to_state(*minorp, &zo)); ++} ++ ++void ++zfs_onexit_fd_rele(int fd) ++{ ++ releasef(fd); ++} ++ ++/* ++ * Add a callback to be invoked when the calling process exits. ++ */ ++int ++zfs_onexit_add_cb(minor_t minor, void (*func)(void *), void *data, ++ uint64_t *action_handle) ++{ ++ zfs_onexit_t *zo; ++ zfs_onexit_action_node_t *ap; ++ int error; ++ ++ error = zfs_onexit_minor_to_state(minor, &zo); ++ if (error) ++ return (error); ++ ++ ap = kmem_alloc(sizeof (zfs_onexit_action_node_t), KM_SLEEP); ++ list_link_init(&ap->za_link); ++ ap->za_func = func; ++ ap->za_data = data; ++ ++ mutex_enter(&zo->zo_lock); ++ list_insert_tail(&zo->zo_actions, ap); ++ mutex_exit(&zo->zo_lock); ++ if (action_handle) ++ *action_handle = (uint64_t)(uintptr_t)ap; ++ ++ return (0); ++} ++ ++static zfs_onexit_action_node_t * ++zfs_onexit_find_cb(zfs_onexit_t *zo, uint64_t action_handle) ++{ ++ zfs_onexit_action_node_t *match; ++ zfs_onexit_action_node_t *ap; ++ list_t *l; ++ ++ ASSERT(MUTEX_HELD(&zo->zo_lock)); ++ ++ match = (zfs_onexit_action_node_t *)(uintptr_t)action_handle; ++ l = &zo->zo_actions; ++ for (ap = list_head(l); ap != NULL; ap = list_next(l, ap)) { ++ if (match == ap) ++ break; ++ } ++ return (ap); ++} ++ ++/* ++ * Delete the callback, triggering it first if 'fire' is set. 
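The cleanup model described in the comment above — register a callback keyed by an action handle, and have every still-registered callback fire automatically on the final close — can be pictured with a small user-space analogue. Everything below (names, the fixed-size table, the handle encoding) is invented for illustration; it is not how the in-kernel zfs_onexit code stores its actions, which uses the mutex-protected list shown above.

#include <stdio.h>
#include <stdint.h>
#include <stdlib.h>
#include <string.h>

/* One registered cleanup action: a callback plus its private data. */
struct onexit_action {
    void (*func)(void *);
    void *data;
};

#define MAX_ACTIONS 8
static struct onexit_action actions[MAX_ACTIONS];

/* Register a callback; returns an opaque handle (here: slot + 1), 0 on failure. */
static uint64_t
onexit_add_cb(void (*func)(void *), void *data)
{
    int i;

    for (i = 0; i < MAX_ACTIONS; i++) {
        if (actions[i].func == NULL) {
            actions[i].func = func;
            actions[i].data = data;
            return ((uint64_t)i + 1);
        }
    }
    return (0);
}

/* "Close": fire and clear every remaining callback, like the driver release path. */
static void
onexit_close(void)
{
    int i;

    for (i = 0; i < MAX_ACTIONS; i++) {
        if (actions[i].func != NULL) {
            actions[i].func(actions[i].data);
            actions[i].func = NULL;
        }
    }
}

static void
free_state(void *data)
{
    printf("cleaning up %s\n", (char *)data);
    free(data);
}

int
main(void)
{
    char *state = strdup("receive state");
    uint64_t handle = onexit_add_cb(free_state, state);

    printf("registered action handle %llu\n", (unsigned long long)handle);
    onexit_close();  /* callbacks run even if the consumer never cleaned up */
    return (0);
}

The point of the handle is the same in both cases: a later call can find and remove exactly the action it registered, while anything left behind is still cleaned up on close.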
++ */ ++int ++zfs_onexit_del_cb(minor_t minor, uint64_t action_handle, boolean_t fire) ++{ ++ zfs_onexit_t *zo; ++ zfs_onexit_action_node_t *ap; ++ int error; ++ ++ error = zfs_onexit_minor_to_state(minor, &zo); ++ if (error) ++ return (error); ++ ++ mutex_enter(&zo->zo_lock); ++ ap = zfs_onexit_find_cb(zo, action_handle); ++ if (ap != NULL) { ++ list_remove(&zo->zo_actions, ap); ++ mutex_exit(&zo->zo_lock); ++ if (fire) ++ ap->za_func(ap->za_data); ++ kmem_free(ap, sizeof (zfs_onexit_action_node_t)); ++ } else { ++ mutex_exit(&zo->zo_lock); ++ error = ENOENT; ++ } ++ ++ return (error); ++} ++ ++/* ++ * Return the data associated with this callback. This allows consumers ++ * of the cleanup-on-exit interfaces to stash kernel data across system ++ * calls, knowing that it will be cleaned up if the calling process exits. ++ */ ++int ++zfs_onexit_cb_data(minor_t minor, uint64_t action_handle, void **data) ++{ ++ zfs_onexit_t *zo; ++ zfs_onexit_action_node_t *ap; ++ int error; ++ ++ *data = NULL; ++ ++ error = zfs_onexit_minor_to_state(minor, &zo); ++ if (error) ++ return (error); ++ ++ mutex_enter(&zo->zo_lock); ++ ap = zfs_onexit_find_cb(zo, action_handle); ++ if (ap != NULL) ++ *data = ap->za_data; ++ else ++ error = ENOENT; ++ mutex_exit(&zo->zo_lock); ++ ++ return (error); ++} +diff -uNr linux-3.2.33-go.orig/fs/zfs/zfs/zfs_replay.c linux-3.2.33-go/fs/zfs/zfs/zfs_replay.c +--- linux-3.2.33-go.orig/fs/zfs/zfs/zfs_replay.c 1970-01-01 01:00:00.000000000 +0100 ++++ linux-3.2.33-go/fs/zfs/zfs/zfs_replay.c 2012-11-16 23:25:34.351039311 +0100 +@@ -0,0 +1,935 @@ ++/* ++ * CDDL HEADER START ++ * ++ * The contents of this file are subject to the terms of the ++ * Common Development and Distribution License (the "License"). ++ * You may not use this file except in compliance with the License. ++ * ++ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE ++ * or http://www.opensolaris.org/os/licensing. ++ * See the License for the specific language governing permissions ++ * and limitations under the License. ++ * ++ * When distributing Covered Code, include this CDDL HEADER in each ++ * file and include the License file at usr/src/OPENSOLARIS.LICENSE. ++ * If applicable, add the following below this CDDL HEADER, with the ++ * fields enclosed by brackets "[]" replaced with your own identifying ++ * information: Portions Copyright [yyyy] [name of copyright owner] ++ * ++ * CDDL HEADER END ++ */ ++/* ++ * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2012 Cyril Plisko. All rights reserved. ++ */ ++ ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++ ++/* ++ * Functions to replay ZFS intent log (ZIL) records ++ * The functions are called through a function vector (zfs_replay_vector) ++ * which is indexed by the transaction type. ++ */ ++ ++static void ++zfs_init_vattr(vattr_t *vap, uint64_t mask, uint64_t mode, ++ uint64_t uid, uint64_t gid, uint64_t rdev, uint64_t nodeid) ++{ ++ bzero(vap, sizeof (*vap)); ++ vap->va_mask = (uint_t)mask; ++ vap->va_type = IFTOVT(mode); ++ vap->va_mode = mode; ++ vap->va_uid = (uid_t)(IS_EPHEMERAL(uid)) ? -1 : uid; ++ vap->va_gid = (gid_t)(IS_EPHEMERAL(gid)) ? 
-1 : gid; ++ vap->va_rdev = rdev; ++ vap->va_nodeid = nodeid; ++} ++ ++/* ARGSUSED */ ++static int ++zfs_replay_error(zfs_sb_t *zsb, lr_t *lr, boolean_t byteswap) ++{ ++ return (ENOTSUP); ++} ++ ++static void ++zfs_replay_xvattr(lr_attr_t *lrattr, xvattr_t *xvap) ++{ ++ xoptattr_t *xoap = NULL; ++ uint64_t *attrs; ++ uint64_t *crtime; ++ uint32_t *bitmap; ++ void *scanstamp; ++ int i; ++ ++ xvap->xva_vattr.va_mask |= ATTR_XVATTR; ++ if ((xoap = xva_getxoptattr(xvap)) == NULL) { ++ xvap->xva_vattr.va_mask &= ~ATTR_XVATTR; /* shouldn't happen */ ++ return; ++ } ++ ++ ASSERT(lrattr->lr_attr_masksize == xvap->xva_mapsize); ++ ++ bitmap = &lrattr->lr_attr_bitmap; ++ for (i = 0; i != lrattr->lr_attr_masksize; i++, bitmap++) ++ xvap->xva_reqattrmap[i] = *bitmap; ++ ++ attrs = (uint64_t *)(lrattr + lrattr->lr_attr_masksize - 1); ++ crtime = attrs + 1; ++ scanstamp = (caddr_t)(crtime + 2); ++ ++ if (XVA_ISSET_REQ(xvap, XAT_HIDDEN)) ++ xoap->xoa_hidden = ((*attrs & XAT0_HIDDEN) != 0); ++ if (XVA_ISSET_REQ(xvap, XAT_SYSTEM)) ++ xoap->xoa_system = ((*attrs & XAT0_SYSTEM) != 0); ++ if (XVA_ISSET_REQ(xvap, XAT_ARCHIVE)) ++ xoap->xoa_archive = ((*attrs & XAT0_ARCHIVE) != 0); ++ if (XVA_ISSET_REQ(xvap, XAT_READONLY)) ++ xoap->xoa_readonly = ((*attrs & XAT0_READONLY) != 0); ++ if (XVA_ISSET_REQ(xvap, XAT_IMMUTABLE)) ++ xoap->xoa_immutable = ((*attrs & XAT0_IMMUTABLE) != 0); ++ if (XVA_ISSET_REQ(xvap, XAT_NOUNLINK)) ++ xoap->xoa_nounlink = ((*attrs & XAT0_NOUNLINK) != 0); ++ if (XVA_ISSET_REQ(xvap, XAT_APPENDONLY)) ++ xoap->xoa_appendonly = ((*attrs & XAT0_APPENDONLY) != 0); ++ if (XVA_ISSET_REQ(xvap, XAT_NODUMP)) ++ xoap->xoa_nodump = ((*attrs & XAT0_NODUMP) != 0); ++ if (XVA_ISSET_REQ(xvap, XAT_OPAQUE)) ++ xoap->xoa_opaque = ((*attrs & XAT0_OPAQUE) != 0); ++ if (XVA_ISSET_REQ(xvap, XAT_AV_MODIFIED)) ++ xoap->xoa_av_modified = ((*attrs & XAT0_AV_MODIFIED) != 0); ++ if (XVA_ISSET_REQ(xvap, XAT_AV_QUARANTINED)) ++ xoap->xoa_av_quarantined = ++ ((*attrs & XAT0_AV_QUARANTINED) != 0); ++ if (XVA_ISSET_REQ(xvap, XAT_CREATETIME)) ++ ZFS_TIME_DECODE(&xoap->xoa_createtime, crtime); ++ if (XVA_ISSET_REQ(xvap, XAT_AV_SCANSTAMP)) ++ bcopy(scanstamp, xoap->xoa_av_scanstamp, AV_SCANSTAMP_SZ); ++ if (XVA_ISSET_REQ(xvap, XAT_REPARSE)) ++ xoap->xoa_reparse = ((*attrs & XAT0_REPARSE) != 0); ++ if (XVA_ISSET_REQ(xvap, XAT_OFFLINE)) ++ xoap->xoa_offline = ((*attrs & XAT0_OFFLINE) != 0); ++ if (XVA_ISSET_REQ(xvap, XAT_SPARSE)) ++ xoap->xoa_sparse = ((*attrs & XAT0_SPARSE) != 0); ++} ++ ++static int ++zfs_replay_domain_cnt(uint64_t uid, uint64_t gid) ++{ ++ uint64_t uid_idx; ++ uint64_t gid_idx; ++ int domcnt = 0; ++ ++ uid_idx = FUID_INDEX(uid); ++ gid_idx = FUID_INDEX(gid); ++ if (uid_idx) ++ domcnt++; ++ if (gid_idx > 0 && gid_idx != uid_idx) ++ domcnt++; ++ ++ return (domcnt); ++} ++ ++static void * ++zfs_replay_fuid_domain_common(zfs_fuid_info_t *fuid_infop, void *start, ++ int domcnt) ++{ ++ int i; ++ ++ for (i = 0; i != domcnt; i++) { ++ fuid_infop->z_domain_table[i] = start; ++ start = (caddr_t)start + strlen(start) + 1; ++ } ++ ++ return (start); ++} ++ ++/* ++ * Set the uid/gid in the fuid_info structure. 
++ */ ++static void ++zfs_replay_fuid_ugid(zfs_fuid_info_t *fuid_infop, uint64_t uid, uint64_t gid) ++{ ++ /* ++ * If owner or group are log specific FUIDs then slurp up ++ * domain information and build zfs_fuid_info_t ++ */ ++ if (IS_EPHEMERAL(uid)) ++ fuid_infop->z_fuid_owner = uid; ++ ++ if (IS_EPHEMERAL(gid)) ++ fuid_infop->z_fuid_group = gid; ++} ++ ++/* ++ * Load fuid domains into fuid_info_t ++ */ ++static zfs_fuid_info_t * ++zfs_replay_fuid_domain(void *buf, void **end, uint64_t uid, uint64_t gid) ++{ ++ int domcnt; ++ ++ zfs_fuid_info_t *fuid_infop; ++ ++ fuid_infop = zfs_fuid_info_alloc(); ++ ++ domcnt = zfs_replay_domain_cnt(uid, gid); ++ ++ if (domcnt == 0) ++ return (fuid_infop); ++ ++ fuid_infop->z_domain_table = ++ kmem_zalloc(domcnt * sizeof (char **), KM_SLEEP); ++ ++ zfs_replay_fuid_ugid(fuid_infop, uid, gid); ++ ++ fuid_infop->z_domain_cnt = domcnt; ++ *end = zfs_replay_fuid_domain_common(fuid_infop, buf, domcnt); ++ return (fuid_infop); ++} ++ ++/* ++ * load zfs_fuid_t's and fuid_domains into fuid_info_t ++ */ ++static zfs_fuid_info_t * ++zfs_replay_fuids(void *start, void **end, int idcnt, int domcnt, uint64_t uid, ++ uint64_t gid) ++{ ++ uint64_t *log_fuid = (uint64_t *)start; ++ zfs_fuid_info_t *fuid_infop; ++ int i; ++ ++ fuid_infop = zfs_fuid_info_alloc(); ++ fuid_infop->z_domain_cnt = domcnt; ++ ++ fuid_infop->z_domain_table = ++ kmem_zalloc(domcnt * sizeof (char **), KM_SLEEP); ++ ++ for (i = 0; i != idcnt; i++) { ++ zfs_fuid_t *zfuid; ++ ++ zfuid = kmem_alloc(sizeof (zfs_fuid_t), KM_SLEEP); ++ zfuid->z_logfuid = *log_fuid; ++ zfuid->z_id = -1; ++ zfuid->z_domidx = 0; ++ list_insert_tail(&fuid_infop->z_fuids, zfuid); ++ log_fuid++; ++ } ++ ++ zfs_replay_fuid_ugid(fuid_infop, uid, gid); ++ ++ *end = zfs_replay_fuid_domain_common(fuid_infop, log_fuid, domcnt); ++ return (fuid_infop); ++} ++ ++static void ++zfs_replay_swap_attrs(lr_attr_t *lrattr) ++{ ++ /* swap the lr_attr structure */ ++ byteswap_uint32_array(lrattr, sizeof (*lrattr)); ++ /* swap the bitmap */ ++ byteswap_uint32_array(lrattr + 1, (lrattr->lr_attr_masksize - 1) * ++ sizeof (uint32_t)); ++ /* swap the attributes, create time + 64 bit word for attributes */ ++ byteswap_uint64_array((caddr_t)(lrattr + 1) + (sizeof (uint32_t) * ++ (lrattr->lr_attr_masksize - 1)), 3 * sizeof (uint64_t)); ++} ++ ++/* ++ * Replay file create with optional ACL, xvattr information as well ++ * as option FUID information. 
++ */ ++static int ++zfs_replay_create_acl(zfs_sb_t *zsb, lr_acl_create_t *lracl, boolean_t byteswap) ++{ ++ char *name = NULL; /* location determined later */ ++ lr_create_t *lr = (lr_create_t *)lracl; ++ znode_t *dzp; ++ struct inode *ip = NULL; ++ xvattr_t xva; ++ int vflg = 0; ++ vsecattr_t vsec = { 0 }; ++ lr_attr_t *lrattr; ++ void *aclstart; ++ void *fuidstart; ++ size_t xvatlen = 0; ++ uint64_t txtype; ++ int error; ++ ++ txtype = (lr->lr_common.lrc_txtype & ~TX_CI); ++ if (byteswap) { ++ byteswap_uint64_array(lracl, sizeof (*lracl)); ++ if (txtype == TX_CREATE_ACL_ATTR || ++ txtype == TX_MKDIR_ACL_ATTR) { ++ lrattr = (lr_attr_t *)(caddr_t)(lracl + 1); ++ zfs_replay_swap_attrs(lrattr); ++ xvatlen = ZIL_XVAT_SIZE(lrattr->lr_attr_masksize); ++ } ++ ++ aclstart = (caddr_t)(lracl + 1) + xvatlen; ++ zfs_ace_byteswap(aclstart, lracl->lr_acl_bytes, B_FALSE); ++ /* swap fuids */ ++ if (lracl->lr_fuidcnt) { ++ byteswap_uint64_array((caddr_t)aclstart + ++ ZIL_ACE_LENGTH(lracl->lr_acl_bytes), ++ lracl->lr_fuidcnt * sizeof (uint64_t)); ++ } ++ } ++ ++ if ((error = zfs_zget(zsb, lr->lr_doid, &dzp)) != 0) ++ return (error); ++ ++ xva_init(&xva); ++ zfs_init_vattr(&xva.xva_vattr, ATTR_MODE | ATTR_UID | ATTR_GID, ++ lr->lr_mode, lr->lr_uid, lr->lr_gid, lr->lr_rdev, lr->lr_foid); ++ ++ /* ++ * All forms of zfs create (create, mkdir, mkxattrdir, symlink) ++ * eventually end up in zfs_mknode(), which assigns the object's ++ * creation time and generation number. The generic zfs_create() ++ * doesn't have either concept, so we smuggle the values inside ++ * the vattr's otherwise unused va_ctime and va_nblocks fields. ++ */ ++ ZFS_TIME_DECODE(&xva.xva_vattr.va_ctime, lr->lr_crtime); ++ xva.xva_vattr.va_nblocks = lr->lr_gen; ++ ++ error = dmu_object_info(zsb->z_os, lr->lr_foid, NULL); ++ if (error != ENOENT) ++ goto bail; ++ ++ if (lr->lr_common.lrc_txtype & TX_CI) ++ vflg |= FIGNORECASE; ++ switch (txtype) { ++ case TX_CREATE_ACL: ++ aclstart = (caddr_t)(lracl + 1); ++ fuidstart = (caddr_t)aclstart + ++ ZIL_ACE_LENGTH(lracl->lr_acl_bytes); ++ zsb->z_fuid_replay = zfs_replay_fuids(fuidstart, ++ (void *)&name, lracl->lr_fuidcnt, lracl->lr_domcnt, ++ lr->lr_uid, lr->lr_gid); ++ /*FALLTHROUGH*/ ++ case TX_CREATE_ACL_ATTR: ++ if (name == NULL) { ++ lrattr = (lr_attr_t *)(caddr_t)(lracl + 1); ++ xvatlen = ZIL_XVAT_SIZE(lrattr->lr_attr_masksize); ++ xva.xva_vattr.va_mask |= ATTR_XVATTR; ++ zfs_replay_xvattr(lrattr, &xva); ++ } ++ vsec.vsa_mask = VSA_ACE | VSA_ACE_ACLFLAGS; ++ vsec.vsa_aclentp = (caddr_t)(lracl + 1) + xvatlen; ++ vsec.vsa_aclcnt = lracl->lr_aclcnt; ++ vsec.vsa_aclentsz = lracl->lr_acl_bytes; ++ vsec.vsa_aclflags = lracl->lr_acl_flags; ++ if (zsb->z_fuid_replay == NULL) { ++ fuidstart = (caddr_t)(lracl + 1) + xvatlen + ++ ZIL_ACE_LENGTH(lracl->lr_acl_bytes); ++ zsb->z_fuid_replay = ++ zfs_replay_fuids(fuidstart, ++ (void *)&name, lracl->lr_fuidcnt, lracl->lr_domcnt, ++ lr->lr_uid, lr->lr_gid); ++ } ++ ++ error = zfs_create(ZTOI(dzp), name, &xva.xva_vattr, ++ 0, 0, &ip, kcred, vflg, &vsec); ++ break; ++ case TX_MKDIR_ACL: ++ aclstart = (caddr_t)(lracl + 1); ++ fuidstart = (caddr_t)aclstart + ++ ZIL_ACE_LENGTH(lracl->lr_acl_bytes); ++ zsb->z_fuid_replay = zfs_replay_fuids(fuidstart, ++ (void *)&name, lracl->lr_fuidcnt, lracl->lr_domcnt, ++ lr->lr_uid, lr->lr_gid); ++ /*FALLTHROUGH*/ ++ case TX_MKDIR_ACL_ATTR: ++ if (name == NULL) { ++ lrattr = (lr_attr_t *)(caddr_t)(lracl + 1); ++ xvatlen = ZIL_XVAT_SIZE(lrattr->lr_attr_masksize); ++ zfs_replay_xvattr(lrattr, &xva); ++ } ++ vsec.vsa_mask = 
VSA_ACE | VSA_ACE_ACLFLAGS; ++ vsec.vsa_aclentp = (caddr_t)(lracl + 1) + xvatlen; ++ vsec.vsa_aclcnt = lracl->lr_aclcnt; ++ vsec.vsa_aclentsz = lracl->lr_acl_bytes; ++ vsec.vsa_aclflags = lracl->lr_acl_flags; ++ if (zsb->z_fuid_replay == NULL) { ++ fuidstart = (caddr_t)(lracl + 1) + xvatlen + ++ ZIL_ACE_LENGTH(lracl->lr_acl_bytes); ++ zsb->z_fuid_replay = ++ zfs_replay_fuids(fuidstart, ++ (void *)&name, lracl->lr_fuidcnt, lracl->lr_domcnt, ++ lr->lr_uid, lr->lr_gid); ++ } ++ error = zfs_mkdir(ZTOI(dzp), name, &xva.xva_vattr, ++ &ip, kcred, vflg, &vsec); ++ break; ++ default: ++ error = ENOTSUP; ++ } ++ ++bail: ++ if (error == 0 && ip != NULL) ++ iput(ip); ++ ++ iput(ZTOI(dzp)); ++ ++ if (zsb->z_fuid_replay) ++ zfs_fuid_info_free(zsb->z_fuid_replay); ++ zsb->z_fuid_replay = NULL; ++ ++ return (error); ++} ++ ++static int ++zfs_replay_create(zfs_sb_t *zsb, lr_create_t *lr, boolean_t byteswap) ++{ ++ char *name = NULL; /* location determined later */ ++ char *link; /* symlink content follows name */ ++ znode_t *dzp; ++ struct inode *ip = NULL; ++ xvattr_t xva; ++ int vflg = 0; ++ size_t lrsize = sizeof (lr_create_t); ++ lr_attr_t *lrattr; ++ void *start; ++ size_t xvatlen; ++ uint64_t txtype; ++ int error; ++ ++ txtype = (lr->lr_common.lrc_txtype & ~TX_CI); ++ if (byteswap) { ++ byteswap_uint64_array(lr, sizeof (*lr)); ++ if (txtype == TX_CREATE_ATTR || txtype == TX_MKDIR_ATTR) ++ zfs_replay_swap_attrs((lr_attr_t *)(lr + 1)); ++ } ++ ++ ++ if ((error = zfs_zget(zsb, lr->lr_doid, &dzp)) != 0) ++ return (error); ++ ++ xva_init(&xva); ++ zfs_init_vattr(&xva.xva_vattr, ATTR_MODE | ATTR_UID | ATTR_GID, ++ lr->lr_mode, lr->lr_uid, lr->lr_gid, lr->lr_rdev, lr->lr_foid); ++ ++ /* ++ * All forms of zfs create (create, mkdir, mkxattrdir, symlink) ++ * eventually end up in zfs_mknode(), which assigns the object's ++ * creation time and generation number. The generic zfs_create() ++ * doesn't have either concept, so we smuggle the values inside ++ * the vattr's otherwise unused va_ctime and va_nblocks fields. ++ */ ++ ZFS_TIME_DECODE(&xva.xva_vattr.va_ctime, lr->lr_crtime); ++ xva.xva_vattr.va_nblocks = lr->lr_gen; ++ ++ error = dmu_object_info(zsb->z_os, lr->lr_foid, NULL); ++ if (error != ENOENT) ++ goto out; ++ ++ if (lr->lr_common.lrc_txtype & TX_CI) ++ vflg |= FIGNORECASE; ++ ++ /* ++ * Symlinks don't have fuid info, and CIFS never creates ++ * symlinks. ++ * ++ * The _ATTR versions will grab the fuid info in their subcases. 
++ */ ++ if ((int)lr->lr_common.lrc_txtype != TX_SYMLINK && ++ (int)lr->lr_common.lrc_txtype != TX_MKDIR_ATTR && ++ (int)lr->lr_common.lrc_txtype != TX_CREATE_ATTR) { ++ start = (lr + 1); ++ zsb->z_fuid_replay = ++ zfs_replay_fuid_domain(start, &start, ++ lr->lr_uid, lr->lr_gid); ++ } ++ ++ switch (txtype) { ++ case TX_CREATE_ATTR: ++ lrattr = (lr_attr_t *)(caddr_t)(lr + 1); ++ xvatlen = ZIL_XVAT_SIZE(lrattr->lr_attr_masksize); ++ zfs_replay_xvattr((lr_attr_t *)((caddr_t)lr + lrsize), &xva); ++ start = (caddr_t)(lr + 1) + xvatlen; ++ zsb->z_fuid_replay = ++ zfs_replay_fuid_domain(start, &start, ++ lr->lr_uid, lr->lr_gid); ++ name = (char *)start; ++ ++ /*FALLTHROUGH*/ ++ case TX_CREATE: ++ if (name == NULL) ++ name = (char *)start; ++ ++ error = zfs_create(ZTOI(dzp), name, &xva.xva_vattr, ++ 0, 0, &ip, kcred, vflg, NULL); ++ break; ++ case TX_MKDIR_ATTR: ++ lrattr = (lr_attr_t *)(caddr_t)(lr + 1); ++ xvatlen = ZIL_XVAT_SIZE(lrattr->lr_attr_masksize); ++ zfs_replay_xvattr((lr_attr_t *)((caddr_t)lr + lrsize), &xva); ++ start = (caddr_t)(lr + 1) + xvatlen; ++ zsb->z_fuid_replay = ++ zfs_replay_fuid_domain(start, &start, ++ lr->lr_uid, lr->lr_gid); ++ name = (char *)start; ++ ++ /*FALLTHROUGH*/ ++ case TX_MKDIR: ++ if (name == NULL) ++ name = (char *)(lr + 1); ++ ++ error = zfs_mkdir(ZTOI(dzp), name, &xva.xva_vattr, ++ &ip, kcred, vflg, NULL); ++ break; ++ case TX_MKXATTR: ++ error = zfs_make_xattrdir(dzp, &xva.xva_vattr, &ip, kcred); ++ break; ++ case TX_SYMLINK: ++ name = (char *)(lr + 1); ++ link = name + strlen(name) + 1; ++ error = zfs_symlink(ZTOI(dzp), name, &xva.xva_vattr, ++ link, &ip, kcred, vflg); ++ break; ++ default: ++ error = ENOTSUP; ++ } ++ ++out: ++ if (error == 0 && ip != NULL) ++ iput(ip); ++ ++ iput(ZTOI(dzp)); ++ ++ if (zsb->z_fuid_replay) ++ zfs_fuid_info_free(zsb->z_fuid_replay); ++ zsb->z_fuid_replay = NULL; ++ return (error); ++} ++ ++static int ++zfs_replay_remove(zfs_sb_t *zsb, lr_remove_t *lr, boolean_t byteswap) ++{ ++ char *name = (char *)(lr + 1); /* name follows lr_remove_t */ ++ znode_t *dzp; ++ int error; ++ int vflg = 0; ++ ++ if (byteswap) ++ byteswap_uint64_array(lr, sizeof (*lr)); ++ ++ if ((error = zfs_zget(zsb, lr->lr_doid, &dzp)) != 0) ++ return (error); ++ ++ if (lr->lr_common.lrc_txtype & TX_CI) ++ vflg |= FIGNORECASE; ++ ++ switch ((int)lr->lr_common.lrc_txtype) { ++ case TX_REMOVE: ++ error = zfs_remove(ZTOI(dzp), name, kcred); ++ break; ++ case TX_RMDIR: ++ error = zfs_rmdir(ZTOI(dzp), name, NULL, kcred, vflg); ++ break; ++ default: ++ error = ENOTSUP; ++ } ++ ++ iput(ZTOI(dzp)); ++ ++ return (error); ++} ++ ++static int ++zfs_replay_link(zfs_sb_t *zsb, lr_link_t *lr, boolean_t byteswap) ++{ ++ char *name = (char *)(lr + 1); /* name follows lr_link_t */ ++ znode_t *dzp, *zp; ++ int error; ++ int vflg = 0; ++ ++ if (byteswap) ++ byteswap_uint64_array(lr, sizeof (*lr)); ++ ++ if ((error = zfs_zget(zsb, lr->lr_doid, &dzp)) != 0) ++ return (error); ++ ++ if ((error = zfs_zget(zsb, lr->lr_link_obj, &zp)) != 0) { ++ iput(ZTOI(dzp)); ++ return (error); ++ } ++ ++ if (lr->lr_common.lrc_txtype & TX_CI) ++ vflg |= FIGNORECASE; ++ ++ error = zfs_link(ZTOI(dzp), ZTOI(zp), name, kcred); ++ ++ iput(ZTOI(zp)); ++ iput(ZTOI(dzp)); ++ ++ return (error); ++} ++ ++static int ++zfs_replay_rename(zfs_sb_t *zsb, lr_rename_t *lr, boolean_t byteswap) ++{ ++ char *sname = (char *)(lr + 1); /* sname and tname follow lr_rename_t */ ++ char *tname = sname + strlen(sname) + 1; ++ znode_t *sdzp, *tdzp; ++ int error; ++ int vflg = 0; ++ ++ if (byteswap) ++ 
byteswap_uint64_array(lr, sizeof (*lr)); ++ ++ if ((error = zfs_zget(zsb, lr->lr_sdoid, &sdzp)) != 0) ++ return (error); ++ ++ if ((error = zfs_zget(zsb, lr->lr_tdoid, &tdzp)) != 0) { ++ iput(ZTOI(sdzp)); ++ return (error); ++ } ++ ++ if (lr->lr_common.lrc_txtype & TX_CI) ++ vflg |= FIGNORECASE; ++ ++ error = zfs_rename(ZTOI(sdzp), sname, ZTOI(tdzp), tname, kcred, vflg); ++ ++ iput(ZTOI(tdzp)); ++ iput(ZTOI(sdzp)); ++ ++ return (error); ++} ++ ++static int ++zfs_replay_write(zfs_sb_t *zsb, lr_write_t *lr, boolean_t byteswap) ++{ ++ char *data = (char *)(lr + 1); /* data follows lr_write_t */ ++ znode_t *zp; ++ int error, written; ++ uint64_t eod, offset, length; ++ ++ if (byteswap) ++ byteswap_uint64_array(lr, sizeof (*lr)); ++ ++ if ((error = zfs_zget(zsb, lr->lr_foid, &zp)) != 0) { ++ /* ++ * As we can log writes out of order, it's possible the ++ * file has been removed. In this case just drop the write ++ * and return success. ++ */ ++ if (error == ENOENT) ++ error = 0; ++ return (error); ++ } ++ ++ offset = lr->lr_offset; ++ length = lr->lr_length; ++ eod = offset + length; /* end of data for this write */ ++ ++ /* ++ * This may be a write from a dmu_sync() for a whole block, ++ * and may extend beyond the current end of the file. ++ * We can't just replay what was written for this TX_WRITE as ++ * a future TX_WRITE2 may extend the eof and the data for that ++ * write needs to be there. So we write the whole block and ++ * reduce the eof. This needs to be done within the single dmu ++ * transaction created within vn_rdwr -> zfs_write. So a possible ++ * new end of file is passed through in zsb->z_replay_eof ++ */ ++ ++ zsb->z_replay_eof = 0; /* 0 means don't change end of file */ ++ ++ /* If it's a dmu_sync() block, write the whole block */ ++ if (lr->lr_common.lrc_reclen == sizeof (lr_write_t)) { ++ uint64_t blocksize = BP_GET_LSIZE(&lr->lr_blkptr); ++ if (length < blocksize) { ++ offset -= offset % blocksize; ++ length = blocksize; ++ } ++ if (zp->z_size < eod) ++ zsb->z_replay_eof = eod; ++ } ++ ++ written = zpl_write_common(ZTOI(zp), data, length, offset, ++ UIO_SYSSPACE, 0, kcred); ++ if (written < 0) ++ error = -written; ++ else if (written < length) ++ error = EIO; /* short write */ ++ ++ iput(ZTOI(zp)); ++ zsb->z_replay_eof = 0; /* safety */ ++ ++ return (error); ++} ++ ++/* ++ * TX_WRITE2 are only generated when dmu_sync() returns EALREADY ++ * meaning the pool block is already being synced. So now that we always write ++ * out full blocks, all we have to do is expand the eof if ++ * the file is grown. 
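The whole-block case in zfs_replay_write() above rounds the replayed range out to the block that contained it, since a dmu_sync()-generated record always covers a full block; the end of file is then clamped separately through z_replay_eof. A minimal stand-alone sketch of just that rounding arithmetic, with the block size and offsets invented:

#include <stdio.h>
#include <stdint.h>

/*
 * Round an (offset, length) pair out to a full block, mirroring the
 * whole-block case in zfs_replay_write(): the write is replayed for the
 * entire block and the end of file is handled separately.
 */
static void
round_to_block(uint64_t *offset, uint64_t *length, uint64_t blocksize)
{
    if (*length < blocksize) {
        *offset -= *offset % blocksize;
        *length = blocksize;
    }
}

int
main(void)
{
    uint64_t off = 135000, len = 3000, bs = 131072;

    round_to_block(&off, &len, bs);
    printf("replay covers [%llu, %llu)\n",
        (unsigned long long)off, (unsigned long long)(off + len));
    return (0);
}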
++ */ ++static int ++zfs_replay_write2(zfs_sb_t *zsb, lr_write_t *lr, boolean_t byteswap) ++{ ++ znode_t *zp; ++ int error; ++ uint64_t end; ++ ++ if (byteswap) ++ byteswap_uint64_array(lr, sizeof (*lr)); ++ ++ if ((error = zfs_zget(zsb, lr->lr_foid, &zp)) != 0) ++ return (error); ++ ++top: ++ end = lr->lr_offset + lr->lr_length; ++ if (end > zp->z_size) { ++ dmu_tx_t *tx = dmu_tx_create(zsb->z_os); ++ ++ zp->z_size = end; ++ dmu_tx_hold_sa(tx, zp->z_sa_hdl, B_FALSE); ++ error = dmu_tx_assign(tx, TXG_WAIT); ++ if (error) { ++ iput(ZTOI(zp)); ++ if (error == ERESTART) { ++ dmu_tx_wait(tx); ++ dmu_tx_abort(tx); ++ goto top; ++ } ++ dmu_tx_abort(tx); ++ return (error); ++ } ++ (void) sa_update(zp->z_sa_hdl, SA_ZPL_SIZE(zsb), ++ (void *)&zp->z_size, sizeof (uint64_t), tx); ++ ++ /* Ensure the replayed seq is updated */ ++ (void) zil_replaying(zsb->z_log, tx); ++ ++ dmu_tx_commit(tx); ++ } ++ ++ iput(ZTOI(zp)); ++ ++ return (error); ++} ++ ++static int ++zfs_replay_truncate(zfs_sb_t *zsb, lr_truncate_t *lr, boolean_t byteswap) ++{ ++ znode_t *zp; ++ flock64_t fl; ++ int error; ++ ++ if (byteswap) ++ byteswap_uint64_array(lr, sizeof (*lr)); ++ ++ if ((error = zfs_zget(zsb, lr->lr_foid, &zp)) != 0) ++ return (error); ++ ++ bzero(&fl, sizeof (fl)); ++ fl.l_type = F_WRLCK; ++ fl.l_whence = 0; ++ fl.l_start = lr->lr_offset; ++ fl.l_len = lr->lr_length; ++ ++ error = zfs_space(ZTOI(zp), F_FREESP, &fl, FWRITE | FOFFMAX, ++ lr->lr_offset, kcred); ++ ++ iput(ZTOI(zp)); ++ ++ return (error); ++} ++ ++static int ++zfs_replay_setattr(zfs_sb_t *zsb, lr_setattr_t *lr, boolean_t byteswap) ++{ ++ znode_t *zp; ++ xvattr_t xva; ++ vattr_t *vap = &xva.xva_vattr; ++ int error; ++ void *start; ++ ++ xva_init(&xva); ++ if (byteswap) { ++ byteswap_uint64_array(lr, sizeof (*lr)); ++ ++ if ((lr->lr_mask & ATTR_XVATTR) && ++ zsb->z_version >= ZPL_VERSION_INITIAL) ++ zfs_replay_swap_attrs((lr_attr_t *)(lr + 1)); ++ } ++ ++ if ((error = zfs_zget(zsb, lr->lr_foid, &zp)) != 0) ++ return (error); ++ ++ zfs_init_vattr(vap, lr->lr_mask, lr->lr_mode, ++ lr->lr_uid, lr->lr_gid, 0, lr->lr_foid); ++ ++ vap->va_size = lr->lr_size; ++ ZFS_TIME_DECODE(&vap->va_atime, lr->lr_atime); ++ ZFS_TIME_DECODE(&vap->va_mtime, lr->lr_mtime); ++ ++ /* ++ * Fill in xvattr_t portions if necessary. 
++ */ ++ ++ start = (lr_setattr_t *)(lr + 1); ++ if (vap->va_mask & ATTR_XVATTR) { ++ zfs_replay_xvattr((lr_attr_t *)start, &xva); ++ start = (caddr_t)start + ++ ZIL_XVAT_SIZE(((lr_attr_t *)start)->lr_attr_masksize); ++ } else ++ xva.xva_vattr.va_mask &= ~ATTR_XVATTR; ++ ++ zsb->z_fuid_replay = zfs_replay_fuid_domain(start, &start, ++ lr->lr_uid, lr->lr_gid); ++ ++ error = zfs_setattr(ZTOI(zp), vap, 0, kcred); ++ ++ zfs_fuid_info_free(zsb->z_fuid_replay); ++ zsb->z_fuid_replay = NULL; ++ iput(ZTOI(zp)); ++ ++ return (error); ++} ++ ++static int ++zfs_replay_acl_v0(zfs_sb_t *zsb, lr_acl_v0_t *lr, boolean_t byteswap) ++{ ++ ace_t *ace = (ace_t *)(lr + 1); /* ace array follows lr_acl_t */ ++ vsecattr_t vsa; ++ znode_t *zp; ++ int error; ++ ++ if (byteswap) { ++ byteswap_uint64_array(lr, sizeof (*lr)); ++ zfs_oldace_byteswap(ace, lr->lr_aclcnt); ++ } ++ ++ if ((error = zfs_zget(zsb, lr->lr_foid, &zp)) != 0) ++ return (error); ++ ++ bzero(&vsa, sizeof (vsa)); ++ vsa.vsa_mask = VSA_ACE | VSA_ACECNT; ++ vsa.vsa_aclcnt = lr->lr_aclcnt; ++ vsa.vsa_aclentsz = sizeof (ace_t) * vsa.vsa_aclcnt; ++ vsa.vsa_aclflags = 0; ++ vsa.vsa_aclentp = ace; ++ ++ error = zfs_setsecattr(ZTOI(zp), &vsa, 0, kcred); ++ ++ iput(ZTOI(zp)); ++ ++ return (error); ++} ++ ++/* ++ * Replaying ACLs is complicated by FUID support. ++ * The log record may contain some optional data ++ * to be used for replaying FUID's. These pieces ++ * are the actual FUIDs that were created initially. ++ * The FUID table index may no longer be valid and ++ * during zfs_create() a new index may be assigned. ++ * Because of this the log will contain the original ++ * doman+rid in order to create a new FUID. ++ * ++ * The individual ACEs may contain an ephemeral uid/gid which is no ++ * longer valid and will need to be replaced with an actual FUID. 
++ * ++ */ ++static int ++zfs_replay_acl(zfs_sb_t *zsb, lr_acl_t *lr, boolean_t byteswap) ++{ ++ ace_t *ace = (ace_t *)(lr + 1); ++ vsecattr_t vsa; ++ znode_t *zp; ++ int error; ++ ++ if (byteswap) { ++ byteswap_uint64_array(lr, sizeof (*lr)); ++ zfs_ace_byteswap(ace, lr->lr_acl_bytes, B_FALSE); ++ if (lr->lr_fuidcnt) { ++ byteswap_uint64_array((caddr_t)ace + ++ ZIL_ACE_LENGTH(lr->lr_acl_bytes), ++ lr->lr_fuidcnt * sizeof (uint64_t)); ++ } ++ } ++ ++ if ((error = zfs_zget(zsb, lr->lr_foid, &zp)) != 0) ++ return (error); ++ ++ bzero(&vsa, sizeof (vsa)); ++ vsa.vsa_mask = VSA_ACE | VSA_ACECNT | VSA_ACE_ACLFLAGS; ++ vsa.vsa_aclcnt = lr->lr_aclcnt; ++ vsa.vsa_aclentp = ace; ++ vsa.vsa_aclentsz = lr->lr_acl_bytes; ++ vsa.vsa_aclflags = lr->lr_acl_flags; ++ ++ if (lr->lr_fuidcnt) { ++ void *fuidstart = (caddr_t)ace + ++ ZIL_ACE_LENGTH(lr->lr_acl_bytes); ++ ++ zsb->z_fuid_replay = ++ zfs_replay_fuids(fuidstart, &fuidstart, ++ lr->lr_fuidcnt, lr->lr_domcnt, 0, 0); ++ } ++ ++ error = zfs_setsecattr(ZTOI(zp), &vsa, 0, kcred); ++ ++ if (zsb->z_fuid_replay) ++ zfs_fuid_info_free(zsb->z_fuid_replay); ++ ++ zsb->z_fuid_replay = NULL; ++ iput(ZTOI(zp)); ++ ++ return (error); ++} ++ ++/* ++ * Callback vectors for replaying records ++ */ ++zil_replay_func_t *zfs_replay_vector[TX_MAX_TYPE] = { ++ (zil_replay_func_t *)zfs_replay_error, /* no such type */ ++ (zil_replay_func_t *)zfs_replay_create, /* TX_CREATE */ ++ (zil_replay_func_t *)zfs_replay_create, /* TX_MKDIR */ ++ (zil_replay_func_t *)zfs_replay_create, /* TX_MKXATTR */ ++ (zil_replay_func_t *)zfs_replay_create, /* TX_SYMLINK */ ++ (zil_replay_func_t *)zfs_replay_remove, /* TX_REMOVE */ ++ (zil_replay_func_t *)zfs_replay_remove, /* TX_RMDIR */ ++ (zil_replay_func_t *)zfs_replay_link, /* TX_LINK */ ++ (zil_replay_func_t *)zfs_replay_rename, /* TX_RENAME */ ++ (zil_replay_func_t *)zfs_replay_write, /* TX_WRITE */ ++ (zil_replay_func_t *)zfs_replay_truncate, /* TX_TRUNCATE */ ++ (zil_replay_func_t *)zfs_replay_setattr, /* TX_SETATTR */ ++ (zil_replay_func_t *)zfs_replay_acl_v0, /* TX_ACL_V0 */ ++ (zil_replay_func_t *)zfs_replay_acl, /* TX_ACL */ ++ (zil_replay_func_t *)zfs_replay_create_acl, /* TX_CREATE_ACL */ ++ (zil_replay_func_t *)zfs_replay_create, /* TX_CREATE_ATTR */ ++ (zil_replay_func_t *)zfs_replay_create_acl, /* TX_CREATE_ACL_ATTR */ ++ (zil_replay_func_t *)zfs_replay_create_acl, /* TX_MKDIR_ACL */ ++ (zil_replay_func_t *)zfs_replay_create, /* TX_MKDIR_ATTR */ ++ (zil_replay_func_t *)zfs_replay_create_acl, /* TX_MKDIR_ACL_ATTR */ ++ (zil_replay_func_t *)zfs_replay_write2, /* TX_WRITE2 */ ++}; +diff -uNr linux-3.2.33-go.orig/fs/zfs/zfs/zfs_rlock.c linux-3.2.33-go/fs/zfs/zfs/zfs_rlock.c +--- linux-3.2.33-go.orig/fs/zfs/zfs/zfs_rlock.c 1970-01-01 01:00:00.000000000 +0100 ++++ linux-3.2.33-go/fs/zfs/zfs/zfs_rlock.c 2012-11-16 23:25:34.352039300 +0100 +@@ -0,0 +1,625 @@ ++/* ++ * CDDL HEADER START ++ * ++ * The contents of this file are subject to the terms of the ++ * Common Development and Distribution License (the "License"). ++ * You may not use this file except in compliance with the License. ++ * ++ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE ++ * or http://www.opensolaris.org/os/licensing. ++ * See the License for the specific language governing permissions ++ * and limitations under the License. ++ * ++ * When distributing Covered Code, include this CDDL HEADER in each ++ * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 
++ * If applicable, add the following below this CDDL HEADER, with the ++ * fields enclosed by brackets "[]" replaced with your own identifying ++ * information: Portions Copyright [yyyy] [name of copyright owner] ++ * ++ * CDDL HEADER END ++ */ ++/* ++ * Copyright 2010 Sun Microsystems, Inc. All rights reserved. ++ * Use is subject to license terms. ++ */ ++ ++/* ++ * This file contains the code to implement file range locking in ++ * ZFS, although there isn't much specific to ZFS (all that comes to mind ++ * support for growing the blocksize). ++ * ++ * Interface ++ * --------- ++ * Defined in zfs_rlock.h but essentially: ++ * rl = zfs_range_lock(zp, off, len, lock_type); ++ * zfs_range_unlock(rl); ++ * zfs_range_reduce(rl, off, len); ++ * ++ * AVL tree ++ * -------- ++ * An AVL tree is used to maintain the state of the existing ranges ++ * that are locked for exclusive (writer) or shared (reader) use. ++ * The starting range offset is used for searching and sorting the tree. ++ * ++ * Common case ++ * ----------- ++ * The (hopefully) usual case is of no overlaps or contention for ++ * locks. On entry to zfs_lock_range() a rl_t is allocated; the tree ++ * searched that finds no overlap, and *this* rl_t is placed in the tree. ++ * ++ * Overlaps/Reference counting/Proxy locks ++ * --------------------------------------- ++ * The avl code only allows one node at a particular offset. Also it's very ++ * inefficient to search through all previous entries looking for overlaps ++ * (because the very 1st in the ordered list might be at offset 0 but ++ * cover the whole file). ++ * So this implementation uses reference counts and proxy range locks. ++ * Firstly, only reader locks use reference counts and proxy locks, ++ * because writer locks are exclusive. ++ * When a reader lock overlaps with another then a proxy lock is created ++ * for that range and replaces the original lock. If the overlap ++ * is exact then the reference count of the proxy is simply incremented. ++ * Otherwise, the proxy lock is split into smaller lock ranges and ++ * new proxy locks created for non overlapping ranges. ++ * The reference counts are adjusted accordingly. ++ * Meanwhile, the orginal lock is kept around (this is the callers handle) ++ * and its offset and length are used when releasing the lock. ++ * ++ * Thread coordination ++ * ------------------- ++ * In order to make wakeups efficient and to ensure multiple continuous ++ * readers on a range don't starve a writer for the same range lock, ++ * two condition variables are allocated in each rl_t. ++ * If a writer (or reader) can't get a range it initialises the writer ++ * (or reader) cv; sets a flag saying there's a writer (or reader) waiting; ++ * and waits on that cv. When a thread unlocks that range it wakes up all ++ * writers then all readers before destroying the lock. ++ * ++ * Append mode writes ++ * ------------------ ++ * Append mode writes need to lock a range at the end of a file. ++ * The offset of the end of the file is determined under the ++ * range locking mutex, and the lock type converted from RL_APPEND to ++ * RL_WRITER and the range locked. ++ * ++ * Grow block handling ++ * ------------------- ++ * ZFS supports multiple block sizes currently upto 128K. The smallest ++ * block size is used for the file which is grown as needed. During this ++ * growth all other writers and readers must be excluded. 
++ * So if the block size needs to be grown then the whole file is ++ * exclusively locked, then later the caller will reduce the lock ++ * range to just the range to be written using zfs_reduce_range. ++ */ ++ ++#include ++ ++/* ++ * Check if a write lock can be grabbed, or wait and recheck until available. ++ */ ++static void ++zfs_range_lock_writer(znode_t *zp, rl_t *new) ++{ ++ avl_tree_t *tree = &zp->z_range_avl; ++ rl_t *rl; ++ avl_index_t where; ++ uint64_t end_size; ++ uint64_t off = new->r_off; ++ uint64_t len = new->r_len; ++ ++ for (;;) { ++ /* ++ * Range locking is also used by zvol and uses a ++ * dummied up znode. However, for zvol, we don't need to ++ * append or grow blocksize, and besides we don't have ++ * a "sa" data or zfs_sb_t - so skip that processing. ++ * ++ * Yes, this is ugly, and would be solved by not handling ++ * grow or append in range lock code. If that was done then ++ * we could make the range locking code generically available ++ * to other non-zfs consumers. ++ */ ++ if (!zp->z_is_zvol) { /* caller is ZPL */ ++ /* ++ * If in append mode pick up the current end of file. ++ * This is done under z_range_lock to avoid races. ++ */ ++ if (new->r_type == RL_APPEND) ++ new->r_off = zp->z_size; ++ ++ /* ++ * If we need to grow the block size then grab the whole ++ * file range. This is also done under z_range_lock to ++ * avoid races. ++ */ ++ end_size = MAX(zp->z_size, new->r_off + len); ++ if (end_size > zp->z_blksz && (!ISP2(zp->z_blksz) || ++ zp->z_blksz < ZTOZSB(zp)->z_max_blksz)) { ++ new->r_off = 0; ++ new->r_len = UINT64_MAX; ++ } ++ } ++ ++ /* ++ * First check for the usual case of no locks ++ */ ++ if (avl_numnodes(tree) == 0) { ++ new->r_type = RL_WRITER; /* convert to writer */ ++ avl_add(tree, new); ++ return; ++ } ++ ++ /* ++ * Look for any locks in the range. ++ */ ++ rl = avl_find(tree, new, &where); ++ if (rl) ++ goto wait; /* already locked at same offset */ ++ ++ rl = (rl_t *)avl_nearest(tree, where, AVL_AFTER); ++ if (rl && (rl->r_off < new->r_off + new->r_len)) ++ goto wait; ++ ++ rl = (rl_t *)avl_nearest(tree, where, AVL_BEFORE); ++ if (rl && rl->r_off + rl->r_len > new->r_off) ++ goto wait; ++ ++ new->r_type = RL_WRITER; /* convert possible RL_APPEND */ ++ avl_insert(tree, new, where); ++ return; ++wait: ++ if (!rl->r_write_wanted) { ++ cv_init(&rl->r_wr_cv, NULL, CV_DEFAULT, NULL); ++ rl->r_write_wanted = B_TRUE; ++ } ++ cv_wait(&rl->r_wr_cv, &zp->z_range_lock); ++ ++ /* reset to original */ ++ new->r_off = off; ++ new->r_len = len; ++ } ++} ++ ++/* ++ * If this is an original (non-proxy) lock then replace it by ++ * a proxy and return the proxy. ++ */ ++static rl_t * ++zfs_range_proxify(avl_tree_t *tree, rl_t *rl) ++{ ++ rl_t *proxy; ++ ++ if (rl->r_proxy) ++ return (rl); /* already a proxy */ ++ ++ ASSERT3U(rl->r_cnt, ==, 1); ++ ASSERT(rl->r_write_wanted == B_FALSE); ++ ASSERT(rl->r_read_wanted == B_FALSE); ++ avl_remove(tree, rl); ++ rl->r_cnt = 0; ++ ++ /* create a proxy range lock */ ++ proxy = kmem_alloc(sizeof (rl_t), KM_PUSHPAGE); ++ proxy->r_off = rl->r_off; ++ proxy->r_len = rl->r_len; ++ proxy->r_cnt = 1; ++ proxy->r_type = RL_READER; ++ proxy->r_proxy = B_TRUE; ++ proxy->r_write_wanted = B_FALSE; ++ proxy->r_read_wanted = B_FALSE; ++ avl_add(tree, proxy); ++ ++ return (proxy); ++} ++ ++/* ++ * Split the range lock at the supplied offset ++ * returning the *front* proxy. 
++ */ ++static rl_t * ++zfs_range_split(avl_tree_t *tree, rl_t *rl, uint64_t off) ++{ ++ rl_t *front, *rear; ++ ++ ASSERT3U(rl->r_len, >, 1); ++ ASSERT3U(off, >, rl->r_off); ++ ASSERT3U(off, <, rl->r_off + rl->r_len); ++ ASSERT(rl->r_write_wanted == B_FALSE); ++ ASSERT(rl->r_read_wanted == B_FALSE); ++ ++ /* create the rear proxy range lock */ ++ rear = kmem_alloc(sizeof (rl_t), KM_PUSHPAGE); ++ rear->r_off = off; ++ rear->r_len = rl->r_off + rl->r_len - off; ++ rear->r_cnt = rl->r_cnt; ++ rear->r_type = RL_READER; ++ rear->r_proxy = B_TRUE; ++ rear->r_write_wanted = B_FALSE; ++ rear->r_read_wanted = B_FALSE; ++ ++ front = zfs_range_proxify(tree, rl); ++ front->r_len = off - rl->r_off; ++ ++ avl_insert_here(tree, rear, front, AVL_AFTER); ++ return (front); ++} ++ ++/* ++ * Create and add a new proxy range lock for the supplied range. ++ */ ++static void ++zfs_range_new_proxy(avl_tree_t *tree, uint64_t off, uint64_t len) ++{ ++ rl_t *rl; ++ ++ ASSERT(len); ++ rl = kmem_alloc(sizeof (rl_t), KM_SLEEP); ++ rl->r_off = off; ++ rl->r_len = len; ++ rl->r_cnt = 1; ++ rl->r_type = RL_READER; ++ rl->r_proxy = B_TRUE; ++ rl->r_write_wanted = B_FALSE; ++ rl->r_read_wanted = B_FALSE; ++ avl_add(tree, rl); ++} ++ ++static void ++zfs_range_add_reader(avl_tree_t *tree, rl_t *new, rl_t *prev, avl_index_t where) ++{ ++ rl_t *next; ++ uint64_t off = new->r_off; ++ uint64_t len = new->r_len; ++ ++ /* ++ * prev arrives either: ++ * - pointing to an entry at the same offset ++ * - pointing to the entry with the closest previous offset whose ++ * range may overlap with the new range ++ * - null, if there were no ranges starting before the new one ++ */ ++ if (prev) { ++ if (prev->r_off + prev->r_len <= off) { ++ prev = NULL; ++ } else if (prev->r_off != off) { ++ /* ++ * convert to proxy if needed then ++ * split this entry and bump ref count ++ */ ++ prev = zfs_range_split(tree, prev, off); ++ prev = AVL_NEXT(tree, prev); /* move to rear range */ ++ } ++ } ++ ASSERT((prev == NULL) || (prev->r_off == off)); ++ ++ if (prev) ++ next = prev; ++ else ++ next = (rl_t *)avl_nearest(tree, where, AVL_AFTER); ++ ++ if (next == NULL || off + len <= next->r_off) { ++ /* no overlaps, use the original new rl_t in the tree */ ++ avl_insert(tree, new, where); ++ return; ++ } ++ ++ if (off < next->r_off) { ++ /* Add a proxy for initial range before the overlap */ ++ zfs_range_new_proxy(tree, off, next->r_off - off); ++ } ++ ++ new->r_cnt = 0; /* will use proxies in tree */ ++ /* ++ * We now search forward through the ranges, until we go past the end ++ * of the new range. For each entry we make it a proxy if it ++ * isn't already, then bump its reference count. If there's any ++ * gaps between the ranges then we create a new proxy range. 
++ */ ++ for (prev = NULL; next; prev = next, next = AVL_NEXT(tree, next)) { ++ if (off + len <= next->r_off) ++ break; ++ if (prev && prev->r_off + prev->r_len < next->r_off) { ++ /* there's a gap */ ++ ASSERT3U(next->r_off, >, prev->r_off + prev->r_len); ++ zfs_range_new_proxy(tree, prev->r_off + prev->r_len, ++ next->r_off - (prev->r_off + prev->r_len)); ++ } ++ if (off + len == next->r_off + next->r_len) { ++ /* exact overlap with end */ ++ next = zfs_range_proxify(tree, next); ++ next->r_cnt++; ++ return; ++ } ++ if (off + len < next->r_off + next->r_len) { ++ /* new range ends in the middle of this block */ ++ next = zfs_range_split(tree, next, off + len); ++ next->r_cnt++; ++ return; ++ } ++ ASSERT3U(off + len, >, next->r_off + next->r_len); ++ next = zfs_range_proxify(tree, next); ++ next->r_cnt++; ++ } ++ ++ /* Add the remaining end range. */ ++ zfs_range_new_proxy(tree, prev->r_off + prev->r_len, ++ (off + len) - (prev->r_off + prev->r_len)); ++} ++ ++/* ++ * Check if a reader lock can be grabbed, or wait and recheck until available. ++ */ ++static void ++zfs_range_lock_reader(znode_t *zp, rl_t *new) ++{ ++ avl_tree_t *tree = &zp->z_range_avl; ++ rl_t *prev, *next; ++ avl_index_t where; ++ uint64_t off = new->r_off; ++ uint64_t len = new->r_len; ++ ++ /* ++ * Look for any writer locks in the range. ++ */ ++retry: ++ prev = avl_find(tree, new, &where); ++ if (prev == NULL) ++ prev = (rl_t *)avl_nearest(tree, where, AVL_BEFORE); ++ ++ /* ++ * Check the previous range for a writer lock overlap. ++ */ ++ if (prev && (off < prev->r_off + prev->r_len)) { ++ if ((prev->r_type == RL_WRITER) || (prev->r_write_wanted)) { ++ if (!prev->r_read_wanted) { ++ cv_init(&prev->r_rd_cv, NULL, CV_DEFAULT, NULL); ++ prev->r_read_wanted = B_TRUE; ++ } ++ cv_wait(&prev->r_rd_cv, &zp->z_range_lock); ++ goto retry; ++ } ++ if (off + len < prev->r_off + prev->r_len) ++ goto got_lock; ++ } ++ ++ /* ++ * Search through the following ranges to see if there's ++ * write lock any overlap. ++ */ ++ if (prev) ++ next = AVL_NEXT(tree, prev); ++ else ++ next = (rl_t *)avl_nearest(tree, where, AVL_AFTER); ++ for (; next; next = AVL_NEXT(tree, next)) { ++ if (off + len <= next->r_off) ++ goto got_lock; ++ if ((next->r_type == RL_WRITER) || (next->r_write_wanted)) { ++ if (!next->r_read_wanted) { ++ cv_init(&next->r_rd_cv, NULL, CV_DEFAULT, NULL); ++ next->r_read_wanted = B_TRUE; ++ } ++ cv_wait(&next->r_rd_cv, &zp->z_range_lock); ++ goto retry; ++ } ++ if (off + len <= next->r_off + next->r_len) ++ goto got_lock; ++ } ++ ++got_lock: ++ /* ++ * Add the read lock, which may involve splitting existing ++ * locks and bumping ref counts (r_cnt). ++ */ ++ zfs_range_add_reader(tree, new, prev, where); ++} ++ ++/* ++ * Lock a range (offset, length) as either shared (RL_READER) ++ * or exclusive (RL_WRITER). Returns the range lock structure ++ * for later unlocking or reduce range (if entire file ++ * previously locked as RL_WRITER). 
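++ *
++ * Typical usage, following the interface summary at the top of this file:
++ *	rl = zfs_range_lock(zp, off, len, RL_READER);
++ *	... access the locked byte range ...
++ *	zfs_range_unlock(rl);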
++ */ ++rl_t * ++zfs_range_lock(znode_t *zp, uint64_t off, uint64_t len, rl_type_t type) ++{ ++ rl_t *new; ++ ++ ASSERT(type == RL_READER || type == RL_WRITER || type == RL_APPEND); ++ ++ new = kmem_alloc(sizeof (rl_t), KM_PUSHPAGE); ++ new->r_zp = zp; ++ new->r_off = off; ++ if (len + off < off) /* overflow */ ++ len = UINT64_MAX - off; ++ new->r_len = len; ++ new->r_cnt = 1; /* assume it's going to be in the tree */ ++ new->r_type = type; ++ new->r_proxy = B_FALSE; ++ new->r_write_wanted = B_FALSE; ++ new->r_read_wanted = B_FALSE; ++ ++ mutex_enter(&zp->z_range_lock); ++ if (type == RL_READER) { ++ /* ++ * First check for the usual case of no locks ++ */ ++ if (avl_numnodes(&zp->z_range_avl) == 0) ++ avl_add(&zp->z_range_avl, new); ++ else ++ zfs_range_lock_reader(zp, new); ++ } else ++ zfs_range_lock_writer(zp, new); /* RL_WRITER or RL_APPEND */ ++ mutex_exit(&zp->z_range_lock); ++ return (new); ++} ++ ++static void ++zfs_range_free(void *arg) ++{ ++ rl_t *rl = arg; ++ ++ if (rl->r_write_wanted) ++ cv_destroy(&rl->r_wr_cv); ++ ++ if (rl->r_read_wanted) ++ cv_destroy(&rl->r_rd_cv); ++ ++ kmem_free(rl, sizeof (rl_t)); ++} ++ ++/* ++ * Unlock a reader lock ++ */ ++static void ++zfs_range_unlock_reader(znode_t *zp, rl_t *remove, list_t *free_list) ++{ ++ avl_tree_t *tree = &zp->z_range_avl; ++ rl_t *rl, *next = NULL; ++ uint64_t len; ++ ++ /* ++ * The common case is when the remove entry is in the tree ++ * (cnt == 1) meaning there's been no other reader locks overlapping ++ * with this one. Otherwise the remove entry will have been ++ * removed from the tree and replaced by proxies (one or ++ * more ranges mapping to the entire range). ++ */ ++ if (remove->r_cnt == 1) { ++ avl_remove(tree, remove); ++ ++ if (remove->r_write_wanted) ++ cv_broadcast(&remove->r_wr_cv); ++ ++ if (remove->r_read_wanted) ++ cv_broadcast(&remove->r_rd_cv); ++ ++ list_insert_tail(free_list, remove); ++ } else { ++ ASSERT3U(remove->r_cnt, ==, 0); ++ ASSERT3U(remove->r_write_wanted, ==, 0); ++ ASSERT3U(remove->r_read_wanted, ==, 0); ++ /* ++ * Find start proxy representing this reader lock, ++ * then decrement ref count on all proxies ++ * that make up this range, freeing them as needed. ++ */ ++ rl = avl_find(tree, remove, NULL); ++ ASSERT(rl); ++ ASSERT(rl->r_cnt); ++ ASSERT(rl->r_type == RL_READER); ++ for (len = remove->r_len; len != 0; rl = next) { ++ len -= rl->r_len; ++ if (len) { ++ next = AVL_NEXT(tree, rl); ++ ASSERT(next); ++ ASSERT(rl->r_off + rl->r_len == next->r_off); ++ ASSERT(next->r_cnt); ++ ASSERT(next->r_type == RL_READER); ++ } ++ rl->r_cnt--; ++ if (rl->r_cnt == 0) { ++ avl_remove(tree, rl); ++ ++ if (rl->r_write_wanted) ++ cv_broadcast(&rl->r_wr_cv); ++ ++ if (rl->r_read_wanted) ++ cv_broadcast(&rl->r_rd_cv); ++ ++ list_insert_tail(free_list, rl); ++ } ++ } ++ ++ kmem_free(remove, sizeof (rl_t)); ++ } ++} ++ ++/* ++ * Unlock range and destroy range lock structure. 
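++ * Any readers or writers waiting on the range are woken before the
++ * lock (and any proxy locks it references) is freed.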
++ */ ++void ++zfs_range_unlock(rl_t *rl) ++{ ++ znode_t *zp = rl->r_zp; ++ list_t free_list; ++ rl_t *free_rl; ++ ++ ASSERT(rl->r_type == RL_WRITER || rl->r_type == RL_READER); ++ ASSERT(rl->r_cnt == 1 || rl->r_cnt == 0); ++ ASSERT(!rl->r_proxy); ++ list_create(&free_list, sizeof(rl_t), offsetof(rl_t, rl_node)); ++ ++ mutex_enter(&zp->z_range_lock); ++ if (rl->r_type == RL_WRITER) { ++ /* writer locks can't be shared or split */ ++ avl_remove(&zp->z_range_avl, rl); ++ if (rl->r_write_wanted) ++ cv_broadcast(&rl->r_wr_cv); ++ ++ if (rl->r_read_wanted) ++ cv_broadcast(&rl->r_rd_cv); ++ ++ list_insert_tail(&free_list, rl); ++ } else { ++ /* ++ * lock may be shared, let zfs_range_unlock_reader() ++ * release the zp->z_range_lock lock and free the rl_t ++ */ ++ zfs_range_unlock_reader(zp, rl, &free_list); ++ } ++ mutex_exit(&zp->z_range_lock); ++ ++ while ((free_rl = list_head(&free_list)) != NULL) { ++ list_remove(&free_list, free_rl); ++ zfs_range_free(free_rl); ++ } ++ ++ list_destroy(&free_list); ++} ++ ++/* ++ * Reduce range locked as RL_WRITER from whole file to specified range. ++ * Asserts the whole file is exclusivly locked and so there's only one ++ * entry in the tree. ++ */ ++void ++zfs_range_reduce(rl_t *rl, uint64_t off, uint64_t len) ++{ ++ znode_t *zp = rl->r_zp; ++ ++ /* Ensure there are no other locks */ ++ ASSERT(avl_numnodes(&zp->z_range_avl) == 1); ++ ASSERT(rl->r_off == 0); ++ ASSERT(rl->r_type == RL_WRITER); ++ ASSERT(!rl->r_proxy); ++ ASSERT3U(rl->r_len, ==, UINT64_MAX); ++ ASSERT3U(rl->r_cnt, ==, 1); ++ ++ mutex_enter(&zp->z_range_lock); ++ rl->r_off = off; ++ rl->r_len = len; ++ ++ if (rl->r_write_wanted) ++ cv_broadcast(&rl->r_wr_cv); ++ if (rl->r_read_wanted) ++ cv_broadcast(&rl->r_rd_cv); ++ ++ mutex_exit(&zp->z_range_lock); ++} ++ ++/* ++ * AVL comparison function used to order range locks ++ * Locks are ordered on the start offset of the range. ++ */ ++int ++zfs_range_compare(const void *arg1, const void *arg2) ++{ ++ const rl_t *rl1 = arg1; ++ const rl_t *rl2 = arg2; ++ ++ if (rl1->r_off > rl2->r_off) ++ return (1); ++ if (rl1->r_off < rl2->r_off) ++ return (-1); ++ return (0); ++} +diff -uNr linux-3.2.33-go.orig/fs/zfs/zfs/zfs_sa.c linux-3.2.33-go/fs/zfs/zfs/zfs_sa.c +--- linux-3.2.33-go.orig/fs/zfs/zfs/zfs_sa.c 1970-01-01 01:00:00.000000000 +0100 ++++ linux-3.2.33-go/fs/zfs/zfs/zfs_sa.c 2012-11-16 23:25:34.351039311 +0100 +@@ -0,0 +1,425 @@ ++/* ++ * CDDL HEADER START ++ * ++ * The contents of this file are subject to the terms of the ++ * Common Development and Distribution License (the "License"). ++ * You may not use this file except in compliance with the License. ++ * ++ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE ++ * or http://www.opensolaris.org/os/licensing. ++ * See the License for the specific language governing permissions ++ * and limitations under the License. ++ * ++ * When distributing Covered Code, include this CDDL HEADER in each ++ * file and include the License file at usr/src/OPENSOLARIS.LICENSE. ++ * If applicable, add the following below this CDDL HEADER, with the ++ * fields enclosed by brackets "[]" replaced with your own identifying ++ * information: Portions Copyright [yyyy] [name of copyright owner] ++ * ++ * CDDL HEADER END ++ */ ++/* ++ * Copyright (c) 2010, Oracle and/or its affiliates. All rights reserved. ++ */ ++ ++#include ++#include ++#include ++#include ++#include ++#include ++ ++/* ++ * ZPL attribute registration table. 
++ * Order of attributes doesn't matter ++ * a unique value will be assigned for each ++ * attribute that is file system specific ++ * ++ * This is just the set of ZPL attributes that this ++ * version of ZFS deals with natively. The file system ++ * could have other attributes stored in files, but they will be ++ * ignored. The SA framework will preserve them, just that ++ * this version of ZFS won't change or delete them. ++ */ ++ ++sa_attr_reg_t zfs_attr_table[ZPL_END+1] = { ++ {"ZPL_ATIME", sizeof (uint64_t) * 2, SA_UINT64_ARRAY, 0}, ++ {"ZPL_MTIME", sizeof (uint64_t) * 2, SA_UINT64_ARRAY, 1}, ++ {"ZPL_CTIME", sizeof (uint64_t) * 2, SA_UINT64_ARRAY, 2}, ++ {"ZPL_CRTIME", sizeof (uint64_t) * 2, SA_UINT64_ARRAY, 3}, ++ {"ZPL_GEN", sizeof (uint64_t), SA_UINT64_ARRAY, 4}, ++ {"ZPL_MODE", sizeof (uint64_t), SA_UINT64_ARRAY, 5}, ++ {"ZPL_SIZE", sizeof (uint64_t), SA_UINT64_ARRAY, 6}, ++ {"ZPL_PARENT", sizeof (uint64_t), SA_UINT64_ARRAY, 7}, ++ {"ZPL_LINKS", sizeof (uint64_t), SA_UINT64_ARRAY, 8}, ++ {"ZPL_XATTR", sizeof (uint64_t), SA_UINT64_ARRAY, 9}, ++ {"ZPL_RDEV", sizeof (uint64_t), SA_UINT64_ARRAY, 10}, ++ {"ZPL_FLAGS", sizeof (uint64_t), SA_UINT64_ARRAY, 11}, ++ {"ZPL_UID", sizeof (uint64_t), SA_UINT64_ARRAY, 12}, ++ {"ZPL_GID", sizeof (uint64_t), SA_UINT64_ARRAY, 13}, ++ {"ZPL_PAD", sizeof (uint64_t) * 4, SA_UINT64_ARRAY, 14}, ++ {"ZPL_ZNODE_ACL", 88, SA_UINT8_ARRAY, 15}, ++ {"ZPL_DACL_COUNT", sizeof (uint64_t), SA_UINT64_ARRAY, 0}, ++ {"ZPL_SYMLINK", 0, SA_UINT8_ARRAY, 0}, ++ {"ZPL_SCANSTAMP", 32, SA_UINT8_ARRAY, 0}, ++ {"ZPL_DACL_ACES", 0, SA_ACL, 0}, ++ {"ZPL_DXATTR", 0, SA_UINT8_ARRAY, 0}, ++ {NULL, 0, 0, 0} ++}; ++ ++#ifdef _KERNEL ++int ++zfs_sa_readlink(znode_t *zp, uio_t *uio) ++{ ++ dmu_buf_t *db = sa_get_db(zp->z_sa_hdl); ++ size_t bufsz; ++ int error; ++ ++ bufsz = zp->z_size; ++ if (bufsz + ZFS_OLD_ZNODE_PHYS_SIZE <= db->db_size) { ++ error = uiomove((caddr_t)db->db_data + ++ ZFS_OLD_ZNODE_PHYS_SIZE, ++ MIN((size_t)bufsz, uio->uio_resid), UIO_READ, uio); ++ } else { ++ dmu_buf_t *dbp; ++ if ((error = dmu_buf_hold(ZTOZSB(zp)->z_os, zp->z_id, ++ 0, FTAG, &dbp, DMU_READ_NO_PREFETCH)) == 0) { ++ error = uiomove(dbp->db_data, ++ MIN((size_t)bufsz, uio->uio_resid), UIO_READ, uio); ++ dmu_buf_rele(dbp, FTAG); ++ } ++ } ++ return (error); ++} ++ ++void ++zfs_sa_symlink(znode_t *zp, char *link, int len, dmu_tx_t *tx) ++{ ++ dmu_buf_t *db = sa_get_db(zp->z_sa_hdl); ++ ++ if (ZFS_OLD_ZNODE_PHYS_SIZE + len <= dmu_bonus_max()) { ++ VERIFY(dmu_set_bonus(db, ++ len + ZFS_OLD_ZNODE_PHYS_SIZE, tx) == 0); ++ if (len) { ++ bcopy(link, (caddr_t)db->db_data + ++ ZFS_OLD_ZNODE_PHYS_SIZE, len); ++ } ++ } else { ++ dmu_buf_t *dbp; ++ ++ zfs_grow_blocksize(zp, len, tx); ++ VERIFY(0 == dmu_buf_hold(ZTOZSB(zp)->z_os, ++ zp->z_id, 0, FTAG, &dbp, DMU_READ_NO_PREFETCH)); ++ ++ dmu_buf_will_dirty(dbp, tx); ++ ++ ASSERT3U(len, <=, dbp->db_size); ++ bcopy(link, dbp->db_data, len); ++ dmu_buf_rele(dbp, FTAG); ++ } ++} ++ ++void ++zfs_sa_get_scanstamp(znode_t *zp, xvattr_t *xvap) ++{ ++ zfs_sb_t *zsb = ZTOZSB(zp); ++ xoptattr_t *xoap; ++ ++ ASSERT(MUTEX_HELD(&zp->z_lock)); ++ VERIFY((xoap = xva_getxoptattr(xvap)) != NULL); ++ if (zp->z_is_sa) { ++ if (sa_lookup(zp->z_sa_hdl, SA_ZPL_SCANSTAMP(zsb), ++ &xoap->xoa_av_scanstamp, ++ sizeof (xoap->xoa_av_scanstamp)) != 0) ++ return; ++ } else { ++ dmu_object_info_t doi; ++ dmu_buf_t *db = sa_get_db(zp->z_sa_hdl); ++ int len; ++ ++ if (!(zp->z_pflags & ZFS_BONUS_SCANSTAMP)) ++ return; ++ ++ sa_object_info(zp->z_sa_hdl, &doi); ++ len = sizeof 
(xoap->xoa_av_scanstamp) + ++ ZFS_OLD_ZNODE_PHYS_SIZE; ++ ++ if (len <= doi.doi_bonus_size) { ++ (void) memcpy(xoap->xoa_av_scanstamp, ++ (caddr_t)db->db_data + ZFS_OLD_ZNODE_PHYS_SIZE, ++ sizeof (xoap->xoa_av_scanstamp)); ++ } ++ } ++ XVA_SET_RTN(xvap, XAT_AV_SCANSTAMP); ++} ++ ++void ++zfs_sa_set_scanstamp(znode_t *zp, xvattr_t *xvap, dmu_tx_t *tx) ++{ ++ zfs_sb_t *zsb = ZTOZSB(zp); ++ xoptattr_t *xoap; ++ ++ ASSERT(MUTEX_HELD(&zp->z_lock)); ++ VERIFY((xoap = xva_getxoptattr(xvap)) != NULL); ++ if (zp->z_is_sa) ++ VERIFY(0 == sa_update(zp->z_sa_hdl, SA_ZPL_SCANSTAMP(zsb), ++ &xoap->xoa_av_scanstamp, ++ sizeof (xoap->xoa_av_scanstamp), tx)); ++ else { ++ dmu_object_info_t doi; ++ dmu_buf_t *db = sa_get_db(zp->z_sa_hdl); ++ int len; ++ ++ sa_object_info(zp->z_sa_hdl, &doi); ++ len = sizeof (xoap->xoa_av_scanstamp) + ++ ZFS_OLD_ZNODE_PHYS_SIZE; ++ if (len > doi.doi_bonus_size) ++ VERIFY(dmu_set_bonus(db, len, tx) == 0); ++ (void) memcpy((caddr_t)db->db_data + ZFS_OLD_ZNODE_PHYS_SIZE, ++ xoap->xoa_av_scanstamp, sizeof (xoap->xoa_av_scanstamp)); ++ ++ zp->z_pflags |= ZFS_BONUS_SCANSTAMP; ++ VERIFY(0 == sa_update(zp->z_sa_hdl, SA_ZPL_FLAGS(zsb), ++ &zp->z_pflags, sizeof (uint64_t), tx)); ++ } ++} ++ ++int ++zfs_sa_get_xattr(znode_t *zp) ++{ ++ zfs_sb_t *zsb = ZTOZSB(zp); ++ char *obj; ++ int size; ++ int error; ++ ++ ASSERT(RW_LOCK_HELD(&zp->z_xattr_lock)); ++ ASSERT(!zp->z_xattr_cached); ++ ASSERT(zp->z_is_sa); ++ ++ error = sa_size(zp->z_sa_hdl, SA_ZPL_DXATTR(zsb), &size); ++ if (error) { ++ if (error == ENOENT) ++ return nvlist_alloc(&zp->z_xattr_cached, ++ NV_UNIQUE_NAME, KM_SLEEP); ++ else ++ return (error); ++ } ++ ++ obj = sa_spill_alloc(KM_SLEEP); ++ ++ error = sa_lookup(zp->z_sa_hdl, SA_ZPL_DXATTR(zsb), obj, size); ++ if (error == 0) ++ error = nvlist_unpack(obj, size, &zp->z_xattr_cached, KM_SLEEP); ++ ++ sa_spill_free(obj); ++ ++ return (error); ++} ++ ++int ++zfs_sa_set_xattr(znode_t *zp) ++{ ++ zfs_sb_t *zsb = ZTOZSB(zp); ++ dmu_tx_t *tx; ++ char *obj; ++ size_t size; ++ int error; ++ ++ ASSERT(RW_WRITE_HELD(&zp->z_xattr_lock)); ++ ASSERT(zp->z_xattr_cached); ++ ASSERT(zp->z_is_sa); ++ ++ error = nvlist_size(zp->z_xattr_cached, &size, NV_ENCODE_XDR); ++ if (error) ++ goto out; ++ ++ obj = sa_spill_alloc(KM_SLEEP); ++ ++ error = nvlist_pack(zp->z_xattr_cached, &obj, &size, ++ NV_ENCODE_XDR, KM_SLEEP); ++ if (error) ++ goto out_free; ++ ++ tx = dmu_tx_create(zsb->z_os); ++ dmu_tx_hold_sa_create(tx, size); ++ dmu_tx_hold_sa(tx, zp->z_sa_hdl, B_TRUE); ++ ++ error = dmu_tx_assign(tx, TXG_WAIT); ++ if (error) { ++ dmu_tx_abort(tx); ++ } else { ++ error = sa_update(zp->z_sa_hdl, SA_ZPL_DXATTR(zsb), ++ obj, size, tx); ++ if (error) ++ dmu_tx_abort(tx); ++ else ++ dmu_tx_commit(tx); ++ } ++out_free: ++ sa_spill_free(obj); ++out: ++ return (error); ++} ++ ++/* ++ * I'm not convinced we should do any of this upgrade. ++ * since the SA code can read both old/new znode formats ++ * with probably little to know performance difference. ++ * ++ * All new files will be created with the new format. 
++ */ ++ ++void ++zfs_sa_upgrade(sa_handle_t *hdl, dmu_tx_t *tx) ++{ ++ dmu_buf_t *db = sa_get_db(hdl); ++ znode_t *zp = sa_get_userdata(hdl); ++ zfs_sb_t *zsb = ZTOZSB(zp); ++ int count = 0; ++ sa_bulk_attr_t *bulk, *sa_attrs; ++ zfs_acl_locator_cb_t locate = { 0 }; ++ uint64_t uid, gid, mode, rdev, xattr, parent; ++ uint64_t crtime[2], mtime[2], ctime[2]; ++ zfs_acl_phys_t znode_acl; ++ char scanstamp[AV_SCANSTAMP_SZ]; ++ boolean_t drop_lock = B_FALSE; ++ ++ /* ++ * No upgrade if ACL isn't cached ++ * since we won't know which locks are held ++ * and ready the ACL would require special "locked" ++ * interfaces that would be messy ++ */ ++ if (zp->z_acl_cached == NULL || S_ISLNK(ZTOI(zp)->i_mode)) ++ return; ++ ++ /* ++ * If the z_lock is held and we aren't the owner ++ * the just return since we don't want to deadlock ++ * trying to update the status of z_is_sa. This ++ * file can then be upgraded at a later time. ++ * ++ * Otherwise, we know we are doing the ++ * sa_update() that caused us to enter this function. ++ */ ++ if (mutex_owner(&zp->z_lock) != curthread) { ++ if (mutex_tryenter(&zp->z_lock) == 0) ++ return; ++ else ++ drop_lock = B_TRUE; ++ } ++ ++ /* First do a bulk query of the attributes that aren't cached */ ++ bulk = kmem_alloc(sizeof(sa_bulk_attr_t) * 20, KM_SLEEP); ++ SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_MTIME(zsb), NULL, &mtime, 16); ++ SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_CTIME(zsb), NULL, &ctime, 16); ++ SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_CRTIME(zsb), NULL, &crtime, 16); ++ SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_MODE(zsb), NULL, &mode, 8); ++ SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_PARENT(zsb), NULL, &parent, 8); ++ SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_XATTR(zsb), NULL, &xattr, 8); ++ SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_RDEV(zsb), NULL, &rdev, 8); ++ SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_UID(zsb), NULL, &uid, 8); ++ SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_GID(zsb), NULL, &gid, 8); ++ SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_ZNODE_ACL(zsb), NULL, ++ &znode_acl, 88); ++ ++ if (sa_bulk_lookup_locked(hdl, bulk, count) != 0) { ++ kmem_free(bulk, sizeof(sa_bulk_attr_t) * 20); ++ goto done; ++ } ++ ++ /* ++ * While the order here doesn't matter its best to try and organize ++ * it is such a way to pick up an already existing layout number ++ */ ++ count = 0; ++ sa_attrs = kmem_zalloc(sizeof(sa_bulk_attr_t) * 20, KM_SLEEP); ++ SA_ADD_BULK_ATTR(sa_attrs, count, SA_ZPL_MODE(zsb), NULL, &mode, 8); ++ SA_ADD_BULK_ATTR(sa_attrs, count, SA_ZPL_SIZE(zsb), NULL, ++ &zp->z_size, 8); ++ SA_ADD_BULK_ATTR(sa_attrs, count, SA_ZPL_GEN(zsb), ++ NULL, &zp->z_gen, 8); ++ SA_ADD_BULK_ATTR(sa_attrs, count, SA_ZPL_UID(zsb), NULL, &uid, 8); ++ SA_ADD_BULK_ATTR(sa_attrs, count, SA_ZPL_GID(zsb), NULL, &gid, 8); ++ SA_ADD_BULK_ATTR(sa_attrs, count, SA_ZPL_PARENT(zsb), ++ NULL, &parent, 8); ++ SA_ADD_BULK_ATTR(sa_attrs, count, SA_ZPL_FLAGS(zsb), NULL, ++ &zp->z_pflags, 8); ++ SA_ADD_BULK_ATTR(sa_attrs, count, SA_ZPL_ATIME(zsb), NULL, ++ zp->z_atime, 16); ++ SA_ADD_BULK_ATTR(sa_attrs, count, SA_ZPL_MTIME(zsb), NULL, ++ &mtime, 16); ++ SA_ADD_BULK_ATTR(sa_attrs, count, SA_ZPL_CTIME(zsb), NULL, ++ &ctime, 16); ++ SA_ADD_BULK_ATTR(sa_attrs, count, SA_ZPL_CRTIME(zsb), NULL, ++ &crtime, 16); ++ SA_ADD_BULK_ATTR(sa_attrs, count, SA_ZPL_LINKS(zsb), NULL, ++ &zp->z_links, 8); ++ if (S_ISBLK(ZTOI(zp)->i_mode) || S_ISCHR(ZTOI(zp)->i_mode)) ++ SA_ADD_BULK_ATTR(sa_attrs, count, SA_ZPL_RDEV(zsb), NULL, ++ &rdev, 8); ++ SA_ADD_BULK_ATTR(sa_attrs, count, SA_ZPL_DACL_COUNT(zsb), NULL, ++ &zp->z_acl_cached->z_acl_count, 8); ++ 
++ if (zp->z_acl_cached->z_version < ZFS_ACL_VERSION_FUID) ++ zfs_acl_xform(zp, zp->z_acl_cached, CRED()); ++ ++ locate.cb_aclp = zp->z_acl_cached; ++ SA_ADD_BULK_ATTR(sa_attrs, count, SA_ZPL_DACL_ACES(zsb), ++ zfs_acl_data_locator, &locate, zp->z_acl_cached->z_acl_bytes); ++ ++ if (xattr) ++ SA_ADD_BULK_ATTR(sa_attrs, count, SA_ZPL_XATTR(zsb), ++ NULL, &xattr, 8); ++ ++ /* if scanstamp then add scanstamp */ ++ ++ if (zp->z_pflags & ZFS_BONUS_SCANSTAMP) { ++ bcopy((caddr_t)db->db_data + ZFS_OLD_ZNODE_PHYS_SIZE, ++ scanstamp, AV_SCANSTAMP_SZ); ++ SA_ADD_BULK_ATTR(sa_attrs, count, SA_ZPL_SCANSTAMP(zsb), ++ NULL, scanstamp, AV_SCANSTAMP_SZ); ++ zp->z_pflags &= ~ZFS_BONUS_SCANSTAMP; ++ } ++ ++ VERIFY(dmu_set_bonustype(db, DMU_OT_SA, tx) == 0); ++ VERIFY(sa_replace_all_by_template_locked(hdl, sa_attrs, ++ count, tx) == 0); ++ if (znode_acl.z_acl_extern_obj) ++ VERIFY(0 == dmu_object_free(zsb->z_os, ++ znode_acl.z_acl_extern_obj, tx)); ++ ++ zp->z_is_sa = B_TRUE; ++ kmem_free(sa_attrs, sizeof(sa_bulk_attr_t) * 20); ++ kmem_free(bulk, sizeof(sa_bulk_attr_t) * 20); ++done: ++ if (drop_lock) ++ mutex_exit(&zp->z_lock); ++} ++ ++void ++zfs_sa_upgrade_txholds(dmu_tx_t *tx, znode_t *zp) ++{ ++ if (!ZTOZSB(zp)->z_use_sa || zp->z_is_sa) ++ return; ++ ++ ++ dmu_tx_hold_sa(tx, zp->z_sa_hdl, B_TRUE); ++ ++ if (zfs_external_acl(zp)) { ++ dmu_tx_hold_free(tx, zfs_external_acl(zp), 0, ++ DMU_OBJECT_END); ++ } ++} ++ ++EXPORT_SYMBOL(zfs_attr_table); ++EXPORT_SYMBOL(zfs_sa_readlink); ++EXPORT_SYMBOL(zfs_sa_symlink); ++EXPORT_SYMBOL(zfs_sa_get_scanstamp); ++EXPORT_SYMBOL(zfs_sa_set_scanstamp); ++EXPORT_SYMBOL(zfs_sa_get_xattr); ++EXPORT_SYMBOL(zfs_sa_set_xattr); ++EXPORT_SYMBOL(zfs_sa_upgrade); ++EXPORT_SYMBOL(zfs_sa_upgrade_txholds); ++ ++#endif +diff -uNr linux-3.2.33-go.orig/fs/zfs/zfs/zfs_vfsops.c linux-3.2.33-go/fs/zfs/zfs/zfs_vfsops.c +--- linux-3.2.33-go.orig/fs/zfs/zfs/zfs_vfsops.c 1970-01-01 01:00:00.000000000 +0100 ++++ linux-3.2.33-go/fs/zfs/zfs/zfs_vfsops.c 2012-11-16 23:25:34.350039322 +0100 +@@ -0,0 +1,1593 @@ ++/* ++ * CDDL HEADER START ++ * ++ * The contents of this file are subject to the terms of the ++ * Common Development and Distribution License (the "License"). ++ * You may not use this file except in compliance with the License. ++ * ++ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE ++ * or http://www.opensolaris.org/os/licensing. ++ * See the License for the specific language governing permissions ++ * and limitations under the License. ++ * ++ * When distributing Covered Code, include this CDDL HEADER in each ++ * file and include the License file at usr/src/OPENSOLARIS.LICENSE. ++ * If applicable, add the following below this CDDL HEADER, with the ++ * fields enclosed by brackets "[]" replaced with your own identifying ++ * information: Portions Copyright [yyyy] [name of copyright owner] ++ * ++ * CDDL HEADER END ++ */ ++/* ++ * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved. 
++ */ ++ ++/* Portions Copyright 2010 Robert Milkowski */ ++ ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include "fs/fs_subr.h" ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include "zfs_comutil.h" ++ ++ ++/*ARGSUSED*/ ++int ++zfs_sync(struct super_block *sb, int wait, cred_t *cr) ++{ ++ zfs_sb_t *zsb = sb->s_fs_info; ++ ++ /* ++ * Data integrity is job one. We don't want a compromised kernel ++ * writing to the storage pool, so we never sync during panic. ++ */ ++ if (unlikely(oops_in_progress)) ++ return (0); ++ ++ /* ++ * Semantically, the only requirement is that the sync be initiated. ++ * The DMU syncs out txgs frequently, so there's nothing to do. ++ */ ++ if (!wait) ++ return (0); ++ ++ if (zsb != NULL) { ++ /* ++ * Sync a specific filesystem. ++ */ ++ dsl_pool_t *dp; ++ ++ ZFS_ENTER(zsb); ++ dp = dmu_objset_pool(zsb->z_os); ++ ++ /* ++ * If the system is shutting down, then skip any ++ * filesystems which may exist on a suspended pool. ++ */ ++ if (spa_suspended(dp->dp_spa)) { ++ ZFS_EXIT(zsb); ++ return (0); ++ } ++ ++ if (zsb->z_log != NULL) ++ zil_commit(zsb->z_log, 0); ++ ++ ZFS_EXIT(zsb); ++ } else { ++ /* ++ * Sync all ZFS filesystems. This is what happens when you ++ * run sync(1M). Unlike other filesystems, ZFS honors the ++ * request by waiting for all pools to commit all dirty data. ++ */ ++ spa_sync_allpools(); ++ } ++ ++ return (0); ++} ++EXPORT_SYMBOL(zfs_sync); ++ ++boolean_t ++zfs_is_readonly(zfs_sb_t *zsb) ++{ ++ return (!!(zsb->z_sb->s_flags & MS_RDONLY)); ++} ++EXPORT_SYMBOL(zfs_is_readonly); ++ ++static void ++atime_changed_cb(void *arg, uint64_t newval) ++{ ++ ((zfs_sb_t *)arg)->z_atime = newval; ++} ++ ++static void ++xattr_changed_cb(void *arg, uint64_t newval) ++{ ++ zfs_sb_t *zsb = arg; ++ ++ if (newval == ZFS_XATTR_OFF) { ++ zsb->z_flags &= ~ZSB_XATTR; ++ } else { ++ zsb->z_flags |= ZSB_XATTR; ++ ++ if (newval == ZFS_XATTR_SA) ++ zsb->z_xattr_sa = B_TRUE; ++ else ++ zsb->z_xattr_sa = B_FALSE; ++ } ++} ++ ++static void ++blksz_changed_cb(void *arg, uint64_t newval) ++{ ++ zfs_sb_t *zsb = arg; ++ ++ if (newval < SPA_MINBLOCKSIZE || ++ newval > SPA_MAXBLOCKSIZE || !ISP2(newval)) ++ newval = SPA_MAXBLOCKSIZE; ++ ++ zsb->z_max_blksz = newval; ++} ++ ++static void ++readonly_changed_cb(void *arg, uint64_t newval) ++{ ++ zfs_sb_t *zsb = arg; ++ struct super_block *sb = zsb->z_sb; ++ ++ if (sb == NULL) ++ return; ++ ++ if (newval) ++ sb->s_flags |= MS_RDONLY; ++ else ++ sb->s_flags &= ~MS_RDONLY; ++} ++ ++static void ++devices_changed_cb(void *arg, uint64_t newval) ++{ ++} ++ ++static void ++setuid_changed_cb(void *arg, uint64_t newval) ++{ ++} ++ ++static void ++exec_changed_cb(void *arg, uint64_t newval) ++{ ++} ++ ++static void ++nbmand_changed_cb(void *arg, uint64_t newval) ++{ ++ zfs_sb_t *zsb = arg; ++ struct super_block *sb = zsb->z_sb; ++ ++ if (sb == NULL) ++ return; ++ ++ if (newval == TRUE) ++ sb->s_flags |= MS_MANDLOCK; ++ else ++ sb->s_flags &= ~MS_MANDLOCK; ++} ++ ++static void ++snapdir_changed_cb(void *arg, uint64_t newval) ++{ ++ ((zfs_sb_t *)arg)->z_show_ctldir = newval; ++} ++ ++static void ++vscan_changed_cb(void *arg, uint64_t newval) ++{ ++ ((zfs_sb_t *)arg)->z_vscan = newval; ++} ++ ++static 
void ++acl_inherit_changed_cb(void *arg, uint64_t newval) ++{ ++ ((zfs_sb_t *)arg)->z_acl_inherit = newval; ++} ++ ++int ++zfs_register_callbacks(zfs_sb_t *zsb) ++{ ++ struct dsl_dataset *ds = NULL; ++ objset_t *os = zsb->z_os; ++ int error = 0; ++ ++ if (zfs_is_readonly(zsb) || !spa_writeable(dmu_objset_spa(os))) ++ readonly_changed_cb(zsb, B_TRUE); ++ ++ /* ++ * Register property callbacks. ++ * ++ * It would probably be fine to just check for i/o error from ++ * the first prop_register(), but I guess I like to go ++ * overboard... ++ */ ++ ds = dmu_objset_ds(os); ++ error = dsl_prop_register(ds, ++ "atime", atime_changed_cb, zsb); ++ error = error ? error : dsl_prop_register(ds, ++ "xattr", xattr_changed_cb, zsb); ++ error = error ? error : dsl_prop_register(ds, ++ "recordsize", blksz_changed_cb, zsb); ++ error = error ? error : dsl_prop_register(ds, ++ "readonly", readonly_changed_cb, zsb); ++ error = error ? error : dsl_prop_register(ds, ++ "devices", devices_changed_cb, zsb); ++ error = error ? error : dsl_prop_register(ds, ++ "setuid", setuid_changed_cb, zsb); ++ error = error ? error : dsl_prop_register(ds, ++ "exec", exec_changed_cb, zsb); ++ error = error ? error : dsl_prop_register(ds, ++ "snapdir", snapdir_changed_cb, zsb); ++ error = error ? error : dsl_prop_register(ds, ++ "aclinherit", acl_inherit_changed_cb, zsb); ++ error = error ? error : dsl_prop_register(ds, ++ "vscan", vscan_changed_cb, zsb); ++ error = error ? error : dsl_prop_register(ds, ++ "nbmand", nbmand_changed_cb, zsb); ++ if (error) ++ goto unregister; ++ ++ return (0); ++ ++unregister: ++ /* ++ * We may attempt to unregister some callbacks that are not ++ * registered, but this is OK; it will simply return ENOMSG, ++ * which we will ignore. ++ */ ++ (void) dsl_prop_unregister(ds, "atime", atime_changed_cb, zsb); ++ (void) dsl_prop_unregister(ds, "xattr", xattr_changed_cb, zsb); ++ (void) dsl_prop_unregister(ds, "recordsize", blksz_changed_cb, zsb); ++ (void) dsl_prop_unregister(ds, "readonly", readonly_changed_cb, zsb); ++ (void) dsl_prop_unregister(ds, "devices", devices_changed_cb, zsb); ++ (void) dsl_prop_unregister(ds, "setuid", setuid_changed_cb, zsb); ++ (void) dsl_prop_unregister(ds, "exec", exec_changed_cb, zsb); ++ (void) dsl_prop_unregister(ds, "snapdir", snapdir_changed_cb, zsb); ++ (void) dsl_prop_unregister(ds, "aclinherit", acl_inherit_changed_cb, ++ zsb); ++ (void) dsl_prop_unregister(ds, "vscan", vscan_changed_cb, zsb); ++ (void) dsl_prop_unregister(ds, "nbmand", nbmand_changed_cb, zsb); ++ ++ return (error); ++} ++EXPORT_SYMBOL(zfs_register_callbacks); ++ ++static int ++zfs_space_delta_cb(dmu_object_type_t bonustype, void *data, ++ uint64_t *userp, uint64_t *groupp) ++{ ++ znode_phys_t *znp = data; ++ int error = 0; ++ ++ /* ++ * Is it a valid type of object to track? 
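++ * Only DMU_OT_ZNODE and DMU_OT_SA bonus buffers carry the uid/gid
++ * information needed for space accounting.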
++ */ ++ if (bonustype != DMU_OT_ZNODE && bonustype != DMU_OT_SA) ++ return (ENOENT); ++ ++ /* ++ * If we have a NULL data pointer ++ * then assume the id's aren't changing and ++ * return EEXIST to the dmu to let it know to ++ * use the same ids ++ */ ++ if (data == NULL) ++ return (EEXIST); ++ ++ if (bonustype == DMU_OT_ZNODE) { ++ *userp = znp->zp_uid; ++ *groupp = znp->zp_gid; ++ } else { ++ int hdrsize; ++ ++ ASSERT(bonustype == DMU_OT_SA); ++ hdrsize = sa_hdrsize(data); ++ ++ if (hdrsize != 0) { ++ *userp = *((uint64_t *)((uintptr_t)data + hdrsize + ++ SA_UID_OFFSET)); ++ *groupp = *((uint64_t *)((uintptr_t)data + hdrsize + ++ SA_GID_OFFSET)); ++ } else { ++ /* ++ * This should only happen for newly created ++ * files that haven't had the znode data filled ++ * in yet. ++ */ ++ *userp = 0; ++ *groupp = 0; ++ } ++ } ++ return (error); ++} ++ ++static void ++fuidstr_to_sid(zfs_sb_t *zsb, const char *fuidstr, ++ char *domainbuf, int buflen, uid_t *ridp) ++{ ++ uint64_t fuid; ++ const char *domain; ++ ++ fuid = strtonum(fuidstr, NULL); ++ ++ domain = zfs_fuid_find_by_idx(zsb, FUID_INDEX(fuid)); ++ if (domain) ++ (void) strlcpy(domainbuf, domain, buflen); ++ else ++ domainbuf[0] = '\0'; ++ *ridp = FUID_RID(fuid); ++} ++ ++static uint64_t ++zfs_userquota_prop_to_obj(zfs_sb_t *zsb, zfs_userquota_prop_t type) ++{ ++ switch (type) { ++ case ZFS_PROP_USERUSED: ++ return (DMU_USERUSED_OBJECT); ++ case ZFS_PROP_GROUPUSED: ++ return (DMU_GROUPUSED_OBJECT); ++ case ZFS_PROP_USERQUOTA: ++ return (zsb->z_userquota_obj); ++ case ZFS_PROP_GROUPQUOTA: ++ return (zsb->z_groupquota_obj); ++ default: ++ return (ENOTSUP); ++ } ++ return (0); ++} ++ ++int ++zfs_userspace_many(zfs_sb_t *zsb, zfs_userquota_prop_t type, ++ uint64_t *cookiep, void *vbuf, uint64_t *bufsizep) ++{ ++ int error; ++ zap_cursor_t zc; ++ zap_attribute_t za; ++ zfs_useracct_t *buf = vbuf; ++ uint64_t obj; ++ ++ if (!dmu_objset_userspace_present(zsb->z_os)) ++ return (ENOTSUP); ++ ++ obj = zfs_userquota_prop_to_obj(zsb, type); ++ if (obj == 0) { ++ *bufsizep = 0; ++ return (0); ++ } ++ ++ for (zap_cursor_init_serialized(&zc, zsb->z_os, obj, *cookiep); ++ (error = zap_cursor_retrieve(&zc, &za)) == 0; ++ zap_cursor_advance(&zc)) { ++ if ((uintptr_t)buf - (uintptr_t)vbuf + sizeof (zfs_useracct_t) > ++ *bufsizep) ++ break; ++ ++ fuidstr_to_sid(zsb, za.za_name, ++ buf->zu_domain, sizeof (buf->zu_domain), &buf->zu_rid); ++ ++ buf->zu_space = za.za_first_integer; ++ buf++; ++ } ++ if (error == ENOENT) ++ error = 0; ++ ++ ASSERT3U((uintptr_t)buf - (uintptr_t)vbuf, <=, *bufsizep); ++ *bufsizep = (uintptr_t)buf - (uintptr_t)vbuf; ++ *cookiep = zap_cursor_serialize(&zc); ++ zap_cursor_fini(&zc); ++ return (error); ++} ++EXPORT_SYMBOL(zfs_userspace_many); ++ ++/* ++ * buf must be big enough (eg, 32 bytes) ++ */ ++static int ++id_to_fuidstr(zfs_sb_t *zsb, const char *domain, uid_t rid, ++ char *buf, boolean_t addok) ++{ ++ uint64_t fuid; ++ int domainid = 0; ++ ++ if (domain && domain[0]) { ++ domainid = zfs_fuid_find_by_domain(zsb, domain, NULL, addok); ++ if (domainid == -1) ++ return (ENOENT); ++ } ++ fuid = FUID_ENCODE(domainid, rid); ++ (void) sprintf(buf, "%llx", (longlong_t)fuid); ++ return (0); ++} ++ ++int ++zfs_userspace_one(zfs_sb_t *zsb, zfs_userquota_prop_t type, ++ const char *domain, uint64_t rid, uint64_t *valp) ++{ ++ char buf[32]; ++ int err; ++ uint64_t obj; ++ ++ *valp = 0; ++ ++ if (!dmu_objset_userspace_present(zsb->z_os)) ++ return (ENOTSUP); ++ ++ obj = zfs_userquota_prop_to_obj(zsb, type); ++ if (obj == 0) ++ return (0); ++ 
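++	/* Encode the domain/rid pair as the FUID string key used in the ZAP lookup below. */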
++ err = id_to_fuidstr(zsb, domain, rid, buf, B_FALSE); ++ if (err) ++ return (err); ++ ++ err = zap_lookup(zsb->z_os, obj, buf, 8, 1, valp); ++ if (err == ENOENT) ++ err = 0; ++ return (err); ++} ++EXPORT_SYMBOL(zfs_userspace_one); ++ ++int ++zfs_set_userquota(zfs_sb_t *zsb, zfs_userquota_prop_t type, ++ const char *domain, uint64_t rid, uint64_t quota) ++{ ++ char buf[32]; ++ int err; ++ dmu_tx_t *tx; ++ uint64_t *objp; ++ boolean_t fuid_dirtied; ++ ++ if (type != ZFS_PROP_USERQUOTA && type != ZFS_PROP_GROUPQUOTA) ++ return (EINVAL); ++ ++ if (zsb->z_version < ZPL_VERSION_USERSPACE) ++ return (ENOTSUP); ++ ++ objp = (type == ZFS_PROP_USERQUOTA) ? &zsb->z_userquota_obj : ++ &zsb->z_groupquota_obj; ++ ++ err = id_to_fuidstr(zsb, domain, rid, buf, B_TRUE); ++ if (err) ++ return (err); ++ fuid_dirtied = zsb->z_fuid_dirty; ++ ++ tx = dmu_tx_create(zsb->z_os); ++ dmu_tx_hold_zap(tx, *objp ? *objp : DMU_NEW_OBJECT, B_TRUE, NULL); ++ if (*objp == 0) { ++ dmu_tx_hold_zap(tx, MASTER_NODE_OBJ, B_TRUE, ++ zfs_userquota_prop_prefixes[type]); ++ } ++ if (fuid_dirtied) ++ zfs_fuid_txhold(zsb, tx); ++ err = dmu_tx_assign(tx, TXG_WAIT); ++ if (err) { ++ dmu_tx_abort(tx); ++ return (err); ++ } ++ ++ mutex_enter(&zsb->z_lock); ++ if (*objp == 0) { ++ *objp = zap_create(zsb->z_os, DMU_OT_USERGROUP_QUOTA, ++ DMU_OT_NONE, 0, tx); ++ VERIFY(0 == zap_add(zsb->z_os, MASTER_NODE_OBJ, ++ zfs_userquota_prop_prefixes[type], 8, 1, objp, tx)); ++ } ++ mutex_exit(&zsb->z_lock); ++ ++ if (quota == 0) { ++ err = zap_remove(zsb->z_os, *objp, buf, tx); ++ if (err == ENOENT) ++ err = 0; ++ } else { ++ err = zap_update(zsb->z_os, *objp, buf, 8, 1, "a, tx); ++ } ++ ASSERT(err == 0); ++ if (fuid_dirtied) ++ zfs_fuid_sync(zsb, tx); ++ dmu_tx_commit(tx); ++ return (err); ++} ++EXPORT_SYMBOL(zfs_set_userquota); ++ ++boolean_t ++zfs_fuid_overquota(zfs_sb_t *zsb, boolean_t isgroup, uint64_t fuid) ++{ ++ char buf[32]; ++ uint64_t used, quota, usedobj, quotaobj; ++ int err; ++ ++ usedobj = isgroup ? DMU_GROUPUSED_OBJECT : DMU_USERUSED_OBJECT; ++ quotaobj = isgroup ? zsb->z_groupquota_obj : zsb->z_userquota_obj; ++ ++ if (quotaobj == 0 || zsb->z_replay) ++ return (B_FALSE); ++ ++ (void) sprintf(buf, "%llx", (longlong_t)fuid); ++ err = zap_lookup(zsb->z_os, quotaobj, buf, 8, 1, "a); ++ if (err != 0) ++ return (B_FALSE); ++ ++ err = zap_lookup(zsb->z_os, usedobj, buf, 8, 1, &used); ++ if (err != 0) ++ return (B_FALSE); ++ return (used >= quota); ++} ++EXPORT_SYMBOL(zfs_fuid_overquota); ++ ++boolean_t ++zfs_owner_overquota(zfs_sb_t *zsb, znode_t *zp, boolean_t isgroup) ++{ ++ uint64_t fuid; ++ uint64_t quotaobj; ++ ++ quotaobj = isgroup ? zsb->z_groupquota_obj : zsb->z_userquota_obj; ++ ++ fuid = isgroup ? zp->z_gid : zp->z_uid; ++ ++ if (quotaobj == 0 || zsb->z_replay) ++ return (B_FALSE); ++ ++ return (zfs_fuid_overquota(zsb, isgroup, fuid)); ++} ++EXPORT_SYMBOL(zfs_owner_overquota); ++ ++int ++zfs_sb_create(const char *osname, zfs_sb_t **zsbp) ++{ ++ objset_t *os; ++ zfs_sb_t *zsb; ++ uint64_t zval; ++ int i, error; ++ uint64_t sa_obj; ++ ++ zsb = kmem_zalloc(sizeof (zfs_sb_t), KM_SLEEP | KM_NODEBUG); ++ ++ /* ++ * We claim to always be readonly so we can open snapshots; ++ * other ZPL code will prevent us from writing to snapshots. ++ */ ++ error = dmu_objset_own(osname, DMU_OST_ZFS, B_TRUE, zsb, &os); ++ if (error) { ++ kmem_free(zsb, sizeof (zfs_sb_t)); ++ return (error); ++ } ++ ++ /* ++ * Initialize the zfs-specific filesystem structure. 
++ * Should probably make this a kmem cache, shuffle fields, ++ * and just bzero up to z_hold_mtx[]. ++ */ ++ zsb->z_sb = NULL; ++ zsb->z_parent = zsb; ++ zsb->z_max_blksz = SPA_MAXBLOCKSIZE; ++ zsb->z_show_ctldir = ZFS_SNAPDIR_VISIBLE; ++ zsb->z_os = os; ++ ++ error = zfs_get_zplprop(os, ZFS_PROP_VERSION, &zsb->z_version); ++ if (error) { ++ goto out; ++ } else if (zsb->z_version > ++ zfs_zpl_version_map(spa_version(dmu_objset_spa(os)))) { ++ (void) printk("Can't mount a version %lld file system " ++ "on a version %lld pool\n. Pool must be upgraded to mount " ++ "this file system.", (u_longlong_t)zsb->z_version, ++ (u_longlong_t)spa_version(dmu_objset_spa(os))); ++ error = ENOTSUP; ++ goto out; ++ } ++ if ((error = zfs_get_zplprop(os, ZFS_PROP_NORMALIZE, &zval)) != 0) ++ goto out; ++ zsb->z_norm = (int)zval; ++ ++ if ((error = zfs_get_zplprop(os, ZFS_PROP_UTF8ONLY, &zval)) != 0) ++ goto out; ++ zsb->z_utf8 = (zval != 0); ++ ++ if ((error = zfs_get_zplprop(os, ZFS_PROP_CASE, &zval)) != 0) ++ goto out; ++ zsb->z_case = (uint_t)zval; ++ ++ /* ++ * Fold case on file systems that are always or sometimes case ++ * insensitive. ++ */ ++ if (zsb->z_case == ZFS_CASE_INSENSITIVE || ++ zsb->z_case == ZFS_CASE_MIXED) ++ zsb->z_norm |= U8_TEXTPREP_TOUPPER; ++ ++ zsb->z_use_fuids = USE_FUIDS(zsb->z_version, zsb->z_os); ++ zsb->z_use_sa = USE_SA(zsb->z_version, zsb->z_os); ++ ++ if (zsb->z_use_sa) { ++ /* should either have both of these objects or none */ ++ error = zap_lookup(os, MASTER_NODE_OBJ, ZFS_SA_ATTRS, 8, 1, ++ &sa_obj); ++ if (error) ++ goto out; ++ ++ error = zfs_get_zplprop(os, ZFS_PROP_XATTR, &zval); ++ if ((error == 0) && (zval == ZFS_XATTR_SA)) ++ zsb->z_xattr_sa = B_TRUE; ++ } else { ++ /* ++ * Pre SA versions file systems should never touch ++ * either the attribute registration or layout objects. 
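++ * In that case sa_obj stays 0, so sa_setup() below runs without a
++ * persistent attribute registration object.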
++ */ ++ sa_obj = 0; ++ } ++ ++ error = sa_setup(os, sa_obj, zfs_attr_table, ZPL_END, ++ &zsb->z_attr_table); ++ if (error) ++ goto out; ++ ++ if (zsb->z_version >= ZPL_VERSION_SA) ++ sa_register_update_callback(os, zfs_sa_upgrade); ++ ++ error = zap_lookup(os, MASTER_NODE_OBJ, ZFS_ROOT_OBJ, 8, 1, ++ &zsb->z_root); ++ if (error) ++ goto out; ++ ASSERT(zsb->z_root != 0); ++ ++ error = zap_lookup(os, MASTER_NODE_OBJ, ZFS_UNLINKED_SET, 8, 1, ++ &zsb->z_unlinkedobj); ++ if (error) ++ goto out; ++ ++ error = zap_lookup(os, MASTER_NODE_OBJ, ++ zfs_userquota_prop_prefixes[ZFS_PROP_USERQUOTA], ++ 8, 1, &zsb->z_userquota_obj); ++ if (error && error != ENOENT) ++ goto out; ++ ++ error = zap_lookup(os, MASTER_NODE_OBJ, ++ zfs_userquota_prop_prefixes[ZFS_PROP_GROUPQUOTA], ++ 8, 1, &zsb->z_groupquota_obj); ++ if (error && error != ENOENT) ++ goto out; ++ ++ error = zap_lookup(os, MASTER_NODE_OBJ, ZFS_FUID_TABLES, 8, 1, ++ &zsb->z_fuid_obj); ++ if (error && error != ENOENT) ++ goto out; ++ ++ error = zap_lookup(os, MASTER_NODE_OBJ, ZFS_SHARES_DIR, 8, 1, ++ &zsb->z_shares_dir); ++ if (error && error != ENOENT) ++ goto out; ++ ++ mutex_init(&zsb->z_znodes_lock, NULL, MUTEX_DEFAULT, NULL); ++ mutex_init(&zsb->z_lock, NULL, MUTEX_DEFAULT, NULL); ++ list_create(&zsb->z_all_znodes, sizeof (znode_t), ++ offsetof(znode_t, z_link_node)); ++ rrw_init(&zsb->z_teardown_lock); ++ rw_init(&zsb->z_teardown_inactive_lock, NULL, RW_DEFAULT, NULL); ++ rw_init(&zsb->z_fuid_lock, NULL, RW_DEFAULT, NULL); ++ for (i = 0; i != ZFS_OBJ_MTX_SZ; i++) ++ mutex_init(&zsb->z_hold_mtx[i], NULL, MUTEX_DEFAULT, NULL); ++ ++ avl_create(&zsb->z_ctldir_snaps, snapentry_compare, ++ sizeof (zfs_snapentry_t), offsetof(zfs_snapentry_t, se_node)); ++ mutex_init(&zsb->z_ctldir_lock, NULL, MUTEX_DEFAULT, NULL); ++ ++ *zsbp = zsb; ++ return (0); ++ ++out: ++ dmu_objset_disown(os, zsb); ++ *zsbp = NULL; ++ kmem_free(zsb, sizeof (zfs_sb_t)); ++ return (error); ++} ++EXPORT_SYMBOL(zfs_sb_create); ++ ++int ++zfs_sb_setup(zfs_sb_t *zsb, boolean_t mounting) ++{ ++ int error; ++ ++ error = zfs_register_callbacks(zsb); ++ if (error) ++ return (error); ++ ++ /* ++ * Set the objset user_ptr to track its zsb. ++ */ ++ mutex_enter(&zsb->z_os->os_user_ptr_lock); ++ dmu_objset_set_user(zsb->z_os, zsb); ++ mutex_exit(&zsb->z_os->os_user_ptr_lock); ++ ++ zsb->z_log = zil_open(zsb->z_os, zfs_get_data); ++ ++ /* ++ * If we are not mounting (ie: online recv), then we don't ++ * have to worry about replaying the log as we blocked all ++ * operations out since we closed the ZIL. ++ */ ++ if (mounting) { ++ boolean_t readonly; ++ ++ /* ++ * During replay we remove the read only flag to ++ * allow replays to succeed. ++ */ ++ readonly = zfs_is_readonly(zsb); ++ if (readonly != 0) ++ readonly_changed_cb(zsb, B_FALSE); ++ else ++ zfs_unlinked_drain(zsb); ++ ++ /* ++ * Parse and replay the intent log. ++ * ++ * Because of ziltest, this must be done after ++ * zfs_unlinked_drain(). (Further note: ziltest ++ * doesn't use readonly mounts, where ++ * zfs_unlinked_drain() isn't called.) This is because ++ * ziltest causes spa_sync() to think it's committed, ++ * but actually it is not, so the intent log contains ++ * many txg's worth of changes. ++ * ++ * In particular, if object N is in the unlinked set in ++ * the last txg to actually sync, then it could be ++ * actually freed in a later txg and then reallocated ++ * in a yet later txg. This would write a "create ++ * object N" record to the intent log. 
Normally, this ++ * would be fine because the spa_sync() would have ++ * written out the fact that object N is free, before ++ * we could write the "create object N" intent log ++ * record. ++ * ++ * But when we are in ziltest mode, we advance the "open ++ * txg" without actually spa_sync()-ing the changes to ++ * disk. So we would see that object N is still ++ * allocated and in the unlinked set, and there is an ++ * intent log record saying to allocate it. ++ */ ++ if (spa_writeable(dmu_objset_spa(zsb->z_os))) { ++ if (zil_replay_disable) { ++ zil_destroy(zsb->z_log, B_FALSE); ++ } else { ++ zsb->z_replay = B_TRUE; ++ zil_replay(zsb->z_os, zsb, ++ zfs_replay_vector); ++ zsb->z_replay = B_FALSE; ++ } ++ } ++ ++ /* restore readonly bit */ ++ if (readonly != 0) ++ readonly_changed_cb(zsb, B_TRUE); ++ } ++ ++ return (0); ++} ++EXPORT_SYMBOL(zfs_sb_setup); ++ ++void ++zfs_sb_free(zfs_sb_t *zsb) ++{ ++ int i; ++ ++ zfs_fuid_destroy(zsb); ++ ++ mutex_destroy(&zsb->z_znodes_lock); ++ mutex_destroy(&zsb->z_lock); ++ list_destroy(&zsb->z_all_znodes); ++ rrw_destroy(&zsb->z_teardown_lock); ++ rw_destroy(&zsb->z_teardown_inactive_lock); ++ rw_destroy(&zsb->z_fuid_lock); ++ for (i = 0; i != ZFS_OBJ_MTX_SZ; i++) ++ mutex_destroy(&zsb->z_hold_mtx[i]); ++ mutex_destroy(&zsb->z_ctldir_lock); ++ avl_destroy(&zsb->z_ctldir_snaps); ++ kmem_free(zsb, sizeof (zfs_sb_t)); ++} ++EXPORT_SYMBOL(zfs_sb_free); ++ ++static void ++zfs_set_fuid_feature(zfs_sb_t *zsb) ++{ ++ zsb->z_use_fuids = USE_FUIDS(zsb->z_version, zsb->z_os); ++ zsb->z_use_sa = USE_SA(zsb->z_version, zsb->z_os); ++} ++ ++void ++zfs_unregister_callbacks(zfs_sb_t *zsb) ++{ ++ objset_t *os = zsb->z_os; ++ struct dsl_dataset *ds; ++ ++ /* ++ * Unregister properties. ++ */ ++ if (!dmu_objset_is_snapshot(os)) { ++ ds = dmu_objset_ds(os); ++ VERIFY(dsl_prop_unregister(ds, "atime", atime_changed_cb, ++ zsb) == 0); ++ ++ VERIFY(dsl_prop_unregister(ds, "xattr", xattr_changed_cb, ++ zsb) == 0); ++ ++ VERIFY(dsl_prop_unregister(ds, "recordsize", blksz_changed_cb, ++ zsb) == 0); ++ ++ VERIFY(dsl_prop_unregister(ds, "readonly", readonly_changed_cb, ++ zsb) == 0); ++ ++ VERIFY(dsl_prop_unregister(ds, "devices", devices_changed_cb, ++ zsb) == 0); ++ ++ VERIFY(dsl_prop_unregister(ds, "setuid", setuid_changed_cb, ++ zsb) == 0); ++ ++ VERIFY(dsl_prop_unregister(ds, "exec", exec_changed_cb, ++ zsb) == 0); ++ ++ VERIFY(dsl_prop_unregister(ds, "snapdir", snapdir_changed_cb, ++ zsb) == 0); ++ ++ VERIFY(dsl_prop_unregister(ds, "aclinherit", ++ acl_inherit_changed_cb, zsb) == 0); ++ ++ VERIFY(dsl_prop_unregister(ds, "vscan", ++ vscan_changed_cb, zsb) == 0); ++ ++ VERIFY(dsl_prop_unregister(ds, "nbmand", ++ nbmand_changed_cb, zsb) == 0); ++ } ++} ++EXPORT_SYMBOL(zfs_unregister_callbacks); ++ ++#ifdef HAVE_MLSLABEL ++/* ++ * zfs_check_global_label: ++ * Check that the hex label string is appropriate for the dataset ++ * being mounted into the global_zone proper. ++ * ++ * Return an error if the hex label string is not default or ++ * admin_low/admin_high. For admin_low labels, the corresponding ++ * dataset must be readonly. ++ */ ++int ++zfs_check_global_label(const char *dsname, const char *hexsl) ++{ ++ if (strcasecmp(hexsl, ZFS_MLSLABEL_DEFAULT) == 0) ++ return (0); ++ if (strcasecmp(hexsl, ADMIN_HIGH) == 0) ++ return (0); ++ if (strcasecmp(hexsl, ADMIN_LOW) == 0) { ++ /* must be readonly */ ++ uint64_t rdonly; ++ ++ if (dsl_prop_get_integer(dsname, ++ zfs_prop_to_name(ZFS_PROP_READONLY), &rdonly, NULL)) ++ return (EACCES); ++ return (rdonly ? 
0 : EACCES); ++ } ++ return (EACCES); ++} ++EXPORT_SYMBOL(zfs_check_global_label); ++#endif /* HAVE_MLSLABEL */ ++ ++int ++zfs_statvfs(struct dentry *dentry, struct kstatfs *statp) ++{ ++ zfs_sb_t *zsb = dentry->d_sb->s_fs_info; ++ uint64_t refdbytes, availbytes, usedobjs, availobjs; ++ uint64_t fsid; ++ uint32_t bshift; ++ ++ ZFS_ENTER(zsb); ++ ++ dmu_objset_space(zsb->z_os, ++ &refdbytes, &availbytes, &usedobjs, &availobjs); ++ ++ fsid = dmu_objset_fsid_guid(zsb->z_os); ++ /* ++ * The underlying storage pool actually uses multiple block ++ * size. Under Solaris frsize (fragment size) is reported as ++ * the smallest block size we support, and bsize (block size) ++ * as the filesystem's maximum block size. Unfortunately, ++ * under Linux the fragment size and block size are often used ++ * interchangeably. Thus we are forced to report both of them ++ * as the filesystem's maximum block size. ++ */ ++ statp->f_frsize = zsb->z_max_blksz; ++ statp->f_bsize = zsb->z_max_blksz; ++ bshift = fls(statp->f_bsize) - 1; ++ ++ /* ++ * The following report "total" blocks of various kinds in ++ * the file system, but reported in terms of f_bsize - the ++ * "preferred" size. ++ */ ++ ++ statp->f_blocks = (refdbytes + availbytes) >> bshift; ++ statp->f_bfree = availbytes >> bshift; ++ statp->f_bavail = statp->f_bfree; /* no root reservation */ ++ ++ /* ++ * statvfs() should really be called statufs(), because it assumes ++ * static metadata. ZFS doesn't preallocate files, so the best ++ * we can do is report the max that could possibly fit in f_files, ++ * and that minus the number actually used in f_ffree. ++ * For f_ffree, report the smaller of the number of object available ++ * and the number of blocks (each object will take at least a block). ++ */ ++ statp->f_ffree = MIN(availobjs, availbytes >> DNODE_SHIFT); ++ statp->f_files = statp->f_ffree + usedobjs; ++ statp->f_fsid.val[0] = (uint32_t)fsid; ++ statp->f_fsid.val[1] = (uint32_t)(fsid >> 32); ++ statp->f_type = ZFS_SUPER_MAGIC; ++ statp->f_namelen = ZFS_MAXNAMELEN; ++ ++ /* ++ * We have all of 40 characters to stuff a string here. ++ * Is there anything useful we could/should provide? ++ */ ++ bzero(statp->f_spare, sizeof (statp->f_spare)); ++ ++ ZFS_EXIT(zsb); ++ return (0); ++} ++EXPORT_SYMBOL(zfs_statvfs); ++ ++int ++zfs_root(zfs_sb_t *zsb, struct inode **ipp) ++{ ++ znode_t *rootzp; ++ int error; ++ ++ ZFS_ENTER(zsb); ++ ++ error = zfs_zget(zsb, zsb->z_root, &rootzp); ++ if (error == 0) ++ *ipp = ZTOI(rootzp); ++ ++ ZFS_EXIT(zsb); ++ return (error); ++} ++EXPORT_SYMBOL(zfs_root); ++ ++#ifdef HAVE_SHRINK ++int ++zfs_sb_prune(struct super_block *sb, unsigned long nr_to_scan, int *objects) ++{ ++ zfs_sb_t *zsb = sb->s_fs_info; ++ struct shrinker *shrinker = &sb->s_shrink; ++ struct shrink_control sc = { ++ .nr_to_scan = nr_to_scan, ++ .gfp_mask = GFP_KERNEL, ++ }; ++ ++ ZFS_ENTER(zsb); ++ *objects = (*shrinker->shrink)(shrinker, &sc); ++ ZFS_EXIT(zsb); ++ ++ return (0); ++} ++EXPORT_SYMBOL(zfs_sb_prune); ++#endif /* HAVE_SHRINK */ ++ ++/* ++ * Teardown the zfs_sb_t::z_os. ++ * ++ * Note, if 'unmounting' if FALSE, we return with the 'z_teardown_lock' ++ * and 'z_teardown_inactive_lock' held. 
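++ * When 'unmounting' is B_TRUE both locks are dropped again before
++ * returning.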
++ */ ++int ++zfs_sb_teardown(zfs_sb_t *zsb, boolean_t unmounting) ++{ ++ znode_t *zp; ++ ++ rrw_enter(&zsb->z_teardown_lock, RW_WRITER, FTAG); ++ ++ if (!unmounting) { ++ /* ++ * We purge the parent filesystem's super block as the ++ * parent filesystem and all of its snapshots have their ++ * inode's super block set to the parent's filesystem's ++ * super block. Note, 'z_parent' is self referential ++ * for non-snapshots. ++ */ ++ shrink_dcache_sb(zsb->z_parent->z_sb); ++ (void) spl_invalidate_inodes(zsb->z_parent->z_sb, 0); ++ } ++ ++ /* ++ * Drain the iput_taskq to ensure all active references to the ++ * zfs_sb_t have been handled only then can it be safely destroyed. ++ */ ++ taskq_wait(dsl_pool_iput_taskq(dmu_objset_pool(zsb->z_os))); ++ ++ /* ++ * Close the zil. NB: Can't close the zil while zfs_inactive ++ * threads are blocked as zil_close can call zfs_inactive. ++ */ ++ if (zsb->z_log) { ++ zil_close(zsb->z_log); ++ zsb->z_log = NULL; ++ } ++ ++ rw_enter(&zsb->z_teardown_inactive_lock, RW_WRITER); ++ ++ /* ++ * If we are not unmounting (ie: online recv) and someone already ++ * unmounted this file system while we were doing the switcheroo, ++ * or a reopen of z_os failed then just bail out now. ++ */ ++ if (!unmounting && (zsb->z_unmounted || zsb->z_os == NULL)) { ++ rw_exit(&zsb->z_teardown_inactive_lock); ++ rrw_exit(&zsb->z_teardown_lock, FTAG); ++ return (EIO); ++ } ++ ++ /* ++ * At this point there are no vops active, and any new vops will ++ * fail with EIO since we have z_teardown_lock for writer (only ++ * relavent for forced unmount). ++ * ++ * Release all holds on dbufs. ++ */ ++ mutex_enter(&zsb->z_znodes_lock); ++ for (zp = list_head(&zsb->z_all_znodes); zp != NULL; ++ zp = list_next(&zsb->z_all_znodes, zp)) ++ if (zp->z_sa_hdl) { ++ ASSERT(atomic_read(&ZTOI(zp)->i_count) > 0); ++ zfs_znode_dmu_fini(zp); ++ } ++ mutex_exit(&zsb->z_znodes_lock); ++ ++ /* ++ * If we are unmounting, set the unmounted flag and let new vops ++ * unblock. zfs_inactive will have the unmounted behavior, and all ++ * other vops will fail with EIO. ++ */ ++ if (unmounting) { ++ zsb->z_unmounted = B_TRUE; ++ rrw_exit(&zsb->z_teardown_lock, FTAG); ++ rw_exit(&zsb->z_teardown_inactive_lock); ++ } ++ ++ /* ++ * z_os will be NULL if there was an error in attempting to reopen ++ * zsb, so just return as the properties had already been ++ * ++ * unregistered and cached data had been evicted before. ++ */ ++ if (zsb->z_os == NULL) ++ return (0); ++ ++ /* ++ * Unregister properties. 
++ */ ++ zfs_unregister_callbacks(zsb); ++ ++ /* ++ * Evict cached data ++ */ ++ if (dsl_dataset_is_dirty(dmu_objset_ds(zsb->z_os)) && ++ !zfs_is_readonly(zsb)) ++ txg_wait_synced(dmu_objset_pool(zsb->z_os), 0); ++ (void) dmu_objset_evict_dbufs(zsb->z_os); ++ ++ return (0); ++} ++EXPORT_SYMBOL(zfs_sb_teardown); ++ ++#if defined(HAVE_BDI) && !defined(HAVE_BDI_SETUP_AND_REGISTER) ++atomic_long_t zfs_bdi_seq = ATOMIC_LONG_INIT(0); ++#endif /* HAVE_BDI && !HAVE_BDI_SETUP_AND_REGISTER */ ++ ++int ++zfs_domount(struct super_block *sb, void *data, int silent) ++{ ++ zpl_mount_data_t *zmd = data; ++ const char *osname = zmd->z_osname; ++ zfs_sb_t *zsb; ++ struct inode *root_inode; ++ uint64_t recordsize; ++ int error; ++ ++ error = zfs_sb_create(osname, &zsb); ++ if (error) ++ return (error); ++ ++ if ((error = dsl_prop_get_integer(osname, "recordsize", ++ &recordsize, NULL))) ++ goto out; ++ ++ zsb->z_sb = sb; ++ sb->s_fs_info = zsb; ++ sb->s_magic = ZFS_SUPER_MAGIC; ++ sb->s_maxbytes = MAX_LFS_FILESIZE; ++ sb->s_time_gran = 1; ++ sb->s_blocksize = recordsize; ++ sb->s_blocksize_bits = ilog2(recordsize); ++ ++#ifdef HAVE_BDI ++ /* ++ * 2.6.32 API change, ++ * Added backing_device_info (BDI) per super block interfaces. A BDI ++ * must be configured when using a non-device backed filesystem for ++ * proper writeback. This is not required for older pdflush kernels. ++ * ++ * NOTE: Linux read-ahead is disabled in favor of zfs read-ahead. ++ */ ++ zsb->z_bdi.ra_pages = 0; ++ sb->s_bdi = &zsb->z_bdi; ++ ++ error = -bdi_setup_and_register(&zsb->z_bdi, "zfs", BDI_CAP_MAP_COPY); ++ if (error) ++ goto out; ++#endif /* HAVE_BDI */ ++ ++ /* Set callback operations for the file system. */ ++ sb->s_op = &zpl_super_operations; ++ sb->s_xattr = zpl_xattr_handlers; ++ sb->s_export_op = &zpl_export_operations; ++ ++ /* Set features for file system. */ ++ zfs_set_fuid_feature(zsb); ++ ++ if (dmu_objset_is_snapshot(zsb->z_os)) { ++ uint64_t pval; ++ ++ atime_changed_cb(zsb, B_FALSE); ++ readonly_changed_cb(zsb, B_TRUE); ++ if ((error = dsl_prop_get_integer(osname,"xattr",&pval,NULL))) ++ goto out; ++ xattr_changed_cb(zsb, pval); ++ zsb->z_issnap = B_TRUE; ++ zsb->z_os->os_sync = ZFS_SYNC_DISABLED; ++ ++ mutex_enter(&zsb->z_os->os_user_ptr_lock); ++ dmu_objset_set_user(zsb->z_os, zsb); ++ mutex_exit(&zsb->z_os->os_user_ptr_lock); ++ } else { ++ error = zfs_sb_setup(zsb, B_TRUE); ++ } ++ ++ /* Allocate a root inode for the filesystem. */ ++ error = zfs_root(zsb, &root_inode); ++ if (error) { ++ (void) zfs_umount(sb); ++ goto out; ++ } ++ ++ /* Allocate a root dentry for the filesystem */ ++ sb->s_root = d_make_root(root_inode); ++ if (sb->s_root == NULL) { ++ (void) zfs_umount(sb); ++ error = ENOMEM; ++ goto out; ++ } ++ ++ if (!zsb->z_issnap) ++ zfsctl_create(zsb); ++out: ++ if (error) { ++ dmu_objset_disown(zsb->z_os, zsb); ++ zfs_sb_free(zsb); ++ } ++ ++ return (error); ++} ++EXPORT_SYMBOL(zfs_domount); ++ ++/* ++ * Called when an unmount is requested and certain sanity checks have ++ * already passed. At this point no dentries or inodes have been reclaimed ++ * from their respective caches. We drop the extra reference on the .zfs ++ * control directory to allow everything to be reclaimed. All snapshots ++ * must already have been unmounted to reach this point. 
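++ * The remaining teardown work happens later in zfs_umount().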
++ */ ++void ++zfs_preumount(struct super_block *sb) ++{ ++ zfs_sb_t *zsb = sb->s_fs_info; ++ ++ if (zsb != NULL && zsb->z_ctldir != NULL) ++ zfsctl_destroy(zsb); ++} ++EXPORT_SYMBOL(zfs_preumount); ++ ++/* ++ * Called once all other unmount released tear down has occurred. ++ * It is our responsibility to release any remaining infrastructure. ++ */ ++/*ARGSUSED*/ ++int ++zfs_umount(struct super_block *sb) ++{ ++ zfs_sb_t *zsb = sb->s_fs_info; ++ objset_t *os; ++ ++ VERIFY(zfs_sb_teardown(zsb, B_TRUE) == 0); ++ os = zsb->z_os; ++ ++#ifdef HAVE_BDI ++ bdi_destroy(sb->s_bdi); ++#endif /* HAVE_BDI */ ++ ++ /* ++ * z_os will be NULL if there was an error in ++ * attempting to reopen zsb. ++ */ ++ if (os != NULL) { ++ /* ++ * Unset the objset user_ptr. ++ */ ++ mutex_enter(&os->os_user_ptr_lock); ++ dmu_objset_set_user(os, NULL); ++ mutex_exit(&os->os_user_ptr_lock); ++ ++ /* ++ * Finally release the objset ++ */ ++ dmu_objset_disown(os, zsb); ++ } ++ ++ zfs_sb_free(zsb); ++ return (0); ++} ++EXPORT_SYMBOL(zfs_umount); ++ ++int ++zfs_remount(struct super_block *sb, int *flags, char *data) ++{ ++ /* ++ * All namespace flags (MNT_*) and super block flags (MS_*) will ++ * be handled by the Linux VFS. Only handle custom options here. ++ */ ++ return (0); ++} ++EXPORT_SYMBOL(zfs_remount); ++ ++int ++zfs_vget(struct super_block *sb, struct inode **ipp, fid_t *fidp) ++{ ++ zfs_sb_t *zsb = sb->s_fs_info; ++ znode_t *zp; ++ uint64_t object = 0; ++ uint64_t fid_gen = 0; ++ uint64_t gen_mask; ++ uint64_t zp_gen; ++ int i, err; ++ ++ *ipp = NULL; ++ ++ ZFS_ENTER(zsb); ++ ++ if (fidp->fid_len == LONG_FID_LEN) { ++ zfid_long_t *zlfid = (zfid_long_t *)fidp; ++ uint64_t objsetid = 0; ++ uint64_t setgen = 0; ++ ++ for (i = 0; i < sizeof (zlfid->zf_setid); i++) ++ objsetid |= ((uint64_t)zlfid->zf_setid[i]) << (8 * i); ++ ++ for (i = 0; i < sizeof (zlfid->zf_setgen); i++) ++ setgen |= ((uint64_t)zlfid->zf_setgen[i]) << (8 * i); ++ ++ ZFS_EXIT(zsb); ++ ++ err = zfsctl_lookup_objset(sb, objsetid, &zsb); ++ if (err) ++ return (EINVAL); ++ ++ ZFS_ENTER(zsb); ++ } ++ ++ if (fidp->fid_len == SHORT_FID_LEN || fidp->fid_len == LONG_FID_LEN) { ++ zfid_short_t *zfid = (zfid_short_t *)fidp; ++ ++ for (i = 0; i < sizeof (zfid->zf_object); i++) ++ object |= ((uint64_t)zfid->zf_object[i]) << (8 * i); ++ ++ for (i = 0; i < sizeof (zfid->zf_gen); i++) ++ fid_gen |= ((uint64_t)zfid->zf_gen[i]) << (8 * i); ++ } else { ++ ZFS_EXIT(zsb); ++ return (EINVAL); ++ } ++ ++ /* A zero fid_gen means we are in the .zfs control directories */ ++ if (fid_gen == 0 && ++ (object == ZFSCTL_INO_ROOT || object == ZFSCTL_INO_SNAPDIR)) { ++ *ipp = zsb->z_ctldir; ++ ASSERT(*ipp != NULL); ++ if (object == ZFSCTL_INO_SNAPDIR) { ++ VERIFY(zfsctl_root_lookup(*ipp, "snapshot", ipp, ++ 0, kcred, NULL, NULL) == 0); ++ } else { ++ igrab(*ipp); ++ } ++ ZFS_EXIT(zsb); ++ return (0); ++ } ++ ++ gen_mask = -1ULL >> (64 - 8 * i); ++ ++ dprintf("getting %llu [%u mask %llx]\n", object, fid_gen, gen_mask); ++ if ((err = zfs_zget(zsb, object, &zp))) { ++ ZFS_EXIT(zsb); ++ return (err); ++ } ++ (void) sa_lookup(zp->z_sa_hdl, SA_ZPL_GEN(zsb), &zp_gen, ++ sizeof (uint64_t)); ++ zp_gen = zp_gen & gen_mask; ++ if (zp_gen == 0) ++ zp_gen = 1; ++ if (zp->z_unlinked || zp_gen != fid_gen) { ++ dprintf("znode gen (%u) != fid gen (%u)\n", zp_gen, fid_gen); ++ iput(ZTOI(zp)); ++ ZFS_EXIT(zsb); ++ return (EINVAL); ++ } ++ ++ *ipp = ZTOI(zp); ++ if (*ipp) ++ zfs_inode_update(ITOZ(*ipp)); ++ ++ ZFS_EXIT(zsb); ++ return (0); ++} ++EXPORT_SYMBOL(zfs_vget); ++ ++/* ++ * Block out VOPs 
and close zfs_sb_t::z_os ++ * ++ * Note, if successful, then we return with the 'z_teardown_lock' and ++ * 'z_teardown_inactive_lock' write held. ++ */ ++int ++zfs_suspend_fs(zfs_sb_t *zsb) ++{ ++ int error; ++ ++ if ((error = zfs_sb_teardown(zsb, B_FALSE)) != 0) ++ return (error); ++ dmu_objset_disown(zsb->z_os, zsb); ++ ++ return (0); ++} ++EXPORT_SYMBOL(zfs_suspend_fs); ++ ++/* ++ * Reopen zfs_sb_t::z_os and release VOPs. ++ */ ++int ++zfs_resume_fs(zfs_sb_t *zsb, const char *osname) ++{ ++ int err, err2; ++ ++ ASSERT(RRW_WRITE_HELD(&zsb->z_teardown_lock)); ++ ASSERT(RW_WRITE_HELD(&zsb->z_teardown_inactive_lock)); ++ ++ err = dmu_objset_own(osname, DMU_OST_ZFS, B_FALSE, zsb, &zsb->z_os); ++ if (err) { ++ zsb->z_os = NULL; ++ } else { ++ znode_t *zp; ++ uint64_t sa_obj = 0; ++ ++ err2 = zap_lookup(zsb->z_os, MASTER_NODE_OBJ, ++ ZFS_SA_ATTRS, 8, 1, &sa_obj); ++ ++ if ((err || err2) && zsb->z_version >= ZPL_VERSION_SA) ++ goto bail; ++ ++ ++ if ((err = sa_setup(zsb->z_os, sa_obj, ++ zfs_attr_table, ZPL_END, &zsb->z_attr_table)) != 0) ++ goto bail; ++ ++ VERIFY(zfs_sb_setup(zsb, B_FALSE) == 0); ++ ++ /* ++ * Attempt to re-establish all the active znodes with ++ * their dbufs. If a zfs_rezget() fails, then we'll let ++ * any potential callers discover that via ZFS_ENTER_VERIFY_VP ++ * when they try to use their znode. ++ */ ++ mutex_enter(&zsb->z_znodes_lock); ++ for (zp = list_head(&zsb->z_all_znodes); zp; ++ zp = list_next(&zsb->z_all_znodes, zp)) { ++ (void) zfs_rezget(zp); ++ } ++ mutex_exit(&zsb->z_znodes_lock); ++ ++ } ++ ++bail: ++ /* release the VOPs */ ++ rw_exit(&zsb->z_teardown_inactive_lock); ++ rrw_exit(&zsb->z_teardown_lock, FTAG); ++ ++ if (err) { ++ /* ++ * Since we couldn't reopen zfs_sb_t::z_os, force ++ * unmount this file system. ++ */ ++ (void) zfs_umount(zsb->z_sb); ++ } ++ return (err); ++} ++EXPORT_SYMBOL(zfs_resume_fs); ++ ++int ++zfs_set_version(zfs_sb_t *zsb, uint64_t newvers) ++{ ++ int error; ++ objset_t *os = zsb->z_os; ++ dmu_tx_t *tx; ++ ++ if (newvers < ZPL_VERSION_INITIAL || newvers > ZPL_VERSION) ++ return (EINVAL); ++ ++ if (newvers < zsb->z_version) ++ return (EINVAL); ++ ++ if (zfs_spa_version_map(newvers) > ++ spa_version(dmu_objset_spa(zsb->z_os))) ++ return (ENOTSUP); ++ ++ tx = dmu_tx_create(os); ++ dmu_tx_hold_zap(tx, MASTER_NODE_OBJ, B_FALSE, ZPL_VERSION_STR); ++ if (newvers >= ZPL_VERSION_SA && !zsb->z_use_sa) { ++ dmu_tx_hold_zap(tx, MASTER_NODE_OBJ, B_TRUE, ++ ZFS_SA_ATTRS); ++ dmu_tx_hold_zap(tx, DMU_NEW_OBJECT, FALSE, NULL); ++ } ++ error = dmu_tx_assign(tx, TXG_WAIT); ++ if (error) { ++ dmu_tx_abort(tx); ++ return (error); ++ } ++ ++ error = zap_update(os, MASTER_NODE_OBJ, ZPL_VERSION_STR, ++ 8, 1, &newvers, tx); ++ ++ if (error) { ++ dmu_tx_commit(tx); ++ return (error); ++ } ++ ++ if (newvers >= ZPL_VERSION_SA && !zsb->z_use_sa) { ++ uint64_t sa_obj; ++ ++ ASSERT3U(spa_version(dmu_objset_spa(zsb->z_os)), >=, ++ SPA_VERSION_SA); ++ sa_obj = zap_create(os, DMU_OT_SA_MASTER_NODE, ++ DMU_OT_NONE, 0, tx); ++ ++ error = zap_add(os, MASTER_NODE_OBJ, ++ ZFS_SA_ATTRS, 8, 1, &sa_obj, tx); ++ ASSERT3U(error, ==, 0); ++ ++ VERIFY(0 == sa_set_sa_object(os, sa_obj)); ++ sa_register_update_callback(os, zfs_sa_upgrade); ++ } ++ ++ spa_history_log_internal(LOG_DS_UPGRADE, ++ dmu_objset_spa(os), tx, "oldver=%llu newver=%llu dataset = %llu", ++ zsb->z_version, newvers, dmu_objset_id(os)); ++ ++ dmu_tx_commit(tx); ++ ++ zsb->z_version = newvers; ++ ++ if (zsb->z_version >= ZPL_VERSION_FUID) ++ zfs_set_fuid_feature(zsb); ++ ++ return (0); ++} 
++EXPORT_SYMBOL(zfs_set_version); ++ ++/* ++ * Read a property stored within the master node. ++ */ ++int ++zfs_get_zplprop(objset_t *os, zfs_prop_t prop, uint64_t *value) ++{ ++ const char *pname; ++ int error = ENOENT; ++ ++ /* ++ * Look up the file system's value for the property. For the ++ * version property, we look up a slightly different string. ++ */ ++ if (prop == ZFS_PROP_VERSION) ++ pname = ZPL_VERSION_STR; ++ else ++ pname = zfs_prop_to_name(prop); ++ ++ if (os != NULL) ++ error = zap_lookup(os, MASTER_NODE_OBJ, pname, 8, 1, value); ++ ++ if (error == ENOENT) { ++ /* No value set, use the default value */ ++ switch (prop) { ++ case ZFS_PROP_VERSION: ++ *value = ZPL_VERSION; ++ break; ++ case ZFS_PROP_NORMALIZE: ++ case ZFS_PROP_UTF8ONLY: ++ *value = 0; ++ break; ++ case ZFS_PROP_CASE: ++ *value = ZFS_CASE_SENSITIVE; ++ break; ++ default: ++ return (error); ++ } ++ error = 0; ++ } ++ return (error); ++} ++EXPORT_SYMBOL(zfs_get_zplprop); ++ ++void ++zfs_init(void) ++{ ++ zfsctl_init(); ++ zfs_znode_init(); ++ dmu_objset_register_type(DMU_OST_ZFS, zfs_space_delta_cb); ++ register_filesystem(&zpl_fs_type); ++ (void) arc_add_prune_callback(zpl_prune_sbs, NULL); ++} ++ ++void ++zfs_fini(void) ++{ ++ unregister_filesystem(&zpl_fs_type); ++ zfs_znode_fini(); ++ zfsctl_fini(); ++} +diff -uNr linux-3.2.33-go.orig/fs/zfs/zfs/zfs_vnops.c linux-3.2.33-go/fs/zfs/zfs/zfs_vnops.c +--- linux-3.2.33-go.orig/fs/zfs/zfs/zfs_vnops.c 1970-01-01 01:00:00.000000000 +0100 ++++ linux-3.2.33-go/fs/zfs/zfs/zfs_vnops.c 2012-11-16 23:25:34.351039311 +0100 +@@ -0,0 +1,4466 @@ ++/* ++ * CDDL HEADER START ++ * ++ * The contents of this file are subject to the terms of the ++ * Common Development and Distribution License (the "License"). ++ * You may not use this file except in compliance with the License. ++ * ++ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE ++ * or http://www.opensolaris.org/os/licensing. ++ * See the License for the specific language governing permissions ++ * and limitations under the License. ++ * ++ * When distributing Covered Code, include this CDDL HEADER in each ++ * file and include the License file at usr/src/OPENSOLARIS.LICENSE. ++ * If applicable, add the following below this CDDL HEADER, with the ++ * fields enclosed by brackets "[]" replaced with your own identifying ++ * information: Portions Copyright [yyyy] [name of copyright owner] ++ * ++ * CDDL HEADER END ++ */ ++/* ++ * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved. ++ */ ++ ++/* Portions Copyright 2007 Jeremy Teo */ ++/* Portions Copyright 2010 Robert Milkowski */ ++ ++ ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include "fs/fs_subr.h" ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++ ++/* ++ * Programming rules. ++ * ++ * Each vnode op performs some logical unit of work. To do this, the ZPL must ++ * properly lock its in-core state, create a DMU transaction, do the work, ++ * record this work in the intent log (ZIL), commit the DMU transaction, ++ * and wait for the intent log to commit if it is a synchronous operation. 
++ * Moreover, the vnode ops must work in both normal and log replay context. ++ * The ordering of events is important to avoid deadlocks and references ++ * to freed memory. The example below illustrates the following Big Rules: ++ * ++ * (1) A check must be made in each zfs thread for a mounted file system. ++ * This is done avoiding races using ZFS_ENTER(zsb). ++ * A ZFS_EXIT(zsb) is needed before all returns. Any znodes ++ * must be checked with ZFS_VERIFY_ZP(zp). Both of these macros ++ * can return EIO from the calling function. ++ * ++ * (2) iput() should always be the last thing except for zil_commit() ++ * (if necessary) and ZFS_EXIT(). This is for 3 reasons: ++ * First, if it's the last reference, the vnode/znode ++ * can be freed, so the zp may point to freed memory. Second, the last ++ * reference will call zfs_zinactive(), which may induce a lot of work -- ++ * pushing cached pages (which acquires range locks) and syncing out ++ * cached atime changes. Third, zfs_zinactive() may require a new tx, ++ * which could deadlock the system if you were already holding one. ++ * If you must call iput() within a tx then use iput_ASYNC(). ++ * ++ * (3) All range locks must be grabbed before calling dmu_tx_assign(), ++ * as they can span dmu_tx_assign() calls. ++ * ++ * (4) Always pass TXG_NOWAIT as the second argument to dmu_tx_assign(). ++ * This is critical because we don't want to block while holding locks. ++ * Note, in particular, that if a lock is sometimes acquired before ++ * the tx assigns, and sometimes after (e.g. z_lock), then failing to ++ * use a non-blocking assign can deadlock the system. The scenario: ++ * ++ * Thread A has grabbed a lock before calling dmu_tx_assign(). ++ * Thread B is in an already-assigned tx, and blocks for this lock. ++ * Thread A calls dmu_tx_assign(TXG_WAIT) and blocks in txg_wait_open() ++ * forever, because the previous txg can't quiesce until B's tx commits. ++ * ++ * If dmu_tx_assign() returns ERESTART and zsb->z_assign is TXG_NOWAIT, ++ * then drop all locks, call dmu_tx_wait(), and try again. ++ * ++ * (5) If the operation succeeded, generate the intent log entry for it ++ * before dropping locks. This ensures that the ordering of events ++ * in the intent log matches the order in which they actually occurred. ++ * During ZIL replay the zfs_log_* functions will update the sequence ++ * number to indicate the zil transaction has replayed. ++ * ++ * (6) At the end of each vnode op, the DMU tx must always commit, ++ * regardless of whether there were any errors. ++ * ++ * (7) After dropping all locks, invoke zil_commit(zilog, foid) ++ * to ensure that synchronous semantics are provided when necessary. ++ * ++ * In general, this is how things should be ordered in each vnode op: ++ * ++ * ZFS_ENTER(zsb); // exit if unmounted ++ * top: ++ * zfs_dirent_lock(&dl, ...) 
// lock directory entry (may igrab()) ++ * rw_enter(...); // grab any other locks you need ++ * tx = dmu_tx_create(...); // get DMU tx ++ * dmu_tx_hold_*(); // hold each object you might modify ++ * error = dmu_tx_assign(tx, TXG_NOWAIT); // try to assign ++ * if (error) { ++ * rw_exit(...); // drop locks ++ * zfs_dirent_unlock(dl); // unlock directory entry ++ * iput(...); // release held vnodes ++ * if (error == ERESTART) { ++ * dmu_tx_wait(tx); ++ * dmu_tx_abort(tx); ++ * goto top; ++ * } ++ * dmu_tx_abort(tx); // abort DMU tx ++ * ZFS_EXIT(zsb); // finished in zfs ++ * return (error); // really out of space ++ * } ++ * error = do_real_work(); // do whatever this VOP does ++ * if (error == 0) ++ * zfs_log_*(...); // on success, make ZIL entry ++ * dmu_tx_commit(tx); // commit DMU tx -- error or not ++ * rw_exit(...); // drop locks ++ * zfs_dirent_unlock(dl); // unlock directory entry ++ * iput(...); // release held vnodes ++ * zil_commit(zilog, foid); // synchronous when necessary ++ * ZFS_EXIT(zsb); // finished in zfs ++ * return (error); // done, report error ++ */ ++ ++/* ++ * Virus scanning is unsupported. It would be possible to add a hook ++ * here to performance the required virus scan. This could be done ++ * entirely in the kernel or potentially as an update to invoke a ++ * scanning utility. ++ */ ++static int ++zfs_vscan(struct inode *ip, cred_t *cr, int async) ++{ ++ return (0); ++} ++ ++/* ARGSUSED */ ++int ++zfs_open(struct inode *ip, int mode, int flag, cred_t *cr) ++{ ++ znode_t *zp = ITOZ(ip); ++ zfs_sb_t *zsb = ITOZSB(ip); ++ ++ ZFS_ENTER(zsb); ++ ZFS_VERIFY_ZP(zp); ++ ++ /* Honor ZFS_APPENDONLY file attribute */ ++ if ((mode & FMODE_WRITE) && (zp->z_pflags & ZFS_APPENDONLY) && ++ ((flag & O_APPEND) == 0)) { ++ ZFS_EXIT(zsb); ++ return (EPERM); ++ } ++ ++ /* Virus scan eligible files on open */ ++ if (!zfs_has_ctldir(zp) && zsb->z_vscan && S_ISREG(ip->i_mode) && ++ !(zp->z_pflags & ZFS_AV_QUARANTINED) && zp->z_size > 0) { ++ if (zfs_vscan(ip, cr, 0) != 0) { ++ ZFS_EXIT(zsb); ++ return (EACCES); ++ } ++ } ++ ++ /* Keep a count of the synchronous opens in the znode */ ++ if (flag & O_SYNC) ++ atomic_inc_32(&zp->z_sync_cnt); ++ ++ ZFS_EXIT(zsb); ++ return (0); ++} ++EXPORT_SYMBOL(zfs_open); ++ ++/* ARGSUSED */ ++int ++zfs_close(struct inode *ip, int flag, cred_t *cr) ++{ ++ znode_t *zp = ITOZ(ip); ++ zfs_sb_t *zsb = ITOZSB(ip); ++ ++ ZFS_ENTER(zsb); ++ ZFS_VERIFY_ZP(zp); ++ ++ /* ++ * Zero the synchronous opens in the znode. Under Linux the ++ * zfs_close() hook is not symmetric with zfs_open(), it is ++ * only called once when the last reference is dropped. ++ */ ++ if (flag & O_SYNC) ++ zp->z_sync_cnt = 0; ++ ++ if (!zfs_has_ctldir(zp) && zsb->z_vscan && S_ISREG(ip->i_mode) && ++ !(zp->z_pflags & ZFS_AV_QUARANTINED) && zp->z_size > 0) ++ VERIFY(zfs_vscan(ip, cr, 1) == 0); ++ ++ ZFS_EXIT(zsb); ++ return (0); ++} ++EXPORT_SYMBOL(zfs_close); ++ ++#if defined(_KERNEL) ++/* ++ * When a file is memory mapped, we must keep the IO data synchronized ++ * between the DMU cache and the memory mapped pages. What this means: ++ * ++ * On Write: If we find a memory mapped page, we write to *both* ++ * the page and the dmu buffer. 
++ */ ++static void ++update_pages(struct inode *ip, int64_t start, int len, ++ objset_t *os, uint64_t oid) ++{ ++ struct address_space *mp = ip->i_mapping; ++ struct page *pp; ++ uint64_t nbytes; ++ int64_t off; ++ void *pb; ++ ++ off = start & (PAGE_CACHE_SIZE-1); ++ for (start &= PAGE_CACHE_MASK; len > 0; start += PAGE_CACHE_SIZE) { ++ nbytes = MIN(PAGE_CACHE_SIZE - off, len); ++ ++ pp = find_lock_page(mp, start >> PAGE_CACHE_SHIFT); ++ if (pp) { ++ if (mapping_writably_mapped(mp)) ++ flush_dcache_page(pp); ++ ++ pb = kmap(pp); ++ (void) dmu_read(os, oid, start+off, nbytes, pb+off, ++ DMU_READ_PREFETCH); ++ kunmap(pp); ++ ++ if (mapping_writably_mapped(mp)) ++ flush_dcache_page(pp); ++ ++ mark_page_accessed(pp); ++ SetPageUptodate(pp); ++ ClearPageError(pp); ++ unlock_page(pp); ++ page_cache_release(pp); ++ } ++ ++ len -= nbytes; ++ off = 0; ++ } ++} ++ ++/* ++ * When a file is memory mapped, we must keep the IO data synchronized ++ * between the DMU cache and the memory mapped pages. What this means: ++ * ++ * On Read: We "read" preferentially from memory mapped pages, ++ * else we default from the dmu buffer. ++ * ++ * NOTE: We will always "break up" the IO into PAGESIZE uiomoves when ++ * the file is memory mapped. ++ */ ++static int ++mappedread(struct inode *ip, int nbytes, uio_t *uio) ++{ ++ struct address_space *mp = ip->i_mapping; ++ struct page *pp; ++ znode_t *zp = ITOZ(ip); ++ objset_t *os = ITOZSB(ip)->z_os; ++ int64_t start, off; ++ uint64_t bytes; ++ int len = nbytes; ++ int error = 0; ++ void *pb; ++ ++ start = uio->uio_loffset; ++ off = start & (PAGE_CACHE_SIZE-1); ++ for (start &= PAGE_CACHE_MASK; len > 0; start += PAGE_CACHE_SIZE) { ++ bytes = MIN(PAGE_CACHE_SIZE - off, len); ++ ++ pp = find_lock_page(mp, start >> PAGE_CACHE_SHIFT); ++ if (pp) { ++ ASSERT(PageUptodate(pp)); ++ ++ pb = kmap(pp); ++ error = uiomove(pb + off, bytes, UIO_READ, uio); ++ kunmap(pp); ++ ++ if (mapping_writably_mapped(mp)) ++ flush_dcache_page(pp); ++ ++ mark_page_accessed(pp); ++ unlock_page(pp); ++ page_cache_release(pp); ++ } else { ++ error = dmu_read_uio(os, zp->z_id, uio, bytes); ++ } ++ ++ len -= bytes; ++ off = 0; ++ if (error) ++ break; ++ } ++ return (error); ++} ++#endif /* _KERNEL */ ++ ++unsigned long zfs_read_chunk_size = 1024 * 1024; /* Tunable */ ++ ++/* ++ * Read bytes from specified file into supplied buffer. ++ * ++ * IN: ip - inode of file to be read from. ++ * uio - structure supplying read location, range info, ++ * and return buffer. ++ * ioflag - FSYNC flags; used to provide FRSYNC semantics. ++ * O_DIRECT flag; used to bypass page cache. ++ * cr - credentials of caller. ++ * ++ * OUT: uio - updated offset and range, buffer filled. 
++ * ++ * RETURN: 0 if success ++ * error code if failure ++ * ++ * Side Effects: ++ * inode - atime updated if byte count > 0 ++ */ ++/* ARGSUSED */ ++int ++zfs_read(struct inode *ip, uio_t *uio, int ioflag, cred_t *cr) ++{ ++ znode_t *zp = ITOZ(ip); ++ zfs_sb_t *zsb = ITOZSB(ip); ++ objset_t *os; ++ ssize_t n, nbytes; ++ int error = 0; ++ rl_t *rl; ++#ifdef HAVE_UIO_ZEROCOPY ++ xuio_t *xuio = NULL; ++#endif /* HAVE_UIO_ZEROCOPY */ ++ ++ ZFS_ENTER(zsb); ++ ZFS_VERIFY_ZP(zp); ++ os = zsb->z_os; ++ ++ if (zp->z_pflags & ZFS_AV_QUARANTINED) { ++ ZFS_EXIT(zsb); ++ return (EACCES); ++ } ++ ++ /* ++ * Validate file offset ++ */ ++ if (uio->uio_loffset < (offset_t)0) { ++ ZFS_EXIT(zsb); ++ return (EINVAL); ++ } ++ ++ /* ++ * Fasttrack empty reads ++ */ ++ if (uio->uio_resid == 0) { ++ ZFS_EXIT(zsb); ++ return (0); ++ } ++ ++ /* ++ * Check for mandatory locks ++ */ ++ if (mandatory_lock(ip) && ++ !lock_may_read(ip, uio->uio_loffset, uio->uio_resid)) { ++ ZFS_EXIT(zsb); ++ return (EAGAIN); ++ } ++ ++ /* ++ * If we're in FRSYNC mode, sync out this znode before reading it. ++ */ ++ if (ioflag & FRSYNC || zsb->z_os->os_sync == ZFS_SYNC_ALWAYS) ++ zil_commit(zsb->z_log, zp->z_id); ++ ++ /* ++ * Lock the range against changes. ++ */ ++ rl = zfs_range_lock(zp, uio->uio_loffset, uio->uio_resid, RL_READER); ++ ++ /* ++ * If we are reading past end-of-file we can skip ++ * to the end; but we might still need to set atime. ++ */ ++ if (uio->uio_loffset >= zp->z_size) { ++ error = 0; ++ goto out; ++ } ++ ++ ASSERT(uio->uio_loffset < zp->z_size); ++ n = MIN(uio->uio_resid, zp->z_size - uio->uio_loffset); ++ ++#ifdef HAVE_UIO_ZEROCOPY ++ if ((uio->uio_extflg == UIO_XUIO) && ++ (((xuio_t *)uio)->xu_type == UIOTYPE_ZEROCOPY)) { ++ int nblk; ++ int blksz = zp->z_blksz; ++ uint64_t offset = uio->uio_loffset; ++ ++ xuio = (xuio_t *)uio; ++ if ((ISP2(blksz))) { ++ nblk = (P2ROUNDUP(offset + n, blksz) - P2ALIGN(offset, ++ blksz)) / blksz; ++ } else { ++ ASSERT(offset + n <= blksz); ++ nblk = 1; ++ } ++ (void) dmu_xuio_init(xuio, nblk); ++ ++ if (vn_has_cached_data(ip)) { ++ /* ++ * For simplicity, we always allocate a full buffer ++ * even if we only expect to read a portion of a block. ++ */ ++ while (--nblk >= 0) { ++ (void) dmu_xuio_add(xuio, ++ dmu_request_arcbuf(sa_get_db(zp->z_sa_hdl), ++ blksz), 0, blksz); ++ } ++ } ++ } ++#endif /* HAVE_UIO_ZEROCOPY */ ++ ++ while (n > 0) { ++ nbytes = MIN(n, zfs_read_chunk_size - ++ P2PHASE(uio->uio_loffset, zfs_read_chunk_size)); ++ ++ if (zp->z_is_mapped && !(ioflag & O_DIRECT)) ++ error = mappedread(ip, nbytes, uio); ++ else ++ error = dmu_read_uio(os, zp->z_id, uio, nbytes); ++ ++ if (error) { ++ /* convert checksum errors into IO errors */ ++ if (error == ECKSUM) ++ error = EIO; ++ break; ++ } ++ ++ n -= nbytes; ++ } ++out: ++ zfs_range_unlock(rl); ++ ++ ZFS_ACCESSTIME_STAMP(zsb, zp); ++ zfs_inode_update(zp); ++ ZFS_EXIT(zsb); ++ return (error); ++} ++EXPORT_SYMBOL(zfs_read); ++ ++/* ++ * Write the bytes to a file. ++ * ++ * IN: ip - inode of file to be written to. ++ * uio - structure supplying write location, range info, ++ * and data buffer. ++ * ioflag - FAPPEND flag set if in append mode. ++ * O_DIRECT flag; used to bypass page cache. ++ * cr - credentials of caller. ++ * ++ * OUT: uio - updated offset and range. 
++ * ++ * RETURN: 0 if success ++ * error code if failure ++ * ++ * Timestamps: ++ * ip - ctime|mtime updated if byte count > 0 ++ */ ++ ++/* ARGSUSED */ ++int ++zfs_write(struct inode *ip, uio_t *uio, int ioflag, cred_t *cr) ++{ ++ znode_t *zp = ITOZ(ip); ++ rlim64_t limit = uio->uio_limit; ++ ssize_t start_resid = uio->uio_resid; ++ ssize_t tx_bytes; ++ uint64_t end_size; ++ dmu_tx_t *tx; ++ zfs_sb_t *zsb = ZTOZSB(zp); ++ zilog_t *zilog; ++ offset_t woff; ++ ssize_t n, nbytes; ++ rl_t *rl; ++ int max_blksz = zsb->z_max_blksz; ++ int error = 0; ++ arc_buf_t *abuf; ++ iovec_t *aiov = NULL; ++ xuio_t *xuio = NULL; ++ int i_iov = 0; ++ iovec_t *iovp = uio->uio_iov; ++ int write_eof; ++ int count = 0; ++ sa_bulk_attr_t bulk[4]; ++ uint64_t mtime[2], ctime[2]; ++ ASSERTV(int iovcnt = uio->uio_iovcnt); ++ ++ /* ++ * Fasttrack empty write ++ */ ++ n = start_resid; ++ if (n == 0) ++ return (0); ++ ++ if (limit == RLIM64_INFINITY || limit > MAXOFFSET_T) ++ limit = MAXOFFSET_T; ++ ++ ZFS_ENTER(zsb); ++ ZFS_VERIFY_ZP(zp); ++ ++ SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_MTIME(zsb), NULL, &mtime, 16); ++ SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_CTIME(zsb), NULL, &ctime, 16); ++ SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_SIZE(zsb), NULL, &zp->z_size, 8); ++ SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_FLAGS(zsb), NULL, ++ &zp->z_pflags, 8); ++ ++ /* ++ * If immutable or not appending then return EPERM ++ */ ++ if ((zp->z_pflags & (ZFS_IMMUTABLE | ZFS_READONLY)) || ++ ((zp->z_pflags & ZFS_APPENDONLY) && !(ioflag & FAPPEND) && ++ (uio->uio_loffset < zp->z_size))) { ++ ZFS_EXIT(zsb); ++ return (EPERM); ++ } ++ ++ zilog = zsb->z_log; ++ ++ /* ++ * Validate file offset ++ */ ++ woff = ioflag & FAPPEND ? zp->z_size : uio->uio_loffset; ++ if (woff < 0) { ++ ZFS_EXIT(zsb); ++ return (EINVAL); ++ } ++ ++ /* ++ * Check for mandatory locks before calling zfs_range_lock() ++ * in order to prevent a deadlock with locks set via fcntl(). ++ */ ++ if (mandatory_lock(ip) && !lock_may_write(ip, woff, n)) { ++ ZFS_EXIT(zsb); ++ return (EAGAIN); ++ } ++ ++#ifdef HAVE_UIO_ZEROCOPY ++ /* ++ * Pre-fault the pages to ensure slow (eg NFS) pages ++ * don't hold up txg. ++ * Skip this if uio contains loaned arc_buf. ++ */ ++ if ((uio->uio_extflg == UIO_XUIO) && ++ (((xuio_t *)uio)->xu_type == UIOTYPE_ZEROCOPY)) ++ xuio = (xuio_t *)uio; ++ else ++ uio_prefaultpages(MIN(n, max_blksz), uio); ++#endif /* HAVE_UIO_ZEROCOPY */ ++ ++ /* ++ * If in append mode, set the io offset pointer to eof. ++ */ ++ if (ioflag & FAPPEND) { ++ /* ++ * Obtain an appending range lock to guarantee file append ++ * semantics. We reset the write offset once we have the lock. ++ */ ++ rl = zfs_range_lock(zp, 0, n, RL_APPEND); ++ woff = rl->r_off; ++ if (rl->r_len == UINT64_MAX) { ++ /* ++ * We overlocked the file because this write will cause ++ * the file block size to increase. ++ * Note that zp_size cannot change with this lock held. ++ */ ++ woff = zp->z_size; ++ } ++ uio->uio_loffset = woff; ++ } else { ++ /* ++ * Note that if the file block size will change as a result of ++ * this write, then this range lock will lock the entire file ++ * so that we can re-write the block safely. ++ */ ++ rl = zfs_range_lock(zp, woff, n, RL_WRITER); ++ } ++ ++ if (woff >= limit) { ++ zfs_range_unlock(rl); ++ ZFS_EXIT(zsb); ++ return (EFBIG); ++ } ++ ++ if ((woff + n) > limit || woff > (limit - n)) ++ n = limit - woff; ++ ++ /* Will this write extend the file length? 
*/ ++ write_eof = (woff + n > zp->z_size); ++ ++ end_size = MAX(zp->z_size, woff + n); ++ ++ /* ++ * Write the file in reasonable size chunks. Each chunk is written ++ * in a separate transaction; this keeps the intent log records small ++ * and allows us to do more fine-grained space accounting. ++ */ ++ while (n > 0) { ++ abuf = NULL; ++ woff = uio->uio_loffset; ++again: ++ if (zfs_owner_overquota(zsb, zp, B_FALSE) || ++ zfs_owner_overquota(zsb, zp, B_TRUE)) { ++ if (abuf != NULL) ++ dmu_return_arcbuf(abuf); ++ error = EDQUOT; ++ break; ++ } ++ ++ if (xuio && abuf == NULL) { ++ ASSERT(i_iov < iovcnt); ++ aiov = &iovp[i_iov]; ++ abuf = dmu_xuio_arcbuf(xuio, i_iov); ++ dmu_xuio_clear(xuio, i_iov); ++ ASSERT((aiov->iov_base == abuf->b_data) || ++ ((char *)aiov->iov_base - (char *)abuf->b_data + ++ aiov->iov_len == arc_buf_size(abuf))); ++ i_iov++; ++ } else if (abuf == NULL && n >= max_blksz && ++ woff >= zp->z_size && ++ P2PHASE(woff, max_blksz) == 0 && ++ zp->z_blksz == max_blksz) { ++ /* ++ * This write covers a full block. "Borrow" a buffer ++ * from the dmu so that we can fill it before we enter ++ * a transaction. This avoids the possibility of ++ * holding up the transaction if the data copy hangs ++ * up on a pagefault (e.g., from an NFS server mapping). ++ */ ++ size_t cbytes; ++ ++ abuf = dmu_request_arcbuf(sa_get_db(zp->z_sa_hdl), ++ max_blksz); ++ ASSERT(abuf != NULL); ++ ASSERT(arc_buf_size(abuf) == max_blksz); ++ if ((error = uiocopy(abuf->b_data, max_blksz, ++ UIO_WRITE, uio, &cbytes))) { ++ dmu_return_arcbuf(abuf); ++ break; ++ } ++ ASSERT(cbytes == max_blksz); ++ } ++ ++ /* ++ * Start a transaction. ++ */ ++ tx = dmu_tx_create(zsb->z_os); ++ dmu_tx_hold_sa(tx, zp->z_sa_hdl, B_FALSE); ++ dmu_tx_hold_write(tx, zp->z_id, woff, MIN(n, max_blksz)); ++ zfs_sa_upgrade_txholds(tx, zp); ++ error = dmu_tx_assign(tx, TXG_NOWAIT); ++ if (error) { ++ if (error == ERESTART) { ++ dmu_tx_wait(tx); ++ dmu_tx_abort(tx); ++ goto again; ++ } ++ dmu_tx_abort(tx); ++ if (abuf != NULL) ++ dmu_return_arcbuf(abuf); ++ break; ++ } ++ ++ /* ++ * If zfs_range_lock() over-locked we grow the blocksize ++ * and then reduce the lock range. This will only happen ++ * on the first iteration since zfs_range_reduce() will ++ * shrink down r_len to the appropriate size. ++ */ ++ if (rl->r_len == UINT64_MAX) { ++ uint64_t new_blksz; ++ ++ if (zp->z_blksz > max_blksz) { ++ ASSERT(!ISP2(zp->z_blksz)); ++ new_blksz = MIN(end_size, SPA_MAXBLOCKSIZE); ++ } else { ++ new_blksz = MIN(end_size, max_blksz); ++ } ++ zfs_grow_blocksize(zp, new_blksz, tx); ++ zfs_range_reduce(rl, woff, n); ++ } ++ ++ /* ++ * XXX - should we really limit each write to z_max_blksz? ++ * Perhaps we should use SPA_MAXBLOCKSIZE chunks? ++ */ ++ nbytes = MIN(n, max_blksz - P2PHASE(woff, max_blksz)); ++ ++ if (abuf == NULL) { ++ tx_bytes = uio->uio_resid; ++ error = dmu_write_uio_dbuf(sa_get_db(zp->z_sa_hdl), ++ uio, nbytes, tx); ++ tx_bytes -= uio->uio_resid; ++ } else { ++ tx_bytes = nbytes; ++ ASSERT(xuio == NULL || tx_bytes == aiov->iov_len); ++ /* ++ * If this is not a full block write, but we are ++ * extending the file past EOF and this data starts ++ * block-aligned, use assign_arcbuf(). Otherwise, ++ * write via dmu_write(). 
++ */ ++ if (tx_bytes < max_blksz && (!write_eof || ++ aiov->iov_base != abuf->b_data)) { ++ ASSERT(xuio); ++ dmu_write(zsb->z_os, zp->z_id, woff, ++ aiov->iov_len, aiov->iov_base, tx); ++ dmu_return_arcbuf(abuf); ++ xuio_stat_wbuf_copied(); ++ } else { ++ ASSERT(xuio || tx_bytes == max_blksz); ++ dmu_assign_arcbuf(sa_get_db(zp->z_sa_hdl), ++ woff, abuf, tx); ++ } ++ ASSERT(tx_bytes <= uio->uio_resid); ++ uioskip(uio, tx_bytes); ++ } ++ ++ if (tx_bytes && zp->z_is_mapped && !(ioflag & O_DIRECT)) ++ update_pages(ip, woff, tx_bytes, zsb->z_os, zp->z_id); ++ ++ /* ++ * If we made no progress, we're done. If we made even ++ * partial progress, update the znode and ZIL accordingly. ++ */ ++ if (tx_bytes == 0) { ++ (void) sa_update(zp->z_sa_hdl, SA_ZPL_SIZE(zsb), ++ (void *)&zp->z_size, sizeof (uint64_t), tx); ++ dmu_tx_commit(tx); ++ ASSERT(error != 0); ++ break; ++ } ++ ++ /* ++ * Clear Set-UID/Set-GID bits on successful write if not ++ * privileged and at least one of the excute bits is set. ++ * ++ * It would be nice to to this after all writes have ++ * been done, but that would still expose the ISUID/ISGID ++ * to another app after the partial write is committed. ++ * ++ * Note: we don't call zfs_fuid_map_id() here because ++ * user 0 is not an ephemeral uid. ++ */ ++ mutex_enter(&zp->z_acl_lock); ++ if ((zp->z_mode & (S_IXUSR | (S_IXUSR >> 3) | ++ (S_IXUSR >> 6))) != 0 && ++ (zp->z_mode & (S_ISUID | S_ISGID)) != 0 && ++ secpolicy_vnode_setid_retain(cr, ++ (zp->z_mode & S_ISUID) != 0 && zp->z_uid == 0) != 0) { ++ uint64_t newmode; ++ zp->z_mode &= ~(S_ISUID | S_ISGID); ++ newmode = zp->z_mode; ++ (void) sa_update(zp->z_sa_hdl, SA_ZPL_MODE(zsb), ++ (void *)&newmode, sizeof (uint64_t), tx); ++ } ++ mutex_exit(&zp->z_acl_lock); ++ ++ zfs_tstamp_update_setup(zp, CONTENT_MODIFIED, mtime, ctime, ++ B_TRUE); ++ ++ /* ++ * Update the file size (zp_size) if it has changed; ++ * account for possible concurrent updates. ++ */ ++ while ((end_size = zp->z_size) < uio->uio_loffset) { ++ (void) atomic_cas_64(&zp->z_size, end_size, ++ uio->uio_loffset); ++ ASSERT(error == 0); ++ } ++ /* ++ * If we are replaying and eof is non zero then force ++ * the file size to the specified eof. Note, there's no ++ * concurrency during replay. ++ */ ++ if (zsb->z_replay && zsb->z_replay_eof != 0) ++ zp->z_size = zsb->z_replay_eof; ++ ++ error = sa_bulk_update(zp->z_sa_hdl, bulk, count, tx); ++ ++ zfs_log_write(zilog, tx, TX_WRITE, zp, woff, tx_bytes, ioflag); ++ dmu_tx_commit(tx); ++ ++ if (error != 0) ++ break; ++ ASSERT(tx_bytes == nbytes); ++ n -= nbytes; ++ ++ if (!xuio && n > 0) ++ uio_prefaultpages(MIN(n, max_blksz), uio); ++ } ++ ++ zfs_range_unlock(rl); ++ ++ /* ++ * If we're in replay mode, or we made no progress, return error. ++ * Otherwise, it's at least a partial write, so it's successful. 
++ */ ++ if (zsb->z_replay || uio->uio_resid == start_resid) { ++ ZFS_EXIT(zsb); ++ return (error); ++ } ++ ++ if (ioflag & (FSYNC | FDSYNC) || ++ zsb->z_os->os_sync == ZFS_SYNC_ALWAYS) ++ zil_commit(zilog, zp->z_id); ++ ++ zfs_inode_update(zp); ++ ZFS_EXIT(zsb); ++ return (0); ++} ++EXPORT_SYMBOL(zfs_write); ++ ++static void ++iput_async(struct inode *ip, taskq_t *taskq) ++{ ++ ASSERT(atomic_read(&ip->i_count) > 0); ++ if (atomic_read(&ip->i_count) == 1) ++ taskq_dispatch(taskq, (task_func_t *)iput, ip, TQ_PUSHPAGE); ++ else ++ iput(ip); ++} ++ ++void ++zfs_get_done(zgd_t *zgd, int error) ++{ ++ znode_t *zp = zgd->zgd_private; ++ objset_t *os = ZTOZSB(zp)->z_os; ++ ++ if (zgd->zgd_db) ++ dmu_buf_rele(zgd->zgd_db, zgd); ++ ++ zfs_range_unlock(zgd->zgd_rl); ++ ++ /* ++ * Release the vnode asynchronously as we currently have the ++ * txg stopped from syncing. ++ */ ++ iput_async(ZTOI(zp), dsl_pool_iput_taskq(dmu_objset_pool(os))); ++ ++ if (error == 0 && zgd->zgd_bp) ++ zil_add_block(zgd->zgd_zilog, zgd->zgd_bp); ++ ++ kmem_free(zgd, sizeof (zgd_t)); ++} ++ ++#ifdef DEBUG ++static int zil_fault_io = 0; ++#endif ++ ++/* ++ * Get data to generate a TX_WRITE intent log record. ++ */ ++int ++zfs_get_data(void *arg, lr_write_t *lr, char *buf, zio_t *zio) ++{ ++ zfs_sb_t *zsb = arg; ++ objset_t *os = zsb->z_os; ++ znode_t *zp; ++ uint64_t object = lr->lr_foid; ++ uint64_t offset = lr->lr_offset; ++ uint64_t size = lr->lr_length; ++ blkptr_t *bp = &lr->lr_blkptr; ++ dmu_buf_t *db; ++ zgd_t *zgd; ++ int error = 0; ++ ++ ASSERT(zio != NULL); ++ ASSERT(size != 0); ++ ++ /* ++ * Nothing to do if the file has been removed ++ */ ++ if (zfs_zget(zsb, object, &zp) != 0) ++ return (ENOENT); ++ if (zp->z_unlinked) { ++ /* ++ * Release the vnode asynchronously as we currently have the ++ * txg stopped from syncing. ++ */ ++ iput_async(ZTOI(zp), dsl_pool_iput_taskq(dmu_objset_pool(os))); ++ return (ENOENT); ++ } ++ ++ zgd = (zgd_t *)kmem_zalloc(sizeof (zgd_t), KM_PUSHPAGE); ++ zgd->zgd_zilog = zsb->z_log; ++ zgd->zgd_private = zp; ++ ++ /* ++ * Write records come in two flavors: immediate and indirect. ++ * For small writes it's cheaper to store the data with the ++ * log record (immediate); for large writes it's cheaper to ++ * sync the data and get a pointer to it (indirect) so that ++ * we don't have to write the data twice. ++ */ ++ if (buf != NULL) { /* immediate write */ ++ zgd->zgd_rl = zfs_range_lock(zp, offset, size, RL_READER); ++ /* test for truncation needs to be done while range locked */ ++ if (offset >= zp->z_size) { ++ error = ENOENT; ++ } else { ++ error = dmu_read(os, object, offset, size, buf, ++ DMU_READ_NO_PREFETCH); ++ } ++ ASSERT(error == 0 || error == ENOENT); ++ } else { /* indirect write */ ++ /* ++ * Have to lock the whole block to ensure when it's ++ * written out and it's checksum is being calculated ++ * that no one can change the data. We need to re-check ++ * blocksize after we get the lock in case it's changed! ++ */ ++ for (;;) { ++ uint64_t blkoff; ++ size = zp->z_blksz; ++ blkoff = ISP2(size) ? 
P2PHASE(offset, size) : offset; ++ offset -= blkoff; ++ zgd->zgd_rl = zfs_range_lock(zp, offset, size, ++ RL_READER); ++ if (zp->z_blksz == size) ++ break; ++ offset += blkoff; ++ zfs_range_unlock(zgd->zgd_rl); ++ } ++ /* test for truncation needs to be done while range locked */ ++ if (lr->lr_offset >= zp->z_size) ++ error = ENOENT; ++#ifdef DEBUG ++ if (zil_fault_io) { ++ error = EIO; ++ zil_fault_io = 0; ++ } ++#endif ++ if (error == 0) ++ error = dmu_buf_hold(os, object, offset, zgd, &db, ++ DMU_READ_NO_PREFETCH); ++ ++ if (error == 0) { ++ zgd->zgd_db = db; ++ zgd->zgd_bp = bp; ++ ++ ASSERT(db->db_offset == offset); ++ ASSERT(db->db_size == size); ++ ++ error = dmu_sync(zio, lr->lr_common.lrc_txg, ++ zfs_get_done, zgd); ++ ASSERT(error || lr->lr_length <= zp->z_blksz); ++ ++ /* ++ * On success, we need to wait for the write I/O ++ * initiated by dmu_sync() to complete before we can ++ * release this dbuf. We will finish everything up ++ * in the zfs_get_done() callback. ++ */ ++ if (error == 0) ++ return (0); ++ ++ if (error == EALREADY) { ++ lr->lr_common.lrc_txtype = TX_WRITE2; ++ error = 0; ++ } ++ } ++ } ++ ++ zfs_get_done(zgd, error); ++ ++ return (error); ++} ++ ++/*ARGSUSED*/ ++int ++zfs_access(struct inode *ip, int mode, int flag, cred_t *cr) ++{ ++ znode_t *zp = ITOZ(ip); ++ zfs_sb_t *zsb = ITOZSB(ip); ++ int error; ++ ++ ZFS_ENTER(zsb); ++ ZFS_VERIFY_ZP(zp); ++ ++ if (flag & V_ACE_MASK) ++ error = zfs_zaccess(zp, mode, flag, B_FALSE, cr); ++ else ++ error = zfs_zaccess_rwx(zp, mode, flag, cr); ++ ++ ZFS_EXIT(zsb); ++ return (error); ++} ++EXPORT_SYMBOL(zfs_access); ++ ++/* ++ * Lookup an entry in a directory, or an extended attribute directory. ++ * If it exists, return a held inode reference for it. ++ * ++ * IN: dip - inode of directory to search. ++ * nm - name of entry to lookup. ++ * flags - LOOKUP_XATTR set if looking for an attribute. ++ * cr - credentials of caller. ++ * direntflags - directory lookup flags ++ * realpnp - returned pathname. ++ * ++ * OUT: ipp - inode of located entry, NULL if not found. ++ * ++ * RETURN: 0 if success ++ * error code if failure ++ * ++ * Timestamps: ++ * NA ++ */ ++/* ARGSUSED */ ++int ++zfs_lookup(struct inode *dip, char *nm, struct inode **ipp, int flags, ++ cred_t *cr, int *direntflags, pathname_t *realpnp) ++{ ++ znode_t *zdp = ITOZ(dip); ++ zfs_sb_t *zsb = ITOZSB(dip); ++ int error = 0; ++ ++ /* fast path */ ++ if (!(flags & (LOOKUP_XATTR | FIGNORECASE))) { ++ ++ if (!S_ISDIR(dip->i_mode)) { ++ return (ENOTDIR); ++ } else if (zdp->z_sa_hdl == NULL) { ++ return (EIO); ++ } ++ ++ if (nm[0] == 0 || (nm[0] == '.' && nm[1] == '\0')) { ++ error = zfs_fastaccesschk_execute(zdp, cr); ++ if (!error) { ++ *ipp = dip; ++ igrab(*ipp); ++ return (0); ++ } ++ return (error); ++#ifdef HAVE_DNLC ++ } else { ++ vnode_t *tvp = dnlc_lookup(dvp, nm); ++ ++ if (tvp) { ++ error = zfs_fastaccesschk_execute(zdp, cr); ++ if (error) { ++ iput(tvp); ++ return (error); ++ } ++ if (tvp == DNLC_NO_VNODE) { ++ iput(tvp); ++ return (ENOENT); ++ } else { ++ *vpp = tvp; ++ return (specvp_check(vpp, cr)); ++ } ++ } ++#endif /* HAVE_DNLC */ ++ } ++ } ++ ++ ZFS_ENTER(zsb); ++ ZFS_VERIFY_ZP(zdp); ++ ++ *ipp = NULL; ++ ++ if (flags & LOOKUP_XATTR) { ++ /* ++ * We don't allow recursive attributes.. ++ * Maybe someday we will. 
++ */ ++ if (zdp->z_pflags & ZFS_XATTR) { ++ ZFS_EXIT(zsb); ++ return (EINVAL); ++ } ++ ++ if ((error = zfs_get_xattrdir(zdp, ipp, cr, flags))) { ++ ZFS_EXIT(zsb); ++ return (error); ++ } ++ ++ /* ++ * Do we have permission to get into attribute directory? ++ */ ++ ++ if ((error = zfs_zaccess(ITOZ(*ipp), ACE_EXECUTE, 0, ++ B_FALSE, cr))) { ++ iput(*ipp); ++ *ipp = NULL; ++ } ++ ++ ZFS_EXIT(zsb); ++ return (error); ++ } ++ ++ if (!S_ISDIR(dip->i_mode)) { ++ ZFS_EXIT(zsb); ++ return (ENOTDIR); ++ } ++ ++ /* ++ * Check accessibility of directory. ++ */ ++ ++ if ((error = zfs_zaccess(zdp, ACE_EXECUTE, 0, B_FALSE, cr))) { ++ ZFS_EXIT(zsb); ++ return (error); ++ } ++ ++ if (zsb->z_utf8 && u8_validate(nm, strlen(nm), ++ NULL, U8_VALIDATE_ENTIRE, &error) < 0) { ++ ZFS_EXIT(zsb); ++ return (EILSEQ); ++ } ++ ++ error = zfs_dirlook(zdp, nm, ipp, flags, direntflags, realpnp); ++ if ((error == 0) && (*ipp)) ++ zfs_inode_update(ITOZ(*ipp)); ++ ++ ZFS_EXIT(zsb); ++ return (error); ++} ++EXPORT_SYMBOL(zfs_lookup); ++ ++/* ++ * Attempt to create a new entry in a directory. If the entry ++ * already exists, truncate the file if permissible, else return ++ * an error. Return the ip of the created or trunc'd file. ++ * ++ * IN: dip - inode of directory to put new file entry in. ++ * name - name of new file entry. ++ * vap - attributes of new file. ++ * excl - flag indicating exclusive or non-exclusive mode. ++ * mode - mode to open file with. ++ * cr - credentials of caller. ++ * flag - large file flag [UNUSED]. ++ * vsecp - ACL to be set ++ * ++ * OUT: ipp - inode of created or trunc'd entry. ++ * ++ * RETURN: 0 if success ++ * error code if failure ++ * ++ * Timestamps: ++ * dip - ctime|mtime updated if new entry created ++ * ip - ctime|mtime always, atime if new ++ */ ++ ++/* ARGSUSED */ ++int ++zfs_create(struct inode *dip, char *name, vattr_t *vap, int excl, ++ int mode, struct inode **ipp, cred_t *cr, int flag, vsecattr_t *vsecp) ++{ ++ znode_t *zp, *dzp = ITOZ(dip); ++ zfs_sb_t *zsb = ITOZSB(dip); ++ zilog_t *zilog; ++ objset_t *os; ++ zfs_dirlock_t *dl; ++ dmu_tx_t *tx; ++ int error; ++ uid_t uid; ++ gid_t gid; ++ zfs_acl_ids_t acl_ids; ++ boolean_t fuid_dirtied; ++ boolean_t have_acl = B_FALSE; ++ ++ /* ++ * If we have an ephemeral id, ACL, or XVATTR then ++ * make sure file system is at proper version ++ */ ++ ++ gid = crgetgid(cr); ++ uid = crgetuid(cr); ++ ++ if (zsb->z_use_fuids == B_FALSE && ++ (vsecp || IS_EPHEMERAL(uid) || IS_EPHEMERAL(gid))) ++ return (EINVAL); ++ ++ ZFS_ENTER(zsb); ++ ZFS_VERIFY_ZP(dzp); ++ os = zsb->z_os; ++ zilog = zsb->z_log; ++ ++ if (zsb->z_utf8 && u8_validate(name, strlen(name), ++ NULL, U8_VALIDATE_ENTIRE, &error) < 0) { ++ ZFS_EXIT(zsb); ++ return (EILSEQ); ++ } ++ ++ if (vap->va_mask & ATTR_XVATTR) { ++ if ((error = secpolicy_xvattr((xvattr_t *)vap, ++ crgetuid(cr), cr, vap->va_mode)) != 0) { ++ ZFS_EXIT(zsb); ++ return (error); ++ } ++ } ++ ++top: ++ *ipp = NULL; ++ if (*name == '\0') { ++ /* ++ * Null component name refers to the directory itself. 
++ */ ++ igrab(dip); ++ zp = dzp; ++ dl = NULL; ++ error = 0; ++ } else { ++ /* possible igrab(zp) */ ++ int zflg = 0; ++ ++ if (flag & FIGNORECASE) ++ zflg |= ZCILOOK; ++ ++ error = zfs_dirent_lock(&dl, dzp, name, &zp, zflg, ++ NULL, NULL); ++ if (error) { ++ if (have_acl) ++ zfs_acl_ids_free(&acl_ids); ++ if (strcmp(name, "..") == 0) ++ error = EISDIR; ++ ZFS_EXIT(zsb); ++ return (error); ++ } ++ } ++ ++ if (zp == NULL) { ++ uint64_t txtype; ++ ++ /* ++ * Create a new file object and update the directory ++ * to reference it. ++ */ ++ if ((error = zfs_zaccess(dzp, ACE_ADD_FILE, 0, B_FALSE, cr))) { ++ if (have_acl) ++ zfs_acl_ids_free(&acl_ids); ++ goto out; ++ } ++ ++ /* ++ * We only support the creation of regular files in ++ * extended attribute directories. ++ */ ++ ++ if ((dzp->z_pflags & ZFS_XATTR) && !S_ISREG(vap->va_mode)) { ++ if (have_acl) ++ zfs_acl_ids_free(&acl_ids); ++ error = EINVAL; ++ goto out; ++ } ++ ++ if (!have_acl && (error = zfs_acl_ids_create(dzp, 0, vap, ++ cr, vsecp, &acl_ids)) != 0) ++ goto out; ++ have_acl = B_TRUE; ++ ++ if (zfs_acl_ids_overquota(zsb, &acl_ids)) { ++ zfs_acl_ids_free(&acl_ids); ++ error = EDQUOT; ++ goto out; ++ } ++ ++ tx = dmu_tx_create(os); ++ ++ dmu_tx_hold_sa_create(tx, acl_ids.z_aclp->z_acl_bytes + ++ ZFS_SA_BASE_ATTR_SIZE); ++ ++ fuid_dirtied = zsb->z_fuid_dirty; ++ if (fuid_dirtied) ++ zfs_fuid_txhold(zsb, tx); ++ dmu_tx_hold_zap(tx, dzp->z_id, TRUE, name); ++ dmu_tx_hold_sa(tx, dzp->z_sa_hdl, B_FALSE); ++ if (!zsb->z_use_sa && ++ acl_ids.z_aclp->z_acl_bytes > ZFS_ACE_SPACE) { ++ dmu_tx_hold_write(tx, DMU_NEW_OBJECT, ++ 0, acl_ids.z_aclp->z_acl_bytes); ++ } ++ error = dmu_tx_assign(tx, TXG_NOWAIT); ++ if (error) { ++ zfs_dirent_unlock(dl); ++ if (error == ERESTART) { ++ dmu_tx_wait(tx); ++ dmu_tx_abort(tx); ++ goto top; ++ } ++ zfs_acl_ids_free(&acl_ids); ++ dmu_tx_abort(tx); ++ ZFS_EXIT(zsb); ++ return (error); ++ } ++ zfs_mknode(dzp, vap, tx, cr, 0, &zp, &acl_ids); ++ ++ if (fuid_dirtied) ++ zfs_fuid_sync(zsb, tx); ++ ++ (void) zfs_link_create(dl, zp, tx, ZNEW); ++ txtype = zfs_log_create_txtype(Z_FILE, vsecp, vap); ++ if (flag & FIGNORECASE) ++ txtype |= TX_CI; ++ zfs_log_create(zilog, tx, txtype, dzp, zp, name, ++ vsecp, acl_ids.z_fuidp, vap); ++ zfs_acl_ids_free(&acl_ids); ++ dmu_tx_commit(tx); ++ } else { ++ int aflags = (flag & FAPPEND) ? V_APPEND : 0; ++ ++ if (have_acl) ++ zfs_acl_ids_free(&acl_ids); ++ have_acl = B_FALSE; ++ ++ /* ++ * A directory entry already exists for this name. ++ */ ++ /* ++ * Can't truncate an existing file if in exclusive mode. ++ */ ++ if (excl) { ++ error = EEXIST; ++ goto out; ++ } ++ /* ++ * Can't open a directory for writing. ++ */ ++ if (S_ISDIR(ZTOI(zp)->i_mode)) { ++ error = EISDIR; ++ goto out; ++ } ++ /* ++ * Verify requested access to file. ++ */ ++ if (mode && (error = zfs_zaccess_rwx(zp, mode, aflags, cr))) { ++ goto out; ++ } ++ ++ mutex_enter(&dzp->z_lock); ++ dzp->z_seq++; ++ mutex_exit(&dzp->z_lock); ++ ++ /* ++ * Truncate regular files if requested. 
++ */ ++ if (S_ISREG(ZTOI(zp)->i_mode) && ++ (vap->va_mask & ATTR_SIZE) && (vap->va_size == 0)) { ++ /* we can't hold any locks when calling zfs_freesp() */ ++ zfs_dirent_unlock(dl); ++ dl = NULL; ++ error = zfs_freesp(zp, 0, 0, mode, TRUE); ++ } ++ } ++out: ++ ++ if (dl) ++ zfs_dirent_unlock(dl); ++ ++ if (error) { ++ if (zp) ++ iput(ZTOI(zp)); ++ } else { ++ zfs_inode_update(dzp); ++ zfs_inode_update(zp); ++ *ipp = ZTOI(zp); ++ } ++ ++ if (zsb->z_os->os_sync == ZFS_SYNC_ALWAYS) ++ zil_commit(zilog, 0); ++ ++ ZFS_EXIT(zsb); ++ return (error); ++} ++EXPORT_SYMBOL(zfs_create); ++ ++/* ++ * Remove an entry from a directory. ++ * ++ * IN: dip - inode of directory to remove entry from. ++ * name - name of entry to remove. ++ * cr - credentials of caller. ++ * ++ * RETURN: 0 if success ++ * error code if failure ++ * ++ * Timestamps: ++ * dip - ctime|mtime ++ * ip - ctime (if nlink > 0) ++ */ ++ ++uint64_t null_xattr = 0; ++ ++/*ARGSUSED*/ ++int ++zfs_remove(struct inode *dip, char *name, cred_t *cr) ++{ ++ znode_t *zp, *dzp = ITOZ(dip); ++ znode_t *xzp; ++ struct inode *ip; ++ zfs_sb_t *zsb = ITOZSB(dip); ++ zilog_t *zilog; ++ uint64_t xattr_obj; ++ uint64_t xattr_obj_unlinked = 0; ++ uint64_t obj = 0; ++ zfs_dirlock_t *dl; ++ dmu_tx_t *tx; ++ boolean_t unlinked; ++ uint64_t txtype; ++ pathname_t *realnmp = NULL; ++#ifdef HAVE_PN_UTILS ++ pathname_t realnm; ++#endif /* HAVE_PN_UTILS */ ++ int error; ++ int zflg = ZEXISTS; ++ ++ ZFS_ENTER(zsb); ++ ZFS_VERIFY_ZP(dzp); ++ zilog = zsb->z_log; ++ ++#ifdef HAVE_PN_UTILS ++ if (flags & FIGNORECASE) { ++ zflg |= ZCILOOK; ++ pn_alloc(&realnm); ++ realnmp = &realnm; ++ } ++#endif /* HAVE_PN_UTILS */ ++ ++top: ++ xattr_obj = 0; ++ xzp = NULL; ++ /* ++ * Attempt to lock directory; fail if entry doesn't exist. ++ */ ++ if ((error = zfs_dirent_lock(&dl, dzp, name, &zp, zflg, ++ NULL, realnmp))) { ++#ifdef HAVE_PN_UTILS ++ if (realnmp) ++ pn_free(realnmp); ++#endif /* HAVE_PN_UTILS */ ++ ZFS_EXIT(zsb); ++ return (error); ++ } ++ ++ ip = ZTOI(zp); ++ ++ if ((error = zfs_zaccess_delete(dzp, zp, cr))) { ++ goto out; ++ } ++ ++ /* ++ * Need to use rmdir for removing directories. ++ */ ++ if (S_ISDIR(ip->i_mode)) { ++ error = EPERM; ++ goto out; ++ } ++ ++#ifdef HAVE_DNLC ++ if (realnmp) ++ dnlc_remove(dvp, realnmp->pn_buf); ++ else ++ dnlc_remove(dvp, name); ++#endif /* HAVE_DNLC */ ++ ++ /* ++ * We never delete the znode and always place it in the unlinked ++ * set. The dentry cache will always hold the last reference and ++ * is responsible for safely freeing the znode. ++ */ ++ obj = zp->z_id; ++ tx = dmu_tx_create(zsb->z_os); ++ dmu_tx_hold_zap(tx, dzp->z_id, FALSE, name); ++ dmu_tx_hold_sa(tx, zp->z_sa_hdl, B_FALSE); ++ zfs_sa_upgrade_txholds(tx, zp); ++ zfs_sa_upgrade_txholds(tx, dzp); ++ ++ /* are there any extended attributes? 
*/ ++ error = sa_lookup(zp->z_sa_hdl, SA_ZPL_XATTR(zsb), ++ &xattr_obj, sizeof (xattr_obj)); ++ if (error == 0 && xattr_obj) { ++ error = zfs_zget(zsb, xattr_obj, &xzp); ++ ASSERT3U(error, ==, 0); ++ dmu_tx_hold_sa(tx, zp->z_sa_hdl, B_TRUE); ++ dmu_tx_hold_sa(tx, xzp->z_sa_hdl, B_FALSE); ++ } ++ ++ /* charge as an update -- would be nice not to charge at all */ ++ dmu_tx_hold_zap(tx, zsb->z_unlinkedobj, FALSE, NULL); ++ ++ error = dmu_tx_assign(tx, TXG_NOWAIT); ++ if (error) { ++ zfs_dirent_unlock(dl); ++ iput(ip); ++ if (xzp) ++ iput(ZTOI(xzp)); ++ if (error == ERESTART) { ++ dmu_tx_wait(tx); ++ dmu_tx_abort(tx); ++ goto top; ++ } ++#ifdef HAVE_PN_UTILS ++ if (realnmp) ++ pn_free(realnmp); ++#endif /* HAVE_PN_UTILS */ ++ dmu_tx_abort(tx); ++ ZFS_EXIT(zsb); ++ return (error); ++ } ++ ++ /* ++ * Remove the directory entry. ++ */ ++ error = zfs_link_destroy(dl, zp, tx, zflg, &unlinked); ++ ++ if (error) { ++ dmu_tx_commit(tx); ++ goto out; ++ } ++ ++ if (unlinked) { ++ /* ++ * Hold z_lock so that we can make sure that the ACL obj ++ * hasn't changed. Could have been deleted due to ++ * zfs_sa_upgrade(). ++ */ ++ mutex_enter(&zp->z_lock); ++ (void) sa_lookup(zp->z_sa_hdl, SA_ZPL_XATTR(zsb), ++ &xattr_obj_unlinked, sizeof (xattr_obj_unlinked)); ++ mutex_exit(&zp->z_lock); ++ zfs_unlinked_add(zp, tx); ++ } ++ ++ txtype = TX_REMOVE; ++#ifdef HAVE_PN_UTILS ++ if (flags & FIGNORECASE) ++ txtype |= TX_CI; ++#endif /* HAVE_PN_UTILS */ ++ zfs_log_remove(zilog, tx, txtype, dzp, name, obj); ++ ++ dmu_tx_commit(tx); ++out: ++#ifdef HAVE_PN_UTILS ++ if (realnmp) ++ pn_free(realnmp); ++#endif /* HAVE_PN_UTILS */ ++ ++ zfs_dirent_unlock(dl); ++ zfs_inode_update(dzp); ++ zfs_inode_update(zp); ++ if (xzp) ++ zfs_inode_update(xzp); ++ ++ iput(ip); ++ if (xzp) ++ iput(ZTOI(xzp)); ++ ++ if (zsb->z_os->os_sync == ZFS_SYNC_ALWAYS) ++ zil_commit(zilog, 0); ++ ++ ZFS_EXIT(zsb); ++ return (error); ++} ++EXPORT_SYMBOL(zfs_remove); ++ ++/* ++ * Create a new directory and insert it into dip using the name ++ * provided. Return a pointer to the inserted directory. ++ * ++ * IN: dip - inode of directory to add subdir to. ++ * dirname - name of new directory. ++ * vap - attributes of new directory. ++ * cr - credentials of caller. ++ * vsecp - ACL to be set ++ * ++ * OUT: ipp - inode of created directory. 
++ * ++ * RETURN: 0 if success ++ * error code if failure ++ * ++ * Timestamps: ++ * dip - ctime|mtime updated ++ * ipp - ctime|mtime|atime updated ++ */ ++/*ARGSUSED*/ ++int ++zfs_mkdir(struct inode *dip, char *dirname, vattr_t *vap, struct inode **ipp, ++ cred_t *cr, int flags, vsecattr_t *vsecp) ++{ ++ znode_t *zp, *dzp = ITOZ(dip); ++ zfs_sb_t *zsb = ITOZSB(dip); ++ zilog_t *zilog; ++ zfs_dirlock_t *dl; ++ uint64_t txtype; ++ dmu_tx_t *tx; ++ int error; ++ int zf = ZNEW; ++ uid_t uid; ++ gid_t gid = crgetgid(cr); ++ zfs_acl_ids_t acl_ids; ++ boolean_t fuid_dirtied; ++ ++ ASSERT(S_ISDIR(vap->va_mode)); ++ ++ /* ++ * If we have an ephemeral id, ACL, or XVATTR then ++ * make sure file system is at proper version ++ */ ++ ++ uid = crgetuid(cr); ++ if (zsb->z_use_fuids == B_FALSE && ++ (vsecp || IS_EPHEMERAL(uid) || IS_EPHEMERAL(gid))) ++ return (EINVAL); ++ ++ ZFS_ENTER(zsb); ++ ZFS_VERIFY_ZP(dzp); ++ zilog = zsb->z_log; ++ ++ if (dzp->z_pflags & ZFS_XATTR) { ++ ZFS_EXIT(zsb); ++ return (EINVAL); ++ } ++ ++ if (zsb->z_utf8 && u8_validate(dirname, ++ strlen(dirname), NULL, U8_VALIDATE_ENTIRE, &error) < 0) { ++ ZFS_EXIT(zsb); ++ return (EILSEQ); ++ } ++ if (flags & FIGNORECASE) ++ zf |= ZCILOOK; ++ ++ if (vap->va_mask & ATTR_XVATTR) { ++ if ((error = secpolicy_xvattr((xvattr_t *)vap, ++ crgetuid(cr), cr, vap->va_mode)) != 0) { ++ ZFS_EXIT(zsb); ++ return (error); ++ } ++ } ++ ++ if ((error = zfs_acl_ids_create(dzp, 0, vap, cr, ++ vsecp, &acl_ids)) != 0) { ++ ZFS_EXIT(zsb); ++ return (error); ++ } ++ /* ++ * First make sure the new directory doesn't exist. ++ * ++ * Existence is checked first to make sure we don't return ++ * EACCES instead of EEXIST which can cause some applications ++ * to fail. ++ */ ++top: ++ *ipp = NULL; ++ ++ if ((error = zfs_dirent_lock(&dl, dzp, dirname, &zp, zf, ++ NULL, NULL))) { ++ zfs_acl_ids_free(&acl_ids); ++ ZFS_EXIT(zsb); ++ return (error); ++ } ++ ++ if ((error = zfs_zaccess(dzp, ACE_ADD_SUBDIRECTORY, 0, B_FALSE, cr))) { ++ zfs_acl_ids_free(&acl_ids); ++ zfs_dirent_unlock(dl); ++ ZFS_EXIT(zsb); ++ return (error); ++ } ++ ++ if (zfs_acl_ids_overquota(zsb, &acl_ids)) { ++ zfs_acl_ids_free(&acl_ids); ++ zfs_dirent_unlock(dl); ++ ZFS_EXIT(zsb); ++ return (EDQUOT); ++ } ++ ++ /* ++ * Add a new entry to the directory. ++ */ ++ tx = dmu_tx_create(zsb->z_os); ++ dmu_tx_hold_zap(tx, dzp->z_id, TRUE, dirname); ++ dmu_tx_hold_zap(tx, DMU_NEW_OBJECT, FALSE, NULL); ++ fuid_dirtied = zsb->z_fuid_dirty; ++ if (fuid_dirtied) ++ zfs_fuid_txhold(zsb, tx); ++ if (!zsb->z_use_sa && acl_ids.z_aclp->z_acl_bytes > ZFS_ACE_SPACE) { ++ dmu_tx_hold_write(tx, DMU_NEW_OBJECT, 0, ++ acl_ids.z_aclp->z_acl_bytes); ++ } ++ ++ dmu_tx_hold_sa_create(tx, acl_ids.z_aclp->z_acl_bytes + ++ ZFS_SA_BASE_ATTR_SIZE); ++ ++ error = dmu_tx_assign(tx, TXG_NOWAIT); ++ if (error) { ++ zfs_dirent_unlock(dl); ++ if (error == ERESTART) { ++ dmu_tx_wait(tx); ++ dmu_tx_abort(tx); ++ goto top; ++ } ++ zfs_acl_ids_free(&acl_ids); ++ dmu_tx_abort(tx); ++ ZFS_EXIT(zsb); ++ return (error); ++ } ++ ++ /* ++ * Create new node. ++ */ ++ zfs_mknode(dzp, vap, tx, cr, 0, &zp, &acl_ids); ++ ++ if (fuid_dirtied) ++ zfs_fuid_sync(zsb, tx); ++ ++ /* ++ * Now put new name in parent dir. 
++ */ ++ (void) zfs_link_create(dl, zp, tx, ZNEW); ++ ++ *ipp = ZTOI(zp); ++ ++ txtype = zfs_log_create_txtype(Z_DIR, vsecp, vap); ++ if (flags & FIGNORECASE) ++ txtype |= TX_CI; ++ zfs_log_create(zilog, tx, txtype, dzp, zp, dirname, vsecp, ++ acl_ids.z_fuidp, vap); ++ ++ zfs_acl_ids_free(&acl_ids); ++ ++ dmu_tx_commit(tx); ++ ++ zfs_dirent_unlock(dl); ++ ++ if (zsb->z_os->os_sync == ZFS_SYNC_ALWAYS) ++ zil_commit(zilog, 0); ++ ++ zfs_inode_update(dzp); ++ zfs_inode_update(zp); ++ ZFS_EXIT(zsb); ++ return (0); ++} ++EXPORT_SYMBOL(zfs_mkdir); ++ ++/* ++ * Remove a directory subdir entry. If the current working ++ * directory is the same as the subdir to be removed, the ++ * remove will fail. ++ * ++ * IN: dip - inode of directory to remove from. ++ * name - name of directory to be removed. ++ * cwd - inode of current working directory. ++ * cr - credentials of caller. ++ * flags - case flags ++ * ++ * RETURN: 0 if success ++ * error code if failure ++ * ++ * Timestamps: ++ * dip - ctime|mtime updated ++ */ ++/*ARGSUSED*/ ++int ++zfs_rmdir(struct inode *dip, char *name, struct inode *cwd, cred_t *cr, ++ int flags) ++{ ++ znode_t *dzp = ITOZ(dip); ++ znode_t *zp; ++ struct inode *ip; ++ zfs_sb_t *zsb = ITOZSB(dip); ++ zilog_t *zilog; ++ zfs_dirlock_t *dl; ++ dmu_tx_t *tx; ++ int error; ++ int zflg = ZEXISTS; ++ ++ ZFS_ENTER(zsb); ++ ZFS_VERIFY_ZP(dzp); ++ zilog = zsb->z_log; ++ ++ if (flags & FIGNORECASE) ++ zflg |= ZCILOOK; ++top: ++ zp = NULL; ++ ++ /* ++ * Attempt to lock directory; fail if entry doesn't exist. ++ */ ++ if ((error = zfs_dirent_lock(&dl, dzp, name, &zp, zflg, ++ NULL, NULL))) { ++ ZFS_EXIT(zsb); ++ return (error); ++ } ++ ++ ip = ZTOI(zp); ++ ++ if ((error = zfs_zaccess_delete(dzp, zp, cr))) { ++ goto out; ++ } ++ ++ if (!S_ISDIR(ip->i_mode)) { ++ error = ENOTDIR; ++ goto out; ++ } ++ ++ if (ip == cwd) { ++ error = EINVAL; ++ goto out; ++ } ++ ++ /* ++ * Grab a lock on the directory to make sure that noone is ++ * trying to add (or lookup) entries while we are removing it. ++ */ ++ rw_enter(&zp->z_name_lock, RW_WRITER); ++ ++ /* ++ * Grab a lock on the parent pointer to make sure we play well ++ * with the treewalk and directory rename code. ++ */ ++ rw_enter(&zp->z_parent_lock, RW_WRITER); ++ ++ tx = dmu_tx_create(zsb->z_os); ++ dmu_tx_hold_zap(tx, dzp->z_id, FALSE, name); ++ dmu_tx_hold_sa(tx, zp->z_sa_hdl, B_FALSE); ++ dmu_tx_hold_zap(tx, zsb->z_unlinkedobj, FALSE, NULL); ++ zfs_sa_upgrade_txholds(tx, zp); ++ zfs_sa_upgrade_txholds(tx, dzp); ++ error = dmu_tx_assign(tx, TXG_NOWAIT); ++ if (error) { ++ rw_exit(&zp->z_parent_lock); ++ rw_exit(&zp->z_name_lock); ++ zfs_dirent_unlock(dl); ++ iput(ip); ++ if (error == ERESTART) { ++ dmu_tx_wait(tx); ++ dmu_tx_abort(tx); ++ goto top; ++ } ++ dmu_tx_abort(tx); ++ ZFS_EXIT(zsb); ++ return (error); ++ } ++ ++ error = zfs_link_destroy(dl, zp, tx, zflg, NULL); ++ ++ if (error == 0) { ++ uint64_t txtype = TX_RMDIR; ++ if (flags & FIGNORECASE) ++ txtype |= TX_CI; ++ zfs_log_remove(zilog, tx, txtype, dzp, name, ZFS_NO_OBJECT); ++ } ++ ++ dmu_tx_commit(tx); ++ ++ rw_exit(&zp->z_parent_lock); ++ rw_exit(&zp->z_name_lock); ++out: ++ zfs_dirent_unlock(dl); ++ ++ zfs_inode_update(dzp); ++ zfs_inode_update(zp); ++ iput(ip); ++ ++ if (zsb->z_os->os_sync == ZFS_SYNC_ALWAYS) ++ zil_commit(zilog, 0); ++ ++ ZFS_EXIT(zsb); ++ return (error); ++} ++EXPORT_SYMBOL(zfs_rmdir); ++ ++/* ++ * Read as many directory entries as will fit into the provided ++ * dirent buffer from the given directory cursor position. 
++ * ++ * IN: ip - inode of directory to read. ++ * dirent - buffer for directory entries. ++ * ++ * OUT: dirent - filler buffer of directory entries. ++ * ++ * RETURN: 0 if success ++ * error code if failure ++ * ++ * Timestamps: ++ * ip - atime updated ++ * ++ * Note that the low 4 bits of the cookie returned by zap is always zero. ++ * This allows us to use the low range for "special" directory entries: ++ * We use 0 for '.', and 1 for '..'. If this is the root of the filesystem, ++ * we use the offset 2 for the '.zfs' directory. ++ */ ++/* ARGSUSED */ ++int ++zfs_readdir(struct inode *ip, void *dirent, filldir_t filldir, ++ loff_t *pos, cred_t *cr) ++{ ++ znode_t *zp = ITOZ(ip); ++ zfs_sb_t *zsb = ITOZSB(ip); ++ objset_t *os; ++ zap_cursor_t zc; ++ zap_attribute_t zap; ++ int outcount; ++ int error; ++ uint8_t prefetch; ++ int done = 0; ++ uint64_t parent; ++ ++ ZFS_ENTER(zsb); ++ ZFS_VERIFY_ZP(zp); ++ ++ if ((error = sa_lookup(zp->z_sa_hdl, SA_ZPL_PARENT(zsb), ++ &parent, sizeof (parent))) != 0) ++ goto out; ++ ++ /* ++ * Quit if directory has been removed (posix) ++ */ ++ error = 0; ++ if (zp->z_unlinked) ++ goto out; ++ ++ os = zsb->z_os; ++ prefetch = zp->z_zn_prefetch; ++ ++ /* ++ * Initialize the iterator cursor. ++ */ ++ if (*pos <= 3) { ++ /* ++ * Start iteration from the beginning of the directory. ++ */ ++ zap_cursor_init(&zc, os, zp->z_id); ++ } else { ++ /* ++ * The offset is a serialized cursor. ++ */ ++ zap_cursor_init_serialized(&zc, os, zp->z_id, *pos); ++ } ++ ++ /* ++ * Transform to file-system independent format ++ */ ++ outcount = 0; ++ ++ while (!done) { ++ uint64_t objnum; ++ /* ++ * Special case `.', `..', and `.zfs'. ++ */ ++ if (*pos == 0) { ++ (void) strcpy(zap.za_name, "."); ++ zap.za_normalization_conflict = 0; ++ objnum = zp->z_id; ++ } else if (*pos == 1) { ++ (void) strcpy(zap.za_name, ".."); ++ zap.za_normalization_conflict = 0; ++ objnum = parent; ++ } else if (*pos == 2 && zfs_show_ctldir(zp)) { ++ (void) strcpy(zap.za_name, ZFS_CTLDIR_NAME); ++ zap.za_normalization_conflict = 0; ++ objnum = ZFSCTL_INO_ROOT; ++ } else { ++ /* ++ * Grab next entry. ++ */ ++ if ((error = zap_cursor_retrieve(&zc, &zap))) { ++ if (error == ENOENT) ++ break; ++ else ++ goto update; ++ } ++ ++ /* ++ * Allow multiple entries provided the first entry is ++ * the object id. Non-zpl consumers may safely make ++ * use of the additional space. 
++ * ++ * XXX: This should be a feature flag for compatibility ++ */ ++ if (zap.za_integer_length != 8 || ++ zap.za_num_integers == 0) { ++ cmn_err(CE_WARN, "zap_readdir: bad directory " ++ "entry, obj = %lld, offset = %lld, " ++ "length = %d, num = %lld\n", ++ (u_longlong_t)zp->z_id, ++ (u_longlong_t)*pos, ++ zap.za_integer_length, ++ (u_longlong_t)zap.za_num_integers); ++ error = ENXIO; ++ goto update; ++ } ++ ++ objnum = ZFS_DIRENT_OBJ(zap.za_first_integer); ++ } ++ done = filldir(dirent, zap.za_name, strlen(zap.za_name), ++ zap_cursor_serialize(&zc), objnum, 0); ++ if (done) { ++ break; ++ } ++ ++ /* Prefetch znode */ ++ if (prefetch) { ++ dmu_prefetch(os, objnum, 0, 0); ++ } ++ ++ if (*pos > 2 || (*pos == 2 && !zfs_show_ctldir(zp))) { ++ zap_cursor_advance(&zc); ++ *pos = zap_cursor_serialize(&zc); ++ } else { ++ (*pos)++; ++ } ++ } ++ zp->z_zn_prefetch = B_FALSE; /* a lookup will re-enable pre-fetching */ ++ ++update: ++ zap_cursor_fini(&zc); ++ if (error == ENOENT) ++ error = 0; ++ ++ ZFS_ACCESSTIME_STAMP(zsb, zp); ++ zfs_inode_update(zp); ++ ++out: ++ ZFS_EXIT(zsb); ++ ++ return (error); ++} ++EXPORT_SYMBOL(zfs_readdir); ++ ++ulong_t zfs_fsync_sync_cnt = 4; ++ ++int ++zfs_fsync(struct inode *ip, int syncflag, cred_t *cr) ++{ ++ znode_t *zp = ITOZ(ip); ++ zfs_sb_t *zsb = ITOZSB(ip); ++ ++ (void) tsd_set(zfs_fsyncer_key, (void *)zfs_fsync_sync_cnt); ++ ++ if (zsb->z_os->os_sync != ZFS_SYNC_DISABLED) { ++ ZFS_ENTER(zsb); ++ ZFS_VERIFY_ZP(zp); ++ zil_commit(zsb->z_log, zp->z_id); ++ ZFS_EXIT(zsb); ++ } ++ return (0); ++} ++EXPORT_SYMBOL(zfs_fsync); ++ ++ ++/* ++ * Get the requested file attributes and place them in the provided ++ * vattr structure. ++ * ++ * IN: ip - inode of file. ++ * vap - va_mask identifies requested attributes. ++ * If ATTR_XVATTR set, then optional attrs are requested ++ * flags - ATTR_NOACLCHECK (CIFS server context) ++ * cr - credentials of caller. ++ * ++ * OUT: vap - attribute values. ++ * ++ * RETURN: 0 (always succeeds) ++ */ ++/* ARGSUSED */ ++int ++zfs_getattr(struct inode *ip, vattr_t *vap, int flags, cred_t *cr) ++{ ++ znode_t *zp = ITOZ(ip); ++ zfs_sb_t *zsb = ITOZSB(ip); ++ int error = 0; ++ uint64_t links; ++ uint64_t mtime[2], ctime[2]; ++ xvattr_t *xvap = (xvattr_t *)vap; /* vap may be an xvattr_t * */ ++ xoptattr_t *xoap = NULL; ++ boolean_t skipaclchk = (flags & ATTR_NOACLCHECK) ? B_TRUE : B_FALSE; ++ sa_bulk_attr_t bulk[2]; ++ int count = 0; ++ ++ ZFS_ENTER(zsb); ++ ZFS_VERIFY_ZP(zp); ++ ++ zfs_fuid_map_ids(zp, cr, &vap->va_uid, &vap->va_gid); ++ ++ SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_MTIME(zsb), NULL, &mtime, 16); ++ SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_CTIME(zsb), NULL, &ctime, 16); ++ ++ if ((error = sa_bulk_lookup(zp->z_sa_hdl, bulk, count)) != 0) { ++ ZFS_EXIT(zsb); ++ return (error); ++ } ++ ++ /* ++ * If ACL is trivial don't bother looking for ACE_READ_ATTRIBUTES. ++ * Also, if we are the owner don't bother, since owner should ++ * always be allowed to read basic attributes of file. ++ */ ++ if (!(zp->z_pflags & ZFS_ACL_TRIVIAL) && ++ (vap->va_uid != crgetuid(cr))) { ++ if ((error = zfs_zaccess(zp, ACE_READ_ATTRIBUTES, 0, ++ skipaclchk, cr))) { ++ ZFS_EXIT(zsb); ++ return (error); ++ } ++ } ++ ++ /* ++ * Return all attributes. It's cheaper to provide the answer ++ * than to determine whether we were asked the question. 
++ */ ++ ++ mutex_enter(&zp->z_lock); ++ vap->va_type = vn_mode_to_vtype(zp->z_mode); ++ vap->va_mode = zp->z_mode; ++ vap->va_fsid = ZTOI(zp)->i_sb->s_dev; ++ vap->va_nodeid = zp->z_id; ++ if ((zp->z_id == zsb->z_root) && zfs_show_ctldir(zp)) ++ links = zp->z_links + 1; ++ else ++ links = zp->z_links; ++ vap->va_nlink = MIN(links, ZFS_LINK_MAX); ++ vap->va_size = i_size_read(ip); ++ vap->va_rdev = ip->i_rdev; ++ vap->va_seq = ip->i_generation; ++ ++ /* ++ * Add in any requested optional attributes and the create time. ++ * Also set the corresponding bits in the returned attribute bitmap. ++ */ ++ if ((xoap = xva_getxoptattr(xvap)) != NULL && zsb->z_use_fuids) { ++ if (XVA_ISSET_REQ(xvap, XAT_ARCHIVE)) { ++ xoap->xoa_archive = ++ ((zp->z_pflags & ZFS_ARCHIVE) != 0); ++ XVA_SET_RTN(xvap, XAT_ARCHIVE); ++ } ++ ++ if (XVA_ISSET_REQ(xvap, XAT_READONLY)) { ++ xoap->xoa_readonly = ++ ((zp->z_pflags & ZFS_READONLY) != 0); ++ XVA_SET_RTN(xvap, XAT_READONLY); ++ } ++ ++ if (XVA_ISSET_REQ(xvap, XAT_SYSTEM)) { ++ xoap->xoa_system = ++ ((zp->z_pflags & ZFS_SYSTEM) != 0); ++ XVA_SET_RTN(xvap, XAT_SYSTEM); ++ } ++ ++ if (XVA_ISSET_REQ(xvap, XAT_HIDDEN)) { ++ xoap->xoa_hidden = ++ ((zp->z_pflags & ZFS_HIDDEN) != 0); ++ XVA_SET_RTN(xvap, XAT_HIDDEN); ++ } ++ ++ if (XVA_ISSET_REQ(xvap, XAT_NOUNLINK)) { ++ xoap->xoa_nounlink = ++ ((zp->z_pflags & ZFS_NOUNLINK) != 0); ++ XVA_SET_RTN(xvap, XAT_NOUNLINK); ++ } ++ ++ if (XVA_ISSET_REQ(xvap, XAT_IMMUTABLE)) { ++ xoap->xoa_immutable = ++ ((zp->z_pflags & ZFS_IMMUTABLE) != 0); ++ XVA_SET_RTN(xvap, XAT_IMMUTABLE); ++ } ++ ++ if (XVA_ISSET_REQ(xvap, XAT_APPENDONLY)) { ++ xoap->xoa_appendonly = ++ ((zp->z_pflags & ZFS_APPENDONLY) != 0); ++ XVA_SET_RTN(xvap, XAT_APPENDONLY); ++ } ++ ++ if (XVA_ISSET_REQ(xvap, XAT_NODUMP)) { ++ xoap->xoa_nodump = ++ ((zp->z_pflags & ZFS_NODUMP) != 0); ++ XVA_SET_RTN(xvap, XAT_NODUMP); ++ } ++ ++ if (XVA_ISSET_REQ(xvap, XAT_OPAQUE)) { ++ xoap->xoa_opaque = ++ ((zp->z_pflags & ZFS_OPAQUE) != 0); ++ XVA_SET_RTN(xvap, XAT_OPAQUE); ++ } ++ ++ if (XVA_ISSET_REQ(xvap, XAT_AV_QUARANTINED)) { ++ xoap->xoa_av_quarantined = ++ ((zp->z_pflags & ZFS_AV_QUARANTINED) != 0); ++ XVA_SET_RTN(xvap, XAT_AV_QUARANTINED); ++ } ++ ++ if (XVA_ISSET_REQ(xvap, XAT_AV_MODIFIED)) { ++ xoap->xoa_av_modified = ++ ((zp->z_pflags & ZFS_AV_MODIFIED) != 0); ++ XVA_SET_RTN(xvap, XAT_AV_MODIFIED); ++ } ++ ++ if (XVA_ISSET_REQ(xvap, XAT_AV_SCANSTAMP) && ++ S_ISREG(ip->i_mode)) { ++ zfs_sa_get_scanstamp(zp, xvap); ++ } ++ ++ if (XVA_ISSET_REQ(xvap, XAT_CREATETIME)) { ++ uint64_t times[2]; ++ ++ (void) sa_lookup(zp->z_sa_hdl, SA_ZPL_CRTIME(zsb), ++ times, sizeof (times)); ++ ZFS_TIME_DECODE(&xoap->xoa_createtime, times); ++ XVA_SET_RTN(xvap, XAT_CREATETIME); ++ } ++ ++ if (XVA_ISSET_REQ(xvap, XAT_REPARSE)) { ++ xoap->xoa_reparse = ((zp->z_pflags & ZFS_REPARSE) != 0); ++ XVA_SET_RTN(xvap, XAT_REPARSE); ++ } ++ if (XVA_ISSET_REQ(xvap, XAT_GEN)) { ++ xoap->xoa_generation = zp->z_gen; ++ XVA_SET_RTN(xvap, XAT_GEN); ++ } ++ ++ if (XVA_ISSET_REQ(xvap, XAT_OFFLINE)) { ++ xoap->xoa_offline = ++ ((zp->z_pflags & ZFS_OFFLINE) != 0); ++ XVA_SET_RTN(xvap, XAT_OFFLINE); ++ } ++ ++ if (XVA_ISSET_REQ(xvap, XAT_SPARSE)) { ++ xoap->xoa_sparse = ++ ((zp->z_pflags & ZFS_SPARSE) != 0); ++ XVA_SET_RTN(xvap, XAT_SPARSE); ++ } ++ } ++ ++ ZFS_TIME_DECODE(&vap->va_atime, zp->z_atime); ++ ZFS_TIME_DECODE(&vap->va_mtime, mtime); ++ ZFS_TIME_DECODE(&vap->va_ctime, ctime); ++ ++ mutex_exit(&zp->z_lock); ++ ++ sa_object_size(zp->z_sa_hdl, &vap->va_blksize, &vap->va_nblocks); ++ ++ if (zp->z_blksz == 0) 
{ ++ /* ++ * Block size hasn't been set; suggest maximal I/O transfers. ++ */ ++ vap->va_blksize = zsb->z_max_blksz; ++ } ++ ++ ZFS_EXIT(zsb); ++ return (0); ++} ++EXPORT_SYMBOL(zfs_getattr); ++ ++/* ++ * Get the basic file attributes and place them in the provided kstat ++ * structure. The inode is assumed to be the authoritative source ++ * for most of the attributes. However, the znode currently has the ++ * authoritative atime, blksize, and block count. ++ * ++ * IN: ip - inode of file. ++ * ++ * OUT: sp - kstat values. ++ * ++ * RETURN: 0 (always succeeds) ++ */ ++/* ARGSUSED */ ++int ++zfs_getattr_fast(struct inode *ip, struct kstat *sp) ++{ ++ znode_t *zp = ITOZ(ip); ++ zfs_sb_t *zsb = ITOZSB(ip); ++ ++ ZFS_ENTER(zsb); ++ ZFS_VERIFY_ZP(zp); ++ ++ mutex_enter(&zp->z_lock); ++ ++ generic_fillattr(ip, sp); ++ ZFS_TIME_DECODE(&sp->atime, zp->z_atime); ++ ++ sa_object_size(zp->z_sa_hdl, (uint32_t *)&sp->blksize, &sp->blocks); ++ if (unlikely(zp->z_blksz == 0)) { ++ /* ++ * Block size hasn't been set; suggest maximal I/O transfers. ++ */ ++ sp->blksize = zsb->z_max_blksz; ++ } ++ ++ mutex_exit(&zp->z_lock); ++ ++ ZFS_EXIT(zsb); ++ ++ return (0); ++} ++EXPORT_SYMBOL(zfs_getattr_fast); ++ ++/* ++ * Set the file attributes to the values contained in the ++ * vattr structure. ++ * ++ * IN: ip - inode of file to be modified. ++ * vap - new attribute values. ++ * If ATTR_XVATTR set, then optional attrs are being set ++ * flags - ATTR_UTIME set if non-default time values provided. ++ * - ATTR_NOACLCHECK (CIFS context only). ++ * cr - credentials of caller. ++ * ++ * RETURN: 0 if success ++ * error code if failure ++ * ++ * Timestamps: ++ * ip - ctime updated, mtime updated if size changed. ++ */ ++/* ARGSUSED */ ++int ++zfs_setattr(struct inode *ip, vattr_t *vap, int flags, cred_t *cr) ++{ ++ znode_t *zp = ITOZ(ip); ++ zfs_sb_t *zsb = ITOZSB(ip); ++ zilog_t *zilog; ++ dmu_tx_t *tx; ++ vattr_t oldva; ++ xvattr_t *tmpxvattr; ++ uint_t mask = vap->va_mask; ++ uint_t saved_mask; ++ int trim_mask = 0; ++ uint64_t new_mode; ++ uint64_t new_uid, new_gid; ++ uint64_t xattr_obj; ++ uint64_t mtime[2], ctime[2]; ++ znode_t *attrzp; ++ int need_policy = FALSE; ++ int err, err2; ++ zfs_fuid_info_t *fuidp = NULL; ++ xvattr_t *xvap = (xvattr_t *)vap; /* vap may be an xvattr_t * */ ++ xoptattr_t *xoap; ++ zfs_acl_t *aclp; ++ boolean_t skipaclchk = (flags & ATTR_NOACLCHECK) ? B_TRUE : B_FALSE; ++ boolean_t fuid_dirtied = B_FALSE; ++ sa_bulk_attr_t *bulk, *xattr_bulk; ++ int count = 0, xattr_count = 0; ++ ++ if (mask == 0) ++ return (0); ++ ++ ZFS_ENTER(zsb); ++ ZFS_VERIFY_ZP(zp); ++ ++ zilog = zsb->z_log; ++ ++ /* ++ * Make sure that if we have ephemeral uid/gid or xvattr specified ++ * that file system is at proper version level ++ */ ++ ++ if (zsb->z_use_fuids == B_FALSE && ++ (((mask & ATTR_UID) && IS_EPHEMERAL(vap->va_uid)) || ++ ((mask & ATTR_GID) && IS_EPHEMERAL(vap->va_gid)) || ++ (mask & ATTR_XVATTR))) { ++ ZFS_EXIT(zsb); ++ return (EINVAL); ++ } ++ ++ if (mask & ATTR_SIZE && S_ISDIR(ip->i_mode)) { ++ ZFS_EXIT(zsb); ++ return (EISDIR); ++ } ++ ++ if (mask & ATTR_SIZE && !S_ISREG(ip->i_mode) && !S_ISFIFO(ip->i_mode)) { ++ ZFS_EXIT(zsb); ++ return (EINVAL); ++ } ++ ++ /* ++ * If this is an xvattr_t, then get a pointer to the structure of ++ * optional attributes. If this is NULL, then we have a vattr_t. 
++ */ ++ xoap = xva_getxoptattr(xvap); ++ ++ tmpxvattr = kmem_alloc(sizeof(xvattr_t), KM_SLEEP); ++ xva_init(tmpxvattr); ++ ++ bulk = kmem_alloc(sizeof(sa_bulk_attr_t) * 7, KM_SLEEP); ++ xattr_bulk = kmem_alloc(sizeof(sa_bulk_attr_t) * 7, KM_SLEEP); ++ ++ /* ++ * Immutable files can only alter immutable bit and atime ++ */ ++ if ((zp->z_pflags & ZFS_IMMUTABLE) && ++ ((mask & (ATTR_SIZE|ATTR_UID|ATTR_GID|ATTR_MTIME|ATTR_MODE)) || ++ ((mask & ATTR_XVATTR) && XVA_ISSET_REQ(xvap, XAT_CREATETIME)))) { ++ err = EPERM; ++ goto out3; ++ } ++ ++ if ((mask & ATTR_SIZE) && (zp->z_pflags & ZFS_READONLY)) { ++ err = EPERM; ++ goto out3; ++ } ++ ++ /* ++ * Verify timestamps doesn't overflow 32 bits. ++ * ZFS can handle large timestamps, but 32bit syscalls can't ++ * handle times greater than 2039. This check should be removed ++ * once large timestamps are fully supported. ++ */ ++ if (mask & (ATTR_ATIME | ATTR_MTIME)) { ++ if (((mask & ATTR_ATIME) && TIMESPEC_OVERFLOW(&vap->va_atime)) || ++ ((mask & ATTR_MTIME) && TIMESPEC_OVERFLOW(&vap->va_mtime))) { ++ err = EOVERFLOW; ++ goto out3; ++ } ++ } ++ ++top: ++ attrzp = NULL; ++ aclp = NULL; ++ ++ /* Can this be moved to before the top label? */ ++ if (zfs_is_readonly(zsb)) { ++ err = EROFS; ++ goto out3; ++ } ++ ++ /* ++ * First validate permissions ++ */ ++ ++ if (mask & ATTR_SIZE) { ++ err = zfs_zaccess(zp, ACE_WRITE_DATA, 0, skipaclchk, cr); ++ if (err) ++ goto out3; ++ ++ truncate_setsize(ip, vap->va_size); ++ ++ /* ++ * XXX - Note, we are not providing any open ++ * mode flags here (like FNDELAY), so we may ++ * block if there are locks present... this ++ * should be addressed in openat(). ++ */ ++ /* XXX - would it be OK to generate a log record here? */ ++ err = zfs_freesp(zp, vap->va_size, 0, 0, FALSE); ++ if (err) ++ goto out3; ++ } ++ ++ if (mask & (ATTR_ATIME|ATTR_MTIME) || ++ ((mask & ATTR_XVATTR) && (XVA_ISSET_REQ(xvap, XAT_HIDDEN) || ++ XVA_ISSET_REQ(xvap, XAT_READONLY) || ++ XVA_ISSET_REQ(xvap, XAT_ARCHIVE) || ++ XVA_ISSET_REQ(xvap, XAT_OFFLINE) || ++ XVA_ISSET_REQ(xvap, XAT_SPARSE) || ++ XVA_ISSET_REQ(xvap, XAT_CREATETIME) || ++ XVA_ISSET_REQ(xvap, XAT_SYSTEM)))) { ++ need_policy = zfs_zaccess(zp, ACE_WRITE_ATTRIBUTES, 0, ++ skipaclchk, cr); ++ } ++ ++ if (mask & (ATTR_UID|ATTR_GID)) { ++ int idmask = (mask & (ATTR_UID|ATTR_GID)); ++ int take_owner; ++ int take_group; ++ ++ /* ++ * NOTE: even if a new mode is being set, ++ * we may clear S_ISUID/S_ISGID bits. ++ */ ++ ++ if (!(mask & ATTR_MODE)) ++ vap->va_mode = zp->z_mode; ++ ++ /* ++ * Take ownership or chgrp to group we are a member of ++ */ ++ ++ take_owner = (mask & ATTR_UID) && (vap->va_uid == crgetuid(cr)); ++ take_group = (mask & ATTR_GID) && ++ zfs_groupmember(zsb, vap->va_gid, cr); ++ ++ /* ++ * If both ATTR_UID and ATTR_GID are set then take_owner and ++ * take_group must both be set in order to allow taking ++ * ownership. 
++ * ++ * Otherwise, send the check through secpolicy_vnode_setattr() ++ * ++ */ ++ ++ if (((idmask == (ATTR_UID|ATTR_GID)) && ++ take_owner && take_group) || ++ ((idmask == ATTR_UID) && take_owner) || ++ ((idmask == ATTR_GID) && take_group)) { ++ if (zfs_zaccess(zp, ACE_WRITE_OWNER, 0, ++ skipaclchk, cr) == 0) { ++ /* ++ * Remove setuid/setgid for non-privileged users ++ */ ++ (void) secpolicy_setid_clear(vap, cr); ++ trim_mask = (mask & (ATTR_UID|ATTR_GID)); ++ } else { ++ need_policy = TRUE; ++ } ++ } else { ++ need_policy = TRUE; ++ } ++ } ++ ++ mutex_enter(&zp->z_lock); ++ oldva.va_mode = zp->z_mode; ++ zfs_fuid_map_ids(zp, cr, &oldva.va_uid, &oldva.va_gid); ++ if (mask & ATTR_XVATTR) { ++ /* ++ * Update xvattr mask to include only those attributes ++ * that are actually changing. ++ * ++ * the bits will be restored prior to actually setting ++ * the attributes so the caller thinks they were set. ++ */ ++ if (XVA_ISSET_REQ(xvap, XAT_APPENDONLY)) { ++ if (xoap->xoa_appendonly != ++ ((zp->z_pflags & ZFS_APPENDONLY) != 0)) { ++ need_policy = TRUE; ++ } else { ++ XVA_CLR_REQ(xvap, XAT_APPENDONLY); ++ XVA_SET_REQ(tmpxvattr, XAT_APPENDONLY); ++ } ++ } ++ ++ if (XVA_ISSET_REQ(xvap, XAT_NOUNLINK)) { ++ if (xoap->xoa_nounlink != ++ ((zp->z_pflags & ZFS_NOUNLINK) != 0)) { ++ need_policy = TRUE; ++ } else { ++ XVA_CLR_REQ(xvap, XAT_NOUNLINK); ++ XVA_SET_REQ(tmpxvattr, XAT_NOUNLINK); ++ } ++ } ++ ++ if (XVA_ISSET_REQ(xvap, XAT_IMMUTABLE)) { ++ if (xoap->xoa_immutable != ++ ((zp->z_pflags & ZFS_IMMUTABLE) != 0)) { ++ need_policy = TRUE; ++ } else { ++ XVA_CLR_REQ(xvap, XAT_IMMUTABLE); ++ XVA_SET_REQ(tmpxvattr, XAT_IMMUTABLE); ++ } ++ } ++ ++ if (XVA_ISSET_REQ(xvap, XAT_NODUMP)) { ++ if (xoap->xoa_nodump != ++ ((zp->z_pflags & ZFS_NODUMP) != 0)) { ++ need_policy = TRUE; ++ } else { ++ XVA_CLR_REQ(xvap, XAT_NODUMP); ++ XVA_SET_REQ(tmpxvattr, XAT_NODUMP); ++ } ++ } ++ ++ if (XVA_ISSET_REQ(xvap, XAT_AV_MODIFIED)) { ++ if (xoap->xoa_av_modified != ++ ((zp->z_pflags & ZFS_AV_MODIFIED) != 0)) { ++ need_policy = TRUE; ++ } else { ++ XVA_CLR_REQ(xvap, XAT_AV_MODIFIED); ++ XVA_SET_REQ(tmpxvattr, XAT_AV_MODIFIED); ++ } ++ } ++ ++ if (XVA_ISSET_REQ(xvap, XAT_AV_QUARANTINED)) { ++ if ((!S_ISREG(ip->i_mode) && ++ xoap->xoa_av_quarantined) || ++ xoap->xoa_av_quarantined != ++ ((zp->z_pflags & ZFS_AV_QUARANTINED) != 0)) { ++ need_policy = TRUE; ++ } else { ++ XVA_CLR_REQ(xvap, XAT_AV_QUARANTINED); ++ XVA_SET_REQ(tmpxvattr, XAT_AV_QUARANTINED); ++ } ++ } ++ ++ if (XVA_ISSET_REQ(xvap, XAT_REPARSE)) { ++ mutex_exit(&zp->z_lock); ++ err = EPERM; ++ goto out3; ++ } ++ ++ if (need_policy == FALSE && ++ (XVA_ISSET_REQ(xvap, XAT_AV_SCANSTAMP) || ++ XVA_ISSET_REQ(xvap, XAT_OPAQUE))) { ++ need_policy = TRUE; ++ } ++ } ++ ++ mutex_exit(&zp->z_lock); ++ ++ if (mask & ATTR_MODE) { ++ if (zfs_zaccess(zp, ACE_WRITE_ACL, 0, skipaclchk, cr) == 0) { ++ err = secpolicy_setid_setsticky_clear(ip, vap, ++ &oldva, cr); ++ if (err) ++ goto out3; ++ ++ trim_mask |= ATTR_MODE; ++ } else { ++ need_policy = TRUE; ++ } ++ } ++ ++ if (need_policy) { ++ /* ++ * If trim_mask is set then take ownership ++ * has been granted or write_acl is present and user ++ * has the ability to modify mode. In that case remove ++ * UID|GID and or MODE from mask so that ++ * secpolicy_vnode_setattr() doesn't revoke it. 
++ */ ++ ++ if (trim_mask) { ++ saved_mask = vap->va_mask; ++ vap->va_mask &= ~trim_mask; ++ } ++ err = secpolicy_vnode_setattr(cr, ip, vap, &oldva, flags, ++ (int (*)(void *, int, cred_t *))zfs_zaccess_unix, zp); ++ if (err) ++ goto out3; ++ ++ if (trim_mask) ++ vap->va_mask |= saved_mask; ++ } ++ ++ /* ++ * secpolicy_vnode_setattr, or take ownership may have ++ * changed va_mask ++ */ ++ mask = vap->va_mask; ++ ++ if ((mask & (ATTR_UID | ATTR_GID))) { ++ err = sa_lookup(zp->z_sa_hdl, SA_ZPL_XATTR(zsb), ++ &xattr_obj, sizeof (xattr_obj)); ++ ++ if (err == 0 && xattr_obj) { ++ err = zfs_zget(ZTOZSB(zp), xattr_obj, &attrzp); ++ if (err) ++ goto out2; ++ } ++ if (mask & ATTR_UID) { ++ new_uid = zfs_fuid_create(zsb, ++ (uint64_t)vap->va_uid, cr, ZFS_OWNER, &fuidp); ++ if (new_uid != zp->z_uid && ++ zfs_fuid_overquota(zsb, B_FALSE, new_uid)) { ++ if (attrzp) ++ iput(ZTOI(attrzp)); ++ err = EDQUOT; ++ goto out2; ++ } ++ } ++ ++ if (mask & ATTR_GID) { ++ new_gid = zfs_fuid_create(zsb, (uint64_t)vap->va_gid, ++ cr, ZFS_GROUP, &fuidp); ++ if (new_gid != zp->z_gid && ++ zfs_fuid_overquota(zsb, B_TRUE, new_gid)) { ++ if (attrzp) ++ iput(ZTOI(attrzp)); ++ err = EDQUOT; ++ goto out2; ++ } ++ } ++ } ++ tx = dmu_tx_create(zsb->z_os); ++ ++ if (mask & ATTR_MODE) { ++ uint64_t pmode = zp->z_mode; ++ uint64_t acl_obj; ++ new_mode = (pmode & S_IFMT) | (vap->va_mode & ~S_IFMT); ++ ++ zfs_acl_chmod_setattr(zp, &aclp, new_mode); ++ ++ mutex_enter(&zp->z_lock); ++ if (!zp->z_is_sa && ((acl_obj = zfs_external_acl(zp)) != 0)) { ++ /* ++ * Are we upgrading ACL from old V0 format ++ * to V1 format? ++ */ ++ if (zsb->z_version >= ZPL_VERSION_FUID && ++ zfs_znode_acl_version(zp) == ++ ZFS_ACL_VERSION_INITIAL) { ++ dmu_tx_hold_free(tx, acl_obj, 0, ++ DMU_OBJECT_END); ++ dmu_tx_hold_write(tx, DMU_NEW_OBJECT, ++ 0, aclp->z_acl_bytes); ++ } else { ++ dmu_tx_hold_write(tx, acl_obj, 0, ++ aclp->z_acl_bytes); ++ } ++ } else if (!zp->z_is_sa && aclp->z_acl_bytes > ZFS_ACE_SPACE) { ++ dmu_tx_hold_write(tx, DMU_NEW_OBJECT, ++ 0, aclp->z_acl_bytes); ++ } ++ mutex_exit(&zp->z_lock); ++ dmu_tx_hold_sa(tx, zp->z_sa_hdl, B_TRUE); ++ } else { ++ if ((mask & ATTR_XVATTR) && ++ XVA_ISSET_REQ(xvap, XAT_AV_SCANSTAMP)) ++ dmu_tx_hold_sa(tx, zp->z_sa_hdl, B_TRUE); ++ else ++ dmu_tx_hold_sa(tx, zp->z_sa_hdl, B_FALSE); ++ } ++ ++ if (attrzp) { ++ dmu_tx_hold_sa(tx, attrzp->z_sa_hdl, B_FALSE); ++ } ++ ++ fuid_dirtied = zsb->z_fuid_dirty; ++ if (fuid_dirtied) ++ zfs_fuid_txhold(zsb, tx); ++ ++ zfs_sa_upgrade_txholds(tx, zp); ++ ++ err = dmu_tx_assign(tx, TXG_NOWAIT); ++ if (err) { ++ if (err == ERESTART) ++ dmu_tx_wait(tx); ++ goto out; ++ } ++ ++ count = 0; ++ /* ++ * Set each attribute requested. ++ * We group settings according to the locks they need to acquire. ++ * ++ * Note: you cannot set ctime directly, although it will be ++ * updated as a side-effect of calling this function. 
++ */ ++ ++ ++ if (mask & (ATTR_UID|ATTR_GID|ATTR_MODE)) ++ mutex_enter(&zp->z_acl_lock); ++ mutex_enter(&zp->z_lock); ++ ++ SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_FLAGS(zsb), NULL, ++ &zp->z_pflags, sizeof (zp->z_pflags)); ++ ++ if (attrzp) { ++ if (mask & (ATTR_UID|ATTR_GID|ATTR_MODE)) ++ mutex_enter(&attrzp->z_acl_lock); ++ mutex_enter(&attrzp->z_lock); ++ SA_ADD_BULK_ATTR(xattr_bulk, xattr_count, ++ SA_ZPL_FLAGS(zsb), NULL, &attrzp->z_pflags, ++ sizeof (attrzp->z_pflags)); ++ } ++ ++ if (mask & (ATTR_UID|ATTR_GID)) { ++ ++ if (mask & ATTR_UID) { ++ SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_UID(zsb), NULL, ++ &new_uid, sizeof (new_uid)); ++ zp->z_uid = new_uid; ++ if (attrzp) { ++ SA_ADD_BULK_ATTR(xattr_bulk, xattr_count, ++ SA_ZPL_UID(zsb), NULL, &new_uid, ++ sizeof (new_uid)); ++ attrzp->z_uid = new_uid; ++ } ++ } ++ ++ if (mask & ATTR_GID) { ++ SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_GID(zsb), ++ NULL, &new_gid, sizeof (new_gid)); ++ zp->z_gid = new_gid; ++ if (attrzp) { ++ SA_ADD_BULK_ATTR(xattr_bulk, xattr_count, ++ SA_ZPL_GID(zsb), NULL, &new_gid, ++ sizeof (new_gid)); ++ attrzp->z_gid = new_gid; ++ } ++ } ++ if (!(mask & ATTR_MODE)) { ++ SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_MODE(zsb), ++ NULL, &new_mode, sizeof (new_mode)); ++ new_mode = zp->z_mode; ++ } ++ err = zfs_acl_chown_setattr(zp); ++ ASSERT(err == 0); ++ if (attrzp) { ++ err = zfs_acl_chown_setattr(attrzp); ++ ASSERT(err == 0); ++ } ++ } ++ ++ if (mask & ATTR_MODE) { ++ SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_MODE(zsb), NULL, ++ &new_mode, sizeof (new_mode)); ++ zp->z_mode = new_mode; ++ ASSERT3P(aclp, !=, NULL); ++ err = zfs_aclset_common(zp, aclp, cr, tx); ++ ASSERT3U(err, ==, 0); ++ if (zp->z_acl_cached) ++ zfs_acl_free(zp->z_acl_cached); ++ zp->z_acl_cached = aclp; ++ aclp = NULL; ++ } ++ ++ ++ if (mask & ATTR_ATIME) { ++ ZFS_TIME_ENCODE(&vap->va_atime, zp->z_atime); ++ SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_ATIME(zsb), NULL, ++ &zp->z_atime, sizeof (zp->z_atime)); ++ } ++ ++ if (mask & ATTR_MTIME) { ++ ZFS_TIME_ENCODE(&vap->va_mtime, mtime); ++ SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_MTIME(zsb), NULL, ++ mtime, sizeof (mtime)); ++ } ++ ++ /* XXX - shouldn't this be done *before* the ATIME/MTIME checks? */ ++ if (mask & ATTR_SIZE && !(mask & ATTR_MTIME)) { ++ SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_MTIME(zsb), ++ NULL, mtime, sizeof (mtime)); ++ SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_CTIME(zsb), NULL, ++ &ctime, sizeof (ctime)); ++ zfs_tstamp_update_setup(zp, CONTENT_MODIFIED, mtime, ctime, ++ B_TRUE); ++ } else if (mask != 0) { ++ SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_CTIME(zsb), NULL, ++ &ctime, sizeof (ctime)); ++ zfs_tstamp_update_setup(zp, STATE_CHANGED, mtime, ctime, ++ B_TRUE); ++ if (attrzp) { ++ SA_ADD_BULK_ATTR(xattr_bulk, xattr_count, ++ SA_ZPL_CTIME(zsb), NULL, ++ &ctime, sizeof (ctime)); ++ zfs_tstamp_update_setup(attrzp, STATE_CHANGED, ++ mtime, ctime, B_TRUE); ++ } ++ } ++ /* ++ * Do this after setting timestamps to prevent timestamp ++ * update from toggling bit ++ */ ++ ++ if (xoap && (mask & ATTR_XVATTR)) { ++ ++ /* ++ * restore trimmed off masks ++ * so that return masks can be set for caller. 
++ */ ++ ++ if (XVA_ISSET_REQ(tmpxvattr, XAT_APPENDONLY)) { ++ XVA_SET_REQ(xvap, XAT_APPENDONLY); ++ } ++ if (XVA_ISSET_REQ(tmpxvattr, XAT_NOUNLINK)) { ++ XVA_SET_REQ(xvap, XAT_NOUNLINK); ++ } ++ if (XVA_ISSET_REQ(tmpxvattr, XAT_IMMUTABLE)) { ++ XVA_SET_REQ(xvap, XAT_IMMUTABLE); ++ } ++ if (XVA_ISSET_REQ(tmpxvattr, XAT_NODUMP)) { ++ XVA_SET_REQ(xvap, XAT_NODUMP); ++ } ++ if (XVA_ISSET_REQ(tmpxvattr, XAT_AV_MODIFIED)) { ++ XVA_SET_REQ(xvap, XAT_AV_MODIFIED); ++ } ++ if (XVA_ISSET_REQ(tmpxvattr, XAT_AV_QUARANTINED)) { ++ XVA_SET_REQ(xvap, XAT_AV_QUARANTINED); ++ } ++ ++ if (XVA_ISSET_REQ(xvap, XAT_AV_SCANSTAMP)) ++ ASSERT(S_ISREG(ip->i_mode)); ++ ++ zfs_xvattr_set(zp, xvap, tx); ++ } ++ ++ if (fuid_dirtied) ++ zfs_fuid_sync(zsb, tx); ++ ++ if (mask != 0) ++ zfs_log_setattr(zilog, tx, TX_SETATTR, zp, vap, mask, fuidp); ++ ++ mutex_exit(&zp->z_lock); ++ if (mask & (ATTR_UID|ATTR_GID|ATTR_MODE)) ++ mutex_exit(&zp->z_acl_lock); ++ ++ if (attrzp) { ++ if (mask & (ATTR_UID|ATTR_GID|ATTR_MODE)) ++ mutex_exit(&attrzp->z_acl_lock); ++ mutex_exit(&attrzp->z_lock); ++ } ++out: ++ if (err == 0 && attrzp) { ++ err2 = sa_bulk_update(attrzp->z_sa_hdl, xattr_bulk, ++ xattr_count, tx); ++ ASSERT(err2 == 0); ++ } ++ ++ if (attrzp) ++ iput(ZTOI(attrzp)); ++ if (aclp) ++ zfs_acl_free(aclp); ++ ++ if (fuidp) { ++ zfs_fuid_info_free(fuidp); ++ fuidp = NULL; ++ } ++ ++ if (err) { ++ dmu_tx_abort(tx); ++ if (err == ERESTART) ++ goto top; ++ } else { ++ err2 = sa_bulk_update(zp->z_sa_hdl, bulk, count, tx); ++ dmu_tx_commit(tx); ++ zfs_inode_update(zp); ++ } ++ ++out2: ++ if (zsb->z_os->os_sync == ZFS_SYNC_ALWAYS) ++ zil_commit(zilog, 0); ++ ++out3: ++ kmem_free(xattr_bulk, sizeof(sa_bulk_attr_t) * 7); ++ kmem_free(bulk, sizeof(sa_bulk_attr_t) * 7); ++ kmem_free(tmpxvattr, sizeof(xvattr_t)); ++ ZFS_EXIT(zsb); ++ return (err); ++} ++EXPORT_SYMBOL(zfs_setattr); ++ ++typedef struct zfs_zlock { ++ krwlock_t *zl_rwlock; /* lock we acquired */ ++ znode_t *zl_znode; /* znode we held */ ++ struct zfs_zlock *zl_next; /* next in list */ ++} zfs_zlock_t; ++ ++/* ++ * Drop locks and release vnodes that were held by zfs_rename_lock(). ++ */ ++static void ++zfs_rename_unlock(zfs_zlock_t **zlpp) ++{ ++ zfs_zlock_t *zl; ++ ++ while ((zl = *zlpp) != NULL) { ++ if (zl->zl_znode != NULL) ++ iput(ZTOI(zl->zl_znode)); ++ rw_exit(zl->zl_rwlock); ++ *zlpp = zl->zl_next; ++ kmem_free(zl, sizeof (*zl)); ++ } ++} ++ ++/* ++ * Search back through the directory tree, using the ".." entries. ++ * Lock each directory in the chain to prevent concurrent renames. ++ * Fail any attempt to move a directory into one of its own descendants. ++ * XXX - z_parent_lock can overlap with map or grow locks ++ */ ++static int ++zfs_rename_lock(znode_t *szp, znode_t *tdzp, znode_t *sdzp, zfs_zlock_t **zlpp) ++{ ++ zfs_zlock_t *zl; ++ znode_t *zp = tdzp; ++ uint64_t rootid = ZTOZSB(zp)->z_root; ++ uint64_t oidp = zp->z_id; ++ krwlock_t *rwlp = &szp->z_parent_lock; ++ krw_t rw = RW_WRITER; ++ ++ /* ++ * First pass write-locks szp and compares to zp->z_id. ++ * Later passes read-lock zp and compare to zp->z_parent. ++ */ ++ do { ++ if (!rw_tryenter(rwlp, rw)) { ++ /* ++ * Another thread is renaming in this path. ++ * Note that if we are a WRITER, we don't have any ++ * parent_locks held yet. 
++ */ ++ if (rw == RW_READER && zp->z_id > szp->z_id) { ++ /* ++ * Drop our locks and restart ++ */ ++ zfs_rename_unlock(&zl); ++ *zlpp = NULL; ++ zp = tdzp; ++ oidp = zp->z_id; ++ rwlp = &szp->z_parent_lock; ++ rw = RW_WRITER; ++ continue; ++ } else { ++ /* ++ * Wait for other thread to drop its locks ++ */ ++ rw_enter(rwlp, rw); ++ } ++ } ++ ++ zl = kmem_alloc(sizeof (*zl), KM_SLEEP); ++ zl->zl_rwlock = rwlp; ++ zl->zl_znode = NULL; ++ zl->zl_next = *zlpp; ++ *zlpp = zl; ++ ++ if (oidp == szp->z_id) /* We're a descendant of szp */ ++ return (EINVAL); ++ ++ if (oidp == rootid) /* We've hit the top */ ++ return (0); ++ ++ if (rw == RW_READER) { /* i.e. not the first pass */ ++ int error = zfs_zget(ZTOZSB(zp), oidp, &zp); ++ if (error) ++ return (error); ++ zl->zl_znode = zp; ++ } ++ (void) sa_lookup(zp->z_sa_hdl, SA_ZPL_PARENT(ZTOZSB(zp)), ++ &oidp, sizeof (oidp)); ++ rwlp = &zp->z_parent_lock; ++ rw = RW_READER; ++ ++ } while (zp->z_id != sdzp->z_id); ++ ++ return (0); ++} ++ ++/* ++ * Move an entry from the provided source directory to the target ++ * directory. Change the entry name as indicated. ++ * ++ * IN: sdip - Source directory containing the "old entry". ++ * snm - Old entry name. ++ * tdip - Target directory to contain the "new entry". ++ * tnm - New entry name. ++ * cr - credentials of caller. ++ * flags - case flags ++ * ++ * RETURN: 0 if success ++ * error code if failure ++ * ++ * Timestamps: ++ * sdip,tdip - ctime|mtime updated ++ */ ++/*ARGSUSED*/ ++int ++zfs_rename(struct inode *sdip, char *snm, struct inode *tdip, char *tnm, ++ cred_t *cr, int flags) ++{ ++ znode_t *tdzp, *szp, *tzp; ++ znode_t *sdzp = ITOZ(sdip); ++ zfs_sb_t *zsb = ITOZSB(sdip); ++ zilog_t *zilog; ++ zfs_dirlock_t *sdl, *tdl; ++ dmu_tx_t *tx; ++ zfs_zlock_t *zl; ++ int cmp, serr, terr; ++ int error = 0; ++ int zflg = 0; ++ ++ ZFS_ENTER(zsb); ++ ZFS_VERIFY_ZP(sdzp); ++ zilog = zsb->z_log; ++ ++ if (tdip->i_sb != sdip->i_sb) { ++ ZFS_EXIT(zsb); ++ return (EXDEV); ++ } ++ ++ tdzp = ITOZ(tdip); ++ ZFS_VERIFY_ZP(tdzp); ++ if (zsb->z_utf8 && u8_validate(tnm, ++ strlen(tnm), NULL, U8_VALIDATE_ENTIRE, &error) < 0) { ++ ZFS_EXIT(zsb); ++ return (EILSEQ); ++ } ++ ++ if (flags & FIGNORECASE) ++ zflg |= ZCILOOK; ++ ++top: ++ szp = NULL; ++ tzp = NULL; ++ zl = NULL; ++ ++ /* ++ * This is to prevent the creation of links into attribute space ++ * by renaming a linked file into/outof an attribute directory. ++ * See the comment in zfs_link() for why this is considered bad. ++ */ ++ if ((tdzp->z_pflags & ZFS_XATTR) != (sdzp->z_pflags & ZFS_XATTR)) { ++ ZFS_EXIT(zsb); ++ return (EINVAL); ++ } ++ ++ /* ++ * Lock source and target directory entries. To prevent deadlock, ++ * a lock ordering must be defined. We lock the directory with ++ * the smallest object id first, or if it's a tie, the one with ++ * the lexically first name. ++ */ ++ if (sdzp->z_id < tdzp->z_id) { ++ cmp = -1; ++ } else if (sdzp->z_id > tdzp->z_id) { ++ cmp = 1; ++ } else { ++ /* ++ * First compare the two name arguments without ++ * considering any case folding. ++ */ ++ int nofold = (zsb->z_norm & ~U8_TEXTPREP_TOUPPER); ++ ++ cmp = u8_strcmp(snm, tnm, 0, nofold, U8_UNICODE_LATEST, &error); ++ ASSERT(error == 0 || !zsb->z_utf8); ++ if (cmp == 0) { ++ /* ++ * POSIX: "If the old argument and the new argument ++ * both refer to links to the same existing file, ++ * the rename() function shall return successfully ++ * and perform no other action." 
++ */ ++ ZFS_EXIT(zsb); ++ return (0); ++ } ++ /* ++ * If the file system is case-folding, then we may ++ * have some more checking to do. A case-folding file ++ * system is either supporting mixed case sensitivity ++ * access or is completely case-insensitive. Note ++ * that the file system is always case preserving. ++ * ++ * In mixed sensitivity mode case sensitive behavior ++ * is the default. FIGNORECASE must be used to ++ * explicitly request case insensitive behavior. ++ * ++ * If the source and target names provided differ only ++ * by case (e.g., a request to rename 'tim' to 'Tim'), ++ * we will treat this as a special case in the ++ * case-insensitive mode: as long as the source name ++ * is an exact match, we will allow this to proceed as ++ * a name-change request. ++ */ ++ if ((zsb->z_case == ZFS_CASE_INSENSITIVE || ++ (zsb->z_case == ZFS_CASE_MIXED && ++ flags & FIGNORECASE)) && ++ u8_strcmp(snm, tnm, 0, zsb->z_norm, U8_UNICODE_LATEST, ++ &error) == 0) { ++ /* ++ * case preserving rename request, require exact ++ * name matches ++ */ ++ zflg |= ZCIEXACT; ++ zflg &= ~ZCILOOK; ++ } ++ } ++ ++ /* ++ * If the source and destination directories are the same, we should ++ * grab the z_name_lock of that directory only once. ++ */ ++ if (sdzp == tdzp) { ++ zflg |= ZHAVELOCK; ++ rw_enter(&sdzp->z_name_lock, RW_READER); ++ } ++ ++ if (cmp < 0) { ++ serr = zfs_dirent_lock(&sdl, sdzp, snm, &szp, ++ ZEXISTS | zflg, NULL, NULL); ++ terr = zfs_dirent_lock(&tdl, ++ tdzp, tnm, &tzp, ZRENAMING | zflg, NULL, NULL); ++ } else { ++ terr = zfs_dirent_lock(&tdl, ++ tdzp, tnm, &tzp, zflg, NULL, NULL); ++ serr = zfs_dirent_lock(&sdl, ++ sdzp, snm, &szp, ZEXISTS | ZRENAMING | zflg, ++ NULL, NULL); ++ } ++ ++ if (serr) { ++ /* ++ * Source entry invalid or not there. ++ */ ++ if (!terr) { ++ zfs_dirent_unlock(tdl); ++ if (tzp) ++ iput(ZTOI(tzp)); ++ } ++ ++ if (sdzp == tdzp) ++ rw_exit(&sdzp->z_name_lock); ++ ++ if (strcmp(snm, "..") == 0) ++ serr = EINVAL; ++ ZFS_EXIT(zsb); ++ return (serr); ++ } ++ if (terr) { ++ zfs_dirent_unlock(sdl); ++ iput(ZTOI(szp)); ++ ++ if (sdzp == tdzp) ++ rw_exit(&sdzp->z_name_lock); ++ ++ if (strcmp(tnm, "..") == 0) ++ terr = EINVAL; ++ ZFS_EXIT(zsb); ++ return (terr); ++ } ++ ++ /* ++ * Must have write access at the source to remove the old entry ++ * and write access at the target to create the new entry. ++ * Note that if target and source are the same, this can be ++ * done in a single check. ++ */ ++ ++ if ((error = zfs_zaccess_rename(sdzp, szp, tdzp, tzp, cr))) ++ goto out; ++ ++ if (S_ISDIR(ZTOI(szp)->i_mode)) { ++ /* ++ * Check to make sure rename is valid. ++ * Can't do a move like this: /usr/a/b to /usr/a/b/c/d ++ */ ++ if ((error = zfs_rename_lock(szp, tdzp, sdzp, &zl))) ++ goto out; ++ } ++ ++ /* ++ * Does target exist? ++ */ ++ if (tzp) { ++ /* ++ * Source and target must be the same type. ++ */ ++ if (S_ISDIR(ZTOI(szp)->i_mode)) { ++ if (!S_ISDIR(ZTOI(tzp)->i_mode)) { ++ error = ENOTDIR; ++ goto out; ++ } ++ } else { ++ if (S_ISDIR(ZTOI(tzp)->i_mode)) { ++ error = EISDIR; ++ goto out; ++ } ++ } ++ /* ++ * POSIX dictates that when the source and target ++ * entries refer to the same file object, rename ++ * must do nothing and exit without error. 
++ */ ++ if (szp->z_id == tzp->z_id) { ++ error = 0; ++ goto out; ++ } ++ } ++ ++ tx = dmu_tx_create(zsb->z_os); ++ dmu_tx_hold_sa(tx, szp->z_sa_hdl, B_FALSE); ++ dmu_tx_hold_sa(tx, sdzp->z_sa_hdl, B_FALSE); ++ dmu_tx_hold_zap(tx, sdzp->z_id, FALSE, snm); ++ dmu_tx_hold_zap(tx, tdzp->z_id, TRUE, tnm); ++ if (sdzp != tdzp) { ++ dmu_tx_hold_sa(tx, tdzp->z_sa_hdl, B_FALSE); ++ zfs_sa_upgrade_txholds(tx, tdzp); ++ } ++ if (tzp) { ++ dmu_tx_hold_sa(tx, tzp->z_sa_hdl, B_FALSE); ++ zfs_sa_upgrade_txholds(tx, tzp); ++ } ++ ++ zfs_sa_upgrade_txholds(tx, szp); ++ dmu_tx_hold_zap(tx, zsb->z_unlinkedobj, FALSE, NULL); ++ error = dmu_tx_assign(tx, TXG_NOWAIT); ++ if (error) { ++ if (zl != NULL) ++ zfs_rename_unlock(&zl); ++ zfs_dirent_unlock(sdl); ++ zfs_dirent_unlock(tdl); ++ ++ if (sdzp == tdzp) ++ rw_exit(&sdzp->z_name_lock); ++ ++ iput(ZTOI(szp)); ++ if (tzp) ++ iput(ZTOI(tzp)); ++ if (error == ERESTART) { ++ dmu_tx_wait(tx); ++ dmu_tx_abort(tx); ++ goto top; ++ } ++ dmu_tx_abort(tx); ++ ZFS_EXIT(zsb); ++ return (error); ++ } ++ ++ if (tzp) /* Attempt to remove the existing target */ ++ error = zfs_link_destroy(tdl, tzp, tx, zflg, NULL); ++ ++ if (error == 0) { ++ error = zfs_link_create(tdl, szp, tx, ZRENAMING); ++ if (error == 0) { ++ szp->z_pflags |= ZFS_AV_MODIFIED; ++ ++ error = sa_update(szp->z_sa_hdl, SA_ZPL_FLAGS(zsb), ++ (void *)&szp->z_pflags, sizeof (uint64_t), tx); ++ ASSERT3U(error, ==, 0); ++ ++ error = zfs_link_destroy(sdl, szp, tx, ZRENAMING, NULL); ++ if (error == 0) { ++ zfs_log_rename(zilog, tx, TX_RENAME | ++ (flags & FIGNORECASE ? TX_CI : 0), sdzp, ++ sdl->dl_name, tdzp, tdl->dl_name, szp); ++ } else { ++ /* ++ * At this point, we have successfully created ++ * the target name, but have failed to remove ++ * the source name. Since the create was done ++ * with the ZRENAMING flag, there are ++ * complications; for one, the link count is ++ * wrong. The easiest way to deal with this ++ * is to remove the newly created target, and ++ * return the original error. This must ++ * succeed; fortunately, it is very unlikely to ++ * fail, since we just created it. ++ */ ++ VERIFY3U(zfs_link_destroy(tdl, szp, tx, ++ ZRENAMING, NULL), ==, 0); ++ } ++ } ++ } ++ ++ dmu_tx_commit(tx); ++out: ++ if (zl != NULL) ++ zfs_rename_unlock(&zl); ++ ++ zfs_dirent_unlock(sdl); ++ zfs_dirent_unlock(tdl); ++ ++ zfs_inode_update(sdzp); ++ if (sdzp == tdzp) ++ rw_exit(&sdzp->z_name_lock); ++ ++ if (sdzp != tdzp) ++ zfs_inode_update(tdzp); ++ ++ zfs_inode_update(szp); ++ iput(ZTOI(szp)); ++ if (tzp) { ++ zfs_inode_update(tzp); ++ iput(ZTOI(tzp)); ++ } ++ ++ if (zsb->z_os->os_sync == ZFS_SYNC_ALWAYS) ++ zil_commit(zilog, 0); ++ ++ ZFS_EXIT(zsb); ++ return (error); ++} ++EXPORT_SYMBOL(zfs_rename); ++ ++/* ++ * Insert the indicated symbolic reference entry into the directory. ++ * ++ * IN: dip - Directory to contain new symbolic link. ++ * link - Name for new symlink entry. ++ * vap - Attributes of new entry. ++ * target - Target path of new symlink. ++ * ++ * cr - credentials of caller. 
++ * flags - case flags ++ * ++ * RETURN: 0 if success ++ * error code if failure ++ * ++ * Timestamps: ++ * dip - ctime|mtime updated ++ */ ++/*ARGSUSED*/ ++int ++zfs_symlink(struct inode *dip, char *name, vattr_t *vap, char *link, ++ struct inode **ipp, cred_t *cr, int flags) ++{ ++ znode_t *zp, *dzp = ITOZ(dip); ++ zfs_dirlock_t *dl; ++ dmu_tx_t *tx; ++ zfs_sb_t *zsb = ITOZSB(dip); ++ zilog_t *zilog; ++ uint64_t len = strlen(link); ++ int error; ++ int zflg = ZNEW; ++ zfs_acl_ids_t acl_ids; ++ boolean_t fuid_dirtied; ++ uint64_t txtype = TX_SYMLINK; ++ ++ ASSERT(S_ISLNK(vap->va_mode)); ++ ++ ZFS_ENTER(zsb); ++ ZFS_VERIFY_ZP(dzp); ++ zilog = zsb->z_log; ++ ++ if (zsb->z_utf8 && u8_validate(name, strlen(name), ++ NULL, U8_VALIDATE_ENTIRE, &error) < 0) { ++ ZFS_EXIT(zsb); ++ return (EILSEQ); ++ } ++ if (flags & FIGNORECASE) ++ zflg |= ZCILOOK; ++ ++ if (len > MAXPATHLEN) { ++ ZFS_EXIT(zsb); ++ return (ENAMETOOLONG); ++ } ++ ++ if ((error = zfs_acl_ids_create(dzp, 0, ++ vap, cr, NULL, &acl_ids)) != 0) { ++ ZFS_EXIT(zsb); ++ return (error); ++ } ++top: ++ *ipp = NULL; ++ ++ /* ++ * Attempt to lock directory; fail if entry already exists. ++ */ ++ error = zfs_dirent_lock(&dl, dzp, name, &zp, zflg, NULL, NULL); ++ if (error) { ++ zfs_acl_ids_free(&acl_ids); ++ ZFS_EXIT(zsb); ++ return (error); ++ } ++ ++ if ((error = zfs_zaccess(dzp, ACE_ADD_FILE, 0, B_FALSE, cr))) { ++ zfs_acl_ids_free(&acl_ids); ++ zfs_dirent_unlock(dl); ++ ZFS_EXIT(zsb); ++ return (error); ++ } ++ ++ if (zfs_acl_ids_overquota(zsb, &acl_ids)) { ++ zfs_acl_ids_free(&acl_ids); ++ zfs_dirent_unlock(dl); ++ ZFS_EXIT(zsb); ++ return (EDQUOT); ++ } ++ tx = dmu_tx_create(zsb->z_os); ++ fuid_dirtied = zsb->z_fuid_dirty; ++ dmu_tx_hold_write(tx, DMU_NEW_OBJECT, 0, MAX(1, len)); ++ dmu_tx_hold_zap(tx, dzp->z_id, TRUE, name); ++ dmu_tx_hold_sa_create(tx, acl_ids.z_aclp->z_acl_bytes + ++ ZFS_SA_BASE_ATTR_SIZE + len); ++ dmu_tx_hold_sa(tx, dzp->z_sa_hdl, B_FALSE); ++ if (!zsb->z_use_sa && acl_ids.z_aclp->z_acl_bytes > ZFS_ACE_SPACE) { ++ dmu_tx_hold_write(tx, DMU_NEW_OBJECT, 0, ++ acl_ids.z_aclp->z_acl_bytes); ++ } ++ if (fuid_dirtied) ++ zfs_fuid_txhold(zsb, tx); ++ error = dmu_tx_assign(tx, TXG_NOWAIT); ++ if (error) { ++ zfs_dirent_unlock(dl); ++ if (error == ERESTART) { ++ dmu_tx_wait(tx); ++ dmu_tx_abort(tx); ++ goto top; ++ } ++ zfs_acl_ids_free(&acl_ids); ++ dmu_tx_abort(tx); ++ ZFS_EXIT(zsb); ++ return (error); ++ } ++ ++ /* ++ * Create a new object for the symlink. ++ * for version 4 ZPL datsets the symlink will be an SA attribute ++ */ ++ zfs_mknode(dzp, vap, tx, cr, 0, &zp, &acl_ids); ++ ++ if (fuid_dirtied) ++ zfs_fuid_sync(zsb, tx); ++ ++ mutex_enter(&zp->z_lock); ++ if (zp->z_is_sa) ++ error = sa_update(zp->z_sa_hdl, SA_ZPL_SYMLINK(zsb), ++ link, len, tx); ++ else ++ zfs_sa_symlink(zp, link, len, tx); ++ mutex_exit(&zp->z_lock); ++ ++ zp->z_size = len; ++ (void) sa_update(zp->z_sa_hdl, SA_ZPL_SIZE(zsb), ++ &zp->z_size, sizeof (zp->z_size), tx); ++ /* ++ * Insert the new object into the directory. 
++ */ ++ (void) zfs_link_create(dl, zp, tx, ZNEW); ++ ++ if (flags & FIGNORECASE) ++ txtype |= TX_CI; ++ zfs_log_symlink(zilog, tx, txtype, dzp, zp, name, link); ++ ++ zfs_inode_update(dzp); ++ zfs_inode_update(zp); ++ ++ zfs_acl_ids_free(&acl_ids); ++ ++ dmu_tx_commit(tx); ++ ++ zfs_dirent_unlock(dl); ++ ++ *ipp = ZTOI(zp); ++ ++ if (zsb->z_os->os_sync == ZFS_SYNC_ALWAYS) ++ zil_commit(zilog, 0); ++ ++ ZFS_EXIT(zsb); ++ return (error); ++} ++EXPORT_SYMBOL(zfs_symlink); ++ ++/* ++ * Return, in the buffer contained in the provided uio structure, ++ * the symbolic path referred to by ip. ++ * ++ * IN: ip - inode of symbolic link ++ * uio - structure to contain the link path. ++ * cr - credentials of caller. ++ * ++ * RETURN: 0 if success ++ * error code if failure ++ * ++ * Timestamps: ++ * ip - atime updated ++ */ ++/* ARGSUSED */ ++int ++zfs_readlink(struct inode *ip, uio_t *uio, cred_t *cr) ++{ ++ znode_t *zp = ITOZ(ip); ++ zfs_sb_t *zsb = ITOZSB(ip); ++ int error; ++ ++ ZFS_ENTER(zsb); ++ ZFS_VERIFY_ZP(zp); ++ ++ mutex_enter(&zp->z_lock); ++ if (zp->z_is_sa) ++ error = sa_lookup_uio(zp->z_sa_hdl, ++ SA_ZPL_SYMLINK(zsb), uio); ++ else ++ error = zfs_sa_readlink(zp, uio); ++ mutex_exit(&zp->z_lock); ++ ++ ZFS_ACCESSTIME_STAMP(zsb, zp); ++ zfs_inode_update(zp); ++ ZFS_EXIT(zsb); ++ return (error); ++} ++EXPORT_SYMBOL(zfs_readlink); ++ ++/* ++ * Insert a new entry into directory tdip referencing sip. ++ * ++ * IN: tdip - Directory to contain new entry. ++ * sip - inode of new entry. ++ * name - name of new entry. ++ * cr - credentials of caller. ++ * ++ * RETURN: 0 if success ++ * error code if failure ++ * ++ * Timestamps: ++ * tdip - ctime|mtime updated ++ * sip - ctime updated ++ */ ++/* ARGSUSED */ ++int ++zfs_link(struct inode *tdip, struct inode *sip, char *name, cred_t *cr) ++{ ++ znode_t *dzp = ITOZ(tdip); ++ znode_t *tzp, *szp; ++ zfs_sb_t *zsb = ITOZSB(tdip); ++ zilog_t *zilog; ++ zfs_dirlock_t *dl; ++ dmu_tx_t *tx; ++ int error; ++ int zf = ZNEW; ++ uint64_t parent; ++ uid_t owner; ++ ++ ASSERT(S_ISDIR(tdip->i_mode)); ++ ++ ZFS_ENTER(zsb); ++ ZFS_VERIFY_ZP(dzp); ++ zilog = zsb->z_log; ++ ++ /* ++ * POSIX dictates that we return EPERM here. ++ * Better choices include ENOTSUP or EISDIR. ++ */ ++ if (S_ISDIR(sip->i_mode)) { ++ ZFS_EXIT(zsb); ++ return (EPERM); ++ } ++ ++ if (sip->i_sb != tdip->i_sb) { ++ ZFS_EXIT(zsb); ++ return (EXDEV); ++ } ++ ++ szp = ITOZ(sip); ++ ZFS_VERIFY_ZP(szp); ++ ++ /* Prevent links to .zfs/shares files */ ++ ++ if ((error = sa_lookup(szp->z_sa_hdl, SA_ZPL_PARENT(zsb), ++ &parent, sizeof (uint64_t))) != 0) { ++ ZFS_EXIT(zsb); ++ return (error); ++ } ++ if (parent == zsb->z_shares_dir) { ++ ZFS_EXIT(zsb); ++ return (EPERM); ++ } ++ ++ if (zsb->z_utf8 && u8_validate(name, ++ strlen(name), NULL, U8_VALIDATE_ENTIRE, &error) < 0) { ++ ZFS_EXIT(zsb); ++ return (EILSEQ); ++ } ++#ifdef HAVE_PN_UTILS ++ if (flags & FIGNORECASE) ++ zf |= ZCILOOK; ++#endif /* HAVE_PN_UTILS */ ++ ++ /* ++ * We do not support links between attributes and non-attributes ++ * because of the potential security risk of creating links ++ * into "normal" file space in order to circumvent restrictions ++ * imposed in attribute space. 
++ */ ++ if ((szp->z_pflags & ZFS_XATTR) != (dzp->z_pflags & ZFS_XATTR)) { ++ ZFS_EXIT(zsb); ++ return (EINVAL); ++ } ++ ++ owner = zfs_fuid_map_id(zsb, szp->z_uid, cr, ZFS_OWNER); ++ if (owner != crgetuid(cr) && secpolicy_basic_link(cr) != 0) { ++ ZFS_EXIT(zsb); ++ return (EPERM); ++ } ++ ++ if ((error = zfs_zaccess(dzp, ACE_ADD_FILE, 0, B_FALSE, cr))) { ++ ZFS_EXIT(zsb); ++ return (error); ++ } ++ ++top: ++ /* ++ * Attempt to lock directory; fail if entry already exists. ++ */ ++ error = zfs_dirent_lock(&dl, dzp, name, &tzp, zf, NULL, NULL); ++ if (error) { ++ ZFS_EXIT(zsb); ++ return (error); ++ } ++ ++ tx = dmu_tx_create(zsb->z_os); ++ dmu_tx_hold_sa(tx, szp->z_sa_hdl, B_FALSE); ++ dmu_tx_hold_zap(tx, dzp->z_id, TRUE, name); ++ zfs_sa_upgrade_txholds(tx, szp); ++ zfs_sa_upgrade_txholds(tx, dzp); ++ error = dmu_tx_assign(tx, TXG_NOWAIT); ++ if (error) { ++ zfs_dirent_unlock(dl); ++ if (error == ERESTART) { ++ dmu_tx_wait(tx); ++ dmu_tx_abort(tx); ++ goto top; ++ } ++ dmu_tx_abort(tx); ++ ZFS_EXIT(zsb); ++ return (error); ++ } ++ ++ error = zfs_link_create(dl, szp, tx, 0); ++ ++ if (error == 0) { ++ uint64_t txtype = TX_LINK; ++#ifdef HAVE_PN_UTILS ++ if (flags & FIGNORECASE) ++ txtype |= TX_CI; ++#endif /* HAVE_PN_UTILS */ ++ zfs_log_link(zilog, tx, txtype, dzp, szp, name); ++ } ++ ++ dmu_tx_commit(tx); ++ ++ zfs_dirent_unlock(dl); ++ ++ if (zsb->z_os->os_sync == ZFS_SYNC_ALWAYS) ++ zil_commit(zilog, 0); ++ ++ zfs_inode_update(dzp); ++ zfs_inode_update(szp); ++ ZFS_EXIT(zsb); ++ return (error); ++} ++EXPORT_SYMBOL(zfs_link); ++ ++static void ++zfs_putpage_commit_cb(void *arg, int error) ++{ ++ struct page *pp = arg; ++ ++ if (error) { ++ __set_page_dirty_nobuffers(pp); ++ ++ if (error != ECANCELED) ++ SetPageError(pp); ++ } else { ++ ClearPageError(pp); ++ } ++ ++ end_page_writeback(pp); ++} ++ ++/* ++ * Push a page out to disk, once the page is on stable storage the ++ * registered commit callback will be run as notification of completion. ++ * ++ * IN: ip - page mapped for inode. ++ * pp - page to push (page is locked) ++ * wbc - writeback control data ++ * ++ * RETURN: 0 if success ++ * error code if failure ++ * ++ * Timestamps: ++ * ip - ctime|mtime updated ++ */ ++/* ARGSUSED */ ++int ++zfs_putpage(struct inode *ip, struct page *pp, struct writeback_control *wbc) ++{ ++ znode_t *zp = ITOZ(ip); ++ zfs_sb_t *zsb = ITOZSB(ip); ++ loff_t offset; ++ loff_t pgoff; ++ unsigned int pglen; ++ rl_t *rl; ++ dmu_tx_t *tx; ++ caddr_t va; ++ int err = 0; ++ uint64_t mtime[2], ctime[2]; ++ sa_bulk_attr_t bulk[3]; ++ int cnt = 0; ++ int sync; ++ ++ ZFS_ENTER(zsb); ++ ZFS_VERIFY_ZP(zp); ++ ++ ASSERT(PageLocked(pp)); ++ ++ pgoff = page_offset(pp); /* Page byte-offset in file */ ++ offset = i_size_read(ip); /* File length in bytes */ ++ pglen = MIN(PAGE_CACHE_SIZE, /* Page length in bytes */ ++ P2ROUNDUP(offset, PAGE_CACHE_SIZE)-pgoff); ++ ++ /* Page is beyond end of file */ ++ if (pgoff >= offset) { ++ unlock_page(pp); ++ ZFS_EXIT(zsb); ++ return (0); ++ } ++ ++ /* Truncate page length to end of file */ ++ if (pgoff + pglen > offset) ++ pglen = offset - pgoff; ++ ++#if 0 ++ /* ++ * FIXME: Allow mmap writes past its quota. The correct fix ++ * is to register a page_mkwrite() handler to count the page ++ * against its quota when it is about to be dirtied. 
++ */ ++ if (zfs_owner_overquota(zsb, zp, B_FALSE) || ++ zfs_owner_overquota(zsb, zp, B_TRUE)) { ++ err = EDQUOT; ++ } ++#endif ++ ++ set_page_writeback(pp); ++ unlock_page(pp); ++ ++ rl = zfs_range_lock(zp, pgoff, pglen, RL_WRITER); ++ tx = dmu_tx_create(zsb->z_os); ++ ++ sync = ((zsb->z_os->os_sync == ZFS_SYNC_ALWAYS) || ++ (wbc->sync_mode == WB_SYNC_ALL)); ++ if (!sync) ++ dmu_tx_callback_register(tx, zfs_putpage_commit_cb, pp); ++ ++ dmu_tx_hold_write(tx, zp->z_id, pgoff, pglen); ++ ++ dmu_tx_hold_sa(tx, zp->z_sa_hdl, B_FALSE); ++ zfs_sa_upgrade_txholds(tx, zp); ++ err = dmu_tx_assign(tx, TXG_NOWAIT); ++ if (err != 0) { ++ if (err == ERESTART) ++ dmu_tx_wait(tx); ++ ++ /* Will call all registered commit callbacks */ ++ dmu_tx_abort(tx); ++ ++ /* ++ * For the synchronous case the commit callback must be ++ * explicitly called because there is no registered callback. ++ */ ++ if (sync) ++ zfs_putpage_commit_cb(pp, ECANCELED); ++ ++ zfs_range_unlock(rl); ++ ZFS_EXIT(zsb); ++ return (err); ++ } ++ ++ va = kmap(pp); ++ ASSERT3U(pglen, <=, PAGE_CACHE_SIZE); ++ dmu_write(zsb->z_os, zp->z_id, pgoff, pglen, va, tx); ++ kunmap(pp); ++ ++ SA_ADD_BULK_ATTR(bulk, cnt, SA_ZPL_MTIME(zsb), NULL, &mtime, 16); ++ SA_ADD_BULK_ATTR(bulk, cnt, SA_ZPL_CTIME(zsb), NULL, &ctime, 16); ++ SA_ADD_BULK_ATTR(bulk, cnt, SA_ZPL_FLAGS(zsb), NULL, &zp->z_pflags, 8); ++ zfs_tstamp_update_setup(zp, CONTENT_MODIFIED, mtime, ctime, B_TRUE); ++ zfs_log_write(zsb->z_log, tx, TX_WRITE, zp, pgoff, pglen, 0); ++ ++ dmu_tx_commit(tx); ++ zfs_range_unlock(rl); ++ ASSERT3S(err, ==, 0); ++ ++ if (sync) { ++ zil_commit(zsb->z_log, zp->z_id); ++ zfs_putpage_commit_cb(pp, err); ++ } ++ ++ ZFS_EXIT(zsb); ++ return (err); ++} ++ ++/*ARGSUSED*/ ++void ++zfs_inactive(struct inode *ip) ++{ ++ znode_t *zp = ITOZ(ip); ++ zfs_sb_t *zsb = ITOZSB(ip); ++ int error; ++ ++ if (zfsctl_is_node(ip)) { ++ zfsctl_inode_inactive(ip); ++ return; ++ } ++ ++ rw_enter(&zsb->z_teardown_inactive_lock, RW_READER); ++ if (zp->z_sa_hdl == NULL) { ++ rw_exit(&zsb->z_teardown_inactive_lock); ++ return; ++ } ++ ++ if (zp->z_atime_dirty && zp->z_unlinked == 0) { ++ dmu_tx_t *tx = dmu_tx_create(zsb->z_os); ++ ++ dmu_tx_hold_sa(tx, zp->z_sa_hdl, B_FALSE); ++ zfs_sa_upgrade_txholds(tx, zp); ++ error = dmu_tx_assign(tx, TXG_WAIT); ++ if (error) { ++ dmu_tx_abort(tx); ++ } else { ++ mutex_enter(&zp->z_lock); ++ (void) sa_update(zp->z_sa_hdl, SA_ZPL_ATIME(zsb), ++ (void *)&zp->z_atime, sizeof (zp->z_atime), tx); ++ zp->z_atime_dirty = 0; ++ mutex_exit(&zp->z_lock); ++ dmu_tx_commit(tx); ++ } ++ } ++ ++ zfs_zinactive(zp); ++ rw_exit(&zsb->z_teardown_inactive_lock); ++} ++EXPORT_SYMBOL(zfs_inactive); ++ ++/* ++ * Bounds-check the seek operation. ++ * ++ * IN: ip - inode seeking within ++ * ooff - old file offset ++ * noffp - pointer to new file offset ++ * ct - caller context ++ * ++ * RETURN: 0 if success ++ * EINVAL if new offset invalid ++ */ ++/* ARGSUSED */ ++int ++zfs_seek(struct inode *ip, offset_t ooff, offset_t *noffp) ++{ ++ if (S_ISDIR(ip->i_mode)) ++ return (0); ++ return ((*noffp < 0 || *noffp > MAXOFFSET_T) ? EINVAL : 0); ++} ++EXPORT_SYMBOL(zfs_seek); ++ ++/* ++ * Fill pages with data from the disk. 
++ */ ++static int ++zfs_fillpage(struct inode *ip, struct page *pl[], int nr_pages) ++{ ++ znode_t *zp = ITOZ(ip); ++ zfs_sb_t *zsb = ITOZSB(ip); ++ objset_t *os; ++ struct page *cur_pp; ++ u_offset_t io_off, total; ++ size_t io_len; ++ loff_t i_size; ++ unsigned page_idx; ++ int err; ++ ++ os = zsb->z_os; ++ io_len = nr_pages << PAGE_CACHE_SHIFT; ++ i_size = i_size_read(ip); ++ io_off = page_offset(pl[0]); ++ ++ if (io_off + io_len > i_size) ++ io_len = i_size - io_off; ++ ++ /* ++ * Iterate over list of pages and read each page individually. ++ */ ++ page_idx = 0; ++ cur_pp = pl[0]; ++ for (total = io_off + io_len; io_off < total; io_off += PAGESIZE) { ++ caddr_t va; ++ ++ va = kmap(cur_pp); ++ err = dmu_read(os, zp->z_id, io_off, PAGESIZE, va, ++ DMU_READ_PREFETCH); ++ kunmap(cur_pp); ++ if (err) { ++ /* convert checksum errors into IO errors */ ++ if (err == ECKSUM) ++ err = EIO; ++ return (err); ++ } ++ cur_pp = pl[++page_idx]; ++ } ++ ++ return (0); ++} ++ ++/* ++ * Uses zfs_fillpage to read data from the file and fill the pages. ++ * ++ * IN: ip - inode of file to get data from. ++ * pl - list of pages to read ++ * nr_pages - number of pages to read ++ * ++ * RETURN: 0 if success ++ * error code if failure ++ * ++ * Timestamps: ++ * vp - atime updated ++ */ ++/* ARGSUSED */ ++int ++zfs_getpage(struct inode *ip, struct page *pl[], int nr_pages) ++{ ++ znode_t *zp = ITOZ(ip); ++ zfs_sb_t *zsb = ITOZSB(ip); ++ int err; ++ ++ if (pl == NULL) ++ return (0); ++ ++ ZFS_ENTER(zsb); ++ ZFS_VERIFY_ZP(zp); ++ ++ err = zfs_fillpage(ip, pl, nr_pages); ++ ++ if (!err) ++ ZFS_ACCESSTIME_STAMP(zsb, zp); ++ ++ ZFS_EXIT(zsb); ++ return (err); ++} ++EXPORT_SYMBOL(zfs_getpage); ++ ++/* ++ * Check ZFS specific permissions to memory map a section of a file. ++ * ++ * IN: ip - inode of the file to mmap ++ * off - file offset ++ * addrp - start address in memory region ++ * len - length of memory region ++ * vm_flags- address flags ++ * ++ * RETURN: 0 if success ++ * error code if failure ++ */ ++/*ARGSUSED*/ ++int ++zfs_map(struct inode *ip, offset_t off, caddr_t *addrp, size_t len, ++ unsigned long vm_flags) ++{ ++ znode_t *zp = ITOZ(ip); ++ zfs_sb_t *zsb = ITOZSB(ip); ++ ++ ZFS_ENTER(zsb); ++ ZFS_VERIFY_ZP(zp); ++ ++ if ((vm_flags & VM_WRITE) && (zp->z_pflags & ++ (ZFS_IMMUTABLE | ZFS_READONLY | ZFS_APPENDONLY))) { ++ ZFS_EXIT(zsb); ++ return (EPERM); ++ } ++ ++ if ((vm_flags & (VM_READ | VM_EXEC)) && ++ (zp->z_pflags & ZFS_AV_QUARANTINED)) { ++ ZFS_EXIT(zsb); ++ return (EACCES); ++ } ++ ++ if (off < 0 || len > MAXOFFSET_T - off) { ++ ZFS_EXIT(zsb); ++ return (ENXIO); ++ } ++ ++ ZFS_EXIT(zsb); ++ return (0); ++} ++EXPORT_SYMBOL(zfs_map); ++ ++/* ++ * convoff - converts the given data (start, whence) to the ++ * given whence. 
++ */ ++int ++convoff(struct inode *ip, flock64_t *lckdat, int whence, offset_t offset) ++{ ++ vattr_t vap; ++ int error; ++ ++ if ((lckdat->l_whence == 2) || (whence == 2)) { ++ if ((error = zfs_getattr(ip, &vap, 0, CRED()) != 0)) ++ return (error); ++ } ++ ++ switch (lckdat->l_whence) { ++ case 1: ++ lckdat->l_start += offset; ++ break; ++ case 2: ++ lckdat->l_start += vap.va_size; ++ /* FALLTHRU */ ++ case 0: ++ break; ++ default: ++ return (EINVAL); ++ } ++ ++ if (lckdat->l_start < 0) ++ return (EINVAL); ++ ++ switch (whence) { ++ case 1: ++ lckdat->l_start -= offset; ++ break; ++ case 2: ++ lckdat->l_start -= vap.va_size; ++ /* FALLTHRU */ ++ case 0: ++ break; ++ default: ++ return (EINVAL); ++ } ++ ++ lckdat->l_whence = (short)whence; ++ return (0); ++} ++ ++/* ++ * Free or allocate space in a file. Currently, this function only ++ * supports the `F_FREESP' command. However, this command is somewhat ++ * misnamed, as its functionality includes the ability to allocate as ++ * well as free space. ++ * ++ * IN: ip - inode of file to free data in. ++ * cmd - action to take (only F_FREESP supported). ++ * bfp - section of file to free/alloc. ++ * flag - current file open mode flags. ++ * offset - current file offset. ++ * cr - credentials of caller [UNUSED]. ++ * ++ * RETURN: 0 if success ++ * error code if failure ++ * ++ * Timestamps: ++ * ip - ctime|mtime updated ++ */ ++/* ARGSUSED */ ++int ++zfs_space(struct inode *ip, int cmd, flock64_t *bfp, int flag, ++ offset_t offset, cred_t *cr) ++{ ++ znode_t *zp = ITOZ(ip); ++ zfs_sb_t *zsb = ITOZSB(ip); ++ uint64_t off, len; ++ int error; ++ ++ ZFS_ENTER(zsb); ++ ZFS_VERIFY_ZP(zp); ++ ++ if (cmd != F_FREESP) { ++ ZFS_EXIT(zsb); ++ return (EINVAL); ++ } ++ ++ if ((error = convoff(ip, bfp, 0, offset))) { ++ ZFS_EXIT(zsb); ++ return (error); ++ } ++ ++ if (bfp->l_len < 0) { ++ ZFS_EXIT(zsb); ++ return (EINVAL); ++ } ++ ++ /* ++ * Permissions aren't checked on Solaris because on this OS ++ * zfs_space() can only be called with an opened file handle. ++ * On Linux we can get here through truncate_range() which ++ * operates directly on inodes, so we need to check access rights. ++ */ ++ if ((error = zfs_zaccess(zp, ACE_WRITE_DATA, 0, B_FALSE, cr))) { ++ ZFS_EXIT(zsb); ++ return (error); ++ } ++ ++ off = bfp->l_start; ++ len = bfp->l_len; /* 0 means from off to end of file */ ++ ++ error = zfs_freesp(zp, off, len, flag, TRUE); ++ ++ ZFS_EXIT(zsb); ++ return (error); ++} ++EXPORT_SYMBOL(zfs_space); ++ ++/*ARGSUSED*/ ++int ++zfs_fid(struct inode *ip, fid_t *fidp) ++{ ++ znode_t *zp = ITOZ(ip); ++ zfs_sb_t *zsb = ITOZSB(ip); ++ uint32_t gen; ++ uint64_t gen64; ++ uint64_t object = zp->z_id; ++ zfid_short_t *zfid; ++ int size, i, error; ++ ++ ZFS_ENTER(zsb); ++ ZFS_VERIFY_ZP(zp); ++ ++ if ((error = sa_lookup(zp->z_sa_hdl, SA_ZPL_GEN(zsb), ++ &gen64, sizeof (uint64_t))) != 0) { ++ ZFS_EXIT(zsb); ++ return (error); ++ } ++ ++ gen = (uint32_t)gen64; ++ ++ size = (zsb->z_parent != zsb) ? 
LONG_FID_LEN : SHORT_FID_LEN; ++ if (fidp->fid_len < size) { ++ fidp->fid_len = size; ++ ZFS_EXIT(zsb); ++ return (ENOSPC); ++ } ++ ++ zfid = (zfid_short_t *)fidp; ++ ++ zfid->zf_len = size; ++ ++ for (i = 0; i < sizeof (zfid->zf_object); i++) ++ zfid->zf_object[i] = (uint8_t)(object >> (8 * i)); ++ ++ /* Must have a non-zero generation number to distinguish from .zfs */ ++ if (gen == 0) ++ gen = 1; ++ for (i = 0; i < sizeof (zfid->zf_gen); i++) ++ zfid->zf_gen[i] = (uint8_t)(gen >> (8 * i)); ++ ++ if (size == LONG_FID_LEN) { ++ uint64_t objsetid = dmu_objset_id(zsb->z_os); ++ zfid_long_t *zlfid; ++ ++ zlfid = (zfid_long_t *)fidp; ++ ++ for (i = 0; i < sizeof (zlfid->zf_setid); i++) ++ zlfid->zf_setid[i] = (uint8_t)(objsetid >> (8 * i)); ++ ++ /* XXX - this should be the generation number for the objset */ ++ for (i = 0; i < sizeof (zlfid->zf_setgen); i++) ++ zlfid->zf_setgen[i] = 0; ++ } ++ ++ ZFS_EXIT(zsb); ++ return (0); ++} ++EXPORT_SYMBOL(zfs_fid); ++ ++/*ARGSUSED*/ ++int ++zfs_getsecattr(struct inode *ip, vsecattr_t *vsecp, int flag, cred_t *cr) ++{ ++ znode_t *zp = ITOZ(ip); ++ zfs_sb_t *zsb = ITOZSB(ip); ++ int error; ++ boolean_t skipaclchk = (flag & ATTR_NOACLCHECK) ? B_TRUE : B_FALSE; ++ ++ ZFS_ENTER(zsb); ++ ZFS_VERIFY_ZP(zp); ++ error = zfs_getacl(zp, vsecp, skipaclchk, cr); ++ ZFS_EXIT(zsb); ++ ++ return (error); ++} ++EXPORT_SYMBOL(zfs_getsecattr); ++ ++/*ARGSUSED*/ ++int ++zfs_setsecattr(struct inode *ip, vsecattr_t *vsecp, int flag, cred_t *cr) ++{ ++ znode_t *zp = ITOZ(ip); ++ zfs_sb_t *zsb = ITOZSB(ip); ++ int error; ++ boolean_t skipaclchk = (flag & ATTR_NOACLCHECK) ? B_TRUE : B_FALSE; ++ zilog_t *zilog = zsb->z_log; ++ ++ ZFS_ENTER(zsb); ++ ZFS_VERIFY_ZP(zp); ++ ++ error = zfs_setacl(zp, vsecp, skipaclchk, cr); ++ ++ if (zsb->z_os->os_sync == ZFS_SYNC_ALWAYS) ++ zil_commit(zilog, 0); ++ ++ ZFS_EXIT(zsb); ++ return (error); ++} ++EXPORT_SYMBOL(zfs_setsecattr); ++ ++#ifdef HAVE_UIO_ZEROCOPY ++/* ++ * Tunable, both must be a power of 2. ++ * ++ * zcr_blksz_min: the smallest read we may consider to loan out an arcbuf ++ * zcr_blksz_max: if set to less than the file block size, allow loaning out of ++ * an arcbuf for a partial block read ++ */ ++int zcr_blksz_min = (1 << 10); /* 1K */ ++int zcr_blksz_max = (1 << 17); /* 128K */ ++ ++/*ARGSUSED*/ ++static int ++zfs_reqzcbuf(struct inode *ip, enum uio_rw ioflag, xuio_t *xuio, cred_t *cr) ++{ ++ znode_t *zp = ITOZ(ip); ++ zfs_sb_t *zsb = ITOZSB(ip); ++ int max_blksz = zsb->z_max_blksz; ++ uio_t *uio = &xuio->xu_uio; ++ ssize_t size = uio->uio_resid; ++ offset_t offset = uio->uio_loffset; ++ int blksz; ++ int fullblk, i; ++ arc_buf_t *abuf; ++ ssize_t maxsize; ++ int preamble, postamble; ++ ++ if (xuio->xu_type != UIOTYPE_ZEROCOPY) ++ return (EINVAL); ++ ++ ZFS_ENTER(zsb); ++ ZFS_VERIFY_ZP(zp); ++ switch (ioflag) { ++ case UIO_WRITE: ++ /* ++ * Loan out an arc_buf for write if write size is bigger than ++ * max_blksz, and the file's block size is also max_blksz. ++ */ ++ blksz = max_blksz; ++ if (size < blksz || zp->z_blksz != blksz) { ++ ZFS_EXIT(zsb); ++ return (EINVAL); ++ } ++ /* ++ * Caller requests buffers for write before knowing where the ++ * write offset might be (e.g. NFS TCP write). 
++ */ ++ if (offset == -1) { ++ preamble = 0; ++ } else { ++ preamble = P2PHASE(offset, blksz); ++ if (preamble) { ++ preamble = blksz - preamble; ++ size -= preamble; ++ } ++ } ++ ++ postamble = P2PHASE(size, blksz); ++ size -= postamble; ++ ++ fullblk = size / blksz; ++ (void) dmu_xuio_init(xuio, ++ (preamble != 0) + fullblk + (postamble != 0)); ++ ++ /* ++ * Have to fix iov base/len for partial buffers. They ++ * currently represent full arc_buf's. ++ */ ++ if (preamble) { ++ /* data begins in the middle of the arc_buf */ ++ abuf = dmu_request_arcbuf(sa_get_db(zp->z_sa_hdl), ++ blksz); ++ ASSERT(abuf); ++ (void) dmu_xuio_add(xuio, abuf, ++ blksz - preamble, preamble); ++ } ++ ++ for (i = 0; i < fullblk; i++) { ++ abuf = dmu_request_arcbuf(sa_get_db(zp->z_sa_hdl), ++ blksz); ++ ASSERT(abuf); ++ (void) dmu_xuio_add(xuio, abuf, 0, blksz); ++ } ++ ++ if (postamble) { ++ /* data ends in the middle of the arc_buf */ ++ abuf = dmu_request_arcbuf(sa_get_db(zp->z_sa_hdl), ++ blksz); ++ ASSERT(abuf); ++ (void) dmu_xuio_add(xuio, abuf, 0, postamble); ++ } ++ break; ++ case UIO_READ: ++ /* ++ * Loan out an arc_buf for read if the read size is larger than ++ * the current file block size. Block alignment is not ++ * considered. Partial arc_buf will be loaned out for read. ++ */ ++ blksz = zp->z_blksz; ++ if (blksz < zcr_blksz_min) ++ blksz = zcr_blksz_min; ++ if (blksz > zcr_blksz_max) ++ blksz = zcr_blksz_max; ++ /* avoid potential complexity of dealing with it */ ++ if (blksz > max_blksz) { ++ ZFS_EXIT(zsb); ++ return (EINVAL); ++ } ++ ++ maxsize = zp->z_size - uio->uio_loffset; ++ if (size > maxsize) ++ size = maxsize; ++ ++ if (size < blksz) { ++ ZFS_EXIT(zsb); ++ return (EINVAL); ++ } ++ break; ++ default: ++ ZFS_EXIT(zsb); ++ return (EINVAL); ++ } ++ ++ uio->uio_extflg = UIO_XUIO; ++ XUIO_XUZC_RW(xuio) = ioflag; ++ ZFS_EXIT(zsb); ++ return (0); ++} ++ ++/*ARGSUSED*/ ++static int ++zfs_retzcbuf(struct inode *ip, xuio_t *xuio, cred_t *cr) ++{ ++ int i; ++ arc_buf_t *abuf; ++ int ioflag = XUIO_XUZC_RW(xuio); ++ ++ ASSERT(xuio->xu_type == UIOTYPE_ZEROCOPY); ++ ++ i = dmu_xuio_cnt(xuio); ++ while (i-- > 0) { ++ abuf = dmu_xuio_arcbuf(xuio, i); ++ /* ++ * if abuf == NULL, it must be a write buffer ++ * that has been returned in zfs_write(). ++ */ ++ if (abuf) ++ dmu_return_arcbuf(abuf); ++ ASSERT(abuf || ioflag == UIO_WRITE); ++ } ++ ++ dmu_xuio_fini(xuio); ++ return (0); ++} ++#endif /* HAVE_UIO_ZEROCOPY */ ++ ++#if defined(_KERNEL) && defined(HAVE_SPL) ++module_param(zfs_read_chunk_size, long, 0644); ++MODULE_PARM_DESC(zfs_read_chunk_size, "Bytes to read per chunk"); ++#endif +diff -uNr linux-3.2.33-go.orig/fs/zfs/zfs/zfs_znode.c linux-3.2.33-go/fs/zfs/zfs/zfs_znode.c +--- linux-3.2.33-go.orig/fs/zfs/zfs/zfs_znode.c 1970-01-01 01:00:00.000000000 +0100 ++++ linux-3.2.33-go/fs/zfs/zfs/zfs_znode.c 2012-11-16 23:25:34.349039334 +0100 +@@ -0,0 +1,1800 @@ ++/* ++ * CDDL HEADER START ++ * ++ * The contents of this file are subject to the terms of the ++ * Common Development and Distribution License (the "License"). ++ * You may not use this file except in compliance with the License. ++ * ++ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE ++ * or http://www.opensolaris.org/os/licensing. ++ * See the License for the specific language governing permissions ++ * and limitations under the License. ++ * ++ * When distributing Covered Code, include this CDDL HEADER in each ++ * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 
++ * If applicable, add the following below this CDDL HEADER, with the ++ * fields enclosed by brackets "[]" replaced with your own identifying ++ * information: Portions Copyright [yyyy] [name of copyright owner] ++ * ++ * CDDL HEADER END ++ */ ++/* ++ * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved. ++ */ ++ ++/* Portions Copyright 2007 Jeremy Teo */ ++ ++#ifdef _KERNEL ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include "fs/fs_subr.h" ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#endif /* _KERNEL */ ++ ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++ ++#include "zfs_prop.h" ++#include "zfs_comutil.h" ++ ++/* ++ * Define ZNODE_STATS to turn on statistic gathering. By default, it is only ++ * turned on when DEBUG is also defined. ++ */ ++#ifdef DEBUG ++#define ZNODE_STATS ++#endif /* DEBUG */ ++ ++#ifdef ZNODE_STATS ++#define ZNODE_STAT_ADD(stat) ((stat)++) ++#else ++#define ZNODE_STAT_ADD(stat) /* nothing */ ++#endif /* ZNODE_STATS */ ++ ++/* ++ * Functions needed for userland (ie: libzpool) are not put under ++ * #ifdef_KERNEL; the rest of the functions have dependencies ++ * (such as VFS logic) that will not compile easily in userland. ++ */ ++#ifdef _KERNEL ++ ++static kmem_cache_t *znode_cache = NULL; ++ ++/*ARGSUSED*/ ++static int ++zfs_znode_cache_constructor(void *buf, void *arg, int kmflags) ++{ ++ znode_t *zp = buf; ++ ++ inode_init_once(ZTOI(zp)); ++ list_link_init(&zp->z_link_node); ++ ++ mutex_init(&zp->z_lock, NULL, MUTEX_DEFAULT, NULL); ++ rw_init(&zp->z_parent_lock, NULL, RW_DEFAULT, NULL); ++ rw_init(&zp->z_name_lock, NULL, RW_DEFAULT, NULL); ++ mutex_init(&zp->z_acl_lock, NULL, MUTEX_DEFAULT, NULL); ++ rw_init(&zp->z_xattr_lock, NULL, RW_DEFAULT, NULL); ++ ++ mutex_init(&zp->z_range_lock, NULL, MUTEX_DEFAULT, NULL); ++ avl_create(&zp->z_range_avl, zfs_range_compare, ++ sizeof (rl_t), offsetof(rl_t, r_node)); ++ ++ zp->z_dirlocks = NULL; ++ zp->z_acl_cached = NULL; ++ zp->z_xattr_cached = NULL; ++ zp->z_moved = 0; ++ return (0); ++} ++ ++/*ARGSUSED*/ ++static void ++zfs_znode_cache_destructor(void *buf, void *arg) ++{ ++ znode_t *zp = buf; ++ ++ ASSERT(!list_link_active(&zp->z_link_node)); ++ mutex_destroy(&zp->z_lock); ++ rw_destroy(&zp->z_parent_lock); ++ rw_destroy(&zp->z_name_lock); ++ mutex_destroy(&zp->z_acl_lock); ++ rw_destroy(&zp->z_xattr_lock); ++ avl_destroy(&zp->z_range_avl); ++ mutex_destroy(&zp->z_range_lock); ++ ++ ASSERT(zp->z_dirlocks == NULL); ++ ASSERT(zp->z_acl_cached == NULL); ++ ASSERT(zp->z_xattr_cached == NULL); ++} ++ ++void ++zfs_znode_init(void) ++{ ++ /* ++ * Initialize zcache ++ */ ++ ASSERT(znode_cache == NULL); ++ znode_cache = kmem_cache_create("zfs_znode_cache", ++ sizeof (znode_t), 0, zfs_znode_cache_constructor, ++ zfs_znode_cache_destructor, NULL, NULL, NULL, KMC_KMEM); ++} ++ ++void ++zfs_znode_fini(void) ++{ ++ /* ++ * Cleanup zcache ++ */ ++ if (znode_cache) ++ kmem_cache_destroy(znode_cache); ++ znode_cache = NULL; ++} ++ ++int ++zfs_create_share_dir(zfs_sb_t *zsb, dmu_tx_t *tx) ++{ ++#ifdef HAVE_SMB_SHARE ++ zfs_acl_ids_t acl_ids; ++ vattr_t vattr; ++ znode_t *sharezp; ++ vnode_t *vp; ++ znode_t *zp; ++ int error; ++ ++ vattr.va_mask = AT_MODE|AT_UID|AT_GID|AT_TYPE; ++ vattr.va_mode = S_IFDIR | 0555; ++ 
vattr.va_uid = crgetuid(kcred); ++ vattr.va_gid = crgetgid(kcred); ++ ++ sharezp = kmem_cache_alloc(znode_cache, KM_PUSHPAGE); ++ sharezp->z_moved = 0; ++ sharezp->z_unlinked = 0; ++ sharezp->z_atime_dirty = 0; ++ sharezp->z_zfsvfs = zfsvfs; ++ sharezp->z_is_sa = zfsvfs->z_use_sa; ++ ++ vp = ZTOV(sharezp); ++ vn_reinit(vp); ++ vp->v_type = VDIR; ++ ++ VERIFY(0 == zfs_acl_ids_create(sharezp, IS_ROOT_NODE, &vattr, ++ kcred, NULL, &acl_ids)); ++ zfs_mknode(sharezp, &vattr, tx, kcred, IS_ROOT_NODE, &zp, &acl_ids); ++ ASSERT3P(zp, ==, sharezp); ++ ASSERT(!vn_in_dnlc(ZTOV(sharezp))); /* not valid to move */ ++ POINTER_INVALIDATE(&sharezp->z_zfsvfs); ++ error = zap_add(zfsvfs->z_os, MASTER_NODE_OBJ, ++ ZFS_SHARES_DIR, 8, 1, &sharezp->z_id, tx); ++ zfsvfs->z_shares_dir = sharezp->z_id; ++ ++ zfs_acl_ids_free(&acl_ids); ++ // ZTOV(sharezp)->v_count = 0; ++ sa_handle_destroy(sharezp->z_sa_hdl); ++ kmem_cache_free(znode_cache, sharezp); ++ ++ return (error); ++#else ++ return (0); ++#endif /* HAVE_SMB_SHARE */ ++} ++ ++static void ++zfs_znode_sa_init(zfs_sb_t *zsb, znode_t *zp, ++ dmu_buf_t *db, dmu_object_type_t obj_type, sa_handle_t *sa_hdl) ++{ ++ ASSERT(MUTEX_HELD(ZFS_OBJ_MUTEX(zsb, zp->z_id))); ++ ++ mutex_enter(&zp->z_lock); ++ ++ ASSERT(zp->z_sa_hdl == NULL); ++ ASSERT(zp->z_acl_cached == NULL); ++ if (sa_hdl == NULL) { ++ VERIFY(0 == sa_handle_get_from_db(zsb->z_os, db, zp, ++ SA_HDL_SHARED, &zp->z_sa_hdl)); ++ } else { ++ zp->z_sa_hdl = sa_hdl; ++ sa_set_userp(sa_hdl, zp); ++ } ++ ++ zp->z_is_sa = (obj_type == DMU_OT_SA) ? B_TRUE : B_FALSE; ++ ++ mutex_exit(&zp->z_lock); ++} ++ ++void ++zfs_znode_dmu_fini(znode_t *zp) ++{ ++ ASSERT(MUTEX_HELD(ZFS_OBJ_MUTEX(ZTOZSB(zp), zp->z_id)) || ++ zp->z_unlinked || ++ RW_WRITE_HELD(&ZTOZSB(zp)->z_teardown_inactive_lock)); ++ ++ sa_handle_destroy(zp->z_sa_hdl); ++ zp->z_sa_hdl = NULL; ++} ++ ++/* ++ * Called by new_inode() to allocate a new inode. ++ */ ++int ++zfs_inode_alloc(struct super_block *sb, struct inode **ip) ++{ ++ znode_t *zp; ++ ++ zp = kmem_cache_alloc(znode_cache, KM_PUSHPAGE); ++ *ip = ZTOI(zp); ++ ++ return (0); ++} ++ ++/* ++ * Called in multiple places when an inode should be destroyed. ++ */ ++void ++zfs_inode_destroy(struct inode *ip) ++{ ++ znode_t *zp = ITOZ(ip); ++ zfs_sb_t *zsb = ZTOZSB(zp); ++ ++ if (zfsctl_is_node(ip)) ++ zfsctl_inode_destroy(ip); ++ ++ mutex_enter(&zsb->z_znodes_lock); ++ list_remove(&zsb->z_all_znodes, zp); ++ zsb->z_nr_znodes--; ++ mutex_exit(&zsb->z_znodes_lock); ++ ++ if (zp->z_acl_cached) { ++ zfs_acl_free(zp->z_acl_cached); ++ zp->z_acl_cached = NULL; ++ } ++ ++ if (zp->z_xattr_cached) { ++ nvlist_free(zp->z_xattr_cached); ++ zp->z_xattr_cached = NULL; ++ } ++ ++ kmem_cache_free(znode_cache, zp); ++} ++ ++static void ++zfs_inode_set_ops(zfs_sb_t *zsb, struct inode *ip) ++{ ++ uint64_t rdev = 0; ++ ++ switch (ip->i_mode & S_IFMT) { ++ case S_IFREG: ++ ip->i_op = &zpl_inode_operations; ++ ip->i_fop = &zpl_file_operations; ++ ip->i_mapping->a_ops = &zpl_address_space_operations; ++ break; ++ ++ case S_IFDIR: ++ ip->i_op = &zpl_dir_inode_operations; ++ ip->i_fop = &zpl_dir_file_operations; ++ ITOZ(ip)->z_zn_prefetch = B_TRUE; ++ break; ++ ++ case S_IFLNK: ++ ip->i_op = &zpl_symlink_inode_operations; ++ break; ++ ++ /* ++ * rdev is only stored in a SA only for device files. 
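++ * FIFOs and sockets fall through to init_special_inode() as well, but ++ * with rdev left at its initial value of 0.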
++ */ ++ case S_IFCHR: ++ case S_IFBLK: ++ VERIFY(sa_lookup(ITOZ(ip)->z_sa_hdl, SA_ZPL_RDEV(zsb), ++ &rdev, sizeof (rdev)) == 0); ++ /*FALLTHROUGH*/ ++ case S_IFIFO: ++ case S_IFSOCK: ++ init_special_inode(ip, ip->i_mode, rdev); ++ ip->i_op = &zpl_special_inode_operations; ++ break; ++ ++ default: ++ printk("ZFS: Invalid mode: 0x%x\n", ip->i_mode); ++ VERIFY(0); ++ } ++} ++ ++/* ++ * Construct a znode+inode and initialize. ++ * ++ * This does not do a call to dmu_set_user() that is ++ * up to the caller to do, in case you don't want to ++ * return the znode ++ */ ++static znode_t * ++zfs_znode_alloc(zfs_sb_t *zsb, dmu_buf_t *db, int blksz, ++ dmu_object_type_t obj_type, uint64_t obj, sa_handle_t *hdl, ++ struct dentry *dentry, struct inode *dip) ++{ ++ znode_t *zp; ++ struct inode *ip; ++ uint64_t parent; ++ sa_bulk_attr_t bulk[9]; ++ int count = 0; ++ ++ ASSERT(zsb != NULL); ++ ++ ip = new_inode(zsb->z_sb); ++ if (ip == NULL) ++ return (NULL); ++ ++ zp = ITOZ(ip); ++ ASSERT(zp->z_dirlocks == NULL); ++ ASSERT3P(zp->z_acl_cached, ==, NULL); ++ ASSERT3P(zp->z_xattr_cached, ==, NULL); ++ zp->z_moved = 0; ++ zp->z_sa_hdl = NULL; ++ zp->z_unlinked = 0; ++ zp->z_atime_dirty = 0; ++ zp->z_mapcnt = 0; ++ zp->z_id = db->db_object; ++ zp->z_blksz = blksz; ++ zp->z_seq = 0x7A4653; ++ zp->z_sync_cnt = 0; ++ zp->z_is_zvol = B_FALSE; ++ zp->z_is_mapped = B_FALSE; ++ zp->z_is_ctldir = B_FALSE; ++ ++ zfs_znode_sa_init(zsb, zp, db, obj_type, hdl); ++ ++ SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_MODE(zsb), NULL, &zp->z_mode, 8); ++ SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_GEN(zsb), NULL, &zp->z_gen, 8); ++ SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_SIZE(zsb), NULL, &zp->z_size, 8); ++ SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_LINKS(zsb), NULL, &zp->z_links, 8); ++ SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_FLAGS(zsb), NULL, ++ &zp->z_pflags, 8); ++ SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_PARENT(zsb), NULL, ++ &parent, 8); ++ SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_ATIME(zsb), NULL, ++ &zp->z_atime, 16); ++ SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_UID(zsb), NULL, &zp->z_uid, 8); ++ SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_GID(zsb), NULL, &zp->z_gid, 8); ++ ++ if (sa_bulk_lookup(zp->z_sa_hdl, bulk, count) != 0 || zp->z_gen == 0) { ++ if (hdl == NULL) ++ sa_handle_destroy(zp->z_sa_hdl); ++ ++ goto error; ++ } ++ ++ ip->i_ino = obj; ++ zfs_inode_update(zp); ++ zfs_inode_set_ops(zsb, ip); ++ ++ if (insert_inode_locked(ip)) ++ goto error; ++ ++ if (dentry) { ++ if (zpl_xattr_security_init(ip, dip, &dentry->d_name)) ++ goto error; ++ ++ d_instantiate(dentry, ip); ++ } ++ ++ mutex_enter(&zsb->z_znodes_lock); ++ list_insert_tail(&zsb->z_all_znodes, zp); ++ zsb->z_nr_znodes++; ++ membar_producer(); ++ mutex_exit(&zsb->z_znodes_lock); ++ ++ unlock_new_inode(ip); ++ return (zp); ++ ++error: ++ unlock_new_inode(ip); ++ iput(ip); ++ return NULL; ++} ++ ++/* ++ * Update the embedded inode given the znode. We should work toward ++ * eliminating this function as soon as possible by removing values ++ * which are duplicated between the znode and inode. If the generic ++ * inode has the correct field it should be used, and the ZFS code ++ * updated to access the inode. This can be done incrementally. ++ */ ++void ++zfs_inode_update(znode_t *zp) ++{ ++ zfs_sb_t *zsb; ++ struct inode *ip; ++ uint32_t blksize; ++ uint64_t atime[2], mtime[2], ctime[2]; ++ ++ ASSERT(zp != NULL); ++ zsb = ZTOZSB(zp); ++ ip = ZTOI(zp); ++ ++ /* Skip .zfs control nodes which do not exist on disk. 
*/ ++ if (zfsctl_is_node(ip)) ++ return; ++ ++ sa_lookup(zp->z_sa_hdl, SA_ZPL_ATIME(zsb), &atime, 16); ++ sa_lookup(zp->z_sa_hdl, SA_ZPL_MTIME(zsb), &mtime, 16); ++ sa_lookup(zp->z_sa_hdl, SA_ZPL_CTIME(zsb), &ctime, 16); ++ ++ spin_lock(&ip->i_lock); ++ ip->i_generation = zp->z_gen; ++ ip->i_uid = zp->z_uid; ++ ip->i_gid = zp->z_gid; ++ set_nlink(ip, zp->z_links); ++ ip->i_mode = zp->z_mode; ++ ip->i_blkbits = SPA_MINBLOCKSHIFT; ++ dmu_object_size_from_db(sa_get_db(zp->z_sa_hdl), &blksize, ++ (u_longlong_t *)&ip->i_blocks); ++ ++ ZFS_TIME_DECODE(&ip->i_atime, atime); ++ ZFS_TIME_DECODE(&ip->i_mtime, mtime); ++ ZFS_TIME_DECODE(&ip->i_ctime, ctime); ++ ++ i_size_write(ip, zp->z_size); ++ spin_unlock(&ip->i_lock); ++} ++ ++static uint64_t empty_xattr; ++static uint64_t pad[4]; ++static zfs_acl_phys_t acl_phys; ++/* ++ * Create a new DMU object to hold a zfs znode. ++ * ++ * IN: dzp - parent directory for new znode ++ * vap - file attributes for new znode ++ * tx - dmu transaction id for zap operations ++ * cr - credentials of caller ++ * flag - flags: ++ * IS_ROOT_NODE - new object will be root ++ * IS_XATTR - new object is an attribute ++ * bonuslen - length of bonus buffer ++ * setaclp - File/Dir initial ACL ++ * fuidp - Tracks fuid allocation. ++ * ++ * OUT: zpp - allocated znode ++ * ++ */ ++void ++zfs_mknode(znode_t *dzp, vattr_t *vap, dmu_tx_t *tx, cred_t *cr, ++ uint_t flag, znode_t **zpp, zfs_acl_ids_t *acl_ids) ++{ ++ uint64_t crtime[2], atime[2], mtime[2], ctime[2]; ++ uint64_t mode, size, links, parent, pflags; ++ uint64_t dzp_pflags = 0; ++ uint64_t rdev = 0; ++ zfs_sb_t *zsb = ZTOZSB(dzp); ++ dmu_buf_t *db; ++ timestruc_t now; ++ uint64_t gen, obj; ++ int err; ++ int bonuslen; ++ sa_handle_t *sa_hdl; ++ dmu_object_type_t obj_type; ++ sa_bulk_attr_t *sa_attrs; ++ int cnt = 0; ++ zfs_acl_locator_cb_t locate = { 0 }; ++ ++ if (zsb->z_replay) { ++ obj = vap->va_nodeid; ++ now = vap->va_ctime; /* see zfs_replay_create() */ ++ gen = vap->va_nblocks; /* ditto */ ++ } else { ++ obj = 0; ++ gethrestime(&now); ++ gen = dmu_tx_get_txg(tx); ++ } ++ ++ obj_type = zsb->z_use_sa ? DMU_OT_SA : DMU_OT_ZNODE; ++ bonuslen = (obj_type == DMU_OT_SA) ? ++ DN_MAX_BONUSLEN : ZFS_OLD_ZNODE_PHYS_SIZE; ++ ++ /* ++ * Create a new DMU object. ++ */ ++ /* ++ * There's currently no mechanism for pre-reading the blocks that will ++ * be needed to allocate a new object, so we accept the small chance ++ * that there will be an i/o error and we will fail one of the ++ * assertions below. ++ */ ++ if (S_ISDIR(vap->va_mode)) { ++ if (zsb->z_replay) { ++ err = zap_create_claim_norm(zsb->z_os, obj, ++ zsb->z_norm, DMU_OT_DIRECTORY_CONTENTS, ++ obj_type, bonuslen, tx); ++ ASSERT3U(err, ==, 0); ++ } else { ++ obj = zap_create_norm(zsb->z_os, ++ zsb->z_norm, DMU_OT_DIRECTORY_CONTENTS, ++ obj_type, bonuslen, tx); ++ } ++ } else { ++ if (zsb->z_replay) { ++ err = dmu_object_claim(zsb->z_os, obj, ++ DMU_OT_PLAIN_FILE_CONTENTS, 0, ++ obj_type, bonuslen, tx); ++ ASSERT3U(err, ==, 0); ++ } else { ++ obj = dmu_object_alloc(zsb->z_os, ++ DMU_OT_PLAIN_FILE_CONTENTS, 0, ++ obj_type, bonuslen, tx); ++ } ++ } ++ ++ ZFS_OBJ_HOLD_ENTER(zsb, obj); ++ VERIFY(0 == sa_buf_hold(zsb->z_os, obj, NULL, &db)); ++ ++ /* ++ * If this is the root, fix up the half-initialized parent pointer ++ * to reference the just-allocated physical data area. ++ */ ++ if (flag & IS_ROOT_NODE) { ++ dzp->z_id = obj; ++ } else { ++ dzp_pflags = dzp->z_pflags; ++ } ++ ++ /* ++ * If parent is an xattr, so am I. 
++ */ ++ if (dzp_pflags & ZFS_XATTR) { ++ flag |= IS_XATTR; ++ } ++ ++ if (zsb->z_use_fuids) ++ pflags = ZFS_ARCHIVE | ZFS_AV_MODIFIED; ++ else ++ pflags = 0; ++ ++ if (S_ISDIR(vap->va_mode)) { ++ size = 2; /* contents ("." and "..") */ ++ links = (flag & (IS_ROOT_NODE | IS_XATTR)) ? 2 : 1; ++ } else { ++ size = links = 0; ++ } ++ ++ if (S_ISBLK(vap->va_mode) || S_ISCHR(vap->va_mode)) ++ rdev = vap->va_rdev; ++ ++ parent = dzp->z_id; ++ mode = acl_ids->z_mode; ++ if (flag & IS_XATTR) ++ pflags |= ZFS_XATTR; ++ ++ /* ++ * No execs denied will be deterimed when zfs_mode_compute() is called. ++ */ ++ pflags |= acl_ids->z_aclp->z_hints & ++ (ZFS_ACL_TRIVIAL|ZFS_INHERIT_ACE|ZFS_ACL_AUTO_INHERIT| ++ ZFS_ACL_DEFAULTED|ZFS_ACL_PROTECTED); ++ ++ ZFS_TIME_ENCODE(&now, crtime); ++ ZFS_TIME_ENCODE(&now, ctime); ++ ++ if (vap->va_mask & ATTR_ATIME) { ++ ZFS_TIME_ENCODE(&vap->va_atime, atime); ++ } else { ++ ZFS_TIME_ENCODE(&now, atime); ++ } ++ ++ if (vap->va_mask & ATTR_MTIME) { ++ ZFS_TIME_ENCODE(&vap->va_mtime, mtime); ++ } else { ++ ZFS_TIME_ENCODE(&now, mtime); ++ } ++ ++ /* Now add in all of the "SA" attributes */ ++ VERIFY(0 == sa_handle_get_from_db(zsb->z_os, db, NULL, SA_HDL_SHARED, ++ &sa_hdl)); ++ ++ /* ++ * Setup the array of attributes to be replaced/set on the new file ++ * ++ * order for DMU_OT_ZNODE is critical since it needs to be constructed ++ * in the old znode_phys_t format. Don't change this ordering ++ */ ++ sa_attrs = kmem_alloc(sizeof(sa_bulk_attr_t) * ZPL_END, KM_PUSHPAGE); ++ ++ if (obj_type == DMU_OT_ZNODE) { ++ SA_ADD_BULK_ATTR(sa_attrs, cnt, SA_ZPL_ATIME(zsb), ++ NULL, &atime, 16); ++ SA_ADD_BULK_ATTR(sa_attrs, cnt, SA_ZPL_MTIME(zsb), ++ NULL, &mtime, 16); ++ SA_ADD_BULK_ATTR(sa_attrs, cnt, SA_ZPL_CTIME(zsb), ++ NULL, &ctime, 16); ++ SA_ADD_BULK_ATTR(sa_attrs, cnt, SA_ZPL_CRTIME(zsb), ++ NULL, &crtime, 16); ++ SA_ADD_BULK_ATTR(sa_attrs, cnt, SA_ZPL_GEN(zsb), ++ NULL, &gen, 8); ++ SA_ADD_BULK_ATTR(sa_attrs, cnt, SA_ZPL_MODE(zsb), ++ NULL, &mode, 8); ++ SA_ADD_BULK_ATTR(sa_attrs, cnt, SA_ZPL_SIZE(zsb), ++ NULL, &size, 8); ++ SA_ADD_BULK_ATTR(sa_attrs, cnt, SA_ZPL_PARENT(zsb), ++ NULL, &parent, 8); ++ } else { ++ SA_ADD_BULK_ATTR(sa_attrs, cnt, SA_ZPL_MODE(zsb), ++ NULL, &mode, 8); ++ SA_ADD_BULK_ATTR(sa_attrs, cnt, SA_ZPL_SIZE(zsb), ++ NULL, &size, 8); ++ SA_ADD_BULK_ATTR(sa_attrs, cnt, SA_ZPL_GEN(zsb), ++ NULL, &gen, 8); ++ SA_ADD_BULK_ATTR(sa_attrs, cnt, SA_ZPL_UID(zsb), ++ NULL, &acl_ids->z_fuid, 8); ++ SA_ADD_BULK_ATTR(sa_attrs, cnt, SA_ZPL_GID(zsb), ++ NULL, &acl_ids->z_fgid, 8); ++ SA_ADD_BULK_ATTR(sa_attrs, cnt, SA_ZPL_PARENT(zsb), ++ NULL, &parent, 8); ++ SA_ADD_BULK_ATTR(sa_attrs, cnt, SA_ZPL_FLAGS(zsb), ++ NULL, &pflags, 8); ++ SA_ADD_BULK_ATTR(sa_attrs, cnt, SA_ZPL_ATIME(zsb), ++ NULL, &atime, 16); ++ SA_ADD_BULK_ATTR(sa_attrs, cnt, SA_ZPL_MTIME(zsb), ++ NULL, &mtime, 16); ++ SA_ADD_BULK_ATTR(sa_attrs, cnt, SA_ZPL_CTIME(zsb), ++ NULL, &ctime, 16); ++ SA_ADD_BULK_ATTR(sa_attrs, cnt, SA_ZPL_CRTIME(zsb), ++ NULL, &crtime, 16); ++ } ++ ++ SA_ADD_BULK_ATTR(sa_attrs, cnt, SA_ZPL_LINKS(zsb), NULL, &links, 8); ++ ++ if (obj_type == DMU_OT_ZNODE) { ++ SA_ADD_BULK_ATTR(sa_attrs, cnt, SA_ZPL_XATTR(zsb), NULL, ++ &empty_xattr, 8); ++ } ++ if (obj_type == DMU_OT_ZNODE || ++ (S_ISBLK(vap->va_mode) || S_ISCHR(vap->va_mode))) { ++ SA_ADD_BULK_ATTR(sa_attrs, cnt, SA_ZPL_RDEV(zsb), ++ NULL, &rdev, 8); ++ } ++ if (obj_type == DMU_OT_ZNODE) { ++ SA_ADD_BULK_ATTR(sa_attrs, cnt, SA_ZPL_FLAGS(zsb), ++ NULL, &pflags, 8); ++ SA_ADD_BULK_ATTR(sa_attrs, cnt, SA_ZPL_UID(zsb), NULL, ++ 
&acl_ids->z_fuid, 8); ++ SA_ADD_BULK_ATTR(sa_attrs, cnt, SA_ZPL_GID(zsb), NULL, ++ &acl_ids->z_fgid, 8); ++ SA_ADD_BULK_ATTR(sa_attrs, cnt, SA_ZPL_PAD(zsb), NULL, pad, ++ sizeof (uint64_t) * 4); ++ SA_ADD_BULK_ATTR(sa_attrs, cnt, SA_ZPL_ZNODE_ACL(zsb), NULL, ++ &acl_phys, sizeof (zfs_acl_phys_t)); ++ } else if (acl_ids->z_aclp->z_version >= ZFS_ACL_VERSION_FUID) { ++ SA_ADD_BULK_ATTR(sa_attrs, cnt, SA_ZPL_DACL_COUNT(zsb), NULL, ++ &acl_ids->z_aclp->z_acl_count, 8); ++ locate.cb_aclp = acl_ids->z_aclp; ++ SA_ADD_BULK_ATTR(sa_attrs, cnt, SA_ZPL_DACL_ACES(zsb), ++ zfs_acl_data_locator, &locate, ++ acl_ids->z_aclp->z_acl_bytes); ++ mode = zfs_mode_compute(mode, acl_ids->z_aclp, &pflags, ++ acl_ids->z_fuid, acl_ids->z_fgid); ++ } ++ ++ VERIFY(sa_replace_all_by_template(sa_hdl, sa_attrs, cnt, tx) == 0); ++ ++ if (!(flag & IS_ROOT_NODE)) { ++ *zpp = zfs_znode_alloc(zsb, db, 0, obj_type, obj, sa_hdl, ++ vap->va_dentry, ZTOI(dzp)); ++ ASSERT(*zpp != NULL); ++ ASSERT(dzp != NULL); ++ } else { ++ /* ++ * If we are creating the root node, the "parent" we ++ * passed in is the znode for the root. ++ */ ++ *zpp = dzp; ++ ++ (*zpp)->z_sa_hdl = sa_hdl; ++ } ++ ++ (*zpp)->z_pflags = pflags; ++ (*zpp)->z_mode = mode; ++ ++ if (obj_type == DMU_OT_ZNODE || ++ acl_ids->z_aclp->z_version < ZFS_ACL_VERSION_FUID) { ++ err = zfs_aclset_common(*zpp, acl_ids->z_aclp, cr, tx); ++ ASSERT3S(err, ==, 0); ++ } ++ kmem_free(sa_attrs, sizeof(sa_bulk_attr_t) * ZPL_END); ++ ZFS_OBJ_HOLD_EXIT(zsb, obj); ++} ++ ++/* ++ * zfs_xvattr_set only updates the in-core attributes ++ * it is assumed the caller will be doing an sa_bulk_update ++ * to push the changes out ++ */ ++void ++zfs_xvattr_set(znode_t *zp, xvattr_t *xvap, dmu_tx_t *tx) ++{ ++ xoptattr_t *xoap; ++ ++ xoap = xva_getxoptattr(xvap); ++ ASSERT(xoap); ++ ++ if (XVA_ISSET_REQ(xvap, XAT_CREATETIME)) { ++ uint64_t times[2]; ++ ZFS_TIME_ENCODE(&xoap->xoa_createtime, times); ++ (void) sa_update(zp->z_sa_hdl, SA_ZPL_CRTIME(ZTOZSB(zp)), ++ &times, sizeof (times), tx); ++ XVA_SET_RTN(xvap, XAT_CREATETIME); ++ } ++ if (XVA_ISSET_REQ(xvap, XAT_READONLY)) { ++ ZFS_ATTR_SET(zp, ZFS_READONLY, xoap->xoa_readonly, ++ zp->z_pflags, tx); ++ XVA_SET_RTN(xvap, XAT_READONLY); ++ } ++ if (XVA_ISSET_REQ(xvap, XAT_HIDDEN)) { ++ ZFS_ATTR_SET(zp, ZFS_HIDDEN, xoap->xoa_hidden, ++ zp->z_pflags, tx); ++ XVA_SET_RTN(xvap, XAT_HIDDEN); ++ } ++ if (XVA_ISSET_REQ(xvap, XAT_SYSTEM)) { ++ ZFS_ATTR_SET(zp, ZFS_SYSTEM, xoap->xoa_system, ++ zp->z_pflags, tx); ++ XVA_SET_RTN(xvap, XAT_SYSTEM); ++ } ++ if (XVA_ISSET_REQ(xvap, XAT_ARCHIVE)) { ++ ZFS_ATTR_SET(zp, ZFS_ARCHIVE, xoap->xoa_archive, ++ zp->z_pflags, tx); ++ XVA_SET_RTN(xvap, XAT_ARCHIVE); ++ } ++ if (XVA_ISSET_REQ(xvap, XAT_IMMUTABLE)) { ++ ZFS_ATTR_SET(zp, ZFS_IMMUTABLE, xoap->xoa_immutable, ++ zp->z_pflags, tx); ++ XVA_SET_RTN(xvap, XAT_IMMUTABLE); ++ } ++ if (XVA_ISSET_REQ(xvap, XAT_NOUNLINK)) { ++ ZFS_ATTR_SET(zp, ZFS_NOUNLINK, xoap->xoa_nounlink, ++ zp->z_pflags, tx); ++ XVA_SET_RTN(xvap, XAT_NOUNLINK); ++ } ++ if (XVA_ISSET_REQ(xvap, XAT_APPENDONLY)) { ++ ZFS_ATTR_SET(zp, ZFS_APPENDONLY, xoap->xoa_appendonly, ++ zp->z_pflags, tx); ++ XVA_SET_RTN(xvap, XAT_APPENDONLY); ++ } ++ if (XVA_ISSET_REQ(xvap, XAT_NODUMP)) { ++ ZFS_ATTR_SET(zp, ZFS_NODUMP, xoap->xoa_nodump, ++ zp->z_pflags, tx); ++ XVA_SET_RTN(xvap, XAT_NODUMP); ++ } ++ if (XVA_ISSET_REQ(xvap, XAT_OPAQUE)) { ++ ZFS_ATTR_SET(zp, ZFS_OPAQUE, xoap->xoa_opaque, ++ zp->z_pflags, tx); ++ XVA_SET_RTN(xvap, XAT_OPAQUE); ++ } ++ if (XVA_ISSET_REQ(xvap, XAT_AV_QUARANTINED)) { ++ ZFS_ATTR_SET(zp,
ZFS_AV_QUARANTINED, ++ xoap->xoa_av_quarantined, zp->z_pflags, tx); ++ XVA_SET_RTN(xvap, XAT_AV_QUARANTINED); ++ } ++ if (XVA_ISSET_REQ(xvap, XAT_AV_MODIFIED)) { ++ ZFS_ATTR_SET(zp, ZFS_AV_MODIFIED, xoap->xoa_av_modified, ++ zp->z_pflags, tx); ++ XVA_SET_RTN(xvap, XAT_AV_MODIFIED); ++ } ++ if (XVA_ISSET_REQ(xvap, XAT_AV_SCANSTAMP)) { ++ zfs_sa_set_scanstamp(zp, xvap, tx); ++ XVA_SET_RTN(xvap, XAT_AV_SCANSTAMP); ++ } ++ if (XVA_ISSET_REQ(xvap, XAT_REPARSE)) { ++ ZFS_ATTR_SET(zp, ZFS_REPARSE, xoap->xoa_reparse, ++ zp->z_pflags, tx); ++ XVA_SET_RTN(xvap, XAT_REPARSE); ++ } ++ if (XVA_ISSET_REQ(xvap, XAT_OFFLINE)) { ++ ZFS_ATTR_SET(zp, ZFS_OFFLINE, xoap->xoa_offline, ++ zp->z_pflags, tx); ++ XVA_SET_RTN(xvap, XAT_OFFLINE); ++ } ++ if (XVA_ISSET_REQ(xvap, XAT_SPARSE)) { ++ ZFS_ATTR_SET(zp, ZFS_SPARSE, xoap->xoa_sparse, ++ zp->z_pflags, tx); ++ XVA_SET_RTN(xvap, XAT_SPARSE); ++ } ++} ++ ++int ++zfs_zget(zfs_sb_t *zsb, uint64_t obj_num, znode_t **zpp) ++{ ++ dmu_object_info_t doi; ++ dmu_buf_t *db; ++ znode_t *zp; ++ int err; ++ sa_handle_t *hdl; ++ struct inode *ip; ++ ++ *zpp = NULL; ++ ++again: ++ ip = ilookup(zsb->z_sb, obj_num); ++ ++ ZFS_OBJ_HOLD_ENTER(zsb, obj_num); ++ ++ err = sa_buf_hold(zsb->z_os, obj_num, NULL, &db); ++ if (err) { ++ ZFS_OBJ_HOLD_EXIT(zsb, obj_num); ++ iput(ip); ++ return (err); ++ } ++ ++ dmu_object_info_from_db(db, &doi); ++ if (doi.doi_bonus_type != DMU_OT_SA && ++ (doi.doi_bonus_type != DMU_OT_ZNODE || ++ (doi.doi_bonus_type == DMU_OT_ZNODE && ++ doi.doi_bonus_size < sizeof (znode_phys_t)))) { ++ sa_buf_rele(db, NULL); ++ ZFS_OBJ_HOLD_EXIT(zsb, obj_num); ++ iput(ip); ++ return (EINVAL); ++ } ++ ++ hdl = dmu_buf_get_user(db); ++ if (hdl != NULL) { ++ if (ip == NULL) { ++ /* ++ * ilookup returned NULL, which means ++ * the znode is dying - but the SA handle isn't ++ * quite dead yet, we need to drop any locks ++ * we're holding, re-schedule the task and try again. ++ */ ++ sa_buf_rele(db, NULL); ++ ZFS_OBJ_HOLD_EXIT(zsb, obj_num); ++ ++ schedule(); ++ goto again; ++ } ++ ++ zp = sa_get_userdata(hdl); ++ ++ /* ++ * Since "SA" does immediate eviction we ++ * should never find a sa handle that doesn't ++ * know about the znode. ++ */ ++ ++ ASSERT3P(zp, !=, NULL); ++ ++ mutex_enter(&zp->z_lock); ++ ASSERT3U(zp->z_id, ==, obj_num); ++ if (zp->z_unlinked) { ++ err = ENOENT; ++ } else { ++ igrab(ZTOI(zp)); ++ *zpp = zp; ++ err = 0; ++ } ++ sa_buf_rele(db, NULL); ++ mutex_exit(&zp->z_lock); ++ ZFS_OBJ_HOLD_EXIT(zsb, obj_num); ++ iput(ip); ++ return (err); ++ } ++ ++ ASSERT3P(ip, ==, NULL); ++ ++ /* ++ * Not found create new znode/vnode but only if file exists. ++ * ++ * There is a small window where zfs_vget() could ++ * find this object while a file create is still in ++ * progress. This is checked for in zfs_znode_alloc() ++ * ++ * if zfs_znode_alloc() fails it will drop the hold on the ++ * bonus buffer. 
++ */ ++ zp = zfs_znode_alloc(zsb, db, doi.doi_data_block_size, ++ doi.doi_bonus_type, obj_num, NULL, NULL, NULL); ++ if (zp == NULL) { ++ err = ENOENT; ++ } else { ++ *zpp = zp; ++ } ++ ZFS_OBJ_HOLD_EXIT(zsb, obj_num); ++ return (err); ++} ++ ++int ++zfs_rezget(znode_t *zp) ++{ ++ zfs_sb_t *zsb = ZTOZSB(zp); ++ dmu_object_info_t doi; ++ dmu_buf_t *db; ++ uint64_t obj_num = zp->z_id; ++ uint64_t mode; ++ sa_bulk_attr_t bulk[8]; ++ int err; ++ int count = 0; ++ uint64_t gen; ++ ++ ZFS_OBJ_HOLD_ENTER(zsb, obj_num); ++ ++ mutex_enter(&zp->z_acl_lock); ++ if (zp->z_acl_cached) { ++ zfs_acl_free(zp->z_acl_cached); ++ zp->z_acl_cached = NULL; ++ } ++ ++ mutex_exit(&zp->z_acl_lock); ++ ASSERT(zp->z_sa_hdl == NULL); ++ err = sa_buf_hold(zsb->z_os, obj_num, NULL, &db); ++ if (err) { ++ ZFS_OBJ_HOLD_EXIT(zsb, obj_num); ++ return (err); ++ } ++ ++ dmu_object_info_from_db(db, &doi); ++ if (doi.doi_bonus_type != DMU_OT_SA && ++ (doi.doi_bonus_type != DMU_OT_ZNODE || ++ (doi.doi_bonus_type == DMU_OT_ZNODE && ++ doi.doi_bonus_size < sizeof (znode_phys_t)))) { ++ sa_buf_rele(db, NULL); ++ ZFS_OBJ_HOLD_EXIT(zsb, obj_num); ++ return (EINVAL); ++ } ++ ++ zfs_znode_sa_init(zsb, zp, db, doi.doi_bonus_type, NULL); ++ ++ /* reload cached values */ ++ SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_GEN(zsb), NULL, ++ &gen, sizeof (gen)); ++ SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_SIZE(zsb), NULL, ++ &zp->z_size, sizeof (zp->z_size)); ++ SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_LINKS(zsb), NULL, ++ &zp->z_links, sizeof (zp->z_links)); ++ SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_FLAGS(zsb), NULL, ++ &zp->z_pflags, sizeof (zp->z_pflags)); ++ SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_ATIME(zsb), NULL, ++ &zp->z_atime, sizeof (zp->z_atime)); ++ SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_UID(zsb), NULL, ++ &zp->z_uid, sizeof (zp->z_uid)); ++ SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_GID(zsb), NULL, ++ &zp->z_gid, sizeof (zp->z_gid)); ++ SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_MODE(zsb), NULL, ++ &mode, sizeof (mode)); ++ ++ if (sa_bulk_lookup(zp->z_sa_hdl, bulk, count)) { ++ zfs_znode_dmu_fini(zp); ++ ZFS_OBJ_HOLD_EXIT(zsb, obj_num); ++ return (EIO); ++ } ++ ++ zp->z_mode = mode; ++ ++ if (gen != zp->z_gen) { ++ zfs_znode_dmu_fini(zp); ++ ZFS_OBJ_HOLD_EXIT(zsb, obj_num); ++ return (EIO); ++ } ++ ++ zp->z_unlinked = (zp->z_links == 0); ++ zp->z_blksz = doi.doi_data_block_size; ++ ++ ZFS_OBJ_HOLD_EXIT(zsb, obj_num); ++ ++ return (0); ++} ++ ++void ++zfs_znode_delete(znode_t *zp, dmu_tx_t *tx) ++{ ++ zfs_sb_t *zsb = ZTOZSB(zp); ++ objset_t *os = zsb->z_os; ++ uint64_t obj = zp->z_id; ++ uint64_t acl_obj = zfs_external_acl(zp); ++ ++ ZFS_OBJ_HOLD_ENTER(zsb, obj); ++ if (acl_obj) { ++ VERIFY(!zp->z_is_sa); ++ VERIFY(0 == dmu_object_free(os, acl_obj, tx)); ++ } ++ VERIFY(0 == dmu_object_free(os, obj, tx)); ++ zfs_znode_dmu_fini(zp); ++ ZFS_OBJ_HOLD_EXIT(zsb, obj); ++} ++ ++void ++zfs_zinactive(znode_t *zp) ++{ ++ zfs_sb_t *zsb = ZTOZSB(zp); ++ uint64_t z_id = zp->z_id; ++ boolean_t drop_mutex = 0; ++ ++ ASSERT(zp->z_sa_hdl); ++ ++ /* ++ * Don't allow a zfs_zget() while were trying to release this znode. ++ * ++ * Linux allows direct memory reclaim which means that any KM_SLEEP ++ * allocation may trigger inode eviction. This can lead to a deadlock ++ * through the ->shrink_icache_memory()->evict()->zfs_inactive()-> ++ * zfs_zinactive() call path. To avoid this deadlock the process ++ * must not reacquire the mutex when it is already holding it. 
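++ * ZFS_OBJ_HOLD_OWNED() is checked below so the hold is only taken, and ++ * later dropped, when this thread does not already own it.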
++ */ ++ if (!ZFS_OBJ_HOLD_OWNED(zsb, z_id)) { ++ ZFS_OBJ_HOLD_ENTER(zsb, z_id); ++ drop_mutex = 1; ++ } ++ ++ mutex_enter(&zp->z_lock); ++ ++ /* ++ * If this was the last reference to a file with no links, ++ * remove the file from the file system. ++ */ ++ if (zp->z_unlinked) { ++ mutex_exit(&zp->z_lock); ++ ++ if (drop_mutex) ++ ZFS_OBJ_HOLD_EXIT(zsb, z_id); ++ ++ zfs_rmnode(zp); ++ return; ++ } ++ ++ mutex_exit(&zp->z_lock); ++ zfs_znode_dmu_fini(zp); ++ ++ if (drop_mutex) ++ ZFS_OBJ_HOLD_EXIT(zsb, z_id); ++} ++ ++void ++zfs_tstamp_update_setup(znode_t *zp, uint_t flag, uint64_t mtime[2], ++ uint64_t ctime[2], boolean_t have_tx) ++{ ++ timestruc_t now; ++ ++ gethrestime(&now); ++ ++ if (have_tx) { /* will sa_bulk_update happen really soon? */ ++ zp->z_atime_dirty = 0; ++ zp->z_seq++; ++ } else { ++ zp->z_atime_dirty = 1; ++ } ++ ++ if (flag & ATTR_ATIME) { ++ ZFS_TIME_ENCODE(&now, zp->z_atime); ++ } ++ ++ if (flag & ATTR_MTIME) { ++ ZFS_TIME_ENCODE(&now, mtime); ++ if (ZTOZSB(zp)->z_use_fuids) { ++ zp->z_pflags |= (ZFS_ARCHIVE | ++ ZFS_AV_MODIFIED); ++ } ++ } ++ ++ if (flag & ATTR_CTIME) { ++ ZFS_TIME_ENCODE(&now, ctime); ++ if (ZTOZSB(zp)->z_use_fuids) ++ zp->z_pflags |= ZFS_ARCHIVE; ++ } ++} ++ ++/* ++ * Grow the block size for a file. ++ * ++ * IN: zp - znode of file to free data in. ++ * size - requested block size ++ * tx - open transaction. ++ * ++ * NOTE: this function assumes that the znode is write locked. ++ */ ++void ++zfs_grow_blocksize(znode_t *zp, uint64_t size, dmu_tx_t *tx) ++{ ++ int error; ++ u_longlong_t dummy; ++ ++ if (size <= zp->z_blksz) ++ return; ++ /* ++ * If the file size is already greater than the current blocksize, ++ * we will not grow. If there is more than one block in a file, ++ * the blocksize cannot change. ++ */ ++ if (zp->z_blksz && zp->z_size > zp->z_blksz) ++ return; ++ ++ error = dmu_object_set_blocksize(ZTOZSB(zp)->z_os, zp->z_id, ++ size, 0, tx); ++ ++ if (error == ENOTSUP) ++ return; ++ ASSERT3U(error, ==, 0); ++ ++ /* What blocksize did we actually get? */ ++ dmu_object_size_from_db(sa_get_db(zp->z_sa_hdl), &zp->z_blksz, &dummy); ++} ++ ++/* ++ * Increase the file length ++ * ++ * IN: zp - znode of file to free data in. ++ * end - new end-of-file ++ * ++ * RETURN: 0 if success ++ * error code if failure ++ */ ++static int ++zfs_extend(znode_t *zp, uint64_t end) ++{ ++ zfs_sb_t *zsb = ZTOZSB(zp); ++ dmu_tx_t *tx; ++ rl_t *rl; ++ uint64_t newblksz; ++ int error; ++ ++ /* ++ * We will change zp_size, lock the whole file. ++ */ ++ rl = zfs_range_lock(zp, 0, UINT64_MAX, RL_WRITER); ++ ++ /* ++ * Nothing to do if file already at desired length. ++ */ ++ if (end <= zp->z_size) { ++ zfs_range_unlock(rl); ++ return (0); ++ } ++top: ++ tx = dmu_tx_create(zsb->z_os); ++ dmu_tx_hold_sa(tx, zp->z_sa_hdl, B_FALSE); ++ zfs_sa_upgrade_txholds(tx, zp); ++ if (end > zp->z_blksz && ++ (!ISP2(zp->z_blksz) || zp->z_blksz < zsb->z_max_blksz)) { ++ /* ++ * We are growing the file past the current block size. 
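++ * If the current block size already exceeds z_max_blksz it must be a ++ * non-power-of-two size (see the ASSERT below) and growth is capped at ++ * SPA_MAXBLOCKSIZE; otherwise it is capped at z_max_blksz.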
++ */ ++ if (zp->z_blksz > ZTOZSB(zp)->z_max_blksz) { ++ ASSERT(!ISP2(zp->z_blksz)); ++ newblksz = MIN(end, SPA_MAXBLOCKSIZE); ++ } else { ++ newblksz = MIN(end, ZTOZSB(zp)->z_max_blksz); ++ } ++ dmu_tx_hold_write(tx, zp->z_id, 0, newblksz); ++ } else { ++ newblksz = 0; ++ } ++ ++ error = dmu_tx_assign(tx, TXG_NOWAIT); ++ if (error) { ++ if (error == ERESTART) { ++ dmu_tx_wait(tx); ++ dmu_tx_abort(tx); ++ goto top; ++ } ++ dmu_tx_abort(tx); ++ zfs_range_unlock(rl); ++ return (error); ++ } ++ ++ if (newblksz) ++ zfs_grow_blocksize(zp, newblksz, tx); ++ ++ zp->z_size = end; ++ ++ VERIFY(0 == sa_update(zp->z_sa_hdl, SA_ZPL_SIZE(ZTOZSB(zp)), ++ &zp->z_size, sizeof (zp->z_size), tx)); ++ ++ zfs_range_unlock(rl); ++ ++ dmu_tx_commit(tx); ++ ++ return (0); ++} ++ ++/* ++ * Free space in a file. ++ * ++ * IN: zp - znode of file to free data in. ++ * off - start of section to free. ++ * len - length of section to free. ++ * ++ * RETURN: 0 if success ++ * error code if failure ++ */ ++static int ++zfs_free_range(znode_t *zp, uint64_t off, uint64_t len) ++{ ++ zfs_sb_t *zsb = ZTOZSB(zp); ++ rl_t *rl; ++ int error; ++ ++ /* ++ * Lock the range being freed. ++ */ ++ rl = zfs_range_lock(zp, off, len, RL_WRITER); ++ ++ /* ++ * Nothing to do if file already at desired length. ++ */ ++ if (off >= zp->z_size) { ++ zfs_range_unlock(rl); ++ return (0); ++ } ++ ++ if (off + len > zp->z_size) ++ len = zp->z_size - off; ++ ++ error = dmu_free_long_range(zsb->z_os, zp->z_id, off, len); ++ ++ zfs_range_unlock(rl); ++ ++ return (error); ++} ++ ++/* ++ * Truncate a file ++ * ++ * IN: zp - znode of file to free data in. ++ * end - new end-of-file. ++ * ++ * RETURN: 0 if success ++ * error code if failure ++ */ ++static int ++zfs_trunc(znode_t *zp, uint64_t end) ++{ ++ zfs_sb_t *zsb = ZTOZSB(zp); ++ dmu_tx_t *tx; ++ rl_t *rl; ++ int error; ++ sa_bulk_attr_t bulk[2]; ++ int count = 0; ++ ++ /* ++ * We will change zp_size, lock the whole file. ++ */ ++ rl = zfs_range_lock(zp, 0, UINT64_MAX, RL_WRITER); ++ ++ /* ++ * Nothing to do if file already at desired length. ++ */ ++ if (end >= zp->z_size) { ++ zfs_range_unlock(rl); ++ return (0); ++ } ++ ++ error = dmu_free_long_range(zsb->z_os, zp->z_id, end, -1); ++ if (error) { ++ zfs_range_unlock(rl); ++ return (error); ++ } ++top: ++ tx = dmu_tx_create(zsb->z_os); ++ dmu_tx_hold_sa(tx, zp->z_sa_hdl, B_FALSE); ++ zfs_sa_upgrade_txholds(tx, zp); ++ error = dmu_tx_assign(tx, TXG_NOWAIT); ++ if (error) { ++ if (error == ERESTART) { ++ dmu_tx_wait(tx); ++ dmu_tx_abort(tx); ++ goto top; ++ } ++ dmu_tx_abort(tx); ++ zfs_range_unlock(rl); ++ return (error); ++ } ++ ++ zp->z_size = end; ++ SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_SIZE(zsb), ++ NULL, &zp->z_size, sizeof (zp->z_size)); ++ ++ if (end == 0) { ++ zp->z_pflags &= ~ZFS_SPARSE; ++ SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_FLAGS(zsb), ++ NULL, &zp->z_pflags, 8); ++ } ++ VERIFY(sa_bulk_update(zp->z_sa_hdl, bulk, count, tx) == 0); ++ ++ dmu_tx_commit(tx); ++ ++ zfs_range_unlock(rl); ++ ++ return (0); ++} ++ ++/* ++ * Free space in a file ++ * ++ * IN: zp - znode of file to free data in. ++ * off - start of range ++ * len - end of range (0 => EOF) ++ * flag - current file open mode flags. 
++ * log - TRUE if this action should be logged ++ * ++ * RETURN: 0 if success ++ * error code if failure ++ */ ++int ++zfs_freesp(znode_t *zp, uint64_t off, uint64_t len, int flag, boolean_t log) ++{ ++ struct inode *ip = ZTOI(zp); ++ dmu_tx_t *tx; ++ zfs_sb_t *zsb = ZTOZSB(zp); ++ zilog_t *zilog = zsb->z_log; ++ uint64_t mode; ++ uint64_t mtime[2], ctime[2]; ++ sa_bulk_attr_t bulk[3]; ++ int count = 0; ++ int error; ++ ++ if ((error = sa_lookup(zp->z_sa_hdl, SA_ZPL_MODE(zsb), &mode, ++ sizeof (mode))) != 0) ++ return (error); ++ ++ if (off > zp->z_size) { ++ error = zfs_extend(zp, off+len); ++ if (error == 0 && log) ++ goto log; ++ else ++ return (error); ++ } ++ ++ /* ++ * Check for any locks in the region to be freed. ++ */ ++ if (ip->i_flock && mandatory_lock(ip)) { ++ uint64_t length = (len ? len : zp->z_size - off); ++ if (!lock_may_write(ip, off, length)) ++ return (EAGAIN); ++ } ++ ++ if (len == 0) { ++ error = zfs_trunc(zp, off); ++ } else { ++ if ((error = zfs_free_range(zp, off, len)) == 0 && ++ off + len > zp->z_size) ++ error = zfs_extend(zp, off+len); ++ } ++ if (error || !log) ++ return (error); ++log: ++ tx = dmu_tx_create(zsb->z_os); ++ dmu_tx_hold_sa(tx, zp->z_sa_hdl, B_FALSE); ++ zfs_sa_upgrade_txholds(tx, zp); ++ error = dmu_tx_assign(tx, TXG_NOWAIT); ++ if (error) { ++ if (error == ERESTART) { ++ dmu_tx_wait(tx); ++ dmu_tx_abort(tx); ++ goto log; ++ } ++ dmu_tx_abort(tx); ++ return (error); ++ } ++ ++ SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_MTIME(zsb), NULL, mtime, 16); ++ SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_CTIME(zsb), NULL, ctime, 16); ++ SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_FLAGS(zsb), ++ NULL, &zp->z_pflags, 8); ++ zfs_tstamp_update_setup(zp, CONTENT_MODIFIED, mtime, ctime, B_TRUE); ++ error = sa_bulk_update(zp->z_sa_hdl, bulk, count, tx); ++ ASSERT(error == 0); ++ ++ zfs_log_truncate(zilog, tx, TX_TRUNCATE, zp, off, len); ++ ++ dmu_tx_commit(tx); ++ zfs_inode_update(zp); ++ return (0); ++} ++ ++void ++zfs_create_fs(objset_t *os, cred_t *cr, nvlist_t *zplprops, dmu_tx_t *tx) ++{ ++ struct super_block *sb; ++ zfs_sb_t *zsb; ++ uint64_t moid, obj, sa_obj, version; ++ uint64_t sense = ZFS_CASE_SENSITIVE; ++ uint64_t norm = 0; ++ nvpair_t *elem; ++ int error; ++ int i; ++ znode_t *rootzp = NULL; ++ vattr_t vattr; ++ znode_t *zp; ++ zfs_acl_ids_t acl_ids; ++ ++ /* ++ * First attempt to create master node. ++ */ ++ /* ++ * In an empty objset, there are no blocks to read and thus ++ * there can be no i/o errors (which we assert below). ++ */ ++ moid = MASTER_NODE_OBJ; ++ error = zap_create_claim(os, moid, DMU_OT_MASTER_NODE, ++ DMU_OT_NONE, 0, tx); ++ ASSERT(error == 0); ++ ++ /* ++ * Set starting attributes. 
++ */ ++ version = zfs_zpl_version_map(spa_version(dmu_objset_spa(os))); ++ elem = NULL; ++ while ((elem = nvlist_next_nvpair(zplprops, elem)) != NULL) { ++ /* For the moment we expect all zpl props to be uint64_ts */ ++ uint64_t val; ++ char *name; ++ ++ ASSERT(nvpair_type(elem) == DATA_TYPE_UINT64); ++ VERIFY(nvpair_value_uint64(elem, &val) == 0); ++ name = nvpair_name(elem); ++ if (strcmp(name, zfs_prop_to_name(ZFS_PROP_VERSION)) == 0) { ++ if (val < version) ++ version = val; ++ } else { ++ error = zap_update(os, moid, name, 8, 1, &val, tx); ++ } ++ ASSERT(error == 0); ++ if (strcmp(name, zfs_prop_to_name(ZFS_PROP_NORMALIZE)) == 0) ++ norm = val; ++ else if (strcmp(name, zfs_prop_to_name(ZFS_PROP_CASE)) == 0) ++ sense = val; ++ } ++ ASSERT(version != 0); ++ error = zap_update(os, moid, ZPL_VERSION_STR, 8, 1, &version, tx); ++ ++ /* ++ * Create zap object used for SA attribute registration ++ */ ++ ++ if (version >= ZPL_VERSION_SA) { ++ sa_obj = zap_create(os, DMU_OT_SA_MASTER_NODE, ++ DMU_OT_NONE, 0, tx); ++ error = zap_add(os, moid, ZFS_SA_ATTRS, 8, 1, &sa_obj, tx); ++ ASSERT(error == 0); ++ } else { ++ sa_obj = 0; ++ } ++ /* ++ * Create a delete queue. ++ */ ++ obj = zap_create(os, DMU_OT_UNLINKED_SET, DMU_OT_NONE, 0, tx); ++ ++ error = zap_add(os, moid, ZFS_UNLINKED_SET, 8, 1, &obj, tx); ++ ASSERT(error == 0); ++ ++ /* ++ * Create root znode. Create minimal znode/inode/zsb/sb ++ * to allow zfs_mknode to work. ++ */ ++ vattr.va_mask = ATTR_MODE|ATTR_UID|ATTR_GID; ++ vattr.va_mode = S_IFDIR|0755; ++ vattr.va_uid = crgetuid(cr); ++ vattr.va_gid = crgetgid(cr); ++ ++ rootzp = kmem_cache_alloc(znode_cache, KM_PUSHPAGE); ++ rootzp->z_moved = 0; ++ rootzp->z_unlinked = 0; ++ rootzp->z_atime_dirty = 0; ++ rootzp->z_is_sa = USE_SA(version, os); ++ ++ zsb = kmem_zalloc(sizeof (zfs_sb_t), KM_PUSHPAGE); ++ zsb->z_os = os; ++ zsb->z_parent = zsb; ++ zsb->z_version = version; ++ zsb->z_use_fuids = USE_FUIDS(version, os); ++ zsb->z_use_sa = USE_SA(version, os); ++ zsb->z_norm = norm; ++ ++ sb = kmem_zalloc(sizeof (struct super_block), KM_PUSHPAGE); ++ sb->s_fs_info = zsb; ++ ++ ZTOI(rootzp)->i_sb = sb; ++ ++ error = sa_setup(os, sa_obj, zfs_attr_table, ZPL_END, ++ &zsb->z_attr_table); ++ ++ ASSERT(error == 0); ++ ++ /* ++ * Fold case on file systems that are always or sometimes case ++ * insensitive. 
++ */ ++ if (sense == ZFS_CASE_INSENSITIVE || sense == ZFS_CASE_MIXED) ++ zsb->z_norm |= U8_TEXTPREP_TOUPPER; ++ ++ mutex_init(&zsb->z_znodes_lock, NULL, MUTEX_DEFAULT, NULL); ++ list_create(&zsb->z_all_znodes, sizeof (znode_t), ++ offsetof(znode_t, z_link_node)); ++ ++ for (i = 0; i != ZFS_OBJ_MTX_SZ; i++) ++ mutex_init(&zsb->z_hold_mtx[i], NULL, MUTEX_DEFAULT, NULL); ++ ++ VERIFY(0 == zfs_acl_ids_create(rootzp, IS_ROOT_NODE, &vattr, ++ cr, NULL, &acl_ids)); ++ zfs_mknode(rootzp, &vattr, tx, cr, IS_ROOT_NODE, &zp, &acl_ids); ++ ASSERT3P(zp, ==, rootzp); ++ error = zap_add(os, moid, ZFS_ROOT_OBJ, 8, 1, &rootzp->z_id, tx); ++ ASSERT(error == 0); ++ zfs_acl_ids_free(&acl_ids); ++ ++ atomic_set(&ZTOI(rootzp)->i_count, 0); ++ sa_handle_destroy(rootzp->z_sa_hdl); ++ kmem_cache_free(znode_cache, rootzp); ++ ++ /* ++ * Create shares directory ++ */ ++ error = zfs_create_share_dir(zsb, tx); ++ ASSERT(error == 0); ++ ++ for (i = 0; i != ZFS_OBJ_MTX_SZ; i++) ++ mutex_destroy(&zsb->z_hold_mtx[i]); ++ ++ kmem_free(sb, sizeof (struct super_block)); ++ kmem_free(zsb, sizeof (zfs_sb_t)); ++} ++#endif /* _KERNEL */ ++ ++static int ++zfs_sa_setup(objset_t *osp, sa_attr_type_t **sa_table) ++{ ++ uint64_t sa_obj = 0; ++ int error; ++ ++ error = zap_lookup(osp, MASTER_NODE_OBJ, ZFS_SA_ATTRS, 8, 1, &sa_obj); ++ if (error != 0 && error != ENOENT) ++ return (error); ++ ++ error = sa_setup(osp, sa_obj, zfs_attr_table, ZPL_END, sa_table); ++ return (error); ++} ++ ++static int ++zfs_grab_sa_handle(objset_t *osp, uint64_t obj, sa_handle_t **hdlp, ++ dmu_buf_t **db, void *tag) ++{ ++ dmu_object_info_t doi; ++ int error; ++ ++ if ((error = sa_buf_hold(osp, obj, tag, db)) != 0) ++ return (error); ++ ++ dmu_object_info_from_db(*db, &doi); ++ if ((doi.doi_bonus_type != DMU_OT_SA && ++ doi.doi_bonus_type != DMU_OT_ZNODE) || ++ (doi.doi_bonus_type == DMU_OT_ZNODE && ++ doi.doi_bonus_size < sizeof (znode_phys_t))) { ++ sa_buf_rele(*db, tag); ++ return (ENOTSUP); ++ } ++ ++ error = sa_handle_get(osp, obj, NULL, SA_HDL_PRIVATE, hdlp); ++ if (error != 0) { ++ sa_buf_rele(*db, tag); ++ return (error); ++ } ++ ++ return (0); ++} ++ ++void ++zfs_release_sa_handle(sa_handle_t *hdl, dmu_buf_t *db, void *tag) ++{ ++ sa_handle_destroy(hdl); ++ sa_buf_rele(db, tag); ++} ++ ++/* ++ * Given an object number, return its parent object number and whether ++ * or not the object is an extended attribute directory. 
++ */ ++static int ++zfs_obj_to_pobj(sa_handle_t *hdl, sa_attr_type_t *sa_table, uint64_t *pobjp, ++ int *is_xattrdir) ++{ ++ uint64_t parent; ++ uint64_t pflags; ++ uint64_t mode; ++ sa_bulk_attr_t bulk[3]; ++ int count = 0; ++ int error; ++ ++ SA_ADD_BULK_ATTR(bulk, count, sa_table[ZPL_PARENT], NULL, ++ &parent, sizeof (parent)); ++ SA_ADD_BULK_ATTR(bulk, count, sa_table[ZPL_FLAGS], NULL, ++ &pflags, sizeof (pflags)); ++ SA_ADD_BULK_ATTR(bulk, count, sa_table[ZPL_MODE], NULL, ++ &mode, sizeof (mode)); ++ ++ if ((error = sa_bulk_lookup(hdl, bulk, count)) != 0) ++ return (error); ++ ++ *pobjp = parent; ++ *is_xattrdir = ((pflags & ZFS_XATTR) != 0) && S_ISDIR(mode); ++ ++ return (0); ++} ++ ++/* ++ * Given an object number, return some zpl level statistics ++ */ ++static int ++zfs_obj_to_stats_impl(sa_handle_t *hdl, sa_attr_type_t *sa_table, ++ zfs_stat_t *sb) ++{ ++ sa_bulk_attr_t bulk[4]; ++ int count = 0; ++ ++ SA_ADD_BULK_ATTR(bulk, count, sa_table[ZPL_MODE], NULL, ++ &sb->zs_mode, sizeof (sb->zs_mode)); ++ SA_ADD_BULK_ATTR(bulk, count, sa_table[ZPL_GEN], NULL, ++ &sb->zs_gen, sizeof (sb->zs_gen)); ++ SA_ADD_BULK_ATTR(bulk, count, sa_table[ZPL_LINKS], NULL, ++ &sb->zs_links, sizeof (sb->zs_links)); ++ SA_ADD_BULK_ATTR(bulk, count, sa_table[ZPL_CTIME], NULL, ++ &sb->zs_ctime, sizeof (sb->zs_ctime)); ++ ++ return (sa_bulk_lookup(hdl, bulk, count)); ++} ++ ++static int ++zfs_obj_to_path_impl(objset_t *osp, uint64_t obj, sa_handle_t *hdl, ++ sa_attr_type_t *sa_table, char *buf, int len) ++{ ++ sa_handle_t *sa_hdl; ++ sa_handle_t *prevhdl = NULL; ++ dmu_buf_t *prevdb = NULL; ++ dmu_buf_t *sa_db = NULL; ++ char *path = buf + len - 1; ++ int error; ++ ++ *path = '\0'; ++ sa_hdl = hdl; ++ ++ for (;;) { ++ uint64_t pobj; ++ char component[MAXNAMELEN + 2]; ++ size_t complen; ++ int is_xattrdir; ++ ++ if (prevdb) ++ zfs_release_sa_handle(prevhdl, prevdb, FTAG); ++ ++ if ((error = zfs_obj_to_pobj(sa_hdl, sa_table, &pobj, ++ &is_xattrdir)) != 0) ++ break; ++ ++ if (pobj == obj) { ++ if (path[0] != '/') ++ *--path = '/'; ++ break; ++ } ++ ++ component[0] = '/'; ++ if (is_xattrdir) { ++ (void) sprintf(component + 1, ""); ++ } else { ++ error = zap_value_search(osp, pobj, obj, ++ ZFS_DIRENT_OBJ(-1ULL), component + 1); ++ if (error != 0) ++ break; ++ } ++ ++ complen = strlen(component); ++ path -= complen; ++ ASSERT(path >= buf); ++ bcopy(component, path, complen); ++ obj = pobj; ++ ++ if (sa_hdl != hdl) { ++ prevhdl = sa_hdl; ++ prevdb = sa_db; ++ } ++ error = zfs_grab_sa_handle(osp, obj, &sa_hdl, &sa_db, FTAG); ++ if (error != 0) { ++ sa_hdl = prevhdl; ++ sa_db = prevdb; ++ break; ++ } ++ } ++ ++ if (sa_hdl != NULL && sa_hdl != hdl) { ++ ASSERT(sa_db != NULL); ++ zfs_release_sa_handle(sa_hdl, sa_db, FTAG); ++ } ++ ++ if (error == 0) ++ (void) memmove(buf, path, buf + len - path); ++ ++ return (error); ++} ++ ++int ++zfs_obj_to_path(objset_t *osp, uint64_t obj, char *buf, int len) ++{ ++ sa_attr_type_t *sa_table; ++ sa_handle_t *hdl; ++ dmu_buf_t *db; ++ int error; ++ ++ error = zfs_sa_setup(osp, &sa_table); ++ if (error != 0) ++ return (error); ++ ++ error = zfs_grab_sa_handle(osp, obj, &hdl, &db, FTAG); ++ if (error != 0) ++ return (error); ++ ++ error = zfs_obj_to_path_impl(osp, obj, hdl, sa_table, buf, len); ++ ++ zfs_release_sa_handle(hdl, db, FTAG); ++ return (error); ++} ++ ++int ++zfs_obj_to_stats(objset_t *osp, uint64_t obj, zfs_stat_t *sb, ++ char *buf, int len) ++{ ++ char *path = buf + len - 1; ++ sa_attr_type_t *sa_table; ++ sa_handle_t *hdl; ++ dmu_buf_t *db; ++ int error; ++ ++ *path = 
'\0'; ++ ++ error = zfs_sa_setup(osp, &sa_table); ++ if (error != 0) ++ return (error); ++ ++ error = zfs_grab_sa_handle(osp, obj, &hdl, &db, FTAG); ++ if (error != 0) ++ return (error); ++ ++ error = zfs_obj_to_stats_impl(hdl, sa_table, sb); ++ if (error != 0) { ++ zfs_release_sa_handle(hdl, db, FTAG); ++ return (error); ++ } ++ ++ error = zfs_obj_to_path_impl(osp, obj, hdl, sa_table, buf, len); ++ ++ zfs_release_sa_handle(hdl, db, FTAG); ++ return (error); ++} ++ ++#if defined(_KERNEL) && defined(HAVE_SPL) ++EXPORT_SYMBOL(zfs_create_fs); ++EXPORT_SYMBOL(zfs_obj_to_path); ++#endif +diff -uNr linux-3.2.33-go.orig/fs/zfs/zfs/zil.c linux-3.2.33-go/fs/zfs/zfs/zil.c +--- linux-3.2.33-go.orig/fs/zfs/zfs/zil.c 1970-01-01 01:00:00.000000000 +0100 ++++ linux-3.2.33-go/fs/zfs/zfs/zil.c 2012-11-16 23:25:34.352039300 +0100 +@@ -0,0 +1,2111 @@ ++/* ++ * CDDL HEADER START ++ * ++ * The contents of this file are subject to the terms of the ++ * Common Development and Distribution License (the "License"). ++ * You may not use this file except in compliance with the License. ++ * ++ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE ++ * or http://www.opensolaris.org/os/licensing. ++ * See the License for the specific language governing permissions ++ * and limitations under the License. ++ * ++ * When distributing Covered Code, include this CDDL HEADER in each ++ * file and include the License file at usr/src/OPENSOLARIS.LICENSE. ++ * If applicable, add the following below this CDDL HEADER, with the ++ * fields enclosed by brackets "[]" replaced with your own identifying ++ * information: Portions Copyright [yyyy] [name of copyright owner] ++ * ++ * CDDL HEADER END ++ */ ++/* ++ * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2011 by Delphix. All rights reserved. ++ */ ++ ++/* Portions Copyright 2010 Robert Milkowski */ ++ ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++ ++/* ++ * The zfs intent log (ZIL) saves transaction records of system calls ++ * that change the file system in memory with enough information ++ * to be able to replay them. These are stored in memory until ++ * either the DMU transaction group (txg) commits them to the stable pool ++ * and they can be discarded, or they are flushed to the stable log ++ * (also in the pool) due to a fsync, O_DSYNC or other synchronous ++ * requirement. In the event of a panic or power fail then those log ++ * records (transactions) are replayed. ++ * ++ * There is one ZIL per file system. Its on-disk (pool) format consists ++ * of 3 parts: ++ * ++ * - ZIL header ++ * - ZIL blocks ++ * - ZIL records ++ * ++ * A log record holds a system call transaction. Log blocks can ++ * hold many log records and the blocks are chained together. ++ * Each ZIL block contains a block pointer (blkptr_t) to the next ++ * ZIL block in the chain. The ZIL header points to the first ++ * block in the chain. Note there is not a fixed place in the pool ++ * to hold blocks. They are dynamically allocated and freed as ++ * needed from the blocks available. Figure X shows the ZIL structure: ++ */ ++ ++/* ++ * See zil.h for more information about these fields. 
++ */ ++zil_stats_t zil_stats = { ++ { "zil_commit_count", KSTAT_DATA_UINT64 }, ++ { "zil_commit_writer_count", KSTAT_DATA_UINT64 }, ++ { "zil_itx_count", KSTAT_DATA_UINT64 }, ++ { "zil_itx_indirect_count", KSTAT_DATA_UINT64 }, ++ { "zil_itx_indirect_bytes", KSTAT_DATA_UINT64 }, ++ { "zil_itx_copied_count", KSTAT_DATA_UINT64 }, ++ { "zil_itx_copied_bytes", KSTAT_DATA_UINT64 }, ++ { "zil_itx_needcopy_count", KSTAT_DATA_UINT64 }, ++ { "zil_itx_needcopy_bytes", KSTAT_DATA_UINT64 }, ++ { "zil_itx_metaslab_normal_count", KSTAT_DATA_UINT64 }, ++ { "zil_itx_metaslab_normal_bytes", KSTAT_DATA_UINT64 }, ++ { "zil_itx_metaslab_slog_count", KSTAT_DATA_UINT64 }, ++ { "zil_itx_metaslab_slog_bytes", KSTAT_DATA_UINT64 }, ++}; ++ ++static kstat_t *zil_ksp; ++ ++/* ++ * This global ZIL switch affects all pools ++ */ ++int zil_replay_disable = 0; /* disable intent logging replay */ ++ ++/* ++ * Tunable parameter for debugging or performance analysis. Setting ++ * zfs_nocacheflush will cause corruption on power loss if a volatile ++ * out-of-order write cache is enabled. ++ */ ++int zfs_nocacheflush = 0; ++ ++static kmem_cache_t *zil_lwb_cache; ++ ++static void zil_async_to_sync(zilog_t *zilog, uint64_t foid); ++ ++#define LWB_EMPTY(lwb) ((BP_GET_LSIZE(&lwb->lwb_blk) - \ ++ sizeof (zil_chain_t)) == (lwb->lwb_sz - lwb->lwb_nused)) ++ ++ ++/* ++ * ziltest is by and large an ugly hack, but very useful in ++ * checking replay without tedious work. ++ * When running ziltest we want to keep all itx's and so maintain ++ * a single list in the zl_itxg[] that uses a high txg: ZILTEST_TXG ++ * We subtract TXG_CONCURRENT_STATES to allow for common code. ++ */ ++#define ZILTEST_TXG (UINT64_MAX - TXG_CONCURRENT_STATES) ++ ++static int ++zil_bp_compare(const void *x1, const void *x2) ++{ ++ const dva_t *dva1 = &((zil_bp_node_t *)x1)->zn_dva; ++ const dva_t *dva2 = &((zil_bp_node_t *)x2)->zn_dva; ++ ++ if (DVA_GET_VDEV(dva1) < DVA_GET_VDEV(dva2)) ++ return (-1); ++ if (DVA_GET_VDEV(dva1) > DVA_GET_VDEV(dva2)) ++ return (1); ++ ++ if (DVA_GET_OFFSET(dva1) < DVA_GET_OFFSET(dva2)) ++ return (-1); ++ if (DVA_GET_OFFSET(dva1) > DVA_GET_OFFSET(dva2)) ++ return (1); ++ ++ return (0); ++} ++ ++static void ++zil_bp_tree_init(zilog_t *zilog) ++{ ++ avl_create(&zilog->zl_bp_tree, zil_bp_compare, ++ sizeof (zil_bp_node_t), offsetof(zil_bp_node_t, zn_node)); ++} ++ ++static void ++zil_bp_tree_fini(zilog_t *zilog) ++{ ++ avl_tree_t *t = &zilog->zl_bp_tree; ++ zil_bp_node_t *zn; ++ void *cookie = NULL; ++ ++ while ((zn = avl_destroy_nodes(t, &cookie)) != NULL) ++ kmem_free(zn, sizeof (zil_bp_node_t)); ++ ++ avl_destroy(t); ++} ++ ++int ++zil_bp_tree_add(zilog_t *zilog, const blkptr_t *bp) ++{ ++ avl_tree_t *t = &zilog->zl_bp_tree; ++ const dva_t *dva = BP_IDENTITY(bp); ++ zil_bp_node_t *zn; ++ avl_index_t where; ++ ++ if (avl_find(t, dva, &where) != NULL) ++ return (EEXIST); ++ ++ zn = kmem_alloc(sizeof (zil_bp_node_t), KM_PUSHPAGE); ++ zn->zn_dva = *dva; ++ avl_insert(t, zn, where); ++ ++ return (0); ++} ++ ++static zil_header_t * ++zil_header_in_syncing_context(zilog_t *zilog) ++{ ++ return ((zil_header_t *)zilog->zl_header); ++} ++ ++static void ++zil_init_log_chain(zilog_t *zilog, blkptr_t *bp) ++{ ++ zio_cksum_t *zc = &bp->blk_cksum; ++ ++ zc->zc_word[ZIL_ZC_GUID_0] = spa_get_random(-1ULL); ++ zc->zc_word[ZIL_ZC_GUID_1] = spa_get_random(-1ULL); ++ zc->zc_word[ZIL_ZC_OBJSET] = dmu_objset_id(zilog->zl_os); ++ zc->zc_word[ZIL_ZC_SEQ] = 1ULL; ++} ++ ++/* ++ * Read a log block and make sure it's valid. 
++ */ ++static int ++zil_read_log_block(zilog_t *zilog, const blkptr_t *bp, blkptr_t *nbp, void *dst, ++ char **end) ++{ ++ enum zio_flag zio_flags = ZIO_FLAG_CANFAIL; ++ uint32_t aflags = ARC_WAIT; ++ arc_buf_t *abuf = NULL; ++ zbookmark_t zb; ++ int error; ++ ++ if (zilog->zl_header->zh_claim_txg == 0) ++ zio_flags |= ZIO_FLAG_SPECULATIVE | ZIO_FLAG_SCRUB; ++ ++ if (!(zilog->zl_header->zh_flags & ZIL_CLAIM_LR_SEQ_VALID)) ++ zio_flags |= ZIO_FLAG_SPECULATIVE; ++ ++ SET_BOOKMARK(&zb, bp->blk_cksum.zc_word[ZIL_ZC_OBJSET], ++ ZB_ZIL_OBJECT, ZB_ZIL_LEVEL, bp->blk_cksum.zc_word[ZIL_ZC_SEQ]); ++ ++ error = dsl_read_nolock(NULL, zilog->zl_spa, bp, arc_getbuf_func, &abuf, ++ ZIO_PRIORITY_SYNC_READ, zio_flags, &aflags, &zb); ++ ++ if (error == 0) { ++ zio_cksum_t cksum = bp->blk_cksum; ++ ++ /* ++ * Validate the checksummed log block. ++ * ++ * Sequence numbers should be... sequential. The checksum ++ * verifier for the next block should be bp's checksum plus 1. ++ * ++ * Also check the log chain linkage and size used. ++ */ ++ cksum.zc_word[ZIL_ZC_SEQ]++; ++ ++ if (BP_GET_CHECKSUM(bp) == ZIO_CHECKSUM_ZILOG2) { ++ zil_chain_t *zilc = abuf->b_data; ++ char *lr = (char *)(zilc + 1); ++ uint64_t len = zilc->zc_nused - sizeof (zil_chain_t); ++ ++ if (bcmp(&cksum, &zilc->zc_next_blk.blk_cksum, ++ sizeof (cksum)) || BP_IS_HOLE(&zilc->zc_next_blk)) { ++ error = ECKSUM; ++ } else { ++ bcopy(lr, dst, len); ++ *end = (char *)dst + len; ++ *nbp = zilc->zc_next_blk; ++ } ++ } else { ++ char *lr = abuf->b_data; ++ uint64_t size = BP_GET_LSIZE(bp); ++ zil_chain_t *zilc = (zil_chain_t *)(lr + size) - 1; ++ ++ if (bcmp(&cksum, &zilc->zc_next_blk.blk_cksum, ++ sizeof (cksum)) || BP_IS_HOLE(&zilc->zc_next_blk) || ++ (zilc->zc_nused > (size - sizeof (*zilc)))) { ++ error = ECKSUM; ++ } else { ++ bcopy(lr, dst, zilc->zc_nused); ++ *end = (char *)dst + zilc->zc_nused; ++ *nbp = zilc->zc_next_blk; ++ } ++ } ++ ++ VERIFY(arc_buf_remove_ref(abuf, &abuf) == 1); ++ } ++ ++ return (error); ++} ++ ++/* ++ * Read a TX_WRITE log data block. ++ */ ++static int ++zil_read_log_data(zilog_t *zilog, const lr_write_t *lr, void *wbuf) ++{ ++ enum zio_flag zio_flags = ZIO_FLAG_CANFAIL; ++ const blkptr_t *bp = &lr->lr_blkptr; ++ uint32_t aflags = ARC_WAIT; ++ arc_buf_t *abuf = NULL; ++ zbookmark_t zb; ++ int error; ++ ++ if (BP_IS_HOLE(bp)) { ++ if (wbuf != NULL) ++ bzero(wbuf, MAX(BP_GET_LSIZE(bp), lr->lr_length)); ++ return (0); ++ } ++ ++ if (zilog->zl_header->zh_claim_txg == 0) ++ zio_flags |= ZIO_FLAG_SPECULATIVE | ZIO_FLAG_SCRUB; ++ ++ SET_BOOKMARK(&zb, dmu_objset_id(zilog->zl_os), lr->lr_foid, ++ ZB_ZIL_LEVEL, lr->lr_offset / BP_GET_LSIZE(bp)); ++ ++ error = arc_read_nolock(NULL, zilog->zl_spa, bp, arc_getbuf_func, &abuf, ++ ZIO_PRIORITY_SYNC_READ, zio_flags, &aflags, &zb); ++ ++ if (error == 0) { ++ if (wbuf != NULL) ++ bcopy(abuf->b_data, wbuf, arc_buf_size(abuf)); ++ (void) arc_buf_remove_ref(abuf, &abuf); ++ } ++ ++ return (error); ++} ++ ++/* ++ * Parse the intent log, and call parse_func for each valid record within. ++ */ ++int ++zil_parse(zilog_t *zilog, zil_parse_blk_func_t *parse_blk_func, ++ zil_parse_lr_func_t *parse_lr_func, void *arg, uint64_t txg) ++{ ++ const zil_header_t *zh = zilog->zl_header; ++ boolean_t claimed = !!zh->zh_claim_txg; ++ uint64_t claim_blk_seq = claimed ? zh->zh_claim_blk_seq : UINT64_MAX; ++ uint64_t claim_lr_seq = claimed ? 
zh->zh_claim_lr_seq : UINT64_MAX; ++ uint64_t max_blk_seq = 0; ++ uint64_t max_lr_seq = 0; ++ uint64_t blk_count = 0; ++ uint64_t lr_count = 0; ++ blkptr_t blk, next_blk; ++ char *lrbuf, *lrp; ++ int error = 0; ++ ++ bzero(&next_blk, sizeof(blkptr_t)); ++ ++ /* ++ * Old logs didn't record the maximum zh_claim_lr_seq. ++ */ ++ if (!(zh->zh_flags & ZIL_CLAIM_LR_SEQ_VALID)) ++ claim_lr_seq = UINT64_MAX; ++ ++ /* ++ * Starting at the block pointed to by zh_log we read the log chain. ++ * For each block in the chain we strongly check that block to ++ * ensure its validity. We stop when an invalid block is found. ++ * For each block pointer in the chain we call parse_blk_func(). ++ * For each record in each valid block we call parse_lr_func(). ++ * If the log has been claimed, stop if we encounter a sequence ++ * number greater than the highest claimed sequence number. ++ */ ++ lrbuf = zio_buf_alloc(SPA_MAXBLOCKSIZE); ++ zil_bp_tree_init(zilog); ++ ++ for (blk = zh->zh_log; !BP_IS_HOLE(&blk); blk = next_blk) { ++ uint64_t blk_seq = blk.blk_cksum.zc_word[ZIL_ZC_SEQ]; ++ int reclen; ++ char *end = NULL; ++ ++ if (blk_seq > claim_blk_seq) ++ break; ++ if ((error = parse_blk_func(zilog, &blk, arg, txg)) != 0) ++ break; ++ ASSERT3U(max_blk_seq, <, blk_seq); ++ max_blk_seq = blk_seq; ++ blk_count++; ++ ++ if (max_lr_seq == claim_lr_seq && max_blk_seq == claim_blk_seq) ++ break; ++ ++ error = zil_read_log_block(zilog, &blk, &next_blk, lrbuf, &end); ++ if (error) ++ break; ++ ++ for (lrp = lrbuf; lrp < end; lrp += reclen) { ++ lr_t *lr = (lr_t *)lrp; ++ reclen = lr->lrc_reclen; ++ ASSERT3U(reclen, >=, sizeof (lr_t)); ++ if (lr->lrc_seq > claim_lr_seq) ++ goto done; ++ if ((error = parse_lr_func(zilog, lr, arg, txg)) != 0) ++ goto done; ++ ASSERT3U(max_lr_seq, <, lr->lrc_seq); ++ max_lr_seq = lr->lrc_seq; ++ lr_count++; ++ } ++ } ++done: ++ zilog->zl_parse_error = error; ++ zilog->zl_parse_blk_seq = max_blk_seq; ++ zilog->zl_parse_lr_seq = max_lr_seq; ++ zilog->zl_parse_blk_count = blk_count; ++ zilog->zl_parse_lr_count = lr_count; ++ ++ ASSERT(!claimed || !(zh->zh_flags & ZIL_CLAIM_LR_SEQ_VALID) || ++ (max_blk_seq == claim_blk_seq && max_lr_seq == claim_lr_seq)); ++ ++ zil_bp_tree_fini(zilog); ++ zio_buf_free(lrbuf, SPA_MAXBLOCKSIZE); ++ ++ return (error); ++} ++ ++static int ++zil_claim_log_block(zilog_t *zilog, blkptr_t *bp, void *tx, uint64_t first_txg) ++{ ++ /* ++ * Claim log block if not already committed and not already claimed. ++ * If tx == NULL, just verify that the block is claimable. ++ */ ++ if (bp->blk_birth < first_txg || zil_bp_tree_add(zilog, bp) != 0) ++ return (0); ++ ++ return (zio_wait(zio_claim(NULL, zilog->zl_spa, ++ tx == NULL ? 0 : first_txg, bp, spa_claim_notify, NULL, ++ ZIO_FLAG_CANFAIL | ZIO_FLAG_SPECULATIVE | ZIO_FLAG_SCRUB))); ++} ++ ++static int ++zil_claim_log_record(zilog_t *zilog, lr_t *lrc, void *tx, uint64_t first_txg) ++{ ++ lr_write_t *lr = (lr_write_t *)lrc; ++ int error; ++ ++ if (lrc->lrc_txtype != TX_WRITE) ++ return (0); ++ ++ /* ++ * If the block is not readable, don't claim it. This can happen ++ * in normal operation when a log block is written to disk before ++ * some of the dmu_sync() blocks it points to. In this case, the ++ * transaction cannot have been committed to anyone (we would have ++ * waited for all writes to be stable first), so it is semantically ++ * correct to declare this the end of the log. 
++ */ ++ if (lr->lr_blkptr.blk_birth >= first_txg && ++ (error = zil_read_log_data(zilog, lr, NULL)) != 0) ++ return (error); ++ return (zil_claim_log_block(zilog, &lr->lr_blkptr, tx, first_txg)); ++} ++ ++/* ARGSUSED */ ++static int ++zil_free_log_block(zilog_t *zilog, blkptr_t *bp, void *tx, uint64_t claim_txg) ++{ ++ zio_free_zil(zilog->zl_spa, dmu_tx_get_txg(tx), bp); ++ ++ return (0); ++} ++ ++static int ++zil_free_log_record(zilog_t *zilog, lr_t *lrc, void *tx, uint64_t claim_txg) ++{ ++ lr_write_t *lr = (lr_write_t *)lrc; ++ blkptr_t *bp = &lr->lr_blkptr; ++ ++ /* ++ * If we previously claimed it, we need to free it. ++ */ ++ if (claim_txg != 0 && lrc->lrc_txtype == TX_WRITE && ++ bp->blk_birth >= claim_txg && zil_bp_tree_add(zilog, bp) == 0) ++ zio_free(zilog->zl_spa, dmu_tx_get_txg(tx), bp); ++ ++ return (0); ++} ++ ++static lwb_t * ++zil_alloc_lwb(zilog_t *zilog, blkptr_t *bp, uint64_t txg, boolean_t fastwrite) ++{ ++ lwb_t *lwb; ++ ++ lwb = kmem_cache_alloc(zil_lwb_cache, KM_PUSHPAGE); ++ lwb->lwb_zilog = zilog; ++ lwb->lwb_blk = *bp; ++ lwb->lwb_fastwrite = fastwrite; ++ lwb->lwb_buf = zio_buf_alloc(BP_GET_LSIZE(bp)); ++ lwb->lwb_max_txg = txg; ++ lwb->lwb_zio = NULL; ++ lwb->lwb_tx = NULL; ++ if (BP_GET_CHECKSUM(bp) == ZIO_CHECKSUM_ZILOG2) { ++ lwb->lwb_nused = sizeof (zil_chain_t); ++ lwb->lwb_sz = BP_GET_LSIZE(bp); ++ } else { ++ lwb->lwb_nused = 0; ++ lwb->lwb_sz = BP_GET_LSIZE(bp) - sizeof (zil_chain_t); ++ } ++ ++ mutex_enter(&zilog->zl_lock); ++ list_insert_tail(&zilog->zl_lwb_list, lwb); ++ mutex_exit(&zilog->zl_lock); ++ ++ return (lwb); ++} ++ ++/* ++ * Create an on-disk intent log. ++ */ ++static lwb_t * ++zil_create(zilog_t *zilog) ++{ ++ const zil_header_t *zh = zilog->zl_header; ++ lwb_t *lwb = NULL; ++ uint64_t txg = 0; ++ dmu_tx_t *tx = NULL; ++ blkptr_t blk; ++ int error = 0; ++ boolean_t fastwrite = FALSE; ++ ++ /* ++ * Wait for any previous destroy to complete. ++ */ ++ txg_wait_synced(zilog->zl_dmu_pool, zilog->zl_destroy_txg); ++ ++ ASSERT(zh->zh_claim_txg == 0); ++ ASSERT(zh->zh_replay_seq == 0); ++ ++ blk = zh->zh_log; ++ ++ /* ++ * Allocate an initial log block if: ++ * - there isn't one already ++ * - the existing block is the wrong endianess ++ */ ++ if (BP_IS_HOLE(&blk) || BP_SHOULD_BYTESWAP(&blk)) { ++ tx = dmu_tx_create(zilog->zl_os); ++ VERIFY(dmu_tx_assign(tx, TXG_WAIT) == 0); ++ dsl_dataset_dirty(dmu_objset_ds(zilog->zl_os), tx); ++ txg = dmu_tx_get_txg(tx); ++ ++ if (!BP_IS_HOLE(&blk)) { ++ zio_free_zil(zilog->zl_spa, txg, &blk); ++ BP_ZERO(&blk); ++ } ++ ++ error = zio_alloc_zil(zilog->zl_spa, txg, &blk, ++ ZIL_MIN_BLKSZ, B_TRUE); ++ fastwrite = TRUE; ++ ++ if (error == 0) ++ zil_init_log_chain(zilog, &blk); ++ } ++ ++ /* ++ * Allocate a log write buffer (lwb) for the first log block. ++ */ ++ if (error == 0) ++ lwb = zil_alloc_lwb(zilog, &blk, txg, fastwrite); ++ ++ /* ++ * If we just allocated the first log block, commit our transaction ++ * and wait for zil_sync() to stuff the block poiner into zh_log. ++ * (zh is part of the MOS, so we cannot modify it in open context.) ++ */ ++ if (tx != NULL) { ++ dmu_tx_commit(tx); ++ txg_wait_synced(zilog->zl_dmu_pool, txg); ++ } ++ ++ ASSERT(bcmp(&blk, &zh->zh_log, sizeof (blk)) == 0); ++ ++ return (lwb); ++} ++ ++/* ++ * In one tx, free all log blocks and clear the log header. ++ * If keep_first is set, then we're replaying a log with no content. ++ * We want to keep the first block, however, so that the first ++ * synchronous transaction doesn't require a txg_wait_synced() ++ * in zil_create(). 
We don't need to txg_wait_synced() here either ++ * when keep_first is set, because both zil_create() and zil_destroy() ++ * will wait for any in-progress destroys to complete. ++ */ ++void ++zil_destroy(zilog_t *zilog, boolean_t keep_first) ++{ ++ const zil_header_t *zh = zilog->zl_header; ++ lwb_t *lwb; ++ dmu_tx_t *tx; ++ uint64_t txg; ++ ++ /* ++ * Wait for any previous destroy to complete. ++ */ ++ txg_wait_synced(zilog->zl_dmu_pool, zilog->zl_destroy_txg); ++ ++ zilog->zl_old_header = *zh; /* debugging aid */ ++ ++ if (BP_IS_HOLE(&zh->zh_log)) ++ return; ++ ++ tx = dmu_tx_create(zilog->zl_os); ++ VERIFY(dmu_tx_assign(tx, TXG_WAIT) == 0); ++ dsl_dataset_dirty(dmu_objset_ds(zilog->zl_os), tx); ++ txg = dmu_tx_get_txg(tx); ++ ++ mutex_enter(&zilog->zl_lock); ++ ++ ASSERT3U(zilog->zl_destroy_txg, <, txg); ++ zilog->zl_destroy_txg = txg; ++ zilog->zl_keep_first = keep_first; ++ ++ if (!list_is_empty(&zilog->zl_lwb_list)) { ++ ASSERT(zh->zh_claim_txg == 0); ++ VERIFY(!keep_first); ++ while ((lwb = list_head(&zilog->zl_lwb_list)) != NULL) { ++ ASSERT(lwb->lwb_zio == NULL); ++ if (lwb->lwb_fastwrite) ++ metaslab_fastwrite_unmark(zilog->zl_spa, ++ &lwb->lwb_blk); ++ list_remove(&zilog->zl_lwb_list, lwb); ++ if (lwb->lwb_buf != NULL) ++ zio_buf_free(lwb->lwb_buf, lwb->lwb_sz); ++ zio_free_zil(zilog->zl_spa, txg, &lwb->lwb_blk); ++ kmem_cache_free(zil_lwb_cache, lwb); ++ } ++ } else if (!keep_first) { ++ (void) zil_parse(zilog, zil_free_log_block, ++ zil_free_log_record, tx, zh->zh_claim_txg); ++ } ++ mutex_exit(&zilog->zl_lock); ++ ++ dmu_tx_commit(tx); ++} ++ ++int ++zil_claim(const char *osname, void *txarg) ++{ ++ dmu_tx_t *tx = txarg; ++ uint64_t first_txg = dmu_tx_get_txg(tx); ++ zilog_t *zilog; ++ zil_header_t *zh; ++ objset_t *os; ++ int error; ++ ++ error = dmu_objset_hold(osname, FTAG, &os); ++ if (error) { ++ cmn_err(CE_WARN, "can't open objset for %s", osname); ++ return (0); ++ } ++ ++ zilog = dmu_objset_zil(os); ++ zh = zil_header_in_syncing_context(zilog); ++ ++ if (spa_get_log_state(zilog->zl_spa) == SPA_LOG_CLEAR) { ++ if (!BP_IS_HOLE(&zh->zh_log)) ++ zio_free_zil(zilog->zl_spa, first_txg, &zh->zh_log); ++ BP_ZERO(&zh->zh_log); ++ dsl_dataset_dirty(dmu_objset_ds(os), tx); ++ dmu_objset_rele(os, FTAG); ++ return (0); ++ } ++ ++ /* ++ * Claim all log blocks if we haven't already done so, and remember ++ * the highest claimed sequence number. This ensures that if we can ++ * read only part of the log now (e.g. due to a missing device), ++ * but we can read the entire log later, we will not try to replay ++ * or destroy beyond the last block we successfully claimed. ++ */ ++ ASSERT3U(zh->zh_claim_txg, <=, first_txg); ++ if (zh->zh_claim_txg == 0 && !BP_IS_HOLE(&zh->zh_log)) { ++ (void) zil_parse(zilog, zil_claim_log_block, ++ zil_claim_log_record, tx, first_txg); ++ zh->zh_claim_txg = first_txg; ++ zh->zh_claim_blk_seq = zilog->zl_parse_blk_seq; ++ zh->zh_claim_lr_seq = zilog->zl_parse_lr_seq; ++ if (zilog->zl_parse_lr_count || zilog->zl_parse_blk_count > 1) ++ zh->zh_flags |= ZIL_REPLAY_NEEDED; ++ zh->zh_flags |= ZIL_CLAIM_LR_SEQ_VALID; ++ dsl_dataset_dirty(dmu_objset_ds(os), tx); ++ } ++ ++ ASSERT3U(first_txg, ==, (spa_last_synced_txg(zilog->zl_spa) + 1)); ++ dmu_objset_rele(os, FTAG); ++ return (0); ++} ++ ++/* ++ * Check the log by walking the log chain. ++ * Checksum errors are ok as they indicate the end of the chain. ++ * Any other error (no device or read failure) returns an error. 
++ */ ++int ++zil_check_log_chain(const char *osname, void *tx) ++{ ++ zilog_t *zilog; ++ objset_t *os; ++ blkptr_t *bp; ++ int error; ++ ++ ASSERT(tx == NULL); ++ ++ error = dmu_objset_hold(osname, FTAG, &os); ++ if (error) { ++ cmn_err(CE_WARN, "can't open objset for %s", osname); ++ return (0); ++ } ++ ++ zilog = dmu_objset_zil(os); ++ bp = (blkptr_t *)&zilog->zl_header->zh_log; ++ ++ /* ++ * Check the first block and determine if it's on a log device ++ * which may have been removed or faulted prior to loading this ++ * pool. If so, there's no point in checking the rest of the log ++ * as its content should have already been synced to the pool. ++ */ ++ if (!BP_IS_HOLE(bp)) { ++ vdev_t *vd; ++ boolean_t valid = B_TRUE; ++ ++ spa_config_enter(os->os_spa, SCL_STATE, FTAG, RW_READER); ++ vd = vdev_lookup_top(os->os_spa, DVA_GET_VDEV(&bp->blk_dva[0])); ++ if (vd->vdev_islog && vdev_is_dead(vd)) ++ valid = vdev_log_state_valid(vd); ++ spa_config_exit(os->os_spa, SCL_STATE, FTAG); ++ ++ if (!valid) { ++ dmu_objset_rele(os, FTAG); ++ return (0); ++ } ++ } ++ ++ /* ++ * Because tx == NULL, zil_claim_log_block() will not actually claim ++ * any blocks, but just determine whether it is possible to do so. ++ * In addition to checking the log chain, zil_claim_log_block() ++ * will invoke zio_claim() with a done func of spa_claim_notify(), ++ * which will update spa_max_claim_txg. See spa_load() for details. ++ */ ++ error = zil_parse(zilog, zil_claim_log_block, zil_claim_log_record, tx, ++ zilog->zl_header->zh_claim_txg ? -1ULL : spa_first_txg(os->os_spa)); ++ ++ dmu_objset_rele(os, FTAG); ++ ++ return ((error == ECKSUM || error == ENOENT) ? 0 : error); ++} ++ ++static int ++zil_vdev_compare(const void *x1, const void *x2) ++{ ++ const uint64_t v1 = ((zil_vdev_node_t *)x1)->zv_vdev; ++ const uint64_t v2 = ((zil_vdev_node_t *)x2)->zv_vdev; ++ ++ if (v1 < v2) ++ return (-1); ++ if (v1 > v2) ++ return (1); ++ ++ return (0); ++} ++ ++void ++zil_add_block(zilog_t *zilog, const blkptr_t *bp) ++{ ++ avl_tree_t *t = &zilog->zl_vdev_tree; ++ avl_index_t where; ++ zil_vdev_node_t *zv, zvsearch; ++ int ndvas = BP_GET_NDVAS(bp); ++ int i; ++ ++ if (zfs_nocacheflush) ++ return; ++ ++ ASSERT(zilog->zl_writer); ++ ++ /* ++ * Even though we're zl_writer, we still need a lock because the ++ * zl_get_data() callbacks may have dmu_sync() done callbacks ++ * that will run concurrently. ++ */ ++ mutex_enter(&zilog->zl_vdev_lock); ++ for (i = 0; i < ndvas; i++) { ++ zvsearch.zv_vdev = DVA_GET_VDEV(&bp->blk_dva[i]); ++ if (avl_find(t, &zvsearch, &where) == NULL) { ++ zv = kmem_alloc(sizeof (*zv), KM_PUSHPAGE); ++ zv->zv_vdev = zvsearch.zv_vdev; ++ avl_insert(t, zv, where); ++ } ++ } ++ mutex_exit(&zilog->zl_vdev_lock); ++} ++ ++static void ++zil_flush_vdevs(zilog_t *zilog) ++{ ++ spa_t *spa = zilog->zl_spa; ++ avl_tree_t *t = &zilog->zl_vdev_tree; ++ void *cookie = NULL; ++ zil_vdev_node_t *zv; ++ zio_t *zio; ++ ++ ASSERT(zilog->zl_writer); ++ ++ /* ++ * We don't need zl_vdev_lock here because we're the zl_writer, ++ * and all zl_get_data() callbacks are done. ++ */ ++ if (avl_numnodes(t) == 0) ++ return; ++ ++ spa_config_enter(spa, SCL_STATE, FTAG, RW_READER); ++ ++ zio = zio_root(spa, NULL, NULL, ZIO_FLAG_CANFAIL); ++ ++ while ((zv = avl_destroy_nodes(t, &cookie)) != NULL) { ++ vdev_t *vd = vdev_lookup_top(spa, zv->zv_vdev); ++ if (vd != NULL) ++ zio_flush(zio, vd); ++ kmem_free(zv, sizeof (*zv)); ++ } ++ ++ /* ++ * Wait for all the flushes to complete. 
Not all devices actually ++ * support the DKIOCFLUSHWRITECACHE ioctl, so it's OK if it fails. ++ */ ++ (void) zio_wait(zio); ++ ++ spa_config_exit(spa, SCL_STATE, FTAG); ++} ++ ++/* ++ * Function called when a log block write completes ++ */ ++static void ++zil_lwb_write_done(zio_t *zio) ++{ ++ lwb_t *lwb = zio->io_private; ++ zilog_t *zilog = lwb->lwb_zilog; ++ dmu_tx_t *tx = lwb->lwb_tx; ++ ++ ASSERT(BP_GET_COMPRESS(zio->io_bp) == ZIO_COMPRESS_OFF); ++ ASSERT(BP_GET_TYPE(zio->io_bp) == DMU_OT_INTENT_LOG); ++ ASSERT(BP_GET_LEVEL(zio->io_bp) == 0); ++ ASSERT(BP_GET_BYTEORDER(zio->io_bp) == ZFS_HOST_BYTEORDER); ++ ASSERT(!BP_IS_GANG(zio->io_bp)); ++ ASSERT(!BP_IS_HOLE(zio->io_bp)); ++ ASSERT(zio->io_bp->blk_fill == 0); ++ ++ /* ++ * Ensure the lwb buffer pointer is cleared before releasing ++ * the txg. If we have had an allocation failure and ++ * the txg is waiting to sync then we want want zil_sync() ++ * to remove the lwb so that it's not picked up as the next new ++ * one in zil_commit_writer(). zil_sync() will only remove ++ * the lwb if lwb_buf is null. ++ */ ++ zio_buf_free(lwb->lwb_buf, lwb->lwb_sz); ++ mutex_enter(&zilog->zl_lock); ++ lwb->lwb_zio = NULL; ++ lwb->lwb_fastwrite = FALSE; ++ lwb->lwb_buf = NULL; ++ lwb->lwb_tx = NULL; ++ mutex_exit(&zilog->zl_lock); ++ ++ /* ++ * Now that we've written this log block, we have a stable pointer ++ * to the next block in the chain, so it's OK to let the txg in ++ * which we allocated the next block sync. ++ */ ++ dmu_tx_commit(tx); ++} ++ ++/* ++ * Initialize the io for a log block. ++ */ ++static void ++zil_lwb_write_init(zilog_t *zilog, lwb_t *lwb) ++{ ++ zbookmark_t zb; ++ ++ SET_BOOKMARK(&zb, lwb->lwb_blk.blk_cksum.zc_word[ZIL_ZC_OBJSET], ++ ZB_ZIL_OBJECT, ZB_ZIL_LEVEL, ++ lwb->lwb_blk.blk_cksum.zc_word[ZIL_ZC_SEQ]); ++ ++ if (zilog->zl_root_zio == NULL) { ++ zilog->zl_root_zio = zio_root(zilog->zl_spa, NULL, NULL, ++ ZIO_FLAG_CANFAIL); ++ } ++ ++ /* Lock so zil_sync() doesn't fastwrite_unmark after zio is created */ ++ mutex_enter(&zilog->zl_lock); ++ if (lwb->lwb_zio == NULL) { ++ if (!lwb->lwb_fastwrite) { ++ metaslab_fastwrite_mark(zilog->zl_spa, &lwb->lwb_blk); ++ lwb->lwb_fastwrite = 1; ++ } ++ lwb->lwb_zio = zio_rewrite(zilog->zl_root_zio, zilog->zl_spa, ++ 0, &lwb->lwb_blk, lwb->lwb_buf, BP_GET_LSIZE(&lwb->lwb_blk), ++ zil_lwb_write_done, lwb, ZIO_PRIORITY_LOG_WRITE, ++ ZIO_FLAG_CANFAIL | ZIO_FLAG_DONT_PROPAGATE | ++ ZIO_FLAG_FASTWRITE, &zb); ++ } ++ mutex_exit(&zilog->zl_lock); ++} ++ ++/* ++ * Define a limited set of intent log block sizes. ++ * These must be a multiple of 4KB. Note only the amount used (again ++ * aligned to 4KB) actually gets written. However, we can't always just ++ * allocate SPA_MAXBLOCKSIZE as the slog space could be exhausted. ++ */ ++uint64_t zil_block_buckets[] = { ++ 4096, /* non TX_WRITE */ ++ 8192+4096, /* data base */ ++ 32*1024 + 4096, /* NFS writes */ ++ UINT64_MAX ++}; ++ ++/* ++ * Use the slog as long as the current commit size is less than the ++ * limit or the total list size is less than 2X the limit. Limit ++ * checking is disabled by setting zil_slog_limit to UINT64_MAX. ++ */ ++unsigned long zil_slog_limit = 1024 * 1024; ++#define USE_SLOG(zilog) (((zilog)->zl_cur_used < zil_slog_limit) || \ ++ ((zilog)->zl_itx_list_sz < (zil_slog_limit << 1))) ++ ++/* ++ * Start a log block write and advance to the next log block. ++ * Calls are serialized. 
++ */ ++static lwb_t * ++zil_lwb_write_start(zilog_t *zilog, lwb_t *lwb) ++{ ++ lwb_t *nlwb = NULL; ++ zil_chain_t *zilc; ++ spa_t *spa = zilog->zl_spa; ++ blkptr_t *bp; ++ dmu_tx_t *tx; ++ uint64_t txg; ++ uint64_t zil_blksz, wsz; ++ int i, error; ++ boolean_t use_slog; ++ ++ if (BP_GET_CHECKSUM(&lwb->lwb_blk) == ZIO_CHECKSUM_ZILOG2) { ++ zilc = (zil_chain_t *)lwb->lwb_buf; ++ bp = &zilc->zc_next_blk; ++ } else { ++ zilc = (zil_chain_t *)(lwb->lwb_buf + lwb->lwb_sz); ++ bp = &zilc->zc_next_blk; ++ } ++ ++ ASSERT(lwb->lwb_nused <= lwb->lwb_sz); ++ ++ /* ++ * Allocate the next block and save its address in this block ++ * before writing it in order to establish the log chain. ++ * Note that if the allocation of nlwb synced before we wrote ++ * the block that points at it (lwb), we'd leak it if we crashed. ++ * Therefore, we don't do dmu_tx_commit() until zil_lwb_write_done(). ++ * We dirty the dataset to ensure that zil_sync() will be called ++ * to clean up in the event of allocation failure or I/O failure. ++ */ ++ tx = dmu_tx_create(zilog->zl_os); ++ VERIFY(dmu_tx_assign(tx, TXG_WAIT) == 0); ++ dsl_dataset_dirty(dmu_objset_ds(zilog->zl_os), tx); ++ txg = dmu_tx_get_txg(tx); ++ ++ lwb->lwb_tx = tx; ++ ++ /* ++ * Log blocks are pre-allocated. Here we select the size of the next ++ * block, based on size used in the last block. ++ * - first find the smallest bucket that will fit the block from a ++ * limited set of block sizes. This is because it's faster to write ++ * blocks allocated from the same metaslab as they are adjacent or ++ * close. ++ * - next find the maximum from the new suggested size and an array of ++ * previous sizes. This lessens a picket fence effect of wrongly ++ * guesssing the size if we have a stream of say 2k, 64k, 2k, 64k ++ * requests. ++ * ++ * Note we only write what is used, but we can't just allocate ++ * the maximum block size because we can exhaust the available ++ * pool log space. ++ */ ++ zil_blksz = zilog->zl_cur_used + sizeof (zil_chain_t); ++ for (i = 0; zil_blksz > zil_block_buckets[i]; i++) ++ continue; ++ zil_blksz = zil_block_buckets[i]; ++ if (zil_blksz == UINT64_MAX) ++ zil_blksz = SPA_MAXBLOCKSIZE; ++ zilog->zl_prev_blks[zilog->zl_prev_rotor] = zil_blksz; ++ for (i = 0; i < ZIL_PREV_BLKS; i++) ++ zil_blksz = MAX(zil_blksz, zilog->zl_prev_blks[i]); ++ zilog->zl_prev_rotor = (zilog->zl_prev_rotor + 1) & (ZIL_PREV_BLKS - 1); ++ ++ BP_ZERO(bp); ++ use_slog = USE_SLOG(zilog); ++ error = zio_alloc_zil(spa, txg, bp, zil_blksz, USE_SLOG(zilog)); ++ if (use_slog) ++ { ++ ZIL_STAT_BUMP(zil_itx_metaslab_slog_count); ++ ZIL_STAT_INCR(zil_itx_metaslab_slog_bytes, lwb->lwb_nused); ++ } ++ else ++ { ++ ZIL_STAT_BUMP(zil_itx_metaslab_normal_count); ++ ZIL_STAT_INCR(zil_itx_metaslab_normal_bytes, lwb->lwb_nused); ++ } ++ if (!error) { ++ ASSERT3U(bp->blk_birth, ==, txg); ++ bp->blk_cksum = lwb->lwb_blk.blk_cksum; ++ bp->blk_cksum.zc_word[ZIL_ZC_SEQ]++; ++ ++ /* ++ * Allocate a new log write buffer (lwb). ++ */ ++ nlwb = zil_alloc_lwb(zilog, bp, txg, TRUE); ++ ++ /* Record the block for later vdev flushing */ ++ zil_add_block(zilog, &lwb->lwb_blk); ++ } ++ ++ if (BP_GET_CHECKSUM(&lwb->lwb_blk) == ZIO_CHECKSUM_ZILOG2) { ++ /* For Slim ZIL only write what is used. 
*/ ++ wsz = P2ROUNDUP_TYPED(lwb->lwb_nused, ZIL_MIN_BLKSZ, uint64_t); ++ ASSERT3U(wsz, <=, lwb->lwb_sz); ++ zio_shrink(lwb->lwb_zio, wsz); ++ ++ } else { ++ wsz = lwb->lwb_sz; ++ } ++ ++ zilc->zc_pad = 0; ++ zilc->zc_nused = lwb->lwb_nused; ++ zilc->zc_eck.zec_cksum = lwb->lwb_blk.blk_cksum; ++ ++ /* ++ * clear unused data for security ++ */ ++ bzero(lwb->lwb_buf + lwb->lwb_nused, wsz - lwb->lwb_nused); ++ ++ zio_nowait(lwb->lwb_zio); /* Kick off the write for the old log block */ ++ ++ /* ++ * If there was an allocation failure then nlwb will be null which ++ * forces a txg_wait_synced(). ++ */ ++ return (nlwb); ++} ++ ++static lwb_t * ++zil_lwb_commit(zilog_t *zilog, itx_t *itx, lwb_t *lwb) ++{ ++ lr_t *lrc = &itx->itx_lr; /* common log record */ ++ lr_write_t *lrw = (lr_write_t *)lrc; ++ char *lr_buf; ++ uint64_t txg = lrc->lrc_txg; ++ uint64_t reclen = lrc->lrc_reclen; ++ uint64_t dlen = 0; ++ ++ if (lwb == NULL) ++ return (NULL); ++ ++ ASSERT(lwb->lwb_buf != NULL); ++ ++ if (lrc->lrc_txtype == TX_WRITE && itx->itx_wr_state == WR_NEED_COPY) ++ dlen = P2ROUNDUP_TYPED( ++ lrw->lr_length, sizeof (uint64_t), uint64_t); ++ ++ zilog->zl_cur_used += (reclen + dlen); ++ ++ zil_lwb_write_init(zilog, lwb); ++ ++ /* ++ * If this record won't fit in the current log block, start a new one. ++ */ ++ if (lwb->lwb_nused + reclen + dlen > lwb->lwb_sz) { ++ lwb = zil_lwb_write_start(zilog, lwb); ++ if (lwb == NULL) ++ return (NULL); ++ zil_lwb_write_init(zilog, lwb); ++ ASSERT(LWB_EMPTY(lwb)); ++ if (lwb->lwb_nused + reclen + dlen > lwb->lwb_sz) { ++ txg_wait_synced(zilog->zl_dmu_pool, txg); ++ return (lwb); ++ } ++ } ++ ++ lr_buf = lwb->lwb_buf + lwb->lwb_nused; ++ bcopy(lrc, lr_buf, reclen); ++ lrc = (lr_t *)lr_buf; ++ lrw = (lr_write_t *)lrc; ++ ++ ZIL_STAT_BUMP(zil_itx_count); ++ ++ /* ++ * If it's a write, fetch the data or get its blkptr as appropriate. ++ */ ++ if (lrc->lrc_txtype == TX_WRITE) { ++ if (txg > spa_freeze_txg(zilog->zl_spa)) ++ txg_wait_synced(zilog->zl_dmu_pool, txg); ++ if (itx->itx_wr_state == WR_COPIED) { ++ ZIL_STAT_BUMP(zil_itx_copied_count); ++ ZIL_STAT_INCR(zil_itx_copied_bytes, lrw->lr_length); ++ } else { ++ char *dbuf; ++ int error; ++ ++ if (dlen) { ++ ASSERT(itx->itx_wr_state == WR_NEED_COPY); ++ dbuf = lr_buf + reclen; ++ lrw->lr_common.lrc_reclen += dlen; ++ ZIL_STAT_BUMP(zil_itx_needcopy_count); ++ ZIL_STAT_INCR(zil_itx_needcopy_bytes, lrw->lr_length); ++ } else { ++ ASSERT(itx->itx_wr_state == WR_INDIRECT); ++ dbuf = NULL; ++ ZIL_STAT_BUMP(zil_itx_indirect_count); ++ ZIL_STAT_INCR(zil_itx_indirect_bytes, lrw->lr_length); ++ } ++ error = zilog->zl_get_data( ++ itx->itx_private, lrw, dbuf, lwb->lwb_zio); ++ if (error == EIO) { ++ txg_wait_synced(zilog->zl_dmu_pool, txg); ++ return (lwb); ++ } ++ if (error) { ++ ASSERT(error == ENOENT || error == EEXIST || ++ error == EALREADY); ++ return (lwb); ++ } ++ } ++ } ++ ++ /* ++ * We're actually making an entry, so update lrc_seq to be the ++ * log record sequence number. Note that this is generally not ++ * equal to the itx sequence number because not all transactions ++ * are synchronous, and sometimes spa_sync() gets there first. 
++ */ ++ lrc->lrc_seq = ++zilog->zl_lr_seq; /* we are single threaded */ ++ lwb->lwb_nused += reclen + dlen; ++ lwb->lwb_max_txg = MAX(lwb->lwb_max_txg, txg); ++ ASSERT3U(lwb->lwb_nused, <=, lwb->lwb_sz); ++ ASSERT3U(P2PHASE(lwb->lwb_nused, sizeof (uint64_t)), ==, 0); ++ ++ return (lwb); ++} ++ ++itx_t * ++zil_itx_create(uint64_t txtype, size_t lrsize) ++{ ++ itx_t *itx; ++ ++ lrsize = P2ROUNDUP_TYPED(lrsize, sizeof (uint64_t), size_t); ++ ++ itx = kmem_alloc(offsetof(itx_t, itx_lr) + lrsize, ++ KM_PUSHPAGE | KM_NODEBUG); ++ itx->itx_lr.lrc_txtype = txtype; ++ itx->itx_lr.lrc_reclen = lrsize; ++ itx->itx_sod = lrsize; /* if write & WR_NEED_COPY will be increased */ ++ itx->itx_lr.lrc_seq = 0; /* defensive */ ++ itx->itx_sync = B_TRUE; /* default is synchronous */ ++ ++ return (itx); ++} ++ ++void ++zil_itx_destroy(itx_t *itx) ++{ ++ kmem_free(itx, offsetof(itx_t, itx_lr) + itx->itx_lr.lrc_reclen); ++} ++ ++/* ++ * Free up the sync and async itxs. The itxs_t has already been detached ++ * so no locks are needed. ++ */ ++static void ++zil_itxg_clean(itxs_t *itxs) ++{ ++ itx_t *itx; ++ list_t *list; ++ avl_tree_t *t; ++ void *cookie; ++ itx_async_node_t *ian; ++ ++ list = &itxs->i_sync_list; ++ while ((itx = list_head(list)) != NULL) { ++ list_remove(list, itx); ++ kmem_free(itx, offsetof(itx_t, itx_lr) + ++ itx->itx_lr.lrc_reclen); ++ } ++ ++ cookie = NULL; ++ t = &itxs->i_async_tree; ++ while ((ian = avl_destroy_nodes(t, &cookie)) != NULL) { ++ list = &ian->ia_list; ++ while ((itx = list_head(list)) != NULL) { ++ list_remove(list, itx); ++ kmem_free(itx, offsetof(itx_t, itx_lr) + ++ itx->itx_lr.lrc_reclen); ++ } ++ list_destroy(list); ++ kmem_free(ian, sizeof (itx_async_node_t)); ++ } ++ avl_destroy(t); ++ ++ kmem_free(itxs, sizeof (itxs_t)); ++} ++ ++static int ++zil_aitx_compare(const void *x1, const void *x2) ++{ ++ const uint64_t o1 = ((itx_async_node_t *)x1)->ia_foid; ++ const uint64_t o2 = ((itx_async_node_t *)x2)->ia_foid; ++ ++ if (o1 < o2) ++ return (-1); ++ if (o1 > o2) ++ return (1); ++ ++ return (0); ++} ++ ++/* ++ * Remove all async itx with the given oid. ++ */ ++static void ++zil_remove_async(zilog_t *zilog, uint64_t oid) ++{ ++ uint64_t otxg, txg; ++ itx_async_node_t *ian; ++ avl_tree_t *t; ++ avl_index_t where; ++ list_t clean_list; ++ itx_t *itx; ++ ++ ASSERT(oid != 0); ++ list_create(&clean_list, sizeof (itx_t), offsetof(itx_t, itx_node)); ++ ++ if (spa_freeze_txg(zilog->zl_spa) != UINT64_MAX) /* ziltest support */ ++ otxg = ZILTEST_TXG; ++ else ++ otxg = spa_last_synced_txg(zilog->zl_spa) + 1; ++ ++ for (txg = otxg; txg < (otxg + TXG_CONCURRENT_STATES); txg++) { ++ itxg_t *itxg = &zilog->zl_itxg[txg & TXG_MASK]; ++ ++ mutex_enter(&itxg->itxg_lock); ++ if (itxg->itxg_txg != txg) { ++ mutex_exit(&itxg->itxg_lock); ++ continue; ++ } ++ ++ /* ++ * Locate the object node and append its list. ++ */ ++ t = &itxg->itxg_itxs->i_async_tree; ++ ian = avl_find(t, &oid, &where); ++ if (ian != NULL) ++ list_move_tail(&clean_list, &ian->ia_list); ++ mutex_exit(&itxg->itxg_lock); ++ } ++ while ((itx = list_head(&clean_list)) != NULL) { ++ list_remove(&clean_list, itx); ++ kmem_free(itx, offsetof(itx_t, itx_lr) + ++ itx->itx_lr.lrc_reclen); ++ } ++ list_destroy(&clean_list); ++} ++ ++void ++zil_itx_assign(zilog_t *zilog, itx_t *itx, dmu_tx_t *tx) ++{ ++ uint64_t txg; ++ itxg_t *itxg; ++ itxs_t *itxs, *clean = NULL; ++ ++ /* ++ * Object ids can be re-instantiated in the next txg so ++ * remove any async transactions to avoid future leaks. 
++ * This can happen if a fsync occurs on the re-instantiated ++ * object for a WR_INDIRECT or WR_NEED_COPY write, which gets ++ * the new file data and flushes a write record for the old object. ++ */ ++ if ((itx->itx_lr.lrc_txtype & ~TX_CI) == TX_REMOVE) ++ zil_remove_async(zilog, itx->itx_oid); ++ ++ /* ++ * Ensure the data of a renamed file is committed before the rename. ++ */ ++ if ((itx->itx_lr.lrc_txtype & ~TX_CI) == TX_RENAME) ++ zil_async_to_sync(zilog, itx->itx_oid); ++ ++ if (spa_freeze_txg(zilog->zl_spa) != UINT64_MAX) ++ txg = ZILTEST_TXG; ++ else ++ txg = dmu_tx_get_txg(tx); ++ ++ itxg = &zilog->zl_itxg[txg & TXG_MASK]; ++ mutex_enter(&itxg->itxg_lock); ++ itxs = itxg->itxg_itxs; ++ if (itxg->itxg_txg != txg) { ++ if (itxs != NULL) { ++ /* ++ * The zil_clean callback hasn't got around to cleaning ++ * this itxg. Save the itxs for release below. ++ * This should be rare. ++ */ ++ atomic_add_64(&zilog->zl_itx_list_sz, -itxg->itxg_sod); ++ itxg->itxg_sod = 0; ++ clean = itxg->itxg_itxs; ++ } ++ ASSERT(itxg->itxg_sod == 0); ++ itxg->itxg_txg = txg; ++ itxs = itxg->itxg_itxs = kmem_zalloc(sizeof (itxs_t), KM_PUSHPAGE); ++ ++ list_create(&itxs->i_sync_list, sizeof (itx_t), ++ offsetof(itx_t, itx_node)); ++ avl_create(&itxs->i_async_tree, zil_aitx_compare, ++ sizeof (itx_async_node_t), ++ offsetof(itx_async_node_t, ia_node)); ++ } ++ if (itx->itx_sync) { ++ list_insert_tail(&itxs->i_sync_list, itx); ++ atomic_add_64(&zilog->zl_itx_list_sz, itx->itx_sod); ++ itxg->itxg_sod += itx->itx_sod; ++ } else { ++ avl_tree_t *t = &itxs->i_async_tree; ++ uint64_t foid = ((lr_ooo_t *)&itx->itx_lr)->lr_foid; ++ itx_async_node_t *ian; ++ avl_index_t where; ++ ++ ian = avl_find(t, &foid, &where); ++ if (ian == NULL) { ++ ian = kmem_alloc(sizeof (itx_async_node_t), KM_PUSHPAGE); ++ list_create(&ian->ia_list, sizeof (itx_t), ++ offsetof(itx_t, itx_node)); ++ ian->ia_foid = foid; ++ avl_insert(t, ian, where); ++ } ++ list_insert_tail(&ian->ia_list, itx); ++ } ++ ++ itx->itx_lr.lrc_txg = dmu_tx_get_txg(tx); ++ mutex_exit(&itxg->itxg_lock); ++ ++ /* Release the old itxs now we've dropped the lock */ ++ if (clean != NULL) ++ zil_itxg_clean(clean); ++} ++ ++/* ++ * If there are any in-memory intent log transactions which have now been ++ * synced then start up a taskq to free them. ++ */ ++void ++zil_clean(zilog_t *zilog, uint64_t synced_txg) ++{ ++ itxg_t *itxg = &zilog->zl_itxg[synced_txg & TXG_MASK]; ++ itxs_t *clean_me; ++ ++ mutex_enter(&itxg->itxg_lock); ++ if (itxg->itxg_itxs == NULL || itxg->itxg_txg == ZILTEST_TXG) { ++ mutex_exit(&itxg->itxg_lock); ++ return; ++ } ++ ASSERT3U(itxg->itxg_txg, <=, synced_txg); ++ ASSERT(itxg->itxg_txg != 0); ++ ASSERT(zilog->zl_clean_taskq != NULL); ++ atomic_add_64(&zilog->zl_itx_list_sz, -itxg->itxg_sod); ++ itxg->itxg_sod = 0; ++ clean_me = itxg->itxg_itxs; ++ itxg->itxg_itxs = NULL; ++ itxg->itxg_txg = 0; ++ mutex_exit(&itxg->itxg_lock); ++ /* ++ * Preferably start a task queue to free up the old itxs but ++ * if taskq_dispatch can't allocate resources to do that then ++ * free it in-line. This should be rare. Note, using TQ_SLEEP ++ * created a bad performance problem. ++ */ ++ if (taskq_dispatch(zilog->zl_clean_taskq, ++ (void (*)(void *))zil_itxg_clean, clean_me, TQ_NOSLEEP) == 0) ++ zil_itxg_clean(clean_me); ++} ++ ++/* ++ * Get the list of itxs to commit into zl_itx_commit_list. 
++ */ ++static void ++zil_get_commit_list(zilog_t *zilog) ++{ ++ uint64_t otxg, txg; ++ list_t *commit_list = &zilog->zl_itx_commit_list; ++ uint64_t push_sod = 0; ++ ++ if (spa_freeze_txg(zilog->zl_spa) != UINT64_MAX) /* ziltest support */ ++ otxg = ZILTEST_TXG; ++ else ++ otxg = spa_last_synced_txg(zilog->zl_spa) + 1; ++ ++ for (txg = otxg; txg < (otxg + TXG_CONCURRENT_STATES); txg++) { ++ itxg_t *itxg = &zilog->zl_itxg[txg & TXG_MASK]; ++ ++ mutex_enter(&itxg->itxg_lock); ++ if (itxg->itxg_txg != txg) { ++ mutex_exit(&itxg->itxg_lock); ++ continue; ++ } ++ ++ list_move_tail(commit_list, &itxg->itxg_itxs->i_sync_list); ++ push_sod += itxg->itxg_sod; ++ itxg->itxg_sod = 0; ++ ++ mutex_exit(&itxg->itxg_lock); ++ } ++ atomic_add_64(&zilog->zl_itx_list_sz, -push_sod); ++} ++ ++/* ++ * Move the async itxs for a specified object to commit into sync lists. ++ */ ++static void ++zil_async_to_sync(zilog_t *zilog, uint64_t foid) ++{ ++ uint64_t otxg, txg; ++ itx_async_node_t *ian; ++ avl_tree_t *t; ++ avl_index_t where; ++ ++ if (spa_freeze_txg(zilog->zl_spa) != UINT64_MAX) /* ziltest support */ ++ otxg = ZILTEST_TXG; ++ else ++ otxg = spa_last_synced_txg(zilog->zl_spa) + 1; ++ ++ for (txg = otxg; txg < (otxg + TXG_CONCURRENT_STATES); txg++) { ++ itxg_t *itxg = &zilog->zl_itxg[txg & TXG_MASK]; ++ ++ mutex_enter(&itxg->itxg_lock); ++ if (itxg->itxg_txg != txg) { ++ mutex_exit(&itxg->itxg_lock); ++ continue; ++ } ++ ++ /* ++ * If a foid is specified then find that node and append its ++ * list. Otherwise walk the tree appending all the lists ++ * to the sync list. We add to the end rather than the ++ * beginning to ensure the create has happened. ++ */ ++ t = &itxg->itxg_itxs->i_async_tree; ++ if (foid != 0) { ++ ian = avl_find(t, &foid, &where); ++ if (ian != NULL) { ++ list_move_tail(&itxg->itxg_itxs->i_sync_list, ++ &ian->ia_list); ++ } ++ } else { ++ void *cookie = NULL; ++ ++ while ((ian = avl_destroy_nodes(t, &cookie)) != NULL) { ++ list_move_tail(&itxg->itxg_itxs->i_sync_list, ++ &ian->ia_list); ++ list_destroy(&ian->ia_list); ++ kmem_free(ian, sizeof (itx_async_node_t)); ++ } ++ } ++ mutex_exit(&itxg->itxg_lock); ++ } ++} ++ ++static void ++zil_commit_writer(zilog_t *zilog) ++{ ++ uint64_t txg; ++ itx_t *itx; ++ lwb_t *lwb; ++ spa_t *spa = zilog->zl_spa; ++ int error = 0; ++ ++ ASSERT(zilog->zl_root_zio == NULL); ++ ++ mutex_exit(&zilog->zl_lock); ++ ++ zil_get_commit_list(zilog); ++ ++ /* ++ * Return if there's nothing to commit before we dirty the fs by ++ * calling zil_create(). ++ */ ++ if (list_head(&zilog->zl_itx_commit_list) == NULL) { ++ mutex_enter(&zilog->zl_lock); ++ return; ++ } ++ ++ if (zilog->zl_suspend) { ++ lwb = NULL; ++ } else { ++ lwb = list_tail(&zilog->zl_lwb_list); ++ if (lwb == NULL) ++ lwb = zil_create(zilog); ++ } ++ ++ DTRACE_PROBE1(zil__cw1, zilog_t *, zilog); ++ while ((itx = list_head(&zilog->zl_itx_commit_list))) { ++ txg = itx->itx_lr.lrc_txg; ++ ASSERT(txg); ++ ++ if (txg > spa_last_synced_txg(spa) || txg > spa_freeze_txg(spa)) ++ lwb = zil_lwb_commit(zilog, itx, lwb); ++ list_remove(&zilog->zl_itx_commit_list, itx); ++ kmem_free(itx, offsetof(itx_t, itx_lr) ++ + itx->itx_lr.lrc_reclen); ++ } ++ DTRACE_PROBE1(zil__cw2, zilog_t *, zilog); ++ ++ /* write the last block out */ ++ if (lwb != NULL && lwb->lwb_zio != NULL) ++ lwb = zil_lwb_write_start(zilog, lwb); ++ ++ zilog->zl_cur_used = 0; ++ ++ /* ++ * Wait if necessary for the log blocks to be on stable storage. 
++ */ ++ if (zilog->zl_root_zio) { ++ error = zio_wait(zilog->zl_root_zio); ++ zilog->zl_root_zio = NULL; ++ zil_flush_vdevs(zilog); ++ } ++ ++ if (error || lwb == NULL) ++ txg_wait_synced(zilog->zl_dmu_pool, 0); ++ ++ mutex_enter(&zilog->zl_lock); ++ ++ /* ++ * Remember the highest committed log sequence number for ztest. ++ * We only update this value when all the log writes succeeded, ++ * because ztest wants to ASSERT that it got the whole log chain. ++ */ ++ if (error == 0 && lwb != NULL) ++ zilog->zl_commit_lr_seq = zilog->zl_lr_seq; ++} ++ ++/* ++ * Commit zfs transactions to stable storage. ++ * If foid is 0 push out all transactions, otherwise push only those ++ * for that object or might reference that object. ++ * ++ * itxs are committed in batches. In a heavily stressed zil there will be ++ * a commit writer thread who is writing out a bunch of itxs to the log ++ * for a set of committing threads (cthreads) in the same batch as the writer. ++ * Those cthreads are all waiting on the same cv for that batch. ++ * ++ * There will also be a different and growing batch of threads that are ++ * waiting to commit (qthreads). When the committing batch completes ++ * a transition occurs such that the cthreads exit and the qthreads become ++ * cthreads. One of the new cthreads becomes the writer thread for the ++ * batch. Any new threads arriving become new qthreads. ++ * ++ * Only 2 condition variables are needed and there's no transition ++ * between the two cvs needed. They just flip-flop between qthreads ++ * and cthreads. ++ * ++ * Using this scheme we can efficiently wakeup up only those threads ++ * that have been committed. ++ */ ++void ++zil_commit(zilog_t *zilog, uint64_t foid) ++{ ++ uint64_t mybatch; ++ ++ if (zilog->zl_sync == ZFS_SYNC_DISABLED) ++ return; ++ ++ ZIL_STAT_BUMP(zil_commit_count); ++ ++ /* move the async itxs for the foid to the sync queues */ ++ zil_async_to_sync(zilog, foid); ++ ++ mutex_enter(&zilog->zl_lock); ++ mybatch = zilog->zl_next_batch; ++ while (zilog->zl_writer) { ++ cv_wait(&zilog->zl_cv_batch[mybatch & 1], &zilog->zl_lock); ++ if (mybatch <= zilog->zl_com_batch) { ++ mutex_exit(&zilog->zl_lock); ++ return; ++ } ++ } ++ ++ zilog->zl_next_batch++; ++ zilog->zl_writer = B_TRUE; ++ ZIL_STAT_BUMP(zil_commit_writer_count); ++ zil_commit_writer(zilog); ++ zilog->zl_com_batch = mybatch; ++ zilog->zl_writer = B_FALSE; ++ ++ /* wake up one thread to become the next writer */ ++ cv_signal(&zilog->zl_cv_batch[(mybatch+1) & 1]); ++ ++ /* wake up all threads waiting for this batch to be committed */ ++ cv_broadcast(&zilog->zl_cv_batch[mybatch & 1]); ++ ++ mutex_exit(&zilog->zl_lock); ++} ++ ++/* ++ * Called in syncing context to free committed log blocks and update log header. ++ */ ++void ++zil_sync(zilog_t *zilog, dmu_tx_t *tx) ++{ ++ zil_header_t *zh = zil_header_in_syncing_context(zilog); ++ uint64_t txg = dmu_tx_get_txg(tx); ++ spa_t *spa = zilog->zl_spa; ++ uint64_t *replayed_seq = &zilog->zl_replayed_seq[txg & TXG_MASK]; ++ lwb_t *lwb; ++ ++ /* ++ * We don't zero out zl_destroy_txg, so make sure we don't try ++ * to destroy it twice. 
++ */ ++ if (spa_sync_pass(spa) != 1) ++ return; ++ ++ mutex_enter(&zilog->zl_lock); ++ ++ ASSERT(zilog->zl_stop_sync == 0); ++ ++ if (*replayed_seq != 0) { ++ ASSERT(zh->zh_replay_seq < *replayed_seq); ++ zh->zh_replay_seq = *replayed_seq; ++ *replayed_seq = 0; ++ } ++ ++ if (zilog->zl_destroy_txg == txg) { ++ blkptr_t blk = zh->zh_log; ++ ++ ASSERT(list_head(&zilog->zl_lwb_list) == NULL); ++ ++ bzero(zh, sizeof (zil_header_t)); ++ bzero(zilog->zl_replayed_seq, sizeof (zilog->zl_replayed_seq)); ++ ++ if (zilog->zl_keep_first) { ++ /* ++ * If this block was part of log chain that couldn't ++ * be claimed because a device was missing during ++ * zil_claim(), but that device later returns, ++ * then this block could erroneously appear valid. ++ * To guard against this, assign a new GUID to the new ++ * log chain so it doesn't matter what blk points to. ++ */ ++ zil_init_log_chain(zilog, &blk); ++ zh->zh_log = blk; ++ } ++ } ++ ++ while ((lwb = list_head(&zilog->zl_lwb_list)) != NULL) { ++ zh->zh_log = lwb->lwb_blk; ++ if (lwb->lwb_buf != NULL || lwb->lwb_max_txg > txg) ++ break; ++ ++ ASSERT(lwb->lwb_zio == NULL); ++ ++ list_remove(&zilog->zl_lwb_list, lwb); ++ zio_free_zil(spa, txg, &lwb->lwb_blk); ++ kmem_cache_free(zil_lwb_cache, lwb); ++ ++ /* ++ * If we don't have anything left in the lwb list then ++ * we've had an allocation failure and we need to zero ++ * out the zil_header blkptr so that we don't end ++ * up freeing the same block twice. ++ */ ++ if (list_head(&zilog->zl_lwb_list) == NULL) ++ BP_ZERO(&zh->zh_log); ++ } ++ ++ /* ++ * Remove fastwrite on any blocks that have been pre-allocated for ++ * the next commit. This prevents fastwrite counter pollution by ++ * unused, long-lived LWBs. ++ */ ++ for (; lwb != NULL; lwb = list_next(&zilog->zl_lwb_list, lwb)) { ++ if (lwb->lwb_fastwrite && !lwb->lwb_zio) { ++ metaslab_fastwrite_unmark(zilog->zl_spa, &lwb->lwb_blk); ++ lwb->lwb_fastwrite = 0; ++ } ++ } ++ ++ mutex_exit(&zilog->zl_lock); ++} ++ ++void ++zil_init(void) ++{ ++ zil_lwb_cache = kmem_cache_create("zil_lwb_cache", ++ sizeof (struct lwb), 0, NULL, NULL, NULL, NULL, NULL, 0); ++ ++ zil_ksp = kstat_create("zfs", 0, "zil", "misc", ++ KSTAT_TYPE_NAMED, sizeof(zil_stats) / sizeof(kstat_named_t), ++ KSTAT_FLAG_VIRTUAL); ++ ++ if (zil_ksp != NULL) { ++ zil_ksp->ks_data = &zil_stats; ++ kstat_install(zil_ksp); ++ } ++} ++ ++void ++zil_fini(void) ++{ ++ kmem_cache_destroy(zil_lwb_cache); ++ ++ if (zil_ksp != NULL) { ++ kstat_delete(zil_ksp); ++ zil_ksp = NULL; ++ } ++} ++ ++void ++zil_set_sync(zilog_t *zilog, uint64_t sync) ++{ ++ zilog->zl_sync = sync; ++} ++ ++void ++zil_set_logbias(zilog_t *zilog, uint64_t logbias) ++{ ++ zilog->zl_logbias = logbias; ++} ++ ++zilog_t * ++zil_alloc(objset_t *os, zil_header_t *zh_phys) ++{ ++ zilog_t *zilog; ++ int i; ++ ++ zilog = kmem_zalloc(sizeof (zilog_t), KM_PUSHPAGE); ++ ++ zilog->zl_header = zh_phys; ++ zilog->zl_os = os; ++ zilog->zl_spa = dmu_objset_spa(os); ++ zilog->zl_dmu_pool = dmu_objset_pool(os); ++ zilog->zl_destroy_txg = TXG_INITIAL - 1; ++ zilog->zl_logbias = dmu_objset_logbias(os); ++ zilog->zl_sync = dmu_objset_syncprop(os); ++ zilog->zl_next_batch = 1; ++ ++ mutex_init(&zilog->zl_lock, NULL, MUTEX_DEFAULT, NULL); ++ ++ for (i = 0; i < TXG_SIZE; i++) { ++ mutex_init(&zilog->zl_itxg[i].itxg_lock, NULL, ++ MUTEX_DEFAULT, NULL); ++ } ++ ++ list_create(&zilog->zl_lwb_list, sizeof (lwb_t), ++ offsetof(lwb_t, lwb_node)); ++ ++ list_create(&zilog->zl_itx_commit_list, sizeof (itx_t), ++ offsetof(itx_t, itx_node)); ++ ++ 
mutex_init(&zilog->zl_vdev_lock, NULL, MUTEX_DEFAULT, NULL); ++ ++ avl_create(&zilog->zl_vdev_tree, zil_vdev_compare, ++ sizeof (zil_vdev_node_t), offsetof(zil_vdev_node_t, zv_node)); ++ ++ cv_init(&zilog->zl_cv_writer, NULL, CV_DEFAULT, NULL); ++ cv_init(&zilog->zl_cv_suspend, NULL, CV_DEFAULT, NULL); ++ cv_init(&zilog->zl_cv_batch[0], NULL, CV_DEFAULT, NULL); ++ cv_init(&zilog->zl_cv_batch[1], NULL, CV_DEFAULT, NULL); ++ ++ return (zilog); ++} ++ ++void ++zil_free(zilog_t *zilog) ++{ ++ int i; ++ ++ zilog->zl_stop_sync = 1; ++ ++ ASSERT(list_is_empty(&zilog->zl_lwb_list)); ++ list_destroy(&zilog->zl_lwb_list); ++ ++ avl_destroy(&zilog->zl_vdev_tree); ++ mutex_destroy(&zilog->zl_vdev_lock); ++ ++ ASSERT(list_is_empty(&zilog->zl_itx_commit_list)); ++ list_destroy(&zilog->zl_itx_commit_list); ++ ++ for (i = 0; i < TXG_SIZE; i++) { ++ /* ++ * It's possible for an itx to be generated that doesn't dirty ++ * a txg (e.g. ztest TX_TRUNCATE). So there's no zil_clean() ++ * callback to remove the entry. We remove those here. ++ * ++ * Also free up the ziltest itxs. ++ */ ++ if (zilog->zl_itxg[i].itxg_itxs) ++ zil_itxg_clean(zilog->zl_itxg[i].itxg_itxs); ++ mutex_destroy(&zilog->zl_itxg[i].itxg_lock); ++ } ++ ++ mutex_destroy(&zilog->zl_lock); ++ ++ cv_destroy(&zilog->zl_cv_writer); ++ cv_destroy(&zilog->zl_cv_suspend); ++ cv_destroy(&zilog->zl_cv_batch[0]); ++ cv_destroy(&zilog->zl_cv_batch[1]); ++ ++ kmem_free(zilog, sizeof (zilog_t)); ++} ++ ++/* ++ * Open an intent log. ++ */ ++zilog_t * ++zil_open(objset_t *os, zil_get_data_t *get_data) ++{ ++ zilog_t *zilog = dmu_objset_zil(os); ++ ++ ASSERT(zilog->zl_clean_taskq == NULL); ++ ASSERT(zilog->zl_get_data == NULL); ++ ASSERT(list_is_empty(&zilog->zl_lwb_list)); ++ ++ zilog->zl_get_data = get_data; ++ zilog->zl_clean_taskq = taskq_create("zil_clean", 1, minclsyspri, ++ 2, 2, TASKQ_PREPOPULATE); ++ ++ return (zilog); ++} ++ ++/* ++ * Close an intent log. ++ */ ++void ++zil_close(zilog_t *zilog) ++{ ++ lwb_t *lwb; ++ uint64_t txg = 0; ++ ++ zil_commit(zilog, 0); /* commit all itx */ ++ ++ /* ++ * The lwb_max_txg for the stubby lwb will reflect the last activity ++ * for the zil. After a txg_wait_synced() on the txg we know all the ++ * callbacks have occurred that may clean the zil. Only then can we ++ * destroy the zl_clean_taskq. ++ */ ++ mutex_enter(&zilog->zl_lock); ++ lwb = list_tail(&zilog->zl_lwb_list); ++ if (lwb != NULL) ++ txg = lwb->lwb_max_txg; ++ mutex_exit(&zilog->zl_lock); ++ if (txg) ++ txg_wait_synced(zilog->zl_dmu_pool, txg); ++ ++ taskq_destroy(zilog->zl_clean_taskq); ++ zilog->zl_clean_taskq = NULL; ++ zilog->zl_get_data = NULL; ++ ++ /* ++ * We should have only one LWB left on the list; remove it now. ++ */ ++ mutex_enter(&zilog->zl_lock); ++ lwb = list_head(&zilog->zl_lwb_list); ++ if (lwb != NULL) { ++ ASSERT(lwb == list_tail(&zilog->zl_lwb_list)); ++ ASSERT(lwb->lwb_zio == NULL); ++ if (lwb->lwb_fastwrite) ++ metaslab_fastwrite_unmark(zilog->zl_spa, &lwb->lwb_blk); ++ list_remove(&zilog->zl_lwb_list, lwb); ++ zio_buf_free(lwb->lwb_buf, lwb->lwb_sz); ++ kmem_cache_free(zil_lwb_cache, lwb); ++ } ++ mutex_exit(&zilog->zl_lock); ++} ++ ++/* ++ * Suspend an intent log. While in suspended mode, we still honor ++ * synchronous semantics, but we rely on txg_wait_synced() to do it. ++ * We suspend the log briefly when taking a snapshot so that the snapshot ++ * contains all the data it's supposed to, and has an empty intent log. 
++ */ ++int ++zil_suspend(zilog_t *zilog) ++{ ++ const zil_header_t *zh = zilog->zl_header; ++ ++ mutex_enter(&zilog->zl_lock); ++ if (zh->zh_flags & ZIL_REPLAY_NEEDED) { /* unplayed log */ ++ mutex_exit(&zilog->zl_lock); ++ return (EBUSY); ++ } ++ if (zilog->zl_suspend++ != 0) { ++ /* ++ * Someone else already began a suspend. ++ * Just wait for them to finish. ++ */ ++ while (zilog->zl_suspending) ++ cv_wait(&zilog->zl_cv_suspend, &zilog->zl_lock); ++ mutex_exit(&zilog->zl_lock); ++ return (0); ++ } ++ zilog->zl_suspending = B_TRUE; ++ mutex_exit(&zilog->zl_lock); ++ ++ zil_commit(zilog, 0); ++ ++ zil_destroy(zilog, B_FALSE); ++ ++ mutex_enter(&zilog->zl_lock); ++ zilog->zl_suspending = B_FALSE; ++ cv_broadcast(&zilog->zl_cv_suspend); ++ mutex_exit(&zilog->zl_lock); ++ ++ return (0); ++} ++ ++void ++zil_resume(zilog_t *zilog) ++{ ++ mutex_enter(&zilog->zl_lock); ++ ASSERT(zilog->zl_suspend != 0); ++ zilog->zl_suspend--; ++ mutex_exit(&zilog->zl_lock); ++} ++ ++typedef struct zil_replay_arg { ++ zil_replay_func_t **zr_replay; ++ void *zr_arg; ++ boolean_t zr_byteswap; ++ char *zr_lr; ++} zil_replay_arg_t; ++ ++static int ++zil_replay_error(zilog_t *zilog, lr_t *lr, int error) ++{ ++ char name[MAXNAMELEN]; ++ ++ zilog->zl_replaying_seq--; /* didn't actually replay this one */ ++ ++ dmu_objset_name(zilog->zl_os, name); ++ ++ cmn_err(CE_WARN, "ZFS replay transaction error %d, " ++ "dataset %s, seq 0x%llx, txtype %llu %s\n", error, name, ++ (u_longlong_t)lr->lrc_seq, ++ (u_longlong_t)(lr->lrc_txtype & ~TX_CI), ++ (lr->lrc_txtype & TX_CI) ? "CI" : ""); ++ ++ return (error); ++} ++ ++static int ++zil_replay_log_record(zilog_t *zilog, lr_t *lr, void *zra, uint64_t claim_txg) ++{ ++ zil_replay_arg_t *zr = zra; ++ const zil_header_t *zh = zilog->zl_header; ++ uint64_t reclen = lr->lrc_reclen; ++ uint64_t txtype = lr->lrc_txtype; ++ int error = 0; ++ ++ zilog->zl_replaying_seq = lr->lrc_seq; ++ ++ if (lr->lrc_seq <= zh->zh_replay_seq) /* already replayed */ ++ return (0); ++ ++ if (lr->lrc_txg < claim_txg) /* already committed */ ++ return (0); ++ ++ /* Strip case-insensitive bit, still present in log record */ ++ txtype &= ~TX_CI; ++ ++ if (txtype == 0 || txtype >= TX_MAX_TYPE) ++ return (zil_replay_error(zilog, lr, EINVAL)); ++ ++ /* ++ * If this record type can be logged out of order, the object ++ * (lr_foid) may no longer exist. That's legitimate, not an error. ++ */ ++ if (TX_OOO(txtype)) { ++ error = dmu_object_info(zilog->zl_os, ++ ((lr_ooo_t *)lr)->lr_foid, NULL); ++ if (error == ENOENT || error == EEXIST) ++ return (0); ++ } ++ ++ /* ++ * Make a copy of the data so we can revise and extend it. ++ */ ++ bcopy(lr, zr->zr_lr, reclen); ++ ++ /* ++ * If this is a TX_WRITE with a blkptr, suck in the data. ++ */ ++ if (txtype == TX_WRITE && reclen == sizeof (lr_write_t)) { ++ error = zil_read_log_data(zilog, (lr_write_t *)lr, ++ zr->zr_lr + reclen); ++ if (error) ++ return (zil_replay_error(zilog, lr, error)); ++ } ++ ++ /* ++ * The log block containing this lr may have been byteswapped ++ * so that we can easily examine common fields like lrc_txtype. ++ * However, the log is a mix of different record types, and only the ++ * replay vectors know how to byteswap their records. Therefore, if ++ * the lr was byteswapped, undo it before invoking the replay vector. ++ */ ++ if (zr->zr_byteswap) ++ byteswap_uint64_array(zr->zr_lr, reclen); ++ ++ /* ++ * We must now do two things atomically: replay this log record, ++ * and update the log header sequence number to reflect the fact that ++ * we did so. 
At the end of each replay function the sequence number ++ * is updated if we are in replay mode. ++ */ ++ error = zr->zr_replay[txtype](zr->zr_arg, zr->zr_lr, zr->zr_byteswap); ++ if (error) { ++ /* ++ * The DMU's dnode layer doesn't see removes until the txg ++ * commits, so a subsequent claim can spuriously fail with ++ * EEXIST. So if we receive any error we try syncing out ++ * any removes then retry the transaction. Note that we ++ * specify B_FALSE for byteswap now, so we don't do it twice. ++ */ ++ txg_wait_synced(spa_get_dsl(zilog->zl_spa), 0); ++ error = zr->zr_replay[txtype](zr->zr_arg, zr->zr_lr, B_FALSE); ++ if (error) ++ return (zil_replay_error(zilog, lr, error)); ++ } ++ return (0); ++} ++ ++/* ARGSUSED */ ++static int ++zil_incr_blks(zilog_t *zilog, blkptr_t *bp, void *arg, uint64_t claim_txg) ++{ ++ zilog->zl_replay_blks++; ++ ++ return (0); ++} ++ ++/* ++ * If this dataset has a non-empty intent log, replay it and destroy it. ++ */ ++void ++zil_replay(objset_t *os, void *arg, zil_replay_func_t *replay_func[TX_MAX_TYPE]) ++{ ++ zilog_t *zilog = dmu_objset_zil(os); ++ const zil_header_t *zh = zilog->zl_header; ++ zil_replay_arg_t zr; ++ ++ if ((zh->zh_flags & ZIL_REPLAY_NEEDED) == 0) { ++ zil_destroy(zilog, B_TRUE); ++ return; ++ } ++ ++ zr.zr_replay = replay_func; ++ zr.zr_arg = arg; ++ zr.zr_byteswap = BP_SHOULD_BYTESWAP(&zh->zh_log); ++ zr.zr_lr = vmem_alloc(2 * SPA_MAXBLOCKSIZE, KM_PUSHPAGE); ++ ++ /* ++ * Wait for in-progress removes to sync before starting replay. ++ */ ++ txg_wait_synced(zilog->zl_dmu_pool, 0); ++ ++ zilog->zl_replay = B_TRUE; ++ zilog->zl_replay_time = ddi_get_lbolt(); ++ ASSERT(zilog->zl_replay_blks == 0); ++ (void) zil_parse(zilog, zil_incr_blks, zil_replay_log_record, &zr, ++ zh->zh_claim_txg); ++ vmem_free(zr.zr_lr, 2 * SPA_MAXBLOCKSIZE); ++ ++ zil_destroy(zilog, B_FALSE); ++ txg_wait_synced(zilog->zl_dmu_pool, zilog->zl_destroy_txg); ++ zilog->zl_replay = B_FALSE; ++} ++ ++boolean_t ++zil_replaying(zilog_t *zilog, dmu_tx_t *tx) ++{ ++ if (zilog->zl_sync == ZFS_SYNC_DISABLED) ++ return (B_TRUE); ++ ++ if (zilog->zl_replay) { ++ dsl_dataset_dirty(dmu_objset_ds(zilog->zl_os), tx); ++ zilog->zl_replayed_seq[dmu_tx_get_txg(tx) & TXG_MASK] = ++ zilog->zl_replaying_seq; ++ return (B_TRUE); ++ } ++ ++ return (B_FALSE); ++} ++ ++/* ARGSUSED */ ++int ++zil_vdev_offline(const char *osname, void *arg) ++{ ++ objset_t *os; ++ zilog_t *zilog; ++ int error; ++ ++ error = dmu_objset_hold(osname, FTAG, &os); ++ if (error) ++ return (error); ++ ++ zilog = dmu_objset_zil(os); ++ if (zil_suspend(zilog) != 0) ++ error = EEXIST; ++ else ++ zil_resume(zilog); ++ dmu_objset_rele(os, FTAG); ++ return (error); ++} ++ ++#if defined(_KERNEL) && defined(HAVE_SPL) ++module_param(zil_replay_disable, int, 0644); ++MODULE_PARM_DESC(zil_replay_disable, "Disable intent logging replay"); ++ ++module_param(zfs_nocacheflush, int, 0644); ++MODULE_PARM_DESC(zfs_nocacheflush, "Disable cache flushes"); ++ ++module_param(zil_slog_limit, ulong, 0644); ++MODULE_PARM_DESC(zil_slog_limit, "Max commit bytes to separate log device"); ++#endif +diff -uNr linux-3.2.33-go.orig/fs/zfs/zfs/zio.c linux-3.2.33-go/fs/zfs/zfs/zio.c +--- linux-3.2.33-go.orig/fs/zfs/zfs/zio.c 1970-01-01 01:00:00.000000000 +0100 ++++ linux-3.2.33-go/fs/zfs/zfs/zio.c 2012-11-16 23:25:34.348039346 +0100 +@@ -0,0 +1,3166 @@ ++/* ++ * CDDL HEADER START ++ * ++ * The contents of this file are subject to the terms of the ++ * Common Development and Distribution License (the "License"). 
++ * You may not use this file except in compliance with the License. ++ * ++ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE ++ * or http://www.opensolaris.org/os/licensing. ++ * See the License for the specific language governing permissions ++ * and limitations under the License. ++ * ++ * When distributing Covered Code, include this CDDL HEADER in each ++ * file and include the License file at usr/src/OPENSOLARIS.LICENSE. ++ * If applicable, add the following below this CDDL HEADER, with the ++ * fields enclosed by brackets "[]" replaced with your own identifying ++ * information: Portions Copyright [yyyy] [name of copyright owner] ++ * ++ * CDDL HEADER END ++ */ ++/* ++ * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2012 by Delphix. All rights reserved. ++ * Copyright (c) 2011 Nexenta Systems, Inc. All rights reserved. ++ */ ++ ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++ ++/* ++ * ========================================================================== ++ * I/O priority table ++ * ========================================================================== ++ */ ++uint8_t zio_priority_table[ZIO_PRIORITY_TABLE_SIZE] = { ++ 0, /* ZIO_PRIORITY_NOW */ ++ 0, /* ZIO_PRIORITY_SYNC_READ */ ++ 0, /* ZIO_PRIORITY_SYNC_WRITE */ ++ 0, /* ZIO_PRIORITY_LOG_WRITE */ ++ 1, /* ZIO_PRIORITY_CACHE_FILL */ ++ 1, /* ZIO_PRIORITY_AGG */ ++ 4, /* ZIO_PRIORITY_FREE */ ++ 4, /* ZIO_PRIORITY_ASYNC_WRITE */ ++ 6, /* ZIO_PRIORITY_ASYNC_READ */ ++ 10, /* ZIO_PRIORITY_RESILVER */ ++ 20, /* ZIO_PRIORITY_SCRUB */ ++ 2, /* ZIO_PRIORITY_DDT_PREFETCH */ ++}; ++ ++/* ++ * ========================================================================== ++ * I/O type descriptions ++ * ========================================================================== ++ */ ++char *zio_type_name[ZIO_TYPES] = { ++ "z_null", "z_rd", "z_wr", "z_fr", "z_cl", "z_ioctl" ++}; ++ ++/* ++ * ========================================================================== ++ * I/O kmem caches ++ * ========================================================================== ++ */ ++kmem_cache_t *zio_cache; ++kmem_cache_t *zio_link_cache; ++kmem_cache_t *zio_vdev_cache; ++kmem_cache_t *zio_buf_cache[SPA_MAXBLOCKSIZE >> SPA_MINBLOCKSHIFT]; ++kmem_cache_t *zio_data_buf_cache[SPA_MAXBLOCKSIZE >> SPA_MINBLOCKSHIFT]; ++int zio_bulk_flags = 0; ++int zio_delay_max = ZIO_DELAY_MAX; ++ ++#ifdef _KERNEL ++extern vmem_t *zio_alloc_arena; ++#endif ++extern int zfs_mg_alloc_failures; ++ ++/* ++ * An allocating zio is one that either currently has the DVA allocate ++ * stage set or will have it later in its lifetime. 
++ */ ++#define IO_IS_ALLOCATING(zio) ((zio)->io_orig_pipeline & ZIO_STAGE_DVA_ALLOCATE) ++ ++int zio_requeue_io_start_cut_in_line = 1; ++ ++#ifdef ZFS_DEBUG ++int zio_buf_debug_limit = 16384; ++#else ++int zio_buf_debug_limit = 0; ++#endif ++ ++static inline void __zio_execute(zio_t *zio); ++ ++static int ++zio_cons(void *arg, void *unused, int kmflag) ++{ ++ zio_t *zio = arg; ++ ++ bzero(zio, sizeof (zio_t)); ++ ++ mutex_init(&zio->io_lock, NULL, MUTEX_DEFAULT, NULL); ++ cv_init(&zio->io_cv, NULL, CV_DEFAULT, NULL); ++ ++ list_create(&zio->io_parent_list, sizeof (zio_link_t), ++ offsetof(zio_link_t, zl_parent_node)); ++ list_create(&zio->io_child_list, sizeof (zio_link_t), ++ offsetof(zio_link_t, zl_child_node)); ++ ++ return (0); ++} ++ ++static void ++zio_dest(void *arg, void *unused) ++{ ++ zio_t *zio = arg; ++ ++ mutex_destroy(&zio->io_lock); ++ cv_destroy(&zio->io_cv); ++ list_destroy(&zio->io_parent_list); ++ list_destroy(&zio->io_child_list); ++} ++ ++void ++zio_init(void) ++{ ++ size_t c; ++ vmem_t *data_alloc_arena = NULL; ++ ++#ifdef _KERNEL ++ data_alloc_arena = zio_alloc_arena; ++#endif ++ zio_cache = kmem_cache_create("zio_cache", sizeof (zio_t), 0, ++ zio_cons, zio_dest, NULL, NULL, NULL, KMC_KMEM); ++ zio_link_cache = kmem_cache_create("zio_link_cache", ++ sizeof (zio_link_t), 0, NULL, NULL, NULL, NULL, NULL, KMC_KMEM); ++ zio_vdev_cache = kmem_cache_create("zio_vdev_cache", sizeof(vdev_io_t), ++ PAGESIZE, NULL, NULL, NULL, NULL, NULL, KMC_VMEM); ++ ++ /* ++ * For small buffers, we want a cache for each multiple of ++ * SPA_MINBLOCKSIZE. For medium-size buffers, we want a cache ++ * for each quarter-power of 2. For large buffers, we want ++ * a cache for each multiple of PAGESIZE. ++ */ ++ for (c = 0; c < SPA_MAXBLOCKSIZE >> SPA_MINBLOCKSHIFT; c++) { ++ size_t size = (c + 1) << SPA_MINBLOCKSHIFT; ++ size_t p2 = size; ++ size_t align = 0; ++ ++ while (p2 & (p2 - 1)) ++ p2 &= p2 - 1; ++ ++ if (size <= 4 * SPA_MINBLOCKSIZE) { ++ align = SPA_MINBLOCKSIZE; ++ } else if (P2PHASE(size, PAGESIZE) == 0) { ++ align = PAGESIZE; ++ } else if (P2PHASE(size, p2 >> 2) == 0) { ++ align = p2 >> 2; ++ } ++ ++ if (align != 0) { ++ char name[36]; ++ int flags = zio_bulk_flags; ++ ++ /* ++ * The smallest buffers (512b) are heavily used and ++ * experience a lot of churn. The slabs allocated ++ * for them are also relatively small (32K). Thus ++ * in over to avoid expensive calls to vmalloc() we ++ * make an exception to the usual slab allocation ++ * policy and force these buffers to be kmem backed. ++ */ ++ if (size == (1 << SPA_MINBLOCKSHIFT)) ++ flags |= KMC_KMEM; ++ ++ (void) sprintf(name, "zio_buf_%lu", (ulong_t)size); ++ zio_buf_cache[c] = kmem_cache_create(name, size, ++ align, NULL, NULL, NULL, NULL, NULL, flags); ++ ++ (void) sprintf(name, "zio_data_buf_%lu", (ulong_t)size); ++ zio_data_buf_cache[c] = kmem_cache_create(name, size, ++ align, NULL, NULL, NULL, NULL, ++ data_alloc_arena, flags); ++ } ++ } ++ ++ while (--c != 0) { ++ ASSERT(zio_buf_cache[c] != NULL); ++ if (zio_buf_cache[c - 1] == NULL) ++ zio_buf_cache[c - 1] = zio_buf_cache[c]; ++ ++ ASSERT(zio_data_buf_cache[c] != NULL); ++ if (zio_data_buf_cache[c - 1] == NULL) ++ zio_data_buf_cache[c - 1] = zio_data_buf_cache[c]; ++ } ++ ++ /* ++ * The zio write taskqs have 1 thread per cpu, allow 1/2 of the taskqs ++ * to fail 3 times per txg or 8 failures, whichever is greater. 
++ */ ++ zfs_mg_alloc_failures = MAX((3 * max_ncpus / 2), 8); ++ ++ zio_inject_init(); ++} ++ ++void ++zio_fini(void) ++{ ++ size_t c; ++ kmem_cache_t *last_cache = NULL; ++ kmem_cache_t *last_data_cache = NULL; ++ ++ for (c = 0; c < SPA_MAXBLOCKSIZE >> SPA_MINBLOCKSHIFT; c++) { ++ if (zio_buf_cache[c] != last_cache) { ++ last_cache = zio_buf_cache[c]; ++ kmem_cache_destroy(zio_buf_cache[c]); ++ } ++ zio_buf_cache[c] = NULL; ++ ++ if (zio_data_buf_cache[c] != last_data_cache) { ++ last_data_cache = zio_data_buf_cache[c]; ++ kmem_cache_destroy(zio_data_buf_cache[c]); ++ } ++ zio_data_buf_cache[c] = NULL; ++ } ++ ++ kmem_cache_destroy(zio_vdev_cache); ++ kmem_cache_destroy(zio_link_cache); ++ kmem_cache_destroy(zio_cache); ++ ++ zio_inject_fini(); ++} ++ ++/* ++ * ========================================================================== ++ * Allocate and free I/O buffers ++ * ========================================================================== ++ */ ++ ++/* ++ * Use zio_buf_alloc to allocate ZFS metadata. This data will appear in a ++ * crashdump if the kernel panics, so use it judiciously. Obviously, it's ++ * useful to inspect ZFS metadata, but if possible, we should avoid keeping ++ * excess / transient data in-core during a crashdump. ++ */ ++void * ++zio_buf_alloc(size_t size) ++{ ++ size_t c = (size - 1) >> SPA_MINBLOCKSHIFT; ++ ++ ASSERT(c < SPA_MAXBLOCKSIZE >> SPA_MINBLOCKSHIFT); ++ ++ return (kmem_cache_alloc(zio_buf_cache[c], KM_PUSHPAGE | KM_NODEBUG)); ++} ++ ++/* ++ * Use zio_data_buf_alloc to allocate data. The data will not appear in a ++ * crashdump if the kernel panics. This exists so that we will limit the amount ++ * of ZFS data that shows up in a kernel crashdump. (Thus reducing the amount ++ * of kernel heap dumped to disk when the kernel panics) ++ */ ++void * ++zio_data_buf_alloc(size_t size) ++{ ++ size_t c = (size - 1) >> SPA_MINBLOCKSHIFT; ++ ++ ASSERT(c < SPA_MAXBLOCKSIZE >> SPA_MINBLOCKSHIFT); ++ ++ return (kmem_cache_alloc(zio_data_buf_cache[c], ++ KM_PUSHPAGE | KM_NODEBUG)); ++} ++ ++void ++zio_buf_free(void *buf, size_t size) ++{ ++ size_t c = (size - 1) >> SPA_MINBLOCKSHIFT; ++ ++ ASSERT(c < SPA_MAXBLOCKSIZE >> SPA_MINBLOCKSHIFT); ++ ++ kmem_cache_free(zio_buf_cache[c], buf); ++} ++ ++void ++zio_data_buf_free(void *buf, size_t size) ++{ ++ size_t c = (size - 1) >> SPA_MINBLOCKSHIFT; ++ ++ ASSERT(c < SPA_MAXBLOCKSIZE >> SPA_MINBLOCKSHIFT); ++ ++ kmem_cache_free(zio_data_buf_cache[c], buf); ++} ++ ++/* ++ * Dedicated I/O buffers to ensure that memory fragmentation never prevents ++ * or significantly delays the issuing of a zio. These buffers are used ++ * to aggregate I/O and could be used for raidz stripes. 
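++ * Each buffer comes from the zio_vdev_cache created in zio_init(): one
++ * vdev_io_t per buffer, aligned to PAGESIZE.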
++ */ ++void * ++zio_vdev_alloc(void) ++{ ++ return (kmem_cache_alloc(zio_vdev_cache, KM_PUSHPAGE)); ++} ++ ++void ++zio_vdev_free(void *buf) ++{ ++ kmem_cache_free(zio_vdev_cache, buf); ++ ++} ++ ++/* ++ * ========================================================================== ++ * Push and pop I/O transform buffers ++ * ========================================================================== ++ */ ++static void ++zio_push_transform(zio_t *zio, void *data, uint64_t size, uint64_t bufsize, ++ zio_transform_func_t *transform) ++{ ++ zio_transform_t *zt = kmem_alloc(sizeof (zio_transform_t), KM_PUSHPAGE); ++ ++ zt->zt_orig_data = zio->io_data; ++ zt->zt_orig_size = zio->io_size; ++ zt->zt_bufsize = bufsize; ++ zt->zt_transform = transform; ++ ++ zt->zt_next = zio->io_transform_stack; ++ zio->io_transform_stack = zt; ++ ++ zio->io_data = data; ++ zio->io_size = size; ++} ++ ++static void ++zio_pop_transforms(zio_t *zio) ++{ ++ zio_transform_t *zt; ++ ++ while ((zt = zio->io_transform_stack) != NULL) { ++ if (zt->zt_transform != NULL) ++ zt->zt_transform(zio, ++ zt->zt_orig_data, zt->zt_orig_size); ++ ++ if (zt->zt_bufsize != 0) ++ zio_buf_free(zio->io_data, zt->zt_bufsize); ++ ++ zio->io_data = zt->zt_orig_data; ++ zio->io_size = zt->zt_orig_size; ++ zio->io_transform_stack = zt->zt_next; ++ ++ kmem_free(zt, sizeof (zio_transform_t)); ++ } ++} ++ ++/* ++ * ========================================================================== ++ * I/O transform callbacks for subblocks and decompression ++ * ========================================================================== ++ */ ++static void ++zio_subblock(zio_t *zio, void *data, uint64_t size) ++{ ++ ASSERT(zio->io_size > size); ++ ++ if (zio->io_type == ZIO_TYPE_READ) ++ bcopy(zio->io_data, data, size); ++} ++ ++static void ++zio_decompress(zio_t *zio, void *data, uint64_t size) ++{ ++ if (zio->io_error == 0 && ++ zio_decompress_data(BP_GET_COMPRESS(zio->io_bp), ++ zio->io_data, data, zio->io_size, size) != 0) ++ zio->io_error = EIO; ++} ++ ++/* ++ * ========================================================================== ++ * I/O parent/child relationships and pipeline interlocks ++ * ========================================================================== ++ */ ++/* ++ * NOTE - Callers to zio_walk_parents() and zio_walk_children must ++ * continue calling these functions until they return NULL. ++ * Otherwise, the next caller will pick up the list walk in ++ * some indeterminate state. (Otherwise every caller would ++ * have to pass in a cookie to keep the state represented by ++ * io_walk_link, which gets annoying.) ++ */ ++zio_t * ++zio_walk_parents(zio_t *cio) ++{ ++ zio_link_t *zl = cio->io_walk_link; ++ list_t *pl = &cio->io_parent_list; ++ ++ zl = (zl == NULL) ? list_head(pl) : list_next(pl, zl); ++ cio->io_walk_link = zl; ++ ++ if (zl == NULL) ++ return (NULL); ++ ++ ASSERT(zl->zl_child == cio); ++ return (zl->zl_parent); ++} ++ ++zio_t * ++zio_walk_children(zio_t *pio) ++{ ++ zio_link_t *zl = pio->io_walk_link; ++ list_t *cl = &pio->io_child_list; ++ ++ zl = (zl == NULL) ? 
list_head(cl) : list_next(cl, zl); ++ pio->io_walk_link = zl; ++ ++ if (zl == NULL) ++ return (NULL); ++ ++ ASSERT(zl->zl_parent == pio); ++ return (zl->zl_child); ++} ++ ++zio_t * ++zio_unique_parent(zio_t *cio) ++{ ++ zio_t *pio = zio_walk_parents(cio); ++ ++ VERIFY(zio_walk_parents(cio) == NULL); ++ return (pio); ++} ++ ++void ++zio_add_child(zio_t *pio, zio_t *cio) ++{ ++ zio_link_t *zl = kmem_cache_alloc(zio_link_cache, KM_PUSHPAGE); ++ int w; ++ ++ /* ++ * Logical I/Os can have logical, gang, or vdev children. ++ * Gang I/Os can have gang or vdev children. ++ * Vdev I/Os can only have vdev children. ++ * The following ASSERT captures all of these constraints. ++ */ ++ ASSERT(cio->io_child_type <= pio->io_child_type); ++ ++ zl->zl_parent = pio; ++ zl->zl_child = cio; ++ ++ mutex_enter(&cio->io_lock); ++ mutex_enter(&pio->io_lock); ++ ++ ASSERT(pio->io_state[ZIO_WAIT_DONE] == 0); ++ ++ for (w = 0; w < ZIO_WAIT_TYPES; w++) ++ pio->io_children[cio->io_child_type][w] += !cio->io_state[w]; ++ ++ list_insert_head(&pio->io_child_list, zl); ++ list_insert_head(&cio->io_parent_list, zl); ++ ++ pio->io_child_count++; ++ cio->io_parent_count++; ++ ++ mutex_exit(&pio->io_lock); ++ mutex_exit(&cio->io_lock); ++} ++ ++static void ++zio_remove_child(zio_t *pio, zio_t *cio, zio_link_t *zl) ++{ ++ ASSERT(zl->zl_parent == pio); ++ ASSERT(zl->zl_child == cio); ++ ++ mutex_enter(&cio->io_lock); ++ mutex_enter(&pio->io_lock); ++ ++ list_remove(&pio->io_child_list, zl); ++ list_remove(&cio->io_parent_list, zl); ++ ++ pio->io_child_count--; ++ cio->io_parent_count--; ++ ++ mutex_exit(&pio->io_lock); ++ mutex_exit(&cio->io_lock); ++ ++ kmem_cache_free(zio_link_cache, zl); ++} ++ ++static boolean_t ++zio_wait_for_children(zio_t *zio, enum zio_child child, enum zio_wait_type wait) ++{ ++ uint64_t *countp = &zio->io_children[child][wait]; ++ boolean_t waiting = B_FALSE; ++ ++ mutex_enter(&zio->io_lock); ++ ASSERT(zio->io_stall == NULL); ++ if (*countp != 0) { ++ zio->io_stage >>= 1; ++ zio->io_stall = countp; ++ waiting = B_TRUE; ++ } ++ mutex_exit(&zio->io_lock); ++ ++ return (waiting); ++} ++ ++__attribute__((always_inline)) ++static inline void ++zio_notify_parent(zio_t *pio, zio_t *zio, enum zio_wait_type wait) ++{ ++ uint64_t *countp = &pio->io_children[zio->io_child_type][wait]; ++ int *errorp = &pio->io_child_error[zio->io_child_type]; ++ ++ mutex_enter(&pio->io_lock); ++ if (zio->io_error && !(zio->io_flags & ZIO_FLAG_DONT_PROPAGATE)) ++ *errorp = zio_worst_error(*errorp, zio->io_error); ++ pio->io_reexecute |= zio->io_reexecute; ++ ASSERT3U(*countp, >, 0); ++ if (--*countp == 0 && pio->io_stall == countp) { ++ pio->io_stall = NULL; ++ mutex_exit(&pio->io_lock); ++ __zio_execute(pio); ++ } else { ++ mutex_exit(&pio->io_lock); ++ } ++} ++ ++static void ++zio_inherit_child_errors(zio_t *zio, enum zio_child c) ++{ ++ if (zio->io_child_error[c] != 0 && zio->io_error == 0) ++ zio->io_error = zio->io_child_error[c]; ++} ++ ++/* ++ * ========================================================================== ++ * Create the various types of I/O (read, write, free, etc) ++ * ========================================================================== ++ */ ++static zio_t * ++zio_create(zio_t *pio, spa_t *spa, uint64_t txg, const blkptr_t *bp, ++ void *data, uint64_t size, zio_done_func_t *done, void *private, ++ zio_type_t type, int priority, enum zio_flag flags, ++ vdev_t *vd, uint64_t offset, const zbookmark_t *zb, ++ enum zio_stage stage, enum zio_stage pipeline) ++{ ++ zio_t *zio; ++ ++ ASSERT3U(size, <=, 
SPA_MAXBLOCKSIZE); ++ ASSERT(P2PHASE(size, SPA_MINBLOCKSIZE) == 0); ++ ASSERT(P2PHASE(offset, SPA_MINBLOCKSIZE) == 0); ++ ++ ASSERT(!vd || spa_config_held(spa, SCL_STATE_ALL, RW_READER)); ++ ASSERT(!bp || !(flags & ZIO_FLAG_CONFIG_WRITER)); ++ ASSERT(vd || stage == ZIO_STAGE_OPEN); ++ ++ zio = kmem_cache_alloc(zio_cache, KM_PUSHPAGE); ++ ++ if (vd != NULL) ++ zio->io_child_type = ZIO_CHILD_VDEV; ++ else if (flags & ZIO_FLAG_GANG_CHILD) ++ zio->io_child_type = ZIO_CHILD_GANG; ++ else if (flags & ZIO_FLAG_DDT_CHILD) ++ zio->io_child_type = ZIO_CHILD_DDT; ++ else ++ zio->io_child_type = ZIO_CHILD_LOGICAL; ++ ++ if (bp != NULL) { ++ zio->io_logical = NULL; ++ zio->io_bp = (blkptr_t *)bp; ++ zio->io_bp_copy = *bp; ++ zio->io_bp_orig = *bp; ++ if (type != ZIO_TYPE_WRITE || ++ zio->io_child_type == ZIO_CHILD_DDT) ++ zio->io_bp = &zio->io_bp_copy; /* so caller can free */ ++ if (zio->io_child_type == ZIO_CHILD_LOGICAL) ++ zio->io_logical = zio; ++ if (zio->io_child_type > ZIO_CHILD_GANG && BP_IS_GANG(bp)) ++ pipeline |= ZIO_GANG_STAGES; ++ } else { ++ zio->io_logical = NULL; ++ zio->io_bp = NULL; ++ bzero(&zio->io_bp_copy, sizeof (blkptr_t)); ++ bzero(&zio->io_bp_orig, sizeof (blkptr_t)); ++ } ++ ++ zio->io_spa = spa; ++ zio->io_txg = txg; ++ zio->io_ready = NULL; ++ zio->io_done = done; ++ zio->io_private = private; ++ zio->io_prev_space_delta = 0; ++ zio->io_type = type; ++ zio->io_priority = priority; ++ zio->io_vd = vd; ++ zio->io_vsd = NULL; ++ zio->io_vsd_ops = NULL; ++ zio->io_offset = offset; ++ zio->io_deadline = 0; ++ zio->io_orig_data = zio->io_data = data; ++ zio->io_orig_size = zio->io_size = size; ++ zio->io_orig_flags = zio->io_flags = flags; ++ zio->io_orig_stage = zio->io_stage = stage; ++ zio->io_orig_pipeline = zio->io_pipeline = pipeline; ++ bzero(&zio->io_prop, sizeof (zio_prop_t)); ++ zio->io_cmd = 0; ++ zio->io_reexecute = 0; ++ zio->io_bp_override = NULL; ++ zio->io_walk_link = NULL; ++ zio->io_transform_stack = NULL; ++ zio->io_delay = 0; ++ zio->io_error = 0; ++ zio->io_child_count = 0; ++ zio->io_parent_count = 0; ++ zio->io_stall = NULL; ++ zio->io_gang_leader = NULL; ++ zio->io_gang_tree = NULL; ++ zio->io_executor = NULL; ++ zio->io_waiter = NULL; ++ zio->io_cksum_report = NULL; ++ zio->io_ena = 0; ++ bzero(zio->io_child_error, sizeof (int) * ZIO_CHILD_TYPES); ++ bzero(zio->io_children, ++ sizeof (uint64_t) * ZIO_CHILD_TYPES * ZIO_WAIT_TYPES); ++ bzero(&zio->io_bookmark, sizeof (zbookmark_t)); ++ ++ zio->io_state[ZIO_WAIT_READY] = (stage >= ZIO_STAGE_READY); ++ zio->io_state[ZIO_WAIT_DONE] = (stage >= ZIO_STAGE_DONE); ++ ++ if (zb != NULL) ++ zio->io_bookmark = *zb; ++ ++ if (pio != NULL) { ++ if (zio->io_logical == NULL) ++ zio->io_logical = pio->io_logical; ++ if (zio->io_child_type == ZIO_CHILD_GANG) ++ zio->io_gang_leader = pio->io_gang_leader; ++ zio_add_child(pio, zio); ++ } ++ ++ taskq_init_ent(&zio->io_tqent); ++ ++ return (zio); ++} ++ ++static void ++zio_destroy(zio_t *zio) ++{ ++ kmem_cache_free(zio_cache, zio); ++} ++ ++zio_t * ++zio_null(zio_t *pio, spa_t *spa, vdev_t *vd, zio_done_func_t *done, ++ void *private, enum zio_flag flags) ++{ ++ zio_t *zio; ++ ++ zio = zio_create(pio, spa, 0, NULL, NULL, 0, done, private, ++ ZIO_TYPE_NULL, ZIO_PRIORITY_NOW, flags, vd, 0, NULL, ++ ZIO_STAGE_OPEN, ZIO_INTERLOCK_PIPELINE); ++ ++ return (zio); ++} ++ ++zio_t * ++zio_root(spa_t *spa, zio_done_func_t *done, void *private, enum zio_flag flags) ++{ ++ return (zio_null(NULL, spa, NULL, done, private, flags)); ++} ++ ++zio_t * ++zio_read(zio_t *pio, spa_t *spa, const 
blkptr_t *bp, ++ void *data, uint64_t size, zio_done_func_t *done, void *private, ++ int priority, enum zio_flag flags, const zbookmark_t *zb) ++{ ++ zio_t *zio; ++ ++ zio = zio_create(pio, spa, BP_PHYSICAL_BIRTH(bp), bp, ++ data, size, done, private, ++ ZIO_TYPE_READ, priority, flags, NULL, 0, zb, ++ ZIO_STAGE_OPEN, (flags & ZIO_FLAG_DDT_CHILD) ? ++ ZIO_DDT_CHILD_READ_PIPELINE : ZIO_READ_PIPELINE); ++ ++ return (zio); ++} ++ ++zio_t * ++zio_write(zio_t *pio, spa_t *spa, uint64_t txg, blkptr_t *bp, ++ void *data, uint64_t size, const zio_prop_t *zp, ++ zio_done_func_t *ready, zio_done_func_t *done, void *private, ++ int priority, enum zio_flag flags, const zbookmark_t *zb) ++{ ++ zio_t *zio; ++ ++ ASSERT(zp->zp_checksum >= ZIO_CHECKSUM_OFF && ++ zp->zp_checksum < ZIO_CHECKSUM_FUNCTIONS && ++ zp->zp_compress >= ZIO_COMPRESS_OFF && ++ zp->zp_compress < ZIO_COMPRESS_FUNCTIONS && ++ zp->zp_type < DMU_OT_NUMTYPES && ++ zp->zp_level < 32 && ++ zp->zp_copies > 0 && ++ zp->zp_copies <= spa_max_replication(spa) && ++ zp->zp_dedup <= 1 && ++ zp->zp_dedup_verify <= 1); ++ ++ zio = zio_create(pio, spa, txg, bp, data, size, done, private, ++ ZIO_TYPE_WRITE, priority, flags, NULL, 0, zb, ++ ZIO_STAGE_OPEN, (flags & ZIO_FLAG_DDT_CHILD) ? ++ ZIO_DDT_CHILD_WRITE_PIPELINE : ZIO_WRITE_PIPELINE); ++ ++ zio->io_ready = ready; ++ zio->io_prop = *zp; ++ ++ return (zio); ++} ++ ++zio_t * ++zio_rewrite(zio_t *pio, spa_t *spa, uint64_t txg, blkptr_t *bp, void *data, ++ uint64_t size, zio_done_func_t *done, void *private, int priority, ++ enum zio_flag flags, zbookmark_t *zb) ++{ ++ zio_t *zio; ++ ++ zio = zio_create(pio, spa, txg, bp, data, size, done, private, ++ ZIO_TYPE_WRITE, priority, flags, NULL, 0, zb, ++ ZIO_STAGE_OPEN, ZIO_REWRITE_PIPELINE); ++ ++ return (zio); ++} ++ ++void ++zio_write_override(zio_t *zio, blkptr_t *bp, int copies) ++{ ++ ASSERT(zio->io_type == ZIO_TYPE_WRITE); ++ ASSERT(zio->io_child_type == ZIO_CHILD_LOGICAL); ++ ASSERT(zio->io_stage == ZIO_STAGE_OPEN); ++ ASSERT(zio->io_txg == spa_syncing_txg(zio->io_spa)); ++ ++ zio->io_prop.zp_copies = copies; ++ zio->io_bp_override = bp; ++} ++ ++void ++zio_free(spa_t *spa, uint64_t txg, const blkptr_t *bp) ++{ ++ bplist_append(&spa->spa_free_bplist[txg & TXG_MASK], bp); ++} ++ ++zio_t * ++zio_free_sync(zio_t *pio, spa_t *spa, uint64_t txg, const blkptr_t *bp, ++ enum zio_flag flags) ++{ ++ zio_t *zio; ++ ++ dprintf_bp(bp, "freeing in txg %llu, pass %u", ++ (longlong_t)txg, spa->spa_sync_pass); ++ ++ ASSERT(!BP_IS_HOLE(bp)); ++ ASSERT(spa_syncing_txg(spa) == txg); ++ ASSERT(spa_sync_pass(spa) <= SYNC_PASS_DEFERRED_FREE); ++ ++ zio = zio_create(pio, spa, txg, bp, NULL, BP_GET_PSIZE(bp), ++ NULL, NULL, ZIO_TYPE_FREE, ZIO_PRIORITY_FREE, flags, ++ NULL, 0, NULL, ZIO_STAGE_OPEN, ZIO_FREE_PIPELINE); ++ ++ return (zio); ++} ++ ++zio_t * ++zio_claim(zio_t *pio, spa_t *spa, uint64_t txg, const blkptr_t *bp, ++ zio_done_func_t *done, void *private, enum zio_flag flags) ++{ ++ zio_t *zio; ++ ++ /* ++ * A claim is an allocation of a specific block. Claims are needed ++ * to support immediate writes in the intent log. The issue is that ++ * immediate writes contain committed data, but in a txg that was ++ * *not* committed. Upon opening the pool after an unclean shutdown, ++ * the intent log claims all blocks that contain immediate write data ++ * so that the SPA knows they're in use. ++ * ++ * All claims *must* be resolved in the first txg -- before the SPA ++ * starts allocating blocks -- so that nothing is allocated twice. 
++ * If txg == 0 we just verify that the block is claimable. ++ */ ++ ASSERT3U(spa->spa_uberblock.ub_rootbp.blk_birth, <, spa_first_txg(spa)); ++ ASSERT(txg == spa_first_txg(spa) || txg == 0); ++ ASSERT(!BP_GET_DEDUP(bp) || !spa_writeable(spa)); /* zdb(1M) */ ++ ++ zio = zio_create(pio, spa, txg, bp, NULL, BP_GET_PSIZE(bp), ++ done, private, ZIO_TYPE_CLAIM, ZIO_PRIORITY_NOW, flags, ++ NULL, 0, NULL, ZIO_STAGE_OPEN, ZIO_CLAIM_PIPELINE); ++ ++ return (zio); ++} ++ ++zio_t * ++zio_ioctl(zio_t *pio, spa_t *spa, vdev_t *vd, int cmd, ++ zio_done_func_t *done, void *private, int priority, enum zio_flag flags) ++{ ++ zio_t *zio; ++ int c; ++ ++ if (vd->vdev_children == 0) { ++ zio = zio_create(pio, spa, 0, NULL, NULL, 0, done, private, ++ ZIO_TYPE_IOCTL, priority, flags, vd, 0, NULL, ++ ZIO_STAGE_OPEN, ZIO_IOCTL_PIPELINE); ++ ++ zio->io_cmd = cmd; ++ } else { ++ zio = zio_null(pio, spa, NULL, NULL, NULL, flags); ++ ++ for (c = 0; c < vd->vdev_children; c++) ++ zio_nowait(zio_ioctl(zio, spa, vd->vdev_child[c], cmd, ++ done, private, priority, flags)); ++ } ++ ++ return (zio); ++} ++ ++zio_t * ++zio_read_phys(zio_t *pio, vdev_t *vd, uint64_t offset, uint64_t size, ++ void *data, int checksum, zio_done_func_t *done, void *private, ++ int priority, enum zio_flag flags, boolean_t labels) ++{ ++ zio_t *zio; ++ ++ ASSERT(vd->vdev_children == 0); ++ ASSERT(!labels || offset + size <= VDEV_LABEL_START_SIZE || ++ offset >= vd->vdev_psize - VDEV_LABEL_END_SIZE); ++ ASSERT3U(offset + size, <=, vd->vdev_psize); ++ ++ zio = zio_create(pio, vd->vdev_spa, 0, NULL, data, size, done, private, ++ ZIO_TYPE_READ, priority, flags, vd, offset, NULL, ++ ZIO_STAGE_OPEN, ZIO_READ_PHYS_PIPELINE); ++ ++ zio->io_prop.zp_checksum = checksum; ++ ++ return (zio); ++} ++ ++zio_t * ++zio_write_phys(zio_t *pio, vdev_t *vd, uint64_t offset, uint64_t size, ++ void *data, int checksum, zio_done_func_t *done, void *private, ++ int priority, enum zio_flag flags, boolean_t labels) ++{ ++ zio_t *zio; ++ ++ ASSERT(vd->vdev_children == 0); ++ ASSERT(!labels || offset + size <= VDEV_LABEL_START_SIZE || ++ offset >= vd->vdev_psize - VDEV_LABEL_END_SIZE); ++ ASSERT3U(offset + size, <=, vd->vdev_psize); ++ ++ zio = zio_create(pio, vd->vdev_spa, 0, NULL, data, size, done, private, ++ ZIO_TYPE_WRITE, priority, flags, vd, offset, NULL, ++ ZIO_STAGE_OPEN, ZIO_WRITE_PHYS_PIPELINE); ++ ++ zio->io_prop.zp_checksum = checksum; ++ ++ if (zio_checksum_table[checksum].ci_eck) { ++ /* ++ * zec checksums are necessarily destructive -- they modify ++ * the end of the write buffer to hold the verifier/checksum. ++ * Therefore, we must make a local copy in case the data is ++ * being written to multiple places in parallel. ++ */ ++ void *wbuf = zio_buf_alloc(size); ++ bcopy(data, wbuf, size); ++ zio_push_transform(zio, wbuf, size, size, NULL); ++ } ++ ++ return (zio); ++} ++ ++/* ++ * Create a child I/O to do some work for us. ++ */ ++zio_t * ++zio_vdev_child_io(zio_t *pio, blkptr_t *bp, vdev_t *vd, uint64_t offset, ++ void *data, uint64_t size, int type, int priority, enum zio_flag flags, ++ zio_done_func_t *done, void *private) ++{ ++ enum zio_stage pipeline = ZIO_VDEV_CHILD_PIPELINE; ++ zio_t *zio; ++ ++ ASSERT(vd->vdev_parent == ++ (pio->io_vd ? pio->io_vd : pio->io_spa->spa_root_vdev)); ++ ++ if (type == ZIO_TYPE_READ && bp != NULL) { ++ /* ++ * If we have the bp, then the child should perform the ++ * checksum and the parent need not. 
This pushes error ++ * detection as close to the leaves as possible and ++ * eliminates redundant checksums in the interior nodes. ++ */ ++ pipeline |= ZIO_STAGE_CHECKSUM_VERIFY; ++ pio->io_pipeline &= ~ZIO_STAGE_CHECKSUM_VERIFY; ++ } ++ ++ if (vd->vdev_children == 0) ++ offset += VDEV_LABEL_START_SIZE; ++ ++ flags |= ZIO_VDEV_CHILD_FLAGS(pio) | ZIO_FLAG_DONT_PROPAGATE; ++ ++ /* ++ * If we've decided to do a repair, the write is not speculative -- ++ * even if the original read was. ++ */ ++ if (flags & ZIO_FLAG_IO_REPAIR) ++ flags &= ~ZIO_FLAG_SPECULATIVE; ++ ++ zio = zio_create(pio, pio->io_spa, pio->io_txg, bp, data, size, ++ done, private, type, priority, flags, vd, offset, &pio->io_bookmark, ++ ZIO_STAGE_VDEV_IO_START >> 1, pipeline); ++ ++ return (zio); ++} ++ ++zio_t * ++zio_vdev_delegated_io(vdev_t *vd, uint64_t offset, void *data, uint64_t size, ++ int type, int priority, enum zio_flag flags, ++ zio_done_func_t *done, void *private) ++{ ++ zio_t *zio; ++ ++ ASSERT(vd->vdev_ops->vdev_op_leaf); ++ ++ zio = zio_create(NULL, vd->vdev_spa, 0, NULL, ++ data, size, done, private, type, priority, ++ flags | ZIO_FLAG_CANFAIL | ZIO_FLAG_DONT_RETRY, ++ vd, offset, NULL, ++ ZIO_STAGE_VDEV_IO_START >> 1, ZIO_VDEV_CHILD_PIPELINE); ++ ++ return (zio); ++} ++ ++void ++zio_flush(zio_t *zio, vdev_t *vd) ++{ ++ zio_nowait(zio_ioctl(zio, zio->io_spa, vd, DKIOCFLUSHWRITECACHE, ++ NULL, NULL, ZIO_PRIORITY_NOW, ++ ZIO_FLAG_CANFAIL | ZIO_FLAG_DONT_PROPAGATE | ZIO_FLAG_DONT_RETRY)); ++} ++ ++void ++zio_shrink(zio_t *zio, uint64_t size) ++{ ++ ASSERT(zio->io_executor == NULL); ++ ASSERT(zio->io_orig_size == zio->io_size); ++ ASSERT(size <= zio->io_size); ++ ++ /* ++ * We don't shrink for raidz because of problems with the ++ * reconstruction when reading back less than the block size. ++ * Note, BP_IS_RAIDZ() assumes no compression. ++ */ ++ ASSERT(BP_GET_COMPRESS(zio->io_bp) == ZIO_COMPRESS_OFF); ++ if (!BP_IS_RAIDZ(zio->io_bp)) ++ zio->io_orig_size = zio->io_size = size; ++} ++ ++/* ++ * ========================================================================== ++ * Prepare to read and write logical blocks ++ * ========================================================================== ++ */ ++ ++static int ++zio_read_bp_init(zio_t *zio) ++{ ++ blkptr_t *bp = zio->io_bp; ++ ++ if (BP_GET_COMPRESS(bp) != ZIO_COMPRESS_OFF && ++ zio->io_child_type == ZIO_CHILD_LOGICAL && ++ !(zio->io_flags & ZIO_FLAG_RAW)) { ++ uint64_t psize = BP_GET_PSIZE(bp); ++ void *cbuf = zio_buf_alloc(psize); ++ ++ zio_push_transform(zio, cbuf, psize, psize, zio_decompress); ++ } ++ ++ if (!dmu_ot[BP_GET_TYPE(bp)].ot_metadata && BP_GET_LEVEL(bp) == 0) ++ zio->io_flags |= ZIO_FLAG_DONT_CACHE; ++ ++ if (BP_GET_TYPE(bp) == DMU_OT_DDT_ZAP) ++ zio->io_flags |= ZIO_FLAG_DONT_CACHE; ++ ++ if (BP_GET_DEDUP(bp) && zio->io_child_type == ZIO_CHILD_LOGICAL) ++ zio->io_pipeline = ZIO_DDT_READ_PIPELINE; ++ ++ return (ZIO_PIPELINE_CONTINUE); ++} ++ ++static int ++zio_write_bp_init(zio_t *zio) ++{ ++ spa_t *spa = zio->io_spa; ++ zio_prop_t *zp = &zio->io_prop; ++ enum zio_compress compress = zp->zp_compress; ++ blkptr_t *bp = zio->io_bp; ++ uint64_t lsize = zio->io_size; ++ uint64_t psize = lsize; ++ int pass = 1; ++ ++ /* ++ * If our children haven't all reached the ready stage, ++ * wait for them and then repeat this pipeline stage. 
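++ * (zio_wait_for_children() stalls the zio and backs io_stage up by one
++ * bit, so this stage is re-entered when the last child notifies us via
++ * zio_notify_parent().)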
++ */ ++ if (zio_wait_for_children(zio, ZIO_CHILD_GANG, ZIO_WAIT_READY) || ++ zio_wait_for_children(zio, ZIO_CHILD_LOGICAL, ZIO_WAIT_READY)) ++ return (ZIO_PIPELINE_STOP); ++ ++ if (!IO_IS_ALLOCATING(zio)) ++ return (ZIO_PIPELINE_CONTINUE); ++ ++ ASSERT(zio->io_child_type != ZIO_CHILD_DDT); ++ ++ if (zio->io_bp_override) { ++ ASSERT(bp->blk_birth != zio->io_txg); ++ ASSERT(BP_GET_DEDUP(zio->io_bp_override) == 0); ++ ++ *bp = *zio->io_bp_override; ++ zio->io_pipeline = ZIO_INTERLOCK_PIPELINE; ++ ++ if (BP_IS_HOLE(bp) || !zp->zp_dedup) ++ return (ZIO_PIPELINE_CONTINUE); ++ ++ ASSERT(zio_checksum_table[zp->zp_checksum].ci_dedup || ++ zp->zp_dedup_verify); ++ ++ if (BP_GET_CHECKSUM(bp) == zp->zp_checksum) { ++ BP_SET_DEDUP(bp, 1); ++ zio->io_pipeline |= ZIO_STAGE_DDT_WRITE; ++ return (ZIO_PIPELINE_CONTINUE); ++ } ++ zio->io_bp_override = NULL; ++ BP_ZERO(bp); ++ } ++ ++ if (bp->blk_birth == zio->io_txg) { ++ /* ++ * We're rewriting an existing block, which means we're ++ * working on behalf of spa_sync(). For spa_sync() to ++ * converge, it must eventually be the case that we don't ++ * have to allocate new blocks. But compression changes ++ * the blocksize, which forces a reallocate, and makes ++ * convergence take longer. Therefore, after the first ++ * few passes, stop compressing to ensure convergence. ++ */ ++ pass = spa_sync_pass(spa); ++ ++ ASSERT(zio->io_txg == spa_syncing_txg(spa)); ++ ASSERT(zio->io_child_type == ZIO_CHILD_LOGICAL); ++ ASSERT(!BP_GET_DEDUP(bp)); ++ ++ if (pass > SYNC_PASS_DONT_COMPRESS) ++ compress = ZIO_COMPRESS_OFF; ++ ++ /* Make sure someone doesn't change their mind on overwrites */ ++ ASSERT(MIN(zp->zp_copies + BP_IS_GANG(bp), ++ spa_max_replication(spa)) == BP_GET_NDVAS(bp)); ++ } ++ ++ if (compress != ZIO_COMPRESS_OFF) { ++ void *cbuf = zio_buf_alloc(lsize); ++ psize = zio_compress_data(compress, zio->io_data, cbuf, lsize); ++ if (psize == 0 || psize == lsize) { ++ compress = ZIO_COMPRESS_OFF; ++ zio_buf_free(cbuf, lsize); ++ } else { ++ ASSERT(psize < lsize); ++ zio_push_transform(zio, cbuf, psize, lsize, NULL); ++ } ++ } ++ ++ /* ++ * The final pass of spa_sync() must be all rewrites, but the first ++ * few passes offer a trade-off: allocating blocks defers convergence, ++ * but newly allocated blocks are sequential, so they can be written ++ * to disk faster. Therefore, we allow the first few passes of ++ * spa_sync() to allocate new blocks, but force rewrites after that. ++ * There should only be a handful of blocks after pass 1 in any case. 
++ */ ++ if (bp->blk_birth == zio->io_txg && BP_GET_PSIZE(bp) == psize && ++ pass > SYNC_PASS_REWRITE) { ++ enum zio_stage gang_stages = zio->io_pipeline & ZIO_GANG_STAGES; ++ ASSERT(psize != 0); ++ zio->io_pipeline = ZIO_REWRITE_PIPELINE | gang_stages; ++ zio->io_flags |= ZIO_FLAG_IO_REWRITE; ++ } else { ++ BP_ZERO(bp); ++ zio->io_pipeline = ZIO_WRITE_PIPELINE; ++ } ++ ++ if (psize == 0) { ++ zio->io_pipeline = ZIO_INTERLOCK_PIPELINE; ++ } else { ++ ASSERT(zp->zp_checksum != ZIO_CHECKSUM_GANG_HEADER); ++ BP_SET_LSIZE(bp, lsize); ++ BP_SET_PSIZE(bp, psize); ++ BP_SET_COMPRESS(bp, compress); ++ BP_SET_CHECKSUM(bp, zp->zp_checksum); ++ BP_SET_TYPE(bp, zp->zp_type); ++ BP_SET_LEVEL(bp, zp->zp_level); ++ BP_SET_DEDUP(bp, zp->zp_dedup); ++ BP_SET_BYTEORDER(bp, ZFS_HOST_BYTEORDER); ++ if (zp->zp_dedup) { ++ ASSERT(zio->io_child_type == ZIO_CHILD_LOGICAL); ++ ASSERT(!(zio->io_flags & ZIO_FLAG_IO_REWRITE)); ++ zio->io_pipeline = ZIO_DDT_WRITE_PIPELINE; ++ } ++ } ++ ++ return (ZIO_PIPELINE_CONTINUE); ++} ++ ++static int ++zio_free_bp_init(zio_t *zio) ++{ ++ blkptr_t *bp = zio->io_bp; ++ ++ if (zio->io_child_type == ZIO_CHILD_LOGICAL) { ++ if (BP_GET_DEDUP(bp)) ++ zio->io_pipeline = ZIO_DDT_FREE_PIPELINE; ++ } ++ ++ return (ZIO_PIPELINE_CONTINUE); ++} ++ ++/* ++ * ========================================================================== ++ * Execute the I/O pipeline ++ * ========================================================================== ++ */ ++ ++static void ++zio_taskq_dispatch(zio_t *zio, enum zio_taskq_type q, boolean_t cutinline) ++{ ++ spa_t *spa = zio->io_spa; ++ zio_type_t t = zio->io_type; ++ int flags = (cutinline ? TQ_FRONT : 0); ++ ++ /* ++ * If we're a config writer or a probe, the normal issue and ++ * interrupt threads may all be blocked waiting for the config lock. ++ * In this case, select the otherwise-unused taskq for ZIO_TYPE_NULL. ++ */ ++ if (zio->io_flags & (ZIO_FLAG_CONFIG_WRITER | ZIO_FLAG_PROBE)) ++ t = ZIO_TYPE_NULL; ++ ++ /* ++ * A similar issue exists for the L2ARC write thread until L2ARC 2.0. ++ */ ++ if (t == ZIO_TYPE_WRITE && zio->io_vd && zio->io_vd->vdev_aux) ++ t = ZIO_TYPE_NULL; ++ ++ /* ++ * If this is a high priority I/O, then use the high priority taskq. ++ */ ++ if (zio->io_priority == ZIO_PRIORITY_NOW && ++ spa->spa_zio_taskq[t][q + 1] != NULL) ++ q++; ++ ++ ASSERT3U(q, <, ZIO_TASKQ_TYPES); ++ ++ /* ++ * NB: We are assuming that the zio can only be dispatched ++ * to a single taskq at a time. It would be a grievous error ++ * to dispatch the zio to another taskq at the same time. 
++ */ ++ ASSERT(taskq_empty_ent(&zio->io_tqent)); ++ taskq_dispatch_ent(spa->spa_zio_taskq[t][q], ++ (task_func_t *)zio_execute, zio, flags, &zio->io_tqent); ++} ++ ++static boolean_t ++zio_taskq_member(zio_t *zio, enum zio_taskq_type q) ++{ ++ kthread_t *executor = zio->io_executor; ++ spa_t *spa = zio->io_spa; ++ zio_type_t t; ++ ++ for (t = 0; t < ZIO_TYPES; t++) ++ if (taskq_member(spa->spa_zio_taskq[t][q], executor)) ++ return (B_TRUE); ++ ++ return (B_FALSE); ++} ++ ++static int ++zio_issue_async(zio_t *zio) ++{ ++ zio_taskq_dispatch(zio, ZIO_TASKQ_ISSUE, B_FALSE); ++ ++ return (ZIO_PIPELINE_STOP); ++} ++ ++void ++zio_interrupt(zio_t *zio) ++{ ++ zio_taskq_dispatch(zio, ZIO_TASKQ_INTERRUPT, B_FALSE); ++} ++ ++/* ++ * Execute the I/O pipeline until one of the following occurs: ++ * (1) the I/O completes; (2) the pipeline stalls waiting for ++ * dependent child I/Os; (3) the I/O issues, so we're waiting ++ * for an I/O completion interrupt; (4) the I/O is delegated by ++ * vdev-level caching or aggregation; (5) the I/O is deferred ++ * due to vdev-level queueing; (6) the I/O is handed off to ++ * another thread. In all cases, the pipeline stops whenever ++ * there's no CPU work; it never burns a thread in cv_wait(). ++ * ++ * There's no locking on io_stage because there's no legitimate way ++ * for multiple threads to be attempting to process the same I/O. ++ */ ++static zio_pipe_stage_t *zio_pipeline[]; ++ ++/* ++ * zio_execute() is a wrapper around the static function ++ * __zio_execute() so that we can force __zio_execute() to be ++ * inlined. This reduces stack overhead which is important ++ * because __zio_execute() is called recursively in several zio ++ * code paths. zio_execute() itself cannot be inlined because ++ * it is externally visible. ++ */ ++void ++zio_execute(zio_t *zio) ++{ ++ __zio_execute(zio); ++} ++ ++__attribute__((always_inline)) ++static inline void ++__zio_execute(zio_t *zio) ++{ ++ zio->io_executor = curthread; ++ ++ while (zio->io_stage < ZIO_STAGE_DONE) { ++ enum zio_stage pipeline = zio->io_pipeline; ++ enum zio_stage stage = zio->io_stage; ++ dsl_pool_t *dsl; ++ boolean_t cut; ++ int rv; ++ ++ ASSERT(!MUTEX_HELD(&zio->io_lock)); ++ ASSERT(ISP2(stage)); ++ ASSERT(zio->io_stall == NULL); ++ ++ do { ++ stage <<= 1; ++ } while ((stage & pipeline) == 0); ++ ++ ASSERT(stage <= ZIO_STAGE_DONE); ++ ++ dsl = spa_get_dsl(zio->io_spa); ++ cut = (stage == ZIO_STAGE_VDEV_IO_START) ? ++ zio_requeue_io_start_cut_in_line : B_FALSE; ++ ++ /* ++ * If we are in interrupt context and this pipeline stage ++ * will grab a config lock that is held across I/O, ++ * or may wait for an I/O that needs an interrupt thread ++ * to complete, issue async to avoid deadlock. ++ * ++ * If we are in the txg_sync_thread or being called ++ * during pool init issue async to minimize stack depth. ++ * Both of these call paths may be recursively called. ++ * ++ * For VDEV_IO_START, we cut in line so that the io will ++ * be sent to disk promptly. 
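++ * (Cutting in line means zio_taskq_dispatch() passes TQ_FRONT, queueing
++ * the zio at the head of the taskq rather than the tail.)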
++ */ ++ if (((stage & ZIO_BLOCKING_STAGES) && zio->io_vd == NULL && ++ zio_taskq_member(zio, ZIO_TASKQ_INTERRUPT)) || ++ (dsl != NULL && dsl_pool_sync_context(dsl))) { ++ zio_taskq_dispatch(zio, ZIO_TASKQ_ISSUE, cut); ++ return; ++ } ++ ++ zio->io_stage = stage; ++ rv = zio_pipeline[highbit(stage) - 1](zio); ++ ++ if (rv == ZIO_PIPELINE_STOP) ++ return; ++ ++ ASSERT(rv == ZIO_PIPELINE_CONTINUE); ++ } ++} ++ ++ ++/* ++ * ========================================================================== ++ * Initiate I/O, either sync or async ++ * ========================================================================== ++ */ ++int ++zio_wait(zio_t *zio) ++{ ++ uint64_t timeout; ++ int error; ++ ++ ASSERT(zio->io_stage == ZIO_STAGE_OPEN); ++ ASSERT(zio->io_executor == NULL); ++ ++ zio->io_waiter = curthread; ++ timeout = ddi_get_lbolt() + (zio_delay_max / MILLISEC * hz); ++ ++ __zio_execute(zio); ++ ++ mutex_enter(&zio->io_lock); ++ while (zio->io_executor != NULL) { ++ /* ++ * Wake up periodically to prevent the kernel from complaining ++ * about a blocked task. However, check zio_delay_max to see ++ * if the I/O has exceeded the timeout and post an ereport. ++ */ ++ cv_timedwait_interruptible(&zio->io_cv, &zio->io_lock, ++ ddi_get_lbolt() + hz); ++ ++ if (timeout && (ddi_get_lbolt() > timeout)) { ++ zio->io_delay = zio_delay_max; ++ zfs_ereport_post(FM_EREPORT_ZFS_DELAY, ++ zio->io_spa, zio->io_vd, zio, 0, 0); ++ timeout = 0; ++ } ++ } ++ mutex_exit(&zio->io_lock); ++ ++ error = zio->io_error; ++ zio_destroy(zio); ++ ++ return (error); ++} ++ ++void ++zio_nowait(zio_t *zio) ++{ ++ ASSERT(zio->io_executor == NULL); ++ ++ if (zio->io_child_type == ZIO_CHILD_LOGICAL && ++ zio_unique_parent(zio) == NULL) { ++ /* ++ * This is a logical async I/O with no parent to wait for it. ++ * We add it to the spa_async_root_zio "Godfather" I/O which ++ * will ensure they complete prior to unloading the pool. ++ */ ++ spa_t *spa = zio->io_spa; ++ ++ zio_add_child(spa->spa_async_zio_root, zio); ++ } ++ ++ __zio_execute(zio); ++} ++ ++/* ++ * ========================================================================== ++ * Reexecute or suspend/resume failed I/O ++ * ========================================================================== ++ */ ++ ++static void ++zio_reexecute(zio_t *pio) ++{ ++ zio_t *cio, *cio_next; ++ int c, w; ++ ++ ASSERT(pio->io_child_type == ZIO_CHILD_LOGICAL); ++ ASSERT(pio->io_orig_stage == ZIO_STAGE_OPEN); ++ ASSERT(pio->io_gang_leader == NULL); ++ ASSERT(pio->io_gang_tree == NULL); ++ ++ pio->io_flags = pio->io_orig_flags; ++ pio->io_stage = pio->io_orig_stage; ++ pio->io_pipeline = pio->io_orig_pipeline; ++ pio->io_reexecute = 0; ++ pio->io_error = 0; ++ for (w = 0; w < ZIO_WAIT_TYPES; w++) ++ pio->io_state[w] = 0; ++ for (c = 0; c < ZIO_CHILD_TYPES; c++) ++ pio->io_child_error[c] = 0; ++ ++ if (IO_IS_ALLOCATING(pio)) ++ BP_ZERO(pio->io_bp); ++ ++ /* ++ * As we reexecute pio's children, new children could be created. ++ * New children go to the head of pio's io_child_list, however, ++ * so we will (correctly) not reexecute them. The key is that ++ * the remainder of pio's io_child_list, from 'cio_next' onward, ++ * cannot be affected by any side effects of reexecuting 'cio'. 
++ */ ++ for (cio = zio_walk_children(pio); cio != NULL; cio = cio_next) { ++ cio_next = zio_walk_children(pio); ++ mutex_enter(&pio->io_lock); ++ for (w = 0; w < ZIO_WAIT_TYPES; w++) ++ pio->io_children[cio->io_child_type][w]++; ++ mutex_exit(&pio->io_lock); ++ zio_reexecute(cio); ++ } ++ ++ /* ++ * Now that all children have been reexecuted, execute the parent. ++ * We don't reexecute "The Godfather" I/O here as it's the ++ * responsibility of the caller to wait on him. ++ */ ++ if (!(pio->io_flags & ZIO_FLAG_GODFATHER)) ++ __zio_execute(pio); ++} ++ ++void ++zio_suspend(spa_t *spa, zio_t *zio) ++{ ++ if (spa_get_failmode(spa) == ZIO_FAILURE_MODE_PANIC) ++ fm_panic("Pool '%s' has encountered an uncorrectable I/O " ++ "failure and the failure mode property for this pool " ++ "is set to panic.", spa_name(spa)); ++ ++ zfs_ereport_post(FM_EREPORT_ZFS_IO_FAILURE, spa, NULL, NULL, 0, 0); ++ ++ mutex_enter(&spa->spa_suspend_lock); ++ ++ if (spa->spa_suspend_zio_root == NULL) ++ spa->spa_suspend_zio_root = zio_root(spa, NULL, NULL, ++ ZIO_FLAG_CANFAIL | ZIO_FLAG_SPECULATIVE | ++ ZIO_FLAG_GODFATHER); ++ ++ spa->spa_suspended = B_TRUE; ++ ++ if (zio != NULL) { ++ ASSERT(!(zio->io_flags & ZIO_FLAG_GODFATHER)); ++ ASSERT(zio != spa->spa_suspend_zio_root); ++ ASSERT(zio->io_child_type == ZIO_CHILD_LOGICAL); ++ ASSERT(zio_unique_parent(zio) == NULL); ++ ASSERT(zio->io_stage == ZIO_STAGE_DONE); ++ zio_add_child(spa->spa_suspend_zio_root, zio); ++ } ++ ++ mutex_exit(&spa->spa_suspend_lock); ++} ++ ++int ++zio_resume(spa_t *spa) ++{ ++ zio_t *pio; ++ ++ /* ++ * Reexecute all previously suspended i/o. ++ */ ++ mutex_enter(&spa->spa_suspend_lock); ++ spa->spa_suspended = B_FALSE; ++ cv_broadcast(&spa->spa_suspend_cv); ++ pio = spa->spa_suspend_zio_root; ++ spa->spa_suspend_zio_root = NULL; ++ mutex_exit(&spa->spa_suspend_lock); ++ ++ if (pio == NULL) ++ return (0); ++ ++ zio_reexecute(pio); ++ return (zio_wait(pio)); ++} ++ ++void ++zio_resume_wait(spa_t *spa) ++{ ++ mutex_enter(&spa->spa_suspend_lock); ++ while (spa_suspended(spa)) ++ cv_wait(&spa->spa_suspend_cv, &spa->spa_suspend_lock); ++ mutex_exit(&spa->spa_suspend_lock); ++} ++ ++/* ++ * ========================================================================== ++ * Gang blocks. ++ * ++ * A gang block is a collection of small blocks that looks to the DMU ++ * like one large block. When zio_dva_allocate() cannot find a block ++ * of the requested size, due to either severe fragmentation or the pool ++ * being nearly full, it calls zio_write_gang_block() to construct the ++ * block from smaller fragments. ++ * ++ * A gang block consists of a gang header (zio_gbh_phys_t) and up to ++ * three (SPA_GBH_NBLKPTRS) gang members. The gang header is just like ++ * an indirect block: it's an array of block pointers. It consumes ++ * only one sector and hence is allocatable regardless of fragmentation. ++ * The gang header's bps point to its gang members, which hold the data. ++ * ++ * Gang blocks are self-checksumming, using the bp's ++ * as the verifier to ensure uniqueness of the SHA256 checksum. ++ * Critically, the gang block bp's blk_cksum is the checksum of the data, ++ * not the gang header. This ensures that data block signatures (needed for ++ * deduplication) are independent of how the block is physically stored. ++ * ++ * Gang blocks can be nested: a gang member may itself be a gang block. ++ * Thus every gang block is a tree in which root and all interior nodes are ++ * gang headers, and the leaves are normal blocks that contain user data. 
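++ * For example, a two-level gang tree (each gang header holds up to
++ * SPA_GBH_NBLKPTRS == 3 bps) might look like this:
++ *
++ *                      gang header
++ *                     /     |     \
++ *                 data  gang hdr  data
++ *                       /   |   \
++ *                    data  data  data
++ *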
++ * The root of the gang tree is called the gang leader. ++ * ++ * To perform any operation (read, rewrite, free, claim) on a gang block, ++ * zio_gang_assemble() first assembles the gang tree (minus data leaves) ++ * in the io_gang_tree field of the original logical i/o by recursively ++ * reading the gang leader and all gang headers below it. This yields ++ * an in-core tree containing the contents of every gang header and the ++ * bps for every constituent of the gang block. ++ * ++ * With the gang tree now assembled, zio_gang_issue() just walks the gang tree ++ * and invokes a callback on each bp. To free a gang block, zio_gang_issue() ++ * calls zio_free_gang() -- a trivial wrapper around zio_free() -- for each bp. ++ * zio_claim_gang() provides a similarly trivial wrapper for zio_claim(). ++ * zio_read_gang() is a wrapper around zio_read() that omits reading gang ++ * headers, since we already have those in io_gang_tree. zio_rewrite_gang() ++ * performs a zio_rewrite() of the data or, for gang headers, a zio_rewrite() ++ * of the gang header plus zio_checksum_compute() of the data to update the ++ * gang header's blk_cksum as described above. ++ * ++ * The two-phase assemble/issue model solves the problem of partial failure -- ++ * what if you'd freed part of a gang block but then couldn't read the ++ * gang header for another part? Assembling the entire gang tree first ++ * ensures that all the necessary gang header I/O has succeeded before ++ * starting the actual work of free, claim, or write. Once the gang tree ++ * is assembled, free and claim are in-memory operations that cannot fail. ++ * ++ * In the event that a gang write fails, zio_dva_unallocate() walks the ++ * gang tree to immediately free (i.e. insert back into the space map) ++ * everything we've allocated. This ensures that we don't get ENOSPC ++ * errors during repeated suspend/resume cycles due to a flaky device. ++ * ++ * Gang rewrites only happen during sync-to-convergence. If we can't assemble ++ * the gang tree, we won't modify the block, so we can safely defer the free ++ * (knowing that the block is still intact). If we *can* assemble the gang ++ * tree, then even if some of the rewrites fail, zio_dva_unallocate() will free ++ * each constituent bp and we can allocate a new block on the next sync pass. ++ * ++ * In all cases, the gang tree allows complete recovery from partial failure. ++ * ========================================================================== ++ */ ++ ++static zio_t * ++zio_read_gang(zio_t *pio, blkptr_t *bp, zio_gang_node_t *gn, void *data) ++{ ++ if (gn != NULL) ++ return (pio); ++ ++ return (zio_read(pio, pio->io_spa, bp, data, BP_GET_PSIZE(bp), ++ NULL, NULL, pio->io_priority, ZIO_GANG_CHILD_FLAGS(pio), ++ &pio->io_bookmark)); ++} ++ ++zio_t * ++zio_rewrite_gang(zio_t *pio, blkptr_t *bp, zio_gang_node_t *gn, void *data) ++{ ++ zio_t *zio; ++ ++ if (gn != NULL) { ++ zio = zio_rewrite(pio, pio->io_spa, pio->io_txg, bp, ++ gn->gn_gbh, SPA_GANGBLOCKSIZE, NULL, NULL, pio->io_priority, ++ ZIO_GANG_CHILD_FLAGS(pio), &pio->io_bookmark); ++ /* ++ * As we rewrite each gang header, the pipeline will compute ++ * a new gang block header checksum for it; but no one will ++ * compute a new data checksum, so we do that here. The one ++ * exception is the gang leader: the pipeline already computed ++ * its data checksum because that stage precedes gang assembly. ++ * (Presently, nothing actually uses interior data checksums; ++ * this is just good hygiene.) 
++ */ ++ if (gn != pio->io_gang_leader->io_gang_tree) { ++ zio_checksum_compute(zio, BP_GET_CHECKSUM(bp), ++ data, BP_GET_PSIZE(bp)); ++ } ++ /* ++ * If we are here to damage data for testing purposes, ++ * leave the GBH alone so that we can detect the damage. ++ */ ++ if (pio->io_gang_leader->io_flags & ZIO_FLAG_INDUCE_DAMAGE) ++ zio->io_pipeline &= ~ZIO_VDEV_IO_STAGES; ++ } else { ++ zio = zio_rewrite(pio, pio->io_spa, pio->io_txg, bp, ++ data, BP_GET_PSIZE(bp), NULL, NULL, pio->io_priority, ++ ZIO_GANG_CHILD_FLAGS(pio), &pio->io_bookmark); ++ } ++ ++ return (zio); ++} ++ ++/* ARGSUSED */ ++zio_t * ++zio_free_gang(zio_t *pio, blkptr_t *bp, zio_gang_node_t *gn, void *data) ++{ ++ return (zio_free_sync(pio, pio->io_spa, pio->io_txg, bp, ++ ZIO_GANG_CHILD_FLAGS(pio))); ++} ++ ++/* ARGSUSED */ ++zio_t * ++zio_claim_gang(zio_t *pio, blkptr_t *bp, zio_gang_node_t *gn, void *data) ++{ ++ return (zio_claim(pio, pio->io_spa, pio->io_txg, bp, ++ NULL, NULL, ZIO_GANG_CHILD_FLAGS(pio))); ++} ++ ++static zio_gang_issue_func_t *zio_gang_issue_func[ZIO_TYPES] = { ++ NULL, ++ zio_read_gang, ++ zio_rewrite_gang, ++ zio_free_gang, ++ zio_claim_gang, ++ NULL ++}; ++ ++static void zio_gang_tree_assemble_done(zio_t *zio); ++ ++static zio_gang_node_t * ++zio_gang_node_alloc(zio_gang_node_t **gnpp) ++{ ++ zio_gang_node_t *gn; ++ ++ ASSERT(*gnpp == NULL); ++ ++ gn = kmem_zalloc(sizeof (*gn), KM_PUSHPAGE); ++ gn->gn_gbh = zio_buf_alloc(SPA_GANGBLOCKSIZE); ++ *gnpp = gn; ++ ++ return (gn); ++} ++ ++static void ++zio_gang_node_free(zio_gang_node_t **gnpp) ++{ ++ zio_gang_node_t *gn = *gnpp; ++ int g; ++ ++ for (g = 0; g < SPA_GBH_NBLKPTRS; g++) ++ ASSERT(gn->gn_child[g] == NULL); ++ ++ zio_buf_free(gn->gn_gbh, SPA_GANGBLOCKSIZE); ++ kmem_free(gn, sizeof (*gn)); ++ *gnpp = NULL; ++} ++ ++static void ++zio_gang_tree_free(zio_gang_node_t **gnpp) ++{ ++ zio_gang_node_t *gn = *gnpp; ++ int g; ++ ++ if (gn == NULL) ++ return; ++ ++ for (g = 0; g < SPA_GBH_NBLKPTRS; g++) ++ zio_gang_tree_free(&gn->gn_child[g]); ++ ++ zio_gang_node_free(gnpp); ++} ++ ++static void ++zio_gang_tree_assemble(zio_t *gio, blkptr_t *bp, zio_gang_node_t **gnpp) ++{ ++ zio_gang_node_t *gn = zio_gang_node_alloc(gnpp); ++ ++ ASSERT(gio->io_gang_leader == gio); ++ ASSERT(BP_IS_GANG(bp)); ++ ++ zio_nowait(zio_read(gio, gio->io_spa, bp, gn->gn_gbh, ++ SPA_GANGBLOCKSIZE, zio_gang_tree_assemble_done, gn, ++ gio->io_priority, ZIO_GANG_CHILD_FLAGS(gio), &gio->io_bookmark)); ++} ++ ++static void ++zio_gang_tree_assemble_done(zio_t *zio) ++{ ++ zio_t *gio = zio->io_gang_leader; ++ zio_gang_node_t *gn = zio->io_private; ++ blkptr_t *bp = zio->io_bp; ++ int g; ++ ++ ASSERT(gio == zio_unique_parent(zio)); ++ ASSERT(zio->io_child_count == 0); ++ ++ if (zio->io_error) ++ return; ++ ++ if (BP_SHOULD_BYTESWAP(bp)) ++ byteswap_uint64_array(zio->io_data, zio->io_size); ++ ++ ASSERT(zio->io_data == gn->gn_gbh); ++ ASSERT(zio->io_size == SPA_GANGBLOCKSIZE); ++ ASSERT(gn->gn_gbh->zg_tail.zec_magic == ZEC_MAGIC); ++ ++ for (g = 0; g < SPA_GBH_NBLKPTRS; g++) { ++ blkptr_t *gbp = &gn->gn_gbh->zg_blkptr[g]; ++ if (!BP_IS_GANG(gbp)) ++ continue; ++ zio_gang_tree_assemble(gio, gbp, &gn->gn_child[g]); ++ } ++} ++ ++static void ++zio_gang_tree_issue(zio_t *pio, zio_gang_node_t *gn, blkptr_t *bp, void *data) ++{ ++ zio_t *gio = pio->io_gang_leader; ++ zio_t *zio; ++ int g; ++ ++ ASSERT(BP_IS_GANG(bp) == !!gn); ++ ASSERT(BP_GET_CHECKSUM(bp) == BP_GET_CHECKSUM(gio->io_bp)); ++ ASSERT(BP_GET_LSIZE(bp) == BP_GET_PSIZE(bp) || gn == gio->io_gang_tree); ++ ++ /* ++ * If you're a gang 
header, your data is in gn->gn_gbh. ++ * If you're a gang member, your data is in 'data' and gn == NULL. ++ */ ++ zio = zio_gang_issue_func[gio->io_type](pio, bp, gn, data); ++ ++ if (gn != NULL) { ++ ASSERT(gn->gn_gbh->zg_tail.zec_magic == ZEC_MAGIC); ++ ++ for (g = 0; g < SPA_GBH_NBLKPTRS; g++) { ++ blkptr_t *gbp = &gn->gn_gbh->zg_blkptr[g]; ++ if (BP_IS_HOLE(gbp)) ++ continue; ++ zio_gang_tree_issue(zio, gn->gn_child[g], gbp, data); ++ data = (char *)data + BP_GET_PSIZE(gbp); ++ } ++ } ++ ++ if (gn == gio->io_gang_tree) ++ ASSERT3P((char *)gio->io_data + gio->io_size, ==, data); ++ ++ if (zio != pio) ++ zio_nowait(zio); ++} ++ ++static int ++zio_gang_assemble(zio_t *zio) ++{ ++ blkptr_t *bp = zio->io_bp; ++ ++ ASSERT(BP_IS_GANG(bp) && zio->io_gang_leader == NULL); ++ ASSERT(zio->io_child_type > ZIO_CHILD_GANG); ++ ++ zio->io_gang_leader = zio; ++ ++ zio_gang_tree_assemble(zio, bp, &zio->io_gang_tree); ++ ++ return (ZIO_PIPELINE_CONTINUE); ++} ++ ++static int ++zio_gang_issue(zio_t *zio) ++{ ++ blkptr_t *bp = zio->io_bp; ++ ++ if (zio_wait_for_children(zio, ZIO_CHILD_GANG, ZIO_WAIT_DONE)) ++ return (ZIO_PIPELINE_STOP); ++ ++ ASSERT(BP_IS_GANG(bp) && zio->io_gang_leader == zio); ++ ASSERT(zio->io_child_type > ZIO_CHILD_GANG); ++ ++ if (zio->io_child_error[ZIO_CHILD_GANG] == 0) ++ zio_gang_tree_issue(zio, zio->io_gang_tree, bp, zio->io_data); ++ else ++ zio_gang_tree_free(&zio->io_gang_tree); ++ ++ zio->io_pipeline = ZIO_INTERLOCK_PIPELINE; ++ ++ return (ZIO_PIPELINE_CONTINUE); ++} ++ ++static void ++zio_write_gang_member_ready(zio_t *zio) ++{ ++ zio_t *pio = zio_unique_parent(zio); ++ ASSERTV(zio_t *gio = zio->io_gang_leader;) ++ dva_t *cdva = zio->io_bp->blk_dva; ++ dva_t *pdva = pio->io_bp->blk_dva; ++ uint64_t asize; ++ int d; ++ ++ if (BP_IS_HOLE(zio->io_bp)) ++ return; ++ ++ ASSERT(BP_IS_HOLE(&zio->io_bp_orig)); ++ ++ ASSERT(zio->io_child_type == ZIO_CHILD_GANG); ++ ASSERT3U(zio->io_prop.zp_copies, ==, gio->io_prop.zp_copies); ++ ASSERT3U(zio->io_prop.zp_copies, <=, BP_GET_NDVAS(zio->io_bp)); ++ ASSERT3U(pio->io_prop.zp_copies, <=, BP_GET_NDVAS(pio->io_bp)); ++ ASSERT3U(BP_GET_NDVAS(zio->io_bp), <=, BP_GET_NDVAS(pio->io_bp)); ++ ++ mutex_enter(&pio->io_lock); ++ for (d = 0; d < BP_GET_NDVAS(zio->io_bp); d++) { ++ ASSERT(DVA_GET_GANG(&pdva[d])); ++ asize = DVA_GET_ASIZE(&pdva[d]); ++ asize += DVA_GET_ASIZE(&cdva[d]); ++ DVA_SET_ASIZE(&pdva[d], asize); ++ } ++ mutex_exit(&pio->io_lock); ++} ++ ++static int ++zio_write_gang_block(zio_t *pio) ++{ ++ spa_t *spa = pio->io_spa; ++ blkptr_t *bp = pio->io_bp; ++ zio_t *gio = pio->io_gang_leader; ++ zio_t *zio; ++ zio_gang_node_t *gn, **gnpp; ++ zio_gbh_phys_t *gbh; ++ uint64_t txg = pio->io_txg; ++ uint64_t resid = pio->io_size; ++ uint64_t lsize; ++ int copies = gio->io_prop.zp_copies; ++ int gbh_copies = MIN(copies + 1, spa_max_replication(spa)); ++ zio_prop_t zp; ++ int g, error; ++ ++ error = metaslab_alloc(spa, spa_normal_class(spa), SPA_GANGBLOCKSIZE, ++ bp, gbh_copies, txg, pio == gio ? NULL : gio->io_bp, ++ METASLAB_HINTBP_FAVOR | METASLAB_GANG_HEADER); ++ if (error) { ++ pio->io_error = error; ++ return (ZIO_PIPELINE_CONTINUE); ++ } ++ ++ if (pio == gio) { ++ gnpp = &gio->io_gang_tree; ++ } else { ++ gnpp = pio->io_private; ++ ASSERT(pio->io_ready == zio_write_gang_member_ready); ++ } ++ ++ gn = zio_gang_node_alloc(gnpp); ++ gbh = gn->gn_gbh; ++ bzero(gbh, SPA_GANGBLOCKSIZE); ++ ++ /* ++ * Create the gang header. 
++ */ ++ zio = zio_rewrite(pio, spa, txg, bp, gbh, SPA_GANGBLOCKSIZE, NULL, NULL, ++ pio->io_priority, ZIO_GANG_CHILD_FLAGS(pio), &pio->io_bookmark); ++ ++ /* ++ * Create and nowait the gang children. ++ */ ++ for (g = 0; resid != 0; resid -= lsize, g++) { ++ lsize = P2ROUNDUP(resid / (SPA_GBH_NBLKPTRS - g), ++ SPA_MINBLOCKSIZE); ++ ASSERT(lsize >= SPA_MINBLOCKSIZE && lsize <= resid); ++ ++ zp.zp_checksum = gio->io_prop.zp_checksum; ++ zp.zp_compress = ZIO_COMPRESS_OFF; ++ zp.zp_type = DMU_OT_NONE; ++ zp.zp_level = 0; ++ zp.zp_copies = gio->io_prop.zp_copies; ++ zp.zp_dedup = 0; ++ zp.zp_dedup_verify = 0; ++ ++ zio_nowait(zio_write(zio, spa, txg, &gbh->zg_blkptr[g], ++ (char *)pio->io_data + (pio->io_size - resid), lsize, &zp, ++ zio_write_gang_member_ready, NULL, &gn->gn_child[g], ++ pio->io_priority, ZIO_GANG_CHILD_FLAGS(pio), ++ &pio->io_bookmark)); ++ } ++ ++ /* ++ * Set pio's pipeline to just wait for zio to finish. ++ */ ++ pio->io_pipeline = ZIO_INTERLOCK_PIPELINE; ++ ++ /* ++ * We didn't allocate this bp, so make sure it doesn't get unmarked. ++ */ ++ pio->io_flags &= ~ZIO_FLAG_FASTWRITE; ++ ++ zio_nowait(zio); ++ ++ return (ZIO_PIPELINE_CONTINUE); ++} ++ ++/* ++ * ========================================================================== ++ * Dedup ++ * ========================================================================== ++ */ ++static void ++zio_ddt_child_read_done(zio_t *zio) ++{ ++ blkptr_t *bp = zio->io_bp; ++ ddt_entry_t *dde = zio->io_private; ++ ddt_phys_t *ddp; ++ zio_t *pio = zio_unique_parent(zio); ++ ++ mutex_enter(&pio->io_lock); ++ ddp = ddt_phys_select(dde, bp); ++ if (zio->io_error == 0) ++ ddt_phys_clear(ddp); /* this ddp doesn't need repair */ ++ if (zio->io_error == 0 && dde->dde_repair_data == NULL) ++ dde->dde_repair_data = zio->io_data; ++ else ++ zio_buf_free(zio->io_data, zio->io_size); ++ mutex_exit(&pio->io_lock); ++} ++ ++static int ++zio_ddt_read_start(zio_t *zio) ++{ ++ blkptr_t *bp = zio->io_bp; ++ int p; ++ ++ ASSERT(BP_GET_DEDUP(bp)); ++ ASSERT(BP_GET_PSIZE(bp) == zio->io_size); ++ ASSERT(zio->io_child_type == ZIO_CHILD_LOGICAL); ++ ++ if (zio->io_child_error[ZIO_CHILD_DDT]) { ++ ddt_t *ddt = ddt_select(zio->io_spa, bp); ++ ddt_entry_t *dde = ddt_repair_start(ddt, bp); ++ ddt_phys_t *ddp = dde->dde_phys; ++ ddt_phys_t *ddp_self = ddt_phys_select(dde, bp); ++ blkptr_t blk; ++ ++ ASSERT(zio->io_vsd == NULL); ++ zio->io_vsd = dde; ++ ++ if (ddp_self == NULL) ++ return (ZIO_PIPELINE_CONTINUE); ++ ++ for (p = 0; p < DDT_PHYS_TYPES; p++, ddp++) { ++ if (ddp->ddp_phys_birth == 0 || ddp == ddp_self) ++ continue; ++ ddt_bp_create(ddt->ddt_checksum, &dde->dde_key, ddp, ++ &blk); ++ zio_nowait(zio_read(zio, zio->io_spa, &blk, ++ zio_buf_alloc(zio->io_size), zio->io_size, ++ zio_ddt_child_read_done, dde, zio->io_priority, ++ ZIO_DDT_CHILD_FLAGS(zio) | ZIO_FLAG_DONT_PROPAGATE, ++ &zio->io_bookmark)); ++ } ++ return (ZIO_PIPELINE_CONTINUE); ++ } ++ ++ zio_nowait(zio_read(zio, zio->io_spa, bp, ++ zio->io_data, zio->io_size, NULL, NULL, zio->io_priority, ++ ZIO_DDT_CHILD_FLAGS(zio), &zio->io_bookmark)); ++ ++ return (ZIO_PIPELINE_CONTINUE); ++} ++ ++static int ++zio_ddt_read_done(zio_t *zio) ++{ ++ blkptr_t *bp = zio->io_bp; ++ ++ if (zio_wait_for_children(zio, ZIO_CHILD_DDT, ZIO_WAIT_DONE)) ++ return (ZIO_PIPELINE_STOP); ++ ++ ASSERT(BP_GET_DEDUP(bp)); ++ ASSERT(BP_GET_PSIZE(bp) == zio->io_size); ++ ASSERT(zio->io_child_type == ZIO_CHILD_LOGICAL); ++ ++ if (zio->io_child_error[ZIO_CHILD_DDT]) { ++ ddt_t *ddt = ddt_select(zio->io_spa, bp); ++ ddt_entry_t *dde 
= zio->io_vsd; ++ if (ddt == NULL) { ++ ASSERT(spa_load_state(zio->io_spa) != SPA_LOAD_NONE); ++ return (ZIO_PIPELINE_CONTINUE); ++ } ++ if (dde == NULL) { ++ zio->io_stage = ZIO_STAGE_DDT_READ_START >> 1; ++ zio_taskq_dispatch(zio, ZIO_TASKQ_ISSUE, B_FALSE); ++ return (ZIO_PIPELINE_STOP); ++ } ++ if (dde->dde_repair_data != NULL) { ++ bcopy(dde->dde_repair_data, zio->io_data, zio->io_size); ++ zio->io_child_error[ZIO_CHILD_DDT] = 0; ++ } ++ ddt_repair_done(ddt, dde); ++ zio->io_vsd = NULL; ++ } ++ ++ ASSERT(zio->io_vsd == NULL); ++ ++ return (ZIO_PIPELINE_CONTINUE); ++} ++ ++static boolean_t ++zio_ddt_collision(zio_t *zio, ddt_t *ddt, ddt_entry_t *dde) ++{ ++ spa_t *spa = zio->io_spa; ++ int p; ++ ++ /* ++ * Note: we compare the original data, not the transformed data, ++ * because when zio->io_bp is an override bp, we will not have ++ * pushed the I/O transforms. That's an important optimization ++ * because otherwise we'd compress/encrypt all dmu_sync() data twice. ++ */ ++ for (p = DDT_PHYS_SINGLE; p <= DDT_PHYS_TRIPLE; p++) { ++ zio_t *lio = dde->dde_lead_zio[p]; ++ ++ if (lio != NULL) { ++ return (lio->io_orig_size != zio->io_orig_size || ++ bcmp(zio->io_orig_data, lio->io_orig_data, ++ zio->io_orig_size) != 0); ++ } ++ } ++ ++ for (p = DDT_PHYS_SINGLE; p <= DDT_PHYS_TRIPLE; p++) { ++ ddt_phys_t *ddp = &dde->dde_phys[p]; ++ ++ if (ddp->ddp_phys_birth != 0) { ++ arc_buf_t *abuf = NULL; ++ uint32_t aflags = ARC_WAIT; ++ blkptr_t blk = *zio->io_bp; ++ int error; ++ ++ ddt_bp_fill(ddp, &blk, ddp->ddp_phys_birth); ++ ++ ddt_exit(ddt); ++ ++ error = arc_read_nolock(NULL, spa, &blk, ++ arc_getbuf_func, &abuf, ZIO_PRIORITY_SYNC_READ, ++ ZIO_FLAG_CANFAIL | ZIO_FLAG_SPECULATIVE, ++ &aflags, &zio->io_bookmark); ++ ++ if (error == 0) { ++ if (arc_buf_size(abuf) != zio->io_orig_size || ++ bcmp(abuf->b_data, zio->io_orig_data, ++ zio->io_orig_size) != 0) ++ error = EEXIST; ++ VERIFY(arc_buf_remove_ref(abuf, &abuf) == 1); ++ } ++ ++ ddt_enter(ddt); ++ return (error != 0); ++ } ++ } ++ ++ return (B_FALSE); ++} ++ ++static void ++zio_ddt_child_write_ready(zio_t *zio) ++{ ++ int p = zio->io_prop.zp_copies; ++ ddt_t *ddt = ddt_select(zio->io_spa, zio->io_bp); ++ ddt_entry_t *dde = zio->io_private; ++ ddt_phys_t *ddp = &dde->dde_phys[p]; ++ zio_t *pio; ++ ++ if (zio->io_error) ++ return; ++ ++ ddt_enter(ddt); ++ ++ ASSERT(dde->dde_lead_zio[p] == zio); ++ ++ ddt_phys_fill(ddp, zio->io_bp); ++ ++ while ((pio = zio_walk_parents(zio)) != NULL) ++ ddt_bp_fill(ddp, pio->io_bp, zio->io_txg); ++ ++ ddt_exit(ddt); ++} ++ ++static void ++zio_ddt_child_write_done(zio_t *zio) ++{ ++ int p = zio->io_prop.zp_copies; ++ ddt_t *ddt = ddt_select(zio->io_spa, zio->io_bp); ++ ddt_entry_t *dde = zio->io_private; ++ ddt_phys_t *ddp = &dde->dde_phys[p]; ++ ++ ddt_enter(ddt); ++ ++ ASSERT(ddp->ddp_refcnt == 0); ++ ASSERT(dde->dde_lead_zio[p] == zio); ++ dde->dde_lead_zio[p] = NULL; ++ ++ if (zio->io_error == 0) { ++ while (zio_walk_parents(zio) != NULL) ++ ddt_phys_addref(ddp); ++ } else { ++ ddt_phys_clear(ddp); ++ } ++ ++ ddt_exit(ddt); ++} ++ ++static void ++zio_ddt_ditto_write_done(zio_t *zio) ++{ ++ int p = DDT_PHYS_DITTO; ++ blkptr_t *bp = zio->io_bp; ++ ddt_t *ddt = ddt_select(zio->io_spa, bp); ++ ddt_entry_t *dde = zio->io_private; ++ ddt_phys_t *ddp = &dde->dde_phys[p]; ++ ddt_key_t *ddk = &dde->dde_key; ++ ASSERTV(zio_prop_t *zp = &zio->io_prop); ++ ++ ddt_enter(ddt); ++ ++ ASSERT(ddp->ddp_refcnt == 0); ++ ASSERT(dde->dde_lead_zio[p] == zio); ++ dde->dde_lead_zio[p] = NULL; ++ ++ if (zio->io_error == 0) { ++ 
ASSERT(ZIO_CHECKSUM_EQUAL(bp->blk_cksum, ddk->ddk_cksum)); ++ ASSERT(zp->zp_copies < SPA_DVAS_PER_BP); ++ ASSERT(zp->zp_copies == BP_GET_NDVAS(bp) - BP_IS_GANG(bp)); ++ if (ddp->ddp_phys_birth != 0) ++ ddt_phys_free(ddt, ddk, ddp, zio->io_txg); ++ ddt_phys_fill(ddp, bp); ++ } ++ ++ ddt_exit(ddt); ++} ++ ++static int ++zio_ddt_write(zio_t *zio) ++{ ++ spa_t *spa = zio->io_spa; ++ blkptr_t *bp = zio->io_bp; ++ uint64_t txg = zio->io_txg; ++ zio_prop_t *zp = &zio->io_prop; ++ int p = zp->zp_copies; ++ int ditto_copies; ++ zio_t *cio = NULL; ++ zio_t *dio = NULL; ++ ddt_t *ddt = ddt_select(spa, bp); ++ ddt_entry_t *dde; ++ ddt_phys_t *ddp; ++ ++ ASSERT(BP_GET_DEDUP(bp)); ++ ASSERT(BP_GET_CHECKSUM(bp) == zp->zp_checksum); ++ ASSERT(BP_IS_HOLE(bp) || zio->io_bp_override); ++ ++ ddt_enter(ddt); ++ dde = ddt_lookup(ddt, bp, B_TRUE); ++ ddp = &dde->dde_phys[p]; ++ ++ if (zp->zp_dedup_verify && zio_ddt_collision(zio, ddt, dde)) { ++ /* ++ * If we're using a weak checksum, upgrade to a strong checksum ++ * and try again. If we're already using a strong checksum, ++ * we can't resolve it, so just convert to an ordinary write. ++ * (And automatically e-mail a paper to Nature?) ++ */ ++ if (!zio_checksum_table[zp->zp_checksum].ci_dedup) { ++ zp->zp_checksum = spa_dedup_checksum(spa); ++ zio_pop_transforms(zio); ++ zio->io_stage = ZIO_STAGE_OPEN; ++ BP_ZERO(bp); ++ } else { ++ zp->zp_dedup = 0; ++ } ++ zio->io_pipeline = ZIO_WRITE_PIPELINE; ++ ddt_exit(ddt); ++ return (ZIO_PIPELINE_CONTINUE); ++ } ++ ++ ditto_copies = ddt_ditto_copies_needed(ddt, dde, ddp); ++ ASSERT(ditto_copies < SPA_DVAS_PER_BP); ++ ++ if (ditto_copies > ddt_ditto_copies_present(dde) && ++ dde->dde_lead_zio[DDT_PHYS_DITTO] == NULL) { ++ zio_prop_t czp = *zp; ++ ++ czp.zp_copies = ditto_copies; ++ ++ /* ++ * If we arrived here with an override bp, we won't have run ++ * the transform stack, so we won't have the data we need to ++ * generate a child i/o. So, toss the override bp and restart. ++ * This is safe, because using the override bp is just an ++ * optimization; and it's rare, so the cost doesn't matter. 
++ */ ++ if (zio->io_bp_override) { ++ zio_pop_transforms(zio); ++ zio->io_stage = ZIO_STAGE_OPEN; ++ zio->io_pipeline = ZIO_WRITE_PIPELINE; ++ zio->io_bp_override = NULL; ++ BP_ZERO(bp); ++ ddt_exit(ddt); ++ return (ZIO_PIPELINE_CONTINUE); ++ } ++ ++ dio = zio_write(zio, spa, txg, bp, zio->io_orig_data, ++ zio->io_orig_size, &czp, NULL, ++ zio_ddt_ditto_write_done, dde, zio->io_priority, ++ ZIO_DDT_CHILD_FLAGS(zio), &zio->io_bookmark); ++ ++ zio_push_transform(dio, zio->io_data, zio->io_size, 0, NULL); ++ dde->dde_lead_zio[DDT_PHYS_DITTO] = dio; ++ } ++ ++ if (ddp->ddp_phys_birth != 0 || dde->dde_lead_zio[p] != NULL) { ++ if (ddp->ddp_phys_birth != 0) ++ ddt_bp_fill(ddp, bp, txg); ++ if (dde->dde_lead_zio[p] != NULL) ++ zio_add_child(zio, dde->dde_lead_zio[p]); ++ else ++ ddt_phys_addref(ddp); ++ } else if (zio->io_bp_override) { ++ ASSERT(bp->blk_birth == txg); ++ ASSERT(BP_EQUAL(bp, zio->io_bp_override)); ++ ddt_phys_fill(ddp, bp); ++ ddt_phys_addref(ddp); ++ } else { ++ cio = zio_write(zio, spa, txg, bp, zio->io_orig_data, ++ zio->io_orig_size, zp, zio_ddt_child_write_ready, ++ zio_ddt_child_write_done, dde, zio->io_priority, ++ ZIO_DDT_CHILD_FLAGS(zio), &zio->io_bookmark); ++ ++ zio_push_transform(cio, zio->io_data, zio->io_size, 0, NULL); ++ dde->dde_lead_zio[p] = cio; ++ } ++ ++ ddt_exit(ddt); ++ ++ if (cio) ++ zio_nowait(cio); ++ if (dio) ++ zio_nowait(dio); ++ ++ return (ZIO_PIPELINE_CONTINUE); ++} ++ ++ddt_entry_t *freedde; /* for debugging */ ++ ++static int ++zio_ddt_free(zio_t *zio) ++{ ++ spa_t *spa = zio->io_spa; ++ blkptr_t *bp = zio->io_bp; ++ ddt_t *ddt = ddt_select(spa, bp); ++ ddt_entry_t *dde; ++ ddt_phys_t *ddp; ++ ++ ASSERT(BP_GET_DEDUP(bp)); ++ ASSERT(zio->io_child_type == ZIO_CHILD_LOGICAL); ++ ++ ddt_enter(ddt); ++ freedde = dde = ddt_lookup(ddt, bp, B_TRUE); ++ ddp = ddt_phys_select(dde, bp); ++ ddt_phys_decref(ddp); ++ ddt_exit(ddt); ++ ++ return (ZIO_PIPELINE_CONTINUE); ++} ++ ++/* ++ * ========================================================================== ++ * Allocate and free blocks ++ * ========================================================================== ++ */ ++static int ++zio_dva_allocate(zio_t *zio) ++{ ++ spa_t *spa = zio->io_spa; ++ metaslab_class_t *mc = spa_normal_class(spa); ++ blkptr_t *bp = zio->io_bp; ++ int error; ++ int flags = 0; ++ ++ if (zio->io_gang_leader == NULL) { ++ ASSERT(zio->io_child_type > ZIO_CHILD_GANG); ++ zio->io_gang_leader = zio; ++ } ++ ++ ASSERT(BP_IS_HOLE(bp)); ++ ASSERT3U(BP_GET_NDVAS(bp), ==, 0); ++ ASSERT3U(zio->io_prop.zp_copies, >, 0); ++ ASSERT3U(zio->io_prop.zp_copies, <=, spa_max_replication(spa)); ++ ASSERT3U(zio->io_size, ==, BP_GET_PSIZE(bp)); ++ ++ /* ++ * The dump device does not support gang blocks so allocation on ++ * behalf of the dump device (i.e. ZIO_FLAG_NODATA) must avoid ++ * the "fast" gang feature. ++ */ ++ flags |= (zio->io_flags & ZIO_FLAG_NODATA) ? METASLAB_GANG_AVOID : 0; ++ flags |= (zio->io_flags & ZIO_FLAG_GANG_CHILD) ? ++ METASLAB_GANG_CHILD : 0; ++ flags |= (zio->io_flags & ZIO_FLAG_FASTWRITE) ? 
METASLAB_FASTWRITE : 0; ++ error = metaslab_alloc(spa, mc, zio->io_size, bp, ++ zio->io_prop.zp_copies, zio->io_txg, NULL, flags); ++ ++ if (error) { ++ spa_dbgmsg(spa, "%s: metaslab allocation failure: zio %p, " ++ "size %llu, error %d", spa_name(spa), zio, zio->io_size, ++ error); ++ if (error == ENOSPC && zio->io_size > SPA_MINBLOCKSIZE) ++ return (zio_write_gang_block(zio)); ++ zio->io_error = error; ++ } ++ ++ return (ZIO_PIPELINE_CONTINUE); ++} ++ ++static int ++zio_dva_free(zio_t *zio) ++{ ++ metaslab_free(zio->io_spa, zio->io_bp, zio->io_txg, B_FALSE); ++ ++ return (ZIO_PIPELINE_CONTINUE); ++} ++ ++static int ++zio_dva_claim(zio_t *zio) ++{ ++ int error; ++ ++ error = metaslab_claim(zio->io_spa, zio->io_bp, zio->io_txg); ++ if (error) ++ zio->io_error = error; ++ ++ return (ZIO_PIPELINE_CONTINUE); ++} ++ ++/* ++ * Undo an allocation. This is used by zio_done() when an I/O fails ++ * and we want to give back the block we just allocated. ++ * This handles both normal blocks and gang blocks. ++ */ ++static void ++zio_dva_unallocate(zio_t *zio, zio_gang_node_t *gn, blkptr_t *bp) ++{ ++ int g; ++ ++ ASSERT(bp->blk_birth == zio->io_txg || BP_IS_HOLE(bp)); ++ ASSERT(zio->io_bp_override == NULL); ++ ++ if (!BP_IS_HOLE(bp)) ++ metaslab_free(zio->io_spa, bp, bp->blk_birth, B_TRUE); ++ ++ if (gn != NULL) { ++ for (g = 0; g < SPA_GBH_NBLKPTRS; g++) { ++ zio_dva_unallocate(zio, gn->gn_child[g], ++ &gn->gn_gbh->zg_blkptr[g]); ++ } ++ } ++} ++ ++/* ++ * Try to allocate an intent log block. Return 0 on success, errno on failure. ++ */ ++int ++zio_alloc_zil(spa_t *spa, uint64_t txg, blkptr_t *new_bp, uint64_t size, ++ boolean_t use_slog) ++{ ++ int error = 1; ++ ++ ASSERT(txg > spa_syncing_txg(spa)); ++ ++ /* ++ * ZIL blocks are always contiguous (i.e. not gang blocks) so we ++ * set the METASLAB_GANG_AVOID flag so that they don't "fast gang" ++ * when allocating them. ++ */ ++ if (use_slog) { ++ error = metaslab_alloc(spa, spa_log_class(spa), size, ++ new_bp, 1, txg, NULL, ++ METASLAB_FASTWRITE | METASLAB_GANG_AVOID); ++ } ++ ++ if (error) { ++ error = metaslab_alloc(spa, spa_normal_class(spa), size, ++ new_bp, 1, txg, NULL, ++ METASLAB_FASTWRITE | METASLAB_GANG_AVOID); ++ } ++ ++ if (error == 0) { ++ BP_SET_LSIZE(new_bp, size); ++ BP_SET_PSIZE(new_bp, size); ++ BP_SET_COMPRESS(new_bp, ZIO_COMPRESS_OFF); ++ BP_SET_CHECKSUM(new_bp, ++ spa_version(spa) >= SPA_VERSION_SLIM_ZIL ++ ? ZIO_CHECKSUM_ZILOG2 : ZIO_CHECKSUM_ZILOG); ++ BP_SET_TYPE(new_bp, DMU_OT_INTENT_LOG); ++ BP_SET_LEVEL(new_bp, 0); ++ BP_SET_DEDUP(new_bp, 0); ++ BP_SET_BYTEORDER(new_bp, ZFS_HOST_BYTEORDER); ++ } ++ ++ return (error); ++} ++ ++/* ++ * Free an intent log block. ++ */ ++void ++zio_free_zil(spa_t *spa, uint64_t txg, blkptr_t *bp) ++{ ++ ASSERT(BP_GET_TYPE(bp) == DMU_OT_INTENT_LOG); ++ ASSERT(!BP_IS_GANG(bp)); ++ ++ zio_free(spa, txg, bp); ++} ++ ++/* ++ * ========================================================================== ++ * Read and write to physical devices ++ * ========================================================================== ++ */ ++static int ++zio_vdev_io_start(zio_t *zio) ++{ ++ vdev_t *vd = zio->io_vd; ++ uint64_t align; ++ spa_t *spa = zio->io_spa; ++ ++ ASSERT(zio->io_error == 0); ++ ASSERT(zio->io_child_error[ZIO_CHILD_VDEV] == 0); ++ ++ if (vd == NULL) { ++ if (!(zio->io_flags & ZIO_FLAG_CONFIG_WRITER)) ++ spa_config_enter(spa, SCL_ZIO, zio, RW_READER); ++ ++ /* ++ * The mirror_ops handle multiple DVAs in a single BP. 
++ */ ++ return (vdev_mirror_ops.vdev_op_io_start(zio)); ++ } ++ ++ /* ++ * We keep track of time-sensitive I/Os so that the scan thread ++ * can quickly react to certain workloads. In particular, we care ++ * about non-scrubbing, top-level reads and writes with the following ++ * characteristics: ++ * - synchronous writes of user data to non-slog devices ++ * - any reads of user data ++ * When these conditions are met, adjust the timestamp of spa_last_io ++ * which allows the scan thread to adjust its workload accordingly. ++ */ ++ if (!(zio->io_flags & ZIO_FLAG_SCAN_THREAD) && zio->io_bp != NULL && ++ vd == vd->vdev_top && !vd->vdev_islog && ++ zio->io_bookmark.zb_objset != DMU_META_OBJSET && ++ zio->io_txg != spa_syncing_txg(spa)) { ++ uint64_t old = spa->spa_last_io; ++ uint64_t new = ddi_get_lbolt64(); ++ if (old != new) ++ (void) atomic_cas_64(&spa->spa_last_io, old, new); ++ } ++ ++ align = 1ULL << vd->vdev_top->vdev_ashift; ++ ++ if (P2PHASE(zio->io_size, align) != 0) { ++ uint64_t asize = P2ROUNDUP(zio->io_size, align); ++ char *abuf = zio_buf_alloc(asize); ++ ASSERT(vd == vd->vdev_top); ++ if (zio->io_type == ZIO_TYPE_WRITE) { ++ bcopy(zio->io_data, abuf, zio->io_size); ++ bzero(abuf + zio->io_size, asize - zio->io_size); ++ } ++ zio_push_transform(zio, abuf, asize, asize, zio_subblock); ++ } ++ ++ ASSERT(P2PHASE(zio->io_offset, align) == 0); ++ ASSERT(P2PHASE(zio->io_size, align) == 0); ++ VERIFY(zio->io_type != ZIO_TYPE_WRITE || spa_writeable(spa)); ++ ++ /* ++ * If this is a repair I/O, and there's no self-healing involved -- ++ * that is, we're just resilvering what we expect to resilver -- ++ * then don't do the I/O unless zio's txg is actually in vd's DTL. ++ * This prevents spurious resilvering with nested replication. ++ * For example, given a mirror of mirrors, (A+B)+(C+D), if only ++ * A is out of date, we'll read from C+D, then use the data to ++ * resilver A+B -- but we don't actually want to resilver B, just A. ++ * The top-level mirror has no way to know this, so instead we just ++ * discard unnecessary repairs as we work our way down the vdev tree. ++ * The same logic applies to any form of nested replication: ++ * ditto + mirror, RAID-Z + replacing, etc. This covers them all. ++ */ ++ if ((zio->io_flags & ZIO_FLAG_IO_REPAIR) && ++ !(zio->io_flags & ZIO_FLAG_SELF_HEAL) && ++ zio->io_txg != 0 && /* not a delegated i/o */ ++ !vdev_dtl_contains(vd, DTL_PARTIAL, zio->io_txg, 1)) { ++ ASSERT(zio->io_type == ZIO_TYPE_WRITE); ++ zio_vdev_io_bypass(zio); ++ return (ZIO_PIPELINE_CONTINUE); ++ } ++ ++ if (vd->vdev_ops->vdev_op_leaf && ++ (zio->io_type == ZIO_TYPE_READ || zio->io_type == ZIO_TYPE_WRITE)) { ++ ++ if (zio->io_type == ZIO_TYPE_READ && vdev_cache_read(zio) == 0) ++ return (ZIO_PIPELINE_CONTINUE); ++ ++ if ((zio = vdev_queue_io(zio)) == NULL) ++ return (ZIO_PIPELINE_STOP); ++ ++ if (!vdev_accessible(vd, zio)) { ++ zio->io_error = ENXIO; ++ zio_interrupt(zio); ++ return (ZIO_PIPELINE_STOP); ++ } ++ } ++ ++ return (vd->vdev_ops->vdev_op_io_start(zio)); ++} ++ ++static int ++zio_vdev_io_done(zio_t *zio) ++{ ++ vdev_t *vd = zio->io_vd; ++ vdev_ops_t *ops = vd ? 
vd->vdev_ops : &vdev_mirror_ops; ++ boolean_t unexpected_error = B_FALSE; ++ ++ if (zio_wait_for_children(zio, ZIO_CHILD_VDEV, ZIO_WAIT_DONE)) ++ return (ZIO_PIPELINE_STOP); ++ ++ ASSERT(zio->io_type == ZIO_TYPE_READ || zio->io_type == ZIO_TYPE_WRITE); ++ ++ if (vd != NULL && vd->vdev_ops->vdev_op_leaf) { ++ ++ vdev_queue_io_done(zio); ++ ++ if (zio->io_type == ZIO_TYPE_WRITE) ++ vdev_cache_write(zio); ++ ++ if (zio_injection_enabled && zio->io_error == 0) ++ zio->io_error = zio_handle_device_injection(vd, ++ zio, EIO); ++ ++ if (zio_injection_enabled && zio->io_error == 0) ++ zio->io_error = zio_handle_label_injection(zio, EIO); ++ ++ if (zio->io_error) { ++ if (!vdev_accessible(vd, zio)) { ++ zio->io_error = ENXIO; ++ } else { ++ unexpected_error = B_TRUE; ++ } ++ } ++ } ++ ++ ops->vdev_op_io_done(zio); ++ ++ if (unexpected_error) ++ VERIFY(vdev_probe(vd, zio) == NULL); ++ ++ return (ZIO_PIPELINE_CONTINUE); ++} ++ ++/* ++ * For non-raidz ZIOs, we can just copy aside the bad data read from the ++ * disk, and use that to finish the checksum ereport later. ++ */ ++static void ++zio_vsd_default_cksum_finish(zio_cksum_report_t *zcr, ++ const void *good_buf) ++{ ++ /* no processing needed */ ++ zfs_ereport_finish_checksum(zcr, good_buf, zcr->zcr_cbdata, B_FALSE); ++} ++ ++/*ARGSUSED*/ ++void ++zio_vsd_default_cksum_report(zio_t *zio, zio_cksum_report_t *zcr, void *ignored) ++{ ++ void *buf = zio_buf_alloc(zio->io_size); ++ ++ bcopy(zio->io_data, buf, zio->io_size); ++ ++ zcr->zcr_cbinfo = zio->io_size; ++ zcr->zcr_cbdata = buf; ++ zcr->zcr_finish = zio_vsd_default_cksum_finish; ++ zcr->zcr_free = zio_buf_free; ++} ++ ++static int ++zio_vdev_io_assess(zio_t *zio) ++{ ++ vdev_t *vd = zio->io_vd; ++ ++ if (zio_wait_for_children(zio, ZIO_CHILD_VDEV, ZIO_WAIT_DONE)) ++ return (ZIO_PIPELINE_STOP); ++ ++ if (vd == NULL && !(zio->io_flags & ZIO_FLAG_CONFIG_WRITER)) ++ spa_config_exit(zio->io_spa, SCL_ZIO, zio); ++ ++ if (zio->io_vsd != NULL) { ++ zio->io_vsd_ops->vsd_free(zio); ++ zio->io_vsd = NULL; ++ } ++ ++ if (zio_injection_enabled && zio->io_error == 0) ++ zio->io_error = zio_handle_fault_injection(zio, EIO); ++ ++ /* ++ * If the I/O failed, determine whether we should attempt to retry it. ++ * ++ * On retry, we cut in line in the issue queue, since we don't want ++ * compression/checksumming/etc. work to prevent our (cheap) IO reissue. ++ */ ++ if (zio->io_error && vd == NULL && ++ !(zio->io_flags & (ZIO_FLAG_DONT_RETRY | ZIO_FLAG_IO_RETRY))) { ++ ASSERT(!(zio->io_flags & ZIO_FLAG_DONT_QUEUE)); /* not a leaf */ ++ ASSERT(!(zio->io_flags & ZIO_FLAG_IO_BYPASS)); /* not a leaf */ ++ zio->io_error = 0; ++ zio->io_flags |= ZIO_FLAG_IO_RETRY | ++ ZIO_FLAG_DONT_CACHE | ZIO_FLAG_DONT_AGGREGATE; ++ zio->io_stage = ZIO_STAGE_VDEV_IO_START >> 1; ++ zio_taskq_dispatch(zio, ZIO_TASKQ_ISSUE, ++ zio_requeue_io_start_cut_in_line); ++ return (ZIO_PIPELINE_STOP); ++ } ++ ++ /* ++ * If we got an error on a leaf device, convert it to ENXIO ++ * if the device is not accessible at all. ++ */ ++ if (zio->io_error && vd != NULL && vd->vdev_ops->vdev_op_leaf && ++ !vdev_accessible(vd, zio)) ++ zio->io_error = ENXIO; ++ ++ /* ++ * If we can't write to an interior vdev (mirror or RAID-Z), ++ * set vdev_cant_write so that we stop trying to allocate from it. 
++ */ ++ if (zio->io_error == ENXIO && zio->io_type == ZIO_TYPE_WRITE && ++ vd != NULL && !vd->vdev_ops->vdev_op_leaf) ++ vd->vdev_cant_write = B_TRUE; ++ ++ if (zio->io_error) ++ zio->io_pipeline = ZIO_INTERLOCK_PIPELINE; ++ ++ return (ZIO_PIPELINE_CONTINUE); ++} ++ ++void ++zio_vdev_io_reissue(zio_t *zio) ++{ ++ ASSERT(zio->io_stage == ZIO_STAGE_VDEV_IO_START); ++ ASSERT(zio->io_error == 0); ++ ++ zio->io_stage >>= 1; ++} ++ ++void ++zio_vdev_io_redone(zio_t *zio) ++{ ++ ASSERT(zio->io_stage == ZIO_STAGE_VDEV_IO_DONE); ++ ++ zio->io_stage >>= 1; ++} ++ ++void ++zio_vdev_io_bypass(zio_t *zio) ++{ ++ ASSERT(zio->io_stage == ZIO_STAGE_VDEV_IO_START); ++ ASSERT(zio->io_error == 0); ++ ++ zio->io_flags |= ZIO_FLAG_IO_BYPASS; ++ zio->io_stage = ZIO_STAGE_VDEV_IO_ASSESS >> 1; ++} ++ ++/* ++ * ========================================================================== ++ * Generate and verify checksums ++ * ========================================================================== ++ */ ++static int ++zio_checksum_generate(zio_t *zio) ++{ ++ blkptr_t *bp = zio->io_bp; ++ enum zio_checksum checksum; ++ ++ if (bp == NULL) { ++ /* ++ * This is zio_write_phys(). ++ * We're either generating a label checksum, or none at all. ++ */ ++ checksum = zio->io_prop.zp_checksum; ++ ++ if (checksum == ZIO_CHECKSUM_OFF) ++ return (ZIO_PIPELINE_CONTINUE); ++ ++ ASSERT(checksum == ZIO_CHECKSUM_LABEL); ++ } else { ++ if (BP_IS_GANG(bp) && zio->io_child_type == ZIO_CHILD_GANG) { ++ ASSERT(!IO_IS_ALLOCATING(zio)); ++ checksum = ZIO_CHECKSUM_GANG_HEADER; ++ } else { ++ checksum = BP_GET_CHECKSUM(bp); ++ } ++ } ++ ++ zio_checksum_compute(zio, checksum, zio->io_data, zio->io_size); ++ ++ return (ZIO_PIPELINE_CONTINUE); ++} ++ ++static int ++zio_checksum_verify(zio_t *zio) ++{ ++ zio_bad_cksum_t info; ++ blkptr_t *bp = zio->io_bp; ++ int error; ++ ++ ASSERT(zio->io_vd != NULL); ++ ++ if (bp == NULL) { ++ /* ++ * This is zio_read_phys(). ++ * We're either verifying a label checksum, or nothing at all. ++ */ ++ if (zio->io_prop.zp_checksum == ZIO_CHECKSUM_OFF) ++ return (ZIO_PIPELINE_CONTINUE); ++ ++ ASSERT(zio->io_prop.zp_checksum == ZIO_CHECKSUM_LABEL); ++ } ++ ++ if ((error = zio_checksum_error(zio, &info)) != 0) { ++ zio->io_error = error; ++ if (!(zio->io_flags & ZIO_FLAG_SPECULATIVE)) { ++ zfs_ereport_start_checksum(zio->io_spa, ++ zio->io_vd, zio, zio->io_offset, ++ zio->io_size, NULL, &info); ++ } ++ } ++ ++ return (ZIO_PIPELINE_CONTINUE); ++} ++ ++/* ++ * Called by RAID-Z to ensure we don't compute the checksum twice. ++ */ ++void ++zio_checksum_verified(zio_t *zio) ++{ ++ zio->io_pipeline &= ~ZIO_STAGE_CHECKSUM_VERIFY; ++} ++ ++/* ++ * ========================================================================== ++ * Error rank. Error are ranked in the order 0, ENXIO, ECKSUM, EIO, other. ++ * An error of 0 indictes success. ENXIO indicates whole-device failure, ++ * which may be transient (e.g. unplugged) or permament. ECKSUM and EIO ++ * indicate errors that are specific to one I/O, and most likely permanent. ++ * Any other error is presumed to be worse because we weren't expecting it. 
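++ * For a worked example of the ranking implemented by zio_worst_error()
++ * below: zio_worst_error(ENXIO, ECKSUM) returns ECKSUM, since a per-block
++ * checksum failure ranks worse than a whole-device ENXIO, while
++ * zio_worst_error(EIO, ENOTTY) returns ENOTTY (an arbitrary errno chosen
++ * here only for illustration), because anything absent from the rank table
++ * is treated as worse than the errors we anticipated.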
++ * ========================================================================== ++ */ ++int ++zio_worst_error(int e1, int e2) ++{ ++ static int zio_error_rank[] = { 0, ENXIO, ECKSUM, EIO }; ++ int r1, r2; ++ ++ for (r1 = 0; r1 < sizeof (zio_error_rank) / sizeof (int); r1++) ++ if (e1 == zio_error_rank[r1]) ++ break; ++ ++ for (r2 = 0; r2 < sizeof (zio_error_rank) / sizeof (int); r2++) ++ if (e2 == zio_error_rank[r2]) ++ break; ++ ++ return (r1 > r2 ? e1 : e2); ++} ++ ++/* ++ * ========================================================================== ++ * I/O completion ++ * ========================================================================== ++ */ ++static int ++zio_ready(zio_t *zio) ++{ ++ blkptr_t *bp = zio->io_bp; ++ zio_t *pio, *pio_next; ++ ++ if (zio_wait_for_children(zio, ZIO_CHILD_GANG, ZIO_WAIT_READY) || ++ zio_wait_for_children(zio, ZIO_CHILD_DDT, ZIO_WAIT_READY)) ++ return (ZIO_PIPELINE_STOP); ++ ++ if (zio->io_ready) { ++ ASSERT(IO_IS_ALLOCATING(zio)); ++ ASSERT(bp->blk_birth == zio->io_txg || BP_IS_HOLE(bp)); ++ ASSERT(zio->io_children[ZIO_CHILD_GANG][ZIO_WAIT_READY] == 0); ++ ++ zio->io_ready(zio); ++ } ++ ++ if (bp != NULL && bp != &zio->io_bp_copy) ++ zio->io_bp_copy = *bp; ++ ++ if (zio->io_error) ++ zio->io_pipeline = ZIO_INTERLOCK_PIPELINE; ++ ++ mutex_enter(&zio->io_lock); ++ zio->io_state[ZIO_WAIT_READY] = 1; ++ pio = zio_walk_parents(zio); ++ mutex_exit(&zio->io_lock); ++ ++ /* ++ * As we notify zio's parents, new parents could be added. ++ * New parents go to the head of zio's io_parent_list, however, ++ * so we will (correctly) not notify them. The remainder of zio's ++ * io_parent_list, from 'pio_next' onward, cannot change because ++ * all parents must wait for us to be done before they can be done. ++ */ ++ for (; pio != NULL; pio = pio_next) { ++ pio_next = zio_walk_parents(zio); ++ zio_notify_parent(pio, zio, ZIO_WAIT_READY); ++ } ++ ++ if (zio->io_flags & ZIO_FLAG_NODATA) { ++ if (BP_IS_GANG(bp)) { ++ zio->io_flags &= ~ZIO_FLAG_NODATA; ++ } else { ++ ASSERT((uintptr_t)zio->io_data < SPA_MAXBLOCKSIZE); ++ zio->io_pipeline &= ~ZIO_VDEV_IO_STAGES; ++ } ++ } ++ ++ if (zio_injection_enabled && ++ zio->io_spa->spa_syncing_txg == zio->io_txg) ++ zio_handle_ignored_writes(zio); ++ ++ return (ZIO_PIPELINE_CONTINUE); ++} ++ ++static int ++zio_done(zio_t *zio) ++{ ++ zio_t *pio, *pio_next; ++ int c, w; ++ ++ /* ++ * If our children haven't all completed, ++ * wait for them and then repeat this pipeline stage. 
++ */ ++ if (zio_wait_for_children(zio, ZIO_CHILD_VDEV, ZIO_WAIT_DONE) || ++ zio_wait_for_children(zio, ZIO_CHILD_GANG, ZIO_WAIT_DONE) || ++ zio_wait_for_children(zio, ZIO_CHILD_DDT, ZIO_WAIT_DONE) || ++ zio_wait_for_children(zio, ZIO_CHILD_LOGICAL, ZIO_WAIT_DONE)) ++ return (ZIO_PIPELINE_STOP); ++ ++ for (c = 0; c < ZIO_CHILD_TYPES; c++) ++ for (w = 0; w < ZIO_WAIT_TYPES; w++) ++ ASSERT(zio->io_children[c][w] == 0); ++ ++ if (zio->io_bp != NULL) { ++ ASSERT(zio->io_bp->blk_pad[0] == 0); ++ ASSERT(zio->io_bp->blk_pad[1] == 0); ++ ASSERT(bcmp(zio->io_bp, &zio->io_bp_copy, sizeof (blkptr_t)) == 0 || ++ (zio->io_bp == zio_unique_parent(zio)->io_bp)); ++ if (zio->io_type == ZIO_TYPE_WRITE && !BP_IS_HOLE(zio->io_bp) && ++ zio->io_bp_override == NULL && ++ !(zio->io_flags & ZIO_FLAG_IO_REPAIR)) { ++ ASSERT(!BP_SHOULD_BYTESWAP(zio->io_bp)); ++ ASSERT3U(zio->io_prop.zp_copies, <=, BP_GET_NDVAS(zio->io_bp)); ++ ASSERT(BP_COUNT_GANG(zio->io_bp) == 0 || ++ (BP_COUNT_GANG(zio->io_bp) == BP_GET_NDVAS(zio->io_bp))); ++ } ++ } ++ ++ /* ++ * If there were child vdev/gang/ddt errors, they apply to us now. ++ */ ++ zio_inherit_child_errors(zio, ZIO_CHILD_VDEV); ++ zio_inherit_child_errors(zio, ZIO_CHILD_GANG); ++ zio_inherit_child_errors(zio, ZIO_CHILD_DDT); ++ ++ /* ++ * If the I/O on the transformed data was successful, generate any ++ * checksum reports now while we still have the transformed data. ++ */ ++ if (zio->io_error == 0) { ++ while (zio->io_cksum_report != NULL) { ++ zio_cksum_report_t *zcr = zio->io_cksum_report; ++ uint64_t align = zcr->zcr_align; ++ uint64_t asize = P2ROUNDUP(zio->io_size, align); ++ char *abuf = zio->io_data; ++ ++ if (asize != zio->io_size) { ++ abuf = zio_buf_alloc(asize); ++ bcopy(zio->io_data, abuf, zio->io_size); ++ bzero(abuf + zio->io_size, asize - zio->io_size); ++ } ++ ++ zio->io_cksum_report = zcr->zcr_next; ++ zcr->zcr_next = NULL; ++ zcr->zcr_finish(zcr, abuf); ++ zfs_ereport_free_checksum(zcr); ++ ++ if (asize != zio->io_size) ++ zio_buf_free(abuf, asize); ++ } ++ } ++ ++ zio_pop_transforms(zio); /* note: may set zio->io_error */ ++ ++ vdev_stat_update(zio, zio->io_size); ++ ++ /* ++ * When an I/O completes but was slow post an ereport. ++ */ ++ if (zio->io_delay >= zio_delay_max) ++ zfs_ereport_post(FM_EREPORT_ZFS_DELAY, zio->io_spa, ++ zio->io_vd, zio, 0, 0); ++ ++ if (zio->io_error) { ++ /* ++ * If this I/O is attached to a particular vdev, ++ * generate an error message describing the I/O failure ++ * at the block level. We ignore these errors if the ++ * device is currently unavailable. ++ */ ++ if (zio->io_error != ECKSUM && zio->io_vd != NULL && ++ !vdev_is_dead(zio->io_vd)) ++ zfs_ereport_post(FM_EREPORT_ZFS_IO, zio->io_spa, ++ zio->io_vd, zio, 0, 0); ++ ++ if ((zio->io_error == EIO || !(zio->io_flags & ++ (ZIO_FLAG_SPECULATIVE | ZIO_FLAG_DONT_PROPAGATE))) && ++ zio == zio->io_logical) { ++ /* ++ * For logical I/O requests, tell the SPA to log the ++ * error and generate a logical data ereport. ++ */ ++ spa_log_error(zio->io_spa, zio); ++ zfs_ereport_post(FM_EREPORT_ZFS_DATA, zio->io_spa, NULL, zio, ++ 0, 0); ++ } ++ } ++ ++ if (zio->io_error && zio == zio->io_logical) { ++ /* ++ * Determine whether zio should be reexecuted. This will ++ * propagate all the way to the root via zio_notify_parent(). 
++ */ ++ ASSERT(zio->io_vd == NULL && zio->io_bp != NULL); ++ ASSERT(zio->io_child_type == ZIO_CHILD_LOGICAL); ++ ++ if (IO_IS_ALLOCATING(zio) && ++ !(zio->io_flags & ZIO_FLAG_CANFAIL)) { ++ if (zio->io_error != ENOSPC) ++ zio->io_reexecute |= ZIO_REEXECUTE_NOW; ++ else ++ zio->io_reexecute |= ZIO_REEXECUTE_SUSPEND; ++ } ++ ++ if ((zio->io_type == ZIO_TYPE_READ || ++ zio->io_type == ZIO_TYPE_FREE) && ++ !(zio->io_flags & ZIO_FLAG_SCAN_THREAD) && ++ zio->io_error == ENXIO && ++ spa_load_state(zio->io_spa) == SPA_LOAD_NONE && ++ spa_get_failmode(zio->io_spa) != ZIO_FAILURE_MODE_CONTINUE) ++ zio->io_reexecute |= ZIO_REEXECUTE_SUSPEND; ++ ++ if (!(zio->io_flags & ZIO_FLAG_CANFAIL) && !zio->io_reexecute) ++ zio->io_reexecute |= ZIO_REEXECUTE_SUSPEND; ++ ++ /* ++ * Here is a possibly good place to attempt to do ++ * either combinatorial reconstruction or error correction ++ * based on checksums. It also might be a good place ++ * to send out preliminary ereports before we suspend ++ * processing. ++ */ ++ } ++ ++ /* ++ * If there were logical child errors, they apply to us now. ++ * We defer this until now to avoid conflating logical child ++ * errors with errors that happened to the zio itself when ++ * updating vdev stats and reporting FMA events above. ++ */ ++ zio_inherit_child_errors(zio, ZIO_CHILD_LOGICAL); ++ ++ if ((zio->io_error || zio->io_reexecute) && ++ IO_IS_ALLOCATING(zio) && zio->io_gang_leader == zio && ++ !(zio->io_flags & ZIO_FLAG_IO_REWRITE)) ++ zio_dva_unallocate(zio, zio->io_gang_tree, zio->io_bp); ++ ++ zio_gang_tree_free(&zio->io_gang_tree); ++ ++ /* ++ * Godfather I/Os should never suspend. ++ */ ++ if ((zio->io_flags & ZIO_FLAG_GODFATHER) && ++ (zio->io_reexecute & ZIO_REEXECUTE_SUSPEND)) ++ zio->io_reexecute = 0; ++ ++ if (zio->io_reexecute) { ++ /* ++ * This is a logical I/O that wants to reexecute. ++ * ++ * Reexecute is top-down. When an i/o fails, if it's not ++ * the root, it simply notifies its parent and sticks around. ++ * The parent, seeing that it still has children in zio_done(), ++ * does the same. This percolates all the way up to the root. ++ * The root i/o will reexecute or suspend the entire tree. ++ * ++ * This approach ensures that zio_reexecute() honors ++ * all the original i/o dependency relationships, e.g. ++ * parents not executing until children are ready. ++ */ ++ ASSERT(zio->io_child_type == ZIO_CHILD_LOGICAL); ++ ++ zio->io_gang_leader = NULL; ++ ++ mutex_enter(&zio->io_lock); ++ zio->io_state[ZIO_WAIT_DONE] = 1; ++ mutex_exit(&zio->io_lock); ++ ++ /* ++ * "The Godfather" I/O monitors its children but is ++ * not a true parent to them. It will track them through ++ * the pipeline but severs its ties whenever they get into ++ * trouble (e.g. suspended). This allows "The Godfather" ++ * I/O to return status without blocking. ++ */ ++ for (pio = zio_walk_parents(zio); pio != NULL; pio = pio_next) { ++ zio_link_t *zl = zio->io_walk_link; ++ pio_next = zio_walk_parents(zio); ++ ++ if ((pio->io_flags & ZIO_FLAG_GODFATHER) && ++ (zio->io_reexecute & ZIO_REEXECUTE_SUSPEND)) { ++ zio_remove_child(pio, zio, zl); ++ zio_notify_parent(pio, zio, ZIO_WAIT_DONE); ++ } ++ } ++ ++ if ((pio = zio_unique_parent(zio)) != NULL) { ++ /* ++ * We're not a root i/o, so there's nothing to do ++ * but notify our parent. Don't propagate errors ++ * upward since we haven't permanently failed yet. 
++ */ ++ ASSERT(!(zio->io_flags & ZIO_FLAG_GODFATHER)); ++ zio->io_flags |= ZIO_FLAG_DONT_PROPAGATE; ++ zio_notify_parent(pio, zio, ZIO_WAIT_DONE); ++ } else if (zio->io_reexecute & ZIO_REEXECUTE_SUSPEND) { ++ /* ++ * We'd fail again if we reexecuted now, so suspend ++ * until conditions improve (e.g. device comes online). ++ */ ++ zio_suspend(zio->io_spa, zio); ++ } else { ++ /* ++ * Reexecution is potentially a huge amount of work. ++ * Hand it off to the otherwise-unused claim taskq. ++ */ ++ ASSERT(taskq_empty_ent(&zio->io_tqent)); ++ (void) taskq_dispatch_ent( ++ zio->io_spa->spa_zio_taskq[ZIO_TYPE_CLAIM][ZIO_TASKQ_ISSUE], ++ (task_func_t *)zio_reexecute, zio, 0, ++ &zio->io_tqent); ++ } ++ return (ZIO_PIPELINE_STOP); ++ } ++ ++ ASSERT(zio->io_child_count == 0); ++ ASSERT(zio->io_reexecute == 0); ++ ASSERT(zio->io_error == 0 || (zio->io_flags & ZIO_FLAG_CANFAIL)); ++ ++ /* ++ * Report any checksum errors, since the I/O is complete. ++ */ ++ while (zio->io_cksum_report != NULL) { ++ zio_cksum_report_t *zcr = zio->io_cksum_report; ++ zio->io_cksum_report = zcr->zcr_next; ++ zcr->zcr_next = NULL; ++ zcr->zcr_finish(zcr, NULL); ++ zfs_ereport_free_checksum(zcr); ++ } ++ ++ if (zio->io_flags & ZIO_FLAG_FASTWRITE && zio->io_bp && ++ !BP_IS_HOLE(zio->io_bp)) { ++ metaslab_fastwrite_unmark(zio->io_spa, zio->io_bp); ++ } ++ ++ /* ++ * It is the responsibility of the done callback to ensure that this ++ * particular zio is no longer discoverable for adoption, and as ++ * such, cannot acquire any new parents. ++ */ ++ if (zio->io_done) ++ zio->io_done(zio); ++ ++ mutex_enter(&zio->io_lock); ++ zio->io_state[ZIO_WAIT_DONE] = 1; ++ mutex_exit(&zio->io_lock); ++ ++ for (pio = zio_walk_parents(zio); pio != NULL; pio = pio_next) { ++ zio_link_t *zl = zio->io_walk_link; ++ pio_next = zio_walk_parents(zio); ++ zio_remove_child(pio, zio, zl); ++ zio_notify_parent(pio, zio, ZIO_WAIT_DONE); ++ } ++ ++ if (zio->io_waiter != NULL) { ++ mutex_enter(&zio->io_lock); ++ zio->io_executor = NULL; ++ cv_broadcast(&zio->io_cv); ++ mutex_exit(&zio->io_lock); ++ } else { ++ zio_destroy(zio); ++ } ++ ++ return (ZIO_PIPELINE_STOP); ++} ++ ++/* ++ * ========================================================================== ++ * I/O pipeline definition ++ * ========================================================================== ++ */ ++static zio_pipe_stage_t *zio_pipeline[] = { ++ NULL, ++ zio_read_bp_init, ++ zio_free_bp_init, ++ zio_issue_async, ++ zio_write_bp_init, ++ zio_checksum_generate, ++ zio_ddt_read_start, ++ zio_ddt_read_done, ++ zio_ddt_write, ++ zio_ddt_free, ++ zio_gang_assemble, ++ zio_gang_issue, ++ zio_dva_allocate, ++ zio_dva_free, ++ zio_dva_claim, ++ zio_ready, ++ zio_vdev_io_start, ++ zio_vdev_io_done, ++ zio_vdev_io_assess, ++ zio_checksum_verify, ++ zio_done ++}; ++ ++#if defined(_KERNEL) && defined(HAVE_SPL) ++/* Fault injection */ ++EXPORT_SYMBOL(zio_injection_enabled); ++EXPORT_SYMBOL(zio_inject_fault); ++EXPORT_SYMBOL(zio_inject_list_next); ++EXPORT_SYMBOL(zio_clear_fault); ++EXPORT_SYMBOL(zio_handle_fault_injection); ++EXPORT_SYMBOL(zio_handle_device_injection); ++EXPORT_SYMBOL(zio_handle_label_injection); ++EXPORT_SYMBOL(zio_priority_table); ++EXPORT_SYMBOL(zio_type_name); ++ ++module_param(zio_bulk_flags, int, 0644); ++MODULE_PARM_DESC(zio_bulk_flags, "Additional flags to pass to bulk buffers"); ++ ++module_param(zio_delay_max, int, 0644); ++MODULE_PARM_DESC(zio_delay_max, "Max zio millisec delay before posting event"); ++ ++module_param(zio_requeue_io_start_cut_in_line, int, 0644); 
++MODULE_PARM_DESC(zio_requeue_io_start_cut_in_line, "Prioritize requeued I/O"); ++#endif +diff -uNr linux-3.2.33-go.orig/fs/zfs/zfs/zio_checksum.c linux-3.2.33-go/fs/zfs/zfs/zio_checksum.c +--- linux-3.2.33-go.orig/fs/zfs/zfs/zio_checksum.c 1970-01-01 01:00:00.000000000 +0100 ++++ linux-3.2.33-go/fs/zfs/zfs/zio_checksum.c 2012-11-16 23:25:34.352039300 +0100 +@@ -0,0 +1,274 @@ ++/* ++ * CDDL HEADER START ++ * ++ * The contents of this file are subject to the terms of the ++ * Common Development and Distribution License (the "License"). ++ * You may not use this file except in compliance with the License. ++ * ++ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE ++ * or http://www.opensolaris.org/os/licensing. ++ * See the License for the specific language governing permissions ++ * and limitations under the License. ++ * ++ * When distributing Covered Code, include this CDDL HEADER in each ++ * file and include the License file at usr/src/OPENSOLARIS.LICENSE. ++ * If applicable, add the following below this CDDL HEADER, with the ++ * fields enclosed by brackets "[]" replaced with your own identifying ++ * information: Portions Copyright [yyyy] [name of copyright owner] ++ * ++ * CDDL HEADER END ++ */ ++/* ++ * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved. ++ */ ++ ++#include ++#include ++#include ++#include ++#include ++#include ++ ++/* ++ * Checksum vectors. ++ * ++ * In the SPA, everything is checksummed. We support checksum vectors ++ * for three distinct reasons: ++ * ++ * 1. Different kinds of data need different levels of protection. ++ * For SPA metadata, we always want a very strong checksum. ++ * For user data, we let users make the trade-off between speed ++ * and checksum strength. ++ * ++ * 2. Cryptographic hash and MAC algorithms are an area of active research. ++ * It is likely that in future hash functions will be at least as strong ++ * as current best-of-breed, and may be substantially faster as well. ++ * We want the ability to take advantage of these new hashes as soon as ++ * they become available. ++ * ++ * 3. If someone develops hardware that can compute a strong hash quickly, ++ * we want the ability to take advantage of that hardware. ++ * ++ * Of course, we don't want a checksum upgrade to invalidate existing ++ * data, so we store the checksum *function* in eight bits of the bp. ++ * This gives us room for up to 256 different checksum functions. ++ * ++ * When writing a block, we always checksum it with the latest-and-greatest ++ * checksum function of the appropriate strength. When reading a block, ++ * we compare the expected checksum against the actual checksum, which we ++ * compute via the checksum function specified by BP_GET_CHECKSUM(bp). 
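++ * As a concrete illustration of the selection logic below:
++ * zio_checksum_select(child, parent) returns the parent's value when the
++ * child is ZIO_CHECKSUM_INHERIT, maps ZIO_CHECKSUM_ON to
++ * ZIO_CHECKSUM_ON_VALUE, and otherwise returns the child's explicit
++ * setting unchanged; zio_checksum_dedup_select() additionally maps "on"
++ * to spa_dedup_checksum(spa) so that dedup always uses a checksum strong
++ * enough to be trusted for block matching.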
++ */ ++ ++/*ARGSUSED*/ ++static void ++zio_checksum_off(const void *buf, uint64_t size, zio_cksum_t *zcp) ++{ ++ ZIO_SET_CHECKSUM(zcp, 0, 0, 0, 0); ++} ++ ++zio_checksum_info_t zio_checksum_table[ZIO_CHECKSUM_FUNCTIONS] = { ++ {{NULL, NULL}, 0, 0, 0, "inherit"}, ++ {{NULL, NULL}, 0, 0, 0, "on"}, ++ {{zio_checksum_off, zio_checksum_off}, 0, 0, 0, "off"}, ++ {{zio_checksum_SHA256, zio_checksum_SHA256}, 1, 1, 0, "label"}, ++ {{zio_checksum_SHA256, zio_checksum_SHA256}, 1, 1, 0, "gang_header"}, ++ {{fletcher_2_native, fletcher_2_byteswap}, 0, 1, 0, "zilog"}, ++ {{fletcher_2_native, fletcher_2_byteswap}, 0, 0, 0, "fletcher2"}, ++ {{fletcher_4_native, fletcher_4_byteswap}, 1, 0, 0, "fletcher4"}, ++ {{zio_checksum_SHA256, zio_checksum_SHA256}, 1, 0, 1, "sha256"}, ++ {{fletcher_4_native, fletcher_4_byteswap}, 0, 1, 0, "zilog2"}, ++}; ++ ++enum zio_checksum ++zio_checksum_select(enum zio_checksum child, enum zio_checksum parent) ++{ ++ ASSERT(child < ZIO_CHECKSUM_FUNCTIONS); ++ ASSERT(parent < ZIO_CHECKSUM_FUNCTIONS); ++ ASSERT(parent != ZIO_CHECKSUM_INHERIT && parent != ZIO_CHECKSUM_ON); ++ ++ if (child == ZIO_CHECKSUM_INHERIT) ++ return (parent); ++ ++ if (child == ZIO_CHECKSUM_ON) ++ return (ZIO_CHECKSUM_ON_VALUE); ++ ++ return (child); ++} ++ ++enum zio_checksum ++zio_checksum_dedup_select(spa_t *spa, enum zio_checksum child, ++ enum zio_checksum parent) ++{ ++ ASSERT((child & ZIO_CHECKSUM_MASK) < ZIO_CHECKSUM_FUNCTIONS); ++ ASSERT((parent & ZIO_CHECKSUM_MASK) < ZIO_CHECKSUM_FUNCTIONS); ++ ASSERT(parent != ZIO_CHECKSUM_INHERIT && parent != ZIO_CHECKSUM_ON); ++ ++ if (child == ZIO_CHECKSUM_INHERIT) ++ return (parent); ++ ++ if (child == ZIO_CHECKSUM_ON) ++ return (spa_dedup_checksum(spa)); ++ ++ if (child == (ZIO_CHECKSUM_ON | ZIO_CHECKSUM_VERIFY)) ++ return (spa_dedup_checksum(spa) | ZIO_CHECKSUM_VERIFY); ++ ++ ASSERT(zio_checksum_table[child & ZIO_CHECKSUM_MASK].ci_dedup || ++ (child & ZIO_CHECKSUM_VERIFY) || child == ZIO_CHECKSUM_OFF); ++ ++ return (child); ++} ++ ++/* ++ * Set the external verifier for a gang block based on , ++ * a tuple which is guaranteed to be unique for the life of the pool. ++ */ ++static void ++zio_checksum_gang_verifier(zio_cksum_t *zcp, blkptr_t *bp) ++{ ++ dva_t *dva = BP_IDENTITY(bp); ++ uint64_t txg = BP_PHYSICAL_BIRTH(bp); ++ ++ ASSERT(BP_IS_GANG(bp)); ++ ++ ZIO_SET_CHECKSUM(zcp, DVA_GET_VDEV(dva), DVA_GET_OFFSET(dva), txg, 0); ++} ++ ++/* ++ * Set the external verifier for a label block based on its offset. ++ * The vdev is implicit, and the txg is unknowable at pool open time -- ++ * hence the logic in vdev_uberblock_load() to find the most recent copy. ++ */ ++static void ++zio_checksum_label_verifier(zio_cksum_t *zcp, uint64_t offset) ++{ ++ ZIO_SET_CHECKSUM(zcp, offset, 0, 0, 0); ++} ++ ++/* ++ * Generate the checksum. 
++ */ ++void ++zio_checksum_compute(zio_t *zio, enum zio_checksum checksum, ++ void *data, uint64_t size) ++{ ++ blkptr_t *bp = zio->io_bp; ++ uint64_t offset = zio->io_offset; ++ zio_checksum_info_t *ci = &zio_checksum_table[checksum]; ++ zio_cksum_t cksum; ++ ++ ASSERT((uint_t)checksum < ZIO_CHECKSUM_FUNCTIONS); ++ ASSERT(ci->ci_func[0] != NULL); ++ ++ if (ci->ci_eck) { ++ zio_eck_t *eck; ++ ++ if (checksum == ZIO_CHECKSUM_ZILOG2) { ++ zil_chain_t *zilc = data; ++ ++ size = P2ROUNDUP_TYPED(zilc->zc_nused, ZIL_MIN_BLKSZ, ++ uint64_t); ++ eck = &zilc->zc_eck; ++ } else { ++ eck = (zio_eck_t *)((char *)data + size) - 1; ++ } ++ if (checksum == ZIO_CHECKSUM_GANG_HEADER) ++ zio_checksum_gang_verifier(&eck->zec_cksum, bp); ++ else if (checksum == ZIO_CHECKSUM_LABEL) ++ zio_checksum_label_verifier(&eck->zec_cksum, offset); ++ else ++ bp->blk_cksum = eck->zec_cksum; ++ eck->zec_magic = ZEC_MAGIC; ++ ci->ci_func[0](data, size, &cksum); ++ eck->zec_cksum = cksum; ++ } else { ++ ci->ci_func[0](data, size, &bp->blk_cksum); ++ } ++} ++ ++int ++zio_checksum_error(zio_t *zio, zio_bad_cksum_t *info) ++{ ++ blkptr_t *bp = zio->io_bp; ++ uint_t checksum = (bp == NULL ? zio->io_prop.zp_checksum : ++ (BP_IS_GANG(bp) ? ZIO_CHECKSUM_GANG_HEADER : BP_GET_CHECKSUM(bp))); ++ int byteswap; ++ int error; ++ uint64_t size = (bp == NULL ? zio->io_size : ++ (BP_IS_GANG(bp) ? SPA_GANGBLOCKSIZE : BP_GET_PSIZE(bp))); ++ uint64_t offset = zio->io_offset; ++ void *data = zio->io_data; ++ zio_checksum_info_t *ci = &zio_checksum_table[checksum]; ++ zio_cksum_t actual_cksum, expected_cksum, verifier; ++ ++ if (checksum >= ZIO_CHECKSUM_FUNCTIONS || ci->ci_func[0] == NULL) ++ return (EINVAL); ++ ++ if (ci->ci_eck) { ++ zio_eck_t *eck; ++ ++ if (checksum == ZIO_CHECKSUM_ZILOG2) { ++ zil_chain_t *zilc = data; ++ uint64_t nused; ++ ++ eck = &zilc->zc_eck; ++ if (eck->zec_magic == ZEC_MAGIC) ++ nused = zilc->zc_nused; ++ else if (eck->zec_magic == BSWAP_64(ZEC_MAGIC)) ++ nused = BSWAP_64(zilc->zc_nused); ++ else ++ return (ECKSUM); ++ ++ if (nused > size) ++ return (ECKSUM); ++ ++ size = P2ROUNDUP_TYPED(nused, ZIL_MIN_BLKSZ, uint64_t); ++ } else { ++ eck = (zio_eck_t *)((char *)data + size) - 1; ++ } ++ ++ if (checksum == ZIO_CHECKSUM_GANG_HEADER) ++ zio_checksum_gang_verifier(&verifier, bp); ++ else if (checksum == ZIO_CHECKSUM_LABEL) ++ zio_checksum_label_verifier(&verifier, offset); ++ else ++ verifier = bp->blk_cksum; ++ ++ byteswap = (eck->zec_magic == BSWAP_64(ZEC_MAGIC)); ++ ++ if (byteswap) ++ byteswap_uint64_array(&verifier, sizeof (zio_cksum_t)); ++ ++ expected_cksum = eck->zec_cksum; ++ eck->zec_cksum = verifier; ++ ci->ci_func[byteswap](data, size, &actual_cksum); ++ eck->zec_cksum = expected_cksum; ++ ++ if (byteswap) ++ byteswap_uint64_array(&expected_cksum, ++ sizeof (zio_cksum_t)); ++ } else { ++ ASSERT(!BP_IS_GANG(bp)); ++ byteswap = BP_SHOULD_BYTESWAP(bp); ++ expected_cksum = bp->blk_cksum; ++ ci->ci_func[byteswap](data, size, &actual_cksum); ++ } ++ ++ info->zbc_expected = expected_cksum; ++ info->zbc_actual = actual_cksum; ++ info->zbc_checksum_name = ci->ci_name; ++ info->zbc_byteswapped = byteswap; ++ info->zbc_injected = 0; ++ info->zbc_has_cksum = 1; ++ ++ if (!ZIO_CHECKSUM_EQUAL(actual_cksum, expected_cksum)) ++ return (ECKSUM); ++ ++ if (zio_injection_enabled && !zio->io_error && ++ (error = zio_handle_fault_injection(zio, ECKSUM)) != 0) { ++ ++ info->zbc_injected = 1; ++ return (error); ++ } ++ ++ return (0); ++} +diff -uNr linux-3.2.33-go.orig/fs/zfs/zfs/zio_compress.c 
linux-3.2.33-go/fs/zfs/zfs/zio_compress.c +--- linux-3.2.33-go.orig/fs/zfs/zfs/zio_compress.c 1970-01-01 01:00:00.000000000 +0100 ++++ linux-3.2.33-go/fs/zfs/zfs/zio_compress.c 2012-11-16 23:25:34.352039300 +0100 +@@ -0,0 +1,132 @@ ++/* ++ * CDDL HEADER START ++ * ++ * The contents of this file are subject to the terms of the ++ * Common Development and Distribution License (the "License"). ++ * You may not use this file except in compliance with the License. ++ * ++ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE ++ * or http://www.opensolaris.org/os/licensing. ++ * See the License for the specific language governing permissions ++ * and limitations under the License. ++ * ++ * When distributing Covered Code, include this CDDL HEADER in each ++ * file and include the License file at usr/src/OPENSOLARIS.LICENSE. ++ * If applicable, add the following below this CDDL HEADER, with the ++ * fields enclosed by brackets "[]" replaced with your own identifying ++ * information: Portions Copyright [yyyy] [name of copyright owner] ++ * ++ * CDDL HEADER END ++ */ ++ ++/* ++ * Copyright 2009 Sun Microsystems, Inc. All rights reserved. ++ * Use is subject to license terms. ++ */ ++ ++#include ++#include ++#include ++#include ++#include ++ ++/* ++ * Compression vectors. ++ */ ++ ++zio_compress_info_t zio_compress_table[ZIO_COMPRESS_FUNCTIONS] = { ++ {NULL, NULL, 0, "inherit"}, ++ {NULL, NULL, 0, "on"}, ++ {NULL, NULL, 0, "uncompressed"}, ++ {lzjb_compress, lzjb_decompress, 0, "lzjb"}, ++ {NULL, NULL, 0, "empty"}, ++ {gzip_compress, gzip_decompress, 1, "gzip-1"}, ++ {gzip_compress, gzip_decompress, 2, "gzip-2"}, ++ {gzip_compress, gzip_decompress, 3, "gzip-3"}, ++ {gzip_compress, gzip_decompress, 4, "gzip-4"}, ++ {gzip_compress, gzip_decompress, 5, "gzip-5"}, ++ {gzip_compress, gzip_decompress, 6, "gzip-6"}, ++ {gzip_compress, gzip_decompress, 7, "gzip-7"}, ++ {gzip_compress, gzip_decompress, 8, "gzip-8"}, ++ {gzip_compress, gzip_decompress, 9, "gzip-9"}, ++ {zle_compress, zle_decompress, 64, "zle"}, ++}; ++ ++enum zio_compress ++zio_compress_select(enum zio_compress child, enum zio_compress parent) ++{ ++ ASSERT(child < ZIO_COMPRESS_FUNCTIONS); ++ ASSERT(parent < ZIO_COMPRESS_FUNCTIONS); ++ ASSERT(parent != ZIO_COMPRESS_INHERIT && parent != ZIO_COMPRESS_ON); ++ ++ if (child == ZIO_COMPRESS_INHERIT) ++ return (parent); ++ ++ if (child == ZIO_COMPRESS_ON) ++ return (ZIO_COMPRESS_ON_VALUE); ++ ++ return (child); ++} ++ ++size_t ++zio_compress_data(enum zio_compress c, void *src, void *dst, size_t s_len) ++{ ++ uint64_t *word, *word_end; ++ size_t c_len, d_len, r_len; ++ zio_compress_info_t *ci = &zio_compress_table[c]; ++ ++ ASSERT((uint_t)c < ZIO_COMPRESS_FUNCTIONS); ++ ASSERT((uint_t)c == ZIO_COMPRESS_EMPTY || ci->ci_compress != NULL); ++ ++ /* ++ * If the data is all zeroes, we don't even need to allocate ++ * a block for it. We indicate this by returning zero size. ++ */ ++ word_end = (uint64_t *)((char *)src + s_len); ++ for (word = src; word < word_end; word++) ++ if (*word != 0) ++ break; ++ ++ if (word == word_end) ++ return (0); ++ ++ if (c == ZIO_COMPRESS_EMPTY) ++ return (s_len); ++ ++ /* Compress at least 12.5% */ ++ d_len = P2ALIGN(s_len - (s_len >> 3), (size_t)SPA_MINBLOCKSIZE); ++ if (d_len == 0) ++ return (s_len); ++ ++ c_len = ci->ci_compress(src, dst, s_len, d_len, ci->ci_level); ++ ++ if (c_len > d_len) ++ return (s_len); ++ ++ /* ++ * Cool. We compressed at least as much as we were hoping to. ++ * For both security and repeatability, pad out the last sector. 
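++ * To make the sizes concrete (SPA_MINBLOCKSIZE is 512 bytes): for a 128K
++ * source block, d_len = P2ALIGN(131072 - 16384, 512) = 114688, so a
++ * compressed result is only kept when it saves at least 12.5%; a c_len of
++ * 100000 bytes is then padded below to r_len = P2ROUNDUP(100000, 512)
++ * = 100352.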
++ */ ++ r_len = P2ROUNDUP(c_len, (size_t)SPA_MINBLOCKSIZE); ++ if (r_len > c_len) { ++ bzero((char *)dst + c_len, r_len - c_len); ++ c_len = r_len; ++ } ++ ++ ASSERT3U(c_len, <=, d_len); ++ ASSERT(P2PHASE(c_len, (size_t)SPA_MINBLOCKSIZE) == 0); ++ ++ return (c_len); ++} ++ ++int ++zio_decompress_data(enum zio_compress c, void *src, void *dst, ++ size_t s_len, size_t d_len) ++{ ++ zio_compress_info_t *ci = &zio_compress_table[c]; ++ ++ if ((uint_t)c >= ZIO_COMPRESS_FUNCTIONS || ci->ci_decompress == NULL) ++ return (EINVAL); ++ ++ return (ci->ci_decompress(src, dst, s_len, d_len, ci->ci_level)); ++} +diff -uNr linux-3.2.33-go.orig/fs/zfs/zfs/zio_inject.c linux-3.2.33-go/fs/zfs/zfs/zio_inject.c +--- linux-3.2.33-go.orig/fs/zfs/zfs/zio_inject.c 1970-01-01 01:00:00.000000000 +0100 ++++ linux-3.2.33-go/fs/zfs/zfs/zio_inject.c 2012-11-16 23:25:34.347039358 +0100 +@@ -0,0 +1,520 @@ ++/* ++ * CDDL HEADER START ++ * ++ * The contents of this file are subject to the terms of the ++ * Common Development and Distribution License (the "License"). ++ * You may not use this file except in compliance with the License. ++ * ++ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE ++ * or http://www.opensolaris.org/os/licensing. ++ * See the License for the specific language governing permissions ++ * and limitations under the License. ++ * ++ * When distributing Covered Code, include this CDDL HEADER in each ++ * file and include the License file at usr/src/OPENSOLARIS.LICENSE. ++ * If applicable, add the following below this CDDL HEADER, with the ++ * fields enclosed by brackets "[]" replaced with your own identifying ++ * information: Portions Copyright [yyyy] [name of copyright owner] ++ * ++ * CDDL HEADER END ++ */ ++/* ++ * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved. ++ */ ++ ++/* ++ * ZFS fault injection ++ * ++ * To handle fault injection, we keep track of a series of zinject_record_t ++ * structures which describe which logical block(s) should be injected with a ++ * fault. These are kept in a global list. Each record corresponds to a given ++ * spa_t and maintains a special hold on the spa_t so that it cannot be deleted ++ * or exported while the injection record exists. ++ * ++ * Device level injection is done using the 'zi_guid' field. If this is set, it ++ * means that the error is destined for a particular device, not a piece of ++ * data. ++ * ++ * This is a rather poor data structure and algorithm, but we don't expect more ++ * than a few faults at any one time, so it should be sufficient for our needs. ++ */ ++ ++#include ++#include ++#include ++#include ++#include ++#include ++ ++uint32_t zio_injection_enabled = 0; ++ ++typedef struct inject_handler { ++ int zi_id; ++ spa_t *zi_spa; ++ zinject_record_t zi_record; ++ list_node_t zi_link; ++} inject_handler_t; ++ ++static list_t inject_handlers; ++static krwlock_t inject_lock; ++static int inject_next_id = 1; ++ ++/* ++ * Returns true if the given record matches the I/O in progress. 
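++ * Note that zi_freq is a probability expressed as a percentage: a matching
++ * record with zi_freq == 25 injects its error on roughly 25 of every 100
++ * candidate I/Os (spa_get_random(100) < 25), while zi_freq == 0 injects
++ * it on every match.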
++ */ ++static boolean_t ++zio_match_handler(zbookmark_t *zb, uint64_t type, ++ zinject_record_t *record, int error) ++{ ++ /* ++ * Check for a match against the MOS, which is based on type ++ */ ++ if (zb->zb_objset == DMU_META_OBJSET && ++ record->zi_objset == DMU_META_OBJSET && ++ record->zi_object == DMU_META_DNODE_OBJECT) { ++ if (record->zi_type == DMU_OT_NONE || ++ type == record->zi_type) ++ return (record->zi_freq == 0 || ++ spa_get_random(100) < record->zi_freq); ++ else ++ return (B_FALSE); ++ } ++ ++ /* ++ * Check for an exact match. ++ */ ++ if (zb->zb_objset == record->zi_objset && ++ zb->zb_object == record->zi_object && ++ zb->zb_level == record->zi_level && ++ zb->zb_blkid >= record->zi_start && ++ zb->zb_blkid <= record->zi_end && ++ error == record->zi_error) ++ return (record->zi_freq == 0 || ++ spa_get_random(100) < record->zi_freq); ++ ++ return (B_FALSE); ++} ++ ++/* ++ * Panic the system when a config change happens in the function ++ * specified by tag. ++ */ ++void ++zio_handle_panic_injection(spa_t *spa, char *tag, uint64_t type) ++{ ++ inject_handler_t *handler; ++ ++ rw_enter(&inject_lock, RW_READER); ++ ++ for (handler = list_head(&inject_handlers); handler != NULL; ++ handler = list_next(&inject_handlers, handler)) { ++ ++ if (spa != handler->zi_spa) ++ continue; ++ ++ if (handler->zi_record.zi_type == type && ++ strcmp(tag, handler->zi_record.zi_func) == 0) ++ panic("Panic requested in function %s\n", tag); ++ } ++ ++ rw_exit(&inject_lock); ++} ++ ++/* ++ * Determine if the I/O in question should return failure. Returns the errno ++ * to be returned to the caller. ++ */ ++int ++zio_handle_fault_injection(zio_t *zio, int error) ++{ ++ int ret = 0; ++ inject_handler_t *handler; ++ ++ /* ++ * Ignore I/O not associated with any logical data. ++ */ ++ if (zio->io_logical == NULL) ++ return (0); ++ ++ /* ++ * Currently, we only support fault injection on reads. ++ */ ++ if (zio->io_type != ZIO_TYPE_READ) ++ return (0); ++ ++ rw_enter(&inject_lock, RW_READER); ++ ++ for (handler = list_head(&inject_handlers); handler != NULL; ++ handler = list_next(&inject_handlers, handler)) { ++ ++ /* Ignore errors not destined for this pool */ ++ if (zio->io_spa != handler->zi_spa) ++ continue; ++ ++ /* Ignore device errors and panic injection */ ++ if (handler->zi_record.zi_guid != 0 || ++ handler->zi_record.zi_func[0] != '\0' || ++ handler->zi_record.zi_duration != 0) ++ continue; ++ ++ /* If this handler matches, return EIO */ ++ if (zio_match_handler(&zio->io_logical->io_bookmark, ++ zio->io_bp ? BP_GET_TYPE(zio->io_bp) : DMU_OT_NONE, ++ &handler->zi_record, error)) { ++ ret = error; ++ break; ++ } ++ } ++ ++ rw_exit(&inject_lock); ++ ++ return (ret); ++} ++ ++/* ++ * Determine if the zio is part of a label update and has an injection ++ * handler associated with that portion of the label. Currently, we ++ * allow error injection in either the nvlist or the uberblock region of ++ * of the vdev label. 
++ */ ++int ++zio_handle_label_injection(zio_t *zio, int error) ++{ ++ inject_handler_t *handler; ++ vdev_t *vd = zio->io_vd; ++ uint64_t offset = zio->io_offset; ++ int label; ++ int ret = 0; ++ ++ if (offset >= VDEV_LABEL_START_SIZE && ++ offset < vd->vdev_psize - VDEV_LABEL_END_SIZE) ++ return (0); ++ ++ rw_enter(&inject_lock, RW_READER); ++ ++ for (handler = list_head(&inject_handlers); handler != NULL; ++ handler = list_next(&inject_handlers, handler)) { ++ uint64_t start = handler->zi_record.zi_start; ++ uint64_t end = handler->zi_record.zi_end; ++ ++ /* Ignore device only faults or panic injection */ ++ if (handler->zi_record.zi_start == 0 || ++ handler->zi_record.zi_func[0] != '\0' || ++ handler->zi_record.zi_duration != 0) ++ continue; ++ ++ /* ++ * The injection region is the relative offsets within a ++ * vdev label. We must determine the label which is being ++ * updated and adjust our region accordingly. ++ */ ++ label = vdev_label_number(vd->vdev_psize, offset); ++ start = vdev_label_offset(vd->vdev_psize, label, start); ++ end = vdev_label_offset(vd->vdev_psize, label, end); ++ ++ if (zio->io_vd->vdev_guid == handler->zi_record.zi_guid && ++ (offset >= start && offset <= end)) { ++ ret = error; ++ break; ++ } ++ } ++ rw_exit(&inject_lock); ++ return (ret); ++} ++ ++ ++int ++zio_handle_device_injection(vdev_t *vd, zio_t *zio, int error) ++{ ++ inject_handler_t *handler; ++ int ret = 0; ++ ++ /* ++ * We skip over faults in the labels unless it's during ++ * device open (i.e. zio == NULL). ++ */ ++ if (zio != NULL) { ++ uint64_t offset = zio->io_offset; ++ ++ if (offset < VDEV_LABEL_START_SIZE || ++ offset >= vd->vdev_psize - VDEV_LABEL_END_SIZE) ++ return (0); ++ } ++ ++ rw_enter(&inject_lock, RW_READER); ++ ++ for (handler = list_head(&inject_handlers); handler != NULL; ++ handler = list_next(&inject_handlers, handler)) { ++ ++ /* ++ * Ignore label specific faults, panic injection ++ * or fake writes ++ */ ++ if (handler->zi_record.zi_start != 0 || ++ handler->zi_record.zi_func[0] != '\0' || ++ handler->zi_record.zi_duration != 0) ++ continue; ++ ++ if (vd->vdev_guid == handler->zi_record.zi_guid) { ++ if (handler->zi_record.zi_failfast && ++ (zio == NULL || (zio->io_flags & ++ (ZIO_FLAG_IO_RETRY | ZIO_FLAG_TRYHARD)))) { ++ continue; ++ } ++ ++ /* Handle type specific I/O failures */ ++ if (zio != NULL && ++ handler->zi_record.zi_iotype != ZIO_TYPES && ++ handler->zi_record.zi_iotype != zio->io_type) ++ continue; ++ ++ if (handler->zi_record.zi_error == error) { ++ /* ++ * For a failed open, pretend like the device ++ * has gone away. ++ */ ++ if (error == ENXIO) ++ vd->vdev_stat.vs_aux = ++ VDEV_AUX_OPEN_FAILED; ++ ++ /* ++ * Treat these errors as if they had been ++ * retried so that all the appropriate stats ++ * and FMA events are generated. ++ */ ++ if (!handler->zi_record.zi_failfast && ++ zio != NULL) ++ zio->io_flags |= ZIO_FLAG_IO_RETRY; ++ ++ ret = error; ++ break; ++ } ++ if (handler->zi_record.zi_error == ENXIO) { ++ ret = EIO; ++ break; ++ } ++ } ++ } ++ ++ rw_exit(&inject_lock); ++ ++ return (ret); ++} ++ ++/* ++ * Simulate hardware that ignores cache flushes. For requested number ++ * of seconds nix the actual writing to disk. 
++ */ ++void ++zio_handle_ignored_writes(zio_t *zio) ++{ ++ inject_handler_t *handler; ++ ++ rw_enter(&inject_lock, RW_READER); ++ ++ for (handler = list_head(&inject_handlers); handler != NULL; ++ handler = list_next(&inject_handlers, handler)) { ++ ++ /* Ignore errors not destined for this pool */ ++ if (zio->io_spa != handler->zi_spa) ++ continue; ++ ++ if (handler->zi_record.zi_duration == 0) ++ continue; ++ ++ /* ++ * Positive duration implies # of seconds, negative ++ * a number of txgs ++ */ ++ if (handler->zi_record.zi_timer == 0) { ++ if (handler->zi_record.zi_duration > 0) ++ handler->zi_record.zi_timer = ddi_get_lbolt64(); ++ else ++ handler->zi_record.zi_timer = zio->io_txg; ++ } ++ ++ /* Have a "problem" writing 60% of the time */ ++ if (spa_get_random(100) < 60) ++ zio->io_pipeline &= ~ZIO_VDEV_IO_STAGES; ++ break; ++ } ++ ++ rw_exit(&inject_lock); ++} ++ ++void ++spa_handle_ignored_writes(spa_t *spa) ++{ ++ inject_handler_t *handler; ++ ++ if (zio_injection_enabled == 0) ++ return; ++ ++ rw_enter(&inject_lock, RW_READER); ++ ++ for (handler = list_head(&inject_handlers); handler != NULL; ++ handler = list_next(&inject_handlers, handler)) { ++ ++ /* Ignore errors not destined for this pool */ ++ if (spa != handler->zi_spa) ++ continue; ++ ++ if (handler->zi_record.zi_duration == 0) ++ continue; ++ ++ if (handler->zi_record.zi_duration > 0) { ++ VERIFY(handler->zi_record.zi_timer == 0 || ++ handler->zi_record.zi_timer + ++ handler->zi_record.zi_duration * hz > ++ ddi_get_lbolt64()); ++ } else { ++ /* duration is negative so the subtraction here adds */ ++ VERIFY(handler->zi_record.zi_timer == 0 || ++ handler->zi_record.zi_timer - ++ handler->zi_record.zi_duration >= ++ spa_syncing_txg(spa)); ++ } ++ } ++ ++ rw_exit(&inject_lock); ++} ++ ++/* ++ * Create a new handler for the given record. We add it to the list, adding ++ * a reference to the spa_t in the process. We increment zio_injection_enabled, ++ * which is the switch to trigger all fault injection. ++ */ ++int ++zio_inject_fault(char *name, int flags, int *id, zinject_record_t *record) ++{ ++ inject_handler_t *handler; ++ int error; ++ spa_t *spa; ++ ++ /* ++ * If this is pool-wide metadata, make sure we unload the corresponding ++ * spa_t, so that the next attempt to load it will trigger the fault. ++ * We call spa_reset() to unload the pool appropriately. ++ */ ++ if (flags & ZINJECT_UNLOAD_SPA) ++ if ((error = spa_reset(name)) != 0) ++ return (error); ++ ++ if (!(flags & ZINJECT_NULL)) { ++ /* ++ * spa_inject_ref() will add an injection reference, which will ++ * prevent the pool from being removed from the namespace while ++ * still allowing it to be unloaded. ++ */ ++ if ((spa = spa_inject_addref(name)) == NULL) ++ return (ENOENT); ++ ++ handler = kmem_alloc(sizeof (inject_handler_t), KM_SLEEP); ++ ++ rw_enter(&inject_lock, RW_WRITER); ++ ++ *id = handler->zi_id = inject_next_id++; ++ handler->zi_spa = spa; ++ handler->zi_record = *record; ++ list_insert_tail(&inject_handlers, handler); ++ atomic_add_32(&zio_injection_enabled, 1); ++ ++ rw_exit(&inject_lock); ++ } ++ ++ /* ++ * Flush the ARC, so that any attempts to read this data will end up ++ * going to the ZIO layer. Note that this is a little overkill, but ++ * we don't have the necessary ARC interfaces to do anything else, and ++ * fault injection isn't a performance critical path. ++ */ ++ if (flags & ZINJECT_FLUSH_ARC) ++ arc_flush(NULL); ++ ++ return (0); ++} ++ ++/* ++ * Returns the next record with an ID greater than that supplied to the ++ * function. 
Used to iterate over all handlers in the system. ++ */ ++int ++zio_inject_list_next(int *id, char *name, size_t buflen, ++ zinject_record_t *record) ++{ ++ inject_handler_t *handler; ++ int ret; ++ ++ mutex_enter(&spa_namespace_lock); ++ rw_enter(&inject_lock, RW_READER); ++ ++ for (handler = list_head(&inject_handlers); handler != NULL; ++ handler = list_next(&inject_handlers, handler)) ++ if (handler->zi_id > *id) ++ break; ++ ++ if (handler) { ++ *record = handler->zi_record; ++ *id = handler->zi_id; ++ (void) strncpy(name, spa_name(handler->zi_spa), buflen); ++ ret = 0; ++ } else { ++ ret = ENOENT; ++ } ++ ++ rw_exit(&inject_lock); ++ mutex_exit(&spa_namespace_lock); ++ ++ return (ret); ++} ++ ++/* ++ * Clear the fault handler with the given identifier, or return ENOENT if none ++ * exists. ++ */ ++int ++zio_clear_fault(int id) ++{ ++ inject_handler_t *handler; ++ ++ rw_enter(&inject_lock, RW_WRITER); ++ ++ for (handler = list_head(&inject_handlers); handler != NULL; ++ handler = list_next(&inject_handlers, handler)) ++ if (handler->zi_id == id) ++ break; ++ ++ if (handler == NULL) { ++ rw_exit(&inject_lock); ++ return (ENOENT); ++ } ++ ++ list_remove(&inject_handlers, handler); ++ rw_exit(&inject_lock); ++ ++ spa_inject_delref(handler->zi_spa); ++ kmem_free(handler, sizeof (inject_handler_t)); ++ atomic_add_32(&zio_injection_enabled, -1); ++ ++ return (0); ++} ++ ++void ++zio_inject_init(void) ++{ ++ rw_init(&inject_lock, NULL, RW_DEFAULT, NULL); ++ list_create(&inject_handlers, sizeof (inject_handler_t), ++ offsetof(inject_handler_t, zi_link)); ++} ++ ++void ++zio_inject_fini(void) ++{ ++ list_destroy(&inject_handlers); ++ rw_destroy(&inject_lock); ++} ++ ++#if defined(_KERNEL) && defined(HAVE_SPL) ++module_param(zio_injection_enabled, int, 0644); ++MODULE_PARM_DESC(zio_injection_enabled, "Enable fault injection"); ++#endif +diff -uNr linux-3.2.33-go.orig/fs/zfs/zfs/zle.c linux-3.2.33-go/fs/zfs/zfs/zle.c +--- linux-3.2.33-go.orig/fs/zfs/zfs/zle.c 1970-01-01 01:00:00.000000000 +0100 ++++ linux-3.2.33-go/fs/zfs/zfs/zle.c 2012-11-16 23:25:34.352039300 +0100 +@@ -0,0 +1,86 @@ ++/* ++ * CDDL HEADER START ++ * ++ * The contents of this file are subject to the terms of the ++ * Common Development and Distribution License (the "License"). ++ * You may not use this file except in compliance with the License. ++ * ++ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE ++ * or http://www.opensolaris.org/os/licensing. ++ * See the License for the specific language governing permissions ++ * and limitations under the License. ++ * ++ * When distributing Covered Code, include this CDDL HEADER in each ++ * file and include the License file at usr/src/OPENSOLARIS.LICENSE. ++ * If applicable, add the following below this CDDL HEADER, with the ++ * fields enclosed by brackets "[]" replaced with your own identifying ++ * information: Portions Copyright [yyyy] [name of copyright owner] ++ * ++ * CDDL HEADER END ++ */ ++ ++/* ++ * Copyright 2009 Sun Microsystems, Inc. All rights reserved. ++ * Use is subject to license terms. ++ */ ++ ++/* ++ * Zero-length encoding. This is a fast and simple algorithm to eliminate ++ * runs of zeroes. Each chunk of compressed data begins with a length byte, b. ++ * If b < n (where n is the compression parameter) then the next b + 1 bytes ++ * are literal values. If b >= n then the next (256 - b + 1) bytes are zero. 
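++ * Concretely, a length byte b >= n encodes a run of b - n + 1 zeros (see
++ * zle_decompress() below: len = 1 + b, then len -= n), up to the 256 - n
++ * bytes a single chunk may cover. With the n = 64 used by the "zle" entry
++ * in zio_compress_table, a run of ten zero bytes is emitted as the single
++ * length byte 73 (10 - 1 + 64), while three literal bytes are emitted as
++ * the length byte 2 followed by the three literals.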
++ */ ++#include ++#include ++ ++size_t ++zle_compress(void *s_start, void *d_start, size_t s_len, size_t d_len, int n) ++{ ++ uchar_t *src = s_start; ++ uchar_t *dst = d_start; ++ uchar_t *s_end = src + s_len; ++ uchar_t *d_end = dst + d_len; ++ ++ while (src < s_end && dst < d_end - 1) { ++ uchar_t *first = src; ++ uchar_t *len = dst++; ++ if (src[0] == 0) { ++ uchar_t *last = src + (256 - n); ++ while (src < MIN(last, s_end) && src[0] == 0) ++ src++; ++ *len = src - first - 1 + n; ++ } else { ++ uchar_t *last = src + n; ++ if (d_end - dst < n) ++ break; ++ while (src < MIN(last, s_end) - 1 && (src[0] | src[1])) ++ *dst++ = *src++; ++ if (src[0]) ++ *dst++ = *src++; ++ *len = src - first - 1; ++ } ++ } ++ return (src == s_end ? dst - (uchar_t *)d_start : s_len); ++} ++ ++int ++zle_decompress(void *s_start, void *d_start, size_t s_len, size_t d_len, int n) ++{ ++ uchar_t *src = s_start; ++ uchar_t *dst = d_start; ++ uchar_t *s_end = src + s_len; ++ uchar_t *d_end = dst + d_len; ++ ++ while (src < s_end && dst < d_end) { ++ int len = 1 + *src++; ++ if (len <= n) { ++ while (len-- != 0) ++ *dst++ = *src++; ++ } else { ++ len -= n; ++ while (len-- != 0) ++ *dst++ = 0; ++ } ++ } ++ return (dst == d_end ? 0 : -1); ++} +diff -uNr linux-3.2.33-go.orig/fs/zfs/zfs/zpl_ctldir.c linux-3.2.33-go/fs/zfs/zfs/zpl_ctldir.c +--- linux-3.2.33-go.orig/fs/zfs/zfs/zpl_ctldir.c 1970-01-01 01:00:00.000000000 +0100 ++++ linux-3.2.33-go/fs/zfs/zfs/zpl_ctldir.c 2012-11-16 23:25:34.349039334 +0100 +@@ -0,0 +1,534 @@ ++/* ++ * CDDL HEADER START ++ * ++ * The contents of this file are subject to the terms of the ++ * Common Development and Distribution License (the "License"). ++ * You may not use this file except in compliance with the License. ++ * ++ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE ++ * or http://www.opensolaris.org/os/licensing. ++ * See the License for the specific language governing permissions ++ * and limitations under the License. ++ * ++ * When distributing Covered Code, include this CDDL HEADER in each ++ * file and include the License file at usr/src/OPENSOLARIS.LICENSE. ++ * If applicable, add the following below this CDDL HEADER, with the ++ * fields enclosed by brackets "[]" replaced with your own identifying ++ * information: Portions Copyright [yyyy] [name of copyright owner] ++ * ++ * CDDL HEADER END ++ */ ++/* ++ * Copyright (C) 2011 Lawrence Livermore National Security, LLC. ++ * Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER). ++ * LLNL-CODE-403049. ++ * Rewritten for Linux by: ++ * Rohan Puri ++ * Brian Behlendorf ++ */ ++ ++#include ++#include ++#include ++#include ++#include ++ ++/* ++ * Common open routine. Disallow any write access. ++ */ ++/* ARGSUSED */ ++static int ++zpl_common_open(struct inode *ip, struct file *filp) ++{ ++ if (filp->f_mode & FMODE_WRITE) ++ return (-EACCES); ++ ++ return generic_file_open(ip, filp); ++} ++ ++static int ++zpl_common_readdir(struct file *filp, void *dirent, filldir_t filldir) ++{ ++ struct dentry *dentry = filp->f_path.dentry; ++ struct inode *ip = dentry->d_inode; ++ int error = 0; ++ ++ switch (filp->f_pos) { ++ case 0: ++ error = filldir(dirent, ".", 1, 0, ip->i_ino, DT_DIR); ++ if (error) ++ break; ++ ++ filp->f_pos++; ++ /* fall-thru */ ++ case 1: ++ error = filldir(dirent, "..", 2, 1, parent_ino(dentry), DT_DIR); ++ if (error) ++ break; ++ ++ filp->f_pos++; ++ /* fall-thru */ ++ default: ++ break; ++ } ++ ++ return (error); ++} ++ ++/* ++ * Get root directory contents. 
++ */ ++static int ++zpl_root_readdir(struct file *filp, void *dirent, filldir_t filldir) ++{ ++ struct dentry *dentry = filp->f_path.dentry; ++ struct inode *ip = dentry->d_inode; ++ zfs_sb_t *zsb = ITOZSB(ip); ++ int error = 0; ++ ++ ZFS_ENTER(zsb); ++ ++ switch (filp->f_pos) { ++ case 0: ++ error = filldir(dirent, ".", 1, 0, ip->i_ino, DT_DIR); ++ if (error) ++ goto out; ++ ++ filp->f_pos++; ++ /* fall-thru */ ++ case 1: ++ error = filldir(dirent, "..", 2, 1, parent_ino(dentry), DT_DIR); ++ if (error) ++ goto out; ++ ++ filp->f_pos++; ++ /* fall-thru */ ++ case 2: ++ error = filldir(dirent, ZFS_SNAPDIR_NAME, ++ strlen(ZFS_SNAPDIR_NAME), 2, ZFSCTL_INO_SNAPDIR, DT_DIR); ++ if (error) ++ goto out; ++ ++ filp->f_pos++; ++ /* fall-thru */ ++ case 3: ++ error = filldir(dirent, ZFS_SHAREDIR_NAME, ++ strlen(ZFS_SHAREDIR_NAME), 3, ZFSCTL_INO_SHARES, DT_DIR); ++ if (error) ++ goto out; ++ ++ filp->f_pos++; ++ /* fall-thru */ ++ } ++out: ++ ZFS_EXIT(zsb); ++ ++ return (error); ++} ++ ++/* ++ * Get root directory attributes. ++ */ ++/* ARGSUSED */ ++static int ++zpl_root_getattr(struct vfsmount *mnt, struct dentry *dentry, ++ struct kstat *stat) ++{ ++ int error; ++ ++ error = simple_getattr(mnt, dentry, stat); ++ stat->atime = CURRENT_TIME; ++ ++ return (error); ++} ++ ++static struct dentry * ++#ifdef HAVE_LOOKUP_NAMEIDATA ++zpl_root_lookup(struct inode *dip, struct dentry *dentry, struct nameidata *nd) ++#else ++zpl_root_lookup(struct inode *dip, struct dentry *dentry, unsigned int flags) ++#endif ++{ ++ cred_t *cr = CRED(); ++ struct inode *ip; ++ int error; ++ ++ crhold(cr); ++ error = -zfsctl_root_lookup(dip, dname(dentry), &ip, 0, cr, NULL, NULL); ++ ASSERT3S(error, <=, 0); ++ crfree(cr); ++ ++ if (error) { ++ if (error == -ENOENT) ++ return d_splice_alias(NULL, dentry); ++ else ++ return ERR_PTR(error); ++ } ++ ++ return d_splice_alias(ip, dentry); ++} ++ ++/* ++ * The '.zfs' control directory file and inode operations. ++ */ ++const struct file_operations zpl_fops_root = { ++ .open = zpl_common_open, ++ .llseek = generic_file_llseek, ++ .read = generic_read_dir, ++ .readdir = zpl_root_readdir, ++}; ++ ++const struct inode_operations zpl_ops_root = { ++ .lookup = zpl_root_lookup, ++ .getattr = zpl_root_getattr, ++}; ++ ++static struct dentry * ++#ifdef HAVE_LOOKUP_NAMEIDATA ++zpl_snapdir_lookup(struct inode *dip, struct dentry *dentry, ++ struct nameidata *nd) ++#else ++zpl_snapdir_lookup(struct inode *dip, struct dentry *dentry, ++ unsigned int flags) ++#endif ++ ++{ ++ cred_t *cr = CRED(); ++ struct inode *ip; ++ int error; ++ ++ crhold(cr); ++ error = -zfsctl_snapdir_lookup(dip, dname(dentry), &ip, ++ 0, cr, NULL, NULL); ++ ASSERT3S(error, <=, 0); ++ crfree(cr); ++ ++ if (error) { ++ if (error == -ENOENT) ++ return d_splice_alias(NULL, dentry); ++ else ++ return ERR_PTR(error); ++ } ++ ++ /* ++ * Auto mounting of snapshots is only supported for 2.6.37 and ++ * newer kernels. Prior to this kernel the ops->follow_link() ++ * callback was used as a hack to trigger the mount. The ++ * resulting vfsmount was then explicitly grafted in to the ++ * name space. While it might be possible to add compatibility ++ * code to accomplish this it would require considerable care. 
++ */ ++#ifdef HAVE_AUTOMOUNT ++ dentry->d_op = &zpl_dops_snapdirs; ++#endif /* HAVE_AUTOMOUNT */ ++ ++ return d_splice_alias(ip, dentry); ++} ++ ++/* ARGSUSED */ ++static int ++zpl_snapdir_readdir(struct file *filp, void *dirent, filldir_t filldir) ++{ ++ struct dentry *dentry = filp->f_path.dentry; ++ struct inode *dip = dentry->d_inode; ++ zfs_sb_t *zsb = ITOZSB(dip); ++ char snapname[MAXNAMELEN]; ++ uint64_t id, cookie; ++ boolean_t case_conflict; ++ int error = 0; ++ ++ ZFS_ENTER(zsb); ++ ++ cookie = filp->f_pos; ++ switch (filp->f_pos) { ++ case 0: ++ error = filldir(dirent, ".", 1, 0, dip->i_ino, DT_DIR); ++ if (error) ++ goto out; ++ ++ filp->f_pos++; ++ /* fall-thru */ ++ case 1: ++ error = filldir(dirent, "..", 2, 1, parent_ino(dentry), DT_DIR); ++ if (error) ++ goto out; ++ ++ filp->f_pos++; ++ /* fall-thru */ ++ default: ++ while (error == 0) { ++ error = -dmu_snapshot_list_next(zsb->z_os, MAXNAMELEN, ++ snapname, &id, &cookie, &case_conflict); ++ if (error) ++ goto out; ++ ++ error = filldir(dirent, snapname, strlen(snapname), ++ filp->f_pos, ZFSCTL_INO_SHARES - id, DT_DIR); ++ if (error) ++ goto out; ++ ++ filp->f_pos = cookie; ++ } ++ } ++out: ++ ZFS_EXIT(zsb); ++ ++ if (error == -ENOENT) ++ return (0); ++ ++ return (error); ++} ++ ++int ++zpl_snapdir_rename(struct inode *sdip, struct dentry *sdentry, ++ struct inode *tdip, struct dentry *tdentry) ++{ ++ cred_t *cr = CRED(); ++ int error; ++ ++ crhold(cr); ++ error = -zfsctl_snapdir_rename(sdip, dname(sdentry), ++ tdip, dname(tdentry), cr, 0); ++ ASSERT3S(error, <=, 0); ++ crfree(cr); ++ ++ return (error); ++} ++ ++static int ++zpl_snapdir_rmdir(struct inode *dip, struct dentry *dentry) ++{ ++ cred_t *cr = CRED(); ++ int error; ++ ++ crhold(cr); ++ error = -zfsctl_snapdir_remove(dip, dname(dentry), cr, 0); ++ ASSERT3S(error, <=, 0); ++ crfree(cr); ++ ++ return (error); ++} ++ ++static int ++zpl_snapdir_mkdir(struct inode *dip, struct dentry *dentry, zpl_umode_t mode) ++{ ++ cred_t *cr = CRED(); ++ vattr_t *vap; ++ struct inode *ip; ++ int error; ++ ++ crhold(cr); ++ vap = kmem_zalloc(sizeof(vattr_t), KM_SLEEP); ++ zpl_vap_init(vap, dip, dentry, mode | S_IFDIR, cr); ++ ++ error = -zfsctl_snapdir_mkdir(dip, dname(dentry), vap, &ip, cr, 0); ++ if (error == 0) { ++#ifdef HAVE_AUTOMOUNT ++ dentry->d_op = &zpl_dops_snapdirs; ++#endif /* HAVE_AUTOMOUNT */ ++ d_instantiate(dentry, ip); ++ } ++ ++ kmem_free(vap, sizeof(vattr_t)); ++ ASSERT3S(error, <=, 0); ++ crfree(cr); ++ ++ return (error); ++} ++ ++#ifdef HAVE_AUTOMOUNT ++static struct vfsmount * ++zpl_snapdir_automount(struct path *path) ++{ ++ struct dentry *dentry = path->dentry; ++ int error; ++ ++ /* ++ * We must briefly disable automounts for this dentry because the ++ * user space mount utility will trigger another lookup on this ++ * directory. That will result in zpl_snapdir_automount() being ++ * called repeatedly. The DCACHE_NEED_AUTOMOUNT flag can be ++ * safely reset once the mount completes. ++ */ ++ dentry->d_flags &= ~DCACHE_NEED_AUTOMOUNT; ++ error = -zfsctl_mount_snapshot(path, 0); ++ dentry->d_flags |= DCACHE_NEED_AUTOMOUNT; ++ if (error) ++ return ERR_PTR(error); ++ ++ /* ++ * Rather than returning the new vfsmount for the snapshot we must ++ * return NULL to indicate a mount collision. This is done because ++ * the user space mount calls do_add_mount() which adds the vfsmount ++ * to the name space. If we returned the new mount here it would be ++ * added again to the vfsmount list resulting in list corruption. 
++ */ ++ return (NULL); ++} ++#endif /* HAVE_AUTOMOUNT */ ++ ++/* ++ * Get snapshot directory attributes. ++ */ ++/* ARGSUSED */ ++static int ++zpl_snapdir_getattr(struct vfsmount *mnt, struct dentry *dentry, ++ struct kstat *stat) ++{ ++ zfs_sb_t *zsb = ITOZSB(dentry->d_inode); ++ int error; ++ ++ ZFS_ENTER(zsb); ++ error = simple_getattr(mnt, dentry, stat); ++ stat->nlink = stat->size = avl_numnodes(&zsb->z_ctldir_snaps) + 2; ++ stat->ctime = stat->mtime = dmu_objset_snap_cmtime(zsb->z_os); ++ stat->atime = CURRENT_TIME; ++ ZFS_EXIT(zsb); ++ ++ return (error); ++} ++ ++/* ++ * The '.zfs/snapshot' directory file operations. These mainly control ++ * generating the list of available snapshots when doing an 'ls' in the ++ * directory. See zpl_snapdir_readdir(). ++ */ ++const struct file_operations zpl_fops_snapdir = { ++ .open = zpl_common_open, ++ .llseek = generic_file_llseek, ++ .read = generic_read_dir, ++ .readdir = zpl_snapdir_readdir, ++}; ++ ++/* ++ * The '.zfs/snapshot' directory inode operations. These mainly control ++ * creating an inode for a snapshot directory and initializing the needed ++ * infrastructure to automount the snapshot. See zpl_snapdir_lookup(). ++ */ ++const struct inode_operations zpl_ops_snapdir = { ++ .lookup = zpl_snapdir_lookup, ++ .getattr = zpl_snapdir_getattr, ++ .rename = zpl_snapdir_rename, ++ .rmdir = zpl_snapdir_rmdir, ++ .mkdir = zpl_snapdir_mkdir, ++}; ++ ++#ifdef HAVE_AUTOMOUNT ++const struct dentry_operations zpl_dops_snapdirs = { ++ .d_automount = zpl_snapdir_automount, ++}; ++#endif /* HAVE_AUTOMOUNT */ ++ ++static struct dentry * ++#ifdef HAVE_LOOKUP_NAMEIDATA ++zpl_shares_lookup(struct inode *dip, struct dentry *dentry, ++ struct nameidata *nd) ++#else ++zpl_shares_lookup(struct inode *dip, struct dentry *dentry, ++ unsigned int flags) ++#endif ++{ ++ cred_t *cr = CRED(); ++ struct inode *ip = NULL; ++ int error; ++ ++ crhold(cr); ++ error = -zfsctl_shares_lookup(dip, dname(dentry), &ip, ++ 0, cr, NULL, NULL); ++ ASSERT3S(error, <=, 0); ++ crfree(cr); ++ ++ if (error) { ++ if (error == -ENOENT) ++ return d_splice_alias(NULL, dentry); ++ else ++ return ERR_PTR(error); ++ } ++ ++ return d_splice_alias(ip, dentry); ++} ++ ++/* ARGSUSED */ ++static int ++zpl_shares_readdir(struct file *filp, void *dirent, filldir_t filldir) ++{ ++ cred_t *cr = CRED(); ++ struct dentry *dentry = filp->f_path.dentry; ++ struct inode *ip = dentry->d_inode; ++ zfs_sb_t *zsb = ITOZSB(ip); ++ znode_t *dzp; ++ int error; ++ ++ ZFS_ENTER(zsb); ++ ++ if (zsb->z_shares_dir == 0) { ++ error = zpl_common_readdir(filp, dirent, filldir); ++ ZFS_EXIT(zsb); ++ return (error); ++ } ++ ++ error = -zfs_zget(zsb, zsb->z_shares_dir, &dzp); ++ if (error) { ++ ZFS_EXIT(zsb); ++ return (error); ++ } ++ ++ crhold(cr); ++ error = -zfs_readdir(ZTOI(dzp), dirent, filldir, &filp->f_pos, cr); ++ crfree(cr); ++ ++ iput(ZTOI(dzp)); ++ ZFS_EXIT(zsb); ++ ASSERT3S(error, <=, 0); ++ ++ return (error); ++} ++ ++/* ARGSUSED */ ++static int ++zpl_shares_getattr(struct vfsmount *mnt, struct dentry *dentry, ++ struct kstat *stat) ++{ ++ struct inode *ip = dentry->d_inode; ++ zfs_sb_t *zsb = ITOZSB(ip); ++ znode_t *dzp; ++ int error; ++ ++ ZFS_ENTER(zsb); ++ ++ if (zsb->z_shares_dir == 0) { ++ error = simple_getattr(mnt, dentry, stat); ++ stat->nlink = stat->size = 2; ++ stat->atime = CURRENT_TIME; ++ ZFS_EXIT(zsb); ++ return (error); ++ } ++ ++ error = -zfs_zget(zsb, zsb->z_shares_dir, &dzp); ++ if (error == 0) ++ error = -zfs_getattr_fast(dentry->d_inode, stat); ++ ++ iput(ZTOI(dzp)); ++ ZFS_EXIT(zsb); 
++ ASSERT3S(error, <=, 0); ++ ++ return (error); ++} ++ ++/* ++ * The '.zfs/shares' directory file operations. ++ */ ++const struct file_operations zpl_fops_shares = { ++ .open = zpl_common_open, ++ .llseek = generic_file_llseek, ++ .read = generic_read_dir, ++ .readdir = zpl_shares_readdir, ++}; ++ ++/* ++ * The '.zfs/shares' directory inode operations. ++ */ ++const struct inode_operations zpl_ops_shares = { ++ .lookup = zpl_shares_lookup, ++ .getattr = zpl_shares_getattr, ++}; +diff -uNr linux-3.2.33-go.orig/fs/zfs/zfs/zpl_export.c linux-3.2.33-go/fs/zfs/zfs/zpl_export.c +--- linux-3.2.33-go.orig/fs/zfs/zfs/zpl_export.c 1970-01-01 01:00:00.000000000 +0100 ++++ linux-3.2.33-go/fs/zfs/zfs/zpl_export.c 2012-11-16 23:25:34.348039346 +0100 +@@ -0,0 +1,148 @@ ++/* ++ * CDDL HEADER START ++ * ++ * The contents of this file are subject to the terms of the ++ * Common Development and Distribution License (the "License"). ++ * You may not use this file except in compliance with the License. ++ * ++ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE ++ * or http://www.opensolaris.org/os/licensing. ++ * See the License for the specific language governing permissions ++ * and limitations under the License. ++ * ++ * When distributing Covered Code, include this CDDL HEADER in each ++ * file and include the License file at usr/src/OPENSOLARIS.LICENSE. ++ * If applicable, add the following below this CDDL HEADER, with the ++ * fields enclosed by brackets "[]" replaced with your own identifying ++ * information: Portions Copyright [yyyy] [name of copyright owner] ++ * ++ * CDDL HEADER END ++ */ ++/* ++ * Copyright (c) 2011 Gunnar Beutner ++ * Copyright (c) 2012 Cyril Plisko. All rights reserved. ++ */ ++ ++ ++#include ++#include ++#include ++#include ++ ++ ++static int ++#ifdef HAVE_ENCODE_FH_WITH_INODE ++zpl_encode_fh(struct inode *ip, __u32 *fh, int *max_len, struct inode *parent) ++{ ++#else ++zpl_encode_fh(struct dentry *dentry, __u32 *fh, int *max_len, int connectable) ++{ ++ struct inode *ip = dentry->d_inode; ++#endif /* HAVE_ENCODE_FH_WITH_INODE */ ++ fid_t *fid = (fid_t *)fh; ++ int len_bytes, rc; ++ ++ len_bytes = *max_len * sizeof (__u32); ++ ++ if (len_bytes < offsetof(fid_t, fid_data)) ++ return 255; ++ ++ fid->fid_len = len_bytes - offsetof(fid_t, fid_data); ++ ++ if (zfsctl_is_node(ip)) ++ rc = zfsctl_fid(ip, fid); ++ else ++ rc = zfs_fid(ip, fid); ++ ++ len_bytes = offsetof(fid_t, fid_data) + fid->fid_len; ++ *max_len = roundup(len_bytes, sizeof (__u32)) / sizeof (__u32); ++ ++ return (rc == 0 ? 
FILEID_INO32_GEN : 255); ++} ++ ++static struct dentry * ++zpl_dentry_obtain_alias(struct inode *ip) ++{ ++ struct dentry *result; ++ ++#ifdef HAVE_D_OBTAIN_ALIAS ++ result = d_obtain_alias(ip); ++#else ++ result = d_alloc_anon(ip); ++ ++ if (result == NULL) { ++ iput(ip); ++ result = ERR_PTR(-ENOMEM); ++ } ++#endif /* HAVE_D_OBTAIN_ALIAS */ ++ ++ return result; ++} ++ ++static struct dentry * ++zpl_fh_to_dentry(struct super_block *sb, struct fid *fh, ++ int fh_len, int fh_type) ++{ ++ fid_t *fid = (fid_t *)fh; ++ struct inode *ip; ++ int len_bytes, rc; ++ ++ len_bytes = fh_len * sizeof (__u32); ++ ++ if (fh_type != FILEID_INO32_GEN || ++ len_bytes < offsetof(fid_t, fid_data) || ++ len_bytes < offsetof(fid_t, fid_data) + fid->fid_len) ++ return ERR_PTR(-EINVAL); ++ ++ rc = zfs_vget(sb, &ip, fid); ++ ++ if (rc != 0) ++ return ERR_PTR(-rc); ++ ++ ASSERT((ip != NULL) && !IS_ERR(ip)); ++ ++ return zpl_dentry_obtain_alias(ip); ++} ++ ++static struct dentry * ++zpl_get_parent(struct dentry *child) ++{ ++ cred_t *cr = CRED(); ++ struct inode *ip; ++ int error; ++ ++ crhold(cr); ++ error = -zfs_lookup(child->d_inode, "..", &ip, 0, cr, NULL, NULL); ++ crfree(cr); ++ ASSERT3S(error, <=, 0); ++ ++ if (error) ++ return ERR_PTR(error); ++ ++ return zpl_dentry_obtain_alias(ip); ++} ++ ++#ifdef HAVE_COMMIT_METADATA ++static int ++zpl_commit_metadata(struct inode *inode) ++{ ++ cred_t *cr = CRED(); ++ int error; ++ ++ crhold(cr); ++ error = -zfs_fsync(inode, 0, cr); ++ crfree(cr); ++ ASSERT3S(error, <=, 0); ++ ++ return error; ++} ++#endif /* HAVE_COMMIT_METADATA */ ++ ++const struct export_operations zpl_export_operations = { ++ .encode_fh = zpl_encode_fh, ++ .fh_to_dentry = zpl_fh_to_dentry, ++ .get_parent = zpl_get_parent, ++#ifdef HAVE_COMMIT_METADATA ++ .commit_metadata= zpl_commit_metadata, ++#endif /* HAVE_COMMIT_METADATA */ ++}; +diff -uNr linux-3.2.33-go.orig/fs/zfs/zfs/zpl_file.c linux-3.2.33-go/fs/zfs/zfs/zpl_file.c +--- linux-3.2.33-go.orig/fs/zfs/zfs/zpl_file.c 1970-01-01 01:00:00.000000000 +0100 ++++ linux-3.2.33-go/fs/zfs/zfs/zpl_file.c 2012-11-16 23:25:34.350039322 +0100 +@@ -0,0 +1,462 @@ ++/* ++ * CDDL HEADER START ++ * ++ * The contents of this file are subject to the terms of the ++ * Common Development and Distribution License (the "License"). ++ * You may not use this file except in compliance with the License. ++ * ++ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE ++ * or http://www.opensolaris.org/os/licensing. ++ * See the License for the specific language governing permissions ++ * and limitations under the License. ++ * ++ * When distributing Covered Code, include this CDDL HEADER in each ++ * file and include the License file at usr/src/OPENSOLARIS.LICENSE. ++ * If applicable, add the following below this CDDL HEADER, with the ++ * fields enclosed by brackets "[]" replaced with your own identifying ++ * information: Portions Copyright [yyyy] [name of copyright owner] ++ * ++ * CDDL HEADER END ++ */ ++/* ++ * Copyright (c) 2011, Lawrence Livermore National Security, LLC. 
++ */ ++ ++ ++#include ++#include ++#include ++#include ++ ++ ++static int ++zpl_open(struct inode *ip, struct file *filp) ++{ ++ cred_t *cr = CRED(); ++ int error; ++ ++ crhold(cr); ++ error = -zfs_open(ip, filp->f_mode, filp->f_flags, cr); ++ crfree(cr); ++ ASSERT3S(error, <=, 0); ++ ++ if (error) ++ return (error); ++ ++ return generic_file_open(ip, filp); ++} ++ ++static int ++zpl_release(struct inode *ip, struct file *filp) ++{ ++ cred_t *cr = CRED(); ++ int error; ++ ++ crhold(cr); ++ error = -zfs_close(ip, filp->f_flags, cr); ++ crfree(cr); ++ ASSERT3S(error, <=, 0); ++ ++ return (error); ++} ++ ++static int ++zpl_readdir(struct file *filp, void *dirent, filldir_t filldir) ++{ ++ struct dentry *dentry = filp->f_path.dentry; ++ cred_t *cr = CRED(); ++ int error; ++ ++ crhold(cr); ++ error = -zfs_readdir(dentry->d_inode, dirent, filldir, ++ &filp->f_pos, cr); ++ crfree(cr); ++ ASSERT3S(error, <=, 0); ++ ++ return (error); ++} ++ ++#if defined(HAVE_FSYNC_WITH_DENTRY) ++/* ++ * Linux 2.6.x - 2.6.34 API, ++ * Through 2.6.34 the nfsd kernel server would pass a NULL 'file struct *' ++ * to the fops->fsync() hook. For this reason, we must be careful not to ++ * use filp unconditionally. ++ */ ++static int ++zpl_fsync(struct file *filp, struct dentry *dentry, int datasync) ++{ ++ cred_t *cr = CRED(); ++ int error; ++ ++ crhold(cr); ++ error = -zfs_fsync(dentry->d_inode, datasync, cr); ++ crfree(cr); ++ ASSERT3S(error, <=, 0); ++ ++ return (error); ++} ++ ++#elif defined(HAVE_FSYNC_WITHOUT_DENTRY) ++/* ++ * Linux 2.6.35 - 3.0 API, ++ * As of 2.6.35 the dentry argument to the fops->fsync() hook was deemed ++ * redundant. The dentry is still accessible via filp->f_path.dentry, ++ * and we are guaranteed that filp will never be NULL. ++ */ ++static int ++zpl_fsync(struct file *filp, int datasync) ++{ ++ struct inode *inode = filp->f_mapping->host; ++ cred_t *cr = CRED(); ++ int error; ++ ++ crhold(cr); ++ error = -zfs_fsync(inode, datasync, cr); ++ crfree(cr); ++ ASSERT3S(error, <=, 0); ++ ++ return (error); ++} ++ ++#elif defined(HAVE_FSYNC_RANGE) ++/* ++ * Linux 3.1 - 3.x API, ++ * As of 3.1 the responsibility to call filemap_write_and_wait_range() has ++ * been pushed down in to the .fsync() vfs hook. Additionally, the i_mutex ++ * lock is no longer held by the caller, for zfs we don't require the lock ++ * to be held so we don't acquire it. 
++ */ ++static int ++zpl_fsync(struct file *filp, loff_t start, loff_t end, int datasync) ++{ ++ struct inode *inode = filp->f_mapping->host; ++ cred_t *cr = CRED(); ++ int error; ++ ++ error = filemap_write_and_wait_range(inode->i_mapping, start, end); ++ if (error) ++ return (error); ++ ++ crhold(cr); ++ error = -zfs_fsync(inode, datasync, cr); ++ crfree(cr); ++ ASSERT3S(error, <=, 0); ++ ++ return (error); ++} ++#else ++#error "Unsupported fops->fsync() implementation" ++#endif ++ ++ssize_t ++zpl_read_common(struct inode *ip, const char *buf, size_t len, loff_t pos, ++ uio_seg_t segment, int flags, cred_t *cr) ++{ ++ int error; ++ struct iovec iov; ++ uio_t uio; ++ ++ iov.iov_base = (void *)buf; ++ iov.iov_len = len; ++ ++ uio.uio_iov = &iov; ++ uio.uio_resid = len; ++ uio.uio_iovcnt = 1; ++ uio.uio_loffset = pos; ++ uio.uio_limit = MAXOFFSET_T; ++ uio.uio_segflg = segment; ++ ++ error = -zfs_read(ip, &uio, flags, cr); ++ if (error < 0) ++ return (error); ++ ++ return (len - uio.uio_resid); ++} ++ ++static ssize_t ++zpl_read(struct file *filp, char __user *buf, size_t len, loff_t *ppos) ++{ ++ cred_t *cr = CRED(); ++ ssize_t read; ++ ++ crhold(cr); ++ read = zpl_read_common(filp->f_mapping->host, buf, len, *ppos, ++ UIO_USERSPACE, filp->f_flags, cr); ++ crfree(cr); ++ ++ if (read < 0) ++ return (read); ++ ++ *ppos += read; ++ return (read); ++} ++ ++ssize_t ++zpl_write_common(struct inode *ip, const char *buf, size_t len, loff_t pos, ++ uio_seg_t segment, int flags, cred_t *cr) ++{ ++ int error; ++ struct iovec iov; ++ uio_t uio; ++ ++ iov.iov_base = (void *)buf; ++ iov.iov_len = len; ++ ++ uio.uio_iov = &iov; ++ uio.uio_resid = len, ++ uio.uio_iovcnt = 1; ++ uio.uio_loffset = pos; ++ uio.uio_limit = MAXOFFSET_T; ++ uio.uio_segflg = segment; ++ ++ error = -zfs_write(ip, &uio, flags, cr); ++ if (error < 0) ++ return (error); ++ ++ return (len - uio.uio_resid); ++} ++ ++static ssize_t ++zpl_write(struct file *filp, const char __user *buf, size_t len, loff_t *ppos) ++{ ++ cred_t *cr = CRED(); ++ ssize_t wrote; ++ ++ crhold(cr); ++ wrote = zpl_write_common(filp->f_mapping->host, buf, len, *ppos, ++ UIO_USERSPACE, filp->f_flags, cr); ++ crfree(cr); ++ ++ if (wrote < 0) ++ return (wrote); ++ ++ *ppos += wrote; ++ return (wrote); ++} ++ ++/* ++ * It's worth taking a moment to describe how mmap is implemented ++ * for zfs because it differs considerably from other Linux filesystems. ++ * However, this issue is handled the same way under OpenSolaris. ++ * ++ * The issue is that by design zfs bypasses the Linux page cache and ++ * leaves all caching up to the ARC. This has been shown to work ++ * well for the common read(2)/write(2) case. However, mmap(2) ++ * is problem because it relies on being tightly integrated with the ++ * page cache. To handle this we cache mmap'ed files twice, once in ++ * the ARC and a second time in the page cache. The code is careful ++ * to keep both copies synchronized. ++ * ++ * When a file with an mmap'ed region is written to using write(2) ++ * both the data in the ARC and existing pages in the page cache ++ * are updated. For a read(2) data will be read first from the page ++ * cache then the ARC if needed. Neither a write(2) or read(2) will ++ * will ever result in new pages being added to the page cache. ++ * ++ * New pages are added to the page cache only via .readpage() which ++ * is called when the vfs needs to read a page off disk to back the ++ * virtual memory region. 
These pages may be modified without ++ * notifying the ARC and will be written out periodically via ++ * .writepage(). This will occur due to either a sync or the usual ++ * page aging behavior. Note because a read(2) of a mmap'ed file ++ * will always check the page cache first even when the ARC is out ++ * of date correct data will still be returned. ++ * ++ * While this implementation ensures correct behavior it does have ++ * have some drawbacks. The most obvious of which is that it ++ * increases the required memory footprint when access mmap'ed ++ * files. It also adds additional complexity to the code keeping ++ * both caches synchronized. ++ * ++ * Longer term it may be possible to cleanly resolve this wart by ++ * mapping page cache pages directly on to the ARC buffers. The ++ * Linux address space operations are flexible enough to allow ++ * selection of which pages back a particular index. The trick ++ * would be working out the details of which subsystem is in ++ * charge, the ARC, the page cache, or both. It may also prove ++ * helpful to move the ARC buffers to a scatter-gather lists ++ * rather than a vmalloc'ed region. ++ */ ++static int ++zpl_mmap(struct file *filp, struct vm_area_struct *vma) ++{ ++ struct inode *ip = filp->f_mapping->host; ++ znode_t *zp = ITOZ(ip); ++ int error; ++ ++ error = -zfs_map(ip, vma->vm_pgoff, (caddr_t *)vma->vm_start, ++ (size_t)(vma->vm_end - vma->vm_start), vma->vm_flags); ++ if (error) ++ return (error); ++ ++ error = generic_file_mmap(filp, vma); ++ if (error) ++ return (error); ++ ++ mutex_enter(&zp->z_lock); ++ zp->z_is_mapped = 1; ++ mutex_exit(&zp->z_lock); ++ ++ return (error); ++} ++ ++/* ++ * Populate a page with data for the Linux page cache. This function is ++ * only used to support mmap(2). There will be an identical copy of the ++ * data in the ARC which is kept up to date via .write() and .writepage(). ++ * ++ * Current this function relies on zpl_read_common() and the O_DIRECT ++ * flag to read in a page. This works but the more correct way is to ++ * update zfs_fillpage() to be Linux friendly and use that interface. ++ */ ++static int ++zpl_readpage(struct file *filp, struct page *pp) ++{ ++ struct inode *ip; ++ struct page *pl[1]; ++ int error = 0; ++ ++ ASSERT(PageLocked(pp)); ++ ip = pp->mapping->host; ++ pl[0] = pp; ++ ++ error = -zfs_getpage(ip, pl, 1); ++ ++ if (error) { ++ SetPageError(pp); ++ ClearPageUptodate(pp); ++ } else { ++ ClearPageError(pp); ++ SetPageUptodate(pp); ++ flush_dcache_page(pp); ++ } ++ ++ unlock_page(pp); ++ return error; ++} ++ ++/* ++ * Populate a set of pages with data for the Linux page cache. This ++ * function will only be called for read ahead and never for demand ++ * paging. For simplicity, the code relies on read_cache_pages() to ++ * correctly lock each page for IO and call zpl_readpage(). ++ */ ++static int ++zpl_readpages(struct file *filp, struct address_space *mapping, ++ struct list_head *pages, unsigned nr_pages) ++{ ++ return (read_cache_pages(mapping, pages, ++ (filler_t *)zpl_readpage, filp)); ++} ++ ++int ++zpl_putpage(struct page *pp, struct writeback_control *wbc, void *data) ++{ ++ struct address_space *mapping = data; ++ ++ ASSERT(PageLocked(pp)); ++ ASSERT(!PageWriteback(pp)); ++ ASSERT(!(current->flags & PF_NOFS)); ++ ++ /* ++ * Annotate this call path with a flag that indicates that it is ++ * unsafe to use KM_SLEEP during memory allocations due to the ++ * potential for a deadlock. KM_PUSHPAGE should be used instead. 
++ */ ++ current->flags |= PF_NOFS; ++ (void) zfs_putpage(mapping->host, pp, wbc); ++ current->flags &= ~PF_NOFS; ++ ++ return (0); ++} ++ ++static int ++zpl_writepages(struct address_space *mapping, struct writeback_control *wbc) ++{ ++ return write_cache_pages(mapping, wbc, zpl_putpage, mapping); ++} ++ ++/* ++ * Write out dirty pages to the ARC, this function is only required to ++ * support mmap(2). Mapped pages may be dirtied by memory operations ++ * which never call .write(). These dirty pages are kept in sync with ++ * the ARC buffers via this hook. ++ */ ++static int ++zpl_writepage(struct page *pp, struct writeback_control *wbc) ++{ ++ return zpl_putpage(pp, wbc, pp->mapping); ++} ++ ++/* ++ * The only flag combination which matches the behavior of zfs_space() ++ * is FALLOC_FL_PUNCH_HOLE. This flag was introduced in the 2.6.38 kernel. ++ */ ++long ++zpl_fallocate_common(struct inode *ip, int mode, loff_t offset, loff_t len) ++{ ++ cred_t *cr = CRED(); ++ int error = -EOPNOTSUPP; ++ ++ if (mode & FALLOC_FL_KEEP_SIZE) ++ return (-EOPNOTSUPP); ++ ++ crhold(cr); ++ ++#ifdef FALLOC_FL_PUNCH_HOLE ++ if (mode & FALLOC_FL_PUNCH_HOLE) { ++ flock64_t bf; ++ ++ bf.l_type = F_WRLCK; ++ bf.l_whence = 0; ++ bf.l_start = offset; ++ bf.l_len = len; ++ bf.l_pid = 0; ++ ++ error = -zfs_space(ip, F_FREESP, &bf, FWRITE, offset, cr); ++ } ++#endif /* FALLOC_FL_PUNCH_HOLE */ ++ ++ crfree(cr); ++ ++ ASSERT3S(error, <=, 0); ++ return (error); ++} ++ ++#ifdef HAVE_FILE_FALLOCATE ++static long ++zpl_fallocate(struct file *filp, int mode, loff_t offset, loff_t len) ++{ ++ return zpl_fallocate_common(filp->f_path.dentry->d_inode, ++ mode, offset, len); ++} ++#endif /* HAVE_FILE_FALLOCATE */ ++ ++const struct address_space_operations zpl_address_space_operations = { ++ .readpages = zpl_readpages, ++ .readpage = zpl_readpage, ++ .writepage = zpl_writepage, ++ .writepages = zpl_writepages, ++}; ++ ++const struct file_operations zpl_file_operations = { ++ .open = zpl_open, ++ .release = zpl_release, ++ .llseek = generic_file_llseek, ++ .read = zpl_read, ++ .write = zpl_write, ++ .readdir = zpl_readdir, ++ .mmap = zpl_mmap, ++ .fsync = zpl_fsync, ++#ifdef HAVE_FILE_FALLOCATE ++ .fallocate = zpl_fallocate, ++#endif /* HAVE_FILE_FALLOCATE */ ++}; ++ ++const struct file_operations zpl_dir_file_operations = { ++ .llseek = generic_file_llseek, ++ .read = generic_read_dir, ++ .readdir = zpl_readdir, ++ .fsync = zpl_fsync, ++}; +diff -uNr linux-3.2.33-go.orig/fs/zfs/zfs/zpl_inode.c linux-3.2.33-go/fs/zfs/zfs/zpl_inode.c +--- linux-3.2.33-go.orig/fs/zfs/zfs/zpl_inode.c 1970-01-01 01:00:00.000000000 +0100 ++++ linux-3.2.33-go/fs/zfs/zfs/zpl_inode.c 2012-11-16 23:25:34.350039322 +0100 +@@ -0,0 +1,438 @@ ++/* ++ * CDDL HEADER START ++ * ++ * The contents of this file are subject to the terms of the ++ * Common Development and Distribution License (the "License"). ++ * You may not use this file except in compliance with the License. ++ * ++ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE ++ * or http://www.opensolaris.org/os/licensing. ++ * See the License for the specific language governing permissions ++ * and limitations under the License. ++ * ++ * When distributing Covered Code, include this CDDL HEADER in each ++ * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 
++ * If applicable, add the following below this CDDL HEADER, with the ++ * fields enclosed by brackets "[]" replaced with your own identifying ++ * information: Portions Copyright [yyyy] [name of copyright owner] ++ * ++ * CDDL HEADER END ++ */ ++/* ++ * Copyright (c) 2011, Lawrence Livermore National Security, LLC. ++ */ ++ ++ ++#include ++#include ++#include ++#include ++#include ++ ++ ++static struct dentry * ++#ifdef HAVE_LOOKUP_NAMEIDATA ++zpl_lookup(struct inode *dir, struct dentry *dentry, struct nameidata *nd) ++#else ++zpl_lookup(struct inode *dir, struct dentry *dentry, unsigned int flags) ++#endif ++{ ++ cred_t *cr = CRED(); ++ struct inode *ip; ++ int error; ++ ++ crhold(cr); ++ error = -zfs_lookup(dir, dname(dentry), &ip, 0, cr, NULL, NULL); ++ ASSERT3S(error, <=, 0); ++ crfree(cr); ++ ++ if (error) { ++ if (error == -ENOENT) ++ return d_splice_alias(NULL, dentry); ++ else ++ return ERR_PTR(error); ++ } ++ ++ return d_splice_alias(ip, dentry); ++} ++ ++void ++zpl_vap_init(vattr_t *vap, struct inode *dir, struct dentry *dentry, ++ zpl_umode_t mode, cred_t *cr) ++{ ++ vap->va_mask = ATTR_MODE; ++ vap->va_mode = mode; ++ vap->va_dentry = dentry; ++ vap->va_uid = crgetfsuid(cr); ++ ++ if (dir && dir->i_mode & S_ISGID) { ++ vap->va_gid = dir->i_gid; ++ if (S_ISDIR(mode)) ++ vap->va_mode |= S_ISGID; ++ } else { ++ vap->va_gid = crgetfsgid(cr); ++ } ++} ++ ++static int ++#ifdef HAVE_CREATE_NAMEIDATA ++zpl_create(struct inode *dir, struct dentry *dentry, zpl_umode_t mode, ++ struct nameidata *nd) ++#else ++zpl_create(struct inode *dir, struct dentry *dentry, zpl_umode_t mode, ++ bool flag) ++#endif ++{ ++ cred_t *cr = CRED(); ++ struct inode *ip; ++ vattr_t *vap; ++ int error; ++ ++ crhold(cr); ++ vap = kmem_zalloc(sizeof(vattr_t), KM_SLEEP); ++ zpl_vap_init(vap, dir, dentry, mode, cr); ++ ++ error = -zfs_create(dir, (char *)dentry->d_name.name, ++ vap, 0, mode, &ip, cr, 0, NULL); ++ kmem_free(vap, sizeof(vattr_t)); ++ crfree(cr); ++ ASSERT3S(error, <=, 0); ++ ++ return (error); ++} ++ ++static int ++zpl_mknod(struct inode *dir, struct dentry *dentry, zpl_umode_t mode, ++ dev_t rdev) ++{ ++ cred_t *cr = CRED(); ++ struct inode *ip; ++ vattr_t *vap; ++ int error; ++ ++ /* ++ * We currently expect Linux to supply rdev=0 for all sockets ++ * and fifos, but we want to know if this behavior ever changes. 
++ */ ++ if (S_ISSOCK(mode) || S_ISFIFO(mode)) ++ ASSERT(rdev == 0); ++ ++ crhold(cr); ++ vap = kmem_zalloc(sizeof(vattr_t), KM_SLEEP); ++ zpl_vap_init(vap, dir, dentry, mode, cr); ++ vap->va_rdev = rdev; ++ ++ error = -zfs_create(dir, (char *)dentry->d_name.name, ++ vap, 0, mode, &ip, cr, 0, NULL); ++ kmem_free(vap, sizeof(vattr_t)); ++ crfree(cr); ++ ASSERT3S(error, <=, 0); ++ ++ return (-error); ++} ++ ++static int ++zpl_unlink(struct inode *dir, struct dentry *dentry) ++{ ++ cred_t *cr = CRED(); ++ int error; ++ ++ crhold(cr); ++ error = -zfs_remove(dir, dname(dentry), cr); ++ crfree(cr); ++ ASSERT3S(error, <=, 0); ++ ++ return (error); ++} ++ ++static int ++zpl_mkdir(struct inode *dir, struct dentry *dentry, zpl_umode_t mode) ++{ ++ cred_t *cr = CRED(); ++ vattr_t *vap; ++ struct inode *ip; ++ int error; ++ ++ crhold(cr); ++ vap = kmem_zalloc(sizeof(vattr_t), KM_SLEEP); ++ zpl_vap_init(vap, dir, dentry, mode | S_IFDIR, cr); ++ ++ error = -zfs_mkdir(dir, dname(dentry), vap, &ip, cr, 0, NULL); ++ kmem_free(vap, sizeof(vattr_t)); ++ crfree(cr); ++ ASSERT3S(error, <=, 0); ++ ++ return (error); ++} ++ ++static int ++zpl_rmdir(struct inode * dir, struct dentry *dentry) ++{ ++ cred_t *cr = CRED(); ++ int error; ++ ++ crhold(cr); ++ error = -zfs_rmdir(dir, dname(dentry), NULL, cr, 0); ++ crfree(cr); ++ ASSERT3S(error, <=, 0); ++ ++ return (error); ++} ++ ++static int ++zpl_getattr(struct vfsmount *mnt, struct dentry *dentry, struct kstat *stat) ++{ ++ boolean_t issnap = ITOZSB(dentry->d_inode)->z_issnap; ++ int error; ++ ++ /* ++ * Ensure MNT_SHRINKABLE is set on snapshots to ensure they are ++ * unmounted automatically with the parent file system. This ++ * is done on the first getattr because it's not easy to get the ++ * vfsmount structure at mount time. This call path is explicitly ++ * marked unlikely to avoid any performance impact. FWIW, ext4 ++ * resorts to a similar trick for sysadmin convenience. 
++ */ ++ if (unlikely(issnap && !(mnt->mnt_flags & MNT_SHRINKABLE))) ++ mnt->mnt_flags |= MNT_SHRINKABLE; ++ ++ error = -zfs_getattr_fast(dentry->d_inode, stat); ++ ASSERT3S(error, <=, 0); ++ ++ return (error); ++} ++ ++static int ++zpl_setattr(struct dentry *dentry, struct iattr *ia) ++{ ++ cred_t *cr = CRED(); ++ vattr_t *vap; ++ int error; ++ ++ error = inode_change_ok(dentry->d_inode, ia); ++ if (error) ++ return (error); ++ ++ crhold(cr); ++ vap = kmem_zalloc(sizeof(vattr_t), KM_SLEEP); ++ vap->va_mask = ia->ia_valid & ATTR_IATTR_MASK; ++ vap->va_mode = ia->ia_mode; ++ vap->va_uid = ia->ia_uid; ++ vap->va_gid = ia->ia_gid; ++ vap->va_size = ia->ia_size; ++ vap->va_atime = ia->ia_atime; ++ vap->va_mtime = ia->ia_mtime; ++ vap->va_ctime = ia->ia_ctime; ++ ++ error = -zfs_setattr(dentry->d_inode, vap, 0, cr); ++ ++ kmem_free(vap, sizeof(vattr_t)); ++ crfree(cr); ++ ASSERT3S(error, <=, 0); ++ ++ return (error); ++} ++ ++static int ++zpl_rename(struct inode *sdip, struct dentry *sdentry, ++ struct inode *tdip, struct dentry *tdentry) ++{ ++ cred_t *cr = CRED(); ++ int error; ++ ++ crhold(cr); ++ error = -zfs_rename(sdip, dname(sdentry), tdip, dname(tdentry), cr, 0); ++ crfree(cr); ++ ASSERT3S(error, <=, 0); ++ ++ return (error); ++} ++ ++static int ++zpl_symlink(struct inode *dir, struct dentry *dentry, const char *name) ++{ ++ cred_t *cr = CRED(); ++ vattr_t *vap; ++ struct inode *ip; ++ int error; ++ ++ crhold(cr); ++ vap = kmem_zalloc(sizeof(vattr_t), KM_SLEEP); ++ zpl_vap_init(vap, dir, dentry, S_IFLNK | S_IRWXUGO, cr); ++ ++ error = -zfs_symlink(dir, dname(dentry), vap, (char *)name, &ip, cr, 0); ++ kmem_free(vap, sizeof(vattr_t)); ++ crfree(cr); ++ ASSERT3S(error, <=, 0); ++ ++ return (error); ++} ++ ++static void * ++zpl_follow_link(struct dentry *dentry, struct nameidata *nd) ++{ ++ cred_t *cr = CRED(); ++ struct inode *ip = dentry->d_inode; ++ struct iovec iov; ++ uio_t uio; ++ char *link; ++ int error; ++ ++ crhold(cr); ++ ++ iov.iov_len = MAXPATHLEN; ++ iov.iov_base = link = kmem_zalloc(MAXPATHLEN, KM_SLEEP); ++ ++ uio.uio_iov = &iov; ++ uio.uio_iovcnt = 1; ++ uio.uio_resid = (MAXPATHLEN - 1); ++ uio.uio_segflg = UIO_SYSSPACE; ++ ++ error = -zfs_readlink(ip, &uio, cr); ++ if (error) { ++ kmem_free(link, MAXPATHLEN); ++ nd_set_link(nd, ERR_PTR(error)); ++ } else { ++ nd_set_link(nd, link); ++ } ++ ++ crfree(cr); ++ return (NULL); ++} ++ ++static void ++zpl_put_link(struct dentry *dentry, struct nameidata *nd, void *ptr) ++{ ++ const char *link = nd_get_link(nd); ++ ++ if (!IS_ERR(link)) ++ kmem_free(link, MAXPATHLEN); ++} ++ ++static int ++zpl_link(struct dentry *old_dentry, struct inode *dir, struct dentry *dentry) ++{ ++ cred_t *cr = CRED(); ++ struct inode *ip = old_dentry->d_inode; ++ int error; ++ ++ if (ip->i_nlink >= ZFS_LINK_MAX) ++ return -EMLINK; ++ ++ crhold(cr); ++ ip->i_ctime = CURRENT_TIME_SEC; ++ igrab(ip); /* Use ihold() if available */ ++ ++ error = -zfs_link(dir, ip, dname(dentry), cr); ++ if (error) { ++ iput(ip); ++ goto out; ++ } ++ ++ d_instantiate(dentry, ip); ++out: ++ crfree(cr); ++ ASSERT3S(error, <=, 0); ++ ++ return (error); ++} ++ ++#ifdef HAVE_INODE_TRUNCATE_RANGE ++static void ++zpl_truncate_range(struct inode* ip, loff_t start, loff_t end) ++{ ++ cred_t *cr = CRED(); ++ flock64_t bf; ++ ++ ASSERT3S(start, <=, end); ++ ++ /* ++ * zfs_freesp() will interpret (len == 0) as meaning "truncate until ++ * the end of the file". We don't want that. 
++ */ ++ if (start == end) ++ return; ++ ++ crhold(cr); ++ ++ bf.l_type = F_WRLCK; ++ bf.l_whence = 0; ++ bf.l_start = start; ++ bf.l_len = end - start; ++ bf.l_pid = 0; ++ zfs_space(ip, F_FREESP, &bf, FWRITE, start, cr); ++ ++ crfree(cr); ++} ++#endif /* HAVE_INODE_TRUNCATE_RANGE */ ++ ++#ifdef HAVE_INODE_FALLOCATE ++static long ++zpl_fallocate(struct inode *ip, int mode, loff_t offset, loff_t len) ++{ ++ return zpl_fallocate_common(ip, mode, offset, len); ++} ++#endif /* HAVE_INODE_FALLOCATE */ ++ ++ ++const struct inode_operations zpl_inode_operations = { ++ .create = zpl_create, ++ .link = zpl_link, ++ .unlink = zpl_unlink, ++ .symlink = zpl_symlink, ++ .mkdir = zpl_mkdir, ++ .rmdir = zpl_rmdir, ++ .mknod = zpl_mknod, ++ .rename = zpl_rename, ++ .setattr = zpl_setattr, ++ .getattr = zpl_getattr, ++ .setxattr = generic_setxattr, ++ .getxattr = generic_getxattr, ++ .removexattr = generic_removexattr, ++ .listxattr = zpl_xattr_list, ++#ifdef HAVE_INODE_TRUNCATE_RANGE ++ .truncate_range = zpl_truncate_range, ++#endif /* HAVE_INODE_TRUNCATE_RANGE */ ++#ifdef HAVE_INODE_FALLOCATE ++ .fallocate = zpl_fallocate, ++#endif /* HAVE_INODE_FALLOCATE */ ++}; ++ ++const struct inode_operations zpl_dir_inode_operations = { ++ .create = zpl_create, ++ .lookup = zpl_lookup, ++ .link = zpl_link, ++ .unlink = zpl_unlink, ++ .symlink = zpl_symlink, ++ .mkdir = zpl_mkdir, ++ .rmdir = zpl_rmdir, ++ .mknod = zpl_mknod, ++ .rename = zpl_rename, ++ .setattr = zpl_setattr, ++ .getattr = zpl_getattr, ++ .setxattr = generic_setxattr, ++ .getxattr = generic_getxattr, ++ .removexattr = generic_removexattr, ++ .listxattr = zpl_xattr_list, ++}; ++ ++const struct inode_operations zpl_symlink_inode_operations = { ++ .readlink = generic_readlink, ++ .follow_link = zpl_follow_link, ++ .put_link = zpl_put_link, ++ .setattr = zpl_setattr, ++ .getattr = zpl_getattr, ++ .setxattr = generic_setxattr, ++ .getxattr = generic_getxattr, ++ .removexattr = generic_removexattr, ++ .listxattr = zpl_xattr_list, ++}; ++ ++const struct inode_operations zpl_special_inode_operations = { ++ .setattr = zpl_setattr, ++ .getattr = zpl_getattr, ++ .setxattr = generic_setxattr, ++ .getxattr = generic_getxattr, ++ .removexattr = generic_removexattr, ++ .listxattr = zpl_xattr_list, ++}; +diff -uNr linux-3.2.33-go.orig/fs/zfs/zfs/zpl_super.c linux-3.2.33-go/fs/zfs/zfs/zpl_super.c +--- linux-3.2.33-go.orig/fs/zfs/zfs/zpl_super.c 1970-01-01 01:00:00.000000000 +0100 ++++ linux-3.2.33-go/fs/zfs/zfs/zpl_super.c 2012-11-16 23:25:34.351039311 +0100 +@@ -0,0 +1,342 @@ ++/* ++ * CDDL HEADER START ++ * ++ * The contents of this file are subject to the terms of the ++ * Common Development and Distribution License (the "License"). ++ * You may not use this file except in compliance with the License. ++ * ++ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE ++ * or http://www.opensolaris.org/os/licensing. ++ * See the License for the specific language governing permissions ++ * and limitations under the License. ++ * ++ * When distributing Covered Code, include this CDDL HEADER in each ++ * file and include the License file at usr/src/OPENSOLARIS.LICENSE. ++ * If applicable, add the following below this CDDL HEADER, with the ++ * fields enclosed by brackets "[]" replaced with your own identifying ++ * information: Portions Copyright [yyyy] [name of copyright owner] ++ * ++ * CDDL HEADER END ++ */ ++/* ++ * Copyright (c) 2011, Lawrence Livermore National Security, LLC. 
++ */ ++ ++ ++#include ++#include ++#include ++#include ++#include ++ ++ ++static struct inode * ++zpl_inode_alloc(struct super_block *sb) ++{ ++ struct inode *ip; ++ ++ VERIFY3S(zfs_inode_alloc(sb, &ip), ==, 0); ++ ip->i_version = 1; ++ ++ return (ip); ++} ++ ++static void ++zpl_inode_destroy(struct inode *ip) ++{ ++ ASSERT(atomic_read(&ip->i_count) == 0); ++ zfs_inode_destroy(ip); ++} ++ ++/* ++ * When ->drop_inode() is called its return value indicates if the ++ * inode should be evicted from the inode cache. If the inode is ++ * unhashed and has no links the default policy is to evict it ++ * immediately. ++ * ++ * Prior to 2.6.36 this eviction was accomplished by the vfs calling ++ * ->delete_inode(). It was ->delete_inode()'s responsibility to ++ * truncate the inode pages and call clear_inode(). The call to ++ * clear_inode() synchronously invalidates all the buffers and ++ * calls ->clear_inode(). It was ->clear_inode()'s responsibility ++ * to cleanup and filesystem specific data before freeing the inode. ++ * ++ * This elaborate mechanism was replaced by ->evict_inode() which ++ * does the job of both ->delete_inode() and ->clear_inode(). It ++ * will be called exactly once, and when it returns the inode must ++ * be in a state where it can simply be freed.i ++ * ++ * The ->evict_inode() callback must minimally truncate the inode pages, ++ * and call clear_inode(). For 2.6.35 and later kernels this will ++ * simply update the inode state, with the sync occurring before the ++ * truncate in evict(). For earlier kernels clear_inode() maps to ++ * end_writeback() which is responsible for completing all outstanding ++ * write back. In either case, once this is done it is safe to cleanup ++ * any remaining inode specific data via zfs_inactive(). ++ * remaining filesystem specific data. ++ */ ++#ifdef HAVE_EVICT_INODE ++static void ++zpl_evict_inode(struct inode *ip) ++{ ++ truncate_setsize(ip, 0); ++ clear_inode(ip); ++ zfs_inactive(ip); ++} ++ ++#else ++ ++static void ++zpl_clear_inode(struct inode *ip) ++{ ++ zfs_inactive(ip); ++} ++ ++static void ++zpl_inode_delete(struct inode *ip) ++{ ++ truncate_setsize(ip, 0); ++ clear_inode(ip); ++} ++ ++#endif /* HAVE_EVICT_INODE */ ++ ++static void ++zpl_put_super(struct super_block *sb) ++{ ++ int error; ++ ++ error = -zfs_umount(sb); ++ ASSERT3S(error, <=, 0); ++} ++ ++static int ++zpl_sync_fs(struct super_block *sb, int wait) ++{ ++ cred_t *cr = CRED(); ++ int error; ++ ++ crhold(cr); ++ error = -zfs_sync(sb, wait, cr); ++ crfree(cr); ++ ASSERT3S(error, <=, 0); ++ ++ return (error); ++} ++ ++static int ++zpl_statfs(struct dentry *dentry, struct kstatfs *statp) ++{ ++ int error; ++ ++ error = -zfs_statvfs(dentry, statp); ++ ASSERT3S(error, <=, 0); ++ ++ return (error); ++} ++ ++static int ++zpl_remount_fs(struct super_block *sb, int *flags, char *data) ++{ ++ int error; ++ error = -zfs_remount(sb, flags, data); ++ ASSERT3S(error, <=, 0); ++ ++ return (error); ++} ++ ++static void ++zpl_umount_begin(struct super_block *sb) ++{ ++ zfs_sb_t *zsb = sb->s_fs_info; ++ int count; ++ ++ /* ++ * Best effort to unmount snapshots in .zfs/snapshot/. Normally this ++ * isn't required because snapshots have the MNT_SHRINKABLE flag set. 
++ */ ++ if (zsb->z_ctldir) ++ (void) zfsctl_unmount_snapshots(zsb, MNT_FORCE, &count); ++} ++ ++/* ++ * The Linux VFS automatically handles the following flags: ++ * MNT_NOSUID, MNT_NODEV, MNT_NOEXEC, MNT_NOATIME, MNT_READONLY ++ */ ++#ifdef HAVE_SHOW_OPTIONS_WITH_DENTRY ++static int ++zpl_show_options(struct seq_file *seq, struct dentry *root) ++{ ++ zfs_sb_t *zsb = root->d_sb->s_fs_info; ++ ++ seq_printf(seq, ",%s", zsb->z_flags & ZSB_XATTR ? "xattr" : "noxattr"); ++ ++ return (0); ++} ++#else ++static int ++zpl_show_options(struct seq_file *seq, struct vfsmount *vfsp) ++{ ++ zfs_sb_t *zsb = vfsp->mnt_sb->s_fs_info; ++ ++ seq_printf(seq, ",%s", zsb->z_flags & ZSB_XATTR ? "xattr" : "noxattr"); ++ ++ return (0); ++} ++#endif /* HAVE_SHOW_OPTIONS_WITH_DENTRY */ ++ ++static int ++zpl_fill_super(struct super_block *sb, void *data, int silent) ++{ ++ int error; ++ ++ error = -zfs_domount(sb, data, silent); ++ ASSERT3S(error, <=, 0); ++ ++ return (error); ++} ++ ++#ifdef HAVE_MOUNT_NODEV ++static struct dentry * ++zpl_mount(struct file_system_type *fs_type, int flags, ++ const char *osname, void *data) ++{ ++ zpl_mount_data_t zmd = { osname, data }; ++ ++ return mount_nodev(fs_type, flags, &zmd, zpl_fill_super); ++} ++#else ++static int ++zpl_get_sb(struct file_system_type *fs_type, int flags, ++ const char *osname, void *data, struct vfsmount *mnt) ++{ ++ zpl_mount_data_t zmd = { osname, data }; ++ ++ return get_sb_nodev(fs_type, flags, &zmd, zpl_fill_super, mnt); ++} ++#endif /* HAVE_MOUNT_NODEV */ ++ ++static void ++zpl_kill_sb(struct super_block *sb) ++{ ++ zfs_preumount(sb); ++ kill_anon_super(sb); ++} ++ ++#ifdef HAVE_SHRINK ++/* ++ * Linux 3.1 - 3.x API ++ * ++ * The Linux 3.1 API introduced per-sb cache shrinkers to replace the ++ * global ones. This allows us a mechanism to cleanly target a specific ++ * zfs file system when the dnode and inode caches grow too large. ++ * ++ * In addition, the 3.0 kernel added the iterate_supers_type() helper ++ * function which is used to safely walk all of the zfs file systems. ++ */ ++static void ++zpl_prune_sb(struct super_block *sb, void *arg) ++{ ++ int objects = 0; ++ int error; ++ ++ error = -zfs_sb_prune(sb, *(unsigned long *)arg, &objects); ++ ASSERT3S(error, <=, 0); ++ ++ return; ++} ++ ++void ++zpl_prune_sbs(int64_t bytes_to_scan, void *private) ++{ ++ unsigned long nr_to_scan = (bytes_to_scan / sizeof(znode_t)); ++ ++ iterate_supers_type(&zpl_fs_type, zpl_prune_sb, &nr_to_scan); ++ kmem_reap(); ++} ++#else ++/* ++ * Linux 2.6.x - 3.0 API ++ * ++ * These are best effort interfaces are provided by the SPL to induce ++ * the Linux VM subsystem to reclaim a fraction of the both dnode and ++ * inode caches. Ideally, we want to just target the zfs file systems ++ * however our only option is to reclaim from them all. ++ */ ++void ++zpl_prune_sbs(int64_t bytes_to_scan, void *private) ++{ ++ unsigned long nr_to_scan = (bytes_to_scan / sizeof(znode_t)); ++ ++ shrink_dcache_memory(nr_to_scan, GFP_KERNEL); ++ shrink_icache_memory(nr_to_scan, GFP_KERNEL); ++ kmem_reap(); ++} ++#endif /* HAVE_SHRINK */ ++ ++#ifdef HAVE_NR_CACHED_OBJECTS ++static int ++zpl_nr_cached_objects(struct super_block *sb) ++{ ++ zfs_sb_t *zsb = sb->s_fs_info; ++ int nr; ++ ++ mutex_enter(&zsb->z_znodes_lock); ++ nr = zsb->z_nr_znodes; ++ mutex_exit(&zsb->z_znodes_lock); ++ ++ return (nr); ++} ++#endif /* HAVE_NR_CACHED_OBJECTS */ ++ ++#ifdef HAVE_FREE_CACHED_OBJECTS ++/* ++ * Attempt to evict some meta data from the cache. 
The ARC operates in ++ * terms of bytes while the Linux VFS uses objects. Now because this is ++ * just a best effort eviction and the exact values aren't critical so we ++ * extrapolate from an object count to a byte size using the znode_t size. ++ */ ++static void ++zpl_free_cached_objects(struct super_block *sb, int nr_to_scan) ++{ ++ arc_adjust_meta(nr_to_scan * sizeof(znode_t), B_FALSE); ++} ++#endif /* HAVE_FREE_CACHED_OBJECTS */ ++ ++const struct super_operations zpl_super_operations = { ++ .alloc_inode = zpl_inode_alloc, ++ .destroy_inode = zpl_inode_destroy, ++ .dirty_inode = NULL, ++ .write_inode = NULL, ++ .drop_inode = NULL, ++#ifdef HAVE_EVICT_INODE ++ .evict_inode = zpl_evict_inode, ++#else ++ .clear_inode = zpl_clear_inode, ++ .delete_inode = zpl_inode_delete, ++#endif /* HAVE_EVICT_INODE */ ++ .put_super = zpl_put_super, ++ .sync_fs = zpl_sync_fs, ++ .statfs = zpl_statfs, ++ .remount_fs = zpl_remount_fs, ++ .umount_begin = zpl_umount_begin, ++ .show_options = zpl_show_options, ++ .show_stats = NULL, ++#ifdef HAVE_NR_CACHED_OBJECTS ++ .nr_cached_objects = zpl_nr_cached_objects, ++#endif /* HAVE_NR_CACHED_OBJECTS */ ++#ifdef HAVE_FREE_CACHED_OBJECTS ++ .free_cached_objects = zpl_free_cached_objects, ++#endif /* HAVE_FREE_CACHED_OBJECTS */ ++}; ++ ++struct file_system_type zpl_fs_type = { ++ .owner = THIS_MODULE, ++ .name = ZFS_DRIVER, ++#ifdef HAVE_MOUNT_NODEV ++ .mount = zpl_mount, ++#else ++ .get_sb = zpl_get_sb, ++#endif /* HAVE_MOUNT_NODEV */ ++ .kill_sb = zpl_kill_sb, ++}; +diff -uNr linux-3.2.33-go.orig/fs/zfs/zfs/zpl_xattr.c linux-3.2.33-go/fs/zfs/zfs/zpl_xattr.c +--- linux-3.2.33-go.orig/fs/zfs/zfs/zpl_xattr.c 1970-01-01 01:00:00.000000000 +0100 ++++ linux-3.2.33-go/fs/zfs/zfs/zpl_xattr.c 2012-11-16 23:25:34.351039311 +0100 +@@ -0,0 +1,678 @@ ++/* ++ * CDDL HEADER START ++ * ++ * The contents of this file are subject to the terms of the ++ * Common Development and Distribution License (the "License"). ++ * You may not use this file except in compliance with the License. ++ * ++ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE ++ * or http://www.opensolaris.org/os/licensing. ++ * See the License for the specific language governing permissions ++ * and limitations under the License. ++ * ++ * When distributing Covered Code, include this CDDL HEADER in each ++ * file and include the License file at usr/src/OPENSOLARIS.LICENSE. ++ * If applicable, add the following below this CDDL HEADER, with the ++ * fields enclosed by brackets "[]" replaced with your own identifying ++ * information: Portions Copyright [yyyy] [name of copyright owner] ++ * ++ * CDDL HEADER END ++ */ ++/* ++ * Copyright (c) 2011, Lawrence Livermore National Security, LLC. ++ * ++ * Extended attributes (xattr) on Solaris are implemented as files ++ * which exist in a hidden xattr directory. These extended attributes ++ * can be accessed using the attropen() system call which opens ++ * the extended attribute. It can then be manipulated just like ++ * a standard file descriptor. This has a couple advantages such ++ * as practically no size limit on the file, and the extended ++ * attributes permissions may differ from those of the parent file. ++ * This interface is really quite clever, but it's also completely ++ * different than what is supported on Linux. It also comes with a ++ * steep performance penalty when accessing small xattrs because they ++ * are not stored with the parent file. 
++ * ++ * Under Linux extended attributes are manipulated by the system ++ * calls getxattr(2), setxattr(2), and listxattr(2). They consider ++ * extended attributes to be name/value pairs where the name is a ++ * NULL terminated string. The name must also include one of the ++ * following namespace prefixes: ++ * ++ * user - No restrictions and is available to user applications. ++ * trusted - Restricted to kernel and root (CAP_SYS_ADMIN) use. ++ * system - Used for access control lists (system.nfs4_acl, etc). ++ * security - Used by SELinux to store a files security context. ++ * ++ * The value under Linux to limited to 65536 bytes of binary data. ++ * In practice, individual xattrs tend to be much smaller than this ++ * and are typically less than 100 bytes. A good example of this ++ * are the security.selinux xattrs which are less than 100 bytes and ++ * exist for every file when xattr labeling is enabled. ++ * ++ * The Linux xattr implemenation has been written to take advantage of ++ * this typical usage. When the dataset property 'xattr=sa' is set, ++ * then xattrs will be preferentially stored as System Attributes (SA). ++ * This allows tiny xattrs (~100 bytes) to be stored with the dnode and ++ * up to 64k of xattrs to be stored in the spill block. If additional ++ * xattr space is required, which is unlikely under Linux, they will ++ * be stored using the traditional directory approach. ++ * ++ * This optimization results in roughly a 3x performance improvement ++ * when accessing xattrs because it avoids the need to perform a seek ++ * for every xattr value. When multiple xattrs are stored per-file ++ * the performance improvements are even greater because all of the ++ * xattrs stored in the spill block will be cached. ++ * ++ * However, by default SA based xattrs are disabled in the Linux port ++ * to maximize compatibility with other implementations. If you do ++ * enable SA based xattrs then they will not be visible on platforms ++ * which do not support this feature. ++ * ++ * NOTE: One additional consequence of the xattr directory implementation ++ * is that when an extended attribute is manipulated an inode is created. ++ * This inode will exist in the Linux inode cache but there will be no ++ * associated entry in the dentry cache which references it. This is ++ * safe but it may result in some confusion. Enabling SA based xattrs ++ * largely avoids the issue except in the overflow case. ++ */ ++ ++#include ++#include ++#include ++#include ++#include ++ ++typedef struct xattr_filldir { ++ size_t size; ++ size_t offset; ++ char *buf; ++ struct inode *inode; ++} xattr_filldir_t; ++ ++static int ++zpl_xattr_filldir(void *arg, const char *name, int name_len, ++ loff_t offset, uint64_t objnum, unsigned int d_type) ++{ ++ xattr_filldir_t *xf = arg; ++ ++ if (!strncmp(name, XATTR_USER_PREFIX, XATTR_USER_PREFIX_LEN)) ++ if (!(ITOZSB(xf->inode)->z_flags & ZSB_XATTR)) ++ return (0); ++ ++ if (!strncmp(name, XATTR_TRUSTED_PREFIX, XATTR_TRUSTED_PREFIX_LEN)) ++ if (!capable(CAP_SYS_ADMIN)) ++ return (0); ++ ++ /* When xf->buf is NULL only calculate the required size. 
*/ ++ if (xf->buf) { ++ if (xf->offset + name_len + 1 > xf->size) ++ return (-ERANGE); ++ ++ memcpy(xf->buf + xf->offset, name, name_len); ++ xf->buf[xf->offset + name_len] = '\0'; ++ } ++ ++ xf->offset += (name_len + 1); ++ ++ return (0); ++} ++ ++static ssize_t ++zpl_xattr_list_dir(xattr_filldir_t *xf, cred_t *cr) ++{ ++ struct inode *ip = xf->inode; ++ struct inode *dxip = NULL; ++ loff_t pos = 3; /* skip '.', '..', and '.zfs' entries. */ ++ int error; ++ ++ /* Lookup the xattr directory */ ++ error = -zfs_lookup(ip, NULL, &dxip, LOOKUP_XATTR, cr, NULL, NULL); ++ if (error) { ++ if (error == -ENOENT) ++ error = 0; ++ ++ return (error); ++ } ++ ++ /* Fill provided buffer via zpl_zattr_filldir helper */ ++ error = -zfs_readdir(dxip, (void *)xf, zpl_xattr_filldir, &pos, cr); ++ iput(dxip); ++ ++ return (error); ++} ++ ++static ssize_t ++zpl_xattr_list_sa(xattr_filldir_t *xf) ++{ ++ znode_t *zp = ITOZ(xf->inode); ++ nvpair_t *nvp = NULL; ++ int error = 0; ++ ++ mutex_enter(&zp->z_lock); ++ if (zp->z_xattr_cached == NULL) ++ error = -zfs_sa_get_xattr(zp); ++ mutex_exit(&zp->z_lock); ++ ++ if (error) ++ return (error); ++ ++ ASSERT(zp->z_xattr_cached); ++ ++ while ((nvp = nvlist_next_nvpair(zp->z_xattr_cached, nvp)) != NULL) { ++ ASSERT3U(nvpair_type(nvp), ==, DATA_TYPE_BYTE_ARRAY); ++ ++ error = zpl_xattr_filldir((void *)xf, nvpair_name(nvp), ++ strlen(nvpair_name(nvp)), 0, 0, 0); ++ if (error) ++ return (error); ++ } ++ ++ return (0); ++} ++ ++ssize_t ++zpl_xattr_list(struct dentry *dentry, char *buffer, size_t buffer_size) ++{ ++ znode_t *zp = ITOZ(dentry->d_inode); ++ zfs_sb_t *zsb = ZTOZSB(zp); ++ xattr_filldir_t xf = { buffer_size, 0, buffer, dentry->d_inode }; ++ cred_t *cr = CRED(); ++ int error = 0; ++ ++ crhold(cr); ++ rw_enter(&zp->z_xattr_lock, RW_READER); ++ ++ if (zsb->z_use_sa && zp->z_is_sa) { ++ error = zpl_xattr_list_sa(&xf); ++ if (error) ++ goto out; ++ } ++ ++ error = zpl_xattr_list_dir(&xf, cr); ++ if (error) ++ goto out; ++ ++ error = xf.offset; ++out: ++ ++ rw_exit(&zp->z_xattr_lock); ++ crfree(cr); ++ ++ return (error); ++} ++ ++static int ++zpl_xattr_get_dir(struct inode *ip, const char *name, void *value, ++ size_t size, cred_t *cr) ++{ ++ struct inode *dxip = NULL; ++ struct inode *xip = NULL; ++ int error; ++ ++ /* Lookup the xattr directory */ ++ error = -zfs_lookup(ip, NULL, &dxip, LOOKUP_XATTR, cr, NULL, NULL); ++ if (error) ++ goto out; ++ ++ /* Lookup a specific xattr name in the directory */ ++ error = -zfs_lookup(dxip, (char *)name, &xip, 0, cr, NULL, NULL); ++ if (error) ++ goto out; ++ ++ if (!size) { ++ error = i_size_read(xip); ++ goto out; ++ } ++ ++ error = zpl_read_common(xip, value, size, 0, UIO_SYSSPACE, 0, cr); ++out: ++ if (xip) ++ iput(xip); ++ ++ if (dxip) ++ iput(dxip); ++ ++ return (error); ++} ++ ++static int ++zpl_xattr_get_sa(struct inode *ip, const char *name, void *value, size_t size) ++{ ++ znode_t *zp = ITOZ(ip); ++ uchar_t *nv_value; ++ uint_t nv_size; ++ int error = 0; ++ ++ ASSERT(RW_LOCK_HELD(&zp->z_xattr_lock)); ++ ++ mutex_enter(&zp->z_lock); ++ if (zp->z_xattr_cached == NULL) ++ error = -zfs_sa_get_xattr(zp); ++ mutex_exit(&zp->z_lock); ++ ++ if (error) ++ return (error); ++ ++ ASSERT(zp->z_xattr_cached); ++ error = -nvlist_lookup_byte_array(zp->z_xattr_cached, name, ++ &nv_value, &nv_size); ++ if (error) ++ return (error); ++ ++ if (!size) ++ return (nv_size); ++ ++ memcpy(value, nv_value, MIN(size, nv_size)); ++ ++ return (MIN(size, nv_size)); ++} ++ ++static int ++__zpl_xattr_get(struct inode *ip, const char *name, void 
*value, size_t size, ++ cred_t *cr) ++{ ++ znode_t *zp = ITOZ(ip); ++ zfs_sb_t *zsb = ZTOZSB(zp); ++ int error; ++ ++ ASSERT(RW_LOCK_HELD(&zp->z_xattr_lock)); ++ ++ if (zsb->z_use_sa && zp->z_is_sa) { ++ error = zpl_xattr_get_sa(ip, name, value, size); ++ if (error >= 0) ++ goto out; ++ } ++ ++ error = zpl_xattr_get_dir(ip, name, value, size, cr); ++out: ++ if (error == -ENOENT) ++ error = -ENODATA; ++ ++ return (error); ++} ++ ++static int ++zpl_xattr_get(struct inode *ip, const char *name, void *value, size_t size) ++{ ++ znode_t *zp = ITOZ(ip); ++ cred_t *cr = CRED(); ++ int error; ++ ++ crhold(cr); ++ rw_enter(&zp->z_xattr_lock, RW_READER); ++ error = __zpl_xattr_get(ip, name, value, size, cr); ++ rw_exit(&zp->z_xattr_lock); ++ crfree(cr); ++ ++ return (error); ++} ++ ++static int ++zpl_xattr_set_dir(struct inode *ip, const char *name, const void *value, ++ size_t size, int flags, cred_t *cr) ++{ ++ struct inode *dxip = NULL; ++ struct inode *xip = NULL; ++ vattr_t *vap = NULL; ++ ssize_t wrote; ++ int error; ++ const int xattr_mode = S_IFREG | 0644; ++ ++ /* Lookup the xattr directory and create it if required. */ ++ error = -zfs_lookup(ip, NULL, &dxip, LOOKUP_XATTR | CREATE_XATTR_DIR, ++ cr, NULL, NULL); ++ if (error) ++ goto out; ++ ++ /* Lookup a specific xattr name in the directory */ ++ error = -zfs_lookup(dxip, (char *)name, &xip, 0, cr, NULL, NULL); ++ if (error && (error != -ENOENT)) ++ goto out; ++ ++ error = 0; ++ ++ /* Remove a specific name xattr when value is set to NULL. */ ++ if (value == NULL) { ++ if (xip) ++ error = -zfs_remove(dxip, (char *)name, cr); ++ ++ goto out; ++ } ++ ++ /* Lookup failed create a new xattr. */ ++ if (xip == NULL) { ++ vap = kmem_zalloc(sizeof(vattr_t), KM_SLEEP); ++ vap->va_mode = xattr_mode; ++ vap->va_mask = ATTR_MODE; ++ vap->va_uid = crgetfsuid(cr); ++ vap->va_gid = crgetfsgid(cr); ++ ++ error = -zfs_create(dxip, (char *)name, vap, 0, 0644, &xip, ++ cr, 0, NULL); ++ if (error) ++ goto out; ++ } ++ ++ ASSERT(xip != NULL); ++ ++ error = -zfs_freesp(ITOZ(xip), 0, 0, xattr_mode, TRUE); ++ if (error) ++ goto out; ++ ++ wrote = zpl_write_common(xip, value, size, 0, UIO_SYSSPACE, 0, cr); ++ if (wrote < 0) ++ error = wrote; ++ ++out: ++ if (vap) ++ kmem_free(vap, sizeof(vattr_t)); ++ ++ if (xip) ++ iput(xip); ++ ++ if (dxip) ++ iput(dxip); ++ ++ if (error == -ENOENT) ++ error = -ENODATA; ++ ++ ASSERT3S(error, <=, 0); ++ ++ return (error); ++} ++ ++static int ++zpl_xattr_set_sa(struct inode *ip, const char *name, const void *value, ++ size_t size, int flags, cred_t *cr) ++{ ++ znode_t *zp = ITOZ(ip); ++ nvlist_t *nvl; ++ size_t sa_size; ++ int error; ++ ++ ASSERT(zp->z_xattr_cached); ++ nvl = zp->z_xattr_cached; ++ ++ if (value == NULL) { ++ error = -nvlist_remove(nvl, name, DATA_TYPE_BYTE_ARRAY); ++ if (error == -ENOENT) ++ error = zpl_xattr_set_dir(ip, name, NULL, 0, flags, cr); ++ } else { ++ /* Limited to 32k to keep nvpair memory allocations small */ ++ if (size > DXATTR_MAX_ENTRY_SIZE) ++ return (-EFBIG); ++ ++ /* Prevent the DXATTR SA from consuming the entire SA region */ ++ error = -nvlist_size(nvl, &sa_size, NV_ENCODE_XDR); ++ if (error) ++ return (error); ++ ++ if (sa_size > DXATTR_MAX_SA_SIZE) ++ return (-EFBIG); ++ ++ error = -nvlist_add_byte_array(nvl, name, ++ (uchar_t *)value, size); ++ if (error) ++ return (error); ++ } ++ ++ /* Update the SA for additions, modifications, and removals. 
*/ ++ if (!error) ++ error = -zfs_sa_set_xattr(zp); ++ ++ ASSERT3S(error, <=, 0); ++ ++ return (error); ++} ++ ++static int ++zpl_xattr_set(struct inode *ip, const char *name, const void *value, ++ size_t size, int flags) ++{ ++ znode_t *zp = ITOZ(ip); ++ zfs_sb_t *zsb = ZTOZSB(zp); ++ cred_t *cr = CRED(); ++ int error; ++ ++ crhold(cr); ++ rw_enter(&ITOZ(ip)->z_xattr_lock, RW_WRITER); ++ ++ /* ++ * Before setting the xattr check to see if it already exists. ++ * This is done to ensure the following optional flags are honored. ++ * ++ * XATTR_CREATE: fail if xattr already exists ++ * XATTR_REPLACE: fail if xattr does not exist ++ */ ++ error = __zpl_xattr_get(ip, name, NULL, 0, cr); ++ if (error < 0) { ++ if (error != -ENODATA) ++ goto out; ++ ++ if ((error == -ENODATA) && (flags & XATTR_REPLACE)) ++ goto out; ++ } else { ++ error = -EEXIST; ++ if (flags & XATTR_CREATE) ++ goto out; ++ } ++ ++ /* Preferentially store the xattr as a SA for better performance */ ++ if (zsb->z_use_sa && zsb->z_xattr_sa && zp->z_is_sa) { ++ error = zpl_xattr_set_sa(ip, name, value, size, flags, cr); ++ if (error == 0) ++ goto out; ++ } ++ ++ error = zpl_xattr_set_dir(ip, name, value, size, flags, cr); ++out: ++ rw_exit(&ITOZ(ip)->z_xattr_lock); ++ crfree(cr); ++ ASSERT3S(error, <=, 0); ++ ++ return (error); ++} ++ ++static int ++__zpl_xattr_user_get(struct inode *ip, const char *name, ++ void *value, size_t size) ++{ ++ char *xattr_name; ++ int error; ++ ++ if (strcmp(name, "") == 0) ++ return -EINVAL; ++ ++ if (!(ITOZSB(ip)->z_flags & ZSB_XATTR)) ++ return -EOPNOTSUPP; ++ ++ xattr_name = kmem_asprintf("%s%s", XATTR_USER_PREFIX, name); ++ error = zpl_xattr_get(ip, xattr_name, value, size); ++ strfree(xattr_name); ++ ++ return (error); ++} ++ZPL_XATTR_GET_WRAPPER(zpl_xattr_user_get); ++ ++static int ++__zpl_xattr_user_set(struct inode *ip, const char *name, ++ const void *value, size_t size, int flags) ++{ ++ char *xattr_name; ++ int error; ++ ++ if (strcmp(name, "") == 0) ++ return -EINVAL; ++ ++ if (!(ITOZSB(ip)->z_flags & ZSB_XATTR)) ++ return -EOPNOTSUPP; ++ ++ xattr_name = kmem_asprintf("%s%s", XATTR_USER_PREFIX, name); ++ error = zpl_xattr_set(ip, xattr_name, value, size, flags); ++ strfree(xattr_name); ++ ++ return (error); ++} ++ZPL_XATTR_SET_WRAPPER(zpl_xattr_user_set); ++ ++xattr_handler_t zpl_xattr_user_handler = { ++ .prefix = XATTR_USER_PREFIX, ++ .get = zpl_xattr_user_get, ++ .set = zpl_xattr_user_set, ++}; ++ ++static int ++__zpl_xattr_trusted_get(struct inode *ip, const char *name, ++ void *value, size_t size) ++{ ++ char *xattr_name; ++ int error; ++ ++ if (!capable(CAP_SYS_ADMIN)) ++ return -EACCES; ++ ++ if (strcmp(name, "") == 0) ++ return -EINVAL; ++ ++ xattr_name = kmem_asprintf("%s%s", XATTR_TRUSTED_PREFIX, name); ++ error = zpl_xattr_get(ip, xattr_name, value, size); ++ strfree(xattr_name); ++ ++ return (error); ++} ++ZPL_XATTR_GET_WRAPPER(zpl_xattr_trusted_get); ++ ++static int ++__zpl_xattr_trusted_set(struct inode *ip, const char *name, ++ const void *value, size_t size, int flags) ++{ ++ char *xattr_name; ++ int error; ++ ++ if (!capable(CAP_SYS_ADMIN)) ++ return -EACCES; ++ ++ if (strcmp(name, "") == 0) ++ return -EINVAL; ++ ++ xattr_name = kmem_asprintf("%s%s", XATTR_TRUSTED_PREFIX, name); ++ error = zpl_xattr_set(ip, xattr_name, value, size, flags); ++ strfree(xattr_name); ++ ++ return (error); ++} ++ZPL_XATTR_SET_WRAPPER(zpl_xattr_trusted_set); ++ ++xattr_handler_t zpl_xattr_trusted_handler = { ++ .prefix = XATTR_TRUSTED_PREFIX, ++ .get = zpl_xattr_trusted_get, ++ .set = 
zpl_xattr_trusted_set, ++}; ++ ++static int ++__zpl_xattr_security_get(struct inode *ip, const char *name, ++ void *value, size_t size) ++{ ++ char *xattr_name; ++ int error; ++ ++ if (strcmp(name, "") == 0) ++ return -EINVAL; ++ ++ xattr_name = kmem_asprintf("%s%s", XATTR_SECURITY_PREFIX, name); ++ error = zpl_xattr_get(ip, xattr_name, value, size); ++ strfree(xattr_name); ++ ++ return (error); ++} ++ZPL_XATTR_GET_WRAPPER(zpl_xattr_security_get); ++ ++static int ++__zpl_xattr_security_set(struct inode *ip, const char *name, ++ const void *value, size_t size, int flags) ++{ ++ char *xattr_name; ++ int error; ++ ++ if (strcmp(name, "") == 0) ++ return -EINVAL; ++ ++ xattr_name = kmem_asprintf("%s%s", XATTR_SECURITY_PREFIX, name); ++ error = zpl_xattr_set(ip, xattr_name, value, size, flags); ++ strfree(xattr_name); ++ ++ return (error); ++} ++ZPL_XATTR_SET_WRAPPER(zpl_xattr_security_set); ++ ++#ifdef HAVE_CALLBACK_SECURITY_INODE_INIT_SECURITY ++static int ++__zpl_xattr_security_init(struct inode *ip, const struct xattr *xattrs, ++ void *fs_info) ++{ ++ const struct xattr *xattr; ++ int error = 0; ++ ++ for (xattr = xattrs; xattr->name != NULL; xattr++) { ++ error = __zpl_xattr_security_set(ip, ++ xattr->name, xattr->value, xattr->value_len, 0); ++ ++ if (error < 0) ++ break; ++ } ++ ++ return (error); ++} ++ ++int ++zpl_xattr_security_init(struct inode *ip, struct inode *dip, ++ const struct qstr *qstr) ++{ ++ return security_inode_init_security(ip, dip, qstr, ++ &__zpl_xattr_security_init, NULL); ++} ++ ++#else ++int ++zpl_xattr_security_init(struct inode *ip, struct inode *dip, ++ const struct qstr *qstr) ++{ ++ int error; ++ size_t len; ++ void *value; ++ char *name; ++ ++ error = zpl_security_inode_init_security(ip, dip, qstr, ++ &name, &value, &len); ++ if (error) { ++ if (error == -EOPNOTSUPP) ++ return 0; ++ return (error); ++ } ++ ++ error = __zpl_xattr_security_set(ip, name, value, len, 0); ++ ++ kfree(name); ++ kfree(value); ++ ++ return (error); ++} ++#endif /* HAVE_CALLBACK_SECURITY_INODE_INIT_SECURITY */ ++ ++xattr_handler_t zpl_xattr_security_handler = { ++ .prefix = XATTR_SECURITY_PREFIX, ++ .get = zpl_xattr_security_get, ++ .set = zpl_xattr_security_set, ++}; ++ ++xattr_handler_t *zpl_xattr_handlers[] = { ++ &zpl_xattr_security_handler, ++ &zpl_xattr_trusted_handler, ++ &zpl_xattr_user_handler, ++#ifdef HAVE_POSIX_ACLS ++ &zpl_xattr_acl_access_handler, ++ &zpl_xattr_acl_default_handler, ++#endif /* HAVE_POSIX_ACLS */ ++ NULL ++}; +diff -uNr linux-3.2.33-go.orig/fs/zfs/zfs/zrlock.c linux-3.2.33-go/fs/zfs/zfs/zrlock.c +--- linux-3.2.33-go.orig/fs/zfs/zfs/zrlock.c 1970-01-01 01:00:00.000000000 +0100 ++++ linux-3.2.33-go/fs/zfs/zfs/zrlock.c 2012-11-16 23:25:34.350039322 +0100 +@@ -0,0 +1,207 @@ ++/* ++ * CDDL HEADER START ++ * ++ * The contents of this file are subject to the terms of the ++ * Common Development and Distribution License (the "License"). ++ * You may not use this file except in compliance with the License. ++ * ++ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE ++ * or http://www.opensolaris.org/os/licensing. ++ * See the License for the specific language governing permissions ++ * and limitations under the License. ++ * ++ * When distributing Covered Code, include this CDDL HEADER in each ++ * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 
++ * If applicable, add the following below this CDDL HEADER, with the ++ * fields enclosed by brackets "[]" replaced with your own identifying ++ * information: Portions Copyright [yyyy] [name of copyright owner] ++ * ++ * CDDL HEADER END ++ */ ++/* ++ * Copyright (c) 2010, Oracle and/or its affiliates. All rights reserved. ++ */ ++ ++/* ++ * A Zero Reference Lock (ZRL) is a reference count that can lock out new ++ * references only when the count is zero and only without waiting if the count ++ * is not already zero. It is similar to a read-write lock in that it allows ++ * multiple readers and only a single writer, but it does not allow a writer to ++ * block while waiting for readers to exit, and therefore the question of ++ * reader/writer priority is moot (no WRWANT bit). Since the equivalent of ++ * rw_enter(&lock, RW_WRITER) is disallowed and only tryenter() is allowed, it ++ * is perfectly safe for the same reader to acquire the same lock multiple ++ * times. The fact that a ZRL is reentrant for readers (through multiple calls ++ * to zrl_add()) makes it convenient for determining whether something is ++ * actively referenced without the fuss of flagging lock ownership across ++ * function calls. ++ */ ++#include ++ ++/* ++ * A ZRL can be locked only while there are zero references, so ZRL_LOCKED is ++ * treated as zero references. ++ */ ++#define ZRL_LOCKED ((uint32_t)-1) ++#define ZRL_DESTROYED -2 ++ ++void ++zrl_init(zrlock_t *zrl) ++{ ++ mutex_init(&zrl->zr_mtx, NULL, MUTEX_DEFAULT, NULL); ++ zrl->zr_refcount = 0; ++ cv_init(&zrl->zr_cv, NULL, CV_DEFAULT, NULL); ++#ifdef ZFS_DEBUG ++ zrl->zr_owner = NULL; ++ zrl->zr_caller = NULL; ++#endif ++} ++ ++void ++zrl_destroy(zrlock_t *zrl) ++{ ++ ASSERT(zrl->zr_refcount == 0); ++ ++ mutex_destroy(&zrl->zr_mtx); ++ zrl->zr_refcount = ZRL_DESTROYED; ++ cv_destroy(&zrl->zr_cv); ++} ++ ++void ++#ifdef ZFS_DEBUG ++zrl_add_debug(zrlock_t *zrl, const char *zc) ++#else ++zrl_add(zrlock_t *zrl) ++#endif ++{ ++ uint32_t n = (uint32_t)zrl->zr_refcount; ++ ++ while (n != ZRL_LOCKED) { ++ uint32_t cas = atomic_cas_32( ++ (uint32_t *)&zrl->zr_refcount, n, n + 1); ++ if (cas == n) { ++ ASSERT((int32_t)n >= 0); ++#ifdef ZFS_DEBUG ++ if (zrl->zr_owner == curthread) { ++ DTRACE_PROBE2(zrlock__reentry, ++ zrlock_t *, zrl, uint32_t, n); ++ } ++ zrl->zr_owner = curthread; ++ zrl->zr_caller = zc; ++#endif ++ return; ++ } ++ n = cas; ++ } ++ ++ mutex_enter(&zrl->zr_mtx); ++ while (zrl->zr_refcount == ZRL_LOCKED) { ++ cv_wait(&zrl->zr_cv, &zrl->zr_mtx); ++ } ++ ASSERT(zrl->zr_refcount >= 0); ++ zrl->zr_refcount++; ++#ifdef ZFS_DEBUG ++ zrl->zr_owner = curthread; ++ zrl->zr_caller = zc; ++#endif ++ mutex_exit(&zrl->zr_mtx); ++} ++ ++void ++zrl_remove(zrlock_t *zrl) ++{ ++ uint32_t n; ++ ++ n = atomic_dec_32_nv((uint32_t *)&zrl->zr_refcount); ++ ASSERT((int32_t)n >= 0); ++#ifdef ZFS_DEBUG ++ if (zrl->zr_owner == curthread) { ++ zrl->zr_owner = NULL; ++ zrl->zr_caller = NULL; ++ } ++#endif ++} ++ ++int ++zrl_tryenter(zrlock_t *zrl) ++{ ++ uint32_t n = (uint32_t)zrl->zr_refcount; ++ ++ if (n == 0) { ++ uint32_t cas = atomic_cas_32( ++ (uint32_t *)&zrl->zr_refcount, 0, ZRL_LOCKED); ++ if (cas == 0) { ++#ifdef ZFS_DEBUG ++ ASSERT(zrl->zr_owner == NULL); ++ zrl->zr_owner = curthread; ++#endif ++ return (1); ++ } ++ } ++ ++ ASSERT((int32_t)n > ZRL_DESTROYED); ++ ++ return (0); ++} ++ ++void ++zrl_exit(zrlock_t *zrl) ++{ ++ ASSERT(zrl->zr_refcount == ZRL_LOCKED); ++ ++ mutex_enter(&zrl->zr_mtx); ++#ifdef ZFS_DEBUG ++ ASSERT(zrl->zr_owner == curthread); ++ 
zrl->zr_owner = NULL; ++ membar_producer(); /* make sure the owner store happens first */ ++#endif ++ zrl->zr_refcount = 0; ++ cv_broadcast(&zrl->zr_cv); ++ mutex_exit(&zrl->zr_mtx); ++} ++ ++int ++zrl_refcount(zrlock_t *zrl) ++{ ++ int n; ++ ++ ASSERT(zrl->zr_refcount > ZRL_DESTROYED); ++ ++ n = (int)zrl->zr_refcount; ++ return (n <= 0 ? 0 : n); ++} ++ ++int ++zrl_is_zero(zrlock_t *zrl) ++{ ++ ASSERT(zrl->zr_refcount > ZRL_DESTROYED); ++ ++ return (zrl->zr_refcount <= 0); ++} ++ ++int ++zrl_is_locked(zrlock_t *zrl) ++{ ++ ASSERT(zrl->zr_refcount > ZRL_DESTROYED); ++ ++ return (zrl->zr_refcount == ZRL_LOCKED); ++} ++ ++#ifdef ZFS_DEBUG ++kthread_t * ++zrl_owner(zrlock_t *zrl) ++{ ++ return (zrl->zr_owner); ++} ++#endif ++ ++#if defined(_KERNEL) && defined(HAVE_SPL) ++ ++#ifdef ZFS_DEBUG ++EXPORT_SYMBOL(zrl_add_debug); ++#else ++EXPORT_SYMBOL(zrl_add); ++#endif ++EXPORT_SYMBOL(zrl_remove); ++ ++#endif +diff -uNr linux-3.2.33-go.orig/fs/zfs/zfs/zvol.c linux-3.2.33-go/fs/zfs/zfs/zvol.c +--- linux-3.2.33-go.orig/fs/zfs/zfs/zvol.c 1970-01-01 01:00:00.000000000 +0100 ++++ linux-3.2.33-go/fs/zfs/zfs/zvol.c 2012-11-16 23:25:34.350039322 +0100 +@@ -0,0 +1,1503 @@ ++/* ++ * CDDL HEADER START ++ * ++ * The contents of this file are subject to the terms of the ++ * Common Development and Distribution License (the "License"). ++ * You may not use this file except in compliance with the License. ++ * ++ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE ++ * or http://www.opensolaris.org/os/licensing. ++ * See the License for the specific language governing permissions ++ * and limitations under the License. ++ * ++ * When distributing Covered Code, include this CDDL HEADER in each ++ * file and include the License file at usr/src/OPENSOLARIS.LICENSE. ++ * If applicable, add the following below this CDDL HEADER, with the ++ * fields enclosed by brackets "[]" replaced with your own identifying ++ * information: Portions Copyright [yyyy] [name of copyright owner] ++ * ++ * CDDL HEADER END ++ */ ++/* ++ * Copyright (C) 2008-2010 Lawrence Livermore National Security, LLC. ++ * Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER). ++ * Rewritten for Linux by Brian Behlendorf . ++ * LLNL-CODE-403049. ++ * ++ * ZFS volume emulation driver. ++ * ++ * Makes a DMU object look like a volume of arbitrary size, up to 2^64 bytes. ++ * Volumes are accessed through the symbolic links named: ++ * ++ * /dev// ++ * ++ * Volumes are persistent through reboot and module load. No user command ++ * needs to be run before opening and using a device. ++ */ ++ ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++ ++unsigned int zvol_inhibit_dev = 0; ++unsigned int zvol_major = ZVOL_MAJOR; ++unsigned int zvol_threads = 32; ++unsigned long zvol_max_discard_blocks = 16384; ++ ++static taskq_t *zvol_taskq; ++static kmutex_t zvol_state_lock; ++static list_t zvol_state_list; ++static char *zvol_tag = "zvol_tag"; ++ ++/* ++ * The in-core state of each volume. 
++ */ ++typedef struct zvol_state { ++ char zv_name[MAXNAMELEN]; /* name */ ++ uint64_t zv_volsize; /* advertised space */ ++ uint64_t zv_volblocksize;/* volume block size */ ++ objset_t *zv_objset; /* objset handle */ ++ uint32_t zv_flags; /* ZVOL_* flags */ ++ uint32_t zv_open_count; /* open counts */ ++ uint32_t zv_changed; /* disk changed */ ++ zilog_t *zv_zilog; /* ZIL handle */ ++ znode_t zv_znode; /* for range locking */ ++ dmu_buf_t *zv_dbuf; /* bonus handle */ ++ dev_t zv_dev; /* device id */ ++ struct gendisk *zv_disk; /* generic disk */ ++ struct request_queue *zv_queue; /* request queue */ ++ spinlock_t zv_lock; /* request queue lock */ ++ list_node_t zv_next; /* next zvol_state_t linkage */ ++} zvol_state_t; ++ ++#define ZVOL_RDONLY 0x1 ++ ++/* ++ * Find the next available range of ZVOL_MINORS minor numbers. The ++ * zvol_state_list is kept in ascending minor order so we simply need ++ * to scan the list for the first gap in the sequence. This allows us ++ * to recycle minor number as devices are created and removed. ++ */ ++static int ++zvol_find_minor(unsigned *minor) ++{ ++ zvol_state_t *zv; ++ ++ *minor = 0; ++ ASSERT(MUTEX_HELD(&zvol_state_lock)); ++ for (zv = list_head(&zvol_state_list); zv != NULL; ++ zv = list_next(&zvol_state_list, zv), *minor += ZVOL_MINORS) { ++ if (MINOR(zv->zv_dev) != MINOR(*minor)) ++ break; ++ } ++ ++ /* All minors are in use */ ++ if (*minor >= (1 << MINORBITS)) ++ return ENXIO; ++ ++ return 0; ++} ++ ++/* ++ * Find a zvol_state_t given the full major+minor dev_t. ++ */ ++static zvol_state_t * ++zvol_find_by_dev(dev_t dev) ++{ ++ zvol_state_t *zv; ++ ++ ASSERT(MUTEX_HELD(&zvol_state_lock)); ++ for (zv = list_head(&zvol_state_list); zv != NULL; ++ zv = list_next(&zvol_state_list, zv)) { ++ if (zv->zv_dev == dev) ++ return zv; ++ } ++ ++ return NULL; ++} ++ ++/* ++ * Find a zvol_state_t given the name provided at zvol_alloc() time. ++ */ ++static zvol_state_t * ++zvol_find_by_name(const char *name) ++{ ++ zvol_state_t *zv; ++ ++ ASSERT(MUTEX_HELD(&zvol_state_lock)); ++ for (zv = list_head(&zvol_state_list); zv != NULL; ++ zv = list_next(&zvol_state_list, zv)) { ++ if (!strncmp(zv->zv_name, name, MAXNAMELEN)) ++ return zv; ++ } ++ ++ return NULL; ++} ++ ++/* ++ * ZFS_IOC_CREATE callback handles dmu zvol and zap object creation. ++ */ ++void ++zvol_create_cb(objset_t *os, void *arg, cred_t *cr, dmu_tx_t *tx) ++{ ++ zfs_creat_t *zct = arg; ++ nvlist_t *nvprops = zct->zct_props; ++ int error; ++ uint64_t volblocksize, volsize; ++ ++ VERIFY(nvlist_lookup_uint64(nvprops, ++ zfs_prop_to_name(ZFS_PROP_VOLSIZE), &volsize) == 0); ++ if (nvlist_lookup_uint64(nvprops, ++ zfs_prop_to_name(ZFS_PROP_VOLBLOCKSIZE), &volblocksize) != 0) ++ volblocksize = zfs_prop_default_numeric(ZFS_PROP_VOLBLOCKSIZE); ++ ++ /* ++ * These properties must be removed from the list so the generic ++ * property setting step won't apply to them. ++ */ ++ VERIFY(nvlist_remove_all(nvprops, ++ zfs_prop_to_name(ZFS_PROP_VOLSIZE)) == 0); ++ (void) nvlist_remove_all(nvprops, ++ zfs_prop_to_name(ZFS_PROP_VOLBLOCKSIZE)); ++ ++ error = dmu_object_claim(os, ZVOL_OBJ, DMU_OT_ZVOL, volblocksize, ++ DMU_OT_NONE, 0, tx); ++ ASSERT(error == 0); ++ ++ error = zap_create_claim(os, ZVOL_ZAP_OBJ, DMU_OT_ZVOL_PROP, ++ DMU_OT_NONE, 0, tx); ++ ASSERT(error == 0); ++ ++ error = zap_update(os, ZVOL_ZAP_OBJ, "size", 8, 1, &volsize, tx); ++ ASSERT(error == 0); ++} ++ ++/* ++ * ZFS_IOC_OBJSET_STATS entry point. 
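++ * Roughly speaking, this reports the two per-volume properties kept on
++ * disk: "volsize" comes from the "size" entry in ZVOL_ZAP_OBJ and
++ * "volblocksize" from the data block size of ZVOL_OBJ. For example, a
++ * hypothetical 1 GiB volume with an 8K block size would be reported as
++ * { volsize = 1073741824, volblocksize = 8192 }.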
++ */ ++int ++zvol_get_stats(objset_t *os, nvlist_t *nv) ++{ ++ int error; ++ dmu_object_info_t *doi; ++ uint64_t val; ++ ++ error = zap_lookup(os, ZVOL_ZAP_OBJ, "size", 8, 1, &val); ++ if (error) ++ return (error); ++ ++ dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_VOLSIZE, val); ++ doi = kmem_alloc(sizeof(dmu_object_info_t), KM_SLEEP); ++ error = dmu_object_info(os, ZVOL_OBJ, doi); ++ ++ if (error == 0) { ++ dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_VOLBLOCKSIZE, ++ doi->doi_data_block_size); ++ } ++ ++ kmem_free(doi, sizeof(dmu_object_info_t)); ++ ++ return (error); ++} ++ ++/* ++ * Sanity check volume size. ++ */ ++int ++zvol_check_volsize(uint64_t volsize, uint64_t blocksize) ++{ ++ if (volsize == 0) ++ return (EINVAL); ++ ++ if (volsize % blocksize != 0) ++ return (EINVAL); ++ ++#ifdef _ILP32 ++ if (volsize - 1 > MAXOFFSET_T) ++ return (EOVERFLOW); ++#endif ++ return (0); ++} ++ ++/* ++ * Ensure the zap is flushed then inform the VFS of the capacity change. ++ */ ++static int ++zvol_update_volsize(zvol_state_t *zv, uint64_t volsize, objset_t *os) ++{ ++ struct block_device *bdev; ++ dmu_tx_t *tx; ++ int error; ++ ++ ASSERT(MUTEX_HELD(&zvol_state_lock)); ++ ++ tx = dmu_tx_create(os); ++ dmu_tx_hold_zap(tx, ZVOL_ZAP_OBJ, TRUE, NULL); ++ error = dmu_tx_assign(tx, TXG_WAIT); ++ if (error) { ++ dmu_tx_abort(tx); ++ return (error); ++ } ++ ++ error = zap_update(os, ZVOL_ZAP_OBJ, "size", 8, 1, ++ &volsize, tx); ++ dmu_tx_commit(tx); ++ ++ if (error) ++ return (error); ++ ++ error = dmu_free_long_range(os, ++ ZVOL_OBJ, volsize, DMU_OBJECT_END); ++ if (error) ++ return (error); ++ ++ bdev = bdget_disk(zv->zv_disk, 0); ++ if (!bdev) ++ return (EIO); ++/* ++ * 2.6.28 API change ++ * Added check_disk_size_change() helper function. ++ */ ++#ifdef HAVE_CHECK_DISK_SIZE_CHANGE ++ set_capacity(zv->zv_disk, volsize >> 9); ++ zv->zv_volsize = volsize; ++ check_disk_size_change(zv->zv_disk, bdev); ++#else ++ zv->zv_volsize = volsize; ++ zv->zv_changed = 1; ++ (void) check_disk_change(bdev); ++#endif /* HAVE_CHECK_DISK_SIZE_CHANGE */ ++ ++ bdput(bdev); ++ ++ return (0); ++} ++ ++/* ++ * Set ZFS_PROP_VOLSIZE set entry point. ++ */ ++int ++zvol_set_volsize(const char *name, uint64_t volsize) ++{ ++ zvol_state_t *zv; ++ dmu_object_info_t *doi; ++ objset_t *os = NULL; ++ uint64_t readonly; ++ int error; ++ ++ mutex_enter(&zvol_state_lock); ++ ++ zv = zvol_find_by_name(name); ++ if (zv == NULL) { ++ error = ENXIO; ++ goto out; ++ } ++ ++ doi = kmem_alloc(sizeof(dmu_object_info_t), KM_SLEEP); ++ ++ error = dmu_objset_hold(name, FTAG, &os); ++ if (error) ++ goto out_doi; ++ ++ if ((error = dmu_object_info(os, ZVOL_OBJ, doi)) != 0 || ++ (error = zvol_check_volsize(volsize,doi->doi_data_block_size)) != 0) ++ goto out_doi; ++ ++ VERIFY(dsl_prop_get_integer(name, "readonly", &readonly, NULL) == 0); ++ if (readonly) { ++ error = EROFS; ++ goto out_doi; ++ } ++ ++ if (get_disk_ro(zv->zv_disk) || (zv->zv_flags & ZVOL_RDONLY)) { ++ error = EROFS; ++ goto out_doi; ++ } ++ ++ error = zvol_update_volsize(zv, volsize, os); ++out_doi: ++ kmem_free(doi, sizeof(dmu_object_info_t)); ++out: ++ if (os) ++ dmu_objset_rele(os, FTAG); ++ ++ mutex_exit(&zvol_state_lock); ++ ++ return (error); ++} ++ ++/* ++ * Sanity check volume block size. ++ */ ++int ++zvol_check_volblocksize(uint64_t volblocksize) ++{ ++ if (volblocksize < SPA_MINBLOCKSIZE || ++ volblocksize > SPA_MAXBLOCKSIZE || ++ !ISP2(volblocksize)) ++ return (EDOM); ++ ++ return (0); ++} ++ ++/* ++ * Set ZFS_PROP_VOLBLOCKSIZE set entry point. 
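++ * This is typically reached when the volblocksize property is set on an
++ * existing volume. Valid values are constrained by zvol_check_volblocksize()
++ * above to powers of two between SPA_MINBLOCKSIZE and SPA_MAXBLOCKSIZE;
++ * for example, a hypothetical zvol_set_volblocksize("tank/vol", 65536)
++ * call passes that check, while a value of 65000 would be rejected
++ * with EDOM.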
++ */ ++int ++zvol_set_volblocksize(const char *name, uint64_t volblocksize) ++{ ++ zvol_state_t *zv; ++ dmu_tx_t *tx; ++ int error; ++ ++ mutex_enter(&zvol_state_lock); ++ ++ zv = zvol_find_by_name(name); ++ if (zv == NULL) { ++ error = ENXIO; ++ goto out; ++ } ++ ++ if (get_disk_ro(zv->zv_disk) || (zv->zv_flags & ZVOL_RDONLY)) { ++ error = EROFS; ++ goto out; ++ } ++ ++ tx = dmu_tx_create(zv->zv_objset); ++ dmu_tx_hold_bonus(tx, ZVOL_OBJ); ++ error = dmu_tx_assign(tx, TXG_WAIT); ++ if (error) { ++ dmu_tx_abort(tx); ++ } else { ++ error = dmu_object_set_blocksize(zv->zv_objset, ZVOL_OBJ, ++ volblocksize, 0, tx); ++ if (error == ENOTSUP) ++ error = EBUSY; ++ dmu_tx_commit(tx); ++ if (error == 0) ++ zv->zv_volblocksize = volblocksize; ++ } ++out: ++ mutex_exit(&zvol_state_lock); ++ ++ return (error); ++} ++ ++/* ++ * Replay a TX_WRITE ZIL transaction that didn't get committed ++ * after a system failure ++ */ ++static int ++zvol_replay_write(zvol_state_t *zv, lr_write_t *lr, boolean_t byteswap) ++{ ++ objset_t *os = zv->zv_objset; ++ char *data = (char *)(lr + 1); /* data follows lr_write_t */ ++ uint64_t off = lr->lr_offset; ++ uint64_t len = lr->lr_length; ++ dmu_tx_t *tx; ++ int error; ++ ++ if (byteswap) ++ byteswap_uint64_array(lr, sizeof (*lr)); ++ ++ tx = dmu_tx_create(os); ++ dmu_tx_hold_write(tx, ZVOL_OBJ, off, len); ++ error = dmu_tx_assign(tx, TXG_WAIT); ++ if (error) { ++ dmu_tx_abort(tx); ++ } else { ++ dmu_write(os, ZVOL_OBJ, off, len, data, tx); ++ dmu_tx_commit(tx); ++ } ++ ++ return (error); ++} ++ ++static int ++zvol_replay_err(zvol_state_t *zv, lr_t *lr, boolean_t byteswap) ++{ ++ return (ENOTSUP); ++} ++ ++/* ++ * Callback vectors for replaying records. ++ * Only TX_WRITE is needed for zvol. ++ */ ++zil_replay_func_t *zvol_replay_vector[TX_MAX_TYPE] = { ++ (zil_replay_func_t *)zvol_replay_err, /* no such transaction type */ ++ (zil_replay_func_t *)zvol_replay_err, /* TX_CREATE */ ++ (zil_replay_func_t *)zvol_replay_err, /* TX_MKDIR */ ++ (zil_replay_func_t *)zvol_replay_err, /* TX_MKXATTR */ ++ (zil_replay_func_t *)zvol_replay_err, /* TX_SYMLINK */ ++ (zil_replay_func_t *)zvol_replay_err, /* TX_REMOVE */ ++ (zil_replay_func_t *)zvol_replay_err, /* TX_RMDIR */ ++ (zil_replay_func_t *)zvol_replay_err, /* TX_LINK */ ++ (zil_replay_func_t *)zvol_replay_err, /* TX_RENAME */ ++ (zil_replay_func_t *)zvol_replay_write, /* TX_WRITE */ ++ (zil_replay_func_t *)zvol_replay_err, /* TX_TRUNCATE */ ++ (zil_replay_func_t *)zvol_replay_err, /* TX_SETATTR */ ++ (zil_replay_func_t *)zvol_replay_err, /* TX_ACL */ ++}; ++ ++/* ++ * zvol_log_write() handles synchronous writes using TX_WRITE ZIL transactions. ++ * ++ * We store data in the log buffers if it's small enough. ++ * Otherwise we will later flush the data out via dmu_sync(). ++ */ ++ssize_t zvol_immediate_write_sz = 32768; ++ ++static void ++zvol_log_write(zvol_state_t *zv, dmu_tx_t *tx, ++ uint64_t offset, uint64_t size, int sync) ++{ ++ uint32_t blocksize = zv->zv_volblocksize; ++ zilog_t *zilog = zv->zv_zilog; ++ boolean_t slogging; ++ ssize_t immediate_write_sz; ++ ++ if (zil_replaying(zilog, tx)) ++ return; ++ ++ immediate_write_sz = (zilog->zl_logbias == ZFS_LOGBIAS_THROUGHPUT) ++ ? 0 : zvol_immediate_write_sz; ++ slogging = spa_has_slogs(zilog->zl_spa) && ++ (zilog->zl_logbias == ZFS_LOGBIAS_LATENCY); ++ ++ while (size) { ++ itx_t *itx; ++ lr_write_t *lr; ++ ssize_t len; ++ itx_wr_state_t write_state; ++ ++ /* ++ * Unlike zfs_log_write() we can be called with ++ * up to DMU_MAX_ACCESS/2 (5MB) writes. 
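++ * Each chunk logged by the loop below therefore picks one of three
++ * strategies: WR_INDIRECT (sync the block via dmu_sync() and log only a
++ * block pointer) for large aligned writes with no separate log device,
++ * WR_COPIED (copy the data into the itx immediately) for synchronous
++ * writes, and WR_NEED_COPY (defer the copy until the itx is committed)
++ * for everything else.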
++ */ ++ if (blocksize > immediate_write_sz && !slogging && ++ size >= blocksize && offset % blocksize == 0) { ++ write_state = WR_INDIRECT; /* uses dmu_sync */ ++ len = blocksize; ++ } else if (sync) { ++ write_state = WR_COPIED; ++ len = MIN(ZIL_MAX_LOG_DATA, size); ++ } else { ++ write_state = WR_NEED_COPY; ++ len = MIN(ZIL_MAX_LOG_DATA, size); ++ } ++ ++ itx = zil_itx_create(TX_WRITE, sizeof (*lr) + ++ (write_state == WR_COPIED ? len : 0)); ++ lr = (lr_write_t *)&itx->itx_lr; ++ if (write_state == WR_COPIED && dmu_read(zv->zv_objset, ++ ZVOL_OBJ, offset, len, lr+1, DMU_READ_NO_PREFETCH) != 0) { ++ zil_itx_destroy(itx); ++ itx = zil_itx_create(TX_WRITE, sizeof (*lr)); ++ lr = (lr_write_t *)&itx->itx_lr; ++ write_state = WR_NEED_COPY; ++ } ++ ++ itx->itx_wr_state = write_state; ++ if (write_state == WR_NEED_COPY) ++ itx->itx_sod += len; ++ lr->lr_foid = ZVOL_OBJ; ++ lr->lr_offset = offset; ++ lr->lr_length = len; ++ lr->lr_blkoff = 0; ++ BP_ZERO(&lr->lr_blkptr); ++ ++ itx->itx_private = zv; ++ itx->itx_sync = sync; ++ ++ (void) zil_itx_assign(zilog, itx, tx); ++ ++ offset += len; ++ size -= len; ++ } ++} ++ ++/* ++ * Common write path running under the zvol taskq context. This function ++ * is responsible for copying the request structure data in to the DMU and ++ * signaling the request queue with the result of the copy. ++ */ ++static void ++zvol_write(void *arg) ++{ ++ struct request *req = (struct request *)arg; ++ struct request_queue *q = req->q; ++ zvol_state_t *zv = q->queuedata; ++ uint64_t offset = blk_rq_pos(req) << 9; ++ uint64_t size = blk_rq_bytes(req); ++ int error = 0; ++ dmu_tx_t *tx; ++ rl_t *rl; ++ ++ /* ++ * Annotate this call path with a flag that indicates that it is ++ * unsafe to use KM_SLEEP during memory allocations due to the ++ * potential for a deadlock. KM_PUSHPAGE should be used instead. ++ */ ++ ASSERT(!(current->flags & PF_NOFS)); ++ current->flags |= PF_NOFS; ++ ++ if (req->cmd_flags & VDEV_REQ_FLUSH) ++ zil_commit(zv->zv_zilog, ZVOL_OBJ); ++ ++ /* ++ * Some requests are just for flush and nothing else. ++ */ ++ if (size == 0) { ++ blk_end_request(req, 0, size); ++ goto out; ++ } ++ ++ rl = zfs_range_lock(&zv->zv_znode, offset, size, RL_WRITER); ++ ++ tx = dmu_tx_create(zv->zv_objset); ++ dmu_tx_hold_write(tx, ZVOL_OBJ, offset, size); ++ ++ /* This will only fail for ENOSPC */ ++ error = dmu_tx_assign(tx, TXG_WAIT); ++ if (error) { ++ dmu_tx_abort(tx); ++ zfs_range_unlock(rl); ++ blk_end_request(req, -error, size); ++ goto out; ++ } ++ ++ error = dmu_write_req(zv->zv_objset, ZVOL_OBJ, req, tx); ++ if (error == 0) ++ zvol_log_write(zv, tx, offset, size, ++ req->cmd_flags & VDEV_REQ_FUA); ++ ++ dmu_tx_commit(tx); ++ zfs_range_unlock(rl); ++ ++ if ((req->cmd_flags & VDEV_REQ_FUA) || ++ zv->zv_objset->os_sync == ZFS_SYNC_ALWAYS) ++ zil_commit(zv->zv_zilog, ZVOL_OBJ); ++ ++ blk_end_request(req, -error, size); ++out: ++ current->flags &= ~PF_NOFS; ++} ++ ++#ifdef HAVE_BLK_QUEUE_DISCARD ++static void ++zvol_discard(void *arg) ++{ ++ struct request *req = (struct request *)arg; ++ struct request_queue *q = req->q; ++ zvol_state_t *zv = q->queuedata; ++ uint64_t start = blk_rq_pos(req) << 9; ++ uint64_t end = start + blk_rq_bytes(req); ++ int error; ++ rl_t *rl; ++ ++ /* ++ * Annotate this call path with a flag that indicates that it is ++ * unsafe to use KM_SLEEP during memory allocations due to the ++ * potential for a deadlock. KM_PUSHPAGE should be used instead. 
++ */
++ ASSERT(!(current->flags & PF_NOFS));
++ current->flags |= PF_NOFS;
++
++ if (end > zv->zv_volsize) {
++ blk_end_request(req, -EIO, blk_rq_bytes(req));
++ goto out;
++ }
++
++ /*
++ * Align the request to volume block boundaries. If we don't,
++ * then this will force dnode_free_range() to zero out the
++ * unaligned parts, which is slow (read-modify-write) and
++ * useless since we are not freeing any space by doing so.
++ */
++ start = P2ROUNDUP(start, zv->zv_volblocksize);
++ end = P2ALIGN(end, zv->zv_volblocksize);
++
++ if (start >= end) {
++ blk_end_request(req, 0, blk_rq_bytes(req));
++ goto out;
++ }
++
++ rl = zfs_range_lock(&zv->zv_znode, start, end - start, RL_WRITER);
++
++ error = dmu_free_long_range(zv->zv_objset, ZVOL_OBJ, start, end - start);
++
++ /*
++ * TODO: maybe we should add the operation to the log.
++ */
++
++ zfs_range_unlock(rl);
++
++ blk_end_request(req, -error, blk_rq_bytes(req));
++out:
++ current->flags &= ~PF_NOFS;
++}
++#endif /* HAVE_BLK_QUEUE_DISCARD */
++
++/*
++ * Common read path running under the zvol taskq context. This function
++ * is responsible for copying the requested data out of the DMU and into
++ * a Linux request structure. It then must signal the request queue with
++ * an error code describing the result of the copy.
++ */
++static void
++zvol_read(void *arg)
++{
++ struct request *req = (struct request *)arg;
++ struct request_queue *q = req->q;
++ zvol_state_t *zv = q->queuedata;
++ uint64_t offset = blk_rq_pos(req) << 9;
++ uint64_t size = blk_rq_bytes(req);
++ int error;
++ rl_t *rl;
++
++ if (size == 0) {
++ blk_end_request(req, 0, size);
++ return;
++ }
++
++ rl = zfs_range_lock(&zv->zv_znode, offset, size, RL_READER);
++
++ error = dmu_read_req(zv->zv_objset, ZVOL_OBJ, req);
++
++ zfs_range_unlock(rl);
++
++ /* convert checksum errors into IO errors */
++ if (error == ECKSUM)
++ error = EIO;
++
++ blk_end_request(req, -error, size);
++}
++
++/*
++ * The request will be added back to the request queue and retried if
++ * it cannot be immediately dispatched to the taskq for handling.
++ */
++static inline void
++zvol_dispatch(task_func_t func, struct request *req)
++{
++ if (!taskq_dispatch(zvol_taskq, func, (void *)req, TQ_NOSLEEP))
++ blk_requeue_request(req->q, req);
++}
++
++/*
++ * Common request path. Rather than registering a custom make_request()
++ * function we use the generic Linux version. This is done because it allows
++ * us to easily merge read requests which would otherwise be performed
++ * synchronously by the DMU. This is less critical in the write case where
++ * the DMU will perform the correct merging within a transaction group. Using
++ * the generic make_request() also lets us leverage the fact that the
++ * elevator will ensure correct ordering with regard to barrier IOs. On
++ * the downside it means that in the write case we end up doing request
++ * merging twice: once in the elevator and once in the DMU.
++ *
++ * The request handler is called under a spin lock so all the real work
++ * is handed off to be done in the context of the zvol taskq. This function
++ * simply performs basic request sanity checking and hands off the request.
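++ * As a rough illustration, a single small read flows through:
++ *
++ *   zvol_request()                   called with the queue lock held
++ *     -> zvol_dispatch(zvol_read)    hand the request to the zvol taskq
++ *        -> zvol_read()              range lock, dmu_read_req()
++ *           -> blk_end_request()     complete the request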
++ */ ++static void ++zvol_request(struct request_queue *q) ++{ ++ zvol_state_t *zv = q->queuedata; ++ struct request *req; ++ unsigned int size; ++ ++ while ((req = blk_fetch_request(q)) != NULL) { ++ size = blk_rq_bytes(req); ++ ++ if (size != 0 && blk_rq_pos(req) + blk_rq_sectors(req) > ++ get_capacity(zv->zv_disk)) { ++ printk(KERN_INFO ++ "%s: bad access: block=%llu, count=%lu\n", ++ req->rq_disk->disk_name, ++ (long long unsigned)blk_rq_pos(req), ++ (long unsigned)blk_rq_sectors(req)); ++ __blk_end_request(req, -EIO, size); ++ continue; ++ } ++ ++ if (!blk_fs_request(req)) { ++ printk(KERN_INFO "%s: non-fs cmd\n", ++ req->rq_disk->disk_name); ++ __blk_end_request(req, -EIO, size); ++ continue; ++ } ++ ++ switch (rq_data_dir(req)) { ++ case READ: ++ zvol_dispatch(zvol_read, req); ++ break; ++ case WRITE: ++ if (unlikely(get_disk_ro(zv->zv_disk)) || ++ unlikely(zv->zv_flags & ZVOL_RDONLY)) { ++ __blk_end_request(req, -EROFS, size); ++ break; ++ } ++ ++#ifdef HAVE_BLK_QUEUE_DISCARD ++ if (req->cmd_flags & VDEV_REQ_DISCARD) { ++ zvol_dispatch(zvol_discard, req); ++ break; ++ } ++#endif /* HAVE_BLK_QUEUE_DISCARD */ ++ ++ zvol_dispatch(zvol_write, req); ++ break; ++ default: ++ printk(KERN_INFO "%s: unknown cmd: %d\n", ++ req->rq_disk->disk_name, (int)rq_data_dir(req)); ++ __blk_end_request(req, -EIO, size); ++ break; ++ } ++ } ++} ++ ++static void ++zvol_get_done(zgd_t *zgd, int error) ++{ ++ if (zgd->zgd_db) ++ dmu_buf_rele(zgd->zgd_db, zgd); ++ ++ zfs_range_unlock(zgd->zgd_rl); ++ ++ if (error == 0 && zgd->zgd_bp) ++ zil_add_block(zgd->zgd_zilog, zgd->zgd_bp); ++ ++ kmem_free(zgd, sizeof (zgd_t)); ++} ++ ++/* ++ * Get data to generate a TX_WRITE intent log record. ++ */ ++static int ++zvol_get_data(void *arg, lr_write_t *lr, char *buf, zio_t *zio) ++{ ++ zvol_state_t *zv = arg; ++ objset_t *os = zv->zv_objset; ++ uint64_t offset = lr->lr_offset; ++ uint64_t size = lr->lr_length; ++ dmu_buf_t *db; ++ zgd_t *zgd; ++ int error; ++ ++ ASSERT(zio != NULL); ++ ASSERT(size != 0); ++ ++ zgd = (zgd_t *)kmem_zalloc(sizeof (zgd_t), KM_PUSHPAGE); ++ zgd->zgd_zilog = zv->zv_zilog; ++ zgd->zgd_rl = zfs_range_lock(&zv->zv_znode, offset, size, RL_READER); ++ ++ /* ++ * Write records come in two flavors: immediate and indirect. ++ * For small writes it's cheaper to store the data with the ++ * log record (immediate); for large writes it's cheaper to ++ * sync the data and get a pointer to it (indirect) so that ++ * we don't have to write the data twice. ++ */ ++ if (buf != NULL) { /* immediate write */ ++ error = dmu_read(os, ZVOL_OBJ, offset, size, buf, ++ DMU_READ_NO_PREFETCH); ++ } else { ++ size = zv->zv_volblocksize; ++ offset = P2ALIGN_TYPED(offset, size, uint64_t); ++ error = dmu_buf_hold(os, ZVOL_OBJ, offset, zgd, &db, ++ DMU_READ_NO_PREFETCH); ++ if (error == 0) { ++ zgd->zgd_db = db; ++ zgd->zgd_bp = &lr->lr_blkptr; ++ ++ ASSERT(db != NULL); ++ ASSERT(db->db_offset == offset); ++ ASSERT(db->db_size == size); ++ ++ error = dmu_sync(zio, lr->lr_common.lrc_txg, ++ zvol_get_done, zgd); ++ ++ if (error == 0) ++ return (0); ++ } ++ } ++ ++ zvol_get_done(zgd, error); ++ ++ return (error); ++} ++ ++/* ++ * The zvol_state_t's are inserted in increasing MINOR(dev_t) order. 
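++ * Keeping this list sorted is what allows zvol_find_minor() above to reuse
++ * gaps; for example, if volumes hold minors 0, ZVOL_MINORS and 2*ZVOL_MINORS
++ * and the middle one is removed, the next volume created is assigned the
++ * now-free minor ZVOL_MINORS again.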
++ */ ++static void ++zvol_insert(zvol_state_t *zv_insert) ++{ ++ zvol_state_t *zv = NULL; ++ ++ ASSERT(MUTEX_HELD(&zvol_state_lock)); ++ ASSERT3U(MINOR(zv_insert->zv_dev) & ZVOL_MINOR_MASK, ==, 0); ++ for (zv = list_head(&zvol_state_list); zv != NULL; ++ zv = list_next(&zvol_state_list, zv)) { ++ if (MINOR(zv->zv_dev) > MINOR(zv_insert->zv_dev)) ++ break; ++ } ++ ++ list_insert_before(&zvol_state_list, zv, zv_insert); ++} ++ ++/* ++ * Simply remove the zvol from to list of zvols. ++ */ ++static void ++zvol_remove(zvol_state_t *zv_remove) ++{ ++ ASSERT(MUTEX_HELD(&zvol_state_lock)); ++ list_remove(&zvol_state_list, zv_remove); ++} ++ ++static int ++zvol_first_open(zvol_state_t *zv) ++{ ++ objset_t *os; ++ uint64_t volsize; ++ int error; ++ uint64_t ro; ++ ++ /* lie and say we're read-only */ ++ error = dmu_objset_own(zv->zv_name, DMU_OST_ZVOL, 1, zvol_tag, &os); ++ if (error) ++ return (-error); ++ ++ error = zap_lookup(os, ZVOL_ZAP_OBJ, "size", 8, 1, &volsize); ++ if (error) { ++ dmu_objset_disown(os, zvol_tag); ++ return (-error); ++ } ++ ++ zv->zv_objset = os; ++ error = dmu_bonus_hold(os, ZVOL_OBJ, zvol_tag, &zv->zv_dbuf); ++ if (error) { ++ dmu_objset_disown(os, zvol_tag); ++ return (-error); ++ } ++ ++ set_capacity(zv->zv_disk, volsize >> 9); ++ zv->zv_volsize = volsize; ++ zv->zv_zilog = zil_open(os, zvol_get_data); ++ ++ VERIFY(dsl_prop_get_integer(zv->zv_name, "readonly", &ro, NULL) == 0); ++ if (ro || dmu_objset_is_snapshot(os)) { ++ set_disk_ro(zv->zv_disk, 1); ++ zv->zv_flags |= ZVOL_RDONLY; ++ } else { ++ set_disk_ro(zv->zv_disk, 0); ++ zv->zv_flags &= ~ZVOL_RDONLY; ++ } ++ ++ return (-error); ++} ++ ++static void ++zvol_last_close(zvol_state_t *zv) ++{ ++ zil_close(zv->zv_zilog); ++ zv->zv_zilog = NULL; ++ ++ dmu_buf_rele(zv->zv_dbuf, zvol_tag); ++ zv->zv_dbuf = NULL; ++ ++ /* ++ * Evict cached data ++ */ ++ if (dsl_dataset_is_dirty(dmu_objset_ds(zv->zv_objset)) && ++ !(zv->zv_flags & ZVOL_RDONLY)) ++ txg_wait_synced(dmu_objset_pool(zv->zv_objset), 0); ++ (void) dmu_objset_evict_dbufs(zv->zv_objset); ++ ++ dmu_objset_disown(zv->zv_objset, zvol_tag); ++ zv->zv_objset = NULL; ++} ++ ++static int ++zvol_open(struct block_device *bdev, fmode_t flag) ++{ ++ zvol_state_t *zv = bdev->bd_disk->private_data; ++ int error = 0, drop_mutex = 0; ++ ++ /* ++ * If the caller is already holding the mutex do not take it ++ * again, this will happen as part of zvol_create_minor(). ++ * Once add_disk() is called the device is live and the kernel ++ * will attempt to open it to read the partition information. 
++ */ ++ if (!mutex_owned(&zvol_state_lock)) { ++ mutex_enter(&zvol_state_lock); ++ drop_mutex = 1; ++ } ++ ++ ASSERT3P(zv, !=, NULL); ++ ++ if (zv->zv_open_count == 0) { ++ error = zvol_first_open(zv); ++ if (error) ++ goto out_mutex; ++ } ++ ++ if ((flag & FMODE_WRITE) && ++ (get_disk_ro(zv->zv_disk) || (zv->zv_flags & ZVOL_RDONLY))) { ++ error = -EROFS; ++ goto out_open_count; ++ } ++ ++ zv->zv_open_count++; ++ ++out_open_count: ++ if (zv->zv_open_count == 0) ++ zvol_last_close(zv); ++ ++out_mutex: ++ if (drop_mutex) ++ mutex_exit(&zvol_state_lock); ++ ++ check_disk_change(bdev); ++ ++ return (error); ++} ++ ++static int ++zvol_release(struct gendisk *disk, fmode_t mode) ++{ ++ zvol_state_t *zv = disk->private_data; ++ int drop_mutex = 0; ++ ++ if (!mutex_owned(&zvol_state_lock)) { ++ mutex_enter(&zvol_state_lock); ++ drop_mutex = 1; ++ } ++ ++ ASSERT3P(zv, !=, NULL); ++ ASSERT3U(zv->zv_open_count, >, 0); ++ zv->zv_open_count--; ++ if (zv->zv_open_count == 0) ++ zvol_last_close(zv); ++ ++ if (drop_mutex) ++ mutex_exit(&zvol_state_lock); ++ ++ return (0); ++} ++ ++static int ++zvol_ioctl(struct block_device *bdev, fmode_t mode, ++ unsigned int cmd, unsigned long arg) ++{ ++ zvol_state_t *zv = bdev->bd_disk->private_data; ++ int error = 0; ++ ++ if (zv == NULL) ++ return (-ENXIO); ++ ++ switch (cmd) { ++ case BLKFLSBUF: ++ zil_commit(zv->zv_zilog, ZVOL_OBJ); ++ break; ++ case BLKZNAME: ++ error = copy_to_user((void *)arg, zv->zv_name, MAXNAMELEN); ++ break; ++ ++ default: ++ error = -ENOTTY; ++ break; ++ ++ } ++ ++ return (error); ++} ++ ++#ifdef CONFIG_COMPAT ++static int ++zvol_compat_ioctl(struct block_device *bdev, fmode_t mode, ++ unsigned cmd, unsigned long arg) ++{ ++ return zvol_ioctl(bdev, mode, cmd, arg); ++} ++#else ++#define zvol_compat_ioctl NULL ++#endif ++ ++static int zvol_media_changed(struct gendisk *disk) ++{ ++ zvol_state_t *zv = disk->private_data; ++ ++ return zv->zv_changed; ++} ++ ++static int zvol_revalidate_disk(struct gendisk *disk) ++{ ++ zvol_state_t *zv = disk->private_data; ++ ++ zv->zv_changed = 0; ++ set_capacity(zv->zv_disk, zv->zv_volsize >> 9); ++ ++ return 0; ++} ++ ++/* ++ * Provide a simple virtual geometry for legacy compatibility. For devices ++ * smaller than 1 MiB a small head and sector count is used to allow very ++ * tiny devices. For devices over 1 Mib a standard head and sector count ++ * is used to keep the cylinders count reasonable. ++ */ ++static int ++zvol_getgeo(struct block_device *bdev, struct hd_geometry *geo) ++{ ++ zvol_state_t *zv = bdev->bd_disk->private_data; ++ sector_t sectors = get_capacity(zv->zv_disk); ++ ++ if (sectors > 2048) { ++ geo->heads = 16; ++ geo->sectors = 63; ++ } else { ++ geo->heads = 2; ++ geo->sectors = 4; ++ } ++ ++ geo->start = 0; ++ geo->cylinders = sectors / (geo->heads * geo->sectors); ++ ++ return 0; ++} ++ ++static struct kobject * ++zvol_probe(dev_t dev, int *part, void *arg) ++{ ++ zvol_state_t *zv; ++ struct kobject *kobj; ++ ++ mutex_enter(&zvol_state_lock); ++ zv = zvol_find_by_dev(dev); ++ kobj = zv ? 
get_disk(zv->zv_disk) : NULL; ++ mutex_exit(&zvol_state_lock); ++ ++ return kobj; ++} ++ ++#ifdef HAVE_BDEV_BLOCK_DEVICE_OPERATIONS ++static struct block_device_operations zvol_ops = { ++ .open = zvol_open, ++ .release = zvol_release, ++ .ioctl = zvol_ioctl, ++ .compat_ioctl = zvol_compat_ioctl, ++ .media_changed = zvol_media_changed, ++ .revalidate_disk = zvol_revalidate_disk, ++ .getgeo = zvol_getgeo, ++ .owner = THIS_MODULE, ++}; ++ ++#else /* HAVE_BDEV_BLOCK_DEVICE_OPERATIONS */ ++ ++static int ++zvol_open_by_inode(struct inode *inode, struct file *file) ++{ ++ return zvol_open(inode->i_bdev, file->f_mode); ++} ++ ++static int ++zvol_release_by_inode(struct inode *inode, struct file *file) ++{ ++ return zvol_release(inode->i_bdev->bd_disk, file->f_mode); ++} ++ ++static int ++zvol_ioctl_by_inode(struct inode *inode, struct file *file, ++ unsigned int cmd, unsigned long arg) ++{ ++ if (file == NULL || inode == NULL) ++ return -EINVAL; ++ return zvol_ioctl(inode->i_bdev, file->f_mode, cmd, arg); ++} ++ ++# ifdef CONFIG_COMPAT ++static long ++zvol_compat_ioctl_by_inode(struct file *file, ++ unsigned int cmd, unsigned long arg) ++{ ++ if (file == NULL) ++ return -EINVAL; ++ return zvol_compat_ioctl(file->f_dentry->d_inode->i_bdev, ++ file->f_mode, cmd, arg); ++} ++# else ++# define zvol_compat_ioctl_by_inode NULL ++# endif ++ ++static struct block_device_operations zvol_ops = { ++ .open = zvol_open_by_inode, ++ .release = zvol_release_by_inode, ++ .ioctl = zvol_ioctl_by_inode, ++ .compat_ioctl = zvol_compat_ioctl_by_inode, ++ .media_changed = zvol_media_changed, ++ .revalidate_disk = zvol_revalidate_disk, ++ .getgeo = zvol_getgeo, ++ .owner = THIS_MODULE, ++}; ++#endif /* HAVE_BDEV_BLOCK_DEVICE_OPERATIONS */ ++ ++/* ++ * Allocate memory for a new zvol_state_t and setup the required ++ * request queue and generic disk structures for the block device. 
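++ * Setup proceeds in three steps -- allocate the zvol_state_t, create the
++ * request queue with a noop elevator, then alloc_disk() -- and the
++ * out_queue/out_kmem labels unwind those steps in reverse if anything
++ * fails, so the caller sees either a fully constructed zvol_state_t or NULL.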
++ */ ++static zvol_state_t * ++zvol_alloc(dev_t dev, const char *name) ++{ ++ zvol_state_t *zv; ++ int error = 0; ++ ++ zv = kmem_zalloc(sizeof (zvol_state_t), KM_SLEEP); ++ if (zv == NULL) ++ goto out; ++ ++ zv->zv_queue = blk_init_queue(zvol_request, &zv->zv_lock); ++ if (zv->zv_queue == NULL) ++ goto out_kmem; ++ ++#ifdef HAVE_ELEVATOR_CHANGE ++ error = elevator_change(zv->zv_queue, "noop"); ++#endif /* HAVE_ELEVATOR_CHANGE */ ++ if (error) { ++ printk("ZFS: Unable to set \"%s\" scheduler for zvol %s: %d\n", ++ "noop", name, error); ++ goto out_queue; ++ } ++ ++#ifdef HAVE_BLK_QUEUE_FLUSH ++ blk_queue_flush(zv->zv_queue, VDEV_REQ_FLUSH | VDEV_REQ_FUA); ++#else ++ blk_queue_ordered(zv->zv_queue, QUEUE_ORDERED_DRAIN, NULL); ++#endif /* HAVE_BLK_QUEUE_FLUSH */ ++ ++ zv->zv_disk = alloc_disk(ZVOL_MINORS); ++ if (zv->zv_disk == NULL) ++ goto out_queue; ++ ++ zv->zv_queue->queuedata = zv; ++ zv->zv_dev = dev; ++ zv->zv_open_count = 0; ++ strlcpy(zv->zv_name, name, MAXNAMELEN); ++ ++ mutex_init(&zv->zv_znode.z_range_lock, NULL, MUTEX_DEFAULT, NULL); ++ avl_create(&zv->zv_znode.z_range_avl, zfs_range_compare, ++ sizeof (rl_t), offsetof(rl_t, r_node)); ++ zv->zv_znode.z_is_zvol = TRUE; ++ ++ spin_lock_init(&zv->zv_lock); ++ list_link_init(&zv->zv_next); ++ ++ zv->zv_disk->major = zvol_major; ++ zv->zv_disk->first_minor = (dev & MINORMASK); ++ zv->zv_disk->fops = &zvol_ops; ++ zv->zv_disk->private_data = zv; ++ zv->zv_disk->queue = zv->zv_queue; ++ snprintf(zv->zv_disk->disk_name, DISK_NAME_LEN, "%s%d", ++ ZVOL_DEV_NAME, (dev & MINORMASK)); ++ ++ return zv; ++ ++out_queue: ++ blk_cleanup_queue(zv->zv_queue); ++out_kmem: ++ kmem_free(zv, sizeof (zvol_state_t)); ++out: ++ return NULL; ++} ++ ++/* ++ * Cleanup then free a zvol_state_t which was created by zvol_alloc(). 
++ */ ++static void ++zvol_free(zvol_state_t *zv) ++{ ++ avl_destroy(&zv->zv_znode.z_range_avl); ++ mutex_destroy(&zv->zv_znode.z_range_lock); ++ ++ del_gendisk(zv->zv_disk); ++ blk_cleanup_queue(zv->zv_queue); ++ put_disk(zv->zv_disk); ++ ++ kmem_free(zv, sizeof (zvol_state_t)); ++} ++ ++static int ++__zvol_create_minor(const char *name) ++{ ++ zvol_state_t *zv; ++ objset_t *os; ++ dmu_object_info_t *doi; ++ uint64_t volsize; ++ unsigned minor = 0; ++ int error = 0; ++ ++ ASSERT(MUTEX_HELD(&zvol_state_lock)); ++ ++ zv = zvol_find_by_name(name); ++ if (zv) { ++ error = EEXIST; ++ goto out; ++ } ++ ++ doi = kmem_alloc(sizeof(dmu_object_info_t), KM_SLEEP); ++ ++ error = dmu_objset_own(name, DMU_OST_ZVOL, B_TRUE, zvol_tag, &os); ++ if (error) ++ goto out_doi; ++ ++ error = dmu_object_info(os, ZVOL_OBJ, doi); ++ if (error) ++ goto out_dmu_objset_disown; ++ ++ error = zap_lookup(os, ZVOL_ZAP_OBJ, "size", 8, 1, &volsize); ++ if (error) ++ goto out_dmu_objset_disown; ++ ++ error = zvol_find_minor(&minor); ++ if (error) ++ goto out_dmu_objset_disown; ++ ++ zv = zvol_alloc(MKDEV(zvol_major, minor), name); ++ if (zv == NULL) { ++ error = EAGAIN; ++ goto out_dmu_objset_disown; ++ } ++ ++ if (dmu_objset_is_snapshot(os)) ++ zv->zv_flags |= ZVOL_RDONLY; ++ ++ zv->zv_volblocksize = doi->doi_data_block_size; ++ zv->zv_volsize = volsize; ++ zv->zv_objset = os; ++ ++ set_capacity(zv->zv_disk, zv->zv_volsize >> 9); ++ ++ blk_queue_max_hw_sectors(zv->zv_queue, UINT_MAX); ++ blk_queue_max_segments(zv->zv_queue, UINT16_MAX); ++ blk_queue_max_segment_size(zv->zv_queue, UINT_MAX); ++ blk_queue_physical_block_size(zv->zv_queue, zv->zv_volblocksize); ++ blk_queue_io_opt(zv->zv_queue, zv->zv_volblocksize); ++#ifdef HAVE_BLK_QUEUE_DISCARD ++ blk_queue_max_discard_sectors(zv->zv_queue, ++ (zvol_max_discard_blocks * zv->zv_volblocksize) >> 9); ++ blk_queue_discard_granularity(zv->zv_queue, zv->zv_volblocksize); ++ queue_flag_set_unlocked(QUEUE_FLAG_DISCARD, zv->zv_queue); ++#endif ++#ifdef HAVE_BLK_QUEUE_NONROT ++ queue_flag_set_unlocked(QUEUE_FLAG_NONROT, zv->zv_queue); ++#endif ++ ++ if (zil_replay_disable) ++ zil_destroy(dmu_objset_zil(os), B_FALSE); ++ else ++ zil_replay(os, zv, zvol_replay_vector); ++ ++out_dmu_objset_disown: ++ dmu_objset_disown(os, zvol_tag); ++ zv->zv_objset = NULL; ++out_doi: ++ kmem_free(doi, sizeof(dmu_object_info_t)); ++out: ++ ++ if (error == 0) { ++ zvol_insert(zv); ++ add_disk(zv->zv_disk); ++ } ++ ++ return (error); ++} ++ ++/* ++ * Create a block device minor node and setup the linkage between it ++ * and the specified volume. Once this function returns the block ++ * device is live and ready for use. ++ */ ++int ++zvol_create_minor(const char *name) ++{ ++ int error; ++ ++ mutex_enter(&zvol_state_lock); ++ error = __zvol_create_minor(name); ++ mutex_exit(&zvol_state_lock); ++ ++ return (error); ++} ++ ++static int ++__zvol_remove_minor(const char *name) ++{ ++ zvol_state_t *zv; ++ ++ ASSERT(MUTEX_HELD(&zvol_state_lock)); ++ ++ zv = zvol_find_by_name(name); ++ if (zv == NULL) ++ return (ENXIO); ++ ++ if (zv->zv_open_count > 0) ++ return (EBUSY); ++ ++ zvol_remove(zv); ++ zvol_free(zv); ++ ++ return (0); ++} ++ ++/* ++ * Remove a block device minor node for the specified volume. 
++ */ ++int ++zvol_remove_minor(const char *name) ++{ ++ int error; ++ ++ mutex_enter(&zvol_state_lock); ++ error = __zvol_remove_minor(name); ++ mutex_exit(&zvol_state_lock); ++ ++ return (error); ++} ++ ++static int ++zvol_create_minors_cb(spa_t *spa, uint64_t dsobj, ++ const char *dsname, void *arg) ++{ ++ if (strchr(dsname, '/') == NULL) ++ return 0; ++ ++ (void) __zvol_create_minor(dsname); ++ return (0); ++} ++ ++/* ++ * Create minors for specified pool, if pool is NULL create minors ++ * for all available pools. ++ */ ++int ++zvol_create_minors(const char *pool) ++{ ++ spa_t *spa = NULL; ++ int error = 0; ++ ++ if (zvol_inhibit_dev) ++ return (0); ++ ++ mutex_enter(&zvol_state_lock); ++ if (pool) { ++ error = dmu_objset_find_spa(NULL, pool, zvol_create_minors_cb, ++ NULL, DS_FIND_CHILDREN | DS_FIND_SNAPSHOTS); ++ } else { ++ mutex_enter(&spa_namespace_lock); ++ while ((spa = spa_next(spa)) != NULL) { ++ error = dmu_objset_find_spa(NULL, ++ spa_name(spa), zvol_create_minors_cb, NULL, ++ DS_FIND_CHILDREN | DS_FIND_SNAPSHOTS); ++ if (error) ++ break; ++ } ++ mutex_exit(&spa_namespace_lock); ++ } ++ mutex_exit(&zvol_state_lock); ++ ++ return error; ++} ++ ++/* ++ * Remove minors for specified pool, if pool is NULL remove all minors. ++ */ ++void ++zvol_remove_minors(const char *pool) ++{ ++ zvol_state_t *zv, *zv_next; ++ char *str; ++ ++ if (zvol_inhibit_dev) ++ return; ++ ++ str = kmem_zalloc(MAXNAMELEN, KM_SLEEP); ++ if (pool) { ++ (void) strncpy(str, pool, strlen(pool)); ++ (void) strcat(str, "/"); ++ } ++ ++ mutex_enter(&zvol_state_lock); ++ for (zv = list_head(&zvol_state_list); zv != NULL; zv = zv_next) { ++ zv_next = list_next(&zvol_state_list, zv); ++ ++ if (pool == NULL || !strncmp(str, zv->zv_name, strlen(str))) { ++ zvol_remove(zv); ++ zvol_free(zv); ++ } ++ } ++ mutex_exit(&zvol_state_lock); ++ kmem_free(str, MAXNAMELEN); ++} ++ ++int ++zvol_init(void) ++{ ++ int error; ++ ++ zvol_taskq = taskq_create(ZVOL_DRIVER, zvol_threads, maxclsyspri, ++ zvol_threads, INT_MAX, TASKQ_PREPOPULATE); ++ if (zvol_taskq == NULL) { ++ printk(KERN_INFO "ZFS: taskq_create() failed\n"); ++ return (-ENOMEM); ++ } ++ ++ error = register_blkdev(zvol_major, ZVOL_DRIVER); ++ if (error) { ++ printk(KERN_INFO "ZFS: register_blkdev() failed %d\n", error); ++ taskq_destroy(zvol_taskq); ++ return (error); ++ } ++ ++ blk_register_region(MKDEV(zvol_major, 0), 1UL << MINORBITS, ++ THIS_MODULE, zvol_probe, NULL, NULL); ++ ++ mutex_init(&zvol_state_lock, NULL, MUTEX_DEFAULT, NULL); ++ list_create(&zvol_state_list, sizeof (zvol_state_t), ++ offsetof(zvol_state_t, zv_next)); ++ ++ (void) zvol_create_minors(NULL); ++ ++ return (0); ++} ++ ++void ++zvol_fini(void) ++{ ++ zvol_remove_minors(NULL); ++ blk_unregister_region(MKDEV(zvol_major, 0), 1UL << MINORBITS); ++ unregister_blkdev(zvol_major, ZVOL_DRIVER); ++ taskq_destroy(zvol_taskq); ++ mutex_destroy(&zvol_state_lock); ++ list_destroy(&zvol_state_list); ++} ++ ++module_param(zvol_inhibit_dev, uint, 0644); ++MODULE_PARM_DESC(zvol_inhibit_dev, "Do not create zvol device nodes"); ++ ++module_param(zvol_major, uint, 0444); ++MODULE_PARM_DESC(zvol_major, "Major number for zvol device"); ++ ++module_param(zvol_threads, uint, 0444); ++MODULE_PARM_DESC(zvol_threads, "Number of threads for zvol device"); ++ ++module_param(zvol_max_discard_blocks, ulong, 0444); ++MODULE_PARM_DESC(zvol_max_discard_blocks, "Max number of blocks to discard at once"); +diff -uNr linux-3.2.33-go.orig/fs/zfs/zpios/Makefile linux-3.2.33-go/fs/zfs/zpios/Makefile +--- 
linux-3.2.33-go.orig/fs/zfs/zpios/Makefile 1970-01-01 01:00:00.000000000 +0100 ++++ linux-3.2.33-go/fs/zfs/zpios/Makefile 2012-11-16 23:25:34.376039025 +0100 +@@ -0,0 +1,7 @@ ++MODULE := zpios ++ ++EXTRA_CFLAGS = $(ZFS_MODULE_CFLAGS) -Wno-unused-but-set-variable -DHAVE_SPL -D_KERNEL -DTEXT_DOMAIN=\"zfs-linux-kernel\" -DNDEBUG ++ ++obj-$(CONFIG_ZFS) := $(MODULE).o ++ ++$(MODULE)-objs += pios.o +diff -uNr linux-3.2.33-go.orig/fs/zfs/zpios/Makefile.in linux-3.2.33-go/fs/zfs/zpios/Makefile.in +--- linux-3.2.33-go.orig/fs/zfs/zpios/Makefile.in 1970-01-01 01:00:00.000000000 +0100 ++++ linux-3.2.33-go/fs/zfs/zpios/Makefile.in 2012-11-16 23:25:34.354039278 +0100 +@@ -0,0 +1,7 @@ ++MODULE := zpios ++ ++EXTRA_CFLAGS = $(ZFS_MODULE_CFLAGS) @KERNELCPPFLAGS@ ++ ++obj-$(CONFIG_ZFS) := $(MODULE).o ++ ++$(MODULE)-objs += @top_srcdir@/module/zpios/pios.o +diff -uNr linux-3.2.33-go.orig/fs/zfs/zpios/pios.c linux-3.2.33-go/fs/zfs/zpios/pios.c +--- linux-3.2.33-go.orig/fs/zfs/zpios/pios.c 1970-01-01 01:00:00.000000000 +0100 ++++ linux-3.2.33-go/fs/zfs/zpios/pios.c 2012-11-16 23:25:34.354039278 +0100 +@@ -0,0 +1,1330 @@ ++/*****************************************************************************\ ++ * ZPIOS is a heavily modified version of the original PIOS test code. ++ * It is designed to have the test code running in the Linux kernel ++ * against ZFS while still being flexibly controled from user space. ++ * ++ * Copyright (C) 2008-2010 Lawrence Livermore National Security, LLC. ++ * Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER). ++ * Written by Brian Behlendorf . ++ * LLNL-CODE-403049 ++ * ++ * Original PIOS Test Code ++ * Copyright (C) 2004 Cluster File Systems, Inc. ++ * Written by Peter Braam ++ * Atul Vidwansa ++ * Milind Dumbare ++ * ++ * This file is part of ZFS on Linux. ++ * For details, see . ++ * ++ * ZPIOS is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License as published by the ++ * Free Software Foundation; either version 2 of the License, or (at your ++ * option) any later version. ++ * ++ * ZPIOS is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * for more details. ++ * ++ * You should have received a copy of the GNU General Public License along ++ * with ZPIOS. If not, see . ++\*****************************************************************************/ ++ ++#include ++#include ++#include ++#include ++#include "zpios-internal.h" ++ ++ ++static spl_class *zpios_class; ++static spl_device *zpios_device; ++static char *zpios_tag = "zpios_tag"; ++ ++static ++int zpios_upcall(char *path, char *phase, run_args_t *run_args, int rc) ++{ ++ /* This is stack heavy but it should be OK since we are only ++ * making the upcall between tests when the stack is shallow. 
++ */ ++ char id[16], chunk_size[16], region_size[16], thread_count[16]; ++ char region_count[16], offset[16], region_noise[16], chunk_noise[16]; ++ char thread_delay[16], flags[16], result[8]; ++ char *argv[16], *envp[4]; ++ ++ if ((path == NULL) || (strlen(path) == 0)) ++ return -ENOENT; ++ ++ snprintf(id, 15, "%d", run_args->id); ++ snprintf(chunk_size, 15, "%lu", (long unsigned)run_args->chunk_size); ++ snprintf(region_size, 15, "%lu",(long unsigned) run_args->region_size); ++ snprintf(thread_count, 15, "%u", run_args->thread_count); ++ snprintf(region_count, 15, "%u", run_args->region_count); ++ snprintf(offset, 15, "%lu", (long unsigned)run_args->offset); ++ snprintf(region_noise, 15, "%u", run_args->region_noise); ++ snprintf(chunk_noise, 15, "%u", run_args->chunk_noise); ++ snprintf(thread_delay, 15, "%u", run_args->thread_delay); ++ snprintf(flags, 15, "0x%x", run_args->flags); ++ snprintf(result, 7, "%d", rc); ++ ++ /* Passing 15 args to registered pre/post upcall */ ++ argv[0] = path; ++ argv[1] = phase; ++ argv[2] = strlen(run_args->log) ? run_args->log : ""; ++ argv[3] = id; ++ argv[4] = run_args->pool; ++ argv[5] = chunk_size; ++ argv[6] = region_size; ++ argv[7] = thread_count; ++ argv[8] = region_count; ++ argv[9] = offset; ++ argv[10] = region_noise; ++ argv[11] = chunk_noise; ++ argv[12] = thread_delay; ++ argv[13] = flags; ++ argv[14] = result; ++ argv[15] = NULL; ++ ++ /* Passing environment for user space upcall */ ++ envp[0] = "HOME=/"; ++ envp[1] = "TERM=linux"; ++ envp[2] = "PATH=/sbin:/usr/sbin:/bin:/usr/bin"; ++ envp[3] = NULL; ++ ++ return call_usermodehelper(path, argv, envp, 1); ++} ++ ++static uint64_t ++zpios_dmu_object_create(run_args_t *run_args, objset_t *os) ++{ ++ struct dmu_tx *tx; ++ uint64_t obj = 0ULL; ++ int rc; ++ ++ tx = dmu_tx_create(os); ++ dmu_tx_hold_write(tx, DMU_NEW_OBJECT, 0, OBJ_SIZE); ++ rc = dmu_tx_assign(tx, TXG_WAIT); ++ if (rc) { ++ zpios_print(run_args->file, ++ "dmu_tx_assign() failed: %d\n", rc); ++ dmu_tx_abort(tx); ++ return obj; ++ } ++ ++ obj = dmu_object_alloc(os, DMU_OT_UINT64_OTHER, 0, ++ DMU_OT_NONE, 0, tx); ++ rc = dmu_object_set_blocksize(os, obj, 128ULL << 10, 0, tx); ++ if (rc) { ++ zpios_print(run_args->file, ++ "dmu_object_set_blocksize() failed: %d\n", rc); ++ dmu_tx_abort(tx); ++ return obj; ++ } ++ ++ dmu_tx_commit(tx); ++ ++ return obj; ++} ++ ++static int ++zpios_dmu_object_free(run_args_t *run_args, objset_t *os, uint64_t obj) ++{ ++ struct dmu_tx *tx; ++ int rc; ++ ++ tx = dmu_tx_create(os); ++ dmu_tx_hold_free(tx, obj, 0, DMU_OBJECT_END); ++ rc = dmu_tx_assign(tx, TXG_WAIT); ++ if (rc) { ++ zpios_print(run_args->file, ++ "dmu_tx_assign() failed: %d\n", rc); ++ dmu_tx_abort(tx); ++ return rc; ++ } ++ ++ rc = dmu_object_free(os, obj, tx); ++ if (rc) { ++ zpios_print(run_args->file, ++ "dmu_object_free() failed: %d\n", rc); ++ dmu_tx_abort(tx); ++ return rc; ++ } ++ ++ dmu_tx_commit(tx); ++ ++ return 0; ++} ++ ++static int ++zpios_dmu_setup(run_args_t *run_args) ++{ ++ zpios_time_t *t = &(run_args->stats.cr_time); ++ objset_t *os; ++ char name[32]; ++ uint64_t obj = 0ULL; ++ int i, rc = 0, rc2; ++ ++ (void)zpios_upcall(run_args->pre, PHASE_PRE_CREATE, run_args, 0); ++ t->start = zpios_timespec_now(); ++ ++ (void)snprintf(name, 32, "%s/id_%d", run_args->pool, run_args->id); ++ rc = dmu_objset_create(name, DMU_OST_OTHER, 0, NULL, NULL); ++ if (rc) { ++ zpios_print(run_args->file, "Error dmu_objset_create(%s, ...) 
" ++ "failed: %d\n", name, rc); ++ goto out; ++ } ++ ++ rc = dmu_objset_own(name, DMU_OST_OTHER, 0, zpios_tag, &os); ++ if (rc) { ++ zpios_print(run_args->file, "Error dmu_objset_own(%s, ...) " ++ "failed: %d\n", name, rc); ++ goto out_destroy; ++ } ++ ++ if (!(run_args->flags & DMU_FPP)) { ++ obj = zpios_dmu_object_create(run_args, os); ++ if (obj == 0) { ++ rc = -EBADF; ++ zpios_print(run_args->file, "Error zpios_dmu_" ++ "object_create() failed, %d\n", rc); ++ goto out_destroy; ++ } ++ } ++ ++ for (i = 0; i < run_args->region_count; i++) { ++ zpios_region_t *region; ++ ++ region = &run_args->regions[i]; ++ mutex_init(®ion->lock, NULL, MUTEX_DEFAULT, NULL); ++ ++ if (run_args->flags & DMU_FPP) { ++ /* File per process */ ++ region->obj.os = os; ++ region->obj.obj = zpios_dmu_object_create(run_args, os); ++ ASSERT(region->obj.obj > 0); /* XXX - Handle this */ ++ region->wr_offset = run_args->offset; ++ region->rd_offset = run_args->offset; ++ region->init_offset = run_args->offset; ++ region->max_offset = run_args->offset + ++ run_args->region_size; ++ } else { ++ /* Single shared file */ ++ region->obj.os = os; ++ region->obj.obj = obj; ++ region->wr_offset = run_args->offset * i; ++ region->rd_offset = run_args->offset * i; ++ region->init_offset = run_args->offset * i; ++ region->max_offset = run_args->offset * ++ i + run_args->region_size; ++ } ++ } ++ ++ run_args->os = os; ++out_destroy: ++ if (rc) { ++ rc2 = dmu_objset_destroy(name, B_FALSE); ++ if (rc2) ++ zpios_print(run_args->file, "Error dmu_objset_destroy" ++ "(%s, ...) failed: %d\n", name, rc2); ++ } ++out: ++ t->stop = zpios_timespec_now(); ++ t->delta = zpios_timespec_sub(t->stop, t->start); ++ (void)zpios_upcall(run_args->post, PHASE_POST_CREATE, run_args, rc); ++ ++ return rc; ++} ++ ++static int ++zpios_setup_run(run_args_t **run_args, zpios_cmd_t *kcmd, struct file *file) ++{ ++ run_args_t *ra; ++ int rc, size; ++ ++ size = sizeof(*ra) + kcmd->cmd_region_count * sizeof(zpios_region_t); ++ ++ ra = vmem_zalloc(size, KM_SLEEP); ++ if (ra == NULL) { ++ zpios_print(file, "Unable to vmem_zalloc() %d bytes " ++ "for regions\n", size); ++ return -ENOMEM; ++ } ++ ++ *run_args = ra; ++ strncpy(ra->pool, kcmd->cmd_pool, ZPIOS_NAME_SIZE - 1); ++ strncpy(ra->pre, kcmd->cmd_pre, ZPIOS_PATH_SIZE - 1); ++ strncpy(ra->post, kcmd->cmd_post, ZPIOS_PATH_SIZE - 1); ++ strncpy(ra->log, kcmd->cmd_log, ZPIOS_PATH_SIZE - 1); ++ ra->id = kcmd->cmd_id; ++ ra->chunk_size = kcmd->cmd_chunk_size; ++ ra->thread_count = kcmd->cmd_thread_count; ++ ra->region_count = kcmd->cmd_region_count; ++ ra->region_size = kcmd->cmd_region_size; ++ ra->offset = kcmd->cmd_offset; ++ ra->region_noise = kcmd->cmd_region_noise; ++ ra->chunk_noise = kcmd->cmd_chunk_noise; ++ ra->thread_delay = kcmd->cmd_thread_delay; ++ ra->flags = kcmd->cmd_flags; ++ ra->stats.wr_data = 0; ++ ra->stats.wr_chunks = 0; ++ ra->stats.rd_data = 0; ++ ra->stats.rd_chunks = 0; ++ ra->region_next = 0; ++ ra->file = file; ++ mutex_init(&ra->lock_work, NULL, MUTEX_DEFAULT, NULL); ++ mutex_init(&ra->lock_ctl, NULL, MUTEX_DEFAULT, NULL); ++ ++ (void)zpios_upcall(ra->pre, PHASE_PRE_RUN, ra, 0); ++ ++ rc = zpios_dmu_setup(ra); ++ if (rc) { ++ mutex_destroy(&ra->lock_ctl); ++ mutex_destroy(&ra->lock_work); ++ vmem_free(ra, size); ++ *run_args = NULL; ++ } ++ ++ return rc; ++} ++ ++static int ++zpios_get_work_item(run_args_t *run_args, dmu_obj_t *obj, __u64 *offset, ++ __u32 *chunk_size, zpios_region_t **region, __u32 flags) ++{ ++ int i, j, count = 0; ++ unsigned int random_int; ++ ++ 
get_random_bytes(&random_int, sizeof(unsigned int)); ++ ++ mutex_enter(&run_args->lock_work); ++ i = run_args->region_next; ++ ++ /* XXX: I don't much care for this chunk selection mechansim ++ * there's the potential to burn a lot of time here doing nothing ++ * useful while holding the global lock. This could give some ++ * misleading performance results. I'll fix it latter. ++ */ ++ while (count < run_args->region_count) { ++ __u64 *rw_offset; ++ zpios_time_t *rw_time; ++ ++ j = i % run_args->region_count; ++ *region = &(run_args->regions[j]); ++ ++ if (flags & DMU_WRITE) { ++ rw_offset = &((*region)->wr_offset); ++ rw_time = &((*region)->stats.wr_time); ++ } else { ++ rw_offset = &((*region)->rd_offset); ++ rw_time = &((*region)->stats.rd_time); ++ } ++ ++ /* test if region is fully written */ ++ if (*rw_offset + *chunk_size > (*region)->max_offset) { ++ i++; ++ count++; ++ ++ if (unlikely(rw_time->stop.ts_sec == 0) && ++ unlikely(rw_time->stop.ts_nsec == 0)) ++ rw_time->stop = zpios_timespec_now(); ++ ++ continue; ++ } ++ ++ *offset = *rw_offset; ++ *obj = (*region)->obj; ++ *rw_offset += *chunk_size; ++ ++ /* update ctl structure */ ++ if (run_args->region_noise) { ++ get_random_bytes(&random_int, sizeof(unsigned int)); ++ run_args->region_next += random_int % run_args->region_noise; ++ } else { ++ run_args->region_next++; ++ } ++ ++ mutex_exit(&run_args->lock_work); ++ return 1; ++ } ++ ++ /* nothing left to do */ ++ mutex_exit(&run_args->lock_work); ++ ++ return 0; ++} ++ ++static void ++zpios_remove_objset(run_args_t *run_args) ++{ ++ zpios_time_t *t = &(run_args->stats.rm_time); ++ zpios_region_t *region; ++ char name[32]; ++ int rc = 0, i; ++ ++ (void)zpios_upcall(run_args->pre, PHASE_PRE_REMOVE, run_args, 0); ++ t->start = zpios_timespec_now(); ++ ++ (void)snprintf(name, 32, "%s/id_%d", run_args->pool, run_args->id); ++ ++ if (run_args->flags & DMU_REMOVE) { ++ if (run_args->flags & DMU_FPP) { ++ for (i = 0; i < run_args->region_count; i++) { ++ region = &run_args->regions[i]; ++ rc = zpios_dmu_object_free(run_args, ++ region->obj.os, ++ region->obj.obj); ++ if (rc) ++ zpios_print(run_args->file, "Error " ++ "removing object %d, %d\n", ++ (int)region->obj.obj, rc); ++ } ++ } else { ++ region = &run_args->regions[0]; ++ rc = zpios_dmu_object_free(run_args, ++ region->obj.os, ++ region->obj.obj); ++ if (rc) ++ zpios_print(run_args->file, "Error " ++ "removing object %d, %d\n", ++ (int)region->obj.obj, rc); ++ } ++ } ++ ++ dmu_objset_disown(run_args->os, zpios_tag); ++ ++ if (run_args->flags & DMU_REMOVE) { ++ rc = dmu_objset_destroy(name, B_FALSE); ++ if (rc) ++ zpios_print(run_args->file, "Error dmu_objset_destroy" ++ "(%s, ...) 
failed: %d\n", name, rc); ++ } ++ ++ t->stop = zpios_timespec_now(); ++ t->delta = zpios_timespec_sub(t->stop, t->start); ++ (void)zpios_upcall(run_args->post, PHASE_POST_REMOVE, run_args, rc); ++} ++ ++static void ++zpios_cleanup_run(run_args_t *run_args) ++{ ++ int i, size = 0; ++ ++ if (run_args == NULL) ++ return; ++ ++ if (run_args->threads != NULL) { ++ for (i = 0; i < run_args->thread_count; i++) { ++ if (run_args->threads[i]) { ++ mutex_destroy(&run_args->threads[i]->lock); ++ kmem_free(run_args->threads[i], ++ sizeof(thread_data_t)); ++ } ++ } ++ ++ kmem_free(run_args->threads, ++ sizeof(thread_data_t *) * run_args->thread_count); ++ } ++ ++ for (i = 0; i < run_args->region_count; i++) ++ mutex_destroy(&run_args->regions[i].lock); ++ ++ mutex_destroy(&run_args->lock_work); ++ mutex_destroy(&run_args->lock_ctl); ++ size = run_args->region_count * sizeof(zpios_region_t); ++ ++ vmem_free(run_args, sizeof(*run_args) + size); ++} ++ ++static int ++zpios_dmu_write(run_args_t *run_args, objset_t *os, uint64_t object, ++ uint64_t offset, uint64_t size, const void *buf) ++{ ++ struct dmu_tx *tx; ++ int rc, how = TXG_WAIT; ++// int flags = 0; ++ ++ if (run_args->flags & DMU_WRITE_NOWAIT) ++ how = TXG_NOWAIT; ++ ++ while (1) { ++ tx = dmu_tx_create(os); ++ dmu_tx_hold_write(tx, object, offset, size); ++ rc = dmu_tx_assign(tx, how); ++ ++ if (rc) { ++ if (rc == ERESTART && how == TXG_NOWAIT) { ++ dmu_tx_wait(tx); ++ dmu_tx_abort(tx); ++ continue; ++ } ++ zpios_print(run_args->file, ++ "Error in dmu_tx_assign(), %d", rc); ++ dmu_tx_abort(tx); ++ return rc; ++ } ++ break; ++ } ++ ++// if (run_args->flags & DMU_WRITE_ZC) ++// flags |= DMU_WRITE_ZEROCOPY; ++ ++ dmu_write(os, object, offset, size, buf, tx); ++ dmu_tx_commit(tx); ++ ++ return 0; ++} ++ ++static int ++zpios_dmu_read(run_args_t *run_args, objset_t *os, uint64_t object, ++ uint64_t offset, uint64_t size, void *buf) ++{ ++ int flags = 0; ++ ++// if (run_args->flags & DMU_READ_ZC) ++// flags |= DMU_READ_ZEROCOPY; ++ ++ if (run_args->flags & DMU_READ_NOPF) ++ flags |= DMU_READ_NO_PREFETCH; ++ ++ return dmu_read(os, object, offset, size, buf, flags); ++} ++ ++static int ++zpios_thread_main(void *data) ++{ ++ thread_data_t *thr = (thread_data_t *)data; ++ run_args_t *run_args = thr->run_args; ++ zpios_time_t t; ++ dmu_obj_t obj; ++ __u64 offset; ++ __u32 chunk_size; ++ zpios_region_t *region; ++ char *buf; ++ unsigned int random_int; ++ int chunk_noise = run_args->chunk_noise; ++ int chunk_noise_tmp = 0; ++ int thread_delay = run_args->thread_delay; ++ int thread_delay_tmp = 0; ++ int i, rc = 0; ++ ++ if (chunk_noise) { ++ get_random_bytes(&random_int, sizeof(unsigned int)); ++ chunk_noise_tmp = (random_int % (chunk_noise * 2))-chunk_noise; ++ } ++ ++ /* It's OK to vmem_alloc() this memory because it will be copied ++ * in to the slab and pointers to the slab copy will be setup in ++ * the bio when the IO is submitted. This of course is not ideal ++ * since we want a zero-copy IO path if possible. It would be nice ++ * to have direct access to those slab entries. ++ */ ++ chunk_size = run_args->chunk_size + chunk_noise_tmp; ++ buf = (char *)vmem_alloc(chunk_size, KM_SLEEP); ++ ASSERT(buf); ++ ++ /* Trivial data verification pattern for now. 
*/ ++ if (run_args->flags & DMU_VERIFY) ++ memset(buf, 'z', chunk_size); ++ ++ /* Write phase */ ++ mutex_enter(&thr->lock); ++ thr->stats.wr_time.start = zpios_timespec_now(); ++ mutex_exit(&thr->lock); ++ ++ while (zpios_get_work_item(run_args, &obj, &offset, ++ &chunk_size, ®ion, DMU_WRITE)) { ++ if (thread_delay) { ++ get_random_bytes(&random_int, sizeof(unsigned int)); ++ thread_delay_tmp = random_int % thread_delay; ++ set_current_state(TASK_UNINTERRUPTIBLE); ++ schedule_timeout(thread_delay_tmp); /* In jiffies */ ++ } ++ ++ t.start = zpios_timespec_now(); ++ rc = zpios_dmu_write(run_args, obj.os, obj.obj, ++ offset, chunk_size, buf); ++ t.stop = zpios_timespec_now(); ++ t.delta = zpios_timespec_sub(t.stop, t.start); ++ ++ if (rc) { ++ zpios_print(run_args->file, "IO error while doing " ++ "dmu_write(): %d\n", rc); ++ break; ++ } ++ ++ mutex_enter(&thr->lock); ++ thr->stats.wr_data += chunk_size; ++ thr->stats.wr_chunks++; ++ thr->stats.wr_time.delta = zpios_timespec_add( ++ thr->stats.wr_time.delta, t.delta); ++ mutex_exit(&thr->lock); ++ ++ mutex_enter(®ion->lock); ++ region->stats.wr_data += chunk_size; ++ region->stats.wr_chunks++; ++ region->stats.wr_time.delta = zpios_timespec_add( ++ region->stats.wr_time.delta, t.delta); ++ ++ /* First time region was accessed */ ++ if (region->init_offset == offset) ++ region->stats.wr_time.start = t.start; ++ ++ mutex_exit(®ion->lock); ++ } ++ ++ mutex_enter(&run_args->lock_ctl); ++ run_args->threads_done++; ++ mutex_exit(&run_args->lock_ctl); ++ ++ mutex_enter(&thr->lock); ++ thr->rc = rc; ++ thr->stats.wr_time.stop = zpios_timespec_now(); ++ mutex_exit(&thr->lock); ++ wake_up(&run_args->waitq); ++ ++ set_current_state(TASK_UNINTERRUPTIBLE); ++ schedule(); ++ ++ /* Check if we should exit */ ++ mutex_enter(&thr->lock); ++ rc = thr->rc; ++ mutex_exit(&thr->lock); ++ if (rc) ++ goto out; ++ ++ /* Read phase */ ++ mutex_enter(&thr->lock); ++ thr->stats.rd_time.start = zpios_timespec_now(); ++ mutex_exit(&thr->lock); ++ ++ while (zpios_get_work_item(run_args, &obj, &offset, ++ &chunk_size, ®ion, DMU_READ)) { ++ if (thread_delay) { ++ get_random_bytes(&random_int, sizeof(unsigned int)); ++ thread_delay_tmp = random_int % thread_delay; ++ set_current_state(TASK_UNINTERRUPTIBLE); ++ schedule_timeout(thread_delay_tmp); /* In jiffies */ ++ } ++ ++ if (run_args->flags & DMU_VERIFY) ++ memset(buf, 0, chunk_size); ++ ++ t.start = zpios_timespec_now(); ++ rc = zpios_dmu_read(run_args, obj.os, obj.obj, ++ offset, chunk_size, buf); ++ t.stop = zpios_timespec_now(); ++ t.delta = zpios_timespec_sub(t.stop, t.start); ++ ++ if (rc) { ++ zpios_print(run_args->file, "IO error while doing " ++ "dmu_read(): %d\n", rc); ++ break; ++ } ++ ++ /* Trivial data verification, expensive! 
*/ ++ if (run_args->flags & DMU_VERIFY) { ++ for (i = 0; i < chunk_size; i++) { ++ if (buf[i] != 'z') { ++ zpios_print(run_args->file, ++ "IO verify error: %d/%d/%d\n", ++ (int)obj.obj, (int)offset, ++ (int)chunk_size); ++ break; ++ } ++ } ++ } ++ ++ mutex_enter(&thr->lock); ++ thr->stats.rd_data += chunk_size; ++ thr->stats.rd_chunks++; ++ thr->stats.rd_time.delta = zpios_timespec_add( ++ thr->stats.rd_time.delta, t.delta); ++ mutex_exit(&thr->lock); ++ ++ mutex_enter(®ion->lock); ++ region->stats.rd_data += chunk_size; ++ region->stats.rd_chunks++; ++ region->stats.rd_time.delta = zpios_timespec_add( ++ region->stats.rd_time.delta, t.delta); ++ ++ /* First time region was accessed */ ++ if (region->init_offset == offset) ++ region->stats.rd_time.start = t.start; ++ ++ mutex_exit(®ion->lock); ++ } ++ ++ mutex_enter(&run_args->lock_ctl); ++ run_args->threads_done++; ++ mutex_exit(&run_args->lock_ctl); ++ ++ mutex_enter(&thr->lock); ++ thr->rc = rc; ++ thr->stats.rd_time.stop = zpios_timespec_now(); ++ mutex_exit(&thr->lock); ++ wake_up(&run_args->waitq); ++ ++out: ++ vmem_free(buf, chunk_size); ++ do_exit(0); ++ ++ return rc; /* Unreachable, due to do_exit() */ ++} ++ ++static int ++zpios_thread_done(run_args_t *run_args) ++{ ++ ASSERT(run_args->threads_done <= run_args->thread_count); ++ return (run_args->threads_done == run_args->thread_count); ++} ++ ++static int ++zpios_threads_run(run_args_t *run_args) ++{ ++ struct task_struct *tsk, **tsks; ++ thread_data_t *thr = NULL; ++ zpios_time_t *tt = &(run_args->stats.total_time); ++ zpios_time_t *tw = &(run_args->stats.wr_time); ++ zpios_time_t *tr = &(run_args->stats.rd_time); ++ int i, rc = 0, tc = run_args->thread_count; ++ ++ tsks = kmem_zalloc(sizeof(struct task_struct *) * tc, KM_SLEEP); ++ if (tsks == NULL) { ++ rc = -ENOMEM; ++ goto cleanup2; ++ } ++ ++ run_args->threads = kmem_zalloc(sizeof(thread_data_t *) * tc, KM_SLEEP); ++ if (run_args->threads == NULL) { ++ rc = -ENOMEM; ++ goto cleanup; ++ } ++ ++ init_waitqueue_head(&run_args->waitq); ++ run_args->threads_done = 0; ++ ++ /* Create all the needed threads which will sleep until awoken */ ++ for (i = 0; i < tc; i++) { ++ thr = kmem_zalloc(sizeof(thread_data_t), KM_SLEEP); ++ if (thr == NULL) { ++ rc = -ENOMEM; ++ goto taskerr; ++ } ++ ++ thr->thread_no = i; ++ thr->run_args = run_args; ++ thr->rc = 0; ++ mutex_init(&thr->lock, NULL, MUTEX_DEFAULT, NULL); ++ run_args->threads[i] = thr; ++ ++ tsk = kthread_create(zpios_thread_main, (void *)thr, ++ "%s/%d", "zpios_io", i); ++ if (IS_ERR(tsk)) { ++ rc = -EINVAL; ++ goto taskerr; ++ } ++ ++ tsks[i] = tsk; ++ } ++ ++ tt->start = zpios_timespec_now(); ++ ++ /* Wake up all threads for write phase */ ++ (void)zpios_upcall(run_args->pre, PHASE_PRE_WRITE, run_args, 0); ++ for (i = 0; i < tc; i++) ++ wake_up_process(tsks[i]); ++ ++ /* Wait for write phase to complete */ ++ tw->start = zpios_timespec_now(); ++ wait_event(run_args->waitq, zpios_thread_done(run_args)); ++ tw->stop = zpios_timespec_now(); ++ (void)zpios_upcall(run_args->post, PHASE_POST_WRITE, run_args, rc); ++ ++ for (i = 0; i < tc; i++) { ++ thr = run_args->threads[i]; ++ ++ mutex_enter(&thr->lock); ++ ++ if (!rc && thr->rc) ++ rc = thr->rc; ++ ++ run_args->stats.wr_data += thr->stats.wr_data; ++ run_args->stats.wr_chunks += thr->stats.wr_chunks; ++ mutex_exit(&thr->lock); ++ } ++ ++ if (rc) { ++ /* Wake up all threads and tell them to exit */ ++ for (i = 0; i < tc; i++) { ++ mutex_enter(&thr->lock); ++ thr->rc = rc; ++ mutex_exit(&thr->lock); ++ ++ wake_up_process(tsks[i]); 
++ } ++ goto out; ++ } ++ ++ mutex_enter(&run_args->lock_ctl); ++ ASSERT(run_args->threads_done == run_args->thread_count); ++ run_args->threads_done = 0; ++ mutex_exit(&run_args->lock_ctl); ++ ++ /* Wake up all threads for read phase */ ++ (void)zpios_upcall(run_args->pre, PHASE_PRE_READ, run_args, 0); ++ for (i = 0; i < tc; i++) ++ wake_up_process(tsks[i]); ++ ++ /* Wait for read phase to complete */ ++ tr->start = zpios_timespec_now(); ++ wait_event(run_args->waitq, zpios_thread_done(run_args)); ++ tr->stop = zpios_timespec_now(); ++ (void)zpios_upcall(run_args->post, PHASE_POST_READ, run_args, rc); ++ ++ for (i = 0; i < tc; i++) { ++ thr = run_args->threads[i]; ++ ++ mutex_enter(&thr->lock); ++ ++ if (!rc && thr->rc) ++ rc = thr->rc; ++ ++ run_args->stats.rd_data += thr->stats.rd_data; ++ run_args->stats.rd_chunks += thr->stats.rd_chunks; ++ mutex_exit(&thr->lock); ++ } ++out: ++ tt->stop = zpios_timespec_now(); ++ tt->delta = zpios_timespec_sub(tt->stop, tt->start); ++ tw->delta = zpios_timespec_sub(tw->stop, tw->start); ++ tr->delta = zpios_timespec_sub(tr->stop, tr->start); ++ ++cleanup: ++ kmem_free(tsks, sizeof(struct task_struct *) * tc); ++cleanup2: ++ /* Returns first encountered thread error (if any) */ ++ return rc; ++ ++taskerr: ++ /* Destroy all threads that were created successfully */ ++ for (i = 0; i < tc; i++) ++ if (tsks[i] != NULL) ++ (void) kthread_stop(tsks[i]); ++ ++ goto cleanup; ++} ++ ++static int ++zpios_do_one_run(struct file *file, zpios_cmd_t *kcmd, ++ int data_size, void *data) ++{ ++ run_args_t *run_args = { 0 }; ++ zpios_stats_t *stats = (zpios_stats_t *)data; ++ int i, n, m, size, rc; ++ ++ if ((!kcmd->cmd_chunk_size) || (!kcmd->cmd_region_size) || ++ (!kcmd->cmd_thread_count) || (!kcmd->cmd_region_count)) { ++ zpios_print(file, "Invalid chunk_size, region_size, " ++ "thread_count, or region_count, %d\n", -EINVAL); ++ return -EINVAL; ++ } ++ ++ if (!(kcmd->cmd_flags & DMU_WRITE) || ++ !(kcmd->cmd_flags & DMU_READ)) { ++ zpios_print(file, "Invalid flags, minimally DMU_WRITE " ++ "and DMU_READ must be set, %d\n", -EINVAL); ++ return -EINVAL; ++ } ++ ++ if ((kcmd->cmd_flags & (DMU_WRITE_ZC | DMU_READ_ZC)) && ++ (kcmd->cmd_flags & DMU_VERIFY)) { ++ zpios_print(file, "Invalid flags, DMU_*_ZC incompatible " ++ "with DMU_VERIFY, used for performance analysis " ++ "only, %d\n", -EINVAL); ++ return -EINVAL; ++ } ++ ++ /* Opaque data on return contains structs of the following form: ++ * ++ * zpios_stat_t stats[]; ++ * stats[0] = run_args->stats; ++ * stats[1-N] = threads[N]->stats; ++ * stats[N+1-M] = regions[M]->stats; ++ * ++ * Where N is the number of threads, and M is the number of regions. 
++ */ ++ size = (sizeof(zpios_stats_t) + ++ (kcmd->cmd_thread_count * sizeof(zpios_stats_t)) + ++ (kcmd->cmd_region_count * sizeof(zpios_stats_t))); ++ if (data_size < size) { ++ zpios_print(file, "Invalid size, command data buffer " ++ "size too small, (%d < %d)\n", data_size, size); ++ return -ENOSPC; ++ } ++ ++ rc = zpios_setup_run(&run_args, kcmd, file); ++ if (rc) ++ return rc; ++ ++ rc = zpios_threads_run(run_args); ++ zpios_remove_objset(run_args); ++ if (rc) ++ goto cleanup; ++ ++ if (stats) { ++ n = 1; ++ m = 1 + kcmd->cmd_thread_count; ++ stats[0] = run_args->stats; ++ ++ for (i = 0; i < kcmd->cmd_thread_count; i++) ++ stats[n+i] = run_args->threads[i]->stats; ++ ++ for (i = 0; i < kcmd->cmd_region_count; i++) ++ stats[m+i] = run_args->regions[i].stats; ++ } ++ ++cleanup: ++ zpios_cleanup_run(run_args); ++ ++ (void)zpios_upcall(kcmd->cmd_post, PHASE_POST_RUN, run_args, 0); ++ ++ return rc; ++} ++ ++static int ++zpios_open(struct inode *inode, struct file *file) ++{ ++ unsigned int minor = iminor(inode); ++ zpios_info_t *info; ++ ++ if (minor >= ZPIOS_MINORS) ++ return -ENXIO; ++ ++ info = (zpios_info_t *)kmem_alloc(sizeof(*info), KM_SLEEP); ++ if (info == NULL) ++ return -ENOMEM; ++ ++ spin_lock_init(&info->info_lock); ++ info->info_size = ZPIOS_INFO_BUFFER_SIZE; ++ info->info_buffer = (char *)vmem_alloc(ZPIOS_INFO_BUFFER_SIZE,KM_SLEEP); ++ if (info->info_buffer == NULL) { ++ kmem_free(info, sizeof(*info)); ++ return -ENOMEM; ++ } ++ ++ info->info_head = info->info_buffer; ++ file->private_data = (void *)info; ++ ++ return 0; ++} ++ ++static int ++zpios_release(struct inode *inode, struct file *file) ++{ ++ unsigned int minor = iminor(inode); ++ zpios_info_t *info = (zpios_info_t *)file->private_data; ++ ++ if (minor >= ZPIOS_MINORS) ++ return -ENXIO; ++ ++ ASSERT(info); ++ ASSERT(info->info_buffer); ++ ++ vmem_free(info->info_buffer, ZPIOS_INFO_BUFFER_SIZE); ++ kmem_free(info, sizeof(*info)); ++ ++ return 0; ++} ++ ++static int ++zpios_buffer_clear(struct file *file, zpios_cfg_t *kcfg, unsigned long arg) ++{ ++ zpios_info_t *info = (zpios_info_t *)file->private_data; ++ ++ ASSERT(info); ++ ASSERT(info->info_buffer); ++ ++ spin_lock(&info->info_lock); ++ memset(info->info_buffer, 0, info->info_size); ++ info->info_head = info->info_buffer; ++ spin_unlock(&info->info_lock); ++ ++ return 0; ++} ++ ++static int ++zpios_buffer_size(struct file *file, zpios_cfg_t *kcfg, unsigned long arg) ++{ ++ zpios_info_t *info = (zpios_info_t *)file->private_data; ++ char *buf; ++ int min, size, rc = 0; ++ ++ ASSERT(info); ++ ASSERT(info->info_buffer); ++ ++ spin_lock(&info->info_lock); ++ if (kcfg->cfg_arg1 > 0) { ++ ++ size = kcfg->cfg_arg1; ++ buf = (char *)vmem_alloc(size, KM_SLEEP); ++ if (buf == NULL) { ++ rc = -ENOMEM; ++ goto out; ++ } ++ ++ /* Zero fill and truncate contents when coping buffer */ ++ min = ((size < info->info_size) ? 
size : info->info_size); ++ memset(buf, 0, size); ++ memcpy(buf, info->info_buffer, min); ++ vmem_free(info->info_buffer, info->info_size); ++ info->info_size = size; ++ info->info_buffer = buf; ++ info->info_head = info->info_buffer; ++ } ++ ++ kcfg->cfg_rc1 = info->info_size; ++ ++ if (copy_to_user((struct zpios_cfg_t __user *)arg, kcfg, sizeof(*kcfg))) ++ rc = -EFAULT; ++out: ++ spin_unlock(&info->info_lock); ++ ++ return rc; ++} ++ ++static int ++zpios_ioctl_cfg(struct file *file, unsigned long arg) ++{ ++ zpios_cfg_t kcfg; ++ int rc = 0; ++ ++ if (copy_from_user(&kcfg, (zpios_cfg_t *)arg, sizeof(kcfg))) ++ return -EFAULT; ++ ++ if (kcfg.cfg_magic != ZPIOS_CFG_MAGIC) { ++ zpios_print(file, "Bad config magic 0x%x != 0x%x\n", ++ kcfg.cfg_magic, ZPIOS_CFG_MAGIC); ++ return -EINVAL; ++ } ++ ++ switch (kcfg.cfg_cmd) { ++ case ZPIOS_CFG_BUFFER_CLEAR: ++ /* cfg_arg1 - Unused ++ * cfg_rc1 - Unused ++ */ ++ rc = zpios_buffer_clear(file, &kcfg, arg); ++ break; ++ case ZPIOS_CFG_BUFFER_SIZE: ++ /* cfg_arg1 - 0 - query size; >0 resize ++ * cfg_rc1 - Set to current buffer size ++ */ ++ rc = zpios_buffer_size(file, &kcfg, arg); ++ break; ++ default: ++ zpios_print(file, "Bad config command %d\n", ++ kcfg.cfg_cmd); ++ rc = -EINVAL; ++ break; ++ } ++ ++ return rc; ++} ++ ++static int ++zpios_ioctl_cmd(struct file *file, unsigned long arg) ++{ ++ zpios_cmd_t *kcmd; ++ void *data = NULL; ++ int rc = -EINVAL; ++ ++ kcmd = kmem_alloc(sizeof(zpios_cmd_t), KM_SLEEP); ++ if (kcmd == NULL) { ++ zpios_print(file, "Unable to kmem_alloc() %ld byte for " ++ "zpios_cmd_t\n", (long int)sizeof(zpios_cmd_t)); ++ return -ENOMEM; ++ } ++ ++ rc = copy_from_user(kcmd, (zpios_cfg_t *)arg, sizeof(zpios_cmd_t)); ++ if (rc) { ++ zpios_print(file, "Unable to copy command structure " ++ "from user to kernel memory, %d\n", rc); ++ goto out_cmd; ++ } ++ ++ if (kcmd->cmd_magic != ZPIOS_CMD_MAGIC) { ++ zpios_print(file, "Bad command magic 0x%x != 0x%x\n", ++ kcmd->cmd_magic, ZPIOS_CFG_MAGIC); ++ rc = -EINVAL; ++ goto out_cmd; ++ } ++ ++ /* Allocate memory for any opaque data the caller needed to pass on */ ++ if (kcmd->cmd_data_size > 0) { ++ data = (void *)vmem_alloc(kcmd->cmd_data_size, KM_SLEEP); ++ if (data == NULL) { ++ zpios_print(file, "Unable to vmem_alloc() %ld " ++ "bytes for data buffer\n", ++ (long)kcmd->cmd_data_size); ++ rc = -ENOMEM; ++ goto out_cmd; ++ } ++ ++ rc = copy_from_user(data, (void *)(arg + offsetof(zpios_cmd_t, ++ cmd_data_str)), kcmd->cmd_data_size); ++ if (rc) { ++ zpios_print(file, "Unable to copy data buffer " ++ "from user to kernel memory, %d\n", rc); ++ goto out_data; ++ } ++ } ++ ++ rc = zpios_do_one_run(file, kcmd, kcmd->cmd_data_size, data); ++ ++ if (data != NULL) { ++ /* If the test failed do not print out the stats */ ++ if (rc) ++ goto out_data; ++ ++ rc = copy_to_user((void *)(arg + offsetof(zpios_cmd_t, ++ cmd_data_str)), data, kcmd->cmd_data_size); ++ if (rc) { ++ zpios_print(file, "Unable to copy data buffer " ++ "from kernel to user memory, %d\n", rc); ++ rc = -EFAULT; ++ } ++ ++out_data: ++ vmem_free(data, kcmd->cmd_data_size); ++ } ++out_cmd: ++ kmem_free(kcmd, sizeof(zpios_cmd_t)); ++ ++ return rc; ++} ++ ++static long ++zpios_unlocked_ioctl(struct file *file, unsigned int cmd, unsigned long arg) ++{ ++ unsigned int minor = iminor(file->f_dentry->d_inode); ++ int rc = 0; ++ ++ /* Ignore tty ioctls */ ++ if ((cmd & 0xffffff00) == ((int)'T') << 8) ++ return -ENOTTY; ++ ++ if (minor >= ZPIOS_MINORS) ++ return -ENXIO; ++ ++ switch (cmd) { ++ case ZPIOS_CFG: ++ rc = zpios_ioctl_cfg(file, 
arg); ++ break; ++ case ZPIOS_CMD: ++ rc = zpios_ioctl_cmd(file, arg); ++ break; ++ default: ++ zpios_print(file, "Bad ioctl command %d\n", cmd); ++ rc = -EINVAL; ++ break; ++ } ++ ++ return rc; ++} ++ ++#ifdef CONFIG_COMPAT ++/* Compatibility handler for ioctls from 32-bit ELF binaries */ ++static long ++zpios_compat_ioctl(struct file *file, unsigned int cmd, unsigned long arg) ++{ ++ return zpios_unlocked_ioctl(file, cmd, arg); ++} ++#endif /* CONFIG_COMPAT */ ++ ++/* I'm not sure why you would want to write in to this buffer from ++ * user space since its principle use is to pass test status info ++ * back to the user space, but I don't see any reason to prevent it. ++ */ ++static ssize_t ++zpios_write(struct file *file, const char __user *buf, ++ size_t count, loff_t *ppos) ++{ ++ unsigned int minor = iminor(file->f_dentry->d_inode); ++ zpios_info_t *info = (zpios_info_t *)file->private_data; ++ int rc = 0; ++ ++ if (minor >= ZPIOS_MINORS) ++ return -ENXIO; ++ ++ ASSERT(info); ++ ASSERT(info->info_buffer); ++ ++ spin_lock(&info->info_lock); ++ ++ /* Write beyond EOF */ ++ if (*ppos >= info->info_size) { ++ rc = -EFBIG; ++ goto out; ++ } ++ ++ /* Resize count if beyond EOF */ ++ if (*ppos + count > info->info_size) ++ count = info->info_size - *ppos; ++ ++ if (copy_from_user(info->info_buffer, buf, count)) { ++ rc = -EFAULT; ++ goto out; ++ } ++ ++ *ppos += count; ++ rc = count; ++out: ++ spin_unlock(&info->info_lock); ++ return rc; ++} ++ ++static ssize_t ++zpios_read(struct file *file, char __user *buf, ++ size_t count, loff_t *ppos) ++{ ++ unsigned int minor = iminor(file->f_dentry->d_inode); ++ zpios_info_t *info = (zpios_info_t *)file->private_data; ++ int rc = 0; ++ ++ if (minor >= ZPIOS_MINORS) ++ return -ENXIO; ++ ++ ASSERT(info); ++ ASSERT(info->info_buffer); ++ ++ spin_lock(&info->info_lock); ++ ++ /* Read beyond EOF */ ++ if (*ppos >= info->info_size) ++ goto out; ++ ++ /* Resize count if beyond EOF */ ++ if (*ppos + count > info->info_size) ++ count = info->info_size - *ppos; ++ ++ if (copy_to_user(buf, info->info_buffer + *ppos, count)) { ++ rc = -EFAULT; ++ goto out; ++ } ++ ++ *ppos += count; ++ rc = count; ++out: ++ spin_unlock(&info->info_lock); ++ return rc; ++} ++ ++static loff_t zpios_seek(struct file *file, loff_t offset, int origin) ++{ ++ unsigned int minor = iminor(file->f_dentry->d_inode); ++ zpios_info_t *info = (zpios_info_t *)file->private_data; ++ int rc = -EINVAL; ++ ++ if (minor >= ZPIOS_MINORS) ++ return -ENXIO; ++ ++ ASSERT(info); ++ ASSERT(info->info_buffer); ++ ++ spin_lock(&info->info_lock); ++ ++ switch (origin) { ++ case 0: /* SEEK_SET - No-op just do it */ ++ break; ++ case 1: /* SEEK_CUR - Seek from current */ ++ offset = file->f_pos + offset; ++ break; ++ case 2: /* SEEK_END - Seek from end */ ++ offset = info->info_size + offset; ++ break; ++ } ++ ++ if (offset >= 0) { ++ file->f_pos = offset; ++ file->f_version = 0; ++ rc = offset; ++ } ++ ++ spin_unlock(&info->info_lock); ++ ++ return rc; ++} ++ ++static struct cdev zpios_cdev; ++static struct file_operations zpios_fops = { ++ .owner = THIS_MODULE, ++ .open = zpios_open, ++ .release = zpios_release, ++ .unlocked_ioctl = zpios_unlocked_ioctl, ++#ifdef CONFIG_COMPAT ++ .compat_ioctl = zpios_compat_ioctl, ++#endif ++ .read = zpios_read, ++ .write = zpios_write, ++ .llseek = zpios_seek, ++}; ++ ++static int ++zpios_init(void) ++{ ++ dev_t dev; ++ int rc; ++ ++ dev = MKDEV(ZPIOS_MAJOR, 0); ++ if ((rc = register_chrdev_region(dev, ZPIOS_MINORS, ZPIOS_NAME))) ++ goto error; ++ ++ /* Support for 
registering a character driver */ ++ cdev_init(&zpios_cdev, &zpios_fops); ++ zpios_cdev.owner = THIS_MODULE; ++ kobject_set_name(&zpios_cdev.kobj, ZPIOS_NAME); ++ if ((rc = cdev_add(&zpios_cdev, dev, ZPIOS_MINORS))) { ++ printk(KERN_ERR "ZPIOS: Error adding cdev, %d\n", rc); ++ kobject_put(&zpios_cdev.kobj); ++ unregister_chrdev_region(dev, ZPIOS_MINORS); ++ goto error; ++ } ++ ++ /* Support for udev make driver info available in sysfs */ ++ zpios_class = spl_class_create(THIS_MODULE, ZPIOS_NAME); ++ if (IS_ERR(zpios_class)) { ++ rc = PTR_ERR(zpios_class); ++ printk(KERN_ERR "ZPIOS: Error creating zpios class, %d\n", rc); ++ cdev_del(&zpios_cdev); ++ unregister_chrdev_region(dev, ZPIOS_MINORS); ++ goto error; ++ } ++ ++ zpios_device = spl_device_create(zpios_class, NULL, ++ dev, NULL, ZPIOS_NAME); ++ return 0; ++error: ++ printk(KERN_ERR "ZPIOS: Error registering zpios device, %d\n", rc); ++ return rc; ++} ++ ++static int ++zpios_fini(void) ++{ ++ dev_t dev = MKDEV(ZPIOS_MAJOR, 0); ++ ++ spl_device_destroy(zpios_class, zpios_device, dev); ++ spl_class_destroy(zpios_class); ++ cdev_del(&zpios_cdev); ++ unregister_chrdev_region(dev, ZPIOS_MINORS); ++ ++ return 0; ++} ++ ++spl_module_init(zpios_init); ++spl_module_exit(zpios_fini); ++ ++MODULE_AUTHOR("LLNL / Sun"); ++MODULE_DESCRIPTION("Kernel PIOS implementation"); ++MODULE_LICENSE("GPL"); +diff -uNr linux-3.2.33-go.orig/include/spl/fs/fs_subr.h linux-3.2.33-go/include/spl/fs/fs_subr.h +--- linux-3.2.33-go.orig/include/spl/fs/fs_subr.h 1970-01-01 01:00:00.000000000 +0100 ++++ linux-3.2.33-go/include/spl/fs/fs_subr.h 2012-11-16 23:22:32.403192942 +0100 +@@ -0,0 +1,28 @@ ++/*****************************************************************************\ ++ * Copyright (C) 2007-2010 Lawrence Livermore National Security, LLC. ++ * Copyright (C) 2007 The Regents of the University of California. ++ * Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER). ++ * Written by Brian Behlendorf . ++ * UCRL-CODE-235197 ++ * ++ * This file is part of the SPL, Solaris Porting Layer. ++ * For details, see . ++ * ++ * The SPL is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License as published by the ++ * Free Software Foundation; either version 2 of the License, or (at your ++ * option) any later version. ++ * ++ * The SPL is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * for more details. ++ * ++ * You should have received a copy of the GNU General Public License along ++ * with the SPL. If not, see . ++\*****************************************************************************/ ++ ++#ifndef _SPL_FS_FS_SUBR_H ++#define _SPL_FS_FS_SUBR_H ++ ++#endif /* SPL_FS_FS_SUBR_H */ +diff -uNr linux-3.2.33-go.orig/include/spl/linux/bitops_compat.h linux-3.2.33-go/include/spl/linux/bitops_compat.h +--- linux-3.2.33-go.orig/include/spl/linux/bitops_compat.h 1970-01-01 01:00:00.000000000 +0100 ++++ linux-3.2.33-go/include/spl/linux/bitops_compat.h 2012-11-16 23:22:32.407192896 +0100 +@@ -0,0 +1,43 @@ ++/*****************************************************************************\ ++ * Copyright (C) 2007-2010 Lawrence Livermore National Security, LLC. ++ * Copyright (C) 2007 The Regents of the University of California. ++ * Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER). ++ * Written by Brian Behlendorf . 
++ * UCRL-CODE-235197 ++ * ++ * This file is part of the SPL, Solaris Porting Layer. ++ * For details, see . ++ * ++ * The SPL is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License as published by the ++ * Free Software Foundation; either version 2 of the License, or (at your ++ * option) any later version. ++ * ++ * The SPL is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * for more details. ++ * ++ * You should have received a copy of the GNU General Public License along ++ * with the SPL. If not, see . ++\*****************************************************************************/ ++ ++#ifndef _SPL_BITOPS_COMPAT_H ++#define _SPL_BITOPS_COMPAT_H ++ ++#include ++ ++#ifndef HAVE_FLS64 ++ ++static inline int fls64(__u64 x) ++{ ++ __u32 h = x >> 32; ++ if (h) ++ return fls(h) + 32; ++ return fls(x); ++} ++ ++#endif /* HAVE_FLS64 */ ++ ++#endif /* _SPL_BITOPS_COMPAT_H */ ++ +diff -uNr linux-3.2.33-go.orig/include/spl/linux/compiler_compat.h linux-3.2.33-go/include/spl/linux/compiler_compat.h +--- linux-3.2.33-go.orig/include/spl/linux/compiler_compat.h 1970-01-01 01:00:00.000000000 +0100 ++++ linux-3.2.33-go/include/spl/linux/compiler_compat.h 2012-11-16 23:22:32.407192896 +0100 +@@ -0,0 +1,47 @@ ++/*****************************************************************************\ ++ * Copyright (C) 2007-2010 Lawrence Livermore National Security, LLC. ++ * Copyright (C) 2007 The Regents of the University of California. ++ * Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER). ++ * Written by Brian Behlendorf . ++ * UCRL-CODE-235197 ++ * ++ * This file is part of the SPL, Solaris Porting Layer. ++ * For details, see . ++ * ++ * The SPL is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License as published by the ++ * Free Software Foundation; either version 2 of the License, or (at your ++ * option) any later version. ++ * ++ * The SPL is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * for more details. ++ * ++ * You should have received a copy of the GNU General Public License along ++ * with the SPL. If not, see . ++\*****************************************************************************/ ++ ++#ifndef _SPL_COMPILER_COMPAT_H ++#define _SPL_COMPILER_COMPAT_H ++ ++#include ++ ++#ifndef ACCESS_ONCE ++/* ++ * Prevent the compiler from merging or refetching accesses. The compiler ++ * is also forbidden from reordering successive instances of ACCESS_ONCE(), ++ * but only when the compiler is aware of some particular ordering. One way ++ * to make the compiler aware of ordering is to put the two invocations of ++ * ACCESS_ONCE() in different C statements. ++ * ++ * This macro does absolutely -nothing- to prevent the CPU from reordering, ++ * merging, or refetching absolutely anything at any time. Its main intended ++ * use is to mediate communication between process-level code and irq/NMI ++ * handlers, all running on the same CPU. 
++ */ ++/* Taken from 2.6.33.2 */ ++# define ACCESS_ONCE(x) (*(volatile typeof(x) *)&(x)) ++#endif ++ ++#endif /* _SPL_COMPILER_COMPAT_H */ +diff -uNr linux-3.2.33-go.orig/include/spl/linux/file_compat.h linux-3.2.33-go/include/spl/linux/file_compat.h +--- linux-3.2.33-go.orig/include/spl/linux/file_compat.h 1970-01-01 01:00:00.000000000 +0100 ++++ linux-3.2.33-go/include/spl/linux/file_compat.h 2012-11-16 23:22:32.407192896 +0100 +@@ -0,0 +1,93 @@ ++/*****************************************************************************\ ++ * Copyright (C) 2007-2010 Lawrence Livermore National Security, LLC. ++ * Copyright (C) 2007 The Regents of the University of California. ++ * Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER). ++ * Written by Brian Behlendorf . ++ * UCRL-CODE-235197 ++ * ++ * This file is part of the SPL, Solaris Porting Layer. ++ * For details, see . ++ * ++ * The SPL is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License as published by the ++ * Free Software Foundation; either version 2 of the License, or (at your ++ * option) any later version. ++ * ++ * The SPL is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * for more details. ++ * ++ * You should have received a copy of the GNU General Public License along ++ * with the SPL. If not, see . ++\*****************************************************************************/ ++ ++#ifndef _SPL_FILE_COMPAT_H ++#define _SPL_FILE_COMPAT_H ++ ++#include ++#ifdef HAVE_FDTABLE_HEADER ++#include ++#endif ++ ++static inline struct file * ++spl_filp_open(const char *name, int flags, int mode, int *err) ++{ ++ struct file *filp = NULL; ++ int rc; ++ ++ filp = filp_open(name, flags, mode); ++ if (IS_ERR(filp)) { ++ rc = PTR_ERR(filp); ++ if (err) ++ *err = rc; ++ filp = NULL; ++ } ++ return filp; ++} ++ ++#define spl_filp_close(f) filp_close(f, NULL) ++#define spl_filp_poff(f) (&(f)->f_pos) ++#define spl_filp_write(fp, b, s, p) (fp)->f_op->write((fp), (b), (s), p) ++ ++#ifdef HAVE_VFS_FSYNC ++# ifdef HAVE_2ARGS_VFS_FSYNC ++# define spl_filp_fsync(fp, sync) vfs_fsync(fp, sync) ++# else ++# define spl_filp_fsync(fp, sync) vfs_fsync(fp, (fp)->f_dentry, sync) ++# endif /* HAVE_2ARGS_VFS_FSYNC */ ++#else ++# include ++# define spl_filp_fsync(fp, sync) file_fsync(fp, (fp)->f_dentry, sync) ++#endif /* HAVE_VFS_FSYNC */ ++ ++#ifdef HAVE_INODE_I_MUTEX ++#define spl_inode_lock(ip) (mutex_lock(&(ip)->i_mutex)) ++#define spl_inode_lock_nested(ip, type) (mutex_lock_nested((&(ip)->i_mutex), \ ++ (type))) ++#define spl_inode_unlock(ip) (mutex_unlock(&(ip)->i_mutex)) ++#else ++#define spl_inode_lock(ip) (down(&(ip)->i_sem)) ++#define spl_inode_unlock(ip) (up(&(ip)->i_sem)) ++#endif /* HAVE_INODE_I_MUTEX */ ++ ++#ifdef HAVE_KERN_PATH_PARENT_HEADER ++# ifndef HAVE_KERN_PATH_PARENT_SYMBOL ++typedef int (*kern_path_parent_t)(const char *, struct nameidata *); ++extern kern_path_parent_t kern_path_parent_fn; ++# define spl_kern_path_parent(path, nd) kern_path_parent_fn(path, nd) ++# else ++# define spl_kern_path_parent(path, nd) kern_path_parent(path, nd) ++# endif /* HAVE_KERN_PATH_PARENT_SYMBOL */ ++#else ++# define spl_kern_path_parent(path, nd) path_lookup(path, LOOKUP_PARENT, nd) ++#endif /* HAVE_KERN_PATH_PARENT_HEADER */ ++ ++#ifdef HAVE_KERN_PATH_LOCKED ++typedef struct dentry * (*kern_path_locked_t)(const char 
*, struct path *); ++extern kern_path_locked_t kern_path_locked_fn; ++# define spl_kern_path_locked(name, path) kern_path_locked_fn(name, path) ++#endif /* HAVE_KERN_PATH_LOCKED */ ++ ++#endif /* SPL_FILE_COMPAT_H */ ++ +diff -uNr linux-3.2.33-go.orig/include/spl/linux/kallsyms_compat.h linux-3.2.33-go/include/spl/linux/kallsyms_compat.h +--- linux-3.2.33-go.orig/include/spl/linux/kallsyms_compat.h 1970-01-01 01:00:00.000000000 +0100 ++++ linux-3.2.33-go/include/spl/linux/kallsyms_compat.h 2012-11-16 23:22:32.407192896 +0100 +@@ -0,0 +1,43 @@ ++/*****************************************************************************\ ++ * Copyright (C) 2007-2010 Lawrence Livermore National Security, LLC. ++ * Copyright (C) 2007 The Regents of the University of California. ++ * Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER). ++ * Written by Brian Behlendorf . ++ * UCRL-CODE-235197 ++ * ++ * This file is part of the SPL, Solaris Porting Layer. ++ * For details, see . ++ * ++ * The SPL is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License as published by the ++ * Free Software Foundation; either version 2 of the License, or (at your ++ * option) any later version. ++ * ++ * The SPL is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * for more details. ++ * ++ * You should have received a copy of the GNU General Public License along ++ * with the SPL. If not, see . ++\*****************************************************************************/ ++ ++#ifndef _SPL_KALLSYMS_COMPAT_H ++#define _SPL_KALLSYMS_COMPAT_H ++ ++#define SYMBOL_POISON ((void*)0xabcddcba) ++ ++#ifdef HAVE_KALLSYMS_LOOKUP_NAME ++ ++#include ++#define spl_kallsyms_lookup_name(name) kallsyms_lookup_name(name) ++ ++#else ++ ++typedef unsigned long (*kallsyms_lookup_name_t)(const char *); ++extern kallsyms_lookup_name_t spl_kallsyms_lookup_name_fn; ++#define spl_kallsyms_lookup_name(name) spl_kallsyms_lookup_name_fn(name) ++ ++#endif /* HAVE_KALLSYMS_LOOKUP_NAME */ ++ ++#endif /* _SPL_KALLSYMS_COMPAT_H */ +diff -uNr linux-3.2.33-go.orig/include/spl/linux/list_compat.h linux-3.2.33-go/include/spl/linux/list_compat.h +--- linux-3.2.33-go.orig/include/spl/linux/list_compat.h 1970-01-01 01:00:00.000000000 +0100 ++++ linux-3.2.33-go/include/spl/linux/list_compat.h 2012-11-16 23:22:32.407192896 +0100 +@@ -0,0 +1,51 @@ ++/*****************************************************************************\ ++ * Copyright (C) 2007-2010 Lawrence Livermore National Security, LLC. ++ * Copyright (C) 2007 The Regents of the University of California. ++ * Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER). ++ * Written by Brian Behlendorf . ++ * UCRL-CODE-235197 ++ * ++ * This file is part of the SPL, Solaris Porting Layer. ++ * For details, see . ++ * ++ * The SPL is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License as published by the ++ * Free Software Foundation; either version 2 of the License, or (at your ++ * option) any later version. ++ * ++ * The SPL is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * for more details. 
++ * ++ * You should have received a copy of the GNU General Public License along ++ * with the SPL. If not, see . ++\*****************************************************************************/ ++ ++#ifndef _SPL_LIST_COMPAT_H ++#define _SPL_LIST_COMPAT_H ++ ++#include ++ ++#ifndef list_for_each_entry_safe_reverse ++ ++/** ++ * list_for_each_entry_safe_reverse ++ * @pos: the type * to use as a loop cursor. ++ * @n: another type * to use as temporary storage ++ * @head: the head for your list. ++ * @member: the name of the list_struct within the struct. ++ * ++ * Iterate backwards over list of given type, safe against removal ++ * of list entry. ++ */ ++#define list_for_each_entry_safe_reverse(pos, n, head, member) \ ++ for (pos = list_entry((head)->prev, typeof(*pos), member), \ ++ n = list_entry(pos->member.prev, typeof(*pos), member); \ ++ &pos->member != (head); \ ++ pos = n, n = list_entry(n->member.prev, typeof(*n), member)) ++ ++#endif /* list_for_each_entry_safe_reverse */ ++ ++#endif /* SPL_LIST_COMPAT_H */ ++ +diff -uNr linux-3.2.33-go.orig/include/spl/linux/math64_compat.h linux-3.2.33-go/include/spl/linux/math64_compat.h +--- linux-3.2.33-go.orig/include/spl/linux/math64_compat.h 1970-01-01 01:00:00.000000000 +0100 ++++ linux-3.2.33-go/include/spl/linux/math64_compat.h 2012-11-16 23:22:32.407192896 +0100 +@@ -0,0 +1,32 @@ ++/*****************************************************************************\ ++ * Copyright (C) 2007-2010 Lawrence Livermore National Security, LLC. ++ * Copyright (C) 2007 The Regents of the University of California. ++ * Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER). ++ * Written by Brian Behlendorf . ++ * UCRL-CODE-235197 ++ * ++ * This file is part of the SPL, Solaris Porting Layer. ++ * For details, see . ++ * ++ * The SPL is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License as published by the ++ * Free Software Foundation; either version 2 of the License, or (at your ++ * option) any later version. ++ * ++ * The SPL is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * for more details. ++ * ++ * You should have received a copy of the GNU General Public License along ++ * with the SPL. If not, see . ++\*****************************************************************************/ ++ ++#ifndef _SPL_MATH64_COMPAT_H ++#define _SPL_MATH64_COMPAT_H ++ ++#ifndef abs64 ++#define abs64(x) ({ uint64_t t = (x) >> 63; ((x) ^ t) - t; }) ++#endif ++ ++#endif /* _SPL_MATH64_COMPAT_H */ +diff -uNr linux-3.2.33-go.orig/include/spl/linux/mm_compat.h linux-3.2.33-go/include/spl/linux/mm_compat.h +--- linux-3.2.33-go.orig/include/spl/linux/mm_compat.h 1970-01-01 01:00:00.000000000 +0100 ++++ linux-3.2.33-go/include/spl/linux/mm_compat.h 2012-11-16 23:22:32.407192896 +0100 +@@ -0,0 +1,282 @@ ++/*****************************************************************************\ ++ * Copyright (C) 2007-2010 Lawrence Livermore National Security, LLC. ++ * Copyright (C) 2007 The Regents of the University of California. ++ * Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER). ++ * Written by Brian Behlendorf . ++ * UCRL-CODE-235197 ++ * ++ * This file is part of the SPL, Solaris Porting Layer. ++ * For details, see . 
++ * ++ * The SPL is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License as published by the ++ * Free Software Foundation; either version 2 of the License, or (at your ++ * option) any later version. ++ * ++ * The SPL is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * for more details. ++ * ++ * You should have received a copy of the GNU General Public License along ++ * with the SPL. If not, see . ++\*****************************************************************************/ ++ ++#ifndef _SPL_MM_COMPAT_H ++#define _SPL_MM_COMPAT_H ++ ++#include ++#include ++ ++/* ++ * Linux 2.6.31 API Change. ++ * Individual pages_{min,low,high} moved in to watermark array. ++ */ ++#ifndef min_wmark_pages ++#define min_wmark_pages(z) (z->pages_min) ++#endif ++ ++#ifndef low_wmark_pages ++#define low_wmark_pages(z) (z->pages_low) ++#endif ++ ++#ifndef high_wmark_pages ++#define high_wmark_pages(z) (z->pages_high) ++#endif ++ ++/* ++ * 2.6.37 API compat, ++ * The function invalidate_inodes() is no longer exported by the kernel. ++ * The prototype however is still available which means it is safe ++ * to acquire the symbol's address using spl_kallsyms_lookup_name(). ++ * ++ * 2.6.39 API compat, ++ * As for 2.6.39 invalidate_inodes() was updated to take a second ++ * argument which controls how dirty inodes should be handled. ++ */ ++#if defined(HAVE_INVALIDATE_INODES) || defined(HAVE_INVALIDATE_INODES_CHECK) ++# ifdef HAVE_2ARGS_INVALIDATE_INODES ++# define spl_invalidate_inodes(sb, kd) invalidate_inodes(sb, kd) ++# else ++# define spl_invalidate_inodes(sb, kd) invalidate_inodes(sb) ++# endif /* HAVE_2ARGS_INVALIDATE_INODES */ ++#else ++# ifdef HAVE_2ARGS_INVALIDATE_INODES ++typedef int (*invalidate_inodes_t)(struct super_block *sb, bool kd); ++extern invalidate_inodes_t invalidate_inodes_fn; ++# define spl_invalidate_inodes(sb, kd) invalidate_inodes_fn(sb, kd) ++# else ++typedef int (*invalidate_inodes_t)(struct super_block *sb); ++extern invalidate_inodes_t invalidate_inodes_fn; ++# define spl_invalidate_inodes(sb, kd) invalidate_inodes_fn(sb) ++# endif /* HAVE_2ARGS_INVALIDATE_INODES */ ++#endif /* HAVE_INVALIDATE_INODES || HAVE_INVALIDATE_INODES_CHECK */ ++ ++#if !defined(HAVE_SHRINK_CONTROL_STRUCT) ++struct shrink_control { ++ gfp_t gfp_mask; ++ unsigned long nr_to_scan; ++}; ++#endif /* HAVE_SHRINK_CONTROL_STRUCT */ ++ ++/* ++ * 2.6.xx API compat, ++ * There currently exists no exposed API to partially shrink the dcache. ++ * The expected mechanism to shrink the cache is a registered shrinker ++ * which is called during memory pressure. 
++ */ ++#ifndef HAVE_SHRINK_DCACHE_MEMORY ++# if defined(HAVE_SHRINK_CONTROL_STRUCT) ++typedef int (*shrink_dcache_memory_t)(struct shrinker *, ++ struct shrink_control *); ++extern shrink_dcache_memory_t shrink_dcache_memory_fn; ++# define shrink_dcache_memory(nr, gfp) \ ++({ \ ++ struct shrink_control sc = { .nr_to_scan = nr, .gfp_mask = gfp }; \ ++ int __ret__ = 0; \ ++ \ ++ if (shrink_dcache_memory_fn) \ ++ __ret__ = shrink_dcache_memory_fn(NULL, &sc); \ ++ \ ++ __ret__; \ ++}) ++# elif defined(HAVE_3ARGS_SHRINKER_CALLBACK) ++typedef int (*shrink_dcache_memory_t)(struct shrinker *, int, gfp_t); ++extern shrink_dcache_memory_t shrink_dcache_memory_fn; ++# define shrink_dcache_memory(nr, gfp) \ ++({ \ ++ int __ret__ = 0; \ ++ \ ++ if (shrink_dcache_memory_fn) \ ++ __ret__ = shrink_dcache_memory_fn(NULL, nr, gfp); \ ++ \ ++ __ret__; \ ++}) ++# else ++typedef int (*shrink_dcache_memory_t)(int, gfp_t); ++extern shrink_dcache_memory_t shrink_dcache_memory_fn; ++# define shrink_dcache_memory(nr, gfp) \ ++({ \ ++ int __ret__ = 0; \ ++ \ ++ if (shrink_dcache_memory_fn) \ ++ __ret__ = shrink_dcache_memory_fn(nr, gfp); \ ++ \ ++ __ret__; \ ++}) ++# endif /* HAVE_3ARGS_SHRINKER_CALLBACK */ ++#endif /* HAVE_SHRINK_DCACHE_MEMORY */ ++ ++/* ++ * 2.6.xx API compat, ++ * There currently exists no exposed API to partially shrink the icache. ++ * The expected mechanism to shrink the cache is a registered shrinker ++ * which is called during memory pressure. ++ */ ++#ifndef HAVE_SHRINK_ICACHE_MEMORY ++# if defined(HAVE_SHRINK_CONTROL_STRUCT) ++typedef int (*shrink_icache_memory_t)(struct shrinker *, ++ struct shrink_control *); ++extern shrink_icache_memory_t shrink_icache_memory_fn; ++# define shrink_icache_memory(nr, gfp) \ ++({ \ ++ struct shrink_control sc = { .nr_to_scan = nr, .gfp_mask = gfp }; \ ++ int __ret__ = 0; \ ++ \ ++ if (shrink_icache_memory_fn) \ ++ __ret__ = shrink_icache_memory_fn(NULL, &sc); \ ++ \ ++ __ret__; \ ++}) ++# elif defined(HAVE_3ARGS_SHRINKER_CALLBACK) ++typedef int (*shrink_icache_memory_t)(struct shrinker *, int, gfp_t); ++extern shrink_icache_memory_t shrink_icache_memory_fn; ++# define shrink_icache_memory(nr, gfp) \ ++({ \ ++ int __ret__ = 0; \ ++ \ ++ if (shrink_icache_memory_fn) \ ++ __ret__ = shrink_icache_memory_fn(NULL, nr, gfp); \ ++ \ ++ __ret__; \ ++}) ++# else ++typedef int (*shrink_icache_memory_t)(int, gfp_t); ++extern shrink_icache_memory_t shrink_icache_memory_fn; ++# define shrink_icache_memory(nr, gfp) \ ++({ \ ++ int __ret__ = 0; \ ++ \ ++ if (shrink_icache_memory_fn) \ ++ __ret__ = shrink_icache_memory_fn(nr, gfp); \ ++ \ ++ __ret__; \ ++}) ++# endif /* HAVE_3ARGS_SHRINKER_CALLBACK */ ++#endif /* HAVE_SHRINK_ICACHE_MEMORY */ ++ ++/* ++ * Linux 2.6. - 2.6. Shrinker API Compatibility. 
++ */ ++#ifdef HAVE_SET_SHRINKER ++typedef struct spl_shrinker { ++ struct shrinker *shrinker; ++ shrinker_t fn; ++ int seeks; ++} spl_shrinker_t; ++ ++static inline void ++spl_register_shrinker(spl_shrinker_t *ss) ++{ ++ ss->shrinker = set_shrinker(ss->seeks, ss->fn); ++} ++ ++static inline void ++spl_unregister_shrinker(spl_shrinker_t *ss) ++{ ++ remove_shrinker(ss->shrinker); ++} ++ ++# define SPL_SHRINKER_DECLARE(s, x, y) \ ++ static spl_shrinker_t s = { \ ++ .shrinker = NULL, \ ++ .fn = x, \ ++ .seeks = y \ ++ } ++ ++# define SPL_SHRINKER_CALLBACK_FWD_DECLARE(fn) \ ++ static int fn(int, unsigned int) ++# define SPL_SHRINKER_CALLBACK_WRAPPER(fn) \ ++static int \ ++fn(int nr_to_scan, unsigned int gfp_mask) \ ++{ \ ++ struct shrink_control sc; \ ++ \ ++ sc.nr_to_scan = nr_to_scan; \ ++ sc.gfp_mask = gfp_mask; \ ++ \ ++ return __ ## fn(NULL, &sc); \ ++} ++ ++#else ++ ++# define spl_register_shrinker(x) register_shrinker(x) ++# define spl_unregister_shrinker(x) unregister_shrinker(x) ++# define SPL_SHRINKER_DECLARE(s, x, y) \ ++ static struct shrinker s = { \ ++ .shrink = x, \ ++ .seeks = y \ ++ } ++ ++/* ++ * Linux 2.6. - 2.6. Shrinker API Compatibility. ++ */ ++# if defined(HAVE_SHRINK_CONTROL_STRUCT) ++# define SPL_SHRINKER_CALLBACK_FWD_DECLARE(fn) \ ++ static int fn(struct shrinker *, struct shrink_control *) ++# define SPL_SHRINKER_CALLBACK_WRAPPER(fn) \ ++static int \ ++fn(struct shrinker *shrink, struct shrink_control *sc) { \ ++ return __ ## fn(shrink, sc); \ ++} ++ ++/* ++ * Linux 2.6. - 2.6. Shrinker API Compatibility. ++ */ ++# elif defined(HAVE_3ARGS_SHRINKER_CALLBACK) ++# define SPL_SHRINKER_CALLBACK_FWD_DECLARE(fn) \ ++ static int fn(struct shrinker *, int, unsigned int) ++# define SPL_SHRINKER_CALLBACK_WRAPPER(fn) \ ++static int \ ++fn(struct shrinker *shrink, int nr_to_scan, unsigned int gfp_mask) \ ++{ \ ++ struct shrink_control sc; \ ++ \ ++ sc.nr_to_scan = nr_to_scan; \ ++ sc.gfp_mask = gfp_mask; \ ++ \ ++ return __ ## fn(shrink, &sc); \ ++} ++ ++/* ++ * Linux 2.6. - 2.6. Shrinker API Compatibility. ++ */ ++# else ++# define SPL_SHRINKER_CALLBACK_FWD_DECLARE(fn) \ ++ static int fn(int, unsigned int) ++# define SPL_SHRINKER_CALLBACK_WRAPPER(fn) \ ++static int \ ++fn(int nr_to_scan, unsigned int gfp_mask) \ ++{ \ ++ struct shrink_control sc; \ ++ \ ++ sc.nr_to_scan = nr_to_scan; \ ++ sc.gfp_mask = gfp_mask; \ ++ \ ++ return __ ## fn(NULL, &sc); \ ++} ++ ++# endif ++#endif /* HAVE_SET_SHRINKER */ ++ ++#endif /* SPL_MM_COMPAT_H */ +diff -uNr linux-3.2.33-go.orig/include/spl/linux/module_compat.h linux-3.2.33-go/include/spl/linux/module_compat.h +--- linux-3.2.33-go.orig/include/spl/linux/module_compat.h 1970-01-01 01:00:00.000000000 +0100 ++++ linux-3.2.33-go/include/spl/linux/module_compat.h 2012-11-16 23:22:32.407192896 +0100 +@@ -0,0 +1,59 @@ ++/*****************************************************************************\ ++ * Copyright (C) 2007-2010 Lawrence Livermore National Security, LLC. ++ * Copyright (C) 2007 The Regents of the University of California. ++ * Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER). ++ * Written by Brian Behlendorf . ++ * UCRL-CODE-235197 ++ * ++ * This file is part of the SPL, Solaris Porting Layer. ++ * For details, see . ++ * ++ * The SPL is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License as published by the ++ * Free Software Foundation; either version 2 of the License, or (at your ++ * option) any later version. 
++ * ++ * The SPL is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * for more details. ++ * ++ * You should have received a copy of the GNU General Public License along ++ * with the SPL. If not, see . ++\*****************************************************************************/ ++ ++#ifndef _SPL_MODULE_COMPAT_H ++#define _SPL_MODULE_COMPAT_H ++ ++#include ++ ++#define spl_module_init(init_fn) \ ++static int \ ++spl_##init_fn(void) \ ++{ \ ++ int rc; \ ++ \ ++ spl_setup(); \ ++ rc = init_fn(); \ ++ \ ++ return rc; \ ++} \ ++ \ ++module_init(spl_##init_fn) ++ ++#define spl_module_exit(exit_fn) \ ++static void \ ++spl_##exit_fn(void) \ ++{ \ ++ int rc; \ ++ \ ++ rc = exit_fn(); \ ++ spl_cleanup(); \ ++ if (rc) \ ++ printk(KERN_ERR "SPL: Failure %d unloading " \ ++ "dependent module\n", rc); \ ++} \ ++ \ ++module_exit(spl_##exit_fn) ++ ++#endif /* _SPL_MODULE_COMPAT_H */ +diff -uNr linux-3.2.33-go.orig/include/spl/linux/mutex_compat.h linux-3.2.33-go/include/spl/linux/mutex_compat.h +--- linux-3.2.33-go.orig/include/spl/linux/mutex_compat.h 1970-01-01 01:00:00.000000000 +0100 ++++ linux-3.2.33-go/include/spl/linux/mutex_compat.h 2012-11-16 23:22:32.407192896 +0100 +@@ -0,0 +1,36 @@ ++/*****************************************************************************\ ++ * Copyright (C) 2007-2010 Lawrence Livermore National Security, LLC. ++ * Copyright (C) 2007 The Regents of the University of California. ++ * Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER). ++ * Written by Brian Behlendorf . ++ * UCRL-CODE-235197 ++ * ++ * This file is part of the SPL, Solaris Porting Layer. ++ * For details, see . ++ * ++ * The SPL is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License as published by the ++ * Free Software Foundation; either version 2 of the License, or (at your ++ * option) any later version. ++ * ++ * The SPL is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * for more details. ++ * ++ * You should have received a copy of the GNU General Public License along ++ * with the SPL. If not, see . ++\*****************************************************************************/ ++ ++#ifndef _SPL_MUTEX_COMPAT_H ++#define _SPL_MUTEX_COMPAT_H ++ ++#include ++ ++/* mutex_lock_nested() introduced in 2.6.18 */ ++#ifndef HAVE_MUTEX_LOCK_NESTED ++# define mutex_lock_nested(lock, subclass) mutex_lock(lock) ++#endif /* HAVE_MUTEX_LOCK_NESTED */ ++ ++#endif /* _SPL_MUTEX_COMPAT_H */ ++ +diff -uNr linux-3.2.33-go.orig/include/spl/linux/proc_compat.h linux-3.2.33-go/include/spl/linux/proc_compat.h +--- linux-3.2.33-go.orig/include/spl/linux/proc_compat.h 1970-01-01 01:00:00.000000000 +0100 ++++ linux-3.2.33-go/include/spl/linux/proc_compat.h 2012-11-16 23:22:32.407192896 +0100 +@@ -0,0 +1,53 @@ ++/*****************************************************************************\ ++ * Copyright (C) 2007-2010 Lawrence Livermore National Security, LLC. ++ * Copyright (C) 2007 The Regents of the University of California. ++ * Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER). ++ * Written by Brian Behlendorf . ++ * UCRL-CODE-235197 ++ * ++ * This file is part of the SPL, Solaris Porting Layer. 
++ * For details, see . ++ * ++ * The SPL is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License as published by the ++ * Free Software Foundation; either version 2 of the License, or (at your ++ * option) any later version. ++ * ++ * The SPL is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * for more details. ++ * ++ * You should have received a copy of the GNU General Public License along ++ * with the SPL. If not, see . ++\*****************************************************************************/ ++ ++#ifndef _SPL_PROC_H ++#define _SPL_PROC_H ++ ++#include ++ ++#ifdef CONFIG_SYSCTL ++#ifdef HAVE_2ARGS_REGISTER_SYSCTL ++#define spl_register_sysctl_table(t, a) register_sysctl_table(t, a) ++#else ++#define spl_register_sysctl_table(t, a) register_sysctl_table(t) ++#endif /* HAVE_2ARGS_REGISTER_SYSCTL */ ++#define spl_unregister_sysctl_table(t) unregister_sysctl_table(t) ++#endif /* CONFIG_SYSCTL */ ++ ++#ifdef HAVE_CTL_NAME ++#define CTL_NAME(cname) .ctl_name = (cname), ++#else ++#define CTL_NAME(cname) ++#endif ++ ++extern struct proc_dir_entry *proc_spl_kstat; ++struct proc_dir_entry *proc_dir_entry_find(struct proc_dir_entry *root, ++ const char *str); ++int proc_dir_entries(struct proc_dir_entry *root); ++ ++int spl_proc_init(void); ++void spl_proc_fini(void); ++ ++#endif /* SPL_PROC_H */ +diff -uNr linux-3.2.33-go.orig/include/spl/linux/rwsem_compat.h linux-3.2.33-go/include/spl/linux/rwsem_compat.h +--- linux-3.2.33-go.orig/include/spl/linux/rwsem_compat.h 1970-01-01 01:00:00.000000000 +0100 ++++ linux-3.2.33-go/include/spl/linux/rwsem_compat.h 2012-11-16 23:22:32.407192896 +0100 +@@ -0,0 +1,66 @@ ++/*****************************************************************************\ ++ * Copyright (C) 2007-2010 Lawrence Livermore National Security, LLC. ++ * Copyright (C) 2007 The Regents of the University of California. ++ * Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER). ++ * Written by Brian Behlendorf . ++ * UCRL-CODE-235197 ++ * ++ * This file is part of the SPL, Solaris Porting Layer. ++ * For details, see . ++ * ++ * The SPL is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License as published by the ++ * Free Software Foundation; either version 2 of the License, or (at your ++ * option) any later version. ++ * ++ * The SPL is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * for more details. ++ * ++ * You should have received a copy of the GNU General Public License along ++ * with the SPL. If not, see . 
++\*****************************************************************************/ ++ ++#ifndef _SPL_RWSEM_COMPAT_H ++#define _SPL_RWSEM_COMPAT_H ++ ++#include ++ ++#if defined(RWSEM_SPINLOCK_IS_RAW) ++#define spl_rwsem_lock_irqsave(lk, fl) raw_spin_lock_irqsave(lk, fl) ++#define spl_rwsem_unlock_irqrestore(lk, fl) raw_spin_unlock_irqrestore(lk, fl) ++#define spl_rwsem_trylock_irqsave(lk, fl) raw_spin_trylock_irqsave(lk, fl) ++#else ++#define spl_rwsem_lock_irqsave(lk, fl) spin_lock_irqsave(lk, fl) ++#define spl_rwsem_unlock_irqrestore(lk, fl) spin_unlock_irqrestore(lk, fl) ++#define spl_rwsem_trylock_irqsave(lk, fl) spin_trylock_irqsave(lk, fl) ++#endif /* RWSEM_SPINLOCK_IS_RAW */ ++ ++/* ++ * Prior to Linux 2.6.33 there existed a race condition in rwsem_is_locked(). ++ * The semaphore's activity was checked outside of the wait_lock which ++ * could result in some readers getting the incorrect activity value. ++ * ++ * When a kernel without this fix is detected the SPL takes responsibility ++ * for acquiring the wait_lock to avoid this race. ++ */ ++#if defined(RWSEM_IS_LOCKED_TAKES_WAIT_LOCK) ++#define spl_rwsem_is_locked(rwsem) rwsem_is_locked(rwsem) ++#else ++static inline int ++spl_rwsem_is_locked(struct rw_semaphore *rwsem) ++{ ++ unsigned long flags; ++ int rc = 1; ++ ++ if (spl_rwsem_trylock_irqsave(&rwsem->wait_lock, flags)) { ++ rc = rwsem_is_locked(rwsem); ++ spl_rwsem_unlock_irqrestore(&rwsem->wait_lock, flags); ++ } ++ ++ return (rc); ++} ++#endif /* RWSEM_IS_LOCKED_TAKES_WAIT_LOCK */ ++ ++#endif /* _SPL_RWSEM_COMPAT_H */ +diff -uNr linux-3.2.33-go.orig/include/spl/linux/smp_compat.h linux-3.2.33-go/include/spl/linux/smp_compat.h +--- linux-3.2.33-go.orig/include/spl/linux/smp_compat.h 1970-01-01 01:00:00.000000000 +0100 ++++ linux-3.2.33-go/include/spl/linux/smp_compat.h 2012-11-16 23:22:32.407192896 +0100 +@@ -0,0 +1,40 @@ ++/*****************************************************************************\ ++ * Copyright (C) 2007-2010 Lawrence Livermore National Security, LLC. ++ * Copyright (C) 2007 The Regents of the University of California. ++ * Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER). ++ * Written by Brian Behlendorf . ++ * UCRL-CODE-235197 ++ * ++ * This file is part of the SPL, Solaris Porting Layer. ++ * For details, see . ++ * ++ * The SPL is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License as published by the ++ * Free Software Foundation; either version 2 of the License, or (at your ++ * option) any later version. ++ * ++ * The SPL is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * for more details. ++ * ++ * You should have received a copy of the GNU General Public License along ++ * with the SPL. If not, see . 
++\*****************************************************************************/ ++ ++#ifndef _SPL_SMP_COMPAT_H ++#define _SPL_SMP_COMPAT_H ++ ++#include ++ ++#ifdef HAVE_3ARGS_ON_EACH_CPU ++ ++#define spl_on_each_cpu(func,info,wait) on_each_cpu(func,info,wait) ++ ++#else ++ ++#define spl_on_each_cpu(func,info,wait) on_each_cpu(func,info,0,wait) ++ ++#endif /* HAVE_3ARGS_ON_EACH_CPU */ ++ ++#endif /* _SPL_SMP_COMPAT_H */ +diff -uNr linux-3.2.33-go.orig/include/spl/linux/sysctl_compat.h linux-3.2.33-go/include/spl/linux/sysctl_compat.h +--- linux-3.2.33-go.orig/include/spl/linux/sysctl_compat.h 1970-01-01 01:00:00.000000000 +0100 ++++ linux-3.2.33-go/include/spl/linux/sysctl_compat.h 2012-11-16 23:22:32.407192896 +0100 +@@ -0,0 +1,96 @@ ++/*****************************************************************************\ ++ * Copyright (C) 2007-2010 Lawrence Livermore National Security, LLC. ++ * Copyright (C) 2007 The Regents of the University of California. ++ * Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER). ++ * Written by Brian Behlendorf . ++ * UCRL-CODE-235197 ++ * ++ * This file is part of the SPL, Solaris Porting Layer. ++ * For details, see . ++ * ++ * The SPL is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License as published by the ++ * Free Software Foundation; either version 2 of the License, or (at your ++ * option) any later version. ++ * ++ * The SPL is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * for more details. ++ * ++ * You should have received a copy of the GNU General Public License along ++ * with the SPL. If not, see . ++\*****************************************************************************/ ++ ++#ifndef _SPL_SYSCTL_COMPAT_H ++#define _SPL_SYSCTL_COMPAT_H ++ ++#include ++ ++/* proc_handler() / proc_do* API changes ++ * 2.6.x - 2.6.31: 6 args, prototype includes 'struct file *' ++ * 2.6.32 - 2.6.y: 5 args, removed unused 'struct file *' from prototype ++ * ++ * Generic SPL_PROC_HANDLER() macro should be used for correct prototypes. ++ * It will define the following function arguments which can and should be ++ * used with the spl_proc_* helper macros. 
++ * ++ * struct ctl_table *table, ++ * int write, ++ * struct file *filp [2.6.31 and earlier kernels], ++ * void __user *buffer, ++ * size_t *lenp, ++ * loff_t *ppos, ++ */ ++#ifdef HAVE_5ARGS_PROC_HANDLER ++ ++#define SPL_PROC_HANDLER(proc_handler) \ ++static int \ ++proc_handler(struct ctl_table *table, int write, \ ++ void __user *buffer, size_t *lenp, loff_t *ppos) ++ ++#define spl_proc_dostring(table, write, filp, buffer, lenp, ppos) \ ++ proc_dostring(table, write, buffer, lenp, ppos) ++#define spl_proc_dointvec(table, write, filp, buffer, lenp, ppos) \ ++ proc_dointvec(table, write, buffer, lenp, ppos) ++#define spl_proc_dointvec_minmax(table, write, filp, buffer, lenp, ppos) \ ++ proc_dointvec_minmax(table, write, buffer, lenp, ppos) ++#define spl_proc_dointvec_jiffies(table, write, filp, buffer, lenp, ppos) \ ++ proc_dointvec_jiffies(table, write, buffer, lenp, ppos) ++#define spl_proc_dointvec_userhz_jiffies(table,write,filp,buffer,lenp,ppos) \ ++ proc_dointvec_userhz_jiffies(table, write, buffer, lenp, ppos) ++#define spl_proc_dointvec_ms_jiffies(table,write,filp,buffer,lenp,ppos) \ ++ proc_dointvec_ms_jiffies(table, write, buffer, lenp, ppos) ++#define spl_proc_doulongvec_minmax(table, write, filp, buffer, lenp, ppos) \ ++ proc_doulongvec_minmax(table, write, buffer, lenp, ppos) ++#define spl_proc_doulongvec_ms_jiffies_minmax(table,write,filp,buffer,lenp,ppos)\ ++ proc_doulongvec_ms_jiffies_minmax(table, write, buffer, lenp, ppos) ++ ++#else /* HAVE_5ARGS_PROC_HANDLER */ ++ ++#define SPL_PROC_HANDLER(proc_handler) \ ++static int \ ++proc_handler(struct ctl_table *table, int write, struct file *filp, \ ++ void __user *buffer, size_t *lenp, loff_t *ppos) ++ ++#define spl_proc_dostring(table, write, filp, buffer, lenp, ppos) \ ++ proc_dostring(table, write, filp, buffer, lenp, ppos) ++#define spl_proc_dointvec(table, write, filp, buffer, lenp, ppos) \ ++ proc_dointvec(table, write, filp, buffer, lenp, ppos) ++#define spl_proc_dointvec_minmax(table, write, filp, buffer, lenp, ppos) \ ++ proc_dointvec_minmax(table, write, filp, buffer, lenp, ppos) ++#define spl_proc_dointvec_jiffies(table, write, filp, buffer, lenp, ppos) \ ++ proc_dointvec_jiffies(table, write, filp, buffer, lenp, ppos) ++#define spl_proc_dointvec_userhz_jiffies(table,write,filp,buffer,lenp,ppos) \ ++ proc_dointvec_userhz_jiffies(table, write, filp, buffer, lenp, ppos) ++#define spl_proc_dointvec_ms_jiffies(table, write, filp, buffer, lenp, ppos) \ ++ proc_dointvec_ms_jiffies(table, write, filp, buffer, lenp, ppos) ++#define spl_proc_doulongvec_minmax(table, write, filp, buffer, lenp, ppos) \ ++ proc_doulongvec_minmax(table, write, filp, buffer, lenp, ppos) ++#define spl_proc_doulongvec_ms_jiffies_minmax(table,write,filp,buffer,lenp,ppos) \ ++ proc_doulongvec_ms_jiffies_minmax(table,write,filp,buffer,lenp,ppos) ++ ++ ++#endif /* HAVE_5ARGS_PROC_HANDLER */ ++ ++#endif /* _SPL_SYSCTL_COMPAT_H */ +diff -uNr linux-3.2.33-go.orig/include/spl/linux/time_compat.h linux-3.2.33-go/include/spl/linux/time_compat.h +--- linux-3.2.33-go.orig/include/spl/linux/time_compat.h 1970-01-01 01:00:00.000000000 +0100 ++++ linux-3.2.33-go/include/spl/linux/time_compat.h 2012-11-16 23:22:32.407192896 +0100 +@@ -0,0 +1,45 @@ ++/*****************************************************************************\ ++ * Copyright (C) 2007-2010 Lawrence Livermore National Security, LLC. ++ * Copyright (C) 2007 The Regents of the University of California. ++ * Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER). 
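/*
 * Editorial illustration, not part of the patch: a handler written with
 * SPL_PROC_HANDLER() and the spl_proc_* helpers above compiles against both
 * the six-argument (2.6.31 and earlier) and five-argument (2.6.32 and later)
 * proc_handler prototypes.  proc_dotunable and the ctl_table entry that
 * would reference it are hypothetical.
 */
SPL_PROC_HANDLER(proc_dotunable)
{
        /*
         * 'filp' is a real parameter only on older kernels; on newer ones
         * the token is simply discarded by the spl_proc_dointvec() macro,
         * so the same call site works everywhere.
         */
        return (spl_proc_dointvec(table, write, filp, buffer, lenp, ppos));
}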
++ * Written by Brian Behlendorf . ++ * UCRL-CODE-235197 ++ * ++ * This file is part of the SPL, Solaris Porting Layer. ++ * For details, see . ++ * ++ * The SPL is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License as published by the ++ * Free Software Foundation; either version 2 of the License, or (at your ++ * option) any later version. ++ * ++ * The SPL is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * for more details. ++ * ++ * You should have received a copy of the GNU General Public License along ++ * with the SPL. If not, see . ++\*****************************************************************************/ ++ ++#ifndef _SPL_TIME_COMPAT_H ++#define _SPL_TIME_COMPAT_H ++ ++#include ++ ++/* timespec_sub() API changes ++ * 2.6.18 - 2.6.x: Inline function provided by linux/time.h ++ */ ++#ifndef HAVE_TIMESPEC_SUB ++static inline struct timespec ++timespec_sub(struct timespec lhs, struct timespec rhs) ++{ ++ struct timespec ts_delta; ++ set_normalized_timespec(&ts_delta, lhs.tv_sec - rhs.tv_sec, ++ lhs.tv_nsec - rhs.tv_nsec); ++ return ts_delta; ++} ++#endif /* HAVE_TIMESPEC_SUB */ ++ ++#endif /* _SPL_TIME_COMPAT_H */ ++ +diff -uNr linux-3.2.33-go.orig/include/spl/linux/uaccess_compat.h linux-3.2.33-go/include/spl/linux/uaccess_compat.h +--- linux-3.2.33-go.orig/include/spl/linux/uaccess_compat.h 1970-01-01 01:00:00.000000000 +0100 ++++ linux-3.2.33-go/include/spl/linux/uaccess_compat.h 2012-11-16 23:22:32.407192896 +0100 +@@ -0,0 +1,35 @@ ++/*****************************************************************************\ ++ * Copyright (C) 2007-2010 Lawrence Livermore National Security, LLC. ++ * Copyright (C) 2007 The Regents of the University of California. ++ * Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER). ++ * Written by Brian Behlendorf . ++ * UCRL-CODE-235197 ++ * ++ * This file is part of the SPL, Solaris Porting Layer. ++ * For details, see . ++ * ++ * The SPL is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License as published by the ++ * Free Software Foundation; either version 2 of the License, or (at your ++ * option) any later version. ++ * ++ * The SPL is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * for more details. ++ * ++ * You should have received a copy of the GNU General Public License along ++ * with the SPL. If not, see . ++\*****************************************************************************/ ++ ++#ifndef _SPL_UACCESS_COMPAT_H ++#define _SPL_UACCESS_COMPAT_H ++ ++#ifdef HAVE_UACCESS_HEADER ++#include ++#else ++#include ++#endif ++ ++#endif /* _SPL_UACCESS_COMPAT_H */ ++ +diff -uNr linux-3.2.33-go.orig/include/spl/linux/workqueue_compat.h linux-3.2.33-go/include/spl/linux/workqueue_compat.h +--- linux-3.2.33-go.orig/include/spl/linux/workqueue_compat.h 1970-01-01 01:00:00.000000000 +0100 ++++ linux-3.2.33-go/include/spl/linux/workqueue_compat.h 2012-11-16 23:22:32.407192896 +0100 +@@ -0,0 +1,49 @@ ++/*****************************************************************************\ ++ * Copyright (C) 2007-2010 Lawrence Livermore National Security, LLC. 
++ * Copyright (C) 2007 The Regents of the University of California. ++ * Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER). ++ * Written by Brian Behlendorf . ++ * UCRL-CODE-235197 ++ * ++ * This file is part of the SPL, Solaris Porting Layer. ++ * For details, see . ++ * ++ * The SPL is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License as published by the ++ * Free Software Foundation; either version 2 of the License, or (at your ++ * option) any later version. ++ * ++ * The SPL is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * for more details. ++ * ++ * You should have received a copy of the GNU General Public License along ++ * with the SPL. If not, see . ++\*****************************************************************************/ ++ ++#ifndef _SPL_WORKQUEUE_COMPAT_H ++#define _SPL_WORKQUEUE_COMPAT_H ++ ++#include ++#include ++ ++#ifdef HAVE_3ARGS_INIT_WORK ++ ++#define delayed_work work_struct ++ ++#define spl_init_work(wq, cb, d) INIT_WORK((wq), (void *)(cb), \ ++ (void *)(d)) ++#define spl_init_delayed_work(wq,cb,d) INIT_WORK((wq), (void *)(cb), \ ++ (void *)(d)) ++#define spl_get_work_data(d, t, f) (t *)(d) ++ ++#else ++ ++#define spl_init_work(wq, cb, d) INIT_WORK((wq), (void *)(cb)); ++#define spl_init_delayed_work(wq,cb,d) INIT_DELAYED_WORK((wq), (void *)(cb)); ++#define spl_get_work_data(d, t, f) (t *)container_of(d, t, f) ++ ++#endif /* HAVE_3ARGS_INIT_WORK */ ++ ++#endif /* _SPL_WORKQUEUE_COMPAT_H */ +diff -uNr linux-3.2.33-go.orig/include/spl/linux/zlib_compat.h linux-3.2.33-go/include/spl/linux/zlib_compat.h +--- linux-3.2.33-go.orig/include/spl/linux/zlib_compat.h 1970-01-01 01:00:00.000000000 +0100 ++++ linux-3.2.33-go/include/spl/linux/zlib_compat.h 2012-11-16 23:22:32.407192896 +0100 +@@ -0,0 +1,37 @@ ++/*****************************************************************************\ ++ * Copyright (C) 2011 Lawrence Livermore National Security, LLC. ++ * Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER). ++ * Written by Brian Behlendorf . ++ * UCRL-CODE-235197 ++ * ++ * This file is part of the SPL, Solaris Porting Layer. ++ * For details, see . ++ * ++ * The SPL is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License as published by the ++ * Free Software Foundation; either version 2 of the License, or (at your ++ * option) any later version. ++ * ++ * The SPL is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * for more details. ++ * ++ * You should have received a copy of the GNU General Public License along ++ * with the SPL. If not, see . 
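/*
 * Editorial illustration, not part of the patch: the portable pattern for
 * the spl_init_work()/spl_get_work_data() wrappers above is to embed the
 * work_struct in the owning object and recover the object in the callback.
 * On pre-2.6.20 kernels the callback receives the object directly; on newer
 * kernels it receives the work_struct and container_of() is used.
 * my_task_t, my_worker() and my_task_dispatch() are hypothetical.
 */
typedef struct my_task {
        int                     mt_id;
        struct work_struct      mt_work;
} my_task_t;

static void
my_worker(void *data)
{
        my_task_t *mt = spl_get_work_data(data, my_task_t, mt_work);

        printk(KERN_INFO "processing task %d\n", mt->mt_id);
}

static void
my_task_dispatch(my_task_t *mt)
{
        spl_init_work(&mt->mt_work, my_worker, mt);
        schedule_work(&mt->mt_work);
}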
++\*****************************************************************************/ ++ ++#ifndef _SPL_ZLIB_COMPAT_H ++#define _SPL_ZLIB_COMPAT_H ++ ++#include ++ ++#ifdef HAVE_2ARGS_ZLIB_DEFLATE_WORKSPACESIZE ++#define spl_zlib_deflate_workspacesize(wb, ml) \ ++ zlib_deflate_workspacesize(wb, ml) ++#else ++#define spl_zlib_deflate_workspacesize(wb, ml) \ ++ zlib_deflate_workspacesize() ++#endif /* HAVE_2ARGS_ZLIB_DEFLATE_WORKSPACESIZE */ ++ ++#endif /* SPL_ZLIB_COMPAT_H */ +diff -uNr linux-3.2.33-go.orig/include/spl/Makefile linux-3.2.33-go/include/spl/Makefile +--- linux-3.2.33-go.orig/include/spl/Makefile 1970-01-01 01:00:00.000000000 +0100 ++++ linux-3.2.33-go/include/spl/Makefile 2012-11-16 23:22:32.406192907 +0100 +@@ -0,0 +1,515 @@ ++# Makefile.in generated by automake 1.11.6 from Makefile.am. ++# include/Makefile. Generated from Makefile.in by configure. ++ ++# Copyright (C) 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001, 2002, ++# 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010, 2011 Free Software ++# Foundation, Inc. ++# This Makefile.in is free software; the Free Software Foundation ++# gives unlimited permission to copy and/or distribute it, ++# with or without modifications, as long as this notice is preserved. ++ ++# This program is distributed in the hope that it will be useful, ++# but WITHOUT ANY WARRANTY, to the extent permitted by law; without ++# even the implied warranty of MERCHANTABILITY or FITNESS FOR A ++# PARTICULAR PURPOSE. ++ ++ ++ ++ ++am__make_dryrun = \ ++ { \ ++ am__dry=no; \ ++ case $$MAKEFLAGS in \ ++ *\\[\ \ ]*) \ ++ echo 'am--echo: ; @echo "AM" OK' | $(MAKE) -f - 2>/dev/null \ ++ | grep '^AM OK$$' >/dev/null || am__dry=yes;; \ ++ *) \ ++ for am__flg in $$MAKEFLAGS; do \ ++ case $$am__flg in \ ++ *=*|--*) ;; \ ++ *n*) am__dry=yes; break;; \ ++ esac; \ ++ done;; \ ++ esac; \ ++ test $$am__dry = yes; \ ++ } ++pkgdatadir = $(datadir)/spl ++pkgincludedir = $(includedir)/spl ++pkglibdir = $(libdir)/spl ++pkglibexecdir = $(libexecdir)/spl ++am__cd = CDPATH="$${ZSH_VERSION+.}$(PATH_SEPARATOR)" && cd ++install_sh_DATA = $(install_sh) -c -m 644 ++install_sh_PROGRAM = $(install_sh) -c ++install_sh_SCRIPT = $(install_sh) -c ++INSTALL_HEADER = $(INSTALL_DATA) ++transform = $(program_transform_name) ++NORMAL_INSTALL = : ++PRE_INSTALL = : ++POST_INSTALL = : ++NORMAL_UNINSTALL = : ++PRE_UNINSTALL = : ++POST_UNINSTALL = : ++build_triplet = x86_64-unknown-linux-gnu ++host_triplet = x86_64-unknown-linux-gnu ++target_triplet = x86_64-unknown-linux-gnu ++subdir = include ++DIST_COMMON = $(noinst_HEADERS) $(srcdir)/Makefile.am \ ++ $(srcdir)/Makefile.in ++ACLOCAL_M4 = $(top_srcdir)/aclocal.m4 ++am__aclocal_m4_deps = $(top_srcdir)/config/spl-build.m4 \ ++ $(top_srcdir)/config/spl-meta.m4 $(top_srcdir)/configure.ac ++am__configure_deps = $(am__aclocal_m4_deps) $(CONFIGURE_DEPENDENCIES) \ ++ $(ACLOCAL_M4) ++mkinstalldirs = $(install_sh) -d ++CONFIG_HEADER = $(top_builddir)/spl_config.h ++CONFIG_CLEAN_FILES = ++CONFIG_CLEAN_VPATH_FILES = ++AM_V_GEN = $(am__v_GEN_$(V)) ++am__v_GEN_ = $(am__v_GEN_$(AM_DEFAULT_VERBOSITY)) ++am__v_GEN_0 = @echo " GEN " $@; ++AM_V_at = $(am__v_at_$(V)) ++am__v_at_ = $(am__v_at_$(AM_DEFAULT_VERBOSITY)) ++am__v_at_0 = @ ++SOURCES = ++DIST_SOURCES = ++am__can_run_installinfo = \ ++ case $$AM_UPDATE_INFO_DIR in \ ++ n|no|NO) false;; \ ++ *) (install-info --version) >/dev/null 2>&1;; \ ++ esac ++HEADERS = $(noinst_HEADERS) ++ETAGS = etags ++CTAGS = ctags ++DISTFILES = $(DIST_COMMON) $(DIST_SOURCES) $(TEXINFOS) $(EXTRA_DIST) ++ACLOCAL = ${SHELL} 
/root/spl-0.6.0-rc12/config/missing --run aclocal-1.11 ++ALIEN = alien ++ALIEN_VERSION = ++AMTAR = $${TAR-tar} ++AM_DEFAULT_VERBOSITY = 1 ++AR = ar ++AUTOCONF = ${SHELL} /root/spl-0.6.0-rc12/config/missing --run autoconf ++AUTOHEADER = ${SHELL} /root/spl-0.6.0-rc12/config/missing --run autoheader ++AUTOMAKE = ${SHELL} /root/spl-0.6.0-rc12/config/missing --run automake-1.11 ++AWK = gawk ++CC = gcc ++CCDEPMODE = depmode=gcc3 ++CFLAGS = -g -O2 ++CPP = gcc -E ++CPPFLAGS = ++CYGPATH_W = echo ++DEBUG_CFLAGS = -DNDEBUG ++DEBUG_KMEM = _with_debug_kmem ++DEBUG_KMEM_TRACKING = _without_debug_kmem_tracking ++DEBUG_LOG = _with_debug_log ++DEBUG_SPL = _without_debug ++DEFAULT_PACKAGE = tgz ++DEFS = -DHAVE_CONFIG_H ++DEPDIR = .deps ++DLLTOOL = false ++DPKG = dpkg ++DPKGBUILD = dpkg-buildpackage ++DPKGBUILD_VERSION = ++DPKG_VERSION = ++DSYMUTIL = ++DUMPBIN = ++ECHO_C = ++ECHO_N = -n ++ECHO_T = ++EGREP = /bin/grep -E ++EXEEXT = ++FGREP = /bin/grep -F ++GREP = /bin/grep ++HAVE_ALIEN = no ++HAVE_DPKG = no ++HAVE_DPKGBUILD = no ++HAVE_MAKEPKG = ++HAVE_PACMAN = ++HAVE_RPM = yes ++HAVE_RPMBUILD = yes ++INSTALL = /usr/bin/install -c ++INSTALL_DATA = ${INSTALL} -m 644 ++INSTALL_PROGRAM = ${INSTALL} ++INSTALL_SCRIPT = ${INSTALL} ++INSTALL_STRIP_PROGRAM = $(install_sh) -c -s ++KERNELCPPFLAGS = -DHAVE_GPL_ONLY_SYMBOLS -Wstrict-prototypes -DNDEBUG -DDEBUG_LOG -DDEBUG_KMEM ++KERNELMAKE_PARAMS = O=/usr/src/linux-3.6.0-sabayon ++LD = /usr/x86_64-pc-linux-gnu/bin/ld -m elf_x86_64 ++LDFLAGS = ++LIBOBJS = ++LIBS = ++LIBTOOL = $(SHELL) $(top_builddir)/libtool ++LICENSE = GPL ++LINUX = /usr/src/linux-3.2.33-go ++LINUX_OBJ = /usr/src/linux-3.6.0-sabayon ++LINUX_SYMBOLS = NONE ++LINUX_VERSION = 3.6.0-sabayon ++LIPO = ++LN_S = ln -s ++LTLIBOBJS = ++MAINT = # ++MAKEINFO = ${SHELL} /root/spl-0.6.0-rc12/config/missing --run makeinfo ++MAKEPKG = ++MAKEPKG_VERSION = ++MANIFEST_TOOL = : ++MKDIR_P = /bin/mkdir -p ++NM = /usr/bin/nm -B ++NMEDIT = ++OBJDUMP = objdump ++OBJEXT = o ++OTOOL = ++OTOOL64 = ++PACKAGE = spl ++PACKAGE_BUGREPORT = ++PACKAGE_NAME = ++PACKAGE_STRING = ++PACKAGE_TARNAME = ++PACKAGE_URL = ++PACKAGE_VERSION = ++PACMAN = ++PACMAN_VERSION = ++PATH_SEPARATOR = : ++RANLIB = ranlib ++RPM = rpm ++RPMBUILD = rpmbuild ++RPMBUILD_VERSION = 4.10.0 ++RPM_VERSION = 4.10.0 ++SED = /bin/sed ++SET_MAKE = ++SHELL = /bin/sh ++SPL_CONFIG = all ++SPL_META_ALIAS = spl-0.6.0-rc12 ++SPL_META_AUTHOR = ++SPL_META_DATA = ++SPL_META_LT_AGE = ++SPL_META_LT_CURRENT = ++SPL_META_LT_REVISION = ++SPL_META_NAME = spl ++SPL_META_RELEASE = rc12 ++SPL_META_VERSION = 0.6.0 ++STRIP = strip ++VENDOR = gentoo ++VERSION = 0.6.0 ++abs_builddir = /root/spl-0.6.0-rc12/include ++abs_srcdir = /root/spl-0.6.0-rc12/include ++abs_top_builddir = /root/spl-0.6.0-rc12 ++abs_top_srcdir = /root/spl-0.6.0-rc12 ++ac_ct_AR = ar ++ac_ct_CC = gcc ++ac_ct_DUMPBIN = ++am__include = include ++am__leading_dot = . ++am__quote = ++am__tar = $${TAR-tar} chof - "$$tardir" ++am__untar = $${TAR-tar} xf - ++bindir = ${exec_prefix}/bin ++build = x86_64-unknown-linux-gnu ++build_alias = ++build_cpu = x86_64 ++build_os = linux-gnu ++build_vendor = unknown ++builddir = . 
++datadir = ${datarootdir} ++datarootdir = ${prefix}/share ++docdir = ${datarootdir}/doc/${PACKAGE} ++dvidir = ${docdir} ++exec_prefix = ${prefix} ++host = x86_64-unknown-linux-gnu ++host_alias = ++host_cpu = x86_64 ++host_os = linux-gnu ++host_vendor = unknown ++htmldir = ${docdir} ++includedir = ${prefix}/include ++infodir = ${datarootdir}/info ++install_sh = ${SHELL} /root/spl-0.6.0-rc12/config/install-sh ++libdir = ${exec_prefix}/lib ++libexecdir = ${exec_prefix}/libexec ++localedir = ${datarootdir}/locale ++localstatedir = ${prefix}/var ++mandir = ${datarootdir}/man ++mkdir_p = /bin/mkdir -p ++oldincludedir = /usr/include ++pdfdir = ${docdir} ++prefix = /usr/local ++program_transform_name = s,x,x, ++psdir = ${docdir} ++sbindir = ${exec_prefix}/sbin ++sharedstatedir = ${prefix}/com ++srcdir = . ++sysconfdir = ${prefix}/etc ++target = x86_64-unknown-linux-gnu ++target_alias = ++target_cpu = x86_64 ++target_os = linux-gnu ++target_vendor = unknown ++top_build_prefix = ../ ++top_builddir = .. ++top_srcdir = .. ++ ++# All headers are referenced by this top level Makefile.am are ++# noinst_HEADERS because they are not installed in the usual include ++# location. We do not want to be using $includedir for this. ++# Installation is handled by the custom install-data-local rule. ++noinst_HEADERS = $(top_srcdir)/include/*.h \ ++ $(top_srcdir)/include/fs/*.h $(top_srcdir)/include/linux/*.h \ ++ $(top_srcdir)/include/rpc/*.h \ ++ $(top_srcdir)/include/sharefs/*.h \ ++ $(top_srcdir)/include/sys/fm/*.h \ ++ $(top_srcdir)/include/sys/fs/*.h \ ++ $(top_srcdir)/include/sys/sysevent/*.h \ ++ $(top_srcdir)/include/sys/*.h $(top_srcdir)/include/util/*.h \ ++ $(top_srcdir)/include/vm/*.h ++all: all-am ++ ++.SUFFIXES: ++$(srcdir)/Makefile.in: # $(srcdir)/Makefile.am $(am__configure_deps) ++ @for dep in $?; do \ ++ case '$(am__configure_deps)' in \ ++ *$$dep*) \ ++ ( cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh ) \ ++ && { if test -f $@; then exit 0; else break; fi; }; \ ++ exit 1;; \ ++ esac; \ ++ done; \ ++ echo ' cd $(top_srcdir) && $(AUTOMAKE) --gnu include/Makefile'; \ ++ $(am__cd) $(top_srcdir) && \ ++ $(AUTOMAKE) --gnu include/Makefile ++.PRECIOUS: Makefile ++Makefile: $(srcdir)/Makefile.in $(top_builddir)/config.status ++ @case '$?' 
in \ ++ *config.status*) \ ++ cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh;; \ ++ *) \ ++ echo ' cd $(top_builddir) && $(SHELL) ./config.status $(subdir)/$@ $(am__depfiles_maybe)'; \ ++ cd $(top_builddir) && $(SHELL) ./config.status $(subdir)/$@ $(am__depfiles_maybe);; \ ++ esac; ++ ++$(top_builddir)/config.status: $(top_srcdir)/configure $(CONFIG_STATUS_DEPENDENCIES) ++ cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh ++ ++$(top_srcdir)/configure: # $(am__configure_deps) ++ cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh ++$(ACLOCAL_M4): # $(am__aclocal_m4_deps) ++ cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh ++$(am__aclocal_m4_deps): ++ ++mostlyclean-libtool: ++ -rm -f *.lo ++ ++clean-libtool: ++ -rm -rf .libs _libs ++ ++ID: $(HEADERS) $(SOURCES) $(LISP) $(TAGS_FILES) ++ list='$(SOURCES) $(HEADERS) $(LISP) $(TAGS_FILES)'; \ ++ unique=`for i in $$list; do \ ++ if test -f "$$i"; then echo $$i; else echo $(srcdir)/$$i; fi; \ ++ done | \ ++ $(AWK) '{ files[$$0] = 1; nonempty = 1; } \ ++ END { if (nonempty) { for (i in files) print i; }; }'`; \ ++ mkid -fID $$unique ++tags: TAGS ++ ++TAGS: $(HEADERS) $(SOURCES) $(TAGS_DEPENDENCIES) \ ++ $(TAGS_FILES) $(LISP) ++ set x; \ ++ here=`pwd`; \ ++ list='$(SOURCES) $(HEADERS) $(LISP) $(TAGS_FILES)'; \ ++ unique=`for i in $$list; do \ ++ if test -f "$$i"; then echo $$i; else echo $(srcdir)/$$i; fi; \ ++ done | \ ++ $(AWK) '{ files[$$0] = 1; nonempty = 1; } \ ++ END { if (nonempty) { for (i in files) print i; }; }'`; \ ++ shift; \ ++ if test -z "$(ETAGS_ARGS)$$*$$unique"; then :; else \ ++ test -n "$$unique" || unique=$$empty_fix; \ ++ if test $$# -gt 0; then \ ++ $(ETAGS) $(ETAGSFLAGS) $(AM_ETAGSFLAGS) $(ETAGS_ARGS) \ ++ "$$@" $$unique; \ ++ else \ ++ $(ETAGS) $(ETAGSFLAGS) $(AM_ETAGSFLAGS) $(ETAGS_ARGS) \ ++ $$unique; \ ++ fi; \ ++ fi ++ctags: CTAGS ++CTAGS: $(HEADERS) $(SOURCES) $(TAGS_DEPENDENCIES) \ ++ $(TAGS_FILES) $(LISP) ++ list='$(SOURCES) $(HEADERS) $(LISP) $(TAGS_FILES)'; \ ++ unique=`for i in $$list; do \ ++ if test -f "$$i"; then echo $$i; else echo $(srcdir)/$$i; fi; \ ++ done | \ ++ $(AWK) '{ files[$$0] = 1; nonempty = 1; } \ ++ END { if (nonempty) { for (i in files) print i; }; }'`; \ ++ test -z "$(CTAGS_ARGS)$$unique" \ ++ || $(CTAGS) $(CTAGSFLAGS) $(AM_CTAGSFLAGS) $(CTAGS_ARGS) \ ++ $$unique ++ ++GTAGS: ++ here=`$(am__cd) $(top_builddir) && pwd` \ ++ && $(am__cd) $(top_srcdir) \ ++ && gtags -i $(GTAGS_ARGS) "$$here" ++ ++distclean-tags: ++ -rm -f TAGS ID GTAGS GRTAGS GSYMS GPATH tags ++ ++distdir: $(DISTFILES) ++ @srcdirstrip=`echo "$(srcdir)" | sed 's/[].[^$$\\*]/\\\\&/g'`; \ ++ topsrcdirstrip=`echo "$(top_srcdir)" | sed 's/[].[^$$\\*]/\\\\&/g'`; \ ++ list='$(DISTFILES)'; \ ++ dist_files=`for file in $$list; do echo $$file; done | \ ++ sed -e "s|^$$srcdirstrip/||;t" \ ++ -e "s|^$$topsrcdirstrip/|$(top_builddir)/|;t"`; \ ++ case $$dist_files in \ ++ */*) $(MKDIR_P) `echo "$$dist_files" | \ ++ sed '/\//!d;s|^|$(distdir)/|;s,/[^/]*$$,,' | \ ++ sort -u` ;; \ ++ esac; \ ++ for file in $$dist_files; do \ ++ if test -f $$file || test -d $$file; then d=.; else d=$(srcdir); fi; \ ++ if test -d $$d/$$file; then \ ++ dir=`echo "/$$file" | sed -e 's,/[^/]*$$,,'`; \ ++ if test -d "$(distdir)/$$file"; then \ ++ find "$(distdir)/$$file" -type d ! -perm -700 -exec chmod u+rwx {} \;; \ ++ fi; \ ++ if test -d $(srcdir)/$$file && test $$d != $(srcdir); then \ ++ cp -fpR $(srcdir)/$$file "$(distdir)$$dir" || exit 1; \ ++ find "$(distdir)/$$file" -type d ! 
-perm -700 -exec chmod u+rwx {} \;; \ ++ fi; \ ++ cp -fpR $$d/$$file "$(distdir)$$dir" || exit 1; \ ++ else \ ++ test -f "$(distdir)/$$file" \ ++ || cp -p $$d/$$file "$(distdir)/$$file" \ ++ || exit 1; \ ++ fi; \ ++ done ++check-am: all-am ++check: check-am ++all-am: Makefile $(HEADERS) ++installdirs: ++install: install-am ++install-exec: install-exec-am ++install-data: install-data-am ++uninstall: uninstall-am ++ ++install-am: all-am ++ @$(MAKE) $(AM_MAKEFLAGS) install-exec-am install-data-am ++ ++installcheck: installcheck-am ++install-strip: ++ if test -z '$(STRIP)'; then \ ++ $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \ ++ install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \ ++ install; \ ++ else \ ++ $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \ ++ install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \ ++ "INSTALL_PROGRAM_ENV=STRIPPROG='$(STRIP)'" install; \ ++ fi ++mostlyclean-generic: ++ ++clean-generic: ++ ++distclean-generic: ++ -test -z "$(CONFIG_CLEAN_FILES)" || rm -f $(CONFIG_CLEAN_FILES) ++ -test . = "$(srcdir)" || test -z "$(CONFIG_CLEAN_VPATH_FILES)" || rm -f $(CONFIG_CLEAN_VPATH_FILES) ++ ++maintainer-clean-generic: ++ @echo "This command is intended for maintainers to use" ++ @echo "it deletes files that may require special tools to rebuild." ++clean: clean-am ++ ++clean-am: clean-generic clean-libtool mostlyclean-am ++ ++distclean: distclean-am ++ -rm -f Makefile ++distclean-am: clean-am distclean-generic distclean-tags ++ ++dvi: dvi-am ++ ++dvi-am: ++ ++html: html-am ++ ++html-am: ++ ++info: info-am ++ ++info-am: ++ ++install-data-am: install-data-local ++ ++install-dvi: install-dvi-am ++ ++install-dvi-am: ++ ++install-exec-am: ++ ++install-html: install-html-am ++ ++install-html-am: ++ ++install-info: install-info-am ++ ++install-info-am: ++ ++install-man: ++ ++install-pdf: install-pdf-am ++ ++install-pdf-am: ++ ++install-ps: install-ps-am ++ ++install-ps-am: ++ ++installcheck-am: ++ ++maintainer-clean: maintainer-clean-am ++ -rm -f Makefile ++maintainer-clean-am: distclean-am maintainer-clean-generic ++ ++mostlyclean: mostlyclean-am ++ ++mostlyclean-am: mostlyclean-generic mostlyclean-libtool ++ ++pdf: pdf-am ++ ++pdf-am: ++ ++ps: ps-am ++ ++ps-am: ++ ++uninstall-am: uninstall-local ++ ++.MAKE: install-am install-strip ++ ++.PHONY: CTAGS GTAGS all all-am check check-am clean clean-generic \ ++ clean-libtool ctags distclean distclean-generic \ ++ distclean-libtool distclean-tags distdir dvi dvi-am html \ ++ html-am info info-am install install-am install-data \ ++ install-data-am install-data-local install-dvi install-dvi-am \ ++ install-exec install-exec-am install-html install-html-am \ ++ install-info install-info-am install-man install-pdf \ ++ install-pdf-am install-ps install-ps-am install-strip \ ++ installcheck installcheck-am installdirs maintainer-clean \ ++ maintainer-clean-generic mostlyclean mostlyclean-generic \ ++ mostlyclean-libtool pdf pdf-am ps ps-am tags uninstall \ ++ uninstall-am uninstall-local ++ ++ ++install-data-local: ++ release=$(SPL_META_VERSION)-$(SPL_META_RELEASE); \ ++ instdest=$(DESTDIR)/usr/src/spl-$$release/$(LINUX_VERSION); \ ++ instfiles=`find . 
-name '*.h'`; \ ++ for instfile in $$instfiles; do \ ++ $(INSTALL) -D $$instfile $$instdest/$$instfile; \ ++ done ++ ++uninstall-local: ++ release=$(SPL_META_VERSION)-$(SPL_META_RELEASE); \ ++ instdest=$(DESTDIR)/usr/src/spl-$$release/$(LINUX_VERSION); \ ++ $(RM) -R $$instdest ++ ++# Tell versions [3.59,3.63) of GNU make to not export all variables. ++# Otherwise a system limit (for SysV at least) may be exceeded. ++.NOEXPORT: +diff -uNr linux-3.2.33-go.orig/include/spl/Makefile.am linux-3.2.33-go/include/spl/Makefile.am +--- linux-3.2.33-go.orig/include/spl/Makefile.am 1970-01-01 01:00:00.000000000 +0100 ++++ linux-3.2.33-go/include/spl/Makefile.am 2012-11-16 23:22:32.403192942 +0100 +@@ -0,0 +1,28 @@ ++# All headers are referenced by this top level Makefile.am are ++# noinst_HEADERS because they are not installed in the usual include ++# location. We do not want to be using $includedir for this. ++# Installation is handled by the custom install-data-local rule. ++noinst_HEADERS = $(top_srcdir)/include/*.h ++noinst_HEADERS += $(top_srcdir)/include/fs/*.h ++noinst_HEADERS += $(top_srcdir)/include/linux/*.h ++noinst_HEADERS += $(top_srcdir)/include/rpc/*.h ++noinst_HEADERS += $(top_srcdir)/include/sharefs/*.h ++noinst_HEADERS += $(top_srcdir)/include/sys/fm/*.h ++noinst_HEADERS += $(top_srcdir)/include/sys/fs/*.h ++noinst_HEADERS += $(top_srcdir)/include/sys/sysevent/*.h ++noinst_HEADERS += $(top_srcdir)/include/sys/*.h ++noinst_HEADERS += $(top_srcdir)/include/util/*.h ++noinst_HEADERS += $(top_srcdir)/include/vm/*.h ++ ++install-data-local: ++ release=$(SPL_META_VERSION)-$(SPL_META_RELEASE); \ ++ instdest=$(DESTDIR)/usr/src/spl-$$release/$(LINUX_VERSION); \ ++ instfiles=`find . -name '*.h'`; \ ++ for instfile in $$instfiles; do \ ++ $(INSTALL) -D $$instfile $$instdest/$$instfile; \ ++ done ++ ++uninstall-local: ++ release=$(SPL_META_VERSION)-$(SPL_META_RELEASE); \ ++ instdest=$(DESTDIR)/usr/src/spl-$$release/$(LINUX_VERSION); \ ++ $(RM) -R $$instdest +diff -uNr linux-3.2.33-go.orig/include/spl/Makefile.in linux-3.2.33-go/include/spl/Makefile.in +--- linux-3.2.33-go.orig/include/spl/Makefile.in 1970-01-01 01:00:00.000000000 +0100 ++++ linux-3.2.33-go/include/spl/Makefile.in 2012-11-16 23:22:32.407192896 +0100 +@@ -0,0 +1,515 @@ ++# Makefile.in generated by automake 1.11.6 from Makefile.am. ++# @configure_input@ ++ ++# Copyright (C) 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001, 2002, ++# 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010, 2011 Free Software ++# Foundation, Inc. ++# This Makefile.in is free software; the Free Software Foundation ++# gives unlimited permission to copy and/or distribute it, ++# with or without modifications, as long as this notice is preserved. ++ ++# This program is distributed in the hope that it will be useful, ++# but WITHOUT ANY WARRANTY, to the extent permitted by law; without ++# even the implied warranty of MERCHANTABILITY or FITNESS FOR A ++# PARTICULAR PURPOSE. 
++ ++@SET_MAKE@ ++ ++VPATH = @srcdir@ ++am__make_dryrun = \ ++ { \ ++ am__dry=no; \ ++ case $$MAKEFLAGS in \ ++ *\\[\ \ ]*) \ ++ echo 'am--echo: ; @echo "AM" OK' | $(MAKE) -f - 2>/dev/null \ ++ | grep '^AM OK$$' >/dev/null || am__dry=yes;; \ ++ *) \ ++ for am__flg in $$MAKEFLAGS; do \ ++ case $$am__flg in \ ++ *=*|--*) ;; \ ++ *n*) am__dry=yes; break;; \ ++ esac; \ ++ done;; \ ++ esac; \ ++ test $$am__dry = yes; \ ++ } ++pkgdatadir = $(datadir)/@PACKAGE@ ++pkgincludedir = $(includedir)/@PACKAGE@ ++pkglibdir = $(libdir)/@PACKAGE@ ++pkglibexecdir = $(libexecdir)/@PACKAGE@ ++am__cd = CDPATH="$${ZSH_VERSION+.}$(PATH_SEPARATOR)" && cd ++install_sh_DATA = $(install_sh) -c -m 644 ++install_sh_PROGRAM = $(install_sh) -c ++install_sh_SCRIPT = $(install_sh) -c ++INSTALL_HEADER = $(INSTALL_DATA) ++transform = $(program_transform_name) ++NORMAL_INSTALL = : ++PRE_INSTALL = : ++POST_INSTALL = : ++NORMAL_UNINSTALL = : ++PRE_UNINSTALL = : ++POST_UNINSTALL = : ++build_triplet = @build@ ++host_triplet = @host@ ++target_triplet = @target@ ++subdir = include ++DIST_COMMON = $(noinst_HEADERS) $(srcdir)/Makefile.am \ ++ $(srcdir)/Makefile.in ++ACLOCAL_M4 = $(top_srcdir)/aclocal.m4 ++am__aclocal_m4_deps = $(top_srcdir)/config/spl-build.m4 \ ++ $(top_srcdir)/config/spl-meta.m4 $(top_srcdir)/configure.ac ++am__configure_deps = $(am__aclocal_m4_deps) $(CONFIGURE_DEPENDENCIES) \ ++ $(ACLOCAL_M4) ++mkinstalldirs = $(install_sh) -d ++CONFIG_HEADER = $(top_builddir)/spl_config.h ++CONFIG_CLEAN_FILES = ++CONFIG_CLEAN_VPATH_FILES = ++AM_V_GEN = $(am__v_GEN_@AM_V@) ++am__v_GEN_ = $(am__v_GEN_@AM_DEFAULT_V@) ++am__v_GEN_0 = @echo " GEN " $@; ++AM_V_at = $(am__v_at_@AM_V@) ++am__v_at_ = $(am__v_at_@AM_DEFAULT_V@) ++am__v_at_0 = @ ++SOURCES = ++DIST_SOURCES = ++am__can_run_installinfo = \ ++ case $$AM_UPDATE_INFO_DIR in \ ++ n|no|NO) false;; \ ++ *) (install-info --version) >/dev/null 2>&1;; \ ++ esac ++HEADERS = $(noinst_HEADERS) ++ETAGS = etags ++CTAGS = ctags ++DISTFILES = $(DIST_COMMON) $(DIST_SOURCES) $(TEXINFOS) $(EXTRA_DIST) ++ACLOCAL = @ACLOCAL@ ++ALIEN = @ALIEN@ ++ALIEN_VERSION = @ALIEN_VERSION@ ++AMTAR = @AMTAR@ ++AM_DEFAULT_VERBOSITY = @AM_DEFAULT_VERBOSITY@ ++AR = @AR@ ++AUTOCONF = @AUTOCONF@ ++AUTOHEADER = @AUTOHEADER@ ++AUTOMAKE = @AUTOMAKE@ ++AWK = @AWK@ ++CC = @CC@ ++CCDEPMODE = @CCDEPMODE@ ++CFLAGS = @CFLAGS@ ++CPP = @CPP@ ++CPPFLAGS = @CPPFLAGS@ ++CYGPATH_W = @CYGPATH_W@ ++DEBUG_CFLAGS = @DEBUG_CFLAGS@ ++DEBUG_KMEM = @DEBUG_KMEM@ ++DEBUG_KMEM_TRACKING = @DEBUG_KMEM_TRACKING@ ++DEBUG_LOG = @DEBUG_LOG@ ++DEBUG_SPL = @DEBUG_SPL@ ++DEFAULT_PACKAGE = @DEFAULT_PACKAGE@ ++DEFS = @DEFS@ ++DEPDIR = @DEPDIR@ ++DLLTOOL = @DLLTOOL@ ++DPKG = @DPKG@ ++DPKGBUILD = @DPKGBUILD@ ++DPKGBUILD_VERSION = @DPKGBUILD_VERSION@ ++DPKG_VERSION = @DPKG_VERSION@ ++DSYMUTIL = @DSYMUTIL@ ++DUMPBIN = @DUMPBIN@ ++ECHO_C = @ECHO_C@ ++ECHO_N = @ECHO_N@ ++ECHO_T = @ECHO_T@ ++EGREP = @EGREP@ ++EXEEXT = @EXEEXT@ ++FGREP = @FGREP@ ++GREP = @GREP@ ++HAVE_ALIEN = @HAVE_ALIEN@ ++HAVE_DPKG = @HAVE_DPKG@ ++HAVE_DPKGBUILD = @HAVE_DPKGBUILD@ ++HAVE_MAKEPKG = @HAVE_MAKEPKG@ ++HAVE_PACMAN = @HAVE_PACMAN@ ++HAVE_RPM = @HAVE_RPM@ ++HAVE_RPMBUILD = @HAVE_RPMBUILD@ ++INSTALL = @INSTALL@ ++INSTALL_DATA = @INSTALL_DATA@ ++INSTALL_PROGRAM = @INSTALL_PROGRAM@ ++INSTALL_SCRIPT = @INSTALL_SCRIPT@ ++INSTALL_STRIP_PROGRAM = @INSTALL_STRIP_PROGRAM@ ++KERNELCPPFLAGS = @KERNELCPPFLAGS@ ++KERNELMAKE_PARAMS = @KERNELMAKE_PARAMS@ ++LD = @LD@ ++LDFLAGS = @LDFLAGS@ ++LIBOBJS = @LIBOBJS@ ++LIBS = @LIBS@ ++LIBTOOL = @LIBTOOL@ ++LICENSE = @LICENSE@ ++LINUX = @LINUX@ 
++LINUX_OBJ = @LINUX_OBJ@ ++LINUX_SYMBOLS = @LINUX_SYMBOLS@ ++LINUX_VERSION = @LINUX_VERSION@ ++LIPO = @LIPO@ ++LN_S = @LN_S@ ++LTLIBOBJS = @LTLIBOBJS@ ++MAINT = @MAINT@ ++MAKEINFO = @MAKEINFO@ ++MAKEPKG = @MAKEPKG@ ++MAKEPKG_VERSION = @MAKEPKG_VERSION@ ++MANIFEST_TOOL = @MANIFEST_TOOL@ ++MKDIR_P = @MKDIR_P@ ++NM = @NM@ ++NMEDIT = @NMEDIT@ ++OBJDUMP = @OBJDUMP@ ++OBJEXT = @OBJEXT@ ++OTOOL = @OTOOL@ ++OTOOL64 = @OTOOL64@ ++PACKAGE = @PACKAGE@ ++PACKAGE_BUGREPORT = @PACKAGE_BUGREPORT@ ++PACKAGE_NAME = @PACKAGE_NAME@ ++PACKAGE_STRING = @PACKAGE_STRING@ ++PACKAGE_TARNAME = @PACKAGE_TARNAME@ ++PACKAGE_URL = @PACKAGE_URL@ ++PACKAGE_VERSION = @PACKAGE_VERSION@ ++PACMAN = @PACMAN@ ++PACMAN_VERSION = @PACMAN_VERSION@ ++PATH_SEPARATOR = @PATH_SEPARATOR@ ++RANLIB = @RANLIB@ ++RPM = @RPM@ ++RPMBUILD = @RPMBUILD@ ++RPMBUILD_VERSION = @RPMBUILD_VERSION@ ++RPM_VERSION = @RPM_VERSION@ ++SED = @SED@ ++SET_MAKE = @SET_MAKE@ ++SHELL = @SHELL@ ++SPL_CONFIG = @SPL_CONFIG@ ++SPL_META_ALIAS = @SPL_META_ALIAS@ ++SPL_META_AUTHOR = @SPL_META_AUTHOR@ ++SPL_META_DATA = @SPL_META_DATA@ ++SPL_META_LT_AGE = @SPL_META_LT_AGE@ ++SPL_META_LT_CURRENT = @SPL_META_LT_CURRENT@ ++SPL_META_LT_REVISION = @SPL_META_LT_REVISION@ ++SPL_META_NAME = @SPL_META_NAME@ ++SPL_META_RELEASE = @SPL_META_RELEASE@ ++SPL_META_VERSION = @SPL_META_VERSION@ ++STRIP = @STRIP@ ++VENDOR = @VENDOR@ ++VERSION = @VERSION@ ++abs_builddir = @abs_builddir@ ++abs_srcdir = @abs_srcdir@ ++abs_top_builddir = @abs_top_builddir@ ++abs_top_srcdir = @abs_top_srcdir@ ++ac_ct_AR = @ac_ct_AR@ ++ac_ct_CC = @ac_ct_CC@ ++ac_ct_DUMPBIN = @ac_ct_DUMPBIN@ ++am__include = @am__include@ ++am__leading_dot = @am__leading_dot@ ++am__quote = @am__quote@ ++am__tar = @am__tar@ ++am__untar = @am__untar@ ++bindir = @bindir@ ++build = @build@ ++build_alias = @build_alias@ ++build_cpu = @build_cpu@ ++build_os = @build_os@ ++build_vendor = @build_vendor@ ++builddir = @builddir@ ++datadir = @datadir@ ++datarootdir = @datarootdir@ ++docdir = @docdir@ ++dvidir = @dvidir@ ++exec_prefix = @exec_prefix@ ++host = @host@ ++host_alias = @host_alias@ ++host_cpu = @host_cpu@ ++host_os = @host_os@ ++host_vendor = @host_vendor@ ++htmldir = @htmldir@ ++includedir = @includedir@ ++infodir = @infodir@ ++install_sh = @install_sh@ ++libdir = @libdir@ ++libexecdir = @libexecdir@ ++localedir = @localedir@ ++localstatedir = @localstatedir@ ++mandir = @mandir@ ++mkdir_p = @mkdir_p@ ++oldincludedir = @oldincludedir@ ++pdfdir = @pdfdir@ ++prefix = @prefix@ ++program_transform_name = @program_transform_name@ ++psdir = @psdir@ ++sbindir = @sbindir@ ++sharedstatedir = @sharedstatedir@ ++srcdir = @srcdir@ ++sysconfdir = @sysconfdir@ ++target = @target@ ++target_alias = @target_alias@ ++target_cpu = @target_cpu@ ++target_os = @target_os@ ++target_vendor = @target_vendor@ ++top_build_prefix = @top_build_prefix@ ++top_builddir = @top_builddir@ ++top_srcdir = @top_srcdir@ ++ ++# All headers are referenced by this top level Makefile.am are ++# noinst_HEADERS because they are not installed in the usual include ++# location. We do not want to be using $includedir for this. ++# Installation is handled by the custom install-data-local rule. 
++noinst_HEADERS = $(top_srcdir)/include/*.h \ ++ $(top_srcdir)/include/fs/*.h $(top_srcdir)/include/linux/*.h \ ++ $(top_srcdir)/include/rpc/*.h \ ++ $(top_srcdir)/include/sharefs/*.h \ ++ $(top_srcdir)/include/sys/fm/*.h \ ++ $(top_srcdir)/include/sys/fs/*.h \ ++ $(top_srcdir)/include/sys/sysevent/*.h \ ++ $(top_srcdir)/include/sys/*.h $(top_srcdir)/include/util/*.h \ ++ $(top_srcdir)/include/vm/*.h ++all: all-am ++ ++.SUFFIXES: ++$(srcdir)/Makefile.in: @MAINTAINER_MODE_TRUE@ $(srcdir)/Makefile.am $(am__configure_deps) ++ @for dep in $?; do \ ++ case '$(am__configure_deps)' in \ ++ *$$dep*) \ ++ ( cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh ) \ ++ && { if test -f $@; then exit 0; else break; fi; }; \ ++ exit 1;; \ ++ esac; \ ++ done; \ ++ echo ' cd $(top_srcdir) && $(AUTOMAKE) --gnu include/Makefile'; \ ++ $(am__cd) $(top_srcdir) && \ ++ $(AUTOMAKE) --gnu include/Makefile ++.PRECIOUS: Makefile ++Makefile: $(srcdir)/Makefile.in $(top_builddir)/config.status ++ @case '$?' in \ ++ *config.status*) \ ++ cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh;; \ ++ *) \ ++ echo ' cd $(top_builddir) && $(SHELL) ./config.status $(subdir)/$@ $(am__depfiles_maybe)'; \ ++ cd $(top_builddir) && $(SHELL) ./config.status $(subdir)/$@ $(am__depfiles_maybe);; \ ++ esac; ++ ++$(top_builddir)/config.status: $(top_srcdir)/configure $(CONFIG_STATUS_DEPENDENCIES) ++ cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh ++ ++$(top_srcdir)/configure: @MAINTAINER_MODE_TRUE@ $(am__configure_deps) ++ cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh ++$(ACLOCAL_M4): @MAINTAINER_MODE_TRUE@ $(am__aclocal_m4_deps) ++ cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh ++$(am__aclocal_m4_deps): ++ ++mostlyclean-libtool: ++ -rm -f *.lo ++ ++clean-libtool: ++ -rm -rf .libs _libs ++ ++ID: $(HEADERS) $(SOURCES) $(LISP) $(TAGS_FILES) ++ list='$(SOURCES) $(HEADERS) $(LISP) $(TAGS_FILES)'; \ ++ unique=`for i in $$list; do \ ++ if test -f "$$i"; then echo $$i; else echo $(srcdir)/$$i; fi; \ ++ done | \ ++ $(AWK) '{ files[$$0] = 1; nonempty = 1; } \ ++ END { if (nonempty) { for (i in files) print i; }; }'`; \ ++ mkid -fID $$unique ++tags: TAGS ++ ++TAGS: $(HEADERS) $(SOURCES) $(TAGS_DEPENDENCIES) \ ++ $(TAGS_FILES) $(LISP) ++ set x; \ ++ here=`pwd`; \ ++ list='$(SOURCES) $(HEADERS) $(LISP) $(TAGS_FILES)'; \ ++ unique=`for i in $$list; do \ ++ if test -f "$$i"; then echo $$i; else echo $(srcdir)/$$i; fi; \ ++ done | \ ++ $(AWK) '{ files[$$0] = 1; nonempty = 1; } \ ++ END { if (nonempty) { for (i in files) print i; }; }'`; \ ++ shift; \ ++ if test -z "$(ETAGS_ARGS)$$*$$unique"; then :; else \ ++ test -n "$$unique" || unique=$$empty_fix; \ ++ if test $$# -gt 0; then \ ++ $(ETAGS) $(ETAGSFLAGS) $(AM_ETAGSFLAGS) $(ETAGS_ARGS) \ ++ "$$@" $$unique; \ ++ else \ ++ $(ETAGS) $(ETAGSFLAGS) $(AM_ETAGSFLAGS) $(ETAGS_ARGS) \ ++ $$unique; \ ++ fi; \ ++ fi ++ctags: CTAGS ++CTAGS: $(HEADERS) $(SOURCES) $(TAGS_DEPENDENCIES) \ ++ $(TAGS_FILES) $(LISP) ++ list='$(SOURCES) $(HEADERS) $(LISP) $(TAGS_FILES)'; \ ++ unique=`for i in $$list; do \ ++ if test -f "$$i"; then echo $$i; else echo $(srcdir)/$$i; fi; \ ++ done | \ ++ $(AWK) '{ files[$$0] = 1; nonempty = 1; } \ ++ END { if (nonempty) { for (i in files) print i; }; }'`; \ ++ test -z "$(CTAGS_ARGS)$$unique" \ ++ || $(CTAGS) $(CTAGSFLAGS) $(AM_CTAGSFLAGS) $(CTAGS_ARGS) \ ++ $$unique ++ ++GTAGS: ++ here=`$(am__cd) $(top_builddir) && pwd` \ ++ && $(am__cd) $(top_srcdir) \ ++ && gtags -i $(GTAGS_ARGS) "$$here" ++ ++distclean-tags: ++ -rm -f TAGS ID GTAGS GRTAGS 
GSYMS GPATH tags ++ ++distdir: $(DISTFILES) ++ @srcdirstrip=`echo "$(srcdir)" | sed 's/[].[^$$\\*]/\\\\&/g'`; \ ++ topsrcdirstrip=`echo "$(top_srcdir)" | sed 's/[].[^$$\\*]/\\\\&/g'`; \ ++ list='$(DISTFILES)'; \ ++ dist_files=`for file in $$list; do echo $$file; done | \ ++ sed -e "s|^$$srcdirstrip/||;t" \ ++ -e "s|^$$topsrcdirstrip/|$(top_builddir)/|;t"`; \ ++ case $$dist_files in \ ++ */*) $(MKDIR_P) `echo "$$dist_files" | \ ++ sed '/\//!d;s|^|$(distdir)/|;s,/[^/]*$$,,' | \ ++ sort -u` ;; \ ++ esac; \ ++ for file in $$dist_files; do \ ++ if test -f $$file || test -d $$file; then d=.; else d=$(srcdir); fi; \ ++ if test -d $$d/$$file; then \ ++ dir=`echo "/$$file" | sed -e 's,/[^/]*$$,,'`; \ ++ if test -d "$(distdir)/$$file"; then \ ++ find "$(distdir)/$$file" -type d ! -perm -700 -exec chmod u+rwx {} \;; \ ++ fi; \ ++ if test -d $(srcdir)/$$file && test $$d != $(srcdir); then \ ++ cp -fpR $(srcdir)/$$file "$(distdir)$$dir" || exit 1; \ ++ find "$(distdir)/$$file" -type d ! -perm -700 -exec chmod u+rwx {} \;; \ ++ fi; \ ++ cp -fpR $$d/$$file "$(distdir)$$dir" || exit 1; \ ++ else \ ++ test -f "$(distdir)/$$file" \ ++ || cp -p $$d/$$file "$(distdir)/$$file" \ ++ || exit 1; \ ++ fi; \ ++ done ++check-am: all-am ++check: check-am ++all-am: Makefile $(HEADERS) ++installdirs: ++install: install-am ++install-exec: install-exec-am ++install-data: install-data-am ++uninstall: uninstall-am ++ ++install-am: all-am ++ @$(MAKE) $(AM_MAKEFLAGS) install-exec-am install-data-am ++ ++installcheck: installcheck-am ++install-strip: ++ if test -z '$(STRIP)'; then \ ++ $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \ ++ install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \ ++ install; \ ++ else \ ++ $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \ ++ install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \ ++ "INSTALL_PROGRAM_ENV=STRIPPROG='$(STRIP)'" install; \ ++ fi ++mostlyclean-generic: ++ ++clean-generic: ++ ++distclean-generic: ++ -test -z "$(CONFIG_CLEAN_FILES)" || rm -f $(CONFIG_CLEAN_FILES) ++ -test . = "$(srcdir)" || test -z "$(CONFIG_CLEAN_VPATH_FILES)" || rm -f $(CONFIG_CLEAN_VPATH_FILES) ++ ++maintainer-clean-generic: ++ @echo "This command is intended for maintainers to use" ++ @echo "it deletes files that may require special tools to rebuild." 
++clean: clean-am ++ ++clean-am: clean-generic clean-libtool mostlyclean-am ++ ++distclean: distclean-am ++ -rm -f Makefile ++distclean-am: clean-am distclean-generic distclean-tags ++ ++dvi: dvi-am ++ ++dvi-am: ++ ++html: html-am ++ ++html-am: ++ ++info: info-am ++ ++info-am: ++ ++install-data-am: install-data-local ++ ++install-dvi: install-dvi-am ++ ++install-dvi-am: ++ ++install-exec-am: ++ ++install-html: install-html-am ++ ++install-html-am: ++ ++install-info: install-info-am ++ ++install-info-am: ++ ++install-man: ++ ++install-pdf: install-pdf-am ++ ++install-pdf-am: ++ ++install-ps: install-ps-am ++ ++install-ps-am: ++ ++installcheck-am: ++ ++maintainer-clean: maintainer-clean-am ++ -rm -f Makefile ++maintainer-clean-am: distclean-am maintainer-clean-generic ++ ++mostlyclean: mostlyclean-am ++ ++mostlyclean-am: mostlyclean-generic mostlyclean-libtool ++ ++pdf: pdf-am ++ ++pdf-am: ++ ++ps: ps-am ++ ++ps-am: ++ ++uninstall-am: uninstall-local ++ ++.MAKE: install-am install-strip ++ ++.PHONY: CTAGS GTAGS all all-am check check-am clean clean-generic \ ++ clean-libtool ctags distclean distclean-generic \ ++ distclean-libtool distclean-tags distdir dvi dvi-am html \ ++ html-am info info-am install install-am install-data \ ++ install-data-am install-data-local install-dvi install-dvi-am \ ++ install-exec install-exec-am install-html install-html-am \ ++ install-info install-info-am install-man install-pdf \ ++ install-pdf-am install-ps install-ps-am install-strip \ ++ installcheck installcheck-am installdirs maintainer-clean \ ++ maintainer-clean-generic mostlyclean mostlyclean-generic \ ++ mostlyclean-libtool pdf pdf-am ps ps-am tags uninstall \ ++ uninstall-am uninstall-local ++ ++ ++install-data-local: ++ release=$(SPL_META_VERSION)-$(SPL_META_RELEASE); \ ++ instdest=$(DESTDIR)/usr/src/spl-$$release/$(LINUX_VERSION); \ ++ instfiles=`find . -name '*.h'`; \ ++ for instfile in $$instfiles; do \ ++ $(INSTALL) -D $$instfile $$instdest/$$instfile; \ ++ done ++ ++uninstall-local: ++ release=$(SPL_META_VERSION)-$(SPL_META_RELEASE); \ ++ instdest=$(DESTDIR)/usr/src/spl-$$release/$(LINUX_VERSION); \ ++ $(RM) -R $$instdest ++ ++# Tell versions [3.59,3.63) of GNU make to not export all variables. ++# Otherwise a system limit (for SysV at least) may be exceeded. ++.NOEXPORT: +diff -uNr linux-3.2.33-go.orig/include/spl/rpc/types.h linux-3.2.33-go/include/spl/rpc/types.h +--- linux-3.2.33-go.orig/include/spl/rpc/types.h 1970-01-01 01:00:00.000000000 +0100 ++++ linux-3.2.33-go/include/spl/rpc/types.h 2012-11-16 23:22:32.403192942 +0100 +@@ -0,0 +1,30 @@ ++/*****************************************************************************\ ++ * Copyright (C) 2007-2010 Lawrence Livermore National Security, LLC. ++ * Copyright (C) 2007 The Regents of the University of California. ++ * Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER). ++ * Written by Brian Behlendorf . ++ * UCRL-CODE-235197 ++ * ++ * This file is part of the SPL, Solaris Porting Layer. ++ * For details, see . ++ * ++ * The SPL is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License as published by the ++ * Free Software Foundation; either version 2 of the License, or (at your ++ * option) any later version. ++ * ++ * The SPL is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * for more details. 
++ * ++ * You should have received a copy of the GNU General Public License along ++ * with the SPL. If not, see . ++\*****************************************************************************/ ++ ++#ifndef _SPL_RPC_TYPES_H ++#define _SPL_RPC_TYPES_H ++ ++typedef int bool_t; ++ ++#endif /* SPL_RPC_TYPES_H */ +diff -uNr linux-3.2.33-go.orig/include/spl/rpc/xdr.h linux-3.2.33-go/include/spl/rpc/xdr.h +--- linux-3.2.33-go.orig/include/spl/rpc/xdr.h 1970-01-01 01:00:00.000000000 +0100 ++++ linux-3.2.33-go/include/spl/rpc/xdr.h 2012-11-16 23:22:32.403192942 +0100 +@@ -0,0 +1,155 @@ ++/*****************************************************************************\ ++ * Copyright (c) 2008 Sun Microsystems, Inc. ++ * Written by Ricardo Correia ++ * ++ * This file is part of the SPL, Solaris Porting Layer. ++ * For details, see . ++ * ++ * The SPL is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License as published by the ++ * Free Software Foundation; either version 2 of the License, or (at your ++ * option) any later version. ++ * ++ * The SPL is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * for more details. ++ * ++ * You should have received a copy of the GNU General Public License along ++ * with the SPL. If not, see . ++\*****************************************************************************/ ++ ++#ifndef _SPL_RPC_XDR_H ++#define _SPL_RPC_XDR_H ++ ++#include ++#include ++ ++/* ++ * XDR enums and types. ++ */ ++enum xdr_op { ++ XDR_ENCODE, ++ XDR_DECODE ++}; ++ ++struct xdr_ops; ++ ++typedef struct { ++ struct xdr_ops *x_ops; /* Also used to let caller know if ++ xdrmem_create() succeeds (sigh..) */ ++ caddr_t x_addr; /* Current buffer addr */ ++ caddr_t x_addr_end; /* End of the buffer */ ++ enum xdr_op x_op; /* Stream direction */ ++} XDR; ++ ++typedef bool_t (*xdrproc_t)(XDR *xdrs, void *ptr); ++ ++struct xdr_ops { ++ bool_t (*xdr_control)(XDR *, int, void *); ++ ++ bool_t (*xdr_char)(XDR *, char *); ++ bool_t (*xdr_u_short)(XDR *, unsigned short *); ++ bool_t (*xdr_u_int)(XDR *, unsigned *); ++ bool_t (*xdr_u_longlong_t)(XDR *, u_longlong_t *); ++ ++ bool_t (*xdr_opaque)(XDR *, caddr_t, const uint_t); ++ bool_t (*xdr_string)(XDR *, char **, const uint_t); ++ bool_t (*xdr_array)(XDR *, caddr_t *, uint_t *, const uint_t, ++ const uint_t, const xdrproc_t); ++}; ++ ++/* ++ * XDR control operator. ++ */ ++#define XDR_GET_BYTES_AVAIL 1 ++ ++struct xdr_bytesrec { ++ bool_t xc_is_last_record; ++ size_t xc_num_avail; ++}; ++ ++/* ++ * XDR functions. ++ */ ++void xdrmem_create(XDR *xdrs, const caddr_t addr, const uint_t size, ++ const enum xdr_op op); ++#define xdr_destroy(xdrs) ((void) 0) /* Currently not needed. If needed later, ++ we'll add it to struct xdr_ops */ ++ ++#define xdr_control(xdrs, req, info) (xdrs)->x_ops->xdr_control((xdrs), \ ++ (req), (info)) ++ ++/* ++ * For precaution, the following are defined as static inlines instead of macros ++ * to get some amount of type safety. ++ * ++ * Also, macros wouldn't work in the case where typecasting is done, because it ++ * must be possible to reference the functions' addresses by these names. 
++ */ ++static inline bool_t xdr_char(XDR *xdrs, char *cp) ++{ ++ return xdrs->x_ops->xdr_char(xdrs, cp); ++} ++ ++static inline bool_t xdr_u_short(XDR *xdrs, unsigned short *usp) ++{ ++ return xdrs->x_ops->xdr_u_short(xdrs, usp); ++} ++ ++static inline bool_t xdr_short(XDR *xdrs, short *sp) ++{ ++ BUILD_BUG_ON(sizeof(short) != 2); ++ return xdrs->x_ops->xdr_u_short(xdrs, (unsigned short *) sp); ++} ++ ++static inline bool_t xdr_u_int(XDR *xdrs, unsigned *up) ++{ ++ return xdrs->x_ops->xdr_u_int(xdrs, up); ++} ++ ++static inline bool_t xdr_int(XDR *xdrs, int *ip) ++{ ++ BUILD_BUG_ON(sizeof(int) != 4); ++ return xdrs->x_ops->xdr_u_int(xdrs, (unsigned *) ip); ++} ++ ++static inline bool_t xdr_u_longlong_t(XDR *xdrs, u_longlong_t *ullp) ++{ ++ return xdrs->x_ops->xdr_u_longlong_t(xdrs, ullp); ++} ++ ++static inline bool_t xdr_longlong_t(XDR *xdrs, longlong_t *llp) ++{ ++ BUILD_BUG_ON(sizeof(longlong_t) != 8); ++ return xdrs->x_ops->xdr_u_longlong_t(xdrs, (u_longlong_t *) llp); ++} ++ ++/* ++ * Fixed-length opaque data. ++ */ ++static inline bool_t xdr_opaque(XDR *xdrs, caddr_t cp, const uint_t cnt) ++{ ++ return xdrs->x_ops->xdr_opaque(xdrs, cp, cnt); ++} ++ ++/* ++ * Variable-length string. ++ * The *sp buffer must have (maxsize + 1) bytes. ++ */ ++static inline bool_t xdr_string(XDR *xdrs, char **sp, const uint_t maxsize) ++{ ++ return xdrs->x_ops->xdr_string(xdrs, sp, maxsize); ++} ++ ++/* ++ * Variable-length arrays. ++ */ ++static inline bool_t xdr_array(XDR *xdrs, caddr_t *arrp, uint_t *sizep, ++ const uint_t maxsize, const uint_t elsize, const xdrproc_t elproc) ++{ ++ return xdrs->x_ops->xdr_array(xdrs, arrp, sizep, maxsize, elsize, ++ elproc); ++} ++ ++#endif /* SPL_RPC_XDR_H */ +diff -uNr linux-3.2.33-go.orig/include/spl/sharefs/share.h linux-3.2.33-go/include/spl/sharefs/share.h +--- linux-3.2.33-go.orig/include/spl/sharefs/share.h 1970-01-01 01:00:00.000000000 +0100 ++++ linux-3.2.33-go/include/spl/sharefs/share.h 2012-11-16 23:22:32.407192896 +0100 +@@ -0,0 +1,28 @@ ++/*****************************************************************************\ ++ * Copyright (C) 2007-2010 Lawrence Livermore National Security, LLC. ++ * Copyright (C) 2007 The Regents of the University of California. ++ * Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER). ++ * Written by Brian Behlendorf . ++ * UCRL-CODE-235197 ++ * ++ * This file is part of the SPL, Solaris Porting Layer. ++ * For details, see . ++ * ++ * The SPL is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License as published by the ++ * Free Software Foundation; either version 2 of the License, or (at your ++ * option) any later version. ++ * ++ * The SPL is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * for more details. ++ * ++ * You should have received a copy of the GNU General Public License along ++ * with the SPL. If not, see . 
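/*
 * Editorial illustration, not part of the patch: a round trip through the
 * miniature XDR stream API declared above.  A stream is bound to a caller
 * supplied buffer with xdrmem_create(); per the comment on x_ops, a NULL
 * x_ops pointer signals failure, and each xdr_*() helper returns nonzero on
 * success.  The buffer size and encoded value are arbitrary.
 */
static int
xdr_roundtrip_example(void)
{
        char buf[16];
        unsigned in = 42, out = 0;
        XDR xe, xd;

        xdrmem_create(&xe, buf, sizeof (buf), XDR_ENCODE);
        if (xe.x_ops == NULL || !xdr_u_int(&xe, &in))
                return (-1);

        xdrmem_create(&xd, buf, sizeof (buf), XDR_DECODE);
        if (xd.x_ops == NULL || !xdr_u_int(&xd, &out))
                return (-1);

        return (out == 42 ? 0 : -1);
}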
++\*****************************************************************************/ ++ ++#ifndef _SPL_SHARE_H ++#define _SPL_SHARE_H ++ ++#endif /* SPL_SHARE_H */ +diff -uNr linux-3.2.33-go.orig/include/spl/splat-ctl.h linux-3.2.33-go/include/spl/splat-ctl.h +--- linux-3.2.33-go.orig/include/spl/splat-ctl.h 1970-01-01 01:00:00.000000000 +0100 ++++ linux-3.2.33-go/include/spl/splat-ctl.h 2012-11-16 23:22:32.403192942 +0100 +@@ -0,0 +1,111 @@ ++/*****************************************************************************\ ++ * Copyright (C) 2007-2010 Lawrence Livermore National Security, LLC. ++ * Copyright (C) 2007 The Regents of the University of California. ++ * Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER). ++ * Written by Brian Behlendorf . ++ * UCRL-CODE-235197 ++ * ++ * This file is part of the SPL, Solaris Porting Layer. ++ * For details, see . ++ * ++ * The SPL is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License as published by the ++ * Free Software Foundation; either version 2 of the License, or (at your ++ * option) any later version. ++ * ++ * The SPL is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * for more details. ++ * ++ * You should have received a copy of the GNU General Public License along ++ * with the SPL. If not, see . ++\*****************************************************************************/ ++ ++#ifndef _SPLAT_CTL_H ++#define _SPLAT_CTL_H ++ ++#include ++ ++/* ++ * Contains shared definitions for both user space and kernel space. To ++ * ensure 32-bit/64-bit interoperability over ioctl()'s only types with ++ * fixed sizes can be used. 
++ */ ++#define SPLAT_MAJOR 225 /* XXX - Arbitrary */ ++#define SPLAT_MINORS 1 ++#define SPLAT_NAME "splatctl" ++#define SPLAT_DEV "/dev/splatctl" ++ ++#define SPLAT_NAME_SIZE 20 ++#define SPLAT_DESC_SIZE 60 ++ ++typedef struct splat_user { ++ char name[SPLAT_NAME_SIZE]; /* Short name */ ++ char desc[SPLAT_DESC_SIZE]; /* Short description */ ++ __u32 id; /* Unique numeric id */ ++} splat_user_t; ++ ++#define SPLAT_CFG_MAGIC 0x15263748U ++typedef struct splat_cfg { ++ __u32 cfg_magic; /* Unique magic */ ++ __u32 cfg_cmd; /* Configure command */ ++ __s32 cfg_arg1; /* Configure command arg 1 */ ++ __s32 cfg_rc1; /* Configure response 1 */ ++ union { ++ struct { ++ __u32 size; ++ splat_user_t descs[0]; ++ } splat_subsystems; ++ struct { ++ __u32 size; ++ splat_user_t descs[0]; ++ } splat_tests; ++ } cfg_data; ++} splat_cfg_t; ++ ++#define SPLAT_CMD_MAGIC 0x9daebfc0U ++typedef struct splat_cmd { ++ __u32 cmd_magic; /* Unique magic */ ++ __u32 cmd_subsystem; /* Target subsystem */ ++ __u32 cmd_test; /* Subsystem test */ ++ __u32 cmd_data_size; /* Opaque data size */ ++ char cmd_data_str[0]; /* Opaque data region */ ++} splat_cmd_t; ++ ++/* Valid ioctls */ ++#define SPLAT_CFG _IOWR('f', 101, splat_cfg_t) ++#define SPLAT_CMD _IOWR('f', 102, splat_cmd_t) ++ ++/* Valid configuration commands */ ++#define SPLAT_CFG_BUFFER_CLEAR 0x001 /* Clear text buffer */ ++#define SPLAT_CFG_BUFFER_SIZE 0x002 /* Resize text buffer */ ++#define SPLAT_CFG_SUBSYSTEM_COUNT 0x101 /* Number of subsystem */ ++#define SPLAT_CFG_SUBSYSTEM_LIST 0x102 /* List of N subsystems */ ++#define SPLAT_CFG_TEST_COUNT 0x201 /* Number of tests */ ++#define SPLAT_CFG_TEST_LIST 0x202 /* List of N tests */ ++ ++/* ++ * Valid subsystem and test commands are defined in each subsystem as ++ * SPLAT_SUBSYSTEM_*. We do need to be careful to avoid collisions, the ++ * currently defined subsystems are as follows: ++ */ ++#define SPLAT_SUBSYSTEM_KMEM 0x0100 ++#define SPLAT_SUBSYSTEM_TASKQ 0x0200 ++#define SPLAT_SUBSYSTEM_KRNG 0x0300 ++#define SPLAT_SUBSYSTEM_MUTEX 0x0400 ++#define SPLAT_SUBSYSTEM_CONDVAR 0x0500 ++#define SPLAT_SUBSYSTEM_THREAD 0x0600 ++#define SPLAT_SUBSYSTEM_RWLOCK 0x0700 ++#define SPLAT_SUBSYSTEM_TIME 0x0800 ++#define SPLAT_SUBSYSTEM_VNODE 0x0900 ++#define SPLAT_SUBSYSTEM_KOBJ 0x0a00 ++#define SPLAT_SUBSYSTEM_ATOMIC 0x0b00 ++#define SPLAT_SUBSYSTEM_LIST 0x0c00 ++#define SPLAT_SUBSYSTEM_GENERIC 0x0d00 ++#define SPLAT_SUBSYSTEM_CRED 0x0e00 ++#define SPLAT_SUBSYSTEM_ZLIB 0x0f00 ++#define SPLAT_SUBSYSTEM_LINUX 0x1000 ++#define SPLAT_SUBSYSTEM_UNKNOWN 0xff00 ++ ++#endif /* _SPLAT_CTL_H */ +diff -uNr linux-3.2.33-go.orig/include/spl/spl-ctl.h linux-3.2.33-go/include/spl/spl-ctl.h +--- linux-3.2.33-go.orig/include/spl/spl-ctl.h 1970-01-01 01:00:00.000000000 +0100 ++++ linux-3.2.33-go/include/spl/spl-ctl.h 2012-11-16 23:22:32.403192942 +0100 +@@ -0,0 +1,45 @@ ++/*****************************************************************************\ ++ * Copyright (C) 2007-2010 Lawrence Livermore National Security, LLC. ++ * Copyright (C) 2007 The Regents of the University of California. ++ * Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER). ++ * Written by Brian Behlendorf . ++ * UCRL-CODE-235197 ++ * ++ * This file is part of the SPL, Solaris Porting Layer. ++ * For details, see . 
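A small user-space sketch of the ioctl interface defined in splat-ctl.h above (illustrative, not part of the patch): it queries the number of registered subsystems through SPLAT_CFG. That the count is returned in cfg_rc1 is an assumption based on the "Configure response 1" field comment, and the header itself must be visible to user space for this to compile.

#include <fcntl.h>
#include <stdio.h>
#include <string.h>
#include <sys/ioctl.h>
#include <unistd.h>

int main(void)
{
	splat_cfg_t cfg;
	int fd = open(SPLAT_DEV, O_RDWR);

	if (fd < 0)
		return 1;

	memset(&cfg, 0, sizeof (cfg));
	cfg.cfg_magic = SPLAT_CFG_MAGIC;
	cfg.cfg_cmd = SPLAT_CFG_SUBSYSTEM_COUNT;

	if (ioctl(fd, SPLAT_CFG, &cfg) == 0)
		printf("subsystems: %d\n", cfg.cfg_rc1);	/* count assumed in cfg_rc1 */

	close(fd);
	return 0;
}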
++ * ++ * The SPL is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License as published by the ++ * Free Software Foundation; either version 2 of the License, or (at your ++ * option) any later version. ++ * ++ * The SPL is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * for more details. ++ * ++ * You should have received a copy of the GNU General Public License along ++ * with the SPL. If not, see . ++\*****************************************************************************/ ++ ++#ifndef _DEBUG_CTL_H ++#define _DEBUG_CTL_H ++ ++/* ++ * Contains shared definitions which both the user space ++ * and kernel space portions of splat must agree on. ++ */ ++typedef struct spl_debug_header { ++ int ph_len; ++ int ph_flags; ++ int ph_subsys; ++ int ph_mask; ++ int ph_cpu_id; ++ int ph_sec; ++ long ph_usec; ++ int ph_stack; ++ int ph_pid; ++ int ph_line_num; ++} spl_debug_header_t; ++ ++#endif /* _DEBUG_CTL_H */ +diff -uNr linux-3.2.33-go.orig/include/spl/spl-debug.h linux-3.2.33-go/include/spl/spl-debug.h +--- linux-3.2.33-go.orig/include/spl/spl-debug.h 1970-01-01 01:00:00.000000000 +0100 ++++ linux-3.2.33-go/include/spl/spl-debug.h 2012-11-16 23:22:32.402192954 +0100 +@@ -0,0 +1,276 @@ ++/*****************************************************************************\ ++ * Copyright (C) 2007-2010 Lawrence Livermore National Security, LLC. ++ * Copyright (C) 2007 The Regents of the University of California. ++ * Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER). ++ * Written by Brian Behlendorf . ++ * UCRL-CODE-235197 ++ * ++ * This file is part of the SPL, Solaris Porting Layer. ++ * For details, see . ++ * ++ * The SPL is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License as published by the ++ * Free Software Foundation; either version 2 of the License, or (at your ++ * option) any later version. ++ * ++ * The SPL is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * for more details. ++ * ++ * You should have received a copy of the GNU General Public License along ++ * with the SPL. If not, see . ++\*****************************************************************************/ ++ ++/* ++ * Available debug functions. These function should be used by any ++ * package which needs to integrate with the SPL log infrastructure. ++ * ++ * SDEBUG() - Log debug message with specified mask. ++ * SDEBUG_LIMIT() - Log just 1 debug message with specified mask. ++ * SWARN() - Log a warning message. ++ * SERROR() - Log an error message. ++ * SEMERG() - Log an emergency error message. ++ * SCONSOLE() - Log a generic message to the console. ++ * ++ * SENTRY - Log entry point to a function. ++ * SEXIT - Log exit point from a function. ++ * SRETURN(x) - Log return from a function. ++ * SGOTO(x, y) - Log goto within a function. 
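++ *
++ * Illustrative usage sketch (assuming DEBUG_LOG is enabled; the example
++ * function below is hypothetical, not part of the original header):
++ *
++ *   static int example(int arg)
++ *   {
++ *           SENTRY;
++ *           if (arg < 0) {
++ *                   SWARN("bad arg %d\n", arg);
++ *                   SRETURN(-EINVAL);
++ *           }
++ *           SDEBUG(SD_INFO, "arg=%d\n", arg);
++ *           SRETURN(0);
++ *   }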
++ */ ++ ++#ifndef _SPL_DEBUG_INTERNAL_H ++#define _SPL_DEBUG_INTERNAL_H ++ ++#include ++#include ++ ++#define SS_UNDEFINED 0x00000001 ++#define SS_ATOMIC 0x00000002 ++#define SS_KOBJ 0x00000004 ++#define SS_VNODE 0x00000008 ++#define SS_TIME 0x00000010 ++#define SS_RWLOCK 0x00000020 ++#define SS_THREAD 0x00000040 ++#define SS_CONDVAR 0x00000080 ++#define SS_MUTEX 0x00000100 ++#define SS_RNG 0x00000200 ++#define SS_TASKQ 0x00000400 ++#define SS_KMEM 0x00000800 ++#define SS_DEBUG 0x00001000 ++#define SS_GENERIC 0x00002000 ++#define SS_PROC 0x00004000 ++#define SS_MODULE 0x00008000 ++#define SS_CRED 0x00010000 ++#define SS_KSTAT 0x00020000 ++#define SS_XDR 0x00040000 ++#define SS_TSD 0x00080000 ++#define SS_ZLIB 0x00100000 ++#define SS_USER1 0x01000000 ++#define SS_USER2 0x02000000 ++#define SS_USER3 0x04000000 ++#define SS_USER4 0x08000000 ++#define SS_USER5 0x10000000 ++#define SS_USER6 0x20000000 ++#define SS_USER7 0x40000000 ++#define SS_USER8 0x80000000 ++#define SS_DEBUG_SUBSYS SS_UNDEFINED ++ ++#define SD_TRACE 0x00000001 ++#define SD_INFO 0x00000002 ++#define SD_WARNING 0x00000004 ++#define SD_ERROR 0x00000008 ++#define SD_EMERG 0x00000010 ++#define SD_CONSOLE 0x00000020 ++#define SD_IOCTL 0x00000040 ++#define SD_DPRINTF 0x00000080 ++#define SD_OTHER 0x00000100 ++#define SD_CANTMASK (SD_ERROR | SD_EMERG | SD_WARNING | SD_CONSOLE) ++ ++/* Debug log support enabled */ ++#ifdef DEBUG_LOG ++ ++#define __SDEBUG(cdls, subsys, mask, format, a...) \ ++do { \ ++ if (((mask) & SD_CANTMASK) != 0 || \ ++ ((spl_debug_mask & (mask)) != 0 && \ ++ (spl_debug_subsys & (subsys)) != 0)) \ ++ spl_debug_msg(cdls, subsys, mask, __FILE__, \ ++ __FUNCTION__, __LINE__, format, ## a); \ ++} while (0) ++ ++#define SDEBUG(mask, format, a...) \ ++ __SDEBUG(NULL, SS_DEBUG_SUBSYS, mask, format, ## a) ++ ++#define __SDEBUG_LIMIT(subsys, mask, format, a...) \ ++do { \ ++ static spl_debug_limit_state_t cdls; \ ++ \ ++ __SDEBUG(&cdls, subsys, mask, format, ## a); \ ++} while (0) ++ ++#define SDEBUG_LIMIT(mask, format, a...) \ ++ __SDEBUG_LIMIT(SS_DEBUG_SUBSYS, mask, format, ## a) ++ ++#define SWARN(fmt, a...) SDEBUG_LIMIT(SD_WARNING, fmt, ## a) ++#define SERROR(fmt, a...) SDEBUG_LIMIT(SD_ERROR, fmt, ## a) ++#define SEMERG(fmt, a...) SDEBUG_LIMIT(SD_EMERG, fmt, ## a) ++#define SCONSOLE(mask, fmt, a...) 
SDEBUG(SD_CONSOLE | (mask), fmt, ## a) ++ ++#define SENTRY SDEBUG(SD_TRACE, "Process entered\n") ++#define SEXIT SDEBUG(SD_TRACE, "Process leaving\n") ++ ++#define SRETURN(rc) \ ++do { \ ++ typeof(rc) RETURN__ret = (rc); \ ++ SDEBUG(SD_TRACE, "Process leaving (rc=%lu : %ld : %lx)\n", \ ++ (long)RETURN__ret, (long)RETURN__ret, (long)RETURN__ret); \ ++ return RETURN__ret; \ ++} while (0) ++ ++#define SGOTO(label, rc) \ ++do { \ ++ long GOTO__ret = (long)(rc); \ ++ SDEBUG(SD_TRACE,"Process leaving via %s (rc=%lu : %ld : %lx)\n",\ ++ #label, (unsigned long)GOTO__ret, (signed long)GOTO__ret, \ ++ (signed long)GOTO__ret); \ ++ goto label; \ ++} while (0) ++ ++typedef struct { ++ unsigned long cdls_next; ++ int cdls_count; ++ long cdls_delay; ++} spl_debug_limit_state_t; ++ ++/* Global debug variables */ ++extern unsigned long spl_debug_subsys; ++extern unsigned long spl_debug_mask; ++extern unsigned long spl_debug_printk; ++extern int spl_debug_mb; ++extern unsigned int spl_debug_binary; ++extern unsigned int spl_debug_catastrophe; ++extern unsigned int spl_debug_panic_on_bug; ++extern char spl_debug_file_path[PATH_MAX]; ++extern unsigned int spl_console_ratelimit; ++extern long spl_console_max_delay; ++extern long spl_console_min_delay; ++extern unsigned int spl_console_backoff; ++extern unsigned int spl_debug_stack; ++ ++/* Exported debug functions */ ++extern int spl_debug_mask2str(char *str, int size, unsigned long mask, int ss); ++extern int spl_debug_str2mask(unsigned long *mask, const char *str, int ss); ++extern unsigned long spl_debug_set_mask(unsigned long mask); ++extern unsigned long spl_debug_get_mask(void); ++extern unsigned long spl_debug_set_subsys(unsigned long mask); ++extern unsigned long spl_debug_get_subsys(void); ++extern int spl_debug_set_mb(int mb); ++extern int spl_debug_get_mb(void); ++extern int spl_debug_dumplog(int flags); ++extern void spl_debug_dumpstack(struct task_struct *tsk); ++extern void spl_debug_bug(char *file, const char *fn, const int line, int fl); ++extern int spl_debug_msg(void *arg, int subsys, int mask, const char *file, ++ const char *fn, const int line, const char *format, ...); ++extern int spl_debug_clear_buffer(void); ++extern int spl_debug_mark_buffer(char *text); ++ ++int spl_debug_init(void); ++void spl_debug_fini(void); ++ ++/* Debug log support disabled */ ++#else /* DEBUG_LOG */ ++ ++#define __SDEBUG(x, y, mask, fmt, a...) ((void)0) ++#define SDEBUG(mask, fmt, a...) ((void)0) ++#define SDEBUG_LIMIT(x, y, fmt, a...) ((void)0) ++#define SWARN(fmt, a...) ((void)0) ++#define SERROR(fmt, a...) ((void)0) ++#define SEMERG(fmt, a...) ((void)0) ++#define SCONSOLE(mask, fmt, a...) 
((void)0) ++ ++#define SENTRY ((void)0) ++#define SEXIT ((void)0) ++#define SRETURN(x) return (x) ++#define SGOTO(x, y) { ((void)(y)); goto x; } ++ ++static inline unsigned long ++spl_debug_set_mask(unsigned long mask) { ++ return (0); ++} ++ ++static inline unsigned long ++spl_debug_get_mask(void) { ++ return (0); ++} ++ ++static inline unsigned long ++spl_debug_set_subsys(unsigned long mask) { ++ return (0); ++} ++ ++static inline unsigned long ++spl_debug_get_subsys(void) { ++ return (0); ++} ++ ++static inline int ++spl_debug_set_mb(int mb) { ++ return (0); ++} ++ ++static inline int ++spl_debug_get_mb(void) { ++ return (0); ++} ++ ++static inline int ++spl_debug_dumplog(int flags) ++{ ++ return (0); ++} ++ ++static inline void ++spl_debug_dumpstack(struct task_struct *tsk) ++{ ++ return; ++} ++ ++static inline void ++spl_debug_bug(char *file, const char *fn, const int line, int fl) ++{ ++ return; ++} ++ ++static inline int ++spl_debug_msg(void *arg, int subsys, int mask, const char *file, ++ const char *fn, const int line, const char *format, ...) ++{ ++ return (0); ++} ++ ++static inline int ++spl_debug_clear_buffer(void) ++{ ++ return (0); ++} ++ ++static inline int ++spl_debug_mark_buffer(char *text) ++{ ++ return (0); ++} ++ ++static inline int ++spl_debug_init(void) { ++ return (0); ++} ++ ++static inline void ++spl_debug_fini(void) { ++ return; ++} ++ ++#endif /* DEBUG_LOG */ ++ ++#endif /* SPL_DEBUG_INTERNAL_H */ +diff -uNr linux-3.2.33-go.orig/include/spl/spl-device.h linux-3.2.33-go/include/spl/spl-device.h +--- linux-3.2.33-go.orig/include/spl/spl-device.h 1970-01-01 01:00:00.000000000 +0100 ++++ linux-3.2.33-go/include/spl/spl-device.h 2012-11-16 23:22:32.407192896 +0100 +@@ -0,0 +1,90 @@ ++/*****************************************************************************\ ++ * Copyright (C) 2007-2010 Lawrence Livermore National Security, LLC. ++ * Copyright (C) 2007 The Regents of the University of California. ++ * Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER). ++ * Written by Brian Behlendorf . ++ * UCRL-CODE-235197 ++ * ++ * This file is part of the SPL, Solaris Porting Layer. ++ * For details, see . ++ * ++ * The SPL is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License as published by the ++ * Free Software Foundation; either version 2 of the License, or (at your ++ * option) any later version. ++ * ++ * The SPL is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * for more details. ++ * ++ * You should have received a copy of the GNU General Public License along ++ * with the SPL. If not, see . ++\*****************************************************************************/ ++ ++#ifndef _SPL_DEVICE_H ++#define _SPL_DEVICE_H ++ ++#include ++ ++/* ++ * Preferred API from 2.6.18 to 2.6.26+ ++ */ ++#ifdef HAVE_DEVICE_CREATE ++ ++typedef struct class spl_class; ++typedef struct device spl_device; ++ ++#define spl_class_create(mod, name) class_create(mod, name) ++#define spl_class_destroy(cls) class_destroy(cls) ++ ++# ifdef HAVE_5ARGS_DEVICE_CREATE ++# define spl_device_create(cls, parent, devt, drvdata, fmt, args...) \ ++ device_create(cls, parent, devt, drvdata, fmt, ## args) ++# else ++# define spl_device_create(cls, parent, devt, drvdata, fmt, args...) 
\ ++ device_create(cls, parent, devt, fmt, ## args) ++# endif ++ ++#define spl_device_destroy(cls, cls_dev, devt) \ ++ device_destroy(cls, devt) ++ ++/* ++ * Preferred API from 2.6.13 to 2.6.17 ++ * Depricated in 2.6.18 ++ * Removed in 2.6.26 ++ */ ++#else ++#ifdef HAVE_CLASS_DEVICE_CREATE ++ ++typedef struct class spl_class; ++typedef struct class_device spl_device; ++ ++#define spl_class_create(mod, name) class_create(mod, name) ++#define spl_class_destroy(cls) class_destroy(cls) ++#define spl_device_create(cls, parent, devt, device, fmt, args...) \ ++ class_device_create(cls, parent, devt, device, fmt, ## args) ++#define spl_device_destroy(cls, cls_dev, devt) \ ++ class_device_unregister(cls_dev) ++ ++/* ++ * Prefered API from 2.6.0 to 2.6.12 ++ * Depricated in 2.6.13 ++ * Removed in 2.6.13 ++ */ ++#else /* Legacy API */ ++ ++typedef struct class_simple spl_class; ++typedef struct class_device spl_class_device; ++ ++#define spl_class_create(mod, name) class_simple_create(mod, name) ++#define spl_class_destroy(cls) class_simple_destroy(cls) ++#define spl_device_create(cls, parent, devt, device, fmt, args...) \ ++ class_simple_device_add(cls, devt, device, fmt, ## args) ++#define spl_device_destroy(cls, cls_dev, devt) \ ++ class_simple_device_remove(devt) ++ ++#endif /* HAVE_CLASS_DEVICE_CREATE */ ++#endif /* HAVE_DEVICE_CREATE */ ++ ++#endif /* _SPL_DEVICE_H */ +diff -uNr linux-3.2.33-go.orig/include/spl/spl-trace.h linux-3.2.33-go/include/spl/spl-trace.h +--- linux-3.2.33-go.orig/include/spl/spl-trace.h 1970-01-01 01:00:00.000000000 +0100 ++++ linux-3.2.33-go/include/spl/spl-trace.h 2012-11-16 23:22:32.402192954 +0100 +@@ -0,0 +1,132 @@ ++/*****************************************************************************\ ++ * Copyright (C) 2007-2010 Lawrence Livermore National Security, LLC. ++ * Copyright (C) 2007 The Regents of the University of California. ++ * Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER). ++ * Written by Brian Behlendorf . ++ * UCRL-CODE-235197 ++ * ++ * This file is part of the SPL, Solaris Porting Layer. ++ * For details, see . ++ * ++ * The SPL is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License as published by the ++ * Free Software Foundation; either version 2 of the License, or (at your ++ * option) any later version. ++ * ++ * The SPL is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * for more details. ++ * ++ * You should have received a copy of the GNU General Public License along ++ * with the SPL. If not, see . 
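The spl-device.h wrappers above exist so one call site works across the 2.6.x device-model changes. A rough module-init sketch, not part of the patch; the "example" names, the MKDEV parameters, and the IS_ERR-based error check are assumptions rather than anything the patch prescribes.

static spl_class *example_class;
static spl_device *example_dev;

static int __init example_init(void)
{
	example_class = spl_class_create(THIS_MODULE, "example");
	if (IS_ERR(example_class))
		return PTR_ERR(example_class);

	/* Resolves to device_create()/class_device_create()/... as available. */
	example_dev = spl_device_create(example_class, NULL,
	    MKDEV(SPLAT_MAJOR, 0), NULL, "example%d", 0);
	return 0;
}

static void __exit example_exit(void)
{
	spl_device_destroy(example_class, example_dev, MKDEV(SPLAT_MAJOR, 0));
	spl_class_destroy(example_class);
}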
++\*****************************************************************************/ ++ ++#ifndef _SPL_TRACE_H ++#define _SPL_TRACE_H ++ ++#define TCD_MAX_PAGES (5 << (20 - PAGE_SHIFT)) ++#define TCD_STOCK_PAGES (TCD_MAX_PAGES) ++#define TRACE_CONSOLE_BUFFER_SIZE 1024 ++ ++#define SPL_DEFAULT_MAX_DELAY (600 * HZ) ++#define SPL_DEFAULT_MIN_DELAY ((HZ + 1) / 2) ++#define SPL_DEFAULT_BACKOFF 2 ++ ++#define DL_NOTHREAD 0x0001 /* Do not create a new thread */ ++#define DL_SINGLE_CPU 0x0002 /* Collect pages from this CPU*/ ++ ++typedef struct dumplog_priv { ++ wait_queue_head_t dp_waitq; ++ pid_t dp_pid; ++ int dp_flags; ++ atomic_t dp_done; ++} dumplog_priv_t; ++ ++/* Three trace data types */ ++typedef enum { ++ TCD_TYPE_PROC, ++ TCD_TYPE_SOFTIRQ, ++ TCD_TYPE_IRQ, ++ TCD_TYPE_MAX ++} tcd_type_t; ++ ++union trace_data_union { ++ struct trace_cpu_data { ++ /* pages with trace records not yet processed by tracefiled */ ++ struct list_head tcd_pages; ++ /* number of pages on ->tcd_pages */ ++ unsigned long tcd_cur_pages; ++ /* Max number of pages allowed on ->tcd_pages */ ++ unsigned long tcd_max_pages; ++ ++ /* ++ * preallocated pages to write trace records into. Pages from ++ * ->tcd_stock_pages are moved to ->tcd_pages by spl_debug_msg(). ++ * ++ * This list is necessary, because on some platforms it's ++ * impossible to perform efficient atomic page allocation in a ++ * non-blockable context. ++ * ++ * Such platforms fill ->tcd_stock_pages "on occasion", when ++ * tracing code is entered in blockable context. ++ * ++ * trace_get_tage_try() tries to get a page from ++ * ->tcd_stock_pages first and resorts to atomic page ++ * allocation only if this queue is empty. ->tcd_stock_pages ++ * is replenished when tracing code is entered in blocking ++ * context (darwin-tracefile.c:trace_get_tcd()). We try to ++ * maintain TCD_STOCK_PAGES (40 by default) pages in this ++ * queue. Atomic allocation is only required if more than ++ * TCD_STOCK_PAGES pagesful are consumed by trace records all ++ * emitted in non-blocking contexts. Which is quite unlikely. ++ */ ++ struct list_head tcd_stock_pages; ++ /* number of pages on ->tcd_stock_pages */ ++ unsigned long tcd_cur_stock_pages; ++ ++ unsigned short tcd_shutting_down; ++ unsigned short tcd_cpu; ++ unsigned short tcd_type; ++ /* The factors to share debug memory. */ ++ unsigned short tcd_pages_factor; ++ ++ /* ++ * This spinlock is needed to workaround the problem of ++ * set_cpus_allowed() being GPL-only. Since we cannot ++ * schedule a thread on a specific CPU when dumping the ++ * pages, we must use the spinlock for mutual exclusion. 
++ */ ++ spinlock_t tcd_lock; ++ unsigned long tcd_lock_flags; ++ } tcd; ++ char __pad[L1_CACHE_ALIGN(sizeof(struct trace_cpu_data))]; ++}; ++ ++extern union trace_data_union (*trace_data[TCD_TYPE_MAX])[NR_CPUS]; ++ ++#define tcd_for_each(tcd, i, j) \ ++ for (i = 0; i < TCD_TYPE_MAX && trace_data[i]; i++) \ ++ for (j = 0, ((tcd) = &(*trace_data[i])[j].tcd); \ ++ j < num_possible_cpus(); j++, (tcd) = &(*trace_data[i])[j].tcd) ++ ++#define tcd_for_each_type_lock(tcd, i, cpu) \ ++ for (i = 0; i < TCD_TYPE_MAX && trace_data[i] && \ ++ (tcd = &(*trace_data[i])[cpu].tcd) && \ ++ trace_lock_tcd(tcd); trace_unlock_tcd(tcd), i++) ++ ++struct trace_page { ++ struct page *page; /* page itself */ ++ struct list_head linkage; /* Used by trace_data_union */ ++ unsigned int used; /* number of bytes used within this page */ ++ unsigned short cpu; /* cpu that owns this page */ ++ unsigned short type; /* type(context) of this page */ ++}; ++ ++struct page_collection { ++ struct list_head pc_pages; ++ spinlock_t pc_lock; ++ int pc_want_daemon_pages; ++}; ++ ++#endif /* SPL_TRACE_H */ +diff -uNr linux-3.2.33-go.orig/include/spl/strings.h linux-3.2.33-go/include/spl/strings.h +--- linux-3.2.33-go.orig/include/spl/strings.h 1970-01-01 01:00:00.000000000 +0100 ++++ linux-3.2.33-go/include/spl/strings.h 2012-11-16 23:22:32.403192942 +0100 +@@ -0,0 +1,28 @@ ++/*****************************************************************************\ ++ * Copyright (C) 2007-2010 Lawrence Livermore National Security, LLC. ++ * Copyright (C) 2007 The Regents of the University of California. ++ * Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER). ++ * Written by Brian Behlendorf . ++ * UCRL-CODE-235197 ++ * ++ * This file is part of the SPL, Solaris Porting Layer. ++ * For details, see . ++ * ++ * The SPL is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License as published by the ++ * Free Software Foundation; either version 2 of the License, or (at your ++ * option) any later version. ++ * ++ * The SPL is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * for more details. ++ * ++ * You should have received a copy of the GNU General Public License along ++ * with the SPL. If not, see . ++\*****************************************************************************/ ++ ++#ifndef _SPL_STRINGS_H ++#define _SPL_STRINGS_H ++ ++#endif /* SPL_STRINGS_H */ +diff -uNr linux-3.2.33-go.orig/include/spl/sys/acl.h linux-3.2.33-go/include/spl/sys/acl.h +--- linux-3.2.33-go.orig/include/spl/sys/acl.h 1970-01-01 01:00:00.000000000 +0100 ++++ linux-3.2.33-go/include/spl/sys/acl.h 2012-11-16 23:22:32.405192918 +0100 +@@ -0,0 +1,117 @@ ++/*****************************************************************************\ ++ * Copyright (C) 2007-2010 Lawrence Livermore National Security, LLC. ++ * Copyright (C) 2007 The Regents of the University of California. ++ * Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER). ++ * Written by Brian Behlendorf . ++ * UCRL-CODE-235197 ++ * ++ * This file is part of the SPL, Solaris Porting Layer. ++ * For details, see . 
++ * ++ * The SPL is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License as published by the ++ * Free Software Foundation; either version 2 of the License, or (at your ++ * option) any later version. ++ * ++ * The SPL is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * for more details. ++ * ++ * You should have received a copy of the GNU General Public License along ++ * with the SPL. If not, see . ++\*****************************************************************************/ ++ ++#ifndef _SPL_ACL_H ++#define _SPL_ACL_H ++ ++#include ++ ++typedef struct ace { ++ uid_t a_who; ++ uint32_t a_access_mask; ++ uint16_t a_flags; ++ uint16_t a_type; ++} ace_t; ++ ++typedef struct ace_object { ++ uid_t a_who; /* uid or gid */ ++ uint32_t a_access_mask; /* read,write,... */ ++ uint16_t a_flags; /* see below */ ++ uint16_t a_type; /* allow or deny */ ++ uint8_t a_obj_type[16]; /* obj type */ ++ uint8_t a_inherit_obj_type[16]; /* inherit obj */ ++} ace_object_t; ++ ++#define MAX_ACL_ENTRIES 1024 ++ ++#define ACE_READ_DATA 0x00000001 ++#define ACE_LIST_DIRECTORY 0x00000001 ++#define ACE_WRITE_DATA 0x00000002 ++#define ACE_ADD_FILE 0x00000002 ++#define ACE_APPEND_DATA 0x00000004 ++#define ACE_ADD_SUBDIRECTORY 0x00000004 ++#define ACE_READ_NAMED_ATTRS 0x00000008 ++#define ACE_WRITE_NAMED_ATTRS 0x00000010 ++#define ACE_EXECUTE 0x00000020 ++#define ACE_DELETE_CHILD 0x00000040 ++#define ACE_READ_ATTRIBUTES 0x00000080 ++#define ACE_WRITE_ATTRIBUTES 0x00000100 ++#define ACE_DELETE 0x00010000 ++#define ACE_READ_ACL 0x00020000 ++#define ACE_WRITE_ACL 0x00040000 ++#define ACE_WRITE_OWNER 0x00080000 ++#define ACE_SYNCHRONIZE 0x00100000 ++ ++#define ACE_FILE_INHERIT_ACE 0x0001 ++#define ACE_DIRECTORY_INHERIT_ACE 0x0002 ++#define ACE_NO_PROPAGATE_INHERIT_ACE 0x0004 ++#define ACE_INHERIT_ONLY_ACE 0x0008 ++#define ACE_SUCCESSFUL_ACCESS_ACE_FLAG 0x0010 ++#define ACE_FAILED_ACCESS_ACE_FLAG 0x0020 ++#define ACE_IDENTIFIER_GROUP 0x0040 ++#define ACE_INHERITED_ACE 0x0080 ++#define ACE_OWNER 0x1000 ++#define ACE_GROUP 0x2000 ++#define ACE_EVERYONE 0x4000 ++ ++#define ACE_ACCESS_ALLOWED_ACE_TYPE 0x0000 ++#define ACE_ACCESS_DENIED_ACE_TYPE 0x0001 ++#define ACE_SYSTEM_AUDIT_ACE_TYPE 0x0002 ++#define ACE_SYSTEM_ALARM_ACE_TYPE 0x0003 ++ ++#define ACL_AUTO_INHERIT 0x0001 ++#define ACL_PROTECTED 0x0002 ++#define ACL_DEFAULTED 0x0004 ++#define ACL_FLAGS_ALL (ACL_AUTO_INHERIT|ACL_PROTECTED|ACL_DEFAULTED) ++ ++#define ACE_ACCESS_ALLOWED_COMPOUND_ACE_TYPE 0x04 ++#define ACE_ACCESS_ALLOWED_OBJECT_ACE_TYPE 0x05 ++#define ACE_ACCESS_DENIED_OBJECT_ACE_TYPE 0x06 ++#define ACE_SYSTEM_AUDIT_OBJECT_ACE_TYPE 0x07 ++#define ACE_SYSTEM_ALARM_OBJECT_ACE_TYPE 0x08 ++#define ACE_ACCESS_ALLOWED_CALLBACK_ACE_TYPE 0x09 ++#define ACE_ACCESS_DENIED_CALLBACK_ACE_TYPE 0x0A ++#define ACE_ACCESS_ALLOWED_CALLBACK_OBJECT_ACE_TYPE 0x0B ++#define ACE_ACCESS_DENIED_CALLBACK_OBJECT_ACE_TYPE 0x0C ++#define ACE_SYSTEM_AUDIT_CALLBACK_ACE_TYPE 0x0D ++#define ACE_SYSTEM_ALARM_CALLBACK_ACE_TYPE 0x0E ++#define ACE_SYSTEM_AUDIT_CALLBACK_OBJECT_ACE_TYPE 0x0F ++#define ACE_SYSTEM_ALARM_CALLBACK_OBJECT_ACE_TYPE 0x10 ++ ++#define ACE_ALL_TYPES 0x001F ++ ++#define ACE_TYPE_FLAGS (ACE_OWNER|ACE_GROUP|ACE_EVERYONE|ACE_IDENTIFIER_GROUP) ++ ++#define ACE_ALL_PERMS (ACE_READ_DATA|ACE_LIST_DIRECTORY|ACE_WRITE_DATA| \ ++ 
ACE_ADD_FILE|ACE_APPEND_DATA|ACE_ADD_SUBDIRECTORY|ACE_READ_NAMED_ATTRS| \ ++ ACE_WRITE_NAMED_ATTRS|ACE_EXECUTE|ACE_DELETE_CHILD|ACE_READ_ATTRIBUTES| \ ++ ACE_WRITE_ATTRIBUTES|ACE_DELETE|ACE_READ_ACL|ACE_WRITE_ACL| \ ++ ACE_WRITE_OWNER|ACE_SYNCHRONIZE) ++ ++#define VSA_ACE 0x0010 ++#define VSA_ACECNT 0x0020 ++#define VSA_ACE_ALLTYPES 0x0040 ++#define VSA_ACE_ACLFLAGS 0x0080 ++ ++#endif /* _SPL_ACL_H */ +diff -uNr linux-3.2.33-go.orig/include/spl/sys/acl_impl.h linux-3.2.33-go/include/spl/sys/acl_impl.h +--- linux-3.2.33-go.orig/include/spl/sys/acl_impl.h 1970-01-01 01:00:00.000000000 +0100 ++++ linux-3.2.33-go/include/spl/sys/acl_impl.h 2012-11-16 23:22:32.404192930 +0100 +@@ -0,0 +1,28 @@ ++/*****************************************************************************\ ++ * Copyright (C) 2007-2010 Lawrence Livermore National Security, LLC. ++ * Copyright (C) 2007 The Regents of the University of California. ++ * Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER). ++ * Written by Brian Behlendorf . ++ * UCRL-CODE-235197 ++ * ++ * This file is part of the SPL, Solaris Porting Layer. ++ * For details, see . ++ * ++ * The SPL is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License as published by the ++ * Free Software Foundation; either version 2 of the License, or (at your ++ * option) any later version. ++ * ++ * The SPL is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * for more details. ++ * ++ * You should have received a copy of the GNU General Public License along ++ * with the SPL. If not, see . ++\*****************************************************************************/ ++ ++#ifndef _SPL_ACL_IMPL_H ++#define _SPL_ACL_IMPL_H ++ ++#endif /* _SPL_ACL_IMPL_H */ +diff -uNr linux-3.2.33-go.orig/include/spl/sys/atomic.h linux-3.2.33-go/include/spl/sys/atomic.h +--- linux-3.2.33-go.orig/include/spl/sys/atomic.h 1970-01-01 01:00:00.000000000 +0100 ++++ linux-3.2.33-go/include/spl/sys/atomic.h 2012-11-16 23:22:32.404192930 +0100 +@@ -0,0 +1,296 @@ ++/*****************************************************************************\ ++ * Copyright (C) 2007-2010 Lawrence Livermore National Security, LLC. ++ * Copyright (C) 2007 The Regents of the University of California. ++ * Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER). ++ * Written by Brian Behlendorf . ++ * UCRL-CODE-235197 ++ * ++ * This file is part of the SPL, Solaris Porting Layer. ++ * For details, see . ++ * ++ * The SPL is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License as published by the ++ * Free Software Foundation; either version 2 of the License, or (at your ++ * option) any later version. ++ * ++ * The SPL is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * for more details. ++ * ++ * You should have received a copy of the GNU General Public License along ++ * with the SPL. If not, see . 
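For orientation, one entry built from the ace_t structure and ACE_* constants defined in sys/acl.h above: an allow entry granting basic read access to everyone. This is illustrative only; treating a_who as unused for ACE_EVERYONE entries is an assumption, not something the header states.

static const ace_t everyone_read = {
	.a_who		= 0,	/* assumed ignored when ACE_EVERYONE is set */
	.a_access_mask	= ACE_READ_DATA | ACE_READ_ATTRIBUTES | ACE_READ_ACL,
	.a_flags	= ACE_EVERYONE,
	.a_type		= ACE_ACCESS_ALLOWED_ACE_TYPE,
};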
++\*****************************************************************************/ ++ ++#ifndef _SPL_ATOMIC_H ++#define _SPL_ATOMIC_H ++ ++#include ++#include ++#include ++ ++#ifndef HAVE_ATOMIC64_CMPXCHG ++#define atomic64_cmpxchg(v, o, n) (cmpxchg(&((v)->counter), (o), (n))) ++#endif ++ ++#ifndef HAVE_ATOMIC64_XCHG ++#define atomic64_xchg(v, n) (xchg(&((v)->counter), n)) ++#endif ++ ++/* ++ * Two approaches to atomic operations are implemented each with its ++ * own benefits are drawbacks imposed by the Solaris API. Neither ++ * approach handles the issue of word breaking when using a 64-bit ++ * atomic variable on a 32-bit arch. The Solaris API would need to ++ * add an atomic read call to correctly support this. ++ * ++ * When ATOMIC_SPINLOCK is defined all atomic operations will be ++ * serialized through global spin locks. This is bad for performance ++ * but it does allow a simple generic implementation. ++ * ++ * When ATOMIC_SPINLOCK is not defined the Linux atomic operations ++ * are used. This is safe as long as the core Linux implementation ++ * doesn't change because we are relying on the fact that an atomic ++ * type is really just a uint32 or uint64. If this changes at some ++ * point in the future we need to fall-back to the spin approach. ++ */ ++#ifdef ATOMIC_SPINLOCK ++extern spinlock_t atomic32_lock; ++extern spinlock_t atomic64_lock; ++ ++static __inline__ void ++atomic_inc_32(volatile uint32_t *target) ++{ ++ spin_lock(&atomic32_lock); ++ (*target)++; ++ spin_unlock(&atomic32_lock); ++} ++ ++static __inline__ void ++atomic_dec_32(volatile uint32_t *target) ++{ ++ spin_lock(&atomic32_lock); ++ (*target)--; ++ spin_unlock(&atomic32_lock); ++} ++ ++static __inline__ void ++atomic_add_32(volatile uint32_t *target, int32_t delta) ++{ ++ spin_lock(&atomic32_lock); ++ *target += delta; ++ spin_unlock(&atomic32_lock); ++} ++ ++static __inline__ void ++atomic_sub_32(volatile uint32_t *target, int32_t delta) ++{ ++ spin_lock(&atomic32_lock); ++ *target -= delta; ++ spin_unlock(&atomic32_lock); ++} ++ ++static __inline__ uint32_t ++atomic_inc_32_nv(volatile uint32_t *target) ++{ ++ uint32_t nv; ++ ++ spin_lock(&atomic32_lock); ++ nv = ++(*target); ++ spin_unlock(&atomic32_lock); ++ ++ return nv; ++} ++ ++static __inline__ uint32_t ++atomic_dec_32_nv(volatile uint32_t *target) ++{ ++ uint32_t nv; ++ ++ spin_lock(&atomic32_lock); ++ nv = --(*target); ++ spin_unlock(&atomic32_lock); ++ ++ return nv; ++} ++ ++static __inline__ uint32_t ++atomic_add_32_nv(volatile uint32_t *target, uint32_t delta) ++{ ++ uint32_t nv; ++ ++ spin_lock(&atomic32_lock); ++ *target += delta; ++ nv = *target; ++ spin_unlock(&atomic32_lock); ++ ++ return nv; ++} ++ ++static __inline__ uint32_t ++atomic_sub_32_nv(volatile uint32_t *target, uint32_t delta) ++{ ++ uint32_t nv; ++ ++ spin_lock(&atomic32_lock); ++ *target -= delta; ++ nv = *target; ++ spin_unlock(&atomic32_lock); ++ ++ return nv; ++} ++ ++static __inline__ uint32_t ++atomic_cas_32(volatile uint32_t *target, uint32_t cmp, ++ uint32_t newval) ++{ ++ uint32_t rc; ++ ++ spin_lock(&atomic32_lock); ++ rc = *target; ++ if (*target == cmp) ++ *target = newval; ++ ++ spin_unlock(&atomic32_lock); ++ ++ return rc; ++} ++ ++static __inline__ void ++atomic_inc_64(volatile uint64_t *target) ++{ ++ spin_lock(&atomic64_lock); ++ (*target)++; ++ spin_unlock(&atomic64_lock); ++} ++ ++static __inline__ void ++atomic_dec_64(volatile uint64_t *target) ++{ ++ spin_lock(&atomic64_lock); ++ (*target)--; ++ spin_unlock(&atomic64_lock); ++} ++ ++static __inline__ void 
++atomic_add_64(volatile uint64_t *target, uint64_t delta) ++{ ++ spin_lock(&atomic64_lock); ++ *target += delta; ++ spin_unlock(&atomic64_lock); ++} ++ ++static __inline__ void ++atomic_sub_64(volatile uint64_t *target, uint64_t delta) ++{ ++ spin_lock(&atomic64_lock); ++ *target -= delta; ++ spin_unlock(&atomic64_lock); ++} ++ ++static __inline__ uint64_t ++atomic_inc_64_nv(volatile uint64_t *target) ++{ ++ uint64_t nv; ++ ++ spin_lock(&atomic64_lock); ++ nv = ++(*target); ++ spin_unlock(&atomic64_lock); ++ ++ return nv; ++} ++ ++static __inline__ uint64_t ++atomic_dec_64_nv(volatile uint64_t *target) ++{ ++ uint64_t nv; ++ ++ spin_lock(&atomic64_lock); ++ nv = --(*target); ++ spin_unlock(&atomic64_lock); ++ ++ return nv; ++} ++ ++static __inline__ uint64_t ++atomic_add_64_nv(volatile uint64_t *target, uint64_t delta) ++{ ++ uint64_t nv; ++ ++ spin_lock(&atomic64_lock); ++ *target += delta; ++ nv = *target; ++ spin_unlock(&atomic64_lock); ++ ++ return nv; ++} ++ ++static __inline__ uint64_t ++atomic_sub_64_nv(volatile uint64_t *target, uint64_t delta) ++{ ++ uint64_t nv; ++ ++ spin_lock(&atomic64_lock); ++ *target -= delta; ++ nv = *target; ++ spin_unlock(&atomic64_lock); ++ ++ return nv; ++} ++ ++static __inline__ uint64_t ++atomic_cas_64(volatile uint64_t *target, uint64_t cmp, ++ uint64_t newval) ++{ ++ uint64_t rc; ++ ++ spin_lock(&atomic64_lock); ++ rc = *target; ++ if (*target == cmp) ++ *target = newval; ++ spin_unlock(&atomic64_lock); ++ ++ return rc; ++} ++ ++ ++#else /* ATOMIC_SPINLOCK */ ++ ++#define atomic_inc_32(v) atomic_inc((atomic_t *)(v)) ++#define atomic_dec_32(v) atomic_dec((atomic_t *)(v)) ++#define atomic_add_32(v, i) atomic_add((i), (atomic_t *)(v)) ++#define atomic_sub_32(v, i) atomic_sub((i), (atomic_t *)(v)) ++#define atomic_inc_32_nv(v) atomic_inc_return((atomic_t *)(v)) ++#define atomic_dec_32_nv(v) atomic_dec_return((atomic_t *)(v)) ++#define atomic_add_32_nv(v, i) atomic_add_return((i), (atomic_t *)(v)) ++#define atomic_sub_32_nv(v, i) atomic_sub_return((i), (atomic_t *)(v)) ++#define atomic_cas_32(v, x, y) atomic_cmpxchg((atomic_t *)(v), x, y) ++#define atomic_inc_64(v) atomic64_inc((atomic64_t *)(v)) ++#define atomic_dec_64(v) atomic64_dec((atomic64_t *)(v)) ++#define atomic_add_64(v, i) atomic64_add((i), (atomic64_t *)(v)) ++#define atomic_sub_64(v, i) atomic64_sub((i), (atomic64_t *)(v)) ++#define atomic_inc_64_nv(v) atomic64_inc_return((atomic64_t *)(v)) ++#define atomic_dec_64_nv(v) atomic64_dec_return((atomic64_t *)(v)) ++#define atomic_add_64_nv(v, i) atomic64_add_return((i), (atomic64_t *)(v)) ++#define atomic_sub_64_nv(v, i) atomic64_sub_return((i), (atomic64_t *)(v)) ++#define atomic_cas_64(v, x, y) atomic64_cmpxchg((atomic64_t *)(v), x, y) ++ ++#endif /* ATOMIC_SPINLOCK */ ++ ++#ifdef _LP64 ++static __inline__ void * ++atomic_cas_ptr(volatile void *target, void *cmp, void *newval) ++{ ++ return (void *)atomic_cas_64((volatile uint64_t *)target, ++ (uint64_t)cmp, (uint64_t)newval); ++} ++#else /* _LP64 */ ++static __inline__ void * ++atomic_cas_ptr(volatile void *target, void *cmp, void *newval) ++{ ++ return (void *)atomic_cas_32((volatile uint32_t *)target, ++ (uint32_t)cmp, (uint32_t)newval); ++} ++#endif /* _LP64 */ ++ ++#endif /* _SPL_ATOMIC_H */ +diff -uNr linux-3.2.33-go.orig/include/spl/sys/attr.h linux-3.2.33-go/include/spl/sys/attr.h +--- linux-3.2.33-go.orig/include/spl/sys/attr.h 1970-01-01 01:00:00.000000000 +0100 ++++ linux-3.2.33-go/include/spl/sys/attr.h 2012-11-16 23:22:32.404192930 +0100 +@@ -0,0 +1,28 @@ 
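A short sketch of the Solaris-style atomics declared in sys/atomic.h above; it behaves the same whether the ATOMIC_SPINLOCK fallback or the native Linux atomic mapping is compiled in. The counter and function names are illustrative, not from the patch.

static uint64_t example_refs;

static uint64_t take_ref(void)
{
	/* The *_nv variants return the new value after the operation. */
	return atomic_inc_64_nv(&example_refs);
}

static int try_claim(volatile uint32_t *flag)
{
	/* Compare-and-swap 0 -> 1; seeing the old value 0 means we won. */
	return (atomic_cas_32(flag, 0, 1) == 0);
}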
++/*****************************************************************************\ ++ * Copyright (C) 2007-2010 Lawrence Livermore National Security, LLC. ++ * Copyright (C) 2007 The Regents of the University of California. ++ * Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER). ++ * Written by Brian Behlendorf . ++ * UCRL-CODE-235197 ++ * ++ * This file is part of the SPL, Solaris Porting Layer. ++ * For details, see . ++ * ++ * The SPL is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License as published by the ++ * Free Software Foundation; either version 2 of the License, or (at your ++ * option) any later version. ++ * ++ * The SPL is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * for more details. ++ * ++ * You should have received a copy of the GNU General Public License along ++ * with the SPL. If not, see . ++\*****************************************************************************/ ++ ++#ifndef _SPL_ATTR_H ++#define _SPL_ATTR_H ++ ++#endif /* SPL_ATTR_H */ +diff -uNr linux-3.2.33-go.orig/include/spl/sys/bitmap.h linux-3.2.33-go/include/spl/sys/bitmap.h +--- linux-3.2.33-go.orig/include/spl/sys/bitmap.h 1970-01-01 01:00:00.000000000 +0100 ++++ linux-3.2.33-go/include/spl/sys/bitmap.h 2012-11-16 23:22:32.406192907 +0100 +@@ -0,0 +1,28 @@ ++/*****************************************************************************\ ++ * Copyright (C) 2007-2010 Lawrence Livermore National Security, LLC. ++ * Copyright (C) 2007 The Regents of the University of California. ++ * Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER). ++ * Written by Brian Behlendorf . ++ * UCRL-CODE-235197 ++ * ++ * This file is part of the SPL, Solaris Porting Layer. ++ * For details, see . ++ * ++ * The SPL is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License as published by the ++ * Free Software Foundation; either version 2 of the License, or (at your ++ * option) any later version. ++ * ++ * The SPL is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * for more details. ++ * ++ * You should have received a copy of the GNU General Public License along ++ * with the SPL. If not, see . ++\*****************************************************************************/ ++ ++#ifndef _SPL_BITMAP_H ++#define _SPL_BITMAP_H ++ ++#endif /* SPL_BITMAP_H */ +diff -uNr linux-3.2.33-go.orig/include/spl/sys/bootconf.h linux-3.2.33-go/include/spl/sys/bootconf.h +--- linux-3.2.33-go.orig/include/spl/sys/bootconf.h 1970-01-01 01:00:00.000000000 +0100 ++++ linux-3.2.33-go/include/spl/sys/bootconf.h 2012-11-16 23:22:32.404192930 +0100 +@@ -0,0 +1,28 @@ ++/*****************************************************************************\ ++ * Copyright (C) 2007-2010 Lawrence Livermore National Security, LLC. ++ * Copyright (C) 2007 The Regents of the University of California. ++ * Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER). ++ * Written by Brian Behlendorf . ++ * UCRL-CODE-235197 ++ * ++ * This file is part of the SPL, Solaris Porting Layer. ++ * For details, see . 
++ * ++ * The SPL is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License as published by the ++ * Free Software Foundation; either version 2 of the License, or (at your ++ * option) any later version. ++ * ++ * The SPL is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * for more details. ++ * ++ * You should have received a copy of the GNU General Public License along ++ * with the SPL. If not, see . ++\*****************************************************************************/ ++ ++#ifndef _SPL_BOOTCONF_H ++#define _SPL_BOOTCONF_H ++ ++#endif /* SPL_BOOTCONF_H */ +diff -uNr linux-3.2.33-go.orig/include/spl/sys/bootprops.h linux-3.2.33-go/include/spl/sys/bootprops.h +--- linux-3.2.33-go.orig/include/spl/sys/bootprops.h 1970-01-01 01:00:00.000000000 +0100 ++++ linux-3.2.33-go/include/spl/sys/bootprops.h 2012-11-16 23:22:32.405192918 +0100 +@@ -0,0 +1,28 @@ ++/*****************************************************************************\ ++ * Copyright (C) 2007-2010 Lawrence Livermore National Security, LLC. ++ * Copyright (C) 2007 The Regents of the University of California. ++ * Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER). ++ * Written by Brian Behlendorf . ++ * UCRL-CODE-235197 ++ * ++ * This file is part of the SPL, Solaris Porting Layer. ++ * For details, see . ++ * ++ * The SPL is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License as published by the ++ * Free Software Foundation; either version 2 of the License, or (at your ++ * option) any later version. ++ * ++ * The SPL is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * for more details. ++ * ++ * You should have received a copy of the GNU General Public License along ++ * with the SPL. If not, see . ++\*****************************************************************************/ ++ ++#ifndef _SPL_BOOTPROPS_H ++#define _SPL_BOOTPROPS_H ++ ++#endif /* SPL_BOOTPROPS_H */ +diff -uNr linux-3.2.33-go.orig/include/spl/sys/buf.h linux-3.2.33-go/include/spl/sys/buf.h +--- linux-3.2.33-go.orig/include/spl/sys/buf.h 1970-01-01 01:00:00.000000000 +0100 ++++ linux-3.2.33-go/include/spl/sys/buf.h 2012-11-16 23:22:32.406192907 +0100 +@@ -0,0 +1,28 @@ ++/*****************************************************************************\ ++ * Copyright (C) 2007-2010 Lawrence Livermore National Security, LLC. ++ * Copyright (C) 2007 The Regents of the University of California. ++ * Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER). ++ * Written by Brian Behlendorf . ++ * UCRL-CODE-235197 ++ * ++ * This file is part of the SPL, Solaris Porting Layer. ++ * For details, see . ++ * ++ * The SPL is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License as published by the ++ * Free Software Foundation; either version 2 of the License, or (at your ++ * option) any later version. ++ * ++ * The SPL is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. 
See the GNU General Public License ++ * for more details. ++ * ++ * You should have received a copy of the GNU General Public License along ++ * with the SPL. If not, see . ++\*****************************************************************************/ ++ ++#ifndef _SPL_BUF_H ++#define _SPL_BUF_H ++ ++#endif /* SPL_BUF_H */ +diff -uNr linux-3.2.33-go.orig/include/spl/sys/byteorder.h linux-3.2.33-go/include/spl/sys/byteorder.h +--- linux-3.2.33-go.orig/include/spl/sys/byteorder.h 1970-01-01 01:00:00.000000000 +0100 ++++ linux-3.2.33-go/include/spl/sys/byteorder.h 2012-11-16 23:22:32.404192930 +0100 +@@ -0,0 +1,37 @@ ++/*****************************************************************************\ ++ * Copyright (C) 2007-2010 Lawrence Livermore National Security, LLC. ++ * Copyright (C) 2007 The Regents of the University of California. ++ * Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER). ++ * Written by Brian Behlendorf . ++ * UCRL-CODE-235197 ++ * ++ * This file is part of the SPL, Solaris Porting Layer. ++ * For details, see . ++ * ++ * The SPL is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License as published by the ++ * Free Software Foundation; either version 2 of the License, or (at your ++ * option) any later version. ++ * ++ * The SPL is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * for more details. ++ * ++ * You should have received a copy of the GNU General Public License along ++ * with the SPL. If not, see . ++\*****************************************************************************/ ++ ++#ifndef _SPL_BYTEORDER_H ++#define _SPL_BYTEORDER_H ++ ++#include ++ ++#define LE_16(x) cpu_to_le16(x) ++#define LE_32(x) cpu_to_le32(x) ++#define LE_64(x) cpu_to_le64(x) ++#define BE_16(x) cpu_to_be16(x) ++#define BE_32(x) cpu_to_be32(x) ++#define BE_64(x) cpu_to_be64(x) ++ ++#endif /* SPL_BYTEORDER_H */ +diff -uNr linux-3.2.33-go.orig/include/spl/sys/callb.h linux-3.2.33-go/include/spl/sys/callb.h +--- linux-3.2.33-go.orig/include/spl/sys/callb.h 1970-01-01 01:00:00.000000000 +0100 ++++ linux-3.2.33-go/include/spl/sys/callb.h 2012-11-16 23:22:32.403192942 +0100 +@@ -0,0 +1,55 @@ ++/*****************************************************************************\ ++ * Copyright (C) 2007-2010 Lawrence Livermore National Security, LLC. ++ * Copyright (C) 2007 The Regents of the University of California. ++ * Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER). ++ * Written by Brian Behlendorf . ++ * UCRL-CODE-235197 ++ * ++ * This file is part of the SPL, Solaris Porting Layer. ++ * For details, see . ++ * ++ * The SPL is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License as published by the ++ * Free Software Foundation; either version 2 of the License, or (at your ++ * option) any later version. ++ * ++ * The SPL is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * for more details. ++ * ++ * You should have received a copy of the GNU General Public License along ++ * with the SPL. If not, see . 
++\*****************************************************************************/ ++ ++#ifndef _SPL_CALLB_H ++#define _SPL_CALLB_H ++ ++#include ++#include ++ ++#define CALLB_CPR_ASSERT(cp) ASSERT(MUTEX_HELD((cp)->cc_lockp)); ++ ++typedef struct callb_cpr { ++ kmutex_t *cc_lockp; ++} callb_cpr_t; ++ ++#define CALLB_CPR_INIT(cp, lockp, func, name) { \ ++ (cp)->cc_lockp = lockp; \ ++} ++ ++#define CALLB_CPR_SAFE_BEGIN(cp) { \ ++ CALLB_CPR_ASSERT(cp); \ ++} ++ ++#define CALLB_CPR_SAFE_END(cp, lockp) { \ ++ CALLB_CPR_ASSERT(cp); \ ++} ++ ++#define CALLB_CPR_EXIT(cp) { \ ++ ASSERT(MUTEX_HELD((cp)->cc_lockp)); \ ++ mutex_exit((cp)->cc_lockp); \ ++} ++ ++#endif /* _SPL_CALLB_H */ ++ +diff -uNr linux-3.2.33-go.orig/include/spl/sys/cmn_err.h linux-3.2.33-go/include/spl/sys/cmn_err.h +--- linux-3.2.33-go.orig/include/spl/sys/cmn_err.h 1970-01-01 01:00:00.000000000 +0100 ++++ linux-3.2.33-go/include/spl/sys/cmn_err.h 2012-11-16 23:22:32.405192918 +0100 +@@ -0,0 +1,42 @@ ++/*****************************************************************************\ ++ * Copyright (C) 2007-2010 Lawrence Livermore National Security, LLC. ++ * Copyright (C) 2007 The Regents of the University of California. ++ * Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER). ++ * Written by Brian Behlendorf . ++ * UCRL-CODE-235197 ++ * ++ * This file is part of the SPL, Solaris Porting Layer. ++ * For details, see . ++ * ++ * The SPL is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License as published by the ++ * Free Software Foundation; either version 2 of the License, or (at your ++ * option) any later version. ++ * ++ * The SPL is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * for more details. ++ * ++ * You should have received a copy of the GNU General Public License along ++ * with the SPL. If not, see . ++\*****************************************************************************/ ++ ++#ifndef _SPL_CMN_ERR_H ++#define _SPL_CMN_ERR_H ++ ++#include ++ ++#define CE_CONT 0 /* continuation */ ++#define CE_NOTE 1 /* notice */ ++#define CE_WARN 2 /* warning */ ++#define CE_PANIC 3 /* panic */ ++#define CE_IGNORE 4 /* print nothing */ ++ ++extern void cmn_err(int, const char *, ...); ++extern void vcmn_err(int, const char *, __va_list); ++extern void vpanic(const char *, __va_list); ++ ++#define fm_panic panic ++ ++#endif /* SPL_CMN_ERR_H */ +diff -uNr linux-3.2.33-go.orig/include/spl/sys/compress.h linux-3.2.33-go/include/spl/sys/compress.h +--- linux-3.2.33-go.orig/include/spl/sys/compress.h 1970-01-01 01:00:00.000000000 +0100 ++++ linux-3.2.33-go/include/spl/sys/compress.h 2012-11-16 23:22:32.405192918 +0100 +@@ -0,0 +1,28 @@ ++/*****************************************************************************\ ++ * Copyright (C) 2007-2010 Lawrence Livermore National Security, LLC. ++ * Copyright (C) 2007 The Regents of the University of California. ++ * Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER). ++ * Written by Brian Behlendorf . ++ * UCRL-CODE-235197 ++ * ++ * This file is part of the SPL, Solaris Porting Layer. ++ * For details, see . 
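A one-function sketch of the Solaris logging shim declared in cmn_err.h above; the message text and the value parameter are illustrative.

static void warn_bad_value(int value)
{
	/* CE_WARN selects the severity; the format string is printf-like. */
	cmn_err(CE_WARN, "example: unexpected value %d, using default", value);
}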
++ * ++ * The SPL is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License as published by the ++ * Free Software Foundation; either version 2 of the License, or (at your ++ * option) any later version. ++ * ++ * The SPL is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * for more details. ++ * ++ * You should have received a copy of the GNU General Public License along ++ * with the SPL. If not, see . ++\*****************************************************************************/ ++ ++#ifndef _SPL_COMPRESS_H ++#define _SPL_COMPRESS_H ++ ++#endif /* SPL_COMPRESS_H */ +diff -uNr linux-3.2.33-go.orig/include/spl/sys/condvar.h linux-3.2.33-go/include/spl/sys/condvar.h +--- linux-3.2.33-go.orig/include/spl/sys/condvar.h 1970-01-01 01:00:00.000000000 +0100 ++++ linux-3.2.33-go/include/spl/sys/condvar.h 2012-11-16 23:22:32.405192918 +0100 +@@ -0,0 +1,71 @@ ++/*****************************************************************************\ ++ * Copyright (C) 2007-2010 Lawrence Livermore National Security, LLC. ++ * Copyright (C) 2007 The Regents of the University of California. ++ * Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER). ++ * Written by Brian Behlendorf . ++ * UCRL-CODE-235197 ++ * ++ * This file is part of the SPL, Solaris Porting Layer. ++ * For details, see . ++ * ++ * The SPL is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License as published by the ++ * Free Software Foundation; either version 2 of the License, or (at your ++ * option) any later version. ++ * ++ * The SPL is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * for more details. ++ * ++ * You should have received a copy of the GNU General Public License along ++ * with the SPL. If not, see . ++\*****************************************************************************/ ++ ++#ifndef _SPL_CONDVAR_H ++#define _SPL_CONDVAR_H ++ ++#include ++#include ++#include ++#include ++ ++/* ++ * The kcondvar_t struct is protected by mutex taken externally before ++ * calling any of the wait/signal funs, and passed into the wait funs. 
++ */ ++#define CV_MAGIC 0x346545f4 ++#define CV_DESTROY 0x346545f5 ++ ++typedef struct { ++ int cv_magic; ++ wait_queue_head_t cv_event; ++ wait_queue_head_t cv_destroy; ++ atomic_t cv_refs; ++ atomic_t cv_waiters; ++ kmutex_t *cv_mutex; ++} kcondvar_t; ++ ++typedef enum { CV_DEFAULT=0, CV_DRIVER } kcv_type_t; ++ ++extern void __cv_init(kcondvar_t *cvp, char *name, kcv_type_t type, void *arg); ++extern void __cv_destroy(kcondvar_t *cvp); ++extern void __cv_wait(kcondvar_t *cvp, kmutex_t *mp); ++extern void __cv_wait_interruptible(kcondvar_t *cvp, kmutex_t *mp); ++extern clock_t __cv_timedwait(kcondvar_t *cvp, kmutex_t *mp, clock_t exp_time); ++extern clock_t __cv_timedwait_interruptible(kcondvar_t *cvp, kmutex_t *mp, ++ clock_t exp_time); ++extern void __cv_signal(kcondvar_t *cvp); ++extern void __cv_broadcast(kcondvar_t *cvp); ++ ++#define cv_init(cvp, name, type, arg) __cv_init(cvp, name, type, arg) ++#define cv_destroy(cvp) __cv_destroy(cvp) ++#define cv_wait(cvp, mp) __cv_wait(cvp, mp) ++#define cv_wait_interruptible(cvp, mp) __cv_wait_interruptible(cvp,mp) ++#define cv_timedwait(cvp, mp, t) __cv_timedwait(cvp, mp, t) ++#define cv_timedwait_interruptible(cvp, mp, t) \ ++ __cv_timedwait_interruptible(cvp, mp, t) ++#define cv_signal(cvp) __cv_signal(cvp) ++#define cv_broadcast(cvp) __cv_broadcast(cvp) ++ ++#endif /* _SPL_CONDVAR_H */ +diff -uNr linux-3.2.33-go.orig/include/spl/sys/conf.h linux-3.2.33-go/include/spl/sys/conf.h +--- linux-3.2.33-go.orig/include/spl/sys/conf.h 1970-01-01 01:00:00.000000000 +0100 ++++ linux-3.2.33-go/include/spl/sys/conf.h 2012-11-16 23:22:32.406192907 +0100 +@@ -0,0 +1,28 @@ ++/*****************************************************************************\ ++ * Copyright (C) 2007-2010 Lawrence Livermore National Security, LLC. ++ * Copyright (C) 2007 The Regents of the University of California. ++ * Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER). ++ * Written by Brian Behlendorf . ++ * UCRL-CODE-235197 ++ * ++ * This file is part of the SPL, Solaris Porting Layer. ++ * For details, see . ++ * ++ * The SPL is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License as published by the ++ * Free Software Foundation; either version 2 of the License, or (at your ++ * option) any later version. ++ * ++ * The SPL is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * for more details. ++ * ++ * You should have received a copy of the GNU General Public License along ++ * with the SPL. If not, see . ++\*****************************************************************************/ ++ ++#ifndef _SPL_CONF_H ++#define _SPL_CONF_H ++ ++#endif /* SPL_CONF_H */ +diff -uNr linux-3.2.33-go.orig/include/spl/sys/console.h linux-3.2.33-go/include/spl/sys/console.h +--- linux-3.2.33-go.orig/include/spl/sys/console.h 1970-01-01 01:00:00.000000000 +0100 ++++ linux-3.2.33-go/include/spl/sys/console.h 2012-11-16 23:22:32.404192930 +0100 +@@ -0,0 +1,44 @@ ++/*****************************************************************************\ ++ * Copyright (C) 2007-2010 Lawrence Livermore National Security, LLC. ++ * Copyright (C) 2007 The Regents of the University of California. ++ * Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER). ++ * Written by Brian Behlendorf . 
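The usual wait/signal pattern for the kcondvar_t API declared in condvar.h above, as a sketch rather than anything the patch itself contains. kmutex_t, mutex_enter() and mutex_exit() come from the SPL mutex header, which is outside this excerpt, so treat those names as assumptions; cv_init()/cv_destroy() setup is omitted for brevity.

static kmutex_t example_lock;
static kcondvar_t example_cv;
static int example_ready;

static void example_wait(void)
{
	mutex_enter(&example_lock);
	while (!example_ready)	/* re-check the condition after every wakeup */
		cv_wait(&example_cv, &example_lock);
	mutex_exit(&example_lock);
}

static void example_post(void)
{
	mutex_enter(&example_lock);
	example_ready = 1;
	cv_signal(&example_cv);
	mutex_exit(&example_lock);
}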
++ * UCRL-CODE-235197 ++ * ++ * This file is part of the SPL, Solaris Porting Layer. ++ * For details, see . ++ * ++ * The SPL is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License as published by the ++ * Free Software Foundation; either version 2 of the License, or (at your ++ * option) any later version. ++ * ++ * The SPL is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * for more details. ++ * ++ * You should have received a copy of the GNU General Public License along ++ * with the SPL. If not, see . ++\*****************************************************************************/ ++ ++#ifndef _SPL_CONSOLE_H ++#define _SPL_CONSOLE_H ++ ++void ++console_vprintf(const char *fmt, va_list args) ++{ ++ vprintk(fmt, args); ++} ++ ++void ++console_printf(const char *fmt, ...) ++{ ++ va_list args; ++ ++ va_start(args, fmt); ++ console_vprintf(fmt, args); ++ va_end(args); ++} ++ ++#endif /* _SPL_CONSOLE_H */ +diff -uNr linux-3.2.33-go.orig/include/spl/sys/cpupart.h linux-3.2.33-go/include/spl/sys/cpupart.h +--- linux-3.2.33-go.orig/include/spl/sys/cpupart.h 1970-01-01 01:00:00.000000000 +0100 ++++ linux-3.2.33-go/include/spl/sys/cpupart.h 2012-11-16 23:22:32.403192942 +0100 +@@ -0,0 +1,28 @@ ++/*****************************************************************************\ ++ * Copyright (C) 2007-2010 Lawrence Livermore National Security, LLC. ++ * Copyright (C) 2007 The Regents of the University of California. ++ * Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER). ++ * Written by Brian Behlendorf . ++ * UCRL-CODE-235197 ++ * ++ * This file is part of the SPL, Solaris Porting Layer. ++ * For details, see . ++ * ++ * The SPL is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License as published by the ++ * Free Software Foundation; either version 2 of the License, or (at your ++ * option) any later version. ++ * ++ * The SPL is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * for more details. ++ * ++ * You should have received a copy of the GNU General Public License along ++ * with the SPL. If not, see . ++\*****************************************************************************/ ++ ++#ifndef _SPL_CPUPART_H ++#define _SPL_CPUPART_H ++ ++#endif /* SPL_CPUPART_H */ +diff -uNr linux-3.2.33-go.orig/include/spl/sys/cpuvar.h linux-3.2.33-go/include/spl/sys/cpuvar.h +--- linux-3.2.33-go.orig/include/spl/sys/cpuvar.h 1970-01-01 01:00:00.000000000 +0100 ++++ linux-3.2.33-go/include/spl/sys/cpuvar.h 2012-11-16 23:22:32.404192930 +0100 +@@ -0,0 +1,28 @@ ++/*****************************************************************************\ ++ * Copyright (C) 2007-2010 Lawrence Livermore National Security, LLC. ++ * Copyright (C) 2007 The Regents of the University of California. ++ * Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER). ++ * Written by Brian Behlendorf . ++ * UCRL-CODE-235197 ++ * ++ * This file is part of the SPL, Solaris Porting Layer. ++ * For details, see . 
++ * ++ * The SPL is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License as published by the ++ * Free Software Foundation; either version 2 of the License, or (at your ++ * option) any later version. ++ * ++ * The SPL is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * for more details. ++ * ++ * You should have received a copy of the GNU General Public License along ++ * with the SPL. If not, see . ++\*****************************************************************************/ ++ ++#ifndef _SPL_CPUVAR_H ++#define _SPL_CPUVAR_H ++ ++#endif /* SPL_CPUVAR_H */ +diff -uNr linux-3.2.33-go.orig/include/spl/sys/crc32.h linux-3.2.33-go/include/spl/sys/crc32.h +--- linux-3.2.33-go.orig/include/spl/sys/crc32.h 1970-01-01 01:00:00.000000000 +0100 ++++ linux-3.2.33-go/include/spl/sys/crc32.h 2012-11-16 23:22:32.404192930 +0100 +@@ -0,0 +1,28 @@ ++/*****************************************************************************\ ++ * Copyright (C) 2007-2010 Lawrence Livermore National Security, LLC. ++ * Copyright (C) 2007 The Regents of the University of California. ++ * Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER). ++ * Written by Brian Behlendorf . ++ * UCRL-CODE-235197 ++ * ++ * This file is part of the SPL, Solaris Porting Layer. ++ * For details, see . ++ * ++ * The SPL is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License as published by the ++ * Free Software Foundation; either version 2 of the License, or (at your ++ * option) any later version. ++ * ++ * The SPL is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * for more details. ++ * ++ * You should have received a copy of the GNU General Public License along ++ * with the SPL. If not, see . ++\*****************************************************************************/ ++ ++#ifndef _SPL_CRC32_H ++#define _SPL_CRC32_H ++ ++#endif /* SPL_CRC32_H */ +diff -uNr linux-3.2.33-go.orig/include/spl/sys/cred.h linux-3.2.33-go/include/spl/sys/cred.h +--- linux-3.2.33-go.orig/include/spl/sys/cred.h 1970-01-01 01:00:00.000000000 +0100 ++++ linux-3.2.33-go/include/spl/sys/cred.h 2012-11-16 23:22:32.404192930 +0100 +@@ -0,0 +1,62 @@ ++/*****************************************************************************\ ++ * Copyright (C) 2007-2010 Lawrence Livermore National Security, LLC. ++ * Copyright (C) 2007 The Regents of the University of California. ++ * Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER). ++ * Written by Brian Behlendorf . ++ * UCRL-CODE-235197 ++ * ++ * This file is part of the SPL, Solaris Porting Layer. ++ * For details, see . ++ * ++ * The SPL is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License as published by the ++ * Free Software Foundation; either version 2 of the License, or (at your ++ * option) any later version. ++ * ++ * The SPL is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * for more details. 
++ * ++ * You should have received a copy of the GNU General Public License along ++ * with the SPL. If not, see . ++\*****************************************************************************/ ++ ++#ifndef _SPL_CRED_H ++#define _SPL_CRED_H ++ ++#include ++#include ++#include ++ ++#ifdef HAVE_CRED_STRUCT ++ ++typedef struct cred cred_t; ++ ++#define kcred ((cred_t *)(init_task.cred)) ++#define CRED() ((cred_t *)current_cred()) ++ ++#else ++ ++typedef struct task_struct cred_t; ++ ++#define kcred ((cred_t *)&init_task) ++#define CRED() ((cred_t *)current) ++ ++#endif /* HAVE_CRED_STRUCT */ ++ ++extern void crhold(cred_t *cr); ++extern void crfree(cred_t *cr); ++extern uid_t crgetuid(const cred_t *cr); ++extern uid_t crgetruid(const cred_t *cr); ++extern uid_t crgetsuid(const cred_t *cr); ++extern uid_t crgetfsuid(const cred_t *cr); ++extern gid_t crgetgid(const cred_t *cr); ++extern gid_t crgetrgid(const cred_t *cr); ++extern gid_t crgetsgid(const cred_t *cr); ++extern gid_t crgetfsgid(const cred_t *cr); ++extern int crgetngroups(const cred_t *cr); ++extern gid_t * crgetgroups(const cred_t *cr); ++extern int groupmember(gid_t gid, const cred_t *cr); ++ ++#endif /* _SPL_CRED_H */ +diff -uNr linux-3.2.33-go.orig/include/spl/sys/ctype.h linux-3.2.33-go/include/spl/sys/ctype.h +--- linux-3.2.33-go.orig/include/spl/sys/ctype.h 1970-01-01 01:00:00.000000000 +0100 ++++ linux-3.2.33-go/include/spl/sys/ctype.h 2012-11-16 23:22:32.403192942 +0100 +@@ -0,0 +1,30 @@ ++/*****************************************************************************\ ++ * Copyright (C) 2007-2010 Lawrence Livermore National Security, LLC. ++ * Copyright (C) 2007 The Regents of the University of California. ++ * Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER). ++ * Written by Brian Behlendorf . ++ * UCRL-CODE-235197 ++ * ++ * This file is part of the SPL, Solaris Porting Layer. ++ * For details, see . ++ * ++ * The SPL is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License as published by the ++ * Free Software Foundation; either version 2 of the License, or (at your ++ * option) any later version. ++ * ++ * The SPL is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * for more details. ++ * ++ * You should have received a copy of the GNU General Public License along ++ * with the SPL. If not, see . ++\*****************************************************************************/ ++ ++#ifndef _SPL_CTYPE_H ++#define _SPL_CTYPE_H ++ ++#include ++ ++#endif /* SPL_CTYPE_H */ +diff -uNr linux-3.2.33-go.orig/include/spl/sys/ddi.h linux-3.2.33-go/include/spl/sys/ddi.h +--- linux-3.2.33-go.orig/include/spl/sys/ddi.h 1970-01-01 01:00:00.000000000 +0100 ++++ linux-3.2.33-go/include/spl/sys/ddi.h 2012-11-16 23:22:32.404192930 +0100 +@@ -0,0 +1,28 @@ ++/*****************************************************************************\ ++ * Copyright (C) 2007-2010 Lawrence Livermore National Security, LLC. ++ * Copyright (C) 2007 The Regents of the University of California. ++ * Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER). ++ * Written by Brian Behlendorf . ++ * UCRL-CODE-235197 ++ * ++ * This file is part of the SPL, Solaris Porting Layer. ++ * For details, see . 
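Illustrative sketch, not part of the diff: how a caller uses the credential shim declared in sys/cred.h above. CRED() yields the calling task's credentials whether or not the kernel has a struct cred, and the crget*()/groupmember() accessors read them; the permission-check helper itself is hypothetical.

static int
ex_may_modify(const cred_t *cr, uid_t owner_uid, gid_t owner_gid)
{
	/* the owner, root, or a member of the owning group may modify */
	if (crgetuid(cr) == 0 || crgetuid(cr) == owner_uid)
		return (1);

	return (groupmember(owner_gid, cr));
}

In practice cr would be CRED() for the current task, or kcred for kernel-internal operations.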
++ * ++ * The SPL is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License as published by the ++ * Free Software Foundation; either version 2 of the License, or (at your ++ * option) any later version. ++ * ++ * The SPL is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * for more details. ++ * ++ * You should have received a copy of the GNU General Public License along ++ * with the SPL. If not, see . ++\*****************************************************************************/ ++ ++#ifndef _SPL_DDI_H ++#define _SPL_DDI_H ++ ++#endif /* SPL_DDI_H */ +diff -uNr linux-3.2.33-go.orig/include/spl/sys/debug.h linux-3.2.33-go/include/spl/sys/debug.h +--- linux-3.2.33-go.orig/include/spl/sys/debug.h 1970-01-01 01:00:00.000000000 +0100 ++++ linux-3.2.33-go/include/spl/sys/debug.h 2012-11-16 23:22:32.404192930 +0100 +@@ -0,0 +1,142 @@ ++/*****************************************************************************\ ++ * Copyright (C) 2007-2010 Lawrence Livermore National Security, LLC. ++ * Copyright (C) 2007 The Regents of the University of California. ++ * Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER). ++ * Written by Brian Behlendorf . ++ * UCRL-CODE-235197 ++ * ++ * This file is part of the SPL, Solaris Porting Layer. ++ * For details, see . ++ * ++ * The SPL is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License as published by the ++ * Free Software Foundation; either version 2 of the License, or (at your ++ * option) any later version. ++ * ++ * The SPL is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * for more details. ++ * ++ * You should have received a copy of the GNU General Public License along ++ * with the SPL. If not, see . ++\*****************************************************************************/ ++ ++/* ++ * Available Solaris debug functions. All of the ASSERT() macros will be ++ * compiled out when NDEBUG is defined, this is the default behavior for ++ * the SPL. To enable assertions use the --enable-debug with configure. ++ * The VERIFY() functions are never compiled out and cannot be disabled. ++ * ++ * PANIC() - Panic the node and print message. ++ * ASSERT() - Assert X is true, if not panic. ++ * ASSERTF() - Assert X is true, if not panic and print message. ++ * ASSERTV() - Wraps a variable declaration which is only used by ASSERT(). ++ * ASSERT3S() - Assert signed X OP Y is true, if not panic. ++ * ASSERT3U() - Assert unsigned X OP Y is true, if not panic. ++ * ASSERT3P() - Assert pointer X OP Y is true, if not panic. ++ * VERIFY() - Verify X is true, if not panic. ++ * VERIFY3S() - Verify signed X OP Y is true, if not panic. ++ * VERIFY3U() - Verify unsigned X OP Y is true, if not panic. ++ * VERIFY3P() - Verify pointer X OP Y is true, if not panic. ++ */ ++ ++#ifndef _SPL_DEBUG_H ++#define _SPL_DEBUG_H ++ ++#include ++ ++#ifdef NDEBUG /* Debugging Disabled */ ++ ++/* Define SPL_DEBUG_STR to make clear which ASSERT definitions are used */ ++#define SPL_DEBUG_STR "" ++ ++#define PANIC(fmt, a...) 
\ ++do { \ ++ printk(KERN_EMERG fmt, ## a); \ ++ spl_debug_bug(__FILE__, __FUNCTION__, __LINE__, 0); \ ++} while (0) ++ ++#define __ASSERT(x) ((void)0) ++#define ASSERT(x) ((void)0) ++#define ASSERTF(x, y, z...) ((void)0) ++#define ASSERTV(x) ++#define VERIFY(cond) \ ++do { \ ++ if (unlikely(!(cond))) \ ++ PANIC("VERIFY(" #cond ") failed\n"); \ ++} while (0) ++ ++#define VERIFY3_IMPL(LEFT, OP, RIGHT, TYPE, FMT, CAST) \ ++do { \ ++ if (!((TYPE)(LEFT) OP (TYPE)(RIGHT))) \ ++ PANIC("VERIFY3(" #LEFT " " #OP " " #RIGHT ") " \ ++ "failed (" FMT " " #OP " " FMT ")\n", \ ++ CAST (LEFT), CAST (RIGHT)); \ ++} while (0) ++ ++#define VERIFY3S(x,y,z) VERIFY3_IMPL(x, y, z, int64_t, "%lld", (long long)) ++#define VERIFY3U(x,y,z) VERIFY3_IMPL(x, y, z, uint64_t, "%llu", \ ++ (unsigned long long)) ++#define VERIFY3P(x,y,z) VERIFY3_IMPL(x, y, z, uintptr_t, "%p", (void *)) ++ ++#define ASSERT3S(x,y,z) ((void)0) ++#define ASSERT3U(x,y,z) ((void)0) ++#define ASSERT3P(x,y,z) ((void)0) ++ ++#else /* Debugging Enabled */ ++ ++/* Define SPL_DEBUG_STR to make clear which ASSERT definitions are used */ ++#define SPL_DEBUG_STR " (DEBUG mode)" ++ ++#define PANIC(fmt, a...) \ ++do { \ ++ spl_debug_msg(NULL, 0, 0, \ ++ __FILE__, __FUNCTION__, __LINE__, fmt, ## a); \ ++ spl_debug_bug(__FILE__, __FUNCTION__, __LINE__, 0); \ ++} while (0) ++ ++/* ASSERTION that is safe to use within the debug system */ ++#define __ASSERT(cond) \ ++do { \ ++ if (unlikely(!(cond))) { \ ++ printk(KERN_EMERG "ASSERTION(" #cond ") failed\n"); \ ++ BUG(); \ ++ } \ ++} while (0) ++ ++/* ASSERTION that will debug log used outside the debug sysytem */ ++#define ASSERT(cond) \ ++do { \ ++ if (unlikely(!(cond))) \ ++ PANIC("ASSERTION(" #cond ") failed\n"); \ ++} while (0) ++ ++#define ASSERTF(cond, fmt, a...) \ ++do { \ ++ if (unlikely(!(cond))) \ ++ PANIC("ASSERTION(" #cond ") failed: " fmt, ## a); \ ++} while (0) ++ ++#define VERIFY3_IMPL(LEFT, OP, RIGHT, TYPE, FMT, CAST) \ ++do { \ ++ if (!((TYPE)(LEFT) OP (TYPE)(RIGHT))) \ ++ PANIC("VERIFY3(" #LEFT " " #OP " " #RIGHT ") " \ ++ "failed (" FMT " " #OP " " FMT ")\n", \ ++ CAST (LEFT), CAST (RIGHT)); \ ++} while (0) ++ ++#define VERIFY3S(x,y,z) VERIFY3_IMPL(x, y, z, int64_t, "%lld", (long long)) ++#define VERIFY3U(x,y,z) VERIFY3_IMPL(x, y, z, uint64_t, "%llu", \ ++ (unsigned long long)) ++#define VERIFY3P(x,y,z) VERIFY3_IMPL(x, y, z, uintptr_t, "%p", (void *)) ++ ++#define ASSERT3S(x,y,z) VERIFY3S(x, y, z) ++#define ASSERT3U(x,y,z) VERIFY3U(x, y, z) ++#define ASSERT3P(x,y,z) VERIFY3P(x, y, z) ++ ++#define ASSERTV(x) x ++#define VERIFY(x) ASSERT(x) ++ ++#endif /* NDEBUG */ ++#endif /* SPL_DEBUG_H */ +diff -uNr linux-3.2.33-go.orig/include/spl/sys/dirent.h linux-3.2.33-go/include/spl/sys/dirent.h +--- linux-3.2.33-go.orig/include/spl/sys/dirent.h 1970-01-01 01:00:00.000000000 +0100 ++++ linux-3.2.33-go/include/spl/sys/dirent.h 2012-11-16 23:22:32.406192907 +0100 +@@ -0,0 +1,28 @@ ++/*****************************************************************************\ ++ * Copyright (C) 2007-2010 Lawrence Livermore National Security, LLC. ++ * Copyright (C) 2007 The Regents of the University of California. ++ * Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER). ++ * Written by Brian Behlendorf . ++ * UCRL-CODE-235197 ++ * ++ * This file is part of the SPL, Solaris Porting Layer. ++ * For details, see . 
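Illustrative sketch, not part of the diff: typical use of the sys/debug.h macros defined above. The ASSERT*() family compiles away when NDEBUG is set (the SPL default), so assertions must not carry side effects and any variable referenced only by assertions is wrapped in ASSERTV(); the VERIFY*() family is never compiled out.

static void
ex_consume(char *buf, size_t len, size_t cap)
{
	ASSERTV(size_t used = len);	/* only referenced from ASSERTs below */

	ASSERT(buf != NULL);		/* disappears in non-debug builds */
	ASSERT3U(used, <=, cap);	/* typed compare, reports both operands */

	VERIFY3P(buf, !=, NULL);	/* checked even in non-debug builds */
}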
++ * ++ * The SPL is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License as published by the ++ * Free Software Foundation; either version 2 of the License, or (at your ++ * option) any later version. ++ * ++ * The SPL is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * for more details. ++ * ++ * You should have received a copy of the GNU General Public License along ++ * with the SPL. If not, see . ++\*****************************************************************************/ ++ ++#ifndef _SPL_DIRENT_H ++#define _SPL_DIRENT_H ++ ++#endif /* SPL_DIRENT_H */ +diff -uNr linux-3.2.33-go.orig/include/spl/sys/disp.h linux-3.2.33-go/include/spl/sys/disp.h +--- linux-3.2.33-go.orig/include/spl/sys/disp.h 1970-01-01 01:00:00.000000000 +0100 ++++ linux-3.2.33-go/include/spl/sys/disp.h 2012-11-16 23:22:32.406192907 +0100 +@@ -0,0 +1,33 @@ ++/*****************************************************************************\ ++ * Copyright (C) 2007-2010 Lawrence Livermore National Security, LLC. ++ * Copyright (C) 2007 The Regents of the University of California. ++ * Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER). ++ * Written by Brian Behlendorf . ++ * UCRL-CODE-235197 ++ * ++ * This file is part of the SPL, Solaris Porting Layer. ++ * For details, see . ++ * ++ * The SPL is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License as published by the ++ * Free Software Foundation; either version 2 of the License, or (at your ++ * option) any later version. ++ * ++ * The SPL is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * for more details. ++ * ++ * You should have received a copy of the GNU General Public License along ++ * with the SPL. If not, see . ++\*****************************************************************************/ ++ ++#ifndef _SPL_DISP_H ++#define _SPL_DISP_H ++ ++#include ++ ++#define kpreempt_disable() preempt_disable() ++#define kpreempt_enable() preempt_enable() ++ ++#endif /* SPL_DISP_H */ +diff -uNr linux-3.2.33-go.orig/include/spl/sys/dkio.h linux-3.2.33-go/include/spl/sys/dkio.h +--- linux-3.2.33-go.orig/include/spl/sys/dkio.h 1970-01-01 01:00:00.000000000 +0100 ++++ linux-3.2.33-go/include/spl/sys/dkio.h 2012-11-16 23:22:32.405192918 +0100 +@@ -0,0 +1,38 @@ ++/*****************************************************************************\ ++ * Copyright (C) 2007-2010 Lawrence Livermore National Security, LLC. ++ * Copyright (C) 2007 The Regents of the University of California. ++ * Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER). ++ * Written by Brian Behlendorf . ++ * UCRL-CODE-235197 ++ * ++ * This file is part of the SPL, Solaris Porting Layer. ++ * For details, see . ++ * ++ * The SPL is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License as published by the ++ * Free Software Foundation; either version 2 of the License, or (at your ++ * option) any later version. 
++ * ++ * The SPL is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * for more details. ++ * ++ * You should have received a copy of the GNU General Public License along ++ * with the SPL. If not, see . ++\*****************************************************************************/ ++ ++#ifndef _SPL_DKIO_H ++#define _SPL_DKIO_H ++ ++struct dk_callback { ++ void (*dkc_callback)(void *dkc_cookie, int error); ++ void *dkc_cookie; ++ int dkc_flag; ++}; ++ ++#define DKIOC (0x04 << 8) ++#define DKIOCFLUSHWRITECACHE (DKIOC | 34) ++#define DKIOCTRIM (DKIOC | 35) ++ ++#endif /* _SPL_DKIO_H */ +diff -uNr linux-3.2.33-go.orig/include/spl/sys/dklabel.h linux-3.2.33-go/include/spl/sys/dklabel.h +--- linux-3.2.33-go.orig/include/spl/sys/dklabel.h 1970-01-01 01:00:00.000000000 +0100 ++++ linux-3.2.33-go/include/spl/sys/dklabel.h 2012-11-16 23:22:32.406192907 +0100 +@@ -0,0 +1,28 @@ ++/*****************************************************************************\ ++ * Copyright (C) 2007-2010 Lawrence Livermore National Security, LLC. ++ * Copyright (C) 2007 The Regents of the University of California. ++ * Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER). ++ * Written by Brian Behlendorf . ++ * UCRL-CODE-235197 ++ * ++ * This file is part of the SPL, Solaris Porting Layer. ++ * For details, see . ++ * ++ * The SPL is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License as published by the ++ * Free Software Foundation; either version 2 of the License, or (at your ++ * option) any later version. ++ * ++ * The SPL is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * for more details. ++ * ++ * You should have received a copy of the GNU General Public License along ++ * with the SPL. If not, see . ++\*****************************************************************************/ ++ ++#ifndef _SPL_DKLABEL_H ++#define _SPL_DKLABEL_H ++ ++#endif /* _SPL_DKLABEL_H */ +diff -uNr linux-3.2.33-go.orig/include/spl/sys/dnlc.h linux-3.2.33-go/include/spl/sys/dnlc.h +--- linux-3.2.33-go.orig/include/spl/sys/dnlc.h 1970-01-01 01:00:00.000000000 +0100 ++++ linux-3.2.33-go/include/spl/sys/dnlc.h 2012-11-16 23:22:32.405192918 +0100 +@@ -0,0 +1,46 @@ ++/*****************************************************************************\ ++ * Copyright (C) 2007-2010 Lawrence Livermore National Security, LLC. ++ * Copyright (C) 2007 The Regents of the University of California. ++ * Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER). ++ * Written by Brian Behlendorf . ++ * UCRL-CODE-235197 ++ * ++ * This file is part of the SPL, Solaris Porting Layer. ++ * For details, see . ++ * ++ * The SPL is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License as published by the ++ * Free Software Foundation; either version 2 of the License, or (at your ++ * option) any later version. ++ * ++ * The SPL is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * for more details. 
++ * ++ * You should have received a copy of the GNU General Public License along ++ * with the SPL. If not, see . ++\*****************************************************************************/ ++ ++#ifndef _SPL_DNLC_H ++#define _SPL_DNLC_H ++ ++/* ++ * Reduce the dcache and icache then reap the free'd slabs. Note the ++ * interface takes a reclaim percentage but we don't have easy access to ++ * the total number of entries to calculate the reclaim count. However, ++ * in practice this doesn't need to be even close to correct. We simply ++ * need to reclaim some useful fraction of the cache. The caller can ++ * determine if more needs to be done. ++ */ ++static inline void ++dnlc_reduce_cache(void *reduce_percent) ++{ ++ int nr = (uintptr_t)reduce_percent * 10000; ++ ++ shrink_dcache_memory(nr, GFP_KERNEL); ++ shrink_icache_memory(nr, GFP_KERNEL); ++ kmem_reap(); ++} ++ ++#endif /* SPL_DNLC_H */ +diff -uNr linux-3.2.33-go.orig/include/spl/sys/dumphdr.h linux-3.2.33-go/include/spl/sys/dumphdr.h +--- linux-3.2.33-go.orig/include/spl/sys/dumphdr.h 1970-01-01 01:00:00.000000000 +0100 ++++ linux-3.2.33-go/include/spl/sys/dumphdr.h 2012-11-16 23:22:32.405192918 +0100 +@@ -0,0 +1,28 @@ ++/*****************************************************************************\ ++ * Copyright (C) 2007-2010 Lawrence Livermore National Security, LLC. ++ * Copyright (C) 2007 The Regents of the University of California. ++ * Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER). ++ * Written by Brian Behlendorf . ++ * UCRL-CODE-235197 ++ * ++ * This file is part of the SPL, Solaris Porting Layer. ++ * For details, see . ++ * ++ * The SPL is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License as published by the ++ * Free Software Foundation; either version 2 of the License, or (at your ++ * option) any later version. ++ * ++ * The SPL is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * for more details. ++ * ++ * You should have received a copy of the GNU General Public License along ++ * with the SPL. If not, see . ++\*****************************************************************************/ ++ ++#ifndef _SPL_DUMPHDR_H ++#define _SPL_DUMPHDR_H ++ ++#endif /* SPL_DUMPHDR_H */ +diff -uNr linux-3.2.33-go.orig/include/spl/sys/efi_partition.h linux-3.2.33-go/include/spl/sys/efi_partition.h +--- linux-3.2.33-go.orig/include/spl/sys/efi_partition.h 1970-01-01 01:00:00.000000000 +0100 ++++ linux-3.2.33-go/include/spl/sys/efi_partition.h 2012-11-16 23:22:32.405192918 +0100 +@@ -0,0 +1,28 @@ ++/*****************************************************************************\ ++ * Copyright (C) 2007-2010 Lawrence Livermore National Security, LLC. ++ * Copyright (C) 2007 The Regents of the University of California. ++ * Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER). ++ * Written by Brian Behlendorf . ++ * UCRL-CODE-235197 ++ * ++ * This file is part of the SPL, Solaris Porting Layer. ++ * For details, see . ++ * ++ * The SPL is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License as published by the ++ * Free Software Foundation; either version 2 of the License, or (at your ++ * option) any later version. 
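Illustrative sketch, not part of the diff: the reclaim percentage is passed through the opaque pointer argument of dnlc_reduce_cache() above rather than being dereferenced, so a caller asking for roughly 10% of the dentry/inode caches to be trimmed looks like this (the wrapper function is hypothetical).

static void
ex_trim_name_caches(void)
{
	/* argument is interpreted as a percentage, not dereferenced */
	dnlc_reduce_cache((void *)(uintptr_t)10);
}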
++ * ++ * The SPL is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * for more details. ++ * ++ * You should have received a copy of the GNU General Public License along ++ * with the SPL. If not, see . ++\*****************************************************************************/ ++ ++#ifndef _SPL_EFI_PARTITION_H ++#define _SPL_EFI_PARTITION_H ++ ++#endif /* SPL_EFI_PARTITION_H */ +diff -uNr linux-3.2.33-go.orig/include/spl/sys/errno.h linux-3.2.33-go/include/spl/sys/errno.h +--- linux-3.2.33-go.orig/include/spl/sys/errno.h 1970-01-01 01:00:00.000000000 +0100 ++++ linux-3.2.33-go/include/spl/sys/errno.h 2012-11-16 23:22:32.403192942 +0100 +@@ -0,0 +1,28 @@ ++/*****************************************************************************\ ++ * Copyright (C) 2007-2010 Lawrence Livermore National Security, LLC. ++ * Copyright (C) 2007 The Regents of the University of California. ++ * Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER). ++ * Written by Brian Behlendorf . ++ * UCRL-CODE-235197 ++ * ++ * This file is part of the SPL, Solaris Porting Layer. ++ * For details, see . ++ * ++ * The SPL is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License as published by the ++ * Free Software Foundation; either version 2 of the License, or (at your ++ * option) any later version. ++ * ++ * The SPL is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * for more details. ++ * ++ * You should have received a copy of the GNU General Public License along ++ * with the SPL. If not, see . ++\*****************************************************************************/ ++ ++#ifndef _SPL_ERRNO_H ++#define _SPL_ERRNO_H ++ ++#endif /* SPL_ERRNO_H */ +diff -uNr linux-3.2.33-go.orig/include/spl/sys/extdirent.h linux-3.2.33-go/include/spl/sys/extdirent.h +--- linux-3.2.33-go.orig/include/spl/sys/extdirent.h 1970-01-01 01:00:00.000000000 +0100 ++++ linux-3.2.33-go/include/spl/sys/extdirent.h 2012-11-16 23:22:32.405192918 +0100 +@@ -0,0 +1,29 @@ ++/*****************************************************************************\ ++ * Copyright (C) 2010 Lawrence Livermore National Security, LLC. ++ * Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER). ++ * Written by Brian Behlendorf . ++ * UCRL-CODE-235197 ++ * ++ * This file is part of the SPL, Solaris Porting Layer. ++ * For details, see . ++ * ++ * The SPL is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License as published by the ++ * Free Software Foundation; either version 2 of the License, or (at your ++ * option) any later version. ++ * ++ * The SPL is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * for more details. ++ * ++ * You should have received a copy of the GNU General Public License along ++ * with the SPL. If not, see . 
++\*****************************************************************************/ ++ ++#ifndef _SPL_EXTDIRENT_H ++#define _SPL_EXTDIRENT_H ++ ++#define ED_CASE_CONFLICT 0x10 ++ ++#endif /* _SPL_EXTDIRENT_H */ +diff -uNr linux-3.2.33-go.orig/include/spl/sys/fcntl.h linux-3.2.33-go/include/spl/sys/fcntl.h +--- linux-3.2.33-go.orig/include/spl/sys/fcntl.h 1970-01-01 01:00:00.000000000 +0100 ++++ linux-3.2.33-go/include/spl/sys/fcntl.h 2012-11-16 23:22:32.405192918 +0100 +@@ -0,0 +1,37 @@ ++/*****************************************************************************\ ++ * Copyright (C) 2010 Lawrence Livermore National Security, LLC. ++ * Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER). ++ * Written by Brian Behlendorf . ++ * UCRL-CODE-235197 ++ * ++ * This file is part of the SPL, Solaris Porting Layer. ++ * For details, see . ++ * ++ * The SPL is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License as published by the ++ * Free Software Foundation; either version 2 of the License, or (at your ++ * option) any later version. ++ * ++ * The SPL is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * for more details. ++ * ++ * You should have received a copy of the GNU General Public License along ++ * with the SPL. If not, see . ++\*****************************************************************************/ ++ ++#ifndef _SPL_FCNTL_H ++#define _SPL_FCNTL_H ++ ++#include ++ ++#define F_FREESP 11 ++ ++#ifdef CONFIG_64BIT ++typedef struct flock flock64_t; ++#else ++typedef struct flock64 flock64_t; ++#endif /* CONFIG_64BIT */ ++ ++#endif /* _SPL_FCNTL_H */ +diff -uNr linux-3.2.33-go.orig/include/spl/sys/file.h linux-3.2.33-go/include/spl/sys/file.h +--- linux-3.2.33-go.orig/include/spl/sys/file.h 1970-01-01 01:00:00.000000000 +0100 ++++ linux-3.2.33-go/include/spl/sys/file.h 2012-11-16 23:22:32.406192907 +0100 +@@ -0,0 +1,31 @@ ++/*****************************************************************************\ ++ * Copyright (C) 2007-2010 Lawrence Livermore National Security, LLC. ++ * Copyright (C) 2007 The Regents of the University of California. ++ * Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER). ++ * Written by Brian Behlendorf . ++ * UCRL-CODE-235197 ++ * ++ * This file is part of the SPL, Solaris Porting Layer. ++ * For details, see . ++ * ++ * The SPL is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License as published by the ++ * Free Software Foundation; either version 2 of the License, or (at your ++ * option) any later version. ++ * ++ * The SPL is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * for more details. ++ * ++ * You should have received a copy of the GNU General Public License along ++ * with the SPL. If not, see . 
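Illustrative sketch, not part of the diff: F_FREESP in sys/fcntl.h above is the Solaris space-management command ("free this byte range") and flock64_t is the descriptor it travels in; ZFS uses this shape for hole punching. The helper only shows how the descriptor is filled; the field names come from the kernel's own struct flock and the function itself is hypothetical.

static void
ex_describe_free_range(flock64_t *bf, loff_t off, loff_t len)
{
	memset(bf, 0, sizeof (*bf));
	bf->l_whence = 0;	/* offsets are absolute (SEEK_SET) */
	bf->l_start = off;	/* first byte to free */
	bf->l_len = len;	/* 0 would mean "to end of file" */
}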
++\*****************************************************************************/ ++ ++#ifndef _SPL_FILE_H ++#define _SPL_FILE_H ++ ++#define FIGNORECASE 0x00080000 ++#define FKIOCTL 0x80000000 ++ ++#endif /* SPL_FILE_H */ +diff -uNr linux-3.2.33-go.orig/include/spl/sys/fm/protocol.h linux-3.2.33-go/include/spl/sys/fm/protocol.h +--- linux-3.2.33-go.orig/include/spl/sys/fm/protocol.h 1970-01-01 01:00:00.000000000 +0100 ++++ linux-3.2.33-go/include/spl/sys/fm/protocol.h 2012-11-16 23:22:32.406192907 +0100 +@@ -0,0 +1,28 @@ ++/*****************************************************************************\ ++ * Copyright (C) 2007-2010 Lawrence Livermore National Security, LLC. ++ * Copyright (C) 2007 The Regents of the University of California. ++ * Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER). ++ * Written by Brian Behlendorf . ++ * UCRL-CODE-235197 ++ * ++ * This file is part of the SPL, Solaris Porting Layer. ++ * For details, see . ++ * ++ * The SPL is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License as published by the ++ * Free Software Foundation; either version 2 of the License, or (at your ++ * option) any later version. ++ * ++ * The SPL is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * for more details. ++ * ++ * You should have received a copy of the GNU General Public License along ++ * with the SPL. If not, see . ++\*****************************************************************************/ ++ ++#ifndef _SPL_FM_PROTOCOL_H ++#define _SPL_FM_PROTOCOL_H ++ ++#endif /* _SPL_FM_PROTOCOL_H */ +diff -uNr linux-3.2.33-go.orig/include/spl/sys/fm/util.h linux-3.2.33-go/include/spl/sys/fm/util.h +--- linux-3.2.33-go.orig/include/spl/sys/fm/util.h 1970-01-01 01:00:00.000000000 +0100 ++++ linux-3.2.33-go/include/spl/sys/fm/util.h 2012-11-16 23:22:32.406192907 +0100 +@@ -0,0 +1,28 @@ ++/*****************************************************************************\ ++ * Copyright (C) 2007-2010 Lawrence Livermore National Security, LLC. ++ * Copyright (C) 2007 The Regents of the University of California. ++ * Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER). ++ * Written by Brian Behlendorf . ++ * UCRL-CODE-235197 ++ * ++ * This file is part of the SPL, Solaris Porting Layer. ++ * For details, see . ++ * ++ * The SPL is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License as published by the ++ * Free Software Foundation; either version 2 of the License, or (at your ++ * option) any later version. ++ * ++ * The SPL is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * for more details. ++ * ++ * You should have received a copy of the GNU General Public License along ++ * with the SPL. If not, see . 
++\*****************************************************************************/ ++ ++#ifndef _SPL_FM_UTIL_H ++#define _SPL_FM_UTIL_H ++ ++#endif /* _SPL_FM_UTIL_H */ +diff -uNr linux-3.2.33-go.orig/include/spl/sys/fs/swapnode.h linux-3.2.33-go/include/spl/sys/fs/swapnode.h +--- linux-3.2.33-go.orig/include/spl/sys/fs/swapnode.h 1970-01-01 01:00:00.000000000 +0100 ++++ linux-3.2.33-go/include/spl/sys/fs/swapnode.h 2012-11-16 23:22:32.404192930 +0100 +@@ -0,0 +1,28 @@ ++/*****************************************************************************\ ++ * Copyright (C) 2007-2010 Lawrence Livermore National Security, LLC. ++ * Copyright (C) 2007 The Regents of the University of California. ++ * Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER). ++ * Written by Brian Behlendorf . ++ * UCRL-CODE-235197 ++ * ++ * This file is part of the SPL, Solaris Porting Layer. ++ * For details, see . ++ * ++ * The SPL is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License as published by the ++ * Free Software Foundation; either version 2 of the License, or (at your ++ * option) any later version. ++ * ++ * The SPL is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * for more details. ++ * ++ * You should have received a copy of the GNU General Public License along ++ * with the SPL. If not, see . ++\*****************************************************************************/ ++ ++#ifndef _SPL_SWAPNODE_H ++#define _SPL_SWAPNODE_H ++ ++#endif /* SPL_SWAPNODE_H */ +diff -uNr linux-3.2.33-go.orig/include/spl/sys/idmap.h linux-3.2.33-go/include/spl/sys/idmap.h +--- linux-3.2.33-go.orig/include/spl/sys/idmap.h 1970-01-01 01:00:00.000000000 +0100 ++++ linux-3.2.33-go/include/spl/sys/idmap.h 2012-11-16 23:22:32.404192930 +0100 +@@ -0,0 +1,29 @@ ++/*****************************************************************************\ ++ * Copyright (C) 2010 Lawrence Livermore National Security, LLC. ++ * Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER). ++ * Written by Brian Behlendorf . ++ * UCRL-CODE-235197 ++ * ++ * This file is part of the SPL, Solaris Porting Layer. ++ * For details, see . ++ * ++ * The SPL is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License as published by the ++ * Free Software Foundation; either version 2 of the License, or (at your ++ * option) any later version. ++ * ++ * The SPL is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * for more details. ++ * ++ * You should have received a copy of the GNU General Public License along ++ * with the SPL. If not, see . 
++\*****************************************************************************/ ++ ++#ifndef _SPL_IDMAP_H ++#define _SPL_IDMAP_H ++ ++#define IDMAP_WK_CREATOR_OWNER_UID 2147483648U ++ ++#endif /* SPL_IDMAP_H */ +diff -uNr linux-3.2.33-go.orig/include/spl/sys/int_limits.h linux-3.2.33-go/include/spl/sys/int_limits.h +--- linux-3.2.33-go.orig/include/spl/sys/int_limits.h 1970-01-01 01:00:00.000000000 +0100 ++++ linux-3.2.33-go/include/spl/sys/int_limits.h 2012-11-16 23:22:32.406192907 +0100 +@@ -0,0 +1,28 @@ ++/*****************************************************************************\ ++ * Copyright (C) 2007-2010 Lawrence Livermore National Security, LLC. ++ * Copyright (C) 2007 The Regents of the University of California. ++ * Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER). ++ * Written by Brian Behlendorf . ++ * UCRL-CODE-235197 ++ * ++ * This file is part of the SPL, Solaris Porting Layer. ++ * For details, see . ++ * ++ * The SPL is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License as published by the ++ * Free Software Foundation; either version 2 of the License, or (at your ++ * option) any later version. ++ * ++ * The SPL is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * for more details. ++ * ++ * You should have received a copy of the GNU General Public License along ++ * with the SPL. If not, see . ++\*****************************************************************************/ ++ ++#ifndef _SPL_INT_LIMITS_H ++#define _SPL_INT_LIMITS_H ++ ++#endif /* SPL_INT_LIMITS_H */ +diff -uNr linux-3.2.33-go.orig/include/spl/sys/inttypes.h linux-3.2.33-go/include/spl/sys/inttypes.h +--- linux-3.2.33-go.orig/include/spl/sys/inttypes.h 1970-01-01 01:00:00.000000000 +0100 ++++ linux-3.2.33-go/include/spl/sys/inttypes.h 2012-11-16 23:22:32.405192918 +0100 +@@ -0,0 +1,28 @@ ++/*****************************************************************************\ ++ * Copyright (C) 2007-2010 Lawrence Livermore National Security, LLC. ++ * Copyright (C) 2007 The Regents of the University of California. ++ * Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER). ++ * Written by Brian Behlendorf . ++ * UCRL-CODE-235197 ++ * ++ * This file is part of the SPL, Solaris Porting Layer. ++ * For details, see . ++ * ++ * The SPL is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License as published by the ++ * Free Software Foundation; either version 2 of the License, or (at your ++ * option) any later version. ++ * ++ * The SPL is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * for more details. ++ * ++ * You should have received a copy of the GNU General Public License along ++ * with the SPL. If not, see . 
++\*****************************************************************************/ ++ ++#ifndef _SPL_INTTYPES_H ++#define _SPL_INTTYPES_H ++ ++#endif /* SPL_INTTYPES_H */ +diff -uNr linux-3.2.33-go.orig/include/spl/sys/int_types.h linux-3.2.33-go/include/spl/sys/int_types.h +--- linux-3.2.33-go.orig/include/spl/sys/int_types.h 1970-01-01 01:00:00.000000000 +0100 ++++ linux-3.2.33-go/include/spl/sys/int_types.h 2012-11-16 23:22:32.403192942 +0100 +@@ -0,0 +1,30 @@ ++/*****************************************************************************\ ++ * Copyright (C) 2007-2010 Lawrence Livermore National Security, LLC. ++ * Copyright (C) 2007 The Regents of the University of California. ++ * Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER). ++ * Written by Brian Behlendorf . ++ * UCRL-CODE-235197 ++ * ++ * This file is part of the SPL, Solaris Porting Layer. ++ * For details, see . ++ * ++ * The SPL is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License as published by the ++ * Free Software Foundation; either version 2 of the License, or (at your ++ * option) any later version. ++ * ++ * The SPL is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * for more details. ++ * ++ * You should have received a copy of the GNU General Public License along ++ * with the SPL. If not, see . ++\*****************************************************************************/ ++ ++#ifndef _SPL_INT_TYPES_H ++#define _SPL_INT_TYPES_H ++ ++#include ++ ++#endif /* SPL_INT_TYPES_H */ +diff -uNr linux-3.2.33-go.orig/include/spl/sys/isa_defs.h linux-3.2.33-go/include/spl/sys/isa_defs.h +--- linux-3.2.33-go.orig/include/spl/sys/isa_defs.h 1970-01-01 01:00:00.000000000 +0100 ++++ linux-3.2.33-go/include/spl/sys/isa_defs.h 2012-11-16 23:22:32.405192918 +0100 +@@ -0,0 +1,120 @@ ++/*****************************************************************************\ ++ * Copyright (C) 2007-2010 Lawrence Livermore National Security, LLC. ++ * Copyright (C) 2007 The Regents of the University of California. ++ * Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER). ++ * Written by Brian Behlendorf . ++ * UCRL-CODE-235197 ++ * ++ * This file is part of the SPL, Solaris Porting Layer. ++ * For details, see . ++ * ++ * The SPL is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License as published by the ++ * Free Software Foundation; either version 2 of the License, or (at your ++ * option) any later version. ++ * ++ * The SPL is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * for more details. ++ * ++ * You should have received a copy of the GNU General Public License along ++ * with the SPL. If not, see . 
++\*****************************************************************************/ ++ ++#ifndef _SPL_ISA_DEFS_H ++#define _SPL_ISA_DEFS_H ++ ++/* x86_64 arch specific defines */ ++#if defined(__x86_64) || defined(__x86_64__) ++ ++#if !defined(__x86_64) ++#define __x86_64 ++#endif ++ ++#if !defined(__amd64) ++#define __amd64 ++#endif ++ ++#if !defined(__x86) ++#define __x86 ++#endif ++ ++#if !defined(_LP64) ++#define _LP64 ++#endif ++ ++/* i386 arch specific defines */ ++#elif defined(__i386) || defined(__i386__) ++ ++#if !defined(__i386) ++#define __i386 ++#endif ++ ++#if !defined(__x86) ++#define __x86 ++#endif ++ ++#if !defined(_ILP32) ++#define _ILP32 ++#endif ++ ++/* powerpc (ppc64) arch specific defines */ ++#elif defined(__powerpc) || defined(__powerpc__) ++ ++#if !defined(__powerpc) ++#define __powerpc ++#endif ++ ++#if !defined(__powerpc__) ++#define __powerpc__ ++#endif ++ ++#if !defined(_LP64) ++#define _LP64 ++#endif ++ ++/* arm arch specific defines */ ++#elif defined(__arm) || defined(__arm__) ++ ++#if !defined(__arm) ++#define __arm ++#endif ++ ++#if !defined(__arm__) ++#define __arm__ ++#endif ++ ++#if defined(__ARMEL__) ++#define _LITTLE_ENDIAN ++#else ++#define _BIG_ENDIAN ++#endif ++ ++#else /* Currently only x86_64, i386, arm, and powerpc arches supported */ ++#error "Unsupported ISA type" ++#endif ++ ++#if defined(_ILP32) && defined(_LP64) ++#error "Both _ILP32 and _LP64 are defined" ++#endif ++ ++#include ++ ++#if defined(__LITTLE_ENDIAN) && !defined(_LITTLE_ENDIAN) ++#define _LITTLE_ENDIAN __LITTLE_ENDIAN ++#endif ++ ++#if defined(__BIG_ENDIAN) && !defined(_BIG_ENDIAN) ++#define _BIG_ENDIAN __BIG_ENDIAN ++#endif ++ ++#if defined(_LITTLE_ENDIAN) && defined(_BIG_ENDIAN) ++#error "Both _LITTLE_ENDIAN and _BIG_ENDIAN are defined" ++#endif ++ ++#if !defined(_LITTLE_ENDIAN) && !defined(_BIG_ENDIAN) ++#error "Neither _LITTLE_ENDIAN or _BIG_ENDIAN are defined" ++#endif ++ ++#endif /* _SPL_ISA_DEFS_H */ +diff -uNr linux-3.2.33-go.orig/include/spl/sys/kidmap.h linux-3.2.33-go/include/spl/sys/kidmap.h +--- linux-3.2.33-go.orig/include/spl/sys/kidmap.h 1970-01-01 01:00:00.000000000 +0100 ++++ linux-3.2.33-go/include/spl/sys/kidmap.h 2012-11-16 23:22:32.405192918 +0100 +@@ -0,0 +1,30 @@ ++/*****************************************************************************\ ++ * Copyright (C) 2007-2010 Lawrence Livermore National Security, LLC. ++ * Copyright (C) 2007 The Regents of the University of California. ++ * Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER). ++ * Written by Brian Behlendorf . ++ * UCRL-CODE-235197 ++ * ++ * This file is part of the SPL, Solaris Porting Layer. ++ * For details, see . ++ * ++ * The SPL is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License as published by the ++ * Free Software Foundation; either version 2 of the License, or (at your ++ * option) any later version. ++ * ++ * The SPL is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * for more details. ++ * ++ * You should have received a copy of the GNU General Public License along ++ * with the SPL. If not, see . 
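Illustrative sketch, not part of the diff: Solaris-derived code elsewhere in the stack keys off the macros sys/isa_defs.h establishes above (_LP64/_ILP32, _LITTLE_ENDIAN/_BIG_ENDIAN) rather than their Linux counterparts, for example to pick a data-model or byte-order specific path; the type and macro names here are hypothetical.

#if defined(_LP64)
typedef unsigned long ex_word_t;	/* 64-bit data model */
#else
typedef unsigned int ex_word_t;		/* 32-bit (_ILP32) data model */
#endif

#if defined(_BIG_ENDIAN)
#define	EX_HOST_BIG_ENDIAN	1
#else
#define	EX_HOST_BIG_ENDIAN	0
#endif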
++\*****************************************************************************/ ++ ++#ifndef _SPL_KIDMAP_H ++#define _SPL_KIDMAP_H ++ ++#include ++ ++#endif /* SPL_KIDMAP_H */ +diff -uNr linux-3.2.33-go.orig/include/spl/sys/kmem.h linux-3.2.33-go/include/spl/sys/kmem.h +--- linux-3.2.33-go.orig/include/spl/sys/kmem.h 1970-01-01 01:00:00.000000000 +0100 ++++ linux-3.2.33-go/include/spl/sys/kmem.h 2012-11-16 23:22:32.403192942 +0100 +@@ -0,0 +1,512 @@ ++/*****************************************************************************\ ++ * Copyright (C) 2007-2010 Lawrence Livermore National Security, LLC. ++ * Copyright (C) 2007 The Regents of the University of California. ++ * Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER). ++ * Written by Brian Behlendorf . ++ * UCRL-CODE-235197 ++ * ++ * This file is part of the SPL, Solaris Porting Layer. ++ * For details, see . ++ * ++ * The SPL is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License as published by the ++ * Free Software Foundation; either version 2 of the License, or (at your ++ * option) any later version. ++ * ++ * The SPL is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * for more details. ++ * ++ * You should have received a copy of the GNU General Public License along ++ * with the SPL. If not, see . ++\*****************************************************************************/ ++ ++#ifndef _SPL_KMEM_H ++#define _SPL_KMEM_H ++ ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++ ++/* ++ * Memory allocation interfaces ++ */ ++#define KM_SLEEP GFP_KERNEL /* Can sleep, never fails */ ++#define KM_NOSLEEP GFP_ATOMIC /* Can not sleep, may fail */ ++#define KM_PUSHPAGE (GFP_NOIO | __GFP_HIGH) /* Use reserved memory */ ++#define KM_NODEBUG __GFP_NOWARN /* Suppress warnings */ ++#define KM_FLAGS __GFP_BITS_MASK ++#define KM_VMFLAGS GFP_LEVEL_MASK ++ ++/* ++ * Used internally, the kernel does not need to support this flag ++ */ ++#ifndef __GFP_ZERO ++# define __GFP_ZERO 0x8000 ++#endif ++ ++/* ++ * PF_NOFS is a per-process debug flag which is set in current->flags to ++ * detect when a process is performing an unsafe allocation. All tasks ++ * with PF_NOFS set must strictly use KM_PUSHPAGE for allocations because ++ * if they enter direct reclaim and initiate I/O the may deadlock. ++ * ++ * When debugging is disabled, any incorrect usage will be detected and ++ * a call stack with warning will be printed to the console. The flags ++ * will then be automatically corrected to allow for safe execution. If ++ * debugging is enabled this will be treated as a fatal condition. ++ * ++ * To avoid any risk of conflicting with the existing PF_ flags. The ++ * PF_NOFS bit shadows the rarely used PF_MUTEX_TESTER bit. Only when ++ * CONFIG_RT_MUTEX_TESTER is not set, and we know this bit is unused, ++ * will the PF_NOFS bit be valid. Happily, most existing distributions ++ * ship a kernel with CONFIG_RT_MUTEX_TESTER disabled. 
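Illustrative sketch, not part of the diff: what the PF_NOFS rule above means for callers. Code reachable from the I/O or transaction-sync path allocates with KM_PUSHPAGE so reclaim can never re-enter the filesystem and deadlock, while ordinary contexts use KM_SLEEP, which the nofail wrappers below retry until they succeed. kmem_alloc()/kmem_free() are the interfaces this header defines further down; the surrounding helpers are hypothetical.

/* ordinary process context: sleeping and full direct reclaim are fine */
static void *
ex_alloc_ctl(size_t size)
{
	return (kmem_alloc(size, KM_SLEEP));	/* retried, never returns NULL */
}

/* beneath the write/txg-sync path, i.e. under the PF_NOFS restriction */
static void *
ex_alloc_io(size_t size)
{
	/* GFP_NOIO | __GFP_HIGH: reclaim without issuing new filesystem I/O */
	return (kmem_alloc(size, KM_PUSHPAGE));
}

static void
ex_free(void *buf, size_t size)
{
	kmem_free(buf, size);	/* unlike kfree(), the original size is passed back */
}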
++ */ ++#if !defined(CONFIG_RT_MUTEX_TESTER) && defined(PF_MUTEX_TESTER) ++# define PF_NOFS PF_MUTEX_TESTER ++ ++static inline void ++sanitize_flags(struct task_struct *p, gfp_t *flags) ++{ ++ if (unlikely((p->flags & PF_NOFS) && (*flags & (__GFP_IO|__GFP_FS)))) { ++# ifdef NDEBUG ++ SDEBUG_LIMIT(SD_CONSOLE | SD_WARNING, "Fixing allocation for " ++ "task %s (%d) which used GFP flags 0x%x with PF_NOFS set\n", ++ p->comm, p->pid, flags); ++ spl_debug_dumpstack(p); ++ *flags &= ~(__GFP_IO|__GFP_FS); ++# else ++ PANIC("FATAL allocation for task %s (%d) which used GFP " ++ "flags 0x%x with PF_NOFS set\n", p->comm, p->pid, flags); ++# endif /* NDEBUG */ ++ } ++} ++#else ++# define PF_NOFS 0x00000000 ++# define sanitize_flags(p, fl) ((void)0) ++#endif /* !defined(CONFIG_RT_MUTEX_TESTER) && defined(PF_MUTEX_TESTER) */ ++ ++/* ++ * __GFP_NOFAIL looks like it will be removed from the kernel perhaps as ++ * early as 2.6.32. To avoid this issue when it occurs in upstream kernels ++ * we retry the allocation here as long as it is not __GFP_WAIT (GFP_ATOMIC). ++ * I would prefer the caller handle the failure case cleanly but we are ++ * trying to emulate Solaris and those are not the Solaris semantics. ++ */ ++static inline void * ++kmalloc_nofail(size_t size, gfp_t flags) ++{ ++ void *ptr; ++ ++ sanitize_flags(current, &flags); ++ ++ do { ++ ptr = kmalloc(size, flags); ++ } while (ptr == NULL && (flags & __GFP_WAIT)); ++ ++ return ptr; ++} ++ ++static inline void * ++kzalloc_nofail(size_t size, gfp_t flags) ++{ ++ void *ptr; ++ ++ sanitize_flags(current, &flags); ++ ++ do { ++ ptr = kzalloc(size, flags); ++ } while (ptr == NULL && (flags & __GFP_WAIT)); ++ ++ return ptr; ++} ++ ++static inline void * ++kmalloc_node_nofail(size_t size, gfp_t flags, int node) ++{ ++#ifdef HAVE_KMALLOC_NODE ++ void *ptr; ++ ++ sanitize_flags(current, &flags); ++ ++ do { ++ ptr = kmalloc_node(size, flags, node); ++ } while (ptr == NULL && (flags & __GFP_WAIT)); ++ ++ return ptr; ++#else ++ return kmalloc_nofail(size, flags); ++#endif /* HAVE_KMALLOC_NODE */ ++} ++ ++static inline void * ++vmalloc_nofail(size_t size, gfp_t flags) ++{ ++ void *ptr; ++ ++ sanitize_flags(current, &flags); ++ ++ /* ++ * Retry failed __vmalloc() allocations once every second. The ++ * rational for the delay is that the likely failure modes are: ++ * ++ * 1) The system has completely exhausted memory, in which case ++ * delaying 1 second for the memory reclaim to run is reasonable ++ * to avoid thrashing the system. ++ * 2) The system has memory but has exhausted the small virtual ++ * address space available on 32-bit systems. Retrying the ++ * allocation immediately will only result in spinning on the ++ * virtual address space lock. It is better delay a second and ++ * hope that another process will free some of the address space. ++ * But the bottom line is there is not much we can actually do ++ * since we can never safely return a failure and honor the ++ * Solaris semantics. ++ */ ++ while (1) { ++ ptr = __vmalloc(size, flags | __GFP_HIGHMEM, PAGE_KERNEL); ++ if (unlikely((ptr == NULL) && (flags & __GFP_WAIT))) { ++ set_current_state(TASK_INTERRUPTIBLE); ++ schedule_timeout(HZ); ++ } else { ++ break; ++ } ++ } ++ ++ return ptr; ++} ++ ++static inline void * ++vzalloc_nofail(size_t size, gfp_t flags) ++{ ++ void *ptr; ++ ++ ptr = vmalloc_nofail(size, flags); ++ if (ptr) ++ memset(ptr, 0, (size)); ++ ++ return ptr; ++} ++ ++#ifdef DEBUG_KMEM ++ ++/* ++ * Memory accounting functions to be used only when DEBUG_KMEM is set. 
++ */ ++# ifdef HAVE_ATOMIC64_T ++ ++# define kmem_alloc_used_add(size) atomic64_add(size, &kmem_alloc_used) ++# define kmem_alloc_used_sub(size) atomic64_sub(size, &kmem_alloc_used) ++# define kmem_alloc_used_read() atomic64_read(&kmem_alloc_used) ++# define kmem_alloc_used_set(size) atomic64_set(&kmem_alloc_used, size) ++# define vmem_alloc_used_add(size) atomic64_add(size, &vmem_alloc_used) ++# define vmem_alloc_used_sub(size) atomic64_sub(size, &vmem_alloc_used) ++# define vmem_alloc_used_read() atomic64_read(&vmem_alloc_used) ++# define vmem_alloc_used_set(size) atomic64_set(&vmem_alloc_used, size) ++ ++extern atomic64_t kmem_alloc_used; ++extern unsigned long long kmem_alloc_max; ++extern atomic64_t vmem_alloc_used; ++extern unsigned long long vmem_alloc_max; ++ ++# else /* HAVE_ATOMIC64_T */ ++ ++# define kmem_alloc_used_add(size) atomic_add(size, &kmem_alloc_used) ++# define kmem_alloc_used_sub(size) atomic_sub(size, &kmem_alloc_used) ++# define kmem_alloc_used_read() atomic_read(&kmem_alloc_used) ++# define kmem_alloc_used_set(size) atomic_set(&kmem_alloc_used, size) ++# define vmem_alloc_used_add(size) atomic_add(size, &vmem_alloc_used) ++# define vmem_alloc_used_sub(size) atomic_sub(size, &vmem_alloc_used) ++# define vmem_alloc_used_read() atomic_read(&vmem_alloc_used) ++# define vmem_alloc_used_set(size) atomic_set(&vmem_alloc_used, size) ++ ++extern atomic_t kmem_alloc_used; ++extern unsigned long long kmem_alloc_max; ++extern atomic_t vmem_alloc_used; ++extern unsigned long long vmem_alloc_max; ++ ++# endif /* HAVE_ATOMIC64_T */ ++ ++# ifdef DEBUG_KMEM_TRACKING ++/* ++ * DEBUG_KMEM && DEBUG_KMEM_TRACKING ++ * ++ * The maximum level of memory debugging. All memory will be accounted ++ * for and each allocation will be explicitly tracked. Any allocation ++ * which is leaked will be reported on module unload and the exact location ++ * where that memory was allocation will be reported. This level of memory ++ * tracking will have a significant impact on performance and should only ++ * be enabled for debugging. This feature may be enabled by passing ++ * --enable-debug-kmem-tracking to configure. ++ */ ++# define kmem_alloc(sz, fl) kmem_alloc_track((sz), (fl), \ ++ __FUNCTION__, __LINE__, 0, 0) ++# define kmem_zalloc(sz, fl) kmem_alloc_track((sz), (fl)|__GFP_ZERO,\ ++ __FUNCTION__, __LINE__, 0, 0) ++# define kmem_alloc_node(sz, fl, nd) kmem_alloc_track((sz), (fl), \ ++ __FUNCTION__, __LINE__, 1, nd) ++# define kmem_free(ptr, sz) kmem_free_track((ptr), (sz)) ++ ++# define vmem_alloc(sz, fl) vmem_alloc_track((sz), (fl), \ ++ __FUNCTION__, __LINE__) ++# define vmem_zalloc(sz, fl) vmem_alloc_track((sz), (fl)|__GFP_ZERO,\ ++ __FUNCTION__, __LINE__) ++# define vmem_free(ptr, sz) vmem_free_track((ptr), (sz)) ++ ++extern void *kmem_alloc_track(size_t, int, const char *, int, int, int); ++extern void kmem_free_track(const void *, size_t); ++extern void *vmem_alloc_track(size_t, int, const char *, int); ++extern void vmem_free_track(const void *, size_t); ++ ++# else /* DEBUG_KMEM_TRACKING */ ++/* ++ * DEBUG_KMEM && !DEBUG_KMEM_TRACKING ++ * ++ * The default build will set DEBUG_KEM. This provides basic memory ++ * accounting with little to no impact on performance. When the module ++ * is unloaded in any memory was leaked the total number of leaked bytes ++ * will be reported on the console. To disable this basic accounting ++ * pass the --disable-debug-kmem option to configure. 
++ */ ++# define kmem_alloc(sz, fl) kmem_alloc_debug((sz), (fl), \ ++ __FUNCTION__, __LINE__, 0, 0) ++# define kmem_zalloc(sz, fl) kmem_alloc_debug((sz), (fl)|__GFP_ZERO,\ ++ __FUNCTION__, __LINE__, 0, 0) ++# define kmem_alloc_node(sz, fl, nd) kmem_alloc_debug((sz), (fl), \ ++ __FUNCTION__, __LINE__, 1, nd) ++# define kmem_free(ptr, sz) kmem_free_debug((ptr), (sz)) ++ ++# define vmem_alloc(sz, fl) vmem_alloc_debug((sz), (fl), \ ++ __FUNCTION__, __LINE__) ++# define vmem_zalloc(sz, fl) vmem_alloc_debug((sz), (fl)|__GFP_ZERO,\ ++ __FUNCTION__, __LINE__) ++# define vmem_free(ptr, sz) vmem_free_debug((ptr), (sz)) ++ ++extern void *kmem_alloc_debug(size_t, int, const char *, int, int, int); ++extern void kmem_free_debug(const void *, size_t); ++extern void *vmem_alloc_debug(size_t, int, const char *, int); ++extern void vmem_free_debug(const void *, size_t); ++ ++# endif /* DEBUG_KMEM_TRACKING */ ++#else /* DEBUG_KMEM */ ++/* ++ * !DEBUG_KMEM && !DEBUG_KMEM_TRACKING ++ * ++ * All debugging is disabled. There will be no overhead even for ++ * minimal memory accounting. To enable basic accounting pass the ++ * --enable-debug-kmem option to configure. ++ */ ++# define kmem_alloc(sz, fl) kmalloc_nofail((sz), (fl)) ++# define kmem_zalloc(sz, fl) kzalloc_nofail((sz), (fl)) ++# define kmem_alloc_node(sz, fl, nd) kmalloc_node_nofail((sz), (fl), (nd)) ++# define kmem_free(ptr, sz) ((void)(sz), kfree(ptr)) ++ ++# define vmem_alloc(sz, fl) vmalloc_nofail((sz), (fl)) ++# define vmem_zalloc(sz, fl) vzalloc_nofail((sz), (fl)) ++# define vmem_free(ptr, sz) ((void)(sz), vfree(ptr)) ++ ++#endif /* DEBUG_KMEM */ ++ ++extern int kmem_debugging(void); ++extern char *kmem_vasprintf(const char *fmt, va_list ap); ++extern char *kmem_asprintf(const char *fmt, ...); ++extern char *strdup(const char *str); ++extern void strfree(char *str); ++ ++ ++/* ++ * Slab allocation interfaces. The SPL slab differs from the standard ++ * Linux SLAB or SLUB primarily in that each cache may be backed by slabs ++ * allocated from the physical or virtal memory address space. The virtual ++ * slabs allow for good behavior when allocation large objects of identical ++ * size. This slab implementation also supports both constructors and ++ * destructions which the Linux slab does not. 
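Whichever of the three configurations is selected, consumers see the same Solaris-style interface: kmem_* for kmalloc-backed allocations, vmem_* for vmalloc-backed ones, plus the string helpers declared above. A short usage sketch for illustration; KM_SLEEP is assumed to be the sleeping-allocation flag defined in the portion of this header not shown in this hunk:

        /* Illustration only: small buffers via kmem_*, large ones via vmem_*. */
        static void
        kmem_vmem_example(void)
        {
                char *small, *big, *copy;

                small = kmem_zalloc(64, KM_SLEEP);              /* kmalloc-backed */
                big = vmem_alloc(1024 * 1024, KM_SLEEP);        /* vmalloc-backed */
                copy = strdup("example");

                /* ... use the buffers ... */

                strfree(copy);
                vmem_free(big, 1024 * 1024);
                kmem_free(small, 64);                           /* size must match */
        }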
++ */ ++enum { ++ KMC_BIT_NOTOUCH = 0, /* Don't update ages */ ++ KMC_BIT_NODEBUG = 1, /* Default behavior */ ++ KMC_BIT_NOMAGAZINE = 2, /* XXX: Unsupported */ ++ KMC_BIT_NOHASH = 3, /* XXX: Unsupported */ ++ KMC_BIT_QCACHE = 4, /* XXX: Unsupported */ ++ KMC_BIT_KMEM = 5, /* Use kmem cache */ ++ KMC_BIT_VMEM = 6, /* Use vmem cache */ ++ KMC_BIT_OFFSLAB = 7, /* Objects not on slab */ ++ KMC_BIT_NOEMERGENCY = 8, /* Disable emergency objects */ ++ KMC_BIT_DEADLOCKED = 14, /* Deadlock detected */ ++ KMC_BIT_GROWING = 15, /* Growing in progress */ ++ KMC_BIT_REAPING = 16, /* Reaping in progress */ ++ KMC_BIT_DESTROY = 17, /* Destroy in progress */ ++ KMC_BIT_TOTAL = 18, /* Proc handler helper bit */ ++ KMC_BIT_ALLOC = 19, /* Proc handler helper bit */ ++ KMC_BIT_MAX = 20, /* Proc handler helper bit */ ++}; ++ ++/* kmem move callback return values */ ++typedef enum kmem_cbrc { ++ KMEM_CBRC_YES = 0, /* Object moved */ ++ KMEM_CBRC_NO = 1, /* Object not moved */ ++ KMEM_CBRC_LATER = 2, /* Object not moved, try again later */ ++ KMEM_CBRC_DONT_NEED = 3, /* Neither object is needed */ ++ KMEM_CBRC_DONT_KNOW = 4, /* Object unknown */ ++} kmem_cbrc_t; ++ ++#define KMC_NOTOUCH (1 << KMC_BIT_NOTOUCH) ++#define KMC_NODEBUG (1 << KMC_BIT_NODEBUG) ++#define KMC_NOMAGAZINE (1 << KMC_BIT_NOMAGAZINE) ++#define KMC_NOHASH (1 << KMC_BIT_NOHASH) ++#define KMC_QCACHE (1 << KMC_BIT_QCACHE) ++#define KMC_KMEM (1 << KMC_BIT_KMEM) ++#define KMC_VMEM (1 << KMC_BIT_VMEM) ++#define KMC_OFFSLAB (1 << KMC_BIT_OFFSLAB) ++#define KMC_NOEMERGENCY (1 << KMC_BIT_NOEMERGENCY) ++#define KMC_DEADLOCKED (1 << KMC_BIT_DEADLOCKED) ++#define KMC_GROWING (1 << KMC_BIT_GROWING) ++#define KMC_REAPING (1 << KMC_BIT_REAPING) ++#define KMC_DESTROY (1 << KMC_BIT_DESTROY) ++#define KMC_TOTAL (1 << KMC_BIT_TOTAL) ++#define KMC_ALLOC (1 << KMC_BIT_ALLOC) ++#define KMC_MAX (1 << KMC_BIT_MAX) ++ ++#define KMC_REAP_CHUNK INT_MAX ++#define KMC_DEFAULT_SEEKS 1 ++ ++extern struct list_head spl_kmem_cache_list; ++extern struct rw_semaphore spl_kmem_cache_sem; ++ ++#define SKM_MAGIC 0x2e2e2e2e ++#define SKO_MAGIC 0x20202020 ++#define SKS_MAGIC 0x22222222 ++#define SKC_MAGIC 0x2c2c2c2c ++ ++#define SPL_KMEM_CACHE_DELAY 15 /* Minimum slab release age */ ++#define SPL_KMEM_CACHE_REAP 0 /* Default reap everything */ ++#define SPL_KMEM_CACHE_OBJ_PER_SLAB 16 /* Target objects per slab */ ++#define SPL_KMEM_CACHE_OBJ_PER_SLAB_MIN 8 /* Minimum objects per slab */ ++#define SPL_KMEM_CACHE_ALIGN 8 /* Default object alignment */ ++ ++#define POINTER_IS_VALID(p) 0 /* Unimplemented */ ++#define POINTER_INVALIDATE(pp) /* Unimplemented */ ++ ++typedef int (*spl_kmem_ctor_t)(void *, void *, int); ++typedef void (*spl_kmem_dtor_t)(void *, void *); ++typedef void (*spl_kmem_reclaim_t)(void *); ++ ++typedef struct spl_kmem_magazine { ++ uint32_t skm_magic; /* Sanity magic */ ++ uint32_t skm_avail; /* Available objects */ ++ uint32_t skm_size; /* Magazine size */ ++ uint32_t skm_refill; /* Batch refill size */ ++ struct spl_kmem_cache *skm_cache; /* Owned by cache */ ++ struct delayed_work skm_work; /* Magazine reclaim work */ ++ unsigned long skm_age; /* Last cache access */ ++ unsigned int skm_cpu; /* Owned by cpu */ ++ void *skm_objs[0]; /* Object pointers */ ++} spl_kmem_magazine_t; ++ ++typedef struct spl_kmem_obj { ++ uint32_t sko_magic; /* Sanity magic */ ++ void *sko_addr; /* Buffer address */ ++ struct spl_kmem_slab *sko_slab; /* Owned by slab */ ++ struct list_head sko_list; /* Free object list linkage */ ++} spl_kmem_obj_t; ++ ++typedef struct spl_kmem_slab { 
++ uint32_t sks_magic; /* Sanity magic */ ++ uint32_t sks_objs; /* Objects per slab */ ++ struct spl_kmem_cache *sks_cache; /* Owned by cache */ ++ struct list_head sks_list; /* Slab list linkage */ ++ struct list_head sks_free_list; /* Free object list */ ++ unsigned long sks_age; /* Last modify jiffie */ ++ uint32_t sks_ref; /* Ref count used objects */ ++} spl_kmem_slab_t; ++ ++typedef struct spl_kmem_alloc { ++ struct spl_kmem_cache *ska_cache; /* Owned by cache */ ++ int ska_flags; /* Allocation flags */ ++ struct delayed_work ska_work; /* Allocation work */ ++} spl_kmem_alloc_t; ++ ++typedef struct spl_kmem_emergency { ++ struct rb_node ske_node; /* Emergency tree linkage */ ++ void *ske_obj; /* Buffer address */ ++} spl_kmem_emergency_t; ++ ++typedef struct spl_kmem_cache { ++ uint32_t skc_magic; /* Sanity magic */ ++ uint32_t skc_name_size; /* Name length */ ++ char *skc_name; /* Name string */ ++ spl_kmem_magazine_t *skc_mag[NR_CPUS]; /* Per-CPU warm cache */ ++ uint32_t skc_mag_size; /* Magazine size */ ++ uint32_t skc_mag_refill; /* Magazine refill count */ ++ spl_kmem_ctor_t skc_ctor; /* Constructor */ ++ spl_kmem_dtor_t skc_dtor; /* Destructor */ ++ spl_kmem_reclaim_t skc_reclaim; /* Reclaimator */ ++ void *skc_private; /* Private data */ ++ void *skc_vmp; /* Unused */ ++ unsigned long skc_flags; /* Flags */ ++ uint32_t skc_obj_size; /* Object size */ ++ uint32_t skc_obj_align; /* Object alignment */ ++ uint32_t skc_slab_objs; /* Objects per slab */ ++ uint32_t skc_slab_size; /* Slab size */ ++ uint32_t skc_delay; /* Slab reclaim interval */ ++ uint32_t skc_reap; /* Slab reclaim count */ ++ atomic_t skc_ref; /* Ref count callers */ ++ struct delayed_work skc_work; /* Slab reclaim work */ ++ struct list_head skc_list; /* List of caches linkage */ ++ struct list_head skc_complete_list;/* Completely alloc'ed */ ++ struct list_head skc_partial_list; /* Partially alloc'ed */ ++ struct rb_root skc_emergency_tree; /* Min sized objects */ ++ spinlock_t skc_lock; /* Cache lock */ ++ wait_queue_head_t skc_waitq; /* Allocation waiters */ ++ uint64_t skc_slab_fail; /* Slab alloc failures */ ++ uint64_t skc_slab_create;/* Slab creates */ ++ uint64_t skc_slab_destroy;/* Slab destroys */ ++ uint64_t skc_slab_total; /* Slab total current */ ++ uint64_t skc_slab_alloc; /* Slab alloc current */ ++ uint64_t skc_slab_max; /* Slab max historic */ ++ uint64_t skc_obj_total; /* Obj total current */ ++ uint64_t skc_obj_alloc; /* Obj alloc current */ ++ uint64_t skc_obj_max; /* Obj max historic */ ++ uint64_t skc_obj_deadlock; /* Obj emergency deadlocks */ ++ uint64_t skc_obj_emergency; /* Obj emergency current */ ++ uint64_t skc_obj_emergency_max; /* Obj emergency max */ ++} spl_kmem_cache_t; ++#define kmem_cache_t spl_kmem_cache_t ++ ++extern spl_kmem_cache_t *spl_kmem_cache_create(char *name, size_t size, ++ size_t align, spl_kmem_ctor_t ctor, spl_kmem_dtor_t dtor, ++ spl_kmem_reclaim_t reclaim, void *priv, void *vmp, int flags); ++extern void spl_kmem_cache_set_move(spl_kmem_cache_t *, ++ kmem_cbrc_t (*)(void *, void *, size_t, void *)); ++extern void spl_kmem_cache_destroy(spl_kmem_cache_t *skc); ++extern void *spl_kmem_cache_alloc(spl_kmem_cache_t *skc, int flags); ++extern void spl_kmem_cache_free(spl_kmem_cache_t *skc, void *obj); ++extern void spl_kmem_cache_reap_now(spl_kmem_cache_t *skc, int count); ++extern void spl_kmem_reap(void); ++ ++int spl_kmem_init_kallsyms_lookup(void); ++int spl_kmem_init(void); ++void spl_kmem_fini(void); ++ ++#define 
kmem_cache_create(name,size,align,ctor,dtor,rclm,priv,vmp,flags) \ ++ spl_kmem_cache_create(name,size,align,ctor,dtor,rclm,priv,vmp,flags) ++#define kmem_cache_set_move(skc, move) spl_kmem_cache_set_move(skc, move) ++#define kmem_cache_destroy(skc) spl_kmem_cache_destroy(skc) ++#define kmem_cache_alloc(skc, flags) spl_kmem_cache_alloc(skc, flags) ++#define kmem_cache_free(skc, obj) spl_kmem_cache_free(skc, obj) ++#define kmem_cache_reap_now(skc) \ ++ spl_kmem_cache_reap_now(skc, skc->skc_reap) ++#define kmem_reap() spl_kmem_reap() ++#define kmem_virt(ptr) (((ptr) >= (void *)VMALLOC_START) && \ ++ ((ptr) < (void *)VMALLOC_END)) ++ ++#endif /* _SPL_KMEM_H */ +diff -uNr linux-3.2.33-go.orig/include/spl/sys/kobj.h linux-3.2.33-go/include/spl/sys/kobj.h +--- linux-3.2.33-go.orig/include/spl/sys/kobj.h 1970-01-01 01:00:00.000000000 +0100 ++++ linux-3.2.33-go/include/spl/sys/kobj.h 2012-11-16 23:22:32.406192907 +0100 +@@ -0,0 +1,42 @@ ++/*****************************************************************************\ ++ * Copyright (C) 2007-2010 Lawrence Livermore National Security, LLC. ++ * Copyright (C) 2007 The Regents of the University of California. ++ * Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER). ++ * Written by Brian Behlendorf . ++ * UCRL-CODE-235197 ++ * ++ * This file is part of the SPL, Solaris Porting Layer. ++ * For details, see . ++ * ++ * The SPL is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License as published by the ++ * Free Software Foundation; either version 2 of the License, or (at your ++ * option) any later version. ++ * ++ * The SPL is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * for more details. ++ * ++ * You should have received a copy of the GNU General Public License along ++ * with the SPL. If not, see . ++\*****************************************************************************/ ++ ++#ifndef _SPL_KOBJ_H ++#define _SPL_KOBJ_H ++ ++#include ++ ++typedef struct _buf { ++ vnode_t *vp; ++} _buf_t; ++ ++typedef struct _buf buf_t; ++ ++extern struct _buf *kobj_open_file(const char *name); ++extern void kobj_close_file(struct _buf *file); ++extern int kobj_read_file(struct _buf *file, char *buf, ++ ssize_t size, offset_t off); ++extern int kobj_get_filesize(struct _buf *file, uint64_t *size); ++ ++#endif /* SPL_KOBJ_H */ +diff -uNr linux-3.2.33-go.orig/include/spl/sys/kstat.h linux-3.2.33-go/include/spl/sys/kstat.h +--- linux-3.2.33-go.orig/include/spl/sys/kstat.h 1970-01-01 01:00:00.000000000 +0100 ++++ linux-3.2.33-go/include/spl/sys/kstat.h 2012-11-16 23:22:32.405192918 +0100 +@@ -0,0 +1,194 @@ ++/*****************************************************************************\ ++ * Copyright (C) 2007-2010 Lawrence Livermore National Security, LLC. ++ * Copyright (C) 2007 The Regents of the University of California. ++ * Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER). ++ * Written by Brian Behlendorf . ++ * UCRL-CODE-235197 ++ * ++ * This file is part of the SPL, Solaris Porting Layer. ++ * For details, see . ++ * ++ * The SPL is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License as published by the ++ * Free Software Foundation; either version 2 of the License, or (at your ++ * option) any later version. 
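For illustration, a typical consumer of the object-cache interface declared in sys/kmem.h above; foo_t, its constructor/destructor and the KM_SLEEP flag are assumed names, not part of the patch:

        typedef struct foo {
                int foo_value;
        } foo_t;

        /* Constructor/destructor run by the cache, matching spl_kmem_ctor_t
         * and spl_kmem_dtor_t above. */
        static int
        foo_ctor(void *obj, void *priv, int kmflags)
        {
                foo_t *fp = obj;

                fp->foo_value = 0;
                return (0);
        }

        static void
        foo_dtor(void *obj, void *priv)
        {
        }

        static kmem_cache_t *foo_cache;

        static void
        foo_cache_example(void)
        {
                foo_t *fp;

                foo_cache = kmem_cache_create("foo_cache", sizeof (foo_t), 0,
                    foo_ctor, foo_dtor, NULL, NULL, NULL, 0);

                fp = kmem_cache_alloc(foo_cache, KM_SLEEP);
                /* ... use fp ... */
                kmem_cache_free(foo_cache, fp);

                kmem_cache_destroy(foo_cache);
        }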
++ * ++ * The SPL is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * for more details. ++ * ++ * You should have received a copy of the GNU General Public License along ++ * with the SPL. If not, see . ++\*****************************************************************************/ ++ ++#ifndef _SPL_KSTAT_H ++#define _SPL_KSTAT_H ++ ++#include ++#include ++#include ++#include ++#include ++#include ++ ++#define KSTAT_STRLEN 31 ++ ++/* For reference valid classes are: ++ * disk, tape, net, controller, vm, kvm, hat, streams, kstat, misc ++ */ ++ ++#define KSTAT_TYPE_RAW 0 /* can be anything; ks_ndata >= 1 */ ++#define KSTAT_TYPE_NAMED 1 /* name/value pair; ks_ndata >= 1 */ ++#define KSTAT_TYPE_INTR 2 /* interrupt stats; ks_ndata == 1 */ ++#define KSTAT_TYPE_IO 3 /* I/O stats; ks_ndata == 1 */ ++#define KSTAT_TYPE_TIMER 4 /* event timer; ks_ndata >= 1 */ ++#define KSTAT_TYPE_TXG 5 /* txg sync; ks_ndata >= 1 */ ++#define KSTAT_NUM_TYPES 6 ++ ++#define KSTAT_DATA_CHAR 0 ++#define KSTAT_DATA_INT32 1 ++#define KSTAT_DATA_UINT32 2 ++#define KSTAT_DATA_INT64 3 ++#define KSTAT_DATA_UINT64 4 ++#define KSTAT_DATA_LONG 5 ++#define KSTAT_DATA_ULONG 6 ++#define KSTAT_DATA_STRING 7 ++#define KSTAT_NUM_DATAS 8 ++ ++#define KSTAT_INTR_HARD 0 ++#define KSTAT_INTR_SOFT 1 ++#define KSTAT_INTR_WATCHDOG 2 ++#define KSTAT_INTR_SPURIOUS 3 ++#define KSTAT_INTR_MULTSVC 4 ++#define KSTAT_NUM_INTRS 5 ++ ++#define KSTAT_FLAG_VIRTUAL 0x01 ++#define KSTAT_FLAG_VAR_SIZE 0x02 ++#define KSTAT_FLAG_WRITABLE 0x04 ++#define KSTAT_FLAG_PERSISTENT 0x08 ++#define KSTAT_FLAG_DORMANT 0x10 ++#define KSTAT_FLAG_UNSUPPORTED (KSTAT_FLAG_VAR_SIZE | KSTAT_FLAG_WRITABLE | \ ++ KSTAT_FLAG_PERSISTENT | KSTAT_FLAG_DORMANT) ++ ++ ++#define KS_MAGIC 0x9d9d9d9d ++ ++/* Dynamic updates */ ++#define KSTAT_READ 0 ++#define KSTAT_WRITE 1 ++ ++struct kstat_s; ++ ++typedef int kid_t; /* unique kstat id */ ++typedef int kstat_update_t(struct kstat_s *, int); /* dynamic update cb */ ++ ++typedef struct kstat_s { ++ int ks_magic; /* magic value */ ++ kid_t ks_kid; /* unique kstat ID */ ++ hrtime_t ks_crtime; /* creation time */ ++ hrtime_t ks_snaptime; /* last access time */ ++ char ks_module[KSTAT_STRLEN+1]; /* provider module name */ ++ int ks_instance; /* provider module instance */ ++ char ks_name[KSTAT_STRLEN+1]; /* kstat name */ ++ char ks_class[KSTAT_STRLEN+1]; /* kstat class */ ++ uchar_t ks_type; /* kstat data type */ ++ uchar_t ks_flags; /* kstat flags */ ++ void *ks_data; /* kstat type-specific data */ ++ uint_t ks_ndata; /* # of type-specific data records */ ++ size_t ks_data_size; /* size of kstat data section */ ++ struct proc_dir_entry *ks_proc; /* proc linkage */ ++ kstat_update_t *ks_update; /* dynamic updates */ ++ void *ks_private; /* private data */ ++ kmutex_t ks_lock; /* kstat data lock */ ++ struct list_head ks_list; /* kstat linkage */ ++} kstat_t; ++ ++typedef struct kstat_named_s { ++ char name[KSTAT_STRLEN]; /* name of counter */ ++ uchar_t data_type; /* data type */ ++ union { ++ char c[16]; /* 128-bit int */ ++ int32_t i32; /* 32-bit signed int */ ++ uint32_t ui32; /* 32-bit unsigned int */ ++ int64_t i64; /* 64-bit signed int */ ++ uint64_t ui64; /* 64-bit unsigned int */ ++ long l; /* native signed long */ ++ ulong_t ul; /* native unsigned long */ ++ struct { ++ union { ++ char *ptr; /* NULL-term string */ ++ char __pad[8]; /* 64-bit padding */ ++ } addr; ++ 
uint32_t len; /* # bytes for strlen + '\0' */ ++ } string; ++ } value; ++} kstat_named_t; ++ ++#define KSTAT_NAMED_STR_PTR(knptr) ((knptr)->value.string.addr.ptr) ++#define KSTAT_NAMED_STR_BUFLEN(knptr) ((knptr)->value.string.len) ++ ++typedef struct kstat_intr { ++ uint_t intrs[KSTAT_NUM_INTRS]; ++} kstat_intr_t; ++ ++typedef struct kstat_io { ++ u_longlong_t nread; /* number of bytes read */ ++ u_longlong_t nwritten; /* number of bytes written */ ++ uint_t reads; /* number of read operations */ ++ uint_t writes; /* number of write operations */ ++ hrtime_t wtime; /* cumulative wait (pre-service) time */ ++ hrtime_t wlentime; /* cumulative wait length*time product*/ ++ hrtime_t wlastupdate; /* last time wait queue changed */ ++ hrtime_t rtime; /* cumulative run (service) time */ ++ hrtime_t rlentime; /* cumulative run length*time product */ ++ hrtime_t rlastupdate; /* last time run queue changed */ ++ uint_t wcnt; /* count of elements in wait state */ ++ uint_t rcnt; /* count of elements in run state */ ++} kstat_io_t; ++ ++typedef struct kstat_timer { ++ char name[KSTAT_STRLEN+1]; /* event name */ ++ u_longlong_t num_events; /* number of events */ ++ hrtime_t elapsed_time; /* cumulative elapsed time */ ++ hrtime_t min_time; /* shortest event duration */ ++ hrtime_t max_time; /* longest event duration */ ++ hrtime_t start_time; /* previous event start time */ ++ hrtime_t stop_time; /* previous event stop time */ ++} kstat_timer_t; ++ ++typedef enum kstat_txg_state { ++ TXG_STATE_OPEN = 1, ++ TXG_STATE_QUIESCING = 2, ++ TXG_STATE_SYNCING = 3, ++ TXG_STATE_COMMITTED = 4, ++} kstat_txg_state_t; ++ ++typedef struct kstat_txg { ++ u_longlong_t txg; /* txg id */ ++ kstat_txg_state_t state; /* txg state */ ++ hrtime_t birth; /* birth time stamp */ ++ u_longlong_t nread; /* number of bytes read */ ++ u_longlong_t nwritten; /* number of bytes written */ ++ uint_t reads; /* number of read operations */ ++ uint_t writes; /* number of write operations */ ++ hrtime_t open_time; /* open time */ ++ hrtime_t quiesce_time;/* quiesce time */ ++ hrtime_t sync_time; /* sync time */ ++} kstat_txg_t; ++ ++int spl_kstat_init(void); ++void spl_kstat_fini(void); ++ ++extern kstat_t *__kstat_create(const char *ks_module, int ks_instance, ++ const char *ks_name, const char *ks_class, ++ uchar_t ks_type, uint_t ks_ndata, ++ uchar_t ks_flags); ++extern void __kstat_install(kstat_t *ksp); ++extern void __kstat_delete(kstat_t *ksp); ++ ++#define kstat_create(m,i,n,c,t,s,f) __kstat_create(m,i,n,c,t,s,f) ++#define kstat_install(k) __kstat_install(k) ++#define kstat_delete(k) __kstat_delete(k) ++ ++#endif /* _SPL_KSTAT_H */ +diff -uNr linux-3.2.33-go.orig/include/spl/sys/list.h linux-3.2.33-go/include/spl/sys/list.h +--- linux-3.2.33-go.orig/include/spl/sys/list.h 1970-01-01 01:00:00.000000000 +0100 ++++ linux-3.2.33-go/include/spl/sys/list.h 2012-11-16 23:22:32.405192918 +0100 +@@ -0,0 +1,219 @@ ++/*****************************************************************************\ ++ * Copyright (C) 2007-2010 Lawrence Livermore National Security, LLC. ++ * Copyright (C) 2007 The Regents of the University of California. ++ * Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER). ++ * Written by Brian Behlendorf . ++ * UCRL-CODE-235197 ++ * ++ * This file is part of the SPL, Solaris Porting Layer. ++ * For details, see . 
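A conventional registration sketch for the named-kstat interface in sys/kstat.h above, for illustration only; the module and class strings are placeholders, and KSTAT_FLAG_VIRTUAL is used so the caller supplies its own ks_data array:

        static kstat_named_t example_kstats[] = {
                { "hits",   KSTAT_DATA_UINT64 },
                { "misses", KSTAT_DATA_UINT64 },
        };

        static kstat_t *example_ksp;

        static void
        example_kstat_init(void)
        {
                example_ksp = kstat_create("example", 0, "stats", "misc",
                    KSTAT_TYPE_NAMED,
                    sizeof (example_kstats) / sizeof (kstat_named_t),
                    KSTAT_FLAG_VIRTUAL);
                if (example_ksp != NULL) {
                        example_ksp->ks_data = example_kstats;
                        kstat_install(example_ksp);
                }
        }

        static void
        example_kstat_fini(void)
        {
                if (example_ksp != NULL)
                        kstat_delete(example_ksp);
        }

A counter is then bumped directly, e.g. example_kstats[0].value.ui64++, and is exported through procfs via the ks_proc entry above.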
++ * ++ * The SPL is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License as published by the ++ * Free Software Foundation; either version 2 of the License, or (at your ++ * option) any later version. ++ * ++ * The SPL is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * for more details. ++ * ++ * You should have received a copy of the GNU General Public License along ++ * with the SPL. If not, see . ++\*****************************************************************************/ ++ ++#ifndef _SPL_LIST_H ++#define _SPL_LIST_H ++ ++#include ++#include ++ ++/* ++ * NOTE: I have implemented the Solaris list API in terms of the native ++ * linux API. This has certain advantages in terms of leveraging the linux ++ * list debugging infrastructure, but it also means that the internals of a ++ * list differ slightly than on Solaris. This is not a problem as long as ++ * all callers stick to the published API. The two major differences are: ++ * ++ * 1) A list_node_t is mapped to a linux list_head struct which changes ++ * the name of the list_next/list_prev pointers to next/prev respectively. ++ * ++ * 2) A list_node_t which is not attached to a list on Solaris is denoted ++ * by having its list_next/list_prev pointers set to NULL. Under linux ++ * the next/prev pointers are set to LIST_POISON1 and LIST_POISON2 ++ * respectively. At this moment this only impacts the implementation ++ * of the list_link_init() and list_link_active() functions. ++ */ ++ ++typedef struct list_head list_node_t; ++ ++typedef struct list { ++ size_t list_size; ++ size_t list_offset; ++ list_node_t list_head; ++} list_t; ++ ++#define list_d2l(a, obj) ((list_node_t *)(((char *)obj) + (a)->list_offset)) ++#define list_object(a, node) ((void *)(((char *)node) - (a)->list_offset)) ++ ++static inline int ++list_is_empty(list_t *list) ++{ ++ return list_empty(&list->list_head); ++} ++ ++static inline void ++list_link_init(list_node_t *node) ++{ ++ node->next = LIST_POISON1; ++ node->prev = LIST_POISON2; ++} ++ ++static inline void ++list_create(list_t *list, size_t size, size_t offset) ++{ ++ ASSERT(list); ++ ASSERT(size > 0); ++ ASSERT(size >= offset + sizeof(list_node_t)); ++ ++ list->list_size = size; ++ list->list_offset = offset; ++ INIT_LIST_HEAD(&list->list_head); ++} ++ ++static inline void ++list_destroy(list_t *list) ++{ ++ ASSERT(list); ++ ASSERT(list_is_empty(list)); ++ ++ list_del(&list->list_head); ++} ++ ++static inline void ++list_insert_head(list_t *list, void *object) ++{ ++ list_add(list_d2l(list, object), &list->list_head); ++} ++ ++static inline void ++list_insert_tail(list_t *list, void *object) ++{ ++ list_add_tail(list_d2l(list, object), &list->list_head); ++} ++ ++static inline void ++list_insert_after(list_t *list, void *object, void *nobject) ++{ ++ if (object == NULL) ++ list_insert_head(list, nobject); ++ else ++ list_add(list_d2l(list, nobject), list_d2l(list, object)); ++} ++ ++static inline void ++list_insert_before(list_t *list, void *object, void *nobject) ++{ ++ if (object == NULL) ++ list_insert_tail(list, nobject); ++ else ++ list_add_tail(list_d2l(list, nobject), list_d2l(list, object)); ++} ++ ++static inline void ++list_remove(list_t *list, void *object) ++{ ++ ASSERT(!list_is_empty(list)); ++ list_del(list_d2l(list, object)); ++} ++ ++static inline void * 
++list_remove_head(list_t *list) ++{ ++ list_node_t *head = list->list_head.next; ++ if (head == &list->list_head) ++ return NULL; ++ ++ list_del(head); ++ return list_object(list, head); ++} ++ ++static inline void * ++list_remove_tail(list_t *list) ++{ ++ list_node_t *tail = list->list_head.prev; ++ if (tail == &list->list_head) ++ return NULL; ++ ++ list_del(tail); ++ return list_object(list, tail); ++} ++ ++static inline void * ++list_head(list_t *list) ++{ ++ if (list_is_empty(list)) ++ return NULL; ++ ++ return list_object(list, list->list_head.next); ++} ++ ++static inline void * ++list_tail(list_t *list) ++{ ++ if (list_is_empty(list)) ++ return NULL; ++ ++ return list_object(list, list->list_head.prev); ++} ++ ++static inline void * ++list_next(list_t *list, void *object) ++{ ++ list_node_t *node = list_d2l(list, object); ++ ++ if (node->next != &list->list_head) ++ return list_object(list, node->next); ++ ++ return NULL; ++} ++ ++static inline void * ++list_prev(list_t *list, void *object) ++{ ++ list_node_t *node = list_d2l(list, object); ++ ++ if (node->prev != &list->list_head) ++ return list_object(list, node->prev); ++ ++ return NULL; ++} ++ ++static inline int ++list_link_active(list_node_t *node) ++{ ++ return (node->next != LIST_POISON1) && (node->prev != LIST_POISON2); ++} ++ ++static inline void ++spl_list_move_tail(list_t *dst, list_t *src) ++{ ++ list_splice_init(&src->list_head, dst->list_head.prev); ++} ++ ++#define list_move_tail(dst, src) spl_list_move_tail(dst, src) ++ ++static inline void ++list_link_replace(list_node_t *old_node, list_node_t *new_node) ++{ ++ ASSERT(list_link_active(old_node)); ++ ASSERT(!list_link_active(new_node)); ++ ++ new_node->next = old_node->next; ++ new_node->prev = old_node->prev; ++ old_node->prev->next = new_node; ++ old_node->next->prev = new_node; ++ list_link_init(old_node); ++} ++ ++#endif /* SPL_LIST_H */ +diff -uNr linux-3.2.33-go.orig/include/spl/sys/mkdev.h linux-3.2.33-go/include/spl/sys/mkdev.h +--- linux-3.2.33-go.orig/include/spl/sys/mkdev.h 1970-01-01 01:00:00.000000000 +0100 ++++ linux-3.2.33-go/include/spl/sys/mkdev.h 2012-11-16 23:22:32.404192930 +0100 +@@ -0,0 +1,28 @@ ++/*****************************************************************************\ ++ * Copyright (C) 2007-2010 Lawrence Livermore National Security, LLC. ++ * Copyright (C) 2007 The Regents of the University of California. ++ * Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER). ++ * Written by Brian Behlendorf . ++ * UCRL-CODE-235197 ++ * ++ * This file is part of the SPL, Solaris Porting Layer. ++ * For details, see . ++ * ++ * The SPL is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License as published by the ++ * Free Software Foundation; either version 2 of the License, or (at your ++ * option) any later version. ++ * ++ * The SPL is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * for more details. ++ * ++ * You should have received a copy of the GNU General Public License along ++ * with the SPL. If not, see . 
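For illustration, a consumer of the Solaris list API above: the object embeds a list_node_t and list_create() is told the object size and the node's offset. item_t is a hypothetical type, and kmem_zalloc()/kmem_free()/KM_SLEEP come from sys/kmem.h as before:

        typedef struct item {
                int             i_value;
                list_node_t     i_node;
        } item_t;

        static void
        item_list_example(void)
        {
                list_t list;
                item_t *ip;

                list_create(&list, sizeof (item_t), offsetof(item_t, i_node));

                ip = kmem_zalloc(sizeof (item_t), KM_SLEEP);
                list_link_init(&ip->i_node);
                list_insert_tail(&list, ip);

                /* Walk the list Solaris-style; iteration yields objects,
                 * not nodes. */
                for (ip = list_head(&list); ip != NULL;
                    ip = list_next(&list, ip))
                        ip->i_value++;

                while ((ip = list_remove_head(&list)) != NULL)
                        kmem_free(ip, sizeof (item_t));

                list_destroy(&list);
        }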
++\*****************************************************************************/ ++ ++#ifndef _SPL_MKDEV_H ++#define _SPL_MKDEV_H ++ ++#endif /* SPL_MKDEV_H */ +diff -uNr linux-3.2.33-go.orig/include/spl/sys/mntent.h linux-3.2.33-go/include/spl/sys/mntent.h +--- linux-3.2.33-go.orig/include/spl/sys/mntent.h 1970-01-01 01:00:00.000000000 +0100 ++++ linux-3.2.33-go/include/spl/sys/mntent.h 2012-11-16 23:22:32.406192907 +0100 +@@ -0,0 +1,28 @@ ++/*****************************************************************************\ ++ * Copyright (C) 2007-2010 Lawrence Livermore National Security, LLC. ++ * Copyright (C) 2007 The Regents of the University of California. ++ * Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER). ++ * Written by Brian Behlendorf . ++ * UCRL-CODE-235197 ++ * ++ * This file is part of the SPL, Solaris Porting Layer. ++ * For details, see . ++ * ++ * The SPL is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License as published by the ++ * Free Software Foundation; either version 2 of the License, or (at your ++ * option) any later version. ++ * ++ * The SPL is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * for more details. ++ * ++ * You should have received a copy of the GNU General Public License along ++ * with the SPL. If not, see . ++\*****************************************************************************/ ++ ++#ifndef _SPL_MNTENT_H ++#define _SPL_MNTENT_H ++ ++#endif /* SPL_MNTENT_H */ +diff -uNr linux-3.2.33-go.orig/include/spl/sys/modctl.h linux-3.2.33-go/include/spl/sys/modctl.h +--- linux-3.2.33-go.orig/include/spl/sys/modctl.h 1970-01-01 01:00:00.000000000 +0100 ++++ linux-3.2.33-go/include/spl/sys/modctl.h 2012-11-16 23:22:32.404192930 +0100 +@@ -0,0 +1,28 @@ ++/*****************************************************************************\ ++ * Copyright (C) 2007-2010 Lawrence Livermore National Security, LLC. ++ * Copyright (C) 2007 The Regents of the University of California. ++ * Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER). ++ * Written by Brian Behlendorf . ++ * UCRL-CODE-235197 ++ * ++ * This file is part of the SPL, Solaris Porting Layer. ++ * For details, see . ++ * ++ * The SPL is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License as published by the ++ * Free Software Foundation; either version 2 of the License, or (at your ++ * option) any later version. ++ * ++ * The SPL is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * for more details. ++ * ++ * You should have received a copy of the GNU General Public License along ++ * with the SPL. If not, see . 
++\*****************************************************************************/ ++ ++#ifndef _SPL_MODCTL_H ++#define _SPL_MODCTL_H ++ ++#endif /* SPL_MODCTL_H */ +diff -uNr linux-3.2.33-go.orig/include/spl/sys/mode.h linux-3.2.33-go/include/spl/sys/mode.h +--- linux-3.2.33-go.orig/include/spl/sys/mode.h 1970-01-01 01:00:00.000000000 +0100 ++++ linux-3.2.33-go/include/spl/sys/mode.h 2012-11-16 23:22:32.405192918 +0100 +@@ -0,0 +1,32 @@ ++/*****************************************************************************\ ++ * Copyright (C) 2007-2010 Lawrence Livermore National Security, LLC. ++ * Copyright (C) 2007 The Regents of the University of California. ++ * Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER). ++ * Written by Brian Behlendorf . ++ * UCRL-CODE-235197 ++ * ++ * This file is part of the SPL, Solaris Porting Layer. ++ * For details, see . ++ * ++ * The SPL is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License as published by the ++ * Free Software Foundation; either version 2 of the License, or (at your ++ * option) any later version. ++ * ++ * The SPL is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * for more details. ++ * ++ * You should have received a copy of the GNU General Public License along ++ * with the SPL. If not, see . ++\*****************************************************************************/ ++ ++#ifndef _SPL_MODE_H ++#define _SPL_MODE_H ++ ++#define IFTOVT(mode) vn_mode_to_vtype(mode) ++#define VTTOIF(vtype) vn_vtype_to_mode(vtype) ++#define MAKEIMODE(T, M) (VTTOIF(T) | ((M) & ~S_IFMT)) ++ ++#endif /* SPL_MODE_H */ +diff -uNr linux-3.2.33-go.orig/include/spl/sys/mount.h linux-3.2.33-go/include/spl/sys/mount.h +--- linux-3.2.33-go.orig/include/spl/sys/mount.h 1970-01-01 01:00:00.000000000 +0100 ++++ linux-3.2.33-go/include/spl/sys/mount.h 2012-11-16 23:22:32.404192930 +0100 +@@ -0,0 +1,28 @@ ++/*****************************************************************************\ ++ * Copyright (C) 2007-2010 Lawrence Livermore National Security, LLC. ++ * Copyright (C) 2007 The Regents of the University of California. ++ * Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER). ++ * Written by Brian Behlendorf . ++ * UCRL-CODE-235197 ++ * ++ * This file is part of the SPL, Solaris Porting Layer. ++ * For details, see . ++ * ++ * The SPL is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License as published by the ++ * Free Software Foundation; either version 2 of the License, or (at your ++ * option) any later version. ++ * ++ * The SPL is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * for more details. ++ * ++ * You should have received a copy of the GNU General Public License along ++ * with the SPL. If not, see . 
++\*****************************************************************************/ ++ ++#ifndef _SPL_MOUNT_H ++#define _SPL_MOUNT_H ++ ++#endif /* SPL_MOUNT_H */ +diff -uNr linux-3.2.33-go.orig/include/spl/sys/mutex.h linux-3.2.33-go/include/spl/sys/mutex.h +--- linux-3.2.33-go.orig/include/spl/sys/mutex.h 1970-01-01 01:00:00.000000000 +0100 ++++ linux-3.2.33-go/include/spl/sys/mutex.h 2012-11-16 23:22:32.405192918 +0100 +@@ -0,0 +1,218 @@ ++/*****************************************************************************\ ++ * Copyright (C) 2007-2010 Lawrence Livermore National Security, LLC. ++ * Copyright (C) 2007 The Regents of the University of California. ++ * Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER). ++ * Written by Brian Behlendorf . ++ * UCRL-CODE-235197 ++ * ++ * This file is part of the SPL, Solaris Porting Layer. ++ * For details, see . ++ * ++ * The SPL is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License as published by the ++ * Free Software Foundation; either version 2 of the License, or (at your ++ * option) any later version. ++ * ++ * The SPL is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * for more details. ++ * ++ * You should have received a copy of the GNU General Public License along ++ * with the SPL. If not, see . ++\*****************************************************************************/ ++ ++#ifndef _SPL_MUTEX_H ++#define _SPL_MUTEX_H ++ ++#include ++#include ++#include ++ ++typedef enum { ++ MUTEX_DEFAULT = 0, ++ MUTEX_SPIN = 1, ++ MUTEX_ADAPTIVE = 2 ++} kmutex_type_t; ++ ++#if defined(HAVE_MUTEX_OWNER) && defined(CONFIG_SMP) && !defined(CONFIG_DEBUG_MUTEXES) ++ ++/* ++ * We define a 1-field struct rather than a straight typedef to enforce type ++ * safety. 
++ */ ++typedef struct { ++ struct mutex m; ++} kmutex_t; ++ ++static inline kthread_t * ++mutex_owner(kmutex_t *mp) ++{ ++#if defined(HAVE_MUTEX_OWNER_TASK_STRUCT) ++ return ACCESS_ONCE(mp->m.owner); ++#else ++ struct thread_info *owner = ACCESS_ONCE(mp->m.owner); ++ if (owner) ++ return owner->task; ++ ++ return NULL; ++#endif ++} ++ ++#define mutex_owned(mp) (mutex_owner(mp) == current) ++#define MUTEX_HELD(mp) mutex_owned(mp) ++#define MUTEX_NOT_HELD(mp) (!MUTEX_HELD(mp)) ++#undef mutex_init ++#define mutex_init(mp, name, type, ibc) \ ++({ \ ++ static struct lock_class_key __key; \ ++ ASSERT(type == MUTEX_DEFAULT); \ ++ \ ++ __mutex_init(&(mp)->m, #mp, &__key); \ ++}) ++ ++#undef mutex_destroy ++#define mutex_destroy(mp) \ ++({ \ ++ VERIFY3P(mutex_owner(mp), ==, NULL); \ ++}) ++ ++#define mutex_tryenter(mp) mutex_trylock(&(mp)->m) ++#define mutex_enter(mp) \ ++({ \ ++ ASSERT3P(mutex_owner(mp), !=, current); \ ++ mutex_lock(&(mp)->m); \ ++ }) ++#define mutex_exit(mp) mutex_unlock(&(mp)->m) ++ ++#ifdef HAVE_GPL_ONLY_SYMBOLS ++# define mutex_enter_nested(mp, sc) mutex_lock_nested(&(mp)->m, sc) ++#else ++# define mutex_enter_nested(mp, sc) mutex_enter(mp) ++#endif /* HAVE_GPL_ONLY_SYMBOLS */ ++ ++#else /* HAVE_MUTEX_OWNER */ ++ ++typedef struct { ++ struct mutex m_mutex; ++ kthread_t *m_owner; ++} kmutex_t; ++ ++#ifdef HAVE_TASK_CURR ++extern int spl_mutex_spin_max(void); ++#else /* HAVE_TASK_CURR */ ++# define task_curr(owner) 0 ++# define spl_mutex_spin_max() 0 ++#endif /* HAVE_TASK_CURR */ ++ ++#define MUTEX(mp) (&((mp)->m_mutex)) ++ ++static inline void ++spl_mutex_set_owner(kmutex_t *mp) ++{ ++ mp->m_owner = current; ++} ++ ++static inline void ++spl_mutex_clear_owner(kmutex_t *mp) ++{ ++ mp->m_owner = NULL; ++} ++ ++#define mutex_owner(mp) (ACCESS_ONCE((mp)->m_owner)) ++#define mutex_owned(mp) (mutex_owner(mp) == current) ++#define MUTEX_HELD(mp) mutex_owned(mp) ++#define MUTEX_NOT_HELD(mp) (!MUTEX_HELD(mp)) ++ ++/* ++ * The following functions must be a #define and not static inline. ++ * This ensures that the native linux mutex functions (lock/unlock) ++ * will be correctly located in the users code which is important ++ * for the built in kernel lock analysis tools ++ */ ++#undef mutex_init ++#define mutex_init(mp, name, type, ibc) \ ++({ \ ++ static struct lock_class_key __key; \ ++ ASSERT(type == MUTEX_DEFAULT); \ ++ \ ++ __mutex_init(MUTEX(mp), #mp, &__key); \ ++ spl_mutex_clear_owner(mp); \ ++}) ++ ++#undef mutex_destroy ++#define mutex_destroy(mp) \ ++({ \ ++ VERIFY3P(mutex_owner(mp), ==, NULL); \ ++}) ++ ++#define mutex_tryenter(mp) \ ++({ \ ++ int _rc_; \ ++ \ ++ if ((_rc_ = mutex_trylock(MUTEX(mp))) == 1) \ ++ spl_mutex_set_owner(mp); \ ++ \ ++ _rc_; \ ++}) ++ ++/* ++ * Adaptive mutexs assume that the lock may be held by a task running ++ * on a different cpu. The expectation is that the task will drop the ++ * lock before leaving the head of the run queue. So the ideal thing ++ * to do is spin until we acquire the lock and avoid a context switch. ++ * However it is also possible the task holding the lock yields the ++ * processor with out dropping lock. In this case, we know it's going ++ * to be a while so we stop spinning and go to sleep waiting for the ++ * lock to be available. This should strike the optimum balance ++ * between spinning and sleeping waiting for a lock. 
++ */ ++#define mutex_enter(mp) \ ++({ \ ++ kthread_t *_owner_; \ ++ int _rc_, _count_; \ ++ \ ++ _rc_ = 0; \ ++ _count_ = 0; \ ++ _owner_ = mutex_owner(mp); \ ++ ASSERT3P(_owner_, !=, current); \ ++ \ ++ while (_owner_ && task_curr(_owner_) && \ ++ _count_ <= spl_mutex_spin_max()) { \ ++ if ((_rc_ = mutex_trylock(MUTEX(mp)))) \ ++ break; \ ++ \ ++ _count_++; \ ++ } \ ++ \ ++ if (!_rc_) \ ++ mutex_lock(MUTEX(mp)); \ ++ \ ++ spl_mutex_set_owner(mp); \ ++}) ++ ++#define mutex_exit(mp) \ ++({ \ ++ spl_mutex_clear_owner(mp); \ ++ mutex_unlock(MUTEX(mp)); \ ++}) ++ ++#ifdef HAVE_GPL_ONLY_SYMBOLS ++# define mutex_enter_nested(mp, sc) \ ++({ \ ++ mutex_lock_nested(MUTEX(mp), sc); \ ++ spl_mutex_set_owner(mp); \ ++}) ++#else ++# define mutex_enter_nested(mp, sc) \ ++({ \ ++ mutex_enter(mp); \ ++}) ++#endif ++ ++#endif /* HAVE_MUTEX_OWNER */ ++ ++int spl_mutex_init(void); ++void spl_mutex_fini(void); ++ ++#endif /* _SPL_MUTEX_H */ +diff -uNr linux-3.2.33-go.orig/include/spl/sys/note.h linux-3.2.33-go/include/spl/sys/note.h +--- linux-3.2.33-go.orig/include/spl/sys/note.h 1970-01-01 01:00:00.000000000 +0100 ++++ linux-3.2.33-go/include/spl/sys/note.h 2012-11-16 23:22:32.406192907 +0100 +@@ -0,0 +1,28 @@ ++/*****************************************************************************\ ++ * Copyright (C) 2007-2010 Lawrence Livermore National Security, LLC. ++ * Copyright (C) 2007 The Regents of the University of California. ++ * Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER). ++ * Written by Brian Behlendorf . ++ * UCRL-CODE-235197 ++ * ++ * This file is part of the SPL, Solaris Porting Layer. ++ * For details, see . ++ * ++ * The SPL is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License as published by the ++ * Free Software Foundation; either version 2 of the License, or (at your ++ * option) any later version. ++ * ++ * The SPL is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * for more details. ++ * ++ * You should have received a copy of the GNU General Public License along ++ * with the SPL. If not, see . ++\*****************************************************************************/ ++ ++#ifndef _SPL_NOTE_H ++#define _SPL_NOTE_H ++ ++#endif /* SPL_NOTE_H */ +diff -uNr linux-3.2.33-go.orig/include/spl/sys/open.h linux-3.2.33-go/include/spl/sys/open.h +--- linux-3.2.33-go.orig/include/spl/sys/open.h 1970-01-01 01:00:00.000000000 +0100 ++++ linux-3.2.33-go/include/spl/sys/open.h 2012-11-16 23:22:32.403192942 +0100 +@@ -0,0 +1,28 @@ ++/*****************************************************************************\ ++ * Copyright (C) 2007-2010 Lawrence Livermore National Security, LLC. ++ * Copyright (C) 2007 The Regents of the University of California. ++ * Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER). ++ * Written by Brian Behlendorf . ++ * UCRL-CODE-235197 ++ * ++ * This file is part of the SPL, Solaris Porting Layer. ++ * For details, see . ++ * ++ * The SPL is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License as published by the ++ * Free Software Foundation; either version 2 of the License, or (at your ++ * option) any later version. 
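For illustration, how ZFS-style code consumes the kmutex_t wrapper in sys/mutex.h above; counter_t is a hypothetical type:

        typedef struct counter {
                kmutex_t        c_lock;
                uint64_t        c_value;
        } counter_t;

        static void
        counter_example(counter_t *cp)
        {
                mutex_init(&cp->c_lock, NULL, MUTEX_DEFAULT, NULL);

                mutex_enter(&cp->c_lock);
                ASSERT(MUTEX_HELD(&cp->c_lock));
                cp->c_value++;
                mutex_exit(&cp->c_lock);

                /* Non-blocking attempt; returns non-zero on success. */
                if (mutex_tryenter(&cp->c_lock)) {
                        cp->c_value++;
                        mutex_exit(&cp->c_lock);
                }

                mutex_destroy(&cp->c_lock);
        }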
++ * ++ * The SPL is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * for more details. ++ * ++ * You should have received a copy of the GNU General Public License along ++ * with the SPL. If not, see . ++\*****************************************************************************/ ++ ++#ifndef _SPL_OPEN_H ++#define _SPL_OPEN_H ++ ++#endif /* SPL_OPEN_H */ +diff -uNr linux-3.2.33-go.orig/include/spl/sys/param.h linux-3.2.33-go/include/spl/sys/param.h +--- linux-3.2.33-go.orig/include/spl/sys/param.h 1970-01-01 01:00:00.000000000 +0100 ++++ linux-3.2.33-go/include/spl/sys/param.h 2012-11-16 23:22:32.406192907 +0100 +@@ -0,0 +1,36 @@ ++/*****************************************************************************\ ++ * Copyright (C) 2007-2010 Lawrence Livermore National Security, LLC. ++ * Copyright (C) 2007 The Regents of the University of California. ++ * Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER). ++ * Written by Brian Behlendorf . ++ * UCRL-CODE-235197 ++ * ++ * This file is part of the SPL, Solaris Porting Layer. ++ * For details, see . ++ * ++ * The SPL is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License as published by the ++ * Free Software Foundation; either version 2 of the License, or (at your ++ * option) any later version. ++ * ++ * The SPL is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * for more details. ++ * ++ * You should have received a copy of the GNU General Public License along ++ * with the SPL. If not, see . ++\*****************************************************************************/ ++ ++#ifndef _SPL_PARAM_H ++#define _SPL_PARAM_H ++ ++#include ++ ++/* Pages to bytes and back */ ++#define ptob(pages) (pages << PAGE_SHIFT) ++#define btop(bytes) (bytes >> PAGE_SHIFT) ++ ++#define MAXUID UINT32_MAX ++ ++#endif /* SPL_PARAM_H */ +diff -uNr linux-3.2.33-go.orig/include/spl/sys/pathname.h linux-3.2.33-go/include/spl/sys/pathname.h +--- linux-3.2.33-go.orig/include/spl/sys/pathname.h 1970-01-01 01:00:00.000000000 +0100 ++++ linux-3.2.33-go/include/spl/sys/pathname.h 2012-11-16 23:22:32.406192907 +0100 +@@ -0,0 +1,35 @@ ++/*****************************************************************************\ ++ * Copyright (C) 2007-2010 Lawrence Livermore National Security, LLC. ++ * Copyright (C) 2007 The Regents of the University of California. ++ * Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER). ++ * Written by Brian Behlendorf . ++ * UCRL-CODE-235197 ++ * ++ * This file is part of the SPL, Solaris Porting Layer. ++ * For details, see . ++ * ++ * The SPL is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License as published by the ++ * Free Software Foundation; either version 2 of the License, or (at your ++ * option) any later version. ++ * ++ * The SPL is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * for more details. ++ * ++ * You should have received a copy of the GNU General Public License along ++ * with the SPL. 
If not, see . ++\*****************************************************************************/ ++ ++#ifndef _SPL_PATHNAME_H ++#define _SPL_PATHNAME_H ++ ++typedef struct pathname { ++ char *pn_buf; /* underlying storage */ ++ char *pn_path; /* remaining pathname */ ++ size_t pn_pathlen; /* remaining length */ ++ size_t pn_bufsize; /* total size of pn_buf */ ++} pathname_t; ++ ++#endif /* SPL_PATHNAME_H */ +diff -uNr linux-3.2.33-go.orig/include/spl/sys/policy.h linux-3.2.33-go/include/spl/sys/policy.h +--- linux-3.2.33-go.orig/include/spl/sys/policy.h 1970-01-01 01:00:00.000000000 +0100 ++++ linux-3.2.33-go/include/spl/sys/policy.h 2012-11-16 23:22:32.404192930 +0100 +@@ -0,0 +1,47 @@ ++/*****************************************************************************\ ++ * Copyright (C) 2007-2010 Lawrence Livermore National Security, LLC. ++ * Copyright (C) 2007 The Regents of the University of California. ++ * Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER). ++ * Written by Brian Behlendorf . ++ * UCRL-CODE-235197 ++ * ++ * This file is part of the SPL, Solaris Porting Layer. ++ * For details, see . ++ * ++ * The SPL is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License as published by the ++ * Free Software Foundation; either version 2 of the License, or (at your ++ * option) any later version. ++ * ++ * The SPL is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * for more details. ++ * ++ * You should have received a copy of the GNU General Public License along ++ * with the SPL. If not, see . ++\*****************************************************************************/ ++ ++#ifndef _SPL_POLICY_H ++#define _SPL_POLICY_H ++ ++#define secpolicy_fs_unmount(c,vfs) (0) ++#define secpolicy_nfs(c) (0) ++#define secpolicy_sys_config(c,co) (0) ++#define secpolicy_zfs(c) (0) ++#define secpolicy_zinject(c) (0) ++#define secpolicy_vnode_setids_setgids(c,id) (0) ++#define secpolicy_vnode_setid_retain(c, sr) (0) ++#define secpolicy_setid_clear(v, c) (0) ++#define secpolicy_vnode_any_access(c,vp,o) (0) ++#define secpolicy_vnode_access2(c,cp,o,m1,m2) (0) ++#define secpolicy_vnode_chown(c,o) (0) ++#define secpolicy_vnode_setdac(c,o) (0) ++#define secpolicy_vnode_remove(c) (0) ++#define secpolicy_vnode_setattr(c,v,a,o,f,func,n) (0) ++#define secpolicy_xvattr(x, o, c, t) (0) ++#define secpolicy_vnode_stky_modify(c) (0) ++#define secpolicy_setid_setsticky_clear(v,a,o,c) (0) ++#define secpolicy_basic_link(c) (0) ++ ++#endif /* SPL_POLICY_H */ +diff -uNr linux-3.2.33-go.orig/include/spl/sys/pool.h linux-3.2.33-go/include/spl/sys/pool.h +--- linux-3.2.33-go.orig/include/spl/sys/pool.h 1970-01-01 01:00:00.000000000 +0100 ++++ linux-3.2.33-go/include/spl/sys/pool.h 2012-11-16 23:22:32.406192907 +0100 +@@ -0,0 +1,30 @@ ++/*****************************************************************************\ ++ * Copyright (C) 2007-2010 Lawrence Livermore National Security, LLC. ++ * Copyright (C) 2007 The Regents of the University of California. ++ * Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER). ++ * Written by Brian Behlendorf . ++ * UCRL-CODE-235197 ++ * ++ * This file is part of the SPL, Solaris Porting Layer. ++ * For details, see . 
++ * ++ * The SPL is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License as published by the ++ * Free Software Foundation; either version 2 of the License, or (at your ++ * option) any later version. ++ * ++ * The SPL is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * for more details. ++ * ++ * You should have received a copy of the GNU General Public License along ++ * with the SPL. If not, see . ++\*****************************************************************************/ ++ ++#ifndef _SPL_POOL_H ++#define _SPL_POOL_H ++ ++#include ++ ++#endif /* SPL_POOL_H */ +diff -uNr linux-3.2.33-go.orig/include/spl/sys/priv_impl.h linux-3.2.33-go/include/spl/sys/priv_impl.h +--- linux-3.2.33-go.orig/include/spl/sys/priv_impl.h 1970-01-01 01:00:00.000000000 +0100 ++++ linux-3.2.33-go/include/spl/sys/priv_impl.h 2012-11-16 23:22:32.405192918 +0100 +@@ -0,0 +1,28 @@ ++/*****************************************************************************\ ++ * Copyright (C) 2007-2010 Lawrence Livermore National Security, LLC. ++ * Copyright (C) 2007 The Regents of the University of California. ++ * Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER). ++ * Written by Brian Behlendorf . ++ * UCRL-CODE-235197 ++ * ++ * This file is part of the SPL, Solaris Porting Layer. ++ * For details, see . ++ * ++ * The SPL is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License as published by the ++ * Free Software Foundation; either version 2 of the License, or (at your ++ * option) any later version. ++ * ++ * The SPL is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * for more details. ++ * ++ * You should have received a copy of the GNU General Public License along ++ * with the SPL. If not, see . ++\*****************************************************************************/ ++ ++#ifndef _SPL_PRIV_IMPL_H ++#define _SPL_PRIV_IMPL_H ++ ++#endif /* _SPL_PRIV_IMPL_H */ +diff -uNr linux-3.2.33-go.orig/include/spl/sys/processor.h linux-3.2.33-go/include/spl/sys/processor.h +--- linux-3.2.33-go.orig/include/spl/sys/processor.h 1970-01-01 01:00:00.000000000 +0100 ++++ linux-3.2.33-go/include/spl/sys/processor.h 2012-11-16 23:22:32.406192907 +0100 +@@ -0,0 +1,32 @@ ++/*****************************************************************************\ ++ * Copyright (C) 2007-2010 Lawrence Livermore National Security, LLC. ++ * Copyright (C) 2007 The Regents of the University of California. ++ * Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER). ++ * Written by Brian Behlendorf . ++ * UCRL-CODE-235197 ++ * ++ * This file is part of the SPL, Solaris Porting Layer. ++ * For details, see . ++ * ++ * The SPL is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License as published by the ++ * Free Software Foundation; either version 2 of the License, or (at your ++ * option) any later version. ++ * ++ * The SPL is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. 
See the GNU General Public License ++ * for more details. ++ * ++ * You should have received a copy of the GNU General Public License along ++ * with the SPL. If not, see . ++\*****************************************************************************/ ++ ++#ifndef _SPL_PROCESSOR_H ++#define _SPL_PROCESSOR_H ++ ++#define getcpuid() smp_processor_id() ++ ++typedef int processorid_t; ++ ++#endif /* _SPL_PROCESSOR_H */ +diff -uNr linux-3.2.33-go.orig/include/spl/sys/proc.h linux-3.2.33-go/include/spl/sys/proc.h +--- linux-3.2.33-go.orig/include/spl/sys/proc.h 1970-01-01 01:00:00.000000000 +0100 ++++ linux-3.2.33-go/include/spl/sys/proc.h 2012-11-16 23:22:32.403192942 +0100 +@@ -0,0 +1,28 @@ ++/*****************************************************************************\ ++ * Copyright (C) 2007-2010 Lawrence Livermore National Security, LLC. ++ * Copyright (C) 2007 The Regents of the University of California. ++ * Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER). ++ * Written by Brian Behlendorf . ++ * UCRL-CODE-235197 ++ * ++ * This file is part of the SPL, Solaris Porting Layer. ++ * For details, see . ++ * ++ * The SPL is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License as published by the ++ * Free Software Foundation; either version 2 of the License, or (at your ++ * option) any later version. ++ * ++ * The SPL is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * for more details. ++ * ++ * You should have received a copy of the GNU General Public License along ++ * with the SPL. If not, see . ++\*****************************************************************************/ ++ ++#ifndef _SPL_PROC_H ++#define _SPL_PROC_H ++ ++#endif /* SPL_PROC_H */ +diff -uNr linux-3.2.33-go.orig/include/spl/sys/pset.h linux-3.2.33-go/include/spl/sys/pset.h +--- linux-3.2.33-go.orig/include/spl/sys/pset.h 1970-01-01 01:00:00.000000000 +0100 ++++ linux-3.2.33-go/include/spl/sys/pset.h 2012-11-16 23:22:32.406192907 +0100 +@@ -0,0 +1,38 @@ ++/*****************************************************************************\ ++ * Copyright (C) 2007-2010 Lawrence Livermore National Security, LLC. ++ * Copyright (C) 2007 The Regents of the University of California. ++ * Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER). ++ * Written by Brian Behlendorf . ++ * UCRL-CODE-235197 ++ * ++ * This file is part of the SPL, Solaris Porting Layer. ++ * For details, see . ++ * ++ * The SPL is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License as published by the ++ * Free Software Foundation; either version 2 of the License, or (at your ++ * option) any later version. ++ * ++ * The SPL is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * for more details. ++ * ++ * You should have received a copy of the GNU General Public License along ++ * with the SPL. If not, see . 
++\*****************************************************************************/ ++ ++#ifndef _SPL_PSET_H ++#define _SPL_PSET_H ++ ++typedef int psetid_t; ++ ++/* special processor set id's */ ++#define PS_NONE -1 ++#define PS_QUERY -2 ++#define PS_MYID -3 ++#define PS_SOFT -4 ++#define PS_HARD -5 ++#define PS_QUERY_TYPE -6 ++ ++#endif /* SPL_PSET_H */ +diff -uNr linux-3.2.33-go.orig/include/spl/sys/random.h linux-3.2.33-go/include/spl/sys/random.h +--- linux-3.2.33-go.orig/include/spl/sys/random.h 1970-01-01 01:00:00.000000000 +0100 ++++ linux-3.2.33-go/include/spl/sys/random.h 2012-11-16 23:22:32.405192918 +0100 +@@ -0,0 +1,45 @@ ++/*****************************************************************************\ ++ * Copyright (C) 2007-2010 Lawrence Livermore National Security, LLC. ++ * Copyright (C) 2007 The Regents of the University of California. ++ * Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER). ++ * Written by Brian Behlendorf . ++ * UCRL-CODE-235197 ++ * ++ * This file is part of the SPL, Solaris Porting Layer. ++ * For details, see . ++ * ++ * The SPL is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License as published by the ++ * Free Software Foundation; either version 2 of the License, or (at your ++ * option) any later version. ++ * ++ * The SPL is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * for more details. ++ * ++ * You should have received a copy of the GNU General Public License along ++ * with the SPL. If not, see . ++\*****************************************************************************/ ++ ++#ifndef _SPL_RANDOM_H ++#define _SPL_RANDOM_H ++ ++#include ++#include ++ ++static __inline__ int ++random_get_bytes(uint8_t *ptr, size_t len) ++{ ++ get_random_bytes((void *)ptr,(int)len); ++ return 0; ++} ++ ++static __inline__ int ++random_get_pseudo_bytes(uint8_t *ptr, size_t len) ++{ ++ get_random_bytes((void *)ptr,(int)len); ++ return 0; ++} ++ ++#endif /* _SPL_RANDOM_H */ +diff -uNr linux-3.2.33-go.orig/include/spl/sys/refstr.h linux-3.2.33-go/include/spl/sys/refstr.h +--- linux-3.2.33-go.orig/include/spl/sys/refstr.h 1970-01-01 01:00:00.000000000 +0100 ++++ linux-3.2.33-go/include/spl/sys/refstr.h 2012-11-16 23:22:32.405192918 +0100 +@@ -0,0 +1,28 @@ ++/*****************************************************************************\ ++ * Copyright (C) 2007-2010 Lawrence Livermore National Security, LLC. ++ * Copyright (C) 2007 The Regents of the University of California. ++ * Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER). ++ * Written by Brian Behlendorf . ++ * UCRL-CODE-235197 ++ * ++ * This file is part of the SPL, Solaris Porting Layer. ++ * For details, see . ++ * ++ * The SPL is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License as published by the ++ * Free Software Foundation; either version 2 of the License, or (at your ++ * option) any later version. ++ * ++ * The SPL is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * for more details. ++ * ++ * You should have received a copy of the GNU General Public License along ++ * with the SPL. If not, see . 
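Both random helpers above simply pull from the kernel entropy pool via get_random_bytes() and always return 0. A trivial illustrative wrapper, not part of the patch:

        static uint64_t
        random_u64(void)
        {
                uint64_t val;

                VERIFY(random_get_pseudo_bytes((uint8_t *)&val,
                    sizeof (val)) == 0);

                return (val);
        }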
++\*****************************************************************************/ ++ ++#ifndef _SPL_REFSTR_H ++#define _SPL_REFSTR_H ++ ++#endif /* SPL_REFSTR_H */ +diff -uNr linux-3.2.33-go.orig/include/spl/sys/resource.h linux-3.2.33-go/include/spl/sys/resource.h +--- linux-3.2.33-go.orig/include/spl/sys/resource.h 1970-01-01 01:00:00.000000000 +0100 ++++ linux-3.2.33-go/include/spl/sys/resource.h 2012-11-16 23:22:32.406192907 +0100 +@@ -0,0 +1,30 @@ ++/*****************************************************************************\ ++ * Copyright (C) 2007-2010 Lawrence Livermore National Security, LLC. ++ * Copyright (C) 2007 The Regents of the University of California. ++ * Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER). ++ * Written by Brian Behlendorf . ++ * UCRL-CODE-235197 ++ * ++ * This file is part of the SPL, Solaris Porting Layer. ++ * For details, see . ++ * ++ * The SPL is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License as published by the ++ * Free Software Foundation; either version 2 of the License, or (at your ++ * option) any later version. ++ * ++ * The SPL is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * for more details. ++ * ++ * You should have received a copy of the GNU General Public License along ++ * with the SPL. If not, see . ++\*****************************************************************************/ ++ ++#ifndef _SPL_RESOURCE_H ++#define _SPL_RESOURCE_H ++ ++#include ++ ++#endif /* SPL_RESOURCE_H */ +diff -uNr linux-3.2.33-go.orig/include/spl/sys/rwlock.h linux-3.2.33-go/include/spl/sys/rwlock.h +--- linux-3.2.33-go.orig/include/spl/sys/rwlock.h 1970-01-01 01:00:00.000000000 +0100 ++++ linux-3.2.33-go/include/spl/sys/rwlock.h 2012-11-16 23:22:32.405192918 +0100 +@@ -0,0 +1,214 @@ ++/*****************************************************************************\ ++ * Copyright (C) 2007-2010 Lawrence Livermore National Security, LLC. ++ * Copyright (C) 2007 The Regents of the University of California. ++ * Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER). ++ * Written by Brian Behlendorf . ++ * UCRL-CODE-235197 ++ * ++ * This file is part of the SPL, Solaris Porting Layer. ++ * For details, see . ++ * ++ * The SPL is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License as published by the ++ * Free Software Foundation; either version 2 of the License, or (at your ++ * option) any later version. ++ * ++ * The SPL is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * for more details. ++ * ++ * You should have received a copy of the GNU General Public License along ++ * with the SPL. If not, see . 
++\*****************************************************************************/ ++ ++#ifndef _SPL_RWLOCK_H ++#define _SPL_RWLOCK_H ++ ++#include ++#include ++#include ++ ++typedef enum { ++ RW_DRIVER = 2, ++ RW_DEFAULT = 4 ++} krw_type_t; ++ ++typedef enum { ++ RW_NONE = 0, ++ RW_WRITER = 1, ++ RW_READER = 2 ++} krw_t; ++ ++typedef struct { ++ struct rw_semaphore rw_rwlock; ++ kthread_t *rw_owner; ++} krwlock_t; ++ ++#define SEM(rwp) ((struct rw_semaphore *)(rwp)) ++ ++static inline void ++spl_rw_set_owner(krwlock_t *rwp) ++{ ++ unsigned long flags; ++ ++ spl_rwsem_lock_irqsave(&SEM(rwp)->wait_lock, flags); ++ rwp->rw_owner = current; ++ spl_rwsem_unlock_irqrestore(&SEM(rwp)->wait_lock, flags); ++} ++ ++static inline void ++spl_rw_clear_owner(krwlock_t *rwp) ++{ ++ unsigned long flags; ++ ++ spl_rwsem_lock_irqsave(&SEM(rwp)->wait_lock, flags); ++ rwp->rw_owner = NULL; ++ spl_rwsem_unlock_irqrestore(&SEM(rwp)->wait_lock, flags); ++} ++ ++static inline kthread_t * ++rw_owner(krwlock_t *rwp) ++{ ++ unsigned long flags; ++ kthread_t *owner; ++ ++ spl_rwsem_lock_irqsave(&SEM(rwp)->wait_lock, flags); ++ owner = rwp->rw_owner; ++ spl_rwsem_unlock_irqrestore(&SEM(rwp)->wait_lock, flags); ++ ++ return owner; ++} ++ ++static inline int ++RW_READ_HELD(krwlock_t *rwp) ++{ ++ return (spl_rwsem_is_locked(SEM(rwp)) && ++ rw_owner(rwp) == NULL); ++} ++ ++static inline int ++RW_WRITE_HELD(krwlock_t *rwp) ++{ ++ return (spl_rwsem_is_locked(SEM(rwp)) && ++ rw_owner(rwp) == current); ++} ++ ++static inline int ++RW_LOCK_HELD(krwlock_t *rwp) ++{ ++ return spl_rwsem_is_locked(SEM(rwp)); ++} ++ ++/* ++ * The following functions must be a #define and not static inline. ++ * This ensures that the native linux semaphore functions (down/up) ++ * will be correctly located in the users code which is important ++ * for the built in kernel lock analysis tools ++ */ ++#define rw_init(rwp, name, type, arg) \ ++({ \ ++ static struct lock_class_key __key; \ ++ \ ++ __init_rwsem(SEM(rwp), #rwp, &__key); \ ++ spl_rw_clear_owner(rwp); \ ++}) ++ ++#define rw_destroy(rwp) \ ++({ \ ++ VERIFY(!RW_LOCK_HELD(rwp)); \ ++}) ++ ++#define rw_tryenter(rwp, rw) \ ++({ \ ++ int _rc_ = 0; \ ++ \ ++ switch (rw) { \ ++ case RW_READER: \ ++ _rc_ = down_read_trylock(SEM(rwp)); \ ++ break; \ ++ case RW_WRITER: \ ++ if ((_rc_ = down_write_trylock(SEM(rwp)))) \ ++ spl_rw_set_owner(rwp); \ ++ break; \ ++ default: \ ++ VERIFY(0); \ ++ } \ ++ _rc_; \ ++}) ++ ++#define rw_enter(rwp, rw) \ ++({ \ ++ switch (rw) { \ ++ case RW_READER: \ ++ down_read(SEM(rwp)); \ ++ break; \ ++ case RW_WRITER: \ ++ down_write(SEM(rwp)); \ ++ spl_rw_set_owner(rwp); \ ++ break; \ ++ default: \ ++ VERIFY(0); \ ++ } \ ++}) ++ ++#define rw_exit(rwp) \ ++({ \ ++ if (RW_WRITE_HELD(rwp)) { \ ++ spl_rw_clear_owner(rwp); \ ++ up_write(SEM(rwp)); \ ++ } else { \ ++ ASSERT(RW_READ_HELD(rwp)); \ ++ up_read(SEM(rwp)); \ ++ } \ ++}) ++ ++#define rw_downgrade(rwp) \ ++({ \ ++ spl_rw_clear_owner(rwp); \ ++ downgrade_write(SEM(rwp)); \ ++}) ++ ++#if defined(CONFIG_RWSEM_GENERIC_SPINLOCK) ++/* ++ * For the generic implementations of rw-semaphores the following is ++ * true. If your semaphore implementation internally represents the ++ * semaphore state differently then special case handling is required. 
++ * - if activity/count is 0 then there are no active readers or writers ++ * - if activity/count is +ve then that is the number of active readers ++ * - if activity/count is -1 then there is one active writer ++ */ ++ ++extern void __up_read_locked(struct rw_semaphore *); ++extern int __down_write_trylock_locked(struct rw_semaphore *); ++ ++#define rw_tryupgrade(rwp) \ ++({ \ ++ unsigned long _flags_; \ ++ int _rc_ = 0; \ ++ \ ++ spl_rwsem_lock_irqsave(&SEM(rwp)->wait_lock, _flags_); \ ++ if ((list_empty(&SEM(rwp)->wait_list)) && \ ++ (SEM(rwp)->activity == 1)) { \ ++ __up_read_locked(SEM(rwp)); \ ++ VERIFY(_rc_ = __down_write_trylock_locked(SEM(rwp))); \ ++ (rwp)->rw_owner = current; \ ++ } \ ++ spl_rwsem_unlock_irqrestore(&SEM(rwp)->wait_lock, _flags_); \ ++ _rc_; \ ++}) ++#else ++/* ++ * rw_tryupgrade() can be implemented correctly but for each supported ++ * arch we will need a custom implementation. For the x86 implementation ++ * it looks like a custom cmpxchg() to atomically check and promote the ++ * rwsem would be safe. For now that's not worth the trouble so in this ++ * case rw_tryupgrade() has just been disabled. ++ */ ++#define rw_tryupgrade(rwp) ({ 0; }) ++#endif ++ ++int spl_rw_init(void); ++void spl_rw_fini(void); ++ ++#endif /* _SPL_RWLOCK_H */ +diff -uNr linux-3.2.33-go.orig/include/spl/sys/sdt.h linux-3.2.33-go/include/spl/sys/sdt.h +--- linux-3.2.33-go.orig/include/spl/sys/sdt.h 1970-01-01 01:00:00.000000000 +0100 ++++ linux-3.2.33-go/include/spl/sys/sdt.h 2012-11-16 23:22:32.404192930 +0100 +@@ -0,0 +1,28 @@ ++/*****************************************************************************\ ++ * Copyright (C) 2007-2010 Lawrence Livermore National Security, LLC. ++ * Copyright (C) 2007 The Regents of the University of California. ++ * Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER). ++ * Written by Brian Behlendorf . ++ * UCRL-CODE-235197 ++ * ++ * This file is part of the SPL, Solaris Porting Layer. ++ * For details, see . ++ * ++ * The SPL is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License as published by the ++ * Free Software Foundation; either version 2 of the License, or (at your ++ * option) any later version. ++ * ++ * The SPL is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * for more details. ++ * ++ * You should have received a copy of the GNU General Public License along ++ * with the SPL. If not, see . ++\*****************************************************************************/ ++ ++#ifndef _SPL_SDT_H ++#define _SPL_SDT_H ++ ++#endif /* SPL_SDT_H */ +diff -uNr linux-3.2.33-go.orig/include/spl/sys/sid.h linux-3.2.33-go/include/spl/sys/sid.h +--- linux-3.2.33-go.orig/include/spl/sys/sid.h 1970-01-01 01:00:00.000000000 +0100 ++++ linux-3.2.33-go/include/spl/sys/sid.h 2012-11-16 23:22:32.403192942 +0100 +@@ -0,0 +1,61 @@ ++/*****************************************************************************\ ++ * Copyright (C) 2007-2010 Lawrence Livermore National Security, LLC. ++ * Copyright (C) 2007 The Regents of the University of California. ++ * Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER). ++ * Written by Brian Behlendorf . ++ * UCRL-CODE-235197 ++ * ++ * This file is part of the SPL, Solaris Porting Layer. ++ * For details, see . 
++ * ++ * The SPL is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License as published by the ++ * Free Software Foundation; either version 2 of the License, or (at your ++ * option) any later version. ++ * ++ * The SPL is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * for more details. ++ * ++ * You should have received a copy of the GNU General Public License along ++ * with the SPL. If not, see . ++\*****************************************************************************/ ++ ++#ifndef _SPL_SID_H ++#define _SPL_SID_H ++ ++typedef struct ksiddomain { ++ char *kd_name; ++} ksiddomain_t; ++ ++typedef enum ksid_index { ++ KSID_USER, ++ KSID_GROUP, ++ KSID_OWNER, ++ KSID_COUNT ++} ksid_index_t; ++ ++typedef int ksid_t; ++ ++static inline ksiddomain_t * ++ksid_lookupdomain(const char *dom) ++{ ++ ksiddomain_t *kd; ++ int len = strlen(dom); ++ ++ kd = kmem_zalloc(sizeof(ksiddomain_t), KM_SLEEP); ++ kd->kd_name = kmem_zalloc(len + 1, KM_SLEEP); ++ memcpy(kd->kd_name, dom, len); ++ ++ return (kd); ++} ++ ++static inline void ++ksiddomain_rele(ksiddomain_t *ksid) ++{ ++ kmem_free(ksid->kd_name, strlen(ksid->kd_name) + 1); ++ kmem_free(ksid, sizeof(ksiddomain_t)); ++} ++ ++#endif /* _SPL_SID_H */ +diff -uNr linux-3.2.33-go.orig/include/spl/sys/signal.h linux-3.2.33-go/include/spl/sys/signal.h +--- linux-3.2.33-go.orig/include/spl/sys/signal.h 1970-01-01 01:00:00.000000000 +0100 ++++ linux-3.2.33-go/include/spl/sys/signal.h 2012-11-16 23:22:32.405192918 +0100 +@@ -0,0 +1,50 @@ ++/*****************************************************************************\ ++ * Copyright (C) 2007-2010 Lawrence Livermore National Security, LLC. ++ * Copyright (C) 2007 The Regents of the University of California. ++ * Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER). ++ * Written by Brian Behlendorf . ++ * UCRL-CODE-235197 ++ * ++ * This file is part of the SPL, Solaris Porting Layer. ++ * For details, see . ++ * ++ * The SPL is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License as published by the ++ * Free Software Foundation; either version 2 of the License, or (at your ++ * option) any later version. ++ * ++ * The SPL is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * for more details. ++ * ++ * You should have received a copy of the GNU General Public License along ++ * with the SPL. If not, see . ++\*****************************************************************************/ ++ ++#ifndef _SPL_SIGNAL_H ++#define _SPL_SIGNAL_H ++ ++#include ++ ++#define FORREAL 0 /* Usual side-effects */ ++#define JUSTLOOKING 1 /* Don't stop the process */ ++ ++/* The "why" argument indicates the allowable side-effects of the call: ++ * ++ * FORREAL: Extract the next pending signal from p_sig into p_cursig; ++ * stop the process if a stop has been requested or if a traced signal ++ * is pending. ++ * ++ * JUSTLOOKING: Don't stop the process, just indicate whether or not ++ * a signal might be pending (FORREAL is needed to tell for sure). 
++ */ ++static __inline__ int ++issig(int why) ++{ ++ ASSERT(why == FORREAL || why == JUSTLOOKING); ++ ++ return signal_pending(current); ++} ++ ++#endif /* SPL_SIGNAL_H */ +diff -uNr linux-3.2.33-go.orig/include/spl/sys/stat.h linux-3.2.33-go/include/spl/sys/stat.h +--- linux-3.2.33-go.orig/include/spl/sys/stat.h 1970-01-01 01:00:00.000000000 +0100 ++++ linux-3.2.33-go/include/spl/sys/stat.h 2012-11-16 23:22:32.403192942 +0100 +@@ -0,0 +1,30 @@ ++/*****************************************************************************\ ++ * Copyright (C) 2007-2010 Lawrence Livermore National Security, LLC. ++ * Copyright (C) 2007 The Regents of the University of California. ++ * Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER). ++ * Written by Brian Behlendorf . ++ * UCRL-CODE-235197 ++ * ++ * This file is part of the SPL, Solaris Porting Layer. ++ * For details, see . ++ * ++ * The SPL is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License as published by the ++ * Free Software Foundation; either version 2 of the License, or (at your ++ * option) any later version. ++ * ++ * The SPL is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * for more details. ++ * ++ * You should have received a copy of the GNU General Public License along ++ * with the SPL. If not, see . ++\*****************************************************************************/ ++ ++#ifndef _SPL_STAT_H ++#define _SPL_STAT_H ++ ++#include ++ ++#endif /* SPL_STAT_H */ +diff -uNr linux-3.2.33-go.orig/include/spl/sys/stropts.h linux-3.2.33-go/include/spl/sys/stropts.h +--- linux-3.2.33-go.orig/include/spl/sys/stropts.h 1970-01-01 01:00:00.000000000 +0100 ++++ linux-3.2.33-go/include/spl/sys/stropts.h 2012-11-16 23:22:32.406192907 +0100 +@@ -0,0 +1,28 @@ ++/*****************************************************************************\ ++ * Copyright (C) 2007-2010 Lawrence Livermore National Security, LLC. ++ * Copyright (C) 2007 The Regents of the University of California. ++ * Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER). ++ * Written by Brian Behlendorf . ++ * UCRL-CODE-235197 ++ * ++ * This file is part of the SPL, Solaris Porting Layer. ++ * For details, see . ++ * ++ * The SPL is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License as published by the ++ * Free Software Foundation; either version 2 of the License, or (at your ++ * option) any later version. ++ * ++ * The SPL is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * for more details. ++ * ++ * You should have received a copy of the GNU General Public License along ++ * with the SPL. If not, see . 
++\*****************************************************************************/ ++ ++#ifndef _SPL_STROPTS_H ++#define _SPL_STROPTS_H ++ ++#endif /* SPL_STROPTS_H */ +diff -uNr linux-3.2.33-go.orig/include/spl/sys/sunddi.h linux-3.2.33-go/include/spl/sys/sunddi.h +--- linux-3.2.33-go.orig/include/spl/sys/sunddi.h 1970-01-01 01:00:00.000000000 +0100 ++++ linux-3.2.33-go/include/spl/sys/sunddi.h 2012-11-16 23:22:32.406192907 +0100 +@@ -0,0 +1,60 @@ ++/*****************************************************************************\ ++ * Copyright (C) 2007-2010 Lawrence Livermore National Security, LLC. ++ * Copyright (C) 2007 The Regents of the University of California. ++ * Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER). ++ * Written by Brian Behlendorf . ++ * UCRL-CODE-235197 ++ * ++ * This file is part of the SPL, Solaris Porting Layer. ++ * For details, see . ++ * ++ * The SPL is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License as published by the ++ * Free Software Foundation; either version 2 of the License, or (at your ++ * option) any later version. ++ * ++ * The SPL is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * for more details. ++ * ++ * You should have received a copy of the GNU General Public License along ++ * with the SPL. If not, see . ++\*****************************************************************************/ ++ ++#ifndef _SPL_SUNDDI_H ++#define _SPL_SUNDDI_H ++ ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++ ++typedef int ddi_devid_t; ++ ++#define DDI_DEV_T_NONE ((dev_t)-1) ++#define DDI_DEV_T_ANY ((dev_t)-2) ++#define DI_MAJOR_T_UNKNOWN ((major_t)0) ++ ++#define DDI_PROP_DONTPASS 0x0001 ++#define DDI_PROP_CANSLEEP 0x0002 ++ ++#define DDI_SUCCESS 0 ++#define DDI_FAILURE -1 ++ ++#define ddi_prop_lookup_string(x1,x2,x3,x4,x5) (*x5 = NULL) ++#define ddi_prop_free(x) (void)0 ++#define ddi_root_node() (void)0 ++ ++extern int ddi_strtoul(const char *, char **, int, unsigned long *); ++extern int ddi_strtol(const char *, char **, int, long *); ++extern int ddi_strtoull(const char *, char **, int, unsigned long long *); ++extern int ddi_strtoll(const char *, char **, int, long long *); ++ ++extern int ddi_copyin(const void *from, void *to, size_t len, int flags); ++extern int ddi_copyout(const void *from, void *to, size_t len, int flags); ++ ++#endif /* SPL_SUNDDI_H */ +diff -uNr linux-3.2.33-go.orig/include/spl/sys/sunldi.h linux-3.2.33-go/include/spl/sys/sunldi.h +--- linux-3.2.33-go.orig/include/spl/sys/sunldi.h 1970-01-01 01:00:00.000000000 +0100 ++++ linux-3.2.33-go/include/spl/sys/sunldi.h 2012-11-16 23:22:32.406192907 +0100 +@@ -0,0 +1,56 @@ ++/*****************************************************************************\ ++ * Copyright (C) 2007-2010 Lawrence Livermore National Security, LLC. ++ * Copyright (C) 2007 The Regents of the University of California. ++ * Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER). ++ * Written by Brian Behlendorf . ++ * UCRL-CODE-235197 ++ * ++ * This file is part of the SPL, Solaris Porting Layer. ++ * For details, see . 
++ * ++ * The SPL is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License as published by the ++ * Free Software Foundation; either version 2 of the License, or (at your ++ * option) any later version. ++ * ++ * The SPL is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * for more details. ++ * ++ * You should have received a copy of the GNU General Public License along ++ * with the SPL. If not, see . ++\*****************************************************************************/ ++ ++#ifndef _SPL_SUNLDI_H ++#define _SPL_SUNLDI_H ++ ++#include ++#include ++#include ++#include ++#include ++#include ++ ++#define SECTOR_SIZE 512 ++ ++typedef struct modlinkage { ++ int ml_rev; ++ struct modlfs *ml_modlfs; ++ struct modldrv *ml_modldrv; ++ major_t ml_major; ++ unsigned ml_minors; ++ void *pad1; ++} modlinkage_t; ++ ++typedef struct ldi_ident { ++ char li_modname[MAXNAMELEN]; ++ dev_t li_dev; ++} *ldi_ident_t; ++ ++typedef struct block_device *ldi_handle_t; ++ ++extern int ldi_ident_from_mod(struct modlinkage *modlp, ldi_ident_t *lip); ++extern void ldi_ident_release(ldi_ident_t li); ++ ++#endif /* SPL_SUNLDI_H */ +diff -uNr linux-3.2.33-go.orig/include/spl/sys/sysdc.h linux-3.2.33-go/include/spl/sys/sysdc.h +--- linux-3.2.33-go.orig/include/spl/sys/sysdc.h 1970-01-01 01:00:00.000000000 +0100 ++++ linux-3.2.33-go/include/spl/sys/sysdc.h 2012-11-16 23:22:32.404192930 +0100 +@@ -0,0 +1,28 @@ ++/*****************************************************************************\ ++ * Copyright (C) 2007-2010 Lawrence Livermore National Security, LLC. ++ * Copyright (C) 2007 The Regents of the University of California. ++ * Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER). ++ * Written by Brian Behlendorf . ++ * UCRL-CODE-235197 ++ * ++ * This file is part of the SPL, Solaris Porting Layer. ++ * For details, see . ++ * ++ * The SPL is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License as published by the ++ * Free Software Foundation; either version 2 of the License, or (at your ++ * option) any later version. ++ * ++ * The SPL is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * for more details. ++ * ++ * You should have received a copy of the GNU General Public License along ++ * with the SPL. If not, see . ++\*****************************************************************************/ ++ ++#ifndef _SPL_SYSDC_H ++#define _SPL_SYSDC_H ++ ++#endif /* SPL_SYSDC_H */ +diff -uNr linux-3.2.33-go.orig/include/spl/sys/sysevent/eventdefs.h linux-3.2.33-go/include/spl/sys/sysevent/eventdefs.h +--- linux-3.2.33-go.orig/include/spl/sys/sysevent/eventdefs.h 1970-01-01 01:00:00.000000000 +0100 ++++ linux-3.2.33-go/include/spl/sys/sysevent/eventdefs.h 2012-11-16 23:22:32.406192907 +0100 +@@ -0,0 +1,28 @@ ++/*****************************************************************************\ ++ * Copyright (C) 2007-2010 Lawrence Livermore National Security, LLC. ++ * Copyright (C) 2007 The Regents of the University of California. ++ * Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER). ++ * Written by Brian Behlendorf . 
++ * UCRL-CODE-235197 ++ * ++ * This file is part of the SPL, Solaris Porting Layer. ++ * For details, see . ++ * ++ * The SPL is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License as published by the ++ * Free Software Foundation; either version 2 of the License, or (at your ++ * option) any later version. ++ * ++ * The SPL is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * for more details. ++ * ++ * You should have received a copy of the GNU General Public License along ++ * with the SPL. If not, see . ++\*****************************************************************************/ ++ ++#ifndef _SPL_SYSEVENT_EVENTDEFS_H ++#define _SPL_SYSEVENT_EVENTDEFS_H ++ ++#endif /* _SPL_SYSEVENT_EVENTDEFS_H */ +diff -uNr linux-3.2.33-go.orig/include/spl/sys/sysevent.h linux-3.2.33-go/include/spl/sys/sysevent.h +--- linux-3.2.33-go.orig/include/spl/sys/sysevent.h 1970-01-01 01:00:00.000000000 +0100 ++++ linux-3.2.33-go/include/spl/sys/sysevent.h 2012-11-16 23:22:32.403192942 +0100 +@@ -0,0 +1,28 @@ ++/*****************************************************************************\ ++ * Copyright (C) 2007-2010 Lawrence Livermore National Security, LLC. ++ * Copyright (C) 2007 The Regents of the University of California. ++ * Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER). ++ * Written by Brian Behlendorf . ++ * UCRL-CODE-235197 ++ * ++ * This file is part of the SPL, Solaris Porting Layer. ++ * For details, see . ++ * ++ * The SPL is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License as published by the ++ * Free Software Foundation; either version 2 of the License, or (at your ++ * option) any later version. ++ * ++ * The SPL is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * for more details. ++ * ++ * You should have received a copy of the GNU General Public License along ++ * with the SPL. If not, see . ++\*****************************************************************************/ ++ ++#ifndef _SPL_SYSEVENT_H ++#define _SPL_SYSEVENT_H ++ ++#endif /* _SPL_SYSEVENT_H */ +diff -uNr linux-3.2.33-go.orig/include/spl/sys/sysmacros.h linux-3.2.33-go/include/spl/sys/sysmacros.h +--- linux-3.2.33-go.orig/include/spl/sys/sysmacros.h 1970-01-01 01:00:00.000000000 +0100 ++++ linux-3.2.33-go/include/spl/sys/sysmacros.h 2012-11-16 23:22:32.405192918 +0100 +@@ -0,0 +1,217 @@ ++/*****************************************************************************\ ++ * Copyright (C) 2007-2010 Lawrence Livermore National Security, LLC. ++ * Copyright (C) 2007 The Regents of the University of California. ++ * Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER). ++ * Written by Brian Behlendorf . ++ * UCRL-CODE-235197 ++ * ++ * This file is part of the SPL, Solaris Porting Layer. ++ * For details, see . ++ * ++ * The SPL is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License as published by the ++ * Free Software Foundation; either version 2 of the License, or (at your ++ * option) any later version. 
++ * ++ * The SPL is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * for more details. ++ * ++ * You should have received a copy of the GNU General Public License along ++ * with the SPL. If not, see . ++\*****************************************************************************/ ++ ++#ifndef _SPL_SYSMACROS_H ++#define _SPL_SYSMACROS_H ++ ++#include ++#include ++#include ++#include ++#include ++#include ++ ++#ifndef _KERNEL ++#define _KERNEL __KERNEL__ ++#endif ++ ++#define FALSE 0 ++#define TRUE 1 ++ ++#define INT8_MAX (127) ++#define INT8_MIN (-128) ++#define UINT8_MAX (255) ++#define UINT8_MIN (0) ++ ++#define INT16_MAX (32767) ++#define INT16_MIN (-32768) ++#define UINT16_MAX (65535) ++#define UINT16_MIN (0) ++ ++#define INT32_MAX INT_MAX ++#define INT32_MIN INT_MIN ++#define UINT32_MAX UINT_MAX ++#define UINT32_MIN UINT_MIN ++ ++#define INT64_MAX LLONG_MAX ++#define INT64_MIN LLONG_MIN ++#define UINT64_MAX ULLONG_MAX ++#define UINT64_MIN ULLONG_MIN ++ ++#define NBBY 8 ++#define ENOTSUP EOPNOTSUPP ++ ++#define MAXMSGLEN 256 ++#define MAXNAMELEN 256 ++#define MAXPATHLEN PATH_MAX ++#define MAXOFFSET_T LLONG_MAX ++#define MAXBSIZE 8192 ++#define DEV_BSIZE 512 ++#define DEV_BSHIFT 9 /* log2(DEV_BSIZE) */ ++ ++#define proc_pageout NULL ++#define curproc current ++#define max_ncpus num_possible_cpus() ++#define CPU_SEQID smp_processor_id() ++#define _NOTE(x) ++#define is_system_labeled() 0 ++ ++#ifndef RLIM64_INFINITY ++#define RLIM64_INFINITY (~0ULL) ++#endif ++ ++/* 0..MAX_PRIO-1: Process priority ++ * 0..MAX_RT_PRIO-1: RT priority tasks ++ * MAX_RT_PRIO..MAX_PRIO-1: SCHED_NORMAL tasks ++ * ++ * Treat shim tasks as SCHED_NORMAL tasks ++ */ ++#define minclsyspri (MAX_RT_PRIO) ++#define maxclsyspri (MAX_PRIO-1) ++ ++#define NICE_TO_PRIO(nice) (MAX_RT_PRIO + (nice) + 20) ++#define PRIO_TO_NICE(prio) ((prio) - MAX_RT_PRIO - 20) ++ ++/* Missing macros ++ */ ++#define PAGESIZE PAGE_SIZE ++ ++/* from Solaris sys/byteorder.h */ ++#define BSWAP_8(x) ((x) & 0xff) ++#define BSWAP_16(x) ((BSWAP_8(x) << 8) | BSWAP_8((x) >> 8)) ++#define BSWAP_32(x) ((BSWAP_16(x) << 16) | BSWAP_16((x) >> 16)) ++#define BSWAP_64(x) ((BSWAP_32(x) << 32) | BSWAP_32((x) >> 32)) ++ ++/* Map some simple functions. 
++ */ ++#define bzero(ptr,size) memset(ptr,0,size) ++#define bcopy(src,dest,size) memmove(dest,src,size) ++#define bcmp(src,dest,size) memcmp((src), (dest), (size_t)(size)) ++ ++/* Dtrace probes do not exist in the linux kernel */ ++#ifdef DTRACE_PROBE ++#undef DTRACE_PROBE ++#endif /* DTRACE_PROBE */ ++#define DTRACE_PROBE(a) ((void)0) ++ ++#ifdef DTRACE_PROBE1 ++#undef DTRACE_PROBE1 ++#endif /* DTRACE_PROBE1 */ ++#define DTRACE_PROBE1(a, b, c) ((void)0) ++ ++#ifdef DTRACE_PROBE2 ++#undef DTRACE_PROBE2 ++#endif /* DTRACE_PROBE2 */ ++#define DTRACE_PROBE2(a, b, c, d, e) ((void)0) ++ ++#ifdef DTRACE_PROBE3 ++#undef DTRACE_PROBE3 ++#endif /* DTRACE_PROBE3 */ ++#define DTRACE_PROBE3(a, b, c, d, e, f, g) ((void)0) ++ ++#ifdef DTRACE_PROBE4 ++#undef DTRACE_PROBE4 ++#endif /* DTRACE_PROBE4 */ ++#define DTRACE_PROBE4(a, b, c, d, e, f, g, h, i) ((void)0) ++ ++/* Missing globals */ ++extern char spl_version[32]; ++extern unsigned long spl_hostid; ++extern char hw_serial[11]; ++ ++/* Missing misc functions */ ++extern int highbit(unsigned long i); ++extern uint32_t zone_get_hostid(void *zone); ++extern void spl_setup(void); ++extern void spl_cleanup(void); ++ ++#define makedevice(maj,min) makedev(maj,min) ++ ++/* common macros */ ++#ifndef MIN ++#define MIN(a, b) ((a) < (b) ? (a) : (b)) ++#endif ++#ifndef MAX ++#define MAX(a, b) ((a) < (b) ? (b) : (a)) ++#endif ++#ifndef ABS ++#define ABS(a) ((a) < 0 ? -(a) : (a)) ++#endif ++#ifndef DIV_ROUND_UP ++#define DIV_ROUND_UP(n,d) (((n) + (d) - 1) / (d)) ++#endif ++#ifndef roundup ++#define roundup(x, y) ((((x) + ((y) - 1)) / (y)) * (y)) ++#endif ++ ++/* ++ * Compatibility macros/typedefs needed for Solaris -> Linux port ++ */ ++#define P2ALIGN(x, align) ((x) & -(align)) ++#define P2CROSS(x, y, align) (((x) ^ (y)) > (align) - 1) ++#define P2ROUNDUP(x, align) (-(-(x) & -(align))) ++#define P2PHASE(x, align) ((x) & ((align) - 1)) ++#define P2NPHASE(x, align) (-(x) & ((align) - 1)) ++#define ISP2(x) (((x) & ((x) - 1)) == 0) ++#define IS_P2ALIGNED(v, a) ((((uintptr_t)(v)) & ((uintptr_t)(a) - 1))==0) ++#define P2BOUNDARY(off, len, align) \ ++ (((off) ^ ((off) + (len) - 1)) > (align) - 1) ++ ++/* ++ * Typed version of the P2* macros. These macros should be used to ensure ++ * that the result is correctly calculated based on the data type of (x), ++ * which is passed in as the last argument, regardless of the data ++ * type of the alignment. 
For example, if (x) is of type uint64_t, ++ * and we want to round it up to a page boundary using "PAGESIZE" as ++ * the alignment, we can do either ++ * ++ * P2ROUNDUP(x, (uint64_t)PAGESIZE) ++ * or ++ * P2ROUNDUP_TYPED(x, PAGESIZE, uint64_t) ++ */ ++#define P2ALIGN_TYPED(x, align, type) \ ++ ((type)(x) & -(type)(align)) ++#define P2PHASE_TYPED(x, align, type) \ ++ ((type)(x) & ((type)(align) - 1)) ++#define P2NPHASE_TYPED(x, align, type) \ ++ (-(type)(x) & ((type)(align) - 1)) ++#define P2ROUNDUP_TYPED(x, align, type) \ ++ (-(-(type)(x) & -(type)(align))) ++#define P2END_TYPED(x, align, type) \ ++ (-(~(type)(x) & -(type)(align))) ++#define P2PHASEUP_TYPED(x, align, phase, type) \ ++ ((type)(phase) - (((type)(phase) - (type)(x)) & -(type)(align))) ++#define P2CROSS_TYPED(x, y, align, type) \ ++ (((type)(x) ^ (type)(y)) > (type)(align) - 1) ++#define P2SAMEHIGHBIT_TYPED(x, y, type) \ ++ (((type)(x) ^ (type)(y)) < ((type)(x) & (type)(y))) ++ ++#if defined(_KERNEL) && !defined(_KMEMUSER) && !defined(offsetof) ++ ++/* avoid any possibility of clashing with version */ ++ ++#define offsetof(s, m) ((size_t)(&(((s *)0)->m))) ++#endif ++ ++#endif /* _SPL_SYSMACROS_H */ +diff -uNr linux-3.2.33-go.orig/include/spl/sys/systeminfo.h linux-3.2.33-go/include/spl/sys/systeminfo.h +--- linux-3.2.33-go.orig/include/spl/sys/systeminfo.h 1970-01-01 01:00:00.000000000 +0100 ++++ linux-3.2.33-go/include/spl/sys/systeminfo.h 2012-11-16 23:22:32.404192930 +0100 +@@ -0,0 +1,37 @@ ++/*****************************************************************************\ ++ * Copyright (C) 2007-2010 Lawrence Livermore National Security, LLC. ++ * Copyright (C) 2007 The Regents of the University of California. ++ * Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER). ++ * Written by Brian Behlendorf . ++ * UCRL-CODE-235197 ++ * ++ * This file is part of the SPL, Solaris Porting Layer. ++ * For details, see . ++ * ++ * The SPL is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License as published by the ++ * Free Software Foundation; either version 2 of the License, or (at your ++ * option) any later version. ++ * ++ * The SPL is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * for more details. ++ * ++ * You should have received a copy of the GNU General Public License along ++ * with the SPL. If not, see . ++\*****************************************************************************/ ++ ++#ifndef _SPL_SYSTEMINFO_H ++#define _SPL_SYSTEMINFO_H ++ ++#define HW_INVALID_HOSTID 0xFFFFFFFF /* an invalid hostid */ ++#define HW_HOSTID_LEN 11 /* minimum buffer size needed */ ++ /* to hold a decimal or hex */ ++ /* hostid string */ ++ ++/* Supplemental definitions for Linux. */ ++#define HW_HOSTID_PATH "/etc/hostid" /* binary configuration file */ ++#define HW_HOSTID_MASK 0xFFFFFFFF /* significant hostid bits */ ++ ++#endif /* SPL_SYSTEMINFO_H */ +diff -uNr linux-3.2.33-go.orig/include/spl/sys/systm.h linux-3.2.33-go/include/spl/sys/systm.h +--- linux-3.2.33-go.orig/include/spl/sys/systm.h 1970-01-01 01:00:00.000000000 +0100 ++++ linux-3.2.33-go/include/spl/sys/systm.h 2012-11-16 23:22:32.404192930 +0100 +@@ -0,0 +1,32 @@ ++/*****************************************************************************\ ++ * Copyright (C) 2007-2010 Lawrence Livermore National Security, LLC. 
++ * Copyright (C) 2007 The Regents of the University of California. ++ * Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER). ++ * Written by Brian Behlendorf . ++ * UCRL-CODE-235197 ++ * ++ * This file is part of the SPL, Solaris Porting Layer. ++ * For details, see . ++ * ++ * The SPL is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License as published by the ++ * Free Software Foundation; either version 2 of the License, or (at your ++ * option) any later version. ++ * ++ * The SPL is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * for more details. ++ * ++ * You should have received a copy of the GNU General Public License along ++ * with the SPL. If not, see . ++\*****************************************************************************/ ++ ++#ifndef _SPL_SYSTM_H ++#define _SPL_SYSTM_H ++ ++#include ++ ++typedef uintptr_t pc_t; ++ ++#endif /* SPL_SYSTM_H */ +diff -uNr linux-3.2.33-go.orig/include/spl/sys/taskq.h linux-3.2.33-go/include/spl/sys/taskq.h +--- linux-3.2.33-go.orig/include/spl/sys/taskq.h 1970-01-01 01:00:00.000000000 +0100 ++++ linux-3.2.33-go/include/spl/sys/taskq.h 2012-11-16 23:22:32.405192918 +0100 +@@ -0,0 +1,133 @@ ++/*****************************************************************************\ ++ * Copyright (C) 2007-2010 Lawrence Livermore National Security, LLC. ++ * Copyright (C) 2007 The Regents of the University of California. ++ * Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER). ++ * Written by Brian Behlendorf . ++ * UCRL-CODE-235197 ++ * ++ * This file is part of the SPL, Solaris Porting Layer. ++ * For details, see . ++ * ++ * The SPL is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License as published by the ++ * Free Software Foundation; either version 2 of the License, or (at your ++ * option) any later version. ++ * ++ * The SPL is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * for more details. ++ * ++ * You should have received a copy of the GNU General Public License along ++ * with the SPL. If not, see . ++\*****************************************************************************/ ++ ++#ifndef _SPL_TASKQ_H ++#define _SPL_TASKQ_H ++ ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++ ++#define TASKQ_NAMELEN 31 ++ ++#define TASKQ_PREPOPULATE 0x00000001 ++#define TASKQ_CPR_SAFE 0x00000002 ++#define TASKQ_DYNAMIC 0x00000004 ++#define TASKQ_THREADS_CPU_PCT 0x00000008 ++#define TASKQ_DC_BATCH 0x00000010 ++ ++typedef unsigned long taskqid_t; ++typedef void (task_func_t)(void *); ++ ++typedef struct taskq_ent { ++ spinlock_t tqent_lock; ++ struct list_head tqent_list; ++ taskqid_t tqent_id; ++ task_func_t *tqent_func; ++ void *tqent_arg; ++ uintptr_t tqent_flags; ++} taskq_ent_t; ++ ++#define TQENT_FLAG_PREALLOC 0x1 ++ ++/* ++ * Flags for taskq_dispatch. TQ_SLEEP/TQ_NOSLEEP should be same as ++ * KM_SLEEP/KM_NOSLEEP. TQ_NOQUEUE/TQ_NOALLOC are set particularly ++ * large so as not to conflict with already used GFP_* defines. 
++ */ ++#define TQ_SLEEP 0x00000000 ++#define TQ_NOSLEEP 0x00000001 ++#define TQ_PUSHPAGE 0x00000002 ++#define TQ_NOQUEUE 0x01000000 ++#define TQ_NOALLOC 0x02000000 ++#define TQ_NEW 0x04000000 ++#define TQ_FRONT 0x08000000 ++#define TQ_ACTIVE 0x80000000 ++ ++typedef struct taskq { ++ spinlock_t tq_lock; /* protects taskq_t */ ++ unsigned long tq_lock_flags; /* interrupt state */ ++ const char *tq_name; /* taskq name */ ++ struct list_head tq_thread_list;/* list of all threads */ ++ struct list_head tq_active_list;/* list of active threads */ ++ int tq_nactive; /* # of active threads */ ++ int tq_nthreads; /* # of total threads */ ++ int tq_pri; /* priority */ ++ int tq_minalloc; /* min task_t pool size */ ++ int tq_maxalloc; /* max task_t pool size */ ++ int tq_nalloc; /* cur task_t pool size */ ++ uint_t tq_flags; /* flags */ ++ taskqid_t tq_next_id; /* next pend/work id */ ++ taskqid_t tq_lowest_id; /* lowest pend/work id */ ++ struct list_head tq_free_list; /* free task_t's */ ++ struct list_head tq_pend_list; /* pending task_t's */ ++ struct list_head tq_prio_list; /* priority pending task_t's */ ++ wait_queue_head_t tq_work_waitq; /* new work waitq */ ++ wait_queue_head_t tq_wait_waitq; /* wait waitq */ ++} taskq_t; ++ ++typedef struct taskq_thread { ++ struct list_head tqt_thread_list; ++ struct list_head tqt_active_list; ++ struct task_struct *tqt_thread; ++ taskq_t *tqt_tq; ++ taskqid_t tqt_id; ++ uintptr_t tqt_flags; ++} taskq_thread_t; ++ ++/* Global system-wide dynamic task queue available for all consumers */ ++extern taskq_t *system_taskq; ++ ++extern taskqid_t __taskq_dispatch(taskq_t *, task_func_t, void *, uint_t); ++extern void __taskq_dispatch_ent(taskq_t *, task_func_t, void *, uint_t, taskq_ent_t *); ++extern int __taskq_empty_ent(taskq_ent_t *); ++extern void __taskq_init_ent(taskq_ent_t *); ++extern taskq_t *__taskq_create(const char *, int, pri_t, int, int, uint_t); ++extern void __taskq_destroy(taskq_t *); ++extern void __taskq_wait_id(taskq_t *, taskqid_t); ++extern void __taskq_wait(taskq_t *); ++extern int __taskq_member(taskq_t *, void *); ++ ++int spl_taskq_init(void); ++void spl_taskq_fini(void); ++ ++#define taskq_member(tq, t) __taskq_member(tq, t) ++#define taskq_wait_id(tq, id) __taskq_wait_id(tq, id) ++#define taskq_wait(tq) __taskq_wait(tq) ++#define taskq_dispatch(tq, f, p, fl) __taskq_dispatch(tq, f, p, fl) ++#define taskq_dispatch_ent(tq, f, p, fl, t) __taskq_dispatch_ent(tq, f, p, fl, t) ++#define taskq_empty_ent(t) __taskq_empty_ent(t) ++#define taskq_init_ent(t) __taskq_init_ent(t) ++#define taskq_create(n, th, p, mi, ma, fl) __taskq_create(n, th, p, mi, ma, fl) ++#define taskq_create_proc(n, th, p, mi, ma, pr, fl) \ ++ __taskq_create(n, th, p, mi, ma, fl) ++#define taskq_create_sysdc(n, th, mi, ma, pr, dc, fl) \ ++ __taskq_create(n, th, maxclsyspri, mi, ma, fl) ++#define taskq_destroy(tq) __taskq_destroy(tq) ++ ++#endif /* _SPL_TASKQ_H */ +diff -uNr linux-3.2.33-go.orig/include/spl/sys/thread.h linux-3.2.33-go/include/spl/sys/thread.h +--- linux-3.2.33-go.orig/include/spl/sys/thread.h 1970-01-01 01:00:00.000000000 +0100 ++++ linux-3.2.33-go/include/spl/sys/thread.h 2012-11-16 23:22:32.404192930 +0100 +@@ -0,0 +1,61 @@ ++/*****************************************************************************\ ++ * Copyright (C) 2007-2010 Lawrence Livermore National Security, LLC. ++ * Copyright (C) 2007 The Regents of the University of California. ++ * Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER). ++ * Written by Brian Behlendorf . 
++ * UCRL-CODE-235197 ++ * ++ * This file is part of the SPL, Solaris Porting Layer. ++ * For details, see . ++ * ++ * The SPL is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License as published by the ++ * Free Software Foundation; either version 2 of the License, or (at your ++ * option) any later version. ++ * ++ * The SPL is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * for more details. ++ * ++ * You should have received a copy of the GNU General Public License along ++ * with the SPL. If not, see . ++\*****************************************************************************/ ++ ++#ifndef _SPL_THREAD_H ++#define _SPL_THREAD_H ++ ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++ ++/* ++ * Thread interfaces ++ */ ++#define TP_MAGIC 0x53535353 ++ ++#define TS_SLEEP TASK_INTERRUPTIBLE ++#define TS_RUN TASK_RUNNING ++#define TS_ZOMB EXIT_ZOMBIE ++#define TS_STOPPED TASK_STOPPED ++ ++typedef void (*thread_func_t)(void *); ++ ++#define thread_create(stk, stksize, func, arg, len, pp, state, pri) \ ++ __thread_create(stk, stksize, (thread_func_t)func, \ ++ #func, arg, len, pp, state, pri) ++#define thread_exit() __thread_exit() ++#define thread_join(t) VERIFY(0) ++#define curthread current ++ ++extern kthread_t *__thread_create(caddr_t stk, size_t stksize, ++ thread_func_t func, const char *name, ++ void *args, size_t len, proc_t *pp, ++ int state, pri_t pri); ++extern void __thread_exit(void); ++ ++#endif /* _SPL_THREAD_H */ +diff -uNr linux-3.2.33-go.orig/include/spl/sys/time.h linux-3.2.33-go/include/spl/sys/time.h +--- linux-3.2.33-go.orig/include/spl/sys/time.h 1970-01-01 01:00:00.000000000 +0100 ++++ linux-3.2.33-go/include/spl/sys/time.h 2012-11-16 23:22:32.404192930 +0100 +@@ -0,0 +1,93 @@ ++/*****************************************************************************\ ++ * Copyright (C) 2007-2010 Lawrence Livermore National Security, LLC. ++ * Copyright (C) 2007 The Regents of the University of California. ++ * Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER). ++ * Written by Brian Behlendorf . ++ * UCRL-CODE-235197 ++ * ++ * This file is part of the SPL, Solaris Porting Layer. ++ * For details, see . ++ * ++ * The SPL is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License as published by the ++ * Free Software Foundation; either version 2 of the License, or (at your ++ * option) any later version. ++ * ++ * The SPL is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * for more details. ++ * ++ * You should have received a copy of the GNU General Public License along ++ * with the SPL. If not, see . ++\*****************************************************************************/ ++ ++#ifndef _SPL_TIME_H ++#define _SPL_TIME_H ++ ++/* ++ * Structure returned by gettimeofday(2) system call, ++ * and used in other calls. 
++ */ ++#include ++#include ++#include ++#include ++ ++#if defined(CONFIG_64BIT) ++#define TIME_MAX INT64_MAX ++#define TIME_MIN INT64_MIN ++#else ++#define TIME_MAX INT32_MAX ++#define TIME_MIN INT32_MIN ++#endif ++ ++#define SEC 1 ++#define MILLISEC 1000 ++#define MICROSEC 1000000 ++#define NANOSEC 1000000000 ++ ++/* Already defined in include/linux/time.h */ ++#undef CLOCK_THREAD_CPUTIME_ID ++#undef CLOCK_REALTIME ++#undef CLOCK_MONOTONIC ++#undef CLOCK_PROCESS_CPUTIME_ID ++ ++typedef enum clock_type { ++ __CLOCK_REALTIME0 = 0, /* obsolete; same as CLOCK_REALTIME */ ++ CLOCK_VIRTUAL = 1, /* thread's user-level CPU clock */ ++ CLOCK_THREAD_CPUTIME_ID = 2, /* thread's user+system CPU clock */ ++ CLOCK_REALTIME = 3, /* wall clock */ ++ CLOCK_MONOTONIC = 4, /* high resolution monotonic clock */ ++ CLOCK_PROCESS_CPUTIME_ID = 5, /* process's user+system CPU clock */ ++ CLOCK_HIGHRES = CLOCK_MONOTONIC, /* alternate name */ ++ CLOCK_PROF = CLOCK_THREAD_CPUTIME_ID,/* alternate name */ ++} clock_type_t; ++ ++#define hz \ ++({ \ ++ ASSERT(HZ >= 100 && HZ <= MICROSEC); \ ++ HZ; \ ++}) ++ ++extern void __gethrestime(timestruc_t *); ++extern int __clock_gettime(clock_type_t, timespec_t *); ++extern hrtime_t __gethrtime(void); ++ ++#define gethrestime(ts) __gethrestime(ts) ++#define clock_gettime(fl, tp) __clock_gettime(fl, tp) ++#define gethrtime() __gethrtime() ++ ++static __inline__ time_t ++gethrestime_sec(void) ++{ ++ timestruc_t now; ++ ++ __gethrestime(&now); ++ return now.tv_sec; ++} ++ ++#define TIMESPEC_OVERFLOW(ts) \ ++ ((ts)->tv_sec < TIME_MIN || (ts)->tv_sec > TIME_MAX) ++ ++#endif /* _SPL_TIME_H */ +diff -uNr linux-3.2.33-go.orig/include/spl/sys/timer.h linux-3.2.33-go/include/spl/sys/timer.h +--- linux-3.2.33-go.orig/include/spl/sys/timer.h 1970-01-01 01:00:00.000000000 +0100 ++++ linux-3.2.33-go/include/spl/sys/timer.h 2012-11-16 23:22:32.405192918 +0100 +@@ -0,0 +1,41 @@ ++/*****************************************************************************\ ++ * Copyright (C) 2007-2010 Lawrence Livermore National Security, LLC. ++ * Copyright (C) 2007 The Regents of the University of California. ++ * Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER). ++ * Written by Brian Behlendorf . ++ * UCRL-CODE-235197 ++ * ++ * This file is part of the SPL, Solaris Porting Layer. ++ * For details, see . ++ * ++ * The SPL is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License as published by the ++ * Free Software Foundation; either version 2 of the License, or (at your ++ * option) any later version. ++ * ++ * The SPL is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * for more details. ++ * ++ * You should have received a copy of the GNU General Public License along ++ * with the SPL. If not, see . 
++\*****************************************************************************/ ++ ++#ifndef _SPL_TIMER_H ++#define _SPL_TIMER_H ++ ++#include ++#include ++#include ++ ++#define lbolt ((clock_t)jiffies) ++#define lbolt64 ((int64_t)get_jiffies_64()) ++ ++#define ddi_get_lbolt() ((clock_t)jiffies) ++#define ddi_get_lbolt64() ((int64_t)get_jiffies_64()) ++ ++#define delay(ticks) schedule_timeout((long)(ticks)) ++ ++#endif /* _SPL_TIMER_H */ ++ +diff -uNr linux-3.2.33-go.orig/include/spl/sys/t_lock.h linux-3.2.33-go/include/spl/sys/t_lock.h +--- linux-3.2.33-go.orig/include/spl/sys/t_lock.h 1970-01-01 01:00:00.000000000 +0100 ++++ linux-3.2.33-go/include/spl/sys/t_lock.h 2012-11-16 23:22:32.406192907 +0100 +@@ -0,0 +1,33 @@ ++/*****************************************************************************\ ++ * Copyright (C) 2007-2010 Lawrence Livermore National Security, LLC. ++ * Copyright (C) 2007 The Regents of the University of California. ++ * Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER). ++ * Written by Brian Behlendorf . ++ * UCRL-CODE-235197 ++ * ++ * This file is part of the SPL, Solaris Porting Layer. ++ * For details, see . ++ * ++ * The SPL is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License as published by the ++ * Free Software Foundation; either version 2 of the License, or (at your ++ * option) any later version. ++ * ++ * The SPL is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * for more details. ++ * ++ * You should have received a copy of the GNU General Public License along ++ * with the SPL. If not, see . ++\*****************************************************************************/ ++ ++#ifndef _SPL_T_LOCK_H ++#define _SPL_T_LOCK_H ++ ++#include ++#include ++#include ++#include ++ ++#endif /* SPL_T_LOCK_H */ +diff -uNr linux-3.2.33-go.orig/include/spl/sys/tsd.h linux-3.2.33-go/include/spl/sys/tsd.h +--- linux-3.2.33-go.orig/include/spl/sys/tsd.h 1970-01-01 01:00:00.000000000 +0100 ++++ linux-3.2.33-go/include/spl/sys/tsd.h 2012-11-16 23:22:32.405192918 +0100 +@@ -0,0 +1,45 @@ ++/*****************************************************************************\ ++ * Copyright (C) 2010 Lawrence Livermore National Security, LLC. ++ * Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER). ++ * Written by Brian Behlendorf . ++ * UCRL-CODE-235197 ++ * ++ * This file is part of the SPL, Solaris Porting Layer. ++ * For details, see . ++ * ++ * The SPL is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License as published by the ++ * Free Software Foundation; either version 2 of the License, or (at your ++ * option) any later version. ++ * ++ * The SPL is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * for more details. ++ * ++ * You should have received a copy of the GNU General Public License along ++ * with the SPL. If not, see . 
++\*****************************************************************************/ ++ ++#ifndef _SPL_TSD_H ++#define _SPL_TSD_H ++ ++#include ++ ++#define TSD_HASH_TABLE_BITS_DEFAULT 9 ++#define TSD_KEYS_MAX 32768 ++#define DTOR_PID (PID_MAX_LIMIT+1) ++#define PID_KEY (TSD_KEYS_MAX+1) ++ ++typedef void (*dtor_func_t)(void *); ++ ++extern int tsd_set(uint_t, void *); ++extern void *tsd_get(uint_t); ++extern void tsd_create(uint_t *, dtor_func_t); ++extern void tsd_destroy(uint_t *); ++extern void tsd_exit(void); ++ ++int spl_tsd_init(void); ++void spl_tsd_fini(void); ++ ++#endif /* _SPL_TSD_H */ +diff -uNr linux-3.2.33-go.orig/include/spl/sys/types32.h linux-3.2.33-go/include/spl/sys/types32.h +--- linux-3.2.33-go.orig/include/spl/sys/types32.h 1970-01-01 01:00:00.000000000 +0100 ++++ linux-3.2.33-go/include/spl/sys/types32.h 2012-11-16 23:22:32.405192918 +0100 +@@ -0,0 +1,36 @@ ++/*****************************************************************************\ ++ * Copyright (C) 2007-2010 Lawrence Livermore National Security, LLC. ++ * Copyright (C) 2007 The Regents of the University of California. ++ * Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER). ++ * Written by Brian Behlendorf . ++ * UCRL-CODE-235197 ++ * ++ * This file is part of the SPL, Solaris Porting Layer. ++ * For details, see . ++ * ++ * The SPL is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License as published by the ++ * Free Software Foundation; either version 2 of the License, or (at your ++ * option) any later version. ++ * ++ * The SPL is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * for more details. ++ * ++ * You should have received a copy of the GNU General Public License along ++ * with the SPL. If not, see . ++\*****************************************************************************/ ++ ++#ifndef _SPL_TYPES32_H ++#define _SPL_TYPES32_H ++ ++#include ++#include ++ ++typedef uint32_t caddr32_t; ++typedef int32_t daddr32_t; ++typedef int32_t time32_t; ++typedef uint32_t size32_t; ++ ++#endif /* _SPL_TYPES32_H */ +diff -uNr linux-3.2.33-go.orig/include/spl/sys/types.h linux-3.2.33-go/include/spl/sys/types.h +--- linux-3.2.33-go.orig/include/spl/sys/types.h 1970-01-01 01:00:00.000000000 +0100 ++++ linux-3.2.33-go/include/spl/sys/types.h 2012-11-16 23:22:32.404192930 +0100 +@@ -0,0 +1,91 @@ ++/*****************************************************************************\ ++ * Copyright (C) 2007-2010 Lawrence Livermore National Security, LLC. ++ * Copyright (C) 2007 The Regents of the University of California. ++ * Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER). ++ * Written by Brian Behlendorf . ++ * UCRL-CODE-235197 ++ * ++ * This file is part of the SPL, Solaris Porting Layer. ++ * For details, see . ++ * ++ * The SPL is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License as published by the ++ * Free Software Foundation; either version 2 of the License, or (at your ++ * option) any later version. ++ * ++ * The SPL is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * for more details. 
++ * ++ * You should have received a copy of the GNU General Public License along ++ * with the SPL. If not, see . ++\*****************************************************************************/ ++ ++#ifndef _SPL_TYPES_H ++#define _SPL_TYPES_H ++ ++#include ++#include ++ ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++ ++#ifndef HAVE_UINTPTR_T ++typedef unsigned long uintptr_t; ++#endif ++ ++#ifndef ULLONG_MAX ++#define ULLONG_MAX (~0ULL) ++#endif ++ ++#ifndef LLONG_MAX ++#define LLONG_MAX ((long long)(~0ULL>>1)) ++#endif ++ ++typedef enum { B_FALSE=0, B_TRUE=1 } boolean_t; ++typedef unsigned long intptr_t; ++typedef unsigned long ulong_t; ++typedef unsigned int uint_t; ++typedef unsigned char uchar_t; ++typedef unsigned long long u_longlong_t; ++typedef unsigned long long u_offset_t; ++typedef unsigned long long rlim64_t; ++typedef long long longlong_t; ++typedef long long offset_t; ++typedef struct task_struct kthread_t; ++typedef struct task_struct proc_t; ++typedef struct vmem { } vmem_t; ++typedef short pri_t; ++typedef struct timespec timestruc_t; /* definition per SVr4 */ ++typedef struct timespec timespec_t; ++typedef longlong_t hrtime_t; ++typedef unsigned short ushort_t; ++typedef u_longlong_t len_t; ++typedef longlong_t diskaddr_t; ++typedef ushort_t o_mode_t; ++typedef uint_t major_t; ++typedef uint_t minor_t; ++typedef ulong_t pfn_t; ++typedef ulong_t pgcnt_t; ++typedef long spgcnt_t; ++typedef short index_t; ++typedef int id_t; ++ ++extern proc_t p0; ++ ++#endif /* _SPL_TYPES_H */ +diff -uNr linux-3.2.33-go.orig/include/spl/sys/u8_textprep.h linux-3.2.33-go/include/spl/sys/u8_textprep.h +--- linux-3.2.33-go.orig/include/spl/sys/u8_textprep.h 1970-01-01 01:00:00.000000000 +0100 ++++ linux-3.2.33-go/include/spl/sys/u8_textprep.h 2012-11-16 23:22:32.404192930 +0100 +@@ -0,0 +1,28 @@ ++/*****************************************************************************\ ++ * Copyright (C) 2007-2010 Lawrence Livermore National Security, LLC. ++ * Copyright (C) 2007 The Regents of the University of California. ++ * Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER). ++ * Written by Brian Behlendorf . ++ * UCRL-CODE-235197 ++ * ++ * This file is part of the SPL, Solaris Porting Layer. ++ * For details, see . ++ * ++ * The SPL is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License as published by the ++ * Free Software Foundation; either version 2 of the License, or (at your ++ * option) any later version. ++ * ++ * The SPL is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * for more details. ++ * ++ * You should have received a copy of the GNU General Public License along ++ * with the SPL. If not, see . 
++\*****************************************************************************/ ++ ++#ifndef _SPL_U8_TEXTPREP_H ++#define _SPL_U8_TEXTPREP_H ++ ++#endif /* SPL_U8_TEXTPREP_H */ +diff -uNr linux-3.2.33-go.orig/include/spl/sys/uio.h linux-3.2.33-go/include/spl/sys/uio.h +--- linux-3.2.33-go.orig/include/spl/sys/uio.h 1970-01-01 01:00:00.000000000 +0100 ++++ linux-3.2.33-go/include/spl/sys/uio.h 2012-11-16 23:22:32.403192942 +0100 +@@ -0,0 +1,99 @@ ++/*****************************************************************************\ ++ * Copyright (C) 2007-2010 Lawrence Livermore National Security, LLC. ++ * Copyright (C) 2007 The Regents of the University of California. ++ * Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER). ++ * Written by Brian Behlendorf . ++ * UCRL-CODE-235197 ++ * ++ * This file is part of the SPL, Solaris Porting Layer. ++ * For details, see . ++ * ++ * The SPL is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License as published by the ++ * Free Software Foundation; either version 2 of the License, or (at your ++ * option) any later version. ++ * ++ * The SPL is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * for more details. ++ * ++ * You should have received a copy of the GNU General Public License along ++ * with the SPL. If not, see . ++\*****************************************************************************/ ++ ++#ifndef _SPL_UIO_H ++#define _SPL_UIO_H ++ ++#include ++#include ++#include ++ ++typedef struct iovec iovec_t; ++ ++typedef enum uio_rw { ++ UIO_READ = 0, ++ UIO_WRITE = 1, ++} uio_rw_t; ++ ++typedef enum uio_seg { ++ UIO_USERSPACE = 0, ++ UIO_SYSSPACE = 1, ++ UIO_USERISPACE= 2, ++} uio_seg_t; ++ ++typedef struct uio { ++ struct iovec *uio_iov; ++ int uio_iovcnt; ++ offset_t uio_loffset; ++ uio_seg_t uio_segflg; ++ uint16_t uio_fmode; ++ uint16_t uio_extflg; ++ offset_t uio_limit; ++ ssize_t uio_resid; ++} uio_t; ++ ++typedef struct aio_req { ++ uio_t *aio_uio; ++ void *aio_private; ++} aio_req_t; ++ ++typedef enum xuio_type { ++ UIOTYPE_ASYNCIO, ++ UIOTYPE_ZEROCOPY, ++} xuio_type_t; ++ ++ ++#define UIOA_IOV_MAX 16 ++ ++typedef struct uioa_page_s { ++ int uioa_pfncnt; ++ void **uioa_ppp; ++ caddr_t uioa_base; ++ size_t uioa_len; ++} uioa_page_t; ++ ++typedef struct xuio { ++ uio_t xu_uio; ++ enum xuio_type xu_type; ++ union { ++ struct { ++ uint32_t xu_a_state; ++ ssize_t xu_a_mbytes; ++ uioa_page_t *xu_a_lcur; ++ void **xu_a_lppp; ++ void *xu_a_hwst[4]; ++ uioa_page_t xu_a_locked[UIOA_IOV_MAX]; ++ } xu_aio; ++ ++ struct { ++ int xu_zc_rw; ++ void *xu_zc_priv; ++ } xu_zc; ++ } xu_ext; ++} xuio_t; ++ ++#define XUIO_XUZC_PRIV(xuio) xuio->xu_ext.xu_zc.xu_zc_priv ++#define XUIO_XUZC_RW(xuio) xuio->xu_ext.xu_zc.xu_zc_rw ++ ++#endif /* SPL_UIO_H */ +diff -uNr linux-3.2.33-go.orig/include/spl/sys/unistd.h linux-3.2.33-go/include/spl/sys/unistd.h +--- linux-3.2.33-go.orig/include/spl/sys/unistd.h 1970-01-01 01:00:00.000000000 +0100 ++++ linux-3.2.33-go/include/spl/sys/unistd.h 2012-11-16 23:22:32.406192907 +0100 +@@ -0,0 +1,28 @@ ++/*****************************************************************************\ ++ * Copyright (C) 2007-2010 Lawrence Livermore National Security, LLC. ++ * Copyright (C) 2007 The Regents of the University of California. ++ * Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER). 
++ * Written by Brian Behlendorf . ++ * UCRL-CODE-235197 ++ * ++ * This file is part of the SPL, Solaris Porting Layer. ++ * For details, see . ++ * ++ * The SPL is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License as published by the ++ * Free Software Foundation; either version 2 of the License, or (at your ++ * option) any later version. ++ * ++ * The SPL is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * for more details. ++ * ++ * You should have received a copy of the GNU General Public License along ++ * with the SPL. If not, see . ++\*****************************************************************************/ ++ ++#ifndef _SPL_UNISTD_H ++#define _SPL_UNISTD_H ++ ++#endif /* SPL_UNISTD_H */ +diff -uNr linux-3.2.33-go.orig/include/spl/sys/utsname.h linux-3.2.33-go/include/spl/sys/utsname.h +--- linux-3.2.33-go.orig/include/spl/sys/utsname.h 1970-01-01 01:00:00.000000000 +0100 ++++ linux-3.2.33-go/include/spl/sys/utsname.h 2012-11-16 23:22:32.405192918 +0100 +@@ -0,0 +1,34 @@ ++/*****************************************************************************\ ++ * Copyright (C) 2007-2010 Lawrence Livermore National Security, LLC. ++ * Copyright (C) 2007 The Regents of the University of California. ++ * Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER). ++ * Written by Brian Behlendorf . ++ * UCRL-CODE-235197 ++ * ++ * This file is part of the SPL, Solaris Porting Layer. ++ * For details, see . ++ * ++ * The SPL is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License as published by the ++ * Free Software Foundation; either version 2 of the License, or (at your ++ * option) any later version. ++ * ++ * The SPL is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * for more details. ++ * ++ * You should have received a copy of the GNU General Public License along ++ * with the SPL. If not, see . ++\*****************************************************************************/ ++ ++#ifndef _SPL_UTSNAME_H ++#define _SPL_UTSNAME_H ++ ++#include ++ ++extern struct new_utsname *__utsname(void); ++ ++#define utsname (*__utsname()) ++ ++#endif /* SPL_UTSNAME_H */ +diff -uNr linux-3.2.33-go.orig/include/spl/sys/va_list.h linux-3.2.33-go/include/spl/sys/va_list.h +--- linux-3.2.33-go.orig/include/spl/sys/va_list.h 1970-01-01 01:00:00.000000000 +0100 ++++ linux-3.2.33-go/include/spl/sys/va_list.h 2012-11-16 23:22:32.404192930 +0100 +@@ -0,0 +1,28 @@ ++/*****************************************************************************\ ++ * Copyright (C) 2007-2010 Lawrence Livermore National Security, LLC. ++ * Copyright (C) 2007 The Regents of the University of California. ++ * Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER). ++ * Written by Brian Behlendorf . ++ * UCRL-CODE-235197 ++ * ++ * This file is part of the SPL, Solaris Porting Layer. ++ * For details, see . ++ * ++ * The SPL is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License as published by the ++ * Free Software Foundation; either version 2 of the License, or (at your ++ * option) any later version. 
++ * ++ * The SPL is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * for more details. ++ * ++ * You should have received a copy of the GNU General Public License along ++ * with the SPL. If not, see . ++\*****************************************************************************/ ++ ++#ifndef _SPL_VA_LIST_H ++#define _SPL_VA_LIST_H ++ ++#endif /* SPL_VA_LIST_H */ +diff -uNr linux-3.2.33-go.orig/include/spl/sys/varargs.h linux-3.2.33-go/include/spl/sys/varargs.h +--- linux-3.2.33-go.orig/include/spl/sys/varargs.h 1970-01-01 01:00:00.000000000 +0100 ++++ linux-3.2.33-go/include/spl/sys/varargs.h 2012-11-16 23:22:32.405192918 +0100 +@@ -0,0 +1,30 @@ ++/*****************************************************************************\ ++ * Copyright (C) 2007-2010 Lawrence Livermore National Security, LLC. ++ * Copyright (C) 2007 The Regents of the University of California. ++ * Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER). ++ * Written by Brian Behlendorf . ++ * UCRL-CODE-235197 ++ * ++ * This file is part of the SPL, Solaris Porting Layer. ++ * For details, see . ++ * ++ * The SPL is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License as published by the ++ * Free Software Foundation; either version 2 of the License, or (at your ++ * option) any later version. ++ * ++ * The SPL is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * for more details. ++ * ++ * You should have received a copy of the GNU General Public License along ++ * with the SPL. If not, see . ++\*****************************************************************************/ ++ ++#ifndef _SPL_VARARGS_H ++#define _SPL_VARARGS_H ++ ++#define __va_list va_list ++ ++#endif /* SPL_VARARGS_H */ +diff -uNr linux-3.2.33-go.orig/include/spl/sys/vfs.h linux-3.2.33-go/include/spl/sys/vfs.h +--- linux-3.2.33-go.orig/include/spl/sys/vfs.h 1970-01-01 01:00:00.000000000 +0100 ++++ linux-3.2.33-go/include/spl/sys/vfs.h 2012-11-16 23:22:32.405192918 +0100 +@@ -0,0 +1,51 @@ ++/*****************************************************************************\ ++ * Copyright (C) 2007-2010 Lawrence Livermore National Security, LLC. ++ * Copyright (C) 2007 The Regents of the University of California. ++ * Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER). ++ * Written by Brian Behlendorf . ++ * UCRL-CODE-235197 ++ * ++ * This file is part of the SPL, Solaris Porting Layer. ++ * For details, see . ++ * ++ * The SPL is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License as published by the ++ * Free Software Foundation; either version 2 of the License, or (at your ++ * option) any later version. ++ * ++ * The SPL is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * for more details. ++ * ++ * You should have received a copy of the GNU General Public License along ++ * with the SPL. If not, see . 
++\*****************************************************************************/ ++ ++#ifndef _SPL_ZFS_H ++#define _SPL_ZFS_H ++ ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++ ++#define MAXFIDSZ 64 ++ ++typedef struct spl_fid { ++ union { ++ long fid_pad; ++ struct { ++ ushort_t len; /* length of data in bytes */ ++ char data[MAXFIDSZ];/* data (variable len) */ ++ } _fid; ++ } un; ++} fid_t; ++ ++#define fid_len un._fid.len ++#define fid_data un._fid.data ++ ++#endif /* SPL_ZFS_H */ +diff -uNr linux-3.2.33-go.orig/include/spl/sys/vfs_opreg.h linux-3.2.33-go/include/spl/sys/vfs_opreg.h +--- linux-3.2.33-go.orig/include/spl/sys/vfs_opreg.h 1970-01-01 01:00:00.000000000 +0100 ++++ linux-3.2.33-go/include/spl/sys/vfs_opreg.h 2012-11-16 23:22:32.406192907 +0100 +@@ -0,0 +1,28 @@ ++/*****************************************************************************\ ++ * Copyright (C) 2007-2010 Lawrence Livermore National Security, LLC. ++ * Copyright (C) 2007 The Regents of the University of California. ++ * Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER). ++ * Written by Brian Behlendorf . ++ * UCRL-CODE-235197 ++ * ++ * This file is part of the SPL, Solaris Porting Layer. ++ * For details, see . ++ * ++ * The SPL is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License as published by the ++ * Free Software Foundation; either version 2 of the License, or (at your ++ * option) any later version. ++ * ++ * The SPL is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * for more details. ++ * ++ * You should have received a copy of the GNU General Public License along ++ * with the SPL. If not, see . ++\*****************************************************************************/ ++ ++#ifndef _SPL_OPREG_H ++#define _SPL_OPREG_H ++ ++#endif /* SPL_OPREG_H */ +diff -uNr linux-3.2.33-go.orig/include/spl/sys/vmsystm.h linux-3.2.33-go/include/spl/sys/vmsystm.h +--- linux-3.2.33-go.orig/include/spl/sys/vmsystm.h 1970-01-01 01:00:00.000000000 +0100 ++++ linux-3.2.33-go/include/spl/sys/vmsystm.h 2012-11-16 23:22:32.405192918 +0100 +@@ -0,0 +1,181 @@ ++/*****************************************************************************\ ++ * Copyright (C) 2007-2010 Lawrence Livermore National Security, LLC. ++ * Copyright (C) 2007 The Regents of the University of California. ++ * Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER). ++ * Written by Brian Behlendorf . ++ * UCRL-CODE-235197 ++ * ++ * This file is part of the SPL, Solaris Porting Layer. ++ * For details, see . ++ * ++ * The SPL is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License as published by the ++ * Free Software Foundation; either version 2 of the License, or (at your ++ * option) any later version. ++ * ++ * The SPL is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * for more details. ++ * ++ * You should have received a copy of the GNU General Public License along ++ * with the SPL. If not, see . 
++\*****************************************************************************/ ++ ++#ifndef _SPL_VMSYSTM_H ++#define _SPL_VMSYSTM_H ++ ++#include ++#include ++#include ++#include ++#include ++#include ++ ++/* These values are loosely coupled with the VM page reclaim. ++ * Linux uses its own heuristics to trigger page reclamation, and ++ * because those interface are difficult to interface with. These ++ * values should only be considered as a rough guide to the system ++ * memory state and not as direct evidence that page reclamation. ++ * is or is not currently in progress. ++ */ ++#define membar_producer() smp_wmb() ++ ++#define physmem num_physpages ++#define freemem nr_free_pages() ++#define availrmem spl_kmem_availrmem() ++ ++extern pgcnt_t minfree; /* Sum of zone->pages_min */ ++extern pgcnt_t desfree; /* Sum of zone->pages_low */ ++extern pgcnt_t lotsfree; /* Sum of zone->pages_high */ ++extern pgcnt_t needfree; /* Always 0 unused in new Solaris */ ++extern pgcnt_t swapfs_minfree; /* Solaris default value */ ++extern pgcnt_t swapfs_reserve; /* Solaris default value */ ++ ++extern vmem_t *heap_arena; /* primary kernel heap arena */ ++extern vmem_t *zio_alloc_arena; /* arena for zio caches */ ++extern vmem_t *zio_arena; /* arena for allocating zio memory */ ++ ++extern pgcnt_t spl_kmem_availrmem(void); ++extern size_t vmem_size(vmem_t *vmp, int typemask); ++ ++/* ++ * The following symbols are available for use within the kernel ++ * itself, and they used to be available in older kernels. But it ++ * looks like they have been removed perhaps due to lack of use. ++ * For our purposes we need them to access the global memory state ++ * of the system, which is even available to user space process ++ * in /proc/meminfo. It's odd to me that there is no kernel API ++ * to get the same information, minimally the proc handler for ++ * the above mentioned /proc/meminfo file would make use of it. 
++ */ ++ ++/* Source linux/fs/proc/mmu.c */ ++#ifndef HAVE_GET_VMALLOC_INFO ++#ifdef CONFIG_MMU ++ ++struct vmalloc_info { ++ unsigned long used; ++ unsigned long largest_chunk; ++}; ++ ++typedef void (*get_vmalloc_info_t)(struct vmalloc_info *); ++extern get_vmalloc_info_t get_vmalloc_info_fn; ++ ++# define VMEM_ALLOC 0x01 ++# define VMEM_FREE 0x02 ++# define VMALLOC_TOTAL (VMALLOC_END - VMALLOC_START) ++# define get_vmalloc_info(vmi) get_vmalloc_info_fn(vmi) ++#else ++# error "CONFIG_MMU must be defined" ++#endif /* CONFIG_MMU */ ++#endif /* HAVE_GET_VMALLOC_INFO */ ++ ++#ifdef HAVE_PGDAT_HELPERS ++/* Source linux/mm/mmzone.c */ ++# ifndef HAVE_FIRST_ONLINE_PGDAT ++typedef struct pglist_data *(*first_online_pgdat_t)(void); ++extern first_online_pgdat_t first_online_pgdat_fn; ++# define first_online_pgdat() first_online_pgdat_fn() ++# endif /* HAVE_FIRST_ONLINE_PGDAT */ ++ ++# ifndef HAVE_NEXT_ONLINE_PGDAT ++typedef struct pglist_data *(*next_online_pgdat_t)(struct pglist_data *); ++extern next_online_pgdat_t next_online_pgdat_fn; ++# define next_online_pgdat(pgd) next_online_pgdat_fn(pgd) ++# endif /* HAVE_NEXT_ONLINE_PGDAT */ ++ ++# ifndef HAVE_NEXT_ZONE ++typedef struct zone *(*next_zone_t)(struct zone *); ++extern next_zone_t next_zone_fn; ++# define next_zone(zone) next_zone_fn(zone) ++# endif /* HAVE_NEXT_ZONE */ ++ ++#else /* HAVE_PGDAT_HELPERS */ ++ ++# ifndef HAVE_PGDAT_LIST ++extern struct pglist_data *pgdat_list_addr; ++# define pgdat_list pgdat_list_addr ++# endif /* HAVE_PGDAT_LIST */ ++ ++#endif /* HAVE_PGDAT_HELPERS */ ++ ++/* Source linux/mm/vmstat.c */ ++#if defined(NEED_GET_ZONE_COUNTS) && !defined(HAVE_GET_ZONE_COUNTS) ++typedef void (*get_zone_counts_t)(unsigned long *, unsigned long *, ++ unsigned long *); ++extern get_zone_counts_t get_zone_counts_fn; ++# define get_zone_counts(a,i,f) get_zone_counts_fn(a,i,f) ++#endif /* NEED_GET_ZONE_COUNTS && !HAVE_GET_ZONE_COUNTS */ ++ ++typedef enum spl_zone_stat_item { ++ SPL_NR_FREE_PAGES, ++ SPL_NR_INACTIVE, ++ SPL_NR_ACTIVE, ++ SPL_NR_ZONE_STAT_ITEMS ++} spl_zone_stat_item_t; ++ ++extern unsigned long spl_global_page_state(spl_zone_stat_item_t); ++ ++#define xcopyin(from, to, size) copy_from_user(to, from, size) ++#define xcopyout(from, to, size) copy_to_user(to, from, size) ++ ++static __inline__ int ++copyin(const void *from, void *to, size_t len) ++{ ++ /* On error copyin routine returns -1 */ ++ if (xcopyin(from, to, len)) ++ return -1; ++ ++ return 0; ++} ++ ++static __inline__ int ++copyout(const void *from, void *to, size_t len) ++{ ++ /* On error copyout routine returns -1 */ ++ if (xcopyout(from, to, len)) ++ return -1; ++ ++ return 0; ++} ++ ++static __inline__ int ++copyinstr(const void *from, void *to, size_t len, size_t *done) ++{ ++ size_t rc; ++ ++ if (len == 0) ++ return -ENAMETOOLONG; ++ ++ /* XXX: Should return ENAMETOOLONG if 'strlen(from) > len' */ ++ ++ memset(to, 0, len); ++ rc = copyin(from, to, len - 1); ++ if (done != NULL) ++ *done = rc; ++ ++ return 0; ++} ++ ++#endif /* SPL_VMSYSTM_H */ +diff -uNr linux-3.2.33-go.orig/include/spl/sys/vnode.h linux-3.2.33-go/include/spl/sys/vnode.h +--- linux-3.2.33-go.orig/include/spl/sys/vnode.h 1970-01-01 01:00:00.000000000 +0100 ++++ linux-3.2.33-go/include/spl/sys/vnode.h 2012-11-16 23:22:32.404192930 +0100 +@@ -0,0 +1,213 @@ ++/*****************************************************************************\ ++ * Copyright (C) 2007-2010 Lawrence Livermore National Security, LLC. ++ * Copyright (C) 2007 The Regents of the University of California. 
++ * Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER). ++ * Written by Brian Behlendorf . ++ * UCRL-CODE-235197 ++ * ++ * This file is part of the SPL, Solaris Porting Layer. ++ * For details, see . ++ * ++ * The SPL is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License as published by the ++ * Free Software Foundation; either version 2 of the License, or (at your ++ * option) any later version. ++ * ++ * The SPL is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * for more details. ++ * ++ * You should have received a copy of the GNU General Public License along ++ * with the SPL. If not, see . ++\*****************************************************************************/ ++ ++#ifndef _SPL_VNODE_H ++#define _SPL_VNODE_H ++ ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++ ++/* ++ * Prior to linux-2.6.33 only O_DSYNC semantics were implemented and ++ * they used the O_SYNC flag. As of linux-2.6.33 the this behavior ++ * was properly split in to O_SYNC and O_DSYNC respectively. ++ */ ++#ifndef O_DSYNC ++#define O_DSYNC O_SYNC ++#endif ++ ++#define FREAD 1 ++#define FWRITE 2 ++#define FCREAT O_CREAT ++#define FTRUNC O_TRUNC ++#define FOFFMAX O_LARGEFILE ++#define FSYNC O_SYNC ++#define FDSYNC O_DSYNC ++#define FRSYNC O_SYNC ++#define FEXCL O_EXCL ++#define FDIRECT O_DIRECT ++#define FAPPEND O_APPEND ++ ++#define FNODSYNC 0x10000 /* fsync pseudo flag */ ++#define FNOFOLLOW 0x20000 /* don't follow symlinks */ ++ ++#define F_FREESP 11 /* Free file space */ ++ ++ ++/* ++ * The vnode AT_ flags are mapped to the Linux ATTR_* flags. ++ * This allows them to be used safely with an iattr structure. ++ * The AT_XVATTR flag has been added and mapped to the upper ++ * bit range to avoid conflicting with the standard Linux set. 
++ */ ++#undef AT_UID ++#undef AT_GID ++ ++#define AT_MODE ATTR_MODE ++#define AT_UID ATTR_UID ++#define AT_GID ATTR_GID ++#define AT_SIZE ATTR_SIZE ++#define AT_ATIME ATTR_ATIME ++#define AT_MTIME ATTR_MTIME ++#define AT_CTIME ATTR_CTIME ++ ++#define ATTR_XVATTR (1 << 31) ++#define AT_XVATTR ATTR_XVATTR ++ ++#define ATTR_IATTR_MASK (ATTR_MODE | ATTR_UID | ATTR_GID | ATTR_SIZE | \ ++ ATTR_ATIME | ATTR_MTIME | ATTR_CTIME | ATTR_FILE) ++ ++#define CRCREAT 0x01 ++#define RMFILE 0x02 ++ ++#define B_INVAL 0x01 ++#define B_TRUNC 0x02 ++ ++#define LOOKUP_DIR 0x01 ++#define LOOKUP_XATTR 0x02 ++#define CREATE_XATTR_DIR 0x04 ++#define ATTR_NOACLCHECK 0x20 ++ ++#ifdef HAVE_PATH_IN_NAMEIDATA ++# define nd_dentry path.dentry ++# define nd_mnt path.mnt ++#else ++# define nd_dentry dentry ++# define nd_mnt mnt ++#endif ++ ++typedef enum vtype { ++ VNON = 0, ++ VREG = 1, ++ VDIR = 2, ++ VBLK = 3, ++ VCHR = 4, ++ VLNK = 5, ++ VFIFO = 6, ++ VDOOR = 7, ++ VPROC = 8, ++ VSOCK = 9, ++ VPORT = 10, ++ VBAD = 11 ++} vtype_t; ++ ++typedef struct vattr { ++ enum vtype va_type; /* vnode type */ ++ u_int va_mask; /* attribute bit-mask */ ++ u_short va_mode; /* acc mode */ ++ uid_t va_uid; /* owner uid */ ++ gid_t va_gid; /* owner gid */ ++ long va_fsid; /* fs id */ ++ long va_nodeid; /* node # */ ++ uint32_t va_nlink; /* # links */ ++ uint64_t va_size; /* file size */ ++ struct timespec va_atime; /* last acc */ ++ struct timespec va_mtime; /* last mod */ ++ struct timespec va_ctime; /* last chg */ ++ dev_t va_rdev; /* dev */ ++ uint64_t va_nblocks; /* space used */ ++ uint32_t va_blksize; /* block size */ ++ uint32_t va_seq; /* sequence */ ++ struct dentry *va_dentry; /* dentry to wire */ ++} vattr_t; ++ ++typedef struct vnode { ++ struct file *v_file; ++ kmutex_t v_lock; /* protects vnode fields */ ++ uint_t v_flag; /* vnode flags (see below) */ ++ uint_t v_count; /* reference count */ ++ void *v_data; /* private data for fs */ ++ struct vfs *v_vfsp; /* ptr to containing VFS */ ++ struct stdata *v_stream; /* associated stream */ ++ enum vtype v_type; /* vnode type */ ++ dev_t v_rdev; /* device (VCHR, VBLK) */ ++ gfp_t v_gfp_mask; /* original mapping gfp mask */ ++} vnode_t; ++ ++typedef struct vn_file { ++ int f_fd; /* linux fd for lookup */ ++ struct task_struct *f_task; /* linux task this fd belongs to */ ++ struct file *f_file; /* linux file struct */ ++ atomic_t f_ref; /* ref count */ ++ kmutex_t f_lock; /* struct lock */ ++ loff_t f_offset; /* offset */ ++ vnode_t *f_vnode; /* vnode */ ++ struct list_head f_list; /* list referenced file_t's */ ++} file_t; ++ ++extern vnode_t *vn_alloc(int flag); ++void vn_free(vnode_t *vp); ++extern vtype_t vn_mode_to_vtype(mode_t); ++extern mode_t vn_vtype_to_mode(vtype_t); ++extern int vn_open(const char *path, uio_seg_t seg, int flags, int mode, ++ vnode_t **vpp, int x1, void *x2); ++extern int vn_openat(const char *path, uio_seg_t seg, int flags, int mode, ++ vnode_t **vpp, int x1, void *x2, vnode_t *vp, int fd); ++extern int vn_rdwr(uio_rw_t uio, vnode_t *vp, void *addr, ssize_t len, ++ offset_t off, uio_seg_t seg, int x1, rlim64_t x2, ++ void *x3, ssize_t *residp); ++extern int vn_close(vnode_t *vp, int flags, int x1, int x2, void *x3, void *x4); ++extern int vn_seek(vnode_t *vp, offset_t o, offset_t *op, void *ct); ++ ++extern int vn_remove(const char *path, uio_seg_t seg, int flags); ++extern int vn_rename(const char *path1, const char *path2, int x1); ++extern int vn_getattr(vnode_t *vp, vattr_t *vap, int flags, void *x3, void *x4); ++extern int vn_fsync(vnode_t *vp, 
int flags, void *x3, void *x4); ++extern int vn_space(vnode_t *vp, int cmd, struct flock *bfp, int flag, ++ offset_t offset, void *x6, void *x7); ++extern file_t *vn_getf(int fd); ++extern void vn_releasef(int fd); ++extern int vn_set_pwd(const char *filename); ++ ++int spl_vn_init_kallsyms_lookup(void); ++int spl_vn_init(void); ++void spl_vn_fini(void); ++ ++#define VOP_CLOSE vn_close ++#define VOP_SEEK vn_seek ++#define VOP_GETATTR vn_getattr ++#define VOP_FSYNC vn_fsync ++#define VOP_SPACE vn_space ++#define VOP_PUTPAGE(vp, o, s, f, x1, x2) ((void)0) ++#define vn_is_readonly(vp) 0 ++#define getf vn_getf ++#define releasef vn_releasef ++ ++extern vnode_t *rootdir; ++ ++#endif /* SPL_VNODE_H */ +diff -uNr linux-3.2.33-go.orig/include/spl/sys/zmod.h linux-3.2.33-go/include/spl/sys/zmod.h +--- linux-3.2.33-go.orig/include/spl/sys/zmod.h 1970-01-01 01:00:00.000000000 +0100 ++++ linux-3.2.33-go/include/spl/sys/zmod.h 2012-11-16 23:22:32.403192942 +0100 +@@ -0,0 +1,69 @@ ++/*****************************************************************************\ ++ * Copyright (C) 2007-2010 Lawrence Livermore National Security, LLC. ++ * Copyright (C) 2007 The Regents of the University of California. ++ * Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER). ++ * Written by Brian Behlendorf . ++ * UCRL-CODE-235197 ++ * ++ * This file is part of the SPL, Solaris Porting Layer. ++ * For details, see . ++ * ++ * The SPL is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License as published by the ++ * Free Software Foundation; either version 2 of the License, or (at your ++ * option) any later version. ++ * ++ * The SPL is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * for more details. ++ * ++ * You should have received a copy of the GNU General Public License along ++ * with the SPL. If not, see . ++ ***************************************************************************** ++ * z_compress_level/z_uncompress are nearly identical copies of the ++ * compress2/uncompress functions provided by the official zlib package ++ * available at http://zlib.net/. The only changes made we to slightly ++ * adapt the functions called to match the linux kernel implementation ++ * of zlib. The full zlib license follows: ++ * ++ * zlib.h -- interface of the 'zlib' general purpose compression library ++ * version 1.2.5, April 19th, 2010 ++ * ++ * Copyright (C) 1995-2010 Jean-loup Gailly and Mark Adler ++ * ++ * This software is provided 'as-is', without any express or implied ++ * warranty. In no event will the authors be held liable for any damages ++ * arising from the use of this software. ++ * ++ * Permission is granted to anyone to use this software for any purpose, ++ * including commercial applications, and to alter it and redistribute it ++ * freely, subject to the following restrictions: ++ * ++ * 1. The origin of this software must not be misrepresented; you must not ++ * claim that you wrote the original software. If you use this software ++ * in a product, an acknowledgment in the product documentation would be ++ * appreciated but is not required. ++ * 2. Altered source versions must be plainly marked as such, and must not be ++ * misrepresented as being the original software. ++ * 3. This notice may not be removed or altered from any source distribution. 
++ * ++ * Jean-loup Gailly ++ * Mark Adler ++\*****************************************************************************/ ++ ++#ifndef _SPL_ZMOD_H ++#define _SPL_ZMOD_H ++ ++#include ++#include ++ ++extern int z_compress_level(void *dest, size_t *destLen, const void *source, ++ size_t sourceLen, int level); ++extern int z_uncompress(void *dest, size_t *destLen, const void *source, ++ size_t sourceLen); ++ ++int spl_zlib_init(void); ++void spl_zlib_fini(void); ++ ++#endif /* SPL_ZMOD_H */ +diff -uNr linux-3.2.33-go.orig/include/spl/sys/zone.h linux-3.2.33-go/include/spl/sys/zone.h +--- linux-3.2.33-go.orig/include/spl/sys/zone.h 1970-01-01 01:00:00.000000000 +0100 ++++ linux-3.2.33-go/include/spl/sys/zone.h 2012-11-16 23:22:32.406192907 +0100 +@@ -0,0 +1,33 @@ ++/*****************************************************************************\ ++ * Copyright (C) 2007-2010 Lawrence Livermore National Security, LLC. ++ * Copyright (C) 2007 The Regents of the University of California. ++ * Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER). ++ * Written by Brian Behlendorf . ++ * UCRL-CODE-235197 ++ * ++ * This file is part of the SPL, Solaris Porting Layer. ++ * For details, see . ++ * ++ * The SPL is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License as published by the ++ * Free Software Foundation; either version 2 of the License, or (at your ++ * option) any later version. ++ * ++ * The SPL is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * for more details. ++ * ++ * You should have received a copy of the GNU General Public License along ++ * with the SPL. If not, see . ++\*****************************************************************************/ ++ ++#ifndef _SPL_ZONE_H ++#define _SPL_ZONE_H ++ ++#include ++ ++#define zone_dataset_visible(x, y) (1) ++#define INGLOBALZONE(z) (1) ++ ++#endif /* SPL_ZONE_H */ +diff -uNr linux-3.2.33-go.orig/include/spl/unistd.h linux-3.2.33-go/include/spl/unistd.h +--- linux-3.2.33-go.orig/include/spl/unistd.h 1970-01-01 01:00:00.000000000 +0100 ++++ linux-3.2.33-go/include/spl/unistd.h 2012-11-16 23:22:32.407192896 +0100 +@@ -0,0 +1,28 @@ ++/*****************************************************************************\ ++ * Copyright (C) 2007-2010 Lawrence Livermore National Security, LLC. ++ * Copyright (C) 2007 The Regents of the University of California. ++ * Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER). ++ * Written by Brian Behlendorf . ++ * UCRL-CODE-235197 ++ * ++ * This file is part of the SPL, Solaris Porting Layer. ++ * For details, see . ++ * ++ * The SPL is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License as published by the ++ * Free Software Foundation; either version 2 of the License, or (at your ++ * option) any later version. ++ * ++ * The SPL is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * for more details. ++ * ++ * You should have received a copy of the GNU General Public License along ++ * with the SPL. If not, see . 
++\*****************************************************************************/ ++ ++#ifndef _SPL_UNISTD_H ++#define _SPL_UNISTD_H ++ ++#endif /* SPL_UNISTD_H */ +diff -uNr linux-3.2.33-go.orig/include/spl/util/qsort.h linux-3.2.33-go/include/spl/util/qsort.h +--- linux-3.2.33-go.orig/include/spl/util/qsort.h 1970-01-01 01:00:00.000000000 +0100 ++++ linux-3.2.33-go/include/spl/util/qsort.h 2012-11-16 23:22:32.403192942 +0100 +@@ -0,0 +1,32 @@ ++/*****************************************************************************\ ++ * Copyright (C) 2007-2010 Lawrence Livermore National Security, LLC. ++ * Copyright (C) 2007 The Regents of the University of California. ++ * Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER). ++ * Written by Brian Behlendorf . ++ * UCRL-CODE-235197 ++ * ++ * This file is part of the SPL, Solaris Porting Layer. ++ * For details, see . ++ * ++ * The SPL is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License as published by the ++ * Free Software Foundation; either version 2 of the License, or (at your ++ * option) any later version. ++ * ++ * The SPL is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * for more details. ++ * ++ * You should have received a copy of the GNU General Public License along ++ * with the SPL. If not, see . ++\*****************************************************************************/ ++ ++#ifndef _SPL_QSORT_H ++#define _SPL_QSORT_H ++ ++#include ++ ++#define qsort(base, num, size, cmp) sort(base, num, size, cmp, NULL) ++ ++#endif /* SPL_QSORT_H */ +diff -uNr linux-3.2.33-go.orig/include/spl/util/sscanf.h linux-3.2.33-go/include/spl/util/sscanf.h +--- linux-3.2.33-go.orig/include/spl/util/sscanf.h 1970-01-01 01:00:00.000000000 +0100 ++++ linux-3.2.33-go/include/spl/util/sscanf.h 2012-11-16 23:22:32.403192942 +0100 +@@ -0,0 +1,28 @@ ++/*****************************************************************************\ ++ * Copyright (C) 2007-2010 Lawrence Livermore National Security, LLC. ++ * Copyright (C) 2007 The Regents of the University of California. ++ * Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER). ++ * Written by Brian Behlendorf . ++ * UCRL-CODE-235197 ++ * ++ * This file is part of the SPL, Solaris Porting Layer. ++ * For details, see . ++ * ++ * The SPL is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License as published by the ++ * Free Software Foundation; either version 2 of the License, or (at your ++ * option) any later version. ++ * ++ * The SPL is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * for more details. ++ * ++ * You should have received a copy of the GNU General Public License along ++ * with the SPL. If not, see . 
++\*****************************************************************************/ ++ ++#ifndef _SPL_UTIL_SSCANF_H ++#define _SPL_UTIL_SSCANF_H ++ ++#endif /* SPL_UTIL_SSCAN_H */ +diff -uNr linux-3.2.33-go.orig/include/spl/vm/anon.h linux-3.2.33-go/include/spl/vm/anon.h +--- linux-3.2.33-go.orig/include/spl/vm/anon.h 1970-01-01 01:00:00.000000000 +0100 ++++ linux-3.2.33-go/include/spl/vm/anon.h 2012-11-16 23:22:32.403192942 +0100 +@@ -0,0 +1,28 @@ ++/*****************************************************************************\ ++ * Copyright (C) 2007-2010 Lawrence Livermore National Security, LLC. ++ * Copyright (C) 2007 The Regents of the University of California. ++ * Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER). ++ * Written by Brian Behlendorf . ++ * UCRL-CODE-235197 ++ * ++ * This file is part of the SPL, Solaris Porting Layer. ++ * For details, see . ++ * ++ * The SPL is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License as published by the ++ * Free Software Foundation; either version 2 of the License, or (at your ++ * option) any later version. ++ * ++ * The SPL is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * for more details. ++ * ++ * You should have received a copy of the GNU General Public License along ++ * with the SPL. If not, see . ++\*****************************************************************************/ ++ ++#ifndef _SPL_VM_ANON_H ++#define _SPL_VM_ANON_H ++ ++#endif /* SPL_VM_ANON_H */ +diff -uNr linux-3.2.33-go.orig/include/spl/vm/pvn.h linux-3.2.33-go/include/spl/vm/pvn.h +--- linux-3.2.33-go.orig/include/spl/vm/pvn.h 1970-01-01 01:00:00.000000000 +0100 ++++ linux-3.2.33-go/include/spl/vm/pvn.h 2012-11-16 23:22:32.403192942 +0100 +@@ -0,0 +1,28 @@ ++/*****************************************************************************\ ++ * Copyright (C) 2007-2010 Lawrence Livermore National Security, LLC. ++ * Copyright (C) 2007 The Regents of the University of California. ++ * Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER). ++ * Written by Brian Behlendorf . ++ * UCRL-CODE-235197 ++ * ++ * This file is part of the SPL, Solaris Porting Layer. ++ * For details, see . ++ * ++ * The SPL is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License as published by the ++ * Free Software Foundation; either version 2 of the License, or (at your ++ * option) any later version. ++ * ++ * The SPL is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * for more details. ++ * ++ * You should have received a copy of the GNU General Public License along ++ * with the SPL. If not, see . 
++\*****************************************************************************/ ++ ++#ifndef _SPL_VM_PVN_H ++#define _SPL_VM_PVN_H ++ ++#endif /* SPL_VM_PVN_H */ +diff -uNr linux-3.2.33-go.orig/include/spl/vm/seg_kmem.h linux-3.2.33-go/include/spl/vm/seg_kmem.h +--- linux-3.2.33-go.orig/include/spl/vm/seg_kmem.h 1970-01-01 01:00:00.000000000 +0100 ++++ linux-3.2.33-go/include/spl/vm/seg_kmem.h 2012-11-16 23:22:32.403192942 +0100 +@@ -0,0 +1,30 @@ ++/*****************************************************************************\ ++ * Copyright (C) 2007-2010 Lawrence Livermore National Security, LLC. ++ * Copyright (C) 2007 The Regents of the University of California. ++ * Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER). ++ * Written by Brian Behlendorf . ++ * UCRL-CODE-235197 ++ * ++ * This file is part of the SPL, Solaris Porting Layer. ++ * For details, see . ++ * ++ * The SPL is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License as published by the ++ * Free Software Foundation; either version 2 of the License, or (at your ++ * option) any later version. ++ * ++ * The SPL is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * for more details. ++ * ++ * You should have received a copy of the GNU General Public License along ++ * with the SPL. If not, see . ++\*****************************************************************************/ ++ ++#ifndef _SPL_SEG_KMEM_H ++#define _SPL_SEG_KMEM_H ++ ++#include ++ ++#endif /* SPL_SEG_KMEM_H */ +diff -uNr linux-3.2.33-go.orig/include/zfs/libnvpair.h linux-3.2.33-go/include/zfs/libnvpair.h +--- linux-3.2.33-go.orig/include/zfs/libnvpair.h 1970-01-01 01:00:00.000000000 +0100 ++++ linux-3.2.33-go/include/zfs/libnvpair.h 2012-11-16 23:25:34.344039393 +0100 +@@ -0,0 +1,194 @@ ++/* ++ * CDDL HEADER START ++ * ++ * The contents of this file are subject to the terms of the ++ * Common Development and Distribution License (the "License"). ++ * You may not use this file except in compliance with the License. ++ * ++ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE ++ * or http://www.opensolaris.org/os/licensing. ++ * See the License for the specific language governing permissions ++ * and limitations under the License. ++ * ++ * When distributing Covered Code, include this CDDL HEADER in each ++ * file and include the License file at usr/src/OPENSOLARIS.LICENSE. ++ * If applicable, add the following below this CDDL HEADER, with the ++ * fields enclosed by brackets "[]" replaced with your own identifying ++ * information: Portions Copyright [yyyy] [name of copyright owner] ++ * ++ * CDDL HEADER END ++ */ ++/* ++ * Copyright (c) 2000, 2010, Oracle and/or its affiliates. All rights reserved. ++ */ ++ ++#ifndef _LIBNVPAIR_H ++#define _LIBNVPAIR_H ++ ++#include ++#include ++#include ++#include ++ ++#ifdef __cplusplus ++extern "C" { ++#endif ++ ++/* ++ * All interfaces described in this file are private to Solaris, and ++ * are subject to change at any time and without notice. The public ++ * nvlist/nvpair interfaces, as documented in manpage sections 3NVPAIR, ++ * are all imported from included above. 
++ */ ++ ++extern int nvpair_value_match(nvpair_t *, int, char *, char **); ++extern int nvpair_value_match_regex(nvpair_t *, int, char *, regex_t *, ++ char **); ++ ++extern void nvlist_print(FILE *, nvlist_t *); ++extern void dump_nvlist(nvlist_t *, int); ++ ++/* ++ * Private nvlist printing interface that allows the caller some control ++ * over output rendering (as opposed to nvlist_print and dump_nvlist). ++ * ++ * Obtain an opaque nvlist_prtctl_t cookie using nvlist_prtctl_alloc ++ * (NULL on failure); on return the cookie is set up for default formatting ++ * and rendering. Quote the cookie in subsequent customisation functions and ++ * then pass the cookie to nvlist_prt to render the nvlist. Finally, ++ * use nvlist_prtctl_free to release the cookie. ++ * ++ * For all nvlist_lookup_xxx and nvlist_lookup_xxx_array functions ++ * we have a corresponding brace of functions that appoint replacement ++ * rendering functions: ++ * ++ * extern void nvlist_prtctl_xxx(nvlist_prtctl_t, ++ * void (*)(nvlist_prtctl_t ctl, void *private, const char *name, ++ * xxxtype value)) ++ * ++ * and ++ * ++ * extern void nvlist_prtctl_xxx_array(nvlist_prtctl_t, ++ * void (*)(nvlist_prtctl_t ctl, void *private, const char *name, ++ * xxxtype value, uint_t count)) ++ * ++ * where xxxtype is the C datatype corresponding to xxx, eg int8_t for "int8" ++ * and char * for "string". The function that is appointed to render the ++ * specified datatype receives as arguments the cookie, the nvlist ++ * member name, the value of that member (or a pointer for array function), ++ * and (for array rendering functions) a count of the number of elements. ++ */ ++ ++typedef struct nvlist_prtctl *nvlist_prtctl_t; /* opaque */ ++ ++enum nvlist_indent_mode { ++ NVLIST_INDENT_ABS, /* Absolute indentation */ ++ NVLIST_INDENT_TABBED /* Indent with tabstops */ ++}; ++ ++extern nvlist_prtctl_t nvlist_prtctl_alloc(void); ++extern void nvlist_prtctl_free(nvlist_prtctl_t); ++extern void nvlist_prt(nvlist_t *, nvlist_prtctl_t); ++ ++/* Output stream */ ++extern void nvlist_prtctl_setdest(nvlist_prtctl_t, FILE *); ++extern FILE *nvlist_prtctl_getdest(nvlist_prtctl_t); ++ ++/* Indentation mode, start indent, indent increment; default tabbed/0/1 */ ++extern void nvlist_prtctl_setindent(nvlist_prtctl_t, enum nvlist_indent_mode, ++ int, int); ++extern void nvlist_prtctl_doindent(nvlist_prtctl_t, int); ++ ++enum nvlist_prtctl_fmt { ++ NVLIST_FMT_MEMBER_NAME, /* name fmt; default "%s = " */ ++ NVLIST_FMT_MEMBER_POSTAMBLE, /* after nvlist member; default "\n" */ ++ NVLIST_FMT_BTWN_ARRAY /* between array members; default " " */ ++}; ++ ++extern void nvlist_prtctl_setfmt(nvlist_prtctl_t, enum nvlist_prtctl_fmt, ++ const char *); ++extern void nvlist_prtctl_dofmt(nvlist_prtctl_t, enum nvlist_prtctl_fmt, ...); ++ ++/* ++ * Function prototypes for interfaces that appoint a new rendering function ++ * for single-valued nvlist members. ++ * ++ * A replacement function receives arguments as follows: ++ * ++ * nvlist_prtctl_t Print control structure; do not change preferences ++ * for this object from a print callback function. ++ * ++ * void * The function-private cookie argument registered ++ * when the replacement function was appointed. ++ * ++ * nvlist_t * The full nvlist that is being processed. The ++ * rendering function is called to render a single ++ * member (name and value passed as below) but it may ++ * want to reference or incorporate other aspects of ++ * the full nvlist. 
++ * ++ * const char * Member name to render ++ * ++ * valtype Value of the member to render ++ * ++ * The function must return non-zero if it has rendered output for this ++ * member, or 0 if it wants to default to standard rendering for this ++ * one member. ++ */ ++ ++#define NVLIST_PRINTCTL_SVDECL(funcname, valtype) \ ++ extern void funcname(nvlist_prtctl_t, \ ++ int (*)(nvlist_prtctl_t, void *, nvlist_t *, const char *, valtype), \ ++ void *) ++ ++NVLIST_PRINTCTL_SVDECL(nvlist_prtctlop_boolean, int); ++NVLIST_PRINTCTL_SVDECL(nvlist_prtctlop_boolean_value, boolean_t); ++NVLIST_PRINTCTL_SVDECL(nvlist_prtctlop_byte, uchar_t); ++NVLIST_PRINTCTL_SVDECL(nvlist_prtctlop_int8, int8_t); ++NVLIST_PRINTCTL_SVDECL(nvlist_prtctlop_uint8, uint8_t); ++NVLIST_PRINTCTL_SVDECL(nvlist_prtctlop_int16, int16_t); ++NVLIST_PRINTCTL_SVDECL(nvlist_prtctlop_uint16, uint16_t); ++NVLIST_PRINTCTL_SVDECL(nvlist_prtctlop_int32, int32_t); ++NVLIST_PRINTCTL_SVDECL(nvlist_prtctlop_uint32, uint32_t); ++NVLIST_PRINTCTL_SVDECL(nvlist_prtctlop_int64, int64_t); ++NVLIST_PRINTCTL_SVDECL(nvlist_prtctlop_uint64, uint64_t); ++NVLIST_PRINTCTL_SVDECL(nvlist_prtctlop_double, double); ++NVLIST_PRINTCTL_SVDECL(nvlist_prtctlop_string, char *); ++NVLIST_PRINTCTL_SVDECL(nvlist_prtctlop_hrtime, hrtime_t); ++NVLIST_PRINTCTL_SVDECL(nvlist_prtctlop_nvlist, nvlist_t *); ++ ++#undef NVLIST_PRINTCTL_SVDECL /* was just for "clarity" above */ ++ ++/* ++ * Function prototypes for interfaces that appoint a new rendering function ++ * for array-valued nvlist members. ++ * ++ * One additional argument is taken: uint_t for the number of array elements ++ * ++ * Return values as above. ++ */ ++#define NVLIST_PRINTCTL_AVDECL(funcname, vtype) \ ++ extern void funcname(nvlist_prtctl_t, \ ++ int (*)(nvlist_prtctl_t, void *, nvlist_t *, const char *, vtype, uint_t), \ ++ void *) ++ ++NVLIST_PRINTCTL_AVDECL(nvlist_prtctlop_boolean_array, boolean_t *); ++NVLIST_PRINTCTL_AVDECL(nvlist_prtctlop_byte_array, uchar_t *); ++NVLIST_PRINTCTL_AVDECL(nvlist_prtctlop_int8_array, int8_t *); ++NVLIST_PRINTCTL_AVDECL(nvlist_prtctlop_uint8_array, uint8_t *); ++NVLIST_PRINTCTL_AVDECL(nvlist_prtctlop_int16_array, int16_t *); ++NVLIST_PRINTCTL_AVDECL(nvlist_prtctlop_uint16_array, uint16_t *); ++NVLIST_PRINTCTL_AVDECL(nvlist_prtctlop_int32_array, int32_t *); ++NVLIST_PRINTCTL_AVDECL(nvlist_prtctlop_uint32_array, uint32_t *); ++NVLIST_PRINTCTL_AVDECL(nvlist_prtctlop_int64_array, int64_t *); ++NVLIST_PRINTCTL_AVDECL(nvlist_prtctlop_uint64_array, uint64_t *); ++NVLIST_PRINTCTL_AVDECL(nvlist_prtctlop_string_array, char **); ++NVLIST_PRINTCTL_AVDECL(nvlist_prtctlop_nvlist_array, nvlist_t **); ++ ++#undef NVLIST_PRINTCTL_AVDECL /* was just for "clarity" above */ ++ ++#ifdef __cplusplus ++} ++#endif ++ ++#endif /* _LIBNVPAIR_H */ +diff -uNr linux-3.2.33-go.orig/include/zfs/libuutil_common.h linux-3.2.33-go/include/zfs/libuutil_common.h +--- linux-3.2.33-go.orig/include/zfs/libuutil_common.h 1970-01-01 01:00:00.000000000 +0100 ++++ linux-3.2.33-go/include/zfs/libuutil_common.h 2012-11-16 23:25:34.344039393 +0100 +@@ -0,0 +1,35 @@ ++/* ++ * CDDL HEADER START ++ * ++ * The contents of this file are subject to the terms of the ++ * Common Development and Distribution License (the "License"). ++ * You may not use this file except in compliance with the License. ++ * ++ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE ++ * or http://www.opensolaris.org/os/licensing. 
++ * See the License for the specific language governing permissions ++ * and limitations under the License. ++ * ++ * When distributing Covered Code, include this CDDL HEADER in each ++ * file and include the License file at usr/src/OPENSOLARIS.LICENSE. ++ * If applicable, add the following below this CDDL HEADER, with the ++ * fields enclosed by brackets "[]" replaced with your own identifying ++ * information: Portions Copyright [yyyy] [name of copyright owner] ++ * ++ * CDDL HEADER END ++ */ ++ ++/* ++ * Copyright 2008 Sun Microsystems, Inc. All rights reserved. ++ * Use is subject to license terms. ++ */ ++ ++#ifndef _LIBUUTIL_COMMON_H ++#define _LIBUUTIL_COMMON_H ++ ++ ++ ++#include ++#include ++ ++#endif /* _LIBUUTIL_COMMON_H */ +diff -uNr linux-3.2.33-go.orig/include/zfs/libuutil.h linux-3.2.33-go/include/zfs/libuutil.h +--- linux-3.2.33-go.orig/include/zfs/libuutil.h 1970-01-01 01:00:00.000000000 +0100 ++++ linux-3.2.33-go/include/zfs/libuutil.h 2012-11-16 23:25:34.344039393 +0100 +@@ -0,0 +1,390 @@ ++/* ++ * CDDL HEADER START ++ * ++ * The contents of this file are subject to the terms of the ++ * Common Development and Distribution License (the "License"). ++ * You may not use this file except in compliance with the License. ++ * ++ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE ++ * or http://www.opensolaris.org/os/licensing. ++ * See the License for the specific language governing permissions ++ * and limitations under the License. ++ * ++ * When distributing Covered Code, include this CDDL HEADER in each ++ * file and include the License file at usr/src/OPENSOLARIS.LICENSE. ++ * If applicable, add the following below this CDDL HEADER, with the ++ * fields enclosed by brackets "[]" replaced with your own identifying ++ * information: Portions Copyright [yyyy] [name of copyright owner] ++ * ++ * CDDL HEADER END ++ */ ++/* ++ * Copyright (c) 2004, 2010, Oracle and/or its affiliates. All rights reserved. ++ */ ++ ++#ifndef _LIBUUTIL_H ++#define _LIBUUTIL_H ++ ++#include ++#include ++#include ++ ++#ifdef __cplusplus ++extern "C" { ++#endif ++ ++/* ++ * Standard flags codes. ++ */ ++#define UU_DEFAULT 0 ++ ++/* ++ * Standard error codes. ++ */ ++#define UU_ERROR_NONE 0 /* no error */ ++#define UU_ERROR_INVALID_ARGUMENT 1 /* invalid argument */ ++#define UU_ERROR_UNKNOWN_FLAG 2 /* passed flag invalid */ ++#define UU_ERROR_NO_MEMORY 3 /* out of memory */ ++#define UU_ERROR_CALLBACK_FAILED 4 /* callback-initiated error */ ++#define UU_ERROR_NOT_SUPPORTED 5 /* operation not supported */ ++#define UU_ERROR_EMPTY 6 /* no value provided */ ++#define UU_ERROR_UNDERFLOW 7 /* value is too small */ ++#define UU_ERROR_OVERFLOW 8 /* value is too value */ ++#define UU_ERROR_INVALID_CHAR 9 /* value contains unexpected char */ ++#define UU_ERROR_INVALID_DIGIT 10 /* value contains digit not in base */ ++ ++#define UU_ERROR_SYSTEM 99 /* underlying system error */ ++#define UU_ERROR_UNKNOWN 100 /* error status not known */ ++ ++/* ++ * Standard program exit codes. ++ */ ++#define UU_EXIT_OK (*(uu_exit_ok())) ++#define UU_EXIT_FATAL (*(uu_exit_fatal())) ++#define UU_EXIT_USAGE (*(uu_exit_usage())) ++ ++/* ++ * Exit status profiles. ++ */ ++#define UU_PROFILE_DEFAULT 0 ++#define UU_PROFILE_LAUNCHER 1 ++ ++/* ++ * Error reporting functions. ++ */ ++uint32_t uu_error(void); ++const char *uu_strerror(uint32_t); ++ ++/* ++ * Program notification functions. 
++ */ ++extern void uu_alt_exit(int); ++extern const char *uu_setpname(char *); ++extern const char *uu_getpname(void); ++/*PRINTFLIKE1*/ ++extern void uu_warn(const char *, ...); ++extern void uu_vwarn(const char *, va_list); ++/*PRINTFLIKE1*/ ++extern void uu_die(const char *, ...) __NORETURN; ++extern void uu_vdie(const char *, va_list) __NORETURN; ++/*PRINTFLIKE2*/ ++extern void uu_xdie(int, const char *, ...) __NORETURN; ++extern void uu_vxdie(int, const char *, va_list) __NORETURN; ++ ++/* ++ * Exit status functions (not to be used directly) ++ */ ++extern int *uu_exit_ok(void); ++extern int *uu_exit_fatal(void); ++extern int *uu_exit_usage(void); ++ ++/* ++ * string->number conversions ++ */ ++extern int uu_strtoint(const char *, void *, size_t, int, int64_t, int64_t); ++extern int uu_strtouint(const char *, void *, size_t, int, uint64_t, uint64_t); ++ ++/* ++ * Debug print facility functions. ++ */ ++typedef struct uu_dprintf uu_dprintf_t; ++ ++typedef enum { ++ UU_DPRINTF_SILENT, ++ UU_DPRINTF_FATAL, ++ UU_DPRINTF_WARNING, ++ UU_DPRINTF_NOTICE, ++ UU_DPRINTF_INFO, ++ UU_DPRINTF_DEBUG ++} uu_dprintf_severity_t; ++ ++extern uu_dprintf_t *uu_dprintf_create(const char *, uu_dprintf_severity_t, ++ uint_t); ++/*PRINTFLIKE3*/ ++extern void uu_dprintf(uu_dprintf_t *, uu_dprintf_severity_t, ++ const char *, ...); ++extern void uu_dprintf_destroy(uu_dprintf_t *); ++extern const char *uu_dprintf_getname(uu_dprintf_t *); ++ ++/* ++ * Identifier test flags and function. ++ */ ++#define UU_NAME_DOMAIN 0x1 /* allow SUNW, or com.sun, prefix */ ++#define UU_NAME_PATH 0x2 /* allow '/'-delimited paths */ ++ ++int uu_check_name(const char *, uint_t); ++ ++/* ++ * File creation functions. ++ */ ++extern int uu_open_tmp(const char *dir, uint_t uflags); ++ ++/* ++ * Convenience functions. ++ */ ++#define UU_NELEM(a) (sizeof (a) / sizeof ((a)[0])) ++ ++/*PRINTFLIKE1*/ ++extern char *uu_msprintf(const char *format, ...); ++extern void *uu_zalloc(size_t); ++extern char *uu_strdup(const char *); ++extern void uu_free(void *); ++ ++extern boolean_t uu_strcaseeq(const char *a, const char *b); ++extern boolean_t uu_streq(const char *a, const char *b); ++extern char *uu_strndup(const char *s, size_t n); ++extern boolean_t uu_strbw(const char *a, const char *b); ++extern void *uu_memdup(const void *buf, size_t sz); ++extern void uu_dump(FILE *out, const char *prefix, const void *buf, size_t len); ++ ++/* ++ * Comparison function type definition. ++ * Developers should be careful in their use of the _private argument. If you ++ * break interface guarantees, you get undefined behavior. ++ */ ++typedef int uu_compare_fn_t(const void *__left, const void *__right, ++ void *__private); ++ ++/* ++ * Walk variant flags. ++ * A data structure need not provide support for all variants and ++ * combinations. Refer to the appropriate documentation. ++ */ ++#define UU_WALK_ROBUST 0x00000001 /* walk can survive removes */ ++#define UU_WALK_REVERSE 0x00000002 /* reverse walk order */ ++ ++#define UU_WALK_PREORDER 0x00000010 /* walk tree in pre-order */ ++#define UU_WALK_POSTORDER 0x00000020 /* walk tree in post-order */ ++ ++/* ++ * Walk callback function return codes. ++ */ ++#define UU_WALK_ERROR -1 ++#define UU_WALK_NEXT 0 ++#define UU_WALK_DONE 1 ++ ++/* ++ * Walk callback function type definition. 
++ */ ++typedef int uu_walk_fn_t(void *_elem, void *_private); ++ ++/* ++ * lists: opaque structures ++ */ ++typedef struct uu_list_pool uu_list_pool_t; ++typedef struct uu_list uu_list_t; ++ ++typedef struct uu_list_node { ++ uintptr_t uln_opaque[2]; ++} uu_list_node_t; ++ ++typedef struct uu_list_walk uu_list_walk_t; ++ ++typedef uintptr_t uu_list_index_t; ++ ++/* ++ * lists: interface ++ * ++ * basic usage: ++ * typedef struct foo { ++ * ... ++ * uu_list_node_t foo_node; ++ * ... ++ * } foo_t; ++ * ++ * static int ++ * foo_compare(void *l_arg, void *r_arg, void *private) ++ * { ++ * foo_t *l = l_arg; ++ * foo_t *r = r_arg; ++ * ++ * if (... l greater than r ...) ++ * return (1); ++ * if (... l less than r ...) ++ * return (-1); ++ * return (0); ++ * } ++ * ++ * ... ++ * // at initialization time ++ * foo_pool = uu_list_pool_create("foo_pool", ++ * sizeof (foo_t), offsetof(foo_t, foo_node), foo_compare, ++ * debugging? 0 : UU_AVL_POOL_DEBUG); ++ * ... ++ */ ++uu_list_pool_t *uu_list_pool_create(const char *, size_t, size_t, ++ uu_compare_fn_t *, uint32_t); ++#define UU_LIST_POOL_DEBUG 0x00000001 ++ ++void uu_list_pool_destroy(uu_list_pool_t *); ++ ++/* ++ * usage: ++ * ++ * foo_t *a; ++ * a = malloc(sizeof(*a)); ++ * uu_list_node_init(a, &a->foo_list, pool); ++ * ... ++ * uu_list_node_fini(a, &a->foo_list, pool); ++ * free(a); ++ */ ++void uu_list_node_init(void *, uu_list_node_t *, uu_list_pool_t *); ++void uu_list_node_fini(void *, uu_list_node_t *, uu_list_pool_t *); ++ ++uu_list_t *uu_list_create(uu_list_pool_t *, void *_parent, uint32_t); ++#define UU_LIST_DEBUG 0x00000001 ++#define UU_LIST_SORTED 0x00000002 /* list is sorted */ ++ ++void uu_list_destroy(uu_list_t *); /* list must be empty */ ++ ++size_t uu_list_numnodes(uu_list_t *); ++ ++void *uu_list_first(uu_list_t *); ++void *uu_list_last(uu_list_t *); ++ ++void *uu_list_next(uu_list_t *, void *); ++void *uu_list_prev(uu_list_t *, void *); ++ ++int uu_list_walk(uu_list_t *, uu_walk_fn_t *, void *, uint32_t); ++ ++uu_list_walk_t *uu_list_walk_start(uu_list_t *, uint32_t); ++void *uu_list_walk_next(uu_list_walk_t *); ++void uu_list_walk_end(uu_list_walk_t *); ++ ++void *uu_list_find(uu_list_t *, void *, void *, uu_list_index_t *); ++void uu_list_insert(uu_list_t *, void *, uu_list_index_t); ++ ++void *uu_list_nearest_next(uu_list_t *, uu_list_index_t); ++void *uu_list_nearest_prev(uu_list_t *, uu_list_index_t); ++ ++void *uu_list_teardown(uu_list_t *, void **); ++ ++void uu_list_remove(uu_list_t *, void *); ++ ++/* ++ * lists: interfaces for non-sorted lists only ++ */ ++int uu_list_insert_before(uu_list_t *, void *_target, void *_elem); ++int uu_list_insert_after(uu_list_t *, void *_target, void *_elem); ++ ++/* ++ * avl trees: opaque structures ++ */ ++typedef struct uu_avl_pool uu_avl_pool_t; ++typedef struct uu_avl uu_avl_t; ++ ++typedef struct uu_avl_node { ++#ifdef _LP64 ++ uintptr_t uan_opaque[3]; ++#else ++ uintptr_t uan_opaque[4]; ++#endif ++} uu_avl_node_t; ++ ++typedef struct uu_avl_walk uu_avl_walk_t; ++ ++typedef uintptr_t uu_avl_index_t; ++ ++/* ++ * avl trees: interface ++ * ++ * basic usage: ++ * typedef struct foo { ++ * ... ++ * uu_avl_node_t foo_node; ++ * ... ++ * } foo_t; ++ * ++ * static int ++ * foo_compare(void *l_arg, void *r_arg, void *private) ++ * { ++ * foo_t *l = l_arg; ++ * foo_t *r = r_arg; ++ * ++ * if (... l greater than r ...) ++ * return (1); ++ * if (... l less than r ...) ++ * return (-1); ++ * return (0); ++ * } ++ * ++ * ... 
++ * // at initialization time ++ * foo_pool = uu_avl_pool_create("foo_pool", ++ * sizeof (foo_t), offsetof(foo_t, foo_node), foo_compare, ++ * debugging? 0 : UU_AVL_POOL_DEBUG); ++ * ... ++ */ ++uu_avl_pool_t *uu_avl_pool_create(const char *, size_t, size_t, ++ uu_compare_fn_t *, uint32_t); ++#define UU_AVL_POOL_DEBUG 0x00000001 ++ ++void uu_avl_pool_destroy(uu_avl_pool_t *); ++ ++/* ++ * usage: ++ * ++ * foo_t *a; ++ * a = malloc(sizeof(*a)); ++ * uu_avl_node_init(a, &a->foo_avl, pool); ++ * ... ++ * uu_avl_node_fini(a, &a->foo_avl, pool); ++ * free(a); ++ */ ++void uu_avl_node_init(void *, uu_avl_node_t *, uu_avl_pool_t *); ++void uu_avl_node_fini(void *, uu_avl_node_t *, uu_avl_pool_t *); ++ ++uu_avl_t *uu_avl_create(uu_avl_pool_t *, void *_parent, uint32_t); ++#define UU_AVL_DEBUG 0x00000001 ++ ++void uu_avl_destroy(uu_avl_t *); /* list must be empty */ ++ ++size_t uu_avl_numnodes(uu_avl_t *); ++ ++void *uu_avl_first(uu_avl_t *); ++void *uu_avl_last(uu_avl_t *); ++ ++void *uu_avl_next(uu_avl_t *, void *); ++void *uu_avl_prev(uu_avl_t *, void *); ++ ++int uu_avl_walk(uu_avl_t *, uu_walk_fn_t *, void *, uint32_t); ++ ++uu_avl_walk_t *uu_avl_walk_start(uu_avl_t *, uint32_t); ++void *uu_avl_walk_next(uu_avl_walk_t *); ++void uu_avl_walk_end(uu_avl_walk_t *); ++ ++void *uu_avl_find(uu_avl_t *, void *, void *, uu_avl_index_t *); ++void uu_avl_insert(uu_avl_t *, void *, uu_avl_index_t); ++ ++void *uu_avl_nearest_next(uu_avl_t *, uu_avl_index_t); ++void *uu_avl_nearest_prev(uu_avl_t *, uu_avl_index_t); ++ ++void *uu_avl_teardown(uu_avl_t *, void **); ++ ++void uu_avl_remove(uu_avl_t *, void *); ++ ++#ifdef __cplusplus ++} ++#endif ++ ++#endif /* _LIBUUTIL_H */ +diff -uNr linux-3.2.33-go.orig/include/zfs/libuutil_impl.h linux-3.2.33-go/include/zfs/libuutil_impl.h +--- linux-3.2.33-go.orig/include/zfs/libuutil_impl.h 1970-01-01 01:00:00.000000000 +0100 ++++ linux-3.2.33-go/include/zfs/libuutil_impl.h 2012-11-16 23:25:34.336039485 +0100 +@@ -0,0 +1,181 @@ ++/* ++ * CDDL HEADER START ++ * ++ * The contents of this file are subject to the terms of the ++ * Common Development and Distribution License, Version 1.0 only ++ * (the "License"). You may not use this file except in compliance ++ * with the License. ++ * ++ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE ++ * or http://www.opensolaris.org/os/licensing. ++ * See the License for the specific language governing permissions ++ * and limitations under the License. ++ * ++ * When distributing Covered Code, include this CDDL HEADER in each ++ * file and include the License file at usr/src/OPENSOLARIS.LICENSE. ++ * If applicable, add the following below this CDDL HEADER, with the ++ * fields enclosed by brackets "[]" replaced with your own identifying ++ * information: Portions Copyright [yyyy] [name of copyright owner] ++ * ++ * CDDL HEADER END ++ */ ++/* ++ * Copyright 2005 Sun Microsystems, Inc. All rights reserved. ++ * Use is subject to license terms. ++ */ ++ ++#ifndef _LIBUUTIL_IMPL_H ++#define _LIBUUTIL_IMPL_H ++ ++ ++ ++#include ++#include ++ ++#include ++#include ++ ++#ifdef __cplusplus ++extern "C" { ++#endif ++ ++void uu_set_error(uint_t); ++ ++ ++/*PRINTFLIKE1*/ ++void uu_panic(const char *format, ...); ++ ++ ++struct uu_dprintf { ++ char *uud_name; ++ uu_dprintf_severity_t uud_severity; ++ uint_t uud_flags; ++}; ++ ++/* ++ * For debugging purposes, libuutil keeps around linked lists of all uu_lists ++ * and uu_avls, along with pointers to their parents. 
These can cause false ++ * negatives when looking for memory leaks, so we encode the pointers by ++ * storing them with swapped endianness; this is not perfect, but it's about ++ * the best we can do without wasting a lot of space. ++ */ ++#ifdef _LP64 ++#define UU_PTR_ENCODE(ptr) BSWAP_64((uintptr_t)(void *)(ptr)) ++#else ++#define UU_PTR_ENCODE(ptr) BSWAP_32((uintptr_t)(void *)(ptr)) ++#endif ++ ++#define UU_PTR_DECODE(ptr) ((void *)UU_PTR_ENCODE(ptr)) ++ ++/* ++ * uu_list structures ++ */ ++typedef struct uu_list_node_impl { ++ struct uu_list_node_impl *uln_next; ++ struct uu_list_node_impl *uln_prev; ++} uu_list_node_impl_t; ++ ++struct uu_list_walk { ++ uu_list_walk_t *ulw_next; ++ uu_list_walk_t *ulw_prev; ++ ++ uu_list_t *ulw_list; ++ int8_t ulw_dir; ++ uint8_t ulw_robust; ++ uu_list_node_impl_t *ulw_next_result; ++}; ++ ++struct uu_list { ++ uintptr_t ul_next_enc; ++ uintptr_t ul_prev_enc; ++ ++ uu_list_pool_t *ul_pool; ++ uintptr_t ul_parent_enc; /* encoded parent pointer */ ++ size_t ul_offset; ++ size_t ul_numnodes; ++ uint8_t ul_debug; ++ uint8_t ul_sorted; ++ uint8_t ul_index; /* mark for uu_list_index_ts */ ++ ++ uu_list_node_impl_t ul_null_node; ++ uu_list_walk_t ul_null_walk; /* for robust walkers */ ++}; ++ ++#define UU_LIST_PTR(ptr) ((uu_list_t *)UU_PTR_DECODE(ptr)) ++ ++#define UU_LIST_POOL_MAXNAME 64 ++ ++struct uu_list_pool { ++ uu_list_pool_t *ulp_next; ++ uu_list_pool_t *ulp_prev; ++ ++ char ulp_name[UU_LIST_POOL_MAXNAME]; ++ size_t ulp_nodeoffset; ++ size_t ulp_objsize; ++ uu_compare_fn_t *ulp_cmp; ++ uint8_t ulp_debug; ++ uint8_t ulp_last_index; ++ pthread_mutex_t ulp_lock; /* protects null_list */ ++ uu_list_t ulp_null_list; ++}; ++ ++/* ++ * uu_avl structures ++ */ ++typedef struct avl_node uu_avl_node_impl_t; ++ ++struct uu_avl_walk { ++ uu_avl_walk_t *uaw_next; ++ uu_avl_walk_t *uaw_prev; ++ ++ uu_avl_t *uaw_avl; ++ void *uaw_next_result; ++ int8_t uaw_dir; ++ uint8_t uaw_robust; ++}; ++ ++struct uu_avl { ++ uintptr_t ua_next_enc; ++ uintptr_t ua_prev_enc; ++ ++ uu_avl_pool_t *ua_pool; ++ uintptr_t ua_parent_enc; ++ uint8_t ua_debug; ++ uint8_t ua_index; /* mark for uu_avl_index_ts */ ++ ++ struct avl_tree ua_tree; ++ uu_avl_walk_t ua_null_walk; ++}; ++ ++#define UU_AVL_PTR(x) ((uu_avl_t *)UU_PTR_DECODE(x)) ++ ++#define UU_AVL_POOL_MAXNAME 64 ++ ++struct uu_avl_pool { ++ uu_avl_pool_t *uap_next; ++ uu_avl_pool_t *uap_prev; ++ ++ char uap_name[UU_AVL_POOL_MAXNAME]; ++ size_t uap_nodeoffset; ++ size_t uap_objsize; ++ uu_compare_fn_t *uap_cmp; ++ uint8_t uap_debug; ++ uint8_t uap_last_index; ++ pthread_mutex_t uap_lock; /* protects null_avl */ ++ uu_avl_t uap_null_avl; ++}; ++ ++/* ++ * atfork() handlers ++ */ ++void uu_avl_lockup(void); ++void uu_avl_release(void); ++ ++void uu_list_lockup(void); ++void uu_list_release(void); ++ ++#ifdef __cplusplus ++} ++#endif ++ ++#endif /* _LIBUUTIL_IMPL_H */ +diff -uNr linux-3.2.33-go.orig/include/zfs/libzfs.h linux-3.2.33-go/include/zfs/libzfs.h +--- linux-3.2.33-go.orig/include/zfs/libzfs.h 1970-01-01 01:00:00.000000000 +0100 ++++ linux-3.2.33-go/include/zfs/libzfs.h 2012-11-16 23:25:34.336039485 +0100 +@@ -0,0 +1,745 @@ ++/* ++ * CDDL HEADER START ++ * ++ * The contents of this file are subject to the terms of the ++ * Common Development and Distribution License (the "License"). ++ * You may not use this file except in compliance with the License. ++ * ++ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE ++ * or http://www.opensolaris.org/os/licensing. 
++ * See the License for the specific language governing permissions ++ * and limitations under the License. ++ * ++ * When distributing Covered Code, include this CDDL HEADER in each ++ * file and include the License file at usr/src/OPENSOLARIS.LICENSE. ++ * If applicable, add the following below this CDDL HEADER, with the ++ * fields enclosed by brackets "[]" replaced with your own identifying ++ * information: Portions Copyright [yyyy] [name of copyright owner] ++ * ++ * CDDL HEADER END ++ */ ++ ++/* ++ * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved. ++ * Copyright 2011 Nexenta Systems, Inc. All rights reserved. ++ * Copyright (c) 2012 by Delphix. All rights reserved. ++ * Copyright (c) 2012, Joyent, Inc. All rights reserved. ++ */ ++ ++#ifndef _LIBZFS_H ++#define _LIBZFS_H ++ ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++ ++#ifdef __cplusplus ++extern "C" { ++#endif ++ ++/* ++ * Miscellaneous ZFS constants ++ */ ++#define ZFS_MAXNAMELEN MAXNAMELEN ++#define ZPOOL_MAXNAMELEN MAXNAMELEN ++#define ZFS_MAXPROPLEN MAXPATHLEN ++#define ZPOOL_MAXPROPLEN MAXPATHLEN ++ ++/* ++ * Default device paths ++ */ ++#define DISK_ROOT "/dev" ++#define UDISK_ROOT "/dev/disk" ++ ++#define DEFAULT_IMPORT_PATH_SIZE 8 ++extern char *zpool_default_import_path[DEFAULT_IMPORT_PATH_SIZE]; ++ ++/* ++ * libzfs errors ++ */ ++enum { ++ EZFS_NOMEM = 2000, /* out of memory */ ++ EZFS_BADPROP, /* invalid property value */ ++ EZFS_PROPREADONLY, /* cannot set readonly property */ ++ EZFS_PROPTYPE, /* property does not apply to dataset type */ ++ EZFS_PROPNONINHERIT, /* property is not inheritable */ ++ EZFS_PROPSPACE, /* bad quota or reservation */ ++ EZFS_BADTYPE, /* dataset is not of appropriate type */ ++ EZFS_BUSY, /* pool or dataset is busy */ ++ EZFS_EXISTS, /* pool or dataset already exists */ ++ EZFS_NOENT, /* no such pool or dataset */ ++ EZFS_BADSTREAM, /* bad backup stream */ ++ EZFS_DSREADONLY, /* dataset is readonly */ ++ EZFS_VOLTOOBIG, /* volume is too large for 32-bit system */ ++ EZFS_INVALIDNAME, /* invalid dataset name */ ++ EZFS_BADRESTORE, /* unable to restore to destination */ ++ EZFS_BADBACKUP, /* backup failed */ ++ EZFS_BADTARGET, /* bad attach/detach/replace target */ ++ EZFS_NODEVICE, /* no such device in pool */ ++ EZFS_BADDEV, /* invalid device to add */ ++ EZFS_NOREPLICAS, /* no valid replicas */ ++ EZFS_RESILVERING, /* currently resilvering */ ++ EZFS_BADVERSION, /* unsupported version */ ++ EZFS_POOLUNAVAIL, /* pool is currently unavailable */ ++ EZFS_DEVOVERFLOW, /* too many devices in one vdev */ ++ EZFS_BADPATH, /* must be an absolute path */ ++ EZFS_CROSSTARGET, /* rename or clone across pool or dataset */ ++ EZFS_ZONED, /* used improperly in local zone */ ++ EZFS_MOUNTFAILED, /* failed to mount dataset */ ++ EZFS_UMOUNTFAILED, /* failed to unmount dataset */ ++ EZFS_UNSHARENFSFAILED, /* unshare(1M) failed */ ++ EZFS_SHARENFSFAILED, /* share(1M) failed */ ++ EZFS_PERM, /* permission denied */ ++ EZFS_NOSPC, /* out of space */ ++ EZFS_FAULT, /* bad address */ ++ EZFS_IO, /* I/O error */ ++ EZFS_INTR, /* signal received */ ++ EZFS_ISSPARE, /* device is a hot spare */ ++ EZFS_INVALCONFIG, /* invalid vdev configuration */ ++ EZFS_RECURSIVE, /* recursive dependency */ ++ EZFS_NOHISTORY, /* no history object */ ++ EZFS_POOLPROPS, /* couldn't retrieve pool props */ ++ EZFS_POOL_NOTSUP, /* ops not supported for this type of pool */ ++ EZFS_POOL_INVALARG, /* invalid argument for this pool operation */ ++ 
EZFS_NAMETOOLONG, /* dataset name is too long */ ++ EZFS_OPENFAILED, /* open of device failed */ ++ EZFS_NOCAP, /* couldn't get capacity */ ++ EZFS_LABELFAILED, /* write of label failed */ ++ EZFS_BADWHO, /* invalid permission who */ ++ EZFS_BADPERM, /* invalid permission */ ++ EZFS_BADPERMSET, /* invalid permission set name */ ++ EZFS_NODELEGATION, /* delegated administration is disabled */ ++ EZFS_UNSHARESMBFAILED, /* failed to unshare over smb */ ++ EZFS_SHARESMBFAILED, /* failed to share over smb */ ++ EZFS_BADCACHE, /* bad cache file */ ++ EZFS_ISL2CACHE, /* device is for the level 2 ARC */ ++ EZFS_VDEVNOTSUP, /* unsupported vdev type */ ++ EZFS_NOTSUP, /* ops not supported on this dataset */ ++ EZFS_ACTIVE_SPARE, /* pool has active shared spare devices */ ++ EZFS_UNPLAYED_LOGS, /* log device has unplayed logs */ ++ EZFS_REFTAG_RELE, /* snapshot release: tag not found */ ++ EZFS_REFTAG_HOLD, /* snapshot hold: tag already exists */ ++ EZFS_TAGTOOLONG, /* snapshot hold/rele: tag too long */ ++ EZFS_PIPEFAILED, /* pipe create failed */ ++ EZFS_THREADCREATEFAILED, /* thread create failed */ ++ EZFS_POSTSPLIT_ONLINE, /* onlining a disk after splitting it */ ++ EZFS_SCRUBBING, /* currently scrubbing */ ++ EZFS_NO_SCRUB, /* no active scrub */ ++ EZFS_DIFF, /* general failure of zfs diff */ ++ EZFS_DIFFDATA, /* bad zfs diff data */ ++ EZFS_POOLREADONLY, /* pool is in read-only mode */ ++ EZFS_UNKNOWN ++}; ++ ++/* ++ * The following data structures are all part ++ * of the zfs_allow_t data structure which is ++ * used for printing 'allow' permissions. ++ * It is a linked list of zfs_allow_t's which ++ * then contain avl tree's for user/group/sets/... ++ * and each one of the entries in those trees have ++ * avl tree's for the permissions they belong to and ++ * whether they are local,descendent or local+descendent ++ * permissions. The AVL trees are used primarily for ++ * sorting purposes, but also so that we can quickly find ++ * a given user and or permission. 
++ */ ++typedef struct zfs_perm_node { ++ avl_node_t z_node; ++ char z_pname[MAXPATHLEN]; ++} zfs_perm_node_t; ++ ++typedef struct zfs_allow_node { ++ avl_node_t z_node; ++ char z_key[MAXPATHLEN]; /* name, such as joe */ ++ avl_tree_t z_localdescend; /* local+descendent perms */ ++ avl_tree_t z_local; /* local permissions */ ++ avl_tree_t z_descend; /* descendent permissions */ ++} zfs_allow_node_t; ++ ++typedef struct zfs_allow { ++ struct zfs_allow *z_next; ++ char z_setpoint[MAXPATHLEN]; ++ avl_tree_t z_sets; ++ avl_tree_t z_crperms; ++ avl_tree_t z_user; ++ avl_tree_t z_group; ++ avl_tree_t z_everyone; ++} zfs_allow_t; ++ ++/* ++ * Basic handle types ++ */ ++typedef struct zfs_handle zfs_handle_t; ++typedef struct zpool_handle zpool_handle_t; ++typedef struct libzfs_handle libzfs_handle_t; ++ ++/* ++ * Library initialization ++ */ ++extern libzfs_handle_t *libzfs_init(void); ++extern void libzfs_fini(libzfs_handle_t *); ++ ++extern libzfs_handle_t *zpool_get_handle(zpool_handle_t *); ++extern libzfs_handle_t *zfs_get_handle(zfs_handle_t *); ++ ++extern void libzfs_print_on_error(libzfs_handle_t *, boolean_t); ++ ++extern int libzfs_errno(libzfs_handle_t *); ++extern const char *libzfs_error_action(libzfs_handle_t *); ++extern const char *libzfs_error_description(libzfs_handle_t *); ++extern void libzfs_mnttab_init(libzfs_handle_t *); ++extern void libzfs_mnttab_fini(libzfs_handle_t *); ++extern void libzfs_mnttab_cache(libzfs_handle_t *, boolean_t); ++extern int libzfs_mnttab_find(libzfs_handle_t *, const char *, ++ struct mnttab *); ++extern void libzfs_mnttab_add(libzfs_handle_t *, const char *, ++ const char *, const char *); ++extern void libzfs_mnttab_remove(libzfs_handle_t *, const char *); ++ ++/* ++ * Basic handle functions ++ */ ++extern zpool_handle_t *zpool_open(libzfs_handle_t *, const char *); ++extern zpool_handle_t *zpool_open_canfail(libzfs_handle_t *, const char *); ++extern void zpool_close(zpool_handle_t *); ++extern const char *zpool_get_name(zpool_handle_t *); ++extern int zpool_get_state(zpool_handle_t *); ++extern char *zpool_state_to_name(vdev_state_t, vdev_aux_t); ++extern void zpool_free_handles(libzfs_handle_t *); ++ ++/* ++ * Iterate over all active pools in the system. 
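As a minimal consumer-side sketch of the handle and error interfaces above (illustrative only, not from the patch; the pool name "tank" is a placeholder, error handling is abbreviated, and <stdio.h> plus this header are assumed):

static int
open_pool_example(void)
{
	libzfs_handle_t *hdl;
	zpool_handle_t *zhp;

	if ((hdl = libzfs_init()) == NULL)
		return (1);
	libzfs_print_on_error(hdl, B_TRUE);

	/* zpool_open_canfail() can return a handle even for faulted pools. */
	if ((zhp = zpool_open_canfail(hdl, "tank")) == NULL) {
		(void) fprintf(stderr, "%s: %s\n",
		    libzfs_error_action(hdl), libzfs_error_description(hdl));
		libzfs_fini(hdl);
		return (1);
	}

	(void) printf("opened pool %s\n", zpool_get_name(zhp));
	zpool_close(zhp);
	libzfs_fini(hdl);
	return (0);
}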
++ */ ++typedef int (*zpool_iter_f)(zpool_handle_t *, void *); ++extern int zpool_iter(libzfs_handle_t *, zpool_iter_f, void *); ++ ++/* ++ * Functions to create and destroy pools ++ */ ++extern int zpool_create(libzfs_handle_t *, const char *, nvlist_t *, ++ nvlist_t *, nvlist_t *); ++extern int zpool_destroy(zpool_handle_t *); ++extern int zpool_add(zpool_handle_t *, nvlist_t *); ++ ++typedef struct splitflags { ++ /* do not split, but return the config that would be split off */ ++ int dryrun : 1; ++ ++ /* after splitting, import the pool */ ++ int import : 1; ++} splitflags_t; ++ ++/* ++ * Functions to manipulate pool and vdev state ++ */ ++extern int zpool_scan(zpool_handle_t *, pool_scan_func_t); ++extern int zpool_clear(zpool_handle_t *, const char *, nvlist_t *); ++extern int zpool_reguid(zpool_handle_t *); ++extern int zpool_reopen(zpool_handle_t *); ++ ++extern int zpool_vdev_online(zpool_handle_t *, const char *, int, ++ vdev_state_t *); ++extern int zpool_vdev_offline(zpool_handle_t *, const char *, boolean_t); ++extern int zpool_vdev_attach(zpool_handle_t *, const char *, ++ const char *, nvlist_t *, int); ++extern int zpool_vdev_detach(zpool_handle_t *, const char *); ++extern int zpool_vdev_remove(zpool_handle_t *, const char *); ++extern int zpool_vdev_split(zpool_handle_t *, char *, nvlist_t **, nvlist_t *, ++ splitflags_t); ++ ++extern int zpool_vdev_fault(zpool_handle_t *, uint64_t, vdev_aux_t); ++extern int zpool_vdev_degrade(zpool_handle_t *, uint64_t, vdev_aux_t); ++extern int zpool_vdev_clear(zpool_handle_t *, uint64_t); ++ ++extern nvlist_t *zpool_find_vdev(zpool_handle_t *, const char *, boolean_t *, ++ boolean_t *, boolean_t *); ++extern nvlist_t *zpool_find_vdev_by_physpath(zpool_handle_t *, const char *, ++ boolean_t *, boolean_t *, boolean_t *); ++extern int zpool_label_disk_wait(char *, int); ++extern int zpool_label_disk(libzfs_handle_t *, zpool_handle_t *, char *); ++ ++/* ++ * Functions to manage pool properties ++ */ ++extern int zpool_set_prop(zpool_handle_t *, const char *, const char *); ++extern int zpool_get_prop(zpool_handle_t *, zpool_prop_t, char *, ++ size_t proplen, zprop_source_t *); ++extern uint64_t zpool_get_prop_int(zpool_handle_t *, zpool_prop_t, ++ zprop_source_t *); ++ ++extern const char *zpool_prop_to_name(zpool_prop_t); ++extern const char *zpool_prop_values(zpool_prop_t); ++ ++/* ++ * Pool health statistics. ++ */ ++typedef enum { ++ /* ++ * The following correspond to faults as defined in the (fault.fs.zfs.*) ++ * event namespace. Each is associated with a corresponding message ID. 
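A sketch (not from the patch) of enumerating active pools with zpool_iter(): the callback prints each pool's name and its "health" property. It assumes the callback owns, and therefore closes, the handle it is given, and that <stdio.h> is available.

static int
print_pool_cb(zpool_handle_t *zhp, void *data)
{
	char health[ZFS_MAXPROPLEN];

	(void) data;
	if (zpool_get_prop(zhp, ZPOOL_PROP_HEALTH, health,
	    sizeof (health), NULL) != 0)
		(void) snprintf(health, sizeof (health), "-");

	(void) printf("%-20s %s\n", zpool_get_name(zhp), health);
	zpool_close(zhp);	/* the callback owns the handle in this sketch */
	return (0);		/* non-zero would stop the iteration */
}

/* Later, with an initialized libzfs_handle_t *hdl: */
/*	(void) zpool_iter(hdl, print_pool_cb, NULL); */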
++ */ ++ ZPOOL_STATUS_CORRUPT_CACHE, /* corrupt /kernel/drv/zpool.cache */ ++ ZPOOL_STATUS_MISSING_DEV_R, /* missing device with replicas */ ++ ZPOOL_STATUS_MISSING_DEV_NR, /* missing device with no replicas */ ++ ZPOOL_STATUS_CORRUPT_LABEL_R, /* bad device label with replicas */ ++ ZPOOL_STATUS_CORRUPT_LABEL_NR, /* bad device label with no replicas */ ++ ZPOOL_STATUS_BAD_GUID_SUM, /* sum of device guids didn't match */ ++ ZPOOL_STATUS_CORRUPT_POOL, /* pool metadata is corrupted */ ++ ZPOOL_STATUS_CORRUPT_DATA, /* data errors in user (meta)data */ ++ ZPOOL_STATUS_FAILING_DEV, /* device experiencing errors */ ++ ZPOOL_STATUS_VERSION_NEWER, /* newer on-disk version */ ++ ZPOOL_STATUS_HOSTID_MISMATCH, /* last accessed by another system */ ++ ZPOOL_STATUS_IO_FAILURE_WAIT, /* failed I/O, failmode 'wait' */ ++ ZPOOL_STATUS_IO_FAILURE_CONTINUE, /* failed I/O, failmode 'continue' */ ++ ZPOOL_STATUS_BAD_LOG, /* cannot read log chain(s) */ ++ ++ /* ++ * These faults have no corresponding message ID. At the time we are ++ * checking the status, the original reason for the FMA fault (I/O or ++ * checksum errors) has been lost. ++ */ ++ ZPOOL_STATUS_FAULTED_DEV_R, /* faulted device with replicas */ ++ ZPOOL_STATUS_FAULTED_DEV_NR, /* faulted device with no replicas */ ++ ++ /* ++ * The following are not faults per se, but still an error possibly ++ * requiring administrative attention. There is no corresponding ++ * message ID. ++ */ ++ ZPOOL_STATUS_VERSION_OLDER, /* older on-disk version */ ++ ZPOOL_STATUS_RESILVERING, /* device being resilvered */ ++ ZPOOL_STATUS_OFFLINE_DEV, /* device online */ ++ ZPOOL_STATUS_REMOVED_DEV, /* removed device */ ++ ++ /* ++ * Finally, the following indicates a healthy pool. ++ */ ++ ZPOOL_STATUS_OK ++} zpool_status_t; ++ ++extern zpool_status_t zpool_get_status(zpool_handle_t *, char **); ++extern zpool_status_t zpool_import_status(nvlist_t *, char **); ++extern void zpool_dump_ddt(const ddt_stat_t *dds, const ddt_histogram_t *ddh); ++ ++/* ++ * Statistics and configuration functions. ++ */ ++extern nvlist_t *zpool_get_config(zpool_handle_t *, nvlist_t **); ++extern int zpool_refresh_stats(zpool_handle_t *, boolean_t *); ++extern int zpool_get_errlog(zpool_handle_t *, nvlist_t **); ++ ++/* ++ * Import and export functions ++ */ ++extern int zpool_export(zpool_handle_t *, boolean_t); ++extern int zpool_export_force(zpool_handle_t *); ++extern int zpool_import(libzfs_handle_t *, nvlist_t *, const char *, ++ char *altroot); ++extern int zpool_import_props(libzfs_handle_t *, nvlist_t *, const char *, ++ nvlist_t *, int); ++ ++/* ++ * Search for pools to import ++ */ ++ ++typedef struct importargs { ++ char **path; /* a list of paths to search */ ++ int paths; /* number of paths to search */ ++ char *poolname; /* name of a pool to find */ ++ uint64_t guid; /* guid of a pool to find */ ++ char *cachefile; /* cachefile to use for import */ ++ int can_be_active : 1; /* can the pool be active? */ ++ int unique : 1; /* does 'poolname' already exist? 
*/ ++ int exists : 1; /* set on return if pool already exists */ ++} importargs_t; ++ ++extern nvlist_t *zpool_search_import(libzfs_handle_t *, importargs_t *); ++ ++/* legacy pool search routines */ ++extern nvlist_t *zpool_find_import(libzfs_handle_t *, int, char **); ++extern nvlist_t *zpool_find_import_cached(libzfs_handle_t *, const char *, ++ char *, uint64_t); ++ ++/* ++ * Miscellaneous pool functions ++ */ ++struct zfs_cmd; ++ ++extern const char *zfs_history_event_names[LOG_END]; ++ ++extern char *zpool_vdev_name(libzfs_handle_t *, zpool_handle_t *, nvlist_t *, ++ boolean_t verbose); ++extern int zpool_upgrade(zpool_handle_t *, uint64_t); ++extern int zpool_get_history(zpool_handle_t *, nvlist_t **); ++extern int zpool_history_unpack(char *, uint64_t, uint64_t *, ++ nvlist_t ***, uint_t *); ++extern void zpool_set_history_str(const char *subcommand, int argc, ++ char **argv, char *history_str); ++extern int zpool_stage_history(libzfs_handle_t *, const char *); ++extern int zpool_events_next(libzfs_handle_t *, nvlist_t **, int *, int, int); ++extern int zpool_events_clear(libzfs_handle_t *, int *); ++extern void zpool_obj_to_path(zpool_handle_t *, uint64_t, uint64_t, char *, ++ size_t len); ++extern int zfs_ioctl(libzfs_handle_t *, int, struct zfs_cmd *); ++extern int zpool_get_physpath(zpool_handle_t *, char *, size_t); ++extern void zpool_explain_recover(libzfs_handle_t *, const char *, int, ++ nvlist_t *); ++ ++/* ++ * Basic handle manipulations. These functions do not create or destroy the ++ * underlying datasets, only the references to them. ++ */ ++extern zfs_handle_t *zfs_open(libzfs_handle_t *, const char *, int); ++extern zfs_handle_t *zfs_handle_dup(zfs_handle_t *); ++extern void zfs_close(zfs_handle_t *); ++extern zfs_type_t zfs_get_type(const zfs_handle_t *); ++extern const char *zfs_get_name(const zfs_handle_t *); ++extern zpool_handle_t *zfs_get_pool_handle(const zfs_handle_t *); ++ ++/* ++ * Property management functions. Some functions are shared with the kernel, ++ * and are found in sys/fs/zfs.h. 
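A sketch (not from the patch) of the basic dataset-handle lifecycle declared above; "tank/home" is a placeholder name and hdl is a libzfs_handle_t obtained from libzfs_init().

static void
dataset_handle_example(libzfs_handle_t *hdl)
{
	zfs_handle_t *zhp;

	/* ZFS_TYPE_FILESYSTEM restricts the open to filesystem datasets. */
	if ((zhp = zfs_open(hdl, "tank/home", ZFS_TYPE_FILESYSTEM)) == NULL)
		return;	/* libzfs has already recorded the error */

	(void) printf("opened %s\n", zfs_get_name(zhp));
	zfs_close(zhp);
}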
++ */ ++ ++/* ++ * zfs dataset property management ++ */ ++extern const char *zfs_prop_default_string(zfs_prop_t); ++extern uint64_t zfs_prop_default_numeric(zfs_prop_t); ++extern const char *zfs_prop_column_name(zfs_prop_t); ++extern boolean_t zfs_prop_align_right(zfs_prop_t); ++ ++extern nvlist_t *zfs_valid_proplist(libzfs_handle_t *, zfs_type_t, ++ nvlist_t *, uint64_t, zfs_handle_t *, const char *); ++ ++extern const char *zfs_prop_to_name(zfs_prop_t); ++extern int zfs_prop_set(zfs_handle_t *, const char *, const char *); ++extern int zfs_prop_get(zfs_handle_t *, zfs_prop_t, char *, size_t, ++ zprop_source_t *, char *, size_t, boolean_t); ++extern int zfs_prop_get_recvd(zfs_handle_t *, const char *, char *, size_t, ++ boolean_t); ++extern int zfs_prop_get_numeric(zfs_handle_t *, zfs_prop_t, uint64_t *, ++ zprop_source_t *, char *, size_t); ++extern int zfs_prop_get_userquota_int(zfs_handle_t *zhp, const char *propname, ++ uint64_t *propvalue); ++extern int zfs_prop_get_userquota(zfs_handle_t *zhp, const char *propname, ++ char *propbuf, int proplen, boolean_t literal); ++extern int zfs_prop_get_written_int(zfs_handle_t *zhp, const char *propname, ++ uint64_t *propvalue); ++extern int zfs_prop_get_written(zfs_handle_t *zhp, const char *propname, ++ char *propbuf, int proplen, boolean_t literal); ++extern int zfs_get_snapused_int(zfs_handle_t *firstsnap, zfs_handle_t *lastsnap, ++ uint64_t *usedp); ++extern uint64_t getprop_uint64(zfs_handle_t *, zfs_prop_t, char **); ++extern uint64_t zfs_prop_get_int(zfs_handle_t *, zfs_prop_t); ++extern int zfs_prop_inherit(zfs_handle_t *, const char *, boolean_t); ++extern const char *zfs_prop_values(zfs_prop_t); ++extern int zfs_prop_is_string(zfs_prop_t prop); ++extern nvlist_t *zfs_get_user_props(zfs_handle_t *); ++extern nvlist_t *zfs_get_recvd_props(zfs_handle_t *); ++extern nvlist_t *zfs_get_clones_nvl(zfs_handle_t *); ++ ++typedef struct zprop_list { ++ int pl_prop; ++ char *pl_user_prop; ++ struct zprop_list *pl_next; ++ boolean_t pl_all; ++ size_t pl_width; ++ size_t pl_recvd_width; ++ boolean_t pl_fixed; ++} zprop_list_t; ++ ++extern int zfs_expand_proplist(zfs_handle_t *, zprop_list_t **, boolean_t); ++extern void zfs_prune_proplist(zfs_handle_t *, uint8_t *); ++ ++#define ZFS_MOUNTPOINT_NONE "none" ++#define ZFS_MOUNTPOINT_LEGACY "legacy" ++ ++/* ++ * zpool property management ++ */ ++extern int zpool_expand_proplist(zpool_handle_t *, zprop_list_t **); ++extern const char *zpool_prop_default_string(zpool_prop_t); ++extern uint64_t zpool_prop_default_numeric(zpool_prop_t); ++extern const char *zpool_prop_column_name(zpool_prop_t); ++extern boolean_t zpool_prop_align_right(zpool_prop_t); ++ ++/* ++ * Functions shared by zfs and zpool property management. 
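A sketch (not from the patch) of reading and setting a single dataset property with the functions above; zhp is an open zfs_handle_t and error handling is reduced to return-value checks.

static void
prop_example(zfs_handle_t *zhp)
{
	char value[ZFS_MAXPROPLEN];
	zprop_source_t src;

	/* Current value of "compression", formatted for humans (B_FALSE). */
	if (zfs_prop_get(zhp, ZFS_PROP_COMPRESSION, value, sizeof (value),
	    &src, NULL, 0, B_FALSE) == 0)
		(void) printf("%s=%s\n",
		    zfs_prop_to_name(ZFS_PROP_COMPRESSION), value);

	/* Property names are passed as strings when setting. */
	(void) zfs_prop_set(zhp, zfs_prop_to_name(ZFS_PROP_COMPRESSION), "on");
}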
++ */ ++extern int zprop_iter(zprop_func func, void *cb, boolean_t show_all, ++ boolean_t ordered, zfs_type_t type); ++extern int zprop_get_list(libzfs_handle_t *, char *, zprop_list_t **, ++ zfs_type_t); ++extern void zprop_free_list(zprop_list_t *); ++ ++#define ZFS_GET_NCOLS 5 ++ ++typedef enum { ++ GET_COL_NONE, ++ GET_COL_NAME, ++ GET_COL_PROPERTY, ++ GET_COL_VALUE, ++ GET_COL_RECVD, ++ GET_COL_SOURCE ++} zfs_get_column_t; ++ ++/* ++ * Functions for printing zfs or zpool properties ++ */ ++typedef struct zprop_get_cbdata { ++ int cb_sources; ++ zfs_get_column_t cb_columns[ZFS_GET_NCOLS]; ++ int cb_colwidths[ZFS_GET_NCOLS + 1]; ++ boolean_t cb_scripted; ++ boolean_t cb_literal; ++ boolean_t cb_first; ++ zprop_list_t *cb_proplist; ++ zfs_type_t cb_type; ++} zprop_get_cbdata_t; ++ ++void zprop_print_one_property(const char *, zprop_get_cbdata_t *, ++ const char *, const char *, zprop_source_t, const char *, ++ const char *); ++ ++/* ++ * Iterator functions. ++ */ ++typedef int (*zfs_iter_f)(zfs_handle_t *, void *); ++extern int zfs_iter_root(libzfs_handle_t *, zfs_iter_f, void *); ++extern int zfs_iter_children(zfs_handle_t *, zfs_iter_f, void *); ++extern int zfs_iter_dependents(zfs_handle_t *, boolean_t, zfs_iter_f, void *); ++extern int zfs_iter_filesystems(zfs_handle_t *, zfs_iter_f, void *); ++extern int zfs_iter_snapshots(zfs_handle_t *, boolean_t, zfs_iter_f, void *); ++extern int zfs_iter_snapshots_sorted(zfs_handle_t *, zfs_iter_f, void *); ++extern int zfs_iter_snapspec(zfs_handle_t *, const char *, zfs_iter_f, void *); ++ ++typedef struct get_all_cb { ++ zfs_handle_t **cb_handles; ++ size_t cb_alloc; ++ size_t cb_used; ++ boolean_t cb_verbose; ++ int (*cb_getone)(zfs_handle_t *, void *); ++} get_all_cb_t; ++ ++void libzfs_add_handle(get_all_cb_t *, zfs_handle_t *); ++int libzfs_dataset_cmp(const void *, const void *); ++ ++/* ++ * Functions to create and destroy datasets. ++ */ ++extern int zfs_create(libzfs_handle_t *, const char *, zfs_type_t, ++ nvlist_t *); ++extern int zfs_create_ancestors(libzfs_handle_t *, const char *); ++extern int zfs_destroy(zfs_handle_t *, boolean_t); ++extern int zfs_destroy_snaps(zfs_handle_t *, char *, boolean_t); ++extern int zfs_destroy_snaps_nvl(zfs_handle_t *, nvlist_t *, boolean_t); ++extern int zfs_clone(zfs_handle_t *, const char *, nvlist_t *); ++extern int zfs_snapshot(libzfs_handle_t *, const char *, boolean_t, nvlist_t *); ++extern int zfs_rollback(zfs_handle_t *, zfs_handle_t *, boolean_t); ++extern int zfs_rename(zfs_handle_t *, const char *, boolean_t, boolean_t); ++ ++typedef struct sendflags { ++ /* print informational messages (ie, -v was specified) */ ++ boolean_t verbose; ++ ++ /* recursive send (ie, -R) */ ++ boolean_t replicate; ++ ++ /* for incrementals, do all intermediate snapshots */ ++ boolean_t doall; ++ ++ /* if dataset is a clone, do incremental from its origin */ ++ boolean_t fromorigin; ++ ++ /* do deduplication */ ++ boolean_t dedup; ++ ++ /* send properties (ie, -p) */ ++ boolean_t props; ++ ++ /* do not send (no-op, ie. -n) */ ++ boolean_t dryrun; ++ ++ /* parsable verbose output (ie. -P) */ ++ boolean_t parsable; ++ ++ /* show progress (ie. 
-v) */ ++ boolean_t progress; ++} sendflags_t; ++ ++typedef boolean_t (snapfilter_cb_t)(zfs_handle_t *, void *); ++ ++extern int zfs_send(zfs_handle_t *, const char *, const char *, ++ sendflags_t *, int, snapfilter_cb_t, void *, nvlist_t **); ++ ++extern int zfs_promote(zfs_handle_t *); ++extern int zfs_hold(zfs_handle_t *, const char *, const char *, boolean_t, ++ boolean_t, boolean_t, int, uint64_t, uint64_t); ++extern int zfs_release(zfs_handle_t *, const char *, const char *, boolean_t); ++extern int zfs_get_holds(zfs_handle_t *, nvlist_t **); ++extern uint64_t zvol_volsize_to_reservation(uint64_t, nvlist_t *); ++ ++typedef int (*zfs_userspace_cb_t)(void *arg, const char *domain, ++ uid_t rid, uint64_t space); ++ ++extern int zfs_userspace(zfs_handle_t *, zfs_userquota_prop_t, ++ zfs_userspace_cb_t, void *); ++ ++extern int zfs_get_fsacl(zfs_handle_t *, nvlist_t **); ++extern int zfs_set_fsacl(zfs_handle_t *, boolean_t, nvlist_t *); ++ ++typedef struct recvflags { ++ /* print informational messages (ie, -v was specified) */ ++ boolean_t verbose; ++ ++ /* the destination is a prefix, not the exact fs (ie, -d) */ ++ boolean_t isprefix; ++ ++ /* ++ * Only the tail of the sent snapshot path is appended to the ++ * destination to determine the received snapshot name (ie, -e). ++ */ ++ boolean_t istail; ++ ++ /* do not actually do the recv, just check if it would work (ie, -n) */ ++ boolean_t dryrun; ++ ++ /* rollback/destroy filesystems as necessary (eg, -F) */ ++ boolean_t force; ++ ++ /* set "canmount=off" on all modified filesystems */ ++ boolean_t canmountoff; ++ ++ /* byteswap flag is used internally; callers need not specify */ ++ boolean_t byteswap; ++ ++ /* do not mount file systems as they are extracted (private) */ ++ boolean_t nomount; ++} recvflags_t; ++ ++extern int zfs_receive(libzfs_handle_t *, const char *, recvflags_t *, ++ int, avl_tree_t *); ++ ++typedef enum diff_flags { ++ ZFS_DIFF_PARSEABLE = 0x1, ++ ZFS_DIFF_TIMESTAMP = 0x2, ++ ZFS_DIFF_CLASSIFY = 0x4 ++} diff_flags_t; ++ ++extern int zfs_show_diffs(zfs_handle_t *, int, const char *, const char *, ++ int); ++ ++/* ++ * Miscellaneous functions. ++ */ ++extern const char *zfs_type_to_name(zfs_type_t); ++extern void zfs_refresh_properties(zfs_handle_t *); ++extern int zfs_name_valid(const char *, zfs_type_t); ++extern zfs_handle_t *zfs_path_to_zhandle(libzfs_handle_t *, char *, zfs_type_t); ++extern boolean_t zfs_dataset_exists(libzfs_handle_t *, const char *, ++ zfs_type_t); ++extern int zfs_spa_version(zfs_handle_t *, int *); ++extern int zfs_append_partition(char *path, size_t max_len); ++extern int zfs_resolve_shortname(const char *name, char *path, size_t pathlen); ++extern int zfs_strcmp_pathname(char *name, char *cmp_name, int wholedisk); ++ ++/* ++ * Mount support functions. ++ */ ++extern boolean_t is_mounted(libzfs_handle_t *, const char *special, char **); ++extern boolean_t zfs_is_mounted(zfs_handle_t *, char **); ++extern int zfs_mount(zfs_handle_t *, const char *, int); ++extern int zfs_unmount(zfs_handle_t *, const char *, int); ++extern int zfs_unmountall(zfs_handle_t *, int); ++ ++/* ++ * Share support functions. ++ */ ++extern boolean_t zfs_is_shared(zfs_handle_t *); ++extern int zfs_share(zfs_handle_t *); ++extern int zfs_unshare(zfs_handle_t *); ++ ++/* ++ * Protocol-specific share support functions. 
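A sketch (not from the patch) of the common mount-then-share sequence built from the helpers above; zhp is an open filesystem handle, mount options and flags are left empty, and <stdlib.h> is assumed for free().

static int
mount_and_share(zfs_handle_t *zhp)
{
	char *mntpt = NULL;

	if (!zfs_is_mounted(zhp, &mntpt)) {
		if (zfs_mount(zhp, NULL, 0) != 0)
			return (-1);
	} else {
		free(mntpt);	/* zfs_is_mounted() returns the mountpoint */
	}

	/* Shares according to the sharenfs/sharesmb properties, if set. */
	return (zfs_share(zhp));
}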
++ */ ++extern boolean_t zfs_is_shared_nfs(zfs_handle_t *, char **); ++extern boolean_t zfs_is_shared_smb(zfs_handle_t *, char **); ++extern int zfs_share_nfs(zfs_handle_t *); ++extern int zfs_share_smb(zfs_handle_t *); ++extern int zfs_shareall(zfs_handle_t *); ++extern int zfs_unshare_nfs(zfs_handle_t *, const char *); ++extern int zfs_unshare_smb(zfs_handle_t *, const char *); ++extern int zfs_unshareall_nfs(zfs_handle_t *); ++extern int zfs_unshareall_smb(zfs_handle_t *); ++extern int zfs_unshareall_bypath(zfs_handle_t *, const char *); ++extern int zfs_unshareall(zfs_handle_t *); ++extern int zfs_deleg_share_nfs(libzfs_handle_t *, char *, char *, char *, ++ void *, void *, int, zfs_share_op_t); ++ ++/* ++ * Utility function to convert a number to a human-readable form. ++ */ ++extern void zfs_nicenum(uint64_t, char *, size_t); ++extern int zfs_nicestrtonum(libzfs_handle_t *, const char *, uint64_t *); ++ ++/* ++ * Utility functions to run an external process. ++ */ ++#define STDOUT_VERBOSE 0x01 ++#define STDERR_VERBOSE 0x02 ++ ++int libzfs_run_process(const char *, char **, int flags); ++int libzfs_load_module(const char *); ++ ++/* ++ * Given a device or file, determine if it is part of a pool. ++ */ ++extern int zpool_in_use(libzfs_handle_t *, int, pool_state_t *, char **, ++ boolean_t *); ++ ++/* ++ * Label manipulation. ++ */ ++extern int zpool_read_label(int, nvlist_t **); ++extern int zpool_clear_label(int); ++ ++/* ++ * Management interfaces for SMB ACL files ++ */ ++ ++int zfs_smb_acl_add(libzfs_handle_t *, char *, char *, char *); ++int zfs_smb_acl_remove(libzfs_handle_t *, char *, char *, char *); ++int zfs_smb_acl_purge(libzfs_handle_t *, char *, char *); ++int zfs_smb_acl_rename(libzfs_handle_t *, char *, char *, char *, char *); ++ ++/* ++ * Enable and disable datasets within a pool by mounting/unmounting and ++ * sharing/unsharing them. ++ */ ++extern int zpool_enable_datasets(zpool_handle_t *, const char *, int); ++extern int zpool_disable_datasets(zpool_handle_t *, boolean_t); ++ ++/* ++ * Mappings between vdev and FRU. ++ */ ++extern void libzfs_fru_refresh(libzfs_handle_t *); ++extern const char *libzfs_fru_lookup(libzfs_handle_t *, const char *); ++extern const char *libzfs_fru_devpath(libzfs_handle_t *, const char *); ++extern boolean_t libzfs_fru_compare(libzfs_handle_t *, const char *, ++ const char *); ++extern boolean_t libzfs_fru_notself(libzfs_handle_t *, const char *); ++extern int zpool_fru_set(zpool_handle_t *, uint64_t, const char *); ++ ++#ifdef __cplusplus ++} ++#endif ++ ++#endif /* _LIBZFS_H */ +diff -uNr linux-3.2.33-go.orig/include/zfs/libzfs_impl.h linux-3.2.33-go/include/zfs/libzfs_impl.h +--- linux-3.2.33-go.orig/include/zfs/libzfs_impl.h 1970-01-01 01:00:00.000000000 +0100 ++++ linux-3.2.33-go/include/zfs/libzfs_impl.h 2012-11-16 23:25:34.336039485 +0100 +@@ -0,0 +1,222 @@ ++/* ++ * CDDL HEADER SART ++ * ++ * The contents of this file are subject to the terms of the ++ * Common Development and Distribution License (the "License"). ++ * You may not use this file except in compliance with the License. ++ * ++ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE ++ * or http://www.opensolaris.org/os/licensing. ++ * See the License for the specific language governing permissions ++ * and limitations under the License. ++ * ++ * When distributing Covered Code, include this CDDL HEADER in each ++ * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 
++ * If applicable, add the following below this CDDL HEADER, with the ++ * fields enclosed by brackets "[]" replaced with your own identifying ++ * information: Portions Copyright [yyyy] [name of copyright owner] ++ * ++ * CDDL HEADER END ++ */ ++ ++/* ++ * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2011 by Delphix. All rights reserved. ++ */ ++ ++#ifndef _LIBFS_IMPL_H ++#define _LIBFS_IMPL_H ++ ++#include ++#include ++#include ++#include ++#include ++ ++#include ++#include ++#include ++ ++#if defined(HAVE_LIBTOPO) ++#include ++#endif /* HAVE_LIBTOPO */ ++ ++#ifdef __cplusplus ++extern "C" { ++#endif ++ ++#ifdef VERIFY ++#undef VERIFY ++#endif ++#define VERIFY verify ++ ++typedef struct libzfs_fru { ++ char *zf_device; ++ char *zf_fru; ++ struct libzfs_fru *zf_chain; ++ struct libzfs_fru *zf_next; ++} libzfs_fru_t; ++ ++struct libzfs_handle { ++ int libzfs_error; ++ int libzfs_fd; ++ FILE *libzfs_mnttab; ++ FILE *libzfs_sharetab; ++ zpool_handle_t *libzfs_pool_handles; ++ uu_avl_pool_t *libzfs_ns_avlpool; ++ uu_avl_t *libzfs_ns_avl; ++ uint64_t libzfs_ns_gen; ++ int libzfs_desc_active; ++ char libzfs_action[1024]; ++ char libzfs_desc[1024]; ++ char *libzfs_log_str; ++ int libzfs_printerr; ++ int libzfs_storeerr; /* stuff error messages into buffer */ ++ void *libzfs_sharehdl; /* libshare handle */ ++ uint_t libzfs_shareflags; ++ boolean_t libzfs_mnttab_enable; ++ avl_tree_t libzfs_mnttab_cache; ++ int libzfs_pool_iter; ++#if defined(HAVE_LIBTOPO) ++ topo_hdl_t *libzfs_topo_hdl; ++ libzfs_fru_t **libzfs_fru_hash; ++ libzfs_fru_t *libzfs_fru_list; ++#endif /* HAVE_LIBTOPO */ ++ char libzfs_chassis_id[256]; ++}; ++ ++#define ZFSSHARE_MISS 0x01 /* Didn't find entry in cache */ ++ ++struct zfs_handle { ++ libzfs_handle_t *zfs_hdl; ++ zpool_handle_t *zpool_hdl; ++ char zfs_name[ZFS_MAXNAMELEN]; ++ zfs_type_t zfs_type; /* type including snapshot */ ++ zfs_type_t zfs_head_type; /* type excluding snapshot */ ++ dmu_objset_stats_t zfs_dmustats; ++ nvlist_t *zfs_props; ++ nvlist_t *zfs_user_props; ++ nvlist_t *zfs_recvd_props; ++ boolean_t zfs_mntcheck; ++ char *zfs_mntopts; ++ uint8_t *zfs_props_table; ++}; ++ ++/* ++ * This is different from checking zfs_type, because it will also catch ++ * snapshots of volumes. ++ */ ++#define ZFS_IS_VOLUME(zhp) ((zhp)->zfs_head_type == ZFS_TYPE_VOLUME) ++ ++struct zpool_handle { ++ libzfs_handle_t *zpool_hdl; ++ zpool_handle_t *zpool_next; ++ char zpool_name[ZPOOL_MAXNAMELEN]; ++ int zpool_state; ++ size_t zpool_config_size; ++ nvlist_t *zpool_config; ++ nvlist_t *zpool_old_config; ++ nvlist_t *zpool_props; ++ diskaddr_t zpool_start_block; ++}; ++ ++typedef enum { ++ PROTO_NFS = 0, ++ PROTO_SMB = 1, ++ PROTO_END = 2 ++} zfs_share_proto_t; ++ ++/* ++ * The following can be used as a bitmask and any new values ++ * added must preserve that capability. 
++ */ ++typedef enum { ++ SHARED_NOT_SHARED = 0x0, ++ SHARED_NFS = 0x2, ++ SHARED_SMB = 0x4 ++} zfs_share_type_t; ++ ++int zfs_error(libzfs_handle_t *, int, const char *); ++int zfs_error_fmt(libzfs_handle_t *, int, const char *, ...); ++void zfs_error_aux(libzfs_handle_t *, const char *, ...); ++void *zfs_alloc(libzfs_handle_t *, size_t); ++void *zfs_realloc(libzfs_handle_t *, void *, size_t, size_t); ++char *zfs_asprintf(libzfs_handle_t *, const char *, ...); ++char *zfs_strdup(libzfs_handle_t *, const char *); ++int no_memory(libzfs_handle_t *); ++ ++int zfs_standard_error(libzfs_handle_t *, int, const char *); ++int zfs_standard_error_fmt(libzfs_handle_t *, int, const char *, ...); ++int zpool_standard_error(libzfs_handle_t *, int, const char *); ++int zpool_standard_error_fmt(libzfs_handle_t *, int, const char *, ...); ++ ++int get_dependents(libzfs_handle_t *, boolean_t, const char *, char ***, ++ size_t *); ++zfs_handle_t *make_dataset_handle_zc(libzfs_handle_t *, zfs_cmd_t *); ++zfs_handle_t *make_dataset_simple_handle_zc(zfs_handle_t *, zfs_cmd_t *); ++ ++int zprop_parse_value(libzfs_handle_t *, nvpair_t *, int, zfs_type_t, ++ nvlist_t *, char **, uint64_t *, const char *); ++int zprop_expand_list(libzfs_handle_t *hdl, zprop_list_t **plp, ++ zfs_type_t type); ++ ++/* ++ * Use this changelist_gather() flag to force attempting mounts ++ * on each change node regardless of whether or not it is currently ++ * mounted. ++ */ ++#define CL_GATHER_MOUNT_ALWAYS 1 ++ ++typedef struct prop_changelist prop_changelist_t; ++ ++int zcmd_alloc_dst_nvlist(libzfs_handle_t *, zfs_cmd_t *, size_t); ++int zcmd_write_src_nvlist(libzfs_handle_t *, zfs_cmd_t *, nvlist_t *); ++int zcmd_write_conf_nvlist(libzfs_handle_t *, zfs_cmd_t *, nvlist_t *); ++int zcmd_expand_dst_nvlist(libzfs_handle_t *, zfs_cmd_t *); ++int zcmd_read_dst_nvlist(libzfs_handle_t *, zfs_cmd_t *, nvlist_t **); ++void zcmd_free_nvlists(zfs_cmd_t *); ++ ++int changelist_prefix(prop_changelist_t *); ++int changelist_postfix(prop_changelist_t *); ++void changelist_rename(prop_changelist_t *, const char *, const char *); ++void changelist_remove(prop_changelist_t *, const char *); ++void changelist_free(prop_changelist_t *); ++prop_changelist_t *changelist_gather(zfs_handle_t *, zfs_prop_t, int, int); ++int changelist_unshare(prop_changelist_t *, zfs_share_proto_t *); ++int changelist_haszonedchild(prop_changelist_t *); ++ ++void remove_mountpoint(zfs_handle_t *); ++int create_parents(libzfs_handle_t *, char *, int); ++boolean_t isa_child_of(const char *dataset, const char *parent); ++ ++zfs_handle_t *make_dataset_handle(libzfs_handle_t *, const char *); ++ ++int zpool_open_silent(libzfs_handle_t *, const char *, zpool_handle_t **); ++ ++int zvol_create_link(libzfs_handle_t *, const char *); ++int zvol_remove_link(libzfs_handle_t *, const char *); ++boolean_t zpool_name_valid(libzfs_handle_t *, boolean_t, const char *); ++ ++int zfs_validate_name(libzfs_handle_t *hdl, const char *path, int type, ++ boolean_t modifying); ++ ++void namespace_clear(libzfs_handle_t *); ++ ++/* ++ * libshare (sharemgr) interfaces used internally. 
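The changelist helpers declared above are internal to libzfs; the sketch below (not from the patch, with do_change() as a hypothetical stand-in for the actual ioctl) shows the usual gather/prefix/postfix pattern used when a property change may require datasets to be unmounted or unshared and then restored.

static int do_change(zfs_handle_t *, zfs_prop_t);	/* hypothetical */

static int
set_prop_with_changelist(zfs_handle_t *zhp, zfs_prop_t prop)
{
	prop_changelist_t *cl;
	int err;

	if ((cl = changelist_gather(zhp, prop, 0, 0)) == NULL)
		return (-1);

	/* Unmount/unshare anything the change will affect... */
	if ((err = changelist_prefix(cl)) == 0) {
		err = do_change(zhp, prop);
		/* ...then remount/reshare it afterwards. */
		(void) changelist_postfix(cl);
	}

	changelist_free(cl);
	return (err);
}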
++ */ ++ ++extern int zfs_init_libshare(libzfs_handle_t *, int); ++extern void zfs_uninit_libshare(libzfs_handle_t *); ++extern int zfs_parse_options(char *, zfs_share_proto_t); ++ ++extern int zfs_unshare_proto(zfs_handle_t *, ++ const char *, zfs_share_proto_t *); ++ ++extern void libzfs_fru_clear(libzfs_handle_t *, boolean_t); ++ ++#ifdef __cplusplus ++} ++#endif ++ ++#endif /* _LIBFS_IMPL_H */ +diff -uNr linux-3.2.33-go.orig/include/zfs/linux/blkdev_compat.h linux-3.2.33-go/include/zfs/linux/blkdev_compat.h +--- linux-3.2.33-go.orig/include/zfs/linux/blkdev_compat.h 1970-01-01 01:00:00.000000000 +0100 ++++ linux-3.2.33-go/include/zfs/linux/blkdev_compat.h 2012-11-16 23:25:34.345039382 +0100 +@@ -0,0 +1,458 @@ ++/* ++ * CDDL HEADER START ++ * ++ * The contents of this file are subject to the terms of the ++ * Common Development and Distribution License (the "License"). ++ * You may not use this file except in compliance with the License. ++ * ++ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE ++ * or http://www.opensolaris.org/os/licensing. ++ * See the License for the specific language governing permissions ++ * and limitations under the License. ++ * ++ * When distributing Covered Code, include this CDDL HEADER in each ++ * file and include the License file at usr/src/OPENSOLARIS.LICENSE. ++ * If applicable, add the following below this CDDL HEADER, with the ++ * fields enclosed by brackets "[]" replaced with your own identifying ++ * information: Portions Copyright [yyyy] [name of copyright owner] ++ * ++ * CDDL HEADER END ++ */ ++ ++/* ++ * Copyright (C) 2011 Lawrence Livermore National Security, LLC. ++ * Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER). ++ * Written by Brian Behlendorf . ++ * LLNL-CODE-403049. ++ */ ++ ++#ifndef _ZFS_BLKDEV_H ++#define _ZFS_BLKDEV_H ++ ++#include ++#include ++ ++#ifndef HAVE_FMODE_T ++typedef unsigned __bitwise__ fmode_t; ++#endif /* HAVE_FMODE_T */ ++ ++#ifndef HAVE_BLK_FETCH_REQUEST ++static inline struct request * ++blk_fetch_request(struct request_queue *q) ++{ ++ struct request *req; ++ ++ req = elv_next_request(q); ++ if (req) ++ blkdev_dequeue_request(req); ++ ++ return req; ++} ++#endif /* HAVE_BLK_FETCH_REQUEST */ ++ ++#ifndef HAVE_BLK_REQUEUE_REQUEST ++static inline void ++blk_requeue_request(request_queue_t *q, struct request *req) ++{ ++ elv_requeue_request(q, req); ++} ++#endif /* HAVE_BLK_REQUEUE_REQUEST */ ++ ++#ifndef HAVE_BLK_END_REQUEST ++static inline bool ++__blk_end_request(struct request *req, int error, unsigned int nr_bytes) ++{ ++ LIST_HEAD(list); ++ ++ /* ++ * Request has already been dequeued but 2.6.18 version of ++ * end_request() unconditionally dequeues the request so we ++ * add it to a local list to prevent hitting the BUG_ON. ++ */ ++ list_add(&req->queuelist, &list); ++ ++ /* ++ * The old API required the driver to end each segment and not ++ * the entire request. In our case we always need to end the ++ * entire request partial requests are not supported. ++ */ ++ req->hard_cur_sectors = nr_bytes >> 9; ++ end_request(req, ((error == 0) ? 
1 : error)); ++ ++ return 0; ++} ++ ++static inline bool ++blk_end_request(struct request *req, int error, unsigned int nr_bytes) ++{ ++ struct request_queue *q = req->q; ++ bool rc; ++ ++ spin_lock_irq(q->queue_lock); ++ rc = __blk_end_request(req, error, nr_bytes); ++ spin_unlock_irq(q->queue_lock); ++ ++ return rc; ++} ++#else ++# ifdef HAVE_BLK_END_REQUEST_GPL_ONLY ++/* ++ * Define required to avoid conflicting 2.6.29 non-static prototype for a ++ * GPL-only version of the helper. As of 2.6.31 the helper is available ++ * to non-GPL modules and is not explicitly exported GPL-only. ++ */ ++# define __blk_end_request __blk_end_request_x ++# define blk_end_request blk_end_request_x ++ ++static inline bool ++__blk_end_request_x(struct request *req, int error, unsigned int nr_bytes) ++{ ++ /* ++ * The old API required the driver to end each segment and not ++ * the entire request. In our case we always need to end the ++ * entire request partial requests are not supported. ++ */ ++ req->hard_cur_sectors = nr_bytes >> 9; ++ end_request(req, ((error == 0) ? 1 : error)); ++ ++ return 0; ++} ++static inline bool ++blk_end_request_x(struct request *req, int error, unsigned int nr_bytes) ++{ ++ struct request_queue *q = req->q; ++ bool rc; ++ ++ spin_lock_irq(q->queue_lock); ++ rc = __blk_end_request_x(req, error, nr_bytes); ++ spin_unlock_irq(q->queue_lock); ++ ++ return rc; ++} ++# endif /* HAVE_BLK_END_REQUEST_GPL_ONLY */ ++#endif /* HAVE_BLK_END_REQUEST */ ++ ++/* ++ * 2.6.36 API change, ++ * The blk_queue_flush() interface has replaced blk_queue_ordered() ++ * interface. However, while the old interface was available to all the ++ * new one is GPL-only. Thus if the GPL-only version is detected we ++ * implement our own trivial helper compatibility funcion. The hope is ++ * that long term this function will be opened up. ++ */ ++#if defined(HAVE_BLK_QUEUE_FLUSH) && defined(HAVE_BLK_QUEUE_FLUSH_GPL_ONLY) ++#define blk_queue_flush __blk_queue_flush ++static inline void ++__blk_queue_flush(struct request_queue *q, unsigned int flags) ++{ ++ q->flush_flags = flags & (REQ_FLUSH | REQ_FUA); ++} ++#endif /* HAVE_BLK_QUEUE_FLUSH && HAVE_BLK_QUEUE_FLUSH_GPL_ONLY */ ++ ++#ifndef HAVE_BLK_RQ_POS ++static inline sector_t ++blk_rq_pos(struct request *req) ++{ ++ return req->sector; ++} ++#endif /* HAVE_BLK_RQ_POS */ ++ ++#ifndef HAVE_BLK_RQ_SECTORS ++static inline unsigned int ++blk_rq_sectors(struct request *req) ++{ ++ return req->nr_sectors; ++} ++#endif /* HAVE_BLK_RQ_SECTORS */ ++ ++#if !defined(HAVE_BLK_RQ_BYTES) || defined(HAVE_BLK_RQ_BYTES_GPL_ONLY) ++/* ++ * Define required to avoid conflicting 2.6.29 non-static prototype for a ++ * GPL-only version of the helper. As of 2.6.31 the helper is available ++ * to non-GPL modules in the form of a static inline in the header. ++ */ ++#define blk_rq_bytes __blk_rq_bytes ++static inline unsigned int ++__blk_rq_bytes(struct request *req) ++{ ++ return blk_rq_sectors(req) << 9; ++} ++#endif /* !HAVE_BLK_RQ_BYTES || HAVE_BLK_RQ_BYTES_GPL_ONLY */ ++ ++/* ++ * Most of the blk_* macros were removed in 2.6.36. Ostensibly this was ++ * done to improve readability and allow easier grepping. However, from ++ * a portability stand point the macros are helpful. Therefore the needed ++ * macros are redefined here if they are missing from the kernel. 
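A simplified sketch (not from the patch) of how a driver built on this header can drain its request queue the same way on old and new kernels. zvol_do_io() is hypothetical and the real zvol code is considerably more involved; the locked __blk_end_request() variant is used because the request function is entered with q->queue_lock held.

static int zvol_do_io(void *private, sector_t sector,
    unsigned int bytes);	/* hypothetical I/O helper */

static void
zvol_request_sketch(struct request_queue *q)
{
	struct request *req;

	/* blk_fetch_request() dequeues, natively or via the shim above. */
	while ((req = blk_fetch_request(q)) != NULL) {
		sector_t offset = blk_rq_pos(req);
		unsigned int size = blk_rq_bytes(req);
		int error;

		error = zvol_do_io(q->queuedata, offset, size);
		(void) __blk_end_request(req, error, size);
	}
}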
++ */ ++#ifndef blk_fs_request ++#define blk_fs_request(rq) ((rq)->cmd_type == REQ_TYPE_FS) ++#endif ++ ++/* ++ * 2.6.27 API change, ++ * The blk_queue_stackable() queue flag was added in 2.6.27 to handle dm ++ * stacking drivers. Prior to this request stacking drivers were detected ++ * by checking (q->request_fn == NULL), for earlier kernels we revert to ++ * this legacy behavior. ++ */ ++#ifndef blk_queue_stackable ++#define blk_queue_stackable(q) ((q)->request_fn == NULL) ++#endif ++ ++/* ++ * 2.6.34 API change, ++ * The blk_queue_max_hw_sectors() function replaces blk_queue_max_sectors(). ++ */ ++#ifndef HAVE_BLK_QUEUE_MAX_HW_SECTORS ++#define blk_queue_max_hw_sectors __blk_queue_max_hw_sectors ++static inline void ++__blk_queue_max_hw_sectors(struct request_queue *q, unsigned int max_hw_sectors) ++{ ++ blk_queue_max_sectors(q, max_hw_sectors); ++} ++#endif ++ ++/* ++ * 2.6.34 API change, ++ * The blk_queue_max_segments() function consolidates ++ * blk_queue_max_hw_segments() and blk_queue_max_phys_segments(). ++ */ ++#ifndef HAVE_BLK_QUEUE_MAX_SEGMENTS ++#define blk_queue_max_segments __blk_queue_max_segments ++static inline void ++__blk_queue_max_segments(struct request_queue *q, unsigned short max_segments) ++{ ++ blk_queue_max_phys_segments(q, max_segments); ++ blk_queue_max_hw_segments(q, max_segments); ++} ++#endif ++ ++/* ++ * 2.6.30 API change, ++ * The blk_queue_physical_block_size() function was introduced to ++ * indicate the smallest I/O the device can write without incurring ++ * a read-modify-write penalty. For older kernels this is a no-op. ++ */ ++#ifndef HAVE_BLK_QUEUE_PHYSICAL_BLOCK_SIZE ++#define blk_queue_physical_block_size(q, x) ((void)(0)) ++#endif ++ ++/* ++ * 2.6.30 API change, ++ * The blk_queue_io_opt() function was added to indicate the optimal ++ * I/O size for the device. For older kernels this is a no-op. ++ */ ++#ifndef HAVE_BLK_QUEUE_IO_OPT ++#define blk_queue_io_opt(q, x) ((void)(0)) ++#endif ++ ++#ifndef HAVE_GET_DISK_RO ++static inline int ++get_disk_ro(struct gendisk *disk) ++{ ++ int policy = 0; ++ ++ if (disk->part[0]) ++ policy = disk->part[0]->policy; ++ ++ return policy; ++} ++#endif /* HAVE_GET_DISK_RO */ ++ ++#ifndef HAVE_RQ_IS_SYNC ++static inline bool ++rq_is_sync(struct request *req) ++{ ++ return (req->flags & REQ_RW_SYNC); ++} ++#endif /* HAVE_RQ_IS_SYNC */ ++ ++#ifndef HAVE_RQ_FOR_EACH_SEGMENT ++struct req_iterator { ++ int i; ++ struct bio *bio; ++}; ++ ++# define for_each_bio(_bio) \ ++ for (; _bio; _bio = _bio->bi_next) ++ ++# define __rq_for_each_bio(_bio, rq) \ ++ if ((rq->bio)) \ ++ for (_bio = (rq)->bio; _bio; _bio = _bio->bi_next) ++ ++# define rq_for_each_segment(bvl, _rq, _iter) \ ++ __rq_for_each_bio(_iter.bio, _rq) \ ++ bio_for_each_segment(bvl, _iter.bio, _iter.i) ++#endif /* HAVE_RQ_FOR_EACH_SEGMENT */ ++ ++/* ++ * Portable helper for correctly setting the FAILFAST flags. The ++ * correct usage has changed 3 times from 2.6.12 to 2.6.38. ++ */ ++static inline void ++bio_set_flags_failfast(struct block_device *bdev, int *flags) ++{ ++#ifdef CONFIG_BUG ++ /* ++ * Disable FAILFAST for loopback devices because of the ++ * following incorrect BUG_ON() in loop_make_request(). ++ * This support is also disabled for md devices because the ++ * test suite layers md devices on top of loopback devices. ++ * This may be removed when the loopback driver is fixed. 
++ * ++ * BUG_ON(!lo || (rw != READ && rw != WRITE)); ++ */ ++ if ((MAJOR(bdev->bd_dev) == LOOP_MAJOR) || ++ (MAJOR(bdev->bd_dev) == MD_MAJOR)) ++ return; ++ ++#ifdef BLOCK_EXT_MAJOR ++ if (MAJOR(bdev->bd_dev) == BLOCK_EXT_MAJOR) ++ return; ++#endif /* BLOCK_EXT_MAJOR */ ++#endif /* CONFIG_BUG */ ++ ++#ifdef HAVE_BIO_RW_FAILFAST_DTD ++ /* BIO_RW_FAILFAST_* preferred interface from 2.6.28 - 2.6.35 */ ++ *flags |= ++ ((1 << BIO_RW_FAILFAST_DEV) | ++ (1 << BIO_RW_FAILFAST_TRANSPORT) | ++ (1 << BIO_RW_FAILFAST_DRIVER)); ++#else ++# ifdef HAVE_BIO_RW_FAILFAST ++ /* BIO_RW_FAILFAST preferred interface from 2.6.12 - 2.6.27 */ ++ *flags |= (1 << BIO_RW_FAILFAST); ++# else ++# ifdef HAVE_REQ_FAILFAST_MASK ++ /* REQ_FAILFAST_* preferred interface from 2.6.36 - 2.6.xx, ++ * the BIO_* and REQ_* flags were unified under REQ_* flags. */ ++ *flags |= REQ_FAILFAST_MASK; ++# endif /* HAVE_REQ_FAILFAST_MASK */ ++# endif /* HAVE_BIO_RW_FAILFAST */ ++#endif /* HAVE_BIO_RW_FAILFAST_DTD */ ++} ++ ++/* ++ * Maximum disk label length, it may be undefined for some kernels. ++ */ ++#ifndef DISK_NAME_LEN ++#define DISK_NAME_LEN 32 ++#endif /* DISK_NAME_LEN */ ++ ++/* ++ * 2.6.24 API change, ++ * The bio_end_io() prototype changed slightly. These are helper ++ * macro's to ensure the prototype and return value are handled. ++ */ ++#ifdef HAVE_2ARGS_BIO_END_IO_T ++# define BIO_END_IO_PROTO(fn, x, y, z) static void fn(struct bio *x, int z) ++# define BIO_END_IO_RETURN(rc) return ++#else ++# define BIO_END_IO_PROTO(fn, x, y, z) static int fn(struct bio *x, \ ++ unsigned int y, int z) ++# define BIO_END_IO_RETURN(rc) return rc ++#endif /* HAVE_2ARGS_BIO_END_IO_T */ ++ ++/* ++ * 2.6.38 - 2.6.x API, ++ * blkdev_get_by_path() ++ * blkdev_put() ++ * ++ * 2.6.28 - 2.6.37 API, ++ * open_bdev_exclusive() ++ * close_bdev_exclusive() ++ * ++ * 2.6.12 - 2.6.27 API, ++ * open_bdev_excl() ++ * close_bdev_excl() ++ * ++ * Used to exclusively open a block device from within the kernel. ++ */ ++#if defined(HAVE_BLKDEV_GET_BY_PATH) ++# define vdev_bdev_open(path, md, hld) blkdev_get_by_path(path, \ ++ (md) | FMODE_EXCL, hld) ++# define vdev_bdev_close(bdev, md) blkdev_put(bdev, (md) | FMODE_EXCL) ++#elif defined(HAVE_OPEN_BDEV_EXCLUSIVE) ++# define vdev_bdev_open(path, md, hld) open_bdev_exclusive(path, md, hld) ++# define vdev_bdev_close(bdev, md) close_bdev_exclusive(bdev, md) ++#else ++# define vdev_bdev_open(path, md, hld) open_bdev_excl(path, md, hld) ++# define vdev_bdev_close(bdev, md) close_bdev_excl(bdev) ++#endif /* HAVE_BLKDEV_GET_BY_PATH | HAVE_OPEN_BDEV_EXCLUSIVE */ ++ ++/* ++ * 2.6.22 API change ++ * The function invalidate_bdev() lost it's second argument because ++ * it was unused. ++ */ ++#ifdef HAVE_1ARG_INVALIDATE_BDEV ++# define vdev_bdev_invalidate(bdev) invalidate_bdev(bdev) ++#else ++# define vdev_bdev_invalidate(bdev) invalidate_bdev(bdev, 1) ++#endif /* HAVE_1ARG_INVALIDATE_BDEV */ ++ ++/* ++ * 2.6.30 API change ++ * Change to make it explicit there this is the logical block size. ++ */ ++#ifdef HAVE_BDEV_LOGICAL_BLOCK_SIZE ++# define vdev_bdev_block_size(bdev) bdev_logical_block_size(bdev) ++#else ++# define vdev_bdev_block_size(bdev) bdev_hardsect_size(bdev) ++#endif ++ ++/* ++ * 2.6.37 API change ++ * The WRITE_FLUSH, WRITE_FUA, and WRITE_FLUSH_FUA flags have been ++ * introduced as a replacement for WRITE_BARRIER. This was done to ++ * allow richer semantics to be expressed to the block layer. It is ++ * the block layers responsibility to choose the correct way to ++ * implement these semantics. 
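A sketch (not from the patch) of exclusively opening a block device through the vdev_bdev_* wrappers above; the same source works whether blkdev_get_by_path(), open_bdev_exclusive() or open_bdev_excl() sits underneath. The holder token is any driver-unique pointer.

static int
probe_logical_block_size(const char *path, void *holder)
{
	struct block_device *bdev;
	int size;

	/* Exclusive open; the wrapper adds FMODE_EXCL where required. */
	bdev = vdev_bdev_open(path, FMODE_READ, holder);
	if (IS_ERR(bdev))
		return ((int)PTR_ERR(bdev));	/* negative errno */

	size = vdev_bdev_block_size(bdev);	/* logical block size */
	vdev_bdev_close(bdev, FMODE_READ);

	return (size);
}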
++ * ++ * The existence of these flags implies that REQ_FLUSH an REQ_FUA are ++ * defined. Thus we can safely define VDEV_REQ_FLUSH and VDEV_REQ_FUA ++ * compatibility macros. ++ */ ++#ifdef WRITE_FLUSH_FUA ++# define VDEV_WRITE_FLUSH_FUA WRITE_FLUSH_FUA ++# define VDEV_REQ_FLUSH REQ_FLUSH ++# define VDEV_REQ_FUA REQ_FUA ++#else ++# define VDEV_WRITE_FLUSH_FUA WRITE_BARRIER ++# define VDEV_REQ_FLUSH REQ_HARDBARRIER ++# define VDEV_REQ_FUA REQ_HARDBARRIER ++#endif ++ ++/* ++ * 2.6.32 API change ++ * Use the normal I/O patch for discards. ++ */ ++#ifdef REQ_DISCARD ++# define VDEV_REQ_DISCARD REQ_DISCARD ++#endif ++ ++/* ++ * 2.6.33 API change ++ * Discard granularity and alignment restrictions may now be set. For ++ * older kernels which do not support this it is safe to skip it. ++ */ ++#ifdef HAVE_DISCARD_GRANULARITY ++static inline void ++blk_queue_discard_granularity(struct request_queue *q, unsigned int dg) ++{ ++ q->limits.discard_granularity = dg; ++} ++#else ++#define blk_queue_discard_granularity(x, dg) ((void)0) ++#endif /* HAVE_DISCARD_GRANULARITY */ ++ ++/* ++ * Default Linux IO Scheduler, ++ * Setting the scheduler to noop will allow the Linux IO scheduler to ++ * still perform front and back merging, while leaving the request ++ * ordering and prioritization to the ZFS IO scheduler. ++ */ ++#define VDEV_SCHEDULER "noop" ++ ++#endif /* _ZFS_BLKDEV_H */ +diff -uNr linux-3.2.33-go.orig/include/zfs/linux/dcache_compat.h linux-3.2.33-go/include/zfs/linux/dcache_compat.h +--- linux-3.2.33-go.orig/include/zfs/linux/dcache_compat.h 1970-01-01 01:00:00.000000000 +0100 ++++ linux-3.2.33-go/include/zfs/linux/dcache_compat.h 2012-11-16 23:25:34.345039382 +0100 +@@ -0,0 +1,38 @@ ++/* ++ * CDDL HEADER START ++ * ++ * The contents of this file are subject to the terms of the ++ * Common Development and Distribution License (the "License"). ++ * You may not use this file except in compliance with the License. ++ * ++ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE ++ * or http://www.opensolaris.org/os/licensing. ++ * See the License for the specific language governing permissions ++ * and limitations under the License. ++ * ++ * When distributing Covered Code, include this CDDL HEADER in each ++ * file and include the License file at usr/src/OPENSOLARIS.LICENSE. ++ * If applicable, add the following below this CDDL HEADER, with the ++ * fields enclosed by brackets "[]" replaced with your own identifying ++ * information: Portions Copyright [yyyy] [name of copyright owner] ++ * ++ * CDDL HEADER END ++ */ ++ ++/* ++ * Copyright (C) 2011 Lawrence Livermore National Security, LLC. ++ */ ++ ++#ifndef _ZFS_DCACHE_H ++#define _ZFS_DCACHE_H ++ ++#include ++ ++#define dname(dentry) ((char *)((dentry)->d_name.name)) ++#define dlen(dentry) ((int)((dentry)->d_name.len)) ++ ++#ifndef HAVE_D_MAKE_ROOT ++#define d_make_root(inode) d_alloc_root(inode) ++#endif /* HAVE_D_MAKE_ROOT */ ++ ++#endif /* _ZFS_DCACHE_H */ +diff -uNr linux-3.2.33-go.orig/include/zfs/linux/Makefile linux-3.2.33-go/include/zfs/linux/Makefile +--- linux-3.2.33-go.orig/include/zfs/linux/Makefile 1970-01-01 01:00:00.000000000 +0100 ++++ linux-3.2.33-go/include/zfs/linux/Makefile 2012-11-16 23:25:34.345039382 +0100 +@@ -0,0 +1,664 @@ ++# Makefile.in generated by automake 1.11.6 from Makefile.am. ++# include/linux/Makefile. Generated from Makefile.in by configure. 
++ ++# Copyright (C) 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001, 2002, ++# 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010, 2011 Free Software ++# Foundation, Inc. ++# This Makefile.in is free software; the Free Software Foundation ++# gives unlimited permission to copy and/or distribute it, ++# with or without modifications, as long as this notice is preserved. ++ ++# This program is distributed in the hope that it will be useful, ++# but WITHOUT ANY WARRANTY, to the extent permitted by law; without ++# even the implied warranty of MERCHANTABILITY or FITNESS FOR A ++# PARTICULAR PURPOSE. ++ ++ ++ ++ ++am__make_dryrun = \ ++ { \ ++ am__dry=no; \ ++ case $$MAKEFLAGS in \ ++ *\\[\ \ ]*) \ ++ echo 'am--echo: ; @echo "AM" OK' | $(MAKE) -f - 2>/dev/null \ ++ | grep '^AM OK$$' >/dev/null || am__dry=yes;; \ ++ *) \ ++ for am__flg in $$MAKEFLAGS; do \ ++ case $$am__flg in \ ++ *=*|--*) ;; \ ++ *n*) am__dry=yes; break;; \ ++ esac; \ ++ done;; \ ++ esac; \ ++ test $$am__dry = yes; \ ++ } ++pkgdatadir = $(datadir)/zfs ++pkgincludedir = $(includedir)/zfs ++pkglibdir = $(libdir)/zfs ++pkglibexecdir = $(libexecdir)/zfs ++am__cd = CDPATH="$${ZSH_VERSION+.}$(PATH_SEPARATOR)" && cd ++install_sh_DATA = $(install_sh) -c -m 644 ++install_sh_PROGRAM = $(install_sh) -c ++install_sh_SCRIPT = $(install_sh) -c ++INSTALL_HEADER = $(INSTALL_DATA) ++transform = $(program_transform_name) ++NORMAL_INSTALL = : ++PRE_INSTALL = : ++POST_INSTALL = : ++NORMAL_UNINSTALL = : ++PRE_UNINSTALL = : ++POST_UNINSTALL = : ++build_triplet = x86_64-unknown-linux-gnu ++host_triplet = x86_64-unknown-linux-gnu ++target_triplet = x86_64-unknown-linux-gnu ++subdir = include/linux ++DIST_COMMON = $(am__kernel_HEADERS_DIST) $(libzfs_HEADERS) \ ++ $(srcdir)/Makefile.am $(srcdir)/Makefile.in ++ACLOCAL_M4 = $(top_srcdir)/aclocal.m4 ++am__aclocal_m4_deps = \ ++ $(top_srcdir)/config/always-no-unused-but-set-variable.m4 \ ++ $(top_srcdir)/config/kernel-automount.m4 \ ++ $(top_srcdir)/config/kernel-bdev-block-device-operations.m4 \ ++ $(top_srcdir)/config/kernel-bdev-logical-size.m4 \ ++ $(top_srcdir)/config/kernel-bdi-setup-and-register.m4 \ ++ $(top_srcdir)/config/kernel-bdi.m4 \ ++ $(top_srcdir)/config/kernel-bio-empty-barrier.m4 \ ++ $(top_srcdir)/config/kernel-bio-end-io-t-args.m4 \ ++ $(top_srcdir)/config/kernel-bio-failfast.m4 \ ++ $(top_srcdir)/config/kernel-bio-rw-syncio.m4 \ ++ $(top_srcdir)/config/kernel-blk-end-request.m4 \ ++ $(top_srcdir)/config/kernel-blk-fetch-request.m4 \ ++ $(top_srcdir)/config/kernel-blk-queue-discard.m4 \ ++ $(top_srcdir)/config/kernel-blk-queue-flush.m4 \ ++ $(top_srcdir)/config/kernel-blk-queue-io-opt.m4 \ ++ $(top_srcdir)/config/kernel-blk-queue-max-hw-sectors.m4 \ ++ $(top_srcdir)/config/kernel-blk-queue-max-segments.m4 \ ++ $(top_srcdir)/config/kernel-blk-queue-nonrot.m4 \ ++ $(top_srcdir)/config/kernel-blk-queue-physical-block-size.m4 \ ++ $(top_srcdir)/config/kernel-blk-requeue-request.m4 \ ++ $(top_srcdir)/config/kernel-blk-rq-bytes.m4 \ ++ $(top_srcdir)/config/kernel-blk-rq-pos.m4 \ ++ $(top_srcdir)/config/kernel-blk-rq-sectors.m4 \ ++ $(top_srcdir)/config/kernel-blkdev-get-by-path.m4 \ ++ $(top_srcdir)/config/kernel-blkdev-get.m4 \ ++ $(top_srcdir)/config/kernel-check-disk-size-change.m4 \ ++ $(top_srcdir)/config/kernel-clear-inode.m4 \ ++ $(top_srcdir)/config/kernel-commit-metadata.m4 \ ++ $(top_srcdir)/config/kernel-create-nameidata.m4 \ ++ $(top_srcdir)/config/kernel-d-make-root.m4 \ ++ $(top_srcdir)/config/kernel-d-obtain-alias.m4 \ ++ $(top_srcdir)/config/kernel-discard-granularity.m4 \ ++ 
$(top_srcdir)/config/kernel-elevator-change.m4 \ ++ $(top_srcdir)/config/kernel-encode-fh-inode.m4 \ ++ $(top_srcdir)/config/kernel-evict-inode.m4 \ ++ $(top_srcdir)/config/kernel-fallocate.m4 \ ++ $(top_srcdir)/config/kernel-fmode-t.m4 \ ++ $(top_srcdir)/config/kernel-fsync.m4 \ ++ $(top_srcdir)/config/kernel-get-disk-ro.m4 \ ++ $(top_srcdir)/config/kernel-get-gendisk.m4 \ ++ $(top_srcdir)/config/kernel-insert-inode-locked.m4 \ ++ $(top_srcdir)/config/kernel-invalidate-bdev-args.m4 \ ++ $(top_srcdir)/config/kernel-kobj-name-len.m4 \ ++ $(top_srcdir)/config/kernel-lookup-nameidata.m4 \ ++ $(top_srcdir)/config/kernel-mkdir-umode-t.m4 \ ++ $(top_srcdir)/config/kernel-mount-nodev.m4 \ ++ $(top_srcdir)/config/kernel-open-bdev-exclusive.m4 \ ++ $(top_srcdir)/config/kernel-rq-for-each_segment.m4 \ ++ $(top_srcdir)/config/kernel-rq-is_sync.m4 \ ++ $(top_srcdir)/config/kernel-security-inode-init.m4 \ ++ $(top_srcdir)/config/kernel-set-nlink.m4 \ ++ $(top_srcdir)/config/kernel-sget-args.m4 \ ++ $(top_srcdir)/config/kernel-show-options.m4 \ ++ $(top_srcdir)/config/kernel-shrink.m4 \ ++ $(top_srcdir)/config/kernel-truncate-range.m4 \ ++ $(top_srcdir)/config/kernel-truncate-setsize.m4 \ ++ $(top_srcdir)/config/kernel-xattr-handler.m4 \ ++ $(top_srcdir)/config/kernel.m4 \ ++ $(top_srcdir)/config/user-arch.m4 \ ++ $(top_srcdir)/config/user-frame-larger-than.m4 \ ++ $(top_srcdir)/config/user-ioctl.m4 \ ++ $(top_srcdir)/config/user-libblkid.m4 \ ++ $(top_srcdir)/config/user-libuuid.m4 \ ++ $(top_srcdir)/config/user-nptl_guard_within_stack.m4 \ ++ $(top_srcdir)/config/user-selinux.m4 \ ++ $(top_srcdir)/config/user-udev.m4 \ ++ $(top_srcdir)/config/user-zlib.m4 $(top_srcdir)/config/user.m4 \ ++ $(top_srcdir)/config/zfs-build.m4 \ ++ $(top_srcdir)/config/zfs-meta.m4 $(top_srcdir)/configure.ac ++am__configure_deps = $(am__aclocal_m4_deps) $(CONFIGURE_DEPENDENCIES) \ ++ $(ACLOCAL_M4) ++mkinstalldirs = $(install_sh) -d ++CONFIG_HEADER = $(top_builddir)/zfs_config.h ++CONFIG_CLEAN_FILES = ++CONFIG_CLEAN_VPATH_FILES = ++AM_V_GEN = $(am__v_GEN_$(V)) ++am__v_GEN_ = $(am__v_GEN_$(AM_DEFAULT_VERBOSITY)) ++am__v_GEN_0 = @echo " GEN " $@; ++AM_V_at = $(am__v_at_$(V)) ++am__v_at_ = $(am__v_at_$(AM_DEFAULT_VERBOSITY)) ++am__v_at_0 = @ ++SOURCES = ++DIST_SOURCES = ++am__can_run_installinfo = \ ++ case $$AM_UPDATE_INFO_DIR in \ ++ n|no|NO) false;; \ ++ *) (install-info --version) >/dev/null 2>&1;; \ ++ esac ++am__kernel_HEADERS_DIST = $(top_srcdir)/include/linux/dcache_compat.h \ ++ $(top_srcdir)/include/linux/xattr_compat.h \ ++ $(top_srcdir)/include/linux/vfs_compat.h \ ++ $(top_srcdir)/include/linux/blkdev_compat.h ++am__vpath_adj_setup = srcdirstrip=`echo "$(srcdir)" | sed 's|.|.|g'`; ++am__vpath_adj = case $$p in \ ++ $(srcdir)/*) f=`echo "$$p" | sed "s|^$$srcdirstrip/||"`;; \ ++ *) f=$$p;; \ ++ esac; ++am__strip_dir = f=`echo $$p | sed -e 's|^.*/||'`; ++am__install_max = 40 ++am__nobase_strip_setup = \ ++ srcdirstrip=`echo "$(srcdir)" | sed 's/[].[^$$\\*|]/\\\\&/g'` ++am__nobase_strip = \ ++ for p in $$list; do echo "$$p"; done | sed -e "s|$$srcdirstrip/||" ++am__nobase_list = $(am__nobase_strip_setup); \ ++ for p in $$list; do echo "$$p $$p"; done | \ ++ sed "s| $$srcdirstrip/| |;"' / .*\//!s/ .*/ ./; s,\( .*\)/[^/]*$$,\1,' | \ ++ $(AWK) 'BEGIN { files["."] = "" } { files[$$2] = files[$$2] " " $$1; \ ++ if (++n[$$2] == $(am__install_max)) \ ++ { print $$2, files[$$2]; n[$$2] = 0; files[$$2] = "" } } \ ++ END { for (dir in files) print dir, files[dir] }' ++am__base_list = \ ++ sed 
'$$!N;$$!N;$$!N;$$!N;$$!N;$$!N;$$!N;s/\n/ /g' | \ ++ sed '$$!N;$$!N;$$!N;$$!N;s/\n/ /g' ++am__uninstall_files_from_dir = { \ ++ test -z "$$files" \ ++ || { test ! -d "$$dir" && test ! -f "$$dir" && test ! -r "$$dir"; } \ ++ || { echo " ( cd '$$dir' && rm -f" $$files ")"; \ ++ $(am__cd) "$$dir" && rm -f $$files; }; \ ++ } ++am__installdirs = "$(DESTDIR)$(kerneldir)" "$(DESTDIR)$(libzfsdir)" ++HEADERS = $(kernel_HEADERS) $(libzfs_HEADERS) ++ETAGS = etags ++CTAGS = ctags ++DISTFILES = $(DIST_COMMON) $(DIST_SOURCES) $(TEXINFOS) $(EXTRA_DIST) ++ACLOCAL = ${SHELL} /root/zfs-0.6.0-rc12/config/missing --run aclocal-1.11 ++ALIEN = alien ++ALIEN_VERSION = ++AMTAR = $${TAR-tar} ++AM_DEFAULT_VERBOSITY = 1 ++AR = ar ++AUTOCONF = ${SHELL} /root/zfs-0.6.0-rc12/config/missing --run autoconf ++AUTOHEADER = ${SHELL} /root/zfs-0.6.0-rc12/config/missing --run autoheader ++AUTOMAKE = ${SHELL} /root/zfs-0.6.0-rc12/config/missing --run automake-1.11 ++AWK = gawk ++CC = gcc ++CCAS = gcc ++CCASDEPMODE = depmode=gcc3 ++CCASFLAGS = -g -O2 ++CCDEPMODE = depmode=gcc3 ++CFLAGS = -g -O2 ++CPP = gcc -E ++CPPFLAGS = ++CYGPATH_W = echo ++DEBUG_CFLAGS = -DNDEBUG ++DEBUG_DMU_TX = _without_debug_dmu_tx ++DEBUG_STACKFLAGS = ++DEBUG_ZFS = _without_debug ++DEFAULT_INIT_DIR = ${prefix}/etc/init.d ++DEFAULT_INIT_SCRIPT = gentoo ++DEFAULT_PACKAGE = tgz ++DEFS = -DHAVE_CONFIG_H ++DEPDIR = .deps ++DLLTOOL = false ++DPKG = dpkg ++DPKGBUILD = dpkg-buildpackage ++DPKGBUILD_VERSION = ++DPKG_VERSION = ++DSYMUTIL = ++DUMPBIN = ++ECHO_C = ++ECHO_N = -n ++ECHO_T = ++EGREP = /bin/grep -E ++EXEEXT = ++FGREP = /bin/grep -F ++FRAME_LARGER_THAN = -Wframe-larger-than=1024 ++GREP = /bin/grep ++HAVE_ALIEN = no ++HAVE_DPKG = no ++HAVE_DPKGBUILD = no ++HAVE_MAKEPKG = ++HAVE_PACMAN = ++HAVE_RPM = yes ++HAVE_RPMBUILD = yes ++INSTALL = /usr/bin/install -c ++INSTALL_DATA = ${INSTALL} -m 644 ++INSTALL_PROGRAM = ${INSTALL} ++INSTALL_SCRIPT = ${INSTALL} ++INSTALL_STRIP_PROGRAM = $(install_sh) -c -s ++KERNELCPPFLAGS = -Wno-unused-but-set-variable -DHAVE_SPL -D_KERNEL -DTEXT_DOMAIN=\"zfs-linux-kernel\" -DNDEBUG ++KERNELMAKE_PARAMS = O=/usr/src/linux-3.6.0-sabayon ++LD = /usr/x86_64-pc-linux-gnu/bin/ld -m elf_x86_64 ++LDFLAGS = ++LIBBLKID = ++LIBOBJS = ++LIBS = -luuid -luuid -lz -lz -lz ++LIBSELINUX = ++LIBTOOL = $(SHELL) $(top_builddir)/libtool ++LIBUUID = -luuid ++LINUX = /usr/src/linux-3.2.33-go ++LINUX_OBJ = /usr/src/linux-3.6.0-sabayon ++LINUX_SYMBOLS = NONE ++LINUX_VERSION = 3.6.0-sabayon ++LIPO = ++LN_S = ln -s ++LTLIBOBJS = ++MAINT = # ++MAKEINFO = ${SHELL} /root/zfs-0.6.0-rc12/config/missing --run makeinfo ++MAKEPKG = ++MAKEPKG_VERSION = ++MANIFEST_TOOL = : ++MKDIR_P = /bin/mkdir -p ++NM = /usr/bin/nm -B ++NMEDIT = ++NO_UNUSED_BUT_SET_VARIABLE = -Wno-unused-but-set-variable ++OBJDUMP = objdump ++OBJEXT = o ++OTOOL = ++OTOOL64 = ++PACKAGE = zfs ++PACKAGE_BUGREPORT = ++PACKAGE_NAME = ++PACKAGE_STRING = ++PACKAGE_TARNAME = ++PACKAGE_URL = ++PACKAGE_VERSION = ++PACMAN = ++PACMAN_VERSION = ++PATH_SEPARATOR = : ++RANLIB = ranlib ++RPM = rpm ++RPMBUILD = rpmbuild ++RPMBUILD_VERSION = 4.10.0 ++RPM_VERSION = 4.10.0 ++SED = /bin/sed ++SET_MAKE = ++SHELL = /bin/sh ++SPL = /usr/src/linux-3.2.33-go ++SPL_OBJ = /usr/src/linux-3.2.33-go ++SPL_SYMBOLS = NONE ++SPL_VERSION = 0.6.0-rc12 ++STRIP = strip ++TARGET_ASM_DIR = asm-x86_64 ++VENDOR = gentoo ++VERSION = 0.6.0 ++ZFS_CONFIG = all ++ZFS_META_ALIAS = zfs-0.6.0-rc12 ++ZFS_META_AUTHOR = Sun Microsystems/Oracle, Lawrence Livermore National Laboratory ++ZFS_META_DATA = ++ZFS_META_LICENSE = CDDL ++ZFS_META_LT_AGE = 
++ZFS_META_LT_CURRENT = ++ZFS_META_LT_REVISION = ++ZFS_META_NAME = zfs ++ZFS_META_RELEASE = rc12 ++ZFS_META_VERSION = 0.6.0 ++ZLIB = -lz ++abs_builddir = /root/zfs-0.6.0-rc12/include/linux ++abs_srcdir = /root/zfs-0.6.0-rc12/include/linux ++abs_top_builddir = /root/zfs-0.6.0-rc12 ++abs_top_srcdir = /root/zfs-0.6.0-rc12 ++ac_ct_AR = ar ++ac_ct_CC = gcc ++ac_ct_DUMPBIN = ++am__include = include ++am__leading_dot = . ++am__quote = ++am__tar = $${TAR-tar} chof - "$$tardir" ++am__untar = $${TAR-tar} xf - ++bindir = ${exec_prefix}/bin ++build = x86_64-unknown-linux-gnu ++build_alias = ++build_cpu = x86_64 ++build_os = linux-gnu ++build_vendor = unknown ++builddir = . ++datadir = ${datarootdir} ++datarootdir = ${prefix}/share ++docdir = ${datarootdir}/doc/${PACKAGE} ++dvidir = ${docdir} ++exec_prefix = ${prefix} ++host = x86_64-unknown-linux-gnu ++host_alias = ++host_cpu = x86_64 ++host_os = linux-gnu ++host_vendor = unknown ++htmldir = ${docdir} ++includedir = ${prefix}/include ++infodir = ${datarootdir}/info ++install_sh = ${SHELL} /root/zfs-0.6.0-rc12/config/install-sh ++libdir = ${exec_prefix}/lib ++libexecdir = ${exec_prefix}/libexec ++localedir = ${datarootdir}/locale ++localstatedir = ${prefix}/var ++mandir = ${datarootdir}/man ++mkdir_p = /bin/mkdir -p ++oldincludedir = /usr/include ++pdfdir = ${docdir} ++prefix = /usr/local ++program_transform_name = s,x,x, ++psdir = ${docdir} ++sbindir = ${exec_prefix}/sbin ++sharedstatedir = ${prefix}/com ++srcdir = . ++sysconfdir = ${prefix}/etc ++target = x86_64-unknown-linux-gnu ++target_alias = ++target_cpu = x86_64 ++target_os = linux-gnu ++target_vendor = unknown ++top_build_prefix = ../../ ++top_builddir = ../.. ++top_srcdir = ../.. ++udevdir = ${exec_prefix}/lib/udev ++udevruledir = ${udevdir}/rules.d ++COMMON_H = ++KERNEL_H = \ ++ $(top_srcdir)/include/linux/dcache_compat.h \ ++ $(top_srcdir)/include/linux/xattr_compat.h \ ++ $(top_srcdir)/include/linux/vfs_compat.h \ ++ $(top_srcdir)/include/linux/blkdev_compat.h ++ ++USER_H = ++EXTRA_DIST = $(COMMON_H) $(KERNEL_H) $(USER_H) ++libzfsdir = $(includedir)/libzfs/linux ++libzfs_HEADERS = $(COMMON_H) $(USER_H) ++#kerneldir = /usr/src/zfs-$(ZFS_META_VERSION)-$(ZFS_META_RELEASE)/$(LINUX_VERSION)/linux ++#kernel_HEADERS = $(COMMON_H) $(KERNEL_H) ++all: all-am ++ ++.SUFFIXES: ++$(srcdir)/Makefile.in: # $(srcdir)/Makefile.am $(am__configure_deps) ++ @for dep in $?; do \ ++ case '$(am__configure_deps)' in \ ++ *$$dep*) \ ++ ( cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh ) \ ++ && { if test -f $@; then exit 0; else break; fi; }; \ ++ exit 1;; \ ++ esac; \ ++ done; \ ++ echo ' cd $(top_srcdir) && $(AUTOMAKE) --gnu include/linux/Makefile'; \ ++ $(am__cd) $(top_srcdir) && \ ++ $(AUTOMAKE) --gnu include/linux/Makefile ++.PRECIOUS: Makefile ++Makefile: $(srcdir)/Makefile.in $(top_builddir)/config.status ++ @case '$?' 
in \ ++ *config.status*) \ ++ cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh;; \ ++ *) \ ++ echo ' cd $(top_builddir) && $(SHELL) ./config.status $(subdir)/$@ $(am__depfiles_maybe)'; \ ++ cd $(top_builddir) && $(SHELL) ./config.status $(subdir)/$@ $(am__depfiles_maybe);; \ ++ esac; ++ ++$(top_builddir)/config.status: $(top_srcdir)/configure $(CONFIG_STATUS_DEPENDENCIES) ++ cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh ++ ++$(top_srcdir)/configure: # $(am__configure_deps) ++ cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh ++$(ACLOCAL_M4): # $(am__aclocal_m4_deps) ++ cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh ++$(am__aclocal_m4_deps): ++ ++mostlyclean-libtool: ++ -rm -f *.lo ++ ++clean-libtool: ++ -rm -rf .libs _libs ++install-kernelHEADERS: $(kernel_HEADERS) ++ @$(NORMAL_INSTALL) ++ @list='$(kernel_HEADERS)'; test -n "$(kerneldir)" || list=; \ ++ if test -n "$$list"; then \ ++ echo " $(MKDIR_P) '$(DESTDIR)$(kerneldir)'"; \ ++ $(MKDIR_P) "$(DESTDIR)$(kerneldir)" || exit 1; \ ++ fi; \ ++ for p in $$list; do \ ++ if test -f "$$p"; then d=; else d="$(srcdir)/"; fi; \ ++ echo "$$d$$p"; \ ++ done | $(am__base_list) | \ ++ while read files; do \ ++ echo " $(INSTALL_HEADER) $$files '$(DESTDIR)$(kerneldir)'"; \ ++ $(INSTALL_HEADER) $$files "$(DESTDIR)$(kerneldir)" || exit $$?; \ ++ done ++ ++uninstall-kernelHEADERS: ++ @$(NORMAL_UNINSTALL) ++ @list='$(kernel_HEADERS)'; test -n "$(kerneldir)" || list=; \ ++ files=`for p in $$list; do echo $$p; done | sed -e 's|^.*/||'`; \ ++ dir='$(DESTDIR)$(kerneldir)'; $(am__uninstall_files_from_dir) ++install-libzfsHEADERS: $(libzfs_HEADERS) ++ @$(NORMAL_INSTALL) ++ @list='$(libzfs_HEADERS)'; test -n "$(libzfsdir)" || list=; \ ++ if test -n "$$list"; then \ ++ echo " $(MKDIR_P) '$(DESTDIR)$(libzfsdir)'"; \ ++ $(MKDIR_P) "$(DESTDIR)$(libzfsdir)" || exit 1; \ ++ fi; \ ++ for p in $$list; do \ ++ if test -f "$$p"; then d=; else d="$(srcdir)/"; fi; \ ++ echo "$$d$$p"; \ ++ done | $(am__base_list) | \ ++ while read files; do \ ++ echo " $(INSTALL_HEADER) $$files '$(DESTDIR)$(libzfsdir)'"; \ ++ $(INSTALL_HEADER) $$files "$(DESTDIR)$(libzfsdir)" || exit $$?; \ ++ done ++ ++uninstall-libzfsHEADERS: ++ @$(NORMAL_UNINSTALL) ++ @list='$(libzfs_HEADERS)'; test -n "$(libzfsdir)" || list=; \ ++ files=`for p in $$list; do echo $$p; done | sed -e 's|^.*/||'`; \ ++ dir='$(DESTDIR)$(libzfsdir)'; $(am__uninstall_files_from_dir) ++ ++ID: $(HEADERS) $(SOURCES) $(LISP) $(TAGS_FILES) ++ list='$(SOURCES) $(HEADERS) $(LISP) $(TAGS_FILES)'; \ ++ unique=`for i in $$list; do \ ++ if test -f "$$i"; then echo $$i; else echo $(srcdir)/$$i; fi; \ ++ done | \ ++ $(AWK) '{ files[$$0] = 1; nonempty = 1; } \ ++ END { if (nonempty) { for (i in files) print i; }; }'`; \ ++ mkid -fID $$unique ++tags: TAGS ++ ++TAGS: $(HEADERS) $(SOURCES) $(TAGS_DEPENDENCIES) \ ++ $(TAGS_FILES) $(LISP) ++ set x; \ ++ here=`pwd`; \ ++ list='$(SOURCES) $(HEADERS) $(LISP) $(TAGS_FILES)'; \ ++ unique=`for i in $$list; do \ ++ if test -f "$$i"; then echo $$i; else echo $(srcdir)/$$i; fi; \ ++ done | \ ++ $(AWK) '{ files[$$0] = 1; nonempty = 1; } \ ++ END { if (nonempty) { for (i in files) print i; }; }'`; \ ++ shift; \ ++ if test -z "$(ETAGS_ARGS)$$*$$unique"; then :; else \ ++ test -n "$$unique" || unique=$$empty_fix; \ ++ if test $$# -gt 0; then \ ++ $(ETAGS) $(ETAGSFLAGS) $(AM_ETAGSFLAGS) $(ETAGS_ARGS) \ ++ "$$@" $$unique; \ ++ else \ ++ $(ETAGS) $(ETAGSFLAGS) $(AM_ETAGSFLAGS) $(ETAGS_ARGS) \ ++ $$unique; \ ++ fi; \ ++ fi ++ctags: CTAGS ++CTAGS: $(HEADERS) $(SOURCES) 
$(TAGS_DEPENDENCIES) \ ++ $(TAGS_FILES) $(LISP) ++ list='$(SOURCES) $(HEADERS) $(LISP) $(TAGS_FILES)'; \ ++ unique=`for i in $$list; do \ ++ if test -f "$$i"; then echo $$i; else echo $(srcdir)/$$i; fi; \ ++ done | \ ++ $(AWK) '{ files[$$0] = 1; nonempty = 1; } \ ++ END { if (nonempty) { for (i in files) print i; }; }'`; \ ++ test -z "$(CTAGS_ARGS)$$unique" \ ++ || $(CTAGS) $(CTAGSFLAGS) $(AM_CTAGSFLAGS) $(CTAGS_ARGS) \ ++ $$unique ++ ++GTAGS: ++ here=`$(am__cd) $(top_builddir) && pwd` \ ++ && $(am__cd) $(top_srcdir) \ ++ && gtags -i $(GTAGS_ARGS) "$$here" ++ ++distclean-tags: ++ -rm -f TAGS ID GTAGS GRTAGS GSYMS GPATH tags ++ ++distdir: $(DISTFILES) ++ @srcdirstrip=`echo "$(srcdir)" | sed 's/[].[^$$\\*]/\\\\&/g'`; \ ++ topsrcdirstrip=`echo "$(top_srcdir)" | sed 's/[].[^$$\\*]/\\\\&/g'`; \ ++ list='$(DISTFILES)'; \ ++ dist_files=`for file in $$list; do echo $$file; done | \ ++ sed -e "s|^$$srcdirstrip/||;t" \ ++ -e "s|^$$topsrcdirstrip/|$(top_builddir)/|;t"`; \ ++ case $$dist_files in \ ++ */*) $(MKDIR_P) `echo "$$dist_files" | \ ++ sed '/\//!d;s|^|$(distdir)/|;s,/[^/]*$$,,' | \ ++ sort -u` ;; \ ++ esac; \ ++ for file in $$dist_files; do \ ++ if test -f $$file || test -d $$file; then d=.; else d=$(srcdir); fi; \ ++ if test -d $$d/$$file; then \ ++ dir=`echo "/$$file" | sed -e 's,/[^/]*$$,,'`; \ ++ if test -d "$(distdir)/$$file"; then \ ++ find "$(distdir)/$$file" -type d ! -perm -700 -exec chmod u+rwx {} \;; \ ++ fi; \ ++ if test -d $(srcdir)/$$file && test $$d != $(srcdir); then \ ++ cp -fpR $(srcdir)/$$file "$(distdir)$$dir" || exit 1; \ ++ find "$(distdir)/$$file" -type d ! -perm -700 -exec chmod u+rwx {} \;; \ ++ fi; \ ++ cp -fpR $$d/$$file "$(distdir)$$dir" || exit 1; \ ++ else \ ++ test -f "$(distdir)/$$file" \ ++ || cp -p $$d/$$file "$(distdir)/$$file" \ ++ || exit 1; \ ++ fi; \ ++ done ++check-am: all-am ++check: check-am ++all-am: Makefile $(HEADERS) ++installdirs: ++ for dir in "$(DESTDIR)$(kerneldir)" "$(DESTDIR)$(libzfsdir)"; do \ ++ test -z "$$dir" || $(MKDIR_P) "$$dir"; \ ++ done ++install: install-am ++install-exec: install-exec-am ++install-data: install-data-am ++uninstall: uninstall-am ++ ++install-am: all-am ++ @$(MAKE) $(AM_MAKEFLAGS) install-exec-am install-data-am ++ ++installcheck: installcheck-am ++install-strip: ++ if test -z '$(STRIP)'; then \ ++ $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \ ++ install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \ ++ install; \ ++ else \ ++ $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \ ++ install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \ ++ "INSTALL_PROGRAM_ENV=STRIPPROG='$(STRIP)'" install; \ ++ fi ++mostlyclean-generic: ++ ++clean-generic: ++ ++distclean-generic: ++ -test -z "$(CONFIG_CLEAN_FILES)" || rm -f $(CONFIG_CLEAN_FILES) ++ -test . = "$(srcdir)" || test -z "$(CONFIG_CLEAN_VPATH_FILES)" || rm -f $(CONFIG_CLEAN_VPATH_FILES) ++ ++maintainer-clean-generic: ++ @echo "This command is intended for maintainers to use" ++ @echo "it deletes files that may require special tools to rebuild." 
++clean: clean-am ++ ++clean-am: clean-generic clean-libtool mostlyclean-am ++ ++distclean: distclean-am ++ -rm -f Makefile ++distclean-am: clean-am distclean-generic distclean-tags ++ ++dvi: dvi-am ++ ++dvi-am: ++ ++html: html-am ++ ++html-am: ++ ++info: info-am ++ ++info-am: ++ ++install-data-am: install-kernelHEADERS install-libzfsHEADERS ++ ++install-dvi: install-dvi-am ++ ++install-dvi-am: ++ ++install-exec-am: ++ ++install-html: install-html-am ++ ++install-html-am: ++ ++install-info: install-info-am ++ ++install-info-am: ++ ++install-man: ++ ++install-pdf: install-pdf-am ++ ++install-pdf-am: ++ ++install-ps: install-ps-am ++ ++install-ps-am: ++ ++installcheck-am: ++ ++maintainer-clean: maintainer-clean-am ++ -rm -f Makefile ++maintainer-clean-am: distclean-am maintainer-clean-generic ++ ++mostlyclean: mostlyclean-am ++ ++mostlyclean-am: mostlyclean-generic mostlyclean-libtool ++ ++pdf: pdf-am ++ ++pdf-am: ++ ++ps: ps-am ++ ++ps-am: ++ ++uninstall-am: uninstall-kernelHEADERS uninstall-libzfsHEADERS ++ ++.MAKE: install-am install-strip ++ ++.PHONY: CTAGS GTAGS all all-am check check-am clean clean-generic \ ++ clean-libtool ctags distclean distclean-generic \ ++ distclean-libtool distclean-tags distdir dvi dvi-am html \ ++ html-am info info-am install install-am install-data \ ++ install-data-am install-dvi install-dvi-am install-exec \ ++ install-exec-am install-html install-html-am install-info \ ++ install-info-am install-kernelHEADERS install-libzfsHEADERS \ ++ install-man install-pdf install-pdf-am install-ps \ ++ install-ps-am install-strip installcheck installcheck-am \ ++ installdirs maintainer-clean maintainer-clean-generic \ ++ mostlyclean mostlyclean-generic mostlyclean-libtool pdf pdf-am \ ++ ps ps-am tags uninstall uninstall-am uninstall-kernelHEADERS \ ++ uninstall-libzfsHEADERS ++ ++ ++# Tell versions [3.59,3.63) of GNU make to not export all variables. ++# Otherwise a system limit (for SysV at least) may be exceeded. ++.NOEXPORT: +diff -uNr linux-3.2.33-go.orig/include/zfs/linux/Makefile.am linux-3.2.33-go/include/zfs/linux/Makefile.am +--- linux-3.2.33-go.orig/include/zfs/linux/Makefile.am 1970-01-01 01:00:00.000000000 +0100 ++++ linux-3.2.33-go/include/zfs/linux/Makefile.am 2012-11-16 23:25:34.345039382 +0100 +@@ -0,0 +1,21 @@ ++COMMON_H = ++ ++KERNEL_H = \ ++ $(top_srcdir)/include/linux/dcache_compat.h \ ++ $(top_srcdir)/include/linux/xattr_compat.h \ ++ $(top_srcdir)/include/linux/vfs_compat.h \ ++ $(top_srcdir)/include/linux/blkdev_compat.h ++ ++USER_H = ++ ++EXTRA_DIST = $(COMMON_H) $(KERNEL_H) $(USER_H) ++ ++if CONFIG_USER ++libzfsdir = $(includedir)/libzfs/linux ++libzfs_HEADERS = $(COMMON_H) $(USER_H) ++endif ++ ++if CONFIG_KERNEL ++kerneldir = /usr/src/zfs-$(ZFS_META_VERSION)-$(ZFS_META_RELEASE)/$(LINUX_VERSION)/linux ++kernel_HEADERS = $(COMMON_H) $(KERNEL_H) ++endif +diff -uNr linux-3.2.33-go.orig/include/zfs/linux/Makefile.in linux-3.2.33-go/include/zfs/linux/Makefile.in +--- linux-3.2.33-go.orig/include/zfs/linux/Makefile.in 1970-01-01 01:00:00.000000000 +0100 ++++ linux-3.2.33-go/include/zfs/linux/Makefile.in 2012-11-16 23:25:34.345039382 +0100 +@@ -0,0 +1,664 @@ ++# Makefile.in generated by automake 1.11.6 from Makefile.am. ++# @configure_input@ ++ ++# Copyright (C) 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001, 2002, ++# 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010, 2011 Free Software ++# Foundation, Inc. 
++# This Makefile.in is free software; the Free Software Foundation ++# gives unlimited permission to copy and/or distribute it, ++# with or without modifications, as long as this notice is preserved. ++ ++# This program is distributed in the hope that it will be useful, ++# but WITHOUT ANY WARRANTY, to the extent permitted by law; without ++# even the implied warranty of MERCHANTABILITY or FITNESS FOR A ++# PARTICULAR PURPOSE. ++ ++@SET_MAKE@ ++ ++VPATH = @srcdir@ ++am__make_dryrun = \ ++ { \ ++ am__dry=no; \ ++ case $$MAKEFLAGS in \ ++ *\\[\ \ ]*) \ ++ echo 'am--echo: ; @echo "AM" OK' | $(MAKE) -f - 2>/dev/null \ ++ | grep '^AM OK$$' >/dev/null || am__dry=yes;; \ ++ *) \ ++ for am__flg in $$MAKEFLAGS; do \ ++ case $$am__flg in \ ++ *=*|--*) ;; \ ++ *n*) am__dry=yes; break;; \ ++ esac; \ ++ done;; \ ++ esac; \ ++ test $$am__dry = yes; \ ++ } ++pkgdatadir = $(datadir)/@PACKAGE@ ++pkgincludedir = $(includedir)/@PACKAGE@ ++pkglibdir = $(libdir)/@PACKAGE@ ++pkglibexecdir = $(libexecdir)/@PACKAGE@ ++am__cd = CDPATH="$${ZSH_VERSION+.}$(PATH_SEPARATOR)" && cd ++install_sh_DATA = $(install_sh) -c -m 644 ++install_sh_PROGRAM = $(install_sh) -c ++install_sh_SCRIPT = $(install_sh) -c ++INSTALL_HEADER = $(INSTALL_DATA) ++transform = $(program_transform_name) ++NORMAL_INSTALL = : ++PRE_INSTALL = : ++POST_INSTALL = : ++NORMAL_UNINSTALL = : ++PRE_UNINSTALL = : ++POST_UNINSTALL = : ++build_triplet = @build@ ++host_triplet = @host@ ++target_triplet = @target@ ++subdir = include/linux ++DIST_COMMON = $(am__kernel_HEADERS_DIST) $(libzfs_HEADERS) \ ++ $(srcdir)/Makefile.am $(srcdir)/Makefile.in ++ACLOCAL_M4 = $(top_srcdir)/aclocal.m4 ++am__aclocal_m4_deps = \ ++ $(top_srcdir)/config/always-no-unused-but-set-variable.m4 \ ++ $(top_srcdir)/config/kernel-automount.m4 \ ++ $(top_srcdir)/config/kernel-bdev-block-device-operations.m4 \ ++ $(top_srcdir)/config/kernel-bdev-logical-size.m4 \ ++ $(top_srcdir)/config/kernel-bdi-setup-and-register.m4 \ ++ $(top_srcdir)/config/kernel-bdi.m4 \ ++ $(top_srcdir)/config/kernel-bio-empty-barrier.m4 \ ++ $(top_srcdir)/config/kernel-bio-end-io-t-args.m4 \ ++ $(top_srcdir)/config/kernel-bio-failfast.m4 \ ++ $(top_srcdir)/config/kernel-bio-rw-syncio.m4 \ ++ $(top_srcdir)/config/kernel-blk-end-request.m4 \ ++ $(top_srcdir)/config/kernel-blk-fetch-request.m4 \ ++ $(top_srcdir)/config/kernel-blk-queue-discard.m4 \ ++ $(top_srcdir)/config/kernel-blk-queue-flush.m4 \ ++ $(top_srcdir)/config/kernel-blk-queue-io-opt.m4 \ ++ $(top_srcdir)/config/kernel-blk-queue-max-hw-sectors.m4 \ ++ $(top_srcdir)/config/kernel-blk-queue-max-segments.m4 \ ++ $(top_srcdir)/config/kernel-blk-queue-nonrot.m4 \ ++ $(top_srcdir)/config/kernel-blk-queue-physical-block-size.m4 \ ++ $(top_srcdir)/config/kernel-blk-requeue-request.m4 \ ++ $(top_srcdir)/config/kernel-blk-rq-bytes.m4 \ ++ $(top_srcdir)/config/kernel-blk-rq-pos.m4 \ ++ $(top_srcdir)/config/kernel-blk-rq-sectors.m4 \ ++ $(top_srcdir)/config/kernel-blkdev-get-by-path.m4 \ ++ $(top_srcdir)/config/kernel-blkdev-get.m4 \ ++ $(top_srcdir)/config/kernel-check-disk-size-change.m4 \ ++ $(top_srcdir)/config/kernel-clear-inode.m4 \ ++ $(top_srcdir)/config/kernel-commit-metadata.m4 \ ++ $(top_srcdir)/config/kernel-create-nameidata.m4 \ ++ $(top_srcdir)/config/kernel-d-make-root.m4 \ ++ $(top_srcdir)/config/kernel-d-obtain-alias.m4 \ ++ $(top_srcdir)/config/kernel-discard-granularity.m4 \ ++ $(top_srcdir)/config/kernel-elevator-change.m4 \ ++ $(top_srcdir)/config/kernel-encode-fh-inode.m4 \ ++ $(top_srcdir)/config/kernel-evict-inode.m4 \ ++ 
$(top_srcdir)/config/kernel-fallocate.m4 \ ++ $(top_srcdir)/config/kernel-fmode-t.m4 \ ++ $(top_srcdir)/config/kernel-fsync.m4 \ ++ $(top_srcdir)/config/kernel-get-disk-ro.m4 \ ++ $(top_srcdir)/config/kernel-get-gendisk.m4 \ ++ $(top_srcdir)/config/kernel-insert-inode-locked.m4 \ ++ $(top_srcdir)/config/kernel-invalidate-bdev-args.m4 \ ++ $(top_srcdir)/config/kernel-kobj-name-len.m4 \ ++ $(top_srcdir)/config/kernel-lookup-nameidata.m4 \ ++ $(top_srcdir)/config/kernel-mkdir-umode-t.m4 \ ++ $(top_srcdir)/config/kernel-mount-nodev.m4 \ ++ $(top_srcdir)/config/kernel-open-bdev-exclusive.m4 \ ++ $(top_srcdir)/config/kernel-rq-for-each_segment.m4 \ ++ $(top_srcdir)/config/kernel-rq-is_sync.m4 \ ++ $(top_srcdir)/config/kernel-security-inode-init.m4 \ ++ $(top_srcdir)/config/kernel-set-nlink.m4 \ ++ $(top_srcdir)/config/kernel-sget-args.m4 \ ++ $(top_srcdir)/config/kernel-show-options.m4 \ ++ $(top_srcdir)/config/kernel-shrink.m4 \ ++ $(top_srcdir)/config/kernel-truncate-range.m4 \ ++ $(top_srcdir)/config/kernel-truncate-setsize.m4 \ ++ $(top_srcdir)/config/kernel-xattr-handler.m4 \ ++ $(top_srcdir)/config/kernel.m4 \ ++ $(top_srcdir)/config/user-arch.m4 \ ++ $(top_srcdir)/config/user-frame-larger-than.m4 \ ++ $(top_srcdir)/config/user-ioctl.m4 \ ++ $(top_srcdir)/config/user-libblkid.m4 \ ++ $(top_srcdir)/config/user-libuuid.m4 \ ++ $(top_srcdir)/config/user-nptl_guard_within_stack.m4 \ ++ $(top_srcdir)/config/user-selinux.m4 \ ++ $(top_srcdir)/config/user-udev.m4 \ ++ $(top_srcdir)/config/user-zlib.m4 $(top_srcdir)/config/user.m4 \ ++ $(top_srcdir)/config/zfs-build.m4 \ ++ $(top_srcdir)/config/zfs-meta.m4 $(top_srcdir)/configure.ac ++am__configure_deps = $(am__aclocal_m4_deps) $(CONFIGURE_DEPENDENCIES) \ ++ $(ACLOCAL_M4) ++mkinstalldirs = $(install_sh) -d ++CONFIG_HEADER = $(top_builddir)/zfs_config.h ++CONFIG_CLEAN_FILES = ++CONFIG_CLEAN_VPATH_FILES = ++AM_V_GEN = $(am__v_GEN_@AM_V@) ++am__v_GEN_ = $(am__v_GEN_@AM_DEFAULT_V@) ++am__v_GEN_0 = @echo " GEN " $@; ++AM_V_at = $(am__v_at_@AM_V@) ++am__v_at_ = $(am__v_at_@AM_DEFAULT_V@) ++am__v_at_0 = @ ++SOURCES = ++DIST_SOURCES = ++am__can_run_installinfo = \ ++ case $$AM_UPDATE_INFO_DIR in \ ++ n|no|NO) false;; \ ++ *) (install-info --version) >/dev/null 2>&1;; \ ++ esac ++am__kernel_HEADERS_DIST = $(top_srcdir)/include/linux/dcache_compat.h \ ++ $(top_srcdir)/include/linux/xattr_compat.h \ ++ $(top_srcdir)/include/linux/vfs_compat.h \ ++ $(top_srcdir)/include/linux/blkdev_compat.h ++am__vpath_adj_setup = srcdirstrip=`echo "$(srcdir)" | sed 's|.|.|g'`; ++am__vpath_adj = case $$p in \ ++ $(srcdir)/*) f=`echo "$$p" | sed "s|^$$srcdirstrip/||"`;; \ ++ *) f=$$p;; \ ++ esac; ++am__strip_dir = f=`echo $$p | sed -e 's|^.*/||'`; ++am__install_max = 40 ++am__nobase_strip_setup = \ ++ srcdirstrip=`echo "$(srcdir)" | sed 's/[].[^$$\\*|]/\\\\&/g'` ++am__nobase_strip = \ ++ for p in $$list; do echo "$$p"; done | sed -e "s|$$srcdirstrip/||" ++am__nobase_list = $(am__nobase_strip_setup); \ ++ for p in $$list; do echo "$$p $$p"; done | \ ++ sed "s| $$srcdirstrip/| |;"' / .*\//!s/ .*/ ./; s,\( .*\)/[^/]*$$,\1,' | \ ++ $(AWK) 'BEGIN { files["."] = "" } { files[$$2] = files[$$2] " " $$1; \ ++ if (++n[$$2] == $(am__install_max)) \ ++ { print $$2, files[$$2]; n[$$2] = 0; files[$$2] = "" } } \ ++ END { for (dir in files) print dir, files[dir] }' ++am__base_list = \ ++ sed '$$!N;$$!N;$$!N;$$!N;$$!N;$$!N;$$!N;s/\n/ /g' | \ ++ sed '$$!N;$$!N;$$!N;$$!N;s/\n/ /g' ++am__uninstall_files_from_dir = { \ ++ test -z "$$files" \ ++ || { test ! -d "$$dir" && test ! 
-f "$$dir" && test ! -r "$$dir"; } \ ++ || { echo " ( cd '$$dir' && rm -f" $$files ")"; \ ++ $(am__cd) "$$dir" && rm -f $$files; }; \ ++ } ++am__installdirs = "$(DESTDIR)$(kerneldir)" "$(DESTDIR)$(libzfsdir)" ++HEADERS = $(kernel_HEADERS) $(libzfs_HEADERS) ++ETAGS = etags ++CTAGS = ctags ++DISTFILES = $(DIST_COMMON) $(DIST_SOURCES) $(TEXINFOS) $(EXTRA_DIST) ++ACLOCAL = @ACLOCAL@ ++ALIEN = @ALIEN@ ++ALIEN_VERSION = @ALIEN_VERSION@ ++AMTAR = @AMTAR@ ++AM_DEFAULT_VERBOSITY = @AM_DEFAULT_VERBOSITY@ ++AR = @AR@ ++AUTOCONF = @AUTOCONF@ ++AUTOHEADER = @AUTOHEADER@ ++AUTOMAKE = @AUTOMAKE@ ++AWK = @AWK@ ++CC = @CC@ ++CCAS = @CCAS@ ++CCASDEPMODE = @CCASDEPMODE@ ++CCASFLAGS = @CCASFLAGS@ ++CCDEPMODE = @CCDEPMODE@ ++CFLAGS = @CFLAGS@ ++CPP = @CPP@ ++CPPFLAGS = @CPPFLAGS@ ++CYGPATH_W = @CYGPATH_W@ ++DEBUG_CFLAGS = @DEBUG_CFLAGS@ ++DEBUG_DMU_TX = @DEBUG_DMU_TX@ ++DEBUG_STACKFLAGS = @DEBUG_STACKFLAGS@ ++DEBUG_ZFS = @DEBUG_ZFS@ ++DEFAULT_INIT_DIR = @DEFAULT_INIT_DIR@ ++DEFAULT_INIT_SCRIPT = @DEFAULT_INIT_SCRIPT@ ++DEFAULT_PACKAGE = @DEFAULT_PACKAGE@ ++DEFS = @DEFS@ ++DEPDIR = @DEPDIR@ ++DLLTOOL = @DLLTOOL@ ++DPKG = @DPKG@ ++DPKGBUILD = @DPKGBUILD@ ++DPKGBUILD_VERSION = @DPKGBUILD_VERSION@ ++DPKG_VERSION = @DPKG_VERSION@ ++DSYMUTIL = @DSYMUTIL@ ++DUMPBIN = @DUMPBIN@ ++ECHO_C = @ECHO_C@ ++ECHO_N = @ECHO_N@ ++ECHO_T = @ECHO_T@ ++EGREP = @EGREP@ ++EXEEXT = @EXEEXT@ ++FGREP = @FGREP@ ++FRAME_LARGER_THAN = @FRAME_LARGER_THAN@ ++GREP = @GREP@ ++HAVE_ALIEN = @HAVE_ALIEN@ ++HAVE_DPKG = @HAVE_DPKG@ ++HAVE_DPKGBUILD = @HAVE_DPKGBUILD@ ++HAVE_MAKEPKG = @HAVE_MAKEPKG@ ++HAVE_PACMAN = @HAVE_PACMAN@ ++HAVE_RPM = @HAVE_RPM@ ++HAVE_RPMBUILD = @HAVE_RPMBUILD@ ++INSTALL = @INSTALL@ ++INSTALL_DATA = @INSTALL_DATA@ ++INSTALL_PROGRAM = @INSTALL_PROGRAM@ ++INSTALL_SCRIPT = @INSTALL_SCRIPT@ ++INSTALL_STRIP_PROGRAM = @INSTALL_STRIP_PROGRAM@ ++KERNELCPPFLAGS = @KERNELCPPFLAGS@ ++KERNELMAKE_PARAMS = @KERNELMAKE_PARAMS@ ++LD = @LD@ ++LDFLAGS = @LDFLAGS@ ++LIBBLKID = @LIBBLKID@ ++LIBOBJS = @LIBOBJS@ ++LIBS = @LIBS@ ++LIBSELINUX = @LIBSELINUX@ ++LIBTOOL = @LIBTOOL@ ++LIBUUID = @LIBUUID@ ++LINUX = @LINUX@ ++LINUX_OBJ = @LINUX_OBJ@ ++LINUX_SYMBOLS = @LINUX_SYMBOLS@ ++LINUX_VERSION = @LINUX_VERSION@ ++LIPO = @LIPO@ ++LN_S = @LN_S@ ++LTLIBOBJS = @LTLIBOBJS@ ++MAINT = @MAINT@ ++MAKEINFO = @MAKEINFO@ ++MAKEPKG = @MAKEPKG@ ++MAKEPKG_VERSION = @MAKEPKG_VERSION@ ++MANIFEST_TOOL = @MANIFEST_TOOL@ ++MKDIR_P = @MKDIR_P@ ++NM = @NM@ ++NMEDIT = @NMEDIT@ ++NO_UNUSED_BUT_SET_VARIABLE = @NO_UNUSED_BUT_SET_VARIABLE@ ++OBJDUMP = @OBJDUMP@ ++OBJEXT = @OBJEXT@ ++OTOOL = @OTOOL@ ++OTOOL64 = @OTOOL64@ ++PACKAGE = @PACKAGE@ ++PACKAGE_BUGREPORT = @PACKAGE_BUGREPORT@ ++PACKAGE_NAME = @PACKAGE_NAME@ ++PACKAGE_STRING = @PACKAGE_STRING@ ++PACKAGE_TARNAME = @PACKAGE_TARNAME@ ++PACKAGE_URL = @PACKAGE_URL@ ++PACKAGE_VERSION = @PACKAGE_VERSION@ ++PACMAN = @PACMAN@ ++PACMAN_VERSION = @PACMAN_VERSION@ ++PATH_SEPARATOR = @PATH_SEPARATOR@ ++RANLIB = @RANLIB@ ++RPM = @RPM@ ++RPMBUILD = @RPMBUILD@ ++RPMBUILD_VERSION = @RPMBUILD_VERSION@ ++RPM_VERSION = @RPM_VERSION@ ++SED = @SED@ ++SET_MAKE = @SET_MAKE@ ++SHELL = @SHELL@ ++SPL = @SPL@ ++SPL_OBJ = @SPL_OBJ@ ++SPL_SYMBOLS = @SPL_SYMBOLS@ ++SPL_VERSION = @SPL_VERSION@ ++STRIP = @STRIP@ ++TARGET_ASM_DIR = @TARGET_ASM_DIR@ ++VENDOR = @VENDOR@ ++VERSION = @VERSION@ ++ZFS_CONFIG = @ZFS_CONFIG@ ++ZFS_META_ALIAS = @ZFS_META_ALIAS@ ++ZFS_META_AUTHOR = @ZFS_META_AUTHOR@ ++ZFS_META_DATA = @ZFS_META_DATA@ ++ZFS_META_LICENSE = @ZFS_META_LICENSE@ ++ZFS_META_LT_AGE = @ZFS_META_LT_AGE@ ++ZFS_META_LT_CURRENT = @ZFS_META_LT_CURRENT@ 
++ZFS_META_LT_REVISION = @ZFS_META_LT_REVISION@ ++ZFS_META_NAME = @ZFS_META_NAME@ ++ZFS_META_RELEASE = @ZFS_META_RELEASE@ ++ZFS_META_VERSION = @ZFS_META_VERSION@ ++ZLIB = @ZLIB@ ++abs_builddir = @abs_builddir@ ++abs_srcdir = @abs_srcdir@ ++abs_top_builddir = @abs_top_builddir@ ++abs_top_srcdir = @abs_top_srcdir@ ++ac_ct_AR = @ac_ct_AR@ ++ac_ct_CC = @ac_ct_CC@ ++ac_ct_DUMPBIN = @ac_ct_DUMPBIN@ ++am__include = @am__include@ ++am__leading_dot = @am__leading_dot@ ++am__quote = @am__quote@ ++am__tar = @am__tar@ ++am__untar = @am__untar@ ++bindir = @bindir@ ++build = @build@ ++build_alias = @build_alias@ ++build_cpu = @build_cpu@ ++build_os = @build_os@ ++build_vendor = @build_vendor@ ++builddir = @builddir@ ++datadir = @datadir@ ++datarootdir = @datarootdir@ ++docdir = @docdir@ ++dvidir = @dvidir@ ++exec_prefix = @exec_prefix@ ++host = @host@ ++host_alias = @host_alias@ ++host_cpu = @host_cpu@ ++host_os = @host_os@ ++host_vendor = @host_vendor@ ++htmldir = @htmldir@ ++includedir = @includedir@ ++infodir = @infodir@ ++install_sh = @install_sh@ ++libdir = @libdir@ ++libexecdir = @libexecdir@ ++localedir = @localedir@ ++localstatedir = @localstatedir@ ++mandir = @mandir@ ++mkdir_p = @mkdir_p@ ++oldincludedir = @oldincludedir@ ++pdfdir = @pdfdir@ ++prefix = @prefix@ ++program_transform_name = @program_transform_name@ ++psdir = @psdir@ ++sbindir = @sbindir@ ++sharedstatedir = @sharedstatedir@ ++srcdir = @srcdir@ ++sysconfdir = @sysconfdir@ ++target = @target@ ++target_alias = @target_alias@ ++target_cpu = @target_cpu@ ++target_os = @target_os@ ++target_vendor = @target_vendor@ ++top_build_prefix = @top_build_prefix@ ++top_builddir = @top_builddir@ ++top_srcdir = @top_srcdir@ ++udevdir = @udevdir@ ++udevruledir = @udevruledir@ ++COMMON_H = ++KERNEL_H = \ ++ $(top_srcdir)/include/linux/dcache_compat.h \ ++ $(top_srcdir)/include/linux/xattr_compat.h \ ++ $(top_srcdir)/include/linux/vfs_compat.h \ ++ $(top_srcdir)/include/linux/blkdev_compat.h ++ ++USER_H = ++EXTRA_DIST = $(COMMON_H) $(KERNEL_H) $(USER_H) ++@CONFIG_USER_TRUE@libzfsdir = $(includedir)/libzfs/linux ++@CONFIG_USER_TRUE@libzfs_HEADERS = $(COMMON_H) $(USER_H) ++@CONFIG_KERNEL_TRUE@kerneldir = /usr/src/zfs-$(ZFS_META_VERSION)-$(ZFS_META_RELEASE)/$(LINUX_VERSION)/linux ++@CONFIG_KERNEL_TRUE@kernel_HEADERS = $(COMMON_H) $(KERNEL_H) ++all: all-am ++ ++.SUFFIXES: ++$(srcdir)/Makefile.in: @MAINTAINER_MODE_TRUE@ $(srcdir)/Makefile.am $(am__configure_deps) ++ @for dep in $?; do \ ++ case '$(am__configure_deps)' in \ ++ *$$dep*) \ ++ ( cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh ) \ ++ && { if test -f $@; then exit 0; else break; fi; }; \ ++ exit 1;; \ ++ esac; \ ++ done; \ ++ echo ' cd $(top_srcdir) && $(AUTOMAKE) --gnu include/linux/Makefile'; \ ++ $(am__cd) $(top_srcdir) && \ ++ $(AUTOMAKE) --gnu include/linux/Makefile ++.PRECIOUS: Makefile ++Makefile: $(srcdir)/Makefile.in $(top_builddir)/config.status ++ @case '$?' 
in \ ++ *config.status*) \ ++ cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh;; \ ++ *) \ ++ echo ' cd $(top_builddir) && $(SHELL) ./config.status $(subdir)/$@ $(am__depfiles_maybe)'; \ ++ cd $(top_builddir) && $(SHELL) ./config.status $(subdir)/$@ $(am__depfiles_maybe);; \ ++ esac; ++ ++$(top_builddir)/config.status: $(top_srcdir)/configure $(CONFIG_STATUS_DEPENDENCIES) ++ cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh ++ ++$(top_srcdir)/configure: @MAINTAINER_MODE_TRUE@ $(am__configure_deps) ++ cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh ++$(ACLOCAL_M4): @MAINTAINER_MODE_TRUE@ $(am__aclocal_m4_deps) ++ cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh ++$(am__aclocal_m4_deps): ++ ++mostlyclean-libtool: ++ -rm -f *.lo ++ ++clean-libtool: ++ -rm -rf .libs _libs ++install-kernelHEADERS: $(kernel_HEADERS) ++ @$(NORMAL_INSTALL) ++ @list='$(kernel_HEADERS)'; test -n "$(kerneldir)" || list=; \ ++ if test -n "$$list"; then \ ++ echo " $(MKDIR_P) '$(DESTDIR)$(kerneldir)'"; \ ++ $(MKDIR_P) "$(DESTDIR)$(kerneldir)" || exit 1; \ ++ fi; \ ++ for p in $$list; do \ ++ if test -f "$$p"; then d=; else d="$(srcdir)/"; fi; \ ++ echo "$$d$$p"; \ ++ done | $(am__base_list) | \ ++ while read files; do \ ++ echo " $(INSTALL_HEADER) $$files '$(DESTDIR)$(kerneldir)'"; \ ++ $(INSTALL_HEADER) $$files "$(DESTDIR)$(kerneldir)" || exit $$?; \ ++ done ++ ++uninstall-kernelHEADERS: ++ @$(NORMAL_UNINSTALL) ++ @list='$(kernel_HEADERS)'; test -n "$(kerneldir)" || list=; \ ++ files=`for p in $$list; do echo $$p; done | sed -e 's|^.*/||'`; \ ++ dir='$(DESTDIR)$(kerneldir)'; $(am__uninstall_files_from_dir) ++install-libzfsHEADERS: $(libzfs_HEADERS) ++ @$(NORMAL_INSTALL) ++ @list='$(libzfs_HEADERS)'; test -n "$(libzfsdir)" || list=; \ ++ if test -n "$$list"; then \ ++ echo " $(MKDIR_P) '$(DESTDIR)$(libzfsdir)'"; \ ++ $(MKDIR_P) "$(DESTDIR)$(libzfsdir)" || exit 1; \ ++ fi; \ ++ for p in $$list; do \ ++ if test -f "$$p"; then d=; else d="$(srcdir)/"; fi; \ ++ echo "$$d$$p"; \ ++ done | $(am__base_list) | \ ++ while read files; do \ ++ echo " $(INSTALL_HEADER) $$files '$(DESTDIR)$(libzfsdir)'"; \ ++ $(INSTALL_HEADER) $$files "$(DESTDIR)$(libzfsdir)" || exit $$?; \ ++ done ++ ++uninstall-libzfsHEADERS: ++ @$(NORMAL_UNINSTALL) ++ @list='$(libzfs_HEADERS)'; test -n "$(libzfsdir)" || list=; \ ++ files=`for p in $$list; do echo $$p; done | sed -e 's|^.*/||'`; \ ++ dir='$(DESTDIR)$(libzfsdir)'; $(am__uninstall_files_from_dir) ++ ++ID: $(HEADERS) $(SOURCES) $(LISP) $(TAGS_FILES) ++ list='$(SOURCES) $(HEADERS) $(LISP) $(TAGS_FILES)'; \ ++ unique=`for i in $$list; do \ ++ if test -f "$$i"; then echo $$i; else echo $(srcdir)/$$i; fi; \ ++ done | \ ++ $(AWK) '{ files[$$0] = 1; nonempty = 1; } \ ++ END { if (nonempty) { for (i in files) print i; }; }'`; \ ++ mkid -fID $$unique ++tags: TAGS ++ ++TAGS: $(HEADERS) $(SOURCES) $(TAGS_DEPENDENCIES) \ ++ $(TAGS_FILES) $(LISP) ++ set x; \ ++ here=`pwd`; \ ++ list='$(SOURCES) $(HEADERS) $(LISP) $(TAGS_FILES)'; \ ++ unique=`for i in $$list; do \ ++ if test -f "$$i"; then echo $$i; else echo $(srcdir)/$$i; fi; \ ++ done | \ ++ $(AWK) '{ files[$$0] = 1; nonempty = 1; } \ ++ END { if (nonempty) { for (i in files) print i; }; }'`; \ ++ shift; \ ++ if test -z "$(ETAGS_ARGS)$$*$$unique"; then :; else \ ++ test -n "$$unique" || unique=$$empty_fix; \ ++ if test $$# -gt 0; then \ ++ $(ETAGS) $(ETAGSFLAGS) $(AM_ETAGSFLAGS) $(ETAGS_ARGS) \ ++ "$$@" $$unique; \ ++ else \ ++ $(ETAGS) $(ETAGSFLAGS) $(AM_ETAGSFLAGS) $(ETAGS_ARGS) \ ++ $$unique; \ ++ fi; \ ++ fi ++ctags: 
CTAGS ++CTAGS: $(HEADERS) $(SOURCES) $(TAGS_DEPENDENCIES) \ ++ $(TAGS_FILES) $(LISP) ++ list='$(SOURCES) $(HEADERS) $(LISP) $(TAGS_FILES)'; \ ++ unique=`for i in $$list; do \ ++ if test -f "$$i"; then echo $$i; else echo $(srcdir)/$$i; fi; \ ++ done | \ ++ $(AWK) '{ files[$$0] = 1; nonempty = 1; } \ ++ END { if (nonempty) { for (i in files) print i; }; }'`; \ ++ test -z "$(CTAGS_ARGS)$$unique" \ ++ || $(CTAGS) $(CTAGSFLAGS) $(AM_CTAGSFLAGS) $(CTAGS_ARGS) \ ++ $$unique ++ ++GTAGS: ++ here=`$(am__cd) $(top_builddir) && pwd` \ ++ && $(am__cd) $(top_srcdir) \ ++ && gtags -i $(GTAGS_ARGS) "$$here" ++ ++distclean-tags: ++ -rm -f TAGS ID GTAGS GRTAGS GSYMS GPATH tags ++ ++distdir: $(DISTFILES) ++ @srcdirstrip=`echo "$(srcdir)" | sed 's/[].[^$$\\*]/\\\\&/g'`; \ ++ topsrcdirstrip=`echo "$(top_srcdir)" | sed 's/[].[^$$\\*]/\\\\&/g'`; \ ++ list='$(DISTFILES)'; \ ++ dist_files=`for file in $$list; do echo $$file; done | \ ++ sed -e "s|^$$srcdirstrip/||;t" \ ++ -e "s|^$$topsrcdirstrip/|$(top_builddir)/|;t"`; \ ++ case $$dist_files in \ ++ */*) $(MKDIR_P) `echo "$$dist_files" | \ ++ sed '/\//!d;s|^|$(distdir)/|;s,/[^/]*$$,,' | \ ++ sort -u` ;; \ ++ esac; \ ++ for file in $$dist_files; do \ ++ if test -f $$file || test -d $$file; then d=.; else d=$(srcdir); fi; \ ++ if test -d $$d/$$file; then \ ++ dir=`echo "/$$file" | sed -e 's,/[^/]*$$,,'`; \ ++ if test -d "$(distdir)/$$file"; then \ ++ find "$(distdir)/$$file" -type d ! -perm -700 -exec chmod u+rwx {} \;; \ ++ fi; \ ++ if test -d $(srcdir)/$$file && test $$d != $(srcdir); then \ ++ cp -fpR $(srcdir)/$$file "$(distdir)$$dir" || exit 1; \ ++ find "$(distdir)/$$file" -type d ! -perm -700 -exec chmod u+rwx {} \;; \ ++ fi; \ ++ cp -fpR $$d/$$file "$(distdir)$$dir" || exit 1; \ ++ else \ ++ test -f "$(distdir)/$$file" \ ++ || cp -p $$d/$$file "$(distdir)/$$file" \ ++ || exit 1; \ ++ fi; \ ++ done ++check-am: all-am ++check: check-am ++all-am: Makefile $(HEADERS) ++installdirs: ++ for dir in "$(DESTDIR)$(kerneldir)" "$(DESTDIR)$(libzfsdir)"; do \ ++ test -z "$$dir" || $(MKDIR_P) "$$dir"; \ ++ done ++install: install-am ++install-exec: install-exec-am ++install-data: install-data-am ++uninstall: uninstall-am ++ ++install-am: all-am ++ @$(MAKE) $(AM_MAKEFLAGS) install-exec-am install-data-am ++ ++installcheck: installcheck-am ++install-strip: ++ if test -z '$(STRIP)'; then \ ++ $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \ ++ install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \ ++ install; \ ++ else \ ++ $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \ ++ install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \ ++ "INSTALL_PROGRAM_ENV=STRIPPROG='$(STRIP)'" install; \ ++ fi ++mostlyclean-generic: ++ ++clean-generic: ++ ++distclean-generic: ++ -test -z "$(CONFIG_CLEAN_FILES)" || rm -f $(CONFIG_CLEAN_FILES) ++ -test . = "$(srcdir)" || test -z "$(CONFIG_CLEAN_VPATH_FILES)" || rm -f $(CONFIG_CLEAN_VPATH_FILES) ++ ++maintainer-clean-generic: ++ @echo "This command is intended for maintainers to use" ++ @echo "it deletes files that may require special tools to rebuild." 
++clean: clean-am ++ ++clean-am: clean-generic clean-libtool mostlyclean-am ++ ++distclean: distclean-am ++ -rm -f Makefile ++distclean-am: clean-am distclean-generic distclean-tags ++ ++dvi: dvi-am ++ ++dvi-am: ++ ++html: html-am ++ ++html-am: ++ ++info: info-am ++ ++info-am: ++ ++install-data-am: install-kernelHEADERS install-libzfsHEADERS ++ ++install-dvi: install-dvi-am ++ ++install-dvi-am: ++ ++install-exec-am: ++ ++install-html: install-html-am ++ ++install-html-am: ++ ++install-info: install-info-am ++ ++install-info-am: ++ ++install-man: ++ ++install-pdf: install-pdf-am ++ ++install-pdf-am: ++ ++install-ps: install-ps-am ++ ++install-ps-am: ++ ++installcheck-am: ++ ++maintainer-clean: maintainer-clean-am ++ -rm -f Makefile ++maintainer-clean-am: distclean-am maintainer-clean-generic ++ ++mostlyclean: mostlyclean-am ++ ++mostlyclean-am: mostlyclean-generic mostlyclean-libtool ++ ++pdf: pdf-am ++ ++pdf-am: ++ ++ps: ps-am ++ ++ps-am: ++ ++uninstall-am: uninstall-kernelHEADERS uninstall-libzfsHEADERS ++ ++.MAKE: install-am install-strip ++ ++.PHONY: CTAGS GTAGS all all-am check check-am clean clean-generic \ ++ clean-libtool ctags distclean distclean-generic \ ++ distclean-libtool distclean-tags distdir dvi dvi-am html \ ++ html-am info info-am install install-am install-data \ ++ install-data-am install-dvi install-dvi-am install-exec \ ++ install-exec-am install-html install-html-am install-info \ ++ install-info-am install-kernelHEADERS install-libzfsHEADERS \ ++ install-man install-pdf install-pdf-am install-ps \ ++ install-ps-am install-strip installcheck installcheck-am \ ++ installdirs maintainer-clean maintainer-clean-generic \ ++ mostlyclean mostlyclean-generic mostlyclean-libtool pdf pdf-am \ ++ ps ps-am tags uninstall uninstall-am uninstall-kernelHEADERS \ ++ uninstall-libzfsHEADERS ++ ++ ++# Tell versions [3.59,3.63) of GNU make to not export all variables. ++# Otherwise a system limit (for SysV at least) may be exceeded. ++.NOEXPORT: +diff -uNr linux-3.2.33-go.orig/include/zfs/linux/vfs_compat.h linux-3.2.33-go/include/zfs/linux/vfs_compat.h +--- linux-3.2.33-go.orig/include/zfs/linux/vfs_compat.h 1970-01-01 01:00:00.000000000 +0100 ++++ linux-3.2.33-go/include/zfs/linux/vfs_compat.h 2012-11-16 23:25:34.345039382 +0100 +@@ -0,0 +1,144 @@ ++/* ++ * CDDL HEADER START ++ * ++ * The contents of this file are subject to the terms of the ++ * Common Development and Distribution License (the "License"). ++ * You may not use this file except in compliance with the License. ++ * ++ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE ++ * or http://www.opensolaris.org/os/licensing. ++ * See the License for the specific language governing permissions ++ * and limitations under the License. ++ * ++ * When distributing Covered Code, include this CDDL HEADER in each ++ * file and include the License file at usr/src/OPENSOLARIS.LICENSE. ++ * If applicable, add the following below this CDDL HEADER, with the ++ * fields enclosed by brackets "[]" replaced with your own identifying ++ * information: Portions Copyright [yyyy] [name of copyright owner] ++ * ++ * CDDL HEADER END ++ */ ++ ++/* ++ * Copyright (C) 2011 Lawrence Livermore National Security, LLC. ++ */ ++ ++#ifndef _ZFS_VFS_H ++#define _ZFS_VFS_H ++ ++/* ++ * 2.6.28 API change, ++ * Added insert_inode_locked() helper function, prior to this most callers ++ * used insert_inode_hash(). The older method doesn't check for collisions ++ * in the inode_hashtable but it still acceptible for use. 
++ */ ++#ifndef HAVE_INSERT_INODE_LOCKED ++static inline int ++insert_inode_locked(struct inode *ip) ++{ ++ insert_inode_hash(ip); ++ return (0); ++} ++#endif /* HAVE_INSERT_INODE_LOCKED */ ++ ++/* ++ * 2.6.35 API change, ++ * Add truncate_setsize() if it is not exported by the Linux kernel. ++ * ++ * Truncate the inode and pages associated with the inode. The pages are ++ * unmapped and removed from cache. ++ */ ++#ifndef HAVE_TRUNCATE_SETSIZE ++static inline void ++truncate_setsize(struct inode *ip, loff_t new) ++{ ++ struct address_space *mapping = ip->i_mapping; ++ ++ i_size_write(ip, new); ++ ++ unmap_mapping_range(mapping, new + PAGE_SIZE - 1, 0, 1); ++ truncate_inode_pages(mapping, new); ++ unmap_mapping_range(mapping, new + PAGE_SIZE - 1, 0, 1); ++} ++#endif /* HAVE_TRUNCATE_SETSIZE */ ++ ++#if defined(HAVE_BDI) && !defined(HAVE_BDI_SETUP_AND_REGISTER) ++/* ++ * 2.6.34 API change, ++ * Add bdi_setup_and_register() function if not yet provided by kernel. ++ * It is used to quickly initialize and register a BDI for the filesystem. ++ */ ++extern atomic_long_t zfs_bdi_seq; ++ ++static inline int ++bdi_setup_and_register(struct backing_dev_info *bdi,char *name,unsigned int cap) ++{ ++ char tmp[32]; ++ int error; ++ ++ bdi->name = name; ++ bdi->capabilities = cap; ++ error = bdi_init(bdi); ++ if (error) ++ return (error); ++ ++ sprintf(tmp, "%.28s%s", name, "-%d"); ++ error = bdi_register(bdi, NULL, tmp, ++ atomic_long_inc_return(&zfs_bdi_seq)); ++ if (error) { ++ bdi_destroy(bdi); ++ return (error); ++ } ++ ++ return (error); ++} ++#endif /* HAVE_BDI && !HAVE_BDI_SETUP_AND_REGISTER */ ++ ++/* ++ * 3.2-rc1 API change, ++ * Add set_nlink() if it is not exported by the Linux kernel. ++ * ++ * i_nlink is read-only in Linux 3.2, but it can be set directly in ++ * earlier kernels. ++ */ ++#ifndef HAVE_SET_NLINK ++static inline void ++set_nlink(struct inode *inode, unsigned int nlink) ++{ ++ inode->i_nlink = nlink; ++} ++#endif /* HAVE_SET_NLINK */ ++ ++/* ++ * 3.3 API change, ++ * The VFS .create, .mkdir and .mknod callbacks were updated to take a ++ * umode_t type rather than an int. To cleanly handle both definitions ++ * the zpl_umode_t type is introduced and set accordingly. ++ */ ++#ifdef HAVE_MKDIR_UMODE_T ++typedef umode_t zpl_umode_t; ++#else ++typedef int zpl_umode_t; ++#endif ++ ++/* ++ * 3.5 API change, ++ * The clear_inode() function replaces end_writeback() and introduces an ++ * ordering change regarding when the inode_sync_wait() occurs. See the ++ * configure check in config/kernel-clear-inode.m4 for full details. ++ */ ++#if defined(HAVE_EVICT_INODE) && !defined(HAVE_CLEAR_INODE) ++#define clear_inode(ip) end_writeback(ip) ++#endif /* HAVE_EVICT_INODE && !HAVE_CLEAR_INODE */ ++ ++/* ++ * 3.6 API change, ++ * The sget() helper function now takes the mount flags as an argument. 
++ */
++#ifdef HAVE_5ARG_SGET
++#define zpl_sget(type, cmp, set, fl, mtd) sget(type, cmp, set, fl, mtd)
++#else
++#define zpl_sget(type, cmp, set, fl, mtd) sget(type, cmp, set, mtd)
++#endif /* HAVE_5ARG_SGET */
++
++#endif /* _ZFS_VFS_H */
+diff -uNr linux-3.2.33-go.orig/include/zfs/linux/xattr_compat.h linux-3.2.33-go/include/zfs/linux/xattr_compat.h
+--- linux-3.2.33-go.orig/include/zfs/linux/xattr_compat.h 1970-01-01 01:00:00.000000000 +0100
++++ linux-3.2.33-go/include/zfs/linux/xattr_compat.h 2012-11-16 23:25:34.345039382 +0100
+@@ -0,0 +1,95 @@
++/*
++ * CDDL HEADER START
++ *
++ * The contents of this file are subject to the terms of the
++ * Common Development and Distribution License (the "License").
++ * You may not use this file except in compliance with the License.
++ *
++ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
++ * or http://www.opensolaris.org/os/licensing.
++ * See the License for the specific language governing permissions
++ * and limitations under the License.
++ *
++ * When distributing Covered Code, include this CDDL HEADER in each
++ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
++ * If applicable, add the following below this CDDL HEADER, with the
++ * fields enclosed by brackets "[]" replaced with your own identifying
++ * information: Portions Copyright [yyyy] [name of copyright owner]
++ *
++ * CDDL HEADER END
++ */
++
++/*
++ * Copyright (C) 2011 Lawrence Livermore National Security, LLC.
++ */
++
++#ifndef _ZFS_XATTR_H
++#define _ZFS_XATTR_H
++
++/*
++ * 2.6.35 API change,
++ * The const keyword was added to the 'struct xattr_handler' in the
++ * generic Linux super_block structure. To handle this we define an
++ * appropriate xattr_handler_t typedef which can be used. This was
++ * the preferred solution because it keeps the code clean and readable.
++ */
++#ifdef HAVE_CONST_XATTR_HANDLER
++typedef const struct xattr_handler xattr_handler_t;
++#else
++typedef struct xattr_handler xattr_handler_t;
++#endif
++
++/*
++ * 2.6.33 API change,
++ * The xattr_handler->get() callback was changed to take a dentry
++ * instead of an inode, and a handler_flags argument was added.
++ */
++#ifdef HAVE_DENTRY_XATTR_GET
++#define ZPL_XATTR_GET_WRAPPER(fn) \
++static int \
++fn(struct dentry *dentry, const char *name, void *buffer, size_t size, \
++ int unused_handler_flags) \
++{ \
++ return __ ## fn(dentry->d_inode, name, buffer, size); \
++}
++#else
++#define ZPL_XATTR_GET_WRAPPER(fn) \
++static int \
++fn(struct inode *ip, const char *name, void *buffer, size_t size) \
++{ \
++ return __ ## fn(ip, name, buffer, size); \
++}
++#endif /* HAVE_DENTRY_XATTR_GET */
++
++/*
++ * 2.6.33 API change,
++ * The xattr_handler->set() callback was changed to take a dentry
++ * instead of an inode, and a handler_flags argument was added.
++ */ ++#ifdef HAVE_DENTRY_XATTR_SET ++#define ZPL_XATTR_SET_WRAPPER(fn) \ ++static int \ ++fn(struct dentry *dentry, const char *name, const void *buffer, \ ++ size_t size, int flags, int unused_handler_flags) \ ++{ \ ++ return __ ## fn(dentry->d_inode, name, buffer, size, flags); \ ++} ++#else ++#define ZPL_XATTR_SET_WRAPPER(fn) \ ++static int \ ++fn(struct inode *ip, const char *name, const void *buffer, \ ++ size_t size, int flags) \ ++{ \ ++ return __ ## fn(ip, name, buffer, size, flags); \ ++} ++#endif /* HAVE_DENTRY_XATTR_SET */ ++ ++#ifdef HAVE_6ARGS_SECURITY_INODE_INIT_SECURITY ++#define zpl_security_inode_init_security(ip, dip, qstr, nm, val, len) \ ++ security_inode_init_security(ip, dip, qstr, nm, val, len) ++#else ++#define zpl_security_inode_init_security(ip, dip, qstr, nm, val, len) \ ++ security_inode_init_security(ip, dip, nm, val, len) ++#endif /* HAVE_6ARGS_SECURITY_INODE_INIT_SECURITY */ ++ ++#endif /* _ZFS_XATTR_H */ +diff -uNr linux-3.2.33-go.orig/include/zfs/Makefile linux-3.2.33-go/include/zfs/Makefile +--- linux-3.2.33-go.orig/include/zfs/Makefile 1970-01-01 01:00:00.000000000 +0100 ++++ linux-3.2.33-go/include/zfs/Makefile 2012-11-16 23:25:34.344039393 +0100 +@@ -0,0 +1,841 @@ ++# Makefile.in generated by automake 1.11.6 from Makefile.am. ++# include/Makefile. Generated from Makefile.in by configure. ++ ++# Copyright (C) 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001, 2002, ++# 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010, 2011 Free Software ++# Foundation, Inc. ++# This Makefile.in is free software; the Free Software Foundation ++# gives unlimited permission to copy and/or distribute it, ++# with or without modifications, as long as this notice is preserved. ++ ++# This program is distributed in the hope that it will be useful, ++# but WITHOUT ANY WARRANTY, to the extent permitted by law; without ++# even the implied warranty of MERCHANTABILITY or FITNESS FOR A ++# PARTICULAR PURPOSE. 
++ ++ ++ ++ ++am__make_dryrun = \ ++ { \ ++ am__dry=no; \ ++ case $$MAKEFLAGS in \ ++ *\\[\ \ ]*) \ ++ echo 'am--echo: ; @echo "AM" OK' | $(MAKE) -f - 2>/dev/null \ ++ | grep '^AM OK$$' >/dev/null || am__dry=yes;; \ ++ *) \ ++ for am__flg in $$MAKEFLAGS; do \ ++ case $$am__flg in \ ++ *=*|--*) ;; \ ++ *n*) am__dry=yes; break;; \ ++ esac; \ ++ done;; \ ++ esac; \ ++ test $$am__dry = yes; \ ++ } ++pkgdatadir = $(datadir)/zfs ++pkgincludedir = $(includedir)/zfs ++pkglibdir = $(libdir)/zfs ++pkglibexecdir = $(libexecdir)/zfs ++am__cd = CDPATH="$${ZSH_VERSION+.}$(PATH_SEPARATOR)" && cd ++install_sh_DATA = $(install_sh) -c -m 644 ++install_sh_PROGRAM = $(install_sh) -c ++install_sh_SCRIPT = $(install_sh) -c ++INSTALL_HEADER = $(INSTALL_DATA) ++transform = $(program_transform_name) ++NORMAL_INSTALL = : ++PRE_INSTALL = : ++POST_INSTALL = : ++NORMAL_UNINSTALL = : ++PRE_UNINSTALL = : ++POST_UNINSTALL = : ++build_triplet = x86_64-unknown-linux-gnu ++host_triplet = x86_64-unknown-linux-gnu ++target_triplet = x86_64-unknown-linux-gnu ++subdir = include ++DIST_COMMON = $(am__kernel_HEADERS_DIST) $(am__libzfs_HEADERS_DIST) \ ++ $(srcdir)/Makefile.am $(srcdir)/Makefile.in ++ACLOCAL_M4 = $(top_srcdir)/aclocal.m4 ++am__aclocal_m4_deps = \ ++ $(top_srcdir)/config/always-no-unused-but-set-variable.m4 \ ++ $(top_srcdir)/config/kernel-automount.m4 \ ++ $(top_srcdir)/config/kernel-bdev-block-device-operations.m4 \ ++ $(top_srcdir)/config/kernel-bdev-logical-size.m4 \ ++ $(top_srcdir)/config/kernel-bdi-setup-and-register.m4 \ ++ $(top_srcdir)/config/kernel-bdi.m4 \ ++ $(top_srcdir)/config/kernel-bio-empty-barrier.m4 \ ++ $(top_srcdir)/config/kernel-bio-end-io-t-args.m4 \ ++ $(top_srcdir)/config/kernel-bio-failfast.m4 \ ++ $(top_srcdir)/config/kernel-bio-rw-syncio.m4 \ ++ $(top_srcdir)/config/kernel-blk-end-request.m4 \ ++ $(top_srcdir)/config/kernel-blk-fetch-request.m4 \ ++ $(top_srcdir)/config/kernel-blk-queue-discard.m4 \ ++ $(top_srcdir)/config/kernel-blk-queue-flush.m4 \ ++ $(top_srcdir)/config/kernel-blk-queue-io-opt.m4 \ ++ $(top_srcdir)/config/kernel-blk-queue-max-hw-sectors.m4 \ ++ $(top_srcdir)/config/kernel-blk-queue-max-segments.m4 \ ++ $(top_srcdir)/config/kernel-blk-queue-nonrot.m4 \ ++ $(top_srcdir)/config/kernel-blk-queue-physical-block-size.m4 \ ++ $(top_srcdir)/config/kernel-blk-requeue-request.m4 \ ++ $(top_srcdir)/config/kernel-blk-rq-bytes.m4 \ ++ $(top_srcdir)/config/kernel-blk-rq-pos.m4 \ ++ $(top_srcdir)/config/kernel-blk-rq-sectors.m4 \ ++ $(top_srcdir)/config/kernel-blkdev-get-by-path.m4 \ ++ $(top_srcdir)/config/kernel-blkdev-get.m4 \ ++ $(top_srcdir)/config/kernel-check-disk-size-change.m4 \ ++ $(top_srcdir)/config/kernel-clear-inode.m4 \ ++ $(top_srcdir)/config/kernel-commit-metadata.m4 \ ++ $(top_srcdir)/config/kernel-create-nameidata.m4 \ ++ $(top_srcdir)/config/kernel-d-make-root.m4 \ ++ $(top_srcdir)/config/kernel-d-obtain-alias.m4 \ ++ $(top_srcdir)/config/kernel-discard-granularity.m4 \ ++ $(top_srcdir)/config/kernel-elevator-change.m4 \ ++ $(top_srcdir)/config/kernel-encode-fh-inode.m4 \ ++ $(top_srcdir)/config/kernel-evict-inode.m4 \ ++ $(top_srcdir)/config/kernel-fallocate.m4 \ ++ $(top_srcdir)/config/kernel-fmode-t.m4 \ ++ $(top_srcdir)/config/kernel-fsync.m4 \ ++ $(top_srcdir)/config/kernel-get-disk-ro.m4 \ ++ $(top_srcdir)/config/kernel-get-gendisk.m4 \ ++ $(top_srcdir)/config/kernel-insert-inode-locked.m4 \ ++ $(top_srcdir)/config/kernel-invalidate-bdev-args.m4 \ ++ $(top_srcdir)/config/kernel-kobj-name-len.m4 \ ++ $(top_srcdir)/config/kernel-lookup-nameidata.m4 \ ++ 
$(top_srcdir)/config/kernel-mkdir-umode-t.m4 \ ++ $(top_srcdir)/config/kernel-mount-nodev.m4 \ ++ $(top_srcdir)/config/kernel-open-bdev-exclusive.m4 \ ++ $(top_srcdir)/config/kernel-rq-for-each_segment.m4 \ ++ $(top_srcdir)/config/kernel-rq-is_sync.m4 \ ++ $(top_srcdir)/config/kernel-security-inode-init.m4 \ ++ $(top_srcdir)/config/kernel-set-nlink.m4 \ ++ $(top_srcdir)/config/kernel-sget-args.m4 \ ++ $(top_srcdir)/config/kernel-show-options.m4 \ ++ $(top_srcdir)/config/kernel-shrink.m4 \ ++ $(top_srcdir)/config/kernel-truncate-range.m4 \ ++ $(top_srcdir)/config/kernel-truncate-setsize.m4 \ ++ $(top_srcdir)/config/kernel-xattr-handler.m4 \ ++ $(top_srcdir)/config/kernel.m4 \ ++ $(top_srcdir)/config/user-arch.m4 \ ++ $(top_srcdir)/config/user-frame-larger-than.m4 \ ++ $(top_srcdir)/config/user-ioctl.m4 \ ++ $(top_srcdir)/config/user-libblkid.m4 \ ++ $(top_srcdir)/config/user-libuuid.m4 \ ++ $(top_srcdir)/config/user-nptl_guard_within_stack.m4 \ ++ $(top_srcdir)/config/user-selinux.m4 \ ++ $(top_srcdir)/config/user-udev.m4 \ ++ $(top_srcdir)/config/user-zlib.m4 $(top_srcdir)/config/user.m4 \ ++ $(top_srcdir)/config/zfs-build.m4 \ ++ $(top_srcdir)/config/zfs-meta.m4 $(top_srcdir)/configure.ac ++am__configure_deps = $(am__aclocal_m4_deps) $(CONFIGURE_DEPENDENCIES) \ ++ $(ACLOCAL_M4) ++mkinstalldirs = $(install_sh) -d ++CONFIG_HEADER = $(top_builddir)/zfs_config.h ++CONFIG_CLEAN_FILES = ++CONFIG_CLEAN_VPATH_FILES = ++AM_V_GEN = $(am__v_GEN_$(V)) ++am__v_GEN_ = $(am__v_GEN_$(AM_DEFAULT_VERBOSITY)) ++am__v_GEN_0 = @echo " GEN " $@; ++AM_V_at = $(am__v_at_$(V)) ++am__v_at_ = $(am__v_at_$(AM_DEFAULT_VERBOSITY)) ++am__v_at_0 = @ ++SOURCES = ++DIST_SOURCES = ++RECURSIVE_TARGETS = all-recursive check-recursive dvi-recursive \ ++ html-recursive info-recursive install-data-recursive \ ++ install-dvi-recursive install-exec-recursive \ ++ install-html-recursive install-info-recursive \ ++ install-pdf-recursive install-ps-recursive install-recursive \ ++ installcheck-recursive installdirs-recursive pdf-recursive \ ++ ps-recursive uninstall-recursive ++am__can_run_installinfo = \ ++ case $$AM_UPDATE_INFO_DIR in \ ++ n|no|NO) false;; \ ++ *) (install-info --version) >/dev/null 2>&1;; \ ++ esac ++am__kernel_HEADERS_DIST = $(top_srcdir)/include/zfs_comutil.h \ ++ $(top_srcdir)/include/zfs_deleg.h \ ++ $(top_srcdir)/include/zfs_fletcher.h \ ++ $(top_srcdir)/include/zfs_namecheck.h \ ++ $(top_srcdir)/include/zfs_prop.h \ ++ $(top_srcdir)/include/zpios-ctl.h \ ++ $(top_srcdir)/include/zpios-internal.h ++am__vpath_adj_setup = srcdirstrip=`echo "$(srcdir)" | sed 's|.|.|g'`; ++am__vpath_adj = case $$p in \ ++ $(srcdir)/*) f=`echo "$$p" | sed "s|^$$srcdirstrip/||"`;; \ ++ *) f=$$p;; \ ++ esac; ++am__strip_dir = f=`echo $$p | sed -e 's|^.*/||'`; ++am__install_max = 40 ++am__nobase_strip_setup = \ ++ srcdirstrip=`echo "$(srcdir)" | sed 's/[].[^$$\\*|]/\\\\&/g'` ++am__nobase_strip = \ ++ for p in $$list; do echo "$$p"; done | sed -e "s|$$srcdirstrip/||" ++am__nobase_list = $(am__nobase_strip_setup); \ ++ for p in $$list; do echo "$$p $$p"; done | \ ++ sed "s| $$srcdirstrip/| |;"' / .*\//!s/ .*/ ./; s,\( .*\)/[^/]*$$,\1,' | \ ++ $(AWK) 'BEGIN { files["."] = "" } { files[$$2] = files[$$2] " " $$1; \ ++ if (++n[$$2] == $(am__install_max)) \ ++ { print $$2, files[$$2]; n[$$2] = 0; files[$$2] = "" } } \ ++ END { for (dir in files) print dir, files[dir] }' ++am__base_list = \ ++ sed '$$!N;$$!N;$$!N;$$!N;$$!N;$$!N;$$!N;s/\n/ /g' | \ ++ sed '$$!N;$$!N;$$!N;$$!N;s/\n/ /g' ++am__uninstall_files_from_dir = { \ ++ test -z "$$files" 
\ ++ || { test ! -d "$$dir" && test ! -f "$$dir" && test ! -r "$$dir"; } \ ++ || { echo " ( cd '$$dir' && rm -f" $$files ")"; \ ++ $(am__cd) "$$dir" && rm -f $$files; }; \ ++ } ++am__installdirs = "$(DESTDIR)$(kerneldir)" "$(DESTDIR)$(libzfsdir)" ++am__libzfs_HEADERS_DIST = $(top_srcdir)/include/zfs_comutil.h \ ++ $(top_srcdir)/include/zfs_deleg.h \ ++ $(top_srcdir)/include/zfs_fletcher.h \ ++ $(top_srcdir)/include/zfs_namecheck.h \ ++ $(top_srcdir)/include/zfs_prop.h \ ++ $(top_srcdir)/include/zpios-ctl.h \ ++ $(top_srcdir)/include/libnvpair.h \ ++ $(top_srcdir)/include/libuutil_common.h \ ++ $(top_srcdir)/include/libuutil.h \ ++ $(top_srcdir)/include/libuutil_impl.h \ ++ $(top_srcdir)/include/libzfs.h \ ++ $(top_srcdir)/include/libzfs_impl.h ++HEADERS = $(kernel_HEADERS) $(libzfs_HEADERS) ++RECURSIVE_CLEAN_TARGETS = mostlyclean-recursive clean-recursive \ ++ distclean-recursive maintainer-clean-recursive ++AM_RECURSIVE_TARGETS = $(RECURSIVE_TARGETS:-recursive=) \ ++ $(RECURSIVE_CLEAN_TARGETS:-recursive=) tags TAGS ctags CTAGS \ ++ distdir ++ETAGS = etags ++CTAGS = ctags ++DIST_SUBDIRS = $(SUBDIRS) ++DISTFILES = $(DIST_COMMON) $(DIST_SOURCES) $(TEXINFOS) $(EXTRA_DIST) ++am__relativize = \ ++ dir0=`pwd`; \ ++ sed_first='s,^\([^/]*\)/.*$$,\1,'; \ ++ sed_rest='s,^[^/]*/*,,'; \ ++ sed_last='s,^.*/\([^/]*\)$$,\1,'; \ ++ sed_butlast='s,/*[^/]*$$,,'; \ ++ while test -n "$$dir1"; do \ ++ first=`echo "$$dir1" | sed -e "$$sed_first"`; \ ++ if test "$$first" != "."; then \ ++ if test "$$first" = ".."; then \ ++ dir2=`echo "$$dir0" | sed -e "$$sed_last"`/"$$dir2"; \ ++ dir0=`echo "$$dir0" | sed -e "$$sed_butlast"`; \ ++ else \ ++ first2=`echo "$$dir2" | sed -e "$$sed_first"`; \ ++ if test "$$first2" = "$$first"; then \ ++ dir2=`echo "$$dir2" | sed -e "$$sed_rest"`; \ ++ else \ ++ dir2="../$$dir2"; \ ++ fi; \ ++ dir0="$$dir0"/"$$first"; \ ++ fi; \ ++ fi; \ ++ dir1=`echo "$$dir1" | sed -e "$$sed_rest"`; \ ++ done; \ ++ reldir="$$dir2" ++ACLOCAL = ${SHELL} /root/zfs-0.6.0-rc12/config/missing --run aclocal-1.11 ++ALIEN = alien ++ALIEN_VERSION = ++AMTAR = $${TAR-tar} ++AM_DEFAULT_VERBOSITY = 1 ++AR = ar ++AUTOCONF = ${SHELL} /root/zfs-0.6.0-rc12/config/missing --run autoconf ++AUTOHEADER = ${SHELL} /root/zfs-0.6.0-rc12/config/missing --run autoheader ++AUTOMAKE = ${SHELL} /root/zfs-0.6.0-rc12/config/missing --run automake-1.11 ++AWK = gawk ++CC = gcc ++CCAS = gcc ++CCASDEPMODE = depmode=gcc3 ++CCASFLAGS = -g -O2 ++CCDEPMODE = depmode=gcc3 ++CFLAGS = -g -O2 ++CPP = gcc -E ++CPPFLAGS = ++CYGPATH_W = echo ++DEBUG_CFLAGS = -DNDEBUG ++DEBUG_DMU_TX = _without_debug_dmu_tx ++DEBUG_STACKFLAGS = ++DEBUG_ZFS = _without_debug ++DEFAULT_INIT_DIR = ${prefix}/etc/init.d ++DEFAULT_INIT_SCRIPT = gentoo ++DEFAULT_PACKAGE = tgz ++DEFS = -DHAVE_CONFIG_H ++DEPDIR = .deps ++DLLTOOL = false ++DPKG = dpkg ++DPKGBUILD = dpkg-buildpackage ++DPKGBUILD_VERSION = ++DPKG_VERSION = ++DSYMUTIL = ++DUMPBIN = ++ECHO_C = ++ECHO_N = -n ++ECHO_T = ++EGREP = /bin/grep -E ++EXEEXT = ++FGREP = /bin/grep -F ++FRAME_LARGER_THAN = -Wframe-larger-than=1024 ++GREP = /bin/grep ++HAVE_ALIEN = no ++HAVE_DPKG = no ++HAVE_DPKGBUILD = no ++HAVE_MAKEPKG = ++HAVE_PACMAN = ++HAVE_RPM = yes ++HAVE_RPMBUILD = yes ++INSTALL = /usr/bin/install -c ++INSTALL_DATA = ${INSTALL} -m 644 ++INSTALL_PROGRAM = ${INSTALL} ++INSTALL_SCRIPT = ${INSTALL} ++INSTALL_STRIP_PROGRAM = $(install_sh) -c -s ++KERNELCPPFLAGS = -Wno-unused-but-set-variable -DHAVE_SPL -D_KERNEL -DTEXT_DOMAIN=\"zfs-linux-kernel\" -DNDEBUG ++KERNELMAKE_PARAMS = O=/usr/src/linux-3.6.0-sabayon ++LD = 
/usr/x86_64-pc-linux-gnu/bin/ld -m elf_x86_64 ++LDFLAGS = ++LIBBLKID = ++LIBOBJS = ++LIBS = -luuid -luuid -lz -lz -lz ++LIBSELINUX = ++LIBTOOL = $(SHELL) $(top_builddir)/libtool ++LIBUUID = -luuid ++LINUX = /usr/src/linux-3.2.33-go ++LINUX_OBJ = /usr/src/linux-3.6.0-sabayon ++LINUX_SYMBOLS = NONE ++LINUX_VERSION = 3.6.0-sabayon ++LIPO = ++LN_S = ln -s ++LTLIBOBJS = ++MAINT = # ++MAKEINFO = ${SHELL} /root/zfs-0.6.0-rc12/config/missing --run makeinfo ++MAKEPKG = ++MAKEPKG_VERSION = ++MANIFEST_TOOL = : ++MKDIR_P = /bin/mkdir -p ++NM = /usr/bin/nm -B ++NMEDIT = ++NO_UNUSED_BUT_SET_VARIABLE = -Wno-unused-but-set-variable ++OBJDUMP = objdump ++OBJEXT = o ++OTOOL = ++OTOOL64 = ++PACKAGE = zfs ++PACKAGE_BUGREPORT = ++PACKAGE_NAME = ++PACKAGE_STRING = ++PACKAGE_TARNAME = ++PACKAGE_URL = ++PACKAGE_VERSION = ++PACMAN = ++PACMAN_VERSION = ++PATH_SEPARATOR = : ++RANLIB = ranlib ++RPM = rpm ++RPMBUILD = rpmbuild ++RPMBUILD_VERSION = 4.10.0 ++RPM_VERSION = 4.10.0 ++SED = /bin/sed ++SET_MAKE = ++SHELL = /bin/sh ++SPL = /usr/src/linux-3.2.33-go ++SPL_OBJ = /usr/src/linux-3.2.33-go ++SPL_SYMBOLS = NONE ++SPL_VERSION = 0.6.0-rc12 ++STRIP = strip ++TARGET_ASM_DIR = asm-x86_64 ++VENDOR = gentoo ++VERSION = 0.6.0 ++ZFS_CONFIG = all ++ZFS_META_ALIAS = zfs-0.6.0-rc12 ++ZFS_META_AUTHOR = Sun Microsystems/Oracle, Lawrence Livermore National Laboratory ++ZFS_META_DATA = ++ZFS_META_LICENSE = CDDL ++ZFS_META_LT_AGE = ++ZFS_META_LT_CURRENT = ++ZFS_META_LT_REVISION = ++ZFS_META_NAME = zfs ++ZFS_META_RELEASE = rc12 ++ZFS_META_VERSION = 0.6.0 ++ZLIB = -lz ++abs_builddir = /root/zfs-0.6.0-rc12/include ++abs_srcdir = /root/zfs-0.6.0-rc12/include ++abs_top_builddir = /root/zfs-0.6.0-rc12 ++abs_top_srcdir = /root/zfs-0.6.0-rc12 ++ac_ct_AR = ar ++ac_ct_CC = gcc ++ac_ct_DUMPBIN = ++am__include = include ++am__leading_dot = . ++am__quote = ++am__tar = $${TAR-tar} chof - "$$tardir" ++am__untar = $${TAR-tar} xf - ++bindir = ${exec_prefix}/bin ++build = x86_64-unknown-linux-gnu ++build_alias = ++build_cpu = x86_64 ++build_os = linux-gnu ++build_vendor = unknown ++builddir = . ++datadir = ${datarootdir} ++datarootdir = ${prefix}/share ++docdir = ${datarootdir}/doc/${PACKAGE} ++dvidir = ${docdir} ++exec_prefix = ${prefix} ++host = x86_64-unknown-linux-gnu ++host_alias = ++host_cpu = x86_64 ++host_os = linux-gnu ++host_vendor = unknown ++htmldir = ${docdir} ++includedir = ${prefix}/include ++infodir = ${datarootdir}/info ++install_sh = ${SHELL} /root/zfs-0.6.0-rc12/config/install-sh ++libdir = ${exec_prefix}/lib ++libexecdir = ${exec_prefix}/libexec ++localedir = ${datarootdir}/locale ++localstatedir = ${prefix}/var ++mandir = ${datarootdir}/man ++mkdir_p = /bin/mkdir -p ++oldincludedir = /usr/include ++pdfdir = ${docdir} ++prefix = /usr/local ++program_transform_name = s,x,x, ++psdir = ${docdir} ++sbindir = ${exec_prefix}/sbin ++sharedstatedir = ${prefix}/com ++srcdir = . ++sysconfdir = ${prefix}/etc ++target = x86_64-unknown-linux-gnu ++target_alias = ++target_cpu = x86_64 ++target_os = linux-gnu ++target_vendor = unknown ++top_build_prefix = ../ ++top_builddir = .. ++top_srcdir = .. 
++udevdir = ${exec_prefix}/lib/udev ++udevruledir = ${udevdir}/rules.d ++SUBDIRS = linux sys ++COMMON_H = \ ++ $(top_srcdir)/include/zfs_comutil.h \ ++ $(top_srcdir)/include/zfs_deleg.h \ ++ $(top_srcdir)/include/zfs_fletcher.h \ ++ $(top_srcdir)/include/zfs_namecheck.h \ ++ $(top_srcdir)/include/zfs_prop.h \ ++ $(top_srcdir)/include/zpios-ctl.h ++ ++KERNEL_H = \ ++ $(top_srcdir)/include/zpios-internal.h ++ ++USER_H = \ ++ $(top_srcdir)/include/libnvpair.h \ ++ $(top_srcdir)/include/libuutil_common.h \ ++ $(top_srcdir)/include/libuutil.h \ ++ $(top_srcdir)/include/libuutil_impl.h \ ++ $(top_srcdir)/include/libzfs.h \ ++ $(top_srcdir)/include/libzfs_impl.h ++ ++EXTRA_DIST = $(COMMON_H) $(KERNEL_H) $(USER_H) ++libzfsdir = $(includedir)/libzfs ++libzfs_HEADERS = $(COMMON_H) $(USER_H) ++#kerneldir = /usr/src/zfs-$(ZFS_META_VERSION)-$(ZFS_META_RELEASE)/$(LINUX_VERSION) ++#kernel_HEADERS = $(COMMON_H) $(KERNEL_H) ++all: all-recursive ++ ++.SUFFIXES: ++$(srcdir)/Makefile.in: # $(srcdir)/Makefile.am $(am__configure_deps) ++ @for dep in $?; do \ ++ case '$(am__configure_deps)' in \ ++ *$$dep*) \ ++ ( cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh ) \ ++ && { if test -f $@; then exit 0; else break; fi; }; \ ++ exit 1;; \ ++ esac; \ ++ done; \ ++ echo ' cd $(top_srcdir) && $(AUTOMAKE) --gnu include/Makefile'; \ ++ $(am__cd) $(top_srcdir) && \ ++ $(AUTOMAKE) --gnu include/Makefile ++.PRECIOUS: Makefile ++Makefile: $(srcdir)/Makefile.in $(top_builddir)/config.status ++ @case '$?' in \ ++ *config.status*) \ ++ cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh;; \ ++ *) \ ++ echo ' cd $(top_builddir) && $(SHELL) ./config.status $(subdir)/$@ $(am__depfiles_maybe)'; \ ++ cd $(top_builddir) && $(SHELL) ./config.status $(subdir)/$@ $(am__depfiles_maybe);; \ ++ esac; ++ ++$(top_builddir)/config.status: $(top_srcdir)/configure $(CONFIG_STATUS_DEPENDENCIES) ++ cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh ++ ++$(top_srcdir)/configure: # $(am__configure_deps) ++ cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh ++$(ACLOCAL_M4): # $(am__aclocal_m4_deps) ++ cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh ++$(am__aclocal_m4_deps): ++ ++mostlyclean-libtool: ++ -rm -f *.lo ++ ++clean-libtool: ++ -rm -rf .libs _libs ++install-kernelHEADERS: $(kernel_HEADERS) ++ @$(NORMAL_INSTALL) ++ @list='$(kernel_HEADERS)'; test -n "$(kerneldir)" || list=; \ ++ if test -n "$$list"; then \ ++ echo " $(MKDIR_P) '$(DESTDIR)$(kerneldir)'"; \ ++ $(MKDIR_P) "$(DESTDIR)$(kerneldir)" || exit 1; \ ++ fi; \ ++ for p in $$list; do \ ++ if test -f "$$p"; then d=; else d="$(srcdir)/"; fi; \ ++ echo "$$d$$p"; \ ++ done | $(am__base_list) | \ ++ while read files; do \ ++ echo " $(INSTALL_HEADER) $$files '$(DESTDIR)$(kerneldir)'"; \ ++ $(INSTALL_HEADER) $$files "$(DESTDIR)$(kerneldir)" || exit $$?; \ ++ done ++ ++uninstall-kernelHEADERS: ++ @$(NORMAL_UNINSTALL) ++ @list='$(kernel_HEADERS)'; test -n "$(kerneldir)" || list=; \ ++ files=`for p in $$list; do echo $$p; done | sed -e 's|^.*/||'`; \ ++ dir='$(DESTDIR)$(kerneldir)'; $(am__uninstall_files_from_dir) ++install-libzfsHEADERS: $(libzfs_HEADERS) ++ @$(NORMAL_INSTALL) ++ @list='$(libzfs_HEADERS)'; test -n "$(libzfsdir)" || list=; \ ++ if test -n "$$list"; then \ ++ echo " $(MKDIR_P) '$(DESTDIR)$(libzfsdir)'"; \ ++ $(MKDIR_P) "$(DESTDIR)$(libzfsdir)" || exit 1; \ ++ fi; \ ++ for p in $$list; do \ ++ if test -f "$$p"; then d=; else d="$(srcdir)/"; fi; \ ++ echo "$$d$$p"; \ ++ done | $(am__base_list) | \ ++ while read files; do \ ++ echo " 
$(INSTALL_HEADER) $$files '$(DESTDIR)$(libzfsdir)'"; \ ++ $(INSTALL_HEADER) $$files "$(DESTDIR)$(libzfsdir)" || exit $$?; \ ++ done ++ ++uninstall-libzfsHEADERS: ++ @$(NORMAL_UNINSTALL) ++ @list='$(libzfs_HEADERS)'; test -n "$(libzfsdir)" || list=; \ ++ files=`for p in $$list; do echo $$p; done | sed -e 's|^.*/||'`; \ ++ dir='$(DESTDIR)$(libzfsdir)'; $(am__uninstall_files_from_dir) ++ ++# This directory's subdirectories are mostly independent; you can cd ++# into them and run `make' without going through this Makefile. ++# To change the values of `make' variables: instead of editing Makefiles, ++# (1) if the variable is set in `config.status', edit `config.status' ++# (which will cause the Makefiles to be regenerated when you run `make'); ++# (2) otherwise, pass the desired values on the `make' command line. ++$(RECURSIVE_TARGETS): ++ @fail= failcom='exit 1'; \ ++ for f in x $$MAKEFLAGS; do \ ++ case $$f in \ ++ *=* | --[!k]*);; \ ++ *k*) failcom='fail=yes';; \ ++ esac; \ ++ done; \ ++ dot_seen=no; \ ++ target=`echo $@ | sed s/-recursive//`; \ ++ list='$(SUBDIRS)'; for subdir in $$list; do \ ++ echo "Making $$target in $$subdir"; \ ++ if test "$$subdir" = "."; then \ ++ dot_seen=yes; \ ++ local_target="$$target-am"; \ ++ else \ ++ local_target="$$target"; \ ++ fi; \ ++ ($(am__cd) $$subdir && $(MAKE) $(AM_MAKEFLAGS) $$local_target) \ ++ || eval $$failcom; \ ++ done; \ ++ if test "$$dot_seen" = "no"; then \ ++ $(MAKE) $(AM_MAKEFLAGS) "$$target-am" || exit 1; \ ++ fi; test -z "$$fail" ++ ++$(RECURSIVE_CLEAN_TARGETS): ++ @fail= failcom='exit 1'; \ ++ for f in x $$MAKEFLAGS; do \ ++ case $$f in \ ++ *=* | --[!k]*);; \ ++ *k*) failcom='fail=yes';; \ ++ esac; \ ++ done; \ ++ dot_seen=no; \ ++ case "$@" in \ ++ distclean-* | maintainer-clean-*) list='$(DIST_SUBDIRS)' ;; \ ++ *) list='$(SUBDIRS)' ;; \ ++ esac; \ ++ rev=''; for subdir in $$list; do \ ++ if test "$$subdir" = "."; then :; else \ ++ rev="$$subdir $$rev"; \ ++ fi; \ ++ done; \ ++ rev="$$rev ."; \ ++ target=`echo $@ | sed s/-recursive//`; \ ++ for subdir in $$rev; do \ ++ echo "Making $$target in $$subdir"; \ ++ if test "$$subdir" = "."; then \ ++ local_target="$$target-am"; \ ++ else \ ++ local_target="$$target"; \ ++ fi; \ ++ ($(am__cd) $$subdir && $(MAKE) $(AM_MAKEFLAGS) $$local_target) \ ++ || eval $$failcom; \ ++ done && test -z "$$fail" ++tags-recursive: ++ list='$(SUBDIRS)'; for subdir in $$list; do \ ++ test "$$subdir" = . || ($(am__cd) $$subdir && $(MAKE) $(AM_MAKEFLAGS) tags); \ ++ done ++ctags-recursive: ++ list='$(SUBDIRS)'; for subdir in $$list; do \ ++ test "$$subdir" = . || ($(am__cd) $$subdir && $(MAKE) $(AM_MAKEFLAGS) ctags); \ ++ done ++ ++ID: $(HEADERS) $(SOURCES) $(LISP) $(TAGS_FILES) ++ list='$(SOURCES) $(HEADERS) $(LISP) $(TAGS_FILES)'; \ ++ unique=`for i in $$list; do \ ++ if test -f "$$i"; then echo $$i; else echo $(srcdir)/$$i; fi; \ ++ done | \ ++ $(AWK) '{ files[$$0] = 1; nonempty = 1; } \ ++ END { if (nonempty) { for (i in files) print i; }; }'`; \ ++ mkid -fID $$unique ++tags: TAGS ++ ++TAGS: tags-recursive $(HEADERS) $(SOURCES) $(TAGS_DEPENDENCIES) \ ++ $(TAGS_FILES) $(LISP) ++ set x; \ ++ here=`pwd`; \ ++ if ($(ETAGS) --etags-include --version) >/dev/null 2>&1; then \ ++ include_option=--etags-include; \ ++ empty_fix=.; \ ++ else \ ++ include_option=--include; \ ++ empty_fix=; \ ++ fi; \ ++ list='$(SUBDIRS)'; for subdir in $$list; do \ ++ if test "$$subdir" = .; then :; else \ ++ test ! 
-f $$subdir/TAGS || \ ++ set "$$@" "$$include_option=$$here/$$subdir/TAGS"; \ ++ fi; \ ++ done; \ ++ list='$(SOURCES) $(HEADERS) $(LISP) $(TAGS_FILES)'; \ ++ unique=`for i in $$list; do \ ++ if test -f "$$i"; then echo $$i; else echo $(srcdir)/$$i; fi; \ ++ done | \ ++ $(AWK) '{ files[$$0] = 1; nonempty = 1; } \ ++ END { if (nonempty) { for (i in files) print i; }; }'`; \ ++ shift; \ ++ if test -z "$(ETAGS_ARGS)$$*$$unique"; then :; else \ ++ test -n "$$unique" || unique=$$empty_fix; \ ++ if test $$# -gt 0; then \ ++ $(ETAGS) $(ETAGSFLAGS) $(AM_ETAGSFLAGS) $(ETAGS_ARGS) \ ++ "$$@" $$unique; \ ++ else \ ++ $(ETAGS) $(ETAGSFLAGS) $(AM_ETAGSFLAGS) $(ETAGS_ARGS) \ ++ $$unique; \ ++ fi; \ ++ fi ++ctags: CTAGS ++CTAGS: ctags-recursive $(HEADERS) $(SOURCES) $(TAGS_DEPENDENCIES) \ ++ $(TAGS_FILES) $(LISP) ++ list='$(SOURCES) $(HEADERS) $(LISP) $(TAGS_FILES)'; \ ++ unique=`for i in $$list; do \ ++ if test -f "$$i"; then echo $$i; else echo $(srcdir)/$$i; fi; \ ++ done | \ ++ $(AWK) '{ files[$$0] = 1; nonempty = 1; } \ ++ END { if (nonempty) { for (i in files) print i; }; }'`; \ ++ test -z "$(CTAGS_ARGS)$$unique" \ ++ || $(CTAGS) $(CTAGSFLAGS) $(AM_CTAGSFLAGS) $(CTAGS_ARGS) \ ++ $$unique ++ ++GTAGS: ++ here=`$(am__cd) $(top_builddir) && pwd` \ ++ && $(am__cd) $(top_srcdir) \ ++ && gtags -i $(GTAGS_ARGS) "$$here" ++ ++distclean-tags: ++ -rm -f TAGS ID GTAGS GRTAGS GSYMS GPATH tags ++ ++distdir: $(DISTFILES) ++ @srcdirstrip=`echo "$(srcdir)" | sed 's/[].[^$$\\*]/\\\\&/g'`; \ ++ topsrcdirstrip=`echo "$(top_srcdir)" | sed 's/[].[^$$\\*]/\\\\&/g'`; \ ++ list='$(DISTFILES)'; \ ++ dist_files=`for file in $$list; do echo $$file; done | \ ++ sed -e "s|^$$srcdirstrip/||;t" \ ++ -e "s|^$$topsrcdirstrip/|$(top_builddir)/|;t"`; \ ++ case $$dist_files in \ ++ */*) $(MKDIR_P) `echo "$$dist_files" | \ ++ sed '/\//!d;s|^|$(distdir)/|;s,/[^/]*$$,,' | \ ++ sort -u` ;; \ ++ esac; \ ++ for file in $$dist_files; do \ ++ if test -f $$file || test -d $$file; then d=.; else d=$(srcdir); fi; \ ++ if test -d $$d/$$file; then \ ++ dir=`echo "/$$file" | sed -e 's,/[^/]*$$,,'`; \ ++ if test -d "$(distdir)/$$file"; then \ ++ find "$(distdir)/$$file" -type d ! -perm -700 -exec chmod u+rwx {} \;; \ ++ fi; \ ++ if test -d $(srcdir)/$$file && test $$d != $(srcdir); then \ ++ cp -fpR $(srcdir)/$$file "$(distdir)$$dir" || exit 1; \ ++ find "$(distdir)/$$file" -type d ! 
-perm -700 -exec chmod u+rwx {} \;; \ ++ fi; \ ++ cp -fpR $$d/$$file "$(distdir)$$dir" || exit 1; \ ++ else \ ++ test -f "$(distdir)/$$file" \ ++ || cp -p $$d/$$file "$(distdir)/$$file" \ ++ || exit 1; \ ++ fi; \ ++ done ++ @list='$(DIST_SUBDIRS)'; for subdir in $$list; do \ ++ if test "$$subdir" = .; then :; else \ ++ $(am__make_dryrun) \ ++ || test -d "$(distdir)/$$subdir" \ ++ || $(MKDIR_P) "$(distdir)/$$subdir" \ ++ || exit 1; \ ++ dir1=$$subdir; dir2="$(distdir)/$$subdir"; \ ++ $(am__relativize); \ ++ new_distdir=$$reldir; \ ++ dir1=$$subdir; dir2="$(top_distdir)"; \ ++ $(am__relativize); \ ++ new_top_distdir=$$reldir; \ ++ echo " (cd $$subdir && $(MAKE) $(AM_MAKEFLAGS) top_distdir="$$new_top_distdir" distdir="$$new_distdir" \\"; \ ++ echo " am__remove_distdir=: am__skip_length_check=: am__skip_mode_fix=: distdir)"; \ ++ ($(am__cd) $$subdir && \ ++ $(MAKE) $(AM_MAKEFLAGS) \ ++ top_distdir="$$new_top_distdir" \ ++ distdir="$$new_distdir" \ ++ am__remove_distdir=: \ ++ am__skip_length_check=: \ ++ am__skip_mode_fix=: \ ++ distdir) \ ++ || exit 1; \ ++ fi; \ ++ done ++check-am: all-am ++check: check-recursive ++all-am: Makefile $(HEADERS) ++installdirs: installdirs-recursive ++installdirs-am: ++ for dir in "$(DESTDIR)$(kerneldir)" "$(DESTDIR)$(libzfsdir)"; do \ ++ test -z "$$dir" || $(MKDIR_P) "$$dir"; \ ++ done ++install: install-recursive ++install-exec: install-exec-recursive ++install-data: install-data-recursive ++uninstall: uninstall-recursive ++ ++install-am: all-am ++ @$(MAKE) $(AM_MAKEFLAGS) install-exec-am install-data-am ++ ++installcheck: installcheck-recursive ++install-strip: ++ if test -z '$(STRIP)'; then \ ++ $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \ ++ install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \ ++ install; \ ++ else \ ++ $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \ ++ install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \ ++ "INSTALL_PROGRAM_ENV=STRIPPROG='$(STRIP)'" install; \ ++ fi ++mostlyclean-generic: ++ ++clean-generic: ++ ++distclean-generic: ++ -test -z "$(CONFIG_CLEAN_FILES)" || rm -f $(CONFIG_CLEAN_FILES) ++ -test . = "$(srcdir)" || test -z "$(CONFIG_CLEAN_VPATH_FILES)" || rm -f $(CONFIG_CLEAN_VPATH_FILES) ++ ++maintainer-clean-generic: ++ @echo "This command is intended for maintainers to use" ++ @echo "it deletes files that may require special tools to rebuild." 
++clean: clean-recursive ++ ++clean-am: clean-generic clean-libtool mostlyclean-am ++ ++distclean: distclean-recursive ++ -rm -f Makefile ++distclean-am: clean-am distclean-generic distclean-tags ++ ++dvi: dvi-recursive ++ ++dvi-am: ++ ++html: html-recursive ++ ++html-am: ++ ++info: info-recursive ++ ++info-am: ++ ++install-data-am: install-kernelHEADERS install-libzfsHEADERS ++ ++install-dvi: install-dvi-recursive ++ ++install-dvi-am: ++ ++install-exec-am: ++ ++install-html: install-html-recursive ++ ++install-html-am: ++ ++install-info: install-info-recursive ++ ++install-info-am: ++ ++install-man: ++ ++install-pdf: install-pdf-recursive ++ ++install-pdf-am: ++ ++install-ps: install-ps-recursive ++ ++install-ps-am: ++ ++installcheck-am: ++ ++maintainer-clean: maintainer-clean-recursive ++ -rm -f Makefile ++maintainer-clean-am: distclean-am maintainer-clean-generic ++ ++mostlyclean: mostlyclean-recursive ++ ++mostlyclean-am: mostlyclean-generic mostlyclean-libtool ++ ++pdf: pdf-recursive ++ ++pdf-am: ++ ++ps: ps-recursive ++ ++ps-am: ++ ++uninstall-am: uninstall-kernelHEADERS uninstall-libzfsHEADERS ++ ++.MAKE: $(RECURSIVE_CLEAN_TARGETS) $(RECURSIVE_TARGETS) ctags-recursive \ ++ install-am install-strip tags-recursive ++ ++.PHONY: $(RECURSIVE_CLEAN_TARGETS) $(RECURSIVE_TARGETS) CTAGS GTAGS \ ++ all all-am check check-am clean clean-generic clean-libtool \ ++ ctags ctags-recursive distclean distclean-generic \ ++ distclean-libtool distclean-tags distdir dvi dvi-am html \ ++ html-am info info-am install install-am install-data \ ++ install-data-am install-dvi install-dvi-am install-exec \ ++ install-exec-am install-html install-html-am install-info \ ++ install-info-am install-kernelHEADERS install-libzfsHEADERS \ ++ install-man install-pdf install-pdf-am install-ps \ ++ install-ps-am install-strip installcheck installcheck-am \ ++ installdirs installdirs-am maintainer-clean \ ++ maintainer-clean-generic mostlyclean mostlyclean-generic \ ++ mostlyclean-libtool pdf pdf-am ps ps-am tags tags-recursive \ ++ uninstall uninstall-am uninstall-kernelHEADERS \ ++ uninstall-libzfsHEADERS ++ ++ ++# Tell versions [3.59,3.63) of GNU make to not export all variables. ++# Otherwise a system limit (for SysV at least) may be exceeded. 
++.NOEXPORT: +diff -uNr linux-3.2.33-go.orig/include/zfs/Makefile.am linux-3.2.33-go/include/zfs/Makefile.am +--- linux-3.2.33-go.orig/include/zfs/Makefile.am 1970-01-01 01:00:00.000000000 +0100 ++++ linux-3.2.33-go/include/zfs/Makefile.am 2012-11-16 23:25:34.336039485 +0100 +@@ -0,0 +1,32 @@ ++SUBDIRS = linux sys ++ ++COMMON_H = \ ++ $(top_srcdir)/include/zfs_comutil.h \ ++ $(top_srcdir)/include/zfs_deleg.h \ ++ $(top_srcdir)/include/zfs_fletcher.h \ ++ $(top_srcdir)/include/zfs_namecheck.h \ ++ $(top_srcdir)/include/zfs_prop.h \ ++ $(top_srcdir)/include/zpios-ctl.h ++ ++KERNEL_H = \ ++ $(top_srcdir)/include/zpios-internal.h ++ ++USER_H = \ ++ $(top_srcdir)/include/libnvpair.h \ ++ $(top_srcdir)/include/libuutil_common.h \ ++ $(top_srcdir)/include/libuutil.h \ ++ $(top_srcdir)/include/libuutil_impl.h \ ++ $(top_srcdir)/include/libzfs.h \ ++ $(top_srcdir)/include/libzfs_impl.h ++ ++EXTRA_DIST = $(COMMON_H) $(KERNEL_H) $(USER_H) ++ ++if CONFIG_USER ++libzfsdir = $(includedir)/libzfs ++libzfs_HEADERS = $(COMMON_H) $(USER_H) ++endif ++ ++if CONFIG_KERNEL ++kerneldir = /usr/src/zfs-$(ZFS_META_VERSION)-$(ZFS_META_RELEASE)/$(LINUX_VERSION) ++kernel_HEADERS = $(COMMON_H) $(KERNEL_H) ++endif +diff -uNr linux-3.2.33-go.orig/include/zfs/Makefile.in linux-3.2.33-go/include/zfs/Makefile.in +--- linux-3.2.33-go.orig/include/zfs/Makefile.in 1970-01-01 01:00:00.000000000 +0100 ++++ linux-3.2.33-go/include/zfs/Makefile.in 2012-11-16 23:25:34.344039393 +0100 +@@ -0,0 +1,841 @@ ++# Makefile.in generated by automake 1.11.6 from Makefile.am. ++# @configure_input@ ++ ++# Copyright (C) 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001, 2002, ++# 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010, 2011 Free Software ++# Foundation, Inc. ++# This Makefile.in is free software; the Free Software Foundation ++# gives unlimited permission to copy and/or distribute it, ++# with or without modifications, as long as this notice is preserved. ++ ++# This program is distributed in the hope that it will be useful, ++# but WITHOUT ANY WARRANTY, to the extent permitted by law; without ++# even the implied warranty of MERCHANTABILITY or FITNESS FOR A ++# PARTICULAR PURPOSE. 
++ ++@SET_MAKE@ ++ ++VPATH = @srcdir@ ++am__make_dryrun = \ ++ { \ ++ am__dry=no; \ ++ case $$MAKEFLAGS in \ ++ *\\[\ \ ]*) \ ++ echo 'am--echo: ; @echo "AM" OK' | $(MAKE) -f - 2>/dev/null \ ++ | grep '^AM OK$$' >/dev/null || am__dry=yes;; \ ++ *) \ ++ for am__flg in $$MAKEFLAGS; do \ ++ case $$am__flg in \ ++ *=*|--*) ;; \ ++ *n*) am__dry=yes; break;; \ ++ esac; \ ++ done;; \ ++ esac; \ ++ test $$am__dry = yes; \ ++ } ++pkgdatadir = $(datadir)/@PACKAGE@ ++pkgincludedir = $(includedir)/@PACKAGE@ ++pkglibdir = $(libdir)/@PACKAGE@ ++pkglibexecdir = $(libexecdir)/@PACKAGE@ ++am__cd = CDPATH="$${ZSH_VERSION+.}$(PATH_SEPARATOR)" && cd ++install_sh_DATA = $(install_sh) -c -m 644 ++install_sh_PROGRAM = $(install_sh) -c ++install_sh_SCRIPT = $(install_sh) -c ++INSTALL_HEADER = $(INSTALL_DATA) ++transform = $(program_transform_name) ++NORMAL_INSTALL = : ++PRE_INSTALL = : ++POST_INSTALL = : ++NORMAL_UNINSTALL = : ++PRE_UNINSTALL = : ++POST_UNINSTALL = : ++build_triplet = @build@ ++host_triplet = @host@ ++target_triplet = @target@ ++subdir = include ++DIST_COMMON = $(am__kernel_HEADERS_DIST) $(am__libzfs_HEADERS_DIST) \ ++ $(srcdir)/Makefile.am $(srcdir)/Makefile.in ++ACLOCAL_M4 = $(top_srcdir)/aclocal.m4 ++am__aclocal_m4_deps = \ ++ $(top_srcdir)/config/always-no-unused-but-set-variable.m4 \ ++ $(top_srcdir)/config/kernel-automount.m4 \ ++ $(top_srcdir)/config/kernel-bdev-block-device-operations.m4 \ ++ $(top_srcdir)/config/kernel-bdev-logical-size.m4 \ ++ $(top_srcdir)/config/kernel-bdi-setup-and-register.m4 \ ++ $(top_srcdir)/config/kernel-bdi.m4 \ ++ $(top_srcdir)/config/kernel-bio-empty-barrier.m4 \ ++ $(top_srcdir)/config/kernel-bio-end-io-t-args.m4 \ ++ $(top_srcdir)/config/kernel-bio-failfast.m4 \ ++ $(top_srcdir)/config/kernel-bio-rw-syncio.m4 \ ++ $(top_srcdir)/config/kernel-blk-end-request.m4 \ ++ $(top_srcdir)/config/kernel-blk-fetch-request.m4 \ ++ $(top_srcdir)/config/kernel-blk-queue-discard.m4 \ ++ $(top_srcdir)/config/kernel-blk-queue-flush.m4 \ ++ $(top_srcdir)/config/kernel-blk-queue-io-opt.m4 \ ++ $(top_srcdir)/config/kernel-blk-queue-max-hw-sectors.m4 \ ++ $(top_srcdir)/config/kernel-blk-queue-max-segments.m4 \ ++ $(top_srcdir)/config/kernel-blk-queue-nonrot.m4 \ ++ $(top_srcdir)/config/kernel-blk-queue-physical-block-size.m4 \ ++ $(top_srcdir)/config/kernel-blk-requeue-request.m4 \ ++ $(top_srcdir)/config/kernel-blk-rq-bytes.m4 \ ++ $(top_srcdir)/config/kernel-blk-rq-pos.m4 \ ++ $(top_srcdir)/config/kernel-blk-rq-sectors.m4 \ ++ $(top_srcdir)/config/kernel-blkdev-get-by-path.m4 \ ++ $(top_srcdir)/config/kernel-blkdev-get.m4 \ ++ $(top_srcdir)/config/kernel-check-disk-size-change.m4 \ ++ $(top_srcdir)/config/kernel-clear-inode.m4 \ ++ $(top_srcdir)/config/kernel-commit-metadata.m4 \ ++ $(top_srcdir)/config/kernel-create-nameidata.m4 \ ++ $(top_srcdir)/config/kernel-d-make-root.m4 \ ++ $(top_srcdir)/config/kernel-d-obtain-alias.m4 \ ++ $(top_srcdir)/config/kernel-discard-granularity.m4 \ ++ $(top_srcdir)/config/kernel-elevator-change.m4 \ ++ $(top_srcdir)/config/kernel-encode-fh-inode.m4 \ ++ $(top_srcdir)/config/kernel-evict-inode.m4 \ ++ $(top_srcdir)/config/kernel-fallocate.m4 \ ++ $(top_srcdir)/config/kernel-fmode-t.m4 \ ++ $(top_srcdir)/config/kernel-fsync.m4 \ ++ $(top_srcdir)/config/kernel-get-disk-ro.m4 \ ++ $(top_srcdir)/config/kernel-get-gendisk.m4 \ ++ $(top_srcdir)/config/kernel-insert-inode-locked.m4 \ ++ $(top_srcdir)/config/kernel-invalidate-bdev-args.m4 \ ++ $(top_srcdir)/config/kernel-kobj-name-len.m4 \ ++ $(top_srcdir)/config/kernel-lookup-nameidata.m4 \ ++ 
$(top_srcdir)/config/kernel-mkdir-umode-t.m4 \ ++ $(top_srcdir)/config/kernel-mount-nodev.m4 \ ++ $(top_srcdir)/config/kernel-open-bdev-exclusive.m4 \ ++ $(top_srcdir)/config/kernel-rq-for-each_segment.m4 \ ++ $(top_srcdir)/config/kernel-rq-is_sync.m4 \ ++ $(top_srcdir)/config/kernel-security-inode-init.m4 \ ++ $(top_srcdir)/config/kernel-set-nlink.m4 \ ++ $(top_srcdir)/config/kernel-sget-args.m4 \ ++ $(top_srcdir)/config/kernel-show-options.m4 \ ++ $(top_srcdir)/config/kernel-shrink.m4 \ ++ $(top_srcdir)/config/kernel-truncate-range.m4 \ ++ $(top_srcdir)/config/kernel-truncate-setsize.m4 \ ++ $(top_srcdir)/config/kernel-xattr-handler.m4 \ ++ $(top_srcdir)/config/kernel.m4 \ ++ $(top_srcdir)/config/user-arch.m4 \ ++ $(top_srcdir)/config/user-frame-larger-than.m4 \ ++ $(top_srcdir)/config/user-ioctl.m4 \ ++ $(top_srcdir)/config/user-libblkid.m4 \ ++ $(top_srcdir)/config/user-libuuid.m4 \ ++ $(top_srcdir)/config/user-nptl_guard_within_stack.m4 \ ++ $(top_srcdir)/config/user-selinux.m4 \ ++ $(top_srcdir)/config/user-udev.m4 \ ++ $(top_srcdir)/config/user-zlib.m4 $(top_srcdir)/config/user.m4 \ ++ $(top_srcdir)/config/zfs-build.m4 \ ++ $(top_srcdir)/config/zfs-meta.m4 $(top_srcdir)/configure.ac ++am__configure_deps = $(am__aclocal_m4_deps) $(CONFIGURE_DEPENDENCIES) \ ++ $(ACLOCAL_M4) ++mkinstalldirs = $(install_sh) -d ++CONFIG_HEADER = $(top_builddir)/zfs_config.h ++CONFIG_CLEAN_FILES = ++CONFIG_CLEAN_VPATH_FILES = ++AM_V_GEN = $(am__v_GEN_@AM_V@) ++am__v_GEN_ = $(am__v_GEN_@AM_DEFAULT_V@) ++am__v_GEN_0 = @echo " GEN " $@; ++AM_V_at = $(am__v_at_@AM_V@) ++am__v_at_ = $(am__v_at_@AM_DEFAULT_V@) ++am__v_at_0 = @ ++SOURCES = ++DIST_SOURCES = ++RECURSIVE_TARGETS = all-recursive check-recursive dvi-recursive \ ++ html-recursive info-recursive install-data-recursive \ ++ install-dvi-recursive install-exec-recursive \ ++ install-html-recursive install-info-recursive \ ++ install-pdf-recursive install-ps-recursive install-recursive \ ++ installcheck-recursive installdirs-recursive pdf-recursive \ ++ ps-recursive uninstall-recursive ++am__can_run_installinfo = \ ++ case $$AM_UPDATE_INFO_DIR in \ ++ n|no|NO) false;; \ ++ *) (install-info --version) >/dev/null 2>&1;; \ ++ esac ++am__kernel_HEADERS_DIST = $(top_srcdir)/include/zfs_comutil.h \ ++ $(top_srcdir)/include/zfs_deleg.h \ ++ $(top_srcdir)/include/zfs_fletcher.h \ ++ $(top_srcdir)/include/zfs_namecheck.h \ ++ $(top_srcdir)/include/zfs_prop.h \ ++ $(top_srcdir)/include/zpios-ctl.h \ ++ $(top_srcdir)/include/zpios-internal.h ++am__vpath_adj_setup = srcdirstrip=`echo "$(srcdir)" | sed 's|.|.|g'`; ++am__vpath_adj = case $$p in \ ++ $(srcdir)/*) f=`echo "$$p" | sed "s|^$$srcdirstrip/||"`;; \ ++ *) f=$$p;; \ ++ esac; ++am__strip_dir = f=`echo $$p | sed -e 's|^.*/||'`; ++am__install_max = 40 ++am__nobase_strip_setup = \ ++ srcdirstrip=`echo "$(srcdir)" | sed 's/[].[^$$\\*|]/\\\\&/g'` ++am__nobase_strip = \ ++ for p in $$list; do echo "$$p"; done | sed -e "s|$$srcdirstrip/||" ++am__nobase_list = $(am__nobase_strip_setup); \ ++ for p in $$list; do echo "$$p $$p"; done | \ ++ sed "s| $$srcdirstrip/| |;"' / .*\//!s/ .*/ ./; s,\( .*\)/[^/]*$$,\1,' | \ ++ $(AWK) 'BEGIN { files["."] = "" } { files[$$2] = files[$$2] " " $$1; \ ++ if (++n[$$2] == $(am__install_max)) \ ++ { print $$2, files[$$2]; n[$$2] = 0; files[$$2] = "" } } \ ++ END { for (dir in files) print dir, files[dir] }' ++am__base_list = \ ++ sed '$$!N;$$!N;$$!N;$$!N;$$!N;$$!N;$$!N;s/\n/ /g' | \ ++ sed '$$!N;$$!N;$$!N;$$!N;s/\n/ /g' ++am__uninstall_files_from_dir = { \ ++ test -z "$$files" \ ++ || { 
test ! -d "$$dir" && test ! -f "$$dir" && test ! -r "$$dir"; } \ ++ || { echo " ( cd '$$dir' && rm -f" $$files ")"; \ ++ $(am__cd) "$$dir" && rm -f $$files; }; \ ++ } ++am__installdirs = "$(DESTDIR)$(kerneldir)" "$(DESTDIR)$(libzfsdir)" ++am__libzfs_HEADERS_DIST = $(top_srcdir)/include/zfs_comutil.h \ ++ $(top_srcdir)/include/zfs_deleg.h \ ++ $(top_srcdir)/include/zfs_fletcher.h \ ++ $(top_srcdir)/include/zfs_namecheck.h \ ++ $(top_srcdir)/include/zfs_prop.h \ ++ $(top_srcdir)/include/zpios-ctl.h \ ++ $(top_srcdir)/include/libnvpair.h \ ++ $(top_srcdir)/include/libuutil_common.h \ ++ $(top_srcdir)/include/libuutil.h \ ++ $(top_srcdir)/include/libuutil_impl.h \ ++ $(top_srcdir)/include/libzfs.h \ ++ $(top_srcdir)/include/libzfs_impl.h ++HEADERS = $(kernel_HEADERS) $(libzfs_HEADERS) ++RECURSIVE_CLEAN_TARGETS = mostlyclean-recursive clean-recursive \ ++ distclean-recursive maintainer-clean-recursive ++AM_RECURSIVE_TARGETS = $(RECURSIVE_TARGETS:-recursive=) \ ++ $(RECURSIVE_CLEAN_TARGETS:-recursive=) tags TAGS ctags CTAGS \ ++ distdir ++ETAGS = etags ++CTAGS = ctags ++DIST_SUBDIRS = $(SUBDIRS) ++DISTFILES = $(DIST_COMMON) $(DIST_SOURCES) $(TEXINFOS) $(EXTRA_DIST) ++am__relativize = \ ++ dir0=`pwd`; \ ++ sed_first='s,^\([^/]*\)/.*$$,\1,'; \ ++ sed_rest='s,^[^/]*/*,,'; \ ++ sed_last='s,^.*/\([^/]*\)$$,\1,'; \ ++ sed_butlast='s,/*[^/]*$$,,'; \ ++ while test -n "$$dir1"; do \ ++ first=`echo "$$dir1" | sed -e "$$sed_first"`; \ ++ if test "$$first" != "."; then \ ++ if test "$$first" = ".."; then \ ++ dir2=`echo "$$dir0" | sed -e "$$sed_last"`/"$$dir2"; \ ++ dir0=`echo "$$dir0" | sed -e "$$sed_butlast"`; \ ++ else \ ++ first2=`echo "$$dir2" | sed -e "$$sed_first"`; \ ++ if test "$$first2" = "$$first"; then \ ++ dir2=`echo "$$dir2" | sed -e "$$sed_rest"`; \ ++ else \ ++ dir2="../$$dir2"; \ ++ fi; \ ++ dir0="$$dir0"/"$$first"; \ ++ fi; \ ++ fi; \ ++ dir1=`echo "$$dir1" | sed -e "$$sed_rest"`; \ ++ done; \ ++ reldir="$$dir2" ++ACLOCAL = @ACLOCAL@ ++ALIEN = @ALIEN@ ++ALIEN_VERSION = @ALIEN_VERSION@ ++AMTAR = @AMTAR@ ++AM_DEFAULT_VERBOSITY = @AM_DEFAULT_VERBOSITY@ ++AR = @AR@ ++AUTOCONF = @AUTOCONF@ ++AUTOHEADER = @AUTOHEADER@ ++AUTOMAKE = @AUTOMAKE@ ++AWK = @AWK@ ++CC = @CC@ ++CCAS = @CCAS@ ++CCASDEPMODE = @CCASDEPMODE@ ++CCASFLAGS = @CCASFLAGS@ ++CCDEPMODE = @CCDEPMODE@ ++CFLAGS = @CFLAGS@ ++CPP = @CPP@ ++CPPFLAGS = @CPPFLAGS@ ++CYGPATH_W = @CYGPATH_W@ ++DEBUG_CFLAGS = @DEBUG_CFLAGS@ ++DEBUG_DMU_TX = @DEBUG_DMU_TX@ ++DEBUG_STACKFLAGS = @DEBUG_STACKFLAGS@ ++DEBUG_ZFS = @DEBUG_ZFS@ ++DEFAULT_INIT_DIR = @DEFAULT_INIT_DIR@ ++DEFAULT_INIT_SCRIPT = @DEFAULT_INIT_SCRIPT@ ++DEFAULT_PACKAGE = @DEFAULT_PACKAGE@ ++DEFS = @DEFS@ ++DEPDIR = @DEPDIR@ ++DLLTOOL = @DLLTOOL@ ++DPKG = @DPKG@ ++DPKGBUILD = @DPKGBUILD@ ++DPKGBUILD_VERSION = @DPKGBUILD_VERSION@ ++DPKG_VERSION = @DPKG_VERSION@ ++DSYMUTIL = @DSYMUTIL@ ++DUMPBIN = @DUMPBIN@ ++ECHO_C = @ECHO_C@ ++ECHO_N = @ECHO_N@ ++ECHO_T = @ECHO_T@ ++EGREP = @EGREP@ ++EXEEXT = @EXEEXT@ ++FGREP = @FGREP@ ++FRAME_LARGER_THAN = @FRAME_LARGER_THAN@ ++GREP = @GREP@ ++HAVE_ALIEN = @HAVE_ALIEN@ ++HAVE_DPKG = @HAVE_DPKG@ ++HAVE_DPKGBUILD = @HAVE_DPKGBUILD@ ++HAVE_MAKEPKG = @HAVE_MAKEPKG@ ++HAVE_PACMAN = @HAVE_PACMAN@ ++HAVE_RPM = @HAVE_RPM@ ++HAVE_RPMBUILD = @HAVE_RPMBUILD@ ++INSTALL = @INSTALL@ ++INSTALL_DATA = @INSTALL_DATA@ ++INSTALL_PROGRAM = @INSTALL_PROGRAM@ ++INSTALL_SCRIPT = @INSTALL_SCRIPT@ ++INSTALL_STRIP_PROGRAM = @INSTALL_STRIP_PROGRAM@ ++KERNELCPPFLAGS = @KERNELCPPFLAGS@ ++KERNELMAKE_PARAMS = @KERNELMAKE_PARAMS@ ++LD = @LD@ ++LDFLAGS = @LDFLAGS@ ++LIBBLKID = @LIBBLKID@ 
++LIBOBJS = @LIBOBJS@ ++LIBS = @LIBS@ ++LIBSELINUX = @LIBSELINUX@ ++LIBTOOL = @LIBTOOL@ ++LIBUUID = @LIBUUID@ ++LINUX = @LINUX@ ++LINUX_OBJ = @LINUX_OBJ@ ++LINUX_SYMBOLS = @LINUX_SYMBOLS@ ++LINUX_VERSION = @LINUX_VERSION@ ++LIPO = @LIPO@ ++LN_S = @LN_S@ ++LTLIBOBJS = @LTLIBOBJS@ ++MAINT = @MAINT@ ++MAKEINFO = @MAKEINFO@ ++MAKEPKG = @MAKEPKG@ ++MAKEPKG_VERSION = @MAKEPKG_VERSION@ ++MANIFEST_TOOL = @MANIFEST_TOOL@ ++MKDIR_P = @MKDIR_P@ ++NM = @NM@ ++NMEDIT = @NMEDIT@ ++NO_UNUSED_BUT_SET_VARIABLE = @NO_UNUSED_BUT_SET_VARIABLE@ ++OBJDUMP = @OBJDUMP@ ++OBJEXT = @OBJEXT@ ++OTOOL = @OTOOL@ ++OTOOL64 = @OTOOL64@ ++PACKAGE = @PACKAGE@ ++PACKAGE_BUGREPORT = @PACKAGE_BUGREPORT@ ++PACKAGE_NAME = @PACKAGE_NAME@ ++PACKAGE_STRING = @PACKAGE_STRING@ ++PACKAGE_TARNAME = @PACKAGE_TARNAME@ ++PACKAGE_URL = @PACKAGE_URL@ ++PACKAGE_VERSION = @PACKAGE_VERSION@ ++PACMAN = @PACMAN@ ++PACMAN_VERSION = @PACMAN_VERSION@ ++PATH_SEPARATOR = @PATH_SEPARATOR@ ++RANLIB = @RANLIB@ ++RPM = @RPM@ ++RPMBUILD = @RPMBUILD@ ++RPMBUILD_VERSION = @RPMBUILD_VERSION@ ++RPM_VERSION = @RPM_VERSION@ ++SED = @SED@ ++SET_MAKE = @SET_MAKE@ ++SHELL = @SHELL@ ++SPL = @SPL@ ++SPL_OBJ = @SPL_OBJ@ ++SPL_SYMBOLS = @SPL_SYMBOLS@ ++SPL_VERSION = @SPL_VERSION@ ++STRIP = @STRIP@ ++TARGET_ASM_DIR = @TARGET_ASM_DIR@ ++VENDOR = @VENDOR@ ++VERSION = @VERSION@ ++ZFS_CONFIG = @ZFS_CONFIG@ ++ZFS_META_ALIAS = @ZFS_META_ALIAS@ ++ZFS_META_AUTHOR = @ZFS_META_AUTHOR@ ++ZFS_META_DATA = @ZFS_META_DATA@ ++ZFS_META_LICENSE = @ZFS_META_LICENSE@ ++ZFS_META_LT_AGE = @ZFS_META_LT_AGE@ ++ZFS_META_LT_CURRENT = @ZFS_META_LT_CURRENT@ ++ZFS_META_LT_REVISION = @ZFS_META_LT_REVISION@ ++ZFS_META_NAME = @ZFS_META_NAME@ ++ZFS_META_RELEASE = @ZFS_META_RELEASE@ ++ZFS_META_VERSION = @ZFS_META_VERSION@ ++ZLIB = @ZLIB@ ++abs_builddir = @abs_builddir@ ++abs_srcdir = @abs_srcdir@ ++abs_top_builddir = @abs_top_builddir@ ++abs_top_srcdir = @abs_top_srcdir@ ++ac_ct_AR = @ac_ct_AR@ ++ac_ct_CC = @ac_ct_CC@ ++ac_ct_DUMPBIN = @ac_ct_DUMPBIN@ ++am__include = @am__include@ ++am__leading_dot = @am__leading_dot@ ++am__quote = @am__quote@ ++am__tar = @am__tar@ ++am__untar = @am__untar@ ++bindir = @bindir@ ++build = @build@ ++build_alias = @build_alias@ ++build_cpu = @build_cpu@ ++build_os = @build_os@ ++build_vendor = @build_vendor@ ++builddir = @builddir@ ++datadir = @datadir@ ++datarootdir = @datarootdir@ ++docdir = @docdir@ ++dvidir = @dvidir@ ++exec_prefix = @exec_prefix@ ++host = @host@ ++host_alias = @host_alias@ ++host_cpu = @host_cpu@ ++host_os = @host_os@ ++host_vendor = @host_vendor@ ++htmldir = @htmldir@ ++includedir = @includedir@ ++infodir = @infodir@ ++install_sh = @install_sh@ ++libdir = @libdir@ ++libexecdir = @libexecdir@ ++localedir = @localedir@ ++localstatedir = @localstatedir@ ++mandir = @mandir@ ++mkdir_p = @mkdir_p@ ++oldincludedir = @oldincludedir@ ++pdfdir = @pdfdir@ ++prefix = @prefix@ ++program_transform_name = @program_transform_name@ ++psdir = @psdir@ ++sbindir = @sbindir@ ++sharedstatedir = @sharedstatedir@ ++srcdir = @srcdir@ ++sysconfdir = @sysconfdir@ ++target = @target@ ++target_alias = @target_alias@ ++target_cpu = @target_cpu@ ++target_os = @target_os@ ++target_vendor = @target_vendor@ ++top_build_prefix = @top_build_prefix@ ++top_builddir = @top_builddir@ ++top_srcdir = @top_srcdir@ ++udevdir = @udevdir@ ++udevruledir = @udevruledir@ ++SUBDIRS = linux sys ++COMMON_H = \ ++ $(top_srcdir)/include/zfs_comutil.h \ ++ $(top_srcdir)/include/zfs_deleg.h \ ++ $(top_srcdir)/include/zfs_fletcher.h \ ++ $(top_srcdir)/include/zfs_namecheck.h \ ++ 
$(top_srcdir)/include/zfs_prop.h \ ++ $(top_srcdir)/include/zpios-ctl.h ++ ++KERNEL_H = \ ++ $(top_srcdir)/include/zpios-internal.h ++ ++USER_H = \ ++ $(top_srcdir)/include/libnvpair.h \ ++ $(top_srcdir)/include/libuutil_common.h \ ++ $(top_srcdir)/include/libuutil.h \ ++ $(top_srcdir)/include/libuutil_impl.h \ ++ $(top_srcdir)/include/libzfs.h \ ++ $(top_srcdir)/include/libzfs_impl.h ++ ++EXTRA_DIST = $(COMMON_H) $(KERNEL_H) $(USER_H) ++@CONFIG_USER_TRUE@libzfsdir = $(includedir)/libzfs ++@CONFIG_USER_TRUE@libzfs_HEADERS = $(COMMON_H) $(USER_H) ++@CONFIG_KERNEL_TRUE@kerneldir = /usr/src/zfs-$(ZFS_META_VERSION)-$(ZFS_META_RELEASE)/$(LINUX_VERSION) ++@CONFIG_KERNEL_TRUE@kernel_HEADERS = $(COMMON_H) $(KERNEL_H) ++all: all-recursive ++ ++.SUFFIXES: ++$(srcdir)/Makefile.in: @MAINTAINER_MODE_TRUE@ $(srcdir)/Makefile.am $(am__configure_deps) ++ @for dep in $?; do \ ++ case '$(am__configure_deps)' in \ ++ *$$dep*) \ ++ ( cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh ) \ ++ && { if test -f $@; then exit 0; else break; fi; }; \ ++ exit 1;; \ ++ esac; \ ++ done; \ ++ echo ' cd $(top_srcdir) && $(AUTOMAKE) --gnu include/Makefile'; \ ++ $(am__cd) $(top_srcdir) && \ ++ $(AUTOMAKE) --gnu include/Makefile ++.PRECIOUS: Makefile ++Makefile: $(srcdir)/Makefile.in $(top_builddir)/config.status ++ @case '$?' in \ ++ *config.status*) \ ++ cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh;; \ ++ *) \ ++ echo ' cd $(top_builddir) && $(SHELL) ./config.status $(subdir)/$@ $(am__depfiles_maybe)'; \ ++ cd $(top_builddir) && $(SHELL) ./config.status $(subdir)/$@ $(am__depfiles_maybe);; \ ++ esac; ++ ++$(top_builddir)/config.status: $(top_srcdir)/configure $(CONFIG_STATUS_DEPENDENCIES) ++ cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh ++ ++$(top_srcdir)/configure: @MAINTAINER_MODE_TRUE@ $(am__configure_deps) ++ cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh ++$(ACLOCAL_M4): @MAINTAINER_MODE_TRUE@ $(am__aclocal_m4_deps) ++ cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh ++$(am__aclocal_m4_deps): ++ ++mostlyclean-libtool: ++ -rm -f *.lo ++ ++clean-libtool: ++ -rm -rf .libs _libs ++install-kernelHEADERS: $(kernel_HEADERS) ++ @$(NORMAL_INSTALL) ++ @list='$(kernel_HEADERS)'; test -n "$(kerneldir)" || list=; \ ++ if test -n "$$list"; then \ ++ echo " $(MKDIR_P) '$(DESTDIR)$(kerneldir)'"; \ ++ $(MKDIR_P) "$(DESTDIR)$(kerneldir)" || exit 1; \ ++ fi; \ ++ for p in $$list; do \ ++ if test -f "$$p"; then d=; else d="$(srcdir)/"; fi; \ ++ echo "$$d$$p"; \ ++ done | $(am__base_list) | \ ++ while read files; do \ ++ echo " $(INSTALL_HEADER) $$files '$(DESTDIR)$(kerneldir)'"; \ ++ $(INSTALL_HEADER) $$files "$(DESTDIR)$(kerneldir)" || exit $$?; \ ++ done ++ ++uninstall-kernelHEADERS: ++ @$(NORMAL_UNINSTALL) ++ @list='$(kernel_HEADERS)'; test -n "$(kerneldir)" || list=; \ ++ files=`for p in $$list; do echo $$p; done | sed -e 's|^.*/||'`; \ ++ dir='$(DESTDIR)$(kerneldir)'; $(am__uninstall_files_from_dir) ++install-libzfsHEADERS: $(libzfs_HEADERS) ++ @$(NORMAL_INSTALL) ++ @list='$(libzfs_HEADERS)'; test -n "$(libzfsdir)" || list=; \ ++ if test -n "$$list"; then \ ++ echo " $(MKDIR_P) '$(DESTDIR)$(libzfsdir)'"; \ ++ $(MKDIR_P) "$(DESTDIR)$(libzfsdir)" || exit 1; \ ++ fi; \ ++ for p in $$list; do \ ++ if test -f "$$p"; then d=; else d="$(srcdir)/"; fi; \ ++ echo "$$d$$p"; \ ++ done | $(am__base_list) | \ ++ while read files; do \ ++ echo " $(INSTALL_HEADER) $$files '$(DESTDIR)$(libzfsdir)'"; \ ++ $(INSTALL_HEADER) $$files "$(DESTDIR)$(libzfsdir)" || exit $$?; \ ++ done ++ 
++uninstall-libzfsHEADERS: ++ @$(NORMAL_UNINSTALL) ++ @list='$(libzfs_HEADERS)'; test -n "$(libzfsdir)" || list=; \ ++ files=`for p in $$list; do echo $$p; done | sed -e 's|^.*/||'`; \ ++ dir='$(DESTDIR)$(libzfsdir)'; $(am__uninstall_files_from_dir) ++ ++# This directory's subdirectories are mostly independent; you can cd ++# into them and run `make' without going through this Makefile. ++# To change the values of `make' variables: instead of editing Makefiles, ++# (1) if the variable is set in `config.status', edit `config.status' ++# (which will cause the Makefiles to be regenerated when you run `make'); ++# (2) otherwise, pass the desired values on the `make' command line. ++$(RECURSIVE_TARGETS): ++ @fail= failcom='exit 1'; \ ++ for f in x $$MAKEFLAGS; do \ ++ case $$f in \ ++ *=* | --[!k]*);; \ ++ *k*) failcom='fail=yes';; \ ++ esac; \ ++ done; \ ++ dot_seen=no; \ ++ target=`echo $@ | sed s/-recursive//`; \ ++ list='$(SUBDIRS)'; for subdir in $$list; do \ ++ echo "Making $$target in $$subdir"; \ ++ if test "$$subdir" = "."; then \ ++ dot_seen=yes; \ ++ local_target="$$target-am"; \ ++ else \ ++ local_target="$$target"; \ ++ fi; \ ++ ($(am__cd) $$subdir && $(MAKE) $(AM_MAKEFLAGS) $$local_target) \ ++ || eval $$failcom; \ ++ done; \ ++ if test "$$dot_seen" = "no"; then \ ++ $(MAKE) $(AM_MAKEFLAGS) "$$target-am" || exit 1; \ ++ fi; test -z "$$fail" ++ ++$(RECURSIVE_CLEAN_TARGETS): ++ @fail= failcom='exit 1'; \ ++ for f in x $$MAKEFLAGS; do \ ++ case $$f in \ ++ *=* | --[!k]*);; \ ++ *k*) failcom='fail=yes';; \ ++ esac; \ ++ done; \ ++ dot_seen=no; \ ++ case "$@" in \ ++ distclean-* | maintainer-clean-*) list='$(DIST_SUBDIRS)' ;; \ ++ *) list='$(SUBDIRS)' ;; \ ++ esac; \ ++ rev=''; for subdir in $$list; do \ ++ if test "$$subdir" = "."; then :; else \ ++ rev="$$subdir $$rev"; \ ++ fi; \ ++ done; \ ++ rev="$$rev ."; \ ++ target=`echo $@ | sed s/-recursive//`; \ ++ for subdir in $$rev; do \ ++ echo "Making $$target in $$subdir"; \ ++ if test "$$subdir" = "."; then \ ++ local_target="$$target-am"; \ ++ else \ ++ local_target="$$target"; \ ++ fi; \ ++ ($(am__cd) $$subdir && $(MAKE) $(AM_MAKEFLAGS) $$local_target) \ ++ || eval $$failcom; \ ++ done && test -z "$$fail" ++tags-recursive: ++ list='$(SUBDIRS)'; for subdir in $$list; do \ ++ test "$$subdir" = . || ($(am__cd) $$subdir && $(MAKE) $(AM_MAKEFLAGS) tags); \ ++ done ++ctags-recursive: ++ list='$(SUBDIRS)'; for subdir in $$list; do \ ++ test "$$subdir" = . || ($(am__cd) $$subdir && $(MAKE) $(AM_MAKEFLAGS) ctags); \ ++ done ++ ++ID: $(HEADERS) $(SOURCES) $(LISP) $(TAGS_FILES) ++ list='$(SOURCES) $(HEADERS) $(LISP) $(TAGS_FILES)'; \ ++ unique=`for i in $$list; do \ ++ if test -f "$$i"; then echo $$i; else echo $(srcdir)/$$i; fi; \ ++ done | \ ++ $(AWK) '{ files[$$0] = 1; nonempty = 1; } \ ++ END { if (nonempty) { for (i in files) print i; }; }'`; \ ++ mkid -fID $$unique ++tags: TAGS ++ ++TAGS: tags-recursive $(HEADERS) $(SOURCES) $(TAGS_DEPENDENCIES) \ ++ $(TAGS_FILES) $(LISP) ++ set x; \ ++ here=`pwd`; \ ++ if ($(ETAGS) --etags-include --version) >/dev/null 2>&1; then \ ++ include_option=--etags-include; \ ++ empty_fix=.; \ ++ else \ ++ include_option=--include; \ ++ empty_fix=; \ ++ fi; \ ++ list='$(SUBDIRS)'; for subdir in $$list; do \ ++ if test "$$subdir" = .; then :; else \ ++ test ! 
-f $$subdir/TAGS || \ ++ set "$$@" "$$include_option=$$here/$$subdir/TAGS"; \ ++ fi; \ ++ done; \ ++ list='$(SOURCES) $(HEADERS) $(LISP) $(TAGS_FILES)'; \ ++ unique=`for i in $$list; do \ ++ if test -f "$$i"; then echo $$i; else echo $(srcdir)/$$i; fi; \ ++ done | \ ++ $(AWK) '{ files[$$0] = 1; nonempty = 1; } \ ++ END { if (nonempty) { for (i in files) print i; }; }'`; \ ++ shift; \ ++ if test -z "$(ETAGS_ARGS)$$*$$unique"; then :; else \ ++ test -n "$$unique" || unique=$$empty_fix; \ ++ if test $$# -gt 0; then \ ++ $(ETAGS) $(ETAGSFLAGS) $(AM_ETAGSFLAGS) $(ETAGS_ARGS) \ ++ "$$@" $$unique; \ ++ else \ ++ $(ETAGS) $(ETAGSFLAGS) $(AM_ETAGSFLAGS) $(ETAGS_ARGS) \ ++ $$unique; \ ++ fi; \ ++ fi ++ctags: CTAGS ++CTAGS: ctags-recursive $(HEADERS) $(SOURCES) $(TAGS_DEPENDENCIES) \ ++ $(TAGS_FILES) $(LISP) ++ list='$(SOURCES) $(HEADERS) $(LISP) $(TAGS_FILES)'; \ ++ unique=`for i in $$list; do \ ++ if test -f "$$i"; then echo $$i; else echo $(srcdir)/$$i; fi; \ ++ done | \ ++ $(AWK) '{ files[$$0] = 1; nonempty = 1; } \ ++ END { if (nonempty) { for (i in files) print i; }; }'`; \ ++ test -z "$(CTAGS_ARGS)$$unique" \ ++ || $(CTAGS) $(CTAGSFLAGS) $(AM_CTAGSFLAGS) $(CTAGS_ARGS) \ ++ $$unique ++ ++GTAGS: ++ here=`$(am__cd) $(top_builddir) && pwd` \ ++ && $(am__cd) $(top_srcdir) \ ++ && gtags -i $(GTAGS_ARGS) "$$here" ++ ++distclean-tags: ++ -rm -f TAGS ID GTAGS GRTAGS GSYMS GPATH tags ++ ++distdir: $(DISTFILES) ++ @srcdirstrip=`echo "$(srcdir)" | sed 's/[].[^$$\\*]/\\\\&/g'`; \ ++ topsrcdirstrip=`echo "$(top_srcdir)" | sed 's/[].[^$$\\*]/\\\\&/g'`; \ ++ list='$(DISTFILES)'; \ ++ dist_files=`for file in $$list; do echo $$file; done | \ ++ sed -e "s|^$$srcdirstrip/||;t" \ ++ -e "s|^$$topsrcdirstrip/|$(top_builddir)/|;t"`; \ ++ case $$dist_files in \ ++ */*) $(MKDIR_P) `echo "$$dist_files" | \ ++ sed '/\//!d;s|^|$(distdir)/|;s,/[^/]*$$,,' | \ ++ sort -u` ;; \ ++ esac; \ ++ for file in $$dist_files; do \ ++ if test -f $$file || test -d $$file; then d=.; else d=$(srcdir); fi; \ ++ if test -d $$d/$$file; then \ ++ dir=`echo "/$$file" | sed -e 's,/[^/]*$$,,'`; \ ++ if test -d "$(distdir)/$$file"; then \ ++ find "$(distdir)/$$file" -type d ! -perm -700 -exec chmod u+rwx {} \;; \ ++ fi; \ ++ if test -d $(srcdir)/$$file && test $$d != $(srcdir); then \ ++ cp -fpR $(srcdir)/$$file "$(distdir)$$dir" || exit 1; \ ++ find "$(distdir)/$$file" -type d ! 
-perm -700 -exec chmod u+rwx {} \;; \ ++ fi; \ ++ cp -fpR $$d/$$file "$(distdir)$$dir" || exit 1; \ ++ else \ ++ test -f "$(distdir)/$$file" \ ++ || cp -p $$d/$$file "$(distdir)/$$file" \ ++ || exit 1; \ ++ fi; \ ++ done ++ @list='$(DIST_SUBDIRS)'; for subdir in $$list; do \ ++ if test "$$subdir" = .; then :; else \ ++ $(am__make_dryrun) \ ++ || test -d "$(distdir)/$$subdir" \ ++ || $(MKDIR_P) "$(distdir)/$$subdir" \ ++ || exit 1; \ ++ dir1=$$subdir; dir2="$(distdir)/$$subdir"; \ ++ $(am__relativize); \ ++ new_distdir=$$reldir; \ ++ dir1=$$subdir; dir2="$(top_distdir)"; \ ++ $(am__relativize); \ ++ new_top_distdir=$$reldir; \ ++ echo " (cd $$subdir && $(MAKE) $(AM_MAKEFLAGS) top_distdir="$$new_top_distdir" distdir="$$new_distdir" \\"; \ ++ echo " am__remove_distdir=: am__skip_length_check=: am__skip_mode_fix=: distdir)"; \ ++ ($(am__cd) $$subdir && \ ++ $(MAKE) $(AM_MAKEFLAGS) \ ++ top_distdir="$$new_top_distdir" \ ++ distdir="$$new_distdir" \ ++ am__remove_distdir=: \ ++ am__skip_length_check=: \ ++ am__skip_mode_fix=: \ ++ distdir) \ ++ || exit 1; \ ++ fi; \ ++ done ++check-am: all-am ++check: check-recursive ++all-am: Makefile $(HEADERS) ++installdirs: installdirs-recursive ++installdirs-am: ++ for dir in "$(DESTDIR)$(kerneldir)" "$(DESTDIR)$(libzfsdir)"; do \ ++ test -z "$$dir" || $(MKDIR_P) "$$dir"; \ ++ done ++install: install-recursive ++install-exec: install-exec-recursive ++install-data: install-data-recursive ++uninstall: uninstall-recursive ++ ++install-am: all-am ++ @$(MAKE) $(AM_MAKEFLAGS) install-exec-am install-data-am ++ ++installcheck: installcheck-recursive ++install-strip: ++ if test -z '$(STRIP)'; then \ ++ $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \ ++ install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \ ++ install; \ ++ else \ ++ $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \ ++ install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \ ++ "INSTALL_PROGRAM_ENV=STRIPPROG='$(STRIP)'" install; \ ++ fi ++mostlyclean-generic: ++ ++clean-generic: ++ ++distclean-generic: ++ -test -z "$(CONFIG_CLEAN_FILES)" || rm -f $(CONFIG_CLEAN_FILES) ++ -test . = "$(srcdir)" || test -z "$(CONFIG_CLEAN_VPATH_FILES)" || rm -f $(CONFIG_CLEAN_VPATH_FILES) ++ ++maintainer-clean-generic: ++ @echo "This command is intended for maintainers to use" ++ @echo "it deletes files that may require special tools to rebuild." 
++clean: clean-recursive ++ ++clean-am: clean-generic clean-libtool mostlyclean-am ++ ++distclean: distclean-recursive ++ -rm -f Makefile ++distclean-am: clean-am distclean-generic distclean-tags ++ ++dvi: dvi-recursive ++ ++dvi-am: ++ ++html: html-recursive ++ ++html-am: ++ ++info: info-recursive ++ ++info-am: ++ ++install-data-am: install-kernelHEADERS install-libzfsHEADERS ++ ++install-dvi: install-dvi-recursive ++ ++install-dvi-am: ++ ++install-exec-am: ++ ++install-html: install-html-recursive ++ ++install-html-am: ++ ++install-info: install-info-recursive ++ ++install-info-am: ++ ++install-man: ++ ++install-pdf: install-pdf-recursive ++ ++install-pdf-am: ++ ++install-ps: install-ps-recursive ++ ++install-ps-am: ++ ++installcheck-am: ++ ++maintainer-clean: maintainer-clean-recursive ++ -rm -f Makefile ++maintainer-clean-am: distclean-am maintainer-clean-generic ++ ++mostlyclean: mostlyclean-recursive ++ ++mostlyclean-am: mostlyclean-generic mostlyclean-libtool ++ ++pdf: pdf-recursive ++ ++pdf-am: ++ ++ps: ps-recursive ++ ++ps-am: ++ ++uninstall-am: uninstall-kernelHEADERS uninstall-libzfsHEADERS ++ ++.MAKE: $(RECURSIVE_CLEAN_TARGETS) $(RECURSIVE_TARGETS) ctags-recursive \ ++ install-am install-strip tags-recursive ++ ++.PHONY: $(RECURSIVE_CLEAN_TARGETS) $(RECURSIVE_TARGETS) CTAGS GTAGS \ ++ all all-am check check-am clean clean-generic clean-libtool \ ++ ctags ctags-recursive distclean distclean-generic \ ++ distclean-libtool distclean-tags distdir dvi dvi-am html \ ++ html-am info info-am install install-am install-data \ ++ install-data-am install-dvi install-dvi-am install-exec \ ++ install-exec-am install-html install-html-am install-info \ ++ install-info-am install-kernelHEADERS install-libzfsHEADERS \ ++ install-man install-pdf install-pdf-am install-ps \ ++ install-ps-am install-strip installcheck installcheck-am \ ++ installdirs installdirs-am maintainer-clean \ ++ maintainer-clean-generic mostlyclean mostlyclean-generic \ ++ mostlyclean-libtool pdf pdf-am ps ps-am tags tags-recursive \ ++ uninstall uninstall-am uninstall-kernelHEADERS \ ++ uninstall-libzfsHEADERS ++ ++ ++# Tell versions [3.59,3.63) of GNU make to not export all variables. ++# Otherwise a system limit (for SysV at least) may be exceeded. ++.NOEXPORT: +diff -uNr linux-3.2.33-go.orig/include/zfs/sys/arc.h linux-3.2.33-go/include/zfs/sys/arc.h +--- linux-3.2.33-go.orig/include/zfs/sys/arc.h 1970-01-01 01:00:00.000000000 +0100 ++++ linux-3.2.33-go/include/zfs/sys/arc.h 2012-11-16 23:25:34.344039393 +0100 +@@ -0,0 +1,162 @@ ++/* ++ * CDDL HEADER START ++ * ++ * The contents of this file are subject to the terms of the ++ * Common Development and Distribution License (the "License"). ++ * You may not use this file except in compliance with the License. ++ * ++ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE ++ * or http://www.opensolaris.org/os/licensing. ++ * See the License for the specific language governing permissions ++ * and limitations under the License. ++ * ++ * When distributing Covered Code, include this CDDL HEADER in each ++ * file and include the License file at usr/src/OPENSOLARIS.LICENSE. ++ * If applicable, add the following below this CDDL HEADER, with the ++ * fields enclosed by brackets "[]" replaced with your own identifying ++ * information: Portions Copyright [yyyy] [name of copyright owner] ++ * ++ * CDDL HEADER END ++ */ ++/* ++ * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved. 
++ */ ++ ++#ifndef _SYS_ARC_H ++#define _SYS_ARC_H ++ ++#include ++ ++#ifdef __cplusplus ++extern "C" { ++#endif ++ ++#include ++#include ++#include ++#include ++ ++typedef struct arc_buf_hdr arc_buf_hdr_t; ++typedef struct arc_buf arc_buf_t; ++typedef struct arc_prune arc_prune_t; ++typedef void arc_done_func_t(zio_t *zio, arc_buf_t *buf, void *private); ++typedef void arc_prune_func_t(int64_t bytes, void *private); ++typedef int arc_evict_func_t(void *private); ++ ++/* generic arc_done_func_t's which you can use */ ++arc_done_func_t arc_bcopy_func; ++arc_done_func_t arc_getbuf_func; ++ ++/* generic arc_prune_func_t wrapper for callbacks */ ++struct arc_prune { ++ arc_prune_func_t *p_pfunc; ++ void *p_private; ++ list_node_t p_node; ++ refcount_t p_refcnt; ++}; ++ ++struct arc_buf { ++ arc_buf_hdr_t *b_hdr; ++ arc_buf_t *b_next; ++ kmutex_t b_evict_lock; ++ krwlock_t b_data_lock; ++ void *b_data; ++ arc_evict_func_t *b_efunc; ++ void *b_private; ++}; ++ ++typedef enum arc_buf_contents { ++ ARC_BUFC_DATA, /* buffer contains data */ ++ ARC_BUFC_METADATA, /* buffer contains metadata */ ++ ARC_BUFC_NUMTYPES ++} arc_buf_contents_t; ++/* ++ * These are the flags we pass into calls to the arc ++ */ ++#define ARC_WAIT (1 << 1) /* perform I/O synchronously */ ++#define ARC_NOWAIT (1 << 2) /* perform I/O asynchronously */ ++#define ARC_PREFETCH (1 << 3) /* I/O is a prefetch */ ++#define ARC_CACHED (1 << 4) /* I/O was already in cache */ ++#define ARC_L2CACHE (1 << 5) /* cache in L2ARC */ ++ ++/* ++ * The following breakdows of arc_size exist for kstat only. ++ */ ++typedef enum arc_space_type { ++ ARC_SPACE_DATA, ++ ARC_SPACE_HDRS, ++ ARC_SPACE_L2HDRS, ++ ARC_SPACE_OTHER, ++ ARC_SPACE_NUMTYPES ++} arc_space_type_t; ++ ++void arc_space_consume(uint64_t space, arc_space_type_t type); ++void arc_space_return(uint64_t space, arc_space_type_t type); ++void *arc_data_buf_alloc(uint64_t space); ++void arc_data_buf_free(void *buf, uint64_t space); ++arc_buf_t *arc_buf_alloc(spa_t *spa, int size, void *tag, ++ arc_buf_contents_t type); ++arc_buf_t *arc_loan_buf(spa_t *spa, int size); ++void arc_return_buf(arc_buf_t *buf, void *tag); ++void arc_loan_inuse_buf(arc_buf_t *buf, void *tag); ++void arc_buf_add_ref(arc_buf_t *buf, void *tag); ++int arc_buf_remove_ref(arc_buf_t *buf, void *tag); ++int arc_buf_size(arc_buf_t *buf); ++void arc_release(arc_buf_t *buf, void *tag); ++int arc_release_bp(arc_buf_t *buf, void *tag, blkptr_t *bp, spa_t *spa, ++ zbookmark_t *zb); ++int arc_released(arc_buf_t *buf); ++int arc_has_callback(arc_buf_t *buf); ++void arc_buf_freeze(arc_buf_t *buf); ++void arc_buf_thaw(arc_buf_t *buf); ++#ifdef ZFS_DEBUG ++int arc_referenced(arc_buf_t *buf); ++#endif ++ ++int arc_read(zio_t *pio, spa_t *spa, const blkptr_t *bp, arc_buf_t *pbuf, ++ arc_done_func_t *done, void *private, int priority, int zio_flags, ++ uint32_t *arc_flags, const zbookmark_t *zb); ++int arc_read_nolock(zio_t *pio, spa_t *spa, const blkptr_t *bp, ++ arc_done_func_t *done, void *private, int priority, int flags, ++ uint32_t *arc_flags, const zbookmark_t *zb); ++zio_t *arc_write(zio_t *pio, spa_t *spa, uint64_t txg, ++ blkptr_t *bp, arc_buf_t *buf, boolean_t l2arc, const zio_prop_t *zp, ++ arc_done_func_t *ready, arc_done_func_t *done, void *private, ++ int priority, int zio_flags, const zbookmark_t *zb); ++ ++arc_prune_t *arc_add_prune_callback(arc_prune_func_t *func, void *private); ++void arc_remove_prune_callback(arc_prune_t *p); ++ ++void arc_set_callback(arc_buf_t *buf, arc_evict_func_t *func, void *private); ++int 
arc_buf_evict(arc_buf_t *buf); ++ ++void arc_adjust_meta(int64_t adjustment, boolean_t may_prune); ++void arc_flush(spa_t *spa); ++void arc_tempreserve_clear(uint64_t reserve); ++int arc_tempreserve_space(uint64_t reserve, uint64_t txg); ++ ++void arc_init(void); ++void arc_fini(void); ++ ++/* ++ * Level 2 ARC ++ */ ++ ++void l2arc_add_vdev(spa_t *spa, vdev_t *vd); ++void l2arc_remove_vdev(vdev_t *vd); ++boolean_t l2arc_vdev_present(vdev_t *vd); ++void l2arc_init(void); ++void l2arc_fini(void); ++void l2arc_start(void); ++void l2arc_stop(void); ++ ++/* Global tunings */ ++extern int zfs_write_limit_shift; ++extern unsigned long zfs_write_limit_max; ++extern kmutex_t zfs_write_limit_lock; ++ ++#ifdef __cplusplus ++} ++#endif ++ ++#endif /* _SYS_ARC_H */ +diff -uNr linux-3.2.33-go.orig/include/zfs/sys/avl.h linux-3.2.33-go/include/zfs/sys/avl.h +--- linux-3.2.33-go.orig/include/zfs/sys/avl.h 1970-01-01 01:00:00.000000000 +0100 ++++ linux-3.2.33-go/include/zfs/sys/avl.h 2012-11-16 23:25:34.339039449 +0100 +@@ -0,0 +1,309 @@ ++/* ++ * CDDL HEADER START ++ * ++ * The contents of this file are subject to the terms of the ++ * Common Development and Distribution License (the "License"). ++ * You may not use this file except in compliance with the License. ++ * ++ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE ++ * or http://www.opensolaris.org/os/licensing. ++ * See the License for the specific language governing permissions ++ * and limitations under the License. ++ * ++ * When distributing Covered Code, include this CDDL HEADER in each ++ * file and include the License file at usr/src/OPENSOLARIS.LICENSE. ++ * If applicable, add the following below this CDDL HEADER, with the ++ * fields enclosed by brackets "[]" replaced with your own identifying ++ * information: Portions Copyright [yyyy] [name of copyright owner] ++ * ++ * CDDL HEADER END ++ */ ++/* ++ * Copyright 2009 Sun Microsystems, Inc. All rights reserved. ++ * Use is subject to license terms. ++ */ ++ ++#ifndef _AVL_H ++#define _AVL_H ++ ++/* ++ * This is a private header file. Applications should not directly include ++ * this file. ++ */ ++ ++#ifdef __cplusplus ++extern "C" { ++#endif ++ ++#include ++#include ++ ++/* ++ * This is a generic implemenatation of AVL trees for use in the Solaris kernel. ++ * The interfaces provide an efficient way of implementing an ordered set of ++ * data structures. ++ * ++ * AVL trees provide an alternative to using an ordered linked list. Using AVL ++ * trees will usually be faster, however they requires more storage. An ordered ++ * linked list in general requires 2 pointers in each data structure. The ++ * AVL tree implementation uses 3 pointers. The following chart gives the ++ * approximate performance of operations with the different approaches: ++ * ++ * Operation Link List AVL tree ++ * --------- -------- -------- ++ * lookup O(n) O(log(n)) ++ * ++ * insert 1 node constant constant ++ * ++ * delete 1 node constant between constant and O(log(n)) ++ * ++ * delete all nodes O(n) O(n) ++ * ++ * visit the next ++ * or prev node constant between constant and O(log(n)) ++ * ++ * ++ * The data structure nodes are anchored at an "avl_tree_t" (the equivalent ++ * of a list header) and the individual nodes will have a field of ++ * type "avl_node_t" (corresponding to list pointers). ++ * ++ * The type "avl_index_t" is used to indicate a position in the list for ++ * certain calls. ++ * ++ * The usage scenario is generally: ++ * ++ * 1. 
Create the list/tree with: avl_create() ++ * ++ * followed by any mixture of: ++ * ++ * 2a. Insert nodes with: avl_add(), or avl_find() and avl_insert() ++ * ++ * 2b. Visited elements with: ++ * avl_first() - returns the lowest valued node ++ * avl_last() - returns the highest valued node ++ * AVL_NEXT() - given a node go to next higher one ++ * AVL_PREV() - given a node go to previous lower one ++ * ++ * 2c. Find the node with the closest value either less than or greater ++ * than a given value with avl_nearest(). ++ * ++ * 2d. Remove individual nodes from the list/tree with avl_remove(). ++ * ++ * and finally when the list is being destroyed ++ * ++ * 3. Use avl_destroy_nodes() to quickly process/free up any remaining nodes. ++ * Note that once you use avl_destroy_nodes(), you can no longer ++ * use any routine except avl_destroy_nodes() and avl_destoy(). ++ * ++ * 4. Use avl_destroy() to destroy the AVL tree itself. ++ * ++ * Any locking for multiple thread access is up to the user to provide, just ++ * as is needed for any linked list implementation. ++ */ ++ ++ ++/* ++ * Type used for the root of the AVL tree. ++ */ ++typedef struct avl_tree avl_tree_t; ++ ++/* ++ * The data nodes in the AVL tree must have a field of this type. ++ */ ++typedef struct avl_node avl_node_t; ++ ++/* ++ * An opaque type used to locate a position in the tree where a node ++ * would be inserted. ++ */ ++typedef uintptr_t avl_index_t; ++ ++ ++/* ++ * Direction constants used for avl_nearest(). ++ */ ++#define AVL_BEFORE (0) ++#define AVL_AFTER (1) ++ ++ ++/* ++ * Prototypes ++ * ++ * Where not otherwise mentioned, "void *" arguments are a pointer to the ++ * user data structure which must contain a field of type avl_node_t. ++ * ++ * Also assume the user data structures looks like: ++ * stuct my_type { ++ * ... ++ * avl_node_t my_link; ++ * ... ++ * }; ++ */ ++ ++/* ++ * Initialize an AVL tree. Arguments are: ++ * ++ * tree - the tree to be initialized ++ * compar - function to compare two nodes, it must return exactly: -1, 0, or +1 ++ * -1 for <, 0 for ==, and +1 for > ++ * size - the value of sizeof(struct my_type) ++ * offset - the value of OFFSETOF(struct my_type, my_link) ++ */ ++extern void avl_create(avl_tree_t *tree, ++ int (*compar) (const void *, const void *), size_t size, size_t offset); ++ ++ ++/* ++ * Find a node with a matching value in the tree. Returns the matching node ++ * found. If not found, it returns NULL and then if "where" is not NULL it sets ++ * "where" for use with avl_insert() or avl_nearest(). ++ * ++ * node - node that has the value being looked for ++ * where - position for use with avl_nearest() or avl_insert(), may be NULL ++ */ ++extern void *avl_find(avl_tree_t *tree, const void *node, avl_index_t *where); ++ ++/* ++ * Insert a node into the tree. ++ * ++ * node - the node to insert ++ * where - position as returned from avl_find() ++ */ ++extern void avl_insert(avl_tree_t *tree, void *node, avl_index_t where); ++ ++/* ++ * Insert "new_data" in "tree" in the given "direction" either after ++ * or before the data "here". ++ * ++ * This might be usefull for avl clients caching recently accessed ++ * data to avoid doing avl_find() again for insertion. ++ * ++ * new_data - new data to insert ++ * here - existing node in "tree" ++ * direction - either AVL_AFTER or AVL_BEFORE the data "here". ++ */ ++extern void avl_insert_here(avl_tree_t *tree, void *new_data, void *here, ++ int direction); ++ ++ ++/* ++ * Return the first or last valued node in the tree. 
Will return NULL ++ * if the tree is empty. ++ * ++ */ ++extern void *avl_first(avl_tree_t *tree); ++extern void *avl_last(avl_tree_t *tree); ++ ++ ++/* ++ * Return the next or previous valued node in the tree. ++ * AVL_NEXT() will return NULL if at the last node. ++ * AVL_PREV() will return NULL if at the first node. ++ * ++ * node - the node from which the next or previous node is found ++ */ ++#define AVL_NEXT(tree, node) avl_walk(tree, node, AVL_AFTER) ++#define AVL_PREV(tree, node) avl_walk(tree, node, AVL_BEFORE) ++ ++ ++/* ++ * Find the node with the nearest value either greater or less than ++ * the value from a previous avl_find(). Returns the node or NULL if ++ * there isn't a matching one. ++ * ++ * where - position as returned from avl_find() ++ * direction - either AVL_BEFORE or AVL_AFTER ++ * ++ * EXAMPLE get the greatest node that is less than a given value: ++ * ++ * avl_tree_t *tree; ++ * struct my_data look_for_value = {....}; ++ * struct my_data *node; ++ * struct my_data *less; ++ * avl_index_t where; ++ * ++ * node = avl_find(tree, &look_for_value, &where); ++ * if (node != NULL) ++ * less = AVL_PREV(tree, node); ++ * else ++ * less = avl_nearest(tree, where, AVL_BEFORE); ++ */ ++extern void *avl_nearest(avl_tree_t *tree, avl_index_t where, int direction); ++ ++ ++/* ++ * Add a single node to the tree. ++ * The node must not be in the tree, and it must not ++ * compare equal to any other node already in the tree. ++ * ++ * node - the node to add ++ */ ++extern void avl_add(avl_tree_t *tree, void *node); ++ ++ ++/* ++ * Remove a single node from the tree. The node must be in the tree. ++ * ++ * node - the node to remove ++ */ ++extern void avl_remove(avl_tree_t *tree, void *node); ++ ++/* ++ * Reinsert a node only if its order has changed relative to its nearest ++ * neighbors. To optimize performance avl_update_lt() checks only the previous ++ * node and avl_update_gt() checks only the next node. Use avl_update_lt() and ++ * avl_update_gt() only if you know the direction in which the order of the ++ * node may change. ++ */ ++extern boolean_t avl_update(avl_tree_t *, void *); ++extern boolean_t avl_update_lt(avl_tree_t *, void *); ++extern boolean_t avl_update_gt(avl_tree_t *, void *); ++ ++/* ++ * Return the number of nodes in the tree ++ */ ++extern ulong_t avl_numnodes(avl_tree_t *tree); ++ ++/* ++ * Return B_TRUE if there are zero nodes in the tree, B_FALSE otherwise. ++ */ ++extern boolean_t avl_is_empty(avl_tree_t *tree); ++ ++/* ++ * Used to destroy any remaining nodes in a tree. The cookie argument should ++ * be initialized to NULL before the first call. Returns a node that has been ++ * removed from the tree and may be free()'d. Returns NULL when the tree is ++ * empty. ++ * ++ * Once you call avl_destroy_nodes(), you can only continuing calling it and ++ * finally avl_destroy(). No other AVL routines will be valid. ++ * ++ * cookie - a "void *" used to save state between calls to avl_destroy_nodes() ++ * ++ * EXAMPLE: ++ * avl_tree_t *tree; ++ * struct my_data *node; ++ * void *cookie; ++ * ++ * cookie = NULL; ++ * while ((node = avl_destroy_nodes(tree, &cookie)) != NULL) ++ * free(node); ++ * avl_destroy(tree); ++ */ ++extern void *avl_destroy_nodes(avl_tree_t *tree, void **cookie); ++ ++ ++/* ++ * Final destroy of an AVL tree. 
Arguments are: ++ * ++ * tree - the empty tree to destroy ++ */ ++extern void avl_destroy(avl_tree_t *tree); ++ ++ ++ ++#ifdef __cplusplus ++} ++#endif ++ ++#endif /* _AVL_H */ +diff -uNr linux-3.2.33-go.orig/include/zfs/sys/avl_impl.h linux-3.2.33-go/include/zfs/sys/avl_impl.h +--- linux-3.2.33-go.orig/include/zfs/sys/avl_impl.h 1970-01-01 01:00:00.000000000 +0100 ++++ linux-3.2.33-go/include/zfs/sys/avl_impl.h 2012-11-16 23:25:34.337039473 +0100 +@@ -0,0 +1,164 @@ ++/* ++ * CDDL HEADER START ++ * ++ * The contents of this file are subject to the terms of the ++ * Common Development and Distribution License, Version 1.0 only ++ * (the "License"). You may not use this file except in compliance ++ * with the License. ++ * ++ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE ++ * or http://www.opensolaris.org/os/licensing. ++ * See the License for the specific language governing permissions ++ * and limitations under the License. ++ * ++ * When distributing Covered Code, include this CDDL HEADER in each ++ * file and include the License file at usr/src/OPENSOLARIS.LICENSE. ++ * If applicable, add the following below this CDDL HEADER, with the ++ * fields enclosed by brackets "[]" replaced with your own identifying ++ * information: Portions Copyright [yyyy] [name of copyright owner] ++ * ++ * CDDL HEADER END ++ */ ++/* ++ * Copyright 2004 Sun Microsystems, Inc. All rights reserved. ++ * Use is subject to license terms. ++ */ ++ ++#ifndef _AVL_IMPL_H ++#define _AVL_IMPL_H ++ ++ ++ ++/* ++ * This is a private header file. Applications should not directly include ++ * this file. ++ */ ++ ++#include ++ ++#ifdef __cplusplus ++extern "C" { ++#endif ++ ++ ++/* ++ * generic AVL tree implementation for kernel use ++ * ++ * There are 5 pieces of information stored for each node in an AVL tree ++ * ++ * pointer to less than child ++ * pointer to greater than child ++ * a pointer to the parent of this node ++ * an indication [0/1] of which child I am of my parent ++ * a "balance" (-1, 0, +1) indicating which child tree is taller ++ * ++ * Since they only need 3 bits, the last two fields are packed into the ++ * bottom bits of the parent pointer on 64 bit machines to save on space. 
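++ *
++ * Illustrative encoding (editor's sketch, assuming the 64-bit packing and
++ * macros defined below; not part of the original header): a node whose
++ * parent is P, which sits in slot 1 of P's avl_child[] and has balance +1,
++ * stores
++ *
++ *	avl_pcb == (uintptr_t)P | (1 << 2) | (+1 + 1) == (uintptr_t)P | 6
++ *
++ * AVL_XPARENT() masks off the low 3 bits to recover P, AVL_XCHILD()
++ * extracts bit 2, and AVL_XBALANCE() subtracts 1 from bits 0..1.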
++ */ ++ ++#ifndef _LP64 ++ ++struct avl_node { ++ struct avl_node *avl_child[2]; /* left/right children */ ++ struct avl_node *avl_parent; /* this node's parent */ ++ unsigned short avl_child_index; /* my index in parent's avl_child[] */ ++ short avl_balance; /* balance value: -1, 0, +1 */ ++}; ++ ++#define AVL_XPARENT(n) ((n)->avl_parent) ++#define AVL_SETPARENT(n, p) ((n)->avl_parent = (p)) ++ ++#define AVL_XCHILD(n) ((n)->avl_child_index) ++#define AVL_SETCHILD(n, c) ((n)->avl_child_index = (unsigned short)(c)) ++ ++#define AVL_XBALANCE(n) ((n)->avl_balance) ++#define AVL_SETBALANCE(n, b) ((n)->avl_balance = (short)(b)) ++ ++#else /* _LP64 */ ++ ++/* ++ * for 64 bit machines, avl_pcb contains parent pointer, balance and child_index ++ * values packed in the following manner: ++ * ++ * |63 3| 2 |1 0 | ++ * |-------------------------------------|-----------------|-------------| ++ * | avl_parent hi order bits | avl_child_index | avl_balance | ++ * | | | + 1 | ++ * |-------------------------------------|-----------------|-------------| ++ * ++ */ ++struct avl_node { ++ struct avl_node *avl_child[2]; /* left/right children nodes */ ++ uintptr_t avl_pcb; /* parent, child_index, balance */ ++}; ++ ++/* ++ * macros to extract/set fields in avl_pcb ++ * ++ * pointer to the parent of the current node is the high order bits ++ */ ++#define AVL_XPARENT(n) ((struct avl_node *)((n)->avl_pcb & ~7)) ++#define AVL_SETPARENT(n, p) \ ++ ((n)->avl_pcb = (((n)->avl_pcb & 7) | (uintptr_t)(p))) ++ ++/* ++ * index of this node in its parent's avl_child[]: bit #2 ++ */ ++#define AVL_XCHILD(n) (((n)->avl_pcb >> 2) & 1) ++#define AVL_SETCHILD(n, c) \ ++ ((n)->avl_pcb = (uintptr_t)(((n)->avl_pcb & ~4) | ((c) << 2))) ++ ++/* ++ * balance indication for a node, lowest 2 bits. A valid balance is ++ * -1, 0, or +1, and is encoded by adding 1 to the value to get the ++ * unsigned values of 0, 1, 2. ++ */ ++#define AVL_XBALANCE(n) ((int)(((n)->avl_pcb & 3) - 1)) ++#define AVL_SETBALANCE(n, b) \ ++ ((n)->avl_pcb = (uintptr_t)((((n)->avl_pcb & ~3) | ((b) + 1)))) ++ ++#endif /* _LP64 */ ++ ++ ++ ++/* ++ * switch between a node and data pointer for a given tree ++ * the value of "o" is tree->avl_offset ++ */ ++#define AVL_NODE2DATA(n, o) ((void *)((uintptr_t)(n) - (o))) ++#define AVL_DATA2NODE(d, o) ((struct avl_node *)((uintptr_t)(d) + (o))) ++ ++ ++ ++/* ++ * macros used to create/access an avl_index_t ++ */ ++#define AVL_INDEX2NODE(x) ((avl_node_t *)((x) & ~1)) ++#define AVL_INDEX2CHILD(x) ((x) & 1) ++#define AVL_MKINDEX(n, c) ((avl_index_t)(n) | (c)) ++ ++ ++/* ++ * The tree structure. The fields avl_root, avl_compar, and avl_offset come ++ * first since they are needed for avl_find(). We want them to fit into ++ * a single 64 byte cache line to make avl_find() as fast as possible. 
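++ *
++ * For reference (editor's sketch, hypothetical embedding type): these
++ * fields are filled in by avl_create(), e.g.
++ *
++ *	struct my_type { int key; avl_node_t link; };
++ *
++ *	avl_create(&tree, my_compar, sizeof (struct my_type),
++ *	    offsetof(struct my_type, link));
++ *
++ * so that avl_size/avl_offset let the AVL code convert between user data
++ * pointers and the embedded avl_node_t (see AVL_NODE2DATA above).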
++ */ ++struct avl_tree { ++ struct avl_node *avl_root; /* root node in tree */ ++ int (*avl_compar)(const void *, const void *); ++ size_t avl_offset; /* offsetof(type, avl_link_t field) */ ++ ulong_t avl_numnodes; /* number of nodes in the tree */ ++ size_t avl_size; /* sizeof user type struct */ ++}; ++ ++ ++/* ++ * This will only by used via AVL_NEXT() or AVL_PREV() ++ */ ++extern void *avl_walk(struct avl_tree *, void *, int); ++ ++#ifdef __cplusplus ++} ++#endif ++ ++#endif /* _AVL_IMPL_H */ +diff -uNr linux-3.2.33-go.orig/include/zfs/sys/bplist.h linux-3.2.33-go/include/zfs/sys/bplist.h +--- linux-3.2.33-go.orig/include/zfs/sys/bplist.h 1970-01-01 01:00:00.000000000 +0100 ++++ linux-3.2.33-go/include/zfs/sys/bplist.h 2012-11-16 23:25:34.338039461 +0100 +@@ -0,0 +1,57 @@ ++/* ++ * CDDL HEADER START ++ * ++ * The contents of this file are subject to the terms of the ++ * Common Development and Distribution License (the "License"). ++ * You may not use this file except in compliance with the License. ++ * ++ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE ++ * or http://www.opensolaris.org/os/licensing. ++ * See the License for the specific language governing permissions ++ * and limitations under the License. ++ * ++ * When distributing Covered Code, include this CDDL HEADER in each ++ * file and include the License file at usr/src/OPENSOLARIS.LICENSE. ++ * If applicable, add the following below this CDDL HEADER, with the ++ * fields enclosed by brackets "[]" replaced with your own identifying ++ * information: Portions Copyright [yyyy] [name of copyright owner] ++ * ++ * CDDL HEADER END ++ */ ++/* ++ * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved. ++ */ ++ ++#ifndef _SYS_BPLIST_H ++#define _SYS_BPLIST_H ++ ++#include ++#include ++ ++#ifdef __cplusplus ++extern "C" { ++#endif ++ ++typedef struct bplist_entry { ++ blkptr_t bpe_blk; ++ list_node_t bpe_node; ++} bplist_entry_t; ++ ++typedef struct bplist { ++ kmutex_t bpl_lock; ++ list_t bpl_list; ++} bplist_t; ++ ++typedef int bplist_itor_t(void *arg, const blkptr_t *bp, dmu_tx_t *tx); ++ ++void bplist_create(bplist_t *bpl); ++void bplist_destroy(bplist_t *bpl); ++void bplist_append(bplist_t *bpl, const blkptr_t *bp); ++void bplist_iterate(bplist_t *bpl, bplist_itor_t *func, ++ void *arg, dmu_tx_t *tx); ++ ++#ifdef __cplusplus ++} ++#endif ++ ++#endif /* _SYS_BPLIST_H */ +diff -uNr linux-3.2.33-go.orig/include/zfs/sys/bpobj.h linux-3.2.33-go/include/zfs/sys/bpobj.h +--- linux-3.2.33-go.orig/include/zfs/sys/bpobj.h 1970-01-01 01:00:00.000000000 +0100 ++++ linux-3.2.33-go/include/zfs/sys/bpobj.h 2012-11-16 23:25:34.344039393 +0100 +@@ -0,0 +1,91 @@ ++/* ++ * CDDL HEADER START ++ * ++ * The contents of this file are subject to the terms of the ++ * Common Development and Distribution License (the "License"). ++ * You may not use this file except in compliance with the License. ++ * ++ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE ++ * or http://www.opensolaris.org/os/licensing. ++ * See the License for the specific language governing permissions ++ * and limitations under the License. ++ * ++ * When distributing Covered Code, include this CDDL HEADER in each ++ * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 
++ * If applicable, add the following below this CDDL HEADER, with the ++ * fields enclosed by brackets "[]" replaced with your own identifying ++ * information: Portions Copyright [yyyy] [name of copyright owner] ++ * ++ * CDDL HEADER END ++ */ ++/* ++ * Copyright (c) 2010, Oracle and/or its affiliates. All rights reserved. ++ */ ++ ++#ifndef _SYS_BPOBJ_H ++#define _SYS_BPOBJ_H ++ ++#include ++#include ++#include ++#include ++#include ++ ++#ifdef __cplusplus ++extern "C" { ++#endif ++ ++typedef struct bpobj_phys { ++ /* ++ * This is the bonus buffer for the dead lists. The object's ++ * contents is an array of bpo_entries blkptr_t's, representing ++ * a total of bpo_bytes physical space. ++ */ ++ uint64_t bpo_num_blkptrs; ++ uint64_t bpo_bytes; ++ uint64_t bpo_comp; ++ uint64_t bpo_uncomp; ++ uint64_t bpo_subobjs; ++ uint64_t bpo_num_subobjs; ++} bpobj_phys_t; ++ ++#define BPOBJ_SIZE_V0 (2 * sizeof (uint64_t)) ++#define BPOBJ_SIZE_V1 (4 * sizeof (uint64_t)) ++ ++typedef struct bpobj { ++ kmutex_t bpo_lock; ++ objset_t *bpo_os; ++ uint64_t bpo_object; ++ int bpo_epb; ++ uint8_t bpo_havecomp; ++ uint8_t bpo_havesubobj; ++ bpobj_phys_t *bpo_phys; ++ dmu_buf_t *bpo_dbuf; ++ dmu_buf_t *bpo_cached_dbuf; ++} bpobj_t; ++ ++typedef int bpobj_itor_t(void *arg, const blkptr_t *bp, dmu_tx_t *tx); ++ ++uint64_t bpobj_alloc(objset_t *mos, int blocksize, dmu_tx_t *tx); ++void bpobj_free(objset_t *os, uint64_t obj, dmu_tx_t *tx); ++ ++int bpobj_open(bpobj_t *bpo, objset_t *mos, uint64_t object); ++void bpobj_close(bpobj_t *bpo); ++ ++int bpobj_iterate(bpobj_t *bpo, bpobj_itor_t func, void *arg, dmu_tx_t *tx); ++int bpobj_iterate_nofree(bpobj_t *bpo, bpobj_itor_t func, void *, dmu_tx_t *); ++int bpobj_iterate_dbg(bpobj_t *bpo, uint64_t *itorp, blkptr_t *bp); ++ ++void bpobj_enqueue_subobj(bpobj_t *bpo, uint64_t subobj, dmu_tx_t *tx); ++void bpobj_enqueue(bpobj_t *bpo, const blkptr_t *bp, dmu_tx_t *tx); ++ ++int bpobj_space(bpobj_t *bpo, ++ uint64_t *usedp, uint64_t *compp, uint64_t *uncompp); ++int bpobj_space_range(bpobj_t *bpo, uint64_t mintxg, uint64_t maxtxg, ++ uint64_t *usedp, uint64_t *compp, uint64_t *uncompp); ++ ++#ifdef __cplusplus ++} ++#endif ++ ++#endif /* _SYS_BPOBJ_H */ +diff -uNr linux-3.2.33-go.orig/include/zfs/sys/dbuf.h linux-3.2.33-go/include/zfs/sys/dbuf.h +--- linux-3.2.33-go.orig/include/zfs/sys/dbuf.h 1970-01-01 01:00:00.000000000 +0100 ++++ linux-3.2.33-go/include/zfs/sys/dbuf.h 2012-11-16 23:25:34.344039393 +0100 +@@ -0,0 +1,372 @@ ++/* ++ * CDDL HEADER START ++ * ++ * The contents of this file are subject to the terms of the ++ * Common Development and Distribution License (the "License"). ++ * You may not use this file except in compliance with the License. ++ * ++ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE ++ * or http://www.opensolaris.org/os/licensing. ++ * See the License for the specific language governing permissions ++ * and limitations under the License. ++ * ++ * When distributing Covered Code, include this CDDL HEADER in each ++ * file and include the License file at usr/src/OPENSOLARIS.LICENSE. ++ * If applicable, add the following below this CDDL HEADER, with the ++ * fields enclosed by brackets "[]" replaced with your own identifying ++ * information: Portions Copyright [yyyy] [name of copyright owner] ++ * ++ * CDDL HEADER END ++ */ ++/* ++ * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved. 
++ */ ++ ++#ifndef _SYS_DBUF_H ++#define _SYS_DBUF_H ++ ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++ ++#ifdef __cplusplus ++extern "C" { ++#endif ++ ++#define IN_DMU_SYNC 2 ++ ++/* ++ * define flags for dbuf_read ++ */ ++ ++#define DB_RF_MUST_SUCCEED (1 << 0) ++#define DB_RF_CANFAIL (1 << 1) ++#define DB_RF_HAVESTRUCT (1 << 2) ++#define DB_RF_NOPREFETCH (1 << 3) ++#define DB_RF_NEVERWAIT (1 << 4) ++#define DB_RF_CACHED (1 << 5) ++ ++/* ++ * The simplified state transition diagram for dbufs looks like: ++ * ++ * +----> READ ----+ ++ * | | ++ * | V ++ * (alloc)-->UNCACHED CACHED-->EVICTING-->(free) ++ * | ^ ^ ++ * | | | ++ * +----> FILL ----+ | ++ * | | ++ * | | ++ * +--------> NOFILL -------+ ++ */ ++typedef enum dbuf_states { ++ DB_UNCACHED, ++ DB_FILL, ++ DB_NOFILL, ++ DB_READ, ++ DB_CACHED, ++ DB_EVICTING ++} dbuf_states_t; ++ ++struct dnode; ++struct dmu_tx; ++ ++/* ++ * level = 0 means the user data ++ * level = 1 means the single indirect block ++ * etc. ++ */ ++ ++struct dmu_buf_impl; ++ ++typedef enum override_states { ++ DR_NOT_OVERRIDDEN, ++ DR_IN_DMU_SYNC, ++ DR_OVERRIDDEN ++} override_states_t; ++ ++typedef struct dbuf_dirty_record { ++ /* link on our parents dirty list */ ++ list_node_t dr_dirty_node; ++ ++ /* transaction group this data will sync in */ ++ uint64_t dr_txg; ++ ++ /* zio of outstanding write IO */ ++ zio_t *dr_zio; ++ ++ /* pointer back to our dbuf */ ++ struct dmu_buf_impl *dr_dbuf; ++ ++ /* pointer to next dirty record */ ++ struct dbuf_dirty_record *dr_next; ++ ++ /* pointer to parent dirty record */ ++ struct dbuf_dirty_record *dr_parent; ++ ++ union dirty_types { ++ struct dirty_indirect { ++ ++ /* protect access to list */ ++ kmutex_t dr_mtx; ++ ++ /* Our list of dirty children */ ++ list_t dr_children; ++ } di; ++ struct dirty_leaf { ++ ++ /* ++ * dr_data is set when we dirty the buffer ++ * so that we can retain the pointer even if it ++ * gets COW'd in a subsequent transaction group. ++ */ ++ arc_buf_t *dr_data; ++ blkptr_t dr_overridden_by; ++ override_states_t dr_override_state; ++ uint8_t dr_copies; ++ } dl; ++ } dt; ++} dbuf_dirty_record_t; ++ ++typedef struct dmu_buf_impl { ++ /* ++ * The following members are immutable, with the exception of ++ * db.db_data, which is protected by db_mtx. ++ */ ++ ++ /* the publicly visible structure */ ++ dmu_buf_t db; ++ ++ /* the objset we belong to */ ++ struct objset *db_objset; ++ ++ /* ++ * handle to safely access the dnode we belong to (NULL when evicted) ++ */ ++ struct dnode_handle *db_dnode_handle; ++ ++ /* ++ * our parent buffer; if the dnode points to us directly, ++ * db_parent == db_dnode_handle->dnh_dnode->dn_dbuf ++ * only accessed by sync thread ??? ++ * (NULL when evicted) ++ * May change from NULL to non-NULL under the protection of db_mtx ++ * (see dbuf_check_blkptr()) ++ */ ++ struct dmu_buf_impl *db_parent; ++ ++ /* ++ * link for hash table of all dmu_buf_impl_t's ++ */ ++ struct dmu_buf_impl *db_hash_next; ++ ++ /* our block number */ ++ uint64_t db_blkid; ++ ++ /* ++ * Pointer to the blkptr_t which points to us. May be NULL if we ++ * don't have one yet. (NULL when evicted) ++ */ ++ blkptr_t *db_blkptr; ++ ++ /* ++ * Our indirection level. Data buffers have db_level==0. ++ * Indirect buffers which point to data buffers have ++ * db_level==1. etc. Buffers which contain dnodes have ++ * db_level==0, since the dnodes are stored in a file. 
++ */ ++ uint8_t db_level; ++ ++ /* db_mtx protects the members below */ ++ kmutex_t db_mtx; ++ ++ /* ++ * Current state of the buffer ++ */ ++ dbuf_states_t db_state; ++ ++ /* ++ * Refcount accessed by dmu_buf_{hold,rele}. ++ * If nonzero, the buffer can't be destroyed. ++ * Protected by db_mtx. ++ */ ++ refcount_t db_holds; ++ ++ /* buffer holding our data */ ++ arc_buf_t *db_buf; ++ ++ kcondvar_t db_changed; ++ dbuf_dirty_record_t *db_data_pending; ++ ++ /* pointer to most recent dirty record for this buffer */ ++ dbuf_dirty_record_t *db_last_dirty; ++ ++ /* ++ * Our link on the owner dnodes's dn_dbufs list. ++ * Protected by its dn_dbufs_mtx. ++ */ ++ list_node_t db_link; ++ ++ /* Data which is unique to data (leaf) blocks: */ ++ ++ /* stuff we store for the user (see dmu_buf_set_user) */ ++ void *db_user_ptr; ++ void **db_user_data_ptr_ptr; ++ dmu_buf_evict_func_t *db_evict_func; ++ ++ uint8_t db_immediate_evict; ++ uint8_t db_freed_in_flight; ++ ++ uint8_t db_dirtycnt; ++} dmu_buf_impl_t; ++ ++/* Note: the dbuf hash table is exposed only for the mdb module */ ++#define DBUF_MUTEXES 256 ++#define DBUF_HASH_MUTEX(h, idx) (&(h)->hash_mutexes[(idx) & (DBUF_MUTEXES-1)]) ++typedef struct dbuf_hash_table { ++ uint64_t hash_table_mask; ++ dmu_buf_impl_t **hash_table; ++ kmutex_t hash_mutexes[DBUF_MUTEXES]; ++} dbuf_hash_table_t; ++ ++ ++uint64_t dbuf_whichblock(struct dnode *di, uint64_t offset); ++ ++void dbuf_create_bonus(struct dnode *dn); ++int dbuf_spill_set_blksz(dmu_buf_t *db, uint64_t blksz, dmu_tx_t *tx); ++ ++void dbuf_rm_spill(struct dnode *dn, dmu_tx_t *tx); ++ ++dmu_buf_impl_t *dbuf_hold(struct dnode *dn, uint64_t blkid, void *tag); ++dmu_buf_impl_t *dbuf_hold_level(struct dnode *dn, int level, uint64_t blkid, ++ void *tag); ++int dbuf_hold_impl(struct dnode *dn, uint8_t level, uint64_t blkid, int create, ++ void *tag, dmu_buf_impl_t **dbp); ++ ++void dbuf_prefetch(struct dnode *dn, uint64_t blkid); ++ ++void dbuf_add_ref(dmu_buf_impl_t *db, void *tag); ++uint64_t dbuf_refcount(dmu_buf_impl_t *db); ++ ++void dbuf_rele(dmu_buf_impl_t *db, void *tag); ++void dbuf_rele_and_unlock(dmu_buf_impl_t *db, void *tag); ++ ++dmu_buf_impl_t *dbuf_find(struct dnode *dn, uint8_t level, uint64_t blkid); ++ ++int dbuf_read(dmu_buf_impl_t *db, zio_t *zio, uint32_t flags); ++void dbuf_will_dirty(dmu_buf_impl_t *db, dmu_tx_t *tx); ++void dbuf_fill_done(dmu_buf_impl_t *db, dmu_tx_t *tx); ++void dmu_buf_will_not_fill(dmu_buf_t *db, dmu_tx_t *tx); ++void dmu_buf_will_fill(dmu_buf_t *db, dmu_tx_t *tx); ++void dmu_buf_fill_done(dmu_buf_t *db, dmu_tx_t *tx); ++void dbuf_assign_arcbuf(dmu_buf_impl_t *db, arc_buf_t *buf, dmu_tx_t *tx); ++dbuf_dirty_record_t *dbuf_dirty(dmu_buf_impl_t *db, dmu_tx_t *tx); ++arc_buf_t *dbuf_loan_arcbuf(dmu_buf_impl_t *db); ++ ++void dbuf_clear(dmu_buf_impl_t *db); ++void dbuf_evict(dmu_buf_impl_t *db); ++ ++void dbuf_unoverride(dbuf_dirty_record_t *dr); ++void dbuf_sync_list(list_t *list, dmu_tx_t *tx); ++void dbuf_release_bp(dmu_buf_impl_t *db); ++ ++void dbuf_free_range(struct dnode *dn, uint64_t start, uint64_t end, ++ struct dmu_tx *); ++ ++void dbuf_new_size(dmu_buf_impl_t *db, int size, dmu_tx_t *tx); ++ ++#define DB_DNODE(_db) ((_db)->db_dnode_handle->dnh_dnode) ++#define DB_DNODE_LOCK(_db) ((_db)->db_dnode_handle->dnh_zrlock) ++#define DB_DNODE_ENTER(_db) (zrl_add(&DB_DNODE_LOCK(_db))) ++#define DB_DNODE_EXIT(_db) (zrl_remove(&DB_DNODE_LOCK(_db))) ++#define DB_DNODE_HELD(_db) (!zrl_is_zero(&DB_DNODE_LOCK(_db))) ++#define DB_GET_SPA(_spa_p, _db) { \ ++ dnode_t *__dn; \ 
++ DB_DNODE_ENTER(_db); \ ++ __dn = DB_DNODE(_db); \ ++ *(_spa_p) = __dn->dn_objset->os_spa; \ ++ DB_DNODE_EXIT(_db); \ ++} ++#define DB_GET_OBJSET(_os_p, _db) { \ ++ dnode_t *__dn; \ ++ DB_DNODE_ENTER(_db); \ ++ __dn = DB_DNODE(_db); \ ++ *(_os_p) = __dn->dn_objset; \ ++ DB_DNODE_EXIT(_db); \ ++} ++ ++void dbuf_init(void); ++void dbuf_fini(void); ++ ++boolean_t dbuf_is_metadata(dmu_buf_impl_t *db); ++ ++#define DBUF_IS_METADATA(_db) \ ++ (dbuf_is_metadata(_db)) ++ ++#define DBUF_GET_BUFC_TYPE(_db) \ ++ (DBUF_IS_METADATA(_db) ? ARC_BUFC_METADATA : ARC_BUFC_DATA) ++ ++#define DBUF_IS_CACHEABLE(_db) \ ++ ((_db)->db_objset->os_primary_cache == ZFS_CACHE_ALL || \ ++ (DBUF_IS_METADATA(_db) && \ ++ ((_db)->db_objset->os_primary_cache == ZFS_CACHE_METADATA))) ++ ++#define DBUF_IS_L2CACHEABLE(_db) \ ++ ((_db)->db_objset->os_secondary_cache == ZFS_CACHE_ALL || \ ++ (DBUF_IS_METADATA(_db) && \ ++ ((_db)->db_objset->os_secondary_cache == ZFS_CACHE_METADATA))) ++ ++#ifdef ZFS_DEBUG ++ ++/* ++ * There should be a ## between the string literal and fmt, to make it ++ * clear that we're joining two strings together, but gcc does not ++ * support that preprocessor token. ++ */ ++#define dprintf_dbuf(dbuf, fmt, ...) do { \ ++ if (zfs_flags & ZFS_DEBUG_DPRINTF) { \ ++ char __db_buf[32]; \ ++ uint64_t __db_obj = (dbuf)->db.db_object; \ ++ if (__db_obj == DMU_META_DNODE_OBJECT) \ ++ (void) strcpy(__db_buf, "mdn"); \ ++ else \ ++ (void) snprintf(__db_buf, sizeof (__db_buf), "%lld", \ ++ (u_longlong_t)__db_obj); \ ++ dprintf_ds((dbuf)->db_objset->os_dsl_dataset, \ ++ "obj=%s lvl=%u blkid=%lld " fmt, \ ++ __db_buf, (dbuf)->db_level, \ ++ (u_longlong_t)(dbuf)->db_blkid, __VA_ARGS__); \ ++ } \ ++_NOTE(CONSTCOND) } while (0) ++ ++#define dprintf_dbuf_bp(db, bp, fmt, ...) do { \ ++ if (zfs_flags & ZFS_DEBUG_DPRINTF) { \ ++ char *__blkbuf = kmem_alloc(BP_SPRINTF_LEN, KM_PUSHPAGE); \ ++ sprintf_blkptr(__blkbuf, bp); \ ++ dprintf_dbuf(db, fmt " %s\n", __VA_ARGS__, __blkbuf); \ ++ kmem_free(__blkbuf, BP_SPRINTF_LEN); \ ++ } \ ++_NOTE(CONSTCOND) } while (0) ++ ++#define DBUF_VERIFY(db) dbuf_verify(db) ++ ++#else ++ ++#define dprintf_dbuf(db, fmt, ...) ++#define dprintf_dbuf_bp(db, bp, fmt, ...) ++#define DBUF_VERIFY(db) ++ ++#endif ++ ++ ++#ifdef __cplusplus ++} ++#endif ++ ++#endif /* _SYS_DBUF_H */ +diff -uNr linux-3.2.33-go.orig/include/zfs/sys/ddt.h linux-3.2.33-go/include/zfs/sys/ddt.h +--- linux-3.2.33-go.orig/include/zfs/sys/ddt.h 1970-01-01 01:00:00.000000000 +0100 ++++ linux-3.2.33-go/include/zfs/sys/ddt.h 2012-11-16 23:25:34.339039449 +0100 +@@ -0,0 +1,246 @@ ++/* ++ * CDDL HEADER START ++ * ++ * The contents of this file are subject to the terms of the ++ * Common Development and Distribution License (the "License"). ++ * You may not use this file except in compliance with the License. ++ * ++ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE ++ * or http://www.opensolaris.org/os/licensing. ++ * See the License for the specific language governing permissions ++ * and limitations under the License. ++ * ++ * When distributing Covered Code, include this CDDL HEADER in each ++ * file and include the License file at usr/src/OPENSOLARIS.LICENSE. ++ * If applicable, add the following below this CDDL HEADER, with the ++ * fields enclosed by brackets "[]" replaced with your own identifying ++ * information: Portions Copyright [yyyy] [name of copyright owner] ++ * ++ * CDDL HEADER END ++ */ ++/* ++ * Copyright (c) 2009, 2010, Oracle and/or its affiliates. All rights reserved. 
++ */ ++ ++#ifndef _SYS_DDT_H ++#define _SYS_DDT_H ++ ++#include ++#include ++#include ++#include ++#include ++ ++#ifdef __cplusplus ++extern "C" { ++#endif ++ ++/* ++ * On-disk DDT formats, in the desired search order (newest version first). ++ */ ++enum ddt_type { ++ DDT_TYPE_ZAP = 0, ++ DDT_TYPES ++}; ++ ++/* ++ * DDT classes, in the desired search order (highest replication level first). ++ */ ++enum ddt_class { ++ DDT_CLASS_DITTO = 0, ++ DDT_CLASS_DUPLICATE, ++ DDT_CLASS_UNIQUE, ++ DDT_CLASSES ++}; ++ ++#define DDT_TYPE_CURRENT 0 ++ ++#define DDT_COMPRESS_BYTEORDER_MASK 0x80 ++#define DDT_COMPRESS_FUNCTION_MASK 0x7f ++ ++/* ++ * On-disk ddt entry: key (name) and physical storage (value). ++ */ ++typedef struct ddt_key { ++ zio_cksum_t ddk_cksum; /* 256-bit block checksum */ ++ uint64_t ddk_prop; /* LSIZE, PSIZE, compression */ ++} ddt_key_t; ++ ++/* ++ * ddk_prop layout: ++ * ++ * +-------+-------+-------+-------+-------+-------+-------+-------+ ++ * | 0 | 0 | 0 | comp | PSIZE | LSIZE | ++ * +-------+-------+-------+-------+-------+-------+-------+-------+ ++ */ ++#define DDK_GET_LSIZE(ddk) \ ++ BF64_GET_SB((ddk)->ddk_prop, 0, 16, SPA_MINBLOCKSHIFT, 1) ++#define DDK_SET_LSIZE(ddk, x) \ ++ BF64_SET_SB((ddk)->ddk_prop, 0, 16, SPA_MINBLOCKSHIFT, 1, x) ++ ++#define DDK_GET_PSIZE(ddk) \ ++ BF64_GET_SB((ddk)->ddk_prop, 16, 16, SPA_MINBLOCKSHIFT, 1) ++#define DDK_SET_PSIZE(ddk, x) \ ++ BF64_SET_SB((ddk)->ddk_prop, 16, 16, SPA_MINBLOCKSHIFT, 1, x) ++ ++#define DDK_GET_COMPRESS(ddk) BF64_GET((ddk)->ddk_prop, 32, 8) ++#define DDK_SET_COMPRESS(ddk, x) BF64_SET((ddk)->ddk_prop, 32, 8, x) ++ ++#define DDT_KEY_WORDS (sizeof (ddt_key_t) / sizeof (uint64_t)) ++ ++typedef struct ddt_phys { ++ dva_t ddp_dva[SPA_DVAS_PER_BP]; ++ uint64_t ddp_refcnt; ++ uint64_t ddp_phys_birth; ++} ddt_phys_t; ++ ++enum ddt_phys_type { ++ DDT_PHYS_DITTO = 0, ++ DDT_PHYS_SINGLE = 1, ++ DDT_PHYS_DOUBLE = 2, ++ DDT_PHYS_TRIPLE = 3, ++ DDT_PHYS_TYPES ++}; ++ ++/* ++ * In-core ddt entry ++ */ ++struct ddt_entry { ++ ddt_key_t dde_key; ++ ddt_phys_t dde_phys[DDT_PHYS_TYPES]; ++ zio_t *dde_lead_zio[DDT_PHYS_TYPES]; ++ void *dde_repair_data; ++ enum ddt_type dde_type; ++ enum ddt_class dde_class; ++ uint8_t dde_loading; ++ uint8_t dde_loaded; ++ kcondvar_t dde_cv; ++ avl_node_t dde_node; ++}; ++ ++/* ++ * In-core ddt ++ */ ++struct ddt { ++ kmutex_t ddt_lock; ++ avl_tree_t ddt_tree; ++ avl_tree_t ddt_repair_tree; ++ enum zio_checksum ddt_checksum; ++ spa_t *ddt_spa; ++ objset_t *ddt_os; ++ uint64_t ddt_stat_object; ++ uint64_t ddt_object[DDT_TYPES][DDT_CLASSES]; ++ ddt_histogram_t ddt_histogram[DDT_TYPES][DDT_CLASSES]; ++ ddt_histogram_t ddt_histogram_cache[DDT_TYPES][DDT_CLASSES]; ++ ddt_object_t ddt_object_stats[DDT_TYPES][DDT_CLASSES]; ++ avl_node_t ddt_node; ++}; ++ ++/* ++ * In-core and on-disk bookmark for DDT walks ++ */ ++typedef struct ddt_bookmark { ++ uint64_t ddb_class; ++ uint64_t ddb_type; ++ uint64_t ddb_checksum; ++ uint64_t ddb_cursor; ++} ddt_bookmark_t; ++ ++/* ++ * Ops vector to access a specific DDT object type. 
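++ *
++ * Editor's note (illustrative, not part of the original header): this is a
++ * simple vtable; each enum ddt_type supplies one instance (currently only
++ * ddt_zap_ops for DDT_TYPE_ZAP, declared near the end of this file), and
++ * the generic ddt_object_*() helpers presumably dispatch through it,
++ * roughly:
++ *
++ *	const ddt_ops_t *ops = &ddt_zap_ops;
++ *	error = ops->ddt_op_lookup(ddt->ddt_os, object, dde);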
++ */ ++typedef struct ddt_ops { ++ char ddt_op_name[32]; ++ int (*ddt_op_create)(objset_t *os, uint64_t *object, dmu_tx_t *tx, ++ boolean_t prehash); ++ int (*ddt_op_destroy)(objset_t *os, uint64_t object, dmu_tx_t *tx); ++ int (*ddt_op_lookup)(objset_t *os, uint64_t object, ddt_entry_t *dde); ++ void (*ddt_op_prefetch)(objset_t *os, uint64_t object, ++ ddt_entry_t *dde); ++ int (*ddt_op_update)(objset_t *os, uint64_t object, ddt_entry_t *dde, ++ dmu_tx_t *tx); ++ int (*ddt_op_remove)(objset_t *os, uint64_t object, ddt_entry_t *dde, ++ dmu_tx_t *tx); ++ int (*ddt_op_walk)(objset_t *os, uint64_t object, ddt_entry_t *dde, ++ uint64_t *walk); ++ int (*ddt_op_count)(objset_t *os, uint64_t object, uint64_t *count); ++} ddt_ops_t; ++ ++#define DDT_NAMELEN 80 ++ ++extern void ddt_object_name(ddt_t *ddt, enum ddt_type type, ++ enum ddt_class class, char *name); ++extern int ddt_object_walk(ddt_t *ddt, enum ddt_type type, ++ enum ddt_class class, uint64_t *walk, ddt_entry_t *dde); ++extern int ddt_object_count(ddt_t *ddt, enum ddt_type type, ++ enum ddt_class class, uint64_t *count); ++extern int ddt_object_info(ddt_t *ddt, enum ddt_type type, ++ enum ddt_class class, dmu_object_info_t *); ++extern boolean_t ddt_object_exists(ddt_t *ddt, enum ddt_type type, ++ enum ddt_class class); ++ ++extern void ddt_bp_fill(const ddt_phys_t *ddp, blkptr_t *bp, ++ uint64_t txg); ++extern void ddt_bp_create(enum zio_checksum checksum, const ddt_key_t *ddk, ++ const ddt_phys_t *ddp, blkptr_t *bp); ++ ++extern void ddt_key_fill(ddt_key_t *ddk, const blkptr_t *bp); ++ ++extern void ddt_phys_fill(ddt_phys_t *ddp, const blkptr_t *bp); ++extern void ddt_phys_clear(ddt_phys_t *ddp); ++extern void ddt_phys_addref(ddt_phys_t *ddp); ++extern void ddt_phys_decref(ddt_phys_t *ddp); ++extern void ddt_phys_free(ddt_t *ddt, ddt_key_t *ddk, ddt_phys_t *ddp, ++ uint64_t txg); ++extern ddt_phys_t *ddt_phys_select(const ddt_entry_t *dde, const blkptr_t *bp); ++extern uint64_t ddt_phys_total_refcnt(const ddt_entry_t *dde); ++ ++extern void ddt_stat_add(ddt_stat_t *dst, const ddt_stat_t *src, uint64_t neg); ++ ++extern void ddt_histogram_add(ddt_histogram_t *dst, const ddt_histogram_t *src); ++extern void ddt_histogram_stat(ddt_stat_t *dds, const ddt_histogram_t *ddh); ++extern boolean_t ddt_histogram_empty(const ddt_histogram_t *ddh); ++extern void ddt_get_dedup_object_stats(spa_t *spa, ddt_object_t *ddo); ++extern void ddt_get_dedup_histogram(spa_t *spa, ddt_histogram_t *ddh); ++extern void ddt_get_dedup_stats(spa_t *spa, ddt_stat_t *dds_total); ++ ++extern uint64_t ddt_get_dedup_dspace(spa_t *spa); ++extern uint64_t ddt_get_pool_dedup_ratio(spa_t *spa); ++ ++extern int ddt_ditto_copies_needed(ddt_t *ddt, ddt_entry_t *dde, ++ ddt_phys_t *ddp_willref); ++extern int ddt_ditto_copies_present(ddt_entry_t *dde); ++ ++extern size_t ddt_compress(void *src, uchar_t *dst, size_t s_len, size_t d_len); ++extern void ddt_decompress(uchar_t *src, void *dst, size_t s_len, size_t d_len); ++ ++extern ddt_t *ddt_select(spa_t *spa, const blkptr_t *bp); ++extern void ddt_enter(ddt_t *ddt); ++extern void ddt_exit(ddt_t *ddt); ++extern ddt_entry_t *ddt_lookup(ddt_t *ddt, const blkptr_t *bp, boolean_t add); ++extern void ddt_prefetch(spa_t *spa, const blkptr_t *bp); ++extern void ddt_remove(ddt_t *ddt, ddt_entry_t *dde); ++ ++extern boolean_t ddt_class_contains(spa_t *spa, enum ddt_class max_class, ++ const blkptr_t *bp); ++ ++extern ddt_entry_t *ddt_repair_start(ddt_t *ddt, const blkptr_t *bp); ++extern void ddt_repair_done(ddt_t *ddt, ddt_entry_t 
*dde); ++ ++extern int ddt_entry_compare(const void *x1, const void *x2); ++ ++extern void ddt_create(spa_t *spa); ++extern int ddt_load(spa_t *spa); ++extern void ddt_unload(spa_t *spa); ++extern void ddt_sync(spa_t *spa, uint64_t txg); ++extern int ddt_walk(spa_t *spa, ddt_bookmark_t *ddb, ddt_entry_t *dde); ++extern int ddt_object_update(ddt_t *ddt, enum ddt_type type, ++ enum ddt_class class, ddt_entry_t *dde, dmu_tx_t *tx); ++ ++extern const ddt_ops_t ddt_zap_ops; ++ ++#ifdef __cplusplus ++} ++#endif ++ ++#endif /* _SYS_DDT_H */ +diff -uNr linux-3.2.33-go.orig/include/zfs/sys/dmu.h linux-3.2.33-go/include/zfs/sys/dmu.h +--- linux-3.2.33-go.orig/include/zfs/sys/dmu.h 1970-01-01 01:00:00.000000000 +0100 ++++ linux-3.2.33-go/include/zfs/sys/dmu.h 2012-11-16 23:25:34.336039485 +0100 +@@ -0,0 +1,749 @@ ++/* ++ * CDDL HEADER START ++ * ++ * The contents of this file are subject to the terms of the ++ * Common Development and Distribution License (the "License"). ++ * You may not use this file except in compliance with the License. ++ * ++ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE ++ * or http://www.opensolaris.org/os/licensing. ++ * See the License for the specific language governing permissions ++ * and limitations under the License. ++ * ++ * When distributing Covered Code, include this CDDL HEADER in each ++ * file and include the License file at usr/src/OPENSOLARIS.LICENSE. ++ * If applicable, add the following below this CDDL HEADER, with the ++ * fields enclosed by brackets "[]" replaced with your own identifying ++ * information: Portions Copyright [yyyy] [name of copyright owner] ++ * ++ * CDDL HEADER END ++ */ ++/* ++ * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2011 by Delphix. All rights reserved. ++ * Copyright (c) 2012, Joyent, Inc. All rights reserved. ++ */ ++ ++/* Portions Copyright 2010 Robert Milkowski */ ++ ++#ifndef _SYS_DMU_H ++#define _SYS_DMU_H ++ ++/* ++ * This file describes the interface that the DMU provides for its ++ * consumers. ++ * ++ * The DMU also interacts with the SPA. That interface is described in ++ * dmu_spa.h. 
++ */ ++ ++#include ++#include ++#include ++#include ++#include ++#include ++ ++#ifdef __cplusplus ++extern "C" { ++#endif ++ ++struct page; ++struct vnode; ++struct spa; ++struct zilog; ++struct zio; ++struct blkptr; ++struct zap_cursor; ++struct dsl_dataset; ++struct dsl_pool; ++struct dnode; ++struct drr_begin; ++struct drr_end; ++struct zbookmark; ++struct spa; ++struct nvlist; ++struct arc_buf; ++struct zio_prop; ++struct sa_handle; ++ ++typedef struct objset objset_t; ++typedef struct dmu_tx dmu_tx_t; ++typedef struct dsl_dir dsl_dir_t; ++ ++typedef enum dmu_object_type { ++ DMU_OT_NONE, ++ /* general: */ ++ DMU_OT_OBJECT_DIRECTORY, /* ZAP */ ++ DMU_OT_OBJECT_ARRAY, /* UINT64 */ ++ DMU_OT_PACKED_NVLIST, /* UINT8 (XDR by nvlist_pack/unpack) */ ++ DMU_OT_PACKED_NVLIST_SIZE, /* UINT64 */ ++ DMU_OT_BPOBJ, /* UINT64 */ ++ DMU_OT_BPOBJ_HDR, /* UINT64 */ ++ /* spa: */ ++ DMU_OT_SPACE_MAP_HEADER, /* UINT64 */ ++ DMU_OT_SPACE_MAP, /* UINT64 */ ++ /* zil: */ ++ DMU_OT_INTENT_LOG, /* UINT64 */ ++ /* dmu: */ ++ DMU_OT_DNODE, /* DNODE */ ++ DMU_OT_OBJSET, /* OBJSET */ ++ /* dsl: */ ++ DMU_OT_DSL_DIR, /* UINT64 */ ++ DMU_OT_DSL_DIR_CHILD_MAP, /* ZAP */ ++ DMU_OT_DSL_DS_SNAP_MAP, /* ZAP */ ++ DMU_OT_DSL_PROPS, /* ZAP */ ++ DMU_OT_DSL_DATASET, /* UINT64 */ ++ /* zpl: */ ++ DMU_OT_ZNODE, /* ZNODE */ ++ DMU_OT_OLDACL, /* Old ACL */ ++ DMU_OT_PLAIN_FILE_CONTENTS, /* UINT8 */ ++ DMU_OT_DIRECTORY_CONTENTS, /* ZAP */ ++ DMU_OT_MASTER_NODE, /* ZAP */ ++ DMU_OT_UNLINKED_SET, /* ZAP */ ++ /* zvol: */ ++ DMU_OT_ZVOL, /* UINT8 */ ++ DMU_OT_ZVOL_PROP, /* ZAP */ ++ /* other; for testing only! */ ++ DMU_OT_PLAIN_OTHER, /* UINT8 */ ++ DMU_OT_UINT64_OTHER, /* UINT64 */ ++ DMU_OT_ZAP_OTHER, /* ZAP */ ++ /* new object types: */ ++ DMU_OT_ERROR_LOG, /* ZAP */ ++ DMU_OT_SPA_HISTORY, /* UINT8 */ ++ DMU_OT_SPA_HISTORY_OFFSETS, /* spa_his_phys_t */ ++ DMU_OT_POOL_PROPS, /* ZAP */ ++ DMU_OT_DSL_PERMS, /* ZAP */ ++ DMU_OT_ACL, /* ACL */ ++ DMU_OT_SYSACL, /* SYSACL */ ++ DMU_OT_FUID, /* FUID table (Packed NVLIST UINT8) */ ++ DMU_OT_FUID_SIZE, /* FUID table size UINT64 */ ++ DMU_OT_NEXT_CLONES, /* ZAP */ ++ DMU_OT_SCAN_QUEUE, /* ZAP */ ++ DMU_OT_USERGROUP_USED, /* ZAP */ ++ DMU_OT_USERGROUP_QUOTA, /* ZAP */ ++ DMU_OT_USERREFS, /* ZAP */ ++ DMU_OT_DDT_ZAP, /* ZAP */ ++ DMU_OT_DDT_STATS, /* ZAP */ ++ DMU_OT_SA, /* System attr */ ++ DMU_OT_SA_MASTER_NODE, /* ZAP */ ++ DMU_OT_SA_ATTR_REGISTRATION, /* ZAP */ ++ DMU_OT_SA_ATTR_LAYOUTS, /* ZAP */ ++ DMU_OT_SCAN_XLATE, /* ZAP */ ++ DMU_OT_DEDUP, /* fake dedup BP from ddt_bp_create() */ ++ DMU_OT_DEADLIST, /* ZAP */ ++ DMU_OT_DEADLIST_HDR, /* UINT64 */ ++ DMU_OT_DSL_CLONES, /* ZAP */ ++ DMU_OT_BPOBJ_SUBOBJ, /* UINT64 */ ++ DMU_OT_NUMTYPES ++} dmu_object_type_t; ++ ++typedef enum dmu_objset_type { ++ DMU_OST_NONE, ++ DMU_OST_META, ++ DMU_OST_ZFS, ++ DMU_OST_ZVOL, ++ DMU_OST_OTHER, /* For testing only! */ ++ DMU_OST_ANY, /* Be careful! */ ++ DMU_OST_NUMTYPES ++} dmu_objset_type_t; ++ ++void byteswap_uint64_array(void *buf, size_t size); ++void byteswap_uint32_array(void *buf, size_t size); ++void byteswap_uint16_array(void *buf, size_t size); ++void byteswap_uint8_array(void *buf, size_t size); ++void zap_byteswap(void *buf, size_t size); ++void zfs_oldacl_byteswap(void *buf, size_t size); ++void zfs_acl_byteswap(void *buf, size_t size); ++void zfs_znode_byteswap(void *buf, size_t size); ++ ++#define DS_FIND_SNAPSHOTS (1<<0) ++#define DS_FIND_CHILDREN (1<<1) ++ ++/* ++ * The maximum number of bytes that can be accessed as part of one ++ * operation, including metadata. 
++ */ ++#define DMU_MAX_ACCESS (10<<20) /* 10MB */ ++#define DMU_MAX_DELETEBLKCNT (20480) /* ~5MB of indirect blocks */ ++ ++#define DMU_USERUSED_OBJECT (-1ULL) ++#define DMU_GROUPUSED_OBJECT (-2ULL) ++#define DMU_DEADLIST_OBJECT (-3ULL) ++ ++/* ++ * artificial blkids for bonus buffer and spill blocks ++ */ ++#define DMU_BONUS_BLKID (-1ULL) ++#define DMU_SPILL_BLKID (-2ULL) ++/* ++ * Public routines to create, destroy, open, and close objsets. ++ */ ++int dmu_objset_hold(const char *name, void *tag, objset_t **osp); ++int dmu_objset_own(const char *name, dmu_objset_type_t type, ++ boolean_t readonly, void *tag, objset_t **osp); ++void dmu_objset_rele(objset_t *os, void *tag); ++void dmu_objset_disown(objset_t *os, void *tag); ++int dmu_objset_open_ds(struct dsl_dataset *ds, objset_t **osp); ++ ++int dmu_objset_evict_dbufs(objset_t *os); ++int dmu_objset_create(const char *name, dmu_objset_type_t type, uint64_t flags, ++ void (*func)(objset_t *os, void *arg, cred_t *cr, dmu_tx_t *tx), void *arg); ++int dmu_objset_clone(const char *name, struct dsl_dataset *clone_origin, ++ uint64_t flags); ++int dmu_objset_destroy(const char *name, boolean_t defer); ++int dmu_snapshots_destroy_nvl(struct nvlist *snaps, boolean_t defer, char *); ++int dmu_objset_snapshot(char *fsname, char *snapname, char *tag, ++ struct nvlist *props, boolean_t recursive, boolean_t temporary, int fd); ++int dmu_objset_rename(const char *name, const char *newname, ++ boolean_t recursive); ++int dmu_objset_find(char *name, int func(const char *, void *), void *arg, ++ int flags); ++void dmu_objset_byteswap(void *buf, size_t size); ++ ++typedef struct dmu_buf { ++ uint64_t db_object; /* object that this buffer is part of */ ++ uint64_t db_offset; /* byte offset in this object */ ++ uint64_t db_size; /* size of buffer in bytes */ ++ void *db_data; /* data in buffer */ ++} dmu_buf_t; ++ ++typedef void dmu_buf_evict_func_t(struct dmu_buf *db, void *user_ptr); ++ ++/* ++ * The names of zap entries in the DIRECTORY_OBJECT of the MOS. ++ */ ++#define DMU_POOL_DIRECTORY_OBJECT 1 ++#define DMU_POOL_CONFIG "config" ++#define DMU_POOL_ROOT_DATASET "root_dataset" ++#define DMU_POOL_SYNC_BPOBJ "sync_bplist" ++#define DMU_POOL_ERRLOG_SCRUB "errlog_scrub" ++#define DMU_POOL_ERRLOG_LAST "errlog_last" ++#define DMU_POOL_SPARES "spares" ++#define DMU_POOL_DEFLATE "deflate" ++#define DMU_POOL_HISTORY "history" ++#define DMU_POOL_PROPS "pool_props" ++#define DMU_POOL_L2CACHE "l2cache" ++#define DMU_POOL_TMP_USERREFS "tmp_userrefs" ++#define DMU_POOL_DDT "DDT-%s-%s-%s" ++#define DMU_POOL_DDT_STATS "DDT-statistics" ++#define DMU_POOL_CREATION_VERSION "creation_version" ++#define DMU_POOL_SCAN "scan" ++#define DMU_POOL_FREE_BPOBJ "free_bpobj" ++ ++/* ++ * Allocate an object from this objset. The range of object numbers ++ * available is (0, DN_MAX_OBJECT). Object 0 is the meta-dnode. ++ * ++ * The transaction must be assigned to a txg. The newly allocated ++ * object will be "held" in the transaction (ie. you can modify the ++ * newly allocated object in this transaction). ++ * ++ * dmu_object_alloc() chooses an object and returns it in *objectp. ++ * ++ * dmu_object_claim() allocates a specific object number. If that ++ * number is already allocated, it fails and returns EEXIST. ++ * ++ * Return 0 on success, or ENOSPC or EEXIST as specified above. 
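++ *
++ * Illustrative use (editor's sketch; TXG_WAIT comes from sys/txg.h and the
++ * object/bonus types below are only examples, not a prescription):
++ *
++ *	tx = dmu_tx_create(os);
++ *	dmu_tx_hold_bonus(tx, DMU_NEW_OBJECT);
++ *	error = dmu_tx_assign(tx, TXG_WAIT);
++ *	if (error) {
++ *		dmu_tx_abort(tx);
++ *		return (error);
++ *	}
++ *	object = dmu_object_alloc(os, DMU_OT_UINT64_OTHER, 0,
++ *	    DMU_OT_NONE, 0, tx);
++ *	dmu_tx_commit(tx);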
++ */ ++uint64_t dmu_object_alloc(objset_t *os, dmu_object_type_t ot, ++ int blocksize, dmu_object_type_t bonus_type, int bonus_len, dmu_tx_t *tx); ++int dmu_object_claim(objset_t *os, uint64_t object, dmu_object_type_t ot, ++ int blocksize, dmu_object_type_t bonus_type, int bonus_len, dmu_tx_t *tx); ++int dmu_object_reclaim(objset_t *os, uint64_t object, dmu_object_type_t ot, ++ int blocksize, dmu_object_type_t bonustype, int bonuslen); ++ ++/* ++ * Free an object from this objset. ++ * ++ * The object's data will be freed as well (ie. you don't need to call ++ * dmu_free(object, 0, -1, tx)). ++ * ++ * The object need not be held in the transaction. ++ * ++ * If there are any holds on this object's buffers (via dmu_buf_hold()), ++ * or tx holds on the object (via dmu_tx_hold_object()), you can not ++ * free it; it fails and returns EBUSY. ++ * ++ * If the object is not allocated, it fails and returns ENOENT. ++ * ++ * Return 0 on success, or EBUSY or ENOENT as specified above. ++ */ ++int dmu_object_free(objset_t *os, uint64_t object, dmu_tx_t *tx); ++ ++/* ++ * Find the next allocated or free object. ++ * ++ * The objectp parameter is in-out. It will be updated to be the next ++ * object which is allocated. Ignore objects which have not been ++ * modified since txg. ++ * ++ * XXX Can only be called on a objset with no dirty data. ++ * ++ * Returns 0 on success, or ENOENT if there are no more objects. ++ */ ++int dmu_object_next(objset_t *os, uint64_t *objectp, ++ boolean_t hole, uint64_t txg); ++ ++/* ++ * Set the data blocksize for an object. ++ * ++ * The object cannot have any blocks allcated beyond the first. If ++ * the first block is allocated already, the new size must be greater ++ * than the current block size. If these conditions are not met, ++ * ENOTSUP will be returned. ++ * ++ * Returns 0 on success, or EBUSY if there are any holds on the object ++ * contents, or ENOTSUP as described above. ++ */ ++int dmu_object_set_blocksize(objset_t *os, uint64_t object, uint64_t size, ++ int ibs, dmu_tx_t *tx); ++ ++/* ++ * Set the checksum property on a dnode. The new checksum algorithm will ++ * apply to all newly written blocks; existing blocks will not be affected. ++ */ ++void dmu_object_set_checksum(objset_t *os, uint64_t object, uint8_t checksum, ++ dmu_tx_t *tx); ++ ++/* ++ * Set the compress property on a dnode. The new compression algorithm will ++ * apply to all newly written blocks; existing blocks will not be affected. ++ */ ++void dmu_object_set_compress(objset_t *os, uint64_t object, uint8_t compress, ++ dmu_tx_t *tx); ++ ++/* ++ * Decide how to write a block: checksum, compression, number of copies, etc. ++ */ ++#define WP_NOFILL 0x1 ++#define WP_DMU_SYNC 0x2 ++#define WP_SPILL 0x4 ++ ++void dmu_write_policy(objset_t *os, struct dnode *dn, int level, int wp, ++ struct zio_prop *zp); ++/* ++ * The bonus data is accessed more or less like a regular buffer. ++ * You must dmu_bonus_hold() to get the buffer, which will give you a ++ * dmu_buf_t with db_offset==-1ULL, and db_size = the size of the bonus ++ * data. As with any normal buffer, you must call dmu_buf_read() to ++ * read db_data, dmu_buf_will_dirty() before modifying it, and the ++ * object must be held in an assigned transaction before calling ++ * dmu_buf_will_dirty. You may use dmu_buf_set_user() on the bonus ++ * buffer as well. You must release your hold with dmu_buf_rele(). 
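++ *
++ * Illustrative use (editor's sketch; "tag" is any stable pointer that
++ * identifies the hold, and tx is assumed to be assigned already):
++ *
++ *	dmu_buf_t *db;
++ *	error = dmu_bonus_hold(os, object, tag, &db);
++ *	if (error == 0) {
++ *		dmu_buf_will_dirty(db, tx);
++ *		... modify up to db->db_size bytes at db->db_data ...
++ *		dmu_buf_rele(db, tag);
++ *	}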
++ */ ++int dmu_bonus_hold(objset_t *os, uint64_t object, void *tag, dmu_buf_t **); ++int dmu_bonus_max(void); ++int dmu_set_bonus(dmu_buf_t *, int, dmu_tx_t *); ++int dmu_set_bonustype(dmu_buf_t *, dmu_object_type_t, dmu_tx_t *); ++dmu_object_type_t dmu_get_bonustype(dmu_buf_t *); ++int dmu_rm_spill(objset_t *, uint64_t, dmu_tx_t *); ++ ++/* ++ * Special spill buffer support used by "SA" framework ++ */ ++ ++int dmu_spill_hold_by_bonus(dmu_buf_t *bonus, void *tag, dmu_buf_t **dbp); ++int dmu_spill_hold_by_dnode(struct dnode *dn, uint32_t flags, ++ void *tag, dmu_buf_t **dbp); ++int dmu_spill_hold_existing(dmu_buf_t *bonus, void *tag, dmu_buf_t **dbp); ++ ++/* ++ * Obtain the DMU buffer from the specified object which contains the ++ * specified offset. dmu_buf_hold() puts a "hold" on the buffer, so ++ * that it will remain in memory. You must release the hold with ++ * dmu_buf_rele(). You musn't access the dmu_buf_t after releasing your ++ * hold. You must have a hold on any dmu_buf_t* you pass to the DMU. ++ * ++ * You must call dmu_buf_read, dmu_buf_will_dirty, or dmu_buf_will_fill ++ * on the returned buffer before reading or writing the buffer's ++ * db_data. The comments for those routines describe what particular ++ * operations are valid after calling them. ++ * ++ * The object number must be a valid, allocated object number. ++ */ ++int dmu_buf_hold(objset_t *os, uint64_t object, uint64_t offset, ++ void *tag, dmu_buf_t **, int flags); ++void dmu_buf_add_ref(dmu_buf_t *db, void* tag); ++void dmu_buf_rele(dmu_buf_t *db, void *tag); ++uint64_t dmu_buf_refcount(dmu_buf_t *db); ++ ++/* ++ * dmu_buf_hold_array holds the DMU buffers which contain all bytes in a ++ * range of an object. A pointer to an array of dmu_buf_t*'s is ++ * returned (in *dbpp). ++ * ++ * dmu_buf_rele_array releases the hold on an array of dmu_buf_t*'s, and ++ * frees the array. The hold on the array of buffers MUST be released ++ * with dmu_buf_rele_array. You can NOT release the hold on each buffer ++ * individually with dmu_buf_rele. ++ */ ++int dmu_buf_hold_array_by_bonus(dmu_buf_t *db, uint64_t offset, ++ uint64_t length, int read, void *tag, int *numbufsp, dmu_buf_t ***dbpp); ++void dmu_buf_rele_array(dmu_buf_t **, int numbufs, void *tag); ++ ++/* ++ * Returns NULL on success, or the existing user ptr if it's already ++ * been set. ++ * ++ * user_ptr is for use by the user and can be obtained via dmu_buf_get_user(). ++ * ++ * user_data_ptr_ptr should be NULL, or a pointer to a pointer which ++ * will be set to db->db_data when you are allowed to access it. Note ++ * that db->db_data (the pointer) can change when you do dmu_buf_read(), ++ * dmu_buf_tryupgrade(), dmu_buf_will_dirty(), or dmu_buf_will_fill(). ++ * *user_data_ptr_ptr will be set to the new value when it changes. ++ * ++ * If non-NULL, pageout func will be called when this buffer is being ++ * excised from the cache, so that you can clean up the data structure ++ * pointed to by user_ptr. ++ * ++ * dmu_evict_user() will call the pageout func for all buffers in a ++ * objset with a given pageout func. ++ */ ++void *dmu_buf_set_user(dmu_buf_t *db, void *user_ptr, void *user_data_ptr_ptr, ++ dmu_buf_evict_func_t *pageout_func); ++/* ++ * set_user_ie is the same as set_user, but request immediate eviction ++ * when hold count goes to zero. 
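++ *
++ * Illustrative pattern for either variant (editor's sketch; my_node_t,
++ * mn_data and my_evict_cb are hypothetical names):
++ *
++ *	my_node_t *mn = ...;
++ *	winner = dmu_buf_set_user_ie(db, mn, &mn->mn_data, my_evict_cb);
++ *	if (winner != NULL) {
++ *		... lost the race: free mn and use the existing winner ...
++ *	}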
++ */ ++void *dmu_buf_set_user_ie(dmu_buf_t *db, void *user_ptr, ++ void *user_data_ptr_ptr, dmu_buf_evict_func_t *pageout_func); ++void *dmu_buf_update_user(dmu_buf_t *db_fake, void *old_user_ptr, ++ void *user_ptr, void *user_data_ptr_ptr, ++ dmu_buf_evict_func_t *pageout_func); ++void dmu_evict_user(objset_t *os, dmu_buf_evict_func_t *func); ++ ++/* ++ * Returns the user_ptr set with dmu_buf_set_user(), or NULL if not set. ++ */ ++void *dmu_buf_get_user(dmu_buf_t *db); ++ ++/* ++ * Indicate that you are going to modify the buffer's data (db_data). ++ * ++ * The transaction (tx) must be assigned to a txg (ie. you've called ++ * dmu_tx_assign()). The buffer's object must be held in the tx ++ * (ie. you've called dmu_tx_hold_object(tx, db->db_object)). ++ */ ++void dmu_buf_will_dirty(dmu_buf_t *db, dmu_tx_t *tx); ++ ++/* ++ * Tells if the given dbuf is freeable. ++ */ ++boolean_t dmu_buf_freeable(dmu_buf_t *); ++ ++/* ++ * You must create a transaction, then hold the objects which you will ++ * (or might) modify as part of this transaction. Then you must assign ++ * the transaction to a transaction group. Once the transaction has ++ * been assigned, you can modify buffers which belong to held objects as ++ * part of this transaction. You can't modify buffers before the ++ * transaction has been assigned; you can't modify buffers which don't ++ * belong to objects which this transaction holds; you can't hold ++ * objects once the transaction has been assigned. You may hold an ++ * object which you are going to free (with dmu_object_free()), but you ++ * don't have to. ++ * ++ * You can abort the transaction before it has been assigned. ++ * ++ * Note that you may hold buffers (with dmu_buf_hold) at any time, ++ * regardless of transaction state. ++ */ ++ ++#define DMU_NEW_OBJECT (-1ULL) ++#define DMU_OBJECT_END (-1ULL) ++ ++dmu_tx_t *dmu_tx_create(objset_t *os); ++void dmu_tx_hold_write(dmu_tx_t *tx, uint64_t object, uint64_t off, int len); ++void dmu_tx_hold_free(dmu_tx_t *tx, uint64_t object, uint64_t off, ++ uint64_t len); ++void dmu_tx_hold_zap(dmu_tx_t *tx, uint64_t object, int add, const char *name); ++void dmu_tx_hold_bonus(dmu_tx_t *tx, uint64_t object); ++void dmu_tx_hold_spill(dmu_tx_t *tx, uint64_t object); ++void dmu_tx_hold_sa(dmu_tx_t *tx, struct sa_handle *hdl, boolean_t may_grow); ++void dmu_tx_hold_sa_create(dmu_tx_t *tx, int total_size); ++void dmu_tx_abort(dmu_tx_t *tx); ++int dmu_tx_assign(dmu_tx_t *tx, uint64_t txg_how); ++void dmu_tx_wait(dmu_tx_t *tx); ++void dmu_tx_commit(dmu_tx_t *tx); ++ ++/* ++ * To register a commit callback, dmu_tx_callback_register() must be called. ++ * ++ * dcb_data is a pointer to caller private data that is passed on as a ++ * callback parameter. The caller is responsible for properly allocating and ++ * freeing it. ++ * ++ * When registering a callback, the transaction must be already created, but ++ * it cannot be committed or aborted. It can be assigned to a txg or not. ++ * ++ * The callback will be called after the transaction has been safely written ++ * to stable storage and will also be called if the dmu_tx is aborted. ++ * If there is any error which prevents the transaction from being committed to ++ * disk, the callback will be called with a value of error != 0. ++ */ ++typedef void dmu_tx_callback_func_t(void *dcb_data, int error); ++ ++void dmu_tx_callback_register(dmu_tx_t *tx, dmu_tx_callback_func_t *dcb_func, ++ void *dcb_data); ++ ++/* ++ * Free up the data blocks for a defined range of a file. 
If size is ++ * zero, the range from offset to end-of-file is freed. ++ */ ++int dmu_free_range(objset_t *os, uint64_t object, uint64_t offset, ++ uint64_t size, dmu_tx_t *tx); ++int dmu_free_long_range(objset_t *os, uint64_t object, uint64_t offset, ++ uint64_t size); ++int dmu_free_object(objset_t *os, uint64_t object); ++ ++/* ++ * Convenience functions. ++ * ++ * Canfail routines will return 0 on success, or an errno if there is a ++ * nonrecoverable I/O error. ++ */ ++#define DMU_READ_PREFETCH 0 /* prefetch */ ++#define DMU_READ_NO_PREFETCH 1 /* don't prefetch */ ++int dmu_read(objset_t *os, uint64_t object, uint64_t offset, uint64_t size, ++ void *buf, uint32_t flags); ++void dmu_write(objset_t *os, uint64_t object, uint64_t offset, uint64_t size, ++ const void *buf, dmu_tx_t *tx); ++void dmu_prealloc(objset_t *os, uint64_t object, uint64_t offset, uint64_t size, ++ dmu_tx_t *tx); ++#ifdef _KERNEL ++#include ++int dmu_read_req(objset_t *os, uint64_t object, struct request *req); ++int dmu_write_req(objset_t *os, uint64_t object, struct request *req, ++ dmu_tx_t *tx); ++int dmu_read_uio(objset_t *os, uint64_t object, struct uio *uio, uint64_t size); ++int dmu_write_uio(objset_t *os, uint64_t object, struct uio *uio, uint64_t size, ++ dmu_tx_t *tx); ++int dmu_write_uio_dbuf(dmu_buf_t *zdb, struct uio *uio, uint64_t size, ++ dmu_tx_t *tx); ++#endif ++struct arc_buf *dmu_request_arcbuf(dmu_buf_t *handle, int size); ++void dmu_return_arcbuf(struct arc_buf *buf); ++void dmu_assign_arcbuf(dmu_buf_t *handle, uint64_t offset, struct arc_buf *buf, ++ dmu_tx_t *tx); ++int dmu_xuio_init(struct xuio *uio, int niov); ++void dmu_xuio_fini(struct xuio *uio); ++int dmu_xuio_add(struct xuio *uio, struct arc_buf *abuf, offset_t off, ++ size_t n); ++int dmu_xuio_cnt(struct xuio *uio); ++struct arc_buf *dmu_xuio_arcbuf(struct xuio *uio, int i); ++void dmu_xuio_clear(struct xuio *uio, int i); ++void xuio_stat_wbuf_copied(void); ++void xuio_stat_wbuf_nocopy(void); ++ ++extern int zfs_prefetch_disable; ++ ++/* ++ * Asynchronously try to read in the data. ++ */ ++void dmu_prefetch(objset_t *os, uint64_t object, uint64_t offset, ++ uint64_t len); ++ ++typedef struct dmu_object_info { ++ /* All sizes are in bytes unless otherwise indicated. */ ++ uint32_t doi_data_block_size; ++ uint32_t doi_metadata_block_size; ++ dmu_object_type_t doi_type; ++ dmu_object_type_t doi_bonus_type; ++ uint64_t doi_bonus_size; ++ uint8_t doi_indirection; /* 2 = dnode->indirect->data */ ++ uint8_t doi_checksum; ++ uint8_t doi_compress; ++ uint8_t doi_pad[5]; ++ uint64_t doi_physical_blocks_512; /* data + metadata, 512b blks */ ++ uint64_t doi_max_offset; ++ uint64_t doi_fill_count; /* number of non-empty blocks */ ++} dmu_object_info_t; ++ ++typedef void arc_byteswap_func_t(void *buf, size_t size); ++ ++typedef struct dmu_object_type_info { ++ arc_byteswap_func_t *ot_byteswap; ++ boolean_t ot_metadata; ++ char *ot_name; ++} dmu_object_type_info_t; ++ ++extern const dmu_object_type_info_t dmu_ot[DMU_OT_NUMTYPES]; ++ ++/* ++ * Get information on a DMU object. ++ * ++ * Return 0 on success or ENOENT if object is not allocated. ++ * ++ * If doi is NULL, just indicates whether the object exists. 
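A minimal sketch of the canfail read path using the prefetch flags defined above; reads need no transaction, and the return value is 0 or an errno:

    static int
    example_read(objset_t *os, uint64_t object, uint64_t off, uint64_t size,
        void *buf)
    {
            /* prefetch is the default; pass DMU_READ_NO_PREFETCH to suppress it */
            return (dmu_read(os, object, off, size, buf, DMU_READ_PREFETCH));
    }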
++ */ ++int dmu_object_info(objset_t *os, uint64_t object, dmu_object_info_t *doi); ++void dmu_object_info_from_dnode(struct dnode *dn, dmu_object_info_t *doi); ++void dmu_object_info_from_db(dmu_buf_t *db, dmu_object_info_t *doi); ++void dmu_object_size_from_db(dmu_buf_t *db, uint32_t *blksize, ++ u_longlong_t *nblk512); ++ ++typedef struct dmu_objset_stats { ++ uint64_t dds_num_clones; /* number of clones of this */ ++ uint64_t dds_creation_txg; ++ uint64_t dds_guid; ++ dmu_objset_type_t dds_type; ++ uint8_t dds_is_snapshot; ++ uint8_t dds_inconsistent; ++ char dds_origin[MAXNAMELEN]; ++} dmu_objset_stats_t; ++ ++/* ++ * Get stats on a dataset. ++ */ ++void dmu_objset_fast_stat(objset_t *os, dmu_objset_stats_t *stat); ++ ++/* ++ * Add entries to the nvlist for all the objset's properties. See ++ * zfs_prop_table[] and zfs(1m) for details on the properties. ++ */ ++void dmu_objset_stats(objset_t *os, struct nvlist *nv); ++ ++/* ++ * Get the space usage statistics for statvfs(). ++ * ++ * refdbytes is the amount of space "referenced" by this objset. ++ * availbytes is the amount of space available to this objset, taking ++ * into account quotas & reservations, assuming that no other objsets ++ * use the space first. These values correspond to the 'referenced' and ++ * 'available' properties, described in the zfs(1m) manpage. ++ * ++ * usedobjs and availobjs are the number of objects currently allocated, ++ * and available. ++ */ ++void dmu_objset_space(objset_t *os, uint64_t *refdbytesp, uint64_t *availbytesp, ++ uint64_t *usedobjsp, uint64_t *availobjsp); ++ ++/* ++ * The fsid_guid is a 56-bit ID that can change to avoid collisions. ++ * (Contrast with the ds_guid which is a 64-bit ID that will never ++ * change, so there is a small probability that it will collide.) ++ */ ++uint64_t dmu_objset_fsid_guid(objset_t *os); ++ ++/* ++ * Get the [cm]time for an objset's snapshot dir ++ */ ++timestruc_t dmu_objset_snap_cmtime(objset_t *os); ++ ++int dmu_objset_is_snapshot(objset_t *os); ++ ++extern struct spa *dmu_objset_spa(objset_t *os); ++extern struct zilog *dmu_objset_zil(objset_t *os); ++extern struct dsl_pool *dmu_objset_pool(objset_t *os); ++extern struct dsl_dataset *dmu_objset_ds(objset_t *os); ++extern void dmu_objset_name(objset_t *os, char *buf); ++extern dmu_objset_type_t dmu_objset_type(objset_t *os); ++extern uint64_t dmu_objset_id(objset_t *os); ++extern uint64_t dmu_objset_syncprop(objset_t *os); ++extern uint64_t dmu_objset_logbias(objset_t *os); ++extern int dmu_snapshot_list_next(objset_t *os, int namelen, char *name, ++ uint64_t *id, uint64_t *offp, boolean_t *case_conflict); ++extern int dmu_snapshot_id(objset_t *os, const char *snapname, uint64_t *idp); ++extern int dmu_snapshot_realname(objset_t *os, char *name, char *real, ++ int maxlen, boolean_t *conflict); ++extern int dmu_dir_list_next(objset_t *os, int namelen, char *name, ++ uint64_t *idp, uint64_t *offp); ++ ++typedef int objset_used_cb_t(dmu_object_type_t bonustype, ++ void *bonus, uint64_t *userp, uint64_t *groupp); ++extern void dmu_objset_register_type(dmu_objset_type_t ost, ++ objset_used_cb_t *cb); ++extern void dmu_objset_set_user(objset_t *os, void *user_ptr); ++extern void *dmu_objset_get_user(objset_t *os); ++ ++/* ++ * Return the txg number for the given assigned transaction. ++ */ ++uint64_t dmu_tx_get_txg(dmu_tx_t *tx); ++ ++/* ++ * Synchronous write. ++ * If a parent zio is provided this function initiates a write on the ++ * provided buffer as a child of the parent zio. 
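A minimal sketch of a statvfs-style space query built on dmu_objset_space() as described above; treating referenced plus available as the filesystem total is one plausible interpretation, not the only one:

    static uint64_t
    example_fs_total_bytes(objset_t *os)
    {
            uint64_t refd, avail, usedobjs, availobjs;

            dmu_objset_space(os, &refd, &avail, &usedobjs, &availobjs);

            /* 'referenced' plus 'available' is the total a statvfs caller sees */
            return (refd + avail);
    }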
++ * In the absence of a parent zio, the write is completed synchronously. ++ * At write completion, blk is filled with the bp of the written block. ++ * Note that while the data covered by this function will be on stable ++ * storage when the write completes this new data does not become a ++ * permanent part of the file until the associated transaction commits. ++ */ ++ ++/* ++ * {zfs,zvol,ztest}_get_done() args ++ */ ++typedef struct zgd { ++ struct zilog *zgd_zilog; ++ struct blkptr *zgd_bp; ++ dmu_buf_t *zgd_db; ++ struct rl *zgd_rl; ++ void *zgd_private; ++} zgd_t; ++ ++typedef void dmu_sync_cb_t(zgd_t *arg, int error); ++int dmu_sync(struct zio *zio, uint64_t txg, dmu_sync_cb_t *done, zgd_t *zgd); ++ ++/* ++ * Find the next hole or data block in file starting at *off ++ * Return found offset in *off. Return ESRCH for end of file. ++ */ ++int dmu_offset_next(objset_t *os, uint64_t object, boolean_t hole, ++ uint64_t *off); ++ ++/* ++ * Initial setup and final teardown. ++ */ ++extern void dmu_init(void); ++extern void dmu_fini(void); ++ ++typedef void (*dmu_traverse_cb_t)(objset_t *os, void *arg, struct blkptr *bp, ++ uint64_t object, uint64_t offset, int len); ++void dmu_traverse_objset(objset_t *os, uint64_t txg_start, ++ dmu_traverse_cb_t cb, void *arg); ++ ++int dmu_send(objset_t *tosnap, objset_t *fromsnap, boolean_t fromorigin, ++ int outfd, struct vnode *vp, offset_t *off); ++int dmu_send_estimate(objset_t *tosnap, objset_t *fromsnap, boolean_t fromorign, ++ uint64_t *sizep); ++ ++typedef struct dmu_recv_cookie { ++ /* ++ * This structure is opaque! ++ * ++ * If logical and real are different, we are recving the stream ++ * into the "real" temporary clone, and then switching it with ++ * the "logical" target. ++ */ ++ struct dsl_dataset *drc_logical_ds; ++ struct dsl_dataset *drc_real_ds; ++ struct drr_begin *drc_drrb; ++ char *drc_tosnap; ++ char *drc_top_ds; ++ boolean_t drc_newfs; ++ boolean_t drc_force; ++ struct avl_tree *drc_guid_to_ds_map; ++} dmu_recv_cookie_t; ++ ++int dmu_recv_begin(char *tofs, char *tosnap, char *topds, struct drr_begin *, ++ boolean_t force, objset_t *origin, dmu_recv_cookie_t *); ++int dmu_recv_stream(dmu_recv_cookie_t *drc, struct vnode *vp, offset_t *voffp, ++ int cleanup_fd, uint64_t *action_handlep); ++int dmu_recv_end(dmu_recv_cookie_t *drc); ++ ++int dmu_diff(objset_t *tosnap, objset_t *fromsnap, struct vnode *vp, ++ offset_t *off); ++ ++/* CRC64 table */ ++#define ZFS_CRC64_POLY 0xC96C5795D7870F42ULL /* ECMA-182, reflected form */ ++extern uint64_t zfs_crc64_table[256]; ++ ++#ifdef __cplusplus ++} ++#endif ++ ++#endif /* _SYS_DMU_H */ +diff -uNr linux-3.2.33-go.orig/include/zfs/sys/dmu_impl.h linux-3.2.33-go/include/zfs/sys/dmu_impl.h +--- linux-3.2.33-go.orig/include/zfs/sys/dmu_impl.h 1970-01-01 01:00:00.000000000 +0100 ++++ linux-3.2.33-go/include/zfs/sys/dmu_impl.h 2012-11-16 23:25:34.342039415 +0100 +@@ -0,0 +1,274 @@ ++/* ++ * CDDL HEADER START ++ * ++ * The contents of this file are subject to the terms of the ++ * Common Development and Distribution License (the "License"). ++ * You may not use this file except in compliance with the License. ++ * ++ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE ++ * or http://www.opensolaris.org/os/licensing. ++ * See the License for the specific language governing permissions ++ * and limitations under the License. ++ * ++ * When distributing Covered Code, include this CDDL HEADER in each ++ * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 
++ * If applicable, add the following below this CDDL HEADER, with the ++ * fields enclosed by brackets "[]" replaced with your own identifying ++ * information: Portions Copyright [yyyy] [name of copyright owner] ++ * ++ * CDDL HEADER END ++ */ ++/* ++ * Copyright 2010 Sun Microsystems, Inc. All rights reserved. ++ * Use is subject to license terms. ++ * Copyright (c) 2012, Joyent, Inc. All rights reserved. ++ */ ++ ++#ifndef _SYS_DMU_IMPL_H ++#define _SYS_DMU_IMPL_H ++ ++#include ++#include ++#include ++#include ++#include ++ ++#ifdef __cplusplus ++extern "C" { ++#endif ++ ++/* ++ * This is the locking strategy for the DMU. Numbers in parenthesis are ++ * cases that use that lock order, referenced below: ++ * ++ * ARC is self-contained ++ * bplist is self-contained ++ * refcount is self-contained ++ * txg is self-contained (hopefully!) ++ * zst_lock ++ * zf_rwlock ++ * ++ * XXX try to improve evicting path? ++ * ++ * dp_config_rwlock > os_obj_lock > dn_struct_rwlock > ++ * dn_dbufs_mtx > hash_mutexes > db_mtx > dd_lock > leafs ++ * ++ * dp_config_rwlock ++ * must be held before: everything ++ * protects dd namespace changes ++ * protects property changes globally ++ * held from: ++ * dsl_dir_open/r: ++ * dsl_dir_create_sync/w: ++ * dsl_dir_sync_destroy/w: ++ * dsl_dir_rename_sync/w: ++ * dsl_prop_changed_notify/r: ++ * ++ * os_obj_lock ++ * must be held before: ++ * everything except dp_config_rwlock ++ * protects os_obj_next ++ * held from: ++ * dmu_object_alloc: dn_dbufs_mtx, db_mtx, hash_mutexes, dn_struct_rwlock ++ * ++ * dn_struct_rwlock ++ * must be held before: ++ * everything except dp_config_rwlock and os_obj_lock ++ * protects structure of dnode (eg. nlevels) ++ * db_blkptr can change when syncing out change to nlevels ++ * dn_maxblkid ++ * dn_nlevels ++ * dn_*blksz* ++ * phys nlevels, maxblkid, physical blkptr_t's (?) ++ * held from: ++ * callers of dbuf_read_impl, dbuf_hold[_impl], dbuf_prefetch ++ * dmu_object_info_from_dnode: dn_dirty_mtx (dn_datablksz) ++ * dmu_tx_count_free: ++ * dbuf_read_impl: db_mtx, dmu_zfetch() ++ * dmu_zfetch: zf_rwlock/r, zst_lock, dbuf_prefetch() ++ * dbuf_new_size: db_mtx ++ * dbuf_dirty: db_mtx ++ * dbuf_findbp: (callers, phys? - the real need) ++ * dbuf_create: dn_dbufs_mtx, hash_mutexes, db_mtx (phys?) ++ * dbuf_prefetch: dn_dirty_mtx, hash_mutexes, db_mtx, dn_dbufs_mtx ++ * dbuf_hold_impl: hash_mutexes, db_mtx, dn_dbufs_mtx, dbuf_findbp() ++ * dnode_sync/w (increase_indirection): db_mtx (phys) ++ * dnode_set_blksz/w: dn_dbufs_mtx (dn_*blksz*) ++ * dnode_new_blkid/w: (dn_maxblkid) ++ * dnode_free_range/w: dn_dirty_mtx (dn_maxblkid) ++ * dnode_next_offset: (phys) ++ * ++ * dn_dbufs_mtx ++ * must be held before: ++ * db_mtx, hash_mutexes ++ * protects: ++ * dn_dbufs ++ * dn_evicted ++ * held from: ++ * dmu_evict_user: db_mtx (dn_dbufs) ++ * dbuf_free_range: db_mtx (dn_dbufs) ++ * dbuf_remove_ref: db_mtx, callees: ++ * dbuf_hash_remove: hash_mutexes, db_mtx ++ * dbuf_create: hash_mutexes, db_mtx (dn_dbufs) ++ * dnode_set_blksz: (dn_dbufs) ++ * ++ * hash_mutexes (global) ++ * must be held before: ++ * db_mtx ++ * protects dbuf_hash_table (global) and db_hash_next ++ * held from: ++ * dbuf_find: db_mtx ++ * dbuf_hash_insert: db_mtx ++ * dbuf_hash_remove: db_mtx ++ * ++ * db_mtx (meta-leaf) ++ * must be held before: ++ * dn_mtx, dn_dirty_mtx, dd_lock (leaf mutexes) ++ * protects: ++ * db_state ++ * db_holds ++ * db_buf ++ * db_changed ++ * db_data_pending ++ * db_dirtied ++ * db_link ++ * db_dirty_node (??) 
++ * db_dirtycnt ++ * db_d.* ++ * db.* ++ * held from: ++ * dbuf_dirty: dn_mtx, dn_dirty_mtx ++ * dbuf_dirty->dsl_dir_willuse_space: dd_lock ++ * dbuf_dirty->dbuf_new_block->dsl_dataset_block_freeable: dd_lock ++ * dbuf_undirty: dn_dirty_mtx (db_d) ++ * dbuf_write_done: dn_dirty_mtx (db_state) ++ * dbuf_* ++ * dmu_buf_update_user: none (db_d) ++ * dmu_evict_user: none (db_d) (maybe can eliminate) ++ * dbuf_find: none (db_holds) ++ * dbuf_hash_insert: none (db_holds) ++ * dmu_buf_read_array_impl: none (db_state, db_changed) ++ * dmu_sync: none (db_dirty_node, db_d) ++ * dnode_reallocate: none (db) ++ * ++ * dn_mtx (leaf) ++ * protects: ++ * dn_dirty_dbufs ++ * dn_ranges ++ * phys accounting ++ * dn_allocated_txg ++ * dn_free_txg ++ * dn_assigned_txg ++ * dd_assigned_tx ++ * dn_notxholds ++ * dn_dirtyctx ++ * dn_dirtyctx_firstset ++ * (dn_phys copy fields?) ++ * (dn_phys contents?) ++ * held from: ++ * dnode_* ++ * dbuf_dirty: none ++ * dbuf_sync: none (phys accounting) ++ * dbuf_undirty: none (dn_ranges, dn_dirty_dbufs) ++ * dbuf_write_done: none (phys accounting) ++ * dmu_object_info_from_dnode: none (accounting) ++ * dmu_tx_commit: none ++ * dmu_tx_hold_object_impl: none ++ * dmu_tx_try_assign: dn_notxholds(cv) ++ * dmu_tx_unassign: none ++ * ++ * dd_lock ++ * must be held before: ++ * ds_lock ++ * ancestors' dd_lock ++ * protects: ++ * dd_prop_cbs ++ * dd_sync_* ++ * dd_used_bytes ++ * dd_tempreserved ++ * dd_space_towrite ++ * dd_myname ++ * dd_phys accounting? ++ * held from: ++ * dsl_dir_* ++ * dsl_prop_changed_notify: none (dd_prop_cbs) ++ * dsl_prop_register: none (dd_prop_cbs) ++ * dsl_prop_unregister: none (dd_prop_cbs) ++ * dsl_dataset_block_freeable: none (dd_sync_*) ++ * ++ * os_lock (leaf) ++ * protects: ++ * os_dirty_dnodes ++ * os_free_dnodes ++ * os_dnodes ++ * os_downgraded_dbufs ++ * dn_dirtyblksz ++ * dn_dirty_link ++ * held from: ++ * dnode_create: none (os_dnodes) ++ * dnode_destroy: none (os_dnodes) ++ * dnode_setdirty: none (dn_dirtyblksz, os_*_dnodes) ++ * dnode_free: none (dn_dirtyblksz, os_*_dnodes) ++ * ++ * ds_lock ++ * protects: ++ * ds_objset ++ * ds_open_refcount ++ * ds_snapname ++ * ds_phys accounting ++ * ds_phys userrefs zapobj ++ * ds_reserved ++ * held from: ++ * dsl_dataset_* ++ * ++ * dr_mtx (leaf) ++ * protects: ++ * dr_children ++ * held from: ++ * dbuf_dirty ++ * dbuf_undirty ++ * dbuf_sync_indirect ++ * dnode_new_blkid ++ */ ++ ++struct objset; ++struct dmu_pool; ++ ++typedef struct dmu_xuio { ++ int next; ++ int cnt; ++ struct arc_buf **bufs; ++ iovec_t *iovp; ++} dmu_xuio_t; ++ ++/* ++ * The list of data whose inclusion in a send stream can be pending from ++ * one call to backup_cb to another. Multiple calls to dump_free() and ++ * dump_freeobjects() can be aggregated into a single DRR_FREE or ++ * DRR_FREEOBJECTS replay record. 
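A minimal sketch of honoring the lock ordering documented earlier in this header (dn_struct_rwlock before db_mtx); dmu_buf_impl_t and its db_mtx field are assumed from dbuf.h, and rw_enter()/mutex_enter() are the usual SPL/Solaris primitives:

    static void
    example_inspect_dbuf(dnode_t *dn, dmu_buf_impl_t *db)
    {
            /* higher-order lock first, leaf-side mutex second */
            rw_enter(&dn->dn_struct_rwlock, RW_READER);
            mutex_enter(&db->db_mtx);

            /* ... examine db_state, db_blkptr, and so on ... */

            mutex_exit(&db->db_mtx);
            rw_exit(&dn->dn_struct_rwlock);
    }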
++ */ ++typedef enum { ++ PENDING_NONE, ++ PENDING_FREE, ++ PENDING_FREEOBJECTS ++} dmu_pendop_t; ++ ++typedef struct dmu_sendarg { ++ list_node_t dsa_link; ++ dmu_replay_record_t *dsa_drr; ++ vnode_t *dsa_vp; ++ int dsa_outfd; ++ proc_t *dsa_proc; ++ offset_t *dsa_off; ++ objset_t *dsa_os; ++ zio_cksum_t dsa_zc; ++ uint64_t dsa_toguid; ++ int dsa_err; ++ dmu_pendop_t dsa_pending_op; ++} dmu_sendarg_t; ++ ++#ifdef __cplusplus ++} ++#endif ++ ++#endif /* _SYS_DMU_IMPL_H */ +diff -uNr linux-3.2.33-go.orig/include/zfs/sys/dmu_objset.h linux-3.2.33-go/include/zfs/sys/dmu_objset.h +--- linux-3.2.33-go.orig/include/zfs/sys/dmu_objset.h 1970-01-01 01:00:00.000000000 +0100 ++++ linux-3.2.33-go/include/zfs/sys/dmu_objset.h 2012-11-16 23:25:34.339039449 +0100 +@@ -0,0 +1,182 @@ ++/* ++ * CDDL HEADER START ++ * ++ * The contents of this file are subject to the terms of the ++ * Common Development and Distribution License (the "License"). ++ * You may not use this file except in compliance with the License. ++ * ++ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE ++ * or http://www.opensolaris.org/os/licensing. ++ * See the License for the specific language governing permissions ++ * and limitations under the License. ++ * ++ * When distributing Covered Code, include this CDDL HEADER in each ++ * file and include the License file at usr/src/OPENSOLARIS.LICENSE. ++ * If applicable, add the following below this CDDL HEADER, with the ++ * fields enclosed by brackets "[]" replaced with your own identifying ++ * information: Portions Copyright [yyyy] [name of copyright owner] ++ * ++ * CDDL HEADER END ++ */ ++/* ++ * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved. ++ */ ++ ++/* Portions Copyright 2010 Robert Milkowski */ ++ ++#ifndef _SYS_DMU_OBJSET_H ++#define _SYS_DMU_OBJSET_H ++ ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++ ++#ifdef __cplusplus ++extern "C" { ++#endif ++ ++extern krwlock_t os_lock; ++ ++struct dsl_dataset; ++struct dmu_tx; ++ ++#define OBJSET_PHYS_SIZE 2048 ++#define OBJSET_OLD_PHYS_SIZE 1024 ++ ++#define OBJSET_BUF_HAS_USERUSED(buf) \ ++ (arc_buf_size(buf) > OBJSET_OLD_PHYS_SIZE) ++ ++#define OBJSET_FLAG_USERACCOUNTING_COMPLETE (1ULL<<0) ++ ++typedef struct objset_phys { ++ dnode_phys_t os_meta_dnode; ++ zil_header_t os_zil_header; ++ uint64_t os_type; ++ uint64_t os_flags; ++ char os_pad[OBJSET_PHYS_SIZE - sizeof (dnode_phys_t)*3 - ++ sizeof (zil_header_t) - sizeof (uint64_t)*2]; ++ dnode_phys_t os_userused_dnode; ++ dnode_phys_t os_groupused_dnode; ++} objset_phys_t; ++ ++struct objset { ++ /* Immutable: */ ++ struct dsl_dataset *os_dsl_dataset; ++ spa_t *os_spa; ++ arc_buf_t *os_phys_buf; ++ objset_phys_t *os_phys; ++ /* ++ * The following "special" dnodes have no parent and are exempt from ++ * dnode_move(), but they root their descendents in this objset using ++ * handles anyway, so that all access to dnodes from dbufs consistently ++ * uses handles. 
++ */ ++ dnode_handle_t os_meta_dnode; ++ dnode_handle_t os_userused_dnode; ++ dnode_handle_t os_groupused_dnode; ++ zilog_t *os_zil; ++ ++ /* can change, under dsl_dir's locks: */ ++ uint8_t os_checksum; ++ uint8_t os_compress; ++ uint8_t os_copies; ++ uint8_t os_dedup_checksum; ++ uint8_t os_dedup_verify; ++ uint8_t os_logbias; ++ uint8_t os_primary_cache; ++ uint8_t os_secondary_cache; ++ uint8_t os_sync; ++ ++ /* no lock needed: */ ++ struct dmu_tx *os_synctx; /* XXX sketchy */ ++ blkptr_t *os_rootbp; ++ zil_header_t os_zil_header; ++ list_t os_synced_dnodes; ++ uint64_t os_flags; ++ ++ /* Protected by os_obj_lock */ ++ kmutex_t os_obj_lock; ++ uint64_t os_obj_next; ++ ++ /* Protected by os_lock */ ++ kmutex_t os_lock; ++ list_t os_dirty_dnodes[TXG_SIZE]; ++ list_t os_free_dnodes[TXG_SIZE]; ++ list_t os_dnodes; ++ list_t os_downgraded_dbufs; ++ ++ /* stuff we store for the user */ ++ kmutex_t os_user_ptr_lock; ++ void *os_user_ptr; ++ ++ /* SA layout/attribute registration */ ++ sa_os_t *os_sa; ++}; ++ ++#define DMU_META_OBJSET 0 ++#define DMU_META_DNODE_OBJECT 0 ++#define DMU_OBJECT_IS_SPECIAL(obj) ((int64_t)(obj) <= 0) ++#define DMU_META_DNODE(os) ((os)->os_meta_dnode.dnh_dnode) ++#define DMU_USERUSED_DNODE(os) ((os)->os_userused_dnode.dnh_dnode) ++#define DMU_GROUPUSED_DNODE(os) ((os)->os_groupused_dnode.dnh_dnode) ++ ++#define DMU_OS_IS_L2CACHEABLE(os) \ ++ ((os)->os_secondary_cache == ZFS_CACHE_ALL || \ ++ (os)->os_secondary_cache == ZFS_CACHE_METADATA) ++ ++/* called from zpl */ ++int dmu_objset_hold(const char *name, void *tag, objset_t **osp); ++int dmu_objset_own(const char *name, dmu_objset_type_t type, ++ boolean_t readonly, void *tag, objset_t **osp); ++void dmu_objset_rele(objset_t *os, void *tag); ++void dmu_objset_disown(objset_t *os, void *tag); ++int dmu_objset_from_ds(struct dsl_dataset *ds, objset_t **osp); ++ ++int dmu_objset_create(const char *name, dmu_objset_type_t type, uint64_t flags, ++ void (*func)(objset_t *os, void *arg, cred_t *cr, dmu_tx_t *tx), void *arg); ++int dmu_objset_clone(const char *name, struct dsl_dataset *clone_origin, ++ uint64_t flags); ++int dmu_objset_destroy(const char *name, boolean_t defer); ++int dmu_objset_snapshot(char *fsname, char *snapname, char *tag, ++ struct nvlist *props, boolean_t recursive, boolean_t temporary, int fd); ++void dmu_objset_stats(objset_t *os, nvlist_t *nv); ++void dmu_objset_fast_stat(objset_t *os, dmu_objset_stats_t *stat); ++void dmu_objset_space(objset_t *os, uint64_t *refdbytesp, uint64_t *availbytesp, ++ uint64_t *usedobjsp, uint64_t *availobjsp); ++uint64_t dmu_objset_fsid_guid(objset_t *os); ++int dmu_objset_find(char *name, int func(const char *, void *), void *arg, ++ int flags); ++int dmu_objset_find_spa(spa_t *spa, const char *name, ++ int func(spa_t *, uint64_t, const char *, void *), void *arg, int flags); ++int dmu_objset_prefetch(const char *name, void *arg); ++void dmu_objset_byteswap(void *buf, size_t size); ++int dmu_objset_evict_dbufs(objset_t *os); ++timestruc_t dmu_objset_snap_cmtime(objset_t *os); ++ ++/* called from dsl */ ++void dmu_objset_sync(objset_t *os, zio_t *zio, dmu_tx_t *tx); ++boolean_t dmu_objset_is_dirty(objset_t *os, uint64_t txg); ++objset_t *dmu_objset_create_impl(spa_t *spa, struct dsl_dataset *ds, ++ blkptr_t *bp, dmu_objset_type_t type, dmu_tx_t *tx); ++int dmu_objset_open_impl(spa_t *spa, struct dsl_dataset *ds, blkptr_t *bp, ++ objset_t **osp); ++void dmu_objset_evict(objset_t *os); ++void dmu_objset_do_userquota_updates(objset_t *os, dmu_tx_t *tx); ++void 
dmu_objset_userquota_get_ids(dnode_t *dn, boolean_t before, dmu_tx_t *tx); ++boolean_t dmu_objset_userused_enabled(objset_t *os); ++int dmu_objset_userspace_upgrade(objset_t *os); ++boolean_t dmu_objset_userspace_present(objset_t *os); ++ ++void dmu_objset_init(void); ++void dmu_objset_fini(void); ++ ++#ifdef __cplusplus ++} ++#endif ++ ++#endif /* _SYS_DMU_OBJSET_H */ +diff -uNr linux-3.2.33-go.orig/include/zfs/sys/dmu_traverse.h linux-3.2.33-go/include/zfs/sys/dmu_traverse.h +--- linux-3.2.33-go.orig/include/zfs/sys/dmu_traverse.h 1970-01-01 01:00:00.000000000 +0100 ++++ linux-3.2.33-go/include/zfs/sys/dmu_traverse.h 2012-11-16 23:25:34.337039473 +0100 +@@ -0,0 +1,64 @@ ++/* ++ * CDDL HEADER START ++ * ++ * The contents of this file are subject to the terms of the ++ * Common Development and Distribution License (the "License"). ++ * You may not use this file except in compliance with the License. ++ * ++ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE ++ * or http://www.opensolaris.org/os/licensing. ++ * See the License for the specific language governing permissions ++ * and limitations under the License. ++ * ++ * When distributing Covered Code, include this CDDL HEADER in each ++ * file and include the License file at usr/src/OPENSOLARIS.LICENSE. ++ * If applicable, add the following below this CDDL HEADER, with the ++ * fields enclosed by brackets "[]" replaced with your own identifying ++ * information: Portions Copyright [yyyy] [name of copyright owner] ++ * ++ * CDDL HEADER END ++ */ ++/* ++ * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved. ++ */ ++ ++#ifndef _SYS_DMU_TRAVERSE_H ++#define _SYS_DMU_TRAVERSE_H ++ ++#include ++#include ++#include ++ ++#ifdef __cplusplus ++extern "C" { ++#endif ++ ++struct dnode_phys; ++struct dsl_dataset; ++struct zilog; ++struct arc_buf; ++ ++typedef int (blkptr_cb_t)(spa_t *spa, zilog_t *zilog, const blkptr_t *bp, ++ struct arc_buf *pbuf, const zbookmark_t *zb, const struct dnode_phys *dnp, ++ void *arg); ++ ++#define TRAVERSE_PRE (1<<0) ++#define TRAVERSE_POST (1<<1) ++#define TRAVERSE_PREFETCH_METADATA (1<<2) ++#define TRAVERSE_PREFETCH_DATA (1<<3) ++#define TRAVERSE_PREFETCH (TRAVERSE_PREFETCH_METADATA | TRAVERSE_PREFETCH_DATA) ++#define TRAVERSE_HARD (1<<4) ++ ++/* Special traverse error return value to indicate skipping of children */ ++#define TRAVERSE_VISIT_NO_CHILDREN -1 ++ ++int traverse_dataset(struct dsl_dataset *ds, ++ uint64_t txg_start, int flags, blkptr_cb_t func, void *arg); ++int traverse_pool(spa_t *spa, ++ uint64_t txg_start, int flags, blkptr_cb_t func, void *arg); ++ ++#ifdef __cplusplus ++} ++#endif ++ ++#endif /* _SYS_DMU_TRAVERSE_H */ +diff -uNr linux-3.2.33-go.orig/include/zfs/sys/dmu_tx.h linux-3.2.33-go/include/zfs/sys/dmu_tx.h +--- linux-3.2.33-go.orig/include/zfs/sys/dmu_tx.h 1970-01-01 01:00:00.000000000 +0100 ++++ linux-3.2.33-go/include/zfs/sys/dmu_tx.h 2012-11-16 23:25:34.338039461 +0100 +@@ -0,0 +1,176 @@ ++/* ++ * CDDL HEADER START ++ * ++ * The contents of this file are subject to the terms of the ++ * Common Development and Distribution License (the "License"). ++ * You may not use this file except in compliance with the License. ++ * ++ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE ++ * or http://www.opensolaris.org/os/licensing. ++ * See the License for the specific language governing permissions ++ * and limitations under the License. 
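A minimal sketch of the dmu_objset_hold()/dmu_objset_rele() pairing declared above in dmu_objset.h for read-only access to an objset, again assuming the FTAG tag convention from zfs_context.h:

    static int
    example_objset_visit(const char *name)
    {
            objset_t *os;
            int err;

            err = dmu_objset_hold(name, FTAG, &os);
            if (err != 0)
                    return (err);

            /* ... read-only inspection of the objset ... */

            dmu_objset_rele(os, FTAG);
            return (0);
    }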
++ * ++ * When distributing Covered Code, include this CDDL HEADER in each ++ * file and include the License file at usr/src/OPENSOLARIS.LICENSE. ++ * If applicable, add the following below this CDDL HEADER, with the ++ * fields enclosed by brackets "[]" replaced with your own identifying ++ * information: Portions Copyright [yyyy] [name of copyright owner] ++ * ++ * CDDL HEADER END ++ */ ++/* ++ * Copyright 2010 Sun Microsystems, Inc. All rights reserved. ++ * Use is subject to license terms. ++ */ ++ ++#ifndef _SYS_DMU_TX_H ++#define _SYS_DMU_TX_H ++ ++#include ++#include ++#include ++#include ++ ++#ifdef __cplusplus ++extern "C" { ++#endif ++ ++struct dmu_buf_impl; ++struct dmu_tx_hold; ++struct dnode_link; ++struct dsl_pool; ++struct dnode; ++struct dsl_dir; ++ ++struct dmu_tx { ++ /* ++ * No synchronization is needed because a tx can only be handled ++ * by one thread. ++ */ ++ list_t tx_holds; /* list of dmu_tx_hold_t */ ++ objset_t *tx_objset; ++ struct dsl_dir *tx_dir; ++ struct dsl_pool *tx_pool; ++ uint64_t tx_txg; ++ uint64_t tx_lastsnap_txg; ++ uint64_t tx_lasttried_txg; ++ txg_handle_t tx_txgh; ++ void *tx_tempreserve_cookie; ++ struct dmu_tx_hold *tx_needassign_txh; ++ list_t tx_callbacks; /* list of dmu_tx_callback_t on this dmu_tx */ ++ uint8_t tx_anyobj; ++ int tx_err; ++#ifdef DEBUG_DMU_TX ++ uint64_t tx_space_towrite; ++ uint64_t tx_space_tofree; ++ uint64_t tx_space_tooverwrite; ++ uint64_t tx_space_tounref; ++ refcount_t tx_space_written; ++ refcount_t tx_space_freed; ++#endif ++}; ++ ++enum dmu_tx_hold_type { ++ THT_NEWOBJECT, ++ THT_WRITE, ++ THT_BONUS, ++ THT_FREE, ++ THT_ZAP, ++ THT_SPACE, ++ THT_SPILL, ++ THT_NUMTYPES ++}; ++ ++typedef struct dmu_tx_hold { ++ dmu_tx_t *txh_tx; ++ list_node_t txh_node; ++ struct dnode *txh_dnode; ++ uint64_t txh_space_towrite; ++ uint64_t txh_space_tofree; ++ uint64_t txh_space_tooverwrite; ++ uint64_t txh_space_tounref; ++ uint64_t txh_memory_tohold; ++ uint64_t txh_fudge; ++#ifdef DEBUG_DMU_TX ++ enum dmu_tx_hold_type txh_type; ++ uint64_t txh_arg1; ++ uint64_t txh_arg2; ++#endif ++} dmu_tx_hold_t; ++ ++typedef struct dmu_tx_callback { ++ list_node_t dcb_node; /* linked to tx_callbacks list */ ++ dmu_tx_callback_func_t *dcb_func; /* caller function pointer */ ++ void *dcb_data; /* caller private data */ ++} dmu_tx_callback_t; ++ ++/* ++ * Used for dmu tx kstat. ++ */ ++typedef struct dmu_tx_stats { ++ kstat_named_t dmu_tx_assigned; ++ kstat_named_t dmu_tx_delay; ++ kstat_named_t dmu_tx_error; ++ kstat_named_t dmu_tx_suspended; ++ kstat_named_t dmu_tx_group; ++ kstat_named_t dmu_tx_how; ++ kstat_named_t dmu_tx_memory_reserve; ++ kstat_named_t dmu_tx_memory_reclaim; ++ kstat_named_t dmu_tx_memory_inflight; ++ kstat_named_t dmu_tx_dirty_throttle; ++ kstat_named_t dmu_tx_write_limit; ++ kstat_named_t dmu_tx_quota; ++} dmu_tx_stats_t; ++ ++extern dmu_tx_stats_t dmu_tx_stats; ++ ++#define DMU_TX_STAT_INCR(stat, val) \ ++ atomic_add_64(&dmu_tx_stats.stat.value.ui64, (val)); ++#define DMU_TX_STAT_BUMP(stat) \ ++ DMU_TX_STAT_INCR(stat, 1); ++ ++/* ++ * These routines are defined in dmu.h, and are called by the user. 
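A minimal sketch of registering a commit callback through dmu_tx_callback_register(), declared with the other user-facing routines just below and documented back in dmu.h; the example_done_t bookkeeping type is hypothetical:

    typedef struct example_done {
            int     ed_error;
    } example_done_t;

    static void
    example_commit_cb(void *arg, int error)
    {
            example_done_t *ed = arg;

            /* error != 0 means the tx was aborted or never reached stable storage */
            ed->ed_error = error;
    }

    static void
    example_register(dmu_tx_t *tx, example_done_t *ed)
    {
            /* legal once the tx exists, whether or not it has been assigned */
            dmu_tx_callback_register(tx, example_commit_cb, ed);
    }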
++ */ ++dmu_tx_t *dmu_tx_create(objset_t *dd); ++int dmu_tx_assign(dmu_tx_t *tx, uint64_t txg_how); ++void dmu_tx_commit(dmu_tx_t *tx); ++void dmu_tx_abort(dmu_tx_t *tx); ++uint64_t dmu_tx_get_txg(dmu_tx_t *tx); ++void dmu_tx_wait(dmu_tx_t *tx); ++ ++void dmu_tx_callback_register(dmu_tx_t *tx, dmu_tx_callback_func_t *dcb_func, ++ void *dcb_data); ++void dmu_tx_do_callbacks(list_t *cb_list, int error); ++ ++/* ++ * These routines are defined in dmu_spa.h, and are called by the SPA. ++ */ ++extern dmu_tx_t *dmu_tx_create_assigned(struct dsl_pool *dp, uint64_t txg); ++ ++/* ++ * These routines are only called by the DMU. ++ */ ++dmu_tx_t *dmu_tx_create_dd(dsl_dir_t *dd); ++int dmu_tx_is_syncing(dmu_tx_t *tx); ++int dmu_tx_private_ok(dmu_tx_t *tx); ++void dmu_tx_add_new_object(dmu_tx_t *tx, objset_t *os, uint64_t object); ++void dmu_tx_willuse_space(dmu_tx_t *tx, int64_t delta); ++void dmu_tx_dirty_buf(dmu_tx_t *tx, struct dmu_buf_impl *db); ++int dmu_tx_holds(dmu_tx_t *tx, uint64_t object); ++void dmu_tx_hold_space(dmu_tx_t *tx, uint64_t space); ++ ++#ifdef DEBUG_DMU_TX ++#define DMU_TX_DIRTY_BUF(tx, db) dmu_tx_dirty_buf(tx, db) ++#else ++#define DMU_TX_DIRTY_BUF(tx, db) ++#endif ++ ++void dmu_tx_init(void); ++void dmu_tx_fini(void); ++ ++#ifdef __cplusplus ++} ++#endif ++ ++#endif /* _SYS_DMU_TX_H */ +diff -uNr linux-3.2.33-go.orig/include/zfs/sys/dmu_zfetch.h linux-3.2.33-go/include/zfs/sys/dmu_zfetch.h +--- linux-3.2.33-go.orig/include/zfs/sys/dmu_zfetch.h 1970-01-01 01:00:00.000000000 +0100 ++++ linux-3.2.33-go/include/zfs/sys/dmu_zfetch.h 2012-11-16 23:25:34.342039415 +0100 +@@ -0,0 +1,76 @@ ++/* ++ * CDDL HEADER START ++ * ++ * The contents of this file are subject to the terms of the ++ * Common Development and Distribution License (the "License"). ++ * You may not use this file except in compliance with the License. ++ * ++ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE ++ * or http://www.opensolaris.org/os/licensing. ++ * See the License for the specific language governing permissions ++ * and limitations under the License. ++ * ++ * When distributing Covered Code, include this CDDL HEADER in each ++ * file and include the License file at usr/src/OPENSOLARIS.LICENSE. ++ * If applicable, add the following below this CDDL HEADER, with the ++ * fields enclosed by brackets "[]" replaced with your own identifying ++ * information: Portions Copyright [yyyy] [name of copyright owner] ++ * ++ * CDDL HEADER END ++ */ ++/* ++ * Copyright 2009 Sun Microsystems, Inc. All rights reserved. ++ * Use is subject to license terms. 
++ */ ++ ++#ifndef _DFETCH_H ++#define _DFETCH_H ++ ++#include ++ ++#ifdef __cplusplus ++extern "C" { ++#endif ++ ++extern unsigned long zfetch_array_rd_sz; ++ ++struct dnode; /* so we can reference dnode */ ++ ++typedef enum zfetch_dirn { ++ ZFETCH_FORWARD = 1, /* prefetch increasing block numbers */ ++ ZFETCH_BACKWARD = -1 /* prefetch decreasing block numbers */ ++} zfetch_dirn_t; ++ ++typedef struct zstream { ++ uint64_t zst_offset; /* offset of starting block in range */ ++ uint64_t zst_len; /* length of range, in blocks */ ++ zfetch_dirn_t zst_direction; /* direction of prefetch */ ++ uint64_t zst_stride; /* length of stride, in blocks */ ++ uint64_t zst_ph_offset; /* prefetch offset, in blocks */ ++ uint64_t zst_cap; /* prefetch limit (cap), in blocks */ ++ kmutex_t zst_lock; /* protects stream */ ++ clock_t zst_last; /* lbolt of last prefetch */ ++ avl_node_t zst_node; /* embed avl node here */ ++} zstream_t; ++ ++typedef struct zfetch { ++ krwlock_t zf_rwlock; /* protects zfetch structure */ ++ list_t zf_stream; /* AVL tree of zstream_t's */ ++ struct dnode *zf_dnode; /* dnode that owns this zfetch */ ++ uint32_t zf_stream_cnt; /* # of active streams */ ++ uint64_t zf_alloc_fail; /* # of failed attempts to alloc strm */ ++} zfetch_t; ++ ++void zfetch_init(void); ++void zfetch_fini(void); ++ ++void dmu_zfetch_init(zfetch_t *, struct dnode *); ++void dmu_zfetch_rele(zfetch_t *); ++void dmu_zfetch(zfetch_t *, uint64_t, uint64_t, int); ++ ++ ++#ifdef __cplusplus ++} ++#endif ++ ++#endif /* _DFETCH_H */ +diff -uNr linux-3.2.33-go.orig/include/zfs/sys/dnode.h linux-3.2.33-go/include/zfs/sys/dnode.h +--- linux-3.2.33-go.orig/include/zfs/sys/dnode.h 1970-01-01 01:00:00.000000000 +0100 ++++ linux-3.2.33-go/include/zfs/sys/dnode.h 2012-11-16 23:25:34.336039485 +0100 +@@ -0,0 +1,329 @@ ++/* ++ * CDDL HEADER START ++ * ++ * The contents of this file are subject to the terms of the ++ * Common Development and Distribution License (the "License"). ++ * You may not use this file except in compliance with the License. ++ * ++ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE ++ * or http://www.opensolaris.org/os/licensing. ++ * See the License for the specific language governing permissions ++ * and limitations under the License. ++ * ++ * When distributing Covered Code, include this CDDL HEADER in each ++ * file and include the License file at usr/src/OPENSOLARIS.LICENSE. ++ * If applicable, add the following below this CDDL HEADER, with the ++ * fields enclosed by brackets "[]" replaced with your own identifying ++ * information: Portions Copyright [yyyy] [name of copyright owner] ++ * ++ * CDDL HEADER END ++ */ ++/* ++ * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved. ++ */ ++ ++#ifndef _SYS_DNODE_H ++#define _SYS_DNODE_H ++ ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++ ++#ifdef __cplusplus ++extern "C" { ++#endif ++ ++/* ++ * dnode_hold() flags. ++ */ ++#define DNODE_MUST_BE_ALLOCATED 1 ++#define DNODE_MUST_BE_FREE 2 ++ ++/* ++ * dnode_next_offset() flags. ++ */ ++#define DNODE_FIND_HOLE 1 ++#define DNODE_FIND_BACKWARDS 2 ++#define DNODE_FIND_HAVELOCK 4 ++ ++/* ++ * Fixed constants. 
++ */ ++#define DNODE_SHIFT 9 /* 512 bytes */ ++#define DN_MIN_INDBLKSHIFT 10 /* 1k */ ++#define DN_MAX_INDBLKSHIFT 14 /* 16k */ ++#define DNODE_BLOCK_SHIFT 14 /* 16k */ ++#define DNODE_CORE_SIZE 64 /* 64 bytes for dnode sans blkptrs */ ++#define DN_MAX_OBJECT_SHIFT 48 /* 256 trillion (zfs_fid_t limit) */ ++#define DN_MAX_OFFSET_SHIFT 64 /* 2^64 bytes in a dnode */ ++ ++/* ++ * dnode id flags ++ * ++ * Note: a file will never ever have its ++ * ids moved from bonus->spill ++ * and only in a crypto environment would it be on spill ++ */ ++#define DN_ID_CHKED_BONUS 0x1 ++#define DN_ID_CHKED_SPILL 0x2 ++#define DN_ID_OLD_EXIST 0x4 ++#define DN_ID_NEW_EXIST 0x8 ++ ++/* ++ * Derived constants. ++ */ ++#define DNODE_SIZE (1 << DNODE_SHIFT) ++#define DN_MAX_NBLKPTR ((DNODE_SIZE - DNODE_CORE_SIZE) >> SPA_BLKPTRSHIFT) ++#define DN_MAX_BONUSLEN (DNODE_SIZE - DNODE_CORE_SIZE - (1 << SPA_BLKPTRSHIFT)) ++#define DN_MAX_OBJECT (1ULL << DN_MAX_OBJECT_SHIFT) ++#define DN_ZERO_BONUSLEN (DN_MAX_BONUSLEN + 1) ++#define DN_KILL_SPILLBLK (1) ++ ++#define DNODES_PER_BLOCK_SHIFT (DNODE_BLOCK_SHIFT - DNODE_SHIFT) ++#define DNODES_PER_BLOCK (1ULL << DNODES_PER_BLOCK_SHIFT) ++#define DNODES_PER_LEVEL_SHIFT (DN_MAX_INDBLKSHIFT - SPA_BLKPTRSHIFT) ++#define DNODES_PER_LEVEL (1ULL << DNODES_PER_LEVEL_SHIFT) ++ ++/* The +2 here is a cheesy way to round up */ ++#define DN_MAX_LEVELS (2 + ((DN_MAX_OFFSET_SHIFT - SPA_MINBLOCKSHIFT) / \ ++ (DN_MIN_INDBLKSHIFT - SPA_BLKPTRSHIFT))) ++ ++#define DN_BONUS(dnp) ((void*)((dnp)->dn_bonus + \ ++ (((dnp)->dn_nblkptr - 1) * sizeof (blkptr_t)))) ++ ++#define DN_USED_BYTES(dnp) (((dnp)->dn_flags & DNODE_FLAG_USED_BYTES) ? \ ++ (dnp)->dn_used : (dnp)->dn_used << SPA_MINBLOCKSHIFT) ++ ++#define EPB(blkshift, typeshift) (1 << (blkshift - typeshift)) ++ ++struct dmu_buf_impl; ++struct objset; ++struct zio; ++ ++enum dnode_dirtycontext { ++ DN_UNDIRTIED, ++ DN_DIRTY_OPEN, ++ DN_DIRTY_SYNC ++}; ++ ++/* Is dn_used in bytes? if not, it's in multiples of SPA_MINBLOCKSIZE */ ++#define DNODE_FLAG_USED_BYTES (1<<0) ++#define DNODE_FLAG_USERUSED_ACCOUNTED (1<<1) ++ ++/* Does dnode have a SA spill blkptr in bonus? */ ++#define DNODE_FLAG_SPILL_BLKPTR (1<<2) ++ ++typedef struct dnode_phys { ++ uint8_t dn_type; /* dmu_object_type_t */ ++ uint8_t dn_indblkshift; /* ln2(indirect block size) */ ++ uint8_t dn_nlevels; /* 1=dn_blkptr->data blocks */ ++ uint8_t dn_nblkptr; /* length of dn_blkptr */ ++ uint8_t dn_bonustype; /* type of data in bonus buffer */ ++ uint8_t dn_checksum; /* ZIO_CHECKSUM type */ ++ uint8_t dn_compress; /* ZIO_COMPRESS type */ ++ uint8_t dn_flags; /* DNODE_FLAG_* */ ++ uint16_t dn_datablkszsec; /* data block size in 512b sectors */ ++ uint16_t dn_bonuslen; /* length of dn_bonus */ ++ uint8_t dn_pad2[4]; ++ ++ /* accounting is protected by dn_dirty_mtx */ ++ uint64_t dn_maxblkid; /* largest allocated block ID */ ++ uint64_t dn_used; /* bytes (or sectors) of disk space */ ++ ++ uint64_t dn_pad3[4]; ++ ++ blkptr_t dn_blkptr[1]; ++ uint8_t dn_bonus[DN_MAX_BONUSLEN - sizeof (blkptr_t)]; ++ blkptr_t dn_spill; ++} dnode_phys_t; ++ ++typedef struct dnode { ++ /* ++ * dn_struct_rwlock protects the structure of the dnode, ++ * including the number of levels of indirection (dn_nlevels), ++ * dn_maxblkid, and dn_next_* ++ */ ++ krwlock_t dn_struct_rwlock; ++ ++ /* Our link on dn_objset->os_dnodes list; protected by os_lock. 
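With the spa.h values SPA_BLKPTRSHIFT == 7 (128-byte blkptr_t) and SPA_MINBLOCKSHIFT == 9, which are assumed here, the fixed and derived constants above work out to:

    DNODE_SIZE       = 1 << 9                  = 512 bytes
    DNODES_PER_BLOCK = 1 << (14 - 9)           = 32 dnodes per 16k block
    DN_MAX_NBLKPTR   = (512 - 64) >> 7         = 3 block pointers
    DN_MAX_BONUSLEN  = 512 - 64 - (1 << 7)     = 320 bytes of bonus space
    DN_MAX_LEVELS    = 2 + (64 - 9) / (10 - 7) = 20 (integer division)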
*/ ++ list_node_t dn_link; ++ ++ /* immutable: */ ++ struct objset *dn_objset; ++ uint64_t dn_object; ++ struct dmu_buf_impl *dn_dbuf; ++ struct dnode_handle *dn_handle; ++ dnode_phys_t *dn_phys; /* pointer into dn->dn_dbuf->db.db_data */ ++ ++ /* ++ * Copies of stuff in dn_phys. They're valid in the open ++ * context (eg. even before the dnode is first synced). ++ * Where necessary, these are protected by dn_struct_rwlock. ++ */ ++ dmu_object_type_t dn_type; /* object type */ ++ uint16_t dn_bonuslen; /* bonus length */ ++ uint8_t dn_bonustype; /* bonus type */ ++ uint8_t dn_nblkptr; /* number of blkptrs (immutable) */ ++ uint8_t dn_checksum; /* ZIO_CHECKSUM type */ ++ uint8_t dn_compress; /* ZIO_COMPRESS type */ ++ uint8_t dn_nlevels; ++ uint8_t dn_indblkshift; ++ uint8_t dn_datablkshift; /* zero if blksz not power of 2! */ ++ uint8_t dn_moved; /* Has this dnode been moved? */ ++ uint16_t dn_datablkszsec; /* in 512b sectors */ ++ uint32_t dn_datablksz; /* in bytes */ ++ uint64_t dn_maxblkid; ++ uint8_t dn_next_nblkptr[TXG_SIZE]; ++ uint8_t dn_next_nlevels[TXG_SIZE]; ++ uint8_t dn_next_indblkshift[TXG_SIZE]; ++ uint8_t dn_next_bonustype[TXG_SIZE]; ++ uint8_t dn_rm_spillblk[TXG_SIZE]; /* for removing spill blk */ ++ uint16_t dn_next_bonuslen[TXG_SIZE]; ++ uint32_t dn_next_blksz[TXG_SIZE]; /* next block size in bytes */ ++ ++ /* protected by dn_dbufs_mtx; declared here to fill 32-bit hole */ ++ uint32_t dn_dbufs_count; /* count of dn_dbufs */ ++ ++ /* protected by os_lock: */ ++ list_node_t dn_dirty_link[TXG_SIZE]; /* next on dataset's dirty */ ++ ++ /* protected by dn_mtx: */ ++ kmutex_t dn_mtx; ++ list_t dn_dirty_records[TXG_SIZE]; ++ avl_tree_t dn_ranges[TXG_SIZE]; ++ uint64_t dn_allocated_txg; ++ uint64_t dn_free_txg; ++ uint64_t dn_assigned_txg; ++ kcondvar_t dn_notxholds; ++ enum dnode_dirtycontext dn_dirtyctx; ++ uint8_t *dn_dirtyctx_firstset; /* dbg: contents meaningless */ ++ ++ /* protected by own devices */ ++ refcount_t dn_tx_holds; ++ refcount_t dn_holds; ++ ++ kmutex_t dn_dbufs_mtx; ++ list_t dn_dbufs; /* descendent dbufs */ ++ ++ /* protected by dn_struct_rwlock */ ++ struct dmu_buf_impl *dn_bonus; /* bonus buffer dbuf */ ++ ++ boolean_t dn_have_spill; /* have spill or are spilling */ ++ ++ /* parent IO for current sync write */ ++ zio_t *dn_zio; ++ ++ /* used in syncing context */ ++ uint64_t dn_oldused; /* old phys used bytes */ ++ uint64_t dn_oldflags; /* old phys dn_flags */ ++ uint64_t dn_olduid, dn_oldgid; ++ uint64_t dn_newuid, dn_newgid; ++ int dn_id_flags; ++ ++ /* holds prefetch structure */ ++ struct zfetch dn_zfetch; ++} dnode_t; ++ ++/* ++ * Adds a level of indirection between the dbuf and the dnode to avoid ++ * iterating descendent dbufs in dnode_move(). Handles are not allocated ++ * individually, but as an array of child dnodes in dnode_hold_impl(). ++ */ ++typedef struct dnode_handle { ++ /* Protects dnh_dnode from modification by dnode_move(). 
*/ ++ zrlock_t dnh_zrlock; ++ dnode_t *dnh_dnode; ++} dnode_handle_t; ++ ++typedef struct dnode_children { ++ size_t dnc_count; /* number of children */ ++ dnode_handle_t dnc_children[1]; /* sized dynamically */ ++} dnode_children_t; ++ ++typedef struct free_range { ++ avl_node_t fr_node; ++ uint64_t fr_blkid; ++ uint64_t fr_nblks; ++} free_range_t; ++ ++dnode_t *dnode_special_open(struct objset *dd, dnode_phys_t *dnp, ++ uint64_t object, dnode_handle_t *dnh); ++void dnode_special_close(dnode_handle_t *dnh); ++ ++void dnode_setbonuslen(dnode_t *dn, int newsize, dmu_tx_t *tx); ++void dnode_setbonus_type(dnode_t *dn, dmu_object_type_t, dmu_tx_t *tx); ++void dnode_rm_spill(dnode_t *dn, dmu_tx_t *tx); ++ ++int dnode_hold(struct objset *dd, uint64_t object, ++ void *ref, dnode_t **dnp); ++int dnode_hold_impl(struct objset *dd, uint64_t object, int flag, ++ void *ref, dnode_t **dnp); ++boolean_t dnode_add_ref(dnode_t *dn, void *ref); ++void dnode_rele(dnode_t *dn, void *ref); ++void dnode_setdirty(dnode_t *dn, dmu_tx_t *tx); ++void dnode_sync(dnode_t *dn, dmu_tx_t *tx); ++void dnode_allocate(dnode_t *dn, dmu_object_type_t ot, int blocksize, int ibs, ++ dmu_object_type_t bonustype, int bonuslen, dmu_tx_t *tx); ++void dnode_reallocate(dnode_t *dn, dmu_object_type_t ot, int blocksize, ++ dmu_object_type_t bonustype, int bonuslen, dmu_tx_t *tx); ++void dnode_free(dnode_t *dn, dmu_tx_t *tx); ++void dnode_byteswap(dnode_phys_t *dnp); ++void dnode_buf_byteswap(void *buf, size_t size); ++void dnode_verify(dnode_t *dn); ++int dnode_set_blksz(dnode_t *dn, uint64_t size, int ibs, dmu_tx_t *tx); ++uint64_t dnode_current_max_length(dnode_t *dn); ++void dnode_free_range(dnode_t *dn, uint64_t off, uint64_t len, dmu_tx_t *tx); ++void dnode_clear_range(dnode_t *dn, uint64_t blkid, ++ uint64_t nblks, dmu_tx_t *tx); ++void dnode_diduse_space(dnode_t *dn, int64_t space); ++void dnode_willuse_space(dnode_t *dn, int64_t space, dmu_tx_t *tx); ++void dnode_new_blkid(dnode_t *dn, uint64_t blkid, dmu_tx_t *tx, boolean_t); ++uint64_t dnode_block_freed(dnode_t *dn, uint64_t blkid); ++void dnode_init(void); ++void dnode_fini(void); ++int dnode_next_offset(dnode_t *dn, int flags, uint64_t *off, ++ int minlvl, uint64_t blkfill, uint64_t txg); ++void dnode_evict_dbufs(dnode_t *dn); ++ ++#ifdef ZFS_DEBUG ++ ++/* ++ * There should be a ## between the string literal and fmt, to make it ++ * clear that we're joining two strings together, but that piece of shit ++ * gcc doesn't support that preprocessor token. ++ */ ++#define dprintf_dnode(dn, fmt, ...) do { \ ++ if (zfs_flags & ZFS_DEBUG_DPRINTF) { \ ++ char __db_buf[32]; \ ++ uint64_t __db_obj = (dn)->dn_object; \ ++ if (__db_obj == DMU_META_DNODE_OBJECT) \ ++ (void) strcpy(__db_buf, "mdn"); \ ++ else \ ++ (void) snprintf(__db_buf, sizeof (__db_buf), "%lld", \ ++ (u_longlong_t)__db_obj);\ ++ dprintf_ds((dn)->dn_objset->os_dsl_dataset, "obj=%s " fmt, \ ++ __db_buf, __VA_ARGS__); \ ++ } \ ++_NOTE(CONSTCOND) } while (0) ++ ++#define DNODE_VERIFY(dn) dnode_verify(dn) ++#define FREE_VERIFY(db, start, end, tx) free_verify(db, start, end, tx) ++ ++#else ++ ++#define dprintf_dnode(db, fmt, ...) 
++#define DNODE_VERIFY(dn) ++#define FREE_VERIFY(db, start, end, tx) ++ ++#endif ++ ++#ifdef __cplusplus ++} ++#endif ++ ++#endif /* _SYS_DNODE_H */ +diff -uNr linux-3.2.33-go.orig/include/zfs/sys/dsl_dataset.h linux-3.2.33-go/include/zfs/sys/dsl_dataset.h +--- linux-3.2.33-go.orig/include/zfs/sys/dsl_dataset.h 1970-01-01 01:00:00.000000000 +0100 ++++ linux-3.2.33-go/include/zfs/sys/dsl_dataset.h 2012-11-16 23:25:34.338039461 +0100 +@@ -0,0 +1,293 @@ ++/* ++ * CDDL HEADER START ++ * ++ * The contents of this file are subject to the terms of the ++ * Common Development and Distribution License (the "License"). ++ * You may not use this file except in compliance with the License. ++ * ++ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE ++ * or http://www.opensolaris.org/os/licensing. ++ * See the License for the specific language governing permissions ++ * and limitations under the License. ++ * ++ * When distributing Covered Code, include this CDDL HEADER in each ++ * file and include the License file at usr/src/OPENSOLARIS.LICENSE. ++ * If applicable, add the following below this CDDL HEADER, with the ++ * fields enclosed by brackets "[]" replaced with your own identifying ++ * information: Portions Copyright [yyyy] [name of copyright owner] ++ * ++ * CDDL HEADER END ++ */ ++/* ++ * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2011 by Delphix. All rights reserved. ++ * Copyright (c) 2012, Joyent, Inc. All rights reserved. ++ */ ++ ++#ifndef _SYS_DSL_DATASET_H ++#define _SYS_DSL_DATASET_H ++ ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++ ++#ifdef __cplusplus ++extern "C" { ++#endif ++ ++struct dsl_dataset; ++struct dsl_dir; ++struct dsl_pool; ++ ++#define DS_FLAG_INCONSISTENT (1ULL<<0) ++#define DS_IS_INCONSISTENT(ds) \ ++ ((ds)->ds_phys->ds_flags & DS_FLAG_INCONSISTENT) ++/* ++ * NB: nopromote can not yet be set, but we want support for it in this ++ * on-disk version, so that we don't need to upgrade for it later. It ++ * will be needed when we implement 'zfs split' (where the split off ++ * clone should not be promoted). ++ */ ++#define DS_FLAG_NOPROMOTE (1ULL<<1) ++ ++/* ++ * DS_FLAG_UNIQUE_ACCURATE is set if ds_unique_bytes has been correctly ++ * calculated for head datasets (starting with SPA_VERSION_UNIQUE_ACCURATE, ++ * refquota/refreservations). ++ */ ++#define DS_FLAG_UNIQUE_ACCURATE (1ULL<<2) ++ ++/* ++ * DS_FLAG_DEFER_DESTROY is set after 'zfs destroy -d' has been called ++ * on a dataset. This allows the dataset to be destroyed using 'zfs release'. ++ */ ++#define DS_FLAG_DEFER_DESTROY (1ULL<<3) ++#define DS_IS_DEFER_DESTROY(ds) \ ++ ((ds)->ds_phys->ds_flags & DS_FLAG_DEFER_DESTROY) ++ ++/* ++ * DS_FLAG_CI_DATASET is set if the dataset contains a file system whose ++ * name lookups should be performed case-insensitively. 
++ */ ++#define DS_FLAG_CI_DATASET (1ULL<<16) ++ ++typedef struct dsl_dataset_phys { ++ uint64_t ds_dir_obj; /* DMU_OT_DSL_DIR */ ++ uint64_t ds_prev_snap_obj; /* DMU_OT_DSL_DATASET */ ++ uint64_t ds_prev_snap_txg; ++ uint64_t ds_next_snap_obj; /* DMU_OT_DSL_DATASET */ ++ uint64_t ds_snapnames_zapobj; /* DMU_OT_DSL_DS_SNAP_MAP 0 for snaps */ ++ uint64_t ds_num_children; /* clone/snap children; ==0 for head */ ++ uint64_t ds_creation_time; /* seconds since 1970 */ ++ uint64_t ds_creation_txg; ++ uint64_t ds_deadlist_obj; /* DMU_OT_DEADLIST */ ++ uint64_t ds_used_bytes; ++ uint64_t ds_compressed_bytes; ++ uint64_t ds_uncompressed_bytes; ++ uint64_t ds_unique_bytes; /* only relevant to snapshots */ ++ /* ++ * The ds_fsid_guid is a 56-bit ID that can change to avoid ++ * collisions. The ds_guid is a 64-bit ID that will never ++ * change, so there is a small probability that it will collide. ++ */ ++ uint64_t ds_fsid_guid; ++ uint64_t ds_guid; ++ uint64_t ds_flags; /* DS_FLAG_* */ ++ blkptr_t ds_bp; ++ uint64_t ds_next_clones_obj; /* DMU_OT_DSL_CLONES */ ++ uint64_t ds_props_obj; /* DMU_OT_DSL_PROPS for snaps */ ++ uint64_t ds_userrefs_obj; /* DMU_OT_USERREFS */ ++ uint64_t ds_pad[5]; /* pad out to 320 bytes for good measure */ ++} dsl_dataset_phys_t; ++ ++typedef struct dsl_dataset { ++ /* Immutable: */ ++ struct dsl_dir *ds_dir; ++ dsl_dataset_phys_t *ds_phys; ++ dmu_buf_t *ds_dbuf; ++ uint64_t ds_object; ++ uint64_t ds_fsid_guid; ++ ++ /* only used in syncing context, only valid for non-snapshots: */ ++ struct dsl_dataset *ds_prev; ++ ++ /* has internal locking: */ ++ dsl_deadlist_t ds_deadlist; ++ bplist_t ds_pending_deadlist; ++ ++ /* to protect against multiple concurrent incremental recv */ ++ kmutex_t ds_recvlock; ++ ++ /* protected by lock on pool's dp_dirty_datasets list */ ++ txg_node_t ds_dirty_link; ++ list_node_t ds_synced_link; ++ ++ /* ++ * ds_phys->ds_ is also protected by ds_lock. ++ * Protected by ds_lock: ++ */ ++ kmutex_t ds_lock; ++ objset_t *ds_objset; ++ uint64_t ds_userrefs; ++ ++ /* ++ * ds_owner is protected by the ds_rwlock and the ds_lock ++ */ ++ krwlock_t ds_rwlock; ++ kcondvar_t ds_exclusive_cv; ++ void *ds_owner; ++ ++ /* no locking; only for making guesses */ ++ uint64_t ds_trysnap_txg; ++ ++ /* for objset_open() */ ++ kmutex_t ds_opening_lock; ++ ++ uint64_t ds_reserved; /* cached refreservation */ ++ uint64_t ds_quota; /* cached refquota */ ++ ++ kmutex_t ds_sendstream_lock; ++ list_t ds_sendstreams; ++ ++ /* Protected by ds_lock; keep at end of struct for better locality */ ++ char ds_snapname[MAXNAMELEN]; ++} dsl_dataset_t; ++ ++struct dsl_ds_destroyarg { ++ dsl_dataset_t *ds; /* ds to destroy */ ++ dsl_dataset_t *rm_origin; /* also remove our origin? */ ++ boolean_t is_origin_rm; /* set if removing origin snap */ ++ boolean_t defer; /* destroy -d requested? */ ++ boolean_t releasing; /* destroying due to release? */ ++ boolean_t need_prep; /* do we need to retry due to EBUSY? */ ++}; ++ ++/* ++ * The max length of a temporary tag prefix is the number of hex digits ++ * required to express UINT64_MAX plus one for the hyphen. 
++ */ ++#define MAX_TAG_PREFIX_LEN 17 ++ ++struct dsl_ds_holdarg { ++ dsl_sync_task_group_t *dstg; ++ char *htag; ++ char *snapname; ++ boolean_t recursive; ++ boolean_t gotone; ++ boolean_t temphold; ++ char failed[MAXPATHLEN]; ++}; ++ ++#define dsl_dataset_is_snapshot(ds) \ ++ ((ds)->ds_phys->ds_num_children != 0) ++ ++#define DS_UNIQUE_IS_ACCURATE(ds) \ ++ (((ds)->ds_phys->ds_flags & DS_FLAG_UNIQUE_ACCURATE) != 0) ++ ++int dsl_dataset_hold(const char *name, void *tag, dsl_dataset_t **dsp); ++int dsl_dataset_hold_obj(struct dsl_pool *dp, uint64_t dsobj, ++ void *tag, dsl_dataset_t **); ++int dsl_dataset_own(const char *name, boolean_t inconsistentok, ++ void *tag, dsl_dataset_t **dsp); ++int dsl_dataset_own_obj(struct dsl_pool *dp, uint64_t dsobj, ++ boolean_t inconsistentok, void *tag, dsl_dataset_t **dsp); ++void dsl_dataset_name(dsl_dataset_t *ds, char *name); ++void dsl_dataset_rele(dsl_dataset_t *ds, void *tag); ++void dsl_dataset_disown(dsl_dataset_t *ds, void *tag); ++void dsl_dataset_drop_ref(dsl_dataset_t *ds, void *tag); ++boolean_t dsl_dataset_tryown(dsl_dataset_t *ds, boolean_t inconsistentok, ++ void *tag); ++void dsl_dataset_make_exclusive(dsl_dataset_t *ds, void *tag); ++void dsl_register_onexit_hold_cleanup(dsl_dataset_t *ds, const char *htag, ++ minor_t minor); ++uint64_t dsl_dataset_create_sync(dsl_dir_t *pds, const char *lastname, ++ dsl_dataset_t *origin, uint64_t flags, cred_t *, dmu_tx_t *); ++uint64_t dsl_dataset_create_sync_dd(dsl_dir_t *dd, dsl_dataset_t *origin, ++ uint64_t flags, dmu_tx_t *tx); ++int dsl_dataset_destroy(dsl_dataset_t *ds, void *tag, boolean_t defer); ++int dsl_snapshots_destroy(char *fsname, char *snapname, boolean_t defer); ++dsl_checkfunc_t dsl_dataset_destroy_check; ++dsl_syncfunc_t dsl_dataset_destroy_sync; ++dsl_checkfunc_t dsl_dataset_snapshot_check; ++dsl_syncfunc_t dsl_dataset_snapshot_sync; ++dsl_syncfunc_t dsl_dataset_user_hold_sync; ++int dsl_dataset_rename(char *name, const char *newname, boolean_t recursive); ++int dsl_dataset_promote(const char *name, char *conflsnap); ++int dsl_dataset_clone_swap(dsl_dataset_t *clone, dsl_dataset_t *origin_head, ++ boolean_t force); ++int dsl_dataset_user_hold(char *dsname, char *snapname, char *htag, ++ boolean_t recursive, boolean_t temphold, int cleanup_fd); ++int dsl_dataset_user_hold_for_send(dsl_dataset_t *ds, char *htag, ++ boolean_t temphold); ++int dsl_dataset_user_release(char *dsname, char *snapname, char *htag, ++ boolean_t recursive); ++int dsl_dataset_user_release_tmp(struct dsl_pool *dp, uint64_t dsobj, ++ char *htag, boolean_t retry); ++int dsl_dataset_get_holds(const char *dsname, nvlist_t **nvp); ++ ++blkptr_t *dsl_dataset_get_blkptr(dsl_dataset_t *ds); ++void dsl_dataset_set_blkptr(dsl_dataset_t *ds, blkptr_t *bp, dmu_tx_t *tx); ++ ++spa_t *dsl_dataset_get_spa(dsl_dataset_t *ds); ++ ++boolean_t dsl_dataset_modified_since_lastsnap(dsl_dataset_t *ds); ++ ++void dsl_dataset_sync(dsl_dataset_t *os, zio_t *zio, dmu_tx_t *tx); ++ ++void dsl_dataset_block_born(dsl_dataset_t *ds, const blkptr_t *bp, ++ dmu_tx_t *tx); ++int dsl_dataset_block_kill(dsl_dataset_t *ds, const blkptr_t *bp, ++ dmu_tx_t *tx, boolean_t async); ++boolean_t dsl_dataset_block_freeable(dsl_dataset_t *ds, const blkptr_t *bp, ++ uint64_t blk_birth); ++uint64_t dsl_dataset_prev_snap_txg(dsl_dataset_t *ds); ++ ++void dsl_dataset_dirty(dsl_dataset_t *ds, dmu_tx_t *tx); ++void dsl_dataset_stats(dsl_dataset_t *os, nvlist_t *nv); ++void dsl_dataset_fast_stat(dsl_dataset_t *ds, dmu_objset_stats_t *stat); ++void 
dsl_dataset_space(dsl_dataset_t *ds, ++ uint64_t *refdbytesp, uint64_t *availbytesp, ++ uint64_t *usedobjsp, uint64_t *availobjsp); ++uint64_t dsl_dataset_fsid_guid(dsl_dataset_t *ds); ++int dsl_dataset_space_written(dsl_dataset_t *oldsnap, dsl_dataset_t *new, ++ uint64_t *usedp, uint64_t *compp, uint64_t *uncompp); ++int dsl_dataset_space_wouldfree(dsl_dataset_t *firstsnap, dsl_dataset_t *last, ++ uint64_t *usedp, uint64_t *compp, uint64_t *uncompp); ++boolean_t dsl_dataset_is_dirty(dsl_dataset_t *ds); ++ ++int dsl_dsobj_to_dsname(char *pname, uint64_t obj, char *buf); ++ ++int dsl_dataset_check_quota(dsl_dataset_t *ds, boolean_t check_quota, ++ uint64_t asize, uint64_t inflight, uint64_t *used, ++ uint64_t *ref_rsrv); ++int dsl_dataset_set_quota(const char *dsname, zprop_source_t source, ++ uint64_t quota); ++dsl_syncfunc_t dsl_dataset_set_quota_sync; ++int dsl_dataset_set_reservation(const char *dsname, zprop_source_t source, ++ uint64_t reservation); ++ ++int dsl_destroy_inconsistent(const char *dsname, void *arg); ++ ++#ifdef ZFS_DEBUG ++#define dprintf_ds(ds, fmt, ...) do { \ ++ if (zfs_flags & ZFS_DEBUG_DPRINTF) { \ ++ char *__ds_name = kmem_alloc(MAXNAMELEN, KM_PUSHPAGE); \ ++ dsl_dataset_name(ds, __ds_name); \ ++ dprintf("ds=%s " fmt, __ds_name, __VA_ARGS__); \ ++ kmem_free(__ds_name, MAXNAMELEN); \ ++ } \ ++_NOTE(CONSTCOND) } while (0) ++#else ++#define dprintf_ds(dd, fmt, ...) ++#endif ++ ++#ifdef __cplusplus ++} ++#endif ++ ++#endif /* _SYS_DSL_DATASET_H */ +diff -uNr linux-3.2.33-go.orig/include/zfs/sys/dsl_deadlist.h linux-3.2.33-go/include/zfs/sys/dsl_deadlist.h +--- linux-3.2.33-go.orig/include/zfs/sys/dsl_deadlist.h 1970-01-01 01:00:00.000000000 +0100 ++++ linux-3.2.33-go/include/zfs/sys/dsl_deadlist.h 2012-11-16 23:25:34.338039461 +0100 +@@ -0,0 +1,87 @@ ++/* ++ * CDDL HEADER START ++ * ++ * The contents of this file are subject to the terms of the ++ * Common Development and Distribution License (the "License"). ++ * You may not use this file except in compliance with the License. ++ * ++ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE ++ * or http://www.opensolaris.org/os/licensing. ++ * See the License for the specific language governing permissions ++ * and limitations under the License. ++ * ++ * When distributing Covered Code, include this CDDL HEADER in each ++ * file and include the License file at usr/src/OPENSOLARIS.LICENSE. ++ * If applicable, add the following below this CDDL HEADER, with the ++ * fields enclosed by brackets "[]" replaced with your own identifying ++ * information: Portions Copyright [yyyy] [name of copyright owner] ++ * ++ * CDDL HEADER END ++ */ ++/* ++ * Copyright (c) 2010, Oracle and/or its affiliates. All rights reserved. 
++ */ ++ ++#ifndef _SYS_DSL_DEADLIST_H ++#define _SYS_DSL_DEADLIST_H ++ ++#include ++#include ++ ++#ifdef __cplusplus ++extern "C" { ++#endif ++ ++struct dmu_buf; ++struct dsl_dataset; ++ ++typedef struct dsl_deadlist_phys { ++ uint64_t dl_used; ++ uint64_t dl_comp; ++ uint64_t dl_uncomp; ++ uint64_t dl_pad[37]; /* pad out to 320b for future expansion */ ++} dsl_deadlist_phys_t; ++ ++typedef struct dsl_deadlist { ++ objset_t *dl_os; ++ uint64_t dl_object; ++ avl_tree_t dl_tree; ++ boolean_t dl_havetree; ++ struct dmu_buf *dl_dbuf; ++ dsl_deadlist_phys_t *dl_phys; ++ kmutex_t dl_lock; ++ ++ /* if it's the old on-disk format: */ ++ bpobj_t dl_bpobj; ++ boolean_t dl_oldfmt; ++} dsl_deadlist_t; ++ ++typedef struct dsl_deadlist_entry { ++ avl_node_t dle_node; ++ uint64_t dle_mintxg; ++ bpobj_t dle_bpobj; ++} dsl_deadlist_entry_t; ++ ++void dsl_deadlist_open(dsl_deadlist_t *dl, objset_t *os, uint64_t object); ++void dsl_deadlist_close(dsl_deadlist_t *dl); ++uint64_t dsl_deadlist_alloc(objset_t *os, dmu_tx_t *tx); ++void dsl_deadlist_free(objset_t *os, uint64_t dlobj, dmu_tx_t *tx); ++void dsl_deadlist_insert(dsl_deadlist_t *dl, const blkptr_t *bp, dmu_tx_t *tx); ++void dsl_deadlist_add_key(dsl_deadlist_t *dl, uint64_t mintxg, dmu_tx_t *tx); ++void dsl_deadlist_remove_key(dsl_deadlist_t *dl, uint64_t mintxg, dmu_tx_t *tx); ++uint64_t dsl_deadlist_clone(dsl_deadlist_t *dl, uint64_t maxtxg, ++ uint64_t mrs_obj, dmu_tx_t *tx); ++void dsl_deadlist_space(dsl_deadlist_t *dl, ++ uint64_t *usedp, uint64_t *compp, uint64_t *uncompp); ++void dsl_deadlist_space_range(dsl_deadlist_t *dl, ++ uint64_t mintxg, uint64_t maxtxg, ++ uint64_t *usedp, uint64_t *compp, uint64_t *uncompp); ++void dsl_deadlist_merge(dsl_deadlist_t *dl, uint64_t obj, dmu_tx_t *tx); ++void dsl_deadlist_move_bpobj(dsl_deadlist_t *dl, bpobj_t *bpo, uint64_t mintxg, ++ dmu_tx_t *tx); ++ ++#ifdef __cplusplus ++} ++#endif ++ ++#endif /* _SYS_DSL_DEADLIST_H */ +diff -uNr linux-3.2.33-go.orig/include/zfs/sys/dsl_deleg.h linux-3.2.33-go/include/zfs/sys/dsl_deleg.h +--- linux-3.2.33-go.orig/include/zfs/sys/dsl_deleg.h 1970-01-01 01:00:00.000000000 +0100 ++++ linux-3.2.33-go/include/zfs/sys/dsl_deleg.h 2012-11-16 23:25:34.337039473 +0100 +@@ -0,0 +1,80 @@ ++/* ++ * CDDL HEADER START ++ * ++ * The contents of this file are subject to the terms of the ++ * Common Development and Distribution License (the "License"). ++ * You may not use this file except in compliance with the License. ++ * ++ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE ++ * or http://www.opensolaris.org/os/licensing. ++ * See the License for the specific language governing permissions ++ * and limitations under the License. ++ * ++ * When distributing Covered Code, include this CDDL HEADER in each ++ * file and include the License file at usr/src/OPENSOLARIS.LICENSE. ++ * If applicable, add the following below this CDDL HEADER, with the ++ * fields enclosed by brackets "[]" replaced with your own identifying ++ * information: Portions Copyright [yyyy] [name of copyright owner] ++ * ++ * CDDL HEADER END ++ */ ++/* ++ * Copyright (c) 2007, 2010, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2011 by Delphix. All rights reserved. 
++ */ ++ ++#ifndef _SYS_DSL_DELEG_H ++#define _SYS_DSL_DELEG_H ++ ++#include ++#include ++#include ++ ++#ifdef __cplusplus ++extern "C" { ++#endif ++ ++#define ZFS_DELEG_PERM_NONE "" ++#define ZFS_DELEG_PERM_CREATE "create" ++#define ZFS_DELEG_PERM_DESTROY "destroy" ++#define ZFS_DELEG_PERM_SNAPSHOT "snapshot" ++#define ZFS_DELEG_PERM_ROLLBACK "rollback" ++#define ZFS_DELEG_PERM_CLONE "clone" ++#define ZFS_DELEG_PERM_PROMOTE "promote" ++#define ZFS_DELEG_PERM_RENAME "rename" ++#define ZFS_DELEG_PERM_MOUNT "mount" ++#define ZFS_DELEG_PERM_SHARE "share" ++#define ZFS_DELEG_PERM_SEND "send" ++#define ZFS_DELEG_PERM_RECEIVE "receive" ++#define ZFS_DELEG_PERM_ALLOW "allow" ++#define ZFS_DELEG_PERM_USERPROP "userprop" ++#define ZFS_DELEG_PERM_VSCAN "vscan" ++#define ZFS_DELEG_PERM_USERQUOTA "userquota" ++#define ZFS_DELEG_PERM_GROUPQUOTA "groupquota" ++#define ZFS_DELEG_PERM_USERUSED "userused" ++#define ZFS_DELEG_PERM_GROUPUSED "groupused" ++#define ZFS_DELEG_PERM_HOLD "hold" ++#define ZFS_DELEG_PERM_RELEASE "release" ++#define ZFS_DELEG_PERM_DIFF "diff" ++ ++/* ++ * Note: the names of properties that are marked delegatable are also ++ * valid delegated permissions ++ */ ++ ++int dsl_deleg_get(const char *ddname, nvlist_t **nvp); ++int dsl_deleg_set(const char *ddname, nvlist_t *nvp, boolean_t unset); ++int dsl_deleg_access(const char *ddname, const char *perm, cred_t *cr); ++int dsl_deleg_access_impl(struct dsl_dataset *ds, boolean_t descendent, ++ const char *perm, cred_t *cr); ++void dsl_deleg_set_create_perms(dsl_dir_t *dd, dmu_tx_t *tx, cred_t *cr); ++int dsl_deleg_can_allow(char *ddname, nvlist_t *nvp, cred_t *cr); ++int dsl_deleg_can_unallow(char *ddname, nvlist_t *nvp, cred_t *cr); ++int dsl_deleg_destroy(objset_t *os, uint64_t zapobj, dmu_tx_t *tx); ++boolean_t dsl_delegation_on(objset_t *os); ++ ++#ifdef __cplusplus ++} ++#endif ++ ++#endif /* _SYS_DSL_DELEG_H */ +diff -uNr linux-3.2.33-go.orig/include/zfs/sys/dsl_dir.h linux-3.2.33-go/include/zfs/sys/dsl_dir.h +--- linux-3.2.33-go.orig/include/zfs/sys/dsl_dir.h 1970-01-01 01:00:00.000000000 +0100 ++++ linux-3.2.33-go/include/zfs/sys/dsl_dir.h 2012-11-16 23:25:34.342039415 +0100 +@@ -0,0 +1,167 @@ ++/* ++ * CDDL HEADER START ++ * ++ * The contents of this file are subject to the terms of the ++ * Common Development and Distribution License (the "License"). ++ * You may not use this file except in compliance with the License. ++ * ++ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE ++ * or http://www.opensolaris.org/os/licensing. ++ * See the License for the specific language governing permissions ++ * and limitations under the License. ++ * ++ * When distributing Covered Code, include this CDDL HEADER in each ++ * file and include the License file at usr/src/OPENSOLARIS.LICENSE. ++ * If applicable, add the following below this CDDL HEADER, with the ++ * fields enclosed by brackets "[]" replaced with your own identifying ++ * information: Portions Copyright [yyyy] [name of copyright owner] ++ * ++ * CDDL HEADER END ++ */ ++/* ++ * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved. 
++ */ ++ ++#ifndef _SYS_DSL_DIR_H ++#define _SYS_DSL_DIR_H ++ ++#include ++#include ++#include ++#include ++#include ++ ++#ifdef __cplusplus ++extern "C" { ++#endif ++ ++struct dsl_dataset; ++ ++typedef enum dd_used { ++ DD_USED_HEAD, ++ DD_USED_SNAP, ++ DD_USED_CHILD, ++ DD_USED_CHILD_RSRV, ++ DD_USED_REFRSRV, ++ DD_USED_NUM ++} dd_used_t; ++ ++#define DD_FLAG_USED_BREAKDOWN (1<<0) ++ ++typedef struct dsl_dir_phys { ++ uint64_t dd_creation_time; /* not actually used */ ++ uint64_t dd_head_dataset_obj; ++ uint64_t dd_parent_obj; ++ uint64_t dd_origin_obj; ++ uint64_t dd_child_dir_zapobj; ++ /* ++ * how much space our children are accounting for; for leaf ++ * datasets, == physical space used by fs + snaps ++ */ ++ uint64_t dd_used_bytes; ++ uint64_t dd_compressed_bytes; ++ uint64_t dd_uncompressed_bytes; ++ /* Administrative quota setting */ ++ uint64_t dd_quota; ++ /* Administrative reservation setting */ ++ uint64_t dd_reserved; ++ uint64_t dd_props_zapobj; ++ uint64_t dd_deleg_zapobj; /* dataset delegation permissions */ ++ uint64_t dd_flags; ++ uint64_t dd_used_breakdown[DD_USED_NUM]; ++ uint64_t dd_clones; /* dsl_dir objects */ ++ uint64_t dd_pad[13]; /* pad out to 256 bytes for good measure */ ++} dsl_dir_phys_t; ++ ++struct dsl_dir { ++ /* These are immutable; no lock needed: */ ++ uint64_t dd_object; ++ dsl_dir_phys_t *dd_phys; ++ dmu_buf_t *dd_dbuf; ++ dsl_pool_t *dd_pool; ++ ++ /* protected by lock on pool's dp_dirty_dirs list */ ++ txg_node_t dd_dirty_link; ++ ++ /* protected by dp_config_rwlock */ ++ dsl_dir_t *dd_parent; ++ ++ /* Protected by dd_lock */ ++ kmutex_t dd_lock; ++ list_t dd_prop_cbs; /* list of dsl_prop_cb_record_t's */ ++ timestruc_t dd_snap_cmtime; /* last time snapshot namespace changed */ ++ uint64_t dd_origin_txg; ++ ++ /* gross estimate of space used by in-flight tx's */ ++ uint64_t dd_tempreserved[TXG_SIZE]; ++ /* amount of space we expect to write; == amount of dirty data */ ++ int64_t dd_space_towrite[TXG_SIZE]; ++ ++ /* protected by dd_lock; keep at end of struct for better locality */ ++ char dd_myname[MAXNAMELEN]; ++}; ++ ++void dsl_dir_close(dsl_dir_t *dd, void *tag); ++int dsl_dir_open(const char *name, void *tag, dsl_dir_t **, const char **tail); ++int dsl_dir_open_spa(spa_t *spa, const char *name, void *tag, dsl_dir_t **, ++ const char **tailp); ++int dsl_dir_open_obj(dsl_pool_t *dp, uint64_t ddobj, ++ const char *tail, void *tag, dsl_dir_t **); ++void dsl_dir_name(dsl_dir_t *dd, char *buf); ++int dsl_dir_namelen(dsl_dir_t *dd); ++uint64_t dsl_dir_create_sync(dsl_pool_t *dp, dsl_dir_t *pds, ++ const char *name, dmu_tx_t *tx); ++dsl_checkfunc_t dsl_dir_destroy_check; ++dsl_syncfunc_t dsl_dir_destroy_sync; ++void dsl_dir_stats(dsl_dir_t *dd, nvlist_t *nv); ++uint64_t dsl_dir_space_available(dsl_dir_t *dd, ++ dsl_dir_t *ancestor, int64_t delta, int ondiskonly); ++void dsl_dir_dirty(dsl_dir_t *dd, dmu_tx_t *tx); ++void dsl_dir_sync(dsl_dir_t *dd, dmu_tx_t *tx); ++int dsl_dir_tempreserve_space(dsl_dir_t *dd, uint64_t mem, ++ uint64_t asize, uint64_t fsize, uint64_t usize, void **tr_cookiep, ++ dmu_tx_t *tx); ++void dsl_dir_tempreserve_clear(void *tr_cookie, dmu_tx_t *tx); ++void dsl_dir_willuse_space(dsl_dir_t *dd, int64_t space, dmu_tx_t *tx); ++void dsl_dir_diduse_space(dsl_dir_t *dd, dd_used_t type, ++ int64_t used, int64_t compressed, int64_t uncompressed, dmu_tx_t *tx); ++void dsl_dir_transfer_space(dsl_dir_t *dd, int64_t delta, ++ dd_used_t oldtype, dd_used_t newtype, dmu_tx_t *tx); ++int dsl_dir_set_quota(const char *ddname, zprop_source_t 
source, ++ uint64_t quota); ++int dsl_dir_set_reservation(const char *ddname, zprop_source_t source, ++ uint64_t reservation); ++int dsl_dir_rename(dsl_dir_t *dd, const char *newname); ++int dsl_dir_transfer_possible(dsl_dir_t *sdd, dsl_dir_t *tdd, uint64_t space); ++int dsl_dir_set_reservation_check(void *arg1, void *arg2, dmu_tx_t *tx); ++boolean_t dsl_dir_is_clone(dsl_dir_t *dd); ++void dsl_dir_new_refreservation(dsl_dir_t *dd, struct dsl_dataset *ds, ++ uint64_t reservation, cred_t *cr, dmu_tx_t *tx); ++void dsl_dir_snap_cmtime_update(dsl_dir_t *dd); ++timestruc_t dsl_dir_snap_cmtime(dsl_dir_t *dd); ++ ++/* internal reserved dir name */ ++#define MOS_DIR_NAME "$MOS" ++#define ORIGIN_DIR_NAME "$ORIGIN" ++#define XLATION_DIR_NAME "$XLATION" ++#define FREE_DIR_NAME "$FREE" ++ ++#ifdef ZFS_DEBUG ++#define dprintf_dd(dd, fmt, ...) do { \ ++ if (zfs_flags & ZFS_DEBUG_DPRINTF) { \ ++ char *__ds_name = kmem_alloc(MAXNAMELEN + strlen(MOS_DIR_NAME) + 1, \ ++ KM_PUSHPAGE); \ ++ dsl_dir_name(dd, __ds_name); \ ++ dprintf("dd=%s " fmt, __ds_name, __VA_ARGS__); \ ++ kmem_free(__ds_name, MAXNAMELEN + strlen(MOS_DIR_NAME) + 1); \ ++ } \ ++_NOTE(CONSTCOND) } while (0) ++#else ++#define dprintf_dd(dd, fmt, ...) ++#endif ++ ++#ifdef __cplusplus ++} ++#endif ++ ++#endif /* _SYS_DSL_DIR_H */ +diff -uNr linux-3.2.33-go.orig/include/zfs/sys/dsl_pool.h linux-3.2.33-go/include/zfs/sys/dsl_pool.h +--- linux-3.2.33-go.orig/include/zfs/sys/dsl_pool.h 1970-01-01 01:00:00.000000000 +0100 ++++ linux-3.2.33-go/include/zfs/sys/dsl_pool.h 2012-11-16 23:25:34.344039393 +0100 +@@ -0,0 +1,166 @@ ++/* ++ * CDDL HEADER START ++ * ++ * The contents of this file are subject to the terms of the ++ * Common Development and Distribution License (the "License"). ++ * You may not use this file except in compliance with the License. ++ * ++ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE ++ * or http://www.opensolaris.org/os/licensing. ++ * See the License for the specific language governing permissions ++ * and limitations under the License. ++ * ++ * When distributing Covered Code, include this CDDL HEADER in each ++ * file and include the License file at usr/src/OPENSOLARIS.LICENSE. ++ * If applicable, add the following below this CDDL HEADER, with the ++ * fields enclosed by brackets "[]" replaced with your own identifying ++ * information: Portions Copyright [yyyy] [name of copyright owner] ++ * ++ * CDDL HEADER END ++ */ ++/* ++ * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved. ++ */ ++ ++#ifndef _SYS_DSL_POOL_H ++#define _SYS_DSL_POOL_H ++ ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++ ++#ifdef __cplusplus ++extern "C" { ++#endif ++ ++struct objset; ++struct dsl_dir; ++struct dsl_dataset; ++struct dsl_pool; ++struct dmu_tx; ++struct dsl_scan; ++ ++/* These macros are for indexing into the zfs_all_blkstats_t. 
*/ ++#define DMU_OT_DEFERRED DMU_OT_NONE ++#define DMU_OT_TOTAL DMU_OT_NUMTYPES ++ ++typedef struct zfs_blkstat { ++ uint64_t zb_count; ++ uint64_t zb_asize; ++ uint64_t zb_lsize; ++ uint64_t zb_psize; ++ uint64_t zb_gangs; ++ uint64_t zb_ditto_2_of_2_samevdev; ++ uint64_t zb_ditto_2_of_3_samevdev; ++ uint64_t zb_ditto_3_of_3_samevdev; ++} zfs_blkstat_t; ++ ++typedef struct zfs_all_blkstats { ++ zfs_blkstat_t zab_type[DN_MAX_LEVELS + 1][DMU_OT_TOTAL + 1]; ++} zfs_all_blkstats_t; ++ ++typedef struct txg_history { ++ kstat_txg_t th_kstat; ++ vdev_stat_t th_vs1; ++ vdev_stat_t th_vs2; ++ kmutex_t th_lock; ++ list_node_t th_link; ++} txg_history_t; ++ ++typedef struct dsl_pool { ++ /* Immutable */ ++ spa_t *dp_spa; ++ struct objset *dp_meta_objset; ++ struct dsl_dir *dp_root_dir; ++ struct dsl_dir *dp_mos_dir; ++ struct dsl_dir *dp_free_dir; ++ struct dsl_dataset *dp_origin_snap; ++ uint64_t dp_root_dir_obj; ++ struct taskq *dp_iput_taskq; ++ kstat_t *dp_txg_kstat; ++ ++ /* No lock needed - sync context only */ ++ blkptr_t dp_meta_rootbp; ++ list_t dp_synced_datasets; ++ hrtime_t dp_read_overhead; ++ uint64_t dp_throughput; /* bytes per millisec */ ++ uint64_t dp_write_limit; ++ uint64_t dp_tmp_userrefs_obj; ++ bpobj_t dp_free_bpobj; ++ ++ struct dsl_scan *dp_scan; ++ ++ /* Uses dp_lock */ ++ kmutex_t dp_lock; ++ uint64_t dp_space_towrite[TXG_SIZE]; ++ uint64_t dp_tempreserved[TXG_SIZE]; ++ uint64_t dp_txg_history_size; ++ list_t dp_txg_history; ++ ++ ++ /* Has its own locking */ ++ tx_state_t dp_tx; ++ txg_list_t dp_dirty_datasets; ++ txg_list_t dp_dirty_dirs; ++ txg_list_t dp_sync_tasks; ++ ++ /* ++ * Protects administrative changes (properties, namespace) ++ * It is only held for write in syncing context. Therefore ++ * syncing context does not need to ever have it for read, since ++ * nobody else could possibly have it for write. 
++ */ ++ krwlock_t dp_config_rwlock; ++ ++ zfs_all_blkstats_t *dp_blkstats; ++} dsl_pool_t; ++ ++int dsl_pool_open(spa_t *spa, uint64_t txg, dsl_pool_t **dpp); ++void dsl_pool_close(dsl_pool_t *dp); ++dsl_pool_t *dsl_pool_create(spa_t *spa, nvlist_t *zplprops, uint64_t txg); ++void dsl_pool_sync(dsl_pool_t *dp, uint64_t txg); ++void dsl_pool_sync_done(dsl_pool_t *dp, uint64_t txg); ++int dsl_pool_sync_context(dsl_pool_t *dp); ++uint64_t dsl_pool_adjustedsize(dsl_pool_t *dp, boolean_t netfree); ++uint64_t dsl_pool_adjustedfree(dsl_pool_t *dp, boolean_t netfree); ++int dsl_pool_tempreserve_space(dsl_pool_t *dp, uint64_t space, dmu_tx_t *tx); ++void dsl_pool_tempreserve_clear(dsl_pool_t *dp, int64_t space, dmu_tx_t *tx); ++void dsl_pool_memory_pressure(dsl_pool_t *dp); ++void dsl_pool_willuse_space(dsl_pool_t *dp, int64_t space, dmu_tx_t *tx); ++void dsl_free(dsl_pool_t *dp, uint64_t txg, const blkptr_t *bpp); ++void dsl_free_sync(zio_t *pio, dsl_pool_t *dp, uint64_t txg, ++ const blkptr_t *bpp); ++int dsl_read(zio_t *pio, spa_t *spa, const blkptr_t *bpp, arc_buf_t *pbuf, ++ arc_done_func_t *done, void *private, int priority, int zio_flags, ++ uint32_t *arc_flags, const zbookmark_t *zb); ++int dsl_read_nolock(zio_t *pio, spa_t *spa, const blkptr_t *bpp, ++ arc_done_func_t *done, void *private, int priority, int zio_flags, ++ uint32_t *arc_flags, const zbookmark_t *zb); ++void dsl_pool_create_origin(dsl_pool_t *dp, dmu_tx_t *tx); ++void dsl_pool_upgrade_clones(dsl_pool_t *dp, dmu_tx_t *tx); ++void dsl_pool_upgrade_dir_clones(dsl_pool_t *dp, dmu_tx_t *tx); ++ ++taskq_t *dsl_pool_iput_taskq(dsl_pool_t *dp); ++ ++extern int dsl_pool_user_hold(dsl_pool_t *dp, uint64_t dsobj, ++ const char *tag, uint64_t *now, dmu_tx_t *tx); ++extern int dsl_pool_user_release(dsl_pool_t *dp, uint64_t dsobj, ++ const char *tag, dmu_tx_t *tx); ++extern void dsl_pool_clean_tmp_userrefs(dsl_pool_t *dp); ++int dsl_pool_open_special_dir(dsl_pool_t *dp, const char *name, dsl_dir_t **); ++ ++txg_history_t *dsl_pool_txg_history_add(dsl_pool_t *dp, uint64_t txg); ++txg_history_t *dsl_pool_txg_history_get(dsl_pool_t *dp, uint64_t txg); ++void dsl_pool_txg_history_put(txg_history_t *th); ++ ++#ifdef __cplusplus ++} ++#endif ++ ++#endif /* _SYS_DSL_POOL_H */ +diff -uNr linux-3.2.33-go.orig/include/zfs/sys/dsl_prop.h linux-3.2.33-go/include/zfs/sys/dsl_prop.h +--- linux-3.2.33-go.orig/include/zfs/sys/dsl_prop.h 1970-01-01 01:00:00.000000000 +0100 ++++ linux-3.2.33-go/include/zfs/sys/dsl_prop.h 2012-11-16 23:25:34.336039485 +0100 +@@ -0,0 +1,119 @@ ++/* ++ * CDDL HEADER START ++ * ++ * The contents of this file are subject to the terms of the ++ * Common Development and Distribution License (the "License"). ++ * You may not use this file except in compliance with the License. ++ * ++ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE ++ * or http://www.opensolaris.org/os/licensing. ++ * See the License for the specific language governing permissions ++ * and limitations under the License. ++ * ++ * When distributing Covered Code, include this CDDL HEADER in each ++ * file and include the License file at usr/src/OPENSOLARIS.LICENSE. ++ * If applicable, add the following below this CDDL HEADER, with the ++ * fields enclosed by brackets "[]" replaced with your own identifying ++ * information: Portions Copyright [yyyy] [name of copyright owner] ++ * ++ * CDDL HEADER END ++ */ ++/* ++ * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved. 
++ */ ++ ++#ifndef _SYS_DSL_PROP_H ++#define _SYS_DSL_PROP_H ++ ++#include ++#include ++#include ++#include ++ ++#ifdef __cplusplus ++extern "C" { ++#endif ++ ++struct dsl_dataset; ++struct dsl_dir; ++ ++/* The callback func may not call into the DMU or DSL! */ ++typedef void (dsl_prop_changed_cb_t)(void *arg, uint64_t newval); ++ ++typedef struct dsl_prop_cb_record { ++ list_node_t cbr_node; /* link on dd_prop_cbs */ ++ struct dsl_dataset *cbr_ds; ++ const char *cbr_propname; ++ dsl_prop_changed_cb_t *cbr_func; ++ void *cbr_arg; ++} dsl_prop_cb_record_t; ++ ++typedef struct dsl_props_arg { ++ nvlist_t *pa_props; ++ zprop_source_t pa_source; ++} dsl_props_arg_t; ++ ++typedef struct dsl_prop_set_arg { ++ const char *psa_name; ++ zprop_source_t psa_source; ++ int psa_intsz; ++ int psa_numints; ++ const void *psa_value; ++ ++ /* ++ * Used to handle the special requirements of the quota and reservation ++ * properties. ++ */ ++ uint64_t psa_effective_value; ++} dsl_prop_setarg_t; ++ ++int dsl_prop_register(struct dsl_dataset *ds, const char *propname, ++ dsl_prop_changed_cb_t *callback, void *cbarg); ++int dsl_prop_unregister(struct dsl_dataset *ds, const char *propname, ++ dsl_prop_changed_cb_t *callback, void *cbarg); ++int dsl_prop_numcb(struct dsl_dataset *ds); ++ ++int dsl_prop_get(const char *ddname, const char *propname, ++ int intsz, int numints, void *buf, char *setpoint); ++int dsl_prop_get_integer(const char *ddname, const char *propname, ++ uint64_t *valuep, char *setpoint); ++int dsl_prop_get_all(objset_t *os, nvlist_t **nvp); ++int dsl_prop_get_received(objset_t *os, nvlist_t **nvp); ++int dsl_prop_get_ds(struct dsl_dataset *ds, const char *propname, ++ int intsz, int numints, void *buf, char *setpoint); ++int dsl_prop_get_dd(struct dsl_dir *dd, const char *propname, ++ int intsz, int numints, void *buf, char *setpoint, ++ boolean_t snapshot); ++ ++dsl_syncfunc_t dsl_props_set_sync; ++int dsl_prop_set(const char *ddname, const char *propname, ++ zprop_source_t source, int intsz, int numints, const void *buf); ++int dsl_props_set(const char *dsname, zprop_source_t source, nvlist_t *nvl); ++void dsl_dir_prop_set_uint64_sync(dsl_dir_t *dd, const char *name, uint64_t val, ++ dmu_tx_t *tx); ++ ++void dsl_prop_setarg_init_uint64(dsl_prop_setarg_t *psa, const char *propname, ++ zprop_source_t source, uint64_t *value); ++int dsl_prop_predict_sync(dsl_dir_t *dd, dsl_prop_setarg_t *psa); ++#ifdef ZFS_DEBUG ++void dsl_prop_check_prediction(dsl_dir_t *dd, dsl_prop_setarg_t *psa); ++#define DSL_PROP_CHECK_PREDICTION(dd, psa) \ ++ dsl_prop_check_prediction((dd), (psa)) ++#else ++#define DSL_PROP_CHECK_PREDICTION(dd, psa) /* nothing */ ++#endif ++ ++/* flag first receive on or after SPA_VERSION_RECVD_PROPS */ ++boolean_t dsl_prop_get_hasrecvd(objset_t *os); ++void dsl_prop_set_hasrecvd(objset_t *os); ++void dsl_prop_unset_hasrecvd(objset_t *os); ++ ++void dsl_prop_nvlist_add_uint64(nvlist_t *nv, zfs_prop_t prop, uint64_t value); ++void dsl_prop_nvlist_add_string(nvlist_t *nv, ++ zfs_prop_t prop, const char *value); ++ ++#ifdef __cplusplus ++} ++#endif ++ ++#endif /* _SYS_DSL_PROP_H */ +diff -uNr linux-3.2.33-go.orig/include/zfs/sys/dsl_scan.h linux-3.2.33-go/include/zfs/sys/dsl_scan.h +--- linux-3.2.33-go.orig/include/zfs/sys/dsl_scan.h 1970-01-01 01:00:00.000000000 +0100 ++++ linux-3.2.33-go/include/zfs/sys/dsl_scan.h 2012-11-16 23:25:34.338039461 +0100 +@@ -0,0 +1,108 @@ ++/* ++ * CDDL HEADER START ++ * ++ * The contents of this file are subject to the terms of the ++ * Common Development and 
Distribution License (the "License"). ++ * You may not use this file except in compliance with the License. ++ * ++ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE ++ * or http://www.opensolaris.org/os/licensing. ++ * See the License for the specific language governing permissions ++ * and limitations under the License. ++ * ++ * When distributing Covered Code, include this CDDL HEADER in each ++ * file and include the License file at usr/src/OPENSOLARIS.LICENSE. ++ * If applicable, add the following below this CDDL HEADER, with the ++ * fields enclosed by brackets "[]" replaced with your own identifying ++ * information: Portions Copyright [yyyy] [name of copyright owner] ++ * ++ * CDDL HEADER END ++ */ ++/* ++ * Copyright (c) 2010, Oracle and/or its affiliates. All rights reserved. ++ */ ++ ++#ifndef _SYS_DSL_SCAN_H ++#define _SYS_DSL_SCAN_H ++ ++#include ++#include ++#include ++#include ++ ++#ifdef __cplusplus ++extern "C" { ++#endif ++ ++struct objset; ++struct dsl_dir; ++struct dsl_dataset; ++struct dsl_pool; ++struct dmu_tx; ++ ++/* ++ * All members of this structure must be uint64_t, for byteswap ++ * purposes. ++ */ ++typedef struct dsl_scan_phys { ++ uint64_t scn_func; /* pool_scan_func_t */ ++ uint64_t scn_state; /* dsl_scan_state_t */ ++ uint64_t scn_queue_obj; ++ uint64_t scn_min_txg; ++ uint64_t scn_max_txg; ++ uint64_t scn_cur_min_txg; ++ uint64_t scn_cur_max_txg; ++ uint64_t scn_start_time; ++ uint64_t scn_end_time; ++ uint64_t scn_to_examine; /* total bytes to be scanned */ ++ uint64_t scn_examined; /* bytes scanned so far */ ++ uint64_t scn_to_process; ++ uint64_t scn_processed; ++ uint64_t scn_errors; /* scan I/O error count */ ++ uint64_t scn_ddt_class_max; ++ ddt_bookmark_t scn_ddt_bookmark; ++ zbookmark_t scn_bookmark; ++ uint64_t scn_flags; /* dsl_scan_flags_t */ ++} dsl_scan_phys_t; ++ ++#define SCAN_PHYS_NUMINTS (sizeof (dsl_scan_phys_t) / sizeof (uint64_t)) ++ ++typedef enum dsl_scan_flags { ++ DSF_VISIT_DS_AGAIN = 1<<0, ++} dsl_scan_flags_t; ++ ++typedef struct dsl_scan { ++ struct dsl_pool *scn_dp; ++ ++ boolean_t scn_pausing; ++ uint64_t scn_restart_txg; ++ uint64_t scn_sync_start_time; ++ zio_t *scn_zio_root; ++ ++ /* for debugging / information */ ++ uint64_t scn_visited_this_txg; ++ ++ dsl_scan_phys_t scn_phys; ++} dsl_scan_t; ++ ++int dsl_scan_init(struct dsl_pool *dp, uint64_t txg); ++void dsl_scan_fini(struct dsl_pool *dp); ++void dsl_scan_sync(struct dsl_pool *, dmu_tx_t *); ++int dsl_scan_cancel(struct dsl_pool *); ++int dsl_scan(struct dsl_pool *, pool_scan_func_t); ++void dsl_resilver_restart(struct dsl_pool *, uint64_t txg); ++boolean_t dsl_scan_resilvering(struct dsl_pool *dp); ++boolean_t dsl_dataset_unstable(struct dsl_dataset *ds); ++void dsl_scan_ddt_entry(dsl_scan_t *scn, enum zio_checksum checksum, ++ ddt_entry_t *dde, dmu_tx_t *tx); ++void dsl_scan_ds_destroyed(struct dsl_dataset *ds, struct dmu_tx *tx); ++void dsl_scan_ds_snapshotted(struct dsl_dataset *ds, struct dmu_tx *tx); ++void dsl_scan_ds_clone_swapped(struct dsl_dataset *ds1, struct dsl_dataset *ds2, ++ struct dmu_tx *tx); ++boolean_t dsl_scan_active(dsl_scan_t *scn); ++ ++#ifdef __cplusplus ++} ++#endif ++ ++#endif /* _SYS_DSL_SCAN_H */ +diff -uNr linux-3.2.33-go.orig/include/zfs/sys/dsl_synctask.h linux-3.2.33-go/include/zfs/sys/dsl_synctask.h +--- linux-3.2.33-go.orig/include/zfs/sys/dsl_synctask.h 1970-01-01 01:00:00.000000000 +0100 ++++ linux-3.2.33-go/include/zfs/sys/dsl_synctask.h 2012-11-16 23:25:34.338039461 +0100 +@@ -0,0 +1,79 @@ ++/* ++ * CDDL HEADER 
START ++ * ++ * The contents of this file are subject to the terms of the ++ * Common Development and Distribution License (the "License"). ++ * You may not use this file except in compliance with the License. ++ * ++ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE ++ * or http://www.opensolaris.org/os/licensing. ++ * See the License for the specific language governing permissions ++ * and limitations under the License. ++ * ++ * When distributing Covered Code, include this CDDL HEADER in each ++ * file and include the License file at usr/src/OPENSOLARIS.LICENSE. ++ * If applicable, add the following below this CDDL HEADER, with the ++ * fields enclosed by brackets "[]" replaced with your own identifying ++ * information: Portions Copyright [yyyy] [name of copyright owner] ++ * ++ * CDDL HEADER END ++ */ ++/* ++ * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved. ++ */ ++ ++#ifndef _SYS_DSL_SYNCTASK_H ++#define _SYS_DSL_SYNCTASK_H ++ ++#include ++#include ++ ++#ifdef __cplusplus ++extern "C" { ++#endif ++ ++struct dsl_pool; ++ ++typedef int (dsl_checkfunc_t)(void *, void *, dmu_tx_t *); ++typedef void (dsl_syncfunc_t)(void *, void *, dmu_tx_t *); ++ ++typedef struct dsl_sync_task { ++ list_node_t dst_node; ++ dsl_checkfunc_t *dst_checkfunc; ++ dsl_syncfunc_t *dst_syncfunc; ++ void *dst_arg1; ++ void *dst_arg2; ++ int dst_err; ++} dsl_sync_task_t; ++ ++typedef struct dsl_sync_task_group { ++ txg_node_t dstg_node; ++ list_t dstg_tasks; ++ struct dsl_pool *dstg_pool; ++ uint64_t dstg_txg; ++ int dstg_err; ++ int dstg_space; ++ boolean_t dstg_nowaiter; ++} dsl_sync_task_group_t; ++ ++dsl_sync_task_group_t *dsl_sync_task_group_create(struct dsl_pool *dp); ++void dsl_sync_task_create(dsl_sync_task_group_t *dstg, ++ dsl_checkfunc_t *, dsl_syncfunc_t *, ++ void *arg1, void *arg2, int blocks_modified); ++int dsl_sync_task_group_wait(dsl_sync_task_group_t *dstg); ++void dsl_sync_task_group_nowait(dsl_sync_task_group_t *dstg, dmu_tx_t *tx); ++void dsl_sync_task_group_destroy(dsl_sync_task_group_t *dstg); ++void dsl_sync_task_group_sync(dsl_sync_task_group_t *dstg, dmu_tx_t *tx); ++ ++int dsl_sync_task_do(struct dsl_pool *dp, ++ dsl_checkfunc_t *checkfunc, dsl_syncfunc_t *syncfunc, ++ void *arg1, void *arg2, int blocks_modified); ++void dsl_sync_task_do_nowait(struct dsl_pool *dp, ++ dsl_checkfunc_t *checkfunc, dsl_syncfunc_t *syncfunc, ++ void *arg1, void *arg2, int blocks_modified, dmu_tx_t *tx); ++ ++#ifdef __cplusplus ++} ++#endif ++ ++#endif /* _SYS_DSL_SYNCTASK_H */ +diff -uNr linux-3.2.33-go.orig/include/zfs/sys/efi_partition.h linux-3.2.33-go/include/zfs/sys/efi_partition.h +--- linux-3.2.33-go.orig/include/zfs/sys/efi_partition.h 1970-01-01 01:00:00.000000000 +0100 ++++ linux-3.2.33-go/include/zfs/sys/efi_partition.h 2012-11-16 23:25:34.338039461 +0100 +@@ -0,0 +1,244 @@ ++/* ++ * CDDL HEADER START ++ * ++ * The contents of this file are subject to the terms of the ++ * Common Development and Distribution License (the "License"). ++ * You may not use this file except in compliance with the License. ++ * ++ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE ++ * or http://www.opensolaris.org/os/licensing. ++ * See the License for the specific language governing permissions ++ * and limitations under the License. ++ * ++ * When distributing Covered Code, include this CDDL HEADER in each ++ * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 
++ * If applicable, add the following below this CDDL HEADER, with the ++ * fields enclosed by brackets "[]" replaced with your own identifying ++ * information: Portions Copyright [yyyy] [name of copyright owner] ++ * ++ * CDDL HEADER END ++ */ ++/* ++ * Copyright (c) 2002, 2010, Oracle and/or its affiliates. All rights reserved. ++ */ ++ ++#ifndef _SYS_EFI_PARTITION_H ++#define _SYS_EFI_PARTITION_H ++ ++#include ++ ++#ifdef __cplusplus ++extern "C" { ++#endif ++ ++/* ++ * GUID Partition Table Header ++ */ ++ ++#define EFI_MIN_LABEL_SIZE 92 ++#define EFI_LABEL_SIZE 512 ++#define LEN_EFI_PAD (EFI_LABEL_SIZE - \ ++ ((5 * sizeof (diskaddr_t)) + \ ++ (7 * sizeof (uint_t)) + \ ++ (8 * sizeof (char)) + \ ++ (1 * (sizeof (struct uuid))))) ++ ++#define EFI_SIGNATURE 0x5452415020494645ULL ++ ++/* EFI Guid Partition Table Header -- little endian on-disk format */ ++typedef struct efi_gpt { ++ uint64_t efi_gpt_Signature; ++ uint_t efi_gpt_Revision; ++ uint_t efi_gpt_HeaderSize; ++ uint_t efi_gpt_HeaderCRC32; ++ uint_t efi_gpt_Reserved1; ++ diskaddr_t efi_gpt_MyLBA; ++ diskaddr_t efi_gpt_AlternateLBA; ++ diskaddr_t efi_gpt_FirstUsableLBA; ++ diskaddr_t efi_gpt_LastUsableLBA; ++ struct uuid efi_gpt_DiskGUID; ++ diskaddr_t efi_gpt_PartitionEntryLBA; ++ uint_t efi_gpt_NumberOfPartitionEntries; ++ uint_t efi_gpt_SizeOfPartitionEntry; ++ uint_t efi_gpt_PartitionEntryArrayCRC32; ++ char efi_gpt_Reserved2[LEN_EFI_PAD]; ++} efi_gpt_t; ++ ++/* EFI Guid Partition Entry Attributes -- little endian format */ ++typedef struct efi_gpe_Attrs { ++ uint32_t PartitionAttrs :16, ++ Reserved2 :16; ++ uint32_t Reserved1 :31, ++ RequiredPartition :1; ++} efi_gpe_Attrs_t; ++ ++/* ++ * 6a96237f-1dd2-11b2-99a6-080020736631 V_UNASSIGNED (not used as such) ++ * 6a82cb45-1dd2-11b2-99a6-080020736631 V_BOOT ++ * 6a85cf4d-1dd2-11b2-99a6-080020736631 V_ROOT ++ * 6a87c46f-1dd2-11b2-99a6-080020736631 V_SWAP ++ * 6a898cc3-1dd2-11b2-99a6-080020736631 V_USR ++ * 6a8b642b-1dd2-11b2-99a6-080020736631 V_BACKUP ++ * 6a8d2ac7-1dd2-11b2-99a6-080020736631 V_STAND (not used) ++ * 6a8ef2e9-1dd2-11b2-99a6-080020736631 V_VAR ++ * 6a90ba39-1dd2-11b2-99a6-080020736631 V_HOME ++ * 6a9283a5-1dd2-11b2-99a6-080020736631 V_ALTSCTR ++ * 6a945a3b-1dd2-11b2-99a6-080020736631 V_CACHE ++ */ ++ ++#define EFI_UNUSED { 0x00000000, 0x0000, 0x0000, 0x00, 0x00, \ ++ { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 } } ++#define EFI_RESV1 { 0x6a96237f, 0x1dd2, 0x11b2, 0x99, 0xa6, \ ++ { 0x08, 0x00, 0x20, 0x73, 0x66, 0x31 } } ++#define EFI_BOOT { 0x6a82cb45, 0x1dd2, 0x11b2, 0x99, 0xa6, \ ++ { 0x08, 0x00, 0x20, 0x73, 0x66, 0x31 } } ++#define EFI_ROOT { 0x6a85cf4d, 0x1dd2, 0x11b2, 0x99, 0xa6, \ ++ { 0x08, 0x00, 0x20, 0x73, 0x66, 0x31 } } ++#define EFI_SWAP { 0x6a87c46f, 0x1dd2, 0x11b2, 0x99, 0xa6, \ ++ { 0x08, 0x00, 0x20, 0x73, 0x66, 0x31 } } ++#define EFI_USR { 0x6a898cc3, 0x1dd2, 0x11b2, 0x99, 0xa6, \ ++ { 0x08, 0x00, 0x20, 0x73, 0x66, 0x31 } } ++#define EFI_BACKUP { 0x6a8b642b, 0x1dd2, 0x11b2, 0x99, 0xa6, \ ++ { 0x08, 0x00, 0x20, 0x73, 0x66, 0x31 } } ++#define EFI_RESV2 { 0x6a8d2ac7, 0x1dd2, 0x11b2, 0x99, 0xa6, \ ++ { 0x08, 0x00, 0x20, 0x73, 0x66, 0x31 } } ++#define EFI_VAR { 0x6a8ef2e9, 0x1dd2, 0x11b2, 0x99, 0xa6, \ ++ { 0x08, 0x00, 0x20, 0x73, 0x66, 0x31 } } ++#define EFI_HOME { 0x6a90ba39, 0x1dd2, 0x11b2, 0x99, 0xa6, \ ++ { 0x08, 0x00, 0x20, 0x73, 0x66, 0x31 } } ++#define EFI_ALTSCTR { 0x6a9283a5, 0x1dd2, 0x11b2, 0x99, 0xa6, \ ++ { 0x08, 0x00, 0x20, 0x73, 0x66, 0x31 } } ++#define EFI_RESERVED { 0x6a945a3b, 0x1dd2, 0x11b2, 0x99, 0xa6, \ ++ { 0x08, 0x00, 0x20, 0x73, 0x66, 0x31 
} } ++#define EFI_SYSTEM { 0xC12A7328, 0xF81F, 0x11d2, 0xBA, 0x4B, \ ++ { 0x00, 0xA0, 0xC9, 0x3E, 0xC9, 0x3B } } ++#define EFI_LEGACY_MBR { 0x024DEE41, 0x33E7, 0x11d3, 0x9D, 0x69, \ ++ { 0x00, 0x08, 0xC7, 0x81, 0xF3, 0x9F } } ++#define EFI_SYMC_PUB { 0x6a9630d1, 0x1dd2, 0x11b2, 0x99, 0xa6, \ ++ { 0x08, 0x00, 0x20, 0x73, 0x66, 0x31 } } ++#define EFI_SYMC_CDS { 0x6a980767, 0x1dd2, 0x11b2, 0x99, 0xa6, \ ++ { 0x08, 0x00, 0x20, 0x73, 0x66, 0x31 } } ++#define EFI_MSFT_RESV { 0xE3C9E316, 0x0B5C, 0x4DB8, 0x81, 0x7D, \ ++ { 0xF9, 0x2D, 0xF0, 0x02, 0x15, 0xAE } } ++#define EFI_DELL_BASIC { 0xebd0a0a2, 0xb9e5, 0x4433, 0x87, 0xc0, \ ++ { 0x68, 0xb6, 0xb7, 0x26, 0x99, 0xc7 } } ++#define EFI_DELL_RAID { 0xa19d880f, 0x05fc, 0x4d3b, 0xa0, 0x06, \ ++ { 0x74, 0x3f, 0x0f, 0x84, 0x91, 0x1e } } ++#define EFI_DELL_SWAP { 0x0657fd6d, 0xa4ab, 0x43c4, 0x84, 0xe5, \ ++ { 0x09, 0x33, 0xc8, 0x4b, 0x4f, 0x4f } } ++#define EFI_DELL_LVM { 0xe6d6d379, 0xf507, 0x44c2, 0xa2, 0x3c, \ ++ { 0x23, 0x8f, 0x2a, 0x3d, 0xf9, 0x28 } } ++#define EFI_DELL_RESV { 0x8da63339, 0x0007, 0x60c0, 0xc4, 0x36, \ ++ { 0x08, 0x3a, 0xc8, 0x23, 0x09, 0x08 } } ++#define EFI_AAPL_HFS { 0x48465300, 0x0000, 0x11aa, 0xaa, 0x11, \ ++ { 0x00, 0x30, 0x65, 0x43, 0xec, 0xac } } ++#define EFI_AAPL_UFS { 0x55465300, 0x0000, 0x11aa, 0xaa, 0x11, \ ++ { 0x00, 0x30, 0x65, 0x43, 0xec, 0xac } } ++ ++/* minimum # of bytes for partition table entires, per EFI spec */ ++#define EFI_MIN_ARRAY_SIZE (16 * 1024) ++ ++#define EFI_PART_NAME_LEN 36 ++ ++/* size of the "reserved" partition, in blocks */ ++#define EFI_MIN_RESV_SIZE (16 * 1024) ++ ++/* EFI Guid Partition Entry */ ++typedef struct efi_gpe { ++ struct uuid efi_gpe_PartitionTypeGUID; ++ struct uuid efi_gpe_UniquePartitionGUID; ++ diskaddr_t efi_gpe_StartingLBA; ++ diskaddr_t efi_gpe_EndingLBA; ++ efi_gpe_Attrs_t efi_gpe_Attributes; ++ ushort_t efi_gpe_PartitionName[EFI_PART_NAME_LEN]; ++} efi_gpe_t; ++ ++/* ++ * passed to the useful (we hope) routines (efi_alloc_and_read and ++ * efi_write) that take this VTOC-like struct. These routines handle ++ * converting this struct into the EFI struct, generate UUIDs and ++ * checksums, and perform any necessary byte-swapping to the on-disk ++ * format. 
++ */ ++/* Solaris library abstraction for EFI partitons */ ++typedef struct dk_part { ++ diskaddr_t p_start; /* starting LBA */ ++ diskaddr_t p_size; /* size in blocks */ ++ struct uuid p_guid; /* partion type GUID */ ++ ushort_t p_tag; /* converted to part'n type GUID */ ++ ushort_t p_flag; /* attributes */ ++ char p_name[EFI_PART_NAME_LEN]; /* partition name */ ++ struct uuid p_uguid; /* unique partition GUID */ ++ uint_t p_resv[8]; /* future use - set to zero */ ++} dk_part_t; ++ ++/* Solaris library abstraction for an EFI GPT */ ++#define EFI_VERSION102 0x00010002 ++#define EFI_VERSION100 0x00010000 ++#define EFI_VERSION_CURRENT EFI_VERSION100 ++typedef struct dk_gpt { ++ uint_t efi_version; /* set to EFI_VERSION_CURRENT */ ++ uint_t efi_nparts; /* number of partitions below */ ++ uint_t efi_part_size; /* size of each partition entry */ ++ /* efi_part_size is unused */ ++ uint_t efi_lbasize; /* size of block in bytes */ ++ diskaddr_t efi_last_lba; /* last block on the disk */ ++ diskaddr_t efi_first_u_lba; /* first block after labels */ ++ diskaddr_t efi_last_u_lba; /* last block before backup labels */ ++ struct uuid efi_disk_uguid; /* unique disk GUID */ ++ uint_t efi_flags; ++ uint_t efi_reserved1; /* future use - set to zero */ ++ diskaddr_t efi_altern_lba; /* lba of alternate GPT header */ ++ uint_t efi_reserved[12]; /* future use - set to zero */ ++ struct dk_part efi_parts[1]; /* array of partitions */ ++} dk_gpt_t; ++ ++/* possible values for "efi_flags" */ ++#define EFI_GPT_PRIMARY_CORRUPT 0x1 /* primary label corrupt */ ++ ++/* the private ioctl between libefi and the driver */ ++typedef struct dk_efi { ++ diskaddr_t dki_lba; /* starting block */ ++ len_t dki_length; /* length in bytes */ ++ union { ++ efi_gpt_t *_dki_data; ++ uint64_t _dki_data_64; ++ } dki_un; ++#define dki_data dki_un._dki_data ++#define dki_data_64 dki_un._dki_data_64 ++} dk_efi_t; ++ ++struct partition64 { ++ struct uuid p_type; ++ uint_t p_partno; ++ uint_t p_resv1; ++ diskaddr_t p_start; ++ diskaddr_t p_size; ++}; ++ ++/* ++ * Number of EFI partitions ++ */ ++#if defined(__linux__) ++#define EFI_NUMPAR 128 /* Expected by parted-1.8.1 */ ++#else ++#define EFI_NUMPAR 9 ++#endif ++ ++#ifndef _KERNEL ++extern int efi_alloc_and_init(int, uint32_t, struct dk_gpt **); ++extern int efi_alloc_and_read(int, struct dk_gpt **); ++extern int efi_write(int, struct dk_gpt *); ++extern int efi_rescan(int); ++extern void efi_free(struct dk_gpt *); ++extern int efi_type(int); ++extern void efi_err_check(struct dk_gpt *); ++extern int efi_auto_sense(int fd, struct dk_gpt **); ++extern int efi_use_whole_disk(int fd); ++#endif ++ ++#ifdef __cplusplus ++} ++#endif ++ ++#endif /* _SYS_EFI_PARTITION_H */ +diff -uNr linux-3.2.33-go.orig/include/zfs/sys/fm/fs/Makefile linux-3.2.33-go/include/zfs/sys/fm/fs/Makefile +--- linux-3.2.33-go.orig/include/zfs/sys/fm/fs/Makefile 1970-01-01 01:00:00.000000000 +0100 ++++ linux-3.2.33-go/include/zfs/sys/fm/fs/Makefile 2012-11-16 23:25:34.343039404 +0100 +@@ -0,0 +1,659 @@ ++# Makefile.in generated by automake 1.11.6 from Makefile.am. ++# include/sys/fm/fs/Makefile. Generated from Makefile.in by configure. ++ ++# Copyright (C) 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001, 2002, ++# 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010, 2011 Free Software ++# Foundation, Inc. ++# This Makefile.in is free software; the Free Software Foundation ++# gives unlimited permission to copy and/or distribute it, ++# with or without modifications, as long as this notice is preserved. 
++ ++# This program is distributed in the hope that it will be useful, ++# but WITHOUT ANY WARRANTY, to the extent permitted by law; without ++# even the implied warranty of MERCHANTABILITY or FITNESS FOR A ++# PARTICULAR PURPOSE. ++ ++ ++ ++ ++am__make_dryrun = \ ++ { \ ++ am__dry=no; \ ++ case $$MAKEFLAGS in \ ++ *\\[\ \ ]*) \ ++ echo 'am--echo: ; @echo "AM" OK' | $(MAKE) -f - 2>/dev/null \ ++ | grep '^AM OK$$' >/dev/null || am__dry=yes;; \ ++ *) \ ++ for am__flg in $$MAKEFLAGS; do \ ++ case $$am__flg in \ ++ *=*|--*) ;; \ ++ *n*) am__dry=yes; break;; \ ++ esac; \ ++ done;; \ ++ esac; \ ++ test $$am__dry = yes; \ ++ } ++pkgdatadir = $(datadir)/zfs ++pkgincludedir = $(includedir)/zfs ++pkglibdir = $(libdir)/zfs ++pkglibexecdir = $(libexecdir)/zfs ++am__cd = CDPATH="$${ZSH_VERSION+.}$(PATH_SEPARATOR)" && cd ++install_sh_DATA = $(install_sh) -c -m 644 ++install_sh_PROGRAM = $(install_sh) -c ++install_sh_SCRIPT = $(install_sh) -c ++INSTALL_HEADER = $(INSTALL_DATA) ++transform = $(program_transform_name) ++NORMAL_INSTALL = : ++PRE_INSTALL = : ++POST_INSTALL = : ++NORMAL_UNINSTALL = : ++PRE_UNINSTALL = : ++POST_UNINSTALL = : ++build_triplet = x86_64-unknown-linux-gnu ++host_triplet = x86_64-unknown-linux-gnu ++target_triplet = x86_64-unknown-linux-gnu ++subdir = include/sys/fm/fs ++DIST_COMMON = $(am__kernel_HEADERS_DIST) $(am__libzfs_HEADERS_DIST) \ ++ $(srcdir)/Makefile.am $(srcdir)/Makefile.in ++ACLOCAL_M4 = $(top_srcdir)/aclocal.m4 ++am__aclocal_m4_deps = \ ++ $(top_srcdir)/config/always-no-unused-but-set-variable.m4 \ ++ $(top_srcdir)/config/kernel-automount.m4 \ ++ $(top_srcdir)/config/kernel-bdev-block-device-operations.m4 \ ++ $(top_srcdir)/config/kernel-bdev-logical-size.m4 \ ++ $(top_srcdir)/config/kernel-bdi-setup-and-register.m4 \ ++ $(top_srcdir)/config/kernel-bdi.m4 \ ++ $(top_srcdir)/config/kernel-bio-empty-barrier.m4 \ ++ $(top_srcdir)/config/kernel-bio-end-io-t-args.m4 \ ++ $(top_srcdir)/config/kernel-bio-failfast.m4 \ ++ $(top_srcdir)/config/kernel-bio-rw-syncio.m4 \ ++ $(top_srcdir)/config/kernel-blk-end-request.m4 \ ++ $(top_srcdir)/config/kernel-blk-fetch-request.m4 \ ++ $(top_srcdir)/config/kernel-blk-queue-discard.m4 \ ++ $(top_srcdir)/config/kernel-blk-queue-flush.m4 \ ++ $(top_srcdir)/config/kernel-blk-queue-io-opt.m4 \ ++ $(top_srcdir)/config/kernel-blk-queue-max-hw-sectors.m4 \ ++ $(top_srcdir)/config/kernel-blk-queue-max-segments.m4 \ ++ $(top_srcdir)/config/kernel-blk-queue-nonrot.m4 \ ++ $(top_srcdir)/config/kernel-blk-queue-physical-block-size.m4 \ ++ $(top_srcdir)/config/kernel-blk-requeue-request.m4 \ ++ $(top_srcdir)/config/kernel-blk-rq-bytes.m4 \ ++ $(top_srcdir)/config/kernel-blk-rq-pos.m4 \ ++ $(top_srcdir)/config/kernel-blk-rq-sectors.m4 \ ++ $(top_srcdir)/config/kernel-blkdev-get-by-path.m4 \ ++ $(top_srcdir)/config/kernel-blkdev-get.m4 \ ++ $(top_srcdir)/config/kernel-check-disk-size-change.m4 \ ++ $(top_srcdir)/config/kernel-clear-inode.m4 \ ++ $(top_srcdir)/config/kernel-commit-metadata.m4 \ ++ $(top_srcdir)/config/kernel-create-nameidata.m4 \ ++ $(top_srcdir)/config/kernel-d-make-root.m4 \ ++ $(top_srcdir)/config/kernel-d-obtain-alias.m4 \ ++ $(top_srcdir)/config/kernel-discard-granularity.m4 \ ++ $(top_srcdir)/config/kernel-elevator-change.m4 \ ++ $(top_srcdir)/config/kernel-encode-fh-inode.m4 \ ++ $(top_srcdir)/config/kernel-evict-inode.m4 \ ++ $(top_srcdir)/config/kernel-fallocate.m4 \ ++ $(top_srcdir)/config/kernel-fmode-t.m4 \ ++ $(top_srcdir)/config/kernel-fsync.m4 \ ++ $(top_srcdir)/config/kernel-get-disk-ro.m4 \ ++ 
$(top_srcdir)/config/kernel-get-gendisk.m4 \ ++ $(top_srcdir)/config/kernel-insert-inode-locked.m4 \ ++ $(top_srcdir)/config/kernel-invalidate-bdev-args.m4 \ ++ $(top_srcdir)/config/kernel-kobj-name-len.m4 \ ++ $(top_srcdir)/config/kernel-lookup-nameidata.m4 \ ++ $(top_srcdir)/config/kernel-mkdir-umode-t.m4 \ ++ $(top_srcdir)/config/kernel-mount-nodev.m4 \ ++ $(top_srcdir)/config/kernel-open-bdev-exclusive.m4 \ ++ $(top_srcdir)/config/kernel-rq-for-each_segment.m4 \ ++ $(top_srcdir)/config/kernel-rq-is_sync.m4 \ ++ $(top_srcdir)/config/kernel-security-inode-init.m4 \ ++ $(top_srcdir)/config/kernel-set-nlink.m4 \ ++ $(top_srcdir)/config/kernel-sget-args.m4 \ ++ $(top_srcdir)/config/kernel-show-options.m4 \ ++ $(top_srcdir)/config/kernel-shrink.m4 \ ++ $(top_srcdir)/config/kernel-truncate-range.m4 \ ++ $(top_srcdir)/config/kernel-truncate-setsize.m4 \ ++ $(top_srcdir)/config/kernel-xattr-handler.m4 \ ++ $(top_srcdir)/config/kernel.m4 \ ++ $(top_srcdir)/config/user-arch.m4 \ ++ $(top_srcdir)/config/user-frame-larger-than.m4 \ ++ $(top_srcdir)/config/user-ioctl.m4 \ ++ $(top_srcdir)/config/user-libblkid.m4 \ ++ $(top_srcdir)/config/user-libuuid.m4 \ ++ $(top_srcdir)/config/user-nptl_guard_within_stack.m4 \ ++ $(top_srcdir)/config/user-selinux.m4 \ ++ $(top_srcdir)/config/user-udev.m4 \ ++ $(top_srcdir)/config/user-zlib.m4 $(top_srcdir)/config/user.m4 \ ++ $(top_srcdir)/config/zfs-build.m4 \ ++ $(top_srcdir)/config/zfs-meta.m4 $(top_srcdir)/configure.ac ++am__configure_deps = $(am__aclocal_m4_deps) $(CONFIGURE_DEPENDENCIES) \ ++ $(ACLOCAL_M4) ++mkinstalldirs = $(install_sh) -d ++CONFIG_HEADER = $(top_builddir)/zfs_config.h ++CONFIG_CLEAN_FILES = ++CONFIG_CLEAN_VPATH_FILES = ++AM_V_GEN = $(am__v_GEN_$(V)) ++am__v_GEN_ = $(am__v_GEN_$(AM_DEFAULT_VERBOSITY)) ++am__v_GEN_0 = @echo " GEN " $@; ++AM_V_at = $(am__v_at_$(V)) ++am__v_at_ = $(am__v_at_$(AM_DEFAULT_VERBOSITY)) ++am__v_at_0 = @ ++SOURCES = ++DIST_SOURCES = ++am__can_run_installinfo = \ ++ case $$AM_UPDATE_INFO_DIR in \ ++ n|no|NO) false;; \ ++ *) (install-info --version) >/dev/null 2>&1;; \ ++ esac ++am__kernel_HEADERS_DIST = $(top_srcdir)/include/sys/fm/fs/zfs.h ++am__vpath_adj_setup = srcdirstrip=`echo "$(srcdir)" | sed 's|.|.|g'`; ++am__vpath_adj = case $$p in \ ++ $(srcdir)/*) f=`echo "$$p" | sed "s|^$$srcdirstrip/||"`;; \ ++ *) f=$$p;; \ ++ esac; ++am__strip_dir = f=`echo $$p | sed -e 's|^.*/||'`; ++am__install_max = 40 ++am__nobase_strip_setup = \ ++ srcdirstrip=`echo "$(srcdir)" | sed 's/[].[^$$\\*|]/\\\\&/g'` ++am__nobase_strip = \ ++ for p in $$list; do echo "$$p"; done | sed -e "s|$$srcdirstrip/||" ++am__nobase_list = $(am__nobase_strip_setup); \ ++ for p in $$list; do echo "$$p $$p"; done | \ ++ sed "s| $$srcdirstrip/| |;"' / .*\//!s/ .*/ ./; s,\( .*\)/[^/]*$$,\1,' | \ ++ $(AWK) 'BEGIN { files["."] = "" } { files[$$2] = files[$$2] " " $$1; \ ++ if (++n[$$2] == $(am__install_max)) \ ++ { print $$2, files[$$2]; n[$$2] = 0; files[$$2] = "" } } \ ++ END { for (dir in files) print dir, files[dir] }' ++am__base_list = \ ++ sed '$$!N;$$!N;$$!N;$$!N;$$!N;$$!N;$$!N;s/\n/ /g' | \ ++ sed '$$!N;$$!N;$$!N;$$!N;s/\n/ /g' ++am__uninstall_files_from_dir = { \ ++ test -z "$$files" \ ++ || { test ! -d "$$dir" && test ! -f "$$dir" && test ! 
-r "$$dir"; } \ ++ || { echo " ( cd '$$dir' && rm -f" $$files ")"; \ ++ $(am__cd) "$$dir" && rm -f $$files; }; \ ++ } ++am__installdirs = "$(DESTDIR)$(kerneldir)" "$(DESTDIR)$(libzfsdir)" ++am__libzfs_HEADERS_DIST = $(top_srcdir)/include/sys/fm/fs/zfs.h ++HEADERS = $(kernel_HEADERS) $(libzfs_HEADERS) ++ETAGS = etags ++CTAGS = ctags ++DISTFILES = $(DIST_COMMON) $(DIST_SOURCES) $(TEXINFOS) $(EXTRA_DIST) ++ACLOCAL = ${SHELL} /root/zfs-0.6.0-rc12/config/missing --run aclocal-1.11 ++ALIEN = alien ++ALIEN_VERSION = ++AMTAR = $${TAR-tar} ++AM_DEFAULT_VERBOSITY = 1 ++AR = ar ++AUTOCONF = ${SHELL} /root/zfs-0.6.0-rc12/config/missing --run autoconf ++AUTOHEADER = ${SHELL} /root/zfs-0.6.0-rc12/config/missing --run autoheader ++AUTOMAKE = ${SHELL} /root/zfs-0.6.0-rc12/config/missing --run automake-1.11 ++AWK = gawk ++CC = gcc ++CCAS = gcc ++CCASDEPMODE = depmode=gcc3 ++CCASFLAGS = -g -O2 ++CCDEPMODE = depmode=gcc3 ++CFLAGS = -g -O2 ++CPP = gcc -E ++CPPFLAGS = ++CYGPATH_W = echo ++DEBUG_CFLAGS = -DNDEBUG ++DEBUG_DMU_TX = _without_debug_dmu_tx ++DEBUG_STACKFLAGS = ++DEBUG_ZFS = _without_debug ++DEFAULT_INIT_DIR = ${prefix}/etc/init.d ++DEFAULT_INIT_SCRIPT = gentoo ++DEFAULT_PACKAGE = tgz ++DEFS = -DHAVE_CONFIG_H ++DEPDIR = .deps ++DLLTOOL = false ++DPKG = dpkg ++DPKGBUILD = dpkg-buildpackage ++DPKGBUILD_VERSION = ++DPKG_VERSION = ++DSYMUTIL = ++DUMPBIN = ++ECHO_C = ++ECHO_N = -n ++ECHO_T = ++EGREP = /bin/grep -E ++EXEEXT = ++FGREP = /bin/grep -F ++FRAME_LARGER_THAN = -Wframe-larger-than=1024 ++GREP = /bin/grep ++HAVE_ALIEN = no ++HAVE_DPKG = no ++HAVE_DPKGBUILD = no ++HAVE_MAKEPKG = ++HAVE_PACMAN = ++HAVE_RPM = yes ++HAVE_RPMBUILD = yes ++INSTALL = /usr/bin/install -c ++INSTALL_DATA = ${INSTALL} -m 644 ++INSTALL_PROGRAM = ${INSTALL} ++INSTALL_SCRIPT = ${INSTALL} ++INSTALL_STRIP_PROGRAM = $(install_sh) -c -s ++KERNELCPPFLAGS = -Wno-unused-but-set-variable -DHAVE_SPL -D_KERNEL -DTEXT_DOMAIN=\"zfs-linux-kernel\" -DNDEBUG ++KERNELMAKE_PARAMS = O=/usr/src/linux-3.6.0-sabayon ++LD = /usr/x86_64-pc-linux-gnu/bin/ld -m elf_x86_64 ++LDFLAGS = ++LIBBLKID = ++LIBOBJS = ++LIBS = -luuid -luuid -lz -lz -lz ++LIBSELINUX = ++LIBTOOL = $(SHELL) $(top_builddir)/libtool ++LIBUUID = -luuid ++LINUX = /usr/src/linux-3.2.33-go ++LINUX_OBJ = /usr/src/linux-3.6.0-sabayon ++LINUX_SYMBOLS = NONE ++LINUX_VERSION = 3.6.0-sabayon ++LIPO = ++LN_S = ln -s ++LTLIBOBJS = ++MAINT = # ++MAKEINFO = ${SHELL} /root/zfs-0.6.0-rc12/config/missing --run makeinfo ++MAKEPKG = ++MAKEPKG_VERSION = ++MANIFEST_TOOL = : ++MKDIR_P = /bin/mkdir -p ++NM = /usr/bin/nm -B ++NMEDIT = ++NO_UNUSED_BUT_SET_VARIABLE = -Wno-unused-but-set-variable ++OBJDUMP = objdump ++OBJEXT = o ++OTOOL = ++OTOOL64 = ++PACKAGE = zfs ++PACKAGE_BUGREPORT = ++PACKAGE_NAME = ++PACKAGE_STRING = ++PACKAGE_TARNAME = ++PACKAGE_URL = ++PACKAGE_VERSION = ++PACMAN = ++PACMAN_VERSION = ++PATH_SEPARATOR = : ++RANLIB = ranlib ++RPM = rpm ++RPMBUILD = rpmbuild ++RPMBUILD_VERSION = 4.10.0 ++RPM_VERSION = 4.10.0 ++SED = /bin/sed ++SET_MAKE = ++SHELL = /bin/sh ++SPL = /usr/src/linux-3.2.33-go ++SPL_OBJ = /usr/src/linux-3.2.33-go ++SPL_SYMBOLS = NONE ++SPL_VERSION = 0.6.0-rc12 ++STRIP = strip ++TARGET_ASM_DIR = asm-x86_64 ++VENDOR = gentoo ++VERSION = 0.6.0 ++ZFS_CONFIG = all ++ZFS_META_ALIAS = zfs-0.6.0-rc12 ++ZFS_META_AUTHOR = Sun Microsystems/Oracle, Lawrence Livermore National Laboratory ++ZFS_META_DATA = ++ZFS_META_LICENSE = CDDL ++ZFS_META_LT_AGE = ++ZFS_META_LT_CURRENT = ++ZFS_META_LT_REVISION = ++ZFS_META_NAME = zfs ++ZFS_META_RELEASE = rc12 ++ZFS_META_VERSION = 0.6.0 ++ZLIB = -lz 
++abs_builddir = /root/zfs-0.6.0-rc12/include/sys/fm/fs ++abs_srcdir = /root/zfs-0.6.0-rc12/include/sys/fm/fs ++abs_top_builddir = /root/zfs-0.6.0-rc12 ++abs_top_srcdir = /root/zfs-0.6.0-rc12 ++ac_ct_AR = ar ++ac_ct_CC = gcc ++ac_ct_DUMPBIN = ++am__include = include ++am__leading_dot = . ++am__quote = ++am__tar = $${TAR-tar} chof - "$$tardir" ++am__untar = $${TAR-tar} xf - ++bindir = ${exec_prefix}/bin ++build = x86_64-unknown-linux-gnu ++build_alias = ++build_cpu = x86_64 ++build_os = linux-gnu ++build_vendor = unknown ++builddir = . ++datadir = ${datarootdir} ++datarootdir = ${prefix}/share ++docdir = ${datarootdir}/doc/${PACKAGE} ++dvidir = ${docdir} ++exec_prefix = ${prefix} ++host = x86_64-unknown-linux-gnu ++host_alias = ++host_cpu = x86_64 ++host_os = linux-gnu ++host_vendor = unknown ++htmldir = ${docdir} ++includedir = ${prefix}/include ++infodir = ${datarootdir}/info ++install_sh = ${SHELL} /root/zfs-0.6.0-rc12/config/install-sh ++libdir = ${exec_prefix}/lib ++libexecdir = ${exec_prefix}/libexec ++localedir = ${datarootdir}/locale ++localstatedir = ${prefix}/var ++mandir = ${datarootdir}/man ++mkdir_p = /bin/mkdir -p ++oldincludedir = /usr/include ++pdfdir = ${docdir} ++prefix = /usr/local ++program_transform_name = s,x,x, ++psdir = ${docdir} ++sbindir = ${exec_prefix}/sbin ++sharedstatedir = ${prefix}/com ++srcdir = . ++sysconfdir = ${prefix}/etc ++target = x86_64-unknown-linux-gnu ++target_alias = ++target_cpu = x86_64 ++target_os = linux-gnu ++target_vendor = unknown ++top_build_prefix = ../../../../ ++top_builddir = ../../../.. ++top_srcdir = ../../../.. ++udevdir = ${exec_prefix}/lib/udev ++udevruledir = ${udevdir}/rules.d ++COMMON_H = \ ++ $(top_srcdir)/include/sys/fm/fs/zfs.h ++ ++KERNEL_H = ++USER_H = ++EXTRA_DIST = $(COMMON_H) $(KERNEL_H) $(USER_H) ++libzfsdir = $(includedir)/libzfs/sys/fm/fs ++libzfs_HEADERS = $(COMMON_H) $(USER_H) ++#kerneldir = /usr/src/zfs-$(ZFS_META_VERSION)-$(ZFS_META_RELEASE)/$(LINUX_VERSION)/sys/fm/fs ++#kernel_HEADERS = $(COMMON_H) $(KERNEL_H) ++all: all-am ++ ++.SUFFIXES: ++$(srcdir)/Makefile.in: # $(srcdir)/Makefile.am $(am__configure_deps) ++ @for dep in $?; do \ ++ case '$(am__configure_deps)' in \ ++ *$$dep*) \ ++ ( cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh ) \ ++ && { if test -f $@; then exit 0; else break; fi; }; \ ++ exit 1;; \ ++ esac; \ ++ done; \ ++ echo ' cd $(top_srcdir) && $(AUTOMAKE) --gnu include/sys/fm/fs/Makefile'; \ ++ $(am__cd) $(top_srcdir) && \ ++ $(AUTOMAKE) --gnu include/sys/fm/fs/Makefile ++.PRECIOUS: Makefile ++Makefile: $(srcdir)/Makefile.in $(top_builddir)/config.status ++ @case '$?' 
in \ ++ *config.status*) \ ++ cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh;; \ ++ *) \ ++ echo ' cd $(top_builddir) && $(SHELL) ./config.status $(subdir)/$@ $(am__depfiles_maybe)'; \ ++ cd $(top_builddir) && $(SHELL) ./config.status $(subdir)/$@ $(am__depfiles_maybe);; \ ++ esac; ++ ++$(top_builddir)/config.status: $(top_srcdir)/configure $(CONFIG_STATUS_DEPENDENCIES) ++ cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh ++ ++$(top_srcdir)/configure: # $(am__configure_deps) ++ cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh ++$(ACLOCAL_M4): # $(am__aclocal_m4_deps) ++ cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh ++$(am__aclocal_m4_deps): ++ ++mostlyclean-libtool: ++ -rm -f *.lo ++ ++clean-libtool: ++ -rm -rf .libs _libs ++install-kernelHEADERS: $(kernel_HEADERS) ++ @$(NORMAL_INSTALL) ++ @list='$(kernel_HEADERS)'; test -n "$(kerneldir)" || list=; \ ++ if test -n "$$list"; then \ ++ echo " $(MKDIR_P) '$(DESTDIR)$(kerneldir)'"; \ ++ $(MKDIR_P) "$(DESTDIR)$(kerneldir)" || exit 1; \ ++ fi; \ ++ for p in $$list; do \ ++ if test -f "$$p"; then d=; else d="$(srcdir)/"; fi; \ ++ echo "$$d$$p"; \ ++ done | $(am__base_list) | \ ++ while read files; do \ ++ echo " $(INSTALL_HEADER) $$files '$(DESTDIR)$(kerneldir)'"; \ ++ $(INSTALL_HEADER) $$files "$(DESTDIR)$(kerneldir)" || exit $$?; \ ++ done ++ ++uninstall-kernelHEADERS: ++ @$(NORMAL_UNINSTALL) ++ @list='$(kernel_HEADERS)'; test -n "$(kerneldir)" || list=; \ ++ files=`for p in $$list; do echo $$p; done | sed -e 's|^.*/||'`; \ ++ dir='$(DESTDIR)$(kerneldir)'; $(am__uninstall_files_from_dir) ++install-libzfsHEADERS: $(libzfs_HEADERS) ++ @$(NORMAL_INSTALL) ++ @list='$(libzfs_HEADERS)'; test -n "$(libzfsdir)" || list=; \ ++ if test -n "$$list"; then \ ++ echo " $(MKDIR_P) '$(DESTDIR)$(libzfsdir)'"; \ ++ $(MKDIR_P) "$(DESTDIR)$(libzfsdir)" || exit 1; \ ++ fi; \ ++ for p in $$list; do \ ++ if test -f "$$p"; then d=; else d="$(srcdir)/"; fi; \ ++ echo "$$d$$p"; \ ++ done | $(am__base_list) | \ ++ while read files; do \ ++ echo " $(INSTALL_HEADER) $$files '$(DESTDIR)$(libzfsdir)'"; \ ++ $(INSTALL_HEADER) $$files "$(DESTDIR)$(libzfsdir)" || exit $$?; \ ++ done ++ ++uninstall-libzfsHEADERS: ++ @$(NORMAL_UNINSTALL) ++ @list='$(libzfs_HEADERS)'; test -n "$(libzfsdir)" || list=; \ ++ files=`for p in $$list; do echo $$p; done | sed -e 's|^.*/||'`; \ ++ dir='$(DESTDIR)$(libzfsdir)'; $(am__uninstall_files_from_dir) ++ ++ID: $(HEADERS) $(SOURCES) $(LISP) $(TAGS_FILES) ++ list='$(SOURCES) $(HEADERS) $(LISP) $(TAGS_FILES)'; \ ++ unique=`for i in $$list; do \ ++ if test -f "$$i"; then echo $$i; else echo $(srcdir)/$$i; fi; \ ++ done | \ ++ $(AWK) '{ files[$$0] = 1; nonempty = 1; } \ ++ END { if (nonempty) { for (i in files) print i; }; }'`; \ ++ mkid -fID $$unique ++tags: TAGS ++ ++TAGS: $(HEADERS) $(SOURCES) $(TAGS_DEPENDENCIES) \ ++ $(TAGS_FILES) $(LISP) ++ set x; \ ++ here=`pwd`; \ ++ list='$(SOURCES) $(HEADERS) $(LISP) $(TAGS_FILES)'; \ ++ unique=`for i in $$list; do \ ++ if test -f "$$i"; then echo $$i; else echo $(srcdir)/$$i; fi; \ ++ done | \ ++ $(AWK) '{ files[$$0] = 1; nonempty = 1; } \ ++ END { if (nonempty) { for (i in files) print i; }; }'`; \ ++ shift; \ ++ if test -z "$(ETAGS_ARGS)$$*$$unique"; then :; else \ ++ test -n "$$unique" || unique=$$empty_fix; \ ++ if test $$# -gt 0; then \ ++ $(ETAGS) $(ETAGSFLAGS) $(AM_ETAGSFLAGS) $(ETAGS_ARGS) \ ++ "$$@" $$unique; \ ++ else \ ++ $(ETAGS) $(ETAGSFLAGS) $(AM_ETAGSFLAGS) $(ETAGS_ARGS) \ ++ $$unique; \ ++ fi; \ ++ fi ++ctags: CTAGS ++CTAGS: $(HEADERS) $(SOURCES) 
$(TAGS_DEPENDENCIES) \ ++ $(TAGS_FILES) $(LISP) ++ list='$(SOURCES) $(HEADERS) $(LISP) $(TAGS_FILES)'; \ ++ unique=`for i in $$list; do \ ++ if test -f "$$i"; then echo $$i; else echo $(srcdir)/$$i; fi; \ ++ done | \ ++ $(AWK) '{ files[$$0] = 1; nonempty = 1; } \ ++ END { if (nonempty) { for (i in files) print i; }; }'`; \ ++ test -z "$(CTAGS_ARGS)$$unique" \ ++ || $(CTAGS) $(CTAGSFLAGS) $(AM_CTAGSFLAGS) $(CTAGS_ARGS) \ ++ $$unique ++ ++GTAGS: ++ here=`$(am__cd) $(top_builddir) && pwd` \ ++ && $(am__cd) $(top_srcdir) \ ++ && gtags -i $(GTAGS_ARGS) "$$here" ++ ++distclean-tags: ++ -rm -f TAGS ID GTAGS GRTAGS GSYMS GPATH tags ++ ++distdir: $(DISTFILES) ++ @srcdirstrip=`echo "$(srcdir)" | sed 's/[].[^$$\\*]/\\\\&/g'`; \ ++ topsrcdirstrip=`echo "$(top_srcdir)" | sed 's/[].[^$$\\*]/\\\\&/g'`; \ ++ list='$(DISTFILES)'; \ ++ dist_files=`for file in $$list; do echo $$file; done | \ ++ sed -e "s|^$$srcdirstrip/||;t" \ ++ -e "s|^$$topsrcdirstrip/|$(top_builddir)/|;t"`; \ ++ case $$dist_files in \ ++ */*) $(MKDIR_P) `echo "$$dist_files" | \ ++ sed '/\//!d;s|^|$(distdir)/|;s,/[^/]*$$,,' | \ ++ sort -u` ;; \ ++ esac; \ ++ for file in $$dist_files; do \ ++ if test -f $$file || test -d $$file; then d=.; else d=$(srcdir); fi; \ ++ if test -d $$d/$$file; then \ ++ dir=`echo "/$$file" | sed -e 's,/[^/]*$$,,'`; \ ++ if test -d "$(distdir)/$$file"; then \ ++ find "$(distdir)/$$file" -type d ! -perm -700 -exec chmod u+rwx {} \;; \ ++ fi; \ ++ if test -d $(srcdir)/$$file && test $$d != $(srcdir); then \ ++ cp -fpR $(srcdir)/$$file "$(distdir)$$dir" || exit 1; \ ++ find "$(distdir)/$$file" -type d ! -perm -700 -exec chmod u+rwx {} \;; \ ++ fi; \ ++ cp -fpR $$d/$$file "$(distdir)$$dir" || exit 1; \ ++ else \ ++ test -f "$(distdir)/$$file" \ ++ || cp -p $$d/$$file "$(distdir)/$$file" \ ++ || exit 1; \ ++ fi; \ ++ done ++check-am: all-am ++check: check-am ++all-am: Makefile $(HEADERS) ++installdirs: ++ for dir in "$(DESTDIR)$(kerneldir)" "$(DESTDIR)$(libzfsdir)"; do \ ++ test -z "$$dir" || $(MKDIR_P) "$$dir"; \ ++ done ++install: install-am ++install-exec: install-exec-am ++install-data: install-data-am ++uninstall: uninstall-am ++ ++install-am: all-am ++ @$(MAKE) $(AM_MAKEFLAGS) install-exec-am install-data-am ++ ++installcheck: installcheck-am ++install-strip: ++ if test -z '$(STRIP)'; then \ ++ $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \ ++ install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \ ++ install; \ ++ else \ ++ $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \ ++ install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \ ++ "INSTALL_PROGRAM_ENV=STRIPPROG='$(STRIP)'" install; \ ++ fi ++mostlyclean-generic: ++ ++clean-generic: ++ ++distclean-generic: ++ -test -z "$(CONFIG_CLEAN_FILES)" || rm -f $(CONFIG_CLEAN_FILES) ++ -test . = "$(srcdir)" || test -z "$(CONFIG_CLEAN_VPATH_FILES)" || rm -f $(CONFIG_CLEAN_VPATH_FILES) ++ ++maintainer-clean-generic: ++ @echo "This command is intended for maintainers to use" ++ @echo "it deletes files that may require special tools to rebuild." 
++clean: clean-am ++ ++clean-am: clean-generic clean-libtool mostlyclean-am ++ ++distclean: distclean-am ++ -rm -f Makefile ++distclean-am: clean-am distclean-generic distclean-tags ++ ++dvi: dvi-am ++ ++dvi-am: ++ ++html: html-am ++ ++html-am: ++ ++info: info-am ++ ++info-am: ++ ++install-data-am: install-kernelHEADERS install-libzfsHEADERS ++ ++install-dvi: install-dvi-am ++ ++install-dvi-am: ++ ++install-exec-am: ++ ++install-html: install-html-am ++ ++install-html-am: ++ ++install-info: install-info-am ++ ++install-info-am: ++ ++install-man: ++ ++install-pdf: install-pdf-am ++ ++install-pdf-am: ++ ++install-ps: install-ps-am ++ ++install-ps-am: ++ ++installcheck-am: ++ ++maintainer-clean: maintainer-clean-am ++ -rm -f Makefile ++maintainer-clean-am: distclean-am maintainer-clean-generic ++ ++mostlyclean: mostlyclean-am ++ ++mostlyclean-am: mostlyclean-generic mostlyclean-libtool ++ ++pdf: pdf-am ++ ++pdf-am: ++ ++ps: ps-am ++ ++ps-am: ++ ++uninstall-am: uninstall-kernelHEADERS uninstall-libzfsHEADERS ++ ++.MAKE: install-am install-strip ++ ++.PHONY: CTAGS GTAGS all all-am check check-am clean clean-generic \ ++ clean-libtool ctags distclean distclean-generic \ ++ distclean-libtool distclean-tags distdir dvi dvi-am html \ ++ html-am info info-am install install-am install-data \ ++ install-data-am install-dvi install-dvi-am install-exec \ ++ install-exec-am install-html install-html-am install-info \ ++ install-info-am install-kernelHEADERS install-libzfsHEADERS \ ++ install-man install-pdf install-pdf-am install-ps \ ++ install-ps-am install-strip installcheck installcheck-am \ ++ installdirs maintainer-clean maintainer-clean-generic \ ++ mostlyclean mostlyclean-generic mostlyclean-libtool pdf pdf-am \ ++ ps ps-am tags uninstall uninstall-am uninstall-kernelHEADERS \ ++ uninstall-libzfsHEADERS ++ ++ ++# Tell versions [3.59,3.63) of GNU make to not export all variables. ++# Otherwise a system limit (for SysV at least) may be exceeded. ++.NOEXPORT: +diff -uNr linux-3.2.33-go.orig/include/zfs/sys/fm/fs/Makefile.am linux-3.2.33-go/include/zfs/sys/fm/fs/Makefile.am +--- linux-3.2.33-go.orig/include/zfs/sys/fm/fs/Makefile.am 1970-01-01 01:00:00.000000000 +0100 ++++ linux-3.2.33-go/include/zfs/sys/fm/fs/Makefile.am 2012-11-16 23:25:34.343039404 +0100 +@@ -0,0 +1,18 @@ ++COMMON_H = \ ++ $(top_srcdir)/include/sys/fm/fs/zfs.h ++ ++KERNEL_H = ++ ++USER_H = ++ ++EXTRA_DIST = $(COMMON_H) $(KERNEL_H) $(USER_H) ++ ++if CONFIG_USER ++libzfsdir = $(includedir)/libzfs/sys/fm/fs ++libzfs_HEADERS = $(COMMON_H) $(USER_H) ++endif ++ ++if CONFIG_KERNEL ++kerneldir = /usr/src/zfs-$(ZFS_META_VERSION)-$(ZFS_META_RELEASE)/$(LINUX_VERSION)/sys/fm/fs ++kernel_HEADERS = $(COMMON_H) $(KERNEL_H) ++endif +diff -uNr linux-3.2.33-go.orig/include/zfs/sys/fm/fs/Makefile.in linux-3.2.33-go/include/zfs/sys/fm/fs/Makefile.in +--- linux-3.2.33-go.orig/include/zfs/sys/fm/fs/Makefile.in 1970-01-01 01:00:00.000000000 +0100 ++++ linux-3.2.33-go/include/zfs/sys/fm/fs/Makefile.in 2012-11-16 23:25:34.343039404 +0100 +@@ -0,0 +1,659 @@ ++# Makefile.in generated by automake 1.11.6 from Makefile.am. ++# @configure_input@ ++ ++# Copyright (C) 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001, 2002, ++# 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010, 2011 Free Software ++# Foundation, Inc. ++# This Makefile.in is free software; the Free Software Foundation ++# gives unlimited permission to copy and/or distribute it, ++# with or without modifications, as long as this notice is preserved. 
++ ++# This program is distributed in the hope that it will be useful, ++# but WITHOUT ANY WARRANTY, to the extent permitted by law; without ++# even the implied warranty of MERCHANTABILITY or FITNESS FOR A ++# PARTICULAR PURPOSE. ++ ++@SET_MAKE@ ++ ++VPATH = @srcdir@ ++am__make_dryrun = \ ++ { \ ++ am__dry=no; \ ++ case $$MAKEFLAGS in \ ++ *\\[\ \ ]*) \ ++ echo 'am--echo: ; @echo "AM" OK' | $(MAKE) -f - 2>/dev/null \ ++ | grep '^AM OK$$' >/dev/null || am__dry=yes;; \ ++ *) \ ++ for am__flg in $$MAKEFLAGS; do \ ++ case $$am__flg in \ ++ *=*|--*) ;; \ ++ *n*) am__dry=yes; break;; \ ++ esac; \ ++ done;; \ ++ esac; \ ++ test $$am__dry = yes; \ ++ } ++pkgdatadir = $(datadir)/@PACKAGE@ ++pkgincludedir = $(includedir)/@PACKAGE@ ++pkglibdir = $(libdir)/@PACKAGE@ ++pkglibexecdir = $(libexecdir)/@PACKAGE@ ++am__cd = CDPATH="$${ZSH_VERSION+.}$(PATH_SEPARATOR)" && cd ++install_sh_DATA = $(install_sh) -c -m 644 ++install_sh_PROGRAM = $(install_sh) -c ++install_sh_SCRIPT = $(install_sh) -c ++INSTALL_HEADER = $(INSTALL_DATA) ++transform = $(program_transform_name) ++NORMAL_INSTALL = : ++PRE_INSTALL = : ++POST_INSTALL = : ++NORMAL_UNINSTALL = : ++PRE_UNINSTALL = : ++POST_UNINSTALL = : ++build_triplet = @build@ ++host_triplet = @host@ ++target_triplet = @target@ ++subdir = include/sys/fm/fs ++DIST_COMMON = $(am__kernel_HEADERS_DIST) $(am__libzfs_HEADERS_DIST) \ ++ $(srcdir)/Makefile.am $(srcdir)/Makefile.in ++ACLOCAL_M4 = $(top_srcdir)/aclocal.m4 ++am__aclocal_m4_deps = \ ++ $(top_srcdir)/config/always-no-unused-but-set-variable.m4 \ ++ $(top_srcdir)/config/kernel-automount.m4 \ ++ $(top_srcdir)/config/kernel-bdev-block-device-operations.m4 \ ++ $(top_srcdir)/config/kernel-bdev-logical-size.m4 \ ++ $(top_srcdir)/config/kernel-bdi-setup-and-register.m4 \ ++ $(top_srcdir)/config/kernel-bdi.m4 \ ++ $(top_srcdir)/config/kernel-bio-empty-barrier.m4 \ ++ $(top_srcdir)/config/kernel-bio-end-io-t-args.m4 \ ++ $(top_srcdir)/config/kernel-bio-failfast.m4 \ ++ $(top_srcdir)/config/kernel-bio-rw-syncio.m4 \ ++ $(top_srcdir)/config/kernel-blk-end-request.m4 \ ++ $(top_srcdir)/config/kernel-blk-fetch-request.m4 \ ++ $(top_srcdir)/config/kernel-blk-queue-discard.m4 \ ++ $(top_srcdir)/config/kernel-blk-queue-flush.m4 \ ++ $(top_srcdir)/config/kernel-blk-queue-io-opt.m4 \ ++ $(top_srcdir)/config/kernel-blk-queue-max-hw-sectors.m4 \ ++ $(top_srcdir)/config/kernel-blk-queue-max-segments.m4 \ ++ $(top_srcdir)/config/kernel-blk-queue-nonrot.m4 \ ++ $(top_srcdir)/config/kernel-blk-queue-physical-block-size.m4 \ ++ $(top_srcdir)/config/kernel-blk-requeue-request.m4 \ ++ $(top_srcdir)/config/kernel-blk-rq-bytes.m4 \ ++ $(top_srcdir)/config/kernel-blk-rq-pos.m4 \ ++ $(top_srcdir)/config/kernel-blk-rq-sectors.m4 \ ++ $(top_srcdir)/config/kernel-blkdev-get-by-path.m4 \ ++ $(top_srcdir)/config/kernel-blkdev-get.m4 \ ++ $(top_srcdir)/config/kernel-check-disk-size-change.m4 \ ++ $(top_srcdir)/config/kernel-clear-inode.m4 \ ++ $(top_srcdir)/config/kernel-commit-metadata.m4 \ ++ $(top_srcdir)/config/kernel-create-nameidata.m4 \ ++ $(top_srcdir)/config/kernel-d-make-root.m4 \ ++ $(top_srcdir)/config/kernel-d-obtain-alias.m4 \ ++ $(top_srcdir)/config/kernel-discard-granularity.m4 \ ++ $(top_srcdir)/config/kernel-elevator-change.m4 \ ++ $(top_srcdir)/config/kernel-encode-fh-inode.m4 \ ++ $(top_srcdir)/config/kernel-evict-inode.m4 \ ++ $(top_srcdir)/config/kernel-fallocate.m4 \ ++ $(top_srcdir)/config/kernel-fmode-t.m4 \ ++ $(top_srcdir)/config/kernel-fsync.m4 \ ++ $(top_srcdir)/config/kernel-get-disk-ro.m4 \ ++ 
$(top_srcdir)/config/kernel-get-gendisk.m4 \ ++ $(top_srcdir)/config/kernel-insert-inode-locked.m4 \ ++ $(top_srcdir)/config/kernel-invalidate-bdev-args.m4 \ ++ $(top_srcdir)/config/kernel-kobj-name-len.m4 \ ++ $(top_srcdir)/config/kernel-lookup-nameidata.m4 \ ++ $(top_srcdir)/config/kernel-mkdir-umode-t.m4 \ ++ $(top_srcdir)/config/kernel-mount-nodev.m4 \ ++ $(top_srcdir)/config/kernel-open-bdev-exclusive.m4 \ ++ $(top_srcdir)/config/kernel-rq-for-each_segment.m4 \ ++ $(top_srcdir)/config/kernel-rq-is_sync.m4 \ ++ $(top_srcdir)/config/kernel-security-inode-init.m4 \ ++ $(top_srcdir)/config/kernel-set-nlink.m4 \ ++ $(top_srcdir)/config/kernel-sget-args.m4 \ ++ $(top_srcdir)/config/kernel-show-options.m4 \ ++ $(top_srcdir)/config/kernel-shrink.m4 \ ++ $(top_srcdir)/config/kernel-truncate-range.m4 \ ++ $(top_srcdir)/config/kernel-truncate-setsize.m4 \ ++ $(top_srcdir)/config/kernel-xattr-handler.m4 \ ++ $(top_srcdir)/config/kernel.m4 \ ++ $(top_srcdir)/config/user-arch.m4 \ ++ $(top_srcdir)/config/user-frame-larger-than.m4 \ ++ $(top_srcdir)/config/user-ioctl.m4 \ ++ $(top_srcdir)/config/user-libblkid.m4 \ ++ $(top_srcdir)/config/user-libuuid.m4 \ ++ $(top_srcdir)/config/user-nptl_guard_within_stack.m4 \ ++ $(top_srcdir)/config/user-selinux.m4 \ ++ $(top_srcdir)/config/user-udev.m4 \ ++ $(top_srcdir)/config/user-zlib.m4 $(top_srcdir)/config/user.m4 \ ++ $(top_srcdir)/config/zfs-build.m4 \ ++ $(top_srcdir)/config/zfs-meta.m4 $(top_srcdir)/configure.ac ++am__configure_deps = $(am__aclocal_m4_deps) $(CONFIGURE_DEPENDENCIES) \ ++ $(ACLOCAL_M4) ++mkinstalldirs = $(install_sh) -d ++CONFIG_HEADER = $(top_builddir)/zfs_config.h ++CONFIG_CLEAN_FILES = ++CONFIG_CLEAN_VPATH_FILES = ++AM_V_GEN = $(am__v_GEN_@AM_V@) ++am__v_GEN_ = $(am__v_GEN_@AM_DEFAULT_V@) ++am__v_GEN_0 = @echo " GEN " $@; ++AM_V_at = $(am__v_at_@AM_V@) ++am__v_at_ = $(am__v_at_@AM_DEFAULT_V@) ++am__v_at_0 = @ ++SOURCES = ++DIST_SOURCES = ++am__can_run_installinfo = \ ++ case $$AM_UPDATE_INFO_DIR in \ ++ n|no|NO) false;; \ ++ *) (install-info --version) >/dev/null 2>&1;; \ ++ esac ++am__kernel_HEADERS_DIST = $(top_srcdir)/include/sys/fm/fs/zfs.h ++am__vpath_adj_setup = srcdirstrip=`echo "$(srcdir)" | sed 's|.|.|g'`; ++am__vpath_adj = case $$p in \ ++ $(srcdir)/*) f=`echo "$$p" | sed "s|^$$srcdirstrip/||"`;; \ ++ *) f=$$p;; \ ++ esac; ++am__strip_dir = f=`echo $$p | sed -e 's|^.*/||'`; ++am__install_max = 40 ++am__nobase_strip_setup = \ ++ srcdirstrip=`echo "$(srcdir)" | sed 's/[].[^$$\\*|]/\\\\&/g'` ++am__nobase_strip = \ ++ for p in $$list; do echo "$$p"; done | sed -e "s|$$srcdirstrip/||" ++am__nobase_list = $(am__nobase_strip_setup); \ ++ for p in $$list; do echo "$$p $$p"; done | \ ++ sed "s| $$srcdirstrip/| |;"' / .*\//!s/ .*/ ./; s,\( .*\)/[^/]*$$,\1,' | \ ++ $(AWK) 'BEGIN { files["."] = "" } { files[$$2] = files[$$2] " " $$1; \ ++ if (++n[$$2] == $(am__install_max)) \ ++ { print $$2, files[$$2]; n[$$2] = 0; files[$$2] = "" } } \ ++ END { for (dir in files) print dir, files[dir] }' ++am__base_list = \ ++ sed '$$!N;$$!N;$$!N;$$!N;$$!N;$$!N;$$!N;s/\n/ /g' | \ ++ sed '$$!N;$$!N;$$!N;$$!N;s/\n/ /g' ++am__uninstall_files_from_dir = { \ ++ test -z "$$files" \ ++ || { test ! -d "$$dir" && test ! -f "$$dir" && test ! 
-r "$$dir"; } \ ++ || { echo " ( cd '$$dir' && rm -f" $$files ")"; \ ++ $(am__cd) "$$dir" && rm -f $$files; }; \ ++ } ++am__installdirs = "$(DESTDIR)$(kerneldir)" "$(DESTDIR)$(libzfsdir)" ++am__libzfs_HEADERS_DIST = $(top_srcdir)/include/sys/fm/fs/zfs.h ++HEADERS = $(kernel_HEADERS) $(libzfs_HEADERS) ++ETAGS = etags ++CTAGS = ctags ++DISTFILES = $(DIST_COMMON) $(DIST_SOURCES) $(TEXINFOS) $(EXTRA_DIST) ++ACLOCAL = @ACLOCAL@ ++ALIEN = @ALIEN@ ++ALIEN_VERSION = @ALIEN_VERSION@ ++AMTAR = @AMTAR@ ++AM_DEFAULT_VERBOSITY = @AM_DEFAULT_VERBOSITY@ ++AR = @AR@ ++AUTOCONF = @AUTOCONF@ ++AUTOHEADER = @AUTOHEADER@ ++AUTOMAKE = @AUTOMAKE@ ++AWK = @AWK@ ++CC = @CC@ ++CCAS = @CCAS@ ++CCASDEPMODE = @CCASDEPMODE@ ++CCASFLAGS = @CCASFLAGS@ ++CCDEPMODE = @CCDEPMODE@ ++CFLAGS = @CFLAGS@ ++CPP = @CPP@ ++CPPFLAGS = @CPPFLAGS@ ++CYGPATH_W = @CYGPATH_W@ ++DEBUG_CFLAGS = @DEBUG_CFLAGS@ ++DEBUG_DMU_TX = @DEBUG_DMU_TX@ ++DEBUG_STACKFLAGS = @DEBUG_STACKFLAGS@ ++DEBUG_ZFS = @DEBUG_ZFS@ ++DEFAULT_INIT_DIR = @DEFAULT_INIT_DIR@ ++DEFAULT_INIT_SCRIPT = @DEFAULT_INIT_SCRIPT@ ++DEFAULT_PACKAGE = @DEFAULT_PACKAGE@ ++DEFS = @DEFS@ ++DEPDIR = @DEPDIR@ ++DLLTOOL = @DLLTOOL@ ++DPKG = @DPKG@ ++DPKGBUILD = @DPKGBUILD@ ++DPKGBUILD_VERSION = @DPKGBUILD_VERSION@ ++DPKG_VERSION = @DPKG_VERSION@ ++DSYMUTIL = @DSYMUTIL@ ++DUMPBIN = @DUMPBIN@ ++ECHO_C = @ECHO_C@ ++ECHO_N = @ECHO_N@ ++ECHO_T = @ECHO_T@ ++EGREP = @EGREP@ ++EXEEXT = @EXEEXT@ ++FGREP = @FGREP@ ++FRAME_LARGER_THAN = @FRAME_LARGER_THAN@ ++GREP = @GREP@ ++HAVE_ALIEN = @HAVE_ALIEN@ ++HAVE_DPKG = @HAVE_DPKG@ ++HAVE_DPKGBUILD = @HAVE_DPKGBUILD@ ++HAVE_MAKEPKG = @HAVE_MAKEPKG@ ++HAVE_PACMAN = @HAVE_PACMAN@ ++HAVE_RPM = @HAVE_RPM@ ++HAVE_RPMBUILD = @HAVE_RPMBUILD@ ++INSTALL = @INSTALL@ ++INSTALL_DATA = @INSTALL_DATA@ ++INSTALL_PROGRAM = @INSTALL_PROGRAM@ ++INSTALL_SCRIPT = @INSTALL_SCRIPT@ ++INSTALL_STRIP_PROGRAM = @INSTALL_STRIP_PROGRAM@ ++KERNELCPPFLAGS = @KERNELCPPFLAGS@ ++KERNELMAKE_PARAMS = @KERNELMAKE_PARAMS@ ++LD = @LD@ ++LDFLAGS = @LDFLAGS@ ++LIBBLKID = @LIBBLKID@ ++LIBOBJS = @LIBOBJS@ ++LIBS = @LIBS@ ++LIBSELINUX = @LIBSELINUX@ ++LIBTOOL = @LIBTOOL@ ++LIBUUID = @LIBUUID@ ++LINUX = @LINUX@ ++LINUX_OBJ = @LINUX_OBJ@ ++LINUX_SYMBOLS = @LINUX_SYMBOLS@ ++LINUX_VERSION = @LINUX_VERSION@ ++LIPO = @LIPO@ ++LN_S = @LN_S@ ++LTLIBOBJS = @LTLIBOBJS@ ++MAINT = @MAINT@ ++MAKEINFO = @MAKEINFO@ ++MAKEPKG = @MAKEPKG@ ++MAKEPKG_VERSION = @MAKEPKG_VERSION@ ++MANIFEST_TOOL = @MANIFEST_TOOL@ ++MKDIR_P = @MKDIR_P@ ++NM = @NM@ ++NMEDIT = @NMEDIT@ ++NO_UNUSED_BUT_SET_VARIABLE = @NO_UNUSED_BUT_SET_VARIABLE@ ++OBJDUMP = @OBJDUMP@ ++OBJEXT = @OBJEXT@ ++OTOOL = @OTOOL@ ++OTOOL64 = @OTOOL64@ ++PACKAGE = @PACKAGE@ ++PACKAGE_BUGREPORT = @PACKAGE_BUGREPORT@ ++PACKAGE_NAME = @PACKAGE_NAME@ ++PACKAGE_STRING = @PACKAGE_STRING@ ++PACKAGE_TARNAME = @PACKAGE_TARNAME@ ++PACKAGE_URL = @PACKAGE_URL@ ++PACKAGE_VERSION = @PACKAGE_VERSION@ ++PACMAN = @PACMAN@ ++PACMAN_VERSION = @PACMAN_VERSION@ ++PATH_SEPARATOR = @PATH_SEPARATOR@ ++RANLIB = @RANLIB@ ++RPM = @RPM@ ++RPMBUILD = @RPMBUILD@ ++RPMBUILD_VERSION = @RPMBUILD_VERSION@ ++RPM_VERSION = @RPM_VERSION@ ++SED = @SED@ ++SET_MAKE = @SET_MAKE@ ++SHELL = @SHELL@ ++SPL = @SPL@ ++SPL_OBJ = @SPL_OBJ@ ++SPL_SYMBOLS = @SPL_SYMBOLS@ ++SPL_VERSION = @SPL_VERSION@ ++STRIP = @STRIP@ ++TARGET_ASM_DIR = @TARGET_ASM_DIR@ ++VENDOR = @VENDOR@ ++VERSION = @VERSION@ ++ZFS_CONFIG = @ZFS_CONFIG@ ++ZFS_META_ALIAS = @ZFS_META_ALIAS@ ++ZFS_META_AUTHOR = @ZFS_META_AUTHOR@ ++ZFS_META_DATA = @ZFS_META_DATA@ ++ZFS_META_LICENSE = @ZFS_META_LICENSE@ ++ZFS_META_LT_AGE = @ZFS_META_LT_AGE@ 
++ZFS_META_LT_CURRENT = @ZFS_META_LT_CURRENT@ ++ZFS_META_LT_REVISION = @ZFS_META_LT_REVISION@ ++ZFS_META_NAME = @ZFS_META_NAME@ ++ZFS_META_RELEASE = @ZFS_META_RELEASE@ ++ZFS_META_VERSION = @ZFS_META_VERSION@ ++ZLIB = @ZLIB@ ++abs_builddir = @abs_builddir@ ++abs_srcdir = @abs_srcdir@ ++abs_top_builddir = @abs_top_builddir@ ++abs_top_srcdir = @abs_top_srcdir@ ++ac_ct_AR = @ac_ct_AR@ ++ac_ct_CC = @ac_ct_CC@ ++ac_ct_DUMPBIN = @ac_ct_DUMPBIN@ ++am__include = @am__include@ ++am__leading_dot = @am__leading_dot@ ++am__quote = @am__quote@ ++am__tar = @am__tar@ ++am__untar = @am__untar@ ++bindir = @bindir@ ++build = @build@ ++build_alias = @build_alias@ ++build_cpu = @build_cpu@ ++build_os = @build_os@ ++build_vendor = @build_vendor@ ++builddir = @builddir@ ++datadir = @datadir@ ++datarootdir = @datarootdir@ ++docdir = @docdir@ ++dvidir = @dvidir@ ++exec_prefix = @exec_prefix@ ++host = @host@ ++host_alias = @host_alias@ ++host_cpu = @host_cpu@ ++host_os = @host_os@ ++host_vendor = @host_vendor@ ++htmldir = @htmldir@ ++includedir = @includedir@ ++infodir = @infodir@ ++install_sh = @install_sh@ ++libdir = @libdir@ ++libexecdir = @libexecdir@ ++localedir = @localedir@ ++localstatedir = @localstatedir@ ++mandir = @mandir@ ++mkdir_p = @mkdir_p@ ++oldincludedir = @oldincludedir@ ++pdfdir = @pdfdir@ ++prefix = @prefix@ ++program_transform_name = @program_transform_name@ ++psdir = @psdir@ ++sbindir = @sbindir@ ++sharedstatedir = @sharedstatedir@ ++srcdir = @srcdir@ ++sysconfdir = @sysconfdir@ ++target = @target@ ++target_alias = @target_alias@ ++target_cpu = @target_cpu@ ++target_os = @target_os@ ++target_vendor = @target_vendor@ ++top_build_prefix = @top_build_prefix@ ++top_builddir = @top_builddir@ ++top_srcdir = @top_srcdir@ ++udevdir = @udevdir@ ++udevruledir = @udevruledir@ ++COMMON_H = \ ++ $(top_srcdir)/include/sys/fm/fs/zfs.h ++ ++KERNEL_H = ++USER_H = ++EXTRA_DIST = $(COMMON_H) $(KERNEL_H) $(USER_H) ++@CONFIG_USER_TRUE@libzfsdir = $(includedir)/libzfs/sys/fm/fs ++@CONFIG_USER_TRUE@libzfs_HEADERS = $(COMMON_H) $(USER_H) ++@CONFIG_KERNEL_TRUE@kerneldir = /usr/src/zfs-$(ZFS_META_VERSION)-$(ZFS_META_RELEASE)/$(LINUX_VERSION)/sys/fm/fs ++@CONFIG_KERNEL_TRUE@kernel_HEADERS = $(COMMON_H) $(KERNEL_H) ++all: all-am ++ ++.SUFFIXES: ++$(srcdir)/Makefile.in: @MAINTAINER_MODE_TRUE@ $(srcdir)/Makefile.am $(am__configure_deps) ++ @for dep in $?; do \ ++ case '$(am__configure_deps)' in \ ++ *$$dep*) \ ++ ( cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh ) \ ++ && { if test -f $@; then exit 0; else break; fi; }; \ ++ exit 1;; \ ++ esac; \ ++ done; \ ++ echo ' cd $(top_srcdir) && $(AUTOMAKE) --gnu include/sys/fm/fs/Makefile'; \ ++ $(am__cd) $(top_srcdir) && \ ++ $(AUTOMAKE) --gnu include/sys/fm/fs/Makefile ++.PRECIOUS: Makefile ++Makefile: $(srcdir)/Makefile.in $(top_builddir)/config.status ++ @case '$?' 
in \ ++ *config.status*) \ ++ cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh;; \ ++ *) \ ++ echo ' cd $(top_builddir) && $(SHELL) ./config.status $(subdir)/$@ $(am__depfiles_maybe)'; \ ++ cd $(top_builddir) && $(SHELL) ./config.status $(subdir)/$@ $(am__depfiles_maybe);; \ ++ esac; ++ ++$(top_builddir)/config.status: $(top_srcdir)/configure $(CONFIG_STATUS_DEPENDENCIES) ++ cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh ++ ++$(top_srcdir)/configure: @MAINTAINER_MODE_TRUE@ $(am__configure_deps) ++ cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh ++$(ACLOCAL_M4): @MAINTAINER_MODE_TRUE@ $(am__aclocal_m4_deps) ++ cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh ++$(am__aclocal_m4_deps): ++ ++mostlyclean-libtool: ++ -rm -f *.lo ++ ++clean-libtool: ++ -rm -rf .libs _libs ++install-kernelHEADERS: $(kernel_HEADERS) ++ @$(NORMAL_INSTALL) ++ @list='$(kernel_HEADERS)'; test -n "$(kerneldir)" || list=; \ ++ if test -n "$$list"; then \ ++ echo " $(MKDIR_P) '$(DESTDIR)$(kerneldir)'"; \ ++ $(MKDIR_P) "$(DESTDIR)$(kerneldir)" || exit 1; \ ++ fi; \ ++ for p in $$list; do \ ++ if test -f "$$p"; then d=; else d="$(srcdir)/"; fi; \ ++ echo "$$d$$p"; \ ++ done | $(am__base_list) | \ ++ while read files; do \ ++ echo " $(INSTALL_HEADER) $$files '$(DESTDIR)$(kerneldir)'"; \ ++ $(INSTALL_HEADER) $$files "$(DESTDIR)$(kerneldir)" || exit $$?; \ ++ done ++ ++uninstall-kernelHEADERS: ++ @$(NORMAL_UNINSTALL) ++ @list='$(kernel_HEADERS)'; test -n "$(kerneldir)" || list=; \ ++ files=`for p in $$list; do echo $$p; done | sed -e 's|^.*/||'`; \ ++ dir='$(DESTDIR)$(kerneldir)'; $(am__uninstall_files_from_dir) ++install-libzfsHEADERS: $(libzfs_HEADERS) ++ @$(NORMAL_INSTALL) ++ @list='$(libzfs_HEADERS)'; test -n "$(libzfsdir)" || list=; \ ++ if test -n "$$list"; then \ ++ echo " $(MKDIR_P) '$(DESTDIR)$(libzfsdir)'"; \ ++ $(MKDIR_P) "$(DESTDIR)$(libzfsdir)" || exit 1; \ ++ fi; \ ++ for p in $$list; do \ ++ if test -f "$$p"; then d=; else d="$(srcdir)/"; fi; \ ++ echo "$$d$$p"; \ ++ done | $(am__base_list) | \ ++ while read files; do \ ++ echo " $(INSTALL_HEADER) $$files '$(DESTDIR)$(libzfsdir)'"; \ ++ $(INSTALL_HEADER) $$files "$(DESTDIR)$(libzfsdir)" || exit $$?; \ ++ done ++ ++uninstall-libzfsHEADERS: ++ @$(NORMAL_UNINSTALL) ++ @list='$(libzfs_HEADERS)'; test -n "$(libzfsdir)" || list=; \ ++ files=`for p in $$list; do echo $$p; done | sed -e 's|^.*/||'`; \ ++ dir='$(DESTDIR)$(libzfsdir)'; $(am__uninstall_files_from_dir) ++ ++ID: $(HEADERS) $(SOURCES) $(LISP) $(TAGS_FILES) ++ list='$(SOURCES) $(HEADERS) $(LISP) $(TAGS_FILES)'; \ ++ unique=`for i in $$list; do \ ++ if test -f "$$i"; then echo $$i; else echo $(srcdir)/$$i; fi; \ ++ done | \ ++ $(AWK) '{ files[$$0] = 1; nonempty = 1; } \ ++ END { if (nonempty) { for (i in files) print i; }; }'`; \ ++ mkid -fID $$unique ++tags: TAGS ++ ++TAGS: $(HEADERS) $(SOURCES) $(TAGS_DEPENDENCIES) \ ++ $(TAGS_FILES) $(LISP) ++ set x; \ ++ here=`pwd`; \ ++ list='$(SOURCES) $(HEADERS) $(LISP) $(TAGS_FILES)'; \ ++ unique=`for i in $$list; do \ ++ if test -f "$$i"; then echo $$i; else echo $(srcdir)/$$i; fi; \ ++ done | \ ++ $(AWK) '{ files[$$0] = 1; nonempty = 1; } \ ++ END { if (nonempty) { for (i in files) print i; }; }'`; \ ++ shift; \ ++ if test -z "$(ETAGS_ARGS)$$*$$unique"; then :; else \ ++ test -n "$$unique" || unique=$$empty_fix; \ ++ if test $$# -gt 0; then \ ++ $(ETAGS) $(ETAGSFLAGS) $(AM_ETAGSFLAGS) $(ETAGS_ARGS) \ ++ "$$@" $$unique; \ ++ else \ ++ $(ETAGS) $(ETAGSFLAGS) $(AM_ETAGSFLAGS) $(ETAGS_ARGS) \ ++ $$unique; \ ++ fi; \ ++ fi ++ctags: 
CTAGS ++CTAGS: $(HEADERS) $(SOURCES) $(TAGS_DEPENDENCIES) \ ++ $(TAGS_FILES) $(LISP) ++ list='$(SOURCES) $(HEADERS) $(LISP) $(TAGS_FILES)'; \ ++ unique=`for i in $$list; do \ ++ if test -f "$$i"; then echo $$i; else echo $(srcdir)/$$i; fi; \ ++ done | \ ++ $(AWK) '{ files[$$0] = 1; nonempty = 1; } \ ++ END { if (nonempty) { for (i in files) print i; }; }'`; \ ++ test -z "$(CTAGS_ARGS)$$unique" \ ++ || $(CTAGS) $(CTAGSFLAGS) $(AM_CTAGSFLAGS) $(CTAGS_ARGS) \ ++ $$unique ++ ++GTAGS: ++ here=`$(am__cd) $(top_builddir) && pwd` \ ++ && $(am__cd) $(top_srcdir) \ ++ && gtags -i $(GTAGS_ARGS) "$$here" ++ ++distclean-tags: ++ -rm -f TAGS ID GTAGS GRTAGS GSYMS GPATH tags ++ ++distdir: $(DISTFILES) ++ @srcdirstrip=`echo "$(srcdir)" | sed 's/[].[^$$\\*]/\\\\&/g'`; \ ++ topsrcdirstrip=`echo "$(top_srcdir)" | sed 's/[].[^$$\\*]/\\\\&/g'`; \ ++ list='$(DISTFILES)'; \ ++ dist_files=`for file in $$list; do echo $$file; done | \ ++ sed -e "s|^$$srcdirstrip/||;t" \ ++ -e "s|^$$topsrcdirstrip/|$(top_builddir)/|;t"`; \ ++ case $$dist_files in \ ++ */*) $(MKDIR_P) `echo "$$dist_files" | \ ++ sed '/\//!d;s|^|$(distdir)/|;s,/[^/]*$$,,' | \ ++ sort -u` ;; \ ++ esac; \ ++ for file in $$dist_files; do \ ++ if test -f $$file || test -d $$file; then d=.; else d=$(srcdir); fi; \ ++ if test -d $$d/$$file; then \ ++ dir=`echo "/$$file" | sed -e 's,/[^/]*$$,,'`; \ ++ if test -d "$(distdir)/$$file"; then \ ++ find "$(distdir)/$$file" -type d ! -perm -700 -exec chmod u+rwx {} \;; \ ++ fi; \ ++ if test -d $(srcdir)/$$file && test $$d != $(srcdir); then \ ++ cp -fpR $(srcdir)/$$file "$(distdir)$$dir" || exit 1; \ ++ find "$(distdir)/$$file" -type d ! -perm -700 -exec chmod u+rwx {} \;; \ ++ fi; \ ++ cp -fpR $$d/$$file "$(distdir)$$dir" || exit 1; \ ++ else \ ++ test -f "$(distdir)/$$file" \ ++ || cp -p $$d/$$file "$(distdir)/$$file" \ ++ || exit 1; \ ++ fi; \ ++ done ++check-am: all-am ++check: check-am ++all-am: Makefile $(HEADERS) ++installdirs: ++ for dir in "$(DESTDIR)$(kerneldir)" "$(DESTDIR)$(libzfsdir)"; do \ ++ test -z "$$dir" || $(MKDIR_P) "$$dir"; \ ++ done ++install: install-am ++install-exec: install-exec-am ++install-data: install-data-am ++uninstall: uninstall-am ++ ++install-am: all-am ++ @$(MAKE) $(AM_MAKEFLAGS) install-exec-am install-data-am ++ ++installcheck: installcheck-am ++install-strip: ++ if test -z '$(STRIP)'; then \ ++ $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \ ++ install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \ ++ install; \ ++ else \ ++ $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \ ++ install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \ ++ "INSTALL_PROGRAM_ENV=STRIPPROG='$(STRIP)'" install; \ ++ fi ++mostlyclean-generic: ++ ++clean-generic: ++ ++distclean-generic: ++ -test -z "$(CONFIG_CLEAN_FILES)" || rm -f $(CONFIG_CLEAN_FILES) ++ -test . = "$(srcdir)" || test -z "$(CONFIG_CLEAN_VPATH_FILES)" || rm -f $(CONFIG_CLEAN_VPATH_FILES) ++ ++maintainer-clean-generic: ++ @echo "This command is intended for maintainers to use" ++ @echo "it deletes files that may require special tools to rebuild." 
++clean: clean-am ++ ++clean-am: clean-generic clean-libtool mostlyclean-am ++ ++distclean: distclean-am ++ -rm -f Makefile ++distclean-am: clean-am distclean-generic distclean-tags ++ ++dvi: dvi-am ++ ++dvi-am: ++ ++html: html-am ++ ++html-am: ++ ++info: info-am ++ ++info-am: ++ ++install-data-am: install-kernelHEADERS install-libzfsHEADERS ++ ++install-dvi: install-dvi-am ++ ++install-dvi-am: ++ ++install-exec-am: ++ ++install-html: install-html-am ++ ++install-html-am: ++ ++install-info: install-info-am ++ ++install-info-am: ++ ++install-man: ++ ++install-pdf: install-pdf-am ++ ++install-pdf-am: ++ ++install-ps: install-ps-am ++ ++install-ps-am: ++ ++installcheck-am: ++ ++maintainer-clean: maintainer-clean-am ++ -rm -f Makefile ++maintainer-clean-am: distclean-am maintainer-clean-generic ++ ++mostlyclean: mostlyclean-am ++ ++mostlyclean-am: mostlyclean-generic mostlyclean-libtool ++ ++pdf: pdf-am ++ ++pdf-am: ++ ++ps: ps-am ++ ++ps-am: ++ ++uninstall-am: uninstall-kernelHEADERS uninstall-libzfsHEADERS ++ ++.MAKE: install-am install-strip ++ ++.PHONY: CTAGS GTAGS all all-am check check-am clean clean-generic \ ++ clean-libtool ctags distclean distclean-generic \ ++ distclean-libtool distclean-tags distdir dvi dvi-am html \ ++ html-am info info-am install install-am install-data \ ++ install-data-am install-dvi install-dvi-am install-exec \ ++ install-exec-am install-html install-html-am install-info \ ++ install-info-am install-kernelHEADERS install-libzfsHEADERS \ ++ install-man install-pdf install-pdf-am install-ps \ ++ install-ps-am install-strip installcheck installcheck-am \ ++ installdirs maintainer-clean maintainer-clean-generic \ ++ mostlyclean mostlyclean-generic mostlyclean-libtool pdf pdf-am \ ++ ps ps-am tags uninstall uninstall-am uninstall-kernelHEADERS \ ++ uninstall-libzfsHEADERS ++ ++ ++# Tell versions [3.59,3.63) of GNU make to not export all variables. ++# Otherwise a system limit (for SysV at least) may be exceeded. ++.NOEXPORT: +diff -uNr linux-3.2.33-go.orig/include/zfs/sys/fm/fs/zfs.h linux-3.2.33-go/include/zfs/sys/fm/fs/zfs.h +--- linux-3.2.33-go.orig/include/zfs/sys/fm/fs/zfs.h 1970-01-01 01:00:00.000000000 +0100 ++++ linux-3.2.33-go/include/zfs/sys/fm/fs/zfs.h 2012-11-16 23:25:34.343039404 +0100 +@@ -0,0 +1,115 @@ ++/* ++ * CDDL HEADER START ++ * ++ * The contents of this file are subject to the terms of the ++ * Common Development and Distribution License (the "License"). ++ * You may not use this file except in compliance with the License. ++ * ++ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE ++ * or http://www.opensolaris.org/os/licensing. ++ * See the License for the specific language governing permissions ++ * and limitations under the License. ++ * ++ * When distributing Covered Code, include this CDDL HEADER in each ++ * file and include the License file at usr/src/OPENSOLARIS.LICENSE. ++ * If applicable, add the following below this CDDL HEADER, with the ++ * fields enclosed by brackets "[]" replaced with your own identifying ++ * information: Portions Copyright [yyyy] [name of copyright owner] ++ * ++ * CDDL HEADER END ++ */ ++/* ++ * Copyright 2009 Sun Microsystems, Inc. All rights reserved. ++ * Use is subject to license terms. 
++ */ ++ ++#ifndef _SYS_FM_FS_ZFS_H ++#define _SYS_FM_FS_ZFS_H ++ ++#ifdef __cplusplus ++extern "C" { ++#endif ++ ++#define ZFS_ERROR_CLASS "fs.zfs" ++ ++#define FM_EREPORT_ZFS_CHECKSUM "checksum" ++#define FM_EREPORT_ZFS_IO "io" ++#define FM_EREPORT_ZFS_DATA "data" ++#define FM_EREPORT_ZFS_DELAY "delay" ++#define FM_EREPORT_ZFS_CONFIG_SYNC "config.sync" ++#define FM_EREPORT_ZFS_POOL "zpool" ++#define FM_EREPORT_ZFS_POOL_DESTROY "zpool.destroy" ++#define FM_EREPORT_ZFS_POOL_REGUID "zpool.reguid" ++#define FM_EREPORT_ZFS_DEVICE_UNKNOWN "vdev.unknown" ++#define FM_EREPORT_ZFS_DEVICE_OPEN_FAILED "vdev.open_failed" ++#define FM_EREPORT_ZFS_DEVICE_CORRUPT_DATA "vdev.corrupt_data" ++#define FM_EREPORT_ZFS_DEVICE_NO_REPLICAS "vdev.no_replicas" ++#define FM_EREPORT_ZFS_DEVICE_BAD_GUID_SUM "vdev.bad_guid_sum" ++#define FM_EREPORT_ZFS_DEVICE_TOO_SMALL "vdev.too_small" ++#define FM_EREPORT_ZFS_DEVICE_BAD_LABEL "vdev.bad_label" ++#define FM_EREPORT_ZFS_DEVICE_REMOVE "vdev.remove" ++#define FM_EREPORT_ZFS_DEVICE_CLEAR "vdev.clear" ++#define FM_EREPORT_ZFS_DEVICE_CHECK "vdev.check" ++#define FM_EREPORT_ZFS_DEVICE_SPARE "vdev.spare" ++#define FM_EREPORT_ZFS_DEVICE_AUTOEXPAND "vdev.autoexpand" ++#define FM_EREPORT_ZFS_IO_FAILURE "io_failure" ++#define FM_EREPORT_ZFS_PROBE_FAILURE "probe_failure" ++#define FM_EREPORT_ZFS_LOG_REPLAY "log_replay" ++#define FM_EREPORT_ZFS_RESILVER_START "resilver.start" ++#define FM_EREPORT_ZFS_RESILVER_FINISH "resilver.finish" ++#define FM_EREPORT_ZFS_SCRUB_START "scrub.start" ++#define FM_EREPORT_ZFS_SCRUB_FINISH "scrub.finish" ++#define FM_EREPORT_ZFS_BOOTFS_VDEV_ATTACH "bootfs.vdev.attach" ++ ++#define FM_EREPORT_PAYLOAD_ZFS_POOL "pool" ++#define FM_EREPORT_PAYLOAD_ZFS_POOL_FAILMODE "pool_failmode" ++#define FM_EREPORT_PAYLOAD_ZFS_POOL_GUID "pool_guid" ++#define FM_EREPORT_PAYLOAD_ZFS_POOL_CONTEXT "pool_context" ++#define FM_EREPORT_PAYLOAD_ZFS_VDEV_GUID "vdev_guid" ++#define FM_EREPORT_PAYLOAD_ZFS_VDEV_TYPE "vdev_type" ++#define FM_EREPORT_PAYLOAD_ZFS_VDEV_PATH "vdev_path" ++#define FM_EREPORT_PAYLOAD_ZFS_VDEV_DEVID "vdev_devid" ++#define FM_EREPORT_PAYLOAD_ZFS_VDEV_FRU "vdev_fru" ++#define FM_EREPORT_PAYLOAD_ZFS_VDEV_STATE "vdev_state" ++#define FM_EREPORT_PAYLOAD_ZFS_PARENT_GUID "parent_guid" ++#define FM_EREPORT_PAYLOAD_ZFS_PARENT_TYPE "parent_type" ++#define FM_EREPORT_PAYLOAD_ZFS_PARENT_PATH "parent_path" ++#define FM_EREPORT_PAYLOAD_ZFS_PARENT_DEVID "parent_devid" ++#define FM_EREPORT_PAYLOAD_ZFS_ZIO_OBJSET "zio_objset" ++#define FM_EREPORT_PAYLOAD_ZFS_ZIO_OBJECT "zio_object" ++#define FM_EREPORT_PAYLOAD_ZFS_ZIO_LEVEL "zio_level" ++#define FM_EREPORT_PAYLOAD_ZFS_ZIO_BLKID "zio_blkid" ++#define FM_EREPORT_PAYLOAD_ZFS_ZIO_ERR "zio_err" ++#define FM_EREPORT_PAYLOAD_ZFS_ZIO_OFFSET "zio_offset" ++#define FM_EREPORT_PAYLOAD_ZFS_ZIO_SIZE "zio_size" ++#define FM_EREPORT_PAYLOAD_ZFS_ZIO_FLAGS "zio_flags" ++#define FM_EREPORT_PAYLOAD_ZFS_ZIO_STAGE "zio_stage" ++#define FM_EREPORT_PAYLOAD_ZFS_ZIO_PIPELINE "zio_pipeline" ++#define FM_EREPORT_PAYLOAD_ZFS_ZIO_DELAY "zio_delay" ++#define FM_EREPORT_PAYLOAD_ZFS_PREV_STATE "prev_state" ++#define FM_EREPORT_PAYLOAD_ZFS_CKSUM_EXPECTED "cksum_expected" ++#define FM_EREPORT_PAYLOAD_ZFS_CKSUM_ACTUAL "cksum_actual" ++#define FM_EREPORT_PAYLOAD_ZFS_CKSUM_ALGO "cksum_algorithm" ++#define FM_EREPORT_PAYLOAD_ZFS_CKSUM_BYTESWAP "cksum_byteswap" ++#define FM_EREPORT_PAYLOAD_ZFS_BAD_OFFSET_RANGES "bad_ranges" ++#define FM_EREPORT_PAYLOAD_ZFS_BAD_RANGE_MIN_GAP "bad_ranges_min_gap" ++#define FM_EREPORT_PAYLOAD_ZFS_BAD_RANGE_SETS "bad_range_sets" 
++#define FM_EREPORT_PAYLOAD_ZFS_BAD_RANGE_CLEARS "bad_range_clears" ++#define FM_EREPORT_PAYLOAD_ZFS_BAD_SET_BITS "bad_set_bits" ++#define FM_EREPORT_PAYLOAD_ZFS_BAD_CLEARED_BITS "bad_cleared_bits" ++#define FM_EREPORT_PAYLOAD_ZFS_BAD_SET_HISTOGRAM "bad_set_histogram" ++#define FM_EREPORT_PAYLOAD_ZFS_BAD_CLEARED_HISTOGRAM "bad_cleared_histogram" ++ ++#define FM_EREPORT_FAILMODE_WAIT "wait" ++#define FM_EREPORT_FAILMODE_CONTINUE "continue" ++#define FM_EREPORT_FAILMODE_PANIC "panic" ++ ++#define FM_EREPORT_RESOURCE_REMOVED "removed" ++#define FM_EREPORT_RESOURCE_AUTOREPLACE "autoreplace" ++#define FM_EREPORT_RESOURCE_STATECHANGE "statechange" ++ ++#ifdef __cplusplus ++} ++#endif ++ ++#endif /* _SYS_FM_FS_ZFS_H */ +diff -uNr linux-3.2.33-go.orig/include/zfs/sys/fm/Makefile linux-3.2.33-go/include/zfs/sys/fm/Makefile +--- linux-3.2.33-go.orig/include/zfs/sys/fm/Makefile 1970-01-01 01:00:00.000000000 +0100 ++++ linux-3.2.33-go/include/zfs/sys/fm/Makefile 2012-11-16 23:25:34.343039404 +0100 +@@ -0,0 +1,813 @@ ++# Makefile.in generated by automake 1.11.6 from Makefile.am. ++# include/sys/fm/Makefile. Generated from Makefile.in by configure. ++ ++# Copyright (C) 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001, 2002, ++# 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010, 2011 Free Software ++# Foundation, Inc. ++# This Makefile.in is free software; the Free Software Foundation ++# gives unlimited permission to copy and/or distribute it, ++# with or without modifications, as long as this notice is preserved. ++ ++# This program is distributed in the hope that it will be useful, ++# but WITHOUT ANY WARRANTY, to the extent permitted by law; without ++# even the implied warranty of MERCHANTABILITY or FITNESS FOR A ++# PARTICULAR PURPOSE. ++ ++ ++ ++ ++am__make_dryrun = \ ++ { \ ++ am__dry=no; \ ++ case $$MAKEFLAGS in \ ++ *\\[\ \ ]*) \ ++ echo 'am--echo: ; @echo "AM" OK' | $(MAKE) -f - 2>/dev/null \ ++ | grep '^AM OK$$' >/dev/null || am__dry=yes;; \ ++ *) \ ++ for am__flg in $$MAKEFLAGS; do \ ++ case $$am__flg in \ ++ *=*|--*) ;; \ ++ *n*) am__dry=yes; break;; \ ++ esac; \ ++ done;; \ ++ esac; \ ++ test $$am__dry = yes; \ ++ } ++pkgdatadir = $(datadir)/zfs ++pkgincludedir = $(includedir)/zfs ++pkglibdir = $(libdir)/zfs ++pkglibexecdir = $(libexecdir)/zfs ++am__cd = CDPATH="$${ZSH_VERSION+.}$(PATH_SEPARATOR)" && cd ++install_sh_DATA = $(install_sh) -c -m 644 ++install_sh_PROGRAM = $(install_sh) -c ++install_sh_SCRIPT = $(install_sh) -c ++INSTALL_HEADER = $(INSTALL_DATA) ++transform = $(program_transform_name) ++NORMAL_INSTALL = : ++PRE_INSTALL = : ++POST_INSTALL = : ++NORMAL_UNINSTALL = : ++PRE_UNINSTALL = : ++POST_UNINSTALL = : ++build_triplet = x86_64-unknown-linux-gnu ++host_triplet = x86_64-unknown-linux-gnu ++target_triplet = x86_64-unknown-linux-gnu ++subdir = include/sys/fm ++DIST_COMMON = $(am__kernel_HEADERS_DIST) $(am__libzfs_HEADERS_DIST) \ ++ $(srcdir)/Makefile.am $(srcdir)/Makefile.in ++ACLOCAL_M4 = $(top_srcdir)/aclocal.m4 ++am__aclocal_m4_deps = \ ++ $(top_srcdir)/config/always-no-unused-but-set-variable.m4 \ ++ $(top_srcdir)/config/kernel-automount.m4 \ ++ $(top_srcdir)/config/kernel-bdev-block-device-operations.m4 \ ++ $(top_srcdir)/config/kernel-bdev-logical-size.m4 \ ++ $(top_srcdir)/config/kernel-bdi-setup-and-register.m4 \ ++ $(top_srcdir)/config/kernel-bdi.m4 \ ++ $(top_srcdir)/config/kernel-bio-empty-barrier.m4 \ ++ $(top_srcdir)/config/kernel-bio-end-io-t-args.m4 \ ++ $(top_srcdir)/config/kernel-bio-failfast.m4 \ ++ $(top_srcdir)/config/kernel-bio-rw-syncio.m4 \ ++ 
$(top_srcdir)/config/kernel-blk-end-request.m4 \ ++ $(top_srcdir)/config/kernel-blk-fetch-request.m4 \ ++ $(top_srcdir)/config/kernel-blk-queue-discard.m4 \ ++ $(top_srcdir)/config/kernel-blk-queue-flush.m4 \ ++ $(top_srcdir)/config/kernel-blk-queue-io-opt.m4 \ ++ $(top_srcdir)/config/kernel-blk-queue-max-hw-sectors.m4 \ ++ $(top_srcdir)/config/kernel-blk-queue-max-segments.m4 \ ++ $(top_srcdir)/config/kernel-blk-queue-nonrot.m4 \ ++ $(top_srcdir)/config/kernel-blk-queue-physical-block-size.m4 \ ++ $(top_srcdir)/config/kernel-blk-requeue-request.m4 \ ++ $(top_srcdir)/config/kernel-blk-rq-bytes.m4 \ ++ $(top_srcdir)/config/kernel-blk-rq-pos.m4 \ ++ $(top_srcdir)/config/kernel-blk-rq-sectors.m4 \ ++ $(top_srcdir)/config/kernel-blkdev-get-by-path.m4 \ ++ $(top_srcdir)/config/kernel-blkdev-get.m4 \ ++ $(top_srcdir)/config/kernel-check-disk-size-change.m4 \ ++ $(top_srcdir)/config/kernel-clear-inode.m4 \ ++ $(top_srcdir)/config/kernel-commit-metadata.m4 \ ++ $(top_srcdir)/config/kernel-create-nameidata.m4 \ ++ $(top_srcdir)/config/kernel-d-make-root.m4 \ ++ $(top_srcdir)/config/kernel-d-obtain-alias.m4 \ ++ $(top_srcdir)/config/kernel-discard-granularity.m4 \ ++ $(top_srcdir)/config/kernel-elevator-change.m4 \ ++ $(top_srcdir)/config/kernel-encode-fh-inode.m4 \ ++ $(top_srcdir)/config/kernel-evict-inode.m4 \ ++ $(top_srcdir)/config/kernel-fallocate.m4 \ ++ $(top_srcdir)/config/kernel-fmode-t.m4 \ ++ $(top_srcdir)/config/kernel-fsync.m4 \ ++ $(top_srcdir)/config/kernel-get-disk-ro.m4 \ ++ $(top_srcdir)/config/kernel-get-gendisk.m4 \ ++ $(top_srcdir)/config/kernel-insert-inode-locked.m4 \ ++ $(top_srcdir)/config/kernel-invalidate-bdev-args.m4 \ ++ $(top_srcdir)/config/kernel-kobj-name-len.m4 \ ++ $(top_srcdir)/config/kernel-lookup-nameidata.m4 \ ++ $(top_srcdir)/config/kernel-mkdir-umode-t.m4 \ ++ $(top_srcdir)/config/kernel-mount-nodev.m4 \ ++ $(top_srcdir)/config/kernel-open-bdev-exclusive.m4 \ ++ $(top_srcdir)/config/kernel-rq-for-each_segment.m4 \ ++ $(top_srcdir)/config/kernel-rq-is_sync.m4 \ ++ $(top_srcdir)/config/kernel-security-inode-init.m4 \ ++ $(top_srcdir)/config/kernel-set-nlink.m4 \ ++ $(top_srcdir)/config/kernel-sget-args.m4 \ ++ $(top_srcdir)/config/kernel-show-options.m4 \ ++ $(top_srcdir)/config/kernel-shrink.m4 \ ++ $(top_srcdir)/config/kernel-truncate-range.m4 \ ++ $(top_srcdir)/config/kernel-truncate-setsize.m4 \ ++ $(top_srcdir)/config/kernel-xattr-handler.m4 \ ++ $(top_srcdir)/config/kernel.m4 \ ++ $(top_srcdir)/config/user-arch.m4 \ ++ $(top_srcdir)/config/user-frame-larger-than.m4 \ ++ $(top_srcdir)/config/user-ioctl.m4 \ ++ $(top_srcdir)/config/user-libblkid.m4 \ ++ $(top_srcdir)/config/user-libuuid.m4 \ ++ $(top_srcdir)/config/user-nptl_guard_within_stack.m4 \ ++ $(top_srcdir)/config/user-selinux.m4 \ ++ $(top_srcdir)/config/user-udev.m4 \ ++ $(top_srcdir)/config/user-zlib.m4 $(top_srcdir)/config/user.m4 \ ++ $(top_srcdir)/config/zfs-build.m4 \ ++ $(top_srcdir)/config/zfs-meta.m4 $(top_srcdir)/configure.ac ++am__configure_deps = $(am__aclocal_m4_deps) $(CONFIGURE_DEPENDENCIES) \ ++ $(ACLOCAL_M4) ++mkinstalldirs = $(install_sh) -d ++CONFIG_HEADER = $(top_builddir)/zfs_config.h ++CONFIG_CLEAN_FILES = ++CONFIG_CLEAN_VPATH_FILES = ++AM_V_GEN = $(am__v_GEN_$(V)) ++am__v_GEN_ = $(am__v_GEN_$(AM_DEFAULT_VERBOSITY)) ++am__v_GEN_0 = @echo " GEN " $@; ++AM_V_at = $(am__v_at_$(V)) ++am__v_at_ = $(am__v_at_$(AM_DEFAULT_VERBOSITY)) ++am__v_at_0 = @ ++SOURCES = ++DIST_SOURCES = ++RECURSIVE_TARGETS = all-recursive check-recursive dvi-recursive \ ++ html-recursive info-recursive 
install-data-recursive \ ++ install-dvi-recursive install-exec-recursive \ ++ install-html-recursive install-info-recursive \ ++ install-pdf-recursive install-ps-recursive install-recursive \ ++ installcheck-recursive installdirs-recursive pdf-recursive \ ++ ps-recursive uninstall-recursive ++am__can_run_installinfo = \ ++ case $$AM_UPDATE_INFO_DIR in \ ++ n|no|NO) false;; \ ++ *) (install-info --version) >/dev/null 2>&1;; \ ++ esac ++am__kernel_HEADERS_DIST = $(top_srcdir)/include/sys/fm/protocol.h \ ++ $(top_srcdir)/include/sys/fm/util.h ++am__vpath_adj_setup = srcdirstrip=`echo "$(srcdir)" | sed 's|.|.|g'`; ++am__vpath_adj = case $$p in \ ++ $(srcdir)/*) f=`echo "$$p" | sed "s|^$$srcdirstrip/||"`;; \ ++ *) f=$$p;; \ ++ esac; ++am__strip_dir = f=`echo $$p | sed -e 's|^.*/||'`; ++am__install_max = 40 ++am__nobase_strip_setup = \ ++ srcdirstrip=`echo "$(srcdir)" | sed 's/[].[^$$\\*|]/\\\\&/g'` ++am__nobase_strip = \ ++ for p in $$list; do echo "$$p"; done | sed -e "s|$$srcdirstrip/||" ++am__nobase_list = $(am__nobase_strip_setup); \ ++ for p in $$list; do echo "$$p $$p"; done | \ ++ sed "s| $$srcdirstrip/| |;"' / .*\//!s/ .*/ ./; s,\( .*\)/[^/]*$$,\1,' | \ ++ $(AWK) 'BEGIN { files["."] = "" } { files[$$2] = files[$$2] " " $$1; \ ++ if (++n[$$2] == $(am__install_max)) \ ++ { print $$2, files[$$2]; n[$$2] = 0; files[$$2] = "" } } \ ++ END { for (dir in files) print dir, files[dir] }' ++am__base_list = \ ++ sed '$$!N;$$!N;$$!N;$$!N;$$!N;$$!N;$$!N;s/\n/ /g' | \ ++ sed '$$!N;$$!N;$$!N;$$!N;s/\n/ /g' ++am__uninstall_files_from_dir = { \ ++ test -z "$$files" \ ++ || { test ! -d "$$dir" && test ! -f "$$dir" && test ! -r "$$dir"; } \ ++ || { echo " ( cd '$$dir' && rm -f" $$files ")"; \ ++ $(am__cd) "$$dir" && rm -f $$files; }; \ ++ } ++am__installdirs = "$(DESTDIR)$(kerneldir)" "$(DESTDIR)$(libzfsdir)" ++am__libzfs_HEADERS_DIST = $(top_srcdir)/include/sys/fm/protocol.h \ ++ $(top_srcdir)/include/sys/fm/util.h ++HEADERS = $(kernel_HEADERS) $(libzfs_HEADERS) ++RECURSIVE_CLEAN_TARGETS = mostlyclean-recursive clean-recursive \ ++ distclean-recursive maintainer-clean-recursive ++AM_RECURSIVE_TARGETS = $(RECURSIVE_TARGETS:-recursive=) \ ++ $(RECURSIVE_CLEAN_TARGETS:-recursive=) tags TAGS ctags CTAGS \ ++ distdir ++ETAGS = etags ++CTAGS = ctags ++DIST_SUBDIRS = $(SUBDIRS) ++DISTFILES = $(DIST_COMMON) $(DIST_SOURCES) $(TEXINFOS) $(EXTRA_DIST) ++am__relativize = \ ++ dir0=`pwd`; \ ++ sed_first='s,^\([^/]*\)/.*$$,\1,'; \ ++ sed_rest='s,^[^/]*/*,,'; \ ++ sed_last='s,^.*/\([^/]*\)$$,\1,'; \ ++ sed_butlast='s,/*[^/]*$$,,'; \ ++ while test -n "$$dir1"; do \ ++ first=`echo "$$dir1" | sed -e "$$sed_first"`; \ ++ if test "$$first" != "."; then \ ++ if test "$$first" = ".."; then \ ++ dir2=`echo "$$dir0" | sed -e "$$sed_last"`/"$$dir2"; \ ++ dir0=`echo "$$dir0" | sed -e "$$sed_butlast"`; \ ++ else \ ++ first2=`echo "$$dir2" | sed -e "$$sed_first"`; \ ++ if test "$$first2" = "$$first"; then \ ++ dir2=`echo "$$dir2" | sed -e "$$sed_rest"`; \ ++ else \ ++ dir2="../$$dir2"; \ ++ fi; \ ++ dir0="$$dir0"/"$$first"; \ ++ fi; \ ++ fi; \ ++ dir1=`echo "$$dir1" | sed -e "$$sed_rest"`; \ ++ done; \ ++ reldir="$$dir2" ++ACLOCAL = ${SHELL} /root/zfs-0.6.0-rc12/config/missing --run aclocal-1.11 ++ALIEN = alien ++ALIEN_VERSION = ++AMTAR = $${TAR-tar} ++AM_DEFAULT_VERBOSITY = 1 ++AR = ar ++AUTOCONF = ${SHELL} /root/zfs-0.6.0-rc12/config/missing --run autoconf ++AUTOHEADER = ${SHELL} /root/zfs-0.6.0-rc12/config/missing --run autoheader ++AUTOMAKE = ${SHELL} /root/zfs-0.6.0-rc12/config/missing --run automake-1.11 ++AWK = gawk ++CC = 
gcc ++CCAS = gcc ++CCASDEPMODE = depmode=gcc3 ++CCASFLAGS = -g -O2 ++CCDEPMODE = depmode=gcc3 ++CFLAGS = -g -O2 ++CPP = gcc -E ++CPPFLAGS = ++CYGPATH_W = echo ++DEBUG_CFLAGS = -DNDEBUG ++DEBUG_DMU_TX = _without_debug_dmu_tx ++DEBUG_STACKFLAGS = ++DEBUG_ZFS = _without_debug ++DEFAULT_INIT_DIR = ${prefix}/etc/init.d ++DEFAULT_INIT_SCRIPT = gentoo ++DEFAULT_PACKAGE = tgz ++DEFS = -DHAVE_CONFIG_H ++DEPDIR = .deps ++DLLTOOL = false ++DPKG = dpkg ++DPKGBUILD = dpkg-buildpackage ++DPKGBUILD_VERSION = ++DPKG_VERSION = ++DSYMUTIL = ++DUMPBIN = ++ECHO_C = ++ECHO_N = -n ++ECHO_T = ++EGREP = /bin/grep -E ++EXEEXT = ++FGREP = /bin/grep -F ++FRAME_LARGER_THAN = -Wframe-larger-than=1024 ++GREP = /bin/grep ++HAVE_ALIEN = no ++HAVE_DPKG = no ++HAVE_DPKGBUILD = no ++HAVE_MAKEPKG = ++HAVE_PACMAN = ++HAVE_RPM = yes ++HAVE_RPMBUILD = yes ++INSTALL = /usr/bin/install -c ++INSTALL_DATA = ${INSTALL} -m 644 ++INSTALL_PROGRAM = ${INSTALL} ++INSTALL_SCRIPT = ${INSTALL} ++INSTALL_STRIP_PROGRAM = $(install_sh) -c -s ++KERNELCPPFLAGS = -Wno-unused-but-set-variable -DHAVE_SPL -D_KERNEL -DTEXT_DOMAIN=\"zfs-linux-kernel\" -DNDEBUG ++KERNELMAKE_PARAMS = O=/usr/src/linux-3.6.0-sabayon ++LD = /usr/x86_64-pc-linux-gnu/bin/ld -m elf_x86_64 ++LDFLAGS = ++LIBBLKID = ++LIBOBJS = ++LIBS = -luuid -luuid -lz -lz -lz ++LIBSELINUX = ++LIBTOOL = $(SHELL) $(top_builddir)/libtool ++LIBUUID = -luuid ++LINUX = /usr/src/linux-3.2.33-go ++LINUX_OBJ = /usr/src/linux-3.6.0-sabayon ++LINUX_SYMBOLS = NONE ++LINUX_VERSION = 3.6.0-sabayon ++LIPO = ++LN_S = ln -s ++LTLIBOBJS = ++MAINT = # ++MAKEINFO = ${SHELL} /root/zfs-0.6.0-rc12/config/missing --run makeinfo ++MAKEPKG = ++MAKEPKG_VERSION = ++MANIFEST_TOOL = : ++MKDIR_P = /bin/mkdir -p ++NM = /usr/bin/nm -B ++NMEDIT = ++NO_UNUSED_BUT_SET_VARIABLE = -Wno-unused-but-set-variable ++OBJDUMP = objdump ++OBJEXT = o ++OTOOL = ++OTOOL64 = ++PACKAGE = zfs ++PACKAGE_BUGREPORT = ++PACKAGE_NAME = ++PACKAGE_STRING = ++PACKAGE_TARNAME = ++PACKAGE_URL = ++PACKAGE_VERSION = ++PACMAN = ++PACMAN_VERSION = ++PATH_SEPARATOR = : ++RANLIB = ranlib ++RPM = rpm ++RPMBUILD = rpmbuild ++RPMBUILD_VERSION = 4.10.0 ++RPM_VERSION = 4.10.0 ++SED = /bin/sed ++SET_MAKE = ++SHELL = /bin/sh ++SPL = /usr/src/linux-3.2.33-go ++SPL_OBJ = /usr/src/linux-3.2.33-go ++SPL_SYMBOLS = NONE ++SPL_VERSION = 0.6.0-rc12 ++STRIP = strip ++TARGET_ASM_DIR = asm-x86_64 ++VENDOR = gentoo ++VERSION = 0.6.0 ++ZFS_CONFIG = all ++ZFS_META_ALIAS = zfs-0.6.0-rc12 ++ZFS_META_AUTHOR = Sun Microsystems/Oracle, Lawrence Livermore National Laboratory ++ZFS_META_DATA = ++ZFS_META_LICENSE = CDDL ++ZFS_META_LT_AGE = ++ZFS_META_LT_CURRENT = ++ZFS_META_LT_REVISION = ++ZFS_META_NAME = zfs ++ZFS_META_RELEASE = rc12 ++ZFS_META_VERSION = 0.6.0 ++ZLIB = -lz ++abs_builddir = /root/zfs-0.6.0-rc12/include/sys/fm ++abs_srcdir = /root/zfs-0.6.0-rc12/include/sys/fm ++abs_top_builddir = /root/zfs-0.6.0-rc12 ++abs_top_srcdir = /root/zfs-0.6.0-rc12 ++ac_ct_AR = ar ++ac_ct_CC = gcc ++ac_ct_DUMPBIN = ++am__include = include ++am__leading_dot = . ++am__quote = ++am__tar = $${TAR-tar} chof - "$$tardir" ++am__untar = $${TAR-tar} xf - ++bindir = ${exec_prefix}/bin ++build = x86_64-unknown-linux-gnu ++build_alias = ++build_cpu = x86_64 ++build_os = linux-gnu ++build_vendor = unknown ++builddir = . 
++datadir = ${datarootdir} ++datarootdir = ${prefix}/share ++docdir = ${datarootdir}/doc/${PACKAGE} ++dvidir = ${docdir} ++exec_prefix = ${prefix} ++host = x86_64-unknown-linux-gnu ++host_alias = ++host_cpu = x86_64 ++host_os = linux-gnu ++host_vendor = unknown ++htmldir = ${docdir} ++includedir = ${prefix}/include ++infodir = ${datarootdir}/info ++install_sh = ${SHELL} /root/zfs-0.6.0-rc12/config/install-sh ++libdir = ${exec_prefix}/lib ++libexecdir = ${exec_prefix}/libexec ++localedir = ${datarootdir}/locale ++localstatedir = ${prefix}/var ++mandir = ${datarootdir}/man ++mkdir_p = /bin/mkdir -p ++oldincludedir = /usr/include ++pdfdir = ${docdir} ++prefix = /usr/local ++program_transform_name = s,x,x, ++psdir = ${docdir} ++sbindir = ${exec_prefix}/sbin ++sharedstatedir = ${prefix}/com ++srcdir = . ++sysconfdir = ${prefix}/etc ++target = x86_64-unknown-linux-gnu ++target_alias = ++target_cpu = x86_64 ++target_os = linux-gnu ++target_vendor = unknown ++top_build_prefix = ../../../ ++top_builddir = ../../.. ++top_srcdir = ../../.. ++udevdir = ${exec_prefix}/lib/udev ++udevruledir = ${udevdir}/rules.d ++SUBDIRS = fs ++COMMON_H = \ ++ $(top_srcdir)/include/sys/fm/protocol.h \ ++ $(top_srcdir)/include/sys/fm/util.h ++ ++KERNEL_H = ++USER_H = ++EXTRA_DIST = $(COMMON_H) $(KERNEL_H) $(USER_H) ++libzfsdir = $(includedir)/libzfs/sys/fm ++libzfs_HEADERS = $(COMMON_H) $(USER_H) ++#kerneldir = /usr/src/zfs-$(ZFS_META_VERSION)-$(ZFS_META_RELEASE)/$(LINUX_VERSION)/sys/fm ++#kernel_HEADERS = $(COMMON_H) $(KERNEL_H) ++all: all-recursive ++ ++.SUFFIXES: ++$(srcdir)/Makefile.in: # $(srcdir)/Makefile.am $(am__configure_deps) ++ @for dep in $?; do \ ++ case '$(am__configure_deps)' in \ ++ *$$dep*) \ ++ ( cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh ) \ ++ && { if test -f $@; then exit 0; else break; fi; }; \ ++ exit 1;; \ ++ esac; \ ++ done; \ ++ echo ' cd $(top_srcdir) && $(AUTOMAKE) --gnu include/sys/fm/Makefile'; \ ++ $(am__cd) $(top_srcdir) && \ ++ $(AUTOMAKE) --gnu include/sys/fm/Makefile ++.PRECIOUS: Makefile ++Makefile: $(srcdir)/Makefile.in $(top_builddir)/config.status ++ @case '$?' 
in \ ++ *config.status*) \ ++ cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh;; \ ++ *) \ ++ echo ' cd $(top_builddir) && $(SHELL) ./config.status $(subdir)/$@ $(am__depfiles_maybe)'; \ ++ cd $(top_builddir) && $(SHELL) ./config.status $(subdir)/$@ $(am__depfiles_maybe);; \ ++ esac; ++ ++$(top_builddir)/config.status: $(top_srcdir)/configure $(CONFIG_STATUS_DEPENDENCIES) ++ cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh ++ ++$(top_srcdir)/configure: # $(am__configure_deps) ++ cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh ++$(ACLOCAL_M4): # $(am__aclocal_m4_deps) ++ cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh ++$(am__aclocal_m4_deps): ++ ++mostlyclean-libtool: ++ -rm -f *.lo ++ ++clean-libtool: ++ -rm -rf .libs _libs ++install-kernelHEADERS: $(kernel_HEADERS) ++ @$(NORMAL_INSTALL) ++ @list='$(kernel_HEADERS)'; test -n "$(kerneldir)" || list=; \ ++ if test -n "$$list"; then \ ++ echo " $(MKDIR_P) '$(DESTDIR)$(kerneldir)'"; \ ++ $(MKDIR_P) "$(DESTDIR)$(kerneldir)" || exit 1; \ ++ fi; \ ++ for p in $$list; do \ ++ if test -f "$$p"; then d=; else d="$(srcdir)/"; fi; \ ++ echo "$$d$$p"; \ ++ done | $(am__base_list) | \ ++ while read files; do \ ++ echo " $(INSTALL_HEADER) $$files '$(DESTDIR)$(kerneldir)'"; \ ++ $(INSTALL_HEADER) $$files "$(DESTDIR)$(kerneldir)" || exit $$?; \ ++ done ++ ++uninstall-kernelHEADERS: ++ @$(NORMAL_UNINSTALL) ++ @list='$(kernel_HEADERS)'; test -n "$(kerneldir)" || list=; \ ++ files=`for p in $$list; do echo $$p; done | sed -e 's|^.*/||'`; \ ++ dir='$(DESTDIR)$(kerneldir)'; $(am__uninstall_files_from_dir) ++install-libzfsHEADERS: $(libzfs_HEADERS) ++ @$(NORMAL_INSTALL) ++ @list='$(libzfs_HEADERS)'; test -n "$(libzfsdir)" || list=; \ ++ if test -n "$$list"; then \ ++ echo " $(MKDIR_P) '$(DESTDIR)$(libzfsdir)'"; \ ++ $(MKDIR_P) "$(DESTDIR)$(libzfsdir)" || exit 1; \ ++ fi; \ ++ for p in $$list; do \ ++ if test -f "$$p"; then d=; else d="$(srcdir)/"; fi; \ ++ echo "$$d$$p"; \ ++ done | $(am__base_list) | \ ++ while read files; do \ ++ echo " $(INSTALL_HEADER) $$files '$(DESTDIR)$(libzfsdir)'"; \ ++ $(INSTALL_HEADER) $$files "$(DESTDIR)$(libzfsdir)" || exit $$?; \ ++ done ++ ++uninstall-libzfsHEADERS: ++ @$(NORMAL_UNINSTALL) ++ @list='$(libzfs_HEADERS)'; test -n "$(libzfsdir)" || list=; \ ++ files=`for p in $$list; do echo $$p; done | sed -e 's|^.*/||'`; \ ++ dir='$(DESTDIR)$(libzfsdir)'; $(am__uninstall_files_from_dir) ++ ++# This directory's subdirectories are mostly independent; you can cd ++# into them and run `make' without going through this Makefile. ++# To change the values of `make' variables: instead of editing Makefiles, ++# (1) if the variable is set in `config.status', edit `config.status' ++# (which will cause the Makefiles to be regenerated when you run `make'); ++# (2) otherwise, pass the desired values on the `make' command line. 
++$(RECURSIVE_TARGETS): ++ @fail= failcom='exit 1'; \ ++ for f in x $$MAKEFLAGS; do \ ++ case $$f in \ ++ *=* | --[!k]*);; \ ++ *k*) failcom='fail=yes';; \ ++ esac; \ ++ done; \ ++ dot_seen=no; \ ++ target=`echo $@ | sed s/-recursive//`; \ ++ list='$(SUBDIRS)'; for subdir in $$list; do \ ++ echo "Making $$target in $$subdir"; \ ++ if test "$$subdir" = "."; then \ ++ dot_seen=yes; \ ++ local_target="$$target-am"; \ ++ else \ ++ local_target="$$target"; \ ++ fi; \ ++ ($(am__cd) $$subdir && $(MAKE) $(AM_MAKEFLAGS) $$local_target) \ ++ || eval $$failcom; \ ++ done; \ ++ if test "$$dot_seen" = "no"; then \ ++ $(MAKE) $(AM_MAKEFLAGS) "$$target-am" || exit 1; \ ++ fi; test -z "$$fail" ++ ++$(RECURSIVE_CLEAN_TARGETS): ++ @fail= failcom='exit 1'; \ ++ for f in x $$MAKEFLAGS; do \ ++ case $$f in \ ++ *=* | --[!k]*);; \ ++ *k*) failcom='fail=yes';; \ ++ esac; \ ++ done; \ ++ dot_seen=no; \ ++ case "$@" in \ ++ distclean-* | maintainer-clean-*) list='$(DIST_SUBDIRS)' ;; \ ++ *) list='$(SUBDIRS)' ;; \ ++ esac; \ ++ rev=''; for subdir in $$list; do \ ++ if test "$$subdir" = "."; then :; else \ ++ rev="$$subdir $$rev"; \ ++ fi; \ ++ done; \ ++ rev="$$rev ."; \ ++ target=`echo $@ | sed s/-recursive//`; \ ++ for subdir in $$rev; do \ ++ echo "Making $$target in $$subdir"; \ ++ if test "$$subdir" = "."; then \ ++ local_target="$$target-am"; \ ++ else \ ++ local_target="$$target"; \ ++ fi; \ ++ ($(am__cd) $$subdir && $(MAKE) $(AM_MAKEFLAGS) $$local_target) \ ++ || eval $$failcom; \ ++ done && test -z "$$fail" ++tags-recursive: ++ list='$(SUBDIRS)'; for subdir in $$list; do \ ++ test "$$subdir" = . || ($(am__cd) $$subdir && $(MAKE) $(AM_MAKEFLAGS) tags); \ ++ done ++ctags-recursive: ++ list='$(SUBDIRS)'; for subdir in $$list; do \ ++ test "$$subdir" = . || ($(am__cd) $$subdir && $(MAKE) $(AM_MAKEFLAGS) ctags); \ ++ done ++ ++ID: $(HEADERS) $(SOURCES) $(LISP) $(TAGS_FILES) ++ list='$(SOURCES) $(HEADERS) $(LISP) $(TAGS_FILES)'; \ ++ unique=`for i in $$list; do \ ++ if test -f "$$i"; then echo $$i; else echo $(srcdir)/$$i; fi; \ ++ done | \ ++ $(AWK) '{ files[$$0] = 1; nonempty = 1; } \ ++ END { if (nonempty) { for (i in files) print i; }; }'`; \ ++ mkid -fID $$unique ++tags: TAGS ++ ++TAGS: tags-recursive $(HEADERS) $(SOURCES) $(TAGS_DEPENDENCIES) \ ++ $(TAGS_FILES) $(LISP) ++ set x; \ ++ here=`pwd`; \ ++ if ($(ETAGS) --etags-include --version) >/dev/null 2>&1; then \ ++ include_option=--etags-include; \ ++ empty_fix=.; \ ++ else \ ++ include_option=--include; \ ++ empty_fix=; \ ++ fi; \ ++ list='$(SUBDIRS)'; for subdir in $$list; do \ ++ if test "$$subdir" = .; then :; else \ ++ test ! 
-f $$subdir/TAGS || \ ++ set "$$@" "$$include_option=$$here/$$subdir/TAGS"; \ ++ fi; \ ++ done; \ ++ list='$(SOURCES) $(HEADERS) $(LISP) $(TAGS_FILES)'; \ ++ unique=`for i in $$list; do \ ++ if test -f "$$i"; then echo $$i; else echo $(srcdir)/$$i; fi; \ ++ done | \ ++ $(AWK) '{ files[$$0] = 1; nonempty = 1; } \ ++ END { if (nonempty) { for (i in files) print i; }; }'`; \ ++ shift; \ ++ if test -z "$(ETAGS_ARGS)$$*$$unique"; then :; else \ ++ test -n "$$unique" || unique=$$empty_fix; \ ++ if test $$# -gt 0; then \ ++ $(ETAGS) $(ETAGSFLAGS) $(AM_ETAGSFLAGS) $(ETAGS_ARGS) \ ++ "$$@" $$unique; \ ++ else \ ++ $(ETAGS) $(ETAGSFLAGS) $(AM_ETAGSFLAGS) $(ETAGS_ARGS) \ ++ $$unique; \ ++ fi; \ ++ fi ++ctags: CTAGS ++CTAGS: ctags-recursive $(HEADERS) $(SOURCES) $(TAGS_DEPENDENCIES) \ ++ $(TAGS_FILES) $(LISP) ++ list='$(SOURCES) $(HEADERS) $(LISP) $(TAGS_FILES)'; \ ++ unique=`for i in $$list; do \ ++ if test -f "$$i"; then echo $$i; else echo $(srcdir)/$$i; fi; \ ++ done | \ ++ $(AWK) '{ files[$$0] = 1; nonempty = 1; } \ ++ END { if (nonempty) { for (i in files) print i; }; }'`; \ ++ test -z "$(CTAGS_ARGS)$$unique" \ ++ || $(CTAGS) $(CTAGSFLAGS) $(AM_CTAGSFLAGS) $(CTAGS_ARGS) \ ++ $$unique ++ ++GTAGS: ++ here=`$(am__cd) $(top_builddir) && pwd` \ ++ && $(am__cd) $(top_srcdir) \ ++ && gtags -i $(GTAGS_ARGS) "$$here" ++ ++distclean-tags: ++ -rm -f TAGS ID GTAGS GRTAGS GSYMS GPATH tags ++ ++distdir: $(DISTFILES) ++ @srcdirstrip=`echo "$(srcdir)" | sed 's/[].[^$$\\*]/\\\\&/g'`; \ ++ topsrcdirstrip=`echo "$(top_srcdir)" | sed 's/[].[^$$\\*]/\\\\&/g'`; \ ++ list='$(DISTFILES)'; \ ++ dist_files=`for file in $$list; do echo $$file; done | \ ++ sed -e "s|^$$srcdirstrip/||;t" \ ++ -e "s|^$$topsrcdirstrip/|$(top_builddir)/|;t"`; \ ++ case $$dist_files in \ ++ */*) $(MKDIR_P) `echo "$$dist_files" | \ ++ sed '/\//!d;s|^|$(distdir)/|;s,/[^/]*$$,,' | \ ++ sort -u` ;; \ ++ esac; \ ++ for file in $$dist_files; do \ ++ if test -f $$file || test -d $$file; then d=.; else d=$(srcdir); fi; \ ++ if test -d $$d/$$file; then \ ++ dir=`echo "/$$file" | sed -e 's,/[^/]*$$,,'`; \ ++ if test -d "$(distdir)/$$file"; then \ ++ find "$(distdir)/$$file" -type d ! -perm -700 -exec chmod u+rwx {} \;; \ ++ fi; \ ++ if test -d $(srcdir)/$$file && test $$d != $(srcdir); then \ ++ cp -fpR $(srcdir)/$$file "$(distdir)$$dir" || exit 1; \ ++ find "$(distdir)/$$file" -type d ! 
-perm -700 -exec chmod u+rwx {} \;; \ ++ fi; \ ++ cp -fpR $$d/$$file "$(distdir)$$dir" || exit 1; \ ++ else \ ++ test -f "$(distdir)/$$file" \ ++ || cp -p $$d/$$file "$(distdir)/$$file" \ ++ || exit 1; \ ++ fi; \ ++ done ++ @list='$(DIST_SUBDIRS)'; for subdir in $$list; do \ ++ if test "$$subdir" = .; then :; else \ ++ $(am__make_dryrun) \ ++ || test -d "$(distdir)/$$subdir" \ ++ || $(MKDIR_P) "$(distdir)/$$subdir" \ ++ || exit 1; \ ++ dir1=$$subdir; dir2="$(distdir)/$$subdir"; \ ++ $(am__relativize); \ ++ new_distdir=$$reldir; \ ++ dir1=$$subdir; dir2="$(top_distdir)"; \ ++ $(am__relativize); \ ++ new_top_distdir=$$reldir; \ ++ echo " (cd $$subdir && $(MAKE) $(AM_MAKEFLAGS) top_distdir="$$new_top_distdir" distdir="$$new_distdir" \\"; \ ++ echo " am__remove_distdir=: am__skip_length_check=: am__skip_mode_fix=: distdir)"; \ ++ ($(am__cd) $$subdir && \ ++ $(MAKE) $(AM_MAKEFLAGS) \ ++ top_distdir="$$new_top_distdir" \ ++ distdir="$$new_distdir" \ ++ am__remove_distdir=: \ ++ am__skip_length_check=: \ ++ am__skip_mode_fix=: \ ++ distdir) \ ++ || exit 1; \ ++ fi; \ ++ done ++check-am: all-am ++check: check-recursive ++all-am: Makefile $(HEADERS) ++installdirs: installdirs-recursive ++installdirs-am: ++ for dir in "$(DESTDIR)$(kerneldir)" "$(DESTDIR)$(libzfsdir)"; do \ ++ test -z "$$dir" || $(MKDIR_P) "$$dir"; \ ++ done ++install: install-recursive ++install-exec: install-exec-recursive ++install-data: install-data-recursive ++uninstall: uninstall-recursive ++ ++install-am: all-am ++ @$(MAKE) $(AM_MAKEFLAGS) install-exec-am install-data-am ++ ++installcheck: installcheck-recursive ++install-strip: ++ if test -z '$(STRIP)'; then \ ++ $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \ ++ install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \ ++ install; \ ++ else \ ++ $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \ ++ install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \ ++ "INSTALL_PROGRAM_ENV=STRIPPROG='$(STRIP)'" install; \ ++ fi ++mostlyclean-generic: ++ ++clean-generic: ++ ++distclean-generic: ++ -test -z "$(CONFIG_CLEAN_FILES)" || rm -f $(CONFIG_CLEAN_FILES) ++ -test . = "$(srcdir)" || test -z "$(CONFIG_CLEAN_VPATH_FILES)" || rm -f $(CONFIG_CLEAN_VPATH_FILES) ++ ++maintainer-clean-generic: ++ @echo "This command is intended for maintainers to use" ++ @echo "it deletes files that may require special tools to rebuild." 
++clean: clean-recursive ++ ++clean-am: clean-generic clean-libtool mostlyclean-am ++ ++distclean: distclean-recursive ++ -rm -f Makefile ++distclean-am: clean-am distclean-generic distclean-tags ++ ++dvi: dvi-recursive ++ ++dvi-am: ++ ++html: html-recursive ++ ++html-am: ++ ++info: info-recursive ++ ++info-am: ++ ++install-data-am: install-kernelHEADERS install-libzfsHEADERS ++ ++install-dvi: install-dvi-recursive ++ ++install-dvi-am: ++ ++install-exec-am: ++ ++install-html: install-html-recursive ++ ++install-html-am: ++ ++install-info: install-info-recursive ++ ++install-info-am: ++ ++install-man: ++ ++install-pdf: install-pdf-recursive ++ ++install-pdf-am: ++ ++install-ps: install-ps-recursive ++ ++install-ps-am: ++ ++installcheck-am: ++ ++maintainer-clean: maintainer-clean-recursive ++ -rm -f Makefile ++maintainer-clean-am: distclean-am maintainer-clean-generic ++ ++mostlyclean: mostlyclean-recursive ++ ++mostlyclean-am: mostlyclean-generic mostlyclean-libtool ++ ++pdf: pdf-recursive ++ ++pdf-am: ++ ++ps: ps-recursive ++ ++ps-am: ++ ++uninstall-am: uninstall-kernelHEADERS uninstall-libzfsHEADERS ++ ++.MAKE: $(RECURSIVE_CLEAN_TARGETS) $(RECURSIVE_TARGETS) ctags-recursive \ ++ install-am install-strip tags-recursive ++ ++.PHONY: $(RECURSIVE_CLEAN_TARGETS) $(RECURSIVE_TARGETS) CTAGS GTAGS \ ++ all all-am check check-am clean clean-generic clean-libtool \ ++ ctags ctags-recursive distclean distclean-generic \ ++ distclean-libtool distclean-tags distdir dvi dvi-am html \ ++ html-am info info-am install install-am install-data \ ++ install-data-am install-dvi install-dvi-am install-exec \ ++ install-exec-am install-html install-html-am install-info \ ++ install-info-am install-kernelHEADERS install-libzfsHEADERS \ ++ install-man install-pdf install-pdf-am install-ps \ ++ install-ps-am install-strip installcheck installcheck-am \ ++ installdirs installdirs-am maintainer-clean \ ++ maintainer-clean-generic mostlyclean mostlyclean-generic \ ++ mostlyclean-libtool pdf pdf-am ps ps-am tags tags-recursive \ ++ uninstall uninstall-am uninstall-kernelHEADERS \ ++ uninstall-libzfsHEADERS ++ ++ ++# Tell versions [3.59,3.63) of GNU make to not export all variables. ++# Otherwise a system limit (for SysV at least) may be exceeded. ++.NOEXPORT: +diff -uNr linux-3.2.33-go.orig/include/zfs/sys/fm/Makefile.am linux-3.2.33-go/include/zfs/sys/fm/Makefile.am +--- linux-3.2.33-go.orig/include/zfs/sys/fm/Makefile.am 1970-01-01 01:00:00.000000000 +0100 ++++ linux-3.2.33-go/include/zfs/sys/fm/Makefile.am 2012-11-16 23:25:34.343039404 +0100 +@@ -0,0 +1,21 @@ ++SUBDIRS = fs ++ ++COMMON_H = \ ++ $(top_srcdir)/include/sys/fm/protocol.h \ ++ $(top_srcdir)/include/sys/fm/util.h ++ ++KERNEL_H = ++ ++USER_H = ++ ++EXTRA_DIST = $(COMMON_H) $(KERNEL_H) $(USER_H) ++ ++if CONFIG_USER ++libzfsdir = $(includedir)/libzfs/sys/fm ++libzfs_HEADERS = $(COMMON_H) $(USER_H) ++endif ++ ++if CONFIG_KERNEL ++kerneldir = /usr/src/zfs-$(ZFS_META_VERSION)-$(ZFS_META_RELEASE)/$(LINUX_VERSION)/sys/fm ++kernel_HEADERS = $(COMMON_H) $(KERNEL_H) ++endif +diff -uNr linux-3.2.33-go.orig/include/zfs/sys/fm/Makefile.in linux-3.2.33-go/include/zfs/sys/fm/Makefile.in +--- linux-3.2.33-go.orig/include/zfs/sys/fm/Makefile.in 1970-01-01 01:00:00.000000000 +0100 ++++ linux-3.2.33-go/include/zfs/sys/fm/Makefile.in 2012-11-16 23:25:34.344039393 +0100 +@@ -0,0 +1,813 @@ ++# Makefile.in generated by automake 1.11.6 from Makefile.am. 
++# @configure_input@ ++ ++# Copyright (C) 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001, 2002, ++# 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010, 2011 Free Software ++# Foundation, Inc. ++# This Makefile.in is free software; the Free Software Foundation ++# gives unlimited permission to copy and/or distribute it, ++# with or without modifications, as long as this notice is preserved. ++ ++# This program is distributed in the hope that it will be useful, ++# but WITHOUT ANY WARRANTY, to the extent permitted by law; without ++# even the implied warranty of MERCHANTABILITY or FITNESS FOR A ++# PARTICULAR PURPOSE. ++ ++@SET_MAKE@ ++ ++VPATH = @srcdir@ ++am__make_dryrun = \ ++ { \ ++ am__dry=no; \ ++ case $$MAKEFLAGS in \ ++ *\\[\ \ ]*) \ ++ echo 'am--echo: ; @echo "AM" OK' | $(MAKE) -f - 2>/dev/null \ ++ | grep '^AM OK$$' >/dev/null || am__dry=yes;; \ ++ *) \ ++ for am__flg in $$MAKEFLAGS; do \ ++ case $$am__flg in \ ++ *=*|--*) ;; \ ++ *n*) am__dry=yes; break;; \ ++ esac; \ ++ done;; \ ++ esac; \ ++ test $$am__dry = yes; \ ++ } ++pkgdatadir = $(datadir)/@PACKAGE@ ++pkgincludedir = $(includedir)/@PACKAGE@ ++pkglibdir = $(libdir)/@PACKAGE@ ++pkglibexecdir = $(libexecdir)/@PACKAGE@ ++am__cd = CDPATH="$${ZSH_VERSION+.}$(PATH_SEPARATOR)" && cd ++install_sh_DATA = $(install_sh) -c -m 644 ++install_sh_PROGRAM = $(install_sh) -c ++install_sh_SCRIPT = $(install_sh) -c ++INSTALL_HEADER = $(INSTALL_DATA) ++transform = $(program_transform_name) ++NORMAL_INSTALL = : ++PRE_INSTALL = : ++POST_INSTALL = : ++NORMAL_UNINSTALL = : ++PRE_UNINSTALL = : ++POST_UNINSTALL = : ++build_triplet = @build@ ++host_triplet = @host@ ++target_triplet = @target@ ++subdir = include/sys/fm ++DIST_COMMON = $(am__kernel_HEADERS_DIST) $(am__libzfs_HEADERS_DIST) \ ++ $(srcdir)/Makefile.am $(srcdir)/Makefile.in ++ACLOCAL_M4 = $(top_srcdir)/aclocal.m4 ++am__aclocal_m4_deps = \ ++ $(top_srcdir)/config/always-no-unused-but-set-variable.m4 \ ++ $(top_srcdir)/config/kernel-automount.m4 \ ++ $(top_srcdir)/config/kernel-bdev-block-device-operations.m4 \ ++ $(top_srcdir)/config/kernel-bdev-logical-size.m4 \ ++ $(top_srcdir)/config/kernel-bdi-setup-and-register.m4 \ ++ $(top_srcdir)/config/kernel-bdi.m4 \ ++ $(top_srcdir)/config/kernel-bio-empty-barrier.m4 \ ++ $(top_srcdir)/config/kernel-bio-end-io-t-args.m4 \ ++ $(top_srcdir)/config/kernel-bio-failfast.m4 \ ++ $(top_srcdir)/config/kernel-bio-rw-syncio.m4 \ ++ $(top_srcdir)/config/kernel-blk-end-request.m4 \ ++ $(top_srcdir)/config/kernel-blk-fetch-request.m4 \ ++ $(top_srcdir)/config/kernel-blk-queue-discard.m4 \ ++ $(top_srcdir)/config/kernel-blk-queue-flush.m4 \ ++ $(top_srcdir)/config/kernel-blk-queue-io-opt.m4 \ ++ $(top_srcdir)/config/kernel-blk-queue-max-hw-sectors.m4 \ ++ $(top_srcdir)/config/kernel-blk-queue-max-segments.m4 \ ++ $(top_srcdir)/config/kernel-blk-queue-nonrot.m4 \ ++ $(top_srcdir)/config/kernel-blk-queue-physical-block-size.m4 \ ++ $(top_srcdir)/config/kernel-blk-requeue-request.m4 \ ++ $(top_srcdir)/config/kernel-blk-rq-bytes.m4 \ ++ $(top_srcdir)/config/kernel-blk-rq-pos.m4 \ ++ $(top_srcdir)/config/kernel-blk-rq-sectors.m4 \ ++ $(top_srcdir)/config/kernel-blkdev-get-by-path.m4 \ ++ $(top_srcdir)/config/kernel-blkdev-get.m4 \ ++ $(top_srcdir)/config/kernel-check-disk-size-change.m4 \ ++ $(top_srcdir)/config/kernel-clear-inode.m4 \ ++ $(top_srcdir)/config/kernel-commit-metadata.m4 \ ++ $(top_srcdir)/config/kernel-create-nameidata.m4 \ ++ $(top_srcdir)/config/kernel-d-make-root.m4 \ ++ $(top_srcdir)/config/kernel-d-obtain-alias.m4 \ ++ 
$(top_srcdir)/config/kernel-discard-granularity.m4 \ ++ $(top_srcdir)/config/kernel-elevator-change.m4 \ ++ $(top_srcdir)/config/kernel-encode-fh-inode.m4 \ ++ $(top_srcdir)/config/kernel-evict-inode.m4 \ ++ $(top_srcdir)/config/kernel-fallocate.m4 \ ++ $(top_srcdir)/config/kernel-fmode-t.m4 \ ++ $(top_srcdir)/config/kernel-fsync.m4 \ ++ $(top_srcdir)/config/kernel-get-disk-ro.m4 \ ++ $(top_srcdir)/config/kernel-get-gendisk.m4 \ ++ $(top_srcdir)/config/kernel-insert-inode-locked.m4 \ ++ $(top_srcdir)/config/kernel-invalidate-bdev-args.m4 \ ++ $(top_srcdir)/config/kernel-kobj-name-len.m4 \ ++ $(top_srcdir)/config/kernel-lookup-nameidata.m4 \ ++ $(top_srcdir)/config/kernel-mkdir-umode-t.m4 \ ++ $(top_srcdir)/config/kernel-mount-nodev.m4 \ ++ $(top_srcdir)/config/kernel-open-bdev-exclusive.m4 \ ++ $(top_srcdir)/config/kernel-rq-for-each_segment.m4 \ ++ $(top_srcdir)/config/kernel-rq-is_sync.m4 \ ++ $(top_srcdir)/config/kernel-security-inode-init.m4 \ ++ $(top_srcdir)/config/kernel-set-nlink.m4 \ ++ $(top_srcdir)/config/kernel-sget-args.m4 \ ++ $(top_srcdir)/config/kernel-show-options.m4 \ ++ $(top_srcdir)/config/kernel-shrink.m4 \ ++ $(top_srcdir)/config/kernel-truncate-range.m4 \ ++ $(top_srcdir)/config/kernel-truncate-setsize.m4 \ ++ $(top_srcdir)/config/kernel-xattr-handler.m4 \ ++ $(top_srcdir)/config/kernel.m4 \ ++ $(top_srcdir)/config/user-arch.m4 \ ++ $(top_srcdir)/config/user-frame-larger-than.m4 \ ++ $(top_srcdir)/config/user-ioctl.m4 \ ++ $(top_srcdir)/config/user-libblkid.m4 \ ++ $(top_srcdir)/config/user-libuuid.m4 \ ++ $(top_srcdir)/config/user-nptl_guard_within_stack.m4 \ ++ $(top_srcdir)/config/user-selinux.m4 \ ++ $(top_srcdir)/config/user-udev.m4 \ ++ $(top_srcdir)/config/user-zlib.m4 $(top_srcdir)/config/user.m4 \ ++ $(top_srcdir)/config/zfs-build.m4 \ ++ $(top_srcdir)/config/zfs-meta.m4 $(top_srcdir)/configure.ac ++am__configure_deps = $(am__aclocal_m4_deps) $(CONFIGURE_DEPENDENCIES) \ ++ $(ACLOCAL_M4) ++mkinstalldirs = $(install_sh) -d ++CONFIG_HEADER = $(top_builddir)/zfs_config.h ++CONFIG_CLEAN_FILES = ++CONFIG_CLEAN_VPATH_FILES = ++AM_V_GEN = $(am__v_GEN_@AM_V@) ++am__v_GEN_ = $(am__v_GEN_@AM_DEFAULT_V@) ++am__v_GEN_0 = @echo " GEN " $@; ++AM_V_at = $(am__v_at_@AM_V@) ++am__v_at_ = $(am__v_at_@AM_DEFAULT_V@) ++am__v_at_0 = @ ++SOURCES = ++DIST_SOURCES = ++RECURSIVE_TARGETS = all-recursive check-recursive dvi-recursive \ ++ html-recursive info-recursive install-data-recursive \ ++ install-dvi-recursive install-exec-recursive \ ++ install-html-recursive install-info-recursive \ ++ install-pdf-recursive install-ps-recursive install-recursive \ ++ installcheck-recursive installdirs-recursive pdf-recursive \ ++ ps-recursive uninstall-recursive ++am__can_run_installinfo = \ ++ case $$AM_UPDATE_INFO_DIR in \ ++ n|no|NO) false;; \ ++ *) (install-info --version) >/dev/null 2>&1;; \ ++ esac ++am__kernel_HEADERS_DIST = $(top_srcdir)/include/sys/fm/protocol.h \ ++ $(top_srcdir)/include/sys/fm/util.h ++am__vpath_adj_setup = srcdirstrip=`echo "$(srcdir)" | sed 's|.|.|g'`; ++am__vpath_adj = case $$p in \ ++ $(srcdir)/*) f=`echo "$$p" | sed "s|^$$srcdirstrip/||"`;; \ ++ *) f=$$p;; \ ++ esac; ++am__strip_dir = f=`echo $$p | sed -e 's|^.*/||'`; ++am__install_max = 40 ++am__nobase_strip_setup = \ ++ srcdirstrip=`echo "$(srcdir)" | sed 's/[].[^$$\\*|]/\\\\&/g'` ++am__nobase_strip = \ ++ for p in $$list; do echo "$$p"; done | sed -e "s|$$srcdirstrip/||" ++am__nobase_list = $(am__nobase_strip_setup); \ ++ for p in $$list; do echo "$$p $$p"; done | \ ++ sed "s| $$srcdirstrip/| |;"' / .*\//!s/ 
.*/ ./; s,\( .*\)/[^/]*$$,\1,' | \ ++ $(AWK) 'BEGIN { files["."] = "" } { files[$$2] = files[$$2] " " $$1; \ ++ if (++n[$$2] == $(am__install_max)) \ ++ { print $$2, files[$$2]; n[$$2] = 0; files[$$2] = "" } } \ ++ END { for (dir in files) print dir, files[dir] }' ++am__base_list = \ ++ sed '$$!N;$$!N;$$!N;$$!N;$$!N;$$!N;$$!N;s/\n/ /g' | \ ++ sed '$$!N;$$!N;$$!N;$$!N;s/\n/ /g' ++am__uninstall_files_from_dir = { \ ++ test -z "$$files" \ ++ || { test ! -d "$$dir" && test ! -f "$$dir" && test ! -r "$$dir"; } \ ++ || { echo " ( cd '$$dir' && rm -f" $$files ")"; \ ++ $(am__cd) "$$dir" && rm -f $$files; }; \ ++ } ++am__installdirs = "$(DESTDIR)$(kerneldir)" "$(DESTDIR)$(libzfsdir)" ++am__libzfs_HEADERS_DIST = $(top_srcdir)/include/sys/fm/protocol.h \ ++ $(top_srcdir)/include/sys/fm/util.h ++HEADERS = $(kernel_HEADERS) $(libzfs_HEADERS) ++RECURSIVE_CLEAN_TARGETS = mostlyclean-recursive clean-recursive \ ++ distclean-recursive maintainer-clean-recursive ++AM_RECURSIVE_TARGETS = $(RECURSIVE_TARGETS:-recursive=) \ ++ $(RECURSIVE_CLEAN_TARGETS:-recursive=) tags TAGS ctags CTAGS \ ++ distdir ++ETAGS = etags ++CTAGS = ctags ++DIST_SUBDIRS = $(SUBDIRS) ++DISTFILES = $(DIST_COMMON) $(DIST_SOURCES) $(TEXINFOS) $(EXTRA_DIST) ++am__relativize = \ ++ dir0=`pwd`; \ ++ sed_first='s,^\([^/]*\)/.*$$,\1,'; \ ++ sed_rest='s,^[^/]*/*,,'; \ ++ sed_last='s,^.*/\([^/]*\)$$,\1,'; \ ++ sed_butlast='s,/*[^/]*$$,,'; \ ++ while test -n "$$dir1"; do \ ++ first=`echo "$$dir1" | sed -e "$$sed_first"`; \ ++ if test "$$first" != "."; then \ ++ if test "$$first" = ".."; then \ ++ dir2=`echo "$$dir0" | sed -e "$$sed_last"`/"$$dir2"; \ ++ dir0=`echo "$$dir0" | sed -e "$$sed_butlast"`; \ ++ else \ ++ first2=`echo "$$dir2" | sed -e "$$sed_first"`; \ ++ if test "$$first2" = "$$first"; then \ ++ dir2=`echo "$$dir2" | sed -e "$$sed_rest"`; \ ++ else \ ++ dir2="../$$dir2"; \ ++ fi; \ ++ dir0="$$dir0"/"$$first"; \ ++ fi; \ ++ fi; \ ++ dir1=`echo "$$dir1" | sed -e "$$sed_rest"`; \ ++ done; \ ++ reldir="$$dir2" ++ACLOCAL = @ACLOCAL@ ++ALIEN = @ALIEN@ ++ALIEN_VERSION = @ALIEN_VERSION@ ++AMTAR = @AMTAR@ ++AM_DEFAULT_VERBOSITY = @AM_DEFAULT_VERBOSITY@ ++AR = @AR@ ++AUTOCONF = @AUTOCONF@ ++AUTOHEADER = @AUTOHEADER@ ++AUTOMAKE = @AUTOMAKE@ ++AWK = @AWK@ ++CC = @CC@ ++CCAS = @CCAS@ ++CCASDEPMODE = @CCASDEPMODE@ ++CCASFLAGS = @CCASFLAGS@ ++CCDEPMODE = @CCDEPMODE@ ++CFLAGS = @CFLAGS@ ++CPP = @CPP@ ++CPPFLAGS = @CPPFLAGS@ ++CYGPATH_W = @CYGPATH_W@ ++DEBUG_CFLAGS = @DEBUG_CFLAGS@ ++DEBUG_DMU_TX = @DEBUG_DMU_TX@ ++DEBUG_STACKFLAGS = @DEBUG_STACKFLAGS@ ++DEBUG_ZFS = @DEBUG_ZFS@ ++DEFAULT_INIT_DIR = @DEFAULT_INIT_DIR@ ++DEFAULT_INIT_SCRIPT = @DEFAULT_INIT_SCRIPT@ ++DEFAULT_PACKAGE = @DEFAULT_PACKAGE@ ++DEFS = @DEFS@ ++DEPDIR = @DEPDIR@ ++DLLTOOL = @DLLTOOL@ ++DPKG = @DPKG@ ++DPKGBUILD = @DPKGBUILD@ ++DPKGBUILD_VERSION = @DPKGBUILD_VERSION@ ++DPKG_VERSION = @DPKG_VERSION@ ++DSYMUTIL = @DSYMUTIL@ ++DUMPBIN = @DUMPBIN@ ++ECHO_C = @ECHO_C@ ++ECHO_N = @ECHO_N@ ++ECHO_T = @ECHO_T@ ++EGREP = @EGREP@ ++EXEEXT = @EXEEXT@ ++FGREP = @FGREP@ ++FRAME_LARGER_THAN = @FRAME_LARGER_THAN@ ++GREP = @GREP@ ++HAVE_ALIEN = @HAVE_ALIEN@ ++HAVE_DPKG = @HAVE_DPKG@ ++HAVE_DPKGBUILD = @HAVE_DPKGBUILD@ ++HAVE_MAKEPKG = @HAVE_MAKEPKG@ ++HAVE_PACMAN = @HAVE_PACMAN@ ++HAVE_RPM = @HAVE_RPM@ ++HAVE_RPMBUILD = @HAVE_RPMBUILD@ ++INSTALL = @INSTALL@ ++INSTALL_DATA = @INSTALL_DATA@ ++INSTALL_PROGRAM = @INSTALL_PROGRAM@ ++INSTALL_SCRIPT = @INSTALL_SCRIPT@ ++INSTALL_STRIP_PROGRAM = @INSTALL_STRIP_PROGRAM@ ++KERNELCPPFLAGS = @KERNELCPPFLAGS@ ++KERNELMAKE_PARAMS = @KERNELMAKE_PARAMS@ ++LD = 
@LD@ ++LDFLAGS = @LDFLAGS@ ++LIBBLKID = @LIBBLKID@ ++LIBOBJS = @LIBOBJS@ ++LIBS = @LIBS@ ++LIBSELINUX = @LIBSELINUX@ ++LIBTOOL = @LIBTOOL@ ++LIBUUID = @LIBUUID@ ++LINUX = @LINUX@ ++LINUX_OBJ = @LINUX_OBJ@ ++LINUX_SYMBOLS = @LINUX_SYMBOLS@ ++LINUX_VERSION = @LINUX_VERSION@ ++LIPO = @LIPO@ ++LN_S = @LN_S@ ++LTLIBOBJS = @LTLIBOBJS@ ++MAINT = @MAINT@ ++MAKEINFO = @MAKEINFO@ ++MAKEPKG = @MAKEPKG@ ++MAKEPKG_VERSION = @MAKEPKG_VERSION@ ++MANIFEST_TOOL = @MANIFEST_TOOL@ ++MKDIR_P = @MKDIR_P@ ++NM = @NM@ ++NMEDIT = @NMEDIT@ ++NO_UNUSED_BUT_SET_VARIABLE = @NO_UNUSED_BUT_SET_VARIABLE@ ++OBJDUMP = @OBJDUMP@ ++OBJEXT = @OBJEXT@ ++OTOOL = @OTOOL@ ++OTOOL64 = @OTOOL64@ ++PACKAGE = @PACKAGE@ ++PACKAGE_BUGREPORT = @PACKAGE_BUGREPORT@ ++PACKAGE_NAME = @PACKAGE_NAME@ ++PACKAGE_STRING = @PACKAGE_STRING@ ++PACKAGE_TARNAME = @PACKAGE_TARNAME@ ++PACKAGE_URL = @PACKAGE_URL@ ++PACKAGE_VERSION = @PACKAGE_VERSION@ ++PACMAN = @PACMAN@ ++PACMAN_VERSION = @PACMAN_VERSION@ ++PATH_SEPARATOR = @PATH_SEPARATOR@ ++RANLIB = @RANLIB@ ++RPM = @RPM@ ++RPMBUILD = @RPMBUILD@ ++RPMBUILD_VERSION = @RPMBUILD_VERSION@ ++RPM_VERSION = @RPM_VERSION@ ++SED = @SED@ ++SET_MAKE = @SET_MAKE@ ++SHELL = @SHELL@ ++SPL = @SPL@ ++SPL_OBJ = @SPL_OBJ@ ++SPL_SYMBOLS = @SPL_SYMBOLS@ ++SPL_VERSION = @SPL_VERSION@ ++STRIP = @STRIP@ ++TARGET_ASM_DIR = @TARGET_ASM_DIR@ ++VENDOR = @VENDOR@ ++VERSION = @VERSION@ ++ZFS_CONFIG = @ZFS_CONFIG@ ++ZFS_META_ALIAS = @ZFS_META_ALIAS@ ++ZFS_META_AUTHOR = @ZFS_META_AUTHOR@ ++ZFS_META_DATA = @ZFS_META_DATA@ ++ZFS_META_LICENSE = @ZFS_META_LICENSE@ ++ZFS_META_LT_AGE = @ZFS_META_LT_AGE@ ++ZFS_META_LT_CURRENT = @ZFS_META_LT_CURRENT@ ++ZFS_META_LT_REVISION = @ZFS_META_LT_REVISION@ ++ZFS_META_NAME = @ZFS_META_NAME@ ++ZFS_META_RELEASE = @ZFS_META_RELEASE@ ++ZFS_META_VERSION = @ZFS_META_VERSION@ ++ZLIB = @ZLIB@ ++abs_builddir = @abs_builddir@ ++abs_srcdir = @abs_srcdir@ ++abs_top_builddir = @abs_top_builddir@ ++abs_top_srcdir = @abs_top_srcdir@ ++ac_ct_AR = @ac_ct_AR@ ++ac_ct_CC = @ac_ct_CC@ ++ac_ct_DUMPBIN = @ac_ct_DUMPBIN@ ++am__include = @am__include@ ++am__leading_dot = @am__leading_dot@ ++am__quote = @am__quote@ ++am__tar = @am__tar@ ++am__untar = @am__untar@ ++bindir = @bindir@ ++build = @build@ ++build_alias = @build_alias@ ++build_cpu = @build_cpu@ ++build_os = @build_os@ ++build_vendor = @build_vendor@ ++builddir = @builddir@ ++datadir = @datadir@ ++datarootdir = @datarootdir@ ++docdir = @docdir@ ++dvidir = @dvidir@ ++exec_prefix = @exec_prefix@ ++host = @host@ ++host_alias = @host_alias@ ++host_cpu = @host_cpu@ ++host_os = @host_os@ ++host_vendor = @host_vendor@ ++htmldir = @htmldir@ ++includedir = @includedir@ ++infodir = @infodir@ ++install_sh = @install_sh@ ++libdir = @libdir@ ++libexecdir = @libexecdir@ ++localedir = @localedir@ ++localstatedir = @localstatedir@ ++mandir = @mandir@ ++mkdir_p = @mkdir_p@ ++oldincludedir = @oldincludedir@ ++pdfdir = @pdfdir@ ++prefix = @prefix@ ++program_transform_name = @program_transform_name@ ++psdir = @psdir@ ++sbindir = @sbindir@ ++sharedstatedir = @sharedstatedir@ ++srcdir = @srcdir@ ++sysconfdir = @sysconfdir@ ++target = @target@ ++target_alias = @target_alias@ ++target_cpu = @target_cpu@ ++target_os = @target_os@ ++target_vendor = @target_vendor@ ++top_build_prefix = @top_build_prefix@ ++top_builddir = @top_builddir@ ++top_srcdir = @top_srcdir@ ++udevdir = @udevdir@ ++udevruledir = @udevruledir@ ++SUBDIRS = fs ++COMMON_H = \ ++ $(top_srcdir)/include/sys/fm/protocol.h \ ++ $(top_srcdir)/include/sys/fm/util.h ++ ++KERNEL_H = ++USER_H = ++EXTRA_DIST = $(COMMON_H) $(KERNEL_H) 
$(USER_H) ++@CONFIG_USER_TRUE@libzfsdir = $(includedir)/libzfs/sys/fm ++@CONFIG_USER_TRUE@libzfs_HEADERS = $(COMMON_H) $(USER_H) ++@CONFIG_KERNEL_TRUE@kerneldir = /usr/src/zfs-$(ZFS_META_VERSION)-$(ZFS_META_RELEASE)/$(LINUX_VERSION)/sys/fm ++@CONFIG_KERNEL_TRUE@kernel_HEADERS = $(COMMON_H) $(KERNEL_H) ++all: all-recursive ++ ++.SUFFIXES: ++$(srcdir)/Makefile.in: @MAINTAINER_MODE_TRUE@ $(srcdir)/Makefile.am $(am__configure_deps) ++ @for dep in $?; do \ ++ case '$(am__configure_deps)' in \ ++ *$$dep*) \ ++ ( cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh ) \ ++ && { if test -f $@; then exit 0; else break; fi; }; \ ++ exit 1;; \ ++ esac; \ ++ done; \ ++ echo ' cd $(top_srcdir) && $(AUTOMAKE) --gnu include/sys/fm/Makefile'; \ ++ $(am__cd) $(top_srcdir) && \ ++ $(AUTOMAKE) --gnu include/sys/fm/Makefile ++.PRECIOUS: Makefile ++Makefile: $(srcdir)/Makefile.in $(top_builddir)/config.status ++ @case '$?' in \ ++ *config.status*) \ ++ cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh;; \ ++ *) \ ++ echo ' cd $(top_builddir) && $(SHELL) ./config.status $(subdir)/$@ $(am__depfiles_maybe)'; \ ++ cd $(top_builddir) && $(SHELL) ./config.status $(subdir)/$@ $(am__depfiles_maybe);; \ ++ esac; ++ ++$(top_builddir)/config.status: $(top_srcdir)/configure $(CONFIG_STATUS_DEPENDENCIES) ++ cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh ++ ++$(top_srcdir)/configure: @MAINTAINER_MODE_TRUE@ $(am__configure_deps) ++ cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh ++$(ACLOCAL_M4): @MAINTAINER_MODE_TRUE@ $(am__aclocal_m4_deps) ++ cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh ++$(am__aclocal_m4_deps): ++ ++mostlyclean-libtool: ++ -rm -f *.lo ++ ++clean-libtool: ++ -rm -rf .libs _libs ++install-kernelHEADERS: $(kernel_HEADERS) ++ @$(NORMAL_INSTALL) ++ @list='$(kernel_HEADERS)'; test -n "$(kerneldir)" || list=; \ ++ if test -n "$$list"; then \ ++ echo " $(MKDIR_P) '$(DESTDIR)$(kerneldir)'"; \ ++ $(MKDIR_P) "$(DESTDIR)$(kerneldir)" || exit 1; \ ++ fi; \ ++ for p in $$list; do \ ++ if test -f "$$p"; then d=; else d="$(srcdir)/"; fi; \ ++ echo "$$d$$p"; \ ++ done | $(am__base_list) | \ ++ while read files; do \ ++ echo " $(INSTALL_HEADER) $$files '$(DESTDIR)$(kerneldir)'"; \ ++ $(INSTALL_HEADER) $$files "$(DESTDIR)$(kerneldir)" || exit $$?; \ ++ done ++ ++uninstall-kernelHEADERS: ++ @$(NORMAL_UNINSTALL) ++ @list='$(kernel_HEADERS)'; test -n "$(kerneldir)" || list=; \ ++ files=`for p in $$list; do echo $$p; done | sed -e 's|^.*/||'`; \ ++ dir='$(DESTDIR)$(kerneldir)'; $(am__uninstall_files_from_dir) ++install-libzfsHEADERS: $(libzfs_HEADERS) ++ @$(NORMAL_INSTALL) ++ @list='$(libzfs_HEADERS)'; test -n "$(libzfsdir)" || list=; \ ++ if test -n "$$list"; then \ ++ echo " $(MKDIR_P) '$(DESTDIR)$(libzfsdir)'"; \ ++ $(MKDIR_P) "$(DESTDIR)$(libzfsdir)" || exit 1; \ ++ fi; \ ++ for p in $$list; do \ ++ if test -f "$$p"; then d=; else d="$(srcdir)/"; fi; \ ++ echo "$$d$$p"; \ ++ done | $(am__base_list) | \ ++ while read files; do \ ++ echo " $(INSTALL_HEADER) $$files '$(DESTDIR)$(libzfsdir)'"; \ ++ $(INSTALL_HEADER) $$files "$(DESTDIR)$(libzfsdir)" || exit $$?; \ ++ done ++ ++uninstall-libzfsHEADERS: ++ @$(NORMAL_UNINSTALL) ++ @list='$(libzfs_HEADERS)'; test -n "$(libzfsdir)" || list=; \ ++ files=`for p in $$list; do echo $$p; done | sed -e 's|^.*/||'`; \ ++ dir='$(DESTDIR)$(libzfsdir)'; $(am__uninstall_files_from_dir) ++ ++# This directory's subdirectories are mostly independent; you can cd ++# into them and run `make' without going through this Makefile. 
++# To change the values of `make' variables: instead of editing Makefiles, ++# (1) if the variable is set in `config.status', edit `config.status' ++# (which will cause the Makefiles to be regenerated when you run `make'); ++# (2) otherwise, pass the desired values on the `make' command line. ++$(RECURSIVE_TARGETS): ++ @fail= failcom='exit 1'; \ ++ for f in x $$MAKEFLAGS; do \ ++ case $$f in \ ++ *=* | --[!k]*);; \ ++ *k*) failcom='fail=yes';; \ ++ esac; \ ++ done; \ ++ dot_seen=no; \ ++ target=`echo $@ | sed s/-recursive//`; \ ++ list='$(SUBDIRS)'; for subdir in $$list; do \ ++ echo "Making $$target in $$subdir"; \ ++ if test "$$subdir" = "."; then \ ++ dot_seen=yes; \ ++ local_target="$$target-am"; \ ++ else \ ++ local_target="$$target"; \ ++ fi; \ ++ ($(am__cd) $$subdir && $(MAKE) $(AM_MAKEFLAGS) $$local_target) \ ++ || eval $$failcom; \ ++ done; \ ++ if test "$$dot_seen" = "no"; then \ ++ $(MAKE) $(AM_MAKEFLAGS) "$$target-am" || exit 1; \ ++ fi; test -z "$$fail" ++ ++$(RECURSIVE_CLEAN_TARGETS): ++ @fail= failcom='exit 1'; \ ++ for f in x $$MAKEFLAGS; do \ ++ case $$f in \ ++ *=* | --[!k]*);; \ ++ *k*) failcom='fail=yes';; \ ++ esac; \ ++ done; \ ++ dot_seen=no; \ ++ case "$@" in \ ++ distclean-* | maintainer-clean-*) list='$(DIST_SUBDIRS)' ;; \ ++ *) list='$(SUBDIRS)' ;; \ ++ esac; \ ++ rev=''; for subdir in $$list; do \ ++ if test "$$subdir" = "."; then :; else \ ++ rev="$$subdir $$rev"; \ ++ fi; \ ++ done; \ ++ rev="$$rev ."; \ ++ target=`echo $@ | sed s/-recursive//`; \ ++ for subdir in $$rev; do \ ++ echo "Making $$target in $$subdir"; \ ++ if test "$$subdir" = "."; then \ ++ local_target="$$target-am"; \ ++ else \ ++ local_target="$$target"; \ ++ fi; \ ++ ($(am__cd) $$subdir && $(MAKE) $(AM_MAKEFLAGS) $$local_target) \ ++ || eval $$failcom; \ ++ done && test -z "$$fail" ++tags-recursive: ++ list='$(SUBDIRS)'; for subdir in $$list; do \ ++ test "$$subdir" = . || ($(am__cd) $$subdir && $(MAKE) $(AM_MAKEFLAGS) tags); \ ++ done ++ctags-recursive: ++ list='$(SUBDIRS)'; for subdir in $$list; do \ ++ test "$$subdir" = . || ($(am__cd) $$subdir && $(MAKE) $(AM_MAKEFLAGS) ctags); \ ++ done ++ ++ID: $(HEADERS) $(SOURCES) $(LISP) $(TAGS_FILES) ++ list='$(SOURCES) $(HEADERS) $(LISP) $(TAGS_FILES)'; \ ++ unique=`for i in $$list; do \ ++ if test -f "$$i"; then echo $$i; else echo $(srcdir)/$$i; fi; \ ++ done | \ ++ $(AWK) '{ files[$$0] = 1; nonempty = 1; } \ ++ END { if (nonempty) { for (i in files) print i; }; }'`; \ ++ mkid -fID $$unique ++tags: TAGS ++ ++TAGS: tags-recursive $(HEADERS) $(SOURCES) $(TAGS_DEPENDENCIES) \ ++ $(TAGS_FILES) $(LISP) ++ set x; \ ++ here=`pwd`; \ ++ if ($(ETAGS) --etags-include --version) >/dev/null 2>&1; then \ ++ include_option=--etags-include; \ ++ empty_fix=.; \ ++ else \ ++ include_option=--include; \ ++ empty_fix=; \ ++ fi; \ ++ list='$(SUBDIRS)'; for subdir in $$list; do \ ++ if test "$$subdir" = .; then :; else \ ++ test ! 
-f $$subdir/TAGS || \ ++ set "$$@" "$$include_option=$$here/$$subdir/TAGS"; \ ++ fi; \ ++ done; \ ++ list='$(SOURCES) $(HEADERS) $(LISP) $(TAGS_FILES)'; \ ++ unique=`for i in $$list; do \ ++ if test -f "$$i"; then echo $$i; else echo $(srcdir)/$$i; fi; \ ++ done | \ ++ $(AWK) '{ files[$$0] = 1; nonempty = 1; } \ ++ END { if (nonempty) { for (i in files) print i; }; }'`; \ ++ shift; \ ++ if test -z "$(ETAGS_ARGS)$$*$$unique"; then :; else \ ++ test -n "$$unique" || unique=$$empty_fix; \ ++ if test $$# -gt 0; then \ ++ $(ETAGS) $(ETAGSFLAGS) $(AM_ETAGSFLAGS) $(ETAGS_ARGS) \ ++ "$$@" $$unique; \ ++ else \ ++ $(ETAGS) $(ETAGSFLAGS) $(AM_ETAGSFLAGS) $(ETAGS_ARGS) \ ++ $$unique; \ ++ fi; \ ++ fi ++ctags: CTAGS ++CTAGS: ctags-recursive $(HEADERS) $(SOURCES) $(TAGS_DEPENDENCIES) \ ++ $(TAGS_FILES) $(LISP) ++ list='$(SOURCES) $(HEADERS) $(LISP) $(TAGS_FILES)'; \ ++ unique=`for i in $$list; do \ ++ if test -f "$$i"; then echo $$i; else echo $(srcdir)/$$i; fi; \ ++ done | \ ++ $(AWK) '{ files[$$0] = 1; nonempty = 1; } \ ++ END { if (nonempty) { for (i in files) print i; }; }'`; \ ++ test -z "$(CTAGS_ARGS)$$unique" \ ++ || $(CTAGS) $(CTAGSFLAGS) $(AM_CTAGSFLAGS) $(CTAGS_ARGS) \ ++ $$unique ++ ++GTAGS: ++ here=`$(am__cd) $(top_builddir) && pwd` \ ++ && $(am__cd) $(top_srcdir) \ ++ && gtags -i $(GTAGS_ARGS) "$$here" ++ ++distclean-tags: ++ -rm -f TAGS ID GTAGS GRTAGS GSYMS GPATH tags ++ ++distdir: $(DISTFILES) ++ @srcdirstrip=`echo "$(srcdir)" | sed 's/[].[^$$\\*]/\\\\&/g'`; \ ++ topsrcdirstrip=`echo "$(top_srcdir)" | sed 's/[].[^$$\\*]/\\\\&/g'`; \ ++ list='$(DISTFILES)'; \ ++ dist_files=`for file in $$list; do echo $$file; done | \ ++ sed -e "s|^$$srcdirstrip/||;t" \ ++ -e "s|^$$topsrcdirstrip/|$(top_builddir)/|;t"`; \ ++ case $$dist_files in \ ++ */*) $(MKDIR_P) `echo "$$dist_files" | \ ++ sed '/\//!d;s|^|$(distdir)/|;s,/[^/]*$$,,' | \ ++ sort -u` ;; \ ++ esac; \ ++ for file in $$dist_files; do \ ++ if test -f $$file || test -d $$file; then d=.; else d=$(srcdir); fi; \ ++ if test -d $$d/$$file; then \ ++ dir=`echo "/$$file" | sed -e 's,/[^/]*$$,,'`; \ ++ if test -d "$(distdir)/$$file"; then \ ++ find "$(distdir)/$$file" -type d ! -perm -700 -exec chmod u+rwx {} \;; \ ++ fi; \ ++ if test -d $(srcdir)/$$file && test $$d != $(srcdir); then \ ++ cp -fpR $(srcdir)/$$file "$(distdir)$$dir" || exit 1; \ ++ find "$(distdir)/$$file" -type d ! 
-perm -700 -exec chmod u+rwx {} \;; \ ++ fi; \ ++ cp -fpR $$d/$$file "$(distdir)$$dir" || exit 1; \ ++ else \ ++ test -f "$(distdir)/$$file" \ ++ || cp -p $$d/$$file "$(distdir)/$$file" \ ++ || exit 1; \ ++ fi; \ ++ done ++ @list='$(DIST_SUBDIRS)'; for subdir in $$list; do \ ++ if test "$$subdir" = .; then :; else \ ++ $(am__make_dryrun) \ ++ || test -d "$(distdir)/$$subdir" \ ++ || $(MKDIR_P) "$(distdir)/$$subdir" \ ++ || exit 1; \ ++ dir1=$$subdir; dir2="$(distdir)/$$subdir"; \ ++ $(am__relativize); \ ++ new_distdir=$$reldir; \ ++ dir1=$$subdir; dir2="$(top_distdir)"; \ ++ $(am__relativize); \ ++ new_top_distdir=$$reldir; \ ++ echo " (cd $$subdir && $(MAKE) $(AM_MAKEFLAGS) top_distdir="$$new_top_distdir" distdir="$$new_distdir" \\"; \ ++ echo " am__remove_distdir=: am__skip_length_check=: am__skip_mode_fix=: distdir)"; \ ++ ($(am__cd) $$subdir && \ ++ $(MAKE) $(AM_MAKEFLAGS) \ ++ top_distdir="$$new_top_distdir" \ ++ distdir="$$new_distdir" \ ++ am__remove_distdir=: \ ++ am__skip_length_check=: \ ++ am__skip_mode_fix=: \ ++ distdir) \ ++ || exit 1; \ ++ fi; \ ++ done ++check-am: all-am ++check: check-recursive ++all-am: Makefile $(HEADERS) ++installdirs: installdirs-recursive ++installdirs-am: ++ for dir in "$(DESTDIR)$(kerneldir)" "$(DESTDIR)$(libzfsdir)"; do \ ++ test -z "$$dir" || $(MKDIR_P) "$$dir"; \ ++ done ++install: install-recursive ++install-exec: install-exec-recursive ++install-data: install-data-recursive ++uninstall: uninstall-recursive ++ ++install-am: all-am ++ @$(MAKE) $(AM_MAKEFLAGS) install-exec-am install-data-am ++ ++installcheck: installcheck-recursive ++install-strip: ++ if test -z '$(STRIP)'; then \ ++ $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \ ++ install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \ ++ install; \ ++ else \ ++ $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \ ++ install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \ ++ "INSTALL_PROGRAM_ENV=STRIPPROG='$(STRIP)'" install; \ ++ fi ++mostlyclean-generic: ++ ++clean-generic: ++ ++distclean-generic: ++ -test -z "$(CONFIG_CLEAN_FILES)" || rm -f $(CONFIG_CLEAN_FILES) ++ -test . = "$(srcdir)" || test -z "$(CONFIG_CLEAN_VPATH_FILES)" || rm -f $(CONFIG_CLEAN_VPATH_FILES) ++ ++maintainer-clean-generic: ++ @echo "This command is intended for maintainers to use" ++ @echo "it deletes files that may require special tools to rebuild." 
++clean: clean-recursive ++ ++clean-am: clean-generic clean-libtool mostlyclean-am ++ ++distclean: distclean-recursive ++ -rm -f Makefile ++distclean-am: clean-am distclean-generic distclean-tags ++ ++dvi: dvi-recursive ++ ++dvi-am: ++ ++html: html-recursive ++ ++html-am: ++ ++info: info-recursive ++ ++info-am: ++ ++install-data-am: install-kernelHEADERS install-libzfsHEADERS ++ ++install-dvi: install-dvi-recursive ++ ++install-dvi-am: ++ ++install-exec-am: ++ ++install-html: install-html-recursive ++ ++install-html-am: ++ ++install-info: install-info-recursive ++ ++install-info-am: ++ ++install-man: ++ ++install-pdf: install-pdf-recursive ++ ++install-pdf-am: ++ ++install-ps: install-ps-recursive ++ ++install-ps-am: ++ ++installcheck-am: ++ ++maintainer-clean: maintainer-clean-recursive ++ -rm -f Makefile ++maintainer-clean-am: distclean-am maintainer-clean-generic ++ ++mostlyclean: mostlyclean-recursive ++ ++mostlyclean-am: mostlyclean-generic mostlyclean-libtool ++ ++pdf: pdf-recursive ++ ++pdf-am: ++ ++ps: ps-recursive ++ ++ps-am: ++ ++uninstall-am: uninstall-kernelHEADERS uninstall-libzfsHEADERS ++ ++.MAKE: $(RECURSIVE_CLEAN_TARGETS) $(RECURSIVE_TARGETS) ctags-recursive \ ++ install-am install-strip tags-recursive ++ ++.PHONY: $(RECURSIVE_CLEAN_TARGETS) $(RECURSIVE_TARGETS) CTAGS GTAGS \ ++ all all-am check check-am clean clean-generic clean-libtool \ ++ ctags ctags-recursive distclean distclean-generic \ ++ distclean-libtool distclean-tags distdir dvi dvi-am html \ ++ html-am info info-am install install-am install-data \ ++ install-data-am install-dvi install-dvi-am install-exec \ ++ install-exec-am install-html install-html-am install-info \ ++ install-info-am install-kernelHEADERS install-libzfsHEADERS \ ++ install-man install-pdf install-pdf-am install-ps \ ++ install-ps-am install-strip installcheck installcheck-am \ ++ installdirs installdirs-am maintainer-clean \ ++ maintainer-clean-generic mostlyclean mostlyclean-generic \ ++ mostlyclean-libtool pdf pdf-am ps ps-am tags tags-recursive \ ++ uninstall uninstall-am uninstall-kernelHEADERS \ ++ uninstall-libzfsHEADERS ++ ++ ++# Tell versions [3.59,3.63) of GNU make to not export all variables. ++# Otherwise a system limit (for SysV at least) may be exceeded. ++.NOEXPORT: +diff -uNr linux-3.2.33-go.orig/include/zfs/sys/fm/protocol.h linux-3.2.33-go/include/zfs/sys/fm/protocol.h +--- linux-3.2.33-go.orig/include/zfs/sys/fm/protocol.h 1970-01-01 01:00:00.000000000 +0100 ++++ linux-3.2.33-go/include/zfs/sys/fm/protocol.h 2012-11-16 23:25:34.344039393 +0100 +@@ -0,0 +1,367 @@ ++/* ++ * CDDL HEADER START ++ * ++ * The contents of this file are subject to the terms of the ++ * Common Development and Distribution License (the "License"). ++ * You may not use this file except in compliance with the License. ++ * ++ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE ++ * or http://www.opensolaris.org/os/licensing. ++ * See the License for the specific language governing permissions ++ * and limitations under the License. ++ * ++ * When distributing Covered Code, include this CDDL HEADER in each ++ * file and include the License file at usr/src/OPENSOLARIS.LICENSE. ++ * If applicable, add the following below this CDDL HEADER, with the ++ * fields enclosed by brackets "[]" replaced with your own identifying ++ * information: Portions Copyright [yyyy] [name of copyright owner] ++ * ++ * CDDL HEADER END ++ */ ++ ++/* ++ * Copyright (c) 2004, 2010, Oracle and/or its affiliates. All rights reserved. 
++ */ ++ ++#ifndef _SYS_FM_PROTOCOL_H ++#define _SYS_FM_PROTOCOL_H ++ ++#ifdef __cplusplus ++extern "C" { ++#endif ++ ++#ifdef _KERNEL ++#include ++#include ++#else ++#include ++#include ++#endif ++#include ++ ++/* FM common member names */ ++#define FM_CLASS "class" ++#define FM_VERSION "version" ++ ++/* FM protocol category 1 class names */ ++#define FM_EREPORT_CLASS "ereport" ++#define FM_FAULT_CLASS "fault" ++#define FM_DEFECT_CLASS "defect" ++#define FM_RSRC_CLASS "resource" ++#define FM_LIST_EVENT "list" ++#define FM_IREPORT_CLASS "ireport" ++ ++/* FM list.* event class values */ ++#define FM_LIST_SUSPECT_CLASS FM_LIST_EVENT ".suspect" ++#define FM_LIST_ISOLATED_CLASS FM_LIST_EVENT ".isolated" ++#define FM_LIST_REPAIRED_CLASS FM_LIST_EVENT ".repaired" ++#define FM_LIST_UPDATED_CLASS FM_LIST_EVENT ".updated" ++#define FM_LIST_RESOLVED_CLASS FM_LIST_EVENT ".resolved" ++ ++/* ereport class subcategory values */ ++#define FM_ERROR_CPU "cpu" ++#define FM_ERROR_IO "io" ++ ++/* ereport version and payload member names */ ++#define FM_EREPORT_VERS0 0 ++#define FM_EREPORT_VERSION FM_EREPORT_VERS0 ++ ++/* ereport payload member names */ ++#define FM_EREPORT_DETECTOR "detector" ++#define FM_EREPORT_ENA "ena" ++#define FM_EREPORT_TIME "time" ++ ++/* list.* event payload member names */ ++#define FM_LIST_EVENT_SIZE "list-sz" ++ ++/* ireport.* event payload member names */ ++#define FM_IREPORT_DETECTOR "detector" ++#define FM_IREPORT_UUID "uuid" ++#define FM_IREPORT_PRIORITY "pri" ++#define FM_IREPORT_ATTRIBUTES "attr" ++ ++/* ++ * list.suspect, isolated, updated, repaired and resolved ++ * versions/payload member names. ++ */ ++#define FM_SUSPECT_UUID "uuid" ++#define FM_SUSPECT_DIAG_CODE "code" ++#define FM_SUSPECT_DIAG_TIME "diag-time" ++#define FM_SUSPECT_DE "de" ++#define FM_SUSPECT_FAULT_LIST "fault-list" ++#define FM_SUSPECT_FAULT_SZ "fault-list-sz" ++#define FM_SUSPECT_FAULT_STATUS "fault-status" ++#define FM_SUSPECT_INJECTED "__injected" ++#define FM_SUSPECT_MESSAGE "message" ++#define FM_SUSPECT_RETIRE "retire" ++#define FM_SUSPECT_RESPONSE "response" ++#define FM_SUSPECT_SEVERITY "severity" ++ ++#define FM_SUSPECT_VERS0 0 ++#define FM_SUSPECT_VERSION FM_SUSPECT_VERS0 ++ ++#define FM_SUSPECT_FAULTY 0x1 ++#define FM_SUSPECT_UNUSABLE 0x2 ++#define FM_SUSPECT_NOT_PRESENT 0x4 ++#define FM_SUSPECT_DEGRADED 0x8 ++#define FM_SUSPECT_REPAIRED 0x10 ++#define FM_SUSPECT_REPLACED 0x20 ++#define FM_SUSPECT_ACQUITTED 0x40 ++ ++/* fault event versions and payload member names */ ++#define FM_FAULT_VERS0 0 ++#define FM_FAULT_VERSION FM_FAULT_VERS0 ++ ++#define FM_FAULT_ASRU "asru" ++#define FM_FAULT_FRU "fru" ++#define FM_FAULT_FRU_LABEL "fru-label" ++#define FM_FAULT_CERTAINTY "certainty" ++#define FM_FAULT_RESOURCE "resource" ++#define FM_FAULT_LOCATION "location" ++ ++/* resource event versions and payload member names */ ++#define FM_RSRC_VERS0 0 ++#define FM_RSRC_VERSION FM_RSRC_VERS0 ++#define FM_RSRC_RESOURCE "resource" ++ ++/* resource.fm.asru.* payload member names */ ++#define FM_RSRC_ASRU_UUID "uuid" ++#define FM_RSRC_ASRU_CODE "code" ++#define FM_RSRC_ASRU_FAULTY "faulty" ++#define FM_RSRC_ASRU_REPAIRED "repaired" ++#define FM_RSRC_ASRU_REPLACED "replaced" ++#define FM_RSRC_ASRU_ACQUITTED "acquitted" ++#define FM_RSRC_ASRU_RESOLVED "resolved" ++#define FM_RSRC_ASRU_UNUSABLE "unusable" ++#define FM_RSRC_ASRU_EVENT "event" ++ ++/* resource.fm.xprt.* versions and payload member names */ ++#define FM_RSRC_XPRT_VERS0 0 ++#define FM_RSRC_XPRT_VERSION FM_RSRC_XPRT_VERS0 ++#define FM_RSRC_XPRT_UUID 
"uuid" ++#define FM_RSRC_XPRT_SUBCLASS "subclass" ++#define FM_RSRC_XPRT_FAULT_STATUS "fault-status" ++#define FM_RSRC_XPRT_FAULT_HAS_ASRU "fault-has-asru" ++ ++/* ++ * FM ENA Format Macros ++ */ ++#define ENA_FORMAT_MASK 0x3 ++#define ENA_FORMAT(ena) ((ena) & ENA_FORMAT_MASK) ++ ++/* ENA format types */ ++#define FM_ENA_FMT0 0 ++#define FM_ENA_FMT1 1 ++#define FM_ENA_FMT2 2 ++ ++/* Format 1 */ ++#define ENA_FMT1_GEN_MASK 0x00000000000003FCull ++#define ENA_FMT1_ID_MASK 0xFFFFFFFFFFFFFC00ull ++#define ENA_FMT1_CPUID_MASK 0x00000000000FFC00ull ++#define ENA_FMT1_TIME_MASK 0xFFFFFFFFFFF00000ull ++#define ENA_FMT1_GEN_SHFT 2 ++#define ENA_FMT1_ID_SHFT 10 ++#define ENA_FMT1_CPUID_SHFT ENA_FMT1_ID_SHFT ++#define ENA_FMT1_TIME_SHFT 20 ++ ++/* Format 2 */ ++#define ENA_FMT2_GEN_MASK 0x00000000000003FCull ++#define ENA_FMT2_ID_MASK 0xFFFFFFFFFFFFFC00ull ++#define ENA_FMT2_TIME_MASK ENA_FMT2_ID_MASK ++#define ENA_FMT2_GEN_SHFT 2 ++#define ENA_FMT2_ID_SHFT 10 ++#define ENA_FMT2_TIME_SHFT ENA_FMT2_ID_SHFT ++ ++/* Common FMRI type names */ ++#define FM_FMRI_AUTHORITY "authority" ++#define FM_FMRI_SCHEME "scheme" ++#define FM_FMRI_SVC_AUTHORITY "svc-authority" ++#define FM_FMRI_FACILITY "facility" ++ ++/* FMRI authority-type member names */ ++#define FM_FMRI_AUTH_CHASSIS "chassis-id" ++#define FM_FMRI_AUTH_PRODUCT_SN "product-sn" ++#define FM_FMRI_AUTH_PRODUCT "product-id" ++#define FM_FMRI_AUTH_DOMAIN "domain-id" ++#define FM_FMRI_AUTH_SERVER "server-id" ++#define FM_FMRI_AUTH_HOST "host-id" ++ ++#define FM_AUTH_VERS0 0 ++#define FM_FMRI_AUTH_VERSION FM_AUTH_VERS0 ++ ++/* scheme name values */ ++#define FM_FMRI_SCHEME_FMD "fmd" ++#define FM_FMRI_SCHEME_DEV "dev" ++#define FM_FMRI_SCHEME_HC "hc" ++#define FM_FMRI_SCHEME_SVC "svc" ++#define FM_FMRI_SCHEME_CPU "cpu" ++#define FM_FMRI_SCHEME_MEM "mem" ++#define FM_FMRI_SCHEME_MOD "mod" ++#define FM_FMRI_SCHEME_PKG "pkg" ++#define FM_FMRI_SCHEME_LEGACY "legacy-hc" ++#define FM_FMRI_SCHEME_ZFS "zfs" ++#define FM_FMRI_SCHEME_SW "sw" ++ ++/* Scheme versions */ ++#define FMD_SCHEME_VERSION0 0 ++#define FM_FMD_SCHEME_VERSION FMD_SCHEME_VERSION0 ++#define DEV_SCHEME_VERSION0 0 ++#define FM_DEV_SCHEME_VERSION DEV_SCHEME_VERSION0 ++#define FM_HC_VERS0 0 ++#define FM_HC_SCHEME_VERSION FM_HC_VERS0 ++#define CPU_SCHEME_VERSION0 0 ++#define CPU_SCHEME_VERSION1 1 ++#define FM_CPU_SCHEME_VERSION CPU_SCHEME_VERSION1 ++#define MEM_SCHEME_VERSION0 0 ++#define FM_MEM_SCHEME_VERSION MEM_SCHEME_VERSION0 ++#define MOD_SCHEME_VERSION0 0 ++#define FM_MOD_SCHEME_VERSION MOD_SCHEME_VERSION0 ++#define PKG_SCHEME_VERSION0 0 ++#define FM_PKG_SCHEME_VERSION PKG_SCHEME_VERSION0 ++#define LEGACY_SCHEME_VERSION0 0 ++#define FM_LEGACY_SCHEME_VERSION LEGACY_SCHEME_VERSION0 ++#define SVC_SCHEME_VERSION0 0 ++#define FM_SVC_SCHEME_VERSION SVC_SCHEME_VERSION0 ++#define ZFS_SCHEME_VERSION0 0 ++#define FM_ZFS_SCHEME_VERSION ZFS_SCHEME_VERSION0 ++#define SW_SCHEME_VERSION0 0 ++#define FM_SW_SCHEME_VERSION SW_SCHEME_VERSION0 ++ ++/* hc scheme member names */ ++#define FM_FMRI_HC_SERIAL_ID "serial" ++#define FM_FMRI_HC_PART "part" ++#define FM_FMRI_HC_REVISION "revision" ++#define FM_FMRI_HC_ROOT "hc-root" ++#define FM_FMRI_HC_LIST_SZ "hc-list-sz" ++#define FM_FMRI_HC_LIST "hc-list" ++#define FM_FMRI_HC_SPECIFIC "hc-specific" ++ ++/* facility member names */ ++#define FM_FMRI_FACILITY_NAME "facility-name" ++#define FM_FMRI_FACILITY_TYPE "facility-type" ++ ++/* hc-list version and member names */ ++#define FM_FMRI_HC_NAME "hc-name" ++#define FM_FMRI_HC_ID "hc-id" ++ ++#define HC_LIST_VERSION0 0 
++#define FM_HC_LIST_VERSION HC_LIST_VERSION0 ++ ++/* hc-specific member names */ ++#define FM_FMRI_HC_SPECIFIC_OFFSET "offset" ++#define FM_FMRI_HC_SPECIFIC_PHYSADDR "physaddr" ++ ++/* fmd module scheme member names */ ++#define FM_FMRI_FMD_NAME "mod-name" ++#define FM_FMRI_FMD_VERSION "mod-version" ++ ++/* dev scheme member names */ ++#define FM_FMRI_DEV_ID "devid" ++#define FM_FMRI_DEV_TGTPTLUN0 "target-port-l0id" ++#define FM_FMRI_DEV_PATH "device-path" ++ ++/* pkg scheme member names */ ++#define FM_FMRI_PKG_BASEDIR "pkg-basedir" ++#define FM_FMRI_PKG_INST "pkg-inst" ++#define FM_FMRI_PKG_VERSION "pkg-version" ++ ++/* svc scheme member names */ ++#define FM_FMRI_SVC_NAME "svc-name" ++#define FM_FMRI_SVC_INSTANCE "svc-instance" ++#define FM_FMRI_SVC_CONTRACT_ID "svc-contract-id" ++ ++/* svc-authority member names */ ++#define FM_FMRI_SVC_AUTH_SCOPE "scope" ++#define FM_FMRI_SVC_AUTH_SYSTEM_FQN "system-fqn" ++ ++/* cpu scheme member names */ ++#define FM_FMRI_CPU_ID "cpuid" ++#define FM_FMRI_CPU_SERIAL_ID "serial" ++#define FM_FMRI_CPU_MASK "cpumask" ++#define FM_FMRI_CPU_VID "cpuvid" ++#define FM_FMRI_CPU_CPUFRU "cpufru" ++#define FM_FMRI_CPU_CACHE_INDEX "cacheindex" ++#define FM_FMRI_CPU_CACHE_WAY "cacheway" ++#define FM_FMRI_CPU_CACHE_BIT "cachebit" ++#define FM_FMRI_CPU_CACHE_TYPE "cachetype" ++ ++#define FM_FMRI_CPU_CACHE_TYPE_L2 0 ++#define FM_FMRI_CPU_CACHE_TYPE_L3 1 ++ ++/* legacy-hc scheme member names */ ++#define FM_FMRI_LEGACY_HC "component" ++#define FM_FMRI_LEGACY_HC_PREFIX FM_FMRI_SCHEME_HC":///" \ ++ FM_FMRI_LEGACY_HC"=" ++ ++/* mem scheme member names */ ++#define FM_FMRI_MEM_UNUM "unum" ++#define FM_FMRI_MEM_SERIAL_ID "serial" ++#define FM_FMRI_MEM_PHYSADDR "physaddr" ++#define FM_FMRI_MEM_MEMCONFIG "memconfig" ++#define FM_FMRI_MEM_OFFSET "offset" ++ ++/* mod scheme member names */ ++#define FM_FMRI_MOD_PKG "mod-pkg" ++#define FM_FMRI_MOD_NAME "mod-name" ++#define FM_FMRI_MOD_ID "mod-id" ++#define FM_FMRI_MOD_DESC "mod-desc" ++ ++/* zfs scheme member names */ ++#define FM_FMRI_ZFS_POOL "pool" ++#define FM_FMRI_ZFS_VDEV "vdev" ++ ++/* sw scheme member names - extra indentation for members of an nvlist */ ++#define FM_FMRI_SW_OBJ "object" ++#define FM_FMRI_SW_OBJ_PATH "path" ++#define FM_FMRI_SW_OBJ_ROOT "root" ++#define FM_FMRI_SW_OBJ_PKG "pkg" ++#define FM_FMRI_SW_SITE "site" ++#define FM_FMRI_SW_SITE_TOKEN "token" ++#define FM_FMRI_SW_SITE_MODULE "module" ++#define FM_FMRI_SW_SITE_FILE "file" ++#define FM_FMRI_SW_SITE_LINE "line" ++#define FM_FMRI_SW_SITE_FUNC "func" ++#define FM_FMRI_SW_CTXT "context" ++#define FM_FMRI_SW_CTXT_ORIGIN "origin" ++#define FM_FMRI_SW_CTXT_EXECNAME "execname" ++#define FM_FMRI_SW_CTXT_PID "pid" ++#define FM_FMRI_SW_CTXT_ZONE "zone" ++#define FM_FMRI_SW_CTXT_CTID "ctid" ++#define FM_FMRI_SW_CTXT_STACK "stack" ++#define FM_NVA_FREE 0 /* free allocator on nvlist_destroy */ ++#define FM_NVA_RETAIN 1 /* keep allocator on nvlist_destroy */ ++ ++extern nv_alloc_t *fm_nva_xcreate(char *, size_t); ++extern void fm_nva_xdestroy(nv_alloc_t *); ++extern nvlist_t *fm_nvlist_create(nv_alloc_t *); ++extern void fm_nvlist_destroy(nvlist_t *, int); ++extern void fm_ereport_set(nvlist_t *, int, const char *, uint64_t, ++ const nvlist_t *, ...); ++extern void fm_payload_set(nvlist_t *, ...); ++extern int i_fm_payload_set(nvlist_t *, const char *, va_list); ++extern void fm_fmri_hc_set(nvlist_t *, int, const nvlist_t *, nvlist_t *, ++ int, ...); ++extern void fm_fmri_dev_set(nvlist_t *, int, const nvlist_t *, const char *, ++ const char *, const char *); 
++extern void fm_fmri_de_set(nvlist_t *, int, const nvlist_t *, const char *); ++extern void fm_fmri_cpu_set(nvlist_t *, int, const nvlist_t *, uint32_t, ++ uint8_t *, const char *); ++extern void fm_fmri_mem_set(nvlist_t *, int, const nvlist_t *, const char *, ++ const char *, uint64_t); ++extern void fm_fmri_zfs_set(nvlist_t *, int, uint64_t, uint64_t); ++extern void fm_fmri_hc_create(nvlist_t *, int, const nvlist_t *, nvlist_t *, ++ nvlist_t *, int, ...); ++ ++extern uint64_t fm_ena_increment(uint64_t); ++extern uint64_t fm_ena_generate(uint64_t, uchar_t); ++extern uint64_t fm_ena_generate_cpu(uint64_t, processorid_t, uchar_t); ++extern uint64_t fm_ena_generation_get(uint64_t); ++extern uchar_t fm_ena_format_get(uint64_t); ++extern uint64_t fm_ena_id_get(uint64_t); ++extern uint64_t fm_ena_time_get(uint64_t); ++ ++#ifdef __cplusplus ++} ++#endif ++ ++#endif /* _SYS_FM_PROTOCOL_H */ +diff -uNr linux-3.2.33-go.orig/include/zfs/sys/fm/util.h linux-3.2.33-go/include/zfs/sys/fm/util.h +--- linux-3.2.33-go.orig/include/zfs/sys/fm/util.h 1970-01-01 01:00:00.000000000 +0100 ++++ linux-3.2.33-go/include/zfs/sys/fm/util.h 2012-11-16 23:25:34.343039404 +0100 +@@ -0,0 +1,115 @@ ++/* ++ * CDDL HEADER START ++ * ++ * The contents of this file are subject to the terms of the ++ * Common Development and Distribution License (the "License"). ++ * You may not use this file except in compliance with the License. ++ * ++ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE ++ * or http://www.opensolaris.org/os/licensing. ++ * See the License for the specific language governing permissions ++ * and limitations under the License. ++ * ++ * When distributing Covered Code, include this CDDL HEADER in each ++ * file and include the License file at usr/src/OPENSOLARIS.LICENSE. ++ * If applicable, add the following below this CDDL HEADER, with the ++ * fields enclosed by brackets "[]" replaced with your own identifying ++ * information: Portions Copyright [yyyy] [name of copyright owner] ++ * ++ * CDDL HEADER END ++ */ ++ ++/* ++ * Copyright (c) 2004, 2010, Oracle and/or its affiliates. All rights reserved. ++ */ ++ ++#ifndef _SYS_FM_UTIL_H ++#define _SYS_FM_UTIL_H ++ ++#ifdef __cplusplus ++extern "C" { ++#endif ++ ++#include ++ ++/* ++ * Shared user/kernel definitions for class length, error channel name, ++ * and kernel event publisher string. ++ */ ++#define FM_MAX_CLASS 100 ++#define FM_ERROR_CHAN "com.sun:fm:error" ++#define FM_PUB "fm" ++ ++/* ++ * ereport dump device transport support ++ * ++ * Ereports are written out to the dump device at a proscribed offset from the ++ * end, similar to in-transit log messages. The ereports are represented as a ++ * erpt_dump_t header followed by ed_size bytes of packed native nvlist data. ++ * ++ * NOTE: All of these constants and the header must be defined so they have the ++ * same representation for *both* 32-bit and 64-bit producers and consumers. 
++ */ ++#define ERPT_MAGIC 0xf00d4eddU ++#define ERPT_MAX_ERRS 16 ++#define ERPT_DATA_SZ (6 * 1024) ++#define ERPT_EVCH_MAX 256 ++#define ERPT_HIWAT 64 ++ ++typedef struct erpt_dump { ++ uint32_t ed_magic; /* ERPT_MAGIC or zero to indicate end */ ++ uint32_t ed_chksum; /* checksum32() of packed nvlist data */ ++ uint32_t ed_size; /* ereport (nvl) fixed buf size */ ++ uint32_t ed_pad; /* reserved for future use */ ++ hrtime_t ed_hrt_nsec; /* hrtime of this ereport */ ++ hrtime_t ed_hrt_base; /* hrtime sample corresponding to ed_tod_base */ ++ struct { ++ uint64_t sec; /* seconds since gettimeofday() Epoch */ ++ uint64_t nsec; /* nanoseconds past ed_tod_base.sec */ ++ } ed_tod_base; ++} erpt_dump_t; ++ ++#ifdef _KERNEL ++ ++#define ZEVENT_SHUTDOWN 0x1 ++ ++typedef void zevent_cb_t(nvlist_t *, nvlist_t *); ++ ++typedef struct zevent_s { ++ nvlist_t *ev_nvl; /* protected by the zevent_lock */ ++ nvlist_t *ev_detector; /* " */ ++ list_t ev_ze_list; /* " */ ++ list_node_t ev_node; /* " */ ++ zevent_cb_t *ev_cb; /* " */ ++} zevent_t; ++ ++typedef struct zfs_zevent { ++ zevent_t *ze_zevent; /* protected by the zevent_lock */ ++ list_node_t ze_node; /* " */ ++ uint64_t ze_dropped; /* " */ ++} zfs_zevent_t; ++ ++extern void fm_init(void); ++extern void fm_fini(void); ++extern void fm_nvprint(nvlist_t *); ++extern void zfs_zevent_post(nvlist_t *, nvlist_t *, zevent_cb_t *); ++extern void zfs_zevent_drain_all(int *); ++extern int zfs_zevent_fd_hold(int, minor_t *, zfs_zevent_t **); ++extern void zfs_zevent_fd_rele(int); ++extern int zfs_zevent_next(zfs_zevent_t *, nvlist_t **, uint64_t *, uint64_t *); ++extern int zfs_zevent_wait(zfs_zevent_t *); ++extern void zfs_zevent_init(zfs_zevent_t **); ++extern void zfs_zevent_destroy(zfs_zevent_t *); ++ ++#else ++ ++static inline void fm_init(void) { } ++static inline void fm_fini(void) { } ++ ++#endif /* _KERNEL */ ++ ++#ifdef __cplusplus ++} ++#endif ++ ++#endif /* _SYS_FM_UTIL_H */ +diff -uNr linux-3.2.33-go.orig/include/zfs/sys/fs/Makefile linux-3.2.33-go/include/zfs/sys/fs/Makefile +--- linux-3.2.33-go.orig/include/zfs/sys/fs/Makefile 1970-01-01 01:00:00.000000000 +0100 ++++ linux-3.2.33-go/include/zfs/sys/fs/Makefile 2012-11-16 23:25:34.337039473 +0100 +@@ -0,0 +1,659 @@ ++# Makefile.in generated by automake 1.11.6 from Makefile.am. ++# include/sys/fs/Makefile. Generated from Makefile.in by configure. ++ ++# Copyright (C) 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001, 2002, ++# 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010, 2011 Free Software ++# Foundation, Inc. ++# This Makefile.in is free software; the Free Software Foundation ++# gives unlimited permission to copy and/or distribute it, ++# with or without modifications, as long as this notice is preserved. ++ ++# This program is distributed in the hope that it will be useful, ++# but WITHOUT ANY WARRANTY, to the extent permitted by law; without ++# even the implied warranty of MERCHANTABILITY or FITNESS FOR A ++# PARTICULAR PURPOSE. 
++ ++ ++ ++ ++am__make_dryrun = \ ++ { \ ++ am__dry=no; \ ++ case $$MAKEFLAGS in \ ++ *\\[\ \ ]*) \ ++ echo 'am--echo: ; @echo "AM" OK' | $(MAKE) -f - 2>/dev/null \ ++ | grep '^AM OK$$' >/dev/null || am__dry=yes;; \ ++ *) \ ++ for am__flg in $$MAKEFLAGS; do \ ++ case $$am__flg in \ ++ *=*|--*) ;; \ ++ *n*) am__dry=yes; break;; \ ++ esac; \ ++ done;; \ ++ esac; \ ++ test $$am__dry = yes; \ ++ } ++pkgdatadir = $(datadir)/zfs ++pkgincludedir = $(includedir)/zfs ++pkglibdir = $(libdir)/zfs ++pkglibexecdir = $(libexecdir)/zfs ++am__cd = CDPATH="$${ZSH_VERSION+.}$(PATH_SEPARATOR)" && cd ++install_sh_DATA = $(install_sh) -c -m 644 ++install_sh_PROGRAM = $(install_sh) -c ++install_sh_SCRIPT = $(install_sh) -c ++INSTALL_HEADER = $(INSTALL_DATA) ++transform = $(program_transform_name) ++NORMAL_INSTALL = : ++PRE_INSTALL = : ++POST_INSTALL = : ++NORMAL_UNINSTALL = : ++PRE_UNINSTALL = : ++POST_UNINSTALL = : ++build_triplet = x86_64-unknown-linux-gnu ++host_triplet = x86_64-unknown-linux-gnu ++target_triplet = x86_64-unknown-linux-gnu ++subdir = include/sys/fs ++DIST_COMMON = $(am__kernel_HEADERS_DIST) $(am__libzfs_HEADERS_DIST) \ ++ $(srcdir)/Makefile.am $(srcdir)/Makefile.in ++ACLOCAL_M4 = $(top_srcdir)/aclocal.m4 ++am__aclocal_m4_deps = \ ++ $(top_srcdir)/config/always-no-unused-but-set-variable.m4 \ ++ $(top_srcdir)/config/kernel-automount.m4 \ ++ $(top_srcdir)/config/kernel-bdev-block-device-operations.m4 \ ++ $(top_srcdir)/config/kernel-bdev-logical-size.m4 \ ++ $(top_srcdir)/config/kernel-bdi-setup-and-register.m4 \ ++ $(top_srcdir)/config/kernel-bdi.m4 \ ++ $(top_srcdir)/config/kernel-bio-empty-barrier.m4 \ ++ $(top_srcdir)/config/kernel-bio-end-io-t-args.m4 \ ++ $(top_srcdir)/config/kernel-bio-failfast.m4 \ ++ $(top_srcdir)/config/kernel-bio-rw-syncio.m4 \ ++ $(top_srcdir)/config/kernel-blk-end-request.m4 \ ++ $(top_srcdir)/config/kernel-blk-fetch-request.m4 \ ++ $(top_srcdir)/config/kernel-blk-queue-discard.m4 \ ++ $(top_srcdir)/config/kernel-blk-queue-flush.m4 \ ++ $(top_srcdir)/config/kernel-blk-queue-io-opt.m4 \ ++ $(top_srcdir)/config/kernel-blk-queue-max-hw-sectors.m4 \ ++ $(top_srcdir)/config/kernel-blk-queue-max-segments.m4 \ ++ $(top_srcdir)/config/kernel-blk-queue-nonrot.m4 \ ++ $(top_srcdir)/config/kernel-blk-queue-physical-block-size.m4 \ ++ $(top_srcdir)/config/kernel-blk-requeue-request.m4 \ ++ $(top_srcdir)/config/kernel-blk-rq-bytes.m4 \ ++ $(top_srcdir)/config/kernel-blk-rq-pos.m4 \ ++ $(top_srcdir)/config/kernel-blk-rq-sectors.m4 \ ++ $(top_srcdir)/config/kernel-blkdev-get-by-path.m4 \ ++ $(top_srcdir)/config/kernel-blkdev-get.m4 \ ++ $(top_srcdir)/config/kernel-check-disk-size-change.m4 \ ++ $(top_srcdir)/config/kernel-clear-inode.m4 \ ++ $(top_srcdir)/config/kernel-commit-metadata.m4 \ ++ $(top_srcdir)/config/kernel-create-nameidata.m4 \ ++ $(top_srcdir)/config/kernel-d-make-root.m4 \ ++ $(top_srcdir)/config/kernel-d-obtain-alias.m4 \ ++ $(top_srcdir)/config/kernel-discard-granularity.m4 \ ++ $(top_srcdir)/config/kernel-elevator-change.m4 \ ++ $(top_srcdir)/config/kernel-encode-fh-inode.m4 \ ++ $(top_srcdir)/config/kernel-evict-inode.m4 \ ++ $(top_srcdir)/config/kernel-fallocate.m4 \ ++ $(top_srcdir)/config/kernel-fmode-t.m4 \ ++ $(top_srcdir)/config/kernel-fsync.m4 \ ++ $(top_srcdir)/config/kernel-get-disk-ro.m4 \ ++ $(top_srcdir)/config/kernel-get-gendisk.m4 \ ++ $(top_srcdir)/config/kernel-insert-inode-locked.m4 \ ++ $(top_srcdir)/config/kernel-invalidate-bdev-args.m4 \ ++ $(top_srcdir)/config/kernel-kobj-name-len.m4 \ ++ $(top_srcdir)/config/kernel-lookup-nameidata.m4 \ 
++ $(top_srcdir)/config/kernel-mkdir-umode-t.m4 \ ++ $(top_srcdir)/config/kernel-mount-nodev.m4 \ ++ $(top_srcdir)/config/kernel-open-bdev-exclusive.m4 \ ++ $(top_srcdir)/config/kernel-rq-for-each_segment.m4 \ ++ $(top_srcdir)/config/kernel-rq-is_sync.m4 \ ++ $(top_srcdir)/config/kernel-security-inode-init.m4 \ ++ $(top_srcdir)/config/kernel-set-nlink.m4 \ ++ $(top_srcdir)/config/kernel-sget-args.m4 \ ++ $(top_srcdir)/config/kernel-show-options.m4 \ ++ $(top_srcdir)/config/kernel-shrink.m4 \ ++ $(top_srcdir)/config/kernel-truncate-range.m4 \ ++ $(top_srcdir)/config/kernel-truncate-setsize.m4 \ ++ $(top_srcdir)/config/kernel-xattr-handler.m4 \ ++ $(top_srcdir)/config/kernel.m4 \ ++ $(top_srcdir)/config/user-arch.m4 \ ++ $(top_srcdir)/config/user-frame-larger-than.m4 \ ++ $(top_srcdir)/config/user-ioctl.m4 \ ++ $(top_srcdir)/config/user-libblkid.m4 \ ++ $(top_srcdir)/config/user-libuuid.m4 \ ++ $(top_srcdir)/config/user-nptl_guard_within_stack.m4 \ ++ $(top_srcdir)/config/user-selinux.m4 \ ++ $(top_srcdir)/config/user-udev.m4 \ ++ $(top_srcdir)/config/user-zlib.m4 $(top_srcdir)/config/user.m4 \ ++ $(top_srcdir)/config/zfs-build.m4 \ ++ $(top_srcdir)/config/zfs-meta.m4 $(top_srcdir)/configure.ac ++am__configure_deps = $(am__aclocal_m4_deps) $(CONFIGURE_DEPENDENCIES) \ ++ $(ACLOCAL_M4) ++mkinstalldirs = $(install_sh) -d ++CONFIG_HEADER = $(top_builddir)/zfs_config.h ++CONFIG_CLEAN_FILES = ++CONFIG_CLEAN_VPATH_FILES = ++AM_V_GEN = $(am__v_GEN_$(V)) ++am__v_GEN_ = $(am__v_GEN_$(AM_DEFAULT_VERBOSITY)) ++am__v_GEN_0 = @echo " GEN " $@; ++AM_V_at = $(am__v_at_$(V)) ++am__v_at_ = $(am__v_at_$(AM_DEFAULT_VERBOSITY)) ++am__v_at_0 = @ ++SOURCES = ++DIST_SOURCES = ++am__can_run_installinfo = \ ++ case $$AM_UPDATE_INFO_DIR in \ ++ n|no|NO) false;; \ ++ *) (install-info --version) >/dev/null 2>&1;; \ ++ esac ++am__kernel_HEADERS_DIST = $(top_srcdir)/include/sys/fs/zfs.h ++am__vpath_adj_setup = srcdirstrip=`echo "$(srcdir)" | sed 's|.|.|g'`; ++am__vpath_adj = case $$p in \ ++ $(srcdir)/*) f=`echo "$$p" | sed "s|^$$srcdirstrip/||"`;; \ ++ *) f=$$p;; \ ++ esac; ++am__strip_dir = f=`echo $$p | sed -e 's|^.*/||'`; ++am__install_max = 40 ++am__nobase_strip_setup = \ ++ srcdirstrip=`echo "$(srcdir)" | sed 's/[].[^$$\\*|]/\\\\&/g'` ++am__nobase_strip = \ ++ for p in $$list; do echo "$$p"; done | sed -e "s|$$srcdirstrip/||" ++am__nobase_list = $(am__nobase_strip_setup); \ ++ for p in $$list; do echo "$$p $$p"; done | \ ++ sed "s| $$srcdirstrip/| |;"' / .*\//!s/ .*/ ./; s,\( .*\)/[^/]*$$,\1,' | \ ++ $(AWK) 'BEGIN { files["."] = "" } { files[$$2] = files[$$2] " " $$1; \ ++ if (++n[$$2] == $(am__install_max)) \ ++ { print $$2, files[$$2]; n[$$2] = 0; files[$$2] = "" } } \ ++ END { for (dir in files) print dir, files[dir] }' ++am__base_list = \ ++ sed '$$!N;$$!N;$$!N;$$!N;$$!N;$$!N;$$!N;s/\n/ /g' | \ ++ sed '$$!N;$$!N;$$!N;$$!N;s/\n/ /g' ++am__uninstall_files_from_dir = { \ ++ test -z "$$files" \ ++ || { test ! -d "$$dir" && test ! -f "$$dir" && test ! 
-r "$$dir"; } \ ++ || { echo " ( cd '$$dir' && rm -f" $$files ")"; \ ++ $(am__cd) "$$dir" && rm -f $$files; }; \ ++ } ++am__installdirs = "$(DESTDIR)$(kerneldir)" "$(DESTDIR)$(libzfsdir)" ++am__libzfs_HEADERS_DIST = $(top_srcdir)/include/sys/fs/zfs.h ++HEADERS = $(kernel_HEADERS) $(libzfs_HEADERS) ++ETAGS = etags ++CTAGS = ctags ++DISTFILES = $(DIST_COMMON) $(DIST_SOURCES) $(TEXINFOS) $(EXTRA_DIST) ++ACLOCAL = ${SHELL} /root/zfs-0.6.0-rc12/config/missing --run aclocal-1.11 ++ALIEN = alien ++ALIEN_VERSION = ++AMTAR = $${TAR-tar} ++AM_DEFAULT_VERBOSITY = 1 ++AR = ar ++AUTOCONF = ${SHELL} /root/zfs-0.6.0-rc12/config/missing --run autoconf ++AUTOHEADER = ${SHELL} /root/zfs-0.6.0-rc12/config/missing --run autoheader ++AUTOMAKE = ${SHELL} /root/zfs-0.6.0-rc12/config/missing --run automake-1.11 ++AWK = gawk ++CC = gcc ++CCAS = gcc ++CCASDEPMODE = depmode=gcc3 ++CCASFLAGS = -g -O2 ++CCDEPMODE = depmode=gcc3 ++CFLAGS = -g -O2 ++CPP = gcc -E ++CPPFLAGS = ++CYGPATH_W = echo ++DEBUG_CFLAGS = -DNDEBUG ++DEBUG_DMU_TX = _without_debug_dmu_tx ++DEBUG_STACKFLAGS = ++DEBUG_ZFS = _without_debug ++DEFAULT_INIT_DIR = ${prefix}/etc/init.d ++DEFAULT_INIT_SCRIPT = gentoo ++DEFAULT_PACKAGE = tgz ++DEFS = -DHAVE_CONFIG_H ++DEPDIR = .deps ++DLLTOOL = false ++DPKG = dpkg ++DPKGBUILD = dpkg-buildpackage ++DPKGBUILD_VERSION = ++DPKG_VERSION = ++DSYMUTIL = ++DUMPBIN = ++ECHO_C = ++ECHO_N = -n ++ECHO_T = ++EGREP = /bin/grep -E ++EXEEXT = ++FGREP = /bin/grep -F ++FRAME_LARGER_THAN = -Wframe-larger-than=1024 ++GREP = /bin/grep ++HAVE_ALIEN = no ++HAVE_DPKG = no ++HAVE_DPKGBUILD = no ++HAVE_MAKEPKG = ++HAVE_PACMAN = ++HAVE_RPM = yes ++HAVE_RPMBUILD = yes ++INSTALL = /usr/bin/install -c ++INSTALL_DATA = ${INSTALL} -m 644 ++INSTALL_PROGRAM = ${INSTALL} ++INSTALL_SCRIPT = ${INSTALL} ++INSTALL_STRIP_PROGRAM = $(install_sh) -c -s ++KERNELCPPFLAGS = -Wno-unused-but-set-variable -DHAVE_SPL -D_KERNEL -DTEXT_DOMAIN=\"zfs-linux-kernel\" -DNDEBUG ++KERNELMAKE_PARAMS = O=/usr/src/linux-3.6.0-sabayon ++LD = /usr/x86_64-pc-linux-gnu/bin/ld -m elf_x86_64 ++LDFLAGS = ++LIBBLKID = ++LIBOBJS = ++LIBS = -luuid -luuid -lz -lz -lz ++LIBSELINUX = ++LIBTOOL = $(SHELL) $(top_builddir)/libtool ++LIBUUID = -luuid ++LINUX = /usr/src/linux-3.2.33-go ++LINUX_OBJ = /usr/src/linux-3.6.0-sabayon ++LINUX_SYMBOLS = NONE ++LINUX_VERSION = 3.6.0-sabayon ++LIPO = ++LN_S = ln -s ++LTLIBOBJS = ++MAINT = # ++MAKEINFO = ${SHELL} /root/zfs-0.6.0-rc12/config/missing --run makeinfo ++MAKEPKG = ++MAKEPKG_VERSION = ++MANIFEST_TOOL = : ++MKDIR_P = /bin/mkdir -p ++NM = /usr/bin/nm -B ++NMEDIT = ++NO_UNUSED_BUT_SET_VARIABLE = -Wno-unused-but-set-variable ++OBJDUMP = objdump ++OBJEXT = o ++OTOOL = ++OTOOL64 = ++PACKAGE = zfs ++PACKAGE_BUGREPORT = ++PACKAGE_NAME = ++PACKAGE_STRING = ++PACKAGE_TARNAME = ++PACKAGE_URL = ++PACKAGE_VERSION = ++PACMAN = ++PACMAN_VERSION = ++PATH_SEPARATOR = : ++RANLIB = ranlib ++RPM = rpm ++RPMBUILD = rpmbuild ++RPMBUILD_VERSION = 4.10.0 ++RPM_VERSION = 4.10.0 ++SED = /bin/sed ++SET_MAKE = ++SHELL = /bin/sh ++SPL = /usr/src/linux-3.2.33-go ++SPL_OBJ = /usr/src/linux-3.2.33-go ++SPL_SYMBOLS = NONE ++SPL_VERSION = 0.6.0-rc12 ++STRIP = strip ++TARGET_ASM_DIR = asm-x86_64 ++VENDOR = gentoo ++VERSION = 0.6.0 ++ZFS_CONFIG = all ++ZFS_META_ALIAS = zfs-0.6.0-rc12 ++ZFS_META_AUTHOR = Sun Microsystems/Oracle, Lawrence Livermore National Laboratory ++ZFS_META_DATA = ++ZFS_META_LICENSE = CDDL ++ZFS_META_LT_AGE = ++ZFS_META_LT_CURRENT = ++ZFS_META_LT_REVISION = ++ZFS_META_NAME = zfs ++ZFS_META_RELEASE = rc12 ++ZFS_META_VERSION = 0.6.0 ++ZLIB = -lz 
++abs_builddir = /root/zfs-0.6.0-rc12/include/sys/fs ++abs_srcdir = /root/zfs-0.6.0-rc12/include/sys/fs ++abs_top_builddir = /root/zfs-0.6.0-rc12 ++abs_top_srcdir = /root/zfs-0.6.0-rc12 ++ac_ct_AR = ar ++ac_ct_CC = gcc ++ac_ct_DUMPBIN = ++am__include = include ++am__leading_dot = . ++am__quote = ++am__tar = $${TAR-tar} chof - "$$tardir" ++am__untar = $${TAR-tar} xf - ++bindir = ${exec_prefix}/bin ++build = x86_64-unknown-linux-gnu ++build_alias = ++build_cpu = x86_64 ++build_os = linux-gnu ++build_vendor = unknown ++builddir = . ++datadir = ${datarootdir} ++datarootdir = ${prefix}/share ++docdir = ${datarootdir}/doc/${PACKAGE} ++dvidir = ${docdir} ++exec_prefix = ${prefix} ++host = x86_64-unknown-linux-gnu ++host_alias = ++host_cpu = x86_64 ++host_os = linux-gnu ++host_vendor = unknown ++htmldir = ${docdir} ++includedir = ${prefix}/include ++infodir = ${datarootdir}/info ++install_sh = ${SHELL} /root/zfs-0.6.0-rc12/config/install-sh ++libdir = ${exec_prefix}/lib ++libexecdir = ${exec_prefix}/libexec ++localedir = ${datarootdir}/locale ++localstatedir = ${prefix}/var ++mandir = ${datarootdir}/man ++mkdir_p = /bin/mkdir -p ++oldincludedir = /usr/include ++pdfdir = ${docdir} ++prefix = /usr/local ++program_transform_name = s,x,x, ++psdir = ${docdir} ++sbindir = ${exec_prefix}/sbin ++sharedstatedir = ${prefix}/com ++srcdir = . ++sysconfdir = ${prefix}/etc ++target = x86_64-unknown-linux-gnu ++target_alias = ++target_cpu = x86_64 ++target_os = linux-gnu ++target_vendor = unknown ++top_build_prefix = ../../../ ++top_builddir = ../../.. ++top_srcdir = ../../.. ++udevdir = ${exec_prefix}/lib/udev ++udevruledir = ${udevdir}/rules.d ++COMMON_H = \ ++ $(top_srcdir)/include/sys/fs/zfs.h ++ ++KERNEL_H = ++USER_H = ++EXTRA_DIST = $(COMMON_H) $(KERNEL_H) $(USER_H) ++libzfsdir = $(includedir)/libzfs/sys/fs ++libzfs_HEADERS = $(COMMON_H) $(USER_H) ++#kerneldir = /usr/src/zfs-$(ZFS_META_VERSION)-$(ZFS_META_RELEASE)/$(LINUX_VERSION)/sys/fs ++#kernel_HEADERS = $(COMMON_H) $(KERNEL_H) ++all: all-am ++ ++.SUFFIXES: ++$(srcdir)/Makefile.in: # $(srcdir)/Makefile.am $(am__configure_deps) ++ @for dep in $?; do \ ++ case '$(am__configure_deps)' in \ ++ *$$dep*) \ ++ ( cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh ) \ ++ && { if test -f $@; then exit 0; else break; fi; }; \ ++ exit 1;; \ ++ esac; \ ++ done; \ ++ echo ' cd $(top_srcdir) && $(AUTOMAKE) --gnu include/sys/fs/Makefile'; \ ++ $(am__cd) $(top_srcdir) && \ ++ $(AUTOMAKE) --gnu include/sys/fs/Makefile ++.PRECIOUS: Makefile ++Makefile: $(srcdir)/Makefile.in $(top_builddir)/config.status ++ @case '$?' 
in \ ++ *config.status*) \ ++ cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh;; \ ++ *) \ ++ echo ' cd $(top_builddir) && $(SHELL) ./config.status $(subdir)/$@ $(am__depfiles_maybe)'; \ ++ cd $(top_builddir) && $(SHELL) ./config.status $(subdir)/$@ $(am__depfiles_maybe);; \ ++ esac; ++ ++$(top_builddir)/config.status: $(top_srcdir)/configure $(CONFIG_STATUS_DEPENDENCIES) ++ cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh ++ ++$(top_srcdir)/configure: # $(am__configure_deps) ++ cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh ++$(ACLOCAL_M4): # $(am__aclocal_m4_deps) ++ cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh ++$(am__aclocal_m4_deps): ++ ++mostlyclean-libtool: ++ -rm -f *.lo ++ ++clean-libtool: ++ -rm -rf .libs _libs ++install-kernelHEADERS: $(kernel_HEADERS) ++ @$(NORMAL_INSTALL) ++ @list='$(kernel_HEADERS)'; test -n "$(kerneldir)" || list=; \ ++ if test -n "$$list"; then \ ++ echo " $(MKDIR_P) '$(DESTDIR)$(kerneldir)'"; \ ++ $(MKDIR_P) "$(DESTDIR)$(kerneldir)" || exit 1; \ ++ fi; \ ++ for p in $$list; do \ ++ if test -f "$$p"; then d=; else d="$(srcdir)/"; fi; \ ++ echo "$$d$$p"; \ ++ done | $(am__base_list) | \ ++ while read files; do \ ++ echo " $(INSTALL_HEADER) $$files '$(DESTDIR)$(kerneldir)'"; \ ++ $(INSTALL_HEADER) $$files "$(DESTDIR)$(kerneldir)" || exit $$?; \ ++ done ++ ++uninstall-kernelHEADERS: ++ @$(NORMAL_UNINSTALL) ++ @list='$(kernel_HEADERS)'; test -n "$(kerneldir)" || list=; \ ++ files=`for p in $$list; do echo $$p; done | sed -e 's|^.*/||'`; \ ++ dir='$(DESTDIR)$(kerneldir)'; $(am__uninstall_files_from_dir) ++install-libzfsHEADERS: $(libzfs_HEADERS) ++ @$(NORMAL_INSTALL) ++ @list='$(libzfs_HEADERS)'; test -n "$(libzfsdir)" || list=; \ ++ if test -n "$$list"; then \ ++ echo " $(MKDIR_P) '$(DESTDIR)$(libzfsdir)'"; \ ++ $(MKDIR_P) "$(DESTDIR)$(libzfsdir)" || exit 1; \ ++ fi; \ ++ for p in $$list; do \ ++ if test -f "$$p"; then d=; else d="$(srcdir)/"; fi; \ ++ echo "$$d$$p"; \ ++ done | $(am__base_list) | \ ++ while read files; do \ ++ echo " $(INSTALL_HEADER) $$files '$(DESTDIR)$(libzfsdir)'"; \ ++ $(INSTALL_HEADER) $$files "$(DESTDIR)$(libzfsdir)" || exit $$?; \ ++ done ++ ++uninstall-libzfsHEADERS: ++ @$(NORMAL_UNINSTALL) ++ @list='$(libzfs_HEADERS)'; test -n "$(libzfsdir)" || list=; \ ++ files=`for p in $$list; do echo $$p; done | sed -e 's|^.*/||'`; \ ++ dir='$(DESTDIR)$(libzfsdir)'; $(am__uninstall_files_from_dir) ++ ++ID: $(HEADERS) $(SOURCES) $(LISP) $(TAGS_FILES) ++ list='$(SOURCES) $(HEADERS) $(LISP) $(TAGS_FILES)'; \ ++ unique=`for i in $$list; do \ ++ if test -f "$$i"; then echo $$i; else echo $(srcdir)/$$i; fi; \ ++ done | \ ++ $(AWK) '{ files[$$0] = 1; nonempty = 1; } \ ++ END { if (nonempty) { for (i in files) print i; }; }'`; \ ++ mkid -fID $$unique ++tags: TAGS ++ ++TAGS: $(HEADERS) $(SOURCES) $(TAGS_DEPENDENCIES) \ ++ $(TAGS_FILES) $(LISP) ++ set x; \ ++ here=`pwd`; \ ++ list='$(SOURCES) $(HEADERS) $(LISP) $(TAGS_FILES)'; \ ++ unique=`for i in $$list; do \ ++ if test -f "$$i"; then echo $$i; else echo $(srcdir)/$$i; fi; \ ++ done | \ ++ $(AWK) '{ files[$$0] = 1; nonempty = 1; } \ ++ END { if (nonempty) { for (i in files) print i; }; }'`; \ ++ shift; \ ++ if test -z "$(ETAGS_ARGS)$$*$$unique"; then :; else \ ++ test -n "$$unique" || unique=$$empty_fix; \ ++ if test $$# -gt 0; then \ ++ $(ETAGS) $(ETAGSFLAGS) $(AM_ETAGSFLAGS) $(ETAGS_ARGS) \ ++ "$$@" $$unique; \ ++ else \ ++ $(ETAGS) $(ETAGSFLAGS) $(AM_ETAGSFLAGS) $(ETAGS_ARGS) \ ++ $$unique; \ ++ fi; \ ++ fi ++ctags: CTAGS ++CTAGS: $(HEADERS) $(SOURCES) 
$(TAGS_DEPENDENCIES) \ ++ $(TAGS_FILES) $(LISP) ++ list='$(SOURCES) $(HEADERS) $(LISP) $(TAGS_FILES)'; \ ++ unique=`for i in $$list; do \ ++ if test -f "$$i"; then echo $$i; else echo $(srcdir)/$$i; fi; \ ++ done | \ ++ $(AWK) '{ files[$$0] = 1; nonempty = 1; } \ ++ END { if (nonempty) { for (i in files) print i; }; }'`; \ ++ test -z "$(CTAGS_ARGS)$$unique" \ ++ || $(CTAGS) $(CTAGSFLAGS) $(AM_CTAGSFLAGS) $(CTAGS_ARGS) \ ++ $$unique ++ ++GTAGS: ++ here=`$(am__cd) $(top_builddir) && pwd` \ ++ && $(am__cd) $(top_srcdir) \ ++ && gtags -i $(GTAGS_ARGS) "$$here" ++ ++distclean-tags: ++ -rm -f TAGS ID GTAGS GRTAGS GSYMS GPATH tags ++ ++distdir: $(DISTFILES) ++ @srcdirstrip=`echo "$(srcdir)" | sed 's/[].[^$$\\*]/\\\\&/g'`; \ ++ topsrcdirstrip=`echo "$(top_srcdir)" | sed 's/[].[^$$\\*]/\\\\&/g'`; \ ++ list='$(DISTFILES)'; \ ++ dist_files=`for file in $$list; do echo $$file; done | \ ++ sed -e "s|^$$srcdirstrip/||;t" \ ++ -e "s|^$$topsrcdirstrip/|$(top_builddir)/|;t"`; \ ++ case $$dist_files in \ ++ */*) $(MKDIR_P) `echo "$$dist_files" | \ ++ sed '/\//!d;s|^|$(distdir)/|;s,/[^/]*$$,,' | \ ++ sort -u` ;; \ ++ esac; \ ++ for file in $$dist_files; do \ ++ if test -f $$file || test -d $$file; then d=.; else d=$(srcdir); fi; \ ++ if test -d $$d/$$file; then \ ++ dir=`echo "/$$file" | sed -e 's,/[^/]*$$,,'`; \ ++ if test -d "$(distdir)/$$file"; then \ ++ find "$(distdir)/$$file" -type d ! -perm -700 -exec chmod u+rwx {} \;; \ ++ fi; \ ++ if test -d $(srcdir)/$$file && test $$d != $(srcdir); then \ ++ cp -fpR $(srcdir)/$$file "$(distdir)$$dir" || exit 1; \ ++ find "$(distdir)/$$file" -type d ! -perm -700 -exec chmod u+rwx {} \;; \ ++ fi; \ ++ cp -fpR $$d/$$file "$(distdir)$$dir" || exit 1; \ ++ else \ ++ test -f "$(distdir)/$$file" \ ++ || cp -p $$d/$$file "$(distdir)/$$file" \ ++ || exit 1; \ ++ fi; \ ++ done ++check-am: all-am ++check: check-am ++all-am: Makefile $(HEADERS) ++installdirs: ++ for dir in "$(DESTDIR)$(kerneldir)" "$(DESTDIR)$(libzfsdir)"; do \ ++ test -z "$$dir" || $(MKDIR_P) "$$dir"; \ ++ done ++install: install-am ++install-exec: install-exec-am ++install-data: install-data-am ++uninstall: uninstall-am ++ ++install-am: all-am ++ @$(MAKE) $(AM_MAKEFLAGS) install-exec-am install-data-am ++ ++installcheck: installcheck-am ++install-strip: ++ if test -z '$(STRIP)'; then \ ++ $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \ ++ install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \ ++ install; \ ++ else \ ++ $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \ ++ install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \ ++ "INSTALL_PROGRAM_ENV=STRIPPROG='$(STRIP)'" install; \ ++ fi ++mostlyclean-generic: ++ ++clean-generic: ++ ++distclean-generic: ++ -test -z "$(CONFIG_CLEAN_FILES)" || rm -f $(CONFIG_CLEAN_FILES) ++ -test . = "$(srcdir)" || test -z "$(CONFIG_CLEAN_VPATH_FILES)" || rm -f $(CONFIG_CLEAN_VPATH_FILES) ++ ++maintainer-clean-generic: ++ @echo "This command is intended for maintainers to use" ++ @echo "it deletes files that may require special tools to rebuild." 
++clean: clean-am ++ ++clean-am: clean-generic clean-libtool mostlyclean-am ++ ++distclean: distclean-am ++ -rm -f Makefile ++distclean-am: clean-am distclean-generic distclean-tags ++ ++dvi: dvi-am ++ ++dvi-am: ++ ++html: html-am ++ ++html-am: ++ ++info: info-am ++ ++info-am: ++ ++install-data-am: install-kernelHEADERS install-libzfsHEADERS ++ ++install-dvi: install-dvi-am ++ ++install-dvi-am: ++ ++install-exec-am: ++ ++install-html: install-html-am ++ ++install-html-am: ++ ++install-info: install-info-am ++ ++install-info-am: ++ ++install-man: ++ ++install-pdf: install-pdf-am ++ ++install-pdf-am: ++ ++install-ps: install-ps-am ++ ++install-ps-am: ++ ++installcheck-am: ++ ++maintainer-clean: maintainer-clean-am ++ -rm -f Makefile ++maintainer-clean-am: distclean-am maintainer-clean-generic ++ ++mostlyclean: mostlyclean-am ++ ++mostlyclean-am: mostlyclean-generic mostlyclean-libtool ++ ++pdf: pdf-am ++ ++pdf-am: ++ ++ps: ps-am ++ ++ps-am: ++ ++uninstall-am: uninstall-kernelHEADERS uninstall-libzfsHEADERS ++ ++.MAKE: install-am install-strip ++ ++.PHONY: CTAGS GTAGS all all-am check check-am clean clean-generic \ ++ clean-libtool ctags distclean distclean-generic \ ++ distclean-libtool distclean-tags distdir dvi dvi-am html \ ++ html-am info info-am install install-am install-data \ ++ install-data-am install-dvi install-dvi-am install-exec \ ++ install-exec-am install-html install-html-am install-info \ ++ install-info-am install-kernelHEADERS install-libzfsHEADERS \ ++ install-man install-pdf install-pdf-am install-ps \ ++ install-ps-am install-strip installcheck installcheck-am \ ++ installdirs maintainer-clean maintainer-clean-generic \ ++ mostlyclean mostlyclean-generic mostlyclean-libtool pdf pdf-am \ ++ ps ps-am tags uninstall uninstall-am uninstall-kernelHEADERS \ ++ uninstall-libzfsHEADERS ++ ++ ++# Tell versions [3.59,3.63) of GNU make to not export all variables. ++# Otherwise a system limit (for SysV at least) may be exceeded. ++.NOEXPORT: +diff -uNr linux-3.2.33-go.orig/include/zfs/sys/fs/Makefile.am linux-3.2.33-go/include/zfs/sys/fs/Makefile.am +--- linux-3.2.33-go.orig/include/zfs/sys/fs/Makefile.am 1970-01-01 01:00:00.000000000 +0100 ++++ linux-3.2.33-go/include/zfs/sys/fs/Makefile.am 2012-11-16 23:25:34.337039473 +0100 +@@ -0,0 +1,18 @@ ++COMMON_H = \ ++ $(top_srcdir)/include/sys/fs/zfs.h ++ ++KERNEL_H = ++ ++USER_H = ++ ++EXTRA_DIST = $(COMMON_H) $(KERNEL_H) $(USER_H) ++ ++if CONFIG_USER ++libzfsdir = $(includedir)/libzfs/sys/fs ++libzfs_HEADERS = $(COMMON_H) $(USER_H) ++endif ++ ++if CONFIG_KERNEL ++kerneldir = /usr/src/zfs-$(ZFS_META_VERSION)-$(ZFS_META_RELEASE)/$(LINUX_VERSION)/sys/fs ++kernel_HEADERS = $(COMMON_H) $(KERNEL_H) ++endif +diff -uNr linux-3.2.33-go.orig/include/zfs/sys/fs/Makefile.in linux-3.2.33-go/include/zfs/sys/fs/Makefile.in +--- linux-3.2.33-go.orig/include/zfs/sys/fs/Makefile.in 1970-01-01 01:00:00.000000000 +0100 ++++ linux-3.2.33-go/include/zfs/sys/fs/Makefile.in 2012-11-16 23:25:34.337039473 +0100 +@@ -0,0 +1,659 @@ ++# Makefile.in generated by automake 1.11.6 from Makefile.am. ++# @configure_input@ ++ ++# Copyright (C) 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001, 2002, ++# 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010, 2011 Free Software ++# Foundation, Inc. ++# This Makefile.in is free software; the Free Software Foundation ++# gives unlimited permission to copy and/or distribute it, ++# with or without modifications, as long as this notice is preserved. 
++ ++# This program is distributed in the hope that it will be useful, ++# but WITHOUT ANY WARRANTY, to the extent permitted by law; without ++# even the implied warranty of MERCHANTABILITY or FITNESS FOR A ++# PARTICULAR PURPOSE. ++ ++@SET_MAKE@ ++ ++VPATH = @srcdir@ ++am__make_dryrun = \ ++ { \ ++ am__dry=no; \ ++ case $$MAKEFLAGS in \ ++ *\\[\ \ ]*) \ ++ echo 'am--echo: ; @echo "AM" OK' | $(MAKE) -f - 2>/dev/null \ ++ | grep '^AM OK$$' >/dev/null || am__dry=yes;; \ ++ *) \ ++ for am__flg in $$MAKEFLAGS; do \ ++ case $$am__flg in \ ++ *=*|--*) ;; \ ++ *n*) am__dry=yes; break;; \ ++ esac; \ ++ done;; \ ++ esac; \ ++ test $$am__dry = yes; \ ++ } ++pkgdatadir = $(datadir)/@PACKAGE@ ++pkgincludedir = $(includedir)/@PACKAGE@ ++pkglibdir = $(libdir)/@PACKAGE@ ++pkglibexecdir = $(libexecdir)/@PACKAGE@ ++am__cd = CDPATH="$${ZSH_VERSION+.}$(PATH_SEPARATOR)" && cd ++install_sh_DATA = $(install_sh) -c -m 644 ++install_sh_PROGRAM = $(install_sh) -c ++install_sh_SCRIPT = $(install_sh) -c ++INSTALL_HEADER = $(INSTALL_DATA) ++transform = $(program_transform_name) ++NORMAL_INSTALL = : ++PRE_INSTALL = : ++POST_INSTALL = : ++NORMAL_UNINSTALL = : ++PRE_UNINSTALL = : ++POST_UNINSTALL = : ++build_triplet = @build@ ++host_triplet = @host@ ++target_triplet = @target@ ++subdir = include/sys/fs ++DIST_COMMON = $(am__kernel_HEADERS_DIST) $(am__libzfs_HEADERS_DIST) \ ++ $(srcdir)/Makefile.am $(srcdir)/Makefile.in ++ACLOCAL_M4 = $(top_srcdir)/aclocal.m4 ++am__aclocal_m4_deps = \ ++ $(top_srcdir)/config/always-no-unused-but-set-variable.m4 \ ++ $(top_srcdir)/config/kernel-automount.m4 \ ++ $(top_srcdir)/config/kernel-bdev-block-device-operations.m4 \ ++ $(top_srcdir)/config/kernel-bdev-logical-size.m4 \ ++ $(top_srcdir)/config/kernel-bdi-setup-and-register.m4 \ ++ $(top_srcdir)/config/kernel-bdi.m4 \ ++ $(top_srcdir)/config/kernel-bio-empty-barrier.m4 \ ++ $(top_srcdir)/config/kernel-bio-end-io-t-args.m4 \ ++ $(top_srcdir)/config/kernel-bio-failfast.m4 \ ++ $(top_srcdir)/config/kernel-bio-rw-syncio.m4 \ ++ $(top_srcdir)/config/kernel-blk-end-request.m4 \ ++ $(top_srcdir)/config/kernel-blk-fetch-request.m4 \ ++ $(top_srcdir)/config/kernel-blk-queue-discard.m4 \ ++ $(top_srcdir)/config/kernel-blk-queue-flush.m4 \ ++ $(top_srcdir)/config/kernel-blk-queue-io-opt.m4 \ ++ $(top_srcdir)/config/kernel-blk-queue-max-hw-sectors.m4 \ ++ $(top_srcdir)/config/kernel-blk-queue-max-segments.m4 \ ++ $(top_srcdir)/config/kernel-blk-queue-nonrot.m4 \ ++ $(top_srcdir)/config/kernel-blk-queue-physical-block-size.m4 \ ++ $(top_srcdir)/config/kernel-blk-requeue-request.m4 \ ++ $(top_srcdir)/config/kernel-blk-rq-bytes.m4 \ ++ $(top_srcdir)/config/kernel-blk-rq-pos.m4 \ ++ $(top_srcdir)/config/kernel-blk-rq-sectors.m4 \ ++ $(top_srcdir)/config/kernel-blkdev-get-by-path.m4 \ ++ $(top_srcdir)/config/kernel-blkdev-get.m4 \ ++ $(top_srcdir)/config/kernel-check-disk-size-change.m4 \ ++ $(top_srcdir)/config/kernel-clear-inode.m4 \ ++ $(top_srcdir)/config/kernel-commit-metadata.m4 \ ++ $(top_srcdir)/config/kernel-create-nameidata.m4 \ ++ $(top_srcdir)/config/kernel-d-make-root.m4 \ ++ $(top_srcdir)/config/kernel-d-obtain-alias.m4 \ ++ $(top_srcdir)/config/kernel-discard-granularity.m4 \ ++ $(top_srcdir)/config/kernel-elevator-change.m4 \ ++ $(top_srcdir)/config/kernel-encode-fh-inode.m4 \ ++ $(top_srcdir)/config/kernel-evict-inode.m4 \ ++ $(top_srcdir)/config/kernel-fallocate.m4 \ ++ $(top_srcdir)/config/kernel-fmode-t.m4 \ ++ $(top_srcdir)/config/kernel-fsync.m4 \ ++ $(top_srcdir)/config/kernel-get-disk-ro.m4 \ ++ 
$(top_srcdir)/config/kernel-get-gendisk.m4 \ ++ $(top_srcdir)/config/kernel-insert-inode-locked.m4 \ ++ $(top_srcdir)/config/kernel-invalidate-bdev-args.m4 \ ++ $(top_srcdir)/config/kernel-kobj-name-len.m4 \ ++ $(top_srcdir)/config/kernel-lookup-nameidata.m4 \ ++ $(top_srcdir)/config/kernel-mkdir-umode-t.m4 \ ++ $(top_srcdir)/config/kernel-mount-nodev.m4 \ ++ $(top_srcdir)/config/kernel-open-bdev-exclusive.m4 \ ++ $(top_srcdir)/config/kernel-rq-for-each_segment.m4 \ ++ $(top_srcdir)/config/kernel-rq-is_sync.m4 \ ++ $(top_srcdir)/config/kernel-security-inode-init.m4 \ ++ $(top_srcdir)/config/kernel-set-nlink.m4 \ ++ $(top_srcdir)/config/kernel-sget-args.m4 \ ++ $(top_srcdir)/config/kernel-show-options.m4 \ ++ $(top_srcdir)/config/kernel-shrink.m4 \ ++ $(top_srcdir)/config/kernel-truncate-range.m4 \ ++ $(top_srcdir)/config/kernel-truncate-setsize.m4 \ ++ $(top_srcdir)/config/kernel-xattr-handler.m4 \ ++ $(top_srcdir)/config/kernel.m4 \ ++ $(top_srcdir)/config/user-arch.m4 \ ++ $(top_srcdir)/config/user-frame-larger-than.m4 \ ++ $(top_srcdir)/config/user-ioctl.m4 \ ++ $(top_srcdir)/config/user-libblkid.m4 \ ++ $(top_srcdir)/config/user-libuuid.m4 \ ++ $(top_srcdir)/config/user-nptl_guard_within_stack.m4 \ ++ $(top_srcdir)/config/user-selinux.m4 \ ++ $(top_srcdir)/config/user-udev.m4 \ ++ $(top_srcdir)/config/user-zlib.m4 $(top_srcdir)/config/user.m4 \ ++ $(top_srcdir)/config/zfs-build.m4 \ ++ $(top_srcdir)/config/zfs-meta.m4 $(top_srcdir)/configure.ac ++am__configure_deps = $(am__aclocal_m4_deps) $(CONFIGURE_DEPENDENCIES) \ ++ $(ACLOCAL_M4) ++mkinstalldirs = $(install_sh) -d ++CONFIG_HEADER = $(top_builddir)/zfs_config.h ++CONFIG_CLEAN_FILES = ++CONFIG_CLEAN_VPATH_FILES = ++AM_V_GEN = $(am__v_GEN_@AM_V@) ++am__v_GEN_ = $(am__v_GEN_@AM_DEFAULT_V@) ++am__v_GEN_0 = @echo " GEN " $@; ++AM_V_at = $(am__v_at_@AM_V@) ++am__v_at_ = $(am__v_at_@AM_DEFAULT_V@) ++am__v_at_0 = @ ++SOURCES = ++DIST_SOURCES = ++am__can_run_installinfo = \ ++ case $$AM_UPDATE_INFO_DIR in \ ++ n|no|NO) false;; \ ++ *) (install-info --version) >/dev/null 2>&1;; \ ++ esac ++am__kernel_HEADERS_DIST = $(top_srcdir)/include/sys/fs/zfs.h ++am__vpath_adj_setup = srcdirstrip=`echo "$(srcdir)" | sed 's|.|.|g'`; ++am__vpath_adj = case $$p in \ ++ $(srcdir)/*) f=`echo "$$p" | sed "s|^$$srcdirstrip/||"`;; \ ++ *) f=$$p;; \ ++ esac; ++am__strip_dir = f=`echo $$p | sed -e 's|^.*/||'`; ++am__install_max = 40 ++am__nobase_strip_setup = \ ++ srcdirstrip=`echo "$(srcdir)" | sed 's/[].[^$$\\*|]/\\\\&/g'` ++am__nobase_strip = \ ++ for p in $$list; do echo "$$p"; done | sed -e "s|$$srcdirstrip/||" ++am__nobase_list = $(am__nobase_strip_setup); \ ++ for p in $$list; do echo "$$p $$p"; done | \ ++ sed "s| $$srcdirstrip/| |;"' / .*\//!s/ .*/ ./; s,\( .*\)/[^/]*$$,\1,' | \ ++ $(AWK) 'BEGIN { files["."] = "" } { files[$$2] = files[$$2] " " $$1; \ ++ if (++n[$$2] == $(am__install_max)) \ ++ { print $$2, files[$$2]; n[$$2] = 0; files[$$2] = "" } } \ ++ END { for (dir in files) print dir, files[dir] }' ++am__base_list = \ ++ sed '$$!N;$$!N;$$!N;$$!N;$$!N;$$!N;$$!N;s/\n/ /g' | \ ++ sed '$$!N;$$!N;$$!N;$$!N;s/\n/ /g' ++am__uninstall_files_from_dir = { \ ++ test -z "$$files" \ ++ || { test ! -d "$$dir" && test ! -f "$$dir" && test ! 
-r "$$dir"; } \ ++ || { echo " ( cd '$$dir' && rm -f" $$files ")"; \ ++ $(am__cd) "$$dir" && rm -f $$files; }; \ ++ } ++am__installdirs = "$(DESTDIR)$(kerneldir)" "$(DESTDIR)$(libzfsdir)" ++am__libzfs_HEADERS_DIST = $(top_srcdir)/include/sys/fs/zfs.h ++HEADERS = $(kernel_HEADERS) $(libzfs_HEADERS) ++ETAGS = etags ++CTAGS = ctags ++DISTFILES = $(DIST_COMMON) $(DIST_SOURCES) $(TEXINFOS) $(EXTRA_DIST) ++ACLOCAL = @ACLOCAL@ ++ALIEN = @ALIEN@ ++ALIEN_VERSION = @ALIEN_VERSION@ ++AMTAR = @AMTAR@ ++AM_DEFAULT_VERBOSITY = @AM_DEFAULT_VERBOSITY@ ++AR = @AR@ ++AUTOCONF = @AUTOCONF@ ++AUTOHEADER = @AUTOHEADER@ ++AUTOMAKE = @AUTOMAKE@ ++AWK = @AWK@ ++CC = @CC@ ++CCAS = @CCAS@ ++CCASDEPMODE = @CCASDEPMODE@ ++CCASFLAGS = @CCASFLAGS@ ++CCDEPMODE = @CCDEPMODE@ ++CFLAGS = @CFLAGS@ ++CPP = @CPP@ ++CPPFLAGS = @CPPFLAGS@ ++CYGPATH_W = @CYGPATH_W@ ++DEBUG_CFLAGS = @DEBUG_CFLAGS@ ++DEBUG_DMU_TX = @DEBUG_DMU_TX@ ++DEBUG_STACKFLAGS = @DEBUG_STACKFLAGS@ ++DEBUG_ZFS = @DEBUG_ZFS@ ++DEFAULT_INIT_DIR = @DEFAULT_INIT_DIR@ ++DEFAULT_INIT_SCRIPT = @DEFAULT_INIT_SCRIPT@ ++DEFAULT_PACKAGE = @DEFAULT_PACKAGE@ ++DEFS = @DEFS@ ++DEPDIR = @DEPDIR@ ++DLLTOOL = @DLLTOOL@ ++DPKG = @DPKG@ ++DPKGBUILD = @DPKGBUILD@ ++DPKGBUILD_VERSION = @DPKGBUILD_VERSION@ ++DPKG_VERSION = @DPKG_VERSION@ ++DSYMUTIL = @DSYMUTIL@ ++DUMPBIN = @DUMPBIN@ ++ECHO_C = @ECHO_C@ ++ECHO_N = @ECHO_N@ ++ECHO_T = @ECHO_T@ ++EGREP = @EGREP@ ++EXEEXT = @EXEEXT@ ++FGREP = @FGREP@ ++FRAME_LARGER_THAN = @FRAME_LARGER_THAN@ ++GREP = @GREP@ ++HAVE_ALIEN = @HAVE_ALIEN@ ++HAVE_DPKG = @HAVE_DPKG@ ++HAVE_DPKGBUILD = @HAVE_DPKGBUILD@ ++HAVE_MAKEPKG = @HAVE_MAKEPKG@ ++HAVE_PACMAN = @HAVE_PACMAN@ ++HAVE_RPM = @HAVE_RPM@ ++HAVE_RPMBUILD = @HAVE_RPMBUILD@ ++INSTALL = @INSTALL@ ++INSTALL_DATA = @INSTALL_DATA@ ++INSTALL_PROGRAM = @INSTALL_PROGRAM@ ++INSTALL_SCRIPT = @INSTALL_SCRIPT@ ++INSTALL_STRIP_PROGRAM = @INSTALL_STRIP_PROGRAM@ ++KERNELCPPFLAGS = @KERNELCPPFLAGS@ ++KERNELMAKE_PARAMS = @KERNELMAKE_PARAMS@ ++LD = @LD@ ++LDFLAGS = @LDFLAGS@ ++LIBBLKID = @LIBBLKID@ ++LIBOBJS = @LIBOBJS@ ++LIBS = @LIBS@ ++LIBSELINUX = @LIBSELINUX@ ++LIBTOOL = @LIBTOOL@ ++LIBUUID = @LIBUUID@ ++LINUX = @LINUX@ ++LINUX_OBJ = @LINUX_OBJ@ ++LINUX_SYMBOLS = @LINUX_SYMBOLS@ ++LINUX_VERSION = @LINUX_VERSION@ ++LIPO = @LIPO@ ++LN_S = @LN_S@ ++LTLIBOBJS = @LTLIBOBJS@ ++MAINT = @MAINT@ ++MAKEINFO = @MAKEINFO@ ++MAKEPKG = @MAKEPKG@ ++MAKEPKG_VERSION = @MAKEPKG_VERSION@ ++MANIFEST_TOOL = @MANIFEST_TOOL@ ++MKDIR_P = @MKDIR_P@ ++NM = @NM@ ++NMEDIT = @NMEDIT@ ++NO_UNUSED_BUT_SET_VARIABLE = @NO_UNUSED_BUT_SET_VARIABLE@ ++OBJDUMP = @OBJDUMP@ ++OBJEXT = @OBJEXT@ ++OTOOL = @OTOOL@ ++OTOOL64 = @OTOOL64@ ++PACKAGE = @PACKAGE@ ++PACKAGE_BUGREPORT = @PACKAGE_BUGREPORT@ ++PACKAGE_NAME = @PACKAGE_NAME@ ++PACKAGE_STRING = @PACKAGE_STRING@ ++PACKAGE_TARNAME = @PACKAGE_TARNAME@ ++PACKAGE_URL = @PACKAGE_URL@ ++PACKAGE_VERSION = @PACKAGE_VERSION@ ++PACMAN = @PACMAN@ ++PACMAN_VERSION = @PACMAN_VERSION@ ++PATH_SEPARATOR = @PATH_SEPARATOR@ ++RANLIB = @RANLIB@ ++RPM = @RPM@ ++RPMBUILD = @RPMBUILD@ ++RPMBUILD_VERSION = @RPMBUILD_VERSION@ ++RPM_VERSION = @RPM_VERSION@ ++SED = @SED@ ++SET_MAKE = @SET_MAKE@ ++SHELL = @SHELL@ ++SPL = @SPL@ ++SPL_OBJ = @SPL_OBJ@ ++SPL_SYMBOLS = @SPL_SYMBOLS@ ++SPL_VERSION = @SPL_VERSION@ ++STRIP = @STRIP@ ++TARGET_ASM_DIR = @TARGET_ASM_DIR@ ++VENDOR = @VENDOR@ ++VERSION = @VERSION@ ++ZFS_CONFIG = @ZFS_CONFIG@ ++ZFS_META_ALIAS = @ZFS_META_ALIAS@ ++ZFS_META_AUTHOR = @ZFS_META_AUTHOR@ ++ZFS_META_DATA = @ZFS_META_DATA@ ++ZFS_META_LICENSE = @ZFS_META_LICENSE@ ++ZFS_META_LT_AGE = @ZFS_META_LT_AGE@ 
++ZFS_META_LT_CURRENT = @ZFS_META_LT_CURRENT@ ++ZFS_META_LT_REVISION = @ZFS_META_LT_REVISION@ ++ZFS_META_NAME = @ZFS_META_NAME@ ++ZFS_META_RELEASE = @ZFS_META_RELEASE@ ++ZFS_META_VERSION = @ZFS_META_VERSION@ ++ZLIB = @ZLIB@ ++abs_builddir = @abs_builddir@ ++abs_srcdir = @abs_srcdir@ ++abs_top_builddir = @abs_top_builddir@ ++abs_top_srcdir = @abs_top_srcdir@ ++ac_ct_AR = @ac_ct_AR@ ++ac_ct_CC = @ac_ct_CC@ ++ac_ct_DUMPBIN = @ac_ct_DUMPBIN@ ++am__include = @am__include@ ++am__leading_dot = @am__leading_dot@ ++am__quote = @am__quote@ ++am__tar = @am__tar@ ++am__untar = @am__untar@ ++bindir = @bindir@ ++build = @build@ ++build_alias = @build_alias@ ++build_cpu = @build_cpu@ ++build_os = @build_os@ ++build_vendor = @build_vendor@ ++builddir = @builddir@ ++datadir = @datadir@ ++datarootdir = @datarootdir@ ++docdir = @docdir@ ++dvidir = @dvidir@ ++exec_prefix = @exec_prefix@ ++host = @host@ ++host_alias = @host_alias@ ++host_cpu = @host_cpu@ ++host_os = @host_os@ ++host_vendor = @host_vendor@ ++htmldir = @htmldir@ ++includedir = @includedir@ ++infodir = @infodir@ ++install_sh = @install_sh@ ++libdir = @libdir@ ++libexecdir = @libexecdir@ ++localedir = @localedir@ ++localstatedir = @localstatedir@ ++mandir = @mandir@ ++mkdir_p = @mkdir_p@ ++oldincludedir = @oldincludedir@ ++pdfdir = @pdfdir@ ++prefix = @prefix@ ++program_transform_name = @program_transform_name@ ++psdir = @psdir@ ++sbindir = @sbindir@ ++sharedstatedir = @sharedstatedir@ ++srcdir = @srcdir@ ++sysconfdir = @sysconfdir@ ++target = @target@ ++target_alias = @target_alias@ ++target_cpu = @target_cpu@ ++target_os = @target_os@ ++target_vendor = @target_vendor@ ++top_build_prefix = @top_build_prefix@ ++top_builddir = @top_builddir@ ++top_srcdir = @top_srcdir@ ++udevdir = @udevdir@ ++udevruledir = @udevruledir@ ++COMMON_H = \ ++ $(top_srcdir)/include/sys/fs/zfs.h ++ ++KERNEL_H = ++USER_H = ++EXTRA_DIST = $(COMMON_H) $(KERNEL_H) $(USER_H) ++@CONFIG_USER_TRUE@libzfsdir = $(includedir)/libzfs/sys/fs ++@CONFIG_USER_TRUE@libzfs_HEADERS = $(COMMON_H) $(USER_H) ++@CONFIG_KERNEL_TRUE@kerneldir = /usr/src/zfs-$(ZFS_META_VERSION)-$(ZFS_META_RELEASE)/$(LINUX_VERSION)/sys/fs ++@CONFIG_KERNEL_TRUE@kernel_HEADERS = $(COMMON_H) $(KERNEL_H) ++all: all-am ++ ++.SUFFIXES: ++$(srcdir)/Makefile.in: @MAINTAINER_MODE_TRUE@ $(srcdir)/Makefile.am $(am__configure_deps) ++ @for dep in $?; do \ ++ case '$(am__configure_deps)' in \ ++ *$$dep*) \ ++ ( cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh ) \ ++ && { if test -f $@; then exit 0; else break; fi; }; \ ++ exit 1;; \ ++ esac; \ ++ done; \ ++ echo ' cd $(top_srcdir) && $(AUTOMAKE) --gnu include/sys/fs/Makefile'; \ ++ $(am__cd) $(top_srcdir) && \ ++ $(AUTOMAKE) --gnu include/sys/fs/Makefile ++.PRECIOUS: Makefile ++Makefile: $(srcdir)/Makefile.in $(top_builddir)/config.status ++ @case '$?' 
in \ ++ *config.status*) \ ++ cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh;; \ ++ *) \ ++ echo ' cd $(top_builddir) && $(SHELL) ./config.status $(subdir)/$@ $(am__depfiles_maybe)'; \ ++ cd $(top_builddir) && $(SHELL) ./config.status $(subdir)/$@ $(am__depfiles_maybe);; \ ++ esac; ++ ++$(top_builddir)/config.status: $(top_srcdir)/configure $(CONFIG_STATUS_DEPENDENCIES) ++ cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh ++ ++$(top_srcdir)/configure: @MAINTAINER_MODE_TRUE@ $(am__configure_deps) ++ cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh ++$(ACLOCAL_M4): @MAINTAINER_MODE_TRUE@ $(am__aclocal_m4_deps) ++ cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh ++$(am__aclocal_m4_deps): ++ ++mostlyclean-libtool: ++ -rm -f *.lo ++ ++clean-libtool: ++ -rm -rf .libs _libs ++install-kernelHEADERS: $(kernel_HEADERS) ++ @$(NORMAL_INSTALL) ++ @list='$(kernel_HEADERS)'; test -n "$(kerneldir)" || list=; \ ++ if test -n "$$list"; then \ ++ echo " $(MKDIR_P) '$(DESTDIR)$(kerneldir)'"; \ ++ $(MKDIR_P) "$(DESTDIR)$(kerneldir)" || exit 1; \ ++ fi; \ ++ for p in $$list; do \ ++ if test -f "$$p"; then d=; else d="$(srcdir)/"; fi; \ ++ echo "$$d$$p"; \ ++ done | $(am__base_list) | \ ++ while read files; do \ ++ echo " $(INSTALL_HEADER) $$files '$(DESTDIR)$(kerneldir)'"; \ ++ $(INSTALL_HEADER) $$files "$(DESTDIR)$(kerneldir)" || exit $$?; \ ++ done ++ ++uninstall-kernelHEADERS: ++ @$(NORMAL_UNINSTALL) ++ @list='$(kernel_HEADERS)'; test -n "$(kerneldir)" || list=; \ ++ files=`for p in $$list; do echo $$p; done | sed -e 's|^.*/||'`; \ ++ dir='$(DESTDIR)$(kerneldir)'; $(am__uninstall_files_from_dir) ++install-libzfsHEADERS: $(libzfs_HEADERS) ++ @$(NORMAL_INSTALL) ++ @list='$(libzfs_HEADERS)'; test -n "$(libzfsdir)" || list=; \ ++ if test -n "$$list"; then \ ++ echo " $(MKDIR_P) '$(DESTDIR)$(libzfsdir)'"; \ ++ $(MKDIR_P) "$(DESTDIR)$(libzfsdir)" || exit 1; \ ++ fi; \ ++ for p in $$list; do \ ++ if test -f "$$p"; then d=; else d="$(srcdir)/"; fi; \ ++ echo "$$d$$p"; \ ++ done | $(am__base_list) | \ ++ while read files; do \ ++ echo " $(INSTALL_HEADER) $$files '$(DESTDIR)$(libzfsdir)'"; \ ++ $(INSTALL_HEADER) $$files "$(DESTDIR)$(libzfsdir)" || exit $$?; \ ++ done ++ ++uninstall-libzfsHEADERS: ++ @$(NORMAL_UNINSTALL) ++ @list='$(libzfs_HEADERS)'; test -n "$(libzfsdir)" || list=; \ ++ files=`for p in $$list; do echo $$p; done | sed -e 's|^.*/||'`; \ ++ dir='$(DESTDIR)$(libzfsdir)'; $(am__uninstall_files_from_dir) ++ ++ID: $(HEADERS) $(SOURCES) $(LISP) $(TAGS_FILES) ++ list='$(SOURCES) $(HEADERS) $(LISP) $(TAGS_FILES)'; \ ++ unique=`for i in $$list; do \ ++ if test -f "$$i"; then echo $$i; else echo $(srcdir)/$$i; fi; \ ++ done | \ ++ $(AWK) '{ files[$$0] = 1; nonempty = 1; } \ ++ END { if (nonempty) { for (i in files) print i; }; }'`; \ ++ mkid -fID $$unique ++tags: TAGS ++ ++TAGS: $(HEADERS) $(SOURCES) $(TAGS_DEPENDENCIES) \ ++ $(TAGS_FILES) $(LISP) ++ set x; \ ++ here=`pwd`; \ ++ list='$(SOURCES) $(HEADERS) $(LISP) $(TAGS_FILES)'; \ ++ unique=`for i in $$list; do \ ++ if test -f "$$i"; then echo $$i; else echo $(srcdir)/$$i; fi; \ ++ done | \ ++ $(AWK) '{ files[$$0] = 1; nonempty = 1; } \ ++ END { if (nonempty) { for (i in files) print i; }; }'`; \ ++ shift; \ ++ if test -z "$(ETAGS_ARGS)$$*$$unique"; then :; else \ ++ test -n "$$unique" || unique=$$empty_fix; \ ++ if test $$# -gt 0; then \ ++ $(ETAGS) $(ETAGSFLAGS) $(AM_ETAGSFLAGS) $(ETAGS_ARGS) \ ++ "$$@" $$unique; \ ++ else \ ++ $(ETAGS) $(ETAGSFLAGS) $(AM_ETAGSFLAGS) $(ETAGS_ARGS) \ ++ $$unique; \ ++ fi; \ ++ fi ++ctags: 
CTAGS ++CTAGS: $(HEADERS) $(SOURCES) $(TAGS_DEPENDENCIES) \ ++ $(TAGS_FILES) $(LISP) ++ list='$(SOURCES) $(HEADERS) $(LISP) $(TAGS_FILES)'; \ ++ unique=`for i in $$list; do \ ++ if test -f "$$i"; then echo $$i; else echo $(srcdir)/$$i; fi; \ ++ done | \ ++ $(AWK) '{ files[$$0] = 1; nonempty = 1; } \ ++ END { if (nonempty) { for (i in files) print i; }; }'`; \ ++ test -z "$(CTAGS_ARGS)$$unique" \ ++ || $(CTAGS) $(CTAGSFLAGS) $(AM_CTAGSFLAGS) $(CTAGS_ARGS) \ ++ $$unique ++ ++GTAGS: ++ here=`$(am__cd) $(top_builddir) && pwd` \ ++ && $(am__cd) $(top_srcdir) \ ++ && gtags -i $(GTAGS_ARGS) "$$here" ++ ++distclean-tags: ++ -rm -f TAGS ID GTAGS GRTAGS GSYMS GPATH tags ++ ++distdir: $(DISTFILES) ++ @srcdirstrip=`echo "$(srcdir)" | sed 's/[].[^$$\\*]/\\\\&/g'`; \ ++ topsrcdirstrip=`echo "$(top_srcdir)" | sed 's/[].[^$$\\*]/\\\\&/g'`; \ ++ list='$(DISTFILES)'; \ ++ dist_files=`for file in $$list; do echo $$file; done | \ ++ sed -e "s|^$$srcdirstrip/||;t" \ ++ -e "s|^$$topsrcdirstrip/|$(top_builddir)/|;t"`; \ ++ case $$dist_files in \ ++ */*) $(MKDIR_P) `echo "$$dist_files" | \ ++ sed '/\//!d;s|^|$(distdir)/|;s,/[^/]*$$,,' | \ ++ sort -u` ;; \ ++ esac; \ ++ for file in $$dist_files; do \ ++ if test -f $$file || test -d $$file; then d=.; else d=$(srcdir); fi; \ ++ if test -d $$d/$$file; then \ ++ dir=`echo "/$$file" | sed -e 's,/[^/]*$$,,'`; \ ++ if test -d "$(distdir)/$$file"; then \ ++ find "$(distdir)/$$file" -type d ! -perm -700 -exec chmod u+rwx {} \;; \ ++ fi; \ ++ if test -d $(srcdir)/$$file && test $$d != $(srcdir); then \ ++ cp -fpR $(srcdir)/$$file "$(distdir)$$dir" || exit 1; \ ++ find "$(distdir)/$$file" -type d ! -perm -700 -exec chmod u+rwx {} \;; \ ++ fi; \ ++ cp -fpR $$d/$$file "$(distdir)$$dir" || exit 1; \ ++ else \ ++ test -f "$(distdir)/$$file" \ ++ || cp -p $$d/$$file "$(distdir)/$$file" \ ++ || exit 1; \ ++ fi; \ ++ done ++check-am: all-am ++check: check-am ++all-am: Makefile $(HEADERS) ++installdirs: ++ for dir in "$(DESTDIR)$(kerneldir)" "$(DESTDIR)$(libzfsdir)"; do \ ++ test -z "$$dir" || $(MKDIR_P) "$$dir"; \ ++ done ++install: install-am ++install-exec: install-exec-am ++install-data: install-data-am ++uninstall: uninstall-am ++ ++install-am: all-am ++ @$(MAKE) $(AM_MAKEFLAGS) install-exec-am install-data-am ++ ++installcheck: installcheck-am ++install-strip: ++ if test -z '$(STRIP)'; then \ ++ $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \ ++ install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \ ++ install; \ ++ else \ ++ $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \ ++ install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \ ++ "INSTALL_PROGRAM_ENV=STRIPPROG='$(STRIP)'" install; \ ++ fi ++mostlyclean-generic: ++ ++clean-generic: ++ ++distclean-generic: ++ -test -z "$(CONFIG_CLEAN_FILES)" || rm -f $(CONFIG_CLEAN_FILES) ++ -test . = "$(srcdir)" || test -z "$(CONFIG_CLEAN_VPATH_FILES)" || rm -f $(CONFIG_CLEAN_VPATH_FILES) ++ ++maintainer-clean-generic: ++ @echo "This command is intended for maintainers to use" ++ @echo "it deletes files that may require special tools to rebuild." 
++clean: clean-am ++ ++clean-am: clean-generic clean-libtool mostlyclean-am ++ ++distclean: distclean-am ++ -rm -f Makefile ++distclean-am: clean-am distclean-generic distclean-tags ++ ++dvi: dvi-am ++ ++dvi-am: ++ ++html: html-am ++ ++html-am: ++ ++info: info-am ++ ++info-am: ++ ++install-data-am: install-kernelHEADERS install-libzfsHEADERS ++ ++install-dvi: install-dvi-am ++ ++install-dvi-am: ++ ++install-exec-am: ++ ++install-html: install-html-am ++ ++install-html-am: ++ ++install-info: install-info-am ++ ++install-info-am: ++ ++install-man: ++ ++install-pdf: install-pdf-am ++ ++install-pdf-am: ++ ++install-ps: install-ps-am ++ ++install-ps-am: ++ ++installcheck-am: ++ ++maintainer-clean: maintainer-clean-am ++ -rm -f Makefile ++maintainer-clean-am: distclean-am maintainer-clean-generic ++ ++mostlyclean: mostlyclean-am ++ ++mostlyclean-am: mostlyclean-generic mostlyclean-libtool ++ ++pdf: pdf-am ++ ++pdf-am: ++ ++ps: ps-am ++ ++ps-am: ++ ++uninstall-am: uninstall-kernelHEADERS uninstall-libzfsHEADERS ++ ++.MAKE: install-am install-strip ++ ++.PHONY: CTAGS GTAGS all all-am check check-am clean clean-generic \ ++ clean-libtool ctags distclean distclean-generic \ ++ distclean-libtool distclean-tags distdir dvi dvi-am html \ ++ html-am info info-am install install-am install-data \ ++ install-data-am install-dvi install-dvi-am install-exec \ ++ install-exec-am install-html install-html-am install-info \ ++ install-info-am install-kernelHEADERS install-libzfsHEADERS \ ++ install-man install-pdf install-pdf-am install-ps \ ++ install-ps-am install-strip installcheck installcheck-am \ ++ installdirs maintainer-clean maintainer-clean-generic \ ++ mostlyclean mostlyclean-generic mostlyclean-libtool pdf pdf-am \ ++ ps ps-am tags uninstall uninstall-am uninstall-kernelHEADERS \ ++ uninstall-libzfsHEADERS ++ ++ ++# Tell versions [3.59,3.63) of GNU make to not export all variables. ++# Otherwise a system limit (for SysV at least) may be exceeded. ++.NOEXPORT: +diff -uNr linux-3.2.33-go.orig/include/zfs/sys/fs/zfs.h linux-3.2.33-go/include/zfs/sys/fs/zfs.h +--- linux-3.2.33-go.orig/include/zfs/sys/fs/zfs.h 1970-01-01 01:00:00.000000000 +0100 ++++ linux-3.2.33-go/include/zfs/sys/fs/zfs.h 2012-11-16 23:25:34.337039473 +0100 +@@ -0,0 +1,948 @@ ++/* ++ * CDDL HEADER START ++ * ++ * The contents of this file are subject to the terms of the ++ * Common Development and Distribution License (the "License"). ++ * You may not use this file except in compliance with the License. ++ * ++ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE ++ * or http://www.opensolaris.org/os/licensing. ++ * See the License for the specific language governing permissions ++ * and limitations under the License. ++ * ++ * When distributing Covered Code, include this CDDL HEADER in each ++ * file and include the License file at usr/src/OPENSOLARIS.LICENSE. ++ * If applicable, add the following below this CDDL HEADER, with the ++ * fields enclosed by brackets "[]" replaced with your own identifying ++ * information: Portions Copyright [yyyy] [name of copyright owner] ++ * ++ * CDDL HEADER END ++ */ ++ ++/* ++ * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2012 by Delphix. All rights reserved. ++ * Copyright 2011 Nexenta Systems, Inc. All rights reserved. ++ * Copyright (c) 2012, Joyent, Inc. All rights reserved. 
++ */ ++ ++/* Portions Copyright 2010 Robert Milkowski */ ++ ++#ifndef _SYS_FS_ZFS_H ++#define _SYS_FS_ZFS_H ++ ++#include ++ ++#ifdef __cplusplus ++extern "C" { ++#endif ++ ++/* ++ * Types and constants shared between userland and the kernel. ++ */ ++ ++/* ++ * Each dataset can be one of the following types. These constants can be ++ * combined into masks that can be passed to various functions. ++ */ ++typedef enum { ++ ZFS_TYPE_FILESYSTEM = 0x1, ++ ZFS_TYPE_SNAPSHOT = 0x2, ++ ZFS_TYPE_VOLUME = 0x4, ++ ZFS_TYPE_POOL = 0x8 ++} zfs_type_t; ++ ++#define ZFS_TYPE_DATASET \ ++ (ZFS_TYPE_FILESYSTEM | ZFS_TYPE_VOLUME | ZFS_TYPE_SNAPSHOT) ++ ++#define ZAP_MAXNAMELEN 256 ++#define ZAP_MAXVALUELEN (1024 * 8) ++#define ZAP_OLDMAXVALUELEN 1024 ++ ++/* ++ * Dataset properties are identified by these constants and must be added to ++ * the end of this list to ensure that external consumers are not affected ++ * by the change. If you make any changes to this list, be sure to update ++ * the property table in usr/src/common/zfs/zfs_prop.c. ++ */ ++typedef enum { ++ ZFS_PROP_TYPE, ++ ZFS_PROP_CREATION, ++ ZFS_PROP_USED, ++ ZFS_PROP_AVAILABLE, ++ ZFS_PROP_REFERENCED, ++ ZFS_PROP_COMPRESSRATIO, ++ ZFS_PROP_MOUNTED, ++ ZFS_PROP_ORIGIN, ++ ZFS_PROP_QUOTA, ++ ZFS_PROP_RESERVATION, ++ ZFS_PROP_VOLSIZE, ++ ZFS_PROP_VOLBLOCKSIZE, ++ ZFS_PROP_RECORDSIZE, ++ ZFS_PROP_MOUNTPOINT, ++ ZFS_PROP_SHARENFS, ++ ZFS_PROP_CHECKSUM, ++ ZFS_PROP_COMPRESSION, ++ ZFS_PROP_ATIME, ++ ZFS_PROP_DEVICES, ++ ZFS_PROP_EXEC, ++ ZFS_PROP_SETUID, ++ ZFS_PROP_READONLY, ++ ZFS_PROP_ZONED, ++ ZFS_PROP_SNAPDIR, ++ ZFS_PROP_PRIVATE, /* not exposed to user, temporary */ ++ ZFS_PROP_ACLINHERIT, ++ ZFS_PROP_CREATETXG, /* not exposed to the user */ ++ ZFS_PROP_NAME, /* not exposed to the user */ ++ ZFS_PROP_CANMOUNT, ++ ZFS_PROP_ISCSIOPTIONS, /* not exposed to the user */ ++ ZFS_PROP_XATTR, ++ ZFS_PROP_NUMCLONES, /* not exposed to the user */ ++ ZFS_PROP_COPIES, ++ ZFS_PROP_VERSION, ++ ZFS_PROP_UTF8ONLY, ++ ZFS_PROP_NORMALIZE, ++ ZFS_PROP_CASE, ++ ZFS_PROP_VSCAN, ++ ZFS_PROP_NBMAND, ++ ZFS_PROP_SHARESMB, ++ ZFS_PROP_REFQUOTA, ++ ZFS_PROP_REFRESERVATION, ++ ZFS_PROP_GUID, ++ ZFS_PROP_PRIMARYCACHE, ++ ZFS_PROP_SECONDARYCACHE, ++ ZFS_PROP_USEDSNAP, ++ ZFS_PROP_USEDDS, ++ ZFS_PROP_USEDCHILD, ++ ZFS_PROP_USEDREFRESERV, ++ ZFS_PROP_USERACCOUNTING, /* not exposed to the user */ ++ ZFS_PROP_STMF_SHAREINFO, /* not exposed to the user */ ++ ZFS_PROP_DEFER_DESTROY, ++ ZFS_PROP_USERREFS, ++ ZFS_PROP_LOGBIAS, ++ ZFS_PROP_UNIQUE, /* not exposed to the user */ ++ ZFS_PROP_OBJSETID, /* not exposed to the user */ ++ ZFS_PROP_DEDUP, ++ ZFS_PROP_MLSLABEL, ++ ZFS_PROP_SYNC, ++ ZFS_PROP_REFRATIO, ++ ZFS_PROP_WRITTEN, ++ ZFS_PROP_CLONES, ++ ZFS_NUM_PROPS ++} zfs_prop_t; ++ ++typedef enum { ++ ZFS_PROP_USERUSED, ++ ZFS_PROP_USERQUOTA, ++ ZFS_PROP_GROUPUSED, ++ ZFS_PROP_GROUPQUOTA, ++ ZFS_NUM_USERQUOTA_PROPS ++} zfs_userquota_prop_t; ++ ++extern const char *zfs_userquota_prop_prefixes[ZFS_NUM_USERQUOTA_PROPS]; ++ ++/* ++ * Pool properties are identified by these constants and must be added to the ++ * end of this list to ensure that external consumers are not affected ++ * by the change. If you make any changes to this list, be sure to update ++ * the property table in usr/src/common/zfs/zpool_prop.c. 
++ */ ++typedef enum { ++ ZPOOL_PROP_NAME, ++ ZPOOL_PROP_SIZE, ++ ZPOOL_PROP_CAPACITY, ++ ZPOOL_PROP_ALTROOT, ++ ZPOOL_PROP_HEALTH, ++ ZPOOL_PROP_GUID, ++ ZPOOL_PROP_VERSION, ++ ZPOOL_PROP_BOOTFS, ++ ZPOOL_PROP_DELEGATION, ++ ZPOOL_PROP_AUTOREPLACE, ++ ZPOOL_PROP_CACHEFILE, ++ ZPOOL_PROP_FAILUREMODE, ++ ZPOOL_PROP_LISTSNAPS, ++ ZPOOL_PROP_AUTOEXPAND, ++ ZPOOL_PROP_DEDUPDITTO, ++ ZPOOL_PROP_DEDUPRATIO, ++ ZPOOL_PROP_FREE, ++ ZPOOL_PROP_ALLOCATED, ++ ZPOOL_PROP_READONLY, ++ ZPOOL_PROP_ASHIFT, ++ ZPOOL_PROP_COMMENT, ++ ZPOOL_PROP_EXPANDSZ, ++ ZPOOL_NUM_PROPS ++} zpool_prop_t; ++ ++/* Small enough to not hog a whole line of printout in zpool(1M). */ ++#define ZPROP_MAX_COMMENT 32 ++ ++#define ZPROP_CONT -2 ++#define ZPROP_INVAL -1 ++ ++#define ZPROP_VALUE "value" ++#define ZPROP_SOURCE "source" ++ ++typedef enum { ++ ZPROP_SRC_NONE = 0x1, ++ ZPROP_SRC_DEFAULT = 0x2, ++ ZPROP_SRC_TEMPORARY = 0x4, ++ ZPROP_SRC_LOCAL = 0x8, ++ ZPROP_SRC_INHERITED = 0x10, ++ ZPROP_SRC_RECEIVED = 0x20 ++} zprop_source_t; ++ ++#define ZPROP_SRC_ALL 0x3f ++ ++#define ZPROP_SOURCE_VAL_RECVD "$recvd" ++#define ZPROP_N_MORE_ERRORS "N_MORE_ERRORS" ++/* ++ * Dataset flag implemented as a special entry in the props zap object ++ * indicating that the dataset has received properties on or after ++ * SPA_VERSION_RECVD_PROPS. The first such receive blows away local properties ++ * just as it did in earlier versions, and thereafter, local properties are ++ * preserved. ++ */ ++#define ZPROP_HAS_RECVD "$hasrecvd" ++ ++typedef enum { ++ ZPROP_ERR_NOCLEAR = 0x1, /* failure to clear existing props */ ++ ZPROP_ERR_NORESTORE = 0x2 /* failure to restore props on error */ ++} zprop_errflags_t; ++ ++typedef int (*zprop_func)(int, void *); ++ ++/* ++ * Properties to be set on the root file system of a new pool ++ * are stuffed into their own nvlist, which is then included in ++ * the properties nvlist with the pool properties. ++ */ ++#define ZPOOL_ROOTFS_PROPS "root-props-nvl" ++ ++/* ++ * Dataset property functions shared between libzfs and kernel. ++ */ ++const char *zfs_prop_default_string(zfs_prop_t); ++uint64_t zfs_prop_default_numeric(zfs_prop_t); ++boolean_t zfs_prop_readonly(zfs_prop_t); ++boolean_t zfs_prop_inheritable(zfs_prop_t); ++boolean_t zfs_prop_setonce(zfs_prop_t); ++const char *zfs_prop_to_name(zfs_prop_t); ++zfs_prop_t zfs_name_to_prop(const char *); ++boolean_t zfs_prop_user(const char *); ++boolean_t zfs_prop_userquota(const char *); ++boolean_t zfs_prop_written(const char *); ++int zfs_prop_index_to_string(zfs_prop_t, uint64_t, const char **); ++int zfs_prop_string_to_index(zfs_prop_t, const char *, uint64_t *); ++uint64_t zfs_prop_random_value(zfs_prop_t, uint64_t seed); ++boolean_t zfs_prop_valid_for_type(int, zfs_type_t); ++ ++/* ++ * Pool property functions shared between libzfs and kernel. ++ */ ++zpool_prop_t zpool_name_to_prop(const char *); ++const char *zpool_prop_to_name(zpool_prop_t); ++const char *zpool_prop_default_string(zpool_prop_t); ++uint64_t zpool_prop_default_numeric(zpool_prop_t); ++boolean_t zpool_prop_readonly(zpool_prop_t); ++int zpool_prop_index_to_string(zpool_prop_t, uint64_t, const char **); ++int zpool_prop_string_to_index(zpool_prop_t, const char *, uint64_t *); ++uint64_t zpool_prop_random_value(zpool_prop_t, uint64_t seed); ++ ++/* ++ * Definitions for the Delegation. 
++ */ ++typedef enum { ++ ZFS_DELEG_WHO_UNKNOWN = 0, ++ ZFS_DELEG_USER = 'u', ++ ZFS_DELEG_USER_SETS = 'U', ++ ZFS_DELEG_GROUP = 'g', ++ ZFS_DELEG_GROUP_SETS = 'G', ++ ZFS_DELEG_EVERYONE = 'e', ++ ZFS_DELEG_EVERYONE_SETS = 'E', ++ ZFS_DELEG_CREATE = 'c', ++ ZFS_DELEG_CREATE_SETS = 'C', ++ ZFS_DELEG_NAMED_SET = 's', ++ ZFS_DELEG_NAMED_SET_SETS = 'S' ++} zfs_deleg_who_type_t; ++ ++typedef enum { ++ ZFS_DELEG_NONE = 0, ++ ZFS_DELEG_PERM_LOCAL = 1, ++ ZFS_DELEG_PERM_DESCENDENT = 2, ++ ZFS_DELEG_PERM_LOCALDESCENDENT = 3, ++ ZFS_DELEG_PERM_CREATE = 4 ++} zfs_deleg_inherit_t; ++ ++#define ZFS_DELEG_PERM_UID "uid" ++#define ZFS_DELEG_PERM_GID "gid" ++#define ZFS_DELEG_PERM_GROUPS "groups" ++ ++#define ZFS_MLSLABEL_DEFAULT "none" ++ ++#define ZFS_SMB_ACL_SRC "src" ++#define ZFS_SMB_ACL_TARGET "target" ++ ++typedef enum { ++ ZFS_CANMOUNT_OFF = 0, ++ ZFS_CANMOUNT_ON = 1, ++ ZFS_CANMOUNT_NOAUTO = 2 ++} zfs_canmount_type_t; ++ ++typedef enum { ++ ZFS_LOGBIAS_LATENCY = 0, ++ ZFS_LOGBIAS_THROUGHPUT = 1 ++} zfs_logbias_op_t; ++ ++typedef enum zfs_share_op { ++ ZFS_SHARE_NFS = 0, ++ ZFS_UNSHARE_NFS = 1, ++ ZFS_SHARE_SMB = 2, ++ ZFS_UNSHARE_SMB = 3 ++} zfs_share_op_t; ++ ++typedef enum zfs_smb_acl_op { ++ ZFS_SMB_ACL_ADD, ++ ZFS_SMB_ACL_REMOVE, ++ ZFS_SMB_ACL_RENAME, ++ ZFS_SMB_ACL_PURGE ++} zfs_smb_acl_op_t; ++ ++typedef enum zfs_cache_type { ++ ZFS_CACHE_NONE = 0, ++ ZFS_CACHE_METADATA = 1, ++ ZFS_CACHE_ALL = 2 ++} zfs_cache_type_t; ++ ++typedef enum { ++ ZFS_SYNC_STANDARD = 0, ++ ZFS_SYNC_ALWAYS = 1, ++ ZFS_SYNC_DISABLED = 2 ++} zfs_sync_type_t; ++ ++typedef enum { ++ ZFS_XATTR_OFF = 0, ++ ZFS_XATTR_DIR = 1, ++ ZFS_XATTR_SA = 2 ++} zfs_xattr_type_t; ++ ++/* ++ * On-disk version number. ++ */ ++#define SPA_VERSION_1 1ULL ++#define SPA_VERSION_2 2ULL ++#define SPA_VERSION_3 3ULL ++#define SPA_VERSION_4 4ULL ++#define SPA_VERSION_5 5ULL ++#define SPA_VERSION_6 6ULL ++#define SPA_VERSION_7 7ULL ++#define SPA_VERSION_8 8ULL ++#define SPA_VERSION_9 9ULL ++#define SPA_VERSION_10 10ULL ++#define SPA_VERSION_11 11ULL ++#define SPA_VERSION_12 12ULL ++#define SPA_VERSION_13 13ULL ++#define SPA_VERSION_14 14ULL ++#define SPA_VERSION_15 15ULL ++#define SPA_VERSION_16 16ULL ++#define SPA_VERSION_17 17ULL ++#define SPA_VERSION_18 18ULL ++#define SPA_VERSION_19 19ULL ++#define SPA_VERSION_20 20ULL ++#define SPA_VERSION_21 21ULL ++#define SPA_VERSION_22 22ULL ++#define SPA_VERSION_23 23ULL ++#define SPA_VERSION_24 24ULL ++#define SPA_VERSION_25 25ULL ++#define SPA_VERSION_26 26ULL ++#define SPA_VERSION_27 27ULL ++#define SPA_VERSION_28 28ULL ++ ++/* ++ * When bumping up SPA_VERSION, make sure GRUB ZFS understands the on-disk ++ * format change. Go to usr/src/grub/grub-0.97/stage2/{zfs-include/, fsys_zfs*}, ++ * and do the appropriate changes. Also bump the version number in ++ * usr/src/grub/capability. ++ */ ++#define SPA_VERSION SPA_VERSION_28 ++#define SPA_VERSION_STRING "28" ++ ++/* ++ * Symbolic names for the changes that caused a SPA_VERSION switch. ++ * Used in the code when checking for presence or absence of a feature. ++ * Feel free to define multiple symbolic names for each version if there ++ * were multiple changes to on-disk structures during that version. ++ * ++ * NOTE: When checking the current SPA_VERSION in your code, be sure ++ * to use spa_version() since it reports the version of the ++ * last synced uberblock. Checking the in-flight version can ++ * be dangerous in some cases. 
++ */ ++#define SPA_VERSION_INITIAL SPA_VERSION_1 ++#define SPA_VERSION_DITTO_BLOCKS SPA_VERSION_2 ++#define SPA_VERSION_SPARES SPA_VERSION_3 ++#define SPA_VERSION_RAIDZ2 SPA_VERSION_3 ++#define SPA_VERSION_BPOBJ_ACCOUNT SPA_VERSION_3 ++#define SPA_VERSION_RAIDZ_DEFLATE SPA_VERSION_3 ++#define SPA_VERSION_DNODE_BYTES SPA_VERSION_3 ++#define SPA_VERSION_ZPOOL_HISTORY SPA_VERSION_4 ++#define SPA_VERSION_GZIP_COMPRESSION SPA_VERSION_5 ++#define SPA_VERSION_BOOTFS SPA_VERSION_6 ++#define SPA_VERSION_SLOGS SPA_VERSION_7 ++#define SPA_VERSION_DELEGATED_PERMS SPA_VERSION_8 ++#define SPA_VERSION_FUID SPA_VERSION_9 ++#define SPA_VERSION_REFRESERVATION SPA_VERSION_9 ++#define SPA_VERSION_REFQUOTA SPA_VERSION_9 ++#define SPA_VERSION_UNIQUE_ACCURATE SPA_VERSION_9 ++#define SPA_VERSION_L2CACHE SPA_VERSION_10 ++#define SPA_VERSION_NEXT_CLONES SPA_VERSION_11 ++#define SPA_VERSION_ORIGIN SPA_VERSION_11 ++#define SPA_VERSION_DSL_SCRUB SPA_VERSION_11 ++#define SPA_VERSION_SNAP_PROPS SPA_VERSION_12 ++#define SPA_VERSION_USED_BREAKDOWN SPA_VERSION_13 ++#define SPA_VERSION_PASSTHROUGH_X SPA_VERSION_14 ++#define SPA_VERSION_USERSPACE SPA_VERSION_15 ++#define SPA_VERSION_STMF_PROP SPA_VERSION_16 ++#define SPA_VERSION_RAIDZ3 SPA_VERSION_17 ++#define SPA_VERSION_USERREFS SPA_VERSION_18 ++#define SPA_VERSION_HOLES SPA_VERSION_19 ++#define SPA_VERSION_ZLE_COMPRESSION SPA_VERSION_20 ++#define SPA_VERSION_DEDUP SPA_VERSION_21 ++#define SPA_VERSION_RECVD_PROPS SPA_VERSION_22 ++#define SPA_VERSION_SLIM_ZIL SPA_VERSION_23 ++#define SPA_VERSION_SA SPA_VERSION_24 ++#define SPA_VERSION_SCAN SPA_VERSION_25 ++#define SPA_VERSION_DIR_CLONES SPA_VERSION_26 ++#define SPA_VERSION_DEADLISTS SPA_VERSION_26 ++#define SPA_VERSION_FAST_SNAP SPA_VERSION_27 ++#define SPA_VERSION_MULTI_REPLACE SPA_VERSION_28 ++ ++/* ++ * ZPL version - rev'd whenever an incompatible on-disk format change ++ * occurs. This is independent of SPA/DMU/ZAP versioning. You must ++ * also update the version_table[] and help message in zfs_prop.c. ++ * ++ * When changing, be sure to teach GRUB how to read the new format! 
++ * See usr/src/grub/grub-0.97/stage2/{zfs-include/,fsys_zfs*} ++ */ ++#define ZPL_VERSION_1 1ULL ++#define ZPL_VERSION_2 2ULL ++#define ZPL_VERSION_3 3ULL ++#define ZPL_VERSION_4 4ULL ++#define ZPL_VERSION_5 5ULL ++#define ZPL_VERSION ZPL_VERSION_5 ++#define ZPL_VERSION_STRING "5" ++ ++#define ZPL_VERSION_INITIAL ZPL_VERSION_1 ++#define ZPL_VERSION_DIRENT_TYPE ZPL_VERSION_2 ++#define ZPL_VERSION_FUID ZPL_VERSION_3 ++#define ZPL_VERSION_NORMALIZATION ZPL_VERSION_3 ++#define ZPL_VERSION_SYSATTR ZPL_VERSION_3 ++#define ZPL_VERSION_USERSPACE ZPL_VERSION_4 ++#define ZPL_VERSION_SA ZPL_VERSION_5 ++ ++/* Rewind request information */ ++#define ZPOOL_NO_REWIND 1 /* No policy - default behavior */ ++#define ZPOOL_NEVER_REWIND 2 /* Do not search for best txg or rewind */ ++#define ZPOOL_TRY_REWIND 4 /* Search for best txg, but do not rewind */ ++#define ZPOOL_DO_REWIND 8 /* Rewind to best txg w/in deferred frees */ ++#define ZPOOL_EXTREME_REWIND 16 /* Allow extreme measures to find best txg */ ++#define ZPOOL_REWIND_MASK 28 /* All the possible rewind bits */ ++#define ZPOOL_REWIND_POLICIES 31 /* All the possible policy bits */ ++ ++typedef struct zpool_rewind_policy { ++ uint32_t zrp_request; /* rewind behavior requested */ ++ uint64_t zrp_maxmeta; /* max acceptable meta-data errors */ ++ uint64_t zrp_maxdata; /* max acceptable data errors */ ++ uint64_t zrp_txg; /* specific txg to load */ ++} zpool_rewind_policy_t; ++ ++/* ++ * The following are configuration names used in the nvlist describing a pool's ++ * configuration. ++ */ ++#define ZPOOL_CONFIG_VERSION "version" ++#define ZPOOL_CONFIG_POOL_NAME "name" ++#define ZPOOL_CONFIG_POOL_STATE "state" ++#define ZPOOL_CONFIG_POOL_TXG "txg" ++#define ZPOOL_CONFIG_POOL_GUID "pool_guid" ++#define ZPOOL_CONFIG_CREATE_TXG "create_txg" ++#define ZPOOL_CONFIG_TOP_GUID "top_guid" ++#define ZPOOL_CONFIG_VDEV_TREE "vdev_tree" ++#define ZPOOL_CONFIG_TYPE "type" ++#define ZPOOL_CONFIG_CHILDREN "children" ++#define ZPOOL_CONFIG_ID "id" ++#define ZPOOL_CONFIG_GUID "guid" ++#define ZPOOL_CONFIG_PATH "path" ++#define ZPOOL_CONFIG_DEVID "devid" ++#define ZPOOL_CONFIG_METASLAB_ARRAY "metaslab_array" ++#define ZPOOL_CONFIG_METASLAB_SHIFT "metaslab_shift" ++#define ZPOOL_CONFIG_ASHIFT "ashift" ++#define ZPOOL_CONFIG_ASIZE "asize" ++#define ZPOOL_CONFIG_DTL "DTL" ++#define ZPOOL_CONFIG_SCAN_STATS "scan_stats" /* not stored on disk */ ++#define ZPOOL_CONFIG_VDEV_STATS "vdev_stats" /* not stored on disk */ ++#define ZPOOL_CONFIG_WHOLE_DISK "whole_disk" ++#define ZPOOL_CONFIG_ERRCOUNT "error_count" ++#define ZPOOL_CONFIG_NOT_PRESENT "not_present" ++#define ZPOOL_CONFIG_SPARES "spares" ++#define ZPOOL_CONFIG_IS_SPARE "is_spare" ++#define ZPOOL_CONFIG_NPARITY "nparity" ++#define ZPOOL_CONFIG_HOSTID "hostid" ++#define ZPOOL_CONFIG_HOSTNAME "hostname" ++#define ZPOOL_CONFIG_LOADED_TIME "initial_load_time" ++#define ZPOOL_CONFIG_UNSPARE "unspare" ++#define ZPOOL_CONFIG_PHYS_PATH "phys_path" ++#define ZPOOL_CONFIG_IS_LOG "is_log" ++#define ZPOOL_CONFIG_L2CACHE "l2cache" ++#define ZPOOL_CONFIG_HOLE_ARRAY "hole_array" ++#define ZPOOL_CONFIG_VDEV_CHILDREN "vdev_children" ++#define ZPOOL_CONFIG_IS_HOLE "is_hole" ++#define ZPOOL_CONFIG_DDT_HISTOGRAM "ddt_histogram" ++#define ZPOOL_CONFIG_DDT_OBJ_STATS "ddt_object_stats" ++#define ZPOOL_CONFIG_DDT_STATS "ddt_stats" ++#define ZPOOL_CONFIG_SPLIT "splitcfg" ++#define ZPOOL_CONFIG_ORIG_GUID "orig_guid" ++#define ZPOOL_CONFIG_SPLIT_GUID "split_guid" ++#define ZPOOL_CONFIG_SPLIT_LIST "guid_list" ++#define ZPOOL_CONFIG_REMOVING "removing" 
++#define ZPOOL_CONFIG_RESILVERING "resilvering" ++#define ZPOOL_CONFIG_COMMENT "comment" ++#define ZPOOL_CONFIG_SUSPENDED "suspended" /* not stored on disk */ ++#define ZPOOL_CONFIG_TIMESTAMP "timestamp" /* not stored on disk */ ++#define ZPOOL_CONFIG_BOOTFS "bootfs" /* not stored on disk */ ++#define ZPOOL_CONFIG_MISSING_DEVICES "missing_vdevs" /* not stored on disk */ ++#define ZPOOL_CONFIG_LOAD_INFO "load_info" /* not stored on disk */ ++/* ++ * The persistent vdev state is stored as separate values rather than a single ++ * 'vdev_state' entry. This is because a device can be in multiple states, such ++ * as offline and degraded. ++ */ ++#define ZPOOL_CONFIG_OFFLINE "offline" ++#define ZPOOL_CONFIG_FAULTED "faulted" ++#define ZPOOL_CONFIG_DEGRADED "degraded" ++#define ZPOOL_CONFIG_REMOVED "removed" ++#define ZPOOL_CONFIG_FRU "fru" ++#define ZPOOL_CONFIG_AUX_STATE "aux_state" ++ ++/* Rewind policy parameters */ ++#define ZPOOL_REWIND_POLICY "rewind-policy" ++#define ZPOOL_REWIND_REQUEST "rewind-request" ++#define ZPOOL_REWIND_REQUEST_TXG "rewind-request-txg" ++#define ZPOOL_REWIND_META_THRESH "rewind-meta-thresh" ++#define ZPOOL_REWIND_DATA_THRESH "rewind-data-thresh" ++ ++/* Rewind data discovered */ ++#define ZPOOL_CONFIG_LOAD_TIME "rewind_txg_ts" ++#define ZPOOL_CONFIG_LOAD_DATA_ERRORS "verify_data_errors" ++#define ZPOOL_CONFIG_REWIND_TIME "seconds_of_rewind" ++ ++#define VDEV_TYPE_ROOT "root" ++#define VDEV_TYPE_MIRROR "mirror" ++#define VDEV_TYPE_REPLACING "replacing" ++#define VDEV_TYPE_RAIDZ "raidz" ++#define VDEV_TYPE_DISK "disk" ++#define VDEV_TYPE_FILE "file" ++#define VDEV_TYPE_MISSING "missing" ++#define VDEV_TYPE_HOLE "hole" ++#define VDEV_TYPE_SPARE "spare" ++#define VDEV_TYPE_LOG "log" ++#define VDEV_TYPE_L2CACHE "l2cache" ++ ++/* ++ * This is needed in userland to report the minimum necessary device size. ++ */ ++#define SPA_MINDEVSIZE (64ULL << 20) ++ ++/* ++ * The location of the pool configuration repository, shared between kernel and ++ * userland. ++ */ ++#define ZPOOL_CACHE "/etc/zfs/zpool.cache" ++ ++/* ++ * vdev states are ordered from least to most healthy. ++ * A vdev that's CANT_OPEN or below is considered unusable. ++ */ ++typedef enum vdev_state { ++ VDEV_STATE_UNKNOWN = 0, /* Uninitialized vdev */ ++ VDEV_STATE_CLOSED, /* Not currently open */ ++ VDEV_STATE_OFFLINE, /* Not allowed to open */ ++ VDEV_STATE_REMOVED, /* Explicitly removed from system */ ++ VDEV_STATE_CANT_OPEN, /* Tried to open, but failed */ ++ VDEV_STATE_FAULTED, /* External request to fault device */ ++ VDEV_STATE_DEGRADED, /* Replicated vdev with unhealthy kids */ ++ VDEV_STATE_HEALTHY /* Presumed good */ ++} vdev_state_t; ++ ++#define VDEV_STATE_ONLINE VDEV_STATE_HEALTHY ++ ++/* ++ * vdev aux states. When a vdev is in the CANT_OPEN state, the aux field ++ * of the vdev stats structure uses these constants to distinguish why. 
++ */ ++typedef enum vdev_aux { ++ VDEV_AUX_NONE, /* no error */ ++ VDEV_AUX_OPEN_FAILED, /* ldi_open_*() or vn_open() failed */ ++ VDEV_AUX_CORRUPT_DATA, /* bad label or disk contents */ ++ VDEV_AUX_NO_REPLICAS, /* insufficient number of replicas */ ++ VDEV_AUX_BAD_GUID_SUM, /* vdev guid sum doesn't match */ ++ VDEV_AUX_TOO_SMALL, /* vdev size is too small */ ++ VDEV_AUX_BAD_LABEL, /* the label is OK but invalid */ ++ VDEV_AUX_VERSION_NEWER, /* on-disk version is too new */ ++ VDEV_AUX_VERSION_OLDER, /* on-disk version is too old */ ++ VDEV_AUX_SPARED, /* hot spare used in another pool */ ++ VDEV_AUX_ERR_EXCEEDED, /* too many errors */ ++ VDEV_AUX_IO_FAILURE, /* experienced I/O failure */ ++ VDEV_AUX_BAD_LOG, /* cannot read log chain(s) */ ++ VDEV_AUX_EXTERNAL, /* external diagnosis */ ++ VDEV_AUX_SPLIT_POOL /* vdev was split off into another pool */ ++} vdev_aux_t; ++ ++/* ++ * pool state. The following states are written to disk as part of the normal ++ * SPA lifecycle: ACTIVE, EXPORTED, DESTROYED, SPARE, L2CACHE. The remaining ++ * states are software abstractions used at various levels to communicate ++ * pool state. ++ */ ++typedef enum pool_state { ++ POOL_STATE_ACTIVE = 0, /* In active use */ ++ POOL_STATE_EXPORTED, /* Explicitly exported */ ++ POOL_STATE_DESTROYED, /* Explicitly destroyed */ ++ POOL_STATE_SPARE, /* Reserved for hot spare use */ ++ POOL_STATE_L2CACHE, /* Level 2 ARC device */ ++ POOL_STATE_UNINITIALIZED, /* Internal spa_t state */ ++ POOL_STATE_UNAVAIL, /* Internal libzfs state */ ++ POOL_STATE_POTENTIALLY_ACTIVE /* Internal libzfs state */ ++} pool_state_t; ++ ++/* ++ * Scan Functions. ++ */ ++typedef enum pool_scan_func { ++ POOL_SCAN_NONE, ++ POOL_SCAN_SCRUB, ++ POOL_SCAN_RESILVER, ++ POOL_SCAN_FUNCS ++} pool_scan_func_t; ++ ++/* ++ * ZIO types. Needed to interpret vdev statistics below. ++ */ ++typedef enum zio_type { ++ ZIO_TYPE_NULL = 0, ++ ZIO_TYPE_READ, ++ ZIO_TYPE_WRITE, ++ ZIO_TYPE_FREE, ++ ZIO_TYPE_CLAIM, ++ ZIO_TYPE_IOCTL, ++ ZIO_TYPES ++} zio_type_t; ++ ++/* ++ * Pool statistics. Note: all fields should be 64-bit because this ++ * is passed between kernel and userland as an nvlist uint64 array. ++ */ ++typedef struct pool_scan_stat { ++ /* values stored on disk */ ++ uint64_t pss_func; /* pool_scan_func_t */ ++ uint64_t pss_state; /* dsl_scan_state_t */ ++ uint64_t pss_start_time; /* scan start time */ ++ uint64_t pss_end_time; /* scan end time */ ++ uint64_t pss_to_examine; /* total bytes to scan */ ++ uint64_t pss_examined; /* total examined bytes */ ++ uint64_t pss_to_process; /* total bytes to process */ ++ uint64_t pss_processed; /* total processed bytes */ ++ uint64_t pss_errors; /* scan errors */ ++ ++ /* values not stored on disk */ ++ uint64_t pss_pass_exam; /* examined bytes per scan pass */ ++ uint64_t pss_pass_start; /* start time of a scan pass */ ++} pool_scan_stat_t; ++ ++typedef enum dsl_scan_state { ++ DSS_NONE, ++ DSS_SCANNING, ++ DSS_FINISHED, ++ DSS_CANCELED, ++ DSS_NUM_STATES ++} dsl_scan_state_t; ++ ++ ++/* ++ * Vdev statistics. Note: all fields should be 64-bit because this ++ * is passed between kernel and userland as an nvlist uint64 array. 
++ */ ++typedef struct vdev_stat { ++ hrtime_t vs_timestamp; /* time since vdev load */ ++ uint64_t vs_state; /* vdev state */ ++ uint64_t vs_aux; /* see vdev_aux_t */ ++ uint64_t vs_alloc; /* space allocated */ ++ uint64_t vs_space; /* total capacity */ ++ uint64_t vs_dspace; /* deflated capacity */ ++ uint64_t vs_rsize; /* replaceable dev size */ ++ uint64_t vs_esize; /* expandable dev size */ ++ uint64_t vs_ops[ZIO_TYPES]; /* operation count */ ++ uint64_t vs_bytes[ZIO_TYPES]; /* bytes read/written */ ++ uint64_t vs_read_errors; /* read errors */ ++ uint64_t vs_write_errors; /* write errors */ ++ uint64_t vs_checksum_errors; /* checksum errors */ ++ uint64_t vs_self_healed; /* self-healed bytes */ ++ uint64_t vs_scan_removing; /* removing? */ ++ uint64_t vs_scan_processed; /* scan processed bytes */ ++} vdev_stat_t; ++ ++/* ++ * DDT statistics. Note: all fields should be 64-bit because this ++ * is passed between kernel and userland as an nvlist uint64 array. ++ */ ++typedef struct ddt_object { ++ uint64_t ddo_count; /* number of elments in ddt */ ++ uint64_t ddo_dspace; /* size of ddt on disk */ ++ uint64_t ddo_mspace; /* size of ddt in-core */ ++} ddt_object_t; ++ ++typedef struct ddt_stat { ++ uint64_t dds_blocks; /* blocks */ ++ uint64_t dds_lsize; /* logical size */ ++ uint64_t dds_psize; /* physical size */ ++ uint64_t dds_dsize; /* deflated allocated size */ ++ uint64_t dds_ref_blocks; /* referenced blocks */ ++ uint64_t dds_ref_lsize; /* referenced lsize * refcnt */ ++ uint64_t dds_ref_psize; /* referenced psize * refcnt */ ++ uint64_t dds_ref_dsize; /* referenced dsize * refcnt */ ++} ddt_stat_t; ++ ++typedef struct ddt_histogram { ++ ddt_stat_t ddh_stat[64]; /* power-of-two histogram buckets */ ++} ddt_histogram_t; ++ ++#define ZVOL_DRIVER "zvol" ++#define ZFS_DRIVER "zfs" ++#define ZFS_DEV "/dev/zfs" ++ ++/* general zvol path */ ++#define ZVOL_DIR "/dev" ++ ++#define ZVOL_MAJOR 230 ++#define ZVOL_MINOR_BITS 4 ++#define ZVOL_MINOR_MASK ((1U << ZVOL_MINOR_BITS) - 1) ++#define ZVOL_MINORS (1 << 4) ++#define ZVOL_DEV_NAME "zd" ++ ++#define ZVOL_PROP_NAME "name" ++#define ZVOL_DEFAULT_BLOCKSIZE 8192 ++ ++/* ++ * /dev/zfs ioctl numbers. 
++ */ ++#define ZFS_IOC ('Z' << 8) ++ ++typedef enum zfs_ioc { ++ ZFS_IOC_POOL_CREATE = ZFS_IOC, ++ ZFS_IOC_POOL_DESTROY, ++ ZFS_IOC_POOL_IMPORT, ++ ZFS_IOC_POOL_EXPORT, ++ ZFS_IOC_POOL_CONFIGS, ++ ZFS_IOC_POOL_STATS, ++ ZFS_IOC_POOL_TRYIMPORT, ++ ZFS_IOC_POOL_SCAN, ++ ZFS_IOC_POOL_FREEZE, ++ ZFS_IOC_POOL_UPGRADE, ++ ZFS_IOC_POOL_GET_HISTORY, ++ ZFS_IOC_VDEV_ADD, ++ ZFS_IOC_VDEV_REMOVE, ++ ZFS_IOC_VDEV_SET_STATE, ++ ZFS_IOC_VDEV_ATTACH, ++ ZFS_IOC_VDEV_DETACH, ++ ZFS_IOC_VDEV_SETPATH, ++ ZFS_IOC_VDEV_SETFRU, ++ ZFS_IOC_OBJSET_STATS, ++ ZFS_IOC_OBJSET_ZPLPROPS, ++ ZFS_IOC_DATASET_LIST_NEXT, ++ ZFS_IOC_SNAPSHOT_LIST_NEXT, ++ ZFS_IOC_SET_PROP, ++ ZFS_IOC_CREATE_MINOR, ++ ZFS_IOC_REMOVE_MINOR, ++ ZFS_IOC_CREATE, ++ ZFS_IOC_DESTROY, ++ ZFS_IOC_ROLLBACK, ++ ZFS_IOC_RENAME, ++ ZFS_IOC_RECV, ++ ZFS_IOC_SEND, ++ ZFS_IOC_INJECT_FAULT, ++ ZFS_IOC_CLEAR_FAULT, ++ ZFS_IOC_INJECT_LIST_NEXT, ++ ZFS_IOC_ERROR_LOG, ++ ZFS_IOC_CLEAR, ++ ZFS_IOC_PROMOTE, ++ ZFS_IOC_DESTROY_SNAPS_NVL, ++ ZFS_IOC_SNAPSHOT, ++ ZFS_IOC_DSOBJ_TO_DSNAME, ++ ZFS_IOC_OBJ_TO_PATH, ++ ZFS_IOC_POOL_SET_PROPS, ++ ZFS_IOC_POOL_GET_PROPS, ++ ZFS_IOC_SET_FSACL, ++ ZFS_IOC_GET_FSACL, ++ ZFS_IOC_SHARE, ++ ZFS_IOC_INHERIT_PROP, ++ ZFS_IOC_SMB_ACL, ++ ZFS_IOC_USERSPACE_ONE, ++ ZFS_IOC_USERSPACE_MANY, ++ ZFS_IOC_USERSPACE_UPGRADE, ++ ZFS_IOC_HOLD, ++ ZFS_IOC_RELEASE, ++ ZFS_IOC_GET_HOLDS, ++ ZFS_IOC_OBJSET_RECVD_PROPS, ++ ZFS_IOC_VDEV_SPLIT, ++ ZFS_IOC_NEXT_OBJ, ++ ZFS_IOC_DIFF, ++ ZFS_IOC_TMP_SNAPSHOT, ++ ZFS_IOC_OBJ_TO_STATS, ++ ZFS_IOC_EVENTS_NEXT, ++ ZFS_IOC_EVENTS_CLEAR, ++ ZFS_IOC_POOL_REGUID, ++ ZFS_IOC_SPACE_WRITTEN, ++ ZFS_IOC_SPACE_SNAPS, ++ ZFS_IOC_POOL_REOPEN, ++ ZFS_IOC_SEND_PROGRESS, ++} zfs_ioc_t; ++ ++/* ++ * zvol ioctl to get dataset name ++ */ ++#define BLKZNAME _IOR(0x12,125,char[ZFS_MAXNAMELEN]) ++ ++/* ++ * Internal SPA load state. Used by FMA diagnosis engine. ++ */ ++typedef enum { ++ SPA_LOAD_NONE, /* no load in progress */ ++ SPA_LOAD_OPEN, /* normal open */ ++ SPA_LOAD_IMPORT, /* import in progress */ ++ SPA_LOAD_TRYIMPORT, /* tryimport in progress */ ++ SPA_LOAD_RECOVER, /* recovery requested */ ++ SPA_LOAD_ERROR /* load failed */ ++} spa_load_state_t; ++ ++/* ++ * Bookmark name values. ++ */ ++#define ZPOOL_ERR_LIST "error list" ++#define ZPOOL_ERR_DATASET "dataset" ++#define ZPOOL_ERR_OBJECT "object" ++ ++#define HIS_MAX_RECORD_LEN (MAXPATHLEN + MAXPATHLEN + 1) ++ ++/* ++ * The following are names used in the nvlist describing ++ * the pool's history log. ++ */ ++#define ZPOOL_HIST_RECORD "history record" ++#define ZPOOL_HIST_TIME "history time" ++#define ZPOOL_HIST_CMD "history command" ++#define ZPOOL_HIST_WHO "history who" ++#define ZPOOL_HIST_ZONE "history zone" ++#define ZPOOL_HIST_HOST "history hostname" ++#define ZPOOL_HIST_TXG "history txg" ++#define ZPOOL_HIST_INT_EVENT "history internal event" ++#define ZPOOL_HIST_INT_STR "history internal str" ++ ++/* ++ * Flags for ZFS_IOC_VDEV_SET_STATE ++ */ ++#define ZFS_ONLINE_CHECKREMOVE 0x1 ++#define ZFS_ONLINE_UNSPARE 0x2 ++#define ZFS_ONLINE_FORCEFAULT 0x4 ++#define ZFS_ONLINE_EXPAND 0x8 ++#define ZFS_OFFLINE_TEMPORARY 0x1 ++ ++/* ++ * Flags for ZFS_IOC_POOL_IMPORT ++ */ ++#define ZFS_IMPORT_NORMAL 0x0 ++#define ZFS_IMPORT_VERBATIM 0x1 ++#define ZFS_IMPORT_ANY_HOST 0x2 ++#define ZFS_IMPORT_MISSING_LOG 0x4 ++#define ZFS_IMPORT_ONLY 0x8 ++ ++/* ++ * Sysevent payload members. 
ZFS will generate the following sysevents with the ++ * given payloads: ++ * ++ * ESC_ZFS_RESILVER_START ++ * ESC_ZFS_RESILVER_END ++ * ESC_ZFS_POOL_DESTROY ++ * ESC_ZFS_POOL_REGUID ++ * ++ * ZFS_EV_POOL_NAME DATA_TYPE_STRING ++ * ZFS_EV_POOL_GUID DATA_TYPE_UINT64 ++ * ++ * ESC_ZFS_VDEV_REMOVE ++ * ESC_ZFS_VDEV_CLEAR ++ * ESC_ZFS_VDEV_CHECK ++ * ++ * ZFS_EV_POOL_NAME DATA_TYPE_STRING ++ * ZFS_EV_POOL_GUID DATA_TYPE_UINT64 ++ * ZFS_EV_VDEV_PATH DATA_TYPE_STRING (optional) ++ * ZFS_EV_VDEV_GUID DATA_TYPE_UINT64 ++ */ ++#define ZFS_EV_POOL_NAME "pool_name" ++#define ZFS_EV_POOL_GUID "pool_guid" ++#define ZFS_EV_VDEV_PATH "vdev_path" ++#define ZFS_EV_VDEV_GUID "vdev_guid" ++ ++/* ++ * Note: This is encoded on-disk, so new events must be added to the ++ * end, and unused events can not be removed. Be sure to edit ++ * libzfs_pool.c: hist_event_table[]. ++ */ ++typedef enum history_internal_events { ++ LOG_NO_EVENT = 0, ++ LOG_POOL_CREATE, ++ LOG_POOL_VDEV_ADD, ++ LOG_POOL_REMOVE, ++ LOG_POOL_DESTROY, ++ LOG_POOL_EXPORT, ++ LOG_POOL_IMPORT, ++ LOG_POOL_VDEV_ATTACH, ++ LOG_POOL_VDEV_REPLACE, ++ LOG_POOL_VDEV_DETACH, ++ LOG_POOL_VDEV_ONLINE, ++ LOG_POOL_VDEV_OFFLINE, ++ LOG_POOL_UPGRADE, ++ LOG_POOL_CLEAR, ++ LOG_POOL_SCAN, ++ LOG_POOL_PROPSET, ++ LOG_DS_CREATE, ++ LOG_DS_CLONE, ++ LOG_DS_DESTROY, ++ LOG_DS_DESTROY_BEGIN, ++ LOG_DS_INHERIT, ++ LOG_DS_PROPSET, ++ LOG_DS_QUOTA, ++ LOG_DS_PERM_UPDATE, ++ LOG_DS_PERM_REMOVE, ++ LOG_DS_PERM_WHO_REMOVE, ++ LOG_DS_PROMOTE, ++ LOG_DS_RECEIVE, ++ LOG_DS_RENAME, ++ LOG_DS_RESERVATION, ++ LOG_DS_REPLAY_INC_SYNC, ++ LOG_DS_REPLAY_FULL_SYNC, ++ LOG_DS_ROLLBACK, ++ LOG_DS_SNAPSHOT, ++ LOG_DS_UPGRADE, ++ LOG_DS_REFQUOTA, ++ LOG_DS_REFRESERV, ++ LOG_POOL_SCAN_DONE, ++ LOG_DS_USER_HOLD, ++ LOG_DS_USER_RELEASE, ++ LOG_POOL_SPLIT, ++ LOG_END ++} history_internal_events_t; ++ ++#ifdef __cplusplus ++} ++#endif ++ ++#endif /* _SYS_FS_ZFS_H */ +diff -uNr linux-3.2.33-go.orig/include/zfs/sys/Makefile linux-3.2.33-go/include/zfs/sys/Makefile +--- linux-3.2.33-go.orig/include/zfs/sys/Makefile 1970-01-01 01:00:00.000000000 +0100 ++++ linux-3.2.33-go/include/zfs/sys/Makefile 2012-11-16 23:25:34.342039415 +0100 +@@ -0,0 +1,1029 @@ ++# Makefile.in generated by automake 1.11.6 from Makefile.am. ++# include/sys/Makefile. Generated from Makefile.in by configure. ++ ++# Copyright (C) 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001, 2002, ++# 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010, 2011 Free Software ++# Foundation, Inc. ++# This Makefile.in is free software; the Free Software Foundation ++# gives unlimited permission to copy and/or distribute it, ++# with or without modifications, as long as this notice is preserved. ++ ++# This program is distributed in the hope that it will be useful, ++# but WITHOUT ANY WARRANTY, to the extent permitted by law; without ++# even the implied warranty of MERCHANTABILITY or FITNESS FOR A ++# PARTICULAR PURPOSE. 
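The pool_scan_stat_t defined earlier in this header is, as its comment notes, shipped to userland as an nvlist uint64 array, so reporting scrub or resilver progress reduces to simple arithmetic on the examined/to-examine counters. A minimal sketch (not part of the patch): it assumes the values have already been fetched from the kernel (for example through libzfs) and copied into a local struct, and it only mirrors the fields needed for a progress estimate.

#include <stdint.h>
#include <stdio.h>

/* Mirrors the relevant fields of pool_scan_stat_t from sys/fs/zfs.h above. */
struct scan_progress {
	uint64_t pss_func;        /* pool_scan_func_t */
	uint64_t pss_state;       /* dsl_scan_state_t */
	uint64_t pss_to_examine;  /* total bytes to scan */
	uint64_t pss_examined;    /* bytes examined so far */
};

/* Percentage of the scan completed; 0 if there is nothing to examine. */
static double
scan_pct_done(const struct scan_progress *sp)
{
	if (sp->pss_to_examine == 0)
		return (0.0);
	return (100.0 * (double)sp->pss_examined / (double)sp->pss_to_examine);
}

int
main(void)
{
	/* Example values only; real numbers come from the kernel. */
	struct scan_progress sp = {
		1,		/* POOL_SCAN_SCRUB, per the enum above */
		1,		/* DSS_SCANNING, per the enum above */
		1ULL << 40,	/* 1 TiB to examine */
		1ULL << 38	/* 256 GiB examined */
	};
	printf("scrub %.2f%% done\n", scan_pct_done(&sp));
	return (0);
}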
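The BLKZNAME ioctl defined above lets a userland helper ask a zvol block device for the name of the dataset backing it, much as the zvol_id udev helper does when creating /dev/zvol/<pool>/<dataset> links. A minimal sketch, not part of the patch: the device path /dev/zd0 is illustrative, and ZFS_MAXNAMELEN is assumed to be 256 here so the example stands alone instead of including the patched header.

#include <fcntl.h>
#include <stdio.h>
#include <string.h>
#include <sys/ioctl.h>
#include <linux/ioctl.h>
#include <unistd.h>

#define ZFS_MAXNAMELEN	256	/* assumed; the real value comes from zfs.h */
#define BLKZNAME	_IOR(0x12, 125, char[ZFS_MAXNAMELEN])	/* as defined above */

int
main(int argc, char **argv)
{
	/* Default device is illustrative; pass a zvol node as argv[1]. */
	const char *dev = (argc > 1) ? argv[1] : "/dev/zd0";
	char name[ZFS_MAXNAMELEN];
	int fd;

	fd = open(dev, O_RDONLY);
	if (fd < 0) {
		perror(dev);
		return (1);
	}
	memset(name, 0, sizeof (name));
	if (ioctl(fd, BLKZNAME, name) < 0) {
		perror("BLKZNAME");
		close(fd);
		return (1);
	}
	printf("%s is backed by dataset %s\n", dev, name);
	close(fd);
	return (0);
}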
++ ++ ++ ++ ++am__make_dryrun = \ ++ { \ ++ am__dry=no; \ ++ case $$MAKEFLAGS in \ ++ *\\[\ \ ]*) \ ++ echo 'am--echo: ; @echo "AM" OK' | $(MAKE) -f - 2>/dev/null \ ++ | grep '^AM OK$$' >/dev/null || am__dry=yes;; \ ++ *) \ ++ for am__flg in $$MAKEFLAGS; do \ ++ case $$am__flg in \ ++ *=*|--*) ;; \ ++ *n*) am__dry=yes; break;; \ ++ esac; \ ++ done;; \ ++ esac; \ ++ test $$am__dry = yes; \ ++ } ++pkgdatadir = $(datadir)/zfs ++pkgincludedir = $(includedir)/zfs ++pkglibdir = $(libdir)/zfs ++pkglibexecdir = $(libexecdir)/zfs ++am__cd = CDPATH="$${ZSH_VERSION+.}$(PATH_SEPARATOR)" && cd ++install_sh_DATA = $(install_sh) -c -m 644 ++install_sh_PROGRAM = $(install_sh) -c ++install_sh_SCRIPT = $(install_sh) -c ++INSTALL_HEADER = $(INSTALL_DATA) ++transform = $(program_transform_name) ++NORMAL_INSTALL = : ++PRE_INSTALL = : ++POST_INSTALL = : ++NORMAL_UNINSTALL = : ++PRE_UNINSTALL = : ++POST_UNINSTALL = : ++build_triplet = x86_64-unknown-linux-gnu ++host_triplet = x86_64-unknown-linux-gnu ++target_triplet = x86_64-unknown-linux-gnu ++subdir = include/sys ++DIST_COMMON = $(am__kernel_HEADERS_DIST) $(am__libzfs_HEADERS_DIST) \ ++ $(srcdir)/Makefile.am $(srcdir)/Makefile.in ++ACLOCAL_M4 = $(top_srcdir)/aclocal.m4 ++am__aclocal_m4_deps = \ ++ $(top_srcdir)/config/always-no-unused-but-set-variable.m4 \ ++ $(top_srcdir)/config/kernel-automount.m4 \ ++ $(top_srcdir)/config/kernel-bdev-block-device-operations.m4 \ ++ $(top_srcdir)/config/kernel-bdev-logical-size.m4 \ ++ $(top_srcdir)/config/kernel-bdi-setup-and-register.m4 \ ++ $(top_srcdir)/config/kernel-bdi.m4 \ ++ $(top_srcdir)/config/kernel-bio-empty-barrier.m4 \ ++ $(top_srcdir)/config/kernel-bio-end-io-t-args.m4 \ ++ $(top_srcdir)/config/kernel-bio-failfast.m4 \ ++ $(top_srcdir)/config/kernel-bio-rw-syncio.m4 \ ++ $(top_srcdir)/config/kernel-blk-end-request.m4 \ ++ $(top_srcdir)/config/kernel-blk-fetch-request.m4 \ ++ $(top_srcdir)/config/kernel-blk-queue-discard.m4 \ ++ $(top_srcdir)/config/kernel-blk-queue-flush.m4 \ ++ $(top_srcdir)/config/kernel-blk-queue-io-opt.m4 \ ++ $(top_srcdir)/config/kernel-blk-queue-max-hw-sectors.m4 \ ++ $(top_srcdir)/config/kernel-blk-queue-max-segments.m4 \ ++ $(top_srcdir)/config/kernel-blk-queue-nonrot.m4 \ ++ $(top_srcdir)/config/kernel-blk-queue-physical-block-size.m4 \ ++ $(top_srcdir)/config/kernel-blk-requeue-request.m4 \ ++ $(top_srcdir)/config/kernel-blk-rq-bytes.m4 \ ++ $(top_srcdir)/config/kernel-blk-rq-pos.m4 \ ++ $(top_srcdir)/config/kernel-blk-rq-sectors.m4 \ ++ $(top_srcdir)/config/kernel-blkdev-get-by-path.m4 \ ++ $(top_srcdir)/config/kernel-blkdev-get.m4 \ ++ $(top_srcdir)/config/kernel-check-disk-size-change.m4 \ ++ $(top_srcdir)/config/kernel-clear-inode.m4 \ ++ $(top_srcdir)/config/kernel-commit-metadata.m4 \ ++ $(top_srcdir)/config/kernel-create-nameidata.m4 \ ++ $(top_srcdir)/config/kernel-d-make-root.m4 \ ++ $(top_srcdir)/config/kernel-d-obtain-alias.m4 \ ++ $(top_srcdir)/config/kernel-discard-granularity.m4 \ ++ $(top_srcdir)/config/kernel-elevator-change.m4 \ ++ $(top_srcdir)/config/kernel-encode-fh-inode.m4 \ ++ $(top_srcdir)/config/kernel-evict-inode.m4 \ ++ $(top_srcdir)/config/kernel-fallocate.m4 \ ++ $(top_srcdir)/config/kernel-fmode-t.m4 \ ++ $(top_srcdir)/config/kernel-fsync.m4 \ ++ $(top_srcdir)/config/kernel-get-disk-ro.m4 \ ++ $(top_srcdir)/config/kernel-get-gendisk.m4 \ ++ $(top_srcdir)/config/kernel-insert-inode-locked.m4 \ ++ $(top_srcdir)/config/kernel-invalidate-bdev-args.m4 \ ++ $(top_srcdir)/config/kernel-kobj-name-len.m4 \ ++ $(top_srcdir)/config/kernel-lookup-nameidata.m4 \ ++ 
$(top_srcdir)/config/kernel-mkdir-umode-t.m4 \ ++ $(top_srcdir)/config/kernel-mount-nodev.m4 \ ++ $(top_srcdir)/config/kernel-open-bdev-exclusive.m4 \ ++ $(top_srcdir)/config/kernel-rq-for-each_segment.m4 \ ++ $(top_srcdir)/config/kernel-rq-is_sync.m4 \ ++ $(top_srcdir)/config/kernel-security-inode-init.m4 \ ++ $(top_srcdir)/config/kernel-set-nlink.m4 \ ++ $(top_srcdir)/config/kernel-sget-args.m4 \ ++ $(top_srcdir)/config/kernel-show-options.m4 \ ++ $(top_srcdir)/config/kernel-shrink.m4 \ ++ $(top_srcdir)/config/kernel-truncate-range.m4 \ ++ $(top_srcdir)/config/kernel-truncate-setsize.m4 \ ++ $(top_srcdir)/config/kernel-xattr-handler.m4 \ ++ $(top_srcdir)/config/kernel.m4 \ ++ $(top_srcdir)/config/user-arch.m4 \ ++ $(top_srcdir)/config/user-frame-larger-than.m4 \ ++ $(top_srcdir)/config/user-ioctl.m4 \ ++ $(top_srcdir)/config/user-libblkid.m4 \ ++ $(top_srcdir)/config/user-libuuid.m4 \ ++ $(top_srcdir)/config/user-nptl_guard_within_stack.m4 \ ++ $(top_srcdir)/config/user-selinux.m4 \ ++ $(top_srcdir)/config/user-udev.m4 \ ++ $(top_srcdir)/config/user-zlib.m4 $(top_srcdir)/config/user.m4 \ ++ $(top_srcdir)/config/zfs-build.m4 \ ++ $(top_srcdir)/config/zfs-meta.m4 $(top_srcdir)/configure.ac ++am__configure_deps = $(am__aclocal_m4_deps) $(CONFIGURE_DEPENDENCIES) \ ++ $(ACLOCAL_M4) ++mkinstalldirs = $(install_sh) -d ++CONFIG_HEADER = $(top_builddir)/zfs_config.h ++CONFIG_CLEAN_FILES = ++CONFIG_CLEAN_VPATH_FILES = ++AM_V_GEN = $(am__v_GEN_$(V)) ++am__v_GEN_ = $(am__v_GEN_$(AM_DEFAULT_VERBOSITY)) ++am__v_GEN_0 = @echo " GEN " $@; ++AM_V_at = $(am__v_at_$(V)) ++am__v_at_ = $(am__v_at_$(AM_DEFAULT_VERBOSITY)) ++am__v_at_0 = @ ++SOURCES = ++DIST_SOURCES = ++RECURSIVE_TARGETS = all-recursive check-recursive dvi-recursive \ ++ html-recursive info-recursive install-data-recursive \ ++ install-dvi-recursive install-exec-recursive \ ++ install-html-recursive install-info-recursive \ ++ install-pdf-recursive install-ps-recursive install-recursive \ ++ installcheck-recursive installdirs-recursive pdf-recursive \ ++ ps-recursive uninstall-recursive ++am__can_run_installinfo = \ ++ case $$AM_UPDATE_INFO_DIR in \ ++ n|no|NO) false;; \ ++ *) (install-info --version) >/dev/null 2>&1;; \ ++ esac ++am__kernel_HEADERS_DIST = $(top_srcdir)/include/sys/arc.h \ ++ $(top_srcdir)/include/sys/avl.h \ ++ $(top_srcdir)/include/sys/avl_impl.h \ ++ $(top_srcdir)/include/sys/bplist.h \ ++ $(top_srcdir)/include/sys/bpobj.h \ ++ $(top_srcdir)/include/sys/dbuf.h \ ++ $(top_srcdir)/include/sys/ddt.h \ ++ $(top_srcdir)/include/sys/dmu.h \ ++ $(top_srcdir)/include/sys/dmu_impl.h \ ++ $(top_srcdir)/include/sys/dmu_objset.h \ ++ $(top_srcdir)/include/sys/dmu_traverse.h \ ++ $(top_srcdir)/include/sys/dmu_tx.h \ ++ $(top_srcdir)/include/sys/dmu_zfetch.h \ ++ $(top_srcdir)/include/sys/dnode.h \ ++ $(top_srcdir)/include/sys/dsl_dataset.h \ ++ $(top_srcdir)/include/sys/dsl_deadlist.h \ ++ $(top_srcdir)/include/sys/dsl_deleg.h \ ++ $(top_srcdir)/include/sys/dsl_dir.h \ ++ $(top_srcdir)/include/sys/dsl_pool.h \ ++ $(top_srcdir)/include/sys/dsl_prop.h \ ++ $(top_srcdir)/include/sys/dsl_scan.h \ ++ $(top_srcdir)/include/sys/dsl_synctask.h \ ++ $(top_srcdir)/include/sys/efi_partition.h \ ++ $(top_srcdir)/include/sys/metaslab.h \ ++ $(top_srcdir)/include/sys/metaslab_impl.h \ ++ $(top_srcdir)/include/sys/nvpair.h \ ++ $(top_srcdir)/include/sys/nvpair_impl.h \ ++ $(top_srcdir)/include/sys/refcount.h \ ++ $(top_srcdir)/include/sys/rrwlock.h \ ++ $(top_srcdir)/include/sys/sa.h \ ++ $(top_srcdir)/include/sys/sa_impl.h \ ++ 
$(top_srcdir)/include/sys/spa_boot.h \ ++ $(top_srcdir)/include/sys/space_map.h \ ++ $(top_srcdir)/include/sys/spa.h \ ++ $(top_srcdir)/include/sys/spa_impl.h \ ++ $(top_srcdir)/include/sys/txg.h \ ++ $(top_srcdir)/include/sys/txg_impl.h \ ++ $(top_srcdir)/include/sys/u8_textprep_data.h \ ++ $(top_srcdir)/include/sys/u8_textprep.h \ ++ $(top_srcdir)/include/sys/uberblock.h \ ++ $(top_srcdir)/include/sys/uberblock_impl.h \ ++ $(top_srcdir)/include/sys/uio_impl.h \ ++ $(top_srcdir)/include/sys/unique.h \ ++ $(top_srcdir)/include/sys/uuid.h \ ++ $(top_srcdir)/include/sys/vdev_disk.h \ ++ $(top_srcdir)/include/sys/vdev_file.h \ ++ $(top_srcdir)/include/sys/vdev.h \ ++ $(top_srcdir)/include/sys/vdev_impl.h \ ++ $(top_srcdir)/include/sys/xvattr.h \ ++ $(top_srcdir)/include/sys/zap.h \ ++ $(top_srcdir)/include/sys/zap_impl.h \ ++ $(top_srcdir)/include/sys/zap_leaf.h \ ++ $(top_srcdir)/include/sys/zfs_acl.h \ ++ $(top_srcdir)/include/sys/zfs_context.h \ ++ $(top_srcdir)/include/sys/zfs_ctldir.h \ ++ $(top_srcdir)/include/sys/zfs_debug.h \ ++ $(top_srcdir)/include/sys/zfs_dir.h \ ++ $(top_srcdir)/include/sys/zfs_fuid.h \ ++ $(top_srcdir)/include/sys/zfs_rlock.h \ ++ $(top_srcdir)/include/sys/zfs_sa.h \ ++ $(top_srcdir)/include/sys/zfs_stat.h \ ++ $(top_srcdir)/include/sys/zfs_vfsops.h \ ++ $(top_srcdir)/include/sys/zfs_znode.h \ ++ $(top_srcdir)/include/sys/zfs_vnops.h \ ++ $(top_srcdir)/include/sys/zil.h \ ++ $(top_srcdir)/include/sys/zil_impl.h \ ++ $(top_srcdir)/include/sys/zio_checksum.h \ ++ $(top_srcdir)/include/sys/zio_compress.h \ ++ $(top_srcdir)/include/sys/zio.h \ ++ $(top_srcdir)/include/sys/zio_impl.h \ ++ $(top_srcdir)/include/sys/zrlock.h \ ++ $(top_srcdir)/include/sys/zfs_ioctl.h \ ++ $(top_srcdir)/include/sys/zfs_onexit.h \ ++ ${top_srcdir}/include/sys/zpl.h \ ++ $(top_srcdir)/include/sys/zvol.h ++am__vpath_adj_setup = srcdirstrip=`echo "$(srcdir)" | sed 's|.|.|g'`; ++am__vpath_adj = case $$p in \ ++ $(srcdir)/*) f=`echo "$$p" | sed "s|^$$srcdirstrip/||"`;; \ ++ *) f=$$p;; \ ++ esac; ++am__strip_dir = f=`echo $$p | sed -e 's|^.*/||'`; ++am__install_max = 40 ++am__nobase_strip_setup = \ ++ srcdirstrip=`echo "$(srcdir)" | sed 's/[].[^$$\\*|]/\\\\&/g'` ++am__nobase_strip = \ ++ for p in $$list; do echo "$$p"; done | sed -e "s|$$srcdirstrip/||" ++am__nobase_list = $(am__nobase_strip_setup); \ ++ for p in $$list; do echo "$$p $$p"; done | \ ++ sed "s| $$srcdirstrip/| |;"' / .*\//!s/ .*/ ./; s,\( .*\)/[^/]*$$,\1,' | \ ++ $(AWK) 'BEGIN { files["."] = "" } { files[$$2] = files[$$2] " " $$1; \ ++ if (++n[$$2] == $(am__install_max)) \ ++ { print $$2, files[$$2]; n[$$2] = 0; files[$$2] = "" } } \ ++ END { for (dir in files) print dir, files[dir] }' ++am__base_list = \ ++ sed '$$!N;$$!N;$$!N;$$!N;$$!N;$$!N;$$!N;s/\n/ /g' | \ ++ sed '$$!N;$$!N;$$!N;$$!N;s/\n/ /g' ++am__uninstall_files_from_dir = { \ ++ test -z "$$files" \ ++ || { test ! -d "$$dir" && test ! -f "$$dir" && test ! 
-r "$$dir"; } \ ++ || { echo " ( cd '$$dir' && rm -f" $$files ")"; \ ++ $(am__cd) "$$dir" && rm -f $$files; }; \ ++ } ++am__installdirs = "$(DESTDIR)$(kerneldir)" "$(DESTDIR)$(libzfsdir)" ++am__libzfs_HEADERS_DIST = $(top_srcdir)/include/sys/arc.h \ ++ $(top_srcdir)/include/sys/avl.h \ ++ $(top_srcdir)/include/sys/avl_impl.h \ ++ $(top_srcdir)/include/sys/bplist.h \ ++ $(top_srcdir)/include/sys/bpobj.h \ ++ $(top_srcdir)/include/sys/dbuf.h \ ++ $(top_srcdir)/include/sys/ddt.h \ ++ $(top_srcdir)/include/sys/dmu.h \ ++ $(top_srcdir)/include/sys/dmu_impl.h \ ++ $(top_srcdir)/include/sys/dmu_objset.h \ ++ $(top_srcdir)/include/sys/dmu_traverse.h \ ++ $(top_srcdir)/include/sys/dmu_tx.h \ ++ $(top_srcdir)/include/sys/dmu_zfetch.h \ ++ $(top_srcdir)/include/sys/dnode.h \ ++ $(top_srcdir)/include/sys/dsl_dataset.h \ ++ $(top_srcdir)/include/sys/dsl_deadlist.h \ ++ $(top_srcdir)/include/sys/dsl_deleg.h \ ++ $(top_srcdir)/include/sys/dsl_dir.h \ ++ $(top_srcdir)/include/sys/dsl_pool.h \ ++ $(top_srcdir)/include/sys/dsl_prop.h \ ++ $(top_srcdir)/include/sys/dsl_scan.h \ ++ $(top_srcdir)/include/sys/dsl_synctask.h \ ++ $(top_srcdir)/include/sys/efi_partition.h \ ++ $(top_srcdir)/include/sys/metaslab.h \ ++ $(top_srcdir)/include/sys/metaslab_impl.h \ ++ $(top_srcdir)/include/sys/nvpair.h \ ++ $(top_srcdir)/include/sys/nvpair_impl.h \ ++ $(top_srcdir)/include/sys/refcount.h \ ++ $(top_srcdir)/include/sys/rrwlock.h \ ++ $(top_srcdir)/include/sys/sa.h \ ++ $(top_srcdir)/include/sys/sa_impl.h \ ++ $(top_srcdir)/include/sys/spa_boot.h \ ++ $(top_srcdir)/include/sys/space_map.h \ ++ $(top_srcdir)/include/sys/spa.h \ ++ $(top_srcdir)/include/sys/spa_impl.h \ ++ $(top_srcdir)/include/sys/txg.h \ ++ $(top_srcdir)/include/sys/txg_impl.h \ ++ $(top_srcdir)/include/sys/u8_textprep_data.h \ ++ $(top_srcdir)/include/sys/u8_textprep.h \ ++ $(top_srcdir)/include/sys/uberblock.h \ ++ $(top_srcdir)/include/sys/uberblock_impl.h \ ++ $(top_srcdir)/include/sys/uio_impl.h \ ++ $(top_srcdir)/include/sys/unique.h \ ++ $(top_srcdir)/include/sys/uuid.h \ ++ $(top_srcdir)/include/sys/vdev_disk.h \ ++ $(top_srcdir)/include/sys/vdev_file.h \ ++ $(top_srcdir)/include/sys/vdev.h \ ++ $(top_srcdir)/include/sys/vdev_impl.h \ ++ $(top_srcdir)/include/sys/xvattr.h \ ++ $(top_srcdir)/include/sys/zap.h \ ++ $(top_srcdir)/include/sys/zap_impl.h \ ++ $(top_srcdir)/include/sys/zap_leaf.h \ ++ $(top_srcdir)/include/sys/zfs_acl.h \ ++ $(top_srcdir)/include/sys/zfs_context.h \ ++ $(top_srcdir)/include/sys/zfs_ctldir.h \ ++ $(top_srcdir)/include/sys/zfs_debug.h \ ++ $(top_srcdir)/include/sys/zfs_dir.h \ ++ $(top_srcdir)/include/sys/zfs_fuid.h \ ++ $(top_srcdir)/include/sys/zfs_rlock.h \ ++ $(top_srcdir)/include/sys/zfs_sa.h \ ++ $(top_srcdir)/include/sys/zfs_stat.h \ ++ $(top_srcdir)/include/sys/zfs_vfsops.h \ ++ $(top_srcdir)/include/sys/zfs_znode.h \ ++ $(top_srcdir)/include/sys/zfs_vnops.h \ ++ $(top_srcdir)/include/sys/zil.h \ ++ $(top_srcdir)/include/sys/zil_impl.h \ ++ $(top_srcdir)/include/sys/zio_checksum.h \ ++ $(top_srcdir)/include/sys/zio_compress.h \ ++ $(top_srcdir)/include/sys/zio.h \ ++ $(top_srcdir)/include/sys/zio_impl.h \ ++ $(top_srcdir)/include/sys/zrlock.h ++HEADERS = $(kernel_HEADERS) $(libzfs_HEADERS) ++RECURSIVE_CLEAN_TARGETS = mostlyclean-recursive clean-recursive \ ++ distclean-recursive maintainer-clean-recursive ++AM_RECURSIVE_TARGETS = $(RECURSIVE_TARGETS:-recursive=) \ ++ $(RECURSIVE_CLEAN_TARGETS:-recursive=) tags TAGS ctags CTAGS \ ++ distdir ++ETAGS = etags ++CTAGS = ctags ++DIST_SUBDIRS = $(SUBDIRS) ++DISTFILES 
= $(DIST_COMMON) $(DIST_SOURCES) $(TEXINFOS) $(EXTRA_DIST) ++am__relativize = \ ++ dir0=`pwd`; \ ++ sed_first='s,^\([^/]*\)/.*$$,\1,'; \ ++ sed_rest='s,^[^/]*/*,,'; \ ++ sed_last='s,^.*/\([^/]*\)$$,\1,'; \ ++ sed_butlast='s,/*[^/]*$$,,'; \ ++ while test -n "$$dir1"; do \ ++ first=`echo "$$dir1" | sed -e "$$sed_first"`; \ ++ if test "$$first" != "."; then \ ++ if test "$$first" = ".."; then \ ++ dir2=`echo "$$dir0" | sed -e "$$sed_last"`/"$$dir2"; \ ++ dir0=`echo "$$dir0" | sed -e "$$sed_butlast"`; \ ++ else \ ++ first2=`echo "$$dir2" | sed -e "$$sed_first"`; \ ++ if test "$$first2" = "$$first"; then \ ++ dir2=`echo "$$dir2" | sed -e "$$sed_rest"`; \ ++ else \ ++ dir2="../$$dir2"; \ ++ fi; \ ++ dir0="$$dir0"/"$$first"; \ ++ fi; \ ++ fi; \ ++ dir1=`echo "$$dir1" | sed -e "$$sed_rest"`; \ ++ done; \ ++ reldir="$$dir2" ++ACLOCAL = ${SHELL} /root/zfs-0.6.0-rc12/config/missing --run aclocal-1.11 ++ALIEN = alien ++ALIEN_VERSION = ++AMTAR = $${TAR-tar} ++AM_DEFAULT_VERBOSITY = 1 ++AR = ar ++AUTOCONF = ${SHELL} /root/zfs-0.6.0-rc12/config/missing --run autoconf ++AUTOHEADER = ${SHELL} /root/zfs-0.6.0-rc12/config/missing --run autoheader ++AUTOMAKE = ${SHELL} /root/zfs-0.6.0-rc12/config/missing --run automake-1.11 ++AWK = gawk ++CC = gcc ++CCAS = gcc ++CCASDEPMODE = depmode=gcc3 ++CCASFLAGS = -g -O2 ++CCDEPMODE = depmode=gcc3 ++CFLAGS = -g -O2 ++CPP = gcc -E ++CPPFLAGS = ++CYGPATH_W = echo ++DEBUG_CFLAGS = -DNDEBUG ++DEBUG_DMU_TX = _without_debug_dmu_tx ++DEBUG_STACKFLAGS = ++DEBUG_ZFS = _without_debug ++DEFAULT_INIT_DIR = ${prefix}/etc/init.d ++DEFAULT_INIT_SCRIPT = gentoo ++DEFAULT_PACKAGE = tgz ++DEFS = -DHAVE_CONFIG_H ++DEPDIR = .deps ++DLLTOOL = false ++DPKG = dpkg ++DPKGBUILD = dpkg-buildpackage ++DPKGBUILD_VERSION = ++DPKG_VERSION = ++DSYMUTIL = ++DUMPBIN = ++ECHO_C = ++ECHO_N = -n ++ECHO_T = ++EGREP = /bin/grep -E ++EXEEXT = ++FGREP = /bin/grep -F ++FRAME_LARGER_THAN = -Wframe-larger-than=1024 ++GREP = /bin/grep ++HAVE_ALIEN = no ++HAVE_DPKG = no ++HAVE_DPKGBUILD = no ++HAVE_MAKEPKG = ++HAVE_PACMAN = ++HAVE_RPM = yes ++HAVE_RPMBUILD = yes ++INSTALL = /usr/bin/install -c ++INSTALL_DATA = ${INSTALL} -m 644 ++INSTALL_PROGRAM = ${INSTALL} ++INSTALL_SCRIPT = ${INSTALL} ++INSTALL_STRIP_PROGRAM = $(install_sh) -c -s ++KERNELCPPFLAGS = -Wno-unused-but-set-variable -DHAVE_SPL -D_KERNEL -DTEXT_DOMAIN=\"zfs-linux-kernel\" -DNDEBUG ++KERNELMAKE_PARAMS = O=/usr/src/linux-3.6.0-sabayon ++LD = /usr/x86_64-pc-linux-gnu/bin/ld -m elf_x86_64 ++LDFLAGS = ++LIBBLKID = ++LIBOBJS = ++LIBS = -luuid -luuid -lz -lz -lz ++LIBSELINUX = ++LIBTOOL = $(SHELL) $(top_builddir)/libtool ++LIBUUID = -luuid ++LINUX = /usr/src/linux-3.2.33-go ++LINUX_OBJ = /usr/src/linux-3.6.0-sabayon ++LINUX_SYMBOLS = NONE ++LINUX_VERSION = 3.6.0-sabayon ++LIPO = ++LN_S = ln -s ++LTLIBOBJS = ++MAINT = # ++MAKEINFO = ${SHELL} /root/zfs-0.6.0-rc12/config/missing --run makeinfo ++MAKEPKG = ++MAKEPKG_VERSION = ++MANIFEST_TOOL = : ++MKDIR_P = /bin/mkdir -p ++NM = /usr/bin/nm -B ++NMEDIT = ++NO_UNUSED_BUT_SET_VARIABLE = -Wno-unused-but-set-variable ++OBJDUMP = objdump ++OBJEXT = o ++OTOOL = ++OTOOL64 = ++PACKAGE = zfs ++PACKAGE_BUGREPORT = ++PACKAGE_NAME = ++PACKAGE_STRING = ++PACKAGE_TARNAME = ++PACKAGE_URL = ++PACKAGE_VERSION = ++PACMAN = ++PACMAN_VERSION = ++PATH_SEPARATOR = : ++RANLIB = ranlib ++RPM = rpm ++RPMBUILD = rpmbuild ++RPMBUILD_VERSION = 4.10.0 ++RPM_VERSION = 4.10.0 ++SED = /bin/sed ++SET_MAKE = ++SHELL = /bin/sh ++SPL = /usr/src/linux-3.2.33-go ++SPL_OBJ = /usr/src/linux-3.2.33-go ++SPL_SYMBOLS = NONE ++SPL_VERSION = 0.6.0-rc12 
++STRIP = strip ++TARGET_ASM_DIR = asm-x86_64 ++VENDOR = gentoo ++VERSION = 0.6.0 ++ZFS_CONFIG = all ++ZFS_META_ALIAS = zfs-0.6.0-rc12 ++ZFS_META_AUTHOR = Sun Microsystems/Oracle, Lawrence Livermore National Laboratory ++ZFS_META_DATA = ++ZFS_META_LICENSE = CDDL ++ZFS_META_LT_AGE = ++ZFS_META_LT_CURRENT = ++ZFS_META_LT_REVISION = ++ZFS_META_NAME = zfs ++ZFS_META_RELEASE = rc12 ++ZFS_META_VERSION = 0.6.0 ++ZLIB = -lz ++abs_builddir = /root/zfs-0.6.0-rc12/include/sys ++abs_srcdir = /root/zfs-0.6.0-rc12/include/sys ++abs_top_builddir = /root/zfs-0.6.0-rc12 ++abs_top_srcdir = /root/zfs-0.6.0-rc12 ++ac_ct_AR = ar ++ac_ct_CC = gcc ++ac_ct_DUMPBIN = ++am__include = include ++am__leading_dot = . ++am__quote = ++am__tar = $${TAR-tar} chof - "$$tardir" ++am__untar = $${TAR-tar} xf - ++bindir = ${exec_prefix}/bin ++build = x86_64-unknown-linux-gnu ++build_alias = ++build_cpu = x86_64 ++build_os = linux-gnu ++build_vendor = unknown ++builddir = . ++datadir = ${datarootdir} ++datarootdir = ${prefix}/share ++docdir = ${datarootdir}/doc/${PACKAGE} ++dvidir = ${docdir} ++exec_prefix = ${prefix} ++host = x86_64-unknown-linux-gnu ++host_alias = ++host_cpu = x86_64 ++host_os = linux-gnu ++host_vendor = unknown ++htmldir = ${docdir} ++includedir = ${prefix}/include ++infodir = ${datarootdir}/info ++install_sh = ${SHELL} /root/zfs-0.6.0-rc12/config/install-sh ++libdir = ${exec_prefix}/lib ++libexecdir = ${exec_prefix}/libexec ++localedir = ${datarootdir}/locale ++localstatedir = ${prefix}/var ++mandir = ${datarootdir}/man ++mkdir_p = /bin/mkdir -p ++oldincludedir = /usr/include ++pdfdir = ${docdir} ++prefix = /usr/local ++program_transform_name = s,x,x, ++psdir = ${docdir} ++sbindir = ${exec_prefix}/sbin ++sharedstatedir = ${prefix}/com ++srcdir = . ++sysconfdir = ${prefix}/etc ++target = x86_64-unknown-linux-gnu ++target_alias = ++target_cpu = x86_64 ++target_os = linux-gnu ++target_vendor = unknown ++top_build_prefix = ../../ ++top_builddir = ../.. ++top_srcdir = ../.. 
++udevdir = ${exec_prefix}/lib/udev ++udevruledir = ${udevdir}/rules.d ++SUBDIRS = fm fs ++COMMON_H = \ ++ $(top_srcdir)/include/sys/arc.h \ ++ $(top_srcdir)/include/sys/avl.h \ ++ $(top_srcdir)/include/sys/avl_impl.h \ ++ $(top_srcdir)/include/sys/bplist.h \ ++ $(top_srcdir)/include/sys/bpobj.h \ ++ $(top_srcdir)/include/sys/dbuf.h \ ++ $(top_srcdir)/include/sys/ddt.h \ ++ $(top_srcdir)/include/sys/dmu.h \ ++ $(top_srcdir)/include/sys/dmu_impl.h \ ++ $(top_srcdir)/include/sys/dmu_objset.h \ ++ $(top_srcdir)/include/sys/dmu_traverse.h \ ++ $(top_srcdir)/include/sys/dmu_tx.h \ ++ $(top_srcdir)/include/sys/dmu_zfetch.h \ ++ $(top_srcdir)/include/sys/dnode.h \ ++ $(top_srcdir)/include/sys/dsl_dataset.h \ ++ $(top_srcdir)/include/sys/dsl_deadlist.h \ ++ $(top_srcdir)/include/sys/dsl_deleg.h \ ++ $(top_srcdir)/include/sys/dsl_dir.h \ ++ $(top_srcdir)/include/sys/dsl_pool.h \ ++ $(top_srcdir)/include/sys/dsl_prop.h \ ++ $(top_srcdir)/include/sys/dsl_scan.h \ ++ $(top_srcdir)/include/sys/dsl_synctask.h \ ++ $(top_srcdir)/include/sys/efi_partition.h \ ++ $(top_srcdir)/include/sys/metaslab.h \ ++ $(top_srcdir)/include/sys/metaslab_impl.h \ ++ $(top_srcdir)/include/sys/nvpair.h \ ++ $(top_srcdir)/include/sys/nvpair_impl.h \ ++ $(top_srcdir)/include/sys/refcount.h \ ++ $(top_srcdir)/include/sys/rrwlock.h \ ++ $(top_srcdir)/include/sys/sa.h \ ++ $(top_srcdir)/include/sys/sa_impl.h \ ++ $(top_srcdir)/include/sys/spa_boot.h \ ++ $(top_srcdir)/include/sys/space_map.h \ ++ $(top_srcdir)/include/sys/spa.h \ ++ $(top_srcdir)/include/sys/spa_impl.h \ ++ $(top_srcdir)/include/sys/txg.h \ ++ $(top_srcdir)/include/sys/txg_impl.h \ ++ $(top_srcdir)/include/sys/u8_textprep_data.h \ ++ $(top_srcdir)/include/sys/u8_textprep.h \ ++ $(top_srcdir)/include/sys/uberblock.h \ ++ $(top_srcdir)/include/sys/uberblock_impl.h \ ++ $(top_srcdir)/include/sys/uio_impl.h \ ++ $(top_srcdir)/include/sys/unique.h \ ++ $(top_srcdir)/include/sys/uuid.h \ ++ $(top_srcdir)/include/sys/vdev_disk.h \ ++ $(top_srcdir)/include/sys/vdev_file.h \ ++ $(top_srcdir)/include/sys/vdev.h \ ++ $(top_srcdir)/include/sys/vdev_impl.h \ ++ $(top_srcdir)/include/sys/xvattr.h \ ++ $(top_srcdir)/include/sys/zap.h \ ++ $(top_srcdir)/include/sys/zap_impl.h \ ++ $(top_srcdir)/include/sys/zap_leaf.h \ ++ $(top_srcdir)/include/sys/zfs_acl.h \ ++ $(top_srcdir)/include/sys/zfs_context.h \ ++ $(top_srcdir)/include/sys/zfs_ctldir.h \ ++ $(top_srcdir)/include/sys/zfs_debug.h \ ++ $(top_srcdir)/include/sys/zfs_dir.h \ ++ $(top_srcdir)/include/sys/zfs_fuid.h \ ++ $(top_srcdir)/include/sys/zfs_rlock.h \ ++ $(top_srcdir)/include/sys/zfs_sa.h \ ++ $(top_srcdir)/include/sys/zfs_stat.h \ ++ $(top_srcdir)/include/sys/zfs_vfsops.h \ ++ $(top_srcdir)/include/sys/zfs_znode.h \ ++ $(top_srcdir)/include/sys/zfs_vnops.h \ ++ $(top_srcdir)/include/sys/zil.h \ ++ $(top_srcdir)/include/sys/zil_impl.h \ ++ $(top_srcdir)/include/sys/zio_checksum.h \ ++ $(top_srcdir)/include/sys/zio_compress.h \ ++ $(top_srcdir)/include/sys/zio.h \ ++ $(top_srcdir)/include/sys/zio_impl.h \ ++ $(top_srcdir)/include/sys/zrlock.h ++ ++KERNEL_H = \ ++ $(top_srcdir)/include/sys/zfs_ioctl.h \ ++ $(top_srcdir)/include/sys/zfs_onexit.h \ ++ ${top_srcdir}/include/sys/zpl.h \ ++ $(top_srcdir)/include/sys/zvol.h ++ ++USER_H = ++EXTRA_DIST = $(COMMON_H) $(KERNEL_H) $(USER_H) ++libzfsdir = $(includedir)/libzfs/sys ++libzfs_HEADERS = $(COMMON_H) $(USER_H) ++#kerneldir = /usr/src/zfs-$(ZFS_META_VERSION)-$(ZFS_META_RELEASE)/$(LINUX_VERSION)/sys ++#kernel_HEADERS = $(COMMON_H) $(KERNEL_H) ++all: all-recursive ++ 
++.SUFFIXES: ++$(srcdir)/Makefile.in: # $(srcdir)/Makefile.am $(am__configure_deps) ++ @for dep in $?; do \ ++ case '$(am__configure_deps)' in \ ++ *$$dep*) \ ++ ( cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh ) \ ++ && { if test -f $@; then exit 0; else break; fi; }; \ ++ exit 1;; \ ++ esac; \ ++ done; \ ++ echo ' cd $(top_srcdir) && $(AUTOMAKE) --gnu include/sys/Makefile'; \ ++ $(am__cd) $(top_srcdir) && \ ++ $(AUTOMAKE) --gnu include/sys/Makefile ++.PRECIOUS: Makefile ++Makefile: $(srcdir)/Makefile.in $(top_builddir)/config.status ++ @case '$?' in \ ++ *config.status*) \ ++ cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh;; \ ++ *) \ ++ echo ' cd $(top_builddir) && $(SHELL) ./config.status $(subdir)/$@ $(am__depfiles_maybe)'; \ ++ cd $(top_builddir) && $(SHELL) ./config.status $(subdir)/$@ $(am__depfiles_maybe);; \ ++ esac; ++ ++$(top_builddir)/config.status: $(top_srcdir)/configure $(CONFIG_STATUS_DEPENDENCIES) ++ cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh ++ ++$(top_srcdir)/configure: # $(am__configure_deps) ++ cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh ++$(ACLOCAL_M4): # $(am__aclocal_m4_deps) ++ cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh ++$(am__aclocal_m4_deps): ++ ++mostlyclean-libtool: ++ -rm -f *.lo ++ ++clean-libtool: ++ -rm -rf .libs _libs ++install-kernelHEADERS: $(kernel_HEADERS) ++ @$(NORMAL_INSTALL) ++ @list='$(kernel_HEADERS)'; test -n "$(kerneldir)" || list=; \ ++ if test -n "$$list"; then \ ++ echo " $(MKDIR_P) '$(DESTDIR)$(kerneldir)'"; \ ++ $(MKDIR_P) "$(DESTDIR)$(kerneldir)" || exit 1; \ ++ fi; \ ++ for p in $$list; do \ ++ if test -f "$$p"; then d=; else d="$(srcdir)/"; fi; \ ++ echo "$$d$$p"; \ ++ done | $(am__base_list) | \ ++ while read files; do \ ++ echo " $(INSTALL_HEADER) $$files '$(DESTDIR)$(kerneldir)'"; \ ++ $(INSTALL_HEADER) $$files "$(DESTDIR)$(kerneldir)" || exit $$?; \ ++ done ++ ++uninstall-kernelHEADERS: ++ @$(NORMAL_UNINSTALL) ++ @list='$(kernel_HEADERS)'; test -n "$(kerneldir)" || list=; \ ++ files=`for p in $$list; do echo $$p; done | sed -e 's|^.*/||'`; \ ++ dir='$(DESTDIR)$(kerneldir)'; $(am__uninstall_files_from_dir) ++install-libzfsHEADERS: $(libzfs_HEADERS) ++ @$(NORMAL_INSTALL) ++ @list='$(libzfs_HEADERS)'; test -n "$(libzfsdir)" || list=; \ ++ if test -n "$$list"; then \ ++ echo " $(MKDIR_P) '$(DESTDIR)$(libzfsdir)'"; \ ++ $(MKDIR_P) "$(DESTDIR)$(libzfsdir)" || exit 1; \ ++ fi; \ ++ for p in $$list; do \ ++ if test -f "$$p"; then d=; else d="$(srcdir)/"; fi; \ ++ echo "$$d$$p"; \ ++ done | $(am__base_list) | \ ++ while read files; do \ ++ echo " $(INSTALL_HEADER) $$files '$(DESTDIR)$(libzfsdir)'"; \ ++ $(INSTALL_HEADER) $$files "$(DESTDIR)$(libzfsdir)" || exit $$?; \ ++ done ++ ++uninstall-libzfsHEADERS: ++ @$(NORMAL_UNINSTALL) ++ @list='$(libzfs_HEADERS)'; test -n "$(libzfsdir)" || list=; \ ++ files=`for p in $$list; do echo $$p; done | sed -e 's|^.*/||'`; \ ++ dir='$(DESTDIR)$(libzfsdir)'; $(am__uninstall_files_from_dir) ++ ++# This directory's subdirectories are mostly independent; you can cd ++# into them and run `make' without going through this Makefile. ++# To change the values of `make' variables: instead of editing Makefiles, ++# (1) if the variable is set in `config.status', edit `config.status' ++# (which will cause the Makefiles to be regenerated when you run `make'); ++# (2) otherwise, pass the desired values on the `make' command line. 
++$(RECURSIVE_TARGETS): ++ @fail= failcom='exit 1'; \ ++ for f in x $$MAKEFLAGS; do \ ++ case $$f in \ ++ *=* | --[!k]*);; \ ++ *k*) failcom='fail=yes';; \ ++ esac; \ ++ done; \ ++ dot_seen=no; \ ++ target=`echo $@ | sed s/-recursive//`; \ ++ list='$(SUBDIRS)'; for subdir in $$list; do \ ++ echo "Making $$target in $$subdir"; \ ++ if test "$$subdir" = "."; then \ ++ dot_seen=yes; \ ++ local_target="$$target-am"; \ ++ else \ ++ local_target="$$target"; \ ++ fi; \ ++ ($(am__cd) $$subdir && $(MAKE) $(AM_MAKEFLAGS) $$local_target) \ ++ || eval $$failcom; \ ++ done; \ ++ if test "$$dot_seen" = "no"; then \ ++ $(MAKE) $(AM_MAKEFLAGS) "$$target-am" || exit 1; \ ++ fi; test -z "$$fail" ++ ++$(RECURSIVE_CLEAN_TARGETS): ++ @fail= failcom='exit 1'; \ ++ for f in x $$MAKEFLAGS; do \ ++ case $$f in \ ++ *=* | --[!k]*);; \ ++ *k*) failcom='fail=yes';; \ ++ esac; \ ++ done; \ ++ dot_seen=no; \ ++ case "$@" in \ ++ distclean-* | maintainer-clean-*) list='$(DIST_SUBDIRS)' ;; \ ++ *) list='$(SUBDIRS)' ;; \ ++ esac; \ ++ rev=''; for subdir in $$list; do \ ++ if test "$$subdir" = "."; then :; else \ ++ rev="$$subdir $$rev"; \ ++ fi; \ ++ done; \ ++ rev="$$rev ."; \ ++ target=`echo $@ | sed s/-recursive//`; \ ++ for subdir in $$rev; do \ ++ echo "Making $$target in $$subdir"; \ ++ if test "$$subdir" = "."; then \ ++ local_target="$$target-am"; \ ++ else \ ++ local_target="$$target"; \ ++ fi; \ ++ ($(am__cd) $$subdir && $(MAKE) $(AM_MAKEFLAGS) $$local_target) \ ++ || eval $$failcom; \ ++ done && test -z "$$fail" ++tags-recursive: ++ list='$(SUBDIRS)'; for subdir in $$list; do \ ++ test "$$subdir" = . || ($(am__cd) $$subdir && $(MAKE) $(AM_MAKEFLAGS) tags); \ ++ done ++ctags-recursive: ++ list='$(SUBDIRS)'; for subdir in $$list; do \ ++ test "$$subdir" = . || ($(am__cd) $$subdir && $(MAKE) $(AM_MAKEFLAGS) ctags); \ ++ done ++ ++ID: $(HEADERS) $(SOURCES) $(LISP) $(TAGS_FILES) ++ list='$(SOURCES) $(HEADERS) $(LISP) $(TAGS_FILES)'; \ ++ unique=`for i in $$list; do \ ++ if test -f "$$i"; then echo $$i; else echo $(srcdir)/$$i; fi; \ ++ done | \ ++ $(AWK) '{ files[$$0] = 1; nonempty = 1; } \ ++ END { if (nonempty) { for (i in files) print i; }; }'`; \ ++ mkid -fID $$unique ++tags: TAGS ++ ++TAGS: tags-recursive $(HEADERS) $(SOURCES) $(TAGS_DEPENDENCIES) \ ++ $(TAGS_FILES) $(LISP) ++ set x; \ ++ here=`pwd`; \ ++ if ($(ETAGS) --etags-include --version) >/dev/null 2>&1; then \ ++ include_option=--etags-include; \ ++ empty_fix=.; \ ++ else \ ++ include_option=--include; \ ++ empty_fix=; \ ++ fi; \ ++ list='$(SUBDIRS)'; for subdir in $$list; do \ ++ if test "$$subdir" = .; then :; else \ ++ test ! 
-f $$subdir/TAGS || \ ++ set "$$@" "$$include_option=$$here/$$subdir/TAGS"; \ ++ fi; \ ++ done; \ ++ list='$(SOURCES) $(HEADERS) $(LISP) $(TAGS_FILES)'; \ ++ unique=`for i in $$list; do \ ++ if test -f "$$i"; then echo $$i; else echo $(srcdir)/$$i; fi; \ ++ done | \ ++ $(AWK) '{ files[$$0] = 1; nonempty = 1; } \ ++ END { if (nonempty) { for (i in files) print i; }; }'`; \ ++ shift; \ ++ if test -z "$(ETAGS_ARGS)$$*$$unique"; then :; else \ ++ test -n "$$unique" || unique=$$empty_fix; \ ++ if test $$# -gt 0; then \ ++ $(ETAGS) $(ETAGSFLAGS) $(AM_ETAGSFLAGS) $(ETAGS_ARGS) \ ++ "$$@" $$unique; \ ++ else \ ++ $(ETAGS) $(ETAGSFLAGS) $(AM_ETAGSFLAGS) $(ETAGS_ARGS) \ ++ $$unique; \ ++ fi; \ ++ fi ++ctags: CTAGS ++CTAGS: ctags-recursive $(HEADERS) $(SOURCES) $(TAGS_DEPENDENCIES) \ ++ $(TAGS_FILES) $(LISP) ++ list='$(SOURCES) $(HEADERS) $(LISP) $(TAGS_FILES)'; \ ++ unique=`for i in $$list; do \ ++ if test -f "$$i"; then echo $$i; else echo $(srcdir)/$$i; fi; \ ++ done | \ ++ $(AWK) '{ files[$$0] = 1; nonempty = 1; } \ ++ END { if (nonempty) { for (i in files) print i; }; }'`; \ ++ test -z "$(CTAGS_ARGS)$$unique" \ ++ || $(CTAGS) $(CTAGSFLAGS) $(AM_CTAGSFLAGS) $(CTAGS_ARGS) \ ++ $$unique ++ ++GTAGS: ++ here=`$(am__cd) $(top_builddir) && pwd` \ ++ && $(am__cd) $(top_srcdir) \ ++ && gtags -i $(GTAGS_ARGS) "$$here" ++ ++distclean-tags: ++ -rm -f TAGS ID GTAGS GRTAGS GSYMS GPATH tags ++ ++distdir: $(DISTFILES) ++ @srcdirstrip=`echo "$(srcdir)" | sed 's/[].[^$$\\*]/\\\\&/g'`; \ ++ topsrcdirstrip=`echo "$(top_srcdir)" | sed 's/[].[^$$\\*]/\\\\&/g'`; \ ++ list='$(DISTFILES)'; \ ++ dist_files=`for file in $$list; do echo $$file; done | \ ++ sed -e "s|^$$srcdirstrip/||;t" \ ++ -e "s|^$$topsrcdirstrip/|$(top_builddir)/|;t"`; \ ++ case $$dist_files in \ ++ */*) $(MKDIR_P) `echo "$$dist_files" | \ ++ sed '/\//!d;s|^|$(distdir)/|;s,/[^/]*$$,,' | \ ++ sort -u` ;; \ ++ esac; \ ++ for file in $$dist_files; do \ ++ if test -f $$file || test -d $$file; then d=.; else d=$(srcdir); fi; \ ++ if test -d $$d/$$file; then \ ++ dir=`echo "/$$file" | sed -e 's,/[^/]*$$,,'`; \ ++ if test -d "$(distdir)/$$file"; then \ ++ find "$(distdir)/$$file" -type d ! -perm -700 -exec chmod u+rwx {} \;; \ ++ fi; \ ++ if test -d $(srcdir)/$$file && test $$d != $(srcdir); then \ ++ cp -fpR $(srcdir)/$$file "$(distdir)$$dir" || exit 1; \ ++ find "$(distdir)/$$file" -type d ! 
-perm -700 -exec chmod u+rwx {} \;; \ ++ fi; \ ++ cp -fpR $$d/$$file "$(distdir)$$dir" || exit 1; \ ++ else \ ++ test -f "$(distdir)/$$file" \ ++ || cp -p $$d/$$file "$(distdir)/$$file" \ ++ || exit 1; \ ++ fi; \ ++ done ++ @list='$(DIST_SUBDIRS)'; for subdir in $$list; do \ ++ if test "$$subdir" = .; then :; else \ ++ $(am__make_dryrun) \ ++ || test -d "$(distdir)/$$subdir" \ ++ || $(MKDIR_P) "$(distdir)/$$subdir" \ ++ || exit 1; \ ++ dir1=$$subdir; dir2="$(distdir)/$$subdir"; \ ++ $(am__relativize); \ ++ new_distdir=$$reldir; \ ++ dir1=$$subdir; dir2="$(top_distdir)"; \ ++ $(am__relativize); \ ++ new_top_distdir=$$reldir; \ ++ echo " (cd $$subdir && $(MAKE) $(AM_MAKEFLAGS) top_distdir="$$new_top_distdir" distdir="$$new_distdir" \\"; \ ++ echo " am__remove_distdir=: am__skip_length_check=: am__skip_mode_fix=: distdir)"; \ ++ ($(am__cd) $$subdir && \ ++ $(MAKE) $(AM_MAKEFLAGS) \ ++ top_distdir="$$new_top_distdir" \ ++ distdir="$$new_distdir" \ ++ am__remove_distdir=: \ ++ am__skip_length_check=: \ ++ am__skip_mode_fix=: \ ++ distdir) \ ++ || exit 1; \ ++ fi; \ ++ done ++check-am: all-am ++check: check-recursive ++all-am: Makefile $(HEADERS) ++installdirs: installdirs-recursive ++installdirs-am: ++ for dir in "$(DESTDIR)$(kerneldir)" "$(DESTDIR)$(libzfsdir)"; do \ ++ test -z "$$dir" || $(MKDIR_P) "$$dir"; \ ++ done ++install: install-recursive ++install-exec: install-exec-recursive ++install-data: install-data-recursive ++uninstall: uninstall-recursive ++ ++install-am: all-am ++ @$(MAKE) $(AM_MAKEFLAGS) install-exec-am install-data-am ++ ++installcheck: installcheck-recursive ++install-strip: ++ if test -z '$(STRIP)'; then \ ++ $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \ ++ install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \ ++ install; \ ++ else \ ++ $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \ ++ install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \ ++ "INSTALL_PROGRAM_ENV=STRIPPROG='$(STRIP)'" install; \ ++ fi ++mostlyclean-generic: ++ ++clean-generic: ++ ++distclean-generic: ++ -test -z "$(CONFIG_CLEAN_FILES)" || rm -f $(CONFIG_CLEAN_FILES) ++ -test . = "$(srcdir)" || test -z "$(CONFIG_CLEAN_VPATH_FILES)" || rm -f $(CONFIG_CLEAN_VPATH_FILES) ++ ++maintainer-clean-generic: ++ @echo "This command is intended for maintainers to use" ++ @echo "it deletes files that may require special tools to rebuild." 
++clean: clean-recursive ++ ++clean-am: clean-generic clean-libtool mostlyclean-am ++ ++distclean: distclean-recursive ++ -rm -f Makefile ++distclean-am: clean-am distclean-generic distclean-tags ++ ++dvi: dvi-recursive ++ ++dvi-am: ++ ++html: html-recursive ++ ++html-am: ++ ++info: info-recursive ++ ++info-am: ++ ++install-data-am: install-kernelHEADERS install-libzfsHEADERS ++ ++install-dvi: install-dvi-recursive ++ ++install-dvi-am: ++ ++install-exec-am: ++ ++install-html: install-html-recursive ++ ++install-html-am: ++ ++install-info: install-info-recursive ++ ++install-info-am: ++ ++install-man: ++ ++install-pdf: install-pdf-recursive ++ ++install-pdf-am: ++ ++install-ps: install-ps-recursive ++ ++install-ps-am: ++ ++installcheck-am: ++ ++maintainer-clean: maintainer-clean-recursive ++ -rm -f Makefile ++maintainer-clean-am: distclean-am maintainer-clean-generic ++ ++mostlyclean: mostlyclean-recursive ++ ++mostlyclean-am: mostlyclean-generic mostlyclean-libtool ++ ++pdf: pdf-recursive ++ ++pdf-am: ++ ++ps: ps-recursive ++ ++ps-am: ++ ++uninstall-am: uninstall-kernelHEADERS uninstall-libzfsHEADERS ++ ++.MAKE: $(RECURSIVE_CLEAN_TARGETS) $(RECURSIVE_TARGETS) ctags-recursive \ ++ install-am install-strip tags-recursive ++ ++.PHONY: $(RECURSIVE_CLEAN_TARGETS) $(RECURSIVE_TARGETS) CTAGS GTAGS \ ++ all all-am check check-am clean clean-generic clean-libtool \ ++ ctags ctags-recursive distclean distclean-generic \ ++ distclean-libtool distclean-tags distdir dvi dvi-am html \ ++ html-am info info-am install install-am install-data \ ++ install-data-am install-dvi install-dvi-am install-exec \ ++ install-exec-am install-html install-html-am install-info \ ++ install-info-am install-kernelHEADERS install-libzfsHEADERS \ ++ install-man install-pdf install-pdf-am install-ps \ ++ install-ps-am install-strip installcheck installcheck-am \ ++ installdirs installdirs-am maintainer-clean \ ++ maintainer-clean-generic mostlyclean mostlyclean-generic \ ++ mostlyclean-libtool pdf pdf-am ps ps-am tags tags-recursive \ ++ uninstall uninstall-am uninstall-kernelHEADERS \ ++ uninstall-libzfsHEADERS ++ ++ ++# Tell versions [3.59,3.63) of GNU make to not export all variables. ++# Otherwise a system limit (for SysV at least) may be exceeded. 
++.NOEXPORT: +diff -uNr linux-3.2.33-go.orig/include/zfs/sys/Makefile.am linux-3.2.33-go/include/zfs/sys/Makefile.am +--- linux-3.2.33-go.orig/include/zfs/sys/Makefile.am 1970-01-01 01:00:00.000000000 +0100 ++++ linux-3.2.33-go/include/zfs/sys/Makefile.am 2012-11-16 23:25:34.336039485 +0100 +@@ -0,0 +1,94 @@ ++SUBDIRS = fm fs ++ ++COMMON_H = \ ++ $(top_srcdir)/include/sys/arc.h \ ++ $(top_srcdir)/include/sys/avl.h \ ++ $(top_srcdir)/include/sys/avl_impl.h \ ++ $(top_srcdir)/include/sys/bplist.h \ ++ $(top_srcdir)/include/sys/bpobj.h \ ++ $(top_srcdir)/include/sys/dbuf.h \ ++ $(top_srcdir)/include/sys/ddt.h \ ++ $(top_srcdir)/include/sys/dmu.h \ ++ $(top_srcdir)/include/sys/dmu_impl.h \ ++ $(top_srcdir)/include/sys/dmu_objset.h \ ++ $(top_srcdir)/include/sys/dmu_traverse.h \ ++ $(top_srcdir)/include/sys/dmu_tx.h \ ++ $(top_srcdir)/include/sys/dmu_zfetch.h \ ++ $(top_srcdir)/include/sys/dnode.h \ ++ $(top_srcdir)/include/sys/dsl_dataset.h \ ++ $(top_srcdir)/include/sys/dsl_deadlist.h \ ++ $(top_srcdir)/include/sys/dsl_deleg.h \ ++ $(top_srcdir)/include/sys/dsl_dir.h \ ++ $(top_srcdir)/include/sys/dsl_pool.h \ ++ $(top_srcdir)/include/sys/dsl_prop.h \ ++ $(top_srcdir)/include/sys/dsl_scan.h \ ++ $(top_srcdir)/include/sys/dsl_synctask.h \ ++ $(top_srcdir)/include/sys/efi_partition.h \ ++ $(top_srcdir)/include/sys/metaslab.h \ ++ $(top_srcdir)/include/sys/metaslab_impl.h \ ++ $(top_srcdir)/include/sys/nvpair.h \ ++ $(top_srcdir)/include/sys/nvpair_impl.h \ ++ $(top_srcdir)/include/sys/refcount.h \ ++ $(top_srcdir)/include/sys/rrwlock.h \ ++ $(top_srcdir)/include/sys/sa.h \ ++ $(top_srcdir)/include/sys/sa_impl.h \ ++ $(top_srcdir)/include/sys/spa_boot.h \ ++ $(top_srcdir)/include/sys/space_map.h \ ++ $(top_srcdir)/include/sys/spa.h \ ++ $(top_srcdir)/include/sys/spa_impl.h \ ++ $(top_srcdir)/include/sys/txg.h \ ++ $(top_srcdir)/include/sys/txg_impl.h \ ++ $(top_srcdir)/include/sys/u8_textprep_data.h \ ++ $(top_srcdir)/include/sys/u8_textprep.h \ ++ $(top_srcdir)/include/sys/uberblock.h \ ++ $(top_srcdir)/include/sys/uberblock_impl.h \ ++ $(top_srcdir)/include/sys/uio_impl.h \ ++ $(top_srcdir)/include/sys/unique.h \ ++ $(top_srcdir)/include/sys/uuid.h \ ++ $(top_srcdir)/include/sys/vdev_disk.h \ ++ $(top_srcdir)/include/sys/vdev_file.h \ ++ $(top_srcdir)/include/sys/vdev.h \ ++ $(top_srcdir)/include/sys/vdev_impl.h \ ++ $(top_srcdir)/include/sys/xvattr.h \ ++ $(top_srcdir)/include/sys/zap.h \ ++ $(top_srcdir)/include/sys/zap_impl.h \ ++ $(top_srcdir)/include/sys/zap_leaf.h \ ++ $(top_srcdir)/include/sys/zfs_acl.h \ ++ $(top_srcdir)/include/sys/zfs_context.h \ ++ $(top_srcdir)/include/sys/zfs_ctldir.h \ ++ $(top_srcdir)/include/sys/zfs_debug.h \ ++ $(top_srcdir)/include/sys/zfs_dir.h \ ++ $(top_srcdir)/include/sys/zfs_fuid.h \ ++ $(top_srcdir)/include/sys/zfs_rlock.h \ ++ $(top_srcdir)/include/sys/zfs_sa.h \ ++ $(top_srcdir)/include/sys/zfs_stat.h \ ++ $(top_srcdir)/include/sys/zfs_vfsops.h \ ++ $(top_srcdir)/include/sys/zfs_znode.h \ ++ $(top_srcdir)/include/sys/zfs_vnops.h \ ++ $(top_srcdir)/include/sys/zil.h \ ++ $(top_srcdir)/include/sys/zil_impl.h \ ++ $(top_srcdir)/include/sys/zio_checksum.h \ ++ $(top_srcdir)/include/sys/zio_compress.h \ ++ $(top_srcdir)/include/sys/zio.h \ ++ $(top_srcdir)/include/sys/zio_impl.h \ ++ $(top_srcdir)/include/sys/zrlock.h ++ ++KERNEL_H = \ ++ $(top_srcdir)/include/sys/zfs_ioctl.h \ ++ $(top_srcdir)/include/sys/zfs_onexit.h \ ++ ${top_srcdir}/include/sys/zpl.h \ ++ $(top_srcdir)/include/sys/zvol.h ++ ++USER_H = ++ ++EXTRA_DIST = $(COMMON_H) $(KERNEL_H) 
$(USER_H) ++ ++if CONFIG_USER ++libzfsdir = $(includedir)/libzfs/sys ++libzfs_HEADERS = $(COMMON_H) $(USER_H) ++endif ++ ++if CONFIG_KERNEL ++kerneldir = /usr/src/zfs-$(ZFS_META_VERSION)-$(ZFS_META_RELEASE)/$(LINUX_VERSION)/sys ++kernel_HEADERS = $(COMMON_H) $(KERNEL_H) ++endif +diff -uNr linux-3.2.33-go.orig/include/zfs/sys/Makefile.in linux-3.2.33-go/include/zfs/sys/Makefile.in +--- linux-3.2.33-go.orig/include/zfs/sys/Makefile.in 1970-01-01 01:00:00.000000000 +0100 ++++ linux-3.2.33-go/include/zfs/sys/Makefile.in 2012-11-16 23:25:34.342039415 +0100 +@@ -0,0 +1,1029 @@ ++# Makefile.in generated by automake 1.11.6 from Makefile.am. ++# @configure_input@ ++ ++# Copyright (C) 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001, 2002, ++# 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010, 2011 Free Software ++# Foundation, Inc. ++# This Makefile.in is free software; the Free Software Foundation ++# gives unlimited permission to copy and/or distribute it, ++# with or without modifications, as long as this notice is preserved. ++ ++# This program is distributed in the hope that it will be useful, ++# but WITHOUT ANY WARRANTY, to the extent permitted by law; without ++# even the implied warranty of MERCHANTABILITY or FITNESS FOR A ++# PARTICULAR PURPOSE. ++ ++@SET_MAKE@ ++ ++VPATH = @srcdir@ ++am__make_dryrun = \ ++ { \ ++ am__dry=no; \ ++ case $$MAKEFLAGS in \ ++ *\\[\ \ ]*) \ ++ echo 'am--echo: ; @echo "AM" OK' | $(MAKE) -f - 2>/dev/null \ ++ | grep '^AM OK$$' >/dev/null || am__dry=yes;; \ ++ *) \ ++ for am__flg in $$MAKEFLAGS; do \ ++ case $$am__flg in \ ++ *=*|--*) ;; \ ++ *n*) am__dry=yes; break;; \ ++ esac; \ ++ done;; \ ++ esac; \ ++ test $$am__dry = yes; \ ++ } ++pkgdatadir = $(datadir)/@PACKAGE@ ++pkgincludedir = $(includedir)/@PACKAGE@ ++pkglibdir = $(libdir)/@PACKAGE@ ++pkglibexecdir = $(libexecdir)/@PACKAGE@ ++am__cd = CDPATH="$${ZSH_VERSION+.}$(PATH_SEPARATOR)" && cd ++install_sh_DATA = $(install_sh) -c -m 644 ++install_sh_PROGRAM = $(install_sh) -c ++install_sh_SCRIPT = $(install_sh) -c ++INSTALL_HEADER = $(INSTALL_DATA) ++transform = $(program_transform_name) ++NORMAL_INSTALL = : ++PRE_INSTALL = : ++POST_INSTALL = : ++NORMAL_UNINSTALL = : ++PRE_UNINSTALL = : ++POST_UNINSTALL = : ++build_triplet = @build@ ++host_triplet = @host@ ++target_triplet = @target@ ++subdir = include/sys ++DIST_COMMON = $(am__kernel_HEADERS_DIST) $(am__libzfs_HEADERS_DIST) \ ++ $(srcdir)/Makefile.am $(srcdir)/Makefile.in ++ACLOCAL_M4 = $(top_srcdir)/aclocal.m4 ++am__aclocal_m4_deps = \ ++ $(top_srcdir)/config/always-no-unused-but-set-variable.m4 \ ++ $(top_srcdir)/config/kernel-automount.m4 \ ++ $(top_srcdir)/config/kernel-bdev-block-device-operations.m4 \ ++ $(top_srcdir)/config/kernel-bdev-logical-size.m4 \ ++ $(top_srcdir)/config/kernel-bdi-setup-and-register.m4 \ ++ $(top_srcdir)/config/kernel-bdi.m4 \ ++ $(top_srcdir)/config/kernel-bio-empty-barrier.m4 \ ++ $(top_srcdir)/config/kernel-bio-end-io-t-args.m4 \ ++ $(top_srcdir)/config/kernel-bio-failfast.m4 \ ++ $(top_srcdir)/config/kernel-bio-rw-syncio.m4 \ ++ $(top_srcdir)/config/kernel-blk-end-request.m4 \ ++ $(top_srcdir)/config/kernel-blk-fetch-request.m4 \ ++ $(top_srcdir)/config/kernel-blk-queue-discard.m4 \ ++ $(top_srcdir)/config/kernel-blk-queue-flush.m4 \ ++ $(top_srcdir)/config/kernel-blk-queue-io-opt.m4 \ ++ $(top_srcdir)/config/kernel-blk-queue-max-hw-sectors.m4 \ ++ $(top_srcdir)/config/kernel-blk-queue-max-segments.m4 \ ++ $(top_srcdir)/config/kernel-blk-queue-nonrot.m4 \ ++ $(top_srcdir)/config/kernel-blk-queue-physical-block-size.m4 \ ++ 
$(top_srcdir)/config/kernel-blk-requeue-request.m4 \ ++ $(top_srcdir)/config/kernel-blk-rq-bytes.m4 \ ++ $(top_srcdir)/config/kernel-blk-rq-pos.m4 \ ++ $(top_srcdir)/config/kernel-blk-rq-sectors.m4 \ ++ $(top_srcdir)/config/kernel-blkdev-get-by-path.m4 \ ++ $(top_srcdir)/config/kernel-blkdev-get.m4 \ ++ $(top_srcdir)/config/kernel-check-disk-size-change.m4 \ ++ $(top_srcdir)/config/kernel-clear-inode.m4 \ ++ $(top_srcdir)/config/kernel-commit-metadata.m4 \ ++ $(top_srcdir)/config/kernel-create-nameidata.m4 \ ++ $(top_srcdir)/config/kernel-d-make-root.m4 \ ++ $(top_srcdir)/config/kernel-d-obtain-alias.m4 \ ++ $(top_srcdir)/config/kernel-discard-granularity.m4 \ ++ $(top_srcdir)/config/kernel-elevator-change.m4 \ ++ $(top_srcdir)/config/kernel-encode-fh-inode.m4 \ ++ $(top_srcdir)/config/kernel-evict-inode.m4 \ ++ $(top_srcdir)/config/kernel-fallocate.m4 \ ++ $(top_srcdir)/config/kernel-fmode-t.m4 \ ++ $(top_srcdir)/config/kernel-fsync.m4 \ ++ $(top_srcdir)/config/kernel-get-disk-ro.m4 \ ++ $(top_srcdir)/config/kernel-get-gendisk.m4 \ ++ $(top_srcdir)/config/kernel-insert-inode-locked.m4 \ ++ $(top_srcdir)/config/kernel-invalidate-bdev-args.m4 \ ++ $(top_srcdir)/config/kernel-kobj-name-len.m4 \ ++ $(top_srcdir)/config/kernel-lookup-nameidata.m4 \ ++ $(top_srcdir)/config/kernel-mkdir-umode-t.m4 \ ++ $(top_srcdir)/config/kernel-mount-nodev.m4 \ ++ $(top_srcdir)/config/kernel-open-bdev-exclusive.m4 \ ++ $(top_srcdir)/config/kernel-rq-for-each_segment.m4 \ ++ $(top_srcdir)/config/kernel-rq-is_sync.m4 \ ++ $(top_srcdir)/config/kernel-security-inode-init.m4 \ ++ $(top_srcdir)/config/kernel-set-nlink.m4 \ ++ $(top_srcdir)/config/kernel-sget-args.m4 \ ++ $(top_srcdir)/config/kernel-show-options.m4 \ ++ $(top_srcdir)/config/kernel-shrink.m4 \ ++ $(top_srcdir)/config/kernel-truncate-range.m4 \ ++ $(top_srcdir)/config/kernel-truncate-setsize.m4 \ ++ $(top_srcdir)/config/kernel-xattr-handler.m4 \ ++ $(top_srcdir)/config/kernel.m4 \ ++ $(top_srcdir)/config/user-arch.m4 \ ++ $(top_srcdir)/config/user-frame-larger-than.m4 \ ++ $(top_srcdir)/config/user-ioctl.m4 \ ++ $(top_srcdir)/config/user-libblkid.m4 \ ++ $(top_srcdir)/config/user-libuuid.m4 \ ++ $(top_srcdir)/config/user-nptl_guard_within_stack.m4 \ ++ $(top_srcdir)/config/user-selinux.m4 \ ++ $(top_srcdir)/config/user-udev.m4 \ ++ $(top_srcdir)/config/user-zlib.m4 $(top_srcdir)/config/user.m4 \ ++ $(top_srcdir)/config/zfs-build.m4 \ ++ $(top_srcdir)/config/zfs-meta.m4 $(top_srcdir)/configure.ac ++am__configure_deps = $(am__aclocal_m4_deps) $(CONFIGURE_DEPENDENCIES) \ ++ $(ACLOCAL_M4) ++mkinstalldirs = $(install_sh) -d ++CONFIG_HEADER = $(top_builddir)/zfs_config.h ++CONFIG_CLEAN_FILES = ++CONFIG_CLEAN_VPATH_FILES = ++AM_V_GEN = $(am__v_GEN_@AM_V@) ++am__v_GEN_ = $(am__v_GEN_@AM_DEFAULT_V@) ++am__v_GEN_0 = @echo " GEN " $@; ++AM_V_at = $(am__v_at_@AM_V@) ++am__v_at_ = $(am__v_at_@AM_DEFAULT_V@) ++am__v_at_0 = @ ++SOURCES = ++DIST_SOURCES = ++RECURSIVE_TARGETS = all-recursive check-recursive dvi-recursive \ ++ html-recursive info-recursive install-data-recursive \ ++ install-dvi-recursive install-exec-recursive \ ++ install-html-recursive install-info-recursive \ ++ install-pdf-recursive install-ps-recursive install-recursive \ ++ installcheck-recursive installdirs-recursive pdf-recursive \ ++ ps-recursive uninstall-recursive ++am__can_run_installinfo = \ ++ case $$AM_UPDATE_INFO_DIR in \ ++ n|no|NO) false;; \ ++ *) (install-info --version) >/dev/null 2>&1;; \ ++ esac ++am__kernel_HEADERS_DIST = $(top_srcdir)/include/sys/arc.h \ ++ 
$(top_srcdir)/include/sys/avl.h \ ++ $(top_srcdir)/include/sys/avl_impl.h \ ++ $(top_srcdir)/include/sys/bplist.h \ ++ $(top_srcdir)/include/sys/bpobj.h \ ++ $(top_srcdir)/include/sys/dbuf.h \ ++ $(top_srcdir)/include/sys/ddt.h \ ++ $(top_srcdir)/include/sys/dmu.h \ ++ $(top_srcdir)/include/sys/dmu_impl.h \ ++ $(top_srcdir)/include/sys/dmu_objset.h \ ++ $(top_srcdir)/include/sys/dmu_traverse.h \ ++ $(top_srcdir)/include/sys/dmu_tx.h \ ++ $(top_srcdir)/include/sys/dmu_zfetch.h \ ++ $(top_srcdir)/include/sys/dnode.h \ ++ $(top_srcdir)/include/sys/dsl_dataset.h \ ++ $(top_srcdir)/include/sys/dsl_deadlist.h \ ++ $(top_srcdir)/include/sys/dsl_deleg.h \ ++ $(top_srcdir)/include/sys/dsl_dir.h \ ++ $(top_srcdir)/include/sys/dsl_pool.h \ ++ $(top_srcdir)/include/sys/dsl_prop.h \ ++ $(top_srcdir)/include/sys/dsl_scan.h \ ++ $(top_srcdir)/include/sys/dsl_synctask.h \ ++ $(top_srcdir)/include/sys/efi_partition.h \ ++ $(top_srcdir)/include/sys/metaslab.h \ ++ $(top_srcdir)/include/sys/metaslab_impl.h \ ++ $(top_srcdir)/include/sys/nvpair.h \ ++ $(top_srcdir)/include/sys/nvpair_impl.h \ ++ $(top_srcdir)/include/sys/refcount.h \ ++ $(top_srcdir)/include/sys/rrwlock.h \ ++ $(top_srcdir)/include/sys/sa.h \ ++ $(top_srcdir)/include/sys/sa_impl.h \ ++ $(top_srcdir)/include/sys/spa_boot.h \ ++ $(top_srcdir)/include/sys/space_map.h \ ++ $(top_srcdir)/include/sys/spa.h \ ++ $(top_srcdir)/include/sys/spa_impl.h \ ++ $(top_srcdir)/include/sys/txg.h \ ++ $(top_srcdir)/include/sys/txg_impl.h \ ++ $(top_srcdir)/include/sys/u8_textprep_data.h \ ++ $(top_srcdir)/include/sys/u8_textprep.h \ ++ $(top_srcdir)/include/sys/uberblock.h \ ++ $(top_srcdir)/include/sys/uberblock_impl.h \ ++ $(top_srcdir)/include/sys/uio_impl.h \ ++ $(top_srcdir)/include/sys/unique.h \ ++ $(top_srcdir)/include/sys/uuid.h \ ++ $(top_srcdir)/include/sys/vdev_disk.h \ ++ $(top_srcdir)/include/sys/vdev_file.h \ ++ $(top_srcdir)/include/sys/vdev.h \ ++ $(top_srcdir)/include/sys/vdev_impl.h \ ++ $(top_srcdir)/include/sys/xvattr.h \ ++ $(top_srcdir)/include/sys/zap.h \ ++ $(top_srcdir)/include/sys/zap_impl.h \ ++ $(top_srcdir)/include/sys/zap_leaf.h \ ++ $(top_srcdir)/include/sys/zfs_acl.h \ ++ $(top_srcdir)/include/sys/zfs_context.h \ ++ $(top_srcdir)/include/sys/zfs_ctldir.h \ ++ $(top_srcdir)/include/sys/zfs_debug.h \ ++ $(top_srcdir)/include/sys/zfs_dir.h \ ++ $(top_srcdir)/include/sys/zfs_fuid.h \ ++ $(top_srcdir)/include/sys/zfs_rlock.h \ ++ $(top_srcdir)/include/sys/zfs_sa.h \ ++ $(top_srcdir)/include/sys/zfs_stat.h \ ++ $(top_srcdir)/include/sys/zfs_vfsops.h \ ++ $(top_srcdir)/include/sys/zfs_znode.h \ ++ $(top_srcdir)/include/sys/zfs_vnops.h \ ++ $(top_srcdir)/include/sys/zil.h \ ++ $(top_srcdir)/include/sys/zil_impl.h \ ++ $(top_srcdir)/include/sys/zio_checksum.h \ ++ $(top_srcdir)/include/sys/zio_compress.h \ ++ $(top_srcdir)/include/sys/zio.h \ ++ $(top_srcdir)/include/sys/zio_impl.h \ ++ $(top_srcdir)/include/sys/zrlock.h \ ++ $(top_srcdir)/include/sys/zfs_ioctl.h \ ++ $(top_srcdir)/include/sys/zfs_onexit.h \ ++ ${top_srcdir}/include/sys/zpl.h \ ++ $(top_srcdir)/include/sys/zvol.h ++am__vpath_adj_setup = srcdirstrip=`echo "$(srcdir)" | sed 's|.|.|g'`; ++am__vpath_adj = case $$p in \ ++ $(srcdir)/*) f=`echo "$$p" | sed "s|^$$srcdirstrip/||"`;; \ ++ *) f=$$p;; \ ++ esac; ++am__strip_dir = f=`echo $$p | sed -e 's|^.*/||'`; ++am__install_max = 40 ++am__nobase_strip_setup = \ ++ srcdirstrip=`echo "$(srcdir)" | sed 's/[].[^$$\\*|]/\\\\&/g'` ++am__nobase_strip = \ ++ for p in $$list; do echo "$$p"; done | sed -e "s|$$srcdirstrip/||" 
++am__nobase_list = $(am__nobase_strip_setup); \ ++ for p in $$list; do echo "$$p $$p"; done | \ ++ sed "s| $$srcdirstrip/| |;"' / .*\//!s/ .*/ ./; s,\( .*\)/[^/]*$$,\1,' | \ ++ $(AWK) 'BEGIN { files["."] = "" } { files[$$2] = files[$$2] " " $$1; \ ++ if (++n[$$2] == $(am__install_max)) \ ++ { print $$2, files[$$2]; n[$$2] = 0; files[$$2] = "" } } \ ++ END { for (dir in files) print dir, files[dir] }' ++am__base_list = \ ++ sed '$$!N;$$!N;$$!N;$$!N;$$!N;$$!N;$$!N;s/\n/ /g' | \ ++ sed '$$!N;$$!N;$$!N;$$!N;s/\n/ /g' ++am__uninstall_files_from_dir = { \ ++ test -z "$$files" \ ++ || { test ! -d "$$dir" && test ! -f "$$dir" && test ! -r "$$dir"; } \ ++ || { echo " ( cd '$$dir' && rm -f" $$files ")"; \ ++ $(am__cd) "$$dir" && rm -f $$files; }; \ ++ } ++am__installdirs = "$(DESTDIR)$(kerneldir)" "$(DESTDIR)$(libzfsdir)" ++am__libzfs_HEADERS_DIST = $(top_srcdir)/include/sys/arc.h \ ++ $(top_srcdir)/include/sys/avl.h \ ++ $(top_srcdir)/include/sys/avl_impl.h \ ++ $(top_srcdir)/include/sys/bplist.h \ ++ $(top_srcdir)/include/sys/bpobj.h \ ++ $(top_srcdir)/include/sys/dbuf.h \ ++ $(top_srcdir)/include/sys/ddt.h \ ++ $(top_srcdir)/include/sys/dmu.h \ ++ $(top_srcdir)/include/sys/dmu_impl.h \ ++ $(top_srcdir)/include/sys/dmu_objset.h \ ++ $(top_srcdir)/include/sys/dmu_traverse.h \ ++ $(top_srcdir)/include/sys/dmu_tx.h \ ++ $(top_srcdir)/include/sys/dmu_zfetch.h \ ++ $(top_srcdir)/include/sys/dnode.h \ ++ $(top_srcdir)/include/sys/dsl_dataset.h \ ++ $(top_srcdir)/include/sys/dsl_deadlist.h \ ++ $(top_srcdir)/include/sys/dsl_deleg.h \ ++ $(top_srcdir)/include/sys/dsl_dir.h \ ++ $(top_srcdir)/include/sys/dsl_pool.h \ ++ $(top_srcdir)/include/sys/dsl_prop.h \ ++ $(top_srcdir)/include/sys/dsl_scan.h \ ++ $(top_srcdir)/include/sys/dsl_synctask.h \ ++ $(top_srcdir)/include/sys/efi_partition.h \ ++ $(top_srcdir)/include/sys/metaslab.h \ ++ $(top_srcdir)/include/sys/metaslab_impl.h \ ++ $(top_srcdir)/include/sys/nvpair.h \ ++ $(top_srcdir)/include/sys/nvpair_impl.h \ ++ $(top_srcdir)/include/sys/refcount.h \ ++ $(top_srcdir)/include/sys/rrwlock.h \ ++ $(top_srcdir)/include/sys/sa.h \ ++ $(top_srcdir)/include/sys/sa_impl.h \ ++ $(top_srcdir)/include/sys/spa_boot.h \ ++ $(top_srcdir)/include/sys/space_map.h \ ++ $(top_srcdir)/include/sys/spa.h \ ++ $(top_srcdir)/include/sys/spa_impl.h \ ++ $(top_srcdir)/include/sys/txg.h \ ++ $(top_srcdir)/include/sys/txg_impl.h \ ++ $(top_srcdir)/include/sys/u8_textprep_data.h \ ++ $(top_srcdir)/include/sys/u8_textprep.h \ ++ $(top_srcdir)/include/sys/uberblock.h \ ++ $(top_srcdir)/include/sys/uberblock_impl.h \ ++ $(top_srcdir)/include/sys/uio_impl.h \ ++ $(top_srcdir)/include/sys/unique.h \ ++ $(top_srcdir)/include/sys/uuid.h \ ++ $(top_srcdir)/include/sys/vdev_disk.h \ ++ $(top_srcdir)/include/sys/vdev_file.h \ ++ $(top_srcdir)/include/sys/vdev.h \ ++ $(top_srcdir)/include/sys/vdev_impl.h \ ++ $(top_srcdir)/include/sys/xvattr.h \ ++ $(top_srcdir)/include/sys/zap.h \ ++ $(top_srcdir)/include/sys/zap_impl.h \ ++ $(top_srcdir)/include/sys/zap_leaf.h \ ++ $(top_srcdir)/include/sys/zfs_acl.h \ ++ $(top_srcdir)/include/sys/zfs_context.h \ ++ $(top_srcdir)/include/sys/zfs_ctldir.h \ ++ $(top_srcdir)/include/sys/zfs_debug.h \ ++ $(top_srcdir)/include/sys/zfs_dir.h \ ++ $(top_srcdir)/include/sys/zfs_fuid.h \ ++ $(top_srcdir)/include/sys/zfs_rlock.h \ ++ $(top_srcdir)/include/sys/zfs_sa.h \ ++ $(top_srcdir)/include/sys/zfs_stat.h \ ++ $(top_srcdir)/include/sys/zfs_vfsops.h \ ++ $(top_srcdir)/include/sys/zfs_znode.h \ ++ $(top_srcdir)/include/sys/zfs_vnops.h \ ++ 
$(top_srcdir)/include/sys/zil.h \ ++ $(top_srcdir)/include/sys/zil_impl.h \ ++ $(top_srcdir)/include/sys/zio_checksum.h \ ++ $(top_srcdir)/include/sys/zio_compress.h \ ++ $(top_srcdir)/include/sys/zio.h \ ++ $(top_srcdir)/include/sys/zio_impl.h \ ++ $(top_srcdir)/include/sys/zrlock.h ++HEADERS = $(kernel_HEADERS) $(libzfs_HEADERS) ++RECURSIVE_CLEAN_TARGETS = mostlyclean-recursive clean-recursive \ ++ distclean-recursive maintainer-clean-recursive ++AM_RECURSIVE_TARGETS = $(RECURSIVE_TARGETS:-recursive=) \ ++ $(RECURSIVE_CLEAN_TARGETS:-recursive=) tags TAGS ctags CTAGS \ ++ distdir ++ETAGS = etags ++CTAGS = ctags ++DIST_SUBDIRS = $(SUBDIRS) ++DISTFILES = $(DIST_COMMON) $(DIST_SOURCES) $(TEXINFOS) $(EXTRA_DIST) ++am__relativize = \ ++ dir0=`pwd`; \ ++ sed_first='s,^\([^/]*\)/.*$$,\1,'; \ ++ sed_rest='s,^[^/]*/*,,'; \ ++ sed_last='s,^.*/\([^/]*\)$$,\1,'; \ ++ sed_butlast='s,/*[^/]*$$,,'; \ ++ while test -n "$$dir1"; do \ ++ first=`echo "$$dir1" | sed -e "$$sed_first"`; \ ++ if test "$$first" != "."; then \ ++ if test "$$first" = ".."; then \ ++ dir2=`echo "$$dir0" | sed -e "$$sed_last"`/"$$dir2"; \ ++ dir0=`echo "$$dir0" | sed -e "$$sed_butlast"`; \ ++ else \ ++ first2=`echo "$$dir2" | sed -e "$$sed_first"`; \ ++ if test "$$first2" = "$$first"; then \ ++ dir2=`echo "$$dir2" | sed -e "$$sed_rest"`; \ ++ else \ ++ dir2="../$$dir2"; \ ++ fi; \ ++ dir0="$$dir0"/"$$first"; \ ++ fi; \ ++ fi; \ ++ dir1=`echo "$$dir1" | sed -e "$$sed_rest"`; \ ++ done; \ ++ reldir="$$dir2" ++ACLOCAL = @ACLOCAL@ ++ALIEN = @ALIEN@ ++ALIEN_VERSION = @ALIEN_VERSION@ ++AMTAR = @AMTAR@ ++AM_DEFAULT_VERBOSITY = @AM_DEFAULT_VERBOSITY@ ++AR = @AR@ ++AUTOCONF = @AUTOCONF@ ++AUTOHEADER = @AUTOHEADER@ ++AUTOMAKE = @AUTOMAKE@ ++AWK = @AWK@ ++CC = @CC@ ++CCAS = @CCAS@ ++CCASDEPMODE = @CCASDEPMODE@ ++CCASFLAGS = @CCASFLAGS@ ++CCDEPMODE = @CCDEPMODE@ ++CFLAGS = @CFLAGS@ ++CPP = @CPP@ ++CPPFLAGS = @CPPFLAGS@ ++CYGPATH_W = @CYGPATH_W@ ++DEBUG_CFLAGS = @DEBUG_CFLAGS@ ++DEBUG_DMU_TX = @DEBUG_DMU_TX@ ++DEBUG_STACKFLAGS = @DEBUG_STACKFLAGS@ ++DEBUG_ZFS = @DEBUG_ZFS@ ++DEFAULT_INIT_DIR = @DEFAULT_INIT_DIR@ ++DEFAULT_INIT_SCRIPT = @DEFAULT_INIT_SCRIPT@ ++DEFAULT_PACKAGE = @DEFAULT_PACKAGE@ ++DEFS = @DEFS@ ++DEPDIR = @DEPDIR@ ++DLLTOOL = @DLLTOOL@ ++DPKG = @DPKG@ ++DPKGBUILD = @DPKGBUILD@ ++DPKGBUILD_VERSION = @DPKGBUILD_VERSION@ ++DPKG_VERSION = @DPKG_VERSION@ ++DSYMUTIL = @DSYMUTIL@ ++DUMPBIN = @DUMPBIN@ ++ECHO_C = @ECHO_C@ ++ECHO_N = @ECHO_N@ ++ECHO_T = @ECHO_T@ ++EGREP = @EGREP@ ++EXEEXT = @EXEEXT@ ++FGREP = @FGREP@ ++FRAME_LARGER_THAN = @FRAME_LARGER_THAN@ ++GREP = @GREP@ ++HAVE_ALIEN = @HAVE_ALIEN@ ++HAVE_DPKG = @HAVE_DPKG@ ++HAVE_DPKGBUILD = @HAVE_DPKGBUILD@ ++HAVE_MAKEPKG = @HAVE_MAKEPKG@ ++HAVE_PACMAN = @HAVE_PACMAN@ ++HAVE_RPM = @HAVE_RPM@ ++HAVE_RPMBUILD = @HAVE_RPMBUILD@ ++INSTALL = @INSTALL@ ++INSTALL_DATA = @INSTALL_DATA@ ++INSTALL_PROGRAM = @INSTALL_PROGRAM@ ++INSTALL_SCRIPT = @INSTALL_SCRIPT@ ++INSTALL_STRIP_PROGRAM = @INSTALL_STRIP_PROGRAM@ ++KERNELCPPFLAGS = @KERNELCPPFLAGS@ ++KERNELMAKE_PARAMS = @KERNELMAKE_PARAMS@ ++LD = @LD@ ++LDFLAGS = @LDFLAGS@ ++LIBBLKID = @LIBBLKID@ ++LIBOBJS = @LIBOBJS@ ++LIBS = @LIBS@ ++LIBSELINUX = @LIBSELINUX@ ++LIBTOOL = @LIBTOOL@ ++LIBUUID = @LIBUUID@ ++LINUX = @LINUX@ ++LINUX_OBJ = @LINUX_OBJ@ ++LINUX_SYMBOLS = @LINUX_SYMBOLS@ ++LINUX_VERSION = @LINUX_VERSION@ ++LIPO = @LIPO@ ++LN_S = @LN_S@ ++LTLIBOBJS = @LTLIBOBJS@ ++MAINT = @MAINT@ ++MAKEINFO = @MAKEINFO@ ++MAKEPKG = @MAKEPKG@ ++MAKEPKG_VERSION = @MAKEPKG_VERSION@ ++MANIFEST_TOOL = @MANIFEST_TOOL@ ++MKDIR_P = @MKDIR_P@ ++NM = @NM@ ++NMEDIT 
= @NMEDIT@ ++NO_UNUSED_BUT_SET_VARIABLE = @NO_UNUSED_BUT_SET_VARIABLE@ ++OBJDUMP = @OBJDUMP@ ++OBJEXT = @OBJEXT@ ++OTOOL = @OTOOL@ ++OTOOL64 = @OTOOL64@ ++PACKAGE = @PACKAGE@ ++PACKAGE_BUGREPORT = @PACKAGE_BUGREPORT@ ++PACKAGE_NAME = @PACKAGE_NAME@ ++PACKAGE_STRING = @PACKAGE_STRING@ ++PACKAGE_TARNAME = @PACKAGE_TARNAME@ ++PACKAGE_URL = @PACKAGE_URL@ ++PACKAGE_VERSION = @PACKAGE_VERSION@ ++PACMAN = @PACMAN@ ++PACMAN_VERSION = @PACMAN_VERSION@ ++PATH_SEPARATOR = @PATH_SEPARATOR@ ++RANLIB = @RANLIB@ ++RPM = @RPM@ ++RPMBUILD = @RPMBUILD@ ++RPMBUILD_VERSION = @RPMBUILD_VERSION@ ++RPM_VERSION = @RPM_VERSION@ ++SED = @SED@ ++SET_MAKE = @SET_MAKE@ ++SHELL = @SHELL@ ++SPL = @SPL@ ++SPL_OBJ = @SPL_OBJ@ ++SPL_SYMBOLS = @SPL_SYMBOLS@ ++SPL_VERSION = @SPL_VERSION@ ++STRIP = @STRIP@ ++TARGET_ASM_DIR = @TARGET_ASM_DIR@ ++VENDOR = @VENDOR@ ++VERSION = @VERSION@ ++ZFS_CONFIG = @ZFS_CONFIG@ ++ZFS_META_ALIAS = @ZFS_META_ALIAS@ ++ZFS_META_AUTHOR = @ZFS_META_AUTHOR@ ++ZFS_META_DATA = @ZFS_META_DATA@ ++ZFS_META_LICENSE = @ZFS_META_LICENSE@ ++ZFS_META_LT_AGE = @ZFS_META_LT_AGE@ ++ZFS_META_LT_CURRENT = @ZFS_META_LT_CURRENT@ ++ZFS_META_LT_REVISION = @ZFS_META_LT_REVISION@ ++ZFS_META_NAME = @ZFS_META_NAME@ ++ZFS_META_RELEASE = @ZFS_META_RELEASE@ ++ZFS_META_VERSION = @ZFS_META_VERSION@ ++ZLIB = @ZLIB@ ++abs_builddir = @abs_builddir@ ++abs_srcdir = @abs_srcdir@ ++abs_top_builddir = @abs_top_builddir@ ++abs_top_srcdir = @abs_top_srcdir@ ++ac_ct_AR = @ac_ct_AR@ ++ac_ct_CC = @ac_ct_CC@ ++ac_ct_DUMPBIN = @ac_ct_DUMPBIN@ ++am__include = @am__include@ ++am__leading_dot = @am__leading_dot@ ++am__quote = @am__quote@ ++am__tar = @am__tar@ ++am__untar = @am__untar@ ++bindir = @bindir@ ++build = @build@ ++build_alias = @build_alias@ ++build_cpu = @build_cpu@ ++build_os = @build_os@ ++build_vendor = @build_vendor@ ++builddir = @builddir@ ++datadir = @datadir@ ++datarootdir = @datarootdir@ ++docdir = @docdir@ ++dvidir = @dvidir@ ++exec_prefix = @exec_prefix@ ++host = @host@ ++host_alias = @host_alias@ ++host_cpu = @host_cpu@ ++host_os = @host_os@ ++host_vendor = @host_vendor@ ++htmldir = @htmldir@ ++includedir = @includedir@ ++infodir = @infodir@ ++install_sh = @install_sh@ ++libdir = @libdir@ ++libexecdir = @libexecdir@ ++localedir = @localedir@ ++localstatedir = @localstatedir@ ++mandir = @mandir@ ++mkdir_p = @mkdir_p@ ++oldincludedir = @oldincludedir@ ++pdfdir = @pdfdir@ ++prefix = @prefix@ ++program_transform_name = @program_transform_name@ ++psdir = @psdir@ ++sbindir = @sbindir@ ++sharedstatedir = @sharedstatedir@ ++srcdir = @srcdir@ ++sysconfdir = @sysconfdir@ ++target = @target@ ++target_alias = @target_alias@ ++target_cpu = @target_cpu@ ++target_os = @target_os@ ++target_vendor = @target_vendor@ ++top_build_prefix = @top_build_prefix@ ++top_builddir = @top_builddir@ ++top_srcdir = @top_srcdir@ ++udevdir = @udevdir@ ++udevruledir = @udevruledir@ ++SUBDIRS = fm fs ++COMMON_H = \ ++ $(top_srcdir)/include/sys/arc.h \ ++ $(top_srcdir)/include/sys/avl.h \ ++ $(top_srcdir)/include/sys/avl_impl.h \ ++ $(top_srcdir)/include/sys/bplist.h \ ++ $(top_srcdir)/include/sys/bpobj.h \ ++ $(top_srcdir)/include/sys/dbuf.h \ ++ $(top_srcdir)/include/sys/ddt.h \ ++ $(top_srcdir)/include/sys/dmu.h \ ++ $(top_srcdir)/include/sys/dmu_impl.h \ ++ $(top_srcdir)/include/sys/dmu_objset.h \ ++ $(top_srcdir)/include/sys/dmu_traverse.h \ ++ $(top_srcdir)/include/sys/dmu_tx.h \ ++ $(top_srcdir)/include/sys/dmu_zfetch.h \ ++ $(top_srcdir)/include/sys/dnode.h \ ++ $(top_srcdir)/include/sys/dsl_dataset.h \ ++ $(top_srcdir)/include/sys/dsl_deadlist.h \ ++ 
$(top_srcdir)/include/sys/dsl_deleg.h \ ++ $(top_srcdir)/include/sys/dsl_dir.h \ ++ $(top_srcdir)/include/sys/dsl_pool.h \ ++ $(top_srcdir)/include/sys/dsl_prop.h \ ++ $(top_srcdir)/include/sys/dsl_scan.h \ ++ $(top_srcdir)/include/sys/dsl_synctask.h \ ++ $(top_srcdir)/include/sys/efi_partition.h \ ++ $(top_srcdir)/include/sys/metaslab.h \ ++ $(top_srcdir)/include/sys/metaslab_impl.h \ ++ $(top_srcdir)/include/sys/nvpair.h \ ++ $(top_srcdir)/include/sys/nvpair_impl.h \ ++ $(top_srcdir)/include/sys/refcount.h \ ++ $(top_srcdir)/include/sys/rrwlock.h \ ++ $(top_srcdir)/include/sys/sa.h \ ++ $(top_srcdir)/include/sys/sa_impl.h \ ++ $(top_srcdir)/include/sys/spa_boot.h \ ++ $(top_srcdir)/include/sys/space_map.h \ ++ $(top_srcdir)/include/sys/spa.h \ ++ $(top_srcdir)/include/sys/spa_impl.h \ ++ $(top_srcdir)/include/sys/txg.h \ ++ $(top_srcdir)/include/sys/txg_impl.h \ ++ $(top_srcdir)/include/sys/u8_textprep_data.h \ ++ $(top_srcdir)/include/sys/u8_textprep.h \ ++ $(top_srcdir)/include/sys/uberblock.h \ ++ $(top_srcdir)/include/sys/uberblock_impl.h \ ++ $(top_srcdir)/include/sys/uio_impl.h \ ++ $(top_srcdir)/include/sys/unique.h \ ++ $(top_srcdir)/include/sys/uuid.h \ ++ $(top_srcdir)/include/sys/vdev_disk.h \ ++ $(top_srcdir)/include/sys/vdev_file.h \ ++ $(top_srcdir)/include/sys/vdev.h \ ++ $(top_srcdir)/include/sys/vdev_impl.h \ ++ $(top_srcdir)/include/sys/xvattr.h \ ++ $(top_srcdir)/include/sys/zap.h \ ++ $(top_srcdir)/include/sys/zap_impl.h \ ++ $(top_srcdir)/include/sys/zap_leaf.h \ ++ $(top_srcdir)/include/sys/zfs_acl.h \ ++ $(top_srcdir)/include/sys/zfs_context.h \ ++ $(top_srcdir)/include/sys/zfs_ctldir.h \ ++ $(top_srcdir)/include/sys/zfs_debug.h \ ++ $(top_srcdir)/include/sys/zfs_dir.h \ ++ $(top_srcdir)/include/sys/zfs_fuid.h \ ++ $(top_srcdir)/include/sys/zfs_rlock.h \ ++ $(top_srcdir)/include/sys/zfs_sa.h \ ++ $(top_srcdir)/include/sys/zfs_stat.h \ ++ $(top_srcdir)/include/sys/zfs_vfsops.h \ ++ $(top_srcdir)/include/sys/zfs_znode.h \ ++ $(top_srcdir)/include/sys/zfs_vnops.h \ ++ $(top_srcdir)/include/sys/zil.h \ ++ $(top_srcdir)/include/sys/zil_impl.h \ ++ $(top_srcdir)/include/sys/zio_checksum.h \ ++ $(top_srcdir)/include/sys/zio_compress.h \ ++ $(top_srcdir)/include/sys/zio.h \ ++ $(top_srcdir)/include/sys/zio_impl.h \ ++ $(top_srcdir)/include/sys/zrlock.h ++ ++KERNEL_H = \ ++ $(top_srcdir)/include/sys/zfs_ioctl.h \ ++ $(top_srcdir)/include/sys/zfs_onexit.h \ ++ ${top_srcdir}/include/sys/zpl.h \ ++ $(top_srcdir)/include/sys/zvol.h ++ ++USER_H = ++EXTRA_DIST = $(COMMON_H) $(KERNEL_H) $(USER_H) ++@CONFIG_USER_TRUE@libzfsdir = $(includedir)/libzfs/sys ++@CONFIG_USER_TRUE@libzfs_HEADERS = $(COMMON_H) $(USER_H) ++@CONFIG_KERNEL_TRUE@kerneldir = /usr/src/zfs-$(ZFS_META_VERSION)-$(ZFS_META_RELEASE)/$(LINUX_VERSION)/sys ++@CONFIG_KERNEL_TRUE@kernel_HEADERS = $(COMMON_H) $(KERNEL_H) ++all: all-recursive ++ ++.SUFFIXES: ++$(srcdir)/Makefile.in: @MAINTAINER_MODE_TRUE@ $(srcdir)/Makefile.am $(am__configure_deps) ++ @for dep in $?; do \ ++ case '$(am__configure_deps)' in \ ++ *$$dep*) \ ++ ( cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh ) \ ++ && { if test -f $@; then exit 0; else break; fi; }; \ ++ exit 1;; \ ++ esac; \ ++ done; \ ++ echo ' cd $(top_srcdir) && $(AUTOMAKE) --gnu include/sys/Makefile'; \ ++ $(am__cd) $(top_srcdir) && \ ++ $(AUTOMAKE) --gnu include/sys/Makefile ++.PRECIOUS: Makefile ++Makefile: $(srcdir)/Makefile.in $(top_builddir)/config.status ++ @case '$?' 
in \ ++ *config.status*) \ ++ cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh;; \ ++ *) \ ++ echo ' cd $(top_builddir) && $(SHELL) ./config.status $(subdir)/$@ $(am__depfiles_maybe)'; \ ++ cd $(top_builddir) && $(SHELL) ./config.status $(subdir)/$@ $(am__depfiles_maybe);; \ ++ esac; ++ ++$(top_builddir)/config.status: $(top_srcdir)/configure $(CONFIG_STATUS_DEPENDENCIES) ++ cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh ++ ++$(top_srcdir)/configure: @MAINTAINER_MODE_TRUE@ $(am__configure_deps) ++ cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh ++$(ACLOCAL_M4): @MAINTAINER_MODE_TRUE@ $(am__aclocal_m4_deps) ++ cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh ++$(am__aclocal_m4_deps): ++ ++mostlyclean-libtool: ++ -rm -f *.lo ++ ++clean-libtool: ++ -rm -rf .libs _libs ++install-kernelHEADERS: $(kernel_HEADERS) ++ @$(NORMAL_INSTALL) ++ @list='$(kernel_HEADERS)'; test -n "$(kerneldir)" || list=; \ ++ if test -n "$$list"; then \ ++ echo " $(MKDIR_P) '$(DESTDIR)$(kerneldir)'"; \ ++ $(MKDIR_P) "$(DESTDIR)$(kerneldir)" || exit 1; \ ++ fi; \ ++ for p in $$list; do \ ++ if test -f "$$p"; then d=; else d="$(srcdir)/"; fi; \ ++ echo "$$d$$p"; \ ++ done | $(am__base_list) | \ ++ while read files; do \ ++ echo " $(INSTALL_HEADER) $$files '$(DESTDIR)$(kerneldir)'"; \ ++ $(INSTALL_HEADER) $$files "$(DESTDIR)$(kerneldir)" || exit $$?; \ ++ done ++ ++uninstall-kernelHEADERS: ++ @$(NORMAL_UNINSTALL) ++ @list='$(kernel_HEADERS)'; test -n "$(kerneldir)" || list=; \ ++ files=`for p in $$list; do echo $$p; done | sed -e 's|^.*/||'`; \ ++ dir='$(DESTDIR)$(kerneldir)'; $(am__uninstall_files_from_dir) ++install-libzfsHEADERS: $(libzfs_HEADERS) ++ @$(NORMAL_INSTALL) ++ @list='$(libzfs_HEADERS)'; test -n "$(libzfsdir)" || list=; \ ++ if test -n "$$list"; then \ ++ echo " $(MKDIR_P) '$(DESTDIR)$(libzfsdir)'"; \ ++ $(MKDIR_P) "$(DESTDIR)$(libzfsdir)" || exit 1; \ ++ fi; \ ++ for p in $$list; do \ ++ if test -f "$$p"; then d=; else d="$(srcdir)/"; fi; \ ++ echo "$$d$$p"; \ ++ done | $(am__base_list) | \ ++ while read files; do \ ++ echo " $(INSTALL_HEADER) $$files '$(DESTDIR)$(libzfsdir)'"; \ ++ $(INSTALL_HEADER) $$files "$(DESTDIR)$(libzfsdir)" || exit $$?; \ ++ done ++ ++uninstall-libzfsHEADERS: ++ @$(NORMAL_UNINSTALL) ++ @list='$(libzfs_HEADERS)'; test -n "$(libzfsdir)" || list=; \ ++ files=`for p in $$list; do echo $$p; done | sed -e 's|^.*/||'`; \ ++ dir='$(DESTDIR)$(libzfsdir)'; $(am__uninstall_files_from_dir) ++ ++# This directory's subdirectories are mostly independent; you can cd ++# into them and run `make' without going through this Makefile. ++# To change the values of `make' variables: instead of editing Makefiles, ++# (1) if the variable is set in `config.status', edit `config.status' ++# (which will cause the Makefiles to be regenerated when you run `make'); ++# (2) otherwise, pass the desired values on the `make' command line. 
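++# A brief, illustrative example of option (2) above (values are placeholders,
++# not project defaults):
++#   make CFLAGS='-O2 -g'            # override a configure-chosen value
++#   make prefix=/usr/local install  # install under a different prefix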
++$(RECURSIVE_TARGETS): ++ @fail= failcom='exit 1'; \ ++ for f in x $$MAKEFLAGS; do \ ++ case $$f in \ ++ *=* | --[!k]*);; \ ++ *k*) failcom='fail=yes';; \ ++ esac; \ ++ done; \ ++ dot_seen=no; \ ++ target=`echo $@ | sed s/-recursive//`; \ ++ list='$(SUBDIRS)'; for subdir in $$list; do \ ++ echo "Making $$target in $$subdir"; \ ++ if test "$$subdir" = "."; then \ ++ dot_seen=yes; \ ++ local_target="$$target-am"; \ ++ else \ ++ local_target="$$target"; \ ++ fi; \ ++ ($(am__cd) $$subdir && $(MAKE) $(AM_MAKEFLAGS) $$local_target) \ ++ || eval $$failcom; \ ++ done; \ ++ if test "$$dot_seen" = "no"; then \ ++ $(MAKE) $(AM_MAKEFLAGS) "$$target-am" || exit 1; \ ++ fi; test -z "$$fail" ++ ++$(RECURSIVE_CLEAN_TARGETS): ++ @fail= failcom='exit 1'; \ ++ for f in x $$MAKEFLAGS; do \ ++ case $$f in \ ++ *=* | --[!k]*);; \ ++ *k*) failcom='fail=yes';; \ ++ esac; \ ++ done; \ ++ dot_seen=no; \ ++ case "$@" in \ ++ distclean-* | maintainer-clean-*) list='$(DIST_SUBDIRS)' ;; \ ++ *) list='$(SUBDIRS)' ;; \ ++ esac; \ ++ rev=''; for subdir in $$list; do \ ++ if test "$$subdir" = "."; then :; else \ ++ rev="$$subdir $$rev"; \ ++ fi; \ ++ done; \ ++ rev="$$rev ."; \ ++ target=`echo $@ | sed s/-recursive//`; \ ++ for subdir in $$rev; do \ ++ echo "Making $$target in $$subdir"; \ ++ if test "$$subdir" = "."; then \ ++ local_target="$$target-am"; \ ++ else \ ++ local_target="$$target"; \ ++ fi; \ ++ ($(am__cd) $$subdir && $(MAKE) $(AM_MAKEFLAGS) $$local_target) \ ++ || eval $$failcom; \ ++ done && test -z "$$fail" ++tags-recursive: ++ list='$(SUBDIRS)'; for subdir in $$list; do \ ++ test "$$subdir" = . || ($(am__cd) $$subdir && $(MAKE) $(AM_MAKEFLAGS) tags); \ ++ done ++ctags-recursive: ++ list='$(SUBDIRS)'; for subdir in $$list; do \ ++ test "$$subdir" = . || ($(am__cd) $$subdir && $(MAKE) $(AM_MAKEFLAGS) ctags); \ ++ done ++ ++ID: $(HEADERS) $(SOURCES) $(LISP) $(TAGS_FILES) ++ list='$(SOURCES) $(HEADERS) $(LISP) $(TAGS_FILES)'; \ ++ unique=`for i in $$list; do \ ++ if test -f "$$i"; then echo $$i; else echo $(srcdir)/$$i; fi; \ ++ done | \ ++ $(AWK) '{ files[$$0] = 1; nonempty = 1; } \ ++ END { if (nonempty) { for (i in files) print i; }; }'`; \ ++ mkid -fID $$unique ++tags: TAGS ++ ++TAGS: tags-recursive $(HEADERS) $(SOURCES) $(TAGS_DEPENDENCIES) \ ++ $(TAGS_FILES) $(LISP) ++ set x; \ ++ here=`pwd`; \ ++ if ($(ETAGS) --etags-include --version) >/dev/null 2>&1; then \ ++ include_option=--etags-include; \ ++ empty_fix=.; \ ++ else \ ++ include_option=--include; \ ++ empty_fix=; \ ++ fi; \ ++ list='$(SUBDIRS)'; for subdir in $$list; do \ ++ if test "$$subdir" = .; then :; else \ ++ test ! 
-f $$subdir/TAGS || \ ++ set "$$@" "$$include_option=$$here/$$subdir/TAGS"; \ ++ fi; \ ++ done; \ ++ list='$(SOURCES) $(HEADERS) $(LISP) $(TAGS_FILES)'; \ ++ unique=`for i in $$list; do \ ++ if test -f "$$i"; then echo $$i; else echo $(srcdir)/$$i; fi; \ ++ done | \ ++ $(AWK) '{ files[$$0] = 1; nonempty = 1; } \ ++ END { if (nonempty) { for (i in files) print i; }; }'`; \ ++ shift; \ ++ if test -z "$(ETAGS_ARGS)$$*$$unique"; then :; else \ ++ test -n "$$unique" || unique=$$empty_fix; \ ++ if test $$# -gt 0; then \ ++ $(ETAGS) $(ETAGSFLAGS) $(AM_ETAGSFLAGS) $(ETAGS_ARGS) \ ++ "$$@" $$unique; \ ++ else \ ++ $(ETAGS) $(ETAGSFLAGS) $(AM_ETAGSFLAGS) $(ETAGS_ARGS) \ ++ $$unique; \ ++ fi; \ ++ fi ++ctags: CTAGS ++CTAGS: ctags-recursive $(HEADERS) $(SOURCES) $(TAGS_DEPENDENCIES) \ ++ $(TAGS_FILES) $(LISP) ++ list='$(SOURCES) $(HEADERS) $(LISP) $(TAGS_FILES)'; \ ++ unique=`for i in $$list; do \ ++ if test -f "$$i"; then echo $$i; else echo $(srcdir)/$$i; fi; \ ++ done | \ ++ $(AWK) '{ files[$$0] = 1; nonempty = 1; } \ ++ END { if (nonempty) { for (i in files) print i; }; }'`; \ ++ test -z "$(CTAGS_ARGS)$$unique" \ ++ || $(CTAGS) $(CTAGSFLAGS) $(AM_CTAGSFLAGS) $(CTAGS_ARGS) \ ++ $$unique ++ ++GTAGS: ++ here=`$(am__cd) $(top_builddir) && pwd` \ ++ && $(am__cd) $(top_srcdir) \ ++ && gtags -i $(GTAGS_ARGS) "$$here" ++ ++distclean-tags: ++ -rm -f TAGS ID GTAGS GRTAGS GSYMS GPATH tags ++ ++distdir: $(DISTFILES) ++ @srcdirstrip=`echo "$(srcdir)" | sed 's/[].[^$$\\*]/\\\\&/g'`; \ ++ topsrcdirstrip=`echo "$(top_srcdir)" | sed 's/[].[^$$\\*]/\\\\&/g'`; \ ++ list='$(DISTFILES)'; \ ++ dist_files=`for file in $$list; do echo $$file; done | \ ++ sed -e "s|^$$srcdirstrip/||;t" \ ++ -e "s|^$$topsrcdirstrip/|$(top_builddir)/|;t"`; \ ++ case $$dist_files in \ ++ */*) $(MKDIR_P) `echo "$$dist_files" | \ ++ sed '/\//!d;s|^|$(distdir)/|;s,/[^/]*$$,,' | \ ++ sort -u` ;; \ ++ esac; \ ++ for file in $$dist_files; do \ ++ if test -f $$file || test -d $$file; then d=.; else d=$(srcdir); fi; \ ++ if test -d $$d/$$file; then \ ++ dir=`echo "/$$file" | sed -e 's,/[^/]*$$,,'`; \ ++ if test -d "$(distdir)/$$file"; then \ ++ find "$(distdir)/$$file" -type d ! -perm -700 -exec chmod u+rwx {} \;; \ ++ fi; \ ++ if test -d $(srcdir)/$$file && test $$d != $(srcdir); then \ ++ cp -fpR $(srcdir)/$$file "$(distdir)$$dir" || exit 1; \ ++ find "$(distdir)/$$file" -type d ! 
-perm -700 -exec chmod u+rwx {} \;; \ ++ fi; \ ++ cp -fpR $$d/$$file "$(distdir)$$dir" || exit 1; \ ++ else \ ++ test -f "$(distdir)/$$file" \ ++ || cp -p $$d/$$file "$(distdir)/$$file" \ ++ || exit 1; \ ++ fi; \ ++ done ++ @list='$(DIST_SUBDIRS)'; for subdir in $$list; do \ ++ if test "$$subdir" = .; then :; else \ ++ $(am__make_dryrun) \ ++ || test -d "$(distdir)/$$subdir" \ ++ || $(MKDIR_P) "$(distdir)/$$subdir" \ ++ || exit 1; \ ++ dir1=$$subdir; dir2="$(distdir)/$$subdir"; \ ++ $(am__relativize); \ ++ new_distdir=$$reldir; \ ++ dir1=$$subdir; dir2="$(top_distdir)"; \ ++ $(am__relativize); \ ++ new_top_distdir=$$reldir; \ ++ echo " (cd $$subdir && $(MAKE) $(AM_MAKEFLAGS) top_distdir="$$new_top_distdir" distdir="$$new_distdir" \\"; \ ++ echo " am__remove_distdir=: am__skip_length_check=: am__skip_mode_fix=: distdir)"; \ ++ ($(am__cd) $$subdir && \ ++ $(MAKE) $(AM_MAKEFLAGS) \ ++ top_distdir="$$new_top_distdir" \ ++ distdir="$$new_distdir" \ ++ am__remove_distdir=: \ ++ am__skip_length_check=: \ ++ am__skip_mode_fix=: \ ++ distdir) \ ++ || exit 1; \ ++ fi; \ ++ done ++check-am: all-am ++check: check-recursive ++all-am: Makefile $(HEADERS) ++installdirs: installdirs-recursive ++installdirs-am: ++ for dir in "$(DESTDIR)$(kerneldir)" "$(DESTDIR)$(libzfsdir)"; do \ ++ test -z "$$dir" || $(MKDIR_P) "$$dir"; \ ++ done ++install: install-recursive ++install-exec: install-exec-recursive ++install-data: install-data-recursive ++uninstall: uninstall-recursive ++ ++install-am: all-am ++ @$(MAKE) $(AM_MAKEFLAGS) install-exec-am install-data-am ++ ++installcheck: installcheck-recursive ++install-strip: ++ if test -z '$(STRIP)'; then \ ++ $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \ ++ install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \ ++ install; \ ++ else \ ++ $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \ ++ install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \ ++ "INSTALL_PROGRAM_ENV=STRIPPROG='$(STRIP)'" install; \ ++ fi ++mostlyclean-generic: ++ ++clean-generic: ++ ++distclean-generic: ++ -test -z "$(CONFIG_CLEAN_FILES)" || rm -f $(CONFIG_CLEAN_FILES) ++ -test . = "$(srcdir)" || test -z "$(CONFIG_CLEAN_VPATH_FILES)" || rm -f $(CONFIG_CLEAN_VPATH_FILES) ++ ++maintainer-clean-generic: ++ @echo "This command is intended for maintainers to use" ++ @echo "it deletes files that may require special tools to rebuild." 
++clean: clean-recursive ++ ++clean-am: clean-generic clean-libtool mostlyclean-am ++ ++distclean: distclean-recursive ++ -rm -f Makefile ++distclean-am: clean-am distclean-generic distclean-tags ++ ++dvi: dvi-recursive ++ ++dvi-am: ++ ++html: html-recursive ++ ++html-am: ++ ++info: info-recursive ++ ++info-am: ++ ++install-data-am: install-kernelHEADERS install-libzfsHEADERS ++ ++install-dvi: install-dvi-recursive ++ ++install-dvi-am: ++ ++install-exec-am: ++ ++install-html: install-html-recursive ++ ++install-html-am: ++ ++install-info: install-info-recursive ++ ++install-info-am: ++ ++install-man: ++ ++install-pdf: install-pdf-recursive ++ ++install-pdf-am: ++ ++install-ps: install-ps-recursive ++ ++install-ps-am: ++ ++installcheck-am: ++ ++maintainer-clean: maintainer-clean-recursive ++ -rm -f Makefile ++maintainer-clean-am: distclean-am maintainer-clean-generic ++ ++mostlyclean: mostlyclean-recursive ++ ++mostlyclean-am: mostlyclean-generic mostlyclean-libtool ++ ++pdf: pdf-recursive ++ ++pdf-am: ++ ++ps: ps-recursive ++ ++ps-am: ++ ++uninstall-am: uninstall-kernelHEADERS uninstall-libzfsHEADERS ++ ++.MAKE: $(RECURSIVE_CLEAN_TARGETS) $(RECURSIVE_TARGETS) ctags-recursive \ ++ install-am install-strip tags-recursive ++ ++.PHONY: $(RECURSIVE_CLEAN_TARGETS) $(RECURSIVE_TARGETS) CTAGS GTAGS \ ++ all all-am check check-am clean clean-generic clean-libtool \ ++ ctags ctags-recursive distclean distclean-generic \ ++ distclean-libtool distclean-tags distdir dvi dvi-am html \ ++ html-am info info-am install install-am install-data \ ++ install-data-am install-dvi install-dvi-am install-exec \ ++ install-exec-am install-html install-html-am install-info \ ++ install-info-am install-kernelHEADERS install-libzfsHEADERS \ ++ install-man install-pdf install-pdf-am install-ps \ ++ install-ps-am install-strip installcheck installcheck-am \ ++ installdirs installdirs-am maintainer-clean \ ++ maintainer-clean-generic mostlyclean mostlyclean-generic \ ++ mostlyclean-libtool pdf pdf-am ps ps-am tags tags-recursive \ ++ uninstall uninstall-am uninstall-kernelHEADERS \ ++ uninstall-libzfsHEADERS ++ ++ ++# Tell versions [3.59,3.63) of GNU make to not export all variables. ++# Otherwise a system limit (for SysV at least) may be exceeded. ++.NOEXPORT: +diff -uNr linux-3.2.33-go.orig/include/zfs/sys/metaslab.h linux-3.2.33-go/include/zfs/sys/metaslab.h +--- linux-3.2.33-go.orig/include/zfs/sys/metaslab.h 1970-01-01 01:00:00.000000000 +0100 ++++ linux-3.2.33-go/include/zfs/sys/metaslab.h 2012-11-16 23:25:34.343039404 +0100 +@@ -0,0 +1,86 @@ ++/* ++ * CDDL HEADER START ++ * ++ * The contents of this file are subject to the terms of the ++ * Common Development and Distribution License (the "License"). ++ * You may not use this file except in compliance with the License. ++ * ++ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE ++ * or http://www.opensolaris.org/os/licensing. ++ * See the License for the specific language governing permissions ++ * and limitations under the License. ++ * ++ * When distributing Covered Code, include this CDDL HEADER in each ++ * file and include the License file at usr/src/OPENSOLARIS.LICENSE. ++ * If applicable, add the following below this CDDL HEADER, with the ++ * fields enclosed by brackets "[]" replaced with your own identifying ++ * information: Portions Copyright [yyyy] [name of copyright owner] ++ * ++ * CDDL HEADER END ++ */ ++/* ++ * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2011 by Delphix. 
All rights reserved. ++ */ ++ ++#ifndef _SYS_METASLAB_H ++#define _SYS_METASLAB_H ++ ++#include ++#include ++#include ++#include ++#include ++ ++#ifdef __cplusplus ++extern "C" { ++#endif ++ ++extern space_map_ops_t *zfs_metaslab_ops; ++ ++extern metaslab_t *metaslab_init(metaslab_group_t *mg, space_map_obj_t *smo, ++ uint64_t start, uint64_t size, uint64_t txg); ++extern void metaslab_fini(metaslab_t *msp); ++extern void metaslab_sync(metaslab_t *msp, uint64_t txg); ++extern void metaslab_sync_done(metaslab_t *msp, uint64_t txg); ++extern void metaslab_sync_reassess(metaslab_group_t *mg); ++ ++#define METASLAB_HINTBP_FAVOR 0x0 ++#define METASLAB_HINTBP_AVOID 0x1 ++#define METASLAB_GANG_HEADER 0x2 ++#define METASLAB_GANG_CHILD 0x4 ++#define METASLAB_GANG_AVOID 0x8 ++#define METASLAB_FASTWRITE 0x10 ++ ++extern int metaslab_alloc(spa_t *spa, metaslab_class_t *mc, uint64_t psize, ++ blkptr_t *bp, int ncopies, uint64_t txg, blkptr_t *hintbp, int flags); ++extern void metaslab_free(spa_t *spa, const blkptr_t *bp, uint64_t txg, ++ boolean_t now); ++extern int metaslab_claim(spa_t *spa, const blkptr_t *bp, uint64_t txg); ++extern void metaslab_fastwrite_mark(spa_t *spa, const blkptr_t *bp); ++extern void metaslab_fastwrite_unmark(spa_t *spa, const blkptr_t *bp); ++ ++extern metaslab_class_t *metaslab_class_create(spa_t *spa, ++ space_map_ops_t *ops); ++extern void metaslab_class_destroy(metaslab_class_t *mc); ++extern int metaslab_class_validate(metaslab_class_t *mc); ++ ++extern void metaslab_class_space_update(metaslab_class_t *mc, ++ int64_t alloc_delta, int64_t defer_delta, ++ int64_t space_delta, int64_t dspace_delta); ++extern uint64_t metaslab_class_get_alloc(metaslab_class_t *mc); ++extern uint64_t metaslab_class_get_space(metaslab_class_t *mc); ++extern uint64_t metaslab_class_get_dspace(metaslab_class_t *mc); ++extern uint64_t metaslab_class_get_deferred(metaslab_class_t *mc); ++ ++extern metaslab_group_t *metaslab_group_create(metaslab_class_t *mc, ++ vdev_t *vd); ++extern void metaslab_group_destroy(metaslab_group_t *mg); ++extern void metaslab_group_activate(metaslab_group_t *mg); ++extern void metaslab_group_passivate(metaslab_group_t *mg); ++ ++#ifdef __cplusplus ++} ++#endif ++ ++#endif /* _SYS_METASLAB_H */ +diff -uNr linux-3.2.33-go.orig/include/zfs/sys/metaslab_impl.h linux-3.2.33-go/include/zfs/sys/metaslab_impl.h +--- linux-3.2.33-go.orig/include/zfs/sys/metaslab_impl.h 1970-01-01 01:00:00.000000000 +0100 ++++ linux-3.2.33-go/include/zfs/sys/metaslab_impl.h 2012-11-16 23:25:34.337039473 +0100 +@@ -0,0 +1,92 @@ ++/* ++ * CDDL HEADER START ++ * ++ * The contents of this file are subject to the terms of the ++ * Common Development and Distribution License (the "License"). ++ * You may not use this file except in compliance with the License. ++ * ++ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE ++ * or http://www.opensolaris.org/os/licensing. ++ * See the License for the specific language governing permissions ++ * and limitations under the License. ++ * ++ * When distributing Covered Code, include this CDDL HEADER in each ++ * file and include the License file at usr/src/OPENSOLARIS.LICENSE. ++ * If applicable, add the following below this CDDL HEADER, with the ++ * fields enclosed by brackets "[]" replaced with your own identifying ++ * information: Portions Copyright [yyyy] [name of copyright owner] ++ * ++ * CDDL HEADER END ++ */ ++/* ++ * Copyright 2009 Sun Microsystems, Inc. All rights reserved. ++ * Use is subject to license terms. 
++ * Copyright (c) 2011 by Delphix. All rights reserved. ++ */ ++ ++#ifndef _SYS_METASLAB_IMPL_H ++#define _SYS_METASLAB_IMPL_H ++ ++#include ++#include ++#include ++#include ++#include ++ ++#ifdef __cplusplus ++extern "C" { ++#endif ++ ++struct metaslab_class { ++ spa_t *mc_spa; ++ metaslab_group_t *mc_rotor; ++ space_map_ops_t *mc_ops; ++ uint64_t mc_aliquot; ++ uint64_t mc_alloc; /* total allocated space */ ++ uint64_t mc_deferred; /* total deferred frees */ ++ uint64_t mc_space; /* total space (alloc + free) */ ++ uint64_t mc_dspace; /* total deflated space */ ++ kmutex_t mc_fastwrite_lock; ++}; ++ ++struct metaslab_group { ++ kmutex_t mg_lock; ++ avl_tree_t mg_metaslab_tree; ++ uint64_t mg_aliquot; ++ uint64_t mg_bonus_area; ++ uint64_t mg_alloc_failures; ++ int64_t mg_bias; ++ int64_t mg_activation_count; ++ metaslab_class_t *mg_class; ++ vdev_t *mg_vd; ++ metaslab_group_t *mg_prev; ++ metaslab_group_t *mg_next; ++}; ++ ++/* ++ * Each metaslab's free space is tracked in space map object in the MOS, ++ * which is only updated in syncing context. Each time we sync a txg, ++ * we append the allocs and frees from that txg to the space map object. ++ * When the txg is done syncing, metaslab_sync_done() updates ms_smo ++ * to ms_smo_syncing. Everything in ms_smo is always safe to allocate. ++ */ ++struct metaslab { ++ kmutex_t ms_lock; /* metaslab lock */ ++ space_map_obj_t ms_smo; /* synced space map object */ ++ space_map_obj_t ms_smo_syncing; /* syncing space map object */ ++ space_map_t ms_allocmap[TXG_SIZE]; /* allocated this txg */ ++ space_map_t ms_freemap[TXG_SIZE]; /* freed this txg */ ++ space_map_t ms_defermap[TXG_DEFER_SIZE]; /* deferred frees */ ++ space_map_t ms_map; /* in-core free space map */ ++ int64_t ms_deferspace; /* sum of ms_defermap[] space */ ++ uint64_t ms_weight; /* weight vs. others in group */ ++ metaslab_group_t *ms_group; /* metaslab group */ ++ avl_node_t ms_group_node; /* node in metaslab group tree */ ++ txg_node_t ms_txg_node; /* per-txg dirty metaslab links */ ++}; ++ ++#ifdef __cplusplus ++} ++#endif ++ ++#endif /* _SYS_METASLAB_IMPL_H */ +diff -uNr linux-3.2.33-go.orig/include/zfs/sys/nvpair.h linux-3.2.33-go/include/zfs/sys/nvpair.h +--- linux-3.2.33-go.orig/include/zfs/sys/nvpair.h 1970-01-01 01:00:00.000000000 +0100 ++++ linux-3.2.33-go/include/zfs/sys/nvpair.h 2012-11-16 23:25:34.336039485 +0100 +@@ -0,0 +1,281 @@ ++/* ++ * CDDL HEADER START ++ * ++ * The contents of this file are subject to the terms of the ++ * Common Development and Distribution License (the "License"). ++ * You may not use this file except in compliance with the License. ++ * ++ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE ++ * or http://www.opensolaris.org/os/licensing. ++ * See the License for the specific language governing permissions ++ * and limitations under the License. ++ * ++ * When distributing Covered Code, include this CDDL HEADER in each ++ * file and include the License file at usr/src/OPENSOLARIS.LICENSE. ++ * If applicable, add the following below this CDDL HEADER, with the ++ * fields enclosed by brackets "[]" replaced with your own identifying ++ * information: Portions Copyright [yyyy] [name of copyright owner] ++ * ++ * CDDL HEADER END ++ */ ++/* ++ * Copyright (c) 2000, 2010, Oracle and/or its affiliates. All rights reserved. 
++ */ ++ ++#ifndef _SYS_NVPAIR_H ++#define _SYS_NVPAIR_H ++ ++#include ++#include ++#include ++ ++#if defined(_KERNEL) && !defined(_BOOT) ++#include ++#endif ++ ++#ifdef __cplusplus ++extern "C" { ++#endif ++ ++typedef enum { ++ DATA_TYPE_UNKNOWN = 0, ++ DATA_TYPE_BOOLEAN, ++ DATA_TYPE_BYTE, ++ DATA_TYPE_INT16, ++ DATA_TYPE_UINT16, ++ DATA_TYPE_INT32, ++ DATA_TYPE_UINT32, ++ DATA_TYPE_INT64, ++ DATA_TYPE_UINT64, ++ DATA_TYPE_STRING, ++ DATA_TYPE_BYTE_ARRAY, ++ DATA_TYPE_INT16_ARRAY, ++ DATA_TYPE_UINT16_ARRAY, ++ DATA_TYPE_INT32_ARRAY, ++ DATA_TYPE_UINT32_ARRAY, ++ DATA_TYPE_INT64_ARRAY, ++ DATA_TYPE_UINT64_ARRAY, ++ DATA_TYPE_STRING_ARRAY, ++ DATA_TYPE_HRTIME, ++ DATA_TYPE_NVLIST, ++ DATA_TYPE_NVLIST_ARRAY, ++ DATA_TYPE_BOOLEAN_VALUE, ++ DATA_TYPE_INT8, ++ DATA_TYPE_UINT8, ++ DATA_TYPE_BOOLEAN_ARRAY, ++ DATA_TYPE_INT8_ARRAY, ++#if !defined(_KERNEL) ++ DATA_TYPE_UINT8_ARRAY, ++ DATA_TYPE_DOUBLE ++#else ++ DATA_TYPE_UINT8_ARRAY ++#endif ++} data_type_t; ++ ++typedef struct nvpair { ++ int32_t nvp_size; /* size of this nvpair */ ++ int16_t nvp_name_sz; /* length of name string */ ++ int16_t nvp_reserve; /* not used */ ++ int32_t nvp_value_elem; /* number of elements for array types */ ++ data_type_t nvp_type; /* type of value */ ++ /* name string */ ++ /* aligned ptr array for string arrays */ ++ /* aligned array of data for value */ ++} nvpair_t; ++ ++/* nvlist header */ ++typedef struct nvlist { ++ int32_t nvl_version; ++ uint32_t nvl_nvflag; /* persistent flags */ ++ uint64_t nvl_priv; /* ptr to private data if not packed */ ++ uint32_t nvl_flag; ++ int32_t nvl_pad; /* currently not used, for alignment */ ++} nvlist_t; ++ ++/* nvp implementation version */ ++#define NV_VERSION 0 ++ ++/* nvlist pack encoding */ ++#define NV_ENCODE_NATIVE 0 ++#define NV_ENCODE_XDR 1 ++ ++/* nvlist persistent unique name flags, stored in nvl_nvflags */ ++#define NV_UNIQUE_NAME 0x1 ++#define NV_UNIQUE_NAME_TYPE 0x2 ++ ++/* nvlist lookup pairs related flags */ ++#define NV_FLAG_NOENTOK 0x1 ++ ++/* convenience macros */ ++#define NV_ALIGN(x) (((ulong_t)(x) + 7ul) & ~7ul) ++#define NV_ALIGN4(x) (((x) + 3) & ~3) ++ ++#define NVP_SIZE(nvp) ((nvp)->nvp_size) ++#define NVP_NAME(nvp) ((char *)(nvp) + sizeof (nvpair_t)) ++#define NVP_TYPE(nvp) ((nvp)->nvp_type) ++#define NVP_NELEM(nvp) ((nvp)->nvp_value_elem) ++#define NVP_VALUE(nvp) ((char *)(nvp) + NV_ALIGN(sizeof (nvpair_t) \ ++ + (nvp)->nvp_name_sz)) ++ ++#define NVL_VERSION(nvl) ((nvl)->nvl_version) ++#define NVL_SIZE(nvl) ((nvl)->nvl_size) ++#define NVL_FLAG(nvl) ((nvl)->nvl_flag) ++ ++/* NV allocator framework */ ++typedef struct nv_alloc_ops nv_alloc_ops_t; ++ ++typedef struct nv_alloc { ++ const nv_alloc_ops_t *nva_ops; ++ void *nva_arg; ++} nv_alloc_t; ++ ++struct nv_alloc_ops { ++ int (*nv_ao_init)(nv_alloc_t *, __va_list); ++ void (*nv_ao_fini)(nv_alloc_t *); ++ void *(*nv_ao_alloc)(nv_alloc_t *, size_t); ++ void (*nv_ao_free)(nv_alloc_t *, void *, size_t); ++ void (*nv_ao_reset)(nv_alloc_t *); ++}; ++ ++extern const nv_alloc_ops_t *nv_fixed_ops; ++extern nv_alloc_t *nv_alloc_nosleep; ++ ++#if defined(_KERNEL) && !defined(_BOOT) ++extern nv_alloc_t *nv_alloc_sleep; ++#endif ++ ++int nv_alloc_init(nv_alloc_t *, const nv_alloc_ops_t *, /* args */ ...); ++void nv_alloc_reset(nv_alloc_t *); ++void nv_alloc_fini(nv_alloc_t *); ++ ++/* list management */ ++int nvlist_alloc(nvlist_t **, uint_t, int); ++void nvlist_free(nvlist_t *); ++int nvlist_size(nvlist_t *, size_t *, int); ++int nvlist_pack(nvlist_t *, char **, size_t *, int, int); ++int nvlist_unpack(char *, 
size_t, nvlist_t **, int); ++int nvlist_dup(nvlist_t *, nvlist_t **, int); ++int nvlist_merge(nvlist_t *, nvlist_t *, int); ++ ++uint_t nvlist_nvflag(nvlist_t *); ++ ++int nvlist_xalloc(nvlist_t **, uint_t, nv_alloc_t *); ++int nvlist_xpack(nvlist_t *, char **, size_t *, int, nv_alloc_t *); ++int nvlist_xunpack(char *, size_t, nvlist_t **, nv_alloc_t *); ++int nvlist_xdup(nvlist_t *, nvlist_t **, nv_alloc_t *); ++nv_alloc_t *nvlist_lookup_nv_alloc(nvlist_t *); ++ ++int nvlist_add_nvpair(nvlist_t *, nvpair_t *); ++int nvlist_add_boolean(nvlist_t *, const char *); ++int nvlist_add_boolean_value(nvlist_t *, const char *, boolean_t); ++int nvlist_add_byte(nvlist_t *, const char *, uchar_t); ++int nvlist_add_int8(nvlist_t *, const char *, int8_t); ++int nvlist_add_uint8(nvlist_t *, const char *, uint8_t); ++int nvlist_add_int16(nvlist_t *, const char *, int16_t); ++int nvlist_add_uint16(nvlist_t *, const char *, uint16_t); ++int nvlist_add_int32(nvlist_t *, const char *, int32_t); ++int nvlist_add_uint32(nvlist_t *, const char *, uint32_t); ++int nvlist_add_int64(nvlist_t *, const char *, int64_t); ++int nvlist_add_uint64(nvlist_t *, const char *, uint64_t); ++int nvlist_add_string(nvlist_t *, const char *, const char *); ++int nvlist_add_nvlist(nvlist_t *, const char *, nvlist_t *); ++int nvlist_add_boolean_array(nvlist_t *, const char *, boolean_t *, uint_t); ++int nvlist_add_byte_array(nvlist_t *, const char *, uchar_t *, uint_t); ++int nvlist_add_int8_array(nvlist_t *, const char *, int8_t *, uint_t); ++int nvlist_add_uint8_array(nvlist_t *, const char *, uint8_t *, uint_t); ++int nvlist_add_int16_array(nvlist_t *, const char *, int16_t *, uint_t); ++int nvlist_add_uint16_array(nvlist_t *, const char *, uint16_t *, uint_t); ++int nvlist_add_int32_array(nvlist_t *, const char *, int32_t *, uint_t); ++int nvlist_add_uint32_array(nvlist_t *, const char *, uint32_t *, uint_t); ++int nvlist_add_int64_array(nvlist_t *, const char *, int64_t *, uint_t); ++int nvlist_add_uint64_array(nvlist_t *, const char *, uint64_t *, uint_t); ++int nvlist_add_string_array(nvlist_t *, const char *, char *const *, uint_t); ++int nvlist_add_nvlist_array(nvlist_t *, const char *, nvlist_t **, uint_t); ++int nvlist_add_hrtime(nvlist_t *, const char *, hrtime_t); ++#if !defined(_KERNEL) ++int nvlist_add_double(nvlist_t *, const char *, double); ++#endif ++ ++int nvlist_remove(nvlist_t *, const char *, data_type_t); ++int nvlist_remove_all(nvlist_t *, const char *); ++int nvlist_remove_nvpair(nvlist_t *, nvpair_t *); ++ ++int nvlist_lookup_boolean(nvlist_t *, const char *); ++int nvlist_lookup_boolean_value(nvlist_t *, const char *, boolean_t *); ++int nvlist_lookup_byte(nvlist_t *, const char *, uchar_t *); ++int nvlist_lookup_int8(nvlist_t *, const char *, int8_t *); ++int nvlist_lookup_uint8(nvlist_t *, const char *, uint8_t *); ++int nvlist_lookup_int16(nvlist_t *, const char *, int16_t *); ++int nvlist_lookup_uint16(nvlist_t *, const char *, uint16_t *); ++int nvlist_lookup_int32(nvlist_t *, const char *, int32_t *); ++int nvlist_lookup_uint32(nvlist_t *, const char *, uint32_t *); ++int nvlist_lookup_int64(nvlist_t *, const char *, int64_t *); ++int nvlist_lookup_uint64(nvlist_t *, const char *, uint64_t *); ++int nvlist_lookup_string(nvlist_t *, const char *, char **); ++int nvlist_lookup_nvlist(nvlist_t *, const char *, nvlist_t **); ++int nvlist_lookup_boolean_array(nvlist_t *, const char *, ++ boolean_t **, uint_t *); ++int nvlist_lookup_byte_array(nvlist_t *, const char *, uchar_t **, uint_t *); ++int 
nvlist_lookup_int8_array(nvlist_t *, const char *, int8_t **, uint_t *); ++int nvlist_lookup_uint8_array(nvlist_t *, const char *, uint8_t **, uint_t *); ++int nvlist_lookup_int16_array(nvlist_t *, const char *, int16_t **, uint_t *); ++int nvlist_lookup_uint16_array(nvlist_t *, const char *, uint16_t **, uint_t *); ++int nvlist_lookup_int32_array(nvlist_t *, const char *, int32_t **, uint_t *); ++int nvlist_lookup_uint32_array(nvlist_t *, const char *, uint32_t **, uint_t *); ++int nvlist_lookup_int64_array(nvlist_t *, const char *, int64_t **, uint_t *); ++int nvlist_lookup_uint64_array(nvlist_t *, const char *, uint64_t **, uint_t *); ++int nvlist_lookup_string_array(nvlist_t *, const char *, char ***, uint_t *); ++int nvlist_lookup_nvlist_array(nvlist_t *, const char *, ++ nvlist_t ***, uint_t *); ++int nvlist_lookup_hrtime(nvlist_t *, const char *, hrtime_t *); ++int nvlist_lookup_pairs(nvlist_t *, int, ...); ++#if !defined(_KERNEL) ++int nvlist_lookup_double(nvlist_t *, const char *, double *); ++#endif ++ ++int nvlist_lookup_nvpair(nvlist_t *, const char *, nvpair_t **); ++int nvlist_lookup_nvpair_embedded_index(nvlist_t *, const char *, nvpair_t **, ++ int *, char **); ++boolean_t nvlist_exists(nvlist_t *, const char *); ++boolean_t nvlist_empty(nvlist_t *); ++ ++/* processing nvpair */ ++nvpair_t *nvlist_next_nvpair(nvlist_t *, nvpair_t *); ++nvpair_t *nvlist_prev_nvpair(nvlist_t *, nvpair_t *); ++char *nvpair_name(nvpair_t *); ++data_type_t nvpair_type(nvpair_t *); ++int nvpair_type_is_array(nvpair_t *); ++int nvpair_value_boolean_value(nvpair_t *, boolean_t *); ++int nvpair_value_byte(nvpair_t *, uchar_t *); ++int nvpair_value_int8(nvpair_t *, int8_t *); ++int nvpair_value_uint8(nvpair_t *, uint8_t *); ++int nvpair_value_int16(nvpair_t *, int16_t *); ++int nvpair_value_uint16(nvpair_t *, uint16_t *); ++int nvpair_value_int32(nvpair_t *, int32_t *); ++int nvpair_value_uint32(nvpair_t *, uint32_t *); ++int nvpair_value_int64(nvpair_t *, int64_t *); ++int nvpair_value_uint64(nvpair_t *, uint64_t *); ++int nvpair_value_string(nvpair_t *, char **); ++int nvpair_value_nvlist(nvpair_t *, nvlist_t **); ++int nvpair_value_boolean_array(nvpair_t *, boolean_t **, uint_t *); ++int nvpair_value_byte_array(nvpair_t *, uchar_t **, uint_t *); ++int nvpair_value_int8_array(nvpair_t *, int8_t **, uint_t *); ++int nvpair_value_uint8_array(nvpair_t *, uint8_t **, uint_t *); ++int nvpair_value_int16_array(nvpair_t *, int16_t **, uint_t *); ++int nvpair_value_uint16_array(nvpair_t *, uint16_t **, uint_t *); ++int nvpair_value_int32_array(nvpair_t *, int32_t **, uint_t *); ++int nvpair_value_uint32_array(nvpair_t *, uint32_t **, uint_t *); ++int nvpair_value_int64_array(nvpair_t *, int64_t **, uint_t *); ++int nvpair_value_uint64_array(nvpair_t *, uint64_t **, uint_t *); ++int nvpair_value_string_array(nvpair_t *, char ***, uint_t *); ++int nvpair_value_nvlist_array(nvpair_t *, nvlist_t ***, uint_t *); ++int nvpair_value_hrtime(nvpair_t *, hrtime_t *); ++#if !defined(_KERNEL) ++int nvpair_value_double(nvpair_t *, double *); ++#endif ++ ++#ifdef __cplusplus ++} ++#endif ++ ++#endif /* _SYS_NVPAIR_H */ +diff -uNr linux-3.2.33-go.orig/include/zfs/sys/nvpair_impl.h linux-3.2.33-go/include/zfs/sys/nvpair_impl.h +--- linux-3.2.33-go.orig/include/zfs/sys/nvpair_impl.h 1970-01-01 01:00:00.000000000 +0100 ++++ linux-3.2.33-go/include/zfs/sys/nvpair_impl.h 2012-11-16 23:25:34.338039461 +0100 +@@ -0,0 +1,73 @@ ++/* ++ * CDDL HEADER START ++ * ++ * The contents of this file are subject to the terms of the ++ * 
Common Development and Distribution License, Version 1.0 only ++ * (the "License"). You may not use this file except in compliance ++ * with the License. ++ * ++ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE ++ * or http://www.opensolaris.org/os/licensing. ++ * See the License for the specific language governing permissions ++ * and limitations under the License. ++ * ++ * When distributing Covered Code, include this CDDL HEADER in each ++ * file and include the License file at usr/src/OPENSOLARIS.LICENSE. ++ * If applicable, add the following below this CDDL HEADER, with the ++ * fields enclosed by brackets "[]" replaced with your own identifying ++ * information: Portions Copyright [yyyy] [name of copyright owner] ++ * ++ * CDDL HEADER END ++ */ ++/* ++ * Copyright 2004 Sun Microsystems, Inc. All rights reserved. ++ * Use is subject to license terms. ++ */ ++ ++#ifndef _NVPAIR_IMPL_H ++#define _NVPAIR_IMPL_H ++ ++ ++ ++#ifdef __cplusplus ++extern "C" { ++#endif ++ ++#include ++ ++/* ++ * The structures here provided for information and debugging purposes only ++ * may be changed in the future. ++ */ ++ ++/* ++ * implementation linked list for pre-packed data ++ */ ++typedef struct i_nvp i_nvp_t; ++ ++struct i_nvp { ++ union { ++ uint64_t _nvi_align; /* ensure alignment */ ++ struct { ++ i_nvp_t *_nvi_next; /* pointer to next nvpair */ ++ i_nvp_t *_nvi_prev; /* pointer to prev nvpair */ ++ } _nvi; ++ } _nvi_un; ++ nvpair_t nvi_nvp; /* nvpair */ ++}; ++#define nvi_next _nvi_un._nvi._nvi_next ++#define nvi_prev _nvi_un._nvi._nvi_prev ++ ++typedef struct { ++ i_nvp_t *nvp_list; /* linked list of nvpairs */ ++ i_nvp_t *nvp_last; /* last nvpair */ ++ i_nvp_t *nvp_curr; /* current walker nvpair */ ++ nv_alloc_t *nvp_nva; /* pluggable allocator */ ++ uint32_t nvp_stat; /* internal state */ ++} nvpriv_t; ++ ++#ifdef __cplusplus ++} ++#endif ++ ++#endif /* _NVPAIR_IMPL_H */ +diff -uNr linux-3.2.33-go.orig/include/zfs/sys/refcount.h linux-3.2.33-go/include/zfs/sys/refcount.h +--- linux-3.2.33-go.orig/include/zfs/sys/refcount.h 1970-01-01 01:00:00.000000000 +0100 ++++ linux-3.2.33-go/include/zfs/sys/refcount.h 2012-11-16 23:25:34.342039415 +0100 +@@ -0,0 +1,107 @@ ++/* ++ * CDDL HEADER START ++ * ++ * The contents of this file are subject to the terms of the ++ * Common Development and Distribution License (the "License"). ++ * You may not use this file except in compliance with the License. ++ * ++ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE ++ * or http://www.opensolaris.org/os/licensing. ++ * See the License for the specific language governing permissions ++ * and limitations under the License. ++ * ++ * When distributing Covered Code, include this CDDL HEADER in each ++ * file and include the License file at usr/src/OPENSOLARIS.LICENSE. ++ * If applicable, add the following below this CDDL HEADER, with the ++ * fields enclosed by brackets "[]" replaced with your own identifying ++ * information: Portions Copyright [yyyy] [name of copyright owner] ++ * ++ * CDDL HEADER END ++ */ ++/* ++ * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved. ++ */ ++ ++#ifndef _SYS_REFCOUNT_H ++#define _SYS_REFCOUNT_H ++ ++#include ++#include ++#include ++ ++#ifdef __cplusplus ++extern "C" { ++#endif ++ ++/* ++ * If the reference is held only by the calling function and not any ++ * particular object, use FTAG (which is a string) for the holder_tag. ++ * Otherwise, use the object that holds the reference. 
++ */ ++#define FTAG ((char *)__func__) ++ ++#ifdef ZFS_DEBUG ++typedef struct reference { ++ list_node_t ref_link; ++ void *ref_holder; ++ uint64_t ref_number; ++ uint8_t *ref_removed; ++} reference_t; ++ ++typedef struct refcount { ++ kmutex_t rc_mtx; ++ list_t rc_list; ++ list_t rc_removed; ++ int64_t rc_count; ++ int64_t rc_removed_count; ++} refcount_t; ++ ++/* Note: refcount_t must be initialized with refcount_create() */ ++ ++void refcount_create(refcount_t *rc); ++void refcount_destroy(refcount_t *rc); ++void refcount_destroy_many(refcount_t *rc, uint64_t number); ++int refcount_is_zero(refcount_t *rc); ++int64_t refcount_count(refcount_t *rc); ++int64_t refcount_add(refcount_t *rc, void *holder_tag); ++int64_t refcount_remove(refcount_t *rc, void *holder_tag); ++int64_t refcount_add_many(refcount_t *rc, uint64_t number, void *holder_tag); ++int64_t refcount_remove_many(refcount_t *rc, uint64_t number, void *holder_tag); ++void refcount_transfer(refcount_t *dst, refcount_t *src); ++ ++void refcount_init(void); ++void refcount_fini(void); ++ ++#else /* ZFS_DEBUG */ ++ ++typedef struct refcount { ++ uint64_t rc_count; ++} refcount_t; ++ ++#define refcount_create(rc) ((rc)->rc_count = 0) ++#define refcount_destroy(rc) ((rc)->rc_count = 0) ++#define refcount_destroy_many(rc, number) ((rc)->rc_count = 0) ++#define refcount_is_zero(rc) ((rc)->rc_count == 0) ++#define refcount_count(rc) ((rc)->rc_count) ++#define refcount_add(rc, holder) atomic_add_64_nv(&(rc)->rc_count, 1) ++#define refcount_remove(rc, holder) atomic_add_64_nv(&(rc)->rc_count, -1) ++#define refcount_add_many(rc, number, holder) \ ++ atomic_add_64_nv(&(rc)->rc_count, number) ++#define refcount_remove_many(rc, number, holder) \ ++ atomic_add_64_nv(&(rc)->rc_count, -number) ++#define refcount_transfer(dst, src) { \ ++ uint64_t __tmp = (src)->rc_count; \ ++ atomic_add_64(&(src)->rc_count, -__tmp); \ ++ atomic_add_64(&(dst)->rc_count, __tmp); \ ++} ++ ++#define refcount_init() ++#define refcount_fini() ++ ++#endif /* ZFS_DEBUG */ ++ ++#ifdef __cplusplus ++} ++#endif ++ ++#endif /* _SYS_REFCOUNT_H */ +diff -uNr linux-3.2.33-go.orig/include/zfs/sys/rrwlock.h linux-3.2.33-go/include/zfs/sys/rrwlock.h +--- linux-3.2.33-go.orig/include/zfs/sys/rrwlock.h 1970-01-01 01:00:00.000000000 +0100 ++++ linux-3.2.33-go/include/zfs/sys/rrwlock.h 2012-11-16 23:25:34.342039415 +0100 +@@ -0,0 +1,80 @@ ++/* ++ * CDDL HEADER START ++ * ++ * The contents of this file are subject to the terms of the ++ * Common Development and Distribution License (the "License"). ++ * You may not use this file except in compliance with the License. ++ * ++ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE ++ * or http://www.opensolaris.org/os/licensing. ++ * See the License for the specific language governing permissions ++ * and limitations under the License. ++ * ++ * When distributing Covered Code, include this CDDL HEADER in each ++ * file and include the License file at usr/src/OPENSOLARIS.LICENSE. ++ * If applicable, add the following below this CDDL HEADER, with the ++ * fields enclosed by brackets "[]" replaced with your own identifying ++ * information: Portions Copyright [yyyy] [name of copyright owner] ++ * ++ * CDDL HEADER END ++ */ ++/* ++ * Copyright 2007 Sun Microsystems, Inc. All rights reserved. ++ * Use is subject to license terms. 
++ */ ++ ++#ifndef _SYS_RR_RW_LOCK_H ++#define _SYS_RR_RW_LOCK_H ++ ++ ++ ++#ifdef __cplusplus ++extern "C" { ++#endif ++ ++#include ++#include ++#include ++ ++/* ++ * A reader-writer lock implementation that allows re-entrant reads, but ++ * still gives writers priority on "new" reads. ++ * ++ * See rrwlock.c for more details about the implementation. ++ * ++ * Fields of the rrwlock_t structure: ++ * - rr_lock: protects modification and reading of rrwlock_t fields ++ * - rr_cv: cv for waking up readers or waiting writers ++ * - rr_writer: thread id of the current writer ++ * - rr_anon_rount: number of active anonymous readers ++ * - rr_linked_rcount: total number of non-anonymous active readers ++ * - rr_writer_wanted: a writer wants the lock ++ */ ++typedef struct rrwlock { ++ kmutex_t rr_lock; ++ kcondvar_t rr_cv; ++ kthread_t *rr_writer; ++ refcount_t rr_anon_rcount; ++ refcount_t rr_linked_rcount; ++ boolean_t rr_writer_wanted; ++} rrwlock_t; ++ ++/* ++ * 'tag' is used in reference counting tracking. The ++ * 'tag' must be the same in a rrw_enter() as in its ++ * corresponding rrw_exit(). ++ */ ++void rrw_init(rrwlock_t *rrl); ++void rrw_destroy(rrwlock_t *rrl); ++void rrw_enter(rrwlock_t *rrl, krw_t rw, void *tag); ++void rrw_exit(rrwlock_t *rrl, void *tag); ++boolean_t rrw_held(rrwlock_t *rrl, krw_t rw); ++ ++#define RRW_READ_HELD(x) rrw_held(x, RW_READER) ++#define RRW_WRITE_HELD(x) rrw_held(x, RW_WRITER) ++ ++#ifdef __cplusplus ++} ++#endif ++ ++#endif /* _SYS_RR_RW_LOCK_H */ +diff -uNr linux-3.2.33-go.orig/include/zfs/sys/sa.h linux-3.2.33-go/include/zfs/sys/sa.h +--- linux-3.2.33-go.orig/include/zfs/sys/sa.h 1970-01-01 01:00:00.000000000 +0100 ++++ linux-3.2.33-go/include/zfs/sys/sa.h 2012-11-16 23:25:34.344039393 +0100 +@@ -0,0 +1,173 @@ ++/* ++ * CDDL HEADER START ++ * ++ * The contents of this file are subject to the terms of the ++ * Common Development and Distribution License (the "License"). ++ * You may not use this file except in compliance with the License. ++ * ++ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE ++ * or http://www.opensolaris.org/os/licensing. ++ * See the License for the specific language governing permissions ++ * and limitations under the License. ++ * ++ * When distributing Covered Code, include this CDDL HEADER in each ++ * file and include the License file at usr/src/OPENSOLARIS.LICENSE. ++ * If applicable, add the following below this CDDL HEADER, with the ++ * fields enclosed by brackets "[]" replaced with your own identifying ++ * information: Portions Copyright [yyyy] [name of copyright owner] ++ * ++ * CDDL HEADER END ++ */ ++/* ++ * Copyright (c) 2010, Oracle and/or its affiliates. All rights reserved. ++ */ ++ ++#ifndef _SYS_SA_H ++#define _SYS_SA_H ++ ++#include ++ ++/* ++ * Currently available byteswap functions. ++ * If it all possible new attributes should used ++ * one of the already defined byteswap functions. ++ * If a new byteswap function is added then the ++ * ZPL/Pool version will need to be bumped. ++ */ ++ ++typedef enum sa_bswap_type { ++ SA_UINT64_ARRAY, ++ SA_UINT32_ARRAY, ++ SA_UINT16_ARRAY, ++ SA_UINT8_ARRAY, ++ SA_ACL, ++} sa_bswap_type_t; ++ ++typedef uint16_t sa_attr_type_t; ++ ++/* ++ * Attribute to register support for. 
++ */ ++typedef struct sa_attr_reg { ++ char *sa_name; /* attribute name */ ++ uint16_t sa_length; ++ sa_bswap_type_t sa_byteswap; /* bswap functon enum */ ++ sa_attr_type_t sa_attr; /* filled in during registration */ ++} sa_attr_reg_t; ++ ++ ++typedef void (sa_data_locator_t)(void **, uint32_t *, uint32_t, ++ boolean_t, void *userptr); ++ ++/* ++ * array of attributes to store. ++ * ++ * This array should be treated as opaque/private data. ++ * The SA_BULK_ADD_ATTR() macro should be used for manipulating ++ * the array. ++ * ++ * When sa_replace_all_by_template() is used the attributes ++ * will be stored in the order defined in the array, except that ++ * the attributes may be split between the bonus and the spill buffer ++ * ++ */ ++typedef struct sa_bulk_attr { ++ void *sa_data; ++ sa_data_locator_t *sa_data_func; ++ uint16_t sa_length; ++ sa_attr_type_t sa_attr; ++ /* the following are private to the sa framework */ ++ void *sa_addr; ++ uint16_t sa_buftype; ++ uint16_t sa_size; ++} sa_bulk_attr_t; ++ ++ ++/* ++ * special macro for adding entries for bulk attr support ++ * bulk - sa_bulk_attr_t ++ * count - integer that will be incremented during each add ++ * attr - attribute to manipulate ++ * func - function for accessing data. ++ * data - pointer to data. ++ * len - length of data ++ */ ++ ++#define SA_ADD_BULK_ATTR(b, idx, attr, func, data, len) \ ++{ \ ++ b[idx].sa_attr = attr;\ ++ b[idx].sa_data_func = func; \ ++ b[idx].sa_data = data; \ ++ b[idx++].sa_length = len; \ ++} ++ ++typedef struct sa_os sa_os_t; ++ ++typedef enum sa_handle_type { ++ SA_HDL_SHARED, ++ SA_HDL_PRIVATE ++} sa_handle_type_t; ++ ++struct sa_handle; ++typedef void *sa_lookup_tab_t; ++typedef struct sa_handle sa_handle_t; ++ ++typedef void (sa_update_cb_t)(sa_handle_t *, dmu_tx_t *tx); ++ ++int sa_handle_get(objset_t *, uint64_t, void *userp, ++ sa_handle_type_t, sa_handle_t **); ++int sa_handle_get_from_db(objset_t *, dmu_buf_t *, void *userp, ++ sa_handle_type_t, sa_handle_t **); ++void sa_handle_destroy(sa_handle_t *); ++int sa_buf_hold(objset_t *, uint64_t, void *, dmu_buf_t **); ++void sa_buf_rele(dmu_buf_t *, void *); ++int sa_lookup(sa_handle_t *, sa_attr_type_t, void *buf, uint32_t buflen); ++int sa_update(sa_handle_t *, sa_attr_type_t, void *buf, ++ uint32_t buflen, dmu_tx_t *); ++int sa_remove(sa_handle_t *, sa_attr_type_t, dmu_tx_t *); ++int sa_bulk_lookup(sa_handle_t *, sa_bulk_attr_t *, int count); ++int sa_bulk_lookup_locked(sa_handle_t *, sa_bulk_attr_t *, int count); ++int sa_bulk_update(sa_handle_t *, sa_bulk_attr_t *, int count, dmu_tx_t *); ++int sa_size(sa_handle_t *, sa_attr_type_t, int *); ++int sa_update_from_cb(sa_handle_t *, sa_attr_type_t, ++ uint32_t buflen, sa_data_locator_t *, void *userdata, dmu_tx_t *); ++void sa_object_info(sa_handle_t *, dmu_object_info_t *); ++void sa_object_size(sa_handle_t *, uint32_t *, u_longlong_t *); ++void sa_update_user(sa_handle_t *, sa_handle_t *); ++void *sa_get_userdata(sa_handle_t *); ++void sa_set_userp(sa_handle_t *, void *); ++dmu_buf_t *sa_get_db(sa_handle_t *); ++uint64_t sa_handle_object(sa_handle_t *); ++boolean_t sa_attr_would_spill(sa_handle_t *, sa_attr_type_t, int size); ++void sa_spill_rele(sa_handle_t *); ++void sa_register_update_callback(objset_t *, sa_update_cb_t *); ++int sa_setup(objset_t *, uint64_t, sa_attr_reg_t *, int, sa_attr_type_t **); ++void sa_tear_down(objset_t *); ++int sa_replace_all_by_template(sa_handle_t *, sa_bulk_attr_t *, ++ int, dmu_tx_t *); ++int sa_replace_all_by_template_locked(sa_handle_t *, 
sa_bulk_attr_t *, ++ int, dmu_tx_t *); ++boolean_t sa_enabled(objset_t *); ++void sa_cache_init(void); ++void sa_cache_fini(void); ++void *sa_spill_alloc(int); ++void sa_spill_free(void *); ++int sa_set_sa_object(objset_t *, uint64_t); ++int sa_hdrsize(void *); ++void sa_handle_lock(sa_handle_t *); ++void sa_handle_unlock(sa_handle_t *); ++ ++#ifdef _KERNEL ++int sa_lookup_uio(sa_handle_t *, sa_attr_type_t, uio_t *); ++#endif ++ ++#ifdef __cplusplus ++extern "C" { ++#endif ++ ++ ++#ifdef __cplusplus ++} ++#endif ++ ++#endif /* _SYS_SA_H */ +diff -uNr linux-3.2.33-go.orig/include/zfs/sys/sa_impl.h linux-3.2.33-go/include/zfs/sys/sa_impl.h +--- linux-3.2.33-go.orig/include/zfs/sys/sa_impl.h 1970-01-01 01:00:00.000000000 +0100 ++++ linux-3.2.33-go/include/zfs/sys/sa_impl.h 2012-11-16 23:25:34.338039461 +0100 +@@ -0,0 +1,287 @@ ++/* ++ * CDDL HEADER START ++ * ++ * The contents of this file are subject to the terms of the ++ * Common Development and Distribution License (the "License"). ++ * You may not use this file except in compliance with the License. ++ * ++ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE ++ * or http://www.opensolaris.org/os/licensing. ++ * See the License for the specific language governing permissions ++ * and limitations under the License. ++ * ++ * When distributing Covered Code, include this CDDL HEADER in each ++ * file and include the License file at usr/src/OPENSOLARIS.LICENSE. ++ * If applicable, add the following below this CDDL HEADER, with the ++ * fields enclosed by brackets "[]" replaced with your own identifying ++ * information: Portions Copyright [yyyy] [name of copyright owner] ++ * ++ * CDDL HEADER END ++ */ ++/* ++ * Copyright (c) 2010, Oracle and/or its affiliates. All rights reserved. ++ */ ++ ++#ifndef _SYS_SA_IMPL_H ++#define _SYS_SA_IMPL_H ++ ++#include ++#include ++#include ++ ++/* ++ * Array of known attributes and their ++ * various characteristics. ++ */ ++typedef struct sa_attr_table { ++ sa_attr_type_t sa_attr; ++ uint8_t sa_registered; ++ uint16_t sa_length; ++ sa_bswap_type_t sa_byteswap; ++ char *sa_name; ++} sa_attr_table_t; ++ ++/* ++ * Zap attribute format for attribute registration ++ * ++ * 64 56 48 40 32 24 16 8 0 ++ * +-------+-------+-------+-------+-------+-------+-------+-------+ ++ * | unused | len | bswap | attr num | ++ * +-------+-------+-------+-------+-------+-------+-------+-------+ ++ * ++ * Zap attribute format for layout information. ++ * ++ * layout information is stored as an array of attribute numbers ++ * The name of the attribute is the layout number (0, 1, 2, ...) ++ * ++ * 16 0 ++ * +---- ---+ ++ * | attr # | ++ * +--------+ ++ * | attr # | ++ * +--- ----+ ++ * ...... 
++ * ++ */ ++ ++#define ATTR_BSWAP(x) BF32_GET(x, 16, 8) ++#define ATTR_LENGTH(x) BF32_GET(x, 24, 16) ++#define ATTR_NUM(x) BF32_GET(x, 0, 16) ++#define ATTR_ENCODE(x, attr, length, bswap) \ ++{ \ ++ BF64_SET(x, 24, 16, length); \ ++ BF64_SET(x, 16, 8, bswap); \ ++ BF64_SET(x, 0, 16, attr); \ ++} ++ ++#define TOC_OFF(x) BF32_GET(x, 0, 23) ++#define TOC_ATTR_PRESENT(x) BF32_GET(x, 31, 1) ++#define TOC_LEN_IDX(x) BF32_GET(x, 24, 4) ++#define TOC_ATTR_ENCODE(x, len_idx, offset) \ ++{ \ ++ BF32_SET(x, 31, 1, 1); \ ++ BF32_SET(x, 24, 7, len_idx); \ ++ BF32_SET(x, 0, 24, offset); \ ++} ++ ++#define SA_LAYOUTS "LAYOUTS" ++#define SA_REGISTRY "REGISTRY" ++ ++/* ++ * Each unique layout will have their own table ++ * sa_lot (layout_table) ++ */ ++typedef struct sa_lot { ++ avl_node_t lot_num_node; ++ avl_node_t lot_hash_node; ++ uint64_t lot_num; ++ uint64_t lot_hash; ++ sa_attr_type_t *lot_attrs; /* array of attr #'s */ ++ uint32_t lot_var_sizes; /* how many aren't fixed size */ ++ uint32_t lot_attr_count; /* total attr count */ ++ list_t lot_idx_tab; /* should be only a couple of entries */ ++ int lot_instance; /* used with lot_hash to identify entry */ ++} sa_lot_t; ++ ++/* index table of offsets */ ++typedef struct sa_idx_tab { ++ list_node_t sa_next; ++ sa_lot_t *sa_layout; ++ uint16_t *sa_variable_lengths; ++ refcount_t sa_refcount; ++ uint32_t *sa_idx_tab; /* array of offsets */ ++} sa_idx_tab_t; ++ ++/* ++ * Since the offset/index information into the actual data ++ * will usually be identical we can share that information with ++ * all handles that have the exact same offsets. ++ * ++ * You would typically only have a large number of different table of ++ * contents if you had a several variable sized attributes. ++ * ++ * Two AVL trees are used to track the attribute layout numbers. ++ * one is keyed by number and will be consulted when a DMU_OT_SA ++ * object is first read. The second tree is keyed by the hash signature ++ * of the attributes and will be consulted when an attribute is added ++ * to determine if we already have an instance of that layout. Both ++ * of these tree's are interconnected. The only difference is that ++ * when an entry is found in the "hash" tree the list of attributes will ++ * need to be compared against the list of attributes you have in hand. ++ * The assumption is that typically attributes will just be updated and ++ * adding a completely new attribute is a very rare operation. ++ */ ++struct sa_os { ++ kmutex_t sa_lock; ++ boolean_t sa_need_attr_registration; ++ boolean_t sa_force_spill; ++ uint64_t sa_master_obj; ++ uint64_t sa_reg_attr_obj; ++ uint64_t sa_layout_attr_obj; ++ int sa_num_attrs; ++ sa_attr_table_t *sa_attr_table; /* private attr table */ ++ sa_update_cb_t *sa_update_cb; ++ avl_tree_t sa_layout_num_tree; /* keyed by layout number */ ++ avl_tree_t sa_layout_hash_tree; /* keyed by layout hash value */ ++ int sa_user_table_sz; ++ sa_attr_type_t *sa_user_table; /* user name->attr mapping table */ ++}; ++ ++/* ++ * header for all bonus and spill buffers. ++ * The header has a fixed portion with a variable number ++ * of "lengths" depending on the number of variable sized ++ * attribues which are determined by the "layout number" ++ */ ++ ++#define SA_MAGIC 0x2F505A /* ZFS SA */ ++typedef struct sa_hdr_phys { ++ uint32_t sa_magic; ++ uint16_t sa_layout_info; /* Encoded with hdrsize and layout number */ ++ uint16_t sa_lengths[1]; /* optional sizes for variable length attrs */ ++ /* ... Data follows the lengths. 
*/ ++} sa_hdr_phys_t; ++ ++/* ++ * sa_hdr_phys -> sa_layout_info ++ * ++ * 16 10 0 ++ * +--------+-------+ ++ * | hdrsz |layout | ++ * +--------+-------+ ++ * ++ * Bits 0-10 are the layout number ++ * Bits 11-16 are the size of the header. ++ * The hdrsize is the number * 8 ++ * ++ * For example. ++ * hdrsz of 1 ==> 8 byte header ++ * 2 ==> 16 byte header ++ * ++ */ ++ ++#define SA_HDR_LAYOUT_NUM(hdr) BF32_GET(hdr->sa_layout_info, 0, 10) ++#define SA_HDR_SIZE(hdr) BF32_GET_SB(hdr->sa_layout_info, 10, 16, 3, 0) ++#define SA_HDR_LAYOUT_INFO_ENCODE(x, num, size) \ ++{ \ ++ BF32_SET_SB(x, 10, 6, 3, 0, size); \ ++ BF32_SET(x, 0, 10, num); \ ++} ++ ++typedef enum sa_buf_type { ++ SA_BONUS = 1, ++ SA_SPILL = 2 ++} sa_buf_type_t; ++ ++typedef enum sa_data_op { ++ SA_LOOKUP, ++ SA_UPDATE, ++ SA_ADD, ++ SA_REPLACE, ++ SA_REMOVE ++} sa_data_op_t; ++ ++/* ++ * Opaque handle used for most sa functions ++ * ++ * This needs to be kept as small as possible. ++ */ ++ ++struct sa_handle { ++ kmutex_t sa_lock; ++ dmu_buf_t *sa_bonus; ++ dmu_buf_t *sa_spill; ++ objset_t *sa_os; ++ void *sa_userp; ++ sa_idx_tab_t *sa_bonus_tab; /* idx of bonus */ ++ sa_idx_tab_t *sa_spill_tab; /* only present if spill activated */ ++}; ++ ++#define SA_GET_DB(hdl, type) \ ++ (dmu_buf_impl_t *)((type == SA_BONUS) ? hdl->sa_bonus : hdl->sa_spill) ++ ++#define SA_GET_HDR(hdl, type) \ ++ ((sa_hdr_phys_t *)((dmu_buf_impl_t *)(SA_GET_DB(hdl, \ ++ type))->db.db_data)) ++ ++#define SA_IDX_TAB_GET(hdl, type) \ ++ (type == SA_BONUS ? hdl->sa_bonus_tab : hdl->sa_spill_tab) ++ ++#define IS_SA_BONUSTYPE(a) \ ++ ((a == DMU_OT_SA) ? B_TRUE : B_FALSE) ++ ++#define SA_BONUSTYPE_FROM_DB(db) \ ++ (dmu_get_bonustype((dmu_buf_t *)db)) ++ ++#define SA_BLKPTR_SPACE (DN_MAX_BONUSLEN - sizeof (blkptr_t)) ++ ++#define SA_LAYOUT_NUM(x, type) \ ++ ((!IS_SA_BONUSTYPE(type) ? 0 : (((IS_SA_BONUSTYPE(type)) && \ ++ ((SA_HDR_LAYOUT_NUM(x)) == 0)) ? 1 : SA_HDR_LAYOUT_NUM(x)))) ++ ++ ++#define SA_REGISTERED_LEN(sa, attr) sa->sa_attr_table[attr].sa_length ++ ++#define SA_ATTR_LEN(sa, idx, attr, hdr) ((SA_REGISTERED_LEN(sa, attr) == 0) ?\ ++ hdr->sa_lengths[TOC_LEN_IDX(idx->sa_idx_tab[attr])] : \ ++ SA_REGISTERED_LEN(sa, attr)) ++ ++#define SA_SET_HDR(hdr, num, size) \ ++ { \ ++ hdr->sa_magic = SA_MAGIC; \ ++ SA_HDR_LAYOUT_INFO_ENCODE(hdr->sa_layout_info, num, size); \ ++ } ++ ++#define SA_ATTR_INFO(sa, idx, hdr, attr, bulk, type, hdl) \ ++ { \ ++ bulk.sa_size = SA_ATTR_LEN(sa, idx, attr, hdr); \ ++ bulk.sa_buftype = type; \ ++ bulk.sa_addr = \ ++ (void *)((uintptr_t)TOC_OFF(idx->sa_idx_tab[attr]) + \ ++ (uintptr_t)hdr); \ ++} ++ ++#define SA_HDR_SIZE_MATCH_LAYOUT(hdr, tb) \ ++ (SA_HDR_SIZE(hdr) == (sizeof (sa_hdr_phys_t) + \ ++ (tb->lot_var_sizes > 1 ? 
P2ROUNDUP((tb->lot_var_sizes - 1) * \ ++ sizeof (uint16_t), 8) : 0))) ++ ++int sa_add_impl(sa_handle_t *, sa_attr_type_t, ++ uint32_t, sa_data_locator_t, void *, dmu_tx_t *); ++ ++void sa_register_update_callback_locked(objset_t *, sa_update_cb_t *); ++int sa_size_locked(sa_handle_t *, sa_attr_type_t, int *); ++ ++void sa_default_locator(void **, uint32_t *, uint32_t, boolean_t, void *); ++int sa_attr_size(sa_os_t *, sa_idx_tab_t *, sa_attr_type_t, ++ uint16_t *, sa_hdr_phys_t *); ++ ++#ifdef __cplusplus ++extern "C" { ++#endif ++ ++#ifdef __cplusplus ++} ++#endif ++ ++#endif /* _SYS_SA_IMPL_H */ +diff -uNr linux-3.2.33-go.orig/include/zfs/sys/spa_boot.h linux-3.2.33-go/include/zfs/sys/spa_boot.h +--- linux-3.2.33-go.orig/include/zfs/sys/spa_boot.h 1970-01-01 01:00:00.000000000 +0100 ++++ linux-3.2.33-go/include/zfs/sys/spa_boot.h 2012-11-16 23:25:34.336039485 +0100 +@@ -0,0 +1,42 @@ ++/* ++ * CDDL HEADER START ++ * ++ * The contents of this file are subject to the terms of the ++ * Common Development and Distribution License (the "License"). ++ * You may not use this file except in compliance with the License. ++ * ++ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE ++ * or http://www.opensolaris.org/os/licensing. ++ * See the License for the specific language governing permissions ++ * and limitations under the License. ++ * ++ * When distributing Covered Code, include this CDDL HEADER in each ++ * file and include the License file at usr/src/OPENSOLARIS.LICENSE. ++ * If applicable, add the following below this CDDL HEADER, with the ++ * fields enclosed by brackets "[]" replaced with your own identifying ++ * information: Portions Copyright [yyyy] [name of copyright owner] ++ * ++ * CDDL HEADER END ++ */ ++/* ++ * Copyright 2009 Sun Microsystems, Inc. All rights reserved. ++ * Use is subject to license terms. ++ */ ++ ++#ifndef _SYS_SPA_BOOT_H ++#define _SYS_SPA_BOOT_H ++ ++#include ++ ++#ifdef __cplusplus ++extern "C" { ++#endif ++ ++extern char *spa_get_bootprop(char *prop); ++extern void spa_free_bootprop(char *prop); ++ ++#ifdef __cplusplus ++} ++#endif ++ ++#endif /* _SYS_SPA_BOOT_H */ +diff -uNr linux-3.2.33-go.orig/include/zfs/sys/space_map.h linux-3.2.33-go/include/zfs/sys/space_map.h +--- linux-3.2.33-go.orig/include/zfs/sys/space_map.h 1970-01-01 01:00:00.000000000 +0100 ++++ linux-3.2.33-go/include/zfs/sys/space_map.h 2012-11-16 23:25:34.338039461 +0100 +@@ -0,0 +1,179 @@ ++/* ++ * CDDL HEADER START ++ * ++ * The contents of this file are subject to the terms of the ++ * Common Development and Distribution License (the "License"). ++ * You may not use this file except in compliance with the License. ++ * ++ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE ++ * or http://www.opensolaris.org/os/licensing. ++ * See the License for the specific language governing permissions ++ * and limitations under the License. ++ * ++ * When distributing Covered Code, include this CDDL HEADER in each ++ * file and include the License file at usr/src/OPENSOLARIS.LICENSE. ++ * If applicable, add the following below this CDDL HEADER, with the ++ * fields enclosed by brackets "[]" replaced with your own identifying ++ * information: Portions Copyright [yyyy] [name of copyright owner] ++ * ++ * CDDL HEADER END ++ */ ++/* ++ * Copyright 2009 Sun Microsystems, Inc. All rights reserved. ++ * Use is subject to license terms. 
++ */ ++ ++#ifndef _SYS_SPACE_MAP_H ++#define _SYS_SPACE_MAP_H ++ ++#include ++#include ++ ++#ifdef __cplusplus ++extern "C" { ++#endif ++ ++typedef struct space_map_ops space_map_ops_t; ++ ++typedef struct space_map { ++ avl_tree_t sm_root; /* AVL tree of map segments */ ++ uint64_t sm_space; /* sum of all segments in the map */ ++ uint64_t sm_start; /* start of map */ ++ uint64_t sm_size; /* size of map */ ++ uint8_t sm_shift; /* unit shift */ ++ uint8_t sm_pad[3]; /* unused */ ++ uint8_t sm_loaded; /* map loaded? */ ++ uint8_t sm_loading; /* map loading? */ ++ kcondvar_t sm_load_cv; /* map load completion */ ++ space_map_ops_t *sm_ops; /* space map block picker ops vector */ ++ avl_tree_t *sm_pp_root; /* picker-private AVL tree */ ++ void *sm_ppd; /* picker-private data */ ++ kmutex_t *sm_lock; /* pointer to lock that protects map */ ++} space_map_t; ++ ++typedef struct space_seg { ++ avl_node_t ss_node; /* AVL node */ ++ avl_node_t ss_pp_node; /* AVL picker-private node */ ++ uint64_t ss_start; /* starting offset of this segment */ ++ uint64_t ss_end; /* ending offset (non-inclusive) */ ++} space_seg_t; ++ ++typedef struct space_ref { ++ avl_node_t sr_node; /* AVL node */ ++ uint64_t sr_offset; /* offset (start or end) */ ++ int64_t sr_refcnt; /* associated reference count */ ++} space_ref_t; ++ ++typedef struct space_map_obj { ++ uint64_t smo_object; /* on-disk space map object */ ++ uint64_t smo_objsize; /* size of the object */ ++ uint64_t smo_alloc; /* space allocated from the map */ ++} space_map_obj_t; ++ ++struct space_map_ops { ++ void (*smop_load)(space_map_t *sm); ++ void (*smop_unload)(space_map_t *sm); ++ uint64_t (*smop_alloc)(space_map_t *sm, uint64_t size); ++ void (*smop_claim)(space_map_t *sm, uint64_t start, uint64_t size); ++ void (*smop_free)(space_map_t *sm, uint64_t start, uint64_t size); ++ uint64_t (*smop_max)(space_map_t *sm); ++ boolean_t (*smop_fragmented)(space_map_t *sm); ++}; ++ ++/* ++ * debug entry ++ * ++ * 1 3 10 50 ++ * ,---+--------+------------+---------------------------------. ++ * | 1 | action | syncpass | txg (lower bits) | ++ * `---+--------+------------+---------------------------------' ++ * 63 62 60 59 50 49 0 ++ * ++ * ++ * ++ * non-debug entry ++ * ++ * 1 47 1 15 ++ * ,-----------------------------------------------------------. ++ * | 0 | offset (sm_shift units) | type | run | ++ * `-----------------------------------------------------------' ++ * 63 62 17 16 15 0 ++ */ ++ ++/* All this stuff takes and returns bytes */ ++#define SM_RUN_DECODE(x) (BF64_DECODE(x, 0, 15) + 1) ++#define SM_RUN_ENCODE(x) BF64_ENCODE((x) - 1, 0, 15) ++#define SM_TYPE_DECODE(x) BF64_DECODE(x, 15, 1) ++#define SM_TYPE_ENCODE(x) BF64_ENCODE(x, 15, 1) ++#define SM_OFFSET_DECODE(x) BF64_DECODE(x, 16, 47) ++#define SM_OFFSET_ENCODE(x) BF64_ENCODE(x, 16, 47) ++#define SM_DEBUG_DECODE(x) BF64_DECODE(x, 63, 1) ++#define SM_DEBUG_ENCODE(x) BF64_ENCODE(x, 63, 1) ++ ++#define SM_DEBUG_ACTION_DECODE(x) BF64_DECODE(x, 60, 3) ++#define SM_DEBUG_ACTION_ENCODE(x) BF64_ENCODE(x, 60, 3) ++ ++#define SM_DEBUG_SYNCPASS_DECODE(x) BF64_DECODE(x, 50, 10) ++#define SM_DEBUG_SYNCPASS_ENCODE(x) BF64_ENCODE(x, 50, 10) ++ ++#define SM_DEBUG_TXG_DECODE(x) BF64_DECODE(x, 0, 50) ++#define SM_DEBUG_TXG_ENCODE(x) BF64_ENCODE(x, 0, 50) ++ ++#define SM_RUN_MAX SM_RUN_DECODE(~0ULL) ++ ++#define SM_ALLOC 0x0 ++#define SM_FREE 0x1 ++ ++/* ++ * The data for a given space map can be kept on blocks of any size. 
++ * Larger blocks entail fewer i/o operations, but they also cause the ++ * DMU to keep more data in-core, and also to waste more i/o bandwidth ++ * when only a few blocks have changed since the last transaction group. ++ * This could use a lot more research, but for now, set the freelist ++ * block size to 4k (2^12). ++ */ ++#define SPACE_MAP_BLOCKSHIFT 12 ++ ++typedef void space_map_func_t(space_map_t *sm, uint64_t start, uint64_t size); ++ ++extern void space_map_create(space_map_t *sm, uint64_t start, uint64_t size, ++ uint8_t shift, kmutex_t *lp); ++extern void space_map_destroy(space_map_t *sm); ++extern void space_map_add(space_map_t *sm, uint64_t start, uint64_t size); ++extern void space_map_remove(space_map_t *sm, uint64_t start, uint64_t size); ++extern boolean_t space_map_contains(space_map_t *sm, ++ uint64_t start, uint64_t size); ++extern void space_map_vacate(space_map_t *sm, ++ space_map_func_t *func, space_map_t *mdest); ++extern void space_map_walk(space_map_t *sm, ++ space_map_func_t *func, space_map_t *mdest); ++ ++extern void space_map_load_wait(space_map_t *sm); ++extern int space_map_load(space_map_t *sm, space_map_ops_t *ops, ++ uint8_t maptype, space_map_obj_t *smo, objset_t *os); ++extern void space_map_unload(space_map_t *sm); ++ ++extern uint64_t space_map_alloc(space_map_t *sm, uint64_t size); ++extern void space_map_claim(space_map_t *sm, uint64_t start, uint64_t size); ++extern void space_map_free(space_map_t *sm, uint64_t start, uint64_t size); ++extern uint64_t space_map_maxsize(space_map_t *sm); ++ ++extern void space_map_sync(space_map_t *sm, uint8_t maptype, ++ space_map_obj_t *smo, objset_t *os, dmu_tx_t *tx); ++extern void space_map_truncate(space_map_obj_t *smo, ++ objset_t *os, dmu_tx_t *tx); ++ ++extern void space_map_ref_create(avl_tree_t *t); ++extern void space_map_ref_destroy(avl_tree_t *t); ++extern void space_map_ref_add_seg(avl_tree_t *t, ++ uint64_t start, uint64_t end, int64_t refcnt); ++extern void space_map_ref_add_map(avl_tree_t *t, ++ space_map_t *sm, int64_t refcnt); ++extern void space_map_ref_generate_map(avl_tree_t *t, ++ space_map_t *sm, int64_t minref); ++ ++#ifdef __cplusplus ++} ++#endif ++ ++#endif /* _SYS_SPACE_MAP_H */ +diff -uNr linux-3.2.33-go.orig/include/zfs/sys/spa.h linux-3.2.33-go/include/zfs/sys/spa.h +--- linux-3.2.33-go.orig/include/zfs/sys/spa.h 1970-01-01 01:00:00.000000000 +0100 ++++ linux-3.2.33-go/include/zfs/sys/spa.h 2012-11-16 23:25:34.342039415 +0100 +@@ -0,0 +1,718 @@ ++/* ++ * CDDL HEADER START ++ * ++ * The contents of this file are subject to the terms of the ++ * Common Development and Distribution License (the "License"). ++ * You may not use this file except in compliance with the License. ++ * ++ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE ++ * or http://www.opensolaris.org/os/licensing. ++ * See the License for the specific language governing permissions ++ * and limitations under the License. ++ * ++ * When distributing Covered Code, include this CDDL HEADER in each ++ * file and include the License file at usr/src/OPENSOLARIS.LICENSE. ++ * If applicable, add the following below this CDDL HEADER, with the ++ * fields enclosed by brackets "[]" replaced with your own identifying ++ * information: Portions Copyright [yyyy] [name of copyright owner] ++ * ++ * CDDL HEADER END ++ */ ++/* ++ * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2011 by Delphix. All rights reserved. ++ * Copyright 2011 Nexenta Systems, Inc. 
All rights reserved. ++ */ ++ ++#ifndef _SYS_SPA_H ++#define _SYS_SPA_H ++ ++#include ++#include ++#include ++#include ++#include ++#include ++ ++#ifdef __cplusplus ++extern "C" { ++#endif ++ ++/* ++ * Forward references that lots of things need. ++ */ ++typedef struct spa spa_t; ++typedef struct vdev vdev_t; ++typedef struct metaslab metaslab_t; ++typedef struct metaslab_group metaslab_group_t; ++typedef struct metaslab_class metaslab_class_t; ++typedef struct zio zio_t; ++typedef struct zilog zilog_t; ++typedef struct spa_aux_vdev spa_aux_vdev_t; ++typedef struct ddt ddt_t; ++typedef struct ddt_entry ddt_entry_t; ++struct dsl_pool; ++ ++/* ++ * General-purpose 32-bit and 64-bit bitfield encodings. ++ */ ++#define BF32_DECODE(x, low, len) P2PHASE((x) >> (low), 1U << (len)) ++#define BF64_DECODE(x, low, len) P2PHASE((x) >> (low), 1ULL << (len)) ++#define BF32_ENCODE(x, low, len) (P2PHASE((x), 1U << (len)) << (low)) ++#define BF64_ENCODE(x, low, len) (P2PHASE((x), 1ULL << (len)) << (low)) ++ ++#define BF32_GET(x, low, len) BF32_DECODE(x, low, len) ++#define BF64_GET(x, low, len) BF64_DECODE(x, low, len) ++ ++#define BF32_SET(x, low, len, val) \ ++ ((x) ^= BF32_ENCODE((x >> low) ^ (val), low, len)) ++#define BF64_SET(x, low, len, val) \ ++ ((x) ^= BF64_ENCODE((x >> low) ^ (val), low, len)) ++ ++#define BF32_GET_SB(x, low, len, shift, bias) \ ++ ((BF32_GET(x, low, len) + (bias)) << (shift)) ++#define BF64_GET_SB(x, low, len, shift, bias) \ ++ ((BF64_GET(x, low, len) + (bias)) << (shift)) ++ ++#define BF32_SET_SB(x, low, len, shift, bias, val) \ ++ BF32_SET(x, low, len, ((val) >> (shift)) - (bias)) ++#define BF64_SET_SB(x, low, len, shift, bias, val) \ ++ BF64_SET(x, low, len, ((val) >> (shift)) - (bias)) ++ ++/* ++ * We currently support nine block sizes, from 512 bytes to 128K. ++ * We could go higher, but the benefits are near-zero and the cost ++ * of COWing a giant block to modify one byte would become excessive. ++ */ ++#define SPA_MINBLOCKSHIFT 9 ++#define SPA_MAXBLOCKSHIFT 17 ++#define SPA_MINBLOCKSIZE (1ULL << SPA_MINBLOCKSHIFT) ++#define SPA_MAXBLOCKSIZE (1ULL << SPA_MAXBLOCKSHIFT) ++ ++#define SPA_BLOCKSIZES (SPA_MAXBLOCKSHIFT - SPA_MINBLOCKSHIFT + 1) ++ ++/* ++ * Size of block to hold the configuration data (a packed nvlist) ++ */ ++#define SPA_CONFIG_BLOCKSIZE (1 << 14) ++ ++/* ++ * The DVA size encodings for LSIZE and PSIZE support blocks up to 32MB. ++ * The ASIZE encoding should be at least 64 times larger (6 more bits) ++ * to support up to 4-way RAID-Z mirror mode with worst-case gang block ++ * overhead, three DVAs per bp, plus one more bit in case we do anything ++ * else that expands the ASIZE. ++ */ ++#define SPA_LSIZEBITS 16 /* LSIZE up to 32M (2^16 * 512) */ ++#define SPA_PSIZEBITS 16 /* PSIZE up to 32M (2^16 * 512) */ ++#define SPA_ASIZEBITS 24 /* ASIZE up to 64 times larger */ ++ ++/* ++ * All SPA data is represented by 128-bit data virtual addresses (DVAs). ++ * The members of the dva_t should be considered opaque outside the SPA. ++ */ ++typedef struct dva { ++ uint64_t dva_word[2]; ++} dva_t; ++ ++/* ++ * Each block has a 256-bit checksum -- strong enough for cryptographic hashes. ++ */ ++typedef struct zio_cksum { ++ uint64_t zc_word[4]; ++} zio_cksum_t; ++ ++/* ++ * Each block is described by its DVAs, time of birth, checksum, etc. 
++ * The word-by-word, bit-by-bit layout of the blkptr is as follows: ++ * ++ * 64 56 48 40 32 24 16 8 0 ++ * +-------+-------+-------+-------+-------+-------+-------+-------+ ++ * 0 | vdev1 | GRID | ASIZE | ++ * +-------+-------+-------+-------+-------+-------+-------+-------+ ++ * 1 |G| offset1 | ++ * +-------+-------+-------+-------+-------+-------+-------+-------+ ++ * 2 | vdev2 | GRID | ASIZE | ++ * +-------+-------+-------+-------+-------+-------+-------+-------+ ++ * 3 |G| offset2 | ++ * +-------+-------+-------+-------+-------+-------+-------+-------+ ++ * 4 | vdev3 | GRID | ASIZE | ++ * +-------+-------+-------+-------+-------+-------+-------+-------+ ++ * 5 |G| offset3 | ++ * +-------+-------+-------+-------+-------+-------+-------+-------+ ++ * 6 |BDX|lvl| type | cksum | comp | PSIZE | LSIZE | ++ * +-------+-------+-------+-------+-------+-------+-------+-------+ ++ * 7 | padding | ++ * +-------+-------+-------+-------+-------+-------+-------+-------+ ++ * 8 | padding | ++ * +-------+-------+-------+-------+-------+-------+-------+-------+ ++ * 9 | physical birth txg | ++ * +-------+-------+-------+-------+-------+-------+-------+-------+ ++ * a | logical birth txg | ++ * +-------+-------+-------+-------+-------+-------+-------+-------+ ++ * b | fill count | ++ * +-------+-------+-------+-------+-------+-------+-------+-------+ ++ * c | checksum[0] | ++ * +-------+-------+-------+-------+-------+-------+-------+-------+ ++ * d | checksum[1] | ++ * +-------+-------+-------+-------+-------+-------+-------+-------+ ++ * e | checksum[2] | ++ * +-------+-------+-------+-------+-------+-------+-------+-------+ ++ * f | checksum[3] | ++ * +-------+-------+-------+-------+-------+-------+-------+-------+ ++ * ++ * Legend: ++ * ++ * vdev virtual device ID ++ * offset offset into virtual device ++ * LSIZE logical size ++ * PSIZE physical size (after compression) ++ * ASIZE allocated size (including RAID-Z parity and gang block headers) ++ * GRID RAID-Z layout information (reserved for future use) ++ * cksum checksum function ++ * comp compression function ++ * G gang block indicator ++ * B byteorder (endianness) ++ * D dedup ++ * X unused ++ * lvl level of indirection ++ * type DMU object type ++ * phys birth txg of block allocation; zero if same as logical birth txg ++ * log. birth transaction group in which the block was logically born ++ * fill count number of non-zero blocks under this bp ++ * checksum[4] 256-bit checksum of the data this bp describes ++ */ ++#define SPA_BLKPTRSHIFT 7 /* blkptr_t is 128 bytes */ ++#define SPA_DVAS_PER_BP 3 /* Number of DVAs in a bp */ ++ ++typedef struct blkptr { ++ dva_t blk_dva[SPA_DVAS_PER_BP]; /* Data Virtual Addresses */ ++ uint64_t blk_prop; /* size, compression, type, etc */ ++ uint64_t blk_pad[2]; /* Extra space for the future */ ++ uint64_t blk_phys_birth; /* txg when block was allocated */ ++ uint64_t blk_birth; /* transaction group at birth */ ++ uint64_t blk_fill; /* fill count */ ++ zio_cksum_t blk_cksum; /* 256-bit checksum */ ++} blkptr_t; ++ ++/* ++ * Macros to get and set fields in a bp or DVA. 
++ */ ++#define DVA_GET_ASIZE(dva) \ ++ BF64_GET_SB((dva)->dva_word[0], 0, 24, SPA_MINBLOCKSHIFT, 0) ++#define DVA_SET_ASIZE(dva, x) \ ++ BF64_SET_SB((dva)->dva_word[0], 0, 24, SPA_MINBLOCKSHIFT, 0, x) ++ ++#define DVA_GET_GRID(dva) BF64_GET((dva)->dva_word[0], 24, 8) ++#define DVA_SET_GRID(dva, x) BF64_SET((dva)->dva_word[0], 24, 8, x) ++ ++#define DVA_GET_VDEV(dva) BF64_GET((dva)->dva_word[0], 32, 32) ++#define DVA_SET_VDEV(dva, x) BF64_SET((dva)->dva_word[0], 32, 32, x) ++ ++#define DVA_GET_OFFSET(dva) \ ++ BF64_GET_SB((dva)->dva_word[1], 0, 63, SPA_MINBLOCKSHIFT, 0) ++#define DVA_SET_OFFSET(dva, x) \ ++ BF64_SET_SB((dva)->dva_word[1], 0, 63, SPA_MINBLOCKSHIFT, 0, x) ++ ++#define DVA_GET_GANG(dva) BF64_GET((dva)->dva_word[1], 63, 1) ++#define DVA_SET_GANG(dva, x) BF64_SET((dva)->dva_word[1], 63, 1, x) ++ ++#define BP_GET_LSIZE(bp) \ ++ BF64_GET_SB((bp)->blk_prop, 0, 16, SPA_MINBLOCKSHIFT, 1) ++#define BP_SET_LSIZE(bp, x) \ ++ BF64_SET_SB((bp)->blk_prop, 0, 16, SPA_MINBLOCKSHIFT, 1, x) ++ ++#define BP_GET_PSIZE(bp) \ ++ BF64_GET_SB((bp)->blk_prop, 16, 16, SPA_MINBLOCKSHIFT, 1) ++#define BP_SET_PSIZE(bp, x) \ ++ BF64_SET_SB((bp)->blk_prop, 16, 16, SPA_MINBLOCKSHIFT, 1, x) ++ ++#define BP_GET_COMPRESS(bp) BF64_GET((bp)->blk_prop, 32, 8) ++#define BP_SET_COMPRESS(bp, x) BF64_SET((bp)->blk_prop, 32, 8, x) ++ ++#define BP_GET_CHECKSUM(bp) BF64_GET((bp)->blk_prop, 40, 8) ++#define BP_SET_CHECKSUM(bp, x) BF64_SET((bp)->blk_prop, 40, 8, x) ++ ++#define BP_GET_TYPE(bp) BF64_GET((bp)->blk_prop, 48, 8) ++#define BP_SET_TYPE(bp, x) BF64_SET((bp)->blk_prop, 48, 8, x) ++ ++#define BP_GET_LEVEL(bp) BF64_GET((bp)->blk_prop, 56, 5) ++#define BP_SET_LEVEL(bp, x) BF64_SET((bp)->blk_prop, 56, 5, x) ++ ++#define BP_GET_PROP_BIT_61(bp) BF64_GET((bp)->blk_prop, 61, 1) ++#define BP_SET_PROP_BIT_61(bp, x) BF64_SET((bp)->blk_prop, 61, 1, x) ++ ++#define BP_GET_DEDUP(bp) BF64_GET((bp)->blk_prop, 62, 1) ++#define BP_SET_DEDUP(bp, x) BF64_SET((bp)->blk_prop, 62, 1, x) ++ ++#define BP_GET_BYTEORDER(bp) (0 - BF64_GET((bp)->blk_prop, 63, 1)) ++#define BP_SET_BYTEORDER(bp, x) BF64_SET((bp)->blk_prop, 63, 1, x) ++ ++#define BP_PHYSICAL_BIRTH(bp) \ ++ ((bp)->blk_phys_birth ? (bp)->blk_phys_birth : (bp)->blk_birth) ++ ++#define BP_SET_BIRTH(bp, logical, physical) \ ++{ \ ++ (bp)->blk_birth = (logical); \ ++ (bp)->blk_phys_birth = ((logical) == (physical) ? 0 : (physical)); \ ++} ++ ++#define BP_GET_ASIZE(bp) \ ++ (DVA_GET_ASIZE(&(bp)->blk_dva[0]) + DVA_GET_ASIZE(&(bp)->blk_dva[1]) + \ ++ DVA_GET_ASIZE(&(bp)->blk_dva[2])) ++ ++#define BP_GET_UCSIZE(bp) \ ++ ((BP_GET_LEVEL(bp) > 0 || dmu_ot[BP_GET_TYPE(bp)].ot_metadata) ? 
\ ++ BP_GET_PSIZE(bp) : BP_GET_LSIZE(bp)) ++ ++#define BP_GET_NDVAS(bp) \ ++ (!!DVA_GET_ASIZE(&(bp)->blk_dva[0]) + \ ++ !!DVA_GET_ASIZE(&(bp)->blk_dva[1]) + \ ++ !!DVA_GET_ASIZE(&(bp)->blk_dva[2])) ++ ++#define BP_COUNT_GANG(bp) \ ++ (DVA_GET_GANG(&(bp)->blk_dva[0]) + \ ++ DVA_GET_GANG(&(bp)->blk_dva[1]) + \ ++ DVA_GET_GANG(&(bp)->blk_dva[2])) ++ ++#define DVA_EQUAL(dva1, dva2) \ ++ ((dva1)->dva_word[1] == (dva2)->dva_word[1] && \ ++ (dva1)->dva_word[0] == (dva2)->dva_word[0]) ++ ++#define BP_EQUAL(bp1, bp2) \ ++ (BP_PHYSICAL_BIRTH(bp1) == BP_PHYSICAL_BIRTH(bp2) && \ ++ DVA_EQUAL(&(bp1)->blk_dva[0], &(bp2)->blk_dva[0]) && \ ++ DVA_EQUAL(&(bp1)->blk_dva[1], &(bp2)->blk_dva[1]) && \ ++ DVA_EQUAL(&(bp1)->blk_dva[2], &(bp2)->blk_dva[2])) ++ ++#define ZIO_CHECKSUM_EQUAL(zc1, zc2) \ ++ (0 == (((zc1).zc_word[0] - (zc2).zc_word[0]) | \ ++ ((zc1).zc_word[1] - (zc2).zc_word[1]) | \ ++ ((zc1).zc_word[2] - (zc2).zc_word[2]) | \ ++ ((zc1).zc_word[3] - (zc2).zc_word[3]))) ++ ++#define DVA_IS_VALID(dva) (DVA_GET_ASIZE(dva) != 0) ++ ++#define ZIO_SET_CHECKSUM(zcp, w0, w1, w2, w3) \ ++{ \ ++ (zcp)->zc_word[0] = w0; \ ++ (zcp)->zc_word[1] = w1; \ ++ (zcp)->zc_word[2] = w2; \ ++ (zcp)->zc_word[3] = w3; \ ++} ++ ++#define BP_IDENTITY(bp) (&(bp)->blk_dva[0]) ++#define BP_IS_GANG(bp) DVA_GET_GANG(BP_IDENTITY(bp)) ++#define BP_IS_HOLE(bp) ((bp)->blk_birth == 0) ++ ++/* BP_IS_RAIDZ(bp) assumes no block compression */ ++#define BP_IS_RAIDZ(bp) (DVA_GET_ASIZE(&(bp)->blk_dva[0]) > \ ++ BP_GET_PSIZE(bp)) ++ ++#define BP_ZERO(bp) \ ++{ \ ++ (bp)->blk_dva[0].dva_word[0] = 0; \ ++ (bp)->blk_dva[0].dva_word[1] = 0; \ ++ (bp)->blk_dva[1].dva_word[0] = 0; \ ++ (bp)->blk_dva[1].dva_word[1] = 0; \ ++ (bp)->blk_dva[2].dva_word[0] = 0; \ ++ (bp)->blk_dva[2].dva_word[1] = 0; \ ++ (bp)->blk_prop = 0; \ ++ (bp)->blk_pad[0] = 0; \ ++ (bp)->blk_pad[1] = 0; \ ++ (bp)->blk_phys_birth = 0; \ ++ (bp)->blk_birth = 0; \ ++ (bp)->blk_fill = 0; \ ++ ZIO_SET_CHECKSUM(&(bp)->blk_cksum, 0, 0, 0, 0); \ ++} ++ ++/* ++ * Note: the byteorder is either 0 or -1, both of which are palindromes. ++ * This simplifies the endianness handling a bit. ++ */ ++#ifdef _BIG_ENDIAN ++#define ZFS_HOST_BYTEORDER (0ULL) ++#else ++#define ZFS_HOST_BYTEORDER (-1ULL) ++#endif ++ ++#define BP_SHOULD_BYTESWAP(bp) (BP_GET_BYTEORDER(bp) != ZFS_HOST_BYTEORDER) ++ ++#define BP_SPRINTF_LEN 320 ++ ++/* ++ * This macro allows code sharing between zfs, libzpool, and mdb. ++ * 'func' is either snprintf() or mdb_snprintf(). ++ * 'ws' (whitespace) can be ' ' for single-line format, '\n' for multi-line. 
++ */ ++#define SPRINTF_BLKPTR(func, ws, buf, bp, type, checksum, compress) \ ++{ \ ++ static const char *copyname[] = \ ++ { "zero", "single", "double", "triple" }; \ ++ int size = BP_SPRINTF_LEN; \ ++ int len = 0; \ ++ int copies = 0; \ ++ int d; \ ++ \ ++ if (bp == NULL) { \ ++ len = func(buf + len, size - len, ""); \ ++ } else if (BP_IS_HOLE(bp)) { \ ++ len = func(buf + len, size - len, ""); \ ++ } else { \ ++ for (d = 0; d < BP_GET_NDVAS(bp); d++) { \ ++ const dva_t *dva = &bp->blk_dva[d]; \ ++ if (DVA_IS_VALID(dva)) \ ++ copies++; \ ++ len += func(buf + len, size - len, \ ++ "DVA[%d]=<%llu:%llx:%llx>%c", d, \ ++ (u_longlong_t)DVA_GET_VDEV(dva), \ ++ (u_longlong_t)DVA_GET_OFFSET(dva), \ ++ (u_longlong_t)DVA_GET_ASIZE(dva), \ ++ ws); \ ++ } \ ++ if (BP_IS_GANG(bp) && \ ++ DVA_GET_ASIZE(&bp->blk_dva[2]) <= \ ++ DVA_GET_ASIZE(&bp->blk_dva[1]) / 2) \ ++ copies--; \ ++ len += func(buf + len, size - len, \ ++ "[L%llu %s] %s %s %s %s %s %s%c" \ ++ "size=%llxL/%llxP birth=%lluL/%lluP fill=%llu%c" \ ++ "cksum=%llx:%llx:%llx:%llx", \ ++ (u_longlong_t)BP_GET_LEVEL(bp), \ ++ type, \ ++ checksum, \ ++ compress, \ ++ BP_GET_BYTEORDER(bp) == 0 ? "BE" : "LE", \ ++ BP_IS_GANG(bp) ? "gang" : "contiguous", \ ++ BP_GET_DEDUP(bp) ? "dedup" : "unique", \ ++ copyname[copies], \ ++ ws, \ ++ (u_longlong_t)BP_GET_LSIZE(bp), \ ++ (u_longlong_t)BP_GET_PSIZE(bp), \ ++ (u_longlong_t)bp->blk_birth, \ ++ (u_longlong_t)BP_PHYSICAL_BIRTH(bp), \ ++ (u_longlong_t)bp->blk_fill, \ ++ ws, \ ++ (u_longlong_t)bp->blk_cksum.zc_word[0], \ ++ (u_longlong_t)bp->blk_cksum.zc_word[1], \ ++ (u_longlong_t)bp->blk_cksum.zc_word[2], \ ++ (u_longlong_t)bp->blk_cksum.zc_word[3]); \ ++ } \ ++ ASSERT(len < size); \ ++} ++ ++#include ++ ++#define BP_GET_BUFC_TYPE(bp) \ ++ (((BP_GET_LEVEL(bp) > 0) || (dmu_ot[BP_GET_TYPE(bp)].ot_metadata)) ? \ ++ ARC_BUFC_METADATA : ARC_BUFC_DATA); ++ ++typedef enum spa_import_type { ++ SPA_IMPORT_EXISTING, ++ SPA_IMPORT_ASSEMBLE ++} spa_import_type_t; ++ ++/* state manipulation functions */ ++extern int spa_open(const char *pool, spa_t **, void *tag); ++extern int spa_open_rewind(const char *pool, spa_t **, void *tag, ++ nvlist_t *policy, nvlist_t **config); ++extern int spa_get_stats(const char *pool, nvlist_t **config, ++ char *altroot, size_t buflen); ++extern int spa_create(const char *pool, nvlist_t *config, nvlist_t *props, ++ const char *history_str, nvlist_t *zplprops); ++extern int spa_import_rootpool(char *devpath, char *devid); ++extern int spa_import(const char *pool, nvlist_t *config, nvlist_t *props, ++ uint64_t flags); ++extern nvlist_t *spa_tryimport(nvlist_t *tryconfig); ++extern int spa_destroy(char *pool); ++extern int spa_export(char *pool, nvlist_t **oldconfig, boolean_t force, ++ boolean_t hardforce); ++extern int spa_reset(char *pool); ++extern void spa_async_request(spa_t *spa, int flag); ++extern void spa_async_unrequest(spa_t *spa, int flag); ++extern void spa_async_suspend(spa_t *spa); ++extern void spa_async_resume(spa_t *spa); ++extern spa_t *spa_inject_addref(char *pool); ++extern void spa_inject_delref(spa_t *spa); ++extern void spa_scan_stat_init(spa_t *spa); ++extern int spa_scan_get_stats(spa_t *spa, pool_scan_stat_t *ps); ++ ++#define SPA_ASYNC_CONFIG_UPDATE 0x01 ++#define SPA_ASYNC_REMOVE 0x02 ++#define SPA_ASYNC_PROBE 0x04 ++#define SPA_ASYNC_RESILVER_DONE 0x08 ++#define SPA_ASYNC_RESILVER 0x10 ++#define SPA_ASYNC_AUTOEXPAND 0x20 ++#define SPA_ASYNC_REMOVE_DONE 0x40 ++#define SPA_ASYNC_REMOVE_STOP 0x80 ++ ++/* ++ * Controls the behavior of spa_vdev_remove(). 
++ */ ++#define SPA_REMOVE_UNSPARE 0x01 ++#define SPA_REMOVE_DONE 0x02 ++ ++/* device manipulation */ ++extern int spa_vdev_add(spa_t *spa, nvlist_t *nvroot); ++extern int spa_vdev_attach(spa_t *spa, uint64_t guid, nvlist_t *nvroot, ++ int replacing); ++extern int spa_vdev_detach(spa_t *spa, uint64_t guid, uint64_t pguid, ++ int replace_done); ++extern int spa_vdev_remove(spa_t *spa, uint64_t guid, boolean_t unspare); ++extern boolean_t spa_vdev_remove_active(spa_t *spa); ++extern int spa_vdev_setpath(spa_t *spa, uint64_t guid, const char *newpath); ++extern int spa_vdev_setfru(spa_t *spa, uint64_t guid, const char *newfru); ++extern int spa_vdev_split_mirror(spa_t *spa, char *newname, nvlist_t *config, ++ nvlist_t *props, boolean_t exp); ++ ++/* spare state (which is global across all pools) */ ++extern void spa_spare_add(vdev_t *vd); ++extern void spa_spare_remove(vdev_t *vd); ++extern boolean_t spa_spare_exists(uint64_t guid, uint64_t *pool, int *refcnt); ++extern void spa_spare_activate(vdev_t *vd); ++ ++/* L2ARC state (which is global across all pools) */ ++extern void spa_l2cache_add(vdev_t *vd); ++extern void spa_l2cache_remove(vdev_t *vd); ++extern boolean_t spa_l2cache_exists(uint64_t guid, uint64_t *pool); ++extern void spa_l2cache_activate(vdev_t *vd); ++extern void spa_l2cache_drop(spa_t *spa); ++ ++/* scanning */ ++extern int spa_scan(spa_t *spa, pool_scan_func_t func); ++extern int spa_scan_stop(spa_t *spa); ++ ++/* spa syncing */ ++extern void spa_sync(spa_t *spa, uint64_t txg); /* only for DMU use */ ++extern void spa_sync_allpools(void); ++ ++/* ++ * DEFERRED_FREE must be large enough that regular blocks are not ++ * deferred. XXX so can't we change it back to 1? ++ */ ++#define SYNC_PASS_DEFERRED_FREE 2 /* defer frees after this pass */ ++#define SYNC_PASS_DONT_COMPRESS 4 /* don't compress after this pass */ ++#define SYNC_PASS_REWRITE 1 /* rewrite new bps after this pass */ ++ ++/* spa namespace global mutex */ ++extern kmutex_t spa_namespace_lock; ++ ++/* ++ * SPA configuration functions in spa_config.c ++ */ ++ ++#define SPA_CONFIG_UPDATE_POOL 0 ++#define SPA_CONFIG_UPDATE_VDEVS 1 ++ ++extern void spa_config_sync(spa_t *, boolean_t, boolean_t); ++extern void spa_config_load(void); ++extern nvlist_t *spa_all_configs(uint64_t *); ++extern void spa_config_set(spa_t *spa, nvlist_t *config); ++extern nvlist_t *spa_config_generate(spa_t *spa, vdev_t *vd, uint64_t txg, ++ int getstats); ++extern void spa_config_update(spa_t *spa, int what); ++ ++/* ++ * Miscellaneous SPA routines in spa_misc.c ++ */ ++ ++/* Namespace manipulation */ ++extern spa_t *spa_lookup(const char *name); ++extern spa_t *spa_add(const char *name, nvlist_t *config, const char *altroot); ++extern void spa_remove(spa_t *spa); ++extern spa_t *spa_next(spa_t *prev); ++ ++/* Refcount functions */ ++extern void spa_open_ref(spa_t *spa, void *tag); ++extern void spa_close(spa_t *spa, void *tag); ++extern boolean_t spa_refcount_zero(spa_t *spa); ++ ++#define SCL_NONE 0x00 ++#define SCL_CONFIG 0x01 ++#define SCL_STATE 0x02 ++#define SCL_L2ARC 0x04 /* hack until L2ARC 2.0 */ ++#define SCL_ALLOC 0x08 ++#define SCL_ZIO 0x10 ++#define SCL_FREE 0x20 ++#define SCL_VDEV 0x40 ++#define SCL_LOCKS 7 ++#define SCL_ALL ((1 << SCL_LOCKS) - 1) ++#define SCL_STATE_ALL (SCL_STATE | SCL_L2ARC | SCL_ZIO) ++ ++/* Pool configuration locks */ ++extern int spa_config_tryenter(spa_t *spa, int locks, void *tag, krw_t rw); ++extern void spa_config_enter(spa_t *spa, int locks, void *tag, krw_t rw); ++extern void spa_config_exit(spa_t 
*spa, int locks, void *tag); ++extern int spa_config_held(spa_t *spa, int locks, krw_t rw); ++ ++/* Pool vdev add/remove lock */ ++extern uint64_t spa_vdev_enter(spa_t *spa); ++extern uint64_t spa_vdev_config_enter(spa_t *spa); ++extern void spa_vdev_config_exit(spa_t *spa, vdev_t *vd, uint64_t txg, ++ int error, char *tag); ++extern int spa_vdev_exit(spa_t *spa, vdev_t *vd, uint64_t txg, int error); ++ ++/* Pool vdev state change lock */ ++extern void spa_vdev_state_enter(spa_t *spa, int oplock); ++extern int spa_vdev_state_exit(spa_t *spa, vdev_t *vd, int error); ++ ++/* Log state */ ++typedef enum spa_log_state { ++ SPA_LOG_UNKNOWN = 0, /* unknown log state */ ++ SPA_LOG_MISSING, /* missing log(s) */ ++ SPA_LOG_CLEAR, /* clear the log(s) */ ++ SPA_LOG_GOOD, /* log(s) are good */ ++} spa_log_state_t; ++ ++extern spa_log_state_t spa_get_log_state(spa_t *spa); ++extern void spa_set_log_state(spa_t *spa, spa_log_state_t state); ++extern int spa_offline_log(spa_t *spa); ++ ++/* Log claim callback */ ++extern void spa_claim_notify(zio_t *zio); ++ ++/* Accessor functions */ ++extern boolean_t spa_shutting_down(spa_t *spa); ++extern struct dsl_pool *spa_get_dsl(spa_t *spa); ++extern blkptr_t *spa_get_rootblkptr(spa_t *spa); ++extern void spa_set_rootblkptr(spa_t *spa, const blkptr_t *bp); ++extern void spa_altroot(spa_t *, char *, size_t); ++extern int spa_sync_pass(spa_t *spa); ++extern char *spa_name(spa_t *spa); ++extern uint64_t spa_guid(spa_t *spa); ++extern uint64_t spa_load_guid(spa_t *spa); ++extern uint64_t spa_last_synced_txg(spa_t *spa); ++extern uint64_t spa_first_txg(spa_t *spa); ++extern uint64_t spa_syncing_txg(spa_t *spa); ++extern uint64_t spa_version(spa_t *spa); ++extern pool_state_t spa_state(spa_t *spa); ++extern spa_load_state_t spa_load_state(spa_t *spa); ++extern uint64_t spa_freeze_txg(spa_t *spa); ++extern uint64_t spa_get_asize(spa_t *spa, uint64_t lsize); ++extern uint64_t spa_get_dspace(spa_t *spa); ++extern void spa_update_dspace(spa_t *spa); ++extern uint64_t spa_version(spa_t *spa); ++extern boolean_t spa_deflate(spa_t *spa); ++extern metaslab_class_t *spa_normal_class(spa_t *spa); ++extern metaslab_class_t *spa_log_class(spa_t *spa); ++extern int spa_max_replication(spa_t *spa); ++extern int spa_prev_software_version(spa_t *spa); ++extern int spa_busy(void); ++extern uint8_t spa_get_failmode(spa_t *spa); ++extern boolean_t spa_suspended(spa_t *spa); ++extern uint64_t spa_bootfs(spa_t *spa); ++extern uint64_t spa_delegation(spa_t *spa); ++extern objset_t *spa_meta_objset(spa_t *spa); ++ ++/* Miscellaneous support routines */ ++extern int spa_rename(const char *oldname, const char *newname); ++extern spa_t *spa_by_guid(uint64_t pool_guid, uint64_t device_guid); ++extern boolean_t spa_guid_exists(uint64_t pool_guid, uint64_t device_guid); ++extern char *spa_strdup(const char *); ++extern void spa_strfree(char *); ++extern uint64_t spa_get_random(uint64_t range); ++extern uint64_t spa_generate_guid(spa_t *spa); ++extern void sprintf_blkptr(char *buf, const blkptr_t *bp); ++extern void spa_freeze(spa_t *spa); ++extern int spa_change_guid(spa_t *spa); ++extern void spa_upgrade(spa_t *spa, uint64_t version); ++extern void spa_evict_all(void); ++extern vdev_t *spa_lookup_by_guid(spa_t *spa, uint64_t guid, ++ boolean_t l2cache); ++extern boolean_t spa_has_spare(spa_t *, uint64_t guid); ++extern uint64_t dva_get_dsize_sync(spa_t *spa, const dva_t *dva); ++extern uint64_t bp_get_dsize_sync(spa_t *spa, const blkptr_t *bp); ++extern uint64_t bp_get_dsize(spa_t *spa, const 
blkptr_t *bp); ++extern boolean_t spa_has_slogs(spa_t *spa); ++extern boolean_t spa_is_root(spa_t *spa); ++extern boolean_t spa_writeable(spa_t *spa); ++ ++extern int spa_mode(spa_t *spa); ++extern uint64_t strtonum(const char *str, char **nptr); ++ ++/* history logging */ ++typedef enum history_log_type { ++ LOG_CMD_POOL_CREATE, ++ LOG_CMD_NORMAL, ++ LOG_INTERNAL ++} history_log_type_t; ++ ++typedef struct history_arg { ++ char *ha_history_str; ++ history_log_type_t ha_log_type; ++ history_internal_events_t ha_event; ++ char *ha_zone; ++ uid_t ha_uid; ++} history_arg_t; ++ ++extern char *spa_his_ievent_table[]; ++ ++extern void spa_history_create_obj(spa_t *spa, dmu_tx_t *tx); ++extern int spa_history_get(spa_t *spa, uint64_t *offset, uint64_t *len_read, ++ char *his_buf); ++extern int spa_history_log(spa_t *spa, const char *his_buf, ++ history_log_type_t what); ++extern void spa_history_log_internal(history_internal_events_t event, ++ spa_t *spa, dmu_tx_t *tx, const char *fmt, ...); ++extern void spa_history_log_version(spa_t *spa, history_internal_events_t evt); ++ ++/* error handling */ ++struct zbookmark; ++extern void spa_log_error(spa_t *spa, zio_t *zio); ++extern void zfs_ereport_post(const char *class, spa_t *spa, vdev_t *vd, ++ zio_t *zio, uint64_t stateoroffset, uint64_t length); ++extern void zfs_post_remove(spa_t *spa, vdev_t *vd); ++extern void zfs_post_state_change(spa_t *spa, vdev_t *vd); ++extern void zfs_post_autoreplace(spa_t *spa, vdev_t *vd); ++extern uint64_t spa_get_errlog_size(spa_t *spa); ++extern int spa_get_errlog(spa_t *spa, void *uaddr, size_t *count); ++extern void spa_errlog_rotate(spa_t *spa); ++extern void spa_errlog_drain(spa_t *spa); ++extern void spa_errlog_sync(spa_t *spa, uint64_t txg); ++extern void spa_get_errlists(spa_t *spa, avl_tree_t *last, avl_tree_t *scrub); ++ ++/* vdev cache */ ++extern void vdev_cache_stat_init(void); ++extern void vdev_cache_stat_fini(void); ++ ++/* Initialization and termination */ ++extern void spa_init(int flags); ++extern void spa_fini(void); ++extern void spa_boot_init(void); ++ ++/* properties */ ++extern int spa_prop_set(spa_t *spa, nvlist_t *nvp); ++extern int spa_prop_get(spa_t *spa, nvlist_t **nvp); ++extern void spa_prop_clear_bootfs(spa_t *spa, uint64_t obj, dmu_tx_t *tx); ++extern void spa_configfile_set(spa_t *, nvlist_t *, boolean_t); ++ ++/* asynchronous event notification */ ++extern void spa_event_notify(spa_t *spa, vdev_t *vdev, const char *name); ++ ++#ifdef ZFS_DEBUG ++#define dprintf_bp(bp, fmt, ...) do { \ ++ if (zfs_flags & ZFS_DEBUG_DPRINTF) { \ ++ char *__blkbuf = kmem_alloc(BP_SPRINTF_LEN, KM_PUSHPAGE); \ ++ sprintf_blkptr(__blkbuf, (bp)); \ ++ dprintf(fmt " %s\n", __VA_ARGS__, __blkbuf); \ ++ kmem_free(__blkbuf, BP_SPRINTF_LEN); \ ++ } \ ++_NOTE(CONSTCOND) } while (0) ++#else ++#define dprintf_bp(bp, fmt, ...) ++#endif ++ ++extern boolean_t spa_debug_enabled(spa_t *spa); ++#define spa_dbgmsg(spa, ...) \ ++{ \ ++ if (spa_debug_enabled(spa)) \ ++ zfs_dbgmsg(__VA_ARGS__); \ ++} ++ ++extern int spa_mode_global; /* mode, e.g. 
FREAD | FWRITE */ ++ ++#ifdef __cplusplus ++} ++#endif ++ ++#endif /* _SYS_SPA_H */ +diff -uNr linux-3.2.33-go.orig/include/zfs/sys/spa_impl.h linux-3.2.33-go/include/zfs/sys/spa_impl.h +--- linux-3.2.33-go.orig/include/zfs/sys/spa_impl.h 1970-01-01 01:00:00.000000000 +0100 ++++ linux-3.2.33-go/include/zfs/sys/spa_impl.h 2012-11-16 23:25:34.337039473 +0100 +@@ -0,0 +1,240 @@ ++/* ++ * CDDL HEADER START ++ * ++ * The contents of this file are subject to the terms of the ++ * Common Development and Distribution License (the "License"). ++ * You may not use this file except in compliance with the License. ++ * ++ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE ++ * or http://www.opensolaris.org/os/licensing. ++ * See the License for the specific language governing permissions ++ * and limitations under the License. ++ * ++ * When distributing Covered Code, include this CDDL HEADER in each ++ * file and include the License file at usr/src/OPENSOLARIS.LICENSE. ++ * If applicable, add the following below this CDDL HEADER, with the ++ * fields enclosed by brackets "[]" replaced with your own identifying ++ * information: Portions Copyright [yyyy] [name of copyright owner] ++ * ++ * CDDL HEADER END ++ */ ++/* ++ * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2011 by Delphix. All rights reserved. ++ * Copyright 2011 Nexenta Systems, Inc. All rights reserved. ++ */ ++ ++#ifndef _SYS_SPA_IMPL_H ++#define _SYS_SPA_IMPL_H ++ ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++ ++#ifdef __cplusplus ++extern "C" { ++#endif ++ ++typedef struct spa_error_entry { ++ zbookmark_t se_bookmark; ++ char *se_name; ++ avl_node_t se_avl; ++} spa_error_entry_t; ++ ++typedef struct spa_history_phys { ++ uint64_t sh_pool_create_len; /* ending offset of zpool create */ ++ uint64_t sh_phys_max_off; /* physical EOF */ ++ uint64_t sh_bof; /* logical BOF */ ++ uint64_t sh_eof; /* logical EOF */ ++ uint64_t sh_records_lost; /* num of records overwritten */ ++} spa_history_phys_t; ++ ++struct spa_aux_vdev { ++ uint64_t sav_object; /* MOS object for device list */ ++ nvlist_t *sav_config; /* cached device config */ ++ vdev_t **sav_vdevs; /* devices */ ++ int sav_count; /* number devices */ ++ boolean_t sav_sync; /* sync the device list */ ++ nvlist_t **sav_pending; /* pending device additions */ ++ uint_t sav_npending; /* # pending devices */ ++}; ++ ++typedef struct spa_config_lock { ++ kmutex_t scl_lock; ++ kthread_t *scl_writer; ++ int scl_write_wanted; ++ kcondvar_t scl_cv; ++ refcount_t scl_count; ++} spa_config_lock_t; ++ ++typedef struct spa_config_dirent { ++ list_node_t scd_link; ++ char *scd_path; ++} spa_config_dirent_t; ++ ++enum zio_taskq_type { ++ ZIO_TASKQ_ISSUE = 0, ++ ZIO_TASKQ_ISSUE_HIGH, ++ ZIO_TASKQ_INTERRUPT, ++ ZIO_TASKQ_INTERRUPT_HIGH, ++ ZIO_TASKQ_TYPES ++}; ++ ++/* ++ * State machine for the zpool-pooname process. 
The states transitions ++ * are done as follows: ++ * ++ * From To Routine ++ * PROC_NONE -> PROC_CREATED spa_activate() ++ * PROC_CREATED -> PROC_ACTIVE spa_thread() ++ * PROC_ACTIVE -> PROC_DEACTIVATE spa_deactivate() ++ * PROC_DEACTIVATE -> PROC_GONE spa_thread() ++ * PROC_GONE -> PROC_NONE spa_deactivate() ++ */ ++typedef enum spa_proc_state { ++ SPA_PROC_NONE, /* spa_proc = &p0, no process created */ ++ SPA_PROC_CREATED, /* spa_activate() has proc, is waiting */ ++ SPA_PROC_ACTIVE, /* taskqs created, spa_proc set */ ++ SPA_PROC_DEACTIVATE, /* spa_deactivate() requests process exit */ ++ SPA_PROC_GONE /* spa_thread() is exiting, spa_proc = &p0 */ ++} spa_proc_state_t; ++ ++struct spa { ++ /* ++ * Fields protected by spa_namespace_lock. ++ */ ++ char spa_name[MAXNAMELEN]; /* pool name */ ++ char *spa_comment; /* comment */ ++ avl_node_t spa_avl; /* node in spa_namespace_avl */ ++ nvlist_t *spa_config; /* last synced config */ ++ nvlist_t *spa_config_syncing; /* currently syncing config */ ++ nvlist_t *spa_config_splitting; /* config for splitting */ ++ nvlist_t *spa_load_info; /* info and errors from load */ ++ uint64_t spa_config_txg; /* txg of last config change */ ++ int spa_sync_pass; /* iterate-to-convergence */ ++ pool_state_t spa_state; /* pool state */ ++ int spa_inject_ref; /* injection references */ ++ uint8_t spa_sync_on; /* sync threads are running */ ++ spa_load_state_t spa_load_state; /* current load operation */ ++ uint64_t spa_import_flags; /* import specific flags */ ++ taskq_t *spa_zio_taskq[ZIO_TYPES][ZIO_TASKQ_TYPES]; ++ dsl_pool_t *spa_dsl_pool; ++ metaslab_class_t *spa_normal_class; /* normal data class */ ++ metaslab_class_t *spa_log_class; /* intent log data class */ ++ uint64_t spa_first_txg; /* first txg after spa_open() */ ++ uint64_t spa_final_txg; /* txg of export/destroy */ ++ uint64_t spa_freeze_txg; /* freeze pool at this txg */ ++ uint64_t spa_load_max_txg; /* best initial ub_txg */ ++ uint64_t spa_claim_max_txg; /* highest claimed birth txg */ ++ timespec_t spa_loaded_ts; /* 1st successful open time */ ++ objset_t *spa_meta_objset; /* copy of dp->dp_meta_objset */ ++ txg_list_t spa_vdev_txg_list; /* per-txg dirty vdev list */ ++ vdev_t *spa_root_vdev; /* top-level vdev container */ ++ uint64_t spa_config_guid; /* config pool guid */ ++ uint64_t spa_load_guid; /* spa_load initialized guid */ ++ list_t spa_config_dirty_list; /* vdevs with dirty config */ ++ list_t spa_state_dirty_list; /* vdevs with dirty state */ ++ spa_aux_vdev_t spa_spares; /* hot spares */ ++ spa_aux_vdev_t spa_l2cache; /* L2ARC cache devices */ ++ uint64_t spa_config_object; /* MOS object for pool config */ ++ uint64_t spa_config_generation; /* config generation number */ ++ uint64_t spa_syncing_txg; /* txg currently syncing */ ++ bpobj_t spa_deferred_bpobj; /* deferred-free bplist */ ++ bplist_t spa_free_bplist[TXG_SIZE]; /* bplist of stuff to free */ ++ uberblock_t spa_ubsync; /* last synced uberblock */ ++ uberblock_t spa_uberblock; /* current uberblock */ ++ boolean_t spa_extreme_rewind; /* rewind past deferred frees */ ++ uint64_t spa_last_io; /* lbolt of last non-scan I/O */ ++ kmutex_t spa_scrub_lock; /* resilver/scrub lock */ ++ uint64_t spa_scrub_inflight; /* in-flight scrub I/Os */ ++ kcondvar_t spa_scrub_io_cv; /* scrub I/O completion */ ++ uint8_t spa_scrub_active; /* active or suspended? 
*/ ++ uint8_t spa_scrub_type; /* type of scrub we're doing */ ++ uint8_t spa_scrub_finished; /* indicator to rotate logs */ ++ uint8_t spa_scrub_started; /* started since last boot */ ++ uint8_t spa_scrub_reopen; /* scrub doing vdev_reopen */ ++ uint64_t spa_scan_pass_start; /* start time per pass/reboot */ ++ uint64_t spa_scan_pass_exam; /* examined bytes per pass */ ++ kmutex_t spa_async_lock; /* protect async state */ ++ kthread_t *spa_async_thread; /* thread doing async task */ ++ int spa_async_suspended; /* async tasks suspended */ ++ kcondvar_t spa_async_cv; /* wait for thread_exit() */ ++ uint16_t spa_async_tasks; /* async task mask */ ++ char *spa_root; /* alternate root directory */ ++ uint64_t spa_ena; /* spa-wide ereport ENA */ ++ int spa_last_open_failed; /* error if last open failed */ ++ uint64_t spa_last_ubsync_txg; /* "best" uberblock txg */ ++ uint64_t spa_last_ubsync_txg_ts; /* timestamp from that ub */ ++ uint64_t spa_load_txg; /* ub txg that loaded */ ++ uint64_t spa_load_txg_ts; /* timestamp from that ub */ ++ uint64_t spa_load_meta_errors; /* verify metadata err count */ ++ uint64_t spa_load_data_errors; /* verify data err count */ ++ uint64_t spa_verify_min_txg; /* start txg of verify scrub */ ++ kmutex_t spa_errlog_lock; /* error log lock */ ++ uint64_t spa_errlog_last; /* last error log object */ ++ uint64_t spa_errlog_scrub; /* scrub error log object */ ++ kmutex_t spa_errlist_lock; /* error list/ereport lock */ ++ avl_tree_t spa_errlist_last; /* last error list */ ++ avl_tree_t spa_errlist_scrub; /* scrub error list */ ++ uint64_t spa_deflate; /* should we deflate? */ ++ uint64_t spa_history; /* history object */ ++ kmutex_t spa_history_lock; /* history lock */ ++ vdev_t *spa_pending_vdev; /* pending vdev additions */ ++ kmutex_t spa_props_lock; /* property lock */ ++ uint64_t spa_pool_props_object; /* object for properties */ ++ uint64_t spa_bootfs; /* default boot filesystem */ ++ uint64_t spa_failmode; /* failure mode for the pool */ ++ uint64_t spa_delegation; /* delegation on/off */ ++ list_t spa_config_list; /* previous cache file(s) */ ++ zio_t *spa_async_zio_root; /* root of all async I/O */ ++ zio_t *spa_suspend_zio_root; /* root of all suspended I/O */ ++ kmutex_t spa_suspend_lock; /* protects suspend_zio_root */ ++ kcondvar_t spa_suspend_cv; /* notification of resume */ ++ uint8_t spa_suspended; /* pool is suspended */ ++ uint8_t spa_claiming; /* pool is doing zil_claim() */ ++ boolean_t spa_debug; /* debug enabled? 
*/ ++ boolean_t spa_is_root; /* pool is root */ ++ int spa_minref; /* num refs when first opened */ ++ int spa_mode; /* FREAD | FWRITE */ ++ spa_log_state_t spa_log_state; /* log state */ ++ uint64_t spa_autoexpand; /* lun expansion on/off */ ++ ddt_t *spa_ddt[ZIO_CHECKSUM_FUNCTIONS]; /* in-core DDTs */ ++ uint64_t spa_ddt_stat_object; /* DDT statistics */ ++ uint64_t spa_dedup_ditto; /* dedup ditto threshold */ ++ uint64_t spa_dedup_checksum; /* default dedup checksum */ ++ uint64_t spa_dspace; /* dspace in normal class */ ++ kmutex_t spa_vdev_top_lock; /* dueling offline/remove */ ++ kmutex_t spa_proc_lock; /* protects spa_proc* */ ++ kcondvar_t spa_proc_cv; /* spa_proc_state transitions */ ++ spa_proc_state_t spa_proc_state; /* see definition */ ++ proc_t *spa_proc; /* "zpool-poolname" process */ ++ uint64_t spa_did; /* if procp != p0, did of t1 */ ++ boolean_t spa_autoreplace; /* autoreplace set in open */ ++ int spa_vdev_locks; /* locks grabbed */ ++ uint64_t spa_creation_version; /* version at pool creation */ ++ uint64_t spa_prev_software_version; ++ /* ++ * spa_refcnt & spa_config_lock must be the last elements ++ * because refcount_t changes size based on compilation options. ++ * In order for the MDB module to function correctly, the other ++ * fields must remain in the same location. ++ */ ++ spa_config_lock_t spa_config_lock[SCL_LOCKS]; /* config changes */ ++ refcount_t spa_refcount; /* number of opens */ ++}; ++ ++extern char *spa_config_path; ++ ++#ifdef __cplusplus ++} ++#endif ++ ++#endif /* _SYS_SPA_IMPL_H */ +diff -uNr linux-3.2.33-go.orig/include/zfs/sys/txg.h linux-3.2.33-go/include/zfs/sys/txg.h +--- linux-3.2.33-go.orig/include/zfs/sys/txg.h 1970-01-01 01:00:00.000000000 +0100 ++++ linux-3.2.33-go/include/zfs/sys/txg.h 2012-11-16 23:25:34.338039461 +0100 +@@ -0,0 +1,140 @@ ++/* ++ * CDDL HEADER START ++ * ++ * The contents of this file are subject to the terms of the ++ * Common Development and Distribution License (the "License"). ++ * You may not use this file except in compliance with the License. ++ * ++ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE ++ * or http://www.opensolaris.org/os/licensing. ++ * See the License for the specific language governing permissions ++ * and limitations under the License. ++ * ++ * When distributing Covered Code, include this CDDL HEADER in each ++ * file and include the License file at usr/src/OPENSOLARIS.LICENSE. ++ * If applicable, add the following below this CDDL HEADER, with the ++ * fields enclosed by brackets "[]" replaced with your own identifying ++ * information: Portions Copyright [yyyy] [name of copyright owner] ++ * ++ * CDDL HEADER END ++ */ ++/* ++ * Copyright 2010 Sun Microsystems, Inc. All rights reserved. ++ * Use is subject to license terms. 
++ */ ++ ++#ifndef _SYS_TXG_H ++#define _SYS_TXG_H ++ ++#include ++#include ++ ++#ifdef __cplusplus ++extern "C" { ++#endif ++ ++#define TXG_CONCURRENT_STATES 3 /* open, quiescing, syncing */ ++#define TXG_SIZE 4 /* next power of 2 */ ++#define TXG_MASK (TXG_SIZE - 1) /* mask for size */ ++#define TXG_INITIAL TXG_SIZE /* initial txg */ ++#define TXG_IDX (txg & TXG_MASK) ++ ++/* Number of txgs worth of frees we defer adding to in-core spacemaps */ ++#define TXG_DEFER_SIZE 2 ++ ++#define TXG_WAIT 1ULL ++#define TXG_NOWAIT 2ULL ++ ++typedef struct tx_cpu tx_cpu_t; ++ ++typedef struct txg_handle { ++ tx_cpu_t *th_cpu; ++ uint64_t th_txg; ++} txg_handle_t; ++ ++typedef struct txg_node { ++ struct txg_node *tn_next[TXG_SIZE]; ++ uint8_t tn_member[TXG_SIZE]; ++} txg_node_t; ++ ++typedef struct txg_list { ++ kmutex_t tl_lock; ++ size_t tl_offset; ++ txg_node_t *tl_head[TXG_SIZE]; ++} txg_list_t; ++ ++struct dsl_pool; ++ ++extern void txg_init(struct dsl_pool *dp, uint64_t txg); ++extern void txg_fini(struct dsl_pool *dp); ++extern void txg_sync_start(struct dsl_pool *dp); ++extern void txg_sync_stop(struct dsl_pool *dp); ++extern uint64_t txg_hold_open(struct dsl_pool *dp, txg_handle_t *txghp); ++extern void txg_rele_to_quiesce(txg_handle_t *txghp); ++extern void txg_rele_to_sync(txg_handle_t *txghp); ++extern void txg_register_callbacks(txg_handle_t *txghp, list_t *tx_callbacks); ++ ++/* ++ * Delay the caller by the specified number of ticks or until ++ * the txg closes (whichever comes first). This is intended ++ * to be used to throttle writers when the system nears its ++ * capacity. ++ */ ++extern void txg_delay(struct dsl_pool *dp, uint64_t txg, int ticks); ++ ++/* ++ * Wait until the given transaction group has finished syncing. ++ * Try to make this happen as soon as possible (eg. kick off any ++ * necessary syncs immediately). If txg==0, wait for the currently open ++ * txg to finish syncing. ++ */ ++extern void txg_wait_synced(struct dsl_pool *dp, uint64_t txg); ++ ++/* ++ * Wait until the given transaction group, or one after it, is ++ * the open transaction group. Try to make this happen as soon ++ * as possible (eg. kick off any necessary syncs immediately). ++ * If txg == 0, wait for the next open txg. ++ */ ++extern void txg_wait_open(struct dsl_pool *dp, uint64_t txg); ++ ++/* ++ * Returns TRUE if we are "backed up" waiting for the syncing ++ * transaction to complete; otherwise returns FALSE. ++ */ ++extern boolean_t txg_stalled(struct dsl_pool *dp); ++ ++/* returns TRUE if someone is waiting for the next txg to sync */ ++extern boolean_t txg_sync_waiting(struct dsl_pool *dp); ++ ++/* ++ * Wait for pending commit callbacks of already-synced transactions to finish ++ * processing. ++ */ ++extern void txg_wait_callbacks(struct dsl_pool *dp); ++ ++/* ++ * Per-txg object lists. 
++ */ ++ ++#define TXG_CLEAN(txg) ((txg) - 1) ++ ++extern void txg_list_create(txg_list_t *tl, size_t offset); ++extern void txg_list_destroy(txg_list_t *tl); ++extern int txg_list_empty(txg_list_t *tl, uint64_t txg); ++extern int txg_list_add(txg_list_t *tl, void *p, uint64_t txg); ++extern int txg_list_add_tail(txg_list_t *tl, void *p, uint64_t txg); ++extern void *txg_list_remove(txg_list_t *tl, uint64_t txg); ++extern void *txg_list_remove_this(txg_list_t *tl, void *p, uint64_t txg); ++extern int txg_list_member(txg_list_t *tl, void *p, uint64_t txg); ++extern void *txg_list_head(txg_list_t *tl, uint64_t txg); ++extern void *txg_list_next(txg_list_t *tl, void *p, uint64_t txg); ++ ++/* Global tuning */ ++extern int zfs_txg_timeout; ++ ++#ifdef __cplusplus ++} ++#endif ++ ++#endif /* _SYS_TXG_H */ +diff -uNr linux-3.2.33-go.orig/include/zfs/sys/txg_impl.h linux-3.2.33-go/include/zfs/sys/txg_impl.h +--- linux-3.2.33-go.orig/include/zfs/sys/txg_impl.h 1970-01-01 01:00:00.000000000 +0100 ++++ linux-3.2.33-go/include/zfs/sys/txg_impl.h 2012-11-16 23:25:34.337039473 +0100 +@@ -0,0 +1,75 @@ ++/* ++ * CDDL HEADER START ++ * ++ * The contents of this file are subject to the terms of the ++ * Common Development and Distribution License (the "License"). ++ * You may not use this file except in compliance with the License. ++ * ++ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE ++ * or http://www.opensolaris.org/os/licensing. ++ * See the License for the specific language governing permissions ++ * and limitations under the License. ++ * ++ * When distributing Covered Code, include this CDDL HEADER in each ++ * file and include the License file at usr/src/OPENSOLARIS.LICENSE. ++ * If applicable, add the following below this CDDL HEADER, with the ++ * fields enclosed by brackets "[]" replaced with your own identifying ++ * information: Portions Copyright [yyyy] [name of copyright owner] ++ * ++ * CDDL HEADER END ++ */ ++/* ++ * Copyright 2009 Sun Microsystems, Inc. All rights reserved. ++ * Use is subject to license terms. 
++ */
++
++#ifndef _SYS_TXG_IMPL_H
++#define _SYS_TXG_IMPL_H
++
++#include <sys/spa.h>
++#include <sys/txg.h>
++
++#ifdef __cplusplus
++extern "C" {
++#endif
++
++struct tx_cpu {
++ kmutex_t tc_lock;
++ kcondvar_t tc_cv[TXG_SIZE];
++ uint64_t tc_count[TXG_SIZE];
++ list_t tc_callbacks[TXG_SIZE]; /* commit cb list */
++ char tc_pad[16];
++};
++
++typedef struct tx_state {
++ tx_cpu_t *tx_cpu; /* protects right to enter txg */
++ kmutex_t tx_sync_lock; /* protects tx_state_t */
++ uint64_t tx_open_txg; /* currently open txg id */
++ uint64_t tx_quiesced_txg; /* quiesced txg waiting for sync */
++ uint64_t tx_syncing_txg; /* currently syncing txg id */
++ uint64_t tx_synced_txg; /* last synced txg id */
++
++ uint64_t tx_sync_txg_waiting; /* txg we're waiting to sync */
++ uint64_t tx_quiesce_txg_waiting; /* txg we're waiting to open */
++
++ kcondvar_t tx_sync_more_cv;
++ kcondvar_t tx_sync_done_cv;
++ kcondvar_t tx_quiesce_more_cv;
++ kcondvar_t tx_quiesce_done_cv;
++ kcondvar_t tx_timeout_cv;
++ kcondvar_t tx_exit_cv; /* wait for all threads to exit */
++
++ uint8_t tx_threads; /* number of threads */
++ uint8_t tx_exiting; /* set when we're exiting */
++
++ kthread_t *tx_sync_thread;
++ kthread_t *tx_quiesce_thread;
++
++ taskq_t *tx_commit_cb_taskq; /* commit callback taskq */
++} tx_state_t;
++
++#ifdef __cplusplus
++}
++#endif
++
++#endif /* _SYS_TXG_IMPL_H */
+diff -uNr linux-3.2.33-go.orig/include/zfs/sys/u8_textprep_data.h linux-3.2.33-go/include/zfs/sys/u8_textprep_data.h
+--- linux-3.2.33-go.orig/include/zfs/sys/u8_textprep_data.h 1970-01-01 01:00:00.000000000 +0100
++++ linux-3.2.33-go/include/zfs/sys/u8_textprep_data.h 2012-11-16 23:25:34.341039426 +0100
+@@ -0,0 +1,35376 @@
++/*
++ * CDDL HEADER START
++ *
++ * The contents of this file are subject to the terms of the
++ * Common Development and Distribution License (the "License").
++ * You may not use this file except in compliance with the License.
++ *
++ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
++ * or http://www.opensolaris.org/os/licensing.
++ * See the License for the specific language governing permissions
++ * and limitations under the License.
++ *
++ * When distributing Covered Code, include this CDDL HEADER in each
++ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
++ * If applicable, add the following below this CDDL HEADER, with the
++ * fields enclosed by brackets "[]" replaced with your own identifying
++ * information: Portions Copyright [yyyy] [name of copyright owner]
++ *
++ * CDDL HEADER END
++ */
++/*
++ * Copyright 2007 Sun Microsystems, Inc. All rights reserved.
++ * Use is subject to license terms.
++ */
++/*
++ * COPYRIGHT AND PERMISSION NOTICE
++ *
++ * Copyright (c) 1991-2006 Unicode, Inc. All rights reserved. Distributed under
++ * the Terms of Use in http://www.unicode.org/copyright.html.
++ *
++ * Permission is hereby granted, free of charge, to any person obtaining
++ * a copy of the Unicode data files and any associated documentation (the
++ * "Data Files") or Unicode software and any associated documentation (the
++ * "Software") to deal in the Data Files or Software without restriction,
++ * including without limitation the rights to use, copy, modify, merge,
++ * publish, distribute, and/or sell copies of the Data Files or Software, and
++ * to permit persons to whom the Data Files or Software are furnished to do so,
++ * provided that (a) the above copyright notice(s) and this permission notice
++ * appear with all copies of the Data Files or Software, (b) both the above
++ * copyright notice(s) and this permission notice appear in associated
++ * documentation, and (c) there is clear notice in each modified Data File or
++ * in the Software as well as in the documentation associated with the Data
++ * File(s) or Software that the data or software has been modified.
++ *
++ * THE DATA FILES AND SOFTWARE ARE PROVIDED "AS IS", WITHOUT WARRANTY OF ANY
++ * KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
++ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT OF
++ * THIRD PARTY RIGHTS. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR HOLDERS
++ * INCLUDED IN THIS NOTICE BE LIABLE FOR ANY CLAIM, OR ANY SPECIAL INDIRECT OR
++ * CONSEQUENTIAL DAMAGES, OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE,
++ * DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER
++ * TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE
++ * OF THE DATA FILES OR SOFTWARE.
++ *
++ * Except as contained in this notice, the name of a copyright holder shall not
++ * be used in advertising or otherwise to promote the sale, use or other
++ * dealings in these Data Files or Software without prior written authorization
++ * of the copyright holder.
++ *
++ * Unicode and the Unicode logo are trademarks of Unicode, Inc., and may be
++ * registered in some jurisdictions. All other trademarks and registered
++ * trademarks mentioned herein are the property of their respective owners.
++ */
++/*
++ * This file has been modified by Sun Microsystems, Inc.
++ */
++
++#ifndef _SYS_U8_TEXTPREP_DATA_H
++#define _SYS_U8_TEXTPREP_DATA_H
++
++
++
++#include <sys/types.h>
++
++#ifdef __cplusplus
++extern "C" {
++#endif
++
++/*
++ * To get to the combining class data, composition mappings, decomposition
++ * mappings, and case conversion mappings of Unicode, the data structures
++ * formulated and their meanings are like the following:
++ *
++ * Each UTF-8 character is seen as a 4-byte entity so that U+0061 (or 0x61 in
++ * UTF-8) would be seen as 0x00 0x00 0x00 0x61. Similarly, U+1D15E would be
++ * 0xF0 0x9D 0x85 0x9E in UTF-8.
++ *
++ * The first byte (MSB) value is an index to the b1_tbl, such as
++ * u8_common_b1_tbl and u8_composition_b1_tbl tables. A b1_tbl has
++ * indices to b2_tbl tables that have indices to b3_tbl. Each b3_tbl has
++ * either indices to b4_tbl or indices to b4_tbl and base values for
++ * displacement calculations later by using the u8_displacement_t type at
++ * below. Each b4_tbl table then has indices to the final tables.
++ * ++ * As an example, if we have a character with code value of U+1D15E which is ++ * 0xF0 0x9D 0x85 0x9E in UTF-8, the target decomposition character bytes ++ * that will be mapped by the mapping procedure would be the ones between ++ * the start_index and the end_index computed as like the following: ++ * ++ * b2_tbl_id = u8_common_b1_tbl[0][0xF0]; ++ * b3_tbl_id = u8_decomp_b2_tbl[0][b2_tbl_id][0x9D]; ++ * b4_tbl_id = u8_decomp_b3_tbl[0][b3_tbl_id][0x85].tbl_id; ++ * b4_base = u8_decomp_b3_tbl[0][b3_tbl_id][0x85].base; ++ * if (b4_tbl_id >= 0x8000) { ++ * b4_tbl_id -= 0x8000; ++ * start_index = u8_decomp_b4_16bit_tbl[0][b4_tbl_id][0x9E]; ++ * end_index = u8_decomp_b4_16bit_tbl[0][b4_tbl_id][0x9E + 1]; ++ * } else { ++ * start_index = u8_decomp_b4_tbl[0][b4_tbl_id][0x9E]; ++ * end_index = u8_decomp_b4_tbl[0][b4_tbl_id][0x9E + 1]; ++ * } ++ * ++ * The start_index and the end_index can be used to retrieve the bytes ++ * possibly of multiple UTF-8 characters from the final tables. ++ * ++ * The "[0]" at the above indicates this is for Unicode Version 3.2.0 data ++ * as of today. Consequently, the "[1]" indicates another Unicode version ++ * data and it is Unicode 5.0.0 as of today. ++ * ++ * The mapping procedures and the data structures are more or less similar or ++ * alike among different mappings. You might want to read the u8_textprep.c ++ * for specific details. ++ * ++ * The tool programs created and used to generate the tables in this file are ++ * saved at PSARC/2007/149/materials/ as tools.tar.gz file. ++ */ ++ ++/* The following is a component type for the b4_tbl vectors. */ ++typedef struct { ++ uint16_t tbl_id; ++ uint16_t base; ++} u8_displacement_t; ++ ++/* ++ * The U8_TBL_ELEMENT_NOT_DEF macro indicates a byte that is not defined or ++ * used. The U8_TBL_ELEMENT_FILLER indicates the end of a UTF-8 character at ++ * the final tables. ++ */ ++#define U8_TBL_ELEMENT_NOT_DEF (0xff) ++#define N_ U8_TBL_ELEMENT_NOT_DEF ++ ++#define U8_TBL_ELEMENT_FILLER (0xf7) ++#define FIL_ U8_TBL_ELEMENT_FILLER ++ ++/* ++ * The common b1_tbl for combining class, decompositions, tolower, and ++ * toupper case conversion mappings. 
++ */ ++static const uchar_t u8_common_b1_tbl[2][256] = { ++ { ++ 0, N_, N_, N_, N_, N_, N_, N_, ++ N_, N_, N_, N_, N_, N_, N_, N_, ++ N_, N_, N_, N_, N_, N_, N_, N_, ++ N_, N_, N_, N_, N_, N_, N_, N_, ++ N_, N_, N_, N_, N_, N_, N_, N_, ++ N_, N_, N_, N_, N_, N_, N_, N_, ++ N_, N_, N_, N_, N_, N_, N_, N_, ++ N_, N_, N_, N_, N_, N_, N_, N_, ++ N_, N_, N_, N_, N_, N_, N_, N_, ++ N_, N_, N_, N_, N_, N_, N_, N_, ++ N_, N_, N_, N_, N_, N_, N_, N_, ++ N_, N_, N_, N_, N_, N_, N_, N_, ++ N_, N_, N_, N_, N_, N_, N_, N_, ++ N_, N_, N_, N_, N_, N_, N_, N_, ++ N_, N_, N_, N_, N_, N_, N_, N_, ++ N_, N_, N_, N_, N_, N_, N_, N_, ++ N_, N_, N_, N_, N_, N_, N_, N_, ++ N_, N_, N_, N_, N_, N_, N_, N_, ++ N_, N_, N_, N_, N_, N_, N_, N_, ++ N_, N_, N_, N_, N_, N_, N_, N_, ++ N_, N_, N_, N_, N_, N_, N_, N_, ++ N_, N_, N_, N_, N_, N_, N_, N_, ++ N_, N_, N_, N_, N_, N_, N_, N_, ++ N_, N_, N_, N_, N_, N_, N_, N_, ++ N_, N_, N_, N_, N_, N_, N_, N_, ++ N_, N_, N_, N_, N_, N_, N_, N_, ++ N_, N_, N_, N_, N_, N_, N_, N_, ++ N_, N_, N_, N_, N_, N_, N_, N_, ++ N_, N_, N_, N_, N_, N_, N_, N_, ++ N_, N_, N_, N_, N_, N_, N_, N_, ++ 1, N_, N_, N_, N_, N_, N_, N_, ++ N_, N_, N_, N_, N_, N_, N_, N_, ++ }, ++ { ++ 0, N_, N_, N_, N_, N_, N_, N_, ++ N_, N_, N_, N_, N_, N_, N_, N_, ++ N_, N_, N_, N_, N_, N_, N_, N_, ++ N_, N_, N_, N_, N_, N_, N_, N_, ++ N_, N_, N_, N_, N_, N_, N_, N_, ++ N_, N_, N_, N_, N_, N_, N_, N_, ++ N_, N_, N_, N_, N_, N_, N_, N_, ++ N_, N_, N_, N_, N_, N_, N_, N_, ++ N_, N_, N_, N_, N_, N_, N_, N_, ++ N_, N_, N_, N_, N_, N_, N_, N_, ++ N_, N_, N_, N_, N_, N_, N_, N_, ++ N_, N_, N_, N_, N_, N_, N_, N_, ++ N_, N_, N_, N_, N_, N_, N_, N_, ++ N_, N_, N_, N_, N_, N_, N_, N_, ++ N_, N_, N_, N_, N_, N_, N_, N_, ++ N_, N_, N_, N_, N_, N_, N_, N_, ++ N_, N_, N_, N_, N_, N_, N_, N_, ++ N_, N_, N_, N_, N_, N_, N_, N_, ++ N_, N_, N_, N_, N_, N_, N_, N_, ++ N_, N_, N_, N_, N_, N_, N_, N_, ++ N_, N_, N_, N_, N_, N_, N_, N_, ++ N_, N_, N_, N_, N_, N_, N_, N_, ++ N_, N_, N_, N_, N_, N_, N_, N_, ++ N_, N_, N_, N_, N_, N_, N_, N_, ++ N_, N_, N_, N_, N_, N_, N_, N_, ++ N_, N_, N_, N_, N_, N_, N_, N_, ++ N_, N_, N_, N_, N_, N_, N_, N_, ++ N_, N_, N_, N_, N_, N_, N_, N_, ++ N_, N_, N_, N_, N_, N_, N_, N_, ++ N_, N_, N_, N_, N_, N_, N_, N_, ++ 1, N_, N_, N_, N_, N_, N_, N_, ++ N_, N_, N_, N_, N_, N_, N_, N_, ++ }, ++}; ++ ++static const uchar_t u8_combining_class_b2_tbl[2][2][256] = { ++ { ++ { ++ 0, N_, N_, N_, N_, N_, N_, N_, ++ N_, N_, N_, N_, N_, N_, N_, N_, ++ N_, N_, N_, N_, N_, N_, N_, N_, ++ N_, N_, N_, N_, N_, N_, N_, N_, ++ N_, N_, N_, N_, N_, N_, N_, N_, ++ N_, N_, N_, N_, N_, N_, N_, N_, ++ N_, N_, N_, N_, N_, N_, N_, N_, ++ N_, N_, N_, N_, N_, N_, N_, N_, ++ N_, N_, N_, N_, N_, N_, N_, N_, ++ N_, N_, N_, N_, N_, N_, N_, N_, ++ N_, N_, N_, N_, N_, N_, N_, N_, ++ N_, N_, N_, N_, N_, N_, N_, N_, ++ N_, N_, N_, N_, N_, N_, N_, N_, ++ N_, N_, N_, N_, N_, N_, N_, N_, ++ N_, N_, N_, N_, N_, N_, N_, N_, ++ N_, N_, N_, N_, N_, N_, N_, N_, ++ N_, N_, N_, N_, N_, N_, N_, N_, ++ N_, N_, N_, N_, N_, N_, N_, N_, ++ N_, N_, N_, N_, N_, N_, N_, N_, ++ N_, N_, N_, N_, N_, N_, N_, N_, ++ N_, N_, N_, N_, N_, N_, N_, N_, ++ N_, N_, N_, N_, N_, N_, N_, N_, ++ N_, N_, N_, N_, N_, N_, N_, N_, ++ N_, N_, N_, N_, N_, N_, N_, N_, ++ N_, N_, N_, N_, N_, N_, N_, N_, ++ N_, N_, N_, N_, N_, N_, N_, N_, ++ N_, N_, N_, N_, N_, N_, N_, N_, ++ N_, N_, N_, N_, N_, N_, N_, N_, ++ 1, 2, 3, 4, N_, N_, N_, N_, ++ N_, N_, N_, N_, N_, N_, N_, 5, ++ N_, N_, N_, N_, N_, N_, N_, N_, ++ N_, N_, N_, N_, N_, N_, N_, N_, ++ }, ++ { ++ N_, N_, N_, N_, N_, N_, N_, 
N_, ++ N_, N_, N_, N_, N_, N_, N_, N_, ++ N_, N_, N_, N_, N_, N_, N_, N_, ++ N_, N_, N_, N_, N_, N_, N_, N_, ++ N_, N_, N_, N_, N_, N_, N_, N_, ++ N_, N_, N_, N_, N_, N_, N_, N_, ++ N_, N_, N_, N_, N_, N_, N_, N_, ++ N_, N_, N_, N_, N_, N_, N_, N_, ++ N_, N_, N_, N_, N_, N_, N_, N_, ++ N_, N_, N_, N_, N_, N_, N_, N_, ++ N_, N_, N_, N_, N_, N_, N_, N_, ++ N_, N_, N_, N_, N_, N_, N_, N_, ++ N_, N_, N_, N_, N_, N_, N_, N_, ++ N_, N_, N_, N_, N_, N_, N_, N_, ++ N_, N_, N_, N_, N_, N_, N_, N_, ++ N_, N_, N_, N_, N_, N_, N_, N_, ++ N_, N_, N_, N_, N_, N_, N_, N_, ++ N_, N_, N_, N_, N_, N_, N_, N_, ++ N_, N_, N_, N_, N_, N_, N_, N_, ++ N_, N_, N_, N_, N_, 6, N_, N_, ++ N_, N_, N_, N_, N_, N_, N_, N_, ++ N_, N_, N_, N_, N_, N_, N_, N_, ++ N_, N_, N_, N_, N_, N_, N_, N_, ++ N_, N_, N_, N_, N_, N_, N_, N_, ++ N_, N_, N_, N_, N_, N_, N_, N_, ++ N_, N_, N_, N_, N_, N_, N_, N_, ++ N_, N_, N_, N_, N_, N_, N_, N_, ++ N_, N_, N_, N_, N_, N_, N_, N_, ++ N_, N_, N_, N_, N_, N_, N_, N_, ++ N_, N_, N_, N_, N_, N_, N_, N_, ++ N_, N_, N_, N_, N_, N_, N_, N_, ++ N_, N_, N_, N_, N_, N_, N_, N_, ++ }, ++ ++ }, ++ { ++ { ++ 0, N_, N_, N_, N_, N_, N_, N_, ++ N_, N_, N_, N_, N_, N_, N_, N_, ++ N_, N_, N_, N_, N_, N_, N_, N_, ++ N_, N_, N_, N_, N_, N_, N_, N_, ++ N_, N_, N_, N_, N_, N_, N_, N_, ++ N_, N_, N_, N_, N_, N_, N_, N_, ++ N_, N_, N_, N_, N_, N_, N_, N_, ++ N_, N_, N_, N_, N_, N_, N_, N_, ++ N_, N_, N_, N_, N_, N_, N_, N_, ++ N_, N_, N_, N_, N_, N_, N_, N_, ++ N_, N_, N_, N_, N_, N_, N_, N_, ++ N_, N_, N_, N_, N_, N_, N_, N_, ++ N_, N_, N_, N_, N_, N_, N_, N_, ++ N_, N_, N_, N_, N_, N_, N_, N_, ++ N_, N_, N_, N_, N_, N_, N_, N_, ++ N_, N_, N_, N_, N_, N_, N_, N_, ++ N_, N_, N_, N_, N_, N_, N_, N_, ++ N_, N_, N_, N_, N_, N_, N_, N_, ++ N_, N_, N_, N_, N_, N_, N_, N_, ++ N_, N_, N_, N_, N_, N_, N_, N_, ++ N_, N_, N_, N_, N_, N_, N_, N_, ++ N_, N_, N_, N_, N_, N_, N_, N_, ++ N_, N_, N_, N_, N_, N_, N_, N_, ++ N_, N_, N_, N_, N_, N_, N_, N_, ++ N_, N_, N_, N_, N_, N_, N_, N_, ++ N_, N_, N_, N_, N_, N_, N_, N_, ++ N_, N_, N_, N_, N_, N_, N_, N_, ++ N_, N_, N_, N_, N_, N_, N_, N_, ++ 1, 2, 3, 4, N_, N_, N_, N_, ++ N_, N_, 5, N_, N_, N_, N_, 6, ++ N_, N_, N_, N_, N_, N_, N_, N_, ++ N_, N_, N_, N_, N_, N_, N_, N_, ++ }, ++ { ++ N_, N_, N_, N_, N_, N_, N_, N_, ++ N_, N_, N_, N_, N_, N_, N_, N_, ++ N_, N_, N_, N_, N_, N_, N_, N_, ++ N_, N_, N_, N_, N_, N_, N_, N_, ++ N_, N_, N_, N_, N_, N_, N_, N_, ++ N_, N_, N_, N_, N_, N_, N_, N_, ++ N_, N_, N_, N_, N_, N_, N_, N_, ++ N_, N_, N_, N_, N_, N_, N_, N_, ++ N_, N_, N_, N_, N_, N_, N_, N_, ++ N_, N_, N_, N_, N_, N_, N_, N_, ++ N_, N_, N_, N_, N_, N_, N_, N_, ++ N_, N_, N_, N_, N_, N_, N_, N_, ++ N_, N_, N_, N_, N_, N_, N_, N_, ++ N_, N_, N_, N_, N_, N_, N_, N_, ++ N_, N_, N_, N_, N_, N_, N_, N_, ++ N_, N_, N_, N_, N_, N_, N_, N_, ++ N_, N_, N_, N_, N_, N_, N_, N_, ++ N_, N_, N_, N_, N_, N_, N_, N_, ++ 7, N_, N_, N_, N_, N_, N_, N_, ++ N_, N_, N_, N_, N_, 8, N_, N_, ++ N_, N_, N_, N_, N_, N_, N_, N_, ++ N_, N_, N_, N_, N_, N_, N_, N_, ++ N_, N_, N_, N_, N_, N_, N_, N_, ++ N_, N_, N_, N_, N_, N_, N_, N_, ++ N_, N_, N_, N_, N_, N_, N_, N_, ++ N_, N_, N_, N_, N_, N_, N_, N_, ++ N_, N_, N_, N_, N_, N_, N_, N_, ++ N_, N_, N_, N_, N_, N_, N_, N_, ++ N_, N_, N_, N_, N_, N_, N_, N_, ++ N_, N_, N_, N_, N_, N_, N_, N_, ++ N_, N_, N_, N_, N_, N_, N_, N_, ++ N_, N_, N_, N_, N_, N_, N_, N_, ++ }, ++ ++ }, ++ ++}; ++ ++static const uchar_t u8_combining_class_b3_tbl[2][9][256] = { ++ { ++ { /* Third byte table 0. 
*/ ++ N_, N_, N_, N_, N_, N_, N_, N_, ++ N_, N_, N_, N_, N_, N_, N_, N_, ++ N_, N_, N_, N_, N_, N_, N_, N_, ++ N_, N_, N_, N_, N_, N_, N_, N_, ++ N_, N_, N_, N_, N_, N_, N_, N_, ++ N_, N_, N_, N_, N_, N_, N_, N_, ++ N_, N_, N_, N_, N_, N_, N_, N_, ++ N_, N_, N_, N_, N_, N_, N_, N_, ++ N_, N_, N_, N_, N_, N_, N_, N_, ++ N_, N_, N_, N_, N_, N_, N_, N_, ++ N_, N_, N_, N_, N_, N_, N_, N_, ++ N_, N_, N_, N_, N_, N_, N_, N_, ++ N_, N_, N_, N_, N_, N_, N_, N_, ++ N_, N_, N_, N_, N_, N_, N_, N_, ++ N_, N_, N_, N_, N_, N_, N_, N_, ++ N_, N_, N_, N_, N_, N_, N_, N_, ++ N_, N_, N_, N_, N_, N_, N_, N_, ++ N_, N_, N_, N_, N_, N_, N_, N_, ++ N_, N_, N_, N_, N_, N_, N_, N_, ++ N_, N_, N_, N_, N_, N_, N_, N_, ++ N_, N_, N_, N_, N_, N_, N_, N_, ++ N_, N_, N_, N_, N_, N_, N_, N_, ++ N_, N_, N_, N_, N_, N_, N_, N_, ++ N_, N_, N_, N_, N_, N_, N_, N_, ++ N_, N_, N_, N_, N_, N_, N_, N_, ++ N_, N_, N_, N_, 0, 1, N_, N_, ++ N_, N_, 2, N_, N_, N_, 3, 4, ++ N_, 5, N_, 6, 7, 8, N_, N_, ++ N_, N_, N_, N_, N_, N_, N_, N_, ++ N_, N_, N_, N_, N_, N_, N_, N_, ++ N_, N_, N_, N_, N_, N_, N_, N_, ++ N_, N_, N_, N_, N_, N_, N_, N_, ++ }, ++ { /* Third byte table 1. */ ++ N_, N_, N_, N_, N_, N_, N_, N_, ++ N_, N_, N_, N_, N_, N_, N_, N_, ++ N_, N_, N_, N_, N_, N_, N_, N_, ++ N_, N_, N_, N_, N_, N_, N_, N_, ++ N_, N_, N_, N_, N_, N_, N_, N_, ++ N_, N_, N_, N_, N_, N_, N_, N_, ++ N_, N_, N_, N_, N_, N_, N_, N_, ++ N_, N_, N_, N_, N_, N_, N_, N_, ++ N_, N_, N_, N_, N_, N_, N_, N_, ++ N_, N_, N_, N_, N_, N_, N_, N_, ++ N_, N_, N_, N_, N_, N_, N_, N_, ++ N_, N_, N_, N_, N_, N_, N_, N_, ++ N_, N_, N_, N_, N_, N_, N_, N_, ++ N_, N_, N_, N_, N_, N_, N_, N_, ++ N_, N_, N_, N_, N_, N_, N_, N_, ++ N_, N_, N_, N_, N_, N_, N_, N_, ++ N_, N_, N_, N_, N_, N_, N_, N_, ++ N_, N_, N_, N_, N_, N_, N_, N_, ++ N_, N_, N_, N_, N_, N_, N_, N_, ++ N_, N_, N_, N_, N_, N_, N_, N_, ++ N_, N_, N_, N_, 9, 10, 11, 12, ++ 13, 14, 15, 16, 17, 18, N_, 19, ++ N_, 20, N_, 21, N_, 22, N_, 23, ++ 24, 25, 26, 27, 28, 29, 30, 31, ++ N_, N_, N_, N_, N_, N_, N_, N_, ++ N_, N_, N_, N_, N_, N_, N_, N_, ++ N_, N_, N_, N_, N_, N_, N_, N_, ++ N_, N_, N_, N_, N_, N_, N_, N_, ++ N_, N_, N_, N_, N_, N_, N_, N_, ++ N_, N_, N_, N_, N_, N_, N_, N_, ++ N_, N_, N_, N_, N_, N_, N_, N_, ++ N_, N_, N_, N_, N_, N_, N_, N_, ++ }, ++ { /* Third byte table 2. */ ++ N_, N_, N_, N_, N_, N_, N_, N_, ++ N_, N_, N_, N_, N_, N_, N_, N_, ++ N_, N_, N_, N_, N_, N_, N_, N_, ++ N_, N_, N_, N_, N_, N_, N_, N_, ++ N_, N_, N_, N_, N_, N_, N_, N_, ++ N_, N_, N_, N_, N_, N_, N_, N_, ++ N_, N_, N_, N_, N_, N_, N_, N_, ++ N_, N_, N_, N_, N_, N_, N_, N_, ++ N_, N_, N_, N_, N_, N_, N_, N_, ++ N_, N_, N_, N_, N_, N_, N_, N_, ++ N_, N_, N_, N_, N_, N_, N_, N_, ++ N_, N_, N_, N_, N_, N_, N_, N_, ++ N_, N_, N_, N_, N_, N_, N_, N_, ++ N_, N_, N_, N_, N_, N_, N_, N_, ++ N_, N_, N_, N_, N_, N_, N_, N_, ++ N_, N_, N_, N_, N_, N_, N_, N_, ++ 32, N_, N_, N_, N_, N_, N_, N_, ++ N_, N_, N_, N_, N_, N_, N_, N_, ++ N_, N_, N_, N_, N_, N_, N_, N_, ++ N_, N_, N_, N_, 33, N_, N_, 34, ++ N_, N_, 35, N_, N_, N_, N_, N_, ++ N_, N_, N_, N_, N_, N_, N_, N_, ++ N_, N_, N_, N_, N_, N_, N_, N_, ++ N_, N_, N_, N_, N_, N_, N_, N_, ++ N_, N_, N_, N_, N_, N_, N_, N_, ++ N_, N_, N_, N_, N_, N_, N_, N_, ++ N_, N_, N_, N_, N_, N_, N_, N_, ++ N_, N_, N_, N_, N_, N_, N_, N_, ++ N_, N_, N_, N_, N_, N_, N_, N_, ++ N_, N_, N_, N_, N_, N_, N_, N_, ++ N_, N_, N_, N_, N_, N_, N_, N_, ++ N_, N_, N_, N_, N_, N_, N_, N_, ++ }, ++ { /* Third byte table 3. 
*/ ++ N_, N_, N_, N_, N_, N_, N_, N_, ++ N_, N_, N_, N_, N_, N_, N_, N_, ++ N_, N_, N_, N_, N_, N_, N_, N_, ++ N_, N_, N_, N_, N_, N_, N_, N_, ++ N_, N_, N_, N_, N_, N_, N_, N_, ++ N_, N_, N_, N_, N_, N_, N_, N_, ++ N_, N_, N_, N_, N_, N_, N_, N_, ++ N_, N_, N_, N_, N_, N_, N_, N_, ++ N_, N_, N_, N_, N_, N_, N_, N_, ++ N_, N_, N_, N_, N_, N_, N_, N_, ++ N_, N_, N_, N_, N_, N_, N_, N_, ++ N_, N_, N_, N_, N_, N_, N_, N_, ++ N_, N_, N_, N_, N_, N_, N_, N_, ++ N_, N_, N_, N_, N_, N_, N_, N_, ++ N_, N_, N_, N_, N_, N_, N_, N_, ++ N_, N_, N_, N_, N_, N_, N_, N_, ++ N_, N_, N_, 36, N_, N_, N_, N_, ++ N_, N_, N_, N_, N_, N_, N_, N_, ++ N_, N_, N_, N_, N_, N_, N_, N_, ++ N_, N_, N_, N_, N_, N_, N_, N_, ++ N_, N_, N_, N_, N_, N_, N_, N_, ++ N_, N_, N_, N_, N_, N_, N_, N_, ++ N_, N_, N_, N_, N_, N_, N_, N_, ++ N_, N_, N_, N_, N_, N_, N_, N_, ++ N_, N_, N_, N_, N_, N_, N_, N_, ++ N_, N_, N_, N_, N_, N_, N_, N_, ++ N_, N_, N_, N_, N_, N_, N_, N_, ++ N_, N_, N_, N_, N_, N_, N_, N_, ++ N_, N_, N_, N_, N_, N_, N_, N_, ++ N_, N_, N_, N_, N_, N_, N_, N_, ++ N_, N_, N_, N_, N_, N_, N_, N_, ++ N_, N_, N_, N_, N_, N_, N_, N_, ++ }, ++ { /* Third byte table 4. */ ++ N_, N_, N_, N_, N_, N_, N_, N_, ++ N_, N_, N_, N_, N_, N_, N_, N_, ++ N_, N_, N_, N_, N_, N_, N_, N_, ++ N_, N_, N_, N_, N_, N_, N_, N_, ++ N_, N_, N_, N_, N_, N_, N_, N_, ++ N_, N_, N_, N_, N_, N_, N_, N_, ++ N_, N_, N_, N_, N_, N_, N_, N_, ++ N_, N_, N_, N_, N_, N_, N_, N_, ++ N_, N_, N_, N_, N_, N_, N_, N_, ++ N_, N_, N_, N_, N_, N_, N_, N_, ++ N_, N_, N_, N_, N_, N_, N_, N_, ++ N_, N_, N_, N_, N_, N_, N_, N_, ++ N_, N_, N_, N_, N_, N_, N_, N_, ++ N_, N_, N_, N_, N_, N_, N_, N_, ++ N_, N_, N_, N_, N_, N_, N_, N_, ++ N_, N_, N_, N_, N_, N_, N_, N_, ++ 37, N_, 38, N_, N_, N_, N_, N_, ++ N_, N_, N_, N_, N_, N_, N_, N_, ++ N_, N_, N_, N_, N_, N_, N_, N_, ++ N_, N_, N_, N_, N_, N_, N_, N_, ++ N_, N_, N_, N_, N_, N_, N_, N_, ++ N_, N_, N_, N_, N_, N_, N_, N_, ++ N_, N_, N_, N_, N_, N_, N_, N_, ++ N_, N_, N_, N_, N_, N_, N_, N_, ++ N_, N_, N_, N_, N_, N_, N_, N_, ++ N_, N_, N_, N_, N_, N_, N_, N_, ++ N_, N_, N_, N_, N_, N_, N_, N_, ++ N_, N_, N_, N_, N_, N_, N_, N_, ++ N_, N_, N_, N_, N_, N_, N_, N_, ++ N_, N_, N_, N_, N_, N_, N_, N_, ++ N_, N_, N_, N_, N_, N_, N_, N_, ++ N_, N_, N_, N_, N_, N_, N_, N_, ++ }, ++ { /* Third byte table 5. */ ++ N_, N_, N_, N_, N_, N_, N_, N_, ++ N_, N_, N_, N_, N_, N_, N_, N_, ++ N_, N_, N_, N_, N_, N_, N_, N_, ++ N_, N_, N_, N_, N_, N_, N_, N_, ++ N_, N_, N_, N_, N_, N_, N_, N_, ++ N_, N_, N_, N_, N_, N_, N_, N_, ++ N_, N_, N_, N_, N_, N_, N_, N_, ++ N_, N_, N_, N_, N_, N_, N_, N_, ++ N_, N_, N_, N_, N_, N_, N_, N_, ++ N_, N_, N_, N_, N_, N_, N_, N_, ++ N_, N_, N_, N_, N_, N_, N_, N_, ++ N_, N_, N_, N_, N_, N_, N_, N_, ++ N_, N_, N_, N_, N_, N_, N_, N_, ++ N_, N_, N_, N_, N_, N_, N_, N_, ++ N_, N_, N_, N_, N_, N_, N_, N_, ++ N_, N_, N_, N_, N_, N_, N_, N_, ++ N_, N_, N_, N_, N_, N_, N_, N_, ++ N_, N_, N_, N_, N_, N_, N_, N_, ++ N_, N_, N_, N_, N_, N_, N_, N_, ++ N_, N_, N_, N_, N_, N_, N_, N_, ++ N_, N_, N_, N_, N_, N_, N_, N_, ++ N_, N_, N_, N_, 39, N_, N_, N_, ++ N_, N_, N_, N_, N_, N_, N_, N_, ++ 40, N_, N_, N_, N_, N_, N_, N_, ++ N_, N_, N_, N_, N_, N_, N_, N_, ++ N_, N_, N_, N_, N_, N_, N_, N_, ++ N_, N_, N_, N_, N_, N_, N_, N_, ++ N_, N_, N_, N_, N_, N_, N_, N_, ++ N_, N_, N_, N_, N_, N_, N_, N_, ++ N_, N_, N_, N_, N_, N_, N_, N_, ++ N_, N_, N_, N_, N_, N_, N_, N_, ++ N_, N_, N_, N_, N_, N_, N_, N_, ++ }, ++ { /* Third byte table 6. 
*/ ++ N_, N_, N_, N_, N_, N_, N_, N_, ++ N_, N_, N_, N_, N_, N_, N_, N_, ++ N_, N_, N_, N_, N_, N_, N_, N_, ++ N_, N_, N_, N_, N_, N_, N_, N_, ++ N_, N_, N_, N_, N_, N_, N_, N_, ++ N_, N_, N_, N_, N_, N_, N_, N_, ++ N_, N_, N_, N_, N_, N_, N_, N_, ++ N_, N_, N_, N_, N_, N_, N_, N_, ++ N_, N_, N_, N_, N_, N_, N_, N_, ++ N_, N_, N_, N_, N_, N_, N_, N_, ++ N_, N_, N_, N_, N_, N_, N_, N_, ++ N_, N_, N_, N_, N_, N_, N_, N_, ++ N_, N_, N_, N_, N_, N_, N_, N_, ++ N_, N_, N_, N_, N_, N_, N_, N_, ++ N_, N_, N_, N_, N_, N_, N_, N_, ++ N_, N_, N_, N_, N_, N_, N_, N_, ++ N_, N_, N_, N_, N_, 41, 42, N_, ++ N_, N_, N_, N_, N_, N_, N_, N_, ++ N_, N_, N_, N_, N_, N_, N_, N_, ++ N_, N_, N_, N_, N_, N_, N_, N_, ++ N_, N_, N_, N_, N_, N_, N_, N_, ++ N_, N_, N_, N_, N_, N_, N_, N_, ++ N_, N_, N_, N_, N_, N_, N_, N_, ++ N_, N_, N_, N_, N_, N_, N_, N_, ++ N_, N_, N_, N_, N_, N_, N_, N_, ++ N_, N_, N_, N_, N_, N_, N_, N_, ++ N_, N_, N_, N_, N_, N_, N_, N_, ++ N_, N_, N_, N_, N_, N_, N_, N_, ++ N_, N_, N_, N_, N_, N_, N_, N_, ++ N_, N_, N_, N_, N_, N_, N_, N_, ++ N_, N_, N_, N_, N_, N_, N_, N_, ++ N_, N_, N_, N_, N_, N_, N_, N_, ++ }, ++ { /* Third byte table 7. */ ++ N_, N_, N_, N_, N_, N_, N_, N_, ++ N_, N_, N_, N_, N_, N_, N_, N_, ++ N_, N_, N_, N_, N_, N_, N_, N_, ++ N_, N_, N_, N_, N_, N_, N_, N_, ++ N_, N_, N_, N_, N_, N_, N_, N_, ++ N_, N_, N_, N_, N_, N_, N_, N_, ++ N_, N_, N_, N_, N_, N_, N_, N_, ++ N_, N_, N_, N_, N_, N_, N_, N_, ++ N_, N_, N_, N_, N_, N_, N_, N_, ++ N_, N_, N_, N_, N_, N_, N_, N_, ++ N_, N_, N_, N_, N_, N_, N_, N_, ++ N_, N_, N_, N_, N_, N_, N_, N_, ++ N_, N_, N_, N_, N_, N_, N_, N_, ++ N_, N_, N_, N_, N_, N_, N_, N_, ++ N_, N_, N_, N_, N_, N_, N_, N_, ++ N_, N_, N_, N_, N_, N_, N_, N_, ++ N_, N_, N_, N_, N_, N_, N_, N_, ++ N_, N_, N_, N_, N_, N_, N_, N_, ++ N_, N_, N_, N_, N_, N_, N_, N_, ++ N_, N_, N_, N_, N_, N_, N_, N_, ++ N_, N_, N_, N_, N_, N_, N_, N_, ++ N_, N_, N_, N_, N_, N_, N_, N_, ++ N_, N_, N_, N_, N_, N_, N_, N_, ++ N_, N_, N_, N_, N_, N_, N_, N_, ++ N_, N_, N_, N_, N_, N_, N_, N_, ++ N_, N_, N_, N_, N_, N_, N_, N_, ++ N_, N_, N_, N_, N_, N_, N_, N_, ++ N_, N_, N_, N_, N_, N_, N_, N_, ++ N_, N_, N_, N_, N_, N_, N_, N_, ++ N_, N_, N_, N_, N_, N_, N_, N_, ++ N_, N_, N_, N_, N_, N_, N_, N_, ++ N_, N_, N_, N_, N_, N_, N_, N_, ++ }, ++ { /* Third byte table 8. */ ++ N_, N_, N_, N_, N_, N_, N_, N_, ++ N_, N_, N_, N_, N_, N_, N_, N_, ++ N_, N_, N_, N_, N_, N_, N_, N_, ++ N_, N_, N_, N_, N_, N_, N_, N_, ++ N_, N_, N_, N_, N_, N_, N_, N_, ++ N_, N_, N_, N_, N_, N_, N_, N_, ++ N_, N_, N_, N_, N_, N_, N_, N_, ++ N_, N_, N_, N_, N_, N_, N_, N_, ++ N_, N_, N_, N_, N_, N_, N_, N_, ++ N_, N_, N_, N_, N_, N_, N_, N_, ++ N_, N_, N_, N_, N_, N_, N_, N_, ++ N_, N_, N_, N_, N_, N_, N_, N_, ++ N_, N_, N_, N_, N_, N_, N_, N_, ++ N_, N_, N_, N_, N_, N_, N_, N_, ++ N_, N_, N_, N_, N_, N_, N_, N_, ++ N_, N_, N_, N_, N_, N_, N_, N_, ++ N_, N_, N_, N_, N_, N_, N_, N_, ++ N_, N_, N_, N_, N_, N_, N_, N_, ++ N_, N_, N_, N_, N_, N_, N_, N_, ++ N_, N_, N_, N_, N_, N_, N_, N_, ++ N_, N_, N_, N_, N_, N_, N_, N_, ++ N_, N_, N_, N_, N_, N_, N_, N_, ++ N_, N_, N_, N_, N_, N_, N_, N_, ++ N_, N_, N_, N_, N_, N_, N_, N_, ++ N_, N_, N_, N_, N_, N_, N_, N_, ++ N_, N_, N_, N_, N_, N_, N_, N_, ++ N_, N_, N_, N_, N_, N_, N_, N_, ++ N_, N_, N_, N_, N_, N_, N_, N_, ++ N_, N_, N_, N_, N_, N_, N_, N_, ++ N_, N_, N_, N_, N_, N_, N_, N_, ++ N_, N_, N_, N_, N_, N_, N_, N_, ++ N_, N_, N_, N_, N_, N_, N_, N_, ++ }, ++ }, ++ { ++ { /* Third byte table 0. 
*/ ++ N_, N_, N_, N_, N_, N_, N_, N_, ++ N_, N_, N_, N_, N_, N_, N_, N_, ++ N_, N_, N_, N_, N_, N_, N_, N_, ++ N_, N_, N_, N_, N_, N_, N_, N_, ++ N_, N_, N_, N_, N_, N_, N_, N_, ++ N_, N_, N_, N_, N_, N_, N_, N_, ++ N_, N_, N_, N_, N_, N_, N_, N_, ++ N_, N_, N_, N_, N_, N_, N_, N_, ++ N_, N_, N_, N_, N_, N_, N_, N_, ++ N_, N_, N_, N_, N_, N_, N_, N_, ++ N_, N_, N_, N_, N_, N_, N_, N_, ++ N_, N_, N_, N_, N_, N_, N_, N_, ++ N_, N_, N_, N_, N_, N_, N_, N_, ++ N_, N_, N_, N_, N_, N_, N_, N_, ++ N_, N_, N_, N_, N_, N_, N_, N_, ++ N_, N_, N_, N_, N_, N_, N_, N_, ++ N_, N_, N_, N_, N_, N_, N_, N_, ++ N_, N_, N_, N_, N_, N_, N_, N_, ++ N_, N_, N_, N_, N_, N_, N_, N_, ++ N_, N_, N_, N_, N_, N_, N_, N_, ++ N_, N_, N_, N_, N_, N_, N_, N_, ++ N_, N_, N_, N_, N_, N_, N_, N_, ++ N_, N_, N_, N_, N_, N_, N_, N_, ++ N_, N_, N_, N_, N_, N_, N_, N_, ++ N_, N_, N_, N_, N_, N_, N_, N_, ++ N_, N_, N_, N_, 0, 1, N_, N_, ++ N_, N_, 2, N_, N_, N_, 3, 4, ++ 5, 6, N_, 7, 8, 9, N_, 10, ++ N_, N_, N_, N_, N_, N_, N_, N_, ++ N_, N_, N_, N_, N_, N_, N_, N_, ++ N_, N_, N_, N_, N_, N_, N_, N_, ++ N_, N_, N_, N_, N_, N_, N_, N_, ++ }, ++ { /* Third byte table 1. */ ++ N_, N_, N_, N_, N_, N_, N_, N_, ++ N_, N_, N_, N_, N_, N_, N_, N_, ++ N_, N_, N_, N_, N_, N_, N_, N_, ++ N_, N_, N_, N_, N_, N_, N_, N_, ++ N_, N_, N_, N_, N_, N_, N_, N_, ++ N_, N_, N_, N_, N_, N_, N_, N_, ++ N_, N_, N_, N_, N_, N_, N_, N_, ++ N_, N_, N_, N_, N_, N_, N_, N_, ++ N_, N_, N_, N_, N_, N_, N_, N_, ++ N_, N_, N_, N_, N_, N_, N_, N_, ++ N_, N_, N_, N_, N_, N_, N_, N_, ++ N_, N_, N_, N_, N_, N_, N_, N_, ++ N_, N_, N_, N_, N_, N_, N_, N_, ++ N_, N_, N_, N_, N_, N_, N_, N_, ++ N_, N_, N_, N_, N_, N_, N_, N_, ++ N_, N_, N_, N_, N_, N_, N_, N_, ++ N_, N_, N_, N_, N_, N_, N_, N_, ++ N_, N_, N_, N_, N_, N_, N_, N_, ++ N_, N_, N_, N_, N_, N_, N_, N_, ++ N_, N_, N_, N_, N_, N_, N_, N_, ++ N_, N_, N_, N_, 11, 12, 13, 14, ++ 15, 16, 17, 18, 19, 20, N_, 21, ++ N_, 22, 23, 24, N_, 25, N_, 26, ++ 27, 28, 29, 30, 31, 32, 33, 34, ++ N_, N_, N_, N_, N_, N_, N_, N_, ++ N_, N_, N_, N_, N_, N_, N_, N_, ++ N_, N_, N_, N_, N_, N_, N_, N_, ++ N_, N_, N_, N_, N_, N_, N_, N_, ++ N_, N_, N_, N_, N_, N_, N_, N_, ++ N_, N_, N_, N_, N_, N_, N_, N_, ++ N_, N_, N_, N_, N_, N_, N_, N_, ++ N_, N_, N_, N_, N_, N_, N_, N_, ++ }, ++ { /* Third byte table 2. */ ++ N_, N_, N_, N_, N_, N_, N_, N_, ++ N_, N_, N_, N_, N_, N_, N_, N_, ++ N_, N_, N_, N_, N_, N_, N_, N_, ++ N_, N_, N_, N_, N_, N_, N_, N_, ++ N_, N_, N_, N_, N_, N_, N_, N_, ++ N_, N_, N_, N_, N_, N_, N_, N_, ++ N_, N_, N_, N_, N_, N_, N_, N_, ++ N_, N_, N_, N_, N_, N_, N_, N_, ++ N_, N_, N_, N_, N_, N_, N_, N_, ++ N_, N_, N_, N_, N_, N_, N_, N_, ++ N_, N_, N_, N_, N_, N_, N_, N_, ++ N_, N_, N_, N_, N_, N_, N_, N_, ++ N_, N_, N_, N_, N_, N_, N_, N_, ++ N_, N_, N_, N_, N_, N_, N_, N_, ++ N_, N_, N_, N_, N_, N_, N_, N_, ++ N_, N_, N_, N_, N_, N_, N_, N_, ++ 35, N_, N_, N_, N_, N_, N_, N_, ++ N_, N_, N_, N_, N_, 36, N_, N_, ++ N_, N_, N_, N_, N_, N_, N_, N_, ++ N_, N_, N_, N_, 37, N_, N_, 38, ++ N_, N_, 39, N_, 40, N_, N_, N_, ++ 41, N_, N_, N_, 42, 43, N_, N_, ++ N_, N_, N_, N_, N_, N_, N_, 44, ++ N_, N_, N_, N_, N_, N_, N_, N_, ++ N_, N_, N_, N_, N_, N_, N_, N_, ++ N_, N_, N_, N_, N_, N_, N_, N_, ++ N_, N_, N_, N_, N_, N_, N_, N_, ++ N_, N_, N_, N_, N_, N_, N_, N_, ++ N_, N_, N_, N_, N_, N_, N_, N_, ++ N_, N_, N_, N_, N_, N_, N_, N_, ++ N_, N_, N_, N_, N_, N_, N_, N_, ++ N_, N_, N_, N_, N_, N_, N_, N_, ++ }, ++ { /* Third byte table 3. 
*/ ++ N_, N_, N_, N_, N_, N_, N_, N_, ++ N_, N_, N_, N_, N_, N_, N_, N_, ++ N_, N_, N_, N_, N_, N_, N_, N_, ++ N_, N_, N_, N_, N_, N_, N_, N_, ++ N_, N_, N_, N_, N_, N_, N_, N_, ++ N_, N_, N_, N_, N_, N_, N_, N_, ++ N_, N_, N_, N_, N_, N_, N_, N_, ++ N_, N_, N_, N_, N_, N_, N_, N_, ++ N_, N_, N_, N_, N_, N_, N_, N_, ++ N_, N_, N_, N_, N_, N_, N_, N_, ++ N_, N_, N_, N_, N_, N_, N_, N_, ++ N_, N_, N_, N_, N_, N_, N_, N_, ++ N_, N_, N_, N_, N_, N_, N_, N_, ++ N_, N_, N_, N_, N_, N_, N_, N_, ++ N_, N_, N_, N_, N_, N_, N_, N_, ++ N_, N_, N_, N_, N_, N_, N_, N_, ++ N_, N_, N_, 45, N_, N_, N_, N_, ++ N_, N_, N_, N_, N_, N_, N_, N_, ++ N_, N_, N_, N_, N_, N_, N_, N_, ++ N_, N_, N_, N_, N_, N_, N_, N_, ++ N_, N_, N_, N_, N_, N_, N_, N_, ++ N_, N_, N_, N_, N_, N_, N_, N_, ++ N_, N_, N_, N_, N_, N_, N_, N_, ++ N_, N_, N_, N_, N_, N_, N_, N_, ++ N_, N_, N_, N_, N_, N_, N_, N_, ++ N_, N_, N_, N_, N_, N_, N_, N_, ++ N_, N_, N_, N_, N_, N_, N_, N_, ++ N_, N_, N_, N_, N_, N_, N_, N_, ++ N_, N_, N_, N_, N_, N_, N_, N_, ++ N_, N_, N_, N_, N_, N_, N_, N_, ++ N_, N_, N_, N_, N_, N_, N_, N_, ++ N_, N_, N_, N_, N_, N_, N_, N_, ++ }, ++ { /* Third byte table 4. */ ++ N_, N_, N_, N_, N_, N_, N_, N_, ++ N_, N_, N_, N_, N_, N_, N_, N_, ++ N_, N_, N_, N_, N_, N_, N_, N_, ++ N_, N_, N_, N_, N_, N_, N_, N_, ++ N_, N_, N_, N_, N_, N_, N_, N_, ++ N_, N_, N_, N_, N_, N_, N_, N_, ++ N_, N_, N_, N_, N_, N_, N_, N_, ++ N_, N_, N_, N_, N_, N_, N_, N_, ++ N_, N_, N_, N_, N_, N_, N_, N_, ++ N_, N_, N_, N_, N_, N_, N_, N_, ++ N_, N_, N_, N_, N_, N_, N_, N_, ++ N_, N_, N_, N_, N_, N_, N_, N_, ++ N_, N_, N_, N_, N_, N_, N_, N_, ++ N_, N_, N_, N_, N_, N_, N_, N_, ++ N_, N_, N_, N_, N_, N_, N_, N_, ++ N_, N_, N_, N_, N_, N_, N_, N_, ++ 46, N_, 47, N_, N_, N_, N_, N_, ++ N_, N_, N_, N_, N_, N_, N_, N_, ++ N_, N_, N_, N_, N_, N_, N_, N_, ++ N_, N_, N_, N_, N_, N_, N_, N_, ++ N_, N_, N_, N_, N_, N_, N_, N_, ++ N_, N_, N_, N_, N_, N_, N_, N_, ++ N_, N_, N_, N_, N_, N_, N_, N_, ++ N_, N_, N_, N_, N_, N_, N_, N_, ++ N_, N_, N_, N_, N_, N_, N_, N_, ++ N_, N_, N_, N_, N_, N_, N_, N_, ++ N_, N_, N_, N_, N_, N_, N_, N_, ++ N_, N_, N_, N_, N_, N_, N_, N_, ++ N_, N_, N_, N_, N_, N_, N_, N_, ++ N_, N_, N_, N_, N_, N_, N_, N_, ++ N_, N_, N_, N_, N_, N_, N_, N_, ++ N_, N_, N_, N_, N_, N_, N_, N_, ++ }, ++ { /* Third byte table 5. */ ++ N_, N_, N_, N_, N_, N_, N_, N_, ++ N_, N_, N_, N_, N_, N_, N_, N_, ++ N_, N_, N_, N_, N_, N_, N_, N_, ++ N_, N_, N_, N_, N_, N_, N_, N_, ++ N_, N_, N_, N_, N_, N_, N_, N_, ++ N_, N_, N_, N_, N_, N_, N_, N_, ++ N_, N_, N_, N_, N_, N_, N_, N_, ++ N_, N_, N_, N_, N_, N_, N_, N_, ++ N_, N_, N_, N_, N_, N_, N_, N_, ++ N_, N_, N_, N_, N_, N_, N_, N_, ++ N_, N_, N_, N_, N_, N_, N_, N_, ++ N_, N_, N_, N_, N_, N_, N_, N_, ++ N_, N_, N_, N_, N_, N_, N_, N_, ++ N_, N_, N_, N_, N_, N_, N_, N_, ++ N_, N_, N_, N_, N_, N_, N_, N_, ++ N_, N_, N_, N_, N_, N_, N_, N_, ++ N_, N_, N_, N_, N_, N_, N_, N_, ++ N_, N_, N_, N_, N_, N_, N_, N_, ++ N_, N_, N_, N_, N_, N_, N_, N_, ++ N_, N_, N_, N_, N_, N_, N_, N_, ++ 48, N_, N_, N_, N_, N_, N_, N_, ++ N_, N_, N_, N_, N_, N_, N_, N_, ++ N_, N_, N_, N_, N_, N_, N_, N_, ++ N_, N_, N_, N_, N_, N_, N_, N_, ++ N_, N_, N_, N_, N_, N_, N_, N_, ++ N_, N_, N_, N_, N_, N_, N_, N_, ++ N_, N_, N_, N_, N_, N_, N_, N_, ++ N_, N_, N_, N_, N_, N_, N_, N_, ++ N_, N_, N_, N_, N_, N_, N_, N_, ++ N_, N_, N_, N_, N_, N_, N_, N_, ++ N_, N_, N_, N_, N_, N_, N_, N_, ++ N_, N_, N_, N_, N_, N_, N_, N_, ++ }, ++ { /* Third byte table 6. 
*/ ++ N_, N_, N_, N_, N_, N_, N_, N_, ++ N_, N_, N_, N_, N_, N_, N_, N_, ++ N_, N_, N_, N_, N_, N_, N_, N_, ++ N_, N_, N_, N_, N_, N_, N_, N_, ++ N_, N_, N_, N_, N_, N_, N_, N_, ++ N_, N_, N_, N_, N_, N_, N_, N_, ++ N_, N_, N_, N_, N_, N_, N_, N_, ++ N_, N_, N_, N_, N_, N_, N_, N_, ++ N_, N_, N_, N_, N_, N_, N_, N_, ++ N_, N_, N_, N_, N_, N_, N_, N_, ++ N_, N_, N_, N_, N_, N_, N_, N_, ++ N_, N_, N_, N_, N_, N_, N_, N_, ++ N_, N_, N_, N_, N_, N_, N_, N_, ++ N_, N_, N_, N_, N_, N_, N_, N_, ++ N_, N_, N_, N_, N_, N_, N_, N_, ++ N_, N_, N_, N_, N_, N_, N_, N_, ++ N_, N_, N_, N_, N_, N_, N_, N_, ++ N_, N_, N_, N_, N_, N_, N_, N_, ++ N_, N_, N_, N_, N_, N_, N_, N_, ++ N_, N_, N_, N_, N_, N_, N_, N_, ++ N_, N_, N_, N_, N_, N_, N_, N_, ++ N_, N_, N_, N_, 49, N_, N_, N_, ++ N_, N_, N_, N_, N_, N_, N_, N_, ++ 50, N_, N_, N_, N_, N_, N_, N_, ++ N_, N_, N_, N_, N_, N_, N_, N_, ++ N_, N_, N_, N_, N_, N_, N_, N_, ++ N_, N_, N_, N_, N_, N_, N_, N_, ++ N_, N_, N_, N_, N_, N_, N_, N_, ++ N_, N_, N_, N_, N_, N_, N_, N_, ++ N_, N_, N_, N_, N_, N_, N_, N_, ++ N_, N_, N_, N_, N_, N_, N_, N_, ++ N_, N_, N_, N_, N_, N_, N_, N_, ++ }, ++ { /* Third byte table 7. */ ++ N_, N_, N_, N_, N_, N_, N_, N_, ++ N_, N_, N_, N_, N_, N_, N_, N_, ++ N_, N_, N_, N_, N_, N_, N_, N_, ++ N_, N_, N_, N_, N_, N_, N_, N_, ++ N_, N_, N_, N_, N_, N_, N_, N_, ++ N_, N_, N_, N_, N_, N_, N_, N_, ++ N_, N_, N_, N_, N_, N_, N_, N_, ++ N_, N_, N_, N_, N_, N_, N_, N_, ++ N_, N_, N_, N_, N_, N_, N_, N_, ++ N_, N_, N_, N_, N_, N_, N_, N_, ++ N_, N_, N_, N_, N_, N_, N_, N_, ++ N_, N_, N_, N_, N_, N_, N_, N_, ++ N_, N_, N_, N_, N_, N_, N_, N_, ++ N_, N_, N_, N_, N_, N_, N_, N_, ++ N_, N_, N_, N_, N_, N_, N_, N_, ++ N_, N_, N_, N_, N_, N_, N_, N_, ++ N_, N_, N_, N_, N_, N_, N_, N_, ++ N_, N_, N_, N_, N_, N_, N_, N_, ++ N_, N_, N_, N_, N_, N_, N_, N_, ++ N_, N_, N_, N_, N_, N_, N_, N_, ++ N_, N_, N_, N_, N_, N_, N_, N_, ++ 51, N_, N_, N_, N_, N_, N_, N_, ++ N_, N_, N_, N_, N_, N_, N_, N_, ++ N_, N_, N_, N_, N_, N_, N_, N_, ++ N_, N_, N_, N_, N_, N_, N_, N_, ++ N_, N_, N_, N_, N_, N_, N_, N_, ++ N_, N_, N_, N_, N_, N_, N_, N_, ++ N_, N_, N_, N_, N_, N_, N_, N_, ++ N_, N_, N_, N_, N_, N_, N_, N_, ++ N_, N_, N_, N_, N_, N_, N_, N_, ++ N_, N_, N_, N_, N_, N_, N_, N_, ++ N_, N_, N_, N_, N_, N_, N_, N_, ++ }, ++ { /* Third byte table 8. 
*/ ++ N_, N_, N_, N_, N_, N_, N_, N_, ++ N_, N_, N_, N_, N_, N_, N_, N_, ++ N_, N_, N_, N_, N_, N_, N_, N_, ++ N_, N_, N_, N_, N_, N_, N_, N_, ++ N_, N_, N_, N_, N_, N_, N_, N_, ++ N_, N_, N_, N_, N_, N_, N_, N_, ++ N_, N_, N_, N_, N_, N_, N_, N_, ++ N_, N_, N_, N_, N_, N_, N_, N_, ++ N_, N_, N_, N_, N_, N_, N_, N_, ++ N_, N_, N_, N_, N_, N_, N_, N_, ++ N_, N_, N_, N_, N_, N_, N_, N_, ++ N_, N_, N_, N_, N_, N_, N_, N_, ++ N_, N_, N_, N_, N_, N_, N_, N_, ++ N_, N_, N_, N_, N_, N_, N_, N_, ++ N_, N_, N_, N_, N_, N_, N_, N_, ++ N_, N_, N_, N_, N_, N_, N_, N_, ++ N_, N_, N_, N_, N_, 52, 53, N_, ++ N_, 54, N_, N_, N_, N_, N_, N_, ++ N_, N_, N_, N_, N_, N_, N_, N_, ++ N_, N_, N_, N_, N_, N_, N_, N_, ++ N_, N_, N_, N_, N_, N_, N_, N_, ++ N_, N_, N_, N_, N_, N_, N_, N_, ++ N_, N_, N_, N_, N_, N_, N_, N_, ++ N_, N_, N_, N_, N_, N_, N_, N_, ++ N_, N_, N_, N_, N_, N_, N_, N_, ++ N_, N_, N_, N_, N_, N_, N_, N_, ++ N_, N_, N_, N_, N_, N_, N_, N_, ++ N_, N_, N_, N_, N_, N_, N_, N_, ++ N_, N_, N_, N_, N_, N_, N_, N_, ++ N_, N_, N_, N_, N_, N_, N_, N_, ++ N_, N_, N_, N_, N_, N_, N_, N_, ++ N_, N_, N_, N_, N_, N_, N_, N_, ++ }, ++ }, ++}; ++ ++/* ++ * Unlike other b4_tbl, the b4_tbl for combining class data has ++ * the combining class values not indices to the final tables. ++ */ ++static const uchar_t u8_combining_class_b4_tbl[2][55][256] = { ++ { ++ { /* Fourth byte table 0. */ ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 230, 230, 230, 230, 230, 230, 230, 230, ++ 230, 230, 230, 230, 230, 230, 230, 230, ++ 230, 230, 230, 230, 230, 232, 220, 220, ++ 220, 220, 232, 216, 220, 220, 220, 220, ++ 220, 202, 202, 220, 220, 220, 220, 202, ++ 202, 220, 220, 220, 220, 220, 220, 220, ++ 220, 220, 220, 220, 1, 1, 1, 1, ++ 1, 220, 220, 220, 220, 230, 230, 230, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ }, ++ { /* Fourth byte table 1. */ ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 230, 230, 230, 230, 230, 240, 230, 220, ++ 220, 220, 230, 230, 230, 220, 220, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 234, 234, 233, 230, 230, 230, 230, 230, ++ 230, 230, 230, 230, 230, 230, 230, 230, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ }, ++ { /* Fourth byte table 2. 
*/ ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 230, 230, 230, 230, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ }, ++ { /* Fourth byte table 3. */ ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 220, 230, 230, 230, 230, 220, 230, ++ 230, 230, 222, 220, 230, 230, 230, 230, ++ 230, 230, 0, 220, 220, 220, 220, 220, ++ 230, 230, 220, 230, 230, 222, 228, 230, ++ 10, 11, 12, 13, 14, 15, 16, 17, ++ 18, 19, 0, 20, 21, 22, 0, 23, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ }, ++ { /* Fourth byte table 4. */ ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 24, 25, 0, 230, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ }, ++ { /* Fourth byte table 5. 
*/ ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 27, 28, 29, 30, 31, ++ 32, 33, 34, 230, 230, 220, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 35, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ }, ++ { /* Fourth byte table 6. */ ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 230, 230, ++ 230, 230, 230, 230, 230, 0, 0, 230, ++ 230, 230, 230, 220, 230, 0, 0, 230, ++ 230, 0, 220, 230, 230, 220, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ }, ++ { /* Fourth byte table 7. */ ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 36, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 230, 220, 230, 230, 220, 230, 230, 220, ++ 220, 220, 230, 220, 220, 230, 220, 230, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ }, ++ { /* Fourth byte table 8. 
*/ ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 230, 230, 220, 230, 220, 230, 220, 230, ++ 220, 230, 230, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ }, ++ { /* Fourth byte table 9. */ ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 7, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ }, ++ { /* Fourth byte table 10. */ ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 9, 0, 0, ++ 0, 230, 220, 230, 230, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ }, ++ { /* Fourth byte table 11. 
*/ ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 7, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ }, ++ { /* Fourth byte table 12. */ ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 9, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ }, ++ { /* Fourth byte table 13. */ ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 7, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ }, ++ { /* Fourth byte table 14. 
*/ ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 9, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ }, ++ { /* Fourth byte table 15. */ ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 7, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ }, ++ { /* Fourth byte table 16. */ ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 9, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ }, ++ { /* Fourth byte table 17. 
*/ ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 7, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ }, ++ { /* Fourth byte table 18. */ ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 9, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ }, ++ { /* Fourth byte table 19. */ ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 9, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ }, ++ { /* Fourth byte table 20. 
*/ ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 9, 0, 0, ++ 0, 0, 0, 0, 0, 84, 91, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ }, ++ { /* Fourth byte table 21. */ ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 9, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ }, ++ { /* Fourth byte table 22. */ ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 9, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ }, ++ { /* Fourth byte table 23. 
*/ ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 9, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ }, ++ { /* Fourth byte table 24. */ ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 103, 103, 9, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ }, ++ { /* Fourth byte table 25. */ ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 107, 107, 107, 107, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ }, ++ { /* Fourth byte table 26. 
*/ ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 118, 118, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ }, ++ { /* Fourth byte table 27. */ ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 122, 122, 122, 122, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ }, ++ { /* Fourth byte table 28. */ ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 220, 220, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 220, 0, 220, ++ 0, 216, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ }, ++ { /* Fourth byte table 29. 
*/ ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 129, 130, 0, 132, 0, 0, 0, ++ 0, 0, 130, 130, 130, 130, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ }, ++ { /* Fourth byte table 30. */ ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 130, 0, 230, 230, 9, 0, 230, 230, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ }, ++ { /* Fourth byte table 31. */ ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 220, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ }, ++ { /* Fourth byte table 32. 
*/ ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 7, ++ 0, 9, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ }, ++ { /* Fourth byte table 33. */ ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 9, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 9, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ }, ++ { /* Fourth byte table 34. */ ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 9, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ }, ++ { /* Fourth byte table 35. 
*/ ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 228, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ }, ++ { /* Fourth byte table 36. */ ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 230, 230, 1, 1, 230, 230, 230, 230, ++ 1, 1, 1, 230, 230, 0, 0, 0, ++ 0, 230, 0, 0, 0, 1, 1, 230, ++ 220, 230, 1, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ }, ++ { /* Fourth byte table 37. */ ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 218, 228, 232, 222, 224, 224, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ }, ++ { /* Fourth byte table 38. 
*/ ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 8, 8, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ }, ++ { /* Fourth byte table 39. */ ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 26, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ }, ++ { /* Fourth byte table 40. */ ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 230, 230, 230, 230, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ }, ++ { /* Fourth byte table 41. 
*/ ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 216, 216, 1, ++ 1, 1, 0, 0, 0, 226, 216, 216, ++ 216, 216, 216, 0, 0, 0, 0, 0, ++ 0, 0, 0, 220, 220, 220, 220, 220, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ }, ++ { /* Fourth byte table 42. */ ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 220, 220, 220, 0, 0, 230, 230, 230, ++ 230, 230, 220, 220, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 230, 230, 230, 230, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ }, ++ { /* Fourth byte table 43. */ ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ }, ++ { /* Fourth byte table 44. 
*/ ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ }, ++ { /* Fourth byte table 45. */ ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ }, ++ { /* Fourth byte table 46. */ ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ }, ++ { /* Fourth byte table 47. 
*/ ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ }, ++ { /* Fourth byte table 48. */ ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ }, ++ { /* Fourth byte table 49. */ ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ }, ++ { /* Fourth byte table 50. 
*/ ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ }, ++ { /* Fourth byte table 51. */ ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ }, ++ { /* Fourth byte table 52. */ ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ }, ++ { /* Fourth byte table 53. 
*/ ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ }, ++ { /* Fourth byte table 54. */ ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ }, ++ }, ++ { ++ { /* Fourth byte table 0. */ ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 230, 230, 230, 230, 230, 230, 230, 230, ++ 230, 230, 230, 230, 230, 230, 230, 230, ++ 230, 230, 230, 230, 230, 232, 220, 220, ++ 220, 220, 232, 216, 220, 220, 220, 220, ++ 220, 202, 202, 220, 220, 220, 220, 202, ++ 202, 220, 220, 220, 220, 220, 220, 220, ++ 220, 220, 220, 220, 1, 1, 1, 1, ++ 1, 220, 220, 220, 220, 230, 230, 230, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ }, ++ { /* Fourth byte table 1. 
*/ ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 230, 230, 230, 230, 230, 240, 230, 220, ++ 220, 220, 230, 230, 230, 220, 220, 0, ++ 230, 230, 230, 220, 220, 220, 220, 230, ++ 232, 220, 220, 230, 233, 234, 234, 233, ++ 234, 234, 233, 230, 230, 230, 230, 230, ++ 230, 230, 230, 230, 230, 230, 230, 230, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ }, ++ { /* Fourth byte table 2. */ ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 230, 230, 230, 230, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ }, ++ { /* Fourth byte table 3. */ ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 220, 230, 230, 230, 230, 220, 230, ++ 230, 230, 222, 220, 230, 230, 230, 230, ++ 230, 230, 220, 220, 220, 220, 220, 220, ++ 230, 230, 220, 230, 230, 222, 228, 230, ++ 10, 11, 12, 13, 14, 15, 16, 17, ++ 18, 19, 19, 20, 21, 22, 0, 23, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ }, ++ { /* Fourth byte table 4. 
*/ ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 24, 25, 0, 230, 220, 0, 18, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ }, ++ { /* Fourth byte table 5. */ ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 230, 230, 230, 230, 230, 230, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ }, ++ { /* Fourth byte table 6. */ ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 27, 28, 29, 30, 31, ++ 32, 33, 34, 230, 230, 220, 220, 230, ++ 230, 230, 230, 230, 220, 230, 230, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 35, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ }, ++ { /* Fourth byte table 7. 
*/ ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 230, 230, ++ 230, 230, 230, 230, 230, 0, 0, 230, ++ 230, 230, 230, 220, 230, 0, 0, 230, ++ 230, 0, 220, 230, 230, 220, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ }, ++ { /* Fourth byte table 8. */ ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 36, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 230, 220, 230, 230, 220, 230, 230, 220, ++ 220, 220, 230, 220, 220, 230, 220, 230, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ }, ++ { /* Fourth byte table 9. */ ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 230, 230, 220, 230, 220, 230, 220, 230, ++ 220, 230, 230, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ }, ++ { /* Fourth byte table 10. 
*/ ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 230, 230, 230, 230, 230, ++ 230, 230, 220, 230, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ }, ++ { /* Fourth byte table 11. */ ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 7, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ }, ++ { /* Fourth byte table 12. */ ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 9, 0, 0, ++ 0, 230, 220, 230, 230, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ }, ++ { /* Fourth byte table 13. 
*/ ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 7, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ }, ++ { /* Fourth byte table 14. */ ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 9, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ }, ++ { /* Fourth byte table 15. */ ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 7, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ }, ++ { /* Fourth byte table 16. 
*/ ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 9, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ }, ++ { /* Fourth byte table 17. */ ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 7, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ }, ++ { /* Fourth byte table 18. */ ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 9, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ }, ++ { /* Fourth byte table 19. 
*/ ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 7, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ }, ++ { /* Fourth byte table 20. */ ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 9, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ }, ++ { /* Fourth byte table 21. */ ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 9, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ }, ++ { /* Fourth byte table 22. 
*/ ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 9, 0, 0, ++ 0, 0, 0, 0, 0, 84, 91, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ }, ++ { /* Fourth byte table 23. */ ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 7, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ }, ++ { /* Fourth byte table 24. */ ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 9, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ }, ++ { /* Fourth byte table 25. 
*/ ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 9, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ }, ++ { /* Fourth byte table 26. */ ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 9, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ }, ++ { /* Fourth byte table 27. */ ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 103, 103, 9, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ }, ++ { /* Fourth byte table 28. 
*/ ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 107, 107, 107, 107, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ }, ++ { /* Fourth byte table 29. */ ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 118, 118, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ }, ++ { /* Fourth byte table 30. */ ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 122, 122, 122, 122, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ }, ++ { /* Fourth byte table 31. 
*/ ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 220, 220, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 220, 0, 220, ++ 0, 216, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ }, ++ { /* Fourth byte table 32. */ ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 129, 130, 0, 132, 0, 0, 0, ++ 0, 0, 130, 130, 130, 130, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ }, ++ { /* Fourth byte table 33. */ ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 130, 0, 230, 230, 9, 0, 230, 230, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ }, ++ { /* Fourth byte table 34. 
*/ ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 220, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ }, ++ { /* Fourth byte table 35. */ ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 7, ++ 0, 9, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ }, ++ { /* Fourth byte table 36. */ ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 230, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ }, ++ { /* Fourth byte table 37. 
*/ ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 9, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 9, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ }, ++ { /* Fourth byte table 38. */ ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 9, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 230, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ }, ++ { /* Fourth byte table 39. */ ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 228, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ }, ++ { /* Fourth byte table 40. 
*/ ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 222, 230, 220, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ }, ++ { /* Fourth byte table 41. */ ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 230, ++ 220, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ }, ++ { /* Fourth byte table 42. */ ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 7, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ }, ++ { /* Fourth byte table 43. 
*/ ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 9, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 230, 220, 230, 230, 230, ++ 230, 230, 230, 230, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ }, ++ { /* Fourth byte table 44. */ ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 230, 230, 220, 230, 230, 230, 230, 230, ++ 230, 230, 220, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 230, 220, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ }, ++ { /* Fourth byte table 45. */ ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 230, 230, 1, 1, 230, 230, 230, 230, ++ 1, 1, 1, 230, 230, 0, 0, 0, ++ 0, 230, 0, 0, 0, 1, 1, 230, ++ 220, 230, 1, 1, 220, 220, 220, 220, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ }, ++ { /* Fourth byte table 46. 
*/ ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 218, 228, 232, 222, 224, 224, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ }, ++ { /* Fourth byte table 47. */ ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 8, 8, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ }, ++ { /* Fourth byte table 48. */ ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 9, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ }, ++ { /* Fourth byte table 49. 
*/ ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 26, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ }, ++ { /* Fourth byte table 50. */ ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 230, 230, 230, 230, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ }, ++ { /* Fourth byte table 51. */ ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 220, 0, 230, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 230, 1, 220, 0, 0, 0, 0, 9, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ }, ++ { /* Fourth byte table 52. 
*/ ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 216, 216, 1, ++ 1, 1, 0, 0, 0, 226, 216, 216, ++ 216, 216, 216, 0, 0, 0, 0, 0, ++ 0, 0, 0, 220, 220, 220, 220, 220, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ }, ++ { /* Fourth byte table 53. */ ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 220, 220, 220, 0, 0, 230, 230, 230, ++ 230, 230, 220, 220, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 230, 230, 230, 230, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ }, ++ { /* Fourth byte table 54. 
*/ ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 230, 230, 230, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ }, ++ }, ++}; ++ ++static const uchar_t u8_composition_b1_tbl[2][256] = { ++ { ++ 0, N_, N_, N_, N_, N_, N_, N_, ++ N_, N_, N_, N_, N_, N_, N_, N_, ++ N_, N_, N_, N_, N_, N_, N_, N_, ++ N_, N_, N_, N_, N_, N_, N_, N_, ++ N_, N_, N_, N_, N_, N_, N_, N_, ++ N_, N_, N_, N_, N_, N_, N_, N_, ++ N_, N_, N_, N_, N_, N_, N_, N_, ++ N_, N_, N_, N_, N_, N_, N_, N_, ++ N_, N_, N_, N_, N_, N_, N_, N_, ++ N_, N_, N_, N_, N_, N_, N_, N_, ++ N_, N_, N_, N_, N_, N_, N_, N_, ++ N_, N_, N_, N_, N_, N_, N_, N_, ++ N_, N_, N_, N_, N_, N_, N_, N_, ++ N_, N_, N_, N_, N_, N_, N_, N_, ++ N_, N_, N_, N_, N_, N_, N_, N_, ++ N_, N_, N_, N_, N_, N_, N_, N_, ++ N_, N_, N_, N_, N_, N_, N_, N_, ++ N_, N_, N_, N_, N_, N_, N_, N_, ++ N_, N_, N_, N_, N_, N_, N_, N_, ++ N_, N_, N_, N_, N_, N_, N_, N_, ++ N_, N_, N_, N_, N_, N_, N_, N_, ++ N_, N_, N_, N_, N_, N_, N_, N_, ++ N_, N_, N_, N_, N_, N_, N_, N_, ++ N_, N_, N_, N_, N_, N_, N_, N_, ++ N_, N_, N_, N_, N_, N_, N_, N_, ++ N_, N_, N_, N_, N_, N_, N_, N_, ++ N_, N_, N_, N_, N_, N_, N_, N_, ++ N_, N_, N_, N_, N_, N_, N_, N_, ++ N_, N_, N_, N_, N_, N_, N_, N_, ++ N_, N_, N_, N_, N_, N_, N_, N_, ++ N_, N_, N_, N_, N_, N_, N_, N_, ++ N_, N_, N_, N_, N_, N_, N_, N_, ++ }, ++ { ++ 0, N_, N_, N_, N_, N_, N_, N_, ++ N_, N_, N_, N_, N_, N_, N_, N_, ++ N_, N_, N_, N_, N_, N_, N_, N_, ++ N_, N_, N_, N_, N_, N_, N_, N_, ++ N_, N_, N_, N_, N_, N_, N_, N_, ++ N_, N_, N_, N_, N_, N_, N_, N_, ++ N_, N_, N_, N_, N_, N_, N_, N_, ++ N_, N_, N_, N_, N_, N_, N_, N_, ++ N_, N_, N_, N_, N_, N_, N_, N_, ++ N_, N_, N_, N_, N_, N_, N_, N_, ++ N_, N_, N_, N_, N_, N_, N_, N_, ++ N_, N_, N_, N_, N_, N_, N_, N_, ++ N_, N_, N_, N_, N_, N_, N_, N_, ++ N_, N_, N_, N_, N_, N_, N_, N_, ++ N_, N_, N_, N_, N_, N_, N_, N_, ++ N_, N_, N_, N_, N_, N_, N_, N_, ++ N_, N_, N_, N_, N_, N_, N_, N_, ++ N_, N_, N_, N_, N_, N_, N_, N_, ++ N_, N_, N_, N_, N_, N_, N_, N_, ++ N_, N_, N_, N_, N_, N_, N_, N_, ++ N_, N_, N_, N_, N_, N_, N_, N_, ++ N_, N_, N_, N_, N_, N_, N_, N_, ++ N_, N_, N_, N_, N_, N_, N_, N_, ++ N_, N_, N_, N_, N_, N_, N_, N_, ++ N_, N_, N_, N_, N_, N_, N_, N_, ++ N_, N_, N_, N_, N_, N_, N_, N_, ++ N_, N_, N_, N_, N_, N_, N_, N_, ++ N_, N_, N_, N_, N_, N_, N_, N_, ++ N_, N_, N_, N_, N_, N_, N_, N_, ++ N_, N_, N_, N_, N_, N_, N_, N_, ++ N_, N_, N_, N_, N_, N_, N_, N_, ++ N_, N_, N_, N_, N_, N_, N_, N_, ++ }, ++}; ++ ++static const uchar_t u8_composition_b2_tbl[2][1][256] = { ++ { ++ { ++ 0, N_, N_, N_, N_, N_, N_, N_, ++ N_, N_, N_, N_, N_, N_, N_, N_, ++ N_, N_, N_, N_, N_, N_, N_, N_, ++ N_, N_, N_, N_, N_, N_, N_, N_, ++ N_, N_, N_, N_, N_, N_, N_, N_, ++ N_, N_, N_, N_, N_, N_, N_, N_, ++ N_, N_, N_, N_, N_, N_, N_, N_, ++ N_, N_, N_, N_, N_, 
N_, N_, N_, ++ N_, N_, N_, N_, N_, N_, N_, N_, ++ N_, N_, N_, N_, N_, N_, N_, N_, ++ N_, N_, N_, N_, N_, N_, N_, N_, ++ N_, N_, N_, N_, N_, N_, N_, N_, ++ N_, N_, N_, N_, N_, N_, N_, N_, ++ N_, N_, N_, N_, N_, N_, N_, N_, ++ N_, N_, N_, N_, N_, N_, N_, N_, ++ N_, N_, N_, N_, N_, N_, N_, N_, ++ N_, N_, N_, N_, N_, N_, N_, N_, ++ N_, N_, N_, N_, N_, N_, N_, N_, ++ N_, N_, N_, N_, N_, N_, N_, N_, ++ N_, N_, N_, N_, N_, N_, N_, N_, ++ N_, N_, N_, N_, N_, N_, N_, N_, ++ N_, N_, N_, N_, N_, N_, N_, N_, ++ N_, N_, N_, N_, N_, N_, N_, N_, ++ N_, N_, N_, N_, N_, N_, N_, N_, ++ N_, N_, N_, N_, N_, N_, N_, N_, ++ N_, N_, N_, N_, N_, N_, N_, N_, ++ N_, N_, N_, N_, N_, N_, N_, N_, ++ N_, N_, N_, N_, N_, N_, N_, N_, ++ 1, 2, 3, 4, N_, N_, N_, N_, ++ N_, N_, N_, N_, N_, N_, N_, N_, ++ N_, N_, N_, N_, N_, N_, N_, N_, ++ N_, N_, N_, N_, N_, N_, N_, N_, ++ }, ++ ++ }, ++ { ++ { ++ 0, N_, N_, N_, N_, N_, N_, N_, ++ N_, N_, N_, N_, N_, N_, N_, N_, ++ N_, N_, N_, N_, N_, N_, N_, N_, ++ N_, N_, N_, N_, N_, N_, N_, N_, ++ N_, N_, N_, N_, N_, N_, N_, N_, ++ N_, N_, N_, N_, N_, N_, N_, N_, ++ N_, N_, N_, N_, N_, N_, N_, N_, ++ N_, N_, N_, N_, N_, N_, N_, N_, ++ N_, N_, N_, N_, N_, N_, N_, N_, ++ N_, N_, N_, N_, N_, N_, N_, N_, ++ N_, N_, N_, N_, N_, N_, N_, N_, ++ N_, N_, N_, N_, N_, N_, N_, N_, ++ N_, N_, N_, N_, N_, N_, N_, N_, ++ N_, N_, N_, N_, N_, N_, N_, N_, ++ N_, N_, N_, N_, N_, N_, N_, N_, ++ N_, N_, N_, N_, N_, N_, N_, N_, ++ N_, N_, N_, N_, N_, N_, N_, N_, ++ N_, N_, N_, N_, N_, N_, N_, N_, ++ N_, N_, N_, N_, N_, N_, N_, N_, ++ N_, N_, N_, N_, N_, N_, N_, N_, ++ N_, N_, N_, N_, N_, N_, N_, N_, ++ N_, N_, N_, N_, N_, N_, N_, N_, ++ N_, N_, N_, N_, N_, N_, N_, N_, ++ N_, N_, N_, N_, N_, N_, N_, N_, ++ N_, N_, N_, N_, N_, N_, N_, N_, ++ N_, N_, N_, N_, N_, N_, N_, N_, ++ N_, N_, N_, N_, N_, N_, N_, N_, ++ N_, N_, N_, N_, N_, N_, N_, N_, ++ 1, 2, 3, 4, N_, N_, N_, N_, ++ N_, N_, N_, N_, N_, N_, N_, N_, ++ N_, N_, N_, N_, N_, N_, N_, N_, ++ N_, N_, N_, N_, N_, N_, N_, N_, ++ }, ++ ++ }, ++ ++}; ++ ++static const u8_displacement_t u8_composition_b3_tbl[2][5][256] = { ++ { ++ { /* Third byte table 0. 
*/ ++ { 0x8000, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { 0, 2470 }, ++ { 0x8001, 2491 }, { 1, 2871 }, { 2, 2959 }, ++ { 3, 3061 }, { 4, 3212 }, { 5, 3226 }, ++ { N_, 0 }, { 6, 3270 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { 0x8002, 3277 }, ++ { 7, 3774 }, { 8, 3949 }, { 9, 4198 }, ++ { N_, 0 }, { 10, 4265 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { 11, 4293 }, { 12, 4312 }, { N_, 0 }, ++ { 13, 4326 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, ++ }, ++ { /* Third byte table 1. 
*/ ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { 14, 4347 }, ++ { N_, 0 }, { N_, 0 }, { 15, 4374 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { 16, 4391 }, ++ { 17, 4416 }, { 18, 4425 }, { N_, 0 }, ++ { 19, 4451 }, { 20, 4460 }, { 21, 4469 }, ++ { N_, 0 }, { 22, 4503 }, { N_, 0 }, ++ { 23, 4529 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, ++ }, ++ { /* Third byte table 2. 
*/ ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { 24, 4563 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { 25, 4572 }, { 26, 4588 }, ++ { 27, 4620 }, { 28, 4666 }, { 0x8003, 4682 }, ++ { 0x8004, 5254 }, { 29, 5616 }, { 30, 5646 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, ++ }, ++ { /* Third byte table 3. 
*/ ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { 31, 5684 }, ++ { 32, 5708 }, { 33, 5732 }, { 34, 5780 }, ++ { 35, 5900 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, ++ }, ++ { /* Third byte table 4. 
*/ ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { 36, 6012 }, { 37, 6241 }, { 38, 6358 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, ++ }, ++ }, ++ { ++ { /* Third byte table 0. 
*/ ++ { 0x8000, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { 0, 2470 }, ++ { 0x8001, 2491 }, { 1, 2871 }, { 2, 2959 }, ++ { 3, 3061 }, { 4, 3212 }, { 5, 3226 }, ++ { N_, 0 }, { 6, 3270 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { 0x8002, 3277 }, ++ { 7, 3774 }, { 8, 3949 }, { 9, 4198 }, ++ { N_, 0 }, { 10, 4265 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { 11, 4293 }, { 12, 4312 }, { N_, 0 }, ++ { 13, 4326 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, ++ }, ++ { /* Third byte table 1. 
*/ ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { 14, 4347 }, ++ { N_, 0 }, { N_, 0 }, { 15, 4374 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { 16, 4391 }, ++ { 17, 4416 }, { 18, 4425 }, { N_, 0 }, ++ { 19, 4451 }, { 20, 4460 }, { 21, 4469 }, ++ { N_, 0 }, { 22, 4503 }, { N_, 0 }, ++ { 23, 4529 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, ++ }, ++ { /* Third byte table 2. 
*/ ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { 24, 4563 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { 25, 4572 }, { 26, 4662 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { 27, 4671 }, { 28, 4687 }, ++ { 29, 4719 }, { 30, 4765 }, { 0x8003, 4781 }, ++ { 0x8004, 5353 }, { 31, 5715 }, { 32, 5745 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, ++ }, ++ { /* Third byte table 3. 
*/ ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { 33, 5783 }, ++ { 34, 5807 }, { 35, 5831 }, { 36, 5879 }, ++ { 37, 5999 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, ++ }, ++ { /* Third byte table 4. 
*/ ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { 38, 6111 }, { 39, 6340 }, { 40, 6457 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, ++ }, ++ }, ++}; ++ ++static const uchar_t u8_composition_b4_tbl[2][41][257] = { ++ { ++ { /* Fourth byte table 0. 
*/ ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 21, 21, 21, 21, 21, 21, 21, ++ 21, 21, 21, 21, 21, 21, 21, 21, ++ 21, 21, 21, 21, 21, 21, 21, 21, ++ 21, 21, 21, 21, 21, 21, 21, 21, ++ 21, 21, 21, 21, 21, 21, 21, 21, ++ 21, 21, 21, 21, 21, 21, 21, 21, ++ 21, 21, 21, 21, 21, 21, 21, 21, ++ 21, 21, 21, 21, 21, 21, 21, 21, ++ 21, 21, 21, 21, 21, 21, 21, 21, ++ 21, 21, 21, 21, 21, 21, 21, 21, ++ 21, 21, 21, 21, 21, 21, 21, 21, ++ 21, ++ }, ++ { /* Fourth byte table 1. */ ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 29, 58, 58, 58, 58, ++ 58, 58, 58, 58, 58, 58, 58, 58, ++ 58, 58, 58, 73, 88, 88, 88, 88, ++ 88, 88, 88, 88, 88, 88, 88, 88, ++ 88, 88, 88, 88, 88, 88, 88, 88, ++ 88, 88, 88, 88, 88, 88, 88, 88, ++ 88, 88, 88, 88, 88, 88, 88, 88, ++ 88, 88, 88, 88, 88, 88, 88, 88, ++ 88, 88, 88, 88, 88, 88, 88, 88, ++ 88, 88, 88, 88, 88, 88, 88, 88, ++ 88, 88, 88, 88, 88, 88, 88, 88, ++ 88, 88, 88, 88, 88, 88, 88, 88, ++ 88, 88, 88, 88, 88, 88, 88, 88, ++ 88, 88, 88, 88, 88, 88, 88, 88, ++ 88, 88, 88, 88, 88, 88, 88, 88, ++ 88, 88, 88, 88, 88, 88, 88, 88, ++ 88, ++ }, ++ { /* Fourth byte table 2. */ ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 15, 30, 30, ++ 30, 30, 30, 30, 30, 30, 30, 30, ++ 30, 30, 30, 38, 46, 46, 46, 46, ++ 46, 54, 62, 62, 62, 62, 62, 62, ++ 62, 70, 78, 86, 94, 94, 94, 94, ++ 94, 94, 94, 94, 94, 94, 94, 94, ++ 94, 94, 94, 94, 94, 94, 94, 94, ++ 102, 102, 102, 102, 102, 102, 102, 102, ++ 102, 102, 102, 102, 102, 102, 102, 102, ++ 102, 102, 102, 102, 102, 102, 102, 102, ++ 102, 102, 102, 102, 102, 102, 102, 102, ++ 102, 102, 102, 102, 102, 102, 102, 102, ++ 102, 102, 102, 102, 102, 102, 102, 102, ++ 102, 102, 102, 102, 102, 102, 102, 102, ++ 102, 102, 102, 102, 102, 102, 102, 102, ++ 102, ++ }, ++ { /* Fourth byte table 3. 
*/ ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 36, 72, 72, 72, 72, 72, 72, ++ 72, 72, 72, 72, 72, 72, 72, 72, ++ 108, 144, 144, 144, 144, 144, 144, 144, ++ 151, 151, 151, 151, 151, 151, 151, 151, ++ 151, 151, 151, 151, 151, 151, 151, 151, ++ 151, 151, 151, 151, 151, 151, 151, 151, ++ 151, 151, 151, 151, 151, 151, 151, 151, ++ 151, 151, 151, 151, 151, 151, 151, 151, ++ 151, 151, 151, 151, 151, 151, 151, 151, ++ 151, 151, 151, 151, 151, 151, 151, 151, ++ 151, 151, 151, 151, 151, 151, 151, 151, ++ 151, 151, 151, 151, 151, 151, 151, 151, ++ 151, ++ }, ++ { /* Fourth byte table 4. */ ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 7, 14, 14, 14, 14, ++ 14, 14, 14, 14, 14, 14, 14, 14, ++ 14, 14, 14, 14, 14, 14, 14, 14, ++ 14, 14, 14, 14, 14, 14, 14, 14, ++ 14, 14, 14, 14, 14, 14, 14, 14, ++ 14, 14, 14, 14, 14, 14, 14, 14, ++ 14, 14, 14, 14, 14, 14, 14, 14, ++ 14, 14, 14, 14, 14, 14, 14, 14, ++ 14, 14, 14, 14, 14, 14, 14, 14, ++ 14, 14, 14, 14, 14, 14, 14, 14, ++ 14, 14, 14, 14, 14, 14, 14, 14, ++ 14, ++ }, ++ { /* Fourth byte table 5. */ ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 7, ++ 14, 22, 30, 30, 30, 30, 30, 37, ++ 44, 44, 44, 44, 44, 44, 44, 44, ++ 44, 44, 44, 44, 44, 44, 44, 44, ++ 44, 44, 44, 44, 44, 44, 44, 44, ++ 44, 44, 44, 44, 44, 44, 44, 44, ++ 44, 44, 44, 44, 44, 44, 44, 44, ++ 44, 44, 44, 44, 44, 44, 44, 44, ++ 44, 44, 44, 44, 44, 44, 44, 44, ++ 44, 44, 44, 44, 44, 44, 44, 44, ++ 44, 44, 44, 44, 44, 44, 44, 44, ++ 44, 44, 44, 44, 44, 44, 44, 44, ++ 44, ++ }, ++ { /* Fourth byte table 6. 
*/ ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 7, 7, 7, 7, 7, ++ 7, 7, 7, 7, 7, 7, 7, 7, ++ 7, 7, 7, 7, 7, 7, 7, 7, ++ 7, 7, 7, 7, 7, 7, 7, 7, ++ 7, 7, 7, 7, 7, 7, 7, 7, ++ 7, 7, 7, 7, 7, 7, 7, 7, ++ 7, 7, 7, 7, 7, 7, 7, 7, ++ 7, 7, 7, 7, 7, 7, 7, 7, ++ 7, 7, 7, 7, 7, 7, 7, 7, ++ 7, 7, 7, 7, 7, 7, 7, 7, ++ 7, 7, 7, 7, 7, 7, 7, 7, ++ 7, 7, 7, 7, 7, 7, 7, 7, ++ 7, 7, 7, 7, 7, 7, 7, 7, ++ 7, 7, 7, 7, 7, 7, 7, 7, ++ 7, ++ }, ++ { /* Fourth byte table 7. */ ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 15, 15, 15, 15, 70, 70, ++ 70, 70, 112, 133, 154, 154, 154, 162, ++ 162, 162, 162, 175, 175, 175, 175, 175, ++ 175, 175, 175, 175, 175, 175, 175, 175, ++ 175, 175, 175, 175, 175, 175, 175, 175, ++ 175, 175, 175, 175, 175, 175, 175, 175, ++ 175, 175, 175, 175, 175, 175, 175, 175, ++ 175, 175, 175, 175, 175, 175, 175, 175, ++ 175, 175, 175, 175, 175, 175, 175, 175, ++ 175, 175, 175, 175, 175, 175, 175, 175, ++ 175, 175, 175, 175, 175, 175, 175, 175, ++ 175, 175, 175, 175, 175, 175, 175, 175, ++ 175, 175, 175, 175, 175, 175, 175, 175, ++ 175, 175, 175, 175, 175, 175, 175, 175, ++ 175, 175, 175, 175, 175, 175, 175, 175, ++ 175, 175, 175, 175, 175, 175, 175, 175, ++ 175, ++ }, ++ { /* Fourth byte table 8. */ ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 7, ++ 7, 7, 7, 7, 7, 7, 7, 7, ++ 7, 20, 20, 20, 27, 27, 46, 59, ++ 66, 91, 91, 98, 98, 98, 98, 105, ++ 105, 105, 105, 105, 130, 130, 130, 130, ++ 137, 137, 137, 137, 144, 144, 151, 151, ++ 151, 164, 164, 164, 171, 171, 190, 203, ++ 210, 235, 235, 242, 242, 242, 242, 249, ++ 249, 249, 249, 249, 249, 249, 249, 249, ++ 249, 249, 249, 249, 249, 249, 249, 249, ++ 249, 249, 249, 249, 249, 249, 249, 249, ++ 249, 249, 249, 249, 249, 249, 249, 249, ++ 249, 249, 249, 249, 249, 249, 249, 249, ++ 249, 249, 249, 249, 249, 249, 249, 249, ++ 249, 249, 249, 249, 249, 249, 249, 249, ++ 249, 249, 249, 249, 249, 249, 249, 249, ++ 249, ++ }, ++ { /* Fourth byte table 9. 
*/ ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 25, 25, 25, 25, ++ 32, 32, 32, 32, 39, 39, 46, 46, ++ 46, 46, 46, 46, 46, 46, 46, 53, ++ 53, 53, 53, 53, 53, 53, 53, 53, ++ 53, 53, 53, 53, 53, 53, 53, 53, ++ 53, 53, 53, 53, 53, 53, 53, 53, ++ 53, 53, 53, 53, 53, 60, 67, 67, ++ 67, 67, 67, 67, 67, 67, 67, 67, ++ 67, 67, 67, 67, 67, 67, 67, 67, ++ 67, 67, 67, 67, 67, 67, 67, 67, ++ 67, 67, 67, 67, 67, 67, 67, 67, ++ 67, 67, 67, 67, 67, 67, 67, 67, ++ 67, 67, 67, 67, 67, 67, 67, 67, ++ 67, 67, 67, 67, 67, 67, 67, 67, ++ 67, 67, 67, 67, 67, 67, 67, 67, ++ 67, 67, 67, 67, 67, 67, 67, 67, ++ 67, ++ }, ++ { /* Fourth byte table 10. */ ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 7, 14, 14, 14, 14, 14, 14, ++ 14, 14, 14, 14, 14, 14, 14, 14, ++ 14, 21, 28, 28, 28, 28, 28, 28, ++ 28, 28, 28, 28, 28, 28, 28, 28, ++ 28, 28, 28, 28, 28, 28, 28, 28, ++ 28, 28, 28, 28, 28, 28, 28, 28, ++ 28, 28, 28, 28, 28, 28, 28, 28, ++ 28, 28, 28, 28, 28, 28, 28, 28, ++ 28, 28, 28, 28, 28, 28, 28, 28, ++ 28, 28, 28, 28, 28, 28, 28, 28, ++ 28, 28, 28, 28, 28, 28, 28, 28, ++ 28, 28, 28, 28, 28, 28, 28, 28, ++ 28, 28, 28, 28, 28, 28, 28, 28, ++ 28, ++ }, ++ { /* Fourth byte table 11. */ ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 19, 19, 19, 19, 19, 19, 19, 19, ++ 19, 19, 19, 19, 19, 19, 19, 19, ++ 19, 19, 19, 19, 19, 19, 19, 19, ++ 19, 19, 19, 19, 19, 19, 19, 19, ++ 19, 19, 19, 19, 19, 19, 19, 19, ++ 19, 19, 19, 19, 19, 19, 19, 19, ++ 19, 19, 19, 19, 19, 19, 19, 19, ++ 19, 19, 19, 19, 19, 19, 19, 19, ++ 19, 19, 19, 19, 19, 19, 19, 19, ++ 19, 19, 19, 19, 19, 19, 19, 19, ++ 19, 19, 19, 19, 19, 19, 19, 19, ++ 19, ++ }, ++ { /* Fourth byte table 12. 
*/ ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 7, 7, 14, 14, 14, 14, 14, ++ 14, 14, 14, 14, 14, 14, 14, 14, ++ 14, 14, 14, 14, 14, 14, 14, 14, ++ 14, 14, 14, 14, 14, 14, 14, 14, ++ 14, 14, 14, 14, 14, 14, 14, 14, ++ 14, 14, 14, 14, 14, 14, 14, 14, ++ 14, 14, 14, 14, 14, 14, 14, 14, ++ 14, 14, 14, 14, 14, 14, 14, 14, ++ 14, 14, 14, 14, 14, 14, 14, 14, ++ 14, 14, 14, 14, 14, 14, 14, 14, ++ 14, 14, 14, 14, 14, 14, 14, 14, ++ 14, 14, 14, 14, 14, 14, 14, 14, ++ 14, 14, 14, 14, 14, 14, 14, 14, ++ 14, 14, 14, 14, 14, 14, 14, 14, ++ 14, 14, 14, 14, 14, 14, 14, 14, ++ 14, ++ }, ++ { /* Fourth byte table 13. */ ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 7, 7, 7, 7, 7, 7, ++ 7, 7, 7, 7, 7, 7, 7, 7, ++ 7, 7, 7, 14, 14, 14, 21, 21, ++ 21, 21, 21, 21, 21, 21, 21, 21, ++ 21, 21, 21, 21, 21, 21, 21, 21, ++ 21, 21, 21, 21, 21, 21, 21, 21, ++ 21, 21, 21, 21, 21, 21, 21, 21, ++ 21, 21, 21, 21, 21, 21, 21, 21, ++ 21, 21, 21, 21, 21, 21, 21, 21, ++ 21, 21, 21, 21, 21, 21, 21, 21, ++ 21, 21, 21, 21, 21, 21, 21, 21, ++ 21, 21, 21, 21, 21, 21, 21, 21, ++ 21, 21, 21, 21, 21, 21, 21, 21, ++ 21, 21, 21, 21, 21, 21, 21, 21, ++ 21, 21, 21, 21, 21, 21, 21, 21, ++ 21, 21, 21, 21, 21, 21, 21, 21, ++ 21, ++ }, ++ { /* Fourth byte table 14. */ ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 9, 9, 9, 9, 9, 9, 9, ++ 9, 18, 18, 18, 27, 27, 27, 27, ++ 27, 27, 27, 27, 27, 27, 27, 27, ++ 27, 27, 27, 27, 27, 27, 27, 27, ++ 27, 27, 27, 27, 27, 27, 27, 27, ++ 27, 27, 27, 27, 27, 27, 27, 27, ++ 27, 27, 27, 27, 27, 27, 27, 27, ++ 27, 27, 27, 27, 27, 27, 27, 27, ++ 27, 27, 27, 27, 27, 27, 27, 27, ++ 27, 27, 27, 27, 27, 27, 27, 27, ++ 27, 27, 27, 27, 27, 27, 27, 27, ++ 27, ++ }, ++ { /* Fourth byte table 15. 
*/ ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 17, 17, 17, 17, 17, 17, 17, 17, ++ 17, 17, 17, 17, 17, 17, 17, 17, ++ 17, 17, 17, 17, 17, 17, 17, 17, ++ 17, 17, 17, 17, 17, 17, 17, 17, ++ 17, 17, 17, 17, 17, 17, 17, 17, ++ 17, 17, 17, 17, 17, 17, 17, 17, ++ 17, 17, 17, 17, 17, 17, 17, 17, ++ 17, 17, 17, 17, 17, 17, 17, 17, ++ 17, 17, 17, 17, 17, 17, 17, 17, ++ 17, 17, 17, 17, 17, 17, 17, 17, ++ 17, 17, 17, 17, 17, 17, 17, 17, ++ 17, 17, 17, 17, 17, 17, 17, 17, ++ 17, 17, 17, 17, 17, 17, 17, 17, ++ 17, 17, 17, 17, 17, 17, 17, 17, ++ 17, 17, 17, 17, 17, 17, 17, 17, ++ 17, ++ }, ++ { /* Fourth byte table 16. */ ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 25, 25, 25, 25, 25, 25, 25, 25, ++ 25, 25, 25, 25, 25, 25, 25, 25, ++ 25, 25, 25, 25, 25, 25, 25, 25, ++ 25, 25, 25, 25, 25, 25, 25, 25, ++ 25, 25, 25, 25, 25, 25, 25, 25, ++ 25, 25, 25, 25, 25, 25, 25, 25, ++ 25, 25, 25, 25, 25, 25, 25, 25, ++ 25, 25, 25, 25, 25, 25, 25, 25, ++ 25, 25, 25, 25, 25, 25, 25, 25, ++ 25, 25, 25, 25, 25, 25, 25, 25, ++ 25, 25, 25, 25, 25, 25, 25, 25, ++ 25, 25, 25, 25, 25, 25, 25, 25, ++ 25, 25, 25, 25, 25, 25, 25, 25, ++ 25, 25, 25, 25, 25, 25, 25, 25, ++ 25, 25, 25, 25, 25, 25, 25, 25, ++ 25, ++ }, ++ { /* Fourth byte table 17. */ ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 9, 9, 9, 9, 9, ++ 9, 9, 9, 9, 9, 9, 9, 9, ++ 9, 9, 9, 9, 9, 9, 9, 9, ++ 9, 9, 9, 9, 9, 9, 9, 9, ++ 9, 9, 9, 9, 9, 9, 9, 9, ++ 9, 9, 9, 9, 9, 9, 9, 9, ++ 9, 9, 9, 9, 9, 9, 9, 9, ++ 9, 9, 9, 9, 9, 9, 9, 9, ++ 9, 9, 9, 9, 9, 9, 9, 9, ++ 9, 9, 9, 9, 9, 9, 9, 9, ++ 9, 9, 9, 9, 9, 9, 9, 9, ++ 9, 9, 9, 9, 9, 9, 9, 9, ++ 9, 9, 9, 9, 9, 9, 9, 9, ++ 9, 9, 9, 9, 9, 9, 9, 9, ++ 9, ++ }, ++ { /* Fourth byte table 18. 
*/ ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 17, ++ 26, 26, 26, 26, 26, 26, 26, 26, ++ 26, 26, 26, 26, 26, 26, 26, 26, ++ 26, 26, 26, 26, 26, 26, 26, 26, ++ 26, 26, 26, 26, 26, 26, 26, 26, ++ 26, 26, 26, 26, 26, 26, 26, 26, ++ 26, 26, 26, 26, 26, 26, 26, 26, ++ 26, 26, 26, 26, 26, 26, 26, 26, ++ 26, 26, 26, 26, 26, 26, 26, 26, ++ 26, 26, 26, 26, 26, 26, 26, 26, ++ 26, 26, 26, 26, 26, 26, 26, 26, ++ 26, 26, 26, 26, 26, 26, 26, 26, ++ 26, 26, 26, 26, 26, 26, 26, 26, ++ 26, 26, 26, 26, 26, 26, 26, 26, ++ 26, 26, 26, 26, 26, 26, 26, 26, ++ 26, 26, 26, 26, 26, 26, 26, 26, ++ 26, ++ }, ++ { /* Fourth byte table 19. */ ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 9, ++ 9, 9, 9, 9, 9, 9, 9, 9, ++ 9, 9, 9, 9, 9, 9, 9, 9, ++ 9, 9, 9, 9, 9, 9, 9, 9, ++ 9, 9, 9, 9, 9, 9, 9, 9, ++ 9, 9, 9, 9, 9, 9, 9, 9, ++ 9, 9, 9, 9, 9, 9, 9, 9, ++ 9, 9, 9, 9, 9, 9, 9, 9, ++ 9, 9, 9, 9, 9, 9, 9, 9, ++ 9, 9, 9, 9, 9, 9, 9, 9, ++ 9, 9, 9, 9, 9, 9, 9, 9, ++ 9, 9, 9, 9, 9, 9, 9, 9, ++ 9, 9, 9, 9, 9, 9, 9, 9, ++ 9, 9, 9, 9, 9, 9, 9, 9, ++ 9, 9, 9, 9, 9, 9, 9, 9, ++ 9, 9, 9, 9, 9, 9, 9, 9, ++ 9, ++ }, ++ { /* Fourth byte table 20. */ ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 9, 9, 9, 9, 9, 9, 9, 9, ++ 9, 9, 9, 9, 9, 9, 9, 9, ++ 9, 9, 9, 9, 9, 9, 9, 9, ++ 9, 9, 9, 9, 9, 9, 9, 9, ++ 9, 9, 9, 9, 9, 9, 9, 9, ++ 9, 9, 9, 9, 9, 9, 9, 9, ++ 9, 9, 9, 9, 9, 9, 9, 9, ++ 9, 9, 9, 9, 9, 9, 9, 9, ++ 9, ++ }, ++ { /* Fourth byte table 21. 
*/ ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 25, ++ 25, 25, 25, 34, 34, 34, 34, 34, ++ 34, 34, 34, 34, 34, 34, 34, 34, ++ 34, 34, 34, 34, 34, 34, 34, 34, ++ 34, 34, 34, 34, 34, 34, 34, 34, ++ 34, 34, 34, 34, 34, 34, 34, 34, ++ 34, 34, 34, 34, 34, 34, 34, 34, ++ 34, 34, 34, 34, 34, 34, 34, 34, ++ 34, 34, 34, 34, 34, 34, 34, 34, ++ 34, 34, 34, 34, 34, 34, 34, 34, ++ 34, 34, 34, 34, 34, 34, 34, 34, ++ 34, 34, 34, 34, 34, 34, 34, 34, ++ 34, 34, 34, 34, 34, 34, 34, 34, ++ 34, 34, 34, 34, 34, 34, 34, 34, ++ 34, 34, 34, 34, 34, 34, 34, 34, ++ 34, 34, 34, 34, 34, 34, 34, 34, ++ 34, ++ }, ++ { /* Fourth byte table 22. */ ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 17, ++ 26, 26, 26, 26, 26, 26, 26, 26, ++ 26, 26, 26, 26, 26, 26, 26, 26, ++ 26, 26, 26, 26, 26, 26, 26, 26, ++ 26, 26, 26, 26, 26, 26, 26, 26, ++ 26, 26, 26, 26, 26, 26, 26, 26, ++ 26, 26, 26, 26, 26, 26, 26, 26, ++ 26, 26, 26, 26, 26, 26, 26, 26, ++ 26, 26, 26, 26, 26, 26, 26, 26, ++ 26, 26, 26, 26, 26, 26, 26, 26, ++ 26, 26, 26, 26, 26, 26, 26, 26, ++ 26, 26, 26, 26, 26, 26, 26, 26, ++ 26, 26, 26, 26, 26, 26, 26, 26, ++ 26, 26, 26, 26, 26, 26, 26, 26, ++ 26, 26, 26, 26, 26, 26, 26, 26, ++ 26, 26, 26, 26, 26, 26, 26, 26, ++ 26, ++ }, ++ { /* Fourth byte table 23. */ ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 25, 25, 25, 34, 34, 34, ++ 34, 34, 34, 34, 34, 34, 34, 34, ++ 34, 34, 34, 34, 34, 34, 34, 34, ++ 34, 34, 34, 34, 34, 34, 34, 34, ++ 34, 34, 34, 34, 34, 34, 34, 34, ++ 34, 34, 34, 34, 34, 34, 34, 34, ++ 34, 34, 34, 34, 34, 34, 34, 34, ++ 34, 34, 34, 34, 34, 34, 34, 34, ++ 34, 34, 34, 34, 34, 34, 34, 34, ++ 34, 34, 34, 34, 34, 34, 34, 34, ++ 34, 34, 34, 34, 34, 34, 34, 34, ++ 34, 34, 34, 34, 34, 34, 34, 34, ++ 34, 34, 34, 34, 34, 34, 34, 34, ++ 34, ++ }, ++ { /* Fourth byte table 24. 
*/ ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 9, 9, ++ 9, 9, 9, 9, 9, 9, 9, 9, ++ 9, 9, 9, 9, 9, 9, 9, 9, ++ 9, 9, 9, 9, 9, 9, 9, 9, ++ 9, 9, 9, 9, 9, 9, 9, 9, ++ 9, 9, 9, 9, 9, 9, 9, 9, ++ 9, 9, 9, 9, 9, 9, 9, 9, ++ 9, 9, 9, 9, 9, 9, 9, 9, ++ 9, 9, 9, 9, 9, 9, 9, 9, ++ 9, 9, 9, 9, 9, 9, 9, 9, ++ 9, 9, 9, 9, 9, 9, 9, 9, ++ 9, 9, 9, 9, 9, 9, 9, 9, ++ 9, ++ }, ++ { /* Fourth byte table 25. */ ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 8, ++ 16, 16, 16, 16, 16, 16, 16, 16, ++ 16, 16, 16, 16, 16, 16, 16, 16, ++ 16, 16, 16, 16, 16, 16, 16, 16, ++ 16, 16, 16, 16, 16, 16, 16, 16, ++ 16, 16, 16, 16, 16, 16, 16, 16, ++ 16, 16, 16, 16, 16, 16, 16, 16, ++ 16, 16, 16, 16, 16, 16, 16, 16, ++ 16, 16, 16, 16, 16, 16, 16, 16, ++ 16, 16, 16, 16, 16, 16, 16, 16, ++ 16, ++ }, ++ { /* Fourth byte table 26. */ ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 8, 16, 16, 16, 16, ++ 16, 16, 16, 24, 32, 32, 32, 32, ++ 32, 32, 32, 32, 32, 32, 32, 32, ++ 32, 32, 32, 32, 32, 32, 32, 32, ++ 32, 32, 32, 32, 32, 32, 32, 32, ++ 32, 32, 32, 32, 32, 32, 32, 32, ++ 32, 32, 32, 32, 32, 32, 32, 32, ++ 32, 32, 32, 32, 32, 32, 32, 32, ++ 32, 32, 32, 32, 32, 32, 32, 32, ++ 32, 32, 32, 32, 32, 32, 32, 32, ++ 32, 32, 32, 32, 32, 32, 32, 32, ++ 32, 32, 32, 32, 32, 32, 32, 32, ++ 32, 32, 32, 32, 32, 32, 32, 32, ++ 32, ++ }, ++ { /* Fourth byte table 27. 
*/ ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 15, 30, 30, 30, 30, 30, 30, ++ 30, 30, 30, 30, 30, 30, 30, 30, ++ 30, 30, 30, 30, 30, 30, 30, 30, ++ 30, 38, 46, 46, 46, 46, 46, 46, ++ 46, 46, 46, 46, 46, 46, 46, 46, ++ 46, 46, 46, 46, 46, 46, 46, 46, ++ 46, 46, 46, 46, 46, 46, 46, 46, ++ 46, 46, 46, 46, 46, 46, 46, 46, ++ 46, 46, 46, 46, 46, 46, 46, 46, ++ 46, 46, 46, 46, 46, 46, 46, 46, ++ 46, 46, 46, 46, 46, 46, 46, 46, ++ 46, 46, 46, 46, 46, 46, 46, 46, ++ 46, ++ }, ++ { /* Fourth byte table 28. */ ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 8, 16, 16, ++ 16, 16, 16, 16, 16, 16, 16, 16, ++ 16, 16, 16, 16, 16, 16, 16, 16, ++ 16, 16, 16, 16, 16, 16, 16, 16, ++ 16, 16, 16, 16, 16, 16, 16, 16, ++ 16, 16, 16, 16, 16, 16, 16, 16, ++ 16, 16, 16, 16, 16, 16, 16, 16, ++ 16, 16, 16, 16, 16, 16, 16, 16, ++ 16, 16, 16, 16, 16, 16, 16, 16, ++ 16, 16, 16, 16, 16, 16, 16, 16, ++ 16, 16, 16, 16, 16, 16, 16, 16, ++ 16, 16, 16, 16, 16, 16, 16, 16, ++ 16, 16, 16, 16, 16, 16, 16, 16, ++ 16, 16, 16, 16, 16, 16, 16, 16, ++ 16, 16, 16, 16, 16, 16, 16, 16, ++ 16, ++ }, ++ { /* Fourth byte table 29. */ ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 8, ++ 8, 8, 8, 8, 8, 8, 8, 8, ++ 30, 30, 30, 30, 30, 30, 30, 30, ++ 30, 30, 30, 30, 30, 30, 30, 30, ++ 30, 30, 30, 30, 30, 30, 30, 30, ++ 30, 30, 30, 30, 30, 30, 30, 30, ++ 30, 30, 30, 30, 30, 30, 30, 30, ++ 30, 30, 30, 30, 30, 30, 30, 30, ++ 30, 30, 30, 30, 30, 30, 30, 30, ++ 30, 30, 30, 30, 30, 30, 30, 30, ++ 30, ++ }, ++ { /* Fourth byte table 30. 
*/ ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 8, ++ 8, 8, 8, 8, 8, 8, 8, 8, ++ 8, 8, 8, 8, 8, 8, 8, 8, ++ 8, 8, 8, 8, 8, 8, 8, 8, ++ 8, 8, 8, 8, 8, 8, 8, 8, ++ 8, 8, 8, 8, 8, 8, 8, 8, ++ 8, 8, 8, 8, 8, 8, 8, 16, ++ 16, 16, 16, 16, 16, 16, 16, 38, ++ 38, 38, 38, 38, 38, 38, 38, 38, ++ 38, 38, 38, 38, 38, 38, 38, 38, ++ 38, 38, 38, 38, 38, 38, 38, 38, ++ 38, 38, 38, 38, 38, 38, 38, 38, ++ 38, 38, 38, 38, 38, 38, 38, 38, ++ 38, 38, 38, 38, 38, 38, 38, 38, ++ 38, 38, 38, 38, 38, 38, 38, 38, ++ 38, 38, 38, 38, 38, 38, 38, 38, ++ 38, ++ }, ++ { /* Fourth byte table 31. */ ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 8, 8, 16, 16, 24, 24, 24, ++ 24, 24, 24, 24, 24, 24, 24, 24, ++ 24, 24, 24, 24, 24, 24, 24, 24, ++ 24, 24, 24, 24, 24, 24, 24, 24, ++ 24, 24, 24, 24, 24, 24, 24, 24, ++ 24, 24, 24, 24, 24, 24, 24, 24, ++ 24, 24, 24, 24, 24, 24, 24, 24, ++ 24, 24, 24, 24, 24, 24, 24, 24, ++ 24, 24, 24, 24, 24, 24, 24, 24, ++ 24, 24, 24, 24, 24, 24, 24, 24, ++ 24, 24, 24, 24, 24, 24, 24, 24, ++ 24, 24, 24, 24, 24, 24, 24, 24, ++ 24, 24, 24, 24, 24, 24, 24, 24, ++ 24, 24, 24, 24, 24, 24, 24, 24, ++ 24, ++ }, ++ { /* Fourth byte table 32. */ ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 8, 8, 16, 16, 24, 24, 24, ++ 24, 24, 24, 24, 24, 24, 24, 24, ++ 24, 24, 24, 24, 24, 24, 24, 24, ++ 24, 24, 24, 24, 24, 24, 24, 24, ++ 24, 24, 24, 24, 24, 24, 24, 24, ++ 24, 24, 24, 24, 24, 24, 24, 24, ++ 24, 24, 24, 24, 24, 24, 24, 24, ++ 24, 24, 24, 24, 24, 24, 24, 24, ++ 24, 24, 24, 24, 24, 24, 24, 24, ++ 24, 24, 24, 24, 24, 24, 24, 24, ++ 24, 24, 24, 24, 24, 24, 24, 24, ++ 24, 24, 24, 24, 24, 24, 24, 24, ++ 24, 24, 24, 24, 24, 24, 24, 24, ++ 24, 24, 24, 24, 24, 24, 24, 24, ++ 24, ++ }, ++ { /* Fourth byte table 33. 
*/ ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 8, 8, 8, 8, ++ 8, 16, 16, 16, 24, 24, 24, 24, ++ 24, 24, 24, 24, 24, 24, 24, 24, ++ 24, 24, 24, 24, 24, 24, 24, 24, ++ 24, 24, 24, 24, 32, 32, 40, 40, ++ 40, 40, 40, 40, 40, 40, 40, 40, ++ 40, 40, 40, 40, 40, 40, 40, 40, ++ 40, 40, 40, 40, 40, 48, 48, 48, ++ 48, 48, 48, 48, 48, 48, 48, 48, ++ 48, 48, 48, 48, 48, 48, 48, 48, ++ 48, 48, 48, 48, 48, 48, 48, 48, ++ 48, 48, 48, 48, 48, 48, 48, 48, ++ 48, 48, 48, 48, 48, 48, 48, 48, ++ 48, 48, 48, 48, 48, 48, 48, 48, ++ 48, 48, 48, 48, 48, 48, 48, 48, ++ 48, 48, 48, 48, 48, 48, 48, 48, ++ 48, ++ }, ++ { /* Fourth byte table 34. */ ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 8, 8, 16, 16, ++ 16, 24, 24, 24, 24, 24, 32, 32, ++ 32, 32, 32, 32, 32, 32, 32, 32, ++ 32, 32, 32, 32, 32, 32, 32, 32, ++ 32, 32, 40, 40, 40, 48, 56, 56, ++ 56, 56, 56, 56, 56, 56, 56, 56, ++ 56, 56, 56, 64, 72, 72, 72, 80, ++ 88, 88, 88, 96, 104, 112, 120, 120, ++ 120, 120, 120, 120, 120, 120, 120, 120, ++ 120, 120, 120, 120, 120, 120, 120, 120, ++ 120, 120, 120, 120, 120, 120, 120, 120, ++ 120, 120, 120, 120, 120, 120, 120, 120, ++ 120, 120, 120, 120, 120, 120, 120, 120, ++ 120, 120, 120, 120, 120, 120, 120, 120, ++ 120, 120, 120, 120, 120, 120, 120, 120, ++ 120, 120, 120, 120, 120, 120, 120, 120, ++ 120, ++ }, ++ { /* Fourth byte table 35. */ ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 8, 16, 16, 16, 24, ++ 32, 32, 32, 32, 32, 32, 32, 32, ++ 32, 32, 40, 48, 48, 48, 48, 48, ++ 48, 48, 48, 48, 48, 48, 48, 48, ++ 48, 48, 48, 56, 56, 56, 56, 56, ++ 56, 64, 72, 72, 80, 80, 80, 80, ++ 80, 80, 80, 88, 96, 104, 112, 112, ++ 112, 112, 112, 112, 112, 112, 112, 112, ++ 112, 112, 112, 112, 112, 112, 112, 112, ++ 112, 112, 112, 112, 112, 112, 112, 112, ++ 112, 112, 112, 112, 112, 112, 112, 112, ++ 112, 112, 112, 112, 112, 112, 112, 112, ++ 112, 112, 112, 112, 112, 112, 112, 112, ++ 112, 112, 112, 112, 112, 112, 112, 112, ++ 112, 112, 112, 112, 112, 112, 112, 112, ++ 112, 112, 112, 112, 112, 112, 112, 112, ++ 112, ++ }, ++ { /* Fourth byte table 36. 
*/ ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 9, ++ 9, 9, 9, 9, 18, 18, 27, 27, ++ 36, 36, 45, 45, 54, 54, 63, 63, ++ 72, 72, 81, 81, 90, 90, 99, 99, ++ 108, 108, 117, 117, 117, 126, 126, 135, ++ 135, 144, 144, 144, 144, 144, 144, 144, ++ 161, 161, 161, 178, 178, 178, 195, 195, ++ 195, 212, 212, 212, 229, 229, 229, 229, ++ 229, 229, 229, 229, 229, 229, 229, 229, ++ 229, 229, 229, 229, 229, 229, 229, 229, ++ 229, 229, 229, 229, 229, 229, 229, 229, ++ 229, 229, 229, 229, 229, 229, 229, 229, ++ 229, 229, 229, 229, 229, 229, 229, 229, ++ 229, 229, 229, 229, 229, 229, 229, 229, ++ 229, 229, 229, 229, 229, 229, 229, 229, ++ 229, 229, 229, 229, 229, 229, 229, 229, ++ 229, ++ }, ++ { /* Fourth byte table 37. */ ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 9, 9, ++ 9, 9, 9, 9, 9, 9, 9, 18, ++ 18, 18, 18, 18, 27, 27, 36, 36, ++ 45, 45, 54, 54, 63, 63, 72, 72, ++ 81, 81, 90, 90, 99, 99, 108, 108, ++ 117, 117, 117, 117, 117, 117, 117, 117, ++ 117, 117, 117, 117, 117, 117, 117, 117, ++ 117, 117, 117, 117, 117, 117, 117, 117, ++ 117, 117, 117, 117, 117, 117, 117, 117, ++ 117, 117, 117, 117, 117, 117, 117, 117, ++ 117, 117, 117, 117, 117, 117, 117, 117, ++ 117, 117, 117, 117, 117, 117, 117, 117, ++ 117, 117, 117, 117, 117, 117, 117, 117, ++ 117, ++ }, ++ { /* Fourth byte table 38. */ ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 9, 9, 9, 18, 18, 27, ++ 27, 36, 36, 36, 36, 36, 36, 36, ++ 53, 53, 53, 70, 70, 70, 87, 87, ++ 87, 104, 104, 104, 121, 121, 121, 121, ++ 121, 121, 121, 121, 121, 121, 121, 121, ++ 121, 121, 121, 121, 121, 121, 121, 121, ++ 130, 139, 148, 157, 157, 157, 157, 157, ++ 157, 157, 157, 157, 157, 157, 166, 166, ++ 166, 166, 166, 166, 166, 166, 166, 166, ++ 166, 166, 166, 166, 166, 166, 166, 166, ++ 166, 166, 166, 166, 166, 166, 166, 166, ++ 166, 166, 166, 166, 166, 166, 166, 166, ++ 166, 166, 166, 166, 166, 166, 166, 166, ++ 166, 166, 166, 166, 166, 166, 166, 166, ++ 166, 166, 166, 166, 166, 166, 166, 166, ++ 166, 166, 166, 166, 166, 166, 166, 166, ++ 166, ++ }, ++ { /* Fourth byte table 39. 
*/ ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, ++ }, ++ { /* Fourth byte table 40. */ ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, ++ }, ++ }, ++ { ++ { /* Fourth byte table 0. */ ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 21, 21, 21, 21, 21, 21, 21, ++ 21, 21, 21, 21, 21, 21, 21, 21, ++ 21, 21, 21, 21, 21, 21, 21, 21, ++ 21, 21, 21, 21, 21, 21, 21, 21, ++ 21, 21, 21, 21, 21, 21, 21, 21, ++ 21, 21, 21, 21, 21, 21, 21, 21, ++ 21, 21, 21, 21, 21, 21, 21, 21, ++ 21, 21, 21, 21, 21, 21, 21, 21, ++ 21, 21, 21, 21, 21, 21, 21, 21, ++ 21, 21, 21, 21, 21, 21, 21, 21, ++ 21, 21, 21, 21, 21, 21, 21, 21, ++ 21, ++ }, ++ { /* Fourth byte table 1. 
*/ ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 29, 58, 58, 58, 58, ++ 58, 58, 58, 58, 58, 58, 58, 58, ++ 58, 58, 58, 73, 88, 88, 88, 88, ++ 88, 88, 88, 88, 88, 88, 88, 88, ++ 88, 88, 88, 88, 88, 88, 88, 88, ++ 88, 88, 88, 88, 88, 88, 88, 88, ++ 88, 88, 88, 88, 88, 88, 88, 88, ++ 88, 88, 88, 88, 88, 88, 88, 88, ++ 88, 88, 88, 88, 88, 88, 88, 88, ++ 88, 88, 88, 88, 88, 88, 88, 88, ++ 88, 88, 88, 88, 88, 88, 88, 88, ++ 88, 88, 88, 88, 88, 88, 88, 88, ++ 88, 88, 88, 88, 88, 88, 88, 88, ++ 88, 88, 88, 88, 88, 88, 88, 88, ++ 88, 88, 88, 88, 88, 88, 88, 88, ++ 88, 88, 88, 88, 88, 88, 88, 88, ++ 88, ++ }, ++ { /* Fourth byte table 2. */ ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 15, 30, 30, ++ 30, 30, 30, 30, 30, 30, 30, 30, ++ 30, 30, 30, 38, 46, 46, 46, 46, ++ 46, 54, 62, 62, 62, 62, 62, 62, ++ 62, 70, 78, 86, 94, 94, 94, 94, ++ 94, 94, 94, 94, 94, 94, 94, 94, ++ 94, 94, 94, 94, 94, 94, 94, 94, ++ 102, 102, 102, 102, 102, 102, 102, 102, ++ 102, 102, 102, 102, 102, 102, 102, 102, ++ 102, 102, 102, 102, 102, 102, 102, 102, ++ 102, 102, 102, 102, 102, 102, 102, 102, ++ 102, 102, 102, 102, 102, 102, 102, 102, ++ 102, 102, 102, 102, 102, 102, 102, 102, ++ 102, 102, 102, 102, 102, 102, 102, 102, ++ 102, 102, 102, 102, 102, 102, 102, 102, ++ 102, ++ }, ++ { /* Fourth byte table 3. */ ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 36, 72, 72, 72, 72, 72, 72, ++ 72, 72, 72, 72, 72, 72, 72, 72, ++ 108, 144, 144, 144, 144, 144, 144, 144, ++ 151, 151, 151, 151, 151, 151, 151, 151, ++ 151, 151, 151, 151, 151, 151, 151, 151, ++ 151, 151, 151, 151, 151, 151, 151, 151, ++ 151, 151, 151, 151, 151, 151, 151, 151, ++ 151, 151, 151, 151, 151, 151, 151, 151, ++ 151, 151, 151, 151, 151, 151, 151, 151, ++ 151, 151, 151, 151, 151, 151, 151, 151, ++ 151, 151, 151, 151, 151, 151, 151, 151, ++ 151, 151, 151, 151, 151, 151, 151, 151, ++ 151, ++ }, ++ { /* Fourth byte table 4. 
*/ ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 7, 14, 14, 14, 14, ++ 14, 14, 14, 14, 14, 14, 14, 14, ++ 14, 14, 14, 14, 14, 14, 14, 14, ++ 14, 14, 14, 14, 14, 14, 14, 14, ++ 14, 14, 14, 14, 14, 14, 14, 14, ++ 14, 14, 14, 14, 14, 14, 14, 14, ++ 14, 14, 14, 14, 14, 14, 14, 14, ++ 14, 14, 14, 14, 14, 14, 14, 14, ++ 14, 14, 14, 14, 14, 14, 14, 14, ++ 14, 14, 14, 14, 14, 14, 14, 14, ++ 14, 14, 14, 14, 14, 14, 14, 14, ++ 14, ++ }, ++ { /* Fourth byte table 5. */ ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 7, ++ 14, 22, 30, 30, 30, 30, 30, 37, ++ 44, 44, 44, 44, 44, 44, 44, 44, ++ 44, 44, 44, 44, 44, 44, 44, 44, ++ 44, 44, 44, 44, 44, 44, 44, 44, ++ 44, 44, 44, 44, 44, 44, 44, 44, ++ 44, 44, 44, 44, 44, 44, 44, 44, ++ 44, 44, 44, 44, 44, 44, 44, 44, ++ 44, 44, 44, 44, 44, 44, 44, 44, ++ 44, 44, 44, 44, 44, 44, 44, 44, ++ 44, 44, 44, 44, 44, 44, 44, 44, ++ 44, 44, 44, 44, 44, 44, 44, 44, ++ 44, ++ }, ++ { /* Fourth byte table 6. */ ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 7, 7, 7, 7, 7, ++ 7, 7, 7, 7, 7, 7, 7, 7, ++ 7, 7, 7, 7, 7, 7, 7, 7, ++ 7, 7, 7, 7, 7, 7, 7, 7, ++ 7, 7, 7, 7, 7, 7, 7, 7, ++ 7, 7, 7, 7, 7, 7, 7, 7, ++ 7, 7, 7, 7, 7, 7, 7, 7, ++ 7, 7, 7, 7, 7, 7, 7, 7, ++ 7, 7, 7, 7, 7, 7, 7, 7, ++ 7, 7, 7, 7, 7, 7, 7, 7, ++ 7, 7, 7, 7, 7, 7, 7, 7, ++ 7, 7, 7, 7, 7, 7, 7, 7, ++ 7, 7, 7, 7, 7, 7, 7, 7, ++ 7, 7, 7, 7, 7, 7, 7, 7, ++ 7, ++ }, ++ { /* Fourth byte table 7. 
*/ ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 15, 15, 15, 15, 70, 70, ++ 70, 70, 112, 133, 154, 154, 154, 162, ++ 162, 162, 162, 175, 175, 175, 175, 175, ++ 175, 175, 175, 175, 175, 175, 175, 175, ++ 175, 175, 175, 175, 175, 175, 175, 175, ++ 175, 175, 175, 175, 175, 175, 175, 175, ++ 175, 175, 175, 175, 175, 175, 175, 175, ++ 175, 175, 175, 175, 175, 175, 175, 175, ++ 175, 175, 175, 175, 175, 175, 175, 175, ++ 175, 175, 175, 175, 175, 175, 175, 175, ++ 175, 175, 175, 175, 175, 175, 175, 175, ++ 175, 175, 175, 175, 175, 175, 175, 175, ++ 175, 175, 175, 175, 175, 175, 175, 175, ++ 175, 175, 175, 175, 175, 175, 175, 175, ++ 175, 175, 175, 175, 175, 175, 175, 175, ++ 175, 175, 175, 175, 175, 175, 175, 175, ++ 175, ++ }, ++ { /* Fourth byte table 8. */ ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 7, ++ 7, 7, 7, 7, 7, 7, 7, 7, ++ 7, 20, 20, 20, 27, 27, 46, 59, ++ 66, 91, 91, 98, 98, 98, 98, 105, ++ 105, 105, 105, 105, 130, 130, 130, 130, ++ 137, 137, 137, 137, 144, 144, 151, 151, ++ 151, 164, 164, 164, 171, 171, 190, 203, ++ 210, 235, 235, 242, 242, 242, 242, 249, ++ 249, 249, 249, 249, 249, 249, 249, 249, ++ 249, 249, 249, 249, 249, 249, 249, 249, ++ 249, 249, 249, 249, 249, 249, 249, 249, ++ 249, 249, 249, 249, 249, 249, 249, 249, ++ 249, 249, 249, 249, 249, 249, 249, 249, ++ 249, 249, 249, 249, 249, 249, 249, 249, ++ 249, 249, 249, 249, 249, 249, 249, 249, ++ 249, 249, 249, 249, 249, 249, 249, 249, ++ 249, ++ }, ++ { /* Fourth byte table 9. */ ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 25, 25, 25, 25, ++ 32, 32, 32, 32, 39, 39, 46, 46, ++ 46, 46, 46, 46, 46, 46, 46, 53, ++ 53, 53, 53, 53, 53, 53, 53, 53, ++ 53, 53, 53, 53, 53, 53, 53, 53, ++ 53, 53, 53, 53, 53, 53, 53, 53, ++ 53, 53, 53, 53, 53, 60, 67, 67, ++ 67, 67, 67, 67, 67, 67, 67, 67, ++ 67, 67, 67, 67, 67, 67, 67, 67, ++ 67, 67, 67, 67, 67, 67, 67, 67, ++ 67, 67, 67, 67, 67, 67, 67, 67, ++ 67, 67, 67, 67, 67, 67, 67, 67, ++ 67, 67, 67, 67, 67, 67, 67, 67, ++ 67, 67, 67, 67, 67, 67, 67, 67, ++ 67, 67, 67, 67, 67, 67, 67, 67, ++ 67, 67, 67, 67, 67, 67, 67, 67, ++ 67, ++ }, ++ { /* Fourth byte table 10. 
*/ ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 7, 14, 14, 14, 14, 14, 14, ++ 14, 14, 14, 14, 14, 14, 14, 14, ++ 14, 21, 28, 28, 28, 28, 28, 28, ++ 28, 28, 28, 28, 28, 28, 28, 28, ++ 28, 28, 28, 28, 28, 28, 28, 28, ++ 28, 28, 28, 28, 28, 28, 28, 28, ++ 28, 28, 28, 28, 28, 28, 28, 28, ++ 28, 28, 28, 28, 28, 28, 28, 28, ++ 28, 28, 28, 28, 28, 28, 28, 28, ++ 28, 28, 28, 28, 28, 28, 28, 28, ++ 28, 28, 28, 28, 28, 28, 28, 28, ++ 28, 28, 28, 28, 28, 28, 28, 28, ++ 28, 28, 28, 28, 28, 28, 28, 28, ++ 28, ++ }, ++ { /* Fourth byte table 11. */ ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 19, 19, 19, 19, 19, 19, 19, 19, ++ 19, 19, 19, 19, 19, 19, 19, 19, ++ 19, 19, 19, 19, 19, 19, 19, 19, ++ 19, 19, 19, 19, 19, 19, 19, 19, ++ 19, 19, 19, 19, 19, 19, 19, 19, ++ 19, 19, 19, 19, 19, 19, 19, 19, ++ 19, 19, 19, 19, 19, 19, 19, 19, ++ 19, 19, 19, 19, 19, 19, 19, 19, ++ 19, 19, 19, 19, 19, 19, 19, 19, ++ 19, 19, 19, 19, 19, 19, 19, 19, ++ 19, 19, 19, 19, 19, 19, 19, 19, ++ 19, ++ }, ++ { /* Fourth byte table 12. */ ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 7, 7, 14, 14, 14, 14, 14, ++ 14, 14, 14, 14, 14, 14, 14, 14, ++ 14, 14, 14, 14, 14, 14, 14, 14, ++ 14, 14, 14, 14, 14, 14, 14, 14, ++ 14, 14, 14, 14, 14, 14, 14, 14, ++ 14, 14, 14, 14, 14, 14, 14, 14, ++ 14, 14, 14, 14, 14, 14, 14, 14, ++ 14, 14, 14, 14, 14, 14, 14, 14, ++ 14, 14, 14, 14, 14, 14, 14, 14, ++ 14, 14, 14, 14, 14, 14, 14, 14, ++ 14, 14, 14, 14, 14, 14, 14, 14, ++ 14, 14, 14, 14, 14, 14, 14, 14, ++ 14, 14, 14, 14, 14, 14, 14, 14, ++ 14, 14, 14, 14, 14, 14, 14, 14, ++ 14, 14, 14, 14, 14, 14, 14, 14, ++ 14, ++ }, ++ { /* Fourth byte table 13. 
*/ ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 7, 7, 7, 7, 7, 7, ++ 7, 7, 7, 7, 7, 7, 7, 7, ++ 7, 7, 7, 14, 14, 14, 21, 21, ++ 21, 21, 21, 21, 21, 21, 21, 21, ++ 21, 21, 21, 21, 21, 21, 21, 21, ++ 21, 21, 21, 21, 21, 21, 21, 21, ++ 21, 21, 21, 21, 21, 21, 21, 21, ++ 21, 21, 21, 21, 21, 21, 21, 21, ++ 21, 21, 21, 21, 21, 21, 21, 21, ++ 21, 21, 21, 21, 21, 21, 21, 21, ++ 21, 21, 21, 21, 21, 21, 21, 21, ++ 21, 21, 21, 21, 21, 21, 21, 21, ++ 21, 21, 21, 21, 21, 21, 21, 21, ++ 21, 21, 21, 21, 21, 21, 21, 21, ++ 21, 21, 21, 21, 21, 21, 21, 21, ++ 21, 21, 21, 21, 21, 21, 21, 21, ++ 21, ++ }, ++ { /* Fourth byte table 14. */ ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 9, 9, 9, 9, 9, 9, 9, ++ 9, 18, 18, 18, 27, 27, 27, 27, ++ 27, 27, 27, 27, 27, 27, 27, 27, ++ 27, 27, 27, 27, 27, 27, 27, 27, ++ 27, 27, 27, 27, 27, 27, 27, 27, ++ 27, 27, 27, 27, 27, 27, 27, 27, ++ 27, 27, 27, 27, 27, 27, 27, 27, ++ 27, 27, 27, 27, 27, 27, 27, 27, ++ 27, 27, 27, 27, 27, 27, 27, 27, ++ 27, 27, 27, 27, 27, 27, 27, 27, ++ 27, 27, 27, 27, 27, 27, 27, 27, ++ 27, ++ }, ++ { /* Fourth byte table 15. */ ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 17, 17, 17, 17, 17, 17, 17, 17, ++ 17, 17, 17, 17, 17, 17, 17, 17, ++ 17, 17, 17, 17, 17, 17, 17, 17, ++ 17, 17, 17, 17, 17, 17, 17, 17, ++ 17, 17, 17, 17, 17, 17, 17, 17, ++ 17, 17, 17, 17, 17, 17, 17, 17, ++ 17, 17, 17, 17, 17, 17, 17, 17, ++ 17, 17, 17, 17, 17, 17, 17, 17, ++ 17, 17, 17, 17, 17, 17, 17, 17, ++ 17, 17, 17, 17, 17, 17, 17, 17, ++ 17, 17, 17, 17, 17, 17, 17, 17, ++ 17, 17, 17, 17, 17, 17, 17, 17, ++ 17, 17, 17, 17, 17, 17, 17, 17, ++ 17, 17, 17, 17, 17, 17, 17, 17, ++ 17, 17, 17, 17, 17, 17, 17, 17, ++ 17, ++ }, ++ { /* Fourth byte table 16. 
*/ ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 25, 25, 25, 25, 25, 25, 25, 25, ++ 25, 25, 25, 25, 25, 25, 25, 25, ++ 25, 25, 25, 25, 25, 25, 25, 25, ++ 25, 25, 25, 25, 25, 25, 25, 25, ++ 25, 25, 25, 25, 25, 25, 25, 25, ++ 25, 25, 25, 25, 25, 25, 25, 25, ++ 25, 25, 25, 25, 25, 25, 25, 25, ++ 25, 25, 25, 25, 25, 25, 25, 25, ++ 25, 25, 25, 25, 25, 25, 25, 25, ++ 25, 25, 25, 25, 25, 25, 25, 25, ++ 25, 25, 25, 25, 25, 25, 25, 25, ++ 25, 25, 25, 25, 25, 25, 25, 25, ++ 25, 25, 25, 25, 25, 25, 25, 25, ++ 25, 25, 25, 25, 25, 25, 25, 25, ++ 25, 25, 25, 25, 25, 25, 25, 25, ++ 25, ++ }, ++ { /* Fourth byte table 17. */ ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 9, 9, 9, 9, 9, ++ 9, 9, 9, 9, 9, 9, 9, 9, ++ 9, 9, 9, 9, 9, 9, 9, 9, ++ 9, 9, 9, 9, 9, 9, 9, 9, ++ 9, 9, 9, 9, 9, 9, 9, 9, ++ 9, 9, 9, 9, 9, 9, 9, 9, ++ 9, 9, 9, 9, 9, 9, 9, 9, ++ 9, 9, 9, 9, 9, 9, 9, 9, ++ 9, 9, 9, 9, 9, 9, 9, 9, ++ 9, 9, 9, 9, 9, 9, 9, 9, ++ 9, 9, 9, 9, 9, 9, 9, 9, ++ 9, 9, 9, 9, 9, 9, 9, 9, ++ 9, 9, 9, 9, 9, 9, 9, 9, ++ 9, 9, 9, 9, 9, 9, 9, 9, ++ 9, ++ }, ++ { /* Fourth byte table 18. */ ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 17, ++ 26, 26, 26, 26, 26, 26, 26, 26, ++ 26, 26, 26, 26, 26, 26, 26, 26, ++ 26, 26, 26, 26, 26, 26, 26, 26, ++ 26, 26, 26, 26, 26, 26, 26, 26, ++ 26, 26, 26, 26, 26, 26, 26, 26, ++ 26, 26, 26, 26, 26, 26, 26, 26, ++ 26, 26, 26, 26, 26, 26, 26, 26, ++ 26, 26, 26, 26, 26, 26, 26, 26, ++ 26, 26, 26, 26, 26, 26, 26, 26, ++ 26, 26, 26, 26, 26, 26, 26, 26, ++ 26, 26, 26, 26, 26, 26, 26, 26, ++ 26, 26, 26, 26, 26, 26, 26, 26, ++ 26, 26, 26, 26, 26, 26, 26, 26, ++ 26, 26, 26, 26, 26, 26, 26, 26, ++ 26, 26, 26, 26, 26, 26, 26, 26, ++ 26, ++ }, ++ { /* Fourth byte table 19. 
*/ ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 9, ++ 9, 9, 9, 9, 9, 9, 9, 9, ++ 9, 9, 9, 9, 9, 9, 9, 9, ++ 9, 9, 9, 9, 9, 9, 9, 9, ++ 9, 9, 9, 9, 9, 9, 9, 9, ++ 9, 9, 9, 9, 9, 9, 9, 9, ++ 9, 9, 9, 9, 9, 9, 9, 9, ++ 9, 9, 9, 9, 9, 9, 9, 9, ++ 9, 9, 9, 9, 9, 9, 9, 9, ++ 9, 9, 9, 9, 9, 9, 9, 9, ++ 9, 9, 9, 9, 9, 9, 9, 9, ++ 9, 9, 9, 9, 9, 9, 9, 9, ++ 9, 9, 9, 9, 9, 9, 9, 9, ++ 9, 9, 9, 9, 9, 9, 9, 9, ++ 9, 9, 9, 9, 9, 9, 9, 9, ++ 9, 9, 9, 9, 9, 9, 9, 9, ++ 9, ++ }, ++ { /* Fourth byte table 20. */ ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 9, 9, 9, 9, 9, 9, 9, 9, ++ 9, 9, 9, 9, 9, 9, 9, 9, ++ 9, 9, 9, 9, 9, 9, 9, 9, ++ 9, 9, 9, 9, 9, 9, 9, 9, ++ 9, 9, 9, 9, 9, 9, 9, 9, ++ 9, 9, 9, 9, 9, 9, 9, 9, ++ 9, 9, 9, 9, 9, 9, 9, 9, ++ 9, 9, 9, 9, 9, 9, 9, 9, ++ 9, ++ }, ++ { /* Fourth byte table 21. */ ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 25, ++ 25, 25, 25, 34, 34, 34, 34, 34, ++ 34, 34, 34, 34, 34, 34, 34, 34, ++ 34, 34, 34, 34, 34, 34, 34, 34, ++ 34, 34, 34, 34, 34, 34, 34, 34, ++ 34, 34, 34, 34, 34, 34, 34, 34, ++ 34, 34, 34, 34, 34, 34, 34, 34, ++ 34, 34, 34, 34, 34, 34, 34, 34, ++ 34, 34, 34, 34, 34, 34, 34, 34, ++ 34, 34, 34, 34, 34, 34, 34, 34, ++ 34, 34, 34, 34, 34, 34, 34, 34, ++ 34, 34, 34, 34, 34, 34, 34, 34, ++ 34, 34, 34, 34, 34, 34, 34, 34, ++ 34, 34, 34, 34, 34, 34, 34, 34, ++ 34, 34, 34, 34, 34, 34, 34, 34, ++ 34, 34, 34, 34, 34, 34, 34, 34, ++ 34, ++ }, ++ { /* Fourth byte table 22. 
*/ ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 17, ++ 26, 26, 26, 26, 26, 26, 26, 26, ++ 26, 26, 26, 26, 26, 26, 26, 26, ++ 26, 26, 26, 26, 26, 26, 26, 26, ++ 26, 26, 26, 26, 26, 26, 26, 26, ++ 26, 26, 26, 26, 26, 26, 26, 26, ++ 26, 26, 26, 26, 26, 26, 26, 26, ++ 26, 26, 26, 26, 26, 26, 26, 26, ++ 26, 26, 26, 26, 26, 26, 26, 26, ++ 26, 26, 26, 26, 26, 26, 26, 26, ++ 26, 26, 26, 26, 26, 26, 26, 26, ++ 26, 26, 26, 26, 26, 26, 26, 26, ++ 26, 26, 26, 26, 26, 26, 26, 26, ++ 26, 26, 26, 26, 26, 26, 26, 26, ++ 26, 26, 26, 26, 26, 26, 26, 26, ++ 26, 26, 26, 26, 26, 26, 26, 26, ++ 26, ++ }, ++ { /* Fourth byte table 23. */ ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 25, 25, 25, 34, 34, 34, ++ 34, 34, 34, 34, 34, 34, 34, 34, ++ 34, 34, 34, 34, 34, 34, 34, 34, ++ 34, 34, 34, 34, 34, 34, 34, 34, ++ 34, 34, 34, 34, 34, 34, 34, 34, ++ 34, 34, 34, 34, 34, 34, 34, 34, ++ 34, 34, 34, 34, 34, 34, 34, 34, ++ 34, 34, 34, 34, 34, 34, 34, 34, ++ 34, 34, 34, 34, 34, 34, 34, 34, ++ 34, 34, 34, 34, 34, 34, 34, 34, ++ 34, 34, 34, 34, 34, 34, 34, 34, ++ 34, 34, 34, 34, 34, 34, 34, 34, ++ 34, 34, 34, 34, 34, 34, 34, 34, ++ 34, ++ }, ++ { /* Fourth byte table 24. */ ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 9, 9, ++ 9, 9, 9, 9, 9, 9, 9, 9, ++ 9, 9, 9, 9, 9, 9, 9, 9, ++ 9, 9, 9, 9, 9, 9, 9, 9, ++ 9, 9, 9, 9, 9, 9, 9, 9, ++ 9, 9, 9, 9, 9, 9, 9, 9, ++ 9, 9, 9, 9, 9, 9, 9, 9, ++ 9, 9, 9, 9, 9, 9, 9, 9, ++ 9, 9, 9, 9, 9, 9, 9, 9, ++ 9, 9, 9, 9, 9, 9, 9, 9, ++ 9, 9, 9, 9, 9, 9, 9, 9, ++ 9, 9, 9, 9, 9, 9, 9, 9, ++ 9, ++ }, ++ { /* Fourth byte table 25. 
*/ ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 9, 9, ++ 18, 18, 27, 27, 36, 36, 45, 45, ++ 45, 45, 54, 54, 54, 54, 54, 54, ++ 54, 54, 54, 54, 54, 54, 54, 54, ++ 54, 54, 54, 54, 54, 54, 54, 54, ++ 54, 54, 54, 54, 54, 54, 54, 54, ++ 54, 54, 54, 54, 54, 54, 54, 54, ++ 54, 54, 54, 63, 63, 72, 72, 81, ++ 90, 90, 90, 90, 90, 90, 90, 90, ++ 90, 90, 90, 90, 90, 90, 90, 90, ++ 90, 90, 90, 90, 90, 90, 90, 90, ++ 90, 90, 90, 90, 90, 90, 90, 90, ++ 90, 90, 90, 90, 90, 90, 90, 90, ++ 90, 90, 90, 90, 90, 90, 90, 90, ++ 90, 90, 90, 90, 90, 90, 90, 90, ++ 90, 90, 90, 90, 90, 90, 90, 90, ++ 90, ++ }, ++ { /* Fourth byte table 26. */ ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 9, 9, 9, 9, 9, ++ 9, 9, 9, 9, 9, 9, 9, 9, ++ 9, 9, 9, 9, 9, 9, 9, 9, ++ 9, 9, 9, 9, 9, 9, 9, 9, ++ 9, 9, 9, 9, 9, 9, 9, 9, ++ 9, 9, 9, 9, 9, 9, 9, 9, ++ 9, 9, 9, 9, 9, 9, 9, 9, ++ 9, 9, 9, 9, 9, 9, 9, 9, ++ 9, 9, 9, 9, 9, 9, 9, 9, ++ 9, 9, 9, 9, 9, 9, 9, 9, ++ 9, 9, 9, 9, 9, 9, 9, 9, ++ 9, 9, 9, 9, 9, 9, 9, 9, ++ 9, 9, 9, 9, 9, 9, 9, 9, ++ 9, 9, 9, 9, 9, 9, 9, 9, ++ 9, 9, 9, 9, 9, 9, 9, 9, ++ 9, 9, 9, 9, 9, 9, 9, 9, ++ 9, ++ }, ++ { /* Fourth byte table 27. */ ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 8, ++ 16, 16, 16, 16, 16, 16, 16, 16, ++ 16, 16, 16, 16, 16, 16, 16, 16, ++ 16, 16, 16, 16, 16, 16, 16, 16, ++ 16, 16, 16, 16, 16, 16, 16, 16, ++ 16, 16, 16, 16, 16, 16, 16, 16, ++ 16, 16, 16, 16, 16, 16, 16, 16, ++ 16, 16, 16, 16, 16, 16, 16, 16, ++ 16, 16, 16, 16, 16, 16, 16, 16, ++ 16, 16, 16, 16, 16, 16, 16, 16, ++ 16, ++ }, ++ { /* Fourth byte table 28. 
*/ ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 8, 16, 16, 16, 16, ++ 16, 16, 16, 24, 32, 32, 32, 32, ++ 32, 32, 32, 32, 32, 32, 32, 32, ++ 32, 32, 32, 32, 32, 32, 32, 32, ++ 32, 32, 32, 32, 32, 32, 32, 32, ++ 32, 32, 32, 32, 32, 32, 32, 32, ++ 32, 32, 32, 32, 32, 32, 32, 32, ++ 32, 32, 32, 32, 32, 32, 32, 32, ++ 32, 32, 32, 32, 32, 32, 32, 32, ++ 32, 32, 32, 32, 32, 32, 32, 32, ++ 32, 32, 32, 32, 32, 32, 32, 32, ++ 32, 32, 32, 32, 32, 32, 32, 32, ++ 32, 32, 32, 32, 32, 32, 32, 32, ++ 32, ++ }, ++ { /* Fourth byte table 29. */ ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 15, 30, 30, 30, 30, 30, 30, ++ 30, 30, 30, 30, 30, 30, 30, 30, ++ 30, 30, 30, 30, 30, 30, 30, 30, ++ 30, 38, 46, 46, 46, 46, 46, 46, ++ 46, 46, 46, 46, 46, 46, 46, 46, ++ 46, 46, 46, 46, 46, 46, 46, 46, ++ 46, 46, 46, 46, 46, 46, 46, 46, ++ 46, 46, 46, 46, 46, 46, 46, 46, ++ 46, 46, 46, 46, 46, 46, 46, 46, ++ 46, 46, 46, 46, 46, 46, 46, 46, ++ 46, 46, 46, 46, 46, 46, 46, 46, ++ 46, 46, 46, 46, 46, 46, 46, 46, ++ 46, ++ }, ++ { /* Fourth byte table 30. */ ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 8, 16, 16, ++ 16, 16, 16, 16, 16, 16, 16, 16, ++ 16, 16, 16, 16, 16, 16, 16, 16, ++ 16, 16, 16, 16, 16, 16, 16, 16, ++ 16, 16, 16, 16, 16, 16, 16, 16, ++ 16, 16, 16, 16, 16, 16, 16, 16, ++ 16, 16, 16, 16, 16, 16, 16, 16, ++ 16, 16, 16, 16, 16, 16, 16, 16, ++ 16, 16, 16, 16, 16, 16, 16, 16, ++ 16, 16, 16, 16, 16, 16, 16, 16, ++ 16, 16, 16, 16, 16, 16, 16, 16, ++ 16, 16, 16, 16, 16, 16, 16, 16, ++ 16, 16, 16, 16, 16, 16, 16, 16, ++ 16, 16, 16, 16, 16, 16, 16, 16, ++ 16, 16, 16, 16, 16, 16, 16, 16, ++ 16, ++ }, ++ { /* Fourth byte table 31. 
*/ ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 8, ++ 8, 8, 8, 8, 8, 8, 8, 8, ++ 30, 30, 30, 30, 30, 30, 30, 30, ++ 30, 30, 30, 30, 30, 30, 30, 30, ++ 30, 30, 30, 30, 30, 30, 30, 30, ++ 30, 30, 30, 30, 30, 30, 30, 30, ++ 30, 30, 30, 30, 30, 30, 30, 30, ++ 30, 30, 30, 30, 30, 30, 30, 30, ++ 30, 30, 30, 30, 30, 30, 30, 30, ++ 30, 30, 30, 30, 30, 30, 30, 30, ++ 30, ++ }, ++ { /* Fourth byte table 32. */ ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 8, ++ 8, 8, 8, 8, 8, 8, 8, 8, ++ 8, 8, 8, 8, 8, 8, 8, 8, ++ 8, 8, 8, 8, 8, 8, 8, 8, ++ 8, 8, 8, 8, 8, 8, 8, 8, ++ 8, 8, 8, 8, 8, 8, 8, 8, ++ 8, 8, 8, 8, 8, 8, 8, 16, ++ 16, 16, 16, 16, 16, 16, 16, 38, ++ 38, 38, 38, 38, 38, 38, 38, 38, ++ 38, 38, 38, 38, 38, 38, 38, 38, ++ 38, 38, 38, 38, 38, 38, 38, 38, ++ 38, 38, 38, 38, 38, 38, 38, 38, ++ 38, 38, 38, 38, 38, 38, 38, 38, ++ 38, 38, 38, 38, 38, 38, 38, 38, ++ 38, 38, 38, 38, 38, 38, 38, 38, ++ 38, 38, 38, 38, 38, 38, 38, 38, ++ 38, ++ }, ++ { /* Fourth byte table 33. */ ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 8, 8, 16, 16, 24, 24, 24, ++ 24, 24, 24, 24, 24, 24, 24, 24, ++ 24, 24, 24, 24, 24, 24, 24, 24, ++ 24, 24, 24, 24, 24, 24, 24, 24, ++ 24, 24, 24, 24, 24, 24, 24, 24, ++ 24, 24, 24, 24, 24, 24, 24, 24, ++ 24, 24, 24, 24, 24, 24, 24, 24, ++ 24, 24, 24, 24, 24, 24, 24, 24, ++ 24, 24, 24, 24, 24, 24, 24, 24, ++ 24, 24, 24, 24, 24, 24, 24, 24, ++ 24, 24, 24, 24, 24, 24, 24, 24, ++ 24, 24, 24, 24, 24, 24, 24, 24, ++ 24, 24, 24, 24, 24, 24, 24, 24, ++ 24, 24, 24, 24, 24, 24, 24, 24, ++ 24, ++ }, ++ { /* Fourth byte table 34. 
*/ ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 8, 8, 16, 16, 24, 24, 24, ++ 24, 24, 24, 24, 24, 24, 24, 24, ++ 24, 24, 24, 24, 24, 24, 24, 24, ++ 24, 24, 24, 24, 24, 24, 24, 24, ++ 24, 24, 24, 24, 24, 24, 24, 24, ++ 24, 24, 24, 24, 24, 24, 24, 24, ++ 24, 24, 24, 24, 24, 24, 24, 24, ++ 24, 24, 24, 24, 24, 24, 24, 24, ++ 24, 24, 24, 24, 24, 24, 24, 24, ++ 24, 24, 24, 24, 24, 24, 24, 24, ++ 24, 24, 24, 24, 24, 24, 24, 24, ++ 24, 24, 24, 24, 24, 24, 24, 24, ++ 24, 24, 24, 24, 24, 24, 24, 24, ++ 24, 24, 24, 24, 24, 24, 24, 24, ++ 24, ++ }, ++ { /* Fourth byte table 35. */ ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 8, 8, 8, 8, ++ 8, 16, 16, 16, 24, 24, 24, 24, ++ 24, 24, 24, 24, 24, 24, 24, 24, ++ 24, 24, 24, 24, 24, 24, 24, 24, ++ 24, 24, 24, 24, 32, 32, 40, 40, ++ 40, 40, 40, 40, 40, 40, 40, 40, ++ 40, 40, 40, 40, 40, 40, 40, 40, ++ 40, 40, 40, 40, 40, 48, 48, 48, ++ 48, 48, 48, 48, 48, 48, 48, 48, ++ 48, 48, 48, 48, 48, 48, 48, 48, ++ 48, 48, 48, 48, 48, 48, 48, 48, ++ 48, 48, 48, 48, 48, 48, 48, 48, ++ 48, 48, 48, 48, 48, 48, 48, 48, ++ 48, 48, 48, 48, 48, 48, 48, 48, ++ 48, 48, 48, 48, 48, 48, 48, 48, ++ 48, 48, 48, 48, 48, 48, 48, 48, ++ 48, ++ }, ++ { /* Fourth byte table 36. */ ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 8, 8, 16, 16, ++ 16, 24, 24, 24, 24, 24, 32, 32, ++ 32, 32, 32, 32, 32, 32, 32, 32, ++ 32, 32, 32, 32, 32, 32, 32, 32, ++ 32, 32, 40, 40, 40, 48, 56, 56, ++ 56, 56, 56, 56, 56, 56, 56, 56, ++ 56, 56, 56, 64, 72, 72, 72, 80, ++ 88, 88, 88, 96, 104, 112, 120, 120, ++ 120, 120, 120, 120, 120, 120, 120, 120, ++ 120, 120, 120, 120, 120, 120, 120, 120, ++ 120, 120, 120, 120, 120, 120, 120, 120, ++ 120, 120, 120, 120, 120, 120, 120, 120, ++ 120, 120, 120, 120, 120, 120, 120, 120, ++ 120, 120, 120, 120, 120, 120, 120, 120, ++ 120, 120, 120, 120, 120, 120, 120, 120, ++ 120, 120, 120, 120, 120, 120, 120, 120, ++ 120, ++ }, ++ { /* Fourth byte table 37. 
*/ ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 8, 16, 16, 16, 24, ++ 32, 32, 32, 32, 32, 32, 32, 32, ++ 32, 32, 40, 48, 48, 48, 48, 48, ++ 48, 48, 48, 48, 48, 48, 48, 48, ++ 48, 48, 48, 56, 56, 56, 56, 56, ++ 56, 64, 72, 72, 80, 80, 80, 80, ++ 80, 80, 80, 88, 96, 104, 112, 112, ++ 112, 112, 112, 112, 112, 112, 112, 112, ++ 112, 112, 112, 112, 112, 112, 112, 112, ++ 112, 112, 112, 112, 112, 112, 112, 112, ++ 112, 112, 112, 112, 112, 112, 112, 112, ++ 112, 112, 112, 112, 112, 112, 112, 112, ++ 112, 112, 112, 112, 112, 112, 112, 112, ++ 112, 112, 112, 112, 112, 112, 112, 112, ++ 112, 112, 112, 112, 112, 112, 112, 112, ++ 112, 112, 112, 112, 112, 112, 112, 112, ++ 112, ++ }, ++ { /* Fourth byte table 38. */ ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 9, ++ 9, 9, 9, 9, 18, 18, 27, 27, ++ 36, 36, 45, 45, 54, 54, 63, 63, ++ 72, 72, 81, 81, 90, 90, 99, 99, ++ 108, 108, 117, 117, 117, 126, 126, 135, ++ 135, 144, 144, 144, 144, 144, 144, 144, ++ 161, 161, 161, 178, 178, 178, 195, 195, ++ 195, 212, 212, 212, 229, 229, 229, 229, ++ 229, 229, 229, 229, 229, 229, 229, 229, ++ 229, 229, 229, 229, 229, 229, 229, 229, ++ 229, 229, 229, 229, 229, 229, 229, 229, ++ 229, 229, 229, 229, 229, 229, 229, 229, ++ 229, 229, 229, 229, 229, 229, 229, 229, ++ 229, 229, 229, 229, 229, 229, 229, 229, ++ 229, 229, 229, 229, 229, 229, 229, 229, ++ 229, 229, 229, 229, 229, 229, 229, 229, ++ 229, ++ }, ++ { /* Fourth byte table 39. */ ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 9, 9, ++ 9, 9, 9, 9, 9, 9, 9, 18, ++ 18, 18, 18, 18, 27, 27, 36, 36, ++ 45, 45, 54, 54, 63, 63, 72, 72, ++ 81, 81, 90, 90, 99, 99, 108, 108, ++ 117, 117, 117, 117, 117, 117, 117, 117, ++ 117, 117, 117, 117, 117, 117, 117, 117, ++ 117, 117, 117, 117, 117, 117, 117, 117, ++ 117, 117, 117, 117, 117, 117, 117, 117, ++ 117, 117, 117, 117, 117, 117, 117, 117, ++ 117, 117, 117, 117, 117, 117, 117, 117, ++ 117, 117, 117, 117, 117, 117, 117, 117, ++ 117, 117, 117, 117, 117, 117, 117, 117, ++ 117, ++ }, ++ { /* Fourth byte table 40. 
*/ ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 9, 9, 9, 18, 18, 27, ++ 27, 36, 36, 36, 36, 36, 36, 36, ++ 53, 53, 53, 70, 70, 70, 87, 87, ++ 87, 104, 104, 104, 121, 121, 121, 121, ++ 121, 121, 121, 121, 121, 121, 121, 121, ++ 121, 121, 121, 121, 121, 121, 121, 121, ++ 130, 139, 148, 157, 157, 157, 157, 157, ++ 157, 157, 157, 157, 157, 157, 166, 166, ++ 166, 166, 166, 166, 166, 166, 166, 166, ++ 166, 166, 166, 166, 166, 166, 166, 166, ++ 166, 166, 166, 166, 166, 166, 166, 166, ++ 166, 166, 166, 166, 166, 166, 166, 166, ++ 166, 166, 166, 166, 166, 166, 166, 166, ++ 166, 166, 166, 166, 166, 166, 166, 166, ++ 166, 166, 166, 166, 166, 166, 166, 166, ++ 166, 166, 166, 166, 166, 166, 166, 166, ++ 166, ++ }, ++ }, ++}; ++ ++static const uint16_t u8_composition_b4_16bit_tbl[2][5][257] = { ++ { ++ { /* Fourth byte 16-bit table 0. */ ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 8, 16, 24, ++ 24, 24, 124, 146, 177, 219, 327, 335, ++ 379, 427, 521, 528, 562, 602, 624, 683, ++ 782, 797, 797, 849, 894, 941, 1061, 1076, ++ 1118, 1133, 1193, 1233, 1233, 1233, 1233, 1233, ++ 1233, 1233, 1333, 1355, 1386, 1428, 1536, 1544, ++ 1588, 1643, 1731, 1744, 1778, 1818, 1840, 1899, ++ 1998, 2013, 2013, 2065, 2110, 2164, 2284, 2299, ++ 2348, 2363, 2430, 2470, 2470, 2470, 2470, 2470, ++ 2470, 2470, 2470, 2470, 2470, 2470, 2470, 2470, ++ 2470, 2470, 2470, 2470, 2470, 2470, 2470, 2470, ++ 2470, 2470, 2470, 2470, 2470, 2470, 2470, 2470, ++ 2470, 2470, 2470, 2470, 2470, 2470, 2470, 2470, ++ 2470, 2470, 2470, 2470, 2470, 2470, 2470, 2470, ++ 2470, 2470, 2470, 2470, 2470, 2470, 2470, 2470, ++ 2470, 2470, 2470, 2470, 2470, 2470, 2470, 2470, ++ 2470, 2470, 2470, 2470, 2470, 2470, 2470, 2470, ++ 2470, 2470, 2470, 2470, 2470, 2470, 2470, 2470, ++ 2470, 2470, 2470, 2470, 2470, 2470, 2470, 2470, ++ 2470, 2470, 2470, 2470, 2470, 2470, 2470, 2470, ++ 2470, 2470, 2470, 2470, 2470, 2470, 2470, 2470, ++ 2470, 2470, 2470, 2470, 2470, 2470, 2470, 2470, ++ 2470, 2470, 2470, 2470, 2470, 2470, 2470, 2470, ++ 2470, 2470, 2470, 2470, 2470, 2470, 2470, 2470, ++ 2470, 2470, 2470, 2470, 2470, 2470, 2470, 2470, ++ 2470, ++ }, ++ { /* Fourth byte 16-bit table 1. 
*/ ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 29, 29, 36, 43, 56, ++ 64, 64, 64, 93, 93, 93, 93, 93, ++ 101, 101, 101, 101, 101, 130, 151, 158, ++ 158, 165, 165, 165, 165, 190, 190, 190, ++ 190, 190, 190, 219, 219, 226, 233, 246, ++ 254, 254, 254, 283, 283, 283, 283, 283, ++ 291, 291, 291, 291, 291, 320, 341, 348, ++ 348, 355, 355, 355, 355, 380, 380, 380, ++ 380, 380, 380, 380, 380, 380, 380, 380, ++ 380, 380, 380, 380, 380, 380, 380, 380, ++ 380, 380, 380, 380, 380, 380, 380, 380, ++ 380, 380, 380, 380, 380, 380, 380, 380, ++ 380, 380, 380, 380, 380, 380, 380, 380, ++ 380, 380, 380, 380, 380, 380, 380, 380, ++ 380, 380, 380, 380, 380, 380, 380, 380, ++ 380, 380, 380, 380, 380, 380, 380, 380, ++ 380, ++ }, ++ { /* Fourth byte 16-bit table 2. */ ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 49, 49, 49, 49, 77, 77, ++ 112, 112, 160, 160, 160, 160, 160, 160, ++ 188, 188, 196, 196, 196, 196, 237, 237, ++ 237, 237, 272, 272, 272, 280, 280, 288, ++ 288, 288, 344, 344, 344, 344, 372, 372, ++ 414, 414, 469, 469, 469, 469, 469, 469, ++ 497, 497, 497, 497, 497, 497, 497, 497, ++ 497, 497, 497, 497, 497, 497, 497, 497, ++ 497, 497, 497, 497, 497, 497, 497, 497, ++ 497, 497, 497, 497, 497, 497, 497, 497, ++ 497, 497, 497, 497, 497, 497, 497, 497, ++ 497, 497, 497, 497, 497, 497, 497, 497, ++ 497, 497, 497, 497, 497, 497, 497, 497, ++ 497, 497, 497, 497, 497, 497, 497, 497, ++ 497, ++ }, ++ { /* Fourth byte 16-bit table 3. */ ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 29, 58, 66, 74, 82, 90, 98, ++ 106, 135, 164, 172, 180, 188, 196, 204, ++ 212, 227, 242, 242, 242, 242, 242, 242, ++ 242, 257, 272, 272, 272, 272, 272, 272, ++ 272, 301, 330, 338, 346, 354, 362, 370, ++ 378, 407, 436, 444, 452, 460, 468, 476, ++ 484, 506, 528, 528, 528, 528, 528, 528, ++ 528, 550, 572, 572, 572, 572, 572, 572, ++ 572, 572, 572, 572, 572, 572, 572, 572, ++ 572, 572, 572, 572, 572, 572, 572, 572, ++ 572, 572, 572, 572, 572, 572, 572, 572, ++ 572, 572, 572, 572, 572, 572, 572, 572, ++ 572, 572, 572, 572, 572, 572, 572, 572, ++ 572, 572, 572, 572, 572, 572, 572, 572, ++ 572, 572, 572, 572, 572, 572, 572, 572, ++ 572, 572, 572, 572, 572, 572, 572, 572, ++ 572, ++ }, ++ { /* Fourth byte 16-bit table 4. 
*/ ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 15, 30, 30, 30, 30, 30, 30, ++ 30, 45, 60, 60, 60, 60, 60, 60, ++ 60, 82, 104, 104, 104, 104, 104, 104, ++ 104, 104, 126, 126, 126, 126, 126, 126, ++ 126, 155, 184, 192, 200, 208, 216, 224, ++ 232, 261, 290, 298, 306, 314, 322, 330, ++ 338, 346, 346, 346, 346, 354, 354, 354, ++ 354, 354, 354, 354, 354, 362, 362, 362, ++ 362, 362, 362, 362, 362, 362, 362, 362, ++ 362, 362, 362, 362, 362, 362, 362, 362, ++ 362, 362, 362, 362, 362, 362, 362, 362, ++ 362, 362, 362, 362, 362, 362, 362, 362, ++ 362, 362, 362, 362, 362, 362, 362, 362, ++ 362, 362, 362, 362, 362, 362, 362, 362, ++ 362, 362, 362, 362, 362, 362, 362, 362, ++ 362, 362, 362, 362, 362, 362, 362, 362, ++ 362, ++ }, ++ }, ++ { ++ { /* Fourth byte 16-bit table 0. */ ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 8, 16, 24, ++ 24, 24, 124, 146, 177, 219, 327, 335, ++ 379, 427, 521, 528, 562, 602, 624, 683, ++ 782, 797, 797, 849, 894, 941, 1061, 1076, ++ 1118, 1133, 1193, 1233, 1233, 1233, 1233, 1233, ++ 1233, 1233, 1333, 1355, 1386, 1428, 1536, 1544, ++ 1588, 1643, 1731, 1744, 1778, 1818, 1840, 1899, ++ 1998, 2013, 2013, 2065, 2110, 2164, 2284, 2299, ++ 2348, 2363, 2430, 2470, 2470, 2470, 2470, 2470, ++ 2470, 2470, 2470, 2470, 2470, 2470, 2470, 2470, ++ 2470, 2470, 2470, 2470, 2470, 2470, 2470, 2470, ++ 2470, 2470, 2470, 2470, 2470, 2470, 2470, 2470, ++ 2470, 2470, 2470, 2470, 2470, 2470, 2470, 2470, ++ 2470, 2470, 2470, 2470, 2470, 2470, 2470, 2470, ++ 2470, 2470, 2470, 2470, 2470, 2470, 2470, 2470, ++ 2470, 2470, 2470, 2470, 2470, 2470, 2470, 2470, ++ 2470, 2470, 2470, 2470, 2470, 2470, 2470, 2470, ++ 2470, 2470, 2470, 2470, 2470, 2470, 2470, 2470, ++ 2470, 2470, 2470, 2470, 2470, 2470, 2470, 2470, ++ 2470, 2470, 2470, 2470, 2470, 2470, 2470, 2470, ++ 2470, 2470, 2470, 2470, 2470, 2470, 2470, 2470, ++ 2470, 2470, 2470, 2470, 2470, 2470, 2470, 2470, ++ 2470, 2470, 2470, 2470, 2470, 2470, 2470, 2470, ++ 2470, 2470, 2470, 2470, 2470, 2470, 2470, 2470, ++ 2470, 2470, 2470, 2470, 2470, 2470, 2470, 2470, ++ 2470, ++ }, ++ { /* Fourth byte 16-bit table 1. 
*/ ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 29, 29, 36, 43, 56, ++ 64, 64, 64, 93, 93, 93, 93, 93, ++ 101, 101, 101, 101, 101, 130, 151, 158, ++ 158, 165, 165, 165, 165, 190, 190, 190, ++ 190, 190, 190, 219, 219, 226, 233, 246, ++ 254, 254, 254, 283, 283, 283, 283, 283, ++ 291, 291, 291, 291, 291, 320, 341, 348, ++ 348, 355, 355, 355, 355, 380, 380, 380, ++ 380, 380, 380, 380, 380, 380, 380, 380, ++ 380, 380, 380, 380, 380, 380, 380, 380, ++ 380, 380, 380, 380, 380, 380, 380, 380, ++ 380, 380, 380, 380, 380, 380, 380, 380, ++ 380, 380, 380, 380, 380, 380, 380, 380, ++ 380, 380, 380, 380, 380, 380, 380, 380, ++ 380, 380, 380, 380, 380, 380, 380, 380, ++ 380, 380, 380, 380, 380, 380, 380, 380, ++ 380, ++ }, ++ { /* Fourth byte 16-bit table 2. */ ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 49, 49, 49, 49, 77, 77, ++ 112, 112, 160, 160, 160, 160, 160, 160, ++ 188, 188, 196, 196, 196, 196, 237, 237, ++ 237, 237, 272, 272, 272, 280, 280, 288, ++ 288, 288, 344, 344, 344, 344, 372, 372, ++ 414, 414, 469, 469, 469, 469, 469, 469, ++ 497, 497, 497, 497, 497, 497, 497, 497, ++ 497, 497, 497, 497, 497, 497, 497, 497, ++ 497, 497, 497, 497, 497, 497, 497, 497, ++ 497, 497, 497, 497, 497, 497, 497, 497, ++ 497, 497, 497, 497, 497, 497, 497, 497, ++ 497, 497, 497, 497, 497, 497, 497, 497, ++ 497, 497, 497, 497, 497, 497, 497, 497, ++ 497, 497, 497, 497, 497, 497, 497, 497, ++ 497, ++ }, ++ { /* Fourth byte 16-bit table 3. */ ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 29, 58, 66, 74, 82, 90, 98, ++ 106, 135, 164, 172, 180, 188, 196, 204, ++ 212, 227, 242, 242, 242, 242, 242, 242, ++ 242, 257, 272, 272, 272, 272, 272, 272, ++ 272, 301, 330, 338, 346, 354, 362, 370, ++ 378, 407, 436, 444, 452, 460, 468, 476, ++ 484, 506, 528, 528, 528, 528, 528, 528, ++ 528, 550, 572, 572, 572, 572, 572, 572, ++ 572, 572, 572, 572, 572, 572, 572, 572, ++ 572, 572, 572, 572, 572, 572, 572, 572, ++ 572, 572, 572, 572, 572, 572, 572, 572, ++ 572, 572, 572, 572, 572, 572, 572, 572, ++ 572, 572, 572, 572, 572, 572, 572, 572, ++ 572, 572, 572, 572, 572, 572, 572, 572, ++ 572, 572, 572, 572, 572, 572, 572, 572, ++ 572, 572, 572, 572, 572, 572, 572, 572, ++ 572, ++ }, ++ { /* Fourth byte 16-bit table 4. 
*/ ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 15, 30, 30, 30, 30, 30, 30, ++ 30, 45, 60, 60, 60, 60, 60, 60, ++ 60, 82, 104, 104, 104, 104, 104, 104, ++ 104, 104, 126, 126, 126, 126, 126, 126, ++ 126, 155, 184, 192, 200, 208, 216, 224, ++ 232, 261, 290, 298, 306, 314, 322, 330, ++ 338, 346, 346, 346, 346, 354, 354, 354, ++ 354, 354, 354, 354, 354, 362, 362, 362, ++ 362, 362, 362, 362, 362, 362, 362, 362, ++ 362, 362, 362, 362, 362, 362, 362, 362, ++ 362, 362, 362, 362, 362, 362, 362, 362, ++ 362, 362, 362, 362, 362, 362, 362, 362, ++ 362, 362, 362, 362, 362, 362, 362, 362, ++ 362, 362, 362, 362, 362, 362, 362, 362, ++ 362, 362, 362, 362, 362, 362, 362, 362, ++ 362, 362, 362, 362, 362, 362, 362, 362, ++ 362, ++ }, ++ }, ++}; ++ ++static const uchar_t u8_composition_final_tbl[2][6623] = { ++ { ++ 0x01, 0xCC, 0xB8, FIL_, 0xE2, 0x89, 0xAE, FIL_, ++ 0x01, 0xCC, 0xB8, FIL_, 0xE2, 0x89, 0xA0, FIL_, ++ 0x01, 0xCC, 0xB8, FIL_, 0xE2, 0x89, 0xAF, FIL_, ++ 0x10, 0xCC, 0x86, FIL_, 0xC4, 0x82, FIL_, 0xCC, ++ 0x87, FIL_, 0xC8, 0xA6, FIL_, 0xCC, 0x8F, FIL_, ++ 0xC8, 0x80, FIL_, 0xCC, 0x82, FIL_, 0xC3, 0x82, ++ FIL_, 0xCC, 0x81, FIL_, 0xC3, 0x81, FIL_, 0xCC, ++ 0x80, FIL_, 0xC3, 0x80, FIL_, 0xCC, 0x83, FIL_, ++ 0xC3, 0x83, FIL_, 0xCC, 0xA3, FIL_, 0xE1, 0xBA, ++ 0xA0, FIL_, 0xCC, 0xA5, FIL_, 0xE1, 0xB8, 0x80, ++ FIL_, 0xCC, 0x91, FIL_, 0xC8, 0x82, FIL_, 0xCC, ++ 0x84, FIL_, 0xC4, 0x80, FIL_, 0xCC, 0x88, FIL_, ++ 0xC3, 0x84, FIL_, 0xCC, 0x8A, FIL_, 0xC3, 0x85, ++ FIL_, 0xCC, 0xA8, FIL_, 0xC4, 0x84, FIL_, 0xCC, ++ 0x89, FIL_, 0xE1, 0xBA, 0xA2, FIL_, 0xCC, 0x8C, ++ FIL_, 0xC7, 0x8D, FIL_, 0x03, 0xCC, 0x87, FIL_, ++ 0xE1, 0xB8, 0x82, FIL_, 0xCC, 0xB1, FIL_, 0xE1, ++ 0xB8, 0x86, FIL_, 0xCC, 0xA3, FIL_, 0xE1, 0xB8, ++ 0x84, FIL_, 0x05, 0xCC, 0xA7, FIL_, 0xC3, 0x87, ++ FIL_, 0xCC, 0x81, FIL_, 0xC4, 0x86, FIL_, 0xCC, ++ 0x8C, FIL_, 0xC4, 0x8C, FIL_, 0xCC, 0x87, FIL_, ++ 0xC4, 0x8A, FIL_, 0xCC, 0x82, FIL_, 0xC4, 0x88, ++ FIL_, 0x06, 0xCC, 0xB1, FIL_, 0xE1, 0xB8, 0x8E, ++ FIL_, 0xCC, 0xA7, FIL_, 0xE1, 0xB8, 0x90, FIL_, ++ 0xCC, 0xAD, FIL_, 0xE1, 0xB8, 0x92, FIL_, 0xCC, ++ 0x87, FIL_, 0xE1, 0xB8, 0x8A, FIL_, 0xCC, 0x8C, ++ FIL_, 0xC4, 0x8E, FIL_, 0xCC, 0xA3, FIL_, 0xE1, ++ 0xB8, 0x8C, FIL_, 0x11, 0xCC, 0x80, FIL_, 0xC3, ++ 0x88, FIL_, 0xCC, 0x81, FIL_, 0xC3, 0x89, FIL_, ++ 0xCC, 0x82, FIL_, 0xC3, 0x8A, FIL_, 0xCC, 0x88, ++ FIL_, 0xC3, 0x8B, FIL_, 0xCC, 0xA7, FIL_, 0xC8, ++ 0xA8, FIL_, 0xCC, 0x91, FIL_, 0xC8, 0x86, FIL_, ++ 0xCC, 0x8F, FIL_, 0xC8, 0x84, FIL_, 0xCC, 0x89, ++ FIL_, 0xE1, 0xBA, 0xBA, FIL_, 0xCC, 0xB0, FIL_, ++ 0xE1, 0xB8, 0x9A, FIL_, 0xCC, 0xAD, FIL_, 0xE1, ++ 0xB8, 0x98, FIL_, 0xCC, 0x83, FIL_, 0xE1, 0xBA, ++ 0xBC, FIL_, 0xCC, 0xA3, FIL_, 0xE1, 0xBA, 0xB8, ++ FIL_, 0xCC, 0x84, FIL_, 0xC4, 0x92, FIL_, 0xCC, ++ 0x86, FIL_, 0xC4, 0x94, FIL_, 0xCC, 0x87, FIL_, ++ 0xC4, 0x96, FIL_, 0xCC, 0xA8, FIL_, 0xC4, 0x98, ++ FIL_, 0xCC, 0x8C, FIL_, 0xC4, 0x9A, FIL_, 0x01, ++ 0xCC, 0x87, FIL_, 0xE1, 0xB8, 0x9E, FIL_, 0x07, ++ 0xCC, 0x8C, FIL_, 0xC7, 0xA6, FIL_, 0xCC, 0x87, ++ FIL_, 0xC4, 0xA0, FIL_, 0xCC, 0x84, FIL_, 0xE1, ++ 0xB8, 0xA0, FIL_, 0xCC, 0x82, FIL_, 0xC4, 0x9C, ++ FIL_, 0xCC, 0x81, FIL_, 0xC7, 0xB4, FIL_, 0xCC, ++ 0xA7, 
FIL_, 0xC4, 0xA2, FIL_, 0xCC, 0x86, FIL_, ++ 0xC4, 0x9E, FIL_, 0x07, 0xCC, 0xAE, FIL_, 0xE1, ++ 0xB8, 0xAA, FIL_, 0xCC, 0x87, FIL_, 0xE1, 0xB8, ++ 0xA2, FIL_, 0xCC, 0x88, FIL_, 0xE1, 0xB8, 0xA6, ++ FIL_, 0xCC, 0xA3, FIL_, 0xE1, 0xB8, 0xA4, FIL_, ++ 0xCC, 0xA7, FIL_, 0xE1, 0xB8, 0xA8, FIL_, 0xCC, ++ 0x8C, FIL_, 0xC8, 0x9E, FIL_, 0xCC, 0x82, FIL_, ++ 0xC4, 0xA4, FIL_, 0x0F, 0xCC, 0x84, FIL_, 0xC4, ++ 0xAA, FIL_, 0xCC, 0x80, FIL_, 0xC3, 0x8C, FIL_, ++ 0xCC, 0xA8, FIL_, 0xC4, 0xAE, FIL_, 0xCC, 0x83, ++ FIL_, 0xC4, 0xA8, FIL_, 0xCC, 0x88, FIL_, 0xC3, ++ 0x8F, FIL_, 0xCC, 0x81, FIL_, 0xC3, 0x8D, FIL_, ++ 0xCC, 0x8F, FIL_, 0xC8, 0x88, FIL_, 0xCC, 0x86, ++ FIL_, 0xC4, 0xAC, FIL_, 0xCC, 0x91, FIL_, 0xC8, ++ 0x8A, FIL_, 0xCC, 0x8C, FIL_, 0xC7, 0x8F, FIL_, ++ 0xCC, 0x89, FIL_, 0xE1, 0xBB, 0x88, FIL_, 0xCC, ++ 0x87, FIL_, 0xC4, 0xB0, FIL_, 0xCC, 0xA3, FIL_, ++ 0xE1, 0xBB, 0x8A, FIL_, 0xCC, 0xB0, FIL_, 0xE1, ++ 0xB8, 0xAC, FIL_, 0xCC, 0x82, FIL_, 0xC3, 0x8E, ++ FIL_, 0x01, 0xCC, 0x82, FIL_, 0xC4, 0xB4, FIL_, ++ 0x05, 0xCC, 0x8C, FIL_, 0xC7, 0xA8, FIL_, 0xCC, ++ 0xB1, FIL_, 0xE1, 0xB8, 0xB4, FIL_, 0xCC, 0x81, ++ FIL_, 0xE1, 0xB8, 0xB0, FIL_, 0xCC, 0xA7, FIL_, ++ 0xC4, 0xB6, FIL_, 0xCC, 0xA3, FIL_, 0xE1, 0xB8, ++ 0xB2, FIL_, 0x06, 0xCC, 0xA7, FIL_, 0xC4, 0xBB, ++ FIL_, 0xCC, 0x8C, FIL_, 0xC4, 0xBD, FIL_, 0xCC, ++ 0xB1, FIL_, 0xE1, 0xB8, 0xBA, FIL_, 0xCC, 0xA3, ++ FIL_, 0xE1, 0xB8, 0xB6, FIL_, 0xCC, 0xAD, FIL_, ++ 0xE1, 0xB8, 0xBC, FIL_, 0xCC, 0x81, FIL_, 0xC4, ++ 0xB9, FIL_, 0x03, 0xCC, 0x81, FIL_, 0xE1, 0xB8, ++ 0xBE, FIL_, 0xCC, 0xA3, FIL_, 0xE1, 0xB9, 0x82, ++ FIL_, 0xCC, 0x87, FIL_, 0xE1, 0xB9, 0x80, FIL_, ++ 0x09, 0xCC, 0x80, FIL_, 0xC7, 0xB8, FIL_, 0xCC, ++ 0xAD, FIL_, 0xE1, 0xB9, 0x8A, FIL_, 0xCC, 0x87, ++ FIL_, 0xE1, 0xB9, 0x84, FIL_, 0xCC, 0xB1, FIL_, ++ 0xE1, 0xB9, 0x88, FIL_, 0xCC, 0x83, FIL_, 0xC3, ++ 0x91, FIL_, 0xCC, 0xA3, FIL_, 0xE1, 0xB9, 0x86, ++ FIL_, 0xCC, 0x81, FIL_, 0xC5, 0x83, FIL_, 0xCC, ++ 0xA7, FIL_, 0xC5, 0x85, FIL_, 0xCC, 0x8C, FIL_, ++ 0xC5, 0x87, FIL_, 0x10, 0xCC, 0xA8, FIL_, 0xC7, ++ 0xAA, FIL_, 0xCC, 0x91, FIL_, 0xC8, 0x8E, FIL_, ++ 0xCC, 0x80, FIL_, 0xC3, 0x92, FIL_, 0xCC, 0x9B, ++ FIL_, 0xC6, 0xA0, FIL_, 0xCC, 0x8F, FIL_, 0xC8, ++ 0x8C, FIL_, 0xCC, 0x81, FIL_, 0xC3, 0x93, FIL_, ++ 0xCC, 0x87, FIL_, 0xC8, 0xAE, FIL_, 0xCC, 0x8C, ++ FIL_, 0xC7, 0x91, FIL_, 0xCC, 0xA3, FIL_, 0xE1, ++ 0xBB, 0x8C, FIL_, 0xCC, 0x82, FIL_, 0xC3, 0x94, ++ FIL_, 0xCC, 0x84, FIL_, 0xC5, 0x8C, FIL_, 0xCC, ++ 0x83, FIL_, 0xC3, 0x95, FIL_, 0xCC, 0x86, FIL_, ++ 0xC5, 0x8E, FIL_, 0xCC, 0x88, FIL_, 0xC3, 0x96, ++ FIL_, 0xCC, 0x8B, FIL_, 0xC5, 0x90, FIL_, 0xCC, ++ 0x89, FIL_, 0xE1, 0xBB, 0x8E, FIL_, 0x02, 0xCC, ++ 0x87, FIL_, 0xE1, 0xB9, 0x96, FIL_, 0xCC, 0x81, ++ FIL_, 0xE1, 0xB9, 0x94, FIL_, 0x08, 0xCC, 0x91, ++ FIL_, 0xC8, 0x92, FIL_, 0xCC, 0xA7, FIL_, 0xC5, ++ 0x96, FIL_, 0xCC, 0x8C, FIL_, 0xC5, 0x98, FIL_, ++ 0xCC, 0xB1, FIL_, 0xE1, 0xB9, 0x9E, FIL_, 0xCC, ++ 0xA3, FIL_, 0xE1, 0xB9, 0x9A, FIL_, 0xCC, 0x87, ++ FIL_, 0xE1, 0xB9, 0x98, FIL_, 0xCC, 0x81, FIL_, ++ 0xC5, 0x94, FIL_, 0xCC, 0x8F, FIL_, 0xC8, 0x90, ++ FIL_, 0x07, 0xCC, 0x81, FIL_, 0xC5, 0x9A, FIL_, ++ 0xCC, 0x82, FIL_, 0xC5, 0x9C, FIL_, 0xCC, 0xA7, ++ FIL_, 0xC5, 0x9E, FIL_, 0xCC, 0x8C, FIL_, 0xC5, ++ 0xA0, FIL_, 0xCC, 0xA6, FIL_, 0xC8, 0x98, FIL_, ++ 0xCC, 0x87, FIL_, 0xE1, 0xB9, 0xA0, FIL_, 0xCC, ++ 0xA3, FIL_, 0xE1, 0xB9, 0xA2, FIL_, 0x07, 0xCC, ++ 0x8C, FIL_, 0xC5, 0xA4, FIL_, 0xCC, 0xB1, FIL_, ++ 0xE1, 0xB9, 0xAE, FIL_, 0xCC, 0xA6, FIL_, 0xC8, ++ 0x9A, FIL_, 0xCC, 0xA7, FIL_, 0xC5, 0xA2, FIL_, ++ 0xCC, 0x87, FIL_, 0xE1, 0xB9, 0xAA, FIL_, 
0xCC, ++ 0xAD, FIL_, 0xE1, 0xB9, 0xB0, FIL_, 0xCC, 0xA3, ++ FIL_, 0xE1, 0xB9, 0xAC, FIL_, 0x13, 0xCC, 0xA8, ++ FIL_, 0xC5, 0xB2, FIL_, 0xCC, 0x83, FIL_, 0xC5, ++ 0xA8, FIL_, 0xCC, 0x84, FIL_, 0xC5, 0xAA, FIL_, ++ 0xCC, 0x81, FIL_, 0xC3, 0x9A, FIL_, 0xCC, 0x86, ++ FIL_, 0xC5, 0xAC, FIL_, 0xCC, 0x8A, FIL_, 0xC5, ++ 0xAE, FIL_, 0xCC, 0x80, FIL_, 0xC3, 0x99, FIL_, ++ 0xCC, 0x91, FIL_, 0xC8, 0x96, FIL_, 0xCC, 0x8B, ++ FIL_, 0xC5, 0xB0, FIL_, 0xCC, 0xA4, FIL_, 0xE1, ++ 0xB9, 0xB2, FIL_, 0xCC, 0xB0, FIL_, 0xE1, 0xB9, ++ 0xB4, FIL_, 0xCC, 0x8F, FIL_, 0xC8, 0x94, FIL_, ++ 0xCC, 0xAD, FIL_, 0xE1, 0xB9, 0xB6, FIL_, 0xCC, ++ 0x9B, FIL_, 0xC6, 0xAF, FIL_, 0xCC, 0x82, FIL_, ++ 0xC3, 0x9B, FIL_, 0xCC, 0x88, FIL_, 0xC3, 0x9C, ++ FIL_, 0xCC, 0x8C, FIL_, 0xC7, 0x93, FIL_, 0xCC, ++ 0xA3, FIL_, 0xE1, 0xBB, 0xA4, FIL_, 0xCC, 0x89, ++ FIL_, 0xE1, 0xBB, 0xA6, FIL_, 0x02, 0xCC, 0x83, ++ FIL_, 0xE1, 0xB9, 0xBC, FIL_, 0xCC, 0xA3, FIL_, ++ 0xE1, 0xB9, 0xBE, FIL_, 0x06, 0xCC, 0x82, FIL_, ++ 0xC5, 0xB4, FIL_, 0xCC, 0x88, FIL_, 0xE1, 0xBA, ++ 0x84, FIL_, 0xCC, 0x87, FIL_, 0xE1, 0xBA, 0x86, ++ FIL_, 0xCC, 0xA3, FIL_, 0xE1, 0xBA, 0x88, FIL_, ++ 0xCC, 0x81, FIL_, 0xE1, 0xBA, 0x82, FIL_, 0xCC, ++ 0x80, FIL_, 0xE1, 0xBA, 0x80, FIL_, 0x02, 0xCC, ++ 0x87, FIL_, 0xE1, 0xBA, 0x8A, FIL_, 0xCC, 0x88, ++ FIL_, 0xE1, 0xBA, 0x8C, FIL_, 0x09, 0xCC, 0x89, ++ FIL_, 0xE1, 0xBB, 0xB6, FIL_, 0xCC, 0x87, FIL_, ++ 0xE1, 0xBA, 0x8E, FIL_, 0xCC, 0xA3, FIL_, 0xE1, ++ 0xBB, 0xB4, FIL_, 0xCC, 0x81, FIL_, 0xC3, 0x9D, ++ FIL_, 0xCC, 0x84, FIL_, 0xC8, 0xB2, FIL_, 0xCC, ++ 0x82, FIL_, 0xC5, 0xB6, FIL_, 0xCC, 0x88, FIL_, ++ 0xC5, 0xB8, FIL_, 0xCC, 0x80, FIL_, 0xE1, 0xBB, ++ 0xB2, FIL_, 0xCC, 0x83, FIL_, 0xE1, 0xBB, 0xB8, ++ FIL_, 0x06, 0xCC, 0x87, FIL_, 0xC5, 0xBB, FIL_, ++ 0xCC, 0xA3, FIL_, 0xE1, 0xBA, 0x92, FIL_, 0xCC, ++ 0x8C, FIL_, 0xC5, 0xBD, FIL_, 0xCC, 0xB1, FIL_, ++ 0xE1, 0xBA, 0x94, FIL_, 0xCC, 0x82, FIL_, 0xE1, ++ 0xBA, 0x90, FIL_, 0xCC, 0x81, FIL_, 0xC5, 0xB9, ++ FIL_, 0x10, 0xCC, 0x8C, FIL_, 0xC7, 0x8E, FIL_, ++ 0xCC, 0x8F, FIL_, 0xC8, 0x81, FIL_, 0xCC, 0xA8, ++ FIL_, 0xC4, 0x85, FIL_, 0xCC, 0xA3, FIL_, 0xE1, ++ 0xBA, 0xA1, FIL_, 0xCC, 0x86, FIL_, 0xC4, 0x83, ++ FIL_, 0xCC, 0x89, FIL_, 0xE1, 0xBA, 0xA3, FIL_, ++ 0xCC, 0x84, FIL_, 0xC4, 0x81, FIL_, 0xCC, 0x91, ++ FIL_, 0xC8, 0x83, FIL_, 0xCC, 0x8A, FIL_, 0xC3, ++ 0xA5, FIL_, 0xCC, 0x88, FIL_, 0xC3, 0xA4, FIL_, ++ 0xCC, 0x83, FIL_, 0xC3, 0xA3, FIL_, 0xCC, 0x82, ++ FIL_, 0xC3, 0xA2, FIL_, 0xCC, 0x81, FIL_, 0xC3, ++ 0xA1, FIL_, 0xCC, 0x80, FIL_, 0xC3, 0xA0, FIL_, ++ 0xCC, 0x87, FIL_, 0xC8, 0xA7, FIL_, 0xCC, 0xA5, ++ FIL_, 0xE1, 0xB8, 0x81, FIL_, 0x03, 0xCC, 0xB1, ++ FIL_, 0xE1, 0xB8, 0x87, FIL_, 0xCC, 0xA3, FIL_, ++ 0xE1, 0xB8, 0x85, FIL_, 0xCC, 0x87, FIL_, 0xE1, ++ 0xB8, 0x83, FIL_, 0x05, 0xCC, 0x87, FIL_, 0xC4, ++ 0x8B, FIL_, 0xCC, 0xA7, FIL_, 0xC3, 0xA7, FIL_, ++ 0xCC, 0x82, FIL_, 0xC4, 0x89, FIL_, 0xCC, 0x8C, ++ FIL_, 0xC4, 0x8D, FIL_, 0xCC, 0x81, FIL_, 0xC4, ++ 0x87, FIL_, 0x06, 0xCC, 0xAD, FIL_, 0xE1, 0xB8, ++ 0x93, FIL_, 0xCC, 0x87, FIL_, 0xE1, 0xB8, 0x8B, ++ FIL_, 0xCC, 0xA3, FIL_, 0xE1, 0xB8, 0x8D, FIL_, ++ 0xCC, 0xB1, FIL_, 0xE1, 0xB8, 0x8F, FIL_, 0xCC, ++ 0xA7, FIL_, 0xE1, 0xB8, 0x91, FIL_, 0xCC, 0x8C, ++ FIL_, 0xC4, 0x8F, FIL_, 0x11, 0xCC, 0xA8, FIL_, ++ 0xC4, 0x99, FIL_, 0xCC, 0x8C, FIL_, 0xC4, 0x9B, ++ FIL_, 0xCC, 0x87, FIL_, 0xC4, 0x97, FIL_, 0xCC, ++ 0x88, FIL_, 0xC3, 0xAB, FIL_, 0xCC, 0xA3, FIL_, ++ 0xE1, 0xBA, 0xB9, FIL_, 0xCC, 0xB0, FIL_, 0xE1, ++ 0xB8, 0x9B, FIL_, 0xCC, 0x84, FIL_, 0xC4, 0x93, ++ FIL_, 0xCC, 0xAD, FIL_, 0xE1, 0xB8, 0x99, FIL_, ++ 0xCC, 0x83, FIL_, 0xE1, 
0xBA, 0xBD, FIL_, 0xCC, ++ 0x86, FIL_, 0xC4, 0x95, FIL_, 0xCC, 0xA7, FIL_, ++ 0xC8, 0xA9, FIL_, 0xCC, 0x89, FIL_, 0xE1, 0xBA, ++ 0xBB, FIL_, 0xCC, 0x8F, FIL_, 0xC8, 0x85, FIL_, ++ 0xCC, 0x81, FIL_, 0xC3, 0xA9, FIL_, 0xCC, 0x91, ++ FIL_, 0xC8, 0x87, FIL_, 0xCC, 0x80, FIL_, 0xC3, ++ 0xA8, FIL_, 0xCC, 0x82, FIL_, 0xC3, 0xAA, FIL_, ++ 0x01, 0xCC, 0x87, FIL_, 0xE1, 0xB8, 0x9F, FIL_, ++ 0x07, 0xCC, 0x86, FIL_, 0xC4, 0x9F, FIL_, 0xCC, ++ 0xA7, FIL_, 0xC4, 0xA3, FIL_, 0xCC, 0x81, FIL_, ++ 0xC7, 0xB5, FIL_, 0xCC, 0x82, FIL_, 0xC4, 0x9D, ++ FIL_, 0xCC, 0x87, FIL_, 0xC4, 0xA1, FIL_, 0xCC, ++ 0x8C, FIL_, 0xC7, 0xA7, FIL_, 0xCC, 0x84, FIL_, ++ 0xE1, 0xB8, 0xA1, FIL_, 0x08, 0xCC, 0x8C, FIL_, ++ 0xC8, 0x9F, FIL_, 0xCC, 0x82, FIL_, 0xC4, 0xA5, ++ FIL_, 0xCC, 0x88, FIL_, 0xE1, 0xB8, 0xA7, FIL_, ++ 0xCC, 0x87, FIL_, 0xE1, 0xB8, 0xA3, FIL_, 0xCC, ++ 0xB1, FIL_, 0xE1, 0xBA, 0x96, FIL_, 0xCC, 0xA3, ++ FIL_, 0xE1, 0xB8, 0xA5, FIL_, 0xCC, 0xA7, FIL_, ++ 0xE1, 0xB8, 0xA9, FIL_, 0xCC, 0xAE, FIL_, 0xE1, ++ 0xB8, 0xAB, FIL_, 0x0E, 0xCC, 0x81, FIL_, 0xC3, ++ 0xAD, FIL_, 0xCC, 0x80, FIL_, 0xC3, 0xAC, FIL_, ++ 0xCC, 0xA3, FIL_, 0xE1, 0xBB, 0x8B, FIL_, 0xCC, ++ 0x8C, FIL_, 0xC7, 0x90, FIL_, 0xCC, 0x89, FIL_, ++ 0xE1, 0xBB, 0x89, FIL_, 0xCC, 0x91, FIL_, 0xC8, ++ 0x8B, FIL_, 0xCC, 0x8F, FIL_, 0xC8, 0x89, FIL_, ++ 0xCC, 0x82, FIL_, 0xC3, 0xAE, FIL_, 0xCC, 0xB0, ++ FIL_, 0xE1, 0xB8, 0xAD, FIL_, 0xCC, 0xA8, FIL_, ++ 0xC4, 0xAF, FIL_, 0xCC, 0x86, FIL_, 0xC4, 0xAD, ++ FIL_, 0xCC, 0x84, FIL_, 0xC4, 0xAB, FIL_, 0xCC, ++ 0x83, FIL_, 0xC4, 0xA9, FIL_, 0xCC, 0x88, FIL_, ++ 0xC3, 0xAF, FIL_, 0x02, 0xCC, 0x82, FIL_, 0xC4, ++ 0xB5, FIL_, 0xCC, 0x8C, FIL_, 0xC7, 0xB0, FIL_, ++ 0x05, 0xCC, 0xA3, FIL_, 0xE1, 0xB8, 0xB3, FIL_, ++ 0xCC, 0x81, FIL_, 0xE1, 0xB8, 0xB1, FIL_, 0xCC, ++ 0xA7, FIL_, 0xC4, 0xB7, FIL_, 0xCC, 0x8C, FIL_, ++ 0xC7, 0xA9, FIL_, 0xCC, 0xB1, FIL_, 0xE1, 0xB8, ++ 0xB5, FIL_, 0x06, 0xCC, 0xA3, FIL_, 0xE1, 0xB8, ++ 0xB7, FIL_, 0xCC, 0x81, FIL_, 0xC4, 0xBA, FIL_, ++ 0xCC, 0xA7, FIL_, 0xC4, 0xBC, FIL_, 0xCC, 0x8C, ++ FIL_, 0xC4, 0xBE, FIL_, 0xCC, 0xB1, FIL_, 0xE1, ++ 0xB8, 0xBB, FIL_, 0xCC, 0xAD, FIL_, 0xE1, 0xB8, ++ 0xBD, FIL_, 0x03, 0xCC, 0xA3, FIL_, 0xE1, 0xB9, ++ 0x83, FIL_, 0xCC, 0x81, FIL_, 0xE1, 0xB8, 0xBF, ++ FIL_, 0xCC, 0x87, FIL_, 0xE1, 0xB9, 0x81, FIL_, ++ 0x09, 0xCC, 0xA3, FIL_, 0xE1, 0xB9, 0x87, FIL_, ++ 0xCC, 0x83, FIL_, 0xC3, 0xB1, FIL_, 0xCC, 0x87, ++ FIL_, 0xE1, 0xB9, 0x85, FIL_, 0xCC, 0xB1, FIL_, ++ 0xE1, 0xB9, 0x89, FIL_, 0xCC, 0x81, FIL_, 0xC5, ++ 0x84, FIL_, 0xCC, 0xA7, FIL_, 0xC5, 0x86, FIL_, ++ 0xCC, 0xAD, FIL_, 0xE1, 0xB9, 0x8B, FIL_, 0xCC, ++ 0x8C, FIL_, 0xC5, 0x88, FIL_, 0xCC, 0x80, FIL_, ++ 0xC7, 0xB9, FIL_, 0x10, 0xCC, 0x89, FIL_, 0xE1, ++ 0xBB, 0x8F, FIL_, 0xCC, 0x81, FIL_, 0xC3, 0xB3, ++ FIL_, 0xCC, 0x80, FIL_, 0xC3, 0xB2, FIL_, 0xCC, ++ 0x87, FIL_, 0xC8, 0xAF, FIL_, 0xCC, 0x8F, FIL_, ++ 0xC8, 0x8D, FIL_, 0xCC, 0xA3, FIL_, 0xE1, 0xBB, ++ 0x8D, FIL_, 0xCC, 0x84, FIL_, 0xC5, 0x8D, FIL_, ++ 0xCC, 0x8C, FIL_, 0xC7, 0x92, FIL_, 0xCC, 0x86, ++ FIL_, 0xC5, 0x8F, FIL_, 0xCC, 0x8B, FIL_, 0xC5, ++ 0x91, FIL_, 0xCC, 0x9B, FIL_, 0xC6, 0xA1, FIL_, ++ 0xCC, 0x91, FIL_, 0xC8, 0x8F, FIL_, 0xCC, 0xA8, ++ FIL_, 0xC7, 0xAB, FIL_, 0xCC, 0x88, FIL_, 0xC3, ++ 0xB6, FIL_, 0xCC, 0x83, FIL_, 0xC3, 0xB5, FIL_, ++ 0xCC, 0x82, FIL_, 0xC3, 0xB4, FIL_, 0x02, 0xCC, ++ 0x87, FIL_, 0xE1, 0xB9, 0x97, FIL_, 0xCC, 0x81, ++ FIL_, 0xE1, 0xB9, 0x95, FIL_, 0x08, 0xCC, 0xB1, ++ FIL_, 0xE1, 0xB9, 0x9F, FIL_, 0xCC, 0x87, FIL_, ++ 0xE1, 0xB9, 0x99, FIL_, 0xCC, 0x81, FIL_, 0xC5, ++ 0x95, FIL_, 0xCC, 0x8F, FIL_, 0xC8, 0x91, FIL_, ++ 0xCC, 
0xA3, FIL_, 0xE1, 0xB9, 0x9B, FIL_, 0xCC, ++ 0x8C, FIL_, 0xC5, 0x99, FIL_, 0xCC, 0x91, FIL_, ++ 0xC8, 0x93, FIL_, 0xCC, 0xA7, FIL_, 0xC5, 0x97, ++ FIL_, 0x07, 0xCC, 0xA6, FIL_, 0xC8, 0x99, FIL_, ++ 0xCC, 0x8C, FIL_, 0xC5, 0xA1, FIL_, 0xCC, 0x81, ++ FIL_, 0xC5, 0x9B, FIL_, 0xCC, 0x87, FIL_, 0xE1, ++ 0xB9, 0xA1, FIL_, 0xCC, 0x82, FIL_, 0xC5, 0x9D, ++ FIL_, 0xCC, 0xA7, FIL_, 0xC5, 0x9F, FIL_, 0xCC, ++ 0xA3, FIL_, 0xE1, 0xB9, 0xA3, FIL_, 0x08, 0xCC, ++ 0x88, FIL_, 0xE1, 0xBA, 0x97, FIL_, 0xCC, 0xAD, ++ FIL_, 0xE1, 0xB9, 0xB1, FIL_, 0xCC, 0xB1, FIL_, ++ 0xE1, 0xB9, 0xAF, FIL_, 0xCC, 0xA3, FIL_, 0xE1, ++ 0xB9, 0xAD, FIL_, 0xCC, 0x8C, FIL_, 0xC5, 0xA5, ++ FIL_, 0xCC, 0xA7, FIL_, 0xC5, 0xA3, FIL_, 0xCC, ++ 0x87, FIL_, 0xE1, 0xB9, 0xAB, FIL_, 0xCC, 0xA6, ++ FIL_, 0xC8, 0x9B, FIL_, 0x13, 0xCC, 0x81, FIL_, ++ 0xC3, 0xBA, FIL_, 0xCC, 0x91, FIL_, 0xC8, 0x97, ++ FIL_, 0xCC, 0x83, FIL_, 0xC5, 0xA9, FIL_, 0xCC, ++ 0x8F, FIL_, 0xC8, 0x95, FIL_, 0xCC, 0xA8, FIL_, ++ 0xC5, 0xB3, FIL_, 0xCC, 0x82, FIL_, 0xC3, 0xBB, ++ FIL_, 0xCC, 0x88, FIL_, 0xC3, 0xBC, FIL_, 0xCC, ++ 0x80, FIL_, 0xC3, 0xB9, FIL_, 0xCC, 0xA3, FIL_, ++ 0xE1, 0xBB, 0xA5, FIL_, 0xCC, 0xA4, FIL_, 0xE1, ++ 0xB9, 0xB3, FIL_, 0xCC, 0x89, FIL_, 0xE1, 0xBB, ++ 0xA7, FIL_, 0xCC, 0xB0, FIL_, 0xE1, 0xB9, 0xB5, ++ FIL_, 0xCC, 0xAD, FIL_, 0xE1, 0xB9, 0xB7, FIL_, ++ 0xCC, 0x9B, FIL_, 0xC6, 0xB0, FIL_, 0xCC, 0x84, ++ FIL_, 0xC5, 0xAB, FIL_, 0xCC, 0x8B, FIL_, 0xC5, ++ 0xB1, FIL_, 0xCC, 0x86, FIL_, 0xC5, 0xAD, FIL_, ++ 0xCC, 0x8C, FIL_, 0xC7, 0x94, FIL_, 0xCC, 0x8A, ++ FIL_, 0xC5, 0xAF, FIL_, 0x02, 0xCC, 0x83, FIL_, ++ 0xE1, 0xB9, 0xBD, FIL_, 0xCC, 0xA3, FIL_, 0xE1, ++ 0xB9, 0xBF, FIL_, 0x07, 0xCC, 0x82, FIL_, 0xC5, ++ 0xB5, FIL_, 0xCC, 0x80, FIL_, 0xE1, 0xBA, 0x81, ++ FIL_, 0xCC, 0x81, FIL_, 0xE1, 0xBA, 0x83, FIL_, ++ 0xCC, 0x88, FIL_, 0xE1, 0xBA, 0x85, FIL_, 0xCC, ++ 0xA3, FIL_, 0xE1, 0xBA, 0x89, FIL_, 0xCC, 0x87, ++ FIL_, 0xE1, 0xBA, 0x87, FIL_, 0xCC, 0x8A, FIL_, ++ 0xE1, 0xBA, 0x98, FIL_, 0x02, 0xCC, 0x87, FIL_, ++ 0xE1, 0xBA, 0x8B, FIL_, 0xCC, 0x88, FIL_, 0xE1, ++ 0xBA, 0x8D, FIL_, 0x0A, 0xCC, 0x87, FIL_, 0xE1, ++ 0xBA, 0x8F, FIL_, 0xCC, 0x83, FIL_, 0xE1, 0xBB, ++ 0xB9, FIL_, 0xCC, 0x80, FIL_, 0xE1, 0xBB, 0xB3, ++ FIL_, 0xCC, 0x89, FIL_, 0xE1, 0xBB, 0xB7, FIL_, ++ 0xCC, 0xA3, FIL_, 0xE1, 0xBB, 0xB5, FIL_, 0xCC, ++ 0x82, FIL_, 0xC5, 0xB7, FIL_, 0xCC, 0x84, FIL_, ++ 0xC8, 0xB3, FIL_, 0xCC, 0x8A, FIL_, 0xE1, 0xBA, ++ 0x99, FIL_, 0xCC, 0x88, FIL_, 0xC3, 0xBF, FIL_, ++ 0xCC, 0x81, FIL_, 0xC3, 0xBD, FIL_, 0x06, 0xCC, ++ 0x8C, FIL_, 0xC5, 0xBE, FIL_, 0xCC, 0x87, FIL_, ++ 0xC5, 0xBC, FIL_, 0xCC, 0xB1, FIL_, 0xE1, 0xBA, ++ 0x95, FIL_, 0xCC, 0xA3, FIL_, 0xE1, 0xBA, 0x93, ++ FIL_, 0xCC, 0x81, FIL_, 0xC5, 0xBA, FIL_, 0xCC, ++ 0x82, FIL_, 0xE1, 0xBA, 0x91, FIL_, 0x03, 0xCC, ++ 0x80, FIL_, 0xE1, 0xBF, 0xAD, FIL_, 0xCD, 0x82, ++ FIL_, 0xE1, 0xBF, 0x81, FIL_, 0xCC, 0x81, FIL_, ++ 0xCE, 0x85, FIL_, 0x04, 0xCC, 0x89, FIL_, 0xE1, ++ 0xBA, 0xA8, FIL_, 0xCC, 0x83, FIL_, 0xE1, 0xBA, ++ 0xAA, FIL_, 0xCC, 0x81, FIL_, 0xE1, 0xBA, 0xA4, ++ FIL_, 0xCC, 0x80, FIL_, 0xE1, 0xBA, 0xA6, FIL_, ++ 0x01, 0xCC, 0x84, FIL_, 0xC7, 0x9E, FIL_, 0x01, ++ 0xCC, 0x81, FIL_, 0xC7, 0xBA, FIL_, 0x02, 0xCC, ++ 0x84, FIL_, 0xC7, 0xA2, FIL_, 0xCC, 0x81, FIL_, ++ 0xC7, 0xBC, FIL_, 0x01, 0xCC, 0x81, FIL_, 0xE1, ++ 0xB8, 0x88, FIL_, 0x04, 0xCC, 0x81, FIL_, 0xE1, ++ 0xBA, 0xBE, FIL_, 0xCC, 0x80, FIL_, 0xE1, 0xBB, ++ 0x80, FIL_, 0xCC, 0x83, FIL_, 0xE1, 0xBB, 0x84, ++ FIL_, 0xCC, 0x89, FIL_, 0xE1, 0xBB, 0x82, FIL_, ++ 0x01, 0xCC, 0x81, FIL_, 0xE1, 0xB8, 0xAE, FIL_, ++ 0x04, 0xCC, 0x83, FIL_, 0xE1, 0xBB, 0x96, 
FIL_, ++ 0xCC, 0x81, FIL_, 0xE1, 0xBB, 0x90, FIL_, 0xCC, ++ 0x80, FIL_, 0xE1, 0xBB, 0x92, FIL_, 0xCC, 0x89, ++ FIL_, 0xE1, 0xBB, 0x94, FIL_, 0x03, 0xCC, 0x84, ++ FIL_, 0xC8, 0xAC, FIL_, 0xCC, 0x81, FIL_, 0xE1, ++ 0xB9, 0x8C, FIL_, 0xCC, 0x88, FIL_, 0xE1, 0xB9, ++ 0x8E, FIL_, 0x01, 0xCC, 0x84, FIL_, 0xC8, 0xAA, ++ FIL_, 0x01, 0xCC, 0x81, FIL_, 0xC7, 0xBE, FIL_, ++ 0x04, 0xCC, 0x80, FIL_, 0xC7, 0x9B, FIL_, 0xCC, ++ 0x84, FIL_, 0xC7, 0x95, FIL_, 0xCC, 0x8C, FIL_, ++ 0xC7, 0x99, FIL_, 0xCC, 0x81, FIL_, 0xC7, 0x97, ++ FIL_, 0x04, 0xCC, 0x89, FIL_, 0xE1, 0xBA, 0xA9, ++ FIL_, 0xCC, 0x80, FIL_, 0xE1, 0xBA, 0xA7, FIL_, ++ 0xCC, 0x81, FIL_, 0xE1, 0xBA, 0xA5, FIL_, 0xCC, ++ 0x83, FIL_, 0xE1, 0xBA, 0xAB, FIL_, 0x01, 0xCC, ++ 0x84, FIL_, 0xC7, 0x9F, FIL_, 0x01, 0xCC, 0x81, ++ FIL_, 0xC7, 0xBB, FIL_, 0x02, 0xCC, 0x84, FIL_, ++ 0xC7, 0xA3, FIL_, 0xCC, 0x81, FIL_, 0xC7, 0xBD, ++ FIL_, 0x01, 0xCC, 0x81, FIL_, 0xE1, 0xB8, 0x89, ++ FIL_, 0x04, 0xCC, 0x89, FIL_, 0xE1, 0xBB, 0x83, ++ FIL_, 0xCC, 0x81, FIL_, 0xE1, 0xBA, 0xBF, FIL_, ++ 0xCC, 0x80, FIL_, 0xE1, 0xBB, 0x81, FIL_, 0xCC, ++ 0x83, FIL_, 0xE1, 0xBB, 0x85, FIL_, 0x01, 0xCC, ++ 0x81, FIL_, 0xE1, 0xB8, 0xAF, FIL_, 0x04, 0xCC, ++ 0x83, FIL_, 0xE1, 0xBB, 0x97, FIL_, 0xCC, 0x89, ++ FIL_, 0xE1, 0xBB, 0x95, FIL_, 0xCC, 0x80, FIL_, ++ 0xE1, 0xBB, 0x93, FIL_, 0xCC, 0x81, FIL_, 0xE1, ++ 0xBB, 0x91, FIL_, 0x03, 0xCC, 0x81, FIL_, 0xE1, ++ 0xB9, 0x8D, FIL_, 0xCC, 0x84, FIL_, 0xC8, 0xAD, ++ FIL_, 0xCC, 0x88, FIL_, 0xE1, 0xB9, 0x8F, FIL_, ++ 0x01, 0xCC, 0x84, FIL_, 0xC8, 0xAB, FIL_, 0x01, ++ 0xCC, 0x81, FIL_, 0xC7, 0xBF, FIL_, 0x04, 0xCC, ++ 0x81, FIL_, 0xC7, 0x98, FIL_, 0xCC, 0x84, FIL_, ++ 0xC7, 0x96, FIL_, 0xCC, 0x8C, FIL_, 0xC7, 0x9A, ++ FIL_, 0xCC, 0x80, FIL_, 0xC7, 0x9C, FIL_, 0x04, ++ 0xCC, 0x80, FIL_, 0xE1, 0xBA, 0xB0, FIL_, 0xCC, ++ 0x81, FIL_, 0xE1, 0xBA, 0xAE, FIL_, 0xCC, 0x83, ++ FIL_, 0xE1, 0xBA, 0xB4, FIL_, 0xCC, 0x89, FIL_, ++ 0xE1, 0xBA, 0xB2, FIL_, 0x04, 0xCC, 0x80, FIL_, ++ 0xE1, 0xBA, 0xB1, FIL_, 0xCC, 0x83, FIL_, 0xE1, ++ 0xBA, 0xB5, FIL_, 0xCC, 0x81, FIL_, 0xE1, 0xBA, ++ 0xAF, FIL_, 0xCC, 0x89, FIL_, 0xE1, 0xBA, 0xB3, ++ FIL_, 0x02, 0xCC, 0x81, FIL_, 0xE1, 0xB8, 0x96, ++ FIL_, 0xCC, 0x80, FIL_, 0xE1, 0xB8, 0x94, FIL_, ++ 0x02, 0xCC, 0x80, FIL_, 0xE1, 0xB8, 0x95, FIL_, ++ 0xCC, 0x81, FIL_, 0xE1, 0xB8, 0x97, FIL_, 0x02, ++ 0xCC, 0x80, FIL_, 0xE1, 0xB9, 0x90, FIL_, 0xCC, ++ 0x81, FIL_, 0xE1, 0xB9, 0x92, FIL_, 0x02, 0xCC, ++ 0x80, FIL_, 0xE1, 0xB9, 0x91, FIL_, 0xCC, 0x81, ++ FIL_, 0xE1, 0xB9, 0x93, FIL_, 0x01, 0xCC, 0x87, ++ FIL_, 0xE1, 0xB9, 0xA4, FIL_, 0x01, 0xCC, 0x87, ++ FIL_, 0xE1, 0xB9, 0xA5, FIL_, 0x01, 0xCC, 0x87, ++ FIL_, 0xE1, 0xB9, 0xA6, FIL_, 0x01, 0xCC, 0x87, ++ FIL_, 0xE1, 0xB9, 0xA7, FIL_, 0x01, 0xCC, 0x81, ++ FIL_, 0xE1, 0xB9, 0xB8, FIL_, 0x01, 0xCC, 0x81, ++ FIL_, 0xE1, 0xB9, 0xB9, FIL_, 0x01, 0xCC, 0x88, ++ FIL_, 0xE1, 0xB9, 0xBA, FIL_, 0x01, 0xCC, 0x88, ++ FIL_, 0xE1, 0xB9, 0xBB, FIL_, 0x01, 0xCC, 0x87, ++ FIL_, 0xE1, 0xBA, 0x9B, FIL_, 0x05, 0xCC, 0x80, ++ FIL_, 0xE1, 0xBB, 0x9C, FIL_, 0xCC, 0x81, FIL_, ++ 0xE1, 0xBB, 0x9A, FIL_, 0xCC, 0xA3, FIL_, 0xE1, ++ 0xBB, 0xA2, FIL_, 0xCC, 0x83, FIL_, 0xE1, 0xBB, ++ 0xA0, FIL_, 0xCC, 0x89, FIL_, 0xE1, 0xBB, 0x9E, ++ FIL_, 0x05, 0xCC, 0x83, FIL_, 0xE1, 0xBB, 0xA1, ++ FIL_, 0xCC, 0x81, FIL_, 0xE1, 0xBB, 0x9B, FIL_, ++ 0xCC, 0xA3, FIL_, 0xE1, 0xBB, 0xA3, FIL_, 0xCC, ++ 0x89, FIL_, 0xE1, 0xBB, 0x9F, FIL_, 0xCC, 0x80, ++ FIL_, 0xE1, 0xBB, 0x9D, FIL_, 0x05, 0xCC, 0x83, ++ FIL_, 0xE1, 0xBB, 0xAE, FIL_, 0xCC, 0xA3, FIL_, ++ 0xE1, 0xBB, 0xB0, FIL_, 0xCC, 0x89, FIL_, 0xE1, ++ 0xBB, 0xAC, FIL_, 0xCC, 
0x81, FIL_, 0xE1, 0xBB, ++ 0xA8, FIL_, 0xCC, 0x80, FIL_, 0xE1, 0xBB, 0xAA, ++ FIL_, 0x05, 0xCC, 0xA3, FIL_, 0xE1, 0xBB, 0xB1, ++ FIL_, 0xCC, 0x83, FIL_, 0xE1, 0xBB, 0xAF, FIL_, ++ 0xCC, 0x89, FIL_, 0xE1, 0xBB, 0xAD, FIL_, 0xCC, ++ 0x81, FIL_, 0xE1, 0xBB, 0xA9, FIL_, 0xCC, 0x80, ++ FIL_, 0xE1, 0xBB, 0xAB, FIL_, 0x01, 0xCC, 0x8C, ++ FIL_, 0xC7, 0xAE, FIL_, 0x01, 0xCC, 0x84, FIL_, ++ 0xC7, 0xAC, FIL_, 0x01, 0xCC, 0x84, FIL_, 0xC7, ++ 0xAD, FIL_, 0x01, 0xCC, 0x84, FIL_, 0xC7, 0xA0, ++ FIL_, 0x01, 0xCC, 0x84, FIL_, 0xC7, 0xA1, FIL_, ++ 0x01, 0xCC, 0x86, FIL_, 0xE1, 0xB8, 0x9C, FIL_, ++ 0x01, 0xCC, 0x86, FIL_, 0xE1, 0xB8, 0x9D, FIL_, ++ 0x01, 0xCC, 0x84, FIL_, 0xC8, 0xB0, FIL_, 0x01, ++ 0xCC, 0x84, FIL_, 0xC8, 0xB1, FIL_, 0x01, 0xCC, ++ 0x8C, FIL_, 0xC7, 0xAF, FIL_, 0x07, 0xCC, 0x93, ++ FIL_, 0xE1, 0xBC, 0x88, FIL_, 0xCC, 0x94, FIL_, ++ 0xE1, 0xBC, 0x89, FIL_, 0xCC, 0x81, FIL_, 0xCE, ++ 0x86, FIL_, 0xCD, 0x85, FIL_, 0xE1, 0xBE, 0xBC, ++ FIL_, 0xCC, 0x80, FIL_, 0xE1, 0xBE, 0xBA, FIL_, ++ 0xCC, 0x84, FIL_, 0xE1, 0xBE, 0xB9, FIL_, 0xCC, ++ 0x86, FIL_, 0xE1, 0xBE, 0xB8, FIL_, 0x04, 0xCC, ++ 0x81, FIL_, 0xCE, 0x88, FIL_, 0xCC, 0x94, FIL_, ++ 0xE1, 0xBC, 0x99, FIL_, 0xCC, 0x93, FIL_, 0xE1, ++ 0xBC, 0x98, FIL_, 0xCC, 0x80, FIL_, 0xE1, 0xBF, ++ 0x88, FIL_, 0x05, 0xCC, 0x94, FIL_, 0xE1, 0xBC, ++ 0xA9, FIL_, 0xCC, 0x80, FIL_, 0xE1, 0xBF, 0x8A, ++ FIL_, 0xCC, 0x81, FIL_, 0xCE, 0x89, FIL_, 0xCD, ++ 0x85, FIL_, 0xE1, 0xBF, 0x8C, FIL_, 0xCC, 0x93, ++ FIL_, 0xE1, 0xBC, 0xA8, FIL_, 0x07, 0xCC, 0x81, ++ FIL_, 0xCE, 0x8A, FIL_, 0xCC, 0x88, FIL_, 0xCE, ++ 0xAA, FIL_, 0xCC, 0x86, FIL_, 0xE1, 0xBF, 0x98, ++ FIL_, 0xCC, 0x84, FIL_, 0xE1, 0xBF, 0x99, FIL_, ++ 0xCC, 0x93, FIL_, 0xE1, 0xBC, 0xB8, FIL_, 0xCC, ++ 0x94, FIL_, 0xE1, 0xBC, 0xB9, FIL_, 0xCC, 0x80, ++ FIL_, 0xE1, 0xBF, 0x9A, FIL_, 0x04, 0xCC, 0x94, ++ FIL_, 0xE1, 0xBD, 0x89, FIL_, 0xCC, 0x80, FIL_, ++ 0xE1, 0xBF, 0xB8, FIL_, 0xCC, 0x81, FIL_, 0xCE, ++ 0x8C, FIL_, 0xCC, 0x93, FIL_, 0xE1, 0xBD, 0x88, ++ FIL_, 0x01, 0xCC, 0x94, FIL_, 0xE1, 0xBF, 0xAC, ++ FIL_, 0x06, 0xCC, 0x81, FIL_, 0xCE, 0x8E, FIL_, ++ 0xCC, 0x86, FIL_, 0xE1, 0xBF, 0xA8, FIL_, 0xCC, ++ 0x94, FIL_, 0xE1, 0xBD, 0x99, FIL_, 0xCC, 0x80, ++ FIL_, 0xE1, 0xBF, 0xAA, FIL_, 0xCC, 0x84, FIL_, ++ 0xE1, 0xBF, 0xA9, FIL_, 0xCC, 0x88, FIL_, 0xCE, ++ 0xAB, FIL_, 0x05, 0xCC, 0x80, FIL_, 0xE1, 0xBF, ++ 0xBA, FIL_, 0xCC, 0x81, FIL_, 0xCE, 0x8F, FIL_, ++ 0xCD, 0x85, FIL_, 0xE1, 0xBF, 0xBC, FIL_, 0xCC, ++ 0x94, FIL_, 0xE1, 0xBD, 0xA9, FIL_, 0xCC, 0x93, ++ FIL_, 0xE1, 0xBD, 0xA8, FIL_, 0x01, 0xCD, 0x85, ++ FIL_, 0xE1, 0xBE, 0xB4, FIL_, 0x01, 0xCD, 0x85, ++ FIL_, 0xE1, 0xBF, 0x84, FIL_, 0x08, 0xCC, 0x81, ++ FIL_, 0xCE, 0xAC, FIL_, 0xCC, 0x80, FIL_, 0xE1, ++ 0xBD, 0xB0, FIL_, 0xCC, 0x93, FIL_, 0xE1, 0xBC, ++ 0x80, FIL_, 0xCC, 0x94, FIL_, 0xE1, 0xBC, 0x81, ++ FIL_, 0xCD, 0x82, FIL_, 0xE1, 0xBE, 0xB6, FIL_, ++ 0xCC, 0x86, FIL_, 0xE1, 0xBE, 0xB0, FIL_, 0xCD, ++ 0x85, FIL_, 0xE1, 0xBE, 0xB3, FIL_, 0xCC, 0x84, ++ FIL_, 0xE1, 0xBE, 0xB1, FIL_, 0x04, 0xCC, 0x81, ++ FIL_, 0xCE, 0xAD, FIL_, 0xCC, 0x94, FIL_, 0xE1, ++ 0xBC, 0x91, FIL_, 0xCC, 0x80, FIL_, 0xE1, 0xBD, ++ 0xB2, FIL_, 0xCC, 0x93, FIL_, 0xE1, 0xBC, 0x90, ++ FIL_, 0x06, 0xCC, 0x81, FIL_, 0xCE, 0xAE, FIL_, ++ 0xCC, 0x80, FIL_, 0xE1, 0xBD, 0xB4, FIL_, 0xCD, ++ 0x85, FIL_, 0xE1, 0xBF, 0x83, FIL_, 0xCD, 0x82, ++ FIL_, 0xE1, 0xBF, 0x86, FIL_, 0xCC, 0x94, FIL_, ++ 0xE1, 0xBC, 0xA1, FIL_, 0xCC, 0x93, FIL_, 0xE1, ++ 0xBC, 0xA0, FIL_, 0x08, 0xCD, 0x82, FIL_, 0xE1, ++ 0xBF, 0x96, FIL_, 0xCC, 0x86, FIL_, 0xE1, 0xBF, ++ 0x90, FIL_, 0xCC, 0x93, FIL_, 0xE1, 0xBC, 0xB0, ++ FIL_, 
0xCC, 0x81, FIL_, 0xCE, 0xAF, FIL_, 0xCC, ++ 0x94, FIL_, 0xE1, 0xBC, 0xB1, FIL_, 0xCC, 0x84, ++ FIL_, 0xE1, 0xBF, 0x91, FIL_, 0xCC, 0x88, FIL_, ++ 0xCF, 0x8A, FIL_, 0xCC, 0x80, FIL_, 0xE1, 0xBD, ++ 0xB6, FIL_, 0x04, 0xCC, 0x81, FIL_, 0xCF, 0x8C, ++ FIL_, 0xCC, 0x80, FIL_, 0xE1, 0xBD, 0xB8, FIL_, ++ 0xCC, 0x93, FIL_, 0xE1, 0xBD, 0x80, FIL_, 0xCC, ++ 0x94, FIL_, 0xE1, 0xBD, 0x81, FIL_, 0x02, 0xCC, ++ 0x93, FIL_, 0xE1, 0xBF, 0xA4, FIL_, 0xCC, 0x94, ++ FIL_, 0xE1, 0xBF, 0xA5, FIL_, 0x08, 0xCC, 0x93, ++ FIL_, 0xE1, 0xBD, 0x90, FIL_, 0xCC, 0x94, FIL_, ++ 0xE1, 0xBD, 0x91, FIL_, 0xCC, 0x86, FIL_, 0xE1, ++ 0xBF, 0xA0, FIL_, 0xCD, 0x82, FIL_, 0xE1, 0xBF, ++ 0xA6, FIL_, 0xCC, 0x84, FIL_, 0xE1, 0xBF, 0xA1, ++ FIL_, 0xCC, 0x80, FIL_, 0xE1, 0xBD, 0xBA, FIL_, ++ 0xCC, 0x81, FIL_, 0xCF, 0x8D, FIL_, 0xCC, 0x88, ++ FIL_, 0xCF, 0x8B, FIL_, 0x06, 0xCC, 0x94, FIL_, ++ 0xE1, 0xBD, 0xA1, FIL_, 0xCD, 0x85, FIL_, 0xE1, ++ 0xBF, 0xB3, FIL_, 0xCC, 0x80, FIL_, 0xE1, 0xBD, ++ 0xBC, FIL_, 0xCD, 0x82, FIL_, 0xE1, 0xBF, 0xB6, ++ FIL_, 0xCC, 0x93, FIL_, 0xE1, 0xBD, 0xA0, FIL_, ++ 0xCC, 0x81, FIL_, 0xCF, 0x8E, FIL_, 0x03, 0xCD, ++ 0x82, FIL_, 0xE1, 0xBF, 0x97, FIL_, 0xCC, 0x80, ++ FIL_, 0xE1, 0xBF, 0x92, FIL_, 0xCC, 0x81, FIL_, ++ 0xCE, 0x90, FIL_, 0x03, 0xCC, 0x80, FIL_, 0xE1, ++ 0xBF, 0xA2, FIL_, 0xCC, 0x81, FIL_, 0xCE, 0xB0, ++ FIL_, 0xCD, 0x82, FIL_, 0xE1, 0xBF, 0xA7, FIL_, ++ 0x01, 0xCD, 0x85, FIL_, 0xE1, 0xBF, 0xB4, FIL_, ++ 0x02, 0xCC, 0x88, FIL_, 0xCF, 0x94, FIL_, 0xCC, ++ 0x81, FIL_, 0xCF, 0x93, FIL_, 0x01, 0xCC, 0x88, ++ FIL_, 0xD0, 0x87, FIL_, 0x02, 0xCC, 0x86, FIL_, ++ 0xD3, 0x90, FIL_, 0xCC, 0x88, FIL_, 0xD3, 0x92, ++ FIL_, 0x01, 0xCC, 0x81, FIL_, 0xD0, 0x83, FIL_, ++ 0x03, 0xCC, 0x86, FIL_, 0xD3, 0x96, FIL_, 0xCC, ++ 0x80, FIL_, 0xD0, 0x80, FIL_, 0xCC, 0x88, FIL_, ++ 0xD0, 0x81, FIL_, 0x02, 0xCC, 0x88, FIL_, 0xD3, ++ 0x9C, FIL_, 0xCC, 0x86, FIL_, 0xD3, 0x81, FIL_, ++ 0x01, 0xCC, 0x88, FIL_, 0xD3, 0x9E, FIL_, 0x04, ++ 0xCC, 0x80, FIL_, 0xD0, 0x8D, FIL_, 0xCC, 0x88, ++ FIL_, 0xD3, 0xA4, FIL_, 0xCC, 0x86, FIL_, 0xD0, ++ 0x99, FIL_, 0xCC, 0x84, FIL_, 0xD3, 0xA2, FIL_, ++ 0x01, 0xCC, 0x81, FIL_, 0xD0, 0x8C, FIL_, 0x01, ++ 0xCC, 0x88, FIL_, 0xD3, 0xA6, FIL_, 0x04, 0xCC, ++ 0x86, FIL_, 0xD0, 0x8E, FIL_, 0xCC, 0x8B, FIL_, ++ 0xD3, 0xB2, FIL_, 0xCC, 0x88, FIL_, 0xD3, 0xB0, ++ FIL_, 0xCC, 0x84, FIL_, 0xD3, 0xAE, FIL_, 0x01, ++ 0xCC, 0x88, FIL_, 0xD3, 0xB4, FIL_, 0x01, 0xCC, ++ 0x88, FIL_, 0xD3, 0xB8, FIL_, 0x01, 0xCC, 0x88, ++ FIL_, 0xD3, 0xAC, FIL_, 0x02, 0xCC, 0x86, FIL_, ++ 0xD3, 0x91, FIL_, 0xCC, 0x88, FIL_, 0xD3, 0x93, ++ FIL_, 0x01, 0xCC, 0x81, FIL_, 0xD1, 0x93, FIL_, ++ 0x03, 0xCC, 0x80, FIL_, 0xD1, 0x90, FIL_, 0xCC, ++ 0x88, FIL_, 0xD1, 0x91, FIL_, 0xCC, 0x86, FIL_, ++ 0xD3, 0x97, FIL_, 0x02, 0xCC, 0x88, FIL_, 0xD3, ++ 0x9D, FIL_, 0xCC, 0x86, FIL_, 0xD3, 0x82, FIL_, ++ 0x01, 0xCC, 0x88, FIL_, 0xD3, 0x9F, FIL_, 0x04, ++ 0xCC, 0x88, FIL_, 0xD3, 0xA5, FIL_, 0xCC, 0x86, ++ FIL_, 0xD0, 0xB9, FIL_, 0xCC, 0x80, FIL_, 0xD1, ++ 0x9D, FIL_, 0xCC, 0x84, FIL_, 0xD3, 0xA3, FIL_, ++ 0x01, 0xCC, 0x81, FIL_, 0xD1, 0x9C, FIL_, 0x01, ++ 0xCC, 0x88, FIL_, 0xD3, 0xA7, FIL_, 0x04, 0xCC, ++ 0x84, FIL_, 0xD3, 0xAF, FIL_, 0xCC, 0x86, FIL_, ++ 0xD1, 0x9E, FIL_, 0xCC, 0x8B, FIL_, 0xD3, 0xB3, ++ FIL_, 0xCC, 0x88, FIL_, 0xD3, 0xB1, FIL_, 0x01, ++ 0xCC, 0x88, FIL_, 0xD3, 0xB5, FIL_, 0x01, 0xCC, ++ 0x88, FIL_, 0xD3, 0xB9, FIL_, 0x01, 0xCC, 0x88, ++ FIL_, 0xD3, 0xAD, FIL_, 0x01, 0xCC, 0x88, FIL_, ++ 0xD1, 0x97, FIL_, 0x01, 0xCC, 0x8F, FIL_, 0xD1, ++ 0xB6, FIL_, 0x01, 0xCC, 0x8F, FIL_, 0xD1, 0xB7, ++ FIL_, 0x01, 0xCC, 0x88, FIL_, 0xD3, 0x9A, 
FIL_, ++ 0x01, 0xCC, 0x88, FIL_, 0xD3, 0x9B, FIL_, 0x01, ++ 0xCC, 0x88, FIL_, 0xD3, 0xAA, FIL_, 0x01, 0xCC, ++ 0x88, FIL_, 0xD3, 0xAB, FIL_, 0x03, 0xD9, 0x94, ++ FIL_, 0xD8, 0xA3, FIL_, 0xD9, 0x93, FIL_, 0xD8, ++ 0xA2, FIL_, 0xD9, 0x95, FIL_, 0xD8, 0xA5, FIL_, ++ 0x01, 0xD9, 0x94, FIL_, 0xD8, 0xA4, FIL_, 0x01, ++ 0xD9, 0x94, FIL_, 0xD8, 0xA6, FIL_, 0x01, 0xD9, ++ 0x94, FIL_, 0xDB, 0x82, FIL_, 0x01, 0xD9, 0x94, ++ FIL_, 0xDB, 0x93, FIL_, 0x01, 0xD9, 0x94, FIL_, ++ 0xDB, 0x80, FIL_, 0x01, 0xE0, 0xA4, 0xBC, FIL_, ++ 0xE0, 0xA4, 0xA9, FIL_, 0x01, 0xE0, 0xA4, 0xBC, ++ FIL_, 0xE0, 0xA4, 0xB1, FIL_, 0x01, 0xE0, 0xA4, ++ 0xBC, FIL_, 0xE0, 0xA4, 0xB4, FIL_, 0x02, 0xE0, ++ 0xA6, 0xBE, FIL_, 0xE0, 0xA7, 0x8B, FIL_, 0xE0, ++ 0xA7, 0x97, FIL_, 0xE0, 0xA7, 0x8C, FIL_, 0x03, ++ 0xE0, 0xAD, 0x97, FIL_, 0xE0, 0xAD, 0x8C, FIL_, ++ 0xE0, 0xAC, 0xBE, FIL_, 0xE0, 0xAD, 0x8B, FIL_, ++ 0xE0, 0xAD, 0x96, FIL_, 0xE0, 0xAD, 0x88, FIL_, ++ 0x01, 0xE0, 0xAF, 0x97, FIL_, 0xE0, 0xAE, 0x94, ++ FIL_, 0x02, 0xE0, 0xAE, 0xBE, FIL_, 0xE0, 0xAF, ++ 0x8A, FIL_, 0xE0, 0xAF, 0x97, FIL_, 0xE0, 0xAF, ++ 0x8C, FIL_, 0x01, 0xE0, 0xAE, 0xBE, FIL_, 0xE0, ++ 0xAF, 0x8B, FIL_, 0x01, 0xE0, 0xB1, 0x96, FIL_, ++ 0xE0, 0xB1, 0x88, FIL_, 0x01, 0xE0, 0xB3, 0x95, ++ FIL_, 0xE0, 0xB3, 0x80, FIL_, 0x03, 0xE0, 0xB3, ++ 0x95, FIL_, 0xE0, 0xB3, 0x87, FIL_, 0xE0, 0xB3, ++ 0x82, FIL_, 0xE0, 0xB3, 0x8A, FIL_, 0xE0, 0xB3, ++ 0x96, FIL_, 0xE0, 0xB3, 0x88, FIL_, 0x01, 0xE0, ++ 0xB3, 0x95, FIL_, 0xE0, 0xB3, 0x8B, FIL_, 0x02, ++ 0xE0, 0xB4, 0xBE, FIL_, 0xE0, 0xB5, 0x8A, FIL_, ++ 0xE0, 0xB5, 0x97, FIL_, 0xE0, 0xB5, 0x8C, FIL_, ++ 0x01, 0xE0, 0xB4, 0xBE, FIL_, 0xE0, 0xB5, 0x8B, ++ FIL_, 0x03, 0xE0, 0xB7, 0x8F, FIL_, 0xE0, 0xB7, ++ 0x9C, FIL_, 0xE0, 0xB7, 0x8A, FIL_, 0xE0, 0xB7, ++ 0x9A, FIL_, 0xE0, 0xB7, 0x9F, FIL_, 0xE0, 0xB7, ++ 0x9E, FIL_, 0x01, 0xE0, 0xB7, 0x8A, FIL_, 0xE0, ++ 0xB7, 0x9D, FIL_, 0x01, 0xE1, 0x80, 0xAE, FIL_, ++ 0xE1, 0x80, 0xA6, FIL_, 0x01, 0xCC, 0x84, FIL_, ++ 0xE1, 0xB8, 0xB8, FIL_, 0x01, 0xCC, 0x84, FIL_, ++ 0xE1, 0xB8, 0xB9, FIL_, 0x01, 0xCC, 0x84, FIL_, ++ 0xE1, 0xB9, 0x9C, FIL_, 0x01, 0xCC, 0x84, FIL_, ++ 0xE1, 0xB9, 0x9D, FIL_, 0x01, 0xCC, 0x87, FIL_, ++ 0xE1, 0xB9, 0xA8, FIL_, 0x01, 0xCC, 0x87, FIL_, ++ 0xE1, 0xB9, 0xA9, FIL_, 0x02, 0xCC, 0x86, FIL_, ++ 0xE1, 0xBA, 0xB6, FIL_, 0xCC, 0x82, FIL_, 0xE1, ++ 0xBA, 0xAC, FIL_, 0x02, 0xCC, 0x86, FIL_, 0xE1, ++ 0xBA, 0xB7, FIL_, 0xCC, 0x82, FIL_, 0xE1, 0xBA, ++ 0xAD, FIL_, 0x01, 0xCC, 0x82, FIL_, 0xE1, 0xBB, ++ 0x86, FIL_, 0x01, 0xCC, 0x82, FIL_, 0xE1, 0xBB, ++ 0x87, FIL_, 0x01, 0xCC, 0x82, FIL_, 0xE1, 0xBB, ++ 0x98, FIL_, 0x01, 0xCC, 0x82, FIL_, 0xE1, 0xBB, ++ 0x99, FIL_, 0x04, 0xCC, 0x80, FIL_, 0xE1, 0xBC, ++ 0x82, FIL_, 0xCC, 0x81, FIL_, 0xE1, 0xBC, 0x84, ++ FIL_, 0xCD, 0x85, FIL_, 0xE1, 0xBE, 0x80, FIL_, ++ 0xCD, 0x82, FIL_, 0xE1, 0xBC, 0x86, FIL_, 0x04, ++ 0xCD, 0x82, FIL_, 0xE1, 0xBC, 0x87, FIL_, 0xCC, ++ 0x80, FIL_, 0xE1, 0xBC, 0x83, FIL_, 0xCC, 0x81, ++ FIL_, 0xE1, 0xBC, 0x85, FIL_, 0xCD, 0x85, FIL_, ++ 0xE1, 0xBE, 0x81, FIL_, 0x01, 0xCD, 0x85, FIL_, ++ 0xE1, 0xBE, 0x82, FIL_, 0x01, 0xCD, 0x85, FIL_, ++ 0xE1, 0xBE, 0x83, FIL_, 0x01, 0xCD, 0x85, FIL_, ++ 0xE1, 0xBE, 0x84, FIL_, 0x01, 0xCD, 0x85, FIL_, ++ 0xE1, 0xBE, 0x85, FIL_, 0x01, 0xCD, 0x85, FIL_, ++ 0xE1, 0xBE, 0x86, FIL_, 0x01, 0xCD, 0x85, FIL_, ++ 0xE1, 0xBE, 0x87, FIL_, 0x04, 0xCD, 0x85, FIL_, ++ 0xE1, 0xBE, 0x88, FIL_, 0xCC, 0x80, FIL_, 0xE1, ++ 0xBC, 0x8A, FIL_, 0xCD, 0x82, FIL_, 0xE1, 0xBC, ++ 0x8E, FIL_, 0xCC, 0x81, FIL_, 0xE1, 0xBC, 0x8C, ++ FIL_, 0x04, 0xCC, 0x81, FIL_, 0xE1, 0xBC, 0x8D, ++ FIL_, 0xCC, 0x80, FIL_, 
0xE1, 0xBC, 0x8B, FIL_, ++ 0xCD, 0x82, FIL_, 0xE1, 0xBC, 0x8F, FIL_, 0xCD, ++ 0x85, FIL_, 0xE1, 0xBE, 0x89, FIL_, 0x01, 0xCD, ++ 0x85, FIL_, 0xE1, 0xBE, 0x8A, FIL_, 0x01, 0xCD, ++ 0x85, FIL_, 0xE1, 0xBE, 0x8B, FIL_, 0x01, 0xCD, ++ 0x85, FIL_, 0xE1, 0xBE, 0x8C, FIL_, 0x01, 0xCD, ++ 0x85, FIL_, 0xE1, 0xBE, 0x8D, FIL_, 0x01, 0xCD, ++ 0x85, FIL_, 0xE1, 0xBE, 0x8E, FIL_, 0x01, 0xCD, ++ 0x85, FIL_, 0xE1, 0xBE, 0x8F, FIL_, 0x02, 0xCC, ++ 0x80, FIL_, 0xE1, 0xBC, 0x92, FIL_, 0xCC, 0x81, ++ FIL_, 0xE1, 0xBC, 0x94, FIL_, 0x02, 0xCC, 0x80, ++ FIL_, 0xE1, 0xBC, 0x93, FIL_, 0xCC, 0x81, FIL_, ++ 0xE1, 0xBC, 0x95, FIL_, 0x02, 0xCC, 0x80, FIL_, ++ 0xE1, 0xBC, 0x9A, FIL_, 0xCC, 0x81, FIL_, 0xE1, ++ 0xBC, 0x9C, FIL_, 0x02, 0xCC, 0x80, FIL_, 0xE1, ++ 0xBC, 0x9B, FIL_, 0xCC, 0x81, FIL_, 0xE1, 0xBC, ++ 0x9D, FIL_, 0x04, 0xCD, 0x82, FIL_, 0xE1, 0xBC, ++ 0xA6, FIL_, 0xCD, 0x85, FIL_, 0xE1, 0xBE, 0x90, ++ FIL_, 0xCC, 0x81, FIL_, 0xE1, 0xBC, 0xA4, FIL_, ++ 0xCC, 0x80, FIL_, 0xE1, 0xBC, 0xA2, FIL_, 0x04, ++ 0xCC, 0x80, FIL_, 0xE1, 0xBC, 0xA3, FIL_, 0xCC, ++ 0x81, FIL_, 0xE1, 0xBC, 0xA5, FIL_, 0xCD, 0x82, ++ FIL_, 0xE1, 0xBC, 0xA7, FIL_, 0xCD, 0x85, FIL_, ++ 0xE1, 0xBE, 0x91, FIL_, 0x01, 0xCD, 0x85, FIL_, ++ 0xE1, 0xBE, 0x92, FIL_, 0x01, 0xCD, 0x85, FIL_, ++ 0xE1, 0xBE, 0x93, FIL_, 0x01, 0xCD, 0x85, FIL_, ++ 0xE1, 0xBE, 0x94, FIL_, 0x01, 0xCD, 0x85, FIL_, ++ 0xE1, 0xBE, 0x95, FIL_, 0x01, 0xCD, 0x85, FIL_, ++ 0xE1, 0xBE, 0x96, FIL_, 0x01, 0xCD, 0x85, FIL_, ++ 0xE1, 0xBE, 0x97, FIL_, 0x04, 0xCD, 0x82, FIL_, ++ 0xE1, 0xBC, 0xAE, FIL_, 0xCC, 0x81, FIL_, 0xE1, ++ 0xBC, 0xAC, FIL_, 0xCD, 0x85, FIL_, 0xE1, 0xBE, ++ 0x98, FIL_, 0xCC, 0x80, FIL_, 0xE1, 0xBC, 0xAA, ++ FIL_, 0x04, 0xCD, 0x82, FIL_, 0xE1, 0xBC, 0xAF, ++ FIL_, 0xCD, 0x85, FIL_, 0xE1, 0xBE, 0x99, FIL_, ++ 0xCC, 0x81, FIL_, 0xE1, 0xBC, 0xAD, FIL_, 0xCC, ++ 0x80, FIL_, 0xE1, 0xBC, 0xAB, FIL_, 0x01, 0xCD, ++ 0x85, FIL_, 0xE1, 0xBE, 0x9A, FIL_, 0x01, 0xCD, ++ 0x85, FIL_, 0xE1, 0xBE, 0x9B, FIL_, 0x01, 0xCD, ++ 0x85, FIL_, 0xE1, 0xBE, 0x9C, FIL_, 0x01, 0xCD, ++ 0x85, FIL_, 0xE1, 0xBE, 0x9D, FIL_, 0x01, 0xCD, ++ 0x85, FIL_, 0xE1, 0xBE, 0x9E, FIL_, 0x01, 0xCD, ++ 0x85, FIL_, 0xE1, 0xBE, 0x9F, FIL_, 0x03, 0xCC, ++ 0x81, FIL_, 0xE1, 0xBC, 0xB4, FIL_, 0xCD, 0x82, ++ FIL_, 0xE1, 0xBC, 0xB6, FIL_, 0xCC, 0x80, FIL_, ++ 0xE1, 0xBC, 0xB2, FIL_, 0x03, 0xCC, 0x81, FIL_, ++ 0xE1, 0xBC, 0xB5, FIL_, 0xCD, 0x82, FIL_, 0xE1, ++ 0xBC, 0xB7, FIL_, 0xCC, 0x80, FIL_, 0xE1, 0xBC, ++ 0xB3, FIL_, 0x03, 0xCC, 0x81, FIL_, 0xE1, 0xBC, ++ 0xBC, FIL_, 0xCC, 0x80, FIL_, 0xE1, 0xBC, 0xBA, ++ FIL_, 0xCD, 0x82, FIL_, 0xE1, 0xBC, 0xBE, FIL_, ++ 0x03, 0xCC, 0x80, FIL_, 0xE1, 0xBC, 0xBB, FIL_, ++ 0xCD, 0x82, FIL_, 0xE1, 0xBC, 0xBF, FIL_, 0xCC, ++ 0x81, FIL_, 0xE1, 0xBC, 0xBD, FIL_, 0x02, 0xCC, ++ 0x80, FIL_, 0xE1, 0xBD, 0x82, FIL_, 0xCC, 0x81, ++ FIL_, 0xE1, 0xBD, 0x84, FIL_, 0x02, 0xCC, 0x80, ++ FIL_, 0xE1, 0xBD, 0x83, FIL_, 0xCC, 0x81, FIL_, ++ 0xE1, 0xBD, 0x85, FIL_, 0x02, 0xCC, 0x81, FIL_, ++ 0xE1, 0xBD, 0x8C, FIL_, 0xCC, 0x80, FIL_, 0xE1, ++ 0xBD, 0x8A, FIL_, 0x02, 0xCC, 0x81, FIL_, 0xE1, ++ 0xBD, 0x8D, FIL_, 0xCC, 0x80, FIL_, 0xE1, 0xBD, ++ 0x8B, FIL_, 0x03, 0xCC, 0x81, FIL_, 0xE1, 0xBD, ++ 0x94, FIL_, 0xCD, 0x82, FIL_, 0xE1, 0xBD, 0x96, ++ FIL_, 0xCC, 0x80, FIL_, 0xE1, 0xBD, 0x92, FIL_, ++ 0x03, 0xCD, 0x82, FIL_, 0xE1, 0xBD, 0x97, FIL_, ++ 0xCC, 0x81, FIL_, 0xE1, 0xBD, 0x95, FIL_, 0xCC, ++ 0x80, FIL_, 0xE1, 0xBD, 0x93, FIL_, 0x03, 0xCC, ++ 0x81, FIL_, 0xE1, 0xBD, 0x9D, FIL_, 0xCD, 0x82, ++ FIL_, 0xE1, 0xBD, 0x9F, FIL_, 0xCC, 0x80, FIL_, ++ 0xE1, 0xBD, 0x9B, FIL_, 0x04, 0xCC, 0x81, FIL_, ++ 0xE1, 
0xBD, 0xA4, FIL_, 0xCC, 0x80, FIL_, 0xE1, ++ 0xBD, 0xA2, FIL_, 0xCD, 0x82, FIL_, 0xE1, 0xBD, ++ 0xA6, FIL_, 0xCD, 0x85, FIL_, 0xE1, 0xBE, 0xA0, ++ FIL_, 0x04, 0xCD, 0x82, FIL_, 0xE1, 0xBD, 0xA7, ++ FIL_, 0xCC, 0x81, FIL_, 0xE1, 0xBD, 0xA5, FIL_, ++ 0xCD, 0x85, FIL_, 0xE1, 0xBE, 0xA1, FIL_, 0xCC, ++ 0x80, FIL_, 0xE1, 0xBD, 0xA3, FIL_, 0x01, 0xCD, ++ 0x85, FIL_, 0xE1, 0xBE, 0xA2, FIL_, 0x01, 0xCD, ++ 0x85, FIL_, 0xE1, 0xBE, 0xA3, FIL_, 0x01, 0xCD, ++ 0x85, FIL_, 0xE1, 0xBE, 0xA4, FIL_, 0x01, 0xCD, ++ 0x85, FIL_, 0xE1, 0xBE, 0xA5, FIL_, 0x01, 0xCD, ++ 0x85, FIL_, 0xE1, 0xBE, 0xA6, FIL_, 0x01, 0xCD, ++ 0x85, FIL_, 0xE1, 0xBE, 0xA7, FIL_, 0x04, 0xCC, ++ 0x81, FIL_, 0xE1, 0xBD, 0xAC, FIL_, 0xCC, 0x80, ++ FIL_, 0xE1, 0xBD, 0xAA, FIL_, 0xCD, 0x82, FIL_, ++ 0xE1, 0xBD, 0xAE, FIL_, 0xCD, 0x85, FIL_, 0xE1, ++ 0xBE, 0xA8, FIL_, 0x04, 0xCC, 0x81, FIL_, 0xE1, ++ 0xBD, 0xAD, FIL_, 0xCD, 0x85, FIL_, 0xE1, 0xBE, ++ 0xA9, FIL_, 0xCD, 0x82, FIL_, 0xE1, 0xBD, 0xAF, ++ FIL_, 0xCC, 0x80, FIL_, 0xE1, 0xBD, 0xAB, FIL_, ++ 0x01, 0xCD, 0x85, FIL_, 0xE1, 0xBE, 0xAA, FIL_, ++ 0x01, 0xCD, 0x85, FIL_, 0xE1, 0xBE, 0xAB, FIL_, ++ 0x01, 0xCD, 0x85, FIL_, 0xE1, 0xBE, 0xAC, FIL_, ++ 0x01, 0xCD, 0x85, FIL_, 0xE1, 0xBE, 0xAD, FIL_, ++ 0x01, 0xCD, 0x85, FIL_, 0xE1, 0xBE, 0xAE, FIL_, ++ 0x01, 0xCD, 0x85, FIL_, 0xE1, 0xBE, 0xAF, FIL_, ++ 0x01, 0xCD, 0x85, FIL_, 0xE1, 0xBE, 0xB2, FIL_, ++ 0x01, 0xCD, 0x85, FIL_, 0xE1, 0xBF, 0x82, FIL_, ++ 0x01, 0xCD, 0x85, FIL_, 0xE1, 0xBF, 0xB2, FIL_, ++ 0x01, 0xCD, 0x85, FIL_, 0xE1, 0xBE, 0xB7, FIL_, ++ 0x03, 0xCD, 0x82, FIL_, 0xE1, 0xBF, 0x8F, FIL_, ++ 0xCC, 0x80, FIL_, 0xE1, 0xBF, 0x8D, FIL_, 0xCC, ++ 0x81, FIL_, 0xE1, 0xBF, 0x8E, FIL_, 0x01, 0xCD, ++ 0x85, FIL_, 0xE1, 0xBF, 0x87, FIL_, 0x01, 0xCD, ++ 0x85, FIL_, 0xE1, 0xBF, 0xB7, FIL_, 0x03, 0xCC, ++ 0x80, FIL_, 0xE1, 0xBF, 0x9D, FIL_, 0xCD, 0x82, ++ FIL_, 0xE1, 0xBF, 0x9F, FIL_, 0xCC, 0x81, FIL_, ++ 0xE1, 0xBF, 0x9E, FIL_, 0x01, 0xCC, 0xB8, FIL_, ++ 0xE2, 0x86, 0x9A, FIL_, 0x01, 0xCC, 0xB8, FIL_, ++ 0xE2, 0x86, 0x9B, FIL_, 0x01, 0xCC, 0xB8, FIL_, ++ 0xE2, 0x86, 0xAE, FIL_, 0x01, 0xCC, 0xB8, FIL_, ++ 0xE2, 0x87, 0x8D, FIL_, 0x01, 0xCC, 0xB8, FIL_, ++ 0xE2, 0x87, 0x8F, FIL_, 0x01, 0xCC, 0xB8, FIL_, ++ 0xE2, 0x87, 0x8E, FIL_, 0x01, 0xCC, 0xB8, FIL_, ++ 0xE2, 0x88, 0x84, FIL_, 0x01, 0xCC, 0xB8, FIL_, ++ 0xE2, 0x88, 0x89, FIL_, 0x01, 0xCC, 0xB8, FIL_, ++ 0xE2, 0x88, 0x8C, FIL_, 0x01, 0xCC, 0xB8, FIL_, ++ 0xE2, 0x88, 0xA4, FIL_, 0x01, 0xCC, 0xB8, FIL_, ++ 0xE2, 0x88, 0xA6, FIL_, 0x01, 0xCC, 0xB8, FIL_, ++ 0xE2, 0x89, 0x81, FIL_, 0x01, 0xCC, 0xB8, FIL_, ++ 0xE2, 0x89, 0x84, FIL_, 0x01, 0xCC, 0xB8, FIL_, ++ 0xE2, 0x89, 0x87, FIL_, 0x01, 0xCC, 0xB8, FIL_, ++ 0xE2, 0x89, 0x89, FIL_, 0x01, 0xCC, 0xB8, FIL_, ++ 0xE2, 0x89, 0xAD, FIL_, 0x01, 0xCC, 0xB8, FIL_, ++ 0xE2, 0x89, 0xA2, FIL_, 0x01, 0xCC, 0xB8, FIL_, ++ 0xE2, 0x89, 0xB0, FIL_, 0x01, 0xCC, 0xB8, FIL_, ++ 0xE2, 0x89, 0xB1, FIL_, 0x01, 0xCC, 0xB8, FIL_, ++ 0xE2, 0x89, 0xB4, FIL_, 0x01, 0xCC, 0xB8, FIL_, ++ 0xE2, 0x89, 0xB5, FIL_, 0x01, 0xCC, 0xB8, FIL_, ++ 0xE2, 0x89, 0xB8, FIL_, 0x01, 0xCC, 0xB8, FIL_, ++ 0xE2, 0x89, 0xB9, FIL_, 0x01, 0xCC, 0xB8, FIL_, ++ 0xE2, 0x8A, 0x80, FIL_, 0x01, 0xCC, 0xB8, FIL_, ++ 0xE2, 0x8A, 0x81, FIL_, 0x01, 0xCC, 0xB8, FIL_, ++ 0xE2, 0x8B, 0xA0, FIL_, 0x01, 0xCC, 0xB8, FIL_, ++ 0xE2, 0x8B, 0xA1, FIL_, 0x01, 0xCC, 0xB8, FIL_, ++ 0xE2, 0x8A, 0x84, FIL_, 0x01, 0xCC, 0xB8, FIL_, ++ 0xE2, 0x8A, 0x85, FIL_, 0x01, 0xCC, 0xB8, FIL_, ++ 0xE2, 0x8A, 0x88, FIL_, 0x01, 0xCC, 0xB8, FIL_, ++ 0xE2, 0x8A, 0x89, FIL_, 0x01, 0xCC, 0xB8, FIL_, ++ 0xE2, 0x8B, 0xA2, FIL_, 0x01, 0xCC, 0xB8, 
FIL_, ++ 0xE2, 0x8B, 0xA3, FIL_, 0x01, 0xCC, 0xB8, FIL_, ++ 0xE2, 0x8A, 0xAC, FIL_, 0x01, 0xCC, 0xB8, FIL_, ++ 0xE2, 0x8A, 0xAD, FIL_, 0x01, 0xCC, 0xB8, FIL_, ++ 0xE2, 0x8A, 0xAE, FIL_, 0x01, 0xCC, 0xB8, FIL_, ++ 0xE2, 0x8A, 0xAF, FIL_, 0x01, 0xCC, 0xB8, FIL_, ++ 0xE2, 0x8B, 0xAA, FIL_, 0x01, 0xCC, 0xB8, FIL_, ++ 0xE2, 0x8B, 0xAB, FIL_, 0x01, 0xCC, 0xB8, FIL_, ++ 0xE2, 0x8B, 0xAC, FIL_, 0x01, 0xCC, 0xB8, FIL_, ++ 0xE2, 0x8B, 0xAD, FIL_, 0x01, 0xE3, 0x82, 0x99, ++ FIL_, 0xE3, 0x82, 0x94, FIL_, 0x01, 0xE3, 0x82, ++ 0x99, FIL_, 0xE3, 0x81, 0x8C, FIL_, 0x01, 0xE3, ++ 0x82, 0x99, FIL_, 0xE3, 0x81, 0x8E, FIL_, 0x01, ++ 0xE3, 0x82, 0x99, FIL_, 0xE3, 0x81, 0x90, FIL_, ++ 0x01, 0xE3, 0x82, 0x99, FIL_, 0xE3, 0x81, 0x92, ++ FIL_, 0x01, 0xE3, 0x82, 0x99, FIL_, 0xE3, 0x81, ++ 0x94, FIL_, 0x01, 0xE3, 0x82, 0x99, FIL_, 0xE3, ++ 0x81, 0x96, FIL_, 0x01, 0xE3, 0x82, 0x99, FIL_, ++ 0xE3, 0x81, 0x98, FIL_, 0x01, 0xE3, 0x82, 0x99, ++ FIL_, 0xE3, 0x81, 0x9A, FIL_, 0x01, 0xE3, 0x82, ++ 0x99, FIL_, 0xE3, 0x81, 0x9C, FIL_, 0x01, 0xE3, ++ 0x82, 0x99, FIL_, 0xE3, 0x81, 0x9E, FIL_, 0x01, ++ 0xE3, 0x82, 0x99, FIL_, 0xE3, 0x81, 0xA0, FIL_, ++ 0x01, 0xE3, 0x82, 0x99, FIL_, 0xE3, 0x81, 0xA2, ++ FIL_, 0x01, 0xE3, 0x82, 0x99, FIL_, 0xE3, 0x81, ++ 0xA5, FIL_, 0x01, 0xE3, 0x82, 0x99, FIL_, 0xE3, ++ 0x81, 0xA7, FIL_, 0x01, 0xE3, 0x82, 0x99, FIL_, ++ 0xE3, 0x81, 0xA9, FIL_, 0x02, 0xE3, 0x82, 0x9A, ++ FIL_, 0xE3, 0x81, 0xB1, FIL_, 0xE3, 0x82, 0x99, ++ FIL_, 0xE3, 0x81, 0xB0, FIL_, 0x02, 0xE3, 0x82, ++ 0x9A, FIL_, 0xE3, 0x81, 0xB4, FIL_, 0xE3, 0x82, ++ 0x99, FIL_, 0xE3, 0x81, 0xB3, FIL_, 0x02, 0xE3, ++ 0x82, 0x9A, FIL_, 0xE3, 0x81, 0xB7, FIL_, 0xE3, ++ 0x82, 0x99, FIL_, 0xE3, 0x81, 0xB6, FIL_, 0x02, ++ 0xE3, 0x82, 0x99, FIL_, 0xE3, 0x81, 0xB9, FIL_, ++ 0xE3, 0x82, 0x9A, FIL_, 0xE3, 0x81, 0xBA, FIL_, ++ 0x02, 0xE3, 0x82, 0x99, FIL_, 0xE3, 0x81, 0xBC, ++ FIL_, 0xE3, 0x82, 0x9A, FIL_, 0xE3, 0x81, 0xBD, ++ FIL_, 0x01, 0xE3, 0x82, 0x99, FIL_, 0xE3, 0x82, ++ 0x9E, FIL_, 0x01, 0xE3, 0x82, 0x99, FIL_, 0xE3, ++ 0x83, 0xB4, FIL_, 0x01, 0xE3, 0x82, 0x99, FIL_, ++ 0xE3, 0x82, 0xAC, FIL_, 0x01, 0xE3, 0x82, 0x99, ++ FIL_, 0xE3, 0x82, 0xAE, FIL_, 0x01, 0xE3, 0x82, ++ 0x99, FIL_, 0xE3, 0x82, 0xB0, FIL_, 0x01, 0xE3, ++ 0x82, 0x99, FIL_, 0xE3, 0x82, 0xB2, FIL_, 0x01, ++ 0xE3, 0x82, 0x99, FIL_, 0xE3, 0x82, 0xB4, FIL_, ++ 0x01, 0xE3, 0x82, 0x99, FIL_, 0xE3, 0x82, 0xB6, ++ FIL_, 0x01, 0xE3, 0x82, 0x99, FIL_, 0xE3, 0x82, ++ 0xB8, FIL_, 0x01, 0xE3, 0x82, 0x99, FIL_, 0xE3, ++ 0x82, 0xBA, FIL_, 0x01, 0xE3, 0x82, 0x99, FIL_, ++ 0xE3, 0x82, 0xBC, FIL_, 0x01, 0xE3, 0x82, 0x99, ++ FIL_, 0xE3, 0x82, 0xBE, FIL_, 0x01, 0xE3, 0x82, ++ 0x99, FIL_, 0xE3, 0x83, 0x80, FIL_, 0x01, 0xE3, ++ 0x82, 0x99, FIL_, 0xE3, 0x83, 0x82, FIL_, 0x01, ++ 0xE3, 0x82, 0x99, FIL_, 0xE3, 0x83, 0x85, FIL_, ++ 0x01, 0xE3, 0x82, 0x99, FIL_, 0xE3, 0x83, 0x87, ++ FIL_, 0x01, 0xE3, 0x82, 0x99, FIL_, 0xE3, 0x83, ++ 0x89, FIL_, 0x02, 0xE3, 0x82, 0x99, FIL_, 0xE3, ++ 0x83, 0x90, FIL_, 0xE3, 0x82, 0x9A, FIL_, 0xE3, ++ 0x83, 0x91, FIL_, 0x02, 0xE3, 0x82, 0x99, FIL_, ++ 0xE3, 0x83, 0x93, FIL_, 0xE3, 0x82, 0x9A, FIL_, ++ 0xE3, 0x83, 0x94, FIL_, 0x02, 0xE3, 0x82, 0x99, ++ FIL_, 0xE3, 0x83, 0x96, FIL_, 0xE3, 0x82, 0x9A, ++ FIL_, 0xE3, 0x83, 0x97, FIL_, 0x02, 0xE3, 0x82, ++ 0x9A, FIL_, 0xE3, 0x83, 0x9A, FIL_, 0xE3, 0x82, ++ 0x99, FIL_, 0xE3, 0x83, 0x99, FIL_, 0x02, 0xE3, ++ 0x82, 0x9A, FIL_, 0xE3, 0x83, 0x9D, FIL_, 0xE3, ++ 0x82, 0x99, FIL_, 0xE3, 0x83, 0x9C, FIL_, 0x01, ++ 0xE3, 0x82, 0x99, FIL_, 0xE3, 0x83, 0xB7, FIL_, ++ 0x01, 0xE3, 0x82, 0x99, FIL_, 0xE3, 0x83, 0xB8, ++ FIL_, 0x01, 0xE3, 0x82, 
0x99, FIL_, 0xE3, 0x83, ++ 0xB9, FIL_, 0x01, 0xE3, 0x82, 0x99, FIL_, 0xE3, ++ 0x83, 0xBA, FIL_, 0x01, 0xE3, 0x82, 0x99, FIL_, ++ 0xE3, 0x83, 0xBE, FIL_, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, ++ }, ++ { ++ 0x01, 0xCC, 0xB8, FIL_, 0xE2, 0x89, 0xAE, FIL_, ++ 0x01, 0xCC, 0xB8, FIL_, 0xE2, 0x89, 0xA0, FIL_, ++ 0x01, 0xCC, 0xB8, FIL_, 0xE2, 0x89, 0xAF, FIL_, ++ 0x10, 0xCC, 0xA5, FIL_, 0xE1, 0xB8, 0x80, FIL_, ++ 0xCC, 0x87, FIL_, 0xC8, 0xA6, FIL_, 0xCC, 0x83, ++ FIL_, 0xC3, 0x83, FIL_, 0xCC, 0x91, FIL_, 0xC8, ++ 0x82, FIL_, 0xCC, 0x8F, FIL_, 0xC8, 0x80, FIL_, ++ 0xCC, 0x8A, FIL_, 0xC3, 0x85, FIL_, 0xCC, 0x88, ++ FIL_, 0xC3, 0x84, FIL_, 0xCC, 0x89, FIL_, 0xE1, ++ 0xBA, 0xA2, FIL_, 0xCC, 0xA3, FIL_, 0xE1, 0xBA, ++ 0xA0, FIL_, 0xCC, 0x8C, FIL_, 0xC7, 0x8D, FIL_, ++ 0xCC, 0x80, FIL_, 0xC3, 0x80, FIL_, 0xCC, 0x81, ++ FIL_, 0xC3, 0x81, FIL_, 0xCC, 0x82, FIL_, 0xC3, ++ 0x82, FIL_, 0xCC, 0xA8, FIL_, 0xC4, 0x84, FIL_, ++ 0xCC, 0x86, FIL_, 0xC4, 0x82, FIL_, 0xCC, 0x84, ++ FIL_, 0xC4, 0x80, FIL_, 0x03, 0xCC, 0xB1, FIL_, ++ 0xE1, 0xB8, 0x86, FIL_, 0xCC, 0x87, FIL_, 0xE1, ++ 0xB8, 0x82, FIL_, 0xCC, 0xA3, FIL_, 0xE1, 0xB8, ++ 0x84, FIL_, 0x05, 0xCC, 0xA7, FIL_, 0xC3, 0x87, ++ FIL_, 0xCC, 0x8C, FIL_, 0xC4, 0x8C, FIL_, 0xCC, ++ 0x81, FIL_, 0xC4, 0x86, FIL_, 0xCC, 0x82, FIL_, ++ 0xC4, 0x88, FIL_, 0xCC, 0x87, FIL_, 0xC4, 0x8A, ++ FIL_, 0x06, 0xCC, 0xA7, FIL_, 0xE1, 0xB8, 0x90, ++ FIL_, 0xCC, 0x8C, FIL_, 0xC4, 0x8E, FIL_, 0xCC, ++ 0xB1, FIL_, 0xE1, 0xB8, 0x8E, FIL_, 0xCC, 0xAD, ++ FIL_, 0xE1, 0xB8, 0x92, FIL_, 0xCC, 0xA3, FIL_, ++ 0xE1, 0xB8, 0x8C, FIL_, 0xCC, 0x87, FIL_, 0xE1, ++ 0xB8, 0x8A, FIL_, 0x11, 0xCC, 0x84, FIL_, 0xC4, ++ 0x92, FIL_, 0xCC, 0x86, FIL_, 0xC4, 0x94, FIL_, ++ 0xCC, 0xA3, FIL_, 0xE1, 0xBA, 0xB8, FIL_, 0xCC, ++ 0x91, FIL_, 0xC8, 0x86, FIL_, 0xCC, 0x82, FIL_, ++ 0xC3, 0x8A, FIL_, 0xCC, 0x8F, FIL_, 0xC8, 0x84, ++ FIL_, 0xCC, 0xAD, FIL_, 0xE1, 0xB8, 0x98, FIL_, ++ 0xCC, 0x89, FIL_, 0xE1, 0xBA, 0xBA, FIL_, 0xCC, ++ 0xA7, FIL_, 0xC8, 0xA8, FIL_, 0xCC, 0x8C, FIL_, ++ 0xC4, 0x9A, FIL_, 0xCC, 0x80, FIL_, 0xC3, 0x88, ++ FIL_, 0xCC, 0xA8, FIL_, 0xC4, 0x98, FIL_, 0xCC, ++ 0x83, FIL_, 0xE1, 0xBA, 0xBC, FIL_, 0xCC, 0x87, ++ FIL_, 0xC4, 0x96, FIL_, 0xCC, 0x81, FIL_, 0xC3, ++ 0x89, FIL_, 0xCC, 0x88, FIL_, 0xC3, 0x8B, FIL_, ++ 0xCC, 0xB0, FIL_, 0xE1, 0xB8, 0x9A, FIL_, 0x01, ++ 0xCC, 0x87, FIL_, 0xE1, 0xB8, 0x9E, FIL_, 0x07, ++ 0xCC, 0x8C, FIL_, 0xC7, 0xA6, FIL_, 0xCC, 0x86, ++ FIL_, 0xC4, 0x9E, FIL_, 0xCC, 0x82, FIL_, 0xC4, ++ 0x9C, FIL_, 0xCC, 0xA7, FIL_, 0xC4, 0xA2, FIL_, ++ 0xCC, 0x84, FIL_, 0xE1, 0xB8, 0xA0, FIL_, 0xCC, ++ 0x81, FIL_, 0xC7, 0xB4, FIL_, 0xCC, 0x87, FIL_, ++ 0xC4, 0xA0, FIL_, 0x07, 0xCC, 0x87, FIL_, 0xE1, ++ 0xB8, 0xA2, FIL_, 0xCC, 0xA7, FIL_, 0xE1, 0xB8, ++ 0xA8, FIL_, 0xCC, 0x82, FIL_, 0xC4, 0xA4, FIL_, ++ 0xCC, 0x88, FIL_, 0xE1, 0xB8, 0xA6, FIL_, 0xCC, ++ 0x8C, FIL_, 0xC8, 0x9E, FIL_, 0xCC, 0xAE, FIL_, ++ 0xE1, 0xB8, 0xAA, FIL_, 0xCC, 0xA3, FIL_, 0xE1, ++ 0xB8, 0xA4, FIL_, 0x0F, 0xCC, 0xB0, FIL_, 0xE1, ++ 0xB8, 0xAC, FIL_, 0xCC, 0x8C, FIL_, 0xC7, 0x8F, ++ FIL_, 0xCC, 0x80, FIL_, 0xC3, 0x8C, FIL_, 0xCC, ++ 0x89, FIL_, 0xE1, 0xBB, 0x88, FIL_, 0xCC, 0xA3, ++ FIL_, 0xE1, 0xBB, 0x8A, FIL_, 0xCC, 0x91, FIL_, ++ 0xC8, 0x8A, FIL_, 0xCC, 0x88, FIL_, 0xC3, 0x8F, ++ FIL_, 0xCC, 0x82, FIL_, 0xC3, 0x8E, FIL_, 
0xCC, ++ 0x81, FIL_, 0xC3, 0x8D, FIL_, 0xCC, 0x83, FIL_, ++ 0xC4, 0xA8, FIL_, 0xCC, 0x87, FIL_, 0xC4, 0xB0, ++ FIL_, 0xCC, 0x8F, FIL_, 0xC8, 0x88, FIL_, 0xCC, ++ 0xA8, FIL_, 0xC4, 0xAE, FIL_, 0xCC, 0x86, FIL_, ++ 0xC4, 0xAC, FIL_, 0xCC, 0x84, FIL_, 0xC4, 0xAA, ++ FIL_, 0x01, 0xCC, 0x82, FIL_, 0xC4, 0xB4, FIL_, ++ 0x05, 0xCC, 0x81, FIL_, 0xE1, 0xB8, 0xB0, FIL_, ++ 0xCC, 0x8C, FIL_, 0xC7, 0xA8, FIL_, 0xCC, 0xB1, ++ FIL_, 0xE1, 0xB8, 0xB4, FIL_, 0xCC, 0xA7, FIL_, ++ 0xC4, 0xB6, FIL_, 0xCC, 0xA3, FIL_, 0xE1, 0xB8, ++ 0xB2, FIL_, 0x06, 0xCC, 0xA3, FIL_, 0xE1, 0xB8, ++ 0xB6, FIL_, 0xCC, 0x8C, FIL_, 0xC4, 0xBD, FIL_, ++ 0xCC, 0xAD, FIL_, 0xE1, 0xB8, 0xBC, FIL_, 0xCC, ++ 0xB1, FIL_, 0xE1, 0xB8, 0xBA, FIL_, 0xCC, 0xA7, ++ FIL_, 0xC4, 0xBB, FIL_, 0xCC, 0x81, FIL_, 0xC4, ++ 0xB9, FIL_, 0x03, 0xCC, 0x81, FIL_, 0xE1, 0xB8, ++ 0xBE, FIL_, 0xCC, 0x87, FIL_, 0xE1, 0xB9, 0x80, ++ FIL_, 0xCC, 0xA3, FIL_, 0xE1, 0xB9, 0x82, FIL_, ++ 0x09, 0xCC, 0x83, FIL_, 0xC3, 0x91, FIL_, 0xCC, ++ 0x81, FIL_, 0xC5, 0x83, FIL_, 0xCC, 0xA7, FIL_, ++ 0xC5, 0x85, FIL_, 0xCC, 0x8C, FIL_, 0xC5, 0x87, ++ FIL_, 0xCC, 0x87, FIL_, 0xE1, 0xB9, 0x84, FIL_, ++ 0xCC, 0xA3, FIL_, 0xE1, 0xB9, 0x86, FIL_, 0xCC, ++ 0xB1, FIL_, 0xE1, 0xB9, 0x88, FIL_, 0xCC, 0xAD, ++ FIL_, 0xE1, 0xB9, 0x8A, FIL_, 0xCC, 0x80, FIL_, ++ 0xC7, 0xB8, FIL_, 0x10, 0xCC, 0x89, FIL_, 0xE1, ++ 0xBB, 0x8E, FIL_, 0xCC, 0x84, FIL_, 0xC5, 0x8C, ++ FIL_, 0xCC, 0x82, FIL_, 0xC3, 0x94, FIL_, 0xCC, ++ 0x86, FIL_, 0xC5, 0x8E, FIL_, 0xCC, 0x83, FIL_, ++ 0xC3, 0x95, FIL_, 0xCC, 0x8B, FIL_, 0xC5, 0x90, ++ FIL_, 0xCC, 0x88, FIL_, 0xC3, 0x96, FIL_, 0xCC, ++ 0x9B, FIL_, 0xC6, 0xA0, FIL_, 0xCC, 0x91, FIL_, ++ 0xC8, 0x8E, FIL_, 0xCC, 0x8C, FIL_, 0xC7, 0x91, ++ FIL_, 0xCC, 0x8F, FIL_, 0xC8, 0x8C, FIL_, 0xCC, ++ 0xA3, FIL_, 0xE1, 0xBB, 0x8C, FIL_, 0xCC, 0x80, ++ FIL_, 0xC3, 0x92, FIL_, 0xCC, 0xA8, FIL_, 0xC7, ++ 0xAA, FIL_, 0xCC, 0x87, FIL_, 0xC8, 0xAE, FIL_, ++ 0xCC, 0x81, FIL_, 0xC3, 0x93, FIL_, 0x02, 0xCC, ++ 0x87, FIL_, 0xE1, 0xB9, 0x96, FIL_, 0xCC, 0x81, ++ FIL_, 0xE1, 0xB9, 0x94, FIL_, 0x08, 0xCC, 0xA7, ++ FIL_, 0xC5, 0x96, FIL_, 0xCC, 0x8C, FIL_, 0xC5, ++ 0x98, FIL_, 0xCC, 0x91, FIL_, 0xC8, 0x92, FIL_, ++ 0xCC, 0x8F, FIL_, 0xC8, 0x90, FIL_, 0xCC, 0x81, ++ FIL_, 0xC5, 0x94, FIL_, 0xCC, 0x87, FIL_, 0xE1, ++ 0xB9, 0x98, FIL_, 0xCC, 0xB1, FIL_, 0xE1, 0xB9, ++ 0x9E, FIL_, 0xCC, 0xA3, FIL_, 0xE1, 0xB9, 0x9A, ++ FIL_, 0x07, 0xCC, 0xA6, FIL_, 0xC8, 0x98, FIL_, ++ 0xCC, 0x81, FIL_, 0xC5, 0x9A, FIL_, 0xCC, 0x82, ++ FIL_, 0xC5, 0x9C, FIL_, 0xCC, 0xA7, FIL_, 0xC5, ++ 0x9E, FIL_, 0xCC, 0x8C, FIL_, 0xC5, 0xA0, FIL_, ++ 0xCC, 0x87, FIL_, 0xE1, 0xB9, 0xA0, FIL_, 0xCC, ++ 0xA3, FIL_, 0xE1, 0xB9, 0xA2, FIL_, 0x07, 0xCC, ++ 0xA6, FIL_, 0xC8, 0x9A, FIL_, 0xCC, 0x87, FIL_, ++ 0xE1, 0xB9, 0xAA, FIL_, 0xCC, 0xA3, FIL_, 0xE1, ++ 0xB9, 0xAC, FIL_, 0xCC, 0xB1, FIL_, 0xE1, 0xB9, ++ 0xAE, FIL_, 0xCC, 0xAD, FIL_, 0xE1, 0xB9, 0xB0, ++ FIL_, 0xCC, 0xA7, FIL_, 0xC5, 0xA2, FIL_, 0xCC, ++ 0x8C, FIL_, 0xC5, 0xA4, FIL_, 0x13, 0xCC, 0x8A, ++ FIL_, 0xC5, 0xAE, FIL_, 0xCC, 0x88, FIL_, 0xC3, ++ 0x9C, FIL_, 0xCC, 0x8B, FIL_, 0xC5, 0xB0, FIL_, ++ 0xCC, 0xAD, FIL_, 0xE1, 0xB9, 0xB6, FIL_, 0xCC, ++ 0xA8, FIL_, 0xC5, 0xB2, FIL_, 0xCC, 0x8C, FIL_, ++ 0xC7, 0x93, FIL_, 0xCC, 0x80, FIL_, 0xC3, 0x99, ++ FIL_, 0xCC, 0x8F, FIL_, 0xC8, 0x94, FIL_, 0xCC, ++ 0xA3, FIL_, 0xE1, 0xBB, 0xA4, FIL_, 0xCC, 0xA4, ++ FIL_, 0xE1, 0xB9, 0xB2, FIL_, 0xCC, 0x81, FIL_, ++ 0xC3, 0x9A, FIL_, 0xCC, 0x82, FIL_, 0xC3, 0x9B, ++ FIL_, 0xCC, 0xB0, FIL_, 0xE1, 0xB9, 0xB4, FIL_, ++ 0xCC, 0x83, FIL_, 0xC5, 0xA8, FIL_, 0xCC, 0x89, ++ FIL_, 0xE1, 0xBB, 0xA6, 
FIL_, 0xCC, 0x84, FIL_, ++ 0xC5, 0xAA, FIL_, 0xCC, 0x91, FIL_, 0xC8, 0x96, ++ FIL_, 0xCC, 0x86, FIL_, 0xC5, 0xAC, FIL_, 0xCC, ++ 0x9B, FIL_, 0xC6, 0xAF, FIL_, 0x02, 0xCC, 0xA3, ++ FIL_, 0xE1, 0xB9, 0xBE, FIL_, 0xCC, 0x83, FIL_, ++ 0xE1, 0xB9, 0xBC, FIL_, 0x06, 0xCC, 0x88, FIL_, ++ 0xE1, 0xBA, 0x84, FIL_, 0xCC, 0x81, FIL_, 0xE1, ++ 0xBA, 0x82, FIL_, 0xCC, 0x80, FIL_, 0xE1, 0xBA, ++ 0x80, FIL_, 0xCC, 0xA3, FIL_, 0xE1, 0xBA, 0x88, ++ FIL_, 0xCC, 0x82, FIL_, 0xC5, 0xB4, FIL_, 0xCC, ++ 0x87, FIL_, 0xE1, 0xBA, 0x86, FIL_, 0x02, 0xCC, ++ 0x88, FIL_, 0xE1, 0xBA, 0x8C, FIL_, 0xCC, 0x87, ++ FIL_, 0xE1, 0xBA, 0x8A, FIL_, 0x09, 0xCC, 0x89, ++ FIL_, 0xE1, 0xBB, 0xB6, FIL_, 0xCC, 0xA3, FIL_, ++ 0xE1, 0xBB, 0xB4, FIL_, 0xCC, 0x80, FIL_, 0xE1, ++ 0xBB, 0xB2, FIL_, 0xCC, 0x88, FIL_, 0xC5, 0xB8, ++ FIL_, 0xCC, 0x81, FIL_, 0xC3, 0x9D, FIL_, 0xCC, ++ 0x83, FIL_, 0xE1, 0xBB, 0xB8, FIL_, 0xCC, 0x87, ++ FIL_, 0xE1, 0xBA, 0x8E, FIL_, 0xCC, 0x84, FIL_, ++ 0xC8, 0xB2, FIL_, 0xCC, 0x82, FIL_, 0xC5, 0xB6, ++ FIL_, 0x06, 0xCC, 0x82, FIL_, 0xE1, 0xBA, 0x90, ++ FIL_, 0xCC, 0xA3, FIL_, 0xE1, 0xBA, 0x92, FIL_, ++ 0xCC, 0xB1, FIL_, 0xE1, 0xBA, 0x94, FIL_, 0xCC, ++ 0x8C, FIL_, 0xC5, 0xBD, FIL_, 0xCC, 0x87, FIL_, ++ 0xC5, 0xBB, FIL_, 0xCC, 0x81, FIL_, 0xC5, 0xB9, ++ FIL_, 0x10, 0xCC, 0xA3, FIL_, 0xE1, 0xBA, 0xA1, ++ FIL_, 0xCC, 0xA8, FIL_, 0xC4, 0x85, FIL_, 0xCC, ++ 0x81, FIL_, 0xC3, 0xA1, FIL_, 0xCC, 0x82, FIL_, ++ 0xC3, 0xA2, FIL_, 0xCC, 0x89, FIL_, 0xE1, 0xBA, ++ 0xA3, FIL_, 0xCC, 0x83, FIL_, 0xC3, 0xA3, FIL_, ++ 0xCC, 0x8C, FIL_, 0xC7, 0x8E, FIL_, 0xCC, 0x8A, ++ FIL_, 0xC3, 0xA5, FIL_, 0xCC, 0x88, FIL_, 0xC3, ++ 0xA4, FIL_, 0xCC, 0x87, FIL_, 0xC8, 0xA7, FIL_, ++ 0xCC, 0x91, FIL_, 0xC8, 0x83, FIL_, 0xCC, 0xA5, ++ FIL_, 0xE1, 0xB8, 0x81, FIL_, 0xCC, 0x84, FIL_, ++ 0xC4, 0x81, FIL_, 0xCC, 0x8F, FIL_, 0xC8, 0x81, ++ FIL_, 0xCC, 0x86, FIL_, 0xC4, 0x83, FIL_, 0xCC, ++ 0x80, FIL_, 0xC3, 0xA0, FIL_, 0x03, 0xCC, 0xA3, ++ FIL_, 0xE1, 0xB8, 0x85, FIL_, 0xCC, 0x87, FIL_, ++ 0xE1, 0xB8, 0x83, FIL_, 0xCC, 0xB1, FIL_, 0xE1, ++ 0xB8, 0x87, FIL_, 0x05, 0xCC, 0x87, FIL_, 0xC4, ++ 0x8B, FIL_, 0xCC, 0x8C, FIL_, 0xC4, 0x8D, FIL_, ++ 0xCC, 0x82, FIL_, 0xC4, 0x89, FIL_, 0xCC, 0x81, ++ FIL_, 0xC4, 0x87, FIL_, 0xCC, 0xA7, FIL_, 0xC3, ++ 0xA7, FIL_, 0x06, 0xCC, 0x87, FIL_, 0xE1, 0xB8, ++ 0x8B, FIL_, 0xCC, 0xA7, FIL_, 0xE1, 0xB8, 0x91, ++ FIL_, 0xCC, 0xB1, FIL_, 0xE1, 0xB8, 0x8F, FIL_, ++ 0xCC, 0xA3, FIL_, 0xE1, 0xB8, 0x8D, FIL_, 0xCC, ++ 0x8C, FIL_, 0xC4, 0x8F, FIL_, 0xCC, 0xAD, FIL_, ++ 0xE1, 0xB8, 0x93, FIL_, 0x11, 0xCC, 0x80, FIL_, ++ 0xC3, 0xA8, FIL_, 0xCC, 0x81, FIL_, 0xC3, 0xA9, ++ FIL_, 0xCC, 0x82, FIL_, 0xC3, 0xAA, FIL_, 0xCC, ++ 0x88, FIL_, 0xC3, 0xAB, FIL_, 0xCC, 0x84, FIL_, ++ 0xC4, 0x93, FIL_, 0xCC, 0x86, FIL_, 0xC4, 0x95, ++ FIL_, 0xCC, 0x87, FIL_, 0xC4, 0x97, FIL_, 0xCC, ++ 0xA8, FIL_, 0xC4, 0x99, FIL_, 0xCC, 0x8C, FIL_, ++ 0xC4, 0x9B, FIL_, 0xCC, 0x8F, FIL_, 0xC8, 0x85, ++ FIL_, 0xCC, 0x91, FIL_, 0xC8, 0x87, FIL_, 0xCC, ++ 0xA3, FIL_, 0xE1, 0xBA, 0xB9, FIL_, 0xCC, 0xA7, ++ FIL_, 0xC8, 0xA9, FIL_, 0xCC, 0x83, FIL_, 0xE1, ++ 0xBA, 0xBD, FIL_, 0xCC, 0x89, FIL_, 0xE1, 0xBA, ++ 0xBB, FIL_, 0xCC, 0xAD, FIL_, 0xE1, 0xB8, 0x99, ++ FIL_, 0xCC, 0xB0, FIL_, 0xE1, 0xB8, 0x9B, FIL_, ++ 0x01, 0xCC, 0x87, FIL_, 0xE1, 0xB8, 0x9F, FIL_, ++ 0x07, 0xCC, 0x86, FIL_, 0xC4, 0x9F, FIL_, 0xCC, ++ 0x87, FIL_, 0xC4, 0xA1, FIL_, 0xCC, 0x82, FIL_, ++ 0xC4, 0x9D, FIL_, 0xCC, 0x84, FIL_, 0xE1, 0xB8, ++ 0xA1, FIL_, 0xCC, 0x8C, FIL_, 0xC7, 0xA7, FIL_, ++ 0xCC, 0xA7, FIL_, 0xC4, 0xA3, FIL_, 0xCC, 0x81, ++ FIL_, 0xC7, 0xB5, FIL_, 0x08, 0xCC, 0xA7, FIL_, ++ 0xE1, 
0xB8, 0xA9, FIL_, 0xCC, 0xB1, FIL_, 0xE1, ++ 0xBA, 0x96, FIL_, 0xCC, 0x8C, FIL_, 0xC8, 0x9F, ++ FIL_, 0xCC, 0xAE, FIL_, 0xE1, 0xB8, 0xAB, FIL_, ++ 0xCC, 0x88, FIL_, 0xE1, 0xB8, 0xA7, FIL_, 0xCC, ++ 0xA3, FIL_, 0xE1, 0xB8, 0xA5, FIL_, 0xCC, 0x87, ++ FIL_, 0xE1, 0xB8, 0xA3, FIL_, 0xCC, 0x82, FIL_, ++ 0xC4, 0xA5, FIL_, 0x0E, 0xCC, 0x88, FIL_, 0xC3, ++ 0xAF, FIL_, 0xCC, 0x89, FIL_, 0xE1, 0xBB, 0x89, ++ FIL_, 0xCC, 0xA3, FIL_, 0xE1, 0xBB, 0x8B, FIL_, ++ 0xCC, 0x82, FIL_, 0xC3, 0xAE, FIL_, 0xCC, 0x81, ++ FIL_, 0xC3, 0xAD, FIL_, 0xCC, 0x80, FIL_, 0xC3, ++ 0xAC, FIL_, 0xCC, 0x83, FIL_, 0xC4, 0xA9, FIL_, ++ 0xCC, 0x84, FIL_, 0xC4, 0xAB, FIL_, 0xCC, 0x86, ++ FIL_, 0xC4, 0xAD, FIL_, 0xCC, 0xA8, FIL_, 0xC4, ++ 0xAF, FIL_, 0xCC, 0xB0, FIL_, 0xE1, 0xB8, 0xAD, ++ FIL_, 0xCC, 0x8C, FIL_, 0xC7, 0x90, FIL_, 0xCC, ++ 0x91, FIL_, 0xC8, 0x8B, FIL_, 0xCC, 0x8F, FIL_, ++ 0xC8, 0x89, FIL_, 0x02, 0xCC, 0x8C, FIL_, 0xC7, ++ 0xB0, FIL_, 0xCC, 0x82, FIL_, 0xC4, 0xB5, FIL_, ++ 0x05, 0xCC, 0xB1, FIL_, 0xE1, 0xB8, 0xB5, FIL_, ++ 0xCC, 0xA7, FIL_, 0xC4, 0xB7, FIL_, 0xCC, 0x8C, ++ FIL_, 0xC7, 0xA9, FIL_, 0xCC, 0x81, FIL_, 0xE1, ++ 0xB8, 0xB1, FIL_, 0xCC, 0xA3, FIL_, 0xE1, 0xB8, ++ 0xB3, FIL_, 0x06, 0xCC, 0xA3, FIL_, 0xE1, 0xB8, ++ 0xB7, FIL_, 0xCC, 0xAD, FIL_, 0xE1, 0xB8, 0xBD, ++ FIL_, 0xCC, 0xB1, FIL_, 0xE1, 0xB8, 0xBB, FIL_, ++ 0xCC, 0xA7, FIL_, 0xC4, 0xBC, FIL_, 0xCC, 0x81, ++ FIL_, 0xC4, 0xBA, FIL_, 0xCC, 0x8C, FIL_, 0xC4, ++ 0xBE, FIL_, 0x03, 0xCC, 0x87, FIL_, 0xE1, 0xB9, ++ 0x81, FIL_, 0xCC, 0xA3, FIL_, 0xE1, 0xB9, 0x83, ++ FIL_, 0xCC, 0x81, FIL_, 0xE1, 0xB8, 0xBF, FIL_, ++ 0x09, 0xCC, 0x80, FIL_, 0xC7, 0xB9, FIL_, 0xCC, ++ 0xAD, FIL_, 0xE1, 0xB9, 0x8B, FIL_, 0xCC, 0x83, ++ FIL_, 0xC3, 0xB1, FIL_, 0xCC, 0x81, FIL_, 0xC5, ++ 0x84, FIL_, 0xCC, 0xA3, FIL_, 0xE1, 0xB9, 0x87, ++ FIL_, 0xCC, 0xB1, FIL_, 0xE1, 0xB9, 0x89, FIL_, ++ 0xCC, 0x87, FIL_, 0xE1, 0xB9, 0x85, FIL_, 0xCC, ++ 0xA7, FIL_, 0xC5, 0x86, FIL_, 0xCC, 0x8C, FIL_, ++ 0xC5, 0x88, FIL_, 0x10, 0xCC, 0xA3, FIL_, 0xE1, ++ 0xBB, 0x8D, FIL_, 0xCC, 0x87, FIL_, 0xC8, 0xAF, ++ FIL_, 0xCC, 0x80, FIL_, 0xC3, 0xB2, FIL_, 0xCC, ++ 0x91, FIL_, 0xC8, 0x8F, FIL_, 0xCC, 0x89, FIL_, ++ 0xE1, 0xBB, 0x8F, FIL_, 0xCC, 0x88, FIL_, 0xC3, ++ 0xB6, FIL_, 0xCC, 0x83, FIL_, 0xC3, 0xB5, FIL_, ++ 0xCC, 0x81, FIL_, 0xC3, 0xB3, FIL_, 0xCC, 0x8C, ++ FIL_, 0xC7, 0x92, FIL_, 0xCC, 0xA8, FIL_, 0xC7, ++ 0xAB, FIL_, 0xCC, 0x9B, FIL_, 0xC6, 0xA1, FIL_, ++ 0xCC, 0x84, FIL_, 0xC5, 0x8D, FIL_, 0xCC, 0x86, ++ FIL_, 0xC5, 0x8F, FIL_, 0xCC, 0x8B, FIL_, 0xC5, ++ 0x91, FIL_, 0xCC, 0x82, FIL_, 0xC3, 0xB4, FIL_, ++ 0xCC, 0x8F, FIL_, 0xC8, 0x8D, FIL_, 0x02, 0xCC, ++ 0x87, FIL_, 0xE1, 0xB9, 0x97, FIL_, 0xCC, 0x81, ++ FIL_, 0xE1, 0xB9, 0x95, FIL_, 0x08, 0xCC, 0x8C, ++ FIL_, 0xC5, 0x99, FIL_, 0xCC, 0xA3, FIL_, 0xE1, ++ 0xB9, 0x9B, FIL_, 0xCC, 0x81, FIL_, 0xC5, 0x95, ++ FIL_, 0xCC, 0xA7, FIL_, 0xC5, 0x97, FIL_, 0xCC, ++ 0xB1, FIL_, 0xE1, 0xB9, 0x9F, FIL_, 0xCC, 0x87, ++ FIL_, 0xE1, 0xB9, 0x99, FIL_, 0xCC, 0x91, FIL_, ++ 0xC8, 0x93, FIL_, 0xCC, 0x8F, FIL_, 0xC8, 0x91, ++ FIL_, 0x07, 0xCC, 0xA7, FIL_, 0xC5, 0x9F, FIL_, ++ 0xCC, 0x82, FIL_, 0xC5, 0x9D, FIL_, 0xCC, 0x87, ++ FIL_, 0xE1, 0xB9, 0xA1, FIL_, 0xCC, 0xA6, FIL_, ++ 0xC8, 0x99, FIL_, 0xCC, 0x81, FIL_, 0xC5, 0x9B, ++ FIL_, 0xCC, 0xA3, FIL_, 0xE1, 0xB9, 0xA3, FIL_, ++ 0xCC, 0x8C, FIL_, 0xC5, 0xA1, FIL_, 0x08, 0xCC, ++ 0xA6, FIL_, 0xC8, 0x9B, FIL_, 0xCC, 0xAD, FIL_, ++ 0xE1, 0xB9, 0xB1, FIL_, 0xCC, 0xB1, FIL_, 0xE1, ++ 0xB9, 0xAF, FIL_, 0xCC, 0xA3, FIL_, 0xE1, 0xB9, ++ 0xAD, FIL_, 0xCC, 0x87, FIL_, 0xE1, 0xB9, 0xAB, ++ FIL_, 0xCC, 0x8C, FIL_, 0xC5, 0xA5, FIL_, 
0xCC, ++ 0xA7, FIL_, 0xC5, 0xA3, FIL_, 0xCC, 0x88, FIL_, ++ 0xE1, 0xBA, 0x97, FIL_, 0x13, 0xCC, 0x8A, FIL_, ++ 0xC5, 0xAF, FIL_, 0xCC, 0x8F, FIL_, 0xC8, 0x95, ++ FIL_, 0xCC, 0x8C, FIL_, 0xC7, 0x94, FIL_, 0xCC, ++ 0x80, FIL_, 0xC3, 0xB9, FIL_, 0xCC, 0x9B, FIL_, ++ 0xC6, 0xB0, FIL_, 0xCC, 0x82, FIL_, 0xC3, 0xBB, ++ FIL_, 0xCC, 0x81, FIL_, 0xC3, 0xBA, FIL_, 0xCC, ++ 0x88, FIL_, 0xC3, 0xBC, FIL_, 0xCC, 0x83, FIL_, ++ 0xC5, 0xA9, FIL_, 0xCC, 0x89, FIL_, 0xE1, 0xBB, ++ 0xA7, FIL_, 0xCC, 0x84, FIL_, 0xC5, 0xAB, FIL_, ++ 0xCC, 0x86, FIL_, 0xC5, 0xAD, FIL_, 0xCC, 0xAD, ++ FIL_, 0xE1, 0xB9, 0xB7, FIL_, 0xCC, 0x8B, FIL_, ++ 0xC5, 0xB1, FIL_, 0xCC, 0xA8, FIL_, 0xC5, 0xB3, ++ FIL_, 0xCC, 0x91, FIL_, 0xC8, 0x97, FIL_, 0xCC, ++ 0xA4, FIL_, 0xE1, 0xB9, 0xB3, FIL_, 0xCC, 0xA3, ++ FIL_, 0xE1, 0xBB, 0xA5, FIL_, 0xCC, 0xB0, FIL_, ++ 0xE1, 0xB9, 0xB5, FIL_, 0x02, 0xCC, 0x83, FIL_, ++ 0xE1, 0xB9, 0xBD, FIL_, 0xCC, 0xA3, FIL_, 0xE1, ++ 0xB9, 0xBF, FIL_, 0x07, 0xCC, 0x8A, FIL_, 0xE1, ++ 0xBA, 0x98, FIL_, 0xCC, 0x87, FIL_, 0xE1, 0xBA, ++ 0x87, FIL_, 0xCC, 0x81, FIL_, 0xE1, 0xBA, 0x83, ++ FIL_, 0xCC, 0x82, FIL_, 0xC5, 0xB5, FIL_, 0xCC, ++ 0x80, FIL_, 0xE1, 0xBA, 0x81, FIL_, 0xCC, 0xA3, ++ FIL_, 0xE1, 0xBA, 0x89, FIL_, 0xCC, 0x88, FIL_, ++ 0xE1, 0xBA, 0x85, FIL_, 0x02, 0xCC, 0x87, FIL_, ++ 0xE1, 0xBA, 0x8B, FIL_, 0xCC, 0x88, FIL_, 0xE1, ++ 0xBA, 0x8D, FIL_, 0x0A, 0xCC, 0x87, FIL_, 0xE1, ++ 0xBA, 0x8F, FIL_, 0xCC, 0xA3, FIL_, 0xE1, 0xBB, ++ 0xB5, FIL_, 0xCC, 0x89, FIL_, 0xE1, 0xBB, 0xB7, ++ FIL_, 0xCC, 0x8A, FIL_, 0xE1, 0xBA, 0x99, FIL_, ++ 0xCC, 0x80, FIL_, 0xE1, 0xBB, 0xB3, FIL_, 0xCC, ++ 0x83, FIL_, 0xE1, 0xBB, 0xB9, FIL_, 0xCC, 0x88, ++ FIL_, 0xC3, 0xBF, FIL_, 0xCC, 0x81, FIL_, 0xC3, ++ 0xBD, FIL_, 0xCC, 0x84, FIL_, 0xC8, 0xB3, FIL_, ++ 0xCC, 0x82, FIL_, 0xC5, 0xB7, FIL_, 0x06, 0xCC, ++ 0xB1, FIL_, 0xE1, 0xBA, 0x95, FIL_, 0xCC, 0xA3, ++ FIL_, 0xE1, 0xBA, 0x93, FIL_, 0xCC, 0x82, FIL_, ++ 0xE1, 0xBA, 0x91, FIL_, 0xCC, 0x81, FIL_, 0xC5, ++ 0xBA, FIL_, 0xCC, 0x87, FIL_, 0xC5, 0xBC, FIL_, ++ 0xCC, 0x8C, FIL_, 0xC5, 0xBE, FIL_, 0x03, 0xCC, ++ 0x80, FIL_, 0xE1, 0xBF, 0xAD, FIL_, 0xCD, 0x82, ++ FIL_, 0xE1, 0xBF, 0x81, FIL_, 0xCC, 0x81, FIL_, ++ 0xCE, 0x85, FIL_, 0x04, 0xCC, 0x83, FIL_, 0xE1, ++ 0xBA, 0xAA, FIL_, 0xCC, 0x81, FIL_, 0xE1, 0xBA, ++ 0xA4, FIL_, 0xCC, 0x89, FIL_, 0xE1, 0xBA, 0xA8, ++ FIL_, 0xCC, 0x80, FIL_, 0xE1, 0xBA, 0xA6, FIL_, ++ 0x01, 0xCC, 0x84, FIL_, 0xC7, 0x9E, FIL_, 0x01, ++ 0xCC, 0x81, FIL_, 0xC7, 0xBA, FIL_, 0x02, 0xCC, ++ 0x84, FIL_, 0xC7, 0xA2, FIL_, 0xCC, 0x81, FIL_, ++ 0xC7, 0xBC, FIL_, 0x01, 0xCC, 0x81, FIL_, 0xE1, ++ 0xB8, 0x88, FIL_, 0x04, 0xCC, 0x83, FIL_, 0xE1, ++ 0xBB, 0x84, FIL_, 0xCC, 0x80, FIL_, 0xE1, 0xBB, ++ 0x80, FIL_, 0xCC, 0x89, FIL_, 0xE1, 0xBB, 0x82, ++ FIL_, 0xCC, 0x81, FIL_, 0xE1, 0xBA, 0xBE, FIL_, ++ 0x01, 0xCC, 0x81, FIL_, 0xE1, 0xB8, 0xAE, FIL_, ++ 0x04, 0xCC, 0x81, FIL_, 0xE1, 0xBB, 0x90, FIL_, ++ 0xCC, 0x80, FIL_, 0xE1, 0xBB, 0x92, FIL_, 0xCC, ++ 0x89, FIL_, 0xE1, 0xBB, 0x94, FIL_, 0xCC, 0x83, ++ FIL_, 0xE1, 0xBB, 0x96, FIL_, 0x03, 0xCC, 0x84, ++ FIL_, 0xC8, 0xAC, FIL_, 0xCC, 0x88, FIL_, 0xE1, ++ 0xB9, 0x8E, FIL_, 0xCC, 0x81, FIL_, 0xE1, 0xB9, ++ 0x8C, FIL_, 0x01, 0xCC, 0x84, FIL_, 0xC8, 0xAA, ++ FIL_, 0x01, 0xCC, 0x81, FIL_, 0xC7, 0xBE, FIL_, ++ 0x04, 0xCC, 0x80, FIL_, 0xC7, 0x9B, FIL_, 0xCC, ++ 0x84, FIL_, 0xC7, 0x95, FIL_, 0xCC, 0x8C, FIL_, ++ 0xC7, 0x99, FIL_, 0xCC, 0x81, FIL_, 0xC7, 0x97, ++ FIL_, 0x04, 0xCC, 0x81, FIL_, 0xE1, 0xBA, 0xA5, ++ FIL_, 0xCC, 0x83, FIL_, 0xE1, 0xBA, 0xAB, FIL_, ++ 0xCC, 0x89, FIL_, 0xE1, 0xBA, 0xA9, FIL_, 0xCC, ++ 0x80, FIL_, 0xE1, 0xBA, 
0xA7, FIL_, 0x01, 0xCC, ++ 0x84, FIL_, 0xC7, 0x9F, FIL_, 0x01, 0xCC, 0x81, ++ FIL_, 0xC7, 0xBB, FIL_, 0x02, 0xCC, 0x81, FIL_, ++ 0xC7, 0xBD, FIL_, 0xCC, 0x84, FIL_, 0xC7, 0xA3, ++ FIL_, 0x01, 0xCC, 0x81, FIL_, 0xE1, 0xB8, 0x89, ++ FIL_, 0x04, 0xCC, 0x89, FIL_, 0xE1, 0xBB, 0x83, ++ FIL_, 0xCC, 0x83, FIL_, 0xE1, 0xBB, 0x85, FIL_, ++ 0xCC, 0x80, FIL_, 0xE1, 0xBB, 0x81, FIL_, 0xCC, ++ 0x81, FIL_, 0xE1, 0xBA, 0xBF, FIL_, 0x01, 0xCC, ++ 0x81, FIL_, 0xE1, 0xB8, 0xAF, FIL_, 0x04, 0xCC, ++ 0x80, FIL_, 0xE1, 0xBB, 0x93, FIL_, 0xCC, 0x81, ++ FIL_, 0xE1, 0xBB, 0x91, FIL_, 0xCC, 0x83, FIL_, ++ 0xE1, 0xBB, 0x97, FIL_, 0xCC, 0x89, FIL_, 0xE1, ++ 0xBB, 0x95, FIL_, 0x03, 0xCC, 0x81, FIL_, 0xE1, ++ 0xB9, 0x8D, FIL_, 0xCC, 0x88, FIL_, 0xE1, 0xB9, ++ 0x8F, FIL_, 0xCC, 0x84, FIL_, 0xC8, 0xAD, FIL_, ++ 0x01, 0xCC, 0x84, FIL_, 0xC8, 0xAB, FIL_, 0x01, ++ 0xCC, 0x81, FIL_, 0xC7, 0xBF, FIL_, 0x04, 0xCC, ++ 0x8C, FIL_, 0xC7, 0x9A, FIL_, 0xCC, 0x84, FIL_, ++ 0xC7, 0x96, FIL_, 0xCC, 0x80, FIL_, 0xC7, 0x9C, ++ FIL_, 0xCC, 0x81, FIL_, 0xC7, 0x98, FIL_, 0x04, ++ 0xCC, 0x81, FIL_, 0xE1, 0xBA, 0xAE, FIL_, 0xCC, ++ 0x83, FIL_, 0xE1, 0xBA, 0xB4, FIL_, 0xCC, 0x89, ++ FIL_, 0xE1, 0xBA, 0xB2, FIL_, 0xCC, 0x80, FIL_, ++ 0xE1, 0xBA, 0xB0, FIL_, 0x04, 0xCC, 0x83, FIL_, ++ 0xE1, 0xBA, 0xB5, FIL_, 0xCC, 0x80, FIL_, 0xE1, ++ 0xBA, 0xB1, FIL_, 0xCC, 0x81, FIL_, 0xE1, 0xBA, ++ 0xAF, FIL_, 0xCC, 0x89, FIL_, 0xE1, 0xBA, 0xB3, ++ FIL_, 0x02, 0xCC, 0x81, FIL_, 0xE1, 0xB8, 0x96, ++ FIL_, 0xCC, 0x80, FIL_, 0xE1, 0xB8, 0x94, FIL_, ++ 0x02, 0xCC, 0x80, FIL_, 0xE1, 0xB8, 0x95, FIL_, ++ 0xCC, 0x81, FIL_, 0xE1, 0xB8, 0x97, FIL_, 0x02, ++ 0xCC, 0x80, FIL_, 0xE1, 0xB9, 0x90, FIL_, 0xCC, ++ 0x81, FIL_, 0xE1, 0xB9, 0x92, FIL_, 0x02, 0xCC, ++ 0x81, FIL_, 0xE1, 0xB9, 0x93, FIL_, 0xCC, 0x80, ++ FIL_, 0xE1, 0xB9, 0x91, FIL_, 0x01, 0xCC, 0x87, ++ FIL_, 0xE1, 0xB9, 0xA4, FIL_, 0x01, 0xCC, 0x87, ++ FIL_, 0xE1, 0xB9, 0xA5, FIL_, 0x01, 0xCC, 0x87, ++ FIL_, 0xE1, 0xB9, 0xA6, FIL_, 0x01, 0xCC, 0x87, ++ FIL_, 0xE1, 0xB9, 0xA7, FIL_, 0x01, 0xCC, 0x81, ++ FIL_, 0xE1, 0xB9, 0xB8, FIL_, 0x01, 0xCC, 0x81, ++ FIL_, 0xE1, 0xB9, 0xB9, FIL_, 0x01, 0xCC, 0x88, ++ FIL_, 0xE1, 0xB9, 0xBA, FIL_, 0x01, 0xCC, 0x88, ++ FIL_, 0xE1, 0xB9, 0xBB, FIL_, 0x01, 0xCC, 0x87, ++ FIL_, 0xE1, 0xBA, 0x9B, FIL_, 0x05, 0xCC, 0x80, ++ FIL_, 0xE1, 0xBB, 0x9C, FIL_, 0xCC, 0x89, FIL_, ++ 0xE1, 0xBB, 0x9E, FIL_, 0xCC, 0x83, FIL_, 0xE1, ++ 0xBB, 0xA0, FIL_, 0xCC, 0x81, FIL_, 0xE1, 0xBB, ++ 0x9A, FIL_, 0xCC, 0xA3, FIL_, 0xE1, 0xBB, 0xA2, ++ FIL_, 0x05, 0xCC, 0x83, FIL_, 0xE1, 0xBB, 0xA1, ++ FIL_, 0xCC, 0xA3, FIL_, 0xE1, 0xBB, 0xA3, FIL_, ++ 0xCC, 0x81, FIL_, 0xE1, 0xBB, 0x9B, FIL_, 0xCC, ++ 0x80, FIL_, 0xE1, 0xBB, 0x9D, FIL_, 0xCC, 0x89, ++ FIL_, 0xE1, 0xBB, 0x9F, FIL_, 0x05, 0xCC, 0x81, ++ FIL_, 0xE1, 0xBB, 0xA8, FIL_, 0xCC, 0x80, FIL_, ++ 0xE1, 0xBB, 0xAA, FIL_, 0xCC, 0x89, FIL_, 0xE1, ++ 0xBB, 0xAC, FIL_, 0xCC, 0x83, FIL_, 0xE1, 0xBB, ++ 0xAE, FIL_, 0xCC, 0xA3, FIL_, 0xE1, 0xBB, 0xB0, ++ FIL_, 0x05, 0xCC, 0x80, FIL_, 0xE1, 0xBB, 0xAB, ++ FIL_, 0xCC, 0x81, FIL_, 0xE1, 0xBB, 0xA9, FIL_, ++ 0xCC, 0x83, FIL_, 0xE1, 0xBB, 0xAF, FIL_, 0xCC, ++ 0xA3, FIL_, 0xE1, 0xBB, 0xB1, FIL_, 0xCC, 0x89, ++ FIL_, 0xE1, 0xBB, 0xAD, FIL_, 0x01, 0xCC, 0x8C, ++ FIL_, 0xC7, 0xAE, FIL_, 0x01, 0xCC, 0x84, FIL_, ++ 0xC7, 0xAC, FIL_, 0x01, 0xCC, 0x84, FIL_, 0xC7, ++ 0xAD, FIL_, 0x01, 0xCC, 0x84, FIL_, 0xC7, 0xA0, ++ FIL_, 0x01, 0xCC, 0x84, FIL_, 0xC7, 0xA1, FIL_, ++ 0x01, 0xCC, 0x86, FIL_, 0xE1, 0xB8, 0x9C, FIL_, ++ 0x01, 0xCC, 0x86, FIL_, 0xE1, 0xB8, 0x9D, FIL_, ++ 0x01, 0xCC, 0x84, FIL_, 0xC8, 0xB0, FIL_, 0x01, ++ 0xCC, 
0x84, FIL_, 0xC8, 0xB1, FIL_, 0x01, 0xCC, ++ 0x8C, FIL_, 0xC7, 0xAF, FIL_, 0x07, 0xCC, 0x93, ++ FIL_, 0xE1, 0xBC, 0x88, FIL_, 0xCC, 0x81, FIL_, ++ 0xCE, 0x86, FIL_, 0xCC, 0x86, FIL_, 0xE1, 0xBE, ++ 0xB8, FIL_, 0xCC, 0x84, FIL_, 0xE1, 0xBE, 0xB9, ++ FIL_, 0xCC, 0x94, FIL_, 0xE1, 0xBC, 0x89, FIL_, ++ 0xCD, 0x85, FIL_, 0xE1, 0xBE, 0xBC, FIL_, 0xCC, ++ 0x80, FIL_, 0xE1, 0xBE, 0xBA, FIL_, 0x04, 0xCC, ++ 0x94, FIL_, 0xE1, 0xBC, 0x99, FIL_, 0xCC, 0x80, ++ FIL_, 0xE1, 0xBF, 0x88, FIL_, 0xCC, 0x81, FIL_, ++ 0xCE, 0x88, FIL_, 0xCC, 0x93, FIL_, 0xE1, 0xBC, ++ 0x98, FIL_, 0x05, 0xCD, 0x85, FIL_, 0xE1, 0xBF, ++ 0x8C, FIL_, 0xCC, 0x81, FIL_, 0xCE, 0x89, FIL_, ++ 0xCC, 0x80, FIL_, 0xE1, 0xBF, 0x8A, FIL_, 0xCC, ++ 0x93, FIL_, 0xE1, 0xBC, 0xA8, FIL_, 0xCC, 0x94, ++ FIL_, 0xE1, 0xBC, 0xA9, FIL_, 0x07, 0xCC, 0x80, ++ FIL_, 0xE1, 0xBF, 0x9A, FIL_, 0xCC, 0x84, FIL_, ++ 0xE1, 0xBF, 0x99, FIL_, 0xCC, 0x93, FIL_, 0xE1, ++ 0xBC, 0xB8, FIL_, 0xCC, 0x94, FIL_, 0xE1, 0xBC, ++ 0xB9, FIL_, 0xCC, 0x86, FIL_, 0xE1, 0xBF, 0x98, ++ FIL_, 0xCC, 0x81, FIL_, 0xCE, 0x8A, FIL_, 0xCC, ++ 0x88, FIL_, 0xCE, 0xAA, FIL_, 0x04, 0xCC, 0x81, ++ FIL_, 0xCE, 0x8C, FIL_, 0xCC, 0x94, FIL_, 0xE1, ++ 0xBD, 0x89, FIL_, 0xCC, 0x93, FIL_, 0xE1, 0xBD, ++ 0x88, FIL_, 0xCC, 0x80, FIL_, 0xE1, 0xBF, 0xB8, ++ FIL_, 0x01, 0xCC, 0x94, FIL_, 0xE1, 0xBF, 0xAC, ++ FIL_, 0x06, 0xCC, 0x94, FIL_, 0xE1, 0xBD, 0x99, ++ FIL_, 0xCC, 0x86, FIL_, 0xE1, 0xBF, 0xA8, FIL_, ++ 0xCC, 0x88, FIL_, 0xCE, 0xAB, FIL_, 0xCC, 0x84, ++ FIL_, 0xE1, 0xBF, 0xA9, FIL_, 0xCC, 0x81, FIL_, ++ 0xCE, 0x8E, FIL_, 0xCC, 0x80, FIL_, 0xE1, 0xBF, ++ 0xAA, FIL_, 0x05, 0xCC, 0x93, FIL_, 0xE1, 0xBD, ++ 0xA8, FIL_, 0xCD, 0x85, FIL_, 0xE1, 0xBF, 0xBC, ++ FIL_, 0xCC, 0x80, FIL_, 0xE1, 0xBF, 0xBA, FIL_, ++ 0xCC, 0x94, FIL_, 0xE1, 0xBD, 0xA9, FIL_, 0xCC, ++ 0x81, FIL_, 0xCE, 0x8F, FIL_, 0x01, 0xCD, 0x85, ++ FIL_, 0xE1, 0xBE, 0xB4, FIL_, 0x01, 0xCD, 0x85, ++ FIL_, 0xE1, 0xBF, 0x84, FIL_, 0x08, 0xCD, 0x85, ++ FIL_, 0xE1, 0xBE, 0xB3, FIL_, 0xCC, 0x84, FIL_, ++ 0xE1, 0xBE, 0xB1, FIL_, 0xCC, 0x86, FIL_, 0xE1, ++ 0xBE, 0xB0, FIL_, 0xCC, 0x80, FIL_, 0xE1, 0xBD, ++ 0xB0, FIL_, 0xCC, 0x81, FIL_, 0xCE, 0xAC, FIL_, ++ 0xCC, 0x94, FIL_, 0xE1, 0xBC, 0x81, FIL_, 0xCC, ++ 0x93, FIL_, 0xE1, 0xBC, 0x80, FIL_, 0xCD, 0x82, ++ FIL_, 0xE1, 0xBE, 0xB6, FIL_, 0x04, 0xCC, 0x93, ++ FIL_, 0xE1, 0xBC, 0x90, FIL_, 0xCC, 0x80, FIL_, ++ 0xE1, 0xBD, 0xB2, FIL_, 0xCC, 0x94, FIL_, 0xE1, ++ 0xBC, 0x91, FIL_, 0xCC, 0x81, FIL_, 0xCE, 0xAD, ++ FIL_, 0x06, 0xCC, 0x94, FIL_, 0xE1, 0xBC, 0xA1, ++ FIL_, 0xCC, 0x81, FIL_, 0xCE, 0xAE, FIL_, 0xCD, ++ 0x85, FIL_, 0xE1, 0xBF, 0x83, FIL_, 0xCD, 0x82, ++ FIL_, 0xE1, 0xBF, 0x86, FIL_, 0xCC, 0x93, FIL_, ++ 0xE1, 0xBC, 0xA0, FIL_, 0xCC, 0x80, FIL_, 0xE1, ++ 0xBD, 0xB4, FIL_, 0x08, 0xCC, 0x88, FIL_, 0xCF, ++ 0x8A, FIL_, 0xCC, 0x81, FIL_, 0xCE, 0xAF, FIL_, ++ 0xCC, 0x93, FIL_, 0xE1, 0xBC, 0xB0, FIL_, 0xCC, ++ 0x94, FIL_, 0xE1, 0xBC, 0xB1, FIL_, 0xCC, 0x80, ++ FIL_, 0xE1, 0xBD, 0xB6, FIL_, 0xCC, 0x86, FIL_, ++ 0xE1, 0xBF, 0x90, FIL_, 0xCC, 0x84, FIL_, 0xE1, ++ 0xBF, 0x91, FIL_, 0xCD, 0x82, FIL_, 0xE1, 0xBF, ++ 0x96, FIL_, 0x04, 0xCC, 0x93, FIL_, 0xE1, 0xBD, ++ 0x80, FIL_, 0xCC, 0x80, FIL_, 0xE1, 0xBD, 0xB8, ++ FIL_, 0xCC, 0x94, FIL_, 0xE1, 0xBD, 0x81, FIL_, ++ 0xCC, 0x81, FIL_, 0xCF, 0x8C, FIL_, 0x02, 0xCC, ++ 0x93, FIL_, 0xE1, 0xBF, 0xA4, FIL_, 0xCC, 0x94, ++ FIL_, 0xE1, 0xBF, 0xA5, FIL_, 0x08, 0xCC, 0x81, ++ FIL_, 0xCF, 0x8D, FIL_, 0xCC, 0x94, FIL_, 0xE1, ++ 0xBD, 0x91, FIL_, 0xCD, 0x82, FIL_, 0xE1, 0xBF, ++ 0xA6, FIL_, 0xCC, 0x88, FIL_, 0xCF, 0x8B, FIL_, ++ 0xCC, 0x84, FIL_, 0xE1, 0xBF, 0xA1, FIL_, 
0xCC, ++ 0x80, FIL_, 0xE1, 0xBD, 0xBA, FIL_, 0xCC, 0x93, ++ FIL_, 0xE1, 0xBD, 0x90, FIL_, 0xCC, 0x86, FIL_, ++ 0xE1, 0xBF, 0xA0, FIL_, 0x06, 0xCC, 0x80, FIL_, ++ 0xE1, 0xBD, 0xBC, FIL_, 0xCC, 0x94, FIL_, 0xE1, ++ 0xBD, 0xA1, FIL_, 0xCC, 0x93, FIL_, 0xE1, 0xBD, ++ 0xA0, FIL_, 0xCC, 0x81, FIL_, 0xCF, 0x8E, FIL_, ++ 0xCD, 0x85, FIL_, 0xE1, 0xBF, 0xB3, FIL_, 0xCD, ++ 0x82, FIL_, 0xE1, 0xBF, 0xB6, FIL_, 0x03, 0xCC, ++ 0x80, FIL_, 0xE1, 0xBF, 0x92, FIL_, 0xCD, 0x82, ++ FIL_, 0xE1, 0xBF, 0x97, FIL_, 0xCC, 0x81, FIL_, ++ 0xCE, 0x90, FIL_, 0x03, 0xCD, 0x82, FIL_, 0xE1, ++ 0xBF, 0xA7, FIL_, 0xCC, 0x80, FIL_, 0xE1, 0xBF, ++ 0xA2, FIL_, 0xCC, 0x81, FIL_, 0xCE, 0xB0, FIL_, ++ 0x01, 0xCD, 0x85, FIL_, 0xE1, 0xBF, 0xB4, FIL_, ++ 0x02, 0xCC, 0x88, FIL_, 0xCF, 0x94, FIL_, 0xCC, ++ 0x81, FIL_, 0xCF, 0x93, FIL_, 0x01, 0xCC, 0x88, ++ FIL_, 0xD0, 0x87, FIL_, 0x02, 0xCC, 0x88, FIL_, ++ 0xD3, 0x92, FIL_, 0xCC, 0x86, FIL_, 0xD3, 0x90, ++ FIL_, 0x01, 0xCC, 0x81, FIL_, 0xD0, 0x83, FIL_, ++ 0x03, 0xCC, 0x88, FIL_, 0xD0, 0x81, FIL_, 0xCC, ++ 0x80, FIL_, 0xD0, 0x80, FIL_, 0xCC, 0x86, FIL_, ++ 0xD3, 0x96, FIL_, 0x02, 0xCC, 0x86, FIL_, 0xD3, ++ 0x81, FIL_, 0xCC, 0x88, FIL_, 0xD3, 0x9C, FIL_, ++ 0x01, 0xCC, 0x88, FIL_, 0xD3, 0x9E, FIL_, 0x04, ++ 0xCC, 0x84, FIL_, 0xD3, 0xA2, FIL_, 0xCC, 0x88, ++ FIL_, 0xD3, 0xA4, FIL_, 0xCC, 0x86, FIL_, 0xD0, ++ 0x99, FIL_, 0xCC, 0x80, FIL_, 0xD0, 0x8D, FIL_, ++ 0x01, 0xCC, 0x81, FIL_, 0xD0, 0x8C, FIL_, 0x01, ++ 0xCC, 0x88, FIL_, 0xD3, 0xA6, FIL_, 0x04, 0xCC, ++ 0x8B, FIL_, 0xD3, 0xB2, FIL_, 0xCC, 0x88, FIL_, ++ 0xD3, 0xB0, FIL_, 0xCC, 0x86, FIL_, 0xD0, 0x8E, ++ FIL_, 0xCC, 0x84, FIL_, 0xD3, 0xAE, FIL_, 0x01, ++ 0xCC, 0x88, FIL_, 0xD3, 0xB4, FIL_, 0x01, 0xCC, ++ 0x88, FIL_, 0xD3, 0xB8, FIL_, 0x01, 0xCC, 0x88, ++ FIL_, 0xD3, 0xAC, FIL_, 0x02, 0xCC, 0x86, FIL_, ++ 0xD3, 0x91, FIL_, 0xCC, 0x88, FIL_, 0xD3, 0x93, ++ FIL_, 0x01, 0xCC, 0x81, FIL_, 0xD1, 0x93, FIL_, ++ 0x03, 0xCC, 0x80, FIL_, 0xD1, 0x90, FIL_, 0xCC, ++ 0x86, FIL_, 0xD3, 0x97, FIL_, 0xCC, 0x88, FIL_, ++ 0xD1, 0x91, FIL_, 0x02, 0xCC, 0x86, FIL_, 0xD3, ++ 0x82, FIL_, 0xCC, 0x88, FIL_, 0xD3, 0x9D, FIL_, ++ 0x01, 0xCC, 0x88, FIL_, 0xD3, 0x9F, FIL_, 0x04, ++ 0xCC, 0x86, FIL_, 0xD0, 0xB9, FIL_, 0xCC, 0x88, ++ FIL_, 0xD3, 0xA5, FIL_, 0xCC, 0x84, FIL_, 0xD3, ++ 0xA3, FIL_, 0xCC, 0x80, FIL_, 0xD1, 0x9D, FIL_, ++ 0x01, 0xCC, 0x81, FIL_, 0xD1, 0x9C, FIL_, 0x01, ++ 0xCC, 0x88, FIL_, 0xD3, 0xA7, FIL_, 0x04, 0xCC, ++ 0x8B, FIL_, 0xD3, 0xB3, FIL_, 0xCC, 0x84, FIL_, ++ 0xD3, 0xAF, FIL_, 0xCC, 0x86, FIL_, 0xD1, 0x9E, ++ FIL_, 0xCC, 0x88, FIL_, 0xD3, 0xB1, FIL_, 0x01, ++ 0xCC, 0x88, FIL_, 0xD3, 0xB5, FIL_, 0x01, 0xCC, ++ 0x88, FIL_, 0xD3, 0xB9, FIL_, 0x01, 0xCC, 0x88, ++ FIL_, 0xD3, 0xAD, FIL_, 0x01, 0xCC, 0x88, FIL_, ++ 0xD1, 0x97, FIL_, 0x01, 0xCC, 0x8F, FIL_, 0xD1, ++ 0xB6, FIL_, 0x01, 0xCC, 0x8F, FIL_, 0xD1, 0xB7, ++ FIL_, 0x01, 0xCC, 0x88, FIL_, 0xD3, 0x9A, FIL_, ++ 0x01, 0xCC, 0x88, FIL_, 0xD3, 0x9B, FIL_, 0x01, ++ 0xCC, 0x88, FIL_, 0xD3, 0xAA, FIL_, 0x01, 0xCC, ++ 0x88, FIL_, 0xD3, 0xAB, FIL_, 0x03, 0xD9, 0x94, ++ FIL_, 0xD8, 0xA3, FIL_, 0xD9, 0x95, FIL_, 0xD8, ++ 0xA5, FIL_, 0xD9, 0x93, FIL_, 0xD8, 0xA2, FIL_, ++ 0x01, 0xD9, 0x94, FIL_, 0xD8, 0xA4, FIL_, 0x01, ++ 0xD9, 0x94, FIL_, 0xD8, 0xA6, FIL_, 0x01, 0xD9, ++ 0x94, FIL_, 0xDB, 0x82, FIL_, 0x01, 0xD9, 0x94, ++ FIL_, 0xDB, 0x93, FIL_, 0x01, 0xD9, 0x94, FIL_, ++ 0xDB, 0x80, FIL_, 0x01, 0xE0, 0xA4, 0xBC, FIL_, ++ 0xE0, 0xA4, 0xA9, FIL_, 0x01, 0xE0, 0xA4, 0xBC, ++ FIL_, 0xE0, 0xA4, 0xB1, FIL_, 0x01, 0xE0, 0xA4, ++ 0xBC, FIL_, 0xE0, 0xA4, 0xB4, FIL_, 0x02, 0xE0, ++ 0xA6, 0xBE, FIL_, 0xE0, 
0xA7, 0x8B, FIL_, 0xE0, ++ 0xA7, 0x97, FIL_, 0xE0, 0xA7, 0x8C, FIL_, 0x03, ++ 0xE0, 0xAD, 0x96, FIL_, 0xE0, 0xAD, 0x88, FIL_, ++ 0xE0, 0xAC, 0xBE, FIL_, 0xE0, 0xAD, 0x8B, FIL_, ++ 0xE0, 0xAD, 0x97, FIL_, 0xE0, 0xAD, 0x8C, FIL_, ++ 0x01, 0xE0, 0xAF, 0x97, FIL_, 0xE0, 0xAE, 0x94, ++ FIL_, 0x02, 0xE0, 0xAF, 0x97, FIL_, 0xE0, 0xAF, ++ 0x8C, FIL_, 0xE0, 0xAE, 0xBE, FIL_, 0xE0, 0xAF, ++ 0x8A, FIL_, 0x01, 0xE0, 0xAE, 0xBE, FIL_, 0xE0, ++ 0xAF, 0x8B, FIL_, 0x01, 0xE0, 0xB1, 0x96, FIL_, ++ 0xE0, 0xB1, 0x88, FIL_, 0x01, 0xE0, 0xB3, 0x95, ++ FIL_, 0xE0, 0xB3, 0x80, FIL_, 0x03, 0xE0, 0xB3, ++ 0x82, FIL_, 0xE0, 0xB3, 0x8A, FIL_, 0xE0, 0xB3, ++ 0x96, FIL_, 0xE0, 0xB3, 0x88, FIL_, 0xE0, 0xB3, ++ 0x95, FIL_, 0xE0, 0xB3, 0x87, FIL_, 0x01, 0xE0, ++ 0xB3, 0x95, FIL_, 0xE0, 0xB3, 0x8B, FIL_, 0x02, ++ 0xE0, 0xB4, 0xBE, FIL_, 0xE0, 0xB5, 0x8A, FIL_, ++ 0xE0, 0xB5, 0x97, FIL_, 0xE0, 0xB5, 0x8C, FIL_, ++ 0x01, 0xE0, 0xB4, 0xBE, FIL_, 0xE0, 0xB5, 0x8B, ++ FIL_, 0x03, 0xE0, 0xB7, 0x9F, FIL_, 0xE0, 0xB7, ++ 0x9E, FIL_, 0xE0, 0xB7, 0x8A, FIL_, 0xE0, 0xB7, ++ 0x9A, FIL_, 0xE0, 0xB7, 0x8F, FIL_, 0xE0, 0xB7, ++ 0x9C, FIL_, 0x01, 0xE0, 0xB7, 0x8A, FIL_, 0xE0, ++ 0xB7, 0x9D, FIL_, 0x01, 0xE1, 0x80, 0xAE, FIL_, ++ 0xE1, 0x80, 0xA6, FIL_, 0x01, 0xE1, 0xAC, 0xB5, ++ FIL_, 0xE1, 0xAC, 0x86, FIL_, 0x01, 0xE1, 0xAC, ++ 0xB5, FIL_, 0xE1, 0xAC, 0x88, FIL_, 0x01, 0xE1, ++ 0xAC, 0xB5, FIL_, 0xE1, 0xAC, 0x8A, FIL_, 0x01, ++ 0xE1, 0xAC, 0xB5, FIL_, 0xE1, 0xAC, 0x8C, FIL_, ++ 0x01, 0xE1, 0xAC, 0xB5, FIL_, 0xE1, 0xAC, 0x8E, ++ FIL_, 0x01, 0xE1, 0xAC, 0xB5, FIL_, 0xE1, 0xAC, ++ 0x92, FIL_, 0x01, 0xE1, 0xAC, 0xB5, FIL_, 0xE1, ++ 0xAC, 0xBB, FIL_, 0x01, 0xE1, 0xAC, 0xB5, FIL_, ++ 0xE1, 0xAC, 0xBD, FIL_, 0x01, 0xE1, 0xAC, 0xB5, ++ FIL_, 0xE1, 0xAD, 0x80, FIL_, 0x01, 0xE1, 0xAC, ++ 0xB5, FIL_, 0xE1, 0xAD, 0x81, FIL_, 0x01, 0xE1, ++ 0xAC, 0xB5, FIL_, 0xE1, 0xAD, 0x83, FIL_, 0x01, ++ 0xCC, 0x84, FIL_, 0xE1, 0xB8, 0xB8, FIL_, 0x01, ++ 0xCC, 0x84, FIL_, 0xE1, 0xB8, 0xB9, FIL_, 0x01, ++ 0xCC, 0x84, FIL_, 0xE1, 0xB9, 0x9C, FIL_, 0x01, ++ 0xCC, 0x84, FIL_, 0xE1, 0xB9, 0x9D, FIL_, 0x01, ++ 0xCC, 0x87, FIL_, 0xE1, 0xB9, 0xA8, FIL_, 0x01, ++ 0xCC, 0x87, FIL_, 0xE1, 0xB9, 0xA9, FIL_, 0x02, ++ 0xCC, 0x86, FIL_, 0xE1, 0xBA, 0xB6, FIL_, 0xCC, ++ 0x82, FIL_, 0xE1, 0xBA, 0xAC, FIL_, 0x02, 0xCC, ++ 0x82, FIL_, 0xE1, 0xBA, 0xAD, FIL_, 0xCC, 0x86, ++ FIL_, 0xE1, 0xBA, 0xB7, FIL_, 0x01, 0xCC, 0x82, ++ FIL_, 0xE1, 0xBB, 0x86, FIL_, 0x01, 0xCC, 0x82, ++ FIL_, 0xE1, 0xBB, 0x87, FIL_, 0x01, 0xCC, 0x82, ++ FIL_, 0xE1, 0xBB, 0x98, FIL_, 0x01, 0xCC, 0x82, ++ FIL_, 0xE1, 0xBB, 0x99, FIL_, 0x04, 0xCD, 0x85, ++ FIL_, 0xE1, 0xBE, 0x80, FIL_, 0xCD, 0x82, FIL_, ++ 0xE1, 0xBC, 0x86, FIL_, 0xCC, 0x80, FIL_, 0xE1, ++ 0xBC, 0x82, FIL_, 0xCC, 0x81, FIL_, 0xE1, 0xBC, ++ 0x84, FIL_, 0x04, 0xCD, 0x82, FIL_, 0xE1, 0xBC, ++ 0x87, FIL_, 0xCC, 0x81, FIL_, 0xE1, 0xBC, 0x85, ++ FIL_, 0xCC, 0x80, FIL_, 0xE1, 0xBC, 0x83, FIL_, ++ 0xCD, 0x85, FIL_, 0xE1, 0xBE, 0x81, FIL_, 0x01, ++ 0xCD, 0x85, FIL_, 0xE1, 0xBE, 0x82, FIL_, 0x01, ++ 0xCD, 0x85, FIL_, 0xE1, 0xBE, 0x83, FIL_, 0x01, ++ 0xCD, 0x85, FIL_, 0xE1, 0xBE, 0x84, FIL_, 0x01, ++ 0xCD, 0x85, FIL_, 0xE1, 0xBE, 0x85, FIL_, 0x01, ++ 0xCD, 0x85, FIL_, 0xE1, 0xBE, 0x86, FIL_, 0x01, ++ 0xCD, 0x85, FIL_, 0xE1, 0xBE, 0x87, FIL_, 0x04, ++ 0xCC, 0x81, FIL_, 0xE1, 0xBC, 0x8C, FIL_, 0xCC, ++ 0x80, FIL_, 0xE1, 0xBC, 0x8A, FIL_, 0xCD, 0x85, ++ FIL_, 0xE1, 0xBE, 0x88, FIL_, 0xCD, 0x82, FIL_, ++ 0xE1, 0xBC, 0x8E, FIL_, 0x04, 0xCC, 0x80, FIL_, ++ 0xE1, 0xBC, 0x8B, FIL_, 0xCD, 0x82, FIL_, 0xE1, ++ 0xBC, 0x8F, FIL_, 0xCC, 0x81, FIL_, 0xE1, 0xBC, ++ 0x8D, 
FIL_, 0xCD, 0x85, FIL_, 0xE1, 0xBE, 0x89, ++ FIL_, 0x01, 0xCD, 0x85, FIL_, 0xE1, 0xBE, 0x8A, ++ FIL_, 0x01, 0xCD, 0x85, FIL_, 0xE1, 0xBE, 0x8B, ++ FIL_, 0x01, 0xCD, 0x85, FIL_, 0xE1, 0xBE, 0x8C, ++ FIL_, 0x01, 0xCD, 0x85, FIL_, 0xE1, 0xBE, 0x8D, ++ FIL_, 0x01, 0xCD, 0x85, FIL_, 0xE1, 0xBE, 0x8E, ++ FIL_, 0x01, 0xCD, 0x85, FIL_, 0xE1, 0xBE, 0x8F, ++ FIL_, 0x02, 0xCC, 0x80, FIL_, 0xE1, 0xBC, 0x92, ++ FIL_, 0xCC, 0x81, FIL_, 0xE1, 0xBC, 0x94, FIL_, ++ 0x02, 0xCC, 0x80, FIL_, 0xE1, 0xBC, 0x93, FIL_, ++ 0xCC, 0x81, FIL_, 0xE1, 0xBC, 0x95, FIL_, 0x02, ++ 0xCC, 0x80, FIL_, 0xE1, 0xBC, 0x9A, FIL_, 0xCC, ++ 0x81, FIL_, 0xE1, 0xBC, 0x9C, FIL_, 0x02, 0xCC, ++ 0x80, FIL_, 0xE1, 0xBC, 0x9B, FIL_, 0xCC, 0x81, ++ FIL_, 0xE1, 0xBC, 0x9D, FIL_, 0x04, 0xCC, 0x80, ++ FIL_, 0xE1, 0xBC, 0xA2, FIL_, 0xCC, 0x81, FIL_, ++ 0xE1, 0xBC, 0xA4, FIL_, 0xCD, 0x82, FIL_, 0xE1, ++ 0xBC, 0xA6, FIL_, 0xCD, 0x85, FIL_, 0xE1, 0xBE, ++ 0x90, FIL_, 0x04, 0xCD, 0x85, FIL_, 0xE1, 0xBE, ++ 0x91, FIL_, 0xCC, 0x81, FIL_, 0xE1, 0xBC, 0xA5, ++ FIL_, 0xCD, 0x82, FIL_, 0xE1, 0xBC, 0xA7, FIL_, ++ 0xCC, 0x80, FIL_, 0xE1, 0xBC, 0xA3, FIL_, 0x01, ++ 0xCD, 0x85, FIL_, 0xE1, 0xBE, 0x92, FIL_, 0x01, ++ 0xCD, 0x85, FIL_, 0xE1, 0xBE, 0x93, FIL_, 0x01, ++ 0xCD, 0x85, FIL_, 0xE1, 0xBE, 0x94, FIL_, 0x01, ++ 0xCD, 0x85, FIL_, 0xE1, 0xBE, 0x95, FIL_, 0x01, ++ 0xCD, 0x85, FIL_, 0xE1, 0xBE, 0x96, FIL_, 0x01, ++ 0xCD, 0x85, FIL_, 0xE1, 0xBE, 0x97, FIL_, 0x04, ++ 0xCC, 0x81, FIL_, 0xE1, 0xBC, 0xAC, FIL_, 0xCC, ++ 0x80, FIL_, 0xE1, 0xBC, 0xAA, FIL_, 0xCD, 0x85, ++ FIL_, 0xE1, 0xBE, 0x98, FIL_, 0xCD, 0x82, FIL_, ++ 0xE1, 0xBC, 0xAE, FIL_, 0x04, 0xCD, 0x82, FIL_, ++ 0xE1, 0xBC, 0xAF, FIL_, 0xCD, 0x85, FIL_, 0xE1, ++ 0xBE, 0x99, FIL_, 0xCC, 0x81, FIL_, 0xE1, 0xBC, ++ 0xAD, FIL_, 0xCC, 0x80, FIL_, 0xE1, 0xBC, 0xAB, ++ FIL_, 0x01, 0xCD, 0x85, FIL_, 0xE1, 0xBE, 0x9A, ++ FIL_, 0x01, 0xCD, 0x85, FIL_, 0xE1, 0xBE, 0x9B, ++ FIL_, 0x01, 0xCD, 0x85, FIL_, 0xE1, 0xBE, 0x9C, ++ FIL_, 0x01, 0xCD, 0x85, FIL_, 0xE1, 0xBE, 0x9D, ++ FIL_, 0x01, 0xCD, 0x85, FIL_, 0xE1, 0xBE, 0x9E, ++ FIL_, 0x01, 0xCD, 0x85, FIL_, 0xE1, 0xBE, 0x9F, ++ FIL_, 0x03, 0xCC, 0x81, FIL_, 0xE1, 0xBC, 0xB4, ++ FIL_, 0xCC, 0x80, FIL_, 0xE1, 0xBC, 0xB2, FIL_, ++ 0xCD, 0x82, FIL_, 0xE1, 0xBC, 0xB6, FIL_, 0x03, ++ 0xCC, 0x80, FIL_, 0xE1, 0xBC, 0xB3, FIL_, 0xCD, ++ 0x82, FIL_, 0xE1, 0xBC, 0xB7, FIL_, 0xCC, 0x81, ++ FIL_, 0xE1, 0xBC, 0xB5, FIL_, 0x03, 0xCC, 0x81, ++ FIL_, 0xE1, 0xBC, 0xBC, FIL_, 0xCC, 0x80, FIL_, ++ 0xE1, 0xBC, 0xBA, FIL_, 0xCD, 0x82, FIL_, 0xE1, ++ 0xBC, 0xBE, FIL_, 0x03, 0xCC, 0x80, FIL_, 0xE1, ++ 0xBC, 0xBB, FIL_, 0xCD, 0x82, FIL_, 0xE1, 0xBC, ++ 0xBF, FIL_, 0xCC, 0x81, FIL_, 0xE1, 0xBC, 0xBD, ++ FIL_, 0x02, 0xCC, 0x80, FIL_, 0xE1, 0xBD, 0x82, ++ FIL_, 0xCC, 0x81, FIL_, 0xE1, 0xBD, 0x84, FIL_, ++ 0x02, 0xCC, 0x81, FIL_, 0xE1, 0xBD, 0x85, FIL_, ++ 0xCC, 0x80, FIL_, 0xE1, 0xBD, 0x83, FIL_, 0x02, ++ 0xCC, 0x80, FIL_, 0xE1, 0xBD, 0x8A, FIL_, 0xCC, ++ 0x81, FIL_, 0xE1, 0xBD, 0x8C, FIL_, 0x02, 0xCC, ++ 0x80, FIL_, 0xE1, 0xBD, 0x8B, FIL_, 0xCC, 0x81, ++ FIL_, 0xE1, 0xBD, 0x8D, FIL_, 0x03, 0xCD, 0x82, ++ FIL_, 0xE1, 0xBD, 0x96, FIL_, 0xCC, 0x80, FIL_, ++ 0xE1, 0xBD, 0x92, FIL_, 0xCC, 0x81, FIL_, 0xE1, ++ 0xBD, 0x94, FIL_, 0x03, 0xCC, 0x80, FIL_, 0xE1, ++ 0xBD, 0x93, FIL_, 0xCD, 0x82, FIL_, 0xE1, 0xBD, ++ 0x97, FIL_, 0xCC, 0x81, FIL_, 0xE1, 0xBD, 0x95, ++ FIL_, 0x03, 0xCC, 0x80, FIL_, 0xE1, 0xBD, 0x9B, ++ FIL_, 0xCD, 0x82, FIL_, 0xE1, 0xBD, 0x9F, FIL_, ++ 0xCC, 0x81, FIL_, 0xE1, 0xBD, 0x9D, FIL_, 0x04, ++ 0xCD, 0x82, FIL_, 0xE1, 0xBD, 0xA6, FIL_, 0xCD, ++ 0x85, FIL_, 0xE1, 0xBE, 0xA0, FIL_, 0xCC, 
0x80, ++ FIL_, 0xE1, 0xBD, 0xA2, FIL_, 0xCC, 0x81, FIL_, ++ 0xE1, 0xBD, 0xA4, FIL_, 0x04, 0xCD, 0x85, FIL_, ++ 0xE1, 0xBE, 0xA1, FIL_, 0xCD, 0x82, FIL_, 0xE1, ++ 0xBD, 0xA7, FIL_, 0xCC, 0x81, FIL_, 0xE1, 0xBD, ++ 0xA5, FIL_, 0xCC, 0x80, FIL_, 0xE1, 0xBD, 0xA3, ++ FIL_, 0x01, 0xCD, 0x85, FIL_, 0xE1, 0xBE, 0xA2, ++ FIL_, 0x01, 0xCD, 0x85, FIL_, 0xE1, 0xBE, 0xA3, ++ FIL_, 0x01, 0xCD, 0x85, FIL_, 0xE1, 0xBE, 0xA4, ++ FIL_, 0x01, 0xCD, 0x85, FIL_, 0xE1, 0xBE, 0xA5, ++ FIL_, 0x01, 0xCD, 0x85, FIL_, 0xE1, 0xBE, 0xA6, ++ FIL_, 0x01, 0xCD, 0x85, FIL_, 0xE1, 0xBE, 0xA7, ++ FIL_, 0x04, 0xCC, 0x80, FIL_, 0xE1, 0xBD, 0xAA, ++ FIL_, 0xCC, 0x81, FIL_, 0xE1, 0xBD, 0xAC, FIL_, ++ 0xCD, 0x82, FIL_, 0xE1, 0xBD, 0xAE, FIL_, 0xCD, ++ 0x85, FIL_, 0xE1, 0xBE, 0xA8, FIL_, 0x04, 0xCD, ++ 0x82, FIL_, 0xE1, 0xBD, 0xAF, FIL_, 0xCC, 0x80, ++ FIL_, 0xE1, 0xBD, 0xAB, FIL_, 0xCD, 0x85, FIL_, ++ 0xE1, 0xBE, 0xA9, FIL_, 0xCC, 0x81, FIL_, 0xE1, ++ 0xBD, 0xAD, FIL_, 0x01, 0xCD, 0x85, FIL_, 0xE1, ++ 0xBE, 0xAA, FIL_, 0x01, 0xCD, 0x85, FIL_, 0xE1, ++ 0xBE, 0xAB, FIL_, 0x01, 0xCD, 0x85, FIL_, 0xE1, ++ 0xBE, 0xAC, FIL_, 0x01, 0xCD, 0x85, FIL_, 0xE1, ++ 0xBE, 0xAD, FIL_, 0x01, 0xCD, 0x85, FIL_, 0xE1, ++ 0xBE, 0xAE, FIL_, 0x01, 0xCD, 0x85, FIL_, 0xE1, ++ 0xBE, 0xAF, FIL_, 0x01, 0xCD, 0x85, FIL_, 0xE1, ++ 0xBE, 0xB2, FIL_, 0x01, 0xCD, 0x85, FIL_, 0xE1, ++ 0xBF, 0x82, FIL_, 0x01, 0xCD, 0x85, FIL_, 0xE1, ++ 0xBF, 0xB2, FIL_, 0x01, 0xCD, 0x85, FIL_, 0xE1, ++ 0xBE, 0xB7, FIL_, 0x03, 0xCC, 0x81, FIL_, 0xE1, ++ 0xBF, 0x8E, FIL_, 0xCC, 0x80, FIL_, 0xE1, 0xBF, ++ 0x8D, FIL_, 0xCD, 0x82, FIL_, 0xE1, 0xBF, 0x8F, ++ FIL_, 0x01, 0xCD, 0x85, FIL_, 0xE1, 0xBF, 0x87, ++ FIL_, 0x01, 0xCD, 0x85, FIL_, 0xE1, 0xBF, 0xB7, ++ FIL_, 0x03, 0xCC, 0x80, FIL_, 0xE1, 0xBF, 0x9D, ++ FIL_, 0xCC, 0x81, FIL_, 0xE1, 0xBF, 0x9E, FIL_, ++ 0xCD, 0x82, FIL_, 0xE1, 0xBF, 0x9F, FIL_, 0x01, ++ 0xCC, 0xB8, FIL_, 0xE2, 0x86, 0x9A, FIL_, 0x01, ++ 0xCC, 0xB8, FIL_, 0xE2, 0x86, 0x9B, FIL_, 0x01, ++ 0xCC, 0xB8, FIL_, 0xE2, 0x86, 0xAE, FIL_, 0x01, ++ 0xCC, 0xB8, FIL_, 0xE2, 0x87, 0x8D, FIL_, 0x01, ++ 0xCC, 0xB8, FIL_, 0xE2, 0x87, 0x8F, FIL_, 0x01, ++ 0xCC, 0xB8, FIL_, 0xE2, 0x87, 0x8E, FIL_, 0x01, ++ 0xCC, 0xB8, FIL_, 0xE2, 0x88, 0x84, FIL_, 0x01, ++ 0xCC, 0xB8, FIL_, 0xE2, 0x88, 0x89, FIL_, 0x01, ++ 0xCC, 0xB8, FIL_, 0xE2, 0x88, 0x8C, FIL_, 0x01, ++ 0xCC, 0xB8, FIL_, 0xE2, 0x88, 0xA4, FIL_, 0x01, ++ 0xCC, 0xB8, FIL_, 0xE2, 0x88, 0xA6, FIL_, 0x01, ++ 0xCC, 0xB8, FIL_, 0xE2, 0x89, 0x81, FIL_, 0x01, ++ 0xCC, 0xB8, FIL_, 0xE2, 0x89, 0x84, FIL_, 0x01, ++ 0xCC, 0xB8, FIL_, 0xE2, 0x89, 0x87, FIL_, 0x01, ++ 0xCC, 0xB8, FIL_, 0xE2, 0x89, 0x89, FIL_, 0x01, ++ 0xCC, 0xB8, FIL_, 0xE2, 0x89, 0xAD, FIL_, 0x01, ++ 0xCC, 0xB8, FIL_, 0xE2, 0x89, 0xA2, FIL_, 0x01, ++ 0xCC, 0xB8, FIL_, 0xE2, 0x89, 0xB0, FIL_, 0x01, ++ 0xCC, 0xB8, FIL_, 0xE2, 0x89, 0xB1, FIL_, 0x01, ++ 0xCC, 0xB8, FIL_, 0xE2, 0x89, 0xB4, FIL_, 0x01, ++ 0xCC, 0xB8, FIL_, 0xE2, 0x89, 0xB5, FIL_, 0x01, ++ 0xCC, 0xB8, FIL_, 0xE2, 0x89, 0xB8, FIL_, 0x01, ++ 0xCC, 0xB8, FIL_, 0xE2, 0x89, 0xB9, FIL_, 0x01, ++ 0xCC, 0xB8, FIL_, 0xE2, 0x8A, 0x80, FIL_, 0x01, ++ 0xCC, 0xB8, FIL_, 0xE2, 0x8A, 0x81, FIL_, 0x01, ++ 0xCC, 0xB8, FIL_, 0xE2, 0x8B, 0xA0, FIL_, 0x01, ++ 0xCC, 0xB8, FIL_, 0xE2, 0x8B, 0xA1, FIL_, 0x01, ++ 0xCC, 0xB8, FIL_, 0xE2, 0x8A, 0x84, FIL_, 0x01, ++ 0xCC, 0xB8, FIL_, 0xE2, 0x8A, 0x85, FIL_, 0x01, ++ 0xCC, 0xB8, FIL_, 0xE2, 0x8A, 0x88, FIL_, 0x01, ++ 0xCC, 0xB8, FIL_, 0xE2, 0x8A, 0x89, FIL_, 0x01, ++ 0xCC, 0xB8, FIL_, 0xE2, 0x8B, 0xA2, FIL_, 0x01, ++ 0xCC, 0xB8, FIL_, 0xE2, 0x8B, 0xA3, FIL_, 0x01, ++ 0xCC, 0xB8, FIL_, 0xE2, 
0x8A, 0xAC, FIL_, 0x01, ++ 0xCC, 0xB8, FIL_, 0xE2, 0x8A, 0xAD, FIL_, 0x01, ++ 0xCC, 0xB8, FIL_, 0xE2, 0x8A, 0xAE, FIL_, 0x01, ++ 0xCC, 0xB8, FIL_, 0xE2, 0x8A, 0xAF, FIL_, 0x01, ++ 0xCC, 0xB8, FIL_, 0xE2, 0x8B, 0xAA, FIL_, 0x01, ++ 0xCC, 0xB8, FIL_, 0xE2, 0x8B, 0xAB, FIL_, 0x01, ++ 0xCC, 0xB8, FIL_, 0xE2, 0x8B, 0xAC, FIL_, 0x01, ++ 0xCC, 0xB8, FIL_, 0xE2, 0x8B, 0xAD, FIL_, 0x01, ++ 0xE3, 0x82, 0x99, FIL_, 0xE3, 0x82, 0x94, FIL_, ++ 0x01, 0xE3, 0x82, 0x99, FIL_, 0xE3, 0x81, 0x8C, ++ FIL_, 0x01, 0xE3, 0x82, 0x99, FIL_, 0xE3, 0x81, ++ 0x8E, FIL_, 0x01, 0xE3, 0x82, 0x99, FIL_, 0xE3, ++ 0x81, 0x90, FIL_, 0x01, 0xE3, 0x82, 0x99, FIL_, ++ 0xE3, 0x81, 0x92, FIL_, 0x01, 0xE3, 0x82, 0x99, ++ FIL_, 0xE3, 0x81, 0x94, FIL_, 0x01, 0xE3, 0x82, ++ 0x99, FIL_, 0xE3, 0x81, 0x96, FIL_, 0x01, 0xE3, ++ 0x82, 0x99, FIL_, 0xE3, 0x81, 0x98, FIL_, 0x01, ++ 0xE3, 0x82, 0x99, FIL_, 0xE3, 0x81, 0x9A, FIL_, ++ 0x01, 0xE3, 0x82, 0x99, FIL_, 0xE3, 0x81, 0x9C, ++ FIL_, 0x01, 0xE3, 0x82, 0x99, FIL_, 0xE3, 0x81, ++ 0x9E, FIL_, 0x01, 0xE3, 0x82, 0x99, FIL_, 0xE3, ++ 0x81, 0xA0, FIL_, 0x01, 0xE3, 0x82, 0x99, FIL_, ++ 0xE3, 0x81, 0xA2, FIL_, 0x01, 0xE3, 0x82, 0x99, ++ FIL_, 0xE3, 0x81, 0xA5, FIL_, 0x01, 0xE3, 0x82, ++ 0x99, FIL_, 0xE3, 0x81, 0xA7, FIL_, 0x01, 0xE3, ++ 0x82, 0x99, FIL_, 0xE3, 0x81, 0xA9, FIL_, 0x02, ++ 0xE3, 0x82, 0x9A, FIL_, 0xE3, 0x81, 0xB1, FIL_, ++ 0xE3, 0x82, 0x99, FIL_, 0xE3, 0x81, 0xB0, FIL_, ++ 0x02, 0xE3, 0x82, 0x99, FIL_, 0xE3, 0x81, 0xB3, ++ FIL_, 0xE3, 0x82, 0x9A, FIL_, 0xE3, 0x81, 0xB4, ++ FIL_, 0x02, 0xE3, 0x82, 0x99, FIL_, 0xE3, 0x81, ++ 0xB6, FIL_, 0xE3, 0x82, 0x9A, FIL_, 0xE3, 0x81, ++ 0xB7, FIL_, 0x02, 0xE3, 0x82, 0x9A, FIL_, 0xE3, ++ 0x81, 0xBA, FIL_, 0xE3, 0x82, 0x99, FIL_, 0xE3, ++ 0x81, 0xB9, FIL_, 0x02, 0xE3, 0x82, 0x9A, FIL_, ++ 0xE3, 0x81, 0xBD, FIL_, 0xE3, 0x82, 0x99, FIL_, ++ 0xE3, 0x81, 0xBC, FIL_, 0x01, 0xE3, 0x82, 0x99, ++ FIL_, 0xE3, 0x82, 0x9E, FIL_, 0x01, 0xE3, 0x82, ++ 0x99, FIL_, 0xE3, 0x83, 0xB4, FIL_, 0x01, 0xE3, ++ 0x82, 0x99, FIL_, 0xE3, 0x82, 0xAC, FIL_, 0x01, ++ 0xE3, 0x82, 0x99, FIL_, 0xE3, 0x82, 0xAE, FIL_, ++ 0x01, 0xE3, 0x82, 0x99, FIL_, 0xE3, 0x82, 0xB0, ++ FIL_, 0x01, 0xE3, 0x82, 0x99, FIL_, 0xE3, 0x82, ++ 0xB2, FIL_, 0x01, 0xE3, 0x82, 0x99, FIL_, 0xE3, ++ 0x82, 0xB4, FIL_, 0x01, 0xE3, 0x82, 0x99, FIL_, ++ 0xE3, 0x82, 0xB6, FIL_, 0x01, 0xE3, 0x82, 0x99, ++ FIL_, 0xE3, 0x82, 0xB8, FIL_, 0x01, 0xE3, 0x82, ++ 0x99, FIL_, 0xE3, 0x82, 0xBA, FIL_, 0x01, 0xE3, ++ 0x82, 0x99, FIL_, 0xE3, 0x82, 0xBC, FIL_, 0x01, ++ 0xE3, 0x82, 0x99, FIL_, 0xE3, 0x82, 0xBE, FIL_, ++ 0x01, 0xE3, 0x82, 0x99, FIL_, 0xE3, 0x83, 0x80, ++ FIL_, 0x01, 0xE3, 0x82, 0x99, FIL_, 0xE3, 0x83, ++ 0x82, FIL_, 0x01, 0xE3, 0x82, 0x99, FIL_, 0xE3, ++ 0x83, 0x85, FIL_, 0x01, 0xE3, 0x82, 0x99, FIL_, ++ 0xE3, 0x83, 0x87, FIL_, 0x01, 0xE3, 0x82, 0x99, ++ FIL_, 0xE3, 0x83, 0x89, FIL_, 0x02, 0xE3, 0x82, ++ 0x99, FIL_, 0xE3, 0x83, 0x90, FIL_, 0xE3, 0x82, ++ 0x9A, FIL_, 0xE3, 0x83, 0x91, FIL_, 0x02, 0xE3, ++ 0x82, 0x99, FIL_, 0xE3, 0x83, 0x93, FIL_, 0xE3, ++ 0x82, 0x9A, FIL_, 0xE3, 0x83, 0x94, FIL_, 0x02, ++ 0xE3, 0x82, 0x9A, FIL_, 0xE3, 0x83, 0x97, FIL_, ++ 0xE3, 0x82, 0x99, FIL_, 0xE3, 0x83, 0x96, FIL_, ++ 0x02, 0xE3, 0x82, 0x9A, FIL_, 0xE3, 0x83, 0x9A, ++ FIL_, 0xE3, 0x82, 0x99, FIL_, 0xE3, 0x83, 0x99, ++ FIL_, 0x02, 0xE3, 0x82, 0x99, FIL_, 0xE3, 0x83, ++ 0x9C, FIL_, 0xE3, 0x82, 0x9A, FIL_, 0xE3, 0x83, ++ 0x9D, FIL_, 0x01, 0xE3, 0x82, 0x99, FIL_, 0xE3, ++ 0x83, 0xB7, FIL_, 0x01, 0xE3, 0x82, 0x99, FIL_, ++ 0xE3, 0x83, 0xB8, FIL_, 0x01, 0xE3, 0x82, 0x99, ++ FIL_, 0xE3, 0x83, 0xB9, FIL_, 0x01, 0xE3, 0x82, ++ 0x99, 
FIL_, 0xE3, 0x83, 0xBA, FIL_, 0x01, 0xE3, ++ 0x82, 0x99, FIL_, 0xE3, 0x83, 0xBE, FIL_, ++ }, ++}; ++ ++static const uchar_t u8_decomp_b2_tbl[2][2][256] = { ++ { ++ { ++ 0, N_, N_, N_, N_, N_, N_, N_, ++ N_, N_, N_, N_, N_, N_, N_, N_, ++ N_, N_, N_, N_, N_, N_, N_, N_, ++ N_, N_, N_, N_, N_, N_, N_, N_, ++ N_, N_, N_, N_, N_, N_, N_, N_, ++ N_, N_, N_, N_, N_, N_, N_, N_, ++ N_, N_, N_, N_, N_, N_, N_, N_, ++ N_, N_, N_, N_, N_, N_, N_, N_, ++ N_, N_, N_, N_, N_, N_, N_, N_, ++ N_, N_, N_, N_, N_, N_, N_, N_, ++ N_, N_, N_, N_, N_, N_, N_, N_, ++ N_, N_, N_, N_, N_, N_, N_, N_, ++ N_, N_, N_, N_, N_, N_, N_, N_, ++ N_, N_, N_, N_, N_, N_, N_, N_, ++ N_, N_, N_, N_, N_, N_, N_, N_, ++ N_, N_, N_, N_, N_, N_, N_, N_, ++ N_, N_, N_, N_, N_, N_, N_, N_, ++ N_, N_, N_, N_, N_, N_, N_, N_, ++ N_, N_, N_, N_, N_, N_, N_, N_, ++ N_, N_, N_, N_, N_, N_, N_, N_, ++ N_, N_, N_, N_, N_, N_, N_, N_, ++ N_, N_, N_, N_, N_, N_, N_, N_, ++ N_, N_, N_, N_, N_, N_, N_, N_, ++ N_, N_, N_, N_, N_, N_, N_, N_, ++ N_, N_, N_, N_, N_, N_, N_, N_, ++ N_, N_, N_, N_, N_, N_, N_, N_, ++ N_, N_, N_, N_, N_, N_, N_, N_, ++ N_, N_, N_, N_, N_, N_, N_, N_, ++ 1, 2, 3, 4, N_, N_, N_, N_, ++ N_, N_, N_, N_, N_, N_, N_, 5, ++ N_, N_, N_, N_, N_, N_, N_, N_, ++ N_, N_, N_, N_, N_, N_, N_, N_, ++ }, ++ { ++ N_, N_, N_, N_, N_, N_, N_, N_, ++ N_, N_, N_, N_, N_, N_, N_, N_, ++ N_, N_, N_, N_, N_, N_, N_, N_, ++ N_, N_, N_, N_, N_, N_, N_, N_, ++ N_, N_, N_, N_, N_, N_, N_, N_, ++ N_, N_, N_, N_, N_, N_, N_, N_, ++ N_, N_, N_, N_, N_, N_, N_, N_, ++ N_, N_, N_, N_, N_, N_, N_, N_, ++ N_, N_, N_, N_, N_, N_, N_, N_, ++ N_, N_, N_, N_, N_, N_, N_, N_, ++ N_, N_, N_, N_, N_, N_, N_, N_, ++ N_, N_, N_, N_, N_, N_, N_, N_, ++ N_, N_, N_, N_, N_, N_, N_, N_, ++ N_, N_, N_, N_, N_, N_, N_, N_, ++ N_, N_, N_, N_, N_, N_, N_, N_, ++ N_, N_, N_, N_, N_, N_, N_, N_, ++ N_, N_, N_, N_, N_, N_, N_, N_, ++ N_, N_, N_, N_, N_, N_, N_, N_, ++ N_, N_, N_, N_, N_, N_, N_, N_, ++ N_, N_, N_, N_, N_, 6, N_, N_, ++ N_, N_, N_, N_, N_, N_, N_, N_, ++ N_, N_, N_, N_, N_, N_, N_, 7, ++ N_, N_, N_, N_, N_, N_, N_, N_, ++ N_, N_, N_, N_, N_, N_, N_, N_, ++ N_, N_, N_, N_, N_, N_, N_, N_, ++ N_, N_, N_, N_, N_, N_, N_, N_, ++ N_, N_, N_, N_, N_, N_, N_, N_, ++ N_, N_, N_, N_, N_, N_, N_, N_, ++ N_, N_, N_, N_, N_, N_, N_, N_, ++ N_, N_, N_, N_, N_, N_, N_, N_, ++ N_, N_, N_, N_, N_, N_, N_, N_, ++ N_, N_, N_, N_, N_, N_, N_, N_, ++ }, ++ ++ }, ++ { ++ { ++ 0, N_, N_, N_, N_, N_, N_, N_, ++ N_, N_, N_, N_, N_, N_, N_, N_, ++ N_, N_, N_, N_, N_, N_, N_, N_, ++ N_, N_, N_, N_, N_, N_, N_, N_, ++ N_, N_, N_, N_, N_, N_, N_, N_, ++ N_, N_, N_, N_, N_, N_, N_, N_, ++ N_, N_, N_, N_, N_, N_, N_, N_, ++ N_, N_, N_, N_, N_, N_, N_, N_, ++ N_, N_, N_, N_, N_, N_, N_, N_, ++ N_, N_, N_, N_, N_, N_, N_, N_, ++ N_, N_, N_, N_, N_, N_, N_, N_, ++ N_, N_, N_, N_, N_, N_, N_, N_, ++ N_, N_, N_, N_, N_, N_, N_, N_, ++ N_, N_, N_, N_, N_, N_, N_, N_, ++ N_, N_, N_, N_, N_, N_, N_, N_, ++ N_, N_, N_, N_, N_, N_, N_, N_, ++ N_, N_, N_, N_, N_, N_, N_, N_, ++ N_, N_, N_, N_, N_, N_, N_, N_, ++ N_, N_, N_, N_, N_, N_, N_, N_, ++ N_, N_, N_, N_, N_, N_, N_, N_, ++ N_, N_, N_, N_, N_, N_, N_, N_, ++ N_, N_, N_, N_, N_, N_, N_, N_, ++ N_, N_, N_, N_, N_, N_, N_, N_, ++ N_, N_, N_, N_, N_, N_, N_, N_, ++ N_, N_, N_, N_, N_, N_, N_, N_, ++ N_, N_, N_, N_, N_, N_, N_, N_, ++ N_, N_, N_, N_, N_, N_, N_, N_, ++ N_, N_, N_, N_, N_, N_, N_, N_, ++ 1, 2, 3, 4, N_, N_, N_, N_, ++ N_, N_, N_, N_, N_, N_, N_, 5, ++ N_, N_, N_, N_, N_, N_, N_, N_, ++ N_, N_, N_, N_, N_, N_, N_, N_, ++ }, 
++ { ++ N_, N_, N_, N_, N_, N_, N_, N_, ++ N_, N_, N_, N_, N_, N_, N_, N_, ++ N_, N_, N_, N_, N_, N_, N_, N_, ++ N_, N_, N_, N_, N_, N_, N_, N_, ++ N_, N_, N_, N_, N_, N_, N_, N_, ++ N_, N_, N_, N_, N_, N_, N_, N_, ++ N_, N_, N_, N_, N_, N_, N_, N_, ++ N_, N_, N_, N_, N_, N_, N_, N_, ++ N_, N_, N_, N_, N_, N_, N_, N_, ++ N_, N_, N_, N_, N_, N_, N_, N_, ++ N_, N_, N_, N_, N_, N_, N_, N_, ++ N_, N_, N_, N_, N_, N_, N_, N_, ++ N_, N_, N_, N_, N_, N_, N_, N_, ++ N_, N_, N_, N_, N_, N_, N_, N_, ++ N_, N_, N_, N_, N_, N_, N_, N_, ++ N_, N_, N_, N_, N_, N_, N_, N_, ++ N_, N_, N_, N_, N_, N_, N_, N_, ++ N_, N_, N_, N_, N_, N_, N_, N_, ++ N_, N_, N_, N_, N_, N_, N_, N_, ++ N_, N_, N_, N_, N_, 6, N_, N_, ++ N_, N_, N_, N_, N_, N_, N_, N_, ++ N_, N_, N_, N_, N_, N_, N_, 7, ++ N_, N_, N_, N_, N_, N_, N_, N_, ++ N_, N_, N_, N_, N_, N_, N_, N_, ++ N_, N_, N_, N_, N_, N_, N_, N_, ++ N_, N_, N_, N_, N_, N_, N_, N_, ++ N_, N_, N_, N_, N_, N_, N_, N_, ++ N_, N_, N_, N_, N_, N_, N_, N_, ++ N_, N_, N_, N_, N_, N_, N_, N_, ++ N_, N_, N_, N_, N_, N_, N_, N_, ++ N_, N_, N_, N_, N_, N_, N_, N_, ++ N_, N_, N_, N_, N_, N_, N_, N_, ++ }, ++ ++ }, ++ ++}; ++ ++static const u8_displacement_t u8_decomp_b3_tbl[2][8][256] = { ++ { ++ { /* Third byte table 0. */ ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, 
++ { N_, 0 }, { N_, 0 }, { 0, 0 }, ++ { 1, 35 }, { 2, 247 }, { 3, 474 }, ++ { 4, 693 }, { 5, 709 }, { 6, 951 }, ++ { N_, 0 }, { 7, 1139 }, { 8, 1152 }, ++ { N_, 0 }, { 9, 1177 }, { 10, 1199 }, ++ { 11, 1295 }, { 12, 1360 }, { 13, 1405 }, ++ { N_, 0 }, { 14, 1450 }, { N_, 0 }, ++ { N_, 0 }, { 15, 1620 }, { N_, 0 }, ++ { 16, 1624 }, { 17, 1649 }, { N_, 0 }, ++ { 18, 1665 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, ++ }, ++ { /* Third byte table 1. */ ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { 19, 1680 }, ++ { 20, 1701 }, { N_, 0 }, { 21, 1757 }, ++ { 22, 1792 }, { 23, 1806 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { 24, 1834 }, ++ { 25, 1869 }, { 26, 1876 }, { N_, 0 }, ++ { 27, 1897 }, { N_, 0 }, { 28, 1904 }, ++ { N_, 0 }, { 29, 1942 }, { N_, 0 }, ++ { 30, 1963 }, { 31, 1994 }, { N_, 0 }, ++ { 32, 2000 }, { 33, 2006 }, { 34, 2018 }, ++ { 35, 2021 }, { 36, 2109 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { 
N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, ++ }, ++ { /* Third byte table 2. */ ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { 37, 2158 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { 0x8000, 2165 }, { 0x8001, 2445 }, ++ { 0x8002, 2741 }, { 0x8003, 3029 }, { 0x8004, 3337 }, ++ { 0x8005, 3725 }, { 0x8006, 4053 }, { 0x8007, 4536 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { 
N_, 0 }, { N_, 0 }, ++ { N_, 0 }, ++ }, ++ { /* Third byte table 3. */ ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { 38, 4895 }, ++ { 39, 4964 }, { 40, 4999 }, { N_, 0 }, ++ { 41, 5018 }, { 42, 5098 }, { 43, 5230 }, ++ { 44, 5248 }, { 45, 5266 }, { 46, 5326 }, ++ { 47, 5410 }, { 48, 5470 }, { 49, 5518 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { 50, 5526 }, { 51, 5596 }, ++ { 52, 5767 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { 53, 5810 }, { 54, 5822 }, { N_, 0 }, ++ { 55, 5830 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { 56, 5836 }, { 57, 5839 }, { 58, 5842 }, ++ { 59, 6034 }, { 60, 6226 }, { 61, 6418 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, ++ }, ++ { /* Third byte table 4. 
*/ ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { 62, 6484 }, ++ { 63, 6497 }, { 64, 6672 }, { 65, 6770 }, ++ { 66, 6923 }, { 67, 6968 }, { 68, 7160 }, ++ { N_, 0 }, { 0x8008, 7247 }, { 69, 7597 }, ++ { 70, 7773 }, { 71, 7950 }, { 0x8009, 8142 }, ++ { 0x800A, 8919 }, { 72, 9351 }, { 73, 9522 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, ++ }, ++ { /* Third byte table 5. 
*/ ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { 0x800B, 9743 }, ++ { 0x800C, 9999 }, { 0x800D, 10255 }, { 0x800E, 10511 }, ++ { 74, 10767 }, { 75, 10967 }, { N_, 0 }, ++ { N_, 0 }, { 76, 11139 }, { 77, 11303 }, ++ { 78, 11468 }, { 79, 11576 }, { 0x800F, 11740 }, ++ { 0x8010, 12006 }, { 0x8011, 12280 }, { 0x8012, 12546 }, ++ { 80, 12812 }, { 0x8013, 13060 }, { 0x8014, 13348 }, ++ { 81, 13720 }, { 82, 13898 }, { 83, 13933 }, ++ { 84, 14045 }, { 85, 14197 }, { 86, 14347 }, ++ { 87, 14410 }, { 88, 14540 }, { 89, 14729 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, ++ }, ++ { /* Third byte table 6. 
*/ ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { 90, 14829 }, { 91, 14912 }, ++ { 92, 14969 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { 93, 14982 }, { 94, 15046 }, { 95, 15109 }, ++ { 96, 15163 }, { 97, 15225 }, { 98, 15282 }, ++ { 99, 15341 }, { 100, 15405 }, { 101, 15469 }, ++ { 102, 15533 }, { 103, 15597 }, { 104, 15681 }, ++ { 105, 15812 }, { 106, 15942 }, { 107, 16072 }, ++ { 108, 16202 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, ++ }, ++ { /* Third byte table 7. 
*/ ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { 0x8015, 16273 }, { 0x8016, 16536 }, ++ { 0x8017, 16799 }, { 0x8018, 17064 }, { 0x8019, 17329 }, ++ { 0x801A, 17601 }, { 0x801B, 17878 }, { 0x801C, 18147 }, ++ { 109, 18419 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, ++ }, ++ }, ++ { ++ { /* Third byte table 0. 
*/ ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { 0, 0 }, ++ { 1, 35 }, { 2, 247 }, { 3, 474 }, ++ { 4, 693 }, { 5, 709 }, { 6, 951 }, ++ { N_, 0 }, { 7, 1139 }, { 8, 1152 }, ++ { N_, 0 }, { 9, 1177 }, { 10, 1199 }, ++ { 11, 1295 }, { 12, 1362 }, { 13, 1407 }, ++ { N_, 0 }, { 14, 1452 }, { N_, 0 }, ++ { N_, 0 }, { 15, 1622 }, { N_, 0 }, ++ { 16, 1626 }, { 17, 1651 }, { N_, 0 }, ++ { 18, 1667 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, ++ }, ++ { /* Third byte table 1. 
*/ ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { 19, 1682 }, ++ { 20, 1703 }, { N_, 0 }, { 21, 1759 }, ++ { 22, 1794 }, { 23, 1808 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { 24, 1836 }, ++ { 25, 1871 }, { 26, 1878 }, { N_, 0 }, ++ { 27, 1899 }, { N_, 0 }, { 28, 1906 }, ++ { N_, 0 }, { 29, 1944 }, { N_, 0 }, ++ { 30, 1965 }, { 31, 1996 }, { N_, 0 }, ++ { 32, 2002 }, { 33, 2008 }, { 34, 2020 }, ++ { 35, 2023 }, { 36, 2111 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, ++ }, ++ { /* Third byte table 2. 
*/ ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { 37, 2160 }, ++ { N_, 0 }, { N_, 0 }, { 38, 2167 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { 39, 2170 }, { 40, 2226 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { 41, 2247 }, { 42, 2268 }, { 43, 2340 }, ++ { N_, 0 }, { 0x8000, 2414 }, { 0x8001, 2694 }, ++ { 0x8002, 2990 }, { 0x8003, 3278 }, { 0x8004, 3586 }, ++ { 0x8005, 3974 }, { 0x8006, 4302 }, { 0x8007, 4785 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, ++ }, ++ { /* Third byte table 3. 
*/ ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { 44, 5144 }, ++ { 45, 5213 }, { 46, 5248 }, { N_, 0 }, ++ { 47, 5273 }, { 48, 5358 }, { 49, 5490 }, ++ { 50, 5508 }, { 51, 5526 }, { 52, 5586 }, ++ { 53, 5670 }, { 54, 5730 }, { 55, 5778 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { 56, 5786 }, { 57, 5856 }, ++ { 58, 6027 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { 59, 6070 }, { 60, 6082 }, { N_, 0 }, ++ { 61, 6090 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { 62, 6096 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { 63, 6099 }, { 64, 6102 }, { 65, 6105 }, ++ { 66, 6297 }, { 67, 6489 }, { 68, 6681 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, ++ }, ++ { /* Third byte table 4. 
*/ ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { 69, 6747 }, ++ { 70, 6760 }, { 71, 6935 }, { 72, 7033 }, ++ { 73, 7186 }, { 74, 7231 }, { 75, 7423 }, ++ { N_, 0 }, { 0x8008, 7510 }, { 76, 7891 }, ++ { 77, 8103 }, { 78, 8280 }, { 0x8009, 8482 }, ++ { 0x800A, 9259 }, { 79, 9701 }, { 80, 9872 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, ++ }, ++ { /* Third byte table 5. 
*/ ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { 0x800B, 10106 }, ++ { 0x800C, 10362 }, { 0x800D, 10618 }, { 0x800E, 10874 }, ++ { 81, 11130 }, { 82, 11330 }, { 0x800F, 11566 }, ++ { 83, 11822 }, { 84, 11932 }, { 85, 12096 }, ++ { 86, 12261 }, { 87, 12369 }, { 0x8010, 12533 }, ++ { 0x8011, 12799 }, { 0x8012, 13073 }, { 0x8013, 13339 }, ++ { 88, 13605 }, { 0x8014, 13853 }, { 0x8015, 14141 }, ++ { 89, 14513 }, { 90, 14691 }, { 91, 14746 }, ++ { 92, 14860 }, { 93, 15012 }, { 94, 15162 }, ++ { 95, 15225 }, { 96, 15355 }, { 97, 15544 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, ++ }, ++ { /* Third byte table 6. 
*/ ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { 98, 15644 }, { 99, 15727 }, ++ { 100, 15784 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { 101, 15797 }, { 102, 15861 }, { 103, 15924 }, ++ { 104, 15978 }, { 105, 16041 }, { 106, 16098 }, ++ { 107, 16157 }, { 108, 16221 }, { 109, 16285 }, ++ { 110, 16349 }, { 111, 16413 }, { 112, 16501 }, ++ { 113, 16632 }, { 114, 16762 }, { 115, 16892 }, ++ { 116, 17022 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, ++ }, ++ { /* Third byte table 7. 
*/ ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { 0x8016, 17097 }, { 0x8017, 17360 }, ++ { 0x8018, 17623 }, { 0x8019, 17888 }, { 0x801A, 18153 }, ++ { 0x801B, 18425 }, { 0x801C, 18702 }, { 0x801D, 18971 }, ++ { 117, 19243 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, ++ }, ++ }, ++}; ++ ++static const uchar_t u8_decomp_b4_tbl[2][118][257] = { ++ { ++ { /* Fourth byte table 0. 
*/ ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 1, 1, 1, 1, 1, 1, 1, ++ 1, 4, 4, 5, 5, 5, 5, 5, ++ 8, 8, 8, 9, 10, 13, 15, 15, ++ 15, 18, 19, 20, 20, 25, 30, 35, ++ 35, 35, 35, 35, 35, 35, 35, 35, ++ 35, 35, 35, 35, 35, 35, 35, 35, ++ 35, 35, 35, 35, 35, 35, 35, 35, ++ 35, 35, 35, 35, 35, 35, 35, 35, ++ 35, 35, 35, 35, 35, 35, 35, 35, ++ 35, 35, 35, 35, 35, 35, 35, 35, ++ 35, 35, 35, 35, 35, 35, 35, 35, ++ 35, 35, 35, 35, 35, 35, 35, 35, ++ 35, ++ }, ++ { /* Fourth byte table 1. */ ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 4, 8, 12, 16, 20, 24, 24, ++ 28, 32, 36, 40, 44, 48, 52, 56, ++ 60, 60, 64, 68, 72, 76, 80, 84, ++ 84, 84, 88, 92, 96, 100, 104, 104, ++ 104, 108, 112, 116, 120, 124, 128, 128, ++ 132, 136, 140, 144, 148, 152, 156, 160, ++ 164, 164, 168, 172, 176, 180, 184, 188, ++ 188, 188, 192, 196, 200, 204, 208, 208, ++ 212, 212, 212, 212, 212, 212, 212, 212, ++ 212, 212, 212, 212, 212, 212, 212, 212, ++ 212, 212, 212, 212, 212, 212, 212, 212, ++ 212, 212, 212, 212, 212, 212, 212, 212, ++ 212, 212, 212, 212, 212, 212, 212, 212, ++ 212, 212, 212, 212, 212, 212, 212, 212, ++ 212, 212, 212, 212, 212, 212, 212, 212, ++ 212, 212, 212, 212, 212, 212, 212, 212, ++ 212, ++ }, ++ { /* Fourth byte table 2. */ ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 4, 8, 12, 16, 20, 24, 28, ++ 32, 36, 40, 44, 48, 52, 56, 60, ++ 64, 64, 64, 68, 72, 76, 80, 84, ++ 88, 92, 96, 100, 104, 108, 112, 116, ++ 120, 124, 128, 132, 136, 140, 144, 144, ++ 144, 148, 152, 156, 160, 164, 168, 172, ++ 176, 180, 180, 182, 184, 188, 192, 196, ++ 200, 200, 204, 208, 212, 216, 220, 224, ++ 227, 227, 227, 227, 227, 227, 227, 227, ++ 227, 227, 227, 227, 227, 227, 227, 227, ++ 227, 227, 227, 227, 227, 227, 227, 227, ++ 227, 227, 227, 227, 227, 227, 227, 227, ++ 227, 227, 227, 227, 227, 227, 227, 227, ++ 227, 227, 227, 227, 227, 227, 227, 227, ++ 227, 227, 227, 227, 227, 227, 227, 227, ++ 227, 227, 227, 227, 227, 227, 227, 227, ++ 227, ++ }, ++ { /* Fourth byte table 3. 
*/ ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 3, 3, 3, 7, 11, 15, 19, ++ 23, 27, 30, 30, 30, 34, 38, 42, ++ 46, 50, 54, 54, 54, 58, 62, 66, ++ 70, 74, 78, 82, 86, 90, 94, 98, ++ 102, 106, 110, 114, 118, 122, 126, 126, ++ 126, 130, 134, 138, 142, 146, 150, 154, ++ 158, 162, 166, 170, 174, 178, 182, 186, ++ 190, 194, 198, 202, 206, 210, 214, 218, ++ 219, 219, 219, 219, 219, 219, 219, 219, ++ 219, 219, 219, 219, 219, 219, 219, 219, ++ 219, 219, 219, 219, 219, 219, 219, 219, ++ 219, 219, 219, 219, 219, 219, 219, 219, ++ 219, 219, 219, 219, 219, 219, 219, 219, ++ 219, 219, 219, 219, 219, 219, 219, 219, ++ 219, 219, 219, 219, 219, 219, 219, 219, ++ 219, 219, 219, 219, 219, 219, 219, 219, ++ 219, ++ }, ++ { /* Fourth byte table 4. */ ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 4, 8, 8, 8, 8, 8, 8, ++ 8, 8, 8, 8, 8, 8, 8, 8, ++ 12, 16, 16, 16, 16, 16, 16, 16, ++ 16, 16, 16, 16, 16, 16, 16, 16, ++ 16, 16, 16, 16, 16, 16, 16, 16, ++ 16, 16, 16, 16, 16, 16, 16, 16, ++ 16, 16, 16, 16, 16, 16, 16, 16, ++ 16, 16, 16, 16, 16, 16, 16, 16, ++ 16, 16, 16, 16, 16, 16, 16, 16, ++ 16, 16, 16, 16, 16, 16, 16, 16, ++ 16, 16, 16, 16, 16, 16, 16, 16, ++ 16, 16, 16, 16, 16, 16, 16, 16, ++ 16, ++ }, ++ { /* Fourth byte table 5. */ ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 4, 8, 12, ++ 14, 16, 18, 20, 22, 24, 28, 32, ++ 36, 40, 44, 48, 52, 56, 62, 68, ++ 74, 80, 86, 92, 98, 104, 104, 110, ++ 116, 122, 128, 133, 138, 138, 138, 142, ++ 146, 150, 154, 158, 162, 168, 174, 179, ++ 184, 188, 190, 192, 194, 198, 202, 202, ++ 202, 206, 210, 216, 222, 227, 232, 237, ++ 242, 242, 242, 242, 242, 242, 242, 242, ++ 242, 242, 242, 242, 242, 242, 242, 242, ++ 242, 242, 242, 242, 242, 242, 242, 242, ++ 242, 242, 242, 242, 242, 242, 242, 242, ++ 242, 242, 242, 242, 242, 242, 242, 242, ++ 242, 242, 242, 242, 242, 242, 242, 242, ++ 242, 242, 242, 242, 242, 242, 242, 242, ++ 242, 242, 242, 242, 242, 242, 242, 242, ++ 242, ++ }, ++ { /* Fourth byte table 6. 
*/ ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 4, 8, 12, 16, 20, 24, 28, ++ 32, 36, 40, 44, 48, 52, 56, 60, ++ 64, 68, 72, 76, 80, 84, 88, 92, ++ 96, 100, 104, 108, 112, 112, 112, 116, ++ 120, 120, 120, 120, 120, 120, 120, 124, ++ 128, 132, 136, 142, 148, 154, 160, 164, ++ 168, 174, 180, 184, 188, 188, 188, 188, ++ 188, 188, 188, 188, 188, 188, 188, 188, ++ 188, 188, 188, 188, 188, 188, 188, 188, ++ 188, 188, 188, 188, 188, 188, 188, 188, ++ 188, 188, 188, 188, 188, 188, 188, 188, ++ 188, 188, 188, 188, 188, 188, 188, 188, ++ 188, 188, 188, 188, 188, 188, 188, 188, ++ 188, 188, 188, 188, 188, 188, 188, 188, ++ 188, 188, 188, 188, 188, 188, 188, 188, ++ 188, 188, 188, 188, 188, 188, 188, 188, ++ 188, ++ }, ++ { /* Fourth byte table 7. */ ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 1, 3, 4, 5, 7, 9, 11, ++ 12, 13, 13, 13, 13, 13, 13, 13, ++ 13, 13, 13, 13, 13, 13, 13, 13, ++ 13, 13, 13, 13, 13, 13, 13, 13, ++ 13, 13, 13, 13, 13, 13, 13, 13, ++ 13, 13, 13, 13, 13, 13, 13, 13, ++ 13, 13, 13, 13, 13, 13, 13, 13, ++ 13, 13, 13, 13, 13, 13, 13, 13, ++ 13, 13, 13, 13, 13, 13, 13, 13, ++ 13, 13, 13, 13, 13, 13, 13, 13, ++ 13, ++ }, ++ { /* Fourth byte table 8. */ ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 3, 6, 9, 12, 15, 18, 18, ++ 18, 20, 21, 22, 23, 25, 25, 25, ++ 25, 25, 25, 25, 25, 25, 25, 25, ++ 25, 25, 25, 25, 25, 25, 25, 25, ++ 25, 25, 25, 25, 25, 25, 25, 25, ++ 25, 25, 25, 25, 25, 25, 25, 25, ++ 25, 25, 25, 25, 25, 25, 25, 25, ++ 25, 25, 25, 25, 25, 25, 25, 25, ++ 25, 25, 25, 25, 25, 25, 25, 25, ++ 25, 25, 25, 25, 25, 25, 25, 25, ++ 25, 25, 25, 25, 25, 25, 25, 25, ++ 25, 25, 25, 25, 25, 25, 25, 25, ++ 25, 25, 25, 25, 25, 25, 25, 25, ++ 25, ++ }, ++ { /* Fourth byte table 9. 
*/ ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 3, 6, 6, 9, 14, 14, 14, ++ 14, 14, 14, 14, 14, 14, 14, 14, ++ 14, 14, 14, 14, 14, 14, 14, 14, ++ 14, 14, 14, 14, 14, 14, 14, 14, ++ 14, 14, 14, 14, 14, 14, 14, 14, ++ 14, 14, 14, 14, 14, 14, 14, 14, ++ 14, 14, 14, 14, 14, 17, 17, 17, ++ 17, 17, 17, 20, 20, 20, 20, 22, ++ 22, 22, 22, 22, 22, 22, 22, 22, ++ 22, 22, 22, 22, 22, 22, 22, 22, ++ 22, 22, 22, 22, 22, 22, 22, 22, ++ 22, 22, 22, 22, 22, 22, 22, 22, ++ 22, 22, 22, 22, 22, 22, 22, 22, ++ 22, 22, 22, 22, 22, 22, 22, 22, ++ 22, 22, 22, 22, 22, 22, 22, 22, ++ 22, 22, 22, 22, 22, 22, 22, 22, ++ 22, ++ }, ++ { /* Fourth byte table 10. */ ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 3, 14, 19, ++ 22, 27, 32, 37, 37, 42, 42, 47, ++ 52, 59, 59, 59, 59, 59, 59, 59, ++ 59, 59, 59, 59, 59, 59, 59, 59, ++ 59, 59, 59, 59, 59, 59, 59, 59, ++ 59, 59, 59, 64, 69, 74, 79, 84, ++ 89, 96, 96, 96, 96, 96, 96, 96, ++ 96, 96, 96, 96, 96, 96, 96, 96, ++ 96, 96, 96, 96, 96, 96, 96, 96, ++ 96, 96, 96, 96, 96, 96, 96, 96, ++ 96, 96, 96, 96, 96, 96, 96, 96, ++ 96, 96, 96, 96, 96, 96, 96, 96, ++ 96, 96, 96, 96, 96, 96, 96, 96, ++ 96, 96, 96, 96, 96, 96, 96, 96, ++ 96, 96, 96, 96, 96, 96, 96, 96, ++ 96, 96, 96, 96, 96, 96, 96, 96, ++ 96, ++ }, ++ { /* Fourth byte table 11. */ ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 5, 10, 15, 20, 25, ++ 25, 27, 29, 31, 41, 51, 53, 55, ++ 55, 55, 55, 55, 55, 55, 55, 55, ++ 55, 55, 55, 55, 55, 55, 55, 55, ++ 55, 55, 55, 55, 55, 55, 55, 55, ++ 55, 57, 59, 61, 61, 63, 65, 65, ++ 65, 65, 65, 65, 65, 65, 65, 65, ++ 65, 65, 65, 65, 65, 65, 65, 65, ++ 65, 65, 65, 65, 65, 65, 65, 65, ++ 65, 65, 65, 65, 65, 65, 65, 65, ++ 65, 65, 65, 65, 65, 65, 65, 65, ++ 65, 65, 65, 65, 65, 65, 65, 65, ++ 65, 65, 65, 65, 65, 65, 65, 65, ++ 65, 65, 65, 65, 65, 65, 65, 65, ++ 65, 65, 65, 65, 65, 65, 65, 65, ++ 65, ++ }, ++ { /* Fourth byte table 12. 
*/ ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 5, 10, 10, 15, 15, 15, 15, ++ 20, 20, 20, 20, 20, 25, 30, 35, ++ 35, 35, 35, 35, 35, 35, 35, 35, ++ 35, 35, 40, 40, 40, 40, 40, 40, ++ 40, 40, 40, 40, 40, 40, 40, 40, ++ 40, 40, 40, 40, 40, 40, 40, 40, ++ 40, 40, 40, 40, 40, 40, 40, 40, ++ 40, 40, 45, 45, 45, 45, 45, 45, ++ 45, 45, 45, 45, 45, 45, 45, 45, ++ 45, 45, 45, 45, 45, 45, 45, 45, ++ 45, 45, 45, 45, 45, 45, 45, 45, ++ 45, 45, 45, 45, 45, 45, 45, 45, ++ 45, 45, 45, 45, 45, 45, 45, 45, ++ 45, 45, 45, 45, 45, 45, 45, 45, ++ 45, 45, 45, 45, 45, 45, 45, 45, ++ 45, 45, 45, 45, 45, 45, 45, 45, ++ 45, ++ }, ++ { /* Fourth byte table 13. */ ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 5, 10, 10, 15, 15, 15, 15, ++ 20, 20, 20, 20, 20, 25, 30, 35, ++ 35, 35, 35, 35, 35, 35, 35, 35, ++ 35, 35, 35, 35, 35, 35, 35, 35, ++ 35, 35, 35, 35, 35, 35, 35, 40, ++ 45, 45, 45, 45, 45, 45, 45, 45, ++ 45, 45, 45, 45, 45, 45, 45, 45, ++ 45, 45, 45, 45, 45, 45, 45, 45, ++ 45, 45, 45, 45, 45, 45, 45, 45, ++ 45, 45, 45, 45, 45, 45, 45, 45, ++ 45, 45, 45, 45, 45, 45, 45, 45, ++ 45, 45, 45, 45, 45, 45, 45, 45, ++ 45, 45, 45, 45, 45, 45, 45, 45, ++ 45, 45, 45, 45, 45, 45, 45, 45, ++ 45, ++ }, ++ { /* Fourth byte table 14. */ ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 5, 10, 10, 10, 10, 10, ++ 10, 10, 10, 10, 10, 10, 10, 10, ++ 10, 15, 20, 25, 30, 30, 30, 35, ++ 40, 40, 40, 45, 50, 55, 60, 65, ++ 70, 70, 70, 75, 80, 85, 90, 95, ++ 100, 100, 100, 105, 110, 115, 120, 125, ++ 130, 135, 140, 145, 150, 155, 160, 160, ++ 160, 165, 170, 170, 170, 170, 170, 170, ++ 170, 170, 170, 170, 170, 170, 170, 170, ++ 170, 170, 170, 170, 170, 170, 170, 170, ++ 170, 170, 170, 170, 170, 170, 170, 170, ++ 170, 170, 170, 170, 170, 170, 170, 170, ++ 170, 170, 170, 170, 170, 170, 170, 170, ++ 170, 170, 170, 170, 170, 170, 170, 170, ++ 170, 170, 170, 170, 170, 170, 170, 170, ++ 170, 170, 170, 170, 170, 170, 170, 170, ++ 170, ++ }, ++ { /* Fourth byte table 15. 
*/ ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 4, 4, 4, 4, 4, 4, 4, 4, ++ 4, 4, 4, 4, 4, 4, 4, 4, ++ 4, 4, 4, 4, 4, 4, 4, 4, ++ 4, 4, 4, 4, 4, 4, 4, 4, ++ 4, 4, 4, 4, 4, 4, 4, 4, ++ 4, 4, 4, 4, 4, 4, 4, 4, ++ 4, 4, 4, 4, 4, 4, 4, 4, ++ 4, 4, 4, 4, 4, 4, 4, 4, ++ 4, 4, 4, 4, 4, 4, 4, 4, ++ 4, 4, 4, 4, 4, 4, 4, 4, ++ 4, 4, 4, 4, 4, 4, 4, 4, ++ 4, 4, 4, 4, 4, 4, 4, 4, ++ 4, 4, 4, 4, 4, 4, 4, 4, ++ 4, 4, 4, 4, 4, 4, 4, 4, ++ 4, 4, 4, 4, 4, 4, 4, 4, ++ 4, ++ }, ++ { /* Fourth byte table 16. */ ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 5, 10, 15, 20, 25, ++ 25, 25, 25, 25, 25, 25, 25, 25, ++ 25, 25, 25, 25, 25, 25, 25, 25, ++ 25, 25, 25, 25, 25, 25, 25, 25, ++ 25, 25, 25, 25, 25, 25, 25, 25, ++ 25, 25, 25, 25, 25, 25, 25, 25, ++ 25, 25, 25, 25, 25, 25, 25, 25, ++ 25, 25, 25, 25, 25, 25, 25, 25, ++ 25, 25, 25, 25, 25, 25, 25, 25, ++ 25, 25, 25, 25, 25, 25, 25, 25, ++ 25, 25, 25, 25, 25, 25, 25, 25, ++ 25, 25, 25, 25, 25, 25, 25, 25, ++ 25, ++ }, ++ { /* Fourth byte table 17. */ ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 4, 8, ++ 12, 16, 16, 16, 16, 16, 16, 16, ++ 16, 16, 16, 16, 16, 16, 16, 16, ++ 16, 16, 16, 16, 16, 16, 16, 16, ++ 16, 16, 16, 16, 16, 16, 16, 16, ++ 16, 16, 16, 16, 16, 16, 16, 16, ++ 16, 16, 16, 16, 16, 16, 16, 16, ++ 16, 16, 16, 16, 16, 16, 16, 16, ++ 16, 16, 16, 16, 16, 16, 16, 16, ++ 16, 16, 16, 16, 16, 16, 16, 16, ++ 16, ++ }, ++ { /* Fourth byte table 18. 
*/ ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 5, 5, 10, 10, 10, 10, 10, ++ 10, 10, 10, 10, 10, 10, 10, 10, ++ 10, 10, 10, 10, 15, 15, 15, 15, ++ 15, 15, 15, 15, 15, 15, 15, 15, ++ 15, 15, 15, 15, 15, 15, 15, 15, ++ 15, 15, 15, 15, 15, 15, 15, 15, ++ 15, 15, 15, 15, 15, 15, 15, 15, ++ 15, 15, 15, 15, 15, 15, 15, 15, ++ 15, 15, 15, 15, 15, 15, 15, 15, ++ 15, 15, 15, 15, 15, 15, 15, 15, ++ 15, 15, 15, 15, 15, 15, 15, 15, ++ 15, 15, 15, 15, 15, 15, 15, 15, ++ 15, 15, 15, 15, 15, 15, 15, 15, ++ 15, 15, 15, 15, 15, 15, 15, 15, ++ 15, 15, 15, 15, 15, 15, 15, 15, ++ 15, 15, 15, 15, 15, 15, 15, 15, ++ 15, ++ }, ++ { /* Fourth byte table 19. */ ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 7, 7, 7, 7, 7, 7, ++ 7, 7, 14, 14, 14, 21, 21, 21, ++ 21, 21, 21, 21, 21, 21, 21, 21, ++ 21, 21, 21, 21, 21, 21, 21, 21, ++ 21, 21, 21, 21, 21, 21, 21, 21, ++ 21, 21, 21, 21, 21, 21, 21, 21, ++ 21, 21, 21, 21, 21, 21, 21, 21, ++ 21, 21, 21, 21, 21, 21, 21, 21, ++ 21, 21, 21, 21, 21, 21, 21, 21, ++ 21, 21, 21, 21, 21, 21, 21, 21, ++ 21, 21, 21, 21, 21, 21, 21, 21, ++ 21, ++ }, ++ { /* Fourth byte table 20. */ ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 7, 14, 21, 28, 35, 42, 49, ++ 56, 56, 56, 56, 56, 56, 56, 56, ++ 56, 56, 56, 56, 56, 56, 56, 56, ++ 56, 56, 56, 56, 56, 56, 56, 56, ++ 56, 56, 56, 56, 56, 56, 56, 56, ++ 56, 56, 56, 56, 56, 56, 56, 56, ++ 56, 56, 56, 56, 56, 56, 56, 56, ++ 56, 56, 56, 56, 56, 56, 56, 56, ++ 56, 56, 56, 56, 56, 56, 56, 56, ++ 56, 56, 56, 56, 56, 56, 56, 56, ++ 56, 56, 56, 56, 56, 56, 56, 56, ++ 56, 56, 56, 56, 56, 56, 56, 56, ++ 56, 56, 56, 56, 56, 56, 56, 56, ++ 56, ++ }, ++ { /* Fourth byte table 21. 
*/ ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 7, 14, 14, 14, ++ 14, 14, 14, 14, 14, 14, 14, 14, ++ 14, 14, 14, 14, 14, 21, 28, 28, ++ 35, 35, 35, 35, 35, 35, 35, 35, ++ 35, 35, 35, 35, 35, 35, 35, 35, ++ 35, 35, 35, 35, 35, 35, 35, 35, ++ 35, 35, 35, 35, 35, 35, 35, 35, ++ 35, 35, 35, 35, 35, 35, 35, 35, ++ 35, 35, 35, 35, 35, 35, 35, 35, ++ 35, 35, 35, 35, 35, 35, 35, 35, ++ 35, 35, 35, 35, 35, 35, 35, 35, ++ 35, 35, 35, 35, 35, 35, 35, 35, ++ 35, 35, 35, 35, 35, 35, 35, 35, ++ 35, 35, 35, 35, 35, 35, 35, 35, ++ 35, 35, 35, 35, 35, 35, 35, 35, ++ 35, ++ }, ++ { /* Fourth byte table 22. */ ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 7, 7, 7, 14, ++ 14, 14, 14, 14, 14, 14, 14, 14, ++ 14, 14, 14, 14, 14, 14, 14, 14, ++ 14, 14, 14, 14, 14, 14, 14, 14, ++ 14, 14, 14, 14, 14, 14, 14, 14, ++ 14, 14, 14, 14, 14, 14, 14, 14, ++ 14, 14, 14, 14, 14, 14, 14, 14, ++ 14, 14, 14, 14, 14, 14, 14, 14, ++ 14, 14, 14, 14, 14, 14, 14, 14, ++ 14, 14, 14, 14, 14, 14, 14, 14, ++ 14, ++ }, ++ { /* Fourth byte table 23. */ ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 7, 14, 21, 21, 21, 28, ++ 28, 28, 28, 28, 28, 28, 28, 28, ++ 28, 28, 28, 28, 28, 28, 28, 28, ++ 28, 28, 28, 28, 28, 28, 28, 28, ++ 28, 28, 28, 28, 28, 28, 28, 28, ++ 28, 28, 28, 28, 28, 28, 28, 28, ++ 28, 28, 28, 28, 28, 28, 28, 28, ++ 28, 28, 28, 28, 28, 28, 28, 28, ++ 28, 28, 28, 28, 28, 28, 28, 28, ++ 28, 28, 28, 28, 28, 28, 28, 28, ++ 28, 28, 28, 28, 28, 28, 28, 28, ++ 28, 28, 28, 28, 28, 28, 28, 28, ++ 28, 28, 28, 28, 28, 28, 28, 28, ++ 28, ++ }, ++ { /* Fourth byte table 24. 
*/ ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 7, 7, 7, 14, 21, 21, 21, ++ 21, 21, 21, 21, 21, 21, 21, 21, ++ 21, 21, 21, 21, 21, 28, 35, 35, ++ 35, 35, 35, 35, 35, 35, 35, 35, ++ 35, 35, 35, 35, 35, 35, 35, 35, ++ 35, 35, 35, 35, 35, 35, 35, 35, ++ 35, 35, 35, 35, 35, 35, 35, 35, ++ 35, 35, 35, 35, 35, 35, 35, 35, ++ 35, 35, 35, 35, 35, 35, 35, 35, ++ 35, 35, 35, 35, 35, 35, 35, 35, ++ 35, 35, 35, 35, 35, 35, 35, 35, ++ 35, 35, 35, 35, 35, 35, 35, 35, ++ 35, 35, 35, 35, 35, 35, 35, 35, ++ 35, 35, 35, 35, 35, 35, 35, 35, ++ 35, 35, 35, 35, 35, 35, 35, 35, ++ 35, ++ }, ++ { /* Fourth byte table 25. */ ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 7, 7, 7, ++ 7, 7, 7, 7, 7, 7, 7, 7, ++ 7, 7, 7, 7, 7, 7, 7, 7, ++ 7, 7, 7, 7, 7, 7, 7, 7, ++ 7, 7, 7, 7, 7, 7, 7, 7, ++ 7, 7, 7, 7, 7, 7, 7, 7, ++ 7, 7, 7, 7, 7, 7, 7, 7, ++ 7, 7, 7, 7, 7, 7, 7, 7, ++ 7, 7, 7, 7, 7, 7, 7, 7, ++ 7, 7, 7, 7, 7, 7, 7, 7, ++ 7, 7, 7, 7, 7, 7, 7, 7, ++ 7, 7, 7, 7, 7, 7, 7, 7, ++ 7, 7, 7, 7, 7, 7, 7, 7, ++ 7, 7, 7, 7, 7, 7, 7, 7, ++ 7, ++ }, ++ { /* Fourth byte table 26. */ ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 7, 14, 21, 21, 21, ++ 21, 21, 21, 21, 21, 21, 21, 21, ++ 21, 21, 21, 21, 21, 21, 21, 21, ++ 21, 21, 21, 21, 21, 21, 21, 21, ++ 21, 21, 21, 21, 21, 21, 21, 21, ++ 21, 21, 21, 21, 21, 21, 21, 21, ++ 21, 21, 21, 21, 21, 21, 21, 21, ++ 21, 21, 21, 21, 21, 21, 21, 21, ++ 21, 21, 21, 21, 21, 21, 21, 21, ++ 21, 21, 21, 21, 21, 21, 21, 21, ++ 21, 21, 21, 21, 21, 21, 21, 21, ++ 21, 21, 21, 21, 21, 21, 21, 21, ++ 21, 21, 21, 21, 21, 21, 21, 21, ++ 21, 21, 21, 21, 21, 21, 21, 21, ++ 21, 21, 21, 21, 21, 21, 21, 21, ++ 21, ++ }, ++ { /* Fourth byte table 27. 
*/ ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 7, 7, 7, 7, 7, 7, 7, ++ 7, 7, 7, 7, 7, 7, 7, 7, ++ 7, 7, 7, 7, 7, 7, 7, 7, ++ 7, 7, 7, 7, 7, 7, 7, 7, ++ 7, 7, 7, 7, 7, 7, 7, 7, ++ 7, 7, 7, 7, 7, 7, 7, 7, ++ 7, 7, 7, 7, 7, 7, 7, 7, ++ 7, 7, 7, 7, 7, 7, 7, 7, ++ 7, 7, 7, 7, 7, 7, 7, 7, ++ 7, 7, 7, 7, 7, 7, 7, 7, ++ 7, 7, 7, 7, 7, 7, 7, 7, ++ 7, 7, 7, 7, 7, 7, 7, 7, ++ 7, 7, 7, 7, 7, 7, 7, 7, ++ 7, 7, 7, 7, 7, 7, 7, 7, ++ 7, 7, 7, 7, 7, 7, 7, 7, ++ 7, ++ }, ++ { /* Fourth byte table 28. */ ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 7, 7, 7, 7, 7, 7, 7, ++ 14, 21, 21, 28, 38, 38, 38, 38, ++ 38, 38, 38, 38, 38, 38, 38, 38, ++ 38, 38, 38, 38, 38, 38, 38, 38, ++ 38, 38, 38, 38, 38, 38, 38, 38, ++ 38, 38, 38, 38, 38, 38, 38, 38, ++ 38, 38, 38, 38, 38, 38, 38, 38, ++ 38, 38, 38, 38, 38, 38, 38, 38, ++ 38, 38, 38, 38, 38, 38, 38, 38, ++ 38, 38, 38, 38, 38, 38, 38, 38, ++ 38, 38, 38, 38, 38, 38, 38, 38, ++ 38, 38, 38, 38, 38, 38, 38, 38, ++ 38, 38, 38, 38, 38, 38, 38, 38, ++ 38, 38, 38, 38, 38, 38, 38, 38, ++ 38, 38, 38, 38, 38, 38, 38, 38, ++ 38, 38, 38, 38, 38, 38, 38, 38, ++ 38, ++ }, ++ { /* Fourth byte table 29. */ ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 7, 14, 21, 21, 21, ++ 21, 21, 21, 21, 21, 21, 21, 21, ++ 21, 21, 21, 21, 21, 21, 21, 21, ++ 21, 21, 21, 21, 21, 21, 21, 21, ++ 21, 21, 21, 21, 21, 21, 21, 21, ++ 21, 21, 21, 21, 21, 21, 21, 21, ++ 21, 21, 21, 21, 21, 21, 21, 21, ++ 21, 21, 21, 21, 21, 21, 21, 21, ++ 21, 21, 21, 21, 21, 21, 21, 21, ++ 21, 21, 21, 21, 21, 21, 21, 21, ++ 21, 21, 21, 21, 21, 21, 21, 21, ++ 21, 21, 21, 21, 21, 21, 21, 21, ++ 21, 21, 21, 21, 21, 21, 21, 21, ++ 21, 21, 21, 21, 21, 21, 21, 21, ++ 21, 21, 21, 21, 21, 21, 21, 21, ++ 21, ++ }, ++ { /* Fourth byte table 30. 
*/ ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 7, 7, 14, 24, 31, ++ 31, 31, 31, 31, 31, 31, 31, 31, ++ 31, 31, 31, 31, 31, 31, 31, 31, ++ 31, 31, 31, 31, 31, 31, 31, 31, ++ 31, 31, 31, 31, 31, 31, 31, 31, ++ 31, 31, 31, 31, 31, 31, 31, 31, ++ 31, 31, 31, 31, 31, 31, 31, 31, ++ 31, 31, 31, 31, 31, 31, 31, 31, ++ 31, 31, 31, 31, 31, 31, 31, 31, ++ 31, 31, 31, 31, 31, 31, 31, 31, ++ 31, 31, 31, 31, 31, 31, 31, 31, ++ 31, 31, 31, 31, 31, 31, 31, 31, ++ 31, 31, 31, 31, 31, 31, 31, 31, ++ 31, ++ }, ++ { /* Fourth byte table 31. */ ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 6, 6, 6, 6, ++ 6, 6, 6, 6, 6, 6, 6, 6, ++ 6, 6, 6, 6, 6, 6, 6, 6, ++ 6, 6, 6, 6, 6, 6, 6, 6, ++ 6, 6, 6, 6, 6, 6, 6, 6, ++ 6, 6, 6, 6, 6, 6, 6, 6, ++ 6, 6, 6, 6, 6, 6, 6, 6, ++ 6, 6, 6, 6, 6, 6, 6, 6, ++ 6, 6, 6, 6, 6, 6, 6, 6, ++ 6, 6, 6, 6, 6, 6, 6, 6, ++ 6, ++ }, ++ { /* Fourth byte table 32. */ ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 6, 6, 6, 6, ++ 6, 6, 6, 6, 6, 6, 6, 6, ++ 6, 6, 6, 6, 6, 6, 6, 6, ++ 6, 6, 6, 6, 6, 6, 6, 6, ++ 6, 6, 6, 6, 6, 6, 6, 6, ++ 6, 6, 6, 6, 6, 6, 6, 6, ++ 6, 6, 6, 6, 6, 6, 6, 6, ++ 6, 6, 6, 6, 6, 6, 6, 6, ++ 6, 6, 6, 6, 6, 6, 6, 6, ++ 6, 6, 6, 6, 6, 6, 6, 6, ++ 6, ++ }, ++ { /* Fourth byte table 33. 
*/ ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 6, 12, 12, ++ 12, 12, 12, 12, 12, 12, 12, 12, ++ 12, 12, 12, 12, 12, 12, 12, 12, ++ 12, 12, 12, 12, 12, 12, 12, 12, ++ 12, 12, 12, 12, 12, 12, 12, 12, ++ 12, 12, 12, 12, 12, 12, 12, 12, ++ 12, 12, 12, 12, 12, 12, 12, 12, ++ 12, 12, 12, 12, 12, 12, 12, 12, ++ 12, 12, 12, 12, 12, 12, 12, 12, ++ 12, 12, 12, 12, 12, 12, 12, 12, ++ 12, 12, 12, 12, 12, 12, 12, 12, ++ 12, 12, 12, 12, 12, 12, 12, 12, ++ 12, 12, 12, 12, 12, 12, 12, 12, ++ 12, ++ }, ++ { /* Fourth byte table 34. */ ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 3, 3, 3, ++ 3, 3, 3, 3, 3, 3, 3, 3, ++ 3, 3, 3, 3, 3, 3, 3, 3, ++ 3, 3, 3, 3, 3, 3, 3, 3, ++ 3, 3, 3, 3, 3, 3, 3, 3, ++ 3, 3, 3, 3, 3, 3, 3, 3, ++ 3, 3, 3, 3, 3, 3, 3, 3, ++ 3, 3, 3, 3, 3, 3, 3, 3, ++ 3, 3, 3, 3, 3, 3, 3, 3, ++ 3, 3, 3, 3, 3, 3, 3, 3, ++ 3, 3, 3, 3, 3, 3, 3, 3, ++ 3, 3, 3, 3, 3, 3, 3, 3, ++ 3, 3, 3, 3, 3, 3, 3, 3, ++ 3, 3, 3, 3, 3, 3, 3, 3, ++ 3, 3, 3, 3, 3, 3, 3, 3, ++ 3, ++ }, ++ { /* Fourth byte table 35. */ ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 7, 7, 7, 7, ++ 7, 7, 7, 7, 7, 7, 14, 14, ++ 14, 14, 14, 21, 21, 21, 21, 21, ++ 28, 28, 28, 28, 28, 35, 35, 35, ++ 35, 35, 35, 35, 35, 35, 35, 35, ++ 35, 35, 42, 42, 42, 42, 42, 42, ++ 42, 42, 42, 42, 49, 49, 56, 63, ++ 72, 79, 88, 88, 88, 88, 88, 88, ++ 88, 88, 88, 88, 88, 88, 88, 88, ++ 88, 88, 88, 88, 88, 88, 88, 88, ++ 88, 88, 88, 88, 88, 88, 88, 88, ++ 88, 88, 88, 88, 88, 88, 88, 88, ++ 88, 88, 88, 88, 88, 88, 88, 88, ++ 88, 88, 88, 88, 88, 88, 88, 88, ++ 88, 88, 88, 88, 88, 88, 88, 88, ++ 88, 88, 88, 88, 88, 88, 88, 88, ++ 88, ++ }, ++ { /* Fourth byte table 36. 
*/ ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 7, 7, 7, 7, 7, 7, ++ 7, 7, 7, 7, 7, 7, 7, 7, ++ 7, 7, 7, 7, 14, 14, 14, 14, ++ 14, 14, 14, 14, 14, 14, 21, 21, ++ 21, 21, 21, 28, 28, 28, 28, 28, ++ 35, 35, 35, 35, 35, 42, 42, 42, ++ 42, 42, 42, 42, 42, 42, 42, 42, ++ 42, 42, 49, 49, 49, 49, 49, 49, ++ 49, 49, 49, 49, 49, 49, 49, 49, ++ 49, 49, 49, 49, 49, 49, 49, 49, ++ 49, 49, 49, 49, 49, 49, 49, 49, ++ 49, 49, 49, 49, 49, 49, 49, 49, ++ 49, 49, 49, 49, 49, 49, 49, 49, ++ 49, 49, 49, 49, 49, 49, 49, 49, ++ 49, 49, 49, 49, 49, 49, 49, 49, ++ 49, 49, 49, 49, 49, 49, 49, 49, ++ 49, ++ }, ++ { /* Fourth byte table 37. */ ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 7, ++ 7, 7, 7, 7, 7, 7, 7, 7, ++ 7, 7, 7, 7, 7, 7, 7, 7, ++ 7, 7, 7, 7, 7, 7, 7, 7, ++ 7, 7, 7, 7, 7, 7, 7, 7, ++ 7, 7, 7, 7, 7, 7, 7, 7, ++ 7, 7, 7, 7, 7, 7, 7, 7, ++ 7, 7, 7, 7, 7, 7, 7, 7, ++ 7, 7, 7, 7, 7, 7, 7, 7, ++ 7, 7, 7, 7, 7, 7, 7, 7, ++ 7, 7, 7, 7, 7, 7, 7, 7, ++ 7, 7, 7, 7, 7, 7, 7, 7, ++ 7, ++ }, ++ { /* Fourth byte table 38. */ ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 6, 12, 13, 14, 15, 16, 17, ++ 18, 19, 20, 21, 21, 21, 21, 21, ++ 21, 21, 24, 24, 24, 24, 24, 24, ++ 27, 27, 27, 27, 27, 27, 27, 27, ++ 27, 27, 27, 27, 27, 28, 30, 33, ++ 33, 33, 33, 33, 33, 33, 33, 33, ++ 34, 34, 34, 34, 40, 49, 49, 55, ++ 64, 64, 64, 64, 64, 66, 66, 69, ++ 69, 69, 69, 69, 69, 69, 69, 69, ++ 69, 69, 69, 69, 69, 69, 69, 69, ++ 69, 69, 69, 69, 69, 69, 69, 69, ++ 69, 69, 69, 69, 69, 69, 69, 69, ++ 69, 69, 69, 69, 69, 69, 69, 69, ++ 69, 69, 69, 69, 69, 69, 69, 69, ++ 69, 69, 69, 69, 69, 69, 69, 69, ++ 69, 69, 69, 69, 69, 69, 69, 69, ++ 69, ++ }, ++ { /* Fourth byte table 39. 
*/ ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 2, 4, 6, 6, 6, 6, 6, 6, ++ 6, 6, 6, 6, 6, 6, 6, 6, ++ 18, 18, 18, 18, 18, 18, 18, 18, ++ 19, 19, 19, 19, 19, 19, 19, 19, ++ 19, 19, 19, 19, 19, 19, 19, 19, ++ 19, 20, 21, 21, 21, 22, 23, 24, ++ 25, 26, 27, 28, 31, 32, 33, 34, ++ 35, 35, 35, 35, 35, 35, 35, 35, ++ 35, 35, 35, 35, 35, 35, 35, 35, ++ 35, 35, 35, 35, 35, 35, 35, 35, ++ 35, 35, 35, 35, 35, 35, 35, 35, ++ 35, 35, 35, 35, 35, 35, 35, 35, ++ 35, 35, 35, 35, 35, 35, 35, 35, ++ 35, 35, 35, 35, 35, 35, 35, 35, ++ 35, 35, 35, 35, 35, 35, 35, 35, ++ 35, ++ }, ++ { /* Fourth byte table 40. */ ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 1, 2, 3, 4, 5, 6, 7, ++ 8, 9, 10, 11, 14, 15, 16, 17, ++ 17, 17, 17, 17, 17, 17, 17, 17, ++ 17, 17, 17, 17, 17, 17, 17, 17, ++ 17, 17, 17, 17, 17, 17, 17, 17, ++ 17, 19, 19, 19, 19, 19, 19, 19, ++ 19, 19, 19, 19, 19, 19, 19, 19, ++ 19, 19, 19, 19, 19, 19, 19, 19, ++ 19, 19, 19, 19, 19, 19, 19, 19, ++ 19, 19, 19, 19, 19, 19, 19, 19, ++ 19, 19, 19, 19, 19, 19, 19, 19, ++ 19, 19, 19, 19, 19, 19, 19, 19, ++ 19, 19, 19, 19, 19, 19, 19, 19, ++ 19, 19, 19, 19, 19, 19, 19, 19, ++ 19, 19, 19, 19, 19, 19, 19, 19, ++ 19, 19, 19, 19, 19, 19, 19, 19, ++ 19, ++ }, ++ { /* Fourth byte table 41. */ ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 3, 6, 7, 10, 10, 13, 16, ++ 18, 18, 21, 22, 23, 24, 25, 26, ++ 28, 29, 30, 31, 32, 32, 33, 35, ++ 35, 35, 36, 37, 38, 39, 40, 40, ++ 40, 42, 45, 47, 47, 48, 48, 51, ++ 51, 52, 52, 54, 58, 59, 60, 60, ++ 61, 62, 63, 63, 64, 65, 67, 69, ++ 71, 73, 74, 74, 74, 74, 76, 78, ++ 80, 80, 80, 80, 80, 80, 80, 80, ++ 80, 80, 80, 80, 80, 80, 80, 80, ++ 80, 80, 80, 80, 80, 80, 80, 80, ++ 80, 80, 80, 80, 80, 80, 80, 80, ++ 80, 80, 80, 80, 80, 80, 80, 80, ++ 80, 80, 80, 80, 80, 80, 80, 80, ++ 80, 80, 80, 80, 80, 80, 80, 80, ++ 80, 80, 80, 80, 80, 80, 80, 80, ++ 80, ++ }, ++ { /* Fourth byte table 42. 
*/ ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 3, 3, 3, 3, 3, 4, 5, ++ 6, 7, 8, 8, 8, 8, 8, 8, ++ 8, 8, 8, 8, 13, 18, 23, 28, ++ 33, 38, 43, 48, 53, 58, 63, 68, ++ 72, 73, 75, 78, 80, 81, 83, 86, ++ 90, 92, 93, 95, 98, 99, 100, 101, ++ 102, 103, 105, 108, 110, 111, 113, 116, ++ 120, 122, 123, 125, 128, 129, 130, 131, ++ 132, 132, 132, 132, 132, 132, 132, 132, ++ 132, 132, 132, 132, 132, 132, 132, 132, ++ 132, 132, 132, 132, 132, 132, 132, 132, ++ 132, 132, 132, 132, 132, 132, 132, 132, ++ 132, 132, 132, 132, 132, 132, 132, 132, ++ 132, 132, 132, 132, 132, 132, 132, 132, ++ 132, 132, 132, 132, 132, 132, 132, 132, ++ 132, 132, 132, 132, 132, 132, 132, 132, ++ 132, ++ }, ++ { /* Fourth byte table 43. */ ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 6, 12, 12, 12, 12, ++ 12, 12, 12, 12, 12, 12, 12, 12, ++ 12, 12, 12, 12, 12, 12, 12, 18, ++ 18, 18, 18, 18, 18, 18, 18, 18, ++ 18, 18, 18, 18, 18, 18, 18, 18, ++ 18, 18, 18, 18, 18, 18, 18, 18, ++ 18, 18, 18, 18, 18, 18, 18, 18, ++ 18, 18, 18, 18, 18, 18, 18, 18, ++ 18, 18, 18, 18, 18, 18, 18, 18, ++ 18, 18, 18, 18, 18, 18, 18, 18, ++ 18, 18, 18, 18, 18, 18, 18, 18, ++ 18, 18, 18, 18, 18, 18, 18, 18, ++ 18, 18, 18, 18, 18, 18, 18, 18, ++ 18, ++ }, ++ { /* Fourth byte table 44. */ ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 6, 12, ++ 18, 18, 18, 18, 18, 18, 18, 18, ++ 18, 18, 18, 18, 18, 18, 18, 18, ++ 18, 18, 18, 18, 18, 18, 18, 18, ++ 18, 18, 18, 18, 18, 18, 18, 18, ++ 18, 18, 18, 18, 18, 18, 18, 18, ++ 18, 18, 18, 18, 18, 18, 18, 18, ++ 18, 18, 18, 18, 18, 18, 18, 18, ++ 18, 18, 18, 18, 18, 18, 18, 18, ++ 18, 18, 18, 18, 18, 18, 18, 18, ++ 18, 18, 18, 18, 18, 18, 18, 18, ++ 18, 18, 18, 18, 18, 18, 18, 18, ++ 18, 18, 18, 18, 18, 18, 18, 18, ++ 18, 18, 18, 18, 18, 18, 18, 18, ++ 18, 18, 18, 18, 18, 18, 18, 18, ++ 18, ++ }, ++ { /* Fourth byte table 45. 
*/ ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 6, 6, 6, ++ 6, 6, 12, 12, 12, 18, 18, 18, ++ 18, 18, 18, 18, 18, 18, 18, 18, ++ 18, 18, 18, 18, 18, 18, 18, 18, ++ 18, 18, 18, 18, 18, 24, 24, 30, ++ 30, 30, 30, 30, 30, 36, 45, 45, ++ 51, 60, 60, 60, 60, 60, 60, 60, ++ 60, 60, 60, 60, 60, 60, 60, 60, ++ 60, 60, 60, 60, 60, 60, 60, 60, ++ 60, 60, 60, 60, 60, 60, 60, 60, ++ 60, 60, 60, 60, 60, 60, 60, 60, ++ 60, 60, 60, 60, 60, 60, 60, 60, ++ 60, 60, 60, 60, 60, 60, 60, 60, ++ 60, 60, 60, 60, 60, 60, 60, 60, ++ 60, 60, 60, 60, 60, 60, 60, 60, ++ 60, 60, 60, 60, 60, 60, 60, 60, ++ 60, ++ }, ++ { /* Fourth byte table 46. */ ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 6, 6, 6, 12, 12, 12, ++ 18, 18, 24, 24, 24, 24, 24, 24, ++ 24, 24, 24, 24, 24, 24, 24, 24, ++ 24, 24, 24, 24, 24, 24, 24, 24, ++ 24, 28, 28, 34, 34, 34, 34, 34, ++ 34, 34, 34, 34, 34, 34, 40, 44, ++ 48, 54, 60, 60, 60, 66, 72, 72, ++ 72, 78, 84, 84, 84, 84, 84, 84, ++ 84, 84, 84, 84, 84, 84, 84, 84, ++ 84, 84, 84, 84, 84, 84, 84, 84, ++ 84, 84, 84, 84, 84, 84, 84, 84, ++ 84, 84, 84, 84, 84, 84, 84, 84, ++ 84, 84, 84, 84, 84, 84, 84, 84, ++ 84, 84, 84, 84, 84, 84, 84, 84, ++ 84, 84, 84, 84, 84, 84, 84, 84, ++ 84, 84, 84, 84, 84, 84, 84, 84, ++ 84, ++ }, ++ { /* Fourth byte table 47. */ ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 6, 12, 12, 12, 18, 24, 24, ++ 24, 30, 36, 36, 36, 36, 36, 36, ++ 36, 36, 36, 36, 36, 36, 36, 36, ++ 36, 36, 36, 36, 36, 36, 36, 36, ++ 36, 36, 36, 36, 36, 36, 36, 36, ++ 36, 36, 36, 36, 36, 42, 48, 54, ++ 60, 60, 60, 60, 60, 60, 60, 60, ++ 60, 60, 60, 60, 60, 60, 60, 60, ++ 60, 60, 60, 60, 60, 60, 60, 60, ++ 60, 60, 60, 60, 60, 60, 60, 60, ++ 60, 60, 60, 60, 60, 60, 60, 60, ++ 60, 60, 60, 60, 60, 60, 60, 60, ++ 60, 60, 60, 60, 60, 60, 60, 60, ++ 60, 60, 60, 60, 60, 60, 60, 60, ++ 60, 60, 60, 60, 60, 60, 60, 60, ++ 60, 60, 60, 60, 60, 60, 60, 60, ++ 60, ++ }, ++ { /* Fourth byte table 48. 
*/ ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 6, 12, 18, 24, 24, 24, 24, ++ 24, 24, 24, 30, 36, 42, 48, 48, ++ 48, 48, 48, 48, 48, 48, 48, 48, ++ 48, 48, 48, 48, 48, 48, 48, 48, ++ 48, 48, 48, 48, 48, 48, 48, 48, ++ 48, 48, 48, 48, 48, 48, 48, 48, ++ 48, 48, 48, 48, 48, 48, 48, 48, ++ 48, 48, 48, 48, 48, 48, 48, 48, ++ 48, 48, 48, 48, 48, 48, 48, 48, ++ 48, 48, 48, 48, 48, 48, 48, 48, ++ 48, 48, 48, 48, 48, 48, 48, 48, ++ 48, 48, 48, 48, 48, 48, 48, 48, ++ 48, ++ }, ++ { /* Fourth byte table 49. */ ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 4, 8, 8, 8, 8, 8, ++ 8, 8, 8, 8, 8, 8, 8, 8, ++ 8, 8, 8, 8, 8, 8, 8, 8, ++ 8, 8, 8, 8, 8, 8, 8, 8, ++ 8, 8, 8, 8, 8, 8, 8, 8, ++ 8, 8, 8, 8, 8, 8, 8, 8, ++ 8, 8, 8, 8, 8, 8, 8, 8, ++ 8, 8, 8, 8, 8, 8, 8, 8, ++ 8, 8, 8, 8, 8, 8, 8, 8, ++ 8, 8, 8, 8, 8, 8, 8, 8, ++ 8, 8, 8, 8, 8, 8, 8, 8, ++ 8, ++ }, ++ { /* Fourth byte table 50. */ ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 1, 2, 3, 4, 5, 6, 7, ++ 8, 9, 11, 13, 15, 17, 19, 21, ++ 23, 25, 27, 29, 31, 34, 37, 40, ++ 43, 46, 49, 52, 55, 58, 62, 66, ++ 70, 70, 70, 70, 70, 70, 70, 70, ++ 70, 70, 70, 70, 70, 70, 70, 70, ++ 70, 70, 70, 70, 70, 70, 70, 70, ++ 70, 70, 70, 70, 70, 70, 70, 70, ++ 70, 70, 70, 70, 70, 70, 70, 70, ++ 70, 70, 70, 70, 70, 70, 70, 70, ++ 70, 70, 70, 70, 70, 70, 70, 70, ++ 70, 70, 70, 70, 70, 70, 70, 70, ++ 70, ++ }, ++ { /* Fourth byte table 51. 
*/ ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 4, 8, 12, 16, 20, 24, 28, ++ 32, 34, 36, 38, 40, 42, 44, 46, ++ 48, 50, 53, 56, 59, 62, 65, 68, ++ 71, 74, 77, 80, 83, 86, 89, 92, ++ 95, 98, 101, 104, 107, 110, 113, 116, ++ 119, 122, 125, 128, 131, 134, 137, 140, ++ 143, 146, 149, 152, 155, 158, 161, 162, ++ 163, 164, 165, 166, 167, 168, 169, 170, ++ 171, 171, 171, 171, 171, 171, 171, 171, ++ 171, 171, 171, 171, 171, 171, 171, 171, ++ 171, 171, 171, 171, 171, 171, 171, 171, ++ 171, 171, 171, 171, 171, 171, 171, 171, ++ 171, 171, 171, 171, 171, 171, 171, 171, ++ 171, 171, 171, 171, 171, 171, 171, 171, ++ 171, 171, 171, 171, 171, 171, 171, 171, ++ 171, 171, 171, 171, 171, 171, 171, 171, ++ 171, ++ }, ++ { /* Fourth byte table 52. */ ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 1, 2, 3, 4, 5, 6, 7, ++ 8, 9, 10, 11, 12, 13, 14, 15, ++ 16, 17, 18, 19, 20, 21, 22, 23, ++ 24, 25, 26, 27, 28, 29, 30, 31, ++ 32, 33, 34, 35, 36, 37, 38, 39, ++ 40, 41, 42, 43, 43, 43, 43, 43, ++ 43, 43, 43, 43, 43, 43, 43, 43, ++ 43, 43, 43, 43, 43, 43, 43, 43, ++ 43, 43, 43, 43, 43, 43, 43, 43, ++ 43, 43, 43, 43, 43, 43, 43, 43, ++ 43, 43, 43, 43, 43, 43, 43, 43, ++ 43, 43, 43, 43, 43, 43, 43, 43, ++ 43, 43, 43, 43, 43, 43, 43, 43, ++ 43, 43, 43, 43, 43, 43, 43, 43, ++ 43, 43, 43, 43, 43, 43, 43, 43, ++ 43, 43, 43, 43, 43, 43, 43, 43, ++ 43, ++ }, ++ { /* Fourth byte table 53. */ ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 12, 12, 12, ++ 12, 12, 12, 12, 12, 12, 12, 12, ++ 12, 12, 12, 12, 12, 12, 12, 12, ++ 12, 12, 12, 12, 12, 12, 12, 12, ++ 12, 12, 12, 12, 12, 12, 12, 12, ++ 12, 12, 12, 12, 12, 12, 12, 12, ++ 12, 12, 12, 12, 12, 12, 12, 12, ++ 12, 12, 12, 12, 12, 12, 12, 12, ++ 12, 12, 12, 12, 12, 12, 12, 12, ++ 12, 12, 12, 12, 12, 12, 12, 12, ++ 12, 12, 12, 12, 12, 12, 12, 12, ++ 12, 12, 12, 12, 12, 12, 12, 12, ++ 12, 12, 12, 12, 12, 12, 12, 12, ++ 12, 12, 12, 12, 12, 12, 12, 12, ++ 12, 12, 12, 12, 12, 12, 12, 12, ++ 12, ++ }, ++ { /* Fourth byte table 54. 
*/ ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 3, 5, 8, ++ 8, 8, 8, 8, 8, 8, 8, 8, ++ 8, 8, 8, 8, 8, 8, 8, 8, ++ 8, 8, 8, 8, 8, 8, 8, 8, ++ 8, 8, 8, 8, 8, 8, 8, 8, ++ 8, 8, 8, 8, 8, 8, 8, 8, ++ 8, 8, 8, 8, 8, 8, 8, 8, ++ 8, 8, 8, 8, 8, 8, 8, 8, ++ 8, 8, 8, 8, 8, 8, 8, 8, ++ 8, 8, 8, 8, 8, 8, 8, 8, ++ 8, ++ }, ++ { /* Fourth byte table 55. */ ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 6, 6, 6, ++ 6, 6, 6, 6, 6, 6, 6, 6, ++ 6, 6, 6, 6, 6, 6, 6, 6, ++ 6, 6, 6, 6, 6, 6, 6, 6, ++ 6, 6, 6, 6, 6, 6, 6, 6, ++ 6, 6, 6, 6, 6, 6, 6, 6, ++ 6, 6, 6, 6, 6, 6, 6, 6, ++ 6, 6, 6, 6, 6, 6, 6, 6, ++ 6, 6, 6, 6, 6, 6, 6, 6, ++ 6, 6, 6, 6, 6, 6, 6, 6, ++ 6, 6, 6, 6, 6, 6, 6, 6, ++ 6, 6, 6, 6, 6, 6, 6, 6, ++ 6, 6, 6, 6, 6, 6, 6, 6, ++ 6, ++ }, ++ { /* Fourth byte table 56. */ ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 3, 3, 3, 3, 3, 3, 3, 3, ++ 3, 3, 3, 3, 3, 3, 3, 3, ++ 3, 3, 3, 3, 3, 3, 3, 3, ++ 3, 3, 3, 3, 3, 3, 3, 3, ++ 3, 3, 3, 3, 3, 3, 3, 3, ++ 3, 3, 3, 3, 3, 3, 3, 3, ++ 3, 3, 3, 3, 3, 3, 3, 3, ++ 3, 3, 3, 3, 3, 3, 3, 3, ++ 3, 3, 3, 3, 3, 3, 3, 3, ++ 3, 3, 3, 3, 3, 3, 3, 3, ++ 3, 3, 3, 3, 3, 3, 3, 3, ++ 3, 3, 3, 3, 3, 3, 3, 3, ++ 3, ++ }, ++ { /* Fourth byte table 57. 
*/ ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 3, 3, 3, 3, ++ 3, 3, 3, 3, 3, 3, 3, 3, ++ 3, 3, 3, 3, 3, 3, 3, 3, ++ 3, 3, 3, 3, 3, 3, 3, 3, ++ 3, 3, 3, 3, 3, 3, 3, 3, ++ 3, 3, 3, 3, 3, 3, 3, 3, ++ 3, 3, 3, 3, 3, 3, 3, 3, ++ 3, 3, 3, 3, 3, 3, 3, 3, ++ 3, 3, 3, 3, 3, 3, 3, 3, ++ 3, 3, 3, 3, 3, 3, 3, 3, ++ 3, ++ }, ++ { /* Fourth byte table 58. */ ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 3, 6, 9, 12, 15, 18, 21, ++ 24, 27, 30, 33, 36, 39, 42, 45, ++ 48, 51, 54, 57, 60, 63, 66, 69, ++ 72, 75, 78, 81, 84, 87, 90, 93, ++ 96, 99, 102, 105, 108, 111, 114, 117, ++ 120, 123, 126, 129, 132, 135, 138, 141, ++ 144, 147, 150, 153, 156, 159, 162, 165, ++ 168, 171, 174, 177, 180, 183, 186, 189, ++ 192, 192, 192, 192, 192, 192, 192, 192, ++ 192, 192, 192, 192, 192, 192, 192, 192, ++ 192, 192, 192, 192, 192, 192, 192, 192, ++ 192, 192, 192, 192, 192, 192, 192, 192, ++ 192, 192, 192, 192, 192, 192, 192, 192, ++ 192, 192, 192, 192, 192, 192, 192, 192, ++ 192, 192, 192, 192, 192, 192, 192, 192, ++ 192, 192, 192, 192, 192, 192, 192, 192, ++ 192, ++ }, ++ { /* Fourth byte table 59. */ ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 3, 6, 9, 12, 15, 18, 21, ++ 24, 27, 30, 33, 36, 39, 42, 45, ++ 48, 51, 54, 57, 60, 63, 66, 69, ++ 72, 75, 78, 81, 84, 87, 90, 93, ++ 96, 99, 102, 105, 108, 111, 114, 117, ++ 120, 123, 126, 129, 132, 135, 138, 141, ++ 144, 147, 150, 153, 156, 159, 162, 165, ++ 168, 171, 174, 177, 180, 183, 186, 189, ++ 192, 192, 192, 192, 192, 192, 192, 192, ++ 192, 192, 192, 192, 192, 192, 192, 192, ++ 192, 192, 192, 192, 192, 192, 192, 192, ++ 192, 192, 192, 192, 192, 192, 192, 192, ++ 192, 192, 192, 192, 192, 192, 192, 192, ++ 192, 192, 192, 192, 192, 192, 192, 192, ++ 192, 192, 192, 192, 192, 192, 192, 192, ++ 192, 192, 192, 192, 192, 192, 192, 192, ++ 192, ++ }, ++ { /* Fourth byte table 60. 
*/ ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 3, 6, 9, 12, 15, 18, 21, ++ 24, 27, 30, 33, 36, 39, 42, 45, ++ 48, 51, 54, 57, 60, 63, 66, 69, ++ 72, 75, 78, 81, 84, 87, 90, 93, ++ 96, 99, 102, 105, 108, 111, 114, 117, ++ 120, 123, 126, 129, 132, 135, 138, 141, ++ 144, 147, 150, 153, 156, 159, 162, 165, ++ 168, 171, 174, 177, 180, 183, 186, 189, ++ 192, 192, 192, 192, 192, 192, 192, 192, ++ 192, 192, 192, 192, 192, 192, 192, 192, ++ 192, 192, 192, 192, 192, 192, 192, 192, ++ 192, 192, 192, 192, 192, 192, 192, 192, ++ 192, 192, 192, 192, 192, 192, 192, 192, ++ 192, 192, 192, 192, 192, 192, 192, 192, ++ 192, 192, 192, 192, 192, 192, 192, 192, ++ 192, 192, 192, 192, 192, 192, 192, 192, ++ 192, ++ }, ++ { /* Fourth byte table 61. */ ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 3, 6, 9, 12, 15, 18, 21, ++ 24, 27, 30, 33, 36, 39, 42, 45, ++ 48, 51, 54, 57, 60, 63, 66, 66, ++ 66, 66, 66, 66, 66, 66, 66, 66, ++ 66, 66, 66, 66, 66, 66, 66, 66, ++ 66, 66, 66, 66, 66, 66, 66, 66, ++ 66, 66, 66, 66, 66, 66, 66, 66, ++ 66, 66, 66, 66, 66, 66, 66, 66, ++ 66, 66, 66, 66, 66, 66, 66, 66, ++ 66, 66, 66, 66, 66, 66, 66, 66, ++ 66, 66, 66, 66, 66, 66, 66, 66, ++ 66, 66, 66, 66, 66, 66, 66, 66, ++ 66, 66, 66, 66, 66, 66, 66, 66, ++ 66, 66, 66, 66, 66, 66, 66, 66, ++ 66, 66, 66, 66, 66, 66, 66, 66, ++ 66, 66, 66, 66, 66, 66, 66, 66, ++ 66, ++ }, ++ { /* Fourth byte table 62. */ ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 1, 1, 1, 1, 1, 1, 1, ++ 1, 1, 1, 1, 1, 1, 1, 1, ++ 1, 1, 1, 1, 1, 1, 1, 1, ++ 1, 1, 1, 1, 1, 1, 1, 1, ++ 1, 1, 1, 1, 1, 1, 1, 1, ++ 1, 1, 1, 1, 1, 1, 1, 1, ++ 1, 1, 1, 1, 1, 1, 1, 4, ++ 4, 7, 10, 13, 13, 13, 13, 13, ++ 13, 13, 13, 13, 13, 13, 13, 13, ++ 13, 13, 13, 13, 13, 13, 13, 13, ++ 13, 13, 13, 13, 13, 13, 13, 13, ++ 13, 13, 13, 13, 13, 13, 13, 13, ++ 13, 13, 13, 13, 13, 13, 13, 13, ++ 13, 13, 13, 13, 13, 13, 13, 13, ++ 13, 13, 13, 13, 13, 13, 13, 13, ++ 13, 13, 13, 13, 13, 13, 13, 13, ++ 13, ++ }, ++ { /* Fourth byte table 63. 
*/ ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 7, 7, 14, ++ 14, 21, 21, 28, 28, 35, 35, 42, ++ 42, 49, 49, 56, 56, 63, 63, 70, ++ 70, 77, 77, 84, 84, 84, 91, 91, ++ 98, 98, 105, 105, 105, 105, 105, 105, ++ 105, 112, 119, 119, 126, 133, 133, 140, ++ 147, 147, 154, 161, 161, 168, 175, 175, ++ 175, 175, 175, 175, 175, 175, 175, 175, ++ 175, 175, 175, 175, 175, 175, 175, 175, ++ 175, 175, 175, 175, 175, 175, 175, 175, ++ 175, 175, 175, 175, 175, 175, 175, 175, ++ 175, 175, 175, 175, 175, 175, 175, 175, ++ 175, 175, 175, 175, 175, 175, 175, 175, ++ 175, 175, 175, 175, 175, 175, 175, 175, ++ 175, 175, 175, 175, 175, 175, 175, 175, ++ 175, ++ }, ++ { /* Fourth byte table 64. */ ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 7, 7, 7, ++ 7, 7, 7, 7, 11, 15, 15, 22, ++ 28, 28, 28, 28, 28, 28, 28, 28, ++ 28, 28, 28, 28, 28, 35, 35, 42, ++ 42, 49, 49, 56, 56, 63, 63, 70, ++ 70, 77, 77, 84, 84, 91, 91, 98, ++ 98, 98, 98, 98, 98, 98, 98, 98, ++ 98, 98, 98, 98, 98, 98, 98, 98, ++ 98, 98, 98, 98, 98, 98, 98, 98, ++ 98, 98, 98, 98, 98, 98, 98, 98, ++ 98, 98, 98, 98, 98, 98, 98, 98, ++ 98, 98, 98, 98, 98, 98, 98, 98, ++ 98, 98, 98, 98, 98, 98, 98, 98, ++ 98, 98, 98, 98, 98, 98, 98, 98, ++ 98, ++ }, ++ { /* Fourth byte table 65. */ ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 7, 7, 14, 14, 14, 21, 21, ++ 28, 28, 35, 35, 35, 35, 35, 35, ++ 35, 42, 49, 49, 56, 63, 63, 70, ++ 77, 77, 84, 91, 91, 98, 105, 105, ++ 105, 105, 105, 105, 105, 105, 105, 105, ++ 105, 105, 105, 105, 105, 105, 105, 105, ++ 105, 105, 105, 105, 105, 112, 112, 112, ++ 119, 126, 133, 140, 140, 140, 140, 147, ++ 153, 153, 153, 153, 153, 153, 153, 153, ++ 153, 153, 153, 153, 153, 153, 153, 153, ++ 153, 153, 153, 153, 153, 153, 153, 153, ++ 153, 153, 153, 153, 153, 153, 153, 153, ++ 153, 153, 153, 153, 153, 153, 153, 153, ++ 153, 153, 153, 153, 153, 153, 153, 153, ++ 153, 153, 153, 153, 153, 153, 153, 153, ++ 153, 153, 153, 153, 153, 153, 153, 153, ++ 153, ++ }, ++ { /* Fourth byte table 66. 
*/ ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 3, 6, 9, 12, 15, 18, ++ 21, 24, 27, 30, 33, 36, 39, 42, ++ 45, 45, 45, 45, 45, 45, 45, 45, ++ 45, 45, 45, 45, 45, 45, 45, 45, ++ 45, 45, 45, 45, 45, 45, 45, 45, ++ 45, 45, 45, 45, 45, 45, 45, 45, ++ 45, 45, 45, 45, 45, 45, 45, 45, ++ 45, 45, 45, 45, 45, 45, 45, 45, ++ 45, 45, 45, 45, 45, 45, 45, 45, ++ 45, 45, 45, 45, 45, 45, 45, 45, ++ 45, ++ }, ++ { /* Fourth byte table 67. */ ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 3, 6, 9, 12, 15, 18, 21, ++ 24, 27, 30, 33, 36, 39, 42, 45, ++ 48, 51, 54, 57, 60, 63, 66, 69, ++ 72, 75, 78, 81, 84, 87, 90, 93, ++ 96, 99, 102, 105, 108, 111, 114, 117, ++ 120, 123, 126, 129, 132, 135, 138, 141, ++ 144, 147, 150, 153, 156, 159, 162, 165, ++ 168, 171, 174, 177, 180, 183, 186, 189, ++ 192, 192, 192, 192, 192, 192, 192, 192, ++ 192, 192, 192, 192, 192, 192, 192, 192, ++ 192, 192, 192, 192, 192, 192, 192, 192, ++ 192, 192, 192, 192, 192, 192, 192, 192, ++ 192, 192, 192, 192, 192, 192, 192, 192, ++ 192, 192, 192, 192, 192, 192, 192, 192, ++ 192, 192, 192, 192, 192, 192, 192, 192, ++ 192, 192, 192, 192, 192, 192, 192, 192, ++ 192, ++ }, ++ { /* Fourth byte table 68. */ ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 3, 6, 9, 12, 15, 18, 21, ++ 24, 27, 30, 33, 36, 39, 42, 45, ++ 45, 45, 45, 48, 51, 54, 57, 60, ++ 63, 66, 69, 72, 75, 78, 81, 84, ++ 87, 87, 87, 87, 87, 87, 87, 87, ++ 87, 87, 87, 87, 87, 87, 87, 87, ++ 87, 87, 87, 87, 87, 87, 87, 87, ++ 87, 87, 87, 87, 87, 87, 87, 87, ++ 87, 87, 87, 87, 87, 87, 87, 87, ++ 87, 87, 87, 87, 87, 87, 87, 87, ++ 87, 87, 87, 87, 87, 87, 87, 87, ++ 87, 87, 87, 87, 87, 87, 87, 87, ++ 87, 87, 87, 87, 87, 87, 87, 87, ++ 87, 87, 87, 87, 87, 87, 87, 87, ++ 87, 87, 87, 87, 87, 87, 87, 87, ++ 87, 87, 87, 87, 87, 87, 87, 87, ++ 87, ++ }, ++ { /* Fourth byte table 69. 
*/ ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 5, 10, 15, 20, 20, 20, 20, ++ 20, 20, 20, 20, 20, 20, 20, 20, ++ 20, 20, 22, 24, 26, 28, 30, 32, ++ 34, 36, 38, 40, 42, 44, 46, 48, ++ 50, 53, 56, 59, 62, 65, 68, 71, ++ 74, 77, 80, 83, 86, 89, 92, 98, ++ 104, 110, 116, 122, 128, 134, 140, 146, ++ 152, 158, 164, 170, 176, 176, 176, 176, ++ 176, 176, 176, 176, 176, 176, 176, 176, ++ 176, 176, 176, 176, 176, 176, 176, 176, ++ 176, 176, 176, 176, 176, 176, 176, 176, ++ 176, 176, 176, 176, 176, 176, 176, 176, ++ 176, 176, 176, 176, 176, 176, 176, 176, ++ 176, 176, 176, 176, 176, 176, 176, 176, ++ 176, 176, 176, 176, 176, 176, 176, 176, ++ 176, 176, 176, 176, 176, 176, 176, 176, ++ 176, ++ }, ++ { /* Fourth byte table 70. */ ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 3, 6, 9, 12, 15, 18, 21, ++ 24, 27, 30, 33, 36, 39, 42, 45, ++ 48, 51, 54, 57, 60, 63, 66, 69, ++ 72, 75, 78, 81, 84, 87, 90, 93, ++ 96, 99, 102, 105, 108, 111, 114, 117, ++ 120, 123, 126, 129, 132, 135, 138, 141, ++ 144, 147, 149, 151, 153, 155, 157, 159, ++ 161, 163, 165, 167, 169, 171, 173, 175, ++ 177, 177, 177, 177, 177, 177, 177, 177, ++ 177, 177, 177, 177, 177, 177, 177, 177, ++ 177, 177, 177, 177, 177, 177, 177, 177, ++ 177, 177, 177, 177, 177, 177, 177, 177, ++ 177, 177, 177, 177, 177, 177, 177, 177, ++ 177, 177, 177, 177, 177, 177, 177, 177, ++ 177, 177, 177, 177, 177, 177, 177, 177, ++ 177, 177, 177, 177, 177, 177, 177, 177, ++ 177, ++ }, ++ { /* Fourth byte table 71. */ ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 4, 8, 12, 16, 20, 24, 28, ++ 32, 36, 41, 46, 51, 51, 51, 51, ++ 51, 54, 57, 60, 63, 66, 69, 72, ++ 75, 78, 81, 84, 87, 90, 93, 96, ++ 99, 102, 105, 108, 111, 114, 117, 120, ++ 123, 126, 129, 132, 135, 138, 141, 144, ++ 147, 150, 153, 156, 159, 162, 165, 168, ++ 171, 174, 177, 180, 183, 186, 189, 192, ++ 192, 192, 192, 192, 192, 192, 192, 192, ++ 192, 192, 192, 192, 192, 192, 192, 192, ++ 192, 192, 192, 192, 192, 192, 192, 192, ++ 192, 192, 192, 192, 192, 192, 192, 192, ++ 192, 192, 192, 192, 192, 192, 192, 192, ++ 192, 192, 192, 192, 192, 192, 192, 192, ++ 192, 192, 192, 192, 192, 192, 192, 192, ++ 192, 192, 192, 192, 192, 192, 192, 192, ++ 192, ++ }, ++ { /* Fourth byte table 72. 
*/ ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 2, 4, 7, 9, 11, 13, 15, ++ 17, 20, 24, 26, 28, 31, 34, 36, ++ 38, 40, 43, 46, 49, 52, 55, 57, ++ 59, 61, 63, 65, 68, 70, 72, 74, ++ 77, 80, 82, 85, 88, 91, 93, 96, ++ 101, 107, 109, 112, 115, 118, 121, 128, ++ 136, 138, 140, 143, 145, 147, 149, 152, ++ 154, 156, 158, 160, 162, 165, 167, 169, ++ 171, 171, 171, 171, 171, 171, 171, 171, ++ 171, 171, 171, 171, 171, 171, 171, 171, ++ 171, 171, 171, 171, 171, 171, 171, 171, ++ 171, 171, 171, 171, 171, 171, 171, 171, ++ 171, 171, 171, 171, 171, 171, 171, 171, ++ 171, 171, 171, 171, 171, 171, 171, 171, ++ 171, 171, 171, 171, 171, 171, 171, 171, ++ 171, 171, 171, 171, 171, 171, 171, 171, ++ 171, ++ }, ++ { /* Fourth byte table 73. */ ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 3, 6, 10, 12, 14, 16, 22, ++ 25, 27, 29, 31, 33, 35, 37, 39, ++ 41, 43, 45, 48, 50, 52, 55, 58, ++ 60, 64, 67, 69, 71, 73, 75, 75, ++ 75, 79, 83, 87, 91, 95, 99, 103, ++ 107, 111, 116, 121, 126, 131, 136, 141, ++ 146, 151, 156, 161, 166, 171, 176, 181, ++ 186, 191, 196, 201, 206, 211, 216, 221, ++ 221, 221, 221, 221, 221, 221, 221, 221, ++ 221, 221, 221, 221, 221, 221, 221, 221, ++ 221, 221, 221, 221, 221, 221, 221, 221, ++ 221, 221, 221, 221, 221, 221, 221, 221, ++ 221, 221, 221, 221, 221, 221, 221, 221, ++ 221, 221, 221, 221, 221, 221, 221, 221, ++ 221, 221, 221, 221, 221, 221, 221, 221, ++ 221, 221, 221, 221, 221, 221, 221, 221, ++ 221, ++ }, ++ { /* Fourth byte table 74. */ ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 4, 8, 12, 16, 20, 24, 28, ++ 32, 36, 40, 44, 48, 52, 56, 56, ++ 56, 60, 60, 64, 64, 64, 68, 72, ++ 76, 80, 84, 88, 92, 96, 100, 104, ++ 104, 108, 108, 112, 112, 112, 116, 120, ++ 120, 120, 120, 124, 128, 132, 136, 136, ++ 136, 140, 144, 148, 152, 156, 160, 164, ++ 168, 172, 176, 180, 184, 188, 192, 196, ++ 200, 200, 200, 200, 200, 200, 200, 200, ++ 200, 200, 200, 200, 200, 200, 200, 200, ++ 200, 200, 200, 200, 200, 200, 200, 200, ++ 200, 200, 200, 200, 200, 200, 200, 200, ++ 200, 200, 200, 200, 200, 200, 200, 200, ++ 200, 200, 200, 200, 200, 200, 200, 200, ++ 200, 200, 200, 200, 200, 200, 200, 200, ++ 200, 200, 200, 200, 200, 200, 200, 200, ++ 200, ++ }, ++ { /* Fourth byte table 75. 
*/ ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 4, 8, 12, 16, 20, 24, 28, ++ 32, 36, 40, 44, 48, 52, 56, 60, ++ 64, 68, 72, 76, 80, 84, 88, 92, ++ 96, 100, 104, 108, 112, 116, 120, 124, ++ 128, 132, 136, 140, 144, 148, 152, 156, ++ 160, 164, 168, 172, 172, 172, 172, 172, ++ 172, 172, 172, 172, 172, 172, 172, 172, ++ 172, 172, 172, 172, 172, 172, 172, 172, ++ 172, 172, 172, 172, 172, 172, 172, 172, ++ 172, 172, 172, 172, 172, 172, 172, 172, ++ 172, 172, 172, 172, 172, 172, 172, 172, ++ 172, 172, 172, 172, 172, 172, 172, 172, ++ 172, 172, 172, 172, 172, 172, 172, 172, ++ 172, 172, 172, 172, 172, 172, 172, 172, ++ 172, 172, 172, 172, 172, 172, 172, 172, ++ 172, 172, 172, 172, 172, 172, 172, 172, ++ 172, ++ }, ++ { /* Fourth byte table 76. */ ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 2, 4, 6, 9, 12, 14, 16, ++ 16, 16, 16, 16, 16, 16, 16, 16, ++ 16, 16, 16, 16, 20, 24, 28, 32, ++ 36, 36, 36, 36, 36, 36, 41, 41, ++ 46, 48, 50, 52, 54, 56, 58, 60, ++ 62, 64, 65, 70, 75, 82, 89, 94, ++ 99, 104, 109, 114, 119, 124, 129, 134, ++ 134, 139, 144, 149, 154, 159, 159, 164, ++ 164, 164, 164, 164, 164, 164, 164, 164, ++ 164, 164, 164, 164, 164, 164, 164, 164, ++ 164, 164, 164, 164, 164, 164, 164, 164, ++ 164, 164, 164, 164, 164, 164, 164, 164, ++ 164, 164, 164, 164, 164, 164, 164, 164, ++ 164, 164, 164, 164, 164, 164, 164, 164, ++ 164, 164, 164, 164, 164, 164, 164, 164, ++ 164, 164, 164, 164, 164, 164, 164, 164, ++ 164, ++ }, ++ { /* Fourth byte table 77. */ ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 5, 10, 10, 15, 20, 20, 25, ++ 30, 35, 40, 45, 50, 55, 60, 65, ++ 69, 71, 73, 75, 77, 79, 81, 83, ++ 85, 87, 89, 91, 93, 95, 97, 99, ++ 101, 103, 105, 107, 109, 111, 113, 115, ++ 117, 119, 121, 123, 125, 127, 129, 131, ++ 133, 135, 137, 139, 141, 143, 145, 147, ++ 149, 151, 153, 155, 157, 159, 161, 163, ++ 165, 165, 165, 165, 165, 165, 165, 165, ++ 165, 165, 165, 165, 165, 165, 165, 165, ++ 165, 165, 165, 165, 165, 165, 165, 165, ++ 165, 165, 165, 165, 165, 165, 165, 165, ++ 165, 165, 165, 165, 165, 165, 165, 165, ++ 165, 165, 165, 165, 165, 165, 165, 165, ++ 165, 165, 165, 165, 165, 165, 165, 165, ++ 165, 165, 165, 165, 165, 165, 165, 165, ++ 165, ++ }, ++ { /* Fourth byte table 78. 
*/ ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 2, 4, 6, 8, 10, 12, 14, ++ 16, 18, 20, 22, 24, 26, 28, 30, ++ 32, 34, 36, 38, 40, 42, 44, 46, ++ 48, 50, 52, 54, 56, 58, 60, 62, ++ 64, 66, 68, 70, 72, 76, 80, 82, ++ 84, 86, 88, 90, 92, 94, 96, 98, ++ 100, 104, 108, 108, 108, 108, 108, 108, ++ 108, 108, 108, 108, 108, 108, 108, 108, ++ 108, 108, 108, 108, 108, 108, 108, 108, ++ 108, 108, 108, 108, 108, 108, 108, 108, ++ 108, 108, 108, 108, 108, 108, 108, 108, ++ 108, 108, 108, 108, 108, 108, 108, 108, ++ 108, 108, 108, 108, 108, 108, 108, 108, ++ 108, 108, 108, 108, 108, 108, 108, 108, ++ 108, 108, 108, 108, 108, 108, 108, 108, ++ 108, 108, 108, 108, 108, 108, 108, 108, ++ 108, ++ }, ++ { /* Fourth byte table 79. */ ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 2, 4, 6, 8, ++ 10, 12, 14, 16, 18, 20, 24, 26, ++ 28, 30, 32, 34, 36, 38, 40, 42, ++ 44, 46, 48, 54, 60, 66, 72, 78, ++ 84, 90, 96, 102, 108, 114, 120, 126, ++ 132, 138, 144, 150, 156, 158, 160, 162, ++ 164, 164, 164, 164, 164, 164, 164, 164, ++ 164, 164, 164, 164, 164, 164, 164, 164, ++ 164, 164, 164, 164, 164, 164, 164, 164, ++ 164, 164, 164, 164, 164, 164, 164, 164, ++ 164, 164, 164, 164, 164, 164, 164, 164, ++ 164, 164, 164, 164, 164, 164, 164, 164, ++ 164, 164, 164, 164, 164, 164, 164, 164, ++ 164, 164, 164, 164, 164, 164, 164, 164, ++ 164, ++ }, ++ { /* Fourth byte table 80. */ ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 4, 8, 12, 16, 20, 24, 28, ++ 32, 36, 40, 44, 48, 52, 56, 60, ++ 64, 68, 72, 76, 80, 84, 88, 92, ++ 96, 100, 104, 108, 112, 116, 120, 124, ++ 128, 132, 136, 140, 144, 148, 152, 156, ++ 160, 164, 168, 172, 176, 180, 184, 188, ++ 192, 196, 200, 204, 208, 212, 216, 220, ++ 224, 228, 232, 236, 240, 244, 248, 248, ++ 248, 248, 248, 248, 248, 248, 248, 248, ++ 248, 248, 248, 248, 248, 248, 248, 248, ++ 248, 248, 248, 248, 248, 248, 248, 248, ++ 248, 248, 248, 248, 248, 248, 248, 248, ++ 248, 248, 248, 248, 248, 248, 248, 248, ++ 248, 248, 248, 248, 248, 248, 248, 248, ++ 248, 248, 248, 248, 248, 248, 248, 248, ++ 248, 248, 248, 248, 248, 248, 248, 248, ++ 248, ++ }, ++ { /* Fourth byte table 81. 
*/ ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 6, 12, 18, 24, 30, 36, 42, ++ 48, 48, 48, 48, 48, 48, 48, 48, ++ 48, 48, 48, 48, 48, 48, 48, 48, ++ 48, 48, 48, 48, 48, 48, 48, 48, ++ 48, 48, 48, 48, 48, 48, 48, 48, ++ 48, 48, 48, 48, 48, 48, 48, 48, ++ 48, 54, 60, 68, 76, 84, 92, 100, ++ 108, 116, 122, 155, 170, 178, 178, 178, ++ 178, 178, 178, 178, 178, 178, 178, 178, ++ 178, 178, 178, 178, 178, 178, 178, 178, ++ 178, 178, 178, 178, 178, 178, 178, 178, ++ 178, 178, 178, 178, 178, 178, 178, 178, ++ 178, 178, 178, 178, 178, 178, 178, 178, ++ 178, 178, 178, 178, 178, 178, 178, 178, ++ 178, 178, 178, 178, 178, 178, 178, 178, ++ 178, 178, 178, 178, 178, 178, 178, 178, ++ 178, ++ }, ++ { /* Fourth byte table 82. */ ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 2, 5, 8, 9, 10, 11, 12, ++ 13, 14, 17, 20, 23, 26, 29, 32, ++ 35, 35, 35, 35, 35, 35, 35, 35, ++ 35, 35, 35, 35, 35, 35, 35, 35, ++ 35, 35, 35, 35, 35, 35, 35, 35, ++ 35, 35, 35, 35, 35, 35, 35, 35, ++ 35, 35, 35, 35, 35, 35, 35, 35, ++ 35, 35, 35, 35, 35, 35, 35, 35, ++ 35, 35, 35, 35, 35, 35, 35, 35, ++ 35, 35, 35, 35, 35, 35, 35, 35, ++ 35, ++ }, ++ { /* Fourth byte table 83. */ ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 3, 6, 9, 12, 15, 15, 15, ++ 15, 15, 18, 21, 24, 27, 28, 29, ++ 30, 31, 34, 35, 35, 36, 37, 38, ++ 39, 42, 43, 44, 45, 46, 49, 52, ++ 53, 54, 55, 56, 57, 58, 59, 60, ++ 60, 61, 62, 63, 64, 64, 64, 64, ++ 64, 67, 71, 74, 74, 77, 77, 80, ++ 84, 87, 91, 94, 98, 101, 105, 108, ++ 112, 112, 112, 112, 112, 112, 112, 112, ++ 112, 112, 112, 112, 112, 112, 112, 112, ++ 112, 112, 112, 112, 112, 112, 112, 112, ++ 112, 112, 112, 112, 112, 112, 112, 112, ++ 112, 112, 112, 112, 112, 112, 112, 112, ++ 112, 112, 112, 112, 112, 112, 112, 112, ++ 112, 112, 112, 112, 112, 112, 112, 112, ++ 112, 112, 112, 112, 112, 112, 112, 112, ++ 112, ++ }, ++ { /* Fourth byte table 84. 
*/ ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 2, 6, 10, 14, 18, 22, 26, ++ 30, 34, 38, 42, 46, 50, 52, 54, ++ 56, 58, 60, 62, 64, 66, 68, 70, ++ 72, 74, 76, 78, 80, 82, 84, 86, ++ 88, 90, 92, 94, 96, 98, 100, 102, ++ 104, 106, 108, 110, 112, 114, 116, 118, ++ 120, 122, 124, 126, 128, 130, 132, 134, ++ 136, 138, 140, 142, 144, 146, 148, 150, ++ 152, 152, 152, 152, 152, 152, 152, 152, ++ 152, 152, 152, 152, 152, 152, 152, 152, ++ 152, 152, 152, 152, 152, 152, 152, 152, ++ 152, 152, 152, 152, 152, 152, 152, 152, ++ 152, 152, 152, 152, 152, 152, 152, 152, ++ 152, 152, 152, 152, 152, 152, 152, 152, ++ 152, 152, 152, 152, 152, 152, 152, 152, ++ 152, 152, 152, 152, 152, 152, 152, 152, ++ 152, ++ }, ++ { /* Fourth byte table 85. */ ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 2, 4, 6, 8, 10, 12, 14, ++ 16, 18, 20, 22, 24, 26, 28, 30, ++ 32, 34, 36, 38, 40, 42, 44, 46, ++ 48, 50, 52, 54, 56, 58, 60, 62, ++ 64, 66, 68, 70, 72, 74, 76, 78, ++ 80, 82, 84, 86, 88, 90, 92, 94, ++ 96, 98, 100, 102, 104, 106, 112, 118, ++ 124, 130, 136, 142, 146, 150, 150, 150, ++ 150, 150, 150, 150, 150, 150, 150, 150, ++ 150, 150, 150, 150, 150, 150, 150, 150, ++ 150, 150, 150, 150, 150, 150, 150, 150, ++ 150, 150, 150, 150, 150, 150, 150, 150, ++ 150, 150, 150, 150, 150, 150, 150, 150, ++ 150, 150, 150, 150, 150, 150, 150, 150, ++ 150, 150, 150, 150, 150, 150, 150, 150, ++ 150, 150, 150, 150, 150, 150, 150, 150, ++ 150, ++ }, ++ { /* Fourth byte table 86. */ ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 1, 2, 3, 4, 5, 6, ++ 7, 8, 9, 10, 11, 12, 13, 14, ++ 15, 16, 17, 18, 19, 20, 21, 22, ++ 23, 24, 25, 26, 27, 28, 29, 30, ++ 31, 32, 33, 34, 35, 36, 37, 38, ++ 39, 40, 41, 42, 43, 44, 45, 46, ++ 47, 48, 49, 50, 51, 52, 53, 54, ++ 55, 56, 57, 58, 59, 60, 61, 62, ++ 63, 63, 63, 63, 63, 63, 63, 63, ++ 63, 63, 63, 63, 63, 63, 63, 63, ++ 63, 63, 63, 63, 63, 63, 63, 63, ++ 63, 63, 63, 63, 63, 63, 63, 63, ++ 63, 63, 63, 63, 63, 63, 63, 63, ++ 63, 63, 63, 63, 63, 63, 63, 63, ++ 63, 63, 63, 63, 63, 63, 63, 63, ++ 63, 63, 63, 63, 63, 63, 63, 63, ++ 63, ++ }, ++ { /* Fourth byte table 87. 
*/ ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 1, 2, 3, 4, 5, 6, 7, ++ 8, 9, 10, 11, 12, 13, 14, 15, ++ 16, 17, 18, 19, 20, 21, 22, 23, ++ 24, 25, 26, 27, 28, 29, 30, 31, ++ 34, 37, 40, 43, 46, 49, 52, 55, ++ 58, 61, 64, 67, 70, 73, 76, 79, ++ 82, 85, 88, 91, 94, 97, 100, 103, ++ 106, 109, 112, 115, 118, 121, 124, 127, ++ 130, 130, 130, 130, 130, 130, 130, 130, ++ 130, 130, 130, 130, 130, 130, 130, 130, ++ 130, 130, 130, 130, 130, 130, 130, 130, ++ 130, 130, 130, 130, 130, 130, 130, 130, ++ 130, 130, 130, 130, 130, 130, 130, 130, ++ 130, 130, 130, 130, 130, 130, 130, 130, ++ 130, 130, 130, 130, 130, 130, 130, 130, ++ 130, 130, 130, 130, 130, 130, 130, 130, ++ 130, ++ }, ++ { /* Fourth byte table 88. */ ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 3, 6, 9, 12, 15, 18, 21, ++ 24, 27, 30, 33, 36, 39, 42, 45, ++ 48, 51, 54, 57, 60, 63, 66, 69, ++ 72, 75, 78, 81, 84, 87, 90, 93, ++ 96, 99, 102, 105, 108, 111, 114, 117, ++ 120, 123, 126, 129, 132, 135, 138, 141, ++ 144, 147, 150, 153, 156, 159, 162, 165, ++ 168, 171, 174, 177, 180, 183, 186, 189, ++ 189, 189, 189, 189, 189, 189, 189, 189, ++ 189, 189, 189, 189, 189, 189, 189, 189, ++ 189, 189, 189, 189, 189, 189, 189, 189, ++ 189, 189, 189, 189, 189, 189, 189, 189, ++ 189, 189, 189, 189, 189, 189, 189, 189, ++ 189, 189, 189, 189, 189, 189, 189, 189, ++ 189, 189, 189, 189, 189, 189, 189, 189, ++ 189, 189, 189, 189, 189, 189, 189, 189, ++ 189, ++ }, ++ { /* Fourth byte table 89. */ ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 3, 6, 9, 12, 15, ++ 18, 18, 18, 21, 24, 27, 30, 33, ++ 36, 36, 36, 39, 42, 45, 48, 51, ++ 54, 54, 54, 57, 60, 63, 63, 63, ++ 63, 65, 67, 69, 72, 74, 76, 79, ++ 79, 82, 85, 88, 91, 94, 97, 100, ++ 100, 100, 100, 100, 100, 100, 100, 100, ++ 100, 100, 100, 100, 100, 100, 100, 100, ++ 100, 100, 100, 100, 100, 100, 100, 100, ++ 100, 100, 100, 100, 100, 100, 100, 100, ++ 100, 100, 100, 100, 100, 100, 100, 100, ++ 100, 100, 100, 100, 100, 100, 100, 100, ++ 100, 100, 100, 100, 100, 100, 100, 100, ++ 100, 100, 100, 100, 100, 100, 100, 100, ++ 100, 100, 100, 100, 100, 100, 100, 100, ++ 100, 100, 100, 100, 100, 100, 100, 100, ++ 100, ++ }, ++ { /* Fourth byte table 90. 
*/ ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 9, ++ 18, 31, 44, 57, 70, 83, 83, 83, ++ 83, 83, 83, 83, 83, 83, 83, 83, ++ 83, 83, 83, 83, 83, 83, 83, 83, ++ 83, 83, 83, 83, 83, 83, 83, 83, ++ 83, 83, 83, 83, 83, 83, 83, 83, ++ 83, 83, 83, 83, 83, 83, 83, 83, ++ 83, 83, 83, 83, 83, 83, 83, 83, ++ 83, 83, 83, 83, 83, 83, 83, 83, ++ 83, 83, 83, 83, 83, 83, 83, 83, ++ 83, 83, 83, 83, 83, 83, 83, 83, ++ 83, 83, 83, 83, 83, 83, 83, 83, ++ 83, 83, 83, 83, 83, 83, 83, 83, ++ 83, ++ }, ++ { /* Fourth byte table 91. */ ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 9, 18, 31, 44, ++ 57, 57, 57, 57, 57, 57, 57, 57, ++ 57, 57, 57, 57, 57, 57, 57, 57, ++ 57, 57, 57, 57, 57, 57, 57, 57, ++ 57, 57, 57, 57, 57, 57, 57, 57, ++ 57, 57, 57, 57, 57, 57, 57, 57, ++ 57, 57, 57, 57, 57, 57, 57, 57, ++ 57, 57, 57, 57, 57, 57, 57, 57, ++ 57, 57, 57, 57, 57, 57, 57, 57, ++ 57, ++ }, ++ { /* Fourth byte table 92. */ ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 13, 13, 13, 13, 13, 13, 13, ++ 13, 13, 13, 13, 13, 13, 13, 13, ++ 13, 13, 13, 13, 13, 13, 13, 13, ++ 13, 13, 13, 13, 13, 13, 13, 13, ++ 13, 13, 13, 13, 13, 13, 13, 13, ++ 13, 13, 13, 13, 13, 13, 13, 13, ++ 13, 13, 13, 13, 13, 13, 13, 13, ++ 13, 13, 13, 13, 13, 13, 13, 13, ++ 13, 13, 13, 13, 13, 13, 13, 13, ++ 13, 13, 13, 13, 13, 13, 13, 13, ++ 13, 13, 13, 13, 13, 13, 13, 13, ++ 13, 13, 13, 13, 13, 13, 13, 13, ++ 13, 13, 13, 13, 13, 13, 13, 13, ++ 13, 13, 13, 13, 13, 13, 13, 13, ++ 13, 13, 13, 13, 13, 13, 13, 13, ++ 13, 13, 13, 13, 13, 13, 13, 13, ++ 13, ++ }, ++ { /* Fourth byte table 93. 
*/ ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 1, 2, 3, 4, 5, 6, 7, ++ 8, 9, 10, 11, 12, 13, 14, 15, ++ 16, 17, 18, 19, 20, 21, 22, 23, ++ 24, 25, 26, 27, 28, 29, 30, 31, ++ 32, 33, 34, 35, 36, 37, 38, 39, ++ 40, 41, 42, 43, 44, 45, 46, 47, ++ 48, 49, 50, 51, 52, 53, 54, 55, ++ 56, 57, 58, 59, 60, 61, 62, 63, ++ 64, 64, 64, 64, 64, 64, 64, 64, ++ 64, 64, 64, 64, 64, 64, 64, 64, ++ 64, 64, 64, 64, 64, 64, 64, 64, ++ 64, 64, 64, 64, 64, 64, 64, 64, ++ 64, 64, 64, 64, 64, 64, 64, 64, ++ 64, 64, 64, 64, 64, 64, 64, 64, ++ 64, 64, 64, 64, 64, 64, 64, 64, ++ 64, 64, 64, 64, 64, 64, 64, 64, ++ 64, ++ }, ++ { /* Fourth byte table 94. */ ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 1, 2, 3, 4, 5, 6, 7, ++ 8, 9, 10, 11, 12, 13, 14, 15, ++ 16, 17, 18, 19, 20, 21, 21, 22, ++ 23, 24, 25, 26, 27, 28, 29, 30, ++ 31, 32, 33, 34, 35, 36, 37, 38, ++ 39, 40, 41, 42, 43, 44, 45, 46, ++ 47, 48, 49, 50, 51, 52, 53, 54, ++ 55, 56, 57, 58, 59, 60, 61, 62, ++ 63, 63, 63, 63, 63, 63, 63, 63, ++ 63, 63, 63, 63, 63, 63, 63, 63, ++ 63, 63, 63, 63, 63, 63, 63, 63, ++ 63, 63, 63, 63, 63, 63, 63, 63, ++ 63, 63, 63, 63, 63, 63, 63, 63, ++ 63, 63, 63, 63, 63, 63, 63, 63, ++ 63, 63, 63, 63, 63, 63, 63, 63, ++ 63, 63, 63, 63, 63, 63, 63, 63, ++ 63, ++ }, ++ { /* Fourth byte table 95. */ ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 1, 2, 3, 4, 5, 6, 7, ++ 8, 9, 10, 11, 12, 13, 14, 15, ++ 16, 17, 18, 19, 20, 21, 22, 23, ++ 24, 25, 26, 27, 28, 29, 29, 30, ++ 31, 31, 31, 32, 32, 32, 33, 34, ++ 34, 34, 35, 36, 37, 38, 38, 39, ++ 40, 41, 42, 43, 44, 45, 46, 47, ++ 48, 49, 50, 50, 51, 51, 52, 53, ++ 54, 54, 54, 54, 54, 54, 54, 54, ++ 54, 54, 54, 54, 54, 54, 54, 54, ++ 54, 54, 54, 54, 54, 54, 54, 54, ++ 54, 54, 54, 54, 54, 54, 54, 54, ++ 54, 54, 54, 54, 54, 54, 54, 54, ++ 54, 54, 54, 54, 54, 54, 54, 54, ++ 54, 54, 54, 54, 54, 54, 54, 54, ++ 54, 54, 54, 54, 54, 54, 54, 54, ++ 54, ++ }, ++ { /* Fourth byte table 96. 
*/ ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 1, 1, 2, 3, 3, 4, 5, ++ 6, 7, 8, 9, 10, 11, 12, 13, ++ 14, 15, 16, 17, 18, 19, 20, 21, ++ 22, 23, 24, 25, 26, 27, 28, 29, ++ 30, 31, 32, 33, 34, 35, 36, 37, ++ 38, 39, 40, 41, 42, 43, 44, 45, ++ 46, 47, 48, 49, 50, 51, 52, 53, ++ 54, 55, 56, 57, 58, 59, 60, 61, ++ 62, 62, 62, 62, 62, 62, 62, 62, ++ 62, 62, 62, 62, 62, 62, 62, 62, ++ 62, 62, 62, 62, 62, 62, 62, 62, ++ 62, 62, 62, 62, 62, 62, 62, 62, ++ 62, 62, 62, 62, 62, 62, 62, 62, ++ 62, 62, 62, 62, 62, 62, 62, 62, ++ 62, 62, 62, 62, 62, 62, 62, 62, ++ 62, 62, 62, 62, 62, 62, 62, 62, ++ 62, ++ }, ++ { /* Fourth byte table 97. */ ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 1, 2, 3, 4, 5, 6, 6, ++ 7, 8, 9, 10, 10, 10, 11, 12, ++ 13, 14, 15, 16, 17, 18, 18, 19, ++ 20, 21, 22, 23, 24, 25, 25, 26, ++ 27, 28, 29, 30, 31, 32, 33, 34, ++ 35, 36, 37, 38, 39, 40, 41, 42, ++ 43, 44, 45, 46, 47, 48, 49, 50, ++ 51, 52, 53, 53, 54, 55, 56, 57, ++ 57, 57, 57, 57, 57, 57, 57, 57, ++ 57, 57, 57, 57, 57, 57, 57, 57, ++ 57, 57, 57, 57, 57, 57, 57, 57, ++ 57, 57, 57, 57, 57, 57, 57, 57, ++ 57, 57, 57, 57, 57, 57, 57, 57, ++ 57, 57, 57, 57, 57, 57, 57, 57, ++ 57, 57, 57, 57, 57, 57, 57, 57, ++ 57, 57, 57, 57, 57, 57, 57, 57, ++ 57, ++ }, ++ { /* Fourth byte table 98. */ ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 1, 2, 3, 4, 5, 5, 6, ++ 6, 6, 6, 7, 8, 9, 10, 11, ++ 12, 13, 13, 14, 15, 16, 17, 18, ++ 19, 20, 21, 22, 23, 24, 25, 26, ++ 27, 28, 29, 30, 31, 32, 33, 34, ++ 35, 36, 37, 38, 39, 40, 41, 42, ++ 43, 44, 45, 46, 47, 48, 49, 50, ++ 51, 52, 53, 54, 55, 56, 57, 58, ++ 59, 59, 59, 59, 59, 59, 59, 59, ++ 59, 59, 59, 59, 59, 59, 59, 59, ++ 59, 59, 59, 59, 59, 59, 59, 59, ++ 59, 59, 59, 59, 59, 59, 59, 59, ++ 59, 59, 59, 59, 59, 59, 59, 59, ++ 59, 59, 59, 59, 59, 59, 59, 59, ++ 59, 59, 59, 59, 59, 59, 59, 59, ++ 59, 59, 59, 59, 59, 59, 59, 59, ++ 59, ++ }, ++ { /* Fourth byte table 99. 
*/ ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 1, 2, 3, 4, 5, 6, 7, ++ 8, 9, 10, 11, 12, 13, 14, 15, ++ 16, 17, 18, 19, 20, 21, 22, 23, ++ 24, 25, 26, 27, 28, 29, 30, 31, ++ 32, 33, 34, 35, 36, 37, 38, 39, ++ 40, 41, 42, 43, 44, 45, 46, 47, ++ 48, 49, 50, 51, 52, 53, 54, 55, ++ 56, 57, 58, 59, 60, 61, 62, 63, ++ 64, 64, 64, 64, 64, 64, 64, 64, ++ 64, 64, 64, 64, 64, 64, 64, 64, ++ 64, 64, 64, 64, 64, 64, 64, 64, ++ 64, 64, 64, 64, 64, 64, 64, 64, ++ 64, 64, 64, 64, 64, 64, 64, 64, ++ 64, 64, 64, 64, 64, 64, 64, 64, ++ 64, 64, 64, 64, 64, 64, 64, 64, ++ 64, 64, 64, 64, 64, 64, 64, 64, ++ 64, ++ }, ++ { /* Fourth byte table 100. */ ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 1, 2, 3, 4, 5, 6, 7, ++ 8, 9, 10, 11, 12, 13, 14, 15, ++ 16, 17, 18, 19, 20, 21, 22, 23, ++ 24, 25, 26, 27, 28, 29, 30, 31, ++ 32, 33, 34, 35, 36, 37, 38, 39, ++ 40, 41, 42, 43, 44, 45, 46, 47, ++ 48, 49, 50, 51, 52, 53, 54, 55, ++ 56, 57, 58, 59, 60, 61, 62, 63, ++ 64, 64, 64, 64, 64, 64, 64, 64, ++ 64, 64, 64, 64, 64, 64, 64, 64, ++ 64, 64, 64, 64, 64, 64, 64, 64, ++ 64, 64, 64, 64, 64, 64, 64, 64, ++ 64, 64, 64, 64, 64, 64, 64, 64, ++ 64, 64, 64, 64, 64, 64, 64, 64, ++ 64, 64, 64, 64, 64, 64, 64, 64, ++ 64, 64, 64, 64, 64, 64, 64, 64, ++ 64, ++ }, ++ { /* Fourth byte table 101. */ ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 1, 2, 3, 4, 5, 6, 7, ++ 8, 9, 10, 11, 12, 13, 14, 15, ++ 16, 17, 18, 19, 20, 21, 22, 23, ++ 24, 25, 26, 27, 28, 29, 30, 31, ++ 32, 33, 34, 35, 36, 37, 38, 39, ++ 40, 41, 42, 43, 44, 45, 46, 47, ++ 48, 49, 50, 51, 52, 53, 54, 55, ++ 56, 57, 58, 59, 60, 61, 62, 63, ++ 64, 64, 64, 64, 64, 64, 64, 64, ++ 64, 64, 64, 64, 64, 64, 64, 64, ++ 64, 64, 64, 64, 64, 64, 64, 64, ++ 64, 64, 64, 64, 64, 64, 64, 64, ++ 64, 64, 64, 64, 64, 64, 64, 64, ++ 64, 64, 64, 64, 64, 64, 64, 64, ++ 64, 64, 64, 64, 64, 64, 64, 64, ++ 64, 64, 64, 64, 64, 64, 64, 64, ++ 64, ++ }, ++ { /* Fourth byte table 102. 
*/ ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 1, 2, 3, 4, 5, 6, 7, ++ 8, 9, 10, 11, 12, 13, 14, 15, ++ 16, 17, 18, 19, 20, 21, 22, 23, ++ 24, 25, 26, 27, 28, 29, 30, 31, ++ 32, 33, 34, 35, 36, 37, 38, 39, ++ 40, 41, 42, 43, 44, 45, 46, 47, ++ 48, 49, 50, 51, 52, 53, 54, 55, ++ 56, 57, 58, 59, 60, 61, 62, 63, ++ 64, 64, 64, 64, 64, 64, 64, 64, ++ 64, 64, 64, 64, 64, 64, 64, 64, ++ 64, 64, 64, 64, 64, 64, 64, 64, ++ 64, 64, 64, 64, 64, 64, 64, 64, ++ 64, 64, 64, 64, 64, 64, 64, 64, ++ 64, 64, 64, 64, 64, 64, 64, 64, ++ 64, 64, 64, 64, 64, 64, 64, 64, ++ 64, 64, 64, 64, 64, 64, 64, 64, ++ 64, ++ }, ++ { /* Fourth byte table 103. */ ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 1, 2, 3, 4, 5, 6, 7, ++ 8, 9, 10, 11, 12, 13, 14, 15, ++ 16, 17, 18, 19, 20, 21, 22, 23, ++ 24, 25, 26, 27, 28, 29, 30, 31, ++ 32, 33, 34, 35, 36, 36, 36, 36, ++ 36, 38, 40, 42, 44, 46, 48, 50, ++ 52, 54, 56, 58, 60, 62, 64, 66, ++ 68, 70, 72, 74, 76, 78, 80, 82, ++ 84, 84, 84, 84, 84, 84, 84, 84, ++ 84, 84, 84, 84, 84, 84, 84, 84, ++ 84, 84, 84, 84, 84, 84, 84, 84, ++ 84, 84, 84, 84, 84, 84, 84, 84, ++ 84, 84, 84, 84, 84, 84, 84, 84, ++ 84, 84, 84, 84, 84, 84, 84, 84, ++ 84, 84, 84, 84, 84, 84, 84, 84, ++ 84, 84, 84, 84, 84, 84, 84, 84, ++ 84, ++ }, ++ { /* Fourth byte table 104. */ ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 2, 5, 7, 9, 11, 13, 15, ++ 17, 19, 21, 23, 25, 27, 29, 31, ++ 33, 35, 37, 39, 41, 43, 45, 47, ++ 49, 51, 53, 55, 58, 60, 62, 64, ++ 66, 68, 70, 72, 74, 76, 78, 80, ++ 82, 84, 86, 88, 90, 92, 94, 96, ++ 98, 100, 102, 104, 106, 108, 110, 112, ++ 114, 116, 118, 120, 123, 125, 127, 129, ++ 131, 131, 131, 131, 131, 131, 131, 131, ++ 131, 131, 131, 131, 131, 131, 131, 131, ++ 131, 131, 131, 131, 131, 131, 131, 131, ++ 131, 131, 131, 131, 131, 131, 131, 131, ++ 131, 131, 131, 131, 131, 131, 131, 131, ++ 131, 131, 131, 131, 131, 131, 131, 131, ++ 131, 131, 131, 131, 131, 131, 131, 131, ++ 131, 131, 131, 131, 131, 131, 131, 131, ++ 131, ++ }, ++ { /* Fourth byte table 105. 
*/ ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 2, 4, 6, 8, 10, 12, 14, ++ 16, 18, 20, 22, 24, 26, 28, 30, ++ 32, 34, 36, 38, 40, 42, 45, 47, ++ 49, 51, 53, 55, 57, 59, 61, 63, ++ 65, 67, 69, 71, 73, 75, 77, 79, ++ 81, 83, 85, 87, 89, 91, 93, 95, ++ 97, 99, 101, 103, 105, 107, 110, 112, ++ 114, 116, 118, 120, 122, 124, 126, 128, ++ 130, 130, 130, 130, 130, 130, 130, 130, ++ 130, 130, 130, 130, 130, 130, 130, 130, ++ 130, 130, 130, 130, 130, 130, 130, 130, ++ 130, 130, 130, 130, 130, 130, 130, 130, ++ 130, 130, 130, 130, 130, 130, 130, 130, ++ 130, 130, 130, 130, 130, 130, 130, 130, ++ 130, 130, 130, 130, 130, 130, 130, 130, ++ 130, 130, 130, 130, 130, 130, 130, 130, ++ 130, ++ }, ++ { /* Fourth byte table 106. */ ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 2, 4, 6, 8, 10, 12, 14, ++ 16, 18, 20, 22, 24, 26, 28, 30, ++ 33, 35, 37, 39, 41, 43, 45, 47, ++ 49, 51, 53, 55, 57, 59, 61, 63, ++ 65, 67, 69, 71, 73, 75, 77, 79, ++ 81, 83, 85, 87, 89, 91, 93, 95, ++ 98, 100, 102, 104, 106, 108, 110, 112, ++ 114, 116, 118, 120, 122, 124, 126, 128, ++ 130, 130, 130, 130, 130, 130, 130, 130, ++ 130, 130, 130, 130, 130, 130, 130, 130, ++ 130, 130, 130, 130, 130, 130, 130, 130, ++ 130, 130, 130, 130, 130, 130, 130, 130, ++ 130, 130, 130, 130, 130, 130, 130, 130, ++ 130, 130, 130, 130, 130, 130, 130, 130, ++ 130, 130, 130, 130, 130, 130, 130, 130, ++ 130, 130, 130, 130, 130, 130, 130, 130, ++ 130, ++ }, ++ { /* Fourth byte table 107. */ ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 2, 4, 6, 8, 10, 12, 14, ++ 16, 18, 21, 23, 25, 27, 29, 31, ++ 33, 35, 37, 39, 41, 43, 45, 47, ++ 49, 51, 53, 55, 57, 59, 61, 63, ++ 65, 67, 69, 71, 73, 75, 77, 79, ++ 81, 83, 86, 88, 90, 92, 94, 96, ++ 98, 100, 102, 104, 106, 108, 110, 112, ++ 114, 116, 118, 120, 122, 124, 126, 128, ++ 130, 130, 130, 130, 130, 130, 130, 130, ++ 130, 130, 130, 130, 130, 130, 130, 130, ++ 130, 130, 130, 130, 130, 130, 130, 130, ++ 130, 130, 130, 130, 130, 130, 130, 130, ++ 130, 130, 130, 130, 130, 130, 130, 130, ++ 130, 130, 130, 130, 130, 130, 130, 130, ++ 130, 130, 130, 130, 130, 130, 130, 130, ++ 130, 130, 130, 130, 130, 130, 130, 130, ++ 130, ++ }, ++ { /* Fourth byte table 108. 
*/ ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 2, 4, 6, 9, 11, 13, 15, ++ 17, 19, 21, 21, 21, 21, 21, 22, ++ 23, 24, 25, 26, 27, 28, 29, 30, ++ 31, 32, 33, 34, 35, 36, 37, 38, ++ 39, 40, 41, 42, 43, 44, 45, 46, ++ 47, 48, 49, 50, 51, 52, 53, 54, ++ 55, 56, 57, 58, 59, 60, 61, 62, ++ 63, 64, 65, 66, 67, 68, 69, 70, ++ 71, 71, 71, 71, 71, 71, 71, 71, ++ 71, 71, 71, 71, 71, 71, 71, 71, ++ 71, 71, 71, 71, 71, 71, 71, 71, ++ 71, 71, 71, 71, 71, 71, 71, 71, ++ 71, 71, 71, 71, 71, 71, 71, 71, ++ 71, 71, 71, 71, 71, 71, 71, 71, ++ 71, 71, 71, 71, 71, 71, 71, 71, ++ 71, 71, 71, 71, 71, 71, 71, 71, ++ 71, ++ }, ++ { /* Fourth byte table 109. */ ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 4, 9, 13, 17, 21, 25, 29, ++ 33, 37, 42, 46, 50, 54, 58, 62, ++ 66, 71, 75, 80, 85, 90, 94, 98, ++ 102, 106, 110, 114, 118, 122, 127, 127, ++ 127, 127, 127, 127, 127, 127, 127, 127, ++ 127, 127, 127, 127, 127, 127, 127, 127, ++ 127, 127, 127, 127, 127, 127, 127, 127, ++ 127, 127, 127, 127, 127, 127, 127, 127, ++ 127, 127, 127, 127, 127, 127, 127, 127, ++ 127, 127, 127, 127, 127, 127, 127, 127, ++ 127, 127, 127, 127, 127, 127, 127, 127, ++ 127, 127, 127, 127, 127, 127, 127, 127, ++ 127, 127, 127, 127, 127, 127, 127, 127, ++ 127, 127, 127, 127, 127, 127, 127, 127, ++ 127, 127, 127, 127, 127, 127, 127, 127, ++ 127, 127, 127, 127, 127, 127, 127, 127, ++ 127, ++ }, ++ { /* Fourth byte table 110. */ ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, ++ }, ++ { /* Fourth byte table 111. 
*/ ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, ++ }, ++ { /* Fourth byte table 112. */ ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, ++ }, ++ { /* Fourth byte table 113. */ ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, ++ }, ++ { /* Fourth byte table 114. 
*/ ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, ++ }, ++ { /* Fourth byte table 115. */ ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, ++ }, ++ { /* Fourth byte table 116. */ ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, ++ }, ++ { /* Fourth byte table 117. 
*/ ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, ++ }, ++ }, ++ { ++ { /* Fourth byte table 0. */ ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 1, 1, 1, 1, 1, 1, 1, ++ 1, 4, 4, 5, 5, 5, 5, 5, ++ 8, 8, 8, 9, 10, 13, 15, 15, ++ 15, 18, 19, 20, 20, 25, 30, 35, ++ 35, 35, 35, 35, 35, 35, 35, 35, ++ 35, 35, 35, 35, 35, 35, 35, 35, ++ 35, 35, 35, 35, 35, 35, 35, 35, ++ 35, 35, 35, 35, 35, 35, 35, 35, ++ 35, 35, 35, 35, 35, 35, 35, 35, ++ 35, 35, 35, 35, 35, 35, 35, 35, ++ 35, 35, 35, 35, 35, 35, 35, 35, ++ 35, 35, 35, 35, 35, 35, 35, 35, ++ 35, ++ }, ++ { /* Fourth byte table 1. */ ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 4, 8, 12, 16, 20, 24, 24, ++ 28, 32, 36, 40, 44, 48, 52, 56, ++ 60, 60, 64, 68, 72, 76, 80, 84, ++ 84, 84, 88, 92, 96, 100, 104, 104, ++ 104, 108, 112, 116, 120, 124, 128, 128, ++ 132, 136, 140, 144, 148, 152, 156, 160, ++ 164, 164, 168, 172, 176, 180, 184, 188, ++ 188, 188, 192, 196, 200, 204, 208, 208, ++ 212, 212, 212, 212, 212, 212, 212, 212, ++ 212, 212, 212, 212, 212, 212, 212, 212, ++ 212, 212, 212, 212, 212, 212, 212, 212, ++ 212, 212, 212, 212, 212, 212, 212, 212, ++ 212, 212, 212, 212, 212, 212, 212, 212, ++ 212, 212, 212, 212, 212, 212, 212, 212, ++ 212, 212, 212, 212, 212, 212, 212, 212, ++ 212, 212, 212, 212, 212, 212, 212, 212, ++ 212, ++ }, ++ { /* Fourth byte table 2. 
*/ ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 4, 8, 12, 16, 20, 24, 28, ++ 32, 36, 40, 44, 48, 52, 56, 60, ++ 64, 64, 64, 68, 72, 76, 80, 84, ++ 88, 92, 96, 100, 104, 108, 112, 116, ++ 120, 124, 128, 132, 136, 140, 144, 144, ++ 144, 148, 152, 156, 160, 164, 168, 172, ++ 176, 180, 180, 182, 184, 188, 192, 196, ++ 200, 200, 204, 208, 212, 216, 220, 224, ++ 227, 227, 227, 227, 227, 227, 227, 227, ++ 227, 227, 227, 227, 227, 227, 227, 227, ++ 227, 227, 227, 227, 227, 227, 227, 227, ++ 227, 227, 227, 227, 227, 227, 227, 227, ++ 227, 227, 227, 227, 227, 227, 227, 227, ++ 227, 227, 227, 227, 227, 227, 227, 227, ++ 227, 227, 227, 227, 227, 227, 227, 227, ++ 227, 227, 227, 227, 227, 227, 227, 227, ++ 227, ++ }, ++ { /* Fourth byte table 3. */ ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 3, 3, 3, 7, 11, 15, 19, ++ 23, 27, 30, 30, 30, 34, 38, 42, ++ 46, 50, 54, 54, 54, 58, 62, 66, ++ 70, 74, 78, 82, 86, 90, 94, 98, ++ 102, 106, 110, 114, 118, 122, 126, 126, ++ 126, 130, 134, 138, 142, 146, 150, 154, ++ 158, 162, 166, 170, 174, 178, 182, 186, ++ 190, 194, 198, 202, 206, 210, 214, 218, ++ 219, 219, 219, 219, 219, 219, 219, 219, ++ 219, 219, 219, 219, 219, 219, 219, 219, ++ 219, 219, 219, 219, 219, 219, 219, 219, ++ 219, 219, 219, 219, 219, 219, 219, 219, ++ 219, 219, 219, 219, 219, 219, 219, 219, ++ 219, 219, 219, 219, 219, 219, 219, 219, ++ 219, 219, 219, 219, 219, 219, 219, 219, ++ 219, 219, 219, 219, 219, 219, 219, 219, ++ 219, ++ }, ++ { /* Fourth byte table 4. */ ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 4, 8, 8, 8, 8, 8, 8, ++ 8, 8, 8, 8, 8, 8, 8, 8, ++ 12, 16, 16, 16, 16, 16, 16, 16, ++ 16, 16, 16, 16, 16, 16, 16, 16, ++ 16, 16, 16, 16, 16, 16, 16, 16, ++ 16, 16, 16, 16, 16, 16, 16, 16, ++ 16, 16, 16, 16, 16, 16, 16, 16, ++ 16, 16, 16, 16, 16, 16, 16, 16, ++ 16, 16, 16, 16, 16, 16, 16, 16, ++ 16, 16, 16, 16, 16, 16, 16, 16, ++ 16, 16, 16, 16, 16, 16, 16, 16, ++ 16, 16, 16, 16, 16, 16, 16, 16, ++ 16, ++ }, ++ { /* Fourth byte table 5. 
*/ ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 4, 8, 12, ++ 14, 16, 18, 20, 22, 24, 28, 32, ++ 36, 40, 44, 48, 52, 56, 62, 68, ++ 74, 80, 86, 92, 98, 104, 104, 110, ++ 116, 122, 128, 133, 138, 138, 138, 142, ++ 146, 150, 154, 158, 162, 168, 174, 179, ++ 184, 188, 190, 192, 194, 198, 202, 202, ++ 202, 206, 210, 216, 222, 227, 232, 237, ++ 242, 242, 242, 242, 242, 242, 242, 242, ++ 242, 242, 242, 242, 242, 242, 242, 242, ++ 242, 242, 242, 242, 242, 242, 242, 242, ++ 242, 242, 242, 242, 242, 242, 242, 242, ++ 242, 242, 242, 242, 242, 242, 242, 242, ++ 242, 242, 242, 242, 242, 242, 242, 242, ++ 242, 242, 242, 242, 242, 242, 242, 242, ++ 242, 242, 242, 242, 242, 242, 242, 242, ++ 242, ++ }, ++ { /* Fourth byte table 6. */ ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 4, 8, 12, 16, 20, 24, 28, ++ 32, 36, 40, 44, 48, 52, 56, 60, ++ 64, 68, 72, 76, 80, 84, 88, 92, ++ 96, 100, 104, 108, 112, 112, 112, 116, ++ 120, 120, 120, 120, 120, 120, 120, 124, ++ 128, 132, 136, 142, 148, 154, 160, 164, ++ 168, 174, 180, 184, 188, 188, 188, 188, ++ 188, 188, 188, 188, 188, 188, 188, 188, ++ 188, 188, 188, 188, 188, 188, 188, 188, ++ 188, 188, 188, 188, 188, 188, 188, 188, ++ 188, 188, 188, 188, 188, 188, 188, 188, ++ 188, 188, 188, 188, 188, 188, 188, 188, ++ 188, 188, 188, 188, 188, 188, 188, 188, ++ 188, 188, 188, 188, 188, 188, 188, 188, ++ 188, 188, 188, 188, 188, 188, 188, 188, ++ 188, 188, 188, 188, 188, 188, 188, 188, ++ 188, ++ }, ++ { /* Fourth byte table 7. */ ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 1, 3, 4, 5, 7, 9, 11, ++ 12, 13, 13, 13, 13, 13, 13, 13, ++ 13, 13, 13, 13, 13, 13, 13, 13, ++ 13, 13, 13, 13, 13, 13, 13, 13, ++ 13, 13, 13, 13, 13, 13, 13, 13, ++ 13, 13, 13, 13, 13, 13, 13, 13, ++ 13, 13, 13, 13, 13, 13, 13, 13, ++ 13, 13, 13, 13, 13, 13, 13, 13, ++ 13, 13, 13, 13, 13, 13, 13, 13, ++ 13, 13, 13, 13, 13, 13, 13, 13, ++ 13, ++ }, ++ { /* Fourth byte table 8. 
*/ ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 3, 6, 9, 12, 15, 18, 18, ++ 18, 20, 21, 22, 23, 25, 25, 25, ++ 25, 25, 25, 25, 25, 25, 25, 25, ++ 25, 25, 25, 25, 25, 25, 25, 25, ++ 25, 25, 25, 25, 25, 25, 25, 25, ++ 25, 25, 25, 25, 25, 25, 25, 25, ++ 25, 25, 25, 25, 25, 25, 25, 25, ++ 25, 25, 25, 25, 25, 25, 25, 25, ++ 25, 25, 25, 25, 25, 25, 25, 25, ++ 25, 25, 25, 25, 25, 25, 25, 25, ++ 25, 25, 25, 25, 25, 25, 25, 25, ++ 25, 25, 25, 25, 25, 25, 25, 25, ++ 25, 25, 25, 25, 25, 25, 25, 25, ++ 25, ++ }, ++ { /* Fourth byte table 9. */ ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 3, 6, 6, 9, 14, 14, 14, ++ 14, 14, 14, 14, 14, 14, 14, 14, ++ 14, 14, 14, 14, 14, 14, 14, 14, ++ 14, 14, 14, 14, 14, 14, 14, 14, ++ 14, 14, 14, 14, 14, 14, 14, 14, ++ 14, 14, 14, 14, 14, 14, 14, 14, ++ 14, 14, 14, 14, 14, 17, 17, 17, ++ 17, 17, 17, 20, 20, 20, 20, 22, ++ 22, 22, 22, 22, 22, 22, 22, 22, ++ 22, 22, 22, 22, 22, 22, 22, 22, ++ 22, 22, 22, 22, 22, 22, 22, 22, ++ 22, 22, 22, 22, 22, 22, 22, 22, ++ 22, 22, 22, 22, 22, 22, 22, 22, ++ 22, 22, 22, 22, 22, 22, 22, 22, ++ 22, 22, 22, 22, 22, 22, 22, 22, ++ 22, 22, 22, 22, 22, 22, 22, 22, ++ 22, ++ }, ++ { /* Fourth byte table 10. */ ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 3, 14, 19, ++ 22, 27, 32, 37, 37, 42, 42, 47, ++ 52, 59, 59, 59, 59, 59, 59, 59, ++ 59, 59, 59, 59, 59, 59, 59, 59, ++ 59, 59, 59, 59, 59, 59, 59, 59, ++ 59, 59, 59, 64, 69, 74, 79, 84, ++ 89, 96, 96, 96, 96, 96, 96, 96, ++ 96, 96, 96, 96, 96, 96, 96, 96, ++ 96, 96, 96, 96, 96, 96, 96, 96, ++ 96, 96, 96, 96, 96, 96, 96, 96, ++ 96, 96, 96, 96, 96, 96, 96, 96, ++ 96, 96, 96, 96, 96, 96, 96, 96, ++ 96, 96, 96, 96, 96, 96, 96, 96, ++ 96, 96, 96, 96, 96, 96, 96, 96, ++ 96, 96, 96, 96, 96, 96, 96, 96, ++ 96, 96, 96, 96, 96, 96, 96, 96, ++ 96, ++ }, ++ { /* Fourth byte table 11. 
*/ ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 5, 10, 15, 20, 25, ++ 25, 27, 29, 31, 41, 51, 53, 55, ++ 55, 55, 55, 55, 55, 55, 55, 55, ++ 55, 55, 55, 55, 55, 55, 55, 55, ++ 55, 55, 55, 55, 55, 55, 55, 55, ++ 55, 57, 59, 61, 61, 63, 65, 65, ++ 65, 65, 67, 67, 67, 67, 67, 67, ++ 67, 67, 67, 67, 67, 67, 67, 67, ++ 67, 67, 67, 67, 67, 67, 67, 67, ++ 67, 67, 67, 67, 67, 67, 67, 67, ++ 67, 67, 67, 67, 67, 67, 67, 67, ++ 67, 67, 67, 67, 67, 67, 67, 67, ++ 67, 67, 67, 67, 67, 67, 67, 67, ++ 67, 67, 67, 67, 67, 67, 67, 67, ++ 67, 67, 67, 67, 67, 67, 67, 67, ++ 67, ++ }, ++ { /* Fourth byte table 12. */ ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 5, 10, 10, 15, 15, 15, 15, ++ 20, 20, 20, 20, 20, 25, 30, 35, ++ 35, 35, 35, 35, 35, 35, 35, 35, ++ 35, 35, 40, 40, 40, 40, 40, 40, ++ 40, 40, 40, 40, 40, 40, 40, 40, ++ 40, 40, 40, 40, 40, 40, 40, 40, ++ 40, 40, 40, 40, 40, 40, 40, 40, ++ 40, 40, 45, 45, 45, 45, 45, 45, ++ 45, 45, 45, 45, 45, 45, 45, 45, ++ 45, 45, 45, 45, 45, 45, 45, 45, ++ 45, 45, 45, 45, 45, 45, 45, 45, ++ 45, 45, 45, 45, 45, 45, 45, 45, ++ 45, 45, 45, 45, 45, 45, 45, 45, ++ 45, 45, 45, 45, 45, 45, 45, 45, ++ 45, 45, 45, 45, 45, 45, 45, 45, ++ 45, 45, 45, 45, 45, 45, 45, 45, ++ 45, ++ }, ++ { /* Fourth byte table 13. */ ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 5, 10, 10, 15, 15, 15, 15, ++ 20, 20, 20, 20, 20, 25, 30, 35, ++ 35, 35, 35, 35, 35, 35, 35, 35, ++ 35, 35, 35, 35, 35, 35, 35, 35, ++ 35, 35, 35, 35, 35, 35, 35, 40, ++ 45, 45, 45, 45, 45, 45, 45, 45, ++ 45, 45, 45, 45, 45, 45, 45, 45, ++ 45, 45, 45, 45, 45, 45, 45, 45, ++ 45, 45, 45, 45, 45, 45, 45, 45, ++ 45, 45, 45, 45, 45, 45, 45, 45, ++ 45, 45, 45, 45, 45, 45, 45, 45, ++ 45, 45, 45, 45, 45, 45, 45, 45, ++ 45, 45, 45, 45, 45, 45, 45, 45, ++ 45, 45, 45, 45, 45, 45, 45, 45, ++ 45, ++ }, ++ { /* Fourth byte table 14. 
*/ ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 5, 10, 10, 10, 10, 10, ++ 10, 10, 10, 10, 10, 10, 10, 10, ++ 10, 15, 20, 25, 30, 30, 30, 35, ++ 40, 40, 40, 45, 50, 55, 60, 65, ++ 70, 70, 70, 75, 80, 85, 90, 95, ++ 100, 100, 100, 105, 110, 115, 120, 125, ++ 130, 135, 140, 145, 150, 155, 160, 160, ++ 160, 165, 170, 170, 170, 170, 170, 170, ++ 170, 170, 170, 170, 170, 170, 170, 170, ++ 170, 170, 170, 170, 170, 170, 170, 170, ++ 170, 170, 170, 170, 170, 170, 170, 170, ++ 170, 170, 170, 170, 170, 170, 170, 170, ++ 170, 170, 170, 170, 170, 170, 170, 170, ++ 170, 170, 170, 170, 170, 170, 170, 170, ++ 170, 170, 170, 170, 170, 170, 170, 170, ++ 170, 170, 170, 170, 170, 170, 170, 170, ++ 170, ++ }, ++ { /* Fourth byte table 15. */ ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 4, 4, 4, 4, 4, 4, 4, 4, ++ 4, 4, 4, 4, 4, 4, 4, 4, ++ 4, 4, 4, 4, 4, 4, 4, 4, ++ 4, 4, 4, 4, 4, 4, 4, 4, ++ 4, 4, 4, 4, 4, 4, 4, 4, ++ 4, 4, 4, 4, 4, 4, 4, 4, ++ 4, 4, 4, 4, 4, 4, 4, 4, ++ 4, 4, 4, 4, 4, 4, 4, 4, ++ 4, 4, 4, 4, 4, 4, 4, 4, ++ 4, 4, 4, 4, 4, 4, 4, 4, ++ 4, 4, 4, 4, 4, 4, 4, 4, ++ 4, 4, 4, 4, 4, 4, 4, 4, ++ 4, 4, 4, 4, 4, 4, 4, 4, ++ 4, 4, 4, 4, 4, 4, 4, 4, ++ 4, 4, 4, 4, 4, 4, 4, 4, ++ 4, ++ }, ++ { /* Fourth byte table 16. */ ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 5, 10, 15, 20, 25, ++ 25, 25, 25, 25, 25, 25, 25, 25, ++ 25, 25, 25, 25, 25, 25, 25, 25, ++ 25, 25, 25, 25, 25, 25, 25, 25, ++ 25, 25, 25, 25, 25, 25, 25, 25, ++ 25, 25, 25, 25, 25, 25, 25, 25, ++ 25, 25, 25, 25, 25, 25, 25, 25, ++ 25, 25, 25, 25, 25, 25, 25, 25, ++ 25, 25, 25, 25, 25, 25, 25, 25, ++ 25, 25, 25, 25, 25, 25, 25, 25, ++ 25, 25, 25, 25, 25, 25, 25, 25, ++ 25, 25, 25, 25, 25, 25, 25, 25, ++ 25, ++ }, ++ { /* Fourth byte table 17. 
*/ ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 4, 8, ++ 12, 16, 16, 16, 16, 16, 16, 16, ++ 16, 16, 16, 16, 16, 16, 16, 16, ++ 16, 16, 16, 16, 16, 16, 16, 16, ++ 16, 16, 16, 16, 16, 16, 16, 16, ++ 16, 16, 16, 16, 16, 16, 16, 16, ++ 16, 16, 16, 16, 16, 16, 16, 16, ++ 16, 16, 16, 16, 16, 16, 16, 16, ++ 16, 16, 16, 16, 16, 16, 16, 16, ++ 16, 16, 16, 16, 16, 16, 16, 16, ++ 16, ++ }, ++ { /* Fourth byte table 18. */ ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 5, 5, 10, 10, 10, 10, 10, ++ 10, 10, 10, 10, 10, 10, 10, 10, ++ 10, 10, 10, 10, 15, 15, 15, 15, ++ 15, 15, 15, 15, 15, 15, 15, 15, ++ 15, 15, 15, 15, 15, 15, 15, 15, ++ 15, 15, 15, 15, 15, 15, 15, 15, ++ 15, 15, 15, 15, 15, 15, 15, 15, ++ 15, 15, 15, 15, 15, 15, 15, 15, ++ 15, 15, 15, 15, 15, 15, 15, 15, ++ 15, 15, 15, 15, 15, 15, 15, 15, ++ 15, 15, 15, 15, 15, 15, 15, 15, ++ 15, 15, 15, 15, 15, 15, 15, 15, ++ 15, 15, 15, 15, 15, 15, 15, 15, ++ 15, 15, 15, 15, 15, 15, 15, 15, ++ 15, 15, 15, 15, 15, 15, 15, 15, ++ 15, 15, 15, 15, 15, 15, 15, 15, ++ 15, ++ }, ++ { /* Fourth byte table 19. */ ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 7, 7, 7, 7, 7, 7, ++ 7, 7, 14, 14, 14, 21, 21, 21, ++ 21, 21, 21, 21, 21, 21, 21, 21, ++ 21, 21, 21, 21, 21, 21, 21, 21, ++ 21, 21, 21, 21, 21, 21, 21, 21, ++ 21, 21, 21, 21, 21, 21, 21, 21, ++ 21, 21, 21, 21, 21, 21, 21, 21, ++ 21, 21, 21, 21, 21, 21, 21, 21, ++ 21, 21, 21, 21, 21, 21, 21, 21, ++ 21, 21, 21, 21, 21, 21, 21, 21, ++ 21, 21, 21, 21, 21, 21, 21, 21, ++ 21, ++ }, ++ { /* Fourth byte table 20. 
*/ ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 7, 14, 21, 28, 35, 42, 49, ++ 56, 56, 56, 56, 56, 56, 56, 56, ++ 56, 56, 56, 56, 56, 56, 56, 56, ++ 56, 56, 56, 56, 56, 56, 56, 56, ++ 56, 56, 56, 56, 56, 56, 56, 56, ++ 56, 56, 56, 56, 56, 56, 56, 56, ++ 56, 56, 56, 56, 56, 56, 56, 56, ++ 56, 56, 56, 56, 56, 56, 56, 56, ++ 56, 56, 56, 56, 56, 56, 56, 56, ++ 56, 56, 56, 56, 56, 56, 56, 56, ++ 56, 56, 56, 56, 56, 56, 56, 56, ++ 56, 56, 56, 56, 56, 56, 56, 56, ++ 56, 56, 56, 56, 56, 56, 56, 56, ++ 56, ++ }, ++ { /* Fourth byte table 21. */ ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 7, 14, 14, 14, ++ 14, 14, 14, 14, 14, 14, 14, 14, ++ 14, 14, 14, 14, 14, 21, 28, 28, ++ 35, 35, 35, 35, 35, 35, 35, 35, ++ 35, 35, 35, 35, 35, 35, 35, 35, ++ 35, 35, 35, 35, 35, 35, 35, 35, ++ 35, 35, 35, 35, 35, 35, 35, 35, ++ 35, 35, 35, 35, 35, 35, 35, 35, ++ 35, 35, 35, 35, 35, 35, 35, 35, ++ 35, 35, 35, 35, 35, 35, 35, 35, ++ 35, 35, 35, 35, 35, 35, 35, 35, ++ 35, 35, 35, 35, 35, 35, 35, 35, ++ 35, 35, 35, 35, 35, 35, 35, 35, ++ 35, 35, 35, 35, 35, 35, 35, 35, ++ 35, 35, 35, 35, 35, 35, 35, 35, ++ 35, ++ }, ++ { /* Fourth byte table 22. */ ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 7, 7, 7, 14, ++ 14, 14, 14, 14, 14, 14, 14, 14, ++ 14, 14, 14, 14, 14, 14, 14, 14, ++ 14, 14, 14, 14, 14, 14, 14, 14, ++ 14, 14, 14, 14, 14, 14, 14, 14, ++ 14, 14, 14, 14, 14, 14, 14, 14, ++ 14, 14, 14, 14, 14, 14, 14, 14, ++ 14, 14, 14, 14, 14, 14, 14, 14, ++ 14, 14, 14, 14, 14, 14, 14, 14, ++ 14, 14, 14, 14, 14, 14, 14, 14, ++ 14, ++ }, ++ { /* Fourth byte table 23. 
*/ ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 7, 14, 21, 21, 21, 28, ++ 28, 28, 28, 28, 28, 28, 28, 28, ++ 28, 28, 28, 28, 28, 28, 28, 28, ++ 28, 28, 28, 28, 28, 28, 28, 28, ++ 28, 28, 28, 28, 28, 28, 28, 28, ++ 28, 28, 28, 28, 28, 28, 28, 28, ++ 28, 28, 28, 28, 28, 28, 28, 28, ++ 28, 28, 28, 28, 28, 28, 28, 28, ++ 28, 28, 28, 28, 28, 28, 28, 28, ++ 28, 28, 28, 28, 28, 28, 28, 28, ++ 28, 28, 28, 28, 28, 28, 28, 28, ++ 28, 28, 28, 28, 28, 28, 28, 28, ++ 28, 28, 28, 28, 28, 28, 28, 28, ++ 28, ++ }, ++ { /* Fourth byte table 24. */ ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 7, 7, 7, 14, 21, 21, 21, ++ 21, 21, 21, 21, 21, 21, 21, 21, ++ 21, 21, 21, 21, 21, 28, 35, 35, ++ 35, 35, 35, 35, 35, 35, 35, 35, ++ 35, 35, 35, 35, 35, 35, 35, 35, ++ 35, 35, 35, 35, 35, 35, 35, 35, ++ 35, 35, 35, 35, 35, 35, 35, 35, ++ 35, 35, 35, 35, 35, 35, 35, 35, ++ 35, 35, 35, 35, 35, 35, 35, 35, ++ 35, 35, 35, 35, 35, 35, 35, 35, ++ 35, 35, 35, 35, 35, 35, 35, 35, ++ 35, 35, 35, 35, 35, 35, 35, 35, ++ 35, 35, 35, 35, 35, 35, 35, 35, ++ 35, 35, 35, 35, 35, 35, 35, 35, ++ 35, 35, 35, 35, 35, 35, 35, 35, ++ 35, ++ }, ++ { /* Fourth byte table 25. */ ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 7, 7, 7, ++ 7, 7, 7, 7, 7, 7, 7, 7, ++ 7, 7, 7, 7, 7, 7, 7, 7, ++ 7, 7, 7, 7, 7, 7, 7, 7, ++ 7, 7, 7, 7, 7, 7, 7, 7, ++ 7, 7, 7, 7, 7, 7, 7, 7, ++ 7, 7, 7, 7, 7, 7, 7, 7, ++ 7, 7, 7, 7, 7, 7, 7, 7, ++ 7, 7, 7, 7, 7, 7, 7, 7, ++ 7, 7, 7, 7, 7, 7, 7, 7, ++ 7, 7, 7, 7, 7, 7, 7, 7, ++ 7, 7, 7, 7, 7, 7, 7, 7, ++ 7, 7, 7, 7, 7, 7, 7, 7, ++ 7, 7, 7, 7, 7, 7, 7, 7, ++ 7, ++ }, ++ { /* Fourth byte table 26. 
*/ ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 7, 14, 21, 21, 21, ++ 21, 21, 21, 21, 21, 21, 21, 21, ++ 21, 21, 21, 21, 21, 21, 21, 21, ++ 21, 21, 21, 21, 21, 21, 21, 21, ++ 21, 21, 21, 21, 21, 21, 21, 21, ++ 21, 21, 21, 21, 21, 21, 21, 21, ++ 21, 21, 21, 21, 21, 21, 21, 21, ++ 21, 21, 21, 21, 21, 21, 21, 21, ++ 21, 21, 21, 21, 21, 21, 21, 21, ++ 21, 21, 21, 21, 21, 21, 21, 21, ++ 21, 21, 21, 21, 21, 21, 21, 21, ++ 21, 21, 21, 21, 21, 21, 21, 21, ++ 21, 21, 21, 21, 21, 21, 21, 21, ++ 21, 21, 21, 21, 21, 21, 21, 21, ++ 21, 21, 21, 21, 21, 21, 21, 21, ++ 21, ++ }, ++ { /* Fourth byte table 27. */ ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 7, 7, 7, 7, 7, 7, 7, ++ 7, 7, 7, 7, 7, 7, 7, 7, ++ 7, 7, 7, 7, 7, 7, 7, 7, ++ 7, 7, 7, 7, 7, 7, 7, 7, ++ 7, 7, 7, 7, 7, 7, 7, 7, ++ 7, 7, 7, 7, 7, 7, 7, 7, ++ 7, 7, 7, 7, 7, 7, 7, 7, ++ 7, 7, 7, 7, 7, 7, 7, 7, ++ 7, 7, 7, 7, 7, 7, 7, 7, ++ 7, 7, 7, 7, 7, 7, 7, 7, ++ 7, 7, 7, 7, 7, 7, 7, 7, ++ 7, 7, 7, 7, 7, 7, 7, 7, ++ 7, 7, 7, 7, 7, 7, 7, 7, ++ 7, 7, 7, 7, 7, 7, 7, 7, ++ 7, 7, 7, 7, 7, 7, 7, 7, ++ 7, ++ }, ++ { /* Fourth byte table 28. */ ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 7, 7, 7, 7, 7, 7, 7, ++ 14, 21, 21, 28, 38, 38, 38, 38, ++ 38, 38, 38, 38, 38, 38, 38, 38, ++ 38, 38, 38, 38, 38, 38, 38, 38, ++ 38, 38, 38, 38, 38, 38, 38, 38, ++ 38, 38, 38, 38, 38, 38, 38, 38, ++ 38, 38, 38, 38, 38, 38, 38, 38, ++ 38, 38, 38, 38, 38, 38, 38, 38, ++ 38, 38, 38, 38, 38, 38, 38, 38, ++ 38, 38, 38, 38, 38, 38, 38, 38, ++ 38, 38, 38, 38, 38, 38, 38, 38, ++ 38, 38, 38, 38, 38, 38, 38, 38, ++ 38, 38, 38, 38, 38, 38, 38, 38, ++ 38, 38, 38, 38, 38, 38, 38, 38, ++ 38, 38, 38, 38, 38, 38, 38, 38, ++ 38, 38, 38, 38, 38, 38, 38, 38, ++ 38, ++ }, ++ { /* Fourth byte table 29. 
*/ ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 7, 14, 21, 21, 21, ++ 21, 21, 21, 21, 21, 21, 21, 21, ++ 21, 21, 21, 21, 21, 21, 21, 21, ++ 21, 21, 21, 21, 21, 21, 21, 21, ++ 21, 21, 21, 21, 21, 21, 21, 21, ++ 21, 21, 21, 21, 21, 21, 21, 21, ++ 21, 21, 21, 21, 21, 21, 21, 21, ++ 21, 21, 21, 21, 21, 21, 21, 21, ++ 21, 21, 21, 21, 21, 21, 21, 21, ++ 21, 21, 21, 21, 21, 21, 21, 21, ++ 21, 21, 21, 21, 21, 21, 21, 21, ++ 21, 21, 21, 21, 21, 21, 21, 21, ++ 21, 21, 21, 21, 21, 21, 21, 21, ++ 21, 21, 21, 21, 21, 21, 21, 21, ++ 21, 21, 21, 21, 21, 21, 21, 21, ++ 21, ++ }, ++ { /* Fourth byte table 30. */ ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 7, 7, 14, 24, 31, ++ 31, 31, 31, 31, 31, 31, 31, 31, ++ 31, 31, 31, 31, 31, 31, 31, 31, ++ 31, 31, 31, 31, 31, 31, 31, 31, ++ 31, 31, 31, 31, 31, 31, 31, 31, ++ 31, 31, 31, 31, 31, 31, 31, 31, ++ 31, 31, 31, 31, 31, 31, 31, 31, ++ 31, 31, 31, 31, 31, 31, 31, 31, ++ 31, 31, 31, 31, 31, 31, 31, 31, ++ 31, 31, 31, 31, 31, 31, 31, 31, ++ 31, 31, 31, 31, 31, 31, 31, 31, ++ 31, 31, 31, 31, 31, 31, 31, 31, ++ 31, 31, 31, 31, 31, 31, 31, 31, ++ 31, ++ }, ++ { /* Fourth byte table 31. */ ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 6, 6, 6, 6, ++ 6, 6, 6, 6, 6, 6, 6, 6, ++ 6, 6, 6, 6, 6, 6, 6, 6, ++ 6, 6, 6, 6, 6, 6, 6, 6, ++ 6, 6, 6, 6, 6, 6, 6, 6, ++ 6, 6, 6, 6, 6, 6, 6, 6, ++ 6, 6, 6, 6, 6, 6, 6, 6, ++ 6, 6, 6, 6, 6, 6, 6, 6, ++ 6, 6, 6, 6, 6, 6, 6, 6, ++ 6, 6, 6, 6, 6, 6, 6, 6, ++ 6, ++ }, ++ { /* Fourth byte table 32. 
*/ ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 6, 6, 6, 6, ++ 6, 6, 6, 6, 6, 6, 6, 6, ++ 6, 6, 6, 6, 6, 6, 6, 6, ++ 6, 6, 6, 6, 6, 6, 6, 6, ++ 6, 6, 6, 6, 6, 6, 6, 6, ++ 6, 6, 6, 6, 6, 6, 6, 6, ++ 6, 6, 6, 6, 6, 6, 6, 6, ++ 6, 6, 6, 6, 6, 6, 6, 6, ++ 6, 6, 6, 6, 6, 6, 6, 6, ++ 6, 6, 6, 6, 6, 6, 6, 6, ++ 6, ++ }, ++ { /* Fourth byte table 33. */ ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 6, 12, 12, ++ 12, 12, 12, 12, 12, 12, 12, 12, ++ 12, 12, 12, 12, 12, 12, 12, 12, ++ 12, 12, 12, 12, 12, 12, 12, 12, ++ 12, 12, 12, 12, 12, 12, 12, 12, ++ 12, 12, 12, 12, 12, 12, 12, 12, ++ 12, 12, 12, 12, 12, 12, 12, 12, ++ 12, 12, 12, 12, 12, 12, 12, 12, ++ 12, 12, 12, 12, 12, 12, 12, 12, ++ 12, 12, 12, 12, 12, 12, 12, 12, ++ 12, 12, 12, 12, 12, 12, 12, 12, ++ 12, 12, 12, 12, 12, 12, 12, 12, ++ 12, 12, 12, 12, 12, 12, 12, 12, ++ 12, ++ }, ++ { /* Fourth byte table 34. */ ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 3, 3, 3, ++ 3, 3, 3, 3, 3, 3, 3, 3, ++ 3, 3, 3, 3, 3, 3, 3, 3, ++ 3, 3, 3, 3, 3, 3, 3, 3, ++ 3, 3, 3, 3, 3, 3, 3, 3, ++ 3, 3, 3, 3, 3, 3, 3, 3, ++ 3, 3, 3, 3, 3, 3, 3, 3, ++ 3, 3, 3, 3, 3, 3, 3, 3, ++ 3, 3, 3, 3, 3, 3, 3, 3, ++ 3, 3, 3, 3, 3, 3, 3, 3, ++ 3, 3, 3, 3, 3, 3, 3, 3, ++ 3, 3, 3, 3, 3, 3, 3, 3, ++ 3, 3, 3, 3, 3, 3, 3, 3, ++ 3, 3, 3, 3, 3, 3, 3, 3, ++ 3, 3, 3, 3, 3, 3, 3, 3, ++ 3, ++ }, ++ { /* Fourth byte table 35. 
*/ ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 7, 7, 7, 7, ++ 7, 7, 7, 7, 7, 7, 14, 14, ++ 14, 14, 14, 21, 21, 21, 21, 21, ++ 28, 28, 28, 28, 28, 35, 35, 35, ++ 35, 35, 35, 35, 35, 35, 35, 35, ++ 35, 35, 42, 42, 42, 42, 42, 42, ++ 42, 42, 42, 42, 49, 49, 56, 63, ++ 72, 79, 88, 88, 88, 88, 88, 88, ++ 88, 88, 88, 88, 88, 88, 88, 88, ++ 88, 88, 88, 88, 88, 88, 88, 88, ++ 88, 88, 88, 88, 88, 88, 88, 88, ++ 88, 88, 88, 88, 88, 88, 88, 88, ++ 88, 88, 88, 88, 88, 88, 88, 88, ++ 88, 88, 88, 88, 88, 88, 88, 88, ++ 88, 88, 88, 88, 88, 88, 88, 88, ++ 88, 88, 88, 88, 88, 88, 88, 88, ++ 88, ++ }, ++ { /* Fourth byte table 36. */ ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 7, 7, 7, 7, 7, 7, ++ 7, 7, 7, 7, 7, 7, 7, 7, ++ 7, 7, 7, 7, 14, 14, 14, 14, ++ 14, 14, 14, 14, 14, 14, 21, 21, ++ 21, 21, 21, 28, 28, 28, 28, 28, ++ 35, 35, 35, 35, 35, 42, 42, 42, ++ 42, 42, 42, 42, 42, 42, 42, 42, ++ 42, 42, 49, 49, 49, 49, 49, 49, ++ 49, 49, 49, 49, 49, 49, 49, 49, ++ 49, 49, 49, 49, 49, 49, 49, 49, ++ 49, 49, 49, 49, 49, 49, 49, 49, ++ 49, 49, 49, 49, 49, 49, 49, 49, ++ 49, 49, 49, 49, 49, 49, 49, 49, ++ 49, 49, 49, 49, 49, 49, 49, 49, ++ 49, 49, 49, 49, 49, 49, 49, 49, ++ 49, 49, 49, 49, 49, 49, 49, 49, ++ 49, ++ }, ++ { /* Fourth byte table 37. */ ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 7, ++ 7, 7, 7, 7, 7, 7, 7, 7, ++ 7, 7, 7, 7, 7, 7, 7, 7, ++ 7, 7, 7, 7, 7, 7, 7, 7, ++ 7, 7, 7, 7, 7, 7, 7, 7, ++ 7, 7, 7, 7, 7, 7, 7, 7, ++ 7, 7, 7, 7, 7, 7, 7, 7, ++ 7, 7, 7, 7, 7, 7, 7, 7, ++ 7, 7, 7, 7, 7, 7, 7, 7, ++ 7, 7, 7, 7, 7, 7, 7, 7, ++ 7, 7, 7, 7, 7, 7, 7, 7, ++ 7, 7, 7, 7, 7, 7, 7, 7, ++ 7, ++ }, ++ { /* Fourth byte table 38. 
*/ ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 3, 3, 3, ++ 3, 3, 3, 3, 3, 3, 3, 3, ++ 3, 3, 3, 3, 3, 3, 3, 3, ++ 3, 3, 3, 3, 3, 3, 3, 3, ++ 3, 3, 3, 3, 3, 3, 3, 3, ++ 3, 3, 3, 3, 3, 3, 3, 3, ++ 3, 3, 3, 3, 3, 3, 3, 3, ++ 3, 3, 3, 3, 3, 3, 3, 3, ++ 3, 3, 3, 3, 3, 3, 3, 3, ++ 3, ++ }, ++ { /* Fourth byte table 39. */ ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 7, ++ 7, 14, 14, 21, 21, 28, 28, 35, ++ 35, 35, 35, 42, 42, 42, 42, 42, ++ 42, 42, 42, 42, 42, 42, 42, 42, ++ 42, 42, 42, 42, 42, 42, 42, 42, ++ 42, 42, 42, 42, 42, 42, 42, 42, ++ 42, 42, 42, 42, 42, 42, 42, 42, ++ 42, 42, 42, 42, 49, 49, 56, 56, ++ 56, 56, 56, 56, 56, 56, 56, 56, ++ 56, 56, 56, 56, 56, 56, 56, 56, ++ 56, 56, 56, 56, 56, 56, 56, 56, ++ 56, 56, 56, 56, 56, 56, 56, 56, ++ 56, 56, 56, 56, 56, 56, 56, 56, ++ 56, 56, 56, 56, 56, 56, 56, 56, ++ 56, 56, 56, 56, 56, 56, 56, 56, ++ 56, 56, 56, 56, 56, 56, 56, 56, ++ 56, ++ }, ++ { /* Fourth byte table 40. */ ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 7, 14, 14, 21, 21, 21, 21, ++ 21, 21, 21, 21, 21, 21, 21, 21, ++ 21, 21, 21, 21, 21, 21, 21, 21, ++ 21, 21, 21, 21, 21, 21, 21, 21, ++ 21, 21, 21, 21, 21, 21, 21, 21, ++ 21, 21, 21, 21, 21, 21, 21, 21, ++ 21, 21, 21, 21, 21, 21, 21, 21, ++ 21, 21, 21, 21, 21, 21, 21, 21, ++ 21, 21, 21, 21, 21, 21, 21, 21, ++ 21, 21, 21, 21, 21, 21, 21, 21, ++ 21, 21, 21, 21, 21, 21, 21, 21, ++ 21, 21, 21, 21, 21, 21, 21, 21, ++ 21, 21, 21, 21, 21, 21, 21, 21, ++ 21, 21, 21, 21, 21, 21, 21, 21, ++ 21, 21, 21, 21, 21, 21, 21, 21, ++ 21, 21, 21, 21, 21, 21, 21, 21, ++ 21, ++ }, ++ { /* Fourth byte table 41. 
*/ ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 1, 3, 4, ++ 4, 5, 6, 8, 9, 10, 11, 12, ++ 13, 14, 15, 16, 16, 17, 19, 20, ++ 21, 21, 21, 21, 21, 21, 21, 21, ++ 21, 21, 21, 21, 21, 21, 21, 21, ++ 21, 21, 21, 21, 21, 21, 21, 21, ++ 21, 21, 21, 21, 21, 21, 21, 21, ++ 21, 21, 21, 21, 21, 21, 21, 21, ++ 21, 21, 21, 21, 21, 21, 21, 21, ++ 21, 21, 21, 21, 21, 21, 21, 21, ++ 21, 21, 21, 21, 21, 21, 21, 21, ++ 21, ++ }, ++ { /* Fourth byte table 42. */ ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 1, 2, 3, 4, 6, 8, 11, ++ 12, 13, 14, 16, 18, 20, 21, 21, ++ 22, 23, 25, 26, 28, 31, 34, 35, ++ 36, 37, 40, 42, 43, 46, 48, 50, ++ 52, 54, 56, 57, 58, 59, 60, 62, ++ 64, 66, 68, 70, 70, 70, 70, 70, ++ 70, 70, 70, 70, 70, 70, 70, 70, ++ 70, 72, 72, 72, 72, 72, 72, 72, ++ 72, 72, 72, 72, 72, 72, 72, 72, ++ 72, 72, 72, 72, 72, 72, 72, 72, ++ 72, 72, 72, 72, 72, 72, 72, 72, ++ 72, 72, 72, 72, 72, 72, 72, 72, ++ 72, 72, 72, 72, 72, 72, 72, 72, ++ 72, 72, 72, 72, 72, 72, 72, 72, ++ 72, 72, 72, 72, 72, 72, 72, 72, ++ 72, 72, 72, 72, 72, 72, 72, 72, ++ 72, ++ }, ++ { /* Fourth byte table 43. */ ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 2, 3, 5, 7, ++ 9, 10, 12, 14, 16, 18, 20, 22, ++ 25, 27, 29, 32, 34, 36, 38, 40, ++ 42, 44, 46, 48, 50, 52, 54, 56, ++ 58, 61, 63, 65, 66, 68, 70, 72, ++ 74, 74, 74, 74, 74, 74, 74, 74, ++ 74, 74, 74, 74, 74, 74, 74, 74, ++ 74, 74, 74, 74, 74, 74, 74, 74, ++ 74, 74, 74, 74, 74, 74, 74, 74, ++ 74, 74, 74, 74, 74, 74, 74, 74, ++ 74, 74, 74, 74, 74, 74, 74, 74, ++ 74, 74, 74, 74, 74, 74, 74, 74, ++ 74, 74, 74, 74, 74, 74, 74, 74, ++ 74, ++ }, ++ { /* Fourth byte table 44. 
*/ ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 6, 12, 13, 14, 15, 16, 17, ++ 18, 19, 20, 21, 21, 21, 21, 21, ++ 21, 21, 24, 24, 24, 24, 24, 24, ++ 27, 27, 27, 27, 27, 27, 27, 27, ++ 27, 27, 27, 27, 27, 28, 30, 33, ++ 33, 33, 33, 33, 33, 33, 33, 33, ++ 34, 34, 34, 34, 40, 49, 49, 55, ++ 64, 64, 64, 64, 64, 66, 66, 69, ++ 69, 69, 69, 69, 69, 69, 69, 69, ++ 69, 69, 69, 69, 69, 69, 69, 69, ++ 69, 69, 69, 69, 69, 69, 69, 69, ++ 69, 69, 69, 69, 69, 69, 69, 69, ++ 69, 69, 69, 69, 69, 69, 69, 69, ++ 69, 69, 69, 69, 69, 69, 69, 69, ++ 69, 69, 69, 69, 69, 69, 69, 69, ++ 69, 69, 69, 69, 69, 69, 69, 69, ++ 69, ++ }, ++ { /* Fourth byte table 45. */ ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 2, 4, 6, 6, 6, 6, 6, 6, ++ 6, 6, 6, 6, 6, 6, 6, 6, ++ 18, 18, 18, 18, 18, 18, 18, 18, ++ 19, 19, 19, 19, 19, 19, 19, 19, ++ 19, 19, 19, 19, 19, 19, 19, 19, ++ 19, 20, 21, 21, 21, 22, 23, 24, ++ 25, 26, 27, 28, 31, 32, 33, 34, ++ 35, 35, 35, 35, 35, 35, 35, 35, ++ 35, 35, 35, 35, 35, 35, 35, 35, ++ 35, 35, 35, 35, 35, 35, 35, 35, ++ 35, 35, 35, 35, 35, 35, 35, 35, ++ 35, 35, 35, 35, 35, 35, 35, 35, ++ 35, 35, 35, 35, 35, 35, 35, 35, ++ 35, 35, 35, 35, 35, 35, 35, 35, ++ 35, 35, 35, 35, 35, 35, 35, 35, ++ 35, ++ }, ++ { /* Fourth byte table 46. */ ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 1, 2, 3, 4, 5, 6, 7, ++ 8, 9, 10, 11, 14, 15, 16, 17, ++ 17, 18, 19, 20, 21, 23, 23, 23, ++ 23, 23, 23, 23, 23, 23, 23, 23, ++ 23, 23, 23, 23, 23, 23, 23, 23, ++ 23, 25, 25, 25, 25, 25, 25, 25, ++ 25, 25, 25, 25, 25, 25, 25, 25, ++ 25, 25, 25, 25, 25, 25, 25, 25, ++ 25, 25, 25, 25, 25, 25, 25, 25, ++ 25, 25, 25, 25, 25, 25, 25, 25, ++ 25, 25, 25, 25, 25, 25, 25, 25, ++ 25, 25, 25, 25, 25, 25, 25, 25, ++ 25, 25, 25, 25, 25, 25, 25, 25, ++ 25, 25, 25, 25, 25, 25, 25, 25, ++ 25, 25, 25, 25, 25, 25, 25, 25, ++ 25, 25, 25, 25, 25, 25, 25, 25, ++ 25, ++ }, ++ { /* Fourth byte table 47. 
*/ ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 3, 6, 7, 10, 10, 13, 16, ++ 18, 18, 21, 22, 23, 24, 25, 26, ++ 28, 29, 30, 31, 32, 32, 33, 35, ++ 35, 35, 36, 37, 38, 39, 40, 40, ++ 40, 42, 45, 47, 47, 48, 48, 51, ++ 51, 52, 52, 54, 58, 59, 60, 60, ++ 61, 62, 63, 63, 64, 65, 67, 69, ++ 71, 73, 74, 74, 77, 79, 81, 83, ++ 85, 85, 85, 85, 85, 85, 85, 85, ++ 85, 85, 85, 85, 85, 85, 85, 85, ++ 85, 85, 85, 85, 85, 85, 85, 85, ++ 85, 85, 85, 85, 85, 85, 85, 85, ++ 85, 85, 85, 85, 85, 85, 85, 85, ++ 85, 85, 85, 85, 85, 85, 85, 85, ++ 85, 85, 85, 85, 85, 85, 85, 85, ++ 85, 85, 85, 85, 85, 85, 85, 85, ++ 85, ++ }, ++ { /* Fourth byte table 48. */ ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 3, 3, 3, 3, 3, 4, 5, ++ 6, 7, 8, 8, 8, 8, 8, 8, ++ 8, 8, 8, 8, 13, 18, 23, 28, ++ 33, 38, 43, 48, 53, 58, 63, 68, ++ 72, 73, 75, 78, 80, 81, 83, 86, ++ 90, 92, 93, 95, 98, 99, 100, 101, ++ 102, 103, 105, 108, 110, 111, 113, 116, ++ 120, 122, 123, 125, 128, 129, 130, 131, ++ 132, 132, 132, 132, 132, 132, 132, 132, ++ 132, 132, 132, 132, 132, 132, 132, 132, ++ 132, 132, 132, 132, 132, 132, 132, 132, ++ 132, 132, 132, 132, 132, 132, 132, 132, ++ 132, 132, 132, 132, 132, 132, 132, 132, ++ 132, 132, 132, 132, 132, 132, 132, 132, ++ 132, 132, 132, 132, 132, 132, 132, 132, ++ 132, 132, 132, 132, 132, 132, 132, 132, ++ 132, ++ }, ++ { /* Fourth byte table 49. */ ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 6, 12, 12, 12, 12, ++ 12, 12, 12, 12, 12, 12, 12, 12, ++ 12, 12, 12, 12, 12, 12, 12, 18, ++ 18, 18, 18, 18, 18, 18, 18, 18, ++ 18, 18, 18, 18, 18, 18, 18, 18, ++ 18, 18, 18, 18, 18, 18, 18, 18, ++ 18, 18, 18, 18, 18, 18, 18, 18, ++ 18, 18, 18, 18, 18, 18, 18, 18, ++ 18, 18, 18, 18, 18, 18, 18, 18, ++ 18, 18, 18, 18, 18, 18, 18, 18, ++ 18, 18, 18, 18, 18, 18, 18, 18, ++ 18, 18, 18, 18, 18, 18, 18, 18, ++ 18, 18, 18, 18, 18, 18, 18, 18, ++ 18, ++ }, ++ { /* Fourth byte table 50. 
*/ ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 6, 12, ++ 18, 18, 18, 18, 18, 18, 18, 18, ++ 18, 18, 18, 18, 18, 18, 18, 18, ++ 18, 18, 18, 18, 18, 18, 18, 18, ++ 18, 18, 18, 18, 18, 18, 18, 18, ++ 18, 18, 18, 18, 18, 18, 18, 18, ++ 18, 18, 18, 18, 18, 18, 18, 18, ++ 18, 18, 18, 18, 18, 18, 18, 18, ++ 18, 18, 18, 18, 18, 18, 18, 18, ++ 18, 18, 18, 18, 18, 18, 18, 18, ++ 18, 18, 18, 18, 18, 18, 18, 18, ++ 18, 18, 18, 18, 18, 18, 18, 18, ++ 18, 18, 18, 18, 18, 18, 18, 18, ++ 18, 18, 18, 18, 18, 18, 18, 18, ++ 18, 18, 18, 18, 18, 18, 18, 18, ++ 18, ++ }, ++ { /* Fourth byte table 51. */ ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 6, 6, 6, ++ 6, 6, 12, 12, 12, 18, 18, 18, ++ 18, 18, 18, 18, 18, 18, 18, 18, ++ 18, 18, 18, 18, 18, 18, 18, 18, ++ 18, 18, 18, 18, 18, 24, 24, 30, ++ 30, 30, 30, 30, 30, 36, 45, 45, ++ 51, 60, 60, 60, 60, 60, 60, 60, ++ 60, 60, 60, 60, 60, 60, 60, 60, ++ 60, 60, 60, 60, 60, 60, 60, 60, ++ 60, 60, 60, 60, 60, 60, 60, 60, ++ 60, 60, 60, 60, 60, 60, 60, 60, ++ 60, 60, 60, 60, 60, 60, 60, 60, ++ 60, 60, 60, 60, 60, 60, 60, 60, ++ 60, 60, 60, 60, 60, 60, 60, 60, ++ 60, 60, 60, 60, 60, 60, 60, 60, ++ 60, 60, 60, 60, 60, 60, 60, 60, ++ 60, ++ }, ++ { /* Fourth byte table 52. */ ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 6, 6, 6, 12, 12, 12, ++ 18, 18, 24, 24, 24, 24, 24, 24, ++ 24, 24, 24, 24, 24, 24, 24, 24, ++ 24, 24, 24, 24, 24, 24, 24, 24, ++ 24, 28, 28, 34, 34, 34, 34, 34, ++ 34, 34, 34, 34, 34, 34, 40, 44, ++ 48, 54, 60, 60, 60, 66, 72, 72, ++ 72, 78, 84, 84, 84, 84, 84, 84, ++ 84, 84, 84, 84, 84, 84, 84, 84, ++ 84, 84, 84, 84, 84, 84, 84, 84, ++ 84, 84, 84, 84, 84, 84, 84, 84, ++ 84, 84, 84, 84, 84, 84, 84, 84, ++ 84, 84, 84, 84, 84, 84, 84, 84, ++ 84, 84, 84, 84, 84, 84, 84, 84, ++ 84, 84, 84, 84, 84, 84, 84, 84, ++ 84, 84, 84, 84, 84, 84, 84, 84, ++ 84, ++ }, ++ { /* Fourth byte table 53. 
*/ ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 6, 12, 12, 12, 18, 24, 24, ++ 24, 30, 36, 36, 36, 36, 36, 36, ++ 36, 36, 36, 36, 36, 36, 36, 36, ++ 36, 36, 36, 36, 36, 36, 36, 36, ++ 36, 36, 36, 36, 36, 36, 36, 36, ++ 36, 36, 36, 36, 36, 42, 48, 54, ++ 60, 60, 60, 60, 60, 60, 60, 60, ++ 60, 60, 60, 60, 60, 60, 60, 60, ++ 60, 60, 60, 60, 60, 60, 60, 60, ++ 60, 60, 60, 60, 60, 60, 60, 60, ++ 60, 60, 60, 60, 60, 60, 60, 60, ++ 60, 60, 60, 60, 60, 60, 60, 60, ++ 60, 60, 60, 60, 60, 60, 60, 60, ++ 60, 60, 60, 60, 60, 60, 60, 60, ++ 60, 60, 60, 60, 60, 60, 60, 60, ++ 60, 60, 60, 60, 60, 60, 60, 60, ++ 60, ++ }, ++ { /* Fourth byte table 54. */ ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 6, 12, 18, 24, 24, 24, 24, ++ 24, 24, 24, 30, 36, 42, 48, 48, ++ 48, 48, 48, 48, 48, 48, 48, 48, ++ 48, 48, 48, 48, 48, 48, 48, 48, ++ 48, 48, 48, 48, 48, 48, 48, 48, ++ 48, 48, 48, 48, 48, 48, 48, 48, ++ 48, 48, 48, 48, 48, 48, 48, 48, ++ 48, 48, 48, 48, 48, 48, 48, 48, ++ 48, 48, 48, 48, 48, 48, 48, 48, ++ 48, 48, 48, 48, 48, 48, 48, 48, ++ 48, 48, 48, 48, 48, 48, 48, 48, ++ 48, 48, 48, 48, 48, 48, 48, 48, ++ 48, ++ }, ++ { /* Fourth byte table 55. */ ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 4, 8, 8, 8, 8, 8, ++ 8, 8, 8, 8, 8, 8, 8, 8, ++ 8, 8, 8, 8, 8, 8, 8, 8, ++ 8, 8, 8, 8, 8, 8, 8, 8, ++ 8, 8, 8, 8, 8, 8, 8, 8, ++ 8, 8, 8, 8, 8, 8, 8, 8, ++ 8, 8, 8, 8, 8, 8, 8, 8, ++ 8, 8, 8, 8, 8, 8, 8, 8, ++ 8, 8, 8, 8, 8, 8, 8, 8, ++ 8, 8, 8, 8, 8, 8, 8, 8, ++ 8, 8, 8, 8, 8, 8, 8, 8, ++ 8, ++ }, ++ { /* Fourth byte table 56. 
*/ ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 1, 2, 3, 4, 5, 6, 7, ++ 8, 9, 11, 13, 15, 17, 19, 21, ++ 23, 25, 27, 29, 31, 34, 37, 40, ++ 43, 46, 49, 52, 55, 58, 62, 66, ++ 70, 70, 70, 70, 70, 70, 70, 70, ++ 70, 70, 70, 70, 70, 70, 70, 70, ++ 70, 70, 70, 70, 70, 70, 70, 70, ++ 70, 70, 70, 70, 70, 70, 70, 70, ++ 70, 70, 70, 70, 70, 70, 70, 70, ++ 70, 70, 70, 70, 70, 70, 70, 70, ++ 70, 70, 70, 70, 70, 70, 70, 70, ++ 70, 70, 70, 70, 70, 70, 70, 70, ++ 70, ++ }, ++ { /* Fourth byte table 57. */ ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 4, 8, 12, 16, 20, 24, 28, ++ 32, 34, 36, 38, 40, 42, 44, 46, ++ 48, 50, 53, 56, 59, 62, 65, 68, ++ 71, 74, 77, 80, 83, 86, 89, 92, ++ 95, 98, 101, 104, 107, 110, 113, 116, ++ 119, 122, 125, 128, 131, 134, 137, 140, ++ 143, 146, 149, 152, 155, 158, 161, 162, ++ 163, 164, 165, 166, 167, 168, 169, 170, ++ 171, 171, 171, 171, 171, 171, 171, 171, ++ 171, 171, 171, 171, 171, 171, 171, 171, ++ 171, 171, 171, 171, 171, 171, 171, 171, ++ 171, 171, 171, 171, 171, 171, 171, 171, ++ 171, 171, 171, 171, 171, 171, 171, 171, ++ 171, 171, 171, 171, 171, 171, 171, 171, ++ 171, 171, 171, 171, 171, 171, 171, 171, ++ 171, 171, 171, 171, 171, 171, 171, 171, ++ 171, ++ }, ++ { /* Fourth byte table 58. */ ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 1, 2, 3, 4, 5, 6, 7, ++ 8, 9, 10, 11, 12, 13, 14, 15, ++ 16, 17, 18, 19, 20, 21, 22, 23, ++ 24, 25, 26, 27, 28, 29, 30, 31, ++ 32, 33, 34, 35, 36, 37, 38, 39, ++ 40, 41, 42, 43, 43, 43, 43, 43, ++ 43, 43, 43, 43, 43, 43, 43, 43, ++ 43, 43, 43, 43, 43, 43, 43, 43, ++ 43, 43, 43, 43, 43, 43, 43, 43, ++ 43, 43, 43, 43, 43, 43, 43, 43, ++ 43, 43, 43, 43, 43, 43, 43, 43, ++ 43, 43, 43, 43, 43, 43, 43, 43, ++ 43, 43, 43, 43, 43, 43, 43, 43, ++ 43, 43, 43, 43, 43, 43, 43, 43, ++ 43, 43, 43, 43, 43, 43, 43, 43, ++ 43, 43, 43, 43, 43, 43, 43, 43, ++ 43, ++ }, ++ { /* Fourth byte table 59. 
*/ ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 12, 12, 12, ++ 12, 12, 12, 12, 12, 12, 12, 12, ++ 12, 12, 12, 12, 12, 12, 12, 12, ++ 12, 12, 12, 12, 12, 12, 12, 12, ++ 12, 12, 12, 12, 12, 12, 12, 12, ++ 12, 12, 12, 12, 12, 12, 12, 12, ++ 12, 12, 12, 12, 12, 12, 12, 12, ++ 12, 12, 12, 12, 12, 12, 12, 12, ++ 12, 12, 12, 12, 12, 12, 12, 12, ++ 12, 12, 12, 12, 12, 12, 12, 12, ++ 12, 12, 12, 12, 12, 12, 12, 12, ++ 12, 12, 12, 12, 12, 12, 12, 12, ++ 12, 12, 12, 12, 12, 12, 12, 12, ++ 12, 12, 12, 12, 12, 12, 12, 12, ++ 12, 12, 12, 12, 12, 12, 12, 12, ++ 12, ++ }, ++ { /* Fourth byte table 60. */ ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 3, 5, 8, ++ 8, 8, 8, 8, 8, 8, 8, 8, ++ 8, 8, 8, 8, 8, 8, 8, 8, ++ 8, 8, 8, 8, 8, 8, 8, 8, ++ 8, 8, 8, 8, 8, 8, 8, 8, ++ 8, 8, 8, 8, 8, 8, 8, 8, ++ 8, 8, 8, 8, 8, 8, 8, 8, ++ 8, 8, 8, 8, 8, 8, 8, 8, ++ 8, 8, 8, 8, 8, 8, 8, 8, ++ 8, 8, 8, 8, 8, 8, 8, 8, ++ 8, ++ }, ++ { /* Fourth byte table 61. */ ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 6, 6, 6, ++ 6, 6, 6, 6, 6, 6, 6, 6, ++ 6, 6, 6, 6, 6, 6, 6, 6, ++ 6, 6, 6, 6, 6, 6, 6, 6, ++ 6, 6, 6, 6, 6, 6, 6, 6, ++ 6, 6, 6, 6, 6, 6, 6, 6, ++ 6, 6, 6, 6, 6, 6, 6, 6, ++ 6, 6, 6, 6, 6, 6, 6, 6, ++ 6, 6, 6, 6, 6, 6, 6, 6, ++ 6, 6, 6, 6, 6, 6, 6, 6, ++ 6, 6, 6, 6, 6, 6, 6, 6, ++ 6, 6, 6, 6, 6, 6, 6, 6, ++ 6, 6, 6, 6, 6, 6, 6, 6, ++ 6, ++ }, ++ { /* Fourth byte table 62. 
*/ ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 3, 3, 3, 3, 3, 3, 3, 3, ++ 3, 3, 3, 3, 3, 3, 3, 3, ++ 3, 3, 3, 3, 3, 3, 3, 3, ++ 3, 3, 3, 3, 3, 3, 3, 3, ++ 3, 3, 3, 3, 3, 3, 3, 3, ++ 3, 3, 3, 3, 3, 3, 3, 3, ++ 3, 3, 3, 3, 3, 3, 3, 3, ++ 3, 3, 3, 3, 3, 3, 3, 3, ++ 3, 3, 3, 3, 3, 3, 3, 3, ++ 3, 3, 3, 3, 3, 3, 3, 3, ++ 3, ++ }, ++ { /* Fourth byte table 63. */ ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 3, 3, 3, 3, 3, 3, 3, 3, ++ 3, 3, 3, 3, 3, 3, 3, 3, ++ 3, 3, 3, 3, 3, 3, 3, 3, ++ 3, 3, 3, 3, 3, 3, 3, 3, ++ 3, 3, 3, 3, 3, 3, 3, 3, ++ 3, 3, 3, 3, 3, 3, 3, 3, ++ 3, 3, 3, 3, 3, 3, 3, 3, ++ 3, 3, 3, 3, 3, 3, 3, 3, ++ 3, 3, 3, 3, 3, 3, 3, 3, ++ 3, 3, 3, 3, 3, 3, 3, 3, ++ 3, 3, 3, 3, 3, 3, 3, 3, ++ 3, 3, 3, 3, 3, 3, 3, 3, ++ 3, ++ }, ++ { /* Fourth byte table 64. */ ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 3, 3, 3, 3, ++ 3, 3, 3, 3, 3, 3, 3, 3, ++ 3, 3, 3, 3, 3, 3, 3, 3, ++ 3, 3, 3, 3, 3, 3, 3, 3, ++ 3, 3, 3, 3, 3, 3, 3, 3, ++ 3, 3, 3, 3, 3, 3, 3, 3, ++ 3, 3, 3, 3, 3, 3, 3, 3, ++ 3, 3, 3, 3, 3, 3, 3, 3, ++ 3, 3, 3, 3, 3, 3, 3, 3, ++ 3, 3, 3, 3, 3, 3, 3, 3, ++ 3, ++ }, ++ { /* Fourth byte table 65. 
*/ ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 3, 6, 9, 12, 15, 18, 21, ++ 24, 27, 30, 33, 36, 39, 42, 45, ++ 48, 51, 54, 57, 60, 63, 66, 69, ++ 72, 75, 78, 81, 84, 87, 90, 93, ++ 96, 99, 102, 105, 108, 111, 114, 117, ++ 120, 123, 126, 129, 132, 135, 138, 141, ++ 144, 147, 150, 153, 156, 159, 162, 165, ++ 168, 171, 174, 177, 180, 183, 186, 189, ++ 192, 192, 192, 192, 192, 192, 192, 192, ++ 192, 192, 192, 192, 192, 192, 192, 192, ++ 192, 192, 192, 192, 192, 192, 192, 192, ++ 192, 192, 192, 192, 192, 192, 192, 192, ++ 192, 192, 192, 192, 192, 192, 192, 192, ++ 192, 192, 192, 192, 192, 192, 192, 192, ++ 192, 192, 192, 192, 192, 192, 192, 192, ++ 192, 192, 192, 192, 192, 192, 192, 192, ++ 192, ++ }, ++ { /* Fourth byte table 66. */ ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 3, 6, 9, 12, 15, 18, 21, ++ 24, 27, 30, 33, 36, 39, 42, 45, ++ 48, 51, 54, 57, 60, 63, 66, 69, ++ 72, 75, 78, 81, 84, 87, 90, 93, ++ 96, 99, 102, 105, 108, 111, 114, 117, ++ 120, 123, 126, 129, 132, 135, 138, 141, ++ 144, 147, 150, 153, 156, 159, 162, 165, ++ 168, 171, 174, 177, 180, 183, 186, 189, ++ 192, 192, 192, 192, 192, 192, 192, 192, ++ 192, 192, 192, 192, 192, 192, 192, 192, ++ 192, 192, 192, 192, 192, 192, 192, 192, ++ 192, 192, 192, 192, 192, 192, 192, 192, ++ 192, 192, 192, 192, 192, 192, 192, 192, ++ 192, 192, 192, 192, 192, 192, 192, 192, ++ 192, 192, 192, 192, 192, 192, 192, 192, ++ 192, 192, 192, 192, 192, 192, 192, 192, ++ 192, ++ }, ++ { /* Fourth byte table 67. */ ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 3, 6, 9, 12, 15, 18, 21, ++ 24, 27, 30, 33, 36, 39, 42, 45, ++ 48, 51, 54, 57, 60, 63, 66, 69, ++ 72, 75, 78, 81, 84, 87, 90, 93, ++ 96, 99, 102, 105, 108, 111, 114, 117, ++ 120, 123, 126, 129, 132, 135, 138, 141, ++ 144, 147, 150, 153, 156, 159, 162, 165, ++ 168, 171, 174, 177, 180, 183, 186, 189, ++ 192, 192, 192, 192, 192, 192, 192, 192, ++ 192, 192, 192, 192, 192, 192, 192, 192, ++ 192, 192, 192, 192, 192, 192, 192, 192, ++ 192, 192, 192, 192, 192, 192, 192, 192, ++ 192, 192, 192, 192, 192, 192, 192, 192, ++ 192, 192, 192, 192, 192, 192, 192, 192, ++ 192, 192, 192, 192, 192, 192, 192, 192, ++ 192, 192, 192, 192, 192, 192, 192, 192, ++ 192, ++ }, ++ { /* Fourth byte table 68. 
*/ ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 3, 6, 9, 12, 15, 18, 21, ++ 24, 27, 30, 33, 36, 39, 42, 45, ++ 48, 51, 54, 57, 60, 63, 66, 66, ++ 66, 66, 66, 66, 66, 66, 66, 66, ++ 66, 66, 66, 66, 66, 66, 66, 66, ++ 66, 66, 66, 66, 66, 66, 66, 66, ++ 66, 66, 66, 66, 66, 66, 66, 66, ++ 66, 66, 66, 66, 66, 66, 66, 66, ++ 66, 66, 66, 66, 66, 66, 66, 66, ++ 66, 66, 66, 66, 66, 66, 66, 66, ++ 66, 66, 66, 66, 66, 66, 66, 66, ++ 66, 66, 66, 66, 66, 66, 66, 66, ++ 66, 66, 66, 66, 66, 66, 66, 66, ++ 66, 66, 66, 66, 66, 66, 66, 66, ++ 66, 66, 66, 66, 66, 66, 66, 66, ++ 66, 66, 66, 66, 66, 66, 66, 66, ++ 66, ++ }, ++ { /* Fourth byte table 69. */ ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 1, 1, 1, 1, 1, 1, 1, ++ 1, 1, 1, 1, 1, 1, 1, 1, ++ 1, 1, 1, 1, 1, 1, 1, 1, ++ 1, 1, 1, 1, 1, 1, 1, 1, ++ 1, 1, 1, 1, 1, 1, 1, 1, ++ 1, 1, 1, 1, 1, 1, 1, 1, ++ 1, 1, 1, 1, 1, 1, 1, 4, ++ 4, 7, 10, 13, 13, 13, 13, 13, ++ 13, 13, 13, 13, 13, 13, 13, 13, ++ 13, 13, 13, 13, 13, 13, 13, 13, ++ 13, 13, 13, 13, 13, 13, 13, 13, ++ 13, 13, 13, 13, 13, 13, 13, 13, ++ 13, 13, 13, 13, 13, 13, 13, 13, ++ 13, 13, 13, 13, 13, 13, 13, 13, ++ 13, 13, 13, 13, 13, 13, 13, 13, ++ 13, 13, 13, 13, 13, 13, 13, 13, ++ 13, ++ }, ++ { /* Fourth byte table 70. */ ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 7, 7, 14, ++ 14, 21, 21, 28, 28, 35, 35, 42, ++ 42, 49, 49, 56, 56, 63, 63, 70, ++ 70, 77, 77, 84, 84, 84, 91, 91, ++ 98, 98, 105, 105, 105, 105, 105, 105, ++ 105, 112, 119, 119, 126, 133, 133, 140, ++ 147, 147, 154, 161, 161, 168, 175, 175, ++ 175, 175, 175, 175, 175, 175, 175, 175, ++ 175, 175, 175, 175, 175, 175, 175, 175, ++ 175, 175, 175, 175, 175, 175, 175, 175, ++ 175, 175, 175, 175, 175, 175, 175, 175, ++ 175, 175, 175, 175, 175, 175, 175, 175, ++ 175, 175, 175, 175, 175, 175, 175, 175, ++ 175, 175, 175, 175, 175, 175, 175, 175, ++ 175, 175, 175, 175, 175, 175, 175, 175, ++ 175, ++ }, ++ { /* Fourth byte table 71. 
*/ ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 7, 7, 7, ++ 7, 7, 7, 7, 11, 15, 15, 22, ++ 28, 28, 28, 28, 28, 28, 28, 28, ++ 28, 28, 28, 28, 28, 35, 35, 42, ++ 42, 49, 49, 56, 56, 63, 63, 70, ++ 70, 77, 77, 84, 84, 91, 91, 98, ++ 98, 98, 98, 98, 98, 98, 98, 98, ++ 98, 98, 98, 98, 98, 98, 98, 98, ++ 98, 98, 98, 98, 98, 98, 98, 98, ++ 98, 98, 98, 98, 98, 98, 98, 98, ++ 98, 98, 98, 98, 98, 98, 98, 98, ++ 98, 98, 98, 98, 98, 98, 98, 98, ++ 98, 98, 98, 98, 98, 98, 98, 98, ++ 98, 98, 98, 98, 98, 98, 98, 98, ++ 98, ++ }, ++ { /* Fourth byte table 72. */ ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 7, 7, 14, 14, 14, 21, 21, ++ 28, 28, 35, 35, 35, 35, 35, 35, ++ 35, 42, 49, 49, 56, 63, 63, 70, ++ 77, 77, 84, 91, 91, 98, 105, 105, ++ 105, 105, 105, 105, 105, 105, 105, 105, ++ 105, 105, 105, 105, 105, 105, 105, 105, ++ 105, 105, 105, 105, 105, 112, 112, 112, ++ 119, 126, 133, 140, 140, 140, 140, 147, ++ 153, 153, 153, 153, 153, 153, 153, 153, ++ 153, 153, 153, 153, 153, 153, 153, 153, ++ 153, 153, 153, 153, 153, 153, 153, 153, ++ 153, 153, 153, 153, 153, 153, 153, 153, ++ 153, 153, 153, 153, 153, 153, 153, 153, ++ 153, 153, 153, 153, 153, 153, 153, 153, ++ 153, 153, 153, 153, 153, 153, 153, 153, ++ 153, 153, 153, 153, 153, 153, 153, 153, ++ 153, ++ }, ++ { /* Fourth byte table 73. */ ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 3, 6, 9, 12, 15, 18, ++ 21, 24, 27, 30, 33, 36, 39, 42, ++ 45, 45, 45, 45, 45, 45, 45, 45, ++ 45, 45, 45, 45, 45, 45, 45, 45, ++ 45, 45, 45, 45, 45, 45, 45, 45, ++ 45, 45, 45, 45, 45, 45, 45, 45, ++ 45, 45, 45, 45, 45, 45, 45, 45, ++ 45, 45, 45, 45, 45, 45, 45, 45, ++ 45, 45, 45, 45, 45, 45, 45, 45, ++ 45, 45, 45, 45, 45, 45, 45, 45, ++ 45, ++ }, ++ { /* Fourth byte table 74. 
*/ ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 3, 6, 9, 12, 15, 18, 21, ++ 24, 27, 30, 33, 36, 39, 42, 45, ++ 48, 51, 54, 57, 60, 63, 66, 69, ++ 72, 75, 78, 81, 84, 87, 90, 93, ++ 96, 99, 102, 105, 108, 111, 114, 117, ++ 120, 123, 126, 129, 132, 135, 138, 141, ++ 144, 147, 150, 153, 156, 159, 162, 165, ++ 168, 171, 174, 177, 180, 183, 186, 189, ++ 192, 192, 192, 192, 192, 192, 192, 192, ++ 192, 192, 192, 192, 192, 192, 192, 192, ++ 192, 192, 192, 192, 192, 192, 192, 192, ++ 192, 192, 192, 192, 192, 192, 192, 192, ++ 192, 192, 192, 192, 192, 192, 192, 192, ++ 192, 192, 192, 192, 192, 192, 192, 192, ++ 192, 192, 192, 192, 192, 192, 192, 192, ++ 192, 192, 192, 192, 192, 192, 192, 192, ++ 192, ++ }, ++ { /* Fourth byte table 75. */ ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 3, 6, 9, 12, 15, 18, 21, ++ 24, 27, 30, 33, 36, 39, 42, 45, ++ 45, 45, 45, 48, 51, 54, 57, 60, ++ 63, 66, 69, 72, 75, 78, 81, 84, ++ 87, 87, 87, 87, 87, 87, 87, 87, ++ 87, 87, 87, 87, 87, 87, 87, 87, ++ 87, 87, 87, 87, 87, 87, 87, 87, ++ 87, 87, 87, 87, 87, 87, 87, 87, ++ 87, 87, 87, 87, 87, 87, 87, 87, ++ 87, 87, 87, 87, 87, 87, 87, 87, ++ 87, 87, 87, 87, 87, 87, 87, 87, ++ 87, 87, 87, 87, 87, 87, 87, 87, ++ 87, 87, 87, 87, 87, 87, 87, 87, ++ 87, 87, 87, 87, 87, 87, 87, 87, ++ 87, 87, 87, 87, 87, 87, 87, 87, ++ 87, 87, 87, 87, 87, 87, 87, 87, ++ 87, ++ }, ++ { /* Fourth byte table 76. */ ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 5, 10, 15, 20, 20, 20, 20, ++ 20, 20, 20, 20, 20, 20, 20, 20, ++ 20, 23, 25, 27, 29, 31, 33, 35, ++ 37, 39, 41, 43, 45, 47, 49, 51, ++ 53, 56, 59, 62, 65, 68, 71, 74, ++ 77, 80, 83, 86, 89, 92, 95, 101, ++ 107, 113, 119, 125, 131, 137, 143, 149, ++ 155, 161, 167, 173, 179, 194, 206, 212, ++ 212, 212, 212, 212, 212, 212, 212, 212, ++ 212, 212, 212, 212, 212, 212, 212, 212, ++ 212, 212, 212, 212, 212, 212, 212, 212, ++ 212, 212, 212, 212, 212, 212, 212, 212, ++ 212, 212, 212, 212, 212, 212, 212, 212, ++ 212, 212, 212, 212, 212, 212, 212, 212, ++ 212, 212, 212, 212, 212, 212, 212, 212, ++ 212, 212, 212, 212, 212, 212, 212, 212, ++ 212, ++ }, ++ { /* Fourth byte table 77. 
*/ ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 3, 6, 9, 12, 15, 18, 21, ++ 24, 27, 30, 33, 36, 39, 42, 45, ++ 48, 51, 54, 57, 60, 63, 66, 69, ++ 72, 75, 78, 81, 84, 87, 90, 93, ++ 96, 99, 102, 105, 108, 111, 114, 117, ++ 120, 123, 126, 129, 132, 135, 138, 141, ++ 144, 147, 149, 151, 153, 155, 157, 159, ++ 161, 163, 165, 167, 169, 171, 173, 175, ++ 177, 177, 177, 177, 177, 177, 177, 177, ++ 177, 177, 177, 177, 177, 177, 177, 177, ++ 177, 177, 177, 177, 177, 177, 177, 177, ++ 177, 177, 177, 177, 177, 177, 177, 177, ++ 177, 177, 177, 177, 177, 177, 177, 177, ++ 177, 177, 177, 177, 177, 177, 177, 177, ++ 177, 177, 177, 177, 177, 177, 177, 177, ++ 177, 177, 177, 177, 177, 177, 177, 177, ++ 177, ++ }, ++ { /* Fourth byte table 78. */ ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 4, 8, 12, 16, 20, 24, 28, ++ 32, 36, 41, 46, 51, 53, 56, 58, ++ 61, 64, 67, 70, 73, 76, 79, 82, ++ 85, 88, 91, 94, 97, 100, 103, 106, ++ 109, 112, 115, 118, 121, 124, 127, 130, ++ 133, 136, 139, 142, 145, 148, 151, 154, ++ 157, 160, 163, 166, 169, 172, 175, 178, ++ 181, 184, 187, 190, 193, 196, 199, 202, ++ 202, 202, 202, 202, 202, 202, 202, 202, ++ 202, 202, 202, 202, 202, 202, 202, 202, ++ 202, 202, 202, 202, 202, 202, 202, 202, ++ 202, 202, 202, 202, 202, 202, 202, 202, ++ 202, 202, 202, 202, 202, 202, 202, 202, ++ 202, 202, 202, 202, 202, 202, 202, 202, ++ 202, 202, 202, 202, 202, 202, 202, 202, ++ 202, 202, 202, 202, 202, 202, 202, 202, ++ 202, ++ }, ++ { /* Fourth byte table 79. */ ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 2, 4, 7, 9, 11, 13, 15, ++ 17, 20, 24, 26, 28, 31, 34, 36, ++ 38, 40, 43, 46, 49, 52, 55, 57, ++ 59, 61, 63, 65, 68, 70, 72, 74, ++ 77, 80, 82, 85, 88, 91, 93, 96, ++ 101, 107, 109, 112, 115, 118, 121, 128, ++ 136, 138, 140, 143, 145, 147, 149, 152, ++ 154, 156, 158, 160, 162, 165, 167, 169, ++ 171, 171, 171, 171, 171, 171, 171, 171, ++ 171, 171, 171, 171, 171, 171, 171, 171, ++ 171, 171, 171, 171, 171, 171, 171, 171, ++ 171, 171, 171, 171, 171, 171, 171, 171, ++ 171, 171, 171, 171, 171, 171, 171, 171, ++ 171, 171, 171, 171, 171, 171, 171, 171, ++ 171, 171, 171, 171, 171, 171, 171, 171, ++ 171, 171, 171, 171, 171, 171, 171, 171, ++ 171, ++ }, ++ { /* Fourth byte table 80. 
*/ ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 3, 6, 10, 12, 14, 16, 22, ++ 25, 27, 29, 31, 33, 35, 37, 39, ++ 41, 43, 45, 48, 50, 52, 55, 58, ++ 60, 64, 67, 69, 71, 73, 75, 80, ++ 85, 89, 93, 97, 101, 105, 109, 113, ++ 117, 121, 126, 131, 136, 141, 146, 151, ++ 156, 161, 166, 171, 176, 181, 186, 191, ++ 196, 201, 206, 211, 216, 221, 226, 231, ++ 234, 234, 234, 234, 234, 234, 234, 234, ++ 234, 234, 234, 234, 234, 234, 234, 234, ++ 234, 234, 234, 234, 234, 234, 234, 234, ++ 234, 234, 234, 234, 234, 234, 234, 234, ++ 234, 234, 234, 234, 234, 234, 234, 234, ++ 234, 234, 234, 234, 234, 234, 234, 234, ++ 234, 234, 234, 234, 234, 234, 234, 234, ++ 234, 234, 234, 234, 234, 234, 234, 234, ++ 234, ++ }, ++ { /* Fourth byte table 81. */ ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 4, 8, 12, 16, 20, 24, 28, ++ 32, 36, 40, 44, 48, 52, 56, 56, ++ 56, 60, 60, 64, 64, 64, 68, 72, ++ 76, 80, 84, 88, 92, 96, 100, 104, ++ 104, 108, 108, 112, 112, 112, 116, 120, ++ 120, 120, 120, 124, 128, 132, 136, 136, ++ 136, 140, 144, 148, 152, 156, 160, 164, ++ 168, 172, 176, 180, 184, 188, 192, 196, ++ 200, 200, 200, 200, 200, 200, 200, 200, ++ 200, 200, 200, 200, 200, 200, 200, 200, ++ 200, 200, 200, 200, 200, 200, 200, 200, ++ 200, 200, 200, 200, 200, 200, 200, 200, ++ 200, 200, 200, 200, 200, 200, 200, 200, ++ 200, 200, 200, 200, 200, 200, 200, 200, ++ 200, 200, 200, 200, 200, 200, 200, 200, ++ 200, 200, 200, 200, 200, 200, 200, 200, ++ 200, ++ }, ++ { /* Fourth byte table 82. */ ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 4, 8, 12, 16, 20, 24, 28, ++ 32, 36, 40, 44, 48, 52, 56, 60, ++ 64, 68, 72, 76, 80, 84, 88, 92, ++ 96, 100, 104, 108, 112, 116, 120, 124, ++ 128, 132, 136, 140, 144, 148, 152, 156, ++ 160, 164, 168, 172, 172, 172, 172, 172, ++ 172, 176, 180, 184, 188, 192, 196, 200, ++ 204, 208, 212, 216, 220, 224, 228, 232, ++ 236, 236, 236, 236, 236, 236, 236, 236, ++ 236, 236, 236, 236, 236, 236, 236, 236, ++ 236, 236, 236, 236, 236, 236, 236, 236, ++ 236, 236, 236, 236, 236, 236, 236, 236, ++ 236, 236, 236, 236, 236, 236, 236, 236, ++ 236, 236, 236, 236, 236, 236, 236, 236, ++ 236, 236, 236, 236, 236, 236, 236, 236, ++ 236, 236, 236, 236, 236, 236, 236, 236, ++ 236, ++ }, ++ { /* Fourth byte table 83. 
*/ ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 4, 8, 12, 16, 20, 24, 28, ++ 32, 36, 40, 44, 48, 52, 56, 60, ++ 65, 70, 75, 79, 83, 87, 92, 97, ++ 102, 106, 110, 110, 110, 110, 110, 110, ++ 110, 110, 110, 110, 110, 110, 110, 110, ++ 110, 110, 110, 110, 110, 110, 110, 110, ++ 110, 110, 110, 110, 110, 110, 110, 110, ++ 110, 110, 110, 110, 110, 110, 110, 110, ++ 110, 110, 110, 110, 110, 110, 110, 110, ++ 110, 110, 110, 110, 110, 110, 110, 110, ++ 110, 110, 110, 110, 110, 110, 110, 110, ++ 110, 110, 110, 110, 110, 110, 110, 110, ++ 110, 110, 110, 110, 110, 110, 110, 110, ++ 110, 110, 110, 110, 110, 110, 110, 110, ++ 110, 110, 110, 110, 110, 110, 110, 110, ++ 110, 110, 110, 110, 110, 110, 110, 110, ++ 110, ++ }, ++ { /* Fourth byte table 84. */ ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 2, 4, 6, 9, 12, 14, 16, ++ 16, 16, 16, 16, 16, 16, 16, 16, ++ 16, 16, 16, 16, 20, 24, 28, 32, ++ 36, 36, 36, 36, 36, 36, 41, 41, ++ 46, 48, 50, 52, 54, 56, 58, 60, ++ 62, 64, 65, 70, 75, 82, 89, 94, ++ 99, 104, 109, 114, 119, 124, 129, 134, ++ 134, 139, 144, 149, 154, 159, 159, 164, ++ 164, 164, 164, 164, 164, 164, 164, 164, ++ 164, 164, 164, 164, 164, 164, 164, 164, ++ 164, 164, 164, 164, 164, 164, 164, 164, ++ 164, 164, 164, 164, 164, 164, 164, 164, ++ 164, 164, 164, 164, 164, 164, 164, 164, ++ 164, 164, 164, 164, 164, 164, 164, 164, ++ 164, 164, 164, 164, 164, 164, 164, 164, ++ 164, 164, 164, 164, 164, 164, 164, 164, ++ 164, ++ }, ++ { /* Fourth byte table 85. */ ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 5, 10, 10, 15, 20, 20, 25, ++ 30, 35, 40, 45, 50, 55, 60, 65, ++ 69, 71, 73, 75, 77, 79, 81, 83, ++ 85, 87, 89, 91, 93, 95, 97, 99, ++ 101, 103, 105, 107, 109, 111, 113, 115, ++ 117, 119, 121, 123, 125, 127, 129, 131, ++ 133, 135, 137, 139, 141, 143, 145, 147, ++ 149, 151, 153, 155, 157, 159, 161, 163, ++ 165, 165, 165, 165, 165, 165, 165, 165, ++ 165, 165, 165, 165, 165, 165, 165, 165, ++ 165, 165, 165, 165, 165, 165, 165, 165, ++ 165, 165, 165, 165, 165, 165, 165, 165, ++ 165, 165, 165, 165, 165, 165, 165, 165, ++ 165, 165, 165, 165, 165, 165, 165, 165, ++ 165, 165, 165, 165, 165, 165, 165, 165, ++ 165, 165, 165, 165, 165, 165, 165, 165, ++ 165, ++ }, ++ { /* Fourth byte table 86. 
*/ ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 2, 4, 6, 8, 10, 12, 14, ++ 16, 18, 20, 22, 24, 26, 28, 30, ++ 32, 34, 36, 38, 40, 42, 44, 46, ++ 48, 50, 52, 54, 56, 58, 60, 62, ++ 64, 66, 68, 70, 72, 76, 80, 82, ++ 84, 86, 88, 90, 92, 94, 96, 98, ++ 100, 104, 108, 108, 108, 108, 108, 108, ++ 108, 108, 108, 108, 108, 108, 108, 108, ++ 108, 108, 108, 108, 108, 108, 108, 108, ++ 108, 108, 108, 108, 108, 108, 108, 108, ++ 108, 108, 108, 108, 108, 108, 108, 108, ++ 108, 108, 108, 108, 108, 108, 108, 108, ++ 108, 108, 108, 108, 108, 108, 108, 108, ++ 108, 108, 108, 108, 108, 108, 108, 108, ++ 108, 108, 108, 108, 108, 108, 108, 108, ++ 108, 108, 108, 108, 108, 108, 108, 108, ++ 108, ++ }, ++ { /* Fourth byte table 87. */ ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 2, 4, 6, 8, ++ 10, 12, 14, 16, 18, 20, 24, 26, ++ 28, 30, 32, 34, 36, 38, 40, 42, ++ 44, 46, 48, 54, 60, 66, 72, 78, ++ 84, 90, 96, 102, 108, 114, 120, 126, ++ 132, 138, 144, 150, 156, 158, 160, 162, ++ 164, 164, 164, 164, 164, 164, 164, 164, ++ 164, 164, 164, 164, 164, 164, 164, 164, ++ 164, 164, 164, 164, 164, 164, 164, 164, ++ 164, 164, 164, 164, 164, 164, 164, 164, ++ 164, 164, 164, 164, 164, 164, 164, 164, ++ 164, 164, 164, 164, 164, 164, 164, 164, ++ 164, 164, 164, 164, 164, 164, 164, 164, ++ 164, 164, 164, 164, 164, 164, 164, 164, ++ 164, ++ }, ++ { /* Fourth byte table 88. */ ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 4, 8, 12, 16, 20, 24, 28, ++ 32, 36, 40, 44, 48, 52, 56, 60, ++ 64, 68, 72, 76, 80, 84, 88, 92, ++ 96, 100, 104, 108, 112, 116, 120, 124, ++ 128, 132, 136, 140, 144, 148, 152, 156, ++ 160, 164, 168, 172, 176, 180, 184, 188, ++ 192, 196, 200, 204, 208, 212, 216, 220, ++ 224, 228, 232, 236, 240, 244, 248, 248, ++ 248, 248, 248, 248, 248, 248, 248, 248, ++ 248, 248, 248, 248, 248, 248, 248, 248, ++ 248, 248, 248, 248, 248, 248, 248, 248, ++ 248, 248, 248, 248, 248, 248, 248, 248, ++ 248, 248, 248, 248, 248, 248, 248, 248, ++ 248, 248, 248, 248, 248, 248, 248, 248, ++ 248, 248, 248, 248, 248, 248, 248, 248, ++ 248, 248, 248, 248, 248, 248, 248, 248, ++ 248, ++ }, ++ { /* Fourth byte table 89. 
*/ ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 6, 12, 18, 24, 30, 36, 42, ++ 48, 48, 48, 48, 48, 48, 48, 48, ++ 48, 48, 48, 48, 48, 48, 48, 48, ++ 48, 48, 48, 48, 48, 48, 48, 48, ++ 48, 48, 48, 48, 48, 48, 48, 48, ++ 48, 48, 48, 48, 48, 48, 48, 48, ++ 48, 54, 60, 68, 76, 84, 92, 100, ++ 108, 116, 122, 155, 170, 178, 178, 178, ++ 178, 178, 178, 178, 178, 178, 178, 178, ++ 178, 178, 178, 178, 178, 178, 178, 178, ++ 178, 178, 178, 178, 178, 178, 178, 178, ++ 178, 178, 178, 178, 178, 178, 178, 178, ++ 178, 178, 178, 178, 178, 178, 178, 178, ++ 178, 178, 178, 178, 178, 178, 178, 178, ++ 178, 178, 178, 178, 178, 178, 178, 178, ++ 178, 178, 178, 178, 178, 178, 178, 178, ++ 178, ++ }, ++ { /* Fourth byte table 90. */ ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 1, 4, 7, 8, 9, 10, 11, ++ 14, 17, 20, 20, 20, 20, 20, 20, ++ 20, 20, 20, 20, 20, 20, 20, 20, ++ 20, 20, 20, 20, 20, 20, 20, 20, ++ 20, 22, 25, 28, 29, 30, 31, 32, ++ 33, 34, 37, 40, 43, 46, 49, 52, ++ 55, 55, 55, 55, 55, 55, 55, 55, ++ 55, 55, 55, 55, 55, 55, 55, 55, ++ 55, 55, 55, 55, 55, 55, 55, 55, ++ 55, 55, 55, 55, 55, 55, 55, 55, ++ 55, 55, 55, 55, 55, 55, 55, 55, ++ 55, 55, 55, 55, 55, 55, 55, 55, ++ 55, 55, 55, 55, 55, 55, 55, 55, ++ 55, 55, 55, 55, 55, 55, 55, 55, ++ 55, ++ }, ++ { /* Fourth byte table 91. */ ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 3, 6, 9, 12, 15, 15, 15, ++ 16, 17, 20, 23, 26, 29, 30, 31, ++ 32, 33, 36, 37, 37, 38, 39, 40, ++ 41, 44, 45, 46, 47, 48, 51, 54, ++ 55, 56, 57, 58, 59, 60, 61, 62, ++ 62, 63, 64, 65, 66, 66, 66, 66, ++ 66, 69, 73, 76, 76, 79, 79, 82, ++ 86, 89, 93, 96, 100, 103, 107, 110, ++ 114, 114, 114, 114, 114, 114, 114, 114, ++ 114, 114, 114, 114, 114, 114, 114, 114, ++ 114, 114, 114, 114, 114, 114, 114, 114, ++ 114, 114, 114, 114, 114, 114, 114, 114, ++ 114, 114, 114, 114, 114, 114, 114, 114, ++ 114, 114, 114, 114, 114, 114, 114, 114, ++ 114, 114, 114, 114, 114, 114, 114, 114, ++ 114, 114, 114, 114, 114, 114, 114, 114, ++ 114, ++ }, ++ { /* Fourth byte table 92. 
*/ ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 2, 6, 10, 14, 18, 22, 26, ++ 30, 34, 38, 42, 46, 50, 52, 54, ++ 56, 58, 60, 62, 64, 66, 68, 70, ++ 72, 74, 76, 78, 80, 82, 84, 86, ++ 88, 90, 92, 94, 96, 98, 100, 102, ++ 104, 106, 108, 110, 112, 114, 116, 118, ++ 120, 122, 124, 126, 128, 130, 132, 134, ++ 136, 138, 140, 142, 144, 146, 148, 150, ++ 152, 152, 152, 152, 152, 152, 152, 152, ++ 152, 152, 152, 152, 152, 152, 152, 152, ++ 152, 152, 152, 152, 152, 152, 152, 152, ++ 152, 152, 152, 152, 152, 152, 152, 152, ++ 152, 152, 152, 152, 152, 152, 152, 152, ++ 152, 152, 152, 152, 152, 152, 152, 152, ++ 152, 152, 152, 152, 152, 152, 152, 152, ++ 152, 152, 152, 152, 152, 152, 152, 152, ++ 152, ++ }, ++ { /* Fourth byte table 93. */ ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 2, 4, 6, 8, 10, 12, 14, ++ 16, 18, 20, 22, 24, 26, 28, 30, ++ 32, 34, 36, 38, 40, 42, 44, 46, ++ 48, 50, 52, 54, 56, 58, 60, 62, ++ 64, 66, 68, 70, 72, 74, 76, 78, ++ 80, 82, 84, 86, 88, 90, 92, 94, ++ 96, 98, 100, 102, 104, 106, 112, 118, ++ 124, 130, 136, 142, 146, 150, 150, 150, ++ 150, 150, 150, 150, 150, 150, 150, 150, ++ 150, 150, 150, 150, 150, 150, 150, 150, ++ 150, 150, 150, 150, 150, 150, 150, 150, ++ 150, 150, 150, 150, 150, 150, 150, 150, ++ 150, 150, 150, 150, 150, 150, 150, 150, ++ 150, 150, 150, 150, 150, 150, 150, 150, ++ 150, 150, 150, 150, 150, 150, 150, 150, ++ 150, 150, 150, 150, 150, 150, 150, 150, ++ 150, ++ }, ++ { /* Fourth byte table 94. */ ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 1, 2, 3, 4, 5, 6, ++ 7, 8, 9, 10, 11, 12, 13, 14, ++ 15, 16, 17, 18, 19, 20, 21, 22, ++ 23, 24, 25, 26, 27, 28, 29, 30, ++ 31, 32, 33, 34, 35, 36, 37, 38, ++ 39, 40, 41, 42, 43, 44, 45, 46, ++ 47, 48, 49, 50, 51, 52, 53, 54, ++ 55, 56, 57, 58, 59, 60, 61, 62, ++ 63, 63, 63, 63, 63, 63, 63, 63, ++ 63, 63, 63, 63, 63, 63, 63, 63, ++ 63, 63, 63, 63, 63, 63, 63, 63, ++ 63, 63, 63, 63, 63, 63, 63, 63, ++ 63, 63, 63, 63, 63, 63, 63, 63, ++ 63, 63, 63, 63, 63, 63, 63, 63, ++ 63, 63, 63, 63, 63, 63, 63, 63, ++ 63, 63, 63, 63, 63, 63, 63, 63, ++ 63, ++ }, ++ { /* Fourth byte table 95. 
*/ ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 1, 2, 3, 4, 5, 6, 7, ++ 8, 9, 10, 11, 12, 13, 14, 15, ++ 16, 17, 18, 19, 20, 21, 22, 23, ++ 24, 25, 26, 27, 28, 29, 30, 31, ++ 34, 37, 40, 43, 46, 49, 52, 55, ++ 58, 61, 64, 67, 70, 73, 76, 79, ++ 82, 85, 88, 91, 94, 97, 100, 103, ++ 106, 109, 112, 115, 118, 121, 124, 127, ++ 130, 130, 130, 130, 130, 130, 130, 130, ++ 130, 130, 130, 130, 130, 130, 130, 130, ++ 130, 130, 130, 130, 130, 130, 130, 130, ++ 130, 130, 130, 130, 130, 130, 130, 130, ++ 130, 130, 130, 130, 130, 130, 130, 130, ++ 130, 130, 130, 130, 130, 130, 130, 130, ++ 130, 130, 130, 130, 130, 130, 130, 130, ++ 130, 130, 130, 130, 130, 130, 130, 130, ++ 130, ++ }, ++ { /* Fourth byte table 96. */ ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 3, 6, 9, 12, 15, 18, 21, ++ 24, 27, 30, 33, 36, 39, 42, 45, ++ 48, 51, 54, 57, 60, 63, 66, 69, ++ 72, 75, 78, 81, 84, 87, 90, 93, ++ 96, 99, 102, 105, 108, 111, 114, 117, ++ 120, 123, 126, 129, 132, 135, 138, 141, ++ 144, 147, 150, 153, 156, 159, 162, 165, ++ 168, 171, 174, 177, 180, 183, 186, 189, ++ 189, 189, 189, 189, 189, 189, 189, 189, ++ 189, 189, 189, 189, 189, 189, 189, 189, ++ 189, 189, 189, 189, 189, 189, 189, 189, ++ 189, 189, 189, 189, 189, 189, 189, 189, ++ 189, 189, 189, 189, 189, 189, 189, 189, ++ 189, 189, 189, 189, 189, 189, 189, 189, ++ 189, 189, 189, 189, 189, 189, 189, 189, ++ 189, 189, 189, 189, 189, 189, 189, 189, ++ 189, ++ }, ++ { /* Fourth byte table 97. */ ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 3, 6, 9, 12, 15, ++ 18, 18, 18, 21, 24, 27, 30, 33, ++ 36, 36, 36, 39, 42, 45, 48, 51, ++ 54, 54, 54, 57, 60, 63, 63, 63, ++ 63, 65, 67, 69, 72, 74, 76, 79, ++ 79, 82, 85, 88, 91, 94, 97, 100, ++ 100, 100, 100, 100, 100, 100, 100, 100, ++ 100, 100, 100, 100, 100, 100, 100, 100, ++ 100, 100, 100, 100, 100, 100, 100, 100, ++ 100, 100, 100, 100, 100, 100, 100, 100, ++ 100, 100, 100, 100, 100, 100, 100, 100, ++ 100, 100, 100, 100, 100, 100, 100, 100, ++ 100, 100, 100, 100, 100, 100, 100, 100, ++ 100, 100, 100, 100, 100, 100, 100, 100, ++ 100, 100, 100, 100, 100, 100, 100, 100, ++ 100, 100, 100, 100, 100, 100, 100, 100, ++ 100, ++ }, ++ { /* Fourth byte table 98. 
*/ ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 9, ++ 18, 31, 44, 57, 70, 83, 83, 83, ++ 83, 83, 83, 83, 83, 83, 83, 83, ++ 83, 83, 83, 83, 83, 83, 83, 83, ++ 83, 83, 83, 83, 83, 83, 83, 83, ++ 83, 83, 83, 83, 83, 83, 83, 83, ++ 83, 83, 83, 83, 83, 83, 83, 83, ++ 83, 83, 83, 83, 83, 83, 83, 83, ++ 83, 83, 83, 83, 83, 83, 83, 83, ++ 83, 83, 83, 83, 83, 83, 83, 83, ++ 83, 83, 83, 83, 83, 83, 83, 83, ++ 83, 83, 83, 83, 83, 83, 83, 83, ++ 83, 83, 83, 83, 83, 83, 83, 83, ++ 83, ++ }, ++ { /* Fourth byte table 99. */ ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 9, 18, 31, 44, ++ 57, 57, 57, 57, 57, 57, 57, 57, ++ 57, 57, 57, 57, 57, 57, 57, 57, ++ 57, 57, 57, 57, 57, 57, 57, 57, ++ 57, 57, 57, 57, 57, 57, 57, 57, ++ 57, 57, 57, 57, 57, 57, 57, 57, ++ 57, 57, 57, 57, 57, 57, 57, 57, ++ 57, 57, 57, 57, 57, 57, 57, 57, ++ 57, 57, 57, 57, 57, 57, 57, 57, ++ 57, ++ }, ++ { /* Fourth byte table 100. */ ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 13, 13, 13, 13, 13, 13, 13, ++ 13, 13, 13, 13, 13, 13, 13, 13, ++ 13, 13, 13, 13, 13, 13, 13, 13, ++ 13, 13, 13, 13, 13, 13, 13, 13, ++ 13, 13, 13, 13, 13, 13, 13, 13, ++ 13, 13, 13, 13, 13, 13, 13, 13, ++ 13, 13, 13, 13, 13, 13, 13, 13, ++ 13, 13, 13, 13, 13, 13, 13, 13, ++ 13, 13, 13, 13, 13, 13, 13, 13, ++ 13, 13, 13, 13, 13, 13, 13, 13, ++ 13, 13, 13, 13, 13, 13, 13, 13, ++ 13, 13, 13, 13, 13, 13, 13, 13, ++ 13, 13, 13, 13, 13, 13, 13, 13, ++ 13, 13, 13, 13, 13, 13, 13, 13, ++ 13, 13, 13, 13, 13, 13, 13, 13, ++ 13, 13, 13, 13, 13, 13, 13, 13, ++ 13, ++ }, ++ { /* Fourth byte table 101. 
*/ ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 1, 2, 3, 4, 5, 6, 7, ++ 8, 9, 10, 11, 12, 13, 14, 15, ++ 16, 17, 18, 19, 20, 21, 22, 23, ++ 24, 25, 26, 27, 28, 29, 30, 31, ++ 32, 33, 34, 35, 36, 37, 38, 39, ++ 40, 41, 42, 43, 44, 45, 46, 47, ++ 48, 49, 50, 51, 52, 53, 54, 55, ++ 56, 57, 58, 59, 60, 61, 62, 63, ++ 64, 64, 64, 64, 64, 64, 64, 64, ++ 64, 64, 64, 64, 64, 64, 64, 64, ++ 64, 64, 64, 64, 64, 64, 64, 64, ++ 64, 64, 64, 64, 64, 64, 64, 64, ++ 64, 64, 64, 64, 64, 64, 64, 64, ++ 64, 64, 64, 64, 64, 64, 64, 64, ++ 64, 64, 64, 64, 64, 64, 64, 64, ++ 64, 64, 64, 64, 64, 64, 64, 64, ++ 64, ++ }, ++ { /* Fourth byte table 102. */ ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 1, 2, 3, 4, 5, 6, 7, ++ 8, 9, 10, 11, 12, 13, 14, 15, ++ 16, 17, 18, 19, 20, 21, 21, 22, ++ 23, 24, 25, 26, 27, 28, 29, 30, ++ 31, 32, 33, 34, 35, 36, 37, 38, ++ 39, 40, 41, 42, 43, 44, 45, 46, ++ 47, 48, 49, 50, 51, 52, 53, 54, ++ 55, 56, 57, 58, 59, 60, 61, 62, ++ 63, 63, 63, 63, 63, 63, 63, 63, ++ 63, 63, 63, 63, 63, 63, 63, 63, ++ 63, 63, 63, 63, 63, 63, 63, 63, ++ 63, 63, 63, 63, 63, 63, 63, 63, ++ 63, 63, 63, 63, 63, 63, 63, 63, ++ 63, 63, 63, 63, 63, 63, 63, 63, ++ 63, 63, 63, 63, 63, 63, 63, 63, ++ 63, 63, 63, 63, 63, 63, 63, 63, ++ 63, ++ }, ++ { /* Fourth byte table 103. */ ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 1, 2, 3, 4, 5, 6, 7, ++ 8, 9, 10, 11, 12, 13, 14, 15, ++ 16, 17, 18, 19, 20, 21, 22, 23, ++ 24, 25, 26, 27, 28, 29, 29, 30, ++ 31, 31, 31, 32, 32, 32, 33, 34, ++ 34, 34, 35, 36, 37, 38, 38, 39, ++ 40, 41, 42, 43, 44, 45, 46, 47, ++ 48, 49, 50, 50, 51, 51, 52, 53, ++ 54, 54, 54, 54, 54, 54, 54, 54, ++ 54, 54, 54, 54, 54, 54, 54, 54, ++ 54, 54, 54, 54, 54, 54, 54, 54, ++ 54, 54, 54, 54, 54, 54, 54, 54, ++ 54, 54, 54, 54, 54, 54, 54, 54, ++ 54, 54, 54, 54, 54, 54, 54, 54, ++ 54, 54, 54, 54, 54, 54, 54, 54, ++ 54, 54, 54, 54, 54, 54, 54, 54, ++ 54, ++ }, ++ { /* Fourth byte table 104. 
*/ ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 1, 2, 3, 4, 4, 5, 6, ++ 7, 8, 9, 10, 11, 12, 13, 14, ++ 15, 16, 17, 18, 19, 20, 21, 22, ++ 23, 24, 25, 26, 27, 28, 29, 30, ++ 31, 32, 33, 34, 35, 36, 37, 38, ++ 39, 40, 41, 42, 43, 44, 45, 46, ++ 47, 48, 49, 50, 51, 52, 53, 54, ++ 55, 56, 57, 58, 59, 60, 61, 62, ++ 63, 63, 63, 63, 63, 63, 63, 63, ++ 63, 63, 63, 63, 63, 63, 63, 63, ++ 63, 63, 63, 63, 63, 63, 63, 63, ++ 63, 63, 63, 63, 63, 63, 63, 63, ++ 63, 63, 63, 63, 63, 63, 63, 63, ++ 63, 63, 63, 63, 63, 63, 63, 63, ++ 63, 63, 63, 63, 63, 63, 63, 63, ++ 63, 63, 63, 63, 63, 63, 63, 63, ++ 63, ++ }, ++ { /* Fourth byte table 105. */ ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 1, 2, 3, 4, 5, 6, 6, ++ 7, 8, 9, 10, 10, 10, 11, 12, ++ 13, 14, 15, 16, 17, 18, 18, 19, ++ 20, 21, 22, 23, 24, 25, 25, 26, ++ 27, 28, 29, 30, 31, 32, 33, 34, ++ 35, 36, 37, 38, 39, 40, 41, 42, ++ 43, 44, 45, 46, 47, 48, 49, 50, ++ 51, 52, 53, 53, 54, 55, 56, 57, ++ 57, 57, 57, 57, 57, 57, 57, 57, ++ 57, 57, 57, 57, 57, 57, 57, 57, ++ 57, 57, 57, 57, 57, 57, 57, 57, ++ 57, 57, 57, 57, 57, 57, 57, 57, ++ 57, 57, 57, 57, 57, 57, 57, 57, ++ 57, 57, 57, 57, 57, 57, 57, 57, ++ 57, 57, 57, 57, 57, 57, 57, 57, ++ 57, 57, 57, 57, 57, 57, 57, 57, ++ 57, ++ }, ++ { /* Fourth byte table 106. */ ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 1, 2, 3, 4, 5, 5, 6, ++ 6, 6, 6, 7, 8, 9, 10, 11, ++ 12, 13, 13, 14, 15, 16, 17, 18, ++ 19, 20, 21, 22, 23, 24, 25, 26, ++ 27, 28, 29, 30, 31, 32, 33, 34, ++ 35, 36, 37, 38, 39, 40, 41, 42, ++ 43, 44, 45, 46, 47, 48, 49, 50, ++ 51, 52, 53, 54, 55, 56, 57, 58, ++ 59, 59, 59, 59, 59, 59, 59, 59, ++ 59, 59, 59, 59, 59, 59, 59, 59, ++ 59, 59, 59, 59, 59, 59, 59, 59, ++ 59, 59, 59, 59, 59, 59, 59, 59, ++ 59, 59, 59, 59, 59, 59, 59, 59, ++ 59, 59, 59, 59, 59, 59, 59, 59, ++ 59, 59, 59, 59, 59, 59, 59, 59, ++ 59, 59, 59, 59, 59, 59, 59, 59, ++ 59, ++ }, ++ { /* Fourth byte table 107. 
*/ ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 1, 2, 3, 4, 5, 6, 7, ++ 8, 9, 10, 11, 12, 13, 14, 15, ++ 16, 17, 18, 19, 20, 21, 22, 23, ++ 24, 25, 26, 27, 28, 29, 30, 31, ++ 32, 33, 34, 35, 36, 37, 38, 39, ++ 40, 41, 42, 43, 44, 45, 46, 47, ++ 48, 49, 50, 51, 52, 53, 54, 55, ++ 56, 57, 58, 59, 60, 61, 62, 63, ++ 64, 64, 64, 64, 64, 64, 64, 64, ++ 64, 64, 64, 64, 64, 64, 64, 64, ++ 64, 64, 64, 64, 64, 64, 64, 64, ++ 64, 64, 64, 64, 64, 64, 64, 64, ++ 64, 64, 64, 64, 64, 64, 64, 64, ++ 64, 64, 64, 64, 64, 64, 64, 64, ++ 64, 64, 64, 64, 64, 64, 64, 64, ++ 64, 64, 64, 64, 64, 64, 64, 64, ++ 64, ++ }, ++ { /* Fourth byte table 108. */ ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 1, 2, 3, 4, 5, 6, 7, ++ 8, 9, 10, 11, 12, 13, 14, 15, ++ 16, 17, 18, 19, 20, 21, 22, 23, ++ 24, 25, 26, 27, 28, 29, 30, 31, ++ 32, 33, 34, 35, 36, 37, 38, 39, ++ 40, 41, 42, 43, 44, 45, 46, 47, ++ 48, 49, 50, 51, 52, 53, 54, 55, ++ 56, 57, 58, 59, 60, 61, 62, 63, ++ 64, 64, 64, 64, 64, 64, 64, 64, ++ 64, 64, 64, 64, 64, 64, 64, 64, ++ 64, 64, 64, 64, 64, 64, 64, 64, ++ 64, 64, 64, 64, 64, 64, 64, 64, ++ 64, 64, 64, 64, 64, 64, 64, 64, ++ 64, 64, 64, 64, 64, 64, 64, 64, ++ 64, 64, 64, 64, 64, 64, 64, 64, ++ 64, 64, 64, 64, 64, 64, 64, 64, ++ 64, ++ }, ++ { /* Fourth byte table 109. */ ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 1, 2, 3, 4, 5, 6, 7, ++ 8, 9, 10, 11, 12, 13, 14, 15, ++ 16, 17, 18, 19, 20, 21, 22, 23, ++ 24, 25, 26, 27, 28, 29, 30, 31, ++ 32, 33, 34, 35, 36, 37, 38, 39, ++ 40, 41, 42, 43, 44, 45, 46, 47, ++ 48, 49, 50, 51, 52, 53, 54, 55, ++ 56, 57, 58, 59, 60, 61, 62, 63, ++ 64, 64, 64, 64, 64, 64, 64, 64, ++ 64, 64, 64, 64, 64, 64, 64, 64, ++ 64, 64, 64, 64, 64, 64, 64, 64, ++ 64, 64, 64, 64, 64, 64, 64, 64, ++ 64, 64, 64, 64, 64, 64, 64, 64, ++ 64, 64, 64, 64, 64, 64, 64, 64, ++ 64, 64, 64, 64, 64, 64, 64, 64, ++ 64, 64, 64, 64, 64, 64, 64, 64, ++ 64, ++ }, ++ { /* Fourth byte table 110. 
*/ ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 1, 2, 3, 4, 5, 6, 7, ++ 8, 9, 10, 11, 12, 13, 14, 15, ++ 16, 17, 18, 19, 20, 21, 22, 23, ++ 24, 25, 26, 27, 28, 29, 30, 31, ++ 32, 33, 34, 35, 36, 37, 38, 39, ++ 40, 41, 42, 43, 44, 45, 46, 47, ++ 48, 49, 50, 51, 52, 53, 54, 55, ++ 56, 57, 58, 59, 60, 61, 62, 63, ++ 64, 64, 64, 64, 64, 64, 64, 64, ++ 64, 64, 64, 64, 64, 64, 64, 64, ++ 64, 64, 64, 64, 64, 64, 64, 64, ++ 64, 64, 64, 64, 64, 64, 64, 64, ++ 64, 64, 64, 64, 64, 64, 64, 64, ++ 64, 64, 64, 64, 64, 64, 64, 64, ++ 64, 64, 64, 64, 64, 64, 64, 64, ++ 64, 64, 64, 64, 64, 64, 64, 64, ++ 64, ++ }, ++ { /* Fourth byte table 111. */ ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 1, 2, 3, 4, 5, 6, 7, ++ 8, 9, 10, 11, 12, 13, 14, 15, ++ 16, 17, 18, 19, 20, 21, 22, 23, ++ 24, 25, 26, 27, 28, 29, 30, 31, ++ 32, 33, 34, 35, 36, 38, 40, 40, ++ 40, 42, 44, 46, 48, 50, 52, 54, ++ 56, 58, 60, 62, 64, 66, 68, 70, ++ 72, 74, 76, 78, 80, 82, 84, 86, ++ 88, 88, 88, 88, 88, 88, 88, 88, ++ 88, 88, 88, 88, 88, 88, 88, 88, ++ 88, 88, 88, 88, 88, 88, 88, 88, ++ 88, 88, 88, 88, 88, 88, 88, 88, ++ 88, 88, 88, 88, 88, 88, 88, 88, ++ 88, 88, 88, 88, 88, 88, 88, 88, ++ 88, 88, 88, 88, 88, 88, 88, 88, ++ 88, 88, 88, 88, 88, 88, 88, 88, ++ 88, ++ }, ++ { /* Fourth byte table 112. */ ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 2, 5, 7, 9, 11, 13, 15, ++ 17, 19, 21, 23, 25, 27, 29, 31, ++ 33, 35, 37, 39, 41, 43, 45, 47, ++ 49, 51, 53, 55, 58, 60, 62, 64, ++ 66, 68, 70, 72, 74, 76, 78, 80, ++ 82, 84, 86, 88, 90, 92, 94, 96, ++ 98, 100, 102, 104, 106, 108, 110, 112, ++ 114, 116, 118, 120, 123, 125, 127, 129, ++ 131, 131, 131, 131, 131, 131, 131, 131, ++ 131, 131, 131, 131, 131, 131, 131, 131, ++ 131, 131, 131, 131, 131, 131, 131, 131, ++ 131, 131, 131, 131, 131, 131, 131, 131, ++ 131, 131, 131, 131, 131, 131, 131, 131, ++ 131, 131, 131, 131, 131, 131, 131, 131, ++ 131, 131, 131, 131, 131, 131, 131, 131, ++ 131, 131, 131, 131, 131, 131, 131, 131, ++ 131, ++ }, ++ { /* Fourth byte table 113. 
*/ ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 2, 4, 6, 8, 10, 12, 14, ++ 16, 18, 20, 22, 24, 26, 28, 30, ++ 32, 34, 36, 38, 40, 42, 45, 47, ++ 49, 51, 53, 55, 57, 59, 61, 63, ++ 65, 67, 69, 71, 73, 75, 77, 79, ++ 81, 83, 85, 87, 89, 91, 93, 95, ++ 97, 99, 101, 103, 105, 107, 110, 112, ++ 114, 116, 118, 120, 122, 124, 126, 128, ++ 130, 130, 130, 130, 130, 130, 130, 130, ++ 130, 130, 130, 130, 130, 130, 130, 130, ++ 130, 130, 130, 130, 130, 130, 130, 130, ++ 130, 130, 130, 130, 130, 130, 130, 130, ++ 130, 130, 130, 130, 130, 130, 130, 130, ++ 130, 130, 130, 130, 130, 130, 130, 130, ++ 130, 130, 130, 130, 130, 130, 130, 130, ++ 130, 130, 130, 130, 130, 130, 130, 130, ++ 130, ++ }, ++ { /* Fourth byte table 114. */ ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 2, 4, 6, 8, 10, 12, 14, ++ 16, 18, 20, 22, 24, 26, 28, 30, ++ 33, 35, 37, 39, 41, 43, 45, 47, ++ 49, 51, 53, 55, 57, 59, 61, 63, ++ 65, 67, 69, 71, 73, 75, 77, 79, ++ 81, 83, 85, 87, 89, 91, 93, 95, ++ 98, 100, 102, 104, 106, 108, 110, 112, ++ 114, 116, 118, 120, 122, 124, 126, 128, ++ 130, 130, 130, 130, 130, 130, 130, 130, ++ 130, 130, 130, 130, 130, 130, 130, 130, ++ 130, 130, 130, 130, 130, 130, 130, 130, ++ 130, 130, 130, 130, 130, 130, 130, 130, ++ 130, 130, 130, 130, 130, 130, 130, 130, ++ 130, 130, 130, 130, 130, 130, 130, 130, ++ 130, 130, 130, 130, 130, 130, 130, 130, ++ 130, 130, 130, 130, 130, 130, 130, 130, ++ 130, ++ }, ++ { /* Fourth byte table 115. */ ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 2, 4, 6, 8, 10, 12, 14, ++ 16, 18, 21, 23, 25, 27, 29, 31, ++ 33, 35, 37, 39, 41, 43, 45, 47, ++ 49, 51, 53, 55, 57, 59, 61, 63, ++ 65, 67, 69, 71, 73, 75, 77, 79, ++ 81, 83, 86, 88, 90, 92, 94, 96, ++ 98, 100, 102, 104, 106, 108, 110, 112, ++ 114, 116, 118, 120, 122, 124, 126, 128, ++ 130, 130, 130, 130, 130, 130, 130, 130, ++ 130, 130, 130, 130, 130, 130, 130, 130, ++ 130, 130, 130, 130, 130, 130, 130, 130, ++ 130, 130, 130, 130, 130, 130, 130, 130, ++ 130, 130, 130, 130, 130, 130, 130, 130, ++ 130, 130, 130, 130, 130, 130, 130, 130, ++ 130, 130, 130, 130, 130, 130, 130, 130, ++ 130, 130, 130, 130, 130, 130, 130, 130, ++ 130, ++ }, ++ { /* Fourth byte table 116. 
*/ ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 2, 4, 6, 9, 11, 13, 15, ++ 17, 19, 21, 23, 25, 25, 25, 26, ++ 27, 28, 29, 30, 31, 32, 33, 34, ++ 35, 36, 37, 38, 39, 40, 41, 42, ++ 43, 44, 45, 46, 47, 48, 49, 50, ++ 51, 52, 53, 54, 55, 56, 57, 58, ++ 59, 60, 61, 62, 63, 64, 65, 66, ++ 67, 68, 69, 70, 71, 72, 73, 74, ++ 75, 75, 75, 75, 75, 75, 75, 75, ++ 75, 75, 75, 75, 75, 75, 75, 75, ++ 75, 75, 75, 75, 75, 75, 75, 75, ++ 75, 75, 75, 75, 75, 75, 75, 75, ++ 75, 75, 75, 75, 75, 75, 75, 75, ++ 75, 75, 75, 75, 75, 75, 75, 75, ++ 75, 75, 75, 75, 75, 75, 75, 75, ++ 75, 75, 75, 75, 75, 75, 75, 75, ++ 75, ++ }, ++ { /* Fourth byte table 117. */ ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 4, 9, 13, 17, 21, 25, 29, ++ 33, 37, 42, 46, 50, 54, 58, 62, ++ 66, 71, 75, 80, 85, 90, 94, 98, ++ 102, 106, 110, 114, 118, 122, 127, 127, ++ 127, 127, 127, 127, 127, 127, 127, 127, ++ 127, 127, 127, 127, 127, 127, 127, 127, ++ 127, 127, 127, 127, 127, 127, 127, 127, ++ 127, 127, 127, 127, 127, 127, 127, 127, ++ 127, 127, 127, 127, 127, 127, 127, 127, ++ 127, 127, 127, 127, 127, 127, 127, 127, ++ 127, 127, 127, 127, 127, 127, 127, 127, ++ 127, 127, 127, 127, 127, 127, 127, 127, ++ 127, 127, 127, 127, 127, 127, 127, 127, ++ 127, 127, 127, 127, 127, 127, 127, 127, ++ 127, 127, 127, 127, 127, 127, 127, 127, ++ 127, 127, 127, 127, 127, 127, 127, 127, ++ 127, ++ }, ++ }, ++}; ++ ++static const uint16_t u8_decomp_b4_16bit_tbl[2][30][257] = { ++ { ++ { /* Fourth byte 16-bit table 0. */ ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 4, 8, 12, 16, 20, 24, 28, ++ 32, 38, 44, 48, 52, 56, 60, 64, ++ 68, 72, 76, 80, 84, 90, 96, 102, ++ 108, 112, 116, 120, 124, 130, 136, 140, ++ 144, 148, 152, 156, 160, 164, 168, 172, ++ 176, 180, 184, 188, 192, 196, 200, 206, ++ 212, 216, 220, 224, 228, 232, 236, 240, ++ 244, 250, 256, 260, 264, 268, 272, 276, ++ 280, 280, 280, 280, 280, 280, 280, 280, ++ 280, 280, 280, 280, 280, 280, 280, 280, ++ 280, 280, 280, 280, 280, 280, 280, 280, ++ 280, 280, 280, 280, 280, 280, 280, 280, ++ 280, 280, 280, 280, 280, 280, 280, 280, ++ 280, 280, 280, 280, 280, 280, 280, 280, ++ 280, 280, 280, 280, 280, 280, 280, 280, ++ 280, 280, 280, 280, 280, 280, 280, 280, ++ 280, ++ }, ++ { /* Fourth byte 16-bit table 1. 
*/ ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 4, 8, 12, 16, 20, 24, 28, ++ 32, 36, 40, 44, 48, 54, 60, 66, ++ 72, 78, 84, 90, 96, 100, 104, 108, ++ 112, 116, 120, 124, 128, 134, 140, 144, ++ 148, 152, 156, 160, 164, 170, 176, 182, ++ 188, 194, 200, 204, 208, 212, 216, 220, ++ 224, 228, 232, 236, 240, 244, 248, 252, ++ 256, 262, 268, 274, 280, 284, 288, 292, ++ 296, 296, 296, 296, 296, 296, 296, 296, ++ 296, 296, 296, 296, 296, 296, 296, 296, ++ 296, 296, 296, 296, 296, 296, 296, 296, ++ 296, 296, 296, 296, 296, 296, 296, 296, ++ 296, 296, 296, 296, 296, 296, 296, 296, ++ 296, 296, 296, 296, 296, 296, 296, 296, ++ 296, 296, 296, 296, 296, 296, 296, 296, ++ 296, 296, 296, 296, 296, 296, 296, 296, ++ 296, ++ }, ++ { /* Fourth byte 16-bit table 2. */ ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 4, 8, 12, 16, 20, 24, 28, ++ 32, 36, 40, 44, 48, 52, 56, 60, ++ 64, 68, 72, 76, 80, 84, 88, 92, ++ 96, 100, 104, 107, 116, 116, 116, 116, ++ 116, 120, 124, 128, 132, 138, 144, 150, ++ 156, 162, 168, 174, 180, 186, 192, 198, ++ 204, 210, 216, 222, 228, 234, 240, 246, ++ 252, 256, 260, 264, 268, 272, 276, 282, ++ 288, 288, 288, 288, 288, 288, 288, 288, ++ 288, 288, 288, 288, 288, 288, 288, 288, ++ 288, 288, 288, 288, 288, 288, 288, 288, ++ 288, 288, 288, 288, 288, 288, 288, 288, ++ 288, 288, 288, 288, 288, 288, 288, 288, ++ 288, 288, 288, 288, 288, 288, 288, 288, ++ 288, 288, 288, 288, 288, 288, 288, 288, ++ 288, 288, 288, 288, 288, 288, 288, 288, ++ 288, ++ }, ++ { /* Fourth byte 16-bit table 3. */ ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 6, 12, 18, 24, 30, 36, 42, ++ 48, 52, 56, 60, 64, 68, 72, 76, ++ 80, 86, 92, 98, 104, 110, 116, 122, ++ 128, 134, 140, 146, 152, 158, 164, 170, ++ 176, 182, 188, 194, 200, 204, 208, 212, ++ 216, 222, 228, 234, 240, 246, 252, 258, ++ 264, 270, 276, 280, 284, 288, 292, 296, ++ 300, 304, 308, 308, 308, 308, 308, 308, ++ 308, 308, 308, 308, 308, 308, 308, 308, ++ 308, 308, 308, 308, 308, 308, 308, 308, ++ 308, 308, 308, 308, 308, 308, 308, 308, ++ 308, 308, 308, 308, 308, 308, 308, 308, ++ 308, 308, 308, 308, 308, 308, 308, 308, ++ 308, 308, 308, 308, 308, 308, 308, 308, ++ 308, 308, 308, 308, 308, 308, 308, 308, ++ 308, 308, 308, 308, 308, 308, 308, 308, ++ 308, ++ }, ++ { /* Fourth byte 16-bit table 4. 
*/ ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 5, 10, 17, 24, 31, 38, 45, ++ 52, 57, 62, 69, 76, 83, 90, 97, ++ 104, 109, 114, 121, 128, 135, 142, 142, ++ 142, 147, 152, 159, 166, 173, 180, 180, ++ 180, 185, 190, 197, 204, 211, 218, 225, ++ 232, 237, 242, 249, 256, 263, 270, 277, ++ 284, 289, 294, 301, 308, 315, 322, 329, ++ 336, 341, 346, 353, 360, 367, 374, 381, ++ 388, 388, 388, 388, 388, 388, 388, 388, ++ 388, 388, 388, 388, 388, 388, 388, 388, ++ 388, 388, 388, 388, 388, 388, 388, 388, ++ 388, 388, 388, 388, 388, 388, 388, 388, ++ 388, 388, 388, 388, 388, 388, 388, 388, ++ 388, 388, 388, 388, 388, 388, 388, 388, ++ 388, 388, 388, 388, 388, 388, 388, 388, ++ 388, 388, 388, 388, 388, 388, 388, 388, ++ 388, ++ }, ++ { /* Fourth byte 16-bit table 5. */ ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 5, 10, 17, 24, 31, 38, 38, ++ 38, 43, 48, 55, 62, 69, 76, 76, ++ 76, 81, 86, 93, 100, 107, 114, 121, ++ 128, 128, 133, 133, 140, 140, 147, 147, ++ 154, 159, 164, 171, 178, 185, 192, 199, ++ 206, 211, 216, 223, 230, 237, 244, 251, ++ 258, 263, 268, 273, 278, 283, 288, 293, ++ 298, 303, 308, 313, 318, 323, 328, 328, ++ 328, 328, 328, 328, 328, 328, 328, 328, ++ 328, 328, 328, 328, 328, 328, 328, 328, ++ 328, 328, 328, 328, 328, 328, 328, 328, ++ 328, 328, 328, 328, 328, 328, 328, 328, ++ 328, 328, 328, 328, 328, 328, 328, 328, ++ 328, 328, 328, 328, 328, 328, 328, 328, ++ 328, 328, 328, 328, 328, 328, 328, 328, ++ 328, 328, 328, 328, 328, 328, 328, 328, ++ 328, ++ }, ++ { /* Fourth byte 16-bit table 6. */ ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 7, 14, 23, 32, 41, 50, 59, ++ 68, 75, 82, 91, 100, 109, 118, 127, ++ 136, 143, 150, 159, 168, 177, 186, 195, ++ 204, 211, 218, 227, 236, 245, 254, 263, ++ 272, 279, 286, 295, 304, 313, 322, 331, ++ 340, 347, 354, 363, 372, 381, 390, 399, ++ 408, 413, 418, 425, 430, 437, 437, 442, ++ 449, 454, 459, 464, 469, 474, 477, 480, ++ 483, 483, 483, 483, 483, 483, 483, 483, ++ 483, 483, 483, 483, 483, 483, 483, 483, ++ 483, 483, 483, 483, 483, 483, 483, 483, ++ 483, 483, 483, 483, 483, 483, 483, 483, ++ 483, 483, 483, 483, 483, 483, 483, 483, ++ 483, 483, 483, 483, 483, 483, 483, 483, ++ 483, 483, 483, 483, 483, 483, 483, 483, ++ 483, 483, 483, 483, 483, 483, 483, 483, ++ 483, ++ }, ++ { /* Fourth byte 16-bit table 7. 
*/ ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 3, 14, 21, 26, 33, 33, 38, ++ 45, 50, 55, 60, 65, 70, 82, 94, ++ 106, 111, 116, 123, 130, 130, 130, 135, ++ 142, 147, 152, 157, 162, 162, 174, 186, ++ 198, 203, 208, 215, 222, 227, 232, 237, ++ 244, 249, 254, 259, 264, 269, 280, 291, ++ 293, 293, 293, 300, 305, 312, 312, 317, ++ 324, 329, 334, 339, 344, 349, 356, 359, ++ 359, 359, 359, 359, 359, 359, 359, 359, ++ 359, 359, 359, 359, 359, 359, 359, 359, ++ 359, 359, 359, 359, 359, 359, 359, 359, ++ 359, 359, 359, 359, 359, 359, 359, 359, ++ 359, 359, 359, 359, 359, 359, 359, 359, ++ 359, 359, 359, 359, 359, 359, 359, 359, ++ 359, 359, 359, 359, 359, 359, 359, 359, ++ 359, 359, 359, 359, 359, 359, 359, 359, ++ 359, ++ }, ++ { /* Fourth byte 16-bit table 8. */ ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 5, 10, 15, 20, 25, 30, 35, ++ 40, 45, 50, 55, 60, 65, 70, 78, ++ 86, 94, 102, 110, 118, 126, 134, 142, ++ 150, 158, 166, 174, 182, 190, 190, 190, ++ 190, 195, 200, 205, 210, 215, 220, 225, ++ 230, 235, 240, 245, 250, 255, 260, 265, ++ 270, 275, 280, 285, 290, 295, 300, 305, ++ 310, 315, 320, 325, 330, 335, 340, 345, ++ 350, 350, 350, 350, 350, 350, 350, 350, ++ 350, 350, 350, 350, 350, 350, 350, 350, ++ 350, 350, 350, 350, 350, 350, 350, 350, ++ 350, 350, 350, 350, 350, 350, 350, 350, ++ 350, 350, 350, 350, 350, 350, 350, 350, ++ 350, 350, 350, 350, 350, 350, 350, 350, ++ 350, 350, 350, 350, 350, 350, 350, 350, ++ 350, 350, 350, 350, 350, 350, 350, 350, ++ 350, ++ }, ++ { /* Fourth byte 16-bit table 9. */ ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 15, 27, 42, 51, 66, 75, 84, ++ 102, 114, 123, 132, 141, 153, 165, 177, ++ 189, 201, 213, 225, 243, 249, 267, 285, ++ 300, 312, 330, 348, 360, 369, 378, 390, ++ 402, 417, 432, 441, 450, 462, 471, 480, ++ 486, 492, 501, 510, 528, 540, 555, 573, ++ 585, 594, 603, 621, 633, 651, 660, 675, ++ 684, 696, 705, 717, 732, 744, 759, 771, ++ 777, 777, 777, 777, 777, 777, 777, 777, ++ 777, 777, 777, 777, 777, 777, 777, 777, ++ 777, 777, 777, 777, 777, 777, 777, 777, ++ 777, 777, 777, 777, 777, 777, 777, 777, ++ 777, 777, 777, 777, 777, 777, 777, 777, ++ 777, 777, 777, 777, 777, 777, 777, 777, ++ 777, 777, 777, 777, 777, 777, 777, 777, ++ 777, 777, 777, 777, 777, 777, 777, 777, ++ 777, ++ }, ++ { /* Fourth byte 16-bit table 10. 
*/ ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 15, 24, 33, 45, 54, 63, 72, ++ 87, 99, 105, 123, 132, 147, 159, 171, ++ 180, 189, 201, 207, 219, 234, 240, 258, ++ 267, 271, 275, 279, 283, 287, 291, 295, ++ 299, 303, 307, 312, 317, 322, 327, 332, ++ 337, 342, 347, 352, 357, 362, 367, 372, ++ 377, 382, 385, 387, 389, 392, 394, 396, ++ 396, 396, 396, 396, 402, 408, 414, 420, ++ 432, 432, 432, 432, 432, 432, 432, 432, ++ 432, 432, 432, 432, 432, 432, 432, 432, ++ 432, 432, 432, 432, 432, 432, 432, 432, ++ 432, 432, 432, 432, 432, 432, 432, 432, ++ 432, 432, 432, 432, 432, 432, 432, 432, ++ 432, 432, 432, 432, 432, 432, 432, 432, ++ 432, 432, 432, 432, 432, 432, 432, 432, ++ 432, 432, 432, 432, 432, 432, 432, 432, ++ 432, ++ }, ++ { /* Fourth byte 16-bit table 11. */ ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 4, 8, 12, 16, 20, 24, 28, ++ 32, 36, 40, 44, 48, 52, 56, 60, ++ 64, 68, 72, 76, 80, 84, 88, 92, ++ 96, 100, 104, 108, 112, 116, 120, 124, ++ 128, 132, 136, 140, 144, 148, 152, 156, ++ 160, 164, 168, 172, 176, 180, 184, 188, ++ 192, 196, 200, 204, 208, 212, 216, 220, ++ 224, 228, 232, 236, 240, 244, 248, 252, ++ 256, 256, 256, 256, 256, 256, 256, 256, ++ 256, 256, 256, 256, 256, 256, 256, 256, ++ 256, 256, 256, 256, 256, 256, 256, 256, ++ 256, 256, 256, 256, 256, 256, 256, 256, ++ 256, 256, 256, 256, 256, 256, 256, 256, ++ 256, 256, 256, 256, 256, 256, 256, 256, ++ 256, 256, 256, 256, 256, 256, 256, 256, ++ 256, 256, 256, 256, 256, 256, 256, 256, ++ 256, ++ }, ++ { /* Fourth byte 16-bit table 12. */ ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 4, 8, 12, 16, 20, 24, 28, ++ 32, 36, 40, 44, 48, 52, 56, 60, ++ 64, 68, 72, 76, 80, 84, 88, 92, ++ 96, 100, 104, 108, 112, 116, 120, 124, ++ 128, 132, 136, 140, 144, 148, 152, 156, ++ 160, 164, 168, 172, 176, 180, 184, 188, ++ 192, 196, 200, 204, 208, 212, 216, 220, ++ 224, 228, 232, 236, 240, 244, 248, 252, ++ 256, 256, 256, 256, 256, 256, 256, 256, ++ 256, 256, 256, 256, 256, 256, 256, 256, ++ 256, 256, 256, 256, 256, 256, 256, 256, ++ 256, 256, 256, 256, 256, 256, 256, 256, ++ 256, 256, 256, 256, 256, 256, 256, 256, ++ 256, 256, 256, 256, 256, 256, 256, 256, ++ 256, 256, 256, 256, 256, 256, 256, 256, ++ 256, 256, 256, 256, 256, 256, 256, 256, ++ 256, ++ }, ++ { /* Fourth byte 16-bit table 13. 
*/ ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 4, 8, 12, 16, 20, 24, 28, ++ 32, 36, 40, 44, 48, 52, 56, 60, ++ 64, 68, 72, 76, 80, 84, 88, 92, ++ 96, 100, 104, 108, 112, 116, 120, 124, ++ 128, 132, 136, 140, 144, 148, 152, 156, ++ 160, 164, 168, 172, 176, 180, 184, 188, ++ 192, 196, 200, 204, 208, 212, 216, 220, ++ 224, 228, 232, 236, 240, 244, 248, 252, ++ 256, 256, 256, 256, 256, 256, 256, 256, ++ 256, 256, 256, 256, 256, 256, 256, 256, ++ 256, 256, 256, 256, 256, 256, 256, 256, ++ 256, 256, 256, 256, 256, 256, 256, 256, ++ 256, 256, 256, 256, 256, 256, 256, 256, ++ 256, 256, 256, 256, 256, 256, 256, 256, ++ 256, 256, 256, 256, 256, 256, 256, 256, ++ 256, 256, 256, 256, 256, 256, 256, 256, ++ 256, ++ }, ++ { /* Fourth byte 16-bit table 14. */ ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 4, 8, 12, 16, 20, 24, 28, ++ 32, 36, 40, 44, 48, 52, 56, 60, ++ 64, 68, 72, 76, 80, 84, 88, 92, ++ 96, 100, 104, 108, 112, 116, 120, 124, ++ 128, 132, 136, 140, 144, 148, 152, 156, ++ 160, 164, 168, 172, 176, 180, 184, 188, ++ 192, 196, 200, 204, 208, 212, 216, 220, ++ 224, 228, 232, 236, 240, 244, 248, 252, ++ 256, 256, 256, 256, 256, 256, 256, 256, ++ 256, 256, 256, 256, 256, 256, 256, 256, ++ 256, 256, 256, 256, 256, 256, 256, 256, ++ 256, 256, 256, 256, 256, 256, 256, 256, ++ 256, 256, 256, 256, 256, 256, 256, 256, ++ 256, 256, 256, 256, 256, 256, 256, 256, ++ 256, 256, 256, 256, 256, 256, 256, 256, ++ 256, 256, 256, 256, 256, 256, 256, 256, ++ 256, ++ }, ++ { /* Fourth byte 16-bit table 15. */ ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 6, 12, 18, 24, 30, 34, 38, ++ 42, 46, 50, 54, 58, 62, 66, 70, ++ 74, 78, 82, 86, 90, 94, 98, 102, ++ 106, 110, 114, 118, 122, 126, 130, 134, ++ 138, 142, 146, 150, 154, 158, 162, 166, ++ 170, 174, 178, 182, 186, 190, 194, 198, ++ 202, 206, 210, 214, 218, 222, 226, 230, ++ 234, 238, 242, 246, 250, 254, 258, 262, ++ 266, 266, 266, 266, 266, 266, 266, 266, ++ 266, 266, 266, 266, 266, 266, 266, 266, ++ 266, 266, 266, 266, 266, 266, 266, 266, ++ 266, 266, 266, 266, 266, 266, 266, 266, ++ 266, 266, 266, 266, 266, 266, 266, 266, ++ 266, 266, 266, 266, 266, 266, 266, 266, ++ 266, 266, 266, 266, 266, 266, 266, 266, ++ 266, 266, 266, 266, 266, 266, 266, 266, ++ 266, ++ }, ++ { /* Fourth byte 16-bit table 16. 
*/ ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 4, 8, 12, 16, 20, 24, 28, ++ 32, 36, 40, 44, 48, 52, 56, 60, ++ 64, 68, 72, 76, 80, 84, 88, 92, ++ 96, 100, 104, 108, 112, 116, 120, 125, ++ 130, 135, 140, 145, 150, 156, 162, 168, ++ 174, 180, 186, 190, 194, 198, 202, 206, ++ 210, 214, 218, 222, 226, 230, 234, 238, ++ 242, 246, 250, 254, 258, 262, 266, 270, ++ 274, 274, 274, 274, 274, 274, 274, 274, ++ 274, 274, 274, 274, 274, 274, 274, 274, ++ 274, 274, 274, 274, 274, 274, 274, 274, ++ 274, 274, 274, 274, 274, 274, 274, 274, ++ 274, 274, 274, 274, 274, 274, 274, 274, ++ 274, 274, 274, 274, 274, 274, 274, 274, ++ 274, 274, 274, 274, 274, 274, 274, 274, ++ 274, 274, 274, 274, 274, 274, 274, 274, ++ 274, ++ }, ++ { /* Fourth byte 16-bit table 17. */ ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 4, 8, 12, 16, 20, 24, 28, ++ 32, 36, 40, 44, 48, 52, 56, 60, ++ 64, 68, 72, 76, 80, 84, 88, 92, ++ 98, 104, 110, 116, 122, 126, 130, 134, ++ 138, 142, 146, 150, 154, 158, 162, 166, ++ 170, 174, 178, 182, 186, 190, 194, 198, ++ 202, 206, 210, 214, 218, 222, 226, 230, ++ 234, 238, 242, 246, 250, 254, 258, 262, ++ 266, 266, 266, 266, 266, 266, 266, 266, ++ 266, 266, 266, 266, 266, 266, 266, 266, ++ 266, 266, 266, 266, 266, 266, 266, 266, ++ 266, 266, 266, 266, 266, 266, 266, 266, ++ 266, 266, 266, 266, 266, 266, 266, 266, ++ 266, 266, 266, 266, 266, 266, 266, 266, ++ 266, 266, 266, 266, 266, 266, 266, 266, ++ 266, 266, 266, 266, 266, 266, 266, 266, ++ 266, ++ }, ++ { /* Fourth byte 16-bit table 18. */ ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 4, 8, 12, 16, 20, 24, 28, ++ 32, 36, 40, 44, 48, 52, 56, 60, ++ 64, 68, 72, 76, 80, 84, 88, 92, ++ 96, 100, 104, 108, 112, 116, 120, 124, ++ 130, 136, 140, 144, 148, 152, 156, 160, ++ 164, 168, 172, 176, 180, 184, 188, 192, ++ 196, 200, 204, 210, 216, 222, 226, 230, ++ 234, 238, 242, 246, 250, 254, 258, 262, ++ 266, 266, 266, 266, 266, 266, 266, 266, ++ 266, 266, 266, 266, 266, 266, 266, 266, ++ 266, 266, 266, 266, 266, 266, 266, 266, ++ 266, 266, 266, 266, 266, 266, 266, 266, ++ 266, 266, 266, 266, 266, 266, 266, 266, ++ 266, 266, 266, 266, 266, 266, 266, 266, ++ 266, 266, 266, 266, 266, 266, 266, 266, ++ 266, 266, 266, 266, 266, 266, 266, 266, ++ 266, ++ }, ++ { /* Fourth byte 16-bit table 19. 
*/ ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 6, 12, 18, 24, 30, 36, 42, ++ 48, 54, 60, 66, 72, 78, 84, 90, ++ 96, 102, 108, 114, 120, 126, 132, 138, ++ 144, 150, 156, 162, 168, 174, 180, 186, ++ 192, 198, 204, 210, 216, 222, 228, 234, ++ 240, 246, 252, 258, 264, 270, 276, 282, ++ 288, 288, 288, 288, 288, 288, 288, 288, ++ 288, 288, 288, 288, 288, 288, 288, 288, ++ 288, 288, 288, 288, 288, 288, 288, 288, ++ 288, 288, 288, 288, 288, 288, 288, 288, ++ 288, 288, 288, 288, 288, 288, 288, 288, ++ 288, 288, 288, 288, 288, 288, 288, 288, ++ 288, 288, 288, 288, 288, 288, 288, 288, ++ 288, 288, 288, 288, 288, 288, 288, 288, ++ 288, ++ }, ++ { /* Fourth byte 16-bit table 20. */ ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 6, 12, 18, 24, 30, 36, 42, ++ 48, 54, 60, 66, 72, 78, 84, 90, ++ 96, 96, 96, 102, 108, 114, 120, 126, ++ 132, 138, 144, 150, 156, 162, 168, 174, ++ 180, 186, 192, 198, 204, 210, 216, 222, ++ 228, 234, 240, 246, 252, 258, 264, 270, ++ 276, 282, 288, 294, 300, 306, 312, 318, ++ 324, 330, 336, 342, 348, 354, 360, 366, ++ 372, 372, 372, 372, 372, 372, 372, 372, ++ 372, 372, 372, 372, 372, 372, 372, 372, ++ 372, 372, 372, 372, 372, 372, 372, 372, ++ 372, 372, 372, 372, 372, 372, 372, 372, ++ 372, 372, 372, 372, 372, 372, 372, 372, ++ 372, 372, 372, 372, 372, 372, 372, 372, ++ 372, 372, 372, 372, 372, 372, 372, 372, ++ 372, 372, 372, 372, 372, 372, 372, 372, ++ 372, ++ }, ++ { /* Fourth byte 16-bit table 21. */ ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 4, 8, 12, 17, 21, 25, 29, ++ 33, 37, 41, 45, 49, 53, 58, 62, ++ 66, 70, 74, 79, 83, 87, 91, 96, ++ 100, 104, 108, 112, 116, 121, 125, 129, ++ 133, 137, 141, 145, 149, 153, 157, 161, ++ 165, 169, 173, 177, 181, 185, 189, 193, ++ 197, 201, 205, 209, 213, 218, 222, 226, ++ 230, 235, 239, 243, 247, 251, 255, 259, ++ 263, 263, 263, 263, 263, 263, 263, 263, ++ 263, 263, 263, 263, 263, 263, 263, 263, ++ 263, 263, 263, 263, 263, 263, 263, 263, ++ 263, 263, 263, 263, 263, 263, 263, 263, ++ 263, 263, 263, 263, 263, 263, 263, 263, ++ 263, 263, 263, 263, 263, 263, 263, 263, ++ 263, 263, 263, 263, 263, 263, 263, 263, ++ 263, 263, 263, 263, 263, 263, 263, 263, ++ 263, ++ }, ++ { /* Fourth byte 16-bit table 22. 
*/ ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 4, 8, 12, 16, 20, 24, 28, ++ 32, 36, 40, 44, 48, 52, 56, 60, ++ 64, 68, 72, 76, 80, 84, 88, 92, ++ 96, 100, 105, 109, 113, 117, 121, 125, ++ 129, 134, 139, 143, 147, 151, 155, 159, ++ 163, 167, 171, 175, 179, 184, 188, 192, ++ 196, 200, 205, 209, 213, 217, 221, 225, ++ 229, 233, 237, 241, 246, 250, 255, 259, ++ 263, 263, 263, 263, 263, 263, 263, 263, ++ 263, 263, 263, 263, 263, 263, 263, 263, ++ 263, 263, 263, 263, 263, 263, 263, 263, ++ 263, 263, 263, 263, 263, 263, 263, 263, ++ 263, 263, 263, 263, 263, 263, 263, 263, ++ 263, 263, 263, 263, 263, 263, 263, 263, ++ 263, 263, 263, 263, 263, 263, 263, 263, ++ 263, 263, 263, 263, 263, 263, 263, 263, ++ 263, ++ }, ++ { /* Fourth byte 16-bit table 23. */ ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 4, 8, 12, 16, 20, 24, 28, ++ 32, 36, 41, 45, 49, 53, 57, 61, ++ 66, 70, 75, 80, 84, 88, 92, 96, ++ 101, 106, 110, 114, 118, 122, 126, 130, ++ 134, 138, 142, 146, 150, 155, 159, 163, ++ 167, 171, 175, 179, 183, 187, 191, 195, ++ 199, 203, 207, 211, 215, 219, 223, 227, ++ 231, 236, 240, 244, 248, 252, 256, 261, ++ 265, 265, 265, 265, 265, 265, 265, 265, ++ 265, 265, 265, 265, 265, 265, 265, 265, ++ 265, 265, 265, 265, 265, 265, 265, 265, ++ 265, 265, 265, 265, 265, 265, 265, 265, ++ 265, 265, 265, 265, 265, 265, 265, 265, ++ 265, 265, 265, 265, 265, 265, 265, 265, ++ 265, 265, 265, 265, 265, 265, 265, 265, ++ 265, 265, 265, 265, 265, 265, 265, 265, ++ 265, ++ }, ++ { /* Fourth byte 16-bit table 24. */ ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 4, 8, 12, 16, 20, 24, 28, ++ 32, 36, 40, 45, 49, 53, 57, 61, ++ 65, 69, 73, 77, 81, 85, 89, 93, ++ 97, 101, 105, 109, 113, 117, 122, 126, ++ 130, 134, 138, 142, 147, 151, 155, 159, ++ 163, 167, 171, 175, 179, 184, 188, 192, ++ 196, 201, 205, 209, 213, 217, 221, 225, ++ 230, 235, 240, 244, 249, 253, 257, 261, ++ 265, 265, 265, 265, 265, 265, 265, 265, ++ 265, 265, 265, 265, 265, 265, 265, 265, ++ 265, 265, 265, 265, 265, 265, 265, 265, ++ 265, 265, 265, 265, 265, 265, 265, 265, ++ 265, 265, 265, 265, 265, 265, 265, 265, ++ 265, 265, 265, 265, 265, 265, 265, 265, ++ 265, 265, 265, 265, 265, 265, 265, 265, ++ 265, 265, 265, 265, 265, 265, 265, 265, ++ 265, ++ }, ++ { /* Fourth byte 16-bit table 25. 
*/ ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 4, 8, 12, 16, 20, 24, 29, ++ 33, 37, 41, 45, 49, 53, 58, 62, ++ 66, 71, 76, 80, 84, 88, 92, 96, ++ 100, 104, 108, 112, 117, 121, 126, 130, ++ 135, 139, 143, 147, 152, 156, 160, 165, ++ 170, 174, 178, 182, 186, 190, 194, 198, ++ 202, 206, 210, 214, 218, 222, 227, 231, ++ 236, 240, 245, 249, 254, 259, 264, 268, ++ 272, 272, 272, 272, 272, 272, 272, 272, ++ 272, 272, 272, 272, 272, 272, 272, 272, ++ 272, 272, 272, 272, 272, 272, 272, 272, ++ 272, 272, 272, 272, 272, 272, 272, 272, ++ 272, 272, 272, 272, 272, 272, 272, 272, ++ 272, 272, 272, 272, 272, 272, 272, 272, ++ 272, 272, 272, 272, 272, 272, 272, 272, ++ 272, 272, 272, 272, 272, 272, 272, 272, ++ 272, ++ }, ++ { /* Fourth byte 16-bit table 26. */ ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 4, 9, 14, 19, 24, 28, 32, ++ 36, 40, 44, 48, 52, 56, 61, 65, ++ 69, 73, 77, 82, 86, 91, 96, 100, ++ 104, 108, 112, 116, 120, 125, 130, 135, ++ 139, 143, 148, 152, 156, 160, 165, 169, ++ 173, 177, 181, 185, 190, 194, 198, 202, ++ 206, 210, 214, 219, 224, 228, 233, 237, ++ 242, 246, 250, 254, 259, 264, 268, 273, ++ 277, 277, 277, 277, 277, 277, 277, 277, ++ 277, 277, 277, 277, 277, 277, 277, 277, ++ 277, 277, 277, 277, 277, 277, 277, 277, ++ 277, 277, 277, 277, 277, 277, 277, 277, ++ 277, 277, 277, 277, 277, 277, 277, 277, ++ 277, 277, 277, 277, 277, 277, 277, 277, ++ 277, 277, 277, 277, 277, 277, 277, 277, ++ 277, 277, 277, 277, 277, 277, 277, 277, ++ 277, ++ }, ++ { /* Fourth byte 16-bit table 27. */ ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 5, 9, 13, 17, 21, 25, 29, ++ 34, 39, 44, 49, 53, 57, 61, 65, ++ 69, 73, 77, 81, 85, 89, 93, 97, ++ 102, 106, 110, 114, 118, 122, 126, 130, ++ 134, 138, 142, 146, 150, 155, 160, 165, ++ 169, 173, 177, 181, 186, 190, 195, 199, ++ 203, 208, 213, 217, 221, 225, 229, 233, ++ 237, 241, 245, 249, 253, 257, 261, 265, ++ 269, 269, 269, 269, 269, 269, 269, 269, ++ 269, 269, 269, 269, 269, 269, 269, 269, ++ 269, 269, 269, 269, 269, 269, 269, 269, ++ 269, 269, 269, 269, 269, 269, 269, 269, ++ 269, 269, 269, 269, 269, 269, 269, 269, ++ 269, 269, 269, 269, 269, 269, 269, 269, ++ 269, 269, 269, 269, 269, 269, 269, 269, ++ 269, 269, 269, 269, 269, 269, 269, 269, ++ 269, ++ }, ++ { /* Fourth byte 16-bit table 28. 
*/ ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 4, 8, 12, 16, 20, 25, 29, ++ 33, 37, 41, 45, 50, 55, 59, 63, ++ 67, 71, 75, 79, 84, 88, 92, 96, ++ 100, 105, 110, 114, 118, 122, 127, 131, ++ 135, 140, 145, 149, 153, 157, 162, 166, ++ 170, 174, 178, 182, 186, 190, 195, 199, ++ 203, 207, 212, 216, 220, 224, 228, 233, ++ 238, 242, 246, 250, 255, 259, 264, 268, ++ 272, 272, 272, 272, 272, 272, 272, 272, ++ 272, 272, 272, 272, 272, 272, 272, 272, ++ 272, 272, 272, 272, 272, 272, 272, 272, ++ 272, 272, 272, 272, 272, 272, 272, 272, ++ 272, 272, 272, 272, 272, 272, 272, 272, ++ 272, 272, 272, 272, 272, 272, 272, 272, ++ 272, 272, 272, 272, 272, 272, 272, 272, ++ 272, 272, 272, 272, 272, 272, 272, 272, ++ 272, ++ }, ++ { /* Fourth byte 16-bit table 29. */ ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, ++ }, ++ }, ++ { ++ { /* Fourth byte 16-bit table 0. */ ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 4, 8, 12, 16, 20, 24, 28, ++ 32, 38, 44, 48, 52, 56, 60, 64, ++ 68, 72, 76, 80, 84, 90, 96, 102, ++ 108, 112, 116, 120, 124, 130, 136, 140, ++ 144, 148, 152, 156, 160, 164, 168, 172, ++ 176, 180, 184, 188, 192, 196, 200, 206, ++ 212, 216, 220, 224, 228, 232, 236, 240, ++ 244, 250, 256, 260, 264, 268, 272, 276, ++ 280, 280, 280, 280, 280, 280, 280, 280, ++ 280, 280, 280, 280, 280, 280, 280, 280, ++ 280, 280, 280, 280, 280, 280, 280, 280, ++ 280, 280, 280, 280, 280, 280, 280, 280, ++ 280, 280, 280, 280, 280, 280, 280, 280, ++ 280, 280, 280, 280, 280, 280, 280, 280, ++ 280, 280, 280, 280, 280, 280, 280, 280, ++ 280, 280, 280, 280, 280, 280, 280, 280, ++ 280, ++ }, ++ { /* Fourth byte 16-bit table 1. 
*/ ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 4, 8, 12, 16, 20, 24, 28, ++ 32, 36, 40, 44, 48, 54, 60, 66, ++ 72, 78, 84, 90, 96, 100, 104, 108, ++ 112, 116, 120, 124, 128, 134, 140, 144, ++ 148, 152, 156, 160, 164, 170, 176, 182, ++ 188, 194, 200, 204, 208, 212, 216, 220, ++ 224, 228, 232, 236, 240, 244, 248, 252, ++ 256, 262, 268, 274, 280, 284, 288, 292, ++ 296, 296, 296, 296, 296, 296, 296, 296, ++ 296, 296, 296, 296, 296, 296, 296, 296, ++ 296, 296, 296, 296, 296, 296, 296, 296, ++ 296, 296, 296, 296, 296, 296, 296, 296, ++ 296, 296, 296, 296, 296, 296, 296, 296, ++ 296, 296, 296, 296, 296, 296, 296, 296, ++ 296, 296, 296, 296, 296, 296, 296, 296, ++ 296, 296, 296, 296, 296, 296, 296, 296, ++ 296, ++ }, ++ { /* Fourth byte 16-bit table 2. */ ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 4, 8, 12, 16, 20, 24, 28, ++ 32, 36, 40, 44, 48, 52, 56, 60, ++ 64, 68, 72, 76, 80, 84, 88, 92, ++ 96, 100, 104, 107, 116, 116, 116, 116, ++ 116, 120, 124, 128, 132, 138, 144, 150, ++ 156, 162, 168, 174, 180, 186, 192, 198, ++ 204, 210, 216, 222, 228, 234, 240, 246, ++ 252, 256, 260, 264, 268, 272, 276, 282, ++ 288, 288, 288, 288, 288, 288, 288, 288, ++ 288, 288, 288, 288, 288, 288, 288, 288, ++ 288, 288, 288, 288, 288, 288, 288, 288, ++ 288, 288, 288, 288, 288, 288, 288, 288, ++ 288, 288, 288, 288, 288, 288, 288, 288, ++ 288, 288, 288, 288, 288, 288, 288, 288, ++ 288, 288, 288, 288, 288, 288, 288, 288, ++ 288, 288, 288, 288, 288, 288, 288, 288, ++ 288, ++ }, ++ { /* Fourth byte 16-bit table 3. */ ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 6, 12, 18, 24, 30, 36, 42, ++ 48, 52, 56, 60, 64, 68, 72, 76, ++ 80, 86, 92, 98, 104, 110, 116, 122, ++ 128, 134, 140, 146, 152, 158, 164, 170, ++ 176, 182, 188, 194, 200, 204, 208, 212, ++ 216, 222, 228, 234, 240, 246, 252, 258, ++ 264, 270, 276, 280, 284, 288, 292, 296, ++ 300, 304, 308, 308, 308, 308, 308, 308, ++ 308, 308, 308, 308, 308, 308, 308, 308, ++ 308, 308, 308, 308, 308, 308, 308, 308, ++ 308, 308, 308, 308, 308, 308, 308, 308, ++ 308, 308, 308, 308, 308, 308, 308, 308, ++ 308, 308, 308, 308, 308, 308, 308, 308, ++ 308, 308, 308, 308, 308, 308, 308, 308, ++ 308, 308, 308, 308, 308, 308, 308, 308, ++ 308, 308, 308, 308, 308, 308, 308, 308, ++ 308, ++ }, ++ { /* Fourth byte 16-bit table 4. 
*/ ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 5, 10, 17, 24, 31, 38, 45, ++ 52, 57, 62, 69, 76, 83, 90, 97, ++ 104, 109, 114, 121, 128, 135, 142, 142, ++ 142, 147, 152, 159, 166, 173, 180, 180, ++ 180, 185, 190, 197, 204, 211, 218, 225, ++ 232, 237, 242, 249, 256, 263, 270, 277, ++ 284, 289, 294, 301, 308, 315, 322, 329, ++ 336, 341, 346, 353, 360, 367, 374, 381, ++ 388, 388, 388, 388, 388, 388, 388, 388, ++ 388, 388, 388, 388, 388, 388, 388, 388, ++ 388, 388, 388, 388, 388, 388, 388, 388, ++ 388, 388, 388, 388, 388, 388, 388, 388, ++ 388, 388, 388, 388, 388, 388, 388, 388, ++ 388, 388, 388, 388, 388, 388, 388, 388, ++ 388, 388, 388, 388, 388, 388, 388, 388, ++ 388, 388, 388, 388, 388, 388, 388, 388, ++ 388, ++ }, ++ { /* Fourth byte 16-bit table 5. */ ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 5, 10, 17, 24, 31, 38, 38, ++ 38, 43, 48, 55, 62, 69, 76, 76, ++ 76, 81, 86, 93, 100, 107, 114, 121, ++ 128, 128, 133, 133, 140, 140, 147, 147, ++ 154, 159, 164, 171, 178, 185, 192, 199, ++ 206, 211, 216, 223, 230, 237, 244, 251, ++ 258, 263, 268, 273, 278, 283, 288, 293, ++ 298, 303, 308, 313, 318, 323, 328, 328, ++ 328, 328, 328, 328, 328, 328, 328, 328, ++ 328, 328, 328, 328, 328, 328, 328, 328, ++ 328, 328, 328, 328, 328, 328, 328, 328, ++ 328, 328, 328, 328, 328, 328, 328, 328, ++ 328, 328, 328, 328, 328, 328, 328, 328, ++ 328, 328, 328, 328, 328, 328, 328, 328, ++ 328, 328, 328, 328, 328, 328, 328, 328, ++ 328, 328, 328, 328, 328, 328, 328, 328, ++ 328, ++ }, ++ { /* Fourth byte 16-bit table 6. */ ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 7, 14, 23, 32, 41, 50, 59, ++ 68, 75, 82, 91, 100, 109, 118, 127, ++ 136, 143, 150, 159, 168, 177, 186, 195, ++ 204, 211, 218, 227, 236, 245, 254, 263, ++ 272, 279, 286, 295, 304, 313, 322, 331, ++ 340, 347, 354, 363, 372, 381, 390, 399, ++ 408, 413, 418, 425, 430, 437, 437, 442, ++ 449, 454, 459, 464, 469, 474, 477, 480, ++ 483, 483, 483, 483, 483, 483, 483, 483, ++ 483, 483, 483, 483, 483, 483, 483, 483, ++ 483, 483, 483, 483, 483, 483, 483, 483, ++ 483, 483, 483, 483, 483, 483, 483, 483, ++ 483, 483, 483, 483, 483, 483, 483, 483, ++ 483, 483, 483, 483, 483, 483, 483, 483, ++ 483, 483, 483, 483, 483, 483, 483, 483, ++ 483, 483, 483, 483, 483, 483, 483, 483, ++ 483, ++ }, ++ { /* Fourth byte 16-bit table 7. 
*/ ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 3, 14, 21, 26, 33, 33, 38, ++ 45, 50, 55, 60, 65, 70, 82, 94, ++ 106, 111, 116, 123, 130, 130, 130, 135, ++ 142, 147, 152, 157, 162, 162, 174, 186, ++ 198, 203, 208, 215, 222, 227, 232, 237, ++ 244, 249, 254, 259, 264, 269, 280, 291, ++ 293, 293, 293, 300, 305, 312, 312, 317, ++ 324, 329, 334, 339, 344, 349, 356, 359, ++ 359, 359, 359, 359, 359, 359, 359, 359, ++ 359, 359, 359, 359, 359, 359, 359, 359, ++ 359, 359, 359, 359, 359, 359, 359, 359, ++ 359, 359, 359, 359, 359, 359, 359, 359, ++ 359, 359, 359, 359, 359, 359, 359, 359, ++ 359, 359, 359, 359, 359, 359, 359, 359, ++ 359, 359, 359, 359, 359, 359, 359, 359, ++ 359, 359, 359, 359, 359, 359, 359, 359, ++ 359, ++ }, ++ { /* Fourth byte 16-bit table 8. */ ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 5, 10, 15, 20, 25, 30, 35, ++ 40, 45, 50, 55, 60, 65, 70, 78, ++ 86, 94, 102, 110, 118, 126, 134, 142, ++ 150, 158, 166, 174, 182, 190, 207, 221, ++ 221, 226, 231, 236, 241, 246, 251, 256, ++ 261, 266, 271, 276, 281, 286, 291, 296, ++ 301, 306, 311, 316, 321, 326, 331, 336, ++ 341, 346, 351, 356, 361, 366, 371, 376, ++ 381, 381, 381, 381, 381, 381, 381, 381, ++ 381, 381, 381, 381, 381, 381, 381, 381, ++ 381, 381, 381, 381, 381, 381, 381, 381, ++ 381, 381, 381, 381, 381, 381, 381, 381, ++ 381, 381, 381, 381, 381, 381, 381, 381, ++ 381, 381, 381, 381, 381, 381, 381, 381, ++ 381, 381, 381, 381, 381, 381, 381, 381, ++ 381, 381, 381, 381, 381, 381, 381, 381, ++ 381, ++ }, ++ { /* Fourth byte 16-bit table 9. */ ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 15, 27, 42, 51, 66, 75, 84, ++ 102, 114, 123, 132, 141, 153, 165, 177, ++ 189, 201, 213, 225, 243, 249, 267, 285, ++ 300, 312, 330, 348, 360, 369, 378, 390, ++ 402, 417, 432, 441, 450, 462, 471, 480, ++ 486, 492, 501, 510, 528, 540, 555, 573, ++ 585, 594, 603, 621, 633, 651, 660, 675, ++ 684, 696, 705, 717, 732, 744, 759, 771, ++ 777, 777, 777, 777, 777, 777, 777, 777, ++ 777, 777, 777, 777, 777, 777, 777, 777, ++ 777, 777, 777, 777, 777, 777, 777, 777, ++ 777, 777, 777, 777, 777, 777, 777, 777, ++ 777, 777, 777, 777, 777, 777, 777, 777, ++ 777, 777, 777, 777, 777, 777, 777, 777, ++ 777, 777, 777, 777, 777, 777, 777, 777, ++ 777, 777, 777, 777, 777, 777, 777, 777, ++ 777, ++ }, ++ { /* Fourth byte 16-bit table 10. 
*/ ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 15, 24, 33, 45, 54, 63, 72, ++ 87, 99, 105, 123, 132, 147, 159, 171, ++ 180, 189, 201, 207, 219, 234, 240, 258, ++ 267, 271, 275, 279, 283, 287, 291, 295, ++ 299, 303, 307, 312, 317, 322, 327, 332, ++ 337, 342, 347, 352, 357, 362, 367, 372, ++ 377, 382, 385, 387, 389, 392, 394, 396, ++ 398, 401, 404, 406, 412, 418, 424, 430, ++ 442, 442, 442, 442, 442, 442, 442, 442, ++ 442, 442, 442, 442, 442, 442, 442, 442, ++ 442, 442, 442, 442, 442, 442, 442, 442, ++ 442, 442, 442, 442, 442, 442, 442, 442, ++ 442, 442, 442, 442, 442, 442, 442, 442, ++ 442, 442, 442, 442, 442, 442, 442, 442, ++ 442, 442, 442, 442, 442, 442, 442, 442, ++ 442, 442, 442, 442, 442, 442, 442, 442, ++ 442, ++ }, ++ { /* Fourth byte 16-bit table 11. */ ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 4, 8, 12, 16, 20, 24, 28, ++ 32, 36, 40, 44, 48, 52, 56, 60, ++ 64, 68, 72, 76, 80, 84, 88, 92, ++ 96, 100, 104, 108, 112, 116, 120, 124, ++ 128, 132, 136, 140, 144, 148, 152, 156, ++ 160, 164, 168, 172, 176, 180, 184, 188, ++ 192, 196, 200, 204, 208, 212, 216, 220, ++ 224, 228, 232, 236, 240, 244, 248, 252, ++ 256, 256, 256, 256, 256, 256, 256, 256, ++ 256, 256, 256, 256, 256, 256, 256, 256, ++ 256, 256, 256, 256, 256, 256, 256, 256, ++ 256, 256, 256, 256, 256, 256, 256, 256, ++ 256, 256, 256, 256, 256, 256, 256, 256, ++ 256, 256, 256, 256, 256, 256, 256, 256, ++ 256, 256, 256, 256, 256, 256, 256, 256, ++ 256, 256, 256, 256, 256, 256, 256, 256, ++ 256, ++ }, ++ { /* Fourth byte 16-bit table 12. */ ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 4, 8, 12, 16, 20, 24, 28, ++ 32, 36, 40, 44, 48, 52, 56, 60, ++ 64, 68, 72, 76, 80, 84, 88, 92, ++ 96, 100, 104, 108, 112, 116, 120, 124, ++ 128, 132, 136, 140, 144, 148, 152, 156, ++ 160, 164, 168, 172, 176, 180, 184, 188, ++ 192, 196, 200, 204, 208, 212, 216, 220, ++ 224, 228, 232, 236, 240, 244, 248, 252, ++ 256, 256, 256, 256, 256, 256, 256, 256, ++ 256, 256, 256, 256, 256, 256, 256, 256, ++ 256, 256, 256, 256, 256, 256, 256, 256, ++ 256, 256, 256, 256, 256, 256, 256, 256, ++ 256, 256, 256, 256, 256, 256, 256, 256, ++ 256, 256, 256, 256, 256, 256, 256, 256, ++ 256, 256, 256, 256, 256, 256, 256, 256, ++ 256, 256, 256, 256, 256, 256, 256, 256, ++ 256, ++ }, ++ { /* Fourth byte 16-bit table 13. 
*/ ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 4, 8, 12, 16, 20, 24, 28, ++ 32, 36, 40, 44, 48, 52, 56, 60, ++ 64, 68, 72, 76, 80, 84, 88, 92, ++ 96, 100, 104, 108, 112, 116, 120, 124, ++ 128, 132, 136, 140, 144, 148, 152, 156, ++ 160, 164, 168, 172, 176, 180, 184, 188, ++ 192, 196, 200, 204, 208, 212, 216, 220, ++ 224, 228, 232, 236, 240, 244, 248, 252, ++ 256, 256, 256, 256, 256, 256, 256, 256, ++ 256, 256, 256, 256, 256, 256, 256, 256, ++ 256, 256, 256, 256, 256, 256, 256, 256, ++ 256, 256, 256, 256, 256, 256, 256, 256, ++ 256, 256, 256, 256, 256, 256, 256, 256, ++ 256, 256, 256, 256, 256, 256, 256, 256, ++ 256, 256, 256, 256, 256, 256, 256, 256, ++ 256, 256, 256, 256, 256, 256, 256, 256, ++ 256, ++ }, ++ { /* Fourth byte 16-bit table 14. */ ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 4, 8, 12, 16, 20, 24, 28, ++ 32, 36, 40, 44, 48, 52, 56, 60, ++ 64, 68, 72, 76, 80, 84, 88, 92, ++ 96, 100, 104, 108, 112, 116, 120, 124, ++ 128, 132, 136, 140, 144, 148, 152, 156, ++ 160, 164, 168, 172, 176, 180, 184, 188, ++ 192, 196, 200, 204, 208, 212, 216, 220, ++ 224, 228, 232, 236, 240, 244, 248, 252, ++ 256, 256, 256, 256, 256, 256, 256, 256, ++ 256, 256, 256, 256, 256, 256, 256, 256, ++ 256, 256, 256, 256, 256, 256, 256, 256, ++ 256, 256, 256, 256, 256, 256, 256, 256, ++ 256, 256, 256, 256, 256, 256, 256, 256, ++ 256, 256, 256, 256, 256, 256, 256, 256, ++ 256, 256, 256, 256, 256, 256, 256, 256, ++ 256, 256, 256, 256, 256, 256, 256, 256, ++ 256, ++ }, ++ { /* Fourth byte 16-bit table 15. */ ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 4, 8, 12, 16, 20, 24, 28, ++ 32, 36, 40, 44, 48, 52, 56, 60, ++ 64, 68, 72, 76, 80, 84, 88, 92, ++ 96, 100, 104, 108, 112, 116, 120, 124, ++ 128, 132, 136, 140, 144, 148, 152, 156, ++ 160, 164, 168, 172, 176, 180, 184, 188, ++ 192, 196, 200, 204, 208, 212, 216, 220, ++ 224, 228, 232, 236, 240, 244, 248, 252, ++ 256, 256, 256, 256, 256, 256, 256, 256, ++ 256, 256, 256, 256, 256, 256, 256, 256, ++ 256, 256, 256, 256, 256, 256, 256, 256, ++ 256, 256, 256, 256, 256, 256, 256, 256, ++ 256, 256, 256, 256, 256, 256, 256, 256, ++ 256, 256, 256, 256, 256, 256, 256, 256, ++ 256, 256, 256, 256, 256, 256, 256, 256, ++ 256, 256, 256, 256, 256, 256, 256, 256, ++ 256, ++ }, ++ { /* Fourth byte 16-bit table 16. 
*/ ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 6, 12, 18, 24, 30, 34, 38, ++ 42, 46, 50, 54, 58, 62, 66, 70, ++ 74, 78, 82, 86, 90, 94, 98, 102, ++ 106, 110, 114, 118, 122, 126, 130, 134, ++ 138, 142, 146, 150, 154, 158, 162, 166, ++ 170, 174, 178, 182, 186, 190, 194, 198, ++ 202, 206, 210, 214, 218, 222, 226, 230, ++ 234, 238, 242, 246, 250, 254, 258, 262, ++ 266, 266, 266, 266, 266, 266, 266, 266, ++ 266, 266, 266, 266, 266, 266, 266, 266, ++ 266, 266, 266, 266, 266, 266, 266, 266, ++ 266, 266, 266, 266, 266, 266, 266, 266, ++ 266, 266, 266, 266, 266, 266, 266, 266, ++ 266, 266, 266, 266, 266, 266, 266, 266, ++ 266, 266, 266, 266, 266, 266, 266, 266, ++ 266, 266, 266, 266, 266, 266, 266, 266, ++ 266, ++ }, ++ { /* Fourth byte 16-bit table 17. */ ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 4, 8, 12, 16, 20, 24, 28, ++ 32, 36, 40, 44, 48, 52, 56, 60, ++ 64, 68, 72, 76, 80, 84, 88, 92, ++ 96, 100, 104, 108, 112, 116, 120, 125, ++ 130, 135, 140, 145, 150, 156, 162, 168, ++ 174, 180, 186, 190, 194, 198, 202, 206, ++ 210, 214, 218, 222, 226, 230, 234, 238, ++ 242, 246, 250, 254, 258, 262, 266, 270, ++ 274, 274, 274, 274, 274, 274, 274, 274, ++ 274, 274, 274, 274, 274, 274, 274, 274, ++ 274, 274, 274, 274, 274, 274, 274, 274, ++ 274, 274, 274, 274, 274, 274, 274, 274, ++ 274, 274, 274, 274, 274, 274, 274, 274, ++ 274, 274, 274, 274, 274, 274, 274, 274, ++ 274, 274, 274, 274, 274, 274, 274, 274, ++ 274, 274, 274, 274, 274, 274, 274, 274, ++ 274, ++ }, ++ { /* Fourth byte 16-bit table 18. */ ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 4, 8, 12, 16, 20, 24, 28, ++ 32, 36, 40, 44, 48, 52, 56, 60, ++ 64, 68, 72, 76, 80, 84, 88, 92, ++ 98, 104, 110, 116, 122, 126, 130, 134, ++ 138, 142, 146, 150, 154, 158, 162, 166, ++ 170, 174, 178, 182, 186, 190, 194, 198, ++ 202, 206, 210, 214, 218, 222, 226, 230, ++ 234, 238, 242, 246, 250, 254, 258, 262, ++ 266, 266, 266, 266, 266, 266, 266, 266, ++ 266, 266, 266, 266, 266, 266, 266, 266, ++ 266, 266, 266, 266, 266, 266, 266, 266, ++ 266, 266, 266, 266, 266, 266, 266, 266, ++ 266, 266, 266, 266, 266, 266, 266, 266, ++ 266, 266, 266, 266, 266, 266, 266, 266, ++ 266, 266, 266, 266, 266, 266, 266, 266, ++ 266, 266, 266, 266, 266, 266, 266, 266, ++ 266, ++ }, ++ { /* Fourth byte 16-bit table 19. 
*/ ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 4, 8, 12, 16, 20, 24, 28, ++ 32, 36, 40, 44, 48, 52, 56, 60, ++ 64, 68, 72, 76, 80, 84, 88, 92, ++ 96, 100, 104, 108, 112, 116, 120, 124, ++ 130, 136, 140, 144, 148, 152, 156, 160, ++ 164, 168, 172, 176, 180, 184, 188, 192, ++ 196, 200, 204, 210, 216, 222, 226, 230, ++ 234, 238, 242, 246, 250, 254, 258, 262, ++ 266, 266, 266, 266, 266, 266, 266, 266, ++ 266, 266, 266, 266, 266, 266, 266, 266, ++ 266, 266, 266, 266, 266, 266, 266, 266, ++ 266, 266, 266, 266, 266, 266, 266, 266, ++ 266, 266, 266, 266, 266, 266, 266, 266, ++ 266, 266, 266, 266, 266, 266, 266, 266, ++ 266, 266, 266, 266, 266, 266, 266, 266, ++ 266, 266, 266, 266, 266, 266, 266, 266, ++ 266, ++ }, ++ { /* Fourth byte 16-bit table 20. */ ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 6, 12, 18, 24, 30, 36, 42, ++ 48, 54, 60, 66, 72, 78, 84, 90, ++ 96, 102, 108, 114, 120, 126, 132, 138, ++ 144, 150, 156, 162, 168, 174, 180, 186, ++ 192, 198, 204, 210, 216, 222, 228, 234, ++ 240, 246, 252, 258, 264, 270, 276, 282, ++ 288, 288, 288, 288, 288, 288, 288, 288, ++ 288, 288, 288, 288, 288, 288, 288, 288, ++ 288, 288, 288, 288, 288, 288, 288, 288, ++ 288, 288, 288, 288, 288, 288, 288, 288, ++ 288, 288, 288, 288, 288, 288, 288, 288, ++ 288, 288, 288, 288, 288, 288, 288, 288, ++ 288, 288, 288, 288, 288, 288, 288, 288, ++ 288, 288, 288, 288, 288, 288, 288, 288, ++ 288, ++ }, ++ { /* Fourth byte 16-bit table 21. */ ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 6, 12, 18, 24, 30, 36, 42, ++ 48, 54, 60, 66, 72, 78, 84, 90, ++ 96, 96, 96, 102, 108, 114, 120, 126, ++ 132, 138, 144, 150, 156, 162, 168, 174, ++ 180, 186, 192, 198, 204, 210, 216, 222, ++ 228, 234, 240, 246, 252, 258, 264, 270, ++ 276, 282, 288, 294, 300, 306, 312, 318, ++ 324, 330, 336, 342, 348, 354, 360, 366, ++ 372, 372, 372, 372, 372, 372, 372, 372, ++ 372, 372, 372, 372, 372, 372, 372, 372, ++ 372, 372, 372, 372, 372, 372, 372, 372, ++ 372, 372, 372, 372, 372, 372, 372, 372, ++ 372, 372, 372, 372, 372, 372, 372, 372, ++ 372, 372, 372, 372, 372, 372, 372, 372, ++ 372, 372, 372, 372, 372, 372, 372, 372, ++ 372, 372, 372, 372, 372, 372, 372, 372, ++ 372, ++ }, ++ { /* Fourth byte 16-bit table 22. 
*/ ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 4, 8, 12, 17, 21, 25, 29, ++ 33, 37, 41, 45, 49, 53, 58, 62, ++ 66, 70, 74, 79, 83, 87, 91, 96, ++ 100, 104, 108, 112, 116, 121, 125, 129, ++ 133, 137, 141, 145, 149, 153, 157, 161, ++ 165, 169, 173, 177, 181, 185, 189, 193, ++ 197, 201, 205, 209, 213, 218, 222, 226, ++ 230, 235, 239, 243, 247, 251, 255, 259, ++ 263, 263, 263, 263, 263, 263, 263, 263, ++ 263, 263, 263, 263, 263, 263, 263, 263, ++ 263, 263, 263, 263, 263, 263, 263, 263, ++ 263, 263, 263, 263, 263, 263, 263, 263, ++ 263, 263, 263, 263, 263, 263, 263, 263, ++ 263, 263, 263, 263, 263, 263, 263, 263, ++ 263, 263, 263, 263, 263, 263, 263, 263, ++ 263, 263, 263, 263, 263, 263, 263, 263, ++ 263, ++ }, ++ { /* Fourth byte 16-bit table 23. */ ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 4, 8, 12, 16, 20, 24, 28, ++ 32, 36, 40, 44, 48, 52, 56, 60, ++ 64, 68, 72, 76, 80, 84, 88, 92, ++ 96, 100, 105, 109, 113, 117, 121, 125, ++ 129, 134, 139, 143, 147, 151, 155, 159, ++ 163, 167, 171, 175, 179, 184, 188, 192, ++ 196, 200, 205, 209, 213, 217, 221, 225, ++ 229, 233, 237, 241, 246, 250, 255, 259, ++ 263, 263, 263, 263, 263, 263, 263, 263, ++ 263, 263, 263, 263, 263, 263, 263, 263, ++ 263, 263, 263, 263, 263, 263, 263, 263, ++ 263, 263, 263, 263, 263, 263, 263, 263, ++ 263, 263, 263, 263, 263, 263, 263, 263, ++ 263, 263, 263, 263, 263, 263, 263, 263, ++ 263, 263, 263, 263, 263, 263, 263, 263, ++ 263, 263, 263, 263, 263, 263, 263, 263, ++ 263, ++ }, ++ { /* Fourth byte 16-bit table 24. */ ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 4, 8, 12, 16, 20, 24, 28, ++ 32, 36, 41, 45, 49, 53, 57, 61, ++ 66, 70, 75, 80, 84, 88, 92, 96, ++ 101, 106, 110, 114, 118, 122, 126, 130, ++ 134, 138, 142, 146, 150, 155, 159, 163, ++ 167, 171, 175, 179, 183, 187, 191, 195, ++ 199, 203, 207, 211, 215, 219, 223, 227, ++ 231, 236, 240, 244, 248, 252, 256, 261, ++ 265, 265, 265, 265, 265, 265, 265, 265, ++ 265, 265, 265, 265, 265, 265, 265, 265, ++ 265, 265, 265, 265, 265, 265, 265, 265, ++ 265, 265, 265, 265, 265, 265, 265, 265, ++ 265, 265, 265, 265, 265, 265, 265, 265, ++ 265, 265, 265, 265, 265, 265, 265, 265, ++ 265, 265, 265, 265, 265, 265, 265, 265, ++ 265, 265, 265, 265, 265, 265, 265, 265, ++ 265, ++ }, ++ { /* Fourth byte 16-bit table 25. 
*/ ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 4, 8, 12, 16, 20, 24, 28, ++ 32, 36, 40, 45, 49, 53, 57, 61, ++ 65, 69, 73, 77, 81, 85, 89, 93, ++ 97, 101, 105, 109, 113, 117, 122, 126, ++ 130, 134, 138, 142, 147, 151, 155, 159, ++ 163, 167, 171, 175, 179, 184, 188, 192, ++ 196, 201, 205, 209, 213, 217, 221, 225, ++ 230, 235, 240, 244, 249, 253, 257, 261, ++ 265, 265, 265, 265, 265, 265, 265, 265, ++ 265, 265, 265, 265, 265, 265, 265, 265, ++ 265, 265, 265, 265, 265, 265, 265, 265, ++ 265, 265, 265, 265, 265, 265, 265, 265, ++ 265, 265, 265, 265, 265, 265, 265, 265, ++ 265, 265, 265, 265, 265, 265, 265, 265, ++ 265, 265, 265, 265, 265, 265, 265, 265, ++ 265, 265, 265, 265, 265, 265, 265, 265, ++ 265, ++ }, ++ { /* Fourth byte 16-bit table 26. */ ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 4, 8, 12, 16, 20, 24, 29, ++ 33, 37, 41, 45, 49, 53, 58, 62, ++ 66, 71, 76, 80, 84, 88, 92, 96, ++ 100, 104, 108, 112, 117, 121, 126, 130, ++ 135, 139, 143, 147, 152, 156, 160, 165, ++ 170, 174, 178, 182, 186, 190, 194, 198, ++ 202, 206, 210, 214, 218, 222, 227, 231, ++ 236, 240, 245, 249, 254, 259, 264, 268, ++ 272, 272, 272, 272, 272, 272, 272, 272, ++ 272, 272, 272, 272, 272, 272, 272, 272, ++ 272, 272, 272, 272, 272, 272, 272, 272, ++ 272, 272, 272, 272, 272, 272, 272, 272, ++ 272, 272, 272, 272, 272, 272, 272, 272, ++ 272, 272, 272, 272, 272, 272, 272, 272, ++ 272, 272, 272, 272, 272, 272, 272, 272, ++ 272, 272, 272, 272, 272, 272, 272, 272, ++ 272, ++ }, ++ { /* Fourth byte 16-bit table 27. */ ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 4, 9, 14, 19, 24, 28, 32, ++ 36, 40, 44, 48, 52, 56, 61, 65, ++ 69, 73, 77, 82, 86, 91, 96, 100, ++ 104, 108, 112, 116, 120, 125, 130, 135, ++ 139, 143, 148, 152, 156, 160, 165, 169, ++ 173, 177, 181, 185, 190, 194, 198, 202, ++ 206, 210, 214, 219, 224, 228, 233, 237, ++ 242, 246, 250, 254, 259, 264, 268, 273, ++ 277, 277, 277, 277, 277, 277, 277, 277, ++ 277, 277, 277, 277, 277, 277, 277, 277, ++ 277, 277, 277, 277, 277, 277, 277, 277, ++ 277, 277, 277, 277, 277, 277, 277, 277, ++ 277, 277, 277, 277, 277, 277, 277, 277, ++ 277, 277, 277, 277, 277, 277, 277, 277, ++ 277, 277, 277, 277, 277, 277, 277, 277, ++ 277, 277, 277, 277, 277, 277, 277, 277, ++ 277, ++ }, ++ { /* Fourth byte 16-bit table 28. 
*/ ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 5, 9, 13, 17, 21, 25, 29, ++ 34, 39, 44, 49, 53, 57, 61, 65, ++ 69, 73, 77, 81, 85, 89, 93, 97, ++ 102, 106, 110, 114, 118, 122, 126, 130, ++ 134, 138, 142, 146, 150, 155, 160, 165, ++ 169, 173, 177, 181, 186, 190, 195, 199, ++ 203, 208, 213, 217, 221, 225, 229, 233, ++ 237, 241, 245, 249, 253, 257, 261, 265, ++ 269, 269, 269, 269, 269, 269, 269, 269, ++ 269, 269, 269, 269, 269, 269, 269, 269, ++ 269, 269, 269, 269, 269, 269, 269, 269, ++ 269, 269, 269, 269, 269, 269, 269, 269, ++ 269, 269, 269, 269, 269, 269, 269, 269, ++ 269, 269, 269, 269, 269, 269, 269, 269, ++ 269, 269, 269, 269, 269, 269, 269, 269, ++ 269, 269, 269, 269, 269, 269, 269, 269, ++ 269, ++ }, ++ { /* Fourth byte 16-bit table 29. */ ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 4, 8, 12, 16, 20, 25, 29, ++ 33, 37, 41, 45, 50, 55, 59, 63, ++ 67, 71, 75, 79, 84, 88, 92, 96, ++ 100, 105, 110, 114, 118, 122, 127, 131, ++ 135, 140, 145, 149, 153, 157, 162, 166, ++ 170, 174, 178, 182, 186, 190, 195, 199, ++ 203, 207, 212, 216, 220, 224, 228, 233, ++ 238, 242, 246, 250, 255, 259, 264, 268, ++ 272, 272, 272, 272, 272, 272, 272, 272, ++ 272, 272, 272, 272, 272, 272, 272, 272, ++ 272, 272, 272, 272, 272, 272, 272, 272, ++ 272, 272, 272, 272, 272, 272, 272, 272, ++ 272, 272, 272, 272, 272, 272, 272, 272, ++ 272, 272, 272, 272, 272, 272, 272, 272, ++ 272, 272, 272, 272, 272, 272, 272, 272, ++ 272, 272, 272, 272, 272, 272, 272, 272, ++ 272, ++ }, ++ }, ++}; ++ ++static const uchar_t u8_decomp_final_tbl[2][19370] = { ++ { ++ 0x20, 0x20, 0xCC, 0x88, 0x61, 0x20, 0xCC, 0x84, ++ 0x32, 0x33, 0x20, 0xCC, 0x81, 0xCE, 0xBC, 0x20, ++ 0xCC, 0xA7, 0x31, 0x6F, 0x31, 0xE2, 0x81, 0x84, ++ 0x34, 0x31, 0xE2, 0x81, 0x84, 0x32, 0x33, 0xE2, ++ 0x81, 0x84, 0x34, 0xF6, 0x41, 0xCC, 0x80, 0xF6, ++ 0x41, 0xCC, 0x81, 0xF6, 0x41, 0xCC, 0x82, 0xF6, ++ 0x41, 0xCC, 0x83, 0xF6, 0x41, 0xCC, 0x88, 0xF6, ++ 0x41, 0xCC, 0x8A, 0xF6, 0x43, 0xCC, 0xA7, 0xF6, ++ 0x45, 0xCC, 0x80, 0xF6, 0x45, 0xCC, 0x81, 0xF6, ++ 0x45, 0xCC, 0x82, 0xF6, 0x45, 0xCC, 0x88, 0xF6, ++ 0x49, 0xCC, 0x80, 0xF6, 0x49, 0xCC, 0x81, 0xF6, ++ 0x49, 0xCC, 0x82, 0xF6, 0x49, 0xCC, 0x88, 0xF6, ++ 0x4E, 0xCC, 0x83, 0xF6, 0x4F, 0xCC, 0x80, 0xF6, ++ 0x4F, 0xCC, 0x81, 0xF6, 0x4F, 0xCC, 0x82, 0xF6, ++ 0x4F, 0xCC, 0x83, 0xF6, 0x4F, 0xCC, 0x88, 0xF6, ++ 0x55, 0xCC, 0x80, 0xF6, 0x55, 0xCC, 0x81, 0xF6, ++ 0x55, 0xCC, 0x82, 0xF6, 0x55, 0xCC, 0x88, 0xF6, ++ 0x59, 0xCC, 0x81, 0xF6, 0x61, 0xCC, 0x80, 0xF6, ++ 0x61, 0xCC, 0x81, 0xF6, 0x61, 0xCC, 0x82, 0xF6, ++ 0x61, 0xCC, 0x83, 0xF6, 0x61, 0xCC, 0x88, 0xF6, ++ 0x61, 0xCC, 0x8A, 0xF6, 0x63, 0xCC, 0xA7, 0xF6, ++ 0x65, 0xCC, 0x80, 0xF6, 0x65, 0xCC, 0x81, 0xF6, ++ 0x65, 0xCC, 0x82, 0xF6, 0x65, 0xCC, 0x88, 0xF6, ++ 0x69, 0xCC, 0x80, 0xF6, 0x69, 0xCC, 0x81, 
0xF6, ++ 0x69, 0xCC, 0x82, 0xF6, 0x69, 0xCC, 0x88, 0xF6, ++ 0x6E, 0xCC, 0x83, 0xF6, 0x6F, 0xCC, 0x80, 0xF6, ++ 0x6F, 0xCC, 0x81, 0xF6, 0x6F, 0xCC, 0x82, 0xF6, ++ 0x6F, 0xCC, 0x83, 0xF6, 0x6F, 0xCC, 0x88, 0xF6, ++ 0x75, 0xCC, 0x80, 0xF6, 0x75, 0xCC, 0x81, 0xF6, ++ 0x75, 0xCC, 0x82, 0xF6, 0x75, 0xCC, 0x88, 0xF6, ++ 0x79, 0xCC, 0x81, 0xF6, 0x79, 0xCC, 0x88, 0xF6, ++ 0x41, 0xCC, 0x84, 0xF6, 0x61, 0xCC, 0x84, 0xF6, ++ 0x41, 0xCC, 0x86, 0xF6, 0x61, 0xCC, 0x86, 0xF6, ++ 0x41, 0xCC, 0xA8, 0xF6, 0x61, 0xCC, 0xA8, 0xF6, ++ 0x43, 0xCC, 0x81, 0xF6, 0x63, 0xCC, 0x81, 0xF6, ++ 0x43, 0xCC, 0x82, 0xF6, 0x63, 0xCC, 0x82, 0xF6, ++ 0x43, 0xCC, 0x87, 0xF6, 0x63, 0xCC, 0x87, 0xF6, ++ 0x43, 0xCC, 0x8C, 0xF6, 0x63, 0xCC, 0x8C, 0xF6, ++ 0x44, 0xCC, 0x8C, 0xF6, 0x64, 0xCC, 0x8C, 0xF6, ++ 0x45, 0xCC, 0x84, 0xF6, 0x65, 0xCC, 0x84, 0xF6, ++ 0x45, 0xCC, 0x86, 0xF6, 0x65, 0xCC, 0x86, 0xF6, ++ 0x45, 0xCC, 0x87, 0xF6, 0x65, 0xCC, 0x87, 0xF6, ++ 0x45, 0xCC, 0xA8, 0xF6, 0x65, 0xCC, 0xA8, 0xF6, ++ 0x45, 0xCC, 0x8C, 0xF6, 0x65, 0xCC, 0x8C, 0xF6, ++ 0x47, 0xCC, 0x82, 0xF6, 0x67, 0xCC, 0x82, 0xF6, ++ 0x47, 0xCC, 0x86, 0xF6, 0x67, 0xCC, 0x86, 0xF6, ++ 0x47, 0xCC, 0x87, 0xF6, 0x67, 0xCC, 0x87, 0xF6, ++ 0x47, 0xCC, 0xA7, 0xF6, 0x67, 0xCC, 0xA7, 0xF6, ++ 0x48, 0xCC, 0x82, 0xF6, 0x68, 0xCC, 0x82, 0xF6, ++ 0x49, 0xCC, 0x83, 0xF6, 0x69, 0xCC, 0x83, 0xF6, ++ 0x49, 0xCC, 0x84, 0xF6, 0x69, 0xCC, 0x84, 0xF6, ++ 0x49, 0xCC, 0x86, 0xF6, 0x69, 0xCC, 0x86, 0xF6, ++ 0x49, 0xCC, 0xA8, 0xF6, 0x69, 0xCC, 0xA8, 0xF6, ++ 0x49, 0xCC, 0x87, 0x49, 0x4A, 0x69, 0x6A, 0xF6, ++ 0x4A, 0xCC, 0x82, 0xF6, 0x6A, 0xCC, 0x82, 0xF6, ++ 0x4B, 0xCC, 0xA7, 0xF6, 0x6B, 0xCC, 0xA7, 0xF6, ++ 0x4C, 0xCC, 0x81, 0xF6, 0x6C, 0xCC, 0x81, 0xF6, ++ 0x4C, 0xCC, 0xA7, 0xF6, 0x6C, 0xCC, 0xA7, 0xF6, ++ 0x4C, 0xCC, 0x8C, 0xF6, 0x6C, 0xCC, 0x8C, 0x4C, ++ 0xC2, 0xB7, 0x6C, 0xC2, 0xB7, 0xF6, 0x4E, 0xCC, ++ 0x81, 0xF6, 0x6E, 0xCC, 0x81, 0xF6, 0x4E, 0xCC, ++ 0xA7, 0xF6, 0x6E, 0xCC, 0xA7, 0xF6, 0x4E, 0xCC, ++ 0x8C, 0xF6, 0x6E, 0xCC, 0x8C, 0xCA, 0xBC, 0x6E, ++ 0xF6, 0x4F, 0xCC, 0x84, 0xF6, 0x6F, 0xCC, 0x84, ++ 0xF6, 0x4F, 0xCC, 0x86, 0xF6, 0x6F, 0xCC, 0x86, ++ 0xF6, 0x4F, 0xCC, 0x8B, 0xF6, 0x6F, 0xCC, 0x8B, ++ 0xF6, 0x52, 0xCC, 0x81, 0xF6, 0x72, 0xCC, 0x81, ++ 0xF6, 0x52, 0xCC, 0xA7, 0xF6, 0x72, 0xCC, 0xA7, ++ 0xF6, 0x52, 0xCC, 0x8C, 0xF6, 0x72, 0xCC, 0x8C, ++ 0xF6, 0x53, 0xCC, 0x81, 0xF6, 0x73, 0xCC, 0x81, ++ 0xF6, 0x53, 0xCC, 0x82, 0xF6, 0x73, 0xCC, 0x82, ++ 0xF6, 0x53, 0xCC, 0xA7, 0xF6, 0x73, 0xCC, 0xA7, ++ 0xF6, 0x53, 0xCC, 0x8C, 0xF6, 0x73, 0xCC, 0x8C, ++ 0xF6, 0x54, 0xCC, 0xA7, 0xF6, 0x74, 0xCC, 0xA7, ++ 0xF6, 0x54, 0xCC, 0x8C, 0xF6, 0x74, 0xCC, 0x8C, ++ 0xF6, 0x55, 0xCC, 0x83, 0xF6, 0x75, 0xCC, 0x83, ++ 0xF6, 0x55, 0xCC, 0x84, 0xF6, 0x75, 0xCC, 0x84, ++ 0xF6, 0x55, 0xCC, 0x86, 0xF6, 0x75, 0xCC, 0x86, ++ 0xF6, 0x55, 0xCC, 0x8A, 0xF6, 0x75, 0xCC, 0x8A, ++ 0xF6, 0x55, 0xCC, 0x8B, 0xF6, 0x75, 0xCC, 0x8B, ++ 0xF6, 0x55, 0xCC, 0xA8, 0xF6, 0x75, 0xCC, 0xA8, ++ 0xF6, 0x57, 0xCC, 0x82, 0xF6, 0x77, 0xCC, 0x82, ++ 0xF6, 0x59, 0xCC, 0x82, 0xF6, 0x79, 0xCC, 0x82, ++ 0xF6, 0x59, 0xCC, 0x88, 0xF6, 0x5A, 0xCC, 0x81, ++ 0xF6, 0x7A, 0xCC, 0x81, 0xF6, 0x5A, 0xCC, 0x87, ++ 0xF6, 0x7A, 0xCC, 0x87, 0xF6, 0x5A, 0xCC, 0x8C, ++ 0xF6, 0x7A, 0xCC, 0x8C, 0x73, 0xF6, 0x4F, 0xCC, ++ 0x9B, 0xF6, 0x6F, 0xCC, 0x9B, 0xF6, 0x55, 0xCC, ++ 0x9B, 0xF6, 0x75, 0xCC, 0x9B, 0x44, 0x5A, 0xCC, ++ 0x8C, 0x44, 0x7A, 0xCC, 0x8C, 0x64, 0x7A, 0xCC, ++ 0x8C, 0x4C, 0x4A, 0x4C, 0x6A, 0x6C, 0x6A, 0x4E, ++ 0x4A, 0x4E, 0x6A, 0x6E, 0x6A, 0xF6, 0x41, 0xCC, ++ 0x8C, 0xF6, 0x61, 0xCC, 0x8C, 0xF6, 0x49, 0xCC, ++ 0x8C, 0xF6, 0x69, 0xCC, 
0x8C, 0xF6, 0x4F, 0xCC, ++ 0x8C, 0xF6, 0x6F, 0xCC, 0x8C, 0xF6, 0x55, 0xCC, ++ 0x8C, 0xF6, 0x75, 0xCC, 0x8C, 0xF6, 0x55, 0xCC, ++ 0x88, 0xCC, 0x84, 0xF6, 0x75, 0xCC, 0x88, 0xCC, ++ 0x84, 0xF6, 0x55, 0xCC, 0x88, 0xCC, 0x81, 0xF6, ++ 0x75, 0xCC, 0x88, 0xCC, 0x81, 0xF6, 0x55, 0xCC, ++ 0x88, 0xCC, 0x8C, 0xF6, 0x75, 0xCC, 0x88, 0xCC, ++ 0x8C, 0xF6, 0x55, 0xCC, 0x88, 0xCC, 0x80, 0xF6, ++ 0x75, 0xCC, 0x88, 0xCC, 0x80, 0xF6, 0x41, 0xCC, ++ 0x88, 0xCC, 0x84, 0xF6, 0x61, 0xCC, 0x88, 0xCC, ++ 0x84, 0xF6, 0x41, 0xCC, 0x87, 0xCC, 0x84, 0xF6, ++ 0x61, 0xCC, 0x87, 0xCC, 0x84, 0xF6, 0xC3, 0x86, ++ 0xCC, 0x84, 0xF6, 0xC3, 0xA6, 0xCC, 0x84, 0xF6, ++ 0x47, 0xCC, 0x8C, 0xF6, 0x67, 0xCC, 0x8C, 0xF6, ++ 0x4B, 0xCC, 0x8C, 0xF6, 0x6B, 0xCC, 0x8C, 0xF6, ++ 0x4F, 0xCC, 0xA8, 0xF6, 0x6F, 0xCC, 0xA8, 0xF6, ++ 0x4F, 0xCC, 0xA8, 0xCC, 0x84, 0xF6, 0x6F, 0xCC, ++ 0xA8, 0xCC, 0x84, 0xF6, 0xC6, 0xB7, 0xCC, 0x8C, ++ 0xF6, 0xCA, 0x92, 0xCC, 0x8C, 0xF6, 0x6A, 0xCC, ++ 0x8C, 0x44, 0x5A, 0x44, 0x7A, 0x64, 0x7A, 0xF6, ++ 0x47, 0xCC, 0x81, 0xF6, 0x67, 0xCC, 0x81, 0xF6, ++ 0x4E, 0xCC, 0x80, 0xF6, 0x6E, 0xCC, 0x80, 0xF6, ++ 0x41, 0xCC, 0x8A, 0xCC, 0x81, 0xF6, 0x61, 0xCC, ++ 0x8A, 0xCC, 0x81, 0xF6, 0xC3, 0x86, 0xCC, 0x81, ++ 0xF6, 0xC3, 0xA6, 0xCC, 0x81, 0xF6, 0xC3, 0x98, ++ 0xCC, 0x81, 0xF6, 0xC3, 0xB8, 0xCC, 0x81, 0xF6, ++ 0x41, 0xCC, 0x8F, 0xF6, 0x61, 0xCC, 0x8F, 0xF6, ++ 0x41, 0xCC, 0x91, 0xF6, 0x61, 0xCC, 0x91, 0xF6, ++ 0x45, 0xCC, 0x8F, 0xF6, 0x65, 0xCC, 0x8F, 0xF6, ++ 0x45, 0xCC, 0x91, 0xF6, 0x65, 0xCC, 0x91, 0xF6, ++ 0x49, 0xCC, 0x8F, 0xF6, 0x69, 0xCC, 0x8F, 0xF6, ++ 0x49, 0xCC, 0x91, 0xF6, 0x69, 0xCC, 0x91, 0xF6, ++ 0x4F, 0xCC, 0x8F, 0xF6, 0x6F, 0xCC, 0x8F, 0xF6, ++ 0x4F, 0xCC, 0x91, 0xF6, 0x6F, 0xCC, 0x91, 0xF6, ++ 0x52, 0xCC, 0x8F, 0xF6, 0x72, 0xCC, 0x8F, 0xF6, ++ 0x52, 0xCC, 0x91, 0xF6, 0x72, 0xCC, 0x91, 0xF6, ++ 0x55, 0xCC, 0x8F, 0xF6, 0x75, 0xCC, 0x8F, 0xF6, ++ 0x55, 0xCC, 0x91, 0xF6, 0x75, 0xCC, 0x91, 0xF6, ++ 0x53, 0xCC, 0xA6, 0xF6, 0x73, 0xCC, 0xA6, 0xF6, ++ 0x54, 0xCC, 0xA6, 0xF6, 0x74, 0xCC, 0xA6, 0xF6, ++ 0x48, 0xCC, 0x8C, 0xF6, 0x68, 0xCC, 0x8C, 0xF6, ++ 0x41, 0xCC, 0x87, 0xF6, 0x61, 0xCC, 0x87, 0xF6, ++ 0x45, 0xCC, 0xA7, 0xF6, 0x65, 0xCC, 0xA7, 0xF6, ++ 0x4F, 0xCC, 0x88, 0xCC, 0x84, 0xF6, 0x6F, 0xCC, ++ 0x88, 0xCC, 0x84, 0xF6, 0x4F, 0xCC, 0x83, 0xCC, ++ 0x84, 0xF6, 0x6F, 0xCC, 0x83, 0xCC, 0x84, 0xF6, ++ 0x4F, 0xCC, 0x87, 0xF6, 0x6F, 0xCC, 0x87, 0xF6, ++ 0x4F, 0xCC, 0x87, 0xCC, 0x84, 0xF6, 0x6F, 0xCC, ++ 0x87, 0xCC, 0x84, 0xF6, 0x59, 0xCC, 0x84, 0xF6, ++ 0x79, 0xCC, 0x84, 0x68, 0xC9, 0xA6, 0x6A, 0x72, ++ 0xC9, 0xB9, 0xC9, 0xBB, 0xCA, 0x81, 0x77, 0x79, ++ 0x20, 0xCC, 0x86, 0x20, 0xCC, 0x87, 0x20, 0xCC, ++ 0x8A, 0x20, 0xCC, 0xA8, 0x20, 0xCC, 0x83, 0x20, ++ 0xCC, 0x8B, 0xC9, 0xA3, 0x6C, 0x73, 0x78, 0xCA, ++ 0x95, 0xF6, 0xCC, 0x80, 0xF6, 0xCC, 0x81, 0xF6, ++ 0xCC, 0x93, 0xF6, 0xCC, 0x88, 0xCC, 0x81, 0xF6, ++ 0xCA, 0xB9, 0x20, 0xCD, 0x85, 0xF6, 0x3B, 0x20, ++ 0xCC, 0x81, 0xF5, 0x05, 0xC2, 0xA8, 0xCC, 0x81, ++ 0x20, 0xCC, 0x88, 0xCC, 0x81, 0xF6, 0xCE, 0x91, ++ 0xCC, 0x81, 0xF6, 0xC2, 0xB7, 0xF6, 0xCE, 0x95, ++ 0xCC, 0x81, 0xF6, 0xCE, 0x97, 0xCC, 0x81, 0xF6, ++ 0xCE, 0x99, 0xCC, 0x81, 0xF6, 0xCE, 0x9F, 0xCC, ++ 0x81, 0xF6, 0xCE, 0xA5, 0xCC, 0x81, 0xF6, 0xCE, ++ 0xA9, 0xCC, 0x81, 0xF6, 0xCE, 0xB9, 0xCC, 0x88, ++ 0xCC, 0x81, 0xF6, 0xCE, 0x99, 0xCC, 0x88, 0xF6, ++ 0xCE, 0xA5, 0xCC, 0x88, 0xF6, 0xCE, 0xB1, 0xCC, ++ 0x81, 0xF6, 0xCE, 0xB5, 0xCC, 0x81, 0xF6, 0xCE, ++ 0xB7, 0xCC, 0x81, 0xF6, 0xCE, 0xB9, 0xCC, 0x81, ++ 0xF6, 0xCF, 0x85, 0xCC, 0x88, 0xCC, 0x81, 0xF6, ++ 0xCE, 0xB9, 0xCC, 0x88, 0xF6, 0xCF, 0x85, 0xCC, ++ 0x88, 
0xF6, 0xCE, 0xBF, 0xCC, 0x81, 0xF6, 0xCF, ++ 0x85, 0xCC, 0x81, 0xF6, 0xCF, 0x89, 0xCC, 0x81, ++ 0xCE, 0xB2, 0xCE, 0xB8, 0xCE, 0xA5, 0xF5, 0x05, ++ 0xCF, 0x92, 0xCC, 0x81, 0xCE, 0xA5, 0xCC, 0x81, ++ 0xF5, 0x05, 0xCF, 0x92, 0xCC, 0x88, 0xCE, 0xA5, ++ 0xCC, 0x88, 0xCF, 0x86, 0xCF, 0x80, 0xCE, 0xBA, ++ 0xCF, 0x81, 0xCF, 0x82, 0xCE, 0x98, 0xCE, 0xB5, ++ 0xF6, 0xD0, 0x95, 0xCC, 0x80, 0xF6, 0xD0, 0x95, ++ 0xCC, 0x88, 0xF6, 0xD0, 0x93, 0xCC, 0x81, 0xF6, ++ 0xD0, 0x86, 0xCC, 0x88, 0xF6, 0xD0, 0x9A, 0xCC, ++ 0x81, 0xF6, 0xD0, 0x98, 0xCC, 0x80, 0xF6, 0xD0, ++ 0xA3, 0xCC, 0x86, 0xF6, 0xD0, 0x98, 0xCC, 0x86, ++ 0xF6, 0xD0, 0xB8, 0xCC, 0x86, 0xF6, 0xD0, 0xB5, ++ 0xCC, 0x80, 0xF6, 0xD0, 0xB5, 0xCC, 0x88, 0xF6, ++ 0xD0, 0xB3, 0xCC, 0x81, 0xF6, 0xD1, 0x96, 0xCC, ++ 0x88, 0xF6, 0xD0, 0xBA, 0xCC, 0x81, 0xF6, 0xD0, ++ 0xB8, 0xCC, 0x80, 0xF6, 0xD1, 0x83, 0xCC, 0x86, ++ 0xF6, 0xD1, 0xB4, 0xCC, 0x8F, 0xF6, 0xD1, 0xB5, ++ 0xCC, 0x8F, 0xF6, 0xD0, 0x96, 0xCC, 0x86, 0xF6, ++ 0xD0, 0xB6, 0xCC, 0x86, 0xF6, 0xD0, 0x90, 0xCC, ++ 0x86, 0xF6, 0xD0, 0xB0, 0xCC, 0x86, 0xF6, 0xD0, ++ 0x90, 0xCC, 0x88, 0xF6, 0xD0, 0xB0, 0xCC, 0x88, ++ 0xF6, 0xD0, 0x95, 0xCC, 0x86, 0xF6, 0xD0, 0xB5, ++ 0xCC, 0x86, 0xF6, 0xD3, 0x98, 0xCC, 0x88, 0xF6, ++ 0xD3, 0x99, 0xCC, 0x88, 0xF6, 0xD0, 0x96, 0xCC, ++ 0x88, 0xF6, 0xD0, 0xB6, 0xCC, 0x88, 0xF6, 0xD0, ++ 0x97, 0xCC, 0x88, 0xF6, 0xD0, 0xB7, 0xCC, 0x88, ++ 0xF6, 0xD0, 0x98, 0xCC, 0x84, 0xF6, 0xD0, 0xB8, ++ 0xCC, 0x84, 0xF6, 0xD0, 0x98, 0xCC, 0x88, 0xF6, ++ 0xD0, 0xB8, 0xCC, 0x88, 0xF6, 0xD0, 0x9E, 0xCC, ++ 0x88, 0xF6, 0xD0, 0xBE, 0xCC, 0x88, 0xF6, 0xD3, ++ 0xA8, 0xCC, 0x88, 0xF6, 0xD3, 0xA9, 0xCC, 0x88, ++ 0xF6, 0xD0, 0xAD, 0xCC, 0x88, 0xF6, 0xD1, 0x8D, ++ 0xCC, 0x88, 0xF6, 0xD0, 0xA3, 0xCC, 0x84, 0xF6, ++ 0xD1, 0x83, 0xCC, 0x84, 0xF6, 0xD0, 0xA3, 0xCC, ++ 0x88, 0xF6, 0xD1, 0x83, 0xCC, 0x88, 0xF6, 0xD0, ++ 0xA3, 0xCC, 0x8B, 0xF6, 0xD1, 0x83, 0xCC, 0x8B, ++ 0xF6, 0xD0, 0xA7, 0xCC, 0x88, 0xF6, 0xD1, 0x87, ++ 0xCC, 0x88, 0xF6, 0xD0, 0xAB, 0xCC, 0x88, 0xF6, ++ 0xD1, 0x8B, 0xCC, 0x88, 0xD5, 0xA5, 0xD6, 0x82, ++ 0xF6, 0xD8, 0xA7, 0xD9, 0x93, 0xF6, 0xD8, 0xA7, ++ 0xD9, 0x94, 0xF6, 0xD9, 0x88, 0xD9, 0x94, 0xF6, ++ 0xD8, 0xA7, 0xD9, 0x95, 0xF6, 0xD9, 0x8A, 0xD9, ++ 0x94, 0xD8, 0xA7, 0xD9, 0xB4, 0xD9, 0x88, 0xD9, ++ 0xB4, 0xDB, 0x87, 0xD9, 0xB4, 0xD9, 0x8A, 0xD9, ++ 0xB4, 0xF6, 0xDB, 0x95, 0xD9, 0x94, 0xF6, 0xDB, ++ 0x81, 0xD9, 0x94, 0xF6, 0xDB, 0x92, 0xD9, 0x94, ++ 0xF6, 0xE0, 0xA4, 0xA8, 0xE0, 0xA4, 0xBC, 0xF6, ++ 0xE0, 0xA4, 0xB0, 0xE0, 0xA4, 0xBC, 0xF6, 0xE0, ++ 0xA4, 0xB3, 0xE0, 0xA4, 0xBC, 0xF6, 0xE0, 0xA4, ++ 0x95, 0xE0, 0xA4, 0xBC, 0xF6, 0xE0, 0xA4, 0x96, ++ 0xE0, 0xA4, 0xBC, 0xF6, 0xE0, 0xA4, 0x97, 0xE0, ++ 0xA4, 0xBC, 0xF6, 0xE0, 0xA4, 0x9C, 0xE0, 0xA4, ++ 0xBC, 0xF6, 0xE0, 0xA4, 0xA1, 0xE0, 0xA4, 0xBC, ++ 0xF6, 0xE0, 0xA4, 0xA2, 0xE0, 0xA4, 0xBC, 0xF6, ++ 0xE0, 0xA4, 0xAB, 0xE0, 0xA4, 0xBC, 0xF6, 0xE0, ++ 0xA4, 0xAF, 0xE0, 0xA4, 0xBC, 0xF6, 0xE0, 0xA7, ++ 0x87, 0xE0, 0xA6, 0xBE, 0xF6, 0xE0, 0xA7, 0x87, ++ 0xE0, 0xA7, 0x97, 0xF6, 0xE0, 0xA6, 0xA1, 0xE0, ++ 0xA6, 0xBC, 0xF6, 0xE0, 0xA6, 0xA2, 0xE0, 0xA6, ++ 0xBC, 0xF6, 0xE0, 0xA6, 0xAF, 0xE0, 0xA6, 0xBC, ++ 0xF6, 0xE0, 0xA8, 0xB2, 0xE0, 0xA8, 0xBC, 0xF6, ++ 0xE0, 0xA8, 0xB8, 0xE0, 0xA8, 0xBC, 0xF6, 0xE0, ++ 0xA8, 0x96, 0xE0, 0xA8, 0xBC, 0xF6, 0xE0, 0xA8, ++ 0x97, 0xE0, 0xA8, 0xBC, 0xF6, 0xE0, 0xA8, 0x9C, ++ 0xE0, 0xA8, 0xBC, 0xF6, 0xE0, 0xA8, 0xAB, 0xE0, ++ 0xA8, 0xBC, 0xF6, 0xE0, 0xAD, 0x87, 0xE0, 0xAD, ++ 0x96, 0xF6, 0xE0, 0xAD, 0x87, 0xE0, 0xAC, 0xBE, ++ 0xF6, 0xE0, 0xAD, 0x87, 0xE0, 0xAD, 0x97, 0xF6, ++ 0xE0, 0xAC, 0xA1, 0xE0, 0xAC, 0xBC, 0xF6, 
0xE0, ++ 0xAC, 0xA2, 0xE0, 0xAC, 0xBC, 0xF6, 0xE0, 0xAE, ++ 0x92, 0xE0, 0xAF, 0x97, 0xF6, 0xE0, 0xAF, 0x86, ++ 0xE0, 0xAE, 0xBE, 0xF6, 0xE0, 0xAF, 0x87, 0xE0, ++ 0xAE, 0xBE, 0xF6, 0xE0, 0xAF, 0x86, 0xE0, 0xAF, ++ 0x97, 0xF6, 0xE0, 0xB1, 0x86, 0xE0, 0xB1, 0x96, ++ 0xF6, 0xE0, 0xB2, 0xBF, 0xE0, 0xB3, 0x95, 0xF6, ++ 0xE0, 0xB3, 0x86, 0xE0, 0xB3, 0x95, 0xF6, 0xE0, ++ 0xB3, 0x86, 0xE0, 0xB3, 0x96, 0xF6, 0xE0, 0xB3, ++ 0x86, 0xE0, 0xB3, 0x82, 0xF6, 0xE0, 0xB3, 0x86, ++ 0xE0, 0xB3, 0x82, 0xE0, 0xB3, 0x95, 0xF6, 0xE0, ++ 0xB5, 0x86, 0xE0, 0xB4, 0xBE, 0xF6, 0xE0, 0xB5, ++ 0x87, 0xE0, 0xB4, 0xBE, 0xF6, 0xE0, 0xB5, 0x86, ++ 0xE0, 0xB5, 0x97, 0xF6, 0xE0, 0xB7, 0x99, 0xE0, ++ 0xB7, 0x8A, 0xF6, 0xE0, 0xB7, 0x99, 0xE0, 0xB7, ++ 0x8F, 0xF6, 0xE0, 0xB7, 0x99, 0xE0, 0xB7, 0x8F, ++ 0xE0, 0xB7, 0x8A, 0xF6, 0xE0, 0xB7, 0x99, 0xE0, ++ 0xB7, 0x9F, 0xE0, 0xB9, 0x8D, 0xE0, 0xB8, 0xB2, ++ 0xE0, 0xBB, 0x8D, 0xE0, 0xBA, 0xB2, 0xE0, 0xBA, ++ 0xAB, 0xE0, 0xBA, 0x99, 0xE0, 0xBA, 0xAB, 0xE0, ++ 0xBA, 0xA1, 0xE0, 0xBC, 0x8B, 0xF6, 0xE0, 0xBD, ++ 0x82, 0xE0, 0xBE, 0xB7, 0xF6, 0xE0, 0xBD, 0x8C, ++ 0xE0, 0xBE, 0xB7, 0xF6, 0xE0, 0xBD, 0x91, 0xE0, ++ 0xBE, 0xB7, 0xF6, 0xE0, 0xBD, 0x96, 0xE0, 0xBE, ++ 0xB7, 0xF6, 0xE0, 0xBD, 0x9B, 0xE0, 0xBE, 0xB7, ++ 0xF6, 0xE0, 0xBD, 0x80, 0xE0, 0xBE, 0xB5, 0xF6, ++ 0xE0, 0xBD, 0xB1, 0xE0, 0xBD, 0xB2, 0xF6, 0xE0, ++ 0xBD, 0xB1, 0xE0, 0xBD, 0xB4, 0xF6, 0xE0, 0xBE, ++ 0xB2, 0xE0, 0xBE, 0x80, 0xE0, 0xBE, 0xB2, 0xE0, ++ 0xBD, 0xB1, 0xE0, 0xBE, 0x80, 0xF6, 0xE0, 0xBE, ++ 0xB3, 0xE0, 0xBE, 0x80, 0xE0, 0xBE, 0xB3, 0xE0, ++ 0xBD, 0xB1, 0xE0, 0xBE, 0x80, 0xF6, 0xE0, 0xBD, ++ 0xB1, 0xE0, 0xBE, 0x80, 0xF6, 0xE0, 0xBE, 0x92, ++ 0xE0, 0xBE, 0xB7, 0xF6, 0xE0, 0xBE, 0x9C, 0xE0, ++ 0xBE, 0xB7, 0xF6, 0xE0, 0xBE, 0xA1, 0xE0, 0xBE, ++ 0xB7, 0xF6, 0xE0, 0xBE, 0xA6, 0xE0, 0xBE, 0xB7, ++ 0xF6, 0xE0, 0xBE, 0xAB, 0xE0, 0xBE, 0xB7, 0xF6, ++ 0xE0, 0xBE, 0x90, 0xE0, 0xBE, 0xB5, 0xF6, 0xE1, ++ 0x80, 0xA5, 0xE1, 0x80, 0xAE, 0xF6, 0x41, 0xCC, ++ 0xA5, 0xF6, 0x61, 0xCC, 0xA5, 0xF6, 0x42, 0xCC, ++ 0x87, 0xF6, 0x62, 0xCC, 0x87, 0xF6, 0x42, 0xCC, ++ 0xA3, 0xF6, 0x62, 0xCC, 0xA3, 0xF6, 0x42, 0xCC, ++ 0xB1, 0xF6, 0x62, 0xCC, 0xB1, 0xF6, 0x43, 0xCC, ++ 0xA7, 0xCC, 0x81, 0xF6, 0x63, 0xCC, 0xA7, 0xCC, ++ 0x81, 0xF6, 0x44, 0xCC, 0x87, 0xF6, 0x64, 0xCC, ++ 0x87, 0xF6, 0x44, 0xCC, 0xA3, 0xF6, 0x64, 0xCC, ++ 0xA3, 0xF6, 0x44, 0xCC, 0xB1, 0xF6, 0x64, 0xCC, ++ 0xB1, 0xF6, 0x44, 0xCC, 0xA7, 0xF6, 0x64, 0xCC, ++ 0xA7, 0xF6, 0x44, 0xCC, 0xAD, 0xF6, 0x64, 0xCC, ++ 0xAD, 0xF6, 0x45, 0xCC, 0x84, 0xCC, 0x80, 0xF6, ++ 0x65, 0xCC, 0x84, 0xCC, 0x80, 0xF6, 0x45, 0xCC, ++ 0x84, 0xCC, 0x81, 0xF6, 0x65, 0xCC, 0x84, 0xCC, ++ 0x81, 0xF6, 0x45, 0xCC, 0xAD, 0xF6, 0x65, 0xCC, ++ 0xAD, 0xF6, 0x45, 0xCC, 0xB0, 0xF6, 0x65, 0xCC, ++ 0xB0, 0xF6, 0x45, 0xCC, 0xA7, 0xCC, 0x86, 0xF6, ++ 0x65, 0xCC, 0xA7, 0xCC, 0x86, 0xF6, 0x46, 0xCC, ++ 0x87, 0xF6, 0x66, 0xCC, 0x87, 0xF6, 0x47, 0xCC, ++ 0x84, 0xF6, 0x67, 0xCC, 0x84, 0xF6, 0x48, 0xCC, ++ 0x87, 0xF6, 0x68, 0xCC, 0x87, 0xF6, 0x48, 0xCC, ++ 0xA3, 0xF6, 0x68, 0xCC, 0xA3, 0xF6, 0x48, 0xCC, ++ 0x88, 0xF6, 0x68, 0xCC, 0x88, 0xF6, 0x48, 0xCC, ++ 0xA7, 0xF6, 0x68, 0xCC, 0xA7, 0xF6, 0x48, 0xCC, ++ 0xAE, 0xF6, 0x68, 0xCC, 0xAE, 0xF6, 0x49, 0xCC, ++ 0xB0, 0xF6, 0x69, 0xCC, 0xB0, 0xF6, 0x49, 0xCC, ++ 0x88, 0xCC, 0x81, 0xF6, 0x69, 0xCC, 0x88, 0xCC, ++ 0x81, 0xF6, 0x4B, 0xCC, 0x81, 0xF6, 0x6B, 0xCC, ++ 0x81, 0xF6, 0x4B, 0xCC, 0xA3, 0xF6, 0x6B, 0xCC, ++ 0xA3, 0xF6, 0x4B, 0xCC, 0xB1, 0xF6, 0x6B, 0xCC, ++ 0xB1, 0xF6, 0x4C, 0xCC, 0xA3, 0xF6, 0x6C, 0xCC, ++ 0xA3, 0xF6, 0x4C, 0xCC, 0xA3, 0xCC, 0x84, 0xF6, ++ 0x6C, 0xCC, 0xA3, 0xCC, 
0x84, 0xF6, 0x4C, 0xCC, ++ 0xB1, 0xF6, 0x6C, 0xCC, 0xB1, 0xF6, 0x4C, 0xCC, ++ 0xAD, 0xF6, 0x6C, 0xCC, 0xAD, 0xF6, 0x4D, 0xCC, ++ 0x81, 0xF6, 0x6D, 0xCC, 0x81, 0xF6, 0x4D, 0xCC, ++ 0x87, 0xF6, 0x6D, 0xCC, 0x87, 0xF6, 0x4D, 0xCC, ++ 0xA3, 0xF6, 0x6D, 0xCC, 0xA3, 0xF6, 0x4E, 0xCC, ++ 0x87, 0xF6, 0x6E, 0xCC, 0x87, 0xF6, 0x4E, 0xCC, ++ 0xA3, 0xF6, 0x6E, 0xCC, 0xA3, 0xF6, 0x4E, 0xCC, ++ 0xB1, 0xF6, 0x6E, 0xCC, 0xB1, 0xF6, 0x4E, 0xCC, ++ 0xAD, 0xF6, 0x6E, 0xCC, 0xAD, 0xF6, 0x4F, 0xCC, ++ 0x83, 0xCC, 0x81, 0xF6, 0x6F, 0xCC, 0x83, 0xCC, ++ 0x81, 0xF6, 0x4F, 0xCC, 0x83, 0xCC, 0x88, 0xF6, ++ 0x6F, 0xCC, 0x83, 0xCC, 0x88, 0xF6, 0x4F, 0xCC, ++ 0x84, 0xCC, 0x80, 0xF6, 0x6F, 0xCC, 0x84, 0xCC, ++ 0x80, 0xF6, 0x4F, 0xCC, 0x84, 0xCC, 0x81, 0xF6, ++ 0x6F, 0xCC, 0x84, 0xCC, 0x81, 0xF6, 0x50, 0xCC, ++ 0x81, 0xF6, 0x70, 0xCC, 0x81, 0xF6, 0x50, 0xCC, ++ 0x87, 0xF6, 0x70, 0xCC, 0x87, 0xF6, 0x52, 0xCC, ++ 0x87, 0xF6, 0x72, 0xCC, 0x87, 0xF6, 0x52, 0xCC, ++ 0xA3, 0xF6, 0x72, 0xCC, 0xA3, 0xF6, 0x52, 0xCC, ++ 0xA3, 0xCC, 0x84, 0xF6, 0x72, 0xCC, 0xA3, 0xCC, ++ 0x84, 0xF6, 0x52, 0xCC, 0xB1, 0xF6, 0x72, 0xCC, ++ 0xB1, 0xF6, 0x53, 0xCC, 0x87, 0xF6, 0x73, 0xCC, ++ 0x87, 0xF6, 0x53, 0xCC, 0xA3, 0xF6, 0x73, 0xCC, ++ 0xA3, 0xF6, 0x53, 0xCC, 0x81, 0xCC, 0x87, 0xF6, ++ 0x73, 0xCC, 0x81, 0xCC, 0x87, 0xF6, 0x53, 0xCC, ++ 0x8C, 0xCC, 0x87, 0xF6, 0x73, 0xCC, 0x8C, 0xCC, ++ 0x87, 0xF6, 0x53, 0xCC, 0xA3, 0xCC, 0x87, 0xF6, ++ 0x73, 0xCC, 0xA3, 0xCC, 0x87, 0xF6, 0x54, 0xCC, ++ 0x87, 0xF6, 0x74, 0xCC, 0x87, 0xF6, 0x54, 0xCC, ++ 0xA3, 0xF6, 0x74, 0xCC, 0xA3, 0xF6, 0x54, 0xCC, ++ 0xB1, 0xF6, 0x74, 0xCC, 0xB1, 0xF6, 0x54, 0xCC, ++ 0xAD, 0xF6, 0x74, 0xCC, 0xAD, 0xF6, 0x55, 0xCC, ++ 0xA4, 0xF6, 0x75, 0xCC, 0xA4, 0xF6, 0x55, 0xCC, ++ 0xB0, 0xF6, 0x75, 0xCC, 0xB0, 0xF6, 0x55, 0xCC, ++ 0xAD, 0xF6, 0x75, 0xCC, 0xAD, 0xF6, 0x55, 0xCC, ++ 0x83, 0xCC, 0x81, 0xF6, 0x75, 0xCC, 0x83, 0xCC, ++ 0x81, 0xF6, 0x55, 0xCC, 0x84, 0xCC, 0x88, 0xF6, ++ 0x75, 0xCC, 0x84, 0xCC, 0x88, 0xF6, 0x56, 0xCC, ++ 0x83, 0xF6, 0x76, 0xCC, 0x83, 0xF6, 0x56, 0xCC, ++ 0xA3, 0xF6, 0x76, 0xCC, 0xA3, 0xF6, 0x57, 0xCC, ++ 0x80, 0xF6, 0x77, 0xCC, 0x80, 0xF6, 0x57, 0xCC, ++ 0x81, 0xF6, 0x77, 0xCC, 0x81, 0xF6, 0x57, 0xCC, ++ 0x88, 0xF6, 0x77, 0xCC, 0x88, 0xF6, 0x57, 0xCC, ++ 0x87, 0xF6, 0x77, 0xCC, 0x87, 0xF6, 0x57, 0xCC, ++ 0xA3, 0xF6, 0x77, 0xCC, 0xA3, 0xF6, 0x58, 0xCC, ++ 0x87, 0xF6, 0x78, 0xCC, 0x87, 0xF6, 0x58, 0xCC, ++ 0x88, 0xF6, 0x78, 0xCC, 0x88, 0xF6, 0x59, 0xCC, ++ 0x87, 0xF6, 0x79, 0xCC, 0x87, 0xF6, 0x5A, 0xCC, ++ 0x82, 0xF6, 0x7A, 0xCC, 0x82, 0xF6, 0x5A, 0xCC, ++ 0xA3, 0xF6, 0x7A, 0xCC, 0xA3, 0xF6, 0x5A, 0xCC, ++ 0xB1, 0xF6, 0x7A, 0xCC, 0xB1, 0xF6, 0x68, 0xCC, ++ 0xB1, 0xF6, 0x74, 0xCC, 0x88, 0xF6, 0x77, 0xCC, ++ 0x8A, 0xF6, 0x79, 0xCC, 0x8A, 0x61, 0xCA, 0xBE, ++ 0xF5, 0x05, 0xC5, 0xBF, 0xCC, 0x87, 0x73, 0xCC, ++ 0x87, 0xF6, 0x41, 0xCC, 0xA3, 0xF6, 0x61, 0xCC, ++ 0xA3, 0xF6, 0x41, 0xCC, 0x89, 0xF6, 0x61, 0xCC, ++ 0x89, 0xF6, 0x41, 0xCC, 0x82, 0xCC, 0x81, 0xF6, ++ 0x61, 0xCC, 0x82, 0xCC, 0x81, 0xF6, 0x41, 0xCC, ++ 0x82, 0xCC, 0x80, 0xF6, 0x61, 0xCC, 0x82, 0xCC, ++ 0x80, 0xF6, 0x41, 0xCC, 0x82, 0xCC, 0x89, 0xF6, ++ 0x61, 0xCC, 0x82, 0xCC, 0x89, 0xF6, 0x41, 0xCC, ++ 0x82, 0xCC, 0x83, 0xF6, 0x61, 0xCC, 0x82, 0xCC, ++ 0x83, 0xF6, 0x41, 0xCC, 0xA3, 0xCC, 0x82, 0xF6, ++ 0x61, 0xCC, 0xA3, 0xCC, 0x82, 0xF6, 0x41, 0xCC, ++ 0x86, 0xCC, 0x81, 0xF6, 0x61, 0xCC, 0x86, 0xCC, ++ 0x81, 0xF6, 0x41, 0xCC, 0x86, 0xCC, 0x80, 0xF6, ++ 0x61, 0xCC, 0x86, 0xCC, 0x80, 0xF6, 0x41, 0xCC, ++ 0x86, 0xCC, 0x89, 0xF6, 0x61, 0xCC, 0x86, 0xCC, ++ 0x89, 0xF6, 0x41, 0xCC, 0x86, 0xCC, 0x83, 0xF6, ++ 0x61, 
0xCC, 0x86, 0xCC, 0x83, 0xF6, 0x41, 0xCC, ++ 0xA3, 0xCC, 0x86, 0xF6, 0x61, 0xCC, 0xA3, 0xCC, ++ 0x86, 0xF6, 0x45, 0xCC, 0xA3, 0xF6, 0x65, 0xCC, ++ 0xA3, 0xF6, 0x45, 0xCC, 0x89, 0xF6, 0x65, 0xCC, ++ 0x89, 0xF6, 0x45, 0xCC, 0x83, 0xF6, 0x65, 0xCC, ++ 0x83, 0xF6, 0x45, 0xCC, 0x82, 0xCC, 0x81, 0xF6, ++ 0x65, 0xCC, 0x82, 0xCC, 0x81, 0xF6, 0x45, 0xCC, ++ 0x82, 0xCC, 0x80, 0xF6, 0x65, 0xCC, 0x82, 0xCC, ++ 0x80, 0xF6, 0x45, 0xCC, 0x82, 0xCC, 0x89, 0xF6, ++ 0x65, 0xCC, 0x82, 0xCC, 0x89, 0xF6, 0x45, 0xCC, ++ 0x82, 0xCC, 0x83, 0xF6, 0x65, 0xCC, 0x82, 0xCC, ++ 0x83, 0xF6, 0x45, 0xCC, 0xA3, 0xCC, 0x82, 0xF6, ++ 0x65, 0xCC, 0xA3, 0xCC, 0x82, 0xF6, 0x49, 0xCC, ++ 0x89, 0xF6, 0x69, 0xCC, 0x89, 0xF6, 0x49, 0xCC, ++ 0xA3, 0xF6, 0x69, 0xCC, 0xA3, 0xF6, 0x4F, 0xCC, ++ 0xA3, 0xF6, 0x6F, 0xCC, 0xA3, 0xF6, 0x4F, 0xCC, ++ 0x89, 0xF6, 0x6F, 0xCC, 0x89, 0xF6, 0x4F, 0xCC, ++ 0x82, 0xCC, 0x81, 0xF6, 0x6F, 0xCC, 0x82, 0xCC, ++ 0x81, 0xF6, 0x4F, 0xCC, 0x82, 0xCC, 0x80, 0xF6, ++ 0x6F, 0xCC, 0x82, 0xCC, 0x80, 0xF6, 0x4F, 0xCC, ++ 0x82, 0xCC, 0x89, 0xF6, 0x6F, 0xCC, 0x82, 0xCC, ++ 0x89, 0xF6, 0x4F, 0xCC, 0x82, 0xCC, 0x83, 0xF6, ++ 0x6F, 0xCC, 0x82, 0xCC, 0x83, 0xF6, 0x4F, 0xCC, ++ 0xA3, 0xCC, 0x82, 0xF6, 0x6F, 0xCC, 0xA3, 0xCC, ++ 0x82, 0xF6, 0x4F, 0xCC, 0x9B, 0xCC, 0x81, 0xF6, ++ 0x6F, 0xCC, 0x9B, 0xCC, 0x81, 0xF6, 0x4F, 0xCC, ++ 0x9B, 0xCC, 0x80, 0xF6, 0x6F, 0xCC, 0x9B, 0xCC, ++ 0x80, 0xF6, 0x4F, 0xCC, 0x9B, 0xCC, 0x89, 0xF6, ++ 0x6F, 0xCC, 0x9B, 0xCC, 0x89, 0xF6, 0x4F, 0xCC, ++ 0x9B, 0xCC, 0x83, 0xF6, 0x6F, 0xCC, 0x9B, 0xCC, ++ 0x83, 0xF6, 0x4F, 0xCC, 0x9B, 0xCC, 0xA3, 0xF6, ++ 0x6F, 0xCC, 0x9B, 0xCC, 0xA3, 0xF6, 0x55, 0xCC, ++ 0xA3, 0xF6, 0x75, 0xCC, 0xA3, 0xF6, 0x55, 0xCC, ++ 0x89, 0xF6, 0x75, 0xCC, 0x89, 0xF6, 0x55, 0xCC, ++ 0x9B, 0xCC, 0x81, 0xF6, 0x75, 0xCC, 0x9B, 0xCC, ++ 0x81, 0xF6, 0x55, 0xCC, 0x9B, 0xCC, 0x80, 0xF6, ++ 0x75, 0xCC, 0x9B, 0xCC, 0x80, 0xF6, 0x55, 0xCC, ++ 0x9B, 0xCC, 0x89, 0xF6, 0x75, 0xCC, 0x9B, 0xCC, ++ 0x89, 0xF6, 0x55, 0xCC, 0x9B, 0xCC, 0x83, 0xF6, ++ 0x75, 0xCC, 0x9B, 0xCC, 0x83, 0xF6, 0x55, 0xCC, ++ 0x9B, 0xCC, 0xA3, 0xF6, 0x75, 0xCC, 0x9B, 0xCC, ++ 0xA3, 0xF6, 0x59, 0xCC, 0x80, 0xF6, 0x79, 0xCC, ++ 0x80, 0xF6, 0x59, 0xCC, 0xA3, 0xF6, 0x79, 0xCC, ++ 0xA3, 0xF6, 0x59, 0xCC, 0x89, 0xF6, 0x79, 0xCC, ++ 0x89, 0xF6, 0x59, 0xCC, 0x83, 0xF6, 0x79, 0xCC, ++ 0x83, 0xF6, 0xCE, 0xB1, 0xCC, 0x93, 0xF6, 0xCE, ++ 0xB1, 0xCC, 0x94, 0xF6, 0xCE, 0xB1, 0xCC, 0x93, ++ 0xCC, 0x80, 0xF6, 0xCE, 0xB1, 0xCC, 0x94, 0xCC, ++ 0x80, 0xF6, 0xCE, 0xB1, 0xCC, 0x93, 0xCC, 0x81, ++ 0xF6, 0xCE, 0xB1, 0xCC, 0x94, 0xCC, 0x81, 0xF6, ++ 0xCE, 0xB1, 0xCC, 0x93, 0xCD, 0x82, 0xF6, 0xCE, ++ 0xB1, 0xCC, 0x94, 0xCD, 0x82, 0xF6, 0xCE, 0x91, ++ 0xCC, 0x93, 0xF6, 0xCE, 0x91, 0xCC, 0x94, 0xF6, ++ 0xCE, 0x91, 0xCC, 0x93, 0xCC, 0x80, 0xF6, 0xCE, ++ 0x91, 0xCC, 0x94, 0xCC, 0x80, 0xF6, 0xCE, 0x91, ++ 0xCC, 0x93, 0xCC, 0x81, 0xF6, 0xCE, 0x91, 0xCC, ++ 0x94, 0xCC, 0x81, 0xF6, 0xCE, 0x91, 0xCC, 0x93, ++ 0xCD, 0x82, 0xF6, 0xCE, 0x91, 0xCC, 0x94, 0xCD, ++ 0x82, 0xF6, 0xCE, 0xB5, 0xCC, 0x93, 0xF6, 0xCE, ++ 0xB5, 0xCC, 0x94, 0xF6, 0xCE, 0xB5, 0xCC, 0x93, ++ 0xCC, 0x80, 0xF6, 0xCE, 0xB5, 0xCC, 0x94, 0xCC, ++ 0x80, 0xF6, 0xCE, 0xB5, 0xCC, 0x93, 0xCC, 0x81, ++ 0xF6, 0xCE, 0xB5, 0xCC, 0x94, 0xCC, 0x81, 0xF6, ++ 0xCE, 0x95, 0xCC, 0x93, 0xF6, 0xCE, 0x95, 0xCC, ++ 0x94, 0xF6, 0xCE, 0x95, 0xCC, 0x93, 0xCC, 0x80, ++ 0xF6, 0xCE, 0x95, 0xCC, 0x94, 0xCC, 0x80, 0xF6, ++ 0xCE, 0x95, 0xCC, 0x93, 0xCC, 0x81, 0xF6, 0xCE, ++ 0x95, 0xCC, 0x94, 0xCC, 0x81, 0xF6, 0xCE, 0xB7, ++ 0xCC, 0x93, 0xF6, 0xCE, 0xB7, 0xCC, 0x94, 0xF6, ++ 0xCE, 0xB7, 0xCC, 0x93, 0xCC, 0x80, 0xF6, 
0xCE, ++ 0xB7, 0xCC, 0x94, 0xCC, 0x80, 0xF6, 0xCE, 0xB7, ++ 0xCC, 0x93, 0xCC, 0x81, 0xF6, 0xCE, 0xB7, 0xCC, ++ 0x94, 0xCC, 0x81, 0xF6, 0xCE, 0xB7, 0xCC, 0x93, ++ 0xCD, 0x82, 0xF6, 0xCE, 0xB7, 0xCC, 0x94, 0xCD, ++ 0x82, 0xF6, 0xCE, 0x97, 0xCC, 0x93, 0xF6, 0xCE, ++ 0x97, 0xCC, 0x94, 0xF6, 0xCE, 0x97, 0xCC, 0x93, ++ 0xCC, 0x80, 0xF6, 0xCE, 0x97, 0xCC, 0x94, 0xCC, ++ 0x80, 0xF6, 0xCE, 0x97, 0xCC, 0x93, 0xCC, 0x81, ++ 0xF6, 0xCE, 0x97, 0xCC, 0x94, 0xCC, 0x81, 0xF6, ++ 0xCE, 0x97, 0xCC, 0x93, 0xCD, 0x82, 0xF6, 0xCE, ++ 0x97, 0xCC, 0x94, 0xCD, 0x82, 0xF6, 0xCE, 0xB9, ++ 0xCC, 0x93, 0xF6, 0xCE, 0xB9, 0xCC, 0x94, 0xF6, ++ 0xCE, 0xB9, 0xCC, 0x93, 0xCC, 0x80, 0xF6, 0xCE, ++ 0xB9, 0xCC, 0x94, 0xCC, 0x80, 0xF6, 0xCE, 0xB9, ++ 0xCC, 0x93, 0xCC, 0x81, 0xF6, 0xCE, 0xB9, 0xCC, ++ 0x94, 0xCC, 0x81, 0xF6, 0xCE, 0xB9, 0xCC, 0x93, ++ 0xCD, 0x82, 0xF6, 0xCE, 0xB9, 0xCC, 0x94, 0xCD, ++ 0x82, 0xF6, 0xCE, 0x99, 0xCC, 0x93, 0xF6, 0xCE, ++ 0x99, 0xCC, 0x94, 0xF6, 0xCE, 0x99, 0xCC, 0x93, ++ 0xCC, 0x80, 0xF6, 0xCE, 0x99, 0xCC, 0x94, 0xCC, ++ 0x80, 0xF6, 0xCE, 0x99, 0xCC, 0x93, 0xCC, 0x81, ++ 0xF6, 0xCE, 0x99, 0xCC, 0x94, 0xCC, 0x81, 0xF6, ++ 0xCE, 0x99, 0xCC, 0x93, 0xCD, 0x82, 0xF6, 0xCE, ++ 0x99, 0xCC, 0x94, 0xCD, 0x82, 0xF6, 0xCE, 0xBF, ++ 0xCC, 0x93, 0xF6, 0xCE, 0xBF, 0xCC, 0x94, 0xF6, ++ 0xCE, 0xBF, 0xCC, 0x93, 0xCC, 0x80, 0xF6, 0xCE, ++ 0xBF, 0xCC, 0x94, 0xCC, 0x80, 0xF6, 0xCE, 0xBF, ++ 0xCC, 0x93, 0xCC, 0x81, 0xF6, 0xCE, 0xBF, 0xCC, ++ 0x94, 0xCC, 0x81, 0xF6, 0xCE, 0x9F, 0xCC, 0x93, ++ 0xF6, 0xCE, 0x9F, 0xCC, 0x94, 0xF6, 0xCE, 0x9F, ++ 0xCC, 0x93, 0xCC, 0x80, 0xF6, 0xCE, 0x9F, 0xCC, ++ 0x94, 0xCC, 0x80, 0xF6, 0xCE, 0x9F, 0xCC, 0x93, ++ 0xCC, 0x81, 0xF6, 0xCE, 0x9F, 0xCC, 0x94, 0xCC, ++ 0x81, 0xF6, 0xCF, 0x85, 0xCC, 0x93, 0xF6, 0xCF, ++ 0x85, 0xCC, 0x94, 0xF6, 0xCF, 0x85, 0xCC, 0x93, ++ 0xCC, 0x80, 0xF6, 0xCF, 0x85, 0xCC, 0x94, 0xCC, ++ 0x80, 0xF6, 0xCF, 0x85, 0xCC, 0x93, 0xCC, 0x81, ++ 0xF6, 0xCF, 0x85, 0xCC, 0x94, 0xCC, 0x81, 0xF6, ++ 0xCF, 0x85, 0xCC, 0x93, 0xCD, 0x82, 0xF6, 0xCF, ++ 0x85, 0xCC, 0x94, 0xCD, 0x82, 0xF6, 0xCE, 0xA5, ++ 0xCC, 0x94, 0xF6, 0xCE, 0xA5, 0xCC, 0x94, 0xCC, ++ 0x80, 0xF6, 0xCE, 0xA5, 0xCC, 0x94, 0xCC, 0x81, ++ 0xF6, 0xCE, 0xA5, 0xCC, 0x94, 0xCD, 0x82, 0xF6, ++ 0xCF, 0x89, 0xCC, 0x93, 0xF6, 0xCF, 0x89, 0xCC, ++ 0x94, 0xF6, 0xCF, 0x89, 0xCC, 0x93, 0xCC, 0x80, ++ 0xF6, 0xCF, 0x89, 0xCC, 0x94, 0xCC, 0x80, 0xF6, ++ 0xCF, 0x89, 0xCC, 0x93, 0xCC, 0x81, 0xF6, 0xCF, ++ 0x89, 0xCC, 0x94, 0xCC, 0x81, 0xF6, 0xCF, 0x89, ++ 0xCC, 0x93, 0xCD, 0x82, 0xF6, 0xCF, 0x89, 0xCC, ++ 0x94, 0xCD, 0x82, 0xF6, 0xCE, 0xA9, 0xCC, 0x93, ++ 0xF6, 0xCE, 0xA9, 0xCC, 0x94, 0xF6, 0xCE, 0xA9, ++ 0xCC, 0x93, 0xCC, 0x80, 0xF6, 0xCE, 0xA9, 0xCC, ++ 0x94, 0xCC, 0x80, 0xF6, 0xCE, 0xA9, 0xCC, 0x93, ++ 0xCC, 0x81, 0xF6, 0xCE, 0xA9, 0xCC, 0x94, 0xCC, ++ 0x81, 0xF6, 0xCE, 0xA9, 0xCC, 0x93, 0xCD, 0x82, ++ 0xF6, 0xCE, 0xA9, 0xCC, 0x94, 0xCD, 0x82, 0xF6, ++ 0xCE, 0xB1, 0xCC, 0x80, 0xF6, 0xCE, 0xB1, 0xCC, ++ 0x81, 0xF6, 0xCE, 0xB5, 0xCC, 0x80, 0xF6, 0xCE, ++ 0xB5, 0xCC, 0x81, 0xF6, 0xCE, 0xB7, 0xCC, 0x80, ++ 0xF6, 0xCE, 0xB7, 0xCC, 0x81, 0xF6, 0xCE, 0xB9, ++ 0xCC, 0x80, 0xF6, 0xCE, 0xB9, 0xCC, 0x81, 0xF6, ++ 0xCE, 0xBF, 0xCC, 0x80, 0xF6, 0xCE, 0xBF, 0xCC, ++ 0x81, 0xF6, 0xCF, 0x85, 0xCC, 0x80, 0xF6, 0xCF, ++ 0x85, 0xCC, 0x81, 0xF6, 0xCF, 0x89, 0xCC, 0x80, ++ 0xF6, 0xCF, 0x89, 0xCC, 0x81, 0xF6, 0xCE, 0xB1, ++ 0xCC, 0x93, 0xCD, 0x85, 0xF6, 0xCE, 0xB1, 0xCC, ++ 0x94, 0xCD, 0x85, 0xF6, 0xCE, 0xB1, 0xCC, 0x93, ++ 0xCC, 0x80, 0xCD, 0x85, 0xF6, 0xCE, 0xB1, 0xCC, ++ 0x94, 0xCC, 0x80, 0xCD, 0x85, 0xF6, 0xCE, 0xB1, ++ 0xCC, 0x93, 0xCC, 0x81, 
0xCD, 0x85, 0xF6, 0xCE, ++ 0xB1, 0xCC, 0x94, 0xCC, 0x81, 0xCD, 0x85, 0xF6, ++ 0xCE, 0xB1, 0xCC, 0x93, 0xCD, 0x82, 0xCD, 0x85, ++ 0xF6, 0xCE, 0xB1, 0xCC, 0x94, 0xCD, 0x82, 0xCD, ++ 0x85, 0xF6, 0xCE, 0x91, 0xCC, 0x93, 0xCD, 0x85, ++ 0xF6, 0xCE, 0x91, 0xCC, 0x94, 0xCD, 0x85, 0xF6, ++ 0xCE, 0x91, 0xCC, 0x93, 0xCC, 0x80, 0xCD, 0x85, ++ 0xF6, 0xCE, 0x91, 0xCC, 0x94, 0xCC, 0x80, 0xCD, ++ 0x85, 0xF6, 0xCE, 0x91, 0xCC, 0x93, 0xCC, 0x81, ++ 0xCD, 0x85, 0xF6, 0xCE, 0x91, 0xCC, 0x94, 0xCC, ++ 0x81, 0xCD, 0x85, 0xF6, 0xCE, 0x91, 0xCC, 0x93, ++ 0xCD, 0x82, 0xCD, 0x85, 0xF6, 0xCE, 0x91, 0xCC, ++ 0x94, 0xCD, 0x82, 0xCD, 0x85, 0xF6, 0xCE, 0xB7, ++ 0xCC, 0x93, 0xCD, 0x85, 0xF6, 0xCE, 0xB7, 0xCC, ++ 0x94, 0xCD, 0x85, 0xF6, 0xCE, 0xB7, 0xCC, 0x93, ++ 0xCC, 0x80, 0xCD, 0x85, 0xF6, 0xCE, 0xB7, 0xCC, ++ 0x94, 0xCC, 0x80, 0xCD, 0x85, 0xF6, 0xCE, 0xB7, ++ 0xCC, 0x93, 0xCC, 0x81, 0xCD, 0x85, 0xF6, 0xCE, ++ 0xB7, 0xCC, 0x94, 0xCC, 0x81, 0xCD, 0x85, 0xF6, ++ 0xCE, 0xB7, 0xCC, 0x93, 0xCD, 0x82, 0xCD, 0x85, ++ 0xF6, 0xCE, 0xB7, 0xCC, 0x94, 0xCD, 0x82, 0xCD, ++ 0x85, 0xF6, 0xCE, 0x97, 0xCC, 0x93, 0xCD, 0x85, ++ 0xF6, 0xCE, 0x97, 0xCC, 0x94, 0xCD, 0x85, 0xF6, ++ 0xCE, 0x97, 0xCC, 0x93, 0xCC, 0x80, 0xCD, 0x85, ++ 0xF6, 0xCE, 0x97, 0xCC, 0x94, 0xCC, 0x80, 0xCD, ++ 0x85, 0xF6, 0xCE, 0x97, 0xCC, 0x93, 0xCC, 0x81, ++ 0xCD, 0x85, 0xF6, 0xCE, 0x97, 0xCC, 0x94, 0xCC, ++ 0x81, 0xCD, 0x85, 0xF6, 0xCE, 0x97, 0xCC, 0x93, ++ 0xCD, 0x82, 0xCD, 0x85, 0xF6, 0xCE, 0x97, 0xCC, ++ 0x94, 0xCD, 0x82, 0xCD, 0x85, 0xF6, 0xCF, 0x89, ++ 0xCC, 0x93, 0xCD, 0x85, 0xF6, 0xCF, 0x89, 0xCC, ++ 0x94, 0xCD, 0x85, 0xF6, 0xCF, 0x89, 0xCC, 0x93, ++ 0xCC, 0x80, 0xCD, 0x85, 0xF6, 0xCF, 0x89, 0xCC, ++ 0x94, 0xCC, 0x80, 0xCD, 0x85, 0xF6, 0xCF, 0x89, ++ 0xCC, 0x93, 0xCC, 0x81, 0xCD, 0x85, 0xF6, 0xCF, ++ 0x89, 0xCC, 0x94, 0xCC, 0x81, 0xCD, 0x85, 0xF6, ++ 0xCF, 0x89, 0xCC, 0x93, 0xCD, 0x82, 0xCD, 0x85, ++ 0xF6, 0xCF, 0x89, 0xCC, 0x94, 0xCD, 0x82, 0xCD, ++ 0x85, 0xF6, 0xCE, 0xA9, 0xCC, 0x93, 0xCD, 0x85, ++ 0xF6, 0xCE, 0xA9, 0xCC, 0x94, 0xCD, 0x85, 0xF6, ++ 0xCE, 0xA9, 0xCC, 0x93, 0xCC, 0x80, 0xCD, 0x85, ++ 0xF6, 0xCE, 0xA9, 0xCC, 0x94, 0xCC, 0x80, 0xCD, ++ 0x85, 0xF6, 0xCE, 0xA9, 0xCC, 0x93, 0xCC, 0x81, ++ 0xCD, 0x85, 0xF6, 0xCE, 0xA9, 0xCC, 0x94, 0xCC, ++ 0x81, 0xCD, 0x85, 0xF6, 0xCE, 0xA9, 0xCC, 0x93, ++ 0xCD, 0x82, 0xCD, 0x85, 0xF6, 0xCE, 0xA9, 0xCC, ++ 0x94, 0xCD, 0x82, 0xCD, 0x85, 0xF6, 0xCE, 0xB1, ++ 0xCC, 0x86, 0xF6, 0xCE, 0xB1, 0xCC, 0x84, 0xF6, ++ 0xCE, 0xB1, 0xCC, 0x80, 0xCD, 0x85, 0xF6, 0xCE, ++ 0xB1, 0xCD, 0x85, 0xF6, 0xCE, 0xB1, 0xCC, 0x81, ++ 0xCD, 0x85, 0xF6, 0xCE, 0xB1, 0xCD, 0x82, 0xF6, ++ 0xCE, 0xB1, 0xCD, 0x82, 0xCD, 0x85, 0xF6, 0xCE, ++ 0x91, 0xCC, 0x86, 0xF6, 0xCE, 0x91, 0xCC, 0x84, ++ 0xF6, 0xCE, 0x91, 0xCC, 0x80, 0xF6, 0xCE, 0x91, ++ 0xCC, 0x81, 0xF6, 0xCE, 0x91, 0xCD, 0x85, 0x20, ++ 0xCC, 0x93, 0xF6, 0xCE, 0xB9, 0x20, 0xCC, 0x93, ++ 0x20, 0xCD, 0x82, 0xF5, 0x05, 0xC2, 0xA8, 0xCD, ++ 0x82, 0x20, 0xCC, 0x88, 0xCD, 0x82, 0xF6, 0xCE, ++ 0xB7, 0xCC, 0x80, 0xCD, 0x85, 0xF6, 0xCE, 0xB7, ++ 0xCD, 0x85, 0xF6, 0xCE, 0xB7, 0xCC, 0x81, 0xCD, ++ 0x85, 0xF6, 0xCE, 0xB7, 0xCD, 0x82, 0xF6, 0xCE, ++ 0xB7, 0xCD, 0x82, 0xCD, 0x85, 0xF6, 0xCE, 0x95, ++ 0xCC, 0x80, 0xF6, 0xCE, 0x95, 0xCC, 0x81, 0xF6, ++ 0xCE, 0x97, 0xCC, 0x80, 0xF6, 0xCE, 0x97, 0xCC, ++ 0x81, 0xF6, 0xCE, 0x97, 0xCD, 0x85, 0xF5, 0x06, ++ 0xE1, 0xBE, 0xBF, 0xCC, 0x80, 0x20, 0xCC, 0x93, ++ 0xCC, 0x80, 0xF5, 0x06, 0xE1, 0xBE, 0xBF, 0xCC, ++ 0x81, 0x20, 0xCC, 0x93, 0xCC, 0x81, 0xF5, 0x06, ++ 0xE1, 0xBE, 0xBF, 0xCD, 0x82, 0x20, 0xCC, 0x93, ++ 0xCD, 0x82, 0xF6, 0xCE, 0xB9, 0xCC, 0x86, 0xF6, ++ 0xCE, 
0xB9, 0xCC, 0x84, 0xF6, 0xCE, 0xB9, 0xCC, ++ 0x88, 0xCC, 0x80, 0xF6, 0xCE, 0xB9, 0xCC, 0x88, ++ 0xCC, 0x81, 0xF6, 0xCE, 0xB9, 0xCD, 0x82, 0xF6, ++ 0xCE, 0xB9, 0xCC, 0x88, 0xCD, 0x82, 0xF6, 0xCE, ++ 0x99, 0xCC, 0x86, 0xF6, 0xCE, 0x99, 0xCC, 0x84, ++ 0xF6, 0xCE, 0x99, 0xCC, 0x80, 0xF6, 0xCE, 0x99, ++ 0xCC, 0x81, 0xF5, 0x06, 0xE1, 0xBF, 0xBE, 0xCC, ++ 0x80, 0x20, 0xCC, 0x94, 0xCC, 0x80, 0xF5, 0x06, ++ 0xE1, 0xBF, 0xBE, 0xCC, 0x81, 0x20, 0xCC, 0x94, ++ 0xCC, 0x81, 0xF5, 0x06, 0xE1, 0xBF, 0xBE, 0xCD, ++ 0x82, 0x20, 0xCC, 0x94, 0xCD, 0x82, 0xF6, 0xCF, ++ 0x85, 0xCC, 0x86, 0xF6, 0xCF, 0x85, 0xCC, 0x84, ++ 0xF6, 0xCF, 0x85, 0xCC, 0x88, 0xCC, 0x80, 0xF6, ++ 0xCF, 0x85, 0xCC, 0x88, 0xCC, 0x81, 0xF6, 0xCF, ++ 0x81, 0xCC, 0x93, 0xF6, 0xCF, 0x81, 0xCC, 0x94, ++ 0xF6, 0xCF, 0x85, 0xCD, 0x82, 0xF6, 0xCF, 0x85, ++ 0xCC, 0x88, 0xCD, 0x82, 0xF6, 0xCE, 0xA5, 0xCC, ++ 0x86, 0xF6, 0xCE, 0xA5, 0xCC, 0x84, 0xF6, 0xCE, ++ 0xA5, 0xCC, 0x80, 0xF6, 0xCE, 0xA5, 0xCC, 0x81, ++ 0xF6, 0xCE, 0xA1, 0xCC, 0x94, 0xF5, 0x05, 0xC2, ++ 0xA8, 0xCC, 0x80, 0x20, 0xCC, 0x88, 0xCC, 0x80, ++ 0xF5, 0x05, 0xC2, 0xA8, 0xCC, 0x81, 0x20, 0xCC, ++ 0x88, 0xCC, 0x81, 0xF6, 0x60, 0xF6, 0xCF, 0x89, ++ 0xCC, 0x80, 0xCD, 0x85, 0xF6, 0xCF, 0x89, 0xCD, ++ 0x85, 0xF6, 0xCF, 0x89, 0xCC, 0x81, 0xCD, 0x85, ++ 0xF6, 0xCF, 0x89, 0xCD, 0x82, 0xF6, 0xCF, 0x89, ++ 0xCD, 0x82, 0xCD, 0x85, 0xF6, 0xCE, 0x9F, 0xCC, ++ 0x80, 0xF6, 0xCE, 0x9F, 0xCC, 0x81, 0xF6, 0xCE, ++ 0xA9, 0xCC, 0x80, 0xF6, 0xCE, 0xA9, 0xCC, 0x81, ++ 0xF6, 0xCE, 0xA9, 0xCD, 0x85, 0xF5, 0x03, 0xC2, ++ 0xB4, 0x20, 0xCC, 0x81, 0x20, 0xCC, 0x94, 0xF5, ++ 0x04, 0xE2, 0x80, 0x82, 0x20, 0xF5, 0x04, 0xE2, ++ 0x80, 0x83, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, ++ 0x20, 0x20, 0x20, 0x20, 0xE2, 0x80, 0x90, 0x20, ++ 0xCC, 0xB3, 0x2E, 0x2E, 0x2E, 0x2E, 0x2E, 0x2E, ++ 0x20, 0xE2, 0x80, 0xB2, 0xE2, 0x80, 0xB2, 0xE2, ++ 0x80, 0xB2, 0xE2, 0x80, 0xB2, 0xE2, 0x80, 0xB2, ++ 0xE2, 0x80, 0xB5, 0xE2, 0x80, 0xB5, 0xE2, 0x80, ++ 0xB5, 0xE2, 0x80, 0xB5, 0xE2, 0x80, 0xB5, 0x21, ++ 0x21, 0x20, 0xCC, 0x85, 0x3F, 0x3F, 0x3F, 0x21, ++ 0x21, 0x3F, 0xE2, 0x80, 0xB2, 0xE2, 0x80, 0xB2, ++ 0xE2, 0x80, 0xB2, 0xE2, 0x80, 0xB2, 0x20, 0x30, ++ 0x69, 0x34, 0x35, 0x36, 0x37, 0x38, 0x39, 0x2B, ++ 0xE2, 0x88, 0x92, 0x3D, 0x28, 0x29, 0x6E, 0x30, ++ 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37, 0x38, ++ 0x39, 0x2B, 0xE2, 0x88, 0x92, 0x3D, 0x28, 0x29, ++ 0x52, 0x73, 0x61, 0x2F, 0x63, 0x61, 0x2F, 0x73, ++ 0x43, 0xC2, 0xB0, 0x43, 0x63, 0x2F, 0x6F, 0x63, ++ 0x2F, 0x75, 0xC6, 0x90, 0xC2, 0xB0, 0x46, 0x67, ++ 0x48, 0x48, 0x48, 0x68, 0xC4, 0xA7, 0x49, 0x49, ++ 0x4C, 0x6C, 0x4E, 0x4E, 0x6F, 0x50, 0x51, 0x52, ++ 0x52, 0x52, 0x53, 0x4D, 0x54, 0x45, 0x4C, 0x54, ++ 0x4D, 0x5A, 0xF6, 0xCE, 0xA9, 0x5A, 0xF6, 0x4B, ++ 0xF6, 0x41, 0xCC, 0x8A, 0x42, 0x43, 0x65, 0x45, ++ 0x46, 0x4D, 0x6F, 0xD7, 0x90, 0xD7, 0x91, 0xD7, ++ 0x92, 0xD7, 0x93, 0x69, 0xCE, 0xB3, 0xCE, 0x93, ++ 0xCE, 0xA0, 0xE2, 0x88, 0x91, 0x44, 0x64, 0x65, ++ 0x69, 0x6A, 0x31, 0xE2, 0x81, 0x84, 0x33, 0x32, ++ 0xE2, 0x81, 0x84, 0x33, 0x31, 0xE2, 0x81, 0x84, ++ 0x35, 0x32, 0xE2, 0x81, 0x84, 0x35, 0x33, 0xE2, ++ 0x81, 0x84, 0x35, 0x34, 0xE2, 0x81, 0x84, 0x35, ++ 0x31, 0xE2, 0x81, 0x84, 0x36, 0x35, 0xE2, 0x81, ++ 0x84, 0x36, 0x31, 0xE2, 0x81, 0x84, 0x38, 0x33, ++ 0xE2, 0x81, 0x84, 0x38, 0x35, 0xE2, 0x81, 0x84, ++ 0x38, 0x37, 0xE2, 0x81, 0x84, 0x38, 0x31, 0xE2, ++ 0x81, 0x84, 0x49, 0x49, 0x49, 0x49, 0x49, 0x49, ++ 0x49, 0x56, 0x56, 0x56, 0x49, 0x56, 0x49, 0x49, ++ 0x56, 0x49, 0x49, 0x49, 0x49, 0x58, 0x58, 0x58, ++ 0x49, 0x58, 0x49, 0x49, 0x4C, 0x43, 0x44, 0x4D, ++ 0x69, 0x69, 0x69, 0x69, 0x69, 0x69, 0x69, 
0x76, ++ 0x76, 0x76, 0x69, 0x76, 0x69, 0x69, 0x76, 0x69, ++ 0x69, 0x69, 0x69, 0x78, 0x78, 0x78, 0x69, 0x78, ++ 0x69, 0x69, 0x6C, 0x63, 0x64, 0x6D, 0xF6, 0xE2, ++ 0x86, 0x90, 0xCC, 0xB8, 0xF6, 0xE2, 0x86, 0x92, ++ 0xCC, 0xB8, 0xF6, 0xE2, 0x86, 0x94, 0xCC, 0xB8, ++ 0xF6, 0xE2, 0x87, 0x90, 0xCC, 0xB8, 0xF6, 0xE2, ++ 0x87, 0x94, 0xCC, 0xB8, 0xF6, 0xE2, 0x87, 0x92, ++ 0xCC, 0xB8, 0xF6, 0xE2, 0x88, 0x83, 0xCC, 0xB8, ++ 0xF6, 0xE2, 0x88, 0x88, 0xCC, 0xB8, 0xF6, 0xE2, ++ 0x88, 0x8B, 0xCC, 0xB8, 0xF6, 0xE2, 0x88, 0xA3, ++ 0xCC, 0xB8, 0xF6, 0xE2, 0x88, 0xA5, 0xCC, 0xB8, ++ 0xE2, 0x88, 0xAB, 0xE2, 0x88, 0xAB, 0xE2, 0x88, ++ 0xAB, 0xE2, 0x88, 0xAB, 0xE2, 0x88, 0xAB, 0xE2, ++ 0x88, 0xAE, 0xE2, 0x88, 0xAE, 0xE2, 0x88, 0xAE, ++ 0xE2, 0x88, 0xAE, 0xE2, 0x88, 0xAE, 0xF6, 0xE2, ++ 0x88, 0xBC, 0xCC, 0xB8, 0xF6, 0xE2, 0x89, 0x83, ++ 0xCC, 0xB8, 0xF6, 0xE2, 0x89, 0x85, 0xCC, 0xB8, ++ 0xF6, 0xE2, 0x89, 0x88, 0xCC, 0xB8, 0xF6, 0x3D, ++ 0xCC, 0xB8, 0xF6, 0xE2, 0x89, 0xA1, 0xCC, 0xB8, ++ 0xF6, 0xE2, 0x89, 0x8D, 0xCC, 0xB8, 0xF6, 0x3C, ++ 0xCC, 0xB8, 0xF6, 0x3E, 0xCC, 0xB8, 0xF6, 0xE2, ++ 0x89, 0xA4, 0xCC, 0xB8, 0xF6, 0xE2, 0x89, 0xA5, ++ 0xCC, 0xB8, 0xF6, 0xE2, 0x89, 0xB2, 0xCC, 0xB8, ++ 0xF6, 0xE2, 0x89, 0xB3, 0xCC, 0xB8, 0xF6, 0xE2, ++ 0x89, 0xB6, 0xCC, 0xB8, 0xF6, 0xE2, 0x89, 0xB7, ++ 0xCC, 0xB8, 0xF6, 0xE2, 0x89, 0xBA, 0xCC, 0xB8, ++ 0xF6, 0xE2, 0x89, 0xBB, 0xCC, 0xB8, 0xF6, 0xE2, ++ 0x8A, 0x82, 0xCC, 0xB8, 0xF6, 0xE2, 0x8A, 0x83, ++ 0xCC, 0xB8, 0xF6, 0xE2, 0x8A, 0x86, 0xCC, 0xB8, ++ 0xF6, 0xE2, 0x8A, 0x87, 0xCC, 0xB8, 0xF6, 0xE2, ++ 0x8A, 0xA2, 0xCC, 0xB8, 0xF6, 0xE2, 0x8A, 0xA8, ++ 0xCC, 0xB8, 0xF6, 0xE2, 0x8A, 0xA9, 0xCC, 0xB8, ++ 0xF6, 0xE2, 0x8A, 0xAB, 0xCC, 0xB8, 0xF6, 0xE2, ++ 0x89, 0xBC, 0xCC, 0xB8, 0xF6, 0xE2, 0x89, 0xBD, ++ 0xCC, 0xB8, 0xF6, 0xE2, 0x8A, 0x91, 0xCC, 0xB8, ++ 0xF6, 0xE2, 0x8A, 0x92, 0xCC, 0xB8, 0xF6, 0xE2, ++ 0x8A, 0xB2, 0xCC, 0xB8, 0xF6, 0xE2, 0x8A, 0xB3, ++ 0xCC, 0xB8, 0xF6, 0xE2, 0x8A, 0xB4, 0xCC, 0xB8, ++ 0xF6, 0xE2, 0x8A, 0xB5, 0xCC, 0xB8, 0xF6, 0xE3, ++ 0x80, 0x88, 0xF6, 0xE3, 0x80, 0x89, 0x31, 0x32, ++ 0x33, 0x34, 0x35, 0x36, 0x37, 0x38, 0x39, 0x31, ++ 0x30, 0x31, 0x31, 0x31, 0x32, 0x31, 0x33, 0x31, ++ 0x34, 0x31, 0x35, 0x31, 0x36, 0x31, 0x37, 0x31, ++ 0x38, 0x31, 0x39, 0x32, 0x30, 0x28, 0x31, 0x29, ++ 0x28, 0x32, 0x29, 0x28, 0x33, 0x29, 0x28, 0x34, ++ 0x29, 0x28, 0x35, 0x29, 0x28, 0x36, 0x29, 0x28, ++ 0x37, 0x29, 0x28, 0x38, 0x29, 0x28, 0x39, 0x29, ++ 0x28, 0x31, 0x30, 0x29, 0x28, 0x31, 0x31, 0x29, ++ 0x28, 0x31, 0x32, 0x29, 0x28, 0x31, 0x33, 0x29, ++ 0x28, 0x31, 0x34, 0x29, 0x28, 0x31, 0x35, 0x29, ++ 0x28, 0x31, 0x36, 0x29, 0x28, 0x31, 0x37, 0x29, ++ 0x28, 0x31, 0x38, 0x29, 0x28, 0x31, 0x39, 0x29, ++ 0x28, 0x32, 0x30, 0x29, 0x31, 0x2E, 0x32, 0x2E, ++ 0x33, 0x2E, 0x34, 0x2E, 0x35, 0x2E, 0x36, 0x2E, ++ 0x37, 0x2E, 0x38, 0x2E, 0x39, 0x2E, 0x31, 0x30, ++ 0x2E, 0x31, 0x31, 0x2E, 0x31, 0x32, 0x2E, 0x31, ++ 0x33, 0x2E, 0x31, 0x34, 0x2E, 0x31, 0x35, 0x2E, ++ 0x31, 0x36, 0x2E, 0x31, 0x37, 0x2E, 0x31, 0x38, ++ 0x2E, 0x31, 0x39, 0x2E, 0x32, 0x30, 0x2E, 0x28, ++ 0x61, 0x29, 0x28, 0x62, 0x29, 0x28, 0x63, 0x29, ++ 0x28, 0x64, 0x29, 0x28, 0x65, 0x29, 0x28, 0x66, ++ 0x29, 0x28, 0x67, 0x29, 0x28, 0x68, 0x29, 0x28, ++ 0x69, 0x29, 0x28, 0x6A, 0x29, 0x28, 0x6B, 0x29, ++ 0x28, 0x6C, 0x29, 0x28, 0x6D, 0x29, 0x28, 0x6E, ++ 0x29, 0x28, 0x6F, 0x29, 0x28, 0x70, 0x29, 0x28, ++ 0x71, 0x29, 0x28, 0x72, 0x29, 0x28, 0x73, 0x29, ++ 0x28, 0x74, 0x29, 0x28, 0x75, 0x29, 0x28, 0x76, ++ 0x29, 0x28, 0x77, 0x29, 0x28, 0x78, 0x29, 0x28, ++ 0x79, 0x29, 0x28, 0x7A, 0x29, 0x41, 0x42, 0x43, ++ 0x44, 0x45, 0x46, 0x47, 
0x48, 0x49, 0x4A, 0x4B, ++ 0x4C, 0x4D, 0x4E, 0x4F, 0x50, 0x51, 0x52, 0x53, ++ 0x54, 0x55, 0x56, 0x57, 0x58, 0x59, 0x5A, 0x61, ++ 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69, ++ 0x6A, 0x6B, 0x6C, 0x6D, 0x6E, 0x6F, 0x70, 0x71, ++ 0x72, 0x73, 0x74, 0x75, 0x76, 0x77, 0x78, 0x79, ++ 0x7A, 0x30, 0xE2, 0x88, 0xAB, 0xE2, 0x88, 0xAB, ++ 0xE2, 0x88, 0xAB, 0xE2, 0x88, 0xAB, 0x3A, 0x3A, ++ 0x3D, 0x3D, 0x3D, 0x3D, 0x3D, 0x3D, 0xF6, 0xE2, ++ 0xAB, 0x9D, 0xCC, 0xB8, 0xE6, 0xAF, 0x8D, 0xE9, ++ 0xBE, 0x9F, 0xE4, 0xB8, 0x80, 0xE4, 0xB8, 0xA8, ++ 0xE4, 0xB8, 0xB6, 0xE4, 0xB8, 0xBF, 0xE4, 0xB9, ++ 0x99, 0xE4, 0xBA, 0x85, 0xE4, 0xBA, 0x8C, 0xE4, ++ 0xBA, 0xA0, 0xE4, 0xBA, 0xBA, 0xE5, 0x84, 0xBF, ++ 0xE5, 0x85, 0xA5, 0xE5, 0x85, 0xAB, 0xE5, 0x86, ++ 0x82, 0xE5, 0x86, 0x96, 0xE5, 0x86, 0xAB, 0xE5, ++ 0x87, 0xA0, 0xE5, 0x87, 0xB5, 0xE5, 0x88, 0x80, ++ 0xE5, 0x8A, 0x9B, 0xE5, 0x8B, 0xB9, 0xE5, 0x8C, ++ 0x95, 0xE5, 0x8C, 0x9A, 0xE5, 0x8C, 0xB8, 0xE5, ++ 0x8D, 0x81, 0xE5, 0x8D, 0x9C, 0xE5, 0x8D, 0xA9, ++ 0xE5, 0x8E, 0x82, 0xE5, 0x8E, 0xB6, 0xE5, 0x8F, ++ 0x88, 0xE5, 0x8F, 0xA3, 0xE5, 0x9B, 0x97, 0xE5, ++ 0x9C, 0x9F, 0xE5, 0xA3, 0xAB, 0xE5, 0xA4, 0x82, ++ 0xE5, 0xA4, 0x8A, 0xE5, 0xA4, 0x95, 0xE5, 0xA4, ++ 0xA7, 0xE5, 0xA5, 0xB3, 0xE5, 0xAD, 0x90, 0xE5, ++ 0xAE, 0x80, 0xE5, 0xAF, 0xB8, 0xE5, 0xB0, 0x8F, ++ 0xE5, 0xB0, 0xA2, 0xE5, 0xB0, 0xB8, 0xE5, 0xB1, ++ 0xAE, 0xE5, 0xB1, 0xB1, 0xE5, 0xB7, 0x9B, 0xE5, ++ 0xB7, 0xA5, 0xE5, 0xB7, 0xB1, 0xE5, 0xB7, 0xBE, ++ 0xE5, 0xB9, 0xB2, 0xE5, 0xB9, 0xBA, 0xE5, 0xB9, ++ 0xBF, 0xE5, 0xBB, 0xB4, 0xE5, 0xBB, 0xBE, 0xE5, ++ 0xBC, 0x8B, 0xE5, 0xBC, 0x93, 0xE5, 0xBD, 0x90, ++ 0xE5, 0xBD, 0xA1, 0xE5, 0xBD, 0xB3, 0xE5, 0xBF, ++ 0x83, 0xE6, 0x88, 0x88, 0xE6, 0x88, 0xB6, 0xE6, ++ 0x89, 0x8B, 0xE6, 0x94, 0xAF, 0xE6, 0x94, 0xB4, ++ 0xE6, 0x96, 0x87, 0xE6, 0x96, 0x97, 0xE6, 0x96, ++ 0xA4, 0xE6, 0x96, 0xB9, 0xE6, 0x97, 0xA0, 0xE6, ++ 0x97, 0xA5, 0xE6, 0x9B, 0xB0, 0xE6, 0x9C, 0x88, ++ 0xE6, 0x9C, 0xA8, 0xE6, 0xAC, 0xA0, 0xE6, 0xAD, ++ 0xA2, 0xE6, 0xAD, 0xB9, 0xE6, 0xAE, 0xB3, 0xE6, ++ 0xAF, 0x8B, 0xE6, 0xAF, 0x94, 0xE6, 0xAF, 0x9B, ++ 0xE6, 0xB0, 0x8F, 0xE6, 0xB0, 0x94, 0xE6, 0xB0, ++ 0xB4, 0xE7, 0x81, 0xAB, 0xE7, 0x88, 0xAA, 0xE7, ++ 0x88, 0xB6, 0xE7, 0x88, 0xBB, 0xE7, 0x88, 0xBF, ++ 0xE7, 0x89, 0x87, 0xE7, 0x89, 0x99, 0xE7, 0x89, ++ 0x9B, 0xE7, 0x8A, 0xAC, 0xE7, 0x8E, 0x84, 0xE7, ++ 0x8E, 0x89, 0xE7, 0x93, 0x9C, 0xE7, 0x93, 0xA6, ++ 0xE7, 0x94, 0x98, 0xE7, 0x94, 0x9F, 0xE7, 0x94, ++ 0xA8, 0xE7, 0x94, 0xB0, 0xE7, 0x96, 0x8B, 0xE7, ++ 0x96, 0x92, 0xE7, 0x99, 0xB6, 0xE7, 0x99, 0xBD, ++ 0xE7, 0x9A, 0xAE, 0xE7, 0x9A, 0xBF, 0xE7, 0x9B, ++ 0xAE, 0xE7, 0x9F, 0x9B, 0xE7, 0x9F, 0xA2, 0xE7, ++ 0x9F, 0xB3, 0xE7, 0xA4, 0xBA, 0xE7, 0xA6, 0xB8, ++ 0xE7, 0xA6, 0xBE, 0xE7, 0xA9, 0xB4, 0xE7, 0xAB, ++ 0x8B, 0xE7, 0xAB, 0xB9, 0xE7, 0xB1, 0xB3, 0xE7, ++ 0xB3, 0xB8, 0xE7, 0xBC, 0xB6, 0xE7, 0xBD, 0x91, ++ 0xE7, 0xBE, 0x8A, 0xE7, 0xBE, 0xBD, 0xE8, 0x80, ++ 0x81, 0xE8, 0x80, 0x8C, 0xE8, 0x80, 0x92, 0xE8, ++ 0x80, 0xB3, 0xE8, 0x81, 0xBF, 0xE8, 0x82, 0x89, ++ 0xE8, 0x87, 0xA3, 0xE8, 0x87, 0xAA, 0xE8, 0x87, ++ 0xB3, 0xE8, 0x87, 0xBC, 0xE8, 0x88, 0x8C, 0xE8, ++ 0x88, 0x9B, 0xE8, 0x88, 0x9F, 0xE8, 0x89, 0xAE, ++ 0xE8, 0x89, 0xB2, 0xE8, 0x89, 0xB8, 0xE8, 0x99, ++ 0x8D, 0xE8, 0x99, 0xAB, 0xE8, 0xA1, 0x80, 0xE8, ++ 0xA1, 0x8C, 0xE8, 0xA1, 0xA3, 0xE8, 0xA5, 0xBE, ++ 0xE8, 0xA6, 0x8B, 0xE8, 0xA7, 0x92, 0xE8, 0xA8, ++ 0x80, 0xE8, 0xB0, 0xB7, 0xE8, 0xB1, 0x86, 0xE8, ++ 0xB1, 0x95, 0xE8, 0xB1, 0xB8, 0xE8, 0xB2, 0x9D, ++ 0xE8, 0xB5, 0xA4, 0xE8, 0xB5, 0xB0, 0xE8, 0xB6, ++ 0xB3, 0xE8, 0xBA, 0xAB, 0xE8, 0xBB, 0x8A, 0xE8, ++ 0xBE, 
0x9B, 0xE8, 0xBE, 0xB0, 0xE8, 0xBE, 0xB5, ++ 0xE9, 0x82, 0x91, 0xE9, 0x85, 0x89, 0xE9, 0x87, ++ 0x86, 0xE9, 0x87, 0x8C, 0xE9, 0x87, 0x91, 0xE9, ++ 0x95, 0xB7, 0xE9, 0x96, 0x80, 0xE9, 0x98, 0x9C, ++ 0xE9, 0x9A, 0xB6, 0xE9, 0x9A, 0xB9, 0xE9, 0x9B, ++ 0xA8, 0xE9, 0x9D, 0x91, 0xE9, 0x9D, 0x9E, 0xE9, ++ 0x9D, 0xA2, 0xE9, 0x9D, 0xA9, 0xE9, 0x9F, 0x8B, ++ 0xE9, 0x9F, 0xAD, 0xE9, 0x9F, 0xB3, 0xE9, 0xA0, ++ 0x81, 0xE9, 0xA2, 0xA8, 0xE9, 0xA3, 0x9B, 0xE9, ++ 0xA3, 0x9F, 0xE9, 0xA6, 0x96, 0xE9, 0xA6, 0x99, ++ 0xE9, 0xA6, 0xAC, 0xE9, 0xAA, 0xA8, 0xE9, 0xAB, ++ 0x98, 0xE9, 0xAB, 0x9F, 0xE9, 0xAC, 0xA5, 0xE9, ++ 0xAC, 0xAF, 0xE9, 0xAC, 0xB2, 0xE9, 0xAC, 0xBC, ++ 0xE9, 0xAD, 0x9A, 0xE9, 0xB3, 0xA5, 0xE9, 0xB9, ++ 0xB5, 0xE9, 0xB9, 0xBF, 0xE9, 0xBA, 0xA5, 0xE9, ++ 0xBA, 0xBB, 0xE9, 0xBB, 0x83, 0xE9, 0xBB, 0x8D, ++ 0xE9, 0xBB, 0x91, 0xE9, 0xBB, 0xB9, 0xE9, 0xBB, ++ 0xBD, 0xE9, 0xBC, 0x8E, 0xE9, 0xBC, 0x93, 0xE9, ++ 0xBC, 0xA0, 0xE9, 0xBC, 0xBB, 0xE9, 0xBD, 0x8A, ++ 0xE9, 0xBD, 0x92, 0xE9, 0xBE, 0x8D, 0xE9, 0xBE, ++ 0x9C, 0xE9, 0xBE, 0xA0, 0x20, 0xE3, 0x80, 0x92, ++ 0xE5, 0x8D, 0x81, 0xE5, 0x8D, 0x84, 0xE5, 0x8D, ++ 0x85, 0xF6, 0xE3, 0x81, 0x8B, 0xE3, 0x82, 0x99, ++ 0xF6, 0xE3, 0x81, 0x8D, 0xE3, 0x82, 0x99, 0xF6, ++ 0xE3, 0x81, 0x8F, 0xE3, 0x82, 0x99, 0xF6, 0xE3, ++ 0x81, 0x91, 0xE3, 0x82, 0x99, 0xF6, 0xE3, 0x81, ++ 0x93, 0xE3, 0x82, 0x99, 0xF6, 0xE3, 0x81, 0x95, ++ 0xE3, 0x82, 0x99, 0xF6, 0xE3, 0x81, 0x97, 0xE3, ++ 0x82, 0x99, 0xF6, 0xE3, 0x81, 0x99, 0xE3, 0x82, ++ 0x99, 0xF6, 0xE3, 0x81, 0x9B, 0xE3, 0x82, 0x99, ++ 0xF6, 0xE3, 0x81, 0x9D, 0xE3, 0x82, 0x99, 0xF6, ++ 0xE3, 0x81, 0x9F, 0xE3, 0x82, 0x99, 0xF6, 0xE3, ++ 0x81, 0xA1, 0xE3, 0x82, 0x99, 0xF6, 0xE3, 0x81, ++ 0xA4, 0xE3, 0x82, 0x99, 0xF6, 0xE3, 0x81, 0xA6, ++ 0xE3, 0x82, 0x99, 0xF6, 0xE3, 0x81, 0xA8, 0xE3, ++ 0x82, 0x99, 0xF6, 0xE3, 0x81, 0xAF, 0xE3, 0x82, ++ 0x99, 0xF6, 0xE3, 0x81, 0xAF, 0xE3, 0x82, 0x9A, ++ 0xF6, 0xE3, 0x81, 0xB2, 0xE3, 0x82, 0x99, 0xF6, ++ 0xE3, 0x81, 0xB2, 0xE3, 0x82, 0x9A, 0xF6, 0xE3, ++ 0x81, 0xB5, 0xE3, 0x82, 0x99, 0xF6, 0xE3, 0x81, ++ 0xB5, 0xE3, 0x82, 0x9A, 0xF6, 0xE3, 0x81, 0xB8, ++ 0xE3, 0x82, 0x99, 0xF6, 0xE3, 0x81, 0xB8, 0xE3, ++ 0x82, 0x9A, 0xF6, 0xE3, 0x81, 0xBB, 0xE3, 0x82, ++ 0x99, 0xF6, 0xE3, 0x81, 0xBB, 0xE3, 0x82, 0x9A, ++ 0xF6, 0xE3, 0x81, 0x86, 0xE3, 0x82, 0x99, 0x20, ++ 0xE3, 0x82, 0x99, 0x20, 0xE3, 0x82, 0x9A, 0xF6, ++ 0xE3, 0x82, 0x9D, 0xE3, 0x82, 0x99, 0xE3, 0x82, ++ 0x88, 0xE3, 0x82, 0x8A, 0xF6, 0xE3, 0x82, 0xAB, ++ 0xE3, 0x82, 0x99, 0xF6, 0xE3, 0x82, 0xAD, 0xE3, ++ 0x82, 0x99, 0xF6, 0xE3, 0x82, 0xAF, 0xE3, 0x82, ++ 0x99, 0xF6, 0xE3, 0x82, 0xB1, 0xE3, 0x82, 0x99, ++ 0xF6, 0xE3, 0x82, 0xB3, 0xE3, 0x82, 0x99, 0xF6, ++ 0xE3, 0x82, 0xB5, 0xE3, 0x82, 0x99, 0xF6, 0xE3, ++ 0x82, 0xB7, 0xE3, 0x82, 0x99, 0xF6, 0xE3, 0x82, ++ 0xB9, 0xE3, 0x82, 0x99, 0xF6, 0xE3, 0x82, 0xBB, ++ 0xE3, 0x82, 0x99, 0xF6, 0xE3, 0x82, 0xBD, 0xE3, ++ 0x82, 0x99, 0xF6, 0xE3, 0x82, 0xBF, 0xE3, 0x82, ++ 0x99, 0xF6, 0xE3, 0x83, 0x81, 0xE3, 0x82, 0x99, ++ 0xF6, 0xE3, 0x83, 0x84, 0xE3, 0x82, 0x99, 0xF6, ++ 0xE3, 0x83, 0x86, 0xE3, 0x82, 0x99, 0xF6, 0xE3, ++ 0x83, 0x88, 0xE3, 0x82, 0x99, 0xF6, 0xE3, 0x83, ++ 0x8F, 0xE3, 0x82, 0x99, 0xF6, 0xE3, 0x83, 0x8F, ++ 0xE3, 0x82, 0x9A, 0xF6, 0xE3, 0x83, 0x92, 0xE3, ++ 0x82, 0x99, 0xF6, 0xE3, 0x83, 0x92, 0xE3, 0x82, ++ 0x9A, 0xF6, 0xE3, 0x83, 0x95, 0xE3, 0x82, 0x99, ++ 0xF6, 0xE3, 0x83, 0x95, 0xE3, 0x82, 0x9A, 0xF6, ++ 0xE3, 0x83, 0x98, 0xE3, 0x82, 0x99, 0xF6, 0xE3, ++ 0x83, 0x98, 0xE3, 0x82, 0x9A, 0xF6, 0xE3, 0x83, ++ 0x9B, 0xE3, 0x82, 0x99, 0xF6, 0xE3, 0x83, 0x9B, ++ 0xE3, 0x82, 0x9A, 0xF6, 0xE3, 0x82, 0xA6, 
0xE3, ++ 0x82, 0x99, 0xF6, 0xE3, 0x83, 0xAF, 0xE3, 0x82, ++ 0x99, 0xF6, 0xE3, 0x83, 0xB0, 0xE3, 0x82, 0x99, ++ 0xF6, 0xE3, 0x83, 0xB1, 0xE3, 0x82, 0x99, 0xF6, ++ 0xE3, 0x83, 0xB2, 0xE3, 0x82, 0x99, 0xF6, 0xE3, ++ 0x83, 0xBD, 0xE3, 0x82, 0x99, 0xE3, 0x82, 0xB3, ++ 0xE3, 0x83, 0x88, 0xE1, 0x84, 0x80, 0xE1, 0x84, ++ 0x81, 0xE1, 0x86, 0xAA, 0xE1, 0x84, 0x82, 0xE1, ++ 0x86, 0xAC, 0xE1, 0x86, 0xAD, 0xE1, 0x84, 0x83, ++ 0xE1, 0x84, 0x84, 0xE1, 0x84, 0x85, 0xE1, 0x86, ++ 0xB0, 0xE1, 0x86, 0xB1, 0xE1, 0x86, 0xB2, 0xE1, ++ 0x86, 0xB3, 0xE1, 0x86, 0xB4, 0xE1, 0x86, 0xB5, ++ 0xE1, 0x84, 0x9A, 0xE1, 0x84, 0x86, 0xE1, 0x84, ++ 0x87, 0xE1, 0x84, 0x88, 0xE1, 0x84, 0xA1, 0xE1, ++ 0x84, 0x89, 0xE1, 0x84, 0x8A, 0xE1, 0x84, 0x8B, ++ 0xE1, 0x84, 0x8C, 0xE1, 0x84, 0x8D, 0xE1, 0x84, ++ 0x8E, 0xE1, 0x84, 0x8F, 0xE1, 0x84, 0x90, 0xE1, ++ 0x84, 0x91, 0xE1, 0x84, 0x92, 0xE1, 0x85, 0xA1, ++ 0xE1, 0x85, 0xA2, 0xE1, 0x85, 0xA3, 0xE1, 0x85, ++ 0xA4, 0xE1, 0x85, 0xA5, 0xE1, 0x85, 0xA6, 0xE1, ++ 0x85, 0xA7, 0xE1, 0x85, 0xA8, 0xE1, 0x85, 0xA9, ++ 0xE1, 0x85, 0xAA, 0xE1, 0x85, 0xAB, 0xE1, 0x85, ++ 0xAC, 0xE1, 0x85, 0xAD, 0xE1, 0x85, 0xAE, 0xE1, ++ 0x85, 0xAF, 0xE1, 0x85, 0xB0, 0xE1, 0x85, 0xB1, ++ 0xE1, 0x85, 0xB2, 0xE1, 0x85, 0xB3, 0xE1, 0x85, ++ 0xB4, 0xE1, 0x85, 0xB5, 0xE1, 0x85, 0xA0, 0xE1, ++ 0x84, 0x94, 0xE1, 0x84, 0x95, 0xE1, 0x87, 0x87, ++ 0xE1, 0x87, 0x88, 0xE1, 0x87, 0x8C, 0xE1, 0x87, ++ 0x8E, 0xE1, 0x87, 0x93, 0xE1, 0x87, 0x97, 0xE1, ++ 0x87, 0x99, 0xE1, 0x84, 0x9C, 0xE1, 0x87, 0x9D, ++ 0xE1, 0x87, 0x9F, 0xE1, 0x84, 0x9D, 0xE1, 0x84, ++ 0x9E, 0xE1, 0x84, 0xA0, 0xE1, 0x84, 0xA2, 0xE1, ++ 0x84, 0xA3, 0xE1, 0x84, 0xA7, 0xE1, 0x84, 0xA9, ++ 0xE1, 0x84, 0xAB, 0xE1, 0x84, 0xAC, 0xE1, 0x84, ++ 0xAD, 0xE1, 0x84, 0xAE, 0xE1, 0x84, 0xAF, 0xE1, ++ 0x84, 0xB2, 0xE1, 0x84, 0xB6, 0xE1, 0x85, 0x80, ++ 0xE1, 0x85, 0x87, 0xE1, 0x85, 0x8C, 0xE1, 0x87, ++ 0xB1, 0xE1, 0x87, 0xB2, 0xE1, 0x85, 0x97, 0xE1, ++ 0x85, 0x98, 0xE1, 0x85, 0x99, 0xE1, 0x86, 0x84, ++ 0xE1, 0x86, 0x85, 0xE1, 0x86, 0x88, 0xE1, 0x86, ++ 0x91, 0xE1, 0x86, 0x92, 0xE1, 0x86, 0x94, 0xE1, ++ 0x86, 0x9E, 0xE1, 0x86, 0xA1, 0xE4, 0xB8, 0x80, ++ 0xE4, 0xBA, 0x8C, 0xE4, 0xB8, 0x89, 0xE5, 0x9B, ++ 0x9B, 0xE4, 0xB8, 0x8A, 0xE4, 0xB8, 0xAD, 0xE4, ++ 0xB8, 0x8B, 0xE7, 0x94, 0xB2, 0xE4, 0xB9, 0x99, ++ 0xE4, 0xB8, 0x99, 0xE4, 0xB8, 0x81, 0xE5, 0xA4, ++ 0xA9, 0xE5, 0x9C, 0xB0, 0xE4, 0xBA, 0xBA, 0x28, ++ 0xE1, 0x84, 0x80, 0x29, 0x28, 0xE1, 0x84, 0x82, ++ 0x29, 0x28, 0xE1, 0x84, 0x83, 0x29, 0x28, 0xE1, ++ 0x84, 0x85, 0x29, 0x28, 0xE1, 0x84, 0x86, 0x29, ++ 0x28, 0xE1, 0x84, 0x87, 0x29, 0x28, 0xE1, 0x84, ++ 0x89, 0x29, 0x28, 0xE1, 0x84, 0x8B, 0x29, 0x28, ++ 0xE1, 0x84, 0x8C, 0x29, 0x28, 0xE1, 0x84, 0x8E, ++ 0x29, 0x28, 0xE1, 0x84, 0x8F, 0x29, 0x28, 0xE1, ++ 0x84, 0x90, 0x29, 0x28, 0xE1, 0x84, 0x91, 0x29, ++ 0x28, 0xE1, 0x84, 0x92, 0x29, 0x28, 0xE1, 0x84, ++ 0x80, 0xE1, 0x85, 0xA1, 0x29, 0x28, 0xE1, 0x84, ++ 0x82, 0xE1, 0x85, 0xA1, 0x29, 0x28, 0xE1, 0x84, ++ 0x83, 0xE1, 0x85, 0xA1, 0x29, 0x28, 0xE1, 0x84, ++ 0x85, 0xE1, 0x85, 0xA1, 0x29, 0x28, 0xE1, 0x84, ++ 0x86, 0xE1, 0x85, 0xA1, 0x29, 0x28, 0xE1, 0x84, ++ 0x87, 0xE1, 0x85, 0xA1, 0x29, 0x28, 0xE1, 0x84, ++ 0x89, 0xE1, 0x85, 0xA1, 0x29, 0x28, 0xE1, 0x84, ++ 0x8B, 0xE1, 0x85, 0xA1, 0x29, 0x28, 0xE1, 0x84, ++ 0x8C, 0xE1, 0x85, 0xA1, 0x29, 0x28, 0xE1, 0x84, ++ 0x8E, 0xE1, 0x85, 0xA1, 0x29, 0x28, 0xE1, 0x84, ++ 0x8F, 0xE1, 0x85, 0xA1, 0x29, 0x28, 0xE1, 0x84, ++ 0x90, 0xE1, 0x85, 0xA1, 0x29, 0x28, 0xE1, 0x84, ++ 0x91, 0xE1, 0x85, 0xA1, 0x29, 0x28, 0xE1, 0x84, ++ 0x92, 0xE1, 0x85, 0xA1, 0x29, 0x28, 0xE1, 0x84, ++ 0x8C, 0xE1, 0x85, 0xAE, 
0x29, 0x28, 0xE4, 0xB8, ++ 0x80, 0x29, 0x28, 0xE4, 0xBA, 0x8C, 0x29, 0x28, ++ 0xE4, 0xB8, 0x89, 0x29, 0x28, 0xE5, 0x9B, 0x9B, ++ 0x29, 0x28, 0xE4, 0xBA, 0x94, 0x29, 0x28, 0xE5, ++ 0x85, 0xAD, 0x29, 0x28, 0xE4, 0xB8, 0x83, 0x29, ++ 0x28, 0xE5, 0x85, 0xAB, 0x29, 0x28, 0xE4, 0xB9, ++ 0x9D, 0x29, 0x28, 0xE5, 0x8D, 0x81, 0x29, 0x28, ++ 0xE6, 0x9C, 0x88, 0x29, 0x28, 0xE7, 0x81, 0xAB, ++ 0x29, 0x28, 0xE6, 0xB0, 0xB4, 0x29, 0x28, 0xE6, ++ 0x9C, 0xA8, 0x29, 0x28, 0xE9, 0x87, 0x91, 0x29, ++ 0x28, 0xE5, 0x9C, 0x9F, 0x29, 0x28, 0xE6, 0x97, ++ 0xA5, 0x29, 0x28, 0xE6, 0xA0, 0xAA, 0x29, 0x28, ++ 0xE6, 0x9C, 0x89, 0x29, 0x28, 0xE7, 0xA4, 0xBE, ++ 0x29, 0x28, 0xE5, 0x90, 0x8D, 0x29, 0x28, 0xE7, ++ 0x89, 0xB9, 0x29, 0x28, 0xE8, 0xB2, 0xA1, 0x29, ++ 0x28, 0xE7, 0xA5, 0x9D, 0x29, 0x28, 0xE5, 0x8A, ++ 0xB4, 0x29, 0x28, 0xE4, 0xBB, 0xA3, 0x29, 0x28, ++ 0xE5, 0x91, 0xBC, 0x29, 0x28, 0xE5, 0xAD, 0xA6, ++ 0x29, 0x28, 0xE7, 0x9B, 0xA3, 0x29, 0x28, 0xE4, ++ 0xBC, 0x81, 0x29, 0x28, 0xE8, 0xB3, 0x87, 0x29, ++ 0x28, 0xE5, 0x8D, 0x94, 0x29, 0x28, 0xE7, 0xA5, ++ 0xAD, 0x29, 0x28, 0xE4, 0xBC, 0x91, 0x29, 0x28, ++ 0xE8, 0x87, 0xAA, 0x29, 0x28, 0xE8, 0x87, 0xB3, ++ 0x29, 0x32, 0x31, 0x32, 0x32, 0x32, 0x33, 0x32, ++ 0x34, 0x32, 0x35, 0x32, 0x36, 0x32, 0x37, 0x32, ++ 0x38, 0x32, 0x39, 0x33, 0x30, 0x33, 0x31, 0x33, ++ 0x32, 0x33, 0x33, 0x33, 0x34, 0x33, 0x35, 0xE1, ++ 0x84, 0x80, 0xE1, 0x84, 0x82, 0xE1, 0x84, 0x83, ++ 0xE1, 0x84, 0x85, 0xE1, 0x84, 0x86, 0xE1, 0x84, ++ 0x87, 0xE1, 0x84, 0x89, 0xE1, 0x84, 0x8B, 0xE1, ++ 0x84, 0x8C, 0xE1, 0x84, 0x8E, 0xE1, 0x84, 0x8F, ++ 0xE1, 0x84, 0x90, 0xE1, 0x84, 0x91, 0xE1, 0x84, ++ 0x92, 0xE1, 0x84, 0x80, 0xE1, 0x85, 0xA1, 0xE1, ++ 0x84, 0x82, 0xE1, 0x85, 0xA1, 0xE1, 0x84, 0x83, ++ 0xE1, 0x85, 0xA1, 0xE1, 0x84, 0x85, 0xE1, 0x85, ++ 0xA1, 0xE1, 0x84, 0x86, 0xE1, 0x85, 0xA1, 0xE1, ++ 0x84, 0x87, 0xE1, 0x85, 0xA1, 0xE1, 0x84, 0x89, ++ 0xE1, 0x85, 0xA1, 0xE1, 0x84, 0x8B, 0xE1, 0x85, ++ 0xA1, 0xE1, 0x84, 0x8C, 0xE1, 0x85, 0xA1, 0xE1, ++ 0x84, 0x8E, 0xE1, 0x85, 0xA1, 0xE1, 0x84, 0x8F, ++ 0xE1, 0x85, 0xA1, 0xE1, 0x84, 0x90, 0xE1, 0x85, ++ 0xA1, 0xE1, 0x84, 0x91, 0xE1, 0x85, 0xA1, 0xE1, ++ 0x84, 0x92, 0xE1, 0x85, 0xA1, 0xE4, 0xB8, 0x80, ++ 0xE4, 0xBA, 0x8C, 0xE4, 0xB8, 0x89, 0xE5, 0x9B, ++ 0x9B, 0xE4, 0xBA, 0x94, 0xE5, 0x85, 0xAD, 0xE4, ++ 0xB8, 0x83, 0xE5, 0x85, 0xAB, 0xE4, 0xB9, 0x9D, ++ 0xE5, 0x8D, 0x81, 0xE6, 0x9C, 0x88, 0xE7, 0x81, ++ 0xAB, 0xE6, 0xB0, 0xB4, 0xE6, 0x9C, 0xA8, 0xE9, ++ 0x87, 0x91, 0xE5, 0x9C, 0x9F, 0xE6, 0x97, 0xA5, ++ 0xE6, 0xA0, 0xAA, 0xE6, 0x9C, 0x89, 0xE7, 0xA4, ++ 0xBE, 0xE5, 0x90, 0x8D, 0xE7, 0x89, 0xB9, 0xE8, ++ 0xB2, 0xA1, 0xE7, 0xA5, 0x9D, 0xE5, 0x8A, 0xB4, ++ 0xE7, 0xA7, 0x98, 0xE7, 0x94, 0xB7, 0xE5, 0xA5, ++ 0xB3, 0xE9, 0x81, 0xA9, 0xE5, 0x84, 0xAA, 0xE5, ++ 0x8D, 0xB0, 0xE6, 0xB3, 0xA8, 0xE9, 0xA0, 0x85, ++ 0xE4, 0xBC, 0x91, 0xE5, 0x86, 0x99, 0xE6, 0xAD, ++ 0xA3, 0xE4, 0xB8, 0x8A, 0xE4, 0xB8, 0xAD, 0xE4, ++ 0xB8, 0x8B, 0xE5, 0xB7, 0xA6, 0xE5, 0x8F, 0xB3, ++ 0xE5, 0x8C, 0xBB, 0xE5, 0xAE, 0x97, 0xE5, 0xAD, ++ 0xA6, 0xE7, 0x9B, 0xA3, 0xE4, 0xBC, 0x81, 0xE8, ++ 0xB3, 0x87, 0xE5, 0x8D, 0x94, 0xE5, 0xA4, 0x9C, ++ 0x33, 0x36, 0x33, 0x37, 0x33, 0x38, 0x33, 0x39, ++ 0x34, 0x30, 0x34, 0x31, 0x34, 0x32, 0x34, 0x33, ++ 0x34, 0x34, 0x34, 0x35, 0x34, 0x36, 0x34, 0x37, ++ 0x34, 0x38, 0x34, 0x39, 0x35, 0x30, 0x31, 0xE6, ++ 0x9C, 0x88, 0x32, 0xE6, 0x9C, 0x88, 0x33, 0xE6, ++ 0x9C, 0x88, 0x34, 0xE6, 0x9C, 0x88, 0x35, 0xE6, ++ 0x9C, 0x88, 0x36, 0xE6, 0x9C, 0x88, 0x37, 0xE6, ++ 0x9C, 0x88, 0x38, 0xE6, 0x9C, 0x88, 0x39, 0xE6, ++ 0x9C, 0x88, 0x31, 0x30, 0xE6, 0x9C, 0x88, 0x31, ++ 0x31, 
0xE6, 0x9C, 0x88, 0x31, 0x32, 0xE6, 0x9C, ++ 0x88, 0xE3, 0x82, 0xA2, 0xE3, 0x82, 0xA4, 0xE3, ++ 0x82, 0xA6, 0xE3, 0x82, 0xA8, 0xE3, 0x82, 0xAA, ++ 0xE3, 0x82, 0xAB, 0xE3, 0x82, 0xAD, 0xE3, 0x82, ++ 0xAF, 0xE3, 0x82, 0xB1, 0xE3, 0x82, 0xB3, 0xE3, ++ 0x82, 0xB5, 0xE3, 0x82, 0xB7, 0xE3, 0x82, 0xB9, ++ 0xE3, 0x82, 0xBB, 0xE3, 0x82, 0xBD, 0xE3, 0x82, ++ 0xBF, 0xE3, 0x83, 0x81, 0xE3, 0x83, 0x84, 0xE3, ++ 0x83, 0x86, 0xE3, 0x83, 0x88, 0xE3, 0x83, 0x8A, ++ 0xE3, 0x83, 0x8B, 0xE3, 0x83, 0x8C, 0xE3, 0x83, ++ 0x8D, 0xE3, 0x83, 0x8E, 0xE3, 0x83, 0x8F, 0xE3, ++ 0x83, 0x92, 0xE3, 0x83, 0x95, 0xE3, 0x83, 0x98, ++ 0xE3, 0x83, 0x9B, 0xE3, 0x83, 0x9E, 0xE3, 0x83, ++ 0x9F, 0xE3, 0x83, 0xA0, 0xE3, 0x83, 0xA1, 0xE3, ++ 0x83, 0xA2, 0xE3, 0x83, 0xA4, 0xE3, 0x83, 0xA6, ++ 0xE3, 0x83, 0xA8, 0xE3, 0x83, 0xA9, 0xE3, 0x83, ++ 0xAA, 0xE3, 0x83, 0xAB, 0xE3, 0x83, 0xAC, 0xE3, ++ 0x83, 0xAD, 0xE3, 0x83, 0xAF, 0xE3, 0x83, 0xB0, ++ 0xE3, 0x83, 0xB1, 0xE3, 0x83, 0xB2, 0xE3, 0x82, ++ 0xA2, 0xE3, 0x83, 0x8F, 0xE3, 0x82, 0x9A, 0xE3, ++ 0x83, 0xBC, 0xE3, 0x83, 0x88, 0xE3, 0x82, 0xA2, ++ 0xE3, 0x83, 0xAB, 0xE3, 0x83, 0x95, 0xE3, 0x82, ++ 0xA1, 0xE3, 0x82, 0xA2, 0xE3, 0x83, 0xB3, 0xE3, ++ 0x83, 0x98, 0xE3, 0x82, 0x9A, 0xE3, 0x82, 0xA2, ++ 0xE3, 0x82, 0xA2, 0xE3, 0x83, 0xBC, 0xE3, 0x83, ++ 0xAB, 0xE3, 0x82, 0xA4, 0xE3, 0x83, 0x8B, 0xE3, ++ 0x83, 0xB3, 0xE3, 0x82, 0xAF, 0xE3, 0x82, 0x99, ++ 0xE3, 0x82, 0xA4, 0xE3, 0x83, 0xB3, 0xE3, 0x83, ++ 0x81, 0xE3, 0x82, 0xA6, 0xE3, 0x82, 0xA9, 0xE3, ++ 0x83, 0xB3, 0xE3, 0x82, 0xA8, 0xE3, 0x82, 0xB9, ++ 0xE3, 0x82, 0xAF, 0xE3, 0x83, 0xBC, 0xE3, 0x83, ++ 0x88, 0xE3, 0x82, 0x99, 0xE3, 0x82, 0xA8, 0xE3, ++ 0x83, 0xBC, 0xE3, 0x82, 0xAB, 0xE3, 0x83, 0xBC, ++ 0xE3, 0x82, 0xAA, 0xE3, 0x83, 0xB3, 0xE3, 0x82, ++ 0xB9, 0xE3, 0x82, 0xAA, 0xE3, 0x83, 0xBC, 0xE3, ++ 0x83, 0xA0, 0xE3, 0x82, 0xAB, 0xE3, 0x82, 0xA4, ++ 0xE3, 0x83, 0xAA, 0xE3, 0x82, 0xAB, 0xE3, 0x83, ++ 0xA9, 0xE3, 0x83, 0x83, 0xE3, 0x83, 0x88, 0xE3, ++ 0x82, 0xAB, 0xE3, 0x83, 0xAD, 0xE3, 0x83, 0xAA, ++ 0xE3, 0x83, 0xBC, 0xE3, 0x82, 0xAB, 0xE3, 0x82, ++ 0x99, 0xE3, 0x83, 0xAD, 0xE3, 0x83, 0xB3, 0xE3, ++ 0x82, 0xAB, 0xE3, 0x82, 0x99, 0xE3, 0x83, 0xB3, ++ 0xE3, 0x83, 0x9E, 0xE3, 0x82, 0xAD, 0xE3, 0x82, ++ 0x99, 0xE3, 0x82, 0xAB, 0xE3, 0x82, 0x99, 0xE3, ++ 0x82, 0xAD, 0xE3, 0x82, 0x99, 0xE3, 0x83, 0x8B, ++ 0xE3, 0x83, 0xBC, 0xE3, 0x82, 0xAD, 0xE3, 0x83, ++ 0xA5, 0xE3, 0x83, 0xAA, 0xE3, 0x83, 0xBC, 0xE3, ++ 0x82, 0xAD, 0xE3, 0x82, 0x99, 0xE3, 0x83, 0xAB, ++ 0xE3, 0x82, 0xBF, 0xE3, 0x82, 0x99, 0xE3, 0x83, ++ 0xBC, 0xE3, 0x82, 0xAD, 0xE3, 0x83, 0xAD, 0xE3, ++ 0x82, 0xAD, 0xE3, 0x83, 0xAD, 0xE3, 0x82, 0xAF, ++ 0xE3, 0x82, 0x99, 0xE3, 0x83, 0xA9, 0xE3, 0x83, ++ 0xA0, 0xE3, 0x82, 0xAD, 0xE3, 0x83, 0xAD, 0xE3, ++ 0x83, 0xA1, 0xE3, 0x83, 0xBC, 0xE3, 0x83, 0x88, ++ 0xE3, 0x83, 0xAB, 0xE3, 0x82, 0xAD, 0xE3, 0x83, ++ 0xAD, 0xE3, 0x83, 0xAF, 0xE3, 0x83, 0x83, 0xE3, ++ 0x83, 0x88, 0xE3, 0x82, 0xAF, 0xE3, 0x82, 0x99, ++ 0xE3, 0x83, 0xA9, 0xE3, 0x83, 0xA0, 0xE3, 0x82, ++ 0xAF, 0xE3, 0x82, 0x99, 0xE3, 0x83, 0xA9, 0xE3, ++ 0x83, 0xA0, 0xE3, 0x83, 0x88, 0xE3, 0x83, 0xB3, ++ 0xE3, 0x82, 0xAF, 0xE3, 0x83, 0xAB, 0xE3, 0x82, ++ 0xBB, 0xE3, 0x82, 0x99, 0xE3, 0x82, 0xA4, 0xE3, ++ 0x83, 0xAD, 0xE3, 0x82, 0xAF, 0xE3, 0x83, 0xAD, ++ 0xE3, 0x83, 0xBC, 0xE3, 0x83, 0x8D, 0xE3, 0x82, ++ 0xB1, 0xE3, 0x83, 0xBC, 0xE3, 0x82, 0xB9, 0xE3, ++ 0x82, 0xB3, 0xE3, 0x83, 0xAB, 0xE3, 0x83, 0x8A, ++ 0xE3, 0x82, 0xB3, 0xE3, 0x83, 0xBC, 0xE3, 0x83, ++ 0x9B, 0xE3, 0x82, 0x9A, 0xE3, 0x82, 0xB5, 0xE3, ++ 0x82, 0xA4, 0xE3, 0x82, 0xAF, 0xE3, 0x83, 0xAB, ++ 0xE3, 0x82, 0xB5, 0xE3, 0x83, 0xB3, 0xE3, 
0x83, ++ 0x81, 0xE3, 0x83, 0xBC, 0xE3, 0x83, 0xA0, 0xE3, ++ 0x82, 0xB7, 0xE3, 0x83, 0xAA, 0xE3, 0x83, 0xB3, ++ 0xE3, 0x82, 0xAF, 0xE3, 0x82, 0x99, 0xE3, 0x82, ++ 0xBB, 0xE3, 0x83, 0xB3, 0xE3, 0x83, 0x81, 0xE3, ++ 0x82, 0xBB, 0xE3, 0x83, 0xB3, 0xE3, 0x83, 0x88, ++ 0xE3, 0x82, 0xBF, 0xE3, 0x82, 0x99, 0xE3, 0x83, ++ 0xBC, 0xE3, 0x82, 0xB9, 0xE3, 0x83, 0x86, 0xE3, ++ 0x82, 0x99, 0xE3, 0x82, 0xB7, 0xE3, 0x83, 0x88, ++ 0xE3, 0x82, 0x99, 0xE3, 0x83, 0xAB, 0xE3, 0x83, ++ 0x88, 0xE3, 0x83, 0xB3, 0xE3, 0x83, 0x8A, 0xE3, ++ 0x83, 0x8E, 0xE3, 0x83, 0x8E, 0xE3, 0x83, 0x83, ++ 0xE3, 0x83, 0x88, 0xE3, 0x83, 0x8F, 0xE3, 0x82, ++ 0xA4, 0xE3, 0x83, 0x84, 0xE3, 0x83, 0x8F, 0xE3, ++ 0x82, 0x9A, 0xE3, 0x83, 0xBC, 0xE3, 0x82, 0xBB, ++ 0xE3, 0x83, 0xB3, 0xE3, 0x83, 0x88, 0xE3, 0x83, ++ 0x8F, 0xE3, 0x82, 0x9A, 0xE3, 0x83, 0xBC, 0xE3, ++ 0x83, 0x84, 0xE3, 0x83, 0x8F, 0xE3, 0x82, 0x99, ++ 0xE3, 0x83, 0xBC, 0xE3, 0x83, 0xAC, 0xE3, 0x83, ++ 0xAB, 0xE3, 0x83, 0x92, 0xE3, 0x82, 0x9A, 0xE3, ++ 0x82, 0xA2, 0xE3, 0x82, 0xB9, 0xE3, 0x83, 0x88, ++ 0xE3, 0x83, 0xAB, 0xE3, 0x83, 0x92, 0xE3, 0x82, ++ 0x9A, 0xE3, 0x82, 0xAF, 0xE3, 0x83, 0xAB, 0xE3, ++ 0x83, 0x92, 0xE3, 0x82, 0x9A, 0xE3, 0x82, 0xB3, ++ 0xE3, 0x83, 0x92, 0xE3, 0x82, 0x99, 0xE3, 0x83, ++ 0xAB, 0xE3, 0x83, 0x95, 0xE3, 0x82, 0xA1, 0xE3, ++ 0x83, 0xA9, 0xE3, 0x83, 0x83, 0xE3, 0x83, 0x88, ++ 0xE3, 0x82, 0x99, 0xE3, 0x83, 0x95, 0xE3, 0x82, ++ 0xA3, 0xE3, 0x83, 0xBC, 0xE3, 0x83, 0x88, 0xE3, ++ 0x83, 0x95, 0xE3, 0x82, 0x99, 0xE3, 0x83, 0x83, ++ 0xE3, 0x82, 0xB7, 0xE3, 0x82, 0xA7, 0xE3, 0x83, ++ 0xAB, 0xE3, 0x83, 0x95, 0xE3, 0x83, 0xA9, 0xE3, ++ 0x83, 0xB3, 0xE3, 0x83, 0x98, 0xE3, 0x82, 0xAF, ++ 0xE3, 0x82, 0xBF, 0xE3, 0x83, 0xBC, 0xE3, 0x83, ++ 0xAB, 0xE3, 0x83, 0x98, 0xE3, 0x82, 0x9A, 0xE3, ++ 0x82, 0xBD, 0xE3, 0x83, 0x98, 0xE3, 0x82, 0x9A, ++ 0xE3, 0x83, 0x8B, 0xE3, 0x83, 0x92, 0xE3, 0x83, ++ 0x98, 0xE3, 0x83, 0xAB, 0xE3, 0x83, 0x84, 0xE3, ++ 0x83, 0x98, 0xE3, 0x82, 0x9A, 0xE3, 0x83, 0xB3, ++ 0xE3, 0x82, 0xB9, 0xE3, 0x83, 0x98, 0xE3, 0x82, ++ 0x9A, 0xE3, 0x83, 0xBC, 0xE3, 0x82, 0xB7, 0xE3, ++ 0x82, 0x99, 0xE3, 0x83, 0x98, 0xE3, 0x82, 0x99, ++ 0xE3, 0x83, 0xBC, 0xE3, 0x82, 0xBF, 0xE3, 0x83, ++ 0x9B, 0xE3, 0x82, 0x9A, 0xE3, 0x82, 0xA4, 0xE3, ++ 0x83, 0xB3, 0xE3, 0x83, 0x88, 0xE3, 0x83, 0x9B, ++ 0xE3, 0x82, 0x99, 0xE3, 0x83, 0xAB, 0xE3, 0x83, ++ 0x88, 0xE3, 0x83, 0x9B, 0xE3, 0x83, 0xB3, 0xE3, ++ 0x83, 0x9B, 0xE3, 0x82, 0x9A, 0xE3, 0x83, 0xB3, ++ 0xE3, 0x83, 0x88, 0xE3, 0x82, 0x99, 0xE3, 0x83, ++ 0x9B, 0xE3, 0x83, 0xBC, 0xE3, 0x83, 0xAB, 0xE3, ++ 0x83, 0x9B, 0xE3, 0x83, 0xBC, 0xE3, 0x83, 0xB3, ++ 0xE3, 0x83, 0x9E, 0xE3, 0x82, 0xA4, 0xE3, 0x82, ++ 0xAF, 0xE3, 0x83, 0xAD, 0xE3, 0x83, 0x9E, 0xE3, ++ 0x82, 0xA4, 0xE3, 0x83, 0xAB, 0xE3, 0x83, 0x9E, ++ 0xE3, 0x83, 0x83, 0xE3, 0x83, 0x8F, 0xE3, 0x83, ++ 0x9E, 0xE3, 0x83, 0xAB, 0xE3, 0x82, 0xAF, 0xE3, ++ 0x83, 0x9E, 0xE3, 0x83, 0xB3, 0xE3, 0x82, 0xB7, ++ 0xE3, 0x83, 0xA7, 0xE3, 0x83, 0xB3, 0xE3, 0x83, ++ 0x9F, 0xE3, 0x82, 0xAF, 0xE3, 0x83, 0xAD, 0xE3, ++ 0x83, 0xB3, 0xE3, 0x83, 0x9F, 0xE3, 0x83, 0xAA, ++ 0xE3, 0x83, 0x9F, 0xE3, 0x83, 0xAA, 0xE3, 0x83, ++ 0x8F, 0xE3, 0x82, 0x99, 0xE3, 0x83, 0xBC, 0xE3, ++ 0x83, 0xAB, 0xE3, 0x83, 0xA1, 0xE3, 0x82, 0xAB, ++ 0xE3, 0x82, 0x99, 0xE3, 0x83, 0xA1, 0xE3, 0x82, ++ 0xAB, 0xE3, 0x82, 0x99, 0xE3, 0x83, 0x88, 0xE3, ++ 0x83, 0xB3, 0xE3, 0x83, 0xA1, 0xE3, 0x83, 0xBC, ++ 0xE3, 0x83, 0x88, 0xE3, 0x83, 0xAB, 0xE3, 0x83, ++ 0xA4, 0xE3, 0x83, 0xBC, 0xE3, 0x83, 0x88, 0xE3, ++ 0x82, 0x99, 0xE3, 0x83, 0xA4, 0xE3, 0x83, 0xBC, ++ 0xE3, 0x83, 0xAB, 0xE3, 0x83, 0xA6, 0xE3, 0x82, ++ 0xA2, 0xE3, 0x83, 0xB3, 
0xE3, 0x83, 0xAA, 0xE3, ++ 0x83, 0x83, 0xE3, 0x83, 0x88, 0xE3, 0x83, 0xAB, ++ 0xE3, 0x83, 0xAA, 0xE3, 0x83, 0xA9, 0xE3, 0x83, ++ 0xAB, 0xE3, 0x83, 0x92, 0xE3, 0x82, 0x9A, 0xE3, ++ 0x83, 0xBC, 0xE3, 0x83, 0xAB, 0xE3, 0x83, 0xBC, ++ 0xE3, 0x83, 0x95, 0xE3, 0x82, 0x99, 0xE3, 0x83, ++ 0xAB, 0xE3, 0x83, 0xAC, 0xE3, 0x83, 0xA0, 0xE3, ++ 0x83, 0xAC, 0xE3, 0x83, 0xB3, 0xE3, 0x83, 0x88, ++ 0xE3, 0x82, 0xB1, 0xE3, 0x82, 0x99, 0xE3, 0x83, ++ 0xB3, 0xE3, 0x83, 0xAF, 0xE3, 0x83, 0x83, 0xE3, ++ 0x83, 0x88, 0x30, 0xE7, 0x82, 0xB9, 0x31, 0xE7, ++ 0x82, 0xB9, 0x32, 0xE7, 0x82, 0xB9, 0x33, 0xE7, ++ 0x82, 0xB9, 0x34, 0xE7, 0x82, 0xB9, 0x35, 0xE7, ++ 0x82, 0xB9, 0x36, 0xE7, 0x82, 0xB9, 0x37, 0xE7, ++ 0x82, 0xB9, 0x38, 0xE7, 0x82, 0xB9, 0x39, 0xE7, ++ 0x82, 0xB9, 0x31, 0x30, 0xE7, 0x82, 0xB9, 0x31, ++ 0x31, 0xE7, 0x82, 0xB9, 0x31, 0x32, 0xE7, 0x82, ++ 0xB9, 0x31, 0x33, 0xE7, 0x82, 0xB9, 0x31, 0x34, ++ 0xE7, 0x82, 0xB9, 0x31, 0x35, 0xE7, 0x82, 0xB9, ++ 0x31, 0x36, 0xE7, 0x82, 0xB9, 0x31, 0x37, 0xE7, ++ 0x82, 0xB9, 0x31, 0x38, 0xE7, 0x82, 0xB9, 0x31, ++ 0x39, 0xE7, 0x82, 0xB9, 0x32, 0x30, 0xE7, 0x82, ++ 0xB9, 0x32, 0x31, 0xE7, 0x82, 0xB9, 0x32, 0x32, ++ 0xE7, 0x82, 0xB9, 0x32, 0x33, 0xE7, 0x82, 0xB9, ++ 0x32, 0x34, 0xE7, 0x82, 0xB9, 0x68, 0x50, 0x61, ++ 0x64, 0x61, 0x41, 0x55, 0x62, 0x61, 0x72, 0x6F, ++ 0x56, 0x70, 0x63, 0xE5, 0xB9, 0xB3, 0xE6, 0x88, ++ 0x90, 0xE6, 0x98, 0xAD, 0xE5, 0x92, 0x8C, 0xE5, ++ 0xA4, 0xA7, 0xE6, 0xAD, 0xA3, 0xE6, 0x98, 0x8E, ++ 0xE6, 0xB2, 0xBB, 0xE6, 0xA0, 0xAA, 0xE5, 0xBC, ++ 0x8F, 0xE4, 0xBC, 0x9A, 0xE7, 0xA4, 0xBE, 0x70, ++ 0x41, 0x6E, 0x41, 0xCE, 0xBC, 0x41, 0x6D, 0x41, ++ 0x6B, 0x41, 0x4B, 0x42, 0x4D, 0x42, 0x47, 0x42, ++ 0x63, 0x61, 0x6C, 0x6B, 0x63, 0x61, 0x6C, 0x70, ++ 0x46, 0x6E, 0x46, 0xCE, 0xBC, 0x46, 0xCE, 0xBC, ++ 0x67, 0x6D, 0x67, 0x6B, 0x67, 0x48, 0x7A, 0x6B, ++ 0x48, 0x7A, 0x4D, 0x48, 0x7A, 0x47, 0x48, 0x7A, ++ 0x54, 0x48, 0x7A, 0xCE, 0xBC, 0x6C, 0x6D, 0x6C, ++ 0x64, 0x6C, 0x6B, 0x6C, 0x66, 0x6D, 0x6E, 0x6D, ++ 0xCE, 0xBC, 0x6D, 0x6D, 0x6D, 0x63, 0x6D, 0x6B, ++ 0x6D, 0x6D, 0x6D, 0x32, 0x63, 0x6D, 0x32, 0x6D, ++ 0x32, 0x6B, 0x6D, 0x32, 0x6D, 0x6D, 0x33, 0x63, ++ 0x6D, 0x33, 0x6D, 0x33, 0x6B, 0x6D, 0x33, 0x6D, ++ 0xE2, 0x88, 0x95, 0x73, 0x6D, 0xE2, 0x88, 0x95, ++ 0x73, 0x32, 0x50, 0x61, 0x6B, 0x50, 0x61, 0x4D, ++ 0x50, 0x61, 0x47, 0x50, 0x61, 0x72, 0x61, 0x64, ++ 0x72, 0x61, 0x64, 0xE2, 0x88, 0x95, 0x73, 0x72, ++ 0x61, 0x64, 0xE2, 0x88, 0x95, 0x73, 0x32, 0x70, ++ 0x73, 0x6E, 0x73, 0xCE, 0xBC, 0x73, 0x6D, 0x73, ++ 0x70, 0x56, 0x6E, 0x56, 0xCE, 0xBC, 0x56, 0x6D, ++ 0x56, 0x6B, 0x56, 0x4D, 0x56, 0x70, 0x57, 0x6E, ++ 0x57, 0xCE, 0xBC, 0x57, 0x6D, 0x57, 0x6B, 0x57, ++ 0x4D, 0x57, 0x6B, 0xCE, 0xA9, 0x4D, 0xCE, 0xA9, ++ 0x61, 0x2E, 0x6D, 0x2E, 0x42, 0x71, 0x63, 0x63, ++ 0x63, 0x64, 0x43, 0xE2, 0x88, 0x95, 0x6B, 0x67, ++ 0x43, 0x6F, 0x2E, 0x64, 0x42, 0x47, 0x79, 0x68, ++ 0x61, 0x48, 0x50, 0x69, 0x6E, 0x4B, 0x4B, 0x4B, ++ 0x4D, 0x6B, 0x74, 0x6C, 0x6D, 0x6C, 0x6E, 0x6C, ++ 0x6F, 0x67, 0x6C, 0x78, 0x6D, 0x62, 0x6D, 0x69, ++ 0x6C, 0x6D, 0x6F, 0x6C, 0x50, 0x48, 0x70, 0x2E, ++ 0x6D, 0x2E, 0x50, 0x50, 0x4D, 0x50, 0x52, 0x73, ++ 0x72, 0x53, 0x76, 0x57, 0x62, 0x31, 0xE6, 0x97, ++ 0xA5, 0x32, 0xE6, 0x97, 0xA5, 0x33, 0xE6, 0x97, ++ 0xA5, 0x34, 0xE6, 0x97, 0xA5, 0x35, 0xE6, 0x97, ++ 0xA5, 0x36, 0xE6, 0x97, 0xA5, 0x37, 0xE6, 0x97, ++ 0xA5, 0x38, 0xE6, 0x97, 0xA5, 0x39, 0xE6, 0x97, ++ 0xA5, 0x31, 0x30, 0xE6, 0x97, 0xA5, 0x31, 0x31, ++ 0xE6, 0x97, 0xA5, 0x31, 0x32, 0xE6, 0x97, 0xA5, ++ 0x31, 0x33, 0xE6, 0x97, 0xA5, 0x31, 0x34, 0xE6, ++ 0x97, 0xA5, 0x31, 0x35, 0xE6, 0x97, 0xA5, 0x31, ++ 0x36, 
0xE6, 0x97, 0xA5, 0x31, 0x37, 0xE6, 0x97, ++ 0xA5, 0x31, 0x38, 0xE6, 0x97, 0xA5, 0x31, 0x39, ++ 0xE6, 0x97, 0xA5, 0x32, 0x30, 0xE6, 0x97, 0xA5, ++ 0x32, 0x31, 0xE6, 0x97, 0xA5, 0x32, 0x32, 0xE6, ++ 0x97, 0xA5, 0x32, 0x33, 0xE6, 0x97, 0xA5, 0x32, ++ 0x34, 0xE6, 0x97, 0xA5, 0x32, 0x35, 0xE6, 0x97, ++ 0xA5, 0x32, 0x36, 0xE6, 0x97, 0xA5, 0x32, 0x37, ++ 0xE6, 0x97, 0xA5, 0x32, 0x38, 0xE6, 0x97, 0xA5, ++ 0x32, 0x39, 0xE6, 0x97, 0xA5, 0x33, 0x30, 0xE6, ++ 0x97, 0xA5, 0x33, 0x31, 0xE6, 0x97, 0xA5, 0xF6, ++ 0xE8, 0xB1, 0x88, 0xF6, 0xE6, 0x9B, 0xB4, 0xF6, ++ 0xE8, 0xBB, 0x8A, 0xF6, 0xE8, 0xB3, 0x88, 0xF6, ++ 0xE6, 0xBB, 0x91, 0xF6, 0xE4, 0xB8, 0xB2, 0xF6, ++ 0xE5, 0x8F, 0xA5, 0xF6, 0xE9, 0xBE, 0x9C, 0xF6, ++ 0xE9, 0xBE, 0x9C, 0xF6, 0xE5, 0xA5, 0x91, 0xF6, ++ 0xE9, 0x87, 0x91, 0xF6, 0xE5, 0x96, 0x87, 0xF6, ++ 0xE5, 0xA5, 0x88, 0xF6, 0xE6, 0x87, 0xB6, 0xF6, ++ 0xE7, 0x99, 0xA9, 0xF6, 0xE7, 0xBE, 0x85, 0xF6, ++ 0xE8, 0x98, 0xBF, 0xF6, 0xE8, 0x9E, 0xBA, 0xF6, ++ 0xE8, 0xA3, 0xB8, 0xF6, 0xE9, 0x82, 0x8F, 0xF6, ++ 0xE6, 0xA8, 0x82, 0xF6, 0xE6, 0xB4, 0x9B, 0xF6, ++ 0xE7, 0x83, 0x99, 0xF6, 0xE7, 0x8F, 0x9E, 0xF6, ++ 0xE8, 0x90, 0xBD, 0xF6, 0xE9, 0x85, 0xAA, 0xF6, ++ 0xE9, 0xA7, 0xB1, 0xF6, 0xE4, 0xBA, 0x82, 0xF6, ++ 0xE5, 0x8D, 0xB5, 0xF6, 0xE6, 0xAC, 0x84, 0xF6, ++ 0xE7, 0x88, 0x9B, 0xF6, 0xE8, 0x98, 0xAD, 0xF6, ++ 0xE9, 0xB8, 0x9E, 0xF6, 0xE5, 0xB5, 0x90, 0xF6, ++ 0xE6, 0xBF, 0xAB, 0xF6, 0xE8, 0x97, 0x8D, 0xF6, ++ 0xE8, 0xA5, 0xA4, 0xF6, 0xE6, 0x8B, 0x89, 0xF6, ++ 0xE8, 0x87, 0x98, 0xF6, 0xE8, 0xA0, 0x9F, 0xF6, ++ 0xE5, 0xBB, 0x8A, 0xF6, 0xE6, 0x9C, 0x97, 0xF6, ++ 0xE6, 0xB5, 0xAA, 0xF6, 0xE7, 0x8B, 0xBC, 0xF6, ++ 0xE9, 0x83, 0x8E, 0xF6, 0xE4, 0xBE, 0x86, 0xF6, ++ 0xE5, 0x86, 0xB7, 0xF6, 0xE5, 0x8B, 0x9E, 0xF6, ++ 0xE6, 0x93, 0x84, 0xF6, 0xE6, 0xAB, 0x93, 0xF6, ++ 0xE7, 0x88, 0x90, 0xF6, 0xE7, 0x9B, 0xA7, 0xF6, ++ 0xE8, 0x80, 0x81, 0xF6, 0xE8, 0x98, 0x86, 0xF6, ++ 0xE8, 0x99, 0x9C, 0xF6, 0xE8, 0xB7, 0xAF, 0xF6, ++ 0xE9, 0x9C, 0xB2, 0xF6, 0xE9, 0xAD, 0xAF, 0xF6, ++ 0xE9, 0xB7, 0xBA, 0xF6, 0xE7, 0xA2, 0x8C, 0xF6, ++ 0xE7, 0xA5, 0xBF, 0xF6, 0xE7, 0xB6, 0xA0, 0xF6, ++ 0xE8, 0x8F, 0x89, 0xF6, 0xE9, 0x8C, 0x84, 0xF6, ++ 0xE9, 0xB9, 0xBF, 0xF6, 0xE8, 0xAB, 0x96, 0xF6, ++ 0xE5, 0xA3, 0x9F, 0xF6, 0xE5, 0xBC, 0x84, 0xF6, ++ 0xE7, 0xB1, 0xA0, 0xF6, 0xE8, 0x81, 0xBE, 0xF6, ++ 0xE7, 0x89, 0xA2, 0xF6, 0xE7, 0xA3, 0x8A, 0xF6, ++ 0xE8, 0xB3, 0x82, 0xF6, 0xE9, 0x9B, 0xB7, 0xF6, ++ 0xE5, 0xA3, 0x98, 0xF6, 0xE5, 0xB1, 0xA2, 0xF6, ++ 0xE6, 0xA8, 0x93, 0xF6, 0xE6, 0xB7, 0x9A, 0xF6, ++ 0xE6, 0xBC, 0x8F, 0xF6, 0xE7, 0xB4, 0xAF, 0xF6, ++ 0xE7, 0xB8, 0xB7, 0xF6, 0xE9, 0x99, 0x8B, 0xF6, ++ 0xE5, 0x8B, 0x92, 0xF6, 0xE8, 0x82, 0x8B, 0xF6, ++ 0xE5, 0x87, 0x9C, 0xF6, 0xE5, 0x87, 0x8C, 0xF6, ++ 0xE7, 0xA8, 0x9C, 0xF6, 0xE7, 0xB6, 0xBE, 0xF6, ++ 0xE8, 0x8F, 0xB1, 0xF6, 0xE9, 0x99, 0xB5, 0xF6, ++ 0xE8, 0xAE, 0x80, 0xF6, 0xE6, 0x8B, 0x8F, 0xF6, ++ 0xE6, 0xA8, 0x82, 0xF6, 0xE8, 0xAB, 0xBE, 0xF6, ++ 0xE4, 0xB8, 0xB9, 0xF6, 0xE5, 0xAF, 0xA7, 0xF6, ++ 0xE6, 0x80, 0x92, 0xF6, 0xE7, 0x8E, 0x87, 0xF6, ++ 0xE7, 0x95, 0xB0, 0xF6, 0xE5, 0x8C, 0x97, 0xF6, ++ 0xE7, 0xA3, 0xBB, 0xF6, 0xE4, 0xBE, 0xBF, 0xF6, ++ 0xE5, 0xBE, 0xA9, 0xF6, 0xE4, 0xB8, 0x8D, 0xF6, ++ 0xE6, 0xB3, 0x8C, 0xF6, 0xE6, 0x95, 0xB8, 0xF6, ++ 0xE7, 0xB4, 0xA2, 0xF6, 0xE5, 0x8F, 0x83, 0xF6, ++ 0xE5, 0xA1, 0x9E, 0xF6, 0xE7, 0x9C, 0x81, 0xF6, ++ 0xE8, 0x91, 0x89, 0xF6, 0xE8, 0xAA, 0xAA, 0xF6, ++ 0xE6, 0xAE, 0xBA, 0xF6, 0xE8, 0xBE, 0xB0, 0xF6, ++ 0xE6, 0xB2, 0x88, 0xF6, 0xE6, 0x8B, 0xBE, 0xF6, ++ 0xE8, 0x8B, 0xA5, 0xF6, 0xE6, 0x8E, 0xA0, 0xF6, ++ 0xE7, 0x95, 0xA5, 0xF6, 0xE4, 0xBA, 0xAE, 
0xF6, ++ 0xE5, 0x85, 0xA9, 0xF6, 0xE5, 0x87, 0x89, 0xF6, ++ 0xE6, 0xA2, 0x81, 0xF6, 0xE7, 0xB3, 0xA7, 0xF6, ++ 0xE8, 0x89, 0xAF, 0xF6, 0xE8, 0xAB, 0x92, 0xF6, ++ 0xE9, 0x87, 0x8F, 0xF6, 0xE5, 0x8B, 0xB5, 0xF6, ++ 0xE5, 0x91, 0x82, 0xF6, 0xE5, 0xA5, 0xB3, 0xF6, ++ 0xE5, 0xBB, 0xAC, 0xF6, 0xE6, 0x97, 0x85, 0xF6, ++ 0xE6, 0xBF, 0xBE, 0xF6, 0xE7, 0xA4, 0xAA, 0xF6, ++ 0xE9, 0x96, 0xAD, 0xF6, 0xE9, 0xA9, 0xAA, 0xF6, ++ 0xE9, 0xBA, 0x97, 0xF6, 0xE9, 0xBB, 0x8E, 0xF6, ++ 0xE5, 0x8A, 0x9B, 0xF6, 0xE6, 0x9B, 0x86, 0xF6, ++ 0xE6, 0xAD, 0xB7, 0xF6, 0xE8, 0xBD, 0xA2, 0xF6, ++ 0xE5, 0xB9, 0xB4, 0xF6, 0xE6, 0x86, 0x90, 0xF6, ++ 0xE6, 0x88, 0x80, 0xF6, 0xE6, 0x92, 0x9A, 0xF6, ++ 0xE6, 0xBC, 0xA3, 0xF6, 0xE7, 0x85, 0x89, 0xF6, ++ 0xE7, 0x92, 0x89, 0xF6, 0xE7, 0xA7, 0x8A, 0xF6, ++ 0xE7, 0xB7, 0xB4, 0xF6, 0xE8, 0x81, 0xAF, 0xF6, ++ 0xE8, 0xBC, 0xA6, 0xF6, 0xE8, 0x93, 0xAE, 0xF6, ++ 0xE9, 0x80, 0xA3, 0xF6, 0xE9, 0x8D, 0x8A, 0xF6, ++ 0xE5, 0x88, 0x97, 0xF6, 0xE5, 0x8A, 0xA3, 0xF6, ++ 0xE5, 0x92, 0xBD, 0xF6, 0xE7, 0x83, 0x88, 0xF6, ++ 0xE8, 0xA3, 0x82, 0xF6, 0xE8, 0xAA, 0xAA, 0xF6, ++ 0xE5, 0xBB, 0x89, 0xF6, 0xE5, 0xBF, 0xB5, 0xF6, ++ 0xE6, 0x8D, 0xBB, 0xF6, 0xE6, 0xAE, 0xAE, 0xF6, ++ 0xE7, 0xB0, 0xBE, 0xF6, 0xE7, 0x8D, 0xB5, 0xF6, ++ 0xE4, 0xBB, 0xA4, 0xF6, 0xE5, 0x9B, 0xB9, 0xF6, ++ 0xE5, 0xAF, 0xA7, 0xF6, 0xE5, 0xB6, 0xBA, 0xF6, ++ 0xE6, 0x80, 0x9C, 0xF6, 0xE7, 0x8E, 0xB2, 0xF6, ++ 0xE7, 0x91, 0xA9, 0xF6, 0xE7, 0xBE, 0x9A, 0xF6, ++ 0xE8, 0x81, 0x86, 0xF6, 0xE9, 0x88, 0xB4, 0xF6, ++ 0xE9, 0x9B, 0xB6, 0xF6, 0xE9, 0x9D, 0x88, 0xF6, ++ 0xE9, 0xA0, 0x98, 0xF6, 0xE4, 0xBE, 0x8B, 0xF6, ++ 0xE7, 0xA6, 0xAE, 0xF6, 0xE9, 0x86, 0xB4, 0xF6, ++ 0xE9, 0x9A, 0xB8, 0xF6, 0xE6, 0x83, 0xA1, 0xF6, ++ 0xE4, 0xBA, 0x86, 0xF6, 0xE5, 0x83, 0x9A, 0xF6, ++ 0xE5, 0xAF, 0xAE, 0xF6, 0xE5, 0xB0, 0xBF, 0xF6, ++ 0xE6, 0x96, 0x99, 0xF6, 0xE6, 0xA8, 0x82, 0xF6, ++ 0xE7, 0x87, 0x8E, 0xF6, 0xE7, 0x99, 0x82, 0xF6, ++ 0xE8, 0x93, 0xBC, 0xF6, 0xE9, 0x81, 0xBC, 0xF6, ++ 0xE9, 0xBE, 0x8D, 0xF6, 0xE6, 0x9A, 0x88, 0xF6, ++ 0xE9, 0x98, 0xAE, 0xF6, 0xE5, 0x8A, 0x89, 0xF6, ++ 0xE6, 0x9D, 0xBB, 0xF6, 0xE6, 0x9F, 0xB3, 0xF6, ++ 0xE6, 0xB5, 0x81, 0xF6, 0xE6, 0xBA, 0x9C, 0xF6, ++ 0xE7, 0x90, 0x89, 0xF6, 0xE7, 0x95, 0x99, 0xF6, ++ 0xE7, 0xA1, 0xAB, 0xF6, 0xE7, 0xB4, 0x90, 0xF6, ++ 0xE9, 0xA1, 0x9E, 0xF6, 0xE5, 0x85, 0xAD, 0xF6, ++ 0xE6, 0x88, 0xAE, 0xF6, 0xE9, 0x99, 0xB8, 0xF6, ++ 0xE5, 0x80, 0xAB, 0xF6, 0xE5, 0xB4, 0x99, 0xF6, ++ 0xE6, 0xB7, 0xAA, 0xF6, 0xE8, 0xBC, 0xAA, 0xF6, ++ 0xE5, 0xBE, 0x8B, 0xF6, 0xE6, 0x85, 0x84, 0xF6, ++ 0xE6, 0xA0, 0x97, 0xF6, 0xE7, 0x8E, 0x87, 0xF6, ++ 0xE9, 0x9A, 0x86, 0xF6, 0xE5, 0x88, 0xA9, 0xF6, ++ 0xE5, 0x90, 0x8F, 0xF6, 0xE5, 0xB1, 0xA5, 0xF6, ++ 0xE6, 0x98, 0x93, 0xF6, 0xE6, 0x9D, 0x8E, 0xF6, ++ 0xE6, 0xA2, 0xA8, 0xF6, 0xE6, 0xB3, 0xA5, 0xF6, ++ 0xE7, 0x90, 0x86, 0xF6, 0xE7, 0x97, 0xA2, 0xF6, ++ 0xE7, 0xBD, 0xB9, 0xF6, 0xE8, 0xA3, 0x8F, 0xF6, ++ 0xE8, 0xA3, 0xA1, 0xF6, 0xE9, 0x87, 0x8C, 0xF6, ++ 0xE9, 0x9B, 0xA2, 0xF6, 0xE5, 0x8C, 0xBF, 0xF6, ++ 0xE6, 0xBA, 0xBA, 0xF6, 0xE5, 0x90, 0x9D, 0xF6, ++ 0xE7, 0x87, 0x90, 0xF6, 0xE7, 0x92, 0x98, 0xF6, ++ 0xE8, 0x97, 0xBA, 0xF6, 0xE9, 0x9A, 0xA3, 0xF6, ++ 0xE9, 0xB1, 0x97, 0xF6, 0xE9, 0xBA, 0x9F, 0xF6, ++ 0xE6, 0x9E, 0x97, 0xF6, 0xE6, 0xB7, 0x8B, 0xF6, ++ 0xE8, 0x87, 0xA8, 0xF6, 0xE7, 0xAB, 0x8B, 0xF6, ++ 0xE7, 0xAC, 0xA0, 0xF6, 0xE7, 0xB2, 0x92, 0xF6, ++ 0xE7, 0x8B, 0x80, 0xF6, 0xE7, 0x82, 0x99, 0xF6, ++ 0xE8, 0xAD, 0x98, 0xF6, 0xE4, 0xBB, 0x80, 0xF6, ++ 0xE8, 0x8C, 0xB6, 0xF6, 0xE5, 0x88, 0xBA, 0xF6, ++ 0xE5, 0x88, 0x87, 0xF6, 0xE5, 0xBA, 0xA6, 0xF6, ++ 0xE6, 0x8B, 0x93, 0xF6, 
0xE7, 0xB3, 0x96, 0xF6, ++ 0xE5, 0xAE, 0x85, 0xF6, 0xE6, 0xB4, 0x9E, 0xF6, ++ 0xE6, 0x9A, 0xB4, 0xF6, 0xE8, 0xBC, 0xBB, 0xF6, ++ 0xE8, 0xA1, 0x8C, 0xF6, 0xE9, 0x99, 0x8D, 0xF6, ++ 0xE8, 0xA6, 0x8B, 0xF6, 0xE5, 0xBB, 0x93, 0xF6, ++ 0xE5, 0x85, 0x80, 0xF6, 0xE5, 0x97, 0x80, 0xF6, ++ 0xE5, 0xA1, 0x9A, 0xF6, 0xE6, 0x99, 0xB4, 0xF6, ++ 0xE5, 0x87, 0x9E, 0xF6, 0xE7, 0x8C, 0xAA, 0xF6, ++ 0xE7, 0x9B, 0x8A, 0xF6, 0xE7, 0xA4, 0xBC, 0xF6, ++ 0xE7, 0xA5, 0x9E, 0xF6, 0xE7, 0xA5, 0xA5, 0xF6, ++ 0xE7, 0xA6, 0x8F, 0xF6, 0xE9, 0x9D, 0x96, 0xF6, ++ 0xE7, 0xB2, 0xBE, 0xF6, 0xE7, 0xBE, 0xBD, 0xF6, ++ 0xE8, 0x98, 0x92, 0xF6, 0xE8, 0xAB, 0xB8, 0xF6, ++ 0xE9, 0x80, 0xB8, 0xF6, 0xE9, 0x83, 0xBD, 0xF6, ++ 0xE9, 0xA3, 0xAF, 0xF6, 0xE9, 0xA3, 0xBC, 0xF6, ++ 0xE9, 0xA4, 0xA8, 0xF6, 0xE9, 0xB6, 0xB4, 0xF6, ++ 0xE4, 0xBE, 0xAE, 0xF6, 0xE5, 0x83, 0xA7, 0xF6, ++ 0xE5, 0x85, 0x8D, 0xF6, 0xE5, 0x8B, 0x89, 0xF6, ++ 0xE5, 0x8B, 0xA4, 0xF6, 0xE5, 0x8D, 0x91, 0xF6, ++ 0xE5, 0x96, 0x9D, 0xF6, 0xE5, 0x98, 0x86, 0xF6, ++ 0xE5, 0x99, 0xA8, 0xF6, 0xE5, 0xA1, 0x80, 0xF6, ++ 0xE5, 0xA2, 0xA8, 0xF6, 0xE5, 0xB1, 0xA4, 0xF6, ++ 0xE5, 0xB1, 0xAE, 0xF6, 0xE6, 0x82, 0x94, 0xF6, ++ 0xE6, 0x85, 0xA8, 0xF6, 0xE6, 0x86, 0x8E, 0xF6, ++ 0xE6, 0x87, 0xB2, 0xF6, 0xE6, 0x95, 0x8F, 0xF6, ++ 0xE6, 0x97, 0xA2, 0xF6, 0xE6, 0x9A, 0x91, 0xF6, ++ 0xE6, 0xA2, 0x85, 0xF6, 0xE6, 0xB5, 0xB7, 0xF6, ++ 0xE6, 0xB8, 0x9A, 0xF6, 0xE6, 0xBC, 0xA2, 0xF6, ++ 0xE7, 0x85, 0xAE, 0xF6, 0xE7, 0x88, 0xAB, 0xF6, ++ 0xE7, 0x90, 0xA2, 0xF6, 0xE7, 0xA2, 0x91, 0xF6, ++ 0xE7, 0xA4, 0xBE, 0xF6, 0xE7, 0xA5, 0x89, 0xF6, ++ 0xE7, 0xA5, 0x88, 0xF6, 0xE7, 0xA5, 0x90, 0xF6, ++ 0xE7, 0xA5, 0x96, 0xF6, 0xE7, 0xA5, 0x9D, 0xF6, ++ 0xE7, 0xA6, 0x8D, 0xF6, 0xE7, 0xA6, 0x8E, 0xF6, ++ 0xE7, 0xA9, 0x80, 0xF6, 0xE7, 0xAA, 0x81, 0xF6, ++ 0xE7, 0xAF, 0x80, 0xF6, 0xE7, 0xB7, 0xB4, 0xF6, ++ 0xE7, 0xB8, 0x89, 0xF6, 0xE7, 0xB9, 0x81, 0xF6, ++ 0xE7, 0xBD, 0xB2, 0xF6, 0xE8, 0x80, 0x85, 0xF6, ++ 0xE8, 0x87, 0xAD, 0xF6, 0xE8, 0x89, 0xB9, 0xF6, ++ 0xE8, 0x89, 0xB9, 0xF6, 0xE8, 0x91, 0x97, 0xF6, ++ 0xE8, 0xA4, 0x90, 0xF6, 0xE8, 0xA6, 0x96, 0xF6, ++ 0xE8, 0xAC, 0x81, 0xF6, 0xE8, 0xAC, 0xB9, 0xF6, ++ 0xE8, 0xB3, 0x93, 0xF6, 0xE8, 0xB4, 0x88, 0xF6, ++ 0xE8, 0xBE, 0xB6, 0xF6, 0xE9, 0x80, 0xB8, 0xF6, ++ 0xE9, 0x9B, 0xA3, 0xF6, 0xE9, 0x9F, 0xBF, 0xF6, ++ 0xE9, 0xA0, 0xBB, 0x66, 0x66, 0x66, 0x69, 0x66, ++ 0x6C, 0x66, 0x66, 0x69, 0x66, 0x66, 0x6C, 0x73, ++ 0x74, 0x73, 0x74, 0xD5, 0xB4, 0xD5, 0xB6, 0xD5, ++ 0xB4, 0xD5, 0xA5, 0xD5, 0xB4, 0xD5, 0xAB, 0xD5, ++ 0xBE, 0xD5, 0xB6, 0xD5, 0xB4, 0xD5, 0xAD, 0xF6, ++ 0xD7, 0x99, 0xD6, 0xB4, 0xF6, 0xD7, 0xB2, 0xD6, ++ 0xB7, 0xD7, 0xA2, 0xD7, 0x90, 0xD7, 0x93, 0xD7, ++ 0x94, 0xD7, 0x9B, 0xD7, 0x9C, 0xD7, 0x9D, 0xD7, ++ 0xA8, 0xD7, 0xAA, 0x2B, 0xF6, 0xD7, 0xA9, 0xD7, ++ 0x81, 0xF6, 0xD7, 0xA9, 0xD7, 0x82, 0xF6, 0xD7, ++ 0xA9, 0xD6, 0xBC, 0xD7, 0x81, 0xF6, 0xD7, 0xA9, ++ 0xD6, 0xBC, 0xD7, 0x82, 0xF6, 0xD7, 0x90, 0xD6, ++ 0xB7, 0xF6, 0xD7, 0x90, 0xD6, 0xB8, 0xF6, 0xD7, ++ 0x90, 0xD6, 0xBC, 0xF6, 0xD7, 0x91, 0xD6, 0xBC, ++ 0xF6, 0xD7, 0x92, 0xD6, 0xBC, 0xF6, 0xD7, 0x93, ++ 0xD6, 0xBC, 0xF6, 0xD7, 0x94, 0xD6, 0xBC, 0xF6, ++ 0xD7, 0x95, 0xD6, 0xBC, 0xF6, 0xD7, 0x96, 0xD6, ++ 0xBC, 0xF6, 0xD7, 0x98, 0xD6, 0xBC, 0xF6, 0xD7, ++ 0x99, 0xD6, 0xBC, 0xF6, 0xD7, 0x9A, 0xD6, 0xBC, ++ 0xF6, 0xD7, 0x9B, 0xD6, 0xBC, 0xF6, 0xD7, 0x9C, ++ 0xD6, 0xBC, 0xF6, 0xD7, 0x9E, 0xD6, 0xBC, 0xF6, ++ 0xD7, 0xA0, 0xD6, 0xBC, 0xF6, 0xD7, 0xA1, 0xD6, ++ 0xBC, 0xF6, 0xD7, 0xA3, 0xD6, 0xBC, 0xF6, 0xD7, ++ 0xA4, 0xD6, 0xBC, 0xF6, 0xD7, 0xA6, 0xD6, 0xBC, ++ 0xF6, 0xD7, 0xA7, 0xD6, 0xBC, 0xF6, 0xD7, 0xA8, ++ 0xD6, 
0xBC, 0xF6, 0xD7, 0xA9, 0xD6, 0xBC, 0xF6, ++ 0xD7, 0xAA, 0xD6, 0xBC, 0xF6, 0xD7, 0x95, 0xD6, ++ 0xB9, 0xF6, 0xD7, 0x91, 0xD6, 0xBF, 0xF6, 0xD7, ++ 0x9B, 0xD6, 0xBF, 0xF6, 0xD7, 0xA4, 0xD6, 0xBF, ++ 0xD7, 0x90, 0xD7, 0x9C, 0xD9, 0xB1, 0xD9, 0xB1, ++ 0xD9, 0xBB, 0xD9, 0xBB, 0xD9, 0xBB, 0xD9, 0xBB, ++ 0xD9, 0xBE, 0xD9, 0xBE, 0xD9, 0xBE, 0xD9, 0xBE, ++ 0xDA, 0x80, 0xDA, 0x80, 0xDA, 0x80, 0xDA, 0x80, ++ 0xD9, 0xBA, 0xD9, 0xBA, 0xD9, 0xBA, 0xD9, 0xBA, ++ 0xD9, 0xBF, 0xD9, 0xBF, 0xD9, 0xBF, 0xD9, 0xBF, ++ 0xD9, 0xB9, 0xD9, 0xB9, 0xD9, 0xB9, 0xD9, 0xB9, ++ 0xDA, 0xA4, 0xDA, 0xA4, 0xDA, 0xA4, 0xDA, 0xA4, ++ 0xDA, 0xA6, 0xDA, 0xA6, 0xDA, 0xA6, 0xDA, 0xA6, ++ 0xDA, 0x84, 0xDA, 0x84, 0xDA, 0x84, 0xDA, 0x84, ++ 0xDA, 0x83, 0xDA, 0x83, 0xDA, 0x83, 0xDA, 0x83, ++ 0xDA, 0x86, 0xDA, 0x86, 0xDA, 0x86, 0xDA, 0x86, ++ 0xDA, 0x87, 0xDA, 0x87, 0xDA, 0x87, 0xDA, 0x87, ++ 0xDA, 0x8D, 0xDA, 0x8D, 0xDA, 0x8C, 0xDA, 0x8C, ++ 0xDA, 0x8E, 0xDA, 0x8E, 0xDA, 0x88, 0xDA, 0x88, ++ 0xDA, 0x98, 0xDA, 0x98, 0xDA, 0x91, 0xDA, 0x91, ++ 0xDA, 0xA9, 0xDA, 0xA9, 0xDA, 0xA9, 0xDA, 0xA9, ++ 0xDA, 0xAF, 0xDA, 0xAF, 0xDA, 0xAF, 0xDA, 0xAF, ++ 0xDA, 0xB3, 0xDA, 0xB3, 0xDA, 0xB3, 0xDA, 0xB3, ++ 0xDA, 0xB1, 0xDA, 0xB1, 0xDA, 0xB1, 0xDA, 0xB1, ++ 0xDA, 0xBA, 0xDA, 0xBA, 0xDA, 0xBB, 0xDA, 0xBB, ++ 0xDA, 0xBB, 0xDA, 0xBB, 0xDB, 0x95, 0xD9, 0x94, ++ 0xDB, 0x95, 0xD9, 0x94, 0xDB, 0x81, 0xDB, 0x81, ++ 0xDB, 0x81, 0xDB, 0x81, 0xDA, 0xBE, 0xDA, 0xBE, ++ 0xDA, 0xBE, 0xDA, 0xBE, 0xDB, 0x92, 0xDB, 0x92, ++ 0xDB, 0x92, 0xD9, 0x94, 0xDB, 0x92, 0xD9, 0x94, ++ 0xDA, 0xAD, 0xDA, 0xAD, 0xDA, 0xAD, 0xDA, 0xAD, ++ 0xDB, 0x87, 0xDB, 0x87, 0xDB, 0x86, 0xDB, 0x86, ++ 0xDB, 0x88, 0xDB, 0x88, 0xDB, 0x87, 0xD9, 0xB4, ++ 0xDB, 0x8B, 0xDB, 0x8B, 0xDB, 0x85, 0xDB, 0x85, ++ 0xDB, 0x89, 0xDB, 0x89, 0xDB, 0x90, 0xDB, 0x90, ++ 0xDB, 0x90, 0xDB, 0x90, 0xD9, 0x89, 0xD9, 0x89, ++ 0xD9, 0x8A, 0xD9, 0x94, 0xD8, 0xA7, 0xD9, 0x8A, ++ 0xD9, 0x94, 0xD8, 0xA7, 0xD9, 0x8A, 0xD9, 0x94, ++ 0xDB, 0x95, 0xD9, 0x8A, 0xD9, 0x94, 0xDB, 0x95, ++ 0xD9, 0x8A, 0xD9, 0x94, 0xD9, 0x88, 0xD9, 0x8A, ++ 0xD9, 0x94, 0xD9, 0x88, 0xD9, 0x8A, 0xD9, 0x94, ++ 0xDB, 0x87, 0xD9, 0x8A, 0xD9, 0x94, 0xDB, 0x87, ++ 0xD9, 0x8A, 0xD9, 0x94, 0xDB, 0x86, 0xD9, 0x8A, ++ 0xD9, 0x94, 0xDB, 0x86, 0xD9, 0x8A, 0xD9, 0x94, ++ 0xDB, 0x88, 0xD9, 0x8A, 0xD9, 0x94, 0xDB, 0x88, ++ 0xD9, 0x8A, 0xD9, 0x94, 0xDB, 0x90, 0xD9, 0x8A, ++ 0xD9, 0x94, 0xDB, 0x90, 0xD9, 0x8A, 0xD9, 0x94, ++ 0xDB, 0x90, 0xD9, 0x8A, 0xD9, 0x94, 0xD9, 0x89, ++ 0xD9, 0x8A, 0xD9, 0x94, 0xD9, 0x89, 0xD9, 0x8A, ++ 0xD9, 0x94, 0xD9, 0x89, 0xDB, 0x8C, 0xDB, 0x8C, ++ 0xDB, 0x8C, 0xDB, 0x8C, 0xD9, 0x8A, 0xD9, 0x94, ++ 0xD8, 0xAC, 0xD9, 0x8A, 0xD9, 0x94, 0xD8, 0xAD, ++ 0xD9, 0x8A, 0xD9, 0x94, 0xD9, 0x85, 0xD9, 0x8A, ++ 0xD9, 0x94, 0xD9, 0x89, 0xD9, 0x8A, 0xD9, 0x94, ++ 0xD9, 0x8A, 0xD8, 0xA8, 0xD8, 0xAC, 0xD8, 0xA8, ++ 0xD8, 0xAD, 0xD8, 0xA8, 0xD8, 0xAE, 0xD8, 0xA8, ++ 0xD9, 0x85, 0xD8, 0xA8, 0xD9, 0x89, 0xD8, 0xA8, ++ 0xD9, 0x8A, 0xD8, 0xAA, 0xD8, 0xAC, 0xD8, 0xAA, ++ 0xD8, 0xAD, 0xD8, 0xAA, 0xD8, 0xAE, 0xD8, 0xAA, ++ 0xD9, 0x85, 0xD8, 0xAA, 0xD9, 0x89, 0xD8, 0xAA, ++ 0xD9, 0x8A, 0xD8, 0xAB, 0xD8, 0xAC, 0xD8, 0xAB, ++ 0xD9, 0x85, 0xD8, 0xAB, 0xD9, 0x89, 0xD8, 0xAB, ++ 0xD9, 0x8A, 0xD8, 0xAC, 0xD8, 0xAD, 0xD8, 0xAC, ++ 0xD9, 0x85, 0xD8, 0xAD, 0xD8, 0xAC, 0xD8, 0xAD, ++ 0xD9, 0x85, 0xD8, 0xAE, 0xD8, 0xAC, 0xD8, 0xAE, ++ 0xD8, 0xAD, 0xD8, 0xAE, 0xD9, 0x85, 0xD8, 0xB3, ++ 0xD8, 0xAC, 0xD8, 0xB3, 0xD8, 0xAD, 0xD8, 0xB3, ++ 0xD8, 0xAE, 0xD8, 0xB3, 0xD9, 0x85, 0xD8, 0xB5, ++ 0xD8, 0xAD, 0xD8, 0xB5, 0xD9, 0x85, 0xD8, 0xB6, ++ 0xD8, 0xAC, 0xD8, 0xB6, 0xD8, 0xAD, 0xD8, 
0xB6, ++ 0xD8, 0xAE, 0xD8, 0xB6, 0xD9, 0x85, 0xD8, 0xB7, ++ 0xD8, 0xAD, 0xD8, 0xB7, 0xD9, 0x85, 0xD8, 0xB8, ++ 0xD9, 0x85, 0xD8, 0xB9, 0xD8, 0xAC, 0xD8, 0xB9, ++ 0xD9, 0x85, 0xD8, 0xBA, 0xD8, 0xAC, 0xD8, 0xBA, ++ 0xD9, 0x85, 0xD9, 0x81, 0xD8, 0xAC, 0xD9, 0x81, ++ 0xD8, 0xAD, 0xD9, 0x81, 0xD8, 0xAE, 0xD9, 0x81, ++ 0xD9, 0x85, 0xD9, 0x81, 0xD9, 0x89, 0xD9, 0x81, ++ 0xD9, 0x8A, 0xD9, 0x82, 0xD8, 0xAD, 0xD9, 0x82, ++ 0xD9, 0x85, 0xD9, 0x82, 0xD9, 0x89, 0xD9, 0x82, ++ 0xD9, 0x8A, 0xD9, 0x83, 0xD8, 0xA7, 0xD9, 0x83, ++ 0xD8, 0xAC, 0xD9, 0x83, 0xD8, 0xAD, 0xD9, 0x83, ++ 0xD8, 0xAE, 0xD9, 0x83, 0xD9, 0x84, 0xD9, 0x83, ++ 0xD9, 0x85, 0xD9, 0x83, 0xD9, 0x89, 0xD9, 0x83, ++ 0xD9, 0x8A, 0xD9, 0x84, 0xD8, 0xAC, 0xD9, 0x84, ++ 0xD8, 0xAD, 0xD9, 0x84, 0xD8, 0xAE, 0xD9, 0x84, ++ 0xD9, 0x85, 0xD9, 0x84, 0xD9, 0x89, 0xD9, 0x84, ++ 0xD9, 0x8A, 0xD9, 0x85, 0xD8, 0xAC, 0xD9, 0x85, ++ 0xD8, 0xAD, 0xD9, 0x85, 0xD8, 0xAE, 0xD9, 0x85, ++ 0xD9, 0x85, 0xD9, 0x85, 0xD9, 0x89, 0xD9, 0x85, ++ 0xD9, 0x8A, 0xD9, 0x86, 0xD8, 0xAC, 0xD9, 0x86, ++ 0xD8, 0xAD, 0xD9, 0x86, 0xD8, 0xAE, 0xD9, 0x86, ++ 0xD9, 0x85, 0xD9, 0x86, 0xD9, 0x89, 0xD9, 0x86, ++ 0xD9, 0x8A, 0xD9, 0x87, 0xD8, 0xAC, 0xD9, 0x87, ++ 0xD9, 0x85, 0xD9, 0x87, 0xD9, 0x89, 0xD9, 0x87, ++ 0xD9, 0x8A, 0xD9, 0x8A, 0xD8, 0xAC, 0xD9, 0x8A, ++ 0xD8, 0xAD, 0xD9, 0x8A, 0xD8, 0xAE, 0xD9, 0x8A, ++ 0xD9, 0x85, 0xD9, 0x8A, 0xD9, 0x89, 0xD9, 0x8A, ++ 0xD9, 0x8A, 0xD8, 0xB0, 0xD9, 0xB0, 0xD8, 0xB1, ++ 0xD9, 0xB0, 0xD9, 0x89, 0xD9, 0xB0, 0x20, 0xD9, ++ 0x8C, 0xD9, 0x91, 0x20, 0xD9, 0x8D, 0xD9, 0x91, ++ 0x20, 0xD9, 0x8E, 0xD9, 0x91, 0x20, 0xD9, 0x8F, ++ 0xD9, 0x91, 0x20, 0xD9, 0x90, 0xD9, 0x91, 0x20, ++ 0xD9, 0x91, 0xD9, 0xB0, 0xD9, 0x8A, 0xD9, 0x94, ++ 0xD8, 0xB1, 0xD9, 0x8A, 0xD9, 0x94, 0xD8, 0xB2, ++ 0xD9, 0x8A, 0xD9, 0x94, 0xD9, 0x85, 0xD9, 0x8A, ++ 0xD9, 0x94, 0xD9, 0x86, 0xD9, 0x8A, 0xD9, 0x94, ++ 0xD9, 0x89, 0xD9, 0x8A, 0xD9, 0x94, 0xD9, 0x8A, ++ 0xD8, 0xA8, 0xD8, 0xB1, 0xD8, 0xA8, 0xD8, 0xB2, ++ 0xD8, 0xA8, 0xD9, 0x85, 0xD8, 0xA8, 0xD9, 0x86, ++ 0xD8, 0xA8, 0xD9, 0x89, 0xD8, 0xA8, 0xD9, 0x8A, ++ 0xD8, 0xAA, 0xD8, 0xB1, 0xD8, 0xAA, 0xD8, 0xB2, ++ 0xD8, 0xAA, 0xD9, 0x85, 0xD8, 0xAA, 0xD9, 0x86, ++ 0xD8, 0xAA, 0xD9, 0x89, 0xD8, 0xAA, 0xD9, 0x8A, ++ 0xD8, 0xAB, 0xD8, 0xB1, 0xD8, 0xAB, 0xD8, 0xB2, ++ 0xD8, 0xAB, 0xD9, 0x85, 0xD8, 0xAB, 0xD9, 0x86, ++ 0xD8, 0xAB, 0xD9, 0x89, 0xD8, 0xAB, 0xD9, 0x8A, ++ 0xD9, 0x81, 0xD9, 0x89, 0xD9, 0x81, 0xD9, 0x8A, ++ 0xD9, 0x82, 0xD9, 0x89, 0xD9, 0x82, 0xD9, 0x8A, ++ 0xD9, 0x83, 0xD8, 0xA7, 0xD9, 0x83, 0xD9, 0x84, ++ 0xD9, 0x83, 0xD9, 0x85, 0xD9, 0x83, 0xD9, 0x89, ++ 0xD9, 0x83, 0xD9, 0x8A, 0xD9, 0x84, 0xD9, 0x85, ++ 0xD9, 0x84, 0xD9, 0x89, 0xD9, 0x84, 0xD9, 0x8A, ++ 0xD9, 0x85, 0xD8, 0xA7, 0xD9, 0x85, 0xD9, 0x85, ++ 0xD9, 0x86, 0xD8, 0xB1, 0xD9, 0x86, 0xD8, 0xB2, ++ 0xD9, 0x86, 0xD9, 0x85, 0xD9, 0x86, 0xD9, 0x86, ++ 0xD9, 0x86, 0xD9, 0x89, 0xD9, 0x86, 0xD9, 0x8A, ++ 0xD9, 0x89, 0xD9, 0xB0, 0xD9, 0x8A, 0xD8, 0xB1, ++ 0xD9, 0x8A, 0xD8, 0xB2, 0xD9, 0x8A, 0xD9, 0x85, ++ 0xD9, 0x8A, 0xD9, 0x86, 0xD9, 0x8A, 0xD9, 0x89, ++ 0xD9, 0x8A, 0xD9, 0x8A, 0xD9, 0x8A, 0xD9, 0x94, ++ 0xD8, 0xAC, 0xD9, 0x8A, 0xD9, 0x94, 0xD8, 0xAD, ++ 0xD9, 0x8A, 0xD9, 0x94, 0xD8, 0xAE, 0xD9, 0x8A, ++ 0xD9, 0x94, 0xD9, 0x85, 0xD9, 0x8A, 0xD9, 0x94, ++ 0xD9, 0x87, 0xD8, 0xA8, 0xD8, 0xAC, 0xD8, 0xA8, ++ 0xD8, 0xAD, 0xD8, 0xA8, 0xD8, 0xAE, 0xD8, 0xA8, ++ 0xD9, 0x85, 0xD8, 0xA8, 0xD9, 0x87, 0xD8, 0xAA, ++ 0xD8, 0xAC, 0xD8, 0xAA, 0xD8, 0xAD, 0xD8, 0xAA, ++ 0xD8, 0xAE, 0xD8, 0xAA, 0xD9, 0x85, 0xD8, 0xAA, ++ 0xD9, 0x87, 0xD8, 0xAB, 0xD9, 0x85, 0xD8, 0xAC, ++ 0xD8, 0xAD, 0xD8, 0xAC, 
0xD9, 0x85, 0xD8, 0xAD, ++ 0xD8, 0xAC, 0xD8, 0xAD, 0xD9, 0x85, 0xD8, 0xAE, ++ 0xD8, 0xAC, 0xD8, 0xAE, 0xD9, 0x85, 0xD8, 0xB3, ++ 0xD8, 0xAC, 0xD8, 0xB3, 0xD8, 0xAD, 0xD8, 0xB3, ++ 0xD8, 0xAE, 0xD8, 0xB3, 0xD9, 0x85, 0xD8, 0xB5, ++ 0xD8, 0xAD, 0xD8, 0xB5, 0xD8, 0xAE, 0xD8, 0xB5, ++ 0xD9, 0x85, 0xD8, 0xB6, 0xD8, 0xAC, 0xD8, 0xB6, ++ 0xD8, 0xAD, 0xD8, 0xB6, 0xD8, 0xAE, 0xD8, 0xB6, ++ 0xD9, 0x85, 0xD8, 0xB7, 0xD8, 0xAD, 0xD8, 0xB8, ++ 0xD9, 0x85, 0xD8, 0xB9, 0xD8, 0xAC, 0xD8, 0xB9, ++ 0xD9, 0x85, 0xD8, 0xBA, 0xD8, 0xAC, 0xD8, 0xBA, ++ 0xD9, 0x85, 0xD9, 0x81, 0xD8, 0xAC, 0xD9, 0x81, ++ 0xD8, 0xAD, 0xD9, 0x81, 0xD8, 0xAE, 0xD9, 0x81, ++ 0xD9, 0x85, 0xD9, 0x82, 0xD8, 0xAD, 0xD9, 0x82, ++ 0xD9, 0x85, 0xD9, 0x83, 0xD8, 0xAC, 0xD9, 0x83, ++ 0xD8, 0xAD, 0xD9, 0x83, 0xD8, 0xAE, 0xD9, 0x83, ++ 0xD9, 0x84, 0xD9, 0x83, 0xD9, 0x85, 0xD9, 0x84, ++ 0xD8, 0xAC, 0xD9, 0x84, 0xD8, 0xAD, 0xD9, 0x84, ++ 0xD8, 0xAE, 0xD9, 0x84, 0xD9, 0x85, 0xD9, 0x84, ++ 0xD9, 0x87, 0xD9, 0x85, 0xD8, 0xAC, 0xD9, 0x85, ++ 0xD8, 0xAD, 0xD9, 0x85, 0xD8, 0xAE, 0xD9, 0x85, ++ 0xD9, 0x85, 0xD9, 0x86, 0xD8, 0xAC, 0xD9, 0x86, ++ 0xD8, 0xAD, 0xD9, 0x86, 0xD8, 0xAE, 0xD9, 0x86, ++ 0xD9, 0x85, 0xD9, 0x86, 0xD9, 0x87, 0xD9, 0x87, ++ 0xD8, 0xAC, 0xD9, 0x87, 0xD9, 0x85, 0xD9, 0x87, ++ 0xD9, 0xB0, 0xD9, 0x8A, 0xD8, 0xAC, 0xD9, 0x8A, ++ 0xD8, 0xAD, 0xD9, 0x8A, 0xD8, 0xAE, 0xD9, 0x8A, ++ 0xD9, 0x85, 0xD9, 0x8A, 0xD9, 0x87, 0xD9, 0x8A, ++ 0xD9, 0x94, 0xD9, 0x85, 0xD9, 0x8A, 0xD9, 0x94, ++ 0xD9, 0x87, 0xD8, 0xA8, 0xD9, 0x85, 0xD8, 0xA8, ++ 0xD9, 0x87, 0xD8, 0xAA, 0xD9, 0x85, 0xD8, 0xAA, ++ 0xD9, 0x87, 0xD8, 0xAB, 0xD9, 0x85, 0xD8, 0xAB, ++ 0xD9, 0x87, 0xD8, 0xB3, 0xD9, 0x85, 0xD8, 0xB3, ++ 0xD9, 0x87, 0xD8, 0xB4, 0xD9, 0x85, 0xD8, 0xB4, ++ 0xD9, 0x87, 0xD9, 0x83, 0xD9, 0x84, 0xD9, 0x83, ++ 0xD9, 0x85, 0xD9, 0x84, 0xD9, 0x85, 0xD9, 0x86, ++ 0xD9, 0x85, 0xD9, 0x86, 0xD9, 0x87, 0xD9, 0x8A, ++ 0xD9, 0x85, 0xD9, 0x8A, 0xD9, 0x87, 0xD9, 0x80, ++ 0xD9, 0x8E, 0xD9, 0x91, 0xD9, 0x80, 0xD9, 0x8F, ++ 0xD9, 0x91, 0xD9, 0x80, 0xD9, 0x90, 0xD9, 0x91, ++ 0xD8, 0xB7, 0xD9, 0x89, 0xD8, 0xB7, 0xD9, 0x8A, ++ 0xD8, 0xB9, 0xD9, 0x89, 0xD8, 0xB9, 0xD9, 0x8A, ++ 0xD8, 0xBA, 0xD9, 0x89, 0xD8, 0xBA, 0xD9, 0x8A, ++ 0xD8, 0xB3, 0xD9, 0x89, 0xD8, 0xB3, 0xD9, 0x8A, ++ 0xD8, 0xB4, 0xD9, 0x89, 0xD8, 0xB4, 0xD9, 0x8A, ++ 0xD8, 0xAD, 0xD9, 0x89, 0xD8, 0xAD, 0xD9, 0x8A, ++ 0xD8, 0xAC, 0xD9, 0x89, 0xD8, 0xAC, 0xD9, 0x8A, ++ 0xD8, 0xAE, 0xD9, 0x89, 0xD8, 0xAE, 0xD9, 0x8A, ++ 0xD8, 0xB5, 0xD9, 0x89, 0xD8, 0xB5, 0xD9, 0x8A, ++ 0xD8, 0xB6, 0xD9, 0x89, 0xD8, 0xB6, 0xD9, 0x8A, ++ 0xD8, 0xB4, 0xD8, 0xAC, 0xD8, 0xB4, 0xD8, 0xAD, ++ 0xD8, 0xB4, 0xD8, 0xAE, 0xD8, 0xB4, 0xD9, 0x85, ++ 0xD8, 0xB4, 0xD8, 0xB1, 0xD8, 0xB3, 0xD8, 0xB1, ++ 0xD8, 0xB5, 0xD8, 0xB1, 0xD8, 0xB6, 0xD8, 0xB1, ++ 0xD8, 0xB7, 0xD9, 0x89, 0xD8, 0xB7, 0xD9, 0x8A, ++ 0xD8, 0xB9, 0xD9, 0x89, 0xD8, 0xB9, 0xD9, 0x8A, ++ 0xD8, 0xBA, 0xD9, 0x89, 0xD8, 0xBA, 0xD9, 0x8A, ++ 0xD8, 0xB3, 0xD9, 0x89, 0xD8, 0xB3, 0xD9, 0x8A, ++ 0xD8, 0xB4, 0xD9, 0x89, 0xD8, 0xB4, 0xD9, 0x8A, ++ 0xD8, 0xAD, 0xD9, 0x89, 0xD8, 0xAD, 0xD9, 0x8A, ++ 0xD8, 0xAC, 0xD9, 0x89, 0xD8, 0xAC, 0xD9, 0x8A, ++ 0xD8, 0xAE, 0xD9, 0x89, 0xD8, 0xAE, 0xD9, 0x8A, ++ 0xD8, 0xB5, 0xD9, 0x89, 0xD8, 0xB5, 0xD9, 0x8A, ++ 0xD8, 0xB6, 0xD9, 0x89, 0xD8, 0xB6, 0xD9, 0x8A, ++ 0xD8, 0xB4, 0xD8, 0xAC, 0xD8, 0xB4, 0xD8, 0xAD, ++ 0xD8, 0xB4, 0xD8, 0xAE, 0xD8, 0xB4, 0xD9, 0x85, ++ 0xD8, 0xB4, 0xD8, 0xB1, 0xD8, 0xB3, 0xD8, 0xB1, ++ 0xD8, 0xB5, 0xD8, 0xB1, 0xD8, 0xB6, 0xD8, 0xB1, ++ 0xD8, 0xB4, 0xD8, 0xAC, 0xD8, 0xB4, 0xD8, 0xAD, ++ 0xD8, 0xB4, 0xD8, 0xAE, 0xD8, 0xB4, 0xD9, 0x85, ++ 0xD8, 
0xB3, 0xD9, 0x87, 0xD8, 0xB4, 0xD9, 0x87, ++ 0xD8, 0xB7, 0xD9, 0x85, 0xD8, 0xB3, 0xD8, 0xAC, ++ 0xD8, 0xB3, 0xD8, 0xAD, 0xD8, 0xB3, 0xD8, 0xAE, ++ 0xD8, 0xB4, 0xD8, 0xAC, 0xD8, 0xB4, 0xD8, 0xAD, ++ 0xD8, 0xB4, 0xD8, 0xAE, 0xD8, 0xB7, 0xD9, 0x85, ++ 0xD8, 0xB8, 0xD9, 0x85, 0xD8, 0xA7, 0xD9, 0x8B, ++ 0xD8, 0xA7, 0xD9, 0x8B, 0xD8, 0xAA, 0xD8, 0xAC, ++ 0xD9, 0x85, 0xD8, 0xAA, 0xD8, 0xAD, 0xD8, 0xAC, ++ 0xD8, 0xAA, 0xD8, 0xAD, 0xD8, 0xAC, 0xD8, 0xAA, ++ 0xD8, 0xAD, 0xD9, 0x85, 0xD8, 0xAA, 0xD8, 0xAE, ++ 0xD9, 0x85, 0xD8, 0xAA, 0xD9, 0x85, 0xD8, 0xAC, ++ 0xD8, 0xAA, 0xD9, 0x85, 0xD8, 0xAD, 0xD8, 0xAA, ++ 0xD9, 0x85, 0xD8, 0xAE, 0xD8, 0xAC, 0xD9, 0x85, ++ 0xD8, 0xAD, 0xD8, 0xAC, 0xD9, 0x85, 0xD8, 0xAD, ++ 0xD8, 0xAD, 0xD9, 0x85, 0xD9, 0x8A, 0xD8, 0xAD, ++ 0xD9, 0x85, 0xD9, 0x89, 0xD8, 0xB3, 0xD8, 0xAD, ++ 0xD8, 0xAC, 0xD8, 0xB3, 0xD8, 0xAC, 0xD8, 0xAD, ++ 0xD8, 0xB3, 0xD8, 0xAC, 0xD9, 0x89, 0xD8, 0xB3, ++ 0xD9, 0x85, 0xD8, 0xAD, 0xD8, 0xB3, 0xD9, 0x85, ++ 0xD8, 0xAD, 0xD8, 0xB3, 0xD9, 0x85, 0xD8, 0xAC, ++ 0xD8, 0xB3, 0xD9, 0x85, 0xD9, 0x85, 0xD8, 0xB3, ++ 0xD9, 0x85, 0xD9, 0x85, 0xD8, 0xB5, 0xD8, 0xAD, ++ 0xD8, 0xAD, 0xD8, 0xB5, 0xD8, 0xAD, 0xD8, 0xAD, ++ 0xD8, 0xB5, 0xD9, 0x85, 0xD9, 0x85, 0xD8, 0xB4, ++ 0xD8, 0xAD, 0xD9, 0x85, 0xD8, 0xB4, 0xD8, 0xAD, ++ 0xD9, 0x85, 0xD8, 0xB4, 0xD8, 0xAC, 0xD9, 0x8A, ++ 0xD8, 0xB4, 0xD9, 0x85, 0xD8, 0xAE, 0xD8, 0xB4, ++ 0xD9, 0x85, 0xD8, 0xAE, 0xD8, 0xB4, 0xD9, 0x85, ++ 0xD9, 0x85, 0xD8, 0xB4, 0xD9, 0x85, 0xD9, 0x85, ++ 0xD8, 0xB6, 0xD8, 0xAD, 0xD9, 0x89, 0xD8, 0xB6, ++ 0xD8, 0xAE, 0xD9, 0x85, 0xD8, 0xB6, 0xD8, 0xAE, ++ 0xD9, 0x85, 0xD8, 0xB7, 0xD9, 0x85, 0xD8, 0xAD, ++ 0xD8, 0xB7, 0xD9, 0x85, 0xD8, 0xAD, 0xD8, 0xB7, ++ 0xD9, 0x85, 0xD9, 0x85, 0xD8, 0xB7, 0xD9, 0x85, ++ 0xD9, 0x8A, 0xD8, 0xB9, 0xD8, 0xAC, 0xD9, 0x85, ++ 0xD8, 0xB9, 0xD9, 0x85, 0xD9, 0x85, 0xD8, 0xB9, ++ 0xD9, 0x85, 0xD9, 0x85, 0xD8, 0xB9, 0xD9, 0x85, ++ 0xD9, 0x89, 0xD8, 0xBA, 0xD9, 0x85, 0xD9, 0x85, ++ 0xD8, 0xBA, 0xD9, 0x85, 0xD9, 0x8A, 0xD8, 0xBA, ++ 0xD9, 0x85, 0xD9, 0x89, 0xD9, 0x81, 0xD8, 0xAE, ++ 0xD9, 0x85, 0xD9, 0x81, 0xD8, 0xAE, 0xD9, 0x85, ++ 0xD9, 0x82, 0xD9, 0x85, 0xD8, 0xAD, 0xD9, 0x82, ++ 0xD9, 0x85, 0xD9, 0x85, 0xD9, 0x84, 0xD8, 0xAD, ++ 0xD9, 0x85, 0xD9, 0x84, 0xD8, 0xAD, 0xD9, 0x8A, ++ 0xD9, 0x84, 0xD8, 0xAD, 0xD9, 0x89, 0xD9, 0x84, ++ 0xD8, 0xAC, 0xD8, 0xAC, 0xD9, 0x84, 0xD8, 0xAC, ++ 0xD8, 0xAC, 0xD9, 0x84, 0xD8, 0xAE, 0xD9, 0x85, ++ 0xD9, 0x84, 0xD8, 0xAE, 0xD9, 0x85, 0xD9, 0x84, ++ 0xD9, 0x85, 0xD8, 0xAD, 0xD9, 0x84, 0xD9, 0x85, ++ 0xD8, 0xAD, 0xD9, 0x85, 0xD8, 0xAD, 0xD8, 0xAC, ++ 0xD9, 0x85, 0xD8, 0xAD, 0xD9, 0x85, 0xD9, 0x85, ++ 0xD8, 0xAD, 0xD9, 0x8A, 0xD9, 0x85, 0xD8, 0xAC, ++ 0xD8, 0xAD, 0xD9, 0x85, 0xD8, 0xAC, 0xD9, 0x85, ++ 0xD9, 0x85, 0xD8, 0xAE, 0xD8, 0xAC, 0xD9, 0x85, ++ 0xD8, 0xAE, 0xD9, 0x85, 0xD9, 0x85, 0xD8, 0xAC, ++ 0xD8, 0xAE, 0xD9, 0x87, 0xD9, 0x85, 0xD8, 0xAC, ++ 0xD9, 0x87, 0xD9, 0x85, 0xD9, 0x85, 0xD9, 0x86, ++ 0xD8, 0xAD, 0xD9, 0x85, 0xD9, 0x86, 0xD8, 0xAD, ++ 0xD9, 0x89, 0xD9, 0x86, 0xD8, 0xAC, 0xD9, 0x85, ++ 0xD9, 0x86, 0xD8, 0xAC, 0xD9, 0x85, 0xD9, 0x86, ++ 0xD8, 0xAC, 0xD9, 0x89, 0xD9, 0x86, 0xD9, 0x85, ++ 0xD9, 0x8A, 0xD9, 0x86, 0xD9, 0x85, 0xD9, 0x89, ++ 0xD9, 0x8A, 0xD9, 0x85, 0xD9, 0x85, 0xD9, 0x8A, ++ 0xD9, 0x85, 0xD9, 0x85, 0xD8, 0xA8, 0xD8, 0xAE, ++ 0xD9, 0x8A, 0xD8, 0xAA, 0xD8, 0xAC, 0xD9, 0x8A, ++ 0xD8, 0xAA, 0xD8, 0xAC, 0xD9, 0x89, 0xD8, 0xAA, ++ 0xD8, 0xAE, 0xD9, 0x8A, 0xD8, 0xAA, 0xD8, 0xAE, ++ 0xD9, 0x89, 0xD8, 0xAA, 0xD9, 0x85, 0xD9, 0x8A, ++ 0xD8, 0xAA, 0xD9, 0x85, 0xD9, 0x89, 0xD8, 0xAC, ++ 0xD9, 0x85, 0xD9, 0x8A, 0xD8, 0xAC, 0xD8, 
0xAD, ++ 0xD9, 0x89, 0xD8, 0xAC, 0xD9, 0x85, 0xD9, 0x89, ++ 0xD8, 0xB3, 0xD8, 0xAE, 0xD9, 0x89, 0xD8, 0xB5, ++ 0xD8, 0xAD, 0xD9, 0x8A, 0xD8, 0xB4, 0xD8, 0xAD, ++ 0xD9, 0x8A, 0xD8, 0xB6, 0xD8, 0xAD, 0xD9, 0x8A, ++ 0xD9, 0x84, 0xD8, 0xAC, 0xD9, 0x8A, 0xD9, 0x84, ++ 0xD9, 0x85, 0xD9, 0x8A, 0xD9, 0x8A, 0xD8, 0xAD, ++ 0xD9, 0x8A, 0xD9, 0x8A, 0xD8, 0xAC, 0xD9, 0x8A, ++ 0xD9, 0x8A, 0xD9, 0x85, 0xD9, 0x8A, 0xD9, 0x85, ++ 0xD9, 0x85, 0xD9, 0x8A, 0xD9, 0x82, 0xD9, 0x85, ++ 0xD9, 0x8A, 0xD9, 0x86, 0xD8, 0xAD, 0xD9, 0x8A, ++ 0xD9, 0x82, 0xD9, 0x85, 0xD8, 0xAD, 0xD9, 0x84, ++ 0xD8, 0xAD, 0xD9, 0x85, 0xD8, 0xB9, 0xD9, 0x85, ++ 0xD9, 0x8A, 0xD9, 0x83, 0xD9, 0x85, 0xD9, 0x8A, ++ 0xD9, 0x86, 0xD8, 0xAC, 0xD8, 0xAD, 0xD9, 0x85, ++ 0xD8, 0xAE, 0xD9, 0x8A, 0xD9, 0x84, 0xD8, 0xAC, ++ 0xD9, 0x85, 0xD9, 0x83, 0xD9, 0x85, 0xD9, 0x85, ++ 0xD9, 0x84, 0xD8, 0xAC, 0xD9, 0x85, 0xD9, 0x86, ++ 0xD8, 0xAC, 0xD8, 0xAD, 0xD8, 0xAC, 0xD8, 0xAD, ++ 0xD9, 0x8A, 0xD8, 0xAD, 0xD8, 0xAC, 0xD9, 0x8A, ++ 0xD9, 0x85, 0xD8, 0xAC, 0xD9, 0x8A, 0xD9, 0x81, ++ 0xD9, 0x85, 0xD9, 0x8A, 0xD8, 0xA8, 0xD8, 0xAD, ++ 0xD9, 0x8A, 0xD9, 0x83, 0xD9, 0x85, 0xD9, 0x85, ++ 0xD8, 0xB9, 0xD8, 0xAC, 0xD9, 0x85, 0xD8, 0xB5, ++ 0xD9, 0x85, 0xD9, 0x85, 0xD8, 0xB3, 0xD8, 0xAE, ++ 0xD9, 0x8A, 0xD9, 0x86, 0xD8, 0xAC, 0xD9, 0x8A, ++ 0xD8, 0xB5, 0xD9, 0x84, 0xDB, 0x92, 0xD9, 0x82, ++ 0xD9, 0x84, 0xDB, 0x92, 0xD8, 0xA7, 0xD9, 0x84, ++ 0xD9, 0x84, 0xD9, 0x87, 0xD8, 0xA7, 0xD9, 0x83, ++ 0xD8, 0xA8, 0xD8, 0xB1, 0xD9, 0x85, 0xD8, 0xAD, ++ 0xD9, 0x85, 0xD8, 0xAF, 0xD8, 0xB5, 0xD9, 0x84, ++ 0xD8, 0xB9, 0xD9, 0x85, 0xD8, 0xB1, 0xD8, 0xB3, ++ 0xD9, 0x88, 0xD9, 0x84, 0xD8, 0xB9, 0xD9, 0x84, ++ 0xD9, 0x8A, 0xD9, 0x87, 0xD9, 0x88, 0xD8, 0xB3, ++ 0xD9, 0x84, 0xD9, 0x85, 0xD8, 0xB5, 0xD9, 0x84, ++ 0xD9, 0x89, 0xD8, 0xB5, 0xD9, 0x84, 0xD9, 0x89, ++ 0x20, 0xD8, 0xA7, 0xD9, 0x84, 0xD9, 0x84, 0xD9, ++ 0x87, 0x20, 0xD8, 0xB9, 0xD9, 0x84, 0xD9, 0x8A, ++ 0xD9, 0x87, 0x20, 0xD9, 0x88, 0xD8, 0xB3, 0xD9, ++ 0x84, 0xD9, 0x85, 0xD8, 0xAC, 0xD9, 0x84, 0x20, ++ 0xD8, 0xAC, 0xD9, 0x84, 0xD8, 0xA7, 0xD9, 0x84, ++ 0xD9, 0x87, 0xD8, 0xB1, 0xDB, 0x8C, 0xD8, 0xA7, ++ 0xD9, 0x84, 0x2E, 0x2E, 0xE2, 0x80, 0x94, 0xE2, ++ 0x80, 0x93, 0x5F, 0x5F, 0x28, 0x29, 0x7B, 0x7D, ++ 0xE3, 0x80, 0x94, 0xE3, 0x80, 0x95, 0xE3, 0x80, ++ 0x90, 0xE3, 0x80, 0x91, 0xE3, 0x80, 0x8A, 0xE3, ++ 0x80, 0x8B, 0xE3, 0x80, 0x88, 0xE3, 0x80, 0x89, ++ 0xE3, 0x80, 0x8C, 0xE3, 0x80, 0x8D, 0xE3, 0x80, ++ 0x8E, 0xE3, 0x80, 0x8F, 0x20, 0xCC, 0x85, 0x20, ++ 0xCC, 0x85, 0x20, 0xCC, 0x85, 0x20, 0xCC, 0x85, ++ 0x5F, 0x5F, 0x5F, 0x2C, 0xE3, 0x80, 0x81, 0x2E, ++ 0x3B, 0x3A, 0x3F, 0x21, 0xE2, 0x80, 0x94, 0x28, ++ 0x29, 0x7B, 0x7D, 0xE3, 0x80, 0x94, 0xE3, 0x80, ++ 0x95, 0x23, 0x26, 0x2A, 0x2B, 0x2D, 0x3C, 0x3E, ++ 0x3D, 0x5C, 0x24, 0x25, 0x40, 0x20, 0xD9, 0x8B, ++ 0xD9, 0x80, 0xD9, 0x8B, 0x20, 0xD9, 0x8C, 0x20, ++ 0xD9, 0x8D, 0x20, 0xD9, 0x8E, 0xD9, 0x80, 0xD9, ++ 0x8E, 0x20, 0xD9, 0x8F, 0xD9, 0x80, 0xD9, 0x8F, ++ 0x20, 0xD9, 0x90, 0xD9, 0x80, 0xD9, 0x90, 0x20, ++ 0xD9, 0x91, 0xD9, 0x80, 0xD9, 0x91, 0x20, 0xD9, ++ 0x92, 0xD9, 0x80, 0xD9, 0x92, 0xD8, 0xA1, 0xD8, ++ 0xA7, 0xD9, 0x93, 0xD8, 0xA7, 0xD9, 0x93, 0xD8, ++ 0xA7, 0xD9, 0x94, 0xD8, 0xA7, 0xD9, 0x94, 0xD9, ++ 0x88, 0xD9, 0x94, 0xD9, 0x88, 0xD9, 0x94, 0xD8, ++ 0xA7, 0xD9, 0x95, 0xD8, 0xA7, 0xD9, 0x95, 0xD9, ++ 0x8A, 0xD9, 0x94, 0xD9, 0x8A, 0xD9, 0x94, 0xD9, ++ 0x8A, 0xD9, 0x94, 0xD9, 0x8A, 0xD9, 0x94, 0xD8, ++ 0xA7, 0xD8, 0xA7, 0xD8, 0xA8, 0xD8, 0xA8, 0xD8, ++ 0xA8, 0xD8, 0xA8, 0xD8, 0xA9, 0xD8, 0xA9, 0xD8, ++ 0xAA, 0xD8, 0xAA, 0xD8, 0xAA, 0xD8, 0xAA, 0xD8, ++ 0xAB, 0xD8, 0xAB, 0xD8, 
0xAB, 0xD8, 0xAB, 0xD8, ++ 0xAC, 0xD8, 0xAC, 0xD8, 0xAC, 0xD8, 0xAC, 0xD8, ++ 0xAD, 0xD8, 0xAD, 0xD8, 0xAD, 0xD8, 0xAD, 0xD8, ++ 0xAE, 0xD8, 0xAE, 0xD8, 0xAE, 0xD8, 0xAE, 0xD8, ++ 0xAF, 0xD8, 0xAF, 0xD8, 0xB0, 0xD8, 0xB0, 0xD8, ++ 0xB1, 0xD8, 0xB1, 0xD8, 0xB2, 0xD8, 0xB2, 0xD8, ++ 0xB3, 0xD8, 0xB3, 0xD8, 0xB3, 0xD8, 0xB3, 0xD8, ++ 0xB4, 0xD8, 0xB4, 0xD8, 0xB4, 0xD8, 0xB4, 0xD8, ++ 0xB5, 0xD8, 0xB5, 0xD8, 0xB5, 0xD8, 0xB5, 0xD8, ++ 0xB6, 0xD8, 0xB6, 0xD8, 0xB6, 0xD8, 0xB6, 0xD8, ++ 0xB7, 0xD8, 0xB7, 0xD8, 0xB7, 0xD8, 0xB7, 0xD8, ++ 0xB8, 0xD8, 0xB8, 0xD8, 0xB8, 0xD8, 0xB8, 0xD8, ++ 0xB9, 0xD8, 0xB9, 0xD8, 0xB9, 0xD8, 0xB9, 0xD8, ++ 0xBA, 0xD8, 0xBA, 0xD8, 0xBA, 0xD8, 0xBA, 0xD9, ++ 0x81, 0xD9, 0x81, 0xD9, 0x81, 0xD9, 0x81, 0xD9, ++ 0x82, 0xD9, 0x82, 0xD9, 0x82, 0xD9, 0x82, 0xD9, ++ 0x83, 0xD9, 0x83, 0xD9, 0x83, 0xD9, 0x83, 0xD9, ++ 0x84, 0xD9, 0x84, 0xD9, 0x84, 0xD9, 0x84, 0xD9, ++ 0x85, 0xD9, 0x85, 0xD9, 0x85, 0xD9, 0x85, 0xD9, ++ 0x86, 0xD9, 0x86, 0xD9, 0x86, 0xD9, 0x86, 0xD9, ++ 0x87, 0xD9, 0x87, 0xD9, 0x87, 0xD9, 0x87, 0xD9, ++ 0x88, 0xD9, 0x88, 0xD9, 0x89, 0xD9, 0x89, 0xD9, ++ 0x8A, 0xD9, 0x8A, 0xD9, 0x8A, 0xD9, 0x8A, 0xD9, ++ 0x84, 0xD8, 0xA7, 0xD9, 0x93, 0xD9, 0x84, 0xD8, ++ 0xA7, 0xD9, 0x93, 0xD9, 0x84, 0xD8, 0xA7, 0xD9, ++ 0x94, 0xD9, 0x84, 0xD8, 0xA7, 0xD9, 0x94, 0xD9, ++ 0x84, 0xD8, 0xA7, 0xD9, 0x95, 0xD9, 0x84, 0xD8, ++ 0xA7, 0xD9, 0x95, 0xD9, 0x84, 0xD8, 0xA7, 0xD9, ++ 0x84, 0xD8, 0xA7, 0x21, 0x22, 0x23, 0x24, 0x25, ++ 0x26, 0x27, 0x28, 0x29, 0x2A, 0x2B, 0x2C, 0x2D, ++ 0x2E, 0x2F, 0x30, 0x31, 0x32, 0x33, 0x34, 0x35, ++ 0x36, 0x37, 0x38, 0x39, 0x3A, 0x3B, 0x3C, 0x3D, ++ 0x3E, 0x3F, 0x40, 0x41, 0x42, 0x43, 0x44, 0x45, ++ 0x46, 0x47, 0x48, 0x49, 0x4A, 0x4B, 0x4C, 0x4D, ++ 0x4E, 0x4F, 0x50, 0x51, 0x52, 0x53, 0x54, 0x55, ++ 0x56, 0x57, 0x58, 0x59, 0x5A, 0x5B, 0x5C, 0x5D, ++ 0x5E, 0x5F, 0x60, 0x61, 0x62, 0x63, 0x64, 0x65, ++ 0x66, 0x67, 0x68, 0x69, 0x6A, 0x6B, 0x6C, 0x6D, ++ 0x6E, 0x6F, 0x70, 0x71, 0x72, 0x73, 0x74, 0x75, ++ 0x76, 0x77, 0x78, 0x79, 0x7A, 0x7B, 0x7C, 0x7D, ++ 0x7E, 0xE2, 0xA6, 0x85, 0xE2, 0xA6, 0x86, 0xE3, ++ 0x80, 0x82, 0xE3, 0x80, 0x8C, 0xE3, 0x80, 0x8D, ++ 0xE3, 0x80, 0x81, 0xE3, 0x83, 0xBB, 0xE3, 0x83, ++ 0xB2, 0xE3, 0x82, 0xA1, 0xE3, 0x82, 0xA3, 0xE3, ++ 0x82, 0xA5, 0xE3, 0x82, 0xA7, 0xE3, 0x82, 0xA9, ++ 0xE3, 0x83, 0xA3, 0xE3, 0x83, 0xA5, 0xE3, 0x83, ++ 0xA7, 0xE3, 0x83, 0x83, 0xE3, 0x83, 0xBC, 0xE3, ++ 0x82, 0xA2, 0xE3, 0x82, 0xA4, 0xE3, 0x82, 0xA6, ++ 0xE3, 0x82, 0xA8, 0xE3, 0x82, 0xAA, 0xE3, 0x82, ++ 0xAB, 0xE3, 0x82, 0xAD, 0xE3, 0x82, 0xAF, 0xE3, ++ 0x82, 0xB1, 0xE3, 0x82, 0xB3, 0xE3, 0x82, 0xB5, ++ 0xE3, 0x82, 0xB7, 0xE3, 0x82, 0xB9, 0xE3, 0x82, ++ 0xBB, 0xE3, 0x82, 0xBD, 0xE3, 0x82, 0xBF, 0xE3, ++ 0x83, 0x81, 0xE3, 0x83, 0x84, 0xE3, 0x83, 0x86, ++ 0xE3, 0x83, 0x88, 0xE3, 0x83, 0x8A, 0xE3, 0x83, ++ 0x8B, 0xE3, 0x83, 0x8C, 0xE3, 0x83, 0x8D, 0xE3, ++ 0x83, 0x8E, 0xE3, 0x83, 0x8F, 0xE3, 0x83, 0x92, ++ 0xE3, 0x83, 0x95, 0xE3, 0x83, 0x98, 0xE3, 0x83, ++ 0x9B, 0xE3, 0x83, 0x9E, 0xE3, 0x83, 0x9F, 0xE3, ++ 0x83, 0xA0, 0xE3, 0x83, 0xA1, 0xE3, 0x83, 0xA2, ++ 0xE3, 0x83, 0xA4, 0xE3, 0x83, 0xA6, 0xE3, 0x83, ++ 0xA8, 0xE3, 0x83, 0xA9, 0xE3, 0x83, 0xAA, 0xE3, ++ 0x83, 0xAB, 0xE3, 0x83, 0xAC, 0xE3, 0x83, 0xAD, ++ 0xE3, 0x83, 0xAF, 0xE3, 0x83, 0xB3, 0xE3, 0x82, ++ 0x99, 0xE3, 0x82, 0x9A, 0xE1, 0x85, 0xA0, 0xE1, ++ 0x84, 0x80, 0xE1, 0x84, 0x81, 0xE1, 0x86, 0xAA, ++ 0xE1, 0x84, 0x82, 0xE1, 0x86, 0xAC, 0xE1, 0x86, ++ 0xAD, 0xE1, 0x84, 0x83, 0xE1, 0x84, 0x84, 0xE1, ++ 0x84, 0x85, 0xE1, 0x86, 0xB0, 0xE1, 0x86, 0xB1, ++ 0xE1, 0x86, 0xB2, 0xE1, 0x86, 0xB3, 0xE1, 0x86, ++ 0xB4, 
0xE1, 0x86, 0xB5, 0xE1, 0x84, 0x9A, 0xE1, ++ 0x84, 0x86, 0xE1, 0x84, 0x87, 0xE1, 0x84, 0x88, ++ 0xE1, 0x84, 0xA1, 0xE1, 0x84, 0x89, 0xE1, 0x84, ++ 0x8A, 0xE1, 0x84, 0x8B, 0xE1, 0x84, 0x8C, 0xE1, ++ 0x84, 0x8D, 0xE1, 0x84, 0x8E, 0xE1, 0x84, 0x8F, ++ 0xE1, 0x84, 0x90, 0xE1, 0x84, 0x91, 0xE1, 0x84, ++ 0x92, 0xE1, 0x85, 0xA1, 0xE1, 0x85, 0xA2, 0xE1, ++ 0x85, 0xA3, 0xE1, 0x85, 0xA4, 0xE1, 0x85, 0xA5, ++ 0xE1, 0x85, 0xA6, 0xE1, 0x85, 0xA7, 0xE1, 0x85, ++ 0xA8, 0xE1, 0x85, 0xA9, 0xE1, 0x85, 0xAA, 0xE1, ++ 0x85, 0xAB, 0xE1, 0x85, 0xAC, 0xE1, 0x85, 0xAD, ++ 0xE1, 0x85, 0xAE, 0xE1, 0x85, 0xAF, 0xE1, 0x85, ++ 0xB0, 0xE1, 0x85, 0xB1, 0xE1, 0x85, 0xB2, 0xE1, ++ 0x85, 0xB3, 0xE1, 0x85, 0xB4, 0xE1, 0x85, 0xB5, ++ 0xC2, 0xA2, 0xC2, 0xA3, 0xC2, 0xAC, 0x20, 0xCC, ++ 0x84, 0xC2, 0xA6, 0xC2, 0xA5, 0xE2, 0x82, 0xA9, ++ 0xE2, 0x94, 0x82, 0xE2, 0x86, 0x90, 0xE2, 0x86, ++ 0x91, 0xE2, 0x86, 0x92, 0xE2, 0x86, 0x93, 0xE2, ++ 0x96, 0xA0, 0xE2, 0x97, 0x8B, 0xF6, 0xF0, 0x9D, ++ 0x85, 0x97, 0xF0, 0x9D, 0x85, 0xA5, 0xF6, 0xF0, ++ 0x9D, 0x85, 0x98, 0xF0, 0x9D, 0x85, 0xA5, 0xF6, ++ 0xF0, 0x9D, 0x85, 0x98, 0xF0, 0x9D, 0x85, 0xA5, ++ 0xF0, 0x9D, 0x85, 0xAE, 0xF6, 0xF0, 0x9D, 0x85, ++ 0x98, 0xF0, 0x9D, 0x85, 0xA5, 0xF0, 0x9D, 0x85, ++ 0xAF, 0xF6, 0xF0, 0x9D, 0x85, 0x98, 0xF0, 0x9D, ++ 0x85, 0xA5, 0xF0, 0x9D, 0x85, 0xB0, 0xF6, 0xF0, ++ 0x9D, 0x85, 0x98, 0xF0, 0x9D, 0x85, 0xA5, 0xF0, ++ 0x9D, 0x85, 0xB1, 0xF6, 0xF0, 0x9D, 0x85, 0x98, ++ 0xF0, 0x9D, 0x85, 0xA5, 0xF0, 0x9D, 0x85, 0xB2, ++ 0xF6, 0xF0, 0x9D, 0x86, 0xB9, 0xF0, 0x9D, 0x85, ++ 0xA5, 0xF6, 0xF0, 0x9D, 0x86, 0xBA, 0xF0, 0x9D, ++ 0x85, 0xA5, 0xF6, 0xF0, 0x9D, 0x86, 0xB9, 0xF0, ++ 0x9D, 0x85, 0xA5, 0xF0, 0x9D, 0x85, 0xAE, 0xF6, ++ 0xF0, 0x9D, 0x86, 0xBA, 0xF0, 0x9D, 0x85, 0xA5, ++ 0xF0, 0x9D, 0x85, 0xAE, 0xF6, 0xF0, 0x9D, 0x86, ++ 0xB9, 0xF0, 0x9D, 0x85, 0xA5, 0xF0, 0x9D, 0x85, ++ 0xAF, 0xF6, 0xF0, 0x9D, 0x86, 0xBA, 0xF0, 0x9D, ++ 0x85, 0xA5, 0xF0, 0x9D, 0x85, 0xAF, 0x41, 0x42, ++ 0x43, 0x44, 0x45, 0x46, 0x47, 0x48, 0x49, 0x4A, ++ 0x4B, 0x4C, 0x4D, 0x4E, 0x4F, 0x50, 0x51, 0x52, ++ 0x53, 0x54, 0x55, 0x56, 0x57, 0x58, 0x59, 0x5A, ++ 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, 0x68, ++ 0x69, 0x6A, 0x6B, 0x6C, 0x6D, 0x6E, 0x6F, 0x70, ++ 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77, 0x78, ++ 0x79, 0x7A, 0x41, 0x42, 0x43, 0x44, 0x45, 0x46, ++ 0x47, 0x48, 0x49, 0x4A, 0x4B, 0x4C, 0x4D, 0x4E, ++ 0x4F, 0x50, 0x51, 0x52, 0x53, 0x54, 0x55, 0x56, ++ 0x57, 0x58, 0x59, 0x5A, 0x61, 0x62, 0x63, 0x64, ++ 0x65, 0x66, 0x67, 0x69, 0x6A, 0x6B, 0x6C, 0x6D, ++ 0x6E, 0x6F, 0x70, 0x71, 0x72, 0x73, 0x74, 0x75, ++ 0x76, 0x77, 0x78, 0x79, 0x7A, 0x41, 0x42, 0x43, ++ 0x44, 0x45, 0x46, 0x47, 0x48, 0x49, 0x4A, 0x4B, ++ 0x4C, 0x4D, 0x4E, 0x4F, 0x50, 0x51, 0x52, 0x53, ++ 0x54, 0x55, 0x56, 0x57, 0x58, 0x59, 0x5A, 0x61, ++ 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69, ++ 0x6A, 0x6B, 0x6C, 0x6D, 0x6E, 0x6F, 0x70, 0x71, ++ 0x72, 0x73, 0x74, 0x75, 0x76, 0x77, 0x78, 0x79, ++ 0x7A, 0x41, 0x43, 0x44, 0x47, 0x4A, 0x4B, 0x4E, ++ 0x4F, 0x50, 0x51, 0x53, 0x54, 0x55, 0x56, 0x57, ++ 0x58, 0x59, 0x5A, 0x61, 0x62, 0x63, 0x64, 0x66, ++ 0x68, 0x69, 0x6A, 0x6B, 0x6D, 0x6E, 0x70, 0x71, ++ 0x72, 0x73, 0x74, 0x75, 0x76, 0x77, 0x78, 0x79, ++ 0x7A, 0x41, 0x42, 0x43, 0x44, 0x45, 0x46, 0x47, ++ 0x48, 0x49, 0x4A, 0x4B, 0x4C, 0x4D, 0x4E, 0x4F, ++ 0x50, 0x51, 0x52, 0x53, 0x54, 0x55, 0x56, 0x57, ++ 0x58, 0x59, 0x5A, 0x61, 0x62, 0x63, 0x64, 0x65, ++ 0x66, 0x67, 0x68, 0x69, 0x6A, 0x6B, 0x6C, 0x6D, ++ 0x6E, 0x6F, 0x70, 0x71, 0x72, 0x73, 0x74, 0x75, ++ 0x76, 0x77, 0x78, 0x79, 0x7A, 0x41, 0x42, 0x44, ++ 0x45, 0x46, 0x47, 0x4A, 0x4B, 0x4C, 0x4D, 
0x4E, ++ 0x4F, 0x50, 0x51, 0x53, 0x54, 0x55, 0x56, 0x57, ++ 0x58, 0x59, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, ++ 0x67, 0x68, 0x69, 0x6A, 0x6B, 0x6C, 0x6D, 0x6E, ++ 0x6F, 0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, ++ 0x77, 0x78, 0x79, 0x7A, 0x41, 0x42, 0x44, 0x45, ++ 0x46, 0x47, 0x49, 0x4A, 0x4B, 0x4C, 0x4D, 0x4F, ++ 0x53, 0x54, 0x55, 0x56, 0x57, 0x58, 0x59, 0x61, ++ 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69, ++ 0x6A, 0x6B, 0x6C, 0x6D, 0x6E, 0x6F, 0x70, 0x71, ++ 0x72, 0x73, 0x74, 0x75, 0x76, 0x77, 0x78, 0x79, ++ 0x7A, 0x41, 0x42, 0x43, 0x44, 0x45, 0x46, 0x47, ++ 0x48, 0x49, 0x4A, 0x4B, 0x4C, 0x4D, 0x4E, 0x4F, ++ 0x50, 0x51, 0x52, 0x53, 0x54, 0x55, 0x56, 0x57, ++ 0x58, 0x59, 0x5A, 0x61, 0x62, 0x63, 0x64, 0x65, ++ 0x66, 0x67, 0x68, 0x69, 0x6A, 0x6B, 0x6C, 0x6D, ++ 0x6E, 0x6F, 0x70, 0x71, 0x72, 0x73, 0x74, 0x75, ++ 0x76, 0x77, 0x78, 0x79, 0x7A, 0x41, 0x42, 0x43, ++ 0x44, 0x45, 0x46, 0x47, 0x48, 0x49, 0x4A, 0x4B, ++ 0x4C, 0x4D, 0x4E, 0x4F, 0x50, 0x51, 0x52, 0x53, ++ 0x54, 0x55, 0x56, 0x57, 0x58, 0x59, 0x5A, 0x61, ++ 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69, ++ 0x6A, 0x6B, 0x6C, 0x6D, 0x6E, 0x6F, 0x70, 0x71, ++ 0x72, 0x73, 0x74, 0x75, 0x76, 0x77, 0x78, 0x79, ++ 0x7A, 0x41, 0x42, 0x43, 0x44, 0x45, 0x46, 0x47, ++ 0x48, 0x49, 0x4A, 0x4B, 0x4C, 0x4D, 0x4E, 0x4F, ++ 0x50, 0x51, 0x52, 0x53, 0x54, 0x55, 0x56, 0x57, ++ 0x58, 0x59, 0x5A, 0x61, 0x62, 0x63, 0x64, 0x65, ++ 0x66, 0x67, 0x68, 0x69, 0x6A, 0x6B, 0x6C, 0x6D, ++ 0x6E, 0x6F, 0x70, 0x71, 0x72, 0x73, 0x74, 0x75, ++ 0x76, 0x77, 0x78, 0x79, 0x7A, 0x41, 0x42, 0x43, ++ 0x44, 0x45, 0x46, 0x47, 0x48, 0x49, 0x4A, 0x4B, ++ 0x4C, 0x4D, 0x4E, 0x4F, 0x50, 0x51, 0x52, 0x53, ++ 0x54, 0x55, 0x56, 0x57, 0x58, 0x59, 0x5A, 0x61, ++ 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69, ++ 0x6A, 0x6B, 0x6C, 0x6D, 0x6E, 0x6F, 0x70, 0x71, ++ 0x72, 0x73, 0x74, 0x75, 0x76, 0x77, 0x78, 0x79, ++ 0x7A, 0x41, 0x42, 0x43, 0x44, 0x45, 0x46, 0x47, ++ 0x48, 0x49, 0x4A, 0x4B, 0x4C, 0x4D, 0x4E, 0x4F, ++ 0x50, 0x51, 0x52, 0x53, 0x54, 0x55, 0x56, 0x57, ++ 0x58, 0x59, 0x5A, 0x61, 0x62, 0x63, 0x64, 0x65, ++ 0x66, 0x67, 0x68, 0x69, 0x6A, 0x6B, 0x6C, 0x6D, ++ 0x6E, 0x6F, 0x70, 0x71, 0x72, 0x73, 0x74, 0x75, ++ 0x76, 0x77, 0x78, 0x79, 0x7A, 0x41, 0x42, 0x43, ++ 0x44, 0x45, 0x46, 0x47, 0x48, 0x49, 0x4A, 0x4B, ++ 0x4C, 0x4D, 0x4E, 0x4F, 0x50, 0x51, 0x52, 0x53, ++ 0x54, 0x55, 0x56, 0x57, 0x58, 0x59, 0x5A, 0x61, ++ 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69, ++ 0x6A, 0x6B, 0x6C, 0x6D, 0x6E, 0x6F, 0x70, 0x71, ++ 0x72, 0x73, 0x74, 0x75, 0x76, 0x77, 0x78, 0x79, ++ 0x7A, 0xCE, 0x91, 0xCE, 0x92, 0xCE, 0x93, 0xCE, ++ 0x94, 0xCE, 0x95, 0xCE, 0x96, 0xCE, 0x97, 0xCE, ++ 0x98, 0xCE, 0x99, 0xCE, 0x9A, 0xCE, 0x9B, 0xCE, ++ 0x9C, 0xCE, 0x9D, 0xCE, 0x9E, 0xCE, 0x9F, 0xCE, ++ 0xA0, 0xCE, 0xA1, 0xCE, 0x98, 0xCE, 0xA3, 0xCE, ++ 0xA4, 0xCE, 0xA5, 0xCE, 0xA6, 0xCE, 0xA7, 0xCE, ++ 0xA8, 0xCE, 0xA9, 0xE2, 0x88, 0x87, 0xCE, 0xB1, ++ 0xCE, 0xB2, 0xCE, 0xB3, 0xCE, 0xB4, 0xCE, 0xB5, ++ 0xCE, 0xB6, 0xCE, 0xB7, 0xCE, 0xB8, 0xCE, 0xB9, ++ 0xCE, 0xBA, 0xCE, 0xBB, 0xCE, 0xBC, 0xCE, 0xBD, ++ 0xCE, 0xBE, 0xCE, 0xBF, 0xCF, 0x80, 0xCF, 0x81, ++ 0xCF, 0x82, 0xCF, 0x83, 0xCF, 0x84, 0xCF, 0x85, ++ 0xCF, 0x86, 0xCF, 0x87, 0xCF, 0x88, 0xCF, 0x89, ++ 0xE2, 0x88, 0x82, 0xCE, 0xB5, 0xCE, 0xB8, 0xCE, ++ 0xBA, 0xCF, 0x86, 0xCF, 0x81, 0xCF, 0x80, 0xCE, ++ 0x91, 0xCE, 0x92, 0xCE, 0x93, 0xCE, 0x94, 0xCE, ++ 0x95, 0xCE, 0x96, 0xCE, 0x97, 0xCE, 0x98, 0xCE, ++ 0x99, 0xCE, 0x9A, 0xCE, 0x9B, 0xCE, 0x9C, 0xCE, ++ 0x9D, 0xCE, 0x9E, 0xCE, 0x9F, 0xCE, 0xA0, 0xCE, ++ 0xA1, 0xCE, 0x98, 0xCE, 0xA3, 0xCE, 0xA4, 0xCE, ++ 0xA5, 0xCE, 0xA6, 0xCE, 
0xA7, 0xCE, 0xA8, 0xCE, ++ 0xA9, 0xE2, 0x88, 0x87, 0xCE, 0xB1, 0xCE, 0xB2, ++ 0xCE, 0xB3, 0xCE, 0xB4, 0xCE, 0xB5, 0xCE, 0xB6, ++ 0xCE, 0xB7, 0xCE, 0xB8, 0xCE, 0xB9, 0xCE, 0xBA, ++ 0xCE, 0xBB, 0xCE, 0xBC, 0xCE, 0xBD, 0xCE, 0xBE, ++ 0xCE, 0xBF, 0xCF, 0x80, 0xCF, 0x81, 0xCF, 0x82, ++ 0xCF, 0x83, 0xCF, 0x84, 0xCF, 0x85, 0xCF, 0x86, ++ 0xCF, 0x87, 0xCF, 0x88, 0xCF, 0x89, 0xE2, 0x88, ++ 0x82, 0xCE, 0xB5, 0xCE, 0xB8, 0xCE, 0xBA, 0xCF, ++ 0x86, 0xCF, 0x81, 0xCF, 0x80, 0xCE, 0x91, 0xCE, ++ 0x92, 0xCE, 0x93, 0xCE, 0x94, 0xCE, 0x95, 0xCE, ++ 0x96, 0xCE, 0x97, 0xCE, 0x98, 0xCE, 0x99, 0xCE, ++ 0x9A, 0xCE, 0x9B, 0xCE, 0x9C, 0xCE, 0x9D, 0xCE, ++ 0x9E, 0xCE, 0x9F, 0xCE, 0xA0, 0xCE, 0xA1, 0xCE, ++ 0x98, 0xCE, 0xA3, 0xCE, 0xA4, 0xCE, 0xA5, 0xCE, ++ 0xA6, 0xCE, 0xA7, 0xCE, 0xA8, 0xCE, 0xA9, 0xE2, ++ 0x88, 0x87, 0xCE, 0xB1, 0xCE, 0xB2, 0xCE, 0xB3, ++ 0xCE, 0xB4, 0xCE, 0xB5, 0xCE, 0xB6, 0xCE, 0xB7, ++ 0xCE, 0xB8, 0xCE, 0xB9, 0xCE, 0xBA, 0xCE, 0xBB, ++ 0xCE, 0xBC, 0xCE, 0xBD, 0xCE, 0xBE, 0xCE, 0xBF, ++ 0xCF, 0x80, 0xCF, 0x81, 0xCF, 0x82, 0xCF, 0x83, ++ 0xCF, 0x84, 0xCF, 0x85, 0xCF, 0x86, 0xCF, 0x87, ++ 0xCF, 0x88, 0xCF, 0x89, 0xE2, 0x88, 0x82, 0xCE, ++ 0xB5, 0xCE, 0xB8, 0xCE, 0xBA, 0xCF, 0x86, 0xCF, ++ 0x81, 0xCF, 0x80, 0xCE, 0x91, 0xCE, 0x92, 0xCE, ++ 0x93, 0xCE, 0x94, 0xCE, 0x95, 0xCE, 0x96, 0xCE, ++ 0x97, 0xCE, 0x98, 0xCE, 0x99, 0xCE, 0x9A, 0xCE, ++ 0x9B, 0xCE, 0x9C, 0xCE, 0x9D, 0xCE, 0x9E, 0xCE, ++ 0x9F, 0xCE, 0xA0, 0xCE, 0xA1, 0xCE, 0x98, 0xCE, ++ 0xA3, 0xCE, 0xA4, 0xCE, 0xA5, 0xCE, 0xA6, 0xCE, ++ 0xA7, 0xCE, 0xA8, 0xCE, 0xA9, 0xE2, 0x88, 0x87, ++ 0xCE, 0xB1, 0xCE, 0xB2, 0xCE, 0xB3, 0xCE, 0xB4, ++ 0xCE, 0xB5, 0xCE, 0xB6, 0xCE, 0xB7, 0xCE, 0xB8, ++ 0xCE, 0xB9, 0xCE, 0xBA, 0xCE, 0xBB, 0xCE, 0xBC, ++ 0xCE, 0xBD, 0xCE, 0xBE, 0xCE, 0xBF, 0xCF, 0x80, ++ 0xCF, 0x81, 0xCF, 0x82, 0xCF, 0x83, 0xCF, 0x84, ++ 0xCF, 0x85, 0xCF, 0x86, 0xCF, 0x87, 0xCF, 0x88, ++ 0xCF, 0x89, 0xE2, 0x88, 0x82, 0xCE, 0xB5, 0xCE, ++ 0xB8, 0xCE, 0xBA, 0xCF, 0x86, 0xCF, 0x81, 0xCF, ++ 0x80, 0xCE, 0x91, 0xCE, 0x92, 0xCE, 0x93, 0xCE, ++ 0x94, 0xCE, 0x95, 0xCE, 0x96, 0xCE, 0x97, 0xCE, ++ 0x98, 0xCE, 0x99, 0xCE, 0x9A, 0xCE, 0x9B, 0xCE, ++ 0x9C, 0xCE, 0x9D, 0xCE, 0x9E, 0xCE, 0x9F, 0xCE, ++ 0xA0, 0xCE, 0xA1, 0xCE, 0x98, 0xCE, 0xA3, 0xCE, ++ 0xA4, 0xCE, 0xA5, 0xCE, 0xA6, 0xCE, 0xA7, 0xCE, ++ 0xA8, 0xCE, 0xA9, 0xE2, 0x88, 0x87, 0xCE, 0xB1, ++ 0xCE, 0xB2, 0xCE, 0xB3, 0xCE, 0xB4, 0xCE, 0xB5, ++ 0xCE, 0xB6, 0xCE, 0xB7, 0xCE, 0xB8, 0xCE, 0xB9, ++ 0xCE, 0xBA, 0xCE, 0xBB, 0xCE, 0xBC, 0xCE, 0xBD, ++ 0xCE, 0xBE, 0xCE, 0xBF, 0xCF, 0x80, 0xCF, 0x81, ++ 0xCF, 0x82, 0xCF, 0x83, 0xCF, 0x84, 0xCF, 0x85, ++ 0xCF, 0x86, 0xCF, 0x87, 0xCF, 0x88, 0xCF, 0x89, ++ 0xE2, 0x88, 0x82, 0xCE, 0xB5, 0xCE, 0xB8, 0xCE, ++ 0xBA, 0xCF, 0x86, 0xCF, 0x81, 0xCF, 0x80, 0x30, ++ 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37, 0x38, ++ 0x39, 0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, ++ 0x37, 0x38, 0x39, 0x30, 0x31, 0x32, 0x33, 0x34, ++ 0x35, 0x36, 0x37, 0x38, 0x39, 0x30, 0x31, 0x32, ++ 0x33, 0x34, 0x35, 0x36, 0x37, 0x38, 0x39, 0x30, ++ 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37, 0x38, ++ 0x39, 0xF6, 0xE4, 0xB8, 0xBD, 0xF6, 0xE4, 0xB8, ++ 0xB8, 0xF6, 0xE4, 0xB9, 0x81, 0xF6, 0xF0, 0xA0, ++ 0x84, 0xA2, 0xF6, 0xE4, 0xBD, 0xA0, 0xF6, 0xE4, ++ 0xBE, 0xAE, 0xF6, 0xE4, 0xBE, 0xBB, 0xF6, 0xE5, ++ 0x80, 0x82, 0xF6, 0xE5, 0x81, 0xBA, 0xF6, 0xE5, ++ 0x82, 0x99, 0xF6, 0xE5, 0x83, 0xA7, 0xF6, 0xE5, ++ 0x83, 0x8F, 0xF6, 0xE3, 0x92, 0x9E, 0xF6, 0xF0, ++ 0xA0, 0x98, 0xBA, 0xF6, 0xE5, 0x85, 0x8D, 0xF6, ++ 0xE5, 0x85, 0x94, 0xF6, 0xE5, 0x85, 0xA4, 0xF6, ++ 0xE5, 0x85, 0xB7, 0xF6, 0xF0, 0xA0, 0x94, 0x9C, ++ 0xF6, 
0xE3, 0x92, 0xB9, 0xF6, 0xE5, 0x85, 0xA7, ++ 0xF6, 0xE5, 0x86, 0x8D, 0xF6, 0xF0, 0xA0, 0x95, ++ 0x8B, 0xF6, 0xE5, 0x86, 0x97, 0xF6, 0xE5, 0x86, ++ 0xA4, 0xF6, 0xE4, 0xBB, 0x8C, 0xF6, 0xE5, 0x86, ++ 0xAC, 0xF6, 0xE5, 0x86, 0xB5, 0xF6, 0xF0, 0xA9, ++ 0x87, 0x9F, 0xF6, 0xE5, 0x87, 0xB5, 0xF6, 0xE5, ++ 0x88, 0x83, 0xF6, 0xE3, 0x93, 0x9F, 0xF6, 0xE5, ++ 0x88, 0xBB, 0xF6, 0xE5, 0x89, 0x86, 0xF6, 0xE5, ++ 0x89, 0xB2, 0xF6, 0xE5, 0x89, 0xB7, 0xF6, 0xE3, ++ 0x94, 0x95, 0xF6, 0xE5, 0x8B, 0x87, 0xF6, 0xE5, ++ 0x8B, 0x89, 0xF6, 0xE5, 0x8B, 0xA4, 0xF6, 0xE5, ++ 0x8B, 0xBA, 0xF6, 0xE5, 0x8C, 0x85, 0xF6, 0xE5, ++ 0x8C, 0x86, 0xF6, 0xE5, 0x8C, 0x97, 0xF6, 0xE5, ++ 0x8D, 0x89, 0xF6, 0xE5, 0x8D, 0x91, 0xF6, 0xE5, ++ 0x8D, 0x9A, 0xF6, 0xE5, 0x8D, 0xB3, 0xF6, 0xE5, ++ 0x8D, 0xBD, 0xF6, 0xE5, 0x8D, 0xBF, 0xF6, 0xE5, ++ 0x8D, 0xBF, 0xF6, 0xE5, 0x8D, 0xBF, 0xF6, 0xF0, ++ 0xA0, 0xA8, 0xAC, 0xF6, 0xE7, 0x81, 0xB0, 0xF6, ++ 0xE5, 0x8F, 0x8A, 0xF6, 0xE5, 0x8F, 0x9F, 0xF6, ++ 0xF0, 0xA0, 0xAD, 0xA3, 0xF6, 0xE5, 0x8F, 0xAB, ++ 0xF6, 0xE5, 0x8F, 0xB1, 0xF6, 0xE5, 0x90, 0x86, ++ 0xF6, 0xE5, 0x92, 0x9E, 0xF6, 0xE5, 0x90, 0xB8, ++ 0xF6, 0xE5, 0x91, 0x88, 0xF6, 0xE5, 0x91, 0xA8, ++ 0xF6, 0xE5, 0x92, 0xA2, 0xF6, 0xE5, 0x93, 0xB6, ++ 0xF6, 0xE5, 0x94, 0x90, 0xF6, 0xE5, 0x95, 0x93, ++ 0xF6, 0xE5, 0x95, 0xA3, 0xF6, 0xE5, 0x96, 0x84, ++ 0xF6, 0xE5, 0x96, 0x84, 0xF6, 0xE5, 0x96, 0x99, ++ 0xF6, 0xE5, 0x96, 0xAB, 0xF6, 0xE5, 0x96, 0xB3, ++ 0xF6, 0xE5, 0x97, 0x82, 0xF6, 0xE5, 0x9C, 0x96, ++ 0xF6, 0xE5, 0x98, 0x86, 0xF6, 0xE5, 0x9C, 0x97, ++ 0xF6, 0xE5, 0x99, 0x91, 0xF6, 0xE5, 0x99, 0xB4, ++ 0xF6, 0xE5, 0x88, 0x87, 0xF6, 0xE5, 0xA3, 0xAE, ++ 0xF6, 0xE5, 0x9F, 0x8E, 0xF6, 0xE5, 0x9F, 0xB4, ++ 0xF6, 0xE5, 0xA0, 0x8D, 0xF6, 0xE5, 0x9E, 0x8B, ++ 0xF6, 0xE5, 0xA0, 0xB2, 0xF6, 0xE5, 0xA0, 0xB1, ++ 0xF6, 0xE5, 0xA2, 0xAC, 0xF6, 0xF0, 0xA1, 0x93, ++ 0xA4, 0xF6, 0xE5, 0xA3, 0xB2, 0xF6, 0xE5, 0xA3, ++ 0xB7, 0xF6, 0xE5, 0xA4, 0x86, 0xF6, 0xE5, 0xA4, ++ 0x9A, 0xF6, 0xE5, 0xA4, 0xA2, 0xF6, 0xE5, 0xA5, ++ 0xA2, 0xF6, 0xF0, 0xA1, 0x9A, 0xA8, 0xF6, 0xF0, ++ 0xA1, 0x9B, 0xAA, 0xF6, 0xE5, 0xA7, 0xAC, 0xF6, ++ 0xE5, 0xA8, 0x9B, 0xF6, 0xE5, 0xA8, 0xA7, 0xF6, ++ 0xE5, 0xA7, 0x98, 0xF6, 0xE5, 0xA9, 0xA6, 0xF6, ++ 0xE3, 0x9B, 0xAE, 0xF6, 0xE3, 0x9B, 0xBC, 0xF6, ++ 0xE5, 0xAC, 0x88, 0xF6, 0xE5, 0xAC, 0xBE, 0xF6, ++ 0xE5, 0xAC, 0xBE, 0xF6, 0xF0, 0xA1, 0xA7, 0x88, ++ 0xF6, 0xE5, 0xAF, 0x83, 0xF6, 0xE5, 0xAF, 0x98, ++ 0xF6, 0xE5, 0xAF, 0xA7, 0xF6, 0xE5, 0xAF, 0xB3, ++ 0xF6, 0xF0, 0xA1, 0xAC, 0x98, 0xF6, 0xE5, 0xAF, ++ 0xBF, 0xF6, 0xE5, 0xB0, 0x86, 0xF6, 0xE5, 0xBD, ++ 0x93, 0xF6, 0xE5, 0xB0, 0xA2, 0xF6, 0xE3, 0x9E, ++ 0x81, 0xF6, 0xE5, 0xB1, 0xA0, 0xF6, 0xE5, 0xB1, ++ 0xAE, 0xF6, 0xE5, 0xB3, 0x80, 0xF6, 0xE5, 0xB2, ++ 0x8D, 0xF6, 0xF0, 0xA1, 0xB7, 0xA4, 0xF6, 0xE5, ++ 0xB5, 0x83, 0xF6, 0xF0, 0xA1, 0xB7, 0xA6, 0xF6, ++ 0xE5, 0xB5, 0xAE, 0xF6, 0xE5, 0xB5, 0xAB, 0xF6, ++ 0xE5, 0xB5, 0xBC, 0xF6, 0xE5, 0xB7, 0xA1, 0xF6, ++ 0xE5, 0xB7, 0xA2, 0xF6, 0xE3, 0xA0, 0xAF, 0xF6, ++ 0xE5, 0xB7, 0xBD, 0xF6, 0xE5, 0xB8, 0xA8, 0xF6, ++ 0xE5, 0xB8, 0xBD, 0xF6, 0xE5, 0xB9, 0xA9, 0xF6, ++ 0xE3, 0xA1, 0xA2, 0xF6, 0xF0, 0xA2, 0x86, 0x83, ++ 0xF6, 0xE3, 0xA1, 0xBC, 0xF6, 0xE5, 0xBA, 0xB0, ++ 0xF6, 0xE5, 0xBA, 0xB3, 0xF6, 0xE5, 0xBA, 0xB6, ++ 0xF6, 0xE5, 0xBB, 0x8A, 0xF6, 0xF0, 0xAA, 0x8E, ++ 0x92, 0xF6, 0xE5, 0xBB, 0xBE, 0xF6, 0xF0, 0xA2, ++ 0x8C, 0xB1, 0xF6, 0xF0, 0xA2, 0x8C, 0xB1, 0xF6, ++ 0xE8, 0x88, 0x81, 0xF6, 0xE5, 0xBC, 0xA2, 0xF6, ++ 0xE5, 0xBC, 0xA2, 0xF6, 0xE3, 0xA3, 0x87, 0xF6, ++ 0xF0, 0xA3, 0x8A, 0xB8, 0xF6, 0xF0, 0xA6, 0x87, ++ 0x9A, 0xF6, 0xE5, 0xBD, 0xA2, 0xF6, 0xE5, 
0xBD, ++ 0xAB, 0xF6, 0xE3, 0xA3, 0xA3, 0xF6, 0xE5, 0xBE, ++ 0x9A, 0xF6, 0xE5, 0xBF, 0x8D, 0xF6, 0xE5, 0xBF, ++ 0x97, 0xF6, 0xE5, 0xBF, 0xB9, 0xF6, 0xE6, 0x82, ++ 0x81, 0xF6, 0xE3, 0xA4, 0xBA, 0xF6, 0xE3, 0xA4, ++ 0x9C, 0xF6, 0xE6, 0x82, 0x94, 0xF6, 0xF0, 0xA2, ++ 0x9B, 0x94, 0xF6, 0xE6, 0x83, 0x87, 0xF6, 0xE6, ++ 0x85, 0x88, 0xF6, 0xE6, 0x85, 0x8C, 0xF6, 0xE6, ++ 0x85, 0x8E, 0xF6, 0xE6, 0x85, 0x8C, 0xF6, 0xE6, ++ 0x85, 0xBA, 0xF6, 0xE6, 0x86, 0x8E, 0xF6, 0xE6, ++ 0x86, 0xB2, 0xF6, 0xE6, 0x86, 0xA4, 0xF6, 0xE6, ++ 0x86, 0xAF, 0xF6, 0xE6, 0x87, 0x9E, 0xF6, 0xE6, ++ 0x87, 0xB2, 0xF6, 0xE6, 0x87, 0xB6, 0xF6, 0xE6, ++ 0x88, 0x90, 0xF6, 0xE6, 0x88, 0x9B, 0xF6, 0xE6, ++ 0x89, 0x9D, 0xF6, 0xE6, 0x8A, 0xB1, 0xF6, 0xE6, ++ 0x8B, 0x94, 0xF6, 0xE6, 0x8D, 0x90, 0xF6, 0xF0, ++ 0xA2, 0xAC, 0x8C, 0xF6, 0xE6, 0x8C, 0xBD, 0xF6, ++ 0xE6, 0x8B, 0xBC, 0xF6, 0xE6, 0x8D, 0xA8, 0xF6, ++ 0xE6, 0x8E, 0x83, 0xF6, 0xE6, 0x8F, 0xA4, 0xF6, ++ 0xF0, 0xA2, 0xAF, 0xB1, 0xF6, 0xE6, 0x90, 0xA2, ++ 0xF6, 0xE6, 0x8F, 0x85, 0xF6, 0xE6, 0x8E, 0xA9, ++ 0xF6, 0xE3, 0xA8, 0xAE, 0xF6, 0xE6, 0x91, 0xA9, ++ 0xF6, 0xE6, 0x91, 0xBE, 0xF6, 0xE6, 0x92, 0x9D, ++ 0xF6, 0xE6, 0x91, 0xB7, 0xF6, 0xE3, 0xA9, 0xAC, ++ 0xF6, 0xE6, 0x95, 0x8F, 0xF6, 0xE6, 0x95, 0xAC, ++ 0xF6, 0xF0, 0xA3, 0x80, 0x8A, 0xF6, 0xE6, 0x97, ++ 0xA3, 0xF6, 0xE6, 0x9B, 0xB8, 0xF6, 0xE6, 0x99, ++ 0x89, 0xF6, 0xE3, 0xAC, 0x99, 0xF6, 0xE6, 0x9A, ++ 0x91, 0xF6, 0xE3, 0xAC, 0x88, 0xF6, 0xE3, 0xAB, ++ 0xA4, 0xF6, 0xE5, 0x86, 0x92, 0xF6, 0xE5, 0x86, ++ 0x95, 0xF6, 0xE6, 0x9C, 0x80, 0xF6, 0xE6, 0x9A, ++ 0x9C, 0xF6, 0xE8, 0x82, 0xAD, 0xF6, 0xE4, 0x8F, ++ 0x99, 0xF6, 0xE6, 0x9C, 0x97, 0xF6, 0xE6, 0x9C, ++ 0x9B, 0xF6, 0xE6, 0x9C, 0xA1, 0xF6, 0xE6, 0x9D, ++ 0x9E, 0xF6, 0xE6, 0x9D, 0x93, 0xF6, 0xF0, 0xA3, ++ 0x8F, 0x83, 0xF6, 0xE3, 0xAD, 0x89, 0xF6, 0xE6, ++ 0x9F, 0xBA, 0xF6, 0xE6, 0x9E, 0x85, 0xF6, 0xE6, ++ 0xA1, 0x92, 0xF6, 0xE6, 0xA2, 0x85, 0xF6, 0xF0, ++ 0xA3, 0x91, 0xAD, 0xF6, 0xE6, 0xA2, 0x8E, 0xF6, ++ 0xE6, 0xA0, 0x9F, 0xF6, 0xE6, 0xA4, 0x94, 0xF6, ++ 0xE3, 0xAE, 0x9D, 0xF6, 0xE6, 0xA5, 0x82, 0xF6, ++ 0xE6, 0xA6, 0xA3, 0xF6, 0xE6, 0xA7, 0xAA, 0xF6, ++ 0xE6, 0xAA, 0xA8, 0xF6, 0xF0, 0xA3, 0x9A, 0xA3, ++ 0xF6, 0xE6, 0xAB, 0x9B, 0xF6, 0xE3, 0xB0, 0x98, ++ 0xF6, 0xE6, 0xAC, 0xA1, 0xF6, 0xF0, 0xA3, 0xA2, ++ 0xA7, 0xF6, 0xE6, 0xAD, 0x94, 0xF6, 0xE3, 0xB1, ++ 0x8E, 0xF6, 0xE6, 0xAD, 0xB2, 0xF6, 0xE6, 0xAE, ++ 0x9F, 0xF6, 0xE6, 0xAE, 0xBA, 0xF6, 0xE6, 0xAE, ++ 0xBB, 0xF6, 0xF0, 0xA3, 0xAA, 0x8D, 0xF6, 0xF0, ++ 0xA1, 0xB4, 0x8B, 0xF6, 0xF0, 0xA3, 0xAB, 0xBA, ++ 0xF6, 0xE6, 0xB1, 0x8E, 0xF6, 0xF0, 0xA3, 0xB2, ++ 0xBC, 0xF6, 0xE6, 0xB2, 0xBF, 0xF6, 0xE6, 0xB3, ++ 0x8D, 0xF6, 0xE6, 0xB1, 0xA7, 0xF6, 0xE6, 0xB4, ++ 0x96, 0xF6, 0xE6, 0xB4, 0xBE, 0xF6, 0xE6, 0xB5, ++ 0xB7, 0xF6, 0xE6, 0xB5, 0x81, 0xF6, 0xE6, 0xB5, ++ 0xA9, 0xF6, 0xE6, 0xB5, 0xB8, 0xF6, 0xE6, 0xB6, ++ 0x85, 0xF6, 0xF0, 0xA3, 0xB4, 0x9E, 0xF6, 0xE6, ++ 0xB4, 0xB4, 0xF6, 0xE6, 0xB8, 0xAF, 0xF6, 0xE6, ++ 0xB9, 0xAE, 0xF6, 0xE3, 0xB4, 0xB3, 0xF6, 0xE6, ++ 0xBB, 0x8B, 0xF6, 0xE6, 0xBB, 0x87, 0xF6, 0xF0, ++ 0xA3, 0xBB, 0x91, 0xF6, 0xE6, 0xB7, 0xB9, 0xF6, ++ 0xE6, 0xBD, 0xAE, 0xF6, 0xF0, 0xA3, 0xBD, 0x9E, ++ 0xF6, 0xF0, 0xA3, 0xBE, 0x8E, 0xF6, 0xE6, 0xBF, ++ 0x86, 0xF6, 0xE7, 0x80, 0xB9, 0xF6, 0xE7, 0x80, ++ 0x9E, 0xF6, 0xE7, 0x80, 0x9B, 0xF6, 0xE3, 0xB6, ++ 0x96, 0xF6, 0xE7, 0x81, 0x8A, 0xF6, 0xE7, 0x81, ++ 0xBD, 0xF6, 0xE7, 0x81, 0xB7, 0xF6, 0xE7, 0x82, ++ 0xAD, 0xF6, 0xF0, 0xA0, 0x94, 0xA5, 0xF6, 0xE7, ++ 0x85, 0x85, 0xF6, 0xF0, 0xA4, 0x89, 0xA3, 0xF6, ++ 0xE7, 0x86, 0x9C, 0xF6, 0xF0, 0xA4, 0x8E, 0xAB, ++ 0xF6, 0xE7, 0x88, 0xA8, 
0xF6, 0xE7, 0x88, 0xB5, ++ 0xF6, 0xE7, 0x89, 0x90, 0xF6, 0xF0, 0xA4, 0x98, ++ 0x88, 0xF6, 0xE7, 0x8A, 0x80, 0xF6, 0xE7, 0x8A, ++ 0x95, 0xF6, 0xF0, 0xA4, 0x9C, 0xB5, 0xF6, 0xF0, ++ 0xA4, 0xA0, 0x94, 0xF6, 0xE7, 0x8D, 0xBA, 0xF6, ++ 0xE7, 0x8E, 0x8B, 0xF6, 0xE3, 0xBA, 0xAC, 0xF6, ++ 0xE7, 0x8E, 0xA5, 0xF6, 0xE3, 0xBA, 0xB8, 0xF6, ++ 0xE3, 0xBA, 0xB8, 0xF6, 0xE7, 0x91, 0x87, 0xF6, ++ 0xE7, 0x91, 0x9C, 0xF6, 0xE7, 0x91, 0xB1, 0xF6, ++ 0xE7, 0x92, 0x85, 0xF6, 0xE7, 0x93, 0x8A, 0xF6, ++ 0xE3, 0xBC, 0x9B, 0xF6, 0xE7, 0x94, 0xA4, 0xF6, ++ 0xF0, 0xA4, 0xB0, 0xB6, 0xF6, 0xE7, 0x94, 0xBE, ++ 0xF6, 0xF0, 0xA4, 0xB2, 0x92, 0xF6, 0xE7, 0x95, ++ 0xB0, 0xF6, 0xF0, 0xA2, 0x86, 0x9F, 0xF6, 0xE7, ++ 0x98, 0x90, 0xF6, 0xF0, 0xA4, 0xBE, 0xA1, 0xF6, ++ 0xF0, 0xA4, 0xBE, 0xB8, 0xF6, 0xF0, 0xA5, 0x81, ++ 0x84, 0xF6, 0xE3, 0xBF, 0xBC, 0xF6, 0xE4, 0x80, ++ 0x88, 0xF6, 0xE7, 0x9B, 0xB4, 0xF6, 0xF0, 0xA5, ++ 0x83, 0xB3, 0xF6, 0xF0, 0xA5, 0x83, 0xB2, 0xF6, ++ 0xF0, 0xA5, 0x84, 0x99, 0xF6, 0xF0, 0xA5, 0x84, ++ 0xB3, 0xF6, 0xE7, 0x9C, 0x9E, 0xF6, 0xE7, 0x9C, ++ 0x9F, 0xF6, 0xE7, 0x9C, 0x9F, 0xF6, 0xE7, 0x9D, ++ 0x8A, 0xF6, 0xE4, 0x80, 0xB9, 0xF6, 0xE7, 0x9E, ++ 0x8B, 0xF6, 0xE4, 0x81, 0x86, 0xF6, 0xE4, 0x82, ++ 0x96, 0xF6, 0xF0, 0xA5, 0x90, 0x9D, 0xF6, 0xE7, ++ 0xA1, 0x8E, 0xF6, 0xE7, 0xA2, 0x8C, 0xF6, 0xE7, ++ 0xA3, 0x8C, 0xF6, 0xE4, 0x83, 0xA3, 0xF6, 0xF0, ++ 0xA5, 0x98, 0xA6, 0xF6, 0xE7, 0xA5, 0x96, 0xF6, ++ 0xF0, 0xA5, 0x9A, 0x9A, 0xF6, 0xF0, 0xA5, 0x9B, ++ 0x85, 0xF6, 0xE7, 0xA6, 0x8F, 0xF6, 0xE7, 0xA7, ++ 0xAB, 0xF6, 0xE4, 0x84, 0xAF, 0xF6, 0xE7, 0xA9, ++ 0x80, 0xF6, 0xE7, 0xA9, 0x8A, 0xF6, 0xE7, 0xA9, ++ 0x8F, 0xF6, 0xF0, 0xA5, 0xA5, 0xBC, 0xF6, 0xF0, ++ 0xA5, 0xAA, 0xA7, 0xF6, 0xF0, 0xA5, 0xAA, 0xA7, ++ 0xF6, 0xE7, 0xAB, 0xAE, 0xF6, 0xE4, 0x88, 0x82, ++ 0xF6, 0xF0, 0xA5, 0xAE, 0xAB, 0xF6, 0xE7, 0xAF, ++ 0x86, 0xF6, 0xE7, 0xAF, 0x89, 0xF6, 0xE4, 0x88, ++ 0xA7, 0xF6, 0xF0, 0xA5, 0xB2, 0x80, 0xF6, 0xE7, ++ 0xB3, 0x92, 0xF6, 0xE4, 0x8A, 0xA0, 0xF6, 0xE7, ++ 0xB3, 0xA8, 0xF6, 0xE7, 0xB3, 0xA3, 0xF6, 0xE7, ++ 0xB4, 0x80, 0xF6, 0xF0, 0xA5, 0xBE, 0x86, 0xF6, ++ 0xE7, 0xB5, 0xA3, 0xF6, 0xE4, 0x8C, 0x81, 0xF6, ++ 0xE7, 0xB7, 0x87, 0xF6, 0xE7, 0xB8, 0x82, 0xF6, ++ 0xE7, 0xB9, 0x85, 0xF6, 0xE4, 0x8C, 0xB4, 0xF6, ++ 0xF0, 0xA6, 0x88, 0xA8, 0xF6, 0xF0, 0xA6, 0x89, ++ 0x87, 0xF6, 0xE4, 0x8D, 0x99, 0xF6, 0xF0, 0xA6, ++ 0x8B, 0x99, 0xF6, 0xE7, 0xBD, 0xBA, 0xF6, 0xF0, ++ 0xA6, 0x8C, 0xBE, 0xF6, 0xE7, 0xBE, 0x95, 0xF6, ++ 0xE7, 0xBF, 0xBA, 0xF6, 0xE8, 0x80, 0x85, 0xF6, ++ 0xF0, 0xA6, 0x93, 0x9A, 0xF6, 0xF0, 0xA6, 0x94, ++ 0xA3, 0xF6, 0xE8, 0x81, 0xA0, 0xF6, 0xF0, 0xA6, ++ 0x96, 0xA8, 0xF6, 0xE8, 0x81, 0xB0, 0xF6, 0xF0, ++ 0xA3, 0x8D, 0x9F, 0xF6, 0xE4, 0x8F, 0x95, 0xF6, ++ 0xE8, 0x82, 0xB2, 0xF6, 0xE8, 0x84, 0x83, 0xF6, ++ 0xE4, 0x90, 0x8B, 0xF6, 0xE8, 0x84, 0xBE, 0xF6, ++ 0xE5, 0xAA, 0xB5, 0xF6, 0xF0, 0xA6, 0x9E, 0xA7, ++ 0xF6, 0xF0, 0xA6, 0x9E, 0xB5, 0xF6, 0xF0, 0xA3, ++ 0x8E, 0x93, 0xF6, 0xF0, 0xA3, 0x8E, 0x9C, 0xF6, ++ 0xE8, 0x88, 0x81, 0xF6, 0xE8, 0x88, 0x84, 0xF6, ++ 0xE8, 0xBE, 0x9E, 0xF6, 0xE4, 0x91, 0xAB, 0xF6, ++ 0xE8, 0x8A, 0x91, 0xF6, 0xE8, 0x8A, 0x8B, 0xF6, ++ 0xE8, 0x8A, 0x9D, 0xF6, 0xE5, 0x8A, 0xB3, 0xF6, ++ 0xE8, 0x8A, 0xB1, 0xF6, 0xE8, 0x8A, 0xB3, 0xF6, ++ 0xE8, 0x8A, 0xBD, 0xF6, 0xE8, 0x8B, 0xA6, 0xF6, ++ 0xF0, 0xA6, 0xAC, 0xBC, 0xF6, 0xE8, 0x8B, 0xA5, ++ 0xF6, 0xE8, 0x8C, 0x9D, 0xF6, 0xE8, 0x8D, 0xA3, ++ 0xF6, 0xE8, 0x8E, 0xAD, 0xF6, 0xE8, 0x8C, 0xA3, ++ 0xF6, 0xE8, 0x8E, 0xBD, 0xF6, 0xE8, 0x8F, 0xA7, ++ 0xF6, 0xE8, 0x91, 0x97, 0xF6, 0xE8, 0x8D, 0x93, ++ 0xF6, 0xE8, 0x8F, 0x8A, 0xF6, 0xE8, 0x8F, 0x8C, ++ 0xF6, 
0xE8, 0x8F, 0x9C, 0xF6, 0xF0, 0xA6, 0xB0, ++ 0xB6, 0xF6, 0xF0, 0xA6, 0xB5, 0xAB, 0xF6, 0xF0, ++ 0xA6, 0xB3, 0x95, 0xF6, 0xE4, 0x94, 0xAB, 0xF6, ++ 0xE8, 0x93, 0xB1, 0xF6, 0xE8, 0x93, 0xB3, 0xF6, ++ 0xE8, 0x94, 0x96, 0xF6, 0xF0, 0xA7, 0x8F, 0x8A, ++ 0xF6, 0xE8, 0x95, 0xA4, 0xF6, 0xF0, 0xA6, 0xBC, ++ 0xAC, 0xF6, 0xE4, 0x95, 0x9D, 0xF6, 0xE4, 0x95, ++ 0xA1, 0xF6, 0xF0, 0xA6, 0xBE, 0xB1, 0xF6, 0xF0, ++ 0xA7, 0x83, 0x92, 0xF6, 0xE4, 0x95, 0xAB, 0xF6, ++ 0xE8, 0x99, 0x90, 0xF6, 0xE8, 0x99, 0x9C, 0xF6, ++ 0xE8, 0x99, 0xA7, 0xF6, 0xE8, 0x99, 0xA9, 0xF6, ++ 0xE8, 0x9A, 0xA9, 0xF6, 0xE8, 0x9A, 0x88, 0xF6, ++ 0xE8, 0x9C, 0x8E, 0xF6, 0xE8, 0x9B, 0xA2, 0xF6, ++ 0xE8, 0x9D, 0xB9, 0xF6, 0xE8, 0x9C, 0xA8, 0xF6, ++ 0xE8, 0x9D, 0xAB, 0xF6, 0xE8, 0x9E, 0x86, 0xF6, ++ 0xE4, 0x97, 0x97, 0xF6, 0xE8, 0x9F, 0xA1, 0xF6, ++ 0xE8, 0xA0, 0x81, 0xF6, 0xE4, 0x97, 0xB9, 0xF6, ++ 0xE8, 0xA1, 0xA0, 0xF6, 0xE8, 0xA1, 0xA3, 0xF6, ++ 0xF0, 0xA7, 0x99, 0xA7, 0xF6, 0xE8, 0xA3, 0x97, ++ 0xF6, 0xE8, 0xA3, 0x9E, 0xF6, 0xE4, 0x98, 0xB5, ++ 0xF6, 0xE8, 0xA3, 0xBA, 0xF6, 0xE3, 0x92, 0xBB, ++ 0xF6, 0xF0, 0xA7, 0xA2, 0xAE, 0xF6, 0xF0, 0xA7, ++ 0xA5, 0xA6, 0xF6, 0xE4, 0x9A, 0xBE, 0xF6, 0xE4, ++ 0x9B, 0x87, 0xF6, 0xE8, 0xAA, 0xA0, 0xF6, 0xE8, ++ 0xAB, 0xAD, 0xF6, 0xE8, 0xAE, 0x8A, 0xF6, 0xE8, ++ 0xB1, 0x95, 0xF6, 0xF0, 0xA7, 0xB2, 0xA8, 0xF6, ++ 0xE8, 0xB2, 0xAB, 0xF6, 0xE8, 0xB3, 0x81, 0xF6, ++ 0xE8, 0xB4, 0x9B, 0xF6, 0xE8, 0xB5, 0xB7, 0xF6, ++ 0xF0, 0xA7, 0xBC, 0xAF, 0xF6, 0xF0, 0xA0, 0xA0, ++ 0x84, 0xF6, 0xE8, 0xB7, 0x8B, 0xF6, 0xE8, 0xB6, ++ 0xBC, 0xF6, 0xE8, 0xB7, 0xB0, 0xF6, 0xF0, 0xA0, ++ 0xA3, 0x9E, 0xF6, 0xE8, 0xBB, 0x94, 0xF6, 0xE8, ++ 0xBC, 0xB8, 0xF6, 0xF0, 0xA8, 0x97, 0x92, 0xF6, ++ 0xF0, 0xA8, 0x97, 0xAD, 0xF6, 0xE9, 0x82, 0x94, ++ 0xF6, 0xE9, 0x83, 0xB1, 0xF6, 0xE9, 0x84, 0x91, ++ 0xF6, 0xF0, 0xA8, 0x9C, 0xAE, 0xF6, 0xE9, 0x84, ++ 0x9B, 0xF6, 0xE9, 0x88, 0xB8, 0xF6, 0xE9, 0x8B, ++ 0x97, 0xF6, 0xE9, 0x8B, 0x98, 0xF6, 0xE9, 0x89, ++ 0xBC, 0xF6, 0xE9, 0x8F, 0xB9, 0xF6, 0xE9, 0x90, ++ 0x95, 0xF6, 0xF0, 0xA8, 0xAF, 0xBA, 0xF6, 0xE9, ++ 0x96, 0x8B, 0xF6, 0xE4, 0xA6, 0x95, 0xF6, 0xE9, ++ 0x96, 0xB7, 0xF6, 0xF0, 0xA8, 0xB5, 0xB7, 0xF6, ++ 0xE4, 0xA7, 0xA6, 0xF6, 0xE9, 0x9B, 0x83, 0xF6, ++ 0xE5, 0xB6, 0xB2, 0xF6, 0xE9, 0x9C, 0xA3, 0xF6, ++ 0xF0, 0xA9, 0x85, 0x85, 0xF6, 0xF0, 0xA9, 0x88, ++ 0x9A, 0xF6, 0xE4, 0xA9, 0xAE, 0xF6, 0xE4, 0xA9, ++ 0xB6, 0xF6, 0xE9, 0x9F, 0xA0, 0xF6, 0xF0, 0xA9, ++ 0x90, 0x8A, 0xF6, 0xE4, 0xAA, 0xB2, 0xF6, 0xF0, ++ 0xA9, 0x92, 0x96, 0xF6, 0xE9, 0xA0, 0x8B, 0xF6, ++ 0xE9, 0xA0, 0x8B, 0xF6, 0xE9, 0xA0, 0xA9, 0xF6, ++ 0xF0, 0xA9, 0x96, 0xB6, 0xF6, 0xE9, 0xA3, 0xA2, ++ 0xF6, 0xE4, 0xAC, 0xB3, 0xF6, 0xE9, 0xA4, 0xA9, ++ 0xF6, 0xE9, 0xA6, 0xA7, 0xF6, 0xE9, 0xA7, 0x82, ++ 0xF6, 0xE9, 0xA7, 0xBE, 0xF6, 0xE4, 0xAF, 0x8E, ++ 0xF6, 0xF0, 0xA9, 0xAC, 0xB0, 0xF6, 0xE9, 0xAC, ++ 0x92, 0xF6, 0xE9, 0xB1, 0x80, 0xF6, 0xE9, 0xB3, ++ 0xBD, 0xF6, 0xE4, 0xB3, 0x8E, 0xF6, 0xE4, 0xB3, ++ 0xAD, 0xF6, 0xE9, 0xB5, 0xA7, 0xF6, 0xF0, 0xAA, ++ 0x83, 0x8E, 0xF6, 0xE4, 0xB3, 0xB8, 0xF6, 0xF0, ++ 0xAA, 0x84, 0x85, 0xF6, 0xF0, 0xAA, 0x88, 0x8E, ++ 0xF6, 0xF0, 0xAA, 0x8A, 0x91, 0xF6, 0xE9, 0xBA, ++ 0xBB, 0xF6, 0xE4, 0xB5, 0x96, 0xF6, 0xE9, 0xBB, ++ 0xB9, 0xF6, 0xE9, 0xBB, 0xBE, 0xF6, 0xE9, 0xBC, ++ 0x85, 0xF6, 0xE9, 0xBC, 0x8F, 0xF6, 0xE9, 0xBC, ++ 0x96, 0xF6, 0xE9, 0xBC, 0xBB, 0xF6, 0xF0, 0xAA, ++ 0x98, 0x80, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, 
++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, ++ }, ++ { ++ 0x20, 0x20, 0xCC, 0x88, 0x61, 0x20, 0xCC, 0x84, ++ 0x32, 0x33, 0x20, 0xCC, 0x81, 0xCE, 0xBC, 0x20, ++ 0xCC, 0xA7, 0x31, 0x6F, 0x31, 0xE2, 0x81, 0x84, ++ 0x34, 0x31, 0xE2, 0x81, 0x84, 0x32, 0x33, 0xE2, ++ 0x81, 0x84, 0x34, 0xF6, 0x41, 0xCC, 0x80, 0xF6, ++ 0x41, 0xCC, 0x81, 0xF6, 0x41, 0xCC, 0x82, 0xF6, ++ 0x41, 0xCC, 0x83, 0xF6, 0x41, 0xCC, 0x88, 0xF6, ++ 0x41, 0xCC, 0x8A, 0xF6, 0x43, 0xCC, 0xA7, 0xF6, ++ 0x45, 0xCC, 0x80, 0xF6, 0x45, 0xCC, 0x81, 0xF6, ++ 0x45, 0xCC, 0x82, 0xF6, 0x45, 0xCC, 0x88, 0xF6, ++ 0x49, 0xCC, 0x80, 0xF6, 0x49, 0xCC, 0x81, 0xF6, ++ 0x49, 0xCC, 0x82, 0xF6, 0x49, 0xCC, 0x88, 0xF6, ++ 0x4E, 0xCC, 0x83, 0xF6, 0x4F, 0xCC, 0x80, 0xF6, ++ 0x4F, 0xCC, 0x81, 0xF6, 0x4F, 0xCC, 0x82, 0xF6, ++ 0x4F, 0xCC, 0x83, 0xF6, 0x4F, 0xCC, 0x88, 0xF6, ++ 0x55, 0xCC, 0x80, 0xF6, 0x55, 0xCC, 0x81, 0xF6, ++ 0x55, 0xCC, 0x82, 0xF6, 0x55, 0xCC, 0x88, 0xF6, ++ 0x59, 0xCC, 0x81, 0xF6, 0x61, 0xCC, 0x80, 0xF6, ++ 0x61, 0xCC, 0x81, 0xF6, 0x61, 0xCC, 0x82, 0xF6, ++ 0x61, 0xCC, 0x83, 0xF6, 
0x61, 0xCC, 0x88, 0xF6, ++ 0x61, 0xCC, 0x8A, 0xF6, 0x63, 0xCC, 0xA7, 0xF6, ++ 0x65, 0xCC, 0x80, 0xF6, 0x65, 0xCC, 0x81, 0xF6, ++ 0x65, 0xCC, 0x82, 0xF6, 0x65, 0xCC, 0x88, 0xF6, ++ 0x69, 0xCC, 0x80, 0xF6, 0x69, 0xCC, 0x81, 0xF6, ++ 0x69, 0xCC, 0x82, 0xF6, 0x69, 0xCC, 0x88, 0xF6, ++ 0x6E, 0xCC, 0x83, 0xF6, 0x6F, 0xCC, 0x80, 0xF6, ++ 0x6F, 0xCC, 0x81, 0xF6, 0x6F, 0xCC, 0x82, 0xF6, ++ 0x6F, 0xCC, 0x83, 0xF6, 0x6F, 0xCC, 0x88, 0xF6, ++ 0x75, 0xCC, 0x80, 0xF6, 0x75, 0xCC, 0x81, 0xF6, ++ 0x75, 0xCC, 0x82, 0xF6, 0x75, 0xCC, 0x88, 0xF6, ++ 0x79, 0xCC, 0x81, 0xF6, 0x79, 0xCC, 0x88, 0xF6, ++ 0x41, 0xCC, 0x84, 0xF6, 0x61, 0xCC, 0x84, 0xF6, ++ 0x41, 0xCC, 0x86, 0xF6, 0x61, 0xCC, 0x86, 0xF6, ++ 0x41, 0xCC, 0xA8, 0xF6, 0x61, 0xCC, 0xA8, 0xF6, ++ 0x43, 0xCC, 0x81, 0xF6, 0x63, 0xCC, 0x81, 0xF6, ++ 0x43, 0xCC, 0x82, 0xF6, 0x63, 0xCC, 0x82, 0xF6, ++ 0x43, 0xCC, 0x87, 0xF6, 0x63, 0xCC, 0x87, 0xF6, ++ 0x43, 0xCC, 0x8C, 0xF6, 0x63, 0xCC, 0x8C, 0xF6, ++ 0x44, 0xCC, 0x8C, 0xF6, 0x64, 0xCC, 0x8C, 0xF6, ++ 0x45, 0xCC, 0x84, 0xF6, 0x65, 0xCC, 0x84, 0xF6, ++ 0x45, 0xCC, 0x86, 0xF6, 0x65, 0xCC, 0x86, 0xF6, ++ 0x45, 0xCC, 0x87, 0xF6, 0x65, 0xCC, 0x87, 0xF6, ++ 0x45, 0xCC, 0xA8, 0xF6, 0x65, 0xCC, 0xA8, 0xF6, ++ 0x45, 0xCC, 0x8C, 0xF6, 0x65, 0xCC, 0x8C, 0xF6, ++ 0x47, 0xCC, 0x82, 0xF6, 0x67, 0xCC, 0x82, 0xF6, ++ 0x47, 0xCC, 0x86, 0xF6, 0x67, 0xCC, 0x86, 0xF6, ++ 0x47, 0xCC, 0x87, 0xF6, 0x67, 0xCC, 0x87, 0xF6, ++ 0x47, 0xCC, 0xA7, 0xF6, 0x67, 0xCC, 0xA7, 0xF6, ++ 0x48, 0xCC, 0x82, 0xF6, 0x68, 0xCC, 0x82, 0xF6, ++ 0x49, 0xCC, 0x83, 0xF6, 0x69, 0xCC, 0x83, 0xF6, ++ 0x49, 0xCC, 0x84, 0xF6, 0x69, 0xCC, 0x84, 0xF6, ++ 0x49, 0xCC, 0x86, 0xF6, 0x69, 0xCC, 0x86, 0xF6, ++ 0x49, 0xCC, 0xA8, 0xF6, 0x69, 0xCC, 0xA8, 0xF6, ++ 0x49, 0xCC, 0x87, 0x49, 0x4A, 0x69, 0x6A, 0xF6, ++ 0x4A, 0xCC, 0x82, 0xF6, 0x6A, 0xCC, 0x82, 0xF6, ++ 0x4B, 0xCC, 0xA7, 0xF6, 0x6B, 0xCC, 0xA7, 0xF6, ++ 0x4C, 0xCC, 0x81, 0xF6, 0x6C, 0xCC, 0x81, 0xF6, ++ 0x4C, 0xCC, 0xA7, 0xF6, 0x6C, 0xCC, 0xA7, 0xF6, ++ 0x4C, 0xCC, 0x8C, 0xF6, 0x6C, 0xCC, 0x8C, 0x4C, ++ 0xC2, 0xB7, 0x6C, 0xC2, 0xB7, 0xF6, 0x4E, 0xCC, ++ 0x81, 0xF6, 0x6E, 0xCC, 0x81, 0xF6, 0x4E, 0xCC, ++ 0xA7, 0xF6, 0x6E, 0xCC, 0xA7, 0xF6, 0x4E, 0xCC, ++ 0x8C, 0xF6, 0x6E, 0xCC, 0x8C, 0xCA, 0xBC, 0x6E, ++ 0xF6, 0x4F, 0xCC, 0x84, 0xF6, 0x6F, 0xCC, 0x84, ++ 0xF6, 0x4F, 0xCC, 0x86, 0xF6, 0x6F, 0xCC, 0x86, ++ 0xF6, 0x4F, 0xCC, 0x8B, 0xF6, 0x6F, 0xCC, 0x8B, ++ 0xF6, 0x52, 0xCC, 0x81, 0xF6, 0x72, 0xCC, 0x81, ++ 0xF6, 0x52, 0xCC, 0xA7, 0xF6, 0x72, 0xCC, 0xA7, ++ 0xF6, 0x52, 0xCC, 0x8C, 0xF6, 0x72, 0xCC, 0x8C, ++ 0xF6, 0x53, 0xCC, 0x81, 0xF6, 0x73, 0xCC, 0x81, ++ 0xF6, 0x53, 0xCC, 0x82, 0xF6, 0x73, 0xCC, 0x82, ++ 0xF6, 0x53, 0xCC, 0xA7, 0xF6, 0x73, 0xCC, 0xA7, ++ 0xF6, 0x53, 0xCC, 0x8C, 0xF6, 0x73, 0xCC, 0x8C, ++ 0xF6, 0x54, 0xCC, 0xA7, 0xF6, 0x74, 0xCC, 0xA7, ++ 0xF6, 0x54, 0xCC, 0x8C, 0xF6, 0x74, 0xCC, 0x8C, ++ 0xF6, 0x55, 0xCC, 0x83, 0xF6, 0x75, 0xCC, 0x83, ++ 0xF6, 0x55, 0xCC, 0x84, 0xF6, 0x75, 0xCC, 0x84, ++ 0xF6, 0x55, 0xCC, 0x86, 0xF6, 0x75, 0xCC, 0x86, ++ 0xF6, 0x55, 0xCC, 0x8A, 0xF6, 0x75, 0xCC, 0x8A, ++ 0xF6, 0x55, 0xCC, 0x8B, 0xF6, 0x75, 0xCC, 0x8B, ++ 0xF6, 0x55, 0xCC, 0xA8, 0xF6, 0x75, 0xCC, 0xA8, ++ 0xF6, 0x57, 0xCC, 0x82, 0xF6, 0x77, 0xCC, 0x82, ++ 0xF6, 0x59, 0xCC, 0x82, 0xF6, 0x79, 0xCC, 0x82, ++ 0xF6, 0x59, 0xCC, 0x88, 0xF6, 0x5A, 0xCC, 0x81, ++ 0xF6, 0x7A, 0xCC, 0x81, 0xF6, 0x5A, 0xCC, 0x87, ++ 0xF6, 0x7A, 0xCC, 0x87, 0xF6, 0x5A, 0xCC, 0x8C, ++ 0xF6, 0x7A, 0xCC, 0x8C, 0x73, 0xF6, 0x4F, 0xCC, ++ 0x9B, 0xF6, 0x6F, 0xCC, 0x9B, 0xF6, 0x55, 0xCC, ++ 0x9B, 0xF6, 0x75, 0xCC, 0x9B, 0x44, 0x5A, 0xCC, ++ 0x8C, 
0x44, 0x7A, 0xCC, 0x8C, 0x64, 0x7A, 0xCC, ++ 0x8C, 0x4C, 0x4A, 0x4C, 0x6A, 0x6C, 0x6A, 0x4E, ++ 0x4A, 0x4E, 0x6A, 0x6E, 0x6A, 0xF6, 0x41, 0xCC, ++ 0x8C, 0xF6, 0x61, 0xCC, 0x8C, 0xF6, 0x49, 0xCC, ++ 0x8C, 0xF6, 0x69, 0xCC, 0x8C, 0xF6, 0x4F, 0xCC, ++ 0x8C, 0xF6, 0x6F, 0xCC, 0x8C, 0xF6, 0x55, 0xCC, ++ 0x8C, 0xF6, 0x75, 0xCC, 0x8C, 0xF6, 0x55, 0xCC, ++ 0x88, 0xCC, 0x84, 0xF6, 0x75, 0xCC, 0x88, 0xCC, ++ 0x84, 0xF6, 0x55, 0xCC, 0x88, 0xCC, 0x81, 0xF6, ++ 0x75, 0xCC, 0x88, 0xCC, 0x81, 0xF6, 0x55, 0xCC, ++ 0x88, 0xCC, 0x8C, 0xF6, 0x75, 0xCC, 0x88, 0xCC, ++ 0x8C, 0xF6, 0x55, 0xCC, 0x88, 0xCC, 0x80, 0xF6, ++ 0x75, 0xCC, 0x88, 0xCC, 0x80, 0xF6, 0x41, 0xCC, ++ 0x88, 0xCC, 0x84, 0xF6, 0x61, 0xCC, 0x88, 0xCC, ++ 0x84, 0xF6, 0x41, 0xCC, 0x87, 0xCC, 0x84, 0xF6, ++ 0x61, 0xCC, 0x87, 0xCC, 0x84, 0xF6, 0xC3, 0x86, ++ 0xCC, 0x84, 0xF6, 0xC3, 0xA6, 0xCC, 0x84, 0xF6, ++ 0x47, 0xCC, 0x8C, 0xF6, 0x67, 0xCC, 0x8C, 0xF6, ++ 0x4B, 0xCC, 0x8C, 0xF6, 0x6B, 0xCC, 0x8C, 0xF6, ++ 0x4F, 0xCC, 0xA8, 0xF6, 0x6F, 0xCC, 0xA8, 0xF6, ++ 0x4F, 0xCC, 0xA8, 0xCC, 0x84, 0xF6, 0x6F, 0xCC, ++ 0xA8, 0xCC, 0x84, 0xF6, 0xC6, 0xB7, 0xCC, 0x8C, ++ 0xF6, 0xCA, 0x92, 0xCC, 0x8C, 0xF6, 0x6A, 0xCC, ++ 0x8C, 0x44, 0x5A, 0x44, 0x7A, 0x64, 0x7A, 0xF6, ++ 0x47, 0xCC, 0x81, 0xF6, 0x67, 0xCC, 0x81, 0xF6, ++ 0x4E, 0xCC, 0x80, 0xF6, 0x6E, 0xCC, 0x80, 0xF6, ++ 0x41, 0xCC, 0x8A, 0xCC, 0x81, 0xF6, 0x61, 0xCC, ++ 0x8A, 0xCC, 0x81, 0xF6, 0xC3, 0x86, 0xCC, 0x81, ++ 0xF6, 0xC3, 0xA6, 0xCC, 0x81, 0xF6, 0xC3, 0x98, ++ 0xCC, 0x81, 0xF6, 0xC3, 0xB8, 0xCC, 0x81, 0xF6, ++ 0x41, 0xCC, 0x8F, 0xF6, 0x61, 0xCC, 0x8F, 0xF6, ++ 0x41, 0xCC, 0x91, 0xF6, 0x61, 0xCC, 0x91, 0xF6, ++ 0x45, 0xCC, 0x8F, 0xF6, 0x65, 0xCC, 0x8F, 0xF6, ++ 0x45, 0xCC, 0x91, 0xF6, 0x65, 0xCC, 0x91, 0xF6, ++ 0x49, 0xCC, 0x8F, 0xF6, 0x69, 0xCC, 0x8F, 0xF6, ++ 0x49, 0xCC, 0x91, 0xF6, 0x69, 0xCC, 0x91, 0xF6, ++ 0x4F, 0xCC, 0x8F, 0xF6, 0x6F, 0xCC, 0x8F, 0xF6, ++ 0x4F, 0xCC, 0x91, 0xF6, 0x6F, 0xCC, 0x91, 0xF6, ++ 0x52, 0xCC, 0x8F, 0xF6, 0x72, 0xCC, 0x8F, 0xF6, ++ 0x52, 0xCC, 0x91, 0xF6, 0x72, 0xCC, 0x91, 0xF6, ++ 0x55, 0xCC, 0x8F, 0xF6, 0x75, 0xCC, 0x8F, 0xF6, ++ 0x55, 0xCC, 0x91, 0xF6, 0x75, 0xCC, 0x91, 0xF6, ++ 0x53, 0xCC, 0xA6, 0xF6, 0x73, 0xCC, 0xA6, 0xF6, ++ 0x54, 0xCC, 0xA6, 0xF6, 0x74, 0xCC, 0xA6, 0xF6, ++ 0x48, 0xCC, 0x8C, 0xF6, 0x68, 0xCC, 0x8C, 0xF6, ++ 0x41, 0xCC, 0x87, 0xF6, 0x61, 0xCC, 0x87, 0xF6, ++ 0x45, 0xCC, 0xA7, 0xF6, 0x65, 0xCC, 0xA7, 0xF6, ++ 0x4F, 0xCC, 0x88, 0xCC, 0x84, 0xF6, 0x6F, 0xCC, ++ 0x88, 0xCC, 0x84, 0xF6, 0x4F, 0xCC, 0x83, 0xCC, ++ 0x84, 0xF6, 0x6F, 0xCC, 0x83, 0xCC, 0x84, 0xF6, ++ 0x4F, 0xCC, 0x87, 0xF6, 0x6F, 0xCC, 0x87, 0xF6, ++ 0x4F, 0xCC, 0x87, 0xCC, 0x84, 0xF6, 0x6F, 0xCC, ++ 0x87, 0xCC, 0x84, 0xF6, 0x59, 0xCC, 0x84, 0xF6, ++ 0x79, 0xCC, 0x84, 0x68, 0xC9, 0xA6, 0x6A, 0x72, ++ 0xC9, 0xB9, 0xC9, 0xBB, 0xCA, 0x81, 0x77, 0x79, ++ 0x20, 0xCC, 0x86, 0x20, 0xCC, 0x87, 0x20, 0xCC, ++ 0x8A, 0x20, 0xCC, 0xA8, 0x20, 0xCC, 0x83, 0x20, ++ 0xCC, 0x8B, 0xC9, 0xA3, 0x6C, 0x73, 0x78, 0xCA, ++ 0x95, 0xF6, 0xCC, 0x80, 0xF6, 0xCC, 0x81, 0xF6, ++ 0xCC, 0x93, 0xF6, 0xCC, 0x88, 0xCC, 0x81, 0xF6, ++ 0xCA, 0xB9, 0x20, 0xCD, 0x85, 0xF6, 0x3B, 0x20, ++ 0xCC, 0x81, 0xF5, 0x05, 0xC2, 0xA8, 0xCC, 0x81, ++ 0x20, 0xCC, 0x88, 0xCC, 0x81, 0xF6, 0xCE, 0x91, ++ 0xCC, 0x81, 0xF6, 0xC2, 0xB7, 0xF6, 0xCE, 0x95, ++ 0xCC, 0x81, 0xF6, 0xCE, 0x97, 0xCC, 0x81, 0xF6, ++ 0xCE, 0x99, 0xCC, 0x81, 0xF6, 0xCE, 0x9F, 0xCC, ++ 0x81, 0xF6, 0xCE, 0xA5, 0xCC, 0x81, 0xF6, 0xCE, ++ 0xA9, 0xCC, 0x81, 0xF6, 0xCE, 0xB9, 0xCC, 0x88, ++ 0xCC, 0x81, 0xF6, 0xCE, 0x99, 0xCC, 0x88, 0xF6, ++ 0xCE, 0xA5, 0xCC, 0x88, 0xF6, 0xCE, 0xB1, 
0xCC, ++ 0x81, 0xF6, 0xCE, 0xB5, 0xCC, 0x81, 0xF6, 0xCE, ++ 0xB7, 0xCC, 0x81, 0xF6, 0xCE, 0xB9, 0xCC, 0x81, ++ 0xF6, 0xCF, 0x85, 0xCC, 0x88, 0xCC, 0x81, 0xF6, ++ 0xCE, 0xB9, 0xCC, 0x88, 0xF6, 0xCF, 0x85, 0xCC, ++ 0x88, 0xF6, 0xCE, 0xBF, 0xCC, 0x81, 0xF6, 0xCF, ++ 0x85, 0xCC, 0x81, 0xF6, 0xCF, 0x89, 0xCC, 0x81, ++ 0xCE, 0xB2, 0xCE, 0xB8, 0xCE, 0xA5, 0xF5, 0x05, ++ 0xCF, 0x92, 0xCC, 0x81, 0xCE, 0xA5, 0xCC, 0x81, ++ 0xF5, 0x05, 0xCF, 0x92, 0xCC, 0x88, 0xCE, 0xA5, ++ 0xCC, 0x88, 0xCF, 0x86, 0xCF, 0x80, 0xCE, 0xBA, ++ 0xCF, 0x81, 0xCF, 0x82, 0xCE, 0x98, 0xCE, 0xB5, ++ 0xCE, 0xA3, 0xF6, 0xD0, 0x95, 0xCC, 0x80, 0xF6, ++ 0xD0, 0x95, 0xCC, 0x88, 0xF6, 0xD0, 0x93, 0xCC, ++ 0x81, 0xF6, 0xD0, 0x86, 0xCC, 0x88, 0xF6, 0xD0, ++ 0x9A, 0xCC, 0x81, 0xF6, 0xD0, 0x98, 0xCC, 0x80, ++ 0xF6, 0xD0, 0xA3, 0xCC, 0x86, 0xF6, 0xD0, 0x98, ++ 0xCC, 0x86, 0xF6, 0xD0, 0xB8, 0xCC, 0x86, 0xF6, ++ 0xD0, 0xB5, 0xCC, 0x80, 0xF6, 0xD0, 0xB5, 0xCC, ++ 0x88, 0xF6, 0xD0, 0xB3, 0xCC, 0x81, 0xF6, 0xD1, ++ 0x96, 0xCC, 0x88, 0xF6, 0xD0, 0xBA, 0xCC, 0x81, ++ 0xF6, 0xD0, 0xB8, 0xCC, 0x80, 0xF6, 0xD1, 0x83, ++ 0xCC, 0x86, 0xF6, 0xD1, 0xB4, 0xCC, 0x8F, 0xF6, ++ 0xD1, 0xB5, 0xCC, 0x8F, 0xF6, 0xD0, 0x96, 0xCC, ++ 0x86, 0xF6, 0xD0, 0xB6, 0xCC, 0x86, 0xF6, 0xD0, ++ 0x90, 0xCC, 0x86, 0xF6, 0xD0, 0xB0, 0xCC, 0x86, ++ 0xF6, 0xD0, 0x90, 0xCC, 0x88, 0xF6, 0xD0, 0xB0, ++ 0xCC, 0x88, 0xF6, 0xD0, 0x95, 0xCC, 0x86, 0xF6, ++ 0xD0, 0xB5, 0xCC, 0x86, 0xF6, 0xD3, 0x98, 0xCC, ++ 0x88, 0xF6, 0xD3, 0x99, 0xCC, 0x88, 0xF6, 0xD0, ++ 0x96, 0xCC, 0x88, 0xF6, 0xD0, 0xB6, 0xCC, 0x88, ++ 0xF6, 0xD0, 0x97, 0xCC, 0x88, 0xF6, 0xD0, 0xB7, ++ 0xCC, 0x88, 0xF6, 0xD0, 0x98, 0xCC, 0x84, 0xF6, ++ 0xD0, 0xB8, 0xCC, 0x84, 0xF6, 0xD0, 0x98, 0xCC, ++ 0x88, 0xF6, 0xD0, 0xB8, 0xCC, 0x88, 0xF6, 0xD0, ++ 0x9E, 0xCC, 0x88, 0xF6, 0xD0, 0xBE, 0xCC, 0x88, ++ 0xF6, 0xD3, 0xA8, 0xCC, 0x88, 0xF6, 0xD3, 0xA9, ++ 0xCC, 0x88, 0xF6, 0xD0, 0xAD, 0xCC, 0x88, 0xF6, ++ 0xD1, 0x8D, 0xCC, 0x88, 0xF6, 0xD0, 0xA3, 0xCC, ++ 0x84, 0xF6, 0xD1, 0x83, 0xCC, 0x84, 0xF6, 0xD0, ++ 0xA3, 0xCC, 0x88, 0xF6, 0xD1, 0x83, 0xCC, 0x88, ++ 0xF6, 0xD0, 0xA3, 0xCC, 0x8B, 0xF6, 0xD1, 0x83, ++ 0xCC, 0x8B, 0xF6, 0xD0, 0xA7, 0xCC, 0x88, 0xF6, ++ 0xD1, 0x87, 0xCC, 0x88, 0xF6, 0xD0, 0xAB, 0xCC, ++ 0x88, 0xF6, 0xD1, 0x8B, 0xCC, 0x88, 0xD5, 0xA5, ++ 0xD6, 0x82, 0xF6, 0xD8, 0xA7, 0xD9, 0x93, 0xF6, ++ 0xD8, 0xA7, 0xD9, 0x94, 0xF6, 0xD9, 0x88, 0xD9, ++ 0x94, 0xF6, 0xD8, 0xA7, 0xD9, 0x95, 0xF6, 0xD9, ++ 0x8A, 0xD9, 0x94, 0xD8, 0xA7, 0xD9, 0xB4, 0xD9, ++ 0x88, 0xD9, 0xB4, 0xDB, 0x87, 0xD9, 0xB4, 0xD9, ++ 0x8A, 0xD9, 0xB4, 0xF6, 0xDB, 0x95, 0xD9, 0x94, ++ 0xF6, 0xDB, 0x81, 0xD9, 0x94, 0xF6, 0xDB, 0x92, ++ 0xD9, 0x94, 0xF6, 0xE0, 0xA4, 0xA8, 0xE0, 0xA4, ++ 0xBC, 0xF6, 0xE0, 0xA4, 0xB0, 0xE0, 0xA4, 0xBC, ++ 0xF6, 0xE0, 0xA4, 0xB3, 0xE0, 0xA4, 0xBC, 0xF6, ++ 0xE0, 0xA4, 0x95, 0xE0, 0xA4, 0xBC, 0xF6, 0xE0, ++ 0xA4, 0x96, 0xE0, 0xA4, 0xBC, 0xF6, 0xE0, 0xA4, ++ 0x97, 0xE0, 0xA4, 0xBC, 0xF6, 0xE0, 0xA4, 0x9C, ++ 0xE0, 0xA4, 0xBC, 0xF6, 0xE0, 0xA4, 0xA1, 0xE0, ++ 0xA4, 0xBC, 0xF6, 0xE0, 0xA4, 0xA2, 0xE0, 0xA4, ++ 0xBC, 0xF6, 0xE0, 0xA4, 0xAB, 0xE0, 0xA4, 0xBC, ++ 0xF6, 0xE0, 0xA4, 0xAF, 0xE0, 0xA4, 0xBC, 0xF6, ++ 0xE0, 0xA7, 0x87, 0xE0, 0xA6, 0xBE, 0xF6, 0xE0, ++ 0xA7, 0x87, 0xE0, 0xA7, 0x97, 0xF6, 0xE0, 0xA6, ++ 0xA1, 0xE0, 0xA6, 0xBC, 0xF6, 0xE0, 0xA6, 0xA2, ++ 0xE0, 0xA6, 0xBC, 0xF6, 0xE0, 0xA6, 0xAF, 0xE0, ++ 0xA6, 0xBC, 0xF6, 0xE0, 0xA8, 0xB2, 0xE0, 0xA8, ++ 0xBC, 0xF6, 0xE0, 0xA8, 0xB8, 0xE0, 0xA8, 0xBC, ++ 0xF6, 0xE0, 0xA8, 0x96, 0xE0, 0xA8, 0xBC, 0xF6, ++ 0xE0, 0xA8, 0x97, 0xE0, 0xA8, 0xBC, 0xF6, 0xE0, ++ 0xA8, 0x9C, 0xE0, 0xA8, 
0xBC, 0xF6, 0xE0, 0xA8, ++ 0xAB, 0xE0, 0xA8, 0xBC, 0xF6, 0xE0, 0xAD, 0x87, ++ 0xE0, 0xAD, 0x96, 0xF6, 0xE0, 0xAD, 0x87, 0xE0, ++ 0xAC, 0xBE, 0xF6, 0xE0, 0xAD, 0x87, 0xE0, 0xAD, ++ 0x97, 0xF6, 0xE0, 0xAC, 0xA1, 0xE0, 0xAC, 0xBC, ++ 0xF6, 0xE0, 0xAC, 0xA2, 0xE0, 0xAC, 0xBC, 0xF6, ++ 0xE0, 0xAE, 0x92, 0xE0, 0xAF, 0x97, 0xF6, 0xE0, ++ 0xAF, 0x86, 0xE0, 0xAE, 0xBE, 0xF6, 0xE0, 0xAF, ++ 0x87, 0xE0, 0xAE, 0xBE, 0xF6, 0xE0, 0xAF, 0x86, ++ 0xE0, 0xAF, 0x97, 0xF6, 0xE0, 0xB1, 0x86, 0xE0, ++ 0xB1, 0x96, 0xF6, 0xE0, 0xB2, 0xBF, 0xE0, 0xB3, ++ 0x95, 0xF6, 0xE0, 0xB3, 0x86, 0xE0, 0xB3, 0x95, ++ 0xF6, 0xE0, 0xB3, 0x86, 0xE0, 0xB3, 0x96, 0xF6, ++ 0xE0, 0xB3, 0x86, 0xE0, 0xB3, 0x82, 0xF6, 0xE0, ++ 0xB3, 0x86, 0xE0, 0xB3, 0x82, 0xE0, 0xB3, 0x95, ++ 0xF6, 0xE0, 0xB5, 0x86, 0xE0, 0xB4, 0xBE, 0xF6, ++ 0xE0, 0xB5, 0x87, 0xE0, 0xB4, 0xBE, 0xF6, 0xE0, ++ 0xB5, 0x86, 0xE0, 0xB5, 0x97, 0xF6, 0xE0, 0xB7, ++ 0x99, 0xE0, 0xB7, 0x8A, 0xF6, 0xE0, 0xB7, 0x99, ++ 0xE0, 0xB7, 0x8F, 0xF6, 0xE0, 0xB7, 0x99, 0xE0, ++ 0xB7, 0x8F, 0xE0, 0xB7, 0x8A, 0xF6, 0xE0, 0xB7, ++ 0x99, 0xE0, 0xB7, 0x9F, 0xE0, 0xB9, 0x8D, 0xE0, ++ 0xB8, 0xB2, 0xE0, 0xBB, 0x8D, 0xE0, 0xBA, 0xB2, ++ 0xE0, 0xBA, 0xAB, 0xE0, 0xBA, 0x99, 0xE0, 0xBA, ++ 0xAB, 0xE0, 0xBA, 0xA1, 0xE0, 0xBC, 0x8B, 0xF6, ++ 0xE0, 0xBD, 0x82, 0xE0, 0xBE, 0xB7, 0xF6, 0xE0, ++ 0xBD, 0x8C, 0xE0, 0xBE, 0xB7, 0xF6, 0xE0, 0xBD, ++ 0x91, 0xE0, 0xBE, 0xB7, 0xF6, 0xE0, 0xBD, 0x96, ++ 0xE0, 0xBE, 0xB7, 0xF6, 0xE0, 0xBD, 0x9B, 0xE0, ++ 0xBE, 0xB7, 0xF6, 0xE0, 0xBD, 0x80, 0xE0, 0xBE, ++ 0xB5, 0xF6, 0xE0, 0xBD, 0xB1, 0xE0, 0xBD, 0xB2, ++ 0xF6, 0xE0, 0xBD, 0xB1, 0xE0, 0xBD, 0xB4, 0xF6, ++ 0xE0, 0xBE, 0xB2, 0xE0, 0xBE, 0x80, 0xE0, 0xBE, ++ 0xB2, 0xE0, 0xBD, 0xB1, 0xE0, 0xBE, 0x80, 0xF6, ++ 0xE0, 0xBE, 0xB3, 0xE0, 0xBE, 0x80, 0xE0, 0xBE, ++ 0xB3, 0xE0, 0xBD, 0xB1, 0xE0, 0xBE, 0x80, 0xF6, ++ 0xE0, 0xBD, 0xB1, 0xE0, 0xBE, 0x80, 0xF6, 0xE0, ++ 0xBE, 0x92, 0xE0, 0xBE, 0xB7, 0xF6, 0xE0, 0xBE, ++ 0x9C, 0xE0, 0xBE, 0xB7, 0xF6, 0xE0, 0xBE, 0xA1, ++ 0xE0, 0xBE, 0xB7, 0xF6, 0xE0, 0xBE, 0xA6, 0xE0, ++ 0xBE, 0xB7, 0xF6, 0xE0, 0xBE, 0xAB, 0xE0, 0xBE, ++ 0xB7, 0xF6, 0xE0, 0xBE, 0x90, 0xE0, 0xBE, 0xB5, ++ 0xF6, 0xE1, 0x80, 0xA5, 0xE1, 0x80, 0xAE, 0xE1, ++ 0x83, 0x9C, 0xF6, 0xE1, 0xAC, 0x85, 0xE1, 0xAC, ++ 0xB5, 0xF6, 0xE1, 0xAC, 0x87, 0xE1, 0xAC, 0xB5, ++ 0xF6, 0xE1, 0xAC, 0x89, 0xE1, 0xAC, 0xB5, 0xF6, ++ 0xE1, 0xAC, 0x8B, 0xE1, 0xAC, 0xB5, 0xF6, 0xE1, ++ 0xAC, 0x8D, 0xE1, 0xAC, 0xB5, 0xF6, 0xE1, 0xAC, ++ 0x91, 0xE1, 0xAC, 0xB5, 0xF6, 0xE1, 0xAC, 0xBA, ++ 0xE1, 0xAC, 0xB5, 0xF6, 0xE1, 0xAC, 0xBC, 0xE1, ++ 0xAC, 0xB5, 0xF6, 0xE1, 0xAC, 0xBE, 0xE1, 0xAC, ++ 0xB5, 0xF6, 0xE1, 0xAC, 0xBF, 0xE1, 0xAC, 0xB5, ++ 0xF6, 0xE1, 0xAD, 0x82, 0xE1, 0xAC, 0xB5, 0x41, ++ 0xC3, 0x86, 0x42, 0x44, 0x45, 0xC6, 0x8E, 0x47, ++ 0x48, 0x49, 0x4A, 0x4B, 0x4C, 0x4D, 0x4E, 0x4F, ++ 0xC8, 0xA2, 0x50, 0x52, 0x54, 0x55, 0x57, 0x61, ++ 0xC9, 0x90, 0xC9, 0x91, 0xE1, 0xB4, 0x82, 0x62, ++ 0x64, 0x65, 0xC9, 0x99, 0xC9, 0x9B, 0xC9, 0x9C, ++ 0x67, 0x6B, 0x6D, 0xC5, 0x8B, 0x6F, 0xC9, 0x94, ++ 0xE1, 0xB4, 0x96, 0xE1, 0xB4, 0x97, 0x70, 0x74, ++ 0x75, 0xE1, 0xB4, 0x9D, 0xC9, 0xAF, 0x76, 0xE1, ++ 0xB4, 0xA5, 0xCE, 0xB2, 0xCE, 0xB3, 0xCE, 0xB4, ++ 0xCF, 0x86, 0xCF, 0x87, 0x69, 0x72, 0x75, 0x76, ++ 0xCE, 0xB2, 0xCE, 0xB3, 0xCF, 0x81, 0xCF, 0x86, ++ 0xCF, 0x87, 0xD0, 0xBD, 0xC9, 0x92, 0x63, 0xC9, ++ 0x95, 0xC3, 0xB0, 0xC9, 0x9C, 0x66, 0xC9, 0x9F, ++ 0xC9, 0xA1, 0xC9, 0xA5, 0xC9, 0xA8, 0xC9, 0xA9, ++ 0xC9, 0xAA, 0xE1, 0xB5, 0xBB, 0xCA, 0x9D, 0xC9, ++ 0xAD, 0xE1, 0xB6, 0x85, 0xCA, 0x9F, 0xC9, 0xB1, ++ 0xC9, 0xB0, 0xC9, 0xB2, 0xC9, 0xB3, 0xC9, 0xB4, ++ 0xC9, 
0xB5, 0xC9, 0xB8, 0xCA, 0x82, 0xCA, 0x83, ++ 0xC6, 0xAB, 0xCA, 0x89, 0xCA, 0x8A, 0xE1, 0xB4, ++ 0x9C, 0xCA, 0x8B, 0xCA, 0x8C, 0x7A, 0xCA, 0x90, ++ 0xCA, 0x91, 0xCA, 0x92, 0xCE, 0xB8, 0xF6, 0x41, ++ 0xCC, 0xA5, 0xF6, 0x61, 0xCC, 0xA5, 0xF6, 0x42, ++ 0xCC, 0x87, 0xF6, 0x62, 0xCC, 0x87, 0xF6, 0x42, ++ 0xCC, 0xA3, 0xF6, 0x62, 0xCC, 0xA3, 0xF6, 0x42, ++ 0xCC, 0xB1, 0xF6, 0x62, 0xCC, 0xB1, 0xF6, 0x43, ++ 0xCC, 0xA7, 0xCC, 0x81, 0xF6, 0x63, 0xCC, 0xA7, ++ 0xCC, 0x81, 0xF6, 0x44, 0xCC, 0x87, 0xF6, 0x64, ++ 0xCC, 0x87, 0xF6, 0x44, 0xCC, 0xA3, 0xF6, 0x64, ++ 0xCC, 0xA3, 0xF6, 0x44, 0xCC, 0xB1, 0xF6, 0x64, ++ 0xCC, 0xB1, 0xF6, 0x44, 0xCC, 0xA7, 0xF6, 0x64, ++ 0xCC, 0xA7, 0xF6, 0x44, 0xCC, 0xAD, 0xF6, 0x64, ++ 0xCC, 0xAD, 0xF6, 0x45, 0xCC, 0x84, 0xCC, 0x80, ++ 0xF6, 0x65, 0xCC, 0x84, 0xCC, 0x80, 0xF6, 0x45, ++ 0xCC, 0x84, 0xCC, 0x81, 0xF6, 0x65, 0xCC, 0x84, ++ 0xCC, 0x81, 0xF6, 0x45, 0xCC, 0xAD, 0xF6, 0x65, ++ 0xCC, 0xAD, 0xF6, 0x45, 0xCC, 0xB0, 0xF6, 0x65, ++ 0xCC, 0xB0, 0xF6, 0x45, 0xCC, 0xA7, 0xCC, 0x86, ++ 0xF6, 0x65, 0xCC, 0xA7, 0xCC, 0x86, 0xF6, 0x46, ++ 0xCC, 0x87, 0xF6, 0x66, 0xCC, 0x87, 0xF6, 0x47, ++ 0xCC, 0x84, 0xF6, 0x67, 0xCC, 0x84, 0xF6, 0x48, ++ 0xCC, 0x87, 0xF6, 0x68, 0xCC, 0x87, 0xF6, 0x48, ++ 0xCC, 0xA3, 0xF6, 0x68, 0xCC, 0xA3, 0xF6, 0x48, ++ 0xCC, 0x88, 0xF6, 0x68, 0xCC, 0x88, 0xF6, 0x48, ++ 0xCC, 0xA7, 0xF6, 0x68, 0xCC, 0xA7, 0xF6, 0x48, ++ 0xCC, 0xAE, 0xF6, 0x68, 0xCC, 0xAE, 0xF6, 0x49, ++ 0xCC, 0xB0, 0xF6, 0x69, 0xCC, 0xB0, 0xF6, 0x49, ++ 0xCC, 0x88, 0xCC, 0x81, 0xF6, 0x69, 0xCC, 0x88, ++ 0xCC, 0x81, 0xF6, 0x4B, 0xCC, 0x81, 0xF6, 0x6B, ++ 0xCC, 0x81, 0xF6, 0x4B, 0xCC, 0xA3, 0xF6, 0x6B, ++ 0xCC, 0xA3, 0xF6, 0x4B, 0xCC, 0xB1, 0xF6, 0x6B, ++ 0xCC, 0xB1, 0xF6, 0x4C, 0xCC, 0xA3, 0xF6, 0x6C, ++ 0xCC, 0xA3, 0xF6, 0x4C, 0xCC, 0xA3, 0xCC, 0x84, ++ 0xF6, 0x6C, 0xCC, 0xA3, 0xCC, 0x84, 0xF6, 0x4C, ++ 0xCC, 0xB1, 0xF6, 0x6C, 0xCC, 0xB1, 0xF6, 0x4C, ++ 0xCC, 0xAD, 0xF6, 0x6C, 0xCC, 0xAD, 0xF6, 0x4D, ++ 0xCC, 0x81, 0xF6, 0x6D, 0xCC, 0x81, 0xF6, 0x4D, ++ 0xCC, 0x87, 0xF6, 0x6D, 0xCC, 0x87, 0xF6, 0x4D, ++ 0xCC, 0xA3, 0xF6, 0x6D, 0xCC, 0xA3, 0xF6, 0x4E, ++ 0xCC, 0x87, 0xF6, 0x6E, 0xCC, 0x87, 0xF6, 0x4E, ++ 0xCC, 0xA3, 0xF6, 0x6E, 0xCC, 0xA3, 0xF6, 0x4E, ++ 0xCC, 0xB1, 0xF6, 0x6E, 0xCC, 0xB1, 0xF6, 0x4E, ++ 0xCC, 0xAD, 0xF6, 0x6E, 0xCC, 0xAD, 0xF6, 0x4F, ++ 0xCC, 0x83, 0xCC, 0x81, 0xF6, 0x6F, 0xCC, 0x83, ++ 0xCC, 0x81, 0xF6, 0x4F, 0xCC, 0x83, 0xCC, 0x88, ++ 0xF6, 0x6F, 0xCC, 0x83, 0xCC, 0x88, 0xF6, 0x4F, ++ 0xCC, 0x84, 0xCC, 0x80, 0xF6, 0x6F, 0xCC, 0x84, ++ 0xCC, 0x80, 0xF6, 0x4F, 0xCC, 0x84, 0xCC, 0x81, ++ 0xF6, 0x6F, 0xCC, 0x84, 0xCC, 0x81, 0xF6, 0x50, ++ 0xCC, 0x81, 0xF6, 0x70, 0xCC, 0x81, 0xF6, 0x50, ++ 0xCC, 0x87, 0xF6, 0x70, 0xCC, 0x87, 0xF6, 0x52, ++ 0xCC, 0x87, 0xF6, 0x72, 0xCC, 0x87, 0xF6, 0x52, ++ 0xCC, 0xA3, 0xF6, 0x72, 0xCC, 0xA3, 0xF6, 0x52, ++ 0xCC, 0xA3, 0xCC, 0x84, 0xF6, 0x72, 0xCC, 0xA3, ++ 0xCC, 0x84, 0xF6, 0x52, 0xCC, 0xB1, 0xF6, 0x72, ++ 0xCC, 0xB1, 0xF6, 0x53, 0xCC, 0x87, 0xF6, 0x73, ++ 0xCC, 0x87, 0xF6, 0x53, 0xCC, 0xA3, 0xF6, 0x73, ++ 0xCC, 0xA3, 0xF6, 0x53, 0xCC, 0x81, 0xCC, 0x87, ++ 0xF6, 0x73, 0xCC, 0x81, 0xCC, 0x87, 0xF6, 0x53, ++ 0xCC, 0x8C, 0xCC, 0x87, 0xF6, 0x73, 0xCC, 0x8C, ++ 0xCC, 0x87, 0xF6, 0x53, 0xCC, 0xA3, 0xCC, 0x87, ++ 0xF6, 0x73, 0xCC, 0xA3, 0xCC, 0x87, 0xF6, 0x54, ++ 0xCC, 0x87, 0xF6, 0x74, 0xCC, 0x87, 0xF6, 0x54, ++ 0xCC, 0xA3, 0xF6, 0x74, 0xCC, 0xA3, 0xF6, 0x54, ++ 0xCC, 0xB1, 0xF6, 0x74, 0xCC, 0xB1, 0xF6, 0x54, ++ 0xCC, 0xAD, 0xF6, 0x74, 0xCC, 0xAD, 0xF6, 0x55, ++ 0xCC, 0xA4, 0xF6, 0x75, 0xCC, 0xA4, 0xF6, 0x55, ++ 0xCC, 0xB0, 0xF6, 0x75, 0xCC, 0xB0, 0xF6, 
0x55, ++ 0xCC, 0xAD, 0xF6, 0x75, 0xCC, 0xAD, 0xF6, 0x55, ++ 0xCC, 0x83, 0xCC, 0x81, 0xF6, 0x75, 0xCC, 0x83, ++ 0xCC, 0x81, 0xF6, 0x55, 0xCC, 0x84, 0xCC, 0x88, ++ 0xF6, 0x75, 0xCC, 0x84, 0xCC, 0x88, 0xF6, 0x56, ++ 0xCC, 0x83, 0xF6, 0x76, 0xCC, 0x83, 0xF6, 0x56, ++ 0xCC, 0xA3, 0xF6, 0x76, 0xCC, 0xA3, 0xF6, 0x57, ++ 0xCC, 0x80, 0xF6, 0x77, 0xCC, 0x80, 0xF6, 0x57, ++ 0xCC, 0x81, 0xF6, 0x77, 0xCC, 0x81, 0xF6, 0x57, ++ 0xCC, 0x88, 0xF6, 0x77, 0xCC, 0x88, 0xF6, 0x57, ++ 0xCC, 0x87, 0xF6, 0x77, 0xCC, 0x87, 0xF6, 0x57, ++ 0xCC, 0xA3, 0xF6, 0x77, 0xCC, 0xA3, 0xF6, 0x58, ++ 0xCC, 0x87, 0xF6, 0x78, 0xCC, 0x87, 0xF6, 0x58, ++ 0xCC, 0x88, 0xF6, 0x78, 0xCC, 0x88, 0xF6, 0x59, ++ 0xCC, 0x87, 0xF6, 0x79, 0xCC, 0x87, 0xF6, 0x5A, ++ 0xCC, 0x82, 0xF6, 0x7A, 0xCC, 0x82, 0xF6, 0x5A, ++ 0xCC, 0xA3, 0xF6, 0x7A, 0xCC, 0xA3, 0xF6, 0x5A, ++ 0xCC, 0xB1, 0xF6, 0x7A, 0xCC, 0xB1, 0xF6, 0x68, ++ 0xCC, 0xB1, 0xF6, 0x74, 0xCC, 0x88, 0xF6, 0x77, ++ 0xCC, 0x8A, 0xF6, 0x79, 0xCC, 0x8A, 0x61, 0xCA, ++ 0xBE, 0xF5, 0x05, 0xC5, 0xBF, 0xCC, 0x87, 0x73, ++ 0xCC, 0x87, 0xF6, 0x41, 0xCC, 0xA3, 0xF6, 0x61, ++ 0xCC, 0xA3, 0xF6, 0x41, 0xCC, 0x89, 0xF6, 0x61, ++ 0xCC, 0x89, 0xF6, 0x41, 0xCC, 0x82, 0xCC, 0x81, ++ 0xF6, 0x61, 0xCC, 0x82, 0xCC, 0x81, 0xF6, 0x41, ++ 0xCC, 0x82, 0xCC, 0x80, 0xF6, 0x61, 0xCC, 0x82, ++ 0xCC, 0x80, 0xF6, 0x41, 0xCC, 0x82, 0xCC, 0x89, ++ 0xF6, 0x61, 0xCC, 0x82, 0xCC, 0x89, 0xF6, 0x41, ++ 0xCC, 0x82, 0xCC, 0x83, 0xF6, 0x61, 0xCC, 0x82, ++ 0xCC, 0x83, 0xF6, 0x41, 0xCC, 0xA3, 0xCC, 0x82, ++ 0xF6, 0x61, 0xCC, 0xA3, 0xCC, 0x82, 0xF6, 0x41, ++ 0xCC, 0x86, 0xCC, 0x81, 0xF6, 0x61, 0xCC, 0x86, ++ 0xCC, 0x81, 0xF6, 0x41, 0xCC, 0x86, 0xCC, 0x80, ++ 0xF6, 0x61, 0xCC, 0x86, 0xCC, 0x80, 0xF6, 0x41, ++ 0xCC, 0x86, 0xCC, 0x89, 0xF6, 0x61, 0xCC, 0x86, ++ 0xCC, 0x89, 0xF6, 0x41, 0xCC, 0x86, 0xCC, 0x83, ++ 0xF6, 0x61, 0xCC, 0x86, 0xCC, 0x83, 0xF6, 0x41, ++ 0xCC, 0xA3, 0xCC, 0x86, 0xF6, 0x61, 0xCC, 0xA3, ++ 0xCC, 0x86, 0xF6, 0x45, 0xCC, 0xA3, 0xF6, 0x65, ++ 0xCC, 0xA3, 0xF6, 0x45, 0xCC, 0x89, 0xF6, 0x65, ++ 0xCC, 0x89, 0xF6, 0x45, 0xCC, 0x83, 0xF6, 0x65, ++ 0xCC, 0x83, 0xF6, 0x45, 0xCC, 0x82, 0xCC, 0x81, ++ 0xF6, 0x65, 0xCC, 0x82, 0xCC, 0x81, 0xF6, 0x45, ++ 0xCC, 0x82, 0xCC, 0x80, 0xF6, 0x65, 0xCC, 0x82, ++ 0xCC, 0x80, 0xF6, 0x45, 0xCC, 0x82, 0xCC, 0x89, ++ 0xF6, 0x65, 0xCC, 0x82, 0xCC, 0x89, 0xF6, 0x45, ++ 0xCC, 0x82, 0xCC, 0x83, 0xF6, 0x65, 0xCC, 0x82, ++ 0xCC, 0x83, 0xF6, 0x45, 0xCC, 0xA3, 0xCC, 0x82, ++ 0xF6, 0x65, 0xCC, 0xA3, 0xCC, 0x82, 0xF6, 0x49, ++ 0xCC, 0x89, 0xF6, 0x69, 0xCC, 0x89, 0xF6, 0x49, ++ 0xCC, 0xA3, 0xF6, 0x69, 0xCC, 0xA3, 0xF6, 0x4F, ++ 0xCC, 0xA3, 0xF6, 0x6F, 0xCC, 0xA3, 0xF6, 0x4F, ++ 0xCC, 0x89, 0xF6, 0x6F, 0xCC, 0x89, 0xF6, 0x4F, ++ 0xCC, 0x82, 0xCC, 0x81, 0xF6, 0x6F, 0xCC, 0x82, ++ 0xCC, 0x81, 0xF6, 0x4F, 0xCC, 0x82, 0xCC, 0x80, ++ 0xF6, 0x6F, 0xCC, 0x82, 0xCC, 0x80, 0xF6, 0x4F, ++ 0xCC, 0x82, 0xCC, 0x89, 0xF6, 0x6F, 0xCC, 0x82, ++ 0xCC, 0x89, 0xF6, 0x4F, 0xCC, 0x82, 0xCC, 0x83, ++ 0xF6, 0x6F, 0xCC, 0x82, 0xCC, 0x83, 0xF6, 0x4F, ++ 0xCC, 0xA3, 0xCC, 0x82, 0xF6, 0x6F, 0xCC, 0xA3, ++ 0xCC, 0x82, 0xF6, 0x4F, 0xCC, 0x9B, 0xCC, 0x81, ++ 0xF6, 0x6F, 0xCC, 0x9B, 0xCC, 0x81, 0xF6, 0x4F, ++ 0xCC, 0x9B, 0xCC, 0x80, 0xF6, 0x6F, 0xCC, 0x9B, ++ 0xCC, 0x80, 0xF6, 0x4F, 0xCC, 0x9B, 0xCC, 0x89, ++ 0xF6, 0x6F, 0xCC, 0x9B, 0xCC, 0x89, 0xF6, 0x4F, ++ 0xCC, 0x9B, 0xCC, 0x83, 0xF6, 0x6F, 0xCC, 0x9B, ++ 0xCC, 0x83, 0xF6, 0x4F, 0xCC, 0x9B, 0xCC, 0xA3, ++ 0xF6, 0x6F, 0xCC, 0x9B, 0xCC, 0xA3, 0xF6, 0x55, ++ 0xCC, 0xA3, 0xF6, 0x75, 0xCC, 0xA3, 0xF6, 0x55, ++ 0xCC, 0x89, 0xF6, 0x75, 0xCC, 0x89, 0xF6, 0x55, ++ 0xCC, 0x9B, 0xCC, 0x81, 
0xF6, 0x75, 0xCC, 0x9B, ++ 0xCC, 0x81, 0xF6, 0x55, 0xCC, 0x9B, 0xCC, 0x80, ++ 0xF6, 0x75, 0xCC, 0x9B, 0xCC, 0x80, 0xF6, 0x55, ++ 0xCC, 0x9B, 0xCC, 0x89, 0xF6, 0x75, 0xCC, 0x9B, ++ 0xCC, 0x89, 0xF6, 0x55, 0xCC, 0x9B, 0xCC, 0x83, ++ 0xF6, 0x75, 0xCC, 0x9B, 0xCC, 0x83, 0xF6, 0x55, ++ 0xCC, 0x9B, 0xCC, 0xA3, 0xF6, 0x75, 0xCC, 0x9B, ++ 0xCC, 0xA3, 0xF6, 0x59, 0xCC, 0x80, 0xF6, 0x79, ++ 0xCC, 0x80, 0xF6, 0x59, 0xCC, 0xA3, 0xF6, 0x79, ++ 0xCC, 0xA3, 0xF6, 0x59, 0xCC, 0x89, 0xF6, 0x79, ++ 0xCC, 0x89, 0xF6, 0x59, 0xCC, 0x83, 0xF6, 0x79, ++ 0xCC, 0x83, 0xF6, 0xCE, 0xB1, 0xCC, 0x93, 0xF6, ++ 0xCE, 0xB1, 0xCC, 0x94, 0xF6, 0xCE, 0xB1, 0xCC, ++ 0x93, 0xCC, 0x80, 0xF6, 0xCE, 0xB1, 0xCC, 0x94, ++ 0xCC, 0x80, 0xF6, 0xCE, 0xB1, 0xCC, 0x93, 0xCC, ++ 0x81, 0xF6, 0xCE, 0xB1, 0xCC, 0x94, 0xCC, 0x81, ++ 0xF6, 0xCE, 0xB1, 0xCC, 0x93, 0xCD, 0x82, 0xF6, ++ 0xCE, 0xB1, 0xCC, 0x94, 0xCD, 0x82, 0xF6, 0xCE, ++ 0x91, 0xCC, 0x93, 0xF6, 0xCE, 0x91, 0xCC, 0x94, ++ 0xF6, 0xCE, 0x91, 0xCC, 0x93, 0xCC, 0x80, 0xF6, ++ 0xCE, 0x91, 0xCC, 0x94, 0xCC, 0x80, 0xF6, 0xCE, ++ 0x91, 0xCC, 0x93, 0xCC, 0x81, 0xF6, 0xCE, 0x91, ++ 0xCC, 0x94, 0xCC, 0x81, 0xF6, 0xCE, 0x91, 0xCC, ++ 0x93, 0xCD, 0x82, 0xF6, 0xCE, 0x91, 0xCC, 0x94, ++ 0xCD, 0x82, 0xF6, 0xCE, 0xB5, 0xCC, 0x93, 0xF6, ++ 0xCE, 0xB5, 0xCC, 0x94, 0xF6, 0xCE, 0xB5, 0xCC, ++ 0x93, 0xCC, 0x80, 0xF6, 0xCE, 0xB5, 0xCC, 0x94, ++ 0xCC, 0x80, 0xF6, 0xCE, 0xB5, 0xCC, 0x93, 0xCC, ++ 0x81, 0xF6, 0xCE, 0xB5, 0xCC, 0x94, 0xCC, 0x81, ++ 0xF6, 0xCE, 0x95, 0xCC, 0x93, 0xF6, 0xCE, 0x95, ++ 0xCC, 0x94, 0xF6, 0xCE, 0x95, 0xCC, 0x93, 0xCC, ++ 0x80, 0xF6, 0xCE, 0x95, 0xCC, 0x94, 0xCC, 0x80, ++ 0xF6, 0xCE, 0x95, 0xCC, 0x93, 0xCC, 0x81, 0xF6, ++ 0xCE, 0x95, 0xCC, 0x94, 0xCC, 0x81, 0xF6, 0xCE, ++ 0xB7, 0xCC, 0x93, 0xF6, 0xCE, 0xB7, 0xCC, 0x94, ++ 0xF6, 0xCE, 0xB7, 0xCC, 0x93, 0xCC, 0x80, 0xF6, ++ 0xCE, 0xB7, 0xCC, 0x94, 0xCC, 0x80, 0xF6, 0xCE, ++ 0xB7, 0xCC, 0x93, 0xCC, 0x81, 0xF6, 0xCE, 0xB7, ++ 0xCC, 0x94, 0xCC, 0x81, 0xF6, 0xCE, 0xB7, 0xCC, ++ 0x93, 0xCD, 0x82, 0xF6, 0xCE, 0xB7, 0xCC, 0x94, ++ 0xCD, 0x82, 0xF6, 0xCE, 0x97, 0xCC, 0x93, 0xF6, ++ 0xCE, 0x97, 0xCC, 0x94, 0xF6, 0xCE, 0x97, 0xCC, ++ 0x93, 0xCC, 0x80, 0xF6, 0xCE, 0x97, 0xCC, 0x94, ++ 0xCC, 0x80, 0xF6, 0xCE, 0x97, 0xCC, 0x93, 0xCC, ++ 0x81, 0xF6, 0xCE, 0x97, 0xCC, 0x94, 0xCC, 0x81, ++ 0xF6, 0xCE, 0x97, 0xCC, 0x93, 0xCD, 0x82, 0xF6, ++ 0xCE, 0x97, 0xCC, 0x94, 0xCD, 0x82, 0xF6, 0xCE, ++ 0xB9, 0xCC, 0x93, 0xF6, 0xCE, 0xB9, 0xCC, 0x94, ++ 0xF6, 0xCE, 0xB9, 0xCC, 0x93, 0xCC, 0x80, 0xF6, ++ 0xCE, 0xB9, 0xCC, 0x94, 0xCC, 0x80, 0xF6, 0xCE, ++ 0xB9, 0xCC, 0x93, 0xCC, 0x81, 0xF6, 0xCE, 0xB9, ++ 0xCC, 0x94, 0xCC, 0x81, 0xF6, 0xCE, 0xB9, 0xCC, ++ 0x93, 0xCD, 0x82, 0xF6, 0xCE, 0xB9, 0xCC, 0x94, ++ 0xCD, 0x82, 0xF6, 0xCE, 0x99, 0xCC, 0x93, 0xF6, ++ 0xCE, 0x99, 0xCC, 0x94, 0xF6, 0xCE, 0x99, 0xCC, ++ 0x93, 0xCC, 0x80, 0xF6, 0xCE, 0x99, 0xCC, 0x94, ++ 0xCC, 0x80, 0xF6, 0xCE, 0x99, 0xCC, 0x93, 0xCC, ++ 0x81, 0xF6, 0xCE, 0x99, 0xCC, 0x94, 0xCC, 0x81, ++ 0xF6, 0xCE, 0x99, 0xCC, 0x93, 0xCD, 0x82, 0xF6, ++ 0xCE, 0x99, 0xCC, 0x94, 0xCD, 0x82, 0xF6, 0xCE, ++ 0xBF, 0xCC, 0x93, 0xF6, 0xCE, 0xBF, 0xCC, 0x94, ++ 0xF6, 0xCE, 0xBF, 0xCC, 0x93, 0xCC, 0x80, 0xF6, ++ 0xCE, 0xBF, 0xCC, 0x94, 0xCC, 0x80, 0xF6, 0xCE, ++ 0xBF, 0xCC, 0x93, 0xCC, 0x81, 0xF6, 0xCE, 0xBF, ++ 0xCC, 0x94, 0xCC, 0x81, 0xF6, 0xCE, 0x9F, 0xCC, ++ 0x93, 0xF6, 0xCE, 0x9F, 0xCC, 0x94, 0xF6, 0xCE, ++ 0x9F, 0xCC, 0x93, 0xCC, 0x80, 0xF6, 0xCE, 0x9F, ++ 0xCC, 0x94, 0xCC, 0x80, 0xF6, 0xCE, 0x9F, 0xCC, ++ 0x93, 0xCC, 0x81, 0xF6, 0xCE, 0x9F, 0xCC, 0x94, ++ 0xCC, 0x81, 0xF6, 0xCF, 0x85, 0xCC, 0x93, 0xF6, ++ 0xCF, 
0x85, 0xCC, 0x94, 0xF6, 0xCF, 0x85, 0xCC, ++ 0x93, 0xCC, 0x80, 0xF6, 0xCF, 0x85, 0xCC, 0x94, ++ 0xCC, 0x80, 0xF6, 0xCF, 0x85, 0xCC, 0x93, 0xCC, ++ 0x81, 0xF6, 0xCF, 0x85, 0xCC, 0x94, 0xCC, 0x81, ++ 0xF6, 0xCF, 0x85, 0xCC, 0x93, 0xCD, 0x82, 0xF6, ++ 0xCF, 0x85, 0xCC, 0x94, 0xCD, 0x82, 0xF6, 0xCE, ++ 0xA5, 0xCC, 0x94, 0xF6, 0xCE, 0xA5, 0xCC, 0x94, ++ 0xCC, 0x80, 0xF6, 0xCE, 0xA5, 0xCC, 0x94, 0xCC, ++ 0x81, 0xF6, 0xCE, 0xA5, 0xCC, 0x94, 0xCD, 0x82, ++ 0xF6, 0xCF, 0x89, 0xCC, 0x93, 0xF6, 0xCF, 0x89, ++ 0xCC, 0x94, 0xF6, 0xCF, 0x89, 0xCC, 0x93, 0xCC, ++ 0x80, 0xF6, 0xCF, 0x89, 0xCC, 0x94, 0xCC, 0x80, ++ 0xF6, 0xCF, 0x89, 0xCC, 0x93, 0xCC, 0x81, 0xF6, ++ 0xCF, 0x89, 0xCC, 0x94, 0xCC, 0x81, 0xF6, 0xCF, ++ 0x89, 0xCC, 0x93, 0xCD, 0x82, 0xF6, 0xCF, 0x89, ++ 0xCC, 0x94, 0xCD, 0x82, 0xF6, 0xCE, 0xA9, 0xCC, ++ 0x93, 0xF6, 0xCE, 0xA9, 0xCC, 0x94, 0xF6, 0xCE, ++ 0xA9, 0xCC, 0x93, 0xCC, 0x80, 0xF6, 0xCE, 0xA9, ++ 0xCC, 0x94, 0xCC, 0x80, 0xF6, 0xCE, 0xA9, 0xCC, ++ 0x93, 0xCC, 0x81, 0xF6, 0xCE, 0xA9, 0xCC, 0x94, ++ 0xCC, 0x81, 0xF6, 0xCE, 0xA9, 0xCC, 0x93, 0xCD, ++ 0x82, 0xF6, 0xCE, 0xA9, 0xCC, 0x94, 0xCD, 0x82, ++ 0xF6, 0xCE, 0xB1, 0xCC, 0x80, 0xF6, 0xCE, 0xB1, ++ 0xCC, 0x81, 0xF6, 0xCE, 0xB5, 0xCC, 0x80, 0xF6, ++ 0xCE, 0xB5, 0xCC, 0x81, 0xF6, 0xCE, 0xB7, 0xCC, ++ 0x80, 0xF6, 0xCE, 0xB7, 0xCC, 0x81, 0xF6, 0xCE, ++ 0xB9, 0xCC, 0x80, 0xF6, 0xCE, 0xB9, 0xCC, 0x81, ++ 0xF6, 0xCE, 0xBF, 0xCC, 0x80, 0xF6, 0xCE, 0xBF, ++ 0xCC, 0x81, 0xF6, 0xCF, 0x85, 0xCC, 0x80, 0xF6, ++ 0xCF, 0x85, 0xCC, 0x81, 0xF6, 0xCF, 0x89, 0xCC, ++ 0x80, 0xF6, 0xCF, 0x89, 0xCC, 0x81, 0xF6, 0xCE, ++ 0xB1, 0xCC, 0x93, 0xCD, 0x85, 0xF6, 0xCE, 0xB1, ++ 0xCC, 0x94, 0xCD, 0x85, 0xF6, 0xCE, 0xB1, 0xCC, ++ 0x93, 0xCC, 0x80, 0xCD, 0x85, 0xF6, 0xCE, 0xB1, ++ 0xCC, 0x94, 0xCC, 0x80, 0xCD, 0x85, 0xF6, 0xCE, ++ 0xB1, 0xCC, 0x93, 0xCC, 0x81, 0xCD, 0x85, 0xF6, ++ 0xCE, 0xB1, 0xCC, 0x94, 0xCC, 0x81, 0xCD, 0x85, ++ 0xF6, 0xCE, 0xB1, 0xCC, 0x93, 0xCD, 0x82, 0xCD, ++ 0x85, 0xF6, 0xCE, 0xB1, 0xCC, 0x94, 0xCD, 0x82, ++ 0xCD, 0x85, 0xF6, 0xCE, 0x91, 0xCC, 0x93, 0xCD, ++ 0x85, 0xF6, 0xCE, 0x91, 0xCC, 0x94, 0xCD, 0x85, ++ 0xF6, 0xCE, 0x91, 0xCC, 0x93, 0xCC, 0x80, 0xCD, ++ 0x85, 0xF6, 0xCE, 0x91, 0xCC, 0x94, 0xCC, 0x80, ++ 0xCD, 0x85, 0xF6, 0xCE, 0x91, 0xCC, 0x93, 0xCC, ++ 0x81, 0xCD, 0x85, 0xF6, 0xCE, 0x91, 0xCC, 0x94, ++ 0xCC, 0x81, 0xCD, 0x85, 0xF6, 0xCE, 0x91, 0xCC, ++ 0x93, 0xCD, 0x82, 0xCD, 0x85, 0xF6, 0xCE, 0x91, ++ 0xCC, 0x94, 0xCD, 0x82, 0xCD, 0x85, 0xF6, 0xCE, ++ 0xB7, 0xCC, 0x93, 0xCD, 0x85, 0xF6, 0xCE, 0xB7, ++ 0xCC, 0x94, 0xCD, 0x85, 0xF6, 0xCE, 0xB7, 0xCC, ++ 0x93, 0xCC, 0x80, 0xCD, 0x85, 0xF6, 0xCE, 0xB7, ++ 0xCC, 0x94, 0xCC, 0x80, 0xCD, 0x85, 0xF6, 0xCE, ++ 0xB7, 0xCC, 0x93, 0xCC, 0x81, 0xCD, 0x85, 0xF6, ++ 0xCE, 0xB7, 0xCC, 0x94, 0xCC, 0x81, 0xCD, 0x85, ++ 0xF6, 0xCE, 0xB7, 0xCC, 0x93, 0xCD, 0x82, 0xCD, ++ 0x85, 0xF6, 0xCE, 0xB7, 0xCC, 0x94, 0xCD, 0x82, ++ 0xCD, 0x85, 0xF6, 0xCE, 0x97, 0xCC, 0x93, 0xCD, ++ 0x85, 0xF6, 0xCE, 0x97, 0xCC, 0x94, 0xCD, 0x85, ++ 0xF6, 0xCE, 0x97, 0xCC, 0x93, 0xCC, 0x80, 0xCD, ++ 0x85, 0xF6, 0xCE, 0x97, 0xCC, 0x94, 0xCC, 0x80, ++ 0xCD, 0x85, 0xF6, 0xCE, 0x97, 0xCC, 0x93, 0xCC, ++ 0x81, 0xCD, 0x85, 0xF6, 0xCE, 0x97, 0xCC, 0x94, ++ 0xCC, 0x81, 0xCD, 0x85, 0xF6, 0xCE, 0x97, 0xCC, ++ 0x93, 0xCD, 0x82, 0xCD, 0x85, 0xF6, 0xCE, 0x97, ++ 0xCC, 0x94, 0xCD, 0x82, 0xCD, 0x85, 0xF6, 0xCF, ++ 0x89, 0xCC, 0x93, 0xCD, 0x85, 0xF6, 0xCF, 0x89, ++ 0xCC, 0x94, 0xCD, 0x85, 0xF6, 0xCF, 0x89, 0xCC, ++ 0x93, 0xCC, 0x80, 0xCD, 0x85, 0xF6, 0xCF, 0x89, ++ 0xCC, 0x94, 0xCC, 0x80, 0xCD, 0x85, 0xF6, 0xCF, ++ 0x89, 0xCC, 0x93, 0xCC, 0x81, 0xCD, 0x85, 
0xF6, ++ 0xCF, 0x89, 0xCC, 0x94, 0xCC, 0x81, 0xCD, 0x85, ++ 0xF6, 0xCF, 0x89, 0xCC, 0x93, 0xCD, 0x82, 0xCD, ++ 0x85, 0xF6, 0xCF, 0x89, 0xCC, 0x94, 0xCD, 0x82, ++ 0xCD, 0x85, 0xF6, 0xCE, 0xA9, 0xCC, 0x93, 0xCD, ++ 0x85, 0xF6, 0xCE, 0xA9, 0xCC, 0x94, 0xCD, 0x85, ++ 0xF6, 0xCE, 0xA9, 0xCC, 0x93, 0xCC, 0x80, 0xCD, ++ 0x85, 0xF6, 0xCE, 0xA9, 0xCC, 0x94, 0xCC, 0x80, ++ 0xCD, 0x85, 0xF6, 0xCE, 0xA9, 0xCC, 0x93, 0xCC, ++ 0x81, 0xCD, 0x85, 0xF6, 0xCE, 0xA9, 0xCC, 0x94, ++ 0xCC, 0x81, 0xCD, 0x85, 0xF6, 0xCE, 0xA9, 0xCC, ++ 0x93, 0xCD, 0x82, 0xCD, 0x85, 0xF6, 0xCE, 0xA9, ++ 0xCC, 0x94, 0xCD, 0x82, 0xCD, 0x85, 0xF6, 0xCE, ++ 0xB1, 0xCC, 0x86, 0xF6, 0xCE, 0xB1, 0xCC, 0x84, ++ 0xF6, 0xCE, 0xB1, 0xCC, 0x80, 0xCD, 0x85, 0xF6, ++ 0xCE, 0xB1, 0xCD, 0x85, 0xF6, 0xCE, 0xB1, 0xCC, ++ 0x81, 0xCD, 0x85, 0xF6, 0xCE, 0xB1, 0xCD, 0x82, ++ 0xF6, 0xCE, 0xB1, 0xCD, 0x82, 0xCD, 0x85, 0xF6, ++ 0xCE, 0x91, 0xCC, 0x86, 0xF6, 0xCE, 0x91, 0xCC, ++ 0x84, 0xF6, 0xCE, 0x91, 0xCC, 0x80, 0xF6, 0xCE, ++ 0x91, 0xCC, 0x81, 0xF6, 0xCE, 0x91, 0xCD, 0x85, ++ 0x20, 0xCC, 0x93, 0xF6, 0xCE, 0xB9, 0x20, 0xCC, ++ 0x93, 0x20, 0xCD, 0x82, 0xF5, 0x05, 0xC2, 0xA8, ++ 0xCD, 0x82, 0x20, 0xCC, 0x88, 0xCD, 0x82, 0xF6, ++ 0xCE, 0xB7, 0xCC, 0x80, 0xCD, 0x85, 0xF6, 0xCE, ++ 0xB7, 0xCD, 0x85, 0xF6, 0xCE, 0xB7, 0xCC, 0x81, ++ 0xCD, 0x85, 0xF6, 0xCE, 0xB7, 0xCD, 0x82, 0xF6, ++ 0xCE, 0xB7, 0xCD, 0x82, 0xCD, 0x85, 0xF6, 0xCE, ++ 0x95, 0xCC, 0x80, 0xF6, 0xCE, 0x95, 0xCC, 0x81, ++ 0xF6, 0xCE, 0x97, 0xCC, 0x80, 0xF6, 0xCE, 0x97, ++ 0xCC, 0x81, 0xF6, 0xCE, 0x97, 0xCD, 0x85, 0xF5, ++ 0x06, 0xE1, 0xBE, 0xBF, 0xCC, 0x80, 0x20, 0xCC, ++ 0x93, 0xCC, 0x80, 0xF5, 0x06, 0xE1, 0xBE, 0xBF, ++ 0xCC, 0x81, 0x20, 0xCC, 0x93, 0xCC, 0x81, 0xF5, ++ 0x06, 0xE1, 0xBE, 0xBF, 0xCD, 0x82, 0x20, 0xCC, ++ 0x93, 0xCD, 0x82, 0xF6, 0xCE, 0xB9, 0xCC, 0x86, ++ 0xF6, 0xCE, 0xB9, 0xCC, 0x84, 0xF6, 0xCE, 0xB9, ++ 0xCC, 0x88, 0xCC, 0x80, 0xF6, 0xCE, 0xB9, 0xCC, ++ 0x88, 0xCC, 0x81, 0xF6, 0xCE, 0xB9, 0xCD, 0x82, ++ 0xF6, 0xCE, 0xB9, 0xCC, 0x88, 0xCD, 0x82, 0xF6, ++ 0xCE, 0x99, 0xCC, 0x86, 0xF6, 0xCE, 0x99, 0xCC, ++ 0x84, 0xF6, 0xCE, 0x99, 0xCC, 0x80, 0xF6, 0xCE, ++ 0x99, 0xCC, 0x81, 0xF5, 0x06, 0xE1, 0xBF, 0xBE, ++ 0xCC, 0x80, 0x20, 0xCC, 0x94, 0xCC, 0x80, 0xF5, ++ 0x06, 0xE1, 0xBF, 0xBE, 0xCC, 0x81, 0x20, 0xCC, ++ 0x94, 0xCC, 0x81, 0xF5, 0x06, 0xE1, 0xBF, 0xBE, ++ 0xCD, 0x82, 0x20, 0xCC, 0x94, 0xCD, 0x82, 0xF6, ++ 0xCF, 0x85, 0xCC, 0x86, 0xF6, 0xCF, 0x85, 0xCC, ++ 0x84, 0xF6, 0xCF, 0x85, 0xCC, 0x88, 0xCC, 0x80, ++ 0xF6, 0xCF, 0x85, 0xCC, 0x88, 0xCC, 0x81, 0xF6, ++ 0xCF, 0x81, 0xCC, 0x93, 0xF6, 0xCF, 0x81, 0xCC, ++ 0x94, 0xF6, 0xCF, 0x85, 0xCD, 0x82, 0xF6, 0xCF, ++ 0x85, 0xCC, 0x88, 0xCD, 0x82, 0xF6, 0xCE, 0xA5, ++ 0xCC, 0x86, 0xF6, 0xCE, 0xA5, 0xCC, 0x84, 0xF6, ++ 0xCE, 0xA5, 0xCC, 0x80, 0xF6, 0xCE, 0xA5, 0xCC, ++ 0x81, 0xF6, 0xCE, 0xA1, 0xCC, 0x94, 0xF5, 0x05, ++ 0xC2, 0xA8, 0xCC, 0x80, 0x20, 0xCC, 0x88, 0xCC, ++ 0x80, 0xF5, 0x05, 0xC2, 0xA8, 0xCC, 0x81, 0x20, ++ 0xCC, 0x88, 0xCC, 0x81, 0xF6, 0x60, 0xF6, 0xCF, ++ 0x89, 0xCC, 0x80, 0xCD, 0x85, 0xF6, 0xCF, 0x89, ++ 0xCD, 0x85, 0xF6, 0xCF, 0x89, 0xCC, 0x81, 0xCD, ++ 0x85, 0xF6, 0xCF, 0x89, 0xCD, 0x82, 0xF6, 0xCF, ++ 0x89, 0xCD, 0x82, 0xCD, 0x85, 0xF6, 0xCE, 0x9F, ++ 0xCC, 0x80, 0xF6, 0xCE, 0x9F, 0xCC, 0x81, 0xF6, ++ 0xCE, 0xA9, 0xCC, 0x80, 0xF6, 0xCE, 0xA9, 0xCC, ++ 0x81, 0xF6, 0xCE, 0xA9, 0xCD, 0x85, 0xF5, 0x03, ++ 0xC2, 0xB4, 0x20, 0xCC, 0x81, 0x20, 0xCC, 0x94, ++ 0xF5, 0x04, 0xE2, 0x80, 0x82, 0x20, 0xF5, 0x04, ++ 0xE2, 0x80, 0x83, 0x20, 0x20, 0x20, 0x20, 0x20, ++ 0x20, 0x20, 0x20, 0x20, 0x20, 0xE2, 0x80, 0x90, ++ 0x20, 0xCC, 0xB3, 0x2E, 
0x2E, 0x2E, 0x2E, 0x2E, ++ 0x2E, 0x20, 0xE2, 0x80, 0xB2, 0xE2, 0x80, 0xB2, ++ 0xE2, 0x80, 0xB2, 0xE2, 0x80, 0xB2, 0xE2, 0x80, ++ 0xB2, 0xE2, 0x80, 0xB5, 0xE2, 0x80, 0xB5, 0xE2, ++ 0x80, 0xB5, 0xE2, 0x80, 0xB5, 0xE2, 0x80, 0xB5, ++ 0x21, 0x21, 0x20, 0xCC, 0x85, 0x3F, 0x3F, 0x3F, ++ 0x21, 0x21, 0x3F, 0xE2, 0x80, 0xB2, 0xE2, 0x80, ++ 0xB2, 0xE2, 0x80, 0xB2, 0xE2, 0x80, 0xB2, 0x20, ++ 0x30, 0x69, 0x34, 0x35, 0x36, 0x37, 0x38, 0x39, ++ 0x2B, 0xE2, 0x88, 0x92, 0x3D, 0x28, 0x29, 0x6E, ++ 0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37, ++ 0x38, 0x39, 0x2B, 0xE2, 0x88, 0x92, 0x3D, 0x28, ++ 0x29, 0x61, 0x65, 0x6F, 0x78, 0xC9, 0x99, 0x52, ++ 0x73, 0x61, 0x2F, 0x63, 0x61, 0x2F, 0x73, 0x43, ++ 0xC2, 0xB0, 0x43, 0x63, 0x2F, 0x6F, 0x63, 0x2F, ++ 0x75, 0xC6, 0x90, 0xC2, 0xB0, 0x46, 0x67, 0x48, ++ 0x48, 0x48, 0x68, 0xC4, 0xA7, 0x49, 0x49, 0x4C, ++ 0x6C, 0x4E, 0x4E, 0x6F, 0x50, 0x51, 0x52, 0x52, ++ 0x52, 0x53, 0x4D, 0x54, 0x45, 0x4C, 0x54, 0x4D, ++ 0x5A, 0xF6, 0xCE, 0xA9, 0x5A, 0xF6, 0x4B, 0xF6, ++ 0x41, 0xCC, 0x8A, 0x42, 0x43, 0x65, 0x45, 0x46, ++ 0x4D, 0x6F, 0xD7, 0x90, 0xD7, 0x91, 0xD7, 0x92, ++ 0xD7, 0x93, 0x69, 0x46, 0x41, 0x58, 0xCF, 0x80, ++ 0xCE, 0xB3, 0xCE, 0x93, 0xCE, 0xA0, 0xE2, 0x88, ++ 0x91, 0x44, 0x64, 0x65, 0x69, 0x6A, 0x31, 0xE2, ++ 0x81, 0x84, 0x33, 0x32, 0xE2, 0x81, 0x84, 0x33, ++ 0x31, 0xE2, 0x81, 0x84, 0x35, 0x32, 0xE2, 0x81, ++ 0x84, 0x35, 0x33, 0xE2, 0x81, 0x84, 0x35, 0x34, ++ 0xE2, 0x81, 0x84, 0x35, 0x31, 0xE2, 0x81, 0x84, ++ 0x36, 0x35, 0xE2, 0x81, 0x84, 0x36, 0x31, 0xE2, ++ 0x81, 0x84, 0x38, 0x33, 0xE2, 0x81, 0x84, 0x38, ++ 0x35, 0xE2, 0x81, 0x84, 0x38, 0x37, 0xE2, 0x81, ++ 0x84, 0x38, 0x31, 0xE2, 0x81, 0x84, 0x49, 0x49, ++ 0x49, 0x49, 0x49, 0x49, 0x49, 0x56, 0x56, 0x56, ++ 0x49, 0x56, 0x49, 0x49, 0x56, 0x49, 0x49, 0x49, ++ 0x49, 0x58, 0x58, 0x58, 0x49, 0x58, 0x49, 0x49, ++ 0x4C, 0x43, 0x44, 0x4D, 0x69, 0x69, 0x69, 0x69, ++ 0x69, 0x69, 0x69, 0x76, 0x76, 0x76, 0x69, 0x76, ++ 0x69, 0x69, 0x76, 0x69, 0x69, 0x69, 0x69, 0x78, ++ 0x78, 0x78, 0x69, 0x78, 0x69, 0x69, 0x6C, 0x63, ++ 0x64, 0x6D, 0xF6, 0xE2, 0x86, 0x90, 0xCC, 0xB8, ++ 0xF6, 0xE2, 0x86, 0x92, 0xCC, 0xB8, 0xF6, 0xE2, ++ 0x86, 0x94, 0xCC, 0xB8, 0xF6, 0xE2, 0x87, 0x90, ++ 0xCC, 0xB8, 0xF6, 0xE2, 0x87, 0x94, 0xCC, 0xB8, ++ 0xF6, 0xE2, 0x87, 0x92, 0xCC, 0xB8, 0xF6, 0xE2, ++ 0x88, 0x83, 0xCC, 0xB8, 0xF6, 0xE2, 0x88, 0x88, ++ 0xCC, 0xB8, 0xF6, 0xE2, 0x88, 0x8B, 0xCC, 0xB8, ++ 0xF6, 0xE2, 0x88, 0xA3, 0xCC, 0xB8, 0xF6, 0xE2, ++ 0x88, 0xA5, 0xCC, 0xB8, 0xE2, 0x88, 0xAB, 0xE2, ++ 0x88, 0xAB, 0xE2, 0x88, 0xAB, 0xE2, 0x88, 0xAB, ++ 0xE2, 0x88, 0xAB, 0xE2, 0x88, 0xAE, 0xE2, 0x88, ++ 0xAE, 0xE2, 0x88, 0xAE, 0xE2, 0x88, 0xAE, 0xE2, ++ 0x88, 0xAE, 0xF6, 0xE2, 0x88, 0xBC, 0xCC, 0xB8, ++ 0xF6, 0xE2, 0x89, 0x83, 0xCC, 0xB8, 0xF6, 0xE2, ++ 0x89, 0x85, 0xCC, 0xB8, 0xF6, 0xE2, 0x89, 0x88, ++ 0xCC, 0xB8, 0xF6, 0x3D, 0xCC, 0xB8, 0xF6, 0xE2, ++ 0x89, 0xA1, 0xCC, 0xB8, 0xF6, 0xE2, 0x89, 0x8D, ++ 0xCC, 0xB8, 0xF6, 0x3C, 0xCC, 0xB8, 0xF6, 0x3E, ++ 0xCC, 0xB8, 0xF6, 0xE2, 0x89, 0xA4, 0xCC, 0xB8, ++ 0xF6, 0xE2, 0x89, 0xA5, 0xCC, 0xB8, 0xF6, 0xE2, ++ 0x89, 0xB2, 0xCC, 0xB8, 0xF6, 0xE2, 0x89, 0xB3, ++ 0xCC, 0xB8, 0xF6, 0xE2, 0x89, 0xB6, 0xCC, 0xB8, ++ 0xF6, 0xE2, 0x89, 0xB7, 0xCC, 0xB8, 0xF6, 0xE2, ++ 0x89, 0xBA, 0xCC, 0xB8, 0xF6, 0xE2, 0x89, 0xBB, ++ 0xCC, 0xB8, 0xF6, 0xE2, 0x8A, 0x82, 0xCC, 0xB8, ++ 0xF6, 0xE2, 0x8A, 0x83, 0xCC, 0xB8, 0xF6, 0xE2, ++ 0x8A, 0x86, 0xCC, 0xB8, 0xF6, 0xE2, 0x8A, 0x87, ++ 0xCC, 0xB8, 0xF6, 0xE2, 0x8A, 0xA2, 0xCC, 0xB8, ++ 0xF6, 0xE2, 0x8A, 0xA8, 0xCC, 0xB8, 0xF6, 0xE2, ++ 0x8A, 0xA9, 0xCC, 0xB8, 0xF6, 0xE2, 0x8A, 0xAB, ++ 0xCC, 
0xB8, 0xF6, 0xE2, 0x89, 0xBC, 0xCC, 0xB8, ++ 0xF6, 0xE2, 0x89, 0xBD, 0xCC, 0xB8, 0xF6, 0xE2, ++ 0x8A, 0x91, 0xCC, 0xB8, 0xF6, 0xE2, 0x8A, 0x92, ++ 0xCC, 0xB8, 0xF6, 0xE2, 0x8A, 0xB2, 0xCC, 0xB8, ++ 0xF6, 0xE2, 0x8A, 0xB3, 0xCC, 0xB8, 0xF6, 0xE2, ++ 0x8A, 0xB4, 0xCC, 0xB8, 0xF6, 0xE2, 0x8A, 0xB5, ++ 0xCC, 0xB8, 0xF6, 0xE3, 0x80, 0x88, 0xF6, 0xE3, ++ 0x80, 0x89, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, ++ 0x37, 0x38, 0x39, 0x31, 0x30, 0x31, 0x31, 0x31, ++ 0x32, 0x31, 0x33, 0x31, 0x34, 0x31, 0x35, 0x31, ++ 0x36, 0x31, 0x37, 0x31, 0x38, 0x31, 0x39, 0x32, ++ 0x30, 0x28, 0x31, 0x29, 0x28, 0x32, 0x29, 0x28, ++ 0x33, 0x29, 0x28, 0x34, 0x29, 0x28, 0x35, 0x29, ++ 0x28, 0x36, 0x29, 0x28, 0x37, 0x29, 0x28, 0x38, ++ 0x29, 0x28, 0x39, 0x29, 0x28, 0x31, 0x30, 0x29, ++ 0x28, 0x31, 0x31, 0x29, 0x28, 0x31, 0x32, 0x29, ++ 0x28, 0x31, 0x33, 0x29, 0x28, 0x31, 0x34, 0x29, ++ 0x28, 0x31, 0x35, 0x29, 0x28, 0x31, 0x36, 0x29, ++ 0x28, 0x31, 0x37, 0x29, 0x28, 0x31, 0x38, 0x29, ++ 0x28, 0x31, 0x39, 0x29, 0x28, 0x32, 0x30, 0x29, ++ 0x31, 0x2E, 0x32, 0x2E, 0x33, 0x2E, 0x34, 0x2E, ++ 0x35, 0x2E, 0x36, 0x2E, 0x37, 0x2E, 0x38, 0x2E, ++ 0x39, 0x2E, 0x31, 0x30, 0x2E, 0x31, 0x31, 0x2E, ++ 0x31, 0x32, 0x2E, 0x31, 0x33, 0x2E, 0x31, 0x34, ++ 0x2E, 0x31, 0x35, 0x2E, 0x31, 0x36, 0x2E, 0x31, ++ 0x37, 0x2E, 0x31, 0x38, 0x2E, 0x31, 0x39, 0x2E, ++ 0x32, 0x30, 0x2E, 0x28, 0x61, 0x29, 0x28, 0x62, ++ 0x29, 0x28, 0x63, 0x29, 0x28, 0x64, 0x29, 0x28, ++ 0x65, 0x29, 0x28, 0x66, 0x29, 0x28, 0x67, 0x29, ++ 0x28, 0x68, 0x29, 0x28, 0x69, 0x29, 0x28, 0x6A, ++ 0x29, 0x28, 0x6B, 0x29, 0x28, 0x6C, 0x29, 0x28, ++ 0x6D, 0x29, 0x28, 0x6E, 0x29, 0x28, 0x6F, 0x29, ++ 0x28, 0x70, 0x29, 0x28, 0x71, 0x29, 0x28, 0x72, ++ 0x29, 0x28, 0x73, 0x29, 0x28, 0x74, 0x29, 0x28, ++ 0x75, 0x29, 0x28, 0x76, 0x29, 0x28, 0x77, 0x29, ++ 0x28, 0x78, 0x29, 0x28, 0x79, 0x29, 0x28, 0x7A, ++ 0x29, 0x41, 0x42, 0x43, 0x44, 0x45, 0x46, 0x47, ++ 0x48, 0x49, 0x4A, 0x4B, 0x4C, 0x4D, 0x4E, 0x4F, ++ 0x50, 0x51, 0x52, 0x53, 0x54, 0x55, 0x56, 0x57, ++ 0x58, 0x59, 0x5A, 0x61, 0x62, 0x63, 0x64, 0x65, ++ 0x66, 0x67, 0x68, 0x69, 0x6A, 0x6B, 0x6C, 0x6D, ++ 0x6E, 0x6F, 0x70, 0x71, 0x72, 0x73, 0x74, 0x75, ++ 0x76, 0x77, 0x78, 0x79, 0x7A, 0x30, 0xE2, 0x88, ++ 0xAB, 0xE2, 0x88, 0xAB, 0xE2, 0x88, 0xAB, 0xE2, ++ 0x88, 0xAB, 0x3A, 0x3A, 0x3D, 0x3D, 0x3D, 0x3D, ++ 0x3D, 0x3D, 0xF6, 0xE2, 0xAB, 0x9D, 0xCC, 0xB8, ++ 0xE2, 0xB5, 0xA1, 0xE6, 0xAF, 0x8D, 0xE9, 0xBE, ++ 0x9F, 0xE4, 0xB8, 0x80, 0xE4, 0xB8, 0xA8, 0xE4, ++ 0xB8, 0xB6, 0xE4, 0xB8, 0xBF, 0xE4, 0xB9, 0x99, ++ 0xE4, 0xBA, 0x85, 0xE4, 0xBA, 0x8C, 0xE4, 0xBA, ++ 0xA0, 0xE4, 0xBA, 0xBA, 0xE5, 0x84, 0xBF, 0xE5, ++ 0x85, 0xA5, 0xE5, 0x85, 0xAB, 0xE5, 0x86, 0x82, ++ 0xE5, 0x86, 0x96, 0xE5, 0x86, 0xAB, 0xE5, 0x87, ++ 0xA0, 0xE5, 0x87, 0xB5, 0xE5, 0x88, 0x80, 0xE5, ++ 0x8A, 0x9B, 0xE5, 0x8B, 0xB9, 0xE5, 0x8C, 0x95, ++ 0xE5, 0x8C, 0x9A, 0xE5, 0x8C, 0xB8, 0xE5, 0x8D, ++ 0x81, 0xE5, 0x8D, 0x9C, 0xE5, 0x8D, 0xA9, 0xE5, ++ 0x8E, 0x82, 0xE5, 0x8E, 0xB6, 0xE5, 0x8F, 0x88, ++ 0xE5, 0x8F, 0xA3, 0xE5, 0x9B, 0x97, 0xE5, 0x9C, ++ 0x9F, 0xE5, 0xA3, 0xAB, 0xE5, 0xA4, 0x82, 0xE5, ++ 0xA4, 0x8A, 0xE5, 0xA4, 0x95, 0xE5, 0xA4, 0xA7, ++ 0xE5, 0xA5, 0xB3, 0xE5, 0xAD, 0x90, 0xE5, 0xAE, ++ 0x80, 0xE5, 0xAF, 0xB8, 0xE5, 0xB0, 0x8F, 0xE5, ++ 0xB0, 0xA2, 0xE5, 0xB0, 0xB8, 0xE5, 0xB1, 0xAE, ++ 0xE5, 0xB1, 0xB1, 0xE5, 0xB7, 0x9B, 0xE5, 0xB7, ++ 0xA5, 0xE5, 0xB7, 0xB1, 0xE5, 0xB7, 0xBE, 0xE5, ++ 0xB9, 0xB2, 0xE5, 0xB9, 0xBA, 0xE5, 0xB9, 0xBF, ++ 0xE5, 0xBB, 0xB4, 0xE5, 0xBB, 0xBE, 0xE5, 0xBC, ++ 0x8B, 0xE5, 0xBC, 0x93, 0xE5, 0xBD, 0x90, 0xE5, ++ 0xBD, 0xA1, 0xE5, 0xBD, 0xB3, 0xE5, 0xBF, 
0x83, ++ 0xE6, 0x88, 0x88, 0xE6, 0x88, 0xB6, 0xE6, 0x89, ++ 0x8B, 0xE6, 0x94, 0xAF, 0xE6, 0x94, 0xB4, 0xE6, ++ 0x96, 0x87, 0xE6, 0x96, 0x97, 0xE6, 0x96, 0xA4, ++ 0xE6, 0x96, 0xB9, 0xE6, 0x97, 0xA0, 0xE6, 0x97, ++ 0xA5, 0xE6, 0x9B, 0xB0, 0xE6, 0x9C, 0x88, 0xE6, ++ 0x9C, 0xA8, 0xE6, 0xAC, 0xA0, 0xE6, 0xAD, 0xA2, ++ 0xE6, 0xAD, 0xB9, 0xE6, 0xAE, 0xB3, 0xE6, 0xAF, ++ 0x8B, 0xE6, 0xAF, 0x94, 0xE6, 0xAF, 0x9B, 0xE6, ++ 0xB0, 0x8F, 0xE6, 0xB0, 0x94, 0xE6, 0xB0, 0xB4, ++ 0xE7, 0x81, 0xAB, 0xE7, 0x88, 0xAA, 0xE7, 0x88, ++ 0xB6, 0xE7, 0x88, 0xBB, 0xE7, 0x88, 0xBF, 0xE7, ++ 0x89, 0x87, 0xE7, 0x89, 0x99, 0xE7, 0x89, 0x9B, ++ 0xE7, 0x8A, 0xAC, 0xE7, 0x8E, 0x84, 0xE7, 0x8E, ++ 0x89, 0xE7, 0x93, 0x9C, 0xE7, 0x93, 0xA6, 0xE7, ++ 0x94, 0x98, 0xE7, 0x94, 0x9F, 0xE7, 0x94, 0xA8, ++ 0xE7, 0x94, 0xB0, 0xE7, 0x96, 0x8B, 0xE7, 0x96, ++ 0x92, 0xE7, 0x99, 0xB6, 0xE7, 0x99, 0xBD, 0xE7, ++ 0x9A, 0xAE, 0xE7, 0x9A, 0xBF, 0xE7, 0x9B, 0xAE, ++ 0xE7, 0x9F, 0x9B, 0xE7, 0x9F, 0xA2, 0xE7, 0x9F, ++ 0xB3, 0xE7, 0xA4, 0xBA, 0xE7, 0xA6, 0xB8, 0xE7, ++ 0xA6, 0xBE, 0xE7, 0xA9, 0xB4, 0xE7, 0xAB, 0x8B, ++ 0xE7, 0xAB, 0xB9, 0xE7, 0xB1, 0xB3, 0xE7, 0xB3, ++ 0xB8, 0xE7, 0xBC, 0xB6, 0xE7, 0xBD, 0x91, 0xE7, ++ 0xBE, 0x8A, 0xE7, 0xBE, 0xBD, 0xE8, 0x80, 0x81, ++ 0xE8, 0x80, 0x8C, 0xE8, 0x80, 0x92, 0xE8, 0x80, ++ 0xB3, 0xE8, 0x81, 0xBF, 0xE8, 0x82, 0x89, 0xE8, ++ 0x87, 0xA3, 0xE8, 0x87, 0xAA, 0xE8, 0x87, 0xB3, ++ 0xE8, 0x87, 0xBC, 0xE8, 0x88, 0x8C, 0xE8, 0x88, ++ 0x9B, 0xE8, 0x88, 0x9F, 0xE8, 0x89, 0xAE, 0xE8, ++ 0x89, 0xB2, 0xE8, 0x89, 0xB8, 0xE8, 0x99, 0x8D, ++ 0xE8, 0x99, 0xAB, 0xE8, 0xA1, 0x80, 0xE8, 0xA1, ++ 0x8C, 0xE8, 0xA1, 0xA3, 0xE8, 0xA5, 0xBE, 0xE8, ++ 0xA6, 0x8B, 0xE8, 0xA7, 0x92, 0xE8, 0xA8, 0x80, ++ 0xE8, 0xB0, 0xB7, 0xE8, 0xB1, 0x86, 0xE8, 0xB1, ++ 0x95, 0xE8, 0xB1, 0xB8, 0xE8, 0xB2, 0x9D, 0xE8, ++ 0xB5, 0xA4, 0xE8, 0xB5, 0xB0, 0xE8, 0xB6, 0xB3, ++ 0xE8, 0xBA, 0xAB, 0xE8, 0xBB, 0x8A, 0xE8, 0xBE, ++ 0x9B, 0xE8, 0xBE, 0xB0, 0xE8, 0xBE, 0xB5, 0xE9, ++ 0x82, 0x91, 0xE9, 0x85, 0x89, 0xE9, 0x87, 0x86, ++ 0xE9, 0x87, 0x8C, 0xE9, 0x87, 0x91, 0xE9, 0x95, ++ 0xB7, 0xE9, 0x96, 0x80, 0xE9, 0x98, 0x9C, 0xE9, ++ 0x9A, 0xB6, 0xE9, 0x9A, 0xB9, 0xE9, 0x9B, 0xA8, ++ 0xE9, 0x9D, 0x91, 0xE9, 0x9D, 0x9E, 0xE9, 0x9D, ++ 0xA2, 0xE9, 0x9D, 0xA9, 0xE9, 0x9F, 0x8B, 0xE9, ++ 0x9F, 0xAD, 0xE9, 0x9F, 0xB3, 0xE9, 0xA0, 0x81, ++ 0xE9, 0xA2, 0xA8, 0xE9, 0xA3, 0x9B, 0xE9, 0xA3, ++ 0x9F, 0xE9, 0xA6, 0x96, 0xE9, 0xA6, 0x99, 0xE9, ++ 0xA6, 0xAC, 0xE9, 0xAA, 0xA8, 0xE9, 0xAB, 0x98, ++ 0xE9, 0xAB, 0x9F, 0xE9, 0xAC, 0xA5, 0xE9, 0xAC, ++ 0xAF, 0xE9, 0xAC, 0xB2, 0xE9, 0xAC, 0xBC, 0xE9, ++ 0xAD, 0x9A, 0xE9, 0xB3, 0xA5, 0xE9, 0xB9, 0xB5, ++ 0xE9, 0xB9, 0xBF, 0xE9, 0xBA, 0xA5, 0xE9, 0xBA, ++ 0xBB, 0xE9, 0xBB, 0x83, 0xE9, 0xBB, 0x8D, 0xE9, ++ 0xBB, 0x91, 0xE9, 0xBB, 0xB9, 0xE9, 0xBB, 0xBD, ++ 0xE9, 0xBC, 0x8E, 0xE9, 0xBC, 0x93, 0xE9, 0xBC, ++ 0xA0, 0xE9, 0xBC, 0xBB, 0xE9, 0xBD, 0x8A, 0xE9, ++ 0xBD, 0x92, 0xE9, 0xBE, 0x8D, 0xE9, 0xBE, 0x9C, ++ 0xE9, 0xBE, 0xA0, 0x20, 0xE3, 0x80, 0x92, 0xE5, ++ 0x8D, 0x81, 0xE5, 0x8D, 0x84, 0xE5, 0x8D, 0x85, ++ 0xF6, 0xE3, 0x81, 0x8B, 0xE3, 0x82, 0x99, 0xF6, ++ 0xE3, 0x81, 0x8D, 0xE3, 0x82, 0x99, 0xF6, 0xE3, ++ 0x81, 0x8F, 0xE3, 0x82, 0x99, 0xF6, 0xE3, 0x81, ++ 0x91, 0xE3, 0x82, 0x99, 0xF6, 0xE3, 0x81, 0x93, ++ 0xE3, 0x82, 0x99, 0xF6, 0xE3, 0x81, 0x95, 0xE3, ++ 0x82, 0x99, 0xF6, 0xE3, 0x81, 0x97, 0xE3, 0x82, ++ 0x99, 0xF6, 0xE3, 0x81, 0x99, 0xE3, 0x82, 0x99, ++ 0xF6, 0xE3, 0x81, 0x9B, 0xE3, 0x82, 0x99, 0xF6, ++ 0xE3, 0x81, 0x9D, 0xE3, 0x82, 0x99, 0xF6, 0xE3, ++ 0x81, 0x9F, 0xE3, 0x82, 0x99, 0xF6, 0xE3, 0x81, ++ 0xA1, 0xE3, 0x82, 0x99, 
0xF6, 0xE3, 0x81, 0xA4, ++ 0xE3, 0x82, 0x99, 0xF6, 0xE3, 0x81, 0xA6, 0xE3, ++ 0x82, 0x99, 0xF6, 0xE3, 0x81, 0xA8, 0xE3, 0x82, ++ 0x99, 0xF6, 0xE3, 0x81, 0xAF, 0xE3, 0x82, 0x99, ++ 0xF6, 0xE3, 0x81, 0xAF, 0xE3, 0x82, 0x9A, 0xF6, ++ 0xE3, 0x81, 0xB2, 0xE3, 0x82, 0x99, 0xF6, 0xE3, ++ 0x81, 0xB2, 0xE3, 0x82, 0x9A, 0xF6, 0xE3, 0x81, ++ 0xB5, 0xE3, 0x82, 0x99, 0xF6, 0xE3, 0x81, 0xB5, ++ 0xE3, 0x82, 0x9A, 0xF6, 0xE3, 0x81, 0xB8, 0xE3, ++ 0x82, 0x99, 0xF6, 0xE3, 0x81, 0xB8, 0xE3, 0x82, ++ 0x9A, 0xF6, 0xE3, 0x81, 0xBB, 0xE3, 0x82, 0x99, ++ 0xF6, 0xE3, 0x81, 0xBB, 0xE3, 0x82, 0x9A, 0xF6, ++ 0xE3, 0x81, 0x86, 0xE3, 0x82, 0x99, 0x20, 0xE3, ++ 0x82, 0x99, 0x20, 0xE3, 0x82, 0x9A, 0xF6, 0xE3, ++ 0x82, 0x9D, 0xE3, 0x82, 0x99, 0xE3, 0x82, 0x88, ++ 0xE3, 0x82, 0x8A, 0xF6, 0xE3, 0x82, 0xAB, 0xE3, ++ 0x82, 0x99, 0xF6, 0xE3, 0x82, 0xAD, 0xE3, 0x82, ++ 0x99, 0xF6, 0xE3, 0x82, 0xAF, 0xE3, 0x82, 0x99, ++ 0xF6, 0xE3, 0x82, 0xB1, 0xE3, 0x82, 0x99, 0xF6, ++ 0xE3, 0x82, 0xB3, 0xE3, 0x82, 0x99, 0xF6, 0xE3, ++ 0x82, 0xB5, 0xE3, 0x82, 0x99, 0xF6, 0xE3, 0x82, ++ 0xB7, 0xE3, 0x82, 0x99, 0xF6, 0xE3, 0x82, 0xB9, ++ 0xE3, 0x82, 0x99, 0xF6, 0xE3, 0x82, 0xBB, 0xE3, ++ 0x82, 0x99, 0xF6, 0xE3, 0x82, 0xBD, 0xE3, 0x82, ++ 0x99, 0xF6, 0xE3, 0x82, 0xBF, 0xE3, 0x82, 0x99, ++ 0xF6, 0xE3, 0x83, 0x81, 0xE3, 0x82, 0x99, 0xF6, ++ 0xE3, 0x83, 0x84, 0xE3, 0x82, 0x99, 0xF6, 0xE3, ++ 0x83, 0x86, 0xE3, 0x82, 0x99, 0xF6, 0xE3, 0x83, ++ 0x88, 0xE3, 0x82, 0x99, 0xF6, 0xE3, 0x83, 0x8F, ++ 0xE3, 0x82, 0x99, 0xF6, 0xE3, 0x83, 0x8F, 0xE3, ++ 0x82, 0x9A, 0xF6, 0xE3, 0x83, 0x92, 0xE3, 0x82, ++ 0x99, 0xF6, 0xE3, 0x83, 0x92, 0xE3, 0x82, 0x9A, ++ 0xF6, 0xE3, 0x83, 0x95, 0xE3, 0x82, 0x99, 0xF6, ++ 0xE3, 0x83, 0x95, 0xE3, 0x82, 0x9A, 0xF6, 0xE3, ++ 0x83, 0x98, 0xE3, 0x82, 0x99, 0xF6, 0xE3, 0x83, ++ 0x98, 0xE3, 0x82, 0x9A, 0xF6, 0xE3, 0x83, 0x9B, ++ 0xE3, 0x82, 0x99, 0xF6, 0xE3, 0x83, 0x9B, 0xE3, ++ 0x82, 0x9A, 0xF6, 0xE3, 0x82, 0xA6, 0xE3, 0x82, ++ 0x99, 0xF6, 0xE3, 0x83, 0xAF, 0xE3, 0x82, 0x99, ++ 0xF6, 0xE3, 0x83, 0xB0, 0xE3, 0x82, 0x99, 0xF6, ++ 0xE3, 0x83, 0xB1, 0xE3, 0x82, 0x99, 0xF6, 0xE3, ++ 0x83, 0xB2, 0xE3, 0x82, 0x99, 0xF6, 0xE3, 0x83, ++ 0xBD, 0xE3, 0x82, 0x99, 0xE3, 0x82, 0xB3, 0xE3, ++ 0x83, 0x88, 0xE1, 0x84, 0x80, 0xE1, 0x84, 0x81, ++ 0xE1, 0x86, 0xAA, 0xE1, 0x84, 0x82, 0xE1, 0x86, ++ 0xAC, 0xE1, 0x86, 0xAD, 0xE1, 0x84, 0x83, 0xE1, ++ 0x84, 0x84, 0xE1, 0x84, 0x85, 0xE1, 0x86, 0xB0, ++ 0xE1, 0x86, 0xB1, 0xE1, 0x86, 0xB2, 0xE1, 0x86, ++ 0xB3, 0xE1, 0x86, 0xB4, 0xE1, 0x86, 0xB5, 0xE1, ++ 0x84, 0x9A, 0xE1, 0x84, 0x86, 0xE1, 0x84, 0x87, ++ 0xE1, 0x84, 0x88, 0xE1, 0x84, 0xA1, 0xE1, 0x84, ++ 0x89, 0xE1, 0x84, 0x8A, 0xE1, 0x84, 0x8B, 0xE1, ++ 0x84, 0x8C, 0xE1, 0x84, 0x8D, 0xE1, 0x84, 0x8E, ++ 0xE1, 0x84, 0x8F, 0xE1, 0x84, 0x90, 0xE1, 0x84, ++ 0x91, 0xE1, 0x84, 0x92, 0xE1, 0x85, 0xA1, 0xE1, ++ 0x85, 0xA2, 0xE1, 0x85, 0xA3, 0xE1, 0x85, 0xA4, ++ 0xE1, 0x85, 0xA5, 0xE1, 0x85, 0xA6, 0xE1, 0x85, ++ 0xA7, 0xE1, 0x85, 0xA8, 0xE1, 0x85, 0xA9, 0xE1, ++ 0x85, 0xAA, 0xE1, 0x85, 0xAB, 0xE1, 0x85, 0xAC, ++ 0xE1, 0x85, 0xAD, 0xE1, 0x85, 0xAE, 0xE1, 0x85, ++ 0xAF, 0xE1, 0x85, 0xB0, 0xE1, 0x85, 0xB1, 0xE1, ++ 0x85, 0xB2, 0xE1, 0x85, 0xB3, 0xE1, 0x85, 0xB4, ++ 0xE1, 0x85, 0xB5, 0xE1, 0x85, 0xA0, 0xE1, 0x84, ++ 0x94, 0xE1, 0x84, 0x95, 0xE1, 0x87, 0x87, 0xE1, ++ 0x87, 0x88, 0xE1, 0x87, 0x8C, 0xE1, 0x87, 0x8E, ++ 0xE1, 0x87, 0x93, 0xE1, 0x87, 0x97, 0xE1, 0x87, ++ 0x99, 0xE1, 0x84, 0x9C, 0xE1, 0x87, 0x9D, 0xE1, ++ 0x87, 0x9F, 0xE1, 0x84, 0x9D, 0xE1, 0x84, 0x9E, ++ 0xE1, 0x84, 0xA0, 0xE1, 0x84, 0xA2, 0xE1, 0x84, ++ 0xA3, 0xE1, 0x84, 0xA7, 0xE1, 0x84, 0xA9, 0xE1, ++ 0x84, 
0xAB, 0xE1, 0x84, 0xAC, 0xE1, 0x84, 0xAD, ++ 0xE1, 0x84, 0xAE, 0xE1, 0x84, 0xAF, 0xE1, 0x84, ++ 0xB2, 0xE1, 0x84, 0xB6, 0xE1, 0x85, 0x80, 0xE1, ++ 0x85, 0x87, 0xE1, 0x85, 0x8C, 0xE1, 0x87, 0xB1, ++ 0xE1, 0x87, 0xB2, 0xE1, 0x85, 0x97, 0xE1, 0x85, ++ 0x98, 0xE1, 0x85, 0x99, 0xE1, 0x86, 0x84, 0xE1, ++ 0x86, 0x85, 0xE1, 0x86, 0x88, 0xE1, 0x86, 0x91, ++ 0xE1, 0x86, 0x92, 0xE1, 0x86, 0x94, 0xE1, 0x86, ++ 0x9E, 0xE1, 0x86, 0xA1, 0xE4, 0xB8, 0x80, 0xE4, ++ 0xBA, 0x8C, 0xE4, 0xB8, 0x89, 0xE5, 0x9B, 0x9B, ++ 0xE4, 0xB8, 0x8A, 0xE4, 0xB8, 0xAD, 0xE4, 0xB8, ++ 0x8B, 0xE7, 0x94, 0xB2, 0xE4, 0xB9, 0x99, 0xE4, ++ 0xB8, 0x99, 0xE4, 0xB8, 0x81, 0xE5, 0xA4, 0xA9, ++ 0xE5, 0x9C, 0xB0, 0xE4, 0xBA, 0xBA, 0x28, 0xE1, ++ 0x84, 0x80, 0x29, 0x28, 0xE1, 0x84, 0x82, 0x29, ++ 0x28, 0xE1, 0x84, 0x83, 0x29, 0x28, 0xE1, 0x84, ++ 0x85, 0x29, 0x28, 0xE1, 0x84, 0x86, 0x29, 0x28, ++ 0xE1, 0x84, 0x87, 0x29, 0x28, 0xE1, 0x84, 0x89, ++ 0x29, 0x28, 0xE1, 0x84, 0x8B, 0x29, 0x28, 0xE1, ++ 0x84, 0x8C, 0x29, 0x28, 0xE1, 0x84, 0x8E, 0x29, ++ 0x28, 0xE1, 0x84, 0x8F, 0x29, 0x28, 0xE1, 0x84, ++ 0x90, 0x29, 0x28, 0xE1, 0x84, 0x91, 0x29, 0x28, ++ 0xE1, 0x84, 0x92, 0x29, 0x28, 0xE1, 0x84, 0x80, ++ 0xE1, 0x85, 0xA1, 0x29, 0x28, 0xE1, 0x84, 0x82, ++ 0xE1, 0x85, 0xA1, 0x29, 0x28, 0xE1, 0x84, 0x83, ++ 0xE1, 0x85, 0xA1, 0x29, 0x28, 0xE1, 0x84, 0x85, ++ 0xE1, 0x85, 0xA1, 0x29, 0x28, 0xE1, 0x84, 0x86, ++ 0xE1, 0x85, 0xA1, 0x29, 0x28, 0xE1, 0x84, 0x87, ++ 0xE1, 0x85, 0xA1, 0x29, 0x28, 0xE1, 0x84, 0x89, ++ 0xE1, 0x85, 0xA1, 0x29, 0x28, 0xE1, 0x84, 0x8B, ++ 0xE1, 0x85, 0xA1, 0x29, 0x28, 0xE1, 0x84, 0x8C, ++ 0xE1, 0x85, 0xA1, 0x29, 0x28, 0xE1, 0x84, 0x8E, ++ 0xE1, 0x85, 0xA1, 0x29, 0x28, 0xE1, 0x84, 0x8F, ++ 0xE1, 0x85, 0xA1, 0x29, 0x28, 0xE1, 0x84, 0x90, ++ 0xE1, 0x85, 0xA1, 0x29, 0x28, 0xE1, 0x84, 0x91, ++ 0xE1, 0x85, 0xA1, 0x29, 0x28, 0xE1, 0x84, 0x92, ++ 0xE1, 0x85, 0xA1, 0x29, 0x28, 0xE1, 0x84, 0x8C, ++ 0xE1, 0x85, 0xAE, 0x29, 0x28, 0xE1, 0x84, 0x8B, ++ 0xE1, 0x85, 0xA9, 0xE1, 0x84, 0x8C, 0xE1, 0x85, ++ 0xA5, 0xE1, 0x86, 0xAB, 0x29, 0x28, 0xE1, 0x84, ++ 0x8B, 0xE1, 0x85, 0xA9, 0xE1, 0x84, 0x92, 0xE1, ++ 0x85, 0xAE, 0x29, 0x28, 0xE4, 0xB8, 0x80, 0x29, ++ 0x28, 0xE4, 0xBA, 0x8C, 0x29, 0x28, 0xE4, 0xB8, ++ 0x89, 0x29, 0x28, 0xE5, 0x9B, 0x9B, 0x29, 0x28, ++ 0xE4, 0xBA, 0x94, 0x29, 0x28, 0xE5, 0x85, 0xAD, ++ 0x29, 0x28, 0xE4, 0xB8, 0x83, 0x29, 0x28, 0xE5, ++ 0x85, 0xAB, 0x29, 0x28, 0xE4, 0xB9, 0x9D, 0x29, ++ 0x28, 0xE5, 0x8D, 0x81, 0x29, 0x28, 0xE6, 0x9C, ++ 0x88, 0x29, 0x28, 0xE7, 0x81, 0xAB, 0x29, 0x28, ++ 0xE6, 0xB0, 0xB4, 0x29, 0x28, 0xE6, 0x9C, 0xA8, ++ 0x29, 0x28, 0xE9, 0x87, 0x91, 0x29, 0x28, 0xE5, ++ 0x9C, 0x9F, 0x29, 0x28, 0xE6, 0x97, 0xA5, 0x29, ++ 0x28, 0xE6, 0xA0, 0xAA, 0x29, 0x28, 0xE6, 0x9C, ++ 0x89, 0x29, 0x28, 0xE7, 0xA4, 0xBE, 0x29, 0x28, ++ 0xE5, 0x90, 0x8D, 0x29, 0x28, 0xE7, 0x89, 0xB9, ++ 0x29, 0x28, 0xE8, 0xB2, 0xA1, 0x29, 0x28, 0xE7, ++ 0xA5, 0x9D, 0x29, 0x28, 0xE5, 0x8A, 0xB4, 0x29, ++ 0x28, 0xE4, 0xBB, 0xA3, 0x29, 0x28, 0xE5, 0x91, ++ 0xBC, 0x29, 0x28, 0xE5, 0xAD, 0xA6, 0x29, 0x28, ++ 0xE7, 0x9B, 0xA3, 0x29, 0x28, 0xE4, 0xBC, 0x81, ++ 0x29, 0x28, 0xE8, 0xB3, 0x87, 0x29, 0x28, 0xE5, ++ 0x8D, 0x94, 0x29, 0x28, 0xE7, 0xA5, 0xAD, 0x29, ++ 0x28, 0xE4, 0xBC, 0x91, 0x29, 0x28, 0xE8, 0x87, ++ 0xAA, 0x29, 0x28, 0xE8, 0x87, 0xB3, 0x29, 0x50, ++ 0x54, 0x45, 0x32, 0x31, 0x32, 0x32, 0x32, 0x33, ++ 0x32, 0x34, 0x32, 0x35, 0x32, 0x36, 0x32, 0x37, ++ 0x32, 0x38, 0x32, 0x39, 0x33, 0x30, 0x33, 0x31, ++ 0x33, 0x32, 0x33, 0x33, 0x33, 0x34, 0x33, 0x35, ++ 0xE1, 0x84, 0x80, 0xE1, 0x84, 0x82, 0xE1, 0x84, ++ 0x83, 0xE1, 0x84, 0x85, 0xE1, 0x84, 0x86, 
0xE1, ++ 0x84, 0x87, 0xE1, 0x84, 0x89, 0xE1, 0x84, 0x8B, ++ 0xE1, 0x84, 0x8C, 0xE1, 0x84, 0x8E, 0xE1, 0x84, ++ 0x8F, 0xE1, 0x84, 0x90, 0xE1, 0x84, 0x91, 0xE1, ++ 0x84, 0x92, 0xE1, 0x84, 0x80, 0xE1, 0x85, 0xA1, ++ 0xE1, 0x84, 0x82, 0xE1, 0x85, 0xA1, 0xE1, 0x84, ++ 0x83, 0xE1, 0x85, 0xA1, 0xE1, 0x84, 0x85, 0xE1, ++ 0x85, 0xA1, 0xE1, 0x84, 0x86, 0xE1, 0x85, 0xA1, ++ 0xE1, 0x84, 0x87, 0xE1, 0x85, 0xA1, 0xE1, 0x84, ++ 0x89, 0xE1, 0x85, 0xA1, 0xE1, 0x84, 0x8B, 0xE1, ++ 0x85, 0xA1, 0xE1, 0x84, 0x8C, 0xE1, 0x85, 0xA1, ++ 0xE1, 0x84, 0x8E, 0xE1, 0x85, 0xA1, 0xE1, 0x84, ++ 0x8F, 0xE1, 0x85, 0xA1, 0xE1, 0x84, 0x90, 0xE1, ++ 0x85, 0xA1, 0xE1, 0x84, 0x91, 0xE1, 0x85, 0xA1, ++ 0xE1, 0x84, 0x92, 0xE1, 0x85, 0xA1, 0xE1, 0x84, ++ 0x8E, 0xE1, 0x85, 0xA1, 0xE1, 0x86, 0xB7, 0xE1, ++ 0x84, 0x80, 0xE1, 0x85, 0xA9, 0xE1, 0x84, 0x8C, ++ 0xE1, 0x85, 0xAE, 0xE1, 0x84, 0x8B, 0xE1, 0x85, ++ 0xB4, 0xE1, 0x84, 0x8B, 0xE1, 0x85, 0xAE, 0xE4, ++ 0xB8, 0x80, 0xE4, 0xBA, 0x8C, 0xE4, 0xB8, 0x89, ++ 0xE5, 0x9B, 0x9B, 0xE4, 0xBA, 0x94, 0xE5, 0x85, ++ 0xAD, 0xE4, 0xB8, 0x83, 0xE5, 0x85, 0xAB, 0xE4, ++ 0xB9, 0x9D, 0xE5, 0x8D, 0x81, 0xE6, 0x9C, 0x88, ++ 0xE7, 0x81, 0xAB, 0xE6, 0xB0, 0xB4, 0xE6, 0x9C, ++ 0xA8, 0xE9, 0x87, 0x91, 0xE5, 0x9C, 0x9F, 0xE6, ++ 0x97, 0xA5, 0xE6, 0xA0, 0xAA, 0xE6, 0x9C, 0x89, ++ 0xE7, 0xA4, 0xBE, 0xE5, 0x90, 0x8D, 0xE7, 0x89, ++ 0xB9, 0xE8, 0xB2, 0xA1, 0xE7, 0xA5, 0x9D, 0xE5, ++ 0x8A, 0xB4, 0xE7, 0xA7, 0x98, 0xE7, 0x94, 0xB7, ++ 0xE5, 0xA5, 0xB3, 0xE9, 0x81, 0xA9, 0xE5, 0x84, ++ 0xAA, 0xE5, 0x8D, 0xB0, 0xE6, 0xB3, 0xA8, 0xE9, ++ 0xA0, 0x85, 0xE4, 0xBC, 0x91, 0xE5, 0x86, 0x99, ++ 0xE6, 0xAD, 0xA3, 0xE4, 0xB8, 0x8A, 0xE4, 0xB8, ++ 0xAD, 0xE4, 0xB8, 0x8B, 0xE5, 0xB7, 0xA6, 0xE5, ++ 0x8F, 0xB3, 0xE5, 0x8C, 0xBB, 0xE5, 0xAE, 0x97, ++ 0xE5, 0xAD, 0xA6, 0xE7, 0x9B, 0xA3, 0xE4, 0xBC, ++ 0x81, 0xE8, 0xB3, 0x87, 0xE5, 0x8D, 0x94, 0xE5, ++ 0xA4, 0x9C, 0x33, 0x36, 0x33, 0x37, 0x33, 0x38, ++ 0x33, 0x39, 0x34, 0x30, 0x34, 0x31, 0x34, 0x32, ++ 0x34, 0x33, 0x34, 0x34, 0x34, 0x35, 0x34, 0x36, ++ 0x34, 0x37, 0x34, 0x38, 0x34, 0x39, 0x35, 0x30, ++ 0x31, 0xE6, 0x9C, 0x88, 0x32, 0xE6, 0x9C, 0x88, ++ 0x33, 0xE6, 0x9C, 0x88, 0x34, 0xE6, 0x9C, 0x88, ++ 0x35, 0xE6, 0x9C, 0x88, 0x36, 0xE6, 0x9C, 0x88, ++ 0x37, 0xE6, 0x9C, 0x88, 0x38, 0xE6, 0x9C, 0x88, ++ 0x39, 0xE6, 0x9C, 0x88, 0x31, 0x30, 0xE6, 0x9C, ++ 0x88, 0x31, 0x31, 0xE6, 0x9C, 0x88, 0x31, 0x32, ++ 0xE6, 0x9C, 0x88, 0x48, 0x67, 0x65, 0x72, 0x67, ++ 0x65, 0x56, 0x4C, 0x54, 0x44, 0xE3, 0x82, 0xA2, ++ 0xE3, 0x82, 0xA4, 0xE3, 0x82, 0xA6, 0xE3, 0x82, ++ 0xA8, 0xE3, 0x82, 0xAA, 0xE3, 0x82, 0xAB, 0xE3, ++ 0x82, 0xAD, 0xE3, 0x82, 0xAF, 0xE3, 0x82, 0xB1, ++ 0xE3, 0x82, 0xB3, 0xE3, 0x82, 0xB5, 0xE3, 0x82, ++ 0xB7, 0xE3, 0x82, 0xB9, 0xE3, 0x82, 0xBB, 0xE3, ++ 0x82, 0xBD, 0xE3, 0x82, 0xBF, 0xE3, 0x83, 0x81, ++ 0xE3, 0x83, 0x84, 0xE3, 0x83, 0x86, 0xE3, 0x83, ++ 0x88, 0xE3, 0x83, 0x8A, 0xE3, 0x83, 0x8B, 0xE3, ++ 0x83, 0x8C, 0xE3, 0x83, 0x8D, 0xE3, 0x83, 0x8E, ++ 0xE3, 0x83, 0x8F, 0xE3, 0x83, 0x92, 0xE3, 0x83, ++ 0x95, 0xE3, 0x83, 0x98, 0xE3, 0x83, 0x9B, 0xE3, ++ 0x83, 0x9E, 0xE3, 0x83, 0x9F, 0xE3, 0x83, 0xA0, ++ 0xE3, 0x83, 0xA1, 0xE3, 0x83, 0xA2, 0xE3, 0x83, ++ 0xA4, 0xE3, 0x83, 0xA6, 0xE3, 0x83, 0xA8, 0xE3, ++ 0x83, 0xA9, 0xE3, 0x83, 0xAA, 0xE3, 0x83, 0xAB, ++ 0xE3, 0x83, 0xAC, 0xE3, 0x83, 0xAD, 0xE3, 0x83, ++ 0xAF, 0xE3, 0x83, 0xB0, 0xE3, 0x83, 0xB1, 0xE3, ++ 0x83, 0xB2, 0xE3, 0x82, 0xA2, 0xE3, 0x83, 0x8F, ++ 0xE3, 0x82, 0x9A, 0xE3, 0x83, 0xBC, 0xE3, 0x83, ++ 0x88, 0xE3, 0x82, 0xA2, 0xE3, 0x83, 0xAB, 0xE3, ++ 0x83, 0x95, 0xE3, 0x82, 0xA1, 0xE3, 0x82, 0xA2, ++ 0xE3, 0x83, 0xB3, 0xE3, 
0x83, 0x98, 0xE3, 0x82, ++ 0x9A, 0xE3, 0x82, 0xA2, 0xE3, 0x82, 0xA2, 0xE3, ++ 0x83, 0xBC, 0xE3, 0x83, 0xAB, 0xE3, 0x82, 0xA4, ++ 0xE3, 0x83, 0x8B, 0xE3, 0x83, 0xB3, 0xE3, 0x82, ++ 0xAF, 0xE3, 0x82, 0x99, 0xE3, 0x82, 0xA4, 0xE3, ++ 0x83, 0xB3, 0xE3, 0x83, 0x81, 0xE3, 0x82, 0xA6, ++ 0xE3, 0x82, 0xA9, 0xE3, 0x83, 0xB3, 0xE3, 0x82, ++ 0xA8, 0xE3, 0x82, 0xB9, 0xE3, 0x82, 0xAF, 0xE3, ++ 0x83, 0xBC, 0xE3, 0x83, 0x88, 0xE3, 0x82, 0x99, ++ 0xE3, 0x82, 0xA8, 0xE3, 0x83, 0xBC, 0xE3, 0x82, ++ 0xAB, 0xE3, 0x83, 0xBC, 0xE3, 0x82, 0xAA, 0xE3, ++ 0x83, 0xB3, 0xE3, 0x82, 0xB9, 0xE3, 0x82, 0xAA, ++ 0xE3, 0x83, 0xBC, 0xE3, 0x83, 0xA0, 0xE3, 0x82, ++ 0xAB, 0xE3, 0x82, 0xA4, 0xE3, 0x83, 0xAA, 0xE3, ++ 0x82, 0xAB, 0xE3, 0x83, 0xA9, 0xE3, 0x83, 0x83, ++ 0xE3, 0x83, 0x88, 0xE3, 0x82, 0xAB, 0xE3, 0x83, ++ 0xAD, 0xE3, 0x83, 0xAA, 0xE3, 0x83, 0xBC, 0xE3, ++ 0x82, 0xAB, 0xE3, 0x82, 0x99, 0xE3, 0x83, 0xAD, ++ 0xE3, 0x83, 0xB3, 0xE3, 0x82, 0xAB, 0xE3, 0x82, ++ 0x99, 0xE3, 0x83, 0xB3, 0xE3, 0x83, 0x9E, 0xE3, ++ 0x82, 0xAD, 0xE3, 0x82, 0x99, 0xE3, 0x82, 0xAB, ++ 0xE3, 0x82, 0x99, 0xE3, 0x82, 0xAD, 0xE3, 0x82, ++ 0x99, 0xE3, 0x83, 0x8B, 0xE3, 0x83, 0xBC, 0xE3, ++ 0x82, 0xAD, 0xE3, 0x83, 0xA5, 0xE3, 0x83, 0xAA, ++ 0xE3, 0x83, 0xBC, 0xE3, 0x82, 0xAD, 0xE3, 0x82, ++ 0x99, 0xE3, 0x83, 0xAB, 0xE3, 0x82, 0xBF, 0xE3, ++ 0x82, 0x99, 0xE3, 0x83, 0xBC, 0xE3, 0x82, 0xAD, ++ 0xE3, 0x83, 0xAD, 0xE3, 0x82, 0xAD, 0xE3, 0x83, ++ 0xAD, 0xE3, 0x82, 0xAF, 0xE3, 0x82, 0x99, 0xE3, ++ 0x83, 0xA9, 0xE3, 0x83, 0xA0, 0xE3, 0x82, 0xAD, ++ 0xE3, 0x83, 0xAD, 0xE3, 0x83, 0xA1, 0xE3, 0x83, ++ 0xBC, 0xE3, 0x83, 0x88, 0xE3, 0x83, 0xAB, 0xE3, ++ 0x82, 0xAD, 0xE3, 0x83, 0xAD, 0xE3, 0x83, 0xAF, ++ 0xE3, 0x83, 0x83, 0xE3, 0x83, 0x88, 0xE3, 0x82, ++ 0xAF, 0xE3, 0x82, 0x99, 0xE3, 0x83, 0xA9, 0xE3, ++ 0x83, 0xA0, 0xE3, 0x82, 0xAF, 0xE3, 0x82, 0x99, ++ 0xE3, 0x83, 0xA9, 0xE3, 0x83, 0xA0, 0xE3, 0x83, ++ 0x88, 0xE3, 0x83, 0xB3, 0xE3, 0x82, 0xAF, 0xE3, ++ 0x83, 0xAB, 0xE3, 0x82, 0xBB, 0xE3, 0x82, 0x99, ++ 0xE3, 0x82, 0xA4, 0xE3, 0x83, 0xAD, 0xE3, 0x82, ++ 0xAF, 0xE3, 0x83, 0xAD, 0xE3, 0x83, 0xBC, 0xE3, ++ 0x83, 0x8D, 0xE3, 0x82, 0xB1, 0xE3, 0x83, 0xBC, ++ 0xE3, 0x82, 0xB9, 0xE3, 0x82, 0xB3, 0xE3, 0x83, ++ 0xAB, 0xE3, 0x83, 0x8A, 0xE3, 0x82, 0xB3, 0xE3, ++ 0x83, 0xBC, 0xE3, 0x83, 0x9B, 0xE3, 0x82, 0x9A, ++ 0xE3, 0x82, 0xB5, 0xE3, 0x82, 0xA4, 0xE3, 0x82, ++ 0xAF, 0xE3, 0x83, 0xAB, 0xE3, 0x82, 0xB5, 0xE3, ++ 0x83, 0xB3, 0xE3, 0x83, 0x81, 0xE3, 0x83, 0xBC, ++ 0xE3, 0x83, 0xA0, 0xE3, 0x82, 0xB7, 0xE3, 0x83, ++ 0xAA, 0xE3, 0x83, 0xB3, 0xE3, 0x82, 0xAF, 0xE3, ++ 0x82, 0x99, 0xE3, 0x82, 0xBB, 0xE3, 0x83, 0xB3, ++ 0xE3, 0x83, 0x81, 0xE3, 0x82, 0xBB, 0xE3, 0x83, ++ 0xB3, 0xE3, 0x83, 0x88, 0xE3, 0x82, 0xBF, 0xE3, ++ 0x82, 0x99, 0xE3, 0x83, 0xBC, 0xE3, 0x82, 0xB9, ++ 0xE3, 0x83, 0x86, 0xE3, 0x82, 0x99, 0xE3, 0x82, ++ 0xB7, 0xE3, 0x83, 0x88, 0xE3, 0x82, 0x99, 0xE3, ++ 0x83, 0xAB, 0xE3, 0x83, 0x88, 0xE3, 0x83, 0xB3, ++ 0xE3, 0x83, 0x8A, 0xE3, 0x83, 0x8E, 0xE3, 0x83, ++ 0x8E, 0xE3, 0x83, 0x83, 0xE3, 0x83, 0x88, 0xE3, ++ 0x83, 0x8F, 0xE3, 0x82, 0xA4, 0xE3, 0x83, 0x84, ++ 0xE3, 0x83, 0x8F, 0xE3, 0x82, 0x9A, 0xE3, 0x83, ++ 0xBC, 0xE3, 0x82, 0xBB, 0xE3, 0x83, 0xB3, 0xE3, ++ 0x83, 0x88, 0xE3, 0x83, 0x8F, 0xE3, 0x82, 0x9A, ++ 0xE3, 0x83, 0xBC, 0xE3, 0x83, 0x84, 0xE3, 0x83, ++ 0x8F, 0xE3, 0x82, 0x99, 0xE3, 0x83, 0xBC, 0xE3, ++ 0x83, 0xAC, 0xE3, 0x83, 0xAB, 0xE3, 0x83, 0x92, ++ 0xE3, 0x82, 0x9A, 0xE3, 0x82, 0xA2, 0xE3, 0x82, ++ 0xB9, 0xE3, 0x83, 0x88, 0xE3, 0x83, 0xAB, 0xE3, ++ 0x83, 0x92, 0xE3, 0x82, 0x9A, 0xE3, 0x82, 0xAF, ++ 0xE3, 0x83, 0xAB, 0xE3, 0x83, 0x92, 0xE3, 0x82, ++ 0x9A, 
0xE3, 0x82, 0xB3, 0xE3, 0x83, 0x92, 0xE3, ++ 0x82, 0x99, 0xE3, 0x83, 0xAB, 0xE3, 0x83, 0x95, ++ 0xE3, 0x82, 0xA1, 0xE3, 0x83, 0xA9, 0xE3, 0x83, ++ 0x83, 0xE3, 0x83, 0x88, 0xE3, 0x82, 0x99, 0xE3, ++ 0x83, 0x95, 0xE3, 0x82, 0xA3, 0xE3, 0x83, 0xBC, ++ 0xE3, 0x83, 0x88, 0xE3, 0x83, 0x95, 0xE3, 0x82, ++ 0x99, 0xE3, 0x83, 0x83, 0xE3, 0x82, 0xB7, 0xE3, ++ 0x82, 0xA7, 0xE3, 0x83, 0xAB, 0xE3, 0x83, 0x95, ++ 0xE3, 0x83, 0xA9, 0xE3, 0x83, 0xB3, 0xE3, 0x83, ++ 0x98, 0xE3, 0x82, 0xAF, 0xE3, 0x82, 0xBF, 0xE3, ++ 0x83, 0xBC, 0xE3, 0x83, 0xAB, 0xE3, 0x83, 0x98, ++ 0xE3, 0x82, 0x9A, 0xE3, 0x82, 0xBD, 0xE3, 0x83, ++ 0x98, 0xE3, 0x82, 0x9A, 0xE3, 0x83, 0x8B, 0xE3, ++ 0x83, 0x92, 0xE3, 0x83, 0x98, 0xE3, 0x83, 0xAB, ++ 0xE3, 0x83, 0x84, 0xE3, 0x83, 0x98, 0xE3, 0x82, ++ 0x9A, 0xE3, 0x83, 0xB3, 0xE3, 0x82, 0xB9, 0xE3, ++ 0x83, 0x98, 0xE3, 0x82, 0x9A, 0xE3, 0x83, 0xBC, ++ 0xE3, 0x82, 0xB7, 0xE3, 0x82, 0x99, 0xE3, 0x83, ++ 0x98, 0xE3, 0x82, 0x99, 0xE3, 0x83, 0xBC, 0xE3, ++ 0x82, 0xBF, 0xE3, 0x83, 0x9B, 0xE3, 0x82, 0x9A, ++ 0xE3, 0x82, 0xA4, 0xE3, 0x83, 0xB3, 0xE3, 0x83, ++ 0x88, 0xE3, 0x83, 0x9B, 0xE3, 0x82, 0x99, 0xE3, ++ 0x83, 0xAB, 0xE3, 0x83, 0x88, 0xE3, 0x83, 0x9B, ++ 0xE3, 0x83, 0xB3, 0xE3, 0x83, 0x9B, 0xE3, 0x82, ++ 0x9A, 0xE3, 0x83, 0xB3, 0xE3, 0x83, 0x88, 0xE3, ++ 0x82, 0x99, 0xE3, 0x83, 0x9B, 0xE3, 0x83, 0xBC, ++ 0xE3, 0x83, 0xAB, 0xE3, 0x83, 0x9B, 0xE3, 0x83, ++ 0xBC, 0xE3, 0x83, 0xB3, 0xE3, 0x83, 0x9E, 0xE3, ++ 0x82, 0xA4, 0xE3, 0x82, 0xAF, 0xE3, 0x83, 0xAD, ++ 0xE3, 0x83, 0x9E, 0xE3, 0x82, 0xA4, 0xE3, 0x83, ++ 0xAB, 0xE3, 0x83, 0x9E, 0xE3, 0x83, 0x83, 0xE3, ++ 0x83, 0x8F, 0xE3, 0x83, 0x9E, 0xE3, 0x83, 0xAB, ++ 0xE3, 0x82, 0xAF, 0xE3, 0x83, 0x9E, 0xE3, 0x83, ++ 0xB3, 0xE3, 0x82, 0xB7, 0xE3, 0x83, 0xA7, 0xE3, ++ 0x83, 0xB3, 0xE3, 0x83, 0x9F, 0xE3, 0x82, 0xAF, ++ 0xE3, 0x83, 0xAD, 0xE3, 0x83, 0xB3, 0xE3, 0x83, ++ 0x9F, 0xE3, 0x83, 0xAA, 0xE3, 0x83, 0x9F, 0xE3, ++ 0x83, 0xAA, 0xE3, 0x83, 0x8F, 0xE3, 0x82, 0x99, ++ 0xE3, 0x83, 0xBC, 0xE3, 0x83, 0xAB, 0xE3, 0x83, ++ 0xA1, 0xE3, 0x82, 0xAB, 0xE3, 0x82, 0x99, 0xE3, ++ 0x83, 0xA1, 0xE3, 0x82, 0xAB, 0xE3, 0x82, 0x99, ++ 0xE3, 0x83, 0x88, 0xE3, 0x83, 0xB3, 0xE3, 0x83, ++ 0xA1, 0xE3, 0x83, 0xBC, 0xE3, 0x83, 0x88, 0xE3, ++ 0x83, 0xAB, 0xE3, 0x83, 0xA4, 0xE3, 0x83, 0xBC, ++ 0xE3, 0x83, 0x88, 0xE3, 0x82, 0x99, 0xE3, 0x83, ++ 0xA4, 0xE3, 0x83, 0xBC, 0xE3, 0x83, 0xAB, 0xE3, ++ 0x83, 0xA6, 0xE3, 0x82, 0xA2, 0xE3, 0x83, 0xB3, ++ 0xE3, 0x83, 0xAA, 0xE3, 0x83, 0x83, 0xE3, 0x83, ++ 0x88, 0xE3, 0x83, 0xAB, 0xE3, 0x83, 0xAA, 0xE3, ++ 0x83, 0xA9, 0xE3, 0x83, 0xAB, 0xE3, 0x83, 0x92, ++ 0xE3, 0x82, 0x9A, 0xE3, 0x83, 0xBC, 0xE3, 0x83, ++ 0xAB, 0xE3, 0x83, 0xBC, 0xE3, 0x83, 0x95, 0xE3, ++ 0x82, 0x99, 0xE3, 0x83, 0xAB, 0xE3, 0x83, 0xAC, ++ 0xE3, 0x83, 0xA0, 0xE3, 0x83, 0xAC, 0xE3, 0x83, ++ 0xB3, 0xE3, 0x83, 0x88, 0xE3, 0x82, 0xB1, 0xE3, ++ 0x82, 0x99, 0xE3, 0x83, 0xB3, 0xE3, 0x83, 0xAF, ++ 0xE3, 0x83, 0x83, 0xE3, 0x83, 0x88, 0x30, 0xE7, ++ 0x82, 0xB9, 0x31, 0xE7, 0x82, 0xB9, 0x32, 0xE7, ++ 0x82, 0xB9, 0x33, 0xE7, 0x82, 0xB9, 0x34, 0xE7, ++ 0x82, 0xB9, 0x35, 0xE7, 0x82, 0xB9, 0x36, 0xE7, ++ 0x82, 0xB9, 0x37, 0xE7, 0x82, 0xB9, 0x38, 0xE7, ++ 0x82, 0xB9, 0x39, 0xE7, 0x82, 0xB9, 0x31, 0x30, ++ 0xE7, 0x82, 0xB9, 0x31, 0x31, 0xE7, 0x82, 0xB9, ++ 0x31, 0x32, 0xE7, 0x82, 0xB9, 0x31, 0x33, 0xE7, ++ 0x82, 0xB9, 0x31, 0x34, 0xE7, 0x82, 0xB9, 0x31, ++ 0x35, 0xE7, 0x82, 0xB9, 0x31, 0x36, 0xE7, 0x82, ++ 0xB9, 0x31, 0x37, 0xE7, 0x82, 0xB9, 0x31, 0x38, ++ 0xE7, 0x82, 0xB9, 0x31, 0x39, 0xE7, 0x82, 0xB9, ++ 0x32, 0x30, 0xE7, 0x82, 0xB9, 0x32, 0x31, 0xE7, ++ 0x82, 0xB9, 0x32, 0x32, 0xE7, 0x82, 0xB9, 
0x32, ++ 0x33, 0xE7, 0x82, 0xB9, 0x32, 0x34, 0xE7, 0x82, ++ 0xB9, 0x68, 0x50, 0x61, 0x64, 0x61, 0x41, 0x55, ++ 0x62, 0x61, 0x72, 0x6F, 0x56, 0x70, 0x63, 0x64, ++ 0x6D, 0x64, 0x6D, 0x32, 0x64, 0x6D, 0x33, 0x49, ++ 0x55, 0xE5, 0xB9, 0xB3, 0xE6, 0x88, 0x90, 0xE6, ++ 0x98, 0xAD, 0xE5, 0x92, 0x8C, 0xE5, 0xA4, 0xA7, ++ 0xE6, 0xAD, 0xA3, 0xE6, 0x98, 0x8E, 0xE6, 0xB2, ++ 0xBB, 0xE6, 0xA0, 0xAA, 0xE5, 0xBC, 0x8F, 0xE4, ++ 0xBC, 0x9A, 0xE7, 0xA4, 0xBE, 0x70, 0x41, 0x6E, ++ 0x41, 0xCE, 0xBC, 0x41, 0x6D, 0x41, 0x6B, 0x41, ++ 0x4B, 0x42, 0x4D, 0x42, 0x47, 0x42, 0x63, 0x61, ++ 0x6C, 0x6B, 0x63, 0x61, 0x6C, 0x70, 0x46, 0x6E, ++ 0x46, 0xCE, 0xBC, 0x46, 0xCE, 0xBC, 0x67, 0x6D, ++ 0x67, 0x6B, 0x67, 0x48, 0x7A, 0x6B, 0x48, 0x7A, ++ 0x4D, 0x48, 0x7A, 0x47, 0x48, 0x7A, 0x54, 0x48, ++ 0x7A, 0xCE, 0xBC, 0x6C, 0x6D, 0x6C, 0x64, 0x6C, ++ 0x6B, 0x6C, 0x66, 0x6D, 0x6E, 0x6D, 0xCE, 0xBC, ++ 0x6D, 0x6D, 0x6D, 0x63, 0x6D, 0x6B, 0x6D, 0x6D, ++ 0x6D, 0x32, 0x63, 0x6D, 0x32, 0x6D, 0x32, 0x6B, ++ 0x6D, 0x32, 0x6D, 0x6D, 0x33, 0x63, 0x6D, 0x33, ++ 0x6D, 0x33, 0x6B, 0x6D, 0x33, 0x6D, 0xE2, 0x88, ++ 0x95, 0x73, 0x6D, 0xE2, 0x88, 0x95, 0x73, 0x32, ++ 0x50, 0x61, 0x6B, 0x50, 0x61, 0x4D, 0x50, 0x61, ++ 0x47, 0x50, 0x61, 0x72, 0x61, 0x64, 0x72, 0x61, ++ 0x64, 0xE2, 0x88, 0x95, 0x73, 0x72, 0x61, 0x64, ++ 0xE2, 0x88, 0x95, 0x73, 0x32, 0x70, 0x73, 0x6E, ++ 0x73, 0xCE, 0xBC, 0x73, 0x6D, 0x73, 0x70, 0x56, ++ 0x6E, 0x56, 0xCE, 0xBC, 0x56, 0x6D, 0x56, 0x6B, ++ 0x56, 0x4D, 0x56, 0x70, 0x57, 0x6E, 0x57, 0xCE, ++ 0xBC, 0x57, 0x6D, 0x57, 0x6B, 0x57, 0x4D, 0x57, ++ 0x6B, 0xCE, 0xA9, 0x4D, 0xCE, 0xA9, 0x61, 0x2E, ++ 0x6D, 0x2E, 0x42, 0x71, 0x63, 0x63, 0x63, 0x64, ++ 0x43, 0xE2, 0x88, 0x95, 0x6B, 0x67, 0x43, 0x6F, ++ 0x2E, 0x64, 0x42, 0x47, 0x79, 0x68, 0x61, 0x48, ++ 0x50, 0x69, 0x6E, 0x4B, 0x4B, 0x4B, 0x4D, 0x6B, ++ 0x74, 0x6C, 0x6D, 0x6C, 0x6E, 0x6C, 0x6F, 0x67, ++ 0x6C, 0x78, 0x6D, 0x62, 0x6D, 0x69, 0x6C, 0x6D, ++ 0x6F, 0x6C, 0x50, 0x48, 0x70, 0x2E, 0x6D, 0x2E, ++ 0x50, 0x50, 0x4D, 0x50, 0x52, 0x73, 0x72, 0x53, ++ 0x76, 0x57, 0x62, 0x56, 0xE2, 0x88, 0x95, 0x6D, ++ 0x41, 0xE2, 0x88, 0x95, 0x6D, 0x31, 0xE6, 0x97, ++ 0xA5, 0x32, 0xE6, 0x97, 0xA5, 0x33, 0xE6, 0x97, ++ 0xA5, 0x34, 0xE6, 0x97, 0xA5, 0x35, 0xE6, 0x97, ++ 0xA5, 0x36, 0xE6, 0x97, 0xA5, 0x37, 0xE6, 0x97, ++ 0xA5, 0x38, 0xE6, 0x97, 0xA5, 0x39, 0xE6, 0x97, ++ 0xA5, 0x31, 0x30, 0xE6, 0x97, 0xA5, 0x31, 0x31, ++ 0xE6, 0x97, 0xA5, 0x31, 0x32, 0xE6, 0x97, 0xA5, ++ 0x31, 0x33, 0xE6, 0x97, 0xA5, 0x31, 0x34, 0xE6, ++ 0x97, 0xA5, 0x31, 0x35, 0xE6, 0x97, 0xA5, 0x31, ++ 0x36, 0xE6, 0x97, 0xA5, 0x31, 0x37, 0xE6, 0x97, ++ 0xA5, 0x31, 0x38, 0xE6, 0x97, 0xA5, 0x31, 0x39, ++ 0xE6, 0x97, 0xA5, 0x32, 0x30, 0xE6, 0x97, 0xA5, ++ 0x32, 0x31, 0xE6, 0x97, 0xA5, 0x32, 0x32, 0xE6, ++ 0x97, 0xA5, 0x32, 0x33, 0xE6, 0x97, 0xA5, 0x32, ++ 0x34, 0xE6, 0x97, 0xA5, 0x32, 0x35, 0xE6, 0x97, ++ 0xA5, 0x32, 0x36, 0xE6, 0x97, 0xA5, 0x32, 0x37, ++ 0xE6, 0x97, 0xA5, 0x32, 0x38, 0xE6, 0x97, 0xA5, ++ 0x32, 0x39, 0xE6, 0x97, 0xA5, 0x33, 0x30, 0xE6, ++ 0x97, 0xA5, 0x33, 0x31, 0xE6, 0x97, 0xA5, 0x67, ++ 0x61, 0x6C, 0xF6, 0xE8, 0xB1, 0x88, 0xF6, 0xE6, ++ 0x9B, 0xB4, 0xF6, 0xE8, 0xBB, 0x8A, 0xF6, 0xE8, ++ 0xB3, 0x88, 0xF6, 0xE6, 0xBB, 0x91, 0xF6, 0xE4, ++ 0xB8, 0xB2, 0xF6, 0xE5, 0x8F, 0xA5, 0xF6, 0xE9, ++ 0xBE, 0x9C, 0xF6, 0xE9, 0xBE, 0x9C, 0xF6, 0xE5, ++ 0xA5, 0x91, 0xF6, 0xE9, 0x87, 0x91, 0xF6, 0xE5, ++ 0x96, 0x87, 0xF6, 0xE5, 0xA5, 0x88, 0xF6, 0xE6, ++ 0x87, 0xB6, 0xF6, 0xE7, 0x99, 0xA9, 0xF6, 0xE7, ++ 0xBE, 0x85, 0xF6, 0xE8, 0x98, 0xBF, 0xF6, 0xE8, ++ 0x9E, 0xBA, 0xF6, 0xE8, 0xA3, 0xB8, 0xF6, 0xE9, ++ 0x82, 0x8F, 0xF6, 0xE6, 
0xA8, 0x82, 0xF6, 0xE6, ++ 0xB4, 0x9B, 0xF6, 0xE7, 0x83, 0x99, 0xF6, 0xE7, ++ 0x8F, 0x9E, 0xF6, 0xE8, 0x90, 0xBD, 0xF6, 0xE9, ++ 0x85, 0xAA, 0xF6, 0xE9, 0xA7, 0xB1, 0xF6, 0xE4, ++ 0xBA, 0x82, 0xF6, 0xE5, 0x8D, 0xB5, 0xF6, 0xE6, ++ 0xAC, 0x84, 0xF6, 0xE7, 0x88, 0x9B, 0xF6, 0xE8, ++ 0x98, 0xAD, 0xF6, 0xE9, 0xB8, 0x9E, 0xF6, 0xE5, ++ 0xB5, 0x90, 0xF6, 0xE6, 0xBF, 0xAB, 0xF6, 0xE8, ++ 0x97, 0x8D, 0xF6, 0xE8, 0xA5, 0xA4, 0xF6, 0xE6, ++ 0x8B, 0x89, 0xF6, 0xE8, 0x87, 0x98, 0xF6, 0xE8, ++ 0xA0, 0x9F, 0xF6, 0xE5, 0xBB, 0x8A, 0xF6, 0xE6, ++ 0x9C, 0x97, 0xF6, 0xE6, 0xB5, 0xAA, 0xF6, 0xE7, ++ 0x8B, 0xBC, 0xF6, 0xE9, 0x83, 0x8E, 0xF6, 0xE4, ++ 0xBE, 0x86, 0xF6, 0xE5, 0x86, 0xB7, 0xF6, 0xE5, ++ 0x8B, 0x9E, 0xF6, 0xE6, 0x93, 0x84, 0xF6, 0xE6, ++ 0xAB, 0x93, 0xF6, 0xE7, 0x88, 0x90, 0xF6, 0xE7, ++ 0x9B, 0xA7, 0xF6, 0xE8, 0x80, 0x81, 0xF6, 0xE8, ++ 0x98, 0x86, 0xF6, 0xE8, 0x99, 0x9C, 0xF6, 0xE8, ++ 0xB7, 0xAF, 0xF6, 0xE9, 0x9C, 0xB2, 0xF6, 0xE9, ++ 0xAD, 0xAF, 0xF6, 0xE9, 0xB7, 0xBA, 0xF6, 0xE7, ++ 0xA2, 0x8C, 0xF6, 0xE7, 0xA5, 0xBF, 0xF6, 0xE7, ++ 0xB6, 0xA0, 0xF6, 0xE8, 0x8F, 0x89, 0xF6, 0xE9, ++ 0x8C, 0x84, 0xF6, 0xE9, 0xB9, 0xBF, 0xF6, 0xE8, ++ 0xAB, 0x96, 0xF6, 0xE5, 0xA3, 0x9F, 0xF6, 0xE5, ++ 0xBC, 0x84, 0xF6, 0xE7, 0xB1, 0xA0, 0xF6, 0xE8, ++ 0x81, 0xBE, 0xF6, 0xE7, 0x89, 0xA2, 0xF6, 0xE7, ++ 0xA3, 0x8A, 0xF6, 0xE8, 0xB3, 0x82, 0xF6, 0xE9, ++ 0x9B, 0xB7, 0xF6, 0xE5, 0xA3, 0x98, 0xF6, 0xE5, ++ 0xB1, 0xA2, 0xF6, 0xE6, 0xA8, 0x93, 0xF6, 0xE6, ++ 0xB7, 0x9A, 0xF6, 0xE6, 0xBC, 0x8F, 0xF6, 0xE7, ++ 0xB4, 0xAF, 0xF6, 0xE7, 0xB8, 0xB7, 0xF6, 0xE9, ++ 0x99, 0x8B, 0xF6, 0xE5, 0x8B, 0x92, 0xF6, 0xE8, ++ 0x82, 0x8B, 0xF6, 0xE5, 0x87, 0x9C, 0xF6, 0xE5, ++ 0x87, 0x8C, 0xF6, 0xE7, 0xA8, 0x9C, 0xF6, 0xE7, ++ 0xB6, 0xBE, 0xF6, 0xE8, 0x8F, 0xB1, 0xF6, 0xE9, ++ 0x99, 0xB5, 0xF6, 0xE8, 0xAE, 0x80, 0xF6, 0xE6, ++ 0x8B, 0x8F, 0xF6, 0xE6, 0xA8, 0x82, 0xF6, 0xE8, ++ 0xAB, 0xBE, 0xF6, 0xE4, 0xB8, 0xB9, 0xF6, 0xE5, ++ 0xAF, 0xA7, 0xF6, 0xE6, 0x80, 0x92, 0xF6, 0xE7, ++ 0x8E, 0x87, 0xF6, 0xE7, 0x95, 0xB0, 0xF6, 0xE5, ++ 0x8C, 0x97, 0xF6, 0xE7, 0xA3, 0xBB, 0xF6, 0xE4, ++ 0xBE, 0xBF, 0xF6, 0xE5, 0xBE, 0xA9, 0xF6, 0xE4, ++ 0xB8, 0x8D, 0xF6, 0xE6, 0xB3, 0x8C, 0xF6, 0xE6, ++ 0x95, 0xB8, 0xF6, 0xE7, 0xB4, 0xA2, 0xF6, 0xE5, ++ 0x8F, 0x83, 0xF6, 0xE5, 0xA1, 0x9E, 0xF6, 0xE7, ++ 0x9C, 0x81, 0xF6, 0xE8, 0x91, 0x89, 0xF6, 0xE8, ++ 0xAA, 0xAA, 0xF6, 0xE6, 0xAE, 0xBA, 0xF6, 0xE8, ++ 0xBE, 0xB0, 0xF6, 0xE6, 0xB2, 0x88, 0xF6, 0xE6, ++ 0x8B, 0xBE, 0xF6, 0xE8, 0x8B, 0xA5, 0xF6, 0xE6, ++ 0x8E, 0xA0, 0xF6, 0xE7, 0x95, 0xA5, 0xF6, 0xE4, ++ 0xBA, 0xAE, 0xF6, 0xE5, 0x85, 0xA9, 0xF6, 0xE5, ++ 0x87, 0x89, 0xF6, 0xE6, 0xA2, 0x81, 0xF6, 0xE7, ++ 0xB3, 0xA7, 0xF6, 0xE8, 0x89, 0xAF, 0xF6, 0xE8, ++ 0xAB, 0x92, 0xF6, 0xE9, 0x87, 0x8F, 0xF6, 0xE5, ++ 0x8B, 0xB5, 0xF6, 0xE5, 0x91, 0x82, 0xF6, 0xE5, ++ 0xA5, 0xB3, 0xF6, 0xE5, 0xBB, 0xAC, 0xF6, 0xE6, ++ 0x97, 0x85, 0xF6, 0xE6, 0xBF, 0xBE, 0xF6, 0xE7, ++ 0xA4, 0xAA, 0xF6, 0xE9, 0x96, 0xAD, 0xF6, 0xE9, ++ 0xA9, 0xAA, 0xF6, 0xE9, 0xBA, 0x97, 0xF6, 0xE9, ++ 0xBB, 0x8E, 0xF6, 0xE5, 0x8A, 0x9B, 0xF6, 0xE6, ++ 0x9B, 0x86, 0xF6, 0xE6, 0xAD, 0xB7, 0xF6, 0xE8, ++ 0xBD, 0xA2, 0xF6, 0xE5, 0xB9, 0xB4, 0xF6, 0xE6, ++ 0x86, 0x90, 0xF6, 0xE6, 0x88, 0x80, 0xF6, 0xE6, ++ 0x92, 0x9A, 0xF6, 0xE6, 0xBC, 0xA3, 0xF6, 0xE7, ++ 0x85, 0x89, 0xF6, 0xE7, 0x92, 0x89, 0xF6, 0xE7, ++ 0xA7, 0x8A, 0xF6, 0xE7, 0xB7, 0xB4, 0xF6, 0xE8, ++ 0x81, 0xAF, 0xF6, 0xE8, 0xBC, 0xA6, 0xF6, 0xE8, ++ 0x93, 0xAE, 0xF6, 0xE9, 0x80, 0xA3, 0xF6, 0xE9, ++ 0x8D, 0x8A, 0xF6, 0xE5, 0x88, 0x97, 0xF6, 0xE5, ++ 0x8A, 0xA3, 0xF6, 0xE5, 0x92, 0xBD, 0xF6, 0xE7, ++ 0x83, 
0x88, 0xF6, 0xE8, 0xA3, 0x82, 0xF6, 0xE8, ++ 0xAA, 0xAA, 0xF6, 0xE5, 0xBB, 0x89, 0xF6, 0xE5, ++ 0xBF, 0xB5, 0xF6, 0xE6, 0x8D, 0xBB, 0xF6, 0xE6, ++ 0xAE, 0xAE, 0xF6, 0xE7, 0xB0, 0xBE, 0xF6, 0xE7, ++ 0x8D, 0xB5, 0xF6, 0xE4, 0xBB, 0xA4, 0xF6, 0xE5, ++ 0x9B, 0xB9, 0xF6, 0xE5, 0xAF, 0xA7, 0xF6, 0xE5, ++ 0xB6, 0xBA, 0xF6, 0xE6, 0x80, 0x9C, 0xF6, 0xE7, ++ 0x8E, 0xB2, 0xF6, 0xE7, 0x91, 0xA9, 0xF6, 0xE7, ++ 0xBE, 0x9A, 0xF6, 0xE8, 0x81, 0x86, 0xF6, 0xE9, ++ 0x88, 0xB4, 0xF6, 0xE9, 0x9B, 0xB6, 0xF6, 0xE9, ++ 0x9D, 0x88, 0xF6, 0xE9, 0xA0, 0x98, 0xF6, 0xE4, ++ 0xBE, 0x8B, 0xF6, 0xE7, 0xA6, 0xAE, 0xF6, 0xE9, ++ 0x86, 0xB4, 0xF6, 0xE9, 0x9A, 0xB8, 0xF6, 0xE6, ++ 0x83, 0xA1, 0xF6, 0xE4, 0xBA, 0x86, 0xF6, 0xE5, ++ 0x83, 0x9A, 0xF6, 0xE5, 0xAF, 0xAE, 0xF6, 0xE5, ++ 0xB0, 0xBF, 0xF6, 0xE6, 0x96, 0x99, 0xF6, 0xE6, ++ 0xA8, 0x82, 0xF6, 0xE7, 0x87, 0x8E, 0xF6, 0xE7, ++ 0x99, 0x82, 0xF6, 0xE8, 0x93, 0xBC, 0xF6, 0xE9, ++ 0x81, 0xBC, 0xF6, 0xE9, 0xBE, 0x8D, 0xF6, 0xE6, ++ 0x9A, 0x88, 0xF6, 0xE9, 0x98, 0xAE, 0xF6, 0xE5, ++ 0x8A, 0x89, 0xF6, 0xE6, 0x9D, 0xBB, 0xF6, 0xE6, ++ 0x9F, 0xB3, 0xF6, 0xE6, 0xB5, 0x81, 0xF6, 0xE6, ++ 0xBA, 0x9C, 0xF6, 0xE7, 0x90, 0x89, 0xF6, 0xE7, ++ 0x95, 0x99, 0xF6, 0xE7, 0xA1, 0xAB, 0xF6, 0xE7, ++ 0xB4, 0x90, 0xF6, 0xE9, 0xA1, 0x9E, 0xF6, 0xE5, ++ 0x85, 0xAD, 0xF6, 0xE6, 0x88, 0xAE, 0xF6, 0xE9, ++ 0x99, 0xB8, 0xF6, 0xE5, 0x80, 0xAB, 0xF6, 0xE5, ++ 0xB4, 0x99, 0xF6, 0xE6, 0xB7, 0xAA, 0xF6, 0xE8, ++ 0xBC, 0xAA, 0xF6, 0xE5, 0xBE, 0x8B, 0xF6, 0xE6, ++ 0x85, 0x84, 0xF6, 0xE6, 0xA0, 0x97, 0xF6, 0xE7, ++ 0x8E, 0x87, 0xF6, 0xE9, 0x9A, 0x86, 0xF6, 0xE5, ++ 0x88, 0xA9, 0xF6, 0xE5, 0x90, 0x8F, 0xF6, 0xE5, ++ 0xB1, 0xA5, 0xF6, 0xE6, 0x98, 0x93, 0xF6, 0xE6, ++ 0x9D, 0x8E, 0xF6, 0xE6, 0xA2, 0xA8, 0xF6, 0xE6, ++ 0xB3, 0xA5, 0xF6, 0xE7, 0x90, 0x86, 0xF6, 0xE7, ++ 0x97, 0xA2, 0xF6, 0xE7, 0xBD, 0xB9, 0xF6, 0xE8, ++ 0xA3, 0x8F, 0xF6, 0xE8, 0xA3, 0xA1, 0xF6, 0xE9, ++ 0x87, 0x8C, 0xF6, 0xE9, 0x9B, 0xA2, 0xF6, 0xE5, ++ 0x8C, 0xBF, 0xF6, 0xE6, 0xBA, 0xBA, 0xF6, 0xE5, ++ 0x90, 0x9D, 0xF6, 0xE7, 0x87, 0x90, 0xF6, 0xE7, ++ 0x92, 0x98, 0xF6, 0xE8, 0x97, 0xBA, 0xF6, 0xE9, ++ 0x9A, 0xA3, 0xF6, 0xE9, 0xB1, 0x97, 0xF6, 0xE9, ++ 0xBA, 0x9F, 0xF6, 0xE6, 0x9E, 0x97, 0xF6, 0xE6, ++ 0xB7, 0x8B, 0xF6, 0xE8, 0x87, 0xA8, 0xF6, 0xE7, ++ 0xAB, 0x8B, 0xF6, 0xE7, 0xAC, 0xA0, 0xF6, 0xE7, ++ 0xB2, 0x92, 0xF6, 0xE7, 0x8B, 0x80, 0xF6, 0xE7, ++ 0x82, 0x99, 0xF6, 0xE8, 0xAD, 0x98, 0xF6, 0xE4, ++ 0xBB, 0x80, 0xF6, 0xE8, 0x8C, 0xB6, 0xF6, 0xE5, ++ 0x88, 0xBA, 0xF6, 0xE5, 0x88, 0x87, 0xF6, 0xE5, ++ 0xBA, 0xA6, 0xF6, 0xE6, 0x8B, 0x93, 0xF6, 0xE7, ++ 0xB3, 0x96, 0xF6, 0xE5, 0xAE, 0x85, 0xF6, 0xE6, ++ 0xB4, 0x9E, 0xF6, 0xE6, 0x9A, 0xB4, 0xF6, 0xE8, ++ 0xBC, 0xBB, 0xF6, 0xE8, 0xA1, 0x8C, 0xF6, 0xE9, ++ 0x99, 0x8D, 0xF6, 0xE8, 0xA6, 0x8B, 0xF6, 0xE5, ++ 0xBB, 0x93, 0xF6, 0xE5, 0x85, 0x80, 0xF6, 0xE5, ++ 0x97, 0x80, 0xF6, 0xE5, 0xA1, 0x9A, 0xF6, 0xE6, ++ 0x99, 0xB4, 0xF6, 0xE5, 0x87, 0x9E, 0xF6, 0xE7, ++ 0x8C, 0xAA, 0xF6, 0xE7, 0x9B, 0x8A, 0xF6, 0xE7, ++ 0xA4, 0xBC, 0xF6, 0xE7, 0xA5, 0x9E, 0xF6, 0xE7, ++ 0xA5, 0xA5, 0xF6, 0xE7, 0xA6, 0x8F, 0xF6, 0xE9, ++ 0x9D, 0x96, 0xF6, 0xE7, 0xB2, 0xBE, 0xF6, 0xE7, ++ 0xBE, 0xBD, 0xF6, 0xE8, 0x98, 0x92, 0xF6, 0xE8, ++ 0xAB, 0xB8, 0xF6, 0xE9, 0x80, 0xB8, 0xF6, 0xE9, ++ 0x83, 0xBD, 0xF6, 0xE9, 0xA3, 0xAF, 0xF6, 0xE9, ++ 0xA3, 0xBC, 0xF6, 0xE9, 0xA4, 0xA8, 0xF6, 0xE9, ++ 0xB6, 0xB4, 0xF6, 0xE4, 0xBE, 0xAE, 0xF6, 0xE5, ++ 0x83, 0xA7, 0xF6, 0xE5, 0x85, 0x8D, 0xF6, 0xE5, ++ 0x8B, 0x89, 0xF6, 0xE5, 0x8B, 0xA4, 0xF6, 0xE5, ++ 0x8D, 0x91, 0xF6, 0xE5, 0x96, 0x9D, 0xF6, 0xE5, ++ 0x98, 0x86, 0xF6, 0xE5, 0x99, 0xA8, 0xF6, 
0xE5, ++ 0xA1, 0x80, 0xF6, 0xE5, 0xA2, 0xA8, 0xF6, 0xE5, ++ 0xB1, 0xA4, 0xF6, 0xE5, 0xB1, 0xAE, 0xF6, 0xE6, ++ 0x82, 0x94, 0xF6, 0xE6, 0x85, 0xA8, 0xF6, 0xE6, ++ 0x86, 0x8E, 0xF6, 0xE6, 0x87, 0xB2, 0xF6, 0xE6, ++ 0x95, 0x8F, 0xF6, 0xE6, 0x97, 0xA2, 0xF6, 0xE6, ++ 0x9A, 0x91, 0xF6, 0xE6, 0xA2, 0x85, 0xF6, 0xE6, ++ 0xB5, 0xB7, 0xF6, 0xE6, 0xB8, 0x9A, 0xF6, 0xE6, ++ 0xBC, 0xA2, 0xF6, 0xE7, 0x85, 0xAE, 0xF6, 0xE7, ++ 0x88, 0xAB, 0xF6, 0xE7, 0x90, 0xA2, 0xF6, 0xE7, ++ 0xA2, 0x91, 0xF6, 0xE7, 0xA4, 0xBE, 0xF6, 0xE7, ++ 0xA5, 0x89, 0xF6, 0xE7, 0xA5, 0x88, 0xF6, 0xE7, ++ 0xA5, 0x90, 0xF6, 0xE7, 0xA5, 0x96, 0xF6, 0xE7, ++ 0xA5, 0x9D, 0xF6, 0xE7, 0xA6, 0x8D, 0xF6, 0xE7, ++ 0xA6, 0x8E, 0xF6, 0xE7, 0xA9, 0x80, 0xF6, 0xE7, ++ 0xAA, 0x81, 0xF6, 0xE7, 0xAF, 0x80, 0xF6, 0xE7, ++ 0xB7, 0xB4, 0xF6, 0xE7, 0xB8, 0x89, 0xF6, 0xE7, ++ 0xB9, 0x81, 0xF6, 0xE7, 0xBD, 0xB2, 0xF6, 0xE8, ++ 0x80, 0x85, 0xF6, 0xE8, 0x87, 0xAD, 0xF6, 0xE8, ++ 0x89, 0xB9, 0xF6, 0xE8, 0x89, 0xB9, 0xF6, 0xE8, ++ 0x91, 0x97, 0xF6, 0xE8, 0xA4, 0x90, 0xF6, 0xE8, ++ 0xA6, 0x96, 0xF6, 0xE8, 0xAC, 0x81, 0xF6, 0xE8, ++ 0xAC, 0xB9, 0xF6, 0xE8, 0xB3, 0x93, 0xF6, 0xE8, ++ 0xB4, 0x88, 0xF6, 0xE8, 0xBE, 0xB6, 0xF6, 0xE9, ++ 0x80, 0xB8, 0xF6, 0xE9, 0x9B, 0xA3, 0xF6, 0xE9, ++ 0x9F, 0xBF, 0xF6, 0xE9, 0xA0, 0xBB, 0xF6, 0xE4, ++ 0xB8, 0xA6, 0xF6, 0xE5, 0x86, 0xB5, 0xF6, 0xE5, ++ 0x85, 0xA8, 0xF6, 0xE4, 0xBE, 0x80, 0xF6, 0xE5, ++ 0x85, 0x85, 0xF6, 0xE5, 0x86, 0x80, 0xF6, 0xE5, ++ 0x8B, 0x87, 0xF6, 0xE5, 0x8B, 0xBA, 0xF6, 0xE5, ++ 0x96, 0x9D, 0xF6, 0xE5, 0x95, 0x95, 0xF6, 0xE5, ++ 0x96, 0x99, 0xF6, 0xE5, 0x97, 0xA2, 0xF6, 0xE5, ++ 0xA1, 0x9A, 0xF6, 0xE5, 0xA2, 0xB3, 0xF6, 0xE5, ++ 0xA5, 0x84, 0xF6, 0xE5, 0xA5, 0x94, 0xF6, 0xE5, ++ 0xA9, 0xA2, 0xF6, 0xE5, 0xAC, 0xA8, 0xF6, 0xE5, ++ 0xBB, 0x92, 0xF6, 0xE5, 0xBB, 0x99, 0xF6, 0xE5, ++ 0xBD, 0xA9, 0xF6, 0xE5, 0xBE, 0xAD, 0xF6, 0xE6, ++ 0x83, 0x98, 0xF6, 0xE6, 0x85, 0x8E, 0xF6, 0xE6, ++ 0x84, 0x88, 0xF6, 0xE6, 0x86, 0x8E, 0xF6, 0xE6, ++ 0x85, 0xA0, 0xF6, 0xE6, 0x87, 0xB2, 0xF6, 0xE6, ++ 0x88, 0xB4, 0xF6, 0xE6, 0x8F, 0x84, 0xF6, 0xE6, ++ 0x90, 0x9C, 0xF6, 0xE6, 0x91, 0x92, 0xF6, 0xE6, ++ 0x95, 0x96, 0xF6, 0xE6, 0x99, 0xB4, 0xF6, 0xE6, ++ 0x9C, 0x97, 0xF6, 0xE6, 0x9C, 0x9B, 0xF6, 0xE6, ++ 0x9D, 0x96, 0xF6, 0xE6, 0xAD, 0xB9, 0xF6, 0xE6, ++ 0xAE, 0xBA, 0xF6, 0xE6, 0xB5, 0x81, 0xF6, 0xE6, ++ 0xBB, 0x9B, 0xF6, 0xE6, 0xBB, 0x8B, 0xF6, 0xE6, ++ 0xBC, 0xA2, 0xF6, 0xE7, 0x80, 0x9E, 0xF6, 0xE7, ++ 0x85, 0xAE, 0xF6, 0xE7, 0x9E, 0xA7, 0xF6, 0xE7, ++ 0x88, 0xB5, 0xF6, 0xE7, 0x8A, 0xAF, 0xF6, 0xE7, ++ 0x8C, 0xAA, 0xF6, 0xE7, 0x91, 0xB1, 0xF6, 0xE7, ++ 0x94, 0x86, 0xF6, 0xE7, 0x94, 0xBB, 0xF6, 0xE7, ++ 0x98, 0x9D, 0xF6, 0xE7, 0x98, 0x9F, 0xF6, 0xE7, ++ 0x9B, 0x8A, 0xF6, 0xE7, 0x9B, 0x9B, 0xF6, 0xE7, ++ 0x9B, 0xB4, 0xF6, 0xE7, 0x9D, 0x8A, 0xF6, 0xE7, ++ 0x9D, 0x80, 0xF6, 0xE7, 0xA3, 0x8C, 0xF6, 0xE7, ++ 0xAA, 0xB1, 0xF6, 0xE7, 0xAF, 0x80, 0xF6, 0xE7, ++ 0xB1, 0xBB, 0xF6, 0xE7, 0xB5, 0x9B, 0xF6, 0xE7, ++ 0xB7, 0xB4, 0xF6, 0xE7, 0xBC, 0xBE, 0xF6, 0xE8, ++ 0x80, 0x85, 0xF6, 0xE8, 0x8D, 0x92, 0xF6, 0xE8, ++ 0x8F, 0xAF, 0xF6, 0xE8, 0x9D, 0xB9, 0xF6, 0xE8, ++ 0xA5, 0x81, 0xF6, 0xE8, 0xA6, 0x86, 0xF6, 0xE8, ++ 0xA6, 0x96, 0xF6, 0xE8, 0xAA, 0xBF, 0xF6, 0xE8, ++ 0xAB, 0xB8, 0xF6, 0xE8, 0xAB, 0x8B, 0xF6, 0xE8, ++ 0xAC, 0x81, 0xF6, 0xE8, 0xAB, 0xBE, 0xF6, 0xE8, ++ 0xAB, 0xAD, 0xF6, 0xE8, 0xAC, 0xB9, 0xF6, 0xE8, ++ 0xAE, 0x8A, 0xF6, 0xE8, 0xB4, 0x88, 0xF6, 0xE8, ++ 0xBC, 0xB8, 0xF6, 0xE9, 0x81, 0xB2, 0xF6, 0xE9, ++ 0x86, 0x99, 0xF6, 0xE9, 0x89, 0xB6, 0xF6, 0xE9, ++ 0x99, 0xBC, 0xF6, 0xE9, 0x9B, 0xA3, 0xF6, 0xE9, ++ 0x9D, 0x96, 0xF6, 0xE9, 
0x9F, 0x9B, 0xF6, 0xE9, ++ 0x9F, 0xBF, 0xF6, 0xE9, 0xA0, 0x8B, 0xF6, 0xE9, ++ 0xA0, 0xBB, 0xF6, 0xE9, 0xAC, 0x92, 0xF6, 0xE9, ++ 0xBE, 0x9C, 0xF6, 0xF0, 0xA2, 0xA1, 0x8A, 0xF6, ++ 0xF0, 0xA2, 0xA1, 0x84, 0xF6, 0xF0, 0xA3, 0x8F, ++ 0x95, 0xF6, 0xE3, 0xAE, 0x9D, 0xF6, 0xE4, 0x80, ++ 0x98, 0xF6, 0xE4, 0x80, 0xB9, 0xF6, 0xF0, 0xA5, ++ 0x89, 0x89, 0xF6, 0xF0, 0xA5, 0xB3, 0x90, 0xF6, ++ 0xF0, 0xA7, 0xBB, 0x93, 0xF6, 0xE9, 0xBD, 0x83, ++ 0xF6, 0xE9, 0xBE, 0x8E, 0x66, 0x66, 0x66, 0x69, ++ 0x66, 0x6C, 0x66, 0x66, 0x69, 0x66, 0x66, 0x6C, ++ 0x73, 0x74, 0x73, 0x74, 0xD5, 0xB4, 0xD5, 0xB6, ++ 0xD5, 0xB4, 0xD5, 0xA5, 0xD5, 0xB4, 0xD5, 0xAB, ++ 0xD5, 0xBE, 0xD5, 0xB6, 0xD5, 0xB4, 0xD5, 0xAD, ++ 0xF6, 0xD7, 0x99, 0xD6, 0xB4, 0xF6, 0xD7, 0xB2, ++ 0xD6, 0xB7, 0xD7, 0xA2, 0xD7, 0x90, 0xD7, 0x93, ++ 0xD7, 0x94, 0xD7, 0x9B, 0xD7, 0x9C, 0xD7, 0x9D, ++ 0xD7, 0xA8, 0xD7, 0xAA, 0x2B, 0xF6, 0xD7, 0xA9, ++ 0xD7, 0x81, 0xF6, 0xD7, 0xA9, 0xD7, 0x82, 0xF6, ++ 0xD7, 0xA9, 0xD6, 0xBC, 0xD7, 0x81, 0xF6, 0xD7, ++ 0xA9, 0xD6, 0xBC, 0xD7, 0x82, 0xF6, 0xD7, 0x90, ++ 0xD6, 0xB7, 0xF6, 0xD7, 0x90, 0xD6, 0xB8, 0xF6, ++ 0xD7, 0x90, 0xD6, 0xBC, 0xF6, 0xD7, 0x91, 0xD6, ++ 0xBC, 0xF6, 0xD7, 0x92, 0xD6, 0xBC, 0xF6, 0xD7, ++ 0x93, 0xD6, 0xBC, 0xF6, 0xD7, 0x94, 0xD6, 0xBC, ++ 0xF6, 0xD7, 0x95, 0xD6, 0xBC, 0xF6, 0xD7, 0x96, ++ 0xD6, 0xBC, 0xF6, 0xD7, 0x98, 0xD6, 0xBC, 0xF6, ++ 0xD7, 0x99, 0xD6, 0xBC, 0xF6, 0xD7, 0x9A, 0xD6, ++ 0xBC, 0xF6, 0xD7, 0x9B, 0xD6, 0xBC, 0xF6, 0xD7, ++ 0x9C, 0xD6, 0xBC, 0xF6, 0xD7, 0x9E, 0xD6, 0xBC, ++ 0xF6, 0xD7, 0xA0, 0xD6, 0xBC, 0xF6, 0xD7, 0xA1, ++ 0xD6, 0xBC, 0xF6, 0xD7, 0xA3, 0xD6, 0xBC, 0xF6, ++ 0xD7, 0xA4, 0xD6, 0xBC, 0xF6, 0xD7, 0xA6, 0xD6, ++ 0xBC, 0xF6, 0xD7, 0xA7, 0xD6, 0xBC, 0xF6, 0xD7, ++ 0xA8, 0xD6, 0xBC, 0xF6, 0xD7, 0xA9, 0xD6, 0xBC, ++ 0xF6, 0xD7, 0xAA, 0xD6, 0xBC, 0xF6, 0xD7, 0x95, ++ 0xD6, 0xB9, 0xF6, 0xD7, 0x91, 0xD6, 0xBF, 0xF6, ++ 0xD7, 0x9B, 0xD6, 0xBF, 0xF6, 0xD7, 0xA4, 0xD6, ++ 0xBF, 0xD7, 0x90, 0xD7, 0x9C, 0xD9, 0xB1, 0xD9, ++ 0xB1, 0xD9, 0xBB, 0xD9, 0xBB, 0xD9, 0xBB, 0xD9, ++ 0xBB, 0xD9, 0xBE, 0xD9, 0xBE, 0xD9, 0xBE, 0xD9, ++ 0xBE, 0xDA, 0x80, 0xDA, 0x80, 0xDA, 0x80, 0xDA, ++ 0x80, 0xD9, 0xBA, 0xD9, 0xBA, 0xD9, 0xBA, 0xD9, ++ 0xBA, 0xD9, 0xBF, 0xD9, 0xBF, 0xD9, 0xBF, 0xD9, ++ 0xBF, 0xD9, 0xB9, 0xD9, 0xB9, 0xD9, 0xB9, 0xD9, ++ 0xB9, 0xDA, 0xA4, 0xDA, 0xA4, 0xDA, 0xA4, 0xDA, ++ 0xA4, 0xDA, 0xA6, 0xDA, 0xA6, 0xDA, 0xA6, 0xDA, ++ 0xA6, 0xDA, 0x84, 0xDA, 0x84, 0xDA, 0x84, 0xDA, ++ 0x84, 0xDA, 0x83, 0xDA, 0x83, 0xDA, 0x83, 0xDA, ++ 0x83, 0xDA, 0x86, 0xDA, 0x86, 0xDA, 0x86, 0xDA, ++ 0x86, 0xDA, 0x87, 0xDA, 0x87, 0xDA, 0x87, 0xDA, ++ 0x87, 0xDA, 0x8D, 0xDA, 0x8D, 0xDA, 0x8C, 0xDA, ++ 0x8C, 0xDA, 0x8E, 0xDA, 0x8E, 0xDA, 0x88, 0xDA, ++ 0x88, 0xDA, 0x98, 0xDA, 0x98, 0xDA, 0x91, 0xDA, ++ 0x91, 0xDA, 0xA9, 0xDA, 0xA9, 0xDA, 0xA9, 0xDA, ++ 0xA9, 0xDA, 0xAF, 0xDA, 0xAF, 0xDA, 0xAF, 0xDA, ++ 0xAF, 0xDA, 0xB3, 0xDA, 0xB3, 0xDA, 0xB3, 0xDA, ++ 0xB3, 0xDA, 0xB1, 0xDA, 0xB1, 0xDA, 0xB1, 0xDA, ++ 0xB1, 0xDA, 0xBA, 0xDA, 0xBA, 0xDA, 0xBB, 0xDA, ++ 0xBB, 0xDA, 0xBB, 0xDA, 0xBB, 0xDB, 0x95, 0xD9, ++ 0x94, 0xDB, 0x95, 0xD9, 0x94, 0xDB, 0x81, 0xDB, ++ 0x81, 0xDB, 0x81, 0xDB, 0x81, 0xDA, 0xBE, 0xDA, ++ 0xBE, 0xDA, 0xBE, 0xDA, 0xBE, 0xDB, 0x92, 0xDB, ++ 0x92, 0xDB, 0x92, 0xD9, 0x94, 0xDB, 0x92, 0xD9, ++ 0x94, 0xDA, 0xAD, 0xDA, 0xAD, 0xDA, 0xAD, 0xDA, ++ 0xAD, 0xDB, 0x87, 0xDB, 0x87, 0xDB, 0x86, 0xDB, ++ 0x86, 0xDB, 0x88, 0xDB, 0x88, 0xDB, 0x87, 0xD9, ++ 0xB4, 0xDB, 0x8B, 0xDB, 0x8B, 0xDB, 0x85, 0xDB, ++ 0x85, 0xDB, 0x89, 0xDB, 0x89, 0xDB, 0x90, 0xDB, ++ 0x90, 0xDB, 0x90, 0xDB, 0x90, 0xD9, 0x89, 0xD9, ++ 0x89, 
0xD9, 0x8A, 0xD9, 0x94, 0xD8, 0xA7, 0xD9, ++ 0x8A, 0xD9, 0x94, 0xD8, 0xA7, 0xD9, 0x8A, 0xD9, ++ 0x94, 0xDB, 0x95, 0xD9, 0x8A, 0xD9, 0x94, 0xDB, ++ 0x95, 0xD9, 0x8A, 0xD9, 0x94, 0xD9, 0x88, 0xD9, ++ 0x8A, 0xD9, 0x94, 0xD9, 0x88, 0xD9, 0x8A, 0xD9, ++ 0x94, 0xDB, 0x87, 0xD9, 0x8A, 0xD9, 0x94, 0xDB, ++ 0x87, 0xD9, 0x8A, 0xD9, 0x94, 0xDB, 0x86, 0xD9, ++ 0x8A, 0xD9, 0x94, 0xDB, 0x86, 0xD9, 0x8A, 0xD9, ++ 0x94, 0xDB, 0x88, 0xD9, 0x8A, 0xD9, 0x94, 0xDB, ++ 0x88, 0xD9, 0x8A, 0xD9, 0x94, 0xDB, 0x90, 0xD9, ++ 0x8A, 0xD9, 0x94, 0xDB, 0x90, 0xD9, 0x8A, 0xD9, ++ 0x94, 0xDB, 0x90, 0xD9, 0x8A, 0xD9, 0x94, 0xD9, ++ 0x89, 0xD9, 0x8A, 0xD9, 0x94, 0xD9, 0x89, 0xD9, ++ 0x8A, 0xD9, 0x94, 0xD9, 0x89, 0xDB, 0x8C, 0xDB, ++ 0x8C, 0xDB, 0x8C, 0xDB, 0x8C, 0xD9, 0x8A, 0xD9, ++ 0x94, 0xD8, 0xAC, 0xD9, 0x8A, 0xD9, 0x94, 0xD8, ++ 0xAD, 0xD9, 0x8A, 0xD9, 0x94, 0xD9, 0x85, 0xD9, ++ 0x8A, 0xD9, 0x94, 0xD9, 0x89, 0xD9, 0x8A, 0xD9, ++ 0x94, 0xD9, 0x8A, 0xD8, 0xA8, 0xD8, 0xAC, 0xD8, ++ 0xA8, 0xD8, 0xAD, 0xD8, 0xA8, 0xD8, 0xAE, 0xD8, ++ 0xA8, 0xD9, 0x85, 0xD8, 0xA8, 0xD9, 0x89, 0xD8, ++ 0xA8, 0xD9, 0x8A, 0xD8, 0xAA, 0xD8, 0xAC, 0xD8, ++ 0xAA, 0xD8, 0xAD, 0xD8, 0xAA, 0xD8, 0xAE, 0xD8, ++ 0xAA, 0xD9, 0x85, 0xD8, 0xAA, 0xD9, 0x89, 0xD8, ++ 0xAA, 0xD9, 0x8A, 0xD8, 0xAB, 0xD8, 0xAC, 0xD8, ++ 0xAB, 0xD9, 0x85, 0xD8, 0xAB, 0xD9, 0x89, 0xD8, ++ 0xAB, 0xD9, 0x8A, 0xD8, 0xAC, 0xD8, 0xAD, 0xD8, ++ 0xAC, 0xD9, 0x85, 0xD8, 0xAD, 0xD8, 0xAC, 0xD8, ++ 0xAD, 0xD9, 0x85, 0xD8, 0xAE, 0xD8, 0xAC, 0xD8, ++ 0xAE, 0xD8, 0xAD, 0xD8, 0xAE, 0xD9, 0x85, 0xD8, ++ 0xB3, 0xD8, 0xAC, 0xD8, 0xB3, 0xD8, 0xAD, 0xD8, ++ 0xB3, 0xD8, 0xAE, 0xD8, 0xB3, 0xD9, 0x85, 0xD8, ++ 0xB5, 0xD8, 0xAD, 0xD8, 0xB5, 0xD9, 0x85, 0xD8, ++ 0xB6, 0xD8, 0xAC, 0xD8, 0xB6, 0xD8, 0xAD, 0xD8, ++ 0xB6, 0xD8, 0xAE, 0xD8, 0xB6, 0xD9, 0x85, 0xD8, ++ 0xB7, 0xD8, 0xAD, 0xD8, 0xB7, 0xD9, 0x85, 0xD8, ++ 0xB8, 0xD9, 0x85, 0xD8, 0xB9, 0xD8, 0xAC, 0xD8, ++ 0xB9, 0xD9, 0x85, 0xD8, 0xBA, 0xD8, 0xAC, 0xD8, ++ 0xBA, 0xD9, 0x85, 0xD9, 0x81, 0xD8, 0xAC, 0xD9, ++ 0x81, 0xD8, 0xAD, 0xD9, 0x81, 0xD8, 0xAE, 0xD9, ++ 0x81, 0xD9, 0x85, 0xD9, 0x81, 0xD9, 0x89, 0xD9, ++ 0x81, 0xD9, 0x8A, 0xD9, 0x82, 0xD8, 0xAD, 0xD9, ++ 0x82, 0xD9, 0x85, 0xD9, 0x82, 0xD9, 0x89, 0xD9, ++ 0x82, 0xD9, 0x8A, 0xD9, 0x83, 0xD8, 0xA7, 0xD9, ++ 0x83, 0xD8, 0xAC, 0xD9, 0x83, 0xD8, 0xAD, 0xD9, ++ 0x83, 0xD8, 0xAE, 0xD9, 0x83, 0xD9, 0x84, 0xD9, ++ 0x83, 0xD9, 0x85, 0xD9, 0x83, 0xD9, 0x89, 0xD9, ++ 0x83, 0xD9, 0x8A, 0xD9, 0x84, 0xD8, 0xAC, 0xD9, ++ 0x84, 0xD8, 0xAD, 0xD9, 0x84, 0xD8, 0xAE, 0xD9, ++ 0x84, 0xD9, 0x85, 0xD9, 0x84, 0xD9, 0x89, 0xD9, ++ 0x84, 0xD9, 0x8A, 0xD9, 0x85, 0xD8, 0xAC, 0xD9, ++ 0x85, 0xD8, 0xAD, 0xD9, 0x85, 0xD8, 0xAE, 0xD9, ++ 0x85, 0xD9, 0x85, 0xD9, 0x85, 0xD9, 0x89, 0xD9, ++ 0x85, 0xD9, 0x8A, 0xD9, 0x86, 0xD8, 0xAC, 0xD9, ++ 0x86, 0xD8, 0xAD, 0xD9, 0x86, 0xD8, 0xAE, 0xD9, ++ 0x86, 0xD9, 0x85, 0xD9, 0x86, 0xD9, 0x89, 0xD9, ++ 0x86, 0xD9, 0x8A, 0xD9, 0x87, 0xD8, 0xAC, 0xD9, ++ 0x87, 0xD9, 0x85, 0xD9, 0x87, 0xD9, 0x89, 0xD9, ++ 0x87, 0xD9, 0x8A, 0xD9, 0x8A, 0xD8, 0xAC, 0xD9, ++ 0x8A, 0xD8, 0xAD, 0xD9, 0x8A, 0xD8, 0xAE, 0xD9, ++ 0x8A, 0xD9, 0x85, 0xD9, 0x8A, 0xD9, 0x89, 0xD9, ++ 0x8A, 0xD9, 0x8A, 0xD8, 0xB0, 0xD9, 0xB0, 0xD8, ++ 0xB1, 0xD9, 0xB0, 0xD9, 0x89, 0xD9, 0xB0, 0x20, ++ 0xD9, 0x8C, 0xD9, 0x91, 0x20, 0xD9, 0x8D, 0xD9, ++ 0x91, 0x20, 0xD9, 0x8E, 0xD9, 0x91, 0x20, 0xD9, ++ 0x8F, 0xD9, 0x91, 0x20, 0xD9, 0x90, 0xD9, 0x91, ++ 0x20, 0xD9, 0x91, 0xD9, 0xB0, 0xD9, 0x8A, 0xD9, ++ 0x94, 0xD8, 0xB1, 0xD9, 0x8A, 0xD9, 0x94, 0xD8, ++ 0xB2, 0xD9, 0x8A, 0xD9, 0x94, 0xD9, 0x85, 0xD9, ++ 0x8A, 0xD9, 0x94, 0xD9, 0x86, 0xD9, 0x8A, 
0xD9, ++ 0x94, 0xD9, 0x89, 0xD9, 0x8A, 0xD9, 0x94, 0xD9, ++ 0x8A, 0xD8, 0xA8, 0xD8, 0xB1, 0xD8, 0xA8, 0xD8, ++ 0xB2, 0xD8, 0xA8, 0xD9, 0x85, 0xD8, 0xA8, 0xD9, ++ 0x86, 0xD8, 0xA8, 0xD9, 0x89, 0xD8, 0xA8, 0xD9, ++ 0x8A, 0xD8, 0xAA, 0xD8, 0xB1, 0xD8, 0xAA, 0xD8, ++ 0xB2, 0xD8, 0xAA, 0xD9, 0x85, 0xD8, 0xAA, 0xD9, ++ 0x86, 0xD8, 0xAA, 0xD9, 0x89, 0xD8, 0xAA, 0xD9, ++ 0x8A, 0xD8, 0xAB, 0xD8, 0xB1, 0xD8, 0xAB, 0xD8, ++ 0xB2, 0xD8, 0xAB, 0xD9, 0x85, 0xD8, 0xAB, 0xD9, ++ 0x86, 0xD8, 0xAB, 0xD9, 0x89, 0xD8, 0xAB, 0xD9, ++ 0x8A, 0xD9, 0x81, 0xD9, 0x89, 0xD9, 0x81, 0xD9, ++ 0x8A, 0xD9, 0x82, 0xD9, 0x89, 0xD9, 0x82, 0xD9, ++ 0x8A, 0xD9, 0x83, 0xD8, 0xA7, 0xD9, 0x83, 0xD9, ++ 0x84, 0xD9, 0x83, 0xD9, 0x85, 0xD9, 0x83, 0xD9, ++ 0x89, 0xD9, 0x83, 0xD9, 0x8A, 0xD9, 0x84, 0xD9, ++ 0x85, 0xD9, 0x84, 0xD9, 0x89, 0xD9, 0x84, 0xD9, ++ 0x8A, 0xD9, 0x85, 0xD8, 0xA7, 0xD9, 0x85, 0xD9, ++ 0x85, 0xD9, 0x86, 0xD8, 0xB1, 0xD9, 0x86, 0xD8, ++ 0xB2, 0xD9, 0x86, 0xD9, 0x85, 0xD9, 0x86, 0xD9, ++ 0x86, 0xD9, 0x86, 0xD9, 0x89, 0xD9, 0x86, 0xD9, ++ 0x8A, 0xD9, 0x89, 0xD9, 0xB0, 0xD9, 0x8A, 0xD8, ++ 0xB1, 0xD9, 0x8A, 0xD8, 0xB2, 0xD9, 0x8A, 0xD9, ++ 0x85, 0xD9, 0x8A, 0xD9, 0x86, 0xD9, 0x8A, 0xD9, ++ 0x89, 0xD9, 0x8A, 0xD9, 0x8A, 0xD9, 0x8A, 0xD9, ++ 0x94, 0xD8, 0xAC, 0xD9, 0x8A, 0xD9, 0x94, 0xD8, ++ 0xAD, 0xD9, 0x8A, 0xD9, 0x94, 0xD8, 0xAE, 0xD9, ++ 0x8A, 0xD9, 0x94, 0xD9, 0x85, 0xD9, 0x8A, 0xD9, ++ 0x94, 0xD9, 0x87, 0xD8, 0xA8, 0xD8, 0xAC, 0xD8, ++ 0xA8, 0xD8, 0xAD, 0xD8, 0xA8, 0xD8, 0xAE, 0xD8, ++ 0xA8, 0xD9, 0x85, 0xD8, 0xA8, 0xD9, 0x87, 0xD8, ++ 0xAA, 0xD8, 0xAC, 0xD8, 0xAA, 0xD8, 0xAD, 0xD8, ++ 0xAA, 0xD8, 0xAE, 0xD8, 0xAA, 0xD9, 0x85, 0xD8, ++ 0xAA, 0xD9, 0x87, 0xD8, 0xAB, 0xD9, 0x85, 0xD8, ++ 0xAC, 0xD8, 0xAD, 0xD8, 0xAC, 0xD9, 0x85, 0xD8, ++ 0xAD, 0xD8, 0xAC, 0xD8, 0xAD, 0xD9, 0x85, 0xD8, ++ 0xAE, 0xD8, 0xAC, 0xD8, 0xAE, 0xD9, 0x85, 0xD8, ++ 0xB3, 0xD8, 0xAC, 0xD8, 0xB3, 0xD8, 0xAD, 0xD8, ++ 0xB3, 0xD8, 0xAE, 0xD8, 0xB3, 0xD9, 0x85, 0xD8, ++ 0xB5, 0xD8, 0xAD, 0xD8, 0xB5, 0xD8, 0xAE, 0xD8, ++ 0xB5, 0xD9, 0x85, 0xD8, 0xB6, 0xD8, 0xAC, 0xD8, ++ 0xB6, 0xD8, 0xAD, 0xD8, 0xB6, 0xD8, 0xAE, 0xD8, ++ 0xB6, 0xD9, 0x85, 0xD8, 0xB7, 0xD8, 0xAD, 0xD8, ++ 0xB8, 0xD9, 0x85, 0xD8, 0xB9, 0xD8, 0xAC, 0xD8, ++ 0xB9, 0xD9, 0x85, 0xD8, 0xBA, 0xD8, 0xAC, 0xD8, ++ 0xBA, 0xD9, 0x85, 0xD9, 0x81, 0xD8, 0xAC, 0xD9, ++ 0x81, 0xD8, 0xAD, 0xD9, 0x81, 0xD8, 0xAE, 0xD9, ++ 0x81, 0xD9, 0x85, 0xD9, 0x82, 0xD8, 0xAD, 0xD9, ++ 0x82, 0xD9, 0x85, 0xD9, 0x83, 0xD8, 0xAC, 0xD9, ++ 0x83, 0xD8, 0xAD, 0xD9, 0x83, 0xD8, 0xAE, 0xD9, ++ 0x83, 0xD9, 0x84, 0xD9, 0x83, 0xD9, 0x85, 0xD9, ++ 0x84, 0xD8, 0xAC, 0xD9, 0x84, 0xD8, 0xAD, 0xD9, ++ 0x84, 0xD8, 0xAE, 0xD9, 0x84, 0xD9, 0x85, 0xD9, ++ 0x84, 0xD9, 0x87, 0xD9, 0x85, 0xD8, 0xAC, 0xD9, ++ 0x85, 0xD8, 0xAD, 0xD9, 0x85, 0xD8, 0xAE, 0xD9, ++ 0x85, 0xD9, 0x85, 0xD9, 0x86, 0xD8, 0xAC, 0xD9, ++ 0x86, 0xD8, 0xAD, 0xD9, 0x86, 0xD8, 0xAE, 0xD9, ++ 0x86, 0xD9, 0x85, 0xD9, 0x86, 0xD9, 0x87, 0xD9, ++ 0x87, 0xD8, 0xAC, 0xD9, 0x87, 0xD9, 0x85, 0xD9, ++ 0x87, 0xD9, 0xB0, 0xD9, 0x8A, 0xD8, 0xAC, 0xD9, ++ 0x8A, 0xD8, 0xAD, 0xD9, 0x8A, 0xD8, 0xAE, 0xD9, ++ 0x8A, 0xD9, 0x85, 0xD9, 0x8A, 0xD9, 0x87, 0xD9, ++ 0x8A, 0xD9, 0x94, 0xD9, 0x85, 0xD9, 0x8A, 0xD9, ++ 0x94, 0xD9, 0x87, 0xD8, 0xA8, 0xD9, 0x85, 0xD8, ++ 0xA8, 0xD9, 0x87, 0xD8, 0xAA, 0xD9, 0x85, 0xD8, ++ 0xAA, 0xD9, 0x87, 0xD8, 0xAB, 0xD9, 0x85, 0xD8, ++ 0xAB, 0xD9, 0x87, 0xD8, 0xB3, 0xD9, 0x85, 0xD8, ++ 0xB3, 0xD9, 0x87, 0xD8, 0xB4, 0xD9, 0x85, 0xD8, ++ 0xB4, 0xD9, 0x87, 0xD9, 0x83, 0xD9, 0x84, 0xD9, ++ 0x83, 0xD9, 0x85, 0xD9, 0x84, 0xD9, 0x85, 0xD9, ++ 0x86, 0xD9, 0x85, 0xD9, 
0x86, 0xD9, 0x87, 0xD9, ++ 0x8A, 0xD9, 0x85, 0xD9, 0x8A, 0xD9, 0x87, 0xD9, ++ 0x80, 0xD9, 0x8E, 0xD9, 0x91, 0xD9, 0x80, 0xD9, ++ 0x8F, 0xD9, 0x91, 0xD9, 0x80, 0xD9, 0x90, 0xD9, ++ 0x91, 0xD8, 0xB7, 0xD9, 0x89, 0xD8, 0xB7, 0xD9, ++ 0x8A, 0xD8, 0xB9, 0xD9, 0x89, 0xD8, 0xB9, 0xD9, ++ 0x8A, 0xD8, 0xBA, 0xD9, 0x89, 0xD8, 0xBA, 0xD9, ++ 0x8A, 0xD8, 0xB3, 0xD9, 0x89, 0xD8, 0xB3, 0xD9, ++ 0x8A, 0xD8, 0xB4, 0xD9, 0x89, 0xD8, 0xB4, 0xD9, ++ 0x8A, 0xD8, 0xAD, 0xD9, 0x89, 0xD8, 0xAD, 0xD9, ++ 0x8A, 0xD8, 0xAC, 0xD9, 0x89, 0xD8, 0xAC, 0xD9, ++ 0x8A, 0xD8, 0xAE, 0xD9, 0x89, 0xD8, 0xAE, 0xD9, ++ 0x8A, 0xD8, 0xB5, 0xD9, 0x89, 0xD8, 0xB5, 0xD9, ++ 0x8A, 0xD8, 0xB6, 0xD9, 0x89, 0xD8, 0xB6, 0xD9, ++ 0x8A, 0xD8, 0xB4, 0xD8, 0xAC, 0xD8, 0xB4, 0xD8, ++ 0xAD, 0xD8, 0xB4, 0xD8, 0xAE, 0xD8, 0xB4, 0xD9, ++ 0x85, 0xD8, 0xB4, 0xD8, 0xB1, 0xD8, 0xB3, 0xD8, ++ 0xB1, 0xD8, 0xB5, 0xD8, 0xB1, 0xD8, 0xB6, 0xD8, ++ 0xB1, 0xD8, 0xB7, 0xD9, 0x89, 0xD8, 0xB7, 0xD9, ++ 0x8A, 0xD8, 0xB9, 0xD9, 0x89, 0xD8, 0xB9, 0xD9, ++ 0x8A, 0xD8, 0xBA, 0xD9, 0x89, 0xD8, 0xBA, 0xD9, ++ 0x8A, 0xD8, 0xB3, 0xD9, 0x89, 0xD8, 0xB3, 0xD9, ++ 0x8A, 0xD8, 0xB4, 0xD9, 0x89, 0xD8, 0xB4, 0xD9, ++ 0x8A, 0xD8, 0xAD, 0xD9, 0x89, 0xD8, 0xAD, 0xD9, ++ 0x8A, 0xD8, 0xAC, 0xD9, 0x89, 0xD8, 0xAC, 0xD9, ++ 0x8A, 0xD8, 0xAE, 0xD9, 0x89, 0xD8, 0xAE, 0xD9, ++ 0x8A, 0xD8, 0xB5, 0xD9, 0x89, 0xD8, 0xB5, 0xD9, ++ 0x8A, 0xD8, 0xB6, 0xD9, 0x89, 0xD8, 0xB6, 0xD9, ++ 0x8A, 0xD8, 0xB4, 0xD8, 0xAC, 0xD8, 0xB4, 0xD8, ++ 0xAD, 0xD8, 0xB4, 0xD8, 0xAE, 0xD8, 0xB4, 0xD9, ++ 0x85, 0xD8, 0xB4, 0xD8, 0xB1, 0xD8, 0xB3, 0xD8, ++ 0xB1, 0xD8, 0xB5, 0xD8, 0xB1, 0xD8, 0xB6, 0xD8, ++ 0xB1, 0xD8, 0xB4, 0xD8, 0xAC, 0xD8, 0xB4, 0xD8, ++ 0xAD, 0xD8, 0xB4, 0xD8, 0xAE, 0xD8, 0xB4, 0xD9, ++ 0x85, 0xD8, 0xB3, 0xD9, 0x87, 0xD8, 0xB4, 0xD9, ++ 0x87, 0xD8, 0xB7, 0xD9, 0x85, 0xD8, 0xB3, 0xD8, ++ 0xAC, 0xD8, 0xB3, 0xD8, 0xAD, 0xD8, 0xB3, 0xD8, ++ 0xAE, 0xD8, 0xB4, 0xD8, 0xAC, 0xD8, 0xB4, 0xD8, ++ 0xAD, 0xD8, 0xB4, 0xD8, 0xAE, 0xD8, 0xB7, 0xD9, ++ 0x85, 0xD8, 0xB8, 0xD9, 0x85, 0xD8, 0xA7, 0xD9, ++ 0x8B, 0xD8, 0xA7, 0xD9, 0x8B, 0xD8, 0xAA, 0xD8, ++ 0xAC, 0xD9, 0x85, 0xD8, 0xAA, 0xD8, 0xAD, 0xD8, ++ 0xAC, 0xD8, 0xAA, 0xD8, 0xAD, 0xD8, 0xAC, 0xD8, ++ 0xAA, 0xD8, 0xAD, 0xD9, 0x85, 0xD8, 0xAA, 0xD8, ++ 0xAE, 0xD9, 0x85, 0xD8, 0xAA, 0xD9, 0x85, 0xD8, ++ 0xAC, 0xD8, 0xAA, 0xD9, 0x85, 0xD8, 0xAD, 0xD8, ++ 0xAA, 0xD9, 0x85, 0xD8, 0xAE, 0xD8, 0xAC, 0xD9, ++ 0x85, 0xD8, 0xAD, 0xD8, 0xAC, 0xD9, 0x85, 0xD8, ++ 0xAD, 0xD8, 0xAD, 0xD9, 0x85, 0xD9, 0x8A, 0xD8, ++ 0xAD, 0xD9, 0x85, 0xD9, 0x89, 0xD8, 0xB3, 0xD8, ++ 0xAD, 0xD8, 0xAC, 0xD8, 0xB3, 0xD8, 0xAC, 0xD8, ++ 0xAD, 0xD8, 0xB3, 0xD8, 0xAC, 0xD9, 0x89, 0xD8, ++ 0xB3, 0xD9, 0x85, 0xD8, 0xAD, 0xD8, 0xB3, 0xD9, ++ 0x85, 0xD8, 0xAD, 0xD8, 0xB3, 0xD9, 0x85, 0xD8, ++ 0xAC, 0xD8, 0xB3, 0xD9, 0x85, 0xD9, 0x85, 0xD8, ++ 0xB3, 0xD9, 0x85, 0xD9, 0x85, 0xD8, 0xB5, 0xD8, ++ 0xAD, 0xD8, 0xAD, 0xD8, 0xB5, 0xD8, 0xAD, 0xD8, ++ 0xAD, 0xD8, 0xB5, 0xD9, 0x85, 0xD9, 0x85, 0xD8, ++ 0xB4, 0xD8, 0xAD, 0xD9, 0x85, 0xD8, 0xB4, 0xD8, ++ 0xAD, 0xD9, 0x85, 0xD8, 0xB4, 0xD8, 0xAC, 0xD9, ++ 0x8A, 0xD8, 0xB4, 0xD9, 0x85, 0xD8, 0xAE, 0xD8, ++ 0xB4, 0xD9, 0x85, 0xD8, 0xAE, 0xD8, 0xB4, 0xD9, ++ 0x85, 0xD9, 0x85, 0xD8, 0xB4, 0xD9, 0x85, 0xD9, ++ 0x85, 0xD8, 0xB6, 0xD8, 0xAD, 0xD9, 0x89, 0xD8, ++ 0xB6, 0xD8, 0xAE, 0xD9, 0x85, 0xD8, 0xB6, 0xD8, ++ 0xAE, 0xD9, 0x85, 0xD8, 0xB7, 0xD9, 0x85, 0xD8, ++ 0xAD, 0xD8, 0xB7, 0xD9, 0x85, 0xD8, 0xAD, 0xD8, ++ 0xB7, 0xD9, 0x85, 0xD9, 0x85, 0xD8, 0xB7, 0xD9, ++ 0x85, 0xD9, 0x8A, 0xD8, 0xB9, 0xD8, 0xAC, 0xD9, ++ 0x85, 0xD8, 0xB9, 0xD9, 0x85, 0xD9, 0x85, 0xD8, ++ 0xB9, 
0xD9, 0x85, 0xD9, 0x85, 0xD8, 0xB9, 0xD9, ++ 0x85, 0xD9, 0x89, 0xD8, 0xBA, 0xD9, 0x85, 0xD9, ++ 0x85, 0xD8, 0xBA, 0xD9, 0x85, 0xD9, 0x8A, 0xD8, ++ 0xBA, 0xD9, 0x85, 0xD9, 0x89, 0xD9, 0x81, 0xD8, ++ 0xAE, 0xD9, 0x85, 0xD9, 0x81, 0xD8, 0xAE, 0xD9, ++ 0x85, 0xD9, 0x82, 0xD9, 0x85, 0xD8, 0xAD, 0xD9, ++ 0x82, 0xD9, 0x85, 0xD9, 0x85, 0xD9, 0x84, 0xD8, ++ 0xAD, 0xD9, 0x85, 0xD9, 0x84, 0xD8, 0xAD, 0xD9, ++ 0x8A, 0xD9, 0x84, 0xD8, 0xAD, 0xD9, 0x89, 0xD9, ++ 0x84, 0xD8, 0xAC, 0xD8, 0xAC, 0xD9, 0x84, 0xD8, ++ 0xAC, 0xD8, 0xAC, 0xD9, 0x84, 0xD8, 0xAE, 0xD9, ++ 0x85, 0xD9, 0x84, 0xD8, 0xAE, 0xD9, 0x85, 0xD9, ++ 0x84, 0xD9, 0x85, 0xD8, 0xAD, 0xD9, 0x84, 0xD9, ++ 0x85, 0xD8, 0xAD, 0xD9, 0x85, 0xD8, 0xAD, 0xD8, ++ 0xAC, 0xD9, 0x85, 0xD8, 0xAD, 0xD9, 0x85, 0xD9, ++ 0x85, 0xD8, 0xAD, 0xD9, 0x8A, 0xD9, 0x85, 0xD8, ++ 0xAC, 0xD8, 0xAD, 0xD9, 0x85, 0xD8, 0xAC, 0xD9, ++ 0x85, 0xD9, 0x85, 0xD8, 0xAE, 0xD8, 0xAC, 0xD9, ++ 0x85, 0xD8, 0xAE, 0xD9, 0x85, 0xD9, 0x85, 0xD8, ++ 0xAC, 0xD8, 0xAE, 0xD9, 0x87, 0xD9, 0x85, 0xD8, ++ 0xAC, 0xD9, 0x87, 0xD9, 0x85, 0xD9, 0x85, 0xD9, ++ 0x86, 0xD8, 0xAD, 0xD9, 0x85, 0xD9, 0x86, 0xD8, ++ 0xAD, 0xD9, 0x89, 0xD9, 0x86, 0xD8, 0xAC, 0xD9, ++ 0x85, 0xD9, 0x86, 0xD8, 0xAC, 0xD9, 0x85, 0xD9, ++ 0x86, 0xD8, 0xAC, 0xD9, 0x89, 0xD9, 0x86, 0xD9, ++ 0x85, 0xD9, 0x8A, 0xD9, 0x86, 0xD9, 0x85, 0xD9, ++ 0x89, 0xD9, 0x8A, 0xD9, 0x85, 0xD9, 0x85, 0xD9, ++ 0x8A, 0xD9, 0x85, 0xD9, 0x85, 0xD8, 0xA8, 0xD8, ++ 0xAE, 0xD9, 0x8A, 0xD8, 0xAA, 0xD8, 0xAC, 0xD9, ++ 0x8A, 0xD8, 0xAA, 0xD8, 0xAC, 0xD9, 0x89, 0xD8, ++ 0xAA, 0xD8, 0xAE, 0xD9, 0x8A, 0xD8, 0xAA, 0xD8, ++ 0xAE, 0xD9, 0x89, 0xD8, 0xAA, 0xD9, 0x85, 0xD9, ++ 0x8A, 0xD8, 0xAA, 0xD9, 0x85, 0xD9, 0x89, 0xD8, ++ 0xAC, 0xD9, 0x85, 0xD9, 0x8A, 0xD8, 0xAC, 0xD8, ++ 0xAD, 0xD9, 0x89, 0xD8, 0xAC, 0xD9, 0x85, 0xD9, ++ 0x89, 0xD8, 0xB3, 0xD8, 0xAE, 0xD9, 0x89, 0xD8, ++ 0xB5, 0xD8, 0xAD, 0xD9, 0x8A, 0xD8, 0xB4, 0xD8, ++ 0xAD, 0xD9, 0x8A, 0xD8, 0xB6, 0xD8, 0xAD, 0xD9, ++ 0x8A, 0xD9, 0x84, 0xD8, 0xAC, 0xD9, 0x8A, 0xD9, ++ 0x84, 0xD9, 0x85, 0xD9, 0x8A, 0xD9, 0x8A, 0xD8, ++ 0xAD, 0xD9, 0x8A, 0xD9, 0x8A, 0xD8, 0xAC, 0xD9, ++ 0x8A, 0xD9, 0x8A, 0xD9, 0x85, 0xD9, 0x8A, 0xD9, ++ 0x85, 0xD9, 0x85, 0xD9, 0x8A, 0xD9, 0x82, 0xD9, ++ 0x85, 0xD9, 0x8A, 0xD9, 0x86, 0xD8, 0xAD, 0xD9, ++ 0x8A, 0xD9, 0x82, 0xD9, 0x85, 0xD8, 0xAD, 0xD9, ++ 0x84, 0xD8, 0xAD, 0xD9, 0x85, 0xD8, 0xB9, 0xD9, ++ 0x85, 0xD9, 0x8A, 0xD9, 0x83, 0xD9, 0x85, 0xD9, ++ 0x8A, 0xD9, 0x86, 0xD8, 0xAC, 0xD8, 0xAD, 0xD9, ++ 0x85, 0xD8, 0xAE, 0xD9, 0x8A, 0xD9, 0x84, 0xD8, ++ 0xAC, 0xD9, 0x85, 0xD9, 0x83, 0xD9, 0x85, 0xD9, ++ 0x85, 0xD9, 0x84, 0xD8, 0xAC, 0xD9, 0x85, 0xD9, ++ 0x86, 0xD8, 0xAC, 0xD8, 0xAD, 0xD8, 0xAC, 0xD8, ++ 0xAD, 0xD9, 0x8A, 0xD8, 0xAD, 0xD8, 0xAC, 0xD9, ++ 0x8A, 0xD9, 0x85, 0xD8, 0xAC, 0xD9, 0x8A, 0xD9, ++ 0x81, 0xD9, 0x85, 0xD9, 0x8A, 0xD8, 0xA8, 0xD8, ++ 0xAD, 0xD9, 0x8A, 0xD9, 0x83, 0xD9, 0x85, 0xD9, ++ 0x85, 0xD8, 0xB9, 0xD8, 0xAC, 0xD9, 0x85, 0xD8, ++ 0xB5, 0xD9, 0x85, 0xD9, 0x85, 0xD8, 0xB3, 0xD8, ++ 0xAE, 0xD9, 0x8A, 0xD9, 0x86, 0xD8, 0xAC, 0xD9, ++ 0x8A, 0xD8, 0xB5, 0xD9, 0x84, 0xDB, 0x92, 0xD9, ++ 0x82, 0xD9, 0x84, 0xDB, 0x92, 0xD8, 0xA7, 0xD9, ++ 0x84, 0xD9, 0x84, 0xD9, 0x87, 0xD8, 0xA7, 0xD9, ++ 0x83, 0xD8, 0xA8, 0xD8, 0xB1, 0xD9, 0x85, 0xD8, ++ 0xAD, 0xD9, 0x85, 0xD8, 0xAF, 0xD8, 0xB5, 0xD9, ++ 0x84, 0xD8, 0xB9, 0xD9, 0x85, 0xD8, 0xB1, 0xD8, ++ 0xB3, 0xD9, 0x88, 0xD9, 0x84, 0xD8, 0xB9, 0xD9, ++ 0x84, 0xD9, 0x8A, 0xD9, 0x87, 0xD9, 0x88, 0xD8, ++ 0xB3, 0xD9, 0x84, 0xD9, 0x85, 0xD8, 0xB5, 0xD9, ++ 0x84, 0xD9, 0x89, 0xD8, 0xB5, 0xD9, 0x84, 0xD9, ++ 0x89, 0x20, 0xD8, 0xA7, 0xD9, 0x84, 0xD9, 
0x84, ++ 0xD9, 0x87, 0x20, 0xD8, 0xB9, 0xD9, 0x84, 0xD9, ++ 0x8A, 0xD9, 0x87, 0x20, 0xD9, 0x88, 0xD8, 0xB3, ++ 0xD9, 0x84, 0xD9, 0x85, 0xD8, 0xAC, 0xD9, 0x84, ++ 0x20, 0xD8, 0xAC, 0xD9, 0x84, 0xD8, 0xA7, 0xD9, ++ 0x84, 0xD9, 0x87, 0xD8, 0xB1, 0xDB, 0x8C, 0xD8, ++ 0xA7, 0xD9, 0x84, 0x2C, 0xE3, 0x80, 0x81, 0xE3, ++ 0x80, 0x82, 0x3A, 0x3B, 0x21, 0x3F, 0xE3, 0x80, ++ 0x96, 0xE3, 0x80, 0x97, 0x2E, 0x2E, 0x2E, 0x2E, ++ 0x2E, 0xE2, 0x80, 0x94, 0xE2, 0x80, 0x93, 0x5F, ++ 0x5F, 0x28, 0x29, 0x7B, 0x7D, 0xE3, 0x80, 0x94, ++ 0xE3, 0x80, 0x95, 0xE3, 0x80, 0x90, 0xE3, 0x80, ++ 0x91, 0xE3, 0x80, 0x8A, 0xE3, 0x80, 0x8B, 0xE3, ++ 0x80, 0x88, 0xE3, 0x80, 0x89, 0xE3, 0x80, 0x8C, ++ 0xE3, 0x80, 0x8D, 0xE3, 0x80, 0x8E, 0xE3, 0x80, ++ 0x8F, 0x5B, 0x5D, 0x20, 0xCC, 0x85, 0x20, 0xCC, ++ 0x85, 0x20, 0xCC, 0x85, 0x20, 0xCC, 0x85, 0x5F, ++ 0x5F, 0x5F, 0x2C, 0xE3, 0x80, 0x81, 0x2E, 0x3B, ++ 0x3A, 0x3F, 0x21, 0xE2, 0x80, 0x94, 0x28, 0x29, ++ 0x7B, 0x7D, 0xE3, 0x80, 0x94, 0xE3, 0x80, 0x95, ++ 0x23, 0x26, 0x2A, 0x2B, 0x2D, 0x3C, 0x3E, 0x3D, ++ 0x5C, 0x24, 0x25, 0x40, 0x20, 0xD9, 0x8B, 0xD9, ++ 0x80, 0xD9, 0x8B, 0x20, 0xD9, 0x8C, 0x20, 0xD9, ++ 0x8D, 0x20, 0xD9, 0x8E, 0xD9, 0x80, 0xD9, 0x8E, ++ 0x20, 0xD9, 0x8F, 0xD9, 0x80, 0xD9, 0x8F, 0x20, ++ 0xD9, 0x90, 0xD9, 0x80, 0xD9, 0x90, 0x20, 0xD9, ++ 0x91, 0xD9, 0x80, 0xD9, 0x91, 0x20, 0xD9, 0x92, ++ 0xD9, 0x80, 0xD9, 0x92, 0xD8, 0xA1, 0xD8, 0xA7, ++ 0xD9, 0x93, 0xD8, 0xA7, 0xD9, 0x93, 0xD8, 0xA7, ++ 0xD9, 0x94, 0xD8, 0xA7, 0xD9, 0x94, 0xD9, 0x88, ++ 0xD9, 0x94, 0xD9, 0x88, 0xD9, 0x94, 0xD8, 0xA7, ++ 0xD9, 0x95, 0xD8, 0xA7, 0xD9, 0x95, 0xD9, 0x8A, ++ 0xD9, 0x94, 0xD9, 0x8A, 0xD9, 0x94, 0xD9, 0x8A, ++ 0xD9, 0x94, 0xD9, 0x8A, 0xD9, 0x94, 0xD8, 0xA7, ++ 0xD8, 0xA7, 0xD8, 0xA8, 0xD8, 0xA8, 0xD8, 0xA8, ++ 0xD8, 0xA8, 0xD8, 0xA9, 0xD8, 0xA9, 0xD8, 0xAA, ++ 0xD8, 0xAA, 0xD8, 0xAA, 0xD8, 0xAA, 0xD8, 0xAB, ++ 0xD8, 0xAB, 0xD8, 0xAB, 0xD8, 0xAB, 0xD8, 0xAC, ++ 0xD8, 0xAC, 0xD8, 0xAC, 0xD8, 0xAC, 0xD8, 0xAD, ++ 0xD8, 0xAD, 0xD8, 0xAD, 0xD8, 0xAD, 0xD8, 0xAE, ++ 0xD8, 0xAE, 0xD8, 0xAE, 0xD8, 0xAE, 0xD8, 0xAF, ++ 0xD8, 0xAF, 0xD8, 0xB0, 0xD8, 0xB0, 0xD8, 0xB1, ++ 0xD8, 0xB1, 0xD8, 0xB2, 0xD8, 0xB2, 0xD8, 0xB3, ++ 0xD8, 0xB3, 0xD8, 0xB3, 0xD8, 0xB3, 0xD8, 0xB4, ++ 0xD8, 0xB4, 0xD8, 0xB4, 0xD8, 0xB4, 0xD8, 0xB5, ++ 0xD8, 0xB5, 0xD8, 0xB5, 0xD8, 0xB5, 0xD8, 0xB6, ++ 0xD8, 0xB6, 0xD8, 0xB6, 0xD8, 0xB6, 0xD8, 0xB7, ++ 0xD8, 0xB7, 0xD8, 0xB7, 0xD8, 0xB7, 0xD8, 0xB8, ++ 0xD8, 0xB8, 0xD8, 0xB8, 0xD8, 0xB8, 0xD8, 0xB9, ++ 0xD8, 0xB9, 0xD8, 0xB9, 0xD8, 0xB9, 0xD8, 0xBA, ++ 0xD8, 0xBA, 0xD8, 0xBA, 0xD8, 0xBA, 0xD9, 0x81, ++ 0xD9, 0x81, 0xD9, 0x81, 0xD9, 0x81, 0xD9, 0x82, ++ 0xD9, 0x82, 0xD9, 0x82, 0xD9, 0x82, 0xD9, 0x83, ++ 0xD9, 0x83, 0xD9, 0x83, 0xD9, 0x83, 0xD9, 0x84, ++ 0xD9, 0x84, 0xD9, 0x84, 0xD9, 0x84, 0xD9, 0x85, ++ 0xD9, 0x85, 0xD9, 0x85, 0xD9, 0x85, 0xD9, 0x86, ++ 0xD9, 0x86, 0xD9, 0x86, 0xD9, 0x86, 0xD9, 0x87, ++ 0xD9, 0x87, 0xD9, 0x87, 0xD9, 0x87, 0xD9, 0x88, ++ 0xD9, 0x88, 0xD9, 0x89, 0xD9, 0x89, 0xD9, 0x8A, ++ 0xD9, 0x8A, 0xD9, 0x8A, 0xD9, 0x8A, 0xD9, 0x84, ++ 0xD8, 0xA7, 0xD9, 0x93, 0xD9, 0x84, 0xD8, 0xA7, ++ 0xD9, 0x93, 0xD9, 0x84, 0xD8, 0xA7, 0xD9, 0x94, ++ 0xD9, 0x84, 0xD8, 0xA7, 0xD9, 0x94, 0xD9, 0x84, ++ 0xD8, 0xA7, 0xD9, 0x95, 0xD9, 0x84, 0xD8, 0xA7, ++ 0xD9, 0x95, 0xD9, 0x84, 0xD8, 0xA7, 0xD9, 0x84, ++ 0xD8, 0xA7, 0x21, 0x22, 0x23, 0x24, 0x25, 0x26, ++ 0x27, 0x28, 0x29, 0x2A, 0x2B, 0x2C, 0x2D, 0x2E, ++ 0x2F, 0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, ++ 0x37, 0x38, 0x39, 0x3A, 0x3B, 0x3C, 0x3D, 0x3E, ++ 0x3F, 0x40, 0x41, 0x42, 0x43, 0x44, 0x45, 0x46, ++ 0x47, 0x48, 0x49, 0x4A, 
0x4B, 0x4C, 0x4D, 0x4E, ++ 0x4F, 0x50, 0x51, 0x52, 0x53, 0x54, 0x55, 0x56, ++ 0x57, 0x58, 0x59, 0x5A, 0x5B, 0x5C, 0x5D, 0x5E, ++ 0x5F, 0x60, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, ++ 0x67, 0x68, 0x69, 0x6A, 0x6B, 0x6C, 0x6D, 0x6E, ++ 0x6F, 0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, ++ 0x77, 0x78, 0x79, 0x7A, 0x7B, 0x7C, 0x7D, 0x7E, ++ 0xE2, 0xA6, 0x85, 0xE2, 0xA6, 0x86, 0xE3, 0x80, ++ 0x82, 0xE3, 0x80, 0x8C, 0xE3, 0x80, 0x8D, 0xE3, ++ 0x80, 0x81, 0xE3, 0x83, 0xBB, 0xE3, 0x83, 0xB2, ++ 0xE3, 0x82, 0xA1, 0xE3, 0x82, 0xA3, 0xE3, 0x82, ++ 0xA5, 0xE3, 0x82, 0xA7, 0xE3, 0x82, 0xA9, 0xE3, ++ 0x83, 0xA3, 0xE3, 0x83, 0xA5, 0xE3, 0x83, 0xA7, ++ 0xE3, 0x83, 0x83, 0xE3, 0x83, 0xBC, 0xE3, 0x82, ++ 0xA2, 0xE3, 0x82, 0xA4, 0xE3, 0x82, 0xA6, 0xE3, ++ 0x82, 0xA8, 0xE3, 0x82, 0xAA, 0xE3, 0x82, 0xAB, ++ 0xE3, 0x82, 0xAD, 0xE3, 0x82, 0xAF, 0xE3, 0x82, ++ 0xB1, 0xE3, 0x82, 0xB3, 0xE3, 0x82, 0xB5, 0xE3, ++ 0x82, 0xB7, 0xE3, 0x82, 0xB9, 0xE3, 0x82, 0xBB, ++ 0xE3, 0x82, 0xBD, 0xE3, 0x82, 0xBF, 0xE3, 0x83, ++ 0x81, 0xE3, 0x83, 0x84, 0xE3, 0x83, 0x86, 0xE3, ++ 0x83, 0x88, 0xE3, 0x83, 0x8A, 0xE3, 0x83, 0x8B, ++ 0xE3, 0x83, 0x8C, 0xE3, 0x83, 0x8D, 0xE3, 0x83, ++ 0x8E, 0xE3, 0x83, 0x8F, 0xE3, 0x83, 0x92, 0xE3, ++ 0x83, 0x95, 0xE3, 0x83, 0x98, 0xE3, 0x83, 0x9B, ++ 0xE3, 0x83, 0x9E, 0xE3, 0x83, 0x9F, 0xE3, 0x83, ++ 0xA0, 0xE3, 0x83, 0xA1, 0xE3, 0x83, 0xA2, 0xE3, ++ 0x83, 0xA4, 0xE3, 0x83, 0xA6, 0xE3, 0x83, 0xA8, ++ 0xE3, 0x83, 0xA9, 0xE3, 0x83, 0xAA, 0xE3, 0x83, ++ 0xAB, 0xE3, 0x83, 0xAC, 0xE3, 0x83, 0xAD, 0xE3, ++ 0x83, 0xAF, 0xE3, 0x83, 0xB3, 0xE3, 0x82, 0x99, ++ 0xE3, 0x82, 0x9A, 0xE1, 0x85, 0xA0, 0xE1, 0x84, ++ 0x80, 0xE1, 0x84, 0x81, 0xE1, 0x86, 0xAA, 0xE1, ++ 0x84, 0x82, 0xE1, 0x86, 0xAC, 0xE1, 0x86, 0xAD, ++ 0xE1, 0x84, 0x83, 0xE1, 0x84, 0x84, 0xE1, 0x84, ++ 0x85, 0xE1, 0x86, 0xB0, 0xE1, 0x86, 0xB1, 0xE1, ++ 0x86, 0xB2, 0xE1, 0x86, 0xB3, 0xE1, 0x86, 0xB4, ++ 0xE1, 0x86, 0xB5, 0xE1, 0x84, 0x9A, 0xE1, 0x84, ++ 0x86, 0xE1, 0x84, 0x87, 0xE1, 0x84, 0x88, 0xE1, ++ 0x84, 0xA1, 0xE1, 0x84, 0x89, 0xE1, 0x84, 0x8A, ++ 0xE1, 0x84, 0x8B, 0xE1, 0x84, 0x8C, 0xE1, 0x84, ++ 0x8D, 0xE1, 0x84, 0x8E, 0xE1, 0x84, 0x8F, 0xE1, ++ 0x84, 0x90, 0xE1, 0x84, 0x91, 0xE1, 0x84, 0x92, ++ 0xE1, 0x85, 0xA1, 0xE1, 0x85, 0xA2, 0xE1, 0x85, ++ 0xA3, 0xE1, 0x85, 0xA4, 0xE1, 0x85, 0xA5, 0xE1, ++ 0x85, 0xA6, 0xE1, 0x85, 0xA7, 0xE1, 0x85, 0xA8, ++ 0xE1, 0x85, 0xA9, 0xE1, 0x85, 0xAA, 0xE1, 0x85, ++ 0xAB, 0xE1, 0x85, 0xAC, 0xE1, 0x85, 0xAD, 0xE1, ++ 0x85, 0xAE, 0xE1, 0x85, 0xAF, 0xE1, 0x85, 0xB0, ++ 0xE1, 0x85, 0xB1, 0xE1, 0x85, 0xB2, 0xE1, 0x85, ++ 0xB3, 0xE1, 0x85, 0xB4, 0xE1, 0x85, 0xB5, 0xC2, ++ 0xA2, 0xC2, 0xA3, 0xC2, 0xAC, 0x20, 0xCC, 0x84, ++ 0xC2, 0xA6, 0xC2, 0xA5, 0xE2, 0x82, 0xA9, 0xE2, ++ 0x94, 0x82, 0xE2, 0x86, 0x90, 0xE2, 0x86, 0x91, ++ 0xE2, 0x86, 0x92, 0xE2, 0x86, 0x93, 0xE2, 0x96, ++ 0xA0, 0xE2, 0x97, 0x8B, 0xF6, 0xF0, 0x9D, 0x85, ++ 0x97, 0xF0, 0x9D, 0x85, 0xA5, 0xF6, 0xF0, 0x9D, ++ 0x85, 0x98, 0xF0, 0x9D, 0x85, 0xA5, 0xF6, 0xF0, ++ 0x9D, 0x85, 0x98, 0xF0, 0x9D, 0x85, 0xA5, 0xF0, ++ 0x9D, 0x85, 0xAE, 0xF6, 0xF0, 0x9D, 0x85, 0x98, ++ 0xF0, 0x9D, 0x85, 0xA5, 0xF0, 0x9D, 0x85, 0xAF, ++ 0xF6, 0xF0, 0x9D, 0x85, 0x98, 0xF0, 0x9D, 0x85, ++ 0xA5, 0xF0, 0x9D, 0x85, 0xB0, 0xF6, 0xF0, 0x9D, ++ 0x85, 0x98, 0xF0, 0x9D, 0x85, 0xA5, 0xF0, 0x9D, ++ 0x85, 0xB1, 0xF6, 0xF0, 0x9D, 0x85, 0x98, 0xF0, ++ 0x9D, 0x85, 0xA5, 0xF0, 0x9D, 0x85, 0xB2, 0xF6, ++ 0xF0, 0x9D, 0x86, 0xB9, 0xF0, 0x9D, 0x85, 0xA5, ++ 0xF6, 0xF0, 0x9D, 0x86, 0xBA, 0xF0, 0x9D, 0x85, ++ 0xA5, 0xF6, 0xF0, 0x9D, 0x86, 0xB9, 0xF0, 0x9D, ++ 0x85, 0xA5, 0xF0, 0x9D, 0x85, 0xAE, 0xF6, 0xF0, ++ 0x9D, 
0x86, 0xBA, 0xF0, 0x9D, 0x85, 0xA5, 0xF0, ++ 0x9D, 0x85, 0xAE, 0xF6, 0xF0, 0x9D, 0x86, 0xB9, ++ 0xF0, 0x9D, 0x85, 0xA5, 0xF0, 0x9D, 0x85, 0xAF, ++ 0xF6, 0xF0, 0x9D, 0x86, 0xBA, 0xF0, 0x9D, 0x85, ++ 0xA5, 0xF0, 0x9D, 0x85, 0xAF, 0x41, 0x42, 0x43, ++ 0x44, 0x45, 0x46, 0x47, 0x48, 0x49, 0x4A, 0x4B, ++ 0x4C, 0x4D, 0x4E, 0x4F, 0x50, 0x51, 0x52, 0x53, ++ 0x54, 0x55, 0x56, 0x57, 0x58, 0x59, 0x5A, 0x61, ++ 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69, ++ 0x6A, 0x6B, 0x6C, 0x6D, 0x6E, 0x6F, 0x70, 0x71, ++ 0x72, 0x73, 0x74, 0x75, 0x76, 0x77, 0x78, 0x79, ++ 0x7A, 0x41, 0x42, 0x43, 0x44, 0x45, 0x46, 0x47, ++ 0x48, 0x49, 0x4A, 0x4B, 0x4C, 0x4D, 0x4E, 0x4F, ++ 0x50, 0x51, 0x52, 0x53, 0x54, 0x55, 0x56, 0x57, ++ 0x58, 0x59, 0x5A, 0x61, 0x62, 0x63, 0x64, 0x65, ++ 0x66, 0x67, 0x69, 0x6A, 0x6B, 0x6C, 0x6D, 0x6E, ++ 0x6F, 0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, ++ 0x77, 0x78, 0x79, 0x7A, 0x41, 0x42, 0x43, 0x44, ++ 0x45, 0x46, 0x47, 0x48, 0x49, 0x4A, 0x4B, 0x4C, ++ 0x4D, 0x4E, 0x4F, 0x50, 0x51, 0x52, 0x53, 0x54, ++ 0x55, 0x56, 0x57, 0x58, 0x59, 0x5A, 0x61, 0x62, ++ 0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69, 0x6A, ++ 0x6B, 0x6C, 0x6D, 0x6E, 0x6F, 0x70, 0x71, 0x72, ++ 0x73, 0x74, 0x75, 0x76, 0x77, 0x78, 0x79, 0x7A, ++ 0x41, 0x43, 0x44, 0x47, 0x4A, 0x4B, 0x4E, 0x4F, ++ 0x50, 0x51, 0x53, 0x54, 0x55, 0x56, 0x57, 0x58, ++ 0x59, 0x5A, 0x61, 0x62, 0x63, 0x64, 0x66, 0x68, ++ 0x69, 0x6A, 0x6B, 0x6C, 0x6D, 0x6E, 0x70, 0x71, ++ 0x72, 0x73, 0x74, 0x75, 0x76, 0x77, 0x78, 0x79, ++ 0x7A, 0x41, 0x42, 0x43, 0x44, 0x45, 0x46, 0x47, ++ 0x48, 0x49, 0x4A, 0x4B, 0x4C, 0x4D, 0x4E, 0x4F, ++ 0x50, 0x51, 0x52, 0x53, 0x54, 0x55, 0x56, 0x57, ++ 0x58, 0x59, 0x5A, 0x61, 0x62, 0x63, 0x64, 0x65, ++ 0x66, 0x67, 0x68, 0x69, 0x6A, 0x6B, 0x6C, 0x6D, ++ 0x6E, 0x6F, 0x70, 0x71, 0x72, 0x73, 0x74, 0x75, ++ 0x76, 0x77, 0x78, 0x79, 0x7A, 0x41, 0x42, 0x44, ++ 0x45, 0x46, 0x47, 0x4A, 0x4B, 0x4C, 0x4D, 0x4E, ++ 0x4F, 0x50, 0x51, 0x53, 0x54, 0x55, 0x56, 0x57, ++ 0x58, 0x59, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, ++ 0x67, 0x68, 0x69, 0x6A, 0x6B, 0x6C, 0x6D, 0x6E, ++ 0x6F, 0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, ++ 0x77, 0x78, 0x79, 0x7A, 0x41, 0x42, 0x44, 0x45, ++ 0x46, 0x47, 0x49, 0x4A, 0x4B, 0x4C, 0x4D, 0x4F, ++ 0x53, 0x54, 0x55, 0x56, 0x57, 0x58, 0x59, 0x61, ++ 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69, ++ 0x6A, 0x6B, 0x6C, 0x6D, 0x6E, 0x6F, 0x70, 0x71, ++ 0x72, 0x73, 0x74, 0x75, 0x76, 0x77, 0x78, 0x79, ++ 0x7A, 0x41, 0x42, 0x43, 0x44, 0x45, 0x46, 0x47, ++ 0x48, 0x49, 0x4A, 0x4B, 0x4C, 0x4D, 0x4E, 0x4F, ++ 0x50, 0x51, 0x52, 0x53, 0x54, 0x55, 0x56, 0x57, ++ 0x58, 0x59, 0x5A, 0x61, 0x62, 0x63, 0x64, 0x65, ++ 0x66, 0x67, 0x68, 0x69, 0x6A, 0x6B, 0x6C, 0x6D, ++ 0x6E, 0x6F, 0x70, 0x71, 0x72, 0x73, 0x74, 0x75, ++ 0x76, 0x77, 0x78, 0x79, 0x7A, 0x41, 0x42, 0x43, ++ 0x44, 0x45, 0x46, 0x47, 0x48, 0x49, 0x4A, 0x4B, ++ 0x4C, 0x4D, 0x4E, 0x4F, 0x50, 0x51, 0x52, 0x53, ++ 0x54, 0x55, 0x56, 0x57, 0x58, 0x59, 0x5A, 0x61, ++ 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69, ++ 0x6A, 0x6B, 0x6C, 0x6D, 0x6E, 0x6F, 0x70, 0x71, ++ 0x72, 0x73, 0x74, 0x75, 0x76, 0x77, 0x78, 0x79, ++ 0x7A, 0x41, 0x42, 0x43, 0x44, 0x45, 0x46, 0x47, ++ 0x48, 0x49, 0x4A, 0x4B, 0x4C, 0x4D, 0x4E, 0x4F, ++ 0x50, 0x51, 0x52, 0x53, 0x54, 0x55, 0x56, 0x57, ++ 0x58, 0x59, 0x5A, 0x61, 0x62, 0x63, 0x64, 0x65, ++ 0x66, 0x67, 0x68, 0x69, 0x6A, 0x6B, 0x6C, 0x6D, ++ 0x6E, 0x6F, 0x70, 0x71, 0x72, 0x73, 0x74, 0x75, ++ 0x76, 0x77, 0x78, 0x79, 0x7A, 0x41, 0x42, 0x43, ++ 0x44, 0x45, 0x46, 0x47, 0x48, 0x49, 0x4A, 0x4B, ++ 0x4C, 0x4D, 0x4E, 0x4F, 0x50, 0x51, 0x52, 0x53, ++ 0x54, 0x55, 0x56, 0x57, 0x58, 0x59, 0x5A, 
0x61, ++ 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69, ++ 0x6A, 0x6B, 0x6C, 0x6D, 0x6E, 0x6F, 0x70, 0x71, ++ 0x72, 0x73, 0x74, 0x75, 0x76, 0x77, 0x78, 0x79, ++ 0x7A, 0x41, 0x42, 0x43, 0x44, 0x45, 0x46, 0x47, ++ 0x48, 0x49, 0x4A, 0x4B, 0x4C, 0x4D, 0x4E, 0x4F, ++ 0x50, 0x51, 0x52, 0x53, 0x54, 0x55, 0x56, 0x57, ++ 0x58, 0x59, 0x5A, 0x61, 0x62, 0x63, 0x64, 0x65, ++ 0x66, 0x67, 0x68, 0x69, 0x6A, 0x6B, 0x6C, 0x6D, ++ 0x6E, 0x6F, 0x70, 0x71, 0x72, 0x73, 0x74, 0x75, ++ 0x76, 0x77, 0x78, 0x79, 0x7A, 0x41, 0x42, 0x43, ++ 0x44, 0x45, 0x46, 0x47, 0x48, 0x49, 0x4A, 0x4B, ++ 0x4C, 0x4D, 0x4E, 0x4F, 0x50, 0x51, 0x52, 0x53, ++ 0x54, 0x55, 0x56, 0x57, 0x58, 0x59, 0x5A, 0x61, ++ 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69, ++ 0x6A, 0x6B, 0x6C, 0x6D, 0x6E, 0x6F, 0x70, 0x71, ++ 0x72, 0x73, 0x74, 0x75, 0x76, 0x77, 0x78, 0x79, ++ 0x7A, 0xC4, 0xB1, 0xC8, 0xB7, 0xCE, 0x91, 0xCE, ++ 0x92, 0xCE, 0x93, 0xCE, 0x94, 0xCE, 0x95, 0xCE, ++ 0x96, 0xCE, 0x97, 0xCE, 0x98, 0xCE, 0x99, 0xCE, ++ 0x9A, 0xCE, 0x9B, 0xCE, 0x9C, 0xCE, 0x9D, 0xCE, ++ 0x9E, 0xCE, 0x9F, 0xCE, 0xA0, 0xCE, 0xA1, 0xCE, ++ 0x98, 0xCE, 0xA3, 0xCE, 0xA4, 0xCE, 0xA5, 0xCE, ++ 0xA6, 0xCE, 0xA7, 0xCE, 0xA8, 0xCE, 0xA9, 0xE2, ++ 0x88, 0x87, 0xCE, 0xB1, 0xCE, 0xB2, 0xCE, 0xB3, ++ 0xCE, 0xB4, 0xCE, 0xB5, 0xCE, 0xB6, 0xCE, 0xB7, ++ 0xCE, 0xB8, 0xCE, 0xB9, 0xCE, 0xBA, 0xCE, 0xBB, ++ 0xCE, 0xBC, 0xCE, 0xBD, 0xCE, 0xBE, 0xCE, 0xBF, ++ 0xCF, 0x80, 0xCF, 0x81, 0xCF, 0x82, 0xCF, 0x83, ++ 0xCF, 0x84, 0xCF, 0x85, 0xCF, 0x86, 0xCF, 0x87, ++ 0xCF, 0x88, 0xCF, 0x89, 0xE2, 0x88, 0x82, 0xCE, ++ 0xB5, 0xCE, 0xB8, 0xCE, 0xBA, 0xCF, 0x86, 0xCF, ++ 0x81, 0xCF, 0x80, 0xCE, 0x91, 0xCE, 0x92, 0xCE, ++ 0x93, 0xCE, 0x94, 0xCE, 0x95, 0xCE, 0x96, 0xCE, ++ 0x97, 0xCE, 0x98, 0xCE, 0x99, 0xCE, 0x9A, 0xCE, ++ 0x9B, 0xCE, 0x9C, 0xCE, 0x9D, 0xCE, 0x9E, 0xCE, ++ 0x9F, 0xCE, 0xA0, 0xCE, 0xA1, 0xCE, 0x98, 0xCE, ++ 0xA3, 0xCE, 0xA4, 0xCE, 0xA5, 0xCE, 0xA6, 0xCE, ++ 0xA7, 0xCE, 0xA8, 0xCE, 0xA9, 0xE2, 0x88, 0x87, ++ 0xCE, 0xB1, 0xCE, 0xB2, 0xCE, 0xB3, 0xCE, 0xB4, ++ 0xCE, 0xB5, 0xCE, 0xB6, 0xCE, 0xB7, 0xCE, 0xB8, ++ 0xCE, 0xB9, 0xCE, 0xBA, 0xCE, 0xBB, 0xCE, 0xBC, ++ 0xCE, 0xBD, 0xCE, 0xBE, 0xCE, 0xBF, 0xCF, 0x80, ++ 0xCF, 0x81, 0xCF, 0x82, 0xCF, 0x83, 0xCF, 0x84, ++ 0xCF, 0x85, 0xCF, 0x86, 0xCF, 0x87, 0xCF, 0x88, ++ 0xCF, 0x89, 0xE2, 0x88, 0x82, 0xCE, 0xB5, 0xCE, ++ 0xB8, 0xCE, 0xBA, 0xCF, 0x86, 0xCF, 0x81, 0xCF, ++ 0x80, 0xCE, 0x91, 0xCE, 0x92, 0xCE, 0x93, 0xCE, ++ 0x94, 0xCE, 0x95, 0xCE, 0x96, 0xCE, 0x97, 0xCE, ++ 0x98, 0xCE, 0x99, 0xCE, 0x9A, 0xCE, 0x9B, 0xCE, ++ 0x9C, 0xCE, 0x9D, 0xCE, 0x9E, 0xCE, 0x9F, 0xCE, ++ 0xA0, 0xCE, 0xA1, 0xCE, 0x98, 0xCE, 0xA3, 0xCE, ++ 0xA4, 0xCE, 0xA5, 0xCE, 0xA6, 0xCE, 0xA7, 0xCE, ++ 0xA8, 0xCE, 0xA9, 0xE2, 0x88, 0x87, 0xCE, 0xB1, ++ 0xCE, 0xB2, 0xCE, 0xB3, 0xCE, 0xB4, 0xCE, 0xB5, ++ 0xCE, 0xB6, 0xCE, 0xB7, 0xCE, 0xB8, 0xCE, 0xB9, ++ 0xCE, 0xBA, 0xCE, 0xBB, 0xCE, 0xBC, 0xCE, 0xBD, ++ 0xCE, 0xBE, 0xCE, 0xBF, 0xCF, 0x80, 0xCF, 0x81, ++ 0xCF, 0x82, 0xCF, 0x83, 0xCF, 0x84, 0xCF, 0x85, ++ 0xCF, 0x86, 0xCF, 0x87, 0xCF, 0x88, 0xCF, 0x89, ++ 0xE2, 0x88, 0x82, 0xCE, 0xB5, 0xCE, 0xB8, 0xCE, ++ 0xBA, 0xCF, 0x86, 0xCF, 0x81, 0xCF, 0x80, 0xCE, ++ 0x91, 0xCE, 0x92, 0xCE, 0x93, 0xCE, 0x94, 0xCE, ++ 0x95, 0xCE, 0x96, 0xCE, 0x97, 0xCE, 0x98, 0xCE, ++ 0x99, 0xCE, 0x9A, 0xCE, 0x9B, 0xCE, 0x9C, 0xCE, ++ 0x9D, 0xCE, 0x9E, 0xCE, 0x9F, 0xCE, 0xA0, 0xCE, ++ 0xA1, 0xCE, 0x98, 0xCE, 0xA3, 0xCE, 0xA4, 0xCE, ++ 0xA5, 0xCE, 0xA6, 0xCE, 0xA7, 0xCE, 0xA8, 0xCE, ++ 0xA9, 0xE2, 0x88, 0x87, 0xCE, 0xB1, 0xCE, 0xB2, ++ 0xCE, 0xB3, 0xCE, 0xB4, 0xCE, 0xB5, 0xCE, 0xB6, ++ 0xCE, 0xB7, 0xCE, 0xB8, 
0xCE, 0xB9, 0xCE, 0xBA, ++ 0xCE, 0xBB, 0xCE, 0xBC, 0xCE, 0xBD, 0xCE, 0xBE, ++ 0xCE, 0xBF, 0xCF, 0x80, 0xCF, 0x81, 0xCF, 0x82, ++ 0xCF, 0x83, 0xCF, 0x84, 0xCF, 0x85, 0xCF, 0x86, ++ 0xCF, 0x87, 0xCF, 0x88, 0xCF, 0x89, 0xE2, 0x88, ++ 0x82, 0xCE, 0xB5, 0xCE, 0xB8, 0xCE, 0xBA, 0xCF, ++ 0x86, 0xCF, 0x81, 0xCF, 0x80, 0xCE, 0x91, 0xCE, ++ 0x92, 0xCE, 0x93, 0xCE, 0x94, 0xCE, 0x95, 0xCE, ++ 0x96, 0xCE, 0x97, 0xCE, 0x98, 0xCE, 0x99, 0xCE, ++ 0x9A, 0xCE, 0x9B, 0xCE, 0x9C, 0xCE, 0x9D, 0xCE, ++ 0x9E, 0xCE, 0x9F, 0xCE, 0xA0, 0xCE, 0xA1, 0xCE, ++ 0x98, 0xCE, 0xA3, 0xCE, 0xA4, 0xCE, 0xA5, 0xCE, ++ 0xA6, 0xCE, 0xA7, 0xCE, 0xA8, 0xCE, 0xA9, 0xE2, ++ 0x88, 0x87, 0xCE, 0xB1, 0xCE, 0xB2, 0xCE, 0xB3, ++ 0xCE, 0xB4, 0xCE, 0xB5, 0xCE, 0xB6, 0xCE, 0xB7, ++ 0xCE, 0xB8, 0xCE, 0xB9, 0xCE, 0xBA, 0xCE, 0xBB, ++ 0xCE, 0xBC, 0xCE, 0xBD, 0xCE, 0xBE, 0xCE, 0xBF, ++ 0xCF, 0x80, 0xCF, 0x81, 0xCF, 0x82, 0xCF, 0x83, ++ 0xCF, 0x84, 0xCF, 0x85, 0xCF, 0x86, 0xCF, 0x87, ++ 0xCF, 0x88, 0xCF, 0x89, 0xE2, 0x88, 0x82, 0xCE, ++ 0xB5, 0xCE, 0xB8, 0xCE, 0xBA, 0xCF, 0x86, 0xCF, ++ 0x81, 0xCF, 0x80, 0xCF, 0x9C, 0xCF, 0x9D, 0x30, ++ 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37, 0x38, ++ 0x39, 0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, ++ 0x37, 0x38, 0x39, 0x30, 0x31, 0x32, 0x33, 0x34, ++ 0x35, 0x36, 0x37, 0x38, 0x39, 0x30, 0x31, 0x32, ++ 0x33, 0x34, 0x35, 0x36, 0x37, 0x38, 0x39, 0x30, ++ 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37, 0x38, ++ 0x39, 0xF6, 0xE4, 0xB8, 0xBD, 0xF6, 0xE4, 0xB8, ++ 0xB8, 0xF6, 0xE4, 0xB9, 0x81, 0xF6, 0xF0, 0xA0, ++ 0x84, 0xA2, 0xF6, 0xE4, 0xBD, 0xA0, 0xF6, 0xE4, ++ 0xBE, 0xAE, 0xF6, 0xE4, 0xBE, 0xBB, 0xF6, 0xE5, ++ 0x80, 0x82, 0xF6, 0xE5, 0x81, 0xBA, 0xF6, 0xE5, ++ 0x82, 0x99, 0xF6, 0xE5, 0x83, 0xA7, 0xF6, 0xE5, ++ 0x83, 0x8F, 0xF6, 0xE3, 0x92, 0x9E, 0xF6, 0xF0, ++ 0xA0, 0x98, 0xBA, 0xF6, 0xE5, 0x85, 0x8D, 0xF6, ++ 0xE5, 0x85, 0x94, 0xF6, 0xE5, 0x85, 0xA4, 0xF6, ++ 0xE5, 0x85, 0xB7, 0xF6, 0xF0, 0xA0, 0x94, 0x9C, ++ 0xF6, 0xE3, 0x92, 0xB9, 0xF6, 0xE5, 0x85, 0xA7, ++ 0xF6, 0xE5, 0x86, 0x8D, 0xF6, 0xF0, 0xA0, 0x95, ++ 0x8B, 0xF6, 0xE5, 0x86, 0x97, 0xF6, 0xE5, 0x86, ++ 0xA4, 0xF6, 0xE4, 0xBB, 0x8C, 0xF6, 0xE5, 0x86, ++ 0xAC, 0xF6, 0xE5, 0x86, 0xB5, 0xF6, 0xF0, 0xA9, ++ 0x87, 0x9F, 0xF6, 0xE5, 0x87, 0xB5, 0xF6, 0xE5, ++ 0x88, 0x83, 0xF6, 0xE3, 0x93, 0x9F, 0xF6, 0xE5, ++ 0x88, 0xBB, 0xF6, 0xE5, 0x89, 0x86, 0xF6, 0xE5, ++ 0x89, 0xB2, 0xF6, 0xE5, 0x89, 0xB7, 0xF6, 0xE3, ++ 0x94, 0x95, 0xF6, 0xE5, 0x8B, 0x87, 0xF6, 0xE5, ++ 0x8B, 0x89, 0xF6, 0xE5, 0x8B, 0xA4, 0xF6, 0xE5, ++ 0x8B, 0xBA, 0xF6, 0xE5, 0x8C, 0x85, 0xF6, 0xE5, ++ 0x8C, 0x86, 0xF6, 0xE5, 0x8C, 0x97, 0xF6, 0xE5, ++ 0x8D, 0x89, 0xF6, 0xE5, 0x8D, 0x91, 0xF6, 0xE5, ++ 0x8D, 0x9A, 0xF6, 0xE5, 0x8D, 0xB3, 0xF6, 0xE5, ++ 0x8D, 0xBD, 0xF6, 0xE5, 0x8D, 0xBF, 0xF6, 0xE5, ++ 0x8D, 0xBF, 0xF6, 0xE5, 0x8D, 0xBF, 0xF6, 0xF0, ++ 0xA0, 0xA8, 0xAC, 0xF6, 0xE7, 0x81, 0xB0, 0xF6, ++ 0xE5, 0x8F, 0x8A, 0xF6, 0xE5, 0x8F, 0x9F, 0xF6, ++ 0xF0, 0xA0, 0xAD, 0xA3, 0xF6, 0xE5, 0x8F, 0xAB, ++ 0xF6, 0xE5, 0x8F, 0xB1, 0xF6, 0xE5, 0x90, 0x86, ++ 0xF6, 0xE5, 0x92, 0x9E, 0xF6, 0xE5, 0x90, 0xB8, ++ 0xF6, 0xE5, 0x91, 0x88, 0xF6, 0xE5, 0x91, 0xA8, ++ 0xF6, 0xE5, 0x92, 0xA2, 0xF6, 0xE5, 0x93, 0xB6, ++ 0xF6, 0xE5, 0x94, 0x90, 0xF6, 0xE5, 0x95, 0x93, ++ 0xF6, 0xE5, 0x95, 0xA3, 0xF6, 0xE5, 0x96, 0x84, ++ 0xF6, 0xE5, 0x96, 0x84, 0xF6, 0xE5, 0x96, 0x99, ++ 0xF6, 0xE5, 0x96, 0xAB, 0xF6, 0xE5, 0x96, 0xB3, ++ 0xF6, 0xE5, 0x97, 0x82, 0xF6, 0xE5, 0x9C, 0x96, ++ 0xF6, 0xE5, 0x98, 0x86, 0xF6, 0xE5, 0x9C, 0x97, ++ 0xF6, 0xE5, 0x99, 0x91, 0xF6, 0xE5, 0x99, 0xB4, ++ 0xF6, 0xE5, 0x88, 0x87, 0xF6, 0xE5, 0xA3, 0xAE, ++ 0xF6, 
0xE5, 0x9F, 0x8E, 0xF6, 0xE5, 0x9F, 0xB4, ++ 0xF6, 0xE5, 0xA0, 0x8D, 0xF6, 0xE5, 0x9E, 0x8B, ++ 0xF6, 0xE5, 0xA0, 0xB2, 0xF6, 0xE5, 0xA0, 0xB1, ++ 0xF6, 0xE5, 0xA2, 0xAC, 0xF6, 0xF0, 0xA1, 0x93, ++ 0xA4, 0xF6, 0xE5, 0xA3, 0xB2, 0xF6, 0xE5, 0xA3, ++ 0xB7, 0xF6, 0xE5, 0xA4, 0x86, 0xF6, 0xE5, 0xA4, ++ 0x9A, 0xF6, 0xE5, 0xA4, 0xA2, 0xF6, 0xE5, 0xA5, ++ 0xA2, 0xF6, 0xF0, 0xA1, 0x9A, 0xA8, 0xF6, 0xF0, ++ 0xA1, 0x9B, 0xAA, 0xF6, 0xE5, 0xA7, 0xAC, 0xF6, ++ 0xE5, 0xA8, 0x9B, 0xF6, 0xE5, 0xA8, 0xA7, 0xF6, ++ 0xE5, 0xA7, 0x98, 0xF6, 0xE5, 0xA9, 0xA6, 0xF6, ++ 0xE3, 0x9B, 0xAE, 0xF6, 0xE3, 0x9B, 0xBC, 0xF6, ++ 0xE5, 0xAC, 0x88, 0xF6, 0xE5, 0xAC, 0xBE, 0xF6, ++ 0xE5, 0xAC, 0xBE, 0xF6, 0xF0, 0xA1, 0xA7, 0x88, ++ 0xF6, 0xE5, 0xAF, 0x83, 0xF6, 0xE5, 0xAF, 0x98, ++ 0xF6, 0xE5, 0xAF, 0xA7, 0xF6, 0xE5, 0xAF, 0xB3, ++ 0xF6, 0xF0, 0xA1, 0xAC, 0x98, 0xF6, 0xE5, 0xAF, ++ 0xBF, 0xF6, 0xE5, 0xB0, 0x86, 0xF6, 0xE5, 0xBD, ++ 0x93, 0xF6, 0xE5, 0xB0, 0xA2, 0xF6, 0xE3, 0x9E, ++ 0x81, 0xF6, 0xE5, 0xB1, 0xA0, 0xF6, 0xE5, 0xB1, ++ 0xAE, 0xF6, 0xE5, 0xB3, 0x80, 0xF6, 0xE5, 0xB2, ++ 0x8D, 0xF6, 0xF0, 0xA1, 0xB7, 0xA4, 0xF6, 0xE5, ++ 0xB5, 0x83, 0xF6, 0xF0, 0xA1, 0xB7, 0xA6, 0xF6, ++ 0xE5, 0xB5, 0xAE, 0xF6, 0xE5, 0xB5, 0xAB, 0xF6, ++ 0xE5, 0xB5, 0xBC, 0xF6, 0xE5, 0xB7, 0xA1, 0xF6, ++ 0xE5, 0xB7, 0xA2, 0xF6, 0xE3, 0xA0, 0xAF, 0xF6, ++ 0xE5, 0xB7, 0xBD, 0xF6, 0xE5, 0xB8, 0xA8, 0xF6, ++ 0xE5, 0xB8, 0xBD, 0xF6, 0xE5, 0xB9, 0xA9, 0xF6, ++ 0xE3, 0xA1, 0xA2, 0xF6, 0xF0, 0xA2, 0x86, 0x83, ++ 0xF6, 0xE3, 0xA1, 0xBC, 0xF6, 0xE5, 0xBA, 0xB0, ++ 0xF6, 0xE5, 0xBA, 0xB3, 0xF6, 0xE5, 0xBA, 0xB6, ++ 0xF6, 0xE5, 0xBB, 0x8A, 0xF6, 0xF0, 0xAA, 0x8E, ++ 0x92, 0xF6, 0xE5, 0xBB, 0xBE, 0xF6, 0xF0, 0xA2, ++ 0x8C, 0xB1, 0xF6, 0xF0, 0xA2, 0x8C, 0xB1, 0xF6, ++ 0xE8, 0x88, 0x81, 0xF6, 0xE5, 0xBC, 0xA2, 0xF6, ++ 0xE5, 0xBC, 0xA2, 0xF6, 0xE3, 0xA3, 0x87, 0xF6, ++ 0xF0, 0xA3, 0x8A, 0xB8, 0xF6, 0xF0, 0xA6, 0x87, ++ 0x9A, 0xF6, 0xE5, 0xBD, 0xA2, 0xF6, 0xE5, 0xBD, ++ 0xAB, 0xF6, 0xE3, 0xA3, 0xA3, 0xF6, 0xE5, 0xBE, ++ 0x9A, 0xF6, 0xE5, 0xBF, 0x8D, 0xF6, 0xE5, 0xBF, ++ 0x97, 0xF6, 0xE5, 0xBF, 0xB9, 0xF6, 0xE6, 0x82, ++ 0x81, 0xF6, 0xE3, 0xA4, 0xBA, 0xF6, 0xE3, 0xA4, ++ 0x9C, 0xF6, 0xE6, 0x82, 0x94, 0xF6, 0xF0, 0xA2, ++ 0x9B, 0x94, 0xF6, 0xE6, 0x83, 0x87, 0xF6, 0xE6, ++ 0x85, 0x88, 0xF6, 0xE6, 0x85, 0x8C, 0xF6, 0xE6, ++ 0x85, 0x8E, 0xF6, 0xE6, 0x85, 0x8C, 0xF6, 0xE6, ++ 0x85, 0xBA, 0xF6, 0xE6, 0x86, 0x8E, 0xF6, 0xE6, ++ 0x86, 0xB2, 0xF6, 0xE6, 0x86, 0xA4, 0xF6, 0xE6, ++ 0x86, 0xAF, 0xF6, 0xE6, 0x87, 0x9E, 0xF6, 0xE6, ++ 0x87, 0xB2, 0xF6, 0xE6, 0x87, 0xB6, 0xF6, 0xE6, ++ 0x88, 0x90, 0xF6, 0xE6, 0x88, 0x9B, 0xF6, 0xE6, ++ 0x89, 0x9D, 0xF6, 0xE6, 0x8A, 0xB1, 0xF6, 0xE6, ++ 0x8B, 0x94, 0xF6, 0xE6, 0x8D, 0x90, 0xF6, 0xF0, ++ 0xA2, 0xAC, 0x8C, 0xF6, 0xE6, 0x8C, 0xBD, 0xF6, ++ 0xE6, 0x8B, 0xBC, 0xF6, 0xE6, 0x8D, 0xA8, 0xF6, ++ 0xE6, 0x8E, 0x83, 0xF6, 0xE6, 0x8F, 0xA4, 0xF6, ++ 0xF0, 0xA2, 0xAF, 0xB1, 0xF6, 0xE6, 0x90, 0xA2, ++ 0xF6, 0xE6, 0x8F, 0x85, 0xF6, 0xE6, 0x8E, 0xA9, ++ 0xF6, 0xE3, 0xA8, 0xAE, 0xF6, 0xE6, 0x91, 0xA9, ++ 0xF6, 0xE6, 0x91, 0xBE, 0xF6, 0xE6, 0x92, 0x9D, ++ 0xF6, 0xE6, 0x91, 0xB7, 0xF6, 0xE3, 0xA9, 0xAC, ++ 0xF6, 0xE6, 0x95, 0x8F, 0xF6, 0xE6, 0x95, 0xAC, ++ 0xF6, 0xF0, 0xA3, 0x80, 0x8A, 0xF6, 0xE6, 0x97, ++ 0xA3, 0xF6, 0xE6, 0x9B, 0xB8, 0xF6, 0xE6, 0x99, ++ 0x89, 0xF6, 0xE3, 0xAC, 0x99, 0xF6, 0xE6, 0x9A, ++ 0x91, 0xF6, 0xE3, 0xAC, 0x88, 0xF6, 0xE3, 0xAB, ++ 0xA4, 0xF6, 0xE5, 0x86, 0x92, 0xF6, 0xE5, 0x86, ++ 0x95, 0xF6, 0xE6, 0x9C, 0x80, 0xF6, 0xE6, 0x9A, ++ 0x9C, 0xF6, 0xE8, 0x82, 0xAD, 0xF6, 0xE4, 0x8F, ++ 0x99, 0xF6, 0xE6, 0x9C, 0x97, 0xF6, 0xE6, 
0x9C, ++ 0x9B, 0xF6, 0xE6, 0x9C, 0xA1, 0xF6, 0xE6, 0x9D, ++ 0x9E, 0xF6, 0xE6, 0x9D, 0x93, 0xF6, 0xF0, 0xA3, ++ 0x8F, 0x83, 0xF6, 0xE3, 0xAD, 0x89, 0xF6, 0xE6, ++ 0x9F, 0xBA, 0xF6, 0xE6, 0x9E, 0x85, 0xF6, 0xE6, ++ 0xA1, 0x92, 0xF6, 0xE6, 0xA2, 0x85, 0xF6, 0xF0, ++ 0xA3, 0x91, 0xAD, 0xF6, 0xE6, 0xA2, 0x8E, 0xF6, ++ 0xE6, 0xA0, 0x9F, 0xF6, 0xE6, 0xA4, 0x94, 0xF6, ++ 0xE3, 0xAE, 0x9D, 0xF6, 0xE6, 0xA5, 0x82, 0xF6, ++ 0xE6, 0xA6, 0xA3, 0xF6, 0xE6, 0xA7, 0xAA, 0xF6, ++ 0xE6, 0xAA, 0xA8, 0xF6, 0xF0, 0xA3, 0x9A, 0xA3, ++ 0xF6, 0xE6, 0xAB, 0x9B, 0xF6, 0xE3, 0xB0, 0x98, ++ 0xF6, 0xE6, 0xAC, 0xA1, 0xF6, 0xF0, 0xA3, 0xA2, ++ 0xA7, 0xF6, 0xE6, 0xAD, 0x94, 0xF6, 0xE3, 0xB1, ++ 0x8E, 0xF6, 0xE6, 0xAD, 0xB2, 0xF6, 0xE6, 0xAE, ++ 0x9F, 0xF6, 0xE6, 0xAE, 0xBA, 0xF6, 0xE6, 0xAE, ++ 0xBB, 0xF6, 0xF0, 0xA3, 0xAA, 0x8D, 0xF6, 0xF0, ++ 0xA1, 0xB4, 0x8B, 0xF6, 0xF0, 0xA3, 0xAB, 0xBA, ++ 0xF6, 0xE6, 0xB1, 0x8E, 0xF6, 0xF0, 0xA3, 0xB2, ++ 0xBC, 0xF6, 0xE6, 0xB2, 0xBF, 0xF6, 0xE6, 0xB3, ++ 0x8D, 0xF6, 0xE6, 0xB1, 0xA7, 0xF6, 0xE6, 0xB4, ++ 0x96, 0xF6, 0xE6, 0xB4, 0xBE, 0xF6, 0xE6, 0xB5, ++ 0xB7, 0xF6, 0xE6, 0xB5, 0x81, 0xF6, 0xE6, 0xB5, ++ 0xA9, 0xF6, 0xE6, 0xB5, 0xB8, 0xF6, 0xE6, 0xB6, ++ 0x85, 0xF6, 0xF0, 0xA3, 0xB4, 0x9E, 0xF6, 0xE6, ++ 0xB4, 0xB4, 0xF6, 0xE6, 0xB8, 0xAF, 0xF6, 0xE6, ++ 0xB9, 0xAE, 0xF6, 0xE3, 0xB4, 0xB3, 0xF6, 0xE6, ++ 0xBB, 0x8B, 0xF6, 0xE6, 0xBB, 0x87, 0xF6, 0xF0, ++ 0xA3, 0xBB, 0x91, 0xF6, 0xE6, 0xB7, 0xB9, 0xF6, ++ 0xE6, 0xBD, 0xAE, 0xF6, 0xF0, 0xA3, 0xBD, 0x9E, ++ 0xF6, 0xF0, 0xA3, 0xBE, 0x8E, 0xF6, 0xE6, 0xBF, ++ 0x86, 0xF6, 0xE7, 0x80, 0xB9, 0xF6, 0xE7, 0x80, ++ 0x9E, 0xF6, 0xE7, 0x80, 0x9B, 0xF6, 0xE3, 0xB6, ++ 0x96, 0xF6, 0xE7, 0x81, 0x8A, 0xF6, 0xE7, 0x81, ++ 0xBD, 0xF6, 0xE7, 0x81, 0xB7, 0xF6, 0xE7, 0x82, ++ 0xAD, 0xF6, 0xF0, 0xA0, 0x94, 0xA5, 0xF6, 0xE7, ++ 0x85, 0x85, 0xF6, 0xF0, 0xA4, 0x89, 0xA3, 0xF6, ++ 0xE7, 0x86, 0x9C, 0xF6, 0xF0, 0xA4, 0x8E, 0xAB, ++ 0xF6, 0xE7, 0x88, 0xA8, 0xF6, 0xE7, 0x88, 0xB5, ++ 0xF6, 0xE7, 0x89, 0x90, 0xF6, 0xF0, 0xA4, 0x98, ++ 0x88, 0xF6, 0xE7, 0x8A, 0x80, 0xF6, 0xE7, 0x8A, ++ 0x95, 0xF6, 0xF0, 0xA4, 0x9C, 0xB5, 0xF6, 0xF0, ++ 0xA4, 0xA0, 0x94, 0xF6, 0xE7, 0x8D, 0xBA, 0xF6, ++ 0xE7, 0x8E, 0x8B, 0xF6, 0xE3, 0xBA, 0xAC, 0xF6, ++ 0xE7, 0x8E, 0xA5, 0xF6, 0xE3, 0xBA, 0xB8, 0xF6, ++ 0xE3, 0xBA, 0xB8, 0xF6, 0xE7, 0x91, 0x87, 0xF6, ++ 0xE7, 0x91, 0x9C, 0xF6, 0xE7, 0x91, 0xB1, 0xF6, ++ 0xE7, 0x92, 0x85, 0xF6, 0xE7, 0x93, 0x8A, 0xF6, ++ 0xE3, 0xBC, 0x9B, 0xF6, 0xE7, 0x94, 0xA4, 0xF6, ++ 0xF0, 0xA4, 0xB0, 0xB6, 0xF6, 0xE7, 0x94, 0xBE, ++ 0xF6, 0xF0, 0xA4, 0xB2, 0x92, 0xF6, 0xE7, 0x95, ++ 0xB0, 0xF6, 0xF0, 0xA2, 0x86, 0x9F, 0xF6, 0xE7, ++ 0x98, 0x90, 0xF6, 0xF0, 0xA4, 0xBE, 0xA1, 0xF6, ++ 0xF0, 0xA4, 0xBE, 0xB8, 0xF6, 0xF0, 0xA5, 0x81, ++ 0x84, 0xF6, 0xE3, 0xBF, 0xBC, 0xF6, 0xE4, 0x80, ++ 0x88, 0xF6, 0xE7, 0x9B, 0xB4, 0xF6, 0xF0, 0xA5, ++ 0x83, 0xB3, 0xF6, 0xF0, 0xA5, 0x83, 0xB2, 0xF6, ++ 0xF0, 0xA5, 0x84, 0x99, 0xF6, 0xF0, 0xA5, 0x84, ++ 0xB3, 0xF6, 0xE7, 0x9C, 0x9E, 0xF6, 0xE7, 0x9C, ++ 0x9F, 0xF6, 0xE7, 0x9C, 0x9F, 0xF6, 0xE7, 0x9D, ++ 0x8A, 0xF6, 0xE4, 0x80, 0xB9, 0xF6, 0xE7, 0x9E, ++ 0x8B, 0xF6, 0xE4, 0x81, 0x86, 0xF6, 0xE4, 0x82, ++ 0x96, 0xF6, 0xF0, 0xA5, 0x90, 0x9D, 0xF6, 0xE7, ++ 0xA1, 0x8E, 0xF6, 0xE7, 0xA2, 0x8C, 0xF6, 0xE7, ++ 0xA3, 0x8C, 0xF6, 0xE4, 0x83, 0xA3, 0xF6, 0xF0, ++ 0xA5, 0x98, 0xA6, 0xF6, 0xE7, 0xA5, 0x96, 0xF6, ++ 0xF0, 0xA5, 0x9A, 0x9A, 0xF6, 0xF0, 0xA5, 0x9B, ++ 0x85, 0xF6, 0xE7, 0xA6, 0x8F, 0xF6, 0xE7, 0xA7, ++ 0xAB, 0xF6, 0xE4, 0x84, 0xAF, 0xF6, 0xE7, 0xA9, ++ 0x80, 0xF6, 0xE7, 0xA9, 0x8A, 0xF6, 0xE7, 0xA9, ++ 0x8F, 0xF6, 0xF0, 0xA5, 
0xA5, 0xBC, 0xF6, 0xF0, ++ 0xA5, 0xAA, 0xA7, 0xF6, 0xF0, 0xA5, 0xAA, 0xA7, ++ 0xF6, 0xE7, 0xAB, 0xAE, 0xF6, 0xE4, 0x88, 0x82, ++ 0xF6, 0xF0, 0xA5, 0xAE, 0xAB, 0xF6, 0xE7, 0xAF, ++ 0x86, 0xF6, 0xE7, 0xAF, 0x89, 0xF6, 0xE4, 0x88, ++ 0xA7, 0xF6, 0xF0, 0xA5, 0xB2, 0x80, 0xF6, 0xE7, ++ 0xB3, 0x92, 0xF6, 0xE4, 0x8A, 0xA0, 0xF6, 0xE7, ++ 0xB3, 0xA8, 0xF6, 0xE7, 0xB3, 0xA3, 0xF6, 0xE7, ++ 0xB4, 0x80, 0xF6, 0xF0, 0xA5, 0xBE, 0x86, 0xF6, ++ 0xE7, 0xB5, 0xA3, 0xF6, 0xE4, 0x8C, 0x81, 0xF6, ++ 0xE7, 0xB7, 0x87, 0xF6, 0xE7, 0xB8, 0x82, 0xF6, ++ 0xE7, 0xB9, 0x85, 0xF6, 0xE4, 0x8C, 0xB4, 0xF6, ++ 0xF0, 0xA6, 0x88, 0xA8, 0xF6, 0xF0, 0xA6, 0x89, ++ 0x87, 0xF6, 0xE4, 0x8D, 0x99, 0xF6, 0xF0, 0xA6, ++ 0x8B, 0x99, 0xF6, 0xE7, 0xBD, 0xBA, 0xF6, 0xF0, ++ 0xA6, 0x8C, 0xBE, 0xF6, 0xE7, 0xBE, 0x95, 0xF6, ++ 0xE7, 0xBF, 0xBA, 0xF6, 0xE8, 0x80, 0x85, 0xF6, ++ 0xF0, 0xA6, 0x93, 0x9A, 0xF6, 0xF0, 0xA6, 0x94, ++ 0xA3, 0xF6, 0xE8, 0x81, 0xA0, 0xF6, 0xF0, 0xA6, ++ 0x96, 0xA8, 0xF6, 0xE8, 0x81, 0xB0, 0xF6, 0xF0, ++ 0xA3, 0x8D, 0x9F, 0xF6, 0xE4, 0x8F, 0x95, 0xF6, ++ 0xE8, 0x82, 0xB2, 0xF6, 0xE8, 0x84, 0x83, 0xF6, ++ 0xE4, 0x90, 0x8B, 0xF6, 0xE8, 0x84, 0xBE, 0xF6, ++ 0xE5, 0xAA, 0xB5, 0xF6, 0xF0, 0xA6, 0x9E, 0xA7, ++ 0xF6, 0xF0, 0xA6, 0x9E, 0xB5, 0xF6, 0xF0, 0xA3, ++ 0x8E, 0x93, 0xF6, 0xF0, 0xA3, 0x8E, 0x9C, 0xF6, ++ 0xE8, 0x88, 0x81, 0xF6, 0xE8, 0x88, 0x84, 0xF6, ++ 0xE8, 0xBE, 0x9E, 0xF6, 0xE4, 0x91, 0xAB, 0xF6, ++ 0xE8, 0x8A, 0x91, 0xF6, 0xE8, 0x8A, 0x8B, 0xF6, ++ 0xE8, 0x8A, 0x9D, 0xF6, 0xE5, 0x8A, 0xB3, 0xF6, ++ 0xE8, 0x8A, 0xB1, 0xF6, 0xE8, 0x8A, 0xB3, 0xF6, ++ 0xE8, 0x8A, 0xBD, 0xF6, 0xE8, 0x8B, 0xA6, 0xF6, ++ 0xF0, 0xA6, 0xAC, 0xBC, 0xF6, 0xE8, 0x8B, 0xA5, ++ 0xF6, 0xE8, 0x8C, 0x9D, 0xF6, 0xE8, 0x8D, 0xA3, ++ 0xF6, 0xE8, 0x8E, 0xAD, 0xF6, 0xE8, 0x8C, 0xA3, ++ 0xF6, 0xE8, 0x8E, 0xBD, 0xF6, 0xE8, 0x8F, 0xA7, ++ 0xF6, 0xE8, 0x91, 0x97, 0xF6, 0xE8, 0x8D, 0x93, ++ 0xF6, 0xE8, 0x8F, 0x8A, 0xF6, 0xE8, 0x8F, 0x8C, ++ 0xF6, 0xE8, 0x8F, 0x9C, 0xF6, 0xF0, 0xA6, 0xB0, ++ 0xB6, 0xF6, 0xF0, 0xA6, 0xB5, 0xAB, 0xF6, 0xF0, ++ 0xA6, 0xB3, 0x95, 0xF6, 0xE4, 0x94, 0xAB, 0xF6, ++ 0xE8, 0x93, 0xB1, 0xF6, 0xE8, 0x93, 0xB3, 0xF6, ++ 0xE8, 0x94, 0x96, 0xF6, 0xF0, 0xA7, 0x8F, 0x8A, ++ 0xF6, 0xE8, 0x95, 0xA4, 0xF6, 0xF0, 0xA6, 0xBC, ++ 0xAC, 0xF6, 0xE4, 0x95, 0x9D, 0xF6, 0xE4, 0x95, ++ 0xA1, 0xF6, 0xF0, 0xA6, 0xBE, 0xB1, 0xF6, 0xF0, ++ 0xA7, 0x83, 0x92, 0xF6, 0xE4, 0x95, 0xAB, 0xF6, ++ 0xE8, 0x99, 0x90, 0xF6, 0xE8, 0x99, 0x9C, 0xF6, ++ 0xE8, 0x99, 0xA7, 0xF6, 0xE8, 0x99, 0xA9, 0xF6, ++ 0xE8, 0x9A, 0xA9, 0xF6, 0xE8, 0x9A, 0x88, 0xF6, ++ 0xE8, 0x9C, 0x8E, 0xF6, 0xE8, 0x9B, 0xA2, 0xF6, ++ 0xE8, 0x9D, 0xB9, 0xF6, 0xE8, 0x9C, 0xA8, 0xF6, ++ 0xE8, 0x9D, 0xAB, 0xF6, 0xE8, 0x9E, 0x86, 0xF6, ++ 0xE4, 0x97, 0x97, 0xF6, 0xE8, 0x9F, 0xA1, 0xF6, ++ 0xE8, 0xA0, 0x81, 0xF6, 0xE4, 0x97, 0xB9, 0xF6, ++ 0xE8, 0xA1, 0xA0, 0xF6, 0xE8, 0xA1, 0xA3, 0xF6, ++ 0xF0, 0xA7, 0x99, 0xA7, 0xF6, 0xE8, 0xA3, 0x97, ++ 0xF6, 0xE8, 0xA3, 0x9E, 0xF6, 0xE4, 0x98, 0xB5, ++ 0xF6, 0xE8, 0xA3, 0xBA, 0xF6, 0xE3, 0x92, 0xBB, ++ 0xF6, 0xF0, 0xA7, 0xA2, 0xAE, 0xF6, 0xF0, 0xA7, ++ 0xA5, 0xA6, 0xF6, 0xE4, 0x9A, 0xBE, 0xF6, 0xE4, ++ 0x9B, 0x87, 0xF6, 0xE8, 0xAA, 0xA0, 0xF6, 0xE8, ++ 0xAB, 0xAD, 0xF6, 0xE8, 0xAE, 0x8A, 0xF6, 0xE8, ++ 0xB1, 0x95, 0xF6, 0xF0, 0xA7, 0xB2, 0xA8, 0xF6, ++ 0xE8, 0xB2, 0xAB, 0xF6, 0xE8, 0xB3, 0x81, 0xF6, ++ 0xE8, 0xB4, 0x9B, 0xF6, 0xE8, 0xB5, 0xB7, 0xF6, ++ 0xF0, 0xA7, 0xBC, 0xAF, 0xF6, 0xF0, 0xA0, 0xA0, ++ 0x84, 0xF6, 0xE8, 0xB7, 0x8B, 0xF6, 0xE8, 0xB6, ++ 0xBC, 0xF6, 0xE8, 0xB7, 0xB0, 0xF6, 0xF0, 0xA0, ++ 0xA3, 0x9E, 0xF6, 0xE8, 0xBB, 0x94, 0xF6, 0xE8, ++ 0xBC, 
0xB8, 0xF6, 0xF0, 0xA8, 0x97, 0x92, 0xF6, ++ 0xF0, 0xA8, 0x97, 0xAD, 0xF6, 0xE9, 0x82, 0x94, ++ 0xF6, 0xE9, 0x83, 0xB1, 0xF6, 0xE9, 0x84, 0x91, ++ 0xF6, 0xF0, 0xA8, 0x9C, 0xAE, 0xF6, 0xE9, 0x84, ++ 0x9B, 0xF6, 0xE9, 0x88, 0xB8, 0xF6, 0xE9, 0x8B, ++ 0x97, 0xF6, 0xE9, 0x8B, 0x98, 0xF6, 0xE9, 0x89, ++ 0xBC, 0xF6, 0xE9, 0x8F, 0xB9, 0xF6, 0xE9, 0x90, ++ 0x95, 0xF6, 0xF0, 0xA8, 0xAF, 0xBA, 0xF6, 0xE9, ++ 0x96, 0x8B, 0xF6, 0xE4, 0xA6, 0x95, 0xF6, 0xE9, ++ 0x96, 0xB7, 0xF6, 0xF0, 0xA8, 0xB5, 0xB7, 0xF6, ++ 0xE4, 0xA7, 0xA6, 0xF6, 0xE9, 0x9B, 0x83, 0xF6, ++ 0xE5, 0xB6, 0xB2, 0xF6, 0xE9, 0x9C, 0xA3, 0xF6, ++ 0xF0, 0xA9, 0x85, 0x85, 0xF6, 0xF0, 0xA9, 0x88, ++ 0x9A, 0xF6, 0xE4, 0xA9, 0xAE, 0xF6, 0xE4, 0xA9, ++ 0xB6, 0xF6, 0xE9, 0x9F, 0xA0, 0xF6, 0xF0, 0xA9, ++ 0x90, 0x8A, 0xF6, 0xE4, 0xAA, 0xB2, 0xF6, 0xF0, ++ 0xA9, 0x92, 0x96, 0xF6, 0xE9, 0xA0, 0x8B, 0xF6, ++ 0xE9, 0xA0, 0x8B, 0xF6, 0xE9, 0xA0, 0xA9, 0xF6, ++ 0xF0, 0xA9, 0x96, 0xB6, 0xF6, 0xE9, 0xA3, 0xA2, ++ 0xF6, 0xE4, 0xAC, 0xB3, 0xF6, 0xE9, 0xA4, 0xA9, ++ 0xF6, 0xE9, 0xA6, 0xA7, 0xF6, 0xE9, 0xA7, 0x82, ++ 0xF6, 0xE9, 0xA7, 0xBE, 0xF6, 0xE4, 0xAF, 0x8E, ++ 0xF6, 0xF0, 0xA9, 0xAC, 0xB0, 0xF6, 0xE9, 0xAC, ++ 0x92, 0xF6, 0xE9, 0xB1, 0x80, 0xF6, 0xE9, 0xB3, ++ 0xBD, 0xF6, 0xE4, 0xB3, 0x8E, 0xF6, 0xE4, 0xB3, ++ 0xAD, 0xF6, 0xE9, 0xB5, 0xA7, 0xF6, 0xF0, 0xAA, ++ 0x83, 0x8E, 0xF6, 0xE4, 0xB3, 0xB8, 0xF6, 0xF0, ++ 0xAA, 0x84, 0x85, 0xF6, 0xF0, 0xAA, 0x88, 0x8E, ++ 0xF6, 0xF0, 0xAA, 0x8A, 0x91, 0xF6, 0xE9, 0xBA, ++ 0xBB, 0xF6, 0xE4, 0xB5, 0x96, 0xF6, 0xE9, 0xBB, ++ 0xB9, 0xF6, 0xE9, 0xBB, 0xBE, 0xF6, 0xE9, 0xBC, ++ 0x85, 0xF6, 0xE9, 0xBC, 0x8F, 0xF6, 0xE9, 0xBC, ++ 0x96, 0xF6, 0xE9, 0xBC, 0xBB, 0xF6, 0xF0, 0xAA, ++ 0x98, 0x80, ++ }, ++}; ++ ++static const uchar_t u8_case_common_b2_tbl[2][2][256] = { ++ { ++ { ++ 0, N_, N_, N_, N_, N_, N_, N_, ++ N_, N_, N_, N_, N_, N_, N_, N_, ++ N_, N_, N_, N_, N_, N_, N_, N_, ++ N_, N_, N_, N_, N_, N_, N_, N_, ++ N_, N_, N_, N_, N_, N_, N_, N_, ++ N_, N_, N_, N_, N_, N_, N_, N_, ++ N_, N_, N_, N_, N_, N_, N_, N_, ++ N_, N_, N_, N_, N_, N_, N_, N_, ++ N_, N_, N_, N_, N_, N_, N_, N_, ++ N_, N_, N_, N_, N_, N_, N_, N_, ++ N_, N_, N_, N_, N_, N_, N_, N_, ++ N_, N_, N_, N_, N_, N_, N_, N_, ++ N_, N_, N_, N_, N_, N_, N_, N_, ++ N_, N_, N_, N_, N_, N_, N_, N_, ++ N_, N_, N_, N_, N_, N_, N_, N_, ++ N_, N_, N_, N_, N_, N_, N_, N_, ++ N_, N_, N_, N_, N_, N_, N_, N_, ++ N_, N_, N_, N_, N_, N_, N_, N_, ++ N_, N_, N_, N_, N_, N_, N_, N_, ++ N_, N_, N_, N_, N_, N_, N_, N_, ++ N_, N_, N_, N_, N_, N_, N_, N_, ++ N_, N_, N_, N_, N_, N_, N_, N_, ++ N_, N_, N_, N_, N_, N_, N_, N_, ++ N_, N_, N_, N_, N_, N_, N_, N_, ++ N_, N_, N_, N_, N_, N_, N_, N_, ++ N_, N_, N_, N_, N_, N_, N_, N_, ++ N_, N_, N_, N_, N_, N_, N_, N_, ++ N_, N_, N_, N_, N_, N_, N_, N_, ++ N_, 1, 2, N_, N_, N_, N_, N_, ++ N_, N_, N_, N_, N_, N_, N_, 3, ++ N_, N_, N_, N_, N_, N_, N_, N_, ++ N_, N_, N_, N_, N_, N_, N_, N_, ++ }, ++ { ++ N_, N_, N_, N_, N_, N_, N_, N_, ++ N_, N_, N_, N_, N_, N_, N_, N_, ++ N_, N_, N_, N_, N_, N_, N_, N_, ++ N_, N_, N_, N_, N_, N_, N_, N_, ++ N_, N_, N_, N_, N_, N_, N_, N_, ++ N_, N_, N_, N_, N_, N_, N_, N_, ++ N_, N_, N_, N_, N_, N_, N_, N_, ++ N_, N_, N_, N_, N_, N_, N_, N_, ++ N_, N_, N_, N_, N_, N_, N_, N_, ++ N_, N_, N_, N_, N_, N_, N_, N_, ++ N_, N_, N_, N_, N_, N_, N_, N_, ++ N_, N_, N_, N_, N_, N_, N_, N_, ++ N_, N_, N_, N_, N_, N_, N_, N_, ++ N_, N_, N_, N_, N_, N_, N_, N_, ++ N_, N_, N_, N_, N_, N_, N_, N_, ++ N_, N_, N_, N_, N_, N_, N_, N_, ++ N_, N_, N_, N_, N_, N_, N_, N_, ++ N_, N_, N_, N_, N_, N_, N_, N_, ++ 4, N_, N_, N_, N_, 
N_, N_, N_, ++ N_, N_, N_, N_, N_, N_, N_, N_, ++ N_, N_, N_, N_, N_, N_, N_, N_, ++ N_, N_, N_, N_, N_, N_, N_, N_, ++ N_, N_, N_, N_, N_, N_, N_, N_, ++ N_, N_, N_, N_, N_, N_, N_, N_, ++ N_, N_, N_, N_, N_, N_, N_, N_, ++ N_, N_, N_, N_, N_, N_, N_, N_, ++ N_, N_, N_, N_, N_, N_, N_, N_, ++ N_, N_, N_, N_, N_, N_, N_, N_, ++ N_, N_, N_, N_, N_, N_, N_, N_, ++ N_, N_, N_, N_, N_, N_, N_, N_, ++ N_, N_, N_, N_, N_, N_, N_, N_, ++ N_, N_, N_, N_, N_, N_, N_, N_, ++ }, ++ ++ }, ++ { ++ { ++ 0, N_, N_, N_, N_, N_, N_, N_, ++ N_, N_, N_, N_, N_, N_, N_, N_, ++ N_, N_, N_, N_, N_, N_, N_, N_, ++ N_, N_, N_, N_, N_, N_, N_, N_, ++ N_, N_, N_, N_, N_, N_, N_, N_, ++ N_, N_, N_, N_, N_, N_, N_, N_, ++ N_, N_, N_, N_, N_, N_, N_, N_, ++ N_, N_, N_, N_, N_, N_, N_, N_, ++ N_, N_, N_, N_, N_, N_, N_, N_, ++ N_, N_, N_, N_, N_, N_, N_, N_, ++ N_, N_, N_, N_, N_, N_, N_, N_, ++ N_, N_, N_, N_, N_, N_, N_, N_, ++ N_, N_, N_, N_, N_, N_, N_, N_, ++ N_, N_, N_, N_, N_, N_, N_, N_, ++ N_, N_, N_, N_, N_, N_, N_, N_, ++ N_, N_, N_, N_, N_, N_, N_, N_, ++ N_, N_, N_, N_, N_, N_, N_, N_, ++ N_, N_, N_, N_, N_, N_, N_, N_, ++ N_, N_, N_, N_, N_, N_, N_, N_, ++ N_, N_, N_, N_, N_, N_, N_, N_, ++ N_, N_, N_, N_, N_, N_, N_, N_, ++ N_, N_, N_, N_, N_, N_, N_, N_, ++ N_, N_, N_, N_, N_, N_, N_, N_, ++ N_, N_, N_, N_, N_, N_, N_, N_, ++ N_, N_, N_, N_, N_, N_, N_, N_, ++ N_, N_, N_, N_, N_, N_, N_, N_, ++ N_, N_, N_, N_, N_, N_, N_, N_, ++ N_, N_, N_, N_, N_, N_, N_, N_, ++ N_, 1, 2, N_, N_, N_, N_, N_, ++ N_, N_, N_, N_, N_, N_, N_, 3, ++ N_, N_, N_, N_, N_, N_, N_, N_, ++ N_, N_, N_, N_, N_, N_, N_, N_, ++ }, ++ { ++ N_, N_, N_, N_, N_, N_, N_, N_, ++ N_, N_, N_, N_, N_, N_, N_, N_, ++ N_, N_, N_, N_, N_, N_, N_, N_, ++ N_, N_, N_, N_, N_, N_, N_, N_, ++ N_, N_, N_, N_, N_, N_, N_, N_, ++ N_, N_, N_, N_, N_, N_, N_, N_, ++ N_, N_, N_, N_, N_, N_, N_, N_, ++ N_, N_, N_, N_, N_, N_, N_, N_, ++ N_, N_, N_, N_, N_, N_, N_, N_, ++ N_, N_, N_, N_, N_, N_, N_, N_, ++ N_, N_, N_, N_, N_, N_, N_, N_, ++ N_, N_, N_, N_, N_, N_, N_, N_, ++ N_, N_, N_, N_, N_, N_, N_, N_, ++ N_, N_, N_, N_, N_, N_, N_, N_, ++ N_, N_, N_, N_, N_, N_, N_, N_, ++ N_, N_, N_, N_, N_, N_, N_, N_, ++ N_, N_, N_, N_, N_, N_, N_, N_, ++ N_, N_, N_, N_, N_, N_, N_, N_, ++ 4, N_, N_, N_, N_, N_, N_, N_, ++ N_, N_, N_, N_, N_, N_, N_, N_, ++ N_, N_, N_, N_, N_, N_, N_, N_, ++ N_, N_, N_, N_, N_, N_, N_, N_, ++ N_, N_, N_, N_, N_, N_, N_, N_, ++ N_, N_, N_, N_, N_, N_, N_, N_, ++ N_, N_, N_, N_, N_, N_, N_, N_, ++ N_, N_, N_, N_, N_, N_, N_, N_, ++ N_, N_, N_, N_, N_, N_, N_, N_, ++ N_, N_, N_, N_, N_, N_, N_, N_, ++ N_, N_, N_, N_, N_, N_, N_, N_, ++ N_, N_, N_, N_, N_, N_, N_, N_, ++ N_, N_, N_, N_, N_, N_, N_, N_, ++ N_, N_, N_, N_, N_, N_, N_, N_, ++ }, ++ ++ }, ++ ++}; ++ ++static const u8_displacement_t u8_tolower_b3_tbl[2][5][256] = { ++ { ++ { /* Third byte table 0. 
*/ ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, { 0, 0 }, ++ { 1, 60 }, { 2, 123 }, { 3, 185 }, { 4, 257 }, ++ { 5, 321 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { 6, 373 }, { 7, 439 }, ++ { 8, 465 }, { 9, 561 }, { 10, 593 }, { 11, 649 }, ++ { 12, 703 }, { 13, 749 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ }, ++ { /* Third byte table 1. 
*/ ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { 14, 795 }, { 15, 891 }, { 16, 987 }, { 17, 1068 }, ++ { 18, 1155 }, { 19, 1245 }, { 20, 1299 }, { 21, 1386 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ }, ++ { /* Third byte table 2. 
*/ ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { 22, 1443 }, { 23, 1448 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { 24, 1496 }, { 25, 1526 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ }, ++ { /* Third byte table 3. 
*/ ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { 26, 1574 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ }, ++ { /* Third byte table 4. 
*/ ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { 27, 1652 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ }, ++ }, ++ { ++ { /* Third byte table 0. 
*/ ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, { 0, 0 }, ++ { 1, 60 }, { 2, 123 }, { 3, 185 }, { 4, 257 }, ++ { 5, 321 }, { 6, 383 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { 7, 401 }, { 8, 467 }, ++ { 9, 505 }, { 10, 601 }, { 11, 633 }, { 12, 689 }, ++ { 13, 753 }, { 14, 803 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ }, ++ { /* Third byte table 1. 
*/ ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { 15, 849 }, { 16, 945 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { 17, 963 }, { 18, 1059 }, { 19, 1155 }, { 20, 1236 }, ++ { 21, 1323 }, { 22, 1413 }, { 23, 1467 }, { 24, 1554 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ }, ++ { /* Third byte table 2. 
*/ ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { 25, 1611 }, { 26, 1619 }, { 27, 1667 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { 28, 1670 }, { 29, 1700 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { 30, 1748 }, { 31, 1889 }, { 32, 1911 }, { 33, 2007 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ }, ++ { /* Third byte table 3. 
*/ ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { 34, 2061 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ }, ++ { /* Third byte table 4. 
*/ ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { 35, 2139 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ }, ++ }, ++}; ++ ++static const uchar_t u8_tolower_b4_tbl[2][36][257] = { ++ { ++ { /* Fourth byte table 0. 
*/ ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 2, 4, 6, 8, 10, 12, 14, ++ 16, 18, 20, 22, 24, 26, 28, 30, ++ 32, 34, 36, 38, 40, 42, 44, 46, ++ 46, 48, 50, 52, 54, 56, 58, 60, ++ 60, 60, 60, 60, 60, 60, 60, 60, ++ 60, 60, 60, 60, 60, 60, 60, 60, ++ 60, 60, 60, 60, 60, 60, 60, 60, ++ 60, 60, 60, 60, 60, 60, 60, 60, ++ 60, 60, 60, 60, 60, 60, 60, 60, ++ 60, 60, 60, 60, 60, 60, 60, 60, ++ 60, 60, 60, 60, 60, 60, 60, 60, ++ 60, 60, 60, 60, 60, 60, 60, 60, ++ 60, 60, 60, 60, 60, 60, 60, 60, ++ 60, 60, 60, 60, 60, 60, 60, 60, ++ 60, 60, 60, 60, 60, 60, 60, 60, ++ 60, 60, 60, 60, 60, 60, 60, 60, ++ 60, ++ }, ++ { /* Fourth byte table 1. */ ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 2, 2, 4, 4, 6, 6, 8, ++ 8, 10, 10, 12, 12, 14, 14, 16, ++ 16, 18, 18, 20, 20, 22, 22, 24, ++ 24, 26, 26, 28, 28, 30, 30, 32, ++ 32, 34, 34, 36, 36, 38, 38, 40, ++ 40, 42, 42, 44, 44, 46, 46, 48, ++ 48, 49, 49, 51, 51, 53, 53, 55, ++ 55, 55, 57, 57, 59, 59, 61, 61, ++ 63, 63, 63, 63, 63, 63, 63, 63, ++ 63, 63, 63, 63, 63, 63, 63, 63, ++ 63, 63, 63, 63, 63, 63, 63, 63, ++ 63, 63, 63, 63, 63, 63, 63, 63, ++ 63, 63, 63, 63, 63, 63, 63, 63, ++ 63, 63, 63, 63, 63, 63, 63, 63, ++ 63, 63, 63, 63, 63, 63, 63, 63, ++ 63, 63, 63, 63, 63, 63, 63, 63, ++ 63, ++ }, ++ { /* Fourth byte table 2. */ ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 2, 2, 4, 4, 6, 6, ++ 8, 8, 8, 10, 10, 12, 12, 14, ++ 14, 16, 16, 18, 18, 20, 20, 22, ++ 22, 24, 24, 26, 26, 28, 28, 30, ++ 30, 32, 32, 34, 34, 36, 36, 38, ++ 38, 40, 40, 42, 42, 44, 44, 46, ++ 46, 48, 48, 50, 50, 52, 52, 54, ++ 54, 56, 58, 58, 60, 60, 62, 62, ++ 62, 62, 62, 62, 62, 62, 62, 62, ++ 62, 62, 62, 62, 62, 62, 62, 62, ++ 62, 62, 62, 62, 62, 62, 62, 62, ++ 62, 62, 62, 62, 62, 62, 62, 62, ++ 62, 62, 62, 62, 62, 62, 62, 62, ++ 62, 62, 62, 62, 62, 62, 62, 62, ++ 62, 62, 62, 62, 62, 62, 62, 62, ++ 62, 62, 62, 62, 62, 62, 62, 62, ++ 62, ++ }, ++ { /* Fourth byte table 3. 
*/ ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 2, 4, 4, 6, 6, 8, ++ 10, 10, 12, 14, 16, 16, 16, 18, ++ 20, 22, 24, 24, 26, 28, 28, 30, ++ 32, 34, 34, 34, 34, 36, 38, 38, ++ 40, 42, 42, 44, 44, 46, 46, 48, ++ 50, 50, 52, 52, 52, 54, 54, 56, ++ 58, 58, 60, 62, 64, 64, 66, 66, ++ 68, 70, 70, 70, 70, 72, 72, 72, ++ 72, 72, 72, 72, 72, 72, 72, 72, ++ 72, 72, 72, 72, 72, 72, 72, 72, ++ 72, 72, 72, 72, 72, 72, 72, 72, ++ 72, 72, 72, 72, 72, 72, 72, 72, ++ 72, 72, 72, 72, 72, 72, 72, 72, ++ 72, 72, 72, 72, 72, 72, 72, 72, ++ 72, 72, 72, 72, 72, 72, 72, 72, ++ 72, 72, 72, 72, 72, 72, 72, 72, ++ 72, ++ }, ++ { /* Fourth byte table 4. */ ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 2, 4, 4, ++ 6, 8, 8, 10, 12, 12, 14, 14, ++ 16, 16, 18, 18, 20, 20, 22, 22, ++ 24, 24, 26, 26, 28, 28, 28, 30, ++ 30, 32, 32, 34, 34, 36, 36, 38, ++ 38, 40, 40, 42, 42, 44, 44, 46, ++ 46, 46, 48, 50, 50, 52, 52, 54, ++ 56, 58, 58, 60, 60, 62, 62, 64, ++ 64, 64, 64, 64, 64, 64, 64, 64, ++ 64, 64, 64, 64, 64, 64, 64, 64, ++ 64, 64, 64, 64, 64, 64, 64, 64, ++ 64, 64, 64, 64, 64, 64, 64, 64, ++ 64, 64, 64, 64, 64, 64, 64, 64, ++ 64, 64, 64, 64, 64, 64, 64, 64, ++ 64, 64, 64, 64, 64, 64, 64, 64, ++ 64, 64, 64, 64, 64, 64, 64, 64, ++ 64, ++ }, ++ { /* Fourth byte table 5. */ ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 2, 2, 4, 4, 6, 6, 8, ++ 8, 10, 10, 12, 12, 14, 14, 16, ++ 16, 18, 18, 20, 20, 22, 22, 24, ++ 24, 26, 26, 28, 28, 30, 30, 32, ++ 32, 34, 34, 36, 36, 38, 38, 40, ++ 40, 42, 42, 44, 44, 46, 46, 48, ++ 48, 50, 50, 52, 52, 52, 52, 52, ++ 52, 52, 52, 52, 52, 52, 52, 52, ++ 52, 52, 52, 52, 52, 52, 52, 52, ++ 52, 52, 52, 52, 52, 52, 52, 52, ++ 52, 52, 52, 52, 52, 52, 52, 52, ++ 52, 52, 52, 52, 52, 52, 52, 52, ++ 52, 52, 52, 52, 52, 52, 52, 52, ++ 52, 52, 52, 52, 52, 52, 52, 52, ++ 52, 52, 52, 52, 52, 52, 52, 52, ++ 52, 52, 52, 52, 52, 52, 52, 52, ++ 52, ++ }, ++ { /* Fourth byte table 6. 
*/ ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 2, ++ 2, 4, 6, 8, 8, 10, 10, 12, ++ 14, 14, 16, 18, 20, 22, 24, 26, ++ 28, 30, 32, 34, 36, 38, 40, 42, ++ 44, 46, 48, 48, 50, 52, 54, 56, ++ 58, 60, 62, 64, 66, 66, 66, 66, ++ 66, 66, 66, 66, 66, 66, 66, 66, ++ 66, 66, 66, 66, 66, 66, 66, 66, ++ 66, 66, 66, 66, 66, 66, 66, 66, ++ 66, 66, 66, 66, 66, 66, 66, 66, ++ 66, 66, 66, 66, 66, 66, 66, 66, ++ 66, 66, 66, 66, 66, 66, 66, 66, ++ 66, 66, 66, 66, 66, 66, 66, 66, ++ 66, 66, 66, 66, 66, 66, 66, 66, ++ 66, 66, 66, 66, 66, 66, 66, 66, ++ 66, 66, 66, 66, 66, 66, 66, 66, ++ 66, ++ }, ++ { /* Fourth byte table 7. */ ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 2, 2, 4, 4, 6, 6, 8, ++ 8, 10, 10, 12, 12, 14, 14, 16, ++ 16, 18, 18, 20, 20, 22, 22, 24, ++ 24, 24, 24, 24, 24, 26, 26, 26, ++ 26, 26, 26, 26, 26, 26, 26, 26, ++ 26, 26, 26, 26, 26, 26, 26, 26, ++ 26, 26, 26, 26, 26, 26, 26, 26, ++ 26, 26, 26, 26, 26, 26, 26, 26, ++ 26, 26, 26, 26, 26, 26, 26, 26, ++ 26, 26, 26, 26, 26, 26, 26, 26, ++ 26, 26, 26, 26, 26, 26, 26, 26, ++ 26, 26, 26, 26, 26, 26, 26, 26, ++ 26, 26, 26, 26, 26, 26, 26, 26, ++ 26, ++ }, ++ { /* Fourth byte table 8. */ ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 2, 4, 6, 8, 10, 12, 14, ++ 16, 18, 20, 22, 24, 26, 28, 30, ++ 32, 34, 36, 38, 40, 42, 44, 46, ++ 48, 50, 52, 54, 56, 58, 60, 62, ++ 64, 66, 68, 70, 72, 74, 76, 78, ++ 80, 82, 84, 86, 88, 90, 92, 94, ++ 96, 96, 96, 96, 96, 96, 96, 96, ++ 96, 96, 96, 96, 96, 96, 96, 96, ++ 96, 96, 96, 96, 96, 96, 96, 96, ++ 96, 96, 96, 96, 96, 96, 96, 96, ++ 96, 96, 96, 96, 96, 96, 96, 96, ++ 96, 96, 96, 96, 96, 96, 96, 96, ++ 96, 96, 96, 96, 96, 96, 96, 96, ++ 96, 96, 96, 96, 96, 96, 96, 96, ++ 96, 96, 96, 96, 96, 96, 96, 96, ++ 96, 96, 96, 96, 96, 96, 96, 96, ++ 96, ++ }, ++ { /* Fourth byte table 9. 
*/ ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 2, 2, 4, 4, 6, 6, 8, ++ 8, 10, 10, 12, 12, 14, 14, 16, ++ 16, 18, 18, 20, 20, 22, 22, 24, ++ 24, 26, 26, 28, 28, 30, 30, 32, ++ 32, 32, 32, 32, 32, 32, 32, 32, ++ 32, 32, 32, 32, 32, 32, 32, 32, ++ 32, 32, 32, 32, 32, 32, 32, 32, ++ 32, 32, 32, 32, 32, 32, 32, 32, ++ 32, 32, 32, 32, 32, 32, 32, 32, ++ 32, 32, 32, 32, 32, 32, 32, 32, ++ 32, 32, 32, 32, 32, 32, 32, 32, ++ 32, 32, 32, 32, 32, 32, 32, 32, ++ 32, ++ }, ++ { /* Fourth byte table 10. */ ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 2, 2, 2, 2, 2, 2, 2, ++ 2, 2, 2, 4, 4, 6, 6, 8, ++ 8, 10, 10, 12, 12, 14, 14, 16, ++ 16, 18, 18, 20, 20, 22, 22, 24, ++ 24, 26, 26, 28, 28, 30, 30, 32, ++ 32, 34, 34, 36, 36, 38, 38, 40, ++ 40, 42, 42, 44, 44, 46, 46, 48, ++ 48, 50, 50, 52, 52, 54, 54, 56, ++ 56, 56, 56, 56, 56, 56, 56, 56, ++ 56, 56, 56, 56, 56, 56, 56, 56, ++ 56, 56, 56, 56, 56, 56, 56, 56, ++ 56, 56, 56, 56, 56, 56, 56, 56, ++ 56, 56, 56, 56, 56, 56, 56, 56, ++ 56, 56, 56, 56, 56, 56, 56, 56, ++ 56, 56, 56, 56, 56, 56, 56, 56, ++ 56, 56, 56, 56, 56, 56, 56, 56, ++ 56, ++ }, ++ { /* Fourth byte table 11. */ ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 2, 2, 4, 4, 6, 6, ++ 8, 8, 10, 10, 12, 12, 14, 14, ++ 14, 16, 16, 18, 18, 20, 20, 22, ++ 22, 24, 24, 26, 26, 28, 28, 30, ++ 30, 32, 32, 34, 34, 36, 36, 38, ++ 38, 40, 40, 42, 42, 44, 44, 46, ++ 46, 48, 48, 50, 50, 52, 52, 52, ++ 52, 54, 54, 54, 54, 54, 54, 54, ++ 54, 54, 54, 54, 54, 54, 54, 54, ++ 54, 54, 54, 54, 54, 54, 54, 54, ++ 54, 54, 54, 54, 54, 54, 54, 54, ++ 54, 54, 54, 54, 54, 54, 54, 54, ++ 54, 54, 54, 54, 54, 54, 54, 54, ++ 54, 54, 54, 54, 54, 54, 54, 54, ++ 54, 54, 54, 54, 54, 54, 54, 54, ++ 54, 54, 54, 54, 54, 54, 54, 54, ++ 54, ++ }, ++ { /* Fourth byte table 12. 
*/ ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 2, 2, 4, 4, 6, 6, 8, ++ 8, 10, 10, 12, 12, 14, 14, 16, ++ 16, 16, 16, 16, 16, 16, 16, 16, ++ 16, 16, 16, 16, 16, 16, 16, 16, ++ 16, 16, 16, 16, 16, 16, 16, 16, ++ 16, 16, 16, 16, 16, 16, 16, 16, ++ 16, 16, 18, 20, 22, 24, 26, 28, ++ 30, 32, 34, 36, 38, 40, 42, 44, ++ 46, 46, 46, 46, 46, 46, 46, 46, ++ 46, 46, 46, 46, 46, 46, 46, 46, ++ 46, 46, 46, 46, 46, 46, 46, 46, ++ 46, 46, 46, 46, 46, 46, 46, 46, ++ 46, 46, 46, 46, 46, 46, 46, 46, ++ 46, 46, 46, 46, 46, 46, 46, 46, ++ 46, 46, 46, 46, 46, 46, 46, 46, ++ 46, 46, 46, 46, 46, 46, 46, 46, ++ 46, ++ }, ++ { /* Fourth byte table 13. */ ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 2, 4, 6, 8, 10, 12, 14, ++ 16, 18, 20, 22, 24, 26, 28, 30, ++ 32, 34, 36, 38, 40, 42, 44, 46, ++ 46, 46, 46, 46, 46, 46, 46, 46, ++ 46, 46, 46, 46, 46, 46, 46, 46, ++ 46, 46, 46, 46, 46, 46, 46, 46, ++ 46, 46, 46, 46, 46, 46, 46, 46, ++ 46, 46, 46, 46, 46, 46, 46, 46, ++ 46, 46, 46, 46, 46, 46, 46, 46, ++ 46, 46, 46, 46, 46, 46, 46, 46, ++ 46, 46, 46, 46, 46, 46, 46, 46, ++ 46, 46, 46, 46, 46, 46, 46, 46, ++ 46, 46, 46, 46, 46, 46, 46, 46, ++ 46, 46, 46, 46, 46, 46, 46, 46, ++ 46, 46, 46, 46, 46, 46, 46, 46, ++ 46, 46, 46, 46, 46, 46, 46, 46, ++ 46, ++ }, ++ { /* Fourth byte table 14. */ ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 3, 3, 6, 6, 9, 9, 12, ++ 12, 15, 15, 18, 18, 21, 21, 24, ++ 24, 27, 27, 30, 30, 33, 33, 36, ++ 36, 39, 39, 42, 42, 45, 45, 48, ++ 48, 51, 51, 54, 54, 57, 57, 60, ++ 60, 63, 63, 66, 66, 69, 69, 72, ++ 72, 75, 75, 78, 78, 81, 81, 84, ++ 84, 87, 87, 90, 90, 93, 93, 96, ++ 96, 96, 96, 96, 96, 96, 96, 96, ++ 96, 96, 96, 96, 96, 96, 96, 96, ++ 96, 96, 96, 96, 96, 96, 96, 96, ++ 96, 96, 96, 96, 96, 96, 96, 96, ++ 96, 96, 96, 96, 96, 96, 96, 96, ++ 96, 96, 96, 96, 96, 96, 96, 96, ++ 96, 96, 96, 96, 96, 96, 96, 96, ++ 96, 96, 96, 96, 96, 96, 96, 96, ++ 96, ++ }, ++ { /* Fourth byte table 15. 
*/ ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 3, 3, 6, 6, 9, 9, 12, ++ 12, 15, 15, 18, 18, 21, 21, 24, ++ 24, 27, 27, 30, 30, 33, 33, 36, ++ 36, 39, 39, 42, 42, 45, 45, 48, ++ 48, 51, 51, 54, 54, 57, 57, 60, ++ 60, 63, 63, 66, 66, 69, 69, 72, ++ 72, 75, 75, 78, 78, 81, 81, 84, ++ 84, 87, 87, 90, 90, 93, 93, 96, ++ 96, 96, 96, 96, 96, 96, 96, 96, ++ 96, 96, 96, 96, 96, 96, 96, 96, ++ 96, 96, 96, 96, 96, 96, 96, 96, ++ 96, 96, 96, 96, 96, 96, 96, 96, ++ 96, 96, 96, 96, 96, 96, 96, 96, ++ 96, 96, 96, 96, 96, 96, 96, 96, ++ 96, 96, 96, 96, 96, 96, 96, 96, ++ 96, 96, 96, 96, 96, 96, 96, 96, ++ 96, ++ }, ++ { /* Fourth byte table 16. */ ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 3, 3, 6, 6, 9, 9, 12, ++ 12, 15, 15, 18, 18, 21, 21, 24, ++ 24, 27, 27, 30, 30, 33, 33, 33, ++ 33, 33, 33, 33, 33, 33, 33, 33, ++ 33, 36, 36, 39, 39, 42, 42, 45, ++ 45, 48, 48, 51, 51, 54, 54, 57, ++ 57, 60, 60, 63, 63, 66, 66, 69, ++ 69, 72, 72, 75, 75, 78, 78, 81, ++ 81, 81, 81, 81, 81, 81, 81, 81, ++ 81, 81, 81, 81, 81, 81, 81, 81, ++ 81, 81, 81, 81, 81, 81, 81, 81, ++ 81, 81, 81, 81, 81, 81, 81, 81, ++ 81, 81, 81, 81, 81, 81, 81, 81, ++ 81, 81, 81, 81, 81, 81, 81, 81, ++ 81, 81, 81, 81, 81, 81, 81, 81, ++ 81, 81, 81, 81, 81, 81, 81, 81, ++ 81, ++ }, ++ { /* Fourth byte table 17. */ ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 3, 3, 6, 6, 9, 9, 12, ++ 12, 15, 15, 18, 18, 21, 21, 24, ++ 24, 27, 27, 30, 30, 33, 33, 36, ++ 36, 39, 39, 42, 42, 45, 45, 48, ++ 48, 51, 51, 54, 54, 57, 57, 60, ++ 60, 63, 63, 66, 66, 69, 69, 72, ++ 72, 75, 75, 78, 78, 81, 81, 84, ++ 84, 87, 87, 87, 87, 87, 87, 87, ++ 87, 87, 87, 87, 87, 87, 87, 87, ++ 87, 87, 87, 87, 87, 87, 87, 87, ++ 87, 87, 87, 87, 87, 87, 87, 87, ++ 87, 87, 87, 87, 87, 87, 87, 87, ++ 87, 87, 87, 87, 87, 87, 87, 87, ++ 87, 87, 87, 87, 87, 87, 87, 87, ++ 87, 87, 87, 87, 87, 87, 87, 87, ++ 87, 87, 87, 87, 87, 87, 87, 87, ++ 87, ++ }, ++ { /* Fourth byte table 18. 
*/ ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 3, 6, 9, 12, 15, 18, 21, ++ 24, 24, 24, 24, 24, 24, 24, 24, ++ 24, 27, 30, 33, 36, 39, 42, 42, ++ 42, 42, 42, 42, 42, 42, 42, 42, ++ 42, 45, 48, 51, 54, 57, 60, 63, ++ 66, 66, 66, 66, 66, 66, 66, 66, ++ 66, 69, 72, 75, 78, 81, 84, 87, ++ 90, 90, 90, 90, 90, 90, 90, 90, ++ 90, 90, 90, 90, 90, 90, 90, 90, ++ 90, 90, 90, 90, 90, 90, 90, 90, ++ 90, 90, 90, 90, 90, 90, 90, 90, ++ 90, 90, 90, 90, 90, 90, 90, 90, ++ 90, 90, 90, 90, 90, 90, 90, 90, ++ 90, 90, 90, 90, 90, 90, 90, 90, ++ 90, 90, 90, 90, 90, 90, 90, 90, ++ 90, ++ }, ++ { /* Fourth byte table 19. */ ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 3, 6, 9, 12, 15, 18, 18, ++ 18, 18, 18, 18, 18, 18, 18, 18, ++ 18, 18, 21, 21, 24, 24, 27, 27, ++ 30, 30, 30, 30, 30, 30, 30, 30, ++ 30, 33, 36, 39, 42, 45, 48, 51, ++ 54, 54, 54, 54, 54, 54, 54, 54, ++ 54, 54, 54, 54, 54, 54, 54, 54, ++ 54, 54, 54, 54, 54, 54, 54, 54, ++ 54, 54, 54, 54, 54, 54, 54, 54, ++ 54, 54, 54, 54, 54, 54, 54, 54, ++ 54, 54, 54, 54, 54, 54, 54, 54, ++ 54, 54, 54, 54, 54, 54, 54, 54, ++ 54, 54, 54, 54, 54, 54, 54, 54, ++ 54, 54, 54, 54, 54, 54, 54, 54, ++ 54, 54, 54, 54, 54, 54, 54, 54, ++ 54, ++ }, ++ { /* Fourth byte table 20. */ ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 3, 6, 9, 12, 15, 18, 21, ++ 24, 24, 24, 24, 24, 24, 24, 24, ++ 24, 27, 30, 33, 36, 39, 42, 45, ++ 48, 48, 48, 48, 48, 48, 48, 48, ++ 48, 51, 54, 57, 60, 63, 66, 69, ++ 72, 72, 72, 72, 72, 72, 72, 72, ++ 72, 75, 78, 81, 84, 87, 87, 87, ++ 87, 87, 87, 87, 87, 87, 87, 87, ++ 87, 87, 87, 87, 87, 87, 87, 87, ++ 87, 87, 87, 87, 87, 87, 87, 87, ++ 87, 87, 87, 87, 87, 87, 87, 87, ++ 87, 87, 87, 87, 87, 87, 87, 87, ++ 87, 87, 87, 87, 87, 87, 87, 87, ++ 87, 87, 87, 87, 87, 87, 87, 87, ++ 87, 87, 87, 87, 87, 87, 87, 87, ++ 87, ++ }, ++ { /* Fourth byte table 21. 
*/ ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 3, 6, 9, 12, 15, 15, 15, ++ 15, 15, 15, 15, 15, 15, 15, 15, ++ 15, 18, 21, 24, 27, 27, 27, 27, ++ 27, 27, 27, 27, 27, 27, 27, 27, ++ 27, 30, 33, 36, 39, 42, 42, 42, ++ 42, 42, 42, 42, 42, 42, 42, 42, ++ 42, 45, 48, 51, 54, 57, 57, 57, ++ 57, 57, 57, 57, 57, 57, 57, 57, ++ 57, 57, 57, 57, 57, 57, 57, 57, ++ 57, 57, 57, 57, 57, 57, 57, 57, ++ 57, 57, 57, 57, 57, 57, 57, 57, ++ 57, 57, 57, 57, 57, 57, 57, 57, ++ 57, 57, 57, 57, 57, 57, 57, 57, ++ 57, 57, 57, 57, 57, 57, 57, 57, ++ 57, 57, 57, 57, 57, 57, 57, 57, ++ 57, ++ }, ++ { /* Fourth byte table 22. */ ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 2, ++ 2, 2, 2, 3, 5, 5, 5, 5, ++ 5, 5, 5, 5, 5, 5, 5, 5, ++ 5, 5, 5, 5, 5, 5, 5, 5, ++ 5, 5, 5, 5, 5, 5, 5, 5, ++ 5, 5, 5, 5, 5, 5, 5, 5, ++ 5, 5, 5, 5, 5, 5, 5, 5, ++ 5, 5, 5, 5, 5, 5, 5, 5, ++ 5, 5, 5, 5, 5, 5, 5, 5, ++ 5, 5, 5, 5, 5, 5, 5, 5, ++ 5, 5, 5, 5, 5, 5, 5, 5, ++ 5, 5, 5, 5, 5, 5, 5, 5, ++ 5, ++ }, ++ { /* Fourth byte table 23. */ ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 3, 6, 9, 12, 15, 18, 21, ++ 24, 27, 30, 33, 36, 39, 42, 45, ++ 48, 48, 48, 48, 48, 48, 48, 48, ++ 48, 48, 48, 48, 48, 48, 48, 48, ++ 48, 48, 48, 48, 48, 48, 48, 48, ++ 48, 48, 48, 48, 48, 48, 48, 48, ++ 48, 48, 48, 48, 48, 48, 48, 48, ++ 48, 48, 48, 48, 48, 48, 48, 48, ++ 48, 48, 48, 48, 48, 48, 48, 48, ++ 48, 48, 48, 48, 48, 48, 48, 48, ++ 48, 48, 48, 48, 48, 48, 48, 48, ++ 48, 48, 48, 48, 48, 48, 48, 48, ++ 48, ++ }, ++ { /* Fourth byte table 24. 
*/ ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 3, ++ 6, 9, 12, 15, 18, 21, 24, 27, ++ 30, 30, 30, 30, 30, 30, 30, 30, ++ 30, 30, 30, 30, 30, 30, 30, 30, ++ 30, 30, 30, 30, 30, 30, 30, 30, ++ 30, 30, 30, 30, 30, 30, 30, 30, ++ 30, 30, 30, 30, 30, 30, 30, 30, ++ 30, 30, 30, 30, 30, 30, 30, 30, ++ 30, 30, 30, 30, 30, 30, 30, 30, ++ 30, 30, 30, 30, 30, 30, 30, 30, ++ 30, ++ }, ++ { /* Fourth byte table 25. */ ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 3, 6, 9, 12, 15, 18, 21, ++ 24, 27, 30, 33, 36, 39, 42, 45, ++ 48, 48, 48, 48, 48, 48, 48, 48, ++ 48, 48, 48, 48, 48, 48, 48, 48, ++ 48, 48, 48, 48, 48, 48, 48, 48, ++ 48, 48, 48, 48, 48, 48, 48, 48, ++ 48, 48, 48, 48, 48, 48, 48, 48, ++ 48, 48, 48, 48, 48, 48, 48, 48, ++ 48, 48, 48, 48, 48, 48, 48, 48, ++ 48, 48, 48, 48, 48, 48, 48, 48, ++ 48, 48, 48, 48, 48, 48, 48, 48, ++ 48, 48, 48, 48, 48, 48, 48, 48, ++ 48, 48, 48, 48, 48, 48, 48, 48, ++ 48, 48, 48, 48, 48, 48, 48, 48, ++ 48, 48, 48, 48, 48, 48, 48, 48, ++ 48, 48, 48, 48, 48, 48, 48, 48, ++ 48, ++ }, ++ { /* Fourth byte table 26. */ ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 3, 6, 9, 12, 15, 18, ++ 21, 24, 27, 30, 33, 36, 39, 42, ++ 45, 48, 51, 54, 57, 60, 63, 66, ++ 69, 72, 75, 78, 78, 78, 78, 78, ++ 78, 78, 78, 78, 78, 78, 78, 78, ++ 78, 78, 78, 78, 78, 78, 78, 78, ++ 78, 78, 78, 78, 78, 78, 78, 78, ++ 78, 78, 78, 78, 78, 78, 78, 78, ++ 78, 78, 78, 78, 78, 78, 78, 78, ++ 78, 78, 78, 78, 78, 78, 78, 78, ++ 78, 78, 78, 78, 78, 78, 78, 78, ++ 78, 78, 78, 78, 78, 78, 78, 78, ++ 78, ++ }, ++ { /* Fourth byte table 27. 
*/ ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 4, 8, 12, 16, 20, 24, 28, ++ 32, 36, 40, 44, 48, 52, 56, 60, ++ 64, 68, 72, 76, 80, 84, 88, 92, ++ 96, 100, 104, 108, 112, 116, 120, 124, ++ 128, 132, 136, 140, 144, 148, 152, 152, ++ 152, 152, 152, 152, 152, 152, 152, 152, ++ 152, 152, 152, 152, 152, 152, 152, 152, ++ 152, 152, 152, 152, 152, 152, 152, 152, ++ 152, 152, 152, 152, 152, 152, 152, 152, ++ 152, 152, 152, 152, 152, 152, 152, 152, ++ 152, 152, 152, 152, 152, 152, 152, 152, ++ 152, 152, 152, 152, 152, 152, 152, 152, ++ 152, 152, 152, 152, 152, 152, 152, 152, ++ 152, 152, 152, 152, 152, 152, 152, 152, ++ 152, 152, 152, 152, 152, 152, 152, 152, ++ 152, 152, 152, 152, 152, 152, 152, 152, ++ 152, ++ }, ++ { /* Fourth byte table 28. */ ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, ++ }, ++ { /* Fourth byte table 29. */ ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, ++ }, ++ { /* Fourth byte table 30. 
*/ ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, ++ }, ++ { /* Fourth byte table 31. */ ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, ++ }, ++ { /* Fourth byte table 32. */ ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, ++ }, ++ { /* Fourth byte table 33. 
*/ ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, ++ }, ++ { /* Fourth byte table 34. */ ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, ++ }, ++ { /* Fourth byte table 35. */ ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, ++ }, ++ }, ++ { ++ { /* Fourth byte table 0. 
*/ ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 2, 4, 6, 8, 10, 12, 14, ++ 16, 18, 20, 22, 24, 26, 28, 30, ++ 32, 34, 36, 38, 40, 42, 44, 46, ++ 46, 48, 50, 52, 54, 56, 58, 60, ++ 60, 60, 60, 60, 60, 60, 60, 60, ++ 60, 60, 60, 60, 60, 60, 60, 60, ++ 60, 60, 60, 60, 60, 60, 60, 60, ++ 60, 60, 60, 60, 60, 60, 60, 60, ++ 60, 60, 60, 60, 60, 60, 60, 60, ++ 60, 60, 60, 60, 60, 60, 60, 60, ++ 60, 60, 60, 60, 60, 60, 60, 60, ++ 60, 60, 60, 60, 60, 60, 60, 60, ++ 60, 60, 60, 60, 60, 60, 60, 60, ++ 60, 60, 60, 60, 60, 60, 60, 60, ++ 60, 60, 60, 60, 60, 60, 60, 60, ++ 60, 60, 60, 60, 60, 60, 60, 60, ++ 60, ++ }, ++ { /* Fourth byte table 1. */ ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 2, 2, 4, 4, 6, 6, 8, ++ 8, 10, 10, 12, 12, 14, 14, 16, ++ 16, 18, 18, 20, 20, 22, 22, 24, ++ 24, 26, 26, 28, 28, 30, 30, 32, ++ 32, 34, 34, 36, 36, 38, 38, 40, ++ 40, 42, 42, 44, 44, 46, 46, 48, ++ 48, 49, 49, 51, 51, 53, 53, 55, ++ 55, 55, 57, 57, 59, 59, 61, 61, ++ 63, 63, 63, 63, 63, 63, 63, 63, ++ 63, 63, 63, 63, 63, 63, 63, 63, ++ 63, 63, 63, 63, 63, 63, 63, 63, ++ 63, 63, 63, 63, 63, 63, 63, 63, ++ 63, 63, 63, 63, 63, 63, 63, 63, ++ 63, 63, 63, 63, 63, 63, 63, 63, ++ 63, 63, 63, 63, 63, 63, 63, 63, ++ 63, 63, 63, 63, 63, 63, 63, 63, ++ 63, ++ }, ++ { /* Fourth byte table 2. */ ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 2, 2, 4, 4, 6, 6, ++ 8, 8, 8, 10, 10, 12, 12, 14, ++ 14, 16, 16, 18, 18, 20, 20, 22, ++ 22, 24, 24, 26, 26, 28, 28, 30, ++ 30, 32, 32, 34, 34, 36, 36, 38, ++ 38, 40, 40, 42, 42, 44, 44, 46, ++ 46, 48, 48, 50, 50, 52, 52, 54, ++ 54, 56, 58, 58, 60, 60, 62, 62, ++ 62, 62, 62, 62, 62, 62, 62, 62, ++ 62, 62, 62, 62, 62, 62, 62, 62, ++ 62, 62, 62, 62, 62, 62, 62, 62, ++ 62, 62, 62, 62, 62, 62, 62, 62, ++ 62, 62, 62, 62, 62, 62, 62, 62, ++ 62, 62, 62, 62, 62, 62, 62, 62, ++ 62, 62, 62, 62, 62, 62, 62, 62, ++ 62, 62, 62, 62, 62, 62, 62, 62, ++ 62, ++ }, ++ { /* Fourth byte table 3. 
*/ ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 2, 4, 4, 6, 6, 8, ++ 10, 10, 12, 14, 16, 16, 16, 18, ++ 20, 22, 24, 24, 26, 28, 28, 30, ++ 32, 34, 34, 34, 34, 36, 38, 38, ++ 40, 42, 42, 44, 44, 46, 46, 48, ++ 50, 50, 52, 52, 52, 54, 54, 56, ++ 58, 58, 60, 62, 64, 64, 66, 66, ++ 68, 70, 70, 70, 70, 72, 72, 72, ++ 72, 72, 72, 72, 72, 72, 72, 72, ++ 72, 72, 72, 72, 72, 72, 72, 72, ++ 72, 72, 72, 72, 72, 72, 72, 72, ++ 72, 72, 72, 72, 72, 72, 72, 72, ++ 72, 72, 72, 72, 72, 72, 72, 72, ++ 72, 72, 72, 72, 72, 72, 72, 72, ++ 72, 72, 72, 72, 72, 72, 72, 72, ++ 72, 72, 72, 72, 72, 72, 72, 72, ++ 72, ++ }, ++ { /* Fourth byte table 4. */ ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 2, 4, 4, ++ 6, 8, 8, 10, 12, 12, 14, 14, ++ 16, 16, 18, 18, 20, 20, 22, 22, ++ 24, 24, 26, 26, 28, 28, 28, 30, ++ 30, 32, 32, 34, 34, 36, 36, 38, ++ 38, 40, 40, 42, 42, 44, 44, 46, ++ 46, 46, 48, 50, 50, 52, 52, 54, ++ 56, 58, 58, 60, 60, 62, 62, 64, ++ 64, 64, 64, 64, 64, 64, 64, 64, ++ 64, 64, 64, 64, 64, 64, 64, 64, ++ 64, 64, 64, 64, 64, 64, 64, 64, ++ 64, 64, 64, 64, 64, 64, 64, 64, ++ 64, 64, 64, 64, 64, 64, 64, 64, ++ 64, 64, 64, 64, 64, 64, 64, 64, ++ 64, 64, 64, 64, 64, 64, 64, 64, ++ 64, 64, 64, 64, 64, 64, 64, 64, ++ 64, ++ }, ++ { /* Fourth byte table 5. */ ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 2, 2, 4, 4, 6, 6, 8, ++ 8, 10, 10, 12, 12, 14, 14, 16, ++ 16, 18, 18, 20, 20, 22, 22, 24, ++ 24, 26, 26, 28, 28, 30, 30, 32, ++ 32, 34, 34, 36, 36, 38, 38, 40, ++ 40, 42, 42, 44, 44, 46, 46, 48, ++ 48, 50, 50, 52, 52, 52, 52, 52, ++ 52, 52, 52, 55, 57, 57, 59, 62, ++ 62, 62, 62, 62, 62, 62, 62, 62, ++ 62, 62, 62, 62, 62, 62, 62, 62, ++ 62, 62, 62, 62, 62, 62, 62, 62, ++ 62, 62, 62, 62, 62, 62, 62, 62, ++ 62, 62, 62, 62, 62, 62, 62, 62, ++ 62, 62, 62, 62, 62, 62, 62, 62, ++ 62, 62, 62, 62, 62, 62, 62, 62, ++ 62, 62, 62, 62, 62, 62, 62, 62, ++ 62, ++ }, ++ { /* Fourth byte table 6. 
*/ ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 2, 2, 4, 6, 8, 10, ++ 10, 12, 12, 14, 14, 16, 16, 18, ++ 18, 18, 18, 18, 18, 18, 18, 18, ++ 18, 18, 18, 18, 18, 18, 18, 18, ++ 18, 18, 18, 18, 18, 18, 18, 18, ++ 18, 18, 18, 18, 18, 18, 18, 18, ++ 18, 18, 18, 18, 18, 18, 18, 18, ++ 18, 18, 18, 18, 18, 18, 18, 18, ++ 18, 18, 18, 18, 18, 18, 18, 18, ++ 18, 18, 18, 18, 18, 18, 18, 18, ++ 18, 18, 18, 18, 18, 18, 18, 18, ++ 18, 18, 18, 18, 18, 18, 18, 18, ++ 18, 18, 18, 18, 18, 18, 18, 18, ++ 18, 18, 18, 18, 18, 18, 18, 18, ++ 18, 18, 18, 18, 18, 18, 18, 18, ++ 18, 18, 18, 18, 18, 18, 18, 18, ++ 18, ++ }, ++ { /* Fourth byte table 7. */ ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 2, ++ 2, 4, 6, 8, 8, 10, 10, 12, ++ 14, 14, 16, 18, 20, 22, 24, 26, ++ 28, 30, 32, 34, 36, 38, 40, 42, ++ 44, 46, 48, 48, 50, 52, 54, 56, ++ 58, 60, 62, 64, 66, 66, 66, 66, ++ 66, 66, 66, 66, 66, 66, 66, 66, ++ 66, 66, 66, 66, 66, 66, 66, 66, ++ 66, 66, 66, 66, 66, 66, 66, 66, ++ 66, 66, 66, 66, 66, 66, 66, 66, ++ 66, 66, 66, 66, 66, 66, 66, 66, ++ 66, 66, 66, 66, 66, 66, 66, 66, ++ 66, 66, 66, 66, 66, 66, 66, 66, ++ 66, 66, 66, 66, 66, 66, 66, 66, ++ 66, 66, 66, 66, 66, 66, 66, 66, ++ 66, 66, 66, 66, 66, 66, 66, 66, ++ 66, ++ }, ++ { /* Fourth byte table 8. */ ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 2, 2, 4, 4, 6, 6, 8, ++ 8, 10, 10, 12, 12, 14, 14, 16, ++ 16, 18, 18, 20, 20, 22, 22, 24, ++ 24, 24, 24, 24, 24, 26, 26, 26, ++ 28, 28, 30, 32, 32, 32, 34, 36, ++ 38, 38, 38, 38, 38, 38, 38, 38, ++ 38, 38, 38, 38, 38, 38, 38, 38, ++ 38, 38, 38, 38, 38, 38, 38, 38, ++ 38, 38, 38, 38, 38, 38, 38, 38, ++ 38, 38, 38, 38, 38, 38, 38, 38, ++ 38, 38, 38, 38, 38, 38, 38, 38, ++ 38, 38, 38, 38, 38, 38, 38, 38, ++ 38, 38, 38, 38, 38, 38, 38, 38, ++ 38, ++ }, ++ { /* Fourth byte table 9. 
*/ ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 2, 4, 6, 8, 10, 12, 14, ++ 16, 18, 20, 22, 24, 26, 28, 30, ++ 32, 34, 36, 38, 40, 42, 44, 46, ++ 48, 50, 52, 54, 56, 58, 60, 62, ++ 64, 66, 68, 70, 72, 74, 76, 78, ++ 80, 82, 84, 86, 88, 90, 92, 94, ++ 96, 96, 96, 96, 96, 96, 96, 96, ++ 96, 96, 96, 96, 96, 96, 96, 96, ++ 96, 96, 96, 96, 96, 96, 96, 96, ++ 96, 96, 96, 96, 96, 96, 96, 96, ++ 96, 96, 96, 96, 96, 96, 96, 96, ++ 96, 96, 96, 96, 96, 96, 96, 96, ++ 96, 96, 96, 96, 96, 96, 96, 96, ++ 96, 96, 96, 96, 96, 96, 96, 96, ++ 96, 96, 96, 96, 96, 96, 96, 96, ++ 96, 96, 96, 96, 96, 96, 96, 96, ++ 96, ++ }, ++ { /* Fourth byte table 10. */ ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 2, 2, 4, 4, 6, 6, 8, ++ 8, 10, 10, 12, 12, 14, 14, 16, ++ 16, 18, 18, 20, 20, 22, 22, 24, ++ 24, 26, 26, 28, 28, 30, 30, 32, ++ 32, 32, 32, 32, 32, 32, 32, 32, ++ 32, 32, 32, 32, 32, 32, 32, 32, ++ 32, 32, 32, 32, 32, 32, 32, 32, ++ 32, 32, 32, 32, 32, 32, 32, 32, ++ 32, 32, 32, 32, 32, 32, 32, 32, ++ 32, 32, 32, 32, 32, 32, 32, 32, ++ 32, 32, 32, 32, 32, 32, 32, 32, ++ 32, 32, 32, 32, 32, 32, 32, 32, ++ 32, ++ }, ++ { /* Fourth byte table 11. */ ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 2, 2, 2, 2, 2, 2, 2, ++ 2, 2, 2, 4, 4, 6, 6, 8, ++ 8, 10, 10, 12, 12, 14, 14, 16, ++ 16, 18, 18, 20, 20, 22, 22, 24, ++ 24, 26, 26, 28, 28, 30, 30, 32, ++ 32, 34, 34, 36, 36, 38, 38, 40, ++ 40, 42, 42, 44, 44, 46, 46, 48, ++ 48, 50, 50, 52, 52, 54, 54, 56, ++ 56, 56, 56, 56, 56, 56, 56, 56, ++ 56, 56, 56, 56, 56, 56, 56, 56, ++ 56, 56, 56, 56, 56, 56, 56, 56, ++ 56, 56, 56, 56, 56, 56, 56, 56, ++ 56, 56, 56, 56, 56, 56, 56, 56, ++ 56, 56, 56, 56, 56, 56, 56, 56, ++ 56, 56, 56, 56, 56, 56, 56, 56, ++ 56, 56, 56, 56, 56, 56, 56, 56, ++ 56, ++ }, ++ { /* Fourth byte table 12. 
*/ ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 2, 4, 4, 6, 6, 8, 8, ++ 10, 10, 12, 12, 14, 14, 16, 16, ++ 16, 18, 18, 20, 20, 22, 22, 24, ++ 24, 26, 26, 28, 28, 30, 30, 32, ++ 32, 34, 34, 36, 36, 38, 38, 40, ++ 40, 42, 42, 44, 44, 46, 46, 48, ++ 48, 50, 50, 52, 52, 54, 54, 56, ++ 56, 58, 58, 60, 60, 62, 62, 64, ++ 64, 64, 64, 64, 64, 64, 64, 64, ++ 64, 64, 64, 64, 64, 64, 64, 64, ++ 64, 64, 64, 64, 64, 64, 64, 64, ++ 64, 64, 64, 64, 64, 64, 64, 64, ++ 64, 64, 64, 64, 64, 64, 64, 64, ++ 64, 64, 64, 64, 64, 64, 64, 64, ++ 64, 64, 64, 64, 64, 64, 64, 64, ++ 64, 64, 64, 64, 64, 64, 64, 64, ++ 64, ++ }, ++ { /* Fourth byte table 13. */ ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 2, 2, 4, 4, 6, 6, 8, ++ 8, 10, 10, 12, 12, 14, 14, 16, ++ 16, 18, 18, 20, 20, 20, 20, 20, ++ 20, 20, 20, 20, 20, 20, 20, 20, ++ 20, 20, 20, 20, 20, 20, 20, 20, ++ 20, 20, 20, 20, 20, 20, 20, 20, ++ 20, 20, 22, 24, 26, 28, 30, 32, ++ 34, 36, 38, 40, 42, 44, 46, 48, ++ 50, 50, 50, 50, 50, 50, 50, 50, ++ 50, 50, 50, 50, 50, 50, 50, 50, ++ 50, 50, 50, 50, 50, 50, 50, 50, ++ 50, 50, 50, 50, 50, 50, 50, 50, ++ 50, 50, 50, 50, 50, 50, 50, 50, ++ 50, 50, 50, 50, 50, 50, 50, 50, ++ 50, 50, 50, 50, 50, 50, 50, 50, ++ 50, 50, 50, 50, 50, 50, 50, 50, ++ 50, ++ }, ++ { /* Fourth byte table 14. */ ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 2, 4, 6, 8, 10, 12, 14, ++ 16, 18, 20, 22, 24, 26, 28, 30, ++ 32, 34, 36, 38, 40, 42, 44, 46, ++ 46, 46, 46, 46, 46, 46, 46, 46, ++ 46, 46, 46, 46, 46, 46, 46, 46, ++ 46, 46, 46, 46, 46, 46, 46, 46, ++ 46, 46, 46, 46, 46, 46, 46, 46, ++ 46, 46, 46, 46, 46, 46, 46, 46, ++ 46, 46, 46, 46, 46, 46, 46, 46, ++ 46, 46, 46, 46, 46, 46, 46, 46, ++ 46, 46, 46, 46, 46, 46, 46, 46, ++ 46, 46, 46, 46, 46, 46, 46, 46, ++ 46, 46, 46, 46, 46, 46, 46, 46, ++ 46, 46, 46, 46, 46, 46, 46, 46, ++ 46, 46, 46, 46, 46, 46, 46, 46, ++ 46, 46, 46, 46, 46, 46, 46, 46, ++ 46, ++ }, ++ { /* Fourth byte table 15. 
*/ ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 3, 6, 9, 12, 15, 18, 21, ++ 24, 27, 30, 33, 36, 39, 42, 45, ++ 48, 51, 54, 57, 60, 63, 66, 69, ++ 72, 75, 78, 81, 84, 87, 90, 93, ++ 96, 96, 96, 96, 96, 96, 96, 96, ++ 96, 96, 96, 96, 96, 96, 96, 96, ++ 96, 96, 96, 96, 96, 96, 96, 96, ++ 96, 96, 96, 96, 96, 96, 96, 96, ++ 96, 96, 96, 96, 96, 96, 96, 96, ++ 96, 96, 96, 96, 96, 96, 96, 96, ++ 96, 96, 96, 96, 96, 96, 96, 96, ++ 96, 96, 96, 96, 96, 96, 96, 96, ++ 96, ++ }, ++ { /* Fourth byte table 16. */ ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 3, 6, 9, 12, 15, 18, 18, ++ 18, 18, 18, 18, 18, 18, 18, 18, ++ 18, 18, 18, 18, 18, 18, 18, 18, ++ 18, 18, 18, 18, 18, 18, 18, 18, ++ 18, 18, 18, 18, 18, 18, 18, 18, ++ 18, 18, 18, 18, 18, 18, 18, 18, ++ 18, 18, 18, 18, 18, 18, 18, 18, ++ 18, 18, 18, 18, 18, 18, 18, 18, ++ 18, 18, 18, 18, 18, 18, 18, 18, ++ 18, 18, 18, 18, 18, 18, 18, 18, ++ 18, 18, 18, 18, 18, 18, 18, 18, ++ 18, 18, 18, 18, 18, 18, 18, 18, ++ 18, 18, 18, 18, 18, 18, 18, 18, ++ 18, 18, 18, 18, 18, 18, 18, 18, ++ 18, 18, 18, 18, 18, 18, 18, 18, ++ 18, 18, 18, 18, 18, 18, 18, 18, ++ 18, ++ }, ++ { /* Fourth byte table 17. */ ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 3, 3, 6, 6, 9, 9, 12, ++ 12, 15, 15, 18, 18, 21, 21, 24, ++ 24, 27, 27, 30, 30, 33, 33, 36, ++ 36, 39, 39, 42, 42, 45, 45, 48, ++ 48, 51, 51, 54, 54, 57, 57, 60, ++ 60, 63, 63, 66, 66, 69, 69, 72, ++ 72, 75, 75, 78, 78, 81, 81, 84, ++ 84, 87, 87, 90, 90, 93, 93, 96, ++ 96, 96, 96, 96, 96, 96, 96, 96, ++ 96, 96, 96, 96, 96, 96, 96, 96, ++ 96, 96, 96, 96, 96, 96, 96, 96, ++ 96, 96, 96, 96, 96, 96, 96, 96, ++ 96, 96, 96, 96, 96, 96, 96, 96, ++ 96, 96, 96, 96, 96, 96, 96, 96, ++ 96, 96, 96, 96, 96, 96, 96, 96, ++ 96, 96, 96, 96, 96, 96, 96, 96, ++ 96, ++ }, ++ { /* Fourth byte table 18. 
*/ ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 3, 3, 6, 6, 9, 9, 12, ++ 12, 15, 15, 18, 18, 21, 21, 24, ++ 24, 27, 27, 30, 30, 33, 33, 36, ++ 36, 39, 39, 42, 42, 45, 45, 48, ++ 48, 51, 51, 54, 54, 57, 57, 60, ++ 60, 63, 63, 66, 66, 69, 69, 72, ++ 72, 75, 75, 78, 78, 81, 81, 84, ++ 84, 87, 87, 90, 90, 93, 93, 96, ++ 96, 96, 96, 96, 96, 96, 96, 96, ++ 96, 96, 96, 96, 96, 96, 96, 96, ++ 96, 96, 96, 96, 96, 96, 96, 96, ++ 96, 96, 96, 96, 96, 96, 96, 96, ++ 96, 96, 96, 96, 96, 96, 96, 96, ++ 96, 96, 96, 96, 96, 96, 96, 96, ++ 96, 96, 96, 96, 96, 96, 96, 96, ++ 96, 96, 96, 96, 96, 96, 96, 96, ++ 96, ++ }, ++ { /* Fourth byte table 19. */ ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 3, 3, 6, 6, 9, 9, 12, ++ 12, 15, 15, 18, 18, 21, 21, 24, ++ 24, 27, 27, 30, 30, 33, 33, 33, ++ 33, 33, 33, 33, 33, 33, 33, 33, ++ 33, 36, 36, 39, 39, 42, 42, 45, ++ 45, 48, 48, 51, 51, 54, 54, 57, ++ 57, 60, 60, 63, 63, 66, 66, 69, ++ 69, 72, 72, 75, 75, 78, 78, 81, ++ 81, 81, 81, 81, 81, 81, 81, 81, ++ 81, 81, 81, 81, 81, 81, 81, 81, ++ 81, 81, 81, 81, 81, 81, 81, 81, ++ 81, 81, 81, 81, 81, 81, 81, 81, ++ 81, 81, 81, 81, 81, 81, 81, 81, ++ 81, 81, 81, 81, 81, 81, 81, 81, ++ 81, 81, 81, 81, 81, 81, 81, 81, ++ 81, 81, 81, 81, 81, 81, 81, 81, ++ 81, ++ }, ++ { /* Fourth byte table 20. */ ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 3, 3, 6, 6, 9, 9, 12, ++ 12, 15, 15, 18, 18, 21, 21, 24, ++ 24, 27, 27, 30, 30, 33, 33, 36, ++ 36, 39, 39, 42, 42, 45, 45, 48, ++ 48, 51, 51, 54, 54, 57, 57, 60, ++ 60, 63, 63, 66, 66, 69, 69, 72, ++ 72, 75, 75, 78, 78, 81, 81, 84, ++ 84, 87, 87, 87, 87, 87, 87, 87, ++ 87, 87, 87, 87, 87, 87, 87, 87, ++ 87, 87, 87, 87, 87, 87, 87, 87, ++ 87, 87, 87, 87, 87, 87, 87, 87, ++ 87, 87, 87, 87, 87, 87, 87, 87, ++ 87, 87, 87, 87, 87, 87, 87, 87, ++ 87, 87, 87, 87, 87, 87, 87, 87, ++ 87, 87, 87, 87, 87, 87, 87, 87, ++ 87, 87, 87, 87, 87, 87, 87, 87, ++ 87, ++ }, ++ { /* Fourth byte table 21. 
*/ ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 3, 6, 9, 12, 15, 18, 21, ++ 24, 24, 24, 24, 24, 24, 24, 24, ++ 24, 27, 30, 33, 36, 39, 42, 42, ++ 42, 42, 42, 42, 42, 42, 42, 42, ++ 42, 45, 48, 51, 54, 57, 60, 63, ++ 66, 66, 66, 66, 66, 66, 66, 66, ++ 66, 69, 72, 75, 78, 81, 84, 87, ++ 90, 90, 90, 90, 90, 90, 90, 90, ++ 90, 90, 90, 90, 90, 90, 90, 90, ++ 90, 90, 90, 90, 90, 90, 90, 90, ++ 90, 90, 90, 90, 90, 90, 90, 90, ++ 90, 90, 90, 90, 90, 90, 90, 90, ++ 90, 90, 90, 90, 90, 90, 90, 90, ++ 90, 90, 90, 90, 90, 90, 90, 90, ++ 90, 90, 90, 90, 90, 90, 90, 90, ++ 90, ++ }, ++ { /* Fourth byte table 22. */ ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 3, 6, 9, 12, 15, 18, 18, ++ 18, 18, 18, 18, 18, 18, 18, 18, ++ 18, 18, 21, 21, 24, 24, 27, 27, ++ 30, 30, 30, 30, 30, 30, 30, 30, ++ 30, 33, 36, 39, 42, 45, 48, 51, ++ 54, 54, 54, 54, 54, 54, 54, 54, ++ 54, 54, 54, 54, 54, 54, 54, 54, ++ 54, 54, 54, 54, 54, 54, 54, 54, ++ 54, 54, 54, 54, 54, 54, 54, 54, ++ 54, 54, 54, 54, 54, 54, 54, 54, ++ 54, 54, 54, 54, 54, 54, 54, 54, ++ 54, 54, 54, 54, 54, 54, 54, 54, ++ 54, 54, 54, 54, 54, 54, 54, 54, ++ 54, 54, 54, 54, 54, 54, 54, 54, ++ 54, 54, 54, 54, 54, 54, 54, 54, ++ 54, ++ }, ++ { /* Fourth byte table 23. */ ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 3, 6, 9, 12, 15, 18, 21, ++ 24, 24, 24, 24, 24, 24, 24, 24, ++ 24, 27, 30, 33, 36, 39, 42, 45, ++ 48, 48, 48, 48, 48, 48, 48, 48, ++ 48, 51, 54, 57, 60, 63, 66, 69, ++ 72, 72, 72, 72, 72, 72, 72, 72, ++ 72, 75, 78, 81, 84, 87, 87, 87, ++ 87, 87, 87, 87, 87, 87, 87, 87, ++ 87, 87, 87, 87, 87, 87, 87, 87, ++ 87, 87, 87, 87, 87, 87, 87, 87, ++ 87, 87, 87, 87, 87, 87, 87, 87, ++ 87, 87, 87, 87, 87, 87, 87, 87, ++ 87, 87, 87, 87, 87, 87, 87, 87, ++ 87, 87, 87, 87, 87, 87, 87, 87, ++ 87, 87, 87, 87, 87, 87, 87, 87, ++ 87, ++ }, ++ { /* Fourth byte table 24. 
*/ ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 3, 6, 9, 12, 15, 15, 15, ++ 15, 15, 15, 15, 15, 15, 15, 15, ++ 15, 18, 21, 24, 27, 27, 27, 27, ++ 27, 27, 27, 27, 27, 27, 27, 27, ++ 27, 30, 33, 36, 39, 42, 42, 42, ++ 42, 42, 42, 42, 42, 42, 42, 42, ++ 42, 45, 48, 51, 54, 57, 57, 57, ++ 57, 57, 57, 57, 57, 57, 57, 57, ++ 57, 57, 57, 57, 57, 57, 57, 57, ++ 57, 57, 57, 57, 57, 57, 57, 57, ++ 57, 57, 57, 57, 57, 57, 57, 57, ++ 57, 57, 57, 57, 57, 57, 57, 57, ++ 57, 57, 57, 57, 57, 57, 57, 57, ++ 57, 57, 57, 57, 57, 57, 57, 57, ++ 57, 57, 57, 57, 57, 57, 57, 57, ++ 57, ++ }, ++ { /* Fourth byte table 25. */ ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 2, ++ 2, 2, 2, 3, 5, 5, 5, 5, ++ 5, 5, 5, 8, 8, 8, 8, 8, ++ 8, 8, 8, 8, 8, 8, 8, 8, ++ 8, 8, 8, 8, 8, 8, 8, 8, ++ 8, 8, 8, 8, 8, 8, 8, 8, ++ 8, 8, 8, 8, 8, 8, 8, 8, ++ 8, 8, 8, 8, 8, 8, 8, 8, ++ 8, 8, 8, 8, 8, 8, 8, 8, ++ 8, 8, 8, 8, 8, 8, 8, 8, ++ 8, 8, 8, 8, 8, 8, 8, 8, ++ 8, 8, 8, 8, 8, 8, 8, 8, ++ 8, ++ }, ++ { /* Fourth byte table 26. */ ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 3, 6, 9, 12, 15, 18, 21, ++ 24, 27, 30, 33, 36, 39, 42, 45, ++ 48, 48, 48, 48, 48, 48, 48, 48, ++ 48, 48, 48, 48, 48, 48, 48, 48, ++ 48, 48, 48, 48, 48, 48, 48, 48, ++ 48, 48, 48, 48, 48, 48, 48, 48, ++ 48, 48, 48, 48, 48, 48, 48, 48, ++ 48, 48, 48, 48, 48, 48, 48, 48, ++ 48, 48, 48, 48, 48, 48, 48, 48, ++ 48, 48, 48, 48, 48, 48, 48, 48, ++ 48, 48, 48, 48, 48, 48, 48, 48, ++ 48, 48, 48, 48, 48, 48, 48, 48, ++ 48, ++ }, ++ { /* Fourth byte table 27. 
*/ ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 3, 3, 3, 3, ++ 3, 3, 3, 3, 3, 3, 3, 3, ++ 3, 3, 3, 3, 3, 3, 3, 3, ++ 3, 3, 3, 3, 3, 3, 3, 3, ++ 3, 3, 3, 3, 3, 3, 3, 3, ++ 3, 3, 3, 3, 3, 3, 3, 3, ++ 3, 3, 3, 3, 3, 3, 3, 3, ++ 3, 3, 3, 3, 3, 3, 3, 3, ++ 3, 3, 3, 3, 3, 3, 3, 3, ++ 3, 3, 3, 3, 3, 3, 3, 3, ++ 3, 3, 3, 3, 3, 3, 3, 3, ++ 3, 3, 3, 3, 3, 3, 3, 3, ++ 3, 3, 3, 3, 3, 3, 3, 3, ++ 3, 3, 3, 3, 3, 3, 3, 3, ++ 3, 3, 3, 3, 3, 3, 3, 3, ++ 3, 3, 3, 3, 3, 3, 3, 3, ++ 3, ++ }, ++ { /* Fourth byte table 28. */ ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 3, ++ 6, 9, 12, 15, 18, 21, 24, 27, ++ 30, 30, 30, 30, 30, 30, 30, 30, ++ 30, 30, 30, 30, 30, 30, 30, 30, ++ 30, 30, 30, 30, 30, 30, 30, 30, ++ 30, 30, 30, 30, 30, 30, 30, 30, ++ 30, 30, 30, 30, 30, 30, 30, 30, ++ 30, 30, 30, 30, 30, 30, 30, 30, ++ 30, 30, 30, 30, 30, 30, 30, 30, ++ 30, 30, 30, 30, 30, 30, 30, 30, ++ 30, ++ }, ++ { /* Fourth byte table 29. */ ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 3, 6, 9, 12, 15, 18, 21, ++ 24, 27, 30, 33, 36, 39, 42, 45, ++ 48, 48, 48, 48, 48, 48, 48, 48, ++ 48, 48, 48, 48, 48, 48, 48, 48, ++ 48, 48, 48, 48, 48, 48, 48, 48, ++ 48, 48, 48, 48, 48, 48, 48, 48, ++ 48, 48, 48, 48, 48, 48, 48, 48, ++ 48, 48, 48, 48, 48, 48, 48, 48, ++ 48, 48, 48, 48, 48, 48, 48, 48, ++ 48, 48, 48, 48, 48, 48, 48, 48, ++ 48, 48, 48, 48, 48, 48, 48, 48, ++ 48, 48, 48, 48, 48, 48, 48, 48, ++ 48, 48, 48, 48, 48, 48, 48, 48, ++ 48, 48, 48, 48, 48, 48, 48, 48, ++ 48, 48, 48, 48, 48, 48, 48, 48, ++ 48, 48, 48, 48, 48, 48, 48, 48, ++ 48, ++ }, ++ { /* Fourth byte table 30. 
*/ ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 3, 6, 9, 12, 15, 18, 21, ++ 24, 27, 30, 33, 36, 39, 42, 45, ++ 48, 51, 54, 57, 60, 63, 66, 69, ++ 72, 75, 78, 81, 84, 87, 90, 93, ++ 96, 99, 102, 105, 108, 111, 114, 117, ++ 120, 123, 126, 129, 132, 135, 138, 141, ++ 141, 141, 141, 141, 141, 141, 141, 141, ++ 141, 141, 141, 141, 141, 141, 141, 141, ++ 141, 141, 141, 141, 141, 141, 141, 141, ++ 141, 141, 141, 141, 141, 141, 141, 141, ++ 141, 141, 141, 141, 141, 141, 141, 141, ++ 141, 141, 141, 141, 141, 141, 141, 141, ++ 141, 141, 141, 141, 141, 141, 141, 141, ++ 141, 141, 141, 141, 141, 141, 141, 141, ++ 141, 141, 141, 141, 141, 141, 141, 141, ++ 141, 141, 141, 141, 141, 141, 141, 141, ++ 141, ++ }, ++ { /* Fourth byte table 31. */ ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 3, 3, 5, 8, 10, 10, 10, ++ 13, 13, 16, 16, 19, 19, 19, 19, ++ 19, 19, 19, 19, 19, 19, 22, 22, ++ 22, 22, 22, 22, 22, 22, 22, 22, ++ 22, 22, 22, 22, 22, 22, 22, 22, ++ 22, 22, 22, 22, 22, 22, 22, 22, ++ 22, 22, 22, 22, 22, 22, 22, 22, ++ 22, 22, 22, 22, 22, 22, 22, 22, ++ 22, 22, 22, 22, 22, 22, 22, 22, ++ 22, 22, 22, 22, 22, 22, 22, 22, ++ 22, 22, 22, 22, 22, 22, 22, 22, ++ 22, 22, 22, 22, 22, 22, 22, 22, ++ 22, ++ }, ++ { /* Fourth byte table 32. */ ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 3, 3, 6, 6, 9, 9, 12, ++ 12, 15, 15, 18, 18, 21, 21, 24, ++ 24, 27, 27, 30, 30, 33, 33, 36, ++ 36, 39, 39, 42, 42, 45, 45, 48, ++ 48, 51, 51, 54, 54, 57, 57, 60, ++ 60, 63, 63, 66, 66, 69, 69, 72, ++ 72, 75, 75, 78, 78, 81, 81, 84, ++ 84, 87, 87, 90, 90, 93, 93, 96, ++ 96, 96, 96, 96, 96, 96, 96, 96, ++ 96, 96, 96, 96, 96, 96, 96, 96, ++ 96, 96, 96, 96, 96, 96, 96, 96, ++ 96, 96, 96, 96, 96, 96, 96, 96, ++ 96, 96, 96, 96, 96, 96, 96, 96, ++ 96, 96, 96, 96, 96, 96, 96, 96, ++ 96, 96, 96, 96, 96, 96, 96, 96, ++ 96, 96, 96, 96, 96, 96, 96, 96, ++ 96, ++ }, ++ { /* Fourth byte table 33. 
*/ ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 3, 3, 6, 6, 9, 9, 12, ++ 12, 15, 15, 18, 18, 21, 21, 24, ++ 24, 27, 27, 30, 30, 33, 33, 36, ++ 36, 39, 39, 42, 42, 45, 45, 48, ++ 48, 51, 51, 54, 54, 54, 54, 54, ++ 54, 54, 54, 54, 54, 54, 54, 54, ++ 54, 54, 54, 54, 54, 54, 54, 54, ++ 54, 54, 54, 54, 54, 54, 54, 54, ++ 54, 54, 54, 54, 54, 54, 54, 54, ++ 54, 54, 54, 54, 54, 54, 54, 54, ++ 54, 54, 54, 54, 54, 54, 54, 54, ++ 54, 54, 54, 54, 54, 54, 54, 54, ++ 54, 54, 54, 54, 54, 54, 54, 54, ++ 54, 54, 54, 54, 54, 54, 54, 54, ++ 54, 54, 54, 54, 54, 54, 54, 54, ++ 54, 54, 54, 54, 54, 54, 54, 54, ++ 54, ++ }, ++ { /* Fourth byte table 34. */ ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 3, 6, 9, 12, 15, 18, ++ 21, 24, 27, 30, 33, 36, 39, 42, ++ 45, 48, 51, 54, 57, 60, 63, 66, ++ 69, 72, 75, 78, 78, 78, 78, 78, ++ 78, 78, 78, 78, 78, 78, 78, 78, ++ 78, 78, 78, 78, 78, 78, 78, 78, ++ 78, 78, 78, 78, 78, 78, 78, 78, ++ 78, 78, 78, 78, 78, 78, 78, 78, ++ 78, 78, 78, 78, 78, 78, 78, 78, ++ 78, 78, 78, 78, 78, 78, 78, 78, ++ 78, 78, 78, 78, 78, 78, 78, 78, ++ 78, 78, 78, 78, 78, 78, 78, 78, ++ 78, ++ }, ++ { /* Fourth byte table 35. 
*/ ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 4, 8, 12, 16, 20, 24, 28, ++ 32, 36, 40, 44, 48, 52, 56, 60, ++ 64, 68, 72, 76, 80, 84, 88, 92, ++ 96, 100, 104, 108, 112, 116, 120, 124, ++ 128, 132, 136, 140, 144, 148, 152, 156, ++ 160, 160, 160, 160, 160, 160, 160, 160, ++ 160, 160, 160, 160, 160, 160, 160, 160, ++ 160, 160, 160, 160, 160, 160, 160, 160, ++ 160, 160, 160, 160, 160, 160, 160, 160, ++ 160, 160, 160, 160, 160, 160, 160, 160, ++ 160, 160, 160, 160, 160, 160, 160, 160, ++ 160, 160, 160, 160, 160, 160, 160, 160, ++ 160, 160, 160, 160, 160, 160, 160, 160, ++ 160, 160, 160, 160, 160, 160, 160, 160, ++ 160, 160, 160, 160, 160, 160, 160, 160, ++ 160, 160, 160, 160, 160, 160, 160, 160, ++ 160, ++ }, ++ }, ++}; ++ ++static const uchar_t u8_tolower_final_tbl[2][2299] = { ++ { ++ 0xC3, 0xA0, 0xC3, 0xA1, 0xC3, 0xA2, 0xC3, 0xA3, ++ 0xC3, 0xA4, 0xC3, 0xA5, 0xC3, 0xA6, 0xC3, 0xA7, ++ 0xC3, 0xA8, 0xC3, 0xA9, 0xC3, 0xAA, 0xC3, 0xAB, ++ 0xC3, 0xAC, 0xC3, 0xAD, 0xC3, 0xAE, 0xC3, 0xAF, ++ 0xC3, 0xB0, 0xC3, 0xB1, 0xC3, 0xB2, 0xC3, 0xB3, ++ 0xC3, 0xB4, 0xC3, 0xB5, 0xC3, 0xB6, 0xC3, 0xB8, ++ 0xC3, 0xB9, 0xC3, 0xBA, 0xC3, 0xBB, 0xC3, 0xBC, ++ 0xC3, 0xBD, 0xC3, 0xBE, 0xC4, 0x81, 0xC4, 0x83, ++ 0xC4, 0x85, 0xC4, 0x87, 0xC4, 0x89, 0xC4, 0x8B, ++ 0xC4, 0x8D, 0xC4, 0x8F, 0xC4, 0x91, 0xC4, 0x93, ++ 0xC4, 0x95, 0xC4, 0x97, 0xC4, 0x99, 0xC4, 0x9B, ++ 0xC4, 0x9D, 0xC4, 0x9F, 0xC4, 0xA1, 0xC4, 0xA3, ++ 0xC4, 0xA5, 0xC4, 0xA7, 0xC4, 0xA9, 0xC4, 0xAB, ++ 0xC4, 0xAD, 0xC4, 0xAF, 0x69, 0xC4, 0xB3, 0xC4, ++ 0xB5, 0xC4, 0xB7, 0xC4, 0xBA, 0xC4, 0xBC, 0xC4, ++ 0xBE, 0xC5, 0x80, 0xC5, 0x82, 0xC5, 0x84, 0xC5, ++ 0x86, 0xC5, 0x88, 0xC5, 0x8B, 0xC5, 0x8D, 0xC5, ++ 0x8F, 0xC5, 0x91, 0xC5, 0x93, 0xC5, 0x95, 0xC5, ++ 0x97, 0xC5, 0x99, 0xC5, 0x9B, 0xC5, 0x9D, 0xC5, ++ 0x9F, 0xC5, 0xA1, 0xC5, 0xA3, 0xC5, 0xA5, 0xC5, ++ 0xA7, 0xC5, 0xA9, 0xC5, 0xAB, 0xC5, 0xAD, 0xC5, ++ 0xAF, 0xC5, 0xB1, 0xC5, 0xB3, 0xC5, 0xB5, 0xC5, ++ 0xB7, 0xC3, 0xBF, 0xC5, 0xBA, 0xC5, 0xBC, 0xC5, ++ 0xBE, 0xC9, 0x93, 0xC6, 0x83, 0xC6, 0x85, 0xC9, ++ 0x94, 0xC6, 0x88, 0xC9, 0x96, 0xC9, 0x97, 0xC6, ++ 0x8C, 0xC7, 0x9D, 0xC9, 0x99, 0xC9, 0x9B, 0xC6, ++ 0x92, 0xC9, 0xA0, 0xC9, 0xA3, 0xC9, 0xA9, 0xC9, ++ 0xA8, 0xC6, 0x99, 0xC9, 0xAF, 0xC9, 0xB2, 0xC9, ++ 0xB5, 0xC6, 0xA1, 0xC6, 0xA3, 0xC6, 0xA5, 0xCA, ++ 0x80, 0xC6, 0xA8, 0xCA, 0x83, 0xC6, 0xAD, 0xCA, ++ 0x88, 0xC6, 0xB0, 0xCA, 0x8A, 0xCA, 0x8B, 0xC6, ++ 0xB4, 0xC6, 0xB6, 0xCA, 0x92, 0xC6, 0xB9, 0xC6, ++ 0xBD, 0xC7, 0x86, 0xC7, 0x86, 0xC7, 0x89, 0xC7, ++ 0x89, 0xC7, 0x8C, 0xC7, 0x8C, 0xC7, 0x8E, 0xC7, ++ 0x90, 0xC7, 0x92, 0xC7, 0x94, 0xC7, 0x96, 0xC7, ++ 0x98, 0xC7, 0x9A, 0xC7, 0x9C, 0xC7, 0x9F, 0xC7, ++ 0xA1, 0xC7, 0xA3, 0xC7, 0xA5, 0xC7, 0xA7, 0xC7, ++ 0xA9, 0xC7, 0xAB, 0xC7, 0xAD, 0xC7, 0xAF, 0xC7, ++ 0xB3, 0xC7, 0xB3, 0xC7, 0xB5, 0xC6, 0x95, 0xC6, ++ 0xBF, 0xC7, 0xB9, 0xC7, 0xBB, 0xC7, 0xBD, 0xC7, ++ 0xBF, 0xC8, 0x81, 0xC8, 0x83, 0xC8, 0x85, 0xC8, ++ 0x87, 0xC8, 0x89, 0xC8, 0x8B, 0xC8, 0x8D, 0xC8, ++ 0x8F, 0xC8, 0x91, 0xC8, 0x93, 0xC8, 0x95, 0xC8, ++ 0x97, 0xC8, 0x99, 0xC8, 0x9B, 0xC8, 0x9D, 0xC8, ++ 0x9F, 0xC6, 0x9E, 0xC8, 0xA3, 0xC8, 0xA5, 0xC8, ++ 0xA7, 0xC8, 0xA9, 0xC8, 0xAB, 0xC8, 0xAD, 0xC8, ++ 0xAF, 0xC8, 0xB1, 
0xC8, 0xB3, 0xCE, 0xAC, 0xCE, ++ 0xAD, 0xCE, 0xAE, 0xCE, 0xAF, 0xCF, 0x8C, 0xCF, ++ 0x8D, 0xCF, 0x8E, 0xCE, 0xB1, 0xCE, 0xB2, 0xCE, ++ 0xB3, 0xCE, 0xB4, 0xCE, 0xB5, 0xCE, 0xB6, 0xCE, ++ 0xB7, 0xCE, 0xB8, 0xCE, 0xB9, 0xCE, 0xBA, 0xCE, ++ 0xBB, 0xCE, 0xBC, 0xCE, 0xBD, 0xCE, 0xBE, 0xCE, ++ 0xBF, 0xCF, 0x80, 0xCF, 0x81, 0xCF, 0x83, 0xCF, ++ 0x84, 0xCF, 0x85, 0xCF, 0x86, 0xCF, 0x87, 0xCF, ++ 0x88, 0xCF, 0x89, 0xCF, 0x8A, 0xCF, 0x8B, 0xCF, ++ 0x99, 0xCF, 0x9B, 0xCF, 0x9D, 0xCF, 0x9F, 0xCF, ++ 0xA1, 0xCF, 0xA3, 0xCF, 0xA5, 0xCF, 0xA7, 0xCF, ++ 0xA9, 0xCF, 0xAB, 0xCF, 0xAD, 0xCF, 0xAF, 0xCE, ++ 0xB8, 0xD1, 0x90, 0xD1, 0x91, 0xD1, 0x92, 0xD1, ++ 0x93, 0xD1, 0x94, 0xD1, 0x95, 0xD1, 0x96, 0xD1, ++ 0x97, 0xD1, 0x98, 0xD1, 0x99, 0xD1, 0x9A, 0xD1, ++ 0x9B, 0xD1, 0x9C, 0xD1, 0x9D, 0xD1, 0x9E, 0xD1, ++ 0x9F, 0xD0, 0xB0, 0xD0, 0xB1, 0xD0, 0xB2, 0xD0, ++ 0xB3, 0xD0, 0xB4, 0xD0, 0xB5, 0xD0, 0xB6, 0xD0, ++ 0xB7, 0xD0, 0xB8, 0xD0, 0xB9, 0xD0, 0xBA, 0xD0, ++ 0xBB, 0xD0, 0xBC, 0xD0, 0xBD, 0xD0, 0xBE, 0xD0, ++ 0xBF, 0xD1, 0x80, 0xD1, 0x81, 0xD1, 0x82, 0xD1, ++ 0x83, 0xD1, 0x84, 0xD1, 0x85, 0xD1, 0x86, 0xD1, ++ 0x87, 0xD1, 0x88, 0xD1, 0x89, 0xD1, 0x8A, 0xD1, ++ 0x8B, 0xD1, 0x8C, 0xD1, 0x8D, 0xD1, 0x8E, 0xD1, ++ 0x8F, 0xD1, 0xA1, 0xD1, 0xA3, 0xD1, 0xA5, 0xD1, ++ 0xA7, 0xD1, 0xA9, 0xD1, 0xAB, 0xD1, 0xAD, 0xD1, ++ 0xAF, 0xD1, 0xB1, 0xD1, 0xB3, 0xD1, 0xB5, 0xD1, ++ 0xB7, 0xD1, 0xB9, 0xD1, 0xBB, 0xD1, 0xBD, 0xD1, ++ 0xBF, 0xD2, 0x81, 0xD2, 0x8B, 0xD2, 0x8D, 0xD2, ++ 0x8F, 0xD2, 0x91, 0xD2, 0x93, 0xD2, 0x95, 0xD2, ++ 0x97, 0xD2, 0x99, 0xD2, 0x9B, 0xD2, 0x9D, 0xD2, ++ 0x9F, 0xD2, 0xA1, 0xD2, 0xA3, 0xD2, 0xA5, 0xD2, ++ 0xA7, 0xD2, 0xA9, 0xD2, 0xAB, 0xD2, 0xAD, 0xD2, ++ 0xAF, 0xD2, 0xB1, 0xD2, 0xB3, 0xD2, 0xB5, 0xD2, ++ 0xB7, 0xD2, 0xB9, 0xD2, 0xBB, 0xD2, 0xBD, 0xD2, ++ 0xBF, 0xD3, 0x82, 0xD3, 0x84, 0xD3, 0x86, 0xD3, ++ 0x88, 0xD3, 0x8A, 0xD3, 0x8C, 0xD3, 0x8E, 0xD3, ++ 0x91, 0xD3, 0x93, 0xD3, 0x95, 0xD3, 0x97, 0xD3, ++ 0x99, 0xD3, 0x9B, 0xD3, 0x9D, 0xD3, 0x9F, 0xD3, ++ 0xA1, 0xD3, 0xA3, 0xD3, 0xA5, 0xD3, 0xA7, 0xD3, ++ 0xA9, 0xD3, 0xAB, 0xD3, 0xAD, 0xD3, 0xAF, 0xD3, ++ 0xB1, 0xD3, 0xB3, 0xD3, 0xB5, 0xD3, 0xB9, 0xD4, ++ 0x81, 0xD4, 0x83, 0xD4, 0x85, 0xD4, 0x87, 0xD4, ++ 0x89, 0xD4, 0x8B, 0xD4, 0x8D, 0xD4, 0x8F, 0xD5, ++ 0xA1, 0xD5, 0xA2, 0xD5, 0xA3, 0xD5, 0xA4, 0xD5, ++ 0xA5, 0xD5, 0xA6, 0xD5, 0xA7, 0xD5, 0xA8, 0xD5, ++ 0xA9, 0xD5, 0xAA, 0xD5, 0xAB, 0xD5, 0xAC, 0xD5, ++ 0xAD, 0xD5, 0xAE, 0xD5, 0xAF, 0xD5, 0xB0, 0xD5, ++ 0xB1, 0xD5, 0xB2, 0xD5, 0xB3, 0xD5, 0xB4, 0xD5, ++ 0xB5, 0xD5, 0xB6, 0xD5, 0xB7, 0xD5, 0xB8, 0xD5, ++ 0xB9, 0xD5, 0xBA, 0xD5, 0xBB, 0xD5, 0xBC, 0xD5, ++ 0xBD, 0xD5, 0xBE, 0xD5, 0xBF, 0xD6, 0x80, 0xD6, ++ 0x81, 0xD6, 0x82, 0xD6, 0x83, 0xD6, 0x84, 0xD6, ++ 0x85, 0xD6, 0x86, 0xE1, 0xB8, 0x81, 0xE1, 0xB8, ++ 0x83, 0xE1, 0xB8, 0x85, 0xE1, 0xB8, 0x87, 0xE1, ++ 0xB8, 0x89, 0xE1, 0xB8, 0x8B, 0xE1, 0xB8, 0x8D, ++ 0xE1, 0xB8, 0x8F, 0xE1, 0xB8, 0x91, 0xE1, 0xB8, ++ 0x93, 0xE1, 0xB8, 0x95, 0xE1, 0xB8, 0x97, 0xE1, ++ 0xB8, 0x99, 0xE1, 0xB8, 0x9B, 0xE1, 0xB8, 0x9D, ++ 0xE1, 0xB8, 0x9F, 0xE1, 0xB8, 0xA1, 0xE1, 0xB8, ++ 0xA3, 0xE1, 0xB8, 0xA5, 0xE1, 0xB8, 0xA7, 0xE1, ++ 0xB8, 0xA9, 0xE1, 0xB8, 0xAB, 0xE1, 0xB8, 0xAD, ++ 0xE1, 0xB8, 0xAF, 0xE1, 0xB8, 0xB1, 0xE1, 0xB8, ++ 0xB3, 0xE1, 0xB8, 0xB5, 0xE1, 0xB8, 0xB7, 0xE1, ++ 0xB8, 0xB9, 0xE1, 0xB8, 0xBB, 0xE1, 0xB8, 0xBD, ++ 0xE1, 0xB8, 0xBF, 0xE1, 0xB9, 0x81, 0xE1, 0xB9, ++ 0x83, 0xE1, 0xB9, 0x85, 0xE1, 0xB9, 0x87, 0xE1, ++ 0xB9, 0x89, 0xE1, 0xB9, 0x8B, 0xE1, 0xB9, 0x8D, ++ 0xE1, 0xB9, 0x8F, 0xE1, 0xB9, 0x91, 0xE1, 0xB9, ++ 0x93, 0xE1, 0xB9, 0x95, 0xE1, 0xB9, 0x97, 0xE1, ++ 
0xB9, 0x99, 0xE1, 0xB9, 0x9B, 0xE1, 0xB9, 0x9D, ++ 0xE1, 0xB9, 0x9F, 0xE1, 0xB9, 0xA1, 0xE1, 0xB9, ++ 0xA3, 0xE1, 0xB9, 0xA5, 0xE1, 0xB9, 0xA7, 0xE1, ++ 0xB9, 0xA9, 0xE1, 0xB9, 0xAB, 0xE1, 0xB9, 0xAD, ++ 0xE1, 0xB9, 0xAF, 0xE1, 0xB9, 0xB1, 0xE1, 0xB9, ++ 0xB3, 0xE1, 0xB9, 0xB5, 0xE1, 0xB9, 0xB7, 0xE1, ++ 0xB9, 0xB9, 0xE1, 0xB9, 0xBB, 0xE1, 0xB9, 0xBD, ++ 0xE1, 0xB9, 0xBF, 0xE1, 0xBA, 0x81, 0xE1, 0xBA, ++ 0x83, 0xE1, 0xBA, 0x85, 0xE1, 0xBA, 0x87, 0xE1, ++ 0xBA, 0x89, 0xE1, 0xBA, 0x8B, 0xE1, 0xBA, 0x8D, ++ 0xE1, 0xBA, 0x8F, 0xE1, 0xBA, 0x91, 0xE1, 0xBA, ++ 0x93, 0xE1, 0xBA, 0x95, 0xE1, 0xBA, 0xA1, 0xE1, ++ 0xBA, 0xA3, 0xE1, 0xBA, 0xA5, 0xE1, 0xBA, 0xA7, ++ 0xE1, 0xBA, 0xA9, 0xE1, 0xBA, 0xAB, 0xE1, 0xBA, ++ 0xAD, 0xE1, 0xBA, 0xAF, 0xE1, 0xBA, 0xB1, 0xE1, ++ 0xBA, 0xB3, 0xE1, 0xBA, 0xB5, 0xE1, 0xBA, 0xB7, ++ 0xE1, 0xBA, 0xB9, 0xE1, 0xBA, 0xBB, 0xE1, 0xBA, ++ 0xBD, 0xE1, 0xBA, 0xBF, 0xE1, 0xBB, 0x81, 0xE1, ++ 0xBB, 0x83, 0xE1, 0xBB, 0x85, 0xE1, 0xBB, 0x87, ++ 0xE1, 0xBB, 0x89, 0xE1, 0xBB, 0x8B, 0xE1, 0xBB, ++ 0x8D, 0xE1, 0xBB, 0x8F, 0xE1, 0xBB, 0x91, 0xE1, ++ 0xBB, 0x93, 0xE1, 0xBB, 0x95, 0xE1, 0xBB, 0x97, ++ 0xE1, 0xBB, 0x99, 0xE1, 0xBB, 0x9B, 0xE1, 0xBB, ++ 0x9D, 0xE1, 0xBB, 0x9F, 0xE1, 0xBB, 0xA1, 0xE1, ++ 0xBB, 0xA3, 0xE1, 0xBB, 0xA5, 0xE1, 0xBB, 0xA7, ++ 0xE1, 0xBB, 0xA9, 0xE1, 0xBB, 0xAB, 0xE1, 0xBB, ++ 0xAD, 0xE1, 0xBB, 0xAF, 0xE1, 0xBB, 0xB1, 0xE1, ++ 0xBB, 0xB3, 0xE1, 0xBB, 0xB5, 0xE1, 0xBB, 0xB7, ++ 0xE1, 0xBB, 0xB9, 0xE1, 0xBC, 0x80, 0xE1, 0xBC, ++ 0x81, 0xE1, 0xBC, 0x82, 0xE1, 0xBC, 0x83, 0xE1, ++ 0xBC, 0x84, 0xE1, 0xBC, 0x85, 0xE1, 0xBC, 0x86, ++ 0xE1, 0xBC, 0x87, 0xE1, 0xBC, 0x90, 0xE1, 0xBC, ++ 0x91, 0xE1, 0xBC, 0x92, 0xE1, 0xBC, 0x93, 0xE1, ++ 0xBC, 0x94, 0xE1, 0xBC, 0x95, 0xE1, 0xBC, 0xA0, ++ 0xE1, 0xBC, 0xA1, 0xE1, 0xBC, 0xA2, 0xE1, 0xBC, ++ 0xA3, 0xE1, 0xBC, 0xA4, 0xE1, 0xBC, 0xA5, 0xE1, ++ 0xBC, 0xA6, 0xE1, 0xBC, 0xA7, 0xE1, 0xBC, 0xB0, ++ 0xE1, 0xBC, 0xB1, 0xE1, 0xBC, 0xB2, 0xE1, 0xBC, ++ 0xB3, 0xE1, 0xBC, 0xB4, 0xE1, 0xBC, 0xB5, 0xE1, ++ 0xBC, 0xB6, 0xE1, 0xBC, 0xB7, 0xE1, 0xBD, 0x80, ++ 0xE1, 0xBD, 0x81, 0xE1, 0xBD, 0x82, 0xE1, 0xBD, ++ 0x83, 0xE1, 0xBD, 0x84, 0xE1, 0xBD, 0x85, 0xE1, ++ 0xBD, 0x91, 0xE1, 0xBD, 0x93, 0xE1, 0xBD, 0x95, ++ 0xE1, 0xBD, 0x97, 0xE1, 0xBD, 0xA0, 0xE1, 0xBD, ++ 0xA1, 0xE1, 0xBD, 0xA2, 0xE1, 0xBD, 0xA3, 0xE1, ++ 0xBD, 0xA4, 0xE1, 0xBD, 0xA5, 0xE1, 0xBD, 0xA6, ++ 0xE1, 0xBD, 0xA7, 0xE1, 0xBE, 0x80, 0xE1, 0xBE, ++ 0x81, 0xE1, 0xBE, 0x82, 0xE1, 0xBE, 0x83, 0xE1, ++ 0xBE, 0x84, 0xE1, 0xBE, 0x85, 0xE1, 0xBE, 0x86, ++ 0xE1, 0xBE, 0x87, 0xE1, 0xBE, 0x90, 0xE1, 0xBE, ++ 0x91, 0xE1, 0xBE, 0x92, 0xE1, 0xBE, 0x93, 0xE1, ++ 0xBE, 0x94, 0xE1, 0xBE, 0x95, 0xE1, 0xBE, 0x96, ++ 0xE1, 0xBE, 0x97, 0xE1, 0xBE, 0xA0, 0xE1, 0xBE, ++ 0xA1, 0xE1, 0xBE, 0xA2, 0xE1, 0xBE, 0xA3, 0xE1, ++ 0xBE, 0xA4, 0xE1, 0xBE, 0xA5, 0xE1, 0xBE, 0xA6, ++ 0xE1, 0xBE, 0xA7, 0xE1, 0xBE, 0xB0, 0xE1, 0xBE, ++ 0xB1, 0xE1, 0xBD, 0xB0, 0xE1, 0xBD, 0xB1, 0xE1, ++ 0xBE, 0xB3, 0xE1, 0xBD, 0xB2, 0xE1, 0xBD, 0xB3, ++ 0xE1, 0xBD, 0xB4, 0xE1, 0xBD, 0xB5, 0xE1, 0xBF, ++ 0x83, 0xE1, 0xBF, 0x90, 0xE1, 0xBF, 0x91, 0xE1, ++ 0xBD, 0xB6, 0xE1, 0xBD, 0xB7, 0xE1, 0xBF, 0xA0, ++ 0xE1, 0xBF, 0xA1, 0xE1, 0xBD, 0xBA, 0xE1, 0xBD, ++ 0xBB, 0xE1, 0xBF, 0xA5, 0xE1, 0xBD, 0xB8, 0xE1, ++ 0xBD, 0xB9, 0xE1, 0xBD, 0xBC, 0xE1, 0xBD, 0xBD, ++ 0xE1, 0xBF, 0xB3, 0xCF, 0x89, 0x6B, 0xC3, 0xA5, ++ 0xE2, 0x85, 0xB0, 0xE2, 0x85, 0xB1, 0xE2, 0x85, ++ 0xB2, 0xE2, 0x85, 0xB3, 0xE2, 0x85, 0xB4, 0xE2, ++ 0x85, 0xB5, 0xE2, 0x85, 0xB6, 0xE2, 0x85, 0xB7, ++ 0xE2, 0x85, 0xB8, 0xE2, 0x85, 0xB9, 0xE2, 0x85, ++ 0xBA, 0xE2, 0x85, 0xBB, 0xE2, 0x85, 
0xBC, 0xE2, ++ 0x85, 0xBD, 0xE2, 0x85, 0xBE, 0xE2, 0x85, 0xBF, ++ 0xE2, 0x93, 0x90, 0xE2, 0x93, 0x91, 0xE2, 0x93, ++ 0x92, 0xE2, 0x93, 0x93, 0xE2, 0x93, 0x94, 0xE2, ++ 0x93, 0x95, 0xE2, 0x93, 0x96, 0xE2, 0x93, 0x97, ++ 0xE2, 0x93, 0x98, 0xE2, 0x93, 0x99, 0xE2, 0x93, ++ 0x9A, 0xE2, 0x93, 0x9B, 0xE2, 0x93, 0x9C, 0xE2, ++ 0x93, 0x9D, 0xE2, 0x93, 0x9E, 0xE2, 0x93, 0x9F, ++ 0xE2, 0x93, 0xA0, 0xE2, 0x93, 0xA1, 0xE2, 0x93, ++ 0xA2, 0xE2, 0x93, 0xA3, 0xE2, 0x93, 0xA4, 0xE2, ++ 0x93, 0xA5, 0xE2, 0x93, 0xA6, 0xE2, 0x93, 0xA7, ++ 0xE2, 0x93, 0xA8, 0xE2, 0x93, 0xA9, 0xEF, 0xBD, ++ 0x81, 0xEF, 0xBD, 0x82, 0xEF, 0xBD, 0x83, 0xEF, ++ 0xBD, 0x84, 0xEF, 0xBD, 0x85, 0xEF, 0xBD, 0x86, ++ 0xEF, 0xBD, 0x87, 0xEF, 0xBD, 0x88, 0xEF, 0xBD, ++ 0x89, 0xEF, 0xBD, 0x8A, 0xEF, 0xBD, 0x8B, 0xEF, ++ 0xBD, 0x8C, 0xEF, 0xBD, 0x8D, 0xEF, 0xBD, 0x8E, ++ 0xEF, 0xBD, 0x8F, 0xEF, 0xBD, 0x90, 0xEF, 0xBD, ++ 0x91, 0xEF, 0xBD, 0x92, 0xEF, 0xBD, 0x93, 0xEF, ++ 0xBD, 0x94, 0xEF, 0xBD, 0x95, 0xEF, 0xBD, 0x96, ++ 0xEF, 0xBD, 0x97, 0xEF, 0xBD, 0x98, 0xEF, 0xBD, ++ 0x99, 0xEF, 0xBD, 0x9A, 0xF0, 0x90, 0x90, 0xA8, ++ 0xF0, 0x90, 0x90, 0xA9, 0xF0, 0x90, 0x90, 0xAA, ++ 0xF0, 0x90, 0x90, 0xAB, 0xF0, 0x90, 0x90, 0xAC, ++ 0xF0, 0x90, 0x90, 0xAD, 0xF0, 0x90, 0x90, 0xAE, ++ 0xF0, 0x90, 0x90, 0xAF, 0xF0, 0x90, 0x90, 0xB0, ++ 0xF0, 0x90, 0x90, 0xB1, 0xF0, 0x90, 0x90, 0xB2, ++ 0xF0, 0x90, 0x90, 0xB3, 0xF0, 0x90, 0x90, 0xB4, ++ 0xF0, 0x90, 0x90, 0xB5, 0xF0, 0x90, 0x90, 0xB6, ++ 0xF0, 0x90, 0x90, 0xB7, 0xF0, 0x90, 0x90, 0xB8, ++ 0xF0, 0x90, 0x90, 0xB9, 0xF0, 0x90, 0x90, 0xBA, ++ 0xF0, 0x90, 0x90, 0xBB, 0xF0, 0x90, 0x90, 0xBC, ++ 0xF0, 0x90, 0x90, 0xBD, 0xF0, 0x90, 0x90, 0xBE, ++ 0xF0, 0x90, 0x90, 0xBF, 0xF0, 0x90, 0x91, 0x80, ++ 0xF0, 0x90, 0x91, 0x81, 0xF0, 0x90, 0x91, 0x82, ++ 0xF0, 0x90, 0x91, 0x83, 0xF0, 0x90, 0x91, 0x84, ++ 0xF0, 0x90, 0x91, 0x85, 0xF0, 0x90, 0x91, 0x86, ++ 0xF0, 0x90, 0x91, 0x87, 0xF0, 0x90, 0x91, 0x88, ++ 0xF0, 0x90, 0x91, 0x89, 0xF0, 0x90, 0x91, 0x8A, ++ 0xF0, 0x90, 0x91, 0x8B, 0xF0, 0x90, 0x91, 0x8C, ++ 0xF0, 0x90, 0x91, 0x8D, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 
0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, ++ }, ++ { ++ 0xC3, 0xA0, 0xC3, 0xA1, 0xC3, 0xA2, 0xC3, 0xA3, ++ 0xC3, 0xA4, 0xC3, 0xA5, 0xC3, 0xA6, 0xC3, 0xA7, ++ 0xC3, 0xA8, 0xC3, 0xA9, 0xC3, 0xAA, 0xC3, 0xAB, ++ 0xC3, 0xAC, 0xC3, 0xAD, 0xC3, 0xAE, 0xC3, 0xAF, ++ 0xC3, 0xB0, 0xC3, 0xB1, 0xC3, 0xB2, 0xC3, 0xB3, ++ 0xC3, 0xB4, 0xC3, 0xB5, 0xC3, 0xB6, 0xC3, 0xB8, ++ 0xC3, 0xB9, 0xC3, 0xBA, 0xC3, 0xBB, 0xC3, 0xBC, ++ 0xC3, 0xBD, 0xC3, 0xBE, 0xC4, 0x81, 0xC4, 0x83, ++ 0xC4, 0x85, 0xC4, 0x87, 0xC4, 0x89, 0xC4, 0x8B, ++ 0xC4, 0x8D, 0xC4, 0x8F, 0xC4, 0x91, 0xC4, 0x93, ++ 0xC4, 0x95, 0xC4, 0x97, 0xC4, 0x99, 0xC4, 0x9B, ++ 0xC4, 0x9D, 0xC4, 0x9F, 0xC4, 0xA1, 0xC4, 0xA3, ++ 0xC4, 0xA5, 0xC4, 0xA7, 0xC4, 0xA9, 0xC4, 0xAB, ++ 0xC4, 0xAD, 0xC4, 0xAF, 0x69, 0xC4, 0xB3, 0xC4, ++ 0xB5, 0xC4, 0xB7, 0xC4, 0xBA, 0xC4, 0xBC, 0xC4, ++ 0xBE, 0xC5, 0x80, 0xC5, 0x82, 0xC5, 0x84, 0xC5, ++ 0x86, 0xC5, 0x88, 0xC5, 0x8B, 0xC5, 0x8D, 0xC5, ++ 0x8F, 0xC5, 0x91, 0xC5, 0x93, 0xC5, 0x95, 0xC5, ++ 0x97, 0xC5, 0x99, 0xC5, 0x9B, 0xC5, 0x9D, 0xC5, ++ 0x9F, 0xC5, 0xA1, 0xC5, 0xA3, 0xC5, 0xA5, 0xC5, ++ 0xA7, 0xC5, 0xA9, 0xC5, 0xAB, 0xC5, 0xAD, 0xC5, ++ 0xAF, 0xC5, 0xB1, 0xC5, 0xB3, 0xC5, 0xB5, 0xC5, ++ 0xB7, 0xC3, 0xBF, 0xC5, 0xBA, 0xC5, 0xBC, 0xC5, ++ 0xBE, 0xC9, 0x93, 0xC6, 0x83, 0xC6, 0x85, 0xC9, ++ 0x94, 0xC6, 0x88, 0xC9, 0x96, 0xC9, 0x97, 0xC6, ++ 0x8C, 0xC7, 0x9D, 0xC9, 0x99, 0xC9, 0x9B, 0xC6, ++ 0x92, 0xC9, 0xA0, 0xC9, 0xA3, 0xC9, 0xA9, 0xC9, ++ 0xA8, 0xC6, 0x99, 0xC9, 0xAF, 0xC9, 0xB2, 0xC9, ++ 0xB5, 0xC6, 0xA1, 0xC6, 0xA3, 0xC6, 0xA5, 0xCA, ++ 0x80, 0xC6, 0xA8, 0xCA, 0x83, 0xC6, 0xAD, 0xCA, ++ 0x88, 0xC6, 0xB0, 0xCA, 0x8A, 0xCA, 0x8B, 0xC6, ++ 0xB4, 0xC6, 0xB6, 0xCA, 0x92, 0xC6, 0xB9, 0xC6, ++ 0xBD, 0xC7, 0x86, 0xC7, 0x86, 0xC7, 0x89, 0xC7, ++ 0x89, 0xC7, 0x8C, 0xC7, 0x8C, 0xC7, 0x8E, 0xC7, ++ 0x90, 0xC7, 0x92, 0xC7, 0x94, 0xC7, 0x96, 0xC7, ++ 0x98, 0xC7, 0x9A, 0xC7, 0x9C, 0xC7, 0x9F, 0xC7, ++ 0xA1, 0xC7, 0xA3, 0xC7, 0xA5, 0xC7, 0xA7, 0xC7, ++ 0xA9, 0xC7, 0xAB, 0xC7, 0xAD, 0xC7, 0xAF, 0xC7, ++ 0xB3, 0xC7, 0xB3, 0xC7, 0xB5, 0xC6, 0x95, 0xC6, ++ 0xBF, 0xC7, 0xB9, 0xC7, 0xBB, 0xC7, 0xBD, 0xC7, ++ 0xBF, 0xC8, 0x81, 0xC8, 0x83, 0xC8, 0x85, 0xC8, ++ 0x87, 0xC8, 0x89, 0xC8, 0x8B, 0xC8, 0x8D, 0xC8, ++ 0x8F, 0xC8, 0x91, 0xC8, 0x93, 0xC8, 0x95, 0xC8, ++ 0x97, 0xC8, 0x99, 0xC8, 0x9B, 0xC8, 0x9D, 0xC8, ++ 0x9F, 0xC6, 0x9E, 0xC8, 0xA3, 0xC8, 0xA5, 0xC8, ++ 0xA7, 0xC8, 0xA9, 0xC8, 0xAB, 0xC8, 0xAD, 0xC8, ++ 0xAF, 0xC8, 0xB1, 0xC8, 0xB3, 0xE2, 0xB1, 0xA5, ++ 0xC8, 0xBC, 0xC6, 0x9A, 0xE2, 0xB1, 0xA6, 0xC9, ++ 0x82, 0xC6, 0x80, 0xCA, 0x89, 0xCA, 0x8C, 0xC9, ++ 0x87, 0xC9, 0x89, 0xC9, 0x8B, 0xC9, 0x8D, 0xC9, ++ 0x8F, 0xCE, 0xAC, 0xCE, 0xAD, 0xCE, 0xAE, 0xCE, ++ 0xAF, 0xCF, 0x8C, 0xCF, 0x8D, 0xCF, 0x8E, 0xCE, ++ 0xB1, 0xCE, 0xB2, 0xCE, 0xB3, 0xCE, 0xB4, 0xCE, ++ 0xB5, 0xCE, 0xB6, 0xCE, 0xB7, 0xCE, 0xB8, 0xCE, ++ 0xB9, 0xCE, 0xBA, 0xCE, 0xBB, 0xCE, 0xBC, 0xCE, ++ 0xBD, 0xCE, 0xBE, 0xCE, 0xBF, 0xCF, 0x80, 0xCF, ++ 0x81, 0xCF, 0x83, 0xCF, 0x84, 0xCF, 0x85, 0xCF, ++ 0x86, 0xCF, 0x87, 0xCF, 0x88, 0xCF, 0x89, 0xCF, ++ 0x8A, 0xCF, 0x8B, 0xCF, 0x99, 0xCF, 0x9B, 0xCF, ++ 0x9D, 0xCF, 0x9F, 0xCF, 0xA1, 0xCF, 0xA3, 0xCF, ++ 0xA5, 0xCF, 0xA7, 0xCF, 0xA9, 0xCF, 0xAB, 0xCF, ++ 0xAD, 0xCF, 0xAF, 0xCE, 0xB8, 0xCF, 0xB8, 0xCF, ++ 0xB2, 0xCF, 0xBB, 0xCD, 0xBB, 0xCD, 0xBC, 0xCD, ++ 0xBD, 0xD1, 0x90, 0xD1, 0x91, 0xD1, 0x92, 0xD1, ++ 0x93, 0xD1, 0x94, 0xD1, 0x95, 0xD1, 0x96, 0xD1, ++ 0x97, 0xD1, 0x98, 0xD1, 0x99, 0xD1, 0x9A, 0xD1, ++ 0x9B, 0xD1, 0x9C, 0xD1, 0x9D, 
0xD1, 0x9E, 0xD1, ++ 0x9F, 0xD0, 0xB0, 0xD0, 0xB1, 0xD0, 0xB2, 0xD0, ++ 0xB3, 0xD0, 0xB4, 0xD0, 0xB5, 0xD0, 0xB6, 0xD0, ++ 0xB7, 0xD0, 0xB8, 0xD0, 0xB9, 0xD0, 0xBA, 0xD0, ++ 0xBB, 0xD0, 0xBC, 0xD0, 0xBD, 0xD0, 0xBE, 0xD0, ++ 0xBF, 0xD1, 0x80, 0xD1, 0x81, 0xD1, 0x82, 0xD1, ++ 0x83, 0xD1, 0x84, 0xD1, 0x85, 0xD1, 0x86, 0xD1, ++ 0x87, 0xD1, 0x88, 0xD1, 0x89, 0xD1, 0x8A, 0xD1, ++ 0x8B, 0xD1, 0x8C, 0xD1, 0x8D, 0xD1, 0x8E, 0xD1, ++ 0x8F, 0xD1, 0xA1, 0xD1, 0xA3, 0xD1, 0xA5, 0xD1, ++ 0xA7, 0xD1, 0xA9, 0xD1, 0xAB, 0xD1, 0xAD, 0xD1, ++ 0xAF, 0xD1, 0xB1, 0xD1, 0xB3, 0xD1, 0xB5, 0xD1, ++ 0xB7, 0xD1, 0xB9, 0xD1, 0xBB, 0xD1, 0xBD, 0xD1, ++ 0xBF, 0xD2, 0x81, 0xD2, 0x8B, 0xD2, 0x8D, 0xD2, ++ 0x8F, 0xD2, 0x91, 0xD2, 0x93, 0xD2, 0x95, 0xD2, ++ 0x97, 0xD2, 0x99, 0xD2, 0x9B, 0xD2, 0x9D, 0xD2, ++ 0x9F, 0xD2, 0xA1, 0xD2, 0xA3, 0xD2, 0xA5, 0xD2, ++ 0xA7, 0xD2, 0xA9, 0xD2, 0xAB, 0xD2, 0xAD, 0xD2, ++ 0xAF, 0xD2, 0xB1, 0xD2, 0xB3, 0xD2, 0xB5, 0xD2, ++ 0xB7, 0xD2, 0xB9, 0xD2, 0xBB, 0xD2, 0xBD, 0xD2, ++ 0xBF, 0xD3, 0x8F, 0xD3, 0x82, 0xD3, 0x84, 0xD3, ++ 0x86, 0xD3, 0x88, 0xD3, 0x8A, 0xD3, 0x8C, 0xD3, ++ 0x8E, 0xD3, 0x91, 0xD3, 0x93, 0xD3, 0x95, 0xD3, ++ 0x97, 0xD3, 0x99, 0xD3, 0x9B, 0xD3, 0x9D, 0xD3, ++ 0x9F, 0xD3, 0xA1, 0xD3, 0xA3, 0xD3, 0xA5, 0xD3, ++ 0xA7, 0xD3, 0xA9, 0xD3, 0xAB, 0xD3, 0xAD, 0xD3, ++ 0xAF, 0xD3, 0xB1, 0xD3, 0xB3, 0xD3, 0xB5, 0xD3, ++ 0xB7, 0xD3, 0xB9, 0xD3, 0xBB, 0xD3, 0xBD, 0xD3, ++ 0xBF, 0xD4, 0x81, 0xD4, 0x83, 0xD4, 0x85, 0xD4, ++ 0x87, 0xD4, 0x89, 0xD4, 0x8B, 0xD4, 0x8D, 0xD4, ++ 0x8F, 0xD4, 0x91, 0xD4, 0x93, 0xD5, 0xA1, 0xD5, ++ 0xA2, 0xD5, 0xA3, 0xD5, 0xA4, 0xD5, 0xA5, 0xD5, ++ 0xA6, 0xD5, 0xA7, 0xD5, 0xA8, 0xD5, 0xA9, 0xD5, ++ 0xAA, 0xD5, 0xAB, 0xD5, 0xAC, 0xD5, 0xAD, 0xD5, ++ 0xAE, 0xD5, 0xAF, 0xD5, 0xB0, 0xD5, 0xB1, 0xD5, ++ 0xB2, 0xD5, 0xB3, 0xD5, 0xB4, 0xD5, 0xB5, 0xD5, ++ 0xB6, 0xD5, 0xB7, 0xD5, 0xB8, 0xD5, 0xB9, 0xD5, ++ 0xBA, 0xD5, 0xBB, 0xD5, 0xBC, 0xD5, 0xBD, 0xD5, ++ 0xBE, 0xD5, 0xBF, 0xD6, 0x80, 0xD6, 0x81, 0xD6, ++ 0x82, 0xD6, 0x83, 0xD6, 0x84, 0xD6, 0x85, 0xD6, ++ 0x86, 0xE2, 0xB4, 0x80, 0xE2, 0xB4, 0x81, 0xE2, ++ 0xB4, 0x82, 0xE2, 0xB4, 0x83, 0xE2, 0xB4, 0x84, ++ 0xE2, 0xB4, 0x85, 0xE2, 0xB4, 0x86, 0xE2, 0xB4, ++ 0x87, 0xE2, 0xB4, 0x88, 0xE2, 0xB4, 0x89, 0xE2, ++ 0xB4, 0x8A, 0xE2, 0xB4, 0x8B, 0xE2, 0xB4, 0x8C, ++ 0xE2, 0xB4, 0x8D, 0xE2, 0xB4, 0x8E, 0xE2, 0xB4, ++ 0x8F, 0xE2, 0xB4, 0x90, 0xE2, 0xB4, 0x91, 0xE2, ++ 0xB4, 0x92, 0xE2, 0xB4, 0x93, 0xE2, 0xB4, 0x94, ++ 0xE2, 0xB4, 0x95, 0xE2, 0xB4, 0x96, 0xE2, 0xB4, ++ 0x97, 0xE2, 0xB4, 0x98, 0xE2, 0xB4, 0x99, 0xE2, ++ 0xB4, 0x9A, 0xE2, 0xB4, 0x9B, 0xE2, 0xB4, 0x9C, ++ 0xE2, 0xB4, 0x9D, 0xE2, 0xB4, 0x9E, 0xE2, 0xB4, ++ 0x9F, 0xE2, 0xB4, 0xA0, 0xE2, 0xB4, 0xA1, 0xE2, ++ 0xB4, 0xA2, 0xE2, 0xB4, 0xA3, 0xE2, 0xB4, 0xA4, ++ 0xE2, 0xB4, 0xA5, 0xE1, 0xB8, 0x81, 0xE1, 0xB8, ++ 0x83, 0xE1, 0xB8, 0x85, 0xE1, 0xB8, 0x87, 0xE1, ++ 0xB8, 0x89, 0xE1, 0xB8, 0x8B, 0xE1, 0xB8, 0x8D, ++ 0xE1, 0xB8, 0x8F, 0xE1, 0xB8, 0x91, 0xE1, 0xB8, ++ 0x93, 0xE1, 0xB8, 0x95, 0xE1, 0xB8, 0x97, 0xE1, ++ 0xB8, 0x99, 0xE1, 0xB8, 0x9B, 0xE1, 0xB8, 0x9D, ++ 0xE1, 0xB8, 0x9F, 0xE1, 0xB8, 0xA1, 0xE1, 0xB8, ++ 0xA3, 0xE1, 0xB8, 0xA5, 0xE1, 0xB8, 0xA7, 0xE1, ++ 0xB8, 0xA9, 0xE1, 0xB8, 0xAB, 0xE1, 0xB8, 0xAD, ++ 0xE1, 0xB8, 0xAF, 0xE1, 0xB8, 0xB1, 0xE1, 0xB8, ++ 0xB3, 0xE1, 0xB8, 0xB5, 0xE1, 0xB8, 0xB7, 0xE1, ++ 0xB8, 0xB9, 0xE1, 0xB8, 0xBB, 0xE1, 0xB8, 0xBD, ++ 0xE1, 0xB8, 0xBF, 0xE1, 0xB9, 0x81, 0xE1, 0xB9, ++ 0x83, 0xE1, 0xB9, 0x85, 0xE1, 0xB9, 0x87, 0xE1, ++ 0xB9, 0x89, 0xE1, 0xB9, 0x8B, 0xE1, 0xB9, 0x8D, ++ 0xE1, 0xB9, 0x8F, 0xE1, 0xB9, 0x91, 0xE1, 0xB9, ++ 0x93, 0xE1, 
0xB9, 0x95, 0xE1, 0xB9, 0x97, 0xE1, ++ 0xB9, 0x99, 0xE1, 0xB9, 0x9B, 0xE1, 0xB9, 0x9D, ++ 0xE1, 0xB9, 0x9F, 0xE1, 0xB9, 0xA1, 0xE1, 0xB9, ++ 0xA3, 0xE1, 0xB9, 0xA5, 0xE1, 0xB9, 0xA7, 0xE1, ++ 0xB9, 0xA9, 0xE1, 0xB9, 0xAB, 0xE1, 0xB9, 0xAD, ++ 0xE1, 0xB9, 0xAF, 0xE1, 0xB9, 0xB1, 0xE1, 0xB9, ++ 0xB3, 0xE1, 0xB9, 0xB5, 0xE1, 0xB9, 0xB7, 0xE1, ++ 0xB9, 0xB9, 0xE1, 0xB9, 0xBB, 0xE1, 0xB9, 0xBD, ++ 0xE1, 0xB9, 0xBF, 0xE1, 0xBA, 0x81, 0xE1, 0xBA, ++ 0x83, 0xE1, 0xBA, 0x85, 0xE1, 0xBA, 0x87, 0xE1, ++ 0xBA, 0x89, 0xE1, 0xBA, 0x8B, 0xE1, 0xBA, 0x8D, ++ 0xE1, 0xBA, 0x8F, 0xE1, 0xBA, 0x91, 0xE1, 0xBA, ++ 0x93, 0xE1, 0xBA, 0x95, 0xE1, 0xBA, 0xA1, 0xE1, ++ 0xBA, 0xA3, 0xE1, 0xBA, 0xA5, 0xE1, 0xBA, 0xA7, ++ 0xE1, 0xBA, 0xA9, 0xE1, 0xBA, 0xAB, 0xE1, 0xBA, ++ 0xAD, 0xE1, 0xBA, 0xAF, 0xE1, 0xBA, 0xB1, 0xE1, ++ 0xBA, 0xB3, 0xE1, 0xBA, 0xB5, 0xE1, 0xBA, 0xB7, ++ 0xE1, 0xBA, 0xB9, 0xE1, 0xBA, 0xBB, 0xE1, 0xBA, ++ 0xBD, 0xE1, 0xBA, 0xBF, 0xE1, 0xBB, 0x81, 0xE1, ++ 0xBB, 0x83, 0xE1, 0xBB, 0x85, 0xE1, 0xBB, 0x87, ++ 0xE1, 0xBB, 0x89, 0xE1, 0xBB, 0x8B, 0xE1, 0xBB, ++ 0x8D, 0xE1, 0xBB, 0x8F, 0xE1, 0xBB, 0x91, 0xE1, ++ 0xBB, 0x93, 0xE1, 0xBB, 0x95, 0xE1, 0xBB, 0x97, ++ 0xE1, 0xBB, 0x99, 0xE1, 0xBB, 0x9B, 0xE1, 0xBB, ++ 0x9D, 0xE1, 0xBB, 0x9F, 0xE1, 0xBB, 0xA1, 0xE1, ++ 0xBB, 0xA3, 0xE1, 0xBB, 0xA5, 0xE1, 0xBB, 0xA7, ++ 0xE1, 0xBB, 0xA9, 0xE1, 0xBB, 0xAB, 0xE1, 0xBB, ++ 0xAD, 0xE1, 0xBB, 0xAF, 0xE1, 0xBB, 0xB1, 0xE1, ++ 0xBB, 0xB3, 0xE1, 0xBB, 0xB5, 0xE1, 0xBB, 0xB7, ++ 0xE1, 0xBB, 0xB9, 0xE1, 0xBC, 0x80, 0xE1, 0xBC, ++ 0x81, 0xE1, 0xBC, 0x82, 0xE1, 0xBC, 0x83, 0xE1, ++ 0xBC, 0x84, 0xE1, 0xBC, 0x85, 0xE1, 0xBC, 0x86, ++ 0xE1, 0xBC, 0x87, 0xE1, 0xBC, 0x90, 0xE1, 0xBC, ++ 0x91, 0xE1, 0xBC, 0x92, 0xE1, 0xBC, 0x93, 0xE1, ++ 0xBC, 0x94, 0xE1, 0xBC, 0x95, 0xE1, 0xBC, 0xA0, ++ 0xE1, 0xBC, 0xA1, 0xE1, 0xBC, 0xA2, 0xE1, 0xBC, ++ 0xA3, 0xE1, 0xBC, 0xA4, 0xE1, 0xBC, 0xA5, 0xE1, ++ 0xBC, 0xA6, 0xE1, 0xBC, 0xA7, 0xE1, 0xBC, 0xB0, ++ 0xE1, 0xBC, 0xB1, 0xE1, 0xBC, 0xB2, 0xE1, 0xBC, ++ 0xB3, 0xE1, 0xBC, 0xB4, 0xE1, 0xBC, 0xB5, 0xE1, ++ 0xBC, 0xB6, 0xE1, 0xBC, 0xB7, 0xE1, 0xBD, 0x80, ++ 0xE1, 0xBD, 0x81, 0xE1, 0xBD, 0x82, 0xE1, 0xBD, ++ 0x83, 0xE1, 0xBD, 0x84, 0xE1, 0xBD, 0x85, 0xE1, ++ 0xBD, 0x91, 0xE1, 0xBD, 0x93, 0xE1, 0xBD, 0x95, ++ 0xE1, 0xBD, 0x97, 0xE1, 0xBD, 0xA0, 0xE1, 0xBD, ++ 0xA1, 0xE1, 0xBD, 0xA2, 0xE1, 0xBD, 0xA3, 0xE1, ++ 0xBD, 0xA4, 0xE1, 0xBD, 0xA5, 0xE1, 0xBD, 0xA6, ++ 0xE1, 0xBD, 0xA7, 0xE1, 0xBE, 0x80, 0xE1, 0xBE, ++ 0x81, 0xE1, 0xBE, 0x82, 0xE1, 0xBE, 0x83, 0xE1, ++ 0xBE, 0x84, 0xE1, 0xBE, 0x85, 0xE1, 0xBE, 0x86, ++ 0xE1, 0xBE, 0x87, 0xE1, 0xBE, 0x90, 0xE1, 0xBE, ++ 0x91, 0xE1, 0xBE, 0x92, 0xE1, 0xBE, 0x93, 0xE1, ++ 0xBE, 0x94, 0xE1, 0xBE, 0x95, 0xE1, 0xBE, 0x96, ++ 0xE1, 0xBE, 0x97, 0xE1, 0xBE, 0xA0, 0xE1, 0xBE, ++ 0xA1, 0xE1, 0xBE, 0xA2, 0xE1, 0xBE, 0xA3, 0xE1, ++ 0xBE, 0xA4, 0xE1, 0xBE, 0xA5, 0xE1, 0xBE, 0xA6, ++ 0xE1, 0xBE, 0xA7, 0xE1, 0xBE, 0xB0, 0xE1, 0xBE, ++ 0xB1, 0xE1, 0xBD, 0xB0, 0xE1, 0xBD, 0xB1, 0xE1, ++ 0xBE, 0xB3, 0xE1, 0xBD, 0xB2, 0xE1, 0xBD, 0xB3, ++ 0xE1, 0xBD, 0xB4, 0xE1, 0xBD, 0xB5, 0xE1, 0xBF, ++ 0x83, 0xE1, 0xBF, 0x90, 0xE1, 0xBF, 0x91, 0xE1, ++ 0xBD, 0xB6, 0xE1, 0xBD, 0xB7, 0xE1, 0xBF, 0xA0, ++ 0xE1, 0xBF, 0xA1, 0xE1, 0xBD, 0xBA, 0xE1, 0xBD, ++ 0xBB, 0xE1, 0xBF, 0xA5, 0xE1, 0xBD, 0xB8, 0xE1, ++ 0xBD, 0xB9, 0xE1, 0xBD, 0xBC, 0xE1, 0xBD, 0xBD, ++ 0xE1, 0xBF, 0xB3, 0xCF, 0x89, 0x6B, 0xC3, 0xA5, ++ 0xE2, 0x85, 0x8E, 0xE2, 0x85, 0xB0, 0xE2, 0x85, ++ 0xB1, 0xE2, 0x85, 0xB2, 0xE2, 0x85, 0xB3, 0xE2, ++ 0x85, 0xB4, 0xE2, 0x85, 0xB5, 0xE2, 0x85, 0xB6, ++ 0xE2, 0x85, 0xB7, 0xE2, 0x85, 0xB8, 0xE2, 0x85, 
++ 0xB9, 0xE2, 0x85, 0xBA, 0xE2, 0x85, 0xBB, 0xE2, ++ 0x85, 0xBC, 0xE2, 0x85, 0xBD, 0xE2, 0x85, 0xBE, ++ 0xE2, 0x85, 0xBF, 0xE2, 0x86, 0x84, 0xE2, 0x93, ++ 0x90, 0xE2, 0x93, 0x91, 0xE2, 0x93, 0x92, 0xE2, ++ 0x93, 0x93, 0xE2, 0x93, 0x94, 0xE2, 0x93, 0x95, ++ 0xE2, 0x93, 0x96, 0xE2, 0x93, 0x97, 0xE2, 0x93, ++ 0x98, 0xE2, 0x93, 0x99, 0xE2, 0x93, 0x9A, 0xE2, ++ 0x93, 0x9B, 0xE2, 0x93, 0x9C, 0xE2, 0x93, 0x9D, ++ 0xE2, 0x93, 0x9E, 0xE2, 0x93, 0x9F, 0xE2, 0x93, ++ 0xA0, 0xE2, 0x93, 0xA1, 0xE2, 0x93, 0xA2, 0xE2, ++ 0x93, 0xA3, 0xE2, 0x93, 0xA4, 0xE2, 0x93, 0xA5, ++ 0xE2, 0x93, 0xA6, 0xE2, 0x93, 0xA7, 0xE2, 0x93, ++ 0xA8, 0xE2, 0x93, 0xA9, 0xE2, 0xB0, 0xB0, 0xE2, ++ 0xB0, 0xB1, 0xE2, 0xB0, 0xB2, 0xE2, 0xB0, 0xB3, ++ 0xE2, 0xB0, 0xB4, 0xE2, 0xB0, 0xB5, 0xE2, 0xB0, ++ 0xB6, 0xE2, 0xB0, 0xB7, 0xE2, 0xB0, 0xB8, 0xE2, ++ 0xB0, 0xB9, 0xE2, 0xB0, 0xBA, 0xE2, 0xB0, 0xBB, ++ 0xE2, 0xB0, 0xBC, 0xE2, 0xB0, 0xBD, 0xE2, 0xB0, ++ 0xBE, 0xE2, 0xB0, 0xBF, 0xE2, 0xB1, 0x80, 0xE2, ++ 0xB1, 0x81, 0xE2, 0xB1, 0x82, 0xE2, 0xB1, 0x83, ++ 0xE2, 0xB1, 0x84, 0xE2, 0xB1, 0x85, 0xE2, 0xB1, ++ 0x86, 0xE2, 0xB1, 0x87, 0xE2, 0xB1, 0x88, 0xE2, ++ 0xB1, 0x89, 0xE2, 0xB1, 0x8A, 0xE2, 0xB1, 0x8B, ++ 0xE2, 0xB1, 0x8C, 0xE2, 0xB1, 0x8D, 0xE2, 0xB1, ++ 0x8E, 0xE2, 0xB1, 0x8F, 0xE2, 0xB1, 0x90, 0xE2, ++ 0xB1, 0x91, 0xE2, 0xB1, 0x92, 0xE2, 0xB1, 0x93, ++ 0xE2, 0xB1, 0x94, 0xE2, 0xB1, 0x95, 0xE2, 0xB1, ++ 0x96, 0xE2, 0xB1, 0x97, 0xE2, 0xB1, 0x98, 0xE2, ++ 0xB1, 0x99, 0xE2, 0xB1, 0x9A, 0xE2, 0xB1, 0x9B, ++ 0xE2, 0xB1, 0x9C, 0xE2, 0xB1, 0x9D, 0xE2, 0xB1, ++ 0x9E, 0xE2, 0xB1, 0xA1, 0xC9, 0xAB, 0xE1, 0xB5, ++ 0xBD, 0xC9, 0xBD, 0xE2, 0xB1, 0xA8, 0xE2, 0xB1, ++ 0xAA, 0xE2, 0xB1, 0xAC, 0xE2, 0xB1, 0xB6, 0xE2, ++ 0xB2, 0x81, 0xE2, 0xB2, 0x83, 0xE2, 0xB2, 0x85, ++ 0xE2, 0xB2, 0x87, 0xE2, 0xB2, 0x89, 0xE2, 0xB2, ++ 0x8B, 0xE2, 0xB2, 0x8D, 0xE2, 0xB2, 0x8F, 0xE2, ++ 0xB2, 0x91, 0xE2, 0xB2, 0x93, 0xE2, 0xB2, 0x95, ++ 0xE2, 0xB2, 0x97, 0xE2, 0xB2, 0x99, 0xE2, 0xB2, ++ 0x9B, 0xE2, 0xB2, 0x9D, 0xE2, 0xB2, 0x9F, 0xE2, ++ 0xB2, 0xA1, 0xE2, 0xB2, 0xA3, 0xE2, 0xB2, 0xA5, ++ 0xE2, 0xB2, 0xA7, 0xE2, 0xB2, 0xA9, 0xE2, 0xB2, ++ 0xAB, 0xE2, 0xB2, 0xAD, 0xE2, 0xB2, 0xAF, 0xE2, ++ 0xB2, 0xB1, 0xE2, 0xB2, 0xB3, 0xE2, 0xB2, 0xB5, ++ 0xE2, 0xB2, 0xB7, 0xE2, 0xB2, 0xB9, 0xE2, 0xB2, ++ 0xBB, 0xE2, 0xB2, 0xBD, 0xE2, 0xB2, 0xBF, 0xE2, ++ 0xB3, 0x81, 0xE2, 0xB3, 0x83, 0xE2, 0xB3, 0x85, ++ 0xE2, 0xB3, 0x87, 0xE2, 0xB3, 0x89, 0xE2, 0xB3, ++ 0x8B, 0xE2, 0xB3, 0x8D, 0xE2, 0xB3, 0x8F, 0xE2, ++ 0xB3, 0x91, 0xE2, 0xB3, 0x93, 0xE2, 0xB3, 0x95, ++ 0xE2, 0xB3, 0x97, 0xE2, 0xB3, 0x99, 0xE2, 0xB3, ++ 0x9B, 0xE2, 0xB3, 0x9D, 0xE2, 0xB3, 0x9F, 0xE2, ++ 0xB3, 0xA1, 0xE2, 0xB3, 0xA3, 0xEF, 0xBD, 0x81, ++ 0xEF, 0xBD, 0x82, 0xEF, 0xBD, 0x83, 0xEF, 0xBD, ++ 0x84, 0xEF, 0xBD, 0x85, 0xEF, 0xBD, 0x86, 0xEF, ++ 0xBD, 0x87, 0xEF, 0xBD, 0x88, 0xEF, 0xBD, 0x89, ++ 0xEF, 0xBD, 0x8A, 0xEF, 0xBD, 0x8B, 0xEF, 0xBD, ++ 0x8C, 0xEF, 0xBD, 0x8D, 0xEF, 0xBD, 0x8E, 0xEF, ++ 0xBD, 0x8F, 0xEF, 0xBD, 0x90, 0xEF, 0xBD, 0x91, ++ 0xEF, 0xBD, 0x92, 0xEF, 0xBD, 0x93, 0xEF, 0xBD, ++ 0x94, 0xEF, 0xBD, 0x95, 0xEF, 0xBD, 0x96, 0xEF, ++ 0xBD, 0x97, 0xEF, 0xBD, 0x98, 0xEF, 0xBD, 0x99, ++ 0xEF, 0xBD, 0x9A, 0xF0, 0x90, 0x90, 0xA8, 0xF0, ++ 0x90, 0x90, 0xA9, 0xF0, 0x90, 0x90, 0xAA, 0xF0, ++ 0x90, 0x90, 0xAB, 0xF0, 0x90, 0x90, 0xAC, 0xF0, ++ 0x90, 0x90, 0xAD, 0xF0, 0x90, 0x90, 0xAE, 0xF0, ++ 0x90, 0x90, 0xAF, 0xF0, 0x90, 0x90, 0xB0, 0xF0, ++ 0x90, 0x90, 0xB1, 0xF0, 0x90, 0x90, 0xB2, 0xF0, ++ 0x90, 0x90, 0xB3, 0xF0, 0x90, 0x90, 0xB4, 0xF0, ++ 0x90, 0x90, 0xB5, 0xF0, 0x90, 0x90, 0xB6, 0xF0, ++ 0x90, 0x90, 0xB7, 0xF0, 0x90, 
0x90, 0xB8, 0xF0, ++ 0x90, 0x90, 0xB9, 0xF0, 0x90, 0x90, 0xBA, 0xF0, ++ 0x90, 0x90, 0xBB, 0xF0, 0x90, 0x90, 0xBC, 0xF0, ++ 0x90, 0x90, 0xBD, 0xF0, 0x90, 0x90, 0xBE, 0xF0, ++ 0x90, 0x90, 0xBF, 0xF0, 0x90, 0x91, 0x80, 0xF0, ++ 0x90, 0x91, 0x81, 0xF0, 0x90, 0x91, 0x82, 0xF0, ++ 0x90, 0x91, 0x83, 0xF0, 0x90, 0x91, 0x84, 0xF0, ++ 0x90, 0x91, 0x85, 0xF0, 0x90, 0x91, 0x86, 0xF0, ++ 0x90, 0x91, 0x87, 0xF0, 0x90, 0x91, 0x88, 0xF0, ++ 0x90, 0x91, 0x89, 0xF0, 0x90, 0x91, 0x8A, 0xF0, ++ 0x90, 0x91, 0x8B, 0xF0, 0x90, 0x91, 0x8C, 0xF0, ++ 0x90, 0x91, 0x8D, 0xF0, 0x90, 0x91, 0x8E, 0xF0, ++ 0x90, 0x91, 0x8F, ++ }, ++}; ++ ++static const u8_displacement_t u8_toupper_b3_tbl[2][5][256] = { ++ { ++ { /* Third byte table 0. */ ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { 0, 0 }, { 1, 2 }, ++ { 2, 64 }, { 3, 125 }, { 4, 188 }, { 5, 226 }, ++ { 6, 288 }, { 7, 338 }, { 8, 364 }, { N_, 0 }, ++ { N_, 0 }, { 9, 376 }, { 10, 378 }, { 11, 416 }, ++ { 12, 486 }, { 13, 518 }, { 14, 614 }, { 15, 670 }, ++ { 16, 724 }, { 17, 740 }, { 18, 802 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 
0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ }, ++ { /* Third byte table 1. */ ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { 19, 816 }, { 20, 912 }, { 21, 1008 }, { 22, 1092 }, ++ { 23, 1179 }, { 24, 1269 }, { 25, 1365 }, { 26, 1448 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ }, ++ { /* Third byte table 2. 
*/ ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { 27, 1469 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, { 28, 1517 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ }, ++ { /* Third byte table 3. 
*/ ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { 29, 1595 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ }, ++ { /* Third byte table 4. 
*/ ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { 30, 1673 }, { 31, 1769 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ }, ++ }, ++ { ++ { /* Third byte table 0. 
*/ ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { 0, 0 }, { 1, 2 }, ++ { 2, 64 }, { 3, 125 }, { 4, 188 }, { 5, 230 }, ++ { 6, 292 }, { 7, 344 }, { 8, 388 }, { N_, 0 }, ++ { N_, 0 }, { 9, 404 }, { 10, 412 }, { 11, 450 }, ++ { 12, 524 }, { 13, 556 }, { 14, 652 }, { 15, 708 }, ++ { 16, 772 }, { 17, 792 }, { 18, 854 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ }, ++ { /* Third byte table 1. 
*/ ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { 19, 868 }, { N_, 0 }, { N_, 0 }, ++ { 20, 871 }, { 21, 967 }, { 22, 1063 }, { 23, 1147 }, ++ { 24, 1234 }, { 25, 1324 }, { 26, 1420 }, { 27, 1503 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ }, ++ { /* Third byte table 2. 
*/ ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { 28, 1524 }, { 29, 1575 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, { 30, 1578 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { 31, 1656 }, { 32, 1704 }, { 33, 1816 }, { 34, 1912 }, ++ { 35, 1966 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ }, ++ { /* Third byte table 3. 
*/ ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { 36, 2080 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ }, ++ { /* Third byte table 4. 
*/ ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { 37, 2158 }, { 38, 2254 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ { N_, 0 }, { N_, 0 }, { N_, 0 }, { N_, 0 }, ++ }, ++ }, ++}; ++ ++static const uchar_t u8_toupper_b4_tbl[2][39][257] = { ++ { ++ { /* Fourth byte table 0. 
*/ ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 2, 2, ++ 2, 2, 2, 2, 2, 2, 2, 2, ++ 2, 2, 2, 2, 2, 2, 2, 2, ++ 2, 2, 2, 2, 2, 2, 2, 2, ++ 2, 2, 2, 2, 2, 2, 2, 2, ++ 2, 2, 2, 2, 2, 2, 2, 2, ++ 2, 2, 2, 2, 2, 2, 2, 2, ++ 2, 2, 2, 2, 2, 2, 2, 2, ++ 2, 2, 2, 2, 2, 2, 2, 2, ++ 2, 2, 2, 2, 2, 2, 2, 2, ++ 2, ++ }, ++ { /* Fourth byte table 1. */ ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 2, 4, 6, 8, 10, 12, 14, ++ 16, 18, 20, 22, 24, 26, 28, 30, ++ 32, 34, 36, 38, 40, 42, 44, 46, ++ 46, 48, 50, 52, 54, 56, 58, 60, ++ 62, 62, 62, 62, 62, 62, 62, 62, ++ 62, 62, 62, 62, 62, 62, 62, 62, ++ 62, 62, 62, 62, 62, 62, 62, 62, ++ 62, 62, 62, 62, 62, 62, 62, 62, ++ 62, 62, 62, 62, 62, 62, 62, 62, ++ 62, 62, 62, 62, 62, 62, 62, 62, ++ 62, 62, 62, 62, 62, 62, 62, 62, ++ 62, 62, 62, 62, 62, 62, 62, 62, ++ 62, ++ }, ++ { /* Fourth byte table 2. */ ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 2, 2, 4, 4, 6, 6, ++ 8, 8, 10, 10, 12, 12, 14, 14, ++ 16, 16, 18, 18, 20, 20, 22, 22, ++ 24, 24, 26, 26, 28, 28, 30, 30, ++ 32, 32, 34, 34, 36, 36, 38, 38, ++ 40, 40, 42, 42, 44, 44, 46, 46, ++ 48, 48, 49, 49, 51, 51, 53, 53, ++ 55, 55, 55, 57, 57, 59, 59, 61, ++ 61, 61, 61, 61, 61, 61, 61, 61, ++ 61, 61, 61, 61, 61, 61, 61, 61, ++ 61, 61, 61, 61, 61, 61, 61, 61, ++ 61, 61, 61, 61, 61, 61, 61, 61, ++ 61, 61, 61, 61, 61, 61, 61, 61, ++ 61, 61, 61, 61, 61, 61, 61, 61, ++ 61, 61, 61, 61, 61, 61, 61, 61, ++ 61, 61, 61, 61, 61, 61, 61, 61, ++ 61, ++ }, ++ { /* Fourth byte table 3. 
*/ ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 2, 2, 4, 4, 6, 6, 8, ++ 8, 10, 10, 10, 12, 12, 14, 14, ++ 16, 16, 18, 18, 20, 20, 22, 22, ++ 24, 24, 26, 26, 28, 28, 30, 30, ++ 32, 32, 34, 34, 36, 36, 38, 38, ++ 40, 40, 42, 42, 44, 44, 46, 46, ++ 48, 48, 50, 50, 52, 52, 54, 54, ++ 56, 56, 56, 58, 58, 60, 60, 62, ++ 63, 63, 63, 63, 63, 63, 63, 63, ++ 63, 63, 63, 63, 63, 63, 63, 63, ++ 63, 63, 63, 63, 63, 63, 63, 63, ++ 63, 63, 63, 63, 63, 63, 63, 63, ++ 63, 63, 63, 63, 63, 63, 63, 63, ++ 63, 63, 63, 63, 63, 63, 63, 63, ++ 63, 63, 63, 63, 63, 63, 63, 63, ++ 63, 63, 63, 63, 63, 63, 63, 63, ++ 63, ++ }, ++ { /* Fourth byte table 4. */ ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 2, 2, 4, 4, ++ 4, 6, 6, 6, 6, 8, 8, 8, ++ 8, 8, 8, 10, 10, 10, 12, 12, ++ 12, 12, 14, 14, 14, 14, 14, 16, ++ 16, 16, 18, 18, 20, 20, 22, 22, ++ 22, 24, 24, 24, 24, 24, 26, 26, ++ 26, 28, 28, 28, 28, 30, 30, 32, ++ 32, 32, 34, 34, 34, 34, 36, 36, ++ 38, 38, 38, 38, 38, 38, 38, 38, ++ 38, 38, 38, 38, 38, 38, 38, 38, ++ 38, 38, 38, 38, 38, 38, 38, 38, ++ 38, 38, 38, 38, 38, 38, 38, 38, ++ 38, 38, 38, 38, 38, 38, 38, 38, ++ 38, 38, 38, 38, 38, 38, 38, 38, ++ 38, 38, 38, 38, 38, 38, 38, 38, ++ 38, 38, 38, 38, 38, 38, 38, 38, ++ 38, ++ }, ++ { /* Fourth byte table 5. */ ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 2, 4, ++ 4, 6, 8, 8, 10, 12, 12, 14, ++ 14, 16, 16, 18, 18, 20, 20, 22, ++ 22, 24, 24, 26, 26, 28, 30, 30, ++ 32, 32, 34, 34, 36, 36, 38, 38, ++ 40, 40, 42, 42, 44, 44, 46, 46, ++ 48, 48, 48, 50, 52, 52, 54, 54, ++ 54, 54, 56, 56, 58, 58, 60, 60, ++ 62, 62, 62, 62, 62, 62, 62, 62, ++ 62, 62, 62, 62, 62, 62, 62, 62, ++ 62, 62, 62, 62, 62, 62, 62, 62, ++ 62, 62, 62, 62, 62, 62, 62, 62, ++ 62, 62, 62, 62, 62, 62, 62, 62, ++ 62, 62, 62, 62, 62, 62, 62, 62, ++ 62, 62, 62, 62, 62, 62, 62, 62, ++ 62, 62, 62, 62, 62, 62, 62, 62, ++ 62, ++ }, ++ { /* Fourth byte table 6. 
*/ ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 2, 2, 4, 4, 6, 6, ++ 8, 8, 10, 10, 12, 12, 14, 14, ++ 16, 16, 18, 18, 20, 20, 22, 22, ++ 24, 24, 26, 26, 28, 28, 30, 30, ++ 32, 32, 32, 32, 34, 34, 36, 36, ++ 38, 38, 40, 40, 42, 42, 44, 44, ++ 46, 46, 48, 48, 50, 50, 50, 50, ++ 50, 50, 50, 50, 50, 50, 50, 50, ++ 50, 50, 50, 50, 50, 50, 50, 50, ++ 50, 50, 50, 50, 50, 50, 50, 50, ++ 50, 50, 50, 50, 50, 50, 50, 50, ++ 50, 50, 50, 50, 50, 50, 50, 50, ++ 50, 50, 50, 50, 50, 50, 50, 50, ++ 50, 50, 50, 50, 50, 50, 50, 50, ++ 50, 50, 50, 50, 50, 50, 50, 50, ++ 50, 50, 50, 50, 50, 50, 50, 50, ++ 50, ++ }, ++ { /* Fourth byte table 7. */ ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 2, 4, 4, 6, ++ 8, 8, 10, 10, 12, 12, 12, 12, ++ 12, 14, 14, 14, 16, 16, 16, 16, ++ 16, 18, 20, 20, 20, 20, 20, 20, ++ 22, 22, 22, 24, 24, 24, 26, 26, ++ 26, 26, 26, 26, 26, 26, 26, 26, ++ 26, 26, 26, 26, 26, 26, 26, 26, ++ 26, 26, 26, 26, 26, 26, 26, 26, ++ 26, 26, 26, 26, 26, 26, 26, 26, ++ 26, 26, 26, 26, 26, 26, 26, 26, ++ 26, 26, 26, 26, 26, 26, 26, 26, ++ 26, 26, 26, 26, 26, 26, 26, 26, ++ 26, 26, 26, 26, 26, 26, 26, 26, ++ 26, 26, 26, 26, 26, 26, 26, 26, ++ 26, ++ }, ++ { /* Fourth byte table 8. */ ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 2, 2, 2, 4, 4, 4, 4, ++ 4, 6, 6, 8, 10, 10, 10, 10, ++ 10, 10, 10, 12, 12, 12, 12, 12, ++ 12, 12, 12, 12, 12, 12, 12, 12, ++ 12, 12, 12, 12, 12, 12, 12, 12, ++ 12, 12, 12, 12, 12, 12, 12, 12, ++ 12, 12, 12, 12, 12, 12, 12, 12, ++ 12, 12, 12, 12, 12, 12, 12, 12, ++ 12, 12, 12, 12, 12, 12, 12, 12, ++ 12, 12, 12, 12, 12, 12, 12, 12, ++ 12, 12, 12, 12, 12, 12, 12, 12, ++ 12, 12, 12, 12, 12, 12, 12, 12, ++ 12, 12, 12, 12, 12, 12, 12, 12, ++ 12, 12, 12, 12, 12, 12, 12, 12, ++ 12, 12, 12, 12, 12, 12, 12, 12, ++ 12, 12, 12, 12, 12, 12, 12, 12, ++ 12, ++ }, ++ { /* Fourth byte table 9. 
*/ ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 2, 2, ++ 2, 2, 2, 2, 2, 2, 2, 2, ++ 2, 2, 2, 2, 2, 2, 2, 2, ++ 2, 2, 2, 2, 2, 2, 2, 2, ++ 2, 2, 2, 2, 2, 2, 2, 2, ++ 2, 2, 2, 2, 2, 2, 2, 2, ++ 2, 2, 2, 2, 2, 2, 2, 2, ++ 2, 2, 2, 2, 2, 2, 2, 2, ++ 2, 2, 2, 2, 2, 2, 2, 2, ++ 2, 2, 2, 2, 2, 2, 2, 2, ++ 2, 2, 2, 2, 2, 2, 2, 2, ++ 2, 2, 2, 2, 2, 2, 2, 2, ++ 2, 2, 2, 2, 2, 2, 2, 2, ++ 2, 2, 2, 2, 2, 2, 2, 2, ++ 2, 2, 2, 2, 2, 2, 2, 2, ++ 2, 2, 2, 2, 2, 2, 2, 2, ++ 2, ++ }, ++ { /* Fourth byte table 10. */ ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 2, 4, 6, ++ 8, 8, 10, 12, 14, 16, 18, 20, ++ 22, 24, 26, 28, 30, 32, 34, 36, ++ 38, 38, 38, 38, 38, 38, 38, 38, ++ 38, 38, 38, 38, 38, 38, 38, 38, ++ 38, 38, 38, 38, 38, 38, 38, 38, ++ 38, 38, 38, 38, 38, 38, 38, 38, ++ 38, 38, 38, 38, 38, 38, 38, 38, ++ 38, 38, 38, 38, 38, 38, 38, 38, ++ 38, 38, 38, 38, 38, 38, 38, 38, ++ 38, 38, 38, 38, 38, 38, 38, 38, ++ 38, ++ }, ++ { /* Fourth byte table 11. */ ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 2, 4, 6, 8, 10, 12, 14, ++ 16, 18, 20, 22, 24, 26, 28, 30, ++ 30, 32, 34, 34, 34, 34, 36, 38, ++ 38, 38, 40, 40, 42, 42, 44, 44, ++ 46, 46, 48, 48, 50, 50, 52, 52, ++ 54, 54, 56, 56, 58, 58, 60, 60, ++ 62, 64, 66, 68, 68, 68, 70, 70, ++ 70, 70, 70, 70, 70, 70, 70, 70, ++ 70, 70, 70, 70, 70, 70, 70, 70, ++ 70, 70, 70, 70, 70, 70, 70, 70, ++ 70, 70, 70, 70, 70, 70, 70, 70, ++ 70, 70, 70, 70, 70, 70, 70, 70, ++ 70, 70, 70, 70, 70, 70, 70, 70, ++ 70, 70, 70, 70, 70, 70, 70, 70, ++ 70, 70, 70, 70, 70, 70, 70, 70, ++ 70, 70, 70, 70, 70, 70, 70, 70, ++ 70, ++ }, ++ { /* Fourth byte table 12. 
*/ ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 2, 4, 6, 8, 10, 12, 14, ++ 16, 18, 20, 22, 24, 26, 28, 30, ++ 32, 32, 32, 32, 32, 32, 32, 32, ++ 32, 32, 32, 32, 32, 32, 32, 32, ++ 32, 32, 32, 32, 32, 32, 32, 32, ++ 32, 32, 32, 32, 32, 32, 32, 32, ++ 32, 32, 32, 32, 32, 32, 32, 32, ++ 32, 32, 32, 32, 32, 32, 32, 32, ++ 32, 32, 32, 32, 32, 32, 32, 32, ++ 32, 32, 32, 32, 32, 32, 32, 32, ++ 32, ++ }, ++ { /* Fourth byte table 13. */ ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 2, 4, 6, 8, 10, 12, 14, ++ 16, 18, 20, 22, 24, 26, 28, 30, ++ 32, 34, 36, 38, 40, 42, 44, 46, ++ 48, 50, 52, 54, 56, 58, 60, 62, ++ 64, 64, 66, 66, 68, 68, 70, 70, ++ 72, 72, 74, 74, 76, 76, 78, 78, ++ 80, 80, 82, 82, 84, 84, 86, 86, ++ 88, 88, 90, 90, 92, 92, 94, 94, ++ 96, 96, 96, 96, 96, 96, 96, 96, ++ 96, 96, 96, 96, 96, 96, 96, 96, ++ 96, 96, 96, 96, 96, 96, 96, 96, ++ 96, 96, 96, 96, 96, 96, 96, 96, ++ 96, 96, 96, 96, 96, 96, 96, 96, ++ 96, 96, 96, 96, 96, 96, 96, 96, ++ 96, 96, 96, 96, 96, 96, 96, 96, ++ 96, 96, 96, 96, 96, 96, 96, 96, ++ 96, ++ }, ++ { /* Fourth byte table 14. */ ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 2, 2, 2, 2, 2, 2, ++ 2, 2, 2, 2, 4, 4, 6, 6, ++ 8, 8, 10, 10, 12, 12, 14, 14, ++ 16, 16, 18, 18, 20, 20, 22, 22, ++ 24, 24, 26, 26, 28, 28, 30, 30, ++ 32, 32, 34, 34, 36, 36, 38, 38, ++ 40, 40, 42, 42, 44, 44, 46, 46, ++ 48, 48, 50, 50, 52, 52, 54, 54, ++ 56, 56, 56, 56, 56, 56, 56, 56, ++ 56, 56, 56, 56, 56, 56, 56, 56, ++ 56, 56, 56, 56, 56, 56, 56, 56, ++ 56, 56, 56, 56, 56, 56, 56, 56, ++ 56, 56, 56, 56, 56, 56, 56, 56, ++ 56, 56, 56, 56, 56, 56, 56, 56, ++ 56, 56, 56, 56, 56, 56, 56, 56, ++ 56, 56, 56, 56, 56, 56, 56, 56, ++ 56, ++ }, ++ { /* Fourth byte table 15. 
*/ ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 2, 2, 4, 4, 6, ++ 6, 8, 8, 10, 10, 12, 12, 14, ++ 14, 14, 16, 16, 18, 18, 20, 20, ++ 22, 22, 24, 24, 26, 26, 28, 28, ++ 30, 30, 32, 32, 34, 34, 36, 36, ++ 38, 38, 40, 40, 42, 42, 44, 44, ++ 46, 46, 48, 48, 50, 50, 52, 52, ++ 52, 52, 54, 54, 54, 54, 54, 54, ++ 54, 54, 54, 54, 54, 54, 54, 54, ++ 54, 54, 54, 54, 54, 54, 54, 54, ++ 54, 54, 54, 54, 54, 54, 54, 54, ++ 54, 54, 54, 54, 54, 54, 54, 54, ++ 54, 54, 54, 54, 54, 54, 54, 54, ++ 54, 54, 54, 54, 54, 54, 54, 54, ++ 54, 54, 54, 54, 54, 54, 54, 54, ++ 54, 54, 54, 54, 54, 54, 54, 54, ++ 54, ++ }, ++ { /* Fourth byte table 16. */ ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 2, 2, 4, 4, 6, 6, ++ 8, 8, 10, 10, 12, 12, 14, 14, ++ 16, 16, 16, 16, 16, 16, 16, 16, ++ 16, 16, 16, 16, 16, 16, 16, 16, ++ 16, 16, 16, 16, 16, 16, 16, 16, ++ 16, 16, 16, 16, 16, 16, 16, 16, ++ 16, 16, 16, 16, 16, 16, 16, 16, ++ 16, 16, 16, 16, 16, 16, 16, 16, ++ 16, 16, 16, 16, 16, 16, 16, 16, ++ 16, 16, 16, 16, 16, 16, 16, 16, ++ 16, 16, 16, 16, 16, 16, 16, 16, ++ 16, 16, 16, 16, 16, 16, 16, 16, ++ 16, 16, 16, 16, 16, 16, 16, 16, ++ 16, 16, 16, 16, 16, 16, 16, 16, ++ 16, 16, 16, 16, 16, 16, 16, 16, ++ 16, 16, 16, 16, 16, 16, 16, 16, ++ 16, ++ }, ++ { /* Fourth byte table 17. */ ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 2, 4, 6, 8, 10, 12, ++ 14, 16, 18, 20, 22, 24, 26, 28, ++ 30, 32, 34, 36, 38, 40, 42, 44, ++ 46, 48, 50, 52, 54, 56, 58, 60, ++ 62, 62, 62, 62, 62, 62, 62, 62, ++ 62, 62, 62, 62, 62, 62, 62, 62, ++ 62, 62, 62, 62, 62, 62, 62, 62, ++ 62, 62, 62, 62, 62, 62, 62, 62, ++ 62, 62, 62, 62, 62, 62, 62, 62, ++ 62, 62, 62, 62, 62, 62, 62, 62, ++ 62, 62, 62, 62, 62, 62, 62, 62, ++ 62, 62, 62, 62, 62, 62, 62, 62, ++ 62, ++ }, ++ { /* Fourth byte table 18. 
*/ ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 2, 4, 6, 8, 10, 12, 14, ++ 14, 14, 14, 14, 14, 14, 14, 14, ++ 14, 14, 14, 14, 14, 14, 14, 14, ++ 14, 14, 14, 14, 14, 14, 14, 14, ++ 14, 14, 14, 14, 14, 14, 14, 14, ++ 14, 14, 14, 14, 14, 14, 14, 14, ++ 14, 14, 14, 14, 14, 14, 14, 14, ++ 14, 14, 14, 14, 14, 14, 14, 14, ++ 14, 14, 14, 14, 14, 14, 14, 14, ++ 14, 14, 14, 14, 14, 14, 14, 14, ++ 14, 14, 14, 14, 14, 14, 14, 14, ++ 14, 14, 14, 14, 14, 14, 14, 14, ++ 14, 14, 14, 14, 14, 14, 14, 14, ++ 14, 14, 14, 14, 14, 14, 14, 14, ++ 14, 14, 14, 14, 14, 14, 14, 14, ++ 14, 14, 14, 14, 14, 14, 14, 14, ++ 14, ++ }, ++ { /* Fourth byte table 19. */ ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 3, 3, 6, 6, 9, 9, ++ 12, 12, 15, 15, 18, 18, 21, 21, ++ 24, 24, 27, 27, 30, 30, 33, 33, ++ 36, 36, 39, 39, 42, 42, 45, 45, ++ 48, 48, 51, 51, 54, 54, 57, 57, ++ 60, 60, 63, 63, 66, 66, 69, 69, ++ 72, 72, 75, 75, 78, 78, 81, 81, ++ 84, 84, 87, 87, 90, 90, 93, 93, ++ 96, 96, 96, 96, 96, 96, 96, 96, ++ 96, 96, 96, 96, 96, 96, 96, 96, ++ 96, 96, 96, 96, 96, 96, 96, 96, ++ 96, 96, 96, 96, 96, 96, 96, 96, ++ 96, 96, 96, 96, 96, 96, 96, 96, ++ 96, 96, 96, 96, 96, 96, 96, 96, ++ 96, 96, 96, 96, 96, 96, 96, 96, ++ 96, 96, 96, 96, 96, 96, 96, 96, ++ 96, ++ }, ++ { /* Fourth byte table 20. */ ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 3, 3, 6, 6, 9, 9, ++ 12, 12, 15, 15, 18, 18, 21, 21, ++ 24, 24, 27, 27, 30, 30, 33, 33, ++ 36, 36, 39, 39, 42, 42, 45, 45, ++ 48, 48, 51, 51, 54, 54, 57, 57, ++ 60, 60, 63, 63, 66, 66, 69, 69, ++ 72, 72, 75, 75, 78, 78, 81, 81, ++ 84, 84, 87, 87, 90, 90, 93, 93, ++ 96, 96, 96, 96, 96, 96, 96, 96, ++ 96, 96, 96, 96, 96, 96, 96, 96, ++ 96, 96, 96, 96, 96, 96, 96, 96, ++ 96, 96, 96, 96, 96, 96, 96, 96, ++ 96, 96, 96, 96, 96, 96, 96, 96, ++ 96, 96, 96, 96, 96, 96, 96, 96, ++ 96, 96, 96, 96, 96, 96, 96, 96, ++ 96, 96, 96, 96, 96, 96, 96, 96, ++ 96, ++ }, ++ { /* Fourth byte table 21. 
*/ ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 3, 3, 6, 6, 9, 9, ++ 12, 12, 15, 15, 18, 18, 21, 21, ++ 24, 24, 27, 27, 30, 30, 33, 33, ++ 33, 33, 33, 33, 36, 36, 36, 36, ++ 36, 36, 39, 39, 42, 42, 45, 45, ++ 48, 48, 51, 51, 54, 54, 57, 57, ++ 60, 60, 63, 63, 66, 66, 69, 69, ++ 72, 72, 75, 75, 78, 78, 81, 81, ++ 84, 84, 84, 84, 84, 84, 84, 84, ++ 84, 84, 84, 84, 84, 84, 84, 84, ++ 84, 84, 84, 84, 84, 84, 84, 84, ++ 84, 84, 84, 84, 84, 84, 84, 84, ++ 84, 84, 84, 84, 84, 84, 84, 84, ++ 84, 84, 84, 84, 84, 84, 84, 84, ++ 84, 84, 84, 84, 84, 84, 84, 84, ++ 84, 84, 84, 84, 84, 84, 84, 84, ++ 84, ++ }, ++ { /* Fourth byte table 22. */ ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 3, 3, 6, 6, 9, 9, ++ 12, 12, 15, 15, 18, 18, 21, 21, ++ 24, 24, 27, 27, 30, 30, 33, 33, ++ 36, 36, 39, 39, 42, 42, 45, 45, ++ 48, 48, 51, 51, 54, 54, 57, 57, ++ 60, 60, 63, 63, 66, 66, 69, 69, ++ 72, 72, 75, 75, 78, 78, 81, 81, ++ 84, 84, 87, 87, 87, 87, 87, 87, ++ 87, 87, 87, 87, 87, 87, 87, 87, ++ 87, 87, 87, 87, 87, 87, 87, 87, ++ 87, 87, 87, 87, 87, 87, 87, 87, ++ 87, 87, 87, 87, 87, 87, 87, 87, ++ 87, 87, 87, 87, 87, 87, 87, 87, ++ 87, 87, 87, 87, 87, 87, 87, 87, ++ 87, 87, 87, 87, 87, 87, 87, 87, ++ 87, 87, 87, 87, 87, 87, 87, 87, ++ 87, ++ }, ++ { /* Fourth byte table 23. */ ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 3, 6, 9, 12, 15, 18, 21, ++ 24, 24, 24, 24, 24, 24, 24, 24, ++ 24, 27, 30, 33, 36, 39, 42, 42, ++ 42, 42, 42, 42, 42, 42, 42, 42, ++ 42, 45, 48, 51, 54, 57, 60, 63, ++ 66, 66, 66, 66, 66, 66, 66, 66, ++ 66, 69, 72, 75, 78, 81, 84, 87, ++ 90, 90, 90, 90, 90, 90, 90, 90, ++ 90, 90, 90, 90, 90, 90, 90, 90, ++ 90, 90, 90, 90, 90, 90, 90, 90, ++ 90, 90, 90, 90, 90, 90, 90, 90, ++ 90, 90, 90, 90, 90, 90, 90, 90, ++ 90, 90, 90, 90, 90, 90, 90, 90, ++ 90, 90, 90, 90, 90, 90, 90, 90, ++ 90, 90, 90, 90, 90, 90, 90, 90, ++ 90, 90, 90, 90, 90, 90, 90, 90, ++ 90, ++ }, ++ { /* Fourth byte table 24. 
*/ ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 3, 6, 9, 12, 15, 18, 18, ++ 18, 18, 18, 18, 18, 18, 18, 18, ++ 18, 18, 21, 21, 24, 24, 27, 27, ++ 30, 30, 30, 30, 30, 30, 30, 30, ++ 30, 33, 36, 39, 42, 45, 48, 51, ++ 54, 54, 54, 54, 54, 54, 54, 54, ++ 54, 57, 60, 63, 66, 69, 72, 75, ++ 78, 81, 84, 87, 90, 93, 96, 96, ++ 96, 96, 96, 96, 96, 96, 96, 96, ++ 96, 96, 96, 96, 96, 96, 96, 96, ++ 96, 96, 96, 96, 96, 96, 96, 96, ++ 96, 96, 96, 96, 96, 96, 96, 96, ++ 96, 96, 96, 96, 96, 96, 96, 96, ++ 96, 96, 96, 96, 96, 96, 96, 96, ++ 96, 96, 96, 96, 96, 96, 96, 96, ++ 96, 96, 96, 96, 96, 96, 96, 96, ++ 96, ++ }, ++ { /* Fourth byte table 25. */ ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 3, 6, 9, 12, 15, 18, 21, ++ 24, 24, 24, 24, 24, 24, 24, 24, ++ 24, 27, 30, 33, 36, 39, 42, 45, ++ 48, 48, 48, 48, 48, 48, 48, 48, ++ 48, 51, 54, 57, 60, 63, 66, 69, ++ 72, 72, 72, 72, 72, 72, 72, 72, ++ 72, 75, 78, 78, 81, 81, 81, 81, ++ 81, 81, 81, 81, 81, 81, 81, 83, ++ 83, 83, 83, 83, 83, 83, 83, 83, ++ 83, 83, 83, 83, 83, 83, 83, 83, ++ 83, 83, 83, 83, 83, 83, 83, 83, ++ 83, 83, 83, 83, 83, 83, 83, 83, ++ 83, 83, 83, 83, 83, 83, 83, 83, ++ 83, 83, 83, 83, 83, 83, 83, 83, ++ 83, 83, 83, 83, 83, 83, 83, 83, ++ 83, 83, 83, 83, 83, 83, 83, 83, ++ 83, ++ }, ++ { /* Fourth byte table 26. */ ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 3, 3, 3, 3, ++ 3, 3, 3, 3, 3, 3, 3, 3, ++ 3, 6, 9, 9, 9, 9, 9, 9, ++ 9, 9, 9, 9, 9, 9, 9, 9, ++ 9, 12, 15, 15, 15, 15, 18, 18, ++ 18, 18, 18, 18, 18, 18, 18, 18, ++ 18, 18, 18, 18, 21, 21, 21, 21, ++ 21, 21, 21, 21, 21, 21, 21, 21, ++ 21, 21, 21, 21, 21, 21, 21, 21, ++ 21, 21, 21, 21, 21, 21, 21, 21, ++ 21, 21, 21, 21, 21, 21, 21, 21, ++ 21, 21, 21, 21, 21, 21, 21, 21, ++ 21, 21, 21, 21, 21, 21, 21, 21, ++ 21, 21, 21, 21, 21, 21, 21, 21, ++ 21, 21, 21, 21, 21, 21, 21, 21, ++ 21, 21, 21, 21, 21, 21, 21, 21, ++ 21, ++ }, ++ { /* Fourth byte table 27. 
*/ ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 3, 6, 9, 12, 15, 18, 21, ++ 24, 27, 30, 33, 36, 39, 42, 45, ++ 48, 48, 48, 48, 48, 48, 48, 48, ++ 48, 48, 48, 48, 48, 48, 48, 48, ++ 48, 48, 48, 48, 48, 48, 48, 48, ++ 48, 48, 48, 48, 48, 48, 48, 48, ++ 48, 48, 48, 48, 48, 48, 48, 48, ++ 48, 48, 48, 48, 48, 48, 48, 48, ++ 48, 48, 48, 48, 48, 48, 48, 48, ++ 48, 48, 48, 48, 48, 48, 48, 48, ++ 48, ++ }, ++ { /* Fourth byte table 28. */ ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 3, 6, 9, 12, 15, 18, 21, ++ 24, 27, 30, 33, 36, 39, 42, 45, ++ 48, 51, 54, 57, 60, 63, 66, 69, ++ 72, 75, 78, 78, 78, 78, 78, 78, ++ 78, 78, 78, 78, 78, 78, 78, 78, ++ 78, 78, 78, 78, 78, 78, 78, 78, ++ 78, 78, 78, 78, 78, 78, 78, 78, ++ 78, 78, 78, 78, 78, 78, 78, 78, ++ 78, 78, 78, 78, 78, 78, 78, 78, ++ 78, 78, 78, 78, 78, 78, 78, 78, ++ 78, 78, 78, 78, 78, 78, 78, 78, ++ 78, 78, 78, 78, 78, 78, 78, 78, ++ 78, 78, 78, 78, 78, 78, 78, 78, ++ 78, 78, 78, 78, 78, 78, 78, 78, ++ 78, ++ }, ++ { /* Fourth byte table 29. */ ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 3, 6, 9, 12, 15, 18, ++ 21, 24, 27, 30, 33, 36, 39, 42, ++ 45, 48, 51, 54, 57, 60, 63, 66, ++ 69, 72, 75, 78, 78, 78, 78, 78, ++ 78, 78, 78, 78, 78, 78, 78, 78, ++ 78, 78, 78, 78, 78, 78, 78, 78, ++ 78, 78, 78, 78, 78, 78, 78, 78, ++ 78, 78, 78, 78, 78, 78, 78, 78, ++ 78, 78, 78, 78, 78, 78, 78, 78, ++ 78, 78, 78, 78, 78, 78, 78, 78, ++ 78, 78, 78, 78, 78, 78, 78, 78, ++ 78, 78, 78, 78, 78, 78, 78, 78, ++ 78, 78, 78, 78, 78, 78, 78, 78, ++ 78, 78, 78, 78, 78, 78, 78, 78, ++ 78, 78, 78, 78, 78, 78, 78, 78, ++ 78, 78, 78, 78, 78, 78, 78, 78, ++ 78, ++ }, ++ { /* Fourth byte table 30. 
*/ ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 4, 8, 12, 16, 20, 24, 28, ++ 32, 36, 40, 44, 48, 52, 56, 60, ++ 64, 68, 72, 76, 80, 84, 88, 92, ++ 96, 96, 96, 96, 96, 96, 96, 96, ++ 96, 96, 96, 96, 96, 96, 96, 96, ++ 96, 96, 96, 96, 96, 96, 96, 96, ++ 96, 96, 96, 96, 96, 96, 96, 96, ++ 96, 96, 96, 96, 96, 96, 96, 96, ++ 96, 96, 96, 96, 96, 96, 96, 96, ++ 96, 96, 96, 96, 96, 96, 96, 96, ++ 96, 96, 96, 96, 96, 96, 96, 96, ++ 96, ++ }, ++ { /* Fourth byte table 31. */ ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 4, 8, 12, 16, 20, 24, 28, ++ 32, 36, 40, 44, 48, 52, 56, 56, ++ 56, 56, 56, 56, 56, 56, 56, 56, ++ 56, 56, 56, 56, 56, 56, 56, 56, ++ 56, 56, 56, 56, 56, 56, 56, 56, ++ 56, 56, 56, 56, 56, 56, 56, 56, ++ 56, 56, 56, 56, 56, 56, 56, 56, ++ 56, 56, 56, 56, 56, 56, 56, 56, ++ 56, 56, 56, 56, 56, 56, 56, 56, ++ 56, 56, 56, 56, 56, 56, 56, 56, ++ 56, 56, 56, 56, 56, 56, 56, 56, ++ 56, 56, 56, 56, 56, 56, 56, 56, ++ 56, 56, 56, 56, 56, 56, 56, 56, ++ 56, 56, 56, 56, 56, 56, 56, 56, ++ 56, 56, 56, 56, 56, 56, 56, 56, ++ 56, 56, 56, 56, 56, 56, 56, 56, ++ 56, ++ }, ++ { /* Fourth byte table 32. */ ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, ++ }, ++ { /* Fourth byte table 33. 
*/ ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, ++ }, ++ { /* Fourth byte table 34. */ ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, ++ }, ++ { /* Fourth byte table 35. */ ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, ++ }, ++ { /* Fourth byte table 36. 
*/ ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, ++ }, ++ { /* Fourth byte table 37. */ ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, ++ }, ++ { /* Fourth byte table 38. */ ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, ++ }, ++ }, ++ { ++ { /* Fourth byte table 0. 
*/ ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 2, 2, ++ 2, 2, 2, 2, 2, 2, 2, 2, ++ 2, 2, 2, 2, 2, 2, 2, 2, ++ 2, 2, 2, 2, 2, 2, 2, 2, ++ 2, 2, 2, 2, 2, 2, 2, 2, ++ 2, 2, 2, 2, 2, 2, 2, 2, ++ 2, 2, 2, 2, 2, 2, 2, 2, ++ 2, 2, 2, 2, 2, 2, 2, 2, ++ 2, 2, 2, 2, 2, 2, 2, 2, ++ 2, 2, 2, 2, 2, 2, 2, 2, ++ 2, ++ }, ++ { /* Fourth byte table 1. */ ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 2, 4, 6, 8, 10, 12, 14, ++ 16, 18, 20, 22, 24, 26, 28, 30, ++ 32, 34, 36, 38, 40, 42, 44, 46, ++ 46, 48, 50, 52, 54, 56, 58, 60, ++ 62, 62, 62, 62, 62, 62, 62, 62, ++ 62, 62, 62, 62, 62, 62, 62, 62, ++ 62, 62, 62, 62, 62, 62, 62, 62, ++ 62, 62, 62, 62, 62, 62, 62, 62, ++ 62, 62, 62, 62, 62, 62, 62, 62, ++ 62, 62, 62, 62, 62, 62, 62, 62, ++ 62, 62, 62, 62, 62, 62, 62, 62, ++ 62, 62, 62, 62, 62, 62, 62, 62, ++ 62, ++ }, ++ { /* Fourth byte table 2. */ ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 2, 2, 4, 4, 6, 6, ++ 8, 8, 10, 10, 12, 12, 14, 14, ++ 16, 16, 18, 18, 20, 20, 22, 22, ++ 24, 24, 26, 26, 28, 28, 30, 30, ++ 32, 32, 34, 34, 36, 36, 38, 38, ++ 40, 40, 42, 42, 44, 44, 46, 46, ++ 48, 48, 49, 49, 51, 51, 53, 53, ++ 55, 55, 55, 57, 57, 59, 59, 61, ++ 61, 61, 61, 61, 61, 61, 61, 61, ++ 61, 61, 61, 61, 61, 61, 61, 61, ++ 61, 61, 61, 61, 61, 61, 61, 61, ++ 61, 61, 61, 61, 61, 61, 61, 61, ++ 61, 61, 61, 61, 61, 61, 61, 61, ++ 61, 61, 61, 61, 61, 61, 61, 61, ++ 61, 61, 61, 61, 61, 61, 61, 61, ++ 61, 61, 61, 61, 61, 61, 61, 61, ++ 61, ++ }, ++ { /* Fourth byte table 3. 
*/ ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 2, 2, 4, 4, 6, 6, 8, ++ 8, 10, 10, 10, 12, 12, 14, 14, ++ 16, 16, 18, 18, 20, 20, 22, 22, ++ 24, 24, 26, 26, 28, 28, 30, 30, ++ 32, 32, 34, 34, 36, 36, 38, 38, ++ 40, 40, 42, 42, 44, 44, 46, 46, ++ 48, 48, 50, 50, 52, 52, 54, 54, ++ 56, 56, 56, 58, 58, 60, 60, 62, ++ 63, 63, 63, 63, 63, 63, 63, 63, ++ 63, 63, 63, 63, 63, 63, 63, 63, ++ 63, 63, 63, 63, 63, 63, 63, 63, ++ 63, 63, 63, 63, 63, 63, 63, 63, ++ 63, 63, 63, 63, 63, 63, 63, 63, ++ 63, 63, 63, 63, 63, 63, 63, 63, ++ 63, 63, 63, 63, 63, 63, 63, 63, ++ 63, 63, 63, 63, 63, 63, 63, 63, ++ 63, ++ }, ++ { /* Fourth byte table 4. */ ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 2, 2, 2, 4, 4, 6, 6, ++ 6, 8, 8, 8, 8, 10, 10, 10, ++ 10, 10, 10, 12, 12, 12, 14, 14, ++ 14, 14, 16, 18, 18, 18, 18, 20, ++ 20, 20, 22, 22, 24, 24, 26, 26, ++ 26, 28, 28, 28, 28, 28, 30, 30, ++ 30, 32, 32, 32, 32, 34, 34, 36, ++ 36, 36, 38, 38, 38, 38, 40, 40, ++ 42, 42, 42, 42, 42, 42, 42, 42, ++ 42, 42, 42, 42, 42, 42, 42, 42, ++ 42, 42, 42, 42, 42, 42, 42, 42, ++ 42, 42, 42, 42, 42, 42, 42, 42, ++ 42, 42, 42, 42, 42, 42, 42, 42, ++ 42, 42, 42, 42, 42, 42, 42, 42, ++ 42, 42, 42, 42, 42, 42, 42, 42, ++ 42, 42, 42, 42, 42, 42, 42, 42, ++ 42, ++ }, ++ { /* Fourth byte table 5. */ ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 2, 4, ++ 4, 6, 8, 8, 10, 12, 12, 14, ++ 14, 16, 16, 18, 18, 20, 20, 22, ++ 22, 24, 24, 26, 26, 28, 30, 30, ++ 32, 32, 34, 34, 36, 36, 38, 38, ++ 40, 40, 42, 42, 44, 44, 46, 46, ++ 48, 48, 48, 50, 52, 52, 54, 54, ++ 54, 54, 56, 56, 58, 58, 60, 60, ++ 62, 62, 62, 62, 62, 62, 62, 62, ++ 62, 62, 62, 62, 62, 62, 62, 62, ++ 62, 62, 62, 62, 62, 62, 62, 62, ++ 62, 62, 62, 62, 62, 62, 62, 62, ++ 62, 62, 62, 62, 62, 62, 62, 62, ++ 62, 62, 62, 62, 62, 62, 62, 62, ++ 62, 62, 62, 62, 62, 62, 62, 62, ++ 62, 62, 62, 62, 62, 62, 62, 62, ++ 62, ++ }, ++ { /* Fourth byte table 6. 
*/ ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 2, 2, 4, 4, 6, 6, ++ 8, 8, 10, 10, 12, 12, 14, 14, ++ 16, 16, 18, 18, 20, 20, 22, 22, ++ 24, 24, 26, 26, 28, 28, 30, 30, ++ 32, 32, 32, 32, 34, 34, 36, 36, ++ 38, 38, 40, 40, 42, 42, 44, 44, ++ 46, 46, 48, 48, 50, 50, 50, 50, ++ 50, 50, 50, 50, 50, 52, 52, 52, ++ 52, 52, 52, 52, 52, 52, 52, 52, ++ 52, 52, 52, 52, 52, 52, 52, 52, ++ 52, 52, 52, 52, 52, 52, 52, 52, ++ 52, 52, 52, 52, 52, 52, 52, 52, ++ 52, 52, 52, 52, 52, 52, 52, 52, ++ 52, 52, 52, 52, 52, 52, 52, 52, ++ 52, 52, 52, 52, 52, 52, 52, 52, ++ 52, 52, 52, 52, 52, 52, 52, 52, ++ 52, ++ }, ++ { /* Fourth byte table 7. */ ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 2, 2, 2, 2, 2, ++ 4, 4, 6, 6, 8, 8, 10, 10, ++ 12, 12, 12, 12, 14, 16, 16, 18, ++ 20, 20, 22, 22, 24, 24, 24, 24, ++ 24, 26, 26, 26, 28, 28, 28, 28, ++ 28, 30, 32, 32, 35, 35, 35, 35, ++ 37, 37, 37, 39, 39, 39, 41, 41, ++ 41, 41, 41, 41, 41, 41, 44, 44, ++ 44, 44, 44, 44, 44, 44, 44, 44, ++ 44, 44, 44, 44, 44, 44, 44, 44, ++ 44, 44, 44, 44, 44, 44, 44, 44, ++ 44, 44, 44, 44, 44, 44, 44, 44, ++ 44, 44, 44, 44, 44, 44, 44, 44, ++ 44, 44, 44, 44, 44, 44, 44, 44, ++ 44, 44, 44, 44, 44, 44, 44, 44, ++ 44, 44, 44, 44, 44, 44, 44, 44, ++ 44, ++ }, ++ { /* Fourth byte table 8. */ ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 2, 2, 2, 4, 4, 4, 4, ++ 4, 6, 8, 10, 12, 14, 14, 14, ++ 14, 14, 14, 16, 16, 16, 16, 16, ++ 16, 16, 16, 16, 16, 16, 16, 16, ++ 16, 16, 16, 16, 16, 16, 16, 16, ++ 16, 16, 16, 16, 16, 16, 16, 16, ++ 16, 16, 16, 16, 16, 16, 16, 16, ++ 16, 16, 16, 16, 16, 16, 16, 16, ++ 16, 16, 16, 16, 16, 16, 16, 16, ++ 16, 16, 16, 16, 16, 16, 16, 16, ++ 16, 16, 16, 16, 16, 16, 16, 16, ++ 16, 16, 16, 16, 16, 16, 16, 16, ++ 16, 16, 16, 16, 16, 16, 16, 16, ++ 16, 16, 16, 16, 16, 16, 16, 16, ++ 16, 16, 16, 16, 16, 16, 16, 16, ++ 16, 16, 16, 16, 16, 16, 16, 16, ++ 16, ++ }, ++ { /* Fourth byte table 9. 
*/ ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 2, 2, ++ 2, 2, 2, 2, 2, 2, 2, 2, ++ 2, 2, 2, 2, 2, 2, 2, 2, ++ 2, 2, 2, 2, 2, 2, 2, 2, ++ 2, 2, 2, 2, 2, 2, 2, 2, ++ 2, 2, 2, 2, 2, 2, 2, 2, ++ 2, 2, 2, 2, 2, 2, 2, 2, ++ 2, 2, 2, 2, 4, 6, 8, 8, ++ 8, 8, 8, 8, 8, 8, 8, 8, ++ 8, 8, 8, 8, 8, 8, 8, 8, ++ 8, 8, 8, 8, 8, 8, 8, 8, ++ 8, 8, 8, 8, 8, 8, 8, 8, ++ 8, 8, 8, 8, 8, 8, 8, 8, ++ 8, 8, 8, 8, 8, 8, 8, 8, ++ 8, 8, 8, 8, 8, 8, 8, 8, ++ 8, 8, 8, 8, 8, 8, 8, 8, ++ 8, ++ }, ++ { /* Fourth byte table 10. */ ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 2, 4, 6, ++ 8, 8, 10, 12, 14, 16, 18, 20, ++ 22, 24, 26, 28, 30, 32, 34, 36, ++ 38, 38, 38, 38, 38, 38, 38, 38, ++ 38, 38, 38, 38, 38, 38, 38, 38, ++ 38, 38, 38, 38, 38, 38, 38, 38, ++ 38, 38, 38, 38, 38, 38, 38, 38, ++ 38, 38, 38, 38, 38, 38, 38, 38, ++ 38, 38, 38, 38, 38, 38, 38, 38, ++ 38, 38, 38, 38, 38, 38, 38, 38, ++ 38, 38, 38, 38, 38, 38, 38, 38, ++ 38, ++ }, ++ { /* Fourth byte table 11. */ ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 2, 4, 6, 8, 10, 12, 14, ++ 16, 18, 20, 22, 24, 26, 28, 30, ++ 30, 32, 34, 34, 34, 34, 36, 38, ++ 38, 38, 40, 40, 42, 42, 44, 44, ++ 46, 46, 48, 48, 50, 50, 52, 52, ++ 54, 54, 56, 56, 58, 58, 60, 60, ++ 62, 64, 66, 68, 68, 68, 70, 70, ++ 70, 72, 72, 72, 74, 74, 74, 74, ++ 74, 74, 74, 74, 74, 74, 74, 74, ++ 74, 74, 74, 74, 74, 74, 74, 74, ++ 74, 74, 74, 74, 74, 74, 74, 74, ++ 74, 74, 74, 74, 74, 74, 74, 74, ++ 74, 74, 74, 74, 74, 74, 74, 74, ++ 74, 74, 74, 74, 74, 74, 74, 74, ++ 74, 74, 74, 74, 74, 74, 74, 74, ++ 74, 74, 74, 74, 74, 74, 74, 74, ++ 74, ++ }, ++ { /* Fourth byte table 12. 
*/ ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 2, 4, 6, 8, 10, 12, 14, ++ 16, 18, 20, 22, 24, 26, 28, 30, ++ 32, 32, 32, 32, 32, 32, 32, 32, ++ 32, 32, 32, 32, 32, 32, 32, 32, ++ 32, 32, 32, 32, 32, 32, 32, 32, ++ 32, 32, 32, 32, 32, 32, 32, 32, ++ 32, 32, 32, 32, 32, 32, 32, 32, ++ 32, 32, 32, 32, 32, 32, 32, 32, ++ 32, 32, 32, 32, 32, 32, 32, 32, ++ 32, 32, 32, 32, 32, 32, 32, 32, ++ 32, ++ }, ++ { /* Fourth byte table 13. */ ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 2, 4, 6, 8, 10, 12, 14, ++ 16, 18, 20, 22, 24, 26, 28, 30, ++ 32, 34, 36, 38, 40, 42, 44, 46, ++ 48, 50, 52, 54, 56, 58, 60, 62, ++ 64, 64, 66, 66, 68, 68, 70, 70, ++ 72, 72, 74, 74, 76, 76, 78, 78, ++ 80, 80, 82, 82, 84, 84, 86, 86, ++ 88, 88, 90, 90, 92, 92, 94, 94, ++ 96, 96, 96, 96, 96, 96, 96, 96, ++ 96, 96, 96, 96, 96, 96, 96, 96, ++ 96, 96, 96, 96, 96, 96, 96, 96, ++ 96, 96, 96, 96, 96, 96, 96, 96, ++ 96, 96, 96, 96, 96, 96, 96, 96, ++ 96, 96, 96, 96, 96, 96, 96, 96, ++ 96, 96, 96, 96, 96, 96, 96, 96, ++ 96, 96, 96, 96, 96, 96, 96, 96, ++ 96, ++ }, ++ { /* Fourth byte table 14. */ ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 2, 2, 2, 2, 2, 2, ++ 2, 2, 2, 2, 4, 4, 6, 6, ++ 8, 8, 10, 10, 12, 12, 14, 14, ++ 16, 16, 18, 18, 20, 20, 22, 22, ++ 24, 24, 26, 26, 28, 28, 30, 30, ++ 32, 32, 34, 34, 36, 36, 38, 38, ++ 40, 40, 42, 42, 44, 44, 46, 46, ++ 48, 48, 50, 50, 52, 52, 54, 54, ++ 56, 56, 56, 56, 56, 56, 56, 56, ++ 56, 56, 56, 56, 56, 56, 56, 56, ++ 56, 56, 56, 56, 56, 56, 56, 56, ++ 56, 56, 56, 56, 56, 56, 56, 56, ++ 56, 56, 56, 56, 56, 56, 56, 56, ++ 56, 56, 56, 56, 56, 56, 56, 56, ++ 56, 56, 56, 56, 56, 56, 56, 56, ++ 56, 56, 56, 56, 56, 56, 56, 56, ++ 56, ++ }, ++ { /* Fourth byte table 15. 
*/ ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 2, 2, 4, 4, 6, ++ 6, 8, 8, 10, 10, 12, 12, 14, ++ 16, 16, 18, 18, 20, 20, 22, 22, ++ 24, 24, 26, 26, 28, 28, 30, 30, ++ 32, 32, 34, 34, 36, 36, 38, 38, ++ 40, 40, 42, 42, 44, 44, 46, 46, ++ 48, 48, 50, 50, 52, 52, 54, 54, ++ 56, 56, 58, 58, 60, 60, 62, 62, ++ 64, 64, 64, 64, 64, 64, 64, 64, ++ 64, 64, 64, 64, 64, 64, 64, 64, ++ 64, 64, 64, 64, 64, 64, 64, 64, ++ 64, 64, 64, 64, 64, 64, 64, 64, ++ 64, 64, 64, 64, 64, 64, 64, 64, ++ 64, 64, 64, 64, 64, 64, 64, 64, ++ 64, 64, 64, 64, 64, 64, 64, 64, ++ 64, 64, 64, 64, 64, 64, 64, 64, ++ 64, ++ }, ++ { /* Fourth byte table 16. */ ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 2, 2, 4, 4, 6, 6, ++ 8, 8, 10, 10, 12, 12, 14, 14, ++ 16, 16, 18, 18, 20, 20, 20, 20, ++ 20, 20, 20, 20, 20, 20, 20, 20, ++ 20, 20, 20, 20, 20, 20, 20, 20, ++ 20, 20, 20, 20, 20, 20, 20, 20, ++ 20, 20, 20, 20, 20, 20, 20, 20, ++ 20, 20, 20, 20, 20, 20, 20, 20, ++ 20, 20, 20, 20, 20, 20, 20, 20, ++ 20, 20, 20, 20, 20, 20, 20, 20, ++ 20, 20, 20, 20, 20, 20, 20, 20, ++ 20, 20, 20, 20, 20, 20, 20, 20, ++ 20, 20, 20, 20, 20, 20, 20, 20, ++ 20, 20, 20, 20, 20, 20, 20, 20, ++ 20, 20, 20, 20, 20, 20, 20, 20, ++ 20, 20, 20, 20, 20, 20, 20, 20, ++ 20, ++ }, ++ { /* Fourth byte table 17. */ ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 2, 4, 6, 8, 10, 12, ++ 14, 16, 18, 20, 22, 24, 26, 28, ++ 30, 32, 34, 36, 38, 40, 42, 44, ++ 46, 48, 50, 52, 54, 56, 58, 60, ++ 62, 62, 62, 62, 62, 62, 62, 62, ++ 62, 62, 62, 62, 62, 62, 62, 62, ++ 62, 62, 62, 62, 62, 62, 62, 62, ++ 62, 62, 62, 62, 62, 62, 62, 62, ++ 62, 62, 62, 62, 62, 62, 62, 62, ++ 62, 62, 62, 62, 62, 62, 62, 62, ++ 62, 62, 62, 62, 62, 62, 62, 62, ++ 62, 62, 62, 62, 62, 62, 62, 62, ++ 62, ++ }, ++ { /* Fourth byte table 18. 
*/ ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 2, 4, 6, 8, 10, 12, 14, ++ 14, 14, 14, 14, 14, 14, 14, 14, ++ 14, 14, 14, 14, 14, 14, 14, 14, ++ 14, 14, 14, 14, 14, 14, 14, 14, ++ 14, 14, 14, 14, 14, 14, 14, 14, ++ 14, 14, 14, 14, 14, 14, 14, 14, ++ 14, 14, 14, 14, 14, 14, 14, 14, ++ 14, 14, 14, 14, 14, 14, 14, 14, ++ 14, 14, 14, 14, 14, 14, 14, 14, ++ 14, 14, 14, 14, 14, 14, 14, 14, ++ 14, 14, 14, 14, 14, 14, 14, 14, ++ 14, 14, 14, 14, 14, 14, 14, 14, ++ 14, 14, 14, 14, 14, 14, 14, 14, ++ 14, 14, 14, 14, 14, 14, 14, 14, ++ 14, 14, 14, 14, 14, 14, 14, 14, ++ 14, 14, 14, 14, 14, 14, 14, 14, ++ 14, ++ }, ++ { /* Fourth byte table 19. */ ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 3, 3, ++ 3, 3, 3, 3, 3, 3, 3, 3, ++ 3, 3, 3, 3, 3, 3, 3, 3, ++ 3, 3, 3, 3, 3, 3, 3, 3, ++ 3, 3, 3, 3, 3, 3, 3, 3, ++ 3, 3, 3, 3, 3, 3, 3, 3, ++ 3, 3, 3, 3, 3, 3, 3, 3, ++ 3, 3, 3, 3, 3, 3, 3, 3, ++ 3, 3, 3, 3, 3, 3, 3, 3, ++ 3, ++ }, ++ { /* Fourth byte table 20. */ ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 3, 3, 6, 6, 9, 9, ++ 12, 12, 15, 15, 18, 18, 21, 21, ++ 24, 24, 27, 27, 30, 30, 33, 33, ++ 36, 36, 39, 39, 42, 42, 45, 45, ++ 48, 48, 51, 51, 54, 54, 57, 57, ++ 60, 60, 63, 63, 66, 66, 69, 69, ++ 72, 72, 75, 75, 78, 78, 81, 81, ++ 84, 84, 87, 87, 90, 90, 93, 93, ++ 96, 96, 96, 96, 96, 96, 96, 96, ++ 96, 96, 96, 96, 96, 96, 96, 96, ++ 96, 96, 96, 96, 96, 96, 96, 96, ++ 96, 96, 96, 96, 96, 96, 96, 96, ++ 96, 96, 96, 96, 96, 96, 96, 96, ++ 96, 96, 96, 96, 96, 96, 96, 96, ++ 96, 96, 96, 96, 96, 96, 96, 96, ++ 96, 96, 96, 96, 96, 96, 96, 96, ++ 96, ++ }, ++ { /* Fourth byte table 21. 
*/ ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 3, 3, 6, 6, 9, 9, ++ 12, 12, 15, 15, 18, 18, 21, 21, ++ 24, 24, 27, 27, 30, 30, 33, 33, ++ 36, 36, 39, 39, 42, 42, 45, 45, ++ 48, 48, 51, 51, 54, 54, 57, 57, ++ 60, 60, 63, 63, 66, 66, 69, 69, ++ 72, 72, 75, 75, 78, 78, 81, 81, ++ 84, 84, 87, 87, 90, 90, 93, 93, ++ 96, 96, 96, 96, 96, 96, 96, 96, ++ 96, 96, 96, 96, 96, 96, 96, 96, ++ 96, 96, 96, 96, 96, 96, 96, 96, ++ 96, 96, 96, 96, 96, 96, 96, 96, ++ 96, 96, 96, 96, 96, 96, 96, 96, ++ 96, 96, 96, 96, 96, 96, 96, 96, ++ 96, 96, 96, 96, 96, 96, 96, 96, ++ 96, 96, 96, 96, 96, 96, 96, 96, ++ 96, ++ }, ++ { /* Fourth byte table 22. */ ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 3, 3, 6, 6, 9, 9, ++ 12, 12, 15, 15, 18, 18, 21, 21, ++ 24, 24, 27, 27, 30, 30, 33, 33, ++ 33, 33, 33, 33, 36, 36, 36, 36, ++ 36, 36, 39, 39, 42, 42, 45, 45, ++ 48, 48, 51, 51, 54, 54, 57, 57, ++ 60, 60, 63, 63, 66, 66, 69, 69, ++ 72, 72, 75, 75, 78, 78, 81, 81, ++ 84, 84, 84, 84, 84, 84, 84, 84, ++ 84, 84, 84, 84, 84, 84, 84, 84, ++ 84, 84, 84, 84, 84, 84, 84, 84, ++ 84, 84, 84, 84, 84, 84, 84, 84, ++ 84, 84, 84, 84, 84, 84, 84, 84, ++ 84, 84, 84, 84, 84, 84, 84, 84, ++ 84, 84, 84, 84, 84, 84, 84, 84, ++ 84, 84, 84, 84, 84, 84, 84, 84, ++ 84, ++ }, ++ { /* Fourth byte table 23. */ ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 3, 3, 6, 6, 9, 9, ++ 12, 12, 15, 15, 18, 18, 21, 21, ++ 24, 24, 27, 27, 30, 30, 33, 33, ++ 36, 36, 39, 39, 42, 42, 45, 45, ++ 48, 48, 51, 51, 54, 54, 57, 57, ++ 60, 60, 63, 63, 66, 66, 69, 69, ++ 72, 72, 75, 75, 78, 78, 81, 81, ++ 84, 84, 87, 87, 87, 87, 87, 87, ++ 87, 87, 87, 87, 87, 87, 87, 87, ++ 87, 87, 87, 87, 87, 87, 87, 87, ++ 87, 87, 87, 87, 87, 87, 87, 87, ++ 87, 87, 87, 87, 87, 87, 87, 87, ++ 87, 87, 87, 87, 87, 87, 87, 87, ++ 87, 87, 87, 87, 87, 87, 87, 87, ++ 87, 87, 87, 87, 87, 87, 87, 87, ++ 87, 87, 87, 87, 87, 87, 87, 87, ++ 87, ++ }, ++ { /* Fourth byte table 24. 
*/ ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 3, 6, 9, 12, 15, 18, 21, ++ 24, 24, 24, 24, 24, 24, 24, 24, ++ 24, 27, 30, 33, 36, 39, 42, 42, ++ 42, 42, 42, 42, 42, 42, 42, 42, ++ 42, 45, 48, 51, 54, 57, 60, 63, ++ 66, 66, 66, 66, 66, 66, 66, 66, ++ 66, 69, 72, 75, 78, 81, 84, 87, ++ 90, 90, 90, 90, 90, 90, 90, 90, ++ 90, 90, 90, 90, 90, 90, 90, 90, ++ 90, 90, 90, 90, 90, 90, 90, 90, ++ 90, 90, 90, 90, 90, 90, 90, 90, ++ 90, 90, 90, 90, 90, 90, 90, 90, ++ 90, 90, 90, 90, 90, 90, 90, 90, ++ 90, 90, 90, 90, 90, 90, 90, 90, ++ 90, 90, 90, 90, 90, 90, 90, 90, ++ 90, 90, 90, 90, 90, 90, 90, 90, ++ 90, ++ }, ++ { /* Fourth byte table 25. */ ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 3, 6, 9, 12, 15, 18, 18, ++ 18, 18, 18, 18, 18, 18, 18, 18, ++ 18, 18, 21, 21, 24, 24, 27, 27, ++ 30, 30, 30, 30, 30, 30, 30, 30, ++ 30, 33, 36, 39, 42, 45, 48, 51, ++ 54, 54, 54, 54, 54, 54, 54, 54, ++ 54, 57, 60, 63, 66, 69, 72, 75, ++ 78, 81, 84, 87, 90, 93, 96, 96, ++ 96, 96, 96, 96, 96, 96, 96, 96, ++ 96, 96, 96, 96, 96, 96, 96, 96, ++ 96, 96, 96, 96, 96, 96, 96, 96, ++ 96, 96, 96, 96, 96, 96, 96, 96, ++ 96, 96, 96, 96, 96, 96, 96, 96, ++ 96, 96, 96, 96, 96, 96, 96, 96, ++ 96, 96, 96, 96, 96, 96, 96, 96, ++ 96, 96, 96, 96, 96, 96, 96, 96, ++ 96, ++ }, ++ { /* Fourth byte table 26. */ ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 3, 6, 9, 12, 15, 18, 21, ++ 24, 24, 24, 24, 24, 24, 24, 24, ++ 24, 27, 30, 33, 36, 39, 42, 45, ++ 48, 48, 48, 48, 48, 48, 48, 48, ++ 48, 51, 54, 57, 60, 63, 66, 69, ++ 72, 72, 72, 72, 72, 72, 72, 72, ++ 72, 75, 78, 78, 81, 81, 81, 81, ++ 81, 81, 81, 81, 81, 81, 81, 83, ++ 83, 83, 83, 83, 83, 83, 83, 83, ++ 83, 83, 83, 83, 83, 83, 83, 83, ++ 83, 83, 83, 83, 83, 83, 83, 83, ++ 83, 83, 83, 83, 83, 83, 83, 83, ++ 83, 83, 83, 83, 83, 83, 83, 83, ++ 83, 83, 83, 83, 83, 83, 83, 83, ++ 83, 83, 83, 83, 83, 83, 83, 83, ++ 83, 83, 83, 83, 83, 83, 83, 83, ++ 83, ++ }, ++ { /* Fourth byte table 27. 
*/ ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 3, 3, 3, 3, ++ 3, 3, 3, 3, 3, 3, 3, 3, ++ 3, 6, 9, 9, 9, 9, 9, 9, ++ 9, 9, 9, 9, 9, 9, 9, 9, ++ 9, 12, 15, 15, 15, 15, 18, 18, ++ 18, 18, 18, 18, 18, 18, 18, 18, ++ 18, 18, 18, 18, 21, 21, 21, 21, ++ 21, 21, 21, 21, 21, 21, 21, 21, ++ 21, 21, 21, 21, 21, 21, 21, 21, ++ 21, 21, 21, 21, 21, 21, 21, 21, ++ 21, 21, 21, 21, 21, 21, 21, 21, ++ 21, 21, 21, 21, 21, 21, 21, 21, ++ 21, 21, 21, 21, 21, 21, 21, 21, ++ 21, 21, 21, 21, 21, 21, 21, 21, ++ 21, 21, 21, 21, 21, 21, 21, 21, ++ 21, 21, 21, 21, 21, 21, 21, 21, ++ 21, ++ }, ++ { /* Fourth byte table 28. */ ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 3, ++ 3, 3, 3, 3, 3, 3, 3, 3, ++ 3, 3, 3, 3, 3, 3, 3, 3, ++ 3, 3, 3, 3, 3, 3, 3, 3, ++ 3, 3, 3, 3, 3, 3, 3, 3, ++ 3, 6, 9, 12, 15, 18, 21, 24, ++ 27, 30, 33, 36, 39, 42, 45, 48, ++ 51, 51, 51, 51, 51, 51, 51, 51, ++ 51, 51, 51, 51, 51, 51, 51, 51, ++ 51, 51, 51, 51, 51, 51, 51, 51, ++ 51, 51, 51, 51, 51, 51, 51, 51, ++ 51, 51, 51, 51, 51, 51, 51, 51, ++ 51, 51, 51, 51, 51, 51, 51, 51, ++ 51, 51, 51, 51, 51, 51, 51, 51, ++ 51, 51, 51, 51, 51, 51, 51, 51, ++ 51, ++ }, ++ { /* Fourth byte table 29. */ ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 3, 3, 3, ++ 3, 3, 3, 3, 3, 3, 3, 3, ++ 3, 3, 3, 3, 3, 3, 3, 3, ++ 3, 3, 3, 3, 3, 3, 3, 3, ++ 3, 3, 3, 3, 3, 3, 3, 3, ++ 3, 3, 3, 3, 3, 3, 3, 3, ++ 3, 3, 3, 3, 3, 3, 3, 3, ++ 3, 3, 3, 3, 3, 3, 3, 3, ++ 3, 3, 3, 3, 3, 3, 3, 3, ++ 3, 3, 3, 3, 3, 3, 3, 3, ++ 3, 3, 3, 3, 3, 3, 3, 3, ++ 3, 3, 3, 3, 3, 3, 3, 3, ++ 3, 3, 3, 3, 3, 3, 3, 3, ++ 3, 3, 3, 3, 3, 3, 3, 3, ++ 3, 3, 3, 3, 3, 3, 3, 3, ++ 3, 3, 3, 3, 3, 3, 3, 3, ++ 3, ++ }, ++ { /* Fourth byte table 30. 
*/ ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 3, 6, 9, 12, 15, 18, 21, ++ 24, 27, 30, 33, 36, 39, 42, 45, ++ 48, 51, 54, 57, 60, 63, 66, 69, ++ 72, 75, 78, 78, 78, 78, 78, 78, ++ 78, 78, 78, 78, 78, 78, 78, 78, ++ 78, 78, 78, 78, 78, 78, 78, 78, ++ 78, 78, 78, 78, 78, 78, 78, 78, ++ 78, 78, 78, 78, 78, 78, 78, 78, ++ 78, 78, 78, 78, 78, 78, 78, 78, ++ 78, 78, 78, 78, 78, 78, 78, 78, ++ 78, 78, 78, 78, 78, 78, 78, 78, ++ 78, 78, 78, 78, 78, 78, 78, 78, ++ 78, 78, 78, 78, 78, 78, 78, 78, ++ 78, 78, 78, 78, 78, 78, 78, 78, ++ 78, ++ }, ++ { /* Fourth byte table 31. */ ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 3, 6, 9, 12, 15, 18, 21, ++ 24, 27, 30, 33, 36, 39, 42, 45, ++ 48, 48, 48, 48, 48, 48, 48, 48, ++ 48, 48, 48, 48, 48, 48, 48, 48, ++ 48, 48, 48, 48, 48, 48, 48, 48, ++ 48, 48, 48, 48, 48, 48, 48, 48, ++ 48, 48, 48, 48, 48, 48, 48, 48, ++ 48, 48, 48, 48, 48, 48, 48, 48, ++ 48, 48, 48, 48, 48, 48, 48, 48, ++ 48, 48, 48, 48, 48, 48, 48, 48, ++ 48, ++ }, ++ { /* Fourth byte table 32. */ ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 3, 6, 9, 12, 15, 18, 21, ++ 24, 27, 30, 33, 36, 39, 42, 45, ++ 48, 51, 54, 57, 60, 63, 66, 69, ++ 72, 75, 78, 81, 84, 87, 90, 93, ++ 93, 93, 96, 96, 96, 96, 98, 100, ++ 100, 103, 103, 106, 106, 109, 109, 109, ++ 109, 109, 109, 109, 109, 109, 109, 112, ++ 112, 112, 112, 112, 112, 112, 112, 112, ++ 112, 112, 112, 112, 112, 112, 112, 112, ++ 112, 112, 112, 112, 112, 112, 112, 112, ++ 112, 112, 112, 112, 112, 112, 112, 112, ++ 112, 112, 112, 112, 112, 112, 112, 112, ++ 112, 112, 112, 112, 112, 112, 112, 112, ++ 112, 112, 112, 112, 112, 112, 112, 112, ++ 112, 112, 112, 112, 112, 112, 112, 112, ++ 112, 112, 112, 112, 112, 112, 112, 112, ++ 112, ++ }, ++ { /* Fourth byte table 33. 
*/ ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 3, 3, 6, 6, 9, 9, ++ 12, 12, 15, 15, 18, 18, 21, 21, ++ 24, 24, 27, 27, 30, 30, 33, 33, ++ 36, 36, 39, 39, 42, 42, 45, 45, ++ 48, 48, 51, 51, 54, 54, 57, 57, ++ 60, 60, 63, 63, 66, 66, 69, 69, ++ 72, 72, 75, 75, 78, 78, 81, 81, ++ 84, 84, 87, 87, 90, 90, 93, 93, ++ 96, 96, 96, 96, 96, 96, 96, 96, ++ 96, 96, 96, 96, 96, 96, 96, 96, ++ 96, 96, 96, 96, 96, 96, 96, 96, ++ 96, 96, 96, 96, 96, 96, 96, 96, ++ 96, 96, 96, 96, 96, 96, 96, 96, ++ 96, 96, 96, 96, 96, 96, 96, 96, ++ 96, 96, 96, 96, 96, 96, 96, 96, ++ 96, 96, 96, 96, 96, 96, 96, 96, ++ 96, ++ }, ++ { /* Fourth byte table 34. */ ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 3, 3, 6, 6, 9, 9, ++ 12, 12, 15, 15, 18, 18, 21, 21, ++ 24, 24, 27, 27, 30, 30, 33, 33, ++ 36, 36, 39, 39, 42, 42, 45, 45, ++ 48, 48, 51, 51, 54, 54, 54, 54, ++ 54, 54, 54, 54, 54, 54, 54, 54, ++ 54, 54, 54, 54, 54, 54, 54, 54, ++ 54, 54, 54, 54, 54, 54, 54, 54, ++ 54, 54, 54, 54, 54, 54, 54, 54, ++ 54, 54, 54, 54, 54, 54, 54, 54, ++ 54, 54, 54, 54, 54, 54, 54, 54, ++ 54, 54, 54, 54, 54, 54, 54, 54, ++ 54, 54, 54, 54, 54, 54, 54, 54, ++ 54, 54, 54, 54, 54, 54, 54, 54, ++ 54, 54, 54, 54, 54, 54, 54, 54, ++ 54, 54, 54, 54, 54, 54, 54, 54, ++ 54, ++ }, ++ { /* Fourth byte table 35. */ ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 3, 6, 9, 12, 15, 18, 21, ++ 24, 27, 30, 33, 36, 39, 42, 45, ++ 48, 51, 54, 57, 60, 63, 66, 69, ++ 72, 75, 78, 81, 84, 87, 90, 93, ++ 96, 99, 102, 105, 108, 111, 114, 114, ++ 114, 114, 114, 114, 114, 114, 114, 114, ++ 114, 114, 114, 114, 114, 114, 114, 114, ++ 114, 114, 114, 114, 114, 114, 114, 114, ++ 114, 114, 114, 114, 114, 114, 114, 114, ++ 114, 114, 114, 114, 114, 114, 114, 114, ++ 114, 114, 114, 114, 114, 114, 114, 114, ++ 114, 114, 114, 114, 114, 114, 114, 114, ++ 114, 114, 114, 114, 114, 114, 114, 114, ++ 114, 114, 114, 114, 114, 114, 114, 114, ++ 114, 114, 114, 114, 114, 114, 114, 114, ++ 114, 114, 114, 114, 114, 114, 114, 114, ++ 114, ++ }, ++ { /* Fourth byte table 36. 
*/ ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 3, 6, 9, 12, 15, 18, ++ 21, 24, 27, 30, 33, 36, 39, 42, ++ 45, 48, 51, 54, 57, 60, 63, 66, ++ 69, 72, 75, 78, 78, 78, 78, 78, ++ 78, 78, 78, 78, 78, 78, 78, 78, ++ 78, 78, 78, 78, 78, 78, 78, 78, ++ 78, 78, 78, 78, 78, 78, 78, 78, ++ 78, 78, 78, 78, 78, 78, 78, 78, ++ 78, 78, 78, 78, 78, 78, 78, 78, ++ 78, 78, 78, 78, 78, 78, 78, 78, ++ 78, 78, 78, 78, 78, 78, 78, 78, ++ 78, 78, 78, 78, 78, 78, 78, 78, ++ 78, 78, 78, 78, 78, 78, 78, 78, ++ 78, 78, 78, 78, 78, 78, 78, 78, ++ 78, 78, 78, 78, 78, 78, 78, 78, ++ 78, 78, 78, 78, 78, 78, 78, 78, ++ 78, ++ }, ++ { /* Fourth byte table 37. */ ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 4, 8, 12, 16, 20, 24, 28, ++ 32, 36, 40, 44, 48, 52, 56, 60, ++ 64, 68, 72, 76, 80, 84, 88, 92, ++ 96, 96, 96, 96, 96, 96, 96, 96, ++ 96, 96, 96, 96, 96, 96, 96, 96, ++ 96, 96, 96, 96, 96, 96, 96, 96, ++ 96, 96, 96, 96, 96, 96, 96, 96, ++ 96, 96, 96, 96, 96, 96, 96, 96, ++ 96, 96, 96, 96, 96, 96, 96, 96, ++ 96, 96, 96, 96, 96, 96, 96, 96, ++ 96, 96, 96, 96, 96, 96, 96, 96, ++ 96, ++ }, ++ { /* Fourth byte table 38. 
*/ ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 4, 8, 12, 16, 20, 24, 28, ++ 32, 36, 40, 44, 48, 52, 56, 60, ++ 64, 64, 64, 64, 64, 64, 64, 64, ++ 64, 64, 64, 64, 64, 64, 64, 64, ++ 64, 64, 64, 64, 64, 64, 64, 64, ++ 64, 64, 64, 64, 64, 64, 64, 64, ++ 64, 64, 64, 64, 64, 64, 64, 64, ++ 64, 64, 64, 64, 64, 64, 64, 64, ++ 64, 64, 64, 64, 64, 64, 64, 64, ++ 64, 64, 64, 64, 64, 64, 64, 64, ++ 64, 64, 64, 64, 64, 64, 64, 64, ++ 64, 64, 64, 64, 64, 64, 64, 64, ++ 64, 64, 64, 64, 64, 64, 64, 64, ++ 64, 64, 64, 64, 64, 64, 64, 64, ++ 64, 64, 64, 64, 64, 64, 64, 64, ++ 64, 64, 64, 64, 64, 64, 64, 64, ++ 64, ++ }, ++ }, ++}; ++ ++static const uchar_t u8_toupper_final_tbl[2][2318] = { ++ { ++ 0xCE, 0x9C, 0xC3, 0x80, 0xC3, 0x81, 0xC3, 0x82, ++ 0xC3, 0x83, 0xC3, 0x84, 0xC3, 0x85, 0xC3, 0x86, ++ 0xC3, 0x87, 0xC3, 0x88, 0xC3, 0x89, 0xC3, 0x8A, ++ 0xC3, 0x8B, 0xC3, 0x8C, 0xC3, 0x8D, 0xC3, 0x8E, ++ 0xC3, 0x8F, 0xC3, 0x90, 0xC3, 0x91, 0xC3, 0x92, ++ 0xC3, 0x93, 0xC3, 0x94, 0xC3, 0x95, 0xC3, 0x96, ++ 0xC3, 0x98, 0xC3, 0x99, 0xC3, 0x9A, 0xC3, 0x9B, ++ 0xC3, 0x9C, 0xC3, 0x9D, 0xC3, 0x9E, 0xC5, 0xB8, ++ 0xC4, 0x80, 0xC4, 0x82, 0xC4, 0x84, 0xC4, 0x86, ++ 0xC4, 0x88, 0xC4, 0x8A, 0xC4, 0x8C, 0xC4, 0x8E, ++ 0xC4, 0x90, 0xC4, 0x92, 0xC4, 0x94, 0xC4, 0x96, ++ 0xC4, 0x98, 0xC4, 0x9A, 0xC4, 0x9C, 0xC4, 0x9E, ++ 0xC4, 0xA0, 0xC4, 0xA2, 0xC4, 0xA4, 0xC4, 0xA6, ++ 0xC4, 0xA8, 0xC4, 0xAA, 0xC4, 0xAC, 0xC4, 0xAE, ++ 0x49, 0xC4, 0xB2, 0xC4, 0xB4, 0xC4, 0xB6, 0xC4, ++ 0xB9, 0xC4, 0xBB, 0xC4, 0xBD, 0xC4, 0xBF, 0xC5, ++ 0x81, 0xC5, 0x83, 0xC5, 0x85, 0xC5, 0x87, 0xC5, ++ 0x8A, 0xC5, 0x8C, 0xC5, 0x8E, 0xC5, 0x90, 0xC5, ++ 0x92, 0xC5, 0x94, 0xC5, 0x96, 0xC5, 0x98, 0xC5, ++ 0x9A, 0xC5, 0x9C, 0xC5, 0x9E, 0xC5, 0xA0, 0xC5, ++ 0xA2, 0xC5, 0xA4, 0xC5, 0xA6, 0xC5, 0xA8, 0xC5, ++ 0xAA, 0xC5, 0xAC, 0xC5, 0xAE, 0xC5, 0xB0, 0xC5, ++ 0xB2, 0xC5, 0xB4, 0xC5, 0xB6, 0xC5, 0xB9, 0xC5, ++ 0xBB, 0xC5, 0xBD, 0x53, 0xC6, 0x82, 0xC6, 0x84, ++ 0xC6, 0x87, 0xC6, 0x8B, 0xC6, 0x91, 0xC7, 0xB6, ++ 0xC6, 0x98, 0xC8, 0xA0, 0xC6, 0xA0, 0xC6, 0xA2, ++ 0xC6, 0xA4, 0xC6, 0xA7, 0xC6, 0xAC, 0xC6, 0xAF, ++ 0xC6, 0xB3, 0xC6, 0xB5, 0xC6, 0xB8, 0xC6, 0xBC, ++ 0xC7, 0xB7, 0xC7, 0x84, 0xC7, 0x84, 0xC7, 0x87, ++ 0xC7, 0x87, 0xC7, 0x8A, 0xC7, 0x8A, 0xC7, 0x8D, ++ 0xC7, 0x8F, 0xC7, 0x91, 0xC7, 0x93, 0xC7, 0x95, ++ 0xC7, 0x97, 0xC7, 0x99, 0xC7, 0x9B, 0xC6, 0x8E, ++ 0xC7, 0x9E, 0xC7, 0xA0, 0xC7, 0xA2, 0xC7, 0xA4, ++ 0xC7, 0xA6, 0xC7, 0xA8, 0xC7, 0xAA, 0xC7, 0xAC, ++ 0xC7, 0xAE, 0xC7, 0xB1, 0xC7, 0xB1, 0xC7, 0xB4, ++ 0xC7, 0xB8, 0xC7, 0xBA, 0xC7, 0xBC, 0xC7, 0xBE, ++ 0xC8, 0x80, 0xC8, 0x82, 0xC8, 0x84, 0xC8, 0x86, ++ 0xC8, 0x88, 0xC8, 0x8A, 0xC8, 0x8C, 0xC8, 0x8E, ++ 0xC8, 0x90, 0xC8, 0x92, 0xC8, 0x94, 0xC8, 0x96, ++ 0xC8, 0x98, 0xC8, 0x9A, 0xC8, 0x9C, 0xC8, 0x9E, ++ 0xC8, 0xA2, 0xC8, 0xA4, 0xC8, 0xA6, 0xC8, 0xA8, ++ 0xC8, 0xAA, 0xC8, 0xAC, 0xC8, 0xAE, 0xC8, 0xB0, ++ 0xC8, 0xB2, 0xC6, 0x81, 0xC6, 0x86, 0xC6, 0x89, ++ 0xC6, 0x8A, 0xC6, 0x8F, 0xC6, 0x90, 0xC6, 0x93, ++ 0xC6, 0x94, 0xC6, 0x97, 0xC6, 0x96, 0xC6, 0x9C, ++ 0xC6, 0x9D, 0xC6, 0x9F, 0xC6, 0xA6, 0xC6, 0xA9, ++ 0xC6, 0xAE, 0xC6, 0xB1, 0xC6, 0xB2, 0xC6, 0xB7, ++ 0xCE, 0x99, 0xCE, 0x86, 0xCE, 0x88, 0xCE, 0x89, ++ 0xCE, 0x8A, 0xCE, 
0x91, 0xCE, 0x92, 0xCE, 0x93, ++ 0xCE, 0x94, 0xCE, 0x95, 0xCE, 0x96, 0xCE, 0x97, ++ 0xCE, 0x98, 0xCE, 0x99, 0xCE, 0x9A, 0xCE, 0x9B, ++ 0xCE, 0x9C, 0xCE, 0x9D, 0xCE, 0x9E, 0xCE, 0x9F, ++ 0xCE, 0xA0, 0xCE, 0xA1, 0xCE, 0xA3, 0xCE, 0xA3, ++ 0xCE, 0xA4, 0xCE, 0xA5, 0xCE, 0xA6, 0xCE, 0xA7, ++ 0xCE, 0xA8, 0xCE, 0xA9, 0xCE, 0xAA, 0xCE, 0xAB, ++ 0xCE, 0x8C, 0xCE, 0x8E, 0xCE, 0x8F, 0xCE, 0x92, ++ 0xCE, 0x98, 0xCE, 0xA6, 0xCE, 0xA0, 0xCF, 0x98, ++ 0xCF, 0x9A, 0xCF, 0x9C, 0xCF, 0x9E, 0xCF, 0xA0, ++ 0xCF, 0xA2, 0xCF, 0xA4, 0xCF, 0xA6, 0xCF, 0xA8, ++ 0xCF, 0xAA, 0xCF, 0xAC, 0xCF, 0xAE, 0xCE, 0x9A, ++ 0xCE, 0xA1, 0xCE, 0xA3, 0xCE, 0x95, 0xD0, 0x90, ++ 0xD0, 0x91, 0xD0, 0x92, 0xD0, 0x93, 0xD0, 0x94, ++ 0xD0, 0x95, 0xD0, 0x96, 0xD0, 0x97, 0xD0, 0x98, ++ 0xD0, 0x99, 0xD0, 0x9A, 0xD0, 0x9B, 0xD0, 0x9C, ++ 0xD0, 0x9D, 0xD0, 0x9E, 0xD0, 0x9F, 0xD0, 0xA0, ++ 0xD0, 0xA1, 0xD0, 0xA2, 0xD0, 0xA3, 0xD0, 0xA4, ++ 0xD0, 0xA5, 0xD0, 0xA6, 0xD0, 0xA7, 0xD0, 0xA8, ++ 0xD0, 0xA9, 0xD0, 0xAA, 0xD0, 0xAB, 0xD0, 0xAC, ++ 0xD0, 0xAD, 0xD0, 0xAE, 0xD0, 0xAF, 0xD0, 0x80, ++ 0xD0, 0x81, 0xD0, 0x82, 0xD0, 0x83, 0xD0, 0x84, ++ 0xD0, 0x85, 0xD0, 0x86, 0xD0, 0x87, 0xD0, 0x88, ++ 0xD0, 0x89, 0xD0, 0x8A, 0xD0, 0x8B, 0xD0, 0x8C, ++ 0xD0, 0x8D, 0xD0, 0x8E, 0xD0, 0x8F, 0xD1, 0xA0, ++ 0xD1, 0xA2, 0xD1, 0xA4, 0xD1, 0xA6, 0xD1, 0xA8, ++ 0xD1, 0xAA, 0xD1, 0xAC, 0xD1, 0xAE, 0xD1, 0xB0, ++ 0xD1, 0xB2, 0xD1, 0xB4, 0xD1, 0xB6, 0xD1, 0xB8, ++ 0xD1, 0xBA, 0xD1, 0xBC, 0xD1, 0xBE, 0xD2, 0x80, ++ 0xD2, 0x8A, 0xD2, 0x8C, 0xD2, 0x8E, 0xD2, 0x90, ++ 0xD2, 0x92, 0xD2, 0x94, 0xD2, 0x96, 0xD2, 0x98, ++ 0xD2, 0x9A, 0xD2, 0x9C, 0xD2, 0x9E, 0xD2, 0xA0, ++ 0xD2, 0xA2, 0xD2, 0xA4, 0xD2, 0xA6, 0xD2, 0xA8, ++ 0xD2, 0xAA, 0xD2, 0xAC, 0xD2, 0xAE, 0xD2, 0xB0, ++ 0xD2, 0xB2, 0xD2, 0xB4, 0xD2, 0xB6, 0xD2, 0xB8, ++ 0xD2, 0xBA, 0xD2, 0xBC, 0xD2, 0xBE, 0xD3, 0x81, ++ 0xD3, 0x83, 0xD3, 0x85, 0xD3, 0x87, 0xD3, 0x89, ++ 0xD3, 0x8B, 0xD3, 0x8D, 0xD3, 0x90, 0xD3, 0x92, ++ 0xD3, 0x94, 0xD3, 0x96, 0xD3, 0x98, 0xD3, 0x9A, ++ 0xD3, 0x9C, 0xD3, 0x9E, 0xD3, 0xA0, 0xD3, 0xA2, ++ 0xD3, 0xA4, 0xD3, 0xA6, 0xD3, 0xA8, 0xD3, 0xAA, ++ 0xD3, 0xAC, 0xD3, 0xAE, 0xD3, 0xB0, 0xD3, 0xB2, ++ 0xD3, 0xB4, 0xD3, 0xB8, 0xD4, 0x80, 0xD4, 0x82, ++ 0xD4, 0x84, 0xD4, 0x86, 0xD4, 0x88, 0xD4, 0x8A, ++ 0xD4, 0x8C, 0xD4, 0x8E, 0xD4, 0xB1, 0xD4, 0xB2, ++ 0xD4, 0xB3, 0xD4, 0xB4, 0xD4, 0xB5, 0xD4, 0xB6, ++ 0xD4, 0xB7, 0xD4, 0xB8, 0xD4, 0xB9, 0xD4, 0xBA, ++ 0xD4, 0xBB, 0xD4, 0xBC, 0xD4, 0xBD, 0xD4, 0xBE, ++ 0xD4, 0xBF, 0xD5, 0x80, 0xD5, 0x81, 0xD5, 0x82, ++ 0xD5, 0x83, 0xD5, 0x84, 0xD5, 0x85, 0xD5, 0x86, ++ 0xD5, 0x87, 0xD5, 0x88, 0xD5, 0x89, 0xD5, 0x8A, ++ 0xD5, 0x8B, 0xD5, 0x8C, 0xD5, 0x8D, 0xD5, 0x8E, ++ 0xD5, 0x8F, 0xD5, 0x90, 0xD5, 0x91, 0xD5, 0x92, ++ 0xD5, 0x93, 0xD5, 0x94, 0xD5, 0x95, 0xD5, 0x96, ++ 0xE1, 0xB8, 0x80, 0xE1, 0xB8, 0x82, 0xE1, 0xB8, ++ 0x84, 0xE1, 0xB8, 0x86, 0xE1, 0xB8, 0x88, 0xE1, ++ 0xB8, 0x8A, 0xE1, 0xB8, 0x8C, 0xE1, 0xB8, 0x8E, ++ 0xE1, 0xB8, 0x90, 0xE1, 0xB8, 0x92, 0xE1, 0xB8, ++ 0x94, 0xE1, 0xB8, 0x96, 0xE1, 0xB8, 0x98, 0xE1, ++ 0xB8, 0x9A, 0xE1, 0xB8, 0x9C, 0xE1, 0xB8, 0x9E, ++ 0xE1, 0xB8, 0xA0, 0xE1, 0xB8, 0xA2, 0xE1, 0xB8, ++ 0xA4, 0xE1, 0xB8, 0xA6, 0xE1, 0xB8, 0xA8, 0xE1, ++ 0xB8, 0xAA, 0xE1, 0xB8, 0xAC, 0xE1, 0xB8, 0xAE, ++ 0xE1, 0xB8, 0xB0, 0xE1, 0xB8, 0xB2, 0xE1, 0xB8, ++ 0xB4, 0xE1, 0xB8, 0xB6, 0xE1, 0xB8, 0xB8, 0xE1, ++ 0xB8, 0xBA, 0xE1, 0xB8, 0xBC, 0xE1, 0xB8, 0xBE, ++ 0xE1, 0xB9, 0x80, 0xE1, 0xB9, 0x82, 0xE1, 0xB9, ++ 0x84, 0xE1, 0xB9, 0x86, 0xE1, 0xB9, 0x88, 0xE1, ++ 0xB9, 0x8A, 0xE1, 0xB9, 0x8C, 0xE1, 0xB9, 0x8E, ++ 0xE1, 0xB9, 0x90, 0xE1, 0xB9, 0x92, 0xE1, 0xB9, ++ 
0x94, 0xE1, 0xB9, 0x96, 0xE1, 0xB9, 0x98, 0xE1, ++ 0xB9, 0x9A, 0xE1, 0xB9, 0x9C, 0xE1, 0xB9, 0x9E, ++ 0xE1, 0xB9, 0xA0, 0xE1, 0xB9, 0xA2, 0xE1, 0xB9, ++ 0xA4, 0xE1, 0xB9, 0xA6, 0xE1, 0xB9, 0xA8, 0xE1, ++ 0xB9, 0xAA, 0xE1, 0xB9, 0xAC, 0xE1, 0xB9, 0xAE, ++ 0xE1, 0xB9, 0xB0, 0xE1, 0xB9, 0xB2, 0xE1, 0xB9, ++ 0xB4, 0xE1, 0xB9, 0xB6, 0xE1, 0xB9, 0xB8, 0xE1, ++ 0xB9, 0xBA, 0xE1, 0xB9, 0xBC, 0xE1, 0xB9, 0xBE, ++ 0xE1, 0xBA, 0x80, 0xE1, 0xBA, 0x82, 0xE1, 0xBA, ++ 0x84, 0xE1, 0xBA, 0x86, 0xE1, 0xBA, 0x88, 0xE1, ++ 0xBA, 0x8A, 0xE1, 0xBA, 0x8C, 0xE1, 0xBA, 0x8E, ++ 0xE1, 0xBA, 0x90, 0xE1, 0xBA, 0x92, 0xE1, 0xBA, ++ 0x94, 0xE1, 0xB9, 0xA0, 0xE1, 0xBA, 0xA0, 0xE1, ++ 0xBA, 0xA2, 0xE1, 0xBA, 0xA4, 0xE1, 0xBA, 0xA6, ++ 0xE1, 0xBA, 0xA8, 0xE1, 0xBA, 0xAA, 0xE1, 0xBA, ++ 0xAC, 0xE1, 0xBA, 0xAE, 0xE1, 0xBA, 0xB0, 0xE1, ++ 0xBA, 0xB2, 0xE1, 0xBA, 0xB4, 0xE1, 0xBA, 0xB6, ++ 0xE1, 0xBA, 0xB8, 0xE1, 0xBA, 0xBA, 0xE1, 0xBA, ++ 0xBC, 0xE1, 0xBA, 0xBE, 0xE1, 0xBB, 0x80, 0xE1, ++ 0xBB, 0x82, 0xE1, 0xBB, 0x84, 0xE1, 0xBB, 0x86, ++ 0xE1, 0xBB, 0x88, 0xE1, 0xBB, 0x8A, 0xE1, 0xBB, ++ 0x8C, 0xE1, 0xBB, 0x8E, 0xE1, 0xBB, 0x90, 0xE1, ++ 0xBB, 0x92, 0xE1, 0xBB, 0x94, 0xE1, 0xBB, 0x96, ++ 0xE1, 0xBB, 0x98, 0xE1, 0xBB, 0x9A, 0xE1, 0xBB, ++ 0x9C, 0xE1, 0xBB, 0x9E, 0xE1, 0xBB, 0xA0, 0xE1, ++ 0xBB, 0xA2, 0xE1, 0xBB, 0xA4, 0xE1, 0xBB, 0xA6, ++ 0xE1, 0xBB, 0xA8, 0xE1, 0xBB, 0xAA, 0xE1, 0xBB, ++ 0xAC, 0xE1, 0xBB, 0xAE, 0xE1, 0xBB, 0xB0, 0xE1, ++ 0xBB, 0xB2, 0xE1, 0xBB, 0xB4, 0xE1, 0xBB, 0xB6, ++ 0xE1, 0xBB, 0xB8, 0xE1, 0xBC, 0x88, 0xE1, 0xBC, ++ 0x89, 0xE1, 0xBC, 0x8A, 0xE1, 0xBC, 0x8B, 0xE1, ++ 0xBC, 0x8C, 0xE1, 0xBC, 0x8D, 0xE1, 0xBC, 0x8E, ++ 0xE1, 0xBC, 0x8F, 0xE1, 0xBC, 0x98, 0xE1, 0xBC, ++ 0x99, 0xE1, 0xBC, 0x9A, 0xE1, 0xBC, 0x9B, 0xE1, ++ 0xBC, 0x9C, 0xE1, 0xBC, 0x9D, 0xE1, 0xBC, 0xA8, ++ 0xE1, 0xBC, 0xA9, 0xE1, 0xBC, 0xAA, 0xE1, 0xBC, ++ 0xAB, 0xE1, 0xBC, 0xAC, 0xE1, 0xBC, 0xAD, 0xE1, ++ 0xBC, 0xAE, 0xE1, 0xBC, 0xAF, 0xE1, 0xBC, 0xB8, ++ 0xE1, 0xBC, 0xB9, 0xE1, 0xBC, 0xBA, 0xE1, 0xBC, ++ 0xBB, 0xE1, 0xBC, 0xBC, 0xE1, 0xBC, 0xBD, 0xE1, ++ 0xBC, 0xBE, 0xE1, 0xBC, 0xBF, 0xE1, 0xBD, 0x88, ++ 0xE1, 0xBD, 0x89, 0xE1, 0xBD, 0x8A, 0xE1, 0xBD, ++ 0x8B, 0xE1, 0xBD, 0x8C, 0xE1, 0xBD, 0x8D, 0xE1, ++ 0xBD, 0x99, 0xE1, 0xBD, 0x9B, 0xE1, 0xBD, 0x9D, ++ 0xE1, 0xBD, 0x9F, 0xE1, 0xBD, 0xA8, 0xE1, 0xBD, ++ 0xA9, 0xE1, 0xBD, 0xAA, 0xE1, 0xBD, 0xAB, 0xE1, ++ 0xBD, 0xAC, 0xE1, 0xBD, 0xAD, 0xE1, 0xBD, 0xAE, ++ 0xE1, 0xBD, 0xAF, 0xE1, 0xBE, 0xBA, 0xE1, 0xBE, ++ 0xBB, 0xE1, 0xBF, 0x88, 0xE1, 0xBF, 0x89, 0xE1, ++ 0xBF, 0x8A, 0xE1, 0xBF, 0x8B, 0xE1, 0xBF, 0x9A, ++ 0xE1, 0xBF, 0x9B, 0xE1, 0xBF, 0xB8, 0xE1, 0xBF, ++ 0xB9, 0xE1, 0xBF, 0xAA, 0xE1, 0xBF, 0xAB, 0xE1, ++ 0xBF, 0xBA, 0xE1, 0xBF, 0xBB, 0xE1, 0xBE, 0x88, ++ 0xE1, 0xBE, 0x89, 0xE1, 0xBE, 0x8A, 0xE1, 0xBE, ++ 0x8B, 0xE1, 0xBE, 0x8C, 0xE1, 0xBE, 0x8D, 0xE1, ++ 0xBE, 0x8E, 0xE1, 0xBE, 0x8F, 0xE1, 0xBE, 0x98, ++ 0xE1, 0xBE, 0x99, 0xE1, 0xBE, 0x9A, 0xE1, 0xBE, ++ 0x9B, 0xE1, 0xBE, 0x9C, 0xE1, 0xBE, 0x9D, 0xE1, ++ 0xBE, 0x9E, 0xE1, 0xBE, 0x9F, 0xE1, 0xBE, 0xA8, ++ 0xE1, 0xBE, 0xA9, 0xE1, 0xBE, 0xAA, 0xE1, 0xBE, ++ 0xAB, 0xE1, 0xBE, 0xAC, 0xE1, 0xBE, 0xAD, 0xE1, ++ 0xBE, 0xAE, 0xE1, 0xBE, 0xAF, 0xE1, 0xBE, 0xB8, ++ 0xE1, 0xBE, 0xB9, 0xE1, 0xBE, 0xBC, 0xCE, 0x99, ++ 0xE1, 0xBF, 0x8C, 0xE1, 0xBF, 0x98, 0xE1, 0xBF, ++ 0x99, 0xE1, 0xBF, 0xA8, 0xE1, 0xBF, 0xA9, 0xE1, ++ 0xBF, 0xAC, 0xE1, 0xBF, 0xBC, 0xE2, 0x85, 0xA0, ++ 0xE2, 0x85, 0xA1, 0xE2, 0x85, 0xA2, 0xE2, 0x85, ++ 0xA3, 0xE2, 0x85, 0xA4, 0xE2, 0x85, 0xA5, 0xE2, ++ 0x85, 0xA6, 0xE2, 0x85, 0xA7, 0xE2, 0x85, 0xA8, ++ 0xE2, 0x85, 0xA9, 0xE2, 0x85, 0xAA, 
0xE2, 0x85, ++ 0xAB, 0xE2, 0x85, 0xAC, 0xE2, 0x85, 0xAD, 0xE2, ++ 0x85, 0xAE, 0xE2, 0x85, 0xAF, 0xE2, 0x92, 0xB6, ++ 0xE2, 0x92, 0xB7, 0xE2, 0x92, 0xB8, 0xE2, 0x92, ++ 0xB9, 0xE2, 0x92, 0xBA, 0xE2, 0x92, 0xBB, 0xE2, ++ 0x92, 0xBC, 0xE2, 0x92, 0xBD, 0xE2, 0x92, 0xBE, ++ 0xE2, 0x92, 0xBF, 0xE2, 0x93, 0x80, 0xE2, 0x93, ++ 0x81, 0xE2, 0x93, 0x82, 0xE2, 0x93, 0x83, 0xE2, ++ 0x93, 0x84, 0xE2, 0x93, 0x85, 0xE2, 0x93, 0x86, ++ 0xE2, 0x93, 0x87, 0xE2, 0x93, 0x88, 0xE2, 0x93, ++ 0x89, 0xE2, 0x93, 0x8A, 0xE2, 0x93, 0x8B, 0xE2, ++ 0x93, 0x8C, 0xE2, 0x93, 0x8D, 0xE2, 0x93, 0x8E, ++ 0xE2, 0x93, 0x8F, 0xEF, 0xBC, 0xA1, 0xEF, 0xBC, ++ 0xA2, 0xEF, 0xBC, 0xA3, 0xEF, 0xBC, 0xA4, 0xEF, ++ 0xBC, 0xA5, 0xEF, 0xBC, 0xA6, 0xEF, 0xBC, 0xA7, ++ 0xEF, 0xBC, 0xA8, 0xEF, 0xBC, 0xA9, 0xEF, 0xBC, ++ 0xAA, 0xEF, 0xBC, 0xAB, 0xEF, 0xBC, 0xAC, 0xEF, ++ 0xBC, 0xAD, 0xEF, 0xBC, 0xAE, 0xEF, 0xBC, 0xAF, ++ 0xEF, 0xBC, 0xB0, 0xEF, 0xBC, 0xB1, 0xEF, 0xBC, ++ 0xB2, 0xEF, 0xBC, 0xB3, 0xEF, 0xBC, 0xB4, 0xEF, ++ 0xBC, 0xB5, 0xEF, 0xBC, 0xB6, 0xEF, 0xBC, 0xB7, ++ 0xEF, 0xBC, 0xB8, 0xEF, 0xBC, 0xB9, 0xEF, 0xBC, ++ 0xBA, 0xF0, 0x90, 0x90, 0x80, 0xF0, 0x90, 0x90, ++ 0x81, 0xF0, 0x90, 0x90, 0x82, 0xF0, 0x90, 0x90, ++ 0x83, 0xF0, 0x90, 0x90, 0x84, 0xF0, 0x90, 0x90, ++ 0x85, 0xF0, 0x90, 0x90, 0x86, 0xF0, 0x90, 0x90, ++ 0x87, 0xF0, 0x90, 0x90, 0x88, 0xF0, 0x90, 0x90, ++ 0x89, 0xF0, 0x90, 0x90, 0x8A, 0xF0, 0x90, 0x90, ++ 0x8B, 0xF0, 0x90, 0x90, 0x8C, 0xF0, 0x90, 0x90, ++ 0x8D, 0xF0, 0x90, 0x90, 0x8E, 0xF0, 0x90, 0x90, ++ 0x8F, 0xF0, 0x90, 0x90, 0x90, 0xF0, 0x90, 0x90, ++ 0x91, 0xF0, 0x90, 0x90, 0x92, 0xF0, 0x90, 0x90, ++ 0x93, 0xF0, 0x90, 0x90, 0x94, 0xF0, 0x90, 0x90, ++ 0x95, 0xF0, 0x90, 0x90, 0x96, 0xF0, 0x90, 0x90, ++ 0x97, 0xF0, 0x90, 0x90, 0x98, 0xF0, 0x90, 0x90, ++ 0x99, 0xF0, 0x90, 0x90, 0x9A, 0xF0, 0x90, 0x90, ++ 0x9B, 0xF0, 0x90, 0x90, 0x9C, 0xF0, 0x90, 0x90, ++ 0x9D, 0xF0, 0x90, 0x90, 0x9E, 0xF0, 0x90, 0x90, ++ 0x9F, 0xF0, 0x90, 0x90, 0xA0, 0xF0, 0x90, 0x90, ++ 0xA1, 0xF0, 0x90, 0x90, 0xA2, 0xF0, 0x90, 0x90, ++ 0xA3, 0xF0, 0x90, 0x90, 0xA4, 0xF0, 0x90, 0x90, ++ 0xA5, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 
0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, ++ }, ++ { ++ 0xCE, 0x9C, 0xC3, 0x80, 0xC3, 0x81, 0xC3, 0x82, ++ 0xC3, 0x83, 0xC3, 0x84, 0xC3, 0x85, 0xC3, 0x86, ++ 0xC3, 0x87, 0xC3, 0x88, 0xC3, 0x89, 0xC3, 0x8A, ++ 0xC3, 0x8B, 0xC3, 0x8C, 0xC3, 0x8D, 0xC3, 0x8E, ++ 0xC3, 0x8F, 0xC3, 0x90, 0xC3, 0x91, 0xC3, 0x92, ++ 0xC3, 0x93, 0xC3, 0x94, 0xC3, 0x95, 0xC3, 0x96, ++ 0xC3, 0x98, 0xC3, 0x99, 0xC3, 0x9A, 0xC3, 0x9B, ++ 0xC3, 0x9C, 0xC3, 0x9D, 0xC3, 0x9E, 0xC5, 0xB8, ++ 0xC4, 0x80, 0xC4, 0x82, 0xC4, 0x84, 0xC4, 0x86, ++ 0xC4, 0x88, 0xC4, 0x8A, 0xC4, 0x8C, 0xC4, 0x8E, ++ 0xC4, 0x90, 0xC4, 0x92, 0xC4, 0x94, 0xC4, 0x96, ++ 0xC4, 0x98, 0xC4, 0x9A, 0xC4, 0x9C, 0xC4, 0x9E, ++ 0xC4, 0xA0, 0xC4, 0xA2, 0xC4, 0xA4, 0xC4, 0xA6, ++ 0xC4, 0xA8, 0xC4, 0xAA, 0xC4, 0xAC, 0xC4, 0xAE, ++ 0x49, 0xC4, 0xB2, 0xC4, 0xB4, 0xC4, 0xB6, 0xC4, ++ 0xB9, 0xC4, 0xBB, 0xC4, 0xBD, 0xC4, 0xBF, 0xC5, ++ 0x81, 0xC5, 0x83, 0xC5, 0x85, 0xC5, 0x87, 0xC5, ++ 0x8A, 0xC5, 0x8C, 0xC5, 0x8E, 0xC5, 0x90, 0xC5, ++ 0x92, 0xC5, 0x94, 0xC5, 0x96, 0xC5, 0x98, 0xC5, ++ 0x9A, 0xC5, 0x9C, 0xC5, 0x9E, 0xC5, 0xA0, 0xC5, ++ 0xA2, 0xC5, 0xA4, 0xC5, 0xA6, 0xC5, 0xA8, 0xC5, ++ 0xAA, 0xC5, 0xAC, 0xC5, 0xAE, 0xC5, 0xB0, 0xC5, ++ 0xB2, 0xC5, 0xB4, 0xC5, 0xB6, 0xC5, 0xB9, 0xC5, ++ 0xBB, 0xC5, 0xBD, 0x53, 0xC9, 0x83, 0xC6, 0x82, ++ 0xC6, 0x84, 0xC6, 0x87, 0xC6, 0x8B, 0xC6, 0x91, ++ 0xC7, 0xB6, 0xC6, 0x98, 0xC8, 0xBD, 0xC8, 0xA0, ++ 0xC6, 0xA0, 0xC6, 0xA2, 0xC6, 0xA4, 0xC6, 0xA7, ++ 0xC6, 0xAC, 0xC6, 0xAF, 0xC6, 0xB3, 0xC6, 0xB5, ++ 0xC6, 0xB8, 0xC6, 0xBC, 0xC7, 0xB7, 0xC7, 0x84, ++ 0xC7, 0x84, 0xC7, 0x87, 0xC7, 0x87, 0xC7, 0x8A, ++ 0xC7, 0x8A, 0xC7, 0x8D, 0xC7, 0x8F, 0xC7, 0x91, ++ 0xC7, 0x93, 0xC7, 0x95, 0xC7, 0x97, 0xC7, 0x99, ++ 0xC7, 0x9B, 0xC6, 0x8E, 0xC7, 0x9E, 0xC7, 0xA0, ++ 0xC7, 0xA2, 0xC7, 0xA4, 0xC7, 0xA6, 0xC7, 0xA8, ++ 0xC7, 0xAA, 0xC7, 0xAC, 0xC7, 0xAE, 0xC7, 0xB1, ++ 0xC7, 0xB1, 0xC7, 0xB4, 0xC7, 0xB8, 0xC7, 0xBA, ++ 0xC7, 0xBC, 0xC7, 0xBE, 0xC8, 0x80, 0xC8, 0x82, ++ 0xC8, 0x84, 0xC8, 0x86, 0xC8, 0x88, 0xC8, 0x8A, ++ 0xC8, 0x8C, 0xC8, 0x8E, 0xC8, 0x90, 0xC8, 0x92, ++ 0xC8, 0x94, 0xC8, 0x96, 0xC8, 0x98, 0xC8, 0x9A, ++ 0xC8, 0x9C, 0xC8, 0x9E, 0xC8, 0xA2, 0xC8, 0xA4, ++ 0xC8, 0xA6, 0xC8, 0xA8, 0xC8, 0xAA, 0xC8, 0xAC, ++ 0xC8, 0xAE, 0xC8, 0xB0, 0xC8, 0xB2, 0xC8, 0xBB, ++ 0xC9, 0x81, 0xC9, 0x86, 0xC9, 0x88, 0xC9, 0x8A, ++ 0xC9, 0x8C, 0xC9, 0x8E, 0xC6, 0x81, 0xC6, 0x86, ++ 0xC6, 0x89, 0xC6, 0x8A, 0xC6, 0x8F, 0xC6, 0x90, ++ 0xC6, 0x93, 0xC6, 0x94, 0xC6, 0x97, 0xC6, 0x96, ++ 0xE2, 0xB1, 0xA2, 0xC6, 0x9C, 0xC6, 0x9D, 0xC6, ++ 0x9F, 0xE2, 0xB1, 0xA4, 0xC6, 0xA6, 0xC6, 0xA9, ++ 0xC6, 0xAE, 0xC9, 0x84, 0xC6, 0xB1, 0xC6, 0xB2, ++ 0xC9, 0x85, 0xC6, 0xB7, 0xCE, 0x99, 0xCF, 0xBD, ++ 0xCF, 0xBE, 0xCF, 0xBF, 0xCE, 0x86, 0xCE, 0x88, ++ 0xCE, 0x89, 0xCE, 0x8A, 0xCE, 0x91, 0xCE, 0x92, ++ 0xCE, 0x93, 0xCE, 0x94, 0xCE, 0x95, 0xCE, 0x96, ++ 0xCE, 0x97, 0xCE, 0x98, 0xCE, 0x99, 0xCE, 0x9A, ++ 0xCE, 0x9B, 0xCE, 0x9C, 0xCE, 0x9D, 0xCE, 0x9E, ++ 0xCE, 0x9F, 0xCE, 0xA0, 0xCE, 0xA1, 0xCE, 0xA3, ++ 0xCE, 0xA3, 0xCE, 0xA4, 0xCE, 0xA5, 0xCE, 0xA6, ++ 0xCE, 0xA7, 0xCE, 0xA8, 0xCE, 0xA9, 0xCE, 0xAA, ++ 0xCE, 0xAB, 0xCE, 0x8C, 0xCE, 0x8E, 0xCE, 0x8F, ++ 0xCE, 0x92, 0xCE, 0x98, 0xCE, 0xA6, 0xCE, 0xA0, ++ 0xCF, 0x98, 0xCF, 0x9A, 0xCF, 0x9C, 0xCF, 0x9E, ++ 0xCF, 0xA0, 0xCF, 0xA2, 0xCF, 0xA4, 0xCF, 0xA6, ++ 0xCF, 0xA8, 0xCF, 0xAA, 0xCF, 0xAC, 0xCF, 0xAE, ++ 0xCE, 0x9A, 0xCE, 0xA1, 0xCF, 0xB9, 0xCE, 0x95, ++ 0xCF, 0xB7, 0xCF, 0xBA, 0xD0, 0x90, 0xD0, 0x91, ++ 0xD0, 
0x92, 0xD0, 0x93, 0xD0, 0x94, 0xD0, 0x95, ++ 0xD0, 0x96, 0xD0, 0x97, 0xD0, 0x98, 0xD0, 0x99, ++ 0xD0, 0x9A, 0xD0, 0x9B, 0xD0, 0x9C, 0xD0, 0x9D, ++ 0xD0, 0x9E, 0xD0, 0x9F, 0xD0, 0xA0, 0xD0, 0xA1, ++ 0xD0, 0xA2, 0xD0, 0xA3, 0xD0, 0xA4, 0xD0, 0xA5, ++ 0xD0, 0xA6, 0xD0, 0xA7, 0xD0, 0xA8, 0xD0, 0xA9, ++ 0xD0, 0xAA, 0xD0, 0xAB, 0xD0, 0xAC, 0xD0, 0xAD, ++ 0xD0, 0xAE, 0xD0, 0xAF, 0xD0, 0x80, 0xD0, 0x81, ++ 0xD0, 0x82, 0xD0, 0x83, 0xD0, 0x84, 0xD0, 0x85, ++ 0xD0, 0x86, 0xD0, 0x87, 0xD0, 0x88, 0xD0, 0x89, ++ 0xD0, 0x8A, 0xD0, 0x8B, 0xD0, 0x8C, 0xD0, 0x8D, ++ 0xD0, 0x8E, 0xD0, 0x8F, 0xD1, 0xA0, 0xD1, 0xA2, ++ 0xD1, 0xA4, 0xD1, 0xA6, 0xD1, 0xA8, 0xD1, 0xAA, ++ 0xD1, 0xAC, 0xD1, 0xAE, 0xD1, 0xB0, 0xD1, 0xB2, ++ 0xD1, 0xB4, 0xD1, 0xB6, 0xD1, 0xB8, 0xD1, 0xBA, ++ 0xD1, 0xBC, 0xD1, 0xBE, 0xD2, 0x80, 0xD2, 0x8A, ++ 0xD2, 0x8C, 0xD2, 0x8E, 0xD2, 0x90, 0xD2, 0x92, ++ 0xD2, 0x94, 0xD2, 0x96, 0xD2, 0x98, 0xD2, 0x9A, ++ 0xD2, 0x9C, 0xD2, 0x9E, 0xD2, 0xA0, 0xD2, 0xA2, ++ 0xD2, 0xA4, 0xD2, 0xA6, 0xD2, 0xA8, 0xD2, 0xAA, ++ 0xD2, 0xAC, 0xD2, 0xAE, 0xD2, 0xB0, 0xD2, 0xB2, ++ 0xD2, 0xB4, 0xD2, 0xB6, 0xD2, 0xB8, 0xD2, 0xBA, ++ 0xD2, 0xBC, 0xD2, 0xBE, 0xD3, 0x81, 0xD3, 0x83, ++ 0xD3, 0x85, 0xD3, 0x87, 0xD3, 0x89, 0xD3, 0x8B, ++ 0xD3, 0x8D, 0xD3, 0x80, 0xD3, 0x90, 0xD3, 0x92, ++ 0xD3, 0x94, 0xD3, 0x96, 0xD3, 0x98, 0xD3, 0x9A, ++ 0xD3, 0x9C, 0xD3, 0x9E, 0xD3, 0xA0, 0xD3, 0xA2, ++ 0xD3, 0xA4, 0xD3, 0xA6, 0xD3, 0xA8, 0xD3, 0xAA, ++ 0xD3, 0xAC, 0xD3, 0xAE, 0xD3, 0xB0, 0xD3, 0xB2, ++ 0xD3, 0xB4, 0xD3, 0xB6, 0xD3, 0xB8, 0xD3, 0xBA, ++ 0xD3, 0xBC, 0xD3, 0xBE, 0xD4, 0x80, 0xD4, 0x82, ++ 0xD4, 0x84, 0xD4, 0x86, 0xD4, 0x88, 0xD4, 0x8A, ++ 0xD4, 0x8C, 0xD4, 0x8E, 0xD4, 0x90, 0xD4, 0x92, ++ 0xD4, 0xB1, 0xD4, 0xB2, 0xD4, 0xB3, 0xD4, 0xB4, ++ 0xD4, 0xB5, 0xD4, 0xB6, 0xD4, 0xB7, 0xD4, 0xB8, ++ 0xD4, 0xB9, 0xD4, 0xBA, 0xD4, 0xBB, 0xD4, 0xBC, ++ 0xD4, 0xBD, 0xD4, 0xBE, 0xD4, 0xBF, 0xD5, 0x80, ++ 0xD5, 0x81, 0xD5, 0x82, 0xD5, 0x83, 0xD5, 0x84, ++ 0xD5, 0x85, 0xD5, 0x86, 0xD5, 0x87, 0xD5, 0x88, ++ 0xD5, 0x89, 0xD5, 0x8A, 0xD5, 0x8B, 0xD5, 0x8C, ++ 0xD5, 0x8D, 0xD5, 0x8E, 0xD5, 0x8F, 0xD5, 0x90, ++ 0xD5, 0x91, 0xD5, 0x92, 0xD5, 0x93, 0xD5, 0x94, ++ 0xD5, 0x95, 0xD5, 0x96, 0xE2, 0xB1, 0xA3, 0xE1, ++ 0xB8, 0x80, 0xE1, 0xB8, 0x82, 0xE1, 0xB8, 0x84, ++ 0xE1, 0xB8, 0x86, 0xE1, 0xB8, 0x88, 0xE1, 0xB8, ++ 0x8A, 0xE1, 0xB8, 0x8C, 0xE1, 0xB8, 0x8E, 0xE1, ++ 0xB8, 0x90, 0xE1, 0xB8, 0x92, 0xE1, 0xB8, 0x94, ++ 0xE1, 0xB8, 0x96, 0xE1, 0xB8, 0x98, 0xE1, 0xB8, ++ 0x9A, 0xE1, 0xB8, 0x9C, 0xE1, 0xB8, 0x9E, 0xE1, ++ 0xB8, 0xA0, 0xE1, 0xB8, 0xA2, 0xE1, 0xB8, 0xA4, ++ 0xE1, 0xB8, 0xA6, 0xE1, 0xB8, 0xA8, 0xE1, 0xB8, ++ 0xAA, 0xE1, 0xB8, 0xAC, 0xE1, 0xB8, 0xAE, 0xE1, ++ 0xB8, 0xB0, 0xE1, 0xB8, 0xB2, 0xE1, 0xB8, 0xB4, ++ 0xE1, 0xB8, 0xB6, 0xE1, 0xB8, 0xB8, 0xE1, 0xB8, ++ 0xBA, 0xE1, 0xB8, 0xBC, 0xE1, 0xB8, 0xBE, 0xE1, ++ 0xB9, 0x80, 0xE1, 0xB9, 0x82, 0xE1, 0xB9, 0x84, ++ 0xE1, 0xB9, 0x86, 0xE1, 0xB9, 0x88, 0xE1, 0xB9, ++ 0x8A, 0xE1, 0xB9, 0x8C, 0xE1, 0xB9, 0x8E, 0xE1, ++ 0xB9, 0x90, 0xE1, 0xB9, 0x92, 0xE1, 0xB9, 0x94, ++ 0xE1, 0xB9, 0x96, 0xE1, 0xB9, 0x98, 0xE1, 0xB9, ++ 0x9A, 0xE1, 0xB9, 0x9C, 0xE1, 0xB9, 0x9E, 0xE1, ++ 0xB9, 0xA0, 0xE1, 0xB9, 0xA2, 0xE1, 0xB9, 0xA4, ++ 0xE1, 0xB9, 0xA6, 0xE1, 0xB9, 0xA8, 0xE1, 0xB9, ++ 0xAA, 0xE1, 0xB9, 0xAC, 0xE1, 0xB9, 0xAE, 0xE1, ++ 0xB9, 0xB0, 0xE1, 0xB9, 0xB2, 0xE1, 0xB9, 0xB4, ++ 0xE1, 0xB9, 0xB6, 0xE1, 0xB9, 0xB8, 0xE1, 0xB9, ++ 0xBA, 0xE1, 0xB9, 0xBC, 0xE1, 0xB9, 0xBE, 0xE1, ++ 0xBA, 0x80, 0xE1, 0xBA, 0x82, 0xE1, 0xBA, 0x84, ++ 0xE1, 0xBA, 0x86, 0xE1, 0xBA, 0x88, 0xE1, 0xBA, ++ 0x8A, 0xE1, 0xBA, 0x8C, 0xE1, 0xBA, 0x8E, 
0xE1, ++ 0xBA, 0x90, 0xE1, 0xBA, 0x92, 0xE1, 0xBA, 0x94, ++ 0xE1, 0xB9, 0xA0, 0xE1, 0xBA, 0xA0, 0xE1, 0xBA, ++ 0xA2, 0xE1, 0xBA, 0xA4, 0xE1, 0xBA, 0xA6, 0xE1, ++ 0xBA, 0xA8, 0xE1, 0xBA, 0xAA, 0xE1, 0xBA, 0xAC, ++ 0xE1, 0xBA, 0xAE, 0xE1, 0xBA, 0xB0, 0xE1, 0xBA, ++ 0xB2, 0xE1, 0xBA, 0xB4, 0xE1, 0xBA, 0xB6, 0xE1, ++ 0xBA, 0xB8, 0xE1, 0xBA, 0xBA, 0xE1, 0xBA, 0xBC, ++ 0xE1, 0xBA, 0xBE, 0xE1, 0xBB, 0x80, 0xE1, 0xBB, ++ 0x82, 0xE1, 0xBB, 0x84, 0xE1, 0xBB, 0x86, 0xE1, ++ 0xBB, 0x88, 0xE1, 0xBB, 0x8A, 0xE1, 0xBB, 0x8C, ++ 0xE1, 0xBB, 0x8E, 0xE1, 0xBB, 0x90, 0xE1, 0xBB, ++ 0x92, 0xE1, 0xBB, 0x94, 0xE1, 0xBB, 0x96, 0xE1, ++ 0xBB, 0x98, 0xE1, 0xBB, 0x9A, 0xE1, 0xBB, 0x9C, ++ 0xE1, 0xBB, 0x9E, 0xE1, 0xBB, 0xA0, 0xE1, 0xBB, ++ 0xA2, 0xE1, 0xBB, 0xA4, 0xE1, 0xBB, 0xA6, 0xE1, ++ 0xBB, 0xA8, 0xE1, 0xBB, 0xAA, 0xE1, 0xBB, 0xAC, ++ 0xE1, 0xBB, 0xAE, 0xE1, 0xBB, 0xB0, 0xE1, 0xBB, ++ 0xB2, 0xE1, 0xBB, 0xB4, 0xE1, 0xBB, 0xB6, 0xE1, ++ 0xBB, 0xB8, 0xE1, 0xBC, 0x88, 0xE1, 0xBC, 0x89, ++ 0xE1, 0xBC, 0x8A, 0xE1, 0xBC, 0x8B, 0xE1, 0xBC, ++ 0x8C, 0xE1, 0xBC, 0x8D, 0xE1, 0xBC, 0x8E, 0xE1, ++ 0xBC, 0x8F, 0xE1, 0xBC, 0x98, 0xE1, 0xBC, 0x99, ++ 0xE1, 0xBC, 0x9A, 0xE1, 0xBC, 0x9B, 0xE1, 0xBC, ++ 0x9C, 0xE1, 0xBC, 0x9D, 0xE1, 0xBC, 0xA8, 0xE1, ++ 0xBC, 0xA9, 0xE1, 0xBC, 0xAA, 0xE1, 0xBC, 0xAB, ++ 0xE1, 0xBC, 0xAC, 0xE1, 0xBC, 0xAD, 0xE1, 0xBC, ++ 0xAE, 0xE1, 0xBC, 0xAF, 0xE1, 0xBC, 0xB8, 0xE1, ++ 0xBC, 0xB9, 0xE1, 0xBC, 0xBA, 0xE1, 0xBC, 0xBB, ++ 0xE1, 0xBC, 0xBC, 0xE1, 0xBC, 0xBD, 0xE1, 0xBC, ++ 0xBE, 0xE1, 0xBC, 0xBF, 0xE1, 0xBD, 0x88, 0xE1, ++ 0xBD, 0x89, 0xE1, 0xBD, 0x8A, 0xE1, 0xBD, 0x8B, ++ 0xE1, 0xBD, 0x8C, 0xE1, 0xBD, 0x8D, 0xE1, 0xBD, ++ 0x99, 0xE1, 0xBD, 0x9B, 0xE1, 0xBD, 0x9D, 0xE1, ++ 0xBD, 0x9F, 0xE1, 0xBD, 0xA8, 0xE1, 0xBD, 0xA9, ++ 0xE1, 0xBD, 0xAA, 0xE1, 0xBD, 0xAB, 0xE1, 0xBD, ++ 0xAC, 0xE1, 0xBD, 0xAD, 0xE1, 0xBD, 0xAE, 0xE1, ++ 0xBD, 0xAF, 0xE1, 0xBE, 0xBA, 0xE1, 0xBE, 0xBB, ++ 0xE1, 0xBF, 0x88, 0xE1, 0xBF, 0x89, 0xE1, 0xBF, ++ 0x8A, 0xE1, 0xBF, 0x8B, 0xE1, 0xBF, 0x9A, 0xE1, ++ 0xBF, 0x9B, 0xE1, 0xBF, 0xB8, 0xE1, 0xBF, 0xB9, ++ 0xE1, 0xBF, 0xAA, 0xE1, 0xBF, 0xAB, 0xE1, 0xBF, ++ 0xBA, 0xE1, 0xBF, 0xBB, 0xE1, 0xBE, 0x88, 0xE1, ++ 0xBE, 0x89, 0xE1, 0xBE, 0x8A, 0xE1, 0xBE, 0x8B, ++ 0xE1, 0xBE, 0x8C, 0xE1, 0xBE, 0x8D, 0xE1, 0xBE, ++ 0x8E, 0xE1, 0xBE, 0x8F, 0xE1, 0xBE, 0x98, 0xE1, ++ 0xBE, 0x99, 0xE1, 0xBE, 0x9A, 0xE1, 0xBE, 0x9B, ++ 0xE1, 0xBE, 0x9C, 0xE1, 0xBE, 0x9D, 0xE1, 0xBE, ++ 0x9E, 0xE1, 0xBE, 0x9F, 0xE1, 0xBE, 0xA8, 0xE1, ++ 0xBE, 0xA9, 0xE1, 0xBE, 0xAA, 0xE1, 0xBE, 0xAB, ++ 0xE1, 0xBE, 0xAC, 0xE1, 0xBE, 0xAD, 0xE1, 0xBE, ++ 0xAE, 0xE1, 0xBE, 0xAF, 0xE1, 0xBE, 0xB8, 0xE1, ++ 0xBE, 0xB9, 0xE1, 0xBE, 0xBC, 0xCE, 0x99, 0xE1, ++ 0xBF, 0x8C, 0xE1, 0xBF, 0x98, 0xE1, 0xBF, 0x99, ++ 0xE1, 0xBF, 0xA8, 0xE1, 0xBF, 0xA9, 0xE1, 0xBF, ++ 0xAC, 0xE1, 0xBF, 0xBC, 0xE2, 0x84, 0xB2, 0xE2, ++ 0x85, 0xA0, 0xE2, 0x85, 0xA1, 0xE2, 0x85, 0xA2, ++ 0xE2, 0x85, 0xA3, 0xE2, 0x85, 0xA4, 0xE2, 0x85, ++ 0xA5, 0xE2, 0x85, 0xA6, 0xE2, 0x85, 0xA7, 0xE2, ++ 0x85, 0xA8, 0xE2, 0x85, 0xA9, 0xE2, 0x85, 0xAA, ++ 0xE2, 0x85, 0xAB, 0xE2, 0x85, 0xAC, 0xE2, 0x85, ++ 0xAD, 0xE2, 0x85, 0xAE, 0xE2, 0x85, 0xAF, 0xE2, ++ 0x86, 0x83, 0xE2, 0x92, 0xB6, 0xE2, 0x92, 0xB7, ++ 0xE2, 0x92, 0xB8, 0xE2, 0x92, 0xB9, 0xE2, 0x92, ++ 0xBA, 0xE2, 0x92, 0xBB, 0xE2, 0x92, 0xBC, 0xE2, ++ 0x92, 0xBD, 0xE2, 0x92, 0xBE, 0xE2, 0x92, 0xBF, ++ 0xE2, 0x93, 0x80, 0xE2, 0x93, 0x81, 0xE2, 0x93, ++ 0x82, 0xE2, 0x93, 0x83, 0xE2, 0x93, 0x84, 0xE2, ++ 0x93, 0x85, 0xE2, 0x93, 0x86, 0xE2, 0x93, 0x87, ++ 0xE2, 0x93, 0x88, 0xE2, 0x93, 0x89, 0xE2, 0x93, ++ 0x8A, 0xE2, 0x93, 0x8B, 
0xE2, 0x93, 0x8C, 0xE2, ++ 0x93, 0x8D, 0xE2, 0x93, 0x8E, 0xE2, 0x93, 0x8F, ++ 0xE2, 0xB0, 0x80, 0xE2, 0xB0, 0x81, 0xE2, 0xB0, ++ 0x82, 0xE2, 0xB0, 0x83, 0xE2, 0xB0, 0x84, 0xE2, ++ 0xB0, 0x85, 0xE2, 0xB0, 0x86, 0xE2, 0xB0, 0x87, ++ 0xE2, 0xB0, 0x88, 0xE2, 0xB0, 0x89, 0xE2, 0xB0, ++ 0x8A, 0xE2, 0xB0, 0x8B, 0xE2, 0xB0, 0x8C, 0xE2, ++ 0xB0, 0x8D, 0xE2, 0xB0, 0x8E, 0xE2, 0xB0, 0x8F, ++ 0xE2, 0xB0, 0x90, 0xE2, 0xB0, 0x91, 0xE2, 0xB0, ++ 0x92, 0xE2, 0xB0, 0x93, 0xE2, 0xB0, 0x94, 0xE2, ++ 0xB0, 0x95, 0xE2, 0xB0, 0x96, 0xE2, 0xB0, 0x97, ++ 0xE2, 0xB0, 0x98, 0xE2, 0xB0, 0x99, 0xE2, 0xB0, ++ 0x9A, 0xE2, 0xB0, 0x9B, 0xE2, 0xB0, 0x9C, 0xE2, ++ 0xB0, 0x9D, 0xE2, 0xB0, 0x9E, 0xE2, 0xB0, 0x9F, ++ 0xE2, 0xB0, 0xA0, 0xE2, 0xB0, 0xA1, 0xE2, 0xB0, ++ 0xA2, 0xE2, 0xB0, 0xA3, 0xE2, 0xB0, 0xA4, 0xE2, ++ 0xB0, 0xA5, 0xE2, 0xB0, 0xA6, 0xE2, 0xB0, 0xA7, ++ 0xE2, 0xB0, 0xA8, 0xE2, 0xB0, 0xA9, 0xE2, 0xB0, ++ 0xAA, 0xE2, 0xB0, 0xAB, 0xE2, 0xB0, 0xAC, 0xE2, ++ 0xB0, 0xAD, 0xE2, 0xB0, 0xAE, 0xE2, 0xB1, 0xA0, ++ 0xC8, 0xBA, 0xC8, 0xBE, 0xE2, 0xB1, 0xA7, 0xE2, ++ 0xB1, 0xA9, 0xE2, 0xB1, 0xAB, 0xE2, 0xB1, 0xB5, ++ 0xE2, 0xB2, 0x80, 0xE2, 0xB2, 0x82, 0xE2, 0xB2, ++ 0x84, 0xE2, 0xB2, 0x86, 0xE2, 0xB2, 0x88, 0xE2, ++ 0xB2, 0x8A, 0xE2, 0xB2, 0x8C, 0xE2, 0xB2, 0x8E, ++ 0xE2, 0xB2, 0x90, 0xE2, 0xB2, 0x92, 0xE2, 0xB2, ++ 0x94, 0xE2, 0xB2, 0x96, 0xE2, 0xB2, 0x98, 0xE2, ++ 0xB2, 0x9A, 0xE2, 0xB2, 0x9C, 0xE2, 0xB2, 0x9E, ++ 0xE2, 0xB2, 0xA0, 0xE2, 0xB2, 0xA2, 0xE2, 0xB2, ++ 0xA4, 0xE2, 0xB2, 0xA6, 0xE2, 0xB2, 0xA8, 0xE2, ++ 0xB2, 0xAA, 0xE2, 0xB2, 0xAC, 0xE2, 0xB2, 0xAE, ++ 0xE2, 0xB2, 0xB0, 0xE2, 0xB2, 0xB2, 0xE2, 0xB2, ++ 0xB4, 0xE2, 0xB2, 0xB6, 0xE2, 0xB2, 0xB8, 0xE2, ++ 0xB2, 0xBA, 0xE2, 0xB2, 0xBC, 0xE2, 0xB2, 0xBE, ++ 0xE2, 0xB3, 0x80, 0xE2, 0xB3, 0x82, 0xE2, 0xB3, ++ 0x84, 0xE2, 0xB3, 0x86, 0xE2, 0xB3, 0x88, 0xE2, ++ 0xB3, 0x8A, 0xE2, 0xB3, 0x8C, 0xE2, 0xB3, 0x8E, ++ 0xE2, 0xB3, 0x90, 0xE2, 0xB3, 0x92, 0xE2, 0xB3, ++ 0x94, 0xE2, 0xB3, 0x96, 0xE2, 0xB3, 0x98, 0xE2, ++ 0xB3, 0x9A, 0xE2, 0xB3, 0x9C, 0xE2, 0xB3, 0x9E, ++ 0xE2, 0xB3, 0xA0, 0xE2, 0xB3, 0xA2, 0xE1, 0x82, ++ 0xA0, 0xE1, 0x82, 0xA1, 0xE1, 0x82, 0xA2, 0xE1, ++ 0x82, 0xA3, 0xE1, 0x82, 0xA4, 0xE1, 0x82, 0xA5, ++ 0xE1, 0x82, 0xA6, 0xE1, 0x82, 0xA7, 0xE1, 0x82, ++ 0xA8, 0xE1, 0x82, 0xA9, 0xE1, 0x82, 0xAA, 0xE1, ++ 0x82, 0xAB, 0xE1, 0x82, 0xAC, 0xE1, 0x82, 0xAD, ++ 0xE1, 0x82, 0xAE, 0xE1, 0x82, 0xAF, 0xE1, 0x82, ++ 0xB0, 0xE1, 0x82, 0xB1, 0xE1, 0x82, 0xB2, 0xE1, ++ 0x82, 0xB3, 0xE1, 0x82, 0xB4, 0xE1, 0x82, 0xB5, ++ 0xE1, 0x82, 0xB6, 0xE1, 0x82, 0xB7, 0xE1, 0x82, ++ 0xB8, 0xE1, 0x82, 0xB9, 0xE1, 0x82, 0xBA, 0xE1, ++ 0x82, 0xBB, 0xE1, 0x82, 0xBC, 0xE1, 0x82, 0xBD, ++ 0xE1, 0x82, 0xBE, 0xE1, 0x82, 0xBF, 0xE1, 0x83, ++ 0x80, 0xE1, 0x83, 0x81, 0xE1, 0x83, 0x82, 0xE1, ++ 0x83, 0x83, 0xE1, 0x83, 0x84, 0xE1, 0x83, 0x85, ++ 0xEF, 0xBC, 0xA1, 0xEF, 0xBC, 0xA2, 0xEF, 0xBC, ++ 0xA3, 0xEF, 0xBC, 0xA4, 0xEF, 0xBC, 0xA5, 0xEF, ++ 0xBC, 0xA6, 0xEF, 0xBC, 0xA7, 0xEF, 0xBC, 0xA8, ++ 0xEF, 0xBC, 0xA9, 0xEF, 0xBC, 0xAA, 0xEF, 0xBC, ++ 0xAB, 0xEF, 0xBC, 0xAC, 0xEF, 0xBC, 0xAD, 0xEF, ++ 0xBC, 0xAE, 0xEF, 0xBC, 0xAF, 0xEF, 0xBC, 0xB0, ++ 0xEF, 0xBC, 0xB1, 0xEF, 0xBC, 0xB2, 0xEF, 0xBC, ++ 0xB3, 0xEF, 0xBC, 0xB4, 0xEF, 0xBC, 0xB5, 0xEF, ++ 0xBC, 0xB6, 0xEF, 0xBC, 0xB7, 0xEF, 0xBC, 0xB8, ++ 0xEF, 0xBC, 0xB9, 0xEF, 0xBC, 0xBA, 0xF0, 0x90, ++ 0x90, 0x80, 0xF0, 0x90, 0x90, 0x81, 0xF0, 0x90, ++ 0x90, 0x82, 0xF0, 0x90, 0x90, 0x83, 0xF0, 0x90, ++ 0x90, 0x84, 0xF0, 0x90, 0x90, 0x85, 0xF0, 0x90, ++ 0x90, 0x86, 0xF0, 0x90, 0x90, 0x87, 0xF0, 0x90, ++ 0x90, 0x88, 0xF0, 0x90, 0x90, 0x89, 0xF0, 0x90, ++ 0x90, 
0x8A, 0xF0, 0x90, 0x90, 0x8B, 0xF0, 0x90, ++ 0x90, 0x8C, 0xF0, 0x90, 0x90, 0x8D, 0xF0, 0x90, ++ 0x90, 0x8E, 0xF0, 0x90, 0x90, 0x8F, 0xF0, 0x90, ++ 0x90, 0x90, 0xF0, 0x90, 0x90, 0x91, 0xF0, 0x90, ++ 0x90, 0x92, 0xF0, 0x90, 0x90, 0x93, 0xF0, 0x90, ++ 0x90, 0x94, 0xF0, 0x90, 0x90, 0x95, 0xF0, 0x90, ++ 0x90, 0x96, 0xF0, 0x90, 0x90, 0x97, 0xF0, 0x90, ++ 0x90, 0x98, 0xF0, 0x90, 0x90, 0x99, 0xF0, 0x90, ++ 0x90, 0x9A, 0xF0, 0x90, 0x90, 0x9B, 0xF0, 0x90, ++ 0x90, 0x9C, 0xF0, 0x90, 0x90, 0x9D, 0xF0, 0x90, ++ 0x90, 0x9E, 0xF0, 0x90, 0x90, 0x9F, 0xF0, 0x90, ++ 0x90, 0xA0, 0xF0, 0x90, 0x90, 0xA1, 0xF0, 0x90, ++ 0x90, 0xA2, 0xF0, 0x90, 0x90, 0xA3, 0xF0, 0x90, ++ 0x90, 0xA4, 0xF0, 0x90, 0x90, 0xA5, 0xF0, 0x90, ++ 0x90, 0xA6, 0xF0, 0x90, 0x90, 0xA7, ++ }, ++}; ++ ++#undef N_ ++#undef FIL_ ++ ++#ifdef __cplusplus ++} ++#endif ++ ++#endif /* _SYS_U8_TEXTPREP_DATA_H */ +diff -uNr linux-3.2.33-go.orig/include/zfs/sys/u8_textprep.h linux-3.2.33-go/include/zfs/sys/u8_textprep.h +--- linux-3.2.33-go.orig/include/zfs/sys/u8_textprep.h 1970-01-01 01:00:00.000000000 +0100 ++++ linux-3.2.33-go/include/zfs/sys/u8_textprep.h 2012-11-16 23:25:34.337039473 +0100 +@@ -0,0 +1,113 @@ ++/* ++ * CDDL HEADER START ++ * ++ * The contents of this file are subject to the terms of the ++ * Common Development and Distribution License (the "License"). ++ * You may not use this file except in compliance with the License. ++ * ++ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE ++ * or http://www.opensolaris.org/os/licensing. ++ * See the License for the specific language governing permissions ++ * and limitations under the License. ++ * ++ * When distributing Covered Code, include this CDDL HEADER in each ++ * file and include the License file at usr/src/OPENSOLARIS.LICENSE. ++ * If applicable, add the following below this CDDL HEADER, with the ++ * fields enclosed by brackets "[]" replaced with your own identifying ++ * information: Portions Copyright [yyyy] [name of copyright owner] ++ * ++ * CDDL HEADER END ++ */ ++/* ++ * Copyright 2007 Sun Microsystems, Inc. All rights reserved. ++ * Use is subject to license terms. ++ */ ++ ++#ifndef _SYS_U8_TEXTPREP_H ++#define _SYS_U8_TEXTPREP_H ++ ++ ++ ++#include ++#include ++#include ++ ++#ifdef __cplusplus ++extern "C" { ++#endif ++ ++/* ++ * Unicode encoding conversion functions and their macros. ++ */ ++#define UCONV_IN_BIG_ENDIAN 0x0001 ++#define UCONV_OUT_BIG_ENDIAN 0x0002 ++#define UCONV_IN_SYSTEM_ENDIAN 0x0004 ++#define UCONV_OUT_SYSTEM_ENDIAN 0x0008 ++#define UCONV_IN_LITTLE_ENDIAN 0x0010 ++#define UCONV_OUT_LITTLE_ENDIAN 0x0020 ++#define UCONV_IGNORE_NULL 0x0040 ++#define UCONV_IN_ACCEPT_BOM 0x0080 ++#define UCONV_OUT_EMIT_BOM 0x0100 ++ ++extern int uconv_u16tou32(const uint16_t *, size_t *, uint32_t *, size_t *, ++ int); ++extern int uconv_u16tou8(const uint16_t *, size_t *, uchar_t *, size_t *, int); ++extern int uconv_u32tou16(const uint32_t *, size_t *, uint16_t *, size_t *, ++ int); ++extern int uconv_u32tou8(const uint32_t *, size_t *, uchar_t *, size_t *, int); ++extern int uconv_u8tou16(const uchar_t *, size_t *, uint16_t *, size_t *, int); ++extern int uconv_u8tou32(const uchar_t *, size_t *, uint32_t *, size_t *, int); ++ ++/* ++ * UTF-8 text preparation functions and their macros. ++ * ++ * Among the macros defined, U8_CANON_DECOMP, U8_COMPAT_DECOMP, and ++ * U8_CANON_COMP are not public interfaces and must not be used directly ++ * at the flag input argument. 
++ */ ++#define U8_STRCMP_CS (0x00000001) ++#define U8_STRCMP_CI_UPPER (0x00000002) ++#define U8_STRCMP_CI_LOWER (0x00000004) ++ ++#define U8_CANON_DECOMP (0x00000010) ++#define U8_COMPAT_DECOMP (0x00000020) ++#define U8_CANON_COMP (0x00000040) ++ ++#define U8_STRCMP_NFD (U8_CANON_DECOMP) ++#define U8_STRCMP_NFC (U8_CANON_DECOMP | U8_CANON_COMP) ++#define U8_STRCMP_NFKD (U8_COMPAT_DECOMP) ++#define U8_STRCMP_NFKC (U8_COMPAT_DECOMP | U8_CANON_COMP) ++ ++#define U8_TEXTPREP_TOUPPER (U8_STRCMP_CI_UPPER) ++#define U8_TEXTPREP_TOLOWER (U8_STRCMP_CI_LOWER) ++ ++#define U8_TEXTPREP_NFD (U8_STRCMP_NFD) ++#define U8_TEXTPREP_NFC (U8_STRCMP_NFC) ++#define U8_TEXTPREP_NFKD (U8_STRCMP_NFKD) ++#define U8_TEXTPREP_NFKC (U8_STRCMP_NFKC) ++ ++#define U8_TEXTPREP_IGNORE_NULL (0x00010000) ++#define U8_TEXTPREP_IGNORE_INVALID (0x00020000) ++#define U8_TEXTPREP_NOWAIT (0x00040000) ++ ++#define U8_UNICODE_320 (0) ++#define U8_UNICODE_500 (1) ++#define U8_UNICODE_LATEST (U8_UNICODE_500) ++ ++#define U8_VALIDATE_ENTIRE (0x00100000) ++#define U8_VALIDATE_CHECK_ADDITIONAL (0x00200000) ++#define U8_VALIDATE_UCS2_RANGE (0x00400000) ++ ++#define U8_ILLEGAL_CHAR (-1) ++#define U8_OUT_OF_RANGE_CHAR (-2) ++ ++extern int u8_validate(char *, size_t, char **, int, int *); ++extern int u8_strcmp(const char *, const char *, size_t, int, size_t, int *); ++extern size_t u8_textprep_str(char *, size_t *, char *, size_t *, int, size_t, ++ int *); ++ ++#ifdef __cplusplus ++} ++#endif ++ ++#endif /* _SYS_U8_TEXTPREP_H */ +diff -uNr linux-3.2.33-go.orig/include/zfs/sys/uberblock.h linux-3.2.33-go/include/zfs/sys/uberblock.h +--- linux-3.2.33-go.orig/include/zfs/sys/uberblock.h 1970-01-01 01:00:00.000000000 +0100 ++++ linux-3.2.33-go/include/zfs/sys/uberblock.h 2012-11-16 23:25:34.336039485 +0100 +@@ -0,0 +1,46 @@ ++/* ++ * CDDL HEADER START ++ * ++ * The contents of this file are subject to the terms of the ++ * Common Development and Distribution License (the "License"). ++ * You may not use this file except in compliance with the License. ++ * ++ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE ++ * or http://www.opensolaris.org/os/licensing. ++ * See the License for the specific language governing permissions ++ * and limitations under the License. ++ * ++ * When distributing Covered Code, include this CDDL HEADER in each ++ * file and include the License file at usr/src/OPENSOLARIS.LICENSE. ++ * If applicable, add the following below this CDDL HEADER, with the ++ * fields enclosed by brackets "[]" replaced with your own identifying ++ * information: Portions Copyright [yyyy] [name of copyright owner] ++ * ++ * CDDL HEADER END ++ */ ++/* ++ * Copyright 2009 Sun Microsystems, Inc. All rights reserved. ++ * Use is subject to license terms. 
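For orientation, a minimal sketch of how a caller might combine the comparison flags above with u8_strcmp(); the two sample strings, the use of 0 as "no length limit", and the flag combination are assumptions for illustration only, not taken from the patched sources:

    int err = 0;
    const char *name_a = "Caf\xC3\xA9";    /* "Café", precomposed e-acute */
    const char *name_b = "CAFE\xCC\x81";   /* "CAFÉ", E + combining acute */

    /* Case-insensitive (upper-case folding), NFC-normalized comparison. */
    int cmp = u8_strcmp(name_a, name_b, 0,
        U8_STRCMP_CI_UPPER | U8_STRCMP_NFC, U8_UNICODE_LATEST, &err);
    if (err == 0 && cmp == 0) {
        /* Both names denote the same string under this normalization. */
    }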
++ */ ++ ++#ifndef _SYS_UBERBLOCK_H ++#define _SYS_UBERBLOCK_H ++ ++#include ++#include ++#include ++ ++#ifdef __cplusplus ++extern "C" { ++#endif ++ ++typedef struct uberblock uberblock_t; ++ ++extern int uberblock_verify(uberblock_t *ub); ++extern int uberblock_update(uberblock_t *ub, vdev_t *rvd, uint64_t txg); ++ ++#ifdef __cplusplus ++} ++#endif ++ ++#endif /* _SYS_UBERBLOCK_H */ +diff -uNr linux-3.2.33-go.orig/include/zfs/sys/uberblock_impl.h linux-3.2.33-go/include/zfs/sys/uberblock_impl.h +--- linux-3.2.33-go.orig/include/zfs/sys/uberblock_impl.h 1970-01-01 01:00:00.000000000 +0100 ++++ linux-3.2.33-go/include/zfs/sys/uberblock_impl.h 2012-11-16 23:25:34.339039449 +0100 +@@ -0,0 +1,63 @@ ++/* ++ * CDDL HEADER START ++ * ++ * The contents of this file are subject to the terms of the ++ * Common Development and Distribution License (the "License"). ++ * You may not use this file except in compliance with the License. ++ * ++ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE ++ * or http://www.opensolaris.org/os/licensing. ++ * See the License for the specific language governing permissions ++ * and limitations under the License. ++ * ++ * When distributing Covered Code, include this CDDL HEADER in each ++ * file and include the License file at usr/src/OPENSOLARIS.LICENSE. ++ * If applicable, add the following below this CDDL HEADER, with the ++ * fields enclosed by brackets "[]" replaced with your own identifying ++ * information: Portions Copyright [yyyy] [name of copyright owner] ++ * ++ * CDDL HEADER END ++ */ ++/* ++ * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved. ++ */ ++ ++#ifndef _SYS_UBERBLOCK_IMPL_H ++#define _SYS_UBERBLOCK_IMPL_H ++ ++#include ++ ++#ifdef __cplusplus ++extern "C" { ++#endif ++ ++/* ++ * The uberblock version is incremented whenever an incompatible on-disk ++ * format change is made to the SPA, DMU, or ZAP. ++ * ++ * Note: the first two fields should never be moved. When a storage pool ++ * is opened, the uberblock must be read off the disk before the version ++ * can be checked. If the ub_version field is moved, we may not detect ++ * version mismatch. If the ub_magic field is moved, applications that ++ * expect the magic number in the first word won't work. ++ */ ++#define UBERBLOCK_MAGIC 0x00bab10c /* oo-ba-bloc! */ ++#define UBERBLOCK_SHIFT 10 /* up to 1K */ ++ ++struct uberblock { ++ uint64_t ub_magic; /* UBERBLOCK_MAGIC */ ++ uint64_t ub_version; /* SPA_VERSION */ ++ uint64_t ub_txg; /* txg of last sync */ ++ uint64_t ub_guid_sum; /* sum of all vdev guids */ ++ uint64_t ub_timestamp; /* UTC time of last sync */ ++ blkptr_t ub_rootbp; /* MOS objset_phys_t */ ++ ++ /* highest SPA_VERSION supported by software that wrote this txg */ ++ uint64_t ub_software_version; ++}; ++ ++#ifdef __cplusplus ++} ++#endif ++ ++#endif /* _SYS_UBERBLOCK_IMPL_H */ +diff -uNr linux-3.2.33-go.orig/include/zfs/sys/uio_impl.h linux-3.2.33-go/include/zfs/sys/uio_impl.h +--- linux-3.2.33-go.orig/include/zfs/sys/uio_impl.h 1970-01-01 01:00:00.000000000 +0100 ++++ linux-3.2.33-go/include/zfs/sys/uio_impl.h 2012-11-16 23:25:34.338039461 +0100 +@@ -0,0 +1,49 @@ ++/* ++ * CDDL HEADER START ++ * ++ * The contents of this file are subject to the terms of the ++ * Common Development and Distribution License (the "License"). ++ * You may not use this file except in compliance with the License. ++ * ++ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE ++ * or http://www.opensolaris.org/os/licensing. 
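As a hedged illustration of the layout rule spelled out above (ub_magic and ub_version must stay in the first two words so they can be checked before anything else), a consistency check in the spirit of uberblock_verify() could begin like this; the helper name is hypothetical, and real code would also accept a byte-swapped magic for pools written on a foreign-endian host:

    static int
    ub_magic_ok(const uberblock_t *ub)
    {
        /* The magic is read and checked before any other field is trusted. */
        return (ub->ub_magic == UBERBLOCK_MAGIC);  /* 0x00bab10c, "oo-ba-bloc!" */
    }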
++ * See the License for the specific language governing permissions ++ * and limitations under the License. ++ * ++ * When distributing Covered Code, include this CDDL HEADER in each ++ * file and include the License file at usr/src/OPENSOLARIS.LICENSE. ++ * If applicable, add the following below this CDDL HEADER, with the ++ * fields enclosed by brackets "[]" replaced with your own identifying ++ * information: Portions Copyright [yyyy] [name of copyright owner] ++ * ++ * CDDL HEADER END ++ */ ++/* ++ * Copyright 2010 Sun Microsystems, Inc. All rights reserved. ++ * Use is subject to license terms. ++ */ ++ ++/* Copyright (c) 1984, 1986, 1987, 1988, 1989 AT&T */ ++/* All Rights Reserved */ ++ ++/* ++ * University Copyright- Copyright (c) 1982, 1986, 1988 ++ * The Regents of the University of California ++ * All Rights Reserved ++ * ++ * University Acknowledgment- Portions of this document are derived from ++ * software developed by the University of California, Berkeley, and its ++ * contributors. ++ */ ++ ++#ifndef _SYS_UIO_IMPL_H ++#define _SYS_UIO_IMPL_H ++ ++#include ++ ++extern int uiomove(void *, size_t, enum uio_rw, uio_t *); ++extern void uio_prefaultpages(ssize_t, uio_t *); ++extern int uiocopy(void *, size_t, enum uio_rw, uio_t *, size_t *); ++extern void uioskip(uio_t *, size_t); ++ ++#endif /* _SYS_UIO_IMPL_H */ +diff -uNr linux-3.2.33-go.orig/include/zfs/sys/unique.h linux-3.2.33-go/include/zfs/sys/unique.h +--- linux-3.2.33-go.orig/include/zfs/sys/unique.h 1970-01-01 01:00:00.000000000 +0100 ++++ linux-3.2.33-go/include/zfs/sys/unique.h 2012-11-16 23:25:34.343039404 +0100 +@@ -0,0 +1,59 @@ ++/* ++ * CDDL HEADER START ++ * ++ * The contents of this file are subject to the terms of the ++ * Common Development and Distribution License (the "License"). ++ * You may not use this file except in compliance with the License. ++ * ++ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE ++ * or http://www.opensolaris.org/os/licensing. ++ * See the License for the specific language governing permissions ++ * and limitations under the License. ++ * ++ * When distributing Covered Code, include this CDDL HEADER in each ++ * file and include the License file at usr/src/OPENSOLARIS.LICENSE. ++ * If applicable, add the following below this CDDL HEADER, with the ++ * fields enclosed by brackets "[]" replaced with your own identifying ++ * information: Portions Copyright [yyyy] [name of copyright owner] ++ * ++ * CDDL HEADER END ++ */ ++/* ++ * Copyright 2007 Sun Microsystems, Inc. All rights reserved. ++ * Use is subject to license terms. ++ */ ++ ++#ifndef _SYS_UNIQUE_H ++#define _SYS_UNIQUE_H ++ ++ ++ ++#include ++ ++#ifdef __cplusplus ++extern "C" { ++#endif ++ ++/* The number of significant bits in each unique value. */ ++#define UNIQUE_BITS 56 ++ ++void unique_init(void); ++void unique_fini(void); ++ ++/* ++ * Return a new unique value (which will not be uniquified against until ++ * it is unique_insert()-ed. ++ */ ++uint64_t unique_create(void); ++ ++/* Return a unique value, which equals the one passed in if possible. */ ++uint64_t unique_insert(uint64_t value); ++ ++/* Indicate that this value no longer needs to be uniquified against. 
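A brief lifecycle sketch for the unique-value API declared above, following the semantics the comments describe (create a candidate, reserve it with unique_insert(), release it with unique_remove()); the variable and its use are hypothetical:

    uint64_t id;

    id = unique_create();     /* candidate 56-bit value (see UNIQUE_BITS), not yet reserved */
    id = unique_insert(id);   /* reserve it; may return a different value if taken */
    /* ... id identifies the consumer for as long as it exists ... */
    unique_remove(id);        /* stop uniquifying against this value */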
*/ ++void unique_remove(uint64_t value); ++ ++#ifdef __cplusplus ++} ++#endif ++ ++#endif /* _SYS_UNIQUE_H */ +diff -uNr linux-3.2.33-go.orig/include/zfs/sys/uuid.h linux-3.2.33-go/include/zfs/sys/uuid.h +--- linux-3.2.33-go.orig/include/zfs/sys/uuid.h 1970-01-01 01:00:00.000000000 +0100 ++++ linux-3.2.33-go/include/zfs/sys/uuid.h 2012-11-16 23:25:34.337039473 +0100 +@@ -0,0 +1,94 @@ ++/* ++ * CDDL HEADER START ++ * ++ * The contents of this file are subject to the terms of the ++ * Common Development and Distribution License, Version 1.0 only ++ * (the "License"). You may not use this file except in compliance ++ * with the License. ++ * ++ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE ++ * or http://www.opensolaris.org/os/licensing. ++ * See the License for the specific language governing permissions ++ * and limitations under the License. ++ * ++ * When distributing Covered Code, include this CDDL HEADER in each ++ * file and include the License file at usr/src/OPENSOLARIS.LICENSE. ++ * If applicable, add the following below this CDDL HEADER, with the ++ * fields enclosed by brackets "[]" replaced with your own identifying ++ * information: Portions Copyright [yyyy] [name of copyright owner] ++ * ++ * CDDL HEADER END ++ */ ++/* ++ * Copyright 2006 Sun Microsystems, Inc. All rights reserved. ++ * Use is subject to license terms. ++ */ ++ ++#ifndef _SYS_UUID_H ++#define _SYS_UUID_H ++ ++#ifdef __cplusplus ++extern "C" { ++#endif ++ ++/* ++ * The copyright in this file is taken from the original Leach ++ * & Salz UUID specification, from which this implementation ++ * is derived. ++ */ ++ ++/* ++ * Copyright (c) 1990- 1993, 1996 Open Software Foundation, Inc. ++ * Copyright (c) 1989 by Hewlett-Packard Company, Palo Alto, Ca. & ++ * Digital Equipment Corporation, Maynard, Mass. Copyright (c) 1998 ++ * Microsoft. To anyone who acknowledges that this file is provided ++ * "AS IS" without any express or implied warranty: permission to use, ++ * copy, modify, and distribute this file for any purpose is hereby ++ * granted without fee, provided that the above copyright notices and ++ * this notice appears in all source code copies, and that none of the ++ * names of Open Software Foundation, Inc., Hewlett-Packard Company, ++ * or Digital Equipment Corporation be used in advertising or ++ * publicity pertaining to distribution of the software without ++ * specific, written prior permission. Neither Open Software ++ * Foundation, Inc., Hewlett-Packard Company, Microsoft, nor Digital ++ * Equipment Corporation makes any representations about the ++ * suitability of this software for any purpose. 
++ */ ++ ++#include ++#include ++ ++typedef struct { ++ uint8_t nodeID[6]; ++} uuid_node_t; ++ ++/* ++ * The uuid type used throughout when referencing uuids themselves ++ */ ++struct uuid { ++ uint32_t time_low; ++ uint16_t time_mid; ++ uint16_t time_hi_and_version; ++ uint8_t clock_seq_hi_and_reserved; ++ uint8_t clock_seq_low; ++ uint8_t node_addr[6]; ++}; ++ ++#define UUID_PRINTABLE_STRING_LENGTH 37 ++ ++/* ++ * Convert a uuid to/from little-endian format ++ */ ++#define UUID_LE_CONVERT(dest, src) \ ++{ \ ++ (dest) = (src); \ ++ (dest).time_low = LE_32((dest).time_low); \ ++ (dest).time_mid = LE_16((dest).time_mid); \ ++ (dest).time_hi_and_version = LE_16((dest).time_hi_and_version); \ ++} ++ ++#ifdef __cplusplus ++} ++#endif ++ ++#endif /* _SYS_UUID_H */ +diff -uNr linux-3.2.33-go.orig/include/zfs/sys/vdev_disk.h linux-3.2.33-go/include/zfs/sys/vdev_disk.h +--- linux-3.2.33-go.orig/include/zfs/sys/vdev_disk.h 1970-01-01 01:00:00.000000000 +0100 ++++ linux-3.2.33-go/include/zfs/sys/vdev_disk.h 2012-11-16 23:25:34.339039449 +0100 +@@ -0,0 +1,45 @@ ++/* ++ * CDDL HEADER START ++ * ++ * The contents of this file are subject to the terms of the ++ * Common Development and Distribution License (the "License"). ++ * You may not use this file except in compliance with the License. ++ * ++ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE ++ * or http://www.opensolaris.org/os/licensing. ++ * See the License for the specific language governing permissions ++ * and limitations under the License. ++ * ++ * When distributing Covered Code, include this CDDL HEADER in each ++ * file and include the License file at usr/src/OPENSOLARIS.LICENSE. ++ * If applicable, add the following below this CDDL HEADER, with the ++ * fields enclosed by brackets "[]" replaced with your own identifying ++ * information: Portions Copyright [yyyy] [name of copyright owner] ++ * ++ * CDDL HEADER END ++ */ ++/* ++ * Copyright (C) 2008-2010 Lawrence Livermore National Security, LLC. ++ * Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER). ++ * Written by Brian Behlendorf . ++ * LLNL-CODE-403049. ++ */ ++ ++#ifndef _SYS_VDEV_DISK_H ++#define _SYS_VDEV_DISK_H ++ ++#ifdef _KERNEL ++#include ++ ++typedef struct vdev_disk { ++ ddi_devid_t vd_devid; ++ char *vd_minor; ++ struct block_device *vd_bdev; ++} vdev_disk_t; ++ ++extern int vdev_disk_physio(struct block_device *, caddr_t, ++ size_t, uint64_t, int); ++extern int vdev_disk_read_rootlabel(char *, char *, nvlist_t **); ++ ++#endif /* _KERNEL */ ++#endif /* _SYS_VDEV_DISK_H */ +diff -uNr linux-3.2.33-go.orig/include/zfs/sys/vdev_file.h linux-3.2.33-go/include/zfs/sys/vdev_file.h +--- linux-3.2.33-go.orig/include/zfs/sys/vdev_file.h 1970-01-01 01:00:00.000000000 +0100 ++++ linux-3.2.33-go/include/zfs/sys/vdev_file.h 2012-11-16 23:25:34.343039404 +0100 +@@ -0,0 +1,46 @@ ++/* ++ * CDDL HEADER START ++ * ++ * The contents of this file are subject to the terms of the ++ * Common Development and Distribution License, Version 1.0 only ++ * (the "License"). You may not use this file except in compliance ++ * with the License. ++ * ++ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE ++ * or http://www.opensolaris.org/os/licensing. ++ * See the License for the specific language governing permissions ++ * and limitations under the License. ++ * ++ * When distributing Covered Code, include this CDDL HEADER in each ++ * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 
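A small usage sketch for the UUID_LE_CONVERT() macro defined above: it copies the whole structure and byte-swaps only the three multi-byte time fields, leaving the single-byte clock_seq_* and node_addr fields untouched. The variable names are illustrative:

    struct uuid native_uuid;   /* filled in elsewhere, in native byte order */
    struct uuid le_copy;

    UUID_LE_CONVERT(le_copy, native_uuid);
    /* le_copy now holds the same UUID with time_low, time_mid and
     * time_hi_and_version stored little-endian. */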
++ * If applicable, add the following below this CDDL HEADER, with the ++ * fields enclosed by brackets "[]" replaced with your own identifying ++ * information: Portions Copyright [yyyy] [name of copyright owner] ++ * ++ * CDDL HEADER END ++ */ ++/* ++ * Copyright 2005 Sun Microsystems, Inc. All rights reserved. ++ * Use is subject to license terms. ++ */ ++ ++#ifndef _SYS_VDEV_FILE_H ++#define _SYS_VDEV_FILE_H ++ ++ ++ ++#include ++ ++#ifdef __cplusplus ++extern "C" { ++#endif ++ ++typedef struct vdev_file { ++ vnode_t *vf_vnode; ++} vdev_file_t; ++ ++#ifdef __cplusplus ++} ++#endif ++ ++#endif /* _SYS_VDEV_FILE_H */ +diff -uNr linux-3.2.33-go.orig/include/zfs/sys/vdev.h linux-3.2.33-go/include/zfs/sys/vdev.h +--- linux-3.2.33-go.orig/include/zfs/sys/vdev.h 1970-01-01 01:00:00.000000000 +0100 ++++ linux-3.2.33-go/include/zfs/sys/vdev.h 2012-11-16 23:25:34.338039461 +0100 +@@ -0,0 +1,162 @@ ++/* ++ * CDDL HEADER START ++ * ++ * The contents of this file are subject to the terms of the ++ * Common Development and Distribution License (the "License"). ++ * You may not use this file except in compliance with the License. ++ * ++ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE ++ * or http://www.opensolaris.org/os/licensing. ++ * See the License for the specific language governing permissions ++ * and limitations under the License. ++ * ++ * When distributing Covered Code, include this CDDL HEADER in each ++ * file and include the License file at usr/src/OPENSOLARIS.LICENSE. ++ * If applicable, add the following below this CDDL HEADER, with the ++ * fields enclosed by brackets "[]" replaced with your own identifying ++ * information: Portions Copyright [yyyy] [name of copyright owner] ++ * ++ * CDDL HEADER END ++ */ ++/* ++ * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2012 by Delphix. All rights reserved. 
++ */ ++ ++#ifndef _SYS_VDEV_H ++#define _SYS_VDEV_H ++ ++#include ++#include ++#include ++#include ++#include ++ ++#ifdef __cplusplus ++extern "C" { ++#endif ++ ++typedef enum vdev_dtl_type { ++ DTL_MISSING, /* 0% replication: no copies of the data */ ++ DTL_PARTIAL, /* less than 100% replication: some copies missing */ ++ DTL_SCRUB, /* unable to fully repair during scrub/resilver */ ++ DTL_OUTAGE, /* temporarily missing (used to attempt detach) */ ++ DTL_TYPES ++} vdev_dtl_type_t; ++ ++extern int zfs_nocacheflush; ++ ++extern int vdev_open(vdev_t *); ++extern void vdev_open_children(vdev_t *); ++extern boolean_t vdev_uses_zvols(vdev_t *); ++extern int vdev_validate(vdev_t *, boolean_t); ++extern void vdev_close(vdev_t *); ++extern int vdev_create(vdev_t *, uint64_t txg, boolean_t isreplace); ++extern void vdev_reopen(vdev_t *); ++extern int vdev_validate_aux(vdev_t *vd); ++extern zio_t *vdev_probe(vdev_t *vd, zio_t *pio); ++ ++extern boolean_t vdev_is_bootable(vdev_t *vd); ++extern vdev_t *vdev_lookup_top(spa_t *spa, uint64_t vdev); ++extern vdev_t *vdev_lookup_by_guid(vdev_t *vd, uint64_t guid); ++extern void vdev_dtl_dirty(vdev_t *vd, vdev_dtl_type_t d, ++ uint64_t txg, uint64_t size); ++extern boolean_t vdev_dtl_contains(vdev_t *vd, vdev_dtl_type_t d, ++ uint64_t txg, uint64_t size); ++extern boolean_t vdev_dtl_empty(vdev_t *vd, vdev_dtl_type_t d); ++extern void vdev_dtl_reassess(vdev_t *vd, uint64_t txg, uint64_t scrub_txg, ++ int scrub_done); ++extern boolean_t vdev_dtl_required(vdev_t *vd); ++extern boolean_t vdev_resilver_needed(vdev_t *vd, ++ uint64_t *minp, uint64_t *maxp); ++ ++extern void vdev_hold(vdev_t *); ++extern void vdev_rele(vdev_t *); ++ ++extern int vdev_metaslab_init(vdev_t *vd, uint64_t txg); ++extern void vdev_metaslab_fini(vdev_t *vd); ++extern void vdev_metaslab_set_size(vdev_t *); ++extern void vdev_expand(vdev_t *vd, uint64_t txg); ++extern void vdev_split(vdev_t *vd); ++ ++ ++extern void vdev_get_stats(vdev_t *vd, vdev_stat_t *vs); ++extern void vdev_clear_stats(vdev_t *vd); ++extern void vdev_stat_update(zio_t *zio, uint64_t psize); ++extern void vdev_scan_stat_init(vdev_t *vd); ++extern void vdev_propagate_state(vdev_t *vd); ++extern void vdev_set_state(vdev_t *vd, boolean_t isopen, vdev_state_t state, ++ vdev_aux_t aux); ++ ++extern void vdev_space_update(vdev_t *vd, ++ int64_t alloc_delta, int64_t defer_delta, int64_t space_delta); ++ ++extern uint64_t vdev_psize_to_asize(vdev_t *vd, uint64_t psize); ++ ++extern int vdev_fault(spa_t *spa, uint64_t guid, vdev_aux_t aux); ++extern int vdev_degrade(spa_t *spa, uint64_t guid, vdev_aux_t aux); ++extern int vdev_online(spa_t *spa, uint64_t guid, uint64_t flags, ++ vdev_state_t *); ++extern int vdev_offline(spa_t *spa, uint64_t guid, uint64_t flags); ++extern void vdev_clear(spa_t *spa, vdev_t *vd); ++ ++extern boolean_t vdev_is_dead(vdev_t *vd); ++extern boolean_t vdev_readable(vdev_t *vd); ++extern boolean_t vdev_writeable(vdev_t *vd); ++extern boolean_t vdev_allocatable(vdev_t *vd); ++extern boolean_t vdev_accessible(vdev_t *vd, zio_t *zio); ++ ++extern void vdev_cache_init(vdev_t *vd); ++extern void vdev_cache_fini(vdev_t *vd); ++extern int vdev_cache_read(zio_t *zio); ++extern void vdev_cache_write(zio_t *zio); ++extern void vdev_cache_purge(vdev_t *vd); ++ ++extern void vdev_queue_init(vdev_t *vd); ++extern void vdev_queue_fini(vdev_t *vd); ++extern zio_t *vdev_queue_io(zio_t *zio); ++extern void vdev_queue_io_done(zio_t *zio); ++ ++extern void vdev_config_dirty(vdev_t *vd); ++extern void 
vdev_config_clean(vdev_t *vd); ++extern int vdev_config_sync(vdev_t **svd, int svdcount, uint64_t txg, ++ boolean_t); ++ ++extern void vdev_state_dirty(vdev_t *vd); ++extern void vdev_state_clean(vdev_t *vd); ++ ++typedef enum vdev_config_flag { ++ VDEV_CONFIG_SPARE = 1 << 0, ++ VDEV_CONFIG_L2CACHE = 1 << 1, ++ VDEV_CONFIG_REMOVING = 1 << 2 ++} vdev_config_flag_t; ++ ++extern void vdev_top_config_generate(spa_t *spa, nvlist_t *config); ++extern nvlist_t *vdev_config_generate(spa_t *spa, vdev_t *vd, ++ boolean_t getstats, vdev_config_flag_t flags); ++ ++/* ++ * Label routines ++ */ ++struct uberblock; ++extern uint64_t vdev_label_offset(uint64_t psize, int l, uint64_t offset); ++extern int vdev_label_number(uint64_t psise, uint64_t offset); ++extern nvlist_t *vdev_label_read_config(vdev_t *vd); ++extern void vdev_uberblock_load(zio_t *zio, vdev_t *vd, struct uberblock *ub); ++ ++typedef enum { ++ VDEV_LABEL_CREATE, /* create/add a new device */ ++ VDEV_LABEL_REPLACE, /* replace an existing device */ ++ VDEV_LABEL_SPARE, /* add a new hot spare */ ++ VDEV_LABEL_REMOVE, /* remove an existing device */ ++ VDEV_LABEL_L2CACHE, /* add an L2ARC cache device */ ++ VDEV_LABEL_SPLIT /* generating new label for split-off dev */ ++} vdev_labeltype_t; ++ ++extern int vdev_label_init(vdev_t *vd, uint64_t txg, vdev_labeltype_t reason); ++ ++#ifdef __cplusplus ++} ++#endif ++ ++#endif /* _SYS_VDEV_H */ +diff -uNr linux-3.2.33-go.orig/include/zfs/sys/vdev_impl.h linux-3.2.33-go/include/zfs/sys/vdev_impl.h +--- linux-3.2.33-go.orig/include/zfs/sys/vdev_impl.h 1970-01-01 01:00:00.000000000 +0100 ++++ linux-3.2.33-go/include/zfs/sys/vdev_impl.h 2012-11-16 23:25:34.342039415 +0100 +@@ -0,0 +1,334 @@ ++/* ++ * CDDL HEADER START ++ * ++ * The contents of this file are subject to the terms of the ++ * Common Development and Distribution License (the "License"). ++ * You may not use this file except in compliance with the License. ++ * ++ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE ++ * or http://www.opensolaris.org/os/licensing. ++ * See the License for the specific language governing permissions ++ * and limitations under the License. ++ * ++ * When distributing Covered Code, include this CDDL HEADER in each ++ * file and include the License file at usr/src/OPENSOLARIS.LICENSE. ++ * If applicable, add the following below this CDDL HEADER, with the ++ * fields enclosed by brackets "[]" replaced with your own identifying ++ * information: Portions Copyright [yyyy] [name of copyright owner] ++ * ++ * CDDL HEADER END ++ */ ++/* ++ * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2012 by Delphix. All rights reserved. ++ */ ++ ++#ifndef _SYS_VDEV_IMPL_H ++#define _SYS_VDEV_IMPL_H ++ ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++ ++#ifdef __cplusplus ++extern "C" { ++#endif ++ ++/* ++ * Virtual device descriptors. ++ * ++ * All storage pool operations go through the virtual device framework, ++ * which provides data replication and I/O scheduling. ++ */ ++ ++/* ++ * Forward declarations that lots of things need. 
++ */ ++typedef struct vdev_queue vdev_queue_t; ++typedef struct vdev_io vdev_io_t; ++typedef struct vdev_cache vdev_cache_t; ++typedef struct vdev_cache_entry vdev_cache_entry_t; ++ ++/* ++ * Virtual device operations ++ */ ++typedef int vdev_open_func_t(vdev_t *vd, uint64_t *size, uint64_t *max_size, ++ uint64_t *ashift); ++typedef void vdev_close_func_t(vdev_t *vd); ++typedef uint64_t vdev_asize_func_t(vdev_t *vd, uint64_t psize); ++typedef int vdev_io_start_func_t(zio_t *zio); ++typedef void vdev_io_done_func_t(zio_t *zio); ++typedef void vdev_state_change_func_t(vdev_t *vd, int, int); ++typedef void vdev_hold_func_t(vdev_t *vd); ++typedef void vdev_rele_func_t(vdev_t *vd); ++ ++typedef struct vdev_ops { ++ vdev_open_func_t *vdev_op_open; ++ vdev_close_func_t *vdev_op_close; ++ vdev_asize_func_t *vdev_op_asize; ++ vdev_io_start_func_t *vdev_op_io_start; ++ vdev_io_done_func_t *vdev_op_io_done; ++ vdev_state_change_func_t *vdev_op_state_change; ++ vdev_hold_func_t *vdev_op_hold; ++ vdev_rele_func_t *vdev_op_rele; ++ char vdev_op_type[16]; ++ boolean_t vdev_op_leaf; ++} vdev_ops_t; ++ ++/* ++ * Virtual device properties ++ */ ++struct vdev_cache_entry { ++ char *ve_data; ++ uint64_t ve_offset; ++ uint64_t ve_lastused; ++ avl_node_t ve_offset_node; ++ avl_node_t ve_lastused_node; ++ uint32_t ve_hits; ++ uint16_t ve_missed_update; ++ zio_t *ve_fill_io; ++}; ++ ++struct vdev_cache { ++ avl_tree_t vc_offset_tree; ++ avl_tree_t vc_lastused_tree; ++ kmutex_t vc_lock; ++}; ++ ++struct vdev_queue { ++ avl_tree_t vq_deadline_tree; ++ avl_tree_t vq_read_tree; ++ avl_tree_t vq_write_tree; ++ avl_tree_t vq_pending_tree; ++ list_t vq_io_list; ++ kmutex_t vq_lock; ++}; ++ ++struct vdev_io { ++ char vi_buffer[SPA_MAXBLOCKSIZE]; /* Must be first */ ++ list_node_t vi_node; ++}; ++ ++/* ++ * Virtual device descriptor ++ */ ++struct vdev { ++ /* ++ * Common to all vdev types. ++ */ ++ uint64_t vdev_id; /* child number in vdev parent */ ++ uint64_t vdev_guid; /* unique ID for this vdev */ ++ uint64_t vdev_guid_sum; /* self guid + all child guids */ ++ uint64_t vdev_orig_guid; /* orig. guid prior to remove */ ++ uint64_t vdev_asize; /* allocatable device capacity */ ++ uint64_t vdev_min_asize; /* min acceptable asize */ ++ uint64_t vdev_max_asize; /* max acceptable asize */ ++ uint64_t vdev_ashift; /* block alignment shift */ ++ uint64_t vdev_state; /* see VDEV_STATE_* #defines */ ++ uint64_t vdev_prevstate; /* used when reopening a vdev */ ++ vdev_ops_t *vdev_ops; /* vdev operations */ ++ spa_t *vdev_spa; /* spa for this vdev */ ++ void *vdev_tsd; /* type-specific data */ ++ vnode_t *vdev_name_vp; /* vnode for pathname */ ++ vnode_t *vdev_devid_vp; /* vnode for devid */ ++ vdev_t *vdev_top; /* top-level vdev */ ++ vdev_t *vdev_parent; /* parent vdev */ ++ vdev_t **vdev_child; /* array of children */ ++ uint64_t vdev_children; /* number of children */ ++ space_map_t vdev_dtl[DTL_TYPES]; /* in-core dirty time logs */ ++ vdev_stat_t vdev_stat; /* virtual device statistics */ ++ boolean_t vdev_expanding; /* expand the vdev? */ ++ boolean_t vdev_reopening; /* reopen in progress? */ ++ int vdev_open_error; /* error on last open */ ++ kthread_t *vdev_open_thread; /* thread opening children */ ++ uint64_t vdev_crtxg; /* txg when top-level was added */ ++ ++ /* ++ * Top-level vdev state. 
++ */ ++ uint64_t vdev_ms_array; /* metaslab array object */ ++ uint64_t vdev_ms_shift; /* metaslab size shift */ ++ uint64_t vdev_ms_count; /* number of metaslabs */ ++ metaslab_group_t *vdev_mg; /* metaslab group */ ++ metaslab_t **vdev_ms; /* metaslab array */ ++ uint64_t vdev_pending_fastwrite; /* allocated fastwrites */ ++ txg_list_t vdev_ms_list; /* per-txg dirty metaslab lists */ ++ txg_list_t vdev_dtl_list; /* per-txg dirty DTL lists */ ++ txg_node_t vdev_txg_node; /* per-txg dirty vdev linkage */ ++ boolean_t vdev_remove_wanted; /* async remove wanted? */ ++ boolean_t vdev_probe_wanted; /* async probe wanted? */ ++ uint64_t vdev_removing; /* device is being removed? */ ++ list_node_t vdev_config_dirty_node; /* config dirty list */ ++ list_node_t vdev_state_dirty_node; /* state dirty list */ ++ uint64_t vdev_deflate_ratio; /* deflation ratio (x512) */ ++ uint64_t vdev_islog; /* is an intent log device */ ++ uint64_t vdev_ishole; /* is a hole in the namespace */ ++ ++ /* ++ * Leaf vdev state. ++ */ ++ uint64_t vdev_psize; /* physical device capacity */ ++ space_map_obj_t vdev_dtl_smo; /* dirty time log space map obj */ ++ txg_node_t vdev_dtl_node; /* per-txg dirty DTL linkage */ ++ uint64_t vdev_wholedisk; /* true if this is a whole disk */ ++ uint64_t vdev_offline; /* persistent offline state */ ++ uint64_t vdev_faulted; /* persistent faulted state */ ++ uint64_t vdev_degraded; /* persistent degraded state */ ++ uint64_t vdev_removed; /* persistent removed state */ ++ uint64_t vdev_resilvering; /* persistent resilvering state */ ++ uint64_t vdev_nparity; /* number of parity devices for raidz */ ++ char *vdev_path; /* vdev path (if any) */ ++ char *vdev_devid; /* vdev devid (if any) */ ++ char *vdev_physpath; /* vdev device path (if any) */ ++ char *vdev_fru; /* physical FRU location */ ++ uint64_t vdev_not_present; /* not present during import */ ++ uint64_t vdev_unspare; /* unspare when resilvering done */ ++ hrtime_t vdev_last_try; /* last reopen time */ ++ boolean_t vdev_nowritecache; /* true if flushwritecache failed */ ++ boolean_t vdev_checkremove; /* temporary online test */ ++ boolean_t vdev_forcefault; /* force online fault */ ++ boolean_t vdev_splitting; /* split or repair in progress */ ++ boolean_t vdev_delayed_close; /* delayed device close? */ ++ uint8_t vdev_tmpoffline; /* device taken offline temporarily? */ ++ uint8_t vdev_detached; /* device detached? */ ++ uint8_t vdev_cant_read; /* vdev is failing all reads */ ++ uint8_t vdev_cant_write; /* vdev is failing all writes */ ++ uint64_t vdev_isspare; /* was a hot spare */ ++ uint64_t vdev_isl2cache; /* was a l2cache device */ ++ vdev_queue_t vdev_queue; /* I/O deadline schedule queue */ ++ vdev_cache_t vdev_cache; /* physical block cache */ ++ spa_aux_vdev_t *vdev_aux; /* for l2cache vdevs */ ++ zio_t *vdev_probe_zio; /* root of current probe */ ++ vdev_aux_t vdev_label_aux; /* on-disk aux state */ ++ ++ /* ++ * For DTrace to work in userland (libzpool) context, these fields must ++ * remain at the end of the structure. DTrace will use the kernel's ++ * CTF definition for 'struct vdev', and since the size of a kmutex_t is ++ * larger in userland, the offsets for the rest fields would be ++ * incorrect. 
++ */ ++ kmutex_t vdev_dtl_lock; /* vdev_dtl_{map,resilver} */ ++ kmutex_t vdev_stat_lock; /* vdev_stat */ ++ kmutex_t vdev_probe_lock; /* protects vdev_probe_zio */ ++}; ++ ++#define VDEV_RAIDZ_MAXPARITY 3 ++ ++#define VDEV_PAD_SIZE (8 << 10) ++/* 2 padding areas (vl_pad1 and vl_pad2) to skip */ ++#define VDEV_SKIP_SIZE VDEV_PAD_SIZE * 2 ++#define VDEV_PHYS_SIZE (112 << 10) ++#define VDEV_UBERBLOCK_RING (128 << 10) ++ ++#define VDEV_UBERBLOCK_SHIFT(vd) \ ++ MAX((vd)->vdev_top->vdev_ashift, UBERBLOCK_SHIFT) ++#define VDEV_UBERBLOCK_COUNT(vd) \ ++ (VDEV_UBERBLOCK_RING >> VDEV_UBERBLOCK_SHIFT(vd)) ++#define VDEV_UBERBLOCK_OFFSET(vd, n) \ ++ offsetof(vdev_label_t, vl_uberblock[(n) << VDEV_UBERBLOCK_SHIFT(vd)]) ++#define VDEV_UBERBLOCK_SIZE(vd) (1ULL << VDEV_UBERBLOCK_SHIFT(vd)) ++ ++typedef struct vdev_phys { ++ char vp_nvlist[VDEV_PHYS_SIZE - sizeof (zio_eck_t)]; ++ zio_eck_t vp_zbt; ++} vdev_phys_t; ++ ++typedef struct vdev_label { ++ char vl_pad1[VDEV_PAD_SIZE]; /* 8K */ ++ char vl_pad2[VDEV_PAD_SIZE]; /* 8K */ ++ vdev_phys_t vl_vdev_phys; /* 112K */ ++ char vl_uberblock[VDEV_UBERBLOCK_RING]; /* 128K */ ++} vdev_label_t; /* 256K total */ ++ ++/* ++ * vdev_dirty() flags ++ */ ++#define VDD_METASLAB 0x01 ++#define VDD_DTL 0x02 ++ ++/* ++ * Size and offset of embedded boot loader region on each label. ++ * The total size of the first two labels plus the boot area is 4MB. ++ */ ++#define VDEV_BOOT_OFFSET (2 * sizeof (vdev_label_t)) ++#define VDEV_BOOT_SIZE (7ULL << 19) /* 3.5M */ ++ ++/* ++ * Size of label regions at the start and end of each leaf device. ++ */ ++#define VDEV_LABEL_START_SIZE (2 * sizeof (vdev_label_t) + VDEV_BOOT_SIZE) ++#define VDEV_LABEL_END_SIZE (2 * sizeof (vdev_label_t)) ++#define VDEV_LABELS 4 ++ ++#define VDEV_ALLOC_LOAD 0 ++#define VDEV_ALLOC_ADD 1 ++#define VDEV_ALLOC_SPARE 2 ++#define VDEV_ALLOC_L2CACHE 3 ++#define VDEV_ALLOC_ROOTPOOL 4 ++#define VDEV_ALLOC_SPLIT 5 ++#define VDEV_ALLOC_ATTACH 6 ++ ++/* ++ * Allocate or free a vdev ++ */ ++extern vdev_t *vdev_alloc_common(spa_t *spa, uint_t id, uint64_t guid, ++ vdev_ops_t *ops); ++extern int vdev_alloc(spa_t *spa, vdev_t **vdp, nvlist_t *config, ++ vdev_t *parent, uint_t id, int alloctype); ++extern void vdev_free(vdev_t *vd); ++ ++/* ++ * Add or remove children and parents ++ */ ++extern void vdev_add_child(vdev_t *pvd, vdev_t *cvd); ++extern void vdev_remove_child(vdev_t *pvd, vdev_t *cvd); ++extern void vdev_compact_children(vdev_t *pvd); ++extern vdev_t *vdev_add_parent(vdev_t *cvd, vdev_ops_t *ops); ++extern void vdev_remove_parent(vdev_t *cvd); ++ ++/* ++ * vdev sync load and sync ++ */ ++extern void vdev_load_log_state(vdev_t *nvd, vdev_t *ovd); ++extern boolean_t vdev_log_state_valid(vdev_t *vd); ++extern void vdev_load(vdev_t *vd); ++extern void vdev_sync(vdev_t *vd, uint64_t txg); ++extern void vdev_sync_done(vdev_t *vd, uint64_t txg); ++extern void vdev_dirty(vdev_t *vd, int flags, void *arg, uint64_t txg); ++ ++/* ++ * Available vdev types. 
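To make the label-layout macros above concrete, a short worked example; the ashift values are assumptions, everything else follows directly from the definitions in this header and from UBERBLOCK_SHIFT (10) in uberblock_impl.h:

    /* Each on-disk label is 8K pad1 + 8K pad2 + 112K vdev_phys + 128K
     * uberblock ring = 256K, i.e. sizeof (vdev_label_t).
     *
     * For a top-level vdev with vdev_ashift == 12 (4K sectors):
     *   VDEV_UBERBLOCK_SHIFT(vd) = MAX(12, 10)  = 12
     *   VDEV_UBERBLOCK_SIZE(vd)  = 1ULL << 12   = 4K per slot
     *   VDEV_UBERBLOCK_COUNT(vd) = 128K >> 12   = 32 slots in the ring
     * With vdev_ashift == 9 the shift clamps to 10, giving 1K slots and
     * 128 of them.
     *
     * VDEV_LABEL_START_SIZE = 2 * 256K + 3.5M boot area = 4M, matching the
     * "first two labels plus the boot area is 4MB" comment above. */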
++ */ ++extern vdev_ops_t vdev_root_ops; ++extern vdev_ops_t vdev_mirror_ops; ++extern vdev_ops_t vdev_replacing_ops; ++extern vdev_ops_t vdev_raidz_ops; ++extern vdev_ops_t vdev_disk_ops; ++extern vdev_ops_t vdev_file_ops; ++extern vdev_ops_t vdev_missing_ops; ++extern vdev_ops_t vdev_hole_ops; ++extern vdev_ops_t vdev_spare_ops; ++ ++/* ++ * Common size functions ++ */ ++extern uint64_t vdev_default_asize(vdev_t *vd, uint64_t psize); ++extern uint64_t vdev_get_min_asize(vdev_t *vd); ++extern void vdev_set_min_asize(vdev_t *vd); ++ ++/* ++ * zdb uses this tunable, so it must be declared here to make lint happy. ++ */ ++extern int zfs_vdev_cache_size; ++ ++#ifdef __cplusplus ++} ++#endif ++ ++#endif /* _SYS_VDEV_IMPL_H */ +diff -uNr linux-3.2.33-go.orig/include/zfs/sys/xvattr.h linux-3.2.33-go/include/zfs/sys/xvattr.h +--- linux-3.2.33-go.orig/include/zfs/sys/xvattr.h 1970-01-01 01:00:00.000000000 +0100 ++++ linux-3.2.33-go/include/zfs/sys/xvattr.h 2012-11-16 23:25:34.344039393 +0100 +@@ -0,0 +1,330 @@ ++/* ++ * CDDL HEADER START ++ * ++ * The contents of this file are subject to the terms of the ++ * Common Development and Distribution License (the "License"). ++ * You may not use this file except in compliance with the License. ++ * ++ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE ++ * or http://www.opensolaris.org/os/licensing. ++ * See the License for the specific language governing permissions ++ * and limitations under the License. ++ * ++ * When distributing Covered Code, include this CDDL HEADER in each ++ * file and include the License file at usr/src/OPENSOLARIS.LICENSE. ++ * If applicable, add the following below this CDDL HEADER, with the ++ * fields enclosed by brackets "[]" replaced with your own identifying ++ * information: Portions Copyright [yyyy] [name of copyright owner] ++ * ++ * CDDL HEADER END ++ */ ++ ++/* ++ * Copyright (c) 1988, 2010, Oracle and/or its affiliates. All rights reserved. ++ */ ++ ++/* Copyright (c) 1983, 1984, 1985, 1986, 1987, 1988, 1989 AT&T */ ++/* All Rights Reserved */ ++ ++/* ++ * University Copyright- Copyright (c) 1982, 1986, 1988 ++ * The Regents of the University of California ++ * All Rights Reserved ++ * ++ * University Acknowledgment- Portions of this document are derived from ++ * software developed by the University of California, Berkeley, and its ++ * contributors. ++ */ ++ ++#ifndef _SYS_XVATTR_H ++#define _SYS_XVATTR_H ++ ++#include ++ ++#define AV_SCANSTAMP_SZ 32 /* length of anti-virus scanstamp */ ++ ++/* ++ * Structure of all optional attributes. ++ */ ++typedef struct xoptattr { ++ timestruc_t xoa_createtime; /* Create time of file */ ++ uint8_t xoa_archive; ++ uint8_t xoa_system; ++ uint8_t xoa_readonly; ++ uint8_t xoa_hidden; ++ uint8_t xoa_nounlink; ++ uint8_t xoa_immutable; ++ uint8_t xoa_appendonly; ++ uint8_t xoa_nodump; ++ uint8_t xoa_opaque; ++ uint8_t xoa_av_quarantined; ++ uint8_t xoa_av_modified; ++ uint8_t xoa_av_scanstamp[AV_SCANSTAMP_SZ]; ++ uint8_t xoa_reparse; ++ uint64_t xoa_generation; ++ uint8_t xoa_offline; ++ uint8_t xoa_sparse; ++} xoptattr_t; ++ ++/* ++ * The xvattr structure is really a variable length structure that ++ * is made up of: ++ * - The classic vattr_t (xva_vattr) ++ * - a 32 bit quantity (xva_mapsize) that specifies the size of the ++ * attribute bitmaps in 32 bit words. ++ * - A pointer to the returned attribute bitmap (needed because the ++ * previous element, the requested attribute bitmap) is variable lenth. 
++ * - The requested attribute bitmap, which is an array of 32 bit words. ++ * Callers use the XVA_SET_REQ() macro to set the bits corresponding to ++ * the attributes that are being requested. ++ * - The returned attribute bitmap, which is an array of 32 bit words. ++ * File systems that support optional attributes use the XVA_SET_RTN() ++ * macro to set the bits corresponding to the attributes that are being ++ * returned. ++ * - The xoptattr_t structure which contains the attribute values ++ * ++ * xva_mapsize determines how many words in the attribute bitmaps. ++ * Immediately following the attribute bitmaps is the xoptattr_t. ++ * xva_getxoptattr() is used to get the pointer to the xoptattr_t ++ * section. ++ */ ++ ++#define XVA_MAPSIZE 3 /* Size of attr bitmaps */ ++#define XVA_MAGIC 0x78766174 /* Magic # for verification */ ++ ++/* ++ * The xvattr structure is an extensible structure which permits optional ++ * attributes to be requested/returned. File systems may or may not support ++ * optional attributes. They do so at their own discretion but if they do ++ * support optional attributes, they must register the VFSFT_XVATTR feature ++ * so that the optional attributes can be set/retrived. ++ * ++ * The fields of the xvattr structure are: ++ * ++ * xva_vattr - The first element of an xvattr is a legacy vattr structure ++ * which includes the common attributes. If AT_XVATTR is set in the va_mask ++ * then the entire structure is treated as an xvattr. If AT_XVATTR is not ++ * set, then only the xva_vattr structure can be used. ++ * ++ * xva_magic - 0x78766174 (hex for "xvat"). Magic number for verification. ++ * ++ * xva_mapsize - Size of requested and returned attribute bitmaps. ++ * ++ * xva_rtnattrmapp - Pointer to xva_rtnattrmap[]. We need this since the ++ * size of the array before it, xva_reqattrmap[], could change which means ++ * the location of xva_rtnattrmap[] could change. This will allow unbundled ++ * file systems to find the location of xva_rtnattrmap[] when the sizes change. ++ * ++ * xva_reqattrmap[] - Array of requested attributes. Attributes are ++ * represented by a specific bit in a specific element of the attribute ++ * map array. Callers set the bits corresponding to the attributes ++ * that the caller wants to get/set. ++ * ++ * xva_rtnattrmap[] - Array of attributes that the file system was able to ++ * process. Not all file systems support all optional attributes. This map ++ * informs the caller which attributes the underlying file system was able ++ * to set/get. (Same structure as the requested attributes array in terms ++ * of each attribute corresponding to specific bits and array elements.) ++ * ++ * xva_xoptattrs - Structure containing values of optional attributes. ++ * These values are only valid if the corresponding bits in xva_reqattrmap ++ * are set and the underlying file system supports those attributes. ++ */ ++typedef struct xvattr { ++ vattr_t xva_vattr; /* Embedded vattr structure */ ++ uint32_t xva_magic; /* Magic Number */ ++ uint32_t xva_mapsize; /* Size of attr bitmap (32-bit words) */ ++ uint32_t *xva_rtnattrmapp; /* Ptr to xva_rtnattrmap[] */ ++ uint32_t xva_reqattrmap[XVA_MAPSIZE]; /* Requested attrs */ ++ uint32_t xva_rtnattrmap[XVA_MAPSIZE]; /* Returned attrs */ ++ xoptattr_t xva_xoptattrs; /* Optional attributes */ ++} xvattr_t; ++ ++/* ++ * Attribute bits used in the extensible attribute's (xva's) attribute ++ * bitmaps. Note that the bitmaps are made up of a variable length number ++ * of 32-bit words. 
The convention is to use XAT{n}_{attrname} where "n" ++ * is the element in the bitmap (starting at 1). This convention is for ++ * the convenience of the maintainer to keep track of which element each ++ * attribute belongs to. ++ * ++ * NOTE THAT CONSUMERS MUST *NOT* USE THE XATn_* DEFINES DIRECTLY. CONSUMERS ++ * MUST USE THE XAT_* DEFINES. ++ */ ++#define XAT0_INDEX 0LL /* Index into bitmap for XAT0 attrs */ ++#define XAT0_CREATETIME 0x00000001 /* Create time of file */ ++#define XAT0_ARCHIVE 0x00000002 /* Archive */ ++#define XAT0_SYSTEM 0x00000004 /* System */ ++#define XAT0_READONLY 0x00000008 /* Readonly */ ++#define XAT0_HIDDEN 0x00000010 /* Hidden */ ++#define XAT0_NOUNLINK 0x00000020 /* Nounlink */ ++#define XAT0_IMMUTABLE 0x00000040 /* immutable */ ++#define XAT0_APPENDONLY 0x00000080 /* appendonly */ ++#define XAT0_NODUMP 0x00000100 /* nodump */ ++#define XAT0_OPAQUE 0x00000200 /* opaque */ ++#define XAT0_AV_QUARANTINED 0x00000400 /* anti-virus quarantine */ ++#define XAT0_AV_MODIFIED 0x00000800 /* anti-virus modified */ ++#define XAT0_AV_SCANSTAMP 0x00001000 /* anti-virus scanstamp */ ++#define XAT0_REPARSE 0x00002000 /* FS reparse point */ ++#define XAT0_GEN 0x00004000 /* object generation number */ ++#define XAT0_OFFLINE 0x00008000 /* offline */ ++#define XAT0_SPARSE 0x00010000 /* sparse */ ++ ++#define XAT0_ALL_ATTRS (XAT0_CREATETIME|XAT0_ARCHIVE|XAT0_SYSTEM| \ ++ XAT0_READONLY|XAT0_HIDDEN|XAT0_NOUNLINK|XAT0_IMMUTABLE|XAT0_APPENDONLY| \ ++ XAT0_NODUMP|XAT0_OPAQUE|XAT0_AV_QUARANTINED| XAT0_AV_MODIFIED| \ ++ XAT0_AV_SCANSTAMP|XAT0_REPARSE|XATO_GEN|XAT0_OFFLINE|XAT0_SPARSE) ++ ++/* Support for XAT_* optional attributes */ ++#define XVA_MASK 0xffffffff /* Used to mask off 32 bits */ ++#define XVA_SHFT 32 /* Used to shift index */ ++ ++/* ++ * Used to pry out the index and attribute bits from the XAT_* attributes ++ * defined below. Note that we're masking things down to 32 bits then ++ * casting to uint32_t. ++ */ ++#define XVA_INDEX(attr) ((uint32_t)(((attr) >> XVA_SHFT) & XVA_MASK)) ++#define XVA_ATTRBIT(attr) ((uint32_t)((attr) & XVA_MASK)) ++ ++/* ++ * The following defines present a "flat namespace" so that consumers don't ++ * need to keep track of which element belongs to which bitmap entry. 
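A short worked example of the index/bit packing that XVA_INDEX() and XVA_ATTRBIT() decompose; the values come from the XAT0_* and XVA_* definitions above, and the variable is hypothetical:

    /* A "flat" attribute packs the bitmap element number into the upper
     * 32 bits and the attribute bit into the lower 32 bits. */
    uint64_t attr = (XAT0_INDEX << XVA_SHFT) | XAT0_READONLY;   /* == 0x8 */

    /* XVA_INDEX(attr)   == 0   -> element 0 of the request/return bitmaps */
    /* XVA_ATTRBIT(attr) == 0x8 -> the XAT0_READONLY bit within element 0  */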
++ * ++ * NOTE THAT THESE MUST NEVER BE OR-ed TOGETHER ++ */ ++#define XAT_CREATETIME ((XAT0_INDEX << XVA_SHFT) | XAT0_CREATETIME) ++#define XAT_ARCHIVE ((XAT0_INDEX << XVA_SHFT) | XAT0_ARCHIVE) ++#define XAT_SYSTEM ((XAT0_INDEX << XVA_SHFT) | XAT0_SYSTEM) ++#define XAT_READONLY ((XAT0_INDEX << XVA_SHFT) | XAT0_READONLY) ++#define XAT_HIDDEN ((XAT0_INDEX << XVA_SHFT) | XAT0_HIDDEN) ++#define XAT_NOUNLINK ((XAT0_INDEX << XVA_SHFT) | XAT0_NOUNLINK) ++#define XAT_IMMUTABLE ((XAT0_INDEX << XVA_SHFT) | XAT0_IMMUTABLE) ++#define XAT_APPENDONLY ((XAT0_INDEX << XVA_SHFT) | XAT0_APPENDONLY) ++#define XAT_NODUMP ((XAT0_INDEX << XVA_SHFT) | XAT0_NODUMP) ++#define XAT_OPAQUE ((XAT0_INDEX << XVA_SHFT) | XAT0_OPAQUE) ++#define XAT_AV_QUARANTINED ((XAT0_INDEX << XVA_SHFT) | XAT0_AV_QUARANTINED) ++#define XAT_AV_MODIFIED ((XAT0_INDEX << XVA_SHFT) | XAT0_AV_MODIFIED) ++#define XAT_AV_SCANSTAMP ((XAT0_INDEX << XVA_SHFT) | XAT0_AV_SCANSTAMP) ++#define XAT_REPARSE ((XAT0_INDEX << XVA_SHFT) | XAT0_REPARSE) ++#define XAT_GEN ((XAT0_INDEX << XVA_SHFT) | XAT0_GEN) ++#define XAT_OFFLINE ((XAT0_INDEX << XVA_SHFT) | XAT0_OFFLINE) ++#define XAT_SPARSE ((XAT0_INDEX << XVA_SHFT) | XAT0_SPARSE) ++ ++/* ++ * The returned attribute map array (xva_rtnattrmap[]) is located past the ++ * requested attribute map array (xva_reqattrmap[]). Its location changes ++ * when the array sizes change. We use a separate pointer in a known location ++ * (xva_rtnattrmapp) to hold the location of xva_rtnattrmap[]. This is ++ * set in xva_init() ++ */ ++#define XVA_RTNATTRMAP(xvap) ((xvap)->xva_rtnattrmapp) ++ ++/* ++ * XVA_SET_REQ() sets an attribute bit in the proper element in the bitmap ++ * of requested attributes (xva_reqattrmap[]). ++ */ ++#define XVA_SET_REQ(xvap, attr) \ ++ ASSERT((xvap)->xva_vattr.va_mask | AT_XVATTR); \ ++ ASSERT((xvap)->xva_magic == XVA_MAGIC); \ ++ (xvap)->xva_reqattrmap[XVA_INDEX(attr)] |= XVA_ATTRBIT(attr) ++/* ++ * XVA_CLR_REQ() clears an attribute bit in the proper element in the bitmap ++ * of requested attributes (xva_reqattrmap[]). ++ */ ++#define XVA_CLR_REQ(xvap, attr) \ ++ ASSERT((xvap)->xva_vattr.va_mask | AT_XVATTR); \ ++ ASSERT((xvap)->xva_magic == XVA_MAGIC); \ ++ (xvap)->xva_reqattrmap[XVA_INDEX(attr)] &= ~XVA_ATTRBIT(attr) ++ ++/* ++ * XVA_SET_RTN() sets an attribute bit in the proper element in the bitmap ++ * of returned attributes (xva_rtnattrmap[]). ++ */ ++#define XVA_SET_RTN(xvap, attr) \ ++ ASSERT((xvap)->xva_vattr.va_mask | AT_XVATTR); \ ++ ASSERT((xvap)->xva_magic == XVA_MAGIC); \ ++ (XVA_RTNATTRMAP(xvap))[XVA_INDEX(attr)] |= XVA_ATTRBIT(attr) ++ ++/* ++ * XVA_ISSET_REQ() checks the requested attribute bitmap (xva_reqattrmap[]) ++ * to see of the corresponding attribute bit is set. If so, returns non-zero. ++ */ ++#define XVA_ISSET_REQ(xvap, attr) \ ++ ((((xvap)->xva_vattr.va_mask | AT_XVATTR) && \ ++ ((xvap)->xva_magic == XVA_MAGIC) && \ ++ ((xvap)->xva_mapsize > XVA_INDEX(attr))) ? \ ++ ((xvap)->xva_reqattrmap[XVA_INDEX(attr)] & XVA_ATTRBIT(attr)) : 0) ++ ++/* ++ * XVA_ISSET_RTN() checks the returned attribute bitmap (xva_rtnattrmap[]) ++ * to see of the corresponding attribute bit is set. If so, returns non-zero. ++ */ ++#define XVA_ISSET_RTN(xvap, attr) \ ++ ((((xvap)->xva_vattr.va_mask | AT_XVATTR) && \ ++ ((xvap)->xva_magic == XVA_MAGIC) && \ ++ ((xvap)->xva_mapsize > XVA_INDEX(attr))) ? 
\ ++ ((XVA_RTNATTRMAP(xvap))[XVA_INDEX(attr)] & XVA_ATTRBIT(attr)) : 0) ++ ++/* ++ * Zero out the structure, set the size of the requested/returned bitmaps, ++ * set AT_XVATTR in the embedded vattr_t's va_mask, and set up the pointer ++ * to the returned attributes array. ++ */ ++static inline void ++xva_init(xvattr_t *xvap) ++{ ++ bzero(xvap, sizeof (xvattr_t)); ++ xvap->xva_mapsize = XVA_MAPSIZE; ++ xvap->xva_magic = XVA_MAGIC; ++ xvap->xva_vattr.va_mask = ATTR_XVATTR; ++ xvap->xva_rtnattrmapp = &(xvap->xva_rtnattrmap)[0]; ++} ++ ++/* ++ * If AT_XVATTR is set, returns a pointer to the embedded xoptattr_t ++ * structure. Otherwise, returns NULL. ++ */ ++static inline xoptattr_t * ++xva_getxoptattr(xvattr_t *xvap) ++{ ++ xoptattr_t *xoap = NULL; ++ if (xvap->xva_vattr.va_mask & AT_XVATTR) ++ xoap = &xvap->xva_xoptattrs; ++ return (xoap); ++} ++ ++#define MODEMASK 07777 /* mode bits plus permission bits */ ++#define PERMMASK 00777 /* permission bits */ ++ ++/* ++ * VOP_ACCESS flags ++ */ ++#define V_ACE_MASK 0x1 /* mask represents NFSv4 ACE permissions */ ++#define V_APPEND 0x2 /* want to do append only check */ ++ ++/* ++ * Structure used on VOP_GETSECATTR and VOP_SETSECATTR operations ++ */ ++ ++typedef struct vsecattr { ++ uint_t vsa_mask; /* See below */ ++ int vsa_aclcnt; /* ACL entry count */ ++ void *vsa_aclentp; /* pointer to ACL entries */ ++ int vsa_dfaclcnt; /* default ACL entry count */ ++ void *vsa_dfaclentp; /* pointer to default ACL entries */ ++ size_t vsa_aclentsz; /* ACE size in bytes of vsa_aclentp */ ++ uint_t vsa_aclflags; /* ACE ACL flags */ ++} vsecattr_t; ++ ++/* vsa_mask values */ ++#define VSA_ACL 0x0001 ++#define VSA_ACLCNT 0x0002 ++#define VSA_DFACL 0x0004 ++#define VSA_DFACLCNT 0x0008 ++#define VSA_ACE 0x0010 ++#define VSA_ACECNT 0x0020 ++#define VSA_ACE_ALLTYPES 0x0040 ++#define VSA_ACE_ACLFLAGS 0x0080 /* get/set ACE ACL flags */ ++ ++#endif /* _SYS_XVATTR_H */ +diff -uNr linux-3.2.33-go.orig/include/zfs/sys/zap.h linux-3.2.33-go/include/zfs/sys/zap.h +--- linux-3.2.33-go.orig/include/zfs/sys/zap.h 1970-01-01 01:00:00.000000000 +0100 ++++ linux-3.2.33-go/include/zfs/sys/zap.h 2012-11-16 23:25:34.338039461 +0100 +@@ -0,0 +1,480 @@ ++/* ++ * CDDL HEADER START ++ * ++ * The contents of this file are subject to the terms of the ++ * Common Development and Distribution License (the "License"). ++ * You may not use this file except in compliance with the License. ++ * ++ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE ++ * or http://www.opensolaris.org/os/licensing. ++ * See the License for the specific language governing permissions ++ * and limitations under the License. ++ * ++ * When distributing Covered Code, include this CDDL HEADER in each ++ * file and include the License file at usr/src/OPENSOLARIS.LICENSE. ++ * If applicable, add the following below this CDDL HEADER, with the ++ * fields enclosed by brackets "[]" replaced with your own identifying ++ * information: Portions Copyright [yyyy] [name of copyright owner] ++ * ++ * CDDL HEADER END ++ */ ++/* ++ * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved. ++ */ ++ ++#ifndef _SYS_ZAP_H ++#define _SYS_ZAP_H ++ ++/* ++ * ZAP - ZFS Attribute Processor ++ * ++ * The ZAP is a module which sits on top of the DMU (Data Management ++ * Unit) and implements a higher-level storage primitive using DMU ++ * objects. Its primary consumer is the ZPL (ZFS Posix Layer). ++ * ++ * A "zapobj" is a DMU object which the ZAP uses to stores attributes. 
++ * Users should use only zap routines to access a zapobj - they should ++ * not access the DMU object directly using DMU routines. ++ * ++ * The attributes stored in a zapobj are name-value pairs. The name is ++ * a zero-terminated string of up to ZAP_MAXNAMELEN bytes (including ++ * terminating NULL). The value is an array of integers, which may be ++ * 1, 2, 4, or 8 bytes long. The total space used by the array (number ++ * of integers * integer length) can be up to ZAP_MAXVALUELEN bytes. ++ * Note that an 8-byte integer value can be used to store the location ++ * (object number) of another dmu object (which may be itself a zapobj). ++ * Note that you can use a zero-length attribute to store a single bit ++ * of information - the attribute is present or not. ++ * ++ * The ZAP routines are thread-safe. However, you must observe the ++ * DMU's restriction that a transaction may not be operated on ++ * concurrently. ++ * ++ * Any of the routines that return an int may return an I/O error (EIO ++ * or ECHECKSUM). ++ * ++ * ++ * Implementation / Performance Notes: ++ * ++ * The ZAP is intended to operate most efficiently on attributes with ++ * short (49 bytes or less) names and single 8-byte values, for which ++ * the microzap will be used. The ZAP should be efficient enough so ++ * that the user does not need to cache these attributes. ++ * ++ * The ZAP's locking scheme makes its routines thread-safe. Operations ++ * on different zapobjs will be processed concurrently. Operations on ++ * the same zapobj which only read data will be processed concurrently. ++ * Operations on the same zapobj which modify data will be processed ++ * concurrently when there are many attributes in the zapobj (because ++ * the ZAP uses per-block locking - more than 128 * (number of cpus) ++ * small attributes will suffice). ++ */ ++ ++/* ++ * We're using zero-terminated byte strings (ie. ASCII or UTF-8 C ++ * strings) for the names of attributes, rather than a byte string ++ * bounded by an explicit length. If some day we want to support names ++ * in character sets which have embedded zeros (eg. UTF-16, UTF-32), ++ * we'll have to add routines for using length-bounded strings. ++ */ ++ ++#include ++ ++#ifdef __cplusplus ++extern "C" { ++#endif ++ ++/* ++ * The matchtype specifies which entry will be accessed. ++ * MT_EXACT: only find an exact match (non-normalized) ++ * MT_FIRST: find the "first" normalized (case and Unicode ++ * form) match; the designated "first" match will not change as long ++ * as the set of entries with this normalization doesn't change ++ * MT_BEST: if there is an exact match, find that, otherwise find the ++ * first normalized match ++ */ ++typedef enum matchtype ++{ ++ MT_EXACT, ++ MT_BEST, ++ MT_FIRST ++} matchtype_t; ++ ++typedef enum zap_flags { ++ /* Use 64-bit hash value (serialized cursors will always use 64-bits) */ ++ ZAP_FLAG_HASH64 = 1 << 0, ++ /* Key is binary, not string (zap_add_uint64() can be used) */ ++ ZAP_FLAG_UINT64_KEY = 1 << 1, ++ /* ++ * First word of key (which must be an array of uint64) is ++ * already randomly distributed. ++ */ ++ ZAP_FLAG_PRE_HASHED_KEY = 1 << 2, ++} zap_flags_t; ++ ++/* ++ * Create a new zapobj with no attributes and return its object number. ++ * MT_EXACT will cause the zap object to only support MT_EXACT lookups, ++ * otherwise any matchtype can be used for lookups. ++ * ++ * normflags specifies what normalization will be done. 
values are: ++ * 0: no normalization (legacy on-disk format, supports MT_EXACT matching ++ * only) ++ * U8_TEXTPREP_TOLOWER: case normalization will be performed. ++ * MT_FIRST/MT_BEST matching will find entries that match without ++ * regard to case (eg. looking for "foo" can find an entry "Foo"). ++ * Eventually, other flags will permit unicode normalization as well. ++ */ ++uint64_t zap_create(objset_t *ds, dmu_object_type_t ot, ++ dmu_object_type_t bonustype, int bonuslen, dmu_tx_t *tx); ++uint64_t zap_create_norm(objset_t *ds, int normflags, dmu_object_type_t ot, ++ dmu_object_type_t bonustype, int bonuslen, dmu_tx_t *tx); ++uint64_t zap_create_flags(objset_t *os, int normflags, zap_flags_t flags, ++ dmu_object_type_t ot, int leaf_blockshift, int indirect_blockshift, ++ dmu_object_type_t bonustype, int bonuslen, dmu_tx_t *tx); ++ ++/* ++ * Create a new zapobj with no attributes from the given (unallocated) ++ * object number. ++ */ ++int zap_create_claim(objset_t *ds, uint64_t obj, dmu_object_type_t ot, ++ dmu_object_type_t bonustype, int bonuslen, dmu_tx_t *tx); ++int zap_create_claim_norm(objset_t *ds, uint64_t obj, ++ int normflags, dmu_object_type_t ot, ++ dmu_object_type_t bonustype, int bonuslen, dmu_tx_t *tx); ++ ++/* ++ * The zapobj passed in must be a valid ZAP object for all of the ++ * following routines. ++ */ ++ ++/* ++ * Destroy this zapobj and all its attributes. ++ * ++ * Frees the object number using dmu_object_free. ++ */ ++int zap_destroy(objset_t *ds, uint64_t zapobj, dmu_tx_t *tx); ++ ++/* ++ * Manipulate attributes. ++ * ++ * 'integer_size' is in bytes, and must be 1, 2, 4, or 8. ++ */ ++ ++/* ++ * Retrieve the contents of the attribute with the given name. ++ * ++ * If the requested attribute does not exist, the call will fail and ++ * return ENOENT. ++ * ++ * If 'integer_size' is smaller than the attribute's integer size, the ++ * call will fail and return EINVAL. ++ * ++ * If 'integer_size' is equal to or larger than the attribute's integer ++ * size, the call will succeed and return 0. * When converting to a ++ * larger integer size, the integers will be treated as unsigned (ie. no ++ * sign-extension will be performed). ++ * ++ * 'num_integers' is the length (in integers) of 'buf'. ++ * ++ * If the attribute is longer than the buffer, as many integers as will ++ * fit will be transferred to 'buf'. If the entire attribute was not ++ * transferred, the call will return EOVERFLOW. ++ * ++ * If rn_len is nonzero, realname will be set to the name of the found ++ * entry (which may be different from the requested name if matchtype is ++ * not MT_EXACT). ++ * ++ * If normalization_conflictp is not NULL, it will be set if there is ++ * another name with the same case/unicode normalized form. 
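++ *
++ * (Editorial aside, not part of the original comment: a minimal sketch of
++ * looking up a single 8-byte integer attribute by name, where "myattr"
++ * and the surrounding error handling are hypothetical:
++ *
++ *	uint64_t val;
++ *	int err = zap_lookup(os, zapobj, "myattr",
++ *	    sizeof (uint64_t), 1, &val);
++ *	if (err == ENOENT)
++ *		... the attribute does not exist ...
++ * )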
++ */ ++int zap_lookup(objset_t *ds, uint64_t zapobj, const char *name, ++ uint64_t integer_size, uint64_t num_integers, void *buf); ++int zap_lookup_norm(objset_t *ds, uint64_t zapobj, const char *name, ++ uint64_t integer_size, uint64_t num_integers, void *buf, ++ matchtype_t mt, char *realname, int rn_len, ++ boolean_t *normalization_conflictp); ++int zap_lookup_uint64(objset_t *os, uint64_t zapobj, const uint64_t *key, ++ int key_numints, uint64_t integer_size, uint64_t num_integers, void *buf); ++int zap_contains(objset_t *ds, uint64_t zapobj, const char *name); ++int zap_prefetch_uint64(objset_t *os, uint64_t zapobj, const uint64_t *key, ++ int key_numints); ++ ++int zap_count_write(objset_t *os, uint64_t zapobj, const char *name, ++ int add, uint64_t *towrite, uint64_t *tooverwrite); ++ ++/* ++ * Create an attribute with the given name and value. ++ * ++ * If an attribute with the given name already exists, the call will ++ * fail and return EEXIST. ++ */ ++int zap_add(objset_t *ds, uint64_t zapobj, const char *key, ++ int integer_size, uint64_t num_integers, ++ const void *val, dmu_tx_t *tx); ++int zap_add_uint64(objset_t *ds, uint64_t zapobj, const uint64_t *key, ++ int key_numints, int integer_size, uint64_t num_integers, ++ const void *val, dmu_tx_t *tx); ++ ++/* ++ * Set the attribute with the given name to the given value. If an ++ * attribute with the given name does not exist, it will be created. If ++ * an attribute with the given name already exists, the previous value ++ * will be overwritten. The integer_size may be different from the ++ * existing attribute's integer size, in which case the attribute's ++ * integer size will be updated to the new value. ++ */ ++int zap_update(objset_t *ds, uint64_t zapobj, const char *name, ++ int integer_size, uint64_t num_integers, const void *val, dmu_tx_t *tx); ++int zap_update_uint64(objset_t *os, uint64_t zapobj, const uint64_t *key, ++ int key_numints, ++ int integer_size, uint64_t num_integers, const void *val, dmu_tx_t *tx); ++ ++/* ++ * Get the length (in integers) and the integer size of the specified ++ * attribute. ++ * ++ * If the requested attribute does not exist, the call will fail and ++ * return ENOENT. ++ */ ++int zap_length(objset_t *ds, uint64_t zapobj, const char *name, ++ uint64_t *integer_size, uint64_t *num_integers); ++int zap_length_uint64(objset_t *os, uint64_t zapobj, const uint64_t *key, ++ int key_numints, uint64_t *integer_size, uint64_t *num_integers); ++ ++/* ++ * Remove the specified attribute. ++ * ++ * If the specified attribute does not exist, the call will fail and ++ * return ENOENT. ++ */ ++int zap_remove(objset_t *ds, uint64_t zapobj, const char *name, dmu_tx_t *tx); ++int zap_remove_norm(objset_t *ds, uint64_t zapobj, const char *name, ++ matchtype_t mt, dmu_tx_t *tx); ++int zap_remove_uint64(objset_t *os, uint64_t zapobj, const uint64_t *key, ++ int key_numints, dmu_tx_t *tx); ++ ++/* ++ * Returns (in *count) the number of attributes in the specified zap ++ * object. ++ */ ++int zap_count(objset_t *ds, uint64_t zapobj, uint64_t *count); ++ ++/* ++ * Returns (in name) the name of the entry whose (value & mask) ++ * (za_first_integer) is value, or ENOENT if not found. The string ++ * pointed to by name must be at least 256 bytes long. If mask==0, the ++ * match must be exact (ie, same as mask=-1ULL). ++ */ ++int zap_value_search(objset_t *os, uint64_t zapobj, ++ uint64_t value, uint64_t mask, char *name); ++ ++/* ++ * Transfer all the entries from fromobj into intoobj. 
Only works on ++ * int_size=8 num_integers=1 values. Fails if there are any duplicated ++ * entries. ++ */ ++int zap_join(objset_t *os, uint64_t fromobj, uint64_t intoobj, dmu_tx_t *tx); ++ ++/* Same as zap_join, but set the values to 'value'. */ ++int zap_join_key(objset_t *os, uint64_t fromobj, uint64_t intoobj, ++ uint64_t value, dmu_tx_t *tx); ++ ++/* Same as zap_join, but add together any duplicated entries. */ ++int zap_join_increment(objset_t *os, uint64_t fromobj, uint64_t intoobj, ++ dmu_tx_t *tx); ++ ++/* ++ * Manipulate entries where the name + value are the "same" (the name is ++ * a stringified version of the value). ++ */ ++int zap_add_int(objset_t *os, uint64_t obj, uint64_t value, dmu_tx_t *tx); ++int zap_remove_int(objset_t *os, uint64_t obj, uint64_t value, dmu_tx_t *tx); ++int zap_lookup_int(objset_t *os, uint64_t obj, uint64_t value); ++int zap_increment_int(objset_t *os, uint64_t obj, uint64_t key, int64_t delta, ++ dmu_tx_t *tx); ++ ++/* Here the key is an int and the value is a different int. */ ++int zap_add_int_key(objset_t *os, uint64_t obj, ++ uint64_t key, uint64_t value, dmu_tx_t *tx); ++int zap_lookup_int_key(objset_t *os, uint64_t obj, ++ uint64_t key, uint64_t *valuep); ++ ++/* ++ * They name is a stringified version of key; increment its value by ++ * delta. Zero values will be zap_remove()-ed. ++ */ ++int zap_increment(objset_t *os, uint64_t obj, const char *name, int64_t delta, ++ dmu_tx_t *tx); ++ ++struct zap; ++struct zap_leaf; ++typedef struct zap_cursor { ++ /* This structure is opaque! */ ++ objset_t *zc_objset; ++ struct zap *zc_zap; ++ struct zap_leaf *zc_leaf; ++ uint64_t zc_zapobj; ++ uint64_t zc_serialized; ++ uint64_t zc_hash; ++ uint32_t zc_cd; ++} zap_cursor_t; ++ ++typedef struct { ++ int za_integer_length; ++ /* ++ * za_normalization_conflict will be set if there are additional ++ * entries with this normalized form (eg, "foo" and "Foo"). ++ */ ++ boolean_t za_normalization_conflict; ++ uint64_t za_num_integers; ++ uint64_t za_first_integer; /* no sign extension for <8byte ints */ ++ char za_name[MAXNAMELEN]; ++} zap_attribute_t; ++ ++/* ++ * The interface for listing all the attributes of a zapobj can be ++ * thought of as cursor moving down a list of the attributes one by ++ * one. The cookie returned by the zap_cursor_serialize routine is ++ * persistent across system calls (and across reboot, even). ++ */ ++ ++/* ++ * Initialize a zap cursor, pointing to the "first" attribute of the ++ * zapobj. You must _fini the cursor when you are done with it. ++ */ ++void zap_cursor_init(zap_cursor_t *zc, objset_t *ds, uint64_t zapobj); ++void zap_cursor_fini(zap_cursor_t *zc); ++ ++/* ++ * Get the attribute currently pointed to by the cursor. Returns ++ * ENOENT if at the end of the attributes. ++ */ ++int zap_cursor_retrieve(zap_cursor_t *zc, zap_attribute_t *za); ++ ++/* ++ * Advance the cursor to the next attribute. ++ */ ++void zap_cursor_advance(zap_cursor_t *zc); ++ ++/* ++ * Get a persistent cookie pointing to the current position of the zap ++ * cursor. The low 4 bits in the cookie are always zero, and thus can ++ * be used as to differentiate a serialized cookie from a different type ++ * of value. The cookie will be less than 2^32 as long as there are ++ * fewer than 2^22 (4.2 million) entries in the zap object. ++ */ ++uint64_t zap_cursor_serialize(zap_cursor_t *zc); ++ ++/* ++ * Advance the cursor to the attribute having the given key. 
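++ *
++ * (Editorial aside, not part of the original comment: a typical iteration
++ * over every attribute of a zapobj with the cursor routines declared
++ * above might look like the following sketch, error handling omitted:
++ *
++ *	zap_cursor_t zc;
++ *	zap_attribute_t za;
++ *
++ *	for (zap_cursor_init(&zc, os, zapobj);
++ *	    zap_cursor_retrieve(&zc, &za) == 0;
++ *	    zap_cursor_advance(&zc))
++ *		... use za.za_name and za.za_first_integer ...
++ *	zap_cursor_fini(&zc);
++ * )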
++ */ ++int zap_cursor_move_to_key(zap_cursor_t *zc, const char *name, matchtype_t mt); ++ ++/* ++ * Initialize a zap cursor pointing to the position recorded by ++ * zap_cursor_serialize (in the "serialized" argument). You can also ++ * use a "serialized" argument of 0 to start at the beginning of the ++ * zapobj (ie. zap_cursor_init_serialized(..., 0) is equivalent to ++ * zap_cursor_init(...).) ++ */ ++void zap_cursor_init_serialized(zap_cursor_t *zc, objset_t *ds, ++ uint64_t zapobj, uint64_t serialized); ++ ++ ++#define ZAP_HISTOGRAM_SIZE 10 ++ ++typedef struct zap_stats { ++ /* ++ * Size of the pointer table (in number of entries). ++ * This is always a power of 2, or zero if it's a microzap. ++ * In general, it should be considerably greater than zs_num_leafs. ++ */ ++ uint64_t zs_ptrtbl_len; ++ ++ uint64_t zs_blocksize; /* size of zap blocks */ ++ ++ /* ++ * The number of blocks used. Note that some blocks may be ++ * wasted because old ptrtbl's and large name/value blocks are ++ * not reused. (Although their space is reclaimed, we don't ++ * reuse those offsets in the object.) ++ */ ++ uint64_t zs_num_blocks; ++ ++ /* ++ * Pointer table values from zap_ptrtbl in the zap_phys_t ++ */ ++ uint64_t zs_ptrtbl_nextblk; /* next (larger) copy start block */ ++ uint64_t zs_ptrtbl_blks_copied; /* number source blocks copied */ ++ uint64_t zs_ptrtbl_zt_blk; /* starting block number */ ++ uint64_t zs_ptrtbl_zt_numblks; /* number of blocks */ ++ uint64_t zs_ptrtbl_zt_shift; /* bits to index it */ ++ ++ /* ++ * Values of the other members of the zap_phys_t ++ */ ++ uint64_t zs_block_type; /* ZBT_HEADER */ ++ uint64_t zs_magic; /* ZAP_MAGIC */ ++ uint64_t zs_num_leafs; /* The number of leaf blocks */ ++ uint64_t zs_num_entries; /* The number of zap entries */ ++ uint64_t zs_salt; /* salt to stir into hash function */ ++ ++ /* ++ * Histograms. For all histograms, the last index ++ * (ZAP_HISTOGRAM_SIZE-1) includes any values which are greater ++ * than what can be represented. For example ++ * zs_leafs_with_n5_entries[ZAP_HISTOGRAM_SIZE-1] is the number ++ * of leafs with more than 45 entries. ++ */ ++ ++ /* ++ * zs_leafs_with_n_pointers[n] is the number of leafs with ++ * 2^n pointers to it. ++ */ ++ uint64_t zs_leafs_with_2n_pointers[ZAP_HISTOGRAM_SIZE]; ++ ++ /* ++ * zs_leafs_with_n_entries[n] is the number of leafs with ++ * [n*5, (n+1)*5) entries. In the current implementation, there ++ * can be at most 55 entries in any block, but there may be ++ * fewer if the name or value is large, or the block is not ++ * completely full. ++ */ ++ uint64_t zs_blocks_with_n5_entries[ZAP_HISTOGRAM_SIZE]; ++ ++ /* ++ * zs_leafs_n_tenths_full[n] is the number of leafs whose ++ * fullness is in the range [n/10, (n+1)/10). ++ */ ++ uint64_t zs_blocks_n_tenths_full[ZAP_HISTOGRAM_SIZE]; ++ ++ /* ++ * zs_entries_using_n_chunks[n] is the number of entries which ++ * consume n 24-byte chunks. (Note, large names/values only use ++ * one chunk, but contribute to zs_num_blocks_large.) ++ */ ++ uint64_t zs_entries_using_n_chunks[ZAP_HISTOGRAM_SIZE]; ++ ++ /* ++ * zs_buckets_with_n_entries[n] is the number of buckets (each ++ * leaf has 64 buckets) with n entries. ++ * zs_buckets_with_n_entries[1] should be very close to ++ * zs_num_entries. ++ */ ++ uint64_t zs_buckets_with_n_entries[ZAP_HISTOGRAM_SIZE]; ++} zap_stats_t; ++ ++/* ++ * Get statistics about a ZAP object. Note: you need to be aware of the ++ * internal implementation of the ZAP to correctly interpret some of the ++ * statistics. 
This interface shouldn't be relied on unless you really ++ * know what you're doing. ++ */ ++int zap_get_stats(objset_t *ds, uint64_t zapobj, zap_stats_t *zs); ++ ++#ifdef __cplusplus ++} ++#endif ++ ++#endif /* _SYS_ZAP_H */ +diff -uNr linux-3.2.33-go.orig/include/zfs/sys/zap_impl.h linux-3.2.33-go/include/zfs/sys/zap_impl.h +--- linux-3.2.33-go.orig/include/zfs/sys/zap_impl.h 1970-01-01 01:00:00.000000000 +0100 ++++ linux-3.2.33-go/include/zfs/sys/zap_impl.h 2012-11-16 23:25:34.337039473 +0100 +@@ -0,0 +1,228 @@ ++/* ++ * CDDL HEADER START ++ * ++ * The contents of this file are subject to the terms of the ++ * Common Development and Distribution License (the "License"). ++ * You may not use this file except in compliance with the License. ++ * ++ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE ++ * or http://www.opensolaris.org/os/licensing. ++ * See the License for the specific language governing permissions ++ * and limitations under the License. ++ * ++ * When distributing Covered Code, include this CDDL HEADER in each ++ * file and include the License file at usr/src/OPENSOLARIS.LICENSE. ++ * If applicable, add the following below this CDDL HEADER, with the ++ * fields enclosed by brackets "[]" replaced with your own identifying ++ * information: Portions Copyright [yyyy] [name of copyright owner] ++ * ++ * CDDL HEADER END ++ */ ++/* ++ * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved. ++ */ ++ ++#ifndef _SYS_ZAP_IMPL_H ++#define _SYS_ZAP_IMPL_H ++ ++#include ++#include ++#include ++ ++#ifdef __cplusplus ++extern "C" { ++#endif ++ ++extern int fzap_default_block_shift; ++ ++#define ZAP_MAGIC 0x2F52AB2ABULL ++ ++#define FZAP_BLOCK_SHIFT(zap) ((zap)->zap_f.zap_block_shift) ++ ++#define MZAP_ENT_LEN 64 ++#define MZAP_NAME_LEN (MZAP_ENT_LEN - 8 - 4 - 2) ++#define MZAP_MAX_BLKSHIFT SPA_MAXBLOCKSHIFT ++#define MZAP_MAX_BLKSZ (1 << MZAP_MAX_BLKSHIFT) ++ ++#define ZAP_NEED_CD (-1U) ++ ++typedef struct mzap_ent_phys { ++ uint64_t mze_value; ++ uint32_t mze_cd; ++ uint16_t mze_pad; /* in case we want to chain them someday */ ++ char mze_name[MZAP_NAME_LEN]; ++} mzap_ent_phys_t; ++ ++typedef struct mzap_phys { ++ uint64_t mz_block_type; /* ZBT_MICRO */ ++ uint64_t mz_salt; ++ uint64_t mz_normflags; ++ uint64_t mz_pad[5]; ++ mzap_ent_phys_t mz_chunk[1]; ++ /* actually variable size depending on block size */ ++} mzap_phys_t; ++ ++typedef struct mzap_ent { ++ avl_node_t mze_node; ++ int mze_chunkid; ++ uint64_t mze_hash; ++ uint32_t mze_cd; /* copy from mze_phys->mze_cd */ ++} mzap_ent_t; ++ ++#define MZE_PHYS(zap, mze) \ ++ (&(zap)->zap_m.zap_phys->mz_chunk[(mze)->mze_chunkid]) ++ ++/* ++ * The (fat) zap is stored in one object. It is an array of ++ * 1<= 6] [zap_leaf_t] [ptrtbl] ... ++ * ++ */ ++ ++struct dmu_buf; ++struct zap_leaf; ++ ++#define ZBT_LEAF ((1ULL << 63) + 0) ++#define ZBT_HEADER ((1ULL << 63) + 1) ++#define ZBT_MICRO ((1ULL << 63) + 3) ++/* any other values are ptrtbl blocks */ ++ ++/* ++ * the embedded pointer table takes up half a block: ++ * block size / entry size (2^3) / 2 ++ */ ++#define ZAP_EMBEDDED_PTRTBL_SHIFT(zap) (FZAP_BLOCK_SHIFT(zap) - 3 - 1) ++ ++/* ++ * The embedded pointer table starts half-way through the block. 
Since ++ * the pointer table itself is half the block, it starts at (64-bit) ++ * word number (1<zap_f.zap_phys) \ ++ [(idx) + (1<> (64 - (n)))) ++ ++void fzap_byteswap(void *buf, size_t size); ++int fzap_count(zap_t *zap, uint64_t *count); ++int fzap_lookup(zap_name_t *zn, ++ uint64_t integer_size, uint64_t num_integers, void *buf, ++ char *realname, int rn_len, boolean_t *normalization_conflictp); ++void fzap_prefetch(zap_name_t *zn); ++int fzap_count_write(zap_name_t *zn, int add, uint64_t *towrite, ++ uint64_t *tooverwrite); ++int fzap_add(zap_name_t *zn, uint64_t integer_size, uint64_t num_integers, ++ const void *val, dmu_tx_t *tx); ++int fzap_update(zap_name_t *zn, ++ int integer_size, uint64_t num_integers, const void *val, dmu_tx_t *tx); ++int fzap_length(zap_name_t *zn, ++ uint64_t *integer_size, uint64_t *num_integers); ++int fzap_remove(zap_name_t *zn, dmu_tx_t *tx); ++int fzap_cursor_retrieve(zap_t *zap, zap_cursor_t *zc, zap_attribute_t *za); ++void fzap_get_stats(zap_t *zap, zap_stats_t *zs); ++void zap_put_leaf(struct zap_leaf *l); ++ ++int fzap_add_cd(zap_name_t *zn, ++ uint64_t integer_size, uint64_t num_integers, ++ const void *val, uint32_t cd, dmu_tx_t *tx); ++void fzap_upgrade(zap_t *zap, dmu_tx_t *tx, zap_flags_t flags); ++int fzap_cursor_move_to_key(zap_cursor_t *zc, zap_name_t *zn); ++ ++#ifdef __cplusplus ++} ++#endif ++ ++#endif /* _SYS_ZAP_IMPL_H */ +diff -uNr linux-3.2.33-go.orig/include/zfs/sys/zap_leaf.h linux-3.2.33-go/include/zfs/sys/zap_leaf.h +--- linux-3.2.33-go.orig/include/zfs/sys/zap_leaf.h 1970-01-01 01:00:00.000000000 +0100 ++++ linux-3.2.33-go/include/zfs/sys/zap_leaf.h 2012-11-16 23:25:34.342039415 +0100 +@@ -0,0 +1,245 @@ ++/* ++ * CDDL HEADER START ++ * ++ * The contents of this file are subject to the terms of the ++ * Common Development and Distribution License (the "License"). ++ * You may not use this file except in compliance with the License. ++ * ++ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE ++ * or http://www.opensolaris.org/os/licensing. ++ * See the License for the specific language governing permissions ++ * and limitations under the License. ++ * ++ * When distributing Covered Code, include this CDDL HEADER in each ++ * file and include the License file at usr/src/OPENSOLARIS.LICENSE. ++ * If applicable, add the following below this CDDL HEADER, with the ++ * fields enclosed by brackets "[]" replaced with your own identifying ++ * information: Portions Copyright [yyyy] [name of copyright owner] ++ * ++ * CDDL HEADER END ++ */ ++/* ++ * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved. 
++ */ ++ ++#ifndef _SYS_ZAP_LEAF_H ++#define _SYS_ZAP_LEAF_H ++ ++#include ++ ++#ifdef __cplusplus ++extern "C" { ++#endif ++ ++struct zap; ++struct zap_name; ++struct zap_stats; ++ ++#define ZAP_LEAF_MAGIC 0x2AB1EAF ++ ++/* chunk size = 24 bytes */ ++#define ZAP_LEAF_CHUNKSIZE 24 ++ ++/* ++ * The amount of space available for chunks is: ++ * block size (1<l_bs) - hash entry size (2) * number of hash ++ * entries - header space (2*chunksize) ++ */ ++#define ZAP_LEAF_NUMCHUNKS(l) \ ++ (((1<<(l)->l_bs) - 2*ZAP_LEAF_HASH_NUMENTRIES(l)) / \ ++ ZAP_LEAF_CHUNKSIZE - 2) ++ ++/* ++ * The amount of space within the chunk available for the array is: ++ * chunk size - space for type (1) - space for next pointer (2) ++ */ ++#define ZAP_LEAF_ARRAY_BYTES (ZAP_LEAF_CHUNKSIZE - 3) ++ ++#define ZAP_LEAF_ARRAY_NCHUNKS(bytes) \ ++ (((bytes)+ZAP_LEAF_ARRAY_BYTES-1)/ZAP_LEAF_ARRAY_BYTES) ++ ++/* ++ * Low water mark: when there are only this many chunks free, start ++ * growing the ptrtbl. Ideally, this should be larger than a ++ * "reasonably-sized" entry. 20 chunks is more than enough for the ++ * largest directory entry (MAXNAMELEN (256) byte name, 8-byte value), ++ * while still being only around 3% for 16k blocks. ++ */ ++#define ZAP_LEAF_LOW_WATER (20) ++ ++/* ++ * The leaf hash table has block size / 2^5 (32) number of entries, ++ * which should be more than enough for the maximum number of entries, ++ * which is less than block size / CHUNKSIZE (24) / minimum number of ++ * chunks per entry (3). ++ */ ++#define ZAP_LEAF_HASH_SHIFT(l) ((l)->l_bs - 5) ++#define ZAP_LEAF_HASH_NUMENTRIES(l) (1 << ZAP_LEAF_HASH_SHIFT(l)) ++ ++/* ++ * The chunks start immediately after the hash table. The end of the ++ * hash table is at l_hash + HASH_NUMENTRIES, which we simply cast to a ++ * chunk_t. ++ */ ++#define ZAP_LEAF_CHUNK(l, idx) \ ++ ((zap_leaf_chunk_t *) \ ++ ((l)->l_phys->l_hash + ZAP_LEAF_HASH_NUMENTRIES(l)))[idx] ++#define ZAP_LEAF_ENTRY(l, idx) (&ZAP_LEAF_CHUNK(l, idx).l_entry) ++ ++typedef enum zap_chunk_type { ++ ZAP_CHUNK_FREE = 253, ++ ZAP_CHUNK_ENTRY = 252, ++ ZAP_CHUNK_ARRAY = 251, ++ ZAP_CHUNK_TYPE_MAX = 250 ++} zap_chunk_type_t; ++ ++#define ZLF_ENTRIES_CDSORTED (1<<0) ++ ++/* ++ * TAKE NOTE: ++ * If zap_leaf_phys_t is modified, zap_leaf_byteswap() must be modified. ++ */ ++typedef struct zap_leaf_phys { ++ struct zap_leaf_header { ++ uint64_t lh_block_type; /* ZBT_LEAF */ ++ uint64_t lh_pad1; ++ uint64_t lh_prefix; /* hash prefix of this leaf */ ++ uint32_t lh_magic; /* ZAP_LEAF_MAGIC */ ++ uint16_t lh_nfree; /* number free chunks */ ++ uint16_t lh_nentries; /* number of entries */ ++ uint16_t lh_prefix_len; /* num bits used to id this */ ++ ++/* above is accessable to zap, below is zap_leaf private */ ++ ++ uint16_t lh_freelist; /* chunk head of free list */ ++ uint8_t lh_flags; /* ZLF_* flags */ ++ uint8_t lh_pad2[11]; ++ } l_hdr; /* 2 24-byte chunks */ ++ ++ /* ++ * The header is followed by a hash table with ++ * ZAP_LEAF_HASH_NUMENTRIES(zap) entries. The hash table is ++ * followed by an array of ZAP_LEAF_NUMCHUNKS(zap) ++ * zap_leaf_chunk structures. These structures are accessed ++ * with the ZAP_LEAF_CHUNK() macro. 
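++ *
++ * (Editorial aside with a worked example, assuming a 4096-byte leaf block
++ * so that l_bs is 12: ZAP_LEAF_HASH_NUMENTRIES() is then
++ * 1 << (12 - 5) = 128 hash entries, and ZAP_LEAF_NUMCHUNKS() is
++ * (4096 - 2 * 128) / 24 - 2 = 158 chunks of ZAP_LEAF_CHUNKSIZE (24)
++ * bytes each.)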
++ */ ++ ++ uint16_t l_hash[1]; ++} zap_leaf_phys_t; ++ ++typedef union zap_leaf_chunk { ++ struct zap_leaf_entry { ++ uint8_t le_type; /* always ZAP_CHUNK_ENTRY */ ++ uint8_t le_value_intlen; /* size of value's ints */ ++ uint16_t le_next; /* next entry in hash chain */ ++ uint16_t le_name_chunk; /* first chunk of the name */ ++ uint16_t le_name_numints; /* ints in name (incl null) */ ++ uint16_t le_value_chunk; /* first chunk of the value */ ++ uint16_t le_value_numints; /* value length in ints */ ++ uint32_t le_cd; /* collision differentiator */ ++ uint64_t le_hash; /* hash value of the name */ ++ } l_entry; ++ struct zap_leaf_array { ++ uint8_t la_type; /* always ZAP_CHUNK_ARRAY */ ++ uint8_t la_array[ZAP_LEAF_ARRAY_BYTES]; ++ uint16_t la_next; /* next blk or CHAIN_END */ ++ } l_array; ++ struct zap_leaf_free { ++ uint8_t lf_type; /* always ZAP_CHUNK_FREE */ ++ uint8_t lf_pad[ZAP_LEAF_ARRAY_BYTES]; ++ uint16_t lf_next; /* next in free list, or CHAIN_END */ ++ } l_free; ++} zap_leaf_chunk_t; ++ ++typedef struct zap_leaf { ++ krwlock_t l_rwlock; ++ uint64_t l_blkid; /* 1< ++#include ++#include ++#endif ++#include ++#include ++#include ++#include ++ ++#ifdef __cplusplus ++extern "C" { ++#endif ++ ++struct znode_phys; ++ ++#define ACE_SLOT_CNT 6 ++#define ZFS_ACL_VERSION_INITIAL 0ULL ++#define ZFS_ACL_VERSION_FUID 1ULL ++#define ZFS_ACL_VERSION ZFS_ACL_VERSION_FUID ++ ++/* ++ * ZFS ACLs are store in various forms. ++ * Files created with ACL version ZFS_ACL_VERSION_INITIAL ++ * will all be created with fixed length ACEs of type ++ * zfs_oldace_t. ++ * ++ * Files with ACL version ZFS_ACL_VERSION_FUID will be created ++ * with various sized ACEs. The abstraction entries will utilize ++ * zfs_ace_hdr_t, normal user/group entries will use zfs_ace_t ++ * and some specialized CIFS ACEs will use zfs_object_ace_t. ++ */ ++ ++/* ++ * All ACEs have a common hdr. For ++ * owner@, group@, and everyone@ this is all ++ * thats needed. ++ */ ++typedef struct zfs_ace_hdr { ++ uint16_t z_type; ++ uint16_t z_flags; ++ uint32_t z_access_mask; ++} zfs_ace_hdr_t; ++ ++typedef zfs_ace_hdr_t zfs_ace_abstract_t; ++ ++/* ++ * Standard ACE ++ */ ++typedef struct zfs_ace { ++ zfs_ace_hdr_t z_hdr; ++ uint64_t z_fuid; ++} zfs_ace_t; ++ ++/* ++ * The following type only applies to ACE_ACCESS_ALLOWED|DENIED_OBJECT_ACE_TYPE ++ * and will only be set/retrieved in a CIFS context. ++ */ ++ ++typedef struct zfs_object_ace { ++ zfs_ace_t z_ace; ++ uint8_t z_object_type[16]; /* object type */ ++ uint8_t z_inherit_type[16]; /* inherited object type */ ++} zfs_object_ace_t; ++ ++typedef struct zfs_oldace { ++ uint32_t z_fuid; /* "who" */ ++ uint32_t z_access_mask; /* access mask */ ++ uint16_t z_flags; /* flags, i.e inheritance */ ++ uint16_t z_type; /* type of entry allow/deny */ ++} zfs_oldace_t; ++ ++typedef struct zfs_acl_phys_v0 { ++ uint64_t z_acl_extern_obj; /* ext acl pieces */ ++ uint32_t z_acl_count; /* Number of ACEs */ ++ uint16_t z_acl_version; /* acl version */ ++ uint16_t z_acl_pad; /* pad */ ++ zfs_oldace_t z_ace_data[ACE_SLOT_CNT]; /* 6 standard ACEs */ ++} zfs_acl_phys_v0_t; ++ ++#define ZFS_ACE_SPACE (sizeof (zfs_oldace_t) * ACE_SLOT_CNT) ++ ++/* ++ * Size of ACL count is always 2 bytes. 
++ * Necessary to for dealing with both V0 ACL and V1 ACL layout ++ */ ++#define ZFS_ACL_COUNT_SIZE (sizeof (uint16_t)) ++ ++typedef struct zfs_acl_phys { ++ uint64_t z_acl_extern_obj; /* ext acl pieces */ ++ uint32_t z_acl_size; /* Number of bytes in ACL */ ++ uint16_t z_acl_version; /* acl version */ ++ uint16_t z_acl_count; /* ace count */ ++ uint8_t z_ace_data[ZFS_ACE_SPACE]; /* space for embedded ACEs */ ++} zfs_acl_phys_t; ++ ++typedef struct acl_ops { ++ uint32_t (*ace_mask_get) (void *acep); /* get access mask */ ++ void (*ace_mask_set) (void *acep, ++ uint32_t mask); /* set access mask */ ++ uint16_t (*ace_flags_get) (void *acep); /* get flags */ ++ void (*ace_flags_set) (void *acep, ++ uint16_t flags); /* set flags */ ++ uint16_t (*ace_type_get)(void *acep); /* get type */ ++ void (*ace_type_set)(void *acep, ++ uint16_t type); /* set type */ ++ uint64_t (*ace_who_get)(void *acep); /* get who/fuid */ ++ void (*ace_who_set)(void *acep, ++ uint64_t who); /* set who/fuid */ ++ size_t (*ace_size)(void *acep); /* how big is this ace */ ++ size_t (*ace_abstract_size)(void); /* sizeof abstract entry */ ++ int (*ace_mask_off)(void); /* off of access mask in ace */ ++ int (*ace_data)(void *acep, void **datap); ++ /* ptr to data if any */ ++} acl_ops_t; ++ ++/* ++ * A zfs_acl_t structure is composed of a list of zfs_acl_node_t's. ++ * Each node will have one or more ACEs associated with it. You will ++ * only have multiple nodes during a chmod operation. Normally only ++ * one node is required. ++ */ ++typedef struct zfs_acl_node { ++ list_node_t z_next; /* Next chunk of ACEs */ ++ void *z_acldata; /* pointer into actual ACE(s) */ ++ void *z_allocdata; /* pointer to kmem allocated memory */ ++ size_t z_allocsize; /* Size of blob in bytes */ ++ size_t z_size; /* length of ACL data */ ++ uint64_t z_ace_count; /* number of ACEs in this acl node */ ++ int z_ace_idx; /* ace iterator positioned on */ ++} zfs_acl_node_t; ++ ++typedef struct zfs_acl { ++ uint64_t z_acl_count; /* Number of ACEs */ ++ size_t z_acl_bytes; /* Number of bytes in ACL */ ++ uint_t z_version; /* version of ACL */ ++ void *z_next_ace; /* pointer to next ACE */ ++ uint64_t z_hints; /* ACL hints (ZFS_INHERIT_ACE ...) */ ++ zfs_acl_node_t *z_curr_node; /* current node iterator is handling */ ++ list_t z_acl; /* chunks of ACE data */ ++ acl_ops_t *z_ops; /* ACL operations */ ++} zfs_acl_t; ++ ++typedef struct acl_locator_cb { ++ zfs_acl_t *cb_aclp; ++ zfs_acl_node_t *cb_acl_node; ++} zfs_acl_locator_cb_t; ++ ++#define ACL_DATA_ALLOCED 0x1 ++#define ZFS_ACL_SIZE(aclcnt) (sizeof (ace_t) * (aclcnt)) ++ ++struct zfs_fuid_info; ++ ++typedef struct zfs_acl_ids { ++ uint64_t z_fuid; /* file owner fuid */ ++ uint64_t z_fgid; /* file group owner fuid */ ++ uint64_t z_mode; /* mode to set on create */ ++ zfs_acl_t *z_aclp; /* ACL to create with file */ ++ struct zfs_fuid_info *z_fuidp; /* for tracking fuids for log */ ++} zfs_acl_ids_t; ++ ++/* ++ * Property values for acl_mode and acl_inherit. ++ * ++ * acl_mode can take discard, noallow, groupmask and passthrough. ++ * whereas acl_inherit has secure instead of groupmask. 
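++ *
++ * (Editorial aside, an assumption not stated in this header: these numeric
++ * values back the aclmode/aclinherit dataset properties, so for example
++ * "zfs set aclinherit=passthrough <dataset>" stores ZFS_ACL_PASSTHROUGH,
++ * and the legacy "secure" setting maps to ZFS_ACL_RESTRICTED.)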
++ */ ++ ++#define ZFS_ACL_DISCARD 0 ++#define ZFS_ACL_NOALLOW 1 ++#define ZFS_ACL_GROUPMASK 2 ++#define ZFS_ACL_PASSTHROUGH 3 ++#define ZFS_ACL_RESTRICTED 4 ++#define ZFS_ACL_PASSTHROUGH_X 5 ++ ++struct znode; ++struct zfs_sb; ++ ++#ifdef _KERNEL ++int zfs_acl_ids_create(struct znode *, int, vattr_t *, ++ cred_t *, vsecattr_t *, zfs_acl_ids_t *); ++void zfs_acl_ids_free(zfs_acl_ids_t *); ++boolean_t zfs_acl_ids_overquota(struct zfs_sb *, zfs_acl_ids_t *); ++int zfs_getacl(struct znode *, vsecattr_t *, boolean_t, cred_t *); ++int zfs_setacl(struct znode *, vsecattr_t *, boolean_t, cred_t *); ++void zfs_acl_rele(void *); ++void zfs_oldace_byteswap(ace_t *, int); ++void zfs_ace_byteswap(void *, size_t, boolean_t); ++extern boolean_t zfs_has_access(struct znode *zp, cred_t *cr); ++extern int zfs_zaccess(struct znode *, int, int, boolean_t, cred_t *); ++int zfs_fastaccesschk_execute(struct znode *, cred_t *); ++extern int zfs_zaccess_rwx(struct znode *, mode_t, int, cred_t *); ++extern int zfs_zaccess_unix(struct znode *, mode_t, cred_t *); ++extern int zfs_acl_access(struct znode *, int, cred_t *); ++void zfs_acl_chmod_setattr(struct znode *, zfs_acl_t **, uint64_t); ++int zfs_zaccess_delete(struct znode *, struct znode *, cred_t *); ++int zfs_zaccess_rename(struct znode *, struct znode *, ++ struct znode *, struct znode *, cred_t *cr); ++void zfs_acl_free(zfs_acl_t *); ++int zfs_vsec_2_aclp(struct zfs_sb *, umode_t, vsecattr_t *, cred_t *, ++ struct zfs_fuid_info **, zfs_acl_t **); ++int zfs_aclset_common(struct znode *, zfs_acl_t *, cred_t *, dmu_tx_t *); ++uint64_t zfs_external_acl(struct znode *); ++int zfs_znode_acl_version(struct znode *); ++int zfs_acl_size(struct znode *, int *); ++zfs_acl_t *zfs_acl_alloc(int); ++zfs_acl_node_t *zfs_acl_node_alloc(size_t); ++void zfs_acl_xform(struct znode *, zfs_acl_t *, cred_t *); ++void zfs_acl_data_locator(void **, uint32_t *, uint32_t, boolean_t, void *); ++uint64_t zfs_mode_compute(uint64_t, zfs_acl_t *, ++ uint64_t *, uint64_t, uint64_t); ++int zfs_acl_chown_setattr(struct znode *); ++ ++#endif ++ ++#ifdef __cplusplus ++} ++#endif ++#endif /* _SYS_FS_ZFS_ACL_H */ +diff -uNr linux-3.2.33-go.orig/include/zfs/sys/zfs_context.h linux-3.2.33-go/include/zfs/sys/zfs_context.h +--- linux-3.2.33-go.orig/include/zfs/sys/zfs_context.h 1970-01-01 01:00:00.000000000 +0100 ++++ linux-3.2.33-go/include/zfs/sys/zfs_context.h 2012-11-16 23:25:34.338039461 +0100 +@@ -0,0 +1,653 @@ ++/* ++ * CDDL HEADER START ++ * ++ * The contents of this file are subject to the terms of the ++ * Common Development and Distribution License (the "License"). ++ * You may not use this file except in compliance with the License. ++ * ++ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE ++ * or http://www.opensolaris.org/os/licensing. ++ * See the License for the specific language governing permissions ++ * and limitations under the License. ++ * ++ * When distributing Covered Code, include this CDDL HEADER in each ++ * file and include the License file at usr/src/OPENSOLARIS.LICENSE. ++ * If applicable, add the following below this CDDL HEADER, with the ++ * fields enclosed by brackets "[]" replaced with your own identifying ++ * information: Portions Copyright [yyyy] [name of copyright owner] ++ * ++ * CDDL HEADER END ++ */ ++/* ++ * Copyright 2009 Sun Microsystems, Inc. All rights reserved. ++ * Use is subject to license terms. ++ */ ++/* ++ * Copyright 2011 Nexenta Systems, Inc. All rights reserved. ++ * Copyright (c) 2012, Joyent, Inc. All rights reserved. 
++ */ ++ ++#ifndef _SYS_ZFS_CONTEXT_H ++#define _SYS_ZFS_CONTEXT_H ++ ++#ifdef __KERNEL__ ++ ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++ ++#else /* _KERNEL */ ++ ++#define _SYS_MUTEX_H ++#define _SYS_RWLOCK_H ++#define _SYS_CONDVAR_H ++#define _SYS_SYSTM_H ++#define _SYS_DEBUG_H ++#define _SYS_T_LOCK_H ++#define _SYS_VNODE_H ++#define _SYS_VFS_H ++#define _SYS_SUNDDI_H ++#define _SYS_CALLB_H ++ ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++ ++/* ++ * Stack ++ */ ++ ++#define noinline __attribute__((noinline)) ++ ++/* ++ * Debugging ++ */ ++ ++/* ++ * Note that we are not using the debugging levels. ++ */ ++ ++#define CE_CONT 0 /* continuation */ ++#define CE_NOTE 1 /* notice */ ++#define CE_WARN 2 /* warning */ ++#define CE_PANIC 3 /* panic */ ++#define CE_IGNORE 4 /* print nothing */ ++ ++extern int aok; ++ ++/* ++ * ZFS debugging ++ */ ++ ++extern void dprintf_setup(int *argc, char **argv); ++extern void __dprintf(const char *file, const char *func, ++ int line, const char *fmt, ...); ++extern void cmn_err(int, const char *, ...); ++extern void vcmn_err(int, const char *, __va_list); ++extern void panic(const char *, ...); ++extern void vpanic(const char *, __va_list); ++ ++#define fm_panic panic ++ ++/* ++ * DTrace SDT probes have different signatures in userland than they do in ++ * kernel. If they're being used in kernel code, re-define them out of ++ * existence for their counterparts in libzpool. 
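++ *
++ * (Editorial aside, not part of the original comment: with the
++ * redefinitions below, a probe site such as
++ *
++ *	DTRACE_PROBE2(zfs__example, uint64_t, a, uint64_t, b);
++ *
++ * compiles to ((void)0) in the userland libzpool build; the probe name
++ * and arguments here are hypothetical.)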
++ */ ++ ++#ifdef DTRACE_PROBE ++#undef DTRACE_PROBE ++#define DTRACE_PROBE(a) ((void)0) ++#endif /* DTRACE_PROBE */ ++ ++#ifdef DTRACE_PROBE1 ++#undef DTRACE_PROBE1 ++#define DTRACE_PROBE1(a, b, c) ((void)0) ++#endif /* DTRACE_PROBE1 */ ++ ++#ifdef DTRACE_PROBE2 ++#undef DTRACE_PROBE2 ++#define DTRACE_PROBE2(a, b, c, d, e) ((void)0) ++#endif /* DTRACE_PROBE2 */ ++ ++#ifdef DTRACE_PROBE3 ++#undef DTRACE_PROBE3 ++#define DTRACE_PROBE3(a, b, c, d, e, f, g) ((void)0) ++#endif /* DTRACE_PROBE3 */ ++ ++#ifdef DTRACE_PROBE4 ++#undef DTRACE_PROBE4 ++#define DTRACE_PROBE4(a, b, c, d, e, f, g, h, i) ((void)0) ++#endif /* DTRACE_PROBE4 */ ++ ++/* ++ * Threads ++ */ ++#define TS_MAGIC 0x72f158ab4261e538ull ++#define TS_RUN 0x00000002 ++#ifdef __linux__ ++#define STACK_SIZE 8192 /* Linux x86 and amd64 */ ++#else ++#define STACK_SIZE 24576 /* Solaris */ ++#endif ++ ++#ifdef NPTL_GUARD_WITHIN_STACK ++#define EXTRA_GUARD_BYTES PAGESIZE ++#else ++#define EXTRA_GUARD_BYTES 0 ++#endif ++ ++/* in libzpool, p0 exists only to have its address taken */ ++typedef struct proc { ++ uintptr_t this_is_never_used_dont_dereference_it; ++} proc_t; ++ ++extern struct proc p0; ++#define curproc (&p0) ++ ++typedef void (*thread_func_t)(void *); ++typedef void (*thread_func_arg_t)(void *); ++typedef pthread_t kt_did_t; ++ ++typedef struct kthread { ++ kt_did_t t_tid; ++ thread_func_t t_func; ++ void * t_arg; ++} kthread_t; ++ ++#define tsd_get(key) pthread_getspecific(key) ++#define tsd_set(key, val) pthread_setspecific(key, val) ++#define curthread zk_thread_current() ++#define thread_exit zk_thread_exit ++#define thread_create(stk, stksize, func, arg, len, pp, state, pri) \ ++ zk_thread_create(stk, stksize, (thread_func_t)func, arg, \ ++ len, NULL, state, pri, PTHREAD_CREATE_DETACHED) ++#define thread_join(t) zk_thread_join(t) ++#define newproc(f,a,cid,pri,ctp,pid) (ENOSYS) ++ ++extern kthread_t *zk_thread_current(void); ++extern void zk_thread_exit(void); ++extern kthread_t *zk_thread_create(caddr_t stk, size_t stksize, ++ thread_func_t func, void *arg, size_t len, ++ proc_t *pp, int state, pri_t pri, int detachstate); ++extern void zk_thread_join(kt_did_t tid); ++ ++#define kpreempt_disable() ((void)0) ++#define kpreempt_enable() ((void)0) ++ ++#define PS_NONE -1 ++ ++#define issig(why) (FALSE) ++#define ISSIG(thr, why) (FALSE) ++ ++/* ++ * Mutexes ++ */ ++#define MTX_MAGIC 0x9522f51362a6e326ull ++#define MTX_INIT ((void *)NULL) ++#define MTX_DEST ((void *)-1UL) ++ ++typedef struct kmutex { ++ void *m_owner; ++ uint64_t m_magic; ++ pthread_mutex_t m_lock; ++} kmutex_t; ++ ++#define MUTEX_DEFAULT 0 ++#define MUTEX_HELD(m) ((m)->m_owner == curthread) ++#define MUTEX_NOT_HELD(m) (!MUTEX_HELD(m)) ++ ++extern void mutex_init(kmutex_t *mp, char *name, int type, void *cookie); ++extern void mutex_destroy(kmutex_t *mp); ++extern void mutex_enter(kmutex_t *mp); ++extern void mutex_exit(kmutex_t *mp); ++extern int mutex_tryenter(kmutex_t *mp); ++extern void *mutex_owner(kmutex_t *mp); ++extern int mutex_held(kmutex_t *mp); ++ ++/* ++ * RW locks ++ */ ++#define RW_MAGIC 0x4d31fb123648e78aull ++#define RW_INIT ((void *)NULL) ++#define RW_DEST ((void *)-1UL) ++ ++typedef struct krwlock { ++ void *rw_owner; ++ void *rw_wr_owner; ++ uint64_t rw_magic; ++ pthread_rwlock_t rw_lock; ++ uint_t rw_readers; ++} krwlock_t; ++ ++typedef int krw_t; ++ ++#define RW_READER 0 ++#define RW_WRITER 1 ++#define RW_DEFAULT RW_READER ++ ++#define RW_READ_HELD(x) ((x)->rw_readers > 0) ++#define RW_WRITE_HELD(x) ((x)->rw_wr_owner == curthread) ++#define 
RW_LOCK_HELD(x) (RW_READ_HELD(x) || RW_WRITE_HELD(x)) ++ ++extern void rw_init(krwlock_t *rwlp, char *name, int type, void *arg); ++extern void rw_destroy(krwlock_t *rwlp); ++extern void rw_enter(krwlock_t *rwlp, krw_t rw); ++extern int rw_tryenter(krwlock_t *rwlp, krw_t rw); ++extern int rw_tryupgrade(krwlock_t *rwlp); ++extern void rw_exit(krwlock_t *rwlp); ++#define rw_downgrade(rwlp) do { } while (0) ++ ++extern uid_t crgetuid(cred_t *cr); ++extern gid_t crgetgid(cred_t *cr); ++extern int crgetngroups(cred_t *cr); ++extern gid_t *crgetgroups(cred_t *cr); ++ ++/* ++ * Condition variables ++ */ ++#define CV_MAGIC 0xd31ea9a83b1b30c4ull ++ ++typedef struct kcondvar { ++ uint64_t cv_magic; ++ pthread_cond_t cv; ++} kcondvar_t; ++ ++#define CV_DEFAULT 0 ++ ++extern void cv_init(kcondvar_t *cv, char *name, int type, void *arg); ++extern void cv_destroy(kcondvar_t *cv); ++extern void cv_wait(kcondvar_t *cv, kmutex_t *mp); ++extern clock_t cv_timedwait(kcondvar_t *cv, kmutex_t *mp, clock_t abstime); ++extern void cv_signal(kcondvar_t *cv); ++extern void cv_broadcast(kcondvar_t *cv); ++#define cv_timedwait_interruptible(cv, mp, at) cv_timedwait(cv, mp, at) ++#define cv_wait_interruptible(cv, mp) cv_wait(cv, mp) ++ ++/* ++ * kstat creation, installation and deletion ++ */ ++extern kstat_t *kstat_create(char *, int, ++ char *, char *, uchar_t, ulong_t, uchar_t); ++extern void kstat_install(kstat_t *); ++extern void kstat_delete(kstat_t *); ++ ++/* ++ * Kernel memory ++ */ ++#define KM_SLEEP UMEM_NOFAIL ++#define KM_PUSHPAGE KM_SLEEP ++#define KM_NOSLEEP UMEM_DEFAULT ++#define KM_NODEBUG 0x0 ++#define KMC_NODEBUG UMC_NODEBUG ++#define KMC_KMEM 0x0 ++#define KMC_VMEM 0x0 ++#define kmem_alloc(_s, _f) umem_alloc(_s, _f) ++#define kmem_zalloc(_s, _f) umem_zalloc(_s, _f) ++#define kmem_free(_b, _s) umem_free(_b, _s) ++#define vmem_alloc(_s, _f) kmem_alloc(_s, _f) ++#define vmem_zalloc(_s, _f) kmem_zalloc(_s, _f) ++#define vmem_free(_b, _s) kmem_free(_b, _s) ++#define kmem_cache_create(_a, _b, _c, _d, _e, _f, _g, _h, _i) \ ++ umem_cache_create(_a, _b, _c, _d, _e, _f, _g, _h, _i) ++#define kmem_cache_destroy(_c) umem_cache_destroy(_c) ++#define kmem_cache_alloc(_c, _f) umem_cache_alloc(_c, _f) ++#define kmem_cache_free(_c, _b) umem_cache_free(_c, _b) ++#define kmem_debugging() 0 ++#define kmem_cache_reap_now(_c) /* nothing */ ++#define kmem_cache_set_move(_c, _cb) /* nothing */ ++#define POINTER_INVALIDATE(_pp) /* nothing */ ++#define POINTER_IS_VALID(_p) 0 ++ ++typedef umem_cache_t kmem_cache_t; ++ ++typedef enum kmem_cbrc { ++ KMEM_CBRC_YES, ++ KMEM_CBRC_NO, ++ KMEM_CBRC_LATER, ++ KMEM_CBRC_DONT_NEED, ++ KMEM_CBRC_DONT_KNOW ++} kmem_cbrc_t; ++ ++/* ++ * Task queues ++ */ ++typedef struct taskq taskq_t; ++typedef uintptr_t taskqid_t; ++typedef void (task_func_t)(void *); ++ ++typedef struct taskq_ent { ++ struct taskq_ent *tqent_next; ++ struct taskq_ent *tqent_prev; ++ task_func_t *tqent_func; ++ void *tqent_arg; ++ uintptr_t tqent_flags; ++} taskq_ent_t; ++ ++#define TQENT_FLAG_PREALLOC 0x1 /* taskq_dispatch_ent used */ ++ ++#define TASKQ_PREPOPULATE 0x0001 ++#define TASKQ_CPR_SAFE 0x0002 /* Use CPR safe protocol */ ++#define TASKQ_DYNAMIC 0x0004 /* Use dynamic thread scheduling */ ++#define TASKQ_THREADS_CPU_PCT 0x0008 /* Scale # threads by # cpus */ ++#define TASKQ_DC_BATCH 0x0010 /* Mark threads as batch */ ++ ++#define TQ_SLEEP KM_SLEEP /* Can block for memory */ ++#define TQ_NOSLEEP KM_NOSLEEP /* cannot block for memory; may fail */ ++#define TQ_PUSHPAGE KM_PUSHPAGE /* Cannot perform I/O */ 
++#define TQ_NOQUEUE 0x02 /* Do not enqueue if can't dispatch */ ++#define TQ_FRONT 0x08 /* Queue in front */ ++ ++extern taskq_t *system_taskq; ++ ++extern taskq_t *taskq_create(const char *, int, pri_t, int, int, uint_t); ++#define taskq_create_proc(a, b, c, d, e, p, f) \ ++ (taskq_create(a, b, c, d, e, f)) ++#define taskq_create_sysdc(a, b, d, e, p, dc, f) \ ++ (taskq_create(a, b, maxclsyspri, d, e, f)) ++extern taskqid_t taskq_dispatch(taskq_t *, task_func_t, void *, uint_t); ++extern void taskq_dispatch_ent(taskq_t *, task_func_t, void *, uint_t, ++ taskq_ent_t *); ++extern int taskq_empty_ent(taskq_ent_t *); ++extern void taskq_init_ent(taskq_ent_t *); ++extern void taskq_destroy(taskq_t *); ++extern void taskq_wait(taskq_t *); ++extern int taskq_member(taskq_t *, kthread_t *); ++extern void system_taskq_init(void); ++extern void system_taskq_fini(void); ++ ++#define XVA_MAPSIZE 3 ++#define XVA_MAGIC 0x78766174 ++ ++/* ++ * vnodes ++ */ ++typedef struct vnode { ++ uint64_t v_size; ++ int v_fd; ++ char *v_path; ++} vnode_t; ++ ++#define AV_SCANSTAMP_SZ 32 /* length of anti-virus scanstamp */ ++ ++typedef struct xoptattr { ++ timestruc_t xoa_createtime; /* Create time of file */ ++ uint8_t xoa_archive; ++ uint8_t xoa_system; ++ uint8_t xoa_readonly; ++ uint8_t xoa_hidden; ++ uint8_t xoa_nounlink; ++ uint8_t xoa_immutable; ++ uint8_t xoa_appendonly; ++ uint8_t xoa_nodump; ++ uint8_t xoa_settable; ++ uint8_t xoa_opaque; ++ uint8_t xoa_av_quarantined; ++ uint8_t xoa_av_modified; ++ uint8_t xoa_av_scanstamp[AV_SCANSTAMP_SZ]; ++ uint8_t xoa_reparse; ++ uint8_t xoa_offline; ++ uint8_t xoa_sparse; ++} xoptattr_t; ++ ++typedef struct vattr { ++ uint_t va_mask; /* bit-mask of attributes */ ++ u_offset_t va_size; /* file size in bytes */ ++} vattr_t; ++ ++ ++typedef struct xvattr { ++ vattr_t xva_vattr; /* Embedded vattr structure */ ++ uint32_t xva_magic; /* Magic Number */ ++ uint32_t xva_mapsize; /* Size of attr bitmap (32-bit words) */ ++ uint32_t *xva_rtnattrmapp; /* Ptr to xva_rtnattrmap[] */ ++ uint32_t xva_reqattrmap[XVA_MAPSIZE]; /* Requested attrs */ ++ uint32_t xva_rtnattrmap[XVA_MAPSIZE]; /* Returned attrs */ ++ xoptattr_t xva_xoptattrs; /* Optional attributes */ ++} xvattr_t; ++ ++typedef struct vsecattr { ++ uint_t vsa_mask; /* See below */ ++ int vsa_aclcnt; /* ACL entry count */ ++ void *vsa_aclentp; /* pointer to ACL entries */ ++ int vsa_dfaclcnt; /* default ACL entry count */ ++ void *vsa_dfaclentp; /* pointer to default ACL entries */ ++ size_t vsa_aclentsz; /* ACE size in bytes of vsa_aclentp */ ++} vsecattr_t; ++ ++#define AT_TYPE 0x00001 ++#define AT_MODE 0x00002 ++#define AT_UID 0x00004 ++#define AT_GID 0x00008 ++#define AT_FSID 0x00010 ++#define AT_NODEID 0x00020 ++#define AT_NLINK 0x00040 ++#define AT_SIZE 0x00080 ++#define AT_ATIME 0x00100 ++#define AT_MTIME 0x00200 ++#define AT_CTIME 0x00400 ++#define AT_RDEV 0x00800 ++#define AT_BLKSIZE 0x01000 ++#define AT_NBLOCKS 0x02000 ++#define AT_SEQ 0x08000 ++#define AT_XVATTR 0x10000 ++ ++#define CRCREAT 0 ++ ++extern int fop_getattr(vnode_t *vp, vattr_t *vap); ++ ++#define VOP_CLOSE(vp, f, c, o, cr, ct) vn_close(vp) ++#define VOP_PUTPAGE(vp, of, sz, fl, cr, ct) 0 ++#define VOP_GETATTR(vp, vap, fl, cr, ct) fop_getattr((vp), (vap)); ++ ++#define VOP_FSYNC(vp, f, cr, ct) fsync((vp)->v_fd) ++ ++#define VN_RELE(vp) vn_close(vp) ++ ++extern int vn_open(char *path, int x1, int oflags, int mode, vnode_t **vpp, ++ int x2, int x3); ++extern int vn_openat(char *path, int x1, int oflags, int mode, vnode_t **vpp, ++ int x2, int x3, 
vnode_t *vp, int fd); ++extern int vn_rdwr(int uio, vnode_t *vp, void *addr, ssize_t len, ++ offset_t offset, int x1, int x2, rlim64_t x3, void *x4, ssize_t *residp); ++extern void vn_close(vnode_t *vp); ++ ++#define vn_remove(path, x1, x2) remove(path) ++#define vn_rename(from, to, seg) rename((from), (to)) ++#define vn_is_readonly(vp) B_FALSE ++ ++extern vnode_t *rootdir; ++ ++#include /* for FREAD, FWRITE, etc */ ++ ++/* ++ * Random stuff ++ */ ++#define ddi_get_lbolt() (gethrtime() >> 23) ++#define ddi_get_lbolt64() (gethrtime() >> 23) ++#define hz 119 /* frequency when using gethrtime() >> 23 for lbolt */ ++ ++extern void delay(clock_t ticks); ++ ++#define gethrestime_sec() time(NULL) ++#define gethrestime(t) \ ++ do {\ ++ (t)->tv_sec = gethrestime_sec();\ ++ (t)->tv_nsec = 0;\ ++ } while (0); ++ ++#define max_ncpus 64 ++ ++#define minclsyspri 60 ++#define maxclsyspri 99 ++ ++#define CPU_SEQID (pthread_self() & (max_ncpus - 1)) ++ ++#define kcred NULL ++#define CRED() NULL ++ ++#define ptob(x) ((x) * PAGESIZE) ++ ++extern uint64_t physmem; ++ ++extern int highbit(ulong_t i); ++extern int random_get_bytes(uint8_t *ptr, size_t len); ++extern int random_get_pseudo_bytes(uint8_t *ptr, size_t len); ++ ++extern void kernel_init(int); ++extern void kernel_fini(void); ++ ++struct spa; ++extern void nicenum(uint64_t num, char *buf); ++extern void show_pool_stats(struct spa *); ++ ++typedef struct callb_cpr { ++ kmutex_t *cc_lockp; ++} callb_cpr_t; ++ ++#define CALLB_CPR_INIT(cp, lockp, func, name) { \ ++ (cp)->cc_lockp = lockp; \ ++} ++ ++#define CALLB_CPR_SAFE_BEGIN(cp) { \ ++ ASSERT(MUTEX_HELD((cp)->cc_lockp)); \ ++} ++ ++#define CALLB_CPR_SAFE_END(cp, lockp) { \ ++ ASSERT(MUTEX_HELD((cp)->cc_lockp)); \ ++} ++ ++#define CALLB_CPR_EXIT(cp) { \ ++ ASSERT(MUTEX_HELD((cp)->cc_lockp)); \ ++ mutex_exit((cp)->cc_lockp); \ ++} ++ ++#define zone_dataset_visible(x, y) (1) ++#define INGLOBALZONE(z) (1) ++ ++extern char *kmem_vasprintf(const char *fmt, va_list adx); ++extern char *kmem_asprintf(const char *fmt, ...); ++#define strfree(str) kmem_free((str), strlen(str)+1) ++ ++/* ++ * Hostname information ++ */ ++extern char hw_serial[]; /* for userland-emulated hostid access */ ++extern int ddi_strtoul(const char *str, char **nptr, int base, ++ unsigned long *result); ++ ++extern int ddi_strtoull(const char *str, char **nptr, int base, ++ u_longlong_t *result); ++ ++/* ZFS Boot Related stuff. 
*/ ++ ++struct _buf { ++ intptr_t _fd; ++}; ++ ++struct bootstat { ++ uint64_t st_size; ++}; ++ ++typedef struct ace_object { ++ uid_t a_who; ++ uint32_t a_access_mask; ++ uint16_t a_flags; ++ uint16_t a_type; ++ uint8_t a_obj_type[16]; ++ uint8_t a_inherit_obj_type[16]; ++} ace_object_t; ++ ++ ++#define ACE_ACCESS_ALLOWED_OBJECT_ACE_TYPE 0x05 ++#define ACE_ACCESS_DENIED_OBJECT_ACE_TYPE 0x06 ++#define ACE_SYSTEM_AUDIT_OBJECT_ACE_TYPE 0x07 ++#define ACE_SYSTEM_ALARM_OBJECT_ACE_TYPE 0x08 ++ ++extern struct _buf *kobj_open_file(char *name); ++extern int kobj_read_file(struct _buf *file, char *buf, unsigned size, ++ unsigned off); ++extern void kobj_close_file(struct _buf *file); ++extern int kobj_get_filesize(struct _buf *file, uint64_t *size); ++extern int zfs_secpolicy_snapshot_perms(const char *name, cred_t *cr); ++extern int zfs_secpolicy_rename_perms(const char *from, const char *to, ++ cred_t *cr); ++extern int zfs_secpolicy_destroy_perms(const char *name, cred_t *cr); ++extern zoneid_t getzoneid(void); ++ ++/* SID stuff */ ++typedef struct ksiddomain { ++ uint_t kd_ref; ++ uint_t kd_len; ++ char *kd_name; ++} ksiddomain_t; ++ ++ksiddomain_t *ksid_lookupdomain(const char *); ++void ksiddomain_rele(ksiddomain_t *); ++ ++#define DDI_SLEEP KM_SLEEP ++#define ddi_log_sysevent(_a, _b, _c, _d, _e, _f, _g) \ ++ sysevent_post_event(_c, _d, _b, "libzpool", _e, _f) ++ ++#endif /* _KERNEL */ ++ ++#endif /* _SYS_ZFS_CONTEXT_H */ +diff -uNr linux-3.2.33-go.orig/include/zfs/sys/zfs_ctldir.h linux-3.2.33-go/include/zfs/sys/zfs_ctldir.h +--- linux-3.2.33-go.orig/include/zfs/sys/zfs_ctldir.h 1970-01-01 01:00:00.000000000 +0100 ++++ linux-3.2.33-go/include/zfs/sys/zfs_ctldir.h 2012-11-16 23:25:34.338039461 +0100 +@@ -0,0 +1,113 @@ ++/* ++ * CDDL HEADER START ++ * ++ * The contents of this file are subject to the terms of the ++ * Common Development and Distribution License (the "License"). ++ * You may not use this file except in compliance with the License. ++ * ++ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE ++ * or http://www.opensolaris.org/os/licensing. ++ * See the License for the specific language governing permissions ++ * and limitations under the License. ++ * ++ * When distributing Covered Code, include this CDDL HEADER in each ++ * file and include the License file at usr/src/OPENSOLARIS.LICENSE. ++ * If applicable, add the following below this CDDL HEADER, with the ++ * fields enclosed by brackets "[]" replaced with your own identifying ++ * information: Portions Copyright [yyyy] [name of copyright owner] ++ * ++ * CDDL HEADER END ++ */ ++/* ++ * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (C) 2011 Lawrence Livermore National Security, LLC. ++ * Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER). ++ * LLNL-CODE-403049. 
++ * Rewritten for Linux by: ++ * Rohan Puri ++ * Brian Behlendorf ++ */ ++ ++#ifndef _ZFS_CTLDIR_H ++#define _ZFS_CTLDIR_H ++ ++#include ++#include ++#include ++ ++#define ZFS_CTLDIR_NAME ".zfs" ++#define ZFS_SNAPDIR_NAME "snapshot" ++#define ZFS_SHAREDIR_NAME "shares" ++ ++#define zfs_has_ctldir(zdp) \ ++ ((zdp)->z_id == ZTOZSB(zdp)->z_root && \ ++ (ZTOZSB(zdp)->z_ctldir != NULL)) ++#define zfs_show_ctldir(zdp) \ ++ (zfs_has_ctldir(zdp) && \ ++ (ZTOZSB(zdp)->z_show_ctldir)) ++ ++typedef struct { ++ char *se_name; ++ char *se_path; ++ struct inode *se_inode; ++ struct delayed_work se_work; ++ avl_node_t se_node; ++} zfs_snapentry_t; ++ ++/* zfsctl generic functions */ ++extern int snapentry_compare(const void *a, const void *b); ++extern boolean_t zfsctl_is_node(struct inode *ip); ++extern boolean_t zfsctl_is_snapdir(struct inode *ip); ++extern void zfsctl_inode_inactive(struct inode *ip); ++extern void zfsctl_inode_destroy(struct inode *ip); ++extern int zfsctl_create(zfs_sb_t *zsb); ++extern void zfsctl_destroy(zfs_sb_t *zsb); ++extern struct inode *zfsctl_root(znode_t *zp); ++extern int zfsctl_fid(struct inode *ip, fid_t *fidp); ++ ++/* zfsctl '.zfs' functions */ ++extern int zfsctl_root_lookup(struct inode *dip, char *name, ++ struct inode **ipp, int flags, cred_t *cr, int *direntflags, ++ pathname_t *realpnp); ++ ++/* zfsctl '.zfs/snapshot' functions */ ++extern int zfsctl_snapdir_lookup(struct inode *dip, char *name, ++ struct inode **ipp, int flags, cred_t *cr, int *direntflags, ++ pathname_t *realpnp); ++extern int zfsctl_snapdir_rename(struct inode *sdip, char *sname, ++ struct inode *tdip, char *tname, cred_t *cr, int flags); ++extern int zfsctl_snapdir_remove(struct inode *dip, char *name, cred_t *cr, ++ int flags); ++extern int zfsctl_snapdir_mkdir(struct inode *dip, char *dirname, vattr_t *vap, ++ struct inode **ipp, cred_t *cr, int flags); ++extern void zfsctl_snapdir_inactive(struct inode *ip); ++extern int zfsctl_unmount_snapshot(zfs_sb_t *zsb, char *name, int flags); ++extern int zfsctl_unmount_snapshots(zfs_sb_t *zsb, int flags, int *count); ++extern int zfsctl_mount_snapshot(struct path *path, int flags); ++extern int zfsctl_lookup_objset(struct super_block *sb, uint64_t objsetid, ++ zfs_sb_t **zsb); ++ ++/* zfsctl '.zfs/shares' functions */ ++extern int zfsctl_shares_lookup(struct inode *dip, char *name, ++ struct inode **ipp, int flags, cred_t *cr, int *direntflags, ++ pathname_t *realpnp); ++ ++/* zfsctl_init/fini functions */ ++extern void zfsctl_init(void); ++extern void zfsctl_fini(void); ++ ++/* ++ * These inodes numbers are reserved for the .zfs control directory. ++ * It is important that they be no larger that 48-bits because only ++ * 6 bytes are reserved in the NFS file handle for the object number. ++ * However, they should be as large as possible to avoid conflicts ++ * with the objects which are assigned monotonically by the dmu. 
++ */ ++#define ZFSCTL_INO_ROOT 0x0000FFFFFFFFFFFFULL ++#define ZFSCTL_INO_SHARES 0x0000FFFFFFFFFFFEULL ++#define ZFSCTL_INO_SNAPDIR 0x0000FFFFFFFFFFFDULL ++#define ZFSCTL_INO_SNAPDIRS 0x0000FFFFFFFFFFFCULL ++ ++#define ZFSCTL_EXPIRE_SNAPSHOT 300 ++ ++#endif /* _ZFS_CTLDIR_H */ +diff -uNr linux-3.2.33-go.orig/include/zfs/sys/zfs_debug.h linux-3.2.33-go/include/zfs/sys/zfs_debug.h +--- linux-3.2.33-go.orig/include/zfs/sys/zfs_debug.h 1970-01-01 01:00:00.000000000 +0100 ++++ linux-3.2.33-go/include/zfs/sys/zfs_debug.h 2012-11-16 23:25:34.343039404 +0100 +@@ -0,0 +1,81 @@ ++/* ++ * CDDL HEADER START ++ * ++ * The contents of this file are subject to the terms of the ++ * Common Development and Distribution License (the "License"). ++ * You may not use this file except in compliance with the License. ++ * ++ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE ++ * or http://www.opensolaris.org/os/licensing. ++ * See the License for the specific language governing permissions ++ * and limitations under the License. ++ * ++ * When distributing Covered Code, include this CDDL HEADER in each ++ * file and include the License file at usr/src/OPENSOLARIS.LICENSE. ++ * If applicable, add the following below this CDDL HEADER, with the ++ * fields enclosed by brackets "[]" replaced with your own identifying ++ * information: Portions Copyright [yyyy] [name of copyright owner] ++ * ++ * CDDL HEADER END ++ */ ++/* ++ * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved. ++ */ ++ ++#ifndef _SYS_ZFS_DEBUG_H ++#define _SYS_ZFS_DEBUG_H ++ ++#ifndef TRUE ++#define TRUE 1 ++#endif ++ ++#ifndef FALSE ++#define FALSE 0 ++#endif ++ ++/* ++ * ZFS debugging - Always enabled for user space builds. ++ */ ++#if !defined(ZFS_DEBUG) && !defined(_KERNEL) ++#define ZFS_DEBUG ++#endif ++ ++extern int zfs_flags; ++extern int zfs_recover; ++ ++#define ZFS_DEBUG_DPRINTF 0x0001 ++#define ZFS_DEBUG_DBUF_VERIFY 0x0002 ++#define ZFS_DEBUG_DNODE_VERIFY 0x0004 ++#define ZFS_DEBUG_SNAPNAMES 0x0008 ++#define ZFS_DEBUG_MODIFY 0x0010 ++ ++/* ++ * Always log zfs debug messages to the spl debug subsystem as SS_USER1. ++ * When the SPL is configured with debugging enabled these messages will ++ * appear in the internal spl debug log, otherwise they are a no-op. ++ */ ++#if defined(_KERNEL) ++ ++#include ++#define dprintf(...) \ ++ if (zfs_flags & ZFS_DEBUG_DPRINTF) \ ++ __SDEBUG(NULL, SS_USER1, SD_DPRINTF, __VA_ARGS__) ++ ++/* ++ * When zfs is running is user space the debugging is always enabled. ++ * The messages will be printed using the __dprintf() function and ++ * filtered based on the zfs_flags variable. ++ */ ++#else ++#define dprintf(...) \ ++ if (zfs_flags & ZFS_DEBUG_DPRINTF) \ ++ __dprintf(__FILE__, __func__, __LINE__, __VA_ARGS__) ++ ++#endif /* _KERNEL */ ++ ++void zfs_panic_recover(const char *fmt, ...); ++#define zfs_dbgmsg(...) dprintf(__VA_ARGS__) ++void zfs_dbgmsg_init(void); ++void zfs_dbgmsg_fini(void); ++ ++#endif /* _SYS_ZFS_DEBUG_H */ +diff -uNr linux-3.2.33-go.orig/include/zfs/sys/zfs_dir.h linux-3.2.33-go/include/zfs/sys/zfs_dir.h +--- linux-3.2.33-go.orig/include/zfs/sys/zfs_dir.h 1970-01-01 01:00:00.000000000 +0100 ++++ linux-3.2.33-go/include/zfs/sys/zfs_dir.h 2012-11-16 23:25:34.342039415 +0100 +@@ -0,0 +1,74 @@ ++/* ++ * CDDL HEADER START ++ * ++ * The contents of this file are subject to the terms of the ++ * Common Development and Distribution License (the "License"). ++ * You may not use this file except in compliance with the License. 
++ * ++ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE ++ * or http://www.opensolaris.org/os/licensing. ++ * See the License for the specific language governing permissions ++ * and limitations under the License. ++ * ++ * When distributing Covered Code, include this CDDL HEADER in each ++ * file and include the License file at usr/src/OPENSOLARIS.LICENSE. ++ * If applicable, add the following below this CDDL HEADER, with the ++ * fields enclosed by brackets "[]" replaced with your own identifying ++ * information: Portions Copyright [yyyy] [name of copyright owner] ++ * ++ * CDDL HEADER END ++ */ ++/* ++ * Copyright 2010 Sun Microsystems, Inc. All rights reserved. ++ * Use is subject to license terms. ++ */ ++ ++#ifndef _SYS_FS_ZFS_DIR_H ++#define _SYS_FS_ZFS_DIR_H ++ ++#include ++#include ++#include ++ ++#ifdef __cplusplus ++extern "C" { ++#endif ++ ++/* zfs_dirent_lock() flags */ ++#define ZNEW 0x0001 /* entry should not exist */ ++#define ZEXISTS 0x0002 /* entry should exist */ ++#define ZSHARED 0x0004 /* shared access (zfs_dirlook()) */ ++#define ZXATTR 0x0008 /* we want the xattr dir */ ++#define ZRENAMING 0x0010 /* znode is being renamed */ ++#define ZCILOOK 0x0020 /* case-insensitive lookup requested */ ++#define ZCIEXACT 0x0040 /* c-i requires c-s match (rename) */ ++#define ZHAVELOCK 0x0080 /* z_name_lock is already held */ ++ ++/* mknode flags */ ++#define IS_ROOT_NODE 0x01 /* create a root node */ ++#define IS_XATTR 0x02 /* create an extended attribute node */ ++ ++extern int zfs_dirent_lock(zfs_dirlock_t **, znode_t *, char *, znode_t **, ++ int, int *, pathname_t *); ++extern void zfs_dirent_unlock(zfs_dirlock_t *); ++extern int zfs_link_create(zfs_dirlock_t *, znode_t *, dmu_tx_t *, int); ++extern int zfs_link_destroy(zfs_dirlock_t *, znode_t *, dmu_tx_t *, int, ++ boolean_t *); ++extern int zfs_dirlook(znode_t *, char *, struct inode **, int, int *, ++ pathname_t *); ++extern void zfs_mknode(znode_t *, vattr_t *, dmu_tx_t *, cred_t *, ++ uint_t, znode_t **, zfs_acl_ids_t *); ++extern void zfs_rmnode(znode_t *); ++extern void zfs_dl_name_switch(zfs_dirlock_t *dl, char *new, char **old); ++extern boolean_t zfs_dirempty(znode_t *); ++extern void zfs_unlinked_add(znode_t *, dmu_tx_t *); ++extern void zfs_unlinked_drain(zfs_sb_t *); ++extern int zfs_sticky_remove_access(znode_t *, znode_t *, cred_t *cr); ++extern int zfs_get_xattrdir(znode_t *, struct inode **, cred_t *, int); ++extern int zfs_make_xattrdir(znode_t *, vattr_t *, struct inode **, cred_t *); ++ ++#ifdef __cplusplus ++} ++#endif ++ ++#endif /* _SYS_FS_ZFS_DIR_H */ +diff -uNr linux-3.2.33-go.orig/include/zfs/sys/zfs_fuid.h linux-3.2.33-go/include/zfs/sys/zfs_fuid.h +--- linux-3.2.33-go.orig/include/zfs/sys/zfs_fuid.h 1970-01-01 01:00:00.000000000 +0100 ++++ linux-3.2.33-go/include/zfs/sys/zfs_fuid.h 2012-11-16 23:25:34.343039404 +0100 +@@ -0,0 +1,132 @@ ++/* ++ * CDDL HEADER START ++ * ++ * The contents of this file are subject to the terms of the ++ * Common Development and Distribution License (the "License"). ++ * You may not use this file except in compliance with the License. ++ * ++ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE ++ * or http://www.opensolaris.org/os/licensing. ++ * See the License for the specific language governing permissions ++ * and limitations under the License. ++ * ++ * When distributing Covered Code, include this CDDL HEADER in each ++ * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 
++ * If applicable, add the following below this CDDL HEADER, with the ++ * fields enclosed by brackets "[]" replaced with your own identifying ++ * information: Portions Copyright [yyyy] [name of copyright owner] ++ * ++ * CDDL HEADER END ++ */ ++/* ++ * Copyright 2010 Sun Microsystems, Inc. All rights reserved. ++ * Use is subject to license terms. ++ */ ++ ++#ifndef _SYS_FS_ZFS_FUID_H ++#define _SYS_FS_ZFS_FUID_H ++ ++#ifdef _KERNEL ++#include ++#include ++#include ++#include ++#endif ++#include ++#include ++ ++#ifdef __cplusplus ++extern "C" { ++#endif ++ ++typedef enum { ++ ZFS_OWNER, ++ ZFS_GROUP, ++ ZFS_ACE_USER, ++ ZFS_ACE_GROUP ++} zfs_fuid_type_t; ++ ++/* ++ * Estimate space needed for one more fuid table entry. ++ * for now assume its current size + 1K ++ */ ++#define FUID_SIZE_ESTIMATE(z) ((z)->z_fuid_size + (SPA_MINBLOCKSIZE << 1)) ++ ++#define FUID_INDEX(x) ((x) >> 32) ++#define FUID_RID(x) ((x) & 0xffffffff) ++#define FUID_ENCODE(idx, rid) (((uint64_t)(idx) << 32) | (rid)) ++/* ++ * FUIDs cause problems for the intent log ++ * we need to replay the creation of the FUID, ++ * but we can't count on the idmapper to be around ++ * and during replay the FUID index may be different than ++ * before. Also, if an ACL has 100 ACEs and 12 different ++ * domains we don't want to log 100 domain strings, but rather ++ * just the unique 12. ++ */ ++ ++/* ++ * The FUIDs in the log will index into ++ * domain string table and the bottom half will be the rid. ++ * Used for mapping ephemeral uid/gid during ACL setting to FUIDs ++ */ ++typedef struct zfs_fuid { ++ list_node_t z_next; ++ uint64_t z_id; /* uid/gid being converted to fuid */ ++ uint64_t z_domidx; /* index in AVL domain table */ ++ uint64_t z_logfuid; /* index for domain in log */ ++} zfs_fuid_t; ++ ++/* list of unique domains */ ++typedef struct zfs_fuid_domain { ++ list_node_t z_next; ++ uint64_t z_domidx; /* AVL tree idx */ ++ const char *z_domain; /* domain string */ ++} zfs_fuid_domain_t; ++ ++/* ++ * FUID information necessary for logging create, setattr, and setacl. 
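The FUID_INDEX, FUID_RID and FUID_ENCODE macros defined above split a 64-bit FUID into a domain-table index in the upper 32 bits and a RID in the lower 32 bits. Below is a minimal userspace round-trip sketch, restating those three macros so it compiles on its own (the sample index and rid values are made up); the zfs_fuid_info structure that the preceding comment introduces follows below.

/* Illustration only: encode a (domain index, rid) pair and split it again. */
#include <stdint.h>
#include <stdio.h>
#include <assert.h>

#define FUID_INDEX(x)         ((x) >> 32)
#define FUID_RID(x)           ((x) & 0xffffffff)
#define FUID_ENCODE(idx, rid) (((uint64_t)(idx) << 32) | (rid))

int main(void)
{
	uint64_t fuid = FUID_ENCODE(3, 1001);	/* domain index 3, rid 1001 */

	assert(FUID_INDEX(fuid) == 3);
	assert(FUID_RID(fuid) == 1001);
	printf("fuid 0x%016llx -> index %llu, rid %llu\n",
	    (unsigned long long)fuid,
	    (unsigned long long)FUID_INDEX(fuid),
	    (unsigned long long)FUID_RID(fuid));
	return (0);
}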
++ */ ++typedef struct zfs_fuid_info { ++ list_t z_fuids; ++ list_t z_domains; ++ uint64_t z_fuid_owner; ++ uint64_t z_fuid_group; ++ char **z_domain_table; /* Used during replay */ ++ uint32_t z_fuid_cnt; /* How many fuids in z_fuids */ ++ uint32_t z_domain_cnt; /* How many domains */ ++ size_t z_domain_str_sz; /* len of domain strings z_domain list */ ++} zfs_fuid_info_t; ++ ++#ifdef _KERNEL ++struct znode; ++extern uid_t zfs_fuid_map_id(zfs_sb_t *, uint64_t, cred_t *, zfs_fuid_type_t); ++extern void zfs_fuid_node_add(zfs_fuid_info_t **, const char *, uint32_t, ++ uint64_t, uint64_t, zfs_fuid_type_t); ++extern void zfs_fuid_destroy(zfs_sb_t *); ++extern uint64_t zfs_fuid_create_cred(zfs_sb_t *, zfs_fuid_type_t, ++ cred_t *, zfs_fuid_info_t **); ++extern uint64_t zfs_fuid_create(zfs_sb_t *, uint64_t, cred_t *, zfs_fuid_type_t, ++ zfs_fuid_info_t **); ++extern void zfs_fuid_map_ids(struct znode *zp, cred_t *cr, ++ uid_t *uid, uid_t *gid); ++extern zfs_fuid_info_t *zfs_fuid_info_alloc(void); ++extern void zfs_fuid_info_free(zfs_fuid_info_t *); ++extern boolean_t zfs_groupmember(zfs_sb_t *, uint64_t, cred_t *); ++void zfs_fuid_sync(zfs_sb_t *, dmu_tx_t *); ++extern int zfs_fuid_find_by_domain(zfs_sb_t *, const char *domain, ++ char **retdomain, boolean_t addok); ++extern const char *zfs_fuid_find_by_idx(zfs_sb_t *zsb, uint32_t idx); ++extern void zfs_fuid_txhold(zfs_sb_t *zsb, dmu_tx_t *tx); ++#endif ++ ++char *zfs_fuid_idx_domain(avl_tree_t *, uint32_t); ++void zfs_fuid_avl_tree_create(avl_tree_t *, avl_tree_t *); ++uint64_t zfs_fuid_table_load(objset_t *, uint64_t, avl_tree_t *, avl_tree_t *); ++void zfs_fuid_table_destroy(avl_tree_t *, avl_tree_t *); ++ ++#ifdef __cplusplus ++} ++#endif ++ ++#endif /* _SYS_FS_ZFS_FUID_H */ +diff -uNr linux-3.2.33-go.orig/include/zfs/sys/zfs_ioctl.h linux-3.2.33-go/include/zfs/sys/zfs_ioctl.h +--- linux-3.2.33-go.orig/include/zfs/sys/zfs_ioctl.h 1970-01-01 01:00:00.000000000 +0100 ++++ linux-3.2.33-go/include/zfs/sys/zfs_ioctl.h 2012-11-16 23:25:34.343039404 +0100 +@@ -0,0 +1,346 @@ ++/* ++ * CDDL HEADER START ++ * ++ * The contents of this file are subject to the terms of the ++ * Common Development and Distribution License (the "License"). ++ * You may not use this file except in compliance with the License. ++ * ++ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE ++ * or http://www.opensolaris.org/os/licensing. ++ * See the License for the specific language governing permissions ++ * and limitations under the License. ++ * ++ * When distributing Covered Code, include this CDDL HEADER in each ++ * file and include the License file at usr/src/OPENSOLARIS.LICENSE. ++ * If applicable, add the following below this CDDL HEADER, with the ++ * fields enclosed by brackets "[]" replaced with your own identifying ++ * information: Portions Copyright [yyyy] [name of copyright owner] ++ * ++ * CDDL HEADER END ++ */ ++/* ++ * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved. ++ */ ++ ++#ifndef _SYS_ZFS_IOCTL_H ++#define _SYS_ZFS_IOCTL_H ++ ++#include ++#include ++#include ++#include ++#include ++#include ++ ++#ifdef _KERNEL ++#include ++#endif /* _KERNEL */ ++ ++#ifdef __cplusplus ++extern "C" { ++#endif ++ ++/* ++ * Property values for snapdir ++ */ ++#define ZFS_SNAPDIR_HIDDEN 0 ++#define ZFS_SNAPDIR_VISIBLE 1 ++ ++/* ++ * Field manipulation macros for the drr_versioninfo field of the ++ * send stream header. ++ */ ++ ++/* ++ * Header types for zfs send streams. 
++ */ ++typedef enum drr_headertype { ++ DMU_SUBSTREAM = 0x1, ++ DMU_COMPOUNDSTREAM = 0x2 ++} drr_headertype_t; ++ ++#define DMU_GET_STREAM_HDRTYPE(vi) BF64_GET((vi), 0, 2) ++#define DMU_SET_STREAM_HDRTYPE(vi, x) BF64_SET((vi), 0, 2, x) ++ ++#define DMU_GET_FEATUREFLAGS(vi) BF64_GET((vi), 2, 30) ++#define DMU_SET_FEATUREFLAGS(vi, x) BF64_SET((vi), 2, 30, x) ++ ++/* ++ * Feature flags for zfs send streams (flags in drr_versioninfo) ++ */ ++ ++#define DMU_BACKUP_FEATURE_DEDUP (0x1) ++#define DMU_BACKUP_FEATURE_DEDUPPROPS (0x2) ++#define DMU_BACKUP_FEATURE_SA_SPILL (0x4) ++ ++/* ++ * Mask of all supported backup features ++ */ ++#define DMU_BACKUP_FEATURE_MASK (DMU_BACKUP_FEATURE_DEDUP | \ ++ DMU_BACKUP_FEATURE_DEDUPPROPS | DMU_BACKUP_FEATURE_SA_SPILL) ++ ++/* Are all features in the given flag word currently supported? */ ++#define DMU_STREAM_SUPPORTED(x) (!((x) & ~DMU_BACKUP_FEATURE_MASK)) ++ ++/* ++ * The drr_versioninfo field of the dmu_replay_record has the ++ * following layout: ++ * ++ * 64 56 48 40 32 24 16 8 0 ++ * +-------+-------+-------+-------+-------+-------+-------+-------+ ++ * | reserved | feature-flags |C|S| ++ * +-------+-------+-------+-------+-------+-------+-------+-------+ ++ * ++ * The low order two bits indicate the header type: SUBSTREAM (0x1) ++ * or COMPOUNDSTREAM (0x2). Using two bits for this is historical: ++ * this field used to be a version number, where the two version types ++ * were 1 and 2. Using two bits for this allows earlier versions of ++ * the code to be able to recognize send streams that don't use any ++ * of the features indicated by feature flags. ++ */ ++ ++#define DMU_BACKUP_MAGIC 0x2F5bacbacULL ++ ++#define DRR_FLAG_CLONE (1<<0) ++#define DRR_FLAG_CI_DATA (1<<1) ++ ++/* ++ * flags in the drr_checksumflags field in the DRR_WRITE and ++ * DRR_WRITE_BYREF blocks ++ */ ++#define DRR_CHECKSUM_DEDUP (1<<0) ++ ++#define DRR_IS_DEDUP_CAPABLE(flags) ((flags) & DRR_CHECKSUM_DEDUP) ++ ++/* ++ * zfs ioctl command structure ++ */ ++typedef struct dmu_replay_record { ++ enum { ++ DRR_BEGIN, DRR_OBJECT, DRR_FREEOBJECTS, ++ DRR_WRITE, DRR_FREE, DRR_END, DRR_WRITE_BYREF, ++ DRR_SPILL, DRR_NUMTYPES ++ } drr_type; ++ uint32_t drr_payloadlen; ++ union { ++ struct drr_begin { ++ uint64_t drr_magic; ++ uint64_t drr_versioninfo; /* was drr_version */ ++ uint64_t drr_creation_time; ++ dmu_objset_type_t drr_type; ++ uint32_t drr_flags; ++ uint64_t drr_toguid; ++ uint64_t drr_fromguid; ++ char drr_toname[MAXNAMELEN]; ++ } drr_begin; ++ struct drr_end { ++ zio_cksum_t drr_checksum; ++ uint64_t drr_toguid; ++ } drr_end; ++ struct drr_object { ++ uint64_t drr_object; ++ dmu_object_type_t drr_type; ++ dmu_object_type_t drr_bonustype; ++ uint32_t drr_blksz; ++ uint32_t drr_bonuslen; ++ uint8_t drr_checksumtype; ++ uint8_t drr_compress; ++ uint8_t drr_pad[6]; ++ uint64_t drr_toguid; ++ /* bonus content follows */ ++ } drr_object; ++ struct drr_freeobjects { ++ uint64_t drr_firstobj; ++ uint64_t drr_numobjs; ++ uint64_t drr_toguid; ++ } drr_freeobjects; ++ struct drr_write { ++ uint64_t drr_object; ++ dmu_object_type_t drr_type; ++ uint32_t drr_pad; ++ uint64_t drr_offset; ++ uint64_t drr_length; ++ uint64_t drr_toguid; ++ uint8_t drr_checksumtype; ++ uint8_t drr_checksumflags; ++ uint8_t drr_pad2[6]; ++ ddt_key_t drr_key; /* deduplication key */ ++ /* content follows */ ++ } drr_write; ++ struct drr_free { ++ uint64_t drr_object; ++ uint64_t drr_offset; ++ uint64_t drr_length; ++ uint64_t drr_toguid; ++ } drr_free; ++ struct drr_write_byref { ++ /* where to put the data */ 
++ uint64_t drr_object; ++ uint64_t drr_offset; ++ uint64_t drr_length; ++ uint64_t drr_toguid; ++ /* where to find the prior copy of the data */ ++ uint64_t drr_refguid; ++ uint64_t drr_refobject; ++ uint64_t drr_refoffset; ++ /* properties of the data */ ++ uint8_t drr_checksumtype; ++ uint8_t drr_checksumflags; ++ uint8_t drr_pad2[6]; ++ ddt_key_t drr_key; /* deduplication key */ ++ } drr_write_byref; ++ struct drr_spill { ++ uint64_t drr_object; ++ uint64_t drr_length; ++ uint64_t drr_toguid; ++ uint64_t drr_pad[4]; /* needed for crypto */ ++ /* spill data follows */ ++ } drr_spill; ++ } drr_u; ++} dmu_replay_record_t; ++ ++/* diff record range types */ ++typedef enum diff_type { ++ DDR_NONE = 0x1, ++ DDR_INUSE = 0x2, ++ DDR_FREE = 0x4 ++} diff_type_t; ++ ++/* ++ * The diff reports back ranges of free or in-use objects. ++ */ ++typedef struct dmu_diff_record { ++ uint64_t ddr_type; ++ uint64_t ddr_first; ++ uint64_t ddr_last; ++} dmu_diff_record_t; ++ ++typedef struct zinject_record { ++ uint64_t zi_objset; ++ uint64_t zi_object; ++ uint64_t zi_start; ++ uint64_t zi_end; ++ uint64_t zi_guid; ++ uint32_t zi_level; ++ uint32_t zi_error; ++ uint64_t zi_type; ++ uint32_t zi_freq; ++ uint32_t zi_failfast; ++ char zi_func[MAXNAMELEN]; ++ uint32_t zi_iotype; ++ int32_t zi_duration; ++ uint64_t zi_timer; ++} zinject_record_t; ++ ++#define ZINJECT_NULL 0x1 ++#define ZINJECT_FLUSH_ARC 0x2 ++#define ZINJECT_UNLOAD_SPA 0x4 ++ ++#define ZEVENT_NONBLOCK 0x1 ++#define ZEVENT_SIZE 1024 ++ ++typedef struct zfs_share { ++ uint64_t z_exportdata; ++ uint64_t z_sharedata; ++ uint64_t z_sharetype; /* 0 = share, 1 = unshare */ ++ uint64_t z_sharemax; /* max length of share string */ ++} zfs_share_t; ++ ++/* ++ * ZFS file systems may behave the usual, POSIX-compliant way, where ++ * name lookups are case-sensitive. They may also be set up so that ++ * all the name lookups are case-insensitive, or so that only some ++ * lookups, the ones that set an FIGNORECASE flag, are case-insensitive. 
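An aside on the drr_versioninfo layout documented earlier in this header: the low two bits hold the stream header type and the next 30 bits hold the feature flags, which is what DMU_GET_STREAM_HDRTYPE and DMU_GET_FEATUREFLAGS extract. A standalone sketch of that decoding, using plain shift-and-mask in place of the BF64_* helpers (the HDRTYPE/FEATURES names are invented for the example); the zfs_case enumeration that the preceding comment introduces follows below.

/*
 * Illustration only: decoding drr_versioninfo with plain shifts, matching
 * the bit layout in the comment above (low 2 bits = header type,
 * next 30 bits = feature flags).
 */
#include <stdint.h>
#include <stdio.h>

#define HDRTYPE(vi)   ((vi) & 0x3ULL)                /* BF64_GET(vi, 0, 2) */
#define FEATURES(vi)  (((vi) >> 2) & 0x3fffffffULL)  /* BF64_GET(vi, 2, 30) */

int main(void)
{
	/* Compound stream (0x2) with the SA_SPILL feature flag (0x4) set. */
	uint64_t vi = 0x2ULL | (0x4ULL << 2);

	printf("header type   = %llu\n", (unsigned long long)HDRTYPE(vi));
	printf("feature flags = 0x%llx\n", (unsigned long long)FEATURES(vi));
	return (0);
}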
++ */ ++typedef enum zfs_case { ++ ZFS_CASE_SENSITIVE, ++ ZFS_CASE_INSENSITIVE, ++ ZFS_CASE_MIXED ++} zfs_case_t; ++ ++typedef struct zfs_cmd { ++ char zc_name[MAXPATHLEN]; ++ char zc_value[MAXPATHLEN * 2]; ++ char zc_string[MAXNAMELEN]; ++ char zc_top_ds[MAXPATHLEN]; ++ uint64_t zc_guid; ++ uint64_t zc_nvlist_conf; /* really (char *) */ ++ uint64_t zc_nvlist_conf_size; ++ uint64_t zc_nvlist_src; /* really (char *) */ ++ uint64_t zc_nvlist_src_size; ++ uint64_t zc_nvlist_dst; /* really (char *) */ ++ uint64_t zc_nvlist_dst_size; ++ uint64_t zc_cookie; ++ uint64_t zc_objset_type; ++ uint64_t zc_perm_action; ++ uint64_t zc_history; /* really (char *) */ ++ uint64_t zc_history_len; ++ uint64_t zc_history_offset; ++ uint64_t zc_obj; ++ uint64_t zc_iflags; /* internal to zfs(7fs) */ ++ zfs_share_t zc_share; ++ dmu_objset_stats_t zc_objset_stats; ++ struct drr_begin zc_begin_record; ++ zinject_record_t zc_inject_record; ++ boolean_t zc_defer_destroy; ++ boolean_t zc_temphold; ++ uint64_t zc_action_handle; ++ int zc_cleanup_fd; ++ uint8_t zc_simple; ++ uint8_t zc_pad[3]; /* alignment */ ++ uint64_t zc_sendobj; ++ uint64_t zc_fromobj; ++ uint64_t zc_createtxg; ++ zfs_stat_t zc_stat; ++} zfs_cmd_t; ++ ++typedef struct zfs_useracct { ++ char zu_domain[256]; ++ uid_t zu_rid; ++ uint32_t zu_pad; ++ uint64_t zu_space; ++} zfs_useracct_t; ++ ++#define ZFSDEV_MAX_MINOR (1 << 16) ++#define ZFS_MIN_MINOR (ZFSDEV_MAX_MINOR + 1) ++ ++#define ZPOOL_EXPORT_AFTER_SPLIT 0x1 ++ ++#ifdef _KERNEL ++ ++typedef struct zfs_creat { ++ nvlist_t *zct_zplprops; ++ nvlist_t *zct_props; ++} zfs_creat_t; ++ ++extern int zfs_secpolicy_snapshot_perms(const char *name, cred_t *cr); ++extern int zfs_secpolicy_rename_perms(const char *from, ++ const char *to, cred_t *cr); ++extern int zfs_secpolicy_destroy_perms(const char *name, cred_t *cr); ++extern int zfs_unmount_snap(const char *, void *); ++ ++enum zfsdev_state_type { ++ ZST_ONEXIT, ++ ZST_ZEVENT, ++ ZST_ALL, ++}; ++ ++typedef struct zfsdev_state { ++ list_node_t zs_next; /* next zfsdev_state_t link */ ++ struct file *zs_file; /* associated file struct */ ++ minor_t zs_minor; /* made up minor number */ ++ void *zs_onexit; /* onexit data */ ++ void *zs_zevent; /* zevent data */ ++} zfsdev_state_t; ++ ++extern void *zfsdev_get_state(minor_t minor, enum zfsdev_state_type which); ++extern minor_t zfsdev_getminor(struct file *filp); ++extern minor_t zfsdev_minor_alloc(void); ++ ++#endif /* _KERNEL */ ++ ++#ifdef __cplusplus ++} ++#endif ++ ++#endif /* _SYS_ZFS_IOCTL_H */ +diff -uNr linux-3.2.33-go.orig/include/zfs/sys/zfs_onexit.h linux-3.2.33-go/include/zfs/sys/zfs_onexit.h +--- linux-3.2.33-go.orig/include/zfs/sys/zfs_onexit.h 1970-01-01 01:00:00.000000000 +0100 ++++ linux-3.2.33-go/include/zfs/sys/zfs_onexit.h 2012-11-16 23:25:34.342039415 +0100 +@@ -0,0 +1,66 @@ ++/* ++ * CDDL HEADER START ++ * ++ * The contents of this file are subject to the terms of the ++ * Common Development and Distribution License (the "License"). ++ * You may not use this file except in compliance with the License. ++ * ++ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE ++ * or http://www.opensolaris.org/os/licensing. ++ * See the License for the specific language governing permissions ++ * and limitations under the License. ++ * ++ * When distributing Covered Code, include this CDDL HEADER in each ++ * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 
++ * If applicable, add the following below this CDDL HEADER, with the ++ * fields enclosed by brackets "[]" replaced with your own identifying ++ * information: Portions Copyright [yyyy] [name of copyright owner] ++ * ++ * CDDL HEADER END ++ */ ++ ++/* ++ * Copyright (c) 2010, Oracle and/or its affiliates. All rights reserved. ++ */ ++ ++#ifndef _SYS_ZFS_ONEXIT_H ++#define _SYS_ZFS_ONEXIT_H ++ ++#include ++ ++#ifdef __cplusplus ++extern "C" { ++#endif ++ ++#ifdef _KERNEL ++ ++typedef struct zfs_onexit { ++ kmutex_t zo_lock; ++ list_t zo_actions; ++} zfs_onexit_t; ++ ++typedef struct zfs_onexit_action_node { ++ list_node_t za_link; ++ void (*za_func)(void *); ++ void *za_data; ++} zfs_onexit_action_node_t; ++ ++extern void zfs_onexit_init(zfs_onexit_t **zo); ++extern void zfs_onexit_destroy(zfs_onexit_t *zo); ++ ++#endif ++ ++extern int zfs_onexit_fd_hold(int fd, minor_t *minorp); ++extern void zfs_onexit_fd_rele(int fd); ++extern int zfs_onexit_add_cb(minor_t minor, void (*func)(void *), void *data, ++ uint64_t *action_handle); ++extern int zfs_onexit_del_cb(minor_t minor, uint64_t action_handle, ++ boolean_t fire); ++extern int zfs_onexit_cb_data(minor_t minor, uint64_t action_handle, ++ void **data); ++ ++#ifdef __cplusplus ++} ++#endif ++ ++#endif /* _SYS_ZFS_ONEXIT_H */ +diff -uNr linux-3.2.33-go.orig/include/zfs/sys/zfs_rlock.h linux-3.2.33-go/include/zfs/sys/zfs_rlock.h +--- linux-3.2.33-go.orig/include/zfs/sys/zfs_rlock.h 1970-01-01 01:00:00.000000000 +0100 ++++ linux-3.2.33-go/include/zfs/sys/zfs_rlock.h 2012-11-16 23:25:34.337039473 +0100 +@@ -0,0 +1,90 @@ ++/* ++ * CDDL HEADER START ++ * ++ * The contents of this file are subject to the terms of the ++ * Common Development and Distribution License (the "License"). ++ * You may not use this file except in compliance with the License. ++ * ++ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE ++ * or http://www.opensolaris.org/os/licensing. ++ * See the License for the specific language governing permissions ++ * and limitations under the License. ++ * ++ * When distributing Covered Code, include this CDDL HEADER in each ++ * file and include the License file at usr/src/OPENSOLARIS.LICENSE. ++ * If applicable, add the following below this CDDL HEADER, with the ++ * fields enclosed by brackets "[]" replaced with your own identifying ++ * information: Portions Copyright [yyyy] [name of copyright owner] ++ * ++ * CDDL HEADER END ++ */ ++/* ++ * Copyright 2006 Sun Microsystems, Inc. All rights reserved. ++ * Use is subject to license terms. ++ */ ++ ++#ifndef _SYS_FS_ZFS_RLOCK_H ++#define _SYS_FS_ZFS_RLOCK_H ++ ++ ++ ++#ifdef __cplusplus ++extern "C" { ++#endif ++ ++#ifdef _KERNEL ++ ++#include ++ ++typedef enum { ++ RL_READER, ++ RL_WRITER, ++ RL_APPEND ++} rl_type_t; ++ ++typedef struct rl { ++ znode_t *r_zp; /* znode this lock applies to */ ++ avl_node_t r_node; /* avl node link */ ++ uint64_t r_off; /* file range offset */ ++ uint64_t r_len; /* file range length */ ++ uint_t r_cnt; /* range reference count in tree */ ++ rl_type_t r_type; /* range type */ ++ kcondvar_t r_wr_cv; /* cv for waiting writers */ ++ kcondvar_t r_rd_cv; /* cv for waiting readers */ ++ uint8_t r_proxy; /* acting for original range */ ++ uint8_t r_write_wanted; /* writer wants to lock this range */ ++ uint8_t r_read_wanted; /* reader wants to lock this range */ ++ list_node_t rl_node; /* used for deferred release */ ++} rl_t; ++ ++/* ++ * Lock a range (offset, length) as either shared (READER) ++ * or exclusive (WRITER or APPEND). 
APPEND is a special type that ++ * is converted to WRITER that specified to lock from the start of the ++ * end of file. zfs_range_lock() returns the range lock structure. ++ */ ++rl_t *zfs_range_lock(znode_t *zp, uint64_t off, uint64_t len, rl_type_t type); ++ ++/* ++ * Unlock range and destroy range lock structure. ++ */ ++void zfs_range_unlock(rl_t *rl); ++ ++/* ++ * Reduce range locked as RW_WRITER from whole file to specified range. ++ * Asserts the whole file was previously locked. ++ */ ++void zfs_range_reduce(rl_t *rl, uint64_t off, uint64_t len); ++ ++/* ++ * AVL comparison function used to compare range locks ++ */ ++int zfs_range_compare(const void *arg1, const void *arg2); ++ ++#endif /* _KERNEL */ ++ ++#ifdef __cplusplus ++} ++#endif ++ ++#endif /* _SYS_FS_ZFS_RLOCK_H */ +diff -uNr linux-3.2.33-go.orig/include/zfs/sys/zfs_sa.h linux-3.2.33-go/include/zfs/sys/zfs_sa.h +--- linux-3.2.33-go.orig/include/zfs/sys/zfs_sa.h 1970-01-01 01:00:00.000000000 +0100 ++++ linux-3.2.33-go/include/zfs/sys/zfs_sa.h 2012-11-16 23:25:34.342039415 +0100 +@@ -0,0 +1,150 @@ ++/* ++ * CDDL HEADER START ++ * ++ * The contents of this file are subject to the terms of the ++ * Common Development and Distribution License (the "License"). ++ * You may not use this file except in compliance with the License. ++ * ++ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE ++ * or http://www.opensolaris.org/os/licensing. ++ * See the License for the specific language governing permissions ++ * and limitations under the License. ++ * ++ * When distributing Covered Code, include this CDDL HEADER in each ++ * file and include the License file at usr/src/OPENSOLARIS.LICENSE. ++ * If applicable, add the following below this CDDL HEADER, with the ++ * fields enclosed by brackets "[]" replaced with your own identifying ++ * information: Portions Copyright [yyyy] [name of copyright owner] ++ * ++ * CDDL HEADER END ++ */ ++/* ++ * Copyright 2010 Sun Microsystems, Inc. All rights reserved. ++ * Use is subject to license terms. ++ */ ++ ++#ifndef _SYS_ZFS_SA_H ++#define _SYS_ZFS_SA_H ++ ++#ifdef _KERNEL ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++ ++ ++#endif ++ ++#ifdef __cplusplus ++extern "C" { ++#endif ++ ++/* ++ * This is the list of known attributes ++ * to the ZPL. The values of the actual ++ * attributes are not defined by the order ++ * the enums. It is controlled by the attribute ++ * registration mechanism. Two different file system ++ * could have different numeric values for the same ++ * attributes. this list is only used for dereferencing ++ * into the table that will hold the actual numeric value. 
++ */ ++typedef enum zpl_attr { ++ ZPL_ATIME, ++ ZPL_MTIME, ++ ZPL_CTIME, ++ ZPL_CRTIME, ++ ZPL_GEN, ++ ZPL_MODE, ++ ZPL_SIZE, ++ ZPL_PARENT, ++ ZPL_LINKS, ++ ZPL_XATTR, ++ ZPL_RDEV, ++ ZPL_FLAGS, ++ ZPL_UID, ++ ZPL_GID, ++ ZPL_PAD, ++ ZPL_ZNODE_ACL, ++ ZPL_DACL_COUNT, ++ ZPL_SYMLINK, ++ ZPL_SCANSTAMP, ++ ZPL_DACL_ACES, ++ ZPL_DXATTR, ++ ZPL_END ++} zpl_attr_t; ++ ++#define ZFS_OLD_ZNODE_PHYS_SIZE 0x108 ++#define ZFS_SA_BASE_ATTR_SIZE (ZFS_OLD_ZNODE_PHYS_SIZE - \ ++ sizeof (zfs_acl_phys_t)) ++ ++#define SA_MODE_OFFSET 0 ++#define SA_SIZE_OFFSET 8 ++#define SA_GEN_OFFSET 16 ++#define SA_UID_OFFSET 24 ++#define SA_GID_OFFSET 32 ++#define SA_PARENT_OFFSET 40 ++ ++extern sa_attr_reg_t zfs_attr_table[ZPL_END + 1]; ++extern sa_attr_reg_t zfs_legacy_attr_table[ZPL_END + 1]; ++ ++/* ++ * This is a deprecated data structure that only exists for ++ * dealing with file systems create prior to ZPL version 5. ++ */ ++typedef struct znode_phys { ++ uint64_t zp_atime[2]; /* 0 - last file access time */ ++ uint64_t zp_mtime[2]; /* 16 - last file modification time */ ++ uint64_t zp_ctime[2]; /* 32 - last file change time */ ++ uint64_t zp_crtime[2]; /* 48 - creation time */ ++ uint64_t zp_gen; /* 64 - generation (txg of creation) */ ++ uint64_t zp_mode; /* 72 - file mode bits */ ++ uint64_t zp_size; /* 80 - size of file */ ++ uint64_t zp_parent; /* 88 - directory parent (`..') */ ++ uint64_t zp_links; /* 96 - number of links to file */ ++ uint64_t zp_xattr; /* 104 - DMU object for xattrs */ ++ uint64_t zp_rdev; /* 112 - dev_t for VBLK & VCHR files */ ++ uint64_t zp_flags; /* 120 - persistent flags */ ++ uint64_t zp_uid; /* 128 - file owner */ ++ uint64_t zp_gid; /* 136 - owning group */ ++ uint64_t zp_zap; /* 144 - extra attributes */ ++ uint64_t zp_pad[3]; /* 152 - future */ ++ zfs_acl_phys_t zp_acl; /* 176 - 263 ACL */ ++ /* ++ * Data may pad out any remaining bytes in the znode buffer, eg: ++ * ++ * |<---------------------- dnode_phys (512) ------------------------>| ++ * |<-- dnode (192) --->|<----------- "bonus" buffer (320) ---------->| ++ * |<---- znode (264) ---->|<---- data (56) ---->| ++ * ++ * At present, we use this space for the following: ++ * - symbolic links ++ * - 32-byte anti-virus scanstamp (regular files only) ++ */ ++} znode_phys_t; ++ ++#ifdef _KERNEL ++ ++#define DXATTR_MAX_ENTRY_SIZE (32768) ++#define DXATTR_MAX_SA_SIZE (SPA_MAXBLOCKSIZE >> 1) ++ ++int zfs_sa_readlink(struct znode *, uio_t *); ++void zfs_sa_symlink(struct znode *, char *link, int len, dmu_tx_t *); ++void zfs_sa_get_scanstamp(struct znode *, xvattr_t *); ++void zfs_sa_set_scanstamp(struct znode *, xvattr_t *, dmu_tx_t *); ++int zfs_sa_get_xattr(struct znode *); ++int zfs_sa_set_xattr(struct znode *); ++void zfs_sa_upgrade(struct sa_handle *, dmu_tx_t *); ++void zfs_sa_upgrade_txholds(dmu_tx_t *, struct znode *); ++void zfs_sa_init(void); ++void zfs_sa_fini(void); ++#endif ++ ++#ifdef __cplusplus ++} ++#endif ++ ++#endif /* _SYS_ZFS_SA_H */ +diff -uNr linux-3.2.33-go.orig/include/zfs/sys/zfs_stat.h linux-3.2.33-go/include/zfs/sys/zfs_stat.h +--- linux-3.2.33-go.orig/include/zfs/sys/zfs_stat.h 1970-01-01 01:00:00.000000000 +0100 ++++ linux-3.2.33-go/include/zfs/sys/zfs_stat.h 2012-11-16 23:25:34.338039461 +0100 +@@ -0,0 +1,56 @@ ++/* ++ * CDDL HEADER START ++ * ++ * The contents of this file are subject to the terms of the ++ * Common Development and Distribution License (the "License"). ++ * You may not use this file except in compliance with the License. 
++ * ++ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE ++ * or http://www.opensolaris.org/os/licensing. ++ * See the License for the specific language governing permissions ++ * and limitations under the License. ++ * ++ * When distributing Covered Code, include this CDDL HEADER in each ++ * file and include the License file at usr/src/OPENSOLARIS.LICENSE. ++ * If applicable, add the following below this CDDL HEADER, with the ++ * fields enclosed by brackets "[]" replaced with your own identifying ++ * information: Portions Copyright [yyyy] [name of copyright owner] ++ * ++ * CDDL HEADER END ++ */ ++/* ++ * Copyright (c) 2010, Oracle and/or its affiliates. All rights reserved. ++ */ ++ ++#ifndef _SYS_FS_ZFS_STAT_H ++#define _SYS_FS_ZFS_STAT_H ++ ++#ifdef _KERNEL ++#include ++#include ++#include ++#endif ++ ++#ifdef __cplusplus ++extern "C" { ++#endif ++ ++/* ++ * A limited number of zpl level stats are retrievable ++ * with an ioctl. zfs diff is the current consumer. ++ */ ++typedef struct zfs_stat { ++ uint64_t zs_gen; ++ uint64_t zs_mode; ++ uint64_t zs_links; ++ uint64_t zs_ctime[2]; ++} zfs_stat_t; ++ ++extern int zfs_obj_to_stats(objset_t *osp, uint64_t obj, zfs_stat_t *sb, ++ char *buf, int len); ++ ++#ifdef __cplusplus ++} ++#endif ++ ++#endif /* _SYS_FS_ZFS_STAT_H */ +diff -uNr linux-3.2.33-go.orig/include/zfs/sys/zfs_vfsops.h linux-3.2.33-go/include/zfs/sys/zfs_vfsops.h +--- linux-3.2.33-go.orig/include/zfs/sys/zfs_vfsops.h 1970-01-01 01:00:00.000000000 +0100 ++++ linux-3.2.33-go/include/zfs/sys/zfs_vfsops.h 2012-11-16 23:25:34.336039485 +0100 +@@ -0,0 +1,193 @@ ++/* ++ * CDDL HEADER START ++ * ++ * The contents of this file are subject to the terms of the ++ * Common Development and Distribution License (the "License"). ++ * You may not use this file except in compliance with the License. ++ * ++ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE ++ * or http://www.opensolaris.org/os/licensing. ++ * See the License for the specific language governing permissions ++ * and limitations under the License. ++ * ++ * When distributing Covered Code, include this CDDL HEADER in each ++ * file and include the License file at usr/src/OPENSOLARIS.LICENSE. ++ * If applicable, add the following below this CDDL HEADER, with the ++ * fields enclosed by brackets "[]" replaced with your own identifying ++ * information: Portions Copyright [yyyy] [name of copyright owner] ++ * ++ * CDDL HEADER END ++ */ ++/* ++ * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved. 
++ */ ++ ++#ifndef _SYS_FS_ZFS_VFSOPS_H ++#define _SYS_FS_ZFS_VFSOPS_H ++ ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++ ++#ifdef __cplusplus ++extern "C" { ++#endif ++ ++struct zfs_sb; ++struct znode; ++ ++typedef struct zfs_sb { ++ struct super_block *z_sb; /* generic super_block */ ++ struct backing_dev_info z_bdi; /* generic backing dev info */ ++ struct zfs_sb *z_parent; /* parent fs */ ++ objset_t *z_os; /* objset reference */ ++ uint64_t z_flags; /* super_block flags */ ++ uint64_t z_root; /* id of root znode */ ++ uint64_t z_unlinkedobj; /* id of unlinked zapobj */ ++ uint64_t z_max_blksz; /* maximum block size for files */ ++ uint64_t z_fuid_obj; /* fuid table object number */ ++ uint64_t z_fuid_size; /* fuid table size */ ++ avl_tree_t z_fuid_idx; /* fuid tree keyed by index */ ++ avl_tree_t z_fuid_domain; /* fuid tree keyed by domain */ ++ krwlock_t z_fuid_lock; /* fuid lock */ ++ boolean_t z_fuid_loaded; /* fuid tables are loaded */ ++ boolean_t z_fuid_dirty; /* need to sync fuid table ? */ ++ struct zfs_fuid_info *z_fuid_replay; /* fuid info for replay */ ++ zilog_t *z_log; /* intent log pointer */ ++ uint_t z_acl_inherit; /* acl inheritance behavior */ ++ zfs_case_t z_case; /* case-sense */ ++ boolean_t z_utf8; /* utf8-only */ ++ int z_norm; /* normalization flags */ ++ boolean_t z_atime; /* enable atimes mount option */ ++ boolean_t z_unmounted; /* unmounted */ ++ rrwlock_t z_teardown_lock; ++ krwlock_t z_teardown_inactive_lock; ++ list_t z_all_znodes; /* all znodes in the fs */ ++ uint64_t z_nr_znodes; /* number of znodes in the fs */ ++ kmutex_t z_znodes_lock; /* lock for z_all_znodes */ ++ struct inode *z_ctldir; /* .zfs directory inode */ ++ avl_tree_t z_ctldir_snaps; /* .zfs/snapshot entries */ ++ kmutex_t z_ctldir_lock; /* .zfs ctldir lock */ ++ boolean_t z_show_ctldir; /* expose .zfs in the root dir */ ++ boolean_t z_issnap; /* true if this is a snapshot */ ++ boolean_t z_vscan; /* virus scan on/off */ ++ boolean_t z_use_fuids; /* version allows fuids */ ++ boolean_t z_replay; /* set during ZIL replay */ ++ boolean_t z_use_sa; /* version allow system attributes */ ++ boolean_t z_xattr_sa; /* allow xattrs to be stores as SA */ ++ uint64_t z_version; /* ZPL version */ ++ uint64_t z_shares_dir; /* hidden shares dir */ ++ kmutex_t z_lock; ++ uint64_t z_userquota_obj; ++ uint64_t z_groupquota_obj; ++ uint64_t z_replay_eof; /* New end of file - replay only */ ++ sa_attr_type_t *z_attr_table; /* SA attr mapping->id */ ++#define ZFS_OBJ_MTX_SZ 64 ++ kmutex_t z_hold_mtx[ZFS_OBJ_MTX_SZ]; /* znode hold locks */ ++} zfs_sb_t; ++ ++#define ZFS_SUPER_MAGIC 0x2fc12fc1 ++ ++#define ZSB_XATTR 0x0001 /* Enable user xattrs */ ++ ++/* ++ * Allow a maximum number of links. While ZFS does not internally limit ++ * this the inode->i_nlink member is defined as an unsigned int. To be ++ * safe we use 2^31-1 as the limit. ++ */ ++#define ZFS_LINK_MAX ((1U << 31) - 1U) ++ ++/* ++ * Normal filesystems (those not under .zfs/snapshot) have a total ++ * file ID size limited to 12 bytes (including the length field) due to ++ * NFSv2 protocol's limitation of 32 bytes for a filehandle. For historical ++ * reasons, this same limit is being imposed by the Solaris NFSv3 implementation ++ * (although the NFSv3 protocol actually permits a maximum of 64 bytes). It ++ * is not possible to expand beyond 12 bytes without abandoning support ++ * of NFSv2. 
++ * ++ * For normal filesystems, we partition up the available space as follows: ++ * 2 bytes fid length (required) ++ * 6 bytes object number (48 bits) ++ * 4 bytes generation number (32 bits) ++ * ++ * We reserve only 48 bits for the object number, as this is the limit ++ * currently defined and imposed by the DMU. ++ */ ++typedef struct zfid_short { ++ uint16_t zf_len; ++ uint8_t zf_object[6]; /* obj[i] = obj >> (8 * i) */ ++ uint8_t zf_gen[4]; /* gen[i] = gen >> (8 * i) */ ++} zfid_short_t; ++ ++/* ++ * Filesystems under .zfs/snapshot have a total file ID size of 22 bytes ++ * (including the length field). This makes files under .zfs/snapshot ++ * accessible by NFSv3 and NFSv4, but not NFSv2. ++ * ++ * For files under .zfs/snapshot, we partition up the available space ++ * as follows: ++ * 2 bytes fid length (required) ++ * 6 bytes object number (48 bits) ++ * 4 bytes generation number (32 bits) ++ * 6 bytes objset id (48 bits) ++ * 4 bytes currently just zero (32 bits) ++ * ++ * We reserve only 48 bits for the object number and objset id, as these are ++ * the limits currently defined and imposed by the DMU. ++ */ ++typedef struct zfid_long { ++ zfid_short_t z_fid; ++ uint8_t zf_setid[6]; /* obj[i] = obj >> (8 * i) */ ++ uint8_t zf_setgen[4]; /* gen[i] = gen >> (8 * i) */ ++} zfid_long_t; ++ ++#define SHORT_FID_LEN (sizeof (zfid_short_t) - sizeof (uint16_t)) ++#define LONG_FID_LEN (sizeof (zfid_long_t) - sizeof (uint16_t)) ++ ++extern uint_t zfs_fsyncer_key; ++ ++extern int zfs_suspend_fs(zfs_sb_t *zsb); ++extern int zfs_resume_fs(zfs_sb_t *zsb, const char *osname); ++extern int zfs_userspace_one(zfs_sb_t *zsb, zfs_userquota_prop_t type, ++ const char *domain, uint64_t rid, uint64_t *valuep); ++extern int zfs_userspace_many(zfs_sb_t *zsb, zfs_userquota_prop_t type, ++ uint64_t *cookiep, void *vbuf, uint64_t *bufsizep); ++extern int zfs_set_userquota(zfs_sb_t *zsb, zfs_userquota_prop_t type, ++ const char *domain, uint64_t rid, uint64_t quota); ++extern boolean_t zfs_owner_overquota(zfs_sb_t *zsb, struct znode *, ++ boolean_t isgroup); ++extern boolean_t zfs_fuid_overquota(zfs_sb_t *zsb, boolean_t isgroup, ++ uint64_t fuid); ++extern int zfs_set_version(zfs_sb_t *zsb, uint64_t newvers); ++extern int zfs_get_zplprop(objset_t *os, zfs_prop_t prop, ++ uint64_t *value); ++extern int zfs_sb_create(const char *name, zfs_sb_t **zsbp); ++extern int zfs_sb_setup(zfs_sb_t *zsb, boolean_t mounting); ++extern void zfs_sb_free(zfs_sb_t *zsb); ++extern int zfs_sb_prune(struct super_block *sb, unsigned long nr_to_scan, ++ int *objects); ++extern int zfs_sb_teardown(zfs_sb_t *zsb, boolean_t unmounting); ++extern int zfs_check_global_label(const char *dsname, const char *hexsl); ++extern boolean_t zfs_is_readonly(zfs_sb_t *zsb); ++ ++extern int zfs_register_callbacks(zfs_sb_t *zsb); ++extern void zfs_unregister_callbacks(zfs_sb_t *zsb); ++extern int zfs_domount(struct super_block *sb, void *data, int silent); ++extern void zfs_preumount(struct super_block *sb); ++extern int zfs_umount(struct super_block *sb); ++extern int zfs_remount(struct super_block *sb, int *flags, char *data); ++extern int zfs_root(zfs_sb_t *zsb, struct inode **ipp); ++extern int zfs_statvfs(struct dentry *dentry, struct kstatfs *statp); ++extern int zfs_vget(struct super_block *sb, struct inode **ipp, fid_t *fidp); ++ ++#ifdef __cplusplus ++} ++#endif ++ ++#endif /* _SYS_FS_ZFS_VFSOPS_H */ +diff -uNr linux-3.2.33-go.orig/include/zfs/sys/zfs_vnops.h linux-3.2.33-go/include/zfs/sys/zfs_vnops.h +--- 
linux-3.2.33-go.orig/include/zfs/sys/zfs_vnops.h 1970-01-01 01:00:00.000000000 +0100 ++++ linux-3.2.33-go/include/zfs/sys/zfs_vnops.h 2012-11-16 23:25:34.342039415 +0100 +@@ -0,0 +1,85 @@ ++/* ++ * CDDL HEADER START ++ * ++ * The contents of this file are subject to the terms of the ++ * Common Development and Distribution License (the "License"). ++ * You may not use this file except in compliance with the License. ++ * ++ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE ++ * or http://www.opensolaris.org/os/licensing. ++ * See the License for the specific language governing permissions ++ * and limitations under the License. ++ * ++ * When distributing Covered Code, include this CDDL HEADER in each ++ * file and include the License file at usr/src/OPENSOLARIS.LICENSE. ++ * If applicable, add the following below this CDDL HEADER, with the ++ * fields enclosed by brackets "[]" replaced with your own identifying ++ * information: Portions Copyright [yyyy] [name of copyright owner] ++ * ++ * CDDL HEADER END ++ */ ++/* ++ * Copyright (c) 2010, Oracle and/or its affiliates. All rights reserved. ++ */ ++ ++#ifndef _SYS_FS_ZFS_VNOPS_H ++#define _SYS_FS_ZFS_VNOPS_H ++ ++#include ++#include ++#include ++#include ++#include ++#include ++ ++#ifdef __cplusplus ++extern "C" { ++#endif ++ ++extern int zfs_open(struct inode *ip, int mode, int flag, cred_t *cr); ++extern int zfs_close(struct inode *ip, int flag, cred_t *cr); ++extern int zfs_read(struct inode *ip, uio_t *uio, int ioflag, cred_t *cr); ++extern int zfs_write(struct inode *ip, uio_t *uio, int ioflag, cred_t *cr); ++extern int zfs_access(struct inode *ip, int mode, int flag, cred_t *cr); ++extern int zfs_lookup(struct inode *dip, char *nm, struct inode **ipp, ++ int flags, cred_t *cr, int *direntflags, pathname_t *realpnp); ++extern int zfs_create(struct inode *dip, char *name, vattr_t *vap, int excl, ++ int mode, struct inode **ipp, cred_t *cr, int flag, vsecattr_t *vsecp); ++extern int zfs_remove(struct inode *dip, char *name, cred_t *cr); ++extern int zfs_mkdir(struct inode *dip, char *dirname, vattr_t *vap, ++ struct inode **ipp, cred_t *cr, int flags, vsecattr_t *vsecp); ++extern int zfs_rmdir(struct inode *dip, char *name, struct inode *cwd, ++ cred_t *cr, int flags); ++extern int zfs_readdir(struct inode *ip, void *dirent, filldir_t filldir, ++ loff_t *pos, cred_t *cr); ++extern int zfs_fsync(struct inode *ip, int syncflag, cred_t *cr); ++extern int zfs_getattr(struct inode *ip, vattr_t *vap, int flag, cred_t *cr); ++extern int zfs_getattr_fast(struct inode *ip, struct kstat *sp); ++extern int zfs_setattr(struct inode *ip, vattr_t *vap, int flag, cred_t *cr); ++extern int zfs_rename(struct inode *sdip, char *snm, struct inode *tdip, ++ char *tnm, cred_t *cr, int flags); ++extern int zfs_symlink(struct inode *dip, char *name, vattr_t *vap, ++ char *link, struct inode **ipp, cred_t *cr, int flags); ++extern int zfs_follow_link(struct dentry *dentry, struct nameidata *nd); ++extern int zfs_readlink(struct inode *ip, uio_t *uio, cred_t *cr); ++extern int zfs_link(struct inode *tdip, struct inode *sip, ++ char *name, cred_t *cr); ++extern void zfs_inactive(struct inode *ip); ++extern int zfs_space(struct inode *ip, int cmd, flock64_t *bfp, int flag, ++ offset_t offset, cred_t *cr); ++extern int zfs_fid(struct inode *ip, fid_t *fidp); ++extern int zfs_getsecattr(struct inode *ip, vsecattr_t *vsecp, int flag, ++ cred_t *cr); ++extern int zfs_setsecattr(struct inode *ip, vsecattr_t *vsecp, int flag, ++ cred_t *cr); 
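Looking back at the file-ID layout defined in zfs_vfsops.h above: its comments give the byte packing as obj[i] = obj >> (8 * i) for the 6-byte object number, and the same scheme for the 4-byte generation. A standalone sketch of that packing (the object number and generation values are made up for illustration):

/* Illustration only: fill the zfid_short_t byte arrays from zfs_vfsops.h. */
#include <stdint.h>
#include <stdio.h>

int main(void)
{
	uint64_t obj = 0x0000123456789abcULL;	/* sample 48-bit object number */
	uint32_t gen = 7;			/* sample generation number */
	uint8_t zf_object[6], zf_gen[4];
	int i;

	for (i = 0; i < 6; i++)
		zf_object[i] = (uint8_t)(obj >> (8 * i));	/* obj[i] = obj >> (8 * i) */
	for (i = 0; i < 4; i++)
		zf_gen[i] = (uint8_t)(gen >> (8 * i));		/* gen[i] = gen >> (8 * i) */

	for (i = 0; i < 6; i++)
		printf("zf_object[%d] = 0x%02x\n", i, zf_object[i]);
	for (i = 0; i < 4; i++)
		printf("zf_gen[%d] = 0x%02x\n", i, zf_gen[i]);
	return (0);
}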
++extern int zfs_getpage(struct inode *ip, struct page *pl[], int nr_pages); ++extern int zfs_putpage(struct inode *ip, struct page *pp, ++ struct writeback_control *wbc); ++extern int zfs_map(struct inode *ip, offset_t off, caddr_t *addrp, ++ size_t len, unsigned long vm_flags); ++ ++#ifdef __cplusplus ++} ++#endif ++ ++#endif /* _SYS_FS_ZFS_VNOPS_H */ +diff -uNr linux-3.2.33-go.orig/include/zfs/sys/zfs_znode.h linux-3.2.33-go/include/zfs/sys/zfs_znode.h +--- linux-3.2.33-go.orig/include/zfs/sys/zfs_znode.h 1970-01-01 01:00:00.000000000 +0100 ++++ linux-3.2.33-go/include/zfs/sys/zfs_znode.h 2012-11-16 23:25:34.338039461 +0100 +@@ -0,0 +1,383 @@ ++/* ++ * CDDL HEADER START ++ * ++ * The contents of this file are subject to the terms of the ++ * Common Development and Distribution License (the "License"). ++ * You may not use this file except in compliance with the License. ++ * ++ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE ++ * or http://www.opensolaris.org/os/licensing. ++ * See the License for the specific language governing permissions ++ * and limitations under the License. ++ * ++ * When distributing Covered Code, include this CDDL HEADER in each ++ * file and include the License file at usr/src/OPENSOLARIS.LICENSE. ++ * If applicable, add the following below this CDDL HEADER, with the ++ * fields enclosed by brackets "[]" replaced with your own identifying ++ * information: Portions Copyright [yyyy] [name of copyright owner] ++ * ++ * CDDL HEADER END ++ */ ++/* ++ * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved. ++ */ ++ ++#ifndef _SYS_FS_ZFS_ZNODE_H ++#define _SYS_FS_ZFS_ZNODE_H ++ ++#ifdef _KERNEL ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#endif ++#include ++#include ++ ++#ifdef __cplusplus ++extern "C" { ++#endif ++ ++/* ++ * Additional file level attributes, that are stored ++ * in the upper half of zp_flags ++ */ ++#define ZFS_READONLY 0x0000000100000000ull ++#define ZFS_HIDDEN 0x0000000200000000ull ++#define ZFS_SYSTEM 0x0000000400000000ull ++#define ZFS_ARCHIVE 0x0000000800000000ull ++#define ZFS_IMMUTABLE 0x0000001000000000ull ++#define ZFS_NOUNLINK 0x0000002000000000ull ++#define ZFS_APPENDONLY 0x0000004000000000ull ++#define ZFS_NODUMP 0x0000008000000000ull ++#define ZFS_OPAQUE 0x0000010000000000ull ++#define ZFS_AV_QUARANTINED 0x0000020000000000ull ++#define ZFS_AV_MODIFIED 0x0000040000000000ull ++#define ZFS_REPARSE 0x0000080000000000ull ++#define ZFS_OFFLINE 0x0000100000000000ull ++#define ZFS_SPARSE 0x0000200000000000ull ++ ++#define ZFS_ATTR_SET(zp, attr, value, pflags, tx) \ ++{ \ ++ if (value) \ ++ pflags |= attr; \ ++ else \ ++ pflags &= ~attr; \ ++ VERIFY(0 == sa_update(zp->z_sa_hdl, SA_ZPL_FLAGS(ZTOZSB(zp)), \ ++ &pflags, sizeof (pflags), tx)); \ ++} ++ ++/* ++ * Define special zfs pflags ++ */ ++#define ZFS_XATTR 0x1 /* is an extended attribute */ ++#define ZFS_INHERIT_ACE 0x2 /* ace has inheritable ACEs */ ++#define ZFS_ACL_TRIVIAL 0x4 /* files ACL is trivial */ ++#define ZFS_ACL_OBJ_ACE 0x8 /* ACL has CMPLX Object ACE */ ++#define ZFS_ACL_PROTECTED 0x10 /* ACL protected */ ++#define ZFS_ACL_DEFAULTED 0x20 /* ACL should be defaulted */ ++#define ZFS_ACL_AUTO_INHERIT 0x40 /* ACL should be inherited */ ++#define ZFS_BONUS_SCANSTAMP 0x80 /* Scanstamp in bonus area */ ++#define ZFS_NO_EXECS_DENIED 0x100 /* exec was given to everyone */ ++ ++#define SA_ZPL_ATIME(z) z->z_attr_table[ZPL_ATIME] ++#define SA_ZPL_MTIME(z) 
z->z_attr_table[ZPL_MTIME] ++#define SA_ZPL_CTIME(z) z->z_attr_table[ZPL_CTIME] ++#define SA_ZPL_CRTIME(z) z->z_attr_table[ZPL_CRTIME] ++#define SA_ZPL_GEN(z) z->z_attr_table[ZPL_GEN] ++#define SA_ZPL_DACL_ACES(z) z->z_attr_table[ZPL_DACL_ACES] ++#define SA_ZPL_XATTR(z) z->z_attr_table[ZPL_XATTR] ++#define SA_ZPL_SYMLINK(z) z->z_attr_table[ZPL_SYMLINK] ++#define SA_ZPL_RDEV(z) z->z_attr_table[ZPL_RDEV] ++#define SA_ZPL_SCANSTAMP(z) z->z_attr_table[ZPL_SCANSTAMP] ++#define SA_ZPL_UID(z) z->z_attr_table[ZPL_UID] ++#define SA_ZPL_GID(z) z->z_attr_table[ZPL_GID] ++#define SA_ZPL_PARENT(z) z->z_attr_table[ZPL_PARENT] ++#define SA_ZPL_LINKS(z) z->z_attr_table[ZPL_LINKS] ++#define SA_ZPL_MODE(z) z->z_attr_table[ZPL_MODE] ++#define SA_ZPL_DACL_COUNT(z) z->z_attr_table[ZPL_DACL_COUNT] ++#define SA_ZPL_FLAGS(z) z->z_attr_table[ZPL_FLAGS] ++#define SA_ZPL_SIZE(z) z->z_attr_table[ZPL_SIZE] ++#define SA_ZPL_ZNODE_ACL(z) z->z_attr_table[ZPL_ZNODE_ACL] ++#define SA_ZPL_DXATTR(z) z->z_attr_table[ZPL_DXATTR] ++#define SA_ZPL_PAD(z) z->z_attr_table[ZPL_PAD] ++ ++/* ++ * Is ID ephemeral? ++ */ ++#define IS_EPHEMERAL(x) (x > MAXUID) ++ ++/* ++ * Should we use FUIDs? ++ */ ++#define USE_FUIDS(version, os) (version >= ZPL_VERSION_FUID && \ ++ spa_version(dmu_objset_spa(os)) >= SPA_VERSION_FUID) ++#define USE_SA(version, os) (version >= ZPL_VERSION_SA && \ ++ spa_version(dmu_objset_spa(os)) >= SPA_VERSION_SA) ++ ++#define MASTER_NODE_OBJ 1 ++ ++/* ++ * Special attributes for master node. ++ * "userquota@" and "groupquota@" are also valid (from ++ * zfs_userquota_prop_prefixes[]). ++ */ ++#define ZFS_FSID "FSID" ++#define ZFS_UNLINKED_SET "DELETE_QUEUE" ++#define ZFS_ROOT_OBJ "ROOT" ++#define ZPL_VERSION_STR "VERSION" ++#define ZFS_FUID_TABLES "FUID" ++#define ZFS_SHARES_DIR "SHARES" ++#define ZFS_SA_ATTRS "SA_ATTRS" ++ ++#define ZFS_MAX_BLOCKSIZE (SPA_MAXBLOCKSIZE) ++ ++/* Path component length */ ++/* ++ * The generic fs code uses MAXNAMELEN to represent ++ * what the largest component length is. Unfortunately, ++ * this length includes the terminating NULL. ZFS needs ++ * to tell the users via pathconf() and statvfs() what the ++ * true maximum length of a component is, excluding the NULL. ++ */ ++#define ZFS_MAXNAMELEN (MAXNAMELEN - 1) ++ ++/* ++ * Convert mode bits (zp_mode) to BSD-style DT_* values for storing in ++ * the directory entries. On Linux systems this value is already ++ * defined correctly as part of the /usr/include/dirent.h header file. ++ */ ++#ifndef IFTODT ++#define IFTODT(mode) (((mode) & S_IFMT) >> 12) ++#endif ++ ++/* ++ * The directory entry has the type (currently unused on Solaris) in the ++ * top 4 bits, and the object number in the low 48 bits. The "middle" ++ * 12 bits are unused. ++ */ ++#define ZFS_DIRENT_TYPE(de) BF64_GET(de, 60, 4) ++#define ZFS_DIRENT_OBJ(de) BF64_GET(de, 0, 48) ++ ++/* ++ * Directory entry locks control access to directory entries. ++ * They are used to protect creates, deletes, and renames. ++ * Each directory znode has a mutex and a list of locked names. 
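The ZFS_DIRENT_TYPE and ZFS_DIRENT_OBJ macros above store a BSD-style DT_* value in the top 4 bits of a directory entry and the object number in the low 48 bits, with IFTODT deriving the DT_* value from the file mode bits. A standalone sketch of packing and unpacking such an entry, with plain shifts standing in for BF64_GET/BF64_SET (the MY_* names are invented for the example); the zfs_dirlock structure that the preceding comment introduces follows below.

/*
 * Illustration only: pack a DT_* type and a 48-bit object number into one
 * 64-bit directory entry value and take it apart again.
 */
#include <stdint.h>
#include <stdio.h>
#include <sys/stat.h>

#define MY_IFTODT(mode)       (((mode) & S_IFMT) >> 12)        /* as defined above */
#define MY_DIRENT(type, obj)  (((uint64_t)(type) << 60) | ((obj) & 0xFFFFFFFFFFFFULL))
#define MY_DIRENT_TYPE(de)    ((de) >> 60)                      /* top 4 bits */
#define MY_DIRENT_OBJ(de)     ((de) & 0xFFFFFFFFFFFFULL)        /* low 48 bits */

int main(void)
{
	uint64_t obj = 0x123456789aULL;		/* sample object number */
	uint64_t de = MY_DIRENT(MY_IFTODT(S_IFREG), obj);

	printf("entry 0x%016llx: type %llu, object 0x%llx\n",
	    (unsigned long long)de,
	    (unsigned long long)MY_DIRENT_TYPE(de),
	    (unsigned long long)MY_DIRENT_OBJ(de));
	return (0);
}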
++ */ ++#ifdef _KERNEL ++typedef struct zfs_dirlock { ++ char *dl_name; /* directory entry being locked */ ++ uint32_t dl_sharecnt; /* 0 if exclusive, > 0 if shared */ ++ uint8_t dl_namelock; /* 1 if z_name_lock is NOT held */ ++ uint16_t dl_namesize; /* set if dl_name was allocated */ ++ kcondvar_t dl_cv; /* wait for entry to be unlocked */ ++ struct znode *dl_dzp; /* directory znode */ ++ struct zfs_dirlock *dl_next; /* next in z_dirlocks list */ ++} zfs_dirlock_t; ++ ++typedef struct znode { ++ uint64_t z_id; /* object ID for this znode */ ++ kmutex_t z_lock; /* znode modification lock */ ++ krwlock_t z_parent_lock; /* parent lock for directories */ ++ krwlock_t z_name_lock; /* "master" lock for dirent locks */ ++ zfs_dirlock_t *z_dirlocks; /* directory entry lock list */ ++ kmutex_t z_range_lock; /* protects changes to z_range_avl */ ++ avl_tree_t z_range_avl; /* avl tree of file range locks */ ++ uint8_t z_unlinked; /* file has been unlinked */ ++ uint8_t z_atime_dirty; /* atime needs to be synced */ ++ uint8_t z_zn_prefetch; /* Prefetch znodes? */ ++ uint8_t z_moved; /* Has this znode been moved? */ ++ uint_t z_blksz; /* block size in bytes */ ++ uint_t z_seq; /* modification sequence number */ ++ uint64_t z_mapcnt; /* number of pages mapped to file */ ++ uint64_t z_gen; /* generation (cached) */ ++ uint64_t z_size; /* file size (cached) */ ++ uint64_t z_atime[2]; /* atime (cached) */ ++ uint64_t z_links; /* file links (cached) */ ++ uint64_t z_pflags; /* pflags (cached) */ ++ uint64_t z_uid; /* uid fuid (cached) */ ++ uint64_t z_gid; /* gid fuid (cached) */ ++ mode_t z_mode; /* mode (cached) */ ++ uint32_t z_sync_cnt; /* synchronous open count */ ++ kmutex_t z_acl_lock; /* acl data lock */ ++ zfs_acl_t *z_acl_cached; /* cached acl */ ++ krwlock_t z_xattr_lock; /* xattr data lock */ ++ nvlist_t *z_xattr_cached;/* cached xattrs */ ++ list_node_t z_link_node; /* all znodes in fs link */ ++ sa_handle_t *z_sa_hdl; /* handle to sa data */ ++ boolean_t z_is_sa; /* are we native sa? */ ++ boolean_t z_is_zvol; /* are we used by the zvol */ ++ boolean_t z_is_mapped; /* are we mmap'ed */ ++ boolean_t z_is_ctldir; /* are we .zfs entry */ ++ struct inode z_inode; /* generic vfs inode */ ++} znode_t; ++ ++ ++/* ++ * Range locking rules ++ * -------------------- ++ * 1. When truncating a file (zfs_create, zfs_setattr, zfs_space) the whole ++ * file range needs to be locked as RL_WRITER. Only then can the pages be ++ * freed etc and zp_size reset. zp_size must be set within range lock. ++ * 2. For writes and punching holes (zfs_write & zfs_space) just the range ++ * being written or freed needs to be locked as RL_WRITER. ++ * Multiple writes at the end of the file must coordinate zp_size updates ++ * to ensure data isn't lost. A compare and swap loop is currently used ++ * to ensure the file size is at least the offset last written. ++ * 3. For reads (zfs_read, zfs_get_data & zfs_putapage) just the range being ++ * read needs to be locked as RL_READER. A check against zp_size can then ++ * be made for reading beyond end of file. ++ */ ++ ++/* ++ * Convert between znode pointers and inode pointers ++ */ ++#define ZTOI(znode) (&((znode)->z_inode)) ++#define ITOZ(inode) (container_of((inode), znode_t, z_inode)) ++#define ZTOZSB(znode) ((zfs_sb_t *)(ZTOI(znode)->i_sb->s_fs_info)) ++#define ITOZSB(inode) ((zfs_sb_t *)((inode)->i_sb->s_fs_info)) ++ ++#define S_ISDEV(mode) (S_ISCHR(mode) || S_ISBLK(mode) || S_ISFIFO(mode)) ++ ++/* ++ * ZFS_ENTER() is called on entry to each ZFS inode and vfs operation. 
++ * ZFS_EXIT() must be called before exitting the vop. ++ * ZFS_VERIFY_ZP() verifies the znode is valid. ++ */ ++#define ZFS_ENTER(zsb) \ ++ { \ ++ rrw_enter(&(zsb)->z_teardown_lock, RW_READER, FTAG); \ ++ if ((zsb)->z_unmounted) { \ ++ ZFS_EXIT(zsb); \ ++ return (EIO); \ ++ } \ ++ } ++ ++#define ZFS_EXIT(zsb) \ ++ { \ ++ rrw_exit(&(zsb)->z_teardown_lock, FTAG); \ ++ tsd_exit(); \ ++ } ++ ++#define ZFS_VERIFY_ZP(zp) \ ++ if ((zp)->z_sa_hdl == NULL) { \ ++ ZFS_EXIT(ZTOZSB(zp)); \ ++ return (EIO); \ ++ } ++ ++/* ++ * Macros for dealing with dmu_buf_hold ++ */ ++#define ZFS_OBJ_HASH(obj_num) ((obj_num) & (ZFS_OBJ_MTX_SZ - 1)) ++#define ZFS_OBJ_MUTEX(zsb, obj_num) \ ++ (&(zsb)->z_hold_mtx[ZFS_OBJ_HASH(obj_num)]) ++#define ZFS_OBJ_HOLD_ENTER(zsb, obj_num) \ ++ mutex_enter(ZFS_OBJ_MUTEX((zsb), (obj_num))) ++#define ZFS_OBJ_HOLD_TRYENTER(zsb, obj_num) \ ++ mutex_tryenter(ZFS_OBJ_MUTEX((zsb), (obj_num))) ++#define ZFS_OBJ_HOLD_EXIT(zsb, obj_num) \ ++ mutex_exit(ZFS_OBJ_MUTEX((zsb), (obj_num))) ++#define ZFS_OBJ_HOLD_OWNED(zsb, obj_num) \ ++ mutex_owned(ZFS_OBJ_MUTEX((zsb), (obj_num))) ++ ++/* ++ * Macros to encode/decode ZFS stored time values from/to struct timespec ++ */ ++#define ZFS_TIME_ENCODE(tp, stmp) \ ++{ \ ++ (stmp)[0] = (uint64_t)(tp)->tv_sec; \ ++ (stmp)[1] = (uint64_t)(tp)->tv_nsec; \ ++} ++ ++#define ZFS_TIME_DECODE(tp, stmp) \ ++{ \ ++ (tp)->tv_sec = (time_t)(stmp)[0]; \ ++ (tp)->tv_nsec = (long)(stmp)[1]; \ ++} ++ ++/* ++ * Timestamp defines ++ */ ++#define ACCESSED (ATTR_ATIME) ++#define STATE_CHANGED (ATTR_CTIME) ++#define CONTENT_MODIFIED (ATTR_MTIME | ATTR_CTIME) ++ ++#define ZFS_ACCESSTIME_STAMP(zsb, zp) \ ++ if ((zsb)->z_atime && !(zfs_is_readonly(zsb))) \ ++ zfs_tstamp_update_setup(zp, ACCESSED, NULL, NULL, B_FALSE); ++ ++extern int zfs_init_fs(zfs_sb_t *, znode_t **); ++extern void zfs_set_dataprop(objset_t *); ++extern void zfs_create_fs(objset_t *os, cred_t *cr, nvlist_t *, ++ dmu_tx_t *tx); ++extern void zfs_tstamp_update_setup(znode_t *, uint_t, uint64_t [2], ++ uint64_t [2], boolean_t); ++extern void zfs_grow_blocksize(znode_t *, uint64_t, dmu_tx_t *); ++extern int zfs_freesp(znode_t *, uint64_t, uint64_t, int, boolean_t); ++extern void zfs_znode_init(void); ++extern void zfs_znode_fini(void); ++extern int zfs_zget(zfs_sb_t *, uint64_t, znode_t **); ++extern int zfs_rezget(znode_t *); ++extern void zfs_zinactive(znode_t *); ++extern void zfs_znode_delete(znode_t *, dmu_tx_t *); ++extern void zfs_remove_op_tables(void); ++extern int zfs_create_op_tables(void); ++extern int zfs_sync(struct super_block *, int, cred_t *); ++extern dev_t zfs_cmpldev(uint64_t); ++extern int zfs_get_zplprop(objset_t *os, zfs_prop_t prop, uint64_t *value); ++extern int zfs_get_stats(objset_t *os, nvlist_t *nv); ++extern void zfs_znode_dmu_fini(znode_t *); ++extern int zfs_inode_alloc(struct super_block *, struct inode **ip); ++extern void zfs_inode_destroy(struct inode *); ++extern void zfs_inode_update(znode_t *); ++ ++extern void zfs_log_create(zilog_t *zilog, dmu_tx_t *tx, uint64_t txtype, ++ znode_t *dzp, znode_t *zp, char *name, vsecattr_t *, zfs_fuid_info_t *, ++ vattr_t *vap); ++extern int zfs_log_create_txtype(zil_create_t, vsecattr_t *vsecp, ++ vattr_t *vap); ++extern void zfs_log_remove(zilog_t *zilog, dmu_tx_t *tx, uint64_t txtype, ++ znode_t *dzp, char *name, uint64_t foid); ++#define ZFS_NO_OBJECT 0 /* no object id */ ++extern void zfs_log_link(zilog_t *zilog, dmu_tx_t *tx, uint64_t txtype, ++ znode_t *dzp, znode_t *zp, char *name); ++extern void zfs_log_symlink(zilog_t *zilog, 
dmu_tx_t *tx, uint64_t txtype, ++ znode_t *dzp, znode_t *zp, char *name, char *link); ++extern void zfs_log_rename(zilog_t *zilog, dmu_tx_t *tx, uint64_t txtype, ++ znode_t *sdzp, char *sname, znode_t *tdzp, char *dname, znode_t *szp); ++extern void zfs_log_write(zilog_t *zilog, dmu_tx_t *tx, int txtype, ++ znode_t *zp, offset_t off, ssize_t len, int ioflag); ++extern void zfs_log_truncate(zilog_t *zilog, dmu_tx_t *tx, int txtype, ++ znode_t *zp, uint64_t off, uint64_t len); ++extern void zfs_log_setattr(zilog_t *zilog, dmu_tx_t *tx, int txtype, ++ znode_t *zp, vattr_t *vap, uint_t mask_applied, zfs_fuid_info_t *fuidp); ++extern void zfs_log_acl(zilog_t *zilog, dmu_tx_t *tx, znode_t *zp, ++ vsecattr_t *vsecp, zfs_fuid_info_t *fuidp); ++extern void zfs_xvattr_set(znode_t *zp, xvattr_t *xvap, dmu_tx_t *tx); ++extern void zfs_upgrade(zfs_sb_t *zsb, dmu_tx_t *tx); ++extern int zfs_create_share_dir(zfs_sb_t *zsb, dmu_tx_t *tx); ++ ++#if defined(HAVE_UIO_RW) ++extern caddr_t zfs_map_page(page_t *, enum seg_rw); ++extern void zfs_unmap_page(page_t *, caddr_t); ++#endif /* HAVE_UIO_RW */ ++ ++extern zil_get_data_t zfs_get_data; ++extern zil_replay_func_t *zfs_replay_vector[TX_MAX_TYPE]; ++extern int zfsfstype; ++ ++#endif /* _KERNEL */ ++ ++extern int zfs_obj_to_path(objset_t *osp, uint64_t obj, char *buf, int len); ++ ++#ifdef __cplusplus ++} ++#endif ++ ++#endif /* _SYS_FS_ZFS_ZNODE_H */ +diff -uNr linux-3.2.33-go.orig/include/zfs/sys/zil.h linux-3.2.33-go/include/zfs/sys/zil.h +--- linux-3.2.33-go.orig/include/zfs/sys/zil.h 1970-01-01 01:00:00.000000000 +0100 ++++ linux-3.2.33-go/include/zfs/sys/zil.h 2012-11-16 23:25:34.336039485 +0100 +@@ -0,0 +1,487 @@ ++/* ++ * CDDL HEADER START ++ * ++ * The contents of this file are subject to the terms of the ++ * Common Development and Distribution License (the "License"). ++ * You may not use this file except in compliance with the License. ++ * ++ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE ++ * or http://www.opensolaris.org/os/licensing. ++ * See the License for the specific language governing permissions ++ * and limitations under the License. ++ * ++ * When distributing Covered Code, include this CDDL HEADER in each ++ * file and include the License file at usr/src/OPENSOLARIS.LICENSE. ++ * If applicable, add the following below this CDDL HEADER, with the ++ * fields enclosed by brackets "[]" replaced with your own identifying ++ * information: Portions Copyright [yyyy] [name of copyright owner] ++ * ++ * CDDL HEADER END ++ */ ++/* ++ * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved. ++ */ ++ ++/* Portions Copyright 2010 Robert Milkowski */ ++ ++#ifndef _SYS_ZIL_H ++#define _SYS_ZIL_H ++ ++#include ++#include ++#include ++#include ++ ++#ifdef __cplusplus ++extern "C" { ++#endif ++ ++/* ++ * Intent log format: ++ * ++ * Each objset has its own intent log. The log header (zil_header_t) ++ * for objset N's intent log is kept in the Nth object of the SPA's ++ * intent_log objset. The log header points to a chain of log blocks, ++ * each of which contains log records (i.e., transactions) followed by ++ * a log block trailer (zil_trailer_t). The format of a log record ++ * depends on the record (or transaction) type, but all records begin ++ * with a common structure that defines the type, length, and txg. ++ */ ++ ++/* ++ * Intent log header - this on disk structure holds fields to manage ++ * the log. All fields are 64 bit to easily handle cross architectures. 
++ */ ++typedef struct zil_header { ++ uint64_t zh_claim_txg; /* txg in which log blocks were claimed */ ++ uint64_t zh_replay_seq; /* highest replayed sequence number */ ++ blkptr_t zh_log; /* log chain */ ++ uint64_t zh_claim_blk_seq; /* highest claimed block sequence number */ ++ uint64_t zh_flags; /* header flags */ ++ uint64_t zh_claim_lr_seq; /* highest claimed lr sequence number */ ++ uint64_t zh_pad[3]; ++} zil_header_t; ++ ++/* ++ * zh_flags bit settings ++ */ ++#define ZIL_REPLAY_NEEDED 0x1 /* replay needed - internal only */ ++#define ZIL_CLAIM_LR_SEQ_VALID 0x2 /* zh_claim_lr_seq field is valid */ ++ ++/* ++ * Log block chaining. ++ * ++ * Log blocks are chained together. Originally they were chained at the ++ * end of the block. For performance reasons the chain was moved to the ++ * beginning of the block which allows writes for only the data being used. ++ * The older position is supported for backwards compatability. ++ * ++ * The zio_eck_t contains a zec_cksum which for the intent log is ++ * the sequence number of this log block. A seq of 0 is invalid. ++ * The zec_cksum is checked by the SPA against the sequence ++ * number passed in the blk_cksum field of the blkptr_t ++ */ ++typedef struct zil_chain { ++ uint64_t zc_pad; ++ blkptr_t zc_next_blk; /* next block in chain */ ++ uint64_t zc_nused; /* bytes in log block used */ ++ zio_eck_t zc_eck; /* block trailer */ ++} zil_chain_t; ++ ++#define ZIL_MIN_BLKSZ 4096ULL ++#define ZIL_MAX_BLKSZ SPA_MAXBLOCKSIZE ++ ++/* ++ * The words of a log block checksum. ++ */ ++#define ZIL_ZC_GUID_0 0 ++#define ZIL_ZC_GUID_1 1 ++#define ZIL_ZC_OBJSET 2 ++#define ZIL_ZC_SEQ 3 ++ ++typedef enum zil_create { ++ Z_FILE, ++ Z_DIR, ++ Z_XATTRDIR, ++} zil_create_t; ++ ++/* ++ * size of xvattr log section. ++ * its composed of lr_attr_t + xvattr bitmap + 2 64 bit timestamps ++ * for create time and a single 64 bit integer for all of the attributes, ++ * and 4 64 bit integers (32 bytes) for the scanstamp. ++ * ++ */ ++ ++#define ZIL_XVAT_SIZE(mapsize) \ ++ sizeof (lr_attr_t) + (sizeof (uint32_t) * (mapsize - 1)) + \ ++ (sizeof (uint64_t) * 7) ++ ++/* ++ * Size of ACL in log. The ACE data is padded out to properly align ++ * on 8 byte boundary. 
++ */ ++ ++#define ZIL_ACE_LENGTH(x) (roundup(x, sizeof (uint64_t))) ++ ++/* ++ * Intent log transaction types and record structures ++ */ ++#define TX_CREATE 1 /* Create file */ ++#define TX_MKDIR 2 /* Make directory */ ++#define TX_MKXATTR 3 /* Make XATTR directory */ ++#define TX_SYMLINK 4 /* Create symbolic link to a file */ ++#define TX_REMOVE 5 /* Remove file */ ++#define TX_RMDIR 6 /* Remove directory */ ++#define TX_LINK 7 /* Create hard link to a file */ ++#define TX_RENAME 8 /* Rename a file */ ++#define TX_WRITE 9 /* File write */ ++#define TX_TRUNCATE 10 /* Truncate a file */ ++#define TX_SETATTR 11 /* Set file attributes */ ++#define TX_ACL_V0 12 /* Set old formatted ACL */ ++#define TX_ACL 13 /* Set ACL */ ++#define TX_CREATE_ACL 14 /* create with ACL */ ++#define TX_CREATE_ATTR 15 /* create + attrs */ ++#define TX_CREATE_ACL_ATTR 16 /* create with ACL + attrs */ ++#define TX_MKDIR_ACL 17 /* mkdir with ACL */ ++#define TX_MKDIR_ATTR 18 /* mkdir with attr */ ++#define TX_MKDIR_ACL_ATTR 19 /* mkdir with ACL + attrs */ ++#define TX_WRITE2 20 /* dmu_sync EALREADY write */ ++#define TX_MAX_TYPE 21 /* Max transaction type */ ++ ++/* ++ * The transactions for mkdir, symlink, remove, rmdir, link, and rename ++ * may have the following bit set, indicating the original request ++ * specified case-insensitive handling of names. ++ */ ++#define TX_CI ((uint64_t)0x1 << 63) /* case-insensitive behavior requested */ ++ ++/* ++ * Transactions for write, truncate, setattr, acl_v0, and acl can be logged ++ * out of order. For convenience in the code, all such records must have ++ * lr_foid at the same offset. ++ */ ++#define TX_OOO(txtype) \ ++ ((txtype) == TX_WRITE || \ ++ (txtype) == TX_TRUNCATE || \ ++ (txtype) == TX_SETATTR || \ ++ (txtype) == TX_ACL_V0 || \ ++ (txtype) == TX_ACL || \ ++ (txtype) == TX_WRITE2) ++ ++/* ++ * Format of log records. ++ * The fields are carefully defined to allow them to be aligned ++ * and sized the same on sparc & intel architectures. ++ * Each log record has a common structure at the beginning. ++ * ++ * The log record on disk (lrc_seq) holds the sequence number of all log ++ * records which is used to ensure we don't replay the same record. ++ */ ++typedef struct { /* common log record header */ ++ uint64_t lrc_txtype; /* intent log transaction type */ ++ uint64_t lrc_reclen; /* transaction record length */ ++ uint64_t lrc_txg; /* dmu transaction group number */ ++ uint64_t lrc_seq; /* see comment above */ ++} lr_t; ++ ++/* ++ * Common start of all out-of-order record types (TX_OOO() above). ++ */ ++typedef struct { ++ lr_t lr_common; /* common portion of log record */ ++ uint64_t lr_foid; /* object id */ ++} lr_ooo_t; ++ ++/* ++ * Handle option extended vattr attributes. ++ * ++ * Whenever new attributes are added the version number ++ * will need to be updated as will code in ++ * zfs_log.c and zfs_replay.c ++ */ ++typedef struct { ++ uint32_t lr_attr_masksize; /* number of elements in array */ ++ uint32_t lr_attr_bitmap; /* First entry of array */ ++ /* remainder of array and any additional fields */ ++} lr_attr_t; ++ ++/* ++ * log record for creates without optional ACL. ++ * This log record does support optional xvattr_t attributes. 
++ */ ++typedef struct { ++ lr_t lr_common; /* common portion of log record */ ++ uint64_t lr_doid; /* object id of directory */ ++ uint64_t lr_foid; /* object id of created file object */ ++ uint64_t lr_mode; /* mode of object */ ++ uint64_t lr_uid; /* uid of object */ ++ uint64_t lr_gid; /* gid of object */ ++ uint64_t lr_gen; /* generation (txg of creation) */ ++ uint64_t lr_crtime[2]; /* creation time */ ++ uint64_t lr_rdev; /* rdev of object to create */ ++ /* name of object to create follows this */ ++ /* for symlinks, link content follows name */ ++ /* for creates with xvattr data, the name follows the xvattr info */ ++} lr_create_t; ++ ++/* ++ * FUID ACL record will be an array of ACEs from the original ACL. ++ * If this array includes ephemeral IDs, the record will also include ++ * an array of log-specific FUIDs to replace the ephemeral IDs. ++ * Only one copy of each unique domain will be present, so the log-specific ++ * FUIDs will use an index into a compressed domain table. On replay this ++ * information will be used to construct real FUIDs (and bypass idmap, ++ * since it may not be available). ++ */ ++ ++/* ++ * Log record for creates with optional ACL ++ * This log record is also used for recording any FUID ++ * information needed for replaying the create. If the ++ * file doesn't have any actual ACEs then the lr_aclcnt ++ * would be zero. ++ */ ++typedef struct { ++ lr_create_t lr_create; /* common create portion */ ++ uint64_t lr_aclcnt; /* number of ACEs in ACL */ ++ uint64_t lr_domcnt; /* number of unique domains */ ++ uint64_t lr_fuidcnt; /* number of real fuids */ ++ uint64_t lr_acl_bytes; /* number of bytes in ACL */ ++ uint64_t lr_acl_flags; /* ACL flags */ ++ /* lr_acl_bytes number of variable sized ace's follows */ ++ /* if create is also setting xvattr's, then acl data follows xvattr */ ++ /* if ACE FUIDs are needed then they will follow the xvattr_t */ ++ /* Following the FUIDs will be the domain table information. */ ++ /* The FUIDs for the owner and group will be in the lr_create */ ++ /* portion of the record. 
*/ ++ /* name follows ACL data */ ++} lr_acl_create_t; ++ ++typedef struct { ++ lr_t lr_common; /* common portion of log record */ ++ uint64_t lr_doid; /* obj id of directory */ ++ /* name of object to remove follows this */ ++} lr_remove_t; ++ ++typedef struct { ++ lr_t lr_common; /* common portion of log record */ ++ uint64_t lr_doid; /* obj id of directory */ ++ uint64_t lr_link_obj; /* obj id of link */ ++ /* name of object to link follows this */ ++} lr_link_t; ++ ++typedef struct { ++ lr_t lr_common; /* common portion of log record */ ++ uint64_t lr_sdoid; /* obj id of source directory */ ++ uint64_t lr_tdoid; /* obj id of target directory */ ++ /* 2 strings: names of source and destination follow this */ ++} lr_rename_t; ++ ++typedef struct { ++ lr_t lr_common; /* common portion of log record */ ++ uint64_t lr_foid; /* file object to write */ ++ uint64_t lr_offset; /* offset to write to */ ++ uint64_t lr_length; /* user data length to write */ ++ uint64_t lr_blkoff; /* no longer used */ ++ blkptr_t lr_blkptr; /* spa block pointer for replay */ ++ /* write data will follow for small writes */ ++} lr_write_t; ++ ++typedef struct { ++ lr_t lr_common; /* common portion of log record */ ++ uint64_t lr_foid; /* object id of file to truncate */ ++ uint64_t lr_offset; /* offset to truncate from */ ++ uint64_t lr_length; /* length to truncate */ ++} lr_truncate_t; ++ ++typedef struct { ++ lr_t lr_common; /* common portion of log record */ ++ uint64_t lr_foid; /* file object to change attributes */ ++ uint64_t lr_mask; /* mask of attributes to set */ ++ uint64_t lr_mode; /* mode to set */ ++ uint64_t lr_uid; /* uid to set */ ++ uint64_t lr_gid; /* gid to set */ ++ uint64_t lr_size; /* size to set */ ++ uint64_t lr_atime[2]; /* access time */ ++ uint64_t lr_mtime[2]; /* modification time */ ++ /* optional attribute lr_attr_t may be here */ ++} lr_setattr_t; ++ ++typedef struct { ++ lr_t lr_common; /* common portion of log record */ ++ uint64_t lr_foid; /* obj id of file */ ++ uint64_t lr_aclcnt; /* number of acl entries */ ++ /* lr_aclcnt number of ace_t entries follow this */ ++} lr_acl_v0_t; ++ ++typedef struct { ++ lr_t lr_common; /* common portion of log record */ ++ uint64_t lr_foid; /* obj id of file */ ++ uint64_t lr_aclcnt; /* number of ACEs in ACL */ ++ uint64_t lr_domcnt; /* number of unique domains */ ++ uint64_t lr_fuidcnt; /* number of real fuids */ ++ uint64_t lr_acl_bytes; /* number of bytes in ACL */ ++ uint64_t lr_acl_flags; /* ACL flags */ ++ /* lr_acl_bytes number of variable sized ace's follows */ ++} lr_acl_t; ++ ++/* ++ * ZIL structure definitions, interface function prototype and globals. ++ */ ++ ++/* ++ * Writes are handled in three different ways: ++ * ++ * WR_INDIRECT: ++ * In this mode, if we need to commit the write later, then the block ++ * is immediately written into the file system (using dmu_sync), ++ * and a pointer to the block is put into the log record. ++ * When the txg commits the block is linked in. ++ * This saves additionally writing the data into the log record. ++ * There are a few requirements for this to occur: ++ * - write is greater than zfs/zvol_immediate_write_sz ++ * - not using slogs (as slogs are assumed to always be faster ++ * than writing into the main pool) ++ * - the write occupies only one block ++ * WR_COPIED: ++ * If we know we'll immediately be committing the ++ * transaction (FSYNC or FDSYNC), the we allocate a larger ++ * log record here for the data and copy the data in. 
++ * WR_NEED_COPY: ++ * Otherwise we don't allocate a buffer, and *if* we need to ++ * flush the write later then a buffer is allocated and ++ * we retrieve the data using the dmu. ++ */ ++typedef enum { ++ WR_INDIRECT, /* indirect - a large write (dmu_sync() data */ ++ /* and put blkptr in log, rather than actual data) */ ++ WR_COPIED, /* immediate - data is copied into lr_write_t */ ++ WR_NEED_COPY, /* immediate - data needs to be copied if pushed */ ++ WR_NUM_STATES /* number of states */ ++} itx_wr_state_t; ++ ++typedef struct itx { ++ list_node_t itx_node; /* linkage on zl_itx_list */ ++ void *itx_private; /* type-specific opaque data */ ++ itx_wr_state_t itx_wr_state; /* write state */ ++ uint8_t itx_sync; /* synchronous transaction */ ++ uint64_t itx_sod; /* record size on disk */ ++ uint64_t itx_oid; /* object id */ ++ lr_t itx_lr; /* common part of log record */ ++ /* followed by type-specific part of lr_xx_t and its immediate data */ ++} itx_t; ++ ++/* ++ * Used for zil kstat. ++ */ ++typedef struct zil_stats { ++ /* ++ * Number of times a ZIL commit (e.g. fsync) has been requested. ++ */ ++ kstat_named_t zil_commit_count; ++ ++ /* ++ * Number of times the ZIL has been flushed to stable storage. ++ * This is less than zil_commit_count when commits are "merged" ++ * (see the documentation above zil_commit()). ++ */ ++ kstat_named_t zil_commit_writer_count; ++ ++ /* ++ * Number of transactions (reads, writes, renames, etc.) ++ * that have been commited. ++ */ ++ kstat_named_t zil_itx_count; ++ ++ /* ++ * See the documentation for itx_wr_state_t above. ++ * Note that "bytes" accumulates the length of the transactions ++ * (i.e. data), not the actual log record sizes. ++ */ ++ kstat_named_t zil_itx_indirect_count; ++ kstat_named_t zil_itx_indirect_bytes; ++ kstat_named_t zil_itx_copied_count; ++ kstat_named_t zil_itx_copied_bytes; ++ kstat_named_t zil_itx_needcopy_count; ++ kstat_named_t zil_itx_needcopy_bytes; ++ ++ /* ++ * Transactions which have been allocated to the "normal" ++ * (i.e. not slog) storage pool. Note that "bytes" accumulate ++ * the actual log record sizes - which do not include the actual ++ * data in case of indirect writes. ++ */ ++ kstat_named_t zil_itx_metaslab_normal_count; ++ kstat_named_t zil_itx_metaslab_normal_bytes; ++ ++ /* ++ * Transactions which have been allocated to the "slog" storage pool. ++ * If there are no separate log devices, this is the same as the ++ * "normal" pool. 
++ */ ++ kstat_named_t zil_itx_metaslab_slog_count; ++ kstat_named_t zil_itx_metaslab_slog_bytes; ++} zil_stats_t; ++ ++extern zil_stats_t zil_stats; ++ ++#define ZIL_STAT_INCR(stat, val) \ ++ atomic_add_64(&zil_stats.stat.value.ui64, (val)); ++#define ZIL_STAT_BUMP(stat) \ ++ ZIL_STAT_INCR(stat, 1); ++ ++typedef int zil_parse_blk_func_t(zilog_t *zilog, blkptr_t *bp, void *arg, ++ uint64_t txg); ++typedef int zil_parse_lr_func_t(zilog_t *zilog, lr_t *lr, void *arg, ++ uint64_t txg); ++typedef int zil_replay_func_t(void *, char *, boolean_t); ++typedef int zil_get_data_t(void *arg, lr_write_t *lr, char *dbuf, zio_t *zio); ++ ++extern int zil_parse(zilog_t *zilog, zil_parse_blk_func_t *parse_blk_func, ++ zil_parse_lr_func_t *parse_lr_func, void *arg, uint64_t txg); ++ ++extern void zil_init(void); ++extern void zil_fini(void); ++ ++extern zilog_t *zil_alloc(objset_t *os, zil_header_t *zh_phys); ++extern void zil_free(zilog_t *zilog); ++ ++extern zilog_t *zil_open(objset_t *os, zil_get_data_t *get_data); ++extern void zil_close(zilog_t *zilog); ++ ++extern void zil_replay(objset_t *os, void *arg, ++ zil_replay_func_t *replay_func[TX_MAX_TYPE]); ++extern boolean_t zil_replaying(zilog_t *zilog, dmu_tx_t *tx); ++extern void zil_destroy(zilog_t *zilog, boolean_t keep_first); ++extern void zil_rollback_destroy(zilog_t *zilog, dmu_tx_t *tx); ++ ++extern itx_t *zil_itx_create(uint64_t txtype, size_t lrsize); ++extern void zil_itx_destroy(itx_t *itx); ++extern void zil_itx_assign(zilog_t *zilog, itx_t *itx, dmu_tx_t *tx); ++ ++extern void zil_commit(zilog_t *zilog, uint64_t oid); ++ ++extern int zil_vdev_offline(const char *osname, void *txarg); ++extern int zil_claim(const char *osname, void *txarg); ++extern int zil_check_log_chain(const char *osname, void *txarg); ++extern void zil_sync(zilog_t *zilog, dmu_tx_t *tx); ++extern void zil_clean(zilog_t *zilog, uint64_t synced_txg); ++ ++extern int zil_suspend(zilog_t *zilog); ++extern void zil_resume(zilog_t *zilog); ++ ++extern void zil_add_block(zilog_t *zilog, const blkptr_t *bp); ++extern int zil_bp_tree_add(zilog_t *zilog, const blkptr_t *bp); ++ ++extern void zil_set_sync(zilog_t *zilog, uint64_t syncval); ++ ++extern void zil_set_logbias(zilog_t *zilog, uint64_t slogval); ++ ++extern int zil_replay_disable; ++ ++#ifdef __cplusplus ++} ++#endif ++ ++#endif /* _SYS_ZIL_H */ +diff -uNr linux-3.2.33-go.orig/include/zfs/sys/zil_impl.h linux-3.2.33-go/include/zfs/sys/zil_impl.h +--- linux-3.2.33-go.orig/include/zfs/sys/zil_impl.h 1970-01-01 01:00:00.000000000 +0100 ++++ linux-3.2.33-go/include/zfs/sys/zil_impl.h 2012-11-16 23:25:34.339039449 +0100 +@@ -0,0 +1,148 @@ ++/* ++ * CDDL HEADER START ++ * ++ * The contents of this file are subject to the terms of the ++ * Common Development and Distribution License (the "License"). ++ * You may not use this file except in compliance with the License. ++ * ++ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE ++ * or http://www.opensolaris.org/os/licensing. ++ * See the License for the specific language governing permissions ++ * and limitations under the License. ++ * ++ * When distributing Covered Code, include this CDDL HEADER in each ++ * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 
++ * If applicable, add the following below this CDDL HEADER, with the ++ * fields enclosed by brackets "[]" replaced with your own identifying ++ * information: Portions Copyright [yyyy] [name of copyright owner] ++ * ++ * CDDL HEADER END ++ */ ++/* ++ * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved. ++ */ ++ ++/* Portions Copyright 2010 Robert Milkowski */ ++ ++#ifndef _SYS_ZIL_IMPL_H ++#define _SYS_ZIL_IMPL_H ++ ++#include ++#include ++ ++#ifdef __cplusplus ++extern "C" { ++#endif ++ ++/* ++ * Log write buffer. ++ */ ++typedef struct lwb { ++ zilog_t *lwb_zilog; /* back pointer to log struct */ ++ blkptr_t lwb_blk; /* on disk address of this log blk */ ++ boolean_t lwb_fastwrite; /* is blk marked for fastwrite? */ ++ int lwb_nused; /* # used bytes in buffer */ ++ int lwb_sz; /* size of block and buffer */ ++ char *lwb_buf; /* log write buffer */ ++ zio_t *lwb_zio; /* zio for this buffer */ ++ dmu_tx_t *lwb_tx; /* tx for log block allocation */ ++ uint64_t lwb_max_txg; /* highest txg in this lwb */ ++ list_node_t lwb_node; /* zilog->zl_lwb_list linkage */ ++} lwb_t; ++ ++/* ++ * Intent log transaction lists ++ */ ++typedef struct itxs { ++ list_t i_sync_list; /* list of synchronous itxs */ ++ avl_tree_t i_async_tree; /* tree of foids for async itxs */ ++} itxs_t; ++ ++typedef struct itxg { ++ kmutex_t itxg_lock; /* lock for this structure */ ++ uint64_t itxg_txg; /* txg for this chain */ ++ uint64_t itxg_sod; /* total size on disk for this txg */ ++ itxs_t *itxg_itxs; /* sync and async itxs */ ++} itxg_t; ++ ++/* for async nodes we build up an AVL tree of lists of async itxs per file */ ++typedef struct itx_async_node { ++ uint64_t ia_foid; /* file object id */ ++ list_t ia_list; /* list of async itxs for this foid */ ++ avl_node_t ia_node; /* AVL tree linkage */ ++} itx_async_node_t; ++ ++/* ++ * Vdev flushing: during a zil_commit(), we build up an AVL tree of the vdevs ++ * we've touched so we know which ones need a write cache flush at the end. ++ */ ++typedef struct zil_vdev_node { ++ uint64_t zv_vdev; /* vdev to be flushed */ ++ avl_node_t zv_node; /* AVL tree linkage */ ++} zil_vdev_node_t; ++ ++#define ZIL_PREV_BLKS 16 ++ ++/* ++ * Stable storage intent log management structure. One per dataset. 
++ */ ++struct zilog { ++ kmutex_t zl_lock; /* protects most zilog_t fields */ ++ struct dsl_pool *zl_dmu_pool; /* DSL pool */ ++ spa_t *zl_spa; /* handle for read/write log */ ++ const zil_header_t *zl_header; /* log header buffer */ ++ objset_t *zl_os; /* object set we're logging */ ++ zil_get_data_t *zl_get_data; /* callback to get object content */ ++ zio_t *zl_root_zio; /* log writer root zio */ ++ uint64_t zl_lr_seq; /* on-disk log record sequence number */ ++ uint64_t zl_commit_lr_seq; /* last committed on-disk lr seq */ ++ uint64_t zl_destroy_txg; /* txg of last zil_destroy() */ ++ uint64_t zl_replayed_seq[TXG_SIZE]; /* last replayed rec seq */ ++ uint64_t zl_replaying_seq; /* current replay seq number */ ++ uint32_t zl_suspend; /* log suspend count */ ++ kcondvar_t zl_cv_writer; /* log writer thread completion */ ++ kcondvar_t zl_cv_suspend; /* log suspend completion */ ++ uint8_t zl_suspending; /* log is currently suspending */ ++ uint8_t zl_keep_first; /* keep first log block in destroy */ ++ uint8_t zl_replay; /* replaying records while set */ ++ uint8_t zl_stop_sync; /* for debugging */ ++ uint8_t zl_writer; /* boolean: write setup in progress */ ++ uint8_t zl_logbias; /* latency or throughput */ ++ uint8_t zl_sync; /* synchronous or asynchronous */ ++ int zl_parse_error; /* last zil_parse() error */ ++ uint64_t zl_parse_blk_seq; /* highest blk seq on last parse */ ++ uint64_t zl_parse_lr_seq; /* highest lr seq on last parse */ ++ uint64_t zl_parse_blk_count; /* number of blocks parsed */ ++ uint64_t zl_parse_lr_count; /* number of log records parsed */ ++ uint64_t zl_next_batch; /* next batch number */ ++ uint64_t zl_com_batch; /* committed batch number */ ++ kcondvar_t zl_cv_batch[2]; /* batch condition variables */ ++ itxg_t zl_itxg[TXG_SIZE]; /* intent log txg chains */ ++ list_t zl_itx_commit_list; /* itx list to be committed */ ++ uint64_t zl_itx_list_sz; /* total size of records on list */ ++ uint64_t zl_cur_used; /* current commit log size used */ ++ list_t zl_lwb_list; /* in-flight log write list */ ++ kmutex_t zl_vdev_lock; /* protects zl_vdev_tree */ ++ avl_tree_t zl_vdev_tree; /* vdevs to flush in zil_commit() */ ++ taskq_t *zl_clean_taskq; /* runs lwb and itx clean tasks */ ++ avl_tree_t zl_bp_tree; /* track bps during log parse */ ++ clock_t zl_replay_time; /* lbolt of when replay started */ ++ uint64_t zl_replay_blks; /* number of log blocks replayed */ ++ zil_header_t zl_old_header; /* debugging aid */ ++ uint_t zl_prev_blks[ZIL_PREV_BLKS]; /* size - sector rounded */ ++ uint_t zl_prev_rotor; /* rotor for zl_prev[] */ ++}; ++ ++typedef struct zil_bp_node { ++ dva_t zn_dva; ++ avl_node_t zn_node; ++} zil_bp_node_t; ++ ++#define ZIL_MAX_LOG_DATA (SPA_MAXBLOCKSIZE - sizeof (zil_chain_t) - \ ++ sizeof (lr_write_t)) ++ ++#ifdef __cplusplus ++} ++#endif ++ ++#endif /* _SYS_ZIL_IMPL_H */ +diff -uNr linux-3.2.33-go.orig/include/zfs/sys/zio_checksum.h linux-3.2.33-go/include/zfs/sys/zio_checksum.h +--- linux-3.2.33-go.orig/include/zfs/sys/zio_checksum.h 1970-01-01 01:00:00.000000000 +0100 ++++ linux-3.2.33-go/include/zfs/sys/zio_checksum.h 2012-11-16 23:25:34.337039473 +0100 +@@ -0,0 +1,75 @@ ++/* ++ * CDDL HEADER START ++ * ++ * The contents of this file are subject to the terms of the ++ * Common Development and Distribution License (the "License"). ++ * You may not use this file except in compliance with the License. ++ * ++ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE ++ * or http://www.opensolaris.org/os/licensing. 
++ * See the License for the specific language governing permissions ++ * and limitations under the License. ++ * ++ * When distributing Covered Code, include this CDDL HEADER in each ++ * file and include the License file at usr/src/OPENSOLARIS.LICENSE. ++ * If applicable, add the following below this CDDL HEADER, with the ++ * fields enclosed by brackets "[]" replaced with your own identifying ++ * information: Portions Copyright [yyyy] [name of copyright owner] ++ * ++ * CDDL HEADER END ++ */ ++/* ++ * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved. ++ */ ++ ++#ifndef _SYS_ZIO_CHECKSUM_H ++#define _SYS_ZIO_CHECKSUM_H ++ ++#include ++ ++#ifdef __cplusplus ++extern "C" { ++#endif ++ ++/* ++ * Signature for checksum functions. ++ */ ++typedef void zio_checksum_t(const void *data, uint64_t size, zio_cksum_t *zcp); ++ ++/* ++ * Information about each checksum function. ++ */ ++typedef struct zio_checksum_info { ++ zio_checksum_t *ci_func[2]; /* checksum function for each byteorder */ ++ int ci_correctable; /* number of correctable bits */ ++ int ci_eck; /* uses zio embedded checksum? */ ++ int ci_dedup; /* strong enough for dedup? */ ++ char *ci_name; /* descriptive name */ ++} zio_checksum_info_t; ++ ++typedef struct zio_bad_cksum { ++ zio_cksum_t zbc_expected; ++ zio_cksum_t zbc_actual; ++ const char *zbc_checksum_name; ++ uint8_t zbc_byteswapped; ++ uint8_t zbc_injected; ++ uint8_t zbc_has_cksum; /* expected/actual valid */ ++} zio_bad_cksum_t; ++ ++extern zio_checksum_info_t zio_checksum_table[ZIO_CHECKSUM_FUNCTIONS]; ++ ++/* ++ * Checksum routines. ++ */ ++extern zio_checksum_t zio_checksum_SHA256; ++ ++extern void zio_checksum_compute(zio_t *zio, enum zio_checksum checksum, ++ void *data, uint64_t size); ++extern int zio_checksum_error(zio_t *zio, zio_bad_cksum_t *out); ++extern enum zio_checksum spa_dedup_checksum(spa_t *spa); ++ ++#ifdef __cplusplus ++} ++#endif ++ ++#endif /* _SYS_ZIO_CHECKSUM_H */ +diff -uNr linux-3.2.33-go.orig/include/zfs/sys/zio_compress.h linux-3.2.33-go/include/zfs/sys/zio_compress.h +--- linux-3.2.33-go.orig/include/zfs/sys/zio_compress.h 1970-01-01 01:00:00.000000000 +0100 ++++ linux-3.2.33-go/include/zfs/sys/zio_compress.h 2012-11-16 23:25:34.343039404 +0100 +@@ -0,0 +1,84 @@ ++/* ++ * CDDL HEADER START ++ * ++ * The contents of this file are subject to the terms of the ++ * Common Development and Distribution License (the "License"). ++ * You may not use this file except in compliance with the License. ++ * ++ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE ++ * or http://www.opensolaris.org/os/licensing. ++ * See the License for the specific language governing permissions ++ * and limitations under the License. ++ * ++ * When distributing Covered Code, include this CDDL HEADER in each ++ * file and include the License file at usr/src/OPENSOLARIS.LICENSE. ++ * If applicable, add the following below this CDDL HEADER, with the ++ * fields enclosed by brackets "[]" replaced with your own identifying ++ * information: Portions Copyright [yyyy] [name of copyright owner] ++ * ++ * CDDL HEADER END ++ */ ++ ++/* ++ * Copyright 2009 Sun Microsystems, Inc. All rights reserved. ++ * Use is subject to license terms. ++ */ ++ ++#ifndef _SYS_ZIO_COMPRESS_H ++#define _SYS_ZIO_COMPRESS_H ++ ++#include ++ ++#ifdef __cplusplus ++extern "C" { ++#endif ++ ++/* ++ * Common signature for all zio compress/decompress functions. 
++ */ ++typedef size_t zio_compress_func_t(void *src, void *dst, ++ size_t s_len, size_t d_len, int); ++typedef int zio_decompress_func_t(void *src, void *dst, ++ size_t s_len, size_t d_len, int); ++ ++/* ++ * Information about each compression function. ++ */ ++typedef struct zio_compress_info { ++ zio_compress_func_t *ci_compress; /* compression function */ ++ zio_decompress_func_t *ci_decompress; /* decompression function */ ++ int ci_level; /* level parameter */ ++ char *ci_name; /* algorithm name */ ++} zio_compress_info_t; ++ ++extern zio_compress_info_t zio_compress_table[ZIO_COMPRESS_FUNCTIONS]; ++ ++/* ++ * Compression routines. ++ */ ++extern size_t lzjb_compress(void *src, void *dst, size_t s_len, size_t d_len, ++ int level); ++extern int lzjb_decompress(void *src, void *dst, size_t s_len, size_t d_len, ++ int level); ++extern size_t gzip_compress(void *src, void *dst, size_t s_len, size_t d_len, ++ int level); ++extern int gzip_decompress(void *src, void *dst, size_t s_len, size_t d_len, ++ int level); ++extern size_t zle_compress(void *src, void *dst, size_t s_len, size_t d_len, ++ int level); ++extern int zle_decompress(void *src, void *dst, size_t s_len, size_t d_len, ++ int level); ++ ++/* ++ * Compress and decompress data if necessary. ++ */ ++extern size_t zio_compress_data(enum zio_compress c, void *src, void *dst, ++ size_t s_len); ++extern int zio_decompress_data(enum zio_compress c, void *src, void *dst, ++ size_t s_len, size_t d_len); ++ ++#ifdef __cplusplus ++} ++#endif ++ ++#endif /* _SYS_ZIO_COMPRESS_H */ +diff -uNr linux-3.2.33-go.orig/include/zfs/sys/zio.h linux-3.2.33-go/include/zfs/sys/zio.h +--- linux-3.2.33-go.orig/include/zfs/sys/zio.h 1970-01-01 01:00:00.000000000 +0100 ++++ linux-3.2.33-go/include/zfs/sys/zio.h 2012-11-16 23:25:34.336039485 +0100 +@@ -0,0 +1,574 @@ ++/* ++ * CDDL HEADER START ++ * ++ * The contents of this file are subject to the terms of the ++ * Common Development and Distribution License (the "License"). ++ * You may not use this file except in compliance with the License. ++ * ++ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE ++ * or http://www.opensolaris.org/os/licensing. ++ * See the License for the specific language governing permissions ++ * and limitations under the License. ++ * ++ * When distributing Covered Code, include this CDDL HEADER in each ++ * file and include the License file at usr/src/OPENSOLARIS.LICENSE. ++ * If applicable, add the following below this CDDL HEADER, with the ++ * fields enclosed by brackets "[]" replaced with your own identifying ++ * information: Portions Copyright [yyyy] [name of copyright owner] ++ * ++ * CDDL HEADER END ++ */ ++ ++/* ++ * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved. ++ */ ++/* ++ * Copyright 2011 Nexenta Systems, Inc. All rights reserved. ++ */ ++ ++#ifndef _ZIO_H ++#define _ZIO_H ++ ++#include ++#include ++#include ++#include ++#include ++#include ++ ++#ifdef __cplusplus ++extern "C" { ++#endif ++ ++/* ++ * Embedded checksum ++ */ ++#define ZEC_MAGIC 0x210da7ab10c7a11ULL ++ ++typedef struct zio_eck { ++ uint64_t zec_magic; /* for validation, endianness */ ++ zio_cksum_t zec_cksum; /* 256-bit checksum */ ++} zio_eck_t; ++ ++/* ++ * Gang block headers are self-checksumming and contain an array ++ * of block pointers. 
++ */ ++#define SPA_GANGBLOCKSIZE SPA_MINBLOCKSIZE ++#define SPA_GBH_NBLKPTRS ((SPA_GANGBLOCKSIZE - \ ++ sizeof (zio_eck_t)) / sizeof (blkptr_t)) ++#define SPA_GBH_FILLER ((SPA_GANGBLOCKSIZE - \ ++ sizeof (zio_eck_t) - \ ++ (SPA_GBH_NBLKPTRS * sizeof (blkptr_t))) /\ ++ sizeof (uint64_t)) ++ ++typedef struct zio_gbh { ++ blkptr_t zg_blkptr[SPA_GBH_NBLKPTRS]; ++ uint64_t zg_filler[SPA_GBH_FILLER]; ++ zio_eck_t zg_tail; ++} zio_gbh_phys_t; ++ ++enum zio_checksum { ++ ZIO_CHECKSUM_INHERIT = 0, ++ ZIO_CHECKSUM_ON, ++ ZIO_CHECKSUM_OFF, ++ ZIO_CHECKSUM_LABEL, ++ ZIO_CHECKSUM_GANG_HEADER, ++ ZIO_CHECKSUM_ZILOG, ++ ZIO_CHECKSUM_FLETCHER_2, ++ ZIO_CHECKSUM_FLETCHER_4, ++ ZIO_CHECKSUM_SHA256, ++ ZIO_CHECKSUM_ZILOG2, ++ ZIO_CHECKSUM_FUNCTIONS ++}; ++ ++#define ZIO_CHECKSUM_ON_VALUE ZIO_CHECKSUM_FLETCHER_4 ++#define ZIO_CHECKSUM_DEFAULT ZIO_CHECKSUM_ON ++ ++#define ZIO_CHECKSUM_MASK 0xffULL ++#define ZIO_CHECKSUM_VERIFY (1 << 8) ++ ++#define ZIO_DEDUPCHECKSUM ZIO_CHECKSUM_SHA256 ++#define ZIO_DEDUPDITTO_MIN 100 ++ ++enum zio_compress { ++ ZIO_COMPRESS_INHERIT = 0, ++ ZIO_COMPRESS_ON, ++ ZIO_COMPRESS_OFF, ++ ZIO_COMPRESS_LZJB, ++ ZIO_COMPRESS_EMPTY, ++ ZIO_COMPRESS_GZIP_1, ++ ZIO_COMPRESS_GZIP_2, ++ ZIO_COMPRESS_GZIP_3, ++ ZIO_COMPRESS_GZIP_4, ++ ZIO_COMPRESS_GZIP_5, ++ ZIO_COMPRESS_GZIP_6, ++ ZIO_COMPRESS_GZIP_7, ++ ZIO_COMPRESS_GZIP_8, ++ ZIO_COMPRESS_GZIP_9, ++ ZIO_COMPRESS_ZLE, ++ ZIO_COMPRESS_FUNCTIONS ++}; ++ ++#define ZIO_COMPRESS_ON_VALUE ZIO_COMPRESS_LZJB ++#define ZIO_COMPRESS_DEFAULT ZIO_COMPRESS_OFF ++ ++#define BOOTFS_COMPRESS_VALID(compress) \ ++ ((compress) == ZIO_COMPRESS_LZJB || \ ++ ((compress) == ZIO_COMPRESS_ON && \ ++ ZIO_COMPRESS_ON_VALUE == ZIO_COMPRESS_LZJB) || \ ++ (compress) == ZIO_COMPRESS_OFF) ++ ++/* ++ * Default Linux timeout for a sd device. ++ */ ++#define ZIO_DELAY_MAX (30 * MILLISEC) ++ ++#define ZIO_FAILURE_MODE_WAIT 0 ++#define ZIO_FAILURE_MODE_CONTINUE 1 ++#define ZIO_FAILURE_MODE_PANIC 2 ++ ++#define ZIO_PRIORITY_NOW (zio_priority_table[0]) ++#define ZIO_PRIORITY_SYNC_READ (zio_priority_table[1]) ++#define ZIO_PRIORITY_SYNC_WRITE (zio_priority_table[2]) ++#define ZIO_PRIORITY_LOG_WRITE (zio_priority_table[3]) ++#define ZIO_PRIORITY_CACHE_FILL (zio_priority_table[4]) ++#define ZIO_PRIORITY_AGG (zio_priority_table[5]) ++#define ZIO_PRIORITY_FREE (zio_priority_table[6]) ++#define ZIO_PRIORITY_ASYNC_WRITE (zio_priority_table[7]) ++#define ZIO_PRIORITY_ASYNC_READ (zio_priority_table[8]) ++#define ZIO_PRIORITY_RESILVER (zio_priority_table[9]) ++#define ZIO_PRIORITY_SCRUB (zio_priority_table[10]) ++#define ZIO_PRIORITY_DDT_PREFETCH (zio_priority_table[11]) ++#define ZIO_PRIORITY_TABLE_SIZE 12 ++ ++#define ZIO_PIPELINE_CONTINUE 0x100 ++#define ZIO_PIPELINE_STOP 0x101 ++ ++enum zio_flag { ++ /* ++ * Flags inherited by gang, ddt, and vdev children, ++ * and that must be equal for two zios to aggregate ++ */ ++ ZIO_FLAG_DONT_AGGREGATE = 1 << 0, ++ ZIO_FLAG_IO_REPAIR = 1 << 1, ++ ZIO_FLAG_SELF_HEAL = 1 << 2, ++ ZIO_FLAG_RESILVER = 1 << 3, ++ ZIO_FLAG_SCRUB = 1 << 4, ++ ZIO_FLAG_SCAN_THREAD = 1 << 5, ++ ++#define ZIO_FLAG_AGG_INHERIT (ZIO_FLAG_CANFAIL - 1) ++ ++ /* ++ * Flags inherited by ddt, gang, and vdev children. 
++ */ ++ ZIO_FLAG_CANFAIL = 1 << 6, /* must be first for INHERIT */ ++ ZIO_FLAG_SPECULATIVE = 1 << 7, ++ ZIO_FLAG_CONFIG_WRITER = 1 << 8, ++ ZIO_FLAG_DONT_RETRY = 1 << 9, ++ ZIO_FLAG_DONT_CACHE = 1 << 10, ++ ZIO_FLAG_NODATA = 1 << 11, ++ ZIO_FLAG_INDUCE_DAMAGE = 1 << 12, ++ ++#define ZIO_FLAG_DDT_INHERIT (ZIO_FLAG_IO_RETRY - 1) ++#define ZIO_FLAG_GANG_INHERIT (ZIO_FLAG_IO_RETRY - 1) ++ ++ /* ++ * Flags inherited by vdev children. ++ */ ++ ZIO_FLAG_IO_RETRY = 1 << 13, /* must be first for INHERIT */ ++ ZIO_FLAG_PROBE = 1 << 14, ++ ZIO_FLAG_TRYHARD = 1 << 15, ++ ZIO_FLAG_OPTIONAL = 1 << 16, ++ ++#define ZIO_FLAG_VDEV_INHERIT (ZIO_FLAG_DONT_QUEUE - 1) ++ ++ /* ++ * Flags not inherited by any children. ++ */ ++ ZIO_FLAG_DONT_QUEUE = 1 << 17, /* must be first for INHERIT */ ++ ZIO_FLAG_DONT_PROPAGATE = 1 << 18, ++ ZIO_FLAG_IO_BYPASS = 1 << 19, ++ ZIO_FLAG_IO_REWRITE = 1 << 20, ++ ZIO_FLAG_RAW = 1 << 21, ++ ZIO_FLAG_GANG_CHILD = 1 << 22, ++ ZIO_FLAG_DDT_CHILD = 1 << 23, ++ ZIO_FLAG_GODFATHER = 1 << 24, ++ ZIO_FLAG_FASTWRITE = 1 << 25 ++}; ++ ++#define ZIO_FLAG_MUSTSUCCEED 0 ++ ++#define ZIO_DDT_CHILD_FLAGS(zio) \ ++ (((zio)->io_flags & ZIO_FLAG_DDT_INHERIT) | \ ++ ZIO_FLAG_DDT_CHILD | ZIO_FLAG_CANFAIL) ++ ++#define ZIO_GANG_CHILD_FLAGS(zio) \ ++ (((zio)->io_flags & ZIO_FLAG_GANG_INHERIT) | \ ++ ZIO_FLAG_GANG_CHILD | ZIO_FLAG_CANFAIL) ++ ++#define ZIO_VDEV_CHILD_FLAGS(zio) \ ++ (((zio)->io_flags & ZIO_FLAG_VDEV_INHERIT) | \ ++ ZIO_FLAG_CANFAIL) ++ ++enum zio_child { ++ ZIO_CHILD_VDEV = 0, ++ ZIO_CHILD_GANG, ++ ZIO_CHILD_DDT, ++ ZIO_CHILD_LOGICAL, ++ ZIO_CHILD_TYPES ++}; ++ ++enum zio_wait_type { ++ ZIO_WAIT_READY = 0, ++ ZIO_WAIT_DONE, ++ ZIO_WAIT_TYPES ++}; ++ ++/* ++ * We'll take the unused errnos, 'EBADE' and 'EBADR' (from the Convergent ++ * graveyard) to indicate checksum errors and fragmentation. ++ */ ++#define ECKSUM EBADE ++#define EFRAGS EBADR ++ ++typedef void zio_done_func_t(zio_t *zio); ++ ++extern uint8_t zio_priority_table[ZIO_PRIORITY_TABLE_SIZE]; ++extern char *zio_type_name[ZIO_TYPES]; ++ ++/* ++ * A bookmark is a four-tuple that uniquely ++ * identifies any block in the pool. By convention, the meta-objset (MOS) ++ * is objset 0, and the meta-dnode is object 0. This covers all blocks ++ * except root blocks and ZIL blocks, which are defined as follows: ++ * ++ * Root blocks (objset_phys_t) are object 0, level -1: . ++ * ZIL blocks are bookmarked . ++ * dmu_sync()ed ZIL data blocks are bookmarked . ++ * ++ * Note: this structure is called a bookmark because its original purpose ++ * was to remember where to resume a pool-wide traverse. ++ * ++ * Note: this structure is passed between userland and the kernel. ++ * Therefore it must not change size or alignment between 32/64 bit ++ * compilation options. 
++ */ ++typedef struct zbookmark { ++ uint64_t zb_objset; ++ uint64_t zb_object; ++ int64_t zb_level; ++ uint64_t zb_blkid; ++} zbookmark_t; ++ ++#define SET_BOOKMARK(zb, objset, object, level, blkid) \ ++{ \ ++ (zb)->zb_objset = objset; \ ++ (zb)->zb_object = object; \ ++ (zb)->zb_level = level; \ ++ (zb)->zb_blkid = blkid; \ ++} ++ ++#define ZB_DESTROYED_OBJSET (-1ULL) ++ ++#define ZB_ROOT_OBJECT (0ULL) ++#define ZB_ROOT_LEVEL (-1LL) ++#define ZB_ROOT_BLKID (0ULL) ++ ++#define ZB_ZIL_OBJECT (0ULL) ++#define ZB_ZIL_LEVEL (-2LL) ++ ++typedef struct zio_prop { ++ enum zio_checksum zp_checksum; ++ enum zio_compress zp_compress; ++ dmu_object_type_t zp_type; ++ uint8_t zp_level; ++ uint8_t zp_copies; ++ uint8_t zp_dedup; ++ uint8_t zp_dedup_verify; ++} zio_prop_t; ++ ++typedef struct zio_cksum_report zio_cksum_report_t; ++ ++typedef void zio_cksum_finish_f(zio_cksum_report_t *rep, ++ const void *good_data); ++typedef void zio_cksum_free_f(void *cbdata, size_t size); ++ ++struct zio_bad_cksum; /* defined in zio_checksum.h */ ++ ++struct zio_cksum_report { ++ struct zio_cksum_report *zcr_next; ++ nvlist_t *zcr_ereport; ++ nvlist_t *zcr_detector; ++ void *zcr_cbdata; ++ size_t zcr_cbinfo; /* passed to zcr_free() */ ++ uint64_t zcr_align; ++ uint64_t zcr_length; ++ zio_cksum_finish_f *zcr_finish; ++ zio_cksum_free_f *zcr_free; ++ ++ /* internal use only */ ++ struct zio_bad_cksum *zcr_ckinfo; /* information from failure */ ++}; ++ ++typedef void zio_vsd_cksum_report_f(zio_t *zio, zio_cksum_report_t *zcr, ++ void *arg); ++ ++zio_vsd_cksum_report_f zio_vsd_default_cksum_report; ++ ++typedef struct zio_vsd_ops { ++ zio_done_func_t *vsd_free; ++ zio_vsd_cksum_report_f *vsd_cksum_report; ++} zio_vsd_ops_t; ++ ++typedef struct zio_gang_node { ++ zio_gbh_phys_t *gn_gbh; ++ struct zio_gang_node *gn_child[SPA_GBH_NBLKPTRS]; ++} zio_gang_node_t; ++ ++typedef zio_t *zio_gang_issue_func_t(zio_t *zio, blkptr_t *bp, ++ zio_gang_node_t *gn, void *data); ++ ++typedef void zio_transform_func_t(zio_t *zio, void *data, uint64_t size); ++ ++typedef struct zio_transform { ++ void *zt_orig_data; ++ uint64_t zt_orig_size; ++ uint64_t zt_bufsize; ++ zio_transform_func_t *zt_transform; ++ struct zio_transform *zt_next; ++} zio_transform_t; ++ ++typedef int zio_pipe_stage_t(zio_t *zio); ++ ++/* ++ * The io_reexecute flags are distinct from io_flags because the child must ++ * be able to propagate them to the parent. The normal io_flags are local ++ * to the zio, not protected by any lock, and not modifiable by children; ++ * the reexecute flags are protected by io_lock, modifiable by children, ++ * and always propagated -- even when ZIO_FLAG_DONT_PROPAGATE is set. 
++ */ ++#define ZIO_REEXECUTE_NOW 0x01 ++#define ZIO_REEXECUTE_SUSPEND 0x02 ++ ++typedef struct zio_link { ++ zio_t *zl_parent; ++ zio_t *zl_child; ++ list_node_t zl_parent_node; ++ list_node_t zl_child_node; ++} zio_link_t; ++ ++struct zio { ++ /* Core information about this I/O */ ++ zbookmark_t io_bookmark; ++ zio_prop_t io_prop; ++ zio_type_t io_type; ++ enum zio_child io_child_type; ++ int io_cmd; ++ uint8_t io_priority; ++ uint8_t io_reexecute; ++ uint8_t io_state[ZIO_WAIT_TYPES]; ++ uint64_t io_txg; ++ spa_t *io_spa; ++ blkptr_t *io_bp; ++ blkptr_t *io_bp_override; ++ blkptr_t io_bp_copy; ++ list_t io_parent_list; ++ list_t io_child_list; ++ zio_link_t *io_walk_link; ++ zio_t *io_logical; ++ zio_transform_t *io_transform_stack; ++ ++ /* Callback info */ ++ zio_done_func_t *io_ready; ++ zio_done_func_t *io_done; ++ void *io_private; ++ int64_t io_prev_space_delta; /* DMU private */ ++ blkptr_t io_bp_orig; ++ ++ /* Data represented by this I/O */ ++ void *io_data; ++ void *io_orig_data; ++ uint64_t io_size; ++ uint64_t io_orig_size; ++ ++ /* Stuff for the vdev stack */ ++ vdev_t *io_vd; ++ void *io_vsd; ++ const zio_vsd_ops_t *io_vsd_ops; ++ ++ uint64_t io_offset; ++ uint64_t io_deadline; ++ avl_node_t io_offset_node; ++ avl_node_t io_deadline_node; ++ avl_tree_t *io_vdev_tree; ++ ++ /* Internal pipeline state */ ++ enum zio_flag io_flags; ++ enum zio_stage io_stage; ++ enum zio_stage io_pipeline; ++ enum zio_flag io_orig_flags; ++ enum zio_stage io_orig_stage; ++ enum zio_stage io_orig_pipeline; ++ uint64_t io_delay; ++ int io_error; ++ int io_child_error[ZIO_CHILD_TYPES]; ++ uint64_t io_children[ZIO_CHILD_TYPES][ZIO_WAIT_TYPES]; ++ uint64_t io_child_count; ++ uint64_t io_parent_count; ++ uint64_t *io_stall; ++ zio_t *io_gang_leader; ++ zio_gang_node_t *io_gang_tree; ++ void *io_executor; ++ void *io_waiter; ++ kmutex_t io_lock; ++ kcondvar_t io_cv; ++ ++ /* FMA state */ ++ zio_cksum_report_t *io_cksum_report; ++ uint64_t io_ena; ++ ++ /* Taskq dispatching state */ ++ taskq_ent_t io_tqent; ++}; ++ ++extern zio_t *zio_null(zio_t *pio, spa_t *spa, vdev_t *vd, ++ zio_done_func_t *done, void *private, enum zio_flag flags); ++ ++extern zio_t *zio_root(spa_t *spa, ++ zio_done_func_t *done, void *private, enum zio_flag flags); ++ ++extern zio_t *zio_read(zio_t *pio, spa_t *spa, const blkptr_t *bp, void *data, ++ uint64_t size, zio_done_func_t *done, void *private, ++ int priority, enum zio_flag flags, const zbookmark_t *zb); ++ ++extern zio_t *zio_write(zio_t *pio, spa_t *spa, uint64_t txg, blkptr_t *bp, ++ void *data, uint64_t size, const zio_prop_t *zp, ++ zio_done_func_t *ready, zio_done_func_t *done, void *private, ++ int priority, enum zio_flag flags, const zbookmark_t *zb); ++ ++extern zio_t *zio_rewrite(zio_t *pio, spa_t *spa, uint64_t txg, blkptr_t *bp, ++ void *data, uint64_t size, zio_done_func_t *done, void *private, ++ int priority, enum zio_flag flags, zbookmark_t *zb); ++ ++extern void zio_write_override(zio_t *zio, blkptr_t *bp, int copies); ++ ++extern void zio_free(spa_t *spa, uint64_t txg, const blkptr_t *bp); ++ ++extern zio_t *zio_claim(zio_t *pio, spa_t *spa, uint64_t txg, ++ const blkptr_t *bp, ++ zio_done_func_t *done, void *private, enum zio_flag flags); ++ ++extern zio_t *zio_ioctl(zio_t *pio, spa_t *spa, vdev_t *vd, int cmd, ++ zio_done_func_t *done, void *private, int priority, enum zio_flag flags); ++ ++extern zio_t *zio_read_phys(zio_t *pio, vdev_t *vd, uint64_t offset, ++ uint64_t size, void *data, int checksum, ++ zio_done_func_t *done, void *private, int 
priority, enum zio_flag flags, ++ boolean_t labels); ++ ++extern zio_t *zio_write_phys(zio_t *pio, vdev_t *vd, uint64_t offset, ++ uint64_t size, void *data, int checksum, ++ zio_done_func_t *done, void *private, int priority, enum zio_flag flags, ++ boolean_t labels); ++ ++extern zio_t *zio_free_sync(zio_t *pio, spa_t *spa, uint64_t txg, ++ const blkptr_t *bp, enum zio_flag flags); ++ ++extern int zio_alloc_zil(spa_t *spa, uint64_t txg, blkptr_t *new_bp, ++ uint64_t size, boolean_t use_slog); ++extern void zio_free_zil(spa_t *spa, uint64_t txg, blkptr_t *bp); ++extern void zio_flush(zio_t *zio, vdev_t *vd); ++extern void zio_shrink(zio_t *zio, uint64_t size); ++ ++extern int zio_wait(zio_t *zio); ++extern void zio_nowait(zio_t *zio); ++extern void zio_execute(zio_t *zio); ++extern void zio_interrupt(zio_t *zio); ++ ++extern zio_t *zio_walk_parents(zio_t *cio); ++extern zio_t *zio_walk_children(zio_t *pio); ++extern zio_t *zio_unique_parent(zio_t *cio); ++extern void zio_add_child(zio_t *pio, zio_t *cio); ++ ++extern void *zio_buf_alloc(size_t size); ++extern void zio_buf_free(void *buf, size_t size); ++extern void *zio_data_buf_alloc(size_t size); ++extern void zio_data_buf_free(void *buf, size_t size); ++extern void *zio_vdev_alloc(void); ++extern void zio_vdev_free(void *buf); ++ ++extern void zio_resubmit_stage_async(void *); ++ ++extern zio_t *zio_vdev_child_io(zio_t *zio, blkptr_t *bp, vdev_t *vd, ++ uint64_t offset, void *data, uint64_t size, int type, int priority, ++ enum zio_flag flags, zio_done_func_t *done, void *private); ++ ++extern zio_t *zio_vdev_delegated_io(vdev_t *vd, uint64_t offset, ++ void *data, uint64_t size, int type, int priority, ++ enum zio_flag flags, zio_done_func_t *done, void *private); ++ ++extern void zio_vdev_io_bypass(zio_t *zio); ++extern void zio_vdev_io_reissue(zio_t *zio); ++extern void zio_vdev_io_redone(zio_t *zio); ++ ++extern void zio_checksum_verified(zio_t *zio); ++extern int zio_worst_error(int e1, int e2); ++ ++extern enum zio_checksum zio_checksum_select(enum zio_checksum child, ++ enum zio_checksum parent); ++extern enum zio_checksum zio_checksum_dedup_select(spa_t *spa, ++ enum zio_checksum child, enum zio_checksum parent); ++extern enum zio_compress zio_compress_select(enum zio_compress child, ++ enum zio_compress parent); ++ ++extern void zio_suspend(spa_t *spa, zio_t *zio); ++extern int zio_resume(spa_t *spa); ++extern void zio_resume_wait(spa_t *spa); ++ ++/* ++ * Initial setup and teardown. 
++ */ ++extern void zio_init(void); ++extern void zio_fini(void); ++ ++/* ++ * Fault injection ++ */ ++struct zinject_record; ++extern uint32_t zio_injection_enabled; ++extern int zio_inject_fault(char *name, int flags, int *id, ++ struct zinject_record *record); ++extern int zio_inject_list_next(int *id, char *name, size_t buflen, ++ struct zinject_record *record); ++extern int zio_clear_fault(int id); ++extern void zio_handle_panic_injection(spa_t *spa, char *tag, uint64_t type); ++extern int zio_handle_fault_injection(zio_t *zio, int error); ++extern int zio_handle_device_injection(vdev_t *vd, zio_t *zio, int error); ++extern int zio_handle_label_injection(zio_t *zio, int error); ++extern void zio_handle_ignored_writes(zio_t *zio); ++ ++/* ++ * Checksum ereport functions ++ */ ++extern void zfs_ereport_start_checksum(spa_t *spa, vdev_t *vd, struct zio *zio, ++ uint64_t offset, uint64_t length, void *arg, struct zio_bad_cksum *info); ++extern void zfs_ereport_finish_checksum(zio_cksum_report_t *report, ++ const void *good_data, const void *bad_data, boolean_t drop_if_identical); ++ ++extern void zfs_ereport_send_interim_checksum(zio_cksum_report_t *report); ++extern void zfs_ereport_free_checksum(zio_cksum_report_t *report); ++ ++/* If we have the good data in hand, this function can be used */ ++extern void zfs_ereport_post_checksum(spa_t *spa, vdev_t *vd, ++ struct zio *zio, uint64_t offset, uint64_t length, ++ const void *good_data, const void *bad_data, struct zio_bad_cksum *info); ++ ++/* Called from spa_sync(), but primarily an injection handler */ ++extern void spa_handle_ignored_writes(spa_t *spa); ++ ++#ifdef __cplusplus ++} ++#endif ++ ++#endif /* _ZIO_H */ +diff -uNr linux-3.2.33-go.orig/include/zfs/sys/zio_impl.h linux-3.2.33-go/include/zfs/sys/zio_impl.h +--- linux-3.2.33-go.orig/include/zfs/sys/zio_impl.h 1970-01-01 01:00:00.000000000 +0100 ++++ linux-3.2.33-go/include/zfs/sys/zio_impl.h 2012-11-16 23:25:34.337039473 +0100 +@@ -0,0 +1,175 @@ ++/* ++ * CDDL HEADER START ++ * ++ * The contents of this file are subject to the terms of the ++ * Common Development and Distribution License (the "License"). ++ * You may not use this file except in compliance with the License. ++ * ++ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE ++ * or http://www.opensolaris.org/os/licensing. ++ * See the License for the specific language governing permissions ++ * and limitations under the License. ++ * ++ * When distributing Covered Code, include this CDDL HEADER in each ++ * file and include the License file at usr/src/OPENSOLARIS.LICENSE. ++ * If applicable, add the following below this CDDL HEADER, with the ++ * fields enclosed by brackets "[]" replaced with your own identifying ++ * information: Portions Copyright [yyyy] [name of copyright owner] ++ * ++ * CDDL HEADER END ++ */ ++/* ++ * Copyright 2009 Sun Microsystems, Inc. All rights reserved. ++ * Use is subject to license terms. 
++ */ ++ ++#ifndef _ZIO_IMPL_H ++#define _ZIO_IMPL_H ++ ++#include ++#include ++ ++#ifdef __cplusplus ++extern "C" { ++#endif ++ ++/* ++ * zio pipeline stage definitions ++ */ ++enum zio_stage { ++ ZIO_STAGE_OPEN = 1 << 0, /* RWFCI */ ++ ++ ZIO_STAGE_READ_BP_INIT = 1 << 1, /* R---- */ ++ ZIO_STAGE_FREE_BP_INIT = 1 << 2, /* --F-- */ ++ ZIO_STAGE_ISSUE_ASYNC = 1 << 3, /* RWF-- */ ++ ZIO_STAGE_WRITE_BP_INIT = 1 << 4, /* -W--- */ ++ ++ ZIO_STAGE_CHECKSUM_GENERATE = 1 << 5, /* -W--- */ ++ ++ ZIO_STAGE_DDT_READ_START = 1 << 6, /* R---- */ ++ ZIO_STAGE_DDT_READ_DONE = 1 << 7, /* R---- */ ++ ZIO_STAGE_DDT_WRITE = 1 << 8, /* -W--- */ ++ ZIO_STAGE_DDT_FREE = 1 << 9, /* --F-- */ ++ ++ ZIO_STAGE_GANG_ASSEMBLE = 1 << 10, /* RWFC- */ ++ ZIO_STAGE_GANG_ISSUE = 1 << 11, /* RWFC- */ ++ ++ ZIO_STAGE_DVA_ALLOCATE = 1 << 12, /* -W--- */ ++ ZIO_STAGE_DVA_FREE = 1 << 13, /* --F-- */ ++ ZIO_STAGE_DVA_CLAIM = 1 << 14, /* ---C- */ ++ ++ ZIO_STAGE_READY = 1 << 15, /* RWFCI */ ++ ++ ZIO_STAGE_VDEV_IO_START = 1 << 16, /* RW--I */ ++ ZIO_STAGE_VDEV_IO_DONE = 1 << 17, /* RW--I */ ++ ZIO_STAGE_VDEV_IO_ASSESS = 1 << 18, /* RW--I */ ++ ++ ZIO_STAGE_CHECKSUM_VERIFY = 1 << 19, /* R---- */ ++ ++ ZIO_STAGE_DONE = 1 << 20 /* RWFCI */ ++}; ++ ++#define ZIO_INTERLOCK_STAGES \ ++ (ZIO_STAGE_READY | \ ++ ZIO_STAGE_DONE) ++ ++#define ZIO_INTERLOCK_PIPELINE \ ++ ZIO_INTERLOCK_STAGES ++ ++#define ZIO_VDEV_IO_STAGES \ ++ (ZIO_STAGE_VDEV_IO_START | \ ++ ZIO_STAGE_VDEV_IO_DONE | \ ++ ZIO_STAGE_VDEV_IO_ASSESS) ++ ++#define ZIO_VDEV_CHILD_PIPELINE \ ++ (ZIO_VDEV_IO_STAGES | \ ++ ZIO_STAGE_DONE) ++ ++#define ZIO_READ_COMMON_STAGES \ ++ (ZIO_INTERLOCK_STAGES | \ ++ ZIO_VDEV_IO_STAGES | \ ++ ZIO_STAGE_CHECKSUM_VERIFY) ++ ++#define ZIO_READ_PHYS_PIPELINE \ ++ ZIO_READ_COMMON_STAGES ++ ++#define ZIO_READ_PIPELINE \ ++ (ZIO_READ_COMMON_STAGES | \ ++ ZIO_STAGE_READ_BP_INIT) ++ ++#define ZIO_DDT_CHILD_READ_PIPELINE \ ++ ZIO_READ_COMMON_STAGES ++ ++#define ZIO_DDT_READ_PIPELINE \ ++ (ZIO_INTERLOCK_STAGES | \ ++ ZIO_STAGE_READ_BP_INIT | \ ++ ZIO_STAGE_DDT_READ_START | \ ++ ZIO_STAGE_DDT_READ_DONE) ++ ++#define ZIO_WRITE_COMMON_STAGES \ ++ (ZIO_INTERLOCK_STAGES | \ ++ ZIO_VDEV_IO_STAGES | \ ++ ZIO_STAGE_ISSUE_ASYNC | \ ++ ZIO_STAGE_CHECKSUM_GENERATE) ++ ++#define ZIO_WRITE_PHYS_PIPELINE \ ++ ZIO_WRITE_COMMON_STAGES ++ ++#define ZIO_REWRITE_PIPELINE \ ++ (ZIO_WRITE_COMMON_STAGES | \ ++ ZIO_STAGE_WRITE_BP_INIT) ++ ++#define ZIO_WRITE_PIPELINE \ ++ (ZIO_WRITE_COMMON_STAGES | \ ++ ZIO_STAGE_WRITE_BP_INIT | \ ++ ZIO_STAGE_DVA_ALLOCATE) ++ ++#define ZIO_DDT_CHILD_WRITE_PIPELINE \ ++ (ZIO_INTERLOCK_STAGES | \ ++ ZIO_VDEV_IO_STAGES | \ ++ ZIO_STAGE_DVA_ALLOCATE) ++ ++#define ZIO_DDT_WRITE_PIPELINE \ ++ (ZIO_INTERLOCK_STAGES | \ ++ ZIO_STAGE_ISSUE_ASYNC | \ ++ ZIO_STAGE_WRITE_BP_INIT | \ ++ ZIO_STAGE_CHECKSUM_GENERATE | \ ++ ZIO_STAGE_DDT_WRITE) ++ ++#define ZIO_GANG_STAGES \ ++ (ZIO_STAGE_GANG_ASSEMBLE | \ ++ ZIO_STAGE_GANG_ISSUE) ++ ++#define ZIO_FREE_PIPELINE \ ++ (ZIO_INTERLOCK_STAGES | \ ++ ZIO_STAGE_FREE_BP_INIT | \ ++ ZIO_STAGE_DVA_FREE) ++ ++#define ZIO_DDT_FREE_PIPELINE \ ++ (ZIO_INTERLOCK_STAGES | \ ++ ZIO_STAGE_FREE_BP_INIT | \ ++ ZIO_STAGE_ISSUE_ASYNC | \ ++ ZIO_STAGE_DDT_FREE) ++ ++#define ZIO_CLAIM_PIPELINE \ ++ (ZIO_INTERLOCK_STAGES | \ ++ ZIO_STAGE_DVA_CLAIM) ++ ++#define ZIO_IOCTL_PIPELINE \ ++ (ZIO_INTERLOCK_STAGES | \ ++ ZIO_STAGE_VDEV_IO_START | \ ++ ZIO_STAGE_VDEV_IO_ASSESS) ++ ++#define ZIO_BLOCKING_STAGES \ ++ (ZIO_STAGE_DVA_ALLOCATE | \ ++ ZIO_STAGE_DVA_CLAIM | \ ++ ZIO_STAGE_VDEV_IO_START) ++ ++extern void zio_inject_init(void); 
++extern void zio_inject_fini(void); ++ ++#ifdef __cplusplus ++} ++#endif ++ ++#endif /* _ZIO_IMPL_H */ +diff -uNr linux-3.2.33-go.orig/include/zfs/sys/zpl.h linux-3.2.33-go/include/zfs/sys/zpl.h +--- linux-3.2.33-go.orig/include/zfs/sys/zpl.h 1970-01-01 01:00:00.000000000 +0100 ++++ linux-3.2.33-go/include/zfs/sys/zpl.h 2012-11-16 23:25:34.337039473 +0100 +@@ -0,0 +1,90 @@ ++/* ++ * CDDL HEADER START ++ * ++ * The contents of this file are subject to the terms of the ++ * Common Development and Distribution License (the "License"). ++ * You may not use this file except in compliance with the License. ++ * ++ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE ++ * or http://www.opensolaris.org/os/licensing. ++ * See the License for the specific language governing permissions ++ * and limitations under the License. ++ * ++ * When distributing Covered Code, include this CDDL HEADER in each ++ * file and include the License file at usr/src/OPENSOLARIS.LICENSE. ++ * If applicable, add the following below this CDDL HEADER, with the ++ * fields enclosed by brackets "[]" replaced with your own identifying ++ * information: Portions Copyright [yyyy] [name of copyright owner] ++ * ++ * CDDL HEADER END ++ */ ++/* ++ * Copyright (c) 2011, Lawrence Livermore National Security, LLC. ++ */ ++ ++#ifndef _SYS_ZPL_H ++#define _SYS_ZPL_H ++ ++#include ++#include ++#include ++#include ++#include ++#include ++ ++/* zpl_inode.c */ ++extern void zpl_vap_init(vattr_t *vap, struct inode *dir, ++ struct dentry *dentry, zpl_umode_t mode, cred_t *cr); ++ ++extern const struct inode_operations zpl_inode_operations; ++extern const struct inode_operations zpl_dir_inode_operations; ++extern const struct inode_operations zpl_symlink_inode_operations; ++extern const struct inode_operations zpl_special_inode_operations; ++ ++/* zpl_file.c */ ++extern ssize_t zpl_read_common(struct inode *ip, const char *buf, ++ size_t len, loff_t pos, uio_seg_t segment, int flags, cred_t *cr); ++extern ssize_t zpl_write_common(struct inode *ip, const char *buf, ++ size_t len, loff_t pos, uio_seg_t segment, int flags, cred_t *cr); ++extern long zpl_fallocate_common(struct inode *ip, int mode, ++ loff_t offset, loff_t len); ++ ++extern const struct address_space_operations zpl_address_space_operations; ++extern const struct file_operations zpl_file_operations; ++extern const struct file_operations zpl_dir_file_operations; ++ ++/* zpl_super.c */ ++extern void zpl_prune_sbs(int64_t bytes_to_scan, void *private); ++ ++typedef struct zpl_mount_data { ++ const char *z_osname; /* Dataset name */ ++ void *z_data; /* Mount options string */ ++} zpl_mount_data_t; ++ ++extern const struct super_operations zpl_super_operations; ++extern const struct export_operations zpl_export_operations; ++extern struct file_system_type zpl_fs_type; ++ ++/* zpl_xattr.c */ ++extern ssize_t zpl_xattr_list(struct dentry *dentry, char *buf, size_t size); ++extern int zpl_xattr_security_init(struct inode *ip, struct inode *dip, ++ const struct qstr *qstr); ++ ++extern xattr_handler_t *zpl_xattr_handlers[]; ++ ++/* zpl_ctldir.c */ ++extern const struct file_operations zpl_fops_root; ++extern const struct inode_operations zpl_ops_root; ++ ++extern const struct file_operations zpl_fops_snapdir; ++extern const struct inode_operations zpl_ops_snapdir; ++#ifdef HAVE_AUTOMOUNT ++extern const struct dentry_operations zpl_dops_snapdirs; ++#else ++extern const struct inode_operations zpl_ops_snapdirs; ++#endif /* HAVE_AUTOMOUNT */ ++ ++extern const struct 
file_operations zpl_fops_shares; ++extern const struct inode_operations zpl_ops_shares; ++ ++#endif /* _SYS_ZPL_H */ +diff -uNr linux-3.2.33-go.orig/include/zfs/sys/zrlock.h linux-3.2.33-go/include/zfs/sys/zrlock.h +--- linux-3.2.33-go.orig/include/zfs/sys/zrlock.h 1970-01-01 01:00:00.000000000 +0100 ++++ linux-3.2.33-go/include/zfs/sys/zrlock.h 2012-11-16 23:25:34.342039415 +0100 +@@ -0,0 +1,66 @@ ++/* ++ * CDDL HEADER START ++ * ++ * The contents of this file are subject to the terms of the ++ * Common Development and Distribution License (the "License"). ++ * You may not use this file except in compliance with the License. ++ * ++ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE ++ * or http://www.opensolaris.org/os/licensing. ++ * See the License for the specific language governing permissions ++ * and limitations under the License. ++ * ++ * When distributing Covered Code, include this CDDL HEADER in each ++ * file and include the License file at usr/src/OPENSOLARIS.LICENSE. ++ * If applicable, add the following below this CDDL HEADER, with the ++ * fields enclosed by brackets "[]" replaced with your own identifying ++ * information: Portions Copyright [yyyy] [name of copyright owner] ++ * ++ * CDDL HEADER END ++ */ ++/* ++ * Copyright (c) 2010, Oracle and/or its affiliates. All rights reserved. ++ */ ++ ++#ifndef _SYS_ZRLOCK_H ++#define _SYS_ZRLOCK_H ++ ++#include ++ ++#ifdef __cplusplus ++extern "C" { ++#endif ++ ++typedef struct zrlock { ++ kmutex_t zr_mtx; ++ volatile int32_t zr_refcount; ++ kcondvar_t zr_cv; ++ uint16_t zr_pad; ++#ifdef ZFS_DEBUG ++ kthread_t *zr_owner; ++ const char *zr_caller; ++#endif ++} zrlock_t; ++ ++extern void zrl_init(zrlock_t *); ++extern void zrl_destroy(zrlock_t *); ++#ifdef ZFS_DEBUG ++#define zrl_add(_z) zrl_add_debug((_z), __func__) ++extern void zrl_add_debug(zrlock_t *, const char *); ++#else ++extern void zrl_add(zrlock_t *); ++#endif ++extern void zrl_remove(zrlock_t *); ++extern int zrl_tryenter(zrlock_t *); ++extern void zrl_exit(zrlock_t *); ++extern int zrl_is_zero(zrlock_t *); ++extern int zrl_is_locked(zrlock_t *); ++#ifdef ZFS_DEBUG ++extern kthread_t *zrl_owner(zrlock_t *); ++#endif ++ ++#ifdef __cplusplus ++} ++#endif ++ ++#endif /* _SYS_ZRLOCK_H */ +diff -uNr linux-3.2.33-go.orig/include/zfs/sys/zvol.h linux-3.2.33-go/include/zfs/sys/zvol.h +--- linux-3.2.33-go.orig/include/zfs/sys/zvol.h 1970-01-01 01:00:00.000000000 +0100 ++++ linux-3.2.33-go/include/zfs/sys/zvol.h 2012-11-16 23:25:34.339039449 +0100 +@@ -0,0 +1,51 @@ ++/* ++ * CDDL HEADER START ++ * ++ * The contents of this file are subject to the terms of the ++ * Common Development and Distribution License (the "License"). ++ * You may not use this file except in compliance with the License. ++ * ++ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE ++ * or http://www.opensolaris.org/os/licensing. ++ * See the License for the specific language governing permissions ++ * and limitations under the License. ++ * ++ * When distributing Covered Code, include this CDDL HEADER in each ++ * file and include the License file at usr/src/OPENSOLARIS.LICENSE. ++ * If applicable, add the following below this CDDL HEADER, with the ++ * fields enclosed by brackets "[]" replaced with your own identifying ++ * information: Portions Copyright [yyyy] [name of copyright owner] ++ * ++ * CDDL HEADER END ++ */ ++ ++/* ++ * Copyright (c) 2006, 2010, Oracle and/or its affiliates. All rights reserved. 
++ */ ++ ++#ifndef _SYS_ZVOL_H ++#define _SYS_ZVOL_H ++ ++#include ++ ++#define ZVOL_OBJ 1ULL ++#define ZVOL_ZAP_OBJ 2ULL ++ ++#ifdef _KERNEL ++ ++extern int zvol_check_volsize(uint64_t volsize, uint64_t blocksize); ++extern int zvol_check_volblocksize(uint64_t volblocksize); ++extern int zvol_get_stats(objset_t *os, nvlist_t *nv); ++extern void zvol_create_cb(objset_t *os, void *arg, cred_t *cr, dmu_tx_t *tx); ++extern int zvol_create_minor(const char *); ++extern int zvol_create_minors(const char *); ++extern int zvol_remove_minor(const char *); ++extern void zvol_remove_minors(const char *); ++extern int zvol_set_volsize(const char *, uint64_t); ++extern int zvol_set_volblocksize(const char *, uint64_t); ++ ++extern int zvol_init(void); ++extern void zvol_fini(void); ++ ++#endif /* _KERNEL */ ++#endif /* _SYS_ZVOL_H */ +diff -uNr linux-3.2.33-go.orig/include/zfs/zfs_comutil.h linux-3.2.33-go/include/zfs/zfs_comutil.h +--- linux-3.2.33-go.orig/include/zfs/zfs_comutil.h 1970-01-01 01:00:00.000000000 +0100 ++++ linux-3.2.33-go/include/zfs/zfs_comutil.h 2012-11-16 23:25:34.344039393 +0100 +@@ -0,0 +1,46 @@ ++/* ++ * CDDL HEADER START ++ * ++ * The contents of this file are subject to the terms of the ++ * Common Development and Distribution License (the "License"). ++ * You may not use this file except in compliance with the License. ++ * ++ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE ++ * or http://www.opensolaris.org/os/licensing. ++ * See the License for the specific language governing permissions ++ * and limitations under the License. ++ * ++ * When distributing Covered Code, include this CDDL HEADER in each ++ * file and include the License file at usr/src/OPENSOLARIS.LICENSE. ++ * If applicable, add the following below this CDDL HEADER, with the ++ * fields enclosed by brackets "[]" replaced with your own identifying ++ * information: Portions Copyright [yyyy] [name of copyright owner] ++ * ++ * CDDL HEADER END ++ */ ++/* ++ * Copyright (c) 2008, 2010, Oracle and/or its affiliates. All rights reserved. ++ */ ++ ++#ifndef _ZFS_COMUTIL_H ++#define _ZFS_COMUTIL_H ++ ++#include ++#include ++ ++#ifdef __cplusplus ++extern "C" { ++#endif ++ ++extern boolean_t zfs_allocatable_devs(nvlist_t *); ++extern void zpool_get_rewind_policy(nvlist_t *, zpool_rewind_policy_t *); ++ ++extern int zfs_zpl_version_map(int spa_version); ++extern int zfs_spa_version_map(int zpl_version); ++extern const char *zfs_history_event_names[LOG_END]; ++ ++#ifdef __cplusplus ++} ++#endif ++ ++#endif /* _ZFS_COMUTIL_H */ +diff -uNr linux-3.2.33-go.orig/include/zfs/zfs_deleg.h linux-3.2.33-go/include/zfs/zfs_deleg.h +--- linux-3.2.33-go.orig/include/zfs/zfs_deleg.h 1970-01-01 01:00:00.000000000 +0100 ++++ linux-3.2.33-go/include/zfs/zfs_deleg.h 2012-11-16 23:25:34.336039485 +0100 +@@ -0,0 +1,87 @@ ++/* ++ * CDDL HEADER START ++ * ++ * The contents of this file are subject to the terms of the ++ * Common Development and Distribution License (the "License"). ++ * You may not use this file except in compliance with the License. ++ * ++ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE ++ * or http://www.opensolaris.org/os/licensing. ++ * See the License for the specific language governing permissions ++ * and limitations under the License. ++ * ++ * When distributing Covered Code, include this CDDL HEADER in each ++ * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 
++ * If applicable, add the following below this CDDL HEADER, with the ++ * fields enclosed by brackets "[]" replaced with your own identifying ++ * information: Portions Copyright [yyyy] [name of copyright owner] ++ * ++ * CDDL HEADER END ++ */ ++/* ++ * Copyright (c) 2007, 2010, Oracle and/or its affiliates. All rights reserved. ++ * Copyright 2010 Nexenta Systems, Inc. All rights reserved. ++ */ ++ ++#ifndef _ZFS_DELEG_H ++#define _ZFS_DELEG_H ++ ++#include ++ ++#ifdef __cplusplus ++extern "C" { ++#endif ++ ++#define ZFS_DELEG_SET_NAME_CHR '@' /* set name lead char */ ++#define ZFS_DELEG_FIELD_SEP_CHR '$' /* field separator */ ++ ++/* ++ * Max name length for a delegation attribute ++ */ ++#define ZFS_MAX_DELEG_NAME 128 ++ ++#define ZFS_DELEG_LOCAL 'l' ++#define ZFS_DELEG_DESCENDENT 'd' ++#define ZFS_DELEG_NA '-' ++ ++typedef enum { ++ ZFS_DELEG_NOTE_CREATE, ++ ZFS_DELEG_NOTE_DESTROY, ++ ZFS_DELEG_NOTE_SNAPSHOT, ++ ZFS_DELEG_NOTE_ROLLBACK, ++ ZFS_DELEG_NOTE_CLONE, ++ ZFS_DELEG_NOTE_PROMOTE, ++ ZFS_DELEG_NOTE_RENAME, ++ ZFS_DELEG_NOTE_SEND, ++ ZFS_DELEG_NOTE_RECEIVE, ++ ZFS_DELEG_NOTE_ALLOW, ++ ZFS_DELEG_NOTE_USERPROP, ++ ZFS_DELEG_NOTE_MOUNT, ++ ZFS_DELEG_NOTE_SHARE, ++ ZFS_DELEG_NOTE_USERQUOTA, ++ ZFS_DELEG_NOTE_GROUPQUOTA, ++ ZFS_DELEG_NOTE_USERUSED, ++ ZFS_DELEG_NOTE_GROUPUSED, ++ ZFS_DELEG_NOTE_HOLD, ++ ZFS_DELEG_NOTE_RELEASE, ++ ZFS_DELEG_NOTE_DIFF, ++ ZFS_DELEG_NOTE_NONE ++} zfs_deleg_note_t; ++ ++typedef struct zfs_deleg_perm_tab { ++ char *z_perm; ++ zfs_deleg_note_t z_note; ++} zfs_deleg_perm_tab_t; ++ ++extern zfs_deleg_perm_tab_t zfs_deleg_perm_tab[]; ++ ++int zfs_deleg_verify_nvlist(nvlist_t *nvlist); ++void zfs_deleg_whokey(char *attr, zfs_deleg_who_type_t type, ++ char checkflag, void *data); ++const char *zfs_deleg_canonicalize_perm(const char *perm); ++ ++#ifdef __cplusplus ++} ++#endif ++ ++#endif /* _ZFS_DELEG_H */ +diff -uNr linux-3.2.33-go.orig/include/zfs/zfs_fletcher.h linux-3.2.33-go/include/zfs/zfs_fletcher.h +--- linux-3.2.33-go.orig/include/zfs/zfs_fletcher.h 1970-01-01 01:00:00.000000000 +0100 ++++ linux-3.2.33-go/include/zfs/zfs_fletcher.h 2012-11-16 23:25:34.336039485 +0100 +@@ -0,0 +1,53 @@ ++/* ++ * CDDL HEADER START ++ * ++ * The contents of this file are subject to the terms of the ++ * Common Development and Distribution License (the "License"). ++ * You may not use this file except in compliance with the License. ++ * ++ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE ++ * or http://www.opensolaris.org/os/licensing. ++ * See the License for the specific language governing permissions ++ * and limitations under the License. ++ * ++ * When distributing Covered Code, include this CDDL HEADER in each ++ * file and include the License file at usr/src/OPENSOLARIS.LICENSE. ++ * If applicable, add the following below this CDDL HEADER, with the ++ * fields enclosed by brackets "[]" replaced with your own identifying ++ * information: Portions Copyright [yyyy] [name of copyright owner] ++ * ++ * CDDL HEADER END ++ */ ++/* ++ * Copyright 2009 Sun Microsystems, Inc. All rights reserved. ++ * Use is subject to license terms. 
++ */ ++ ++#ifndef _ZFS_FLETCHER_H ++#define _ZFS_FLETCHER_H ++ ++#include ++#include ++ ++#ifdef __cplusplus ++extern "C" { ++#endif ++ ++/* ++ * fletcher checksum functions ++ */ ++ ++void fletcher_2_native(const void *, uint64_t, zio_cksum_t *); ++void fletcher_2_byteswap(const void *, uint64_t, zio_cksum_t *); ++void fletcher_4_native(const void *, uint64_t, zio_cksum_t *); ++void fletcher_4_byteswap(const void *, uint64_t, zio_cksum_t *); ++void fletcher_4_incremental_native(const void *, uint64_t, ++ zio_cksum_t *); ++void fletcher_4_incremental_byteswap(const void *, uint64_t, ++ zio_cksum_t *); ++ ++#ifdef __cplusplus ++} ++#endif ++ ++#endif /* _ZFS_FLETCHER_H */ +diff -uNr linux-3.2.33-go.orig/include/zfs/zfs_namecheck.h linux-3.2.33-go/include/zfs/zfs_namecheck.h +--- linux-3.2.33-go.orig/include/zfs/zfs_namecheck.h 1970-01-01 01:00:00.000000000 +0100 ++++ linux-3.2.33-go/include/zfs/zfs_namecheck.h 2012-11-16 23:25:34.336039485 +0100 +@@ -0,0 +1,58 @@ ++/* ++ * CDDL HEADER START ++ * ++ * The contents of this file are subject to the terms of the ++ * Common Development and Distribution License (the "License"). ++ * You may not use this file except in compliance with the License. ++ * ++ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE ++ * or http://www.opensolaris.org/os/licensing. ++ * See the License for the specific language governing permissions ++ * and limitations under the License. ++ * ++ * When distributing Covered Code, include this CDDL HEADER in each ++ * file and include the License file at usr/src/OPENSOLARIS.LICENSE. ++ * If applicable, add the following below this CDDL HEADER, with the ++ * fields enclosed by brackets "[]" replaced with your own identifying ++ * information: Portions Copyright [yyyy] [name of copyright owner] ++ * ++ * CDDL HEADER END ++ */ ++/* ++ * Copyright 2009 Sun Microsystems, Inc. All rights reserved. ++ * Use is subject to license terms. ++ */ ++ ++#ifndef _ZFS_NAMECHECK_H ++#define _ZFS_NAMECHECK_H ++ ++#ifdef __cplusplus ++extern "C" { ++#endif ++ ++typedef enum { ++ NAME_ERR_LEADING_SLASH, /* name begins with leading slash */ ++ NAME_ERR_EMPTY_COMPONENT, /* name contains an empty component */ ++ NAME_ERR_TRAILING_SLASH, /* name ends with a slash */ ++ NAME_ERR_INVALCHAR, /* invalid character found */ ++ NAME_ERR_MULTIPLE_AT, /* multiple '@' characters found */ ++ NAME_ERR_NOLETTER, /* pool doesn't begin with a letter */ ++ NAME_ERR_RESERVED, /* entire name is reserved */ ++ NAME_ERR_DISKLIKE, /* reserved disk name (c[0-9].*) */ ++ NAME_ERR_TOOLONG, /* name is too long */ ++ NAME_ERR_NO_AT, /* permission set is missing '@' */ ++} namecheck_err_t; ++ ++#define ZFS_PERMSET_MAXLEN 64 ++ ++int pool_namecheck(const char *, namecheck_err_t *, char *); ++int dataset_namecheck(const char *, namecheck_err_t *, char *); ++int mountpoint_namecheck(const char *, namecheck_err_t *); ++int snapshot_namecheck(const char *, namecheck_err_t *, char *); ++int permset_namecheck(const char *, namecheck_err_t *, char *); ++ ++#ifdef __cplusplus ++} ++#endif ++ ++#endif /* _ZFS_NAMECHECK_H */ +diff -uNr linux-3.2.33-go.orig/include/zfs/zfs_prop.h linux-3.2.33-go/include/zfs/zfs_prop.h +--- linux-3.2.33-go.orig/include/zfs/zfs_prop.h 1970-01-01 01:00:00.000000000 +0100 ++++ linux-3.2.33-go/include/zfs/zfs_prop.h 2012-11-16 23:25:34.336039485 +0100 +@@ -0,0 +1,129 @@ ++/* ++ * CDDL HEADER START ++ * ++ * The contents of this file are subject to the terms of the ++ * Common Development and Distribution License (the "License"). 
++ * You may not use this file except in compliance with the License. ++ * ++ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE ++ * or http://www.opensolaris.org/os/licensing. ++ * See the License for the specific language governing permissions ++ * and limitations under the License. ++ * ++ * When distributing Covered Code, include this CDDL HEADER in each ++ * file and include the License file at usr/src/OPENSOLARIS.LICENSE. ++ * If applicable, add the following below this CDDL HEADER, with the ++ * fields enclosed by brackets "[]" replaced with your own identifying ++ * information: Portions Copyright [yyyy] [name of copyright owner] ++ * ++ * CDDL HEADER END ++ */ ++/* ++ * Copyright 2010 Sun Microsystems, Inc. All rights reserved. ++ * Use is subject to license terms. ++ */ ++ ++#ifndef _ZFS_PROP_H ++#define _ZFS_PROP_H ++ ++#include ++#include ++ ++#ifdef __cplusplus ++extern "C" { ++#endif ++ ++/* ++ * For index types (e.g. compression and checksum), we want the numeric value ++ * in the kernel, but the string value in userland. ++ */ ++typedef enum { ++ PROP_TYPE_NUMBER, /* numeric value */ ++ PROP_TYPE_STRING, /* string value */ ++ PROP_TYPE_INDEX /* numeric value indexed by string */ ++} zprop_type_t; ++ ++typedef enum { ++ PROP_DEFAULT, ++ PROP_READONLY, ++ PROP_INHERIT, ++ /* ++ * ONETIME properties are a sort of conglomeration of READONLY ++ * and INHERIT. They can be set only during object creation, ++ * after that they are READONLY. If not explicitly set during ++ * creation, they can be inherited. ++ */ ++ PROP_ONETIME ++} zprop_attr_t; ++ ++typedef struct zfs_index { ++ const char *pi_name; ++ uint64_t pi_value; ++} zprop_index_t; ++ ++typedef struct { ++ const char *pd_name; /* human-readable property name */ ++ int pd_propnum; /* property number */ ++ zprop_type_t pd_proptype; /* string, boolean, index, number */ ++ const char *pd_strdefault; /* default for strings */ ++ uint64_t pd_numdefault; /* for boolean / index / number */ ++ zprop_attr_t pd_attr; /* default, readonly, inherit */ ++ int pd_types; /* bitfield of valid dataset types */ ++ /* fs | vol | snap; or pool */ ++ const char *pd_values; /* string telling acceptable values */ ++ const char *pd_colname; /* column header for "zfs list" */ ++ boolean_t pd_rightalign; /* column alignment for "zfs list" */ ++ boolean_t pd_visible; /* do we list this property with the */ ++ /* "zfs get" help message */ ++ const zprop_index_t *pd_table; /* for index properties, a table */ ++ /* defining the possible values */ ++ size_t pd_table_size; /* number of entries in pd_table[] */ ++} zprop_desc_t; ++ ++/* ++ * zfs dataset property functions ++ */ ++void zfs_prop_init(void); ++zprop_type_t zfs_prop_get_type(zfs_prop_t); ++boolean_t zfs_prop_delegatable(zfs_prop_t prop); ++zprop_desc_t *zfs_prop_get_table(void); ++ ++/* ++ * zpool property functions ++ */ ++void zpool_prop_init(void); ++zprop_type_t zpool_prop_get_type(zpool_prop_t); ++zprop_desc_t *zpool_prop_get_table(void); ++ ++/* ++ * Common routines to initialize property tables ++ */ ++void zprop_register_impl(int, const char *, zprop_type_t, uint64_t, ++ const char *, zprop_attr_t, int, const char *, const char *, ++ boolean_t, boolean_t, const zprop_index_t *); ++void zprop_register_string(int, const char *, const char *, ++ zprop_attr_t attr, int, const char *, const char *); ++void zprop_register_number(int, const char *, uint64_t, zprop_attr_t, int, ++ const char *, const char *); ++void zprop_register_index(int, const char *, uint64_t, 
zprop_attr_t, int, ++ const char *, const char *, const zprop_index_t *); ++void zprop_register_hidden(int, const char *, zprop_type_t, zprop_attr_t, ++ int, const char *); ++ ++/* ++ * Common routines for zfs and zpool property management ++ */ ++int zprop_iter_common(zprop_func, void *, boolean_t, boolean_t, zfs_type_t); ++int zprop_name_to_prop(const char *, zfs_type_t); ++int zprop_string_to_index(int, const char *, uint64_t *, zfs_type_t); ++int zprop_index_to_string(int, uint64_t, const char **, zfs_type_t); ++uint64_t zprop_random_value(int, uint64_t, zfs_type_t); ++const char *zprop_values(int, zfs_type_t); ++size_t zprop_width(int, boolean_t *, zfs_type_t); ++boolean_t zprop_valid_for_type(int, zfs_type_t); ++ ++#ifdef __cplusplus ++} ++#endif ++ ++#endif /* _ZFS_PROP_H */ +diff -uNr linux-3.2.33-go.orig/include/zfs/zpios-ctl.h linux-3.2.33-go/include/zfs/zpios-ctl.h +--- linux-3.2.33-go.orig/include/zfs/zpios-ctl.h 1970-01-01 01:00:00.000000000 +0100 ++++ linux-3.2.33-go/include/zfs/zpios-ctl.h 2012-11-16 23:25:34.336039485 +0100 +@@ -0,0 +1,198 @@ ++/*****************************************************************************\ ++ * ZPIOS is a heavily modified version of the original PIOS test code. ++ * It is designed to have the test code running in the Linux kernel ++ * against ZFS while still being flexibly controled from user space. ++ * ++ * Copyright (C) 2008-2010 Lawrence Livermore National Security, LLC. ++ * Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER). ++ * Written by Brian Behlendorf . ++ * LLNL-CODE-403049 ++ * ++ * Original PIOS Test Code ++ * Copyright (C) 2004 Cluster File Systems, Inc. ++ * Written by Peter Braam ++ * Atul Vidwansa ++ * Milind Dumbare ++ * ++ * This file is part of ZFS on Linux. ++ * For details, see . ++ * ++ * ZPIOS is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License as published by the ++ * Free Software Foundation; either version 2 of the License, or (at your ++ * option) any later version. ++ * ++ * ZPIOS is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * for more details. ++ * ++ * You should have received a copy of the GNU General Public License along ++ * with ZPIOS. If not, see . ++\*****************************************************************************/ ++ ++#ifndef _ZPIOS_CTL_H ++#define _ZPIOS_CTL_H ++ ++/* Contains shared definitions which both the userspace ++ * and kernelspace portions of zpios must agree on. 
++ */ ++#ifndef _KERNEL ++#include ++#endif ++ ++#define ZPIOS_MAJOR 232 /* XXX - Arbitrary */ ++#define ZPIOS_MINORS 1 ++#define ZPIOS_NAME "zpios" ++#define ZPIOS_DEV "/dev/zpios" ++ ++#define DMU_IO 0x01 ++ ++#define DMU_WRITE 0x0001 ++#define DMU_READ 0x0002 ++#define DMU_VERIFY 0x0004 ++#define DMU_REMOVE 0x0008 ++#define DMU_FPP 0x0010 ++#define DMU_WRITE_ZC 0x0020 /* Incompatible w/DMU_VERIFY */ ++#define DMU_READ_ZC 0x0040 /* Incompatible w/DMU_VERIFY */ ++#define DMU_WRITE_NOWAIT 0x0080 ++#define DMU_READ_NOPF 0x0100 ++ ++#define ZPIOS_NAME_SIZE 16 ++#define ZPIOS_PATH_SIZE 128 ++ ++#define PHASE_PRE_RUN "pre-run" ++#define PHASE_PRE_CREATE "pre-create" ++#define PHASE_PRE_WRITE "pre-write" ++#define PHASE_PRE_READ "pre-read" ++#define PHASE_PRE_REMOVE "pre-remove" ++#define PHASE_POST_RUN "post-run" ++#define PHASE_POST_CREATE "post-create" ++#define PHASE_POST_WRITE "post-write" ++#define PHASE_POST_READ "post-read" ++#define PHASE_POST_REMOVE "post-remove" ++ ++#define ZPIOS_CFG_MAGIC 0x87237190U ++typedef struct zpios_cfg { ++ uint32_t cfg_magic; /* Unique magic */ ++ int32_t cfg_cmd; /* Config command */ ++ int32_t cfg_arg1; /* Config command arg 1 */ ++ int32_t cfg_rc1; /* Config response 1 */ ++} zpios_cfg_t; ++ ++typedef struct zpios_timespec { ++ uint32_t ts_sec; ++ uint32_t ts_nsec; ++} zpios_timespec_t; ++ ++typedef struct zpios_time { ++ zpios_timespec_t start; ++ zpios_timespec_t stop; ++ zpios_timespec_t delta; ++} zpios_time_t; ++ ++typedef struct zpios_stats { ++ zpios_time_t total_time; ++ zpios_time_t cr_time; ++ zpios_time_t rm_time; ++ zpios_time_t wr_time; ++ zpios_time_t rd_time; ++ uint64_t wr_data; ++ uint64_t wr_chunks; ++ uint64_t rd_data; ++ uint64_t rd_chunks; ++} zpios_stats_t; ++ ++#define ZPIOS_CMD_MAGIC 0x49715385U ++typedef struct zpios_cmd { ++ uint32_t cmd_magic; /* Unique magic */ ++ uint32_t cmd_id; /* Run ID */ ++ char cmd_pool[ZPIOS_NAME_SIZE]; /* Pool name */ ++ uint64_t cmd_chunk_size; /* Chunk size */ ++ uint32_t cmd_thread_count; /* Thread count */ ++ uint32_t cmd_region_count; /* Region count */ ++ uint64_t cmd_region_size; /* Region size */ ++ uint64_t cmd_offset; /* Region offset */ ++ uint32_t cmd_region_noise; /* Region noise */ ++ uint32_t cmd_chunk_noise; /* Chunk noise */ ++ uint32_t cmd_thread_delay; /* Thread delay */ ++ uint32_t cmd_flags; /* Test flags */ ++ char cmd_pre[ZPIOS_PATH_SIZE]; /* Pre-exec hook */ ++ char cmd_post[ZPIOS_PATH_SIZE]; /* Post-exec hook */ ++ char cmd_log[ZPIOS_PATH_SIZE]; /* Requested log dir */ ++ uint64_t cmd_data_size; /* Opaque data size */ ++ char cmd_data_str[0]; /* Opaque data region */ ++} zpios_cmd_t; ++ ++/* Valid ioctls */ ++#define ZPIOS_CFG _IOWR('f', 101, zpios_cfg_t) ++#define ZPIOS_CMD _IOWR('f', 102, zpios_cmd_t) ++ ++/* Valid configuration commands */ ++#define ZPIOS_CFG_BUFFER_CLEAR 0x001 /* Clear text buffer */ ++#define ZPIOS_CFG_BUFFER_SIZE 0x002 /* Resize text buffer */ ++ ++#ifndef NSEC_PER_SEC ++#define NSEC_PER_SEC 1000000000L ++#endif ++ ++static inline ++void zpios_timespec_normalize(zpios_timespec_t *ts, uint32_t sec, uint32_t nsec) ++{ ++ while (nsec >= NSEC_PER_SEC) { ++ nsec -= NSEC_PER_SEC; ++ sec++; ++ } ++ while (nsec < 0) { ++ nsec += NSEC_PER_SEC; ++ sec--; ++ } ++ ts->ts_sec = sec; ++ ts->ts_nsec = nsec; ++} ++ ++static inline ++zpios_timespec_t zpios_timespec_add(zpios_timespec_t lhs, zpios_timespec_t rhs) ++{ ++ zpios_timespec_t ts_delta; ++ zpios_timespec_normalize(&ts_delta, lhs.ts_sec + rhs.ts_sec, ++ lhs.ts_nsec + rhs.ts_nsec); ++ return ts_delta; ++} ++ 
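The zpios_timespec helpers above carry whole seconds out of the nanosecond field so that per-phase timings can be summed safely. Below is a minimal userspace sketch of that behaviour; it is not part of the patch, and ts_add() plus the restated struct are illustrative stand-ins for the real zpios_timespec_add()/zpios_timespec_normalize() pair.

#include <stdint.h>
#include <stdio.h>

#define NSEC_PER_SEC 1000000000L

typedef struct {
	uint32_t ts_sec;
	uint32_t ts_nsec;
} zpios_timespec_t;

/* Same idea as zpios_timespec_add() + zpios_timespec_normalize():
 * add both fields, then carry whole seconds out of ts_nsec. */
static zpios_timespec_t ts_add(zpios_timespec_t a, zpios_timespec_t b)
{
	uint64_t nsec = (uint64_t)a.ts_nsec + b.ts_nsec;
	zpios_timespec_t r = { a.ts_sec + b.ts_sec, 0 };

	while (nsec >= NSEC_PER_SEC) {
		nsec -= NSEC_PER_SEC;
		r.ts_sec++;
	}
	r.ts_nsec = (uint32_t)nsec;
	return r;
}

int main(void)
{
	zpios_timespec_t wr = { 1, 700000000 };	/* 1.7 s write phase */
	zpios_timespec_t rd = { 2, 600000000 };	/* 2.6 s read phase  */
	zpios_timespec_t total = ts_add(wr, rd);

	/* Prints "4.300000000": the extra second was carried over. */
	printf("%u.%09u\n", total.ts_sec, total.ts_nsec);
	return 0;
}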
++static inline ++zpios_timespec_t zpios_timespec_sub(zpios_timespec_t lhs, zpios_timespec_t rhs) ++{ ++ zpios_timespec_t ts_delta; ++ zpios_timespec_normalize(&ts_delta, lhs.ts_sec - rhs.ts_sec, ++ lhs.ts_nsec - rhs.ts_nsec); ++ return ts_delta; ++} ++ ++#ifdef _KERNEL ++ ++static inline ++zpios_timespec_t zpios_timespec_now(void) ++{ ++ zpios_timespec_t zts_now; ++ struct timespec ts_now; ++ ++ ts_now = current_kernel_time(); ++ zts_now.ts_sec = ts_now.tv_sec; ++ zts_now.ts_nsec = ts_now.tv_nsec; ++ ++ return zts_now; ++} ++ ++#else ++ ++static inline ++double zpios_timespec_to_double(zpios_timespec_t ts) ++{ ++ return ((double)(ts.ts_sec) + ++ ((double)(ts.ts_nsec) / (double)(NSEC_PER_SEC))); ++} ++ ++#endif /* _KERNEL */ ++ ++#endif /* _ZPIOS_CTL_H */ +diff -uNr linux-3.2.33-go.orig/include/zfs/zpios-internal.h linux-3.2.33-go/include/zfs/zpios-internal.h +--- linux-3.2.33-go.orig/include/zfs/zpios-internal.h 1970-01-01 01:00:00.000000000 +0100 ++++ linux-3.2.33-go/include/zfs/zpios-internal.h 2012-11-16 23:25:34.336039485 +0100 +@@ -0,0 +1,138 @@ ++/*****************************************************************************\ ++ * ZPIOS is a heavily modified version of the original PIOS test code. ++ * It is designed to have the test code running in the Linux kernel ++ * against ZFS while still being flexibly controled from user space. ++ * ++ * Copyright (C) 2008-2010 Lawrence Livermore National Security, LLC. ++ * Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER). ++ * Written by Brian Behlendorf . ++ * LLNL-CODE-403049 ++ * ++ * Original PIOS Test Code ++ * Copyright (C) 2004 Cluster File Systems, Inc. ++ * Written by Peter Braam ++ * Atul Vidwansa ++ * Milind Dumbare ++ * ++ * This file is part of ZFS on Linux. ++ * For details, see . ++ * ++ * ZPIOS is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License as published by the ++ * Free Software Foundation; either version 2 of the License, or (at your ++ * option) any later version. ++ * ++ * ZPIOS is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * for more details. ++ * ++ * You should have received a copy of the GNU General Public License along ++ * with ZPIOS. If not, see . 
++\*****************************************************************************/ ++ ++#ifndef _ZPIOS_INTERNAL_H ++#define _ZPIOS_INTERNAL_H ++ ++#include "zpios-ctl.h" ++ ++#define OBJ_SIZE 64 ++ ++struct run_args; ++ ++typedef struct dmu_obj { ++ objset_t *os; ++ uint64_t obj; ++} dmu_obj_t; ++ ++/* thread doing the IO data */ ++typedef struct thread_data { ++ struct run_args *run_args; ++ int thread_no; ++ int rc; ++ zpios_stats_t stats; ++ kmutex_t lock; ++} thread_data_t; ++ ++/* region for IO data */ ++typedef struct zpios_region { ++ __u64 wr_offset; ++ __u64 rd_offset; ++ __u64 init_offset; ++ __u64 max_offset; ++ dmu_obj_t obj; ++ zpios_stats_t stats; ++ kmutex_t lock; ++} zpios_region_t; ++ ++/* arguments for one run */ ++typedef struct run_args { ++ /* Config args */ ++ int id; ++ char pool[ZPIOS_NAME_SIZE]; ++ __u64 chunk_size; ++ __u32 thread_count; ++ __u32 region_count; ++ __u64 region_size; ++ __u64 offset; ++ __u32 region_noise; ++ __u32 chunk_noise; ++ __u32 thread_delay; ++ __u32 flags; ++ char pre[ZPIOS_PATH_SIZE]; ++ char post[ZPIOS_PATH_SIZE]; ++ char log[ZPIOS_PATH_SIZE]; ++ ++ /* Control data */ ++ objset_t *os; ++ wait_queue_head_t waitq; ++ volatile uint64_t threads_done; ++ kmutex_t lock_work; ++ kmutex_t lock_ctl; ++ __u32 region_next; ++ ++ /* Results data */ ++ struct file *file; ++ zpios_stats_t stats; ++ ++ thread_data_t **threads; ++ zpios_region_t regions[0]; /* Must be last element */ ++} run_args_t; ++ ++#define ZPIOS_INFO_BUFFER_SIZE 65536 ++#define ZPIOS_INFO_BUFFER_REDZONE 1024 ++ ++typedef struct zpios_info { ++ spinlock_t info_lock; ++ int info_size; ++ char *info_buffer; ++ char *info_head; /* Internal kernel use only */ ++} zpios_info_t; ++ ++#define zpios_print(file, format, args...) \ ++({ zpios_info_t *_info_ = (zpios_info_t *)file->private_data; \ ++ int _rc_; \ ++ \ ++ ASSERT(_info_); \ ++ ASSERT(_info_->info_buffer); \ ++ \ ++ spin_lock(&_info_->info_lock); \ ++ \ ++ /* Don't allow the kernel to start a write in the red zone */ \ ++ if ((int)(_info_->info_head - _info_->info_buffer) > \ ++ (_info_->info_size - ZPIOS_INFO_BUFFER_REDZONE)) { \ ++ _rc_ = -EOVERFLOW; \ ++ } else { \ ++ _rc_ = sprintf(_info_->info_head, format, args); \ ++ if (_rc_ >= 0) \ ++ _info_->info_head += _rc_; \ ++ } \ ++ \ ++ spin_unlock(&_info_->info_lock); \ ++ _rc_; \ ++}) ++ ++#define zpios_vprint(file, test, format, args...) 
\ ++ zpios_print(file, "%*s: " format, ZPIOS_NAME_SIZE, test, args) ++ ++#endif /* _ZPIOS_INTERNAL_H */ +diff -uNr linux-3.2.33-go.orig/Kconfig linux-3.2.33-go/Kconfig +--- linux-3.2.33-go.orig/Kconfig 2012-11-16 23:15:13.037410874 +0100 ++++ linux-3.2.33-go/Kconfig 2012-11-16 23:22:32.423192712 +0100 +@@ -9,3 +9,4 @@ + option env="SRCARCH" + + source "arch/$SRCARCH/Kconfig" ++source "spl/Kconfig" +diff -uNr linux-3.2.33-go.orig/Makefile linux-3.2.33-go/Makefile +--- linux-3.2.33-go.orig/Makefile 2012-11-16 23:15:13.028410977 +0100 ++++ linux-3.2.33-go/Makefile 2012-11-16 23:22:32.430192633 +0100 +@@ -708,7 +708,7 @@ + + + ifeq ($(KBUILD_EXTMOD),) +-core-y += kernel/ mm/ fs/ ipc/ security/ crypto/ block/ ++core-y += kernel/ mm/ spl/ fs/ ipc/ security/ crypto/ block/ + + vmlinux-dirs := $(patsubst %/,%,$(filter %/, $(init-y) $(init-m) \ + $(core-y) $(core-m) $(drivers-y) $(drivers-m) \ +diff -uNr linux-3.2.33-go.orig/spl/Kbuild linux-3.2.33-go/spl/Kbuild +--- linux-3.2.33-go.orig/spl/Kbuild 1970-01-01 01:00:00.000000000 +0100 ++++ linux-3.2.33-go/spl/Kbuild 2012-11-16 23:22:32.422192724 +0100 +@@ -0,0 +1,7 @@ ++SPL_MODULE_CFLAGS = -I$(srctree)/include/spl ++SPL_MODULE_CFLAGS += -include $(srctree)/spl_config.h ++export SPL_MODULE_CFLAGS ++ ++obj-$(CONFIG_SPL) := ++obj-$(CONFIG_SPL) += spl/ ++obj-$(CONFIG_SPL) += splat/ +diff -uNr linux-3.2.33-go.orig/spl/Kconfig linux-3.2.33-go/spl/Kconfig +--- linux-3.2.33-go.orig/spl/Kconfig 1970-01-01 01:00:00.000000000 +0100 ++++ linux-3.2.33-go/spl/Kconfig 2012-11-16 23:22:32.421192736 +0100 +@@ -0,0 +1,10 @@ ++config SPL ++ tristate "Solaris Porting Layer (SPL)" ++ help ++ This is the SPL library from the ZFS On Linux project. ++ ++ See http://zfsonlinux.org/ ++ ++ To compile this library as a module, choose M here. ++ ++ If unsure, say N. +diff -uNr linux-3.2.33-go.orig/spl/Makefile linux-3.2.33-go/spl/Makefile +--- linux-3.2.33-go.orig/spl/Makefile 1970-01-01 01:00:00.000000000 +0100 ++++ linux-3.2.33-go/spl/Makefile 2012-11-16 23:22:32.411192851 +0100 +@@ -0,0 +1,50 @@ ++subdir-m += spl ++subdir-m += splat ++ ++INSTALL=/usr/bin/install ++ ++SPL_MODULE_CFLAGS = -I/root/spl-0.6.0-rc12/include ++SPL_MODULE_CFLAGS += -include /root/spl-0.6.0-rc12/spl_config.h ++export SPL_MODULE_CFLAGS ++ ++modules: ++ $(MAKE) -C /usr/src/linux-3.6.0-sabayon SUBDIRS=`pwd` O=/usr/src/linux-3.6.0-sabayon CONFIG_SPL=m $@ ++ ++clean: ++ @# Only cleanup the kernel build directories when CONFIG_KERNEL ++ @# is defined. This indicates that kernel modules should be built. 
++# $(MAKE) -C /usr/src/linux-3.6.0-sabayon SUBDIRS=`pwd` O=/usr/src/linux-3.6.0-sabayon $@ ++ ++ if [ -f 'NONE' ]; then $(RM) 'NONE'; fi ++ if [ -f Module.markers ]; then $(RM) Module.markers; fi ++ ++modules_install: ++ @# Install the kernel modules ++ $(MAKE) -C /usr/src/linux-3.6.0-sabayon SUBDIRS=`pwd` \ ++ INSTALL_MOD_PATH=$(DESTDIR) \ ++ INSTALL_MOD_DIR=addon/spl $@ ++ @# Remove extraneous build products when packaging ++ if [ -n "$(DESTDIR)" ]; then \ ++ find $(DESTDIR)/lib/modules/3.6.0-sabayon \ ++ -name 'modules.*' | xargs $(RM); \ ++ fi ++ sysmap=$(DESTDIR)/boot/System.map-3.6.0-sabayon; \ ++ if [ -f $$sysmap ]; then \ ++ depmod -ae -F $$sysmap 3.6.0-sabayon; \ ++ fi ++ ++modules_uninstall: ++ @# Uninstall the kernel modules ++ $(RM) -R $(DESTDIR)/lib/modules/3.6.0-sabayon/addon/spl ++ ++distdir: ++ list='$(subdir-m)'; for subdir in $$list; do \ ++ (find ../module/$$subdir -name '*.c' -o -name '*.h' |\ ++ xargs /bin/cp -t $$distdir/$$subdir); \ ++ done ++ ++distclean maintainer-clean: clean ++install: modules_install ++uninstall: modules_uninstall ++all: modules ++check: +diff -uNr linux-3.2.33-go.orig/spl/Makefile.in linux-3.2.33-go/spl/Makefile.in +--- linux-3.2.33-go.orig/spl/Makefile.in 1970-01-01 01:00:00.000000000 +0100 ++++ linux-3.2.33-go/spl/Makefile.in 2012-11-16 23:22:32.411192851 +0100 +@@ -0,0 +1,50 @@ ++subdir-m += spl ++subdir-m += splat ++ ++INSTALL=/usr/bin/install ++ ++SPL_MODULE_CFLAGS = -I@abs_top_srcdir@/include ++SPL_MODULE_CFLAGS += -include @abs_top_builddir@/spl_config.h ++export SPL_MODULE_CFLAGS ++ ++modules: ++ $(MAKE) -C @LINUX_OBJ@ SUBDIRS=`pwd` @KERNELMAKE_PARAMS@ CONFIG_SPL=m $@ ++ ++clean: ++ @# Only cleanup the kernel build directories when CONFIG_KERNEL ++ @# is defined. This indicates that kernel modules should be built. 
++@CONFIG_KERNEL_TRUE@ $(MAKE) -C @LINUX_OBJ@ SUBDIRS=`pwd` @KERNELMAKE_PARAMS@ $@ ++ ++ if [ -f '@LINUX_SYMBOLS@' ]; then $(RM) '@LINUX_SYMBOLS@'; fi ++ if [ -f Module.markers ]; then $(RM) Module.markers; fi ++ ++modules_install: ++ @# Install the kernel modules ++ $(MAKE) -C @LINUX_OBJ@ SUBDIRS=`pwd` \ ++ INSTALL_MOD_PATH=$(DESTDIR) \ ++ INSTALL_MOD_DIR=addon/spl $@ ++ @# Remove extraneous build products when packaging ++ if [ -n "$(DESTDIR)" ]; then \ ++ find $(DESTDIR)/lib/modules/@LINUX_VERSION@ \ ++ -name 'modules.*' | xargs $(RM); \ ++ fi ++ sysmap=$(DESTDIR)/boot/System.map-@LINUX_VERSION@; \ ++ if [ -f $$sysmap ]; then \ ++ depmod -ae -F $$sysmap @LINUX_VERSION@; \ ++ fi ++ ++modules_uninstall: ++ @# Uninstall the kernel modules ++ $(RM) -R $(DESTDIR)/lib/modules/@LINUX_VERSION@/addon/spl ++ ++distdir: ++ list='$(subdir-m)'; for subdir in $$list; do \ ++ (find @top_srcdir@/module/$$subdir -name '*.c' -o -name '*.h' |\ ++ xargs /bin/cp -t $$distdir/$$subdir); \ ++ done ++ ++distclean maintainer-clean: clean ++install: modules_install ++uninstall: modules_uninstall ++all: modules ++check: +diff -uNr linux-3.2.33-go.orig/spl/spl/Makefile linux-3.2.33-go/spl/spl/Makefile +--- linux-3.2.33-go.orig/spl/spl/Makefile 1970-01-01 01:00:00.000000000 +0100 ++++ linux-3.2.33-go/spl/spl/Makefile 2012-11-16 23:22:32.415192803 +0100 +@@ -0,0 +1,27 @@ ++# Makefile.in for spl kernel module ++ ++MODULE := spl ++EXTRA_CFLAGS = $(SPL_MODULE_CFLAGS) -DHAVE_GPL_ONLY_SYMBOLS -Wstrict-prototypes -DNDEBUG -DDEBUG_LOG -DDEBUG_KMEM ++ ++# Solaris porting layer module ++obj-$(CONFIG_SPL) := $(MODULE).o ++ ++$(MODULE)-objs += spl-debug.o ++$(MODULE)-objs += spl-proc.o ++$(MODULE)-objs += spl-kmem.o ++$(MODULE)-objs += spl-thread.o ++$(MODULE)-objs += spl-taskq.o ++$(MODULE)-objs += spl-rwlock.o ++$(MODULE)-objs += spl-vnode.o ++$(MODULE)-objs += spl-err.o ++$(MODULE)-objs += spl-time.o ++$(MODULE)-objs += spl-kobj.o ++$(MODULE)-objs += spl-generic.o ++$(MODULE)-objs += spl-atomic.o ++$(MODULE)-objs += spl-mutex.o ++$(MODULE)-objs += spl-kstat.o ++$(MODULE)-objs += spl-condvar.o ++$(MODULE)-objs += spl-xdr.o ++$(MODULE)-objs += spl-cred.o ++$(MODULE)-objs += spl-tsd.o ++$(MODULE)-objs += spl-zlib.o +diff -uNr linux-3.2.33-go.orig/spl/spl/Makefile.in linux-3.2.33-go/spl/spl/Makefile.in +--- linux-3.2.33-go.orig/spl/spl/Makefile.in 1970-01-01 01:00:00.000000000 +0100 ++++ linux-3.2.33-go/spl/spl/Makefile.in 2012-11-16 23:22:32.410192863 +0100 +@@ -0,0 +1,27 @@ ++# Makefile.in for spl kernel module ++ ++MODULE := spl ++EXTRA_CFLAGS = $(SPL_MODULE_CFLAGS) @KERNELCPPFLAGS@ ++ ++# Solaris porting layer module ++obj-$(CONFIG_SPL) := $(MODULE).o ++ ++$(MODULE)-objs += @top_srcdir@/module/spl/spl-debug.o ++$(MODULE)-objs += @top_srcdir@/module/spl/spl-proc.o ++$(MODULE)-objs += @top_srcdir@/module/spl/spl-kmem.o ++$(MODULE)-objs += @top_srcdir@/module/spl/spl-thread.o ++$(MODULE)-objs += @top_srcdir@/module/spl/spl-taskq.o ++$(MODULE)-objs += @top_srcdir@/module/spl/spl-rwlock.o ++$(MODULE)-objs += @top_srcdir@/module/spl/spl-vnode.o ++$(MODULE)-objs += @top_srcdir@/module/spl/spl-err.o ++$(MODULE)-objs += @top_srcdir@/module/spl/spl-time.o ++$(MODULE)-objs += @top_srcdir@/module/spl/spl-kobj.o ++$(MODULE)-objs += @top_srcdir@/module/spl/spl-generic.o ++$(MODULE)-objs += @top_srcdir@/module/spl/spl-atomic.o ++$(MODULE)-objs += @top_srcdir@/module/spl/spl-mutex.o ++$(MODULE)-objs += @top_srcdir@/module/spl/spl-kstat.o ++$(MODULE)-objs += @top_srcdir@/module/spl/spl-condvar.o ++$(MODULE)-objs += 
@top_srcdir@/module/spl/spl-xdr.o ++$(MODULE)-objs += @top_srcdir@/module/spl/spl-cred.o ++$(MODULE)-objs += @top_srcdir@/module/spl/spl-tsd.o ++$(MODULE)-objs += @top_srcdir@/module/spl/spl-zlib.o +diff -uNr linux-3.2.33-go.orig/spl/spl/spl-atomic.c linux-3.2.33-go/spl/spl/spl-atomic.c +--- linux-3.2.33-go.orig/spl/spl/spl-atomic.c 1970-01-01 01:00:00.000000000 +0100 ++++ linux-3.2.33-go/spl/spl/spl-atomic.c 2012-11-16 23:22:32.410192863 +0100 +@@ -0,0 +1,42 @@ ++/*****************************************************************************\ ++ * Copyright (C) 2007-2010 Lawrence Livermore National Security, LLC. ++ * Copyright (C) 2007 The Regents of the University of California. ++ * Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER). ++ * Written by Brian Behlendorf . ++ * UCRL-CODE-235197 ++ * ++ * This file is part of the SPL, Solaris Porting Layer. ++ * For details, see . ++ * ++ * The SPL is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License as published by the ++ * Free Software Foundation; either version 2 of the License, or (at your ++ * option) any later version. ++ * ++ * The SPL is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * for more details. ++ * ++ * You should have received a copy of the GNU General Public License along ++ * with the SPL. If not, see . ++ ***************************************************************************** ++ * Solaris Porting Layer (SPL) Atomic Implementation. ++\*****************************************************************************/ ++ ++#include ++ ++#ifdef DEBUG_SUBSYSTEM ++#undef DEBUG_SUBSYSTEM ++#endif ++ ++#define DEBUG_SUBSYSTEM S_ATOMIC ++ ++#ifdef ATOMIC_SPINLOCK ++/* Global atomic lock declarations */ ++DEFINE_SPINLOCK(atomic32_lock); ++DEFINE_SPINLOCK(atomic64_lock); ++ ++EXPORT_SYMBOL(atomic32_lock); ++EXPORT_SYMBOL(atomic64_lock); ++#endif /* ATOMIC_SPINLOCK */ +diff -uNr linux-3.2.33-go.orig/spl/spl/spl-condvar.c linux-3.2.33-go/spl/spl/spl-condvar.c +--- linux-3.2.33-go.orig/spl/spl/spl-condvar.c 1970-01-01 01:00:00.000000000 +0100 ++++ linux-3.2.33-go/spl/spl/spl-condvar.c 2012-11-16 23:22:32.411192851 +0100 +@@ -0,0 +1,255 @@ ++/*****************************************************************************\ ++ * Copyright (C) 2007-2010 Lawrence Livermore National Security, LLC. ++ * Copyright (C) 2007 The Regents of the University of California. ++ * Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER). ++ * Written by Brian Behlendorf . ++ * UCRL-CODE-235197 ++ * ++ * This file is part of the SPL, Solaris Porting Layer. ++ * For details, see . ++ * ++ * The SPL is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License as published by the ++ * Free Software Foundation; either version 2 of the License, or (at your ++ * option) any later version. ++ * ++ * The SPL is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * for more details. ++ * ++ * You should have received a copy of the GNU General Public License along ++ * with the SPL. If not, see . 
++ ***************************************************************************** ++ * Solaris Porting Layer (SPL) Credential Implementation. ++\*****************************************************************************/ ++ ++#include ++#include ++ ++#ifdef SS_DEBUG_SUBSYS ++#undef SS_DEBUG_SUBSYS ++#endif ++ ++#define SS_DEBUG_SUBSYS SS_CONDVAR ++ ++void ++__cv_init(kcondvar_t *cvp, char *name, kcv_type_t type, void *arg) ++{ ++ int flags = KM_SLEEP; ++ ++ SENTRY; ++ ASSERT(cvp); ++ ASSERT(name == NULL); ++ ASSERT(type == CV_DEFAULT); ++ ASSERT(arg == NULL); ++ ++ cvp->cv_magic = CV_MAGIC; ++ init_waitqueue_head(&cvp->cv_event); ++ init_waitqueue_head(&cvp->cv_destroy); ++ atomic_set(&cvp->cv_waiters, 0); ++ atomic_set(&cvp->cv_refs, 1); ++ cvp->cv_mutex = NULL; ++ ++ /* We may be called when there is a non-zero preempt_count or ++ * interrupts are disabled is which case we must not sleep. ++ */ ++ if (current_thread_info()->preempt_count || irqs_disabled()) ++ flags = KM_NOSLEEP; ++ ++ SEXIT; ++} ++EXPORT_SYMBOL(__cv_init); ++ ++static int ++cv_destroy_wakeup(kcondvar_t *cvp) ++{ ++ if (!atomic_read(&cvp->cv_waiters) && !atomic_read(&cvp->cv_refs)) { ++ ASSERT(cvp->cv_mutex == NULL); ++ ASSERT(!waitqueue_active(&cvp->cv_event)); ++ return 1; ++ } ++ ++ return 0; ++} ++ ++void ++__cv_destroy(kcondvar_t *cvp) ++{ ++ SENTRY; ++ ASSERT(cvp); ++ ASSERT(cvp->cv_magic == CV_MAGIC); ++ ++ cvp->cv_magic = CV_DESTROY; ++ atomic_dec(&cvp->cv_refs); ++ ++ /* Block until all waiters are woken and references dropped. */ ++ while (cv_destroy_wakeup(cvp) == 0) ++ wait_event_timeout(cvp->cv_destroy, cv_destroy_wakeup(cvp), 1); ++ ++ ASSERT3P(cvp->cv_mutex, ==, NULL); ++ ASSERT3S(atomic_read(&cvp->cv_refs), ==, 0); ++ ASSERT3S(atomic_read(&cvp->cv_waiters), ==, 0); ++ ASSERT3S(waitqueue_active(&cvp->cv_event), ==, 0); ++ ++ SEXIT; ++} ++EXPORT_SYMBOL(__cv_destroy); ++ ++static void ++cv_wait_common(kcondvar_t *cvp, kmutex_t *mp, int state) ++{ ++ DEFINE_WAIT(wait); ++ SENTRY; ++ ++ ASSERT(cvp); ++ ASSERT(mp); ++ ASSERT(cvp->cv_magic == CV_MAGIC); ++ ASSERT(mutex_owned(mp)); ++ atomic_inc(&cvp->cv_refs); ++ ++ if (cvp->cv_mutex == NULL) ++ cvp->cv_mutex = mp; ++ ++ /* Ensure the same mutex is used by all callers */ ++ ASSERT(cvp->cv_mutex == mp); ++ ++ prepare_to_wait_exclusive(&cvp->cv_event, &wait, state); ++ atomic_inc(&cvp->cv_waiters); ++ ++ /* Mutex should be dropped after prepare_to_wait() this ++ * ensures we're linked in to the waiters list and avoids the ++ * race where 'cvp->cv_waiters > 0' but the list is empty. */ ++ mutex_exit(mp); ++ schedule(); ++ mutex_enter(mp); ++ ++ /* No more waiters a different mutex could be used */ ++ if (atomic_dec_and_test(&cvp->cv_waiters)) { ++ cvp->cv_mutex = NULL; ++ wake_up(&cvp->cv_destroy); ++ } ++ ++ finish_wait(&cvp->cv_event, &wait); ++ atomic_dec(&cvp->cv_refs); ++ ++ SEXIT; ++} ++ ++void ++__cv_wait(kcondvar_t *cvp, kmutex_t *mp) ++{ ++ cv_wait_common(cvp, mp, TASK_UNINTERRUPTIBLE); ++} ++EXPORT_SYMBOL(__cv_wait); ++ ++void ++__cv_wait_interruptible(kcondvar_t *cvp, kmutex_t *mp) ++{ ++ cv_wait_common(cvp, mp, TASK_INTERRUPTIBLE); ++} ++EXPORT_SYMBOL(__cv_wait_interruptible); ++ ++/* 'expire_time' argument is an absolute wall clock time in jiffies. ++ * Return value is time left (expire_time - now) or -1 if timeout occurred. 
++ */ ++static clock_t ++__cv_timedwait_common(kcondvar_t *cvp, kmutex_t *mp, ++ clock_t expire_time, int state) ++{ ++ DEFINE_WAIT(wait); ++ clock_t time_left; ++ SENTRY; ++ ++ ASSERT(cvp); ++ ASSERT(mp); ++ ASSERT(cvp->cv_magic == CV_MAGIC); ++ ASSERT(mutex_owned(mp)); ++ atomic_inc(&cvp->cv_refs); ++ ++ if (cvp->cv_mutex == NULL) ++ cvp->cv_mutex = mp; ++ ++ /* Ensure the same mutex is used by all callers */ ++ ASSERT(cvp->cv_mutex == mp); ++ ++ /* XXX - Does not handle jiffie wrap properly */ ++ time_left = expire_time - jiffies; ++ if (time_left <= 0) { ++ atomic_dec(&cvp->cv_refs); ++ SRETURN(-1); ++ } ++ ++ prepare_to_wait_exclusive(&cvp->cv_event, &wait, state); ++ atomic_inc(&cvp->cv_waiters); ++ ++ /* Mutex should be dropped after prepare_to_wait() this ++ * ensures we're linked in to the waiters list and avoids the ++ * race where 'cvp->cv_waiters > 0' but the list is empty. */ ++ mutex_exit(mp); ++ time_left = schedule_timeout(time_left); ++ mutex_enter(mp); ++ ++ /* No more waiters a different mutex could be used */ ++ if (atomic_dec_and_test(&cvp->cv_waiters)) { ++ cvp->cv_mutex = NULL; ++ wake_up(&cvp->cv_destroy); ++ } ++ ++ finish_wait(&cvp->cv_event, &wait); ++ atomic_dec(&cvp->cv_refs); ++ ++ SRETURN(time_left > 0 ? time_left : -1); ++} ++ ++clock_t ++__cv_timedwait(kcondvar_t *cvp, kmutex_t *mp, clock_t exp_time) ++{ ++ return __cv_timedwait_common(cvp, mp, exp_time, TASK_UNINTERRUPTIBLE); ++} ++EXPORT_SYMBOL(__cv_timedwait); ++ ++clock_t ++__cv_timedwait_interruptible(kcondvar_t *cvp, kmutex_t *mp, clock_t exp_time) ++{ ++ return __cv_timedwait_common(cvp, mp, exp_time, TASK_INTERRUPTIBLE); ++} ++EXPORT_SYMBOL(__cv_timedwait_interruptible); ++ ++void ++__cv_signal(kcondvar_t *cvp) ++{ ++ SENTRY; ++ ASSERT(cvp); ++ ASSERT(cvp->cv_magic == CV_MAGIC); ++ atomic_inc(&cvp->cv_refs); ++ ++ /* All waiters are added with WQ_FLAG_EXCLUSIVE so only one ++ * waiter will be set runable with each call to wake_up(). ++ * Additionally wake_up() holds a spin_lock assoicated with ++ * the wait queue to ensure we don't race waking up processes. */ ++ if (atomic_read(&cvp->cv_waiters) > 0) ++ wake_up(&cvp->cv_event); ++ ++ atomic_dec(&cvp->cv_refs); ++ SEXIT; ++} ++EXPORT_SYMBOL(__cv_signal); ++ ++void ++__cv_broadcast(kcondvar_t *cvp) ++{ ++ SENTRY; ++ ASSERT(cvp); ++ ASSERT(cvp->cv_magic == CV_MAGIC); ++ atomic_inc(&cvp->cv_refs); ++ ++ /* Wake_up_all() will wake up all waiters even those which ++ * have the WQ_FLAG_EXCLUSIVE flag set. */ ++ if (atomic_read(&cvp->cv_waiters) > 0) ++ wake_up_all(&cvp->cv_event); ++ ++ atomic_dec(&cvp->cv_refs); ++ SEXIT; ++} ++EXPORT_SYMBOL(__cv_broadcast); +diff -uNr linux-3.2.33-go.orig/spl/spl/spl-cred.c linux-3.2.33-go/spl/spl/spl-cred.c +--- linux-3.2.33-go.orig/spl/spl/spl-cred.c 1970-01-01 01:00:00.000000000 +0100 ++++ linux-3.2.33-go/spl/spl/spl-cred.c 2012-11-16 23:22:32.410192863 +0100 +@@ -0,0 +1,282 @@ ++/*****************************************************************************\ ++ * Copyright (C) 2007-2010 Lawrence Livermore National Security, LLC. ++ * Copyright (C) 2007 The Regents of the University of California. ++ * Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER). ++ * Written by Brian Behlendorf . ++ * UCRL-CODE-235197 ++ * ++ * This file is part of the SPL, Solaris Porting Layer. ++ * For details, see . 
++ * ++ * The SPL is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License as published by the ++ * Free Software Foundation; either version 2 of the License, or (at your ++ * option) any later version. ++ * ++ * The SPL is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * for more details. ++ * ++ * You should have received a copy of the GNU General Public License along ++ * with the SPL. If not, see . ++ ***************************************************************************** ++ * Solaris Porting Layer (SPL) Credential Implementation. ++\*****************************************************************************/ ++ ++#include ++ ++#ifdef DEBUG_SUBSYSTEM ++#undef DEBUG_SUBSYSTEM ++#endif ++ ++#define DEBUG_SUBSYSTEM S_CRED ++ ++#ifdef HAVE_GROUPS_SEARCH ++/* Symbol may be exported by custom kernel patch */ ++#define cr_groups_search(gi, grp) groups_search(gi, grp) ++#else ++/* Implementation from 2.6.30 kernel */ ++static int ++cr_groups_search(const struct group_info *group_info, gid_t grp) ++{ ++ unsigned int left, right; ++ ++ if (!group_info) ++ return 0; ++ ++ left = 0; ++ right = group_info->ngroups; ++ while (left < right) { ++ unsigned int mid = (left+right)/2; ++ int cmp = grp - GROUP_AT(group_info, mid); ++ if (cmp > 0) ++ left = mid + 1; ++ else if (cmp < 0) ++ right = mid; ++ else ++ return 1; ++ } ++ return 0; ++} ++#endif ++ ++#ifdef HAVE_CRED_STRUCT ++ ++/* ++ * As of 2.6.29 a clean credential API appears in the linux kernel. ++ * We attempt to layer the Solaris API on top of the linux API. ++ */ ++ ++/* Hold a reference on the credential and group info */ ++void ++crhold(cred_t *cr) ++{ ++ (void)get_cred((const cred_t *)cr); ++ (void)get_group_info(cr->group_info); ++} ++ ++/* Free a reference on the credential and group info */ ++void ++crfree(cred_t *cr) ++{ ++ put_group_info(cr->group_info); ++ put_cred((const cred_t *)cr); ++} ++ ++/* Return the number of supplemental groups */ ++int ++crgetngroups(const cred_t *cr) ++{ ++ struct group_info *gi; ++ int rc; ++ ++ gi = get_group_info(cr->group_info); ++ rc = gi->ngroups; ++ put_group_info(gi); ++ ++ return rc; ++} ++ ++/* ++ * Return an array of supplemental gids. The returned address is safe ++ * to use as long as the caller has taken a reference with crhold(). ++ * The caller is responsible for releasing the reference with crfree(). ++ */ ++gid_t * ++crgetgroups(const cred_t *cr) ++{ ++ struct group_info *gi; ++ gid_t *gids; ++ ++ gi = get_group_info(cr->group_info); ++ gids = gi->blocks[0]; ++ put_group_info(gi); ++ ++ return gids; ++} ++ ++/* Check if the passed gid is available is in supplied credential. */ ++int ++groupmember(gid_t gid, const cred_t *cr) ++{ ++ struct group_info *gi; ++ int rc; ++ ++ gi = get_group_info(cr->group_info); ++ rc = cr_groups_search(cr->group_info, gid); ++ put_group_info(gi); ++ ++ return rc; ++} ++ ++#else /* HAVE_CRED_STRUCT */ ++ ++/* ++ * Until very recently all credential information was embedded in ++ * the linux task struct. For this reason to simulate a Solaris ++ * cred_t we need to pass the entire task structure around. 
++ */ ++ ++/* Hold a reference on the credential and group info */ ++void crhold(cred_t *cr) { } ++ ++/* Free a reference on the credential and group info */ ++void crfree(cred_t *cr) { } ++ ++/* Return the number of supplemental groups */ ++int ++crgetngroups(const cred_t *cr) ++{ ++ int lock, rc; ++ ++ lock = (cr != current); ++ if (lock) ++ task_lock((struct task_struct *)cr); ++ ++ get_group_info(cr->group_info); ++ rc = cr->group_info->ngroups; ++ put_group_info(cr->group_info); ++ ++ if (lock) ++ task_unlock((struct task_struct *)cr); ++ ++ return rc; ++} ++ ++/* ++ * Return an array of supplemental gids. The returned address is safe ++ * to use as long as the caller has taken a reference with crhold(). ++ * The caller is responsible for releasing the reference with crfree(). ++ */ ++gid_t * ++crgetgroups(const cred_t *cr) ++{ ++ gid_t *gids; ++ int lock; ++ ++ lock = (cr != current); ++ if (lock) ++ task_lock((struct task_struct *)cr); ++ ++ get_group_info(cr->group_info); ++ gids = cr->group_info->blocks[0]; ++ put_group_info(cr->group_info); ++ ++ if (lock) ++ task_unlock((struct task_struct *)cr); ++ ++ return gids; ++} ++ ++/* Check if the passed gid is available is in supplied credential. */ ++int ++groupmember(gid_t gid, const cred_t *cr) ++{ ++ int lock, rc; ++ ++ lock = (cr != current); ++ if (lock) ++ task_lock((struct task_struct *)cr); ++ ++ get_group_info(cr->group_info); ++ rc = cr_groups_search(cr->group_info, gid); ++ put_group_info(cr->group_info); ++ ++ if (lock) ++ task_unlock((struct task_struct *)cr); ++ ++ return rc; ++} ++ ++#endif /* HAVE_CRED_STRUCT */ ++ ++/* Return the effective user id */ ++uid_t ++crgetuid(const cred_t *cr) ++{ ++ return cr->euid; ++} ++ ++/* Return the real user id */ ++uid_t ++crgetruid(const cred_t *cr) ++{ ++ return cr->uid; ++} ++ ++/* Return the saved user id */ ++uid_t ++crgetsuid(const cred_t *cr) ++{ ++ return cr->suid; ++} ++ ++/* Return the filesystem user id */ ++uid_t ++crgetfsuid(const cred_t *cr) ++{ ++ return cr->fsuid; ++} ++ ++/* Return the effective group id */ ++gid_t ++crgetgid(const cred_t *cr) ++{ ++ return cr->egid; ++} ++ ++/* Return the real group id */ ++gid_t ++crgetrgid(const cred_t *cr) ++{ ++ return cr->gid; ++} ++ ++/* Return the saved group id */ ++gid_t ++crgetsgid(const cred_t *cr) ++{ ++ return cr->sgid; ++} ++ ++/* Return the filesystem group id */ ++gid_t ++crgetfsgid(const cred_t *cr) ++{ ++ return cr->fsgid; ++} ++ ++EXPORT_SYMBOL(crhold); ++EXPORT_SYMBOL(crfree); ++EXPORT_SYMBOL(crgetuid); ++EXPORT_SYMBOL(crgetruid); ++EXPORT_SYMBOL(crgetsuid); ++EXPORT_SYMBOL(crgetfsuid); ++EXPORT_SYMBOL(crgetgid); ++EXPORT_SYMBOL(crgetrgid); ++EXPORT_SYMBOL(crgetsgid); ++EXPORT_SYMBOL(crgetfsgid); ++EXPORT_SYMBOL(crgetngroups); ++EXPORT_SYMBOL(crgetgroups); ++EXPORT_SYMBOL(groupmember); +diff -uNr linux-3.2.33-go.orig/spl/spl/spl-debug.c linux-3.2.33-go/spl/spl/spl-debug.c +--- linux-3.2.33-go.orig/spl/spl/spl-debug.c 1970-01-01 01:00:00.000000000 +0100 ++++ linux-3.2.33-go/spl/spl/spl-debug.c 2012-11-16 23:22:32.410192863 +0100 +@@ -0,0 +1,1254 @@ ++/*****************************************************************************\ ++ * Copyright (C) 2007-2010 Lawrence Livermore National Security, LLC. ++ * Copyright (C) 2007 The Regents of the University of California. ++ * Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER). ++ * Written by Brian Behlendorf . ++ * UCRL-CODE-235197 ++ * ++ * This file is part of the SPL, Solaris Porting Layer. ++ * For details, see . 
++ * ++ * The SPL is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License as published by the ++ * Free Software Foundation; either version 2 of the License, or (at your ++ * option) any later version. ++ * ++ * The SPL is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * for more details. ++ * ++ * You should have received a copy of the GNU General Public License along ++ * with the SPL. If not, see . ++ ***************************************************************************** ++ * Solaris Porting Layer (SPL) Debug Implementation. ++\*****************************************************************************/ ++ ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++ ++#ifdef SS_DEBUG_SUBSYS ++#undef SS_DEBUG_SUBSYS ++#endif ++ ++#define SS_DEBUG_SUBSYS SS_DEBUG ++ ++/* Debug log support enabled */ ++#ifdef DEBUG_LOG ++ ++unsigned long spl_debug_subsys = ~0; ++EXPORT_SYMBOL(spl_debug_subsys); ++module_param(spl_debug_subsys, ulong, 0644); ++MODULE_PARM_DESC(spl_debug_subsys, "Subsystem debugging level mask."); ++ ++unsigned long spl_debug_mask = SD_CANTMASK; ++EXPORT_SYMBOL(spl_debug_mask); ++module_param(spl_debug_mask, ulong, 0644); ++MODULE_PARM_DESC(spl_debug_mask, "Debugging level mask."); ++ ++unsigned long spl_debug_printk = SD_CANTMASK; ++EXPORT_SYMBOL(spl_debug_printk); ++module_param(spl_debug_printk, ulong, 0644); ++MODULE_PARM_DESC(spl_debug_printk, "Console printk level mask."); ++ ++int spl_debug_mb = -1; ++EXPORT_SYMBOL(spl_debug_mb); ++module_param(spl_debug_mb, int, 0644); ++MODULE_PARM_DESC(spl_debug_mb, "Total debug buffer size."); ++ ++unsigned int spl_debug_binary = 1; ++EXPORT_SYMBOL(spl_debug_binary); ++ ++unsigned int spl_debug_catastrophe; ++EXPORT_SYMBOL(spl_debug_catastrophe); ++ ++unsigned int spl_debug_panic_on_bug = 0; ++EXPORT_SYMBOL(spl_debug_panic_on_bug); ++module_param(spl_debug_panic_on_bug, uint, 0644); ++MODULE_PARM_DESC(spl_debug_panic_on_bug, "Panic on BUG"); ++ ++static char spl_debug_file_name[PATH_MAX]; ++char spl_debug_file_path[PATH_MAX] = "/tmp/spl-log"; ++ ++unsigned int spl_console_ratelimit = 1; ++EXPORT_SYMBOL(spl_console_ratelimit); ++ ++long spl_console_max_delay; ++EXPORT_SYMBOL(spl_console_max_delay); ++ ++long spl_console_min_delay; ++EXPORT_SYMBOL(spl_console_min_delay); ++ ++unsigned int spl_console_backoff = SPL_DEFAULT_BACKOFF; ++EXPORT_SYMBOL(spl_console_backoff); ++ ++unsigned int spl_debug_stack; ++EXPORT_SYMBOL(spl_debug_stack); ++ ++static int spl_panic_in_progress; ++ ++union trace_data_union (*trace_data[TCD_TYPE_MAX])[NR_CPUS] __cacheline_aligned; ++char *trace_console_buffers[NR_CPUS][3]; ++struct rw_semaphore trace_sem; ++atomic_t trace_tage_allocated = ATOMIC_INIT(0); ++ ++static int spl_debug_dump_all_pages(dumplog_priv_t *dp, char *); ++static void trace_fini(void); ++ ++ ++/* Memory percentage breakdown by type */ ++static unsigned int pages_factor[TCD_TYPE_MAX] = { ++ 80, /* 80% pages for TCD_TYPE_PROC */ ++ 10, /* 10% pages for TCD_TYPE_SOFTIRQ */ ++ 10 /* 10% pages for TCD_TYPE_IRQ */ ++}; ++ ++const char * ++spl_debug_subsys2str(int subsys) ++{ ++ switch (subsys) { ++ default: ++ return NULL; ++ case SS_UNDEFINED: ++ return "undefined"; ++ case SS_ATOMIC: ++ return 
"atomic"; ++ case SS_KOBJ: ++ return "kobj"; ++ case SS_VNODE: ++ return "vnode"; ++ case SS_TIME: ++ return "time"; ++ case SS_RWLOCK: ++ return "rwlock"; ++ case SS_THREAD: ++ return "thread"; ++ case SS_CONDVAR: ++ return "condvar"; ++ case SS_MUTEX: ++ return "mutex"; ++ case SS_RNG: ++ return "rng"; ++ case SS_TASKQ: ++ return "taskq"; ++ case SS_KMEM: ++ return "kmem"; ++ case SS_DEBUG: ++ return "debug"; ++ case SS_GENERIC: ++ return "generic"; ++ case SS_PROC: ++ return "proc"; ++ case SS_MODULE: ++ return "module"; ++ case SS_CRED: ++ return "cred"; ++ case SS_KSTAT: ++ return "kstat"; ++ case SS_XDR: ++ return "xdr"; ++ case SS_TSD: ++ return "tsd"; ++ case SS_ZLIB: ++ return "zlib"; ++ case SS_USER1: ++ return "user1"; ++ case SS_USER2: ++ return "user2"; ++ case SS_USER3: ++ return "user3"; ++ case SS_USER4: ++ return "user4"; ++ case SS_USER5: ++ return "user5"; ++ case SS_USER6: ++ return "user6"; ++ case SS_USER7: ++ return "user7"; ++ case SS_USER8: ++ return "user8"; ++ } ++} ++ ++const char * ++spl_debug_dbg2str(int debug) ++{ ++ switch (debug) { ++ default: ++ return NULL; ++ case SD_TRACE: ++ return "trace"; ++ case SD_INFO: ++ return "info"; ++ case SD_WARNING: ++ return "warning"; ++ case SD_ERROR: ++ return "error"; ++ case SD_EMERG: ++ return "emerg"; ++ case SD_CONSOLE: ++ return "console"; ++ case SD_IOCTL: ++ return "ioctl"; ++ case SD_DPRINTF: ++ return "dprintf"; ++ case SD_OTHER: ++ return "other"; ++ } ++} ++ ++int ++spl_debug_mask2str(char *str, int size, unsigned long mask, int is_subsys) ++{ ++ const char *(*fn)(int bit) = is_subsys ? spl_debug_subsys2str : ++ spl_debug_dbg2str; ++ const char *token; ++ int i, bit, len = 0; ++ ++ if (mask == 0) { /* "0" */ ++ if (size > 0) ++ str[0] = '0'; ++ len = 1; ++ } else { /* space-separated tokens */ ++ for (i = 0; i < 32; i++) { ++ bit = 1 << i; ++ ++ if ((mask & bit) == 0) ++ continue; ++ ++ token = fn(bit); ++ if (token == NULL) /* unused bit */ ++ continue; ++ ++ if (len > 0) { /* separator? */ ++ if (len < size) ++ str[len] = ' '; ++ len++; ++ } ++ ++ while (*token != 0) { ++ if (len < size) ++ str[len] = *token; ++ token++; ++ len++; ++ } ++ } ++ } ++ ++ /* terminate 'str' */ ++ if (len < size) ++ str[len] = 0; ++ else ++ str[size - 1] = 0; ++ ++ return len; ++} ++ ++static int ++spl_debug_token2mask(int *mask, const char *str, int len, int is_subsys) ++{ ++ const char *(*fn)(int bit) = is_subsys ? spl_debug_subsys2str : ++ spl_debug_dbg2str; ++ const char *token; ++ int i, j, bit; ++ ++ /* match against known tokens */ ++ for (i = 0; i < 32; i++) { ++ bit = 1 << i; ++ ++ token = fn(bit); ++ if (token == NULL) /* unused? 
*/ ++ continue; ++ ++ /* strcasecmp */ ++ for (j = 0; ; j++) { ++ if (j == len) { /* end of token */ ++ if (token[j] == 0) { ++ *mask = bit; ++ return 0; ++ } ++ break; ++ } ++ ++ if (token[j] == 0) ++ break; ++ ++ if (str[j] == token[j]) ++ continue; ++ ++ if (str[j] < 'A' || 'Z' < str[j]) ++ break; ++ ++ if (str[j] - 'A' + 'a' != token[j]) ++ break; ++ } ++ } ++ ++ return -EINVAL; /* no match */ ++} ++ ++int ++spl_debug_str2mask(unsigned long *mask, const char *str, int is_subsys) ++{ ++ char op = 0; ++ int m = 0, matched, n, t; ++ ++ /* Allow a number for backwards compatibility */ ++ for (n = strlen(str); n > 0; n--) ++ if (!isspace(str[n-1])) ++ break; ++ matched = n; ++ ++ if ((t = sscanf(str, "%i%n", &m, &matched)) >= 1 && matched == n) { ++ *mask = m; ++ return 0; ++ } ++ ++ /* must be a list of debug tokens or numbers separated by ++ * whitespace and optionally an operator ('+' or '-'). If an operator ++ * appears first in , '*mask' is used as the starting point ++ * (relative), otherwise 0 is used (absolute). An operator applies to ++ * all following tokens up to the next operator. */ ++ matched = 0; ++ while (*str != 0) { ++ while (isspace(*str)) /* skip whitespace */ ++ str++; ++ ++ if (*str == 0) ++ break; ++ ++ if (*str == '+' || *str == '-') { ++ op = *str++; ++ ++ /* op on first token == relative */ ++ if (!matched) ++ m = *mask; ++ ++ while (isspace(*str)) /* skip whitespace */ ++ str++; ++ ++ if (*str == 0) /* trailing op */ ++ return -EINVAL; ++ } ++ ++ /* find token length */ ++ for (n = 0; str[n] != 0 && !isspace(str[n]); n++); ++ ++ /* match token */ ++ if (spl_debug_token2mask(&t, str, n, is_subsys) != 0) ++ return -EINVAL; ++ ++ matched = 1; ++ if (op == '-') ++ m &= ~t; ++ else ++ m |= t; ++ ++ str += n; ++ } ++ ++ if (!matched) ++ return -EINVAL; ++ ++ *mask = m; ++ return 0; ++} ++ ++static void ++spl_debug_dumplog_internal(dumplog_priv_t *dp) ++{ ++ void *journal_info; ++ ++ journal_info = current->journal_info; ++ current->journal_info = NULL; ++ ++ snprintf(spl_debug_file_name, sizeof(spl_debug_file_path) - 1, ++ "%s.%ld.%ld", spl_debug_file_path, ++ get_seconds(), (long)dp->dp_pid); ++ printk("SPL: Dumping log to %s\n", spl_debug_file_name); ++ spl_debug_dump_all_pages(dp, spl_debug_file_name); ++ ++ current->journal_info = journal_info; ++} ++ ++static int ++spl_debug_dumplog_thread(void *arg) ++{ ++ dumplog_priv_t *dp = (dumplog_priv_t *)arg; ++ ++ spl_debug_dumplog_internal(dp); ++ atomic_set(&dp->dp_done, 1); ++ wake_up(&dp->dp_waitq); ++ complete_and_exit(NULL, 0); ++ ++ return 0; /* Unreachable */ ++} ++ ++/* When flag is set do not use a new thread for the debug dump */ ++int ++spl_debug_dumplog(int flags) ++{ ++ struct task_struct *tsk; ++ dumplog_priv_t dp; ++ ++ init_waitqueue_head(&dp.dp_waitq); ++ dp.dp_pid = current->pid; ++ dp.dp_flags = flags; ++ atomic_set(&dp.dp_done, 0); ++ ++ if (dp.dp_flags & DL_NOTHREAD) { ++ spl_debug_dumplog_internal(&dp); ++ } else { ++ ++ tsk = kthread_create(spl_debug_dumplog_thread,(void *)&dp,"spl_debug"); ++ if (tsk == NULL) ++ return -ENOMEM; ++ ++ wake_up_process(tsk); ++ wait_event(dp.dp_waitq, atomic_read(&dp.dp_done)); ++ } ++ ++ return 0; ++} ++EXPORT_SYMBOL(spl_debug_dumplog); ++ ++static char * ++trace_get_console_buffer(void) ++{ ++ int cpu = get_cpu(); ++ int idx; ++ ++ if (in_irq()) { ++ idx = 0; ++ } else if (in_softirq()) { ++ idx = 1; ++ } else { ++ idx = 2; ++ } ++ ++ return trace_console_buffers[cpu][idx]; ++} ++ ++static void ++trace_put_console_buffer(char *buffer) ++{ ++ put_cpu(); ++} ++ 
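For reference, a minimal user-space sketch of the token syntax accepted by spl_debug_str2mask() above: a bare token list sets the mask absolutely, a leading '+' or '-' edits the current mask, and an operator applies to every following token until the next operator. The token table, the bit values, and the str2mask() helper below are hypothetical stand-ins and are not part of this patch.

#include <ctype.h>
#include <stdio.h>
#include <string.h>

static const struct { const char *name; unsigned long bit; } tokens[] = {
    { "trace", 0x1 }, { "info", 0x2 }, { "warning", 0x4 }, { "error", 0x8 },
};
#define NTOKENS (sizeof(tokens) / sizeof(tokens[0]))

/* Simplified parser: '+'/'-' edit the current mask, a bare list replaces it. */
static int str2mask(unsigned long *mask, const char *str)
{
    unsigned long m = 0;
    int matched = 0;
    char op = 0;

    while (*str) {
        size_t i, n;

        while (isspace((unsigned char)*str))
            str++;
        if (*str == '\0')
            break;
        if (*str == '+' || *str == '-') {
            op = *str++;
            if (!matched)          /* operator seen first: start from *mask */
                m = *mask;
            continue;
        }
        for (n = 0; str[n] != '\0' && !isspace((unsigned char)str[n]); n++)
            ;
        for (i = 0; i < NTOKENS; i++) {
            if (strlen(tokens[i].name) == n &&
                strncmp(tokens[i].name, str, n) == 0) {
                if (op == '-')
                    m &= ~tokens[i].bit;
                else
                    m |= tokens[i].bit;
                matched = 1;
                break;
            }
        }
        if (i == NTOKENS)
            return -1;             /* unknown token */
        str += n;
    }
    if (!matched)
        return -1;
    *mask = m;
    return 0;
}

int main(void)
{
    unsigned long mask = 0;

    str2mask(&mask, "warning error");  /* absolute: mask becomes 0xc */
    printf("0x%lx\n", mask);
    str2mask(&mask, "+trace");         /* relative: mask becomes 0xd */
    printf("0x%lx\n", mask);
    str2mask(&mask, "-warning");       /* relative: mask becomes 0x9 */
    printf("0x%lx\n", mask);
    return 0;
}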
++static int ++trace_lock_tcd(struct trace_cpu_data *tcd) ++{ ++ __ASSERT(tcd->tcd_type < TCD_TYPE_MAX); ++ ++ spin_lock_irqsave(&tcd->tcd_lock, tcd->tcd_lock_flags); ++ ++ return 1; ++} ++ ++static void ++trace_unlock_tcd(struct trace_cpu_data *tcd) ++{ ++ __ASSERT(tcd->tcd_type < TCD_TYPE_MAX); ++ ++ spin_unlock_irqrestore(&tcd->tcd_lock, tcd->tcd_lock_flags); ++} ++ ++static struct trace_cpu_data * ++trace_get_tcd(void) ++{ ++ int cpu; ++ struct trace_cpu_data *tcd; ++ ++ cpu = get_cpu(); ++ if (in_irq()) ++ tcd = &(*trace_data[TCD_TYPE_IRQ])[cpu].tcd; ++ else if (in_softirq()) ++ tcd = &(*trace_data[TCD_TYPE_SOFTIRQ])[cpu].tcd; ++ else ++ tcd = &(*trace_data[TCD_TYPE_PROC])[cpu].tcd; ++ ++ trace_lock_tcd(tcd); ++ ++ return tcd; ++} ++ ++static void ++trace_put_tcd (struct trace_cpu_data *tcd) ++{ ++ trace_unlock_tcd(tcd); ++ ++ put_cpu(); ++} ++ ++static void ++trace_set_debug_header(struct spl_debug_header *header, int subsys, ++ int mask, const int line, unsigned long stack) ++{ ++ struct timeval tv; ++ ++ do_gettimeofday(&tv); ++ ++ header->ph_subsys = subsys; ++ header->ph_mask = mask; ++ header->ph_cpu_id = smp_processor_id(); ++ header->ph_sec = (__u32)tv.tv_sec; ++ header->ph_usec = tv.tv_usec; ++ header->ph_stack = stack; ++ header->ph_pid = current->pid; ++ header->ph_line_num = line; ++ ++ return; ++} ++ ++static void ++trace_print_to_console(struct spl_debug_header *hdr, int mask, const char *buf, ++ int len, const char *file, const char *fn) ++{ ++ char *prefix = "SPL", *ptype = NULL; ++ ++ if ((mask & SD_EMERG) != 0) { ++ prefix = "SPLError"; ++ ptype = KERN_EMERG; ++ } else if ((mask & SD_ERROR) != 0) { ++ prefix = "SPLError"; ++ ptype = KERN_ERR; ++ } else if ((mask & SD_WARNING) != 0) { ++ prefix = "SPL"; ++ ptype = KERN_WARNING; ++ } else if ((mask & (SD_CONSOLE | spl_debug_printk)) != 0) { ++ prefix = "SPL"; ++ ptype = KERN_INFO; ++ } ++ ++ if ((mask & SD_CONSOLE) != 0) { ++ printk("%s%s: %.*s", ptype, prefix, len, buf); ++ } else { ++ printk("%s%s: %d:%d:(%s:%d:%s()) %.*s", ptype, prefix, ++ hdr->ph_pid, hdr->ph_stack, file, ++ hdr->ph_line_num, fn, len, buf); ++ } ++ ++ return; ++} ++ ++static int ++trace_max_debug_mb(void) ++{ ++ return MAX(512, ((num_physpages >> (20 - PAGE_SHIFT)) * 80) / 100); ++} ++ ++static struct trace_page * ++tage_alloc(int gfp) ++{ ++ struct page *page; ++ struct trace_page *tage; ++ ++ page = alloc_pages(gfp | __GFP_NOWARN, 0); ++ if (page == NULL) ++ return NULL; ++ ++ tage = kmalloc(sizeof(*tage), gfp); ++ if (tage == NULL) { ++ __free_pages(page, 0); ++ return NULL; ++ } ++ ++ tage->page = page; ++ atomic_inc(&trace_tage_allocated); ++ ++ return tage; ++} ++ ++static void ++tage_free(struct trace_page *tage) ++{ ++ __ASSERT(tage != NULL); ++ __ASSERT(tage->page != NULL); ++ ++ __free_pages(tage->page, 0); ++ kfree(tage); ++ atomic_dec(&trace_tage_allocated); ++} ++ ++static struct trace_page * ++tage_from_list(struct list_head *list) ++{ ++ return list_entry(list, struct trace_page, linkage); ++} ++ ++static void ++tage_to_tail(struct trace_page *tage, struct list_head *queue) ++{ ++ __ASSERT(tage != NULL); ++ __ASSERT(queue != NULL); ++ ++ list_move_tail(&tage->linkage, queue); ++} ++ ++/* try to return a page that has 'len' bytes left at the end */ ++static struct trace_page * ++trace_get_tage_try(struct trace_cpu_data *tcd, unsigned long len) ++{ ++ struct trace_page *tage; ++ ++ if (tcd->tcd_cur_pages > 0) { ++ __ASSERT(!list_empty(&tcd->tcd_pages)); ++ tage = tage_from_list(tcd->tcd_pages.prev); ++ if (tage->used + len <= 
PAGE_SIZE) ++ return tage; ++ } ++ ++ if (tcd->tcd_cur_pages < tcd->tcd_max_pages) { ++ if (tcd->tcd_cur_stock_pages > 0) { ++ tage = tage_from_list(tcd->tcd_stock_pages.prev); ++ tcd->tcd_cur_stock_pages--; ++ list_del_init(&tage->linkage); ++ } else { ++ tage = tage_alloc(GFP_ATOMIC); ++ if (tage == NULL) { ++ printk(KERN_WARNING ++ "failure to allocate a tage (%ld)\n", ++ tcd->tcd_cur_pages); ++ return NULL; ++ } ++ } ++ ++ tage->used = 0; ++ tage->cpu = smp_processor_id(); ++ tage->type = tcd->tcd_type; ++ list_add_tail(&tage->linkage, &tcd->tcd_pages); ++ tcd->tcd_cur_pages++; ++ ++ return tage; ++ } ++ ++ return NULL; ++} ++ ++/* return a page that has 'len' bytes left at the end */ ++static struct trace_page * ++trace_get_tage(struct trace_cpu_data *tcd, unsigned long len) ++{ ++ struct trace_page *tage; ++ ++ __ASSERT(len <= PAGE_SIZE); ++ ++ tage = trace_get_tage_try(tcd, len); ++ if (tage) ++ return tage; ++ ++ if (tcd->tcd_cur_pages > 0) { ++ tage = tage_from_list(tcd->tcd_pages.next); ++ tage->used = 0; ++ tage_to_tail(tage, &tcd->tcd_pages); ++ } ++ ++ return tage; ++} ++ ++int ++spl_debug_msg(void *arg, int subsys, int mask, const char *file, ++ const char *fn, const int line, const char *format, ...) ++{ ++ spl_debug_limit_state_t *cdls = arg; ++ struct trace_cpu_data *tcd = NULL; ++ struct spl_debug_header header = { 0, }; ++ struct trace_page *tage; ++ /* string_buf is used only if tcd != NULL, and is always set then */ ++ char *string_buf = NULL; ++ char *debug_buf; ++ int known_size; ++ int needed = 85; /* average message length */ ++ int max_nob; ++ va_list ap; ++ int i; ++ ++ if (subsys == 0) ++ subsys = SS_DEBUG_SUBSYS; ++ ++ if (mask == 0) ++ mask = SD_EMERG; ++ ++ if (strchr(file, '/')) ++ file = strrchr(file, '/') + 1; ++ ++ tcd = trace_get_tcd(); ++ trace_set_debug_header(&header, subsys, mask, line, 0); ++ if (tcd == NULL) ++ goto console; ++ ++ if (tcd->tcd_shutting_down) { ++ trace_put_tcd(tcd); ++ tcd = NULL; ++ goto console; ++ } ++ ++ known_size = strlen(file) + 1; ++ if (fn) ++ known_size += strlen(fn) + 1; ++ ++ if (spl_debug_binary) ++ known_size += sizeof(header); ++ ++ /* '2' used because vsnprintf returns real size required for output ++ * _without_ terminating NULL. 
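The message-formatting loop that follows relies on vsnprintf() returning the length the output would need, excluding the terminating NUL, which is why two passes are always enough. Below is a user-space sketch of the same two-pass idiom; format_message() and the 16-byte initial guess are hypothetical stand-ins for the in-kernel trace-page handling and the 85-byte average used above.

#include <stdarg.h>
#include <stdio.h>
#include <stdlib.h>

static char *format_message(const char *fmt, ...)
{
    int needed = 16;               /* optimistic first guess */
    char *buf = NULL;
    va_list ap;
    int i, len;

    for (i = 0; i < 2; i++) {
        char *tmp = realloc(buf, needed + 1);

        if (tmp == NULL) {
            free(buf);
            return NULL;
        }
        buf = tmp;

        va_start(ap, fmt);
        len = vsnprintf(buf, needed + 1, fmt, ap);
        va_end(ap);

        if (len <= needed)         /* it fit, including the terminating NUL */
            break;
        needed = len;              /* second pass gets exactly enough room */
    }
    return buf;
}

int main(void)
{
    char *msg = format_message("pid %d: %s", 1234,
        "a message longer than the initial sixteen byte guess");

    if (msg != NULL)
        puts(msg);
    free(msg);
    return 0;
}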
*/ ++ for (i = 0; i < 2; i++) { ++ tage = trace_get_tage(tcd, needed + known_size + 1); ++ if (tage == NULL) { ++ if (needed + known_size > PAGE_SIZE) ++ mask |= SD_ERROR; ++ ++ trace_put_tcd(tcd); ++ tcd = NULL; ++ goto console; ++ } ++ ++ string_buf = (char *)page_address(tage->page) + ++ tage->used + known_size; ++ ++ max_nob = PAGE_SIZE - tage->used - known_size; ++ if (max_nob <= 0) { ++ printk(KERN_EMERG "negative max_nob: %i\n", max_nob); ++ mask |= SD_ERROR; ++ trace_put_tcd(tcd); ++ tcd = NULL; ++ goto console; ++ } ++ ++ needed = 0; ++ if (format) { ++ va_start(ap, format); ++ needed += vsnprintf(string_buf, max_nob, format, ap); ++ va_end(ap); ++ } ++ ++ if (needed < max_nob) ++ break; ++ } ++ ++ header.ph_len = known_size + needed; ++ debug_buf = (char *)page_address(tage->page) + tage->used; ++ ++ if (spl_debug_binary) { ++ memcpy(debug_buf, &header, sizeof(header)); ++ tage->used += sizeof(header); ++ debug_buf += sizeof(header); ++ } ++ ++ strcpy(debug_buf, file); ++ tage->used += strlen(file) + 1; ++ debug_buf += strlen(file) + 1; ++ ++ if (fn) { ++ strcpy(debug_buf, fn); ++ tage->used += strlen(fn) + 1; ++ debug_buf += strlen(fn) + 1; ++ } ++ ++ __ASSERT(debug_buf == string_buf); ++ ++ tage->used += needed; ++ __ASSERT (tage->used <= PAGE_SIZE); ++ ++console: ++ if ((mask & spl_debug_printk) == 0) { ++ /* no console output requested */ ++ if (tcd != NULL) ++ trace_put_tcd(tcd); ++ return 1; ++ } ++ ++ if (cdls != NULL) { ++ if (spl_console_ratelimit && cdls->cdls_next != 0 && ++ !time_before(cdls->cdls_next, jiffies)) { ++ /* skipping a console message */ ++ cdls->cdls_count++; ++ if (tcd != NULL) ++ trace_put_tcd(tcd); ++ return 1; ++ } ++ ++ if (time_before(cdls->cdls_next + spl_console_max_delay + ++ (10 * HZ), jiffies)) { ++ /* last timeout was a long time ago */ ++ cdls->cdls_delay /= spl_console_backoff * 4; ++ } else { ++ cdls->cdls_delay *= spl_console_backoff; ++ ++ if (cdls->cdls_delay < spl_console_min_delay) ++ cdls->cdls_delay = spl_console_min_delay; ++ else if (cdls->cdls_delay > spl_console_max_delay) ++ cdls->cdls_delay = spl_console_max_delay; ++ } ++ ++ /* ensure cdls_next is never zero after it's been seen */ ++ cdls->cdls_next = (jiffies + cdls->cdls_delay) | 1; ++ } ++ ++ if (tcd != NULL) { ++ trace_print_to_console(&header, mask, string_buf, needed, file, fn); ++ trace_put_tcd(tcd); ++ } else { ++ string_buf = trace_get_console_buffer(); ++ ++ needed = 0; ++ if (format != NULL) { ++ va_start(ap, format); ++ needed += vsnprintf(string_buf, ++ TRACE_CONSOLE_BUFFER_SIZE, format, ap); ++ va_end(ap); ++ } ++ trace_print_to_console(&header, mask, ++ string_buf, needed, file, fn); ++ ++ trace_put_console_buffer(string_buf); ++ } ++ ++ if (cdls != NULL && cdls->cdls_count != 0) { ++ string_buf = trace_get_console_buffer(); ++ ++ needed = snprintf(string_buf, TRACE_CONSOLE_BUFFER_SIZE, ++ "Skipped %d previous similar message%s\n", ++ cdls->cdls_count, (cdls->cdls_count > 1) ? "s" : ""); ++ ++ trace_print_to_console(&header, mask, ++ string_buf, needed, file, fn); ++ ++ trace_put_console_buffer(string_buf); ++ cdls->cdls_count = 0; ++ } ++ ++ return 0; ++} ++EXPORT_SYMBOL(spl_debug_msg); ++ ++/* Do the collect_pages job on a single CPU: assumes that all other ++ * CPUs have been stopped during a panic. If this isn't true for ++ * some arch, this will have to be implemented separately in each arch. 
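The cdls rate-limit bookkeeping above grows the hold-off delay by a backoff factor while a message stays noisy, relaxes it again after a quiet period, and only counts suppressed messages so they can be summarized later. A user-space sketch of that state machine follows; the ratelimit struct, the RL_* constants, and the integer time variable are stand-ins for the cdls fields, the SPL_DEFAULT_* delays, and jiffies.

#include <stdio.h>

struct ratelimit {
    long next;     /* earliest time the next message may go out (0 = never seen) */
    long delay;    /* current hold-off interval */
    int  skipped;  /* messages suppressed since the last one shown */
};

#define RL_MIN      1
#define RL_MAX      64
#define RL_BACKOFF  2

static int ratelimit_ok(struct ratelimit *rl, long now)
{
    if (rl->next != 0 && now < rl->next) {
        rl->skipped++;                     /* suppress, but remember we did */
        return 0;
    }

    if (now > rl->next + RL_MAX + 10)
        rl->delay /= RL_BACKOFF * 4;       /* long quiet period: relax */
    else
        rl->delay *= RL_BACKOFF;           /* still noisy: back off harder */

    if (rl->delay < RL_MIN)
        rl->delay = RL_MIN;
    else if (rl->delay > RL_MAX)
        rl->delay = RL_MAX;

    /* '| 1' keeps next non-zero, so "never seen" stays distinguishable */
    rl->next = (now + rl->delay) | 1;
    return 1;
}

int main(void)
{
    struct ratelimit rl = { 0, 0, 0 };
    long t;

    for (t = 0; t < 40; t++) {
        if (!ratelimit_ok(&rl, t))
            continue;
        if (rl.skipped)
            printf("t=%2ld: skipped %d similar messages\n", t, rl.skipped);
        rl.skipped = 0;
        printf("t=%2ld: message shown, next delay %ld\n", t, rl.delay);
    }
    return 0;
}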
++ */ ++static void ++collect_pages_from_single_cpu(struct page_collection *pc) ++{ ++ struct trace_cpu_data *tcd; ++ int i, j; ++ ++ tcd_for_each(tcd, i, j) { ++ list_splice_init(&tcd->tcd_pages, &pc->pc_pages); ++ tcd->tcd_cur_pages = 0; ++ } ++} ++ ++static void ++collect_pages_on_all_cpus(struct page_collection *pc) ++{ ++ struct trace_cpu_data *tcd; ++ int i, cpu; ++ ++ spin_lock(&pc->pc_lock); ++ for_each_possible_cpu(cpu) { ++ tcd_for_each_type_lock(tcd, i, cpu) { ++ list_splice_init(&tcd->tcd_pages, &pc->pc_pages); ++ tcd->tcd_cur_pages = 0; ++ } ++ } ++ spin_unlock(&pc->pc_lock); ++} ++ ++static void ++collect_pages(dumplog_priv_t *dp, struct page_collection *pc) ++{ ++ INIT_LIST_HEAD(&pc->pc_pages); ++ ++ if (spl_panic_in_progress || dp->dp_flags & DL_SINGLE_CPU) ++ collect_pages_from_single_cpu(pc); ++ else ++ collect_pages_on_all_cpus(pc); ++} ++ ++static void ++put_pages_back_on_all_cpus(struct page_collection *pc) ++{ ++ struct trace_cpu_data *tcd; ++ struct list_head *cur_head; ++ struct trace_page *tage; ++ struct trace_page *tmp; ++ int i, cpu; ++ ++ spin_lock(&pc->pc_lock); ++ ++ for_each_possible_cpu(cpu) { ++ tcd_for_each_type_lock(tcd, i, cpu) { ++ cur_head = tcd->tcd_pages.next; ++ ++ list_for_each_entry_safe(tage, tmp, &pc->pc_pages, ++ linkage) { ++ if (tage->cpu != cpu || tage->type != i) ++ continue; ++ ++ tage_to_tail(tage, cur_head); ++ tcd->tcd_cur_pages++; ++ } ++ } ++ } ++ ++ spin_unlock(&pc->pc_lock); ++} ++ ++static void ++put_pages_back(struct page_collection *pc) ++{ ++ if (!spl_panic_in_progress) ++ put_pages_back_on_all_cpus(pc); ++} ++ ++static int ++spl_debug_dump_all_pages(dumplog_priv_t *dp, char *filename) ++{ ++ struct page_collection pc; ++ struct file *filp; ++ struct trace_page *tage; ++ struct trace_page *tmp; ++ mm_segment_t oldfs; ++ int rc = 0; ++ ++ down_write(&trace_sem); ++ ++ filp = spl_filp_open(filename, O_CREAT|O_EXCL|O_WRONLY|O_LARGEFILE, ++ 0600, &rc); ++ if (filp == NULL) { ++ if (rc != -EEXIST) ++ printk(KERN_ERR "SPL: Can't open %s for dump: %d\n", ++ filename, rc); ++ goto out; ++ } ++ ++ spin_lock_init(&pc.pc_lock); ++ collect_pages(dp, &pc); ++ if (list_empty(&pc.pc_pages)) { ++ rc = 0; ++ goto close; ++ } ++ ++ oldfs = get_fs(); ++ set_fs(get_ds()); ++ ++ list_for_each_entry_safe(tage, tmp, &pc.pc_pages, linkage) { ++ rc = spl_filp_write(filp, page_address(tage->page), ++ tage->used, spl_filp_poff(filp)); ++ if (rc != (int)tage->used) { ++ printk(KERN_WARNING "SPL: Wanted to write %u " ++ "but wrote %d\n", tage->used, rc); ++ put_pages_back(&pc); ++ __ASSERT(list_empty(&pc.pc_pages)); ++ break; ++ } ++ list_del(&tage->linkage); ++ tage_free(tage); ++ } ++ ++ set_fs(oldfs); ++ ++ rc = spl_filp_fsync(filp, 1); ++ if (rc) ++ printk(KERN_ERR "SPL: Unable to sync: %d\n", rc); ++ close: ++ spl_filp_close(filp); ++ out: ++ up_write(&trace_sem); ++ ++ return rc; ++} ++ ++static void ++spl_debug_flush_pages(void) ++{ ++ dumplog_priv_t dp; ++ struct page_collection pc; ++ struct trace_page *tage; ++ struct trace_page *tmp; ++ ++ spin_lock_init(&pc.pc_lock); ++ init_waitqueue_head(&dp.dp_waitq); ++ dp.dp_pid = current->pid; ++ dp.dp_flags = 0; ++ atomic_set(&dp.dp_done, 0); ++ ++ collect_pages(&dp, &pc); ++ list_for_each_entry_safe(tage, tmp, &pc.pc_pages, linkage) { ++ list_del(&tage->linkage); ++ tage_free(tage); ++ } ++} ++ ++unsigned long ++spl_debug_set_mask(unsigned long mask) { ++ spl_debug_mask = mask; ++ return 0; ++} ++EXPORT_SYMBOL(spl_debug_set_mask); ++ ++unsigned long ++spl_debug_get_mask(void) { ++ return spl_debug_mask; 
++} ++EXPORT_SYMBOL(spl_debug_get_mask); ++ ++unsigned long ++spl_debug_set_subsys(unsigned long subsys) { ++ spl_debug_subsys = subsys; ++ return 0; ++} ++EXPORT_SYMBOL(spl_debug_set_subsys); ++ ++unsigned long ++spl_debug_get_subsys(void) { ++ return spl_debug_subsys; ++} ++EXPORT_SYMBOL(spl_debug_get_subsys); ++ ++int ++spl_debug_set_mb(int mb) ++{ ++ int i, j, pages; ++ int limit = trace_max_debug_mb(); ++ struct trace_cpu_data *tcd; ++ ++ if (mb < num_possible_cpus()) { ++ printk(KERN_ERR "SPL: Refusing to set debug buffer size to " ++ "%dMB - lower limit is %d\n", mb, num_possible_cpus()); ++ return -EINVAL; ++ } ++ ++ if (mb > limit) { ++ printk(KERN_ERR "SPL: Refusing to set debug buffer size to " ++ "%dMB - upper limit is %d\n", mb, limit); ++ return -EINVAL; ++ } ++ ++ mb /= num_possible_cpus(); ++ pages = mb << (20 - PAGE_SHIFT); ++ ++ down_write(&trace_sem); ++ ++ tcd_for_each(tcd, i, j) ++ tcd->tcd_max_pages = (pages * tcd->tcd_pages_factor) / 100; ++ ++ up_write(&trace_sem); ++ ++ return 0; ++} ++EXPORT_SYMBOL(spl_debug_set_mb); ++ ++int ++spl_debug_get_mb(void) ++{ ++ int i, j; ++ struct trace_cpu_data *tcd; ++ int total_pages = 0; ++ ++ down_read(&trace_sem); ++ ++ tcd_for_each(tcd, i, j) ++ total_pages += tcd->tcd_max_pages; ++ ++ up_read(&trace_sem); ++ ++ return (total_pages >> (20 - PAGE_SHIFT)) + 1; ++} ++EXPORT_SYMBOL(spl_debug_get_mb); ++ ++void spl_debug_dumpstack(struct task_struct *tsk) ++{ ++ extern void show_task(struct task_struct *); ++ ++ if (tsk == NULL) ++ tsk = current; ++ ++ printk("SPL: Showing stack for process %d\n", tsk->pid); ++ dump_stack(); ++} ++EXPORT_SYMBOL(spl_debug_dumpstack); ++ ++void spl_debug_bug(char *file, const char *func, const int line, int flags) ++{ ++ spl_debug_catastrophe = 1; ++ spl_debug_msg(NULL, 0, SD_EMERG, file, func, line, "SPL PANIC\n"); ++ ++ if (in_interrupt()) ++ panic("SPL PANIC in interrupt.\n"); ++ ++ if (in_atomic() || irqs_disabled()) ++ flags |= DL_NOTHREAD; ++ ++ /* Ensure all debug pages and dumped by current cpu */ ++ if (spl_debug_panic_on_bug) ++ spl_panic_in_progress = 1; ++ ++ spl_debug_dumpstack(NULL); ++ spl_debug_dumplog(flags); ++ ++ if (spl_debug_panic_on_bug) ++ panic("SPL PANIC"); ++ ++ set_task_state(current, TASK_UNINTERRUPTIBLE); ++ while (1) ++ schedule(); ++} ++EXPORT_SYMBOL(spl_debug_bug); ++ ++int ++spl_debug_clear_buffer(void) ++{ ++ spl_debug_flush_pages(); ++ return 0; ++} ++EXPORT_SYMBOL(spl_debug_clear_buffer); ++ ++int ++spl_debug_mark_buffer(char *text) ++{ ++ SDEBUG(SD_WARNING, "*************************************\n"); ++ SDEBUG(SD_WARNING, "DEBUG MARKER: %s\n", text); ++ SDEBUG(SD_WARNING, "*************************************\n"); ++ ++ return 0; ++} ++EXPORT_SYMBOL(spl_debug_mark_buffer); ++ ++static int ++trace_init(int max_pages) ++{ ++ struct trace_cpu_data *tcd; ++ int i, j; ++ ++ init_rwsem(&trace_sem); ++ ++ /* initialize trace_data */ ++ memset(trace_data, 0, sizeof(trace_data)); ++ for (i = 0; i < TCD_TYPE_MAX; i++) { ++ trace_data[i] = kmalloc(sizeof(union trace_data_union) * ++ NR_CPUS, GFP_KERNEL); ++ if (trace_data[i] == NULL) ++ goto out; ++ } ++ ++ tcd_for_each(tcd, i, j) { ++ spin_lock_init(&tcd->tcd_lock); ++ tcd->tcd_pages_factor = pages_factor[i]; ++ tcd->tcd_type = i; ++ tcd->tcd_cpu = j; ++ INIT_LIST_HEAD(&tcd->tcd_pages); ++ INIT_LIST_HEAD(&tcd->tcd_stock_pages); ++ tcd->tcd_cur_pages = 0; ++ tcd->tcd_cur_stock_pages = 0; ++ tcd->tcd_max_pages = (max_pages * pages_factor[i]) / 100; ++ tcd->tcd_shutting_down = 0; ++ } ++ ++ for (i = 0; i < num_possible_cpus(); 
i++) { ++ for (j = 0; j < 3; j++) { ++ trace_console_buffers[i][j] = ++ kmalloc(TRACE_CONSOLE_BUFFER_SIZE, ++ GFP_KERNEL); ++ ++ if (trace_console_buffers[i][j] == NULL) ++ goto out; ++ } ++ } ++ ++ return 0; ++out: ++ trace_fini(); ++ printk(KERN_ERR "SPL: Insufficient memory for debug logs\n"); ++ return -ENOMEM; ++} ++ ++int ++spl_debug_init(void) ++{ ++ int rc, max = spl_debug_mb; ++ ++ spl_console_max_delay = SPL_DEFAULT_MAX_DELAY; ++ spl_console_min_delay = SPL_DEFAULT_MIN_DELAY; ++ ++ /* If spl_debug_mb is set to an invalid value or uninitialized ++ * then just make the total buffers smp_num_cpus TCD_MAX_PAGES */ ++ if (max > (num_physpages >> (20 - 2 - PAGE_SHIFT)) / 5 || ++ max >= 512 || max < 0) { ++ max = TCD_MAX_PAGES; ++ } else { ++ max = (max / num_online_cpus()) << (20 - PAGE_SHIFT); ++ } ++ ++ rc = trace_init(max); ++ if (rc) ++ return rc; ++ ++ return rc; ++} ++ ++static void ++trace_cleanup_on_all_cpus(void) ++{ ++ struct trace_cpu_data *tcd; ++ struct trace_page *tage; ++ struct trace_page *tmp; ++ int i, cpu; ++ ++ for_each_possible_cpu(cpu) { ++ tcd_for_each_type_lock(tcd, i, cpu) { ++ tcd->tcd_shutting_down = 1; ++ ++ list_for_each_entry_safe(tage, tmp, &tcd->tcd_pages, ++ linkage) { ++ list_del(&tage->linkage); ++ tage_free(tage); ++ } ++ tcd->tcd_cur_pages = 0; ++ } ++ } ++} ++ ++static void ++trace_fini(void) ++{ ++ int i, j; ++ ++ trace_cleanup_on_all_cpus(); ++ ++ for (i = 0; i < num_possible_cpus(); i++) { ++ for (j = 0; j < 3; j++) { ++ if (trace_console_buffers[i][j] != NULL) { ++ kfree(trace_console_buffers[i][j]); ++ trace_console_buffers[i][j] = NULL; ++ } ++ } ++ } ++ ++ for (i = 0; i < TCD_TYPE_MAX && trace_data[i] != NULL; i++) { ++ kfree(trace_data[i]); ++ trace_data[i] = NULL; ++ } ++} ++ ++void ++spl_debug_fini(void) ++{ ++ trace_fini(); ++} ++ ++#endif /* DEBUG_LOG */ +diff -uNr linux-3.2.33-go.orig/spl/spl/spl-err.c linux-3.2.33-go/spl/spl/spl-err.c +--- linux-3.2.33-go.orig/spl/spl/spl-err.c 1970-01-01 01:00:00.000000000 +0100 ++++ linux-3.2.33-go/spl/spl/spl-err.c 2012-11-16 23:22:32.411192851 +0100 +@@ -0,0 +1,82 @@ ++/*****************************************************************************\ ++ * Copyright (C) 2007-2010 Lawrence Livermore National Security, LLC. ++ * Copyright (C) 2007 The Regents of the University of California. ++ * Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER). ++ * Written by Brian Behlendorf . ++ * UCRL-CODE-235197 ++ * ++ * This file is part of the SPL, Solaris Porting Layer. ++ * For details, see . ++ * ++ * The SPL is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License as published by the ++ * Free Software Foundation; either version 2 of the License, or (at your ++ * option) any later version. ++ * ++ * The SPL is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * for more details. ++ * ++ * You should have received a copy of the GNU General Public License along ++ * with the SPL. If not, see . ++ ***************************************************************************** ++ * Solaris Porting Layer (SPL) Error Implementation. 
++\*****************************************************************************/ ++ ++#include ++#include ++#include ++ ++#ifdef SS_DEBUG_SUBSYS ++#undef SS_DEBUG_SUBSYS ++#endif ++ ++#define SS_DEBUG_SUBSYS SS_GENERIC ++ ++#ifdef DEBUG_LOG ++static char ce_prefix[CE_IGNORE][10] = { "", "NOTICE: ", "WARNING: ", "" }; ++static char ce_suffix[CE_IGNORE][2] = { "", "\n", "\n", "" }; ++#endif ++ ++void ++vpanic(const char *fmt, va_list ap) ++{ ++ char msg[MAXMSGLEN]; ++ ++ vsnprintf(msg, MAXMSGLEN - 1, fmt, ap); ++ PANIC("%s", msg); ++} /* vpanic() */ ++EXPORT_SYMBOL(vpanic); ++ ++void ++vcmn_err(int ce, const char *fmt, va_list ap) ++{ ++ char msg[MAXMSGLEN]; ++ ++ if (ce == CE_PANIC) ++ vpanic(fmt, ap); ++ ++ if (ce != CE_NOTE) { ++ vsnprintf(msg, MAXMSGLEN - 1, fmt, ap); ++ ++ if (fmt[0] == '!') ++ SDEBUG(SD_INFO, "%s%s%s", ++ ce_prefix[ce], msg, ce_suffix[ce]); ++ else ++ SERROR("%s%s%s", ce_prefix[ce], msg, ce_suffix[ce]); ++ } ++} /* vcmn_err() */ ++EXPORT_SYMBOL(vcmn_err); ++ ++void ++cmn_err(int ce, const char *fmt, ...) ++{ ++ va_list ap; ++ ++ va_start(ap, fmt); ++ vcmn_err(ce, fmt, ap); ++ va_end(ap); ++} /* cmn_err() */ ++EXPORT_SYMBOL(cmn_err); ++ +diff -uNr linux-3.2.33-go.orig/spl/spl/spl-generic.c linux-3.2.33-go/spl/spl/spl-generic.c +--- linux-3.2.33-go.orig/spl/spl/spl-generic.c 1970-01-01 01:00:00.000000000 +0100 ++++ linux-3.2.33-go/spl/spl/spl-generic.c 2012-11-16 23:22:32.411192851 +0100 +@@ -0,0 +1,742 @@ ++/*****************************************************************************\ ++ * Copyright (C) 2007-2010 Lawrence Livermore National Security, LLC. ++ * Copyright (C) 2007 The Regents of the University of California. ++ * Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER). ++ * Written by Brian Behlendorf . ++ * UCRL-CODE-235197 ++ * ++ * This file is part of the SPL, Solaris Porting Layer. ++ * For details, see . ++ * ++ * The SPL is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License as published by the ++ * Free Software Foundation; either version 2 of the License, or (at your ++ * option) any later version. ++ * ++ * The SPL is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * for more details. ++ * ++ * You should have received a copy of the GNU General Public License along ++ * with the SPL. If not, see . ++ ***************************************************************************** ++ * Solaris Porting Layer (SPL) Generic Implementation. 
++\*****************************************************************************/ ++ ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++ ++#ifdef SS_DEBUG_SUBSYS ++#undef SS_DEBUG_SUBSYS ++#endif ++ ++#define SS_DEBUG_SUBSYS SS_GENERIC ++ ++char spl_version[32] = "SPL v" SPL_META_VERSION "-" SPL_META_RELEASE; ++EXPORT_SYMBOL(spl_version); ++ ++unsigned long spl_hostid = HW_INVALID_HOSTID; ++EXPORT_SYMBOL(spl_hostid); ++module_param(spl_hostid, ulong, 0644); ++MODULE_PARM_DESC(spl_hostid, "The system hostid."); ++ ++char hw_serial[HW_HOSTID_LEN] = ""; ++EXPORT_SYMBOL(hw_serial); ++ ++proc_t p0 = { 0 }; ++EXPORT_SYMBOL(p0); ++ ++#ifndef HAVE_KALLSYMS_LOOKUP_NAME ++kallsyms_lookup_name_t spl_kallsyms_lookup_name_fn = SYMBOL_POISON; ++#endif ++ ++int ++highbit(unsigned long i) ++{ ++ register int h = 1; ++ SENTRY; ++ ++ if (i == 0) ++ SRETURN(0); ++#if BITS_PER_LONG == 64 ++ if (i & 0xffffffff00000000ul) { ++ h += 32; i >>= 32; ++ } ++#endif ++ if (i & 0xffff0000) { ++ h += 16; i >>= 16; ++ } ++ if (i & 0xff00) { ++ h += 8; i >>= 8; ++ } ++ if (i & 0xf0) { ++ h += 4; i >>= 4; ++ } ++ if (i & 0xc) { ++ h += 2; i >>= 2; ++ } ++ if (i & 0x2) { ++ h += 1; ++ } ++ SRETURN(h); ++} ++EXPORT_SYMBOL(highbit); ++ ++#if BITS_PER_LONG == 32 ++/* ++ * Support 64/64 => 64 division on a 32-bit platform. While the kernel ++ * provides a div64_u64() function for this we do not use it because the ++ * implementation is flawed. There are cases which return incorrect ++ * results as late as linux-2.6.35. Until this is fixed upstream the ++ * spl must provide its own implementation. ++ * ++ * This implementation is a slightly modified version of the algorithm ++ * proposed by the book 'Hacker's Delight'. The original source can be ++ * found here and is available for use without restriction. ++ * ++ * http://www.hackersdelight.org/HDcode/newCode/divDouble.c ++ */ ++ ++/* ++ * Calculate number of leading of zeros for a 64-bit value. ++ */ ++static int ++nlz64(uint64_t x) { ++ register int n = 0; ++ ++ if (x == 0) ++ return 64; ++ ++ if (x <= 0x00000000FFFFFFFFULL) {n = n + 32; x = x << 32;} ++ if (x <= 0x0000FFFFFFFFFFFFULL) {n = n + 16; x = x << 16;} ++ if (x <= 0x00FFFFFFFFFFFFFFULL) {n = n + 8; x = x << 8;} ++ if (x <= 0x0FFFFFFFFFFFFFFFULL) {n = n + 4; x = x << 4;} ++ if (x <= 0x3FFFFFFFFFFFFFFFULL) {n = n + 2; x = x << 2;} ++ if (x <= 0x7FFFFFFFFFFFFFFFULL) {n = n + 1;} ++ ++ return n; ++} ++ ++/* ++ * Newer kernels have a div_u64() function but we define our own ++ * to simplify portibility between kernel versions. ++ */ ++static inline uint64_t ++__div_u64(uint64_t u, uint32_t v) ++{ ++ (void) do_div(u, v); ++ return u; ++} ++ ++/* ++ * Implementation of 64-bit unsigned division for 32-bit machines. ++ * ++ * First the procedure takes care of the case in which the divisor is a ++ * 32-bit quantity. There are two subcases: (1) If the left half of the ++ * dividend is less than the divisor, one execution of do_div() is all that ++ * is required (overflow is not possible). (2) Otherwise it does two ++ * divisions, using the grade school method. ++ */ ++uint64_t ++__udivdi3(uint64_t u, uint64_t v) ++{ ++ uint64_t u0, u1, v1, q0, q1, k; ++ int n; ++ ++ if (v >> 32 == 0) { // If v < 2**32: ++ if (u >> 32 < v) { // If u/v cannot overflow, ++ return __div_u64(u, v); // just do one division. 
++ } else { // If u/v would overflow: ++ u1 = u >> 32; // Break u into two halves. ++ u0 = u & 0xFFFFFFFF; ++ q1 = __div_u64(u1, v); // First quotient digit. ++ k = u1 - q1 * v; // First remainder, < v. ++ u0 += (k << 32); ++ q0 = __div_u64(u0, v); // Seconds quotient digit. ++ return (q1 << 32) + q0; ++ } ++ } else { // If v >= 2**32: ++ n = nlz64(v); // 0 <= n <= 31. ++ v1 = (v << n) >> 32; // Normalize divisor, MSB is 1. ++ u1 = u >> 1; // To ensure no overflow. ++ q1 = __div_u64(u1, v1); // Get quotient from ++ q0 = (q1 << n) >> 31; // Undo normalization and ++ // division of u by 2. ++ if (q0 != 0) // Make q0 correct or ++ q0 = q0 - 1; // too small by 1. ++ if ((u - q0 * v) >= v) ++ q0 = q0 + 1; // Now q0 is correct. ++ ++ return q0; ++ } ++} ++EXPORT_SYMBOL(__udivdi3); ++ ++/* ++ * Implementation of 64-bit signed division for 32-bit machines. ++ */ ++int64_t ++__divdi3(int64_t u, int64_t v) ++{ ++ int64_t q, t; ++ q = __udivdi3(abs64(u), abs64(v)); ++ t = (u ^ v) >> 63; // If u, v have different ++ return (q ^ t) - t; // signs, negate q. ++} ++EXPORT_SYMBOL(__divdi3); ++ ++/* ++ * Implementation of 64-bit unsigned modulo for 32-bit machines. ++ */ ++uint64_t ++__umoddi3(uint64_t dividend, uint64_t divisor) ++{ ++ return (dividend - (divisor * __udivdi3(dividend, divisor))); ++} ++EXPORT_SYMBOL(__umoddi3); ++ ++#if defined(__arm) || defined(__arm__) ++/* ++ * Implementation of 64-bit (un)signed division for 32-bit arm machines. ++ * ++ * Run-time ABI for the ARM Architecture (page 20). A pair of (unsigned) ++ * long longs is returned in {{r0, r1}, {r2,r3}}, the quotient in {r0, r1}, ++ * and the remainder in {r2, r3}. The return type is specifically left ++ * set to 'void' to ensure the compiler does not overwrite these registers ++ * during the return. All results are in registers as per ABI ++ */ ++void ++__aeabi_uldivmod(uint64_t u, uint64_t v) ++{ ++ uint64_t res; ++ uint64_t mod; ++ ++ res = __udivdi3(u, v); ++ mod = __umoddi3(u, v); ++ { ++ register uint32_t r0 asm("r0") = (res & 0xFFFFFFFF); ++ register uint32_t r1 asm("r1") = (res >> 32); ++ register uint32_t r2 asm("r2") = (mod & 0xFFFFFFFF); ++ register uint32_t r3 asm("r3") = (mod >> 32); ++ ++ asm volatile("" ++ : "+r"(r0), "+r"(r1), "+r"(r2),"+r"(r3) /* output */ ++ : "r"(r0), "r"(r1), "r"(r2), "r"(r3)); /* input */ ++ ++ return; /* r0; */ ++ } ++} ++EXPORT_SYMBOL(__aeabi_uldivmod); ++ ++void ++__aeabi_ldivmod(int64_t u, int64_t v) ++{ ++ int64_t res; ++ uint64_t mod; ++ ++ res = __divdi3(u, v); ++ mod = __umoddi3(u, v); ++ { ++ register uint32_t r0 asm("r0") = (res & 0xFFFFFFFF); ++ register uint32_t r1 asm("r1") = (res >> 32); ++ register uint32_t r2 asm("r2") = (mod & 0xFFFFFFFF); ++ register uint32_t r3 asm("r3") = (mod >> 32); ++ ++ asm volatile("" ++ : "+r"(r0), "+r"(r1), "+r"(r2),"+r"(r3) /* output */ ++ : "r"(r0), "r"(r1), "r"(r2), "r"(r3)); /* input */ ++ ++ return; /* r0; */ ++ } ++} ++EXPORT_SYMBOL(__aeabi_ldivmod); ++#endif /* __arm || __arm__ */ ++#endif /* BITS_PER_LONG */ ++ ++/* NOTE: The strtoxx behavior is solely based on my reading of the Solaris ++ * ddi_strtol(9F) man page. I have not verified the behavior of these ++ * functions against their Solaris counterparts. It is possible that I ++ * may have misinterpreted the man page or the man page is incorrect. 
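As a sanity check of the "grade school" case described above (the divisor fits in 32 bits but the high half of the dividend does not divide trivially), the following user-space sketch reproduces the two-digit step and compares it against native 64-bit division. udiv64_by_32() and the test values are illustrative only, and plain '/' stands in for __div_u64().

#include <inttypes.h>
#include <stdio.h>

static uint64_t udiv64_by_32(uint64_t u, uint32_t v)
{
    uint64_t u1 = u >> 32;             /* high half of the dividend */
    uint64_t u0 = u & 0xFFFFFFFFULL;   /* low half of the dividend */
    uint64_t q1, q0, k;

    if (u1 < v)                        /* a single division cannot overflow */
        return u / v;

    q1 = u1 / v;                       /* first quotient digit */
    k  = u1 - q1 * v;                  /* first remainder, always < v */
    u0 += k << 32;
    q0 = u0 / v;                       /* second quotient digit */

    return (q1 << 32) + q0;
}

int main(void)
{
    uint64_t u = 0xFEDCBA9876543210ULL;
    uint32_t v = 0x89ABCDEFu;

    printf("algorithm: %" PRIu64 "\n", udiv64_by_32(u, v));
    printf("native:    %" PRIu64 "\n", u / v);   /* should match */
    return 0;
}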
++ */ ++int ddi_strtoul(const char *, char **, int, unsigned long *); ++int ddi_strtol(const char *, char **, int, long *); ++int ddi_strtoull(const char *, char **, int, unsigned long long *); ++int ddi_strtoll(const char *, char **, int, long long *); ++ ++#define define_ddi_strtoux(type, valtype) \ ++int ddi_strtou##type(const char *str, char **endptr, \ ++ int base, valtype *result) \ ++{ \ ++ valtype last_value, value = 0; \ ++ char *ptr = (char *)str; \ ++ int flag = 1, digit; \ ++ \ ++ if (strlen(ptr) == 0) \ ++ return EINVAL; \ ++ \ ++ /* Auto-detect base based on prefix */ \ ++ if (!base) { \ ++ if (str[0] == '0') { \ ++ if (tolower(str[1])=='x' && isxdigit(str[2])) { \ ++ base = 16; /* hex */ \ ++ ptr += 2; \ ++ } else if (str[1] >= '0' && str[1] < 8) { \ ++ base = 8; /* octal */ \ ++ ptr += 1; \ ++ } else { \ ++ return EINVAL; \ ++ } \ ++ } else { \ ++ base = 10; /* decimal */ \ ++ } \ ++ } \ ++ \ ++ while (1) { \ ++ if (isdigit(*ptr)) \ ++ digit = *ptr - '0'; \ ++ else if (isalpha(*ptr)) \ ++ digit = tolower(*ptr) - 'a' + 10; \ ++ else \ ++ break; \ ++ \ ++ if (digit >= base) \ ++ break; \ ++ \ ++ last_value = value; \ ++ value = value * base + digit; \ ++ if (last_value > value) /* Overflow */ \ ++ return ERANGE; \ ++ \ ++ flag = 1; \ ++ ptr++; \ ++ } \ ++ \ ++ if (flag) \ ++ *result = value; \ ++ \ ++ if (endptr) \ ++ *endptr = (char *)(flag ? ptr : str); \ ++ \ ++ return 0; \ ++} \ ++ ++#define define_ddi_strtox(type, valtype) \ ++int ddi_strto##type(const char *str, char **endptr, \ ++ int base, valtype *result) \ ++{ \ ++ int rc; \ ++ \ ++ if (*str == '-') { \ ++ rc = ddi_strtou##type(str + 1, endptr, base, result); \ ++ if (!rc) { \ ++ if (*endptr == str + 1) \ ++ *endptr = (char *)str; \ ++ else \ ++ *result = -*result; \ ++ } \ ++ } else { \ ++ rc = ddi_strtou##type(str, endptr, base, result); \ ++ } \ ++ \ ++ return rc; \ ++} ++ ++define_ddi_strtoux(l, unsigned long) ++define_ddi_strtox(l, long) ++define_ddi_strtoux(ll, unsigned long long) ++define_ddi_strtox(ll, long long) ++ ++EXPORT_SYMBOL(ddi_strtoul); ++EXPORT_SYMBOL(ddi_strtol); ++EXPORT_SYMBOL(ddi_strtoll); ++EXPORT_SYMBOL(ddi_strtoull); ++ ++int ++ddi_copyin(const void *from, void *to, size_t len, int flags) ++{ ++ /* Fake ioctl() issued by kernel, 'from' is a kernel address */ ++ if (flags & FKIOCTL) { ++ memcpy(to, from, len); ++ return 0; ++ } ++ ++ return copyin(from, to, len); ++} ++EXPORT_SYMBOL(ddi_copyin); ++ ++int ++ddi_copyout(const void *from, void *to, size_t len, int flags) ++{ ++ /* Fake ioctl() issued by kernel, 'from' is a kernel address */ ++ if (flags & FKIOCTL) { ++ memcpy(to, from, len); ++ return 0; ++ } ++ ++ return copyout(from, to, len); ++} ++EXPORT_SYMBOL(ddi_copyout); ++ ++#ifndef HAVE_PUT_TASK_STRUCT ++/* ++ * This is only a stub function which should never be used. The SPL should ++ * never be putting away the last reference on a task structure so this will ++ * not be called. However, we still need to define it so the module does not ++ * have undefined symbol at load time. That all said if this impossible ++ * thing does somehow happen PANIC immediately so we know about it. 
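The base auto-detection implemented by define_ddi_strtoux() above (a "0x" prefix selects hex, a leading '0' selects octal, anything else decimal) matches the convention libc's strtoul(3) applies with base 0, so a small user-space illustration can lean on it. This only demonstrates the convention and expected results, not the SPL functions themselves.

#include <stdio.h>
#include <stdlib.h>

int main(void)
{
    const char *inputs[] = { "0x1f", "037", "31" };
    int i;

    /* All three spellings should parse to the same value, 31. */
    for (i = 0; i < 3; i++) {
        char *end;
        unsigned long val = strtoul(inputs[i], &end, 0);

        printf("%-6s -> %lu (stopped at \"%s\")\n", inputs[i], val, end);
    }
    return 0;
}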
++ */ ++void ++__put_task_struct(struct task_struct *t) ++{ ++ PANIC("Unexpectly put last reference on task %d\n", (int)t->pid); ++} ++EXPORT_SYMBOL(__put_task_struct); ++#endif /* HAVE_PUT_TASK_STRUCT */ ++ ++struct new_utsname *__utsname(void) ++{ ++#ifdef HAVE_INIT_UTSNAME ++ return init_utsname(); ++#else ++ return &system_utsname; ++#endif ++} ++EXPORT_SYMBOL(__utsname); ++ ++ ++/* ++ * Read the unique system identifier from the /etc/hostid file. ++ * ++ * The behavior of /usr/bin/hostid on Linux systems with the ++ * regular eglibc and coreutils is: ++ * ++ * 1. Generate the value if the /etc/hostid file does not exist ++ * or if the /etc/hostid file is less than four bytes in size. ++ * ++ * 2. If the /etc/hostid file is at least 4 bytes, then return ++ * the first four bytes [0..3] in native endian order. ++ * ++ * 3. Always ignore bytes [4..] if they exist in the file. ++ * ++ * Only the first four bytes are significant, even on systems that ++ * have a 64-bit word size. ++ * ++ * See: ++ * ++ * eglibc: sysdeps/unix/sysv/linux/gethostid.c ++ * coreutils: src/hostid.c ++ * ++ * Notes: ++ * ++ * The /etc/hostid file on Solaris is a text file that often reads: ++ * ++ * # DO NOT EDIT ++ * "0123456789" ++ * ++ * Directly copying this file to Linux results in a constant ++ * hostid of 4f442023 because the default comment constitutes ++ * the first four bytes of the file. ++ * ++ */ ++ ++char *spl_hostid_path = HW_HOSTID_PATH; ++module_param(spl_hostid_path, charp, 0444); ++MODULE_PARM_DESC(spl_hostid_path, "The system hostid file (/etc/hostid)"); ++ ++static int ++hostid_read(void) ++{ ++ int result; ++ uint64_t size; ++ struct _buf *file; ++ unsigned long hostid = 0; ++ ++ file = kobj_open_file(spl_hostid_path); ++ ++ if (file == (struct _buf *)-1) ++ return -1; ++ ++ result = kobj_get_filesize(file, &size); ++ ++ if (result != 0) { ++ printk(KERN_WARNING ++ "SPL: kobj_get_filesize returned %i on %s\n", ++ result, spl_hostid_path); ++ kobj_close_file(file); ++ return -2; ++ } ++ ++ if (size < sizeof(HW_HOSTID_MASK)) { ++ printk(KERN_WARNING ++ "SPL: Ignoring the %s file because it is %llu bytes; " ++ "expecting %lu bytes instead.\n", spl_hostid_path, ++ size, (unsigned long)sizeof(HW_HOSTID_MASK)); ++ kobj_close_file(file); ++ return -3; ++ } ++ ++ /* Read directly into the variable like eglibc does. */ ++ /* Short reads are okay; native behavior is preserved. */ ++ result = kobj_read_file(file, (char *)&hostid, sizeof(hostid), 0); ++ ++ if (result < 0) { ++ printk(KERN_WARNING ++ "SPL: kobj_read_file returned %i on %s\n", ++ result, spl_hostid_path); ++ kobj_close_file(file); ++ return -4; ++ } ++ ++ /* Mask down to 32 bits like coreutils does. */ ++ spl_hostid = hostid & HW_HOSTID_MASK; ++ kobj_close_file(file); ++ return 0; ++} ++ ++#define GET_HOSTID_CMD \ ++ "exec 0/proc/sys/kernel/spl/hostid " \ ++ " 2>/dev/null; " \ ++ "hostid" ++ ++static int ++hostid_exec(void) ++{ ++ char *argv[] = { "/bin/sh", ++ "-c", ++ GET_HOSTID_CMD, ++ NULL }; ++ char *envp[] = { "HOME=/", ++ "TERM=linux", ++ "PATH=/sbin:/usr/sbin:/bin:/usr/bin", ++ NULL }; ++ int rc; ++ ++ /* Doing address resolution in the kernel is tricky and just ++ * not a good idea in general. So to set the proper 'hw_serial' ++ * use the usermodehelper support to ask '/bin/sh' to run ++ * '/usr/bin/hostid' and redirect the result to /proc/sys/spl/hostid ++ * for us to use. It's a horrific solution but it will do for now. 
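To make the /etc/hostid convention described above concrete (only the first four bytes count, read in native endian order and reduced to 32 bits), here is a small user-space reader. The literal path and the printf formatting stand in for spl_hostid_path and the HW_HOSTID_MASK handling in hostid_read().

#include <inttypes.h>
#include <stdio.h>

int main(void)
{
    uint32_t hostid = 0;
    FILE *f = fopen("/etc/hostid", "rb");

    if (f == NULL) {
        perror("/etc/hostid");   /* hostid(1) would generate a value instead */
        return 1;
    }
    /* Read the first four bytes straight into the variable, native endian;
     * anything after them is ignored, matching hostid_read() above. */
    if (fread(&hostid, 1, sizeof(hostid), f) < sizeof(hostid)) {
        fprintf(stderr, "file is shorter than four bytes, ignoring it\n");
        fclose(f);
        return 1;
    }
    fclose(f);

    /* The kernel code additionally masks with HW_HOSTID_MASK (32 bits). */
    printf("hostid: %08" PRIx32 "\n", hostid);
    return 0;
}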
++ */ ++ rc = call_usermodehelper(argv[0], argv, envp, 1); ++ if (rc) ++ printk("SPL: Failed user helper '%s %s %s', rc = %d\n", ++ argv[0], argv[1], argv[2], rc); ++ ++ return rc; ++} ++ ++uint32_t ++zone_get_hostid(void *zone) ++{ ++ static int first = 1; ++ unsigned long hostid; ++ int rc; ++ ++ /* Only the global zone is supported */ ++ ASSERT(zone == NULL); ++ ++ if (first) { ++ first = 0; ++ ++ /* ++ * Get the hostid if it was not passed as a module parameter. ++ * Try reading the /etc/hostid file directly, and then fall ++ * back to calling the /usr/bin/hostid utility. ++ */ ++ if ((spl_hostid == HW_INVALID_HOSTID) && ++ (rc = hostid_read()) && (rc = hostid_exec())) ++ return HW_INVALID_HOSTID; ++ ++ printk(KERN_NOTICE "SPL: using hostid 0x%08x\n", ++ (unsigned int) spl_hostid); ++ } ++ ++ if (ddi_strtoul(hw_serial, NULL, HW_HOSTID_LEN-1, &hostid) != 0) ++ return HW_INVALID_HOSTID; ++ ++ return (uint32_t)hostid; ++} ++EXPORT_SYMBOL(zone_get_hostid); ++ ++#ifndef HAVE_KALLSYMS_LOOKUP_NAME ++/* ++ * The kallsyms_lookup_name() kernel function is not an exported symbol in ++ * Linux 2.6.19 through 2.6.32 inclusive. ++ * ++ * This function replaces the functionality by performing an upcall to user ++ * space where /proc/kallsyms is consulted for the requested address. ++ * ++ */ ++ ++#define GET_KALLSYMS_ADDR_CMD \ ++ "exec 0/proc/sys/kernel/spl/kallsyms_lookup_name " \ ++ " 2>/dev/null; " \ ++ "awk '{ if ( $3 == \"kallsyms_lookup_name\" ) { print $1 } }' " \ ++ " /proc/kallsyms " ++ ++static int ++set_kallsyms_lookup_name(void) ++{ ++ char *argv[] = { "/bin/sh", ++ "-c", ++ GET_KALLSYMS_ADDR_CMD, ++ NULL }; ++ char *envp[] = { "HOME=/", ++ "TERM=linux", ++ "PATH=/sbin:/usr/sbin:/bin:/usr/bin", ++ NULL }; ++ int rc; ++ ++ rc = call_usermodehelper(argv[0], argv, envp, 1); ++ if (rc) ++ printk("SPL: Failed user helper '%s %s %s', rc = %d\n", ++ argv[0], argv[1], argv[2], rc); ++ ++ return rc; ++} ++#endif ++ ++static int ++__init spl_init(void) ++{ ++ int rc = 0; ++ ++ if ((rc = spl_debug_init())) ++ return rc; ++ ++ if ((rc = spl_kmem_init())) ++ SGOTO(out1, rc); ++ ++ if ((rc = spl_mutex_init())) ++ SGOTO(out2, rc); ++ ++ if ((rc = spl_rw_init())) ++ SGOTO(out3, rc); ++ ++ if ((rc = spl_taskq_init())) ++ SGOTO(out4, rc); ++ ++ if ((rc = spl_vn_init())) ++ SGOTO(out5, rc); ++ ++ if ((rc = spl_proc_init())) ++ SGOTO(out6, rc); ++ ++ if ((rc = spl_kstat_init())) ++ SGOTO(out7, rc); ++ ++ if ((rc = spl_tsd_init())) ++ SGOTO(out8, rc); ++ ++ if ((rc = spl_zlib_init())) ++ SGOTO(out9, rc); ++ ++#ifndef HAVE_KALLSYMS_LOOKUP_NAME ++ if ((rc = set_kallsyms_lookup_name())) ++ SGOTO(out10, rc = -EADDRNOTAVAIL); ++#endif /* HAVE_KALLSYMS_LOOKUP_NAME */ ++ ++ if ((rc = spl_kmem_init_kallsyms_lookup())) ++ SGOTO(out10, rc); ++ ++ if ((rc = spl_vn_init_kallsyms_lookup())) ++ SGOTO(out10, rc); ++ ++ printk(KERN_NOTICE "SPL: Loaded module v%s-%s%s\n", SPL_META_VERSION, ++ SPL_META_RELEASE, SPL_DEBUG_STR); ++ SRETURN(rc); ++out10: ++ spl_zlib_fini(); ++out9: ++ spl_tsd_fini(); ++out8: ++ spl_kstat_fini(); ++out7: ++ spl_proc_fini(); ++out6: ++ spl_vn_fini(); ++out5: ++ spl_taskq_fini(); ++out4: ++ spl_rw_fini(); ++out3: ++ spl_mutex_fini(); ++out2: ++ spl_kmem_fini(); ++out1: ++ spl_debug_fini(); ++ ++ printk(KERN_NOTICE "SPL: Failed to Load Solaris Porting Layer " ++ "v%s-%s%s, rc = %d\n", SPL_META_VERSION, SPL_META_RELEASE, ++ SPL_DEBUG_STR, rc); ++ return rc; ++} ++ ++static void ++spl_fini(void) ++{ ++ SENTRY; ++ ++ printk(KERN_NOTICE "SPL: Unloaded module v%s-%s%s\n", ++ SPL_META_VERSION, 
SPL_META_RELEASE, SPL_DEBUG_STR); ++ spl_zlib_fini(); ++ spl_tsd_fini(); ++ spl_kstat_fini(); ++ spl_proc_fini(); ++ spl_vn_fini(); ++ spl_taskq_fini(); ++ spl_rw_fini(); ++ spl_mutex_fini(); ++ spl_kmem_fini(); ++ spl_debug_fini(); ++} ++ ++/* Called when a dependent module is loaded */ ++void ++spl_setup(void) ++{ ++ int rc; ++ ++ /* ++ * At module load time the pwd is set to '/' on a Solaris system. ++ * On a Linux system will be set to whatever directory the caller ++ * was in when executing insmod/modprobe. ++ */ ++ rc = vn_set_pwd("/"); ++ if (rc) ++ printk("SPL: Warning unable to set pwd to '/': %d\n", rc); ++} ++EXPORT_SYMBOL(spl_setup); ++ ++/* Called when a dependent module is unloaded */ ++void ++spl_cleanup(void) ++{ ++} ++EXPORT_SYMBOL(spl_cleanup); ++ ++module_init(spl_init); ++module_exit(spl_fini); ++ ++MODULE_AUTHOR("Lawrence Livermore National Labs"); ++MODULE_DESCRIPTION("Solaris Porting Layer"); ++MODULE_LICENSE("GPL"); +diff -uNr linux-3.2.33-go.orig/spl/spl/spl-kmem.c linux-3.2.33-go/spl/spl/spl-kmem.c +--- linux-3.2.33-go.orig/spl/spl/spl-kmem.c 1970-01-01 01:00:00.000000000 +0100 ++++ linux-3.2.33-go/spl/spl/spl-kmem.c 2012-11-16 23:22:32.410192863 +0100 +@@ -0,0 +1,2440 @@ ++/*****************************************************************************\ ++ * Copyright (C) 2007-2010 Lawrence Livermore National Security, LLC. ++ * Copyright (C) 2007 The Regents of the University of California. ++ * Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER). ++ * Written by Brian Behlendorf . ++ * UCRL-CODE-235197 ++ * ++ * This file is part of the SPL, Solaris Porting Layer. ++ * For details, see . ++ * ++ * The SPL is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License as published by the ++ * Free Software Foundation; either version 2 of the License, or (at your ++ * option) any later version. ++ * ++ * The SPL is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * for more details. ++ * ++ * You should have received a copy of the GNU General Public License along ++ * with the SPL. If not, see . ++ ***************************************************************************** ++ * Solaris Porting Layer (SPL) Kmem Implementation. ++\*****************************************************************************/ ++ ++#include ++#include ++ ++#ifdef SS_DEBUG_SUBSYS ++#undef SS_DEBUG_SUBSYS ++#endif ++ ++#define SS_DEBUG_SUBSYS SS_KMEM ++ ++/* ++ * The minimum amount of memory measured in pages to be free at all ++ * times on the system. This is similar to Linux's zone->pages_min ++ * multiplied by the number of zones and is sized based on that. ++ */ ++pgcnt_t minfree = 0; ++EXPORT_SYMBOL(minfree); ++ ++/* ++ * The desired amount of memory measured in pages to be free at all ++ * times on the system. This is similar to Linux's zone->pages_low ++ * multiplied by the number of zones and is sized based on that. ++ * Assuming all zones are being used roughly equally, when we drop ++ * below this threshold asynchronous page reclamation is triggered. ++ */ ++pgcnt_t desfree = 0; ++EXPORT_SYMBOL(desfree); ++ ++/* ++ * When above this amount of memory measures in pages the system is ++ * determined to have enough free memory. This is similar to Linux's ++ * zone->pages_high multiplied by the number of zones and is sized based ++ * on that. 
Assuming all zones are being used roughly equally, when ++ * asynchronous page reclamation reaches this threshold it stops. ++ */ ++pgcnt_t lotsfree = 0; ++EXPORT_SYMBOL(lotsfree); ++ ++/* Unused always 0 in this implementation */ ++pgcnt_t needfree = 0; ++EXPORT_SYMBOL(needfree); ++ ++pgcnt_t swapfs_minfree = 0; ++EXPORT_SYMBOL(swapfs_minfree); ++ ++pgcnt_t swapfs_reserve = 0; ++EXPORT_SYMBOL(swapfs_reserve); ++ ++vmem_t *heap_arena = NULL; ++EXPORT_SYMBOL(heap_arena); ++ ++vmem_t *zio_alloc_arena = NULL; ++EXPORT_SYMBOL(zio_alloc_arena); ++ ++vmem_t *zio_arena = NULL; ++EXPORT_SYMBOL(zio_arena); ++ ++#ifndef HAVE_GET_VMALLOC_INFO ++get_vmalloc_info_t get_vmalloc_info_fn = SYMBOL_POISON; ++EXPORT_SYMBOL(get_vmalloc_info_fn); ++#endif /* HAVE_GET_VMALLOC_INFO */ ++ ++#ifdef HAVE_PGDAT_HELPERS ++# ifndef HAVE_FIRST_ONLINE_PGDAT ++first_online_pgdat_t first_online_pgdat_fn = SYMBOL_POISON; ++EXPORT_SYMBOL(first_online_pgdat_fn); ++# endif /* HAVE_FIRST_ONLINE_PGDAT */ ++ ++# ifndef HAVE_NEXT_ONLINE_PGDAT ++next_online_pgdat_t next_online_pgdat_fn = SYMBOL_POISON; ++EXPORT_SYMBOL(next_online_pgdat_fn); ++# endif /* HAVE_NEXT_ONLINE_PGDAT */ ++ ++# ifndef HAVE_NEXT_ZONE ++next_zone_t next_zone_fn = SYMBOL_POISON; ++EXPORT_SYMBOL(next_zone_fn); ++# endif /* HAVE_NEXT_ZONE */ ++ ++#else /* HAVE_PGDAT_HELPERS */ ++ ++# ifndef HAVE_PGDAT_LIST ++struct pglist_data *pgdat_list_addr = SYMBOL_POISON; ++EXPORT_SYMBOL(pgdat_list_addr); ++# endif /* HAVE_PGDAT_LIST */ ++ ++#endif /* HAVE_PGDAT_HELPERS */ ++ ++#ifdef NEED_GET_ZONE_COUNTS ++# ifndef HAVE_GET_ZONE_COUNTS ++get_zone_counts_t get_zone_counts_fn = SYMBOL_POISON; ++EXPORT_SYMBOL(get_zone_counts_fn); ++# endif /* HAVE_GET_ZONE_COUNTS */ ++ ++unsigned long ++spl_global_page_state(spl_zone_stat_item_t item) ++{ ++ unsigned long active; ++ unsigned long inactive; ++ unsigned long free; ++ ++ get_zone_counts(&active, &inactive, &free); ++ switch (item) { ++ case SPL_NR_FREE_PAGES: return free; ++ case SPL_NR_INACTIVE: return inactive; ++ case SPL_NR_ACTIVE: return active; ++ default: ASSERT(0); /* Unsupported */ ++ } ++ ++ return 0; ++} ++#else ++# ifdef HAVE_GLOBAL_PAGE_STATE ++unsigned long ++spl_global_page_state(spl_zone_stat_item_t item) ++{ ++ unsigned long pages = 0; ++ ++ switch (item) { ++ case SPL_NR_FREE_PAGES: ++# ifdef HAVE_ZONE_STAT_ITEM_NR_FREE_PAGES ++ pages += global_page_state(NR_FREE_PAGES); ++# endif ++ break; ++ case SPL_NR_INACTIVE: ++# ifdef HAVE_ZONE_STAT_ITEM_NR_INACTIVE ++ pages += global_page_state(NR_INACTIVE); ++# endif ++# ifdef HAVE_ZONE_STAT_ITEM_NR_INACTIVE_ANON ++ pages += global_page_state(NR_INACTIVE_ANON); ++# endif ++# ifdef HAVE_ZONE_STAT_ITEM_NR_INACTIVE_FILE ++ pages += global_page_state(NR_INACTIVE_FILE); ++# endif ++ break; ++ case SPL_NR_ACTIVE: ++# ifdef HAVE_ZONE_STAT_ITEM_NR_ACTIVE ++ pages += global_page_state(NR_ACTIVE); ++# endif ++# ifdef HAVE_ZONE_STAT_ITEM_NR_ACTIVE_ANON ++ pages += global_page_state(NR_ACTIVE_ANON); ++# endif ++# ifdef HAVE_ZONE_STAT_ITEM_NR_ACTIVE_FILE ++ pages += global_page_state(NR_ACTIVE_FILE); ++# endif ++ break; ++ default: ++ ASSERT(0); /* Unsupported */ ++ } ++ ++ return pages; ++} ++# else ++# error "Both global_page_state() and get_zone_counts() unavailable" ++# endif /* HAVE_GLOBAL_PAGE_STATE */ ++#endif /* NEED_GET_ZONE_COUNTS */ ++EXPORT_SYMBOL(spl_global_page_state); ++ ++#if !defined(HAVE_INVALIDATE_INODES) && !defined(HAVE_INVALIDATE_INODES_CHECK) ++invalidate_inodes_t invalidate_inodes_fn = SYMBOL_POISON; ++EXPORT_SYMBOL(invalidate_inodes_fn); ++#endif /* 
!HAVE_INVALIDATE_INODES && !HAVE_INVALIDATE_INODES_CHECK */ ++ ++#ifndef HAVE_SHRINK_DCACHE_MEMORY ++shrink_dcache_memory_t shrink_dcache_memory_fn = SYMBOL_POISON; ++EXPORT_SYMBOL(shrink_dcache_memory_fn); ++#endif /* HAVE_SHRINK_DCACHE_MEMORY */ ++ ++#ifndef HAVE_SHRINK_ICACHE_MEMORY ++shrink_icache_memory_t shrink_icache_memory_fn = SYMBOL_POISON; ++EXPORT_SYMBOL(shrink_icache_memory_fn); ++#endif /* HAVE_SHRINK_ICACHE_MEMORY */ ++ ++pgcnt_t ++spl_kmem_availrmem(void) ++{ ++ /* The amount of easily available memory */ ++ return (spl_global_page_state(SPL_NR_FREE_PAGES) + ++ spl_global_page_state(SPL_NR_INACTIVE)); ++} ++EXPORT_SYMBOL(spl_kmem_availrmem); ++ ++size_t ++vmem_size(vmem_t *vmp, int typemask) ++{ ++ struct vmalloc_info vmi; ++ size_t size = 0; ++ ++ ASSERT(vmp == NULL); ++ ASSERT(typemask & (VMEM_ALLOC | VMEM_FREE)); ++ ++ get_vmalloc_info(&vmi); ++ if (typemask & VMEM_ALLOC) ++ size += (size_t)vmi.used; ++ ++ if (typemask & VMEM_FREE) ++ size += (size_t)(VMALLOC_TOTAL - vmi.used); ++ ++ return size; ++} ++EXPORT_SYMBOL(vmem_size); ++ ++int ++kmem_debugging(void) ++{ ++ return 0; ++} ++EXPORT_SYMBOL(kmem_debugging); ++ ++#ifndef HAVE_KVASPRINTF ++/* Simplified asprintf. */ ++char *kvasprintf(gfp_t gfp, const char *fmt, va_list ap) ++{ ++ unsigned int len; ++ char *p; ++ va_list aq; ++ ++ va_copy(aq, ap); ++ len = vsnprintf(NULL, 0, fmt, aq); ++ va_end(aq); ++ ++ p = kmalloc(len+1, gfp); ++ if (!p) ++ return NULL; ++ ++ vsnprintf(p, len+1, fmt, ap); ++ ++ return p; ++} ++EXPORT_SYMBOL(kvasprintf); ++#endif /* HAVE_KVASPRINTF */ ++ ++char * ++kmem_vasprintf(const char *fmt, va_list ap) ++{ ++ va_list aq; ++ char *ptr; ++ ++ do { ++ va_copy(aq, ap); ++ ptr = kvasprintf(GFP_KERNEL, fmt, aq); ++ va_end(aq); ++ } while (ptr == NULL); ++ ++ return ptr; ++} ++EXPORT_SYMBOL(kmem_vasprintf); ++ ++char * ++kmem_asprintf(const char *fmt, ...) ++{ ++ va_list ap; ++ char *ptr; ++ ++ do { ++ va_start(ap, fmt); ++ ptr = kvasprintf(GFP_KERNEL, fmt, ap); ++ va_end(ap); ++ } while (ptr == NULL); ++ ++ return ptr; ++} ++EXPORT_SYMBOL(kmem_asprintf); ++ ++static char * ++__strdup(const char *str, int flags) ++{ ++ char *ptr; ++ int n; ++ ++ n = strlen(str); ++ ptr = kmalloc_nofail(n + 1, flags); ++ if (ptr) ++ memcpy(ptr, str, n + 1); ++ ++ return ptr; ++} ++ ++char * ++strdup(const char *str) ++{ ++ return __strdup(str, KM_SLEEP); ++} ++EXPORT_SYMBOL(strdup); ++ ++void ++strfree(char *str) ++{ ++ kfree(str); ++} ++EXPORT_SYMBOL(strfree); ++ ++/* ++ * Memory allocation interfaces and debugging for basic kmem_* ++ * and vmem_* style memory allocation. When DEBUG_KMEM is enabled ++ * the SPL will keep track of the total memory allocated, and ++ * report any memory leaked when the module is unloaded. ++ */ ++#ifdef DEBUG_KMEM ++ ++/* Shim layer memory accounting */ ++# ifdef HAVE_ATOMIC64_T ++atomic64_t kmem_alloc_used = ATOMIC64_INIT(0); ++unsigned long long kmem_alloc_max = 0; ++atomic64_t vmem_alloc_used = ATOMIC64_INIT(0); ++unsigned long long vmem_alloc_max = 0; ++# else /* HAVE_ATOMIC64_T */ ++atomic_t kmem_alloc_used = ATOMIC_INIT(0); ++unsigned long long kmem_alloc_max = 0; ++atomic_t vmem_alloc_used = ATOMIC_INIT(0); ++unsigned long long vmem_alloc_max = 0; ++# endif /* HAVE_ATOMIC64_T */ ++ ++EXPORT_SYMBOL(kmem_alloc_used); ++EXPORT_SYMBOL(kmem_alloc_max); ++EXPORT_SYMBOL(vmem_alloc_used); ++EXPORT_SYMBOL(vmem_alloc_max); ++ ++/* When DEBUG_KMEM_TRACKING is enabled not only will total bytes be tracked ++ * but also the location of every alloc and free. 
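The tracking scheme sketched in the comment above (a per-allocation debug record holding address, size, function, and line so the matching free can find it, with everything still outstanding reported as a leak) can be illustrated with a small user-space allocator wrapper. A single linked list stands in for the kmem_table hash buckets and kmem_list; all names below are hypothetical and not part of this patch.

#include <stdio.h>
#include <stdlib.h>

struct tracked {
    struct tracked *next;
    void *addr;
    size_t size;
    const char *func;
    int line;
};

static struct tracked *track_list;
static size_t bytes_used;

#define t_malloc(sz) track_malloc((sz), __func__, __LINE__)

static void *track_malloc(size_t size, const char *func, int line)
{
    struct tracked *t = malloc(sizeof(*t));
    void *p = malloc(size);

    if (t == NULL || p == NULL) {
        free(t);
        free(p);
        return NULL;
    }
    /* Record who allocated what, keyed by the returned address. */
    t->addr = p;
    t->size = size;
    t->func = func;
    t->line = line;
    t->next = track_list;
    track_list = t;
    bytes_used += size;
    return p;
}

static void track_free(void *p)
{
    struct tracked **tp;

    for (tp = &track_list; *tp != NULL; tp = &(*tp)->next) {
        if ((*tp)->addr == p) {
            struct tracked *t = *tp;

            *tp = t->next;          /* drop the record matching this free */
            bytes_used -= t->size;
            free(t);
            free(p);
            return;
        }
    }
    fprintf(stderr, "free of untracked pointer %p\n", p);
}

static void track_report(void)
{
    struct tracked *t;

    for (t = track_list; t != NULL; t = t->next)
        fprintf(stderr, "leak: %zu bytes from %s:%d\n",
                t->size, t->func, t->line);
    fprintf(stderr, "%zu bytes still allocated\n", bytes_used);
}

int main(void)
{
    void *a = t_malloc(32);
    void *b = t_malloc(64);   /* never freed, reported by track_report() */

    track_free(a);
    track_report();
    (void)b;
    return 0;
}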
When the SPL module is ++ * unloaded a list of all leaked addresses and where they were allocated ++ * will be dumped to the console. Enabling this feature has a significant ++ * impact on performance but it makes finding memory leaks straight forward. ++ * ++ * Not surprisingly with debugging enabled the xmem_locks are very highly ++ * contended particularly on xfree(). If we want to run with this detailed ++ * debugging enabled for anything other than debugging we need to minimize ++ * the contention by moving to a lock per xmem_table entry model. ++ */ ++# ifdef DEBUG_KMEM_TRACKING ++ ++# define KMEM_HASH_BITS 10 ++# define KMEM_TABLE_SIZE (1 << KMEM_HASH_BITS) ++ ++# define VMEM_HASH_BITS 10 ++# define VMEM_TABLE_SIZE (1 << VMEM_HASH_BITS) ++ ++typedef struct kmem_debug { ++ struct hlist_node kd_hlist; /* Hash node linkage */ ++ struct list_head kd_list; /* List of all allocations */ ++ void *kd_addr; /* Allocation pointer */ ++ size_t kd_size; /* Allocation size */ ++ const char *kd_func; /* Allocation function */ ++ int kd_line; /* Allocation line */ ++} kmem_debug_t; ++ ++spinlock_t kmem_lock; ++struct hlist_head kmem_table[KMEM_TABLE_SIZE]; ++struct list_head kmem_list; ++ ++spinlock_t vmem_lock; ++struct hlist_head vmem_table[VMEM_TABLE_SIZE]; ++struct list_head vmem_list; ++ ++EXPORT_SYMBOL(kmem_lock); ++EXPORT_SYMBOL(kmem_table); ++EXPORT_SYMBOL(kmem_list); ++ ++EXPORT_SYMBOL(vmem_lock); ++EXPORT_SYMBOL(vmem_table); ++EXPORT_SYMBOL(vmem_list); ++ ++static kmem_debug_t * ++kmem_del_init(spinlock_t *lock, struct hlist_head *table, int bits, const void *addr) ++{ ++ struct hlist_head *head; ++ struct hlist_node *node; ++ struct kmem_debug *p; ++ unsigned long flags; ++ SENTRY; ++ ++ spin_lock_irqsave(lock, flags); ++ ++ head = &table[hash_ptr(addr, bits)]; ++ hlist_for_each_entry_rcu(p, node, head, kd_hlist) { ++ if (p->kd_addr == addr) { ++ hlist_del_init(&p->kd_hlist); ++ list_del_init(&p->kd_list); ++ spin_unlock_irqrestore(lock, flags); ++ return p; ++ } ++ } ++ ++ spin_unlock_irqrestore(lock, flags); ++ ++ SRETURN(NULL); ++} ++ ++void * ++kmem_alloc_track(size_t size, int flags, const char *func, int line, ++ int node_alloc, int node) ++{ ++ void *ptr = NULL; ++ kmem_debug_t *dptr; ++ unsigned long irq_flags; ++ SENTRY; ++ ++ /* Function may be called with KM_NOSLEEP so failure is possible */ ++ dptr = (kmem_debug_t *) kmalloc_nofail(sizeof(kmem_debug_t), ++ flags & ~__GFP_ZERO); ++ ++ if (unlikely(dptr == NULL)) { ++ SDEBUG_LIMIT(SD_CONSOLE | SD_WARNING, "debug " ++ "kmem_alloc(%ld, 0x%x) at %s:%d failed (%lld/%llu)\n", ++ sizeof(kmem_debug_t), flags, func, line, ++ kmem_alloc_used_read(), kmem_alloc_max); ++ } else { ++ /* ++ * Marked unlikely because we should never be doing this, ++ * we tolerate to up 2 pages but a single page is best. ++ */ ++ if (unlikely((size > PAGE_SIZE*2) && !(flags & KM_NODEBUG))) { ++ SDEBUG_LIMIT(SD_CONSOLE | SD_WARNING, "large " ++ "kmem_alloc(%llu, 0x%x) at %s:%d (%lld/%llu)\n", ++ (unsigned long long) size, flags, func, line, ++ kmem_alloc_used_read(), kmem_alloc_max); ++ spl_debug_dumpstack(NULL); ++ } ++ ++ /* ++ * We use __strdup() below because the string pointed to by ++ * __FUNCTION__ might not be available by the time we want ++ * to print it since the module might have been unloaded. ++ * This can only fail in the KM_NOSLEEP case. 
++ */ ++ dptr->kd_func = __strdup(func, flags & ~__GFP_ZERO); ++ if (unlikely(dptr->kd_func == NULL)) { ++ kfree(dptr); ++ SDEBUG_LIMIT(SD_CONSOLE | SD_WARNING, ++ "debug __strdup() at %s:%d failed (%lld/%llu)\n", ++ func, line, kmem_alloc_used_read(), kmem_alloc_max); ++ goto out; ++ } ++ ++ /* Use the correct allocator */ ++ if (node_alloc) { ++ ASSERT(!(flags & __GFP_ZERO)); ++ ptr = kmalloc_node_nofail(size, flags, node); ++ } else if (flags & __GFP_ZERO) { ++ ptr = kzalloc_nofail(size, flags & ~__GFP_ZERO); ++ } else { ++ ptr = kmalloc_nofail(size, flags); ++ } ++ ++ if (unlikely(ptr == NULL)) { ++ kfree(dptr->kd_func); ++ kfree(dptr); ++ SDEBUG_LIMIT(SD_CONSOLE | SD_WARNING, "kmem_alloc" ++ "(%llu, 0x%x) at %s:%d failed (%lld/%llu)\n", ++ (unsigned long long) size, flags, func, line, ++ kmem_alloc_used_read(), kmem_alloc_max); ++ goto out; ++ } ++ ++ kmem_alloc_used_add(size); ++ if (unlikely(kmem_alloc_used_read() > kmem_alloc_max)) ++ kmem_alloc_max = kmem_alloc_used_read(); ++ ++ INIT_HLIST_NODE(&dptr->kd_hlist); ++ INIT_LIST_HEAD(&dptr->kd_list); ++ ++ dptr->kd_addr = ptr; ++ dptr->kd_size = size; ++ dptr->kd_line = line; ++ ++ spin_lock_irqsave(&kmem_lock, irq_flags); ++ hlist_add_head_rcu(&dptr->kd_hlist, ++ &kmem_table[hash_ptr(ptr, KMEM_HASH_BITS)]); ++ list_add_tail(&dptr->kd_list, &kmem_list); ++ spin_unlock_irqrestore(&kmem_lock, irq_flags); ++ ++ SDEBUG_LIMIT(SD_INFO, ++ "kmem_alloc(%llu, 0x%x) at %s:%d = %p (%lld/%llu)\n", ++ (unsigned long long) size, flags, func, line, ptr, ++ kmem_alloc_used_read(), kmem_alloc_max); ++ } ++out: ++ SRETURN(ptr); ++} ++EXPORT_SYMBOL(kmem_alloc_track); ++ ++void ++kmem_free_track(const void *ptr, size_t size) ++{ ++ kmem_debug_t *dptr; ++ SENTRY; ++ ++ ASSERTF(ptr || size > 0, "ptr: %p, size: %llu", ptr, ++ (unsigned long long) size); ++ ++ dptr = kmem_del_init(&kmem_lock, kmem_table, KMEM_HASH_BITS, ptr); ++ ++ /* Must exist in hash due to kmem_alloc() */ ++ ASSERT(dptr); ++ ++ /* Size must match */ ++ ASSERTF(dptr->kd_size == size, "kd_size (%llu) != size (%llu), " ++ "kd_func = %s, kd_line = %d\n", (unsigned long long) dptr->kd_size, ++ (unsigned long long) size, dptr->kd_func, dptr->kd_line); ++ ++ kmem_alloc_used_sub(size); ++ SDEBUG_LIMIT(SD_INFO, "kmem_free(%p, %llu) (%lld/%llu)\n", ptr, ++ (unsigned long long) size, kmem_alloc_used_read(), ++ kmem_alloc_max); ++ ++ kfree(dptr->kd_func); ++ ++ memset(dptr, 0x5a, sizeof(kmem_debug_t)); ++ kfree(dptr); ++ ++ memset(ptr, 0x5a, size); ++ kfree(ptr); ++ ++ SEXIT; ++} ++EXPORT_SYMBOL(kmem_free_track); ++ ++void * ++vmem_alloc_track(size_t size, int flags, const char *func, int line) ++{ ++ void *ptr = NULL; ++ kmem_debug_t *dptr; ++ unsigned long irq_flags; ++ SENTRY; ++ ++ ASSERT(flags & KM_SLEEP); ++ ++ /* Function may be called with KM_NOSLEEP so failure is possible */ ++ dptr = (kmem_debug_t *) kmalloc_nofail(sizeof(kmem_debug_t), ++ flags & ~__GFP_ZERO); ++ if (unlikely(dptr == NULL)) { ++ SDEBUG_LIMIT(SD_CONSOLE | SD_WARNING, "debug " ++ "vmem_alloc(%ld, 0x%x) at %s:%d failed (%lld/%llu)\n", ++ sizeof(kmem_debug_t), flags, func, line, ++ vmem_alloc_used_read(), vmem_alloc_max); ++ } else { ++ /* ++ * We use __strdup() below because the string pointed to by ++ * __FUNCTION__ might not be available by the time we want ++ * to print it, since the module might have been unloaded. ++ * This can never fail because we have already asserted ++ * that flags is KM_SLEEP. 
++ */ ++ dptr->kd_func = __strdup(func, flags & ~__GFP_ZERO); ++ if (unlikely(dptr->kd_func == NULL)) { ++ kfree(dptr); ++ SDEBUG_LIMIT(SD_CONSOLE | SD_WARNING, ++ "debug __strdup() at %s:%d failed (%lld/%llu)\n", ++ func, line, vmem_alloc_used_read(), vmem_alloc_max); ++ goto out; ++ } ++ ++ /* Use the correct allocator */ ++ if (flags & __GFP_ZERO) { ++ ptr = vzalloc_nofail(size, flags & ~__GFP_ZERO); ++ } else { ++ ptr = vmalloc_nofail(size, flags); ++ } ++ ++ if (unlikely(ptr == NULL)) { ++ kfree(dptr->kd_func); ++ kfree(dptr); ++ SDEBUG_LIMIT(SD_CONSOLE | SD_WARNING, "vmem_alloc" ++ "(%llu, 0x%x) at %s:%d failed (%lld/%llu)\n", ++ (unsigned long long) size, flags, func, line, ++ vmem_alloc_used_read(), vmem_alloc_max); ++ goto out; ++ } ++ ++ vmem_alloc_used_add(size); ++ if (unlikely(vmem_alloc_used_read() > vmem_alloc_max)) ++ vmem_alloc_max = vmem_alloc_used_read(); ++ ++ INIT_HLIST_NODE(&dptr->kd_hlist); ++ INIT_LIST_HEAD(&dptr->kd_list); ++ ++ dptr->kd_addr = ptr; ++ dptr->kd_size = size; ++ dptr->kd_line = line; ++ ++ spin_lock_irqsave(&vmem_lock, irq_flags); ++ hlist_add_head_rcu(&dptr->kd_hlist, ++ &vmem_table[hash_ptr(ptr, VMEM_HASH_BITS)]); ++ list_add_tail(&dptr->kd_list, &vmem_list); ++ spin_unlock_irqrestore(&vmem_lock, irq_flags); ++ ++ SDEBUG_LIMIT(SD_INFO, ++ "vmem_alloc(%llu, 0x%x) at %s:%d = %p (%lld/%llu)\n", ++ (unsigned long long) size, flags, func, line, ++ ptr, vmem_alloc_used_read(), vmem_alloc_max); ++ } ++out: ++ SRETURN(ptr); ++} ++EXPORT_SYMBOL(vmem_alloc_track); ++ ++void ++vmem_free_track(const void *ptr, size_t size) ++{ ++ kmem_debug_t *dptr; ++ SENTRY; ++ ++ ASSERTF(ptr || size > 0, "ptr: %p, size: %llu", ptr, ++ (unsigned long long) size); ++ ++ dptr = kmem_del_init(&vmem_lock, vmem_table, VMEM_HASH_BITS, ptr); ++ ++ /* Must exist in hash due to vmem_alloc() */ ++ ASSERT(dptr); ++ ++ /* Size must match */ ++ ASSERTF(dptr->kd_size == size, "kd_size (%llu) != size (%llu), " ++ "kd_func = %s, kd_line = %d\n", (unsigned long long) dptr->kd_size, ++ (unsigned long long) size, dptr->kd_func, dptr->kd_line); ++ ++ vmem_alloc_used_sub(size); ++ SDEBUG_LIMIT(SD_INFO, "vmem_free(%p, %llu) (%lld/%llu)\n", ptr, ++ (unsigned long long) size, vmem_alloc_used_read(), ++ vmem_alloc_max); ++ ++ kfree(dptr->kd_func); ++ ++ memset(dptr, 0x5a, sizeof(kmem_debug_t)); ++ kfree(dptr); ++ ++ memset(ptr, 0x5a, size); ++ vfree(ptr); ++ ++ SEXIT; ++} ++EXPORT_SYMBOL(vmem_free_track); ++ ++# else /* DEBUG_KMEM_TRACKING */ ++ ++void * ++kmem_alloc_debug(size_t size, int flags, const char *func, int line, ++ int node_alloc, int node) ++{ ++ void *ptr; ++ SENTRY; ++ ++ /* ++ * Marked unlikely because we should never be doing this, ++ * we tolerate to up 2 pages but a single page is best. 
++ */ ++ if (unlikely((size > PAGE_SIZE * 2) && !(flags & KM_NODEBUG))) { ++ SDEBUG(SD_CONSOLE | SD_WARNING, ++ "large kmem_alloc(%llu, 0x%x) at %s:%d (%lld/%llu)\n", ++ (unsigned long long) size, flags, func, line, ++ kmem_alloc_used_read(), kmem_alloc_max); ++ dump_stack(); ++ } ++ ++ /* Use the correct allocator */ ++ if (node_alloc) { ++ ASSERT(!(flags & __GFP_ZERO)); ++ ptr = kmalloc_node_nofail(size, flags, node); ++ } else if (flags & __GFP_ZERO) { ++ ptr = kzalloc_nofail(size, flags & (~__GFP_ZERO)); ++ } else { ++ ptr = kmalloc_nofail(size, flags); ++ } ++ ++ if (unlikely(ptr == NULL)) { ++ SDEBUG_LIMIT(SD_CONSOLE | SD_WARNING, ++ "kmem_alloc(%llu, 0x%x) at %s:%d failed (%lld/%llu)\n", ++ (unsigned long long) size, flags, func, line, ++ kmem_alloc_used_read(), kmem_alloc_max); ++ } else { ++ kmem_alloc_used_add(size); ++ if (unlikely(kmem_alloc_used_read() > kmem_alloc_max)) ++ kmem_alloc_max = kmem_alloc_used_read(); ++ ++ SDEBUG_LIMIT(SD_INFO, ++ "kmem_alloc(%llu, 0x%x) at %s:%d = %p (%lld/%llu)\n", ++ (unsigned long long) size, flags, func, line, ptr, ++ kmem_alloc_used_read(), kmem_alloc_max); ++ } ++ ++ SRETURN(ptr); ++} ++EXPORT_SYMBOL(kmem_alloc_debug); ++ ++void ++kmem_free_debug(const void *ptr, size_t size) ++{ ++ SENTRY; ++ ++ ASSERTF(ptr || size > 0, "ptr: %p, size: %llu", ptr, ++ (unsigned long long) size); ++ ++ kmem_alloc_used_sub(size); ++ SDEBUG_LIMIT(SD_INFO, "kmem_free(%p, %llu) (%lld/%llu)\n", ptr, ++ (unsigned long long) size, kmem_alloc_used_read(), ++ kmem_alloc_max); ++ kfree(ptr); ++ ++ SEXIT; ++} ++EXPORT_SYMBOL(kmem_free_debug); ++ ++void * ++vmem_alloc_debug(size_t size, int flags, const char *func, int line) ++{ ++ void *ptr; ++ SENTRY; ++ ++ ASSERT(flags & KM_SLEEP); ++ ++ /* Use the correct allocator */ ++ if (flags & __GFP_ZERO) { ++ ptr = vzalloc_nofail(size, flags & (~__GFP_ZERO)); ++ } else { ++ ptr = vmalloc_nofail(size, flags); ++ } ++ ++ if (unlikely(ptr == NULL)) { ++ SDEBUG_LIMIT(SD_CONSOLE | SD_WARNING, ++ "vmem_alloc(%llu, 0x%x) at %s:%d failed (%lld/%llu)\n", ++ (unsigned long long) size, flags, func, line, ++ vmem_alloc_used_read(), vmem_alloc_max); ++ } else { ++ vmem_alloc_used_add(size); ++ if (unlikely(vmem_alloc_used_read() > vmem_alloc_max)) ++ vmem_alloc_max = vmem_alloc_used_read(); ++ ++ SDEBUG_LIMIT(SD_INFO, "vmem_alloc(%llu, 0x%x) = %p " ++ "(%lld/%llu)\n", (unsigned long long) size, flags, ptr, ++ vmem_alloc_used_read(), vmem_alloc_max); ++ } ++ ++ SRETURN(ptr); ++} ++EXPORT_SYMBOL(vmem_alloc_debug); ++ ++void ++vmem_free_debug(const void *ptr, size_t size) ++{ ++ SENTRY; ++ ++ ASSERTF(ptr || size > 0, "ptr: %p, size: %llu", ptr, ++ (unsigned long long) size); ++ ++ vmem_alloc_used_sub(size); ++ SDEBUG_LIMIT(SD_INFO, "vmem_free(%p, %llu) (%lld/%llu)\n", ptr, ++ (unsigned long long) size, vmem_alloc_used_read(), ++ vmem_alloc_max); ++ vfree(ptr); ++ ++ SEXIT; ++} ++EXPORT_SYMBOL(vmem_free_debug); ++ ++# endif /* DEBUG_KMEM_TRACKING */ ++#endif /* DEBUG_KMEM */ ++ ++/* ++ * Slab allocation interfaces ++ * ++ * While the Linux slab implementation was inspired by the Solaris ++ * implementation I cannot use it to emulate the Solaris APIs. I ++ * require two features which are not provided by the Linux slab. ++ * ++ * 1) Constructors AND destructors. Recent versions of the Linux ++ * kernel have removed support for destructors. This is a deal ++ * breaker for the SPL which contains particularly expensive ++ * initializers for mutex's, condition variables, etc. 
We also ++ * require a minimal level of cleanup for these data types unlike ++ * many Linux data type which do need to be explicitly destroyed. ++ * ++ * 2) Virtual address space backed slab. Callers of the Solaris slab ++ * expect it to work well for both small are very large allocations. ++ * Because of memory fragmentation the Linux slab which is backed ++ * by kmalloc'ed memory performs very badly when confronted with ++ * large numbers of large allocations. Basing the slab on the ++ * virtual address space removes the need for contiguous pages ++ * and greatly improve performance for large allocations. ++ * ++ * For these reasons, the SPL has its own slab implementation with ++ * the needed features. It is not as highly optimized as either the ++ * Solaris or Linux slabs, but it should get me most of what is ++ * needed until it can be optimized or obsoleted by another approach. ++ * ++ * One serious concern I do have about this method is the relatively ++ * small virtual address space on 32bit arches. This will seriously ++ * constrain the size of the slab caches and their performance. ++ * ++ * XXX: Improve the partial slab list by carefully maintaining a ++ * strict ordering of fullest to emptiest slabs based on ++ * the slab reference count. This guarantees the when freeing ++ * slabs back to the system we need only linearly traverse the ++ * last N slabs in the list to discover all the freeable slabs. ++ * ++ * XXX: NUMA awareness for optionally allocating memory close to a ++ * particular core. This can be advantageous if you know the slab ++ * object will be short lived and primarily accessed from one core. ++ * ++ * XXX: Slab coloring may also yield performance improvements and would ++ * be desirable to implement. ++ */ ++ ++struct list_head spl_kmem_cache_list; /* List of caches */ ++struct rw_semaphore spl_kmem_cache_sem; /* Cache list lock */ ++ ++static int spl_cache_flush(spl_kmem_cache_t *skc, ++ spl_kmem_magazine_t *skm, int flush); ++ ++SPL_SHRINKER_CALLBACK_FWD_DECLARE(spl_kmem_cache_generic_shrinker); ++SPL_SHRINKER_DECLARE(spl_kmem_cache_shrinker, ++ spl_kmem_cache_generic_shrinker, KMC_DEFAULT_SEEKS); ++ ++static void * ++kv_alloc(spl_kmem_cache_t *skc, int size, int flags) ++{ ++ void *ptr; ++ ++ ASSERT(ISP2(size)); ++ ++ if (skc->skc_flags & KMC_KMEM) ++ ptr = (void *)__get_free_pages(flags, get_order(size)); ++ else ++ ptr = __vmalloc(size, flags | __GFP_HIGHMEM, PAGE_KERNEL); ++ ++ /* Resulting allocated memory will be page aligned */ ++ ASSERT(IS_P2ALIGNED(ptr, PAGE_SIZE)); ++ ++ return ptr; ++} ++ ++static void ++kv_free(spl_kmem_cache_t *skc, void *ptr, int size) ++{ ++ ASSERT(IS_P2ALIGNED(ptr, PAGE_SIZE)); ++ ASSERT(ISP2(size)); ++ ++ /* ++ * The Linux direct reclaim path uses this out of band value to ++ * determine if forward progress is being made. Normally this is ++ * incremented by kmem_freepages() which is part of the various ++ * Linux slab implementations. However, since we are using none ++ * of that infrastructure we are responsible for incrementing it. ++ */ ++ if (current->reclaim_state) ++ current->reclaim_state->reclaimed_slab += size >> PAGE_SHIFT; ++ ++ if (skc->skc_flags & KMC_KMEM) ++ free_pages((unsigned long)ptr, get_order(size)); ++ else ++ vfree(ptr); ++} ++ ++/* ++ * Required space for each aligned sks. ++ */ ++static inline uint32_t ++spl_sks_size(spl_kmem_cache_t *skc) ++{ ++ return P2ROUNDUP_TYPED(sizeof(spl_kmem_slab_t), ++ skc->skc_obj_align, uint32_t); ++} ++ ++/* ++ * Required space for each aligned object. 
++ */ ++static inline uint32_t ++spl_obj_size(spl_kmem_cache_t *skc) ++{ ++ uint32_t align = skc->skc_obj_align; ++ ++ return P2ROUNDUP_TYPED(skc->skc_obj_size, align, uint32_t) + ++ P2ROUNDUP_TYPED(sizeof(spl_kmem_obj_t), align, uint32_t); ++} ++ ++/* ++ * Lookup the spl_kmem_object_t for an object given that object. ++ */ ++static inline spl_kmem_obj_t * ++spl_sko_from_obj(spl_kmem_cache_t *skc, void *obj) ++{ ++ return obj + P2ROUNDUP_TYPED(skc->skc_obj_size, ++ skc->skc_obj_align, uint32_t); ++} ++ ++/* ++ * Required space for each offslab object taking in to account alignment ++ * restrictions and the power-of-two requirement of kv_alloc(). ++ */ ++static inline uint32_t ++spl_offslab_size(spl_kmem_cache_t *skc) ++{ ++ return 1UL << (highbit(spl_obj_size(skc)) + 1); ++} ++ ++/* ++ * It's important that we pack the spl_kmem_obj_t structure and the ++ * actual objects in to one large address space to minimize the number ++ * of calls to the allocator. It is far better to do a few large ++ * allocations and then subdivide it ourselves. Now which allocator ++ * we use requires balancing a few trade offs. ++ * ++ * For small objects we use kmem_alloc() because as long as you are ++ * only requesting a small number of pages (ideally just one) its cheap. ++ * However, when you start requesting multiple pages with kmem_alloc() ++ * it gets increasingly expensive since it requires contiguous pages. ++ * For this reason we shift to vmem_alloc() for slabs of large objects ++ * which removes the need for contiguous pages. We do not use ++ * vmem_alloc() in all cases because there is significant locking ++ * overhead in __get_vm_area_node(). This function takes a single ++ * global lock when acquiring an available virtual address range which ++ * serializes all vmem_alloc()'s for all slab caches. Using slightly ++ * different allocation functions for small and large objects should ++ * give us the best of both worlds. ++ * ++ * KMC_ONSLAB KMC_OFFSLAB ++ * ++ * +------------------------+ +-----------------+ ++ * | spl_kmem_slab_t --+-+ | | spl_kmem_slab_t |---+-+ ++ * | skc_obj_size <-+ | | +-----------------+ | | ++ * | spl_kmem_obj_t | | | | ++ * | skc_obj_size <---+ | +-----------------+ | | ++ * | spl_kmem_obj_t | | | skc_obj_size | <-+ | ++ * | ... 
v | | spl_kmem_obj_t | | ++ * +------------------------+ +-----------------+ v ++ */ ++static spl_kmem_slab_t * ++spl_slab_alloc(spl_kmem_cache_t *skc, int flags) ++{ ++ spl_kmem_slab_t *sks; ++ spl_kmem_obj_t *sko, *n; ++ void *base, *obj; ++ uint32_t obj_size, offslab_size = 0; ++ int i, rc = 0; ++ ++ base = kv_alloc(skc, skc->skc_slab_size, flags); ++ if (base == NULL) ++ SRETURN(NULL); ++ ++ sks = (spl_kmem_slab_t *)base; ++ sks->sks_magic = SKS_MAGIC; ++ sks->sks_objs = skc->skc_slab_objs; ++ sks->sks_age = jiffies; ++ sks->sks_cache = skc; ++ INIT_LIST_HEAD(&sks->sks_list); ++ INIT_LIST_HEAD(&sks->sks_free_list); ++ sks->sks_ref = 0; ++ obj_size = spl_obj_size(skc); ++ ++ if (skc->skc_flags & KMC_OFFSLAB) ++ offslab_size = spl_offslab_size(skc); ++ ++ for (i = 0; i < sks->sks_objs; i++) { ++ if (skc->skc_flags & KMC_OFFSLAB) { ++ obj = kv_alloc(skc, offslab_size, flags); ++ if (!obj) ++ SGOTO(out, rc = -ENOMEM); ++ } else { ++ obj = base + spl_sks_size(skc) + (i * obj_size); ++ } ++ ++ ASSERT(IS_P2ALIGNED(obj, skc->skc_obj_align)); ++ sko = spl_sko_from_obj(skc, obj); ++ sko->sko_addr = obj; ++ sko->sko_magic = SKO_MAGIC; ++ sko->sko_slab = sks; ++ INIT_LIST_HEAD(&sko->sko_list); ++ list_add_tail(&sko->sko_list, &sks->sks_free_list); ++ } ++ ++ list_for_each_entry(sko, &sks->sks_free_list, sko_list) ++ if (skc->skc_ctor) ++ skc->skc_ctor(sko->sko_addr, skc->skc_private, flags); ++out: ++ if (rc) { ++ if (skc->skc_flags & KMC_OFFSLAB) ++ list_for_each_entry_safe(sko, n, &sks->sks_free_list, ++ sko_list) ++ kv_free(skc, sko->sko_addr, offslab_size); ++ ++ kv_free(skc, base, skc->skc_slab_size); ++ sks = NULL; ++ } ++ ++ SRETURN(sks); ++} ++ ++/* ++ * Remove a slab from complete or partial list, it must be called with ++ * the 'skc->skc_lock' held but the actual free must be performed ++ * outside the lock to prevent deadlocking on vmem addresses. ++ */ ++static void ++spl_slab_free(spl_kmem_slab_t *sks, ++ struct list_head *sks_list, struct list_head *sko_list) ++{ ++ spl_kmem_cache_t *skc; ++ SENTRY; ++ ++ ASSERT(sks->sks_magic == SKS_MAGIC); ++ ASSERT(sks->sks_ref == 0); ++ ++ skc = sks->sks_cache; ++ ASSERT(skc->skc_magic == SKC_MAGIC); ++ ASSERT(spin_is_locked(&skc->skc_lock)); ++ ++ /* ++ * Update slab/objects counters in the cache, then remove the ++ * slab from the skc->skc_partial_list. Finally add the slab ++ * and all its objects in to the private work lists where the ++ * destructors will be called and the memory freed to the system. ++ */ ++ skc->skc_obj_total -= sks->sks_objs; ++ skc->skc_slab_total--; ++ list_del(&sks->sks_list); ++ list_add(&sks->sks_list, sks_list); ++ list_splice_init(&sks->sks_free_list, sko_list); ++ ++ SEXIT; ++} ++ ++/* ++ * Traverses all the partial slabs attached to a cache and free those ++ * which which are currently empty, and have not been touched for ++ * skc_delay seconds to avoid thrashing. The count argument is ++ * passed to optionally cap the number of slabs reclaimed, a count ++ * of zero means try and reclaim everything. When flag is set we ++ * always free an available slab regardless of age. ++ */ ++static void ++spl_slab_reclaim(spl_kmem_cache_t *skc, int count, int flag) ++{ ++ spl_kmem_slab_t *sks, *m; ++ spl_kmem_obj_t *sko, *n; ++ LIST_HEAD(sks_list); ++ LIST_HEAD(sko_list); ++ uint32_t size = 0; ++ int i = 0; ++ SENTRY; ++ ++ /* ++ * Move empty slabs and objects which have not been touched in ++ * skc_delay seconds on to private lists to be freed outside ++ * the spin lock. 
This delay time is important to avoid thrashing ++ * however when flag is set the delay will not be used. ++ */ ++ spin_lock(&skc->skc_lock); ++ list_for_each_entry_safe_reverse(sks,m,&skc->skc_partial_list,sks_list){ ++ /* ++ * All empty slabs are at the end of skc->skc_partial_list, ++ * therefore once a non-empty slab is found we can stop ++ * scanning. Additionally, stop when reaching the target ++ * reclaim 'count' if a non-zero threshold is given. ++ */ ++ if ((sks->sks_ref > 0) || (count && i >= count)) ++ break; ++ ++ if (time_after(jiffies,sks->sks_age+skc->skc_delay*HZ)||flag) { ++ spl_slab_free(sks, &sks_list, &sko_list); ++ i++; ++ } ++ } ++ spin_unlock(&skc->skc_lock); ++ ++ /* ++ * The following two loops ensure all the object destructors are ++ * run, any offslab objects are freed, and the slabs themselves ++ * are freed. This is all done outside the skc->skc_lock since ++ * this allows the destructor to sleep, and allows us to perform ++ * a conditional reschedule when a freeing a large number of ++ * objects and slabs back to the system. ++ */ ++ if (skc->skc_flags & KMC_OFFSLAB) ++ size = spl_offslab_size(skc); ++ ++ list_for_each_entry_safe(sko, n, &sko_list, sko_list) { ++ ASSERT(sko->sko_magic == SKO_MAGIC); ++ ++ if (skc->skc_dtor) ++ skc->skc_dtor(sko->sko_addr, skc->skc_private); ++ ++ if (skc->skc_flags & KMC_OFFSLAB) ++ kv_free(skc, sko->sko_addr, size); ++ ++ cond_resched(); ++ } ++ ++ list_for_each_entry_safe(sks, m, &sks_list, sks_list) { ++ ASSERT(sks->sks_magic == SKS_MAGIC); ++ kv_free(skc, sks, skc->skc_slab_size); ++ cond_resched(); ++ } ++ ++ SEXIT; ++} ++ ++static spl_kmem_emergency_t * ++spl_emergency_search(struct rb_root *root, void *obj) ++{ ++ struct rb_node *node = root->rb_node; ++ spl_kmem_emergency_t *ske; ++ unsigned long address = (unsigned long)obj; ++ ++ while (node) { ++ ske = container_of(node, spl_kmem_emergency_t, ske_node); ++ ++ if (address < (unsigned long)ske->ske_obj) ++ node = node->rb_left; ++ else if (address > (unsigned long)ske->ske_obj) ++ node = node->rb_right; ++ else ++ return ske; ++ } ++ ++ return NULL; ++} ++ ++static int ++spl_emergency_insert(struct rb_root *root, spl_kmem_emergency_t *ske) ++{ ++ struct rb_node **new = &(root->rb_node), *parent = NULL; ++ spl_kmem_emergency_t *ske_tmp; ++ unsigned long address = (unsigned long)ske->ske_obj; ++ ++ while (*new) { ++ ske_tmp = container_of(*new, spl_kmem_emergency_t, ske_node); ++ ++ parent = *new; ++ if (address < (unsigned long)ske_tmp->ske_obj) ++ new = &((*new)->rb_left); ++ else if (address > (unsigned long)ske_tmp->ske_obj) ++ new = &((*new)->rb_right); ++ else ++ return 0; ++ } ++ ++ rb_link_node(&ske->ske_node, parent, new); ++ rb_insert_color(&ske->ske_node, root); ++ ++ return 1; ++} ++ ++/* ++ * Allocate a single emergency object and track it in a red black tree. 
++ */ ++static int ++spl_emergency_alloc(spl_kmem_cache_t *skc, int flags, void **obj) ++{ ++ spl_kmem_emergency_t *ske; ++ int empty; ++ SENTRY; ++ ++ /* Last chance use a partial slab if one now exists */ ++ spin_lock(&skc->skc_lock); ++ empty = list_empty(&skc->skc_partial_list); ++ spin_unlock(&skc->skc_lock); ++ if (!empty) ++ SRETURN(-EEXIST); ++ ++ ske = kmalloc(sizeof(*ske), flags); ++ if (ske == NULL) ++ SRETURN(-ENOMEM); ++ ++ ske->ske_obj = kmalloc(skc->skc_obj_size, flags); ++ if (ske->ske_obj == NULL) { ++ kfree(ske); ++ SRETURN(-ENOMEM); ++ } ++ ++ spin_lock(&skc->skc_lock); ++ empty = spl_emergency_insert(&skc->skc_emergency_tree, ske); ++ if (likely(empty)) { ++ skc->skc_obj_total++; ++ skc->skc_obj_emergency++; ++ if (skc->skc_obj_emergency > skc->skc_obj_emergency_max) ++ skc->skc_obj_emergency_max = skc->skc_obj_emergency; ++ } ++ spin_unlock(&skc->skc_lock); ++ ++ if (unlikely(!empty)) { ++ kfree(ske->ske_obj); ++ kfree(ske); ++ SRETURN(-EINVAL); ++ } ++ ++ if (skc->skc_ctor) ++ skc->skc_ctor(ske->ske_obj, skc->skc_private, flags); ++ ++ *obj = ske->ske_obj; ++ ++ SRETURN(0); ++} ++ ++/* ++ * Locate the passed object in the red black tree and free it. ++ */ ++static int ++spl_emergency_free(spl_kmem_cache_t *skc, void *obj) ++{ ++ spl_kmem_emergency_t *ske; ++ SENTRY; ++ ++ spin_lock(&skc->skc_lock); ++ ske = spl_emergency_search(&skc->skc_emergency_tree, obj); ++ if (likely(ske)) { ++ rb_erase(&ske->ske_node, &skc->skc_emergency_tree); ++ skc->skc_obj_emergency--; ++ skc->skc_obj_total--; ++ } ++ spin_unlock(&skc->skc_lock); ++ ++ if (unlikely(ske == NULL)) ++ SRETURN(-ENOENT); ++ ++ if (skc->skc_dtor) ++ skc->skc_dtor(ske->ske_obj, skc->skc_private); ++ ++ kfree(ske->ske_obj); ++ kfree(ske); ++ ++ SRETURN(0); ++} ++ ++/* ++ * Called regularly on all caches to age objects out of the magazines ++ * which have not been access in skc->skc_delay seconds. This prevents ++ * idle magazines from holding memory which might be better used by ++ * other caches or parts of the system. The delay is present to ++ * prevent thrashing the magazine. ++ */ ++static void ++spl_magazine_age(void *data) ++{ ++ spl_kmem_magazine_t *skm = ++ spl_get_work_data(data, spl_kmem_magazine_t, skm_work.work); ++ spl_kmem_cache_t *skc = skm->skm_cache; ++ ++ ASSERT(skm->skm_magic == SKM_MAGIC); ++ ASSERT(skc->skc_magic == SKC_MAGIC); ++ ASSERT(skc->skc_mag[skm->skm_cpu] == skm); ++ ++ if (skm->skm_avail > 0 && ++ time_after(jiffies, skm->skm_age + skc->skc_delay * HZ)) ++ (void)spl_cache_flush(skc, skm, skm->skm_refill); ++ ++ if (!test_bit(KMC_BIT_DESTROY, &skc->skc_flags)) ++ schedule_delayed_work_on(skm->skm_cpu, &skm->skm_work, ++ skc->skc_delay / 3 * HZ); ++} ++ ++/* ++ * Called regularly to keep a downward pressure on the size of idle ++ * magazines and to release free slabs from the cache. This function ++ * never calls the registered reclaim function, that only occurs ++ * under memory pressure or with a direct call to spl_kmem_reap(). ++ */ ++static void ++spl_cache_age(void *data) ++{ ++ spl_kmem_cache_t *skc = ++ spl_get_work_data(data, spl_kmem_cache_t, skc_work.work); ++ ++ ASSERT(skc->skc_magic == SKC_MAGIC); ++ spl_slab_reclaim(skc, skc->skc_reap, 0); ++ ++ if (!test_bit(KMC_BIT_DESTROY, &skc->skc_flags)) ++ schedule_delayed_work(&skc->skc_work, skc->skc_delay / 3 * HZ); ++} ++ ++/* ++ * Size a slab based on the size of each aligned object plus spl_kmem_obj_t. ++ * When on-slab we want to target SPL_KMEM_CACHE_OBJ_PER_SLAB. 
However, ++ * for very small objects we may end up with more than this so as not ++ * to waste space in the minimal allocation of a single page. Also for ++ * very large objects we may use as few as SPL_KMEM_CACHE_OBJ_PER_SLAB_MIN, ++ * lower than this and we will fail. ++ */ ++static int ++spl_slab_size(spl_kmem_cache_t *skc, uint32_t *objs, uint32_t *size) ++{ ++ uint32_t sks_size, obj_size, max_size; ++ ++ if (skc->skc_flags & KMC_OFFSLAB) { ++ *objs = SPL_KMEM_CACHE_OBJ_PER_SLAB; ++ *size = sizeof(spl_kmem_slab_t); ++ } else { ++ sks_size = spl_sks_size(skc); ++ obj_size = spl_obj_size(skc); ++ ++ if (skc->skc_flags & KMC_KMEM) ++ max_size = ((uint32_t)1 << (MAX_ORDER-3)) * PAGE_SIZE; ++ else ++ max_size = (32 * 1024 * 1024); ++ ++ /* Power of two sized slab */ ++ for (*size = PAGE_SIZE; *size <= max_size; *size *= 2) { ++ *objs = (*size - sks_size) / obj_size; ++ if (*objs >= SPL_KMEM_CACHE_OBJ_PER_SLAB) ++ SRETURN(0); ++ } ++ ++ /* ++ * Unable to satisfy target objects per slab, fall back to ++ * allocating a maximally sized slab and assuming it can ++ * contain the minimum objects count use it. If not fail. ++ */ ++ *size = max_size; ++ *objs = (*size - sks_size) / obj_size; ++ if (*objs >= SPL_KMEM_CACHE_OBJ_PER_SLAB_MIN) ++ SRETURN(0); ++ } ++ ++ SRETURN(-ENOSPC); ++} ++ ++/* ++ * Make a guess at reasonable per-cpu magazine size based on the size of ++ * each object and the cost of caching N of them in each magazine. Long ++ * term this should really adapt based on an observed usage heuristic. ++ */ ++static int ++spl_magazine_size(spl_kmem_cache_t *skc) ++{ ++ uint32_t obj_size = spl_obj_size(skc); ++ int size; ++ SENTRY; ++ ++ /* Per-magazine sizes below assume a 4Kib page size */ ++ if (obj_size > (PAGE_SIZE * 256)) ++ size = 4; /* Minimum 4Mib per-magazine */ ++ else if (obj_size > (PAGE_SIZE * 32)) ++ size = 16; /* Minimum 2Mib per-magazine */ ++ else if (obj_size > (PAGE_SIZE)) ++ size = 64; /* Minimum 256Kib per-magazine */ ++ else if (obj_size > (PAGE_SIZE / 4)) ++ size = 128; /* Minimum 128Kib per-magazine */ ++ else ++ size = 256; ++ ++ SRETURN(size); ++} ++ ++/* ++ * Allocate a per-cpu magazine to associate with a specific core. ++ */ ++static spl_kmem_magazine_t * ++spl_magazine_alloc(spl_kmem_cache_t *skc, int cpu) ++{ ++ spl_kmem_magazine_t *skm; ++ int size = sizeof(spl_kmem_magazine_t) + ++ sizeof(void *) * skc->skc_mag_size; ++ SENTRY; ++ ++ skm = kmem_alloc_node(size, KM_SLEEP, cpu_to_node(cpu)); ++ if (skm) { ++ skm->skm_magic = SKM_MAGIC; ++ skm->skm_avail = 0; ++ skm->skm_size = skc->skc_mag_size; ++ skm->skm_refill = skc->skc_mag_refill; ++ skm->skm_cache = skc; ++ spl_init_delayed_work(&skm->skm_work, spl_magazine_age, skm); ++ skm->skm_age = jiffies; ++ skm->skm_cpu = cpu; ++ } ++ ++ SRETURN(skm); ++} ++ ++/* ++ * Free a per-cpu magazine associated with a specific core. ++ */ ++static void ++spl_magazine_free(spl_kmem_magazine_t *skm) ++{ ++ int size = sizeof(spl_kmem_magazine_t) + ++ sizeof(void *) * skm->skm_size; ++ ++ SENTRY; ++ ASSERT(skm->skm_magic == SKM_MAGIC); ++ ASSERT(skm->skm_avail == 0); ++ ++ kmem_free(skm, size); ++ SEXIT; ++} ++ ++/* ++ * Create all pre-cpu magazines of reasonable sizes. 
++ */ ++static int ++spl_magazine_create(spl_kmem_cache_t *skc) ++{ ++ int i; ++ SENTRY; ++ ++ skc->skc_mag_size = spl_magazine_size(skc); ++ skc->skc_mag_refill = (skc->skc_mag_size + 1) / 2; ++ ++ for_each_online_cpu(i) { ++ skc->skc_mag[i] = spl_magazine_alloc(skc, i); ++ if (!skc->skc_mag[i]) { ++ for (i--; i >= 0; i--) ++ spl_magazine_free(skc->skc_mag[i]); ++ ++ SRETURN(-ENOMEM); ++ } ++ } ++ ++ /* Only after everything is allocated schedule magazine work */ ++ for_each_online_cpu(i) ++ schedule_delayed_work_on(i, &skc->skc_mag[i]->skm_work, ++ skc->skc_delay / 3 * HZ); ++ ++ SRETURN(0); ++} ++ ++/* ++ * Destroy all pre-cpu magazines. ++ */ ++static void ++spl_magazine_destroy(spl_kmem_cache_t *skc) ++{ ++ spl_kmem_magazine_t *skm; ++ int i; ++ SENTRY; ++ ++ for_each_online_cpu(i) { ++ skm = skc->skc_mag[i]; ++ (void)spl_cache_flush(skc, skm, skm->skm_avail); ++ spl_magazine_free(skm); ++ } ++ ++ SEXIT; ++} ++ ++/* ++ * Create a object cache based on the following arguments: ++ * name cache name ++ * size cache object size ++ * align cache object alignment ++ * ctor cache object constructor ++ * dtor cache object destructor ++ * reclaim cache object reclaim ++ * priv cache private data for ctor/dtor/reclaim ++ * vmp unused must be NULL ++ * flags ++ * KMC_NOTOUCH Disable cache object aging (unsupported) ++ * KMC_NODEBUG Disable debugging (unsupported) ++ * KMC_NOMAGAZINE Disable magazine (unsupported) ++ * KMC_NOHASH Disable hashing (unsupported) ++ * KMC_QCACHE Disable qcache (unsupported) ++ * KMC_KMEM Force kmem backed cache ++ * KMC_VMEM Force vmem backed cache ++ * KMC_OFFSLAB Locate objects off the slab ++ */ ++spl_kmem_cache_t * ++spl_kmem_cache_create(char *name, size_t size, size_t align, ++ spl_kmem_ctor_t ctor, ++ spl_kmem_dtor_t dtor, ++ spl_kmem_reclaim_t reclaim, ++ void *priv, void *vmp, int flags) ++{ ++ spl_kmem_cache_t *skc; ++ int rc, kmem_flags = KM_SLEEP; ++ SENTRY; ++ ++ ASSERTF(!(flags & KMC_NOMAGAZINE), "Bad KMC_NOMAGAZINE (%x)\n", flags); ++ ASSERTF(!(flags & KMC_NOHASH), "Bad KMC_NOHASH (%x)\n", flags); ++ ASSERTF(!(flags & KMC_QCACHE), "Bad KMC_QCACHE (%x)\n", flags); ++ ASSERT(vmp == NULL); ++ ++ /* We may be called when there is a non-zero preempt_count or ++ * interrupts are disabled is which case we must not sleep. ++ */ ++ if (current_thread_info()->preempt_count || irqs_disabled()) ++ kmem_flags = KM_NOSLEEP; ++ ++ /* Allocate memory for a new cache an initialize it. Unfortunately, ++ * this usually ends up being a large allocation of ~32k because ++ * we need to allocate enough memory for the worst case number of ++ * cpus in the magazine, skc_mag[NR_CPUS]. 
Because of this we ++ * explicitly pass KM_NODEBUG to suppress the kmem warning */ ++ skc = (spl_kmem_cache_t *)kmem_zalloc(sizeof(*skc), ++ kmem_flags | KM_NODEBUG); ++ if (skc == NULL) ++ SRETURN(NULL); ++ ++ skc->skc_magic = SKC_MAGIC; ++ skc->skc_name_size = strlen(name) + 1; ++ skc->skc_name = (char *)kmem_alloc(skc->skc_name_size, kmem_flags); ++ if (skc->skc_name == NULL) { ++ kmem_free(skc, sizeof(*skc)); ++ SRETURN(NULL); ++ } ++ strncpy(skc->skc_name, name, skc->skc_name_size); ++ ++ skc->skc_ctor = ctor; ++ skc->skc_dtor = dtor; ++ skc->skc_reclaim = reclaim; ++ skc->skc_private = priv; ++ skc->skc_vmp = vmp; ++ skc->skc_flags = flags; ++ skc->skc_obj_size = size; ++ skc->skc_obj_align = SPL_KMEM_CACHE_ALIGN; ++ skc->skc_delay = SPL_KMEM_CACHE_DELAY; ++ skc->skc_reap = SPL_KMEM_CACHE_REAP; ++ atomic_set(&skc->skc_ref, 0); ++ ++ INIT_LIST_HEAD(&skc->skc_list); ++ INIT_LIST_HEAD(&skc->skc_complete_list); ++ INIT_LIST_HEAD(&skc->skc_partial_list); ++ skc->skc_emergency_tree = RB_ROOT; ++ spin_lock_init(&skc->skc_lock); ++ init_waitqueue_head(&skc->skc_waitq); ++ skc->skc_slab_fail = 0; ++ skc->skc_slab_create = 0; ++ skc->skc_slab_destroy = 0; ++ skc->skc_slab_total = 0; ++ skc->skc_slab_alloc = 0; ++ skc->skc_slab_max = 0; ++ skc->skc_obj_total = 0; ++ skc->skc_obj_alloc = 0; ++ skc->skc_obj_max = 0; ++ skc->skc_obj_deadlock = 0; ++ skc->skc_obj_emergency = 0; ++ skc->skc_obj_emergency_max = 0; ++ ++ if (align) { ++ VERIFY(ISP2(align)); ++ VERIFY3U(align, >=, SPL_KMEM_CACHE_ALIGN); /* Min alignment */ ++ VERIFY3U(align, <=, PAGE_SIZE); /* Max alignment */ ++ skc->skc_obj_align = align; ++ } ++ ++ /* If none passed select a cache type based on object size */ ++ if (!(skc->skc_flags & (KMC_KMEM | KMC_VMEM))) { ++ if (spl_obj_size(skc) < (PAGE_SIZE / 8)) ++ skc->skc_flags |= KMC_KMEM; ++ else ++ skc->skc_flags |= KMC_VMEM; ++ } ++ ++ rc = spl_slab_size(skc, &skc->skc_slab_objs, &skc->skc_slab_size); ++ if (rc) ++ SGOTO(out, rc); ++ ++ rc = spl_magazine_create(skc); ++ if (rc) ++ SGOTO(out, rc); ++ ++ spl_init_delayed_work(&skc->skc_work, spl_cache_age, skc); ++ schedule_delayed_work(&skc->skc_work, skc->skc_delay / 3 * HZ); ++ ++ down_write(&spl_kmem_cache_sem); ++ list_add_tail(&skc->skc_list, &spl_kmem_cache_list); ++ up_write(&spl_kmem_cache_sem); ++ ++ SRETURN(skc); ++out: ++ kmem_free(skc->skc_name, skc->skc_name_size); ++ kmem_free(skc, sizeof(*skc)); ++ SRETURN(NULL); ++} ++EXPORT_SYMBOL(spl_kmem_cache_create); ++ ++/* ++ * Register a move callback to for cache defragmentation. ++ * XXX: Unimplemented but harmless to stub out for now. ++ */ ++void ++spl_kmem_cache_set_move(spl_kmem_cache_t *skc, ++ kmem_cbrc_t (move)(void *, void *, size_t, void *)) ++{ ++ ASSERT(move != NULL); ++} ++EXPORT_SYMBOL(spl_kmem_cache_set_move); ++ ++/* ++ * Destroy a cache and all objects associated with the cache. 
++ */ ++void ++spl_kmem_cache_destroy(spl_kmem_cache_t *skc) ++{ ++ DECLARE_WAIT_QUEUE_HEAD(wq); ++ int i; ++ SENTRY; ++ ++ ASSERT(skc->skc_magic == SKC_MAGIC); ++ ++ down_write(&spl_kmem_cache_sem); ++ list_del_init(&skc->skc_list); ++ up_write(&spl_kmem_cache_sem); ++ ++ /* Cancel any and wait for any pending delayed work */ ++ VERIFY(!test_and_set_bit(KMC_BIT_DESTROY, &skc->skc_flags)); ++ cancel_delayed_work_sync(&skc->skc_work); ++ for_each_online_cpu(i) ++ cancel_delayed_work_sync(&skc->skc_mag[i]->skm_work); ++ ++ flush_scheduled_work(); ++ ++ /* Wait until all current callers complete, this is mainly ++ * to catch the case where a low memory situation triggers a ++ * cache reaping action which races with this destroy. */ ++ wait_event(wq, atomic_read(&skc->skc_ref) == 0); ++ ++ spl_magazine_destroy(skc); ++ spl_slab_reclaim(skc, 0, 1); ++ spin_lock(&skc->skc_lock); ++ ++ /* Validate there are no objects in use and free all the ++ * spl_kmem_slab_t, spl_kmem_obj_t, and object buffers. */ ++ ASSERT3U(skc->skc_slab_alloc, ==, 0); ++ ASSERT3U(skc->skc_obj_alloc, ==, 0); ++ ASSERT3U(skc->skc_slab_total, ==, 0); ++ ASSERT3U(skc->skc_obj_total, ==, 0); ++ ASSERT3U(skc->skc_obj_emergency, ==, 0); ++ ASSERT(list_empty(&skc->skc_complete_list)); ++ ++ kmem_free(skc->skc_name, skc->skc_name_size); ++ spin_unlock(&skc->skc_lock); ++ ++ kmem_free(skc, sizeof(*skc)); ++ ++ SEXIT; ++} ++EXPORT_SYMBOL(spl_kmem_cache_destroy); ++ ++/* ++ * Allocate an object from a slab attached to the cache. This is used to ++ * repopulate the per-cpu magazine caches in batches when they run low. ++ */ ++static void * ++spl_cache_obj(spl_kmem_cache_t *skc, spl_kmem_slab_t *sks) ++{ ++ spl_kmem_obj_t *sko; ++ ++ ASSERT(skc->skc_magic == SKC_MAGIC); ++ ASSERT(sks->sks_magic == SKS_MAGIC); ++ ASSERT(spin_is_locked(&skc->skc_lock)); ++ ++ sko = list_entry(sks->sks_free_list.next, spl_kmem_obj_t, sko_list); ++ ASSERT(sko->sko_magic == SKO_MAGIC); ++ ASSERT(sko->sko_addr != NULL); ++ ++ /* Remove from sks_free_list */ ++ list_del_init(&sko->sko_list); ++ ++ sks->sks_age = jiffies; ++ sks->sks_ref++; ++ skc->skc_obj_alloc++; ++ ++ /* Track max obj usage statistics */ ++ if (skc->skc_obj_alloc > skc->skc_obj_max) ++ skc->skc_obj_max = skc->skc_obj_alloc; ++ ++ /* Track max slab usage statistics */ ++ if (sks->sks_ref == 1) { ++ skc->skc_slab_alloc++; ++ ++ if (skc->skc_slab_alloc > skc->skc_slab_max) ++ skc->skc_slab_max = skc->skc_slab_alloc; ++ } ++ ++ return sko->sko_addr; ++} ++ ++/* ++ * Generic slab allocation function to run by the global work queues. ++ * It is responsible for allocating a new slab, linking it in to the list ++ * of partial slabs, and then waking any waiters. ++ */ ++static void ++spl_cache_grow_work(void *data) ++{ ++ spl_kmem_alloc_t *ska = ++ spl_get_work_data(data, spl_kmem_alloc_t, ska_work.work); ++ spl_kmem_cache_t *skc = ska->ska_cache; ++ spl_kmem_slab_t *sks; ++ ++ sks = spl_slab_alloc(skc, ska->ska_flags | __GFP_NORETRY | KM_NODEBUG); ++ spin_lock(&skc->skc_lock); ++ if (sks) { ++ skc->skc_slab_total++; ++ skc->skc_obj_total += sks->sks_objs; ++ list_add_tail(&sks->sks_list, &skc->skc_partial_list); ++ } ++ ++ atomic_dec(&skc->skc_ref); ++ clear_bit(KMC_BIT_GROWING, &skc->skc_flags); ++ clear_bit(KMC_BIT_DEADLOCKED, &skc->skc_flags); ++ wake_up_all(&skc->skc_waitq); ++ spin_unlock(&skc->skc_lock); ++ ++ kfree(ska); ++} ++ ++/* ++ * Returns non-zero when a new slab should be available. 
++ */ ++static int ++spl_cache_grow_wait(spl_kmem_cache_t *skc) ++{ ++ return !test_bit(KMC_BIT_GROWING, &skc->skc_flags); ++} ++ ++static int ++spl_cache_reclaim_wait(void *word) ++{ ++ schedule(); ++ return 0; ++} ++ ++/* ++ * No available objects on any slabs, create a new slab. ++ */ ++static int ++spl_cache_grow(spl_kmem_cache_t *skc, int flags, void **obj) ++{ ++ int remaining, rc; ++ SENTRY; ++ ++ ASSERT(skc->skc_magic == SKC_MAGIC); ++ might_sleep(); ++ *obj = NULL; ++ ++ /* ++ * Before allocating a new slab wait for any reaping to complete and ++ * then return so the local magazine can be rechecked for new objects. ++ */ ++ if (test_bit(KMC_BIT_REAPING, &skc->skc_flags)) { ++ rc = wait_on_bit(&skc->skc_flags, KMC_BIT_REAPING, ++ spl_cache_reclaim_wait, TASK_UNINTERRUPTIBLE); ++ SRETURN(rc ? rc : -EAGAIN); ++ } ++ ++ /* ++ * This is handled by dispatching a work request to the global work ++ * queue. This allows us to asynchronously allocate a new slab while ++ * retaining the ability to safely fall back to a smaller synchronous ++ * allocations to ensure forward progress is always maintained. ++ */ ++ if (test_and_set_bit(KMC_BIT_GROWING, &skc->skc_flags) == 0) { ++ spl_kmem_alloc_t *ska; ++ ++ ska = kmalloc(sizeof(*ska), flags); ++ if (ska == NULL) { ++ clear_bit(KMC_BIT_GROWING, &skc->skc_flags); ++ wake_up_all(&skc->skc_waitq); ++ SRETURN(-ENOMEM); ++ } ++ ++ atomic_inc(&skc->skc_ref); ++ ska->ska_cache = skc; ++ ska->ska_flags = flags; ++ spl_init_delayed_work(&ska->ska_work, spl_cache_grow_work, ska); ++ schedule_delayed_work(&ska->ska_work, 0); ++ } ++ ++ /* ++ * The goal here is to only detect the rare case where a virtual slab ++ * allocation has deadlocked. We must be careful to minimize the use ++ * of emergency objects which are more expensive to track. Therefore, ++ * we set a very long timeout for the asynchronous allocation and if ++ * the timeout is reached the cache is flagged as deadlocked. From ++ * this point only new emergency objects will be allocated until the ++ * asynchronous allocation completes and clears the deadlocked flag. ++ */ ++ if (test_bit(KMC_BIT_DEADLOCKED, &skc->skc_flags)) { ++ rc = spl_emergency_alloc(skc, flags, obj); ++ } else { ++ remaining = wait_event_timeout(skc->skc_waitq, ++ spl_cache_grow_wait(skc), HZ); ++ ++ if (!remaining && test_bit(KMC_BIT_VMEM, &skc->skc_flags)) { ++ spin_lock(&skc->skc_lock); ++ if (test_bit(KMC_BIT_GROWING, &skc->skc_flags)) { ++ set_bit(KMC_BIT_DEADLOCKED, &skc->skc_flags); ++ skc->skc_obj_deadlock++; ++ } ++ spin_unlock(&skc->skc_lock); ++ } ++ ++ rc = -ENOMEM; ++ } ++ ++ SRETURN(rc); ++} ++ ++/* ++ * Refill a per-cpu magazine with objects from the slabs for this cache. ++ * Ideally the magazine can be repopulated using existing objects which have ++ * been released, however if we are unable to locate enough free objects new ++ * slabs of objects will be created. On success NULL is returned, otherwise ++ * the address of a single emergency object is returned for use by the caller. 
++ */ ++static void * ++spl_cache_refill(spl_kmem_cache_t *skc, spl_kmem_magazine_t *skm, int flags) ++{ ++ spl_kmem_slab_t *sks; ++ int count = 0, rc, refill; ++ void *obj = NULL; ++ SENTRY; ++ ++ ASSERT(skc->skc_magic == SKC_MAGIC); ++ ASSERT(skm->skm_magic == SKM_MAGIC); ++ ++ refill = MIN(skm->skm_refill, skm->skm_size - skm->skm_avail); ++ spin_lock(&skc->skc_lock); ++ ++ while (refill > 0) { ++ /* No slabs available we may need to grow the cache */ ++ if (list_empty(&skc->skc_partial_list)) { ++ spin_unlock(&skc->skc_lock); ++ ++ local_irq_enable(); ++ rc = spl_cache_grow(skc, flags, &obj); ++ local_irq_disable(); ++ ++ /* Emergency object for immediate use by caller */ ++ if (rc == 0 && obj != NULL) ++ SRETURN(obj); ++ ++ if (rc) ++ SGOTO(out, rc); ++ ++ /* Rescheduled to different CPU skm is not local */ ++ if (skm != skc->skc_mag[smp_processor_id()]) ++ SGOTO(out, rc); ++ ++ /* Potentially rescheduled to the same CPU but ++ * allocations may have occurred from this CPU while ++ * we were sleeping so recalculate max refill. */ ++ refill = MIN(refill, skm->skm_size - skm->skm_avail); ++ ++ spin_lock(&skc->skc_lock); ++ continue; ++ } ++ ++ /* Grab the next available slab */ ++ sks = list_entry((&skc->skc_partial_list)->next, ++ spl_kmem_slab_t, sks_list); ++ ASSERT(sks->sks_magic == SKS_MAGIC); ++ ASSERT(sks->sks_ref < sks->sks_objs); ++ ASSERT(!list_empty(&sks->sks_free_list)); ++ ++ /* Consume as many objects as needed to refill the requested ++ * cache. We must also be careful not to overfill it. */ ++ while (sks->sks_ref < sks->sks_objs && refill-- > 0 && ++count) { ++ ASSERT(skm->skm_avail < skm->skm_size); ++ ASSERT(count < skm->skm_size); ++ skm->skm_objs[skm->skm_avail++]=spl_cache_obj(skc,sks); ++ } ++ ++ /* Move slab to skc_complete_list when full */ ++ if (sks->sks_ref == sks->sks_objs) { ++ list_del(&sks->sks_list); ++ list_add(&sks->sks_list, &skc->skc_complete_list); ++ } ++ } ++ ++ spin_unlock(&skc->skc_lock); ++out: ++ SRETURN(NULL); ++} ++ ++/* ++ * Release an object back to the slab from which it came. ++ */ ++static void ++spl_cache_shrink(spl_kmem_cache_t *skc, void *obj) ++{ ++ spl_kmem_slab_t *sks = NULL; ++ spl_kmem_obj_t *sko = NULL; ++ SENTRY; ++ ++ ASSERT(skc->skc_magic == SKC_MAGIC); ++ ASSERT(spin_is_locked(&skc->skc_lock)); ++ ++ sko = spl_sko_from_obj(skc, obj); ++ ASSERT(sko->sko_magic == SKO_MAGIC); ++ sks = sko->sko_slab; ++ ASSERT(sks->sks_magic == SKS_MAGIC); ++ ASSERT(sks->sks_cache == skc); ++ list_add(&sko->sko_list, &sks->sks_free_list); ++ ++ sks->sks_age = jiffies; ++ sks->sks_ref--; ++ skc->skc_obj_alloc--; ++ ++ /* Move slab to skc_partial_list when no longer full. Slabs ++ * are added to the head to keep the partial list is quasi-full ++ * sorted order. Fuller at the head, emptier at the tail. */ ++ if (sks->sks_ref == (sks->sks_objs - 1)) { ++ list_del(&sks->sks_list); ++ list_add(&sks->sks_list, &skc->skc_partial_list); ++ } ++ ++ /* Move empty slabs to the end of the partial list so ++ * they can be easily found and freed during reclamation. */ ++ if (sks->sks_ref == 0) { ++ list_del(&sks->sks_list); ++ list_add_tail(&sks->sks_list, &skc->skc_partial_list); ++ skc->skc_slab_alloc--; ++ } ++ ++ SEXIT; ++} ++ ++/* ++ * Release a batch of objects from a per-cpu magazine back to their ++ * respective slabs. This occurs when we exceed the magazine size, ++ * are under memory pressure, when the cache is idle, or during ++ * cache cleanup. The flush argument contains the number of entries ++ * to remove from the magazine. 
++ */ ++static int ++spl_cache_flush(spl_kmem_cache_t *skc, spl_kmem_magazine_t *skm, int flush) ++{ ++ int i, count = MIN(flush, skm->skm_avail); ++ SENTRY; ++ ++ ASSERT(skc->skc_magic == SKC_MAGIC); ++ ASSERT(skm->skm_magic == SKM_MAGIC); ++ ++ /* ++ * XXX: Currently we simply return objects from the magazine to ++ * the slabs in fifo order. The ideal thing to do from a memory ++ * fragmentation standpoint is to cheaply determine the set of ++ * objects in the magazine which will result in the largest ++ * number of free slabs if released from the magazine. ++ */ ++ spin_lock(&skc->skc_lock); ++ for (i = 0; i < count; i++) ++ spl_cache_shrink(skc, skm->skm_objs[i]); ++ ++ skm->skm_avail -= count; ++ memmove(skm->skm_objs, &(skm->skm_objs[count]), ++ sizeof(void *) * skm->skm_avail); ++ ++ spin_unlock(&skc->skc_lock); ++ ++ SRETURN(count); ++} ++ ++/* ++ * Allocate an object from the per-cpu magazine, or if the magazine ++ * is empty directly allocate from a slab and repopulate the magazine. ++ */ ++void * ++spl_kmem_cache_alloc(spl_kmem_cache_t *skc, int flags) ++{ ++ spl_kmem_magazine_t *skm; ++ unsigned long irq_flags; ++ void *obj = NULL; ++ SENTRY; ++ ++ ASSERT(skc->skc_magic == SKC_MAGIC); ++ ASSERT(!test_bit(KMC_BIT_DESTROY, &skc->skc_flags)); ++ ASSERT(flags & KM_SLEEP); ++ atomic_inc(&skc->skc_ref); ++ local_irq_save(irq_flags); ++ ++restart: ++ /* Safe to update per-cpu structure without lock, but ++ * in the restart case we must be careful to reacquire ++ * the local magazine since this may have changed ++ * when we need to grow the cache. */ ++ skm = skc->skc_mag[smp_processor_id()]; ++ ASSERTF(skm->skm_magic == SKM_MAGIC, "%x != %x: %s/%p/%p %x/%x/%x\n", ++ skm->skm_magic, SKM_MAGIC, skc->skc_name, skc, skm, ++ skm->skm_size, skm->skm_refill, skm->skm_avail); ++ ++ if (likely(skm->skm_avail)) { ++ /* Object available in CPU cache, use it */ ++ obj = skm->skm_objs[--skm->skm_avail]; ++ skm->skm_age = jiffies; ++ } else { ++ obj = spl_cache_refill(skc, skm, flags); ++ if (obj == NULL) ++ SGOTO(restart, obj = NULL); ++ } ++ ++ local_irq_restore(irq_flags); ++ ASSERT(obj); ++ ASSERT(IS_P2ALIGNED(obj, skc->skc_obj_align)); ++ ++ /* Pre-emptively migrate object to CPU L1 cache */ ++ prefetchw(obj); ++ atomic_dec(&skc->skc_ref); ++ ++ SRETURN(obj); ++} ++EXPORT_SYMBOL(spl_kmem_cache_alloc); ++ ++/* ++ * Free an object back to the local per-cpu magazine, there is no ++ * guarantee that this is the same magazine the object was originally ++ * allocated from. We may need to flush entire from the magazine ++ * back to the slabs to make space. ++ */ ++void ++spl_kmem_cache_free(spl_kmem_cache_t *skc, void *obj) ++{ ++ spl_kmem_magazine_t *skm; ++ unsigned long flags; ++ SENTRY; ++ ++ ASSERT(skc->skc_magic == SKC_MAGIC); ++ ASSERT(!test_bit(KMC_BIT_DESTROY, &skc->skc_flags)); ++ atomic_inc(&skc->skc_ref); ++ ++ /* ++ * Only virtual slabs may have emergency objects and these objects ++ * are guaranteed to have physical addresses. They must be removed ++ * from the tree of emergency objects and the freed. ++ */ ++ if ((skc->skc_flags & KMC_VMEM) && !kmem_virt(obj)) ++ SGOTO(out, spl_emergency_free(skc, obj)); ++ ++ local_irq_save(flags); ++ ++ /* Safe to update per-cpu structure without lock, but ++ * no remote memory allocation tracking is being performed ++ * it is entirely possible to allocate an object from one ++ * CPU cache and return it to another. 
*/ ++ skm = skc->skc_mag[smp_processor_id()]; ++ ASSERT(skm->skm_magic == SKM_MAGIC); ++ ++ /* Per-CPU cache full, flush it to make space */ ++ if (unlikely(skm->skm_avail >= skm->skm_size)) ++ (void)spl_cache_flush(skc, skm, skm->skm_refill); ++ ++ /* Available space in cache, use it */ ++ skm->skm_objs[skm->skm_avail++] = obj; ++ ++ local_irq_restore(flags); ++out: ++ atomic_dec(&skc->skc_ref); ++ ++ SEXIT; ++} ++EXPORT_SYMBOL(spl_kmem_cache_free); ++ ++/* ++ * The generic shrinker function for all caches. Under Linux a shrinker ++ * may not be tightly coupled with a slab cache. In fact Linux always ++ * systematically tries calling all registered shrinker callbacks which ++ * report that they contain unused objects. Because of this we only ++ * register one shrinker function in the shim layer for all slab caches. ++ * We always attempt to shrink all caches when this generic shrinker ++ * is called. The shrinker should return the number of free objects ++ * in the cache when called with nr_to_scan == 0 but not attempt to ++ * free any objects. When nr_to_scan > 0 it is a request that nr_to_scan ++ * objects should be freed, which differs from Solaris semantics. ++ * Solaris semantics are to free all available objects which may (and ++ * probably will) be more objects than the requested nr_to_scan. ++ */ ++static int ++__spl_kmem_cache_generic_shrinker(struct shrinker *shrink, ++ struct shrink_control *sc) ++{ ++ spl_kmem_cache_t *skc; ++ int unused = 0; ++ ++ down_read(&spl_kmem_cache_sem); ++ list_for_each_entry(skc, &spl_kmem_cache_list, skc_list) { ++ if (sc->nr_to_scan) ++ spl_kmem_cache_reap_now(skc, ++ MAX(sc->nr_to_scan >> fls64(skc->skc_slab_objs), 1)); ++ ++ /* ++ * Presume everything alloc'ed in reclaimable, this ensures ++ * we are called again with nr_to_scan > 0 so can try and ++ * reclaim. The exact number is not important either so ++ * we forgo taking this already highly contented lock. ++ */ ++ unused += skc->skc_obj_alloc; ++ } ++ up_read(&spl_kmem_cache_sem); ++ ++ return (unused * sysctl_vfs_cache_pressure) / 100; ++} ++ ++SPL_SHRINKER_CALLBACK_WRAPPER(spl_kmem_cache_generic_shrinker); ++ ++/* ++ * Call the registered reclaim function for a cache. Depending on how ++ * many and which objects are released it may simply repopulate the ++ * local magazine which will then need to age-out. Objects which cannot ++ * fit in the magazine we will be released back to their slabs which will ++ * also need to age out before being release. This is all just best ++ * effort and we do not want to thrash creating and destroying slabs. ++ */ ++void ++spl_kmem_cache_reap_now(spl_kmem_cache_t *skc, int count) ++{ ++ SENTRY; ++ ++ ASSERT(skc->skc_magic == SKC_MAGIC); ++ ASSERT(!test_bit(KMC_BIT_DESTROY, &skc->skc_flags)); ++ ++ /* Prevent concurrent cache reaping when contended */ ++ if (test_and_set_bit(KMC_BIT_REAPING, &skc->skc_flags)) { ++ SEXIT; ++ return; ++ } ++ ++ atomic_inc(&skc->skc_ref); ++ ++ /* ++ * When a reclaim function is available it may be invoked repeatedly ++ * until at least a single slab can be freed. This ensures that we ++ * do free memory back to the system. This helps minimize the chance ++ * of an OOM event when the bulk of memory is used by the slab. ++ * ++ * When free slabs are already available the reclaim callback will be ++ * skipped. Additionally, if no forward progress is detected despite ++ * a reclaim function the cache will be skipped to avoid deadlock. 
++ * ++ * Longer term this would be the correct place to add the code which ++ * repacks the slabs in order minimize fragmentation. ++ */ ++ if (skc->skc_reclaim) { ++ uint64_t objects = UINT64_MAX; ++ int do_reclaim; ++ ++ do { ++ spin_lock(&skc->skc_lock); ++ do_reclaim = ++ (skc->skc_slab_total > 0) && ++ ((skc->skc_slab_total - skc->skc_slab_alloc) == 0) && ++ (skc->skc_obj_alloc < objects); ++ ++ objects = skc->skc_obj_alloc; ++ spin_unlock(&skc->skc_lock); ++ ++ if (do_reclaim) ++ skc->skc_reclaim(skc->skc_private); ++ ++ } while (do_reclaim); ++ } ++ ++ /* Reclaim from the cache, ignoring it's age and delay. */ ++ spl_slab_reclaim(skc, count, 1); ++ clear_bit(KMC_BIT_REAPING, &skc->skc_flags); ++ smp_mb__after_clear_bit(); ++ wake_up_bit(&skc->skc_flags, KMC_BIT_REAPING); ++ ++ atomic_dec(&skc->skc_ref); ++ ++ SEXIT; ++} ++EXPORT_SYMBOL(spl_kmem_cache_reap_now); ++ ++/* ++ * Reap all free slabs from all registered caches. ++ */ ++void ++spl_kmem_reap(void) ++{ ++ struct shrink_control sc; ++ ++ sc.nr_to_scan = KMC_REAP_CHUNK; ++ sc.gfp_mask = GFP_KERNEL; ++ ++ __spl_kmem_cache_generic_shrinker(NULL, &sc); ++} ++EXPORT_SYMBOL(spl_kmem_reap); ++ ++#if defined(DEBUG_KMEM) && defined(DEBUG_KMEM_TRACKING) ++static char * ++spl_sprintf_addr(kmem_debug_t *kd, char *str, int len, int min) ++{ ++ int size = ((len - 1) < kd->kd_size) ? (len - 1) : kd->kd_size; ++ int i, flag = 1; ++ ++ ASSERT(str != NULL && len >= 17); ++ memset(str, 0, len); ++ ++ /* Check for a fully printable string, and while we are at ++ * it place the printable characters in the passed buffer. */ ++ for (i = 0; i < size; i++) { ++ str[i] = ((char *)(kd->kd_addr))[i]; ++ if (isprint(str[i])) { ++ continue; ++ } else { ++ /* Minimum number of printable characters found ++ * to make it worthwhile to print this as ascii. */ ++ if (i > min) ++ break; ++ ++ flag = 0; ++ break; ++ } ++ } ++ ++ if (!flag) { ++ sprintf(str, "%02x%02x%02x%02x%02x%02x%02x%02x", ++ *((uint8_t *)kd->kd_addr), ++ *((uint8_t *)kd->kd_addr + 2), ++ *((uint8_t *)kd->kd_addr + 4), ++ *((uint8_t *)kd->kd_addr + 6), ++ *((uint8_t *)kd->kd_addr + 8), ++ *((uint8_t *)kd->kd_addr + 10), ++ *((uint8_t *)kd->kd_addr + 12), ++ *((uint8_t *)kd->kd_addr + 14)); ++ } ++ ++ return str; ++} ++ ++static int ++spl_kmem_init_tracking(struct list_head *list, spinlock_t *lock, int size) ++{ ++ int i; ++ SENTRY; ++ ++ spin_lock_init(lock); ++ INIT_LIST_HEAD(list); ++ ++ for (i = 0; i < size; i++) ++ INIT_HLIST_HEAD(&kmem_table[i]); ++ ++ SRETURN(0); ++} ++ ++static void ++spl_kmem_fini_tracking(struct list_head *list, spinlock_t *lock) ++{ ++ unsigned long flags; ++ kmem_debug_t *kd; ++ char str[17]; ++ SENTRY; ++ ++ spin_lock_irqsave(lock, flags); ++ if (!list_empty(list)) ++ printk(KERN_WARNING "%-16s %-5s %-16s %s:%s\n", "address", ++ "size", "data", "func", "line"); ++ ++ list_for_each_entry(kd, list, kd_list) ++ printk(KERN_WARNING "%p %-5d %-16s %s:%d\n", kd->kd_addr, ++ (int)kd->kd_size, spl_sprintf_addr(kd, str, 17, 8), ++ kd->kd_func, kd->kd_line); ++ ++ spin_unlock_irqrestore(lock, flags); ++ SEXIT; ++} ++#else /* DEBUG_KMEM && DEBUG_KMEM_TRACKING */ ++#define spl_kmem_init_tracking(list, lock, size) ++#define spl_kmem_fini_tracking(list, lock) ++#endif /* DEBUG_KMEM && DEBUG_KMEM_TRACKING */ ++ ++static void ++spl_kmem_init_globals(void) ++{ ++ struct zone *zone; ++ ++ /* For now all zones are includes, it may be wise to restrict ++ * this to normal and highmem zones if we see problems. 
*/ ++ for_each_zone(zone) { ++ ++ if (!populated_zone(zone)) ++ continue; ++ ++ minfree += min_wmark_pages(zone); ++ desfree += low_wmark_pages(zone); ++ lotsfree += high_wmark_pages(zone); ++ } ++ ++ /* Solaris default values */ ++ swapfs_minfree = MAX(2*1024*1024 >> PAGE_SHIFT, physmem >> 3); ++ swapfs_reserve = MIN(4*1024*1024 >> PAGE_SHIFT, physmem >> 4); ++} ++ ++/* ++ * Called at module init when it is safe to use spl_kallsyms_lookup_name() ++ */ ++int ++spl_kmem_init_kallsyms_lookup(void) ++{ ++#ifndef HAVE_GET_VMALLOC_INFO ++ get_vmalloc_info_fn = (get_vmalloc_info_t) ++ spl_kallsyms_lookup_name("get_vmalloc_info"); ++ if (!get_vmalloc_info_fn) { ++ printk(KERN_ERR "Error: Unknown symbol get_vmalloc_info\n"); ++ return -EFAULT; ++ } ++#endif /* HAVE_GET_VMALLOC_INFO */ ++ ++#ifdef HAVE_PGDAT_HELPERS ++# ifndef HAVE_FIRST_ONLINE_PGDAT ++ first_online_pgdat_fn = (first_online_pgdat_t) ++ spl_kallsyms_lookup_name("first_online_pgdat"); ++ if (!first_online_pgdat_fn) { ++ printk(KERN_ERR "Error: Unknown symbol first_online_pgdat\n"); ++ return -EFAULT; ++ } ++# endif /* HAVE_FIRST_ONLINE_PGDAT */ ++ ++# ifndef HAVE_NEXT_ONLINE_PGDAT ++ next_online_pgdat_fn = (next_online_pgdat_t) ++ spl_kallsyms_lookup_name("next_online_pgdat"); ++ if (!next_online_pgdat_fn) { ++ printk(KERN_ERR "Error: Unknown symbol next_online_pgdat\n"); ++ return -EFAULT; ++ } ++# endif /* HAVE_NEXT_ONLINE_PGDAT */ ++ ++# ifndef HAVE_NEXT_ZONE ++ next_zone_fn = (next_zone_t) ++ spl_kallsyms_lookup_name("next_zone"); ++ if (!next_zone_fn) { ++ printk(KERN_ERR "Error: Unknown symbol next_zone\n"); ++ return -EFAULT; ++ } ++# endif /* HAVE_NEXT_ZONE */ ++ ++#else /* HAVE_PGDAT_HELPERS */ ++ ++# ifndef HAVE_PGDAT_LIST ++ pgdat_list_addr = *(struct pglist_data **) ++ spl_kallsyms_lookup_name("pgdat_list"); ++ if (!pgdat_list_addr) { ++ printk(KERN_ERR "Error: Unknown symbol pgdat_list\n"); ++ return -EFAULT; ++ } ++# endif /* HAVE_PGDAT_LIST */ ++#endif /* HAVE_PGDAT_HELPERS */ ++ ++#if defined(NEED_GET_ZONE_COUNTS) && !defined(HAVE_GET_ZONE_COUNTS) ++ get_zone_counts_fn = (get_zone_counts_t) ++ spl_kallsyms_lookup_name("get_zone_counts"); ++ if (!get_zone_counts_fn) { ++ printk(KERN_ERR "Error: Unknown symbol get_zone_counts\n"); ++ return -EFAULT; ++ } ++#endif /* NEED_GET_ZONE_COUNTS && !HAVE_GET_ZONE_COUNTS */ ++ ++ /* ++ * It is now safe to initialize the global tunings which rely on ++ * the use of the for_each_zone() macro. This macro in turns ++ * depends on the *_pgdat symbols which are now available. 
++ */ ++ spl_kmem_init_globals(); ++ ++#if !defined(HAVE_INVALIDATE_INODES) && !defined(HAVE_INVALIDATE_INODES_CHECK) ++ invalidate_inodes_fn = (invalidate_inodes_t) ++ spl_kallsyms_lookup_name("invalidate_inodes"); ++ if (!invalidate_inodes_fn) { ++ printk(KERN_ERR "Error: Unknown symbol invalidate_inodes\n"); ++ return -EFAULT; ++ } ++#endif /* !HAVE_INVALIDATE_INODES && !HAVE_INVALIDATE_INODES_CHECK */ ++ ++#ifndef HAVE_SHRINK_DCACHE_MEMORY ++ /* When shrink_dcache_memory_fn == NULL support is disabled */ ++ shrink_dcache_memory_fn = (shrink_dcache_memory_t) ++ spl_kallsyms_lookup_name("shrink_dcache_memory"); ++#endif /* HAVE_SHRINK_DCACHE_MEMORY */ ++ ++#ifndef HAVE_SHRINK_ICACHE_MEMORY ++ /* When shrink_icache_memory_fn == NULL support is disabled */ ++ shrink_icache_memory_fn = (shrink_icache_memory_t) ++ spl_kallsyms_lookup_name("shrink_icache_memory"); ++#endif /* HAVE_SHRINK_ICACHE_MEMORY */ ++ ++ return 0; ++} ++ ++int ++spl_kmem_init(void) ++{ ++ int rc = 0; ++ SENTRY; ++ ++ init_rwsem(&spl_kmem_cache_sem); ++ INIT_LIST_HEAD(&spl_kmem_cache_list); ++ ++ spl_register_shrinker(&spl_kmem_cache_shrinker); ++ ++#ifdef DEBUG_KMEM ++ kmem_alloc_used_set(0); ++ vmem_alloc_used_set(0); ++ ++ spl_kmem_init_tracking(&kmem_list, &kmem_lock, KMEM_TABLE_SIZE); ++ spl_kmem_init_tracking(&vmem_list, &vmem_lock, VMEM_TABLE_SIZE); ++#endif ++ SRETURN(rc); ++} ++ ++void ++spl_kmem_fini(void) ++{ ++#ifdef DEBUG_KMEM ++ /* Display all unreclaimed memory addresses, including the ++ * allocation size and the first few bytes of what's located ++ * at that address to aid in debugging. Performance is not ++ * a serious concern here since it is module unload time. */ ++ if (kmem_alloc_used_read() != 0) ++ SDEBUG_LIMIT(SD_CONSOLE | SD_WARNING, ++ "kmem leaked %ld/%ld bytes\n", ++ kmem_alloc_used_read(), kmem_alloc_max); ++ ++ ++ if (vmem_alloc_used_read() != 0) ++ SDEBUG_LIMIT(SD_CONSOLE | SD_WARNING, ++ "vmem leaked %ld/%ld bytes\n", ++ vmem_alloc_used_read(), vmem_alloc_max); ++ ++ spl_kmem_fini_tracking(&kmem_list, &kmem_lock); ++ spl_kmem_fini_tracking(&vmem_list, &vmem_lock); ++#endif /* DEBUG_KMEM */ ++ SENTRY; ++ ++ spl_unregister_shrinker(&spl_kmem_cache_shrinker); ++ ++ SEXIT; ++} +diff -uNr linux-3.2.33-go.orig/spl/spl/spl-kobj.c linux-3.2.33-go/spl/spl/spl-kobj.c +--- linux-3.2.33-go.orig/spl/spl/spl-kobj.c 1970-01-01 01:00:00.000000000 +0100 ++++ linux-3.2.33-go/spl/spl/spl-kobj.c 2012-11-16 23:22:32.410192863 +0100 +@@ -0,0 +1,93 @@ ++/*****************************************************************************\ ++ * Copyright (C) 2007-2010 Lawrence Livermore National Security, LLC. ++ * Copyright (C) 2007 The Regents of the University of California. ++ * Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER). ++ * Written by Brian Behlendorf . ++ * UCRL-CODE-235197 ++ * ++ * This file is part of the SPL, Solaris Porting Layer. ++ * For details, see . ++ * ++ * The SPL is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License as published by the ++ * Free Software Foundation; either version 2 of the License, or (at your ++ * option) any later version. ++ * ++ * The SPL is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * for more details. ++ * ++ * You should have received a copy of the GNU General Public License along ++ * with the SPL. If not, see . 
++ ***************************************************************************** ++ * Solaris Porting Layer (SPL) Kobj Implementation. ++\*****************************************************************************/ ++ ++#include ++#include ++ ++#ifdef SS_DEBUG_SUBSYS ++#undef SS_DEBUG_SUBSYS ++#endif ++ ++#define SS_DEBUG_SUBSYS SS_KOBJ ++ ++struct _buf * ++kobj_open_file(const char *name) ++{ ++ struct _buf *file; ++ vnode_t *vp; ++ int rc; ++ SENTRY; ++ ++ file = kmalloc(sizeof(_buf_t), GFP_KERNEL); ++ if (file == NULL) ++ SRETURN((_buf_t *)-1UL); ++ ++ if ((rc = vn_open(name, UIO_SYSSPACE, FREAD, 0644, &vp, 0, 0))) { ++ kfree(file); ++ SRETURN((_buf_t *)-1UL); ++ } ++ ++ file->vp = vp; ++ ++ SRETURN(file); ++} /* kobj_open_file() */ ++EXPORT_SYMBOL(kobj_open_file); ++ ++void ++kobj_close_file(struct _buf *file) ++{ ++ SENTRY; ++ VOP_CLOSE(file->vp, 0, 0, 0, 0, 0); ++ kfree(file); ++ SEXIT; ++} /* kobj_close_file() */ ++EXPORT_SYMBOL(kobj_close_file); ++ ++int ++kobj_read_file(struct _buf *file, char *buf, ssize_t size, offset_t off) ++{ ++ SENTRY; ++ SRETURN(vn_rdwr(UIO_READ, file->vp, buf, size, off, ++ UIO_SYSSPACE, 0, RLIM64_INFINITY, 0, NULL)); ++} /* kobj_read_file() */ ++EXPORT_SYMBOL(kobj_read_file); ++ ++int ++kobj_get_filesize(struct _buf *file, uint64_t *size) ++{ ++ vattr_t vap; ++ int rc; ++ SENTRY; ++ ++ rc = VOP_GETATTR(file->vp, &vap, 0, 0, NULL); ++ if (rc) ++ SRETURN(rc); ++ ++ *size = vap.va_size; ++ ++ SRETURN(rc); ++} /* kobj_get_filesize() */ ++EXPORT_SYMBOL(kobj_get_filesize); +diff -uNr linux-3.2.33-go.orig/spl/spl/spl-kstat.c linux-3.2.33-go/spl/spl/spl-kstat.c +--- linux-3.2.33-go.orig/spl/spl/spl-kstat.c 1970-01-01 01:00:00.000000000 +0100 ++++ linux-3.2.33-go/spl/spl/spl-kstat.c 2012-11-16 23:22:32.410192863 +0100 +@@ -0,0 +1,549 @@ ++/*****************************************************************************\ ++ * Copyright (C) 2007-2010 Lawrence Livermore National Security, LLC. ++ * Copyright (C) 2007 The Regents of the University of California. ++ * Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER). ++ * Written by Brian Behlendorf . ++ * UCRL-CODE-235197 ++ * ++ * This file is part of the SPL, Solaris Porting Layer. ++ * For details, see . ++ * ++ * The SPL is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License as published by the ++ * Free Software Foundation; either version 2 of the License, or (at your ++ * option) any later version. ++ * ++ * The SPL is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * for more details. ++ * ++ * You should have received a copy of the GNU General Public License along ++ * with the SPL. If not, see . ++ ***************************************************************************** ++ * Solaris Porting Layer (SPL) Kstat Implementation. 
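/*
 * Usage sketch (illustrative only, not part of the diff above): how a
 * caller might read a whole file through the kobj_* interface exported
 * by spl-kobj.c.  The helper name, the path argument, and the error
 * handling policy are assumptions made for the example;
 * kmem_alloc()/kmem_free() are the SPL allocators used elsewhere in
 * this patch.
 */
static int
example_kobj_slurp(const char *path)
{
	struct _buf *file;
	uint64_t size;
	char *buf;
	int rc;

	file = kobj_open_file(path);
	if (file == (struct _buf *)-1UL)	/* open failure sentinel */
		return (-ENOENT);

	rc = kobj_get_filesize(file, &size);
	if (rc == 0) {
		buf = kmem_alloc(size, KM_SLEEP);
		rc = kobj_read_file(file, buf, size, 0);
		/* ... consume buf[0 .. size) here ... */
		kmem_free(buf, size);
		/* assumption: a negative return indicates a read error */
		if (rc >= 0)
			rc = 0;
	}

	kobj_close_file(file);
	return (rc);
}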
++\*****************************************************************************/ ++ ++#include ++#include ++#include ++ ++#ifdef SS_DEBUG_SUBSYS ++#undef SS_DEBUG_SUBSYS ++#endif ++ ++#define SS_DEBUG_SUBSYS SS_KSTAT ++ ++static spinlock_t kstat_lock; ++static struct list_head kstat_list; ++static kid_t kstat_id; ++ ++static void ++kstat_seq_show_headers(struct seq_file *f) ++{ ++ kstat_t *ksp = (kstat_t *)f->private; ++ ASSERT(ksp->ks_magic == KS_MAGIC); ++ ++ seq_printf(f, "%d %d 0x%02x %d %d %lld %lld\n", ++ ksp->ks_kid, ksp->ks_type, ksp->ks_flags, ++ ksp->ks_ndata, (int)ksp->ks_data_size, ++ ksp->ks_crtime, ksp->ks_snaptime); ++ ++ switch (ksp->ks_type) { ++ case KSTAT_TYPE_RAW: ++ seq_printf(f, "raw data"); ++ break; ++ case KSTAT_TYPE_NAMED: ++ seq_printf(f, "%-31s %-4s %s\n", ++ "name", "type", "data"); ++ break; ++ case KSTAT_TYPE_INTR: ++ seq_printf(f, "%-8s %-8s %-8s %-8s %-8s\n", ++ "hard", "soft", "watchdog", ++ "spurious", "multsvc"); ++ break; ++ case KSTAT_TYPE_IO: ++ seq_printf(f, ++ "%-8s %-8s %-8s %-8s %-8s %-8s " ++ "%-8s %-8s %-8s %-8s %-8s %-8s\n", ++ "nread", "nwritten", "reads", "writes", ++ "wtime", "wlentime", "wupdate", ++ "rtime", "rlentime", "rupdate", ++ "wcnt", "rcnt"); ++ break; ++ case KSTAT_TYPE_TIMER: ++ seq_printf(f, ++ "%-31s %-8s " ++ "%-8s %-8s %-8s %-8s %-8s\n", ++ "name", "events", "elapsed", ++ "min", "max", "start", "stop"); ++ break; ++ case KSTAT_TYPE_TXG: ++ seq_printf(f, ++ "%-8s %-5s %-13s %-12s %-12s %-8s %-8s " ++ "%-12s %-12s %-12s\n", ++ "txg", "state", "birth", ++ "nread", "nwritten", "reads", "writes", ++ "otime", "qtime", "stime"); ++ break; ++ default: ++ PANIC("Undefined kstat type %d\n", ksp->ks_type); ++ } ++} ++ ++static int ++kstat_seq_show_raw(struct seq_file *f, unsigned char *p, int l) ++{ ++ int i, j; ++ ++ for (i = 0; ; i++) { ++ seq_printf(f, "%03x:", i); ++ ++ for (j = 0; j < 16; j++) { ++ if (i * 16 + j >= l) { ++ seq_printf(f, "\n"); ++ goto out; ++ } ++ ++ seq_printf(f, " %02x", (unsigned char)p[i * 16 + j]); ++ } ++ seq_printf(f, "\n"); ++ } ++out: ++ return 0; ++} ++ ++static int ++kstat_seq_show_named(struct seq_file *f, kstat_named_t *knp) ++{ ++ seq_printf(f, "%-31s %-4d ", knp->name, knp->data_type); ++ ++ switch (knp->data_type) { ++ case KSTAT_DATA_CHAR: ++ knp->value.c[15] = '\0'; /* NULL terminate */ ++ seq_printf(f, "%-16s", knp->value.c); ++ break; ++ /* XXX - We need to be more careful able what tokens are ++ * used for each arch, for now this is correct for x86_64. 
++ */ ++ case KSTAT_DATA_INT32: ++ seq_printf(f, "%d", knp->value.i32); ++ break; ++ case KSTAT_DATA_UINT32: ++ seq_printf(f, "%u", knp->value.ui32); ++ break; ++ case KSTAT_DATA_INT64: ++ seq_printf(f, "%lld", (signed long long)knp->value.i64); ++ break; ++ case KSTAT_DATA_UINT64: ++ seq_printf(f, "%llu", (unsigned long long)knp->value.ui64); ++ break; ++ case KSTAT_DATA_LONG: ++ seq_printf(f, "%ld", knp->value.l); ++ break; ++ case KSTAT_DATA_ULONG: ++ seq_printf(f, "%lu", knp->value.ul); ++ break; ++ case KSTAT_DATA_STRING: ++ KSTAT_NAMED_STR_PTR(knp) ++ [KSTAT_NAMED_STR_BUFLEN(knp)-1] = '\0'; ++ seq_printf(f, "%s", KSTAT_NAMED_STR_PTR(knp)); ++ break; ++ default: ++ PANIC("Undefined kstat data type %d\n", knp->data_type); ++ } ++ ++ seq_printf(f, "\n"); ++ ++ return 0; ++} ++ ++static int ++kstat_seq_show_intr(struct seq_file *f, kstat_intr_t *kip) ++{ ++ seq_printf(f, "%-8u %-8u %-8u %-8u %-8u\n", ++ kip->intrs[KSTAT_INTR_HARD], ++ kip->intrs[KSTAT_INTR_SOFT], ++ kip->intrs[KSTAT_INTR_WATCHDOG], ++ kip->intrs[KSTAT_INTR_SPURIOUS], ++ kip->intrs[KSTAT_INTR_MULTSVC]); ++ ++ return 0; ++} ++ ++static int ++kstat_seq_show_io(struct seq_file *f, kstat_io_t *kip) ++{ ++ seq_printf(f, ++ "%-8llu %-8llu %-8u %-8u %-8lld %-8lld " ++ "%-8lld %-8lld %-8lld %-8lld %-8u %-8u\n", ++ kip->nread, kip->nwritten, ++ kip->reads, kip->writes, ++ kip->wtime, kip->wlentime, kip->wlastupdate, ++ kip->rtime, kip->wlentime, kip->rlastupdate, ++ kip->wcnt, kip->rcnt); ++ ++ return 0; ++} ++ ++static int ++kstat_seq_show_timer(struct seq_file *f, kstat_timer_t *ktp) ++{ ++ seq_printf(f, ++ "%-31s %-8llu %-8lld %-8lld %-8lld %-8lld %-8lld\n", ++ ktp->name, ktp->num_events, ktp->elapsed_time, ++ ktp->min_time, ktp->max_time, ++ ktp->start_time, ktp->stop_time); ++ ++ return 0; ++} ++ ++static int ++kstat_seq_show_txg(struct seq_file *f, kstat_txg_t *ktp) ++{ ++ char state; ++ ++ switch (ktp->state) { ++ case TXG_STATE_OPEN: state = 'O'; break; ++ case TXG_STATE_QUIESCING: state = 'Q'; break; ++ case TXG_STATE_SYNCING: state = 'S'; break; ++ case TXG_STATE_COMMITTED: state = 'C'; break; ++ default: state = '?'; break; ++ } ++ ++ seq_printf(f, ++ "%-8llu %-5c %-13llu %-12llu %-12llu %-8u %-8u " ++ "%12lld %12lld %12lld\n", ktp->txg, state, ktp->birth, ++ ktp->nread, ktp->nwritten, ktp->reads, ktp->writes, ++ ktp->open_time, ktp->quiesce_time, ktp->sync_time); ++ return 0; ++} ++ ++static int ++kstat_seq_show(struct seq_file *f, void *p) ++{ ++ kstat_t *ksp = (kstat_t *)f->private; ++ int rc = 0; ++ ++ ASSERT(ksp->ks_magic == KS_MAGIC); ++ ++ switch (ksp->ks_type) { ++ case KSTAT_TYPE_RAW: ++ ASSERT(ksp->ks_ndata == 1); ++ rc = kstat_seq_show_raw(f, ksp->ks_data, ++ ksp->ks_data_size); ++ break; ++ case KSTAT_TYPE_NAMED: ++ rc = kstat_seq_show_named(f, (kstat_named_t *)p); ++ break; ++ case KSTAT_TYPE_INTR: ++ rc = kstat_seq_show_intr(f, (kstat_intr_t *)p); ++ break; ++ case KSTAT_TYPE_IO: ++ rc = kstat_seq_show_io(f, (kstat_io_t *)p); ++ break; ++ case KSTAT_TYPE_TIMER: ++ rc = kstat_seq_show_timer(f, (kstat_timer_t *)p); ++ break; ++ case KSTAT_TYPE_TXG: ++ rc = kstat_seq_show_txg(f, (kstat_txg_t *)p); ++ break; ++ default: ++ PANIC("Undefined kstat type %d\n", ksp->ks_type); ++ } ++ ++ return rc; ++} ++ ++int ++kstat_default_update(kstat_t *ksp, int rw) ++{ ++ ASSERT(ksp != NULL); ++ return 0; ++} ++ ++static void * ++kstat_seq_data_addr(kstat_t *ksp, loff_t n) ++{ ++ void *rc = NULL; ++ SENTRY; ++ ++ switch (ksp->ks_type) { ++ case KSTAT_TYPE_RAW: ++ rc = ksp->ks_data; ++ break; ++ case KSTAT_TYPE_NAMED: ++ rc = 
ksp->ks_data + n * sizeof(kstat_named_t); ++ break; ++ case KSTAT_TYPE_INTR: ++ rc = ksp->ks_data + n * sizeof(kstat_intr_t); ++ break; ++ case KSTAT_TYPE_IO: ++ rc = ksp->ks_data + n * sizeof(kstat_io_t); ++ break; ++ case KSTAT_TYPE_TIMER: ++ rc = ksp->ks_data + n * sizeof(kstat_timer_t); ++ break; ++ case KSTAT_TYPE_TXG: ++ rc = ksp->ks_data + n * sizeof(kstat_txg_t); ++ break; ++ default: ++ PANIC("Undefined kstat type %d\n", ksp->ks_type); ++ } ++ ++ SRETURN(rc); ++} ++ ++static void * ++kstat_seq_start(struct seq_file *f, loff_t *pos) ++{ ++ loff_t n = *pos; ++ kstat_t *ksp = (kstat_t *)f->private; ++ ASSERT(ksp->ks_magic == KS_MAGIC); ++ SENTRY; ++ ++ mutex_enter(&ksp->ks_lock); ++ ++ /* Dynamically update kstat, on error existing kstats are used */ ++ (void) ksp->ks_update(ksp, KSTAT_READ); ++ ++ ksp->ks_snaptime = gethrtime(); ++ ++ if (!n) ++ kstat_seq_show_headers(f); ++ ++ if (n >= ksp->ks_ndata) ++ SRETURN(NULL); ++ ++ SRETURN(kstat_seq_data_addr(ksp, n)); ++} ++ ++static void * ++kstat_seq_next(struct seq_file *f, void *p, loff_t *pos) ++{ ++ kstat_t *ksp = (kstat_t *)f->private; ++ ASSERT(ksp->ks_magic == KS_MAGIC); ++ SENTRY; ++ ++ ++*pos; ++ if (*pos >= ksp->ks_ndata) ++ SRETURN(NULL); ++ ++ SRETURN(kstat_seq_data_addr(ksp, *pos)); ++} ++ ++static void ++kstat_seq_stop(struct seq_file *f, void *v) ++{ ++ kstat_t *ksp = (kstat_t *)f->private; ++ ASSERT(ksp->ks_magic == KS_MAGIC); ++ ++ mutex_exit(&ksp->ks_lock); ++} ++ ++static struct seq_operations kstat_seq_ops = { ++ .show = kstat_seq_show, ++ .start = kstat_seq_start, ++ .next = kstat_seq_next, ++ .stop = kstat_seq_stop, ++}; ++ ++static int ++proc_kstat_open(struct inode *inode, struct file *filp) ++{ ++ struct seq_file *f; ++ int rc; ++ ++ rc = seq_open(filp, &kstat_seq_ops); ++ if (rc) ++ return rc; ++ ++ f = filp->private_data; ++ f->private = PDE(inode)->data; ++ ++ return rc; ++} ++ ++static struct file_operations proc_kstat_operations = { ++ .open = proc_kstat_open, ++ .read = seq_read, ++ .llseek = seq_lseek, ++ .release = seq_release, ++}; ++ ++kstat_t * ++__kstat_create(const char *ks_module, int ks_instance, const char *ks_name, ++ const char *ks_class, uchar_t ks_type, uint_t ks_ndata, ++ uchar_t ks_flags) ++{ ++ kstat_t *ksp; ++ ++ ASSERT(ks_module); ++ ASSERT(ks_instance == 0); ++ ASSERT(ks_name); ++ ASSERT(!(ks_flags & KSTAT_FLAG_UNSUPPORTED)); ++ ++ if ((ks_type == KSTAT_TYPE_INTR) || (ks_type == KSTAT_TYPE_IO)) ++ ASSERT(ks_ndata == 1); ++ ++ ksp = kmem_zalloc(sizeof(*ksp), KM_SLEEP); ++ if (ksp == NULL) ++ return ksp; ++ ++ spin_lock(&kstat_lock); ++ ksp->ks_kid = kstat_id; ++ kstat_id++; ++ spin_unlock(&kstat_lock); ++ ++ ksp->ks_magic = KS_MAGIC; ++ mutex_init(&ksp->ks_lock, NULL, MUTEX_DEFAULT, NULL); ++ INIT_LIST_HEAD(&ksp->ks_list); ++ ++ ksp->ks_crtime = gethrtime(); ++ ksp->ks_snaptime = ksp->ks_crtime; ++ strncpy(ksp->ks_module, ks_module, KSTAT_STRLEN); ++ ksp->ks_instance = ks_instance; ++ strncpy(ksp->ks_name, ks_name, KSTAT_STRLEN); ++ strncpy(ksp->ks_class, ks_class, KSTAT_STRLEN); ++ ksp->ks_type = ks_type; ++ ksp->ks_flags = ks_flags; ++ ksp->ks_update = kstat_default_update; ++ ksp->ks_private = NULL; ++ ++ switch (ksp->ks_type) { ++ case KSTAT_TYPE_RAW: ++ ksp->ks_ndata = 1; ++ ksp->ks_data_size = ks_ndata; ++ break; ++ case KSTAT_TYPE_NAMED: ++ ksp->ks_ndata = ks_ndata; ++ ksp->ks_data_size = ks_ndata * sizeof(kstat_named_t); ++ break; ++ case KSTAT_TYPE_INTR: ++ ksp->ks_ndata = ks_ndata; ++ ksp->ks_data_size = ks_ndata * sizeof(kstat_intr_t); ++ break; ++ case KSTAT_TYPE_IO: ++ 
ksp->ks_ndata = ks_ndata; ++ ksp->ks_data_size = ks_ndata * sizeof(kstat_io_t); ++ break; ++ case KSTAT_TYPE_TIMER: ++ ksp->ks_ndata = ks_ndata; ++ ksp->ks_data_size = ks_ndata * sizeof(kstat_timer_t); ++ break; ++ case KSTAT_TYPE_TXG: ++ ksp->ks_ndata = ks_ndata; ++ ksp->ks_data_size = ks_ndata * sizeof(kstat_timer_t); ++ break; ++ default: ++ PANIC("Undefined kstat type %d\n", ksp->ks_type); ++ } ++ ++ if (ksp->ks_flags & KSTAT_FLAG_VIRTUAL) { ++ ksp->ks_data = NULL; ++ } else { ++ ksp->ks_data = kmem_alloc(ksp->ks_data_size, KM_SLEEP); ++ if (ksp->ks_data == NULL) { ++ kmem_free(ksp, sizeof(*ksp)); ++ ksp = NULL; ++ } ++ } ++ ++ return ksp; ++} ++EXPORT_SYMBOL(__kstat_create); ++ ++void ++__kstat_install(kstat_t *ksp) ++{ ++ struct proc_dir_entry *de_module, *de_name; ++ kstat_t *tmp; ++ int rc = 0; ++ SENTRY; ++ ++ spin_lock(&kstat_lock); ++ ++ /* Item may only be added to the list once */ ++ list_for_each_entry(tmp, &kstat_list, ks_list) { ++ if (tmp == ksp) { ++ spin_unlock(&kstat_lock); ++ SGOTO(out, rc = -EEXIST); ++ } ++ } ++ ++ list_add_tail(&ksp->ks_list, &kstat_list); ++ spin_unlock(&kstat_lock); ++ ++ de_module = proc_dir_entry_find(proc_spl_kstat, ksp->ks_module); ++ if (de_module == NULL) { ++ de_module = proc_mkdir(ksp->ks_module, proc_spl_kstat); ++ if (de_module == NULL) ++ SGOTO(out, rc = -EUNATCH); ++ } ++ ++ de_name = create_proc_entry(ksp->ks_name, 0444, de_module); ++ if (de_name == NULL) ++ SGOTO(out, rc = -EUNATCH); ++ ++ mutex_enter(&ksp->ks_lock); ++ ksp->ks_proc = de_name; ++ de_name->proc_fops = &proc_kstat_operations; ++ de_name->data = (void *)ksp; ++ mutex_exit(&ksp->ks_lock); ++out: ++ if (rc) { ++ spin_lock(&kstat_lock); ++ list_del_init(&ksp->ks_list); ++ spin_unlock(&kstat_lock); ++ } ++ ++ SEXIT; ++} ++EXPORT_SYMBOL(__kstat_install); ++ ++void ++__kstat_delete(kstat_t *ksp) ++{ ++ struct proc_dir_entry *de_module; ++ ++ spin_lock(&kstat_lock); ++ list_del_init(&ksp->ks_list); ++ spin_unlock(&kstat_lock); ++ ++ if (ksp->ks_proc) { ++ de_module = ksp->ks_proc->parent; ++ remove_proc_entry(ksp->ks_name, de_module); ++ ++ /* Remove top level module directory if it's empty */ ++ if (proc_dir_entries(de_module) == 0) ++ remove_proc_entry(de_module->name, de_module->parent); ++ } ++ ++ if (!(ksp->ks_flags & KSTAT_FLAG_VIRTUAL)) ++ kmem_free(ksp->ks_data, ksp->ks_data_size); ++ ++ mutex_destroy(&ksp->ks_lock); ++ kmem_free(ksp, sizeof(*ksp)); ++ ++ return; ++} ++EXPORT_SYMBOL(__kstat_delete); ++ ++int ++spl_kstat_init(void) ++{ ++ SENTRY; ++ spin_lock_init(&kstat_lock); ++ INIT_LIST_HEAD(&kstat_list); ++ kstat_id = 0; ++ SRETURN(0); ++} ++ ++void ++spl_kstat_fini(void) ++{ ++ SENTRY; ++ ASSERT(list_empty(&kstat_list)); ++ SEXIT; ++} ++ +diff -uNr linux-3.2.33-go.orig/spl/spl/spl-mutex.c linux-3.2.33-go/spl/spl/spl-mutex.c +--- linux-3.2.33-go.orig/spl/spl/spl-mutex.c 1970-01-01 01:00:00.000000000 +0100 ++++ linux-3.2.33-go/spl/spl/spl-mutex.c 2012-11-16 23:22:32.410192863 +0100 +@@ -0,0 +1,77 @@ ++/*****************************************************************************\ ++ * Copyright (C) 2007-2010 Lawrence Livermore National Security, LLC. ++ * Copyright (C) 2007 The Regents of the University of California. ++ * Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER). ++ * Written by Brian Behlendorf . ++ * UCRL-CODE-235197 ++ * ++ * This file is part of the SPL, Solaris Porting Layer. ++ * For details, see . 
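/*
 * Usage sketch (illustrative only, not part of the diff above): publishing
 * a one-entry named kstat through __kstat_create()/__kstat_install() and
 * tearing it down with __kstat_delete().  The "example"/"stats" module and
 * name strings and the "hits" counter are made up for illustration.
 */
static kstat_t *example_ksp;

static int
example_kstat_publish(void)
{
	kstat_named_t *kn;

	example_ksp = __kstat_create("example", 0, "stats", "misc",
	    KSTAT_TYPE_NAMED, 1, 0);
	if (example_ksp == NULL)
		return (-ENOMEM);

	kn = (kstat_named_t *)example_ksp->ks_data;
	strncpy(kn->name, "hits", KSTAT_STRLEN);
	kn->data_type = KSTAT_DATA_UINT64;
	kn->value.ui64 = 0;

	/* Appears under /proc/spl/kstat/example/stats once installed. */
	__kstat_install(example_ksp);

	return (0);
}

static void
example_kstat_retire(void)
{
	if (example_ksp != NULL)
		__kstat_delete(example_ksp);
}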
++ * ++ * The SPL is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License as published by the ++ * Free Software Foundation; either version 2 of the License, or (at your ++ * option) any later version. ++ * ++ * The SPL is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * for more details. ++ * ++ * You should have received a copy of the GNU General Public License along ++ * with the SPL. If not, see . ++ ***************************************************************************** ++ * Solaris Porting Layer (SPL) Mutex Implementation. ++\*****************************************************************************/ ++ ++#include ++ ++#ifdef DEBUG_SUBSYSTEM ++#undef DEBUG_SUBSYSTEM ++#endif ++ ++#define DEBUG_SUBSYSTEM S_MUTEX ++ ++/* ++ * While a standard mutex implementation has been available in the kernel ++ * for quite some time. It was not until 2.6.29 and latter kernels that ++ * adaptive mutexs were embraced and integrated with the scheduler. This ++ * brought a significant performance improvement, but just as importantly ++ * it added a lock owner to the generic mutex outside CONFIG_DEBUG_MUTEXES ++ * builds. This is critical for correctly supporting the mutex_owner() ++ * Solaris primitive. When the owner is available we use a pure Linux ++ * mutex implementation. When the owner is not available we still use ++ * Linux mutexs as a base but also reserve space for an owner field right ++ * after the mutex structure. ++ * ++ * In the case when HAVE_MUTEX_OWNER is not defined your code may ++ * still me able to leverage adaptive mutexs. As long as the task_curr() ++ * symbol is exported this code will provide a poor mans adaptive mutex ++ * implementation. However, this is not required and if the symbol is ++ * unavailable we provide a standard mutex. ++ */ ++ ++#if !defined(HAVE_MUTEX_OWNER) || !defined(CONFIG_SMP) || defined(CONFIG_DEBUG_MUTEXES) ++#ifdef HAVE_TASK_CURR ++/* ++ * mutex_spin_max = { 0, -1, 1-MAX_INT } ++ * 0: Never spin when trying to acquire lock ++ * -1: Spin until acquired or holder yields without dropping lock ++ * 1-MAX_INT: Spin for N attempts before sleeping for lock ++ */ ++int mutex_spin_max = 0; ++module_param(mutex_spin_max, int, 0644); ++MODULE_PARM_DESC(mutex_spin_max, "Spin a maximum of N times to acquire lock"); ++ ++int ++spl_mutex_spin_max(void) ++{ ++ return mutex_spin_max; ++} ++EXPORT_SYMBOL(spl_mutex_spin_max); ++ ++#endif /* HAVE_TASK_CURR */ ++#endif /* !HAVE_MUTEX_OWNER */ ++ ++int spl_mutex_init(void) { return 0; } ++void spl_mutex_fini(void) { } +diff -uNr linux-3.2.33-go.orig/spl/spl/spl-proc.c linux-3.2.33-go/spl/spl/spl-proc.c +--- linux-3.2.33-go.orig/spl/spl/spl-proc.c 1970-01-01 01:00:00.000000000 +0100 ++++ linux-3.2.33-go/spl/spl/spl-proc.c 2012-11-16 23:22:32.410192863 +0100 +@@ -0,0 +1,1219 @@ ++/*****************************************************************************\ ++ * Copyright (C) 2007-2010 Lawrence Livermore National Security, LLC. ++ * Copyright (C) 2007 The Regents of the University of California. ++ * Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER). ++ * Written by Brian Behlendorf . ++ * UCRL-CODE-235197 ++ * ++ * This file is part of the SPL, Solaris Porting Layer. ++ * For details, see . 
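/*
 * Illustrative sketch only - not the SPL implementation (the real adaptive
 * logic lives in the mutex headers): one way spl_mutex_spin_max() and
 * task_curr() could drive a spin-then-sleep acquire when the kernel does
 * not expose the mutex owner.  The explicit owner bookkeeping shown here
 * is an assumption made purely for the example.
 */
static void
example_adaptive_enter(struct mutex *mp, struct task_struct **owner)
{
	int i, max = spl_mutex_spin_max();

	for (i = 0; (max == -1) || (i < max); i++) {
		if (mutex_trylock(mp))
			goto acquired;

		/* Stop spinning once the holder is no longer on a CPU. */
		if ((*owner == NULL) || !task_curr(*owner))
			break;

		cpu_relax();
	}

	mutex_lock(mp);			/* fall back to sleeping */
acquired:
	*owner = current;
}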
++ * ++ * The SPL is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License as published by the ++ * Free Software Foundation; either version 2 of the License, or (at your ++ * option) any later version. ++ * ++ * The SPL is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * for more details. ++ * ++ * You should have received a copy of the GNU General Public License along ++ * with the SPL. If not, see . ++ ***************************************************************************** ++ * Solaris Porting Layer (SPL) Proc Implementation. ++\*****************************************************************************/ ++ ++#include ++#include ++#include ++#include ++#include ++#include ++ ++#ifdef SS_DEBUG_SUBSYS ++#undef SS_DEBUG_SUBSYS ++#endif ++ ++#define SS_DEBUG_SUBSYS SS_PROC ++ ++#ifdef DEBUG_KMEM ++static unsigned long table_min = 0; ++static unsigned long table_max = ~0; ++#endif ++ ++#ifdef CONFIG_SYSCTL ++static struct ctl_table_header *spl_header = NULL; ++#endif /* CONFIG_SYSCTL */ ++ ++static struct proc_dir_entry *proc_spl = NULL; ++#ifdef DEBUG_KMEM ++static struct proc_dir_entry *proc_spl_kmem = NULL; ++static struct proc_dir_entry *proc_spl_kmem_slab = NULL; ++#endif /* DEBUG_KMEM */ ++struct proc_dir_entry *proc_spl_kstat = NULL; ++ ++#ifdef HAVE_CTL_NAME ++#ifdef HAVE_CTL_UNNUMBERED ++ ++#define CTL_SPL CTL_UNNUMBERED ++#define CTL_SPL_DEBUG CTL_UNNUMBERED ++#define CTL_SPL_VM CTL_UNNUMBERED ++#define CTL_SPL_MUTEX CTL_UNNUMBERED ++#define CTL_SPL_KMEM CTL_UNNUMBERED ++#define CTL_SPL_KSTAT CTL_UNNUMBERED ++ ++#define CTL_VERSION CTL_UNNUMBERED /* Version */ ++#define CTL_HOSTID CTL_UNNUMBERED /* Host id by /usr/bin/hostid */ ++#define CTL_HW_SERIAL CTL_UNNUMBERED /* HW serial number by hostid */ ++#define CTL_KALLSYMS CTL_UNNUMBERED /* kallsyms_lookup_name addr */ ++ ++#define CTL_DEBUG_SUBSYS CTL_UNNUMBERED /* Debug subsystem */ ++#define CTL_DEBUG_MASK CTL_UNNUMBERED /* Debug mask */ ++#define CTL_DEBUG_PRINTK CTL_UNNUMBERED /* All messages to console */ ++#define CTL_DEBUG_MB CTL_UNNUMBERED /* Debug buffer size */ ++#define CTL_DEBUG_BINARY CTL_UNNUMBERED /* Binary data in buffer */ ++#define CTL_DEBUG_CATASTROPHE CTL_UNNUMBERED /* Set if BUG'd or panic'd */ ++#define CTL_DEBUG_PANIC_ON_BUG CTL_UNNUMBERED /* Should panic on BUG */ ++#define CTL_DEBUG_PATH CTL_UNNUMBERED /* Dump log location */ ++#define CTL_DEBUG_DUMP CTL_UNNUMBERED /* Dump debug buffer to file */ ++#define CTL_DEBUG_FORCE_BUG CTL_UNNUMBERED /* Hook to force a BUG */ ++#define CTL_DEBUG_STACK_SIZE CTL_UNNUMBERED /* Max observed stack size */ ++ ++#define CTL_CONSOLE_RATELIMIT CTL_UNNUMBERED /* Ratelimit console messages */ ++#define CTL_CONSOLE_MAX_DELAY_CS CTL_UNNUMBERED /* Max delay skip messages */ ++#define CTL_CONSOLE_MIN_DELAY_CS CTL_UNNUMBERED /* Init delay skip messages */ ++#define CTL_CONSOLE_BACKOFF CTL_UNNUMBERED /* Delay increase factor */ ++ ++#define CTL_VM_MINFREE CTL_UNNUMBERED /* Minimum free memory */ ++#define CTL_VM_DESFREE CTL_UNNUMBERED /* Desired free memory */ ++#define CTL_VM_LOTSFREE CTL_UNNUMBERED /* Lots of free memory */ ++#define CTL_VM_NEEDFREE CTL_UNNUMBERED /* Need free memory */ ++#define CTL_VM_SWAPFS_MINFREE CTL_UNNUMBERED /* Minimum swapfs memory */ ++#define CTL_VM_SWAPFS_RESERVE CTL_UNNUMBERED /* Reserved swapfs memory */ ++#define 
CTL_VM_AVAILRMEM CTL_UNNUMBERED /* Easily available memory */ ++#define CTL_VM_FREEMEM CTL_UNNUMBERED /* Free memory */ ++#define CTL_VM_PHYSMEM CTL_UNNUMBERED /* Total physical memory */ ++ ++#ifdef DEBUG_KMEM ++#define CTL_KMEM_KMEMUSED CTL_UNNUMBERED /* Alloc'd kmem bytes */ ++#define CTL_KMEM_KMEMMAX CTL_UNNUMBERED /* Max alloc'd by kmem bytes */ ++#define CTL_KMEM_VMEMUSED CTL_UNNUMBERED /* Alloc'd vmem bytes */ ++#define CTL_KMEM_VMEMMAX CTL_UNNUMBERED /* Max alloc'd by vmem bytes */ ++#define CTL_KMEM_SLAB_KMEMTOTAL CTL_UNNUMBERED /* Total kmem slab size */ ++#define CTL_KMEM_SLAB_KMEMALLOC CTL_UNNUMBERED /* Alloc'd kmem slab size */ ++#define CTL_KMEM_SLAB_KMEMMAX CTL_UNNUMBERED /* Max kmem slab size */ ++#define CTL_KMEM_SLAB_VMEMTOTAL CTL_UNNUMBERED /* Total vmem slab size */ ++#define CTL_KMEM_SLAB_VMEMALLOC CTL_UNNUMBERED /* Alloc'd vmem slab size */ ++#define CTL_KMEM_SLAB_VMEMMAX CTL_UNNUMBERED /* Max vmem slab size */ ++#endif ++ ++#else /* HAVE_CTL_UNNUMBERED */ ++ ++enum { ++ CTL_SPL = 0x87, ++ CTL_SPL_DEBUG = 0x88, ++ CTL_SPL_VM = 0x89, ++ CTL_SPL_MUTEX = 0x90, ++ CTL_SPL_KMEM = 0x91, ++ CTL_SPL_KSTAT = 0x92, ++}; ++ ++enum { ++ CTL_VERSION = 1, /* Version */ ++ CTL_HOSTID, /* Host id reported by /usr/bin/hostid */ ++ CTL_HW_SERIAL, /* Hardware serial number from hostid */ ++ CTL_KALLSYMS, /* Address of kallsyms_lookup_name */ ++ ++#ifdef DEBUG_LOG ++ CTL_DEBUG_SUBSYS, /* Debug subsystem */ ++ CTL_DEBUG_MASK, /* Debug mask */ ++ CTL_DEBUG_PRINTK, /* Force all messages to console */ ++ CTL_DEBUG_MB, /* Debug buffer size */ ++ CTL_DEBUG_BINARY, /* Include binary data in buffer */ ++ CTL_DEBUG_CATASTROPHE, /* Set if we have BUG'd or panic'd */ ++ CTL_DEBUG_PANIC_ON_BUG, /* Set if we should panic on BUG */ ++ CTL_DEBUG_PATH, /* Dump log location */ ++ CTL_DEBUG_DUMP, /* Dump debug buffer to file */ ++ CTL_DEBUG_FORCE_BUG, /* Hook to force a BUG */ ++ CTL_DEBUG_STACK_SIZE, /* Max observed stack size */ ++#endif ++ ++ CTL_CONSOLE_RATELIMIT, /* Ratelimit console messages */ ++ CTL_CONSOLE_MAX_DELAY_CS, /* Max delay which we skip messages */ ++ CTL_CONSOLE_MIN_DELAY_CS, /* Init delay which we skip messages */ ++ CTL_CONSOLE_BACKOFF, /* Delay increase factor */ ++ ++ CTL_VM_MINFREE, /* Minimum free memory threshold */ ++ CTL_VM_DESFREE, /* Desired free memory threshold */ ++ CTL_VM_LOTSFREE, /* Lots of free memory threshold */ ++ CTL_VM_NEEDFREE, /* Need free memory deficit */ ++ CTL_VM_SWAPFS_MINFREE, /* Minimum swapfs memory */ ++ CTL_VM_SWAPFS_RESERVE, /* Reserved swapfs memory */ ++ CTL_VM_AVAILRMEM, /* Easily available memory */ ++ CTL_VM_FREEMEM, /* Free memory */ ++ CTL_VM_PHYSMEM, /* Total physical memory */ ++ ++#ifdef DEBUG_KMEM ++ CTL_KMEM_KMEMUSED, /* Alloc'd kmem bytes */ ++ CTL_KMEM_KMEMMAX, /* Max alloc'd by kmem bytes */ ++ CTL_KMEM_VMEMUSED, /* Alloc'd vmem bytes */ ++ CTL_KMEM_VMEMMAX, /* Max alloc'd by vmem bytes */ ++ CTL_KMEM_SLAB_KMEMTOTAL, /* Total kmem slab size */ ++ CTL_KMEM_SLAB_KMEMALLOC, /* Alloc'd kmem slab size */ ++ CTL_KMEM_SLAB_KMEMMAX, /* Max kmem slab size */ ++ CTL_KMEM_SLAB_VMEMTOTAL, /* Total vmem slab size */ ++ CTL_KMEM_SLAB_VMEMALLOC, /* Alloc'd vmem slab size */ ++ CTL_KMEM_SLAB_VMEMMAX, /* Max vmem slab size */ ++#endif ++}; ++#endif /* HAVE_CTL_UNNUMBERED */ ++#endif /* HAVE_CTL_NAME */ ++ ++static int ++proc_copyin_string(char *kbuffer, int kbuffer_size, ++ const char *ubuffer, int ubuffer_size) ++{ ++ int size; ++ ++ if (ubuffer_size > kbuffer_size) ++ return -EOVERFLOW; ++ ++ if (copy_from_user((void *)kbuffer, (void *)ubuffer, 
ubuffer_size)) ++ return -EFAULT; ++ ++ /* strip trailing whitespace */ ++ size = strnlen(kbuffer, ubuffer_size); ++ while (size-- >= 0) ++ if (!isspace(kbuffer[size])) ++ break; ++ ++ /* empty string */ ++ if (size < 0) ++ return -EINVAL; ++ ++ /* no space to terminate */ ++ if (size == kbuffer_size) ++ return -EOVERFLOW; ++ ++ kbuffer[size + 1] = 0; ++ return 0; ++} ++ ++static int ++proc_copyout_string(char *ubuffer, int ubuffer_size, ++ const char *kbuffer, char *append) ++{ ++ /* NB if 'append' != NULL, it's a single character to append to the ++ * copied out string - usually "\n", for /proc entries and ++ * (i.e. a terminating zero byte) for sysctl entries ++ */ ++ int size = MIN(strlen(kbuffer), ubuffer_size); ++ ++ if (copy_to_user(ubuffer, kbuffer, size)) ++ return -EFAULT; ++ ++ if (append != NULL && size < ubuffer_size) { ++ if (copy_to_user(ubuffer + size, append, 1)) ++ return -EFAULT; ++ ++ size++; ++ } ++ ++ return size; ++} ++ ++#ifdef DEBUG_LOG ++SPL_PROC_HANDLER(proc_dobitmasks) ++{ ++ unsigned long *mask = table->data; ++ int is_subsys = (mask == &spl_debug_subsys) ? 1 : 0; ++ int is_printk = (mask == &spl_debug_printk) ? 1 : 0; ++ int size = 512, rc; ++ char *str; ++ SENTRY; ++ ++ str = kmem_alloc(size, KM_SLEEP); ++ if (str == NULL) ++ SRETURN(-ENOMEM); ++ ++ if (write) { ++ rc = proc_copyin_string(str, size, buffer, *lenp); ++ if (rc < 0) ++ SRETURN(rc); ++ ++ rc = spl_debug_str2mask(mask, str, is_subsys); ++ /* Always print BUG/ASSERT to console, so keep this mask */ ++ if (is_printk) ++ *mask |= SD_EMERG; ++ ++ *ppos += *lenp; ++ } else { ++ rc = spl_debug_mask2str(str, size, *mask, is_subsys); ++ if (*ppos >= rc) ++ rc = 0; ++ else ++ rc = proc_copyout_string(buffer, *lenp, ++ str + *ppos, "\n"); ++ if (rc >= 0) { ++ *lenp = rc; ++ *ppos += rc; ++ } ++ } ++ ++ kmem_free(str, size); ++ SRETURN(rc); ++} ++ ++SPL_PROC_HANDLER(proc_debug_mb) ++{ ++ char str[32]; ++ int rc, len; ++ SENTRY; ++ ++ if (write) { ++ rc = proc_copyin_string(str, sizeof(str), buffer, *lenp); ++ if (rc < 0) ++ SRETURN(rc); ++ ++ rc = spl_debug_set_mb(simple_strtoul(str, NULL, 0)); ++ *ppos += *lenp; ++ } else { ++ len = snprintf(str, sizeof(str), "%d", spl_debug_get_mb()); ++ if (*ppos >= len) ++ rc = 0; ++ else ++ rc = proc_copyout_string(buffer,*lenp,str+*ppos,"\n"); ++ ++ if (rc >= 0) { ++ *lenp = rc; ++ *ppos += rc; ++ } ++ } ++ ++ SRETURN(rc); ++} ++ ++SPL_PROC_HANDLER(proc_dump_kernel) ++{ ++ SENTRY; ++ ++ if (write) { ++ spl_debug_dumplog(0); ++ *ppos += *lenp; ++ } else { ++ *lenp = 0; ++ } ++ ++ SRETURN(0); ++} ++ ++SPL_PROC_HANDLER(proc_force_bug) ++{ ++ SENTRY; ++ ++ if (write) ++ PANIC("Crashing due to forced panic\n"); ++ else ++ *lenp = 0; ++ ++ SRETURN(0); ++} ++ ++SPL_PROC_HANDLER(proc_console_max_delay_cs) ++{ ++ int rc, max_delay_cs; ++ struct ctl_table dummy = *table; ++ long d; ++ SENTRY; ++ ++ dummy.data = &max_delay_cs; ++ dummy.proc_handler = &proc_dointvec; ++ ++ if (write) { ++ max_delay_cs = 0; ++ rc = spl_proc_dointvec(&dummy,write,filp,buffer,lenp,ppos); ++ if (rc < 0) ++ SRETURN(rc); ++ ++ if (max_delay_cs <= 0) ++ SRETURN(-EINVAL); ++ ++ d = (max_delay_cs * HZ) / 100; ++ if (d == 0 || d < spl_console_min_delay) ++ SRETURN(-EINVAL); ++ ++ spl_console_max_delay = d; ++ } else { ++ max_delay_cs = (spl_console_max_delay * 100) / HZ; ++ rc = spl_proc_dointvec(&dummy,write,filp,buffer,lenp,ppos); ++ } ++ ++ SRETURN(rc); ++} ++ ++SPL_PROC_HANDLER(proc_console_min_delay_cs) ++{ ++ int rc, min_delay_cs; ++ struct ctl_table dummy = *table; ++ long d; ++ SENTRY; ++ ++ 
dummy.data = &min_delay_cs; ++ dummy.proc_handler = &proc_dointvec; ++ ++ if (write) { ++ min_delay_cs = 0; ++ rc = spl_proc_dointvec(&dummy,write,filp,buffer,lenp,ppos); ++ if (rc < 0) ++ SRETURN(rc); ++ ++ if (min_delay_cs <= 0) ++ SRETURN(-EINVAL); ++ ++ d = (min_delay_cs * HZ) / 100; ++ if (d == 0 || d > spl_console_max_delay) ++ SRETURN(-EINVAL); ++ ++ spl_console_min_delay = d; ++ } else { ++ min_delay_cs = (spl_console_min_delay * 100) / HZ; ++ rc = spl_proc_dointvec(&dummy,write,filp,buffer,lenp,ppos); ++ } ++ ++ SRETURN(rc); ++} ++ ++SPL_PROC_HANDLER(proc_console_backoff) ++{ ++ int rc, backoff; ++ struct ctl_table dummy = *table; ++ SENTRY; ++ ++ dummy.data = &backoff; ++ dummy.proc_handler = &proc_dointvec; ++ ++ if (write) { ++ backoff = 0; ++ rc = spl_proc_dointvec(&dummy,write,filp,buffer,lenp,ppos); ++ if (rc < 0) ++ SRETURN(rc); ++ ++ if (backoff <= 0) ++ SRETURN(-EINVAL); ++ ++ spl_console_backoff = backoff; ++ } else { ++ backoff = spl_console_backoff; ++ rc = spl_proc_dointvec(&dummy,write,filp,buffer,lenp,ppos); ++ } ++ ++ SRETURN(rc); ++} ++#endif /* DEBUG_LOG */ ++ ++#ifdef DEBUG_KMEM ++SPL_PROC_HANDLER(proc_domemused) ++{ ++ int rc = 0; ++ unsigned long min = 0, max = ~0, val; ++ struct ctl_table dummy = *table; ++ SENTRY; ++ ++ dummy.data = &val; ++ dummy.proc_handler = &proc_dointvec; ++ dummy.extra1 = &min; ++ dummy.extra2 = &max; ++ ++ if (write) { ++ *ppos += *lenp; ++ } else { ++# ifdef HAVE_ATOMIC64_T ++ val = atomic64_read((atomic64_t *)table->data); ++# else ++ val = atomic_read((atomic_t *)table->data); ++# endif /* HAVE_ATOMIC64_T */ ++ rc = spl_proc_doulongvec_minmax(&dummy, write, filp, ++ buffer, lenp, ppos); ++ } ++ ++ SRETURN(rc); ++} ++ ++SPL_PROC_HANDLER(proc_doslab) ++{ ++ int rc = 0; ++ unsigned long min = 0, max = ~0, val = 0, mask; ++ struct ctl_table dummy = *table; ++ spl_kmem_cache_t *skc; ++ SENTRY; ++ ++ dummy.data = &val; ++ dummy.proc_handler = &proc_dointvec; ++ dummy.extra1 = &min; ++ dummy.extra2 = &max; ++ ++ if (write) { ++ *ppos += *lenp; ++ } else { ++ down_read(&spl_kmem_cache_sem); ++ mask = (unsigned long)table->data; ++ ++ list_for_each_entry(skc, &spl_kmem_cache_list, skc_list) { ++ ++ /* Only use slabs of the correct kmem/vmem type */ ++ if (!(skc->skc_flags & mask)) ++ continue; ++ ++ /* Sum the specified field for selected slabs */ ++ switch (mask & (KMC_TOTAL | KMC_ALLOC | KMC_MAX)) { ++ case KMC_TOTAL: ++ val += skc->skc_slab_size * skc->skc_slab_total; ++ break; ++ case KMC_ALLOC: ++ val += skc->skc_obj_size * skc->skc_obj_alloc; ++ break; ++ case KMC_MAX: ++ val += skc->skc_obj_size * skc->skc_obj_max; ++ break; ++ } ++ } ++ ++ up_read(&spl_kmem_cache_sem); ++ rc = spl_proc_doulongvec_minmax(&dummy, write, filp, ++ buffer, lenp, ppos); ++ } ++ ++ SRETURN(rc); ++} ++#endif /* DEBUG_KMEM */ ++ ++SPL_PROC_HANDLER(proc_dohostid) ++{ ++ int len, rc = 0; ++ char *end, str[32]; ++ SENTRY; ++ ++ if (write) { ++ /* We can't use spl_proc_doulongvec_minmax() in the write ++ * case here because hostid while a hex value has no ++ * leading 0x which confuses the helper function. 
*/ ++ rc = proc_copyin_string(str, sizeof(str), buffer, *lenp); ++ if (rc < 0) ++ SRETURN(rc); ++ ++ spl_hostid = simple_strtoul(str, &end, 16); ++ if (str == end) ++ SRETURN(-EINVAL); ++ ++ (void) snprintf(hw_serial, HW_HOSTID_LEN, "%lu", spl_hostid); ++ hw_serial[HW_HOSTID_LEN - 1] = '\0'; ++ *ppos += *lenp; ++ } else { ++ len = snprintf(str, sizeof(str), "%lx", spl_hostid); ++ if (*ppos >= len) ++ rc = 0; ++ else ++ rc = proc_copyout_string(buffer,*lenp,str+*ppos,"\n"); ++ ++ if (rc >= 0) { ++ *lenp = rc; ++ *ppos += rc; ++ } ++ } ++ ++ SRETURN(rc); ++} ++ ++#ifndef HAVE_KALLSYMS_LOOKUP_NAME ++SPL_PROC_HANDLER(proc_dokallsyms_lookup_name) ++{ ++ int len, rc = 0; ++ char *end, str[32]; ++ SENTRY; ++ ++ if (write) { ++ /* This may only be set once at module load time */ ++ if (spl_kallsyms_lookup_name_fn != SYMBOL_POISON) ++ SRETURN(-EEXIST); ++ ++ /* We can't use spl_proc_doulongvec_minmax() in the write ++ * case here because the address while a hex value has no ++ * leading 0x which confuses the helper function. */ ++ rc = proc_copyin_string(str, sizeof(str), buffer, *lenp); ++ if (rc < 0) ++ SRETURN(rc); ++ ++ spl_kallsyms_lookup_name_fn = ++ (kallsyms_lookup_name_t)simple_strtoul(str, &end, 16); ++ if (str == end) ++ SRETURN(-EINVAL); ++ ++ *ppos += *lenp; ++ } else { ++ len = snprintf(str, sizeof(str), "%lx", ++ (unsigned long)spl_kallsyms_lookup_name_fn); ++ if (*ppos >= len) ++ rc = 0; ++ else ++ rc = proc_copyout_string(buffer,*lenp,str+*ppos,"\n"); ++ ++ if (rc >= 0) { ++ *lenp = rc; ++ *ppos += rc; ++ } ++ } ++ ++ SRETURN(rc); ++} ++#endif /* HAVE_KALLSYMS_LOOKUP_NAME */ ++ ++SPL_PROC_HANDLER(proc_doavailrmem) ++{ ++ int len, rc = 0; ++ char str[32]; ++ SENTRY; ++ ++ if (write) { ++ *ppos += *lenp; ++ } else { ++ len = snprintf(str, sizeof(str), "%lu", ++ (unsigned long)availrmem); ++ if (*ppos >= len) ++ rc = 0; ++ else ++ rc = proc_copyout_string(buffer,*lenp,str+*ppos,"\n"); ++ ++ if (rc >= 0) { ++ *lenp = rc; ++ *ppos += rc; ++ } ++ } ++ ++ SRETURN(rc); ++} ++ ++SPL_PROC_HANDLER(proc_dofreemem) ++{ ++ int len, rc = 0; ++ char str[32]; ++ SENTRY; ++ ++ if (write) { ++ *ppos += *lenp; ++ } else { ++ len = snprintf(str, sizeof(str), "%lu", (unsigned long)freemem); ++ if (*ppos >= len) ++ rc = 0; ++ else ++ rc = proc_copyout_string(buffer,*lenp,str+*ppos,"\n"); ++ ++ if (rc >= 0) { ++ *lenp = rc; ++ *ppos += rc; ++ } ++ } ++ ++ SRETURN(rc); ++} ++ ++#ifdef DEBUG_KMEM ++static void ++slab_seq_show_headers(struct seq_file *f) ++{ ++ seq_printf(f, ++ "--------------------- cache ----------" ++ "--------------------------------------------- " ++ "----- slab ------ " ++ "---- object ----- " ++ "--- emergency ---\n"); ++ seq_printf(f, ++ "name " ++ " flags size alloc slabsize objsize " ++ "total alloc max " ++ "total alloc max " ++ "dlock alloc max\n"); ++} ++ ++static int ++slab_seq_show(struct seq_file *f, void *p) ++{ ++ spl_kmem_cache_t *skc = p; ++ ++ ASSERT(skc->skc_magic == SKC_MAGIC); ++ ++ spin_lock(&skc->skc_lock); ++ seq_printf(f, "%-36s ", skc->skc_name); ++ seq_printf(f, "0x%05lx %9lu %9lu %8u %8u " ++ "%5lu %5lu %5lu %5lu %5lu %5lu %5lu %5lu %5lu\n", ++ (long unsigned)skc->skc_flags, ++ (long unsigned)(skc->skc_slab_size * skc->skc_slab_total), ++ (long unsigned)(skc->skc_obj_size * skc->skc_obj_alloc), ++ (unsigned)skc->skc_slab_size, ++ (unsigned)skc->skc_obj_size, ++ (long unsigned)skc->skc_slab_total, ++ (long unsigned)skc->skc_slab_alloc, ++ (long unsigned)skc->skc_slab_max, ++ (long unsigned)skc->skc_obj_total, ++ (long unsigned)skc->skc_obj_alloc, ++ (long 
unsigned)skc->skc_obj_max, ++ (long unsigned)skc->skc_obj_deadlock, ++ (long unsigned)skc->skc_obj_emergency, ++ (long unsigned)skc->skc_obj_emergency_max); ++ ++ spin_unlock(&skc->skc_lock); ++ ++ return 0; ++} ++ ++static void * ++slab_seq_start(struct seq_file *f, loff_t *pos) ++{ ++ struct list_head *p; ++ loff_t n = *pos; ++ SENTRY; ++ ++ down_read(&spl_kmem_cache_sem); ++ if (!n) ++ slab_seq_show_headers(f); ++ ++ p = spl_kmem_cache_list.next; ++ while (n--) { ++ p = p->next; ++ if (p == &spl_kmem_cache_list) ++ SRETURN(NULL); ++ } ++ ++ SRETURN(list_entry(p, spl_kmem_cache_t, skc_list)); ++} ++ ++static void * ++slab_seq_next(struct seq_file *f, void *p, loff_t *pos) ++{ ++ spl_kmem_cache_t *skc = p; ++ SENTRY; ++ ++ ++*pos; ++ SRETURN((skc->skc_list.next == &spl_kmem_cache_list) ? ++ NULL : list_entry(skc->skc_list.next,spl_kmem_cache_t,skc_list)); ++} ++ ++static void ++slab_seq_stop(struct seq_file *f, void *v) ++{ ++ up_read(&spl_kmem_cache_sem); ++} ++ ++static struct seq_operations slab_seq_ops = { ++ .show = slab_seq_show, ++ .start = slab_seq_start, ++ .next = slab_seq_next, ++ .stop = slab_seq_stop, ++}; ++ ++static int ++proc_slab_open(struct inode *inode, struct file *filp) ++{ ++ return seq_open(filp, &slab_seq_ops); ++} ++ ++static struct file_operations proc_slab_operations = { ++ .open = proc_slab_open, ++ .read = seq_read, ++ .llseek = seq_lseek, ++ .release = seq_release, ++}; ++#endif /* DEBUG_KMEM */ ++ ++#ifdef DEBUG_LOG ++static struct ctl_table spl_debug_table[] = { ++ { ++ CTL_NAME (CTL_DEBUG_SUBSYS) ++ .procname = "subsystem", ++ .data = &spl_debug_subsys, ++ .maxlen = sizeof(unsigned long), ++ .mode = 0644, ++ .proc_handler = &proc_dobitmasks ++ }, ++ { ++ CTL_NAME (CTL_DEBUG_MASK) ++ .procname = "mask", ++ .data = &spl_debug_mask, ++ .maxlen = sizeof(unsigned long), ++ .mode = 0644, ++ .proc_handler = &proc_dobitmasks ++ }, ++ { ++ CTL_NAME (CTL_DEBUG_PRINTK) ++ .procname = "printk", ++ .data = &spl_debug_printk, ++ .maxlen = sizeof(unsigned long), ++ .mode = 0644, ++ .proc_handler = &proc_dobitmasks ++ }, ++ { ++ CTL_NAME (CTL_DEBUG_MB) ++ .procname = "mb", ++ .mode = 0644, ++ .proc_handler = &proc_debug_mb, ++ }, ++ { ++ CTL_NAME (CTL_DEBUG_BINARY) ++ .procname = "binary", ++ .data = &spl_debug_binary, ++ .maxlen = sizeof(int), ++ .mode = 0644, ++ .proc_handler = &proc_dointvec, ++ }, ++ { ++ CTL_NAME (CTL_DEBUG_CATASTROPHE) ++ .procname = "catastrophe", ++ .data = &spl_debug_catastrophe, ++ .maxlen = sizeof(int), ++ .mode = 0444, ++ .proc_handler = &proc_dointvec, ++ }, ++ { ++ CTL_NAME (CTL_DEBUG_PANIC_ON_BUG) ++ .procname = "panic_on_bug", ++ .data = &spl_debug_panic_on_bug, ++ .maxlen = sizeof(int), ++ .mode = 0644, ++ .proc_handler = &proc_dointvec ++ }, ++ { ++ CTL_NAME (CTL_DEBUG_PATH) ++ .procname = "path", ++ .data = spl_debug_file_path, ++ .maxlen = sizeof(spl_debug_file_path), ++ .mode = 0644, ++ .proc_handler = &proc_dostring, ++ }, ++ { ++ CTL_NAME (CTL_DEBUG_DUMP) ++ .procname = "dump", ++ .mode = 0200, ++ .proc_handler = &proc_dump_kernel, ++ }, ++ { CTL_NAME (CTL_DEBUG_FORCE_BUG) ++ .procname = "force_bug", ++ .mode = 0200, ++ .proc_handler = &proc_force_bug, ++ }, ++ { ++ CTL_NAME (CTL_CONSOLE_RATELIMIT) ++ .procname = "console_ratelimit", ++ .data = &spl_console_ratelimit, ++ .maxlen = sizeof(int), ++ .mode = 0644, ++ .proc_handler = &proc_dointvec, ++ }, ++ { ++ CTL_NAME (CTL_CONSOLE_MAX_DELAY_CS) ++ .procname = "console_max_delay_centisecs", ++ .maxlen = sizeof(int), ++ .mode = 0644, ++ .proc_handler = &proc_console_max_delay_cs, ++ }, 
++ { ++ CTL_NAME (CTL_CONSOLE_MIN_DELAY_CS) ++ .procname = "console_min_delay_centisecs", ++ .maxlen = sizeof(int), ++ .mode = 0644, ++ .proc_handler = &proc_console_min_delay_cs, ++ }, ++ { ++ CTL_NAME (CTL_CONSOLE_BACKOFF) ++ .procname = "console_backoff", ++ .maxlen = sizeof(int), ++ .mode = 0644, ++ .proc_handler = &proc_console_backoff, ++ }, ++ { ++ CTL_NAME (CTL_DEBUG_STACK_SIZE) ++ .procname = "stack_max", ++ .data = &spl_debug_stack, ++ .maxlen = sizeof(int), ++ .mode = 0444, ++ .proc_handler = &proc_dointvec, ++ }, ++ {0}, ++}; ++#endif /* DEBUG_LOG */ ++ ++static struct ctl_table spl_vm_table[] = { ++ { ++ CTL_NAME (CTL_VM_MINFREE) ++ .procname = "minfree", ++ .data = &minfree, ++ .maxlen = sizeof(int), ++ .mode = 0644, ++ .proc_handler = &proc_dointvec, ++ }, ++ { ++ CTL_NAME (CTL_VM_DESFREE) ++ .procname = "desfree", ++ .data = &desfree, ++ .maxlen = sizeof(int), ++ .mode = 0644, ++ .proc_handler = &proc_dointvec, ++ }, ++ { ++ CTL_NAME (CTL_VM_LOTSFREE) ++ .procname = "lotsfree", ++ .data = &lotsfree, ++ .maxlen = sizeof(int), ++ .mode = 0644, ++ .proc_handler = &proc_dointvec, ++ }, ++ { ++ CTL_NAME (CTL_VM_NEEDFREE) ++ .procname = "needfree", ++ .data = &needfree, ++ .maxlen = sizeof(int), ++ .mode = 0444, ++ .proc_handler = &proc_dointvec, ++ }, ++ { ++ CTL_NAME (CTL_VM_SWAPFS_MINFREE) ++ .procname = "swapfs_minfree", ++ .data = &swapfs_minfree, ++ .maxlen = sizeof(int), ++ .mode = 0644, ++ .proc_handler = &proc_dointvec, ++ }, ++ { ++ CTL_NAME (CTL_VM_SWAPFS_RESERVE) ++ .procname = "swapfs_reserve", ++ .data = &swapfs_reserve, ++ .maxlen = sizeof(int), ++ .mode = 0644, ++ .proc_handler = &proc_dointvec, ++ }, ++ { ++ CTL_NAME (CTL_VM_AVAILRMEM) ++ .procname = "availrmem", ++ .mode = 0444, ++ .proc_handler = &proc_doavailrmem, ++ }, ++ { ++ CTL_NAME (CTL_VM_FREEMEM) ++ .procname = "freemem", ++ .data = (void *)2, ++ .maxlen = sizeof(int), ++ .mode = 0444, ++ .proc_handler = &proc_dofreemem, ++ }, ++ { ++ CTL_NAME (CTL_VM_PHYSMEM) ++ .procname = "physmem", ++ .data = &physmem, ++ .maxlen = sizeof(int), ++ .mode = 0444, ++ .proc_handler = &proc_dointvec, ++ }, ++ {0}, ++}; ++ ++#ifdef DEBUG_KMEM ++static struct ctl_table spl_kmem_table[] = { ++ { ++ CTL_NAME (CTL_KMEM_KMEMUSED) ++ .procname = "kmem_used", ++ .data = &kmem_alloc_used, ++# ifdef HAVE_ATOMIC64_T ++ .maxlen = sizeof(atomic64_t), ++# else ++ .maxlen = sizeof(atomic_t), ++# endif /* HAVE_ATOMIC64_T */ ++ .mode = 0444, ++ .proc_handler = &proc_domemused, ++ }, ++ { ++ CTL_NAME (CTL_KMEM_KMEMMAX) ++ .procname = "kmem_max", ++ .data = &kmem_alloc_max, ++ .maxlen = sizeof(unsigned long), ++ .extra1 = &table_min, ++ .extra2 = &table_max, ++ .mode = 0444, ++ .proc_handler = &proc_doulongvec_minmax, ++ }, ++ { ++ CTL_NAME (CTL_KMEM_VMEMUSED) ++ .procname = "vmem_used", ++ .data = &vmem_alloc_used, ++# ifdef HAVE_ATOMIC64_T ++ .maxlen = sizeof(atomic64_t), ++# else ++ .maxlen = sizeof(atomic_t), ++# endif /* HAVE_ATOMIC64_T */ ++ .mode = 0444, ++ .proc_handler = &proc_domemused, ++ }, ++ { ++ CTL_NAME (CTL_KMEM_VMEMMAX) ++ .procname = "vmem_max", ++ .data = &vmem_alloc_max, ++ .maxlen = sizeof(unsigned long), ++ .extra1 = &table_min, ++ .extra2 = &table_max, ++ .mode = 0444, ++ .proc_handler = &proc_doulongvec_minmax, ++ }, ++ { ++ CTL_NAME (CTL_KMEM_SLAB_KMEMTOTAL) ++ .procname = "slab_kmem_total", ++ .data = (void *)(KMC_KMEM | KMC_TOTAL), ++ .maxlen = sizeof(unsigned long), ++ .extra1 = &table_min, ++ .extra2 = &table_max, ++ .mode = 0444, ++ .proc_handler = &proc_doslab, ++ }, ++ { ++ CTL_NAME 
(CTL_KMEM_SLAB_KMEMALLOC) ++ .procname = "slab_kmem_alloc", ++ .data = (void *)(KMC_KMEM | KMC_ALLOC), ++ .maxlen = sizeof(unsigned long), ++ .extra1 = &table_min, ++ .extra2 = &table_max, ++ .mode = 0444, ++ .proc_handler = &proc_doslab, ++ }, ++ { ++ CTL_NAME (CTL_KMEM_SLAB_KMEMMAX) ++ .procname = "slab_kmem_max", ++ .data = (void *)(KMC_KMEM | KMC_MAX), ++ .maxlen = sizeof(unsigned long), ++ .extra1 = &table_min, ++ .extra2 = &table_max, ++ .mode = 0444, ++ .proc_handler = &proc_doslab, ++ }, ++ { ++ CTL_NAME (CTL_KMEM_SLAB_VMEMTOTAL) ++ .procname = "slab_vmem_total", ++ .data = (void *)(KMC_VMEM | KMC_TOTAL), ++ .maxlen = sizeof(unsigned long), ++ .extra1 = &table_min, ++ .extra2 = &table_max, ++ .mode = 0444, ++ .proc_handler = &proc_doslab, ++ }, ++ { ++ CTL_NAME (CTL_KMEM_SLAB_VMEMALLOC) ++ .procname = "slab_vmem_alloc", ++ .data = (void *)(KMC_VMEM | KMC_ALLOC), ++ .maxlen = sizeof(unsigned long), ++ .extra1 = &table_min, ++ .extra2 = &table_max, ++ .mode = 0444, ++ .proc_handler = &proc_doslab, ++ }, ++ { ++ CTL_NAME (CTL_KMEM_SLAB_VMEMMAX) ++ .procname = "slab_vmem_max", ++ .data = (void *)(KMC_VMEM | KMC_MAX), ++ .maxlen = sizeof(unsigned long), ++ .extra1 = &table_min, ++ .extra2 = &table_max, ++ .mode = 0444, ++ .proc_handler = &proc_doslab, ++ }, ++ {0}, ++}; ++#endif /* DEBUG_KMEM */ ++ ++static struct ctl_table spl_kstat_table[] = { ++ {0}, ++}; ++ ++static struct ctl_table spl_table[] = { ++ /* NB No .strategy entries have been provided since ++ * sysctl(8) prefers to go via /proc for portability. ++ */ ++ { ++ CTL_NAME (CTL_VERSION) ++ .procname = "version", ++ .data = spl_version, ++ .maxlen = sizeof(spl_version), ++ .mode = 0444, ++ .proc_handler = &proc_dostring, ++ }, ++ { ++ CTL_NAME (CTL_HOSTID) ++ .procname = "hostid", ++ .data = &spl_hostid, ++ .maxlen = sizeof(unsigned long), ++ .mode = 0644, ++ .proc_handler = &proc_dohostid, ++ }, ++ { ++ CTL_NAME (CTL_HW_SERIAL) ++ .procname = "hw_serial", ++ .data = hw_serial, ++ .maxlen = sizeof(hw_serial), ++ .mode = 0444, ++ .proc_handler = &proc_dostring, ++ }, ++#ifndef HAVE_KALLSYMS_LOOKUP_NAME ++ { ++ CTL_NAME (CTL_KALLSYMS) ++ .procname = "kallsyms_lookup_name", ++ .data = &spl_kallsyms_lookup_name_fn, ++ .maxlen = sizeof(unsigned long), ++ .mode = 0644, ++ .proc_handler = &proc_dokallsyms_lookup_name, ++ }, ++#endif ++#ifdef DEBUG_LOG ++ { ++ CTL_NAME (CTL_SPL_DEBUG) ++ .procname = "debug", ++ .mode = 0555, ++ .child = spl_debug_table, ++ }, ++#endif ++ { ++ CTL_NAME (CTL_SPL_VM) ++ .procname = "vm", ++ .mode = 0555, ++ .child = spl_vm_table, ++ }, ++#ifdef DEBUG_KMEM ++ { ++ CTL_NAME (CTL_SPL_KMEM) ++ .procname = "kmem", ++ .mode = 0555, ++ .child = spl_kmem_table, ++ }, ++#endif ++ { ++ CTL_NAME (CTL_SPL_KSTAT) ++ .procname = "kstat", ++ .mode = 0555, ++ .child = spl_kstat_table, ++ }, ++ { 0 }, ++}; ++ ++static struct ctl_table spl_dir[] = { ++ { ++ CTL_NAME (CTL_SPL) ++ .procname = "spl", ++ .mode = 0555, ++ .child = spl_table, ++ }, ++ { 0 } ++}; ++ ++static struct ctl_table spl_root[] = { ++ { ++ CTL_NAME (CTL_KERN) ++ .procname = "kernel", ++ .mode = 0555, ++ .child = spl_dir, ++ }, ++ { 0 } ++}; ++ ++static int ++proc_dir_entry_match(int len, const char *name, struct proc_dir_entry *de) ++{ ++ if (de->namelen != len) ++ return 0; ++ ++ return !memcmp(name, de->name, len); ++} ++ ++struct proc_dir_entry * ++proc_dir_entry_find(struct proc_dir_entry *root, const char *str) ++{ ++ struct proc_dir_entry *de; ++ ++ for (de = root->subdir; de; de = de->next) ++ if (proc_dir_entry_match(strlen(str), str, de)) ++ 
return de; ++ ++ return NULL; ++} ++ ++int ++proc_dir_entries(struct proc_dir_entry *root) ++{ ++ struct proc_dir_entry *de; ++ int i = 0; ++ ++ for (de = root->subdir; de; de = de->next) ++ i++; ++ ++ return i; ++} ++ ++int ++spl_proc_init(void) ++{ ++ int rc = 0; ++ SENTRY; ++ ++#ifdef CONFIG_SYSCTL ++ spl_header = spl_register_sysctl_table(spl_root, 0); ++ if (spl_header == NULL) ++ SRETURN(-EUNATCH); ++#endif /* CONFIG_SYSCTL */ ++ ++ proc_spl = proc_mkdir("spl", NULL); ++ if (proc_spl == NULL) ++ SGOTO(out, rc = -EUNATCH); ++ ++#ifdef DEBUG_KMEM ++ proc_spl_kmem = proc_mkdir("kmem", proc_spl); ++ if (proc_spl_kmem == NULL) ++ SGOTO(out, rc = -EUNATCH); ++ ++ proc_spl_kmem_slab = create_proc_entry("slab", 0444, proc_spl_kmem); ++ if (proc_spl_kmem_slab == NULL) ++ SGOTO(out, rc = -EUNATCH); ++ ++ proc_spl_kmem_slab->proc_fops = &proc_slab_operations; ++#endif /* DEBUG_KMEM */ ++ ++ proc_spl_kstat = proc_mkdir("kstat", proc_spl); ++ if (proc_spl_kstat == NULL) ++ SGOTO(out, rc = -EUNATCH); ++out: ++ if (rc) { ++ remove_proc_entry("kstat", proc_spl); ++#ifdef DEBUG_KMEM ++ remove_proc_entry("slab", proc_spl_kmem); ++ remove_proc_entry("kmem", proc_spl); ++#endif ++ remove_proc_entry("spl", NULL); ++#ifdef CONFIG_SYSCTL ++ spl_unregister_sysctl_table(spl_header); ++#endif /* CONFIG_SYSCTL */ ++ } ++ ++ SRETURN(rc); ++} ++ ++void ++spl_proc_fini(void) ++{ ++ SENTRY; ++ ++ remove_proc_entry("kstat", proc_spl); ++#ifdef DEBUG_KMEM ++ remove_proc_entry("slab", proc_spl_kmem); ++ remove_proc_entry("kmem", proc_spl); ++#endif ++ remove_proc_entry("spl", NULL); ++ ++#ifdef CONFIG_SYSCTL ++ ASSERT(spl_header != NULL); ++ spl_unregister_sysctl_table(spl_header); ++#endif /* CONFIG_SYSCTL */ ++ ++ SEXIT; ++} +diff -uNr linux-3.2.33-go.orig/spl/spl/spl-rwlock.c linux-3.2.33-go/spl/spl/spl-rwlock.c +--- linux-3.2.33-go.orig/spl/spl/spl-rwlock.c 1970-01-01 01:00:00.000000000 +0100 ++++ linux-3.2.33-go/spl/spl/spl-rwlock.c 2012-11-16 23:22:32.410192863 +0100 +@@ -0,0 +1,96 @@ ++/*****************************************************************************\ ++ * Copyright (C) 2007-2010 Lawrence Livermore National Security, LLC. ++ * Copyright (C) 2007 The Regents of the University of California. ++ * Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER). ++ * Written by Brian Behlendorf . ++ * UCRL-CODE-235197 ++ * ++ * This file is part of the SPL, Solaris Porting Layer. ++ * For details, see . ++ * ++ * The SPL is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License as published by the ++ * Free Software Foundation; either version 2 of the License, or (at your ++ * option) any later version. ++ * ++ * The SPL is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * for more details. ++ * ++ * You should have received a copy of the GNU General Public License along ++ * with the SPL. If not, see . ++ ***************************************************************************** ++ * Solaris Porting Layer (SPL) Reader/Writer Lock Implementation. 
++\*****************************************************************************/ ++ ++#include ++ ++#ifdef DEBUG_SUBSYSTEM ++#undef DEBUG_SUBSYSTEM ++#endif ++ ++#define DEBUG_SUBSYSTEM S_RWLOCK ++ ++#ifdef CONFIG_RWSEM_GENERIC_SPINLOCK ++ ++/* ++ * From lib/rwsem-spinlock.c but modified such that the caller is ++ * responsible for acquiring and dropping the sem->wait_lock. ++ */ ++struct rwsem_waiter { ++ struct list_head list; ++ struct task_struct *task; ++ unsigned int flags; ++#define RWSEM_WAITING_FOR_READ 0x00000001 ++#define RWSEM_WAITING_FOR_WRITE 0x00000002 ++}; ++ ++/* wake a single writer */ ++static struct rw_semaphore * ++__rwsem_wake_one_writer_locked(struct rw_semaphore *sem) ++{ ++ struct rwsem_waiter *waiter; ++ struct task_struct *tsk; ++ ++ sem->activity = -1; ++ ++ waiter = list_entry(sem->wait_list.next, struct rwsem_waiter, list); ++ list_del(&waiter->list); ++ ++ tsk = waiter->task; ++ smp_mb(); ++ waiter->task = NULL; ++ wake_up_process(tsk); ++ put_task_struct(tsk); ++ return sem; ++} ++ ++/* release a read lock on the semaphore */ ++void ++__up_read_locked(struct rw_semaphore *sem) ++{ ++ if (--sem->activity == 0 && !list_empty(&sem->wait_list)) ++ (void)__rwsem_wake_one_writer_locked(sem); ++} ++EXPORT_SYMBOL(__up_read_locked); ++ ++/* trylock for writing -- returns 1 if successful, 0 if contention */ ++int ++__down_write_trylock_locked(struct rw_semaphore *sem) ++{ ++ int ret = 0; ++ ++ if (sem->activity == 0 && list_empty(&sem->wait_list)) { ++ sem->activity = -1; ++ ret = 1; ++ } ++ ++ return ret; ++} ++EXPORT_SYMBOL(__down_write_trylock_locked); ++ ++#endif ++ ++int spl_rw_init(void) { return 0; } ++void spl_rw_fini(void) { } +diff -uNr linux-3.2.33-go.orig/spl/spl/spl-taskq.c linux-3.2.33-go/spl/spl/spl-taskq.c +--- linux-3.2.33-go.orig/spl/spl/spl-taskq.c 1970-01-01 01:00:00.000000000 +0100 ++++ linux-3.2.33-go/spl/spl/spl-taskq.c 2012-11-16 23:22:32.410192863 +0100 +@@ -0,0 +1,703 @@ ++/*****************************************************************************\ ++ * Copyright (C) 2007-2010 Lawrence Livermore National Security, LLC. ++ * Copyright (C) 2007 The Regents of the University of California. ++ * Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER). ++ * Written by Brian Behlendorf . ++ * UCRL-CODE-235197 ++ * ++ * This file is part of the SPL, Solaris Porting Layer. ++ * For details, see . ++ * ++ * The SPL is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License as published by the ++ * Free Software Foundation; either version 2 of the License, or (at your ++ * option) any later version. ++ * ++ * The SPL is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * for more details. ++ * ++ * You should have received a copy of the GNU General Public License along ++ * with the SPL. If not, see . ++ ***************************************************************************** ++ * Solaris Porting Layer (SPL) Task Queue Implementation. 
++\*****************************************************************************/ ++ ++#include ++#include ++#include ++ ++#ifdef SS_DEBUG_SUBSYS ++#undef SS_DEBUG_SUBSYS ++#endif ++ ++#define SS_DEBUG_SUBSYS SS_TASKQ ++ ++/* Global system-wide dynamic task queue available for all consumers */ ++taskq_t *system_taskq; ++EXPORT_SYMBOL(system_taskq); ++ ++static int ++task_km_flags(uint_t flags) ++{ ++ if (flags & TQ_NOSLEEP) ++ return KM_NOSLEEP; ++ ++ if (flags & TQ_PUSHPAGE) ++ return KM_PUSHPAGE; ++ ++ return KM_SLEEP; ++} ++ ++/* ++ * NOTE: Must be called with tq->tq_lock held, returns a list_t which ++ * is not attached to the free, work, or pending taskq lists. ++ */ ++static taskq_ent_t * ++task_alloc(taskq_t *tq, uint_t flags) ++{ ++ taskq_ent_t *t; ++ int count = 0; ++ SENTRY; ++ ++ ASSERT(tq); ++ ASSERT(spin_is_locked(&tq->tq_lock)); ++retry: ++ /* Acquire taskq_ent_t's from free list if available */ ++ if (!list_empty(&tq->tq_free_list) && !(flags & TQ_NEW)) { ++ t = list_entry(tq->tq_free_list.next, taskq_ent_t, tqent_list); ++ ++ ASSERT(!(t->tqent_flags & TQENT_FLAG_PREALLOC)); ++ ++ list_del_init(&t->tqent_list); ++ SRETURN(t); ++ } ++ ++ /* Free list is empty and memory allocations are prohibited */ ++ if (flags & TQ_NOALLOC) ++ SRETURN(NULL); ++ ++ /* Hit maximum taskq_ent_t pool size */ ++ if (tq->tq_nalloc >= tq->tq_maxalloc) { ++ if (flags & TQ_NOSLEEP) ++ SRETURN(NULL); ++ ++ /* ++ * Sleep periodically polling the free list for an available ++ * taskq_ent_t. Dispatching with TQ_SLEEP should always succeed ++ * but we cannot block forever waiting for an taskq_entq_t to ++ * show up in the free list, otherwise a deadlock can happen. ++ * ++ * Therefore, we need to allocate a new task even if the number ++ * of allocated tasks is above tq->tq_maxalloc, but we still ++ * end up delaying the task allocation by one second, thereby ++ * throttling the task dispatch rate. ++ */ ++ spin_unlock_irqrestore(&tq->tq_lock, tq->tq_lock_flags); ++ schedule_timeout(HZ / 100); ++ spin_lock_irqsave(&tq->tq_lock, tq->tq_lock_flags); ++ if (count < 100) ++ SGOTO(retry, count++); ++ } ++ ++ spin_unlock_irqrestore(&tq->tq_lock, tq->tq_lock_flags); ++ t = kmem_alloc(sizeof(taskq_ent_t), task_km_flags(flags)); ++ spin_lock_irqsave(&tq->tq_lock, tq->tq_lock_flags); ++ ++ if (t) { ++ taskq_init_ent(t); ++ tq->tq_nalloc++; ++ } ++ ++ SRETURN(t); ++} ++ ++/* ++ * NOTE: Must be called with tq->tq_lock held, expects the taskq_ent_t ++ * to already be removed from the free, work, or pending taskq lists. ++ */ ++static void ++task_free(taskq_t *tq, taskq_ent_t *t) ++{ ++ SENTRY; ++ ++ ASSERT(tq); ++ ASSERT(t); ++ ASSERT(spin_is_locked(&tq->tq_lock)); ++ ASSERT(list_empty(&t->tqent_list)); ++ ++ kmem_free(t, sizeof(taskq_ent_t)); ++ tq->tq_nalloc--; ++ ++ SEXIT; ++} ++ ++/* ++ * NOTE: Must be called with tq->tq_lock held, either destroys the ++ * taskq_ent_t if too many exist or moves it to the free list for later use. ++ */ ++static void ++task_done(taskq_t *tq, taskq_ent_t *t) ++{ ++ SENTRY; ++ ASSERT(tq); ++ ASSERT(t); ++ ASSERT(spin_is_locked(&tq->tq_lock)); ++ ++ list_del_init(&t->tqent_list); ++ ++ if (tq->tq_nalloc <= tq->tq_minalloc) { ++ t->tqent_id = 0; ++ t->tqent_func = NULL; ++ t->tqent_arg = NULL; ++ t->tqent_flags = 0; ++ ++ list_add_tail(&t->tqent_list, &tq->tq_free_list); ++ } else { ++ task_free(tq, t); ++ } ++ ++ SEXIT; ++} ++ ++/* ++ * As tasks are submitted to the task queue they are assigned a ++ * monotonically increasing taskqid and added to the tail of the pending ++ * list. 
As worker threads become available the tasks are removed from ++ * the head of the pending or priority list, giving preference to the ++ * priority list. The tasks are then removed from their respective ++ * list, and the taskq_thread servicing the task is added to the active ++ * list, preserving the order using the serviced task's taskqid. ++ * Finally, as tasks complete the taskq_thread servicing the task is ++ * removed from the active list. This means that the pending task and ++ * active taskq_thread lists are always kept sorted by taskqid. Thus the ++ * lowest outstanding incomplete taskqid can be determined simply by ++ * checking the min taskqid for each head item on the pending, priority, ++ * and active taskq_thread list. This value is stored in ++ * tq->tq_lowest_id and only updated to the new lowest id when the ++ * previous lowest id completes. All taskqids lower than ++ * tq->tq_lowest_id must have completed. It is also possible larger ++ * taskqid's have completed because they may be processed in parallel by ++ * several worker threads. However, this is not a problem because the ++ * behavior of taskq_wait_id() is to block until all previously ++ * submitted taskqid's have completed. ++ * ++ * XXX: Taskqid_t wrapping is not handled. However, taskqid_t's are ++ * 64-bit values so even if a taskq is processing 2^24 (16,777,216) ++ * taskqid_ts per second it will still take 2^40 seconds, 34,865 years, ++ * before the wrap occurs. I can live with that for now. ++ */ ++static int ++taskq_wait_check(taskq_t *tq, taskqid_t id) ++{ ++ int rc; ++ ++ spin_lock_irqsave(&tq->tq_lock, tq->tq_lock_flags); ++ rc = (id < tq->tq_lowest_id); ++ spin_unlock_irqrestore(&tq->tq_lock, tq->tq_lock_flags); ++ ++ SRETURN(rc); ++} ++ ++void ++__taskq_wait_id(taskq_t *tq, taskqid_t id) ++{ ++ SENTRY; ++ ASSERT(tq); ++ ++ wait_event(tq->tq_wait_waitq, taskq_wait_check(tq, id)); ++ ++ SEXIT; ++} ++EXPORT_SYMBOL(__taskq_wait_id); ++ ++void ++__taskq_wait(taskq_t *tq) ++{ ++ taskqid_t id; ++ SENTRY; ++ ASSERT(tq); ++ ++ /* Wait for the largest outstanding taskqid */ ++ spin_lock_irqsave(&tq->tq_lock, tq->tq_lock_flags); ++ id = tq->tq_next_id - 1; ++ spin_unlock_irqrestore(&tq->tq_lock, tq->tq_lock_flags); ++ ++ __taskq_wait_id(tq, id); ++ ++ SEXIT; ++ ++} ++EXPORT_SYMBOL(__taskq_wait); ++ ++int ++__taskq_member(taskq_t *tq, void *t) ++{ ++ struct list_head *l; ++ taskq_thread_t *tqt; ++ SENTRY; ++ ++ ASSERT(tq); ++ ASSERT(t); ++ ++ list_for_each(l, &tq->tq_thread_list) { ++ tqt = list_entry(l, taskq_thread_t, tqt_thread_list); ++ if (tqt->tqt_thread == (struct task_struct *)t) ++ SRETURN(1); ++ } ++ ++ SRETURN(0); ++} ++EXPORT_SYMBOL(__taskq_member); ++ ++taskqid_t ++__taskq_dispatch(taskq_t *tq, task_func_t func, void *arg, uint_t flags) ++{ ++ taskq_ent_t *t; ++ taskqid_t rc = 0; ++ SENTRY; ++ ++ ASSERT(tq); ++ ASSERT(func); ++ ++ spin_lock_irqsave(&tq->tq_lock, tq->tq_lock_flags); ++ ++ /* Taskq being destroyed and all tasks drained */ ++ if (!(tq->tq_flags & TQ_ACTIVE)) ++ SGOTO(out, rc = 0); ++ ++ /* Do not queue the task unless there is idle thread for it */ ++ ASSERT(tq->tq_nactive <= tq->tq_nthreads); ++ if ((flags & TQ_NOQUEUE) && (tq->tq_nactive == tq->tq_nthreads)) ++ SGOTO(out, rc = 0); ++ ++ if ((t = task_alloc(tq, flags)) == NULL) ++ SGOTO(out, rc = 0); ++ ++ spin_lock(&t->tqent_lock); ++ ++ /* Queue to the priority list instead of the pending list */ ++ if (flags & TQ_FRONT) ++ list_add_tail(&t->tqent_list, &tq->tq_prio_list); ++ else ++ list_add_tail(&t->tqent_list, &tq->tq_pend_list); 
++ ++ t->tqent_id = rc = tq->tq_next_id; ++ tq->tq_next_id++; ++ t->tqent_func = func; ++ t->tqent_arg = arg; ++ ++ ASSERT(!(t->tqent_flags & TQENT_FLAG_PREALLOC)); ++ ++ spin_unlock(&t->tqent_lock); ++ ++ wake_up(&tq->tq_work_waitq); ++out: ++ spin_unlock_irqrestore(&tq->tq_lock, tq->tq_lock_flags); ++ SRETURN(rc); ++} ++EXPORT_SYMBOL(__taskq_dispatch); ++ ++void ++__taskq_dispatch_ent(taskq_t *tq, task_func_t func, void *arg, uint_t flags, ++ taskq_ent_t *t) ++{ ++ SENTRY; ++ ++ ASSERT(tq); ++ ASSERT(func); ++ ASSERT(!(tq->tq_flags & TASKQ_DYNAMIC)); ++ ++ spin_lock_irqsave(&tq->tq_lock, tq->tq_lock_flags); ++ ++ /* Taskq being destroyed and all tasks drained */ ++ if (!(tq->tq_flags & TQ_ACTIVE)) { ++ t->tqent_id = 0; ++ goto out; ++ } ++ ++ spin_lock(&t->tqent_lock); ++ ++ /* ++ * Mark it as a prealloc'd task. This is important ++ * to ensure that we don't free it later. ++ */ ++ t->tqent_flags |= TQENT_FLAG_PREALLOC; ++ ++ /* Queue to the priority list instead of the pending list */ ++ if (flags & TQ_FRONT) ++ list_add_tail(&t->tqent_list, &tq->tq_prio_list); ++ else ++ list_add_tail(&t->tqent_list, &tq->tq_pend_list); ++ ++ t->tqent_id = tq->tq_next_id; ++ tq->tq_next_id++; ++ t->tqent_func = func; ++ t->tqent_arg = arg; ++ ++ spin_unlock(&t->tqent_lock); ++ ++ wake_up(&tq->tq_work_waitq); ++out: ++ spin_unlock_irqrestore(&tq->tq_lock, tq->tq_lock_flags); ++ SEXIT; ++} ++EXPORT_SYMBOL(__taskq_dispatch_ent); ++ ++int ++__taskq_empty_ent(taskq_ent_t *t) ++{ ++ return list_empty(&t->tqent_list); ++} ++EXPORT_SYMBOL(__taskq_empty_ent); ++ ++void ++__taskq_init_ent(taskq_ent_t *t) ++{ ++ spin_lock_init(&t->tqent_lock); ++ INIT_LIST_HEAD(&t->tqent_list); ++ t->tqent_id = 0; ++ t->tqent_func = NULL; ++ t->tqent_arg = NULL; ++ t->tqent_flags = 0; ++} ++EXPORT_SYMBOL(__taskq_init_ent); ++ ++/* ++ * Returns the lowest incomplete taskqid_t. The taskqid_t may ++ * be queued on the pending list, on the priority list, or on ++ * the work list currently being handled, but it is not 100% ++ * complete yet. ++ */ ++static taskqid_t ++taskq_lowest_id(taskq_t *tq) ++{ ++ taskqid_t lowest_id = tq->tq_next_id; ++ taskq_ent_t *t; ++ taskq_thread_t *tqt; ++ SENTRY; ++ ++ ASSERT(tq); ++ ASSERT(spin_is_locked(&tq->tq_lock)); ++ ++ if (!list_empty(&tq->tq_pend_list)) { ++ t = list_entry(tq->tq_pend_list.next, taskq_ent_t, tqent_list); ++ lowest_id = MIN(lowest_id, t->tqent_id); ++ } ++ ++ if (!list_empty(&tq->tq_prio_list)) { ++ t = list_entry(tq->tq_prio_list.next, taskq_ent_t, tqent_list); ++ lowest_id = MIN(lowest_id, t->tqent_id); ++ } ++ ++ if (!list_empty(&tq->tq_active_list)) { ++ tqt = list_entry(tq->tq_active_list.next, taskq_thread_t, ++ tqt_active_list); ++ ASSERT(tqt->tqt_id != 0); ++ lowest_id = MIN(lowest_id, tqt->tqt_id); ++ } ++ ++ SRETURN(lowest_id); ++} ++ ++/* ++ * Insert a task into a list keeping the list sorted by increasing ++ * taskqid. 
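++ * For illustration (hypothetical ids, a sketch of the behaviour below):
++ *   active list, head to tail:  [id 3] [id 7] [id 9]
++ *   a thread picking up taskqid 5 is linked after [id 3], giving
++ *   [id 3] [id 5] [id 7] [id 9]
++ * so the head of tq_active_list always carries the lowest in-flight
++ * taskqid consulted by taskq_lowest_id().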
++ */ ++static void ++taskq_insert_in_order(taskq_t *tq, taskq_thread_t *tqt) ++{ ++ taskq_thread_t *w; ++ struct list_head *l; ++ ++ SENTRY; ++ ASSERT(tq); ++ ASSERT(tqt); ++ ASSERT(spin_is_locked(&tq->tq_lock)); ++ ++ list_for_each_prev(l, &tq->tq_active_list) { ++ w = list_entry(l, taskq_thread_t, tqt_active_list); ++ if (w->tqt_id < tqt->tqt_id) { ++ list_add(&tqt->tqt_active_list, l); ++ break; ++ } ++ } ++ if (l == &tq->tq_active_list) ++ list_add(&tqt->tqt_active_list, &tq->tq_active_list); ++ ++ SEXIT; ++} ++ ++static int ++taskq_thread(void *args) ++{ ++ DECLARE_WAITQUEUE(wait, current); ++ sigset_t blocked; ++ taskq_thread_t *tqt = args; ++ taskq_t *tq; ++ taskq_ent_t *t; ++ struct list_head *pend_list; ++ SENTRY; ++ ++ ASSERT(tqt); ++ tq = tqt->tqt_tq; ++ current->flags |= PF_NOFREEZE; ++ ++ sigfillset(&blocked); ++ sigprocmask(SIG_BLOCK, &blocked, NULL); ++ flush_signals(current); ++ ++ spin_lock_irqsave(&tq->tq_lock, tq->tq_lock_flags); ++ tq->tq_nthreads++; ++ wake_up(&tq->tq_wait_waitq); ++ set_current_state(TASK_INTERRUPTIBLE); ++ ++ while (!kthread_should_stop()) { ++ ++ if (list_empty(&tq->tq_pend_list) && ++ list_empty(&tq->tq_prio_list)) { ++ add_wait_queue_exclusive(&tq->tq_work_waitq, &wait); ++ spin_unlock_irqrestore(&tq->tq_lock, tq->tq_lock_flags); ++ schedule(); ++ spin_lock_irqsave(&tq->tq_lock, tq->tq_lock_flags); ++ remove_wait_queue(&tq->tq_work_waitq, &wait); ++ } else { ++ __set_current_state(TASK_RUNNING); ++ } ++ ++ ++ if (!list_empty(&tq->tq_prio_list)) ++ pend_list = &tq->tq_prio_list; ++ else if (!list_empty(&tq->tq_pend_list)) ++ pend_list = &tq->tq_pend_list; ++ else ++ pend_list = NULL; ++ ++ if (pend_list) { ++ t = list_entry(pend_list->next, taskq_ent_t, tqent_list); ++ list_del_init(&t->tqent_list); ++ ++ /* In order to support recursively dispatching a ++ * preallocated taskq_ent_t, tqent_id must be ++ * stored prior to executing tqent_func. */ ++ tqt->tqt_id = t->tqent_id; ++ ++ /* We must store a copy of the flags prior to ++ * servicing the task (servicing a prealloc'd task ++ * returns the ownership of the tqent back to ++ * the caller of taskq_dispatch). Thus, ++ * tqent_flags _may_ change within the call. */ ++ tqt->tqt_flags = t->tqent_flags; ++ ++ taskq_insert_in_order(tq, tqt); ++ tq->tq_nactive++; ++ spin_unlock_irqrestore(&tq->tq_lock, tq->tq_lock_flags); ++ ++ /* Perform the requested task */ ++ t->tqent_func(t->tqent_arg); ++ ++ spin_lock_irqsave(&tq->tq_lock, tq->tq_lock_flags); ++ tq->tq_nactive--; ++ list_del_init(&tqt->tqt_active_list); ++ ++ /* For prealloc'd tasks, we don't free anything. 
*/ ++ if ((tq->tq_flags & TASKQ_DYNAMIC) || ++ !(tqt->tqt_flags & TQENT_FLAG_PREALLOC)) ++ task_done(tq, t); ++ ++ /* When the current lowest outstanding taskqid is ++ * done calculate the new lowest outstanding id */ ++ if (tq->tq_lowest_id == tqt->tqt_id) { ++ tq->tq_lowest_id = taskq_lowest_id(tq); ++ ASSERT3S(tq->tq_lowest_id, >, tqt->tqt_id); ++ } ++ ++ tqt->tqt_id = 0; ++ tqt->tqt_flags = 0; ++ wake_up_all(&tq->tq_wait_waitq); ++ } ++ ++ set_current_state(TASK_INTERRUPTIBLE); ++ ++ } ++ ++ __set_current_state(TASK_RUNNING); ++ tq->tq_nthreads--; ++ list_del_init(&tqt->tqt_thread_list); ++ kmem_free(tqt, sizeof(taskq_thread_t)); ++ ++ spin_unlock_irqrestore(&tq->tq_lock, tq->tq_lock_flags); ++ ++ SRETURN(0); ++} ++ ++taskq_t * ++__taskq_create(const char *name, int nthreads, pri_t pri, ++ int minalloc, int maxalloc, uint_t flags) ++{ ++ taskq_t *tq; ++ taskq_thread_t *tqt; ++ int rc = 0, i, j = 0; ++ SENTRY; ++ ++ ASSERT(name != NULL); ++ ASSERT(pri <= maxclsyspri); ++ ASSERT(minalloc >= 0); ++ ASSERT(maxalloc <= INT_MAX); ++ ASSERT(!(flags & (TASKQ_CPR_SAFE | TASKQ_DYNAMIC))); /* Unsupported */ ++ ++ /* Scale the number of threads using nthreads as a percentage */ ++ if (flags & TASKQ_THREADS_CPU_PCT) { ++ ASSERT(nthreads <= 100); ++ ASSERT(nthreads >= 0); ++ nthreads = MIN(nthreads, 100); ++ nthreads = MAX(nthreads, 0); ++ nthreads = MAX((num_online_cpus() * nthreads) / 100, 1); ++ } ++ ++ tq = kmem_alloc(sizeof(*tq), KM_PUSHPAGE); ++ if (tq == NULL) ++ SRETURN(NULL); ++ ++ spin_lock_init(&tq->tq_lock); ++ spin_lock_irqsave(&tq->tq_lock, tq->tq_lock_flags); ++ INIT_LIST_HEAD(&tq->tq_thread_list); ++ INIT_LIST_HEAD(&tq->tq_active_list); ++ tq->tq_name = name; ++ tq->tq_nactive = 0; ++ tq->tq_nthreads = 0; ++ tq->tq_pri = pri; ++ tq->tq_minalloc = minalloc; ++ tq->tq_maxalloc = maxalloc; ++ tq->tq_nalloc = 0; ++ tq->tq_flags = (flags | TQ_ACTIVE); ++ tq->tq_next_id = 1; ++ tq->tq_lowest_id = 1; ++ INIT_LIST_HEAD(&tq->tq_free_list); ++ INIT_LIST_HEAD(&tq->tq_pend_list); ++ INIT_LIST_HEAD(&tq->tq_prio_list); ++ init_waitqueue_head(&tq->tq_work_waitq); ++ init_waitqueue_head(&tq->tq_wait_waitq); ++ ++ if (flags & TASKQ_PREPOPULATE) ++ for (i = 0; i < minalloc; i++) ++ task_done(tq, task_alloc(tq, TQ_PUSHPAGE | TQ_NEW)); ++ ++ spin_unlock_irqrestore(&tq->tq_lock, tq->tq_lock_flags); ++ ++ for (i = 0; i < nthreads; i++) { ++ tqt = kmem_alloc(sizeof(*tqt), KM_PUSHPAGE); ++ INIT_LIST_HEAD(&tqt->tqt_thread_list); ++ INIT_LIST_HEAD(&tqt->tqt_active_list); ++ tqt->tqt_tq = tq; ++ tqt->tqt_id = 0; ++ ++ tqt->tqt_thread = kthread_create(taskq_thread, tqt, ++ "%s/%d", name, i); ++ if (tqt->tqt_thread) { ++ list_add(&tqt->tqt_thread_list, &tq->tq_thread_list); ++ kthread_bind(tqt->tqt_thread, i % num_online_cpus()); ++ set_user_nice(tqt->tqt_thread, PRIO_TO_NICE(pri)); ++ wake_up_process(tqt->tqt_thread); ++ j++; ++ } else { ++ kmem_free(tqt, sizeof(taskq_thread_t)); ++ rc = 1; ++ } ++ } ++ ++ /* Wait for all threads to be started before potential destroy */ ++ wait_event(tq->tq_wait_waitq, tq->tq_nthreads == j); ++ ++ if (rc) { ++ __taskq_destroy(tq); ++ tq = NULL; ++ } ++ ++ SRETURN(tq); ++} ++EXPORT_SYMBOL(__taskq_create); ++ ++void ++__taskq_destroy(taskq_t *tq) ++{ ++ struct task_struct *thread; ++ taskq_thread_t *tqt; ++ taskq_ent_t *t; ++ SENTRY; ++ ++ ASSERT(tq); ++ spin_lock_irqsave(&tq->tq_lock, tq->tq_lock_flags); ++ tq->tq_flags &= ~TQ_ACTIVE; ++ spin_unlock_irqrestore(&tq->tq_lock, tq->tq_lock_flags); ++ ++ /* TQ_ACTIVE cleared prevents new tasks being added to pending */ ++ 
__taskq_wait(tq); ++ ++ spin_lock_irqsave(&tq->tq_lock, tq->tq_lock_flags); ++ ++ /* ++ * Signal each thread to exit and block until it does. Each thread ++ * is responsible for removing itself from the list and freeing its ++ * taskq_thread_t. This allows for idle threads to opt to remove ++ * themselves from the taskq. They can be recreated as needed. ++ */ ++ while (!list_empty(&tq->tq_thread_list)) { ++ tqt = list_entry(tq->tq_thread_list.next, ++ taskq_thread_t, tqt_thread_list); ++ thread = tqt->tqt_thread; ++ spin_unlock_irqrestore(&tq->tq_lock, tq->tq_lock_flags); ++ ++ kthread_stop(thread); ++ ++ spin_lock_irqsave(&tq->tq_lock, tq->tq_lock_flags); ++ } ++ ++ while (!list_empty(&tq->tq_free_list)) { ++ t = list_entry(tq->tq_free_list.next, taskq_ent_t, tqent_list); ++ ++ ASSERT(!(t->tqent_flags & TQENT_FLAG_PREALLOC)); ++ ++ list_del_init(&t->tqent_list); ++ task_free(tq, t); ++ } ++ ++ ASSERT(tq->tq_nthreads == 0); ++ ASSERT(tq->tq_nalloc == 0); ++ ASSERT(list_empty(&tq->tq_thread_list)); ++ ASSERT(list_empty(&tq->tq_active_list)); ++ ASSERT(list_empty(&tq->tq_free_list)); ++ ASSERT(list_empty(&tq->tq_pend_list)); ++ ASSERT(list_empty(&tq->tq_prio_list)); ++ ++ spin_unlock_irqrestore(&tq->tq_lock, tq->tq_lock_flags); ++ ++ kmem_free(tq, sizeof(taskq_t)); ++ ++ SEXIT; ++} ++EXPORT_SYMBOL(__taskq_destroy); ++ ++int ++spl_taskq_init(void) ++{ ++ SENTRY; ++ ++ /* Solaris creates a dynamic taskq of up to 64 threads, however in ++ * a Linux environment 1 thread per-core is usually about right */ ++ system_taskq = taskq_create("spl_system_taskq", num_online_cpus(), ++ minclsyspri, 4, 512, TASKQ_PREPOPULATE); ++ if (system_taskq == NULL) ++ SRETURN(1); ++ ++ SRETURN(0); ++} ++ ++void ++spl_taskq_fini(void) ++{ ++ SENTRY; ++ taskq_destroy(system_taskq); ++ SEXIT; ++} +diff -uNr linux-3.2.33-go.orig/spl/spl/spl-thread.c linux-3.2.33-go/spl/spl/spl-thread.c +--- linux-3.2.33-go.orig/spl/spl/spl-thread.c 1970-01-01 01:00:00.000000000 +0100 ++++ linux-3.2.33-go/spl/spl/spl-thread.c 2012-11-16 23:22:32.410192863 +0100 +@@ -0,0 +1,139 @@ ++/*****************************************************************************\ ++ * Copyright (C) 2007-2010 Lawrence Livermore National Security, LLC. ++ * Copyright (C) 2007 The Regents of the University of California. ++ * Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER). ++ * Written by Brian Behlendorf . ++ * UCRL-CODE-235197 ++ * ++ * This file is part of the SPL, Solaris Porting Layer. ++ * For details, see . ++ * ++ * The SPL is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License as published by the ++ * Free Software Foundation; either version 2 of the License, or (at your ++ * option) any later version. ++ * ++ * The SPL is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * for more details. ++ * ++ * You should have received a copy of the GNU General Public License along ++ * with the SPL. If not, see . ++ ***************************************************************************** ++ * Solaris Porting Layer (SPL) Thread Implementation. 
++\*****************************************************************************/ ++ ++#include ++#include ++#include ++#include ++ ++#ifdef SS_DEBUG_SUBSYS ++#undef SS_DEBUG_SUBSYS ++#endif ++ ++#define SS_DEBUG_SUBSYS SS_THREAD ++ ++/* ++ * Thread interfaces ++ */ ++typedef struct thread_priv_s { ++ unsigned long tp_magic; /* Magic */ ++ int tp_name_size; /* Name size */ ++ char *tp_name; /* Name (without _thread suffix) */ ++ void (*tp_func)(void *); /* Registered function */ ++ void *tp_args; /* Args to be passed to function */ ++ size_t tp_len; /* Len to be passed to function */ ++ int tp_state; /* State to start thread at */ ++ pri_t tp_pri; /* Priority to start threat at */ ++} thread_priv_t; ++ ++static int ++thread_generic_wrapper(void *arg) ++{ ++ thread_priv_t *tp = (thread_priv_t *)arg; ++ void (*func)(void *); ++ void *args; ++ ++ ASSERT(tp->tp_magic == TP_MAGIC); ++ func = tp->tp_func; ++ args = tp->tp_args; ++ set_current_state(tp->tp_state); ++ set_user_nice((kthread_t *)current, PRIO_TO_NICE(tp->tp_pri)); ++ kmem_free(tp->tp_name, tp->tp_name_size); ++ kmem_free(tp, sizeof(thread_priv_t)); ++ ++ if (func) ++ func(args); ++ ++ return 0; ++} ++ ++void ++__thread_exit(void) ++{ ++ SENTRY; ++ SEXIT; ++ tsd_exit(); ++ complete_and_exit(NULL, 0); ++ /* Unreachable */ ++} ++EXPORT_SYMBOL(__thread_exit); ++ ++/* thread_create() may block forever if it cannot create a thread or ++ * allocate memory. This is preferable to returning a NULL which Solaris ++ * style callers likely never check for... since it can't fail. */ ++kthread_t * ++__thread_create(caddr_t stk, size_t stksize, thread_func_t func, ++ const char *name, void *args, size_t len, proc_t *pp, ++ int state, pri_t pri) ++{ ++ thread_priv_t *tp; ++ struct task_struct *tsk; ++ char *p; ++ SENTRY; ++ ++ /* Option pp is simply ignored */ ++ /* Variable stack size unsupported */ ++ ASSERT(stk == NULL); ++ ++ tp = kmem_alloc(sizeof(thread_priv_t), KM_PUSHPAGE); ++ if (tp == NULL) ++ SRETURN(NULL); ++ ++ tp->tp_magic = TP_MAGIC; ++ tp->tp_name_size = strlen(name) + 1; ++ ++ tp->tp_name = kmem_alloc(tp->tp_name_size, KM_PUSHPAGE); ++ if (tp->tp_name == NULL) { ++ kmem_free(tp, sizeof(thread_priv_t)); ++ SRETURN(NULL); ++ } ++ ++ strncpy(tp->tp_name, name, tp->tp_name_size); ++ ++ /* Strip trailing "_thread" from passed name which will be the func ++ * name since the exposed API has no parameter for passing a name. ++ */ ++ p = strstr(tp->tp_name, "_thread"); ++ if (p) ++ p[0] = '\0'; ++ ++ tp->tp_func = func; ++ tp->tp_args = args; ++ tp->tp_len = len; ++ tp->tp_state = state; ++ tp->tp_pri = pri; ++ ++ tsk = kthread_create(thread_generic_wrapper, (void *)tp, ++ "%s", tp->tp_name); ++ if (IS_ERR(tsk)) { ++ SERROR("Failed to create thread: %ld\n", PTR_ERR(tsk)); ++ SRETURN(NULL); ++ } ++ ++ wake_up_process(tsk); ++ SRETURN((kthread_t *)tsk); ++} ++EXPORT_SYMBOL(__thread_create); +diff -uNr linux-3.2.33-go.orig/spl/spl/spl-time.c linux-3.2.33-go/spl/spl/spl-time.c +--- linux-3.2.33-go.orig/spl/spl/spl-time.c 1970-01-01 01:00:00.000000000 +0100 ++++ linux-3.2.33-go/spl/spl/spl-time.c 2012-11-16 23:22:32.410192863 +0100 +@@ -0,0 +1,93 @@ ++/*****************************************************************************\ ++ * Copyright (C) 2007-2010 Lawrence Livermore National Security, LLC. ++ * Copyright (C) 2007 The Regents of the University of California. ++ * Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER). ++ * Written by Brian Behlendorf . 
++ * UCRL-CODE-235197 ++ * ++ * This file is part of the SPL, Solaris Porting Layer. ++ * For details, see . ++ * ++ * The SPL is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License as published by the ++ * Free Software Foundation; either version 2 of the License, or (at your ++ * option) any later version. ++ * ++ * The SPL is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * for more details. ++ * ++ * You should have received a copy of the GNU General Public License along ++ * with the SPL. If not, see . ++ ***************************************************************************** ++ * Solaris Porting Layer (SPL) Time Implementation. ++\*****************************************************************************/ ++ ++#include ++#include ++ ++#ifdef HAVE_MONOTONIC_CLOCK ++extern unsigned long long monotonic_clock(void); ++#endif ++ ++#ifdef DEBUG_SUBSYSTEM ++#undef DEBUG_SUBSYSTEM ++#endif ++ ++#define DEBUG_SUBSYSTEM S_TIME ++ ++void ++__gethrestime(timestruc_t *ts) ++{ ++ struct timeval tv; ++ ++ do_gettimeofday(&tv); ++ ts->tv_sec = tv.tv_sec; ++ ts->tv_nsec = tv.tv_usec * NSEC_PER_USEC; ++} ++EXPORT_SYMBOL(__gethrestime); ++ ++/* Use monotonic_clock() by default. It's faster and is available on older ++ * kernels, but few architectures have them, so we must fallback to ++ * do_posix_clock_monotonic_gettime(). ++ */ ++hrtime_t ++__gethrtime(void) { ++#ifdef HAVE_MONOTONIC_CLOCK ++ unsigned long long res = monotonic_clock(); ++ ++ /* Deal with signed/unsigned mismatch */ ++ return (hrtime_t)(res & ~(1ULL << 63)); ++#else ++ struct timespec ts; ++ ++ do_posix_clock_monotonic_gettime(&ts); ++ return (((hrtime_t)ts.tv_sec * NSEC_PER_SEC) + ts.tv_nsec); ++#endif ++} ++EXPORT_SYMBOL(__gethrtime); ++ ++/* set_normalized_timespec() API changes ++ * 2.6.0 - 2.6.15: Inline function provided by linux/time.h ++ * 2.6.16 - 2.6.25: Function prototype defined but not exported ++ * 2.6.26 - 2.6.x: Function defined and exported ++ */ ++#if !defined(HAVE_SET_NORMALIZED_TIMESPEC_INLINE) && \ ++ !defined(HAVE_SET_NORMALIZED_TIMESPEC_EXPORT) ++void ++set_normalized_timespec(struct timespec *ts, time_t sec, long nsec) ++{ ++ while (nsec >= NSEC_PER_SEC) { ++ nsec -= NSEC_PER_SEC; ++ ++sec; ++ } ++ while (nsec < 0) { ++ nsec += NSEC_PER_SEC; ++ --sec; ++ } ++ ts->tv_sec = sec; ++ ts->tv_nsec = nsec; ++} ++EXPORT_SYMBOL(set_normalized_timespec); ++#endif +diff -uNr linux-3.2.33-go.orig/spl/spl/spl-tsd.c linux-3.2.33-go/spl/spl/spl-tsd.c +--- linux-3.2.33-go.orig/spl/spl/spl-tsd.c 1970-01-01 01:00:00.000000000 +0100 ++++ linux-3.2.33-go/spl/spl/spl-tsd.c 2012-11-16 23:22:32.410192863 +0100 +@@ -0,0 +1,641 @@ ++/*****************************************************************************\ ++ * Copyright (C) 2010 Lawrence Livermore National Security, LLC. ++ * Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER). ++ * Written by Brian Behlendorf . ++ * UCRL-CODE-235197 ++ * ++ * This file is part of the SPL, Solaris Porting Layer. ++ * For details, see . ++ * ++ * The SPL is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License as published by the ++ * Free Software Foundation; either version 2 of the License, or (at your ++ * option) any later version. 
++ * ++ * The SPL is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * for more details. ++ * ++ * You should have received a copy of the GNU General Public License along ++ * with the SPL. If not, see . ++ ***************************************************************************** ++ * Solaris Porting Layer (SPL) Thread Specific Data Implementation. ++ * ++ * Thread specific data has implemented using a hash table, this avoids ++ * the need to add a member to the task structure and allows maximum ++ * portability between kernels. This implementation has been optimized ++ * to keep the tsd_set() and tsd_get() times as small as possible. ++ * ++ * The majority of the entries in the hash table are for specific tsd ++ * entries. These entries are hashed by the product of their key and ++ * pid because by design the key and pid are guaranteed to be unique. ++ * Their product also has the desirable properly that it will be uniformly ++ * distributed over the hash bins providing neither the pid nor key is zero. ++ * Under linux the zero pid is always the init process and thus won't be ++ * used, and this implementation is careful to never to assign a zero key. ++ * By default the hash table is sized to 512 bins which is expected to ++ * be sufficient for light to moderate usage of thread specific data. ++ * ++ * The hash table contains two additional type of entries. They first ++ * type is entry is called a 'key' entry and it is added to the hash during ++ * tsd_create(). It is used to store the address of the destructor function ++ * and it is used as an anchor point. All tsd entries which use the same ++ * key will be linked to this entry. This is used during tsd_destory() to ++ * quickly call the destructor function for all tsd associated with the key. ++ * The 'key' entry may be looked up with tsd_hash_search() by passing the ++ * key you wish to lookup and DTOR_PID constant as the pid. ++ * ++ * The second type of entry is called a 'pid' entry and it is added to the ++ * hash the first time a process set a key. The 'pid' entry is also used ++ * as an anchor and all tsd for the process will be linked to it. This ++ * list is using during tsd_exit() to ensure all registered destructors ++ * are run for the process. The 'pid' entry may be looked up with ++ * tsd_hash_search() by passing the PID_KEY constant as the key, and ++ * the process pid. Note that tsd_exit() is called by thread_exit() ++ * so if your using the Solaris thread API you should not need to call ++ * tsd_exit() directly. 
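++ *
++ * A minimal usage sketch (illustrative only; my_key, my_dtor and
++ * my_data are placeholder names, not part of this file):
++ *
++ *   static uint_t my_key = 0;        key must start out as 0
++ *   tsd_create(&my_key, my_dtor);    register key and destructor once
++ *   tsd_set(my_key, my_data);        per thread, store a value
++ *   my_data = tsd_get(my_key);       per thread, fetch it back
++ *   tsd_destroy(&my_key);            runs my_dtor for every thread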
++ * ++\*****************************************************************************/ ++ ++#include ++#include ++#include ++#include ++ ++#ifdef DEBUG_SUBSYSTEM ++#undef DEBUG_SUBSYSTEM ++#endif ++ ++#define DEBUG_SUBSYSTEM SS_TSD ++#define DEBUG_SUBSYSTEM SS_TSD ++ ++typedef struct tsd_hash_bin { ++ spinlock_t hb_lock; ++ struct hlist_head hb_head; ++} tsd_hash_bin_t; ++ ++typedef struct tsd_hash_table { ++ spinlock_t ht_lock; ++ uint_t ht_bits; ++ uint_t ht_key; ++ tsd_hash_bin_t *ht_bins; ++} tsd_hash_table_t; ++ ++typedef struct tsd_hash_entry { ++ uint_t he_key; ++ pid_t he_pid; ++ dtor_func_t he_dtor; ++ void *he_value; ++ struct hlist_node he_list; ++ struct list_head he_key_list; ++ struct list_head he_pid_list; ++} tsd_hash_entry_t; ++ ++static tsd_hash_table_t *tsd_hash_table = NULL; ++ ++ ++/* ++ * tsd_hash_search - searches hash table for tsd_hash_entry ++ * @table: hash table ++ * @key: search key ++ * @pid: search pid ++ */ ++static tsd_hash_entry_t * ++tsd_hash_search(tsd_hash_table_t *table, uint_t key, pid_t pid) ++{ ++ struct hlist_node *node; ++ tsd_hash_entry_t *entry; ++ tsd_hash_bin_t *bin; ++ ulong_t hash; ++ SENTRY; ++ ++ hash = hash_long((ulong_t)key * (ulong_t)pid, table->ht_bits); ++ bin = &table->ht_bins[hash]; ++ spin_lock(&bin->hb_lock); ++ hlist_for_each_entry(entry, node, &bin->hb_head, he_list) { ++ if ((entry->he_key == key) && (entry->he_pid == pid)) { ++ spin_unlock(&bin->hb_lock); ++ SRETURN(entry); ++ } ++ } ++ ++ spin_unlock(&bin->hb_lock); ++ SRETURN(NULL); ++} ++ ++/* ++ * tsd_hash_dtor - call the destructor and free all entries on the list ++ * @work: list of hash entries ++ * ++ * For a list of entries which have all already been removed from the ++ * hash call their registered destructor then free the associated memory. ++ */ ++static void ++tsd_hash_dtor(struct hlist_head *work) ++{ ++ tsd_hash_entry_t *entry; ++ SENTRY; ++ ++ while (!hlist_empty(work)) { ++ entry = hlist_entry(work->first, tsd_hash_entry_t, he_list); ++ hlist_del(&entry->he_list); ++ ++ if (entry->he_dtor && entry->he_pid != DTOR_PID) ++ entry->he_dtor(entry->he_value); ++ ++ kmem_free(entry, sizeof(tsd_hash_entry_t)); ++ } ++ ++ SEXIT; ++} ++ ++/* ++ * tsd_hash_add - adds an entry to hash table ++ * @table: hash table ++ * @key: search key ++ * @pid: search pid ++ * ++ * The caller is responsible for ensuring the unique key/pid do not ++ * already exist in the hash table. This possible because all entries ++ * are thread specific thus a concurrent thread will never attempt to ++ * add this key/pid. Because multiple bins must be checked to add ++ * links to the dtor and pid entries the entire table is locked. 
++ */ ++static int ++tsd_hash_add(tsd_hash_table_t *table, uint_t key, pid_t pid, void *value) ++{ ++ tsd_hash_entry_t *entry, *dtor_entry, *pid_entry; ++ tsd_hash_bin_t *bin; ++ ulong_t hash; ++ int rc = 0; ++ SENTRY; ++ ++ ASSERT3P(tsd_hash_search(table, key, pid), ==, NULL); ++ ++ /* New entry allocate structure, set value, and add to hash */ ++ entry = kmem_alloc(sizeof(tsd_hash_entry_t), KM_PUSHPAGE); ++ if (entry == NULL) ++ SRETURN(ENOMEM); ++ ++ entry->he_key = key; ++ entry->he_pid = pid; ++ entry->he_value = value; ++ INIT_HLIST_NODE(&entry->he_list); ++ INIT_LIST_HEAD(&entry->he_key_list); ++ INIT_LIST_HEAD(&entry->he_pid_list); ++ ++ spin_lock(&table->ht_lock); ++ ++ /* Destructor entry must exist for all valid keys */ ++ dtor_entry = tsd_hash_search(table, entry->he_key, DTOR_PID); ++ ASSERT3P(dtor_entry, !=, NULL); ++ entry->he_dtor = dtor_entry->he_dtor; ++ ++ /* Process entry must exist for all valid processes */ ++ pid_entry = tsd_hash_search(table, PID_KEY, entry->he_pid); ++ ASSERT3P(pid_entry, !=, NULL); ++ ++ hash = hash_long((ulong_t)key * (ulong_t)pid, table->ht_bits); ++ bin = &table->ht_bins[hash]; ++ spin_lock(&bin->hb_lock); ++ ++ /* Add to the hash, key, and pid lists */ ++ hlist_add_head(&entry->he_list, &bin->hb_head); ++ list_add(&entry->he_key_list, &dtor_entry->he_key_list); ++ list_add(&entry->he_pid_list, &pid_entry->he_pid_list); ++ ++ spin_unlock(&bin->hb_lock); ++ spin_unlock(&table->ht_lock); ++ ++ SRETURN(rc); ++} ++ ++/* ++ * tsd_hash_add_key - adds a destructor entry to the hash table ++ * @table: hash table ++ * @keyp: search key ++ * @dtor: key destructor ++ * ++ * For every unique key there is a single entry in the hash which is used ++ * as anchor. All other thread specific entries for this key are linked ++ * to this anchor via the 'he_key_list' list head. On return they keyp ++ * will be set to the next available key for the hash table. 
++ */ ++static int ++tsd_hash_add_key(tsd_hash_table_t *table, uint_t *keyp, dtor_func_t dtor) ++{ ++ tsd_hash_entry_t *tmp_entry, *entry; ++ tsd_hash_bin_t *bin; ++ ulong_t hash; ++ int keys_checked = 0; ++ SENTRY; ++ ++ ASSERT3P(table, !=, NULL); ++ ++ /* Allocate entry to be used as a destructor for this key */ ++ entry = kmem_alloc(sizeof(tsd_hash_entry_t), KM_PUSHPAGE); ++ if (entry == NULL) ++ SRETURN(ENOMEM); ++ ++ /* Determine next available key value */ ++ spin_lock(&table->ht_lock); ++ do { ++ /* Limited to TSD_KEYS_MAX concurrent unique keys */ ++ if (table->ht_key++ > TSD_KEYS_MAX) ++ table->ht_key = 1; ++ ++ /* Ensure failure when all TSD_KEYS_MAX keys are in use */ ++ if (keys_checked++ >= TSD_KEYS_MAX) { ++ spin_unlock(&table->ht_lock); ++ SRETURN(ENOENT); ++ } ++ ++ tmp_entry = tsd_hash_search(table, table->ht_key, DTOR_PID); ++ } while (tmp_entry); ++ ++ /* Add destructor entry in to hash table */ ++ entry->he_key = *keyp = table->ht_key; ++ entry->he_pid = DTOR_PID; ++ entry->he_dtor = dtor; ++ entry->he_value = NULL; ++ INIT_HLIST_NODE(&entry->he_list); ++ INIT_LIST_HEAD(&entry->he_key_list); ++ INIT_LIST_HEAD(&entry->he_pid_list); ++ ++ hash = hash_long((ulong_t)*keyp * (ulong_t)DTOR_PID, table->ht_bits); ++ bin = &table->ht_bins[hash]; ++ spin_lock(&bin->hb_lock); ++ ++ hlist_add_head(&entry->he_list, &bin->hb_head); ++ ++ spin_unlock(&bin->hb_lock); ++ spin_unlock(&table->ht_lock); ++ ++ SRETURN(0); ++} ++ ++/* ++ * tsd_hash_add_pid - adds a process entry to the hash table ++ * @table: hash table ++ * @pid: search pid ++ * ++ * For every process these is a single entry in the hash which is used ++ * as anchor. All other thread specific entries for this process are ++ * linked to this anchor via the 'he_pid_list' list head. ++ */ ++static int ++tsd_hash_add_pid(tsd_hash_table_t *table, pid_t pid) ++{ ++ tsd_hash_entry_t *entry; ++ tsd_hash_bin_t *bin; ++ ulong_t hash; ++ SENTRY; ++ ++ /* Allocate entry to be used as the process reference */ ++ entry = kmem_alloc(sizeof(tsd_hash_entry_t), KM_PUSHPAGE); ++ if (entry == NULL) ++ SRETURN(ENOMEM); ++ ++ spin_lock(&table->ht_lock); ++ entry->he_key = PID_KEY; ++ entry->he_pid = pid; ++ entry->he_dtor = NULL; ++ entry->he_value = NULL; ++ INIT_HLIST_NODE(&entry->he_list); ++ INIT_LIST_HEAD(&entry->he_key_list); ++ INIT_LIST_HEAD(&entry->he_pid_list); ++ ++ hash = hash_long((ulong_t)PID_KEY * (ulong_t)pid, table->ht_bits); ++ bin = &table->ht_bins[hash]; ++ spin_lock(&bin->hb_lock); ++ ++ hlist_add_head(&entry->he_list, &bin->hb_head); ++ ++ spin_unlock(&bin->hb_lock); ++ spin_unlock(&table->ht_lock); ++ ++ SRETURN(0); ++} ++ ++/* ++ * tsd_hash_del - delete an entry from hash table, key, and pid lists ++ * @table: hash table ++ * @key: search key ++ * @pid: search pid ++ */ ++static void ++tsd_hash_del(tsd_hash_table_t *table, tsd_hash_entry_t *entry) ++{ ++ SENTRY; ++ ++ ASSERT(spin_is_locked(&table->ht_lock)); ++ hlist_del(&entry->he_list); ++ list_del_init(&entry->he_key_list); ++ list_del_init(&entry->he_pid_list); ++ ++ SEXIT; ++} ++ ++/* ++ * tsd_hash_table_init - allocate a hash table ++ * @bits: hash table size ++ * ++ * A hash table with 2^bits bins will be created, it may not be resized ++ * after the fact and must be free'd with tsd_hash_table_fini(). 
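++ *
++ * For example, bits = 9 yields 1 << 9 == 512 bins, the default table
++ * size mentioned in the header comment above; spl_tsd_init() below
++ * passes TSD_HASH_TABLE_BITS_DEFAULT for this purpose.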
++ */ ++static tsd_hash_table_t * ++tsd_hash_table_init(uint_t bits) ++{ ++ tsd_hash_table_t *table; ++ int hash, size = (1 << bits); ++ SENTRY; ++ ++ table = kmem_zalloc(sizeof(tsd_hash_table_t), KM_SLEEP); ++ if (table == NULL) ++ SRETURN(NULL); ++ ++ table->ht_bins = kmem_zalloc(sizeof(tsd_hash_bin_t) * size, ++ KM_SLEEP | KM_NODEBUG); ++ if (table->ht_bins == NULL) { ++ kmem_free(table, sizeof(tsd_hash_table_t)); ++ SRETURN(NULL); ++ } ++ ++ for (hash = 0; hash < size; hash++) { ++ spin_lock_init(&table->ht_bins[hash].hb_lock); ++ INIT_HLIST_HEAD(&table->ht_bins[hash].hb_head); ++ } ++ ++ spin_lock_init(&table->ht_lock); ++ table->ht_bits = bits; ++ table->ht_key = 1; ++ ++ SRETURN(table); ++} ++ ++/* ++ * tsd_hash_table_fini - free a hash table ++ * @table: hash table ++ * ++ * Free a hash table allocated by tsd_hash_table_init(). If the hash ++ * table is not empty this function will call the proper destructor for ++ * all remaining entries before freeing the memory used by those entries. ++ */ ++static void ++tsd_hash_table_fini(tsd_hash_table_t *table) ++{ ++ HLIST_HEAD(work); ++ tsd_hash_bin_t *bin; ++ tsd_hash_entry_t *entry; ++ int size, i; ++ SENTRY; ++ ++ ASSERT3P(table, !=, NULL); ++ spin_lock(&table->ht_lock); ++ for (i = 0, size = (1 << table->ht_bits); i < size; i++) { ++ bin = &table->ht_bins[i]; ++ spin_lock(&bin->hb_lock); ++ while (!hlist_empty(&bin->hb_head)) { ++ entry = hlist_entry(bin->hb_head.first, ++ tsd_hash_entry_t, he_list); ++ tsd_hash_del(table, entry); ++ hlist_add_head(&entry->he_list, &work); ++ } ++ spin_unlock(&bin->hb_lock); ++ } ++ spin_unlock(&table->ht_lock); ++ ++ tsd_hash_dtor(&work); ++ kmem_free(table->ht_bins, sizeof(tsd_hash_bin_t)*(1<ht_bits)); ++ kmem_free(table, sizeof(tsd_hash_table_t)); ++ ++ SEXIT; ++} ++ ++/* ++ * tsd_set - set thread specific data ++ * @key: lookup key ++ * @value: value to set ++ * ++ * Caller must prevent racing tsd_create() or tsd_destroy(), protected ++ * from racing tsd_get() or tsd_set() because it is thread specific. ++ * This function has been optimized to be fast for the update case. ++ * When setting the tsd initially it will be slower due to additional ++ * required locking and potential memory allocations. ++ */ ++int ++tsd_set(uint_t key, void *value) ++{ ++ tsd_hash_table_t *table; ++ tsd_hash_entry_t *entry; ++ pid_t pid; ++ int rc; ++ SENTRY; ++ ++ table = tsd_hash_table; ++ pid = curthread->pid; ++ ASSERT3P(table, !=, NULL); ++ ++ if ((key == 0) || (key > TSD_KEYS_MAX)) ++ SRETURN(EINVAL); ++ ++ /* Entry already exists in hash table update value */ ++ entry = tsd_hash_search(table, key, pid); ++ if (entry) { ++ entry->he_value = value; ++ SRETURN(0); ++ } ++ ++ /* Add a process entry to the hash if not yet exists */ ++ entry = tsd_hash_search(table, PID_KEY, pid); ++ if (entry == NULL) { ++ rc = tsd_hash_add_pid(table, pid); ++ if (rc) ++ SRETURN(rc); ++ } ++ ++ rc = tsd_hash_add(table, key, pid, value); ++ SRETURN(rc); ++} ++EXPORT_SYMBOL(tsd_set); ++ ++/* ++ * tsd_get - get thread specific data ++ * @key: lookup key ++ * ++ * Caller must prevent racing tsd_create() or tsd_destroy(). This ++ * implementation is designed to be fast and scalable, it does not ++ * lock the entire table only a single hash bin. 
++ */ ++void * ++tsd_get(uint_t key) ++{ ++ tsd_hash_entry_t *entry; ++ SENTRY; ++ ++ ASSERT3P(tsd_hash_table, !=, NULL); ++ ++ if ((key == 0) || (key > TSD_KEYS_MAX)) ++ SRETURN(NULL); ++ ++ entry = tsd_hash_search(tsd_hash_table, key, curthread->pid); ++ if (entry == NULL) ++ SRETURN(NULL); ++ ++ SRETURN(entry->he_value); ++} ++EXPORT_SYMBOL(tsd_get); ++ ++/* ++ * tsd_create - create thread specific data key ++ * @keyp: lookup key address ++ * @dtor: destructor called during tsd_destroy() or tsd_exit() ++ * ++ * Provided key must be set to 0 or it assumed to be already in use. ++ * The dtor is allowed to be NULL in which case no additional cleanup ++ * for the data is performed during tsd_destroy() or tsd_exit(). ++ * ++ * Caller must prevent racing tsd_set() or tsd_get(), this function is ++ * safe from racing tsd_create(), tsd_destroy(), and tsd_exit(). ++ */ ++void ++tsd_create(uint_t *keyp, dtor_func_t dtor) ++{ ++ SENTRY; ++ ++ ASSERT3P(keyp, !=, NULL); ++ if (*keyp) { ++ SEXIT; ++ return; ++ } ++ ++ (void)tsd_hash_add_key(tsd_hash_table, keyp, dtor); ++ ++ SEXIT; ++} ++EXPORT_SYMBOL(tsd_create); ++ ++/* ++ * tsd_destroy - destroy thread specific data ++ * @keyp: lookup key address ++ * ++ * Destroys the thread specific data on all threads which use this key. ++ * ++ * Caller must prevent racing tsd_set() or tsd_get(), this function is ++ * safe from racing tsd_create(), tsd_destroy(), and tsd_exit(). ++ */ ++void ++tsd_destroy(uint_t *keyp) ++{ ++ HLIST_HEAD(work); ++ tsd_hash_table_t *table; ++ tsd_hash_entry_t *dtor_entry, *entry; ++ SENTRY; ++ ++ table = tsd_hash_table; ++ ASSERT3P(table, !=, NULL); ++ ++ spin_lock(&table->ht_lock); ++ dtor_entry = tsd_hash_search(table, *keyp, DTOR_PID); ++ if (dtor_entry == NULL) { ++ spin_unlock(&table->ht_lock); ++ SEXIT; ++ return; ++ } ++ ++ /* ++ * All threads which use this key must be linked off of the ++ * DTOR_PID entry. They are removed from the hash table and ++ * linked in to a private working list to be destroyed. ++ */ ++ while (!list_empty(&dtor_entry->he_key_list)) { ++ entry = list_entry(dtor_entry->he_key_list.next, ++ tsd_hash_entry_t, he_key_list); ++ ASSERT3U(dtor_entry->he_key, ==, entry->he_key); ++ ASSERT3P(dtor_entry->he_dtor, ==, entry->he_dtor); ++ tsd_hash_del(table, entry); ++ hlist_add_head(&entry->he_list, &work); ++ } ++ ++ tsd_hash_del(table, dtor_entry); ++ hlist_add_head(&dtor_entry->he_list, &work); ++ spin_unlock(&table->ht_lock); ++ ++ tsd_hash_dtor(&work); ++ *keyp = 0; ++ ++ SEXIT; ++} ++EXPORT_SYMBOL(tsd_destroy); ++ ++/* ++ * tsd_exit - destroys all thread specific data for this thread ++ * ++ * Destroys all the thread specific data for this thread. ++ * ++ * Caller must prevent racing tsd_set() or tsd_get(), this function is ++ * safe from racing tsd_create(), tsd_destroy(), and tsd_exit(). ++ */ ++void ++tsd_exit(void) ++{ ++ HLIST_HEAD(work); ++ tsd_hash_table_t *table; ++ tsd_hash_entry_t *pid_entry, *entry; ++ SENTRY; ++ ++ table = tsd_hash_table; ++ ASSERT3P(table, !=, NULL); ++ ++ spin_lock(&table->ht_lock); ++ pid_entry = tsd_hash_search(table, PID_KEY, curthread->pid); ++ if (pid_entry == NULL) { ++ spin_unlock(&table->ht_lock); ++ SEXIT; ++ return; ++ } ++ ++ /* ++ * All keys associated with this pid must be linked off of the ++ * PID_KEY entry. They are removed from the hash table and ++ * linked in to a private working to be destroyed. 
++ */ ++ while (!list_empty(&pid_entry->he_pid_list)) { ++ entry = list_entry(pid_entry->he_pid_list.next, ++ tsd_hash_entry_t, he_pid_list); ++ ASSERT3U(pid_entry->he_pid, ==, entry->he_pid); ++ tsd_hash_del(table, entry); ++ hlist_add_head(&entry->he_list, &work); ++ } ++ ++ tsd_hash_del(table, pid_entry); ++ hlist_add_head(&pid_entry->he_list, &work); ++ spin_unlock(&table->ht_lock); ++ ++ tsd_hash_dtor(&work); ++ ++ SEXIT; ++} ++EXPORT_SYMBOL(tsd_exit); ++ ++int ++spl_tsd_init(void) ++{ ++ SENTRY; ++ ++ tsd_hash_table = tsd_hash_table_init(TSD_HASH_TABLE_BITS_DEFAULT); ++ if (tsd_hash_table == NULL) ++ SRETURN(1); ++ ++ SRETURN(0); ++} ++ ++void ++spl_tsd_fini(void) ++{ ++ SENTRY; ++ tsd_hash_table_fini(tsd_hash_table); ++ tsd_hash_table = NULL; ++ SEXIT; ++} +diff -uNr linux-3.2.33-go.orig/spl/spl/spl-vnode.c linux-3.2.33-go/spl/spl/spl-vnode.c +--- linux-3.2.33-go.orig/spl/spl/spl-vnode.c 1970-01-01 01:00:00.000000000 +0100 ++++ linux-3.2.33-go/spl/spl/spl-vnode.c 2012-11-16 23:22:32.410192863 +0100 +@@ -0,0 +1,1047 @@ ++/*****************************************************************************\ ++ * Copyright (C) 2007-2010 Lawrence Livermore National Security, LLC. ++ * Copyright (C) 2007 The Regents of the University of California. ++ * Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER). ++ * Written by Brian Behlendorf . ++ * UCRL-CODE-235197 ++ * ++ * This file is part of the SPL, Solaris Porting Layer. ++ * For details, see . ++ * ++ * The SPL is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License as published by the ++ * Free Software Foundation; either version 2 of the License, or (at your ++ * option) any later version. ++ * ++ * The SPL is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * for more details. ++ * ++ * You should have received a copy of the GNU General Public License along ++ * with the SPL. If not, see . ++ ***************************************************************************** ++ * Solaris Porting Layer (SPL) Vnode Implementation. 
++\*****************************************************************************/ ++ ++#include ++#include ++#include ++ ++#ifdef SS_DEBUG_SUBSYS ++#undef SS_DEBUG_SUBSYS ++#endif ++ ++#define SS_DEBUG_SUBSYS SS_VNODE ++ ++vnode_t *rootdir = (vnode_t *)0xabcd1234; ++EXPORT_SYMBOL(rootdir); ++ ++static spl_kmem_cache_t *vn_cache; ++static spl_kmem_cache_t *vn_file_cache; ++ ++static DEFINE_SPINLOCK(vn_file_lock); ++static LIST_HEAD(vn_file_list); ++ ++#ifdef HAVE_KERN_PATH_PARENT_HEADER ++#ifndef HAVE_KERN_PATH_PARENT_SYMBOL ++kern_path_parent_t kern_path_parent_fn = SYMBOL_POISON; ++EXPORT_SYMBOL(kern_path_parent_fn); ++#endif /* HAVE_KERN_PATH_PARENT_SYMBOL */ ++#endif /* HAVE_KERN_PATH_PARENT_HEADER */ ++ ++#ifdef HAVE_KERN_PATH_LOCKED ++kern_path_locked_t kern_path_locked_fn = SYMBOL_POISON; ++#endif /* HAVE_KERN_PATH_LOCKED */ ++ ++vtype_t ++vn_mode_to_vtype(mode_t mode) ++{ ++ if (S_ISREG(mode)) ++ return VREG; ++ ++ if (S_ISDIR(mode)) ++ return VDIR; ++ ++ if (S_ISCHR(mode)) ++ return VCHR; ++ ++ if (S_ISBLK(mode)) ++ return VBLK; ++ ++ if (S_ISFIFO(mode)) ++ return VFIFO; ++ ++ if (S_ISLNK(mode)) ++ return VLNK; ++ ++ if (S_ISSOCK(mode)) ++ return VSOCK; ++ ++ if (S_ISCHR(mode)) ++ return VCHR; ++ ++ return VNON; ++} /* vn_mode_to_vtype() */ ++EXPORT_SYMBOL(vn_mode_to_vtype); ++ ++mode_t ++vn_vtype_to_mode(vtype_t vtype) ++{ ++ if (vtype == VREG) ++ return S_IFREG; ++ ++ if (vtype == VDIR) ++ return S_IFDIR; ++ ++ if (vtype == VCHR) ++ return S_IFCHR; ++ ++ if (vtype == VBLK) ++ return S_IFBLK; ++ ++ if (vtype == VFIFO) ++ return S_IFIFO; ++ ++ if (vtype == VLNK) ++ return S_IFLNK; ++ ++ if (vtype == VSOCK) ++ return S_IFSOCK; ++ ++ return VNON; ++} /* vn_vtype_to_mode() */ ++EXPORT_SYMBOL(vn_vtype_to_mode); ++ ++vnode_t * ++vn_alloc(int flag) ++{ ++ vnode_t *vp; ++ SENTRY; ++ ++ vp = kmem_cache_alloc(vn_cache, flag); ++ if (vp != NULL) { ++ vp->v_file = NULL; ++ vp->v_type = 0; ++ } ++ ++ SRETURN(vp); ++} /* vn_alloc() */ ++EXPORT_SYMBOL(vn_alloc); ++ ++void ++vn_free(vnode_t *vp) ++{ ++ SENTRY; ++ kmem_cache_free(vn_cache, vp); ++ SEXIT; ++} /* vn_free() */ ++EXPORT_SYMBOL(vn_free); ++ ++int ++vn_open(const char *path, uio_seg_t seg, int flags, int mode, ++ vnode_t **vpp, int x1, void *x2) ++{ ++ struct file *fp; ++ struct kstat stat; ++ int rc, saved_umask = 0; ++ gfp_t saved_gfp; ++ vnode_t *vp; ++ SENTRY; ++ ++ ASSERT(flags & (FWRITE | FREAD)); ++ ASSERT(seg == UIO_SYSSPACE); ++ ASSERT(vpp); ++ *vpp = NULL; ++ ++ if (!(flags & FCREAT) && (flags & FWRITE)) ++ flags |= FEXCL; ++ ++ /* Note for filp_open() the two low bits must be remapped to mean: ++ * 01 - read-only -> 00 read-only ++ * 10 - write-only -> 01 write-only ++ * 11 - read-write -> 10 read-write ++ */ ++ flags--; ++ ++ if (flags & FCREAT) ++ saved_umask = xchg(¤t->fs->umask, 0); ++ ++ fp = filp_open(path, flags, mode); ++ ++ if (flags & FCREAT) ++ (void)xchg(¤t->fs->umask, saved_umask); ++ ++ if (IS_ERR(fp)) ++ SRETURN(-PTR_ERR(fp)); ++ ++ rc = vfs_getattr(fp->f_vfsmnt, fp->f_dentry, &stat); ++ if (rc) { ++ filp_close(fp, 0); ++ SRETURN(-rc); ++ } ++ ++ vp = vn_alloc(KM_SLEEP); ++ if (!vp) { ++ filp_close(fp, 0); ++ SRETURN(ENOMEM); ++ } ++ ++ saved_gfp = mapping_gfp_mask(fp->f_mapping); ++ mapping_set_gfp_mask(fp->f_mapping, saved_gfp & ~(__GFP_IO|__GFP_FS)); ++ ++ mutex_enter(&vp->v_lock); ++ vp->v_type = vn_mode_to_vtype(stat.mode); ++ vp->v_file = fp; ++ vp->v_gfp_mask = saved_gfp; ++ *vpp = vp; ++ mutex_exit(&vp->v_lock); ++ ++ SRETURN(0); ++} /* vn_open() */ ++EXPORT_SYMBOL(vn_open); ++ ++int ++vn_openat(const 
char *path, uio_seg_t seg, int flags, int mode, ++ vnode_t **vpp, int x1, void *x2, vnode_t *vp, int fd) ++{ ++ char *realpath; ++ int len, rc; ++ SENTRY; ++ ++ ASSERT(vp == rootdir); ++ ++ len = strlen(path) + 2; ++ realpath = kmalloc(len, GFP_KERNEL); ++ if (!realpath) ++ SRETURN(ENOMEM); ++ ++ (void)snprintf(realpath, len, "/%s", path); ++ rc = vn_open(realpath, seg, flags, mode, vpp, x1, x2); ++ kfree(realpath); ++ ++ SRETURN(rc); ++} /* vn_openat() */ ++EXPORT_SYMBOL(vn_openat); ++ ++int ++vn_rdwr(uio_rw_t uio, vnode_t *vp, void *addr, ssize_t len, offset_t off, ++ uio_seg_t seg, int ioflag, rlim64_t x2, void *x3, ssize_t *residp) ++{ ++ loff_t offset; ++ mm_segment_t saved_fs; ++ struct file *fp; ++ int rc; ++ SENTRY; ++ ++ ASSERT(uio == UIO_WRITE || uio == UIO_READ); ++ ASSERT(vp); ++ ASSERT(vp->v_file); ++ ASSERT(seg == UIO_SYSSPACE); ++ ASSERT((ioflag & ~FAPPEND) == 0); ++ ASSERT(x2 == RLIM64_INFINITY); ++ ++ fp = vp->v_file; ++ ++ offset = off; ++ if (ioflag & FAPPEND) ++ offset = fp->f_pos; ++ ++ /* Writable user data segment must be briefly increased for this ++ * process so we can use the user space read call paths to write ++ * in to memory allocated by the kernel. */ ++ saved_fs = get_fs(); ++ set_fs(get_ds()); ++ ++ if (uio & UIO_WRITE) ++ rc = vfs_write(fp, addr, len, &offset); ++ else ++ rc = vfs_read(fp, addr, len, &offset); ++ ++ set_fs(saved_fs); ++ fp->f_pos = offset; ++ ++ if (rc < 0) ++ SRETURN(-rc); ++ ++ if (residp) { ++ *residp = len - rc; ++ } else { ++ if (rc != len) ++ SRETURN(EIO); ++ } ++ ++ SRETURN(0); ++} /* vn_rdwr() */ ++EXPORT_SYMBOL(vn_rdwr); ++ ++int ++vn_close(vnode_t *vp, int flags, int x1, int x2, void *x3, void *x4) ++{ ++ int rc; ++ SENTRY; ++ ++ ASSERT(vp); ++ ASSERT(vp->v_file); ++ ++ mapping_set_gfp_mask(vp->v_file->f_mapping, vp->v_gfp_mask); ++ rc = filp_close(vp->v_file, 0); ++ vn_free(vp); ++ ++ SRETURN(-rc); ++} /* vn_close() */ ++EXPORT_SYMBOL(vn_close); ++ ++/* vn_seek() does not actually seek it only performs bounds checking on the ++ * proposed seek. We perform minimal checking and allow vn_rdwr() to catch ++ * anything more serious. */ ++int ++vn_seek(vnode_t *vp, offset_t ooff, offset_t *noffp, void *ct) ++{ ++ return ((*noffp < 0 || *noffp > MAXOFFSET_T) ? EINVAL : 0); ++} ++EXPORT_SYMBOL(vn_seek); ++ ++#ifdef HAVE_KERN_PATH_LOCKED ++/* Based on do_unlinkat() from linux/fs/namei.c */ ++int ++vn_remove(const char *path, uio_seg_t seg, int flags) ++{ ++ struct dentry *dentry; ++ struct path parent; ++ struct inode *inode = NULL; ++ int rc = 0; ++ SENTRY; ++ ++ ASSERT(seg == UIO_SYSSPACE); ++ ASSERT(flags == RMFILE); ++ ++ dentry = spl_kern_path_locked(path, &parent); ++ rc = PTR_ERR(dentry); ++ if (!IS_ERR(dentry)) { ++ if (parent.dentry->d_name.name[parent.dentry->d_name.len]) ++ SGOTO(slashes, rc = 0); ++ ++ inode = dentry->d_inode; ++ if (!inode) ++ SGOTO(slashes, rc = 0); ++ ++ if (inode) ++ ihold(inode); ++ ++ rc = vfs_unlink(parent.dentry->d_inode, dentry); ++exit1: ++ dput(dentry); ++ } ++ ++ spl_inode_unlock(parent.dentry->d_inode); ++ if (inode) ++ iput(inode); /* truncate the inode here */ ++ ++ path_put(&parent); ++ SRETURN(-rc); ++ ++slashes: ++ rc = !dentry->d_inode ? -ENOENT : ++ S_ISDIR(dentry->d_inode->i_mode) ? 
-EISDIR : -ENOTDIR; ++ SGOTO(exit1, rc); ++} /* vn_remove() */ ++EXPORT_SYMBOL(vn_remove); ++ ++/* Based on do_rename() from linux/fs/namei.c */ ++int ++vn_rename(const char *oldname, const char *newname, int x1) ++{ ++ struct dentry *old_dir, *new_dir; ++ struct dentry *old_dentry, *new_dentry; ++ struct dentry *trap; ++ struct path old_parent, new_parent; ++ int rc = 0; ++ SENTRY; ++ ++ old_dentry = spl_kern_path_locked(oldname, &old_parent); ++ if (IS_ERR(old_dentry)) ++ SGOTO(exit, rc = PTR_ERR(old_dentry)); ++ ++ spl_inode_unlock(old_parent.dentry->d_inode); ++ ++ new_dentry = spl_kern_path_locked(newname, &new_parent); ++ if (IS_ERR(new_dentry)) ++ SGOTO(exit2, rc = PTR_ERR(new_dentry)); ++ ++ spl_inode_unlock(new_parent.dentry->d_inode); ++ ++ rc = -EXDEV; ++ if (old_parent.mnt != new_parent.mnt) ++ SGOTO(exit3, rc); ++ ++ old_dir = old_parent.dentry; ++ new_dir = new_parent.dentry; ++ trap = lock_rename(new_dir, old_dir); ++ ++ /* source should not be ancestor of target */ ++ rc = -EINVAL; ++ if (old_dentry == trap) ++ SGOTO(exit4, rc); ++ ++ /* target should not be an ancestor of source */ ++ rc = -ENOTEMPTY; ++ if (new_dentry == trap) ++ SGOTO(exit4, rc); ++ ++ /* source must exist */ ++ rc = -ENOENT; ++ if (!old_dentry->d_inode) ++ SGOTO(exit4, rc); ++ ++ /* unless the source is a directory trailing slashes give -ENOTDIR */ ++ if (!S_ISDIR(old_dentry->d_inode->i_mode)) { ++ rc = -ENOTDIR; ++ if (old_dentry->d_name.name[old_dentry->d_name.len]) ++ SGOTO(exit4, rc); ++ if (new_dentry->d_name.name[new_dentry->d_name.len]) ++ SGOTO(exit4, rc); ++ } ++ ++#ifdef HAVE_4ARGS_VFS_RENAME ++ rc = vfs_rename(old_dir->d_inode, old_dentry, ++ new_dir->d_inode, new_dentry); ++#else ++ rc = vfs_rename(old_dir->d_inode, old_dentry, oldnd.nd_mnt, ++ new_dir->d_inode, new_dentry, newnd.nd_mnt); ++#endif /* HAVE_4ARGS_VFS_RENAME */ ++exit4: ++ unlock_rename(new_dir, old_dir); ++exit3: ++ dput(new_dentry); ++ path_put(&new_parent); ++exit2: ++ dput(old_dentry); ++ path_put(&old_parent); ++exit: ++ SRETURN(-rc); ++} ++EXPORT_SYMBOL(vn_rename); ++ ++#else ++static struct dentry * ++vn_lookup_hash(struct nameidata *nd) ++{ ++ return lookup_one_len((const char *)nd->last.name, ++ nd->nd_dentry, nd->last.len); ++} /* lookup_hash() */ ++ ++static void ++vn_path_release(struct nameidata *nd) ++{ ++ dput(nd->nd_dentry); ++ mntput(nd->nd_mnt); ++} ++ ++/* Modified do_unlinkat() from linux/fs/namei.c, only uses exported symbols */ ++int ++vn_remove(const char *path, uio_seg_t seg, int flags) ++{ ++ struct dentry *dentry; ++ struct nameidata nd; ++ struct inode *inode = NULL; ++ int rc = 0; ++ SENTRY; ++ ++ ASSERT(seg == UIO_SYSSPACE); ++ ASSERT(flags == RMFILE); ++ ++ rc = spl_kern_path_parent(path, &nd); ++ if (rc) ++ SGOTO(exit, rc); ++ ++ rc = -EISDIR; ++ if (nd.last_type != LAST_NORM) ++ SGOTO(exit1, rc); ++ ++ spl_inode_lock_nested(nd.nd_dentry->d_inode, I_MUTEX_PARENT); ++ dentry = vn_lookup_hash(&nd); ++ rc = PTR_ERR(dentry); ++ if (!IS_ERR(dentry)) { ++ /* Why not before? 
Because we want correct rc value */ ++ if (nd.last.name[nd.last.len]) ++ SGOTO(slashes, rc); ++ ++ inode = dentry->d_inode; ++ if (inode) ++ atomic_inc(&inode->i_count); ++#ifdef HAVE_2ARGS_VFS_UNLINK ++ rc = vfs_unlink(nd.nd_dentry->d_inode, dentry); ++#else ++ rc = vfs_unlink(nd.nd_dentry->d_inode, dentry, nd.nd_mnt); ++#endif /* HAVE_2ARGS_VFS_UNLINK */ ++exit2: ++ dput(dentry); ++ } ++ ++ spl_inode_unlock(nd.nd_dentry->d_inode); ++ if (inode) ++ iput(inode); /* truncate the inode here */ ++exit1: ++ vn_path_release(&nd); ++exit: ++ SRETURN(-rc); ++ ++slashes: ++ rc = !dentry->d_inode ? -ENOENT : ++ S_ISDIR(dentry->d_inode->i_mode) ? -EISDIR : -ENOTDIR; ++ SGOTO(exit2, rc); ++} /* vn_remove() */ ++EXPORT_SYMBOL(vn_remove); ++ ++/* Modified do_rename() from linux/fs/namei.c, only uses exported symbols */ ++int ++vn_rename(const char *oldname, const char *newname, int x1) ++{ ++ struct dentry *old_dir, *new_dir; ++ struct dentry *old_dentry, *new_dentry; ++ struct dentry *trap; ++ struct nameidata oldnd, newnd; ++ int rc = 0; ++ SENTRY; ++ ++ rc = spl_kern_path_parent(oldname, &oldnd); ++ if (rc) ++ SGOTO(exit, rc); ++ ++ rc = spl_kern_path_parent(newname, &newnd); ++ if (rc) ++ SGOTO(exit1, rc); ++ ++ rc = -EXDEV; ++ if (oldnd.nd_mnt != newnd.nd_mnt) ++ SGOTO(exit2, rc); ++ ++ old_dir = oldnd.nd_dentry; ++ rc = -EBUSY; ++ if (oldnd.last_type != LAST_NORM) ++ SGOTO(exit2, rc); ++ ++ new_dir = newnd.nd_dentry; ++ if (newnd.last_type != LAST_NORM) ++ SGOTO(exit2, rc); ++ ++ trap = lock_rename(new_dir, old_dir); ++ ++ old_dentry = vn_lookup_hash(&oldnd); ++ ++ rc = PTR_ERR(old_dentry); ++ if (IS_ERR(old_dentry)) ++ SGOTO(exit3, rc); ++ ++ /* source must exist */ ++ rc = -ENOENT; ++ if (!old_dentry->d_inode) ++ SGOTO(exit4, rc); ++ ++ /* unless the source is a directory trailing slashes give -ENOTDIR */ ++ if (!S_ISDIR(old_dentry->d_inode->i_mode)) { ++ rc = -ENOTDIR; ++ if (oldnd.last.name[oldnd.last.len]) ++ SGOTO(exit4, rc); ++ if (newnd.last.name[newnd.last.len]) ++ SGOTO(exit4, rc); ++ } ++ ++ /* source should not be ancestor of target */ ++ rc = -EINVAL; ++ if (old_dentry == trap) ++ SGOTO(exit4, rc); ++ ++ new_dentry = vn_lookup_hash(&newnd); ++ rc = PTR_ERR(new_dentry); ++ if (IS_ERR(new_dentry)) ++ SGOTO(exit4, rc); ++ ++ /* target should not be an ancestor of source */ ++ rc = -ENOTEMPTY; ++ if (new_dentry == trap) ++ SGOTO(exit5, rc); ++ ++#ifdef HAVE_4ARGS_VFS_RENAME ++ rc = vfs_rename(old_dir->d_inode, old_dentry, ++ new_dir->d_inode, new_dentry); ++#else ++ rc = vfs_rename(old_dir->d_inode, old_dentry, oldnd.nd_mnt, ++ new_dir->d_inode, new_dentry, newnd.nd_mnt); ++#endif /* HAVE_4ARGS_VFS_RENAME */ ++exit5: ++ dput(new_dentry); ++exit4: ++ dput(old_dentry); ++exit3: ++ unlock_rename(new_dir, old_dir); ++exit2: ++ vn_path_release(&newnd); ++exit1: ++ vn_path_release(&oldnd); ++exit: ++ SRETURN(-rc); ++} ++EXPORT_SYMBOL(vn_rename); ++#endif /* HAVE_KERN_PATH_LOCKED */ ++ ++int ++vn_getattr(vnode_t *vp, vattr_t *vap, int flags, void *x3, void *x4) ++{ ++ struct file *fp; ++ struct kstat stat; ++ int rc; ++ SENTRY; ++ ++ ASSERT(vp); ++ ASSERT(vp->v_file); ++ ASSERT(vap); ++ ++ fp = vp->v_file; ++ ++ rc = vfs_getattr(fp->f_vfsmnt, fp->f_dentry, &stat); ++ if (rc) ++ SRETURN(-rc); ++ ++ vap->va_type = vn_mode_to_vtype(stat.mode); ++ vap->va_mode = stat.mode; ++ vap->va_uid = stat.uid; ++ vap->va_gid = stat.gid; ++ vap->va_fsid = 0; ++ vap->va_nodeid = stat.ino; ++ vap->va_nlink = stat.nlink; ++ vap->va_size = stat.size; ++ vap->va_blksize = stat.blksize; ++ vap->va_atime = 
stat.atime; ++ vap->va_mtime = stat.mtime; ++ vap->va_ctime = stat.ctime; ++ vap->va_rdev = stat.rdev; ++ vap->va_nblocks = stat.blocks; ++ ++ SRETURN(0); ++} ++EXPORT_SYMBOL(vn_getattr); ++ ++int vn_fsync(vnode_t *vp, int flags, void *x3, void *x4) ++{ ++ int datasync = 0; ++ SENTRY; ++ ++ ASSERT(vp); ++ ASSERT(vp->v_file); ++ ++ if (flags & FDSYNC) ++ datasync = 1; ++ ++ SRETURN(-spl_filp_fsync(vp->v_file, datasync)); ++} /* vn_fsync() */ ++EXPORT_SYMBOL(vn_fsync); ++ ++int vn_space(vnode_t *vp, int cmd, struct flock *bfp, int flag, ++ offset_t offset, void *x6, void *x7) ++{ ++ int error = EOPNOTSUPP; ++ SENTRY; ++ ++ if (cmd != F_FREESP || bfp->l_whence != 0) ++ SRETURN(EOPNOTSUPP); ++ ++ ASSERT(vp); ++ ASSERT(vp->v_file); ++ ASSERT(bfp->l_start >= 0 && bfp->l_len > 0); ++ ++#ifdef FALLOC_FL_PUNCH_HOLE ++ if (vp->v_file->f_op->fallocate) { ++ error = -vp->v_file->f_op->fallocate(vp->v_file, ++ FALLOC_FL_KEEP_SIZE | FALLOC_FL_PUNCH_HOLE, ++ bfp->l_start, bfp->l_len); ++ if (!error) ++ SRETURN(0); ++ } ++#endif ++ ++#ifdef HAVE_INODE_TRUNCATE_RANGE ++ if (vp->v_file->f_dentry && vp->v_file->f_dentry->d_inode && ++ vp->v_file->f_dentry->d_inode->i_op && ++ vp->v_file->f_dentry->d_inode->i_op->truncate_range) { ++ off_t end = bfp->l_start + bfp->l_len; ++ /* ++ * Judging from the code in shmem_truncate_range(), ++ * it seems the kernel expects the end offset to be ++ * inclusive and aligned to the end of a page. ++ */ ++ if (end % PAGE_SIZE != 0) { ++ end &= ~(off_t)(PAGE_SIZE - 1); ++ if (end <= bfp->l_start) ++ SRETURN(0); ++ } ++ --end; ++ ++ vp->v_file->f_dentry->d_inode->i_op->truncate_range( ++ vp->v_file->f_dentry->d_inode, ++ bfp->l_start, end ++ ); ++ SRETURN(0); ++ } ++#endif ++ ++ SRETURN(error); ++} ++EXPORT_SYMBOL(vn_space); ++ ++/* Function must be called while holding the vn_file_lock */ ++static file_t * ++file_find(int fd) ++{ ++ file_t *fp; ++ ++ ASSERT(spin_is_locked(&vn_file_lock)); ++ ++ list_for_each_entry(fp, &vn_file_list, f_list) { ++ if (fd == fp->f_fd && fp->f_task == current) { ++ ASSERT(atomic_read(&fp->f_ref) != 0); ++ return fp; ++ } ++ } ++ ++ return NULL; ++} /* file_find() */ ++ ++file_t * ++vn_getf(int fd) ++{ ++ struct kstat stat; ++ struct file *lfp; ++ file_t *fp; ++ vnode_t *vp; ++ int rc = 0; ++ SENTRY; ++ ++ /* Already open just take an extra reference */ ++ spin_lock(&vn_file_lock); ++ ++ fp = file_find(fd); ++ if (fp) { ++ atomic_inc(&fp->f_ref); ++ spin_unlock(&vn_file_lock); ++ SRETURN(fp); ++ } ++ ++ spin_unlock(&vn_file_lock); ++ ++ /* File was not yet opened create the object and setup */ ++ fp = kmem_cache_alloc(vn_file_cache, KM_SLEEP); ++ if (fp == NULL) ++ SGOTO(out, rc); ++ ++ mutex_enter(&fp->f_lock); ++ ++ fp->f_fd = fd; ++ fp->f_task = current; ++ fp->f_offset = 0; ++ atomic_inc(&fp->f_ref); ++ ++ lfp = fget(fd); ++ if (lfp == NULL) ++ SGOTO(out_mutex, rc); ++ ++ vp = vn_alloc(KM_SLEEP); ++ if (vp == NULL) ++ SGOTO(out_fget, rc); ++ ++ if (vfs_getattr(lfp->f_vfsmnt, lfp->f_dentry, &stat)) ++ SGOTO(out_vnode, rc); ++ ++ mutex_enter(&vp->v_lock); ++ vp->v_type = vn_mode_to_vtype(stat.mode); ++ vp->v_file = lfp; ++ mutex_exit(&vp->v_lock); ++ ++ fp->f_vnode = vp; ++ fp->f_file = lfp; ++ ++ /* Put it on the tracking list */ ++ spin_lock(&vn_file_lock); ++ list_add(&fp->f_list, &vn_file_list); ++ spin_unlock(&vn_file_lock); ++ ++ mutex_exit(&fp->f_lock); ++ SRETURN(fp); ++ ++out_vnode: ++ vn_free(vp); ++out_fget: ++ fput(lfp); ++out_mutex: ++ mutex_exit(&fp->f_lock); ++ kmem_cache_free(vn_file_cache, fp); ++out: ++ SRETURN(NULL); ++} /* 
getf() */ ++EXPORT_SYMBOL(getf); ++ ++static void releasef_locked(file_t *fp) ++{ ++ ASSERT(fp->f_file); ++ ASSERT(fp->f_vnode); ++ ++ /* Unlinked from list, no refs, safe to free outside mutex */ ++ fput(fp->f_file); ++ vn_free(fp->f_vnode); ++ ++ kmem_cache_free(vn_file_cache, fp); ++} ++ ++void ++vn_releasef(int fd) ++{ ++ file_t *fp; ++ SENTRY; ++ ++ spin_lock(&vn_file_lock); ++ fp = file_find(fd); ++ if (fp) { ++ atomic_dec(&fp->f_ref); ++ if (atomic_read(&fp->f_ref) > 0) { ++ spin_unlock(&vn_file_lock); ++ SEXIT; ++ return; ++ } ++ ++ list_del(&fp->f_list); ++ releasef_locked(fp); ++ } ++ spin_unlock(&vn_file_lock); ++ ++ SEXIT; ++ return; ++} /* releasef() */ ++EXPORT_SYMBOL(releasef); ++ ++#ifndef HAVE_SET_FS_PWD ++# ifdef HAVE_2ARGS_SET_FS_PWD ++/* Used from 2.6.25 - 2.6.31+ */ ++void ++set_fs_pwd(struct fs_struct *fs, struct path *path) ++{ ++ struct path old_pwd; ++ ++# ifdef HAVE_FS_STRUCT_SPINLOCK ++ spin_lock(&fs->lock); ++ old_pwd = fs->pwd; ++ fs->pwd = *path; ++ path_get(path); ++ spin_unlock(&fs->lock); ++# else ++ write_lock(&fs->lock); ++ old_pwd = fs->pwd; ++ fs->pwd = *path; ++ path_get(path); ++ write_unlock(&fs->lock); ++# endif /* HAVE_FS_STRUCT_SPINLOCK */ ++ ++ if (old_pwd.dentry) ++ path_put(&old_pwd); ++} ++# else ++/* Used from 2.6.11 - 2.6.24 */ ++void ++set_fs_pwd(struct fs_struct *fs, struct vfsmount *mnt, struct dentry *dentry) ++{ ++ struct dentry *old_pwd; ++ struct vfsmount *old_pwdmnt; ++ ++ write_lock(&fs->lock); ++ old_pwd = fs->pwd; ++ old_pwdmnt = fs->pwdmnt; ++ fs->pwdmnt = mntget(mnt); ++ fs->pwd = dget(dentry); ++ write_unlock(&fs->lock); ++ ++ if (old_pwd) { ++ dput(old_pwd); ++ mntput(old_pwdmnt); ++ } ++} ++# endif /* HAVE_2ARGS_SET_FS_PWD */ ++#endif /* HAVE_SET_FS_PWD */ ++ ++int ++vn_set_pwd(const char *filename) ++{ ++#if defined(HAVE_2ARGS_SET_FS_PWD) && defined(HAVE_USER_PATH_DIR) ++ struct path path; ++#else ++ struct nameidata nd; ++#endif /* HAVE_2ARGS_SET_FS_PWD */ ++ mm_segment_t saved_fs; ++ int rc; ++ SENTRY; ++ ++ /* ++ * user_path_dir() and __user_walk() both expect 'filename' to be ++ * a user space address so we must briefly increase the data segment ++ * size to ensure strncpy_from_user() does not fail with -EFAULT. 
++ */ ++ saved_fs = get_fs(); ++ set_fs(get_ds()); ++ ++#ifdef HAVE_2ARGS_SET_FS_PWD ++# ifdef HAVE_USER_PATH_DIR ++ rc = user_path_dir(filename, &path); ++ if (rc) ++ SGOTO(out, rc); ++ ++ rc = inode_permission(path.dentry->d_inode, MAY_EXEC | MAY_ACCESS); ++ if (rc) ++ SGOTO(dput_and_out, rc); ++ ++ set_fs_pwd(current->fs, &path); ++ ++dput_and_out: ++ path_put(&path); ++# else ++ rc = __user_walk(filename, ++ LOOKUP_FOLLOW|LOOKUP_DIRECTORY|LOOKUP_CHDIR, &nd); ++ if (rc) ++ SGOTO(out, rc); ++ ++ rc = vfs_permission(&nd, MAY_EXEC); ++ if (rc) ++ SGOTO(dput_and_out, rc); ++ ++ set_fs_pwd(current->fs, &nd.path); ++ ++dput_and_out: ++ path_put(&nd.path); ++# endif /* HAVE_USER_PATH_DIR */ ++#else ++ rc = __user_walk(filename, ++ LOOKUP_FOLLOW|LOOKUP_DIRECTORY|LOOKUP_CHDIR, &nd); ++ if (rc) ++ SGOTO(out, rc); ++ ++ rc = vfs_permission(&nd, MAY_EXEC); ++ if (rc) ++ SGOTO(dput_and_out, rc); ++ ++ set_fs_pwd(current->fs, nd.nd_mnt, nd.nd_dentry); ++ ++dput_and_out: ++ vn_path_release(&nd); ++#endif /* HAVE_2ARGS_SET_FS_PWD */ ++out: ++ set_fs(saved_fs); ++ ++ SRETURN(-rc); ++} /* vn_set_pwd() */ ++EXPORT_SYMBOL(vn_set_pwd); ++ ++static int ++vn_cache_constructor(void *buf, void *cdrarg, int kmflags) ++{ ++ struct vnode *vp = buf; ++ ++ mutex_init(&vp->v_lock, NULL, MUTEX_DEFAULT, NULL); ++ ++ return (0); ++} /* vn_cache_constructor() */ ++ ++static void ++vn_cache_destructor(void *buf, void *cdrarg) ++{ ++ struct vnode *vp = buf; ++ ++ mutex_destroy(&vp->v_lock); ++} /* vn_cache_destructor() */ ++ ++static int ++vn_file_cache_constructor(void *buf, void *cdrarg, int kmflags) ++{ ++ file_t *fp = buf; ++ ++ atomic_set(&fp->f_ref, 0); ++ mutex_init(&fp->f_lock, NULL, MUTEX_DEFAULT, NULL); ++ INIT_LIST_HEAD(&fp->f_list); ++ ++ return (0); ++} /* file_cache_constructor() */ ++ ++static void ++vn_file_cache_destructor(void *buf, void *cdrarg) ++{ ++ file_t *fp = buf; ++ ++ mutex_destroy(&fp->f_lock); ++} /* vn_file_cache_destructor() */ ++ ++int spl_vn_init_kallsyms_lookup(void) ++{ ++#ifdef HAVE_KERN_PATH_PARENT_HEADER ++#ifndef HAVE_KERN_PATH_PARENT_SYMBOL ++ kern_path_parent_fn = (kern_path_parent_t) ++ spl_kallsyms_lookup_name("kern_path_parent"); ++ if (!kern_path_parent_fn) { ++ printk(KERN_ERR "Error: Unknown symbol kern_path_parent\n"); ++ return -EFAULT; ++ } ++#endif /* HAVE_KERN_PATH_PARENT_SYMBOL */ ++#endif /* HAVE_KERN_PATH_PARENT_HEADER */ ++ ++#ifdef HAVE_KERN_PATH_LOCKED ++ kern_path_locked_fn = (kern_path_locked_t) ++ spl_kallsyms_lookup_name("kern_path_locked"); ++ if (!kern_path_locked_fn) { ++ printk(KERN_ERR "Error: Unknown symbol kern_path_locked\n"); ++ return -EFAULT; ++ } ++#endif ++ ++ return (0); ++} ++ ++int ++spl_vn_init(void) ++{ ++ SENTRY; ++ vn_cache = kmem_cache_create("spl_vn_cache", ++ sizeof(struct vnode), 64, ++ vn_cache_constructor, ++ vn_cache_destructor, ++ NULL, NULL, NULL, KMC_KMEM); ++ ++ vn_file_cache = kmem_cache_create("spl_vn_file_cache", ++ sizeof(file_t), 64, ++ vn_file_cache_constructor, ++ vn_file_cache_destructor, ++ NULL, NULL, NULL, KMC_KMEM); ++ SRETURN(0); ++} /* vn_init() */ ++ ++void ++spl_vn_fini(void) ++{ ++ file_t *fp, *next_fp; ++ int leaked = 0; ++ SENTRY; ++ ++ spin_lock(&vn_file_lock); ++ ++ list_for_each_entry_safe(fp, next_fp, &vn_file_list, f_list) { ++ list_del(&fp->f_list); ++ releasef_locked(fp); ++ leaked++; ++ } ++ ++ spin_unlock(&vn_file_lock); ++ ++ if (leaked > 0) ++ SWARN("Warning %d files leaked\n", leaked); ++ ++ kmem_cache_destroy(vn_file_cache); ++ kmem_cache_destroy(vn_cache); ++ ++ SEXIT; ++ return; ++} /* vn_fini() */ 
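The vnode shim above is easiest to read from the caller's side. The following is an illustrative sketch only (editor's example, not part of the patch): a hypothetical in-kernel consumer that already holds an open file descriptor for the current task and stats it through the SPL wrappers, assuming the usual SPL vnode headers are in scope.

	/* Hypothetical helper: stat an already-open fd via the SPL vnode layer. */
	static int
	example_stat_fd(int fd, vattr_t *vap)
	{
		file_t *fp;
		int rc;

		/* Find (or create) the tracking entry for this fd and take a reference */
		fp = vn_getf(fd);
		if (fp == NULL)
			return (EBADF);

		/* vn_getattr() returns 0 on success or a positive errno on failure */
		rc = vn_getattr(fp->f_vnode, vap, 0, NULL, NULL);

		/* Drop the reference; the entry is freed on the last release */
		vn_releasef(fd);

		return (rc);
	}

The vn_getf()/vn_releasef() pairing matters because spl_vn_fini() walks vn_file_list at module unload and reports any entry still present as a leaked file.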
+diff -uNr linux-3.2.33-go.orig/spl/spl/spl-xdr.c linux-3.2.33-go/spl/spl/spl-xdr.c +--- linux-3.2.33-go.orig/spl/spl/spl-xdr.c 1970-01-01 01:00:00.000000000 +0100 ++++ linux-3.2.33-go/spl/spl/spl-xdr.c 2012-11-16 23:22:32.410192863 +0100 +@@ -0,0 +1,525 @@ ++/*****************************************************************************\ ++ * Copyright (c) 2008-2010 Sun Microsystems, Inc. ++ * Written by Ricardo Correia ++ * ++ * This file is part of the SPL, Solaris Porting Layer. ++ * For details, see . ++ * ++ * The SPL is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License as published by the ++ * Free Software Foundation; either version 2 of the License, or (at your ++ * option) any later version. ++ * ++ * The SPL is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * for more details. ++ * ++ * You should have received a copy of the GNU General Public License along ++ * with the SPL. If not, see . ++ ***************************************************************************** ++ * Solaris Porting Layer (SPL) XDR Implementation. ++\*****************************************************************************/ ++ ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++ ++#ifdef SS_DEBUG_SUBSYS ++#undef SS_DEBUG_SUBSYS ++#endif ++ ++#define SS_DEBUG_SUBSYS SS_XDR ++ ++/* ++ * SPL's XDR mem implementation. ++ * ++ * This is used by libnvpair to serialize/deserialize the name-value pair data ++ * structures into byte arrays in a well-defined and portable manner. ++ * ++ * These data structures are used by the DMU/ZFS to flexibly manipulate various ++ * information in memory and later serialize it/deserialize it to disk. ++ * Examples of usages include the pool configuration, lists of pool and dataset ++ * properties, etc. ++ * ++ * Reference documentation for the XDR representation and XDR operations can be ++ * found in RFC 1832 and xdr(3), respectively. ++ * ++ * === Implementation shortcomings === ++ * ++ * It is assumed that the following C types have the following sizes: ++ * ++ * char/unsigned char: 1 byte ++ * short/unsigned short: 2 bytes ++ * int/unsigned int: 4 bytes ++ * longlong_t/u_longlong_t: 8 bytes ++ * ++ * The C standard allows these types to be larger (and in the case of ints, ++ * shorter), so if that is the case on some compiler/architecture, the build ++ * will fail (on purpose). ++ * ++ * If someone wants to fix the code to work properly on such environments, then: ++ * ++ * 1) Preconditions should be added to xdrmem_enc functions to make sure the ++ * caller doesn't pass arguments which exceed the expected range. ++ * 2) Functions which take signed integers should be changed to properly do ++ * sign extension. ++ * 3) For ints with less than 32 bits, well.. I suspect you'll have bigger ++ * problems than this implementation. ++ * ++ * It is also assumed that: ++ * ++ * 1) Chars have 8 bits. ++ * 2) We can always do 32-bit-aligned int memory accesses and byte-aligned ++ * memcpy, memset and memcmp. ++ * 3) Arrays passed to xdr_array() are packed and the compiler/architecture ++ * supports element-sized-aligned memory accesses. ++ * 4) Negative integers are natively stored in two's complement binary ++ * representation. ++ * ++ * No checks are done for the 4 assumptions above, though. 
++ * ++ * === Caller expectations === ++ * ++ * Existing documentation does not describe the semantics of XDR operations very ++ * well. Therefore, some assumptions about failure semantics will be made and ++ * will be described below: ++ * ++ * 1) If any encoding operation fails (e.g., due to lack of buffer space), the ++ * the stream should be considered valid only up to the encoding operation ++ * previous to the one that first failed. However, the stream size as returned ++ * by xdr_control() cannot be considered to be strictly correct (it may be ++ * bigger). ++ * ++ * Putting it another way, if there is an encoding failure it's undefined ++ * whether anything is added to the stream in that operation and therefore ++ * neither xdr_control() nor future encoding operations on the same stream can ++ * be relied upon to produce correct results. ++ * ++ * 2) If a decoding operation fails, it's undefined whether anything will be ++ * decoded into passed buffers/pointers during that operation, or what the ++ * values on those buffers will look like. ++ * ++ * Future decoding operations on the same stream will also have similar ++ * undefined behavior. ++ * ++ * 3) When the first decoding operation fails it is OK to trust the results of ++ * previous decoding operations on the same stream, as long as the caller ++ * expects a failure to be possible (e.g. due to end-of-stream). ++ * ++ * However, this is highly discouraged because the caller should know the ++ * stream size and should be coded to expect any decoding failure to be data ++ * corruption due to hardware, accidental or even malicious causes, which should ++ * be handled gracefully in all cases. ++ * ++ * In very rare situations where there are strong reasons to believe the data ++ * can be trusted to be valid and non-tampered with, then the caller may assume ++ * a decoding failure to be a bug (e.g. due to mismatched data types) and may ++ * fail non-gracefully. ++ * ++ * 4) Non-zero padding bytes will cause the decoding operation to fail. ++ * ++ * 5) Zero bytes on string types will also cause the decoding operation to fail. ++ * ++ * 6) It is assumed that either the pointer to the stream buffer given by the ++ * caller is 32-bit aligned or the architecture supports non-32-bit-aligned int ++ * memory accesses. ++ * ++ * 7) The stream buffer and encoding/decoding buffers/ptrs should not overlap. ++ * ++ * 8) If a caller passes pointers to non-kernel memory (e.g., pointers to user ++ * space or MMIO space), the computer may explode. 
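++ * ++ * (Editorial illustration, not part of the original header: as a concrete example of rule 4 above, encoding the 3-byte string "abc" with xdr_string() emits a 4-byte big-endian length 0x00000003, the bytes 'a' 'b' 'c', and a single zero pad byte to reach the next 4-byte boundary, 8 bytes in total; a decoder must reject the stream if that pad byte is non-zero.)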
++ */ ++ ++static struct xdr_ops xdrmem_encode_ops; ++static struct xdr_ops xdrmem_decode_ops; ++ ++void ++xdrmem_create(XDR *xdrs, const caddr_t addr, const uint_t size, ++ const enum xdr_op op) ++{ ++ switch (op) { ++ case XDR_ENCODE: ++ xdrs->x_ops = &xdrmem_encode_ops; ++ break; ++ case XDR_DECODE: ++ xdrs->x_ops = &xdrmem_decode_ops; ++ break; ++ default: ++ SWARN("Invalid op value: %d\n", op); ++ xdrs->x_ops = NULL; /* Let the caller know we failed */ ++ return; ++ } ++ ++ xdrs->x_op = op; ++ xdrs->x_addr = addr; ++ xdrs->x_addr_end = addr + size; ++ ++ if (xdrs->x_addr_end < xdrs->x_addr) { ++ SWARN("Overflow while creating xdrmem: %p, %u\n", addr, size); ++ xdrs->x_ops = NULL; ++ } ++} ++EXPORT_SYMBOL(xdrmem_create); ++ ++static bool_t ++xdrmem_control(XDR *xdrs, int req, void *info) ++{ ++ struct xdr_bytesrec *rec = (struct xdr_bytesrec *) info; ++ ++ if (req != XDR_GET_BYTES_AVAIL) { ++ SWARN("Called with unknown request: %d\n", req); ++ return FALSE; ++ } ++ ++ rec->xc_is_last_record = TRUE; /* always TRUE in xdrmem streams */ ++ rec->xc_num_avail = xdrs->x_addr_end - xdrs->x_addr; ++ ++ return TRUE; ++} ++ ++static bool_t ++xdrmem_enc_bytes(XDR *xdrs, caddr_t cp, const uint_t cnt) ++{ ++ uint_t size = roundup(cnt, 4); ++ uint_t pad; ++ ++ if (size < cnt) ++ return FALSE; /* Integer overflow */ ++ ++ if (xdrs->x_addr > xdrs->x_addr_end) ++ return FALSE; ++ ++ if (xdrs->x_addr_end - xdrs->x_addr < size) ++ return FALSE; ++ ++ memcpy(xdrs->x_addr, cp, cnt); ++ ++ xdrs->x_addr += cnt; ++ ++ pad = size - cnt; ++ if (pad > 0) { ++ memset(xdrs->x_addr, 0, pad); ++ xdrs->x_addr += pad; ++ } ++ ++ return TRUE; ++} ++ ++static bool_t ++xdrmem_dec_bytes(XDR *xdrs, caddr_t cp, const uint_t cnt) ++{ ++ static uint32_t zero = 0; ++ uint_t size = roundup(cnt, 4); ++ uint_t pad; ++ ++ if (size < cnt) ++ return FALSE; /* Integer overflow */ ++ ++ if (xdrs->x_addr > xdrs->x_addr_end) ++ return FALSE; ++ ++ if (xdrs->x_addr_end - xdrs->x_addr < size) ++ return FALSE; ++ ++ memcpy(cp, xdrs->x_addr, cnt); ++ xdrs->x_addr += cnt; ++ ++ pad = size - cnt; ++ if (pad > 0) { ++ /* An inverted memchr() would be useful here... */ ++ if (memcmp(&zero, xdrs->x_addr, pad) != 0) ++ return FALSE; ++ ++ xdrs->x_addr += pad; ++ } ++ ++ return TRUE; ++} ++ ++static bool_t ++xdrmem_enc_uint32(XDR *xdrs, uint32_t val) ++{ ++ if (xdrs->x_addr + sizeof(uint32_t) > xdrs->x_addr_end) ++ return FALSE; ++ ++ *((uint32_t *) xdrs->x_addr) = cpu_to_be32(val); ++ ++ xdrs->x_addr += sizeof(uint32_t); ++ ++ return TRUE; ++} ++ ++static bool_t ++xdrmem_dec_uint32(XDR *xdrs, uint32_t *val) ++{ ++ if (xdrs->x_addr + sizeof(uint32_t) > xdrs->x_addr_end) ++ return FALSE; ++ ++ *val = be32_to_cpu(*((uint32_t *) xdrs->x_addr)); ++ ++ xdrs->x_addr += sizeof(uint32_t); ++ ++ return TRUE; ++} ++ ++static bool_t ++xdrmem_enc_char(XDR *xdrs, char *cp) ++{ ++ uint32_t val; ++ ++ BUILD_BUG_ON(sizeof(char) != 1); ++ val = *((unsigned char *) cp); ++ ++ return xdrmem_enc_uint32(xdrs, val); ++} ++ ++static bool_t ++xdrmem_dec_char(XDR *xdrs, char *cp) ++{ ++ uint32_t val; ++ ++ BUILD_BUG_ON(sizeof(char) != 1); ++ ++ if (!xdrmem_dec_uint32(xdrs, &val)) ++ return FALSE; ++ ++ /* ++ * If any of the 3 other bytes are non-zero then val will be greater ++ * than 0xff and we fail because according to the RFC, this block does ++ * not have a char encoded in it. 
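++ * For example (editorial illustration): the four encoded bytes 00 00 00 41 decode to the char 'A', while 00 00 01 41 yields val 0x141 and is rejected.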
++ */ ++ if (val > 0xff) ++ return FALSE; ++ ++ *((unsigned char *) cp) = val; ++ ++ return TRUE; ++} ++ ++static bool_t ++xdrmem_enc_ushort(XDR *xdrs, unsigned short *usp) ++{ ++ BUILD_BUG_ON(sizeof(unsigned short) != 2); ++ ++ return xdrmem_enc_uint32(xdrs, *usp); ++} ++ ++static bool_t ++xdrmem_dec_ushort(XDR *xdrs, unsigned short *usp) ++{ ++ uint32_t val; ++ ++ BUILD_BUG_ON(sizeof(unsigned short) != 2); ++ ++ if (!xdrmem_dec_uint32(xdrs, &val)) ++ return FALSE; ++ ++ /* ++ * Short ints are not in the RFC, but we assume similar logic as in ++ * xdrmem_dec_char(). ++ */ ++ if (val > 0xffff) ++ return FALSE; ++ ++ *usp = val; ++ ++ return TRUE; ++} ++ ++static bool_t ++xdrmem_enc_uint(XDR *xdrs, unsigned *up) ++{ ++ BUILD_BUG_ON(sizeof(unsigned) != 4); ++ ++ return xdrmem_enc_uint32(xdrs, *up); ++} ++ ++static bool_t ++xdrmem_dec_uint(XDR *xdrs, unsigned *up) ++{ ++ BUILD_BUG_ON(sizeof(unsigned) != 4); ++ ++ return xdrmem_dec_uint32(xdrs, (uint32_t *) up); ++} ++ ++static bool_t ++xdrmem_enc_ulonglong(XDR *xdrs, u_longlong_t *ullp) ++{ ++ BUILD_BUG_ON(sizeof(u_longlong_t) != 8); ++ ++ if (!xdrmem_enc_uint32(xdrs, *ullp >> 32)) ++ return FALSE; ++ ++ return xdrmem_enc_uint32(xdrs, *ullp & 0xffffffff); ++} ++ ++static bool_t ++xdrmem_dec_ulonglong(XDR *xdrs, u_longlong_t *ullp) ++{ ++ uint32_t low, high; ++ ++ BUILD_BUG_ON(sizeof(u_longlong_t) != 8); ++ ++ if (!xdrmem_dec_uint32(xdrs, &high)) ++ return FALSE; ++ if (!xdrmem_dec_uint32(xdrs, &low)) ++ return FALSE; ++ ++ *ullp = ((u_longlong_t) high << 32) | low; ++ ++ return TRUE; ++} ++ ++static bool_t ++xdr_enc_array(XDR *xdrs, caddr_t *arrp, uint_t *sizep, const uint_t maxsize, ++ const uint_t elsize, const xdrproc_t elproc) ++{ ++ uint_t i; ++ caddr_t addr = *arrp; ++ ++ if (*sizep > maxsize || *sizep > UINT_MAX / elsize) ++ return FALSE; ++ ++ if (!xdrmem_enc_uint(xdrs, sizep)) ++ return FALSE; ++ ++ for (i = 0; i < *sizep; i++) { ++ if (!elproc(xdrs, addr)) ++ return FALSE; ++ addr += elsize; ++ } ++ ++ return TRUE; ++} ++ ++static bool_t ++xdr_dec_array(XDR *xdrs, caddr_t *arrp, uint_t *sizep, const uint_t maxsize, ++ const uint_t elsize, const xdrproc_t elproc) ++{ ++ uint_t i, size; ++ bool_t alloc = FALSE; ++ caddr_t addr; ++ ++ if (!xdrmem_dec_uint(xdrs, sizep)) ++ return FALSE; ++ ++ size = *sizep; ++ ++ if (size > maxsize || size > UINT_MAX / elsize) ++ return FALSE; ++ ++ /* ++ * The Solaris man page says: "If *arrp is NULL when decoding, ++ * xdr_array() allocates memory and *arrp points to it". 
++ */ ++ if (*arrp == NULL) { ++ BUILD_BUG_ON(sizeof(uint_t) > sizeof(size_t)); ++ ++ *arrp = kmem_alloc(size * elsize, KM_NOSLEEP); ++ if (*arrp == NULL) ++ return FALSE; ++ ++ alloc = TRUE; ++ } ++ ++ addr = *arrp; ++ ++ for (i = 0; i < size; i++) { ++ if (!elproc(xdrs, addr)) { ++ if (alloc) ++ kmem_free(*arrp, size * elsize); ++ return FALSE; ++ } ++ addr += elsize; ++ } ++ ++ return TRUE; ++} ++ ++static bool_t ++xdr_enc_string(XDR *xdrs, char **sp, const uint_t maxsize) ++{ ++ size_t slen = strlen(*sp); ++ uint_t len; ++ ++ if (slen > maxsize) ++ return FALSE; ++ ++ len = slen; ++ ++ if (!xdrmem_enc_uint(xdrs, &len)) ++ return FALSE; ++ ++ return xdrmem_enc_bytes(xdrs, *sp, len); ++} ++ ++static bool_t ++xdr_dec_string(XDR *xdrs, char **sp, const uint_t maxsize) ++{ ++ uint_t size; ++ bool_t alloc = FALSE; ++ ++ if (!xdrmem_dec_uint(xdrs, &size)) ++ return FALSE; ++ ++ if (size > maxsize || size > UINT_MAX - 1) ++ return FALSE; ++ ++ /* ++ * Solaris man page: "If *sp is NULL when decoding, xdr_string() ++ * allocates memory and *sp points to it". ++ */ ++ if (*sp == NULL) { ++ BUILD_BUG_ON(sizeof(uint_t) > sizeof(size_t)); ++ ++ *sp = kmem_alloc(size + 1, KM_NOSLEEP); ++ if (*sp == NULL) ++ return FALSE; ++ ++ alloc = TRUE; ++ } ++ ++ if (!xdrmem_dec_bytes(xdrs, *sp, size)) ++ goto fail; ++ ++ if (memchr(*sp, 0, size) != NULL) ++ goto fail; ++ ++ (*sp)[size] = '\0'; ++ ++ return TRUE; ++ ++fail: ++ if (alloc) ++ kmem_free(*sp, size + 1); ++ ++ return FALSE; ++} ++ ++static struct xdr_ops xdrmem_encode_ops = { ++ .xdr_control = xdrmem_control, ++ .xdr_char = xdrmem_enc_char, ++ .xdr_u_short = xdrmem_enc_ushort, ++ .xdr_u_int = xdrmem_enc_uint, ++ .xdr_u_longlong_t = xdrmem_enc_ulonglong, ++ .xdr_opaque = xdrmem_enc_bytes, ++ .xdr_string = xdr_enc_string, ++ .xdr_array = xdr_enc_array ++}; ++ ++static struct xdr_ops xdrmem_decode_ops = { ++ .xdr_control = xdrmem_control, ++ .xdr_char = xdrmem_dec_char, ++ .xdr_u_short = xdrmem_dec_ushort, ++ .xdr_u_int = xdrmem_dec_uint, ++ .xdr_u_longlong_t = xdrmem_dec_ulonglong, ++ .xdr_opaque = xdrmem_dec_bytes, ++ .xdr_string = xdr_dec_string, ++ .xdr_array = xdr_dec_array ++}; ++ +diff -uNr linux-3.2.33-go.orig/spl/spl/spl-zlib.c linux-3.2.33-go/spl/spl/spl-zlib.c +--- linux-3.2.33-go.orig/spl/spl/spl-zlib.c 1970-01-01 01:00:00.000000000 +0100 ++++ linux-3.2.33-go/spl/spl/spl-zlib.c 2012-11-16 23:22:32.410192863 +0100 +@@ -0,0 +1,225 @@ ++/*****************************************************************************\ ++ * Copyright (C) 2007-2010 Lawrence Livermore National Security, LLC. ++ * Copyright (C) 2007 The Regents of the University of California. ++ * Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER). ++ * Written by Brian Behlendorf . ++ * UCRL-CODE-235197 ++ * ++ * This file is part of the SPL, Solaris Porting Layer. ++ * For details, see . ++ * ++ * The SPL is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License as published by the ++ * Free Software Foundation; either version 2 of the License, or (at your ++ * option) any later version. ++ * ++ * The SPL is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * for more details. ++ * ++ * You should have received a copy of the GNU General Public License along ++ * with the SPL. If not, see . 
++ ***************************************************************************** ++ * z_compress_level/z_uncompress are nearly identical copies of the ++ * compress2/uncompress functions provided by the official zlib package ++ * available at http://zlib.net/. The only changes made we to slightly ++ * adapt the functions called to match the linux kernel implementation ++ * of zlib. The full zlib license follows: ++ * ++ * zlib.h -- interface of the 'zlib' general purpose compression library ++ * version 1.2.5, April 19th, 2010 ++ * ++ * Copyright (C) 1995-2010 Jean-loup Gailly and Mark Adler ++ * ++ * This software is provided 'as-is', without any express or implied ++ * warranty. In no event will the authors be held liable for any damages ++ * arising from the use of this software. ++ * ++ * Permission is granted to anyone to use this software for any purpose, ++ * including commercial applications, and to alter it and redistribute it ++ * freely, subject to the following restrictions: ++ * ++ * 1. The origin of this software must not be misrepresented; you must not ++ * claim that you wrote the original software. If you use this software ++ * in a product, an acknowledgment in the product documentation would be ++ * appreciated but is not required. ++ * 2. Altered source versions must be plainly marked as such, and must not be ++ * misrepresented as being the original software. ++ * 3. This notice may not be removed or altered from any source distribution. ++ * ++ * Jean-loup Gailly ++ * Mark Adler ++\*****************************************************************************/ ++ ++ ++#include ++#include ++#include ++ ++#ifdef DEBUG_SUBSYSTEM ++#undef DEBUG_SUBSYSTEM ++#endif ++ ++#define DEBUG_SUBSYSTEM SS_ZLIB ++ ++static spl_kmem_cache_t *zlib_workspace_cache; ++ ++/* ++ * A kmem_cache is used for the zlib workspaces to avoid having to vmalloc ++ * and vfree for every call. Using a kmem_cache also has the advantage ++ * that improves the odds that the memory used will be local to this cpu. ++ * To further improve things it might be wise to create a dedicated per-cpu ++ * workspace for use. This would take some additional care because we then ++ * must disable preemption around the critical section, and verify that ++ * zlib_deflate* and zlib_inflate* never internally call schedule(). ++ */ ++static void * ++zlib_workspace_alloc(int flags) ++{ ++ return kmem_cache_alloc(zlib_workspace_cache, flags & ~(__GFP_FS)); ++} ++ ++static void ++zlib_workspace_free(void *workspace) ++{ ++ kmem_cache_free(zlib_workspace_cache, workspace); ++} ++ ++/* ++ * Compresses the source buffer into the destination buffer. The level ++ * parameter has the same meaning as in deflateInit. sourceLen is the byte ++ * length of the source buffer. Upon entry, destLen is the total size of the ++ * destination buffer, which must be at least 0.1% larger than sourceLen plus ++ * 12 bytes. Upon exit, destLen is the actual size of the compressed buffer. ++ * ++ * compress2 returns Z_OK if success, Z_MEM_ERROR if there was not enough ++ * memory, Z_BUF_ERROR if there was not enough room in the output buffer, ++ * Z_STREAM_ERROR if the level parameter is invalid. 
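++ * ++ * Illustrative caller-side sketch (editor's example, not part of the original source): with a source buffer src of slen bytes, size the destination as dlen = slen + (slen >> 9) + 12, allocate it with kmem_alloc(dlen, KM_SLEEP), and call z_compress_level(dst, &dlen, src, slen, level); on Z_OK, dlen has been updated to the compressed length.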
++ */ ++int ++z_compress_level(void *dest, size_t *destLen, const void *source, ++ size_t sourceLen, int level) ++{ ++ z_stream stream; ++ int err; ++ ++ stream.next_in = (Byte *)source; ++ stream.avail_in = (uInt)sourceLen; ++ stream.next_out = dest; ++ stream.avail_out = (uInt)*destLen; ++ ++ if ((size_t)stream.avail_out != *destLen) ++ return Z_BUF_ERROR; ++ ++ stream.workspace = zlib_workspace_alloc(KM_SLEEP); ++ if (!stream.workspace) ++ return Z_MEM_ERROR; ++ ++ err = zlib_deflateInit(&stream, level); ++ if (err != Z_OK) { ++ zlib_workspace_free(stream.workspace); ++ return err; ++ } ++ ++ err = zlib_deflate(&stream, Z_FINISH); ++ if (err != Z_STREAM_END) { ++ zlib_deflateEnd(&stream); ++ zlib_workspace_free(stream.workspace); ++ return err == Z_OK ? Z_BUF_ERROR : err; ++ } ++ *destLen = stream.total_out; ++ ++ err = zlib_deflateEnd(&stream); ++ zlib_workspace_free(stream.workspace); ++ ++ return err; ++} ++EXPORT_SYMBOL(z_compress_level); ++ ++/* ++ * Decompresses the source buffer into the destination buffer. sourceLen is ++ * the byte length of the source buffer. Upon entry, destLen is the total ++ * size of the destination buffer, which must be large enough to hold the ++ * entire uncompressed data. (The size of the uncompressed data must have ++ * been saved previously by the compressor and transmitted to the decompressor ++ * by some mechanism outside the scope of this compression library.) ++ * Upon exit, destLen is the actual size of the compressed buffer. ++ * This function can be used to decompress a whole file at once if the ++ * input file is mmap'ed. ++ * ++ * uncompress returns Z_OK if success, Z_MEM_ERROR if there was not ++ * enough memory, Z_BUF_ERROR if there was not enough room in the output ++ * buffer, or Z_DATA_ERROR if the input data was corrupted. 
++ */ ++int ++z_uncompress(void *dest, size_t *destLen, const void *source, size_t sourceLen) ++{ ++ z_stream stream; ++ int err; ++ ++ stream.next_in = (Byte *)source; ++ stream.avail_in = (uInt)sourceLen; ++ stream.next_out = dest; ++ stream.avail_out = (uInt)*destLen; ++ ++ if ((size_t)stream.avail_out != *destLen) ++ return Z_BUF_ERROR; ++ ++ stream.workspace = zlib_workspace_alloc(KM_SLEEP); ++ if (!stream.workspace) ++ return Z_MEM_ERROR; ++ ++ err = zlib_inflateInit(&stream); ++ if (err != Z_OK) { ++ zlib_workspace_free(stream.workspace); ++ return err; ++ } ++ ++ err = zlib_inflate(&stream, Z_FINISH); ++ if (err != Z_STREAM_END) { ++ zlib_inflateEnd(&stream); ++ zlib_workspace_free(stream.workspace); ++ ++ if (err == Z_NEED_DICT || ++ (err == Z_BUF_ERROR && stream.avail_in == 0)) ++ return Z_DATA_ERROR; ++ ++ return err; ++ } ++ *destLen = stream.total_out; ++ ++ err = zlib_inflateEnd(&stream); ++ zlib_workspace_free(stream.workspace); ++ ++ return err; ++} ++EXPORT_SYMBOL(z_uncompress); ++ ++int ++spl_zlib_init(void) ++{ ++ int size; ++ SENTRY; ++ ++ size = MAX(spl_zlib_deflate_workspacesize(MAX_WBITS, MAX_MEM_LEVEL), ++ zlib_inflate_workspacesize()); ++ ++ zlib_workspace_cache = kmem_cache_create( ++ "spl_zlib_workspace_cache", ++ size, 0, NULL, NULL, NULL, NULL, NULL, ++ KMC_VMEM | KMC_NOEMERGENCY); ++ if (!zlib_workspace_cache) ++ SRETURN(1); ++ ++ SRETURN(0); ++} ++ ++void ++spl_zlib_fini(void) ++{ ++ SENTRY; ++ kmem_cache_destroy(zlib_workspace_cache); ++ zlib_workspace_cache = NULL; ++ SEXIT; ++} +diff -uNr linux-3.2.33-go.orig/spl/splat/Makefile linux-3.2.33-go/spl/splat/Makefile +--- linux-3.2.33-go.orig/spl/splat/Makefile 1970-01-01 01:00:00.000000000 +0100 ++++ linux-3.2.33-go/spl/splat/Makefile 2012-11-16 23:22:32.419192759 +0100 +@@ -0,0 +1,25 @@ ++# Makefile.in for splat kernel module ++ ++MODULE := splat ++EXTRA_CFLAGS = $(SPL_MODULE_CFLAGS) -DHAVE_GPL_ONLY_SYMBOLS -Wstrict-prototypes -DNDEBUG -DDEBUG_LOG -DDEBUG_KMEM ++ ++# Solaris Porting LAyer Tests ++obj-$(CONFIG_SPL) := $(MODULE).o ++ ++$(MODULE)-objs += splat-ctl.o ++$(MODULE)-objs += splat-kmem.o ++$(MODULE)-objs += splat-taskq.o ++$(MODULE)-objs += splat-random.o ++$(MODULE)-objs += splat-mutex.o ++$(MODULE)-objs += splat-condvar.o ++$(MODULE)-objs += splat-thread.o ++$(MODULE)-objs += splat-rwlock.o ++$(MODULE)-objs += splat-time.o ++$(MODULE)-objs += splat-vnode.o ++$(MODULE)-objs += splat-kobj.o ++$(MODULE)-objs += splat-atomic.o ++$(MODULE)-objs += splat-list.o ++$(MODULE)-objs += splat-generic.o ++$(MODULE)-objs += splat-cred.o ++$(MODULE)-objs += splat-zlib.o ++$(MODULE)-objs += splat-linux.o +diff -uNr linux-3.2.33-go.orig/spl/splat/Makefile.in linux-3.2.33-go/spl/splat/Makefile.in +--- linux-3.2.33-go.orig/spl/splat/Makefile.in 1970-01-01 01:00:00.000000000 +0100 ++++ linux-3.2.33-go/spl/splat/Makefile.in 2012-11-16 23:22:32.409192874 +0100 +@@ -0,0 +1,25 @@ ++# Makefile.in for splat kernel module ++ ++MODULE := splat ++EXTRA_CFLAGS = $(SPL_MODULE_CFLAGS) @KERNELCPPFLAGS@ ++ ++# Solaris Porting LAyer Tests ++obj-$(CONFIG_SPL) := $(MODULE).o ++ ++$(MODULE)-objs += @top_srcdir@/module/splat/splat-ctl.o ++$(MODULE)-objs += @top_srcdir@/module/splat/splat-kmem.o ++$(MODULE)-objs += @top_srcdir@/module/splat/splat-taskq.o ++$(MODULE)-objs += @top_srcdir@/module/splat/splat-random.o ++$(MODULE)-objs += @top_srcdir@/module/splat/splat-mutex.o ++$(MODULE)-objs += @top_srcdir@/module/splat/splat-condvar.o ++$(MODULE)-objs += @top_srcdir@/module/splat/splat-thread.o ++$(MODULE)-objs += 
@top_srcdir@/module/splat/splat-rwlock.o ++$(MODULE)-objs += @top_srcdir@/module/splat/splat-time.o ++$(MODULE)-objs += @top_srcdir@/module/splat/splat-vnode.o ++$(MODULE)-objs += @top_srcdir@/module/splat/splat-kobj.o ++$(MODULE)-objs += @top_srcdir@/module/splat/splat-atomic.o ++$(MODULE)-objs += @top_srcdir@/module/splat/splat-list.o ++$(MODULE)-objs += @top_srcdir@/module/splat/splat-generic.o ++$(MODULE)-objs += @top_srcdir@/module/splat/splat-cred.o ++$(MODULE)-objs += @top_srcdir@/module/splat/splat-zlib.o ++$(MODULE)-objs += @top_srcdir@/module/splat/splat-linux.o +diff -uNr linux-3.2.33-go.orig/spl/splat/splat-atomic.c linux-3.2.33-go/spl/splat/splat-atomic.c +--- linux-3.2.33-go.orig/spl/splat/splat-atomic.c 1970-01-01 01:00:00.000000000 +0100 ++++ linux-3.2.33-go/spl/splat/splat-atomic.c 2012-11-16 23:22:32.409192874 +0100 +@@ -0,0 +1,227 @@ ++/*****************************************************************************\ ++ * Copyright (C) 2007-2010 Lawrence Livermore National Security, LLC. ++ * Copyright (C) 2007 The Regents of the University of California. ++ * Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER). ++ * Written by Brian Behlendorf . ++ * UCRL-CODE-235197 ++ * ++ * This file is part of the SPL, Solaris Porting Layer. ++ * For details, see . ++ * ++ * The SPL is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License as published by the ++ * Free Software Foundation; either version 2 of the License, or (at your ++ * option) any later version. ++ * ++ * The SPL is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * for more details. ++ * ++ * You should have received a copy of the GNU General Public License along ++ * with the SPL. If not, see . ++ ***************************************************************************** ++ * Solaris Porting LAyer Tests (SPLAT) Atomic Tests. 
++\*****************************************************************************/ ++ ++#include ++#include ++#include "splat-internal.h" ++ ++#define SPLAT_ATOMIC_NAME "atomic" ++#define SPLAT_ATOMIC_DESC "Kernel Atomic Tests" ++ ++#define SPLAT_ATOMIC_TEST1_ID 0x0b01 ++#define SPLAT_ATOMIC_TEST1_NAME "64-bit" ++#define SPLAT_ATOMIC_TEST1_DESC "Validate 64-bit atomic ops" ++ ++#define SPLAT_ATOMIC_TEST_MAGIC 0x43435454UL ++#define SPLAT_ATOMIC_INIT_VALUE 10000000UL ++ ++typedef enum { ++ SPLAT_ATOMIC_INC_64 = 0, ++ SPLAT_ATOMIC_DEC_64 = 1, ++ SPLAT_ATOMIC_ADD_64 = 2, ++ SPLAT_ATOMIC_SUB_64 = 3, ++ SPLAT_ATOMIC_ADD_64_NV = 4, ++ SPLAT_ATOMIC_SUB_64_NV = 5, ++ SPLAT_ATOMIC_COUNT_64 = 6 ++} atomic_op_t; ++ ++typedef struct atomic_priv { ++ unsigned long ap_magic; ++ struct file *ap_file; ++ struct mutex ap_lock; ++ wait_queue_head_t ap_waitq; ++ volatile uint64_t ap_atomic; ++ volatile uint64_t ap_atomic_exited; ++ atomic_op_t ap_op; ++ ++} atomic_priv_t; ++ ++static void ++splat_atomic_work(void *priv) ++{ ++ atomic_priv_t *ap; ++ atomic_op_t op; ++ int i; ++ ++ ap = (atomic_priv_t *)priv; ++ ASSERT(ap->ap_magic == SPLAT_ATOMIC_TEST_MAGIC); ++ ++ mutex_lock(&ap->ap_lock); ++ op = ap->ap_op; ++ wake_up(&ap->ap_waitq); ++ mutex_unlock(&ap->ap_lock); ++ ++ splat_vprint(ap->ap_file, SPLAT_ATOMIC_TEST1_NAME, ++ "Thread %d successfully started: %lu/%lu\n", op, ++ (long unsigned)ap->ap_atomic, ++ (long unsigned)ap->ap_atomic_exited); ++ ++ for (i = 0; i < SPLAT_ATOMIC_INIT_VALUE / 10; i++) { ++ ++ /* Periodically sleep to mix up the ordering */ ++ if ((i % (SPLAT_ATOMIC_INIT_VALUE / 100)) == 0) { ++ splat_vprint(ap->ap_file, SPLAT_ATOMIC_TEST1_NAME, ++ "Thread %d sleeping: %lu/%lu\n", op, ++ (long unsigned)ap->ap_atomic, ++ (long unsigned)ap->ap_atomic_exited); ++ set_current_state(TASK_INTERRUPTIBLE); ++ schedule_timeout(HZ / 100); ++ } ++ ++ switch (op) { ++ case SPLAT_ATOMIC_INC_64: ++ atomic_inc_64(&ap->ap_atomic); ++ break; ++ case SPLAT_ATOMIC_DEC_64: ++ atomic_dec_64(&ap->ap_atomic); ++ break; ++ case SPLAT_ATOMIC_ADD_64: ++ atomic_add_64(&ap->ap_atomic, 3); ++ break; ++ case SPLAT_ATOMIC_SUB_64: ++ atomic_sub_64(&ap->ap_atomic, 3); ++ break; ++ case SPLAT_ATOMIC_ADD_64_NV: ++ atomic_add_64_nv(&ap->ap_atomic, 5); ++ break; ++ case SPLAT_ATOMIC_SUB_64_NV: ++ atomic_sub_64_nv(&ap->ap_atomic, 5); ++ break; ++ default: ++ PANIC("Undefined op %d\n", op); ++ } ++ } ++ ++ atomic_inc_64(&ap->ap_atomic_exited); ++ ++ splat_vprint(ap->ap_file, SPLAT_ATOMIC_TEST1_NAME, ++ "Thread %d successfully exited: %lu/%lu\n", op, ++ (long unsigned)ap->ap_atomic, ++ (long unsigned)ap->ap_atomic_exited); ++ ++ wake_up(&ap->ap_waitq); ++ thread_exit(); ++} ++ ++static int ++splat_atomic_test1_cond(atomic_priv_t *ap, int started) ++{ ++ return (ap->ap_atomic_exited == started); ++} ++ ++static int ++splat_atomic_test1(struct file *file, void *arg) ++{ ++ atomic_priv_t ap; ++ DEFINE_WAIT(wait); ++ kthread_t *thr; ++ int i, rc = 0; ++ ++ ap.ap_magic = SPLAT_ATOMIC_TEST_MAGIC; ++ ap.ap_file = file; ++ mutex_init(&ap.ap_lock); ++ init_waitqueue_head(&ap.ap_waitq); ++ ap.ap_atomic = SPLAT_ATOMIC_INIT_VALUE; ++ ap.ap_atomic_exited = 0; ++ ++ for (i = 0; i < SPLAT_ATOMIC_COUNT_64; i++) { ++ mutex_lock(&ap.ap_lock); ++ ap.ap_op = i; ++ ++ thr = (kthread_t *)thread_create(NULL, 0, splat_atomic_work, ++ &ap, 0, &p0, TS_RUN, ++ minclsyspri); ++ if (thr == NULL) { ++ rc = -ESRCH; ++ mutex_unlock(&ap.ap_lock); ++ break; ++ } ++ ++ /* Prepare to wait, the new thread will wake us once it ++ * has made a copy of the unique private 
passed data */ ++ prepare_to_wait(&ap.ap_waitq, &wait, TASK_UNINTERRUPTIBLE); ++ mutex_unlock(&ap.ap_lock); ++ schedule(); ++ } ++ ++ wait_event(ap.ap_waitq, splat_atomic_test1_cond(&ap, i)); ++ ++ if (rc) { ++ splat_vprint(file, SPLAT_ATOMIC_TEST1_NAME, "Only started " ++ "%d/%d test threads\n", i, SPLAT_ATOMIC_COUNT_64); ++ return rc; ++ } ++ ++ if (ap.ap_atomic != SPLAT_ATOMIC_INIT_VALUE) { ++ splat_vprint(file, SPLAT_ATOMIC_TEST1_NAME, ++ "Final value %lu does not match initial value %lu\n", ++ (long unsigned)ap.ap_atomic, SPLAT_ATOMIC_INIT_VALUE); ++ return -EINVAL; ++ } ++ ++ splat_vprint(file, SPLAT_ATOMIC_TEST1_NAME, ++ "Success initial and final values match, %lu == %lu\n", ++ (long unsigned)ap.ap_atomic, SPLAT_ATOMIC_INIT_VALUE); ++ ++ return 0; ++} ++ ++splat_subsystem_t * ++splat_atomic_init(void) ++{ ++ splat_subsystem_t *sub; ++ ++ sub = kmalloc(sizeof(*sub), GFP_KERNEL); ++ if (sub == NULL) ++ return NULL; ++ ++ memset(sub, 0, sizeof(*sub)); ++ strncpy(sub->desc.name, SPLAT_ATOMIC_NAME, SPLAT_NAME_SIZE); ++ strncpy(sub->desc.desc, SPLAT_ATOMIC_DESC, SPLAT_DESC_SIZE); ++ INIT_LIST_HEAD(&sub->subsystem_list); ++ INIT_LIST_HEAD(&sub->test_list); ++ spin_lock_init(&sub->test_lock); ++ sub->desc.id = SPLAT_SUBSYSTEM_ATOMIC; ++ ++ SPLAT_TEST_INIT(sub, SPLAT_ATOMIC_TEST1_NAME, SPLAT_ATOMIC_TEST1_DESC, ++ SPLAT_ATOMIC_TEST1_ID, splat_atomic_test1); ++ ++ return sub; ++} ++ ++void ++splat_atomic_fini(splat_subsystem_t *sub) ++{ ++ ASSERT(sub); ++ SPLAT_TEST_FINI(sub, SPLAT_ATOMIC_TEST1_ID); ++ ++ kfree(sub); ++} ++ ++int ++splat_atomic_id(void) { ++ return SPLAT_SUBSYSTEM_ATOMIC; ++} +diff -uNr linux-3.2.33-go.orig/spl/splat/splat-condvar.c linux-3.2.33-go/spl/splat/splat-condvar.c +--- linux-3.2.33-go.orig/spl/splat/splat-condvar.c 1970-01-01 01:00:00.000000000 +0100 ++++ linux-3.2.33-go/spl/splat/splat-condvar.c 2012-11-16 23:22:32.409192874 +0100 +@@ -0,0 +1,479 @@ ++/*****************************************************************************\ ++ * Copyright (C) 2007-2010 Lawrence Livermore National Security, LLC. ++ * Copyright (C) 2007 The Regents of the University of California. ++ * Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER). ++ * Written by Brian Behlendorf . ++ * UCRL-CODE-235197 ++ * ++ * This file is part of the SPL, Solaris Porting Layer. ++ * For details, see . ++ * ++ * The SPL is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License as published by the ++ * Free Software Foundation; either version 2 of the License, or (at your ++ * option) any later version. ++ * ++ * The SPL is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * for more details. ++ * ++ * You should have received a copy of the GNU General Public License along ++ * with the SPL. If not, see . ++ ***************************************************************************** ++ * Solaris Porting LAyer Tests (SPLAT) Condition Variable Tests. 
++\*****************************************************************************/ ++ ++#include ++#include "splat-internal.h" ++ ++#define SPLAT_CONDVAR_NAME "condvar" ++#define SPLAT_CONDVAR_DESC "Kernel Condition Variable Tests" ++ ++#define SPLAT_CONDVAR_TEST1_ID 0x0501 ++#define SPLAT_CONDVAR_TEST1_NAME "signal1" ++#define SPLAT_CONDVAR_TEST1_DESC "Wake a single thread, cv_wait()/cv_signal()" ++ ++#define SPLAT_CONDVAR_TEST2_ID 0x0502 ++#define SPLAT_CONDVAR_TEST2_NAME "broadcast1" ++#define SPLAT_CONDVAR_TEST2_DESC "Wake all threads, cv_wait()/cv_broadcast()" ++ ++#define SPLAT_CONDVAR_TEST3_ID 0x0503 ++#define SPLAT_CONDVAR_TEST3_NAME "signal2" ++#define SPLAT_CONDVAR_TEST3_DESC "Wake a single thread, cv_wait_timeout()/cv_signal()" ++ ++#define SPLAT_CONDVAR_TEST4_ID 0x0504 ++#define SPLAT_CONDVAR_TEST4_NAME "broadcast2" ++#define SPLAT_CONDVAR_TEST4_DESC "Wake all threads, cv_wait_timeout()/cv_broadcast()" ++ ++#define SPLAT_CONDVAR_TEST5_ID 0x0505 ++#define SPLAT_CONDVAR_TEST5_NAME "timeout" ++#define SPLAT_CONDVAR_TEST5_DESC "Timeout thread, cv_wait_timeout()" ++ ++#define SPLAT_CONDVAR_TEST_MAGIC 0x115599DDUL ++#define SPLAT_CONDVAR_TEST_NAME "condvar_test" ++#define SPLAT_CONDVAR_TEST_COUNT 8 ++ ++typedef struct condvar_priv { ++ unsigned long cv_magic; ++ struct file *cv_file; ++ kcondvar_t cv_condvar; ++ kmutex_t cv_mtx; ++} condvar_priv_t; ++ ++typedef struct condvar_thr { ++ int ct_id; ++ const char *ct_name; ++ condvar_priv_t *ct_cvp; ++ int ct_rc; ++} condvar_thr_t; ++ ++int ++splat_condvar_test12_thread(void *arg) ++{ ++ condvar_thr_t *ct = (condvar_thr_t *)arg; ++ condvar_priv_t *cv = ct->ct_cvp; ++ char name[16]; ++ ++ ASSERT(cv->cv_magic == SPLAT_CONDVAR_TEST_MAGIC); ++ snprintf(name, sizeof(name),"%s%d",SPLAT_CONDVAR_TEST_NAME,ct->ct_id); ++ daemonize(name); ++ ++ mutex_enter(&cv->cv_mtx); ++ splat_vprint(cv->cv_file, ct->ct_name, ++ "%s thread sleeping with %d waiters\n", ++ name, atomic_read(&cv->cv_condvar.cv_waiters)); ++ cv_wait(&cv->cv_condvar, &cv->cv_mtx); ++ splat_vprint(cv->cv_file, ct->ct_name, ++ "%s thread woken %d waiters remain\n", ++ name, atomic_read(&cv->cv_condvar.cv_waiters)); ++ mutex_exit(&cv->cv_mtx); ++ ++ return 0; ++} ++ ++static int ++splat_condvar_test1(struct file *file, void *arg) ++{ ++ int i, count = 0, rc = 0; ++ long pids[SPLAT_CONDVAR_TEST_COUNT]; ++ condvar_thr_t ct[SPLAT_CONDVAR_TEST_COUNT]; ++ condvar_priv_t cv; ++ ++ cv.cv_magic = SPLAT_CONDVAR_TEST_MAGIC; ++ cv.cv_file = file; ++ mutex_init(&cv.cv_mtx, SPLAT_CONDVAR_TEST_NAME, MUTEX_DEFAULT, NULL); ++ cv_init(&cv.cv_condvar, NULL, CV_DEFAULT, NULL); ++ ++ /* Create some threads, the exact number isn't important just as ++ * long as we know how many we managed to create and should expect. 
*/ ++ for (i = 0; i < SPLAT_CONDVAR_TEST_COUNT; i++) { ++ ct[i].ct_cvp = &cv; ++ ct[i].ct_id = i; ++ ct[i].ct_name = SPLAT_CONDVAR_TEST1_NAME; ++ ct[i].ct_rc = 0; ++ ++ pids[i] = kernel_thread(splat_condvar_test12_thread, &ct[i], 0); ++ if (pids[i] >= 0) ++ count++; ++ } ++ ++ /* Wait until all threads are waiting on the condition variable */ ++ while (atomic_read(&cv.cv_condvar.cv_waiters) != count) ++ schedule(); ++ ++ /* Wake a single thread at a time, wait until it exits */ ++ for (i = 1; i <= count; i++) { ++ cv_signal(&cv.cv_condvar); ++ ++ while (atomic_read(&cv.cv_condvar.cv_waiters) > (count - i)) ++ schedule(); ++ ++ /* Correct behavior 1 thread woken */ ++ if (atomic_read(&cv.cv_condvar.cv_waiters) == (count - i)) ++ continue; ++ ++ splat_vprint(file, SPLAT_CONDVAR_TEST1_NAME, "Attempted to " ++ "wake %d thread but work %d threads woke\n", ++ 1, count - atomic_read(&cv.cv_condvar.cv_waiters)); ++ rc = -EINVAL; ++ break; ++ } ++ ++ if (!rc) ++ splat_vprint(file, SPLAT_CONDVAR_TEST1_NAME, "Correctly woke " ++ "%d sleeping threads %d at a time\n", count, 1); ++ ++ /* Wait until that last nutex is dropped */ ++ while (mutex_owner(&cv.cv_mtx)) ++ schedule(); ++ ++ /* Wake everything for the failure case */ ++ cv_broadcast(&cv.cv_condvar); ++ cv_destroy(&cv.cv_condvar); ++ mutex_destroy(&cv.cv_mtx); ++ ++ return rc; ++} ++ ++static int ++splat_condvar_test2(struct file *file, void *arg) ++{ ++ int i, count = 0, rc = 0; ++ long pids[SPLAT_CONDVAR_TEST_COUNT]; ++ condvar_thr_t ct[SPLAT_CONDVAR_TEST_COUNT]; ++ condvar_priv_t cv; ++ ++ cv.cv_magic = SPLAT_CONDVAR_TEST_MAGIC; ++ cv.cv_file = file; ++ mutex_init(&cv.cv_mtx, SPLAT_CONDVAR_TEST_NAME, MUTEX_DEFAULT, NULL); ++ cv_init(&cv.cv_condvar, NULL, CV_DEFAULT, NULL); ++ ++ /* Create some threads, the exact number isn't important just as ++ * long as we know how many we managed to create and should expect. */ ++ for (i = 0; i < SPLAT_CONDVAR_TEST_COUNT; i++) { ++ ct[i].ct_cvp = &cv; ++ ct[i].ct_id = i; ++ ct[i].ct_name = SPLAT_CONDVAR_TEST2_NAME; ++ ct[i].ct_rc = 0; ++ ++ pids[i] = kernel_thread(splat_condvar_test12_thread, &ct[i], 0); ++ if (pids[i] > 0) ++ count++; ++ } ++ ++ /* Wait until all threads are waiting on the condition variable */ ++ while (atomic_read(&cv.cv_condvar.cv_waiters) != count) ++ schedule(); ++ ++ /* Wake all threads waiting on the condition variable */ ++ cv_broadcast(&cv.cv_condvar); ++ ++ /* Wait until all threads have exited */ ++ while ((atomic_read(&cv.cv_condvar.cv_waiters) > 0) || mutex_owner(&cv.cv_mtx)) ++ schedule(); ++ ++ splat_vprint(file, SPLAT_CONDVAR_TEST2_NAME, "Correctly woke all " ++ "%d sleeping threads at once\n", count); ++ ++ /* Wake everything for the failure case */ ++ cv_destroy(&cv.cv_condvar); ++ mutex_destroy(&cv.cv_mtx); ++ ++ return rc; ++} ++ ++int ++splat_condvar_test34_thread(void *arg) ++{ ++ condvar_thr_t *ct = (condvar_thr_t *)arg; ++ condvar_priv_t *cv = ct->ct_cvp; ++ char name[16]; ++ clock_t rc; ++ ++ ASSERT(cv->cv_magic == SPLAT_CONDVAR_TEST_MAGIC); ++ snprintf(name, sizeof(name), "%s%d", SPLAT_CONDVAR_TEST_NAME, ct->ct_id); ++ daemonize(name); ++ ++ mutex_enter(&cv->cv_mtx); ++ splat_vprint(cv->cv_file, ct->ct_name, ++ "%s thread sleeping with %d waiters\n", ++ name, atomic_read(&cv->cv_condvar.cv_waiters)); ++ ++ /* Sleep no longer than 3 seconds, for this test we should ++ * actually never sleep that long without being woken up. 
*/ ++ rc = cv_timedwait(&cv->cv_condvar, &cv->cv_mtx, lbolt + HZ * 3); ++ if (rc == -1) { ++ ct->ct_rc = -ETIMEDOUT; ++ splat_vprint(cv->cv_file, ct->ct_name, "%s thread timed out, " ++ "should have been woken\n", name); ++ } else { ++ splat_vprint(cv->cv_file, ct->ct_name, ++ "%s thread woken %d waiters remain\n", ++ name, atomic_read(&cv->cv_condvar.cv_waiters)); ++ } ++ ++ mutex_exit(&cv->cv_mtx); ++ ++ return 0; ++} ++ ++static int ++splat_condvar_test3(struct file *file, void *arg) ++{ ++ int i, count = 0, rc = 0; ++ long pids[SPLAT_CONDVAR_TEST_COUNT]; ++ condvar_thr_t ct[SPLAT_CONDVAR_TEST_COUNT]; ++ condvar_priv_t cv; ++ ++ cv.cv_magic = SPLAT_CONDVAR_TEST_MAGIC; ++ cv.cv_file = file; ++ mutex_init(&cv.cv_mtx, SPLAT_CONDVAR_TEST_NAME, MUTEX_DEFAULT, NULL); ++ cv_init(&cv.cv_condvar, NULL, CV_DEFAULT, NULL); ++ ++ /* Create some threads, the exact number isn't important just as ++ * long as we know how many we managed to create and should expect. */ ++ for (i = 0; i < SPLAT_CONDVAR_TEST_COUNT; i++) { ++ ct[i].ct_cvp = &cv; ++ ct[i].ct_id = i; ++ ct[i].ct_name = SPLAT_CONDVAR_TEST3_NAME; ++ ct[i].ct_rc = 0; ++ ++ pids[i] = kernel_thread(splat_condvar_test34_thread, &ct[i], 0); ++ if (pids[i] >= 0) ++ count++; ++ } ++ ++ /* Wait until all threads are waiting on the condition variable */ ++ while (atomic_read(&cv.cv_condvar.cv_waiters) != count) ++ schedule(); ++ ++ /* Wake a single thread at a time, wait until it exits */ ++ for (i = 1; i <= count; i++) { ++ cv_signal(&cv.cv_condvar); ++ ++ while (atomic_read(&cv.cv_condvar.cv_waiters) > (count - i)) ++ schedule(); ++ ++ /* Correct behavior 1 thread woken */ ++ if (atomic_read(&cv.cv_condvar.cv_waiters) == (count - i)) ++ continue; ++ ++ splat_vprint(file, SPLAT_CONDVAR_TEST3_NAME, "Attempted to " ++ "wake %d thread but work %d threads woke\n", ++ 1, count - atomic_read(&cv.cv_condvar.cv_waiters)); ++ rc = -EINVAL; ++ break; ++ } ++ ++ /* Validate no waiting thread timed out early */ ++ for (i = 0; i < count; i++) ++ if (ct[i].ct_rc) ++ rc = ct[i].ct_rc; ++ ++ if (!rc) ++ splat_vprint(file, SPLAT_CONDVAR_TEST3_NAME, "Correctly woke " ++ "%d sleeping threads %d at a time\n", count, 1); ++ ++ /* Wait until that last nutex is dropped */ ++ while (mutex_owner(&cv.cv_mtx)) ++ schedule(); ++ ++ /* Wake everything for the failure case */ ++ cv_broadcast(&cv.cv_condvar); ++ cv_destroy(&cv.cv_condvar); ++ mutex_destroy(&cv.cv_mtx); ++ ++ return rc; ++} ++ ++static int ++splat_condvar_test4(struct file *file, void *arg) ++{ ++ int i, count = 0, rc = 0; ++ long pids[SPLAT_CONDVAR_TEST_COUNT]; ++ condvar_thr_t ct[SPLAT_CONDVAR_TEST_COUNT]; ++ condvar_priv_t cv; ++ ++ cv.cv_magic = SPLAT_CONDVAR_TEST_MAGIC; ++ cv.cv_file = file; ++ mutex_init(&cv.cv_mtx, SPLAT_CONDVAR_TEST_NAME, MUTEX_DEFAULT, NULL); ++ cv_init(&cv.cv_condvar, NULL, CV_DEFAULT, NULL); ++ ++ /* Create some threads, the exact number isn't important just as ++ * long as we know how many we managed to create and should expect. 
*/ ++ for (i = 0; i < SPLAT_CONDVAR_TEST_COUNT; i++) { ++ ct[i].ct_cvp = &cv; ++ ct[i].ct_id = i; ++ ct[i].ct_name = SPLAT_CONDVAR_TEST3_NAME; ++ ct[i].ct_rc = 0; ++ ++ pids[i] = kernel_thread(splat_condvar_test34_thread, &ct[i], 0); ++ if (pids[i] >= 0) ++ count++; ++ } ++ ++ /* Wait until all threads are waiting on the condition variable */ ++ while (atomic_read(&cv.cv_condvar.cv_waiters) != count) ++ schedule(); ++ ++ /* Wake a single thread at a time, wait until it exits */ ++ for (i = 1; i <= count; i++) { ++ cv_signal(&cv.cv_condvar); ++ ++ while (atomic_read(&cv.cv_condvar.cv_waiters) > (count - i)) ++ schedule(); ++ ++ /* Correct behavior 1 thread woken */ ++ if (atomic_read(&cv.cv_condvar.cv_waiters) == (count - i)) ++ continue; ++ ++ splat_vprint(file, SPLAT_CONDVAR_TEST3_NAME, "Attempted to " ++ "wake %d thread but work %d threads woke\n", ++ 1, count - atomic_read(&cv.cv_condvar.cv_waiters)); ++ rc = -EINVAL; ++ break; ++ } ++ ++ /* Validate no waiting thread timed out early */ ++ for (i = 0; i < count; i++) ++ if (ct[i].ct_rc) ++ rc = ct[i].ct_rc; ++ ++ if (!rc) ++ splat_vprint(file, SPLAT_CONDVAR_TEST3_NAME, "Correctly woke " ++ "%d sleeping threads %d at a time\n", count, 1); ++ ++ /* Wait until that last nutex is dropped */ ++ while (mutex_owner(&cv.cv_mtx)) ++ schedule(); ++ ++ /* Wake everything for the failure case */ ++ cv_broadcast(&cv.cv_condvar); ++ cv_destroy(&cv.cv_condvar); ++ mutex_destroy(&cv.cv_mtx); ++ ++ return rc; ++} ++ ++static int ++splat_condvar_test5(struct file *file, void *arg) ++{ ++ kcondvar_t condvar; ++ kmutex_t mtx; ++ clock_t time_left, time_before, time_after, time_delta; ++ int64_t whole_delta; ++ int32_t remain_delta; ++ int rc = 0; ++ ++ mutex_init(&mtx, SPLAT_CONDVAR_TEST_NAME, MUTEX_DEFAULT, NULL); ++ cv_init(&condvar, NULL, CV_DEFAULT, NULL); ++ ++ splat_vprint(file, SPLAT_CONDVAR_TEST5_NAME, "Thread going to sleep for " ++ "%d second and expecting to be woken by timeout\n", 1); ++ ++ /* Allow a 1 second timeout, plenty long to validate correctness. 
*/ ++ time_before = lbolt; ++ mutex_enter(&mtx); ++ time_left = cv_timedwait(&condvar, &mtx, lbolt + HZ); ++ mutex_exit(&mtx); ++ time_after = lbolt; ++ time_delta = time_after - time_before; /* XXX - Handle jiffie wrap */ ++ whole_delta = time_delta; ++ remain_delta = do_div(whole_delta, HZ); ++ ++ if (time_left == -1) { ++ if (time_delta >= HZ) { ++ splat_vprint(file, SPLAT_CONDVAR_TEST5_NAME, ++ "Thread correctly timed out and was asleep " ++ "for %d.%d seconds (%d second min)\n", ++ (int)whole_delta, remain_delta, 1); ++ } else { ++ splat_vprint(file, SPLAT_CONDVAR_TEST5_NAME, ++ "Thread correctly timed out but was only " ++ "asleep for %d.%d seconds (%d second " ++ "min)\n", (int)whole_delta, remain_delta, 1); ++ rc = -ETIMEDOUT; ++ } ++ } else { ++ splat_vprint(file, SPLAT_CONDVAR_TEST5_NAME, ++ "Thread exited after only %d.%d seconds, it " ++ "did not hit the %d second timeout\n", ++ (int)whole_delta, remain_delta, 1); ++ rc = -ETIMEDOUT; ++ } ++ ++ cv_destroy(&condvar); ++ mutex_destroy(&mtx); ++ ++ return rc; ++} ++ ++splat_subsystem_t * ++splat_condvar_init(void) ++{ ++ splat_subsystem_t *sub; ++ ++ sub = kmalloc(sizeof(*sub), GFP_KERNEL); ++ if (sub == NULL) ++ return NULL; ++ ++ memset(sub, 0, sizeof(*sub)); ++ strncpy(sub->desc.name, SPLAT_CONDVAR_NAME, SPLAT_NAME_SIZE); ++ strncpy(sub->desc.desc, SPLAT_CONDVAR_DESC, SPLAT_DESC_SIZE); ++ INIT_LIST_HEAD(&sub->subsystem_list); ++ INIT_LIST_HEAD(&sub->test_list); ++ spin_lock_init(&sub->test_lock); ++ sub->desc.id = SPLAT_SUBSYSTEM_CONDVAR; ++ ++ SPLAT_TEST_INIT(sub, SPLAT_CONDVAR_TEST1_NAME, SPLAT_CONDVAR_TEST1_DESC, ++ SPLAT_CONDVAR_TEST1_ID, splat_condvar_test1); ++ SPLAT_TEST_INIT(sub, SPLAT_CONDVAR_TEST2_NAME, SPLAT_CONDVAR_TEST2_DESC, ++ SPLAT_CONDVAR_TEST2_ID, splat_condvar_test2); ++ SPLAT_TEST_INIT(sub, SPLAT_CONDVAR_TEST3_NAME, SPLAT_CONDVAR_TEST3_DESC, ++ SPLAT_CONDVAR_TEST3_ID, splat_condvar_test3); ++ SPLAT_TEST_INIT(sub, SPLAT_CONDVAR_TEST4_NAME, SPLAT_CONDVAR_TEST4_DESC, ++ SPLAT_CONDVAR_TEST4_ID, splat_condvar_test4); ++ SPLAT_TEST_INIT(sub, SPLAT_CONDVAR_TEST5_NAME, SPLAT_CONDVAR_TEST5_DESC, ++ SPLAT_CONDVAR_TEST5_ID, splat_condvar_test5); ++ ++ return sub; ++} ++ ++void ++splat_condvar_fini(splat_subsystem_t *sub) ++{ ++ ASSERT(sub); ++ SPLAT_TEST_FINI(sub, SPLAT_CONDVAR_TEST5_ID); ++ SPLAT_TEST_FINI(sub, SPLAT_CONDVAR_TEST4_ID); ++ SPLAT_TEST_FINI(sub, SPLAT_CONDVAR_TEST3_ID); ++ SPLAT_TEST_FINI(sub, SPLAT_CONDVAR_TEST2_ID); ++ SPLAT_TEST_FINI(sub, SPLAT_CONDVAR_TEST1_ID); ++ ++ kfree(sub); ++} ++ ++int ++splat_condvar_id(void) { ++ return SPLAT_SUBSYSTEM_CONDVAR; ++} +diff -uNr linux-3.2.33-go.orig/spl/splat/splat-cred.c linux-3.2.33-go/spl/splat/splat-cred.c +--- linux-3.2.33-go.orig/spl/splat/splat-cred.c 1970-01-01 01:00:00.000000000 +0100 ++++ linux-3.2.33-go/spl/splat/splat-cred.c 2012-11-16 23:22:32.409192874 +0100 +@@ -0,0 +1,250 @@ ++/*****************************************************************************\ ++ * Copyright (C) 2007-2010 Lawrence Livermore National Security, LLC. ++ * Copyright (C) 2007 The Regents of the University of California. ++ * Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER). ++ * Written by Brian Behlendorf . ++ * UCRL-CODE-235197 ++ * ++ * This file is part of the SPL, Solaris Porting Layer. ++ * For details, see . 
++ * ++ * The SPL is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License as published by the ++ * Free Software Foundation; either version 2 of the License, or (at your ++ * option) any later version. ++ * ++ * The SPL is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * for more details. ++ * ++ * You should have received a copy of the GNU General Public License along ++ * with the SPL. If not, see . ++ ***************************************************************************** ++ * Solaris Porting LAyer Tests (SPLAT) Credential Tests. ++\*****************************************************************************/ ++ ++#include ++#include "splat-internal.h" ++ ++#define SPLAT_CRED_NAME "cred" ++#define SPLAT_CRED_DESC "Kernel Cred Tests" ++ ++#define SPLAT_CRED_TEST1_ID 0x0e01 ++#define SPLAT_CRED_TEST1_NAME "cred" ++#define SPLAT_CRED_TEST1_DESC "Task Credential Test" ++ ++#define SPLAT_CRED_TEST2_ID 0x0e02 ++#define SPLAT_CRED_TEST2_NAME "kcred" ++#define SPLAT_CRED_TEST2_DESC "Kernel Credential Test" ++ ++#define SPLAT_CRED_TEST3_ID 0x0e03 ++#define SPLAT_CRED_TEST3_NAME "groupmember" ++#define SPLAT_CRED_TEST3_DESC "Group Member Test" ++ ++#define GROUP_STR_SIZE 128 ++#define GROUP_STR_REDZONE 16 ++ ++static int ++splat_cred_test1(struct file *file, void *arg) ++{ ++ char str[GROUP_STR_SIZE]; ++ uid_t uid, ruid, suid; ++ gid_t gid, rgid, sgid, *groups; ++ int ngroups, i, count = 0; ++ ++ uid = crgetuid(CRED()); ++ ruid = crgetruid(CRED()); ++ suid = crgetsuid(CRED()); ++ ++ gid = crgetgid(CRED()); ++ rgid = crgetrgid(CRED()); ++ sgid = crgetsgid(CRED()); ++ ++ crhold(CRED()); ++ ngroups = crgetngroups(CRED()); ++ groups = crgetgroups(CRED()); ++ ++ memset(str, 0, GROUP_STR_SIZE); ++ for (i = 0; i < ngroups; i++) { ++ count += sprintf(str + count, "%d ", groups[i]); ++ ++ if (count > (GROUP_STR_SIZE - GROUP_STR_REDZONE)) { ++ splat_vprint(file, SPLAT_CRED_TEST1_NAME, ++ "Failed too many group entries for temp " ++ "buffer: %d, %s\n", ngroups, str); ++ return -ENOSPC; ++ } ++ } ++ ++ crfree(CRED()); ++ ++ splat_vprint(file, SPLAT_CRED_TEST1_NAME, ++ "uid: %d ruid: %d suid: %d " ++ "gid: %d rgid: %d sgid: %d\n", ++ uid, ruid, suid, gid, rgid, sgid); ++ splat_vprint(file, SPLAT_CRED_TEST1_NAME, ++ "ngroups: %d groups: %s\n", ngroups, str); ++ ++ if (uid || ruid || suid || gid || rgid || sgid) { ++ splat_vprint(file, SPLAT_CRED_TEST1_NAME, ++ "Failed expected all uids+gids to be %d\n", 0); ++ return -EIDRM; ++ } ++ ++ if (ngroups > NGROUPS_MAX) { ++ splat_vprint(file, SPLAT_CRED_TEST1_NAME, ++ "Failed ngroups must not exceed NGROUPS_MAX: " ++ "%d > %d\n", ngroups, NGROUPS_MAX); ++ return -EIDRM; ++ } ++ ++ splat_vprint(file, SPLAT_CRED_TEST1_NAME, ++ "Success sane CRED(): %d\n", 0); ++ ++ return 0; ++} /* splat_cred_test1() */ ++ ++static int ++splat_cred_test2(struct file *file, void *arg) ++{ ++ char str[GROUP_STR_SIZE]; ++ uid_t uid, ruid, suid; ++ gid_t gid, rgid, sgid, *groups; ++ int ngroups, i, count = 0; ++ ++ uid = crgetuid(kcred); ++ ruid = crgetruid(kcred); ++ suid = crgetsuid(kcred); ++ ++ gid = crgetgid(kcred); ++ rgid = crgetrgid(kcred); ++ sgid = crgetsgid(kcred); ++ ++ crhold(kcred); ++ ngroups = crgetngroups(kcred); ++ groups = crgetgroups(kcred); ++ ++ memset(str, 0, GROUP_STR_SIZE); ++ for (i = 0; i < ngroups; i++) { ++ count += sprintf(str + 
count, "%d ", groups[i]); ++ ++ if (count > (GROUP_STR_SIZE - GROUP_STR_REDZONE)) { ++ splat_vprint(file, SPLAT_CRED_TEST2_NAME, ++ "Failed too many group entries for temp " ++ "buffer: %d, %s\n", ngroups, str); ++ return -ENOSPC; ++ } ++ } ++ ++ crfree(kcred); ++ ++ splat_vprint(file, SPLAT_CRED_TEST2_NAME, ++ "uid: %d ruid: %d suid: %d " ++ "gid: %d rgid: %d sgid: %d\n", ++ uid, ruid, suid, gid, rgid, sgid); ++ splat_vprint(file, SPLAT_CRED_TEST2_NAME, ++ "ngroups: %d groups: %s\n", ngroups, str); ++ ++ if (uid || ruid || suid || gid || rgid || sgid) { ++ splat_vprint(file, SPLAT_CRED_TEST2_NAME, ++ "Failed expected all uids+gids to be %d\n", 0); ++ return -EIDRM; ++ } ++ ++ if (ngroups > NGROUPS_MAX) { ++ splat_vprint(file, SPLAT_CRED_TEST2_NAME, ++ "Failed ngroups must not exceed NGROUPS_MAX: " ++ "%d > %d\n", ngroups, NGROUPS_MAX); ++ return -EIDRM; ++ } ++ ++ splat_vprint(file, SPLAT_CRED_TEST2_NAME, ++ "Success sane kcred: %d\n", 0); ++ ++ return 0; ++} /* splat_cred_test2() */ ++ ++/* ++ * On most/all systems it can be expected that a task with root ++ * permissions also is a member of the root group, Since the ++ * test suite is always run as root we check first that CRED() is ++ * a member of the root group, and secondly that it is not a member ++ * of our fake group. This test will break is someone happens to ++ * create group number NGROUPS_MAX-1 and then added root to it. ++ */ ++static int ++splat_cred_test3(struct file *file, void *arg) ++{ ++ gid_t root_gid, fake_gid; ++ int rc; ++ ++ root_gid = 0; ++ fake_gid = NGROUPS_MAX-1; ++ ++ rc = groupmember(root_gid, CRED()); ++ if (!rc) { ++ splat_vprint(file, SPLAT_CRED_TEST3_NAME, ++ "Failed root git %d expected to be member " ++ "of CRED() groups: %d\n", root_gid, rc); ++ return -EIDRM; ++ } ++ ++ rc = groupmember(fake_gid, CRED()); ++ if (rc) { ++ splat_vprint(file, SPLAT_CRED_TEST3_NAME, ++ "Failed fake git %d expected not to be member " ++ "of CRED() groups: %d\n", fake_gid, rc); ++ return -EIDRM; ++ } ++ ++ splat_vprint(file, SPLAT_CRED_TEST3_NAME, "Success root gid " ++ "is a member of the expected groups: %d\n", rc); ++ ++ return rc; ++} /* splat_cred_test3() */ ++ ++splat_subsystem_t * ++splat_cred_init(void) ++{ ++ splat_subsystem_t *sub; ++ ++ sub = kmalloc(sizeof(*sub), GFP_KERNEL); ++ if (sub == NULL) ++ return NULL; ++ ++ memset(sub, 0, sizeof(*sub)); ++ strncpy(sub->desc.name, SPLAT_CRED_NAME, SPLAT_NAME_SIZE); ++ strncpy(sub->desc.desc, SPLAT_CRED_DESC, SPLAT_DESC_SIZE); ++ INIT_LIST_HEAD(&sub->subsystem_list); ++ INIT_LIST_HEAD(&sub->test_list); ++ spin_lock_init(&sub->test_lock); ++ sub->desc.id = SPLAT_SUBSYSTEM_CRED; ++ ++ SPLAT_TEST_INIT(sub, SPLAT_CRED_TEST1_NAME, SPLAT_CRED_TEST1_DESC, ++ SPLAT_CRED_TEST1_ID, splat_cred_test1); ++ SPLAT_TEST_INIT(sub, SPLAT_CRED_TEST2_NAME, SPLAT_CRED_TEST2_DESC, ++ SPLAT_CRED_TEST2_ID, splat_cred_test2); ++ SPLAT_TEST_INIT(sub, SPLAT_CRED_TEST3_NAME, SPLAT_CRED_TEST3_DESC, ++ SPLAT_CRED_TEST3_ID, splat_cred_test3); ++ ++ return sub; ++} /* splat_cred_init() */ ++ ++void ++splat_cred_fini(splat_subsystem_t *sub) ++{ ++ ASSERT(sub); ++ ++ SPLAT_TEST_FINI(sub, SPLAT_CRED_TEST3_ID); ++ SPLAT_TEST_FINI(sub, SPLAT_CRED_TEST2_ID); ++ SPLAT_TEST_FINI(sub, SPLAT_CRED_TEST1_ID); ++ ++ kfree(sub); ++} /* splat_cred_fini() */ ++ ++int ++splat_cred_id(void) ++{ ++ return SPLAT_SUBSYSTEM_CRED; ++} /* splat_cred_id() */ +diff -uNr linux-3.2.33-go.orig/spl/splat/splat-ctl.c linux-3.2.33-go/spl/splat/splat-ctl.c +--- linux-3.2.33-go.orig/spl/splat/splat-ctl.c 1970-01-01 01:00:00.000000000 
+0100 ++++ linux-3.2.33-go/spl/splat/splat-ctl.c 2012-11-16 23:22:32.409192874 +0100 +@@ -0,0 +1,723 @@ ++/*****************************************************************************\ ++ * Copyright (C) 2007-2010 Lawrence Livermore National Security, LLC. ++ * Copyright (C) 2007 The Regents of the University of California. ++ * Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER). ++ * Written by Brian Behlendorf . ++ * UCRL-CODE-235197 ++ * ++ * This file is part of the SPL, Solaris Porting Layer. ++ * For details, see . ++ * ++ * The SPL is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License as published by the ++ * Free Software Foundation; either version 2 of the License, or (at your ++ * option) any later version. ++ * ++ * The SPL is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * for more details. ++ * ++ * You should have received a copy of the GNU General Public License along ++ * with the SPL. If not, see . ++ ***************************************************************************** ++ * Solaris Porting LAyer Tests (SPLAT) Test Control Interface. ++ * ++ * The 'splat' (Solaris Porting LAyer Tests) module is designed as a ++ * framework which runs various in kernel regression tests to validate ++ * the SPL primitives honor the Solaris ABI. ++ * ++ * The splat module is constructed of various splat_* source files each ++ * of which contain regression tests for a particular subsystem. For ++ * example, the splat_kmem.c file contains all the tests for validating ++ * the kmem interfaces have been implemented correctly. When the splat ++ * module is loaded splat_*_init() will be called for each subsystems ++ * tests. It is the responsibility of splat_*_init() to register all ++ * the tests for this subsystem using the SPLAT_TEST_INIT() macro. ++ * Similarly splat_*_fini() is called when the splat module is removed ++ * and is responsible for unregistering its tests via the SPLAT_TEST_FINI ++ * macro. Once a test is registered it can then be run with an ioctl() ++ * call which specifies the subsystem and test to be run. The provided ++ * splat command line tool can be used to display all available ++ * subsystems and tests. It can also be used to run the full suite ++ * of regression tests or particular tests. 
++\*****************************************************************************/ ++ ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include "splat-internal.h" ++ ++static spl_class *splat_class; ++static spl_device *splat_device; ++static struct list_head splat_module_list; ++static spinlock_t splat_module_lock; ++ ++static int ++splat_open(struct inode *inode, struct file *file) ++{ ++ unsigned int minor = iminor(inode); ++ splat_info_t *info; ++ ++ if (minor >= SPLAT_MINORS) ++ return -ENXIO; ++ ++ info = (splat_info_t *)kmalloc(sizeof(*info), GFP_KERNEL); ++ if (info == NULL) ++ return -ENOMEM; ++ ++ mutex_init(&info->info_lock); ++ info->info_size = SPLAT_INFO_BUFFER_SIZE; ++ info->info_buffer = (char *)vmalloc(SPLAT_INFO_BUFFER_SIZE); ++ if (info->info_buffer == NULL) { ++ kfree(info); ++ return -ENOMEM; ++ } ++ memset(info->info_buffer, 0, info->info_size); ++ ++ info->info_head = info->info_buffer; ++ file->private_data = (void *)info; ++ ++ splat_print(file, "%s\n", spl_version); ++ ++ return 0; ++} ++ ++static int ++splat_release(struct inode *inode, struct file *file) ++{ ++ unsigned int minor = iminor(inode); ++ splat_info_t *info = (splat_info_t *)file->private_data; ++ ++ if (minor >= SPLAT_MINORS) ++ return -ENXIO; ++ ++ ASSERT(info); ++ ASSERT(info->info_buffer); ++ ++ mutex_destroy(&info->info_lock); ++ vfree(info->info_buffer); ++ kfree(info); ++ ++ return 0; ++} ++ ++static int ++splat_buffer_clear(struct file *file, splat_cfg_t *kcfg, unsigned long arg) ++{ ++ splat_info_t *info = (splat_info_t *)file->private_data; ++ ++ ASSERT(info); ++ ASSERT(info->info_buffer); ++ ++ mutex_lock(&info->info_lock); ++ memset(info->info_buffer, 0, info->info_size); ++ info->info_head = info->info_buffer; ++ mutex_unlock(&info->info_lock); ++ ++ return 0; ++} ++ ++static int ++splat_buffer_size(struct file *file, splat_cfg_t *kcfg, unsigned long arg) ++{ ++ splat_info_t *info = (splat_info_t *)file->private_data; ++ char *buf; ++ int min, size, rc = 0; ++ ++ ASSERT(info); ++ ASSERT(info->info_buffer); ++ ++ mutex_lock(&info->info_lock); ++ if (kcfg->cfg_arg1 > 0) { ++ ++ size = kcfg->cfg_arg1; ++ buf = (char *)vmalloc(size); ++ if (buf == NULL) { ++ rc = -ENOMEM; ++ goto out; ++ } ++ ++ /* Zero fill and truncate contents when coping buffer */ ++ min = ((size < info->info_size) ? 
size : info->info_size); ++ memset(buf, 0, size); ++ memcpy(buf, info->info_buffer, min); ++ vfree(info->info_buffer); ++ info->info_size = size; ++ info->info_buffer = buf; ++ info->info_head = info->info_buffer; ++ } ++ ++ kcfg->cfg_rc1 = info->info_size; ++ ++ if (copy_to_user((struct splat_cfg_t __user *)arg, kcfg, sizeof(*kcfg))) ++ rc = -EFAULT; ++out: ++ mutex_unlock(&info->info_lock); ++ ++ return rc; ++} ++ ++ ++static splat_subsystem_t * ++splat_subsystem_find(int id) { ++ splat_subsystem_t *sub; ++ ++ spin_lock(&splat_module_lock); ++ list_for_each_entry(sub, &splat_module_list, subsystem_list) { ++ if (id == sub->desc.id) { ++ spin_unlock(&splat_module_lock); ++ return sub; ++ } ++ } ++ spin_unlock(&splat_module_lock); ++ ++ return NULL; ++} ++ ++static int ++splat_subsystem_count(splat_cfg_t *kcfg, unsigned long arg) ++{ ++ splat_subsystem_t *sub; ++ int i = 0; ++ ++ spin_lock(&splat_module_lock); ++ list_for_each_entry(sub, &splat_module_list, subsystem_list) ++ i++; ++ ++ spin_unlock(&splat_module_lock); ++ kcfg->cfg_rc1 = i; ++ ++ if (copy_to_user((struct splat_cfg_t __user *)arg, kcfg, sizeof(*kcfg))) ++ return -EFAULT; ++ ++ return 0; ++} ++ ++static int ++splat_subsystem_list(splat_cfg_t *kcfg, unsigned long arg) ++{ ++ splat_subsystem_t *sub; ++ splat_cfg_t *tmp; ++ int size, i = 0; ++ ++ /* Structure will be sized large enough for N subsystem entries ++ * which is passed in by the caller. On exit the number of ++ * entries filled in with valid subsystems will be stored in ++ * cfg_rc1. If the caller does not provide enough entries ++ * for all subsystems we will truncate the list to avoid overrun. ++ */ ++ size = sizeof(*tmp) + kcfg->cfg_data.splat_subsystems.size * ++ sizeof(splat_user_t); ++ tmp = kmalloc(size, GFP_KERNEL); ++ if (tmp == NULL) ++ return -ENOMEM; ++ ++ /* Local 'tmp' is used as the structure copied back to user space */ ++ memset(tmp, 0, size); ++ memcpy(tmp, kcfg, sizeof(*kcfg)); ++ ++ spin_lock(&splat_module_lock); ++ list_for_each_entry(sub, &splat_module_list, subsystem_list) { ++ strncpy(tmp->cfg_data.splat_subsystems.descs[i].name, ++ sub->desc.name, SPLAT_NAME_SIZE); ++ strncpy(tmp->cfg_data.splat_subsystems.descs[i].desc, ++ sub->desc.desc, SPLAT_DESC_SIZE); ++ tmp->cfg_data.splat_subsystems.descs[i].id = sub->desc.id; ++ ++ /* Truncate list if we are about to overrun alloc'ed memory */ ++ if ((i++) == kcfg->cfg_data.splat_subsystems.size) ++ break; ++ } ++ spin_unlock(&splat_module_lock); ++ tmp->cfg_rc1 = i; ++ ++ if (copy_to_user((struct splat_cfg_t __user *)arg, tmp, size)) { ++ kfree(tmp); ++ return -EFAULT; ++ } ++ ++ kfree(tmp); ++ return 0; ++} ++ ++static int ++splat_test_count(splat_cfg_t *kcfg, unsigned long arg) ++{ ++ splat_subsystem_t *sub; ++ splat_test_t *test; ++ int i = 0; ++ ++ /* Subsystem ID passed as arg1 */ ++ sub = splat_subsystem_find(kcfg->cfg_arg1); ++ if (sub == NULL) ++ return -EINVAL; ++ ++ spin_lock(&(sub->test_lock)); ++ list_for_each_entry(test, &(sub->test_list), test_list) ++ i++; ++ ++ spin_unlock(&(sub->test_lock)); ++ kcfg->cfg_rc1 = i; ++ ++ if (copy_to_user((struct splat_cfg_t __user *)arg, kcfg, sizeof(*kcfg))) ++ return -EFAULT; ++ ++ return 0; ++} ++ ++static int ++splat_test_list(splat_cfg_t *kcfg, unsigned long arg) ++{ ++ splat_subsystem_t *sub; ++ splat_test_t *test; ++ splat_cfg_t *tmp; ++ int size, i = 0; ++ ++ /* Subsystem ID passed as arg1 */ ++ sub = splat_subsystem_find(kcfg->cfg_arg1); ++ if (sub == NULL) ++ return -EINVAL; ++ ++ /* Structure will be sized large enough for N test entries ++ 
* which is passed in by the caller. On exit the number of ++ * entries filled in with valid tests will be stored in ++ * cfg_rc1. If the caller does not provide enough entries ++ * for all tests we will truncate the list to avoid overrun. ++ */ ++ size = sizeof(*tmp)+kcfg->cfg_data.splat_tests.size*sizeof(splat_user_t); ++ tmp = kmalloc(size, GFP_KERNEL); ++ if (tmp == NULL) ++ return -ENOMEM; ++ ++ /* Local 'tmp' is used as the structure copied back to user space */ ++ memset(tmp, 0, size); ++ memcpy(tmp, kcfg, sizeof(*kcfg)); ++ ++ spin_lock(&(sub->test_lock)); ++ list_for_each_entry(test, &(sub->test_list), test_list) { ++ strncpy(tmp->cfg_data.splat_tests.descs[i].name, ++ test->desc.name, SPLAT_NAME_SIZE); ++ strncpy(tmp->cfg_data.splat_tests.descs[i].desc, ++ test->desc.desc, SPLAT_DESC_SIZE); ++ tmp->cfg_data.splat_tests.descs[i].id = test->desc.id; ++ ++ /* Truncate list if we are about to overrun alloc'ed memory */ ++ if ((i++) == kcfg->cfg_data.splat_tests.size) ++ break; ++ } ++ spin_unlock(&(sub->test_lock)); ++ tmp->cfg_rc1 = i; ++ ++ if (copy_to_user((struct splat_cfg_t __user *)arg, tmp, size)) { ++ kfree(tmp); ++ return -EFAULT; ++ } ++ ++ kfree(tmp); ++ return 0; ++} ++ ++static int ++splat_validate(struct file *file, splat_subsystem_t *sub, int cmd, void *arg) ++{ ++ splat_test_t *test; ++ ++ spin_lock(&(sub->test_lock)); ++ list_for_each_entry(test, &(sub->test_list), test_list) { ++ if (test->desc.id == cmd) { ++ spin_unlock(&(sub->test_lock)); ++ return test->test(file, arg); ++ } ++ } ++ spin_unlock(&(sub->test_lock)); ++ ++ return -EINVAL; ++} ++ ++static int ++splat_ioctl_cfg(struct file *file, unsigned int cmd, unsigned long arg) ++{ ++ splat_cfg_t kcfg; ++ int rc = 0; ++ ++ /* User and kernel space agree about arg size */ ++ if (_IOC_SIZE(cmd) != sizeof(kcfg)) ++ return -EBADMSG; ++ ++ if (copy_from_user(&kcfg, (splat_cfg_t *)arg, sizeof(kcfg))) ++ return -EFAULT; ++ ++ if (kcfg.cfg_magic != SPLAT_CFG_MAGIC) { ++ splat_print(file, "Bad config magic 0x%x != 0x%x\n", ++ kcfg.cfg_magic, SPLAT_CFG_MAGIC); ++ return -EINVAL; ++ } ++ ++ switch (kcfg.cfg_cmd) { ++ case SPLAT_CFG_BUFFER_CLEAR: ++ /* cfg_arg1 - Unused ++ * cfg_rc1 - Unused ++ */ ++ rc = splat_buffer_clear(file, &kcfg, arg); ++ break; ++ case SPLAT_CFG_BUFFER_SIZE: ++ /* cfg_arg1 - 0 - query size; >0 resize ++ * cfg_rc1 - Set to current buffer size ++ */ ++ rc = splat_buffer_size(file, &kcfg, arg); ++ break; ++ case SPLAT_CFG_SUBSYSTEM_COUNT: ++ /* cfg_arg1 - Unused ++ * cfg_rc1 - Set to number of subsystems ++ */ ++ rc = splat_subsystem_count(&kcfg, arg); ++ break; ++ case SPLAT_CFG_SUBSYSTEM_LIST: ++ /* cfg_arg1 - Unused ++ * cfg_rc1 - Set to number of subsystems ++ * cfg_data.splat_subsystems - Set with subsystems ++ */ ++ rc = splat_subsystem_list(&kcfg, arg); ++ break; ++ case SPLAT_CFG_TEST_COUNT: ++ /* cfg_arg1 - Set to a target subsystem ++ * cfg_rc1 - Set to number of tests ++ */ ++ rc = splat_test_count(&kcfg, arg); ++ break; ++ case SPLAT_CFG_TEST_LIST: ++ /* cfg_arg1 - Set to a target subsystem ++ * cfg_rc1 - Set to number of tests ++ * cfg_data.splat_subsystems - Populated with tests ++ */ ++ rc = splat_test_list(&kcfg, arg); ++ break; ++ default: ++ splat_print(file, "Bad config command %d\n", ++ kcfg.cfg_cmd); ++ rc = -EINVAL; ++ break; ++ } ++ ++ return rc; ++} ++ ++static int ++splat_ioctl_cmd(struct file *file, unsigned int cmd, unsigned long arg) ++{ ++ splat_subsystem_t *sub; ++ splat_cmd_t kcmd; ++ int rc = -EINVAL; ++ void *data = NULL; ++ ++ /* User and kernel space agree about arg 
size */ ++ if (_IOC_SIZE(cmd) != sizeof(kcmd)) ++ return -EBADMSG; ++ ++ if (copy_from_user(&kcmd, (splat_cfg_t *)arg, sizeof(kcmd))) ++ return -EFAULT; ++ ++ if (kcmd.cmd_magic != SPLAT_CMD_MAGIC) { ++ splat_print(file, "Bad command magic 0x%x != 0x%x\n", ++ kcmd.cmd_magic, SPLAT_CMD_MAGIC); ++ return -EINVAL; ++ } ++ ++ /* Allocate memory for any opaque data the caller needed to pass on */ ++ if (kcmd.cmd_data_size > 0) { ++ data = (void *)kmalloc(kcmd.cmd_data_size, GFP_KERNEL); ++ if (data == NULL) ++ return -ENOMEM; ++ ++ if (copy_from_user(data, (void *)(arg + offsetof(splat_cmd_t, ++ cmd_data_str)), kcmd.cmd_data_size)) { ++ kfree(data); ++ return -EFAULT; ++ } ++ } ++ ++ sub = splat_subsystem_find(kcmd.cmd_subsystem); ++ if (sub != NULL) ++ rc = splat_validate(file, sub, kcmd.cmd_test, data); ++ else ++ rc = -EINVAL; ++ ++ if (data != NULL) ++ kfree(data); ++ ++ return rc; ++} ++ ++static long ++splat_unlocked_ioctl(struct file *file, unsigned int cmd, unsigned long arg) ++{ ++ unsigned int minor = iminor(file->f_dentry->d_inode); ++ int rc = 0; ++ ++ /* Ignore tty ioctls */ ++ if ((cmd & 0xffffff00) == ((int)'T') << 8) ++ return -ENOTTY; ++ ++ if (minor >= SPLAT_MINORS) ++ return -ENXIO; ++ ++ switch (cmd) { ++ case SPLAT_CFG: ++ rc = splat_ioctl_cfg(file, cmd, arg); ++ break; ++ case SPLAT_CMD: ++ rc = splat_ioctl_cmd(file, cmd, arg); ++ break; ++ default: ++ splat_print(file, "Bad ioctl command %d\n", cmd); ++ rc = -EINVAL; ++ break; ++ } ++ ++ return rc; ++} ++ ++#ifdef CONFIG_COMPAT ++/* Compatibility handler for ioctls from 32-bit ELF binaries */ ++static long ++splat_compat_ioctl(struct file *file, unsigned int cmd, unsigned long arg) ++{ ++ return splat_unlocked_ioctl(file, cmd, arg); ++} ++#endif /* CONFIG_COMPAT */ ++ ++/* I'm not sure why you would want to write into this buffer from ++ * user space since its principal use is to pass test status info ++ * back to the user space, but I don't see any reason to prevent it. 
++ */ ++static ssize_t splat_write(struct file *file, const char __user *buf, ++ size_t count, loff_t *ppos) ++{ ++ unsigned int minor = iminor(file->f_dentry->d_inode); ++ splat_info_t *info = (splat_info_t *)file->private_data; ++ int rc = 0; ++ ++ if (minor >= SPLAT_MINORS) ++ return -ENXIO; ++ ++ ASSERT(info); ++ ASSERT(info->info_buffer); ++ ++ mutex_lock(&info->info_lock); ++ ++ /* Write beyond EOF */ ++ if (*ppos >= info->info_size) { ++ rc = -EFBIG; ++ goto out; ++ } ++ ++ /* Resize count if beyond EOF */ ++ if (*ppos + count > info->info_size) ++ count = info->info_size - *ppos; ++ ++ if (copy_from_user(info->info_buffer, buf, count)) { ++ rc = -EFAULT; ++ goto out; ++ } ++ ++ *ppos += count; ++ rc = count; ++out: ++ mutex_unlock(&info->info_lock); ++ return rc; ++} ++ ++static ssize_t splat_read(struct file *file, char __user *buf, ++ size_t count, loff_t *ppos) ++{ ++ unsigned int minor = iminor(file->f_dentry->d_inode); ++ splat_info_t *info = (splat_info_t *)file->private_data; ++ int rc = 0; ++ ++ if (minor >= SPLAT_MINORS) ++ return -ENXIO; ++ ++ ASSERT(info); ++ ASSERT(info->info_buffer); ++ ++ mutex_lock(&info->info_lock); ++ ++ /* Read beyond EOF */ ++ if (*ppos >= info->info_size) ++ goto out; ++ ++ /* Resize count if beyond EOF */ ++ if (*ppos + count > info->info_size) ++ count = info->info_size - *ppos; ++ ++ if (copy_to_user(buf, info->info_buffer + *ppos, count)) { ++ rc = -EFAULT; ++ goto out; ++ } ++ ++ *ppos += count; ++ rc = count; ++out: ++ mutex_unlock(&info->info_lock); ++ return rc; ++} ++ ++static loff_t splat_seek(struct file *file, loff_t offset, int origin) ++{ ++ unsigned int minor = iminor(file->f_dentry->d_inode); ++ splat_info_t *info = (splat_info_t *)file->private_data; ++ int rc = -EINVAL; ++ ++ if (minor >= SPLAT_MINORS) ++ return -ENXIO; ++ ++ ASSERT(info); ++ ASSERT(info->info_buffer); ++ ++ mutex_lock(&info->info_lock); ++ ++ switch (origin) { ++ case 0: /* SEEK_SET - No-op just do it */ ++ break; ++ case 1: /* SEEK_CUR - Seek from current */ ++ offset = file->f_pos + offset; ++ break; ++ case 2: /* SEEK_END - Seek from end */ ++ offset = info->info_size + offset; ++ break; ++ } ++ ++ if (offset >= 0) { ++ file->f_pos = offset; ++ file->f_version = 0; ++ rc = offset; ++ } ++ ++ mutex_unlock(&info->info_lock); ++ ++ return rc; ++} ++ ++static struct cdev splat_cdev; ++static struct file_operations splat_fops = { ++ .owner = THIS_MODULE, ++ .open = splat_open, ++ .release = splat_release, ++ .unlocked_ioctl = splat_unlocked_ioctl, ++#ifdef CONFIG_COMPAT ++ .compat_ioctl = splat_compat_ioctl, ++#endif ++ .read = splat_read, ++ .write = splat_write, ++ .llseek = splat_seek, ++}; ++ ++static int ++splat_init(void) ++{ ++ dev_t dev; ++ int rc; ++ ++ spin_lock_init(&splat_module_lock); ++ INIT_LIST_HEAD(&splat_module_list); ++ ++ SPLAT_SUBSYSTEM_INIT(kmem); ++ SPLAT_SUBSYSTEM_INIT(taskq); ++ SPLAT_SUBSYSTEM_INIT(krng); ++ SPLAT_SUBSYSTEM_INIT(mutex); ++ SPLAT_SUBSYSTEM_INIT(condvar); ++ SPLAT_SUBSYSTEM_INIT(thread); ++ SPLAT_SUBSYSTEM_INIT(rwlock); ++ SPLAT_SUBSYSTEM_INIT(time); ++ SPLAT_SUBSYSTEM_INIT(vnode); ++ SPLAT_SUBSYSTEM_INIT(kobj); ++ SPLAT_SUBSYSTEM_INIT(atomic); ++ SPLAT_SUBSYSTEM_INIT(list); ++ SPLAT_SUBSYSTEM_INIT(generic); ++ SPLAT_SUBSYSTEM_INIT(cred); ++ SPLAT_SUBSYSTEM_INIT(zlib); ++ SPLAT_SUBSYSTEM_INIT(linux); ++ ++ dev = MKDEV(SPLAT_MAJOR, 0); ++ if ((rc = register_chrdev_region(dev, SPLAT_MINORS, SPLAT_NAME))) ++ goto error; ++ ++ /* Support for registering a character driver */ ++ cdev_init(&splat_cdev, &splat_fops); ++ 
splat_cdev.owner = THIS_MODULE; ++ kobject_set_name(&splat_cdev.kobj, SPLAT_NAME); ++ if ((rc = cdev_add(&splat_cdev, dev, SPLAT_MINORS))) { ++ printk(KERN_ERR "SPLAT: Error adding cdev, %d\n", rc); ++ kobject_put(&splat_cdev.kobj); ++ unregister_chrdev_region(dev, SPLAT_MINORS); ++ goto error; ++ } ++ ++ /* Support for udev make driver info available in sysfs */ ++ splat_class = spl_class_create(THIS_MODULE, "splat"); ++ if (IS_ERR(splat_class)) { ++ rc = PTR_ERR(splat_class); ++ printk(KERN_ERR "SPLAT: Error creating splat class, %d\n", rc); ++ cdev_del(&splat_cdev); ++ unregister_chrdev_region(dev, SPLAT_MINORS); ++ goto error; ++ } ++ ++ splat_device = spl_device_create(splat_class, NULL, ++ MKDEV(SPLAT_MAJOR, 0), ++ NULL, SPLAT_NAME); ++ ++ printk(KERN_INFO "SPLAT: Loaded module v%s-%s%s\n", ++ SPL_META_VERSION, SPL_META_RELEASE, SPL_DEBUG_STR); ++ return 0; ++error: ++ printk(KERN_ERR "SPLAT: Error registering splat device, %d\n", rc); ++ return rc; ++} ++ ++static int ++splat_fini(void) ++{ ++ dev_t dev = MKDEV(SPLAT_MAJOR, 0); ++ ++ spl_device_destroy(splat_class, splat_device, dev); ++ spl_class_destroy(splat_class); ++ cdev_del(&splat_cdev); ++ unregister_chrdev_region(dev, SPLAT_MINORS); ++ ++ SPLAT_SUBSYSTEM_FINI(linux); ++ SPLAT_SUBSYSTEM_FINI(zlib); ++ SPLAT_SUBSYSTEM_FINI(cred); ++ SPLAT_SUBSYSTEM_FINI(generic); ++ SPLAT_SUBSYSTEM_FINI(list); ++ SPLAT_SUBSYSTEM_FINI(atomic); ++ SPLAT_SUBSYSTEM_FINI(kobj); ++ SPLAT_SUBSYSTEM_FINI(vnode); ++ SPLAT_SUBSYSTEM_FINI(time); ++ SPLAT_SUBSYSTEM_FINI(rwlock); ++ SPLAT_SUBSYSTEM_FINI(thread); ++ SPLAT_SUBSYSTEM_FINI(condvar); ++ SPLAT_SUBSYSTEM_FINI(mutex); ++ SPLAT_SUBSYSTEM_FINI(krng); ++ SPLAT_SUBSYSTEM_FINI(taskq); ++ SPLAT_SUBSYSTEM_FINI(kmem); ++ ++ ASSERT(list_empty(&splat_module_list)); ++ printk(KERN_INFO "SPLAT: Unloaded module v%s-%s%s\n", ++ SPL_META_VERSION, SPL_META_RELEASE, SPL_DEBUG_STR); ++ ++ return 0; ++} ++ ++spl_module_init(splat_init); ++spl_module_exit(splat_fini); ++ ++MODULE_AUTHOR("Lawrence Livermore National Labs"); ++MODULE_DESCRIPTION("Solaris Porting LAyer Tests"); ++MODULE_LICENSE("GPL"); +diff -uNr linux-3.2.33-go.orig/spl/splat/splat-generic.c linux-3.2.33-go/spl/splat/splat-generic.c +--- linux-3.2.33-go.orig/spl/splat/splat-generic.c 1970-01-01 01:00:00.000000000 +0100 ++++ linux-3.2.33-go/spl/splat/splat-generic.c 2012-11-16 23:22:32.409192874 +0100 +@@ -0,0 +1,366 @@ ++/*****************************************************************************\ ++ * Copyright (C) 2007-2010 Lawrence Livermore National Security, LLC. ++ * Copyright (C) 2007 The Regents of the University of California. ++ * Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER). ++ * Written by Brian Behlendorf . ++ * UCRL-CODE-235197 ++ * ++ * This file is part of the SPL, Solaris Porting Layer. ++ * For details, see . ++ * ++ * The SPL is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License as published by the ++ * Free Software Foundation; either version 2 of the License, or (at your ++ * option) any later version. ++ * ++ * The SPL is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * for more details. ++ * ++ * You should have received a copy of the GNU General Public License along ++ * with the SPL. If not, see . 
++ ***************************************************************************** ++ * Solaris Porting LAyer Tests (SPLAT) Generic Tests. ++\*****************************************************************************/ ++ ++#include ++#include "splat-internal.h" ++ ++#define SPLAT_GENERIC_NAME "generic" ++#define SPLAT_GENERIC_DESC "Kernel Generic Tests" ++ ++#define SPLAT_GENERIC_TEST1_ID 0x0d01 ++#define SPLAT_GENERIC_TEST1_NAME "ddi_strtoul" ++#define SPLAT_GENERIC_TEST1_DESC "ddi_strtoul Test" ++ ++#define SPLAT_GENERIC_TEST2_ID 0x0d02 ++#define SPLAT_GENERIC_TEST2_NAME "ddi_strtol" ++#define SPLAT_GENERIC_TEST2_DESC "ddi_strtol Test" ++ ++#define SPLAT_GENERIC_TEST3_ID 0x0d03 ++#define SPLAT_GENERIC_TEST3_NAME "ddi_strtoull" ++#define SPLAT_GENERIC_TEST3_DESC "ddi_strtoull Test" ++ ++#define SPLAT_GENERIC_TEST4_ID 0x0d04 ++#define SPLAT_GENERIC_TEST4_NAME "ddi_strtoll" ++#define SPLAT_GENERIC_TEST4_DESC "ddi_strtoll Test" ++ ++# define SPLAT_GENERIC_TEST5_ID 0x0d05 ++# define SPLAT_GENERIC_TEST5_NAME "udivdi3" ++# define SPLAT_GENERIC_TEST5_DESC "Unsigned Div-64 Test" ++ ++# define SPLAT_GENERIC_TEST6_ID 0x0d06 ++# define SPLAT_GENERIC_TEST6_NAME "divdi3" ++# define SPLAT_GENERIC_TEST6_DESC "Signed Div-64 Test" ++ ++#define STR_POS "123456789" ++#define STR_NEG "-123456789" ++#define STR_BASE "0xabcdef" ++#define STR_RANGE_MAX "10000000000000000" ++#define STR_RANGE_MIN "-10000000000000000" ++#define STR_INVAL1 "12345U" ++#define STR_INVAL2 "invald" ++ ++#define VAL_POS 123456789 ++#define VAL_NEG -123456789 ++#define VAL_BASE 0xabcdef ++#define VAL_INVAL1 12345U ++ ++#define define_generic_msg_strtox(type, valtype) \ ++static void \ ++generic_msg_strto##type(struct file *file, char *msg, int rc, int *err, \ ++ const char *s, valtype d, char *endptr) \ ++{ \ ++ splat_vprint(file, SPLAT_GENERIC_TEST1_NAME, \ ++ "%s (%d) %s: %s == %lld, 0x%p\n", \ ++ rc ? 
"Fail" : "Pass", *err, msg, s, \ ++ (unsigned long long)d, endptr); \ ++ *err = rc; \ ++} ++ ++define_generic_msg_strtox(ul, unsigned long); ++define_generic_msg_strtox(l, long); ++define_generic_msg_strtox(ull, unsigned long long); ++define_generic_msg_strtox(ll, long long); ++ ++#define define_splat_generic_test_strtox(type, valtype) \ ++static int \ ++splat_generic_test_strto##type(struct file *file, void *arg) \ ++{ \ ++ int rc, rc1, rc2, rc3, rc4, rc5, rc6, rc7; \ ++ char str[20], *endptr; \ ++ valtype r; \ ++ \ ++ /* Positive value: expect success */ \ ++ r = 0; \ ++ rc = 1; \ ++ endptr = NULL; \ ++ rc1 = ddi_strto##type(STR_POS, &endptr, 10, &r); \ ++ if (rc1 == 0 && r == VAL_POS && endptr && *endptr == '\0') \ ++ rc = 0; \ ++ \ ++ generic_msg_strto##type(file, "positive", rc , &rc1, \ ++ STR_POS, r, endptr); \ ++ \ ++ /* Negative value: expect success */ \ ++ r = 0; \ ++ rc = 1; \ ++ endptr = NULL; \ ++ strcpy(str, STR_NEG); \ ++ rc2 = ddi_strto##type(str, &endptr, 10, &r); \ ++ if (#type[0] == 'u') { \ ++ if (rc2 == 0 && r == 0 && endptr == str) \ ++ rc = 0; \ ++ } else { \ ++ if (rc2 == 0 && r == VAL_NEG && \ ++ endptr && *endptr == '\0') \ ++ rc = 0; \ ++ } \ ++ \ ++ generic_msg_strto##type(file, "negative", rc, &rc2, \ ++ STR_NEG, r, endptr); \ ++ \ ++ /* Non decimal base: expect sucess */ \ ++ r = 0; \ ++ rc = 1; \ ++ endptr = NULL; \ ++ rc3 = ddi_strto##type(STR_BASE, &endptr, 0, &r); \ ++ if (rc3 == 0 && r == VAL_BASE && endptr && *endptr == '\0') \ ++ rc = 0; \ ++ \ ++ generic_msg_strto##type(file, "base", rc, &rc3, \ ++ STR_BASE, r, endptr); \ ++ \ ++ /* Max out of range: failure expected, r unchanged */ \ ++ r = 0; \ ++ rc = 1; \ ++ endptr = NULL; \ ++ rc4 = ddi_strto##type(STR_RANGE_MAX, &endptr, 16, &r); \ ++ if (rc4 == ERANGE && r == 0 && endptr == NULL) \ ++ rc = 0; \ ++ \ ++ generic_msg_strto##type(file, "max", rc, &rc4, \ ++ STR_RANGE_MAX, r, endptr); \ ++ \ ++ /* Min out of range: failure expected, r unchanged */ \ ++ r = 0; \ ++ rc = 1; \ ++ endptr = NULL; \ ++ strcpy(str, STR_RANGE_MIN); \ ++ rc5 = ddi_strto##type(str, &endptr, 16, &r); \ ++ if (#type[0] == 'u') { \ ++ if (rc5 == 0 && r == 0 && endptr == str) \ ++ rc = 0; \ ++ } else { \ ++ if (rc5 == ERANGE && r == 0 && endptr == NULL) \ ++ rc = 0; \ ++ } \ ++ \ ++ generic_msg_strto##type(file, "min", rc, &rc5, \ ++ STR_RANGE_MIN, r, endptr); \ ++ \ ++ /* Invalid string: success expected, endptr == 'U' */ \ ++ r = 0; \ ++ rc = 1; \ ++ endptr = NULL; \ ++ rc6 = ddi_strto##type(STR_INVAL1, &endptr, 10, &r); \ ++ if (rc6 == 0 && r == VAL_INVAL1 && endptr && *endptr == 'U') \ ++ rc = 0; \ ++ \ ++ generic_msg_strto##type(file, "invalid", rc, &rc6, \ ++ STR_INVAL1, r, endptr); \ ++ \ ++ /* Invalid string: failure expected, endptr == str */ \ ++ r = 0; \ ++ rc = 1; \ ++ endptr = NULL; \ ++ strcpy(str, STR_INVAL2); \ ++ rc7 = ddi_strto##type(str, &endptr, 10, &r); \ ++ if (rc7 == 0 && r == 0 && endptr == str) \ ++ rc = 0; \ ++ \ ++ generic_msg_strto##type(file, "invalid", rc, &rc7, \ ++ STR_INVAL2, r, endptr); \ ++ \ ++ return (rc1 || rc2 || rc3 || rc4 || rc5 || rc6 || rc7) ? \ ++ -EINVAL : 0; \ ++} ++ ++define_splat_generic_test_strtox(ul, unsigned long); ++define_splat_generic_test_strtox(l, long); ++define_splat_generic_test_strtox(ull, unsigned long long); ++define_splat_generic_test_strtox(ll, long long); ++ ++/* ++ * The entries in the table are used in all combinations and the ++ * return value is checked to ensure it is range. On 32-bit ++ * systems __udivdi3 will be invoked for the 64-bit division. 
++ * On 64-bit system the native 64-bit divide will be used so ++ * __udivdi3 isn't used but we might as well stil run the test. ++ */ ++static int ++splat_generic_test_udivdi3(struct file *file, void *arg) ++{ ++ const uint64_t tabu[] = { ++ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, ++ 10, 11, 12, 13, 14, 15, 16, 1000, 2003, ++ 32765, 32766, 32767, 32768, 32769, 32760, ++ 65533, 65534, 65535, 65536, 65537, 65538, ++ 0x7ffffffeULL, 0x7fffffffULL, 0x80000000ULL, 0x80000001ULL, ++ 0x7000000000000000ULL, 0x7000000080000000ULL, 0x7000000080000001ULL, ++ 0x7fffffffffffffffULL, 0x7fffffff8fffffffULL, 0x7fffffff8ffffff1ULL, ++ 0x7fffffff00000000ULL, 0x7fffffff80000000ULL, 0x7fffffff00000001ULL, ++ 0x8000000000000000ULL, 0x8000000080000000ULL, 0x8000000080000001ULL, ++ 0xc000000000000000ULL, 0xc000000080000000ULL, 0xc000000080000001ULL, ++ 0xfffffffffffffffdULL, 0xfffffffffffffffeULL, 0xffffffffffffffffULL, ++ }; ++ uint64_t uu, vu, qu, ru; ++ int n, i, j, errors = 0; ++ ++ splat_vprint(file, SPLAT_GENERIC_TEST5_NAME, "%s", ++ "Testing unsigned 64-bit division.\n"); ++ n = sizeof(tabu) / sizeof(tabu[0]); ++ for (i = 0; i < n; i++) { ++ for (j = 1; j < n; j++) { ++ uu = tabu[i]; ++ vu = tabu[j]; ++ qu = uu / vu; /* __udivdi3 */ ++ ru = uu - qu * vu; ++ if (qu > uu || ru >= vu) { ++ splat_vprint(file, SPLAT_GENERIC_TEST5_NAME, ++ "%016llx/%016llx != %016llx rem %016llx\n", ++ uu, vu, qu, ru); ++ errors++; ++ } ++ } ++ } ++ ++ if (errors) { ++ splat_vprint(file, SPLAT_GENERIC_TEST5_NAME, ++ "Failed %d/%d tests\n", errors, n * (n - 1)); ++ return -ERANGE; ++ } ++ ++ splat_vprint(file, SPLAT_GENERIC_TEST5_NAME, ++ "Passed all %d tests\n", n * (n - 1)); ++ ++ return 0; ++} ++ ++/* ++ * The entries the table are used in all combinations, with + and - signs ++ * preceding them. The return value is checked to ensure it is range. ++ * On 32-bit systems __divdi3 will be invoked for the 64-bit division. ++ * On 64-bit system the native 64-bit divide will be used so __divdi3 ++ * isn't used but we might as well stil run the test. ++ */ ++static int ++splat_generic_test_divdi3(struct file *file, void *arg) ++{ ++ const int64_t tabs[] = { ++ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, ++ 10, 11, 12, 13, 14, 15, 16, 1000, 2003, ++ 32765, 32766, 32767, 32768, 32769, 32760, ++ 65533, 65534, 65535, 65536, 65537, 65538, ++ 0x7ffffffeLL, 0x7fffffffLL, 0x80000000LL, 0x80000001LL, ++ 0x7000000000000000LL, 0x7000000080000000LL, 0x7000000080000001LL, ++ 0x7fffffffffffffffLL, 0x7fffffff8fffffffLL, 0x7fffffff8ffffff1LL, ++ 0x7fffffff00000000LL, 0x7fffffff80000000LL, 0x7fffffff00000001LL, ++ 0x0123456789abcdefLL, 0x00000000abcdef01LL, 0x0000000012345678LL, ++#if BITS_PER_LONG == 32 ++ 0x8000000000000000LL, 0x8000000080000000LL, 0x8000000080000001LL, ++#endif ++ }; ++ int64_t u, v, q, r; ++ int n, i, j, k, errors = 0; ++ ++ splat_vprint(file, SPLAT_GENERIC_TEST6_NAME, "%s", ++ "Testing signed 64-bit division.\n"); ++ n = sizeof(tabs) / sizeof(tabs[0]); ++ for (i = 0; i < n; i++) { ++ for (j = 1; j < n; j++) { ++ for (k = 0; k <= 3; k++) { ++ u = (k & 1) ? -tabs[i] : tabs[i]; ++ v = (k >= 2) ? 
-tabs[j] : tabs[j]; ++ ++ q = u / v; /* __divdi3 */ ++ r = u - q * v; ++ if (abs64(q) > abs64(u) || ++ abs64(r) >= abs64(v) || ++ (r != 0 && (r ^ u) < 0)) { ++ splat_vprint(file, ++ SPLAT_GENERIC_TEST6_NAME, ++ "%016llx/%016llx != %016llx " ++ "rem %016llx\n", u, v, q, r); ++ errors++; ++ } ++ } ++ } ++ } ++ ++ if (errors) { ++ splat_vprint(file, SPLAT_GENERIC_TEST6_NAME, ++ "Failed %d/%d tests\n", errors, n * (n - 1)); ++ return -ERANGE; ++ } ++ ++ splat_vprint(file, SPLAT_GENERIC_TEST6_NAME, ++ "Passed all %d tests\n", n * (n - 1)); ++ ++ return 0; ++} ++ ++splat_subsystem_t * ++splat_generic_init(void) ++{ ++ splat_subsystem_t *sub; ++ ++ sub = kmalloc(sizeof(*sub), GFP_KERNEL); ++ if (sub == NULL) ++ return NULL; ++ ++ memset(sub, 0, sizeof(*sub)); ++ strncpy(sub->desc.name, SPLAT_GENERIC_NAME, SPLAT_NAME_SIZE); ++ strncpy(sub->desc.desc, SPLAT_GENERIC_DESC, SPLAT_DESC_SIZE); ++ INIT_LIST_HEAD(&sub->subsystem_list); ++ INIT_LIST_HEAD(&sub->test_list); ++ spin_lock_init(&sub->test_lock); ++ sub->desc.id = SPLAT_SUBSYSTEM_GENERIC; ++ ++ SPLAT_TEST_INIT(sub, SPLAT_GENERIC_TEST1_NAME, SPLAT_GENERIC_TEST1_DESC, ++ SPLAT_GENERIC_TEST1_ID, splat_generic_test_strtoul); ++ SPLAT_TEST_INIT(sub, SPLAT_GENERIC_TEST2_NAME, SPLAT_GENERIC_TEST2_DESC, ++ SPLAT_GENERIC_TEST2_ID, splat_generic_test_strtol); ++ SPLAT_TEST_INIT(sub, SPLAT_GENERIC_TEST3_NAME, SPLAT_GENERIC_TEST3_DESC, ++ SPLAT_GENERIC_TEST3_ID, splat_generic_test_strtoull); ++ SPLAT_TEST_INIT(sub, SPLAT_GENERIC_TEST4_NAME, SPLAT_GENERIC_TEST4_DESC, ++ SPLAT_GENERIC_TEST4_ID, splat_generic_test_strtoll); ++ SPLAT_TEST_INIT(sub, SPLAT_GENERIC_TEST5_NAME, SPLAT_GENERIC_TEST5_DESC, ++ SPLAT_GENERIC_TEST5_ID, splat_generic_test_udivdi3); ++ SPLAT_TEST_INIT(sub, SPLAT_GENERIC_TEST6_NAME, SPLAT_GENERIC_TEST6_DESC, ++ SPLAT_GENERIC_TEST6_ID, splat_generic_test_divdi3); ++ ++ return sub; ++} ++ ++void ++splat_generic_fini(splat_subsystem_t *sub) ++{ ++ ASSERT(sub); ++ ++ SPLAT_TEST_FINI(sub, SPLAT_GENERIC_TEST6_ID); ++ SPLAT_TEST_FINI(sub, SPLAT_GENERIC_TEST5_ID); ++ SPLAT_TEST_FINI(sub, SPLAT_GENERIC_TEST4_ID); ++ SPLAT_TEST_FINI(sub, SPLAT_GENERIC_TEST3_ID); ++ SPLAT_TEST_FINI(sub, SPLAT_GENERIC_TEST2_ID); ++ SPLAT_TEST_FINI(sub, SPLAT_GENERIC_TEST1_ID); ++ ++ kfree(sub); ++} ++ ++int ++splat_generic_id(void) ++{ ++ return SPLAT_SUBSYSTEM_GENERIC; ++} +diff -uNr linux-3.2.33-go.orig/spl/splat/splat-internal.h linux-3.2.33-go/spl/splat/splat-internal.h +--- linux-3.2.33-go.orig/spl/splat/splat-internal.h 1970-01-01 01:00:00.000000000 +0100 ++++ linux-3.2.33-go/spl/splat/splat-internal.h 2012-11-16 23:22:32.410192863 +0100 +@@ -0,0 +1,218 @@ ++/*****************************************************************************\ ++ * Copyright (C) 2007-2010 Lawrence Livermore National Security, LLC. ++ * Copyright (C) 2007 The Regents of the University of California. ++ * Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER). ++ * Written by Brian Behlendorf . ++ * UCRL-CODE-235197 ++ * ++ * This file is part of the SPL, Solaris Porting Layer. ++ * For details, see . ++ * ++ * The SPL is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License as published by the ++ * Free Software Foundation; either version 2 of the License, or (at your ++ * option) any later version. ++ * ++ * The SPL is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. 
See the GNU General Public License ++ * for more details. ++ * ++ * You should have received a copy of the GNU General Public License along ++ * with the SPL. If not, see . ++\*****************************************************************************/ ++ ++#ifndef _SPLAT_INTERNAL_H ++#define _SPLAT_INTERNAL_H ++ ++#include "spl-device.h" ++#include "spl-debug.h" ++#include "splat-ctl.h" ++ ++#define SPLAT_SUBSYSTEM_INIT(type) \ ++({ splat_subsystem_t *_sub_; \ ++ \ ++ _sub_ = (splat_subsystem_t *)splat_##type##_init(); \ ++ if (_sub_ == NULL) { \ ++ printk(KERN_ERR "splat: Error initializing: " #type "\n"); \ ++ } else { \ ++ spin_lock(&splat_module_lock); \ ++ list_add_tail(&(_sub_->subsystem_list), \ ++ &splat_module_list); \ ++ spin_unlock(&splat_module_lock); \ ++ } \ ++}) ++ ++#define SPLAT_SUBSYSTEM_FINI(type) \ ++({ splat_subsystem_t *_sub_, *_tmp_; \ ++ int _id_, _flag_ = 0; \ ++ \ ++ _id_ = splat_##type##_id(); \ ++ spin_lock(&splat_module_lock); \ ++ list_for_each_entry_safe(_sub_, _tmp_, &splat_module_list, \ ++ subsystem_list) { \ ++ if (_sub_->desc.id == _id_) { \ ++ list_del_init(&(_sub_->subsystem_list)); \ ++ spin_unlock(&splat_module_lock); \ ++ splat_##type##_fini(_sub_); \ ++ spin_lock(&splat_module_lock); \ ++ _flag_ = 1; \ ++ } \ ++ } \ ++ spin_unlock(&splat_module_lock); \ ++ \ ++ if (!_flag_) \ ++ printk(KERN_ERR "splat: Error finalizing: " #type "\n"); \ ++}) ++ ++#define SPLAT_TEST_INIT(sub, n, d, tid, func) \ ++({ splat_test_t *_test_; \ ++ \ ++ _test_ = (splat_test_t *)kmalloc(sizeof(*_test_), GFP_KERNEL); \ ++ if (_test_ == NULL) { \ ++ printk(KERN_ERR "splat: Error initializing: " n "/" #tid" \n");\ ++ } else { \ ++ memset(_test_, 0, sizeof(*_test_)); \ ++ strncpy(_test_->desc.name, n, SPLAT_NAME_SIZE-1); \ ++ strncpy(_test_->desc.desc, d, SPLAT_DESC_SIZE-1); \ ++ _test_->desc.id = tid; \ ++ _test_->test = func; \ ++ INIT_LIST_HEAD(&(_test_->test_list)); \ ++ spin_lock(&((sub)->test_lock)); \ ++ list_add_tail(&(_test_->test_list),&((sub)->test_list));\ ++ spin_unlock(&((sub)->test_lock)); \ ++ } \ ++}) ++ ++#define SPLAT_TEST_FINI(sub, tid) \ ++({ splat_test_t *_test_, *_tmp_; \ ++ int _flag_ = 0; \ ++ \ ++ spin_lock(&((sub)->test_lock)); \ ++ list_for_each_entry_safe(_test_, _tmp_, \ ++ &((sub)->test_list), test_list) { \ ++ if (_test_->desc.id == tid) { \ ++ list_del_init(&(_test_->test_list)); \ ++ _flag_ = 1; \ ++ } \ ++ } \ ++ spin_unlock(&((sub)->test_lock)); \ ++ \ ++ if (!_flag_) \ ++ printk(KERN_ERR "splat: Error finalizing: " #tid "\n"); \ ++}) ++ ++typedef int (*splat_test_func_t)(struct file *, void *); ++ ++typedef struct splat_test { ++ struct list_head test_list; ++ splat_user_t desc; ++ splat_test_func_t test; ++} splat_test_t; ++ ++typedef struct splat_subsystem { ++ struct list_head subsystem_list;/* List had to chain entries */ ++ splat_user_t desc; ++ spinlock_t test_lock; ++ struct list_head test_list; ++} splat_subsystem_t; ++ ++#define SPLAT_INFO_BUFFER_SIZE 65536 ++#define SPLAT_INFO_BUFFER_REDZONE 256 ++ ++typedef struct splat_info { ++ struct mutex info_lock; ++ int info_size; ++ char *info_buffer; ++ char *info_head; /* Internal kernel use only */ ++} splat_info_t; ++ ++#define sym2str(sym) (char *)(#sym) ++ ++#define splat_print(file, format, args...) 
\ ++({ splat_info_t *_info_ = (splat_info_t *)file->private_data; \ ++ int _rc_; \ ++ \ ++ ASSERT(_info_); \ ++ ASSERT(_info_->info_buffer); \ ++ \ ++ mutex_lock(&_info_->info_lock); \ ++ \ ++ /* Don't allow the kernel to start a write in the red zone */ \ ++ if ((int)(_info_->info_head - _info_->info_buffer) > \ ++ (SPLAT_INFO_BUFFER_SIZE - SPLAT_INFO_BUFFER_REDZONE)) { \ ++ _rc_ = -EOVERFLOW; \ ++ } else { \ ++ _rc_ = sprintf(_info_->info_head, format, args); \ ++ if (_rc_ >= 0) \ ++ _info_->info_head += _rc_; \ ++ } \ ++ \ ++ mutex_unlock(&_info_->info_lock); \ ++ _rc_; \ ++}) ++ ++#define splat_vprint(file, test, format, args...) \ ++ splat_print(file, "%*s: " format, SPLAT_NAME_SIZE, test, args) ++ ++#define splat_locked_test(lock, test) \ ++({ \ ++ int _rc_; \ ++ spin_lock(lock); \ ++ _rc_ = (test) ? 1 : 0; \ ++ spin_unlock(lock); \ ++ _rc_; \ ++}) ++ ++splat_subsystem_t *splat_condvar_init(void); ++splat_subsystem_t *splat_kmem_init(void); ++splat_subsystem_t *splat_mutex_init(void); ++splat_subsystem_t *splat_krng_init(void); ++splat_subsystem_t *splat_rwlock_init(void); ++splat_subsystem_t *splat_taskq_init(void); ++splat_subsystem_t *splat_thread_init(void); ++splat_subsystem_t *splat_time_init(void); ++splat_subsystem_t *splat_vnode_init(void); ++splat_subsystem_t *splat_kobj_init(void); ++splat_subsystem_t *splat_atomic_init(void); ++splat_subsystem_t *splat_list_init(void); ++splat_subsystem_t *splat_generic_init(void); ++splat_subsystem_t *splat_cred_init(void); ++splat_subsystem_t *splat_zlib_init(void); ++splat_subsystem_t *splat_linux_init(void); ++ ++void splat_condvar_fini(splat_subsystem_t *); ++void splat_kmem_fini(splat_subsystem_t *); ++void splat_mutex_fini(splat_subsystem_t *); ++void splat_krng_fini(splat_subsystem_t *); ++void splat_rwlock_fini(splat_subsystem_t *); ++void splat_taskq_fini(splat_subsystem_t *); ++void splat_thread_fini(splat_subsystem_t *); ++void splat_time_fini(splat_subsystem_t *); ++void splat_vnode_fini(splat_subsystem_t *); ++void splat_kobj_fini(splat_subsystem_t *); ++void splat_atomic_fini(splat_subsystem_t *); ++void splat_list_fini(splat_subsystem_t *); ++void splat_generic_fini(splat_subsystem_t *); ++void splat_cred_fini(splat_subsystem_t *); ++void splat_zlib_fini(splat_subsystem_t *); ++void splat_linux_fini(splat_subsystem_t *); ++ ++int splat_condvar_id(void); ++int splat_kmem_id(void); ++int splat_mutex_id(void); ++int splat_krng_id(void); ++int splat_rwlock_id(void); ++int splat_taskq_id(void); ++int splat_thread_id(void); ++int splat_time_id(void); ++int splat_vnode_id(void); ++int splat_kobj_id(void); ++int splat_atomic_id(void); ++int splat_list_id(void); ++int splat_generic_id(void); ++int splat_cred_id(void); ++int splat_zlib_id(void); ++int splat_linux_id(void); ++ ++#endif /* _SPLAT_INTERNAL_H */ +diff -uNr linux-3.2.33-go.orig/spl/splat/splat-kmem.c linux-3.2.33-go/spl/splat/splat-kmem.c +--- linux-3.2.33-go.orig/spl/splat/splat-kmem.c 1970-01-01 01:00:00.000000000 +0100 ++++ linux-3.2.33-go/spl/splat/splat-kmem.c 2012-11-16 23:22:32.409192874 +0100 +@@ -0,0 +1,1333 @@ ++/*****************************************************************************\ ++ * Copyright (C) 2007-2010 Lawrence Livermore National Security, LLC. ++ * Copyright (C) 2007 The Regents of the University of California. ++ * Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER). ++ * Written by Brian Behlendorf . ++ * UCRL-CODE-235197 ++ * ++ * This file is part of the SPL, Solaris Porting Layer. ++ * For details, see . 
++ * ++ * The SPL is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License as published by the ++ * Free Software Foundation; either version 2 of the License, or (at your ++ * option) any later version. ++ * ++ * The SPL is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * for more details. ++ * ++ * You should have received a copy of the GNU General Public License along ++ * with the SPL. If not, see . ++ ***************************************************************************** ++ * Solaris Porting LAyer Tests (SPLAT) Kmem Tests. ++\*****************************************************************************/ ++ ++#include ++#include ++#include "splat-internal.h" ++ ++#define SPLAT_KMEM_NAME "kmem" ++#define SPLAT_KMEM_DESC "Kernel Malloc/Slab Tests" ++ ++#define SPLAT_KMEM_TEST1_ID 0x0101 ++#define SPLAT_KMEM_TEST1_NAME "kmem_alloc" ++#define SPLAT_KMEM_TEST1_DESC "Memory allocation test (kmem_alloc)" ++ ++#define SPLAT_KMEM_TEST2_ID 0x0102 ++#define SPLAT_KMEM_TEST2_NAME "kmem_zalloc" ++#define SPLAT_KMEM_TEST2_DESC "Memory allocation test (kmem_zalloc)" ++ ++#define SPLAT_KMEM_TEST3_ID 0x0103 ++#define SPLAT_KMEM_TEST3_NAME "vmem_alloc" ++#define SPLAT_KMEM_TEST3_DESC "Memory allocation test (vmem_alloc)" ++ ++#define SPLAT_KMEM_TEST4_ID 0x0104 ++#define SPLAT_KMEM_TEST4_NAME "vmem_zalloc" ++#define SPLAT_KMEM_TEST4_DESC "Memory allocation test (vmem_zalloc)" ++ ++#define SPLAT_KMEM_TEST5_ID 0x0105 ++#define SPLAT_KMEM_TEST5_NAME "slab_small" ++#define SPLAT_KMEM_TEST5_DESC "Slab ctor/dtor test (small)" ++ ++#define SPLAT_KMEM_TEST6_ID 0x0106 ++#define SPLAT_KMEM_TEST6_NAME "slab_large" ++#define SPLAT_KMEM_TEST6_DESC "Slab ctor/dtor test (large)" ++ ++#define SPLAT_KMEM_TEST7_ID 0x0107 ++#define SPLAT_KMEM_TEST7_NAME "slab_align" ++#define SPLAT_KMEM_TEST7_DESC "Slab alignment test" ++ ++#define SPLAT_KMEM_TEST8_ID 0x0108 ++#define SPLAT_KMEM_TEST8_NAME "slab_reap" ++#define SPLAT_KMEM_TEST8_DESC "Slab reaping test" ++ ++#define SPLAT_KMEM_TEST9_ID 0x0109 ++#define SPLAT_KMEM_TEST9_NAME "slab_age" ++#define SPLAT_KMEM_TEST9_DESC "Slab aging test" ++ ++#define SPLAT_KMEM_TEST10_ID 0x010a ++#define SPLAT_KMEM_TEST10_NAME "slab_lock" ++#define SPLAT_KMEM_TEST10_DESC "Slab locking test" ++ ++#if 0 ++#define SPLAT_KMEM_TEST11_ID 0x010b ++#define SPLAT_KMEM_TEST11_NAME "slab_overcommit" ++#define SPLAT_KMEM_TEST11_DESC "Slab memory overcommit test" ++#endif ++ ++#define SPLAT_KMEM_TEST12_ID 0x010c ++#define SPLAT_KMEM_TEST12_NAME "vmem_size" ++#define SPLAT_KMEM_TEST12_DESC "Memory zone test" ++ ++#define SPLAT_KMEM_TEST13_ID 0x010d ++#define SPLAT_KMEM_TEST13_NAME "slab_reclaim" ++#define SPLAT_KMEM_TEST13_DESC "Slab direct memory reclaim test" ++ ++#define SPLAT_KMEM_ALLOC_COUNT 10 ++#define SPLAT_VMEM_ALLOC_COUNT 10 ++ ++ ++static int ++splat_kmem_test1(struct file *file, void *arg) ++{ ++ void *ptr[SPLAT_KMEM_ALLOC_COUNT]; ++ int size = PAGE_SIZE; ++ int i, count, rc = 0; ++ ++ while ((!rc) && (size <= (PAGE_SIZE * 32))) { ++ count = 0; ++ ++ for (i = 0; i < SPLAT_KMEM_ALLOC_COUNT; i++) { ++ ptr[i] = kmem_alloc(size, KM_SLEEP | KM_NODEBUG); ++ if (ptr[i]) ++ count++; ++ } ++ ++ for (i = 0; i < SPLAT_KMEM_ALLOC_COUNT; i++) ++ if (ptr[i]) ++ kmem_free(ptr[i], size); ++ ++ splat_vprint(file, SPLAT_KMEM_TEST1_NAME, ++ "%d byte allocations, %d/%d successful\n", ++ size, 
count, SPLAT_KMEM_ALLOC_COUNT); ++ if (count != SPLAT_KMEM_ALLOC_COUNT) ++ rc = -ENOMEM; ++ ++ size *= 2; ++ } ++ ++ return rc; ++} ++ ++static int ++splat_kmem_test2(struct file *file, void *arg) ++{ ++ void *ptr[SPLAT_KMEM_ALLOC_COUNT]; ++ int size = PAGE_SIZE; ++ int i, j, count, rc = 0; ++ ++ while ((!rc) && (size <= (PAGE_SIZE * 32))) { ++ count = 0; ++ ++ for (i = 0; i < SPLAT_KMEM_ALLOC_COUNT; i++) { ++ ptr[i] = kmem_zalloc(size, KM_SLEEP | KM_NODEBUG); ++ if (ptr[i]) ++ count++; ++ } ++ ++ /* Ensure buffer has been zero filled */ ++ for (i = 0; i < SPLAT_KMEM_ALLOC_COUNT; i++) { ++ for (j = 0; j < size; j++) { ++ if (((char *)ptr[i])[j] != '\0') { ++ splat_vprint(file,SPLAT_KMEM_TEST2_NAME, ++ "%d-byte allocation was " ++ "not zeroed\n", size); ++ rc = -EFAULT; ++ } ++ } ++ } ++ ++ for (i = 0; i < SPLAT_KMEM_ALLOC_COUNT; i++) ++ if (ptr[i]) ++ kmem_free(ptr[i], size); ++ ++ splat_vprint(file, SPLAT_KMEM_TEST2_NAME, ++ "%d byte allocations, %d/%d successful\n", ++ size, count, SPLAT_KMEM_ALLOC_COUNT); ++ if (count != SPLAT_KMEM_ALLOC_COUNT) ++ rc = -ENOMEM; ++ ++ size *= 2; ++ } ++ ++ return rc; ++} ++ ++static int ++splat_kmem_test3(struct file *file, void *arg) ++{ ++ void *ptr[SPLAT_VMEM_ALLOC_COUNT]; ++ int size = PAGE_SIZE; ++ int i, count, rc = 0; ++ ++ while ((!rc) && (size <= (PAGE_SIZE * 1024))) { ++ count = 0; ++ ++ for (i = 0; i < SPLAT_VMEM_ALLOC_COUNT; i++) { ++ ptr[i] = vmem_alloc(size, KM_SLEEP); ++ if (ptr[i]) ++ count++; ++ } ++ ++ for (i = 0; i < SPLAT_VMEM_ALLOC_COUNT; i++) ++ if (ptr[i]) ++ vmem_free(ptr[i], size); ++ ++ splat_vprint(file, SPLAT_KMEM_TEST3_NAME, ++ "%d byte allocations, %d/%d successful\n", ++ size, count, SPLAT_VMEM_ALLOC_COUNT); ++ if (count != SPLAT_VMEM_ALLOC_COUNT) ++ rc = -ENOMEM; ++ ++ size *= 2; ++ } ++ ++ return rc; ++} ++ ++static int ++splat_kmem_test4(struct file *file, void *arg) ++{ ++ void *ptr[SPLAT_VMEM_ALLOC_COUNT]; ++ int size = PAGE_SIZE; ++ int i, j, count, rc = 0; ++ ++ while ((!rc) && (size <= (PAGE_SIZE * 1024))) { ++ count = 0; ++ ++ for (i = 0; i < SPLAT_VMEM_ALLOC_COUNT; i++) { ++ ptr[i] = vmem_zalloc(size, KM_SLEEP); ++ if (ptr[i]) ++ count++; ++ } ++ ++ /* Ensure buffer has been zero filled */ ++ for (i = 0; i < SPLAT_VMEM_ALLOC_COUNT; i++) { ++ for (j = 0; j < size; j++) { ++ if (((char *)ptr[i])[j] != '\0') { ++ splat_vprint(file, SPLAT_KMEM_TEST4_NAME, ++ "%d-byte allocation was " ++ "not zeroed\n", size); ++ rc = -EFAULT; ++ } ++ } ++ } ++ ++ for (i = 0; i < SPLAT_VMEM_ALLOC_COUNT; i++) ++ if (ptr[i]) ++ vmem_free(ptr[i], size); ++ ++ splat_vprint(file, SPLAT_KMEM_TEST4_NAME, ++ "%d byte allocations, %d/%d successful\n", ++ size, count, SPLAT_VMEM_ALLOC_COUNT); ++ if (count != SPLAT_VMEM_ALLOC_COUNT) ++ rc = -ENOMEM; ++ ++ size *= 2; ++ } ++ ++ return rc; ++} ++ ++#define SPLAT_KMEM_TEST_MAGIC 0x004488CCUL ++#define SPLAT_KMEM_CACHE_NAME "kmem_test" ++#define SPLAT_KMEM_OBJ_COUNT 1024 ++#define SPLAT_KMEM_OBJ_RECLAIM 1000 /* objects */ ++#define SPLAT_KMEM_THREADS 32 ++ ++#define KCP_FLAG_READY 0x01 ++ ++typedef struct kmem_cache_data { ++ unsigned long kcd_magic; ++ struct list_head kcd_node; ++ int kcd_flag; ++ char kcd_buf[0]; ++} kmem_cache_data_t; ++ ++typedef struct kmem_cache_thread { ++ spinlock_t kct_lock; ++ int kct_id; ++ struct list_head kct_list; ++} kmem_cache_thread_t; ++ ++typedef struct kmem_cache_priv { ++ unsigned long kcp_magic; ++ struct file *kcp_file; ++ kmem_cache_t *kcp_cache; ++ spinlock_t kcp_lock; ++ wait_queue_head_t kcp_ctl_waitq; ++ wait_queue_head_t kcp_thr_waitq; ++ int kcp_flags; 
++ int kcp_kct_count; ++ kmem_cache_thread_t *kcp_kct[SPLAT_KMEM_THREADS]; ++ int kcp_size; ++ int kcp_align; ++ int kcp_count; ++ int kcp_alloc; ++ int kcp_rc; ++} kmem_cache_priv_t; ++ ++static kmem_cache_priv_t * ++splat_kmem_cache_test_kcp_alloc(struct file *file, char *name, ++ int size, int align, int alloc) ++{ ++ kmem_cache_priv_t *kcp; ++ ++ kcp = kmem_zalloc(sizeof(kmem_cache_priv_t), KM_SLEEP); ++ if (!kcp) ++ return NULL; ++ ++ kcp->kcp_magic = SPLAT_KMEM_TEST_MAGIC; ++ kcp->kcp_file = file; ++ kcp->kcp_cache = NULL; ++ spin_lock_init(&kcp->kcp_lock); ++ init_waitqueue_head(&kcp->kcp_ctl_waitq); ++ init_waitqueue_head(&kcp->kcp_thr_waitq); ++ kcp->kcp_flags = 0; ++ kcp->kcp_kct_count = -1; ++ kcp->kcp_size = size; ++ kcp->kcp_align = align; ++ kcp->kcp_count = 0; ++ kcp->kcp_alloc = alloc; ++ kcp->kcp_rc = 0; ++ ++ return kcp; ++} ++ ++static void ++splat_kmem_cache_test_kcp_free(kmem_cache_priv_t *kcp) ++{ ++ kmem_free(kcp, sizeof(kmem_cache_priv_t)); ++} ++ ++static kmem_cache_thread_t * ++splat_kmem_cache_test_kct_alloc(kmem_cache_priv_t *kcp, int id) ++{ ++ kmem_cache_thread_t *kct; ++ ++ ASSERTF(id < SPLAT_KMEM_THREADS, "id=%d\n", id); ++ ASSERT(kcp->kcp_kct[id] == NULL); ++ ++ kct = kmem_zalloc(sizeof(kmem_cache_thread_t), KM_SLEEP); ++ if (!kct) ++ return NULL; ++ ++ spin_lock_init(&kct->kct_lock); ++ kct->kct_id = id; ++ INIT_LIST_HEAD(&kct->kct_list); ++ ++ spin_lock(&kcp->kcp_lock); ++ kcp->kcp_kct[id] = kct; ++ spin_unlock(&kcp->kcp_lock); ++ ++ return kct; ++} ++ ++static void ++splat_kmem_cache_test_kct_free(kmem_cache_priv_t *kcp, ++ kmem_cache_thread_t *kct) ++{ ++ spin_lock(&kcp->kcp_lock); ++ kcp->kcp_kct[kct->kct_id] = NULL; ++ spin_unlock(&kcp->kcp_lock); ++ ++ kmem_free(kct, sizeof(kmem_cache_thread_t)); ++} ++ ++static void ++splat_kmem_cache_test_kcd_free(kmem_cache_priv_t *kcp, ++ kmem_cache_thread_t *kct) ++{ ++ kmem_cache_data_t *kcd; ++ ++ spin_lock(&kct->kct_lock); ++ while (!list_empty(&kct->kct_list)) { ++ kcd = list_entry(kct->kct_list.next, ++ kmem_cache_data_t, kcd_node); ++ list_del(&kcd->kcd_node); ++ spin_unlock(&kct->kct_lock); ++ ++ kmem_cache_free(kcp->kcp_cache, kcd); ++ ++ spin_lock(&kct->kct_lock); ++ } ++ spin_unlock(&kct->kct_lock); ++} ++ ++static int ++splat_kmem_cache_test_kcd_alloc(kmem_cache_priv_t *kcp, ++ kmem_cache_thread_t *kct, int count) ++{ ++ kmem_cache_data_t *kcd; ++ int i; ++ ++ for (i = 0; i < count; i++) { ++ kcd = kmem_cache_alloc(kcp->kcp_cache, KM_SLEEP); ++ if (kcd == NULL) { ++ splat_kmem_cache_test_kcd_free(kcp, kct); ++ return -ENOMEM; ++ } ++ ++ spin_lock(&kct->kct_lock); ++ list_add_tail(&kcd->kcd_node, &kct->kct_list); ++ spin_unlock(&kct->kct_lock); ++ } ++ ++ return 0; ++} ++ ++static void ++splat_kmem_cache_test_debug(struct file *file, char *name, ++ kmem_cache_priv_t *kcp) ++{ ++ int j; ++ ++ splat_vprint(file, name, ++ "%s cache objects %d, slabs %u/%u objs %u/%u mags ", ++ kcp->kcp_cache->skc_name, kcp->kcp_count, ++ (unsigned)kcp->kcp_cache->skc_slab_alloc, ++ (unsigned)kcp->kcp_cache->skc_slab_total, ++ (unsigned)kcp->kcp_cache->skc_obj_alloc, ++ (unsigned)kcp->kcp_cache->skc_obj_total); ++ ++ for_each_online_cpu(j) ++ splat_print(file, "%u/%u ", ++ kcp->kcp_cache->skc_mag[j]->skm_avail, ++ kcp->kcp_cache->skc_mag[j]->skm_size); ++ ++ splat_print(file, "%s\n", ""); ++} ++ ++static int ++splat_kmem_cache_test_constructor(void *ptr, void *priv, int flags) ++{ ++ kmem_cache_priv_t *kcp = (kmem_cache_priv_t *)priv; ++ kmem_cache_data_t *kcd = (kmem_cache_data_t *)ptr; ++ ++ if (kcd && kcp) { ++ 
kcd->kcd_magic = kcp->kcp_magic; ++ INIT_LIST_HEAD(&kcd->kcd_node); ++ kcd->kcd_flag = 1; ++ memset(kcd->kcd_buf, 0xaa, kcp->kcp_size - (sizeof *kcd)); ++ kcp->kcp_count++; ++ } ++ ++ return 0; ++} ++ ++static void ++splat_kmem_cache_test_destructor(void *ptr, void *priv) ++{ ++ kmem_cache_priv_t *kcp = (kmem_cache_priv_t *)priv; ++ kmem_cache_data_t *kcd = (kmem_cache_data_t *)ptr; ++ ++ if (kcd && kcp) { ++ kcd->kcd_magic = 0; ++ kcd->kcd_flag = 0; ++ memset(kcd->kcd_buf, 0xbb, kcp->kcp_size - (sizeof *kcd)); ++ kcp->kcp_count--; ++ } ++ ++ return; ++} ++ ++/* ++ * Generic reclaim function which assumes that all objects may ++ * be reclaimed at any time. We free a small percentage of the ++ * objects linked off the kcp or kct[] every time we are called. ++ */ ++static void ++splat_kmem_cache_test_reclaim(void *priv) ++{ ++ kmem_cache_priv_t *kcp = (kmem_cache_priv_t *)priv; ++ kmem_cache_thread_t *kct; ++ kmem_cache_data_t *kcd; ++ LIST_HEAD(reclaim); ++ int i, count; ++ ++ ASSERT(kcp->kcp_magic == SPLAT_KMEM_TEST_MAGIC); ++ ++ /* For each kct thread reclaim some objects */ ++ spin_lock(&kcp->kcp_lock); ++ for (i = 0; i < SPLAT_KMEM_THREADS; i++) { ++ kct = kcp->kcp_kct[i]; ++ if (!kct) ++ continue; ++ ++ spin_unlock(&kcp->kcp_lock); ++ spin_lock(&kct->kct_lock); ++ ++ count = SPLAT_KMEM_OBJ_RECLAIM; ++ while (count > 0 && !list_empty(&kct->kct_list)) { ++ kcd = list_entry(kct->kct_list.next, ++ kmem_cache_data_t, kcd_node); ++ list_del(&kcd->kcd_node); ++ list_add(&kcd->kcd_node, &reclaim); ++ count--; ++ } ++ ++ spin_unlock(&kct->kct_lock); ++ spin_lock(&kcp->kcp_lock); ++ } ++ spin_unlock(&kcp->kcp_lock); ++ ++ /* Freed outside the spin lock */ ++ while (!list_empty(&reclaim)) { ++ kcd = list_entry(reclaim.next, kmem_cache_data_t, kcd_node); ++ list_del(&kcd->kcd_node); ++ kmem_cache_free(kcp->kcp_cache, kcd); ++ } ++ ++ return; ++} ++ ++static int ++splat_kmem_cache_test_threads(kmem_cache_priv_t *kcp, int threads) ++{ ++ int rc; ++ ++ spin_lock(&kcp->kcp_lock); ++ rc = (kcp->kcp_kct_count == threads); ++ spin_unlock(&kcp->kcp_lock); ++ ++ return rc; ++} ++ ++static int ++splat_kmem_cache_test_flags(kmem_cache_priv_t *kcp, int flags) ++{ ++ int rc; ++ ++ spin_lock(&kcp->kcp_lock); ++ rc = (kcp->kcp_flags & flags); ++ spin_unlock(&kcp->kcp_lock); ++ ++ return rc; ++} ++ ++static void ++splat_kmem_cache_test_thread(void *arg) ++{ ++ kmem_cache_priv_t *kcp = (kmem_cache_priv_t *)arg; ++ kmem_cache_thread_t *kct; ++ int rc = 0, id; ++ ++ ASSERT(kcp->kcp_magic == SPLAT_KMEM_TEST_MAGIC); ++ ++ /* Assign thread ids */ ++ spin_lock(&kcp->kcp_lock); ++ if (kcp->kcp_kct_count == -1) ++ kcp->kcp_kct_count = 0; ++ ++ id = kcp->kcp_kct_count; ++ kcp->kcp_kct_count++; ++ spin_unlock(&kcp->kcp_lock); ++ ++ kct = splat_kmem_cache_test_kct_alloc(kcp, id); ++ if (!kct) { ++ rc = -ENOMEM; ++ goto out; ++ } ++ ++ /* Wait for all threads to have started and report they are ready */ ++ if (kcp->kcp_kct_count == SPLAT_KMEM_THREADS) ++ wake_up(&kcp->kcp_ctl_waitq); ++ ++ wait_event(kcp->kcp_thr_waitq, ++ splat_kmem_cache_test_flags(kcp, KCP_FLAG_READY)); ++ ++ /* Create and destroy objects */ ++ rc = splat_kmem_cache_test_kcd_alloc(kcp, kct, kcp->kcp_alloc); ++ splat_kmem_cache_test_kcd_free(kcp, kct); ++out: ++ if (kct) ++ splat_kmem_cache_test_kct_free(kcp, kct); ++ ++ spin_lock(&kcp->kcp_lock); ++ if (!kcp->kcp_rc) ++ kcp->kcp_rc = rc; ++ ++ if ((--kcp->kcp_kct_count) == 0) ++ wake_up(&kcp->kcp_ctl_waitq); ++ ++ spin_unlock(&kcp->kcp_lock); ++ ++ thread_exit(); ++} ++ ++static int 
++splat_kmem_cache_test(struct file *file, void *arg, char *name, ++ int size, int align, int flags) ++{ ++ kmem_cache_priv_t *kcp; ++ kmem_cache_data_t *kcd = NULL; ++ int rc = 0, max; ++ ++ kcp = splat_kmem_cache_test_kcp_alloc(file, name, size, align, 0); ++ if (!kcp) { ++ splat_vprint(file, name, "Unable to create '%s'\n", "kcp"); ++ return -ENOMEM; ++ } ++ ++ kcp->kcp_cache = ++ kmem_cache_create(SPLAT_KMEM_CACHE_NAME, ++ kcp->kcp_size, kcp->kcp_align, ++ splat_kmem_cache_test_constructor, ++ splat_kmem_cache_test_destructor, ++ NULL, kcp, NULL, flags); ++ if (!kcp->kcp_cache) { ++ splat_vprint(file, name, ++ "Unable to create '%s'\n", ++ SPLAT_KMEM_CACHE_NAME); ++ rc = -ENOMEM; ++ goto out_free; ++ } ++ ++ kcd = kmem_cache_alloc(kcp->kcp_cache, KM_SLEEP); ++ if (!kcd) { ++ splat_vprint(file, name, ++ "Unable to allocate from '%s'\n", ++ SPLAT_KMEM_CACHE_NAME); ++ rc = -EINVAL; ++ goto out_free; ++ } ++ ++ if (!kcd->kcd_flag) { ++ splat_vprint(file, name, ++ "Failed to run contructor for '%s'\n", ++ SPLAT_KMEM_CACHE_NAME); ++ rc = -EINVAL; ++ goto out_free; ++ } ++ ++ if (kcd->kcd_magic != kcp->kcp_magic) { ++ splat_vprint(file, name, ++ "Failed to pass private data to constructor " ++ "for '%s'\n", SPLAT_KMEM_CACHE_NAME); ++ rc = -EINVAL; ++ goto out_free; ++ } ++ ++ max = kcp->kcp_count; ++ kmem_cache_free(kcp->kcp_cache, kcd); ++ ++ /* Destroy the entire cache which will force destructors to ++ * run and we can verify one was called for every object */ ++ kmem_cache_destroy(kcp->kcp_cache); ++ if (kcp->kcp_count) { ++ splat_vprint(file, name, ++ "Failed to run destructor on all slab objects " ++ "for '%s'\n", SPLAT_KMEM_CACHE_NAME); ++ rc = -EINVAL; ++ } ++ ++ splat_kmem_cache_test_kcp_free(kcp); ++ splat_vprint(file, name, ++ "Successfully ran ctors/dtors for %d elements in '%s'\n", ++ max, SPLAT_KMEM_CACHE_NAME); ++ ++ return rc; ++ ++out_free: ++ if (kcd) ++ kmem_cache_free(kcp->kcp_cache, kcd); ++ ++ if (kcp->kcp_cache) ++ kmem_cache_destroy(kcp->kcp_cache); ++ ++ splat_kmem_cache_test_kcp_free(kcp); ++ ++ return rc; ++} ++ ++static int ++splat_kmem_cache_thread_test(struct file *file, void *arg, char *name, ++ int size, int alloc, int max_time) ++{ ++ kmem_cache_priv_t *kcp; ++ kthread_t *thr; ++ struct timespec start, stop, delta; ++ char cache_name[32]; ++ int i, rc = 0; ++ ++ kcp = splat_kmem_cache_test_kcp_alloc(file, name, size, 0, alloc); ++ if (!kcp) { ++ splat_vprint(file, name, "Unable to create '%s'\n", "kcp"); ++ return -ENOMEM; ++ } ++ ++ (void)snprintf(cache_name, 32, "%s-%d-%d", ++ SPLAT_KMEM_CACHE_NAME, size, alloc); ++ kcp->kcp_cache = ++ kmem_cache_create(cache_name, kcp->kcp_size, 0, ++ splat_kmem_cache_test_constructor, ++ splat_kmem_cache_test_destructor, ++ splat_kmem_cache_test_reclaim, ++ kcp, NULL, 0); ++ if (!kcp->kcp_cache) { ++ splat_vprint(file, name, "Unable to create '%s'\n", cache_name); ++ rc = -ENOMEM; ++ goto out_kcp; ++ } ++ ++ start = current_kernel_time(); ++ ++ for (i = 0; i < SPLAT_KMEM_THREADS; i++) { ++ thr = thread_create(NULL, 0, ++ splat_kmem_cache_test_thread, ++ kcp, 0, &p0, TS_RUN, minclsyspri); ++ if (thr == NULL) { ++ rc = -ESRCH; ++ goto out_cache; ++ } ++ } ++ ++ /* Sleep until all threads have started, then set the ready ++ * flag and wake them all up for maximum concurrency. 
*/ ++ wait_event(kcp->kcp_ctl_waitq, ++ splat_kmem_cache_test_threads(kcp, SPLAT_KMEM_THREADS)); ++ ++ spin_lock(&kcp->kcp_lock); ++ kcp->kcp_flags |= KCP_FLAG_READY; ++ spin_unlock(&kcp->kcp_lock); ++ wake_up_all(&kcp->kcp_thr_waitq); ++ ++ /* Sleep until all thread have finished */ ++ wait_event(kcp->kcp_ctl_waitq, splat_kmem_cache_test_threads(kcp, 0)); ++ ++ stop = current_kernel_time(); ++ delta = timespec_sub(stop, start); ++ ++ splat_vprint(file, name, ++ "%-22s %2ld.%09ld\t" ++ "%lu/%lu/%lu\t%lu/%lu/%lu\n", ++ kcp->kcp_cache->skc_name, ++ delta.tv_sec, delta.tv_nsec, ++ (unsigned long)kcp->kcp_cache->skc_slab_total, ++ (unsigned long)kcp->kcp_cache->skc_slab_max, ++ (unsigned long)(kcp->kcp_alloc * ++ SPLAT_KMEM_THREADS / ++ SPL_KMEM_CACHE_OBJ_PER_SLAB), ++ (unsigned long)kcp->kcp_cache->skc_obj_total, ++ (unsigned long)kcp->kcp_cache->skc_obj_max, ++ (unsigned long)(kcp->kcp_alloc * ++ SPLAT_KMEM_THREADS)); ++ ++ if (delta.tv_sec >= max_time) ++ rc = -ETIME; ++ ++ if (!rc && kcp->kcp_rc) ++ rc = kcp->kcp_rc; ++ ++out_cache: ++ kmem_cache_destroy(kcp->kcp_cache); ++out_kcp: ++ splat_kmem_cache_test_kcp_free(kcp); ++ return rc; ++} ++ ++/* Validate small object cache behavior for dynamic/kmem/vmem caches */ ++static int ++splat_kmem_test5(struct file *file, void *arg) ++{ ++ char *name = SPLAT_KMEM_TEST5_NAME; ++ int rc; ++ ++ rc = splat_kmem_cache_test(file, arg, name, 128, 0, 0); ++ if (rc) ++ return rc; ++ ++ rc = splat_kmem_cache_test(file, arg, name, 128, 0, KMC_KMEM); ++ if (rc) ++ return rc; ++ ++ return splat_kmem_cache_test(file, arg, name, 128, 0, KMC_VMEM); ++} ++ ++/* ++ * Validate large object cache behavior for dynamic/kmem/vmem caches ++ */ ++static int ++splat_kmem_test6(struct file *file, void *arg) ++{ ++ char *name = SPLAT_KMEM_TEST6_NAME; ++ int rc; ++ ++ rc = splat_kmem_cache_test(file, arg, name, 256*1024, 0, 0); ++ if (rc) ++ return rc; ++ ++ rc = splat_kmem_cache_test(file, arg, name, 64*1024, 0, KMC_KMEM); ++ if (rc) ++ return rc; ++ ++ return splat_kmem_cache_test(file, arg, name, 1024*1024, 0, KMC_VMEM); ++} ++ ++/* ++ * Validate object alignment cache behavior for caches ++ */ ++static int ++splat_kmem_test7(struct file *file, void *arg) ++{ ++ char *name = SPLAT_KMEM_TEST7_NAME; ++ int i, rc; ++ ++ for (i = SPL_KMEM_CACHE_ALIGN; i <= PAGE_SIZE; i *= 2) { ++ rc = splat_kmem_cache_test(file, arg, name, 157, i, 0); ++ if (rc) ++ return rc; ++ } ++ ++ return rc; ++} ++ ++/* ++ * Validate kmem_cache_reap() by requesting the slab cache free any objects ++ * it can. For a few reasons this may not immediately result in more free ++ * memory even if objects are freed. First off, due to fragmentation we ++ * may not be able to reclaim any slabs. Secondly, even if we do we fully ++ * clear some slabs we will not want to immediately reclaim all of them ++ * because we may contend with cache allocations and thrash. What we want ++ * to see is the slab size decrease more gradually as it becomes clear they ++ * will not be needed. This should be achievable in less than a minute. ++ * If it takes longer than this something has gone wrong. 
++ */ ++static int ++splat_kmem_test8(struct file *file, void *arg) ++{ ++ kmem_cache_priv_t *kcp; ++ kmem_cache_thread_t *kct; ++ int i, rc = 0; ++ ++ kcp = splat_kmem_cache_test_kcp_alloc(file, SPLAT_KMEM_TEST8_NAME, ++ 256, 0, 0); ++ if (!kcp) { ++ splat_vprint(file, SPLAT_KMEM_TEST8_NAME, ++ "Unable to create '%s'\n", "kcp"); ++ rc = -ENOMEM; ++ goto out; ++ } ++ ++ kcp->kcp_cache = ++ kmem_cache_create(SPLAT_KMEM_CACHE_NAME, kcp->kcp_size, 0, ++ splat_kmem_cache_test_constructor, ++ splat_kmem_cache_test_destructor, ++ splat_kmem_cache_test_reclaim, ++ kcp, NULL, 0); ++ if (!kcp->kcp_cache) { ++ splat_vprint(file, SPLAT_KMEM_TEST8_NAME, ++ "Unable to create '%s'\n", SPLAT_KMEM_CACHE_NAME); ++ rc = -ENOMEM; ++ goto out_kcp; ++ } ++ ++ kct = splat_kmem_cache_test_kct_alloc(kcp, 0); ++ if (!kct) { ++ splat_vprint(file, SPLAT_KMEM_TEST8_NAME, ++ "Unable to create '%s'\n", "kct"); ++ rc = -ENOMEM; ++ goto out_cache; ++ } ++ ++ rc = splat_kmem_cache_test_kcd_alloc(kcp, kct, SPLAT_KMEM_OBJ_COUNT); ++ if (rc) { ++ splat_vprint(file, SPLAT_KMEM_TEST8_NAME, "Unable to " ++ "allocate from '%s'\n", SPLAT_KMEM_CACHE_NAME); ++ goto out_kct; ++ } ++ ++ for (i = 0; i < 60; i++) { ++ kmem_cache_reap_now(kcp->kcp_cache); ++ splat_kmem_cache_test_debug(file, SPLAT_KMEM_TEST8_NAME, kcp); ++ ++ if (kcp->kcp_cache->skc_obj_total == 0) ++ break; ++ ++ set_current_state(TASK_INTERRUPTIBLE); ++ schedule_timeout(HZ); ++ } ++ ++ if (kcp->kcp_cache->skc_obj_total == 0) { ++ splat_vprint(file, SPLAT_KMEM_TEST8_NAME, ++ "Successfully created %d objects " ++ "in cache %s and reclaimed them\n", ++ SPLAT_KMEM_OBJ_COUNT, SPLAT_KMEM_CACHE_NAME); ++ } else { ++ splat_vprint(file, SPLAT_KMEM_TEST8_NAME, ++ "Failed to reclaim %u/%d objects from cache %s\n", ++ (unsigned)kcp->kcp_cache->skc_obj_total, ++ SPLAT_KMEM_OBJ_COUNT, SPLAT_KMEM_CACHE_NAME); ++ rc = -ENOMEM; ++ } ++ ++ /* Cleanup our mess (for failure case of time expiring) */ ++ splat_kmem_cache_test_kcd_free(kcp, kct); ++out_kct: ++ splat_kmem_cache_test_kct_free(kcp, kct); ++out_cache: ++ kmem_cache_destroy(kcp->kcp_cache); ++out_kcp: ++ splat_kmem_cache_test_kcp_free(kcp); ++out: ++ return rc; ++} ++ ++/* Test cache aging, we have allocated a large number of objects thus ++ * creating a large number of slabs and then free'd them all. However, ++ * since there should be little memory pressure at the moment those ++ * slabs have not been freed. What we want to see is the slab size ++ * decrease gradually as it becomes clear they will not be be needed. ++ * This should be achievable in less than minute. If it takes longer ++ * than this something has gone wrong. 
++ */ ++static int ++splat_kmem_test9(struct file *file, void *arg) ++{ ++ kmem_cache_priv_t *kcp; ++ kmem_cache_thread_t *kct; ++ int i, rc = 0, count = SPLAT_KMEM_OBJ_COUNT * 128; ++ ++ kcp = splat_kmem_cache_test_kcp_alloc(file, SPLAT_KMEM_TEST9_NAME, ++ 256, 0, 0); ++ if (!kcp) { ++ splat_vprint(file, SPLAT_KMEM_TEST9_NAME, ++ "Unable to create '%s'\n", "kcp"); ++ rc = -ENOMEM; ++ goto out; ++ } ++ ++ kcp->kcp_cache = ++ kmem_cache_create(SPLAT_KMEM_CACHE_NAME, kcp->kcp_size, 0, ++ splat_kmem_cache_test_constructor, ++ splat_kmem_cache_test_destructor, ++ NULL, kcp, NULL, 0); ++ if (!kcp->kcp_cache) { ++ splat_vprint(file, SPLAT_KMEM_TEST9_NAME, ++ "Unable to create '%s'\n", SPLAT_KMEM_CACHE_NAME); ++ rc = -ENOMEM; ++ goto out_kcp; ++ } ++ ++ kct = splat_kmem_cache_test_kct_alloc(kcp, 0); ++ if (!kct) { ++ splat_vprint(file, SPLAT_KMEM_TEST8_NAME, ++ "Unable to create '%s'\n", "kct"); ++ rc = -ENOMEM; ++ goto out_cache; ++ } ++ ++ rc = splat_kmem_cache_test_kcd_alloc(kcp, kct, count); ++ if (rc) { ++ splat_vprint(file, SPLAT_KMEM_TEST9_NAME, "Unable to " ++ "allocate from '%s'\n", SPLAT_KMEM_CACHE_NAME); ++ goto out_kct; ++ } ++ ++ splat_kmem_cache_test_kcd_free(kcp, kct); ++ ++ for (i = 0; i < 60; i++) { ++ splat_kmem_cache_test_debug(file, SPLAT_KMEM_TEST9_NAME, kcp); ++ ++ if (kcp->kcp_cache->skc_obj_total == 0) ++ break; ++ ++ set_current_state(TASK_INTERRUPTIBLE); ++ schedule_timeout(HZ); ++ } ++ ++ if (kcp->kcp_cache->skc_obj_total == 0) { ++ splat_vprint(file, SPLAT_KMEM_TEST9_NAME, ++ "Successfully created %d objects " ++ "in cache %s and reclaimed them\n", ++ count, SPLAT_KMEM_CACHE_NAME); ++ } else { ++ splat_vprint(file, SPLAT_KMEM_TEST9_NAME, ++ "Failed to reclaim %u/%d objects from cache %s\n", ++ (unsigned)kcp->kcp_cache->skc_obj_total, count, ++ SPLAT_KMEM_CACHE_NAME); ++ rc = -ENOMEM; ++ } ++ ++out_kct: ++ splat_kmem_cache_test_kct_free(kcp, kct); ++out_cache: ++ kmem_cache_destroy(kcp->kcp_cache); ++out_kcp: ++ splat_kmem_cache_test_kcp_free(kcp); ++out: ++ return rc; ++} ++ ++/* ++ * This test creates N threads with a shared kmem cache. They then all ++ * concurrently allocate and free from the cache to stress the locking and ++ * concurrent cache performance. If any one test takes longer than 5 ++ * seconds to complete it is treated as a failure and may indicate a ++ * performance regression. On my test system no one test takes more ++ * than 1 second to complete so a 5x slowdown likely a problem. ++ */ ++static int ++splat_kmem_test10(struct file *file, void *arg) ++{ ++ uint64_t size, alloc, rc = 0; ++ ++ for (size = 32; size <= 1024*1024; size *= 2) { ++ ++ splat_vprint(file, SPLAT_KMEM_TEST10_NAME, "%-22s %s", "name", ++ "time (sec)\tslabs \tobjs \thash\n"); ++ splat_vprint(file, SPLAT_KMEM_TEST10_NAME, "%-22s %s", "", ++ " \ttot/max/calc\ttot/max/calc\n"); ++ ++ for (alloc = 1; alloc <= 1024; alloc *= 2) { ++ ++ /* Skip tests which exceed available memory. We ++ * leverage availrmem here for some extra testing */ ++ if (size * alloc * SPLAT_KMEM_THREADS > availrmem / 2) ++ continue; ++ ++ rc = splat_kmem_cache_thread_test(file, arg, ++ SPLAT_KMEM_TEST10_NAME, size, alloc, 5); ++ if (rc) ++ break; ++ } ++ } ++ ++ return rc; ++} ++ ++#if 0 ++/* ++ * This test creates N threads with a shared kmem cache which overcommits ++ * memory by 4x. This makes it impossible for the slab to satify the ++ * thread requirements without having its reclaim hook run which will ++ * free objects back for use. 
This behavior is triggered by the linum VM ++ * detecting a low memory condition on the node and invoking the shrinkers. ++ * This should allow all the threads to complete while avoiding deadlock ++ * and for the most part out of memory events. This is very tough on the ++ * system so it is possible the test app may get oom'ed. This particular ++ * test has proven troublesome on 32-bit archs with limited virtual ++ * address space so it only run on 64-bit systems. ++ */ ++static int ++splat_kmem_test11(struct file *file, void *arg) ++{ ++ uint64_t size, alloc, rc; ++ ++ size = 8 * 1024; ++ alloc = ((4 * physmem * PAGE_SIZE) / size) / SPLAT_KMEM_THREADS; ++ ++ splat_vprint(file, SPLAT_KMEM_TEST11_NAME, "%-22s %s", "name", ++ "time (sec)\tslabs \tobjs \thash\n"); ++ splat_vprint(file, SPLAT_KMEM_TEST11_NAME, "%-22s %s", "", ++ " \ttot/max/calc\ttot/max/calc\n"); ++ ++ rc = splat_kmem_cache_thread_test(file, arg, ++ SPLAT_KMEM_TEST11_NAME, size, alloc, 60); ++ ++ return rc; ++} ++#endif ++ ++/* ++ * Check vmem_size() behavior by acquiring the alloc/free/total vmem ++ * space, then allocate a known buffer size from vmem space. We can ++ * then check that vmem_size() values were updated properly with in ++ * a fairly small tolerence. The tolerance is important because we ++ * are not the only vmem consumer on the system. Other unrelated ++ * allocations might occur during the small test window. The vmem ++ * allocation itself may also add in a little extra private space to ++ * the buffer. Finally, verify total space always remains unchanged. ++ */ ++static int ++splat_kmem_test12(struct file *file, void *arg) ++{ ++ size_t alloc1, free1, total1; ++ size_t alloc2, free2, total2; ++ int size = 8*1024*1024; ++ void *ptr; ++ ++ alloc1 = vmem_size(NULL, VMEM_ALLOC); ++ free1 = vmem_size(NULL, VMEM_FREE); ++ total1 = vmem_size(NULL, VMEM_ALLOC | VMEM_FREE); ++ splat_vprint(file, SPLAT_KMEM_TEST12_NAME, "Vmem alloc=%lu " ++ "free=%lu total=%lu\n", (unsigned long)alloc1, ++ (unsigned long)free1, (unsigned long)total1); ++ ++ splat_vprint(file, SPLAT_KMEM_TEST12_NAME, "Alloc %d bytes\n", size); ++ ptr = vmem_alloc(size, KM_SLEEP); ++ if (!ptr) { ++ splat_vprint(file, SPLAT_KMEM_TEST12_NAME, ++ "Failed to alloc %d bytes\n", size); ++ return -ENOMEM; ++ } ++ ++ alloc2 = vmem_size(NULL, VMEM_ALLOC); ++ free2 = vmem_size(NULL, VMEM_FREE); ++ total2 = vmem_size(NULL, VMEM_ALLOC | VMEM_FREE); ++ splat_vprint(file, SPLAT_KMEM_TEST12_NAME, "Vmem alloc=%lu " ++ "free=%lu total=%lu\n", (unsigned long)alloc2, ++ (unsigned long)free2, (unsigned long)total2); ++ ++ splat_vprint(file, SPLAT_KMEM_TEST12_NAME, "Free %d bytes\n", size); ++ vmem_free(ptr, size); ++ if (alloc2 < (alloc1 + size - (size / 100)) || ++ alloc2 > (alloc1 + size + (size / 100))) { ++ splat_vprint(file, SPLAT_KMEM_TEST12_NAME, "Failed " ++ "VMEM_ALLOC size: %lu != %lu+%d (+/- 1%%)\n", ++ (unsigned long)alloc2,(unsigned long)alloc1,size); ++ return -ERANGE; ++ } ++ ++ if (free2 < (free1 - size - (size / 100)) || ++ free2 > (free1 - size + (size / 100))) { ++ splat_vprint(file, SPLAT_KMEM_TEST12_NAME, "Failed " ++ "VMEM_FREE size: %lu != %lu-%d (+/- 1%%)\n", ++ (unsigned long)free2, (unsigned long)free1, size); ++ return -ERANGE; ++ } ++ ++ if (total1 != total2) { ++ splat_vprint(file, SPLAT_KMEM_TEST12_NAME, "Failed " ++ "VMEM_ALLOC | VMEM_FREE not constant: " ++ "%lu != %lu\n", (unsigned long)total2, ++ (unsigned long)total1); ++ return -ERANGE; ++ } ++ ++ splat_vprint(file, SPLAT_KMEM_TEST12_NAME, ++ "VMEM_ALLOC within tolerance: ~%ld%% 
(%ld/%d)\n", ++ (long)abs(alloc1 + (long)size - alloc2) * 100 / (long)size, ++ (long)abs(alloc1 + (long)size - alloc2), size); ++ splat_vprint(file, SPLAT_KMEM_TEST12_NAME, ++ "VMEM_FREE within tolerance: ~%ld%% (%ld/%d)\n", ++ (long)abs((free1 - (long)size) - free2) * 100 / (long)size, ++ (long)abs((free1 - (long)size) - free2), size); ++ ++ return 0; ++} ++ ++typedef struct dummy_page { ++ struct list_head dp_list; ++ char dp_pad[PAGE_SIZE - sizeof(struct list_head)]; ++} dummy_page_t; ++ ++/* ++ * This test is designed to verify that direct reclaim is functioning as ++ * expected. We allocate a large number of objects thus creating a large ++ * number of slabs. We then apply memory pressure and expect that the ++ * direct reclaim path can easily recover those slabs. The registered ++ * reclaim function will free the objects and the slab shrinker will call ++ * it repeatedly until at least a single slab can be freed. ++ * ++ * Note it may not be possible to reclaim every last slab via direct reclaim ++ * without a failure because the shrinker_rwsem may be contended. For this ++ * reason, quickly reclaiming 3/4 of the slabs is considered a success. ++ * ++ * This should all be possible within 10 seconds. For reference, on a ++ * system with 2G of memory this test takes roughly 0.2 seconds to run. ++ * It may take longer on larger memory systems but should still easily ++ * complete in the alloted 10 seconds. ++ */ ++static int ++splat_kmem_test13(struct file *file, void *arg) ++{ ++ kmem_cache_priv_t *kcp; ++ kmem_cache_thread_t *kct; ++ dummy_page_t *dp; ++ struct list_head list; ++ struct timespec start, delta = { 0, 0 }; ++ int size, count, slabs, fails = 0; ++ int i, rc = 0, max_time = 10; ++ ++ size = 128 * 1024; ++ count = ((physmem * PAGE_SIZE) / 4 / size); ++ ++ kcp = splat_kmem_cache_test_kcp_alloc(file, SPLAT_KMEM_TEST13_NAME, ++ size, 0, 0); ++ if (!kcp) { ++ splat_vprint(file, SPLAT_KMEM_TEST13_NAME, ++ "Unable to create '%s'\n", "kcp"); ++ rc = -ENOMEM; ++ goto out; ++ } ++ ++ kcp->kcp_cache = ++ kmem_cache_create(SPLAT_KMEM_CACHE_NAME, kcp->kcp_size, 0, ++ splat_kmem_cache_test_constructor, ++ splat_kmem_cache_test_destructor, ++ splat_kmem_cache_test_reclaim, ++ kcp, NULL, 0); ++ if (!kcp->kcp_cache) { ++ splat_vprint(file, SPLAT_KMEM_TEST13_NAME, ++ "Unable to create '%s'\n", SPLAT_KMEM_CACHE_NAME); ++ rc = -ENOMEM; ++ goto out_kcp; ++ } ++ ++ kct = splat_kmem_cache_test_kct_alloc(kcp, 0); ++ if (!kct) { ++ splat_vprint(file, SPLAT_KMEM_TEST13_NAME, ++ "Unable to create '%s'\n", "kct"); ++ rc = -ENOMEM; ++ goto out_cache; ++ } ++ ++ rc = splat_kmem_cache_test_kcd_alloc(kcp, kct, count); ++ if (rc) { ++ splat_vprint(file, SPLAT_KMEM_TEST13_NAME, "Unable to " ++ "allocate from '%s'\n", SPLAT_KMEM_CACHE_NAME); ++ goto out_kct; ++ } ++ ++ i = 0; ++ slabs = kcp->kcp_cache->skc_slab_total; ++ INIT_LIST_HEAD(&list); ++ start = current_kernel_time(); ++ ++ /* Apply memory pressure */ ++ while (kcp->kcp_cache->skc_slab_total > (slabs >> 2)) { ++ ++ if ((i % 10000) == 0) ++ splat_kmem_cache_test_debug( ++ file, SPLAT_KMEM_TEST13_NAME, kcp); ++ ++ delta = timespec_sub(current_kernel_time(), start); ++ if (delta.tv_sec >= max_time) { ++ splat_vprint(file, SPLAT_KMEM_TEST13_NAME, ++ "Failed to reclaim 3/4 of cache in %ds, " ++ "%u/%u slabs remain\n", max_time, ++ (unsigned)kcp->kcp_cache->skc_slab_total, ++ slabs); ++ rc = -ETIME; ++ break; ++ } ++ ++ dp = (dummy_page_t *)__get_free_page(GFP_KERNEL | __GFP_NORETRY); ++ if (!dp) { ++ fails++; ++ splat_vprint(file, 
SPLAT_KMEM_TEST13_NAME, ++ "Failed (%d) to allocate page with %u " ++ "slabs still in the cache\n", fails, ++ (unsigned)kcp->kcp_cache->skc_slab_total); ++ continue; ++ } ++ ++ list_add(&dp->dp_list, &list); ++ i++; ++ } ++ ++ if (rc == 0) ++ splat_vprint(file, SPLAT_KMEM_TEST13_NAME, ++ "Successfully created %u slabs and with %d alloc " ++ "failures reclaimed 3/4 of them in %d.%03ds\n", ++ slabs, fails, ++ (int)delta.tv_sec, (int)delta.tv_nsec / 1000000); ++ ++ /* Release memory pressure pages */ ++ while (!list_empty(&list)) { ++ dp = list_entry(list.next, dummy_page_t, dp_list); ++ list_del_init(&dp->dp_list); ++ free_page((unsigned long)dp); ++ } ++ ++ /* Release remaining kmem cache objects */ ++ splat_kmem_cache_test_kcd_free(kcp, kct); ++out_kct: ++ splat_kmem_cache_test_kct_free(kcp, kct); ++out_cache: ++ kmem_cache_destroy(kcp->kcp_cache); ++out_kcp: ++ splat_kmem_cache_test_kcp_free(kcp); ++out: ++ return rc; ++} ++ ++splat_subsystem_t * ++splat_kmem_init(void) ++{ ++ splat_subsystem_t *sub; ++ ++ sub = kmalloc(sizeof(*sub), GFP_KERNEL); ++ if (sub == NULL) ++ return NULL; ++ ++ memset(sub, 0, sizeof(*sub)); ++ strncpy(sub->desc.name, SPLAT_KMEM_NAME, SPLAT_NAME_SIZE); ++ strncpy(sub->desc.desc, SPLAT_KMEM_DESC, SPLAT_DESC_SIZE); ++ INIT_LIST_HEAD(&sub->subsystem_list); ++ INIT_LIST_HEAD(&sub->test_list); ++ spin_lock_init(&sub->test_lock); ++ sub->desc.id = SPLAT_SUBSYSTEM_KMEM; ++ ++ SPLAT_TEST_INIT(sub, SPLAT_KMEM_TEST1_NAME, SPLAT_KMEM_TEST1_DESC, ++ SPLAT_KMEM_TEST1_ID, splat_kmem_test1); ++ SPLAT_TEST_INIT(sub, SPLAT_KMEM_TEST2_NAME, SPLAT_KMEM_TEST2_DESC, ++ SPLAT_KMEM_TEST2_ID, splat_kmem_test2); ++ SPLAT_TEST_INIT(sub, SPLAT_KMEM_TEST3_NAME, SPLAT_KMEM_TEST3_DESC, ++ SPLAT_KMEM_TEST3_ID, splat_kmem_test3); ++ SPLAT_TEST_INIT(sub, SPLAT_KMEM_TEST4_NAME, SPLAT_KMEM_TEST4_DESC, ++ SPLAT_KMEM_TEST4_ID, splat_kmem_test4); ++ SPLAT_TEST_INIT(sub, SPLAT_KMEM_TEST5_NAME, SPLAT_KMEM_TEST5_DESC, ++ SPLAT_KMEM_TEST5_ID, splat_kmem_test5); ++ SPLAT_TEST_INIT(sub, SPLAT_KMEM_TEST6_NAME, SPLAT_KMEM_TEST6_DESC, ++ SPLAT_KMEM_TEST6_ID, splat_kmem_test6); ++ SPLAT_TEST_INIT(sub, SPLAT_KMEM_TEST7_NAME, SPLAT_KMEM_TEST7_DESC, ++ SPLAT_KMEM_TEST7_ID, splat_kmem_test7); ++ SPLAT_TEST_INIT(sub, SPLAT_KMEM_TEST8_NAME, SPLAT_KMEM_TEST8_DESC, ++ SPLAT_KMEM_TEST8_ID, splat_kmem_test8); ++ SPLAT_TEST_INIT(sub, SPLAT_KMEM_TEST9_NAME, SPLAT_KMEM_TEST9_DESC, ++ SPLAT_KMEM_TEST9_ID, splat_kmem_test9); ++ SPLAT_TEST_INIT(sub, SPLAT_KMEM_TEST10_NAME, SPLAT_KMEM_TEST10_DESC, ++ SPLAT_KMEM_TEST10_ID, splat_kmem_test10); ++#if 0 ++ SPLAT_TEST_INIT(sub, SPLAT_KMEM_TEST11_NAME, SPLAT_KMEM_TEST11_DESC, ++ SPLAT_KMEM_TEST11_ID, splat_kmem_test11); ++#endif ++ SPLAT_TEST_INIT(sub, SPLAT_KMEM_TEST12_NAME, SPLAT_KMEM_TEST12_DESC, ++ SPLAT_KMEM_TEST12_ID, splat_kmem_test12); ++ SPLAT_TEST_INIT(sub, SPLAT_KMEM_TEST13_NAME, SPLAT_KMEM_TEST13_DESC, ++ SPLAT_KMEM_TEST13_ID, splat_kmem_test13); ++ ++ return sub; ++} ++ ++void ++splat_kmem_fini(splat_subsystem_t *sub) ++{ ++ ASSERT(sub); ++ SPLAT_TEST_FINI(sub, SPLAT_KMEM_TEST13_ID); ++ SPLAT_TEST_FINI(sub, SPLAT_KMEM_TEST12_ID); ++#if 0 ++ SPLAT_TEST_FINI(sub, SPLAT_KMEM_TEST11_ID); ++#endif ++ SPLAT_TEST_FINI(sub, SPLAT_KMEM_TEST10_ID); ++ SPLAT_TEST_FINI(sub, SPLAT_KMEM_TEST9_ID); ++ SPLAT_TEST_FINI(sub, SPLAT_KMEM_TEST8_ID); ++ SPLAT_TEST_FINI(sub, SPLAT_KMEM_TEST7_ID); ++ SPLAT_TEST_FINI(sub, SPLAT_KMEM_TEST6_ID); ++ SPLAT_TEST_FINI(sub, SPLAT_KMEM_TEST5_ID); ++ SPLAT_TEST_FINI(sub, SPLAT_KMEM_TEST4_ID); ++ SPLAT_TEST_FINI(sub, SPLAT_KMEM_TEST3_ID); ++ 
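/* Editor's note (illustrative aside, not part of the original patch):
++ * every SPLAT subsystem follows the same pattern -- tests are registered
++ * with SPLAT_TEST_INIT() in *_init() and unregistered here in reverse
++ * order with SPLAT_TEST_FINI().  A hypothetical extra test would be wired
++ * in the same way, e.g.
++ *
++ *   SPLAT_TEST_INIT(sub, "test14", "New test", SPLAT_KMEM_TEST14_ID,
++ *                   splat_kmem_test14);       (added in splat_kmem_init)
++ *   SPLAT_TEST_FINI(sub, SPLAT_KMEM_TEST14_ID);     (added first here)
++ *
++ * where the name, description and SPLAT_KMEM_TEST14_ID are invented for
++ * illustration only.
++ */ ++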
SPLAT_TEST_FINI(sub, SPLAT_KMEM_TEST2_ID); ++ SPLAT_TEST_FINI(sub, SPLAT_KMEM_TEST1_ID); ++ ++ kfree(sub); ++} ++ ++int ++splat_kmem_id(void) { ++ return SPLAT_SUBSYSTEM_KMEM; ++} +diff -uNr linux-3.2.33-go.orig/spl/splat/splat-kobj.c linux-3.2.33-go/spl/splat/splat-kobj.c +--- linux-3.2.33-go.orig/spl/splat/splat-kobj.c 1970-01-01 01:00:00.000000000 +0100 ++++ linux-3.2.33-go/spl/splat/splat-kobj.c 2012-11-16 23:22:32.409192874 +0100 +@@ -0,0 +1,166 @@ ++/*****************************************************************************\ ++ * Copyright (C) 2007-2010 Lawrence Livermore National Security, LLC. ++ * Copyright (C) 2007 The Regents of the University of California. ++ * Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER). ++ * Written by Brian Behlendorf . ++ * UCRL-CODE-235197 ++ * ++ * This file is part of the SPL, Solaris Porting Layer. ++ * For details, see . ++ * ++ * The SPL is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License as published by the ++ * Free Software Foundation; either version 2 of the License, or (at your ++ * option) any later version. ++ * ++ * The SPL is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * for more details. ++ * ++ * You should have received a copy of the GNU General Public License along ++ * with the SPL. If not, see . ++ ***************************************************************************** ++ * Solaris Porting LAyer Tests (SPLAT) Kobj Tests. ++\*****************************************************************************/ ++ ++#include ++#include "splat-internal.h" ++ ++#define SPLAT_KOBJ_NAME "kobj" ++#define SPLAT_KOBJ_DESC "Kernel Kobj Tests" ++ ++#define SPLAT_KOBJ_TEST1_ID 0x0a01 ++#define SPLAT_KOBJ_TEST1_NAME "open" ++#define SPLAT_KOBJ_TEST1_DESC "Kobj Open/Close Test" ++ ++#define SPLAT_KOBJ_TEST2_ID 0x0a02 ++#define SPLAT_KOBJ_TEST2_NAME "size/read" ++#define SPLAT_KOBJ_TEST2_DESC "Kobj Size/Read Test" ++ ++#define SPLAT_KOBJ_TEST_FILE "/etc/fstab" ++ ++static int ++splat_kobj_test1(struct file *file, void *arg) ++{ ++ struct _buf *f; ++ ++ f = kobj_open_file(SPLAT_KOBJ_TEST_FILE); ++ if (f == (struct _buf *)-1) { ++ splat_vprint(file, SPLAT_KOBJ_TEST1_NAME, "Failed to open " ++ "test file: %s\n", SPLAT_KOBJ_TEST_FILE); ++ return -ENOENT; ++ } ++ ++ kobj_close_file(f); ++ splat_vprint(file, SPLAT_KOBJ_TEST1_NAME, "Successfully opened and " ++ "closed test file: %s\n", SPLAT_KOBJ_TEST_FILE); ++ ++ return 0; ++} /* splat_kobj_test1() */ ++ ++static int ++splat_kobj_test2(struct file *file, void *arg) ++{ ++ struct _buf *f; ++ char *buf; ++ uint64_t size; ++ int rc; ++ ++ f = kobj_open_file(SPLAT_KOBJ_TEST_FILE); ++ if (f == (struct _buf *)-1) { ++ splat_vprint(file, SPLAT_KOBJ_TEST2_NAME, "Failed to open " ++ "test file: %s\n", SPLAT_KOBJ_TEST_FILE); ++ return -ENOENT; ++ } ++ ++ rc = kobj_get_filesize(f, &size); ++ if (rc) { ++ splat_vprint(file, SPLAT_KOBJ_TEST2_NAME, "Failed stat of " ++ "test file: %s (%d)\n", SPLAT_KOBJ_TEST_FILE, rc); ++ goto out; ++ } ++ ++ buf = kmalloc(size + 1, GFP_KERNEL); ++ if (!buf) { ++ rc = -ENOMEM; ++ splat_vprint(file, SPLAT_KOBJ_TEST2_NAME, "Failed to alloc " ++ "%lld bytes for tmp buffer (%d)\n", ++ (long long)size, rc); ++ goto out; ++ } ++ ++ memset(buf, 0, size + 1); ++ rc = kobj_read_file(f, buf, size, 0); ++ if (rc < 0) { ++ splat_vprint(file, 
SPLAT_KOBJ_TEST2_NAME, "Failed read of " ++ "test file: %s (%d)\n", SPLAT_KOBJ_TEST_FILE, rc); ++ goto out2; ++ } ++ ++ /* Validate we read as many bytes as expected based on the stat. This ++ * isn't a perfect test since we didn't create the file however it is ++ * pretty unlikely there are garbage characters in your /etc/fstab */ ++ if (size != (uint64_t)strlen(buf)) { ++ rc = -EFBIG; ++ splat_vprint(file, SPLAT_KOBJ_TEST2_NAME, "Stat'ed size " ++ "(%lld) does not match number of bytes read " ++ "(%lld)\n", (long long)size, ++ (long long)strlen(buf)); ++ goto out2; ++ } ++ ++ rc = 0; ++ splat_vprint(file, SPLAT_KOBJ_TEST2_NAME, "\n%s\n", buf); ++ splat_vprint(file, SPLAT_KOBJ_TEST2_NAME, "Successfully stat'ed " ++ "and read expected number of bytes (%lld) from test " ++ "file: %s\n", (long long)size, SPLAT_KOBJ_TEST_FILE); ++out2: ++ kfree(buf); ++out: ++ kobj_close_file(f); ++ ++ return rc; ++} /* splat_kobj_test2() */ ++ ++splat_subsystem_t * ++splat_kobj_init(void) ++{ ++ splat_subsystem_t *sub; ++ ++ sub = kmalloc(sizeof(*sub), GFP_KERNEL); ++ if (sub == NULL) ++ return NULL; ++ ++ memset(sub, 0, sizeof(*sub)); ++ strncpy(sub->desc.name, SPLAT_KOBJ_NAME, SPLAT_NAME_SIZE); ++ strncpy(sub->desc.desc, SPLAT_KOBJ_DESC, SPLAT_DESC_SIZE); ++ INIT_LIST_HEAD(&sub->subsystem_list); ++ INIT_LIST_HEAD(&sub->test_list); ++ spin_lock_init(&sub->test_lock); ++ sub->desc.id = SPLAT_SUBSYSTEM_KOBJ; ++ ++ SPLAT_TEST_INIT(sub, SPLAT_KOBJ_TEST1_NAME, SPLAT_KOBJ_TEST1_DESC, ++ SPLAT_KOBJ_TEST1_ID, splat_kobj_test1); ++ SPLAT_TEST_INIT(sub, SPLAT_KOBJ_TEST2_NAME, SPLAT_KOBJ_TEST2_DESC, ++ SPLAT_KOBJ_TEST2_ID, splat_kobj_test2); ++ ++ return sub; ++} /* splat_kobj_init() */ ++ ++void ++splat_kobj_fini(splat_subsystem_t *sub) ++{ ++ ASSERT(sub); ++ ++ SPLAT_TEST_FINI(sub, SPLAT_KOBJ_TEST2_ID); ++ SPLAT_TEST_FINI(sub, SPLAT_KOBJ_TEST1_ID); ++ ++ kfree(sub); ++} /* splat_kobj_fini() */ ++ ++int ++splat_kobj_id(void) ++{ ++ return SPLAT_SUBSYSTEM_KOBJ; ++} /* splat_kobj_id() */ +diff -uNr linux-3.2.33-go.orig/spl/splat/splat-linux.c linux-3.2.33-go/spl/splat/splat-linux.c +--- linux-3.2.33-go.orig/spl/splat/splat-linux.c 1970-01-01 01:00:00.000000000 +0100 ++++ linux-3.2.33-go/spl/splat/splat-linux.c 2012-11-16 23:22:32.409192874 +0100 +@@ -0,0 +1,242 @@ ++/*****************************************************************************\ ++ * Copyright (C) 2011 Lawrence Livermore National Security, LLC. ++ * Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER). ++ * Written by Brian Behlendorf . ++ * UCRL-CODE-235197 ++ * ++ * This file is part of the SPL, Solaris Porting Layer. ++ * For details, see . ++ * ++ * The SPL is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License as published by the ++ * Free Software Foundation; either version 2 of the License, or (at your ++ * option) any later version. ++ * ++ * The SPL is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * for more details. ++ * ++ * You should have received a copy of the GNU General Public License along ++ * with the SPL. If not, see . ++ ***************************************************************************** ++ * Solaris Porting LAyer Tests (SPLAT) Kernel Compatibility Tests. 
++\*****************************************************************************/ ++ ++#include ++#include "splat-internal.h" ++ ++#define SPLAT_LINUX_NAME "linux" ++#define SPLAT_LINUX_DESC "Kernel Compatibility Tests" ++ ++#define SPLAT_LINUX_TEST1_ID 0x1001 ++#define SPLAT_LINUX_TEST1_NAME "shrink_dcache" ++#define SPLAT_LINUX_TEST1_DESC "Shrink dcache test" ++ ++#define SPLAT_LINUX_TEST2_ID 0x1002 ++#define SPLAT_LINUX_TEST2_NAME "shrink_icache" ++#define SPLAT_LINUX_TEST2_DESC "Shrink icache test" ++ ++#define SPLAT_LINUX_TEST3_ID 0x1003 ++#define SPLAT_LINUX_TEST3_NAME "shrinker" ++#define SPLAT_LINUX_TEST3_DESC "Shrinker test" ++ ++ ++/* ++ * Attempt to shrink the dcache memory. This is simply a functional ++ * to ensure we can correctly call the shrinker. We don't check that ++ * the cache actually decreased because we have no control over what ++ * else may be running on the system. This avoid false positives. ++ */ ++static int ++splat_linux_test1(struct file *file, void *arg) ++{ ++ int remain_before; ++ int remain_after; ++ ++ remain_before = shrink_dcache_memory(0, GFP_KERNEL); ++ remain_after = shrink_dcache_memory(KMC_REAP_CHUNK, GFP_KERNEL); ++ ++ splat_vprint(file, SPLAT_LINUX_TEST1_NAME, ++ "Shrink dcache memory, remain %d -> %d\n", ++ remain_before, remain_after); ++ ++ return 0; ++} ++ ++/* ++ * Attempt to shrink the icache memory. This is simply a functional ++ * to ensure we can correctly call the shrinker. We don't check that ++ * the cache actually decreased because we have no control over what ++ * else may be running on the system. This avoid false positives. ++ */ ++static int ++splat_linux_test2(struct file *file, void *arg) ++{ ++ int remain_before; ++ int remain_after; ++ ++ remain_before = shrink_icache_memory(0, GFP_KERNEL); ++ remain_after = shrink_icache_memory(KMC_REAP_CHUNK, GFP_KERNEL); ++ ++ splat_vprint(file, SPLAT_LINUX_TEST2_NAME, ++ "Shrink icache memory, remain %d -> %d\n", ++ remain_before, remain_after); ++ ++ return 0; ++} ++ ++SPL_SHRINKER_CALLBACK_FWD_DECLARE(splat_linux_shrinker_fn); ++SPL_SHRINKER_DECLARE(splat_linux_shrinker, splat_linux_shrinker_fn, 1); ++static unsigned long splat_linux_shrinker_size = 0; ++static struct file *splat_linux_shrinker_file = NULL; ++ ++static int ++__splat_linux_shrinker_fn(struct shrinker *shrink, struct shrink_control *sc) ++{ ++ static int failsafe = 0; ++ ++ if (sc->nr_to_scan) { ++ splat_linux_shrinker_size = splat_linux_shrinker_size - ++ MIN(sc->nr_to_scan, splat_linux_shrinker_size); ++ ++ splat_vprint(splat_linux_shrinker_file, SPLAT_LINUX_TEST3_NAME, ++ "Reclaimed %lu objects, size now %lu\n", ++ sc->nr_to_scan, splat_linux_shrinker_size); ++ } else { ++ splat_vprint(splat_linux_shrinker_file, SPLAT_LINUX_TEST3_NAME, ++ "Cache size is %lu\n", splat_linux_shrinker_size); ++ } ++ ++ /* Far more calls than expected abort drop_slab as a failsafe */ ++ if ((++failsafe % 1000) == 0) { ++ splat_vprint(splat_linux_shrinker_file, SPLAT_LINUX_TEST3_NAME, ++ "Far more calls than expected (%d), size now %lu\n", ++ failsafe, splat_linux_shrinker_size); ++ return -1; ++ } ++ ++ return (int)splat_linux_shrinker_size; ++} ++ ++SPL_SHRINKER_CALLBACK_WRAPPER(splat_linux_shrinker_fn); ++ ++#define DROP_SLAB_CMD \ ++ "exec 0/proc/sys/vm/drop_caches " \ ++ " 2>/dev/null; " \ ++ "echo 2" ++ ++static int ++splat_linux_drop_slab(struct file *file) ++{ ++ char *argv[] = { "/bin/sh", ++ "-c", ++ DROP_SLAB_CMD, ++ NULL }; ++ char *envp[] = { "HOME=/", ++ "TERM=linux", ++ "PATH=/sbin:/usr/sbin:/bin:/usr/bin", ++ NULL }; ++ 
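/* Editor's note (descriptive comment, not part of the original patch):
++ * this helper shells out via call_usermodehelper() so that the value 2
++ * gets written to /proc/sys/vm/drop_caches -- the in-kernel equivalent
++ * of running, from userspace:
++ *
++ *   echo 2 > /proc/sys/vm/drop_caches
++ *
++ * which asks the kernel to drop reclaimable slab objects and therefore
++ * invokes every registered shrinker, including the test shrinker set up
++ * by splat_linux_test3() below.
++ */ ++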
int rc; ++ ++ rc = call_usermodehelper(argv[0], argv, envp, 1); ++ if (rc) ++ splat_vprint(file, SPLAT_LINUX_TEST3_NAME, ++ "Failed user helper '%s %s %s', rc = %d\n", ++ argv[0], argv[1], argv[2], rc); ++ ++ return rc; ++} ++ ++/* ++ * Verify correct shrinker functionality by registering a shrinker ++ * with the required compatibility macros. We then use a simulated ++ * cache and force the systems caches to be dropped. The shrinker ++ * should be repeatedly called until it reports that the cache is ++ * empty. It is then cleanly unregistered and correct behavior is ++ * verified. There are now four slightly different supported shrinker ++ * API and this test ensures the compatibility code is correct. ++ */ ++static int ++splat_linux_test3(struct file *file, void *arg) ++{ ++ int rc = -EINVAL; ++ ++ /* ++ * Globals used by the shrinker, it is not safe to run this ++ * test concurrently this is a safe assumption for SPLAT tests. ++ * Regardless we do some minimal checking a bail if concurrent ++ * use is detected. ++ */ ++ if (splat_linux_shrinker_size || splat_linux_shrinker_file) { ++ splat_vprint(file, SPLAT_LINUX_TEST3_NAME, ++ "Failed due to concurrent shrinker test, rc = %d\n", rc); ++ return (rc); ++ } ++ ++ splat_linux_shrinker_size = 1024; ++ splat_linux_shrinker_file = file; ++ ++ spl_register_shrinker(&splat_linux_shrinker); ++ rc = splat_linux_drop_slab(file); ++ if (rc) ++ goto out; ++ ++ if (splat_linux_shrinker_size != 0) { ++ splat_vprint(file, SPLAT_LINUX_TEST3_NAME, ++ "Failed cache was not shrunk to 0, size now %lu", ++ splat_linux_shrinker_size); ++ rc = -EDOM; ++ } ++out: ++ spl_unregister_shrinker(&splat_linux_shrinker); ++ ++ splat_linux_shrinker_size = 0; ++ splat_linux_shrinker_file = NULL; ++ ++ return rc; ++} ++ ++splat_subsystem_t * ++splat_linux_init(void) ++{ ++ splat_subsystem_t *sub; ++ ++ sub = kmalloc(sizeof(*sub), GFP_KERNEL); ++ if (sub == NULL) ++ return NULL; ++ ++ memset(sub, 0, sizeof(*sub)); ++ strncpy(sub->desc.name, SPLAT_LINUX_NAME, SPLAT_NAME_SIZE); ++ strncpy(sub->desc.desc, SPLAT_LINUX_DESC, SPLAT_DESC_SIZE); ++ INIT_LIST_HEAD(&sub->subsystem_list); ++ INIT_LIST_HEAD(&sub->test_list); ++ spin_lock_init(&sub->test_lock); ++ sub->desc.id = SPLAT_SUBSYSTEM_LINUX; ++ ++ SPLAT_TEST_INIT(sub, SPLAT_LINUX_TEST1_NAME, SPLAT_LINUX_TEST1_DESC, ++ SPLAT_LINUX_TEST1_ID, splat_linux_test1); ++ SPLAT_TEST_INIT(sub, SPLAT_LINUX_TEST2_NAME, SPLAT_LINUX_TEST2_DESC, ++ SPLAT_LINUX_TEST2_ID, splat_linux_test2); ++ SPLAT_TEST_INIT(sub, SPLAT_LINUX_TEST3_NAME, SPLAT_LINUX_TEST3_DESC, ++ SPLAT_LINUX_TEST3_ID, splat_linux_test3); ++ ++ return sub; ++} ++ ++void ++splat_linux_fini(splat_subsystem_t *sub) ++{ ++ ASSERT(sub); ++ SPLAT_TEST_FINI(sub, SPLAT_LINUX_TEST3_ID); ++ SPLAT_TEST_FINI(sub, SPLAT_LINUX_TEST2_ID); ++ SPLAT_TEST_FINI(sub, SPLAT_LINUX_TEST1_ID); ++ ++ kfree(sub); ++} ++ ++int ++splat_linux_id(void) { ++ return SPLAT_SUBSYSTEM_LINUX; ++} +diff -uNr linux-3.2.33-go.orig/spl/splat/splat-list.c linux-3.2.33-go/spl/splat/splat-list.c +--- linux-3.2.33-go.orig/spl/splat/splat-list.c 1970-01-01 01:00:00.000000000 +0100 ++++ linux-3.2.33-go/spl/splat/splat-list.c 2012-11-16 23:22:32.409192874 +0100 +@@ -0,0 +1,475 @@ ++/*****************************************************************************\ ++ * Copyright (C) 2007-2010 Lawrence Livermore National Security, LLC. ++ * Copyright (C) 2007 The Regents of the University of California. ++ * Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER). ++ * Written by Brian Behlendorf . 
++ * UCRL-CODE-235197 ++ * ++ * This file is part of the SPL, Solaris Porting Layer. ++ * For details, see . ++ * ++ * The SPL is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License as published by the ++ * Free Software Foundation; either version 2 of the License, or (at your ++ * option) any later version. ++ * ++ * The SPL is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * for more details. ++ * ++ * You should have received a copy of the GNU General Public License along ++ * with the SPL. If not, see . ++ ***************************************************************************** ++ * Solaris Porting LAyer Tests (SPLAT) List Tests. ++\*****************************************************************************/ ++ ++#include ++#include ++#include "splat-internal.h" ++ ++#define SPLAT_LIST_NAME "list" ++#define SPLAT_LIST_DESC "Kernel List Tests" ++ ++#define SPLAT_LIST_TEST1_ID 0x0c01 ++#define SPLAT_LIST_TEST1_NAME "create/destroy" ++#define SPLAT_LIST_TEST1_DESC "Create/destroy Test" ++ ++#define SPLAT_LIST_TEST2_ID 0x0c02 ++#define SPLAT_LIST_TEST2_NAME "ins/rm head" ++#define SPLAT_LIST_TEST2_DESC "Insert/remove head Test" ++ ++#define SPLAT_LIST_TEST3_ID 0x0c03 ++#define SPLAT_LIST_TEST3_NAME "ins/rm tail" ++#define SPLAT_LIST_TEST3_DESC "Insert/remove tail Test" ++ ++#define SPLAT_LIST_TEST4_ID 0x0c04 ++#define SPLAT_LIST_TEST4_NAME "insert_after" ++#define SPLAT_LIST_TEST4_DESC "Insert_after Test" ++ ++#define SPLAT_LIST_TEST5_ID 0x0c05 ++#define SPLAT_LIST_TEST5_NAME "insert_before" ++#define SPLAT_LIST_TEST5_DESC "Insert_before Test" ++ ++#define SPLAT_LIST_TEST6_ID 0x0c06 ++#define SPLAT_LIST_TEST6_NAME "remove" ++#define SPLAT_LIST_TEST6_DESC "Remove Test" ++ ++#define SPLAT_LIST_TEST7_ID 0x0c7 ++#define SPLAT_LIST_TEST7_NAME "active" ++#define SPLAT_LIST_TEST7_DESC "Active Test" ++ ++/* It is important that li_node is not the first element, this ++ * ensures the list_d2l/list_object macros are working correctly. */ ++typedef struct list_item { ++ int li_data; ++ list_node_t li_node; ++} list_item_t; ++ ++#define LIST_ORDER_STACK 0 ++#define LIST_ORDER_QUEUE 1 ++ ++static int ++splat_list_test1(struct file *file, void *arg) ++{ ++ list_t list; ++ ++ splat_vprint(file, SPLAT_LIST_TEST1_NAME, "Creating list\n%s", ""); ++ list_create(&list, sizeof(list_item_t), offsetof(list_item_t, li_node)); ++ ++ if (!list_is_empty(&list)) { ++ splat_vprint(file, SPLAT_LIST_TEST1_NAME, ++ "New list NOT empty%s\n", ""); ++ /* list_destroy() intentionally skipped to avoid assert */ ++ return -EEXIST; ++ } ++ ++ splat_vprint(file, SPLAT_LIST_TEST1_NAME, "Destroying list\n%s", ""); ++ list_destroy(&list); ++ ++ /* Validate the list has been destroyed */ ++ if (list_link_active(&list.list_head)) { ++ splat_vprint(file, SPLAT_LIST_TEST1_NAME, ++ "Destroyed list still active%s", ""); ++ return -EIO; ++ } ++ ++ return 0; ++} ++ ++static int ++splat_list_validate(list_t *list, int size, int order, int mult) ++{ ++ list_item_t *li; ++ int i; ++ ++ /* Walk all items in list from head to verify stack or queue ++ * ordering. We bound the for loop by size+1 to ensure that ++ * we still terminate if there is list corruption. 
We also ++ * intentionally make things a little more complex than they ++ * need to be by using list_head/list_next for queues, and ++ * list_tail/list_prev for stacks. This is simply done for ++ * coverage and to ensure these function are working right. ++ */ ++ for (i = 0, li = (order ? list_head(list) : list_tail(list)); ++ i < size + 1 && li != NULL; ++ i++, li = (order ? list_next(list, li) : list_prev(list, li))) ++ if (li->li_data != i * mult) ++ return -EIDRM; ++ ++ if (i != size) ++ return -E2BIG; ++ ++ return 0; ++} ++ ++static int ++splat_list_test2(struct file *file, void *arg) ++{ ++ list_t list; ++ list_item_t *li; ++ int i, list_size = 8, rc = 0; ++ ++ splat_vprint(file, SPLAT_LIST_TEST2_NAME, "Creating list\n%s", ""); ++ list_create(&list, sizeof(list_item_t), offsetof(list_item_t, li_node)); ++ ++ /* Insert all items at the list head to form a stack */ ++ splat_vprint(file, SPLAT_LIST_TEST2_NAME, ++ "Adding %d items to list head\n", list_size); ++ for (i = 0; i < list_size; i++) { ++ li = kmem_alloc(sizeof(list_item_t), KM_SLEEP); ++ if (li == NULL) { ++ rc = -ENOMEM; ++ goto out; ++ } ++ ++ list_link_init(&li->li_node); ++ li->li_data = i; ++ list_insert_head(&list, li); ++ } ++ ++ splat_vprint(file, SPLAT_LIST_TEST2_NAME, ++ "Validating %d item list is a stack\n", list_size); ++ rc = splat_list_validate(&list, list_size, LIST_ORDER_STACK, 1); ++ if (rc) ++ splat_vprint(file, SPLAT_LIST_TEST2_NAME, ++ "List validation failed, %d\n", rc); ++out: ++ /* Remove all items */ ++ splat_vprint(file, SPLAT_LIST_TEST2_NAME, ++ "Removing %d items from list head\n", list_size); ++ while ((li = list_remove_head(&list))) ++ kmem_free(li, sizeof(list_item_t)); ++ ++ splat_vprint(file, SPLAT_LIST_TEST2_NAME, "Destroying list\n%s", ""); ++ list_destroy(&list); ++ ++ return rc; ++} ++ ++static int ++splat_list_test3(struct file *file, void *arg) ++{ ++ list_t list; ++ list_item_t *li; ++ int i, list_size = 8, rc = 0; ++ ++ splat_vprint(file, SPLAT_LIST_TEST3_NAME, "Creating list\n%s", ""); ++ list_create(&list, sizeof(list_item_t), offsetof(list_item_t, li_node)); ++ ++ /* Insert all items at the list tail to form a queue */ ++ splat_vprint(file, SPLAT_LIST_TEST3_NAME, ++ "Adding %d items to list tail\n", list_size); ++ for (i = 0; i < list_size; i++) { ++ li = kmem_alloc(sizeof(list_item_t), KM_SLEEP); ++ if (li == NULL) { ++ rc = -ENOMEM; ++ goto out; ++ } ++ ++ list_link_init(&li->li_node); ++ li->li_data = i; ++ list_insert_tail(&list, li); ++ } ++ ++ splat_vprint(file, SPLAT_LIST_TEST3_NAME, ++ "Validating %d item list is a queue\n", list_size); ++ rc = splat_list_validate(&list, list_size, LIST_ORDER_QUEUE, 1); ++ if (rc) ++ splat_vprint(file, SPLAT_LIST_TEST3_NAME, ++ "List validation failed, %d\n", rc); ++out: ++ /* Remove all items */ ++ splat_vprint(file, SPLAT_LIST_TEST3_NAME, ++ "Removing %d items from list tail\n", list_size); ++ while ((li = list_remove_tail(&list))) ++ kmem_free(li, sizeof(list_item_t)); ++ ++ splat_vprint(file, SPLAT_LIST_TEST3_NAME, "Destroying list\n%s", ""); ++ list_destroy(&list); ++ ++ return rc; ++} ++ ++static int ++splat_list_test4(struct file *file, void *arg) ++{ ++ list_t list; ++ list_item_t *li_new, *li_last = NULL; ++ int i, list_size = 8, rc = 0; ++ ++ splat_vprint(file, SPLAT_LIST_TEST4_NAME, "Creating list\n%s", ""); ++ list_create(&list, sizeof(list_item_t), offsetof(list_item_t, li_node)); ++ ++ /* Insert all items after the last item to form a queue */ ++ splat_vprint(file, SPLAT_LIST_TEST4_NAME, ++ "Adding %d items each after the last 
item\n", list_size); ++ for (i = 0; i < list_size; i++) { ++ li_new = kmem_alloc(sizeof(list_item_t), KM_SLEEP); ++ if (li_new == NULL) { ++ rc = -ENOMEM; ++ goto out; ++ } ++ ++ list_link_init(&li_new->li_node); ++ li_new->li_data = i; ++ list_insert_after(&list, li_last, li_new); ++ li_last = li_new; ++ } ++ ++ splat_vprint(file, SPLAT_LIST_TEST4_NAME, ++ "Validating %d item list is a queue\n", list_size); ++ rc = splat_list_validate(&list, list_size, LIST_ORDER_QUEUE, 1); ++ if (rc) ++ splat_vprint(file, SPLAT_LIST_TEST4_NAME, ++ "List validation failed, %d\n", rc); ++out: ++ /* Remove all items */ ++ splat_vprint(file, SPLAT_LIST_TEST4_NAME, ++ "Removing %d items from list tail\n", list_size); ++ while ((li_new = list_remove_head(&list))) ++ kmem_free(li_new, sizeof(list_item_t)); ++ ++ splat_vprint(file, SPLAT_LIST_TEST4_NAME, "Destroying list\n%s", ""); ++ list_destroy(&list); ++ ++ return rc; ++} ++ ++static int ++splat_list_test5(struct file *file, void *arg) ++{ ++ list_t list; ++ list_item_t *li_new, *li_last = NULL; ++ int i, list_size = 8, rc = 0; ++ ++ splat_vprint(file, SPLAT_LIST_TEST5_NAME, "Creating list\n%s", ""); ++ list_create(&list, sizeof(list_item_t), offsetof(list_item_t, li_node)); ++ ++ /* Insert all items before the last item to form a stack */ ++ splat_vprint(file, SPLAT_LIST_TEST5_NAME, ++ "Adding %d items each before the last item\n", list_size); ++ for (i = 0; i < list_size; i++) { ++ li_new = kmem_alloc(sizeof(list_item_t), KM_SLEEP); ++ if (li_new == NULL) { ++ rc = -ENOMEM; ++ goto out; ++ } ++ ++ list_link_init(&li_new->li_node); ++ li_new->li_data = i; ++ list_insert_before(&list, li_last, li_new); ++ li_last = li_new; ++ } ++ ++ splat_vprint(file, SPLAT_LIST_TEST5_NAME, ++ "Validating %d item list is a queue\n", list_size); ++ rc = splat_list_validate(&list, list_size, LIST_ORDER_STACK, 1); ++ if (rc) ++ splat_vprint(file, SPLAT_LIST_TEST5_NAME, ++ "List validation failed, %d\n", rc); ++out: ++ /* Remove all items */ ++ splat_vprint(file, SPLAT_LIST_TEST5_NAME, ++ "Removing %d items from list tail\n", list_size); ++ while ((li_new = list_remove_tail(&list))) ++ kmem_free(li_new, sizeof(list_item_t)); ++ ++ splat_vprint(file, SPLAT_LIST_TEST5_NAME, "Destroying list\n%s", ""); ++ list_destroy(&list); ++ ++ return rc; ++} ++ ++static int ++splat_list_test6(struct file *file, void *arg) ++{ ++ list_t list; ++ list_item_t *li, *li_prev; ++ int i, list_size = 8, rc = 0; ++ ++ splat_vprint(file, SPLAT_LIST_TEST6_NAME, "Creating list\n%s", ""); ++ list_create(&list, sizeof(list_item_t), offsetof(list_item_t, li_node)); ++ ++ /* Insert all items at the list tail to form a queue */ ++ splat_vprint(file, SPLAT_LIST_TEST6_NAME, ++ "Adding %d items to list tail\n", list_size); ++ for (i = 0; i < list_size; i++) { ++ li = kmem_alloc(sizeof(list_item_t), KM_SLEEP); ++ if (li == NULL) { ++ rc = -ENOMEM; ++ goto out; ++ } ++ ++ list_link_init(&li->li_node); ++ li->li_data = i; ++ list_insert_tail(&list, li); ++ } ++ ++ /* Remove all odd items from the queue */ ++ splat_vprint(file, SPLAT_LIST_TEST6_NAME, ++ "Removing %d odd items from the list\n", list_size >> 1); ++ for (li = list_head(&list); li != NULL; li = list_next(&list, li)) { ++ if (li->li_data % 2 == 1) { ++ li_prev = list_prev(&list, li); ++ list_remove(&list, li); ++ kmem_free(li, sizeof(list_item_t)); ++ li = li_prev; ++ } ++ } ++ ++ splat_vprint(file, SPLAT_LIST_TEST6_NAME, "Validating %d item " ++ "list is a queue of only even elements\n", list_size / 2); ++ rc = splat_list_validate(&list, list_size / 2, 
LIST_ORDER_QUEUE, 2); ++ if (rc) ++ splat_vprint(file, SPLAT_LIST_TEST6_NAME, ++ "List validation failed, %d\n", rc); ++out: ++ /* Remove all items */ ++ splat_vprint(file, SPLAT_LIST_TEST6_NAME, ++ "Removing %d items from list tail\n", list_size / 2); ++ while ((li = list_remove_tail(&list))) ++ kmem_free(li, sizeof(list_item_t)); ++ ++ splat_vprint(file, SPLAT_LIST_TEST6_NAME, "Destroying list\n%s", ""); ++ list_destroy(&list); ++ ++ return rc; ++} ++ ++static int ++splat_list_test7(struct file *file, void *arg) ++{ ++ list_t list; ++ list_item_t *li; ++ int rc = 0; ++ ++ splat_vprint(file, SPLAT_LIST_TEST7_NAME, "Creating list\n%s", ""); ++ list_create(&list, sizeof(list_item_t), offsetof(list_item_t, li_node)); ++ ++ li = kmem_alloc(sizeof(list_item_t), KM_SLEEP); ++ if (li == NULL) { ++ rc = -ENOMEM; ++ goto out; ++ } ++ ++ /* Validate newly initialized node is inactive */ ++ splat_vprint(file, SPLAT_LIST_TEST7_NAME, "Init list node\n%s", ""); ++ list_link_init(&li->li_node); ++ if (list_link_active(&li->li_node)) { ++ splat_vprint(file, SPLAT_LIST_TEST7_NAME, "Newly initialized " ++ "list node should inactive %p/%p\n", ++ li->li_node.prev, li->li_node.next); ++ rc = -EINVAL; ++ goto out_li; ++ } ++ ++ /* Validate node is active when linked in to a list */ ++ splat_vprint(file, SPLAT_LIST_TEST7_NAME, "Insert list node\n%s", ""); ++ list_insert_head(&list, li); ++ if (!list_link_active(&li->li_node)) { ++ splat_vprint(file, SPLAT_LIST_TEST7_NAME, "List node " ++ "inserted in list should be active %p/%p\n", ++ li->li_node.prev, li->li_node.next); ++ rc = -EINVAL; ++ goto out; ++ } ++ ++ /* Validate node is inactive when removed from list */ ++ splat_vprint(file, SPLAT_LIST_TEST7_NAME, "Remove list node\n%s", ""); ++ list_remove(&list, li); ++ if (list_link_active(&li->li_node)) { ++ splat_vprint(file, SPLAT_LIST_TEST7_NAME, "List node " ++ "removed from list should be inactive %p/%p\n", ++ li->li_node.prev, li->li_node.next); ++ rc = -EINVAL; ++ } ++out_li: ++ kmem_free(li, sizeof(list_item_t)); ++out: ++ /* Remove all items */ ++ while ((li = list_remove_head(&list))) ++ kmem_free(li, sizeof(list_item_t)); ++ ++ splat_vprint(file, SPLAT_LIST_TEST7_NAME, "Destroying list\n%s", ""); ++ list_destroy(&list); ++ ++ return rc; ++} ++ ++splat_subsystem_t * ++splat_list_init(void) ++{ ++ splat_subsystem_t *sub; ++ ++ sub = kmalloc(sizeof(*sub), GFP_KERNEL); ++ if (sub == NULL) ++ return NULL; ++ ++ memset(sub, 0, sizeof(*sub)); ++ strncpy(sub->desc.name, SPLAT_LIST_NAME, SPLAT_NAME_SIZE); ++ strncpy(sub->desc.desc, SPLAT_LIST_DESC, SPLAT_DESC_SIZE); ++ INIT_LIST_HEAD(&sub->subsystem_list); ++ INIT_LIST_HEAD(&sub->test_list); ++ spin_lock_init(&sub->test_lock); ++ sub->desc.id = SPLAT_SUBSYSTEM_LIST; ++ ++ SPLAT_TEST_INIT(sub, SPLAT_LIST_TEST1_NAME, SPLAT_LIST_TEST1_DESC, ++ SPLAT_LIST_TEST1_ID, splat_list_test1); ++ SPLAT_TEST_INIT(sub, SPLAT_LIST_TEST2_NAME, SPLAT_LIST_TEST2_DESC, ++ SPLAT_LIST_TEST2_ID, splat_list_test2); ++ SPLAT_TEST_INIT(sub, SPLAT_LIST_TEST3_NAME, SPLAT_LIST_TEST3_DESC, ++ SPLAT_LIST_TEST3_ID, splat_list_test3); ++ SPLAT_TEST_INIT(sub, SPLAT_LIST_TEST4_NAME, SPLAT_LIST_TEST4_DESC, ++ SPLAT_LIST_TEST4_ID, splat_list_test4); ++ SPLAT_TEST_INIT(sub, SPLAT_LIST_TEST5_NAME, SPLAT_LIST_TEST5_DESC, ++ SPLAT_LIST_TEST5_ID, splat_list_test5); ++ SPLAT_TEST_INIT(sub, SPLAT_LIST_TEST6_NAME, SPLAT_LIST_TEST6_DESC, ++ SPLAT_LIST_TEST6_ID, splat_list_test6); ++ SPLAT_TEST_INIT(sub, SPLAT_LIST_TEST7_NAME, SPLAT_LIST_TEST7_DESC, ++ SPLAT_LIST_TEST7_ID, splat_list_test7); ++ ++ return 
sub; ++} ++ ++void ++splat_list_fini(splat_subsystem_t *sub) ++{ ++ ASSERT(sub); ++ ++ SPLAT_TEST_FINI(sub, SPLAT_LIST_TEST7_ID); ++ SPLAT_TEST_FINI(sub, SPLAT_LIST_TEST6_ID); ++ SPLAT_TEST_FINI(sub, SPLAT_LIST_TEST5_ID); ++ SPLAT_TEST_FINI(sub, SPLAT_LIST_TEST4_ID); ++ SPLAT_TEST_FINI(sub, SPLAT_LIST_TEST3_ID); ++ SPLAT_TEST_FINI(sub, SPLAT_LIST_TEST2_ID); ++ SPLAT_TEST_FINI(sub, SPLAT_LIST_TEST1_ID); ++ ++ kfree(sub); ++} ++ ++int ++splat_list_id(void) ++{ ++ return SPLAT_SUBSYSTEM_LIST; ++} +diff -uNr linux-3.2.33-go.orig/spl/splat/splat-mutex.c linux-3.2.33-go/spl/splat/splat-mutex.c +--- linux-3.2.33-go.orig/spl/splat/splat-mutex.c 1970-01-01 01:00:00.000000000 +0100 ++++ linux-3.2.33-go/spl/splat/splat-mutex.c 2012-11-16 23:22:32.409192874 +0100 +@@ -0,0 +1,439 @@ ++/*****************************************************************************\ ++ * Copyright (C) 2007-2010 Lawrence Livermore National Security, LLC. ++ * Copyright (C) 2007 The Regents of the University of California. ++ * Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER). ++ * Written by Brian Behlendorf . ++ * UCRL-CODE-235197 ++ * ++ * This file is part of the SPL, Solaris Porting Layer. ++ * For details, see . ++ * ++ * The SPL is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License as published by the ++ * Free Software Foundation; either version 2 of the License, or (at your ++ * option) any later version. ++ * ++ * The SPL is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * for more details. ++ * ++ * You should have received a copy of the GNU General Public License along ++ * with the SPL. If not, see . ++ ***************************************************************************** ++ * Solaris Porting LAyer Tests (SPLAT) Mutex Tests. 
++\*****************************************************************************/ ++ ++#include ++#include ++#include "splat-internal.h" ++ ++#define SPLAT_MUTEX_NAME "mutex" ++#define SPLAT_MUTEX_DESC "Kernel Mutex Tests" ++ ++#define SPLAT_MUTEX_TEST1_ID 0x0401 ++#define SPLAT_MUTEX_TEST1_NAME "tryenter" ++#define SPLAT_MUTEX_TEST1_DESC "Validate mutex_tryenter() correctness" ++ ++#define SPLAT_MUTEX_TEST2_ID 0x0402 ++#define SPLAT_MUTEX_TEST2_NAME "race" ++#define SPLAT_MUTEX_TEST2_DESC "Many threads entering/exiting the mutex" ++ ++#define SPLAT_MUTEX_TEST3_ID 0x0403 ++#define SPLAT_MUTEX_TEST3_NAME "owned" ++#define SPLAT_MUTEX_TEST3_DESC "Validate mutex_owned() correctness" ++ ++#define SPLAT_MUTEX_TEST4_ID 0x0404 ++#define SPLAT_MUTEX_TEST4_NAME "owner" ++#define SPLAT_MUTEX_TEST4_DESC "Validate mutex_owner() correctness" ++ ++#define SPLAT_MUTEX_TEST_MAGIC 0x115599DDUL ++#define SPLAT_MUTEX_TEST_NAME "mutex_test" ++#define SPLAT_MUTEX_TEST_TASKQ "mutex_taskq" ++#define SPLAT_MUTEX_TEST_COUNT 128 ++ ++typedef struct mutex_priv { ++ unsigned long mp_magic; ++ struct file *mp_file; ++ kmutex_t mp_mtx; ++ int mp_rc; ++ int mp_rc2; ++} mutex_priv_t; ++ ++static void ++splat_mutex_test1_func(void *arg) ++{ ++ mutex_priv_t *mp = (mutex_priv_t *)arg; ++ ASSERT(mp->mp_magic == SPLAT_MUTEX_TEST_MAGIC); ++ ++ if (mutex_tryenter(&mp->mp_mtx)) { ++ mp->mp_rc = 0; ++ mutex_exit(&mp->mp_mtx); ++ } else { ++ mp->mp_rc = -EBUSY; ++ } ++} ++ ++static int ++splat_mutex_test1(struct file *file, void *arg) ++{ ++ mutex_priv_t *mp; ++ taskq_t *tq; ++ int id, rc = 0; ++ ++ mp = (mutex_priv_t *)kmalloc(sizeof(*mp), GFP_KERNEL); ++ if (mp == NULL) ++ return -ENOMEM; ++ ++ tq = taskq_create(SPLAT_MUTEX_TEST_TASKQ, 1, maxclsyspri, ++ 50, INT_MAX, TASKQ_PREPOPULATE); ++ if (tq == NULL) { ++ rc = -ENOMEM; ++ goto out2; ++ } ++ ++ mp->mp_magic = SPLAT_MUTEX_TEST_MAGIC; ++ mp->mp_file = file; ++ mutex_init(&mp->mp_mtx, SPLAT_MUTEX_TEST_NAME, MUTEX_DEFAULT, NULL); ++ mutex_enter(&mp->mp_mtx); ++ ++ /* ++ * Schedule a task function which will try and acquire the mutex via ++ * mutex_tryenter() while it's held. This should fail and the task ++ * function will indicate this status in the passed private data. ++ */ ++ mp->mp_rc = -EINVAL; ++ id = taskq_dispatch(tq, splat_mutex_test1_func, mp, TQ_SLEEP); ++ if (id == 0) { ++ mutex_exit(&mp->mp_mtx); ++ splat_vprint(file, SPLAT_MUTEX_TEST1_NAME, "%s", ++ "taskq_dispatch() failed\n"); ++ rc = -EINVAL; ++ goto out; ++ } ++ ++ taskq_wait_id(tq, id); ++ mutex_exit(&mp->mp_mtx); ++ ++ /* Task function successfully acquired mutex, very bad! */ ++ if (mp->mp_rc != -EBUSY) { ++ splat_vprint(file, SPLAT_MUTEX_TEST1_NAME, ++ "mutex_trylock() incorrectly succeeded when " ++ "the mutex was held, %d/%d\n", id, mp->mp_rc); ++ rc = -EINVAL; ++ goto out; ++ } else { ++ splat_vprint(file, SPLAT_MUTEX_TEST1_NAME, "%s", ++ "mutex_trylock() correctly failed when " ++ "the mutex was held\n"); ++ } ++ ++ /* ++ * Schedule a task function which will try and acquire the mutex via ++ * mutex_tryenter() while it is not held. This should succeed and ++ * can be verified by checking the private data. ++ */ ++ mp->mp_rc = -EINVAL; ++ id = taskq_dispatch(tq, splat_mutex_test1_func, mp, TQ_SLEEP); ++ if (id == 0) { ++ splat_vprint(file, SPLAT_MUTEX_TEST1_NAME, "%s", ++ "taskq_dispatch() failed\n"); ++ rc = -EINVAL; ++ goto out; ++ } ++ ++ taskq_wait_id(tq, id); ++ ++ /* Task function failed to acquire mutex, very bad! 
*/ ++ if (mp->mp_rc != 0) { ++ splat_vprint(file, SPLAT_MUTEX_TEST1_NAME, ++ "mutex_trylock() incorrectly failed when " ++ "the mutex was not held, %d/%d\n", id, mp->mp_rc); ++ rc = -EINVAL; ++ } else { ++ splat_vprint(file, SPLAT_MUTEX_TEST1_NAME, "%s", ++ "mutex_trylock() correctly succeeded " ++ "when the mutex was not held\n"); ++ } ++out: ++ taskq_destroy(tq); ++ mutex_destroy(&(mp->mp_mtx)); ++out2: ++ kfree(mp); ++ return rc; ++} ++ ++static void ++splat_mutex_test2_func(void *arg) ++{ ++ mutex_priv_t *mp = (mutex_priv_t *)arg; ++ int rc; ++ ASSERT(mp->mp_magic == SPLAT_MUTEX_TEST_MAGIC); ++ ++ /* Read the value before sleeping and write it after we wake up to ++ * maximize the chance of a race if mutexs are not working properly */ ++ mutex_enter(&mp->mp_mtx); ++ rc = mp->mp_rc; ++ set_current_state(TASK_INTERRUPTIBLE); ++ schedule_timeout(HZ / 100); /* 1/100 of a second */ ++ VERIFY(mp->mp_rc == rc); ++ mp->mp_rc = rc + 1; ++ mutex_exit(&mp->mp_mtx); ++} ++ ++static int ++splat_mutex_test2(struct file *file, void *arg) ++{ ++ mutex_priv_t *mp; ++ taskq_t *tq; ++ int i, rc = 0; ++ ++ mp = (mutex_priv_t *)kmalloc(sizeof(*mp), GFP_KERNEL); ++ if (mp == NULL) ++ return -ENOMEM; ++ ++ /* Create several threads allowing tasks to race with each other */ ++ tq = taskq_create(SPLAT_MUTEX_TEST_TASKQ, num_online_cpus(), ++ maxclsyspri, 50, INT_MAX, TASKQ_PREPOPULATE); ++ if (tq == NULL) { ++ rc = -ENOMEM; ++ goto out; ++ } ++ ++ mp->mp_magic = SPLAT_MUTEX_TEST_MAGIC; ++ mp->mp_file = file; ++ mutex_init(&(mp->mp_mtx), SPLAT_MUTEX_TEST_NAME, MUTEX_DEFAULT, NULL); ++ mp->mp_rc = 0; ++ ++ /* ++ * Schedule N work items to the work queue each of which enters the ++ * mutex, sleeps briefly, then exits the mutex. On a multiprocessor ++ * box these work items will be handled by all available CPUs. The ++ * task function checks to ensure the tracked shared variable is ++ * always only incremented by one. Additionally, the mutex itself ++ * is instrumented such that if any two processors are in the ++ * critical region at the same time the system will panic. If the ++ * mutex is implemented right this will never happy, that's a pass. 
++ */ ++ for (i = 0; i < SPLAT_MUTEX_TEST_COUNT; i++) { ++ if (!taskq_dispatch(tq, splat_mutex_test2_func, mp, TQ_SLEEP)) { ++ splat_vprint(file, SPLAT_MUTEX_TEST2_NAME, ++ "Failed to queue task %d\n", i); ++ rc = -EINVAL; ++ } ++ } ++ ++ taskq_wait(tq); ++ ++ if (mp->mp_rc == SPLAT_MUTEX_TEST_COUNT) { ++ splat_vprint(file, SPLAT_MUTEX_TEST2_NAME, "%d racing threads " ++ "correctly entered/exited the mutex %d times\n", ++ num_online_cpus(), mp->mp_rc); ++ } else { ++ splat_vprint(file, SPLAT_MUTEX_TEST2_NAME, "%d racing threads " ++ "only processed %d/%d mutex work items\n", ++ num_online_cpus(),mp->mp_rc,SPLAT_MUTEX_TEST_COUNT); ++ rc = -EINVAL; ++ } ++ ++ taskq_destroy(tq); ++ mutex_destroy(&(mp->mp_mtx)); ++out: ++ kfree(mp); ++ return rc; ++} ++ ++static void ++splat_mutex_owned(void *priv) ++{ ++ mutex_priv_t *mp = (mutex_priv_t *)priv; ++ ++ ASSERT(mp->mp_magic == SPLAT_MUTEX_TEST_MAGIC); ++ mp->mp_rc = mutex_owned(&mp->mp_mtx); ++ mp->mp_rc2 = MUTEX_HELD(&mp->mp_mtx); ++} ++ ++static int ++splat_mutex_test3(struct file *file, void *arg) ++{ ++ mutex_priv_t mp; ++ taskq_t *tq; ++ int rc = 0; ++ ++ mp.mp_magic = SPLAT_MUTEX_TEST_MAGIC; ++ mp.mp_file = file; ++ mutex_init(&mp.mp_mtx, SPLAT_MUTEX_TEST_NAME, MUTEX_DEFAULT, NULL); ++ ++ if ((tq = taskq_create(SPLAT_MUTEX_TEST_NAME, 1, maxclsyspri, ++ 50, INT_MAX, TASKQ_PREPOPULATE)) == NULL) { ++ splat_vprint(file, SPLAT_MUTEX_TEST3_NAME, "Taskq '%s' " ++ "create failed\n", SPLAT_MUTEX_TEST3_NAME); ++ return -EINVAL; ++ } ++ ++ mutex_enter(&mp.mp_mtx); ++ ++ /* Mutex should be owned by current */ ++ if (!mutex_owned(&mp.mp_mtx)) { ++ splat_vprint(file, SPLAT_MUTEX_TEST3_NAME, "Unowned mutex " ++ "should be owned by pid %d\n", current->pid); ++ rc = -EINVAL; ++ goto out_exit; ++ } ++ ++ if (taskq_dispatch(tq, splat_mutex_owned, &mp, TQ_SLEEP) == 0) { ++ splat_vprint(file, SPLAT_MUTEX_TEST3_NAME, "Failed to " ++ "dispatch function '%s' to taskq\n", ++ sym2str(splat_mutex_owned)); ++ rc = -EINVAL; ++ goto out_exit; ++ } ++ taskq_wait(tq); ++ ++ /* Mutex should not be owned which checked from a different thread */ ++ if (mp.mp_rc || mp.mp_rc2) { ++ splat_vprint(file, SPLAT_MUTEX_TEST3_NAME, "Mutex owned by " ++ "pid %d not by taskq\n", current->pid); ++ rc = -EINVAL; ++ goto out_exit; ++ } ++ ++ mutex_exit(&mp.mp_mtx); ++ ++ /* Mutex should not be owned by current */ ++ if (mutex_owned(&mp.mp_mtx)) { ++ splat_vprint(file, SPLAT_MUTEX_TEST3_NAME, "Mutex owned by " ++ "pid %d it should be unowned\b", current->pid); ++ rc = -EINVAL; ++ goto out; ++ } ++ ++ if (taskq_dispatch(tq, splat_mutex_owned, &mp, TQ_SLEEP) == 0) { ++ splat_vprint(file, SPLAT_MUTEX_TEST3_NAME, "Failed to " ++ "dispatch function '%s' to taskq\n", ++ sym2str(splat_mutex_owned)); ++ rc = -EINVAL; ++ goto out; ++ } ++ taskq_wait(tq); ++ ++ /* Mutex should be owned by no one */ ++ if (mp.mp_rc || mp.mp_rc2) { ++ splat_vprint(file, SPLAT_MUTEX_TEST3_NAME, "Mutex owned by " ++ "no one, %d/%d disagrees\n", mp.mp_rc, mp.mp_rc2); ++ rc = -EINVAL; ++ goto out; ++ } ++ ++ splat_vprint(file, SPLAT_MUTEX_TEST3_NAME, "%s", ++ "Correct mutex_owned() behavior\n"); ++ goto out; ++out_exit: ++ mutex_exit(&mp.mp_mtx); ++out: ++ mutex_destroy(&mp.mp_mtx); ++ taskq_destroy(tq); ++ ++ return rc; ++} ++ ++static int ++splat_mutex_test4(struct file *file, void *arg) ++{ ++ kmutex_t mtx; ++ kthread_t *owner; ++ int rc = 0; ++ ++ mutex_init(&mtx, SPLAT_MUTEX_TEST_NAME, MUTEX_DEFAULT, NULL); ++ ++ /* ++ * Verify mutex owner is cleared after being dropped. 
Depending ++ * on how you build your kernel this behavior changes, ensure the ++ * SPL mutex implementation is properly detecting this. ++ */ ++ mutex_enter(&mtx); ++ msleep(100); ++ mutex_exit(&mtx); ++ if (MUTEX_HELD(&mtx)) { ++ splat_vprint(file, SPLAT_MUTEX_TEST4_NAME, "Mutex should " ++ "not be held, bit is by %p\n", mutex_owner(&mtx)); ++ rc = -EINVAL; ++ goto out; ++ } ++ ++ mutex_enter(&mtx); ++ ++ /* Mutex should be owned by current */ ++ owner = mutex_owner(&mtx); ++ if (current != owner) { ++ splat_vprint(file, SPLAT_MUTEX_TEST4_NAME, "Mutex should " ++ "be owned by pid %d but is owned by pid %d\n", ++ current->pid, owner ? owner->pid : -1); ++ rc = -EINVAL; ++ goto out; ++ } ++ ++ mutex_exit(&mtx); ++ ++ /* Mutex should not be owned by any task */ ++ owner = mutex_owner(&mtx); ++ if (owner) { ++ splat_vprint(file, SPLAT_MUTEX_TEST4_NAME, "Mutex should not " ++ "be owned but is owned by pid %d\n", owner->pid); ++ rc = -EINVAL; ++ goto out; ++ } ++ ++ splat_vprint(file, SPLAT_MUTEX_TEST3_NAME, "%s", ++ "Correct mutex_owner() behavior\n"); ++out: ++ mutex_destroy(&mtx); ++ ++ return rc; ++} ++ ++splat_subsystem_t * ++splat_mutex_init(void) ++{ ++ splat_subsystem_t *sub; ++ ++ sub = kmalloc(sizeof(*sub), GFP_KERNEL); ++ if (sub == NULL) ++ return NULL; ++ ++ memset(sub, 0, sizeof(*sub)); ++ strncpy(sub->desc.name, SPLAT_MUTEX_NAME, SPLAT_NAME_SIZE); ++ strncpy(sub->desc.desc, SPLAT_MUTEX_DESC, SPLAT_DESC_SIZE); ++ INIT_LIST_HEAD(&sub->subsystem_list); ++ INIT_LIST_HEAD(&sub->test_list); ++ spin_lock_init(&sub->test_lock); ++ sub->desc.id = SPLAT_SUBSYSTEM_MUTEX; ++ ++ SPLAT_TEST_INIT(sub, SPLAT_MUTEX_TEST1_NAME, SPLAT_MUTEX_TEST1_DESC, ++ SPLAT_MUTEX_TEST1_ID, splat_mutex_test1); ++ SPLAT_TEST_INIT(sub, SPLAT_MUTEX_TEST2_NAME, SPLAT_MUTEX_TEST2_DESC, ++ SPLAT_MUTEX_TEST2_ID, splat_mutex_test2); ++ SPLAT_TEST_INIT(sub, SPLAT_MUTEX_TEST3_NAME, SPLAT_MUTEX_TEST3_DESC, ++ SPLAT_MUTEX_TEST3_ID, splat_mutex_test3); ++ SPLAT_TEST_INIT(sub, SPLAT_MUTEX_TEST4_NAME, SPLAT_MUTEX_TEST4_DESC, ++ SPLAT_MUTEX_TEST4_ID, splat_mutex_test4); ++ ++ return sub; ++} ++ ++void ++splat_mutex_fini(splat_subsystem_t *sub) ++{ ++ ASSERT(sub); ++ SPLAT_TEST_FINI(sub, SPLAT_MUTEX_TEST4_ID); ++ SPLAT_TEST_FINI(sub, SPLAT_MUTEX_TEST3_ID); ++ SPLAT_TEST_FINI(sub, SPLAT_MUTEX_TEST2_ID); ++ SPLAT_TEST_FINI(sub, SPLAT_MUTEX_TEST1_ID); ++ ++ kfree(sub); ++} ++ ++int ++splat_mutex_id(void) { ++ return SPLAT_SUBSYSTEM_MUTEX; ++} +diff -uNr linux-3.2.33-go.orig/spl/splat/splat-random.c linux-3.2.33-go/spl/splat/splat-random.c +--- linux-3.2.33-go.orig/spl/splat/splat-random.c 1970-01-01 01:00:00.000000000 +0100 ++++ linux-3.2.33-go/spl/splat/splat-random.c 2012-11-16 23:22:32.409192874 +0100 +@@ -0,0 +1,130 @@ ++/*****************************************************************************\ ++ * Copyright (C) 2007-2010 Lawrence Livermore National Security, LLC. ++ * Copyright (C) 2007 The Regents of the University of California. ++ * Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER). ++ * Written by Brian Behlendorf . ++ * UCRL-CODE-235197 ++ * ++ * This file is part of the SPL, Solaris Porting Layer. ++ * For details, see . ++ * ++ * The SPL is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License as published by the ++ * Free Software Foundation; either version 2 of the License, or (at your ++ * option) any later version. 
++ * ++ * The SPL is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * for more details. ++ * ++ * You should have received a copy of the GNU General Public License along ++ * with the SPL. If not, see . ++ ***************************************************************************** ++ * Solaris Porting LAyer Tests (SPLAT) Random Number Generator Tests. ++\*****************************************************************************/ ++ ++#include ++#include ++#include "splat-internal.h" ++ ++#define SPLAT_KRNG_NAME "krng" ++#define SPLAT_KRNG_DESC "Kernel Random Number Generator Tests" ++ ++#define SPLAT_KRNG_TEST1_ID 0x0301 ++#define SPLAT_KRNG_TEST1_NAME "freq" ++#define SPLAT_KRNG_TEST1_DESC "Frequency Test" ++ ++#define KRNG_NUM_BITS 1048576 ++#define KRNG_NUM_BYTES (KRNG_NUM_BITS >> 3) ++#define KRNG_NUM_BITS_DIV2 (KRNG_NUM_BITS >> 1) ++#define KRNG_ERROR_RANGE 2097 ++ ++/* Random Number Generator Tests ++ There can be meny more tests on quality of the ++ random number generator. For now we are only ++ testing the frequency of particular bits. ++ We could also test consecutive sequences, ++ randomness within a particular block, etc. ++ but is probably not necessary for our purposes */ ++ ++static int ++splat_krng_test1(struct file *file, void *arg) ++{ ++ uint8_t *buf; ++ int i, j, diff, num = 0, rc = 0; ++ ++ buf = kmalloc(sizeof(*buf) * KRNG_NUM_BYTES, GFP_KERNEL); ++ if (buf == NULL) { ++ rc = -ENOMEM; ++ goto out; ++ } ++ ++ memset(buf, 0, sizeof(*buf) * KRNG_NUM_BYTES); ++ ++ /* Always succeeds */ ++ random_get_pseudo_bytes(buf, sizeof(uint8_t) * KRNG_NUM_BYTES); ++ ++ for (i = 0; i < KRNG_NUM_BYTES; i++) { ++ uint8_t tmp = buf[i]; ++ for (j = 0; j < 8; j++) { ++ uint8_t tmp2 = ((tmp >> j) & 0x01); ++ if (tmp2 == 1) { ++ num++; ++ } ++ } ++ } ++ ++ kfree(buf); ++ ++ diff = KRNG_NUM_BITS_DIV2 - num; ++ if (diff < 0) ++ diff *= -1; ++ ++ splat_print(file, "Test 1 Number of ones: %d\n", num); ++ splat_print(file, "Test 1 Difference from expected: %d Allowed: %d\n", ++ diff, KRNG_ERROR_RANGE); ++ ++ if (diff > KRNG_ERROR_RANGE) ++ rc = -ERANGE; ++out: ++ return rc; ++} ++ ++splat_subsystem_t * ++splat_krng_init(void) ++{ ++ splat_subsystem_t *sub; ++ ++ sub = kmalloc(sizeof(*sub), GFP_KERNEL); ++ if (sub == NULL) ++ return NULL; ++ ++ memset(sub, 0, sizeof(*sub)); ++ strncpy(sub->desc.name, SPLAT_KRNG_NAME, SPLAT_NAME_SIZE); ++ strncpy(sub->desc.desc, SPLAT_KRNG_DESC, SPLAT_DESC_SIZE); ++ INIT_LIST_HEAD(&sub->subsystem_list); ++ INIT_LIST_HEAD(&sub->test_list); ++ spin_lock_init(&sub->test_lock); ++ sub->desc.id = SPLAT_SUBSYSTEM_KRNG; ++ ++ SPLAT_TEST_INIT(sub, SPLAT_KRNG_TEST1_NAME, SPLAT_KRNG_TEST1_DESC, ++ SPLAT_KRNG_TEST1_ID, splat_krng_test1); ++ ++ return sub; ++} ++ ++void ++splat_krng_fini(splat_subsystem_t *sub) ++{ ++ ASSERT(sub); ++ ++ SPLAT_TEST_FINI(sub, SPLAT_KRNG_TEST1_ID); ++ ++ kfree(sub); ++} ++ ++int ++splat_krng_id(void) { ++ return SPLAT_SUBSYSTEM_KRNG; ++} +diff -uNr linux-3.2.33-go.orig/spl/splat/splat-rwlock.c linux-3.2.33-go/spl/splat/splat-rwlock.c +--- linux-3.2.33-go.orig/spl/splat/splat-rwlock.c 1970-01-01 01:00:00.000000000 +0100 ++++ linux-3.2.33-go/spl/splat/splat-rwlock.c 2012-11-16 23:22:32.409192874 +0100 +@@ -0,0 +1,678 @@ ++/*****************************************************************************\ ++ * Copyright (C) 2007-2010 Lawrence Livermore National Security, LLC. 
++ * Copyright (C) 2007 The Regents of the University of California. ++ * Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER). ++ * Written by Brian Behlendorf . ++ * UCRL-CODE-235197 ++ * ++ * This file is part of the SPL, Solaris Porting Layer. ++ * For details, see . ++ * ++ * The SPL is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License as published by the ++ * Free Software Foundation; either version 2 of the License, or (at your ++ * option) any later version. ++ * ++ * The SPL is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * for more details. ++ * ++ * You should have received a copy of the GNU General Public License along ++ * with the SPL. If not, see . ++ ***************************************************************************** ++ * Solaris Porting LAyer Tests (SPLAT) Read/Writer Lock Tests. ++\*****************************************************************************/ ++ ++#include ++#include ++#include ++#include "splat-internal.h" ++ ++#define SPLAT_RWLOCK_NAME "rwlock" ++#define SPLAT_RWLOCK_DESC "Kernel RW Lock Tests" ++ ++#define SPLAT_RWLOCK_TEST1_ID 0x0701 ++#define SPLAT_RWLOCK_TEST1_NAME "N-rd/1-wr" ++#define SPLAT_RWLOCK_TEST1_DESC "Multiple readers one writer" ++ ++#define SPLAT_RWLOCK_TEST2_ID 0x0702 ++#define SPLAT_RWLOCK_TEST2_NAME "0-rd/N-wr" ++#define SPLAT_RWLOCK_TEST2_DESC "Multiple writers" ++ ++#define SPLAT_RWLOCK_TEST3_ID 0x0703 ++#define SPLAT_RWLOCK_TEST3_NAME "held" ++#define SPLAT_RWLOCK_TEST3_DESC "RW_{LOCK|READ|WRITE}_HELD" ++ ++#define SPLAT_RWLOCK_TEST4_ID 0x0704 ++#define SPLAT_RWLOCK_TEST4_NAME "tryenter" ++#define SPLAT_RWLOCK_TEST4_DESC "Tryenter" ++ ++#define SPLAT_RWLOCK_TEST5_ID 0x0705 ++#define SPLAT_RWLOCK_TEST5_NAME "rw_downgrade" ++#define SPLAT_RWLOCK_TEST5_DESC "Write downgrade" ++ ++#define SPLAT_RWLOCK_TEST6_ID 0x0706 ++#define SPLAT_RWLOCK_TEST6_NAME "rw_tryupgrade" ++#define SPLAT_RWLOCK_TEST6_DESC "Read upgrade" ++ ++#define SPLAT_RWLOCK_TEST_MAGIC 0x115599DDUL ++#define SPLAT_RWLOCK_TEST_NAME "rwlock_test" ++#define SPLAT_RWLOCK_TEST_TASKQ "rwlock_taskq" ++#define SPLAT_RWLOCK_TEST_COUNT 8 ++ ++#define SPLAT_RWLOCK_RELEASE_INIT 0 ++#define SPLAT_RWLOCK_RELEASE_WR 1 ++#define SPLAT_RWLOCK_RELEASE_RD 2 ++ ++typedef struct rw_priv { ++ unsigned long rw_magic; ++ struct file *rw_file; ++ krwlock_t rw_rwlock; ++ spinlock_t rw_lock; ++ wait_queue_head_t rw_waitq; ++ int rw_completed; ++ int rw_holders; ++ int rw_waiters; ++ int rw_release; ++ int rw_rc; ++ krw_t rw_type; ++} rw_priv_t; ++ ++typedef struct rw_thr { ++ const char *rwt_name; ++ rw_priv_t *rwt_rwp; ++ int rwt_id; ++} rw_thr_t; ++ ++void splat_init_rw_priv(rw_priv_t *rwp, struct file *file) ++{ ++ rwp->rw_magic = SPLAT_RWLOCK_TEST_MAGIC; ++ rwp->rw_file = file; ++ rw_init(&rwp->rw_rwlock, SPLAT_RWLOCK_TEST_NAME, RW_DEFAULT, NULL); ++ spin_lock_init(&rwp->rw_lock); ++ init_waitqueue_head(&rwp->rw_waitq); ++ rwp->rw_completed = 0; ++ rwp->rw_holders = 0; ++ rwp->rw_waiters = 0; ++ rwp->rw_release = SPLAT_RWLOCK_RELEASE_INIT; ++ rwp->rw_rc = 0; ++ rwp->rw_type = 0; ++} ++ ++static int ++splat_rwlock_wr_thr(void *arg) ++{ ++ rw_thr_t *rwt = (rw_thr_t *)arg; ++ rw_priv_t *rwp = rwt->rwt_rwp; ++ uint8_t rnd; ++ char name[16]; ++ ++ ASSERT(rwp->rw_magic == SPLAT_RWLOCK_TEST_MAGIC); ++ snprintf(name, sizeof(name), "rwlock_wr_thr%d", 
rwt->rwt_id); ++ daemonize(name); ++ get_random_bytes((void *)&rnd, 1); ++ msleep((unsigned int)rnd); ++ ++ splat_vprint(rwp->rw_file, rwt->rwt_name, ++ "%s trying to acquire rwlock (%d holding/%d waiting)\n", ++ name, rwp->rw_holders, rwp->rw_waiters); ++ spin_lock(&rwp->rw_lock); ++ rwp->rw_waiters++; ++ spin_unlock(&rwp->rw_lock); ++ rw_enter(&rwp->rw_rwlock, RW_WRITER); ++ ++ spin_lock(&rwp->rw_lock); ++ rwp->rw_waiters--; ++ rwp->rw_holders++; ++ spin_unlock(&rwp->rw_lock); ++ splat_vprint(rwp->rw_file, rwt->rwt_name, ++ "%s acquired rwlock (%d holding/%d waiting)\n", ++ name, rwp->rw_holders, rwp->rw_waiters); ++ ++ /* Wait for control thread to signal we can release the write lock */ ++ wait_event_interruptible(rwp->rw_waitq, splat_locked_test(&rwp->rw_lock, ++ rwp->rw_release == SPLAT_RWLOCK_RELEASE_WR)); ++ ++ spin_lock(&rwp->rw_lock); ++ rwp->rw_completed++; ++ rwp->rw_holders--; ++ spin_unlock(&rwp->rw_lock); ++ splat_vprint(rwp->rw_file, rwt->rwt_name, ++ "%s dropped rwlock (%d holding/%d waiting)\n", ++ name, rwp->rw_holders, rwp->rw_waiters); ++ ++ rw_exit(&rwp->rw_rwlock); ++ ++ return 0; ++} ++ ++static int ++splat_rwlock_rd_thr(void *arg) ++{ ++ rw_thr_t *rwt = (rw_thr_t *)arg; ++ rw_priv_t *rwp = rwt->rwt_rwp; ++ uint8_t rnd; ++ char name[16]; ++ ++ ASSERT(rwp->rw_magic == SPLAT_RWLOCK_TEST_MAGIC); ++ snprintf(name, sizeof(name), "rwlock_rd_thr%d", rwt->rwt_id); ++ daemonize(name); ++ get_random_bytes((void *)&rnd, 1); ++ msleep((unsigned int)rnd); ++ ++ /* Don't try and take the semaphore until after someone has it */ ++ wait_event_interruptible(rwp->rw_waitq, splat_locked_test(&rwp->rw_lock, ++ rwp->rw_holders > 0)); ++ ++ splat_vprint(rwp->rw_file, rwt->rwt_name, ++ "%s trying to acquire rwlock (%d holding/%d waiting)\n", ++ name, rwp->rw_holders, rwp->rw_waiters); ++ spin_lock(&rwp->rw_lock); ++ rwp->rw_waiters++; ++ spin_unlock(&rwp->rw_lock); ++ rw_enter(&rwp->rw_rwlock, RW_READER); ++ ++ spin_lock(&rwp->rw_lock); ++ rwp->rw_waiters--; ++ rwp->rw_holders++; ++ spin_unlock(&rwp->rw_lock); ++ splat_vprint(rwp->rw_file, rwt->rwt_name, ++ "%s acquired rwlock (%d holding/%d waiting)\n", ++ name, rwp->rw_holders, rwp->rw_waiters); ++ ++ /* Wait for control thread to signal we can release the read lock */ ++ wait_event_interruptible(rwp->rw_waitq, splat_locked_test(&rwp->rw_lock, ++ rwp->rw_release == SPLAT_RWLOCK_RELEASE_RD)); ++ ++ spin_lock(&rwp->rw_lock); ++ rwp->rw_completed++; ++ rwp->rw_holders--; ++ spin_unlock(&rwp->rw_lock); ++ splat_vprint(rwp->rw_file, rwt->rwt_name, ++ "%s dropped rwlock (%d holding/%d waiting)\n", ++ name, rwp->rw_holders, rwp->rw_waiters); ++ ++ rw_exit(&rwp->rw_rwlock); ++ ++ return 0; ++} ++ ++static int ++splat_rwlock_test1(struct file *file, void *arg) ++{ ++ int i, count = 0, rc = 0; ++ long pids[SPLAT_RWLOCK_TEST_COUNT]; ++ rw_thr_t rwt[SPLAT_RWLOCK_TEST_COUNT]; ++ rw_priv_t *rwp; ++ ++ rwp = (rw_priv_t *)kmalloc(sizeof(*rwp), GFP_KERNEL); ++ if (rwp == NULL) ++ return -ENOMEM; ++ ++ splat_init_rw_priv(rwp, file); ++ ++ /* Create some threads, the exact number isn't important just as ++ * long as we know how many we managed to create and should expect. 
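The hold/wait accounting driven here (one writer inside the lock, readers queued behind it, then released together) has a compact userspace analogue; the sketch below uses POSIX rwlocks and hypothetical names, not the SPL krwlock API:

/* Userspace sketch: writer takes the lock first, readers queue behind it,
 * then enter concurrently once the writer drops it.
 * Build with: cc -pthread rw.c */
#include <pthread.h>
#include <stdio.h>
#include <unistd.h>

#define NREADERS 7

static pthread_rwlock_t rwl = PTHREAD_RWLOCK_INITIALIZER;
static int holders;    /* readers currently inside the lock */

static void *reader(void *arg)
{
    long id = (long)arg;
    pthread_rwlock_rdlock(&rwl);   /* blocks while the writer holds it */
    int h = __atomic_add_fetch(&holders, 1, __ATOMIC_SEQ_CST);
    printf("reader %ld in (%d holding)\n", id, h);
    usleep(100000);
    __atomic_sub_fetch(&holders, 1, __ATOMIC_SEQ_CST);
    pthread_rwlock_unlock(&rwl);
    return NULL;
}

int main(void)
{
    pthread_t t[NREADERS];

    pthread_rwlock_wrlock(&rwl);   /* writer holds the lock first */
    for (long i = 0; i < NREADERS; i++)
        pthread_create(&t[i], NULL, reader, (void *)i);
    sleep(1);                      /* readers are now queued behind the writer */
    printf("releasing writer; readers may enter concurrently\n");
    pthread_rwlock_unlock(&rwl);

    for (int i = 0; i < NREADERS; i++)
        pthread_join(t[i], NULL);
    return 0;
}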
*/ ++ ++ ++ ++ for (i = 0; i < SPLAT_RWLOCK_TEST_COUNT; i++) { ++ rwt[i].rwt_rwp = rwp; ++ rwt[i].rwt_id = i; ++ rwt[i].rwt_name = SPLAT_RWLOCK_TEST1_NAME; ++ ++ /* The first thread will be the writer */ ++ if (i == 0) ++ pids[i] = kernel_thread(splat_rwlock_wr_thr, &rwt[i], 0); ++ else ++ pids[i] = kernel_thread(splat_rwlock_rd_thr, &rwt[i], 0); ++ ++ if (pids[i] >= 0) ++ count++; ++ } ++ ++ /* Wait for the writer */ ++ while (splat_locked_test(&rwp->rw_lock, rwp->rw_holders == 0)) { ++ wake_up_interruptible(&rwp->rw_waitq); ++ msleep(100); ++ } ++ ++ /* Wait for 'count-1' readers */ ++ while (splat_locked_test(&rwp->rw_lock, rwp->rw_waiters < count - 1)) { ++ wake_up_interruptible(&rwp->rw_waitq); ++ msleep(100); ++ } ++ ++ /* Verify there is only one lock holder */ ++ if (splat_locked_test(&rwp->rw_lock, rwp->rw_holders) != 1) { ++ splat_vprint(file, SPLAT_RWLOCK_TEST1_NAME, "Only 1 holder " ++ "expected for rwlock (%d holding/%d waiting)\n", ++ rwp->rw_holders, rwp->rw_waiters); ++ rc = -EINVAL; ++ } ++ ++ /* Verify 'count-1' readers */ ++ if (splat_locked_test(&rwp->rw_lock, rwp->rw_waiters != count - 1)) { ++ splat_vprint(file, SPLAT_RWLOCK_TEST1_NAME, "Only %d waiters " ++ "expected for rwlock (%d holding/%d waiting)\n", ++ count - 1, rwp->rw_holders, rwp->rw_waiters); ++ rc = -EINVAL; ++ } ++ ++ /* Signal the writer to release, allows readers to acquire */ ++ spin_lock(&rwp->rw_lock); ++ rwp->rw_release = SPLAT_RWLOCK_RELEASE_WR; ++ wake_up_interruptible(&rwp->rw_waitq); ++ spin_unlock(&rwp->rw_lock); ++ ++ /* Wait for 'count-1' readers to hold the lock */ ++ while (splat_locked_test(&rwp->rw_lock, rwp->rw_holders < count - 1)) { ++ wake_up_interruptible(&rwp->rw_waitq); ++ msleep(100); ++ } ++ ++ /* Verify there are 'count-1' readers */ ++ if (splat_locked_test(&rwp->rw_lock, rwp->rw_holders != count - 1)) { ++ splat_vprint(file, SPLAT_RWLOCK_TEST1_NAME, "Only %d holders " ++ "expected for rwlock (%d holding/%d waiting)\n", ++ count - 1, rwp->rw_holders, rwp->rw_waiters); ++ rc = -EINVAL; ++ } ++ ++ /* Release 'count-1' readers */ ++ spin_lock(&rwp->rw_lock); ++ rwp->rw_release = SPLAT_RWLOCK_RELEASE_RD; ++ wake_up_interruptible(&rwp->rw_waitq); ++ spin_unlock(&rwp->rw_lock); ++ ++ /* Wait for the test to complete */ ++ while (splat_locked_test(&rwp->rw_lock, ++ rwp->rw_holders>0 || rwp->rw_waiters>0)) ++ msleep(100); ++ ++ rw_destroy(&(rwp->rw_rwlock)); ++ kfree(rwp); ++ ++ return rc; ++} ++ ++static void ++splat_rwlock_test2_func(void *arg) ++{ ++ rw_priv_t *rwp = (rw_priv_t *)arg; ++ int rc; ++ ASSERT(rwp->rw_magic == SPLAT_RWLOCK_TEST_MAGIC); ++ ++ /* Read the value before sleeping and write it after we wake up to ++ * maximize the chance of a race if rwlocks are not working properly */ ++ rw_enter(&rwp->rw_rwlock, RW_WRITER); ++ rc = rwp->rw_rc; ++ set_current_state(TASK_INTERRUPTIBLE); ++ schedule_timeout(HZ / 100); /* 1/100 of a second */ ++ VERIFY(rwp->rw_rc == rc); ++ rwp->rw_rc = rc + 1; ++ rw_exit(&rwp->rw_rwlock); ++} ++ ++static int ++splat_rwlock_test2(struct file *file, void *arg) ++{ ++ rw_priv_t *rwp; ++ taskq_t *tq; ++ int i, rc = 0, tq_count = 256; ++ ++ rwp = (rw_priv_t *)kmalloc(sizeof(*rwp), GFP_KERNEL); ++ if (rwp == NULL) ++ return -ENOMEM; ++ ++ splat_init_rw_priv(rwp, file); ++ ++ /* Create several threads allowing tasks to race with each other */ ++ tq = taskq_create(SPLAT_RWLOCK_TEST_TASKQ, num_online_cpus(), ++ maxclsyspri, 50, INT_MAX, TASKQ_PREPOPULATE); ++ if (tq == NULL) { ++ rc = -ENOMEM; ++ goto out; ++ } ++ ++ /* ++ * Schedule N work items to 
the work queue each of which enters the ++ * writer rwlock, sleeps briefly, then exits the writer rwlock. On a ++ * multiprocessor box these work items will be handled by all available ++ * CPUs. The task function checks to ensure the tracked shared variable ++ * is always only incremented by one. Additionally, the rwlock itself ++ * is instrumented such that if any two processors are in the ++ * critical region at the same time the system will panic. If the ++ * rwlock is implemented right this will never happy, that's a pass. ++ */ ++ for (i = 0; i < tq_count; i++) { ++ if (!taskq_dispatch(tq,splat_rwlock_test2_func,rwp,TQ_SLEEP)) { ++ splat_vprint(file, SPLAT_RWLOCK_TEST2_NAME, ++ "Failed to queue task %d\n", i); ++ rc = -EINVAL; ++ } ++ } ++ ++ taskq_wait(tq); ++ ++ if (rwp->rw_rc == tq_count) { ++ splat_vprint(file, SPLAT_RWLOCK_TEST2_NAME, "%d racing threads " ++ "correctly entered/exited the rwlock %d times\n", ++ num_online_cpus(), rwp->rw_rc); ++ } else { ++ splat_vprint(file, SPLAT_RWLOCK_TEST2_NAME, "%d racing threads " ++ "only processed %d/%d w rwlock work items\n", ++ num_online_cpus(), rwp->rw_rc, tq_count); ++ rc = -EINVAL; ++ } ++ ++ taskq_destroy(tq); ++ rw_destroy(&(rwp->rw_rwlock)); ++out: ++ kfree(rwp); ++ return rc; ++} ++ ++#define splat_rwlock_test3_helper(rwp,rex1,rex2,wex1,wex2,held_func,rc) \ ++do { \ ++ int result, _rc1_, _rc2_, _rc3_, _rc4_; \ ++ \ ++ rc = 0; \ ++ rw_enter(&(rwp)->rw_rwlock, RW_READER); \ ++ _rc1_ = ((result = held_func(&(rwp)->rw_rwlock)) != rex1); \ ++ splat_vprint(file, SPLAT_RWLOCK_TEST3_NAME, "%s" #held_func \ ++ " returned %d (expected %d) when RW_READER\n", \ ++ _rc1_ ? "Fail " : "", result, rex1); \ ++ rw_exit(&(rwp)->rw_rwlock); \ ++ _rc2_ = ((result = held_func(&(rwp)->rw_rwlock)) != rex2); \ ++ splat_vprint(file, SPLAT_RWLOCK_TEST3_NAME, "%s" #held_func \ ++ " returned %d (expected %d) when !RW_READER\n", \ ++ _rc2_ ? "Fail " : "", result, rex2); \ ++ \ ++ rw_enter(&(rwp)->rw_rwlock, RW_WRITER); \ ++ _rc3_ = ((result = held_func(&(rwp)->rw_rwlock)) != wex1); \ ++ splat_vprint(file, SPLAT_RWLOCK_TEST3_NAME, "%s" #held_func \ ++ " returned %d (expected %d) when RW_WRITER\n", \ ++ _rc3_ ? "Fail " : "", result, wex1); \ ++ rw_exit(&(rwp)->rw_rwlock); \ ++ _rc4_ = ((result = held_func(&(rwp)->rw_rwlock)) != wex2); \ ++ splat_vprint(file, SPLAT_RWLOCK_TEST3_NAME, "%s" #held_func \ ++ " returned %d (expected %d) when !RW_WRITER\n", \ ++ _rc4_ ? "Fail " : "", result, wex2); \ ++ \ ++ rc = ((_rc1_ || _rc2_ || _rc3_ || _rc4_) ? -EINVAL : 0); \ ++} while(0); ++ ++static int ++splat_rwlock_test3(struct file *file, void *arg) ++{ ++ rw_priv_t *rwp; ++ int rc1, rc2, rc3; ++ ++ rwp = (rw_priv_t *)kmalloc(sizeof(*rwp), GFP_KERNEL); ++ if (rwp == NULL) ++ return -ENOMEM; ++ ++ splat_init_rw_priv(rwp, file); ++ ++ splat_rwlock_test3_helper(rwp, 1, 0, 1, 0, RW_LOCK_HELD, rc1); ++ splat_rwlock_test3_helper(rwp, 1, 0, 0, 0, RW_READ_HELD, rc2); ++ splat_rwlock_test3_helper(rwp, 0, 0, 1, 0, RW_WRITE_HELD, rc3); ++ ++ rw_destroy(&rwp->rw_rwlock); ++ kfree(rwp); ++ ++ return ((rc1 || rc2 || rc3) ? 
-EINVAL : 0); ++} ++ ++static void ++splat_rwlock_test4_func(void *arg) ++{ ++ rw_priv_t *rwp = (rw_priv_t *)arg; ++ ASSERT(rwp->rw_magic == SPLAT_RWLOCK_TEST_MAGIC); ++ ++ if (rw_tryenter(&rwp->rw_rwlock, rwp->rw_type)) { ++ rwp->rw_rc = 0; ++ rw_exit(&rwp->rw_rwlock); ++ } else { ++ rwp->rw_rc = -EBUSY; ++ } ++} ++ ++static char * ++splat_rwlock_test4_name(krw_t type) ++{ ++ switch (type) { ++ case RW_NONE: return "RW_NONE"; ++ case RW_WRITER: return "RW_WRITER"; ++ case RW_READER: return "RW_READER"; ++ } ++ ++ return NULL; ++} ++ ++static int ++splat_rwlock_test4_type(taskq_t *tq, rw_priv_t *rwp, int expected_rc, ++ krw_t holder_type, krw_t try_type) ++{ ++ int id, rc = 0; ++ ++ /* Schedule a task function which will try and acquire the rwlock ++ * using type try_type while the rwlock is being held as holder_type. ++ * The result must match expected_rc for the test to pass */ ++ rwp->rw_rc = -EINVAL; ++ rwp->rw_type = try_type; ++ ++ if (holder_type == RW_WRITER || holder_type == RW_READER) ++ rw_enter(&rwp->rw_rwlock, holder_type); ++ ++ id = taskq_dispatch(tq, splat_rwlock_test4_func, rwp, TQ_SLEEP); ++ if (id == 0) { ++ splat_vprint(rwp->rw_file, SPLAT_RWLOCK_TEST4_NAME, "%s", ++ "taskq_dispatch() failed\n"); ++ rc = -EINVAL; ++ goto out; ++ } ++ ++ taskq_wait_id(tq, id); ++ ++ if (rwp->rw_rc != expected_rc) ++ rc = -EINVAL; ++ ++ splat_vprint(rwp->rw_file, SPLAT_RWLOCK_TEST4_NAME, ++ "%srw_tryenter(%s) returned %d (expected %d) when %s\n", ++ rc ? "Fail " : "", splat_rwlock_test4_name(try_type), ++ rwp->rw_rc, expected_rc, ++ splat_rwlock_test4_name(holder_type)); ++out: ++ if (holder_type == RW_WRITER || holder_type == RW_READER) ++ rw_exit(&rwp->rw_rwlock); ++ ++ return rc; ++} ++ ++static int ++splat_rwlock_test4(struct file *file, void *arg) ++{ ++ rw_priv_t *rwp; ++ taskq_t *tq; ++ int rc = 0, rc1, rc2, rc3, rc4, rc5, rc6; ++ ++ rwp = (rw_priv_t *)kmalloc(sizeof(*rwp), GFP_KERNEL); ++ if (rwp == NULL) ++ return -ENOMEM; ++ ++ tq = taskq_create(SPLAT_RWLOCK_TEST_TASKQ, 1, maxclsyspri, ++ 50, INT_MAX, TASKQ_PREPOPULATE); ++ if (tq == NULL) { ++ rc = -ENOMEM; ++ goto out; ++ } ++ ++ splat_init_rw_priv(rwp, file); ++ ++ /* Validate all combinations of rw_tryenter() contention */ ++ rc1 = splat_rwlock_test4_type(tq, rwp, -EBUSY, RW_WRITER, RW_WRITER); ++ rc2 = splat_rwlock_test4_type(tq, rwp, -EBUSY, RW_WRITER, RW_READER); ++ rc3 = splat_rwlock_test4_type(tq, rwp, -EBUSY, RW_READER, RW_WRITER); ++ rc4 = splat_rwlock_test4_type(tq, rwp, 0, RW_READER, RW_READER); ++ rc5 = splat_rwlock_test4_type(tq, rwp, 0, RW_NONE, RW_WRITER); ++ rc6 = splat_rwlock_test4_type(tq, rwp, 0, RW_NONE, RW_READER); ++ ++ if (rc1 || rc2 || rc3 || rc4 || rc5 || rc6) ++ rc = -EINVAL; ++ ++ taskq_destroy(tq); ++out: ++ rw_destroy(&(rwp->rw_rwlock)); ++ kfree(rwp); ++ ++ return rc; ++} ++ ++static int ++splat_rwlock_test5(struct file *file, void *arg) ++{ ++ rw_priv_t *rwp; ++ int rc = -EINVAL; ++ ++ rwp = (rw_priv_t *)kmalloc(sizeof(*rwp), GFP_KERNEL); ++ if (rwp == NULL) ++ return -ENOMEM; ++ ++ splat_init_rw_priv(rwp, file); ++ ++ rw_enter(&rwp->rw_rwlock, RW_WRITER); ++ if (!RW_WRITE_HELD(&rwp->rw_rwlock)) { ++ splat_vprint(file, SPLAT_RWLOCK_TEST5_NAME, ++ "rwlock should be write lock: %d\n", ++ RW_WRITE_HELD(&rwp->rw_rwlock)); ++ goto out; ++ } ++ ++ rw_downgrade(&rwp->rw_rwlock); ++ if (!RW_READ_HELD(&rwp->rw_rwlock)) { ++ splat_vprint(file, SPLAT_RWLOCK_TEST5_NAME, ++ "rwlock should be read lock: %d\n", ++ RW_READ_HELD(&rwp->rw_rwlock)); ++ goto out; ++ } ++ ++ rc = 0; ++ splat_vprint(file, 
SPLAT_RWLOCK_TEST5_NAME, "%s", ++ "rwlock properly downgraded\n"); ++out: ++ rw_exit(&rwp->rw_rwlock); ++ rw_destroy(&rwp->rw_rwlock); ++ kfree(rwp); ++ ++ return rc; ++} ++ ++static int ++splat_rwlock_test6(struct file *file, void *arg) ++{ ++ rw_priv_t *rwp; ++ int rc; ++ ++ rwp = (rw_priv_t *)kmalloc(sizeof(*rwp), GFP_KERNEL); ++ if (rwp == NULL) ++ return -ENOMEM; ++ ++ splat_init_rw_priv(rwp, file); ++ ++ rw_enter(&rwp->rw_rwlock, RW_READER); ++ if (!RW_READ_HELD(&rwp->rw_rwlock)) { ++ splat_vprint(file, SPLAT_RWLOCK_TEST6_NAME, ++ "rwlock should be read lock: %d\n", ++ RW_READ_HELD(&rwp->rw_rwlock)); ++ rc = -ENOLCK; ++ goto out; ++ } ++ ++#if defined(CONFIG_RWSEM_GENERIC_SPINLOCK) ++ /* With one reader upgrade should never fail. */ ++ rc = rw_tryupgrade(&rwp->rw_rwlock); ++ if (!rc) { ++ splat_vprint(file, SPLAT_RWLOCK_TEST6_NAME, ++ "rwlock failed upgrade from reader: %d\n", ++ RW_READ_HELD(&rwp->rw_rwlock)); ++ rc = -ENOLCK; ++ goto out; ++ } ++ ++ if (RW_READ_HELD(&rwp->rw_rwlock) || !RW_WRITE_HELD(&rwp->rw_rwlock)) { ++ splat_vprint(file, SPLAT_RWLOCK_TEST6_NAME, "rwlock should " ++ "have 0 (not %d) reader and 1 (not %d) writer\n", ++ RW_READ_HELD(&rwp->rw_rwlock), ++ RW_WRITE_HELD(&rwp->rw_rwlock)); ++ goto out; ++ } ++ ++ rc = 0; ++ splat_vprint(file, SPLAT_RWLOCK_TEST6_NAME, "%s", ++ "rwlock properly upgraded\n"); ++#else ++ rc = 0; ++ splat_vprint(file, SPLAT_RWLOCK_TEST6_NAME, "%s", ++ "rw_tryupgrade() is disabled for this arch\n"); ++#endif ++out: ++ rw_exit(&rwp->rw_rwlock); ++ rw_destroy(&rwp->rw_rwlock); ++ kfree(rwp); ++ ++ return rc; ++} ++ ++splat_subsystem_t * ++splat_rwlock_init(void) ++{ ++ splat_subsystem_t *sub; ++ ++ sub = kmalloc(sizeof(*sub), GFP_KERNEL); ++ if (sub == NULL) ++ return NULL; ++ ++ memset(sub, 0, sizeof(*sub)); ++ strncpy(sub->desc.name, SPLAT_RWLOCK_NAME, SPLAT_NAME_SIZE); ++ strncpy(sub->desc.desc, SPLAT_RWLOCK_DESC, SPLAT_DESC_SIZE); ++ INIT_LIST_HEAD(&sub->subsystem_list); ++ INIT_LIST_HEAD(&sub->test_list); ++ spin_lock_init(&sub->test_lock); ++ sub->desc.id = SPLAT_SUBSYSTEM_RWLOCK; ++ ++ SPLAT_TEST_INIT(sub, SPLAT_RWLOCK_TEST1_NAME, SPLAT_RWLOCK_TEST1_DESC, ++ SPLAT_RWLOCK_TEST1_ID, splat_rwlock_test1); ++ SPLAT_TEST_INIT(sub, SPLAT_RWLOCK_TEST2_NAME, SPLAT_RWLOCK_TEST2_DESC, ++ SPLAT_RWLOCK_TEST2_ID, splat_rwlock_test2); ++ SPLAT_TEST_INIT(sub, SPLAT_RWLOCK_TEST3_NAME, SPLAT_RWLOCK_TEST3_DESC, ++ SPLAT_RWLOCK_TEST3_ID, splat_rwlock_test3); ++ SPLAT_TEST_INIT(sub, SPLAT_RWLOCK_TEST4_NAME, SPLAT_RWLOCK_TEST4_DESC, ++ SPLAT_RWLOCK_TEST4_ID, splat_rwlock_test4); ++ SPLAT_TEST_INIT(sub, SPLAT_RWLOCK_TEST5_NAME, SPLAT_RWLOCK_TEST5_DESC, ++ SPLAT_RWLOCK_TEST5_ID, splat_rwlock_test5); ++ SPLAT_TEST_INIT(sub, SPLAT_RWLOCK_TEST6_NAME, SPLAT_RWLOCK_TEST6_DESC, ++ SPLAT_RWLOCK_TEST6_ID, splat_rwlock_test6); ++ ++ return sub; ++} ++ ++void ++splat_rwlock_fini(splat_subsystem_t *sub) ++{ ++ ASSERT(sub); ++ SPLAT_TEST_FINI(sub, SPLAT_RWLOCK_TEST6_ID); ++ SPLAT_TEST_FINI(sub, SPLAT_RWLOCK_TEST5_ID); ++ SPLAT_TEST_FINI(sub, SPLAT_RWLOCK_TEST4_ID); ++ SPLAT_TEST_FINI(sub, SPLAT_RWLOCK_TEST3_ID); ++ SPLAT_TEST_FINI(sub, SPLAT_RWLOCK_TEST2_ID); ++ SPLAT_TEST_FINI(sub, SPLAT_RWLOCK_TEST1_ID); ++ kfree(sub); ++} ++ ++int ++splat_rwlock_id(void) { ++ return SPLAT_SUBSYSTEM_RWLOCK; ++} +diff -uNr linux-3.2.33-go.orig/spl/splat/splat-taskq.c linux-3.2.33-go/spl/splat/splat-taskq.c +--- linux-3.2.33-go.orig/spl/splat/splat-taskq.c 1970-01-01 01:00:00.000000000 +0100 ++++ linux-3.2.33-go/spl/splat/splat-taskq.c 2012-11-16 23:22:32.409192874 +0100 +@@ -0,0 +1,1163 
@@ ++/*****************************************************************************\ ++ * Copyright (C) 2007-2010 Lawrence Livermore National Security, LLC. ++ * Copyright (C) 2007 The Regents of the University of California. ++ * Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER). ++ * Written by Brian Behlendorf . ++ * UCRL-CODE-235197 ++ * ++ * This file is part of the SPL, Solaris Porting Layer. ++ * For details, see . ++ * ++ * The SPL is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License as published by the ++ * Free Software Foundation; either version 2 of the License, or (at your ++ * option) any later version. ++ * ++ * The SPL is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * for more details. ++ * ++ * You should have received a copy of the GNU General Public License along ++ * with the SPL. If not, see . ++ ***************************************************************************** ++ * Solaris Porting LAyer Tests (SPLAT) Task Queue Tests. ++\*****************************************************************************/ ++ ++#include ++#include ++#include "splat-internal.h" ++ ++#define SPLAT_TASKQ_NAME "taskq" ++#define SPLAT_TASKQ_DESC "Kernel Task Queue Tests" ++ ++#define SPLAT_TASKQ_TEST1_ID 0x0201 ++#define SPLAT_TASKQ_TEST1_NAME "single" ++#define SPLAT_TASKQ_TEST1_DESC "Single task queue, single task" ++ ++#define SPLAT_TASKQ_TEST2_ID 0x0202 ++#define SPLAT_TASKQ_TEST2_NAME "multiple" ++#define SPLAT_TASKQ_TEST2_DESC "Multiple task queues, multiple tasks" ++ ++#define SPLAT_TASKQ_TEST3_ID 0x0203 ++#define SPLAT_TASKQ_TEST3_NAME "system" ++#define SPLAT_TASKQ_TEST3_DESC "System task queue, multiple tasks" ++ ++#define SPLAT_TASKQ_TEST4_ID 0x0204 ++#define SPLAT_TASKQ_TEST4_NAME "wait" ++#define SPLAT_TASKQ_TEST4_DESC "Multiple task waiting" ++ ++#define SPLAT_TASKQ_TEST5_ID 0x0205 ++#define SPLAT_TASKQ_TEST5_NAME "order" ++#define SPLAT_TASKQ_TEST5_DESC "Correct task ordering" ++ ++#define SPLAT_TASKQ_TEST6_ID 0x0206 ++#define SPLAT_TASKQ_TEST6_NAME "front" ++#define SPLAT_TASKQ_TEST6_DESC "Correct ordering with TQ_FRONT flag" ++ ++#define SPLAT_TASKQ_TEST7_ID 0x0207 ++#define SPLAT_TASKQ_TEST7_NAME "recurse" ++#define SPLAT_TASKQ_TEST7_DESC "Single task queue, recursive dispatch" ++ ++#define SPLAT_TASKQ_TEST8_ID 0x0208 ++#define SPLAT_TASKQ_TEST8_NAME "contention" ++#define SPLAT_TASKQ_TEST8_DESC "1 queue, 100 threads, 131072 tasks" ++ ++#define SPLAT_TASKQ_ORDER_MAX 8 ++#define SPLAT_TASKQ_DEPTH_MAX 16 ++ ++ ++typedef struct splat_taskq_arg { ++ int flag; ++ int id; ++ atomic_t count; ++ int order[SPLAT_TASKQ_ORDER_MAX]; ++ unsigned int depth; ++ taskq_t *tq; ++ taskq_ent_t *tqe; ++ spinlock_t lock; ++ struct file *file; ++ const char *name; ++} splat_taskq_arg_t; ++ ++typedef struct splat_taskq_id { ++ int id; ++ splat_taskq_arg_t *arg; ++} splat_taskq_id_t; ++ ++/* ++ * Create a taskq, queue a task, wait until task completes, ensure ++ * task ran properly, cleanup taskq. 
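The create/dispatch/wait/destroy sequence described above is the core loop all of the taskq tests share. A condensed sketch of that lifecycle, using only the taskq calls that already appear in this patch (kernel/SPL context assumed, so this is illustrative rather than a standalone program; the function and variable names are made up):

/* Condensed sketch of the taskq lifecycle exercised by the tests below. */
static void hello_task(void *arg)
{
    int *done = arg;
    *done = 1;                              /* observable side effect */
}

static int taskq_lifecycle_sketch(void)
{
    taskq_t *tq;
    taskqid_t id;
    int done = 0;

    tq = taskq_create("sketch", 1, maxclsyspri, 50, INT_MAX,
                      TASKQ_PREPOPULATE);   /* one worker thread */
    if (tq == NULL)
        return -EINVAL;

    id = taskq_dispatch(tq, hello_task, &done, TQ_SLEEP);
    if (id == 0) {                          /* id of 0 means dispatch failed */
        taskq_destroy(tq);
        return -EINVAL;
    }

    taskq_wait(tq);                         /* block until all tasks finish */
    taskq_destroy(tq);

    return done ? 0 : -EINVAL;
}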
++ */ ++static void ++splat_taskq_test13_func(void *arg) ++{ ++ splat_taskq_arg_t *tq_arg = (splat_taskq_arg_t *)arg; ++ ++ ASSERT(tq_arg); ++ splat_vprint(tq_arg->file, SPLAT_TASKQ_TEST1_NAME, ++ "Taskq '%s' function '%s' setting flag\n", ++ tq_arg->name, sym2str(splat_taskq_test13_func)); ++ tq_arg->flag = 1; ++} ++ ++static int ++splat_taskq_test1_impl(struct file *file, void *arg, boolean_t prealloc) ++{ ++ taskq_t *tq; ++ taskqid_t id; ++ splat_taskq_arg_t tq_arg; ++ taskq_ent_t tqe; ++ ++ taskq_init_ent(&tqe); ++ ++ splat_vprint(file, SPLAT_TASKQ_TEST1_NAME, ++ "Taskq '%s' creating (%s dispatch)\n", ++ SPLAT_TASKQ_TEST1_NAME, ++ prealloc ? "prealloc" : "dynamic"); ++ if ((tq = taskq_create(SPLAT_TASKQ_TEST1_NAME, 1, maxclsyspri, ++ 50, INT_MAX, TASKQ_PREPOPULATE)) == NULL) { ++ splat_vprint(file, SPLAT_TASKQ_TEST1_NAME, ++ "Taskq '%s' create failed\n", ++ SPLAT_TASKQ_TEST1_NAME); ++ return -EINVAL; ++ } ++ ++ tq_arg.flag = 0; ++ tq_arg.id = 0; ++ tq_arg.file = file; ++ tq_arg.name = SPLAT_TASKQ_TEST1_NAME; ++ ++ splat_vprint(file, SPLAT_TASKQ_TEST1_NAME, ++ "Taskq '%s' function '%s' dispatching\n", ++ tq_arg.name, sym2str(splat_taskq_test13_func)); ++ if (prealloc) { ++ taskq_dispatch_ent(tq, splat_taskq_test13_func, ++ &tq_arg, TQ_SLEEP, &tqe); ++ id = tqe.tqent_id; ++ } else { ++ id = taskq_dispatch(tq, splat_taskq_test13_func, ++ &tq_arg, TQ_SLEEP); ++ } ++ ++ if (id == 0) { ++ splat_vprint(file, SPLAT_TASKQ_TEST1_NAME, ++ "Taskq '%s' function '%s' dispatch failed\n", ++ tq_arg.name, sym2str(splat_taskq_test13_func)); ++ taskq_destroy(tq); ++ return -EINVAL; ++ } ++ ++ splat_vprint(file, SPLAT_TASKQ_TEST1_NAME, "Taskq '%s' waiting\n", ++ tq_arg.name); ++ taskq_wait(tq); ++ splat_vprint(file, SPLAT_TASKQ_TEST1_NAME, "Taskq '%s' destroying\n", ++ tq_arg.name); ++ ++ taskq_destroy(tq); ++ ++ return (tq_arg.flag) ? 0 : -EINVAL; ++} ++ ++static int ++splat_taskq_test1(struct file *file, void *arg) ++{ ++ int rc; ++ ++ rc = splat_taskq_test1_impl(file, arg, B_FALSE); ++ if (rc) ++ return rc; ++ ++ rc = splat_taskq_test1_impl(file, arg, B_TRUE); ++ ++ return rc; ++} ++ ++/* ++ * Create multiple taskq's, each with multiple tasks, wait until ++ * all tasks complete, ensure all tasks ran properly and in the ++ * correct order. Run order must be the same as the order submitted ++ * because we only have 1 thread per taskq. Finally cleanup the taskq. 
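The expected per-queue result checked further down, flag == i * 2 + 1, follows directly from dispatch order on a single-threaded queue: the flag starts at the queue index i, the first function doubles it, and the second adds one. A trivial userspace check of that arithmetic, purely for illustration:

/* Worked check of the per-queue arithmetic: i -> 2i -> 2i + 1, e.g. 3 -> 6 -> 7. */
#include <assert.h>

int main(void)
{
    for (int i = 0; i < 8; i++) {
        int flag = i;
        flag *= 2;              /* mirrors the first dispatched function  */
        flag += 1;              /* mirrors the second dispatched function */
        assert(flag == i * 2 + 1);
    }
    return 0;
}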
++ */ ++static void ++splat_taskq_test2_func1(void *arg) ++{ ++ splat_taskq_arg_t *tq_arg = (splat_taskq_arg_t *)arg; ++ ++ ASSERT(tq_arg); ++ splat_vprint(tq_arg->file, SPLAT_TASKQ_TEST2_NAME, ++ "Taskq '%s/%d' function '%s' flag = %d = %d * 2\n", ++ tq_arg->name, tq_arg->id, ++ sym2str(splat_taskq_test2_func1), ++ tq_arg->flag * 2, tq_arg->flag); ++ tq_arg->flag *= 2; ++} ++ ++static void ++splat_taskq_test2_func2(void *arg) ++{ ++ splat_taskq_arg_t *tq_arg = (splat_taskq_arg_t *)arg; ++ ++ ASSERT(tq_arg); ++ splat_vprint(tq_arg->file, SPLAT_TASKQ_TEST2_NAME, ++ "Taskq '%s/%d' function '%s' flag = %d = %d + 1\n", ++ tq_arg->name, tq_arg->id, ++ sym2str(splat_taskq_test2_func2), ++ tq_arg->flag + 1, tq_arg->flag); ++ tq_arg->flag += 1; ++} ++ ++#define TEST2_TASKQS 8 ++#define TEST2_THREADS_PER_TASKQ 1 ++ ++static int ++splat_taskq_test2_impl(struct file *file, void *arg, boolean_t prealloc) { ++ taskq_t *tq[TEST2_TASKQS] = { NULL }; ++ taskqid_t id; ++ splat_taskq_arg_t tq_args[TEST2_TASKQS]; ++ taskq_ent_t *func1_tqes = NULL; ++ taskq_ent_t *func2_tqes = NULL; ++ int i, rc = 0; ++ ++ func1_tqes = kmalloc(sizeof(*func1_tqes) * TEST2_TASKQS, GFP_KERNEL); ++ if (func1_tqes == NULL) { ++ rc = -ENOMEM; ++ goto out; ++ } ++ ++ func2_tqes = kmalloc(sizeof(*func2_tqes) * TEST2_TASKQS, GFP_KERNEL); ++ if (func2_tqes == NULL) { ++ rc = -ENOMEM; ++ goto out; ++ } ++ ++ for (i = 0; i < TEST2_TASKQS; i++) { ++ taskq_init_ent(&func1_tqes[i]); ++ taskq_init_ent(&func2_tqes[i]); ++ ++ splat_vprint(file, SPLAT_TASKQ_TEST2_NAME, ++ "Taskq '%s/%d' creating (%s dispatch)\n", ++ SPLAT_TASKQ_TEST2_NAME, i, ++ prealloc ? "prealloc" : "dynamic"); ++ if ((tq[i] = taskq_create(SPLAT_TASKQ_TEST2_NAME, ++ TEST2_THREADS_PER_TASKQ, ++ maxclsyspri, 50, INT_MAX, ++ TASKQ_PREPOPULATE)) == NULL) { ++ splat_vprint(file, SPLAT_TASKQ_TEST2_NAME, ++ "Taskq '%s/%d' create failed\n", ++ SPLAT_TASKQ_TEST2_NAME, i); ++ rc = -EINVAL; ++ break; ++ } ++ ++ tq_args[i].flag = i; ++ tq_args[i].id = i; ++ tq_args[i].file = file; ++ tq_args[i].name = SPLAT_TASKQ_TEST2_NAME; ++ ++ splat_vprint(file, SPLAT_TASKQ_TEST2_NAME, ++ "Taskq '%s/%d' function '%s' dispatching\n", ++ tq_args[i].name, tq_args[i].id, ++ sym2str(splat_taskq_test2_func1)); ++ if (prealloc) { ++ taskq_dispatch_ent(tq[i], splat_taskq_test2_func1, ++ &tq_args[i], TQ_SLEEP, &func1_tqes[i]); ++ id = func1_tqes[i].tqent_id; ++ } else { ++ id = taskq_dispatch(tq[i], splat_taskq_test2_func1, ++ &tq_args[i], TQ_SLEEP); ++ } ++ ++ if (id == 0) { ++ splat_vprint(file, SPLAT_TASKQ_TEST2_NAME, ++ "Taskq '%s/%d' function '%s' dispatch " ++ "failed\n", tq_args[i].name, tq_args[i].id, ++ sym2str(splat_taskq_test2_func1)); ++ rc = -EINVAL; ++ break; ++ } ++ ++ splat_vprint(file, SPLAT_TASKQ_TEST2_NAME, ++ "Taskq '%s/%d' function '%s' dispatching\n", ++ tq_args[i].name, tq_args[i].id, ++ sym2str(splat_taskq_test2_func2)); ++ if (prealloc) { ++ taskq_dispatch_ent(tq[i], splat_taskq_test2_func2, ++ &tq_args[i], TQ_SLEEP, &func2_tqes[i]); ++ id = func2_tqes[i].tqent_id; ++ } else { ++ id = taskq_dispatch(tq[i], splat_taskq_test2_func2, ++ &tq_args[i], TQ_SLEEP); ++ } ++ ++ if (id == 0) { ++ splat_vprint(file, SPLAT_TASKQ_TEST2_NAME, "Taskq " ++ "'%s/%d' function '%s' dispatch failed\n", ++ tq_args[i].name, tq_args[i].id, ++ sym2str(splat_taskq_test2_func2)); ++ rc = -EINVAL; ++ break; ++ } ++ } ++ ++ /* When rc is set we're effectively just doing cleanup here, so ++ * ignore new errors in that case. They just cause noise. 
*/ ++ for (i = 0; i < TEST2_TASKQS; i++) { ++ if (tq[i] != NULL) { ++ splat_vprint(file, SPLAT_TASKQ_TEST2_NAME, ++ "Taskq '%s/%d' waiting\n", ++ tq_args[i].name, tq_args[i].id); ++ taskq_wait(tq[i]); ++ splat_vprint(file, SPLAT_TASKQ_TEST2_NAME, ++ "Taskq '%s/%d; destroying\n", ++ tq_args[i].name, tq_args[i].id); ++ ++ taskq_destroy(tq[i]); ++ ++ if (!rc && tq_args[i].flag != ((i * 2) + 1)) { ++ splat_vprint(file, SPLAT_TASKQ_TEST2_NAME, ++ "Taskq '%s/%d' processed tasks " ++ "out of order; %d != %d\n", ++ tq_args[i].name, tq_args[i].id, ++ tq_args[i].flag, i * 2 + 1); ++ rc = -EINVAL; ++ } else { ++ splat_vprint(file, SPLAT_TASKQ_TEST2_NAME, ++ "Taskq '%s/%d' processed tasks " ++ "in the correct order; %d == %d\n", ++ tq_args[i].name, tq_args[i].id, ++ tq_args[i].flag, i * 2 + 1); ++ } ++ } ++ } ++out: ++ if (func1_tqes) ++ kfree(func1_tqes); ++ ++ if (func2_tqes) ++ kfree(func2_tqes); ++ ++ return rc; ++} ++ ++static int ++splat_taskq_test2(struct file *file, void *arg) { ++ int rc; ++ ++ rc = splat_taskq_test2_impl(file, arg, B_FALSE); ++ if (rc) ++ return rc; ++ ++ rc = splat_taskq_test2_impl(file, arg, B_TRUE); ++ ++ return rc; ++} ++ ++/* ++ * Use the global system task queue with a single task, wait until task ++ * completes, ensure task ran properly. ++ */ ++static int ++splat_taskq_test3_impl(struct file *file, void *arg, boolean_t prealloc) ++{ ++ taskqid_t id; ++ splat_taskq_arg_t tq_arg; ++ taskq_ent_t tqe; ++ ++ taskq_init_ent(&tqe); ++ ++ tq_arg.flag = 0; ++ tq_arg.id = 0; ++ tq_arg.file = file; ++ tq_arg.name = SPLAT_TASKQ_TEST3_NAME; ++ ++ splat_vprint(file, SPLAT_TASKQ_TEST3_NAME, ++ "Taskq '%s' function '%s' %s dispatch\n", ++ tq_arg.name, sym2str(splat_taskq_test13_func), ++ prealloc ? "prealloc" : "dynamic"); ++ if (prealloc) { ++ taskq_dispatch_ent(system_taskq, splat_taskq_test13_func, ++ &tq_arg, TQ_SLEEP, &tqe); ++ id = tqe.tqent_id; ++ } else { ++ id = taskq_dispatch(system_taskq, splat_taskq_test13_func, ++ &tq_arg, TQ_SLEEP); ++ } ++ ++ if (id == 0) { ++ splat_vprint(file, SPLAT_TASKQ_TEST3_NAME, ++ "Taskq '%s' function '%s' dispatch failed\n", ++ tq_arg.name, sym2str(splat_taskq_test13_func)); ++ return -EINVAL; ++ } ++ ++ splat_vprint(file, SPLAT_TASKQ_TEST3_NAME, "Taskq '%s' waiting\n", ++ tq_arg.name); ++ taskq_wait(system_taskq); ++ ++ return (tq_arg.flag) ? 0 : -EINVAL; ++} ++ ++static int ++splat_taskq_test3(struct file *file, void *arg) ++{ ++ int rc; ++ ++ rc = splat_taskq_test3_impl(file, arg, B_FALSE); ++ if (rc) ++ return rc; ++ ++ rc = splat_taskq_test3_impl(file, arg, B_TRUE); ++ ++ return rc; ++} ++ ++/* ++ * Create a taskq and dispatch a large number of tasks to the queue. ++ * Then use taskq_wait() to block until all the tasks complete, then ++ * cross check that all the tasks ran by checking tg_arg->count which ++ * is incremented in the task function. Finally cleanup the taskq. ++ * ++ * First we try with a large 'maxalloc' value, then we try with a small one. ++ * We should not drop tasks when TQ_SLEEP is used in taskq_dispatch(), even ++ * if the number of pending tasks is above maxalloc. 
++ */ ++static void ++splat_taskq_test4_func(void *arg) ++{ ++ splat_taskq_arg_t *tq_arg = (splat_taskq_arg_t *)arg; ++ ASSERT(tq_arg); ++ ++ atomic_inc(&tq_arg->count); ++} ++ ++static int ++splat_taskq_test4_common(struct file *file, void *arg, int minalloc, ++ int maxalloc, int nr_tasks, boolean_t prealloc) ++{ ++ taskq_t *tq; ++ taskqid_t id; ++ splat_taskq_arg_t tq_arg; ++ taskq_ent_t *tqes; ++ int i, j, rc = 0; ++ ++ tqes = kmalloc(sizeof(*tqes) * nr_tasks, GFP_KERNEL); ++ if (tqes == NULL) ++ return -ENOMEM; ++ ++ splat_vprint(file, SPLAT_TASKQ_TEST4_NAME, ++ "Taskq '%s' creating (%s dispatch) (%d/%d/%d)\n", ++ SPLAT_TASKQ_TEST4_NAME, ++ prealloc ? "prealloc" : "dynamic", ++ minalloc, maxalloc, nr_tasks); ++ if ((tq = taskq_create(SPLAT_TASKQ_TEST4_NAME, 1, maxclsyspri, ++ minalloc, maxalloc, TASKQ_PREPOPULATE)) == NULL) { ++ splat_vprint(file, SPLAT_TASKQ_TEST4_NAME, ++ "Taskq '%s' create failed\n", ++ SPLAT_TASKQ_TEST4_NAME); ++ rc = -EINVAL; ++ goto out_free; ++ } ++ ++ tq_arg.file = file; ++ tq_arg.name = SPLAT_TASKQ_TEST4_NAME; ++ ++ for (i = 1; i <= nr_tasks; i *= 2) { ++ atomic_set(&tq_arg.count, 0); ++ splat_vprint(file, SPLAT_TASKQ_TEST4_NAME, ++ "Taskq '%s' function '%s' dispatched %d times\n", ++ tq_arg.name, sym2str(splat_taskq_test4_func), i); ++ ++ for (j = 0; j < i; j++) { ++ taskq_init_ent(&tqes[j]); ++ ++ if (prealloc) { ++ taskq_dispatch_ent(tq, splat_taskq_test4_func, ++ &tq_arg, TQ_SLEEP, &tqes[j]); ++ id = tqes[j].tqent_id; ++ } else { ++ id = taskq_dispatch(tq, splat_taskq_test4_func, ++ &tq_arg, TQ_SLEEP); ++ } ++ ++ if (id == 0) { ++ splat_vprint(file, SPLAT_TASKQ_TEST4_NAME, ++ "Taskq '%s' function '%s' dispatch " ++ "%d failed\n", tq_arg.name, ++ sym2str(splat_taskq_test4_func), j); ++ rc = -EINVAL; ++ goto out; ++ } ++ } ++ ++ splat_vprint(file, SPLAT_TASKQ_TEST4_NAME, "Taskq '%s' " ++ "waiting for %d dispatches\n", tq_arg.name, i); ++ taskq_wait(tq); ++ splat_vprint(file, SPLAT_TASKQ_TEST4_NAME, "Taskq '%s' " ++ "%d/%d dispatches finished\n", tq_arg.name, ++ atomic_read(&tq_arg.count), i); ++ if (atomic_read(&tq_arg.count) != i) { ++ rc = -ERANGE; ++ goto out; ++ ++ } ++ } ++out: ++ splat_vprint(file, SPLAT_TASKQ_TEST4_NAME, "Taskq '%s' destroying\n", ++ tq_arg.name); ++ taskq_destroy(tq); ++ ++out_free: ++ kfree(tqes); ++ ++ return rc; ++} ++ ++static int ++splat_taskq_test4_impl(struct file *file, void *arg, boolean_t prealloc) ++{ ++ int rc; ++ ++ rc = splat_taskq_test4_common(file, arg, 50, INT_MAX, 1024, prealloc); ++ if (rc) ++ return rc; ++ ++ rc = splat_taskq_test4_common(file, arg, 1, 1, 32, prealloc); ++ ++ return rc; ++} ++ ++static int ++splat_taskq_test4(struct file *file, void *arg) ++{ ++ int rc; ++ ++ rc = splat_taskq_test4_impl(file, arg, B_FALSE); ++ if (rc) ++ return rc; ++ ++ rc = splat_taskq_test4_impl(file, arg, B_TRUE); ++ ++ return rc; ++} ++ ++/* ++ * Create a taskq and dispatch a specific sequence of tasks carefully ++ * crafted to validate the order in which tasks are processed. When ++ * there are multiple worker threads each thread will process the ++ * next pending task as soon as it completes its current task. This ++ * means that tasks do not strictly complete in order in which they ++ * were dispatched (increasing task id). This is fine but we need to ++ * verify that taskq_wait_id() blocks until the passed task id and all ++ * lower task ids complete. We do this by dispatching the following ++ * specific sequence of tasks each of which block for N time units. 
++ * We then use taskq_wait_id() to unblock at specific task id and ++ * verify the only the expected task ids have completed and in the ++ * correct order. The two cases of interest are: ++ * ++ * 1) Task ids larger than the waited for task id can run and ++ * complete as long as there is an available worker thread. ++ * 2) All task ids lower than the waited one must complete before ++ * unblocking even if the waited task id itself has completed. ++ * ++ * The following table shows each task id and how they will be ++ * scheduled. Each rows represent one time unit and each column ++ * one of the three worker threads. The places taskq_wait_id() ++ * must unblock for a specific id are identified as well as the ++ * task ids which must have completed and their order. ++ * ++ * +-----+ <--- taskq_wait_id(tq, 8) unblocks ++ * | | Required Completion Order: 1,2,4,5,3,8,6,7 ++ * +-----+ | ++ * | | | ++ * | | +-----+ ++ * | | | 8 | ++ * | | +-----+ <--- taskq_wait_id(tq, 3) unblocks ++ * | | 7 | | Required Completion Order: 1,2,4,5,3 ++ * | +-----+ | ++ * | 6 | | | ++ * +-----+ | | ++ * | | 5 | | ++ * | +-----+ | ++ * | 4 | | | ++ * +-----+ | | ++ * | 1 | 2 | 3 | ++ * +-----+-----+-----+ ++ * ++ */ ++static void ++splat_taskq_test5_func(void *arg) ++{ ++ splat_taskq_id_t *tq_id = (splat_taskq_id_t *)arg; ++ splat_taskq_arg_t *tq_arg = tq_id->arg; ++ int factor; ++ ++ /* Delays determined by above table */ ++ switch (tq_id->id) { ++ default: factor = 0; break; ++ case 1: case 8: factor = 1; break; ++ case 2: case 4: case 5: factor = 2; break; ++ case 6: case 7: factor = 4; break; ++ case 3: factor = 5; break; ++ } ++ ++ msleep(factor * 100); ++ splat_vprint(tq_arg->file, tq_arg->name, ++ "Taskqid %d complete for taskq '%s'\n", ++ tq_id->id, tq_arg->name); ++ ++ spin_lock(&tq_arg->lock); ++ tq_arg->order[tq_arg->flag] = tq_id->id; ++ tq_arg->flag++; ++ spin_unlock(&tq_arg->lock); ++} ++ ++static int ++splat_taskq_test_order(splat_taskq_arg_t *tq_arg, int *order) ++{ ++ int i, j; ++ ++ for (i = 0; i < SPLAT_TASKQ_ORDER_MAX; i++) { ++ if (tq_arg->order[i] != order[i]) { ++ splat_vprint(tq_arg->file, tq_arg->name, ++ "Taskq '%s' incorrect completion " ++ "order\n", tq_arg->name); ++ splat_vprint(tq_arg->file, tq_arg->name, ++ "%s", "Expected { "); ++ ++ for (j = 0; j < SPLAT_TASKQ_ORDER_MAX; j++) ++ splat_print(tq_arg->file, "%d ", order[j]); ++ ++ splat_print(tq_arg->file, "%s", "}\n"); ++ splat_vprint(tq_arg->file, tq_arg->name, ++ "%s", "Got { "); ++ ++ for (j = 0; j < SPLAT_TASKQ_ORDER_MAX; j++) ++ splat_print(tq_arg->file, "%d ", ++ tq_arg->order[j]); ++ ++ splat_print(tq_arg->file, "%s", "}\n"); ++ return -EILSEQ; ++ } ++ } ++ ++ splat_vprint(tq_arg->file, tq_arg->name, ++ "Taskq '%s' validated correct completion order\n", ++ tq_arg->name); ++ ++ return 0; ++} ++ ++static int ++splat_taskq_test5_impl(struct file *file, void *arg, boolean_t prealloc) ++{ ++ taskq_t *tq; ++ taskqid_t id; ++ splat_taskq_id_t tq_id[SPLAT_TASKQ_ORDER_MAX]; ++ splat_taskq_arg_t tq_arg; ++ int order1[SPLAT_TASKQ_ORDER_MAX] = { 1,2,4,5,3,0,0,0 }; ++ int order2[SPLAT_TASKQ_ORDER_MAX] = { 1,2,4,5,3,8,6,7 }; ++ taskq_ent_t tqes[SPLAT_TASKQ_ORDER_MAX]; ++ int i, rc = 0; ++ ++ splat_vprint(file, SPLAT_TASKQ_TEST5_NAME, ++ "Taskq '%s' creating (%s dispatch)\n", ++ SPLAT_TASKQ_TEST5_NAME, ++ prealloc ? 
"prealloc" : "dynamic"); ++ if ((tq = taskq_create(SPLAT_TASKQ_TEST5_NAME, 3, maxclsyspri, ++ 50, INT_MAX, TASKQ_PREPOPULATE)) == NULL) { ++ splat_vprint(file, SPLAT_TASKQ_TEST5_NAME, ++ "Taskq '%s' create failed\n", ++ SPLAT_TASKQ_TEST5_NAME); ++ return -EINVAL; ++ } ++ ++ tq_arg.flag = 0; ++ memset(&tq_arg.order, 0, sizeof(int) * SPLAT_TASKQ_ORDER_MAX); ++ spin_lock_init(&tq_arg.lock); ++ tq_arg.file = file; ++ tq_arg.name = SPLAT_TASKQ_TEST5_NAME; ++ ++ for (i = 0; i < SPLAT_TASKQ_ORDER_MAX; i++) { ++ taskq_init_ent(&tqes[i]); ++ ++ tq_id[i].id = i + 1; ++ tq_id[i].arg = &tq_arg; ++ ++ if (prealloc) { ++ taskq_dispatch_ent(tq, splat_taskq_test5_func, ++ &tq_id[i], TQ_SLEEP, &tqes[i]); ++ id = tqes[i].tqent_id; ++ } else { ++ id = taskq_dispatch(tq, splat_taskq_test5_func, ++ &tq_id[i], TQ_SLEEP); ++ } ++ ++ if (id == 0) { ++ splat_vprint(file, SPLAT_TASKQ_TEST5_NAME, ++ "Taskq '%s' function '%s' dispatch failed\n", ++ tq_arg.name, sym2str(splat_taskq_test5_func)); ++ rc = -EINVAL; ++ goto out; ++ } ++ ++ if (tq_id[i].id != id) { ++ splat_vprint(file, SPLAT_TASKQ_TEST5_NAME, ++ "Taskq '%s' expected taskqid %d got %d\n", ++ tq_arg.name, (int)tq_id[i].id, (int)id); ++ rc = -EINVAL; ++ goto out; ++ } ++ } ++ ++ splat_vprint(file, SPLAT_TASKQ_TEST5_NAME, "Taskq '%s' " ++ "waiting for taskqid %d completion\n", tq_arg.name, 3); ++ taskq_wait_id(tq, 3); ++ if ((rc = splat_taskq_test_order(&tq_arg, order1))) ++ goto out; ++ ++ splat_vprint(file, SPLAT_TASKQ_TEST5_NAME, "Taskq '%s' " ++ "waiting for taskqid %d completion\n", tq_arg.name, 8); ++ taskq_wait_id(tq, 8); ++ rc = splat_taskq_test_order(&tq_arg, order2); ++ ++out: ++ splat_vprint(file, SPLAT_TASKQ_TEST5_NAME, ++ "Taskq '%s' destroying\n", tq_arg.name); ++ taskq_destroy(tq); ++ ++ return rc; ++} ++ ++static int ++splat_taskq_test5(struct file *file, void *arg) ++{ ++ int rc; ++ ++ rc = splat_taskq_test5_impl(file, arg, B_FALSE); ++ if (rc) ++ return rc; ++ ++ rc = splat_taskq_test5_impl(file, arg, B_TRUE); ++ ++ return rc; ++} ++ ++/* ++ * Create a single task queue with three threads. Dispatch 8 tasks, ++ * setting TQ_FRONT on only the last three. Sleep after ++ * dispatching tasks 1-3 to ensure they will run and hold the threads ++ * busy while we dispatch the remaining tasks. Verify that tasks 6-8 ++ * run before task 4-5. ++ * ++ * The following table shows each task id and how they will be ++ * scheduled. Each rows represent one time unit and each column ++ * one of the three worker threads. 
++ * ++ * +-----+ ++ * | | ++ * +-----+ | ++ * | | 5 +-----+ ++ * | | | | ++ * | +-----| | ++ * | 4 | | | ++ * +-----+ | 8 | ++ * | | | | ++ * | | 7 +-----+ ++ * | | | | ++ * | |-----+ | ++ * | 6 | | | ++ * +-----+ | | ++ * | | | | ++ * | 1 | 2 | 3 | ++ * +-----+-----+-----+ ++ * ++ */ ++static void ++splat_taskq_test6_func(void *arg) ++{ ++ splat_taskq_id_t *tq_id = (splat_taskq_id_t *)arg; ++ splat_taskq_arg_t *tq_arg = tq_id->arg; ++ int factor; ++ ++ /* Delays determined by above table */ ++ switch (tq_id->id) { ++ default: factor = 0; break; ++ case 1: factor = 2; break; ++ case 2: case 4: case 5: factor = 4; break; ++ case 6: case 7: case 8: factor = 5; break; ++ case 3: factor = 6; break; ++ } ++ ++ msleep(factor * 100); ++ ++ splat_vprint(tq_arg->file, tq_arg->name, ++ "Taskqid %d complete for taskq '%s'\n", ++ tq_id->id, tq_arg->name); ++ ++ spin_lock(&tq_arg->lock); ++ tq_arg->order[tq_arg->flag] = tq_id->id; ++ tq_arg->flag++; ++ spin_unlock(&tq_arg->lock); ++} ++ ++static int ++splat_taskq_test6_impl(struct file *file, void *arg, boolean_t prealloc) ++{ ++ taskq_t *tq; ++ taskqid_t id; ++ splat_taskq_id_t tq_id[SPLAT_TASKQ_ORDER_MAX]; ++ splat_taskq_arg_t tq_arg; ++ int order[SPLAT_TASKQ_ORDER_MAX] = { 1,2,3,6,7,8,4,5 }; ++ taskq_ent_t tqes[SPLAT_TASKQ_ORDER_MAX]; ++ int i, rc = 0; ++ uint_t tflags; ++ ++ splat_vprint(file, SPLAT_TASKQ_TEST6_NAME, ++ "Taskq '%s' creating (%s dispatch)\n", ++ SPLAT_TASKQ_TEST6_NAME, ++ prealloc ? "prealloc" : "dynamic"); ++ if ((tq = taskq_create(SPLAT_TASKQ_TEST6_NAME, 3, maxclsyspri, ++ 50, INT_MAX, TASKQ_PREPOPULATE)) == NULL) { ++ splat_vprint(file, SPLAT_TASKQ_TEST6_NAME, ++ "Taskq '%s' create failed\n", ++ SPLAT_TASKQ_TEST6_NAME); ++ return -EINVAL; ++ } ++ ++ tq_arg.flag = 0; ++ memset(&tq_arg.order, 0, sizeof(int) * SPLAT_TASKQ_ORDER_MAX); ++ spin_lock_init(&tq_arg.lock); ++ tq_arg.file = file; ++ tq_arg.name = SPLAT_TASKQ_TEST6_NAME; ++ ++ for (i = 0; i < SPLAT_TASKQ_ORDER_MAX; i++) { ++ taskq_init_ent(&tqes[i]); ++ ++ tq_id[i].id = i + 1; ++ tq_id[i].arg = &tq_arg; ++ tflags = TQ_SLEEP; ++ if (i > 4) ++ tflags |= TQ_FRONT; ++ ++ if (prealloc) { ++ taskq_dispatch_ent(tq, splat_taskq_test6_func, ++ &tq_id[i], tflags, &tqes[i]); ++ id = tqes[i].tqent_id; ++ } else { ++ id = taskq_dispatch(tq, splat_taskq_test6_func, ++ &tq_id[i], tflags); ++ } ++ ++ if (id == 0) { ++ splat_vprint(file, SPLAT_TASKQ_TEST6_NAME, ++ "Taskq '%s' function '%s' dispatch failed\n", ++ tq_arg.name, sym2str(splat_taskq_test6_func)); ++ rc = -EINVAL; ++ goto out; ++ } ++ ++ if (tq_id[i].id != id) { ++ splat_vprint(file, SPLAT_TASKQ_TEST6_NAME, ++ "Taskq '%s' expected taskqid %d got %d\n", ++ tq_arg.name, (int)tq_id[i].id, (int)id); ++ rc = -EINVAL; ++ goto out; ++ } ++ /* Sleep to let tasks 1-3 start executing. 
*/ ++ if ( i == 2 ) ++ msleep(100); ++ } ++ ++ splat_vprint(file, SPLAT_TASKQ_TEST6_NAME, "Taskq '%s' " ++ "waiting for taskqid %d completion\n", tq_arg.name, ++ SPLAT_TASKQ_ORDER_MAX); ++ taskq_wait_id(tq, SPLAT_TASKQ_ORDER_MAX); ++ rc = splat_taskq_test_order(&tq_arg, order); ++ ++out: ++ splat_vprint(file, SPLAT_TASKQ_TEST6_NAME, ++ "Taskq '%s' destroying\n", tq_arg.name); ++ taskq_destroy(tq); ++ ++ return rc; ++} ++ ++static int ++splat_taskq_test6(struct file *file, void *arg) ++{ ++ int rc; ++ ++ rc = splat_taskq_test6_impl(file, arg, B_FALSE); ++ if (rc) ++ return rc; ++ ++ rc = splat_taskq_test6_impl(file, arg, B_TRUE); ++ ++ return rc; ++} ++ ++static void ++splat_taskq_test7_func(void *arg) ++{ ++ splat_taskq_arg_t *tq_arg = (splat_taskq_arg_t *)arg; ++ taskqid_t id; ++ ++ ASSERT(tq_arg); ++ ++ if (tq_arg->depth >= SPLAT_TASKQ_DEPTH_MAX) ++ return; ++ ++ tq_arg->depth++; ++ ++ splat_vprint(tq_arg->file, SPLAT_TASKQ_TEST7_NAME, ++ "Taskq '%s' function '%s' dispatching (depth = %u)\n", ++ tq_arg->name, sym2str(splat_taskq_test7_func), ++ tq_arg->depth); ++ ++ if (tq_arg->tqe) { ++ VERIFY(taskq_empty_ent(tq_arg->tqe)); ++ taskq_dispatch_ent(tq_arg->tq, splat_taskq_test7_func, ++ tq_arg, TQ_SLEEP, tq_arg->tqe); ++ id = tq_arg->tqe->tqent_id; ++ } else { ++ id = taskq_dispatch(tq_arg->tq, splat_taskq_test7_func, ++ tq_arg, TQ_SLEEP); ++ } ++ ++ if (id == 0) { ++ splat_vprint(tq_arg->file, SPLAT_TASKQ_TEST7_NAME, ++ "Taskq '%s' function '%s' dispatch failed " ++ "(depth = %u)\n", tq_arg->name, ++ sym2str(splat_taskq_test7_func), tq_arg->depth); ++ tq_arg->flag = -EINVAL; ++ return; ++ } ++} ++ ++static int ++splat_taskq_test7_impl(struct file *file, void *arg, boolean_t prealloc) ++{ ++ taskq_t *tq; ++ taskq_ent_t tqe; ++ splat_taskq_arg_t tq_arg; ++ ++ splat_vprint(file, SPLAT_TASKQ_TEST7_NAME, ++ "Taskq '%s' creating (%s dispatch)\n", ++ SPLAT_TASKQ_TEST7_NAME, ++ prealloc ? "prealloc" : "dynamic"); ++ if ((tq = taskq_create(SPLAT_TASKQ_TEST7_NAME, 1, maxclsyspri, ++ 50, INT_MAX, TASKQ_PREPOPULATE)) == NULL) { ++ splat_vprint(file, SPLAT_TASKQ_TEST7_NAME, ++ "Taskq '%s' create failed\n", ++ SPLAT_TASKQ_TEST7_NAME); ++ return -EINVAL; ++ } ++ ++ tq_arg.depth = 0; ++ tq_arg.flag = 0; ++ tq_arg.id = 0; ++ tq_arg.file = file; ++ tq_arg.name = SPLAT_TASKQ_TEST7_NAME; ++ tq_arg.tq = tq; ++ ++ if (prealloc) { ++ taskq_init_ent(&tqe); ++ tq_arg.tqe = &tqe; ++ } else { ++ tq_arg.tqe = NULL; ++ } ++ ++ splat_taskq_test7_func(&tq_arg); ++ ++ if (tq_arg.flag == 0) { ++ splat_vprint(file, SPLAT_TASKQ_TEST7_NAME, ++ "Taskq '%s' waiting\n", tq_arg.name); ++ taskq_wait_id(tq, SPLAT_TASKQ_DEPTH_MAX); ++ } ++ ++ splat_vprint(file, SPLAT_TASKQ_TEST7_NAME, ++ "Taskq '%s' destroying\n", tq_arg.name); ++ taskq_destroy(tq); ++ ++ return tq_arg.depth == SPLAT_TASKQ_DEPTH_MAX ? 0 : -EINVAL; ++} ++ ++static int ++splat_taskq_test7(struct file *file, void *arg) ++{ ++ int rc; ++ ++ rc = splat_taskq_test7_impl(file, arg, B_FALSE); ++ if (rc) ++ return rc; ++ ++ rc = splat_taskq_test7_impl(file, arg, B_TRUE); ++ ++ return rc; ++} ++ ++/* ++ * Create a taskq with 100 threads and dispatch a huge number of trivial ++ * tasks to generate contention on tq->tq_lock. This test should always ++ * pass. The purpose is to provide a benchmark for measuring the ++ * effectiveness of taskq optimizations. 
++ */ ++static void ++splat_taskq_test8_func(void *arg) ++{ ++ splat_taskq_arg_t *tq_arg = (splat_taskq_arg_t *)arg; ++ ASSERT(tq_arg); ++ ++ atomic_inc(&tq_arg->count); ++} ++ ++#define TEST8_NUM_TASKS 0x20000 ++#define TEST8_THREADS_PER_TASKQ 100 ++ ++static int ++splat_taskq_test8_common(struct file *file, void *arg, int minalloc, ++ int maxalloc) ++{ ++ taskq_t *tq; ++ taskqid_t id; ++ splat_taskq_arg_t tq_arg; ++ taskq_ent_t **tqes; ++ int i, j, rc = 0; ++ ++ tqes = vmalloc(sizeof(*tqes) * TEST8_NUM_TASKS); ++ if (tqes == NULL) ++ return -ENOMEM; ++ memset(tqes, 0, sizeof(*tqes) * TEST8_NUM_TASKS); ++ ++ splat_vprint(file, SPLAT_TASKQ_TEST8_NAME, ++ "Taskq '%s' creating (%d/%d/%d)\n", ++ SPLAT_TASKQ_TEST8_NAME, ++ minalloc, maxalloc, TEST8_NUM_TASKS); ++ if ((tq = taskq_create(SPLAT_TASKQ_TEST8_NAME, TEST8_THREADS_PER_TASKQ, ++ maxclsyspri, minalloc, maxalloc, ++ TASKQ_PREPOPULATE)) == NULL) { ++ splat_vprint(file, SPLAT_TASKQ_TEST8_NAME, ++ "Taskq '%s' create failed\n", ++ SPLAT_TASKQ_TEST8_NAME); ++ rc = -EINVAL; ++ goto out_free; ++ } ++ ++ tq_arg.file = file; ++ tq_arg.name = SPLAT_TASKQ_TEST8_NAME; ++ ++ atomic_set(&tq_arg.count, 0); ++ for (i = 0; i < TEST8_NUM_TASKS; i++) { ++ tqes[i] = kmalloc(sizeof(taskq_ent_t), GFP_KERNEL); ++ if (tqes[i] == NULL) { ++ rc = -ENOMEM; ++ goto out; ++ } ++ taskq_init_ent(tqes[i]); ++ ++ taskq_dispatch_ent(tq, splat_taskq_test8_func, ++ &tq_arg, TQ_SLEEP, tqes[i]); ++ ++ id = tqes[i]->tqent_id; ++ ++ if (id == 0) { ++ splat_vprint(file, SPLAT_TASKQ_TEST8_NAME, ++ "Taskq '%s' function '%s' dispatch " ++ "%d failed\n", tq_arg.name, ++ sym2str(splat_taskq_test8_func), i); ++ rc = -EINVAL; ++ goto out; ++ } ++ } ++ ++ splat_vprint(file, SPLAT_TASKQ_TEST8_NAME, "Taskq '%s' " ++ "waiting for %d dispatches\n", tq_arg.name, ++ TEST8_NUM_TASKS); ++ taskq_wait(tq); ++ splat_vprint(file, SPLAT_TASKQ_TEST8_NAME, "Taskq '%s' " ++ "%d/%d dispatches finished\n", tq_arg.name, ++ atomic_read(&tq_arg.count), TEST8_NUM_TASKS); ++ ++ if (atomic_read(&tq_arg.count) != TEST8_NUM_TASKS) ++ rc = -ERANGE; ++ ++out: ++ splat_vprint(file, SPLAT_TASKQ_TEST8_NAME, "Taskq '%s' destroying\n", ++ tq_arg.name); ++ taskq_destroy(tq); ++out_free: ++ for (j = 0; j < TEST8_NUM_TASKS && tqes[j] != NULL; j++) ++ kfree(tqes[j]); ++ vfree(tqes); ++ ++ return rc; ++} ++ ++static int ++splat_taskq_test8(struct file *file, void *arg) ++{ ++ int rc; ++ ++ rc = splat_taskq_test8_common(file, arg, 1, 100); ++ ++ return rc; ++} ++ ++splat_subsystem_t * ++splat_taskq_init(void) ++{ ++ splat_subsystem_t *sub; ++ ++ sub = kmalloc(sizeof(*sub), GFP_KERNEL); ++ if (sub == NULL) ++ return NULL; ++ ++ memset(sub, 0, sizeof(*sub)); ++ strncpy(sub->desc.name, SPLAT_TASKQ_NAME, SPLAT_NAME_SIZE); ++ strncpy(sub->desc.desc, SPLAT_TASKQ_DESC, SPLAT_DESC_SIZE); ++ INIT_LIST_HEAD(&sub->subsystem_list); ++ INIT_LIST_HEAD(&sub->test_list); ++ spin_lock_init(&sub->test_lock); ++ sub->desc.id = SPLAT_SUBSYSTEM_TASKQ; ++ ++ SPLAT_TEST_INIT(sub, SPLAT_TASKQ_TEST1_NAME, SPLAT_TASKQ_TEST1_DESC, ++ SPLAT_TASKQ_TEST1_ID, splat_taskq_test1); ++ SPLAT_TEST_INIT(sub, SPLAT_TASKQ_TEST2_NAME, SPLAT_TASKQ_TEST2_DESC, ++ SPLAT_TASKQ_TEST2_ID, splat_taskq_test2); ++ SPLAT_TEST_INIT(sub, SPLAT_TASKQ_TEST3_NAME, SPLAT_TASKQ_TEST3_DESC, ++ SPLAT_TASKQ_TEST3_ID, splat_taskq_test3); ++ SPLAT_TEST_INIT(sub, SPLAT_TASKQ_TEST4_NAME, SPLAT_TASKQ_TEST4_DESC, ++ SPLAT_TASKQ_TEST4_ID, splat_taskq_test4); ++ SPLAT_TEST_INIT(sub, SPLAT_TASKQ_TEST5_NAME, SPLAT_TASKQ_TEST5_DESC, ++ SPLAT_TASKQ_TEST5_ID, splat_taskq_test5); ++ 
SPLAT_TEST_INIT(sub, SPLAT_TASKQ_TEST6_NAME, SPLAT_TASKQ_TEST6_DESC, ++ SPLAT_TASKQ_TEST6_ID, splat_taskq_test6); ++ SPLAT_TEST_INIT(sub, SPLAT_TASKQ_TEST7_NAME, SPLAT_TASKQ_TEST7_DESC, ++ SPLAT_TASKQ_TEST7_ID, splat_taskq_test7); ++ SPLAT_TEST_INIT(sub, SPLAT_TASKQ_TEST8_NAME, SPLAT_TASKQ_TEST8_DESC, ++ SPLAT_TASKQ_TEST8_ID, splat_taskq_test8); ++ ++ return sub; ++} ++ ++void ++splat_taskq_fini(splat_subsystem_t *sub) ++{ ++ ASSERT(sub); ++ SPLAT_TEST_FINI(sub, SPLAT_TASKQ_TEST8_ID); ++ SPLAT_TEST_FINI(sub, SPLAT_TASKQ_TEST7_ID); ++ SPLAT_TEST_FINI(sub, SPLAT_TASKQ_TEST6_ID); ++ SPLAT_TEST_FINI(sub, SPLAT_TASKQ_TEST5_ID); ++ SPLAT_TEST_FINI(sub, SPLAT_TASKQ_TEST4_ID); ++ SPLAT_TEST_FINI(sub, SPLAT_TASKQ_TEST3_ID); ++ SPLAT_TEST_FINI(sub, SPLAT_TASKQ_TEST2_ID); ++ SPLAT_TEST_FINI(sub, SPLAT_TASKQ_TEST1_ID); ++ ++ kfree(sub); ++} ++ ++int ++splat_taskq_id(void) { ++ return SPLAT_SUBSYSTEM_TASKQ; ++} +diff -uNr linux-3.2.33-go.orig/spl/splat/splat-thread.c linux-3.2.33-go/spl/splat/splat-thread.c +--- linux-3.2.33-go.orig/spl/splat/splat-thread.c 1970-01-01 01:00:00.000000000 +0100 ++++ linux-3.2.33-go/spl/splat/splat-thread.c 2012-11-16 23:22:32.409192874 +0100 +@@ -0,0 +1,386 @@ ++/*****************************************************************************\ ++ * Copyright (C) 2007-2010 Lawrence Livermore National Security, LLC. ++ * Copyright (C) 2007 The Regents of the University of California. ++ * Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER). ++ * Written by Brian Behlendorf . ++ * UCRL-CODE-235197 ++ * ++ * This file is part of the SPL, Solaris Porting Layer. ++ * For details, see . ++ * ++ * The SPL is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License as published by the ++ * Free Software Foundation; either version 2 of the License, or (at your ++ * option) any later version. ++ * ++ * The SPL is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * for more details. ++ * ++ * You should have received a copy of the GNU General Public License along ++ * with the SPL. If not, see . ++ ***************************************************************************** ++ * Solaris Porting LAyer Tests (SPLAT) Thread Tests. 
++\*****************************************************************************/ ++ ++#include ++#include ++#include "splat-internal.h" ++ ++#define SPLAT_THREAD_NAME "thread" ++#define SPLAT_THREAD_DESC "Kernel Thread Tests" ++ ++#define SPLAT_THREAD_TEST1_ID 0x0601 ++#define SPLAT_THREAD_TEST1_NAME "create" ++#define SPLAT_THREAD_TEST1_DESC "Validate thread creation" ++ ++#define SPLAT_THREAD_TEST2_ID 0x0602 ++#define SPLAT_THREAD_TEST2_NAME "exit" ++#define SPLAT_THREAD_TEST2_DESC "Validate thread exit" ++ ++#define SPLAT_THREAD_TEST3_ID 0x6003 ++#define SPLAT_THREAD_TEST3_NAME "tsd" ++#define SPLAT_THREAD_TEST3_DESC "Validate thread specific data" ++ ++#define SPLAT_THREAD_TEST_MAGIC 0x4488CC00UL ++#define SPLAT_THREAD_TEST_KEYS 32 ++#define SPLAT_THREAD_TEST_THREADS 16 ++ ++typedef struct thread_priv { ++ unsigned long tp_magic; ++ struct file *tp_file; ++ spinlock_t tp_lock; ++ wait_queue_head_t tp_waitq; ++ uint_t tp_keys[SPLAT_THREAD_TEST_KEYS]; ++ int tp_rc; ++ int tp_count; ++ int tp_dtor_count; ++} thread_priv_t; ++ ++static int ++splat_thread_rc(thread_priv_t *tp, int rc) ++{ ++ int ret; ++ ++ spin_lock(&tp->tp_lock); ++ ret = (tp->tp_rc == rc); ++ spin_unlock(&tp->tp_lock); ++ ++ return ret; ++} ++ ++static int ++splat_thread_count(thread_priv_t *tp, int count) ++{ ++ int ret; ++ ++ spin_lock(&tp->tp_lock); ++ ret = (tp->tp_count == count); ++ spin_unlock(&tp->tp_lock); ++ ++ return ret; ++} ++ ++static void ++splat_thread_work1(void *priv) ++{ ++ thread_priv_t *tp = (thread_priv_t *)priv; ++ ++ spin_lock(&tp->tp_lock); ++ ASSERT(tp->tp_magic == SPLAT_THREAD_TEST_MAGIC); ++ tp->tp_rc = 1; ++ wake_up(&tp->tp_waitq); ++ spin_unlock(&tp->tp_lock); ++ ++ thread_exit(); ++} ++ ++static int ++splat_thread_test1(struct file *file, void *arg) ++{ ++ thread_priv_t tp; ++ kthread_t *thr; ++ ++ tp.tp_magic = SPLAT_THREAD_TEST_MAGIC; ++ tp.tp_file = file; ++ spin_lock_init(&tp.tp_lock); ++ init_waitqueue_head(&tp.tp_waitq); ++ tp.tp_rc = 0; ++ ++ thr = (kthread_t *)thread_create(NULL, 0, splat_thread_work1, &tp, 0, ++ &p0, TS_RUN, minclsyspri); ++ /* Must never fail under Solaris, but we check anyway since this ++ * can happen in the linux SPL, we may want to change this behavior */ ++ if (thr == NULL) ++ return -ESRCH; ++ ++ /* Sleep until the thread sets tp.tp_rc == 1 */ ++ wait_event(tp.tp_waitq, splat_thread_rc(&tp, 1)); ++ ++ splat_vprint(file, SPLAT_THREAD_TEST1_NAME, "%s", ++ "Thread successfully started properly\n"); ++ return 0; ++} ++ ++static void ++splat_thread_work2(void *priv) ++{ ++ thread_priv_t *tp = (thread_priv_t *)priv; ++ ++ spin_lock(&tp->tp_lock); ++ ASSERT(tp->tp_magic == SPLAT_THREAD_TEST_MAGIC); ++ tp->tp_rc = 1; ++ wake_up(&tp->tp_waitq); ++ spin_unlock(&tp->tp_lock); ++ ++ thread_exit(); ++ ++ /* The following code is unreachable when thread_exit() is ++ * working properly, which is exactly what we're testing */ ++ spin_lock(&tp->tp_lock); ++ tp->tp_rc = 2; ++ wake_up(&tp->tp_waitq); ++ spin_unlock(&tp->tp_lock); ++} ++ ++static int ++splat_thread_test2(struct file *file, void *arg) ++{ ++ thread_priv_t tp; ++ kthread_t *thr; ++ int rc = 0; ++ ++ tp.tp_magic = SPLAT_THREAD_TEST_MAGIC; ++ tp.tp_file = file; ++ spin_lock_init(&tp.tp_lock); ++ init_waitqueue_head(&tp.tp_waitq); ++ tp.tp_rc = 0; ++ ++ thr = (kthread_t *)thread_create(NULL, 0, splat_thread_work2, &tp, 0, ++ &p0, TS_RUN, minclsyspri); ++ /* Must never fail under Solaris, but we check anyway since this ++ * can happen in the linux SPL, we may want to change this behavior */ ++ if (thr == NULL) ++ 
return -ESRCH; ++ ++ /* Sleep until the thread sets tp.tp_rc == 1 */ ++ wait_event(tp.tp_waitq, splat_thread_rc(&tp, 1)); ++ ++ /* Sleep until the thread sets tp.tp_rc == 2, or until we hit ++ * the timeout. If thread exit is working properly we should ++ * hit the timeout and never see to.tp_rc == 2. */ ++ rc = wait_event_timeout(tp.tp_waitq, splat_thread_rc(&tp, 2), HZ / 10); ++ if (rc > 0) { ++ rc = -EINVAL; ++ splat_vprint(file, SPLAT_THREAD_TEST2_NAME, "%s", ++ "Thread did not exit properly at thread_exit()\n"); ++ } else { ++ splat_vprint(file, SPLAT_THREAD_TEST2_NAME, "%s", ++ "Thread successfully exited at thread_exit()\n"); ++ } ++ ++ return rc; ++} ++ ++static void ++splat_thread_work3_common(thread_priv_t *tp) ++{ ++ ulong_t rnd; ++ int i, rc = 0; ++ ++ /* set a unique value for each key using a random value */ ++ get_random_bytes((void *)&rnd, 4); ++ for (i = 0; i < SPLAT_THREAD_TEST_KEYS; i++) ++ tsd_set(tp->tp_keys[i], (void *)(i + rnd)); ++ ++ /* verify the unique value for each key */ ++ for (i = 0; i < SPLAT_THREAD_TEST_KEYS; i++) ++ if (tsd_get(tp->tp_keys[i]) != (void *)(i + rnd)) ++ rc = -EINVAL; ++ ++ /* set the value to thread_priv_t for use by the destructor */ ++ for (i = 0; i < SPLAT_THREAD_TEST_KEYS; i++) ++ tsd_set(tp->tp_keys[i], (void *)tp); ++ ++ spin_lock(&tp->tp_lock); ++ if (rc && !tp->tp_rc) ++ tp->tp_rc = rc; ++ ++ tp->tp_count++; ++ wake_up_all(&tp->tp_waitq); ++ spin_unlock(&tp->tp_lock); ++} ++ ++static void ++splat_thread_work3_wait(void *priv) ++{ ++ thread_priv_t *tp = (thread_priv_t *)priv; ++ ++ ASSERT(tp->tp_magic == SPLAT_THREAD_TEST_MAGIC); ++ splat_thread_work3_common(tp); ++ wait_event(tp->tp_waitq, splat_thread_count(tp, 0)); ++ thread_exit(); ++} ++ ++static void ++splat_thread_work3_exit(void *priv) ++{ ++ thread_priv_t *tp = (thread_priv_t *)priv; ++ ++ ASSERT(tp->tp_magic == SPLAT_THREAD_TEST_MAGIC); ++ splat_thread_work3_common(tp); ++ thread_exit(); ++} ++ ++static void ++splat_thread_dtor3(void *priv) ++{ ++ thread_priv_t *tp = (thread_priv_t *)priv; ++ ++ ASSERT(tp->tp_magic == SPLAT_THREAD_TEST_MAGIC); ++ spin_lock(&tp->tp_lock); ++ tp->tp_dtor_count++; ++ spin_unlock(&tp->tp_lock); ++} ++ ++/* ++ * Create threads which set and verify SPLAT_THREAD_TEST_KEYS number of ++ * keys. These threads may then exit by calling thread_exit() which calls ++ * tsd_exit() resulting in all their thread specific data being reclaimed. ++ * Alternately, the thread may block in which case the thread specific ++ * data will be reclaimed as part of tsd_destroy(). In either case all ++ * thread specific data must be reclaimed, this is verified by ensuring ++ * the registered destructor is called the correct number of times. ++ */ ++static int ++splat_thread_test3(struct file *file, void *arg) ++{ ++ int i, rc = 0, expected, wait_count = 0, exit_count = 0; ++ thread_priv_t tp; ++ ++ tp.tp_magic = SPLAT_THREAD_TEST_MAGIC; ++ tp.tp_file = file; ++ spin_lock_init(&tp.tp_lock); ++ init_waitqueue_head(&tp.tp_waitq); ++ tp.tp_rc = 0; ++ tp.tp_count = 0; ++ tp.tp_dtor_count = 0; ++ ++ for (i = 0; i < SPLAT_THREAD_TEST_KEYS; i++) { ++ tp.tp_keys[i] = 0; ++ tsd_create(&tp.tp_keys[i], splat_thread_dtor3); ++ } ++ ++ /* Start tsd wait threads */ ++ for (i = 0; i < SPLAT_THREAD_TEST_THREADS; i++) { ++ if (thread_create(NULL, 0, splat_thread_work3_wait, ++ &tp, 0, &p0, TS_RUN, minclsyspri)) ++ wait_count++; ++ } ++ ++ /* All wait threads have setup their tsd and are blocking. 
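++ * Each of them registered SPLAT_THREAD_TEST_KEYS (32) values, so no destructor should have run yet; the checks below expect 32 * exit_count destructor calls once the exit threads have called thread_exit(), and 32 * (exit_count + wait_count) after tsd_destroy() reclaims the keys of the still-blocked wait threads.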
*/ ++ wait_event(tp.tp_waitq, splat_thread_count(&tp, wait_count)); ++ ++ if (tp.tp_dtor_count != 0) { ++ splat_vprint(file, SPLAT_THREAD_TEST3_NAME, ++ "Prematurely ran %d tsd destructors\n", tp.tp_dtor_count); ++ if (!rc) ++ rc = -ERANGE; ++ } ++ ++ /* Start tsd exit threads */ ++ for (i = 0; i < SPLAT_THREAD_TEST_THREADS; i++) { ++ if (thread_create(NULL, 0, splat_thread_work3_exit, ++ &tp, 0, &p0, TS_RUN, minclsyspri)) ++ exit_count++; ++ } ++ ++ /* All exit threads verified tsd and are in the process of exiting */ ++ wait_event(tp.tp_waitq,splat_thread_count(&tp, wait_count+exit_count)); ++ msleep(500); ++ ++ expected = (SPLAT_THREAD_TEST_KEYS * exit_count); ++ if (tp.tp_dtor_count != expected) { ++ splat_vprint(file, SPLAT_THREAD_TEST3_NAME, ++ "Expected %d exit tsd destructors but saw %d\n", ++ expected, tp.tp_dtor_count); ++ if (!rc) ++ rc = -ERANGE; ++ } ++ ++ /* Destroy all keys and associated tsd in blocked threads */ ++ for (i = 0; i < SPLAT_THREAD_TEST_KEYS; i++) ++ tsd_destroy(&tp.tp_keys[i]); ++ ++ expected = (SPLAT_THREAD_TEST_KEYS * (exit_count + wait_count)); ++ if (tp.tp_dtor_count != expected) { ++ splat_vprint(file, SPLAT_THREAD_TEST3_NAME, ++ "Expected %d wait+exit tsd destructors but saw %d\n", ++ expected, tp.tp_dtor_count); ++ if (!rc) ++ rc = -ERANGE; ++ } ++ ++ /* Release the remaining wait threads, sleep briefly while they exit */ ++ spin_lock(&tp.tp_lock); ++ tp.tp_count = 0; ++ wake_up_all(&tp.tp_waitq); ++ spin_unlock(&tp.tp_lock); ++ msleep(500); ++ ++ if (tp.tp_rc) { ++ splat_vprint(file, SPLAT_THREAD_TEST3_NAME, ++ "Thread tsd_get()/tsd_set() error %d\n", tp.tp_rc); ++ if (!rc) ++ rc = tp.tp_rc; ++ } else if (!rc) { ++ splat_vprint(file, SPLAT_THREAD_TEST3_NAME, "%s", ++ "Thread specific data verified\n"); ++ } ++ ++ return rc; ++} ++ ++splat_subsystem_t * ++splat_thread_init(void) ++{ ++ splat_subsystem_t *sub; ++ ++ sub = kmalloc(sizeof(*sub), GFP_KERNEL); ++ if (sub == NULL) ++ return NULL; ++ ++ memset(sub, 0, sizeof(*sub)); ++ strncpy(sub->desc.name, SPLAT_THREAD_NAME, SPLAT_NAME_SIZE); ++ strncpy(sub->desc.desc, SPLAT_THREAD_DESC, SPLAT_DESC_SIZE); ++ INIT_LIST_HEAD(&sub->subsystem_list); ++ INIT_LIST_HEAD(&sub->test_list); ++ spin_lock_init(&sub->test_lock); ++ sub->desc.id = SPLAT_SUBSYSTEM_THREAD; ++ ++ SPLAT_TEST_INIT(sub, SPLAT_THREAD_TEST1_NAME, SPLAT_THREAD_TEST1_DESC, ++ SPLAT_THREAD_TEST1_ID, splat_thread_test1); ++ SPLAT_TEST_INIT(sub, SPLAT_THREAD_TEST2_NAME, SPLAT_THREAD_TEST2_DESC, ++ SPLAT_THREAD_TEST2_ID, splat_thread_test2); ++ SPLAT_TEST_INIT(sub, SPLAT_THREAD_TEST3_NAME, SPLAT_THREAD_TEST3_DESC, ++ SPLAT_THREAD_TEST3_ID, splat_thread_test3); ++ ++ return sub; ++} ++ ++void ++splat_thread_fini(splat_subsystem_t *sub) ++{ ++ ASSERT(sub); ++ SPLAT_TEST_FINI(sub, SPLAT_THREAD_TEST3_ID); ++ SPLAT_TEST_FINI(sub, SPLAT_THREAD_TEST2_ID); ++ SPLAT_TEST_FINI(sub, SPLAT_THREAD_TEST1_ID); ++ ++ kfree(sub); ++} ++ ++int ++splat_thread_id(void) { ++ return SPLAT_SUBSYSTEM_THREAD; ++} +diff -uNr linux-3.2.33-go.orig/spl/splat/splat-time.c linux-3.2.33-go/spl/splat/splat-time.c +--- linux-3.2.33-go.orig/spl/splat/splat-time.c 1970-01-01 01:00:00.000000000 +0100 ++++ linux-3.2.33-go/spl/splat/splat-time.c 2012-11-16 23:22:32.409192874 +0100 +@@ -0,0 +1,117 @@ ++/*****************************************************************************\ ++ * Copyright (C) 2007-2010 Lawrence Livermore National Security, LLC. ++ * Copyright (C) 2007 The Regents of the University of California. 
++ * Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER). ++ * Written by Brian Behlendorf . ++ * UCRL-CODE-235197 ++ * ++ * This file is part of the SPL, Solaris Porting Layer. ++ * For details, see . ++ * ++ * The SPL is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License as published by the ++ * Free Software Foundation; either version 2 of the License, or (at your ++ * option) any later version. ++ * ++ * The SPL is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * for more details. ++ * ++ * You should have received a copy of the GNU General Public License along ++ * with the SPL. If not, see . ++ ***************************************************************************** ++ * Solaris Porting LAyer Tests (SPLAT) Time Tests. ++\*****************************************************************************/ ++ ++#include ++#include "splat-internal.h" ++ ++#define SPLAT_TIME_NAME "time" ++#define SPLAT_TIME_DESC "Kernel Time Tests" ++ ++#define SPLAT_TIME_TEST1_ID 0x0801 ++#define SPLAT_TIME_TEST1_NAME "time1" ++#define SPLAT_TIME_TEST1_DESC "HZ Test" ++ ++#define SPLAT_TIME_TEST2_ID 0x0802 ++#define SPLAT_TIME_TEST2_NAME "time2" ++#define SPLAT_TIME_TEST2_DESC "Monotonic Test" ++ ++static int ++splat_time_test1(struct file *file, void *arg) ++{ ++ int myhz = hz; ++ splat_vprint(file, SPLAT_TIME_TEST1_NAME, "hz is %d\n", myhz); ++ return 0; ++} ++ ++static int ++splat_time_test2(struct file *file, void *arg) ++{ ++ hrtime_t tm1, tm2; ++ int i; ++ ++ tm1 = gethrtime(); ++ splat_vprint(file, SPLAT_TIME_TEST2_NAME, "time is %lld\n", tm1); ++ ++ for(i = 0; i < 100; i++) { ++ tm2 = gethrtime(); ++ splat_vprint(file, SPLAT_TIME_TEST2_NAME, "time is %lld\n", tm2); ++ ++ if(tm1 > tm2) { ++ splat_print(file, "%s: gethrtime() is not giving " ++ "monotonically increasing values\n", ++ SPLAT_TIME_TEST2_NAME); ++ return 1; ++ } ++ tm1 = tm2; ++ ++ set_current_state(TASK_INTERRUPTIBLE); ++ schedule_timeout(10); ++ } ++ ++ return 0; ++} ++ ++splat_subsystem_t * ++splat_time_init(void) ++{ ++ splat_subsystem_t *sub; ++ ++ sub = kmalloc(sizeof(*sub), GFP_KERNEL); ++ if (sub == NULL) ++ return NULL; ++ ++ memset(sub, 0, sizeof(*sub)); ++ strncpy(sub->desc.name, SPLAT_TIME_NAME, SPLAT_NAME_SIZE); ++ strncpy(sub->desc.desc, SPLAT_TIME_DESC, SPLAT_DESC_SIZE); ++ INIT_LIST_HEAD(&sub->subsystem_list); ++ INIT_LIST_HEAD(&sub->test_list); ++ spin_lock_init(&sub->test_lock); ++ sub->desc.id = SPLAT_SUBSYSTEM_TIME; ++ ++ SPLAT_TEST_INIT(sub, SPLAT_TIME_TEST1_NAME, SPLAT_TIME_TEST1_DESC, ++ SPLAT_TIME_TEST1_ID, splat_time_test1); ++ SPLAT_TEST_INIT(sub, SPLAT_TIME_TEST2_NAME, SPLAT_TIME_TEST2_DESC, ++ SPLAT_TIME_TEST2_ID, splat_time_test2); ++ ++ return sub; ++} ++ ++void ++splat_time_fini(splat_subsystem_t *sub) ++{ ++ ASSERT(sub); ++ ++ SPLAT_TEST_FINI(sub, SPLAT_TIME_TEST2_ID); ++ SPLAT_TEST_FINI(sub, SPLAT_TIME_TEST1_ID); ++ ++ kfree(sub); ++} ++ ++int ++splat_time_id(void) ++{ ++ return SPLAT_SUBSYSTEM_TIME; ++} +diff -uNr linux-3.2.33-go.orig/spl/splat/splat-vnode.c linux-3.2.33-go/spl/splat/splat-vnode.c +--- linux-3.2.33-go.orig/spl/splat/splat-vnode.c 1970-01-01 01:00:00.000000000 +0100 ++++ linux-3.2.33-go/spl/splat/splat-vnode.c 2012-11-16 23:22:32.409192874 +0100 +@@ -0,0 +1,445 @@ ++/*****************************************************************************\ ++ * 
Copyright (C) 2007-2010 Lawrence Livermore National Security, LLC. ++ * Copyright (C) 2007 The Regents of the University of California. ++ * Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER). ++ * Written by Brian Behlendorf . ++ * UCRL-CODE-235197 ++ * ++ * This file is part of the SPL, Solaris Porting Layer. ++ * For details, see . ++ * ++ * The SPL is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License as published by the ++ * Free Software Foundation; either version 2 of the License, or (at your ++ * option) any later version. ++ * ++ * The SPL is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * for more details. ++ * ++ * You should have received a copy of the GNU General Public License along ++ * with the SPL. If not, see . ++ ***************************************************************************** ++ * Solaris Porting LAyer Tests (SPLAT) Vnode Tests. ++\*****************************************************************************/ ++ ++#include ++#include "splat-internal.h" ++ ++#define SPLAT_VNODE_NAME "vnode" ++#define SPLAT_VNODE_DESC "Kernel Vnode Tests" ++ ++#define SPLAT_VNODE_TEST1_ID 0x0901 ++#define SPLAT_VNODE_TEST1_NAME "vn_open" ++#define SPLAT_VNODE_TEST1_DESC "Vn_open Test" ++ ++#define SPLAT_VNODE_TEST2_ID 0x0902 ++#define SPLAT_VNODE_TEST2_NAME "vn_openat" ++#define SPLAT_VNODE_TEST2_DESC "Vn_openat Test" ++ ++#define SPLAT_VNODE_TEST3_ID 0x0903 ++#define SPLAT_VNODE_TEST3_NAME "vn_rdwr" ++#define SPLAT_VNODE_TEST3_DESC "Vn_rdwrt Test" ++ ++#define SPLAT_VNODE_TEST4_ID 0x0904 ++#define SPLAT_VNODE_TEST4_NAME "vn_rename" ++#define SPLAT_VNODE_TEST4_DESC "Vn_rename Test" ++ ++#define SPLAT_VNODE_TEST5_ID 0x0905 ++#define SPLAT_VNODE_TEST5_NAME "vn_getattr" ++#define SPLAT_VNODE_TEST5_DESC "Vn_getattr Test" ++ ++#define SPLAT_VNODE_TEST6_ID 0x0906 ++#define SPLAT_VNODE_TEST6_NAME "vn_sync" ++#define SPLAT_VNODE_TEST6_DESC "Vn_sync Test" ++ ++#define SPLAT_VNODE_TEST_FILE "/etc/fstab" ++#define SPLAT_VNODE_TEST_FILE_AT "etc/fstab" ++#define SPLAT_VNODE_TEST_FILE_RW "/tmp/spl.vnode.tmp" ++#define SPLAT_VNODE_TEST_FILE_RW1 "/tmp/spl.vnode.tmp.1" ++#define SPLAT_VNODE_TEST_FILE_RW2 "/tmp/spl.vnode.tmp.2" ++ ++static int ++splat_vnode_user_cmd(struct file *file, void *arg, ++ char *name, char *cmd) ++{ ++ char sh_path[] = "/bin/sh"; ++ char *argv[] = { sh_path, ++ "-c", ++ cmd, ++ NULL }; ++ char *envp[] = { "HOME=/", ++ "TERM=linux", ++ "PATH=/sbin:/usr/sbin:/bin:/usr/bin", ++ NULL }; ++ int rc; ++ ++ rc = call_usermodehelper(sh_path, argv, envp, 1); ++ if (rc) { ++ splat_vprint(file, name, ++ "Failed command: %s %s %s (%d)\n", ++ argv[0], argv[1], cmd, rc); ++ return -EPERM; ++ } ++ ++ return 0; ++} ++ ++static int ++splat_vnode_unlink_all(struct file *file, void *arg, char *name) ++{ ++ char *cmds[] = { "rm -f " SPLAT_VNODE_TEST_FILE_RW, ++ "rm -f " SPLAT_VNODE_TEST_FILE_RW1, ++ "rm -f " SPLAT_VNODE_TEST_FILE_RW2, ++ NULL }; ++ int i = 0, rc = 0; ++ ++ while (cmds[i] != NULL) { ++ if ((rc = splat_vnode_user_cmd(file, arg, name, cmds[i]))) ++ return rc; ++ ++ i++; ++ } ++ ++ return rc; ++} ++ ++static int ++splat_vnode_test1(struct file *file, void *arg) ++{ ++ vnode_t *vp; ++ int rc; ++ ++ if ((rc = vn_open(SPLAT_VNODE_TEST_FILE, UIO_SYSSPACE, ++ FREAD, 0644, &vp, 0, 0))) { ++ splat_vprint(file, SPLAT_VNODE_TEST1_NAME, ++ "Failed 
to vn_open test file: %s (%d)\n", ++ SPLAT_VNODE_TEST_FILE, rc); ++ return -rc; ++ } ++ ++ rc = VOP_CLOSE(vp, 0, 0, 0, 0, 0); ++ ++ if (rc) { ++ splat_vprint(file, SPLAT_VNODE_TEST1_NAME, ++ "Failed to vn_close test file: %s (%d)\n", ++ SPLAT_VNODE_TEST_FILE, rc); ++ return -rc; ++ } ++ ++ splat_vprint(file, SPLAT_VNODE_TEST1_NAME, "Successfully vn_open'ed " ++ "and vn_closed test file: %s\n", SPLAT_VNODE_TEST_FILE); ++ ++ return -rc; ++} /* splat_vnode_test1() */ ++ ++static int ++splat_vnode_test2(struct file *file, void *arg) ++{ ++ vnode_t *vp; ++ int rc; ++ ++ if ((rc = vn_openat(SPLAT_VNODE_TEST_FILE_AT, UIO_SYSSPACE, ++ FREAD, 0644, &vp, 0, 0, rootdir, 0))) { ++ splat_vprint(file, SPLAT_VNODE_TEST2_NAME, ++ "Failed to vn_openat test file: %s (%d)\n", ++ SPLAT_VNODE_TEST_FILE, rc); ++ return -rc; ++ } ++ ++ rc = VOP_CLOSE(vp, 0, 0, 0, 0, 0); ++ ++ if (rc) { ++ splat_vprint(file, SPLAT_VNODE_TEST2_NAME, ++ "Failed to vn_close test file: %s (%d)\n", ++ SPLAT_VNODE_TEST_FILE, rc); ++ return -rc; ++ } ++ ++ splat_vprint(file, SPLAT_VNODE_TEST2_NAME, "Successfully vn_openat'ed " ++ "and vn_closed test file: %s\n", SPLAT_VNODE_TEST_FILE); ++ ++ return -rc; ++} /* splat_vnode_test2() */ ++ ++static int ++splat_vnode_test3(struct file *file, void *arg) ++{ ++ vnode_t *vp; ++ char buf1[32] = "SPL VNode Interface Test File\n"; ++ char buf2[32] = ""; ++ int rc; ++ ++ if ((rc = splat_vnode_unlink_all(file, arg, SPLAT_VNODE_TEST3_NAME))) ++ return rc; ++ ++ if ((rc = vn_open(SPLAT_VNODE_TEST_FILE_RW, UIO_SYSSPACE, ++ FWRITE | FREAD | FCREAT | FEXCL, ++ 0644, &vp, 0, 0))) { ++ splat_vprint(file, SPLAT_VNODE_TEST3_NAME, ++ "Failed to vn_open test file: %s (%d)\n", ++ SPLAT_VNODE_TEST_FILE_RW, rc); ++ return -rc; ++ } ++ ++ rc = vn_rdwr(UIO_WRITE, vp, buf1, strlen(buf1), 0, ++ UIO_SYSSPACE, 0, RLIM64_INFINITY, 0, NULL); ++ if (rc) { ++ splat_vprint(file, SPLAT_VNODE_TEST3_NAME, ++ "Failed vn_rdwr write of test file: %s (%d)\n", ++ SPLAT_VNODE_TEST_FILE_RW, rc); ++ goto out; ++ } ++ ++ rc = vn_rdwr(UIO_READ, vp, buf2, strlen(buf1), 0, ++ UIO_SYSSPACE, 0, RLIM64_INFINITY, 0, NULL); ++ if (rc) { ++ splat_vprint(file, SPLAT_VNODE_TEST3_NAME, ++ "Failed vn_rdwr read of test file: %s (%d)\n", ++ SPLAT_VNODE_TEST_FILE_RW, rc); ++ goto out; ++ } ++ ++ if (strncmp(buf1, buf2, strlen(buf1))) { ++ rc = EINVAL; ++ splat_vprint(file, SPLAT_VNODE_TEST3_NAME, ++ "Failed strncmp data written does not match " ++ "data read\nWrote: %sRead: %s\n", buf1, buf2); ++ goto out; ++ } ++ ++ rc = 0; ++ splat_vprint(file, SPLAT_VNODE_TEST3_NAME, "Wrote: %s", buf1); ++ splat_vprint(file, SPLAT_VNODE_TEST3_NAME, "Read: %s", buf2); ++ splat_vprint(file, SPLAT_VNODE_TEST3_NAME, "Successfully wrote and " ++ "read expected data pattern to test file: %s\n", ++ SPLAT_VNODE_TEST_FILE_RW); ++ ++out: ++ VOP_CLOSE(vp, 0, 0, 0, 0, 0); ++ vn_remove(SPLAT_VNODE_TEST_FILE_RW, UIO_SYSSPACE, RMFILE); ++ ++ return -rc; ++} /* splat_vnode_test3() */ ++ ++static int ++splat_vnode_test4(struct file *file, void *arg) ++{ ++ vnode_t *vp; ++ char buf1[32] = "SPL VNode Interface Test File\n"; ++ char buf2[32] = ""; ++ int rc; ++ ++ if ((rc = splat_vnode_unlink_all(file, arg, SPLAT_VNODE_TEST4_NAME))) ++ return rc; ++ ++ if ((rc = vn_open(SPLAT_VNODE_TEST_FILE_RW1, UIO_SYSSPACE, ++ FWRITE | FREAD | FCREAT | FEXCL, 0644, &vp, 0, 0))) { ++ splat_vprint(file, SPLAT_VNODE_TEST4_NAME, ++ "Failed to vn_open test file: %s (%d)\n", ++ SPLAT_VNODE_TEST_FILE_RW1, rc); ++ goto out; ++ } ++ ++ rc = vn_rdwr(UIO_WRITE, vp, buf1, strlen(buf1), 0, ++ UIO_SYSSPACE, 0, 
RLIM64_INFINITY, 0, NULL); ++ if (rc) { ++ splat_vprint(file, SPLAT_VNODE_TEST4_NAME, ++ "Failed vn_rdwr write of test file: %s (%d)\n", ++ SPLAT_VNODE_TEST_FILE_RW1, rc); ++ goto out2; ++ } ++ ++ VOP_CLOSE(vp, 0, 0, 0, 0, 0); ++ ++ rc = vn_rename(SPLAT_VNODE_TEST_FILE_RW1,SPLAT_VNODE_TEST_FILE_RW2,0); ++ if (rc) { ++ splat_vprint(file, SPLAT_VNODE_TEST4_NAME, "Failed vn_rename " ++ "%s -> %s (%d)\n", ++ SPLAT_VNODE_TEST_FILE_RW1, ++ SPLAT_VNODE_TEST_FILE_RW2, rc); ++ goto out; ++ } ++ ++ if ((rc = vn_open(SPLAT_VNODE_TEST_FILE_RW2, UIO_SYSSPACE, ++ FREAD | FEXCL, 0644, &vp, 0, 0))) { ++ splat_vprint(file, SPLAT_VNODE_TEST4_NAME, ++ "Failed to vn_open test file: %s (%d)\n", ++ SPLAT_VNODE_TEST_FILE_RW2, rc); ++ goto out; ++ } ++ ++ rc = vn_rdwr(UIO_READ, vp, buf2, strlen(buf1), 0, ++ UIO_SYSSPACE, 0, RLIM64_INFINITY, 0, NULL); ++ if (rc) { ++ splat_vprint(file, SPLAT_VNODE_TEST4_NAME, ++ "Failed vn_rdwr read of test file: %s (%d)\n", ++ SPLAT_VNODE_TEST_FILE_RW2, rc); ++ goto out2; ++ } ++ ++ if (strncmp(buf1, buf2, strlen(buf1))) { ++ rc = EINVAL; ++ splat_vprint(file, SPLAT_VNODE_TEST4_NAME, ++ "Failed strncmp data written does not match " ++ "data read\nWrote: %sRead: %s\n", buf1, buf2); ++ goto out2; ++ } ++ ++ rc = 0; ++ splat_vprint(file, SPLAT_VNODE_TEST4_NAME, "Wrote to %s: %s", ++ SPLAT_VNODE_TEST_FILE_RW1, buf1); ++ splat_vprint(file, SPLAT_VNODE_TEST4_NAME, "Read from %s: %s", ++ SPLAT_VNODE_TEST_FILE_RW2, buf2); ++ splat_vprint(file, SPLAT_VNODE_TEST4_NAME, "Successfully renamed " ++ "test file %s -> %s and verified data pattern\n", ++ SPLAT_VNODE_TEST_FILE_RW1, SPLAT_VNODE_TEST_FILE_RW2); ++out2: ++ VOP_CLOSE(vp, 0, 0, 0, 0, 0); ++out: ++ vn_remove(SPLAT_VNODE_TEST_FILE_RW1, UIO_SYSSPACE, RMFILE); ++ vn_remove(SPLAT_VNODE_TEST_FILE_RW2, UIO_SYSSPACE, RMFILE); ++ ++ return -rc; ++} /* splat_vnode_test4() */ ++ ++static int ++splat_vnode_test5(struct file *file, void *arg) ++{ ++ vnode_t *vp; ++ vattr_t vap; ++ int rc; ++ ++ if ((rc = vn_open(SPLAT_VNODE_TEST_FILE, UIO_SYSSPACE, ++ FREAD, 0644, &vp, 0, 0))) { ++ splat_vprint(file, SPLAT_VNODE_TEST5_NAME, ++ "Failed to vn_open test file: %s (%d)\n", ++ SPLAT_VNODE_TEST_FILE, rc); ++ return -rc; ++ } ++ ++ rc = VOP_GETATTR(vp, &vap, 0, 0, NULL); ++ if (rc) { ++ splat_vprint(file, SPLAT_VNODE_TEST5_NAME, ++ "Failed to vn_getattr test file: %s (%d)\n", ++ SPLAT_VNODE_TEST_FILE, rc); ++ goto out; ++ } ++ ++ if (vap.va_type != VREG) { ++ rc = EINVAL; ++ splat_vprint(file, SPLAT_VNODE_TEST5_NAME, ++ "Failed expected regular file type " ++ "(%d != VREG): %s (%d)\n", vap.va_type, ++ SPLAT_VNODE_TEST_FILE, rc); ++ goto out; ++ } ++ ++ splat_vprint(file, SPLAT_VNODE_TEST1_NAME, "Successfully " ++ "vn_getattr'ed test file: %s\n", SPLAT_VNODE_TEST_FILE); ++ ++out: ++ VOP_CLOSE(vp, 0, 0, 0, 0, 0); ++ ++ return -rc; ++} /* splat_vnode_test5() */ ++ ++static int ++splat_vnode_test6(struct file *file, void *arg) ++{ ++ vnode_t *vp; ++ char buf[32] = "SPL VNode Interface Test File\n"; ++ int rc; ++ ++ if ((rc = splat_vnode_unlink_all(file, arg, SPLAT_VNODE_TEST6_NAME))) ++ return rc; ++ ++ if ((rc = vn_open(SPLAT_VNODE_TEST_FILE_RW, UIO_SYSSPACE, ++ FWRITE | FCREAT | FEXCL, 0644, &vp, 0, 0))) { ++ splat_vprint(file, SPLAT_VNODE_TEST6_NAME, ++ "Failed to vn_open test file: %s (%d)\n", ++ SPLAT_VNODE_TEST_FILE_RW, rc); ++ return -rc; ++ } ++ ++ rc = vn_rdwr(UIO_WRITE, vp, buf, strlen(buf), 0, ++ UIO_SYSSPACE, 0, RLIM64_INFINITY, 0, NULL); ++ if (rc) { ++ splat_vprint(file, SPLAT_VNODE_TEST6_NAME, ++ "Failed vn_rdwr write of test file: %s (%d)\n", 
++ SPLAT_VNODE_TEST_FILE_RW, rc); ++ goto out; ++ } ++ ++ rc = vn_fsync(vp, 0, 0, 0); ++ if (rc) { ++ splat_vprint(file, SPLAT_VNODE_TEST6_NAME, ++ "Failed vn_fsync of test file: %s (%d)\n", ++ SPLAT_VNODE_TEST_FILE_RW, rc); ++ goto out; ++ } ++ ++ rc = 0; ++ splat_vprint(file, SPLAT_VNODE_TEST6_NAME, "Successfully " ++ "fsync'ed test file %s\n", SPLAT_VNODE_TEST_FILE_RW); ++out: ++ VOP_CLOSE(vp, 0, 0, 0, 0, 0); ++ vn_remove(SPLAT_VNODE_TEST_FILE_RW, UIO_SYSSPACE, RMFILE); ++ ++ return -rc; ++} /* splat_vnode_test6() */ ++ ++splat_subsystem_t * ++splat_vnode_init(void) ++{ ++ splat_subsystem_t *sub; ++ ++ sub = kmalloc(sizeof(*sub), GFP_KERNEL); ++ if (sub == NULL) ++ return NULL; ++ ++ memset(sub, 0, sizeof(*sub)); ++ strncpy(sub->desc.name, SPLAT_VNODE_NAME, SPLAT_NAME_SIZE); ++ strncpy(sub->desc.desc, SPLAT_VNODE_DESC, SPLAT_DESC_SIZE); ++ INIT_LIST_HEAD(&sub->subsystem_list); ++ INIT_LIST_HEAD(&sub->test_list); ++ spin_lock_init(&sub->test_lock); ++ sub->desc.id = SPLAT_SUBSYSTEM_VNODE; ++ ++ SPLAT_TEST_INIT(sub, SPLAT_VNODE_TEST1_NAME, SPLAT_VNODE_TEST1_DESC, ++ SPLAT_VNODE_TEST1_ID, splat_vnode_test1); ++ SPLAT_TEST_INIT(sub, SPLAT_VNODE_TEST2_NAME, SPLAT_VNODE_TEST2_DESC, ++ SPLAT_VNODE_TEST2_ID, splat_vnode_test2); ++ SPLAT_TEST_INIT(sub, SPLAT_VNODE_TEST3_NAME, SPLAT_VNODE_TEST3_DESC, ++ SPLAT_VNODE_TEST3_ID, splat_vnode_test3); ++ SPLAT_TEST_INIT(sub, SPLAT_VNODE_TEST4_NAME, SPLAT_VNODE_TEST4_DESC, ++ SPLAT_VNODE_TEST4_ID, splat_vnode_test4); ++ SPLAT_TEST_INIT(sub, SPLAT_VNODE_TEST5_NAME, SPLAT_VNODE_TEST5_DESC, ++ SPLAT_VNODE_TEST5_ID, splat_vnode_test5); ++ SPLAT_TEST_INIT(sub, SPLAT_VNODE_TEST6_NAME, SPLAT_VNODE_TEST6_DESC, ++ SPLAT_VNODE_TEST6_ID, splat_vnode_test6); ++ ++ return sub; ++} /* splat_vnode_init() */ ++ ++void ++splat_vnode_fini(splat_subsystem_t *sub) ++{ ++ ASSERT(sub); ++ ++ SPLAT_TEST_FINI(sub, SPLAT_VNODE_TEST6_ID); ++ SPLAT_TEST_FINI(sub, SPLAT_VNODE_TEST5_ID); ++ SPLAT_TEST_FINI(sub, SPLAT_VNODE_TEST4_ID); ++ SPLAT_TEST_FINI(sub, SPLAT_VNODE_TEST3_ID); ++ SPLAT_TEST_FINI(sub, SPLAT_VNODE_TEST2_ID); ++ SPLAT_TEST_FINI(sub, SPLAT_VNODE_TEST1_ID); ++ ++ kfree(sub); ++} /* splat_vnode_fini() */ ++ ++int ++splat_vnode_id(void) ++{ ++ return SPLAT_SUBSYSTEM_VNODE; ++} /* splat_vnode_id() */ +diff -uNr linux-3.2.33-go.orig/spl/splat/splat-zlib.c linux-3.2.33-go/spl/splat/splat-zlib.c +--- linux-3.2.33-go.orig/spl/splat/splat-zlib.c 1970-01-01 01:00:00.000000000 +0100 ++++ linux-3.2.33-go/spl/splat/splat-zlib.c 2012-11-16 23:22:32.409192874 +0100 +@@ -0,0 +1,165 @@ ++/*****************************************************************************\ ++ * Copyright (C) 2007-2010 Lawrence Livermore National Security, LLC. ++ * Copyright (C) 2007 The Regents of the University of California. ++ * Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER). ++ * Written by Brian Behlendorf . ++ * UCRL-CODE-235197 ++ * ++ * This file is part of the SPL, Solaris Porting Layer. ++ * For details, see . ++ * ++ * The SPL is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License as published by the ++ * Free Software Foundation; either version 2 of the License, or (at your ++ * option) any later version. ++ * ++ * The SPL is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * for more details. 
++ * ++ * You should have received a copy of the GNU General Public License along ++ * with the SPL. If not, see . ++ ***************************************************************************** ++ * Solaris Porting LAyer Tests (SPLAT) Zlib Compression Tests. ++\*****************************************************************************/ ++ ++#include ++#include ++#include ++#include "splat-internal.h" ++ ++#define SPLAT_ZLIB_NAME "zlib" ++#define SPLAT_ZLIB_DESC "Zlib Compression Tests" ++ ++#define SPLAT_ZLIB_TEST1_ID 0x0f01 ++#define SPLAT_ZLIB_TEST1_NAME "compress/uncompress" ++#define SPLAT_ZLIB_TEST1_DESC "Compress/Uncompress Test" ++ ++#define BUFFER_SIZE (128 * 1024) ++ ++static int ++splat_zlib_test1_check(struct file *file, void *src, void *dst, void *chk, ++ int level) ++{ ++ size_t dst_len = BUFFER_SIZE; ++ size_t chk_len = BUFFER_SIZE; ++ int rc; ++ ++ memset(dst, 0, BUFFER_SIZE); ++ memset(chk, 0, BUFFER_SIZE); ++ ++ rc = z_compress_level(dst, &dst_len, src, BUFFER_SIZE, level); ++ if (rc != Z_OK) { ++ splat_vprint(file, SPLAT_ZLIB_TEST1_NAME, ++ "Failed level %d z_compress_level(), %d\n", level, rc); ++ return -EINVAL; ++ } ++ ++ rc = z_uncompress(chk, &chk_len, dst, dst_len); ++ if (rc != Z_OK) { ++ splat_vprint(file, SPLAT_ZLIB_TEST1_NAME, ++ "Failed level %d z_uncompress(), %d\n", level, rc); ++ return -EINVAL; ++ } ++ ++ rc = memcmp(src, chk, BUFFER_SIZE); ++ if (rc) { ++ splat_vprint(file, SPLAT_ZLIB_TEST1_NAME, ++ "Failed level %d memcmp()), %d\n", level, rc); ++ return -EINVAL; ++ } ++ ++ splat_vprint(file, SPLAT_ZLIB_TEST1_NAME, ++ "Passed level %d, compressed %d bytes to %d bytes\n", ++ level, BUFFER_SIZE, (int)dst_len); ++ ++ return 0; ++} ++ ++/* ++ * Compress a buffer, uncompress the newly compressed buffer, then ++ * compare it to the original. Do this for all 9 compression levels. ++ */ ++static int ++splat_zlib_test1(struct file *file, void *arg) ++{ ++ void *src = NULL, *dst = NULL, *chk = NULL; ++ int i, rc, level; ++ ++ src = vmalloc(BUFFER_SIZE); ++ if (src == NULL) { ++ rc = -ENOMEM; ++ goto out; ++ } ++ ++ dst = vmalloc(BUFFER_SIZE); ++ if (dst == NULL) { ++ rc = -ENOMEM; ++ goto out; ++ } ++ ++ chk = vmalloc(BUFFER_SIZE); ++ if (chk == NULL) { ++ rc = -ENOMEM; ++ goto out; ++ } ++ ++ /* Source buffer is a repeating 1024 byte random pattern. 
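++ * Only the first 1 KiB is filled with random bytes; it is then copied into the remaining 127 slots of the 128 KiB buffer, so every zlib level tested below has redundant input it can actually compress.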
*/ ++ random_get_pseudo_bytes(src, sizeof(uint8_t) * 1024); ++ for (i = 1; i < 128; i++) ++ memcpy(src + (i * 1024), src, 1024); ++ ++ for (level = 1; level <= 9; level++) ++ if ((rc = splat_zlib_test1_check(file, src, dst, chk, level))) ++ break; ++out: ++ if (src) ++ vfree(src); ++ ++ if (dst) ++ vfree(dst); ++ ++ if (chk) ++ vfree(chk); ++ ++ return rc; ++} ++ ++splat_subsystem_t * ++splat_zlib_init(void) ++{ ++ splat_subsystem_t *sub; ++ ++ sub = kmalloc(sizeof(*sub), GFP_KERNEL); ++ if (sub == NULL) ++ return NULL; ++ ++ memset(sub, 0, sizeof(*sub)); ++ strncpy(sub->desc.name, SPLAT_ZLIB_NAME, SPLAT_NAME_SIZE); ++ strncpy(sub->desc.desc, SPLAT_ZLIB_DESC, SPLAT_DESC_SIZE); ++ INIT_LIST_HEAD(&sub->subsystem_list); ++ INIT_LIST_HEAD(&sub->test_list); ++ spin_lock_init(&sub->test_lock); ++ sub->desc.id = SPLAT_SUBSYSTEM_ZLIB; ++ ++ SPLAT_TEST_INIT(sub, SPLAT_ZLIB_TEST1_NAME, SPLAT_ZLIB_TEST1_DESC, ++ SPLAT_ZLIB_TEST1_ID, splat_zlib_test1); ++ ++ return sub; ++} ++ ++void ++splat_zlib_fini(splat_subsystem_t *sub) ++{ ++ ASSERT(sub); ++ ++ SPLAT_TEST_FINI(sub, SPLAT_ZLIB_TEST1_ID); ++ ++ kfree(sub); ++} ++ ++int ++splat_zlib_id(void) { ++ return SPLAT_SUBSYSTEM_ZLIB; ++} +diff -uNr linux-3.2.33-go.orig/spl_config.h linux-3.2.33-go/spl_config.h +--- linux-3.2.33-go.orig/spl_config.h 1970-01-01 01:00:00.000000000 +0100 ++++ linux-3.2.33-go/spl_config.h 2012-11-16 23:22:32.412192839 +0100 +@@ -0,0 +1,289 @@ ++/* spl_config.h. Generated from spl_config.h.in by configure. */ ++/* spl_config.h.in. Generated from configure.ac by autoheader. */ ++ ++/* Atomic types use spinlocks */ ++/* #undef ATOMIC_SPINLOCK */ ++ ++/* Define to 1 to enable basic kmem accounting */ ++#define DEBUG_KMEM 1 ++ ++/* Define to 1 to enable detailed kmem tracking */ ++/* #undef DEBUG_KMEM_TRACKING */ ++ ++/* Define to 1 to enable basic debug logging */ ++#define DEBUG_LOG 1 ++ ++/* invalidate_inodes() wants 2 args */ ++#define HAVE_2ARGS_INVALIDATE_INODES 1 ++ ++/* register_sysctl_table() wants 2 args */ ++/* #undef HAVE_2ARGS_REGISTER_SYSCTL */ ++ ++/* set_fs_pwd() wants 2 args */ ++#define HAVE_2ARGS_SET_FS_PWD 1 ++ ++/* vfs_fsync() wants 2 args */ ++#define HAVE_2ARGS_VFS_FSYNC 1 ++ ++/* vfs_unlink() wants 2 args */ ++#define HAVE_2ARGS_VFS_UNLINK 1 ++ ++/* zlib_deflate_workspacesize() wants 2 args */ ++#define HAVE_2ARGS_ZLIB_DEFLATE_WORKSPACESIZE 1 ++ ++/* INIT_WORK wants 3 args */ ++/* #undef HAVE_3ARGS_INIT_WORK */ ++ ++/* on_each_cpu wants 3 args */ ++#define HAVE_3ARGS_ON_EACH_CPU 1 ++ ++/* shrinker callback wants 3 args */ ++/* #undef HAVE_3ARGS_SHRINKER_CALLBACK */ ++ ++/* vfs_rename() wants 4 args */ ++#define HAVE_4ARGS_VFS_RENAME 1 ++ ++/* device_create wants 5 args */ ++#define HAVE_5ARGS_DEVICE_CREATE 1 ++ ++/* proc_handler() wants 5 args */ ++#define HAVE_5ARGS_PROC_HANDLER 1 ++ ++/* kernel defines atomic64_cmpxchg */ ++/* #undef HAVE_ATOMIC64_CMPXCHG */ ++ ++/* kernel defines atomic64_t */ ++#define HAVE_ATOMIC64_T 1 ++ ++/* kernel defines atomic64_xchg */ ++#define HAVE_ATOMIC64_XCHG 1 ++ ++/* class_device_create() is available */ ++/* #undef HAVE_CLASS_DEVICE_CREATE */ ++ ++/* struct cred exists */ ++#define HAVE_CRED_STRUCT 1 ++ ++/* struct ctl_table has ctl_name */ ++/* #undef HAVE_CTL_NAME */ ++ ++/* unnumbered sysctl support exists */ ++/* #undef HAVE_CTL_UNNUMBERED */ ++ ++/* device_create() is available */ ++#define HAVE_DEVICE_CREATE 1 ++ ++/* Define to 1 if you have the header file. 
*/ ++#define HAVE_DLFCN_H 1 ++ ++/* first_online_pgdat() is available */ ++#define HAVE_FIRST_ONLINE_PGDAT 1 ++ ++/* fls64() is available */ ++#define HAVE_FLS64 1 ++ ++/* struct fs_struct uses spinlock_t */ ++#define HAVE_FS_STRUCT_SPINLOCK 1 ++ ++/* get_vmalloc_info() is available */ ++/* #undef HAVE_GET_VMALLOC_INFO */ ++ ++/* get_zone_counts() is available */ ++/* #undef HAVE_GET_ZONE_COUNTS */ ++ ++/* global_page_state() is available */ ++#define HAVE_GLOBAL_PAGE_STATE 1 ++ ++/* groups_search() is available */ ++#define HAVE_GROUPS_SEARCH 1 ++ ++/* init_utsname() is available */ ++#define HAVE_INIT_UTSNAME 1 ++ ++/* struct inode has i_mutex */ ++#define HAVE_INODE_I_MUTEX 1 ++ ++/* truncate_range() inode operation is available */ ++/* #undef HAVE_INODE_TRUNCATE_RANGE */ ++ ++/* Define to 1 if you have the header file. */ ++#define HAVE_INTTYPES_H 1 ++ ++/* invalidate_inodes() is available */ ++/* #undef HAVE_INVALIDATE_INODES */ ++ ++/* invalidate_inodes_check() is available */ ++/* #undef HAVE_INVALIDATE_INODES_CHECK */ ++ ++/* kallsyms_lookup_name() is available */ ++#define HAVE_KALLSYMS_LOOKUP_NAME 1 ++ ++/* kern_path_locked() is available */ ++/* #undef HAVE_KERN_PATH_LOCKED */ ++ ++/* kern_path_parent() is available */ ++#define HAVE_KERN_PATH_PARENT_HEADER 1 ++ ++/* kern_path_parent() is available */ ++/* #undef HAVE_KERN_PATH_PARENT_SYMBOL */ ++ ++/* kmalloc_node() is available */ ++#define HAVE_KMALLOC_NODE 1 ++ ++/* kvasprintf() is available */ ++#define HAVE_KVASPRINTF 1 ++ ++/* Define to 1 if you have the header file. */ ++#define HAVE_MEMORY_H 1 ++ ++/* monotonic_clock() is available */ ++/* #undef HAVE_MONOTONIC_CLOCK */ ++ ++/* mutex_lock_nested() is available */ ++#define HAVE_MUTEX_LOCK_NESTED 1 ++ ++/* struct mutex has owner */ ++#define HAVE_MUTEX_OWNER 1 ++ ++/* struct mutex owner is a task_struct */ ++#define HAVE_MUTEX_OWNER_TASK_STRUCT 1 ++ ++/* next_online_pgdat() is available */ ++#define HAVE_NEXT_ONLINE_PGDAT 1 ++ ++/* next_zone() is available */ ++#define HAVE_NEXT_ZONE 1 ++ ++/* struct path used in struct nameidata */ ++#define HAVE_PATH_IN_NAMEIDATA 1 ++ ++/* pgdat helpers are available */ ++#define HAVE_PGDAT_HELPERS 1 ++ ++/* pgdat_list is available */ ++/* #undef HAVE_PGDAT_LIST */ ++ ++/* __put_task_struct() is available */ ++#define HAVE_PUT_TASK_STRUCT 1 ++ ++/* set_fs_pwd() is available */ ++#define HAVE_SET_FS_PWD 1 ++ ++/* set_normalized_timespec() is available as export */ ++#define HAVE_SET_NORMALIZED_TIMESPEC_EXPORT 1 ++ ++/* set_normalized_timespec() is available as inline */ ++#define HAVE_SET_NORMALIZED_TIMESPEC_INLINE 1 ++ ++/* set_shrinker() available */ ++/* #undef HAVE_SET_SHRINKER */ ++ ++/* struct shrink_control exists */ ++#define HAVE_SHRINK_CONTROL_STRUCT 1 ++ ++/* shrink_dcache_memory() is available */ ++/* #undef HAVE_SHRINK_DCACHE_MEMORY */ ++ ++/* shrink_icache_memory() is available */ ++/* #undef HAVE_SHRINK_ICACHE_MEMORY */ ++ ++/* Define to 1 if you have the header file. */ ++#define HAVE_STDINT_H 1 ++ ++/* Define to 1 if you have the header file. */ ++#define HAVE_STDLIB_H 1 ++ ++/* Define to 1 if you have the header file. */ ++#define HAVE_STRINGS_H 1 ++ ++/* Define to 1 if you have the header file. */ ++#define HAVE_STRING_H 1 ++ ++/* Define to 1 if you have the header file. */ ++#define HAVE_SYS_STAT_H 1 ++ ++/* Define to 1 if you have the header file. 
*/ ++#define HAVE_SYS_TYPES_H 1 ++ ++/* task_curr() is available */ ++#define HAVE_TASK_CURR 1 ++ ++/* timespec_sub() is available */ ++#define HAVE_TIMESPEC_SUB 1 ++ ++/* linux/uaccess.h exists */ ++#define HAVE_UACCESS_HEADER 1 ++ ++/* kernel defines uintptr_t */ ++#define HAVE_UINTPTR_T 1 ++ ++/* Define to 1 if you have the header file. */ ++#define HAVE_UNISTD_H 1 ++ ++/* user_path_dir() is available */ ++#define HAVE_USER_PATH_DIR 1 ++ ++/* vfs_fsync() is available */ ++#define HAVE_VFS_FSYNC 1 ++ ++/* Page state NR_ACTIVE is available */ ++/* #undef HAVE_ZONE_STAT_ITEM_NR_ACTIVE */ ++ ++/* Page state NR_ACTIVE_ANON is available */ ++#define HAVE_ZONE_STAT_ITEM_NR_ACTIVE_ANON 1 ++ ++/* Page state NR_ACTIVE_FILE is available */ ++#define HAVE_ZONE_STAT_ITEM_NR_ACTIVE_FILE 1 ++ ++/* Page state NR_FREE_PAGES is available */ ++#define HAVE_ZONE_STAT_ITEM_NR_FREE_PAGES 1 ++ ++/* Page state NR_INACTIVE is available */ ++/* #undef HAVE_ZONE_STAT_ITEM_NR_INACTIVE */ ++ ++/* Page state NR_INACTIVE_ANON is available */ ++#define HAVE_ZONE_STAT_ITEM_NR_INACTIVE_ANON 1 ++ ++/* Page state NR_INACTIVE_FILE is available */ ++#define HAVE_ZONE_STAT_ITEM_NR_INACTIVE_FILE 1 ++ ++/* Define to the sub-directory in which libtool stores uninstalled libraries. ++ */ ++#define LT_OBJDIR ".libs/" ++ ++/* get_zone_counts() is needed */ ++/* #undef NEED_GET_ZONE_COUNTS */ ++ ++/* rwsem_is_locked() acquires sem->wait_lock */ ++/* #undef RWSEM_IS_LOCKED_TAKES_WAIT_LOCK */ ++ ++/* struct rw_semaphore member wait_lock is raw_spinlock_t */ ++#define RWSEM_SPINLOCK_IS_RAW 1 ++ ++/* Define the project alias string. */ ++#define SPL_META_ALIAS "spl-0.6.0-rc12" ++ ++/* Define the project author. */ ++/* #undef SPL_META_AUTHOR */ ++ ++/* Define the project release date. */ ++/* #undef SPL_META_DATA */ ++ ++/* Define the libtool library 'age' version information. */ ++/* #undef SPL_META_LT_AGE */ ++ ++/* Define the libtool library 'current' version information. */ ++/* #undef SPL_META_LT_CURRENT */ ++ ++/* Define the libtool library 'revision' version information. */ ++/* #undef SPL_META_LT_REVISION */ ++ ++/* Define the project name. */ ++#define SPL_META_NAME "spl" ++ ++/* Define the project release. */ ++#define SPL_META_RELEASE "rc12" ++ ++/* Define the project version. */ ++#define SPL_META_VERSION "0.6.0" ++ +diff -uNr linux-3.2.33-go.orig/zfs_config.h linux-3.2.33-go/zfs_config.h +--- linux-3.2.33-go.orig/zfs_config.h 1970-01-01 01:00:00.000000000 +0100 ++++ linux-3.2.33-go/zfs_config.h 2012-11-16 23:25:34.356039255 +0100 +@@ -0,0 +1,317 @@ ++/* zfs_config.h. Generated from zfs_config.h.in by configure. */ ++/* zfs_config.h.in. Generated from configure.ac by autoheader. 
*/ ++ ++/* Define to 1 to enabled dmu tx validation */ ++/* #undef DEBUG_DMU_TX */ ++ ++/* invalidate_bdev() wants 1 arg */ ++#define HAVE_1ARG_INVALIDATE_BDEV 1 ++ ++/* bio_end_io_t wants 2 args */ ++#define HAVE_2ARGS_BIO_END_IO_T 1 ++ ++/* blkdev_get() wants 3 args */ ++#define HAVE_3ARG_BLKDEV_GET 1 ++ ++/* sget() wants 5 args */ ++#define HAVE_5ARG_SGET 1 ++ ++/* security_inode_init_security wants 6 args */ ++/* #undef HAVE_6ARGS_SECURITY_INODE_INIT_SECURITY */ ++ ++/* dops->automount() exists */ ++#define HAVE_AUTOMOUNT 1 ++ ++/* struct block_device_operations use bdevs */ ++#define HAVE_BDEV_BLOCK_DEVICE_OPERATIONS 1 ++ ++/* bdev_logical_block_size() is available */ ++#define HAVE_BDEV_LOGICAL_BLOCK_SIZE 1 ++ ++/* struct super_block has s_bdi */ ++#define HAVE_BDI 1 ++ ++/* bdi_setup_and_register() is available */ ++#define HAVE_BDI_SETUP_AND_REGISTER 1 ++ ++/* bio_empy_barrier() is defined */ ++/* #undef HAVE_BIO_EMPTY_BARRIER */ ++ ++/* REQ_FAILFAST_MASK is defined */ ++#define HAVE_BIO_REQ_FAILFAST_MASK 1 ++ ++/* BIO_RW_FAILFAST is defined */ ++/* #undef HAVE_BIO_RW_FAILFAST */ ++ ++/* BIO_RW_FAILFAST_* are defined */ ++/* #undef HAVE_BIO_RW_FAILFAST_DTD */ ++ ++/* BIO_RW_SYNC is defined */ ++/* #undef HAVE_BIO_RW_SYNC */ ++ ++/* BIO_RW_SYNCIO is defined */ ++/* #undef HAVE_BIO_RW_SYNCIO */ ++ ++/* blkdev_get_by_path() is available */ ++#define HAVE_BLKDEV_GET_BY_PATH 1 ++ ++/* blk_end_request() is available */ ++#define HAVE_BLK_END_REQUEST 1 ++ ++/* blk_end_request() is GPL-only */ ++/* #undef HAVE_BLK_END_REQUEST_GPL_ONLY */ ++ ++/* blk_fetch_request() is available */ ++#define HAVE_BLK_FETCH_REQUEST 1 ++ ++/* blk_queue_discard() is available */ ++#define HAVE_BLK_QUEUE_DISCARD 1 ++ ++/* blk_queue_flush() is available */ ++#define HAVE_BLK_QUEUE_FLUSH 1 ++ ++/* blk_queue_flush() is GPL-only */ ++/* #undef HAVE_BLK_QUEUE_FLUSH_GPL_ONLY */ ++ ++/* blk_queue_io_opt() is available */ ++#define HAVE_BLK_QUEUE_IO_OPT 1 ++ ++/* blk_queue_max_hw_sectors() is available */ ++#define HAVE_BLK_QUEUE_MAX_HW_SECTORS 1 ++ ++/* blk_queue_max_segments() is available */ ++#define HAVE_BLK_QUEUE_MAX_SEGMENTS 1 ++ ++/* blk_queue_nonrot() is available */ ++#define HAVE_BLK_QUEUE_NONROT 1 ++ ++/* blk_queue_physical_block_size() is available */ ++#define HAVE_BLK_QUEUE_PHYSICAL_BLOCK_SIZE 1 ++ ++/* blk_requeue_request() is available */ ++#define HAVE_BLK_REQUEUE_REQUEST 1 ++ ++/* blk_rq_bytes() is available */ ++#define HAVE_BLK_RQ_BYTES 1 ++ ++/* blk_rq_bytes() is GPL-only */ ++/* #undef HAVE_BLK_RQ_BYTES_GPL_ONLY */ ++ ++/* blk_rq_pos() is available */ ++#define HAVE_BLK_RQ_POS 1 ++ ++/* blk_rq_sectors() is available */ ++#define HAVE_BLK_RQ_SECTORS 1 ++ ++/* security_inode_init_security wants callback */ ++#define HAVE_CALLBACK_SECURITY_INODE_INIT_SECURITY 1 ++ ++/* check_disk_size_change() is available */ ++#define HAVE_CHECK_DISK_SIZE_CHANGE 1 ++ ++/* clear_inode() is available */ ++#define HAVE_CLEAR_INODE 1 ++ ++/* eops->commit_metadata() exists */ ++#define HAVE_COMMIT_METADATA 1 ++ ++/* super_block uses const struct xattr_hander */ ++#define HAVE_CONST_XATTR_HANDLER 1 ++ ++/* iops->create() operation takes nameidata */ ++/* #undef HAVE_CREATE_NAMEIDATA */ ++ ++/* xattr_handler->get() wants dentry */ ++#define HAVE_DENTRY_XATTR_GET 1 ++ ++/* xattr_handler->set() wants dentry */ ++#define HAVE_DENTRY_XATTR_SET 1 ++ ++/* ql->discard_granularity is available */ ++#define HAVE_DISCARD_GRANULARITY 1 ++ ++/* Define to 1 if you have the header file. 
*/ ++#define HAVE_DLFCN_H 1 ++ ++/* d_make_root() is available */ ++#define HAVE_D_MAKE_ROOT 1 ++ ++/* d_obtain_alias() is available */ ++#define HAVE_D_OBTAIN_ALIAS 1 ++ ++/* elevator_change() is available */ ++#define HAVE_ELEVATOR_CHANGE 1 ++ ++/* eops->encode_fh() wants child and parent inodes */ ++#define HAVE_ENCODE_FH_WITH_INODE 1 ++ ++/* sops->evict_inode() exists */ ++#define HAVE_EVICT_INODE 1 ++ ++/* fops->fallocate() exists */ ++#define HAVE_FILE_FALLOCATE 1 ++ ++/* kernel defines fmode_t */ ++#define HAVE_FMODE_T 1 ++ ++/* sops->free_cached_objects() exists */ ++#define HAVE_FREE_CACHED_OBJECTS 1 ++ ++/* fops->fsync() with range */ ++#define HAVE_FSYNC_RANGE 1 ++ ++/* fops->fsync() without dentry */ ++/* #undef HAVE_FSYNC_WITHOUT_DENTRY */ ++ ++/* fops->fsync() with dentry */ ++/* #undef HAVE_FSYNC_WITH_DENTRY */ ++ ++/* blk_disk_ro() is available */ ++#define HAVE_GET_DISK_RO 1 ++ ++/* get_gendisk() is available */ ++#define HAVE_GET_GENDISK 1 ++ ++/* Define to 1 if licensed under the GPL */ ++/* #undef HAVE_GPL_ONLY_SYMBOLS */ ++ ++/* fops->fallocate() exists */ ++/* #undef HAVE_INODE_FALLOCATE */ ++ ++/* iops->truncate_range() exists */ ++/* #undef HAVE_INODE_TRUNCATE_RANGE */ ++ ++/* insert_inode_locked() is available */ ++#define HAVE_INSERT_INODE_LOCKED 1 ++ ++/* Define to 1 if you have the header file. */ ++#define HAVE_INTTYPES_H 1 ++ ++/* result=stropts.h Define to 1 if ioctl() defined in */ ++/* #undef HAVE_IOCTL_IN_STROPTS_H */ ++ ++/* Define to 1 if ioctl() defined in */ ++#define HAVE_IOCTL_IN_SYS_IOCTL_H 1 ++ ++/* Define to 1 if ioctl() defined in */ ++/* #undef HAVE_IOCTL_IN_UNISTD_H */ ++ ++/* kernel defines KOBJ_NAME_LEN */ ++/* #undef HAVE_KOBJ_NAME_LEN */ ++ ++/* Define if you have libblkid */ ++/* #undef HAVE_LIBBLKID */ ++ ++/* Define if you have selinux */ ++/* #undef HAVE_LIBSELINUX */ ++ ++/* Define if you have libuuid */ ++#define HAVE_LIBUUID 1 ++ ++/* Define to 1 if you have the `z' library (-lz). */ ++#define HAVE_LIBZ 1 ++ ++/* iops->lookup() operation takes nameidata */ ++/* #undef HAVE_LOOKUP_NAMEIDATA */ ++ ++/* Define to 1 if you have the header file. */ ++#define HAVE_MEMORY_H 1 ++ ++/* iops->create()/mkdir()/mknod() take umode_t */ ++#define HAVE_MKDIR_UMODE_T 1 ++ ++/* mount_nodev() is available */ ++#define HAVE_MOUNT_NODEV 1 ++ ++/* sops->nr_cached_objects() exists */ ++#define HAVE_NR_CACHED_OBJECTS 1 ++ ++/* open_bdev_exclusive() is available */ ++/* #undef HAVE_OPEN_BDEV_EXCLUSIVE */ ++ ++/* REQ_SYNC is defined */ ++#define HAVE_REQ_SYNC 1 ++ ++/* rq_for_each_segment() is available */ ++#define HAVE_RQ_FOR_EACH_SEGMENT 1 ++ ++/* rq_is_sync() is available */ ++#define HAVE_RQ_IS_SYNC 1 ++ ++/* set_nlink() is available */ ++#define HAVE_SET_NLINK 1 ++ ++/* sops->show_options() with dentry */ ++#define HAVE_SHOW_OPTIONS_WITH_DENTRY 1 ++ ++/* struct super_block has s_shrink */ ++#define HAVE_SHRINK 1 ++ ++/* Define to 1 if you have the header file. */ ++#define HAVE_STDINT_H 1 ++ ++/* Define to 1 if you have the header file. */ ++#define HAVE_STDLIB_H 1 ++ ++/* Define to 1 if you have the header file. */ ++#define HAVE_STRINGS_H 1 ++ ++/* Define to 1 if you have the header file. */ ++#define HAVE_STRING_H 1 ++ ++/* Define to 1 if you have the header file. */ ++#define HAVE_SYS_STAT_H 1 ++ ++/* Define to 1 if you have the header file. */ ++#define HAVE_SYS_TYPES_H 1 ++ ++/* truncate_setsize() is available */ ++#define HAVE_TRUNCATE_SETSIZE 1 ++ ++/* Define to 1 if you have the header file. 
*/ ++#define HAVE_UNISTD_H 1 ++ ++/* Define if you have zlib */ ++#define HAVE_ZLIB 1 ++ ++/* Define to the sub-directory in which libtool stores uninstalled libraries. ++ */ ++#define LT_OBJDIR ".libs/" ++ ++/* Define to 1 if NPTL threading implementation includes guard area in stack ++ allocation */ ++/* #undef NPTL_GUARD_WITHIN_STACK */ ++ ++/* zfs debugging enabled */ ++/* #undef ZFS_DEBUG */ ++ ++/* Define the project alias string. */ ++#define ZFS_META_ALIAS "zfs-0.6.0-rc12" ++ ++/* Define the project author. */ ++#define ZFS_META_AUTHOR "Sun Microsystems/Oracle, Lawrence Livermore National Laboratory" ++ ++/* Define the project release date. */ ++/* #undef ZFS_META_DATA */ ++ ++/* Define the project license. */ ++#define ZFS_META_LICENSE "CDDL" ++ ++/* Define the libtool library 'age' version information. */ ++/* #undef ZFS_META_LT_AGE */ ++ ++/* Define the libtool library 'current' version information. */ ++/* #undef ZFS_META_LT_CURRENT */ ++ ++/* Define the libtool library 'revision' version information. */ ++/* #undef ZFS_META_LT_REVISION */ ++ ++/* Define the project name. */ ++#define ZFS_META_NAME "zfs" ++ ++/* Define the project release. */ ++#define ZFS_META_RELEASE "rc12" ++ ++/* Define the project version. */ ++#define ZFS_META_VERSION "0.6.0" ++ diff --git a/3.2.34/lschlv2.patch b/3.2.34/lschlv2.patch new file mode 100644 index 0000000..40ef6be --- /dev/null +++ b/3.2.34/lschlv2.patch @@ -0,0 +1,256 @@ +--- a/arch/arm/mach-kirkwood/include/mach/system.h ++++ b/arch/arm/mach-kirkwood/include/mach/system.h +@@ -9,6 +9,8 @@ + #ifndef __ASM_ARCH_SYSTEM_H + #define __ASM_ARCH_SYSTEM_H + ++#include ++#include + #include + + static inline void arch_idle(void) +--- a/arch/arm/mach-kirkwood/Kconfig ++++ b/arch/arm/mach-kirkwood/Kconfig +@@ -87,6 +87,12 @@ + Say 'Y' here if you want your kernel to support the + HP t5325 Thin Client. + ++config MACH_LINKSTATION_CHLV2 ++ bool "Buffalo LS-CHLv2 Series" ++ help ++ Say 'Y' here if you want your kernel to support the ++ Buffalo LS-CHLv2 Series. ++ + endmenu + + endif +--- a/arch/arm/mach-kirkwood/lschlv2-setup.c ++++ b/arch/arm/mach-kirkwood/lschlv2-setup.c +@@ -0,0 +1,210 @@ ++/* ++ * arch/arm/mach-kirkwood/lschlv2-setup.c ++ * ++ * Buffalo LS Kirkwood Series Setup ++ * ++ * This file is licensed under the terms of the GNU General Public ++ * License version 2. This program is licensed "as is" without any ++ * warranty of any kind, whether express or implied. 
++ */ ++ ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include "include/mach/system.h" ++#include ++#include "common.h" ++#include "mpp.h" ++ ++/***************************************************************************** ++ * 512KB SPI Flash on BOOT Device ++ ****************************************************************************/ ++static struct mtd_partition lschlv2_partitions[] = { ++ { ++ .name = "u-boot", ++ .offset = 0x00000, ++ .size = 0x70000, ++ .mask_flags = MTD_WRITEABLE, ++ }, ++ { ++ .name = "u-boot env", ++ .offset = MTDPART_OFS_APPEND, ++ .size = 0x10000, ++ } ++}; ++ ++static struct flash_platform_data lschlv2_spi_slave_data = { ++ .type = "m25p40", ++ .parts = lschlv2_partitions, ++ .nr_parts = ARRAY_SIZE(lschlv2_partitions), ++}; ++ ++static struct spi_board_info __initdata lschlv2_spi_slave_info[] = { ++ { ++ .modalias = "m25p80", ++ .platform_data = &lschlv2_spi_slave_data, ++ .irq = -1, ++ .max_speed_hz = 20000000, ++ .bus_num = 0, ++ .chip_select = 0, ++ } ++}; ++ ++static struct mv643xx_eth_platform_data lschlv2_ge00_data = { ++ .phy_addr = MV643XX_ETH_PHY_ADDR(0), ++}; ++ ++static struct mv643xx_eth_platform_data lschlv2_ge01_data = { ++ .phy_addr = MV643XX_ETH_PHY_ADDR(8), ++}; ++ ++static unsigned int lschlv2_mpp_config[] __initdata = { ++ MPP10_GPO, /* HDD Power */ ++ MPP11_GPIO, /* USB Vbus Power */ ++ MPP18_GPO, /* FAN High on:0, off:1 */ ++ MPP19_GPO, /* FAN Low on:0, off:1 */ ++ MPP36_GPIO, /* FUNC LED */ ++ MPP37_GPIO, /* ALARM LED */ ++ MPP38_GPIO, /* INFO LED */ ++ MPP39_GPIO, /* POWER LED */ ++ MPP40_GPIO, /* FAN LOCK */ ++ MPP41_GPIO, /* FUNC SW */ ++ MPP42_GPIO, /* POWER SW */ ++ MPP43_GPIO, /* POWER AUTO SW */ ++ MPP48_GPIO, /* FUNC RED LED */ ++ MPP49_GPIO, /* UART EN */ ++ 0 ++}; ++ ++static struct mv_sata_platform_data lschlv2_sata_data = { ++ .n_ports = 1, ++}; ++ ++static struct gpio_led lschlv2_led_pins[] = { ++ { ++ .name = "func", ++ .gpio = 36, ++ .active_low = 1, ++ }, ++ { ++ .name = "alarm", ++ .gpio = 37, ++ .active_low = 1, ++ }, ++ { ++ .name = "info", ++ .gpio = 38, ++ .active_low = 1, ++ }, ++ { ++ .name = "power", ++ .gpio = 39, ++ .default_trigger = "default-on", ++ .active_low = 1, ++ }, ++ { ++ .name = "func2", ++ .gpio = 48, ++ .active_low = 1, ++ }, ++}; ++ ++static struct gpio_led_platform_data lschlv2_led_data = { ++ .leds = lschlv2_led_pins, ++ .num_leds = ARRAY_SIZE(lschlv2_led_pins), ++}; ++ ++static struct platform_device lschlv2_leds = { ++ .name = "leds-gpio", ++ .id = -1, ++ .dev = { ++ .platform_data = &lschlv2_led_data, ++ } ++}; ++ ++#define LSCHLv2_GPIO_USB_VBUS_EN 11 ++#define LSCHLv2_GPIO_KEY_FUNC 41 ++ ++static struct gpio_keys_button lschlv2_buttons[] = { ++ { ++ .code = KEY_OPTION, ++ .gpio = LSCHLv2_GPIO_KEY_FUNC, ++ .desc = "Function Button", ++ .active_low = 1, ++ }, ++}; ++ ++static struct gpio_keys_platform_data lschlv2_button_data = { ++ .buttons = lschlv2_buttons, ++ .nbuttons = ARRAY_SIZE(lschlv2_buttons), ++}; ++ ++static struct platform_device lschlv2_button_device = { ++ .name = "gpio-keys", ++ .id = -1, ++ .num_resources = 0, ++ .dev = { ++ .platform_data = &lschlv2_button_data, ++ }, ++}; ++ ++static void lschlv2_power_off(void) ++{ ++ arch_reset(0, NULL); ++} ++ ++static void __init lschlv2_init(void) ++{ ++ /* ++ * Basic setup. Needs to be called early. 
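++ * kirkwood_init() registers the common Kirkwood SoC devices and ++ * kirkwood_mpp_conf() applies the board's pin-multiplexing table above before ++ * the GPIO-backed USB power enable, LEDs, buttons and power-off hook are set up.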
++ */ ++ kirkwood_init(); ++ kirkwood_mpp_conf(lschlv2_mpp_config); ++ ++ kirkwood_uart0_init(); ++ ++ if (gpio_request(LSCHLv2_GPIO_USB_VBUS_EN, "USB Power Enable") != 0 || ++ gpio_direction_output(LSCHLv2_GPIO_USB_VBUS_EN, 1) != 0) ++ printk(KERN_ERR "can't set up USB Power Enable\n"); ++ kirkwood_ehci_init(); ++ ++ kirkwood_ge00_init(&lschlv2_ge00_data); ++ kirkwood_ge01_init(&lschlv2_ge01_data); ++ ++ kirkwood_sata_init(&lschlv2_sata_data); ++ ++ kirkwood_spi_init(); ++ ++ platform_device_register(&lschlv2_leds); ++ platform_device_register(&lschlv2_button_device); ++ ++ spi_register_board_info(lschlv2_spi_slave_info, ++ ARRAY_SIZE(lschlv2_spi_slave_info)); ++ ++ /* register power-off method */ ++ pm_power_off = lschlv2_power_off; ++ ++ pr_info("%s: finished\n", __func__); ++} ++ ++ ++ ++MACHINE_START(LINKSTATION_CHLV2, "Buffalo Linkstation LS-CHLv2") ++ .atag_offset = 0x100, ++ .init_machine = lschlv2_init, ++ .map_io = kirkwood_map_io, ++ .init_early = kirkwood_init_early, ++ .init_irq = kirkwood_init_irq, ++ .timer = &kirkwood_timer, ++MACHINE_END +--- a/arch/arm/mach-kirkwood/Makefile ++++ b/arch/arm/mach-kirkwood/Makefile +@@ -20,3 +20,4 @@ + obj-$(CONFIG_MACH_T5325) += t5325-setup.o ++obj-$(CONFIG_MACH_LINKSTATION_CHLV2) += lschlv2-setup.o + + obj-$(CONFIG_CPU_IDLE) += cpuidle.o +--- a/arch/arm/mach-kirkwood/common.c ++++ b/arch/arm/mach-kirkwood/common.c +@@ -32,6 +32,7 @@ + #include + #include + #include ++#include + #include "common.h" + + /***************************************************************************** diff --git a/3.2.34/net-netfilter-IFWLOG-2.6.35-buildfix.patch b/3.2.34/net-netfilter-IFWLOG-2.6.35-buildfix.patch new file mode 100644 index 0000000..99d4d06 --- /dev/null +++ b/3.2.34/net-netfilter-IFWLOG-2.6.35-buildfix.patch @@ -0,0 +1,32 @@ +--- linux-2.6.35-rc6-git-mnb0.1/net/ipv4/netfilter/ipt_IFWLOG.c.orig 2010-07-30 21:17:30.000000000 +0300 ++++ linux-2.6.35-rc6-git-mnb0.1/net/ipv4/netfilter/ipt_IFWLOG.c 2010-07-31 13:46:33.834611944 +0300 +@@ -135,7 +135,7 @@ static void ipt_IFWLOG_packet(const stru + } + + static unsigned int ipt_IFWLOG_target(struct sk_buff *skb, +- const struct xt_target_param *target_param) ++ const struct xt_action_param *target_param) + { + const struct ipt_IFWLOG_info *info = target_param->targinfo; + +@@ -144,17 +144,17 @@ static unsigned int ipt_IFWLOG_target(st + return IPT_CONTINUE; + } + +-static bool ipt_IFWLOG_checkentry(const struct xt_tgchk_param *tgchk_param) ++static int ipt_IFWLOG_checkentry(const struct xt_tgchk_param *tgchk_param) + { + const struct ipt_IFWLOG_info *info = tgchk_param->targinfo; + + if (info->prefix[sizeof(info->prefix)-1] != '\0') { + DEBUGP("IFWLOG: prefix term %i\n", + info->prefix[sizeof(info->prefix)-1]); +- return false; ++ return -EINVAL; + } + +- return true; ++ return 0; + } + + static struct xt_target ipt_IFWLOG = { diff --git a/3.2.34/net-netfilter-IFWLOG-2.6.37-buildfix.patch b/3.2.34/net-netfilter-IFWLOG-2.6.37-buildfix.patch new file mode 100644 index 0000000..0ae95aa --- /dev/null +++ b/3.2.34/net-netfilter-IFWLOG-2.6.37-buildfix.patch @@ -0,0 +1,15 @@ + + net/ipv4/netfilter/ipt_IFWLOG.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +--- linux-2.6.37-rc3-git1-tmb0.3/net/ipv4/netfilter/ipt_IFWLOG.c.orig 2010-11-24 21:58:36.000000000 +0200 ++++ linux-2.6.37-rc3-git1-tmb0.3/net/ipv4/netfilter/ipt_IFWLOG.c 2010-11-25 13:08:55.719379646 +0200 +@@ -141,7 +141,7 @@ static unsigned int ipt_IFWLOG_target(st + + ipt_IFWLOG_packet(skb, target_param->in, target_param->out, info); + +- 
return IPT_CONTINUE; ++ return XT_CONTINUE; + } + + static int ipt_IFWLOG_checkentry(const struct xt_tgchk_param *tgchk_param) diff --git a/3.2.34/net-netfilter-IFWLOG-mdv.patch b/3.2.34/net-netfilter-IFWLOG-mdv.patch new file mode 100644 index 0000000..e5b9c92 --- /dev/null +++ b/3.2.34/net-netfilter-IFWLOG-mdv.patch @@ -0,0 +1,264 @@ +ipt_IFWLOG: Mandriva changes + +This patch holds all the Mandriva changes done in ipt_IFWLOG +netfilter module. + +This work is mostly done by Thomas Backlund, Herton R. Krzesinski +and Luiz Fernando N. Capitulino. + +Signed-off-by: Luiz Fernando N. Capitulino +Signed-off-by: Herton Ronaldo Krzesinski + +--- + include/linux/netfilter_ipv4/Kbuild | 1 + include/linux/netfilter_ipv4/ipt_IFWLOG.h | 23 +++++- + net/ipv4/netfilter/ipt_IFWLOG.c | 108 +++++++++++++++--------------- + 3 files changed, 77 insertions(+), 55 deletions(-) + +diff -p -up linux-2.6.28/include/linux/netfilter_ipv4/ipt_IFWLOG.h.orig linux-2.6.28/include/linux/netfilter_ipv4/ipt_IFWLOG.h +--- linux-2.6.28/include/linux/netfilter_ipv4/ipt_IFWLOG.h.orig 2008-12-12 10:55:07.000000000 -0500 ++++ linux-2.6.28/include/linux/netfilter_ipv4/ipt_IFWLOG.h 2008-12-12 10:56:30.000000000 -0500 +@@ -1,10 +1,25 @@ +-#ifndef _IPT_IFWLOG_H +-#define _IPT_IFWLOG_H ++#ifndef _LINUX_IPT_IFWLOG_H ++#define _LINUX_IPT_IFWLOG_H + + #ifndef NETLINK_IFWLOG +-#define NETLINK_IFWLOG 19 ++#define NETLINK_IFWLOG 20 + #endif + ++#ifndef __KERNEL__ ++/* Multicast groups - backwards compatiblility for userspace */ ++#define IFWLOG_NLGRP_NONE 0x00000000 ++#define IFWLOG_NLGRP_DEF 0x00000001 /* default message group */ ++#endif ++ ++enum { ++ IFWLOGNLGRP_NONE, ++#define IFWLOGNLGRP_NONE IFWLOGNLGRP_NONE ++ IFWLOGNLGRP_DEF, ++#define IFWLOGNLGRP_DEF IFWLOGNLGRP_DEF ++ __IFWLOGNLGRP_MAX ++}; ++#define IFWLOGNLGRP_MAX (__IFWLOGNLGRP_MAX - 1) ++ + #define PREFSIZ 32 + + struct nl_msg { /* Netlink message */ +@@ -23,4 +38,4 @@ struct ipt_IFWLOG_info { + char prefix[PREFSIZ]; + }; + +-#endif /* _IPT_IFWLOG_H */ ++#endif /* _LINUX_IPT_IFWLOG_H */ +diff -p -up linux-2.6.28/net/ipv4/netfilter/ipt_IFWLOG.c.orig linux-2.6.28/net/ipv4/netfilter/ipt_IFWLOG.c +--- linux-2.6.28/net/ipv4/netfilter/ipt_IFWLOG.c.orig 2008-12-12 10:55:07.000000000 -0500 ++++ linux-2.6.28/net/ipv4/netfilter/ipt_IFWLOG.c 2008-12-12 10:57:16.000000000 -0500 +@@ -4,6 +4,14 @@ + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. 
++ * ++ * 2007-10-10 Thomas Backlund : build fixes for 2.6.22.9 ++ * 2007-11-11 Herton Krzesinski : build fixes for 2.6.24-rc ++ * 2007-12-03 Luiz Capitulino : v1.1 ++ * - Better multicast group usage ++ * - Coding style fixes ++ * - Do not return -EINVAL by default in ipt_ifwlog_init() ++ * - Minor refinements + */ + + #include +@@ -19,12 +27,10 @@ + #include + + #include ++#include + #include + #include + +-MODULE_LICENSE("GPL"); +-MODULE_AUTHOR("Samir Bellabes "); +-MODULE_DESCRIPTION("Interactive firewall logging and module"); + + #if 0 + #define DEBUGP PRINTR +@@ -36,44 +42,41 @@ MODULE_DESCRIPTION("Interactive firewall + + static struct sock *nl; + +-#define GROUP 10 +- + /* send struct to userspace */ +-static void send_packet(struct nl_msg msg) ++static void send_packet(const struct nl_msg *msg) + { + struct sk_buff *skb = NULL; + struct nlmsghdr *nlh; ++ unsigned int size; + +- skb = alloc_skb(NLMSG_SPACE(sizeof(struct nl_msg)), GFP_ATOMIC); ++ size = NLMSG_SPACE(sizeof(*msg)); ++ skb = alloc_skb(size, GFP_ATOMIC); + if (!skb) { + PRINTR(KERN_WARNING "IFWLOG: OOM can't allocate skb\n"); +- return ; ++ return; + } + +- nlh = NLMSG_PUT(skb, 0, 0, 0, sizeof(struct nl_msg) - sizeof(*nlh)); ++ nlh = NLMSG_PUT(skb, 0, 0, 0, size - sizeof(*nlh)); + +- memcpy(NLMSG_DATA(nlh), (const void*)&msg, sizeof(struct nl_msg)); ++ memcpy(NLMSG_DATA(nlh), (const void *) msg, sizeof(*msg)); + + NETLINK_CB(skb).pid = 0; /* from kernel */ +- NETLINK_CB(skb).dst_pid = 0; /* multicast */ +- NETLINK_CB(skb).dst_group = 10; ++ NETLINK_CB(skb).dst_group = IFWLOGNLGRP_DEF; + + if (nl) { + DEBUGP(KERN_WARNING + "IFWLOG: nlmsg_len=%ld\nnlmsg_type=%d nlmsg_flags=%d\nnlmsg_seq=%ld nlmsg_pid = %ld\n", + (long)nlh->nlmsg_len, nlh->nlmsg_type, nlh->nlmsg_flags, + (long)nlh->nlmsg_seq, (long)nlh->nlmsg_pid); +- DEBUGP(KERN_WARNING "prefix : %s\n", msg.prefix); ++ DEBUGP(KERN_WARNING "prefix : %s\n", msg->prefix); + +- netlink_broadcast(nl, skb, 0, 10, GFP_ATOMIC); +- return ; ++ netlink_broadcast(nl, skb, 0, IFWLOGNLGRP_DEF, GFP_ATOMIC); ++ return; + } + +- nlmsg_failure: +- if (skb) +- kfree_skb(skb); +- PRINTR(KERN_WARNING "IFWLOG: Error sending netlink packet\n"); +- return ; ++nlmsg_failure: ++ kfree_skb(skb); ++ PRINTR(KERN_WARNING "IFWLOG: Error sending netlink packet\n"); + } + + /* fill struct for userspace */ +@@ -128,73 +131,76 @@ static void ipt_IFWLOG_packet(const stru + do_gettimeofday((struct timeval *)&tv); + msg.timestamp_sec = tv.tv_sec; + +- send_packet(msg); ++ send_packet(&msg); + } + +-static unsigned int ipt_IFWLOG_target(struct sk_buff **pskb, +- const struct net_device *in, +- const struct net_device *out, +- unsigned int hooknum, +- const void *targinfo, +- void *userinfo) ++static unsigned int ipt_IFWLOG_target(struct sk_buff *skb, ++ const struct xt_target_param *target_param) + { +- const struct ipt_IFWLOG_info *info = targinfo; ++ const struct ipt_IFWLOG_info *info = target_param->targinfo; + +- ipt_IFWLOG_packet(*pskb, in, out, info); ++ ipt_IFWLOG_packet(skb, target_param->in, target_param->out, info); + + return IPT_CONTINUE; + } + +-static int ipt_IFWLOG_checkentry(const char *tablename, +- const struct ipt_entry *e, +- void *targinfo, +- unsigned int targinfosize, +- unsigned int hook_mask) ++static bool ipt_IFWLOG_checkentry(const struct xt_tgchk_param *tgchk_param) + { +- const struct ipt_IFWLOG_info *info = targinfo; ++ const struct ipt_IFWLOG_info *info = tgchk_param->targinfo; + + if (info->prefix[sizeof(info->prefix)-1] != '\0') { + DEBUGP("IFWLOG: prefix term %i\n", + 
info->prefix[sizeof(info->prefix)-1]); +- return 0; ++ return false; + } + +- return 1; ++ return true; + } + +-static struct ipt_target ipt_IFWLOG = { ++static struct xt_target ipt_IFWLOG = { + .name = "IFWLOG", ++ .family = AF_INET, + .target = ipt_IFWLOG_target, + .targetsize = sizeof(struct ipt_IFWLOG_info), + .checkentry = ipt_IFWLOG_checkentry, + .me = THIS_MODULE, + }; + +-static int __init init(void) ++static int __init ipt_ifwlog_init(void) + { +- nl = (struct sock*) netlink_kernel_create(NETLINK_IFWLOG, GROUP, NULL, THIS_MODULE); +- if (!nl) { +- PRINTR(KERN_WARNING "IFWLOG: cannot create netlink socket\n"); +- return -EINVAL; +- } ++ int err; + +- if (ipt_register_target(&ipt_IFWLOG)) { ++ nl = netlink_kernel_create(&init_net, NETLINK_IFWLOG, IFWLOGNLGRP_MAX, ++ NULL, NULL, THIS_MODULE); ++ if (!nl) { ++ PRINTR(KERN_WARNING "IFWLOG: cannot create netlink socket\n"); ++ return -ENOMEM; ++ } ++ ++ err = xt_register_target(&ipt_IFWLOG); ++ if (err) { + if (nl && nl->sk_socket) + sock_release(nl->sk_socket); +- return -EINVAL; ++ return err; + } + + PRINTR(KERN_INFO "IFWLOG: register target\n"); + return 0; + } + +-static void __exit fini(void) ++static void __exit ipt_ifwlog_fini(void) + { + if (nl && nl->sk_socket) +- sock_release(nl->sk_socket); ++ sock_release(nl->sk_socket); + PRINTR(KERN_INFO "IFWLOG: unregister target\n"); +- ipt_unregister_target(&ipt_IFWLOG); ++ xt_unregister_target(&ipt_IFWLOG); + } + +-module_init(init); +-module_exit(fini); ++module_init(ipt_ifwlog_init); ++module_exit(ipt_ifwlog_fini); ++ ++MODULE_LICENSE("GPL"); ++MODULE_AUTHOR("Samir Bellabes "); ++MODULE_AUTHOR("Luiz Capitulino "); ++MODULE_DESCRIPTION("Interactive firewall logging and module"); ++MODULE_VERSION("v1.1"); +--- linux/include/linux/netfilter_ipv4/Kbuild.net-netfilter-IFWLOG-mdv.orig 2012-05-21 01:29:13.000000000 +0300 ++++ linux/include/linux/netfilter_ipv4/Kbuild 2012-05-26 01:27:24.743139430 +0300 +@@ -2,6 +2,7 @@ header-y += ip_queue.h + header-y += ip_tables.h + header-y += ipt_CLUSTERIP.h + header-y += ipt_ECN.h ++header-y += ipt_IFWLOG.h + header-y += ipt_LOG.h + header-y += ipt_REJECT.h + header-y += ipt_TTL.h diff --git a/3.2.34/net-netfilter-IFWLOG.patch b/3.2.34/net-netfilter-IFWLOG.patch new file mode 100644 index 0000000..6efe89a --- /dev/null +++ b/3.2.34/net-netfilter-IFWLOG.patch @@ -0,0 +1,269 @@ +--- + include/linux/netfilter_ipv4/ipt_IFWLOG.h | 26 +++ + net/ipv4/netfilter/Kconfig | 11 + + net/ipv4/netfilter/Makefile | 1 + net/ipv4/netfilter/ipt_IFWLOG.c | 200 ++++++++++++++++++++++++++++++ + 4 files changed, 238 insertions(+) + +--- /dev/null ++++ b/net/ipv4/netfilter/ipt_IFWLOG.c +@@ -0,0 +1,200 @@ ++/* Interactive Firewall for Mandriva ++ * Samir Bellabes ++ * ++ * This program is free software; you can redistribute it and/or modify ++ * it under the terms of the GNU General Public License version 2 as ++ * published by the Free Software Foundation. ++ */ ++ ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++ ++#include ++#include ++#include ++ ++MODULE_LICENSE("GPL"); ++MODULE_AUTHOR("Samir Bellabes "); ++MODULE_DESCRIPTION("Interactive firewall logging and module"); ++ ++#if 0 ++#define DEBUGP PRINTR ++#else ++#define DEBUGP(format, args...) ++#endif ++ ++#define PRINTR(format, args...) 
do { if(net_ratelimit()) printk(format, ##args); } while(0) ++ ++static struct sock *nl; ++ ++#define GROUP 10 ++ ++/* send struct to userspace */ ++static void send_packet(struct nl_msg msg) ++{ ++ struct sk_buff *skb = NULL; ++ struct nlmsghdr *nlh; ++ ++ skb = alloc_skb(NLMSG_SPACE(sizeof(struct nl_msg)), GFP_ATOMIC); ++ if (!skb) { ++ PRINTR(KERN_WARNING "IFWLOG: OOM can't allocate skb\n"); ++ return ; ++ } ++ ++ nlh = NLMSG_PUT(skb, 0, 0, 0, sizeof(struct nl_msg) - sizeof(*nlh)); ++ ++ memcpy(NLMSG_DATA(nlh), (const void*)&msg, sizeof(struct nl_msg)); ++ ++ NETLINK_CB(skb).pid = 0; /* from kernel */ ++ NETLINK_CB(skb).dst_pid = 0; /* multicast */ ++ NETLINK_CB(skb).dst_group = 10; ++ ++ if (nl) { ++ DEBUGP(KERN_WARNING ++ "IFWLOG: nlmsg_len=%ld\nnlmsg_type=%d nlmsg_flags=%d\nnlmsg_seq=%ld nlmsg_pid = %ld\n", ++ (long)nlh->nlmsg_len, nlh->nlmsg_type, nlh->nlmsg_flags, ++ (long)nlh->nlmsg_seq, (long)nlh->nlmsg_pid); ++ DEBUGP(KERN_WARNING "prefix : %s\n", msg.prefix); ++ ++ netlink_broadcast(nl, skb, 0, 10, GFP_ATOMIC); ++ return ; ++ } ++ ++ nlmsg_failure: ++ if (skb) ++ kfree_skb(skb); ++ PRINTR(KERN_WARNING "IFWLOG: Error sending netlink packet\n"); ++ return ; ++} ++ ++/* fill struct for userspace */ ++static void ipt_IFWLOG_packet(const struct sk_buff *skb, ++ const struct net_device *in, ++ const struct net_device *out, ++ const struct ipt_IFWLOG_info *info) ++{ ++ struct iphdr iph; ++ struct tcphdr tcph; ++ struct udphdr udph; ++ struct nl_msg msg; ++ struct iphdr _iph, *ih; ++ struct timeval tv; ++ ++ memset(&msg, 0, sizeof(struct nl_msg)); ++ ++ ih = skb_header_pointer(skb, 0, sizeof(_iph), &_iph); ++ if (ih == NULL) { ++ PRINTR(KERN_WARNING "IFWLOG: skb truncated"); ++ return; ++ } ++ ++ /* save interface name */ ++ if (in) ++ strcpy(msg.indev_name, in->name); ++ if (out) ++ strcpy(msg.outdev_name, out->name); ++ ++ /* save log-prefix */ ++ strcpy(msg.prefix, info->prefix); ++ ++ /* save ip header */ ++ skb_copy_bits(skb, 0, &iph, sizeof(iph)); ++ memcpy(&msg.ip, &iph, sizeof(struct iphdr)); ++ ++ /* save transport header */ ++ switch (iph.protocol){ ++ case IPPROTO_TCP: ++ skb_copy_bits(skb, iph.ihl*4 , &tcph, sizeof(tcph)); ++ memcpy(&msg.h.th, &tcph, sizeof(struct tcphdr)); ++ break; ++ case IPPROTO_UDP: ++ skb_copy_bits(skb, iph.ihl*4 , &udph, sizeof(udph)); ++ memcpy(&msg.h.uh, &udph, sizeof(struct udphdr)); ++ break; ++ default: ++ break; ++ } ++ ++ /* save timetamp */ ++ do_gettimeofday((struct timeval *)&tv); ++ msg.timestamp_sec = tv.tv_sec; ++ ++ send_packet(msg); ++} ++ ++static unsigned int ipt_IFWLOG_target(struct sk_buff **pskb, ++ const struct net_device *in, ++ const struct net_device *out, ++ unsigned int hooknum, ++ const void *targinfo, ++ void *userinfo) ++{ ++ const struct ipt_IFWLOG_info *info = targinfo; ++ ++ ipt_IFWLOG_packet(*pskb, in, out, info); ++ ++ return IPT_CONTINUE; ++} ++ ++static int ipt_IFWLOG_checkentry(const char *tablename, ++ const struct ipt_entry *e, ++ void *targinfo, ++ unsigned int targinfosize, ++ unsigned int hook_mask) ++{ ++ const struct ipt_IFWLOG_info *info = targinfo; ++ ++ if (info->prefix[sizeof(info->prefix)-1] != '\0') { ++ DEBUGP("IFWLOG: prefix term %i\n", ++ info->prefix[sizeof(info->prefix)-1]); ++ return 0; ++ } ++ ++ return 1; ++} ++ ++static struct ipt_target ipt_IFWLOG = { ++ .name = "IFWLOG", ++ .target = ipt_IFWLOG_target, ++ .targetsize = sizeof(struct ipt_IFWLOG_info), ++ .checkentry = ipt_IFWLOG_checkentry, ++ .me = THIS_MODULE, ++}; ++ ++static int __init init(void) ++{ ++ nl = (struct sock*) 
netlink_kernel_create(NETLINK_IFWLOG, GROUP, NULL, THIS_MODULE); ++ if (!nl) { ++ PRINTR(KERN_WARNING "IFWLOG: cannot create netlink socket\n"); ++ return -EINVAL; ++ } ++ ++ if (ipt_register_target(&ipt_IFWLOG)) { ++ if (nl && nl->sk_socket) ++ sock_release(nl->sk_socket); ++ return -EINVAL; ++ } ++ ++ PRINTR(KERN_INFO "IFWLOG: register target\n"); ++ return 0; ++} ++ ++static void __exit fini(void) ++{ ++ if (nl && nl->sk_socket) ++ sock_release(nl->sk_socket); ++ PRINTR(KERN_INFO "IFWLOG: unregister target\n"); ++ ipt_unregister_target(&ipt_IFWLOG); ++} ++ ++module_init(init); ++module_exit(fini); +--- a/net/ipv4/netfilter/Kconfig ++++ b/net/ipv4/netfilter/Kconfig +@@ -331,6 +331,17 @@ config IP_NF_TARGET_TTL + (e.g. when running oldconfig). It selects + CONFIG_NETFILTER_XT_TARGET_HL. + ++config IP_NF_TARGET_IFWLOG ++ tristate 'IFWLOG target support' ++ depends on IP_NF_IPTABLES ++ help ++ This option adds a `IFWLOG' target, which is used by ++ Interactive Firewall for sending informations to a userspace ++ daemon ++ ++ If you want to compile it as a module, say M here and read ++ Documentation/modules.txt. If unsure, say `N'. ++ + # raw + specific targets + config IP_NF_RAW + tristate 'raw table support (required for NOTRACK/TRACE)' +--- /dev/null ++++ b/include/linux/netfilter_ipv4/ipt_IFWLOG.h +@@ -0,0 +1,26 @@ ++#ifndef _IPT_IFWLOG_H ++#define _IPT_IFWLOG_H ++ ++#ifndef NETLINK_IFWLOG ++#define NETLINK_IFWLOG 19 ++#endif ++ ++#define PREFSIZ 32 ++ ++struct nl_msg { /* Netlink message */ ++ long timestamp_sec; /* time packet */ ++ char indev_name[IFNAMSIZ]; /* name of the ingoing interface */ ++ char outdev_name[IFNAMSIZ]; /* name of the outgoing interface */ ++ unsigned char prefix[PREFSIZ]; /* informations on the logging reason */ ++ struct iphdr ip; ++ union { ++ struct tcphdr th; ++ struct udphdr uh; ++ } h; ++}; ++ ++struct ipt_IFWLOG_info { ++ char prefix[PREFSIZ]; ++}; ++ ++#endif /* _IPT_IFWLOG_H */ +--- linux/net/ipv4/netfilter/Makefile.net-netfilter-IFWLOG.orig 2012-05-21 01:29:13.000000000 +0300 ++++ linux/net/ipv4/netfilter/Makefile 2012-05-26 01:23:57.511514194 +0300 +@@ -53,6 +53,7 @@ obj-$(CONFIG_IP_NF_MATCH_RPFILTER) += ip + + # targets + obj-$(CONFIG_IP_NF_TARGET_CLUSTERIP) += ipt_CLUSTERIP.o ++obj-$(CONFIG_IP_NF_TARGET_IFWLOG) += ipt_IFWLOG.o + obj-$(CONFIG_IP_NF_TARGET_ECN) += ipt_ECN.o + obj-$(CONFIG_IP_NF_TARGET_MASQUERADE) += ipt_MASQUERADE.o + obj-$(CONFIG_IP_NF_TARGET_NETMAP) += ipt_NETMAP.o diff --git a/3.2.34/net-netfilter-psd-2.6.35-buildfix.patch b/3.2.34/net-netfilter-psd-2.6.35-buildfix.patch new file mode 100644 index 0000000..218031c --- /dev/null +++ b/3.2.34/net-netfilter-psd-2.6.35-buildfix.patch @@ -0,0 +1,11 @@ +--- linux-2.6.35-rc6-git-mnb0.1/net/ipv4/netfilter/ipt_psd.c.orig 2010-07-30 21:17:30.000000000 +0300 ++++ linux-2.6.35-rc6-git-mnb0.1/net/ipv4/netfilter/ipt_psd.c 2010-07-31 13:29:00.623601957 +0300 +@@ -98,7 +98,7 @@ static inline int hashfunc(struct in_add + + static bool + ipt_psd_match(const struct sk_buff *pskb, +- const struct xt_match_param *match_param) ++ struct xt_action_param *match_param) + { + struct iphdr *ip_hdr; + struct tcphdr *tcp_hdr; diff --git a/3.2.34/net-netfilter-psd-mdv.patch b/3.2.34/net-netfilter-psd-mdv.patch new file mode 100644 index 0000000..68884aa --- /dev/null +++ b/3.2.34/net-netfilter-psd-mdv.patch @@ -0,0 +1,235 @@ +ipt_psd: Mandriva changes + +This patch holds all the Mandriva changes done in ipt_psd +netfilter module. + +Most of the time they're just upgrades to match with new +API in the kernel. 
+ +This work is mostly done by Thomas Backlund, Herton R. +Krzesinski and Luiz Fernando N. Capitulino. + +Signed-off-by: Luiz Fernando N. Capitulino +Signed-off-by: Herton Ronaldo Krzesinski + +--- + include/linux/netfilter_ipv4/Kbuild | 1 + net/ipv4/netfilter/Kconfig | 8 ++ + net/ipv4/netfilter/ipt_psd.c | 113 ++++++++++++++---------------------- + 3 files changed, 55 insertions(+), 67 deletions(-) + +diff -p -up linux-2.6.28/net/ipv4/netfilter/ipt_psd.c.orig linux-2.6.28/net/ipv4/netfilter/ipt_psd.c +--- linux-2.6.28/net/ipv4/netfilter/ipt_psd.c.orig 2008-12-12 11:03:05.000000000 -0500 ++++ linux-2.6.28/net/ipv4/netfilter/ipt_psd.c 2008-12-12 11:04:03.000000000 -0500 +@@ -1,21 +1,24 @@ + /* +- This is a module which is used for PSD (portscan detection) +- Derived from scanlogd v2.1 written by Solar Designer +- and LOG target module. +- +- Copyright (C) 2000,2001 astaro AG +- +- This file is distributed under the terms of the GNU General Public +- License (GPL). Copies of the GPL can be obtained from: +- ftp://prep.ai.mit.edu/pub/gnu/GPL +- +- 2000-05-04 Markus Hennig : initial +- 2000-08-18 Dennis Koslowski : first release +- 2000-12-01 Dennis Koslowski : UDP scans detection added +- 2001-01-02 Dennis Koslowski : output modified +- 2001-02-04 Jan Rekorajski : converted from target to match +- 2004-05-05 Martijn Lievaart : ported to 2.6 +-*/ ++ * This is a module which is used for PSD (portscan detection) ++ * Derived from scanlogd v2.1 written by Solar Designer ++ * and LOG target module. ++ * ++ * Copyright (C) 2000,2001 astaro AG ++ * ++ * This file is distributed under the terms of the GNU General Public ++ * License (GPL). Copies of the GPL can be obtained from: ++ * ftp://prep.ai.mit.edu/pub/gnu/GPL ++ * ++ * 2000-05-04 Markus Hennig : initial ++ * 2000-08-18 Dennis Koslowski : first release ++ * 2000-12-01 Dennis Koslowski : UDP scans detection added ++ * 2001-01-02 Dennis Koslowski : output modified ++ * 2001-02-04 Jan Rekorajski : converted from target to match ++ * 2004-05-05 Martijn Lievaart : ported to 2.6 ++ * 2007-10-10 Thomas Backlund : 2.6.22 update ++ * 2007-11-14 Luiz Capitulino : 2.6.22 API usage fixes ++ * 2007-11-26 Herton Ronaldo Krzesinski : switch xt_match->match to bool ++ */ + + #include + #include +@@ -54,7 +57,7 @@ struct port { + */ + struct host { + struct host *next; /* Next entry with the same hash */ +- clock_t timestamp; /* Last update time */ ++ unsigned long timestamp; /* Last update time */ + struct in_addr src_addr; /* Source address */ + struct in_addr dest_addr; /* Destination address */ + unsigned short src_port; /* Source port */ +@@ -93,33 +96,29 @@ static inline int hashfunc(struct in_add + return hash & (HASH_SIZE - 1); + } + +-static int ++static bool + ipt_psd_match(const struct sk_buff *pskb, +- const struct net_device *in, +- const struct net_device *out, +- const void *matchinfo, +- int offset, +- int *hotdrop) ++ const struct xt_match_param *match_param) + { + struct iphdr *ip_hdr; + struct tcphdr *tcp_hdr; + struct in_addr addr; + u_int16_t src_port,dest_port; + u_int8_t tcp_flags, proto; +- clock_t now; ++ unsigned long now; + struct host *curr, *last, **head; + int hash, index, count; + + /* Parameters from userspace */ +- const struct ipt_psd_info *psdinfo = matchinfo; ++ const struct ipt_psd_info *psdinfo = match_param->matchinfo; + + /* IP header */ +- ip_hdr = pskb->nh.iph; ++ ip_hdr = ipip_hdr(pskb); + + /* Sanity check */ + if (ntohs(ip_hdr->frag_off) & IP_OFFSET) { + DEBUGP("PSD: sanity check failed\n"); +- return 0; ++ return 
false; + } + + /* TCP or UDP ? */ +@@ -127,7 +126,7 @@ ipt_psd_match(const struct sk_buff *pskb + + if (proto != IPPROTO_TCP && proto != IPPROTO_UDP) { + DEBUGP("PSD: protocol not supported\n"); +- return 0; ++ return false; + } + + /* Get the source address, source & destination ports, and TCP flags */ +@@ -151,7 +150,7 @@ ipt_psd_match(const struct sk_buff *pskb + * them spoof us. [DHCP needs this feature - HW] */ + if (!addr.s_addr) { + DEBUGP("PSD: spoofed source address (0.0.0.0)\n"); +- return 0; ++ return false; + } + + /* Use jiffies here not to depend on someone setting the time while we're +@@ -298,46 +297,26 @@ ipt_psd_match(const struct sk_buff *pskb + + out_no_match: + spin_unlock(&state.lock); +- return 0; ++ return false; + + out_match: + spin_unlock(&state.lock); +- return 1; ++ DEBUGP("PSD: Dropping packets from "NIPQUAD_FMT" \n", ++ NIPQUAD(curr->src_addr.s_addr)); ++ return true; + } + +-static int ipt_psd_checkentry(const char *tablename, +- const struct ipt_ip *e, +- void *matchinfo, +- unsigned int matchsize, +- unsigned int hook_mask) +-{ +-/* const struct ipt_psd_info *psdinfo = targinfo;*/ +- +- /* we accept TCP only */ +-/* if (e->ip.proto != IPPROTO_TCP) { */ +-/* DEBUGP("PSD: specified protocol may be TCP only\n"); */ +-/* return 0; */ +-/* } */ +- +- if (matchsize != IPT_ALIGN(sizeof(struct ipt_psd_info))) { +- DEBUGP("PSD: matchsize %u != %u\n", +- matchsize, +- IPT_ALIGN(sizeof(struct ipt_psd_info))); +- return 0; +- } +- +- return 1; +-} +- +-static struct ipt_match ipt_psd_reg = { +- .name = "psd", +- .match = ipt_psd_match, +- .checkentry = ipt_psd_checkentry, +- .me = THIS_MODULE }; ++static struct xt_match ipt_psd_reg = { ++ .name = "psd", ++ .family = AF_INET, ++ .match = ipt_psd_match, ++ .matchsize = sizeof(struct ipt_psd_info), ++ .me = THIS_MODULE ++}; + +-static int __init init(void) ++static int __init ipt_psd_init(void) + { +- if (ipt_register_match(&ipt_psd_reg)) ++ if (xt_register_match(&ipt_psd_reg)) + return -EINVAL; + + memset(&state, 0, sizeof(state)); +@@ -348,11 +327,11 @@ static int __init init(void) + return 0; + } + +-static void __exit fini(void) ++static void __exit ipt_psd_fini(void) + { +- ipt_unregister_match(&ipt_psd_reg); ++ xt_unregister_match(&ipt_psd_reg); + printk("netfilter PSD unloaded - (c) astaro AG\n"); + } + +-module_init(init); +-module_exit(fini); ++module_init(ipt_psd_init); ++module_exit(ipt_psd_fini); +--- a/net/ipv4/netfilter/Kconfig ++++ b/net/ipv4/netfilter/Kconfig +@@ -322,6 +322,14 @@ + (e.g. when running oldconfig). It selects + CONFIG_NETFILTER_XT_TARGET_HL. + ++config IP_NF_MATCH_PSD ++ tristate 'Port scanner detection support' ++ depends on NETFILTER_ADVANCED ++ help ++ Module used for PSD (portscan detection). ++ ++ To compile it as a module, choose M here. If unsure, say N. 
++ + config IP_NF_TARGET_IFWLOG + tristate 'IFWLOG target support' + depends on IP_NF_IPTABLES +--- linux/include/linux/netfilter_ipv4/Kbuild.net-netfilter-psd-mdv.orig 2012-05-26 01:28:56.000000000 +0300 ++++ linux/include/linux/netfilter_ipv4/Kbuild 2012-05-26 01:30:21.493540796 +0300 +@@ -11,6 +11,7 @@ + header-y += ipt_addrtype.h + header-y += ipt_ah.h + header-y += ipt_ecn.h ++header-y += ipt_psd.h + header-y += ipt_realm.h + header-y += ipt_ttl.h + header-y += nf_nat.h diff --git a/3.2.34/net-netfilter-psd.patch b/3.2.34/net-netfilter-psd.patch new file mode 100644 index 0000000..c8ad7a9 --- /dev/null +++ b/3.2.34/net-netfilter-psd.patch @@ -0,0 +1,420 @@ +--- + include/linux/netfilter_ipv4/ipt_psd.h | 40 +++ + net/ipv4/netfilter/Makefile | 1 + net/ipv4/netfilter/ipt_psd.c | 358 +++++++++++++++++++++++++++++++++ + 3 files changed, 399 insertions(+) + +--- /dev/null ++++ b/net/ipv4/netfilter/ipt_psd.c +@@ -0,0 +1,358 @@ ++/* ++ This is a module which is used for PSD (portscan detection) ++ Derived from scanlogd v2.1 written by Solar Designer ++ and LOG target module. ++ ++ Copyright (C) 2000,2001 astaro AG ++ ++ This file is distributed under the terms of the GNU General Public ++ License (GPL). Copies of the GPL can be obtained from: ++ ftp://prep.ai.mit.edu/pub/gnu/GPL ++ ++ 2000-05-04 Markus Hennig : initial ++ 2000-08-18 Dennis Koslowski : first release ++ 2000-12-01 Dennis Koslowski : UDP scans detection added ++ 2001-01-02 Dennis Koslowski : output modified ++ 2001-02-04 Jan Rekorajski : converted from target to match ++ 2004-05-05 Martijn Lievaart : ported to 2.6 ++*/ ++ ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++ ++#if 0 ++#define DEBUGP printk ++#else ++#define DEBUGP(format, args...) ++#endif ++ ++MODULE_LICENSE("GPL"); ++MODULE_AUTHOR("Dennis Koslowski "); ++ ++#define HF_DADDR_CHANGING 0x01 ++#define HF_SPORT_CHANGING 0x02 ++#define HF_TOS_CHANGING 0x04 ++#define HF_TTL_CHANGING 0x08 ++ ++/* ++ * Information we keep per each target port ++ */ ++struct port { ++ u_int16_t number; /* port number */ ++ u_int8_t proto; /* protocol number */ ++ u_int8_t and_flags; /* tcp ANDed flags */ ++ u_int8_t or_flags; /* tcp ORed flags */ ++}; ++ ++/* ++ * Information we keep per each source address. ++ */ ++struct host { ++ struct host *next; /* Next entry with the same hash */ ++ clock_t timestamp; /* Last update time */ ++ struct in_addr src_addr; /* Source address */ ++ struct in_addr dest_addr; /* Destination address */ ++ unsigned short src_port; /* Source port */ ++ int count; /* Number of ports in the list */ ++ int weight; /* Total weight of ports in the list */ ++ struct port ports[SCAN_MAX_COUNT - 1]; /* List of ports */ ++ unsigned char tos; /* TOS */ ++ unsigned char ttl; /* TTL */ ++ unsigned char flags; /* HF_ flags bitmask */ ++}; ++ ++/* ++ * State information. ++ */ ++static struct { ++ spinlock_t lock; ++ struct host list[LIST_SIZE]; /* List of source addresses */ ++ struct host *hash[HASH_SIZE]; /* Hash: pointers into the list */ ++ int index; /* Oldest entry to be replaced */ ++} state; ++ ++/* ++ * Convert an IP address into a hash table index. 
++ */ ++static inline int hashfunc(struct in_addr addr) ++{ ++ unsigned int value; ++ int hash; ++ ++ value = addr.s_addr; ++ hash = 0; ++ do { ++ hash ^= value; ++ } while ((value >>= HASH_LOG)); ++ ++ return hash & (HASH_SIZE - 1); ++} ++ ++static int ++ipt_psd_match(const struct sk_buff *pskb, ++ const struct net_device *in, ++ const struct net_device *out, ++ const void *matchinfo, ++ int offset, ++ int *hotdrop) ++{ ++ struct iphdr *ip_hdr; ++ struct tcphdr *tcp_hdr; ++ struct in_addr addr; ++ u_int16_t src_port,dest_port; ++ u_int8_t tcp_flags, proto; ++ clock_t now; ++ struct host *curr, *last, **head; ++ int hash, index, count; ++ ++ /* Parameters from userspace */ ++ const struct ipt_psd_info *psdinfo = matchinfo; ++ ++ /* IP header */ ++ ip_hdr = pskb->nh.iph; ++ ++ /* Sanity check */ ++ if (ntohs(ip_hdr->frag_off) & IP_OFFSET) { ++ DEBUGP("PSD: sanity check failed\n"); ++ return 0; ++ } ++ ++ /* TCP or UDP ? */ ++ proto = ip_hdr->protocol; ++ ++ if (proto != IPPROTO_TCP && proto != IPPROTO_UDP) { ++ DEBUGP("PSD: protocol not supported\n"); ++ return 0; ++ } ++ ++ /* Get the source address, source & destination ports, and TCP flags */ ++ ++ addr.s_addr = ip_hdr->saddr; ++ ++ tcp_hdr = (struct tcphdr*)((u_int32_t *)ip_hdr + ip_hdr->ihl); ++ ++ /* Yep, it´s dirty */ ++ src_port = tcp_hdr->source; ++ dest_port = tcp_hdr->dest; ++ ++ if (proto == IPPROTO_TCP) { ++ tcp_flags = *((u_int8_t*)tcp_hdr + 13); ++ } ++ else { ++ tcp_flags = 0x00; ++ } ++ ++ /* We're using IP address 0.0.0.0 for a special purpose here, so don't let ++ * them spoof us. [DHCP needs this feature - HW] */ ++ if (!addr.s_addr) { ++ DEBUGP("PSD: spoofed source address (0.0.0.0)\n"); ++ return 0; ++ } ++ ++ /* Use jiffies here not to depend on someone setting the time while we're ++ * running; we need to be careful with possible return value overflows. */ ++ now = jiffies; ++ ++ spin_lock(&state.lock); ++ ++ /* Do we know this source address already? */ ++ count = 0; ++ last = NULL; ++ if ((curr = *(head = &state.hash[hash = hashfunc(addr)]))) ++ do { ++ if (curr->src_addr.s_addr == addr.s_addr) break; ++ count++; ++ if (curr->next) last = curr; ++ } while ((curr = curr->next)); ++ ++ if (curr) { ++ ++ /* We know this address, and the entry isn't too old. Update it. */ ++ if (now - curr->timestamp <= (psdinfo->delay_threshold*HZ)/100 && ++ time_after_eq(now, curr->timestamp)) { ++ ++ /* Just update the appropriate list entry if we've seen this port already */ ++ for (index = 0; index < curr->count; index++) { ++ if (curr->ports[index].number == dest_port) { ++ curr->ports[index].proto = proto; ++ curr->ports[index].and_flags &= tcp_flags; ++ curr->ports[index].or_flags |= tcp_flags; ++ goto out_no_match; ++ } ++ } ++ ++ /* TCP/ACK and/or TCP/RST to a new port? This could be an outgoing connection. */ ++ if (proto == IPPROTO_TCP && (tcp_hdr->ack || tcp_hdr->rst)) ++ goto out_no_match; ++ ++ /* Packet to a new port, and not TCP/ACK: update the timestamp */ ++ curr->timestamp = now; ++ ++ /* Logged this scan already? Then drop the packet. 
*/ ++ if (curr->weight >= psdinfo->weight_threshold) ++ goto out_match; ++ ++ /* Specify if destination address, source port, TOS or TTL are not fixed */ ++ if (curr->dest_addr.s_addr != ip_hdr->daddr) ++ curr->flags |= HF_DADDR_CHANGING; ++ if (curr->src_port != src_port) ++ curr->flags |= HF_SPORT_CHANGING; ++ if (curr->tos != ip_hdr->tos) ++ curr->flags |= HF_TOS_CHANGING; ++ if (curr->ttl != ip_hdr->ttl) ++ curr->flags |= HF_TTL_CHANGING; ++ ++ /* Update the total weight */ ++ curr->weight += (ntohs(dest_port) < 1024) ? ++ psdinfo->lo_ports_weight : psdinfo->hi_ports_weight; ++ ++ /* Got enough destination ports to decide that this is a scan? */ ++ /* Then log it and drop the packet. */ ++ if (curr->weight >= psdinfo->weight_threshold) ++ goto out_match; ++ ++ /* Remember the new port */ ++ if (curr->count < SCAN_MAX_COUNT) { ++ curr->ports[curr->count].number = dest_port; ++ curr->ports[curr->count].proto = proto; ++ curr->ports[curr->count].and_flags = tcp_flags; ++ curr->ports[curr->count].or_flags = tcp_flags; ++ curr->count++; ++ } ++ ++ goto out_no_match; ++ } ++ ++ /* We know this address, but the entry is outdated. Mark it unused, and ++ * remove from the hash table. We'll allocate a new entry instead since ++ * this one might get re-used too soon. */ ++ curr->src_addr.s_addr = 0; ++ if (last) ++ last->next = last->next->next; ++ else if (*head) ++ *head = (*head)->next; ++ last = NULL; ++ } ++ ++ /* We don't need an ACK from a new source address */ ++ if (proto == IPPROTO_TCP && tcp_hdr->ack) ++ goto out_no_match; ++ ++ /* Got too many source addresses with the same hash value? Then remove the ++ * oldest one from the hash table, so that they can't take too much of our ++ * CPU time even with carefully chosen spoofed IP addresses. */ ++ if (count >= HASH_MAX && last) last->next = NULL; ++ ++ /* We're going to re-use the oldest list entry, so remove it from the hash ++ * table first (if it is really already in use, and isn't removed from the ++ * hash table already because of the HASH_MAX check above). */ ++ ++ /* First, find it */ ++ if (state.list[state.index].src_addr.s_addr) ++ head = &state.hash[hashfunc(state.list[state.index].src_addr)]; ++ else ++ head = &last; ++ last = NULL; ++ if ((curr = *head)) ++ do { ++ if (curr == &state.list[state.index]) break; ++ last = curr; ++ } while ((curr = curr->next)); ++ ++ /* Then, remove it */ ++ if (curr) { ++ if (last) ++ last->next = last->next->next; ++ else if (*head) ++ *head = (*head)->next; ++ } ++ ++ /* Get our list entry */ ++ curr = &state.list[state.index++]; ++ if (state.index >= LIST_SIZE) state.index = 0; ++ ++ /* Link it into the hash table */ ++ head = &state.hash[hash]; ++ curr->next = *head; ++ *head = curr; ++ ++ /* And fill in the fields */ ++ curr->timestamp = now; ++ curr->src_addr = addr; ++ curr->dest_addr.s_addr = ip_hdr->daddr; ++ curr->src_port = src_port; ++ curr->count = 1; ++ curr->weight = (ntohs(dest_port) < 1024) ? 
++ psdinfo->lo_ports_weight : psdinfo->hi_ports_weight; ++ curr->ports[0].number = dest_port; ++ curr->ports[0].proto = proto; ++ curr->ports[0].and_flags = tcp_flags; ++ curr->ports[0].or_flags = tcp_flags; ++ curr->tos = ip_hdr->tos; ++ curr->ttl = ip_hdr->ttl; ++ ++out_no_match: ++ spin_unlock(&state.lock); ++ return 0; ++ ++out_match: ++ spin_unlock(&state.lock); ++ return 1; ++} ++ ++static int ipt_psd_checkentry(const char *tablename, ++ const struct ipt_ip *e, ++ void *matchinfo, ++ unsigned int matchsize, ++ unsigned int hook_mask) ++{ ++/* const struct ipt_psd_info *psdinfo = targinfo;*/ ++ ++ /* we accept TCP only */ ++/* if (e->ip.proto != IPPROTO_TCP) { */ ++/* DEBUGP("PSD: specified protocol may be TCP only\n"); */ ++/* return 0; */ ++/* } */ ++ ++ if (matchsize != IPT_ALIGN(sizeof(struct ipt_psd_info))) { ++ DEBUGP("PSD: matchsize %u != %u\n", ++ matchsize, ++ IPT_ALIGN(sizeof(struct ipt_psd_info))); ++ return 0; ++ } ++ ++ return 1; ++} ++ ++static struct ipt_match ipt_psd_reg = { ++ .name = "psd", ++ .match = ipt_psd_match, ++ .checkentry = ipt_psd_checkentry, ++ .me = THIS_MODULE }; ++ ++static int __init init(void) ++{ ++ if (ipt_register_match(&ipt_psd_reg)) ++ return -EINVAL; ++ ++ memset(&state, 0, sizeof(state)); ++ ++ spin_lock_init(&(state.lock)); ++ ++ printk("netfilter PSD loaded - (c) astaro AG\n"); ++ return 0; ++} ++ ++static void __exit fini(void) ++{ ++ ipt_unregister_match(&ipt_psd_reg); ++ printk("netfilter PSD unloaded - (c) astaro AG\n"); ++} ++ ++module_init(init); ++module_exit(fini); +--- /dev/null ++++ b/include/linux/netfilter_ipv4/ipt_psd.h +@@ -0,0 +1,40 @@ ++#ifndef _IPT_PSD_H ++#define _IPT_PSD_H ++ ++#include ++#include ++ ++/* ++ * High port numbers have a lower weight to reduce the frequency of false ++ * positives, such as from passive mode FTP transfers. ++ */ ++#define PORT_WEIGHT_PRIV 3 ++#define PORT_WEIGHT_HIGH 1 ++ ++/* ++ * Port scan detection thresholds: at least COUNT ports need to be scanned ++ * from the same source, with no longer than DELAY ticks between ports. ++ */ ++#define SCAN_MIN_COUNT 7 ++#define SCAN_MAX_COUNT (SCAN_MIN_COUNT * PORT_WEIGHT_PRIV) ++#define SCAN_WEIGHT_THRESHOLD SCAN_MAX_COUNT ++#define SCAN_DELAY_THRESHOLD (300) /* old usage of HZ here was erroneously and broke under uml */ ++ ++/* ++ * Keep track of up to LIST_SIZE source addresses, using a hash table of ++ * HASH_SIZE entries for faster lookups, but limiting hash collisions to ++ * HASH_MAX source addresses per the same hash value. 
++ */ ++#define LIST_SIZE 0x100 ++#define HASH_LOG 9 ++#define HASH_SIZE (1 << HASH_LOG) ++#define HASH_MAX 0x10 ++ ++struct ipt_psd_info { ++ unsigned int weight_threshold; ++ unsigned int delay_threshold; ++ unsigned short lo_ports_weight; ++ unsigned short hi_ports_weight; ++}; ++ ++#endif /*_IPT_PSD_H*/ +--- a/net/ipv4/netfilter/Makefile ++++ b/net/ipv4/netfilter/Makefile +@@ -49,6 +49,7 @@ + + # matches + obj-$(CONFIG_IP_NF_MATCH_AH) += ipt_ah.o ++obj-$(CONFIG_IP_NF_MATCH_PSD) += ipt_psd.o + obj-$(CONFIG_IP_NF_MATCH_ECN) += ipt_ecn.o + + # targets diff --git a/3.2.34/netfilter-implement-rfc-1123-for-ftp-conntrack.patch b/3.2.34/netfilter-implement-rfc-1123-for-ftp-conntrack.patch new file mode 100644 index 0000000..30cae8c --- /dev/null +++ b/3.2.34/netfilter-implement-rfc-1123-for-ftp-conntrack.patch @@ -0,0 +1,190 @@ +From: Jeff Mahoney +Subject: netfilter: Implement RFC 1123 for FTP conntrack +References: bnc#466279 bnc#681639 +Patch-mainline: Submitted via http://bugzilla.netfilter.org/show_bug.cgi?id=574 23 Jan 2011 + + The FTP conntrack code currently only accepts the following format for + the 227 response for PASV: + 227 Entering Passive Mode (148,100,81,40,31,161). + + It doesn't accept the following format from an obscure server: + 227 Data transfer will passively listen to 67,218,99,134,50,144 + + From RFC 1123: + The format of the 227 reply to a PASV command is not + well standardized. In particular, an FTP client cannot + assume that the parentheses shown on page 40 of RFC-959 + will be present (and in fact, Figure 3 on page 43 omits + them). Therefore, a User-FTP program that interprets + the PASV reply must scan the reply for the first digit + of the host and port numbers. + + This patch adds support for the RFC 1123 clarification by: + - Allowing a search filter to specify NUL as the terminator so that + try_number will return successfully if the array of numbers has been + filled when an unexpected character is encountered. + - Using space as the separator for the 227 reply and then scanning for + the first digit of the number sequence. The number sequence is parsed + out using the existing try_rfc959 but with a NUL terminator. 
+ + Tracked in: https://bugzilla.novell.com/show_bug.cgi?id=466279 + +Reported-by: Mark Post +Signed-off-by: Jeff Mahoney +--- + net/netfilter/nf_conntrack_ftp.c | 73 ++++++++++++++++++++++++++++----------- + 1 file changed, 54 insertions(+), 19 deletions(-) + +--- a/net/netfilter/nf_conntrack_ftp.c ++++ b/net/netfilter/nf_conntrack_ftp.c +@@ -53,10 +53,14 @@ unsigned int (*nf_nat_ftp_hook)(struct s + struct nf_conntrack_expect *exp); + EXPORT_SYMBOL_GPL(nf_nat_ftp_hook); + +-static int try_rfc959(const char *, size_t, struct nf_conntrack_man *, char); +-static int try_eprt(const char *, size_t, struct nf_conntrack_man *, char); ++static int try_rfc959(const char *, size_t, struct nf_conntrack_man *, ++ char, unsigned int *); ++static int try_rfc1123(const char *, size_t, struct nf_conntrack_man *, ++ char, unsigned int *); ++static int try_eprt(const char *, size_t, struct nf_conntrack_man *, ++ char, unsigned int *); + static int try_epsv_response(const char *, size_t, struct nf_conntrack_man *, +- char); ++ char, unsigned int *); + + static struct ftp_search { + const char *pattern; +@@ -64,7 +68,7 @@ static struct ftp_search { + char skip; + char term; + enum nf_ct_ftp_type ftptype; +- int (*getnum)(const char *, size_t, struct nf_conntrack_man *, char); ++ int (*getnum)(const char *, size_t, struct nf_conntrack_man *, char, unsigned int *); + } search[IP_CT_DIR_MAX][2] = { + [IP_CT_DIR_ORIGINAL] = { + { +@@ -88,10 +92,8 @@ static struct ftp_search { + { + .pattern = "227 ", + .plen = sizeof("227 ") - 1, +- .skip = '(', +- .term = ')', + .ftptype = NF_CT_FTP_PASV, +- .getnum = try_rfc959, ++ .getnum = try_rfc1123, + }, + { + .pattern = "229 ", +@@ -130,8 +132,9 @@ static int try_number(const char *data, + i++; + else { + /* Unexpected character; true if it's the +- terminator and we're finished. */ +- if (*data == term && i == array_size - 1) ++ terminator (or we don't care about one) ++ and we're finished. */ ++ if ((*data == term || !term) && i == array_size - 1) + return len; + + pr_debug("Char %u (got %u nums) `%u' unexpected\n", +@@ -146,7 +149,8 @@ static int try_number(const char *data, + + /* Returns 0, or length of numbers: 192,168,1,1,5,6 */ + static int try_rfc959(const char *data, size_t dlen, +- struct nf_conntrack_man *cmd, char term) ++ struct nf_conntrack_man *cmd, char term, ++ unsigned int *offset) + { + int length; + u_int32_t array[6]; +@@ -161,6 +165,33 @@ static int try_rfc959(const char *data, + return length; + } + ++/* ++ * From RFC 1123: ++ * The format of the 227 reply to a PASV command is not ++ * well standardized. In particular, an FTP client cannot ++ * assume that the parentheses shown on page 40 of RFC-959 ++ * will be present (and in fact, Figure 3 on page 43 omits ++ * them). Therefore, a User-FTP program that interprets ++ * the PASV reply must scan the reply for the first digit ++ * of the host and port numbers. 
++ */ ++static int try_rfc1123(const char *data, size_t dlen, ++ struct nf_conntrack_man *cmd, char term, ++ unsigned int *offset) ++{ ++ int i; ++ for (i = 0; i < dlen; i++) ++ if (isdigit(data[i])) ++ break; ++ ++ if (i == dlen) ++ return 0; ++ ++ *offset += i; ++ ++ return try_rfc959(data + i, dlen - i, cmd, 0, offset); ++} ++ + /* Grab port: number up to delimiter */ + static int get_port(const char *data, int start, size_t dlen, char delim, + __be16 *port) +@@ -189,7 +220,7 @@ static int get_port(const char *data, in + + /* Returns 0, or length of numbers: |1|132.235.1.2|6275| or |2|3ffe::1|6275| */ + static int try_eprt(const char *data, size_t dlen, struct nf_conntrack_man *cmd, +- char term) ++ char term, unsigned int *offset) + { + char delim; + int length; +@@ -237,7 +268,8 @@ static int try_eprt(const char *data, si + + /* Returns 0, or length of numbers: |||6446| */ + static int try_epsv_response(const char *data, size_t dlen, +- struct nf_conntrack_man *cmd, char term) ++ struct nf_conntrack_man *cmd, char term, ++ unsigned int *offset) + { + char delim; + +@@ -259,9 +291,10 @@ static int find_pattern(const char *data + unsigned int *numlen, + struct nf_conntrack_man *cmd, + int (*getnum)(const char *, size_t, +- struct nf_conntrack_man *, char)) ++ struct nf_conntrack_man *, char, ++ unsigned int *)) + { +- size_t i; ++ size_t i = plen; + + pr_debug("find_pattern `%s': dlen = %Zu\n", pattern, dlen); + if (dlen == 0) +@@ -291,16 +324,18 @@ static int find_pattern(const char *data + pr_debug("Pattern matches!\n"); + /* Now we've found the constant string, try to skip + to the 'skip' character */ +- for (i = plen; data[i] != skip; i++) +- if (i == dlen - 1) return -1; ++ if (skip) { ++ for (i = plen; data[i] != skip; i++) ++ if (i == dlen - 1) return -1; + +- /* Skip over the last character */ +- i++; ++ /* Skip over the last character */ ++ i++; ++ } + + pr_debug("Skipped up to `%c'!\n", skip); + + *numoff = i; +- *numlen = getnum(data + i, dlen - i, cmd, term); ++ *numlen = getnum(data + i, dlen - i, cmd, term, numoff); + if (!*numlen) + return -1; + diff --git a/3.2.34/netfilter-ip_conntrack_slp.patch b/3.2.34/netfilter-ip_conntrack_slp.patch new file mode 100644 index 0000000..ff72d85 --- /dev/null +++ b/3.2.34/netfilter-ip_conntrack_slp.patch @@ -0,0 +1,185 @@ +From: Jiri Bohac +Subject: connection tracking helper for SLP +References: fate#301134 +Patch-mainline: Not yet + +A simple connection tracking helper for SLP. Marks replies to a +SLP broadcast query as ESTABLISHED to allow them to pass through the +firewall. + +Signed-off-by: Jiri Bohac + +--- + net/netfilter/Kconfig | 15 ++++ + net/netfilter/Makefile | 1 + net/netfilter/nf_conntrack_slp.c | 131 +++++++++++++++++++++++++++++++++++++++ + 3 files changed, 147 insertions(+) + +--- a/net/netfilter/Kconfig ++++ b/net/netfilter/Kconfig +@@ -290,6 +290,21 @@ config NF_CONNTRACK_TFTP + + To compile it as a module, choose M here. If unsure, say N. + ++config NF_CONNTRACK_SLP ++ tristate "SLP protocol support" ++ depends on NF_CONNTRACK ++ depends on NETFILTER_ADVANCED ++ help ++ SLP queries are sometimes sent as broadcast messages from an ++ unprivileged port and responded to with unicast messages to the ++ same port. This make them hard to firewall properly because connection ++ tracking doesn't deal with broadcasts. This helper tracks locally ++ originating broadcast SLP queries and the corresponding ++ responses. It relies on correct IP address configuration, specifically ++ netmask and broadcast address. 
++ ++ To compile it as a module, choose M here. If unsure, say N. ++ + config NF_CT_NETLINK + tristate 'Connection tracking netlink interface' + select NETFILTER_NETLINK +--- a/net/netfilter/Makefile ++++ b/net/netfilter/Makefile +@@ -36,6 +36,7 @@ obj-$(CONFIG_NF_CONNTRACK_PPTP) += nf_co + obj-$(CONFIG_NF_CONNTRACK_SANE) += nf_conntrack_sane.o + obj-$(CONFIG_NF_CONNTRACK_SIP) += nf_conntrack_sip.o + obj-$(CONFIG_NF_CONNTRACK_TFTP) += nf_conntrack_tftp.o ++obj-$(CONFIG_NF_CONNTRACK_SLP) += nf_conntrack_slp.o + + # transparent proxy support + obj-$(CONFIG_NETFILTER_TPROXY) += nf_tproxy_core.o +--- /dev/null ++++ b/net/netfilter/nf_conntrack_slp.c +@@ -0,0 +1,131 @@ ++/* ++ * NetBIOS name service broadcast connection tracking helper ++ * ++ * (c) 2007 Jiri Bohac ++ * (c) 2005 Patrick McHardy ++ * ++ * This program is free software; you can redistribute it and/or ++ * modify it under the terms of the GNU General Public License ++ * as published by the Free Software Foundation; either version ++ * 2 of the License, or (at your option) any later version. ++ */ ++/* ++ * This helper tracks locally originating NetBIOS name service ++ * requests by issuing permanent expectations (valid until ++ * timing out) matching all reply connections from the ++ * destination network. The only NetBIOS specific thing is ++ * actually the port number. ++ */ ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++ ++#include ++#include ++#include ++ ++#define SLP_PORT 427 ++ ++MODULE_AUTHOR("Jiri Bohac "); ++MODULE_DESCRIPTION("SLP broadcast connection tracking helper"); ++MODULE_LICENSE("GPL"); ++MODULE_ALIAS("ip_conntrack_slp"); ++ ++static unsigned int timeout __read_mostly = 3; ++module_param(timeout, uint, 0400); ++MODULE_PARM_DESC(timeout, "timeout for master connection/replies in seconds"); ++ ++static int help(struct sk_buff *skb, unsigned int protoff, ++ struct nf_conn *ct, enum ip_conntrack_info ctinfo) ++{ ++ struct nf_conntrack_expect *exp; ++ struct rtable *rt = skb_rtable(skb); ++ struct in_device *in_dev; ++ __be32 mask = 0; ++ __be32 src = 0; ++ ++ /* we're only interested in locally generated packets */ ++ if (skb->sk == NULL) ++ goto out; ++ if (rt == NULL || !(rt->rt_flags & (RTCF_MULTICAST|RTCF_BROADCAST))) ++ goto out; ++ if (CTINFO2DIR(ctinfo) != IP_CT_DIR_ORIGINAL) ++ goto out; ++ ++ rcu_read_lock(); ++ in_dev = __in_dev_get_rcu(rt->dst.dev); ++ if (in_dev != NULL) { ++ for_primary_ifa(in_dev) { ++ /* this is a hack as slp uses multicast we can't match ++ * the destination address to some broadcast address. So ++ * just take the first one. 
Better would be to install ++ * expectations for all addresses */ ++ mask = ifa->ifa_mask; ++ src = ifa->ifa_broadcast; ++ break; ++ } endfor_ifa(in_dev); ++ } ++ rcu_read_unlock(); ++ ++ if (mask == 0 || src == 0) ++ goto out; ++ ++ exp = nf_ct_expect_alloc(ct); ++ if (exp == NULL) ++ goto out; ++ ++ exp->tuple = ct->tuplehash[IP_CT_DIR_REPLY].tuple; ++ exp->tuple.src.u3.ip = src; ++ exp->tuple.src.u.udp.port = htons(SLP_PORT); ++ ++ exp->mask.src.u3.ip = mask; ++ exp->mask.src.u.udp.port = htons(0xFFFF); ++ ++ exp->expectfn = NULL; ++ exp->flags = NF_CT_EXPECT_PERMANENT; ++ exp->class = NF_CT_EXPECT_CLASS_DEFAULT; ++ exp->helper = NULL; ++ ++ nf_ct_expect_related(exp); ++ nf_ct_expect_put(exp); ++ ++ nf_ct_refresh(ct, skb, timeout * HZ); ++out: ++ return NF_ACCEPT; ++} ++ ++static struct nf_conntrack_expect_policy exp_policy = { ++ .max_expected = 1, ++}; ++ ++static struct nf_conntrack_helper helper __read_mostly = { ++ .name = "slp", ++ .tuple.src.l3num = AF_INET, ++ .tuple.src.u.udp.port = __constant_htons(SLP_PORT), ++ .tuple.dst.protonum = IPPROTO_UDP, ++ .me = THIS_MODULE, ++ .help = help, ++ .expect_policy = &exp_policy, ++}; ++ ++static int __init nf_conntrack_slp_init(void) ++{ ++ exp_policy.timeout = timeout; ++ return nf_conntrack_helper_register(&helper); ++} ++ ++static void __exit nf_conntrack_slp_fini(void) ++{ ++ nf_conntrack_helper_unregister(&helper); ++} ++ ++module_init(nf_conntrack_slp_init); ++module_exit(nf_conntrack_slp_fini); diff --git a/3.2.34/patches.suse/0002-btrfs-Introduce-btrfs_get_maps_dev.patch b/3.2.34/patches.suse/0002-btrfs-Introduce-btrfs_get_maps_dev.patch new file mode 100644 index 0000000..3dd6a2b --- /dev/null +++ b/3.2.34/patches.suse/0002-btrfs-Introduce-btrfs_get_maps_dev.patch @@ -0,0 +1,39 @@ +From c83e5a977a2510de872d48a4d3bebc94dac0ed8f Mon Sep 17 00:00:00 2001 +From: Mark Fasheh +Date: Fri, 13 May 2011 16:01:39 -0700 +Subject: [PATCH 2/2] btrfs: Introduce btrfs_get_maps_dev() +References: bnc#672923 +Patch-mainline: Never + +Use this to return the subvolume superblock in proc instead of the global +superblock which is automatically taken today. This fixes a userspace +breakage where discrepancies between the devices two would confuse software +such as lsof. 
+ +Signed-off-by: Mark Fasheh +--- + fs/btrfs/super.c | 6 ++++++ + 1 file changed, 6 insertions(+) + +--- a/fs/btrfs/super.c ++++ b/fs/btrfs/super.c +@@ -1301,6 +1301,11 @@ static void btrfs_fs_dirty_inode(struct + "error %d\n", btrfs_ino(inode), ret); + } + ++static dev_t btrfs_get_maps_dev(struct inode *inode) ++{ ++ return BTRFS_I(inode)->root->anon_dev; ++} ++ + static const struct super_operations btrfs_super_ops = { + .drop_inode = btrfs_drop_inode, + .evict_inode = btrfs_evict_inode, +@@ -1315,6 +1320,7 @@ static const struct super_operations btr + .remount_fs = btrfs_remount, + .freeze_fs = btrfs_freeze, + .unfreeze_fs = btrfs_unfreeze, ++ .get_maps_dev = btrfs_get_maps_dev, + }; + + static const struct file_operations btrfs_ctl_fops = { diff --git a/3.2.34/patches.suse/btrfs-0900-add-allocator-tracepoints.patch b/3.2.34/patches.suse/btrfs-0900-add-allocator-tracepoints.patch new file mode 100644 index 0000000..6a14034 --- /dev/null +++ b/3.2.34/patches.suse/btrfs-0900-add-allocator-tracepoints.patch @@ -0,0 +1,304 @@ +From: Josef Bacik +Date: Thu, 10 Nov 2011 08:29:20 -0500 +Patch-mainline: pending +References: FATE#306586 +Subject: [PATCH] Btrfs: add allocator tracepoints + +I used these tracepoints when figuring out what the cluster stuff was doing, so +add them to mainline in case we need to profile this stuff again. Thanks, + +Signed-off-by: Josef Bacik +Signed-off-by: David Sterba +--- + fs/btrfs/extent-tree.c | 9 ++ + fs/btrfs/free-space-cache.c | 11 ++ + include/trace/events/btrfs.h | 173 +++++++++++++++++++++++++++++++++++++++++++ + 3 files changed, 192 insertions(+), 1 deletion(-) + +--- a/fs/btrfs/extent-tree.c ++++ b/fs/btrfs/extent-tree.c +@@ -5128,6 +5128,8 @@ + ins->objectid = 0; + ins->offset = 0; + ++ trace_find_free_extent(orig_root, num_bytes, empty_size, data); ++ + space_info = __find_space_info(root->fs_info, data); + if (!space_info) { + printk(KERN_ERR "No space info for %llu\n", data); +@@ -5313,6 +5315,8 @@ + if (offset) { + /* we have a block, we're done */ + spin_unlock(&last_ptr->refill_lock); ++ trace_btrfs_reserve_extent_cluster(root, ++ block_group, search_start, num_bytes); + goto checks; + } + +@@ -5359,6 +5363,9 @@ + if (offset) { + /* we found one, proceed */ + spin_unlock(&last_ptr->refill_lock); ++ trace_btrfs_reserve_extent_cluster(root, ++ block_group, search_start, ++ num_bytes); + goto checks; + } + } else if (!cached && loop > LOOP_CACHING_NOWAIT +@@ -5439,6 +5446,8 @@ + ins->objectid = search_start; + ins->offset = num_bytes; + ++ trace_btrfs_reserve_extent(orig_root, block_group, ++ search_start, num_bytes); + if (offset < search_start) + btrfs_add_free_space(used_block_group, offset, + search_start - offset); +--- a/fs/btrfs/free-space-cache.c ++++ b/fs/btrfs/free-space-cache.c +@@ -2346,6 +2346,8 @@ + &entry->offset_index, 1); + BUG_ON(ret); + ++ trace_btrfs_setup_cluster(block_group, cluster, ++ total_found * block_group->sectorsize, 1); + return 0; + } + +@@ -2368,6 +2370,7 @@ + u64 window_free; + u64 max_extent; + u64 max_gap = 128 * 1024; ++ u64 total_size = 0; + + entry = tree_search_offset(ctl, offset, 0, 1); + if (!entry) +@@ -2444,11 +2447,12 @@ + rb_erase(&entry->offset_index, &ctl->free_space_offset); + ret = tree_insert_offset(&cluster->root, entry->offset, + &entry->offset_index, 0); ++ total_size += entry->bytes; + BUG_ON(ret); + } while (node && entry != last); + + cluster->max_size = max_extent; +- ++ trace_btrfs_setup_cluster(block_group, cluster, total_size, 0); + return 0; + } + +@@ -2552,6 +2556,9 @@ + goto out; + } 
+ ++ trace_btrfs_find_cluster(block_group, offset, bytes, empty_size, ++ min_bytes); ++ + ret = setup_cluster_no_bitmap(block_group, cluster, &bitmaps, offset, + bytes, min_bytes); + if (ret) +@@ -2567,6 +2574,8 @@ + list_add_tail(&cluster->block_group_list, + &block_group->cluster_list); + cluster->block_group = block_group; ++ } else { ++ trace_btrfs_failed_cluster_setup(block_group); + } + out: + spin_unlock(&cluster->lock); +--- a/include/trace/events/btrfs.h ++++ b/include/trace/events/btrfs.h +@@ -16,6 +16,8 @@ + struct btrfs_delayed_tree_ref; + struct btrfs_delayed_data_ref; + struct btrfs_delayed_ref_head; ++struct btrfs_block_group_cache; ++struct btrfs_free_cluster; + struct map_lookup; + struct extent_buffer; + +@@ -44,6 +46,15 @@ + obj, ((obj >= BTRFS_DATA_RELOC_TREE_OBJECTID) || \ + (obj <= BTRFS_CSUM_TREE_OBJECTID )) ? __show_root_type(obj) : "-" + ++#define BTRFS_GROUP_FLAGS \ ++ { BTRFS_BLOCK_GROUP_DATA, "DATA"}, \ ++ { BTRFS_BLOCK_GROUP_SYSTEM, "SYSTEM"}, \ ++ { BTRFS_BLOCK_GROUP_METADATA, "METADATA"}, \ ++ { BTRFS_BLOCK_GROUP_RAID0, "RAID0"}, \ ++ { BTRFS_BLOCK_GROUP_RAID1, "RAID1"}, \ ++ { BTRFS_BLOCK_GROUP_DUP, "DUP"}, \ ++ { BTRFS_BLOCK_GROUP_RAID10, "RAID10"} ++ + TRACE_EVENT(btrfs_transaction_commit, + + TP_PROTO(struct btrfs_root *root), +@@ -661,6 +672,168 @@ + TP_ARGS(root, start, len) + ); + ++TRACE_EVENT(find_free_extent, ++ ++ TP_PROTO(struct btrfs_root *root, u64 num_bytes, u64 empty_size, ++ u64 data), ++ ++ TP_ARGS(root, num_bytes, empty_size, data), ++ ++ TP_STRUCT__entry( ++ __field( u64, root_objectid ) ++ __field( u64, num_bytes ) ++ __field( u64, empty_size ) ++ __field( u64, data ) ++ ), ++ ++ TP_fast_assign( ++ __entry->root_objectid = root->root_key.objectid; ++ __entry->num_bytes = num_bytes; ++ __entry->empty_size = empty_size; ++ __entry->data = data; ++ ), ++ ++ TP_printk("root = %Lu(%s), len = %Lu, empty_size = %Lu, " ++ "flags = %Lu(%s)", show_root_type(__entry->root_objectid), ++ __entry->num_bytes, __entry->empty_size, __entry->data, ++ __print_flags((unsigned long)__entry->data, "|", ++ BTRFS_GROUP_FLAGS)) ++); ++ ++DECLARE_EVENT_CLASS(btrfs__reserve_extent, ++ ++ TP_PROTO(struct btrfs_root *root, ++ struct btrfs_block_group_cache *block_group, u64 start, ++ u64 len), ++ ++ TP_ARGS(root, block_group, start, len), ++ ++ TP_STRUCT__entry( ++ __field( u64, root_objectid ) ++ __field( u64, bg_objectid ) ++ __field( u64, flags ) ++ __field( u64, start ) ++ __field( u64, len ) ++ ), ++ ++ TP_fast_assign( ++ __entry->root_objectid = root->root_key.objectid; ++ __entry->bg_objectid = block_group->key.objectid; ++ __entry->flags = block_group->flags; ++ __entry->start = start; ++ __entry->len = len; ++ ), ++ ++ TP_printk("root = %Lu(%s), block_group = %Lu, flags = %Lu(%s), " ++ "start = %Lu, len = %Lu", ++ show_root_type(__entry->root_objectid), __entry->bg_objectid, ++ __entry->flags, __print_flags((unsigned long)__entry->flags, ++ "|", BTRFS_GROUP_FLAGS), ++ __entry->start, __entry->len) ++); ++ ++DEFINE_EVENT(btrfs__reserve_extent, btrfs_reserve_extent, ++ ++ TP_PROTO(struct btrfs_root *root, ++ struct btrfs_block_group_cache *block_group, u64 start, ++ u64 len), ++ ++ TP_ARGS(root, block_group, start, len) ++); ++ ++DEFINE_EVENT(btrfs__reserve_extent, btrfs_reserve_extent_cluster, ++ ++ TP_PROTO(struct btrfs_root *root, ++ struct btrfs_block_group_cache *block_group, u64 start, ++ u64 len), ++ ++ TP_ARGS(root, block_group, start, len) ++); ++ ++TRACE_EVENT(btrfs_find_cluster, ++ ++ TP_PROTO(struct btrfs_block_group_cache *block_group, u64 start, 
++ u64 bytes, u64 empty_size, u64 min_bytes), ++ ++ TP_ARGS(block_group, start, bytes, empty_size, min_bytes), ++ ++ TP_STRUCT__entry( ++ __field( u64, bg_objectid ) ++ __field( u64, flags ) ++ __field( u64, start ) ++ __field( u64, bytes ) ++ __field( u64, empty_size ) ++ __field( u64, min_bytes ) ++ ), ++ ++ TP_fast_assign( ++ __entry->bg_objectid = block_group->key.objectid; ++ __entry->flags = block_group->flags; ++ __entry->start = start; ++ __entry->bytes = bytes; ++ __entry->empty_size = empty_size; ++ __entry->min_bytes = min_bytes; ++ ), ++ ++ TP_printk("block_group = %Lu, flags = %Lu(%s), start = %Lu, len = %Lu," ++ " empty_size = %Lu, min_bytes = %Lu", __entry->bg_objectid, ++ __entry->flags, ++ __print_flags((unsigned long)__entry->flags, "|", ++ BTRFS_GROUP_FLAGS), __entry->start, ++ __entry->bytes, __entry->empty_size, __entry->min_bytes) ++); ++ ++TRACE_EVENT(btrfs_failed_cluster_setup, ++ ++ TP_PROTO(struct btrfs_block_group_cache *block_group), ++ ++ TP_ARGS(block_group), ++ ++ TP_STRUCT__entry( ++ __field( u64, bg_objectid ) ++ ), ++ ++ TP_fast_assign( ++ __entry->bg_objectid = block_group->key.objectid; ++ ), ++ ++ TP_printk("block_group = %Lu", __entry->bg_objectid) ++); ++ ++TRACE_EVENT(btrfs_setup_cluster, ++ ++ TP_PROTO(struct btrfs_block_group_cache *block_group, ++ struct btrfs_free_cluster *cluster, u64 size, int bitmap), ++ ++ TP_ARGS(block_group, cluster, size, bitmap), ++ ++ TP_STRUCT__entry( ++ __field( u64, bg_objectid ) ++ __field( u64, flags ) ++ __field( u64, start ) ++ __field( u64, max_size ) ++ __field( u64, size ) ++ __field( int, bitmap ) ++ ), ++ ++ TP_fast_assign( ++ __entry->bg_objectid = block_group->key.objectid; ++ __entry->flags = block_group->flags; ++ __entry->start = cluster->window_start; ++ __entry->max_size = cluster->max_size; ++ __entry->size = size; ++ __entry->bitmap = bitmap; ++ ), ++ ++ TP_printk("block_group = %Lu, flags = %Lu(%s), window_start = %Lu, " ++ "size = %Lu, max_size = %Lu, bitmap = %d", ++ __entry->bg_objectid, ++ __entry->flags, ++ __print_flags((unsigned long)__entry->flags, "|", ++ BTRFS_GROUP_FLAGS), __entry->start, ++ __entry->size, __entry->max_size, __entry->bitmap) ++); ++ + #endif /* _TRACE_BTRFS_H */ + + /* This part must be outside protection */ diff --git a/3.2.34/patches.suse/btrfs-8001-rewrite-btrfs_trim_block_group.patch b/3.2.34/patches.suse/btrfs-8001-rewrite-btrfs_trim_block_group.patch new file mode 100644 index 0000000..9d858c2 --- /dev/null +++ b/3.2.34/patches.suse/btrfs-8001-rewrite-btrfs_trim_block_group.patch @@ -0,0 +1,299 @@ +From 033eea6d488471c7262b377e066ecf9eea85d5b1 Mon Sep 17 00:00:00 2001 +From: Li Zefan +Date: Thu, 17 Nov 2011 15:26:17 +0800 +Patch-mainline: pending +References: FATE#306586 +Subject: [PATCH] Btrfs: rewrite btrfs_trim_block_group() + +There are various bugs in block group trimming: + +- It may trim from offset smaller than user-specified offset. +- It may trim beyond user-specified range. +- It may leak free space for extents smaller than specified minlen. +- It may truncate the last trimmed extent thus leak free space. +- With mixed extents+bitmaps, some extents may not be trimmed. +- With mixed extents+bitmaps, some bitmaps may not be trimmed (even +none will be trimmed). Even for those trimmed, not all the free space +in the bitmaps will be trimmed. + +I rewrite btrfs_trim_block_group() and break it into two functions. +One is to trim extents only, and the other is to trim bitmaps only. 
+ +Signed-off-by: Li Zefan +Signed-off-by: David Sterba +--- + fs/btrfs/free-space-cache.c | 233 ++++++++++++++++++++++++++++++-------------- + 1 file changed, 163 insertions(+), 70 deletions(-) + +--- a/fs/btrfs/free-space-cache.c ++++ b/fs/btrfs/free-space-cache.c +@@ -2586,17 +2586,57 @@ void btrfs_init_free_cluster(struct btrf + cluster->block_group = NULL; + } + +-int btrfs_trim_block_group(struct btrfs_block_group_cache *block_group, +- u64 *trimmed, u64 start, u64 end, u64 minlen) ++static int do_trimming(struct btrfs_block_group_cache *block_group, ++ u64 *total_trimmed, u64 start, u64 bytes, ++ u64 reserved_start, u64 reserved_bytes) + { +- struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl; +- struct btrfs_free_space *entry = NULL; ++ struct btrfs_space_info *space_info = block_group->space_info; + struct btrfs_fs_info *fs_info = block_group->fs_info; +- u64 bytes = 0; +- u64 actually_trimmed; +- int ret = 0; ++ int ret; ++ int update = 0; ++ u64 trimmed = 0; ++ ++ spin_lock(&space_info->lock); ++ spin_lock(&block_group->lock); ++ if (!block_group->ro) { ++ block_group->reserved += reserved_bytes; ++ space_info->bytes_reserved += reserved_bytes; ++ update = 1; ++ } ++ spin_unlock(&block_group->lock); ++ spin_unlock(&space_info->lock); + +- *trimmed = 0; ++ ret = btrfs_error_discard_extent(fs_info->extent_root, ++ start, bytes, &trimmed); ++ if (!ret) ++ *total_trimmed += trimmed; ++ ++ btrfs_add_free_space(block_group, reserved_start, reserved_bytes); ++ ++ if (update) { ++ spin_lock(&space_info->lock); ++ spin_lock(&block_group->lock); ++ if (block_group->ro) ++ space_info->bytes_readonly += reserved_bytes; ++ block_group->reserved -= reserved_bytes; ++ space_info->bytes_reserved -= reserved_bytes; ++ spin_unlock(&space_info->lock); ++ spin_unlock(&block_group->lock); ++ } ++ ++ return ret; ++} ++ ++static int trim_no_bitmap(struct btrfs_block_group_cache *block_group, ++ u64 *total_trimmed, u64 start, u64 end, u64 minlen) ++{ ++ struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl; ++ struct btrfs_free_space *entry; ++ struct rb_node *node; ++ int ret = 0; ++ u64 extent_start; ++ u64 extent_bytes; ++ u64 bytes; + + while (start < end) { + spin_lock(&ctl->tree_lock); +@@ -2607,81 +2647,118 @@ int btrfs_trim_block_group(struct btrfs_ + } + + entry = tree_search_offset(ctl, start, 0, 1); +- if (!entry) +- entry = tree_search_offset(ctl, +- offset_to_bitmap(ctl, start), +- 1, 1); +- +- if (!entry || entry->offset >= end) { ++ if (!entry) { + spin_unlock(&ctl->tree_lock); + break; + } + +- if (entry->bitmap) { +- ret = search_bitmap(ctl, entry, &start, &bytes); +- if (!ret) { +- if (start >= end) { +- spin_unlock(&ctl->tree_lock); +- break; +- } +- bytes = min(bytes, end - start); +- bitmap_clear_bits(ctl, entry, start, bytes); +- if (entry->bytes == 0) +- free_bitmap(ctl, entry); +- } else { +- start = entry->offset + BITS_PER_BITMAP * +- block_group->sectorsize; ++ /* skip bitmaps */ ++ while (entry->bitmap) { ++ node = rb_next(&entry->offset_index); ++ if (!node) { + spin_unlock(&ctl->tree_lock); +- ret = 0; +- continue; ++ goto out; + } +- } else { +- start = entry->offset; +- bytes = min(entry->bytes, end - start); +- unlink_free_space(ctl, entry); +- kmem_cache_free(btrfs_free_space_cachep, entry); ++ entry = rb_entry(node, struct btrfs_free_space, ++ offset_index); ++ } ++ ++ if (entry->offset >= end) { ++ spin_unlock(&ctl->tree_lock); ++ break; + } + ++ extent_start = entry->offset; ++ extent_bytes = entry->bytes; ++ start = max(start, extent_start); ++ 
bytes = min(extent_start + extent_bytes, end) - start; ++ if (bytes < minlen) { ++ spin_unlock(&ctl->tree_lock); ++ goto next; ++ } ++ ++ unlink_free_space(ctl, entry); ++ kmem_cache_free(btrfs_free_space_cachep, entry); ++ + spin_unlock(&ctl->tree_lock); + +- if (bytes >= minlen) { +- struct btrfs_space_info *space_info; +- int update = 0; +- +- space_info = block_group->space_info; +- spin_lock(&space_info->lock); +- spin_lock(&block_group->lock); +- if (!block_group->ro) { +- block_group->reserved += bytes; +- space_info->bytes_reserved += bytes; +- update = 1; +- } +- spin_unlock(&block_group->lock); +- spin_unlock(&space_info->lock); ++ ret = do_trimming(block_group, total_trimmed, start, bytes, ++ extent_start, extent_bytes); ++ if (ret) ++ break; ++next: ++ start += bytes; + +- ret = btrfs_error_discard_extent(fs_info->extent_root, +- start, +- bytes, +- &actually_trimmed); +- +- btrfs_add_free_space(block_group, start, bytes); +- if (update) { +- spin_lock(&space_info->lock); +- spin_lock(&block_group->lock); +- if (block_group->ro) +- space_info->bytes_readonly += bytes; +- block_group->reserved -= bytes; +- space_info->bytes_reserved -= bytes; +- spin_unlock(&space_info->lock); +- spin_unlock(&block_group->lock); +- } ++ if (fatal_signal_pending(current)) { ++ ret = -ERESTARTSYS; ++ break; ++ } ++ ++ cond_resched(); ++ } ++out: ++ return ret; ++} ++ ++static int trim_bitmaps(struct btrfs_block_group_cache *block_group, ++ u64 *total_trimmed, u64 start, u64 end, u64 minlen) ++{ ++ struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl; ++ struct btrfs_free_space *entry; ++ int ret = 0; ++ int ret2; ++ u64 bytes; ++ u64 offset = offset_to_bitmap(ctl, start); ++ ++ while (offset < end) { ++ bool next_bitmap = false; ++ ++ spin_lock(&ctl->tree_lock); + +- if (ret) +- break; +- *trimmed += actually_trimmed; ++ if (ctl->free_space < minlen) { ++ spin_unlock(&ctl->tree_lock); ++ break; ++ } ++ ++ entry = tree_search_offset(ctl, offset, 1, 0); ++ if (!entry) { ++ spin_unlock(&ctl->tree_lock); ++ next_bitmap = true; ++ goto next; ++ } ++ ++ bytes = minlen; ++ ret2 = search_bitmap(ctl, entry, &start, &bytes); ++ if (ret2 || start >= end) { ++ spin_unlock(&ctl->tree_lock); ++ next_bitmap = true; ++ goto next; ++ } ++ ++ bytes = min(bytes, end - start); ++ if (bytes < minlen) { ++ spin_unlock(&ctl->tree_lock); ++ goto next; ++ } ++ ++ bitmap_clear_bits(ctl, entry, start, bytes); ++ if (entry->bytes == 0) ++ free_bitmap(ctl, entry); ++ ++ spin_unlock(&ctl->tree_lock); ++ ++ ret = do_trimming(block_group, total_trimmed, start, bytes, ++ start, bytes); ++ if (ret) ++ break; ++next: ++ if (next_bitmap) { ++ offset += BITS_PER_BITMAP * ctl->unit; ++ } else { ++ start += bytes; ++ if (start >= offset + BITS_PER_BITMAP * ctl->unit) ++ offset += BITS_PER_BITMAP * ctl->unit; + } +- start += bytes; +- bytes = 0; + + if (fatal_signal_pending(current)) { + ret = -ERESTARTSYS; +@@ -2693,6 +2770,22 @@ int btrfs_trim_block_group(struct btrfs_ + + return ret; + } ++ ++int btrfs_trim_block_group(struct btrfs_block_group_cache *block_group, ++ u64 *trimmed, u64 start, u64 end, u64 minlen) ++{ ++ int ret; ++ ++ *trimmed = 0; ++ ++ ret = trim_no_bitmap(block_group, trimmed, start, end, minlen); ++ if (ret) ++ return ret; ++ ++ ret = trim_bitmaps(block_group, trimmed, start, end, minlen); ++ ++ return ret; ++} + + /* + * Find the left-most item in the cache tree, and then return the diff --git a/3.2.34/patches.suse/btrfs-8007-lock-and-disable-irq-during-space-alloc.patch 
b/3.2.34/patches.suse/btrfs-8007-lock-and-disable-irq-during-space-alloc.patch new file mode 100644 index 0000000..900cba4 --- /dev/null +++ b/3.2.34/patches.suse/btrfs-8007-lock-and-disable-irq-during-space-alloc.patch @@ -0,0 +1,40 @@ +From e2049e28add8f8fbfa8680fcf5fc49fa3b713ceb Mon Sep 17 00:00:00 2001 +From: David Sterba +Date: Tue, 22 Nov 2011 18:05:48 +0100 +Patch-mainline: pending +References: FATE#306586 bnc#730103 +Subject: [PATCH] btrfs: lock and disable irq during space alloc + +This is a workaround. + +Signed-off-by: Jeff Mahoney +Signed-off-by: David Sterba +--- + fs/btrfs/free-space-cache.c | 4 ++-- + 1 files changed, 2 insertions(+), 2 deletions(-) + +diff --git a/fs/btrfs/free-space-cache.c b/fs/btrfs/free-space-cache.c +index 7807276..e49c1cd 100644 +--- a/fs/btrfs/free-space-cache.c ++++ b/fs/btrfs/free-space-cache.c +@@ -2102,7 +2102,7 @@ u64 btrfs_find_space_for_alloc(struct btrfs_block_group_cache *block_group, + u64 bytes_search = bytes + empty_size; + u64 ret = 0; + +- spin_lock(&ctl->tree_lock); ++ spin_lock_irq(&ctl->tree_lock); + entry = find_free_space(ctl, &offset, &bytes_search); + if (!entry) + goto out; +@@ -2123,7 +2123,7 @@ u64 btrfs_find_space_for_alloc(struct btrfs_block_group_cache *block_group, + } + + out: +- spin_unlock(&ctl->tree_lock); ++ spin_unlock_irq(&ctl->tree_lock); + + return ret; + } +-- +1.7.6 + diff --git a/3.2.34/patches.suse/btrfs-8013-sector-size-check-during-mount.patch b/3.2.34/patches.suse/btrfs-8013-sector-size-check-during-mount.patch new file mode 100644 index 0000000..2017ed3 --- /dev/null +++ b/3.2.34/patches.suse/btrfs-8013-sector-size-check-during-mount.patch @@ -0,0 +1,43 @@ +From: Keith Mannthey +Date: Tue, 29 Nov 2011 17:44:12 -0800 +Patch-mainline: pending +References: FATE#306586 bnc#724620 +Subject: [PATCH] Sector Size check during Mount + +Gracefully fail when trying to mount a BTRFS file system that has a +sectorsize smaller than PAGE_SIZE. + +On PPC it is possible to build a FS while using a 4k PAGE_SIZE kernel +then boot into a 64K PAGE_SIZE kernel. Presently open_ctree fails in an +endless loop and hangs the machine in this situation. + +My debugging has show this Sector size < Page size to be a non trivial +situation and a graceful exit from the situation would be nice for the +time being. 
+ +Signed-off-by: Keith Mannthey +Signed-off-by: David Sterba +--- + fs/btrfs/disk-io.c | 6 ++++++ + 1 files changed, 6 insertions(+), 0 deletions(-) + +diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c +index 94abc25..1cbfa75 100644 +--- a/fs/btrfs/disk-io.c ++++ b/fs/btrfs/disk-io.c +@@ -2230,6 +2230,12 @@ struct btrfs_root *open_ctree(struct super_block *sb, + goto fail_sb_buffer; + } + ++ if (sectorsize < PAGE_SIZE) { ++ printk(KERN_WARNING "btrfs: Incompatible sector size " ++ "found on %s\n", sb->s_id); ++ goto fail_sb_buffer; ++ } ++ + mutex_lock(&fs_info->chunk_mutex); + ret = btrfs_read_sys_array(tree_root); + mutex_unlock(&fs_info->chunk_mutex); +-- +1.7.6.233.gd79bc + diff --git a/3.2.34/patches.suse/btrfs-8014-add-new-ioctl-to-determine-size-of-compressed-.patch b/3.2.34/patches.suse/btrfs-8014-add-new-ioctl-to-determine-size-of-compressed-.patch new file mode 100644 index 0000000..bd92a74 --- /dev/null +++ b/3.2.34/patches.suse/btrfs-8014-add-new-ioctl-to-determine-size-of-compressed-.patch @@ -0,0 +1,158 @@ +From: David Sterba +Date: Tue, 28 Jun 2011 12:38:06 +0200 +Patch-mainline: pending +References: FATE#306586 +Subject: [PATCH] btrfs: add new ioctl to determine size of compressed file + +Go through all extents of a file in a given [start,end) range and sum +for: +* regular extent: ->block_len, size is already rounded up to blocks +* inline extents: length rounded up to 512 + +The range is start inclusive / end exclusive. For whole a file pass +0 and (u64)-1. + +The values returned are number of occupied 512B sectors for uncompressed +and compressed size and can be easily compared to determine rough +compression ratio of the given file range. + +Based on implementation from Ulrich Hecht, +http://comments.gmane.org/gmane.comp.file-systems.btrfs/6253 + +Signed-off-by: David Sterba +--- + fs/btrfs/ioctl.c | 83 ++++++++++++++++++++++++++++++++++++++++++++++++++++++ + fs/btrfs/ioctl.h | 12 ++++++++ + 2 files changed, 95 insertions(+), 0 deletions(-) + +diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c +index c04f02c..91e6ab8 100644 +--- a/fs/btrfs/ioctl.c ++++ b/fs/btrfs/ioctl.c +@@ -2972,6 +2972,86 @@ static int build_ino_list(u64 inum, u64 offset, u64 root, void *ctx) + return 0; + } + ++/* ++ * Returns the compressed size of an inode in 512 byte blocks. ++ * Count the on-disk space used by extents starting in range [start, end), ++ * inline data are rounded up to sector, ie. 512. ++ * ++ * The range is start inclusive and end exclusive so it can be used to ++ * determine compressed size of a given extent by its start and start of the ++ * next extent easily, without counting length. 
++ * Whole file is specified as start = 0, end = (u64)-1 ++ */ ++static long btrfs_ioctl_compr_size(struct file *file, void __user *argp) ++{ ++ struct inode *inode = fdentry(file)->d_inode; ++ struct btrfs_ioctl_compr_size_args compr_args; ++ u64 len; ++ u64 compressed_size = 0; ++ u64 size = 0; ++ u64 offset = 0; ++ ++ if (S_ISDIR(inode->i_mode)) ++ return -EISDIR; ++ ++ if (copy_from_user(&compr_args, argp, ++ sizeof(struct btrfs_ioctl_compr_size_args))) ++ return -EFAULT; ++ ++ if (compr_args.start > compr_args.end) ++ return -EINVAL; ++ ++ mutex_lock(&inode->i_mutex); ++ ++ offset = compr_args.start; ++ if (inode->i_size > compr_args.end) ++ len = compr_args.end; ++ else ++ len = inode->i_size; ++ ++ /* ++ * do any pending delalloc/csum calc on inode, one way or ++ * another, and lock file content ++ */ ++ btrfs_wait_ordered_range(inode, compr_args.start, len); ++ ++ lock_extent(&BTRFS_I(inode)->io_tree, compr_args.start, len, GFP_NOFS); ++ ++ while (offset < len) { ++ struct extent_map *em; ++ ++ em = btrfs_get_extent(inode, NULL, 0, offset, 1, 0); ++ if (IS_ERR_OR_NULL(em)) ++ goto error; ++ if (em->block_len != (u64)-1) { ++ compressed_size += em->block_len; ++ size += ALIGN(em->len, inode->i_sb->s_blocksize); ++ } else if (em->block_start == EXTENT_MAP_INLINE) { ++ compressed_size += ALIGN(em->len, 512); ++ size += ALIGN(em->len, 512); ++ } ++ offset += em->len; ++ free_extent_map(em); ++ } ++ unlock_extent(&BTRFS_I(inode)->io_tree, compr_args.start, len, GFP_NOFS); ++ mutex_unlock(&inode->i_mutex); ++ ++ compr_args.size = size >> 9; ++ compr_args.compressed_size = compressed_size >> 9; ++ ++ if (copy_to_user(argp, &compr_args, ++ sizeof(struct btrfs_ioctl_compr_size_args))) ++ return -EFAULT; ++ ++ return 0; ++ ++error: ++ unlock_extent(&BTRFS_I(inode)->io_tree, compr_args.start, len, GFP_NOFS); ++ mutex_unlock(&inode->i_mutex); ++ ++ return -EIO; ++} ++ + static long btrfs_ioctl_logical_to_ino(struct btrfs_root *root, + void __user *arg) + { +@@ -3110,6 +3190,8 @@ long btrfs_ioctl(struct file *file, unsigned int + return btrfs_ioctl_scrub_cancel(root, argp); + case BTRFS_IOC_SCRUB_PROGRESS: + return btrfs_ioctl_scrub_progress(root, argp); ++ case BTRFS_IOC_COMPR_SIZE: ++ return btrfs_ioctl_compr_size(file, argp); + } + + return -ENOTTY; +diff --git a/fs/btrfs/ioctl.h b/fs/btrfs/ioctl.h +--- a/fs/btrfs/ioctl.h ++++ b/fs/btrfs/ioctl.h +@@ -217,6 +217,16 @@ struct btrfs_ioctl_logical_ino_args { + __u64 inodes; + }; + ++struct btrfs_ioctl_compr_size_args { ++ /* Range start, inclusive */ ++ __u64 start; /* in */ ++ /* Range end, exclusive */ ++ __u64 end; /* in */ ++ __u64 size; /* out */ ++ __u64 compressed_size; /* out */ ++ __u64 reserved[2]; ++}; ++ + #define BTRFS_IOC_SNAP_CREATE _IOW(BTRFS_IOCTL_MAGIC, 1, \ + struct btrfs_ioctl_vol_args) + #define BTRFS_IOC_DEFRAG _IOW(BTRFS_IOCTL_MAGIC, 2, \ +@@ -276,5 +286,7 @@ struct btrfs_ioctl_logical_ino_args { + struct btrfs_ioctl_ino_path_args) + #define BTRFS_IOC_LOGICAL_INO _IOWR(BTRFS_IOCTL_MAGIC, 36, \ + struct btrfs_ioctl_ino_path_args) ++#define BTRFS_IOC_COMPR_SIZE _IOR(BTRFS_IOCTL_MAGIC, 51, \ ++ struct btrfs_ioctl_compr_size_args) + + #endif +-- +1.7.7.3 + diff --git a/3.2.34/patches.suse/btrfs-8015-make-lzo-the-default-compression-scheme.patch b/3.2.34/patches.suse/btrfs-8015-make-lzo-the-default-compression-scheme.patch new file mode 100644 index 0000000..1a647aa --- /dev/null +++ b/3.2.34/patches.suse/btrfs-8015-make-lzo-the-default-compression-scheme.patch @@ -0,0 +1,68 @@ +From: Li Zefan +Date: Thu, 26 May 2011 11:39:03 
+0800 +Patch-mainline: pending +References: FATE#306586 +Subject: [PATCH] Btrfs: make lzo the default compression scheme + +As the lzo compression feature has been established for quite +a while, we are now ready to replace zlib with lzo as the default +compression scheme. + +Signed-off-by: Li Zefan +Signed-off-by: David Sterba +--- + fs/btrfs/disk-io.c | 2 +- + fs/btrfs/ioctl.c | 2 +- + fs/btrfs/super.c | 8 ++++---- + 3 files changed, 6 insertions(+), 6 deletions(-) + +diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c +index 94abc25..7ea0cdd 100644 +--- a/fs/btrfs/disk-io.c ++++ b/fs/btrfs/disk-io.c +@@ -2095,7 +2095,7 @@ struct btrfs_root *open_ctree(struct super_block *sb, + * In the long term, we'll store the compression type in the super + * block, and it'll be used for per file compression control. + */ +- fs_info->compress_type = BTRFS_COMPRESS_ZLIB; ++ fs_info->compress_type = BTRFS_COMPRESS_LZO; + + ret = btrfs_parse_options(tree_root, options); + if (ret) { +diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c +index a90e749..d9c2ba6 100644 +--- a/fs/btrfs/ioctl.c ++++ b/fs/btrfs/ioctl.c +@@ -992,7 +992,7 @@ int btrfs_defrag_file(struct inode *inode, struct file *file, + unsigned long ra_index = 0; + int ret; + int defrag_count = 0; +- int compress_type = BTRFS_COMPRESS_ZLIB; ++ int compress_type = BTRFS_COMPRESS_LZO; + int extent_thresh = range->extent_thresh; + int max_cluster = (256 * 1024) >> PAGE_CACHE_SHIFT; + int cluster = max_cluster; +diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c +index 8bd9d6d..b6b5bd7 100644 +--- a/fs/btrfs/super.c ++++ b/fs/btrfs/super.c +@@ -270,12 +270,12 @@ int btrfs_parse_options(struct btrfs_root *root, char *options) + case Opt_compress_type: + if (token == Opt_compress || + token == Opt_compress_force || +- strcmp(args[0].from, "zlib") == 0) { +- compress_type = "zlib"; +- info->compress_type = BTRFS_COMPRESS_ZLIB; +- } else if (strcmp(args[0].from, "lzo") == 0) { ++ strcmp(args[0].from, "lzo") == 0) { + compress_type = "lzo"; + info->compress_type = BTRFS_COMPRESS_LZO; ++ } else if (strcmp(args[0].from, "zlib") == 0) { ++ compress_type = "zlib"; ++ info->compress_type = BTRFS_COMPRESS_ZLIB; + } else { + ret = -EINVAL; + goto out; +-- +1.7.6 + diff --git a/3.2.34/patches.suse/btrfs-8024-workaround-for-cleaner-deadlock.patch b/3.2.34/patches.suse/btrfs-8024-workaround-for-cleaner-deadlock.patch new file mode 100644 index 0000000..c6767c4 --- /dev/null +++ b/3.2.34/patches.suse/btrfs-8024-workaround-for-cleaner-deadlock.patch @@ -0,0 +1,32 @@ +From: David Sterba +Date: Thu, 15 Dec 2011 02:10:55 +0100 +Patch-mainline: pending +References: FATE#306586 +Subject: [PATCH] btrfs: workaround for cleaner deadlock + +Signed-off-by: David Sterba +--- + fs/btrfs/disk-io.c | 2 ++ + 1 files changed, 2 insertions(+), 0 deletions(-) + +diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c +index 3f9d555..12d785b 100644 +--- a/fs/btrfs/disk-io.c ++++ b/fs/btrfs/disk-io.c +@@ -1572,11 +1572,13 @@ static int cleaner_kthread(void *arg) + vfs_check_frozen(root->fs_info->sb, SB_FREEZE_WRITE); + + if (!(root->fs_info->sb->s_flags & MS_RDONLY) && ++ down_read_trylock(&root->fs_info->sb->s_umount) && + mutex_trylock(&root->fs_info->cleaner_mutex)) { + btrfs_run_delayed_iputs(root); + btrfs_clean_old_snapshots(root); + mutex_unlock(&root->fs_info->cleaner_mutex); + btrfs_run_defrag_inodes(root->fs_info); ++ up_read(&root->fs_info->sb->s_umount); + } + + if (freezing(current)) { +-- +1.7.7.3 + diff --git 
a/3.2.34/patches.suse/btrfs-8025-update-global-block_rsv-when-creating-a-new-bl.patch b/3.2.34/patches.suse/btrfs-8025-update-global-block_rsv-when-creating-a-new-bl.patch new file mode 100644 index 0000000..13c4ebc --- /dev/null +++ b/3.2.34/patches.suse/btrfs-8025-update-global-block_rsv-when-creating-a-new-bl.patch @@ -0,0 +1,61 @@ +From: Li Zefan +Date: Wed, 7 Dec 2011 13:12:59 +0800 +Patch-mainline: pending +References: FATE#306586 +Subject: [PATCH] Btrfs: update global block_rsv when creating a new block + group + +A bug was triggered while using seed device: + + # mkfs.btrfs /dev/loop1 + # btrfstune -S 1 /dev/loop1 + # mount -o /dev/loop1 /mnt + # btrfs dev add /dev/loop2 /mnt + +btrfs: block rsv returned -28 +------------[ cut here ]------------ +WARNING: at fs/btrfs/extent-tree.c:5969 btrfs_alloc_free_block+0x166/0x396 [btrfs]() +... +Call Trace: +... +[] btrfs_cow_block+0x101/0x147 [btrfs] +[] btrfs_search_slot+0x1b8/0x55f [btrfs] +[] btrfs_insert_empty_items+0x42/0x7f [btrfs] +[] btrfs_insert_item+0x40/0x7e [btrfs] +[] btrfs_make_block_group+0x243/0x2aa [btrfs] +[] __btrfs_alloc_chunk+0x672/0x70e [btrfs] +[] init_first_rw_device+0x77/0x13c [btrfs] +[] btrfs_init_new_device+0x664/0x9fd [btrfs] +[] btrfs_ioctl+0x694/0xdbe [btrfs] +[] do_vfs_ioctl+0x496/0x4cc +[] sys_ioctl+0x33/0x4f +[] sysenter_do_call+0x12/0x38 +---[ end trace 906adac595facc7d ]--- + +Since seed device is readonly, there's no usable space in the filesystem. +Afterwards we add a sprout device to it, and the kernel creates a METADATA +block group and a SYSTEM block group where comes free space we can reserve, +but we still get revervation failure because the global block_rsv hasn't +been updated accordingly. + +Signed-off-by: Li Zefan +Signed-off-by: David Sterba +--- + fs/btrfs/extent-tree.c | 1 + + 1 files changed, 1 insertions(+), 0 deletions(-) + +diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c +index 8861572..a80efb5 100644 +--- a/fs/btrfs/extent-tree.c ++++ b/fs/btrfs/extent-tree.c +@@ -7476,6 +7476,7 @@ int btrfs_make_block_group(struct btrfs_trans_handle *trans, + ret = update_space_info(root->fs_info, cache->flags, size, bytes_used, + &cache->space_info); + BUG_ON(ret); ++ update_global_block_rsv(root->fs_info); + + spin_lock(&cache->space_info->lock); + cache->space_info->bytes_readonly += cache->bytes_super; +-- +1.7.6.233.gd79bc + diff --git a/3.2.34/patches.suse/btrfs-8026-fix-possible-deadlock-when-opening-a-seed-devi.patch b/3.2.34/patches.suse/btrfs-8026-fix-possible-deadlock-when-opening-a-seed-devi.patch new file mode 100644 index 0000000..cc134cb --- /dev/null +++ b/3.2.34/patches.suse/btrfs-8026-fix-possible-deadlock-when-opening-a-seed-devi.patch @@ -0,0 +1,84 @@ +From: Li Zefan +Date: Wed, 7 Dec 2011 13:13:26 +0800 +Patch-mainline: pending +References: FATE#306586 +Subject: [PATCH] Btrfs: fix possible deadlock when opening a seed device + +The correct lock order is uuid_mutex -> volume_mutex -> chunk_mutex, +but when we mount a filesystem which has backing seed devices, we have +this lock chain: + + open_ctree() + lock(chunk_mutex); + read_chunk_tree(); + read_one_dev(); + open_seed_devices(); + lock(uuid_mutex); + +and then we hit a lockdep splat. 
+ +Signed-off-by: Li Zefan +Signed-off-by: David Sterba +--- + fs/btrfs/disk-io.c | 2 -- + fs/btrfs/volumes.c | 9 +++++++-- + 2 files changed, 7 insertions(+), 4 deletions(-) + +diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c +index d0bc3c5..beb1d19 100644 +--- a/fs/btrfs/disk-io.c ++++ b/fs/btrfs/disk-io.c +@@ -2287,9 +2287,7 @@ struct btrfs_root *open_ctree(struct super_block *sb, + (unsigned long)btrfs_header_chunk_tree_uuid(chunk_root->node), + BTRFS_UUID_SIZE); + +- mutex_lock(&fs_info->chunk_mutex); + ret = btrfs_read_chunk_tree(chunk_root); +- mutex_unlock(&fs_info->chunk_mutex); + if (ret) { + printk(KERN_WARNING "btrfs: failed to read chunk tree on %s\n", + sb->s_id); +diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c +index d136915..fc94228 100644 +--- a/fs/btrfs/volumes.c ++++ b/fs/btrfs/volumes.c +@@ -4264,7 +4264,7 @@ static int open_seed_devices(struct btrfs_root *root, u8 *fsid) + struct btrfs_fs_devices *fs_devices; + int ret; + +- mutex_lock(&uuid_mutex); ++ BUG_ON(!mutex_is_locked(&uuid_mutex)); + + fs_devices = root->fs_info->fs_devices->seed; + while (fs_devices) { +@@ -4302,7 +4302,6 @@ static int open_seed_devices(struct btrfs_root *root, u8 *fsid) + fs_devices->seed = root->fs_info->fs_devices->seed; + root->fs_info->fs_devices->seed = fs_devices; + out: +- mutex_unlock(&uuid_mutex); + return ret; + } + +@@ -4459,6 +4458,9 @@ int btrfs_read_chunk_tree(struct btrfs_root *root) + if (!path) + return -ENOMEM; + ++ mutex_lock(&uuid_mutex); ++ lock_chunks(root); ++ + /* first we search for all of the device items, and then we + * read in all of the chunk items. This way we can create chunk + * mappings that reference all of the devices that are afound +@@ -4509,6 +4511,9 @@ again: + } + ret = 0; + error: ++ unlock_chunks(root); ++ mutex_unlock(&uuid_mutex); ++ + btrfs_free_path(path); + return ret; + } +-- +1.7.6.233.gd79bc + diff --git a/3.2.34/patches.suse/btrfs-allow-cross-subvolume-file-clone.patch b/3.2.34/patches.suse/btrfs-allow-cross-subvolume-file-clone.patch new file mode 100644 index 0000000..70d3975 --- /dev/null +++ b/3.2.34/patches.suse/btrfs-allow-cross-subvolume-file-clone.patch @@ -0,0 +1,47 @@ +From: David Sterba +Date: Mon, 1 Aug 2011 18:11:57 +0200 +Subject: [PATCH] btrfs: allow cross-subvolume file clone +Reference: bnc#698540 +Patch-mainline: pending + +Lift the EXDEV condition and allow different root trees for files being +cloned, then pass source inode's root when searching for extents. + +Signed-off-by: David Sterba +--- + fs/btrfs/ioctl.c | 7 ++++--- + 1 files changed, 4 insertions(+), 3 deletions(-) + +diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c +index 0b980af..58eb0ef 100644 +--- a/fs/btrfs/ioctl.c ++++ b/fs/btrfs/ioctl.c +@@ -2183,7 +2183,7 @@ static noinline long btrfs_ioctl_clone(struct file *file, unsigned long srcfd, + goto out_fput; + + ret = -EXDEV; +- if (src->i_sb != inode->i_sb || BTRFS_I(src)->root != root) ++ if (src->i_sb != inode->i_sb) + goto out_fput; + + ret = -ENOMEM; +@@ -2247,13 +2247,14 @@ static noinline long btrfs_ioctl_clone(struct file *file, unsigned long srcfd, + * note the key will change type as we walk through the + * tree. 
+ */ +- ret = btrfs_search_slot(NULL, root, &key, path, 0, 0); ++ ret = btrfs_search_slot(NULL, BTRFS_I(src)->root, &key, path, ++ 0, 0); + if (ret < 0) + goto out; + + nritems = btrfs_header_nritems(path->nodes[0]); + if (path->slots[0] >= nritems) { +- ret = btrfs_next_leaf(root, path); ++ ret = btrfs_next_leaf(BTRFS_I(src)->root, path); + if (ret < 0) + goto out; + if (ret > 0) +-- +1.7.6 + diff --git a/3.2.34/series b/3.2.34/series new file mode 100644 index 0000000..92b8362 --- /dev/null +++ b/3.2.34/series @@ -0,0 +1,68 @@ +bump/1021_linux-3.2.22.patch +bump/1022_linux-3.2.23.patch +bump/1023_linux-3.2.24.patch +bump/1024_linux-3.2.25.patch +bump/1025_linux-3.2.26.patch +bump/1026_linux-3.2.27.patch +bump/1027_linux-3.2.28.patch +bump/1028_linux-3.2.29.patch +bump/1029_linux-3.2.30.patch +bump/1030_linux-3.2.31.patch +bump/1031_linux-3.2.32.patch +bump/1032_linux-3.2.33.patch +bump/1033_linux-3.2.34.patch + + +0001-block-prepare-I-O-context-code-for-BFQ-v5-for-3.2.patch +0002-block-cgroups-kconfig-build-bits-for-BFQ-v5-3.2.patch +0003-block-introduce-the-BFQ-v5-I-O-sched-for-3.2.patch + +0001-AppArmor-compatibility-patch-for-v5-network-controll.patch +0002-AppArmor-compatibility-patch-for-v5-interface.patch +0003-AppArmor-Allow-dfa-backward-compatibility-with-broke.patch + +cloneconfig.patch +kbuild-compress-kernel-modules-on-installation.patch +ata-prefer-ata-drivers-over-ide-drivers-when-both-are-built.patch +colored-printk-3.2.33.patch +910-kobject_uevent.patch +911-kobject_add_broadcast_uevent.patch + +linux-2.6-x86-tune-generic.patch +hz-432-kconfig-option.patch +hz-864-kconfig-option.patch + +Add_CONFIG_VFAT_FS_DUALNAMES_option.patch +linux-2.6-defaults-fat-utf8.patch +aufs3-standalone-3.2.patch +accessfs-3.2-0.26.patch +wrapfs-v3.2.2-45-ga5296eb.patch + +imqmq-3.2.patch + +vserver-3.2.34-vs2.3.2.15.patch +uksm-0.1.2.1-for-v3.2.ge.31.patch +kernel-3.4.0-layer7-2.22.patch +net-netfilter-IFWLOG.patch +net-netfilter-IFWLOG-mdv.patch +net-netfilter-IFWLOG-2.6.35-buildfix.patch +net-netfilter-IFWLOG-2.6.37-buildfix.patch +net-netfilter-psd.patch +net-netfilter-psd-mdv.patch +net-netfilter-psd-2.6.35-buildfix.patch +netfilter-implement-rfc-1123-for-ftp-conntrack.patch +netfilter-ip_conntrack_slp.patch + +kernel-3.2-lsxhl.patch +kernel-3.2-lsproduo.patch +kernel-3.2-lsql.patch +v3.2-ARM-kirkwood-Add-support-for-Buffalo-LS-VL.patch +v3.2-ARM-kirkwood-Add-support-for-Buffalo-LS-WVL.patch +lschlv2.patch + +3rd-3rdparty-1.0-tree.patch +3rd-3rdparty-merge.patch +3rd-3rdparty-netatop-0.1.1.patch +3rd-3rdparty-button_hotplug-0.4.1.patch +3rd-3rdparty-gpio_button_hotplug-0.1.patch +3rd-3rdparty-gpio_event_drv-0.1.patch \ No newline at end of file diff --git a/3.2.34/uksm-0.1.2.1-for-v3.2.ge.31.patch b/3.2.34/uksm-0.1.2.1-for-v3.2.ge.31.patch new file mode 100644 index 0000000..9036595 --- /dev/null +++ b/3.2.34/uksm-0.1.2.1-for-v3.2.ge.31.patch @@ -0,0 +1,7032 @@ +diff --git a/Documentation/vm/00-INDEX b/Documentation/vm/00-INDEX +index 5481c8b..7141876 100644 +--- a/Documentation/vm/00-INDEX ++++ b/Documentation/vm/00-INDEX +@@ -14,6 +14,8 @@ hwpoison.txt + - explains what hwpoison is + ksm.txt + - how to use the Kernel Samepage Merging feature. ++uksm.txt ++ - Introduction to Ultra KSM + locking + - info on how locking and synchronization is done in the Linux vm code. 
+ map_hugetlb.c +diff --git a/Documentation/vm/uksm.txt b/Documentation/vm/uksm.txt +new file mode 100644 +index 0000000..d4aaae8 +--- /dev/null ++++ b/Documentation/vm/uksm.txt +@@ -0,0 +1,56 @@ ++The Ultra Kernel Samepage Merging feature ++---------------------------------------------- ++/* ++ * Ultra KSM. Copyright (C) 2011-2012 Nai Xia ++ * ++ * This is an improvement upon KSM. Some basic data structures and routines ++ * are borrowed from ksm.c . ++ * ++ * Its new features: ++ * 1. Full system scan: ++ * It automatically scans all user processes' anonymous VMAs. Kernel-user ++ * interaction to submit a memory area to KSM is no longer needed. ++ * ++ * 2. Rich area detection: ++ * It automatically detects rich areas containing abundant duplicated ++ * pages based. Rich areas are given a full scan speed. Poor areas are ++ * sampled at a reasonable speed with very low CPU consumption. ++ * ++ * 3. Ultra Per-page scan speed improvement: ++ * A new hash algorithm is proposed. As a result, on a machine with ++ * Core(TM)2 Quad Q9300 CPU in 32-bit mode and 800MHZ DDR2 main memory, it ++ * can scan memory areas that does not contain duplicated pages at speed of ++ * 627MB/sec ~ 2445MB/sec and can merge duplicated areas at speed of ++ * 477MB/sec ~ 923MB/sec. ++ * ++ * 4. Thrashing area avoidance: ++ * Thrashing area(an VMA that has frequent Ksm page break-out) can be ++ * filtered out. My benchmark shows it's more efficient than KSM's per-page ++ * hash value based volatile page detection. ++ * ++ * ++ * 5. Misc changes upon KSM: ++ * * It has a fully x86-opitmized memcmp dedicated for 4-byte-aligned page ++ * comparison. It's much faster than default C version on x86. ++ * * rmap_item now has an struct *page member to loosely cache a ++ * address-->page mapping, which reduces too much time-costly ++ * follow_page(). ++ * * The VMA creation/exit procedures are hooked to let the Ultra KSM know. ++ * * try_to_merge_two_pages() now can revert a pte if it fails. No break_ ++ * ksm is needed for this case. ++ * ++ * 6. Full Zero Page consideration(contributed by Figo Zhang) ++ * Now uksmd consider full zero pages as special pages and merge them to an ++ * special unswappable uksm zero page. ++ */ ++ ++ChangeLog: ++ ++2012-05-05 The creation of this Doc ++2012-05-08 UKSM 0.1.1.1 libc crash bug fix, api clean up, doc clean up. ++2012-05-28 UKSM 0.1.1.2 bug fix release ++2012-06-26 UKSM 0.1.2-beta1 first beta release for 0.1.2 ++2012-07-2 UKSM 0.1.2-beta2 ++2012-07-10 UKSM 0.1.2-beta3 ++2012-07-26 UKSM 0.1.2 Fine grained speed control, more scan optimization. ++2012-10-13 UKSM 0.1.2.1 Bug fixes. +diff --git a/fs/exec.c b/fs/exec.c +index 160cd2f..ae68311 100644 +--- a/fs/exec.c ++++ b/fs/exec.c +@@ -19,7 +19,7 @@ + * current->executable is only used by the procfs. This allows a dispatch + * table to check for several different types of binary formats. We keep + * trying until we recognize the file or we run out of supported binary +- * formats. ++ * formats. + */ + + #include +@@ -55,6 +55,7 @@ + #include + #include + #include ++#include + + #include + #include +@@ -85,7 +86,7 @@ int __register_binfmt(struct linux_binfmt * fmt, int insert) + insert ? 
list_add(&fmt->lh, &formats) : + list_add_tail(&fmt->lh, &formats); + write_unlock(&binfmt_lock); +- return 0; ++ return 0; + } + + EXPORT_SYMBOL(__register_binfmt); +@@ -1169,7 +1170,7 @@ void setup_new_exec(struct linux_binprm * bprm) + group */ + + current->self_exec_id++; +- ++ + flush_signal_handlers(current, 0); + flush_old_files(current->files); + } +@@ -1264,8 +1265,8 @@ int check_unsafe_exec(struct linux_binprm *bprm) + return res; + } + +-/* +- * Fill the binprm structure from the inode. ++/* ++ * Fill the binprm structure from the inode. + * Check permissions, then read the first 128 (BINPRM_BUF_SIZE) bytes + * + * This may be called multiple times for binary chains (scripts for example). +diff --git a/fs/proc/meminfo.c b/fs/proc/meminfo.c +index 80e4645..33f9e9b 100644 +--- a/fs/proc/meminfo.c ++++ b/fs/proc/meminfo.c +@@ -87,6 +87,9 @@ static int meminfo_proc_show(struct seq_file *m, void *v) + "SUnreclaim: %8lu kB\n" + "KernelStack: %8lu kB\n" + "PageTables: %8lu kB\n" ++#ifdef CONFIG_UKSM ++ "KsmZeroPages: %8lu kB\n" ++#endif + #ifdef CONFIG_QUICKLIST + "Quicklists: %8lu kB\n" + #endif +@@ -146,6 +149,9 @@ static int meminfo_proc_show(struct seq_file *m, void *v) + K(global_page_state(NR_SLAB_UNRECLAIMABLE)), + global_page_state(NR_KERNEL_STACK) * THREAD_SIZE / 1024, + K(global_page_state(NR_PAGETABLE)), ++#ifdef CONFIG_UKSM ++ K(global_page_state(NR_UKSM_ZERO_PAGES)), ++#endif + #ifdef CONFIG_QUICKLIST + K(quicklist_total_size()), + #endif +diff --git a/include/linux/ksm.h b/include/linux/ksm.h +index 3319a69..f4edf33 100644 +--- a/include/linux/ksm.h ++++ b/include/linux/ksm.h +@@ -22,21 +22,6 @@ struct page *ksm_does_need_to_copy(struct page *page, + #ifdef CONFIG_KSM + int ksm_madvise(struct vm_area_struct *vma, unsigned long start, + unsigned long end, int advice, unsigned long *vm_flags); +-int __ksm_enter(struct mm_struct *mm); +-void __ksm_exit(struct mm_struct *mm); +- +-static inline int ksm_fork(struct mm_struct *mm, struct mm_struct *oldmm) +-{ +- if (test_bit(MMF_VM_MERGEABLE, &oldmm->flags)) +- return __ksm_enter(mm); +- return 0; +-} +- +-static inline void ksm_exit(struct mm_struct *mm) +-{ +- if (test_bit(MMF_VM_MERGEABLE, &mm->flags)) +- __ksm_exit(mm); +-} + + /* + * A KSM page is one of those write-protected "shared pages" or "merged pages" +@@ -90,6 +75,33 @@ int rmap_walk_ksm(struct page *page, int (*rmap_one)(struct page *, + struct vm_area_struct *, unsigned long, void *), void *arg); + void ksm_migrate_page(struct page *newpage, struct page *oldpage); + ++#ifdef CONFIG_KSM_LEGACY ++int __ksm_enter(struct mm_struct *mm); ++void __ksm_exit(struct mm_struct *mm); ++static inline int ksm_fork(struct mm_struct *mm, struct mm_struct *oldmm) ++{ ++ if (test_bit(MMF_VM_MERGEABLE, &oldmm->flags)) ++ return __ksm_enter(mm); ++ return 0; ++} ++ ++static inline void ksm_exit(struct mm_struct *mm) ++{ ++ if (test_bit(MMF_VM_MERGEABLE, &mm->flags)) ++ __ksm_exit(mm); ++} ++ ++#elif defined(CONFIG_UKSM) ++static inline int ksm_fork(struct mm_struct *mm, struct mm_struct *oldmm) ++{ ++ return 0; ++} ++ ++static inline void ksm_exit(struct mm_struct *mm) ++{ ++} ++#endif /* !CONFIG_UKSM */ ++ + #else /* !CONFIG_KSM */ + + static inline int ksm_fork(struct mm_struct *mm, struct mm_struct *oldmm) +@@ -142,4 +154,6 @@ static inline void ksm_migrate_page(struct page *newpage, struct page *oldpage) + #endif /* CONFIG_MMU */ + #endif /* !CONFIG_KSM */ + ++#include ++ + #endif /* __LINUX_KSM_H */ +diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h +index 
5b42f1b..7a09663 100644 +--- a/include/linux/mm_types.h ++++ b/include/linux/mm_types.h +@@ -253,6 +253,9 @@ struct vm_area_struct { + #ifdef CONFIG_NUMA + struct mempolicy *vm_policy; /* NUMA policy for the VMA */ + #endif ++#ifdef CONFIG_UKSM ++ struct vma_slot *uksm_vma_slot; ++#endif + }; + + struct core_thread { +diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h +index 25842b6..d2b8dba 100644 +--- a/include/linux/mmzone.h ++++ b/include/linux/mmzone.h +@@ -116,6 +116,9 @@ enum zone_stat_item { + NUMA_OTHER, /* allocation from other node */ + #endif + NR_ANON_TRANSPARENT_HUGEPAGES, ++#ifdef CONFIG_UKSM ++ NR_UKSM_ZERO_PAGES, ++#endif + NR_VM_ZONE_STAT_ITEMS }; + + /* +@@ -360,7 +363,7 @@ struct zone { + ZONE_PADDING(_pad1_) + + /* Fields commonly accessed by the page reclaim scanner */ +- spinlock_t lru_lock; ++ spinlock_t lru_lock; + struct zone_lru { + struct list_head list; + } lru[NR_LRU_LISTS]; +@@ -745,7 +748,7 @@ static inline int is_normal_idx(enum zone_type idx) + } + + /** +- * is_highmem - helper function to quickly check if a struct zone is a ++ * is_highmem - helper function to quickly check if a struct zone is a + * highmem zone or not. This is an attempt to keep references + * to ZONE_{DMA/NORMAL/HIGHMEM/etc} in general code to a minimum. + * @zone - pointer to struct zone variable +diff --git a/include/linux/sradix-tree.h b/include/linux/sradix-tree.h +new file mode 100644 +index 0000000..6780fdb +--- /dev/null ++++ b/include/linux/sradix-tree.h +@@ -0,0 +1,77 @@ ++#ifndef _LINUX_SRADIX_TREE_H ++#define _LINUX_SRADIX_TREE_H ++ ++ ++#define INIT_SRADIX_TREE(root, mask) \ ++do { \ ++ (root)->height = 0; \ ++ (root)->gfp_mask = (mask); \ ++ (root)->rnode = NULL; \ ++} while (0) ++ ++#define ULONG_BITS (sizeof(unsigned long) * 8) ++#define SRADIX_TREE_INDEX_BITS (8 /* CHAR_BIT */ * sizeof(unsigned long)) ++//#define SRADIX_TREE_MAP_SHIFT 6 ++//#define SRADIX_TREE_MAP_SIZE (1UL << SRADIX_TREE_MAP_SHIFT) ++//#define SRADIX_TREE_MAP_MASK (SRADIX_TREE_MAP_SIZE-1) ++ ++struct sradix_tree_node { ++ unsigned int height; /* Height from the bottom */ ++ unsigned int count; ++ unsigned int fulls; /* Number of full sublevel trees */ ++ struct sradix_tree_node *parent; ++ void *stores[0]; ++}; ++ ++/* A simple radix tree implementation */ ++struct sradix_tree_root { ++ unsigned int height; ++ struct sradix_tree_node *rnode; ++ ++ /* Where found to have available empty stores in its sublevels */ ++ struct sradix_tree_node *enter_node; ++ unsigned int shift; ++ unsigned int stores_size; ++ unsigned int mask; ++ unsigned long min; /* The first hole index */ ++ unsigned long num; ++ //unsigned long *height_to_maxindex; ++ ++ /* How the node is allocated and freed. 
*/ ++ struct sradix_tree_node *(*alloc)(void); ++ void (*free)(struct sradix_tree_node *node); ++ ++ /* When a new node is added and removed */ ++ void (*extend)(struct sradix_tree_node *parent, struct sradix_tree_node *child); ++ void (*assign)(struct sradix_tree_node *node, unsigned index, void *item); ++ void (*rm)(struct sradix_tree_node *node, unsigned offset); ++}; ++ ++struct sradix_tree_path { ++ struct sradix_tree_node *node; ++ int offset; ++}; ++ ++static inline ++void init_sradix_tree_root(struct sradix_tree_root *root, unsigned long shift) ++{ ++ root->height = 0; ++ root->rnode = NULL; ++ root->shift = shift; ++ root->stores_size = 1UL << shift; ++ root->mask = root->stores_size - 1; ++} ++ ++ ++extern void *sradix_tree_next(struct sradix_tree_root *root, ++ struct sradix_tree_node *node, unsigned long index, ++ int (*iter)(void *, unsigned long)); ++ ++extern int sradix_tree_enter(struct sradix_tree_root *root, void **item, int num); ++ ++extern void sradix_tree_delete_from_leaf(struct sradix_tree_root *root, ++ struct sradix_tree_node *node, unsigned long index); ++ ++extern void *sradix_tree_lookup(struct sradix_tree_root *root, unsigned long index); ++ ++#endif /* _LINUX_SRADIX_TREE_H */ +diff --git a/include/linux/uksm.h b/include/linux/uksm.h +new file mode 100644 +index 0000000..361eee2 +--- /dev/null ++++ b/include/linux/uksm.h +@@ -0,0 +1,145 @@ ++#ifndef __LINUX_UKSM_H ++#define __LINUX_UKSM_H ++/* ++ * Memory merging support. ++ * ++ * This code enables dynamic sharing of identical pages found in different ++ * memory areas, even if they are not shared by fork(). ++ */ ++ ++/* if !CONFIG_UKSM this file should not be compiled at all. */ ++#ifdef CONFIG_UKSM ++ ++#include ++#include ++#include ++#include ++#include ++ ++extern unsigned long zero_pfn __read_mostly; ++extern unsigned long uksm_zero_pfn __read_mostly; ++extern struct page *empty_uksm_zero_page; ++ ++/* must be done before linked to mm */ ++extern void uksm_vma_add_new(struct vm_area_struct *vma); ++extern void uksm_remove_vma(struct vm_area_struct *vma); ++ ++#define UKSM_SLOT_NEED_SORT (1 << 0) ++#define UKSM_SLOT_NEED_RERAND (1 << 1) ++#define UKSM_SLOT_SCANNED (1 << 2) /* It's scanned in this round */ ++#define UKSM_SLOT_FUL_SCANNED (1 << 3) ++#define UKSM_SLOT_IN_UKSM (1 << 4) ++ ++struct vma_slot { ++ struct sradix_tree_node *snode; ++ unsigned long sindex; ++ ++ struct list_head slot_list; ++ unsigned long fully_scanned_round; ++ unsigned long dedup_num; ++ unsigned long pages_scanned; ++ unsigned long last_scanned; ++ unsigned long pages_to_scan; ++ struct scan_rung *rung; ++ struct page **rmap_list_pool; ++ unsigned int *pool_counts; ++ unsigned long pool_size; ++ struct vm_area_struct *vma; ++ struct mm_struct *mm; ++ unsigned long ctime_j; ++ unsigned long pages; ++ unsigned long flags; ++ unsigned long pages_cowed; /* pages cowed this round */ ++ unsigned long pages_merged; /* pages merged this round */ ++ unsigned long pages_bemerged; ++ ++ /* when it has page merged in this eval round */ ++ struct list_head dedup_list; ++}; ++ ++static inline void uksm_unmap_zero_page(pte_t pte) ++{ ++ if (pte_pfn(pte) == uksm_zero_pfn) ++ __dec_zone_page_state(empty_uksm_zero_page, NR_UKSM_ZERO_PAGES); ++} ++ ++static inline void uksm_map_zero_page(pte_t pte) ++{ ++ if (pte_pfn(pte) == uksm_zero_pfn) ++ __inc_zone_page_state(empty_uksm_zero_page, NR_UKSM_ZERO_PAGES); ++} ++ ++static inline void uksm_cow_page(struct vm_area_struct *vma, struct page *page) ++{ ++ if (vma->uksm_vma_slot && PageKsm(page)) ++ 
vma->uksm_vma_slot->pages_cowed++; ++} ++ ++static inline void uksm_cow_pte(struct vm_area_struct *vma, pte_t pte) ++{ ++ if (vma->uksm_vma_slot && pte_pfn(pte) == uksm_zero_pfn) ++ vma->uksm_vma_slot->pages_cowed++; ++} ++ ++static inline int uksm_flags_can_scan(unsigned long vm_flags) ++{ ++ return !(vm_flags & (VM_PFNMAP | VM_IO | VM_DONTEXPAND | ++ VM_RESERVED | VM_HUGETLB | VM_INSERTPAGE | ++ VM_NONLINEAR | VM_MIXEDMAP | VM_SAO | ++ VM_SHARED | VM_MAYSHARE | VM_GROWSUP ++ | VM_GROWSDOWN)); ++} ++ ++static inline void uksm_vm_flags_mod(unsigned long *vm_flags_p) ++{ ++ if (uksm_flags_can_scan(*vm_flags_p)) ++ *vm_flags_p |= VM_MERGEABLE; ++} ++ ++/* ++ * Just a wrapper for BUG_ON for where ksm_zeropage must not be. TODO: it will ++ * be removed when uksm zero page patch is stable enough. ++ */ ++static inline void uksm_bugon_zeropage(pte_t pte) ++{ ++ BUG_ON(pte_pfn(pte) == uksm_zero_pfn); ++} ++#else ++static inline void uksm_vma_add_new(struct vm_area_struct *vma) ++{ ++} ++ ++static inline void uksm_remove_vma(struct vm_area_struct *vma) ++{ ++} ++ ++static inline void uksm_unmap_zero_page(pte_t pte) ++{ ++} ++ ++static inline void uksm_map_zero_page(pte_t pte) ++{ ++} ++ ++static inline void uksm_cow_page(struct vm_area_struct *vma, struct page *page) ++{ ++} ++ ++static inline void uksm_cow_pte(struct vm_area_struct *vma, pte_t pte) ++{ ++} ++ ++static inline int uksm_flags_can_scan(unsigned long vm_flags) ++{ ++ return 0; ++} ++ ++static inline void uksm_vm_flags_mod(unsigned long *vm_flags_p) ++{ ++} ++ ++static inline void uksm_bugon_zeropage(pte_t pte) ++{ ++} ++#endif /* !CONFIG_UKSM */ ++#endif /* __LINUX_UKSM_H */ +diff --git a/kernel/fork.c b/kernel/fork.c +index 222457a..cd9137e 100644 +--- a/kernel/fork.c ++++ b/kernel/fork.c +@@ -358,7 +358,7 @@ static int dup_mmap(struct mm_struct *mm, struct mm_struct *oldmm) + goto fail_nomem; + charge = len; + } +- tmp = kmem_cache_alloc(vm_area_cachep, GFP_KERNEL); ++ tmp = kmem_cache_zalloc(vm_area_cachep, GFP_KERNEL); + if (!tmp) + goto fail_nomem; + *tmp = *mpnt; +@@ -410,7 +410,7 @@ static int dup_mmap(struct mm_struct *mm, struct mm_struct *oldmm) + __vma_link_rb(mm, tmp, rb_link, rb_parent); + rb_link = &tmp->vm_rb.rb_right; + rb_parent = &tmp->vm_rb; +- ++ uksm_vma_add_new(tmp); + mm->map_count++; + retval = copy_page_range(mm, oldmm, mpnt); + +diff --git a/lib/Makefile b/lib/Makefile +index a4da283..5ac75a7 100644 +--- a/lib/Makefile ++++ b/lib/Makefile +@@ -8,7 +8,7 @@ KBUILD_CFLAGS = $(subst -pg,,$(ORIG_CFLAGS)) + endif + + lib-y := ctype.o string.o vsprintf.o cmdline.o \ +- rbtree.o radix-tree.o dump_stack.o timerqueue.o\ ++ rbtree.o radix-tree.o sradix-tree.o dump_stack.o timerqueue.o\ + idr.o int_sqrt.o extable.o prio_tree.o \ + sha1.o md5.o irq_regs.o reciprocal_div.o argv_split.o \ + proportions.o prio_heap.o ratelimit.o show_mem.o \ +diff --git a/lib/sradix-tree.c b/lib/sradix-tree.c +new file mode 100644 +index 0000000..8d06329 +--- /dev/null ++++ b/lib/sradix-tree.c +@@ -0,0 +1,476 @@ ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++ ++static inline int sradix_node_full(struct sradix_tree_root *root, struct sradix_tree_node *node) ++{ ++ return node->fulls == root->stores_size || ++ (node->height == 1 && node->count == root->stores_size); ++} ++ ++/* ++ * Extend a sradix tree so it can store key @index. 
++ */ ++static int sradix_tree_extend(struct sradix_tree_root *root, unsigned long index) ++{ ++ struct sradix_tree_node *node; ++ unsigned int height; ++ ++ if (unlikely(root->rnode == NULL)) { ++ if (!(node = root->alloc())) ++ return -ENOMEM; ++ ++ node->height = 1; ++ root->rnode = node; ++ root->height = 1; ++ } ++ ++ /* Figure out what the height should be. */ ++ height = root->height; ++ index >>= root->shift * height; ++ ++ while (index) { ++ index >>= root->shift; ++ height++; ++ } ++ ++ while (height > root->height) { ++ unsigned int newheight; ++ if (!(node = root->alloc())) ++ return -ENOMEM; ++ ++ /* Increase the height. */ ++ node->stores[0] = root->rnode; ++ root->rnode->parent = node; ++ if (root->extend) ++ root->extend(node, root->rnode); ++ ++ newheight = root->height + 1; ++ node->height = newheight; ++ node->count = 1; ++ if (sradix_node_full(root, root->rnode)) ++ node->fulls = 1; ++ ++ root->rnode = node; ++ root->height = newheight; ++ } ++ ++ return 0; ++} ++ ++/* ++ * Search the next item from the current node, that is not NULL ++ * and can satify root->iter(). ++ */ ++void *sradix_tree_next(struct sradix_tree_root *root, ++ struct sradix_tree_node *node, unsigned long index, ++ int (*iter)(void *item, unsigned long height)) ++{ ++ unsigned long offset; ++ void *item; ++ ++ if (unlikely(node == NULL)) { ++ node = root->rnode; ++ for (offset = 0; offset < root->stores_size; offset++) { ++ item = node->stores[offset]; ++ if (item && (!iter || iter(item, node->height))) ++ break; ++ } ++ ++ if (unlikely(offset >= root->stores_size)) ++ return NULL; ++ ++ if (node->height == 1) ++ return item; ++ else ++ goto go_down; ++ } ++ ++ while (node) { ++ offset = (index & root->mask) + 1; ++ for (;offset < root->stores_size; offset++) { ++ item = node->stores[offset]; ++ if (item && (!iter || iter(item, node->height))) ++ break; ++ } ++ ++ if (offset < root->stores_size) ++ break; ++ ++ node = node->parent; ++ index >>= root->shift; ++ } ++ ++ if (!node) ++ return NULL; ++ ++ while (node->height > 1) { ++go_down: ++ node = item; ++ for (offset = 0; offset < root->stores_size; offset++) { ++ item = node->stores[offset]; ++ if (item && (!iter || iter(item, node->height))) ++ break; ++ } ++ ++ if (unlikely(offset >= root->stores_size)) ++ return NULL; ++ } ++ ++ BUG_ON(offset > root->stores_size); ++ ++ return item; ++} ++ ++/* ++ * Blindly insert the item to the tree. Typically, we reuse the ++ * first empty store item. 
++ */ ++int sradix_tree_enter(struct sradix_tree_root *root, void **item, int num) ++{ ++ unsigned long index; ++ unsigned int height; ++ struct sradix_tree_node *node, *tmp = NULL; ++ int offset, offset_saved; ++ void **store = NULL; ++ int error, i, j, shift; ++ ++go_on: ++ index = root->min; ++ ++ if (root->enter_node && !sradix_node_full(root, root->enter_node)) { ++ node = root->enter_node; ++ BUG_ON((index >> (root->shift * root->height))); ++ } else { ++ node = root->rnode; ++ if (node == NULL || (index >> (root->shift * root->height)) ++ || sradix_node_full(root, node)) { ++ error = sradix_tree_extend(root, index); ++ if (error) ++ return error; ++ ++ node = root->rnode; ++ } ++ } ++ ++ ++ height = node->height; ++ shift = (height - 1) * root->shift; ++ offset = (index >> shift) & root->mask; ++ while (shift > 0) { ++ offset_saved = offset; ++ for (; offset < root->stores_size; offset++) { ++ store = &node->stores[offset]; ++ tmp = *store; ++ ++ if (!tmp || !sradix_node_full(root, tmp)) ++ break; ++ } ++ BUG_ON(offset >= root->stores_size); ++ ++ if (offset != offset_saved) { ++ index += (offset - offset_saved) << shift; ++ index &= ~((1UL << shift) - 1); ++ } ++ ++ if (!tmp) { ++ if (!(tmp = root->alloc())) ++ return -ENOMEM; ++ ++ tmp->height = shift / root->shift; ++ *store = tmp; ++ tmp->parent = node; ++ node->count++; ++// if (root->extend) ++// root->extend(node, tmp); ++ } ++ ++ node = tmp; ++ shift -= root->shift; ++ offset = (index >> shift) & root->mask; ++ } ++ ++ BUG_ON(node->height != 1); ++ ++ ++ store = &node->stores[offset]; ++ for (i = 0, j = 0; ++ j < root->stores_size - node->count && ++ i < root->stores_size - offset && j < num; i++) { ++ if (!store[i]) { ++ store[i] = item[j]; ++ if (root->assign) ++ root->assign(node, index + i, item[j]); ++ j++; ++ } ++ } ++ ++ node->count += j; ++ root->num += j; ++ num -= j; ++ ++ while (sradix_node_full(root, node)) { ++ node = node->parent; ++ if (!node) ++ break; ++ ++ node->fulls++; ++ } ++ ++ if (unlikely(!node)) { ++ /* All nodes are full */ ++ root->min = 1 << (root->height * root->shift); ++ root->enter_node = NULL; ++ } else { ++ root->min = index + i - 1; ++ root->min |= (1UL << (node->height - 1)) - 1; ++ root->min++; ++ root->enter_node = node; ++ } ++ ++ if (num) { ++ item += j; ++ goto go_on; ++ } ++ ++ return 0; ++} ++ ++ ++/** ++ * sradix_tree_shrink - shrink height of a sradix tree to minimal ++ * @root sradix tree root ++ * ++ */ ++static inline void sradix_tree_shrink(struct sradix_tree_root *root) ++{ ++ /* try to shrink tree height */ ++ while (root->height > 1) { ++ struct sradix_tree_node *to_free = root->rnode; ++ ++ /* ++ * The candidate node has more than one child, or its child ++ * is not at the leftmost store, we cannot shrink. 
++ */ ++ if (to_free->count != 1 || !to_free->stores[0]) ++ break; ++ ++ root->rnode = to_free->stores[0]; ++ root->rnode->parent = NULL; ++ root->height--; ++ if (unlikely(root->enter_node == to_free)) { ++ root->enter_node = NULL; ++ } ++ root->free(to_free); ++ } ++} ++ ++/* ++ * Del the item on the known leaf node and index ++ */ ++void sradix_tree_delete_from_leaf(struct sradix_tree_root *root, ++ struct sradix_tree_node *node, unsigned long index) ++{ ++ unsigned int offset; ++ struct sradix_tree_node *start, *end; ++ ++ BUG_ON(node->height != 1); ++ ++ start = node; ++ while (node && !(--node->count)) ++ node = node->parent; ++ ++ end = node; ++ if (!node) { ++ root->rnode = NULL; ++ root->height = 0; ++ root->min = 0; ++ root->num = 0; ++ root->enter_node = NULL; ++ } else { ++ offset = (index >> (root->shift * (node->height - 1))) & root->mask; ++ if (root->rm) ++ root->rm(node, offset); ++ node->stores[offset] = NULL; ++ root->num--; ++ if (root->min > index) { ++ root->min = index; ++ root->enter_node = node; ++ } ++ } ++ ++ if (start != end) { ++ do { ++ node = start; ++ start = start->parent; ++ if (unlikely(root->enter_node == node)) ++ root->enter_node = end; ++ root->free(node); ++ } while (start != end); ++ ++ /* ++ * Note that shrink may free "end", so enter_node still need to ++ * be checked inside. ++ */ ++ sradix_tree_shrink(root); ++ } else if (node->count == root->stores_size - 1) { ++ /* It WAS a full leaf node. Update the ancestors */ ++ node = node->parent; ++ while (node) { ++ node->fulls--; ++ if (node->fulls != root->stores_size - 1) ++ break; ++ ++ node = node->parent; ++ } ++ } ++} ++ ++void *sradix_tree_lookup(struct sradix_tree_root *root, unsigned long index) ++{ ++ unsigned int height, offset; ++ struct sradix_tree_node *node; ++ int shift; ++ ++ node = root->rnode; ++ if (node == NULL || (index >> (root->shift * root->height))) ++ return NULL; ++ ++ height = root->height; ++ shift = (height - 1) * root->shift; ++ ++ do { ++ offset = (index >> shift) & root->mask; ++ node = node->stores[offset]; ++ if (!node) ++ return NULL; ++ ++ shift -= root->shift; ++ } while (shift >= 0); ++ ++ return node; ++} ++ ++/* ++ * Return the item if it exists, otherwise create it in place ++ * and return the created item. ++ */ ++void *sradix_tree_lookup_create(struct sradix_tree_root *root, ++ unsigned long index, void *(*item_alloc)(void)) ++{ ++ unsigned int height, offset; ++ struct sradix_tree_node *node, *tmp; ++ void *item; ++ int shift, error; ++ ++ if (root->rnode == NULL || (index >> (root->shift * root->height))) { ++ if (item_alloc) { ++ error = sradix_tree_extend(root, index); ++ if (error) ++ return NULL; ++ } else { ++ return NULL; ++ } ++ } ++ ++ node = root->rnode; ++ height = root->height; ++ shift = (height - 1) * root->shift; ++ ++ do { ++ offset = (index >> shift) & root->mask; ++ if (!node->stores[offset]) { ++ if (!(tmp = root->alloc())) ++ return NULL; ++ ++ tmp->height = shift / root->shift; ++ node->stores[offset] = tmp; ++ tmp->parent = node; ++ node->count++; ++ node = tmp; ++ } else { ++ node = node->stores[offset]; ++ } ++ ++ shift -= root->shift; ++ } while (shift > 0); ++ ++ BUG_ON(node->height != 1); ++ offset = index & root->mask; ++ if (node->stores[offset]) { ++ return node->stores[offset]; ++ } else if (item_alloc) { ++ if (!(item = item_alloc())) ++ return NULL; ++ ++ node->stores[offset] = item; ++ ++ /* ++ * NOTE: we do NOT call root->assign here, since this item is ++ * newly created by us having no meaning. 
Caller can call this
++		 * if it's necessary to do so.
++		 */
++
++		node->count++;
++		root->num++;
++
++		while (sradix_node_full(root, node)) {
++			node = node->parent;
++			if (!node)
++				break;
++
++			node->fulls++;
++		}
++
++		if (unlikely(!node)) {
++			/* All nodes are full */
++			root->min = 1 << (root->height * root->shift);
++		} else {
++			if (root->min == index) {
++				root->min |= (1UL << (node->height - 1)) - 1;
++				root->min++;
++				root->enter_node = node;
++			}
++		}
++
++		return item;
++	} else {
++		return NULL;
++	}
++
++}
++
++int sradix_tree_delete(struct sradix_tree_root *root, unsigned long index)
++{
++	unsigned int height, offset;
++	struct sradix_tree_node *node;
++	int shift;
++
++	node = root->rnode;
++	if (node == NULL || (index >> (root->shift * root->height)))
++		return -ENOENT;
++
++	height = root->height;
++	shift = (height - 1) * root->shift;
++
++	do {
++		offset = (index >> shift) & root->mask;
++		node = node->stores[offset];
++		if (!node)
++			return -ENOENT;
++
++		shift -= root->shift;
++	} while (shift > 0);
++
++	offset = index & root->mask;
++	if (!node->stores[offset])
++		return -ENOENT;
++
++	sradix_tree_delete_from_leaf(root, node, index);
++
++	return 0;
++}
+diff --git a/mm/Kconfig b/mm/Kconfig
+index 011b110..b766090 100644
+--- a/mm/Kconfig
++++ b/mm/Kconfig
+@@ -239,6 +239,32 @@ config KSM
+ 	  See Documentation/vm/ksm.txt for more information: KSM is inactive
+ 	  until a program has madvised that an area is MADV_MERGEABLE, and
+ 	  root has set /sys/kernel/mm/ksm/run to 1 (if CONFIG_SYSFS is set).
++choice
++	prompt "Choose UKSM/KSM strategy"
++	default UKSM
++	depends on KSM
++	help
++	  This option allows you to select a UKSM/KSM strategy.
++
++config UKSM
++	bool "Ultra-KSM for page merging"
++	depends on KSM
++	help
++	  UKSM is inspired by the Linux kernel project KSM (Kernel Samepage
++	  Merging), but with a fundamentally rewritten core algorithm. With
++	  this advanced algorithm, UKSM can transparently scan all anonymously
++	  mapped user space applications with significantly improved scan speed
++	  and CPU efficiency. Since KVM is friendly to KSM, KVM can also benefit
++	  from UKSM. UKSM now has its first stable release and its first
++	  real-world enterprise user. For more information, please visit its
++	  project page (www.kerneldedup.org).
++
++config KSM_LEGACY
++	bool "Legacy KSM implementation"
++	depends on KSM
++	help
++	  The legacy KSM implementation from Red Hat.
++endchoice + + config DEFAULT_MMAP_MIN_ADDR + int "Low address space to protect from user allocation" +diff --git a/mm/Makefile b/mm/Makefile +index 50ec00e..c551bae 100644 +--- a/mm/Makefile ++++ b/mm/Makefile +@@ -34,7 +34,8 @@ obj-$(CONFIG_SPARSEMEM_VMEMMAP) += sparse-vmemmap.o + obj-$(CONFIG_SLOB) += slob.o + obj-$(CONFIG_COMPACTION) += compaction.o + obj-$(CONFIG_MMU_NOTIFIER) += mmu_notifier.o +-obj-$(CONFIG_KSM) += ksm.o ++obj-$(CONFIG_KSM_LEGACY) += ksm.o ++obj-$(CONFIG_UKSM) += uksm.o + obj-$(CONFIG_PAGE_POISONING) += debug-pagealloc.o + obj-$(CONFIG_SLAB) += slab.o + obj-$(CONFIG_SLUB) += slub.o +diff --git a/mm/memory.c b/mm/memory.c +index 70f5daf..861bcbc 100644 +--- a/mm/memory.c ++++ b/mm/memory.c +@@ -112,6 +112,37 @@ __setup("norandmaps", disable_randmaps); + unsigned long zero_pfn __read_mostly; + unsigned long highest_memmap_pfn __read_mostly; + ++#ifdef CONFIG_UKSM ++unsigned long uksm_zero_pfn __read_mostly; ++struct page *empty_uksm_zero_page; ++ ++static int __init setup_uksm_zero_page(void) ++{ ++ unsigned long addr; ++ addr = __get_free_pages(GFP_KERNEL | __GFP_ZERO, 0); ++ if (!addr) ++ panic("Oh boy, that early out of memory?"); ++ ++ empty_uksm_zero_page = virt_to_page((void *) addr); ++ SetPageReserved(empty_uksm_zero_page); ++ ++ uksm_zero_pfn = page_to_pfn(empty_uksm_zero_page); ++ ++ return 0; ++} ++core_initcall(setup_uksm_zero_page); ++ ++static inline int is_uksm_zero_pfn(unsigned long pfn) ++{ ++ return pfn == uksm_zero_pfn; ++} ++#else ++static inline int is_uksm_zero_pfn(unsigned long pfn) ++{ ++ return 0; ++} ++#endif ++ + /* + * CONFIG_MMU architectures set up ZERO_PAGE in their paging_init() + */ +@@ -123,6 +154,7 @@ static int __init init_zero_pfn(void) + core_initcall(init_zero_pfn); + + ++ + #if defined(SPLIT_RSS_COUNTING) + + static void __sync_task_rss_stat(struct task_struct *task, struct mm_struct *mm) +@@ -739,8 +771,10 @@ static inline int is_cow_mapping(vm_flags_t flags) + #ifndef is_zero_pfn + static inline int is_zero_pfn(unsigned long pfn) + { +- return pfn == zero_pfn; ++ return (pfn == zero_pfn) || (is_uksm_zero_pfn(pfn)); + } ++#else ++#define is_zero_pfn(pfn) (is_zero_pfn(pfn) || is_uksm_zero_pfn(pfn)) + #endif + + #ifndef my_zero_pfn +@@ -917,6 +951,11 @@ copy_one_pte(struct mm_struct *dst_mm, struct mm_struct *src_mm, + rss[MM_ANONPAGES]++; + else + rss[MM_FILEPAGES]++; ++ ++ /* Should return NULL in vm_normal_page() */ ++ uksm_bugon_zeropage(pte); ++ } else { ++ uksm_map_zero_page(pte); + } + + out_set_pte: +@@ -1152,8 +1191,10 @@ again: + ptent = ptep_get_and_clear_full(mm, addr, pte, + tlb->fullmm); + tlb_remove_tlb_entry(tlb, pte, addr); +- if (unlikely(!page)) ++ if (unlikely(!page)) { ++ uksm_unmap_zero_page(ptent); + continue; ++ } + if (unlikely(details) && details->nonlinear_vma + && linear_page_index(details->nonlinear_vma, + addr) != page->index) +@@ -1645,7 +1686,7 @@ int __get_user_pages(struct task_struct *tsk, struct mm_struct *mm, + + VM_BUG_ON(!!pages != !!(gup_flags & FOLL_GET)); + +- /* ++ /* + * Require read or write permissions. + * If FOLL_FORCE is set, we only require the "MAY" flags. 
+ */ +@@ -1692,7 +1733,7 @@ int __get_user_pages(struct task_struct *tsk, struct mm_struct *mm, + page = vm_normal_page(vma, start, *pte); + if (!page) { + if (!(gup_flags & FOLL_DUMP) && +- is_zero_pfn(pte_pfn(*pte))) ++ (is_zero_pfn(pte_pfn(*pte)))) + page = pte_page(*pte); + else { + pte_unmap(pte); +@@ -2452,8 +2493,10 @@ static inline void cow_user_page(struct page *dst, struct page *src, unsigned lo + clear_page(kaddr); + kunmap_atomic(kaddr, KM_USER0); + flush_dcache_page(dst); +- } else ++ } else { + copy_user_highpage(dst, src, va, vma); ++ uksm_cow_page(vma, src); ++ } + } + + /* +@@ -2651,6 +2694,7 @@ gotten: + new_page = alloc_zeroed_user_highpage_movable(vma, address); + if (!new_page) + goto oom; ++ uksm_cow_pte(vma, orig_pte); + } else { + new_page = alloc_page_vma(GFP_HIGHUSER_MOVABLE, vma, address); + if (!new_page) +@@ -2672,8 +2716,11 @@ gotten: + dec_mm_counter_fast(mm, MM_FILEPAGES); + inc_mm_counter_fast(mm, MM_ANONPAGES); + } +- } else ++ uksm_bugon_zeropage(orig_pte); ++ } else { ++ uksm_unmap_zero_page(orig_pte); + inc_mm_counter_fast(mm, MM_ANONPAGES); ++ } + flush_cache_page(vma, address, pte_pfn(orig_pte)); + entry = mk_pte(new_page, vma->vm_page_prot); + entry = maybe_mkwrite(pte_mkdirty(entry), vma); +diff --git a/mm/mmap.c b/mm/mmap.c +index eae90af..e723d3a 100644 +--- a/mm/mmap.c ++++ b/mm/mmap.c +@@ -30,6 +30,7 @@ + #include + #include + #include ++#include + + #include + #include +@@ -65,7 +66,7 @@ static void unmap_region(struct mm_struct *mm, + * MAP_SHARED r: (no) no r: (yes) yes r: (no) yes r: (no) yes + * w: (no) no w: (no) no w: (yes) yes w: (no) no + * x: (no) no x: (no) yes x: (no) yes x: (yes) yes +- * ++ * + * MAP_PRIVATE r: (no) no r: (yes) yes r: (no) yes r: (no) yes + * w: (no) no w: (no) no w: (copy) copy w: (no) no + * x: (no) no x: (no) yes x: (no) yes x: (yes) yes +@@ -236,6 +237,7 @@ static struct vm_area_struct *remove_vma(struct vm_area_struct *vma) + removed_exe_file_vma(vma->vm_mm); + } + mpol_put(vma_policy(vma)); ++ uksm_remove_vma(vma); + kmem_cache_free(vm_area_cachep, vma); + return next; + } +@@ -500,9 +502,16 @@ int vma_adjust(struct vm_area_struct *vma, unsigned long start, + long adjust_next = 0; + int remove_next = 0; + ++/* ++ * to avoid deadlock, ksm_remove_vma must be done before any spin_lock is ++ * acquired ++ */ ++ uksm_remove_vma(vma); ++ + if (next && !insert) { + struct vm_area_struct *exporter = NULL; + ++ uksm_remove_vma(next); + if (end >= next->vm_end) { + /* + * vma expands, overlapping all the next, and +@@ -578,10 +587,10 @@ again: remove_next = 1 + (end > next->vm_end); + if (adjust_next) + vma_prio_tree_remove(next, root); + } +- + vma->vm_start = start; + vma->vm_end = end; + vma->vm_pgoff = pgoff; ++ + if (adjust_next) { + next->vm_start += adjust_next << PAGE_SHIFT; + next->vm_pgoff += adjust_next; +@@ -634,10 +643,15 @@ again: remove_next = 1 + (end > next->vm_end); + */ + if (remove_next == 2) { + next = vma->vm_next; ++ uksm_remove_vma(next); + goto again; + } ++ } else { ++ if (next && !insert) ++ uksm_vma_add_new(next); + } + ++ uksm_vma_add_new(vma); + validate_mm(mm); + + return 0; +@@ -992,6 +1006,9 @@ unsigned long do_mmap_pgoff(struct file *file, unsigned long addr, + vm_flags = calc_vm_prot_bits(prot) | calc_vm_flag_bits(flags) | + mm->def_flags | VM_MAYREAD | VM_MAYWRITE | VM_MAYEXEC; + ++ /* If uksm is enabled, we add VM_MERGABLE to new VMAs. 
*/ ++ uksm_vm_flags_mod(&vm_flags); ++ + if (flags & MAP_LOCKED) + if (!can_do_mlock()) + return -EPERM; +@@ -1315,6 +1332,7 @@ munmap_back: + + vma_link(mm, vma, prev, rb_link, rb_parent); + file = vma->vm_file; ++ uksm_vma_add_new(vma); + + /* Once vma denies write, undo our temporary denial count */ + if (correct_wcount) +@@ -1341,6 +1359,7 @@ unmap_and_free_vma: + unmap_region(mm, vma, prev, vma->vm_start, vma->vm_end); + charged = 0; + free_vma: ++ uksm_remove_vma(vma); + kmem_cache_free(vm_area_cachep, vma); + unacct_error: + if (charged) +@@ -1416,7 +1435,7 @@ full_search: + addr = vma->vm_end; + } + } +-#endif ++#endif + + void arch_unmap_area(struct mm_struct *mm, unsigned long addr) + { +@@ -1978,6 +1997,8 @@ static int __split_vma(struct mm_struct * mm, struct vm_area_struct * vma, + else + err = vma_adjust(vma, vma->vm_start, addr, vma->vm_pgoff, new); + ++ uksm_vma_add_new(new); ++ + /* Success. */ + if (!err) + return 0; +@@ -2147,6 +2168,7 @@ unsigned long do_brk(unsigned long addr, unsigned long len) + return error; + + flags = VM_DATA_DEFAULT_FLAGS | VM_ACCOUNT | mm->def_flags; ++ uksm_vm_flags_mod(&flags); + + error = get_unmapped_area(NULL, addr, len, 0, MAP_FIXED); + if (error & ~PAGE_MASK) +@@ -2215,6 +2237,7 @@ unsigned long do_brk(unsigned long addr, unsigned long len) + vma->vm_flags = flags; + vma->vm_page_prot = vm_get_page_prot(flags); + vma_link(mm, vma, prev, rb_link, rb_parent); ++ uksm_vma_add_new(vma); + out: + perf_event_mmap(vma); + mm->total_vm += len >> PAGE_SHIFT; +@@ -2238,6 +2261,12 @@ void exit_mmap(struct mm_struct *mm) + /* mm's last user has gone, and its about to be pulled down */ + mmu_notifier_release(mm); + ++ /* ++ * Taking write lock on mmap_sem does not harm others, ++ * but it's crucial for uksm to avoid races. ++ */ ++ down_write(&mm->mmap_sem); ++ + if (mm->locked_vm) { + vma = mm->mmap; + while (vma) { +@@ -2271,6 +2300,11 @@ void exit_mmap(struct mm_struct *mm) + while (vma) + vma = remove_vma(vma); + ++ mm->mmap = NULL; ++ mm->mm_rb = RB_ROOT; ++ mm->mmap_cache = NULL; ++ up_write(&mm->mmap_sem); ++ + BUG_ON(mm->nr_ptes > (FIRST_USER_ADDRESS+PMD_SIZE-1)>>PMD_SHIFT); + } + +@@ -2362,6 +2396,7 @@ struct vm_area_struct *copy_vma(struct vm_area_struct **vmap, + if (new_vma->vm_ops && new_vma->vm_ops->open) + new_vma->vm_ops->open(new_vma); + vma_link(mm, new_vma, prev, rb_link, rb_parent); ++ uksm_vma_add_new(new_vma); + } + } + return new_vma; +@@ -2467,10 +2502,10 @@ int install_special_mapping(struct mm_struct *mm, + ret = insert_vm_struct(mm, vma); + if (ret) + goto out; +- + mm->total_vm += len >> PAGE_SHIFT; + + perf_event_mmap(vma); ++ uksm_vma_add_new(vma); + + return 0; + +diff --git a/mm/rmap.c b/mm/rmap.c +index a4fd368..f11b505 100644 +--- a/mm/rmap.c ++++ b/mm/rmap.c +@@ -972,9 +972,9 @@ void page_move_anon_rmap(struct page *page, + + /** + * __page_set_anon_rmap - set up new anonymous rmap +- * @page: Page to add to rmap ++ * @page: Page to add to rmap + * @vma: VM area to add page to. +- * @address: User virtual address of the mapping ++ * @address: User virtual address of the mapping + * @exclusive: the page is exclusively owned by the current process + */ + static void __page_set_anon_rmap(struct page *page, +diff --git a/mm/uksm.c b/mm/uksm.c +new file mode 100644 +index 0000000..967c755 +--- /dev/null ++++ b/mm/uksm.c +@@ -0,0 +1,5616 @@ ++/* ++ * Ultra KSM. Copyright (C) 2011-2012 Nai Xia ++ * ++ * This is an improvement upon KSM. Some basic data structures and routines ++ * are borrowed from ksm.c . 
++ * ++ * Its new features: ++ * 1. Full system scan: ++ * It automatically scans all user processes' anonymous VMAs. Kernel-user ++ * interaction to submit a memory area to KSM is no longer needed. ++ * ++ * 2. Rich area detection: ++ * It automatically detects rich areas containing abundant duplicated ++ * pages based. Rich areas are given a full scan speed. Poor areas are ++ * sampled at a reasonable speed with very low CPU consumption. ++ * ++ * 3. Ultra Per-page scan speed improvement: ++ * A new hash algorithm is proposed. As a result, on a machine with ++ * Core(TM)2 Quad Q9300 CPU in 32-bit mode and 800MHZ DDR2 main memory, it ++ * can scan memory areas that does not contain duplicated pages at speed of ++ * 627MB/sec ~ 2445MB/sec and can merge duplicated areas at speed of ++ * 477MB/sec ~ 923MB/sec. ++ * ++ * 4. Thrashing area avoidance: ++ * Thrashing area(an VMA that has frequent Ksm page break-out) can be ++ * filtered out. My benchmark shows it's more efficient than KSM's per-page ++ * hash value based volatile page detection. ++ * ++ * ++ * 5. Misc changes upon KSM: ++ * * It has a fully x86-opitmized memcmp dedicated for 4-byte-aligned page ++ * comparison. It's much faster than default C version on x86. ++ * * rmap_item now has an struct *page member to loosely cache a ++ * address-->page mapping, which reduces too much time-costly ++ * follow_page(). ++ * * The VMA creation/exit procedures are hooked to let the Ultra KSM know. ++ * * try_to_merge_two_pages() now can revert a pte if it fails. No break_ ++ * ksm is needed for this case. ++ * ++ * 6. Full Zero Page consideration(contributed by Figo Zhang) ++ * Now uksmd consider full zero pages as special pages and merge them to an ++ * special unswappable uksm zero page. ++ */ ++ ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++ ++#include ++#include "internal.h" ++ ++#ifdef CONFIG_X86 ++#undef memcmp ++ ++#ifdef CONFIG_X86_32 ++#define memcmp memcmpx86_32 ++/* ++ * Compare 4-byte-aligned address s1 and s2, with length n ++ */ ++int memcmpx86_32(void *s1, void *s2, size_t n) ++{ ++ size_t num = n / 4; ++ register int res; ++ ++ __asm__ __volatile__ ++ ( ++ "testl %3,%3\n\t" ++ "repe; cmpsd\n\t" ++ "je 1f\n\t" ++ "sbbl %0,%0\n\t" ++ "orl $1,%0\n" ++ "1:" ++ : "=&a" (res), "+&S" (s1), "+&D" (s2), "+&c" (num) ++ : "0" (0) ++ : "cc"); ++ ++ return res; ++} ++ ++/* ++ * Check the page is all zero ? 
++ */ ++static int is_full_zero(const void *s1, size_t len) ++{ ++ unsigned char same; ++ ++ len /= 4; ++ ++ __asm__ __volatile__ ++ ("repe; scasl;" ++ "sete %0" ++ : "=qm" (same), "+D" (s1), "+c" (len) ++ : "a" (0) ++ : "cc"); ++ ++ return same; ++} ++ ++ ++#elif defined(CONFIG_X86_64) ++#define memcmp memcmpx86_64 ++/* ++ * Compare 8-byte-aligned address s1 and s2, with length n ++ */ ++int memcmpx86_64(void *s1, void *s2, size_t n) ++{ ++ size_t num = n / 8; ++ register int res; ++ ++ __asm__ __volatile__ ++ ( ++ "testq %q3,%q3\n\t" ++ "repe; cmpsq\n\t" ++ "je 1f\n\t" ++ "sbbq %q0,%q0\n\t" ++ "orq $1,%q0\n" ++ "1:" ++ : "=&a" (res), "+&S" (s1), "+&D" (s2), "+&c" (num) ++ : "0" (0) ++ : "cc"); ++ ++ return res; ++} ++ ++static int is_full_zero(const void *s1, size_t len) ++{ ++ unsigned char same; ++ ++ len /= 8; ++ ++ __asm__ __volatile__ ++ ("repe; scasq;" ++ "sete %0" ++ : "=qm" (same), "+D" (s1), "+c" (len) ++ : "a" (0) ++ : "cc"); ++ ++ return same; ++} ++ ++#endif ++#else ++static int is_full_zero(const void *s1, size_t len) ++{ ++ unsigned long *src = s1; ++ int i; ++ ++ len /= sizeof(*src); ++ ++ for (i = 0; i < len; i++) { ++ if (src[i]) ++ return 0; ++ } ++ ++ return 1; ++} ++#endif ++ ++#define U64_MAX (~((u64)0)) ++#define UKSM_RUNG_ROUND_FINISHED (1 << 0) ++#define TIME_RATIO_SCALE 10000 ++ ++#define SLOT_TREE_NODE_SHIFT 8 ++#define SLOT_TREE_NODE_STORE_SIZE (1UL << SLOT_TREE_NODE_SHIFT) ++struct slot_tree_node { ++ unsigned long size; ++ struct sradix_tree_node snode; ++ void *stores[SLOT_TREE_NODE_STORE_SIZE]; ++}; ++ ++static struct kmem_cache *slot_tree_node_cachep; ++ ++static struct sradix_tree_node *slot_tree_node_alloc(void) ++{ ++ struct slot_tree_node *p; ++ p = kmem_cache_zalloc(slot_tree_node_cachep, GFP_KERNEL); ++ if (!p) ++ return NULL; ++ ++ return &p->snode; ++} ++ ++static void slot_tree_node_free(struct sradix_tree_node *node) ++{ ++ struct slot_tree_node *p; ++ ++ p = container_of(node, struct slot_tree_node, snode); ++ kmem_cache_free(slot_tree_node_cachep, p); ++} ++ ++static void slot_tree_node_extend(struct sradix_tree_node *parent, ++ struct sradix_tree_node *child) ++{ ++ struct slot_tree_node *p, *c; ++ ++ p = container_of(parent, struct slot_tree_node, snode); ++ c = container_of(child, struct slot_tree_node, snode); ++ ++ p->size += c->size; ++} ++ ++void slot_tree_node_assign(struct sradix_tree_node *node, ++ unsigned index, void *item) ++{ ++ struct vma_slot *slot = item; ++ struct slot_tree_node *cur; ++ ++ slot->snode = node; ++ slot->sindex = index; ++ ++ while (node) { ++ cur = container_of(node, struct slot_tree_node, snode); ++ cur->size += slot->pages; ++ node = node->parent; ++ } ++} ++ ++void slot_tree_node_rm(struct sradix_tree_node *node, unsigned offset) ++{ ++ struct vma_slot *slot; ++ struct slot_tree_node *cur; ++ unsigned long pages; ++ ++ if (node->height == 1) { ++ slot = node->stores[offset]; ++ pages = slot->pages; ++ } else { ++ cur = container_of(node->stores[offset], ++ struct slot_tree_node, snode); ++ pages = cur->size; ++ } ++ ++ while (node) { ++ cur = container_of(node, struct slot_tree_node, snode); ++ cur->size -= pages; ++ node = node->parent; ++ } ++} ++ ++unsigned long slot_iter_index; ++int slot_iter(void *item, unsigned long height) ++{ ++ struct slot_tree_node *node; ++ struct vma_slot *slot; ++ ++ if (height == 1) { ++ slot = item; ++ if (slot_iter_index < slot->pages) { ++ /*in this one*/ ++ return 1; ++ } else { ++ slot_iter_index -= slot->pages; ++ return 0; ++ } ++ ++ } else { ++ node = 
container_of(item, struct slot_tree_node, snode); ++ if (slot_iter_index < node->size) { ++ /*in this one*/ ++ return 1; ++ } else { ++ slot_iter_index -= node->size; ++ return 0; ++ } ++ } ++} ++ ++ ++static inline void slot_tree_init_root(struct sradix_tree_root *root) ++{ ++ init_sradix_tree_root(root, SLOT_TREE_NODE_SHIFT); ++ root->alloc = slot_tree_node_alloc; ++ root->free = slot_tree_node_free; ++ root->extend = slot_tree_node_extend; ++ root->assign = slot_tree_node_assign; ++ root->rm = slot_tree_node_rm; ++} ++ ++void slot_tree_init(void) ++{ ++ slot_tree_node_cachep = kmem_cache_create("slot_tree_node", ++ sizeof(struct slot_tree_node), 0, ++ SLAB_PANIC | SLAB_RECLAIM_ACCOUNT, ++ NULL); ++} ++ ++ ++/* Each rung of this ladder is a list of VMAs having a same scan ratio */ ++struct scan_rung { ++ //struct list_head scanned_list; ++ struct sradix_tree_root vma_root; ++ struct sradix_tree_root vma_root2; ++ ++ struct vma_slot *current_scan; ++ unsigned long current_offset; ++ ++ /* ++ * The initial value for current_offset, it should loop over ++ * [0~ step - 1] to let all slot have its chance to be scanned. ++ */ ++ unsigned long offset_init; ++ unsigned long step; /* dynamic step for current_offset */ ++ unsigned int flags; ++ unsigned long pages_to_scan; ++ //unsigned long fully_scanned_slots; ++ /* ++ * a little bit tricky - if cpu_time_ratio > 0, then the value is the ++ * the cpu time ratio it can spend in rung_i for every scan ++ * period. if < 0, then it is the cpu time ratio relative to the ++ * max cpu percentage user specified. Both in unit of ++ * 1/TIME_RATIO_SCALE ++ */ ++ int cpu_ratio; ++ ++ /* ++ * How long it will take for all slots in this rung to be fully ++ * scanned? If it's zero, we don't care about the cover time: ++ * it's fully scanned. ++ */ ++ unsigned int cover_msecs; ++ //unsigned long vma_num; ++ //unsigned long pages; /* Sum of all slot's pages in rung */ ++}; ++ ++/** ++ * node of either the stable or unstale rbtree ++ * ++ */ ++struct tree_node { ++ struct rb_node node; /* link in the main (un)stable rbtree */ ++ struct rb_root sub_root; /* rb_root for sublevel collision rbtree */ ++ u32 hash; ++ unsigned long count; /* TODO: merged with sub_root */ ++ struct list_head all_list; /* all tree nodes in stable/unstable tree */ ++}; ++ ++/** ++ * struct stable_node - node of the stable rbtree ++ * @node: rb node of this ksm page in the stable tree ++ * @hlist: hlist head of rmap_items using this ksm page ++ * @kpfn: page frame number of this ksm page ++ */ ++struct stable_node { ++ struct rb_node node; /* link in sub-rbtree */ ++ struct tree_node *tree_node; /* it's tree node root in stable tree, NULL if it's in hell list */ ++ struct hlist_head hlist; ++ unsigned long kpfn; ++ u32 hash_max; /* if ==0 then it's not been calculated yet */ ++ struct list_head all_list; /* in a list for all stable nodes */ ++}; ++ ++/** ++ * struct node_vma - group rmap_items linked in a same stable ++ * node together. 
++ */ ++struct node_vma { ++ union { ++ struct vma_slot *slot; ++ unsigned long key; /* slot is used as key sorted on hlist */ ++ }; ++ struct hlist_node hlist; ++ struct hlist_head rmap_hlist; ++ struct stable_node *head; ++}; ++ ++/** ++ * struct rmap_item - reverse mapping item for virtual addresses ++ * @rmap_list: next rmap_item in mm_slot's singly-linked rmap_list ++ * @anon_vma: pointer to anon_vma for this mm,address, when in stable tree ++ * @mm: the memory structure this rmap_item is pointing into ++ * @address: the virtual address this rmap_item tracks (+ flags in low bits) ++ * @node: rb node of this rmap_item in the unstable tree ++ * @head: pointer to stable_node heading this list in the stable tree ++ * @hlist: link into hlist of rmap_items hanging off that stable_node ++ */ ++struct rmap_item { ++ struct vma_slot *slot; ++ struct page *page; ++ unsigned long address; /* + low bits used for flags below */ ++ unsigned long hash_round; ++ unsigned long entry_index; ++ union { ++ struct {/* when in unstable tree */ ++ struct rb_node node; ++ struct tree_node *tree_node; ++ u32 hash_max; ++ }; ++ struct { /* when in stable tree */ ++ struct node_vma *head; ++ struct hlist_node hlist; ++ struct anon_vma *anon_vma; ++ }; ++ }; ++} __attribute__((aligned(4))); ++ ++struct rmap_list_entry { ++ union { ++ struct rmap_item *item; ++ unsigned long addr; ++ }; ++ /* lowest bit is used for is_addr tag */ ++} __attribute__((aligned(4))); /* 4 aligned to fit in to pages*/ ++ ++ ++/* Basic data structure definition ends */ ++ ++ ++/* ++ * Flags for rmap_item to judge if it's listed in the stable/unstable tree. ++ * The flags use the low bits of rmap_item.address ++ */ ++#define UNSTABLE_FLAG 0x1 ++#define STABLE_FLAG 0x2 ++#define get_rmap_addr(x) ((x)->address & PAGE_MASK) ++ ++/* ++ * rmap_list_entry helpers ++ */ ++#define IS_ADDR_FLAG 1 ++#define is_addr(ptr) ((unsigned long)(ptr) & IS_ADDR_FLAG) ++#define set_is_addr(ptr) ((ptr) |= IS_ADDR_FLAG) ++#define get_clean_addr(ptr) (((ptr) & ~(__typeof__(ptr))IS_ADDR_FLAG)) ++ ++ ++/* ++ * High speed caches for frequently allocated and freed structs ++ */ ++static struct kmem_cache *rmap_item_cache; ++static struct kmem_cache *stable_node_cache; ++static struct kmem_cache *node_vma_cache; ++static struct kmem_cache *vma_slot_cache; ++static struct kmem_cache *tree_node_cache; ++#define UKSM_KMEM_CACHE(__struct, __flags) kmem_cache_create("uksm_"#__struct,\ ++ sizeof(struct __struct), __alignof__(struct __struct),\ ++ (__flags), NULL) ++ ++/* Array of all scan_rung, uksm_scan_ladder[0] having the minimum scan ratio */ ++#define SCAN_LADDER_SIZE 4 ++static struct scan_rung uksm_scan_ladder[SCAN_LADDER_SIZE]; ++ ++/* The evaluation rounds uksmd has finished */ ++static unsigned long long uksm_eval_round = 1; ++ ++/* ++ * we add 1 to this var when we consider we should rebuild the whole ++ * unstable tree. ++ */ ++static unsigned long uksm_hash_round = 1; ++ ++/* ++ * How many times the whole memory is scanned. 
++ */
++static unsigned long long fully_scanned_round = 1;
++
++/* The total number of virtual pages of all vma slots */
++static u64 uksm_pages_total;
++
++/* The number of pages that have been scanned since startup */
++static u64 uksm_pages_scanned;
++
++static u64 scanned_virtual_pages;
++
++/* The number of pages that have been scanned since the last encode_benefit call */
++static u64 uksm_pages_scanned_last;
++
++/* If the scanned number is too large, we encode it here */
++static u64 pages_scanned_stored;
++
++static unsigned long pages_scanned_base;
++
++/* The number of nodes in the stable tree */
++static unsigned long uksm_pages_shared;
++
++/* The number of page slots additionally sharing those nodes */
++static unsigned long uksm_pages_sharing;
++
++/* The number of nodes in the unstable tree */
++static unsigned long uksm_pages_unshared;
++
++/*
++ * Milliseconds ksmd should sleep between scans,
++ * >= 100ms to be consistent with
++ * scan_time_to_sleep_msec()
++ */
++static unsigned int uksm_sleep_jiffies;
++
++/* The real value for the uksmd next sleep */
++static unsigned int uksm_sleep_real;
++
++/* Saved value for user input uksm_sleep_jiffies when it's enlarged */
++static unsigned int uksm_sleep_saved;
++
++/* Max percentage of cpu utilization ksmd can take to scan in one batch */
++static unsigned int uksm_max_cpu_percentage;
++
++static int uksm_cpu_governor;
++
++static char *uksm_cpu_governor_str[4] = { "full", "medium", "low", "quiet" };
++
++struct uksm_cpu_preset_s {
++	int cpu_ratio[SCAN_LADDER_SIZE];
++	unsigned int cover_msecs[SCAN_LADDER_SIZE];
++	unsigned int max_cpu; /* percentage */
++};
++
++struct uksm_cpu_preset_s uksm_cpu_preset[4] = {
++	{ {20, 40, -2500, -10000}, {1000, 500, 200, 50}, 95},
++	{ {20, 30, -2500, -10000}, {1000, 500, 400, 100}, 50},
++	{ {10, 20, -5000, -10000}, {1500, 1000, 1000, 250}, 20},
++	{ {10, 20, 40, 75}, {2000, 1000, 1000, 1000}, 1},
++};
++
++/* The default value for uksm_ema_page_time if it's not initialized */
++#define UKSM_PAGE_TIME_DEFAULT	500
++
++/* Cost to scan one page, as an exponential moving average, in nsecs */
++static unsigned long uksm_ema_page_time = UKSM_PAGE_TIME_DEFAULT;
++
++/* The exponential moving average alpha weight, in percentage. */
++#define EMA_ALPHA	20
++
++/*
++ * The threshold used to filter out thrashing areas.
++ * If it == 0, filtering is disabled, otherwise it's the percentage upper bound
++ * of the thrashing ratio of all areas. Any area with a bigger thrashing ratio
++ * will be considered as having a zero duplication ratio.
++ */
++static unsigned int uksm_thrash_threshold = 50;
++
++/* How much dedup ratio is considered to be abundant */
++static unsigned int uksm_abundant_threshold = 10;
++
++/* All slots having merged pages in this eval round. */
++struct list_head vma_slot_dedup = LIST_HEAD_INIT(vma_slot_dedup);
++
++/* How many times the ksmd has slept since startup */
++static unsigned long long uksm_sleep_times;
++
++#define UKSM_RUN_STOP	0
++#define UKSM_RUN_MERGE	1
++static unsigned int uksm_run = 1;
++
++static DECLARE_WAIT_QUEUE_HEAD(uksm_thread_wait);
++static DEFINE_MUTEX(uksm_thread_mutex);
++
++/*
++ * List vma_slot_new is for newly created vma_slot waiting to be added by
++ * ksmd. If one cannot be added (e.g. because it's too small), it's moved to
++ * vma_slot_noadd. vma_slot_del is the list for vma_slot whose corresponding
++ * VMA has been removed/freed.
++ */ ++struct list_head vma_slot_new = LIST_HEAD_INIT(vma_slot_new); ++struct list_head vma_slot_noadd = LIST_HEAD_INIT(vma_slot_noadd); ++struct list_head vma_slot_del = LIST_HEAD_INIT(vma_slot_del); ++static DEFINE_SPINLOCK(vma_slot_list_lock); ++ ++/* The unstable tree heads */ ++static struct rb_root root_unstable_tree = RB_ROOT; ++ ++/* ++ * All tree_nodes are in a list to be freed at once when unstable tree is ++ * freed after each scan round. ++ */ ++static struct list_head unstable_tree_node_list = ++ LIST_HEAD_INIT(unstable_tree_node_list); ++ ++/* List contains all stable nodes */ ++static struct list_head stable_node_list = LIST_HEAD_INIT(stable_node_list); ++ ++/* ++ * When the hash strength is changed, the stable tree must be delta_hashed and ++ * re-structured. We use two set of below structs to speed up the ++ * re-structuring of stable tree. ++ */ ++static struct list_head ++stable_tree_node_list[2] = {LIST_HEAD_INIT(stable_tree_node_list[0]), ++ LIST_HEAD_INIT(stable_tree_node_list[1])}; ++ ++static struct list_head *stable_tree_node_listp = &stable_tree_node_list[0]; ++static struct rb_root root_stable_tree[2] = {RB_ROOT, RB_ROOT}; ++static struct rb_root *root_stable_treep = &root_stable_tree[0]; ++static unsigned long stable_tree_index; ++ ++/* The hash strength needed to hash a full page */ ++#define HASH_STRENGTH_FULL (PAGE_SIZE / sizeof(u32)) ++ ++/* The hash strength needed for loop-back hashing */ ++#define HASH_STRENGTH_MAX (HASH_STRENGTH_FULL + 10) ++ ++/* The random offsets in a page */ ++static u32 *random_nums; ++ ++/* The hash strength */ ++static unsigned long hash_strength = HASH_STRENGTH_FULL >> 4; ++ ++/* The delta value each time the hash strength increases or decreases */ ++static unsigned long hash_strength_delta; ++#define HASH_STRENGTH_DELTA_MAX 5 ++ ++/* The time we have saved due to random_sample_hash */ ++static u64 rshash_pos; ++ ++/* The time we have wasted due to hash collision */ ++static u64 rshash_neg; ++ ++struct uksm_benefit { ++ u64 pos; ++ u64 neg; ++ u64 scanned; ++ unsigned long base; ++} benefit; ++ ++/* ++ * The relative cost of memcmp, compared to 1 time unit of random sample ++ * hash, this value is tested when ksm module is initialized ++ */ ++static unsigned long memcmp_cost; ++ ++static unsigned long rshash_neg_cont_zero; ++static unsigned long rshash_cont_obscure; ++ ++/* The possible states of hash strength adjustment heuristic */ ++enum rshash_states { ++ RSHASH_STILL, ++ RSHASH_TRYUP, ++ RSHASH_TRYDOWN, ++ RSHASH_NEW, ++ RSHASH_PRE_STILL, ++}; ++ ++/* The possible direction we are about to adjust hash strength */ ++enum rshash_direct { ++ GO_UP, ++ GO_DOWN, ++ OBSCURE, ++ STILL, ++}; ++ ++/* random sampling hash state machine */ ++static struct { ++ enum rshash_states state; ++ enum rshash_direct pre_direct; ++ u8 below_count; ++ /* Keep a lookup window of size 5, iff above_count/below_count > 3 ++ * in this window we stop trying. 
++ */ ++ u8 lookup_window_index; ++ u64 stable_benefit; ++ unsigned long turn_point_down; ++ unsigned long turn_benefit_down; ++ unsigned long turn_point_up; ++ unsigned long turn_benefit_up; ++ unsigned long stable_point; ++} rshash_state; ++ ++/*zero page hash table, hash_strength [0 ~ HASH_STRENGTH_MAX]*/ ++static u32 *zero_hash_table; ++ ++static inline struct node_vma *alloc_node_vma(void) ++{ ++ struct node_vma *node_vma; ++ node_vma = kmem_cache_zalloc(node_vma_cache, GFP_KERNEL); ++ if (node_vma) { ++ INIT_HLIST_HEAD(&node_vma->rmap_hlist); ++ INIT_HLIST_NODE(&node_vma->hlist); ++ } ++ return node_vma; ++} ++ ++static inline void free_node_vma(struct node_vma *node_vma) ++{ ++ kmem_cache_free(node_vma_cache, node_vma); ++} ++ ++ ++static inline struct vma_slot *alloc_vma_slot(void) ++{ ++ struct vma_slot *slot; ++ ++ /* ++ * In case ksm is not initialized by now. ++ * Oops, we need to consider the call site of uksm_init() in the future. ++ */ ++ if (!vma_slot_cache) ++ return NULL; ++ ++ slot = kmem_cache_zalloc(vma_slot_cache, GFP_KERNEL); ++ if (slot) { ++ INIT_LIST_HEAD(&slot->slot_list); ++ INIT_LIST_HEAD(&slot->dedup_list); ++ slot->flags |= UKSM_SLOT_NEED_RERAND; ++ } ++ return slot; ++} ++ ++static inline void free_vma_slot(struct vma_slot *vma_slot) ++{ ++ kmem_cache_free(vma_slot_cache, vma_slot); ++} ++ ++ ++ ++static inline struct rmap_item *alloc_rmap_item(void) ++{ ++ struct rmap_item *rmap_item; ++ ++ rmap_item = kmem_cache_zalloc(rmap_item_cache, GFP_KERNEL); ++ if (rmap_item) { ++ /* bug on lowest bit is not clear for flag use */ ++ BUG_ON(is_addr(rmap_item)); ++ } ++ return rmap_item; ++} ++ ++static inline void free_rmap_item(struct rmap_item *rmap_item) ++{ ++ rmap_item->slot = NULL; /* debug safety */ ++ kmem_cache_free(rmap_item_cache, rmap_item); ++} ++ ++static inline struct stable_node *alloc_stable_node(void) ++{ ++ struct stable_node *node; ++ node = kmem_cache_alloc(stable_node_cache, GFP_KERNEL | GFP_ATOMIC); ++ if (!node) ++ return NULL; ++ ++ INIT_HLIST_HEAD(&node->hlist); ++ list_add(&node->all_list, &stable_node_list); ++ return node; ++} ++ ++static inline void free_stable_node(struct stable_node *stable_node) ++{ ++ list_del(&stable_node->all_list); ++ kmem_cache_free(stable_node_cache, stable_node); ++} ++ ++static inline struct tree_node *alloc_tree_node(struct list_head *list) ++{ ++ struct tree_node *node; ++ node = kmem_cache_zalloc(tree_node_cache, GFP_KERNEL | GFP_ATOMIC); ++ if (!node) ++ return NULL; ++ ++ list_add(&node->all_list, list); ++ return node; ++} ++ ++static inline void free_tree_node(struct tree_node *node) ++{ ++ list_del(&node->all_list); ++ kmem_cache_free(tree_node_cache, node); ++} ++ ++static void uksm_drop_anon_vma(struct rmap_item *rmap_item) ++{ ++ struct anon_vma *anon_vma = rmap_item->anon_vma; ++ ++ put_anon_vma(anon_vma); ++} ++ ++ ++/** ++ * Remove a stable node from stable_tree, may unlink from its tree_node and ++ * may remove its parent tree_node if no other stable node is pending. ++ * ++ * @stable_node The node need to be removed ++ * @unlink_rb Will this node be unlinked from the rbtree? ++ * @remove_tree_ node Will its tree_node be removed if empty? 
++ */ ++static void remove_node_from_stable_tree(struct stable_node *stable_node, ++ int unlink_rb, int remove_tree_node) ++{ ++ struct node_vma *node_vma; ++ struct rmap_item *rmap_item; ++ struct hlist_node *hlist, *rmap_hlist, *n; ++ ++ if (!hlist_empty(&stable_node->hlist)) { ++ hlist_for_each_entry_safe(node_vma, hlist, n, ++ &stable_node->hlist, hlist) { ++ hlist_for_each_entry(rmap_item, rmap_hlist, ++ &node_vma->rmap_hlist, hlist) { ++ uksm_pages_sharing--; ++ ++ uksm_drop_anon_vma(rmap_item); ++ rmap_item->address &= PAGE_MASK; ++ } ++ free_node_vma(node_vma); ++ cond_resched(); ++ } ++ ++ /* the last one is counted as shared */ ++ uksm_pages_shared--; ++ uksm_pages_sharing++; ++ } ++ ++ if (stable_node->tree_node && unlink_rb) { ++ rb_erase(&stable_node->node, ++ &stable_node->tree_node->sub_root); ++ ++ if (RB_EMPTY_ROOT(&stable_node->tree_node->sub_root) && ++ remove_tree_node) { ++ rb_erase(&stable_node->tree_node->node, ++ root_stable_treep); ++ free_tree_node(stable_node->tree_node); ++ } else { ++ stable_node->tree_node->count--; ++ } ++ } ++ ++ free_stable_node(stable_node); ++} ++ ++ ++/* ++ * get_uksm_page: checks if the page indicated by the stable node ++ * is still its ksm page, despite having held no reference to it. ++ * In which case we can trust the content of the page, and it ++ * returns the gotten page; but if the page has now been zapped, ++ * remove the stale node from the stable tree and return NULL. ++ * ++ * You would expect the stable_node to hold a reference to the ksm page. ++ * But if it increments the page's count, swapping out has to wait for ++ * ksmd to come around again before it can free the page, which may take ++ * seconds or even minutes: much too unresponsive. So instead we use a ++ * "keyhole reference": access to the ksm page from the stable node peeps ++ * out through its keyhole to see if that page still holds the right key, ++ * pointing back to this stable node. This relies on freeing a PageAnon ++ * page to reset its page->mapping to NULL, and relies on no other use of ++ * a page to put something that might look like our key in page->mapping. ++ * ++ * include/linux/pagemap.h page_cache_get_speculative() is a good reference, ++ * but this is different - made simpler by uksm_thread_mutex being held, but ++ * interesting for assuming that no other use of the struct page could ever ++ * put our expected_mapping into page->mapping (or a field of the union which ++ * coincides with page->mapping). The RCU calls are not for KSM at all, but ++ * to keep the page_count protocol described with page_cache_get_speculative. ++ * ++ * Note: it is possible that get_uksm_page() will return NULL one moment, ++ * then page the next, if the page is in between page_freeze_refs() and ++ * page_unfreeze_refs(): this shouldn't be a problem anywhere, the page ++ * is on its way to being freed; but it is an anomaly to bear in mind. ++ * ++ * @unlink_rb: if the removal of this node will firstly unlink from ++ * its rbtree. stable_node_reinsert will prevent this when restructuring the ++ * node from its old tree. ++ * ++ * @remove_tree_node: if this is the last one of its tree_node, will the ++ * tree_node be freed ? If we are inserting stable node, this tree_node may ++ * be reused, so don't free it. 
++ */ ++static struct page *get_uksm_page(struct stable_node *stable_node, ++ int unlink_rb, int remove_tree_node) ++{ ++ struct page *page; ++ void *expected_mapping; ++ ++ page = pfn_to_page(stable_node->kpfn); ++ expected_mapping = (void *)stable_node + ++ (PAGE_MAPPING_ANON | PAGE_MAPPING_KSM); ++ rcu_read_lock(); ++ if (page->mapping != expected_mapping) ++ goto stale; ++ if (!get_page_unless_zero(page)) ++ goto stale; ++ if (page->mapping != expected_mapping) { ++ put_page(page); ++ goto stale; ++ } ++ rcu_read_unlock(); ++ return page; ++stale: ++ rcu_read_unlock(); ++ remove_node_from_stable_tree(stable_node, unlink_rb, remove_tree_node); ++ ++ return NULL; ++} ++ ++/* ++ * Removing rmap_item from stable or unstable tree. ++ * This function will clean the information from the stable/unstable tree. ++ */ ++static inline void remove_rmap_item_from_tree(struct rmap_item *rmap_item) ++{ ++ if (rmap_item->address & STABLE_FLAG) { ++ struct stable_node *stable_node; ++ struct node_vma *node_vma; ++ struct page *page; ++ ++ node_vma = rmap_item->head; ++ stable_node = node_vma->head; ++ page = get_uksm_page(stable_node, 1, 1); ++ if (!page) ++ goto out; ++ ++ /* ++ * page lock is needed because it's racing with ++ * try_to_unmap_ksm(), etc. ++ */ ++ lock_page(page); ++ hlist_del(&rmap_item->hlist); ++ ++ if (hlist_empty(&node_vma->rmap_hlist)) { ++ hlist_del(&node_vma->hlist); ++ free_node_vma(node_vma); ++ } ++ unlock_page(page); ++ ++ put_page(page); ++ if (hlist_empty(&stable_node->hlist)) { ++ /* do NOT call remove_node_from_stable_tree() here, ++ * it's possible for a forked rmap_item not in ++ * stable tree while the in-tree rmap_items were ++ * deleted. ++ */ ++ uksm_pages_shared--; ++ } else ++ uksm_pages_sharing--; ++ ++ ++ uksm_drop_anon_vma(rmap_item); ++ } else if (rmap_item->address & UNSTABLE_FLAG) { ++ if (rmap_item->hash_round == uksm_hash_round) { ++ ++ rb_erase(&rmap_item->node, ++ &rmap_item->tree_node->sub_root); ++ if (RB_EMPTY_ROOT(&rmap_item->tree_node->sub_root)) { ++ rb_erase(&rmap_item->tree_node->node, ++ &root_unstable_tree); ++ ++ free_tree_node(rmap_item->tree_node); ++ } else ++ rmap_item->tree_node->count--; ++ } ++ uksm_pages_unshared--; ++ } ++ ++ rmap_item->address &= PAGE_MASK; ++ rmap_item->hash_max = 0; ++ ++out: ++ cond_resched(); /* we're called from many long loops */ ++} ++ ++static inline int slot_in_uksm(struct vma_slot *slot) ++{ ++ return list_empty(&slot->slot_list); ++} ++ ++/* ++ * Test if the mm is exiting ++ */ ++static inline bool uksm_test_exit(struct mm_struct *mm) ++{ ++ return atomic_read(&mm->mm_users) == 0; ++} ++ ++/** ++ * Need to do two things: ++ * 1. check if slot was moved to del list ++ * 2. make sure the mmap_sem is manipulated under valid vma. ++ * ++ * My concern here is that in some cases, this may make ++ * vma_slot_list_lock() waiters to serialized further by some ++ * sem->wait_lock, can this really be expensive? ++ * ++ * ++ * @return ++ * 0: if successfully locked mmap_sem ++ * -ENOENT: this slot was moved to del list ++ * -EBUSY: vma lock failed ++ */ ++static int try_down_read_slot_mmap_sem(struct vma_slot *slot) ++{ ++ struct vm_area_struct *vma; ++ struct mm_struct *mm; ++ struct rw_semaphore *sem; ++ ++ spin_lock(&vma_slot_list_lock); ++ ++ /* the slot_list was removed and inited from new list, when it enters ++ * uksm_list. 
If now it's not empty, then it must be moved to del list ++ */ ++ if (!slot_in_uksm(slot)) { ++ spin_unlock(&vma_slot_list_lock); ++ return -ENOENT; ++ } ++ ++ BUG_ON(slot->pages != vma_pages(slot->vma)); ++ /* Ok, vma still valid */ ++ vma = slot->vma; ++ mm = vma->vm_mm; ++ sem = &mm->mmap_sem; ++ ++ if (uksm_test_exit(mm)) { ++ spin_unlock(&vma_slot_list_lock); ++ return -ENOENT; ++ } ++ ++ if (down_read_trylock(sem)) { ++ spin_unlock(&vma_slot_list_lock); ++ return 0; ++ } ++ ++ spin_unlock(&vma_slot_list_lock); ++ return -EBUSY; ++} ++ ++static inline unsigned long ++vma_page_address(struct page *page, struct vm_area_struct *vma) ++{ ++ pgoff_t pgoff = page->index << (PAGE_CACHE_SHIFT - PAGE_SHIFT); ++ unsigned long address; ++ ++ address = vma->vm_start + ((pgoff - vma->vm_pgoff) << PAGE_SHIFT); ++ if (unlikely(address < vma->vm_start || address >= vma->vm_end)) { ++ /* page should be within @vma mapping range */ ++ return -EFAULT; ++ } ++ return address; ++} ++ ++ ++/* return 0 on success with the item's mmap_sem locked */ ++static inline int get_mergeable_page_lock_mmap(struct rmap_item *item) ++{ ++ struct mm_struct *mm; ++ struct vma_slot *slot = item->slot; ++ int err = -EINVAL; ++ ++ struct page *page; ++ ++ /* ++ * try_down_read_slot_mmap_sem() returns non-zero if the slot ++ * has been removed by uksm_remove_vma(). ++ */ ++ if (try_down_read_slot_mmap_sem(slot)) ++ return -EBUSY; ++ ++ mm = slot->vma->vm_mm; ++ ++ if (uksm_test_exit(mm)) ++ goto failout_up; ++ ++ page = item->page; ++ rcu_read_lock(); ++ if (!get_page_unless_zero(page)) { ++ rcu_read_unlock(); ++ goto failout_up; ++ } ++ ++ /* No need to consider huge page here. */ ++ if (item->slot->vma->anon_vma != page_anon_vma(page) || ++ vma_page_address(page, item->slot->vma) != get_rmap_addr(item)) { ++ /* ++ * TODO: ++ * should we release this item becase of its stale page ++ * mapping? ++ */ ++ put_page(page); ++ rcu_read_unlock(); ++ goto failout_up; ++ } ++ rcu_read_unlock(); ++ return 0; ++ ++failout_up: ++ up_read(&mm->mmap_sem); ++ return err; ++} ++ ++/* ++ * What kind of VMA is considered ? ++ */ ++static inline int vma_can_enter(struct vm_area_struct *vma) ++{ ++ return uksm_flags_can_scan(vma->vm_flags); ++} ++ ++/* ++ * Called whenever a fresh new vma is created A new vma_slot. ++ * is created and inserted into a global list Must be called. ++ * after vma is inserted to its mm . ++ */ ++void uksm_vma_add_new(struct vm_area_struct *vma) ++{ ++ struct vma_slot *slot; ++ ++ if (!vma_can_enter(vma)) { ++ vma->uksm_vma_slot = NULL; ++ return; ++ } ++ ++ slot = alloc_vma_slot(); ++ if (!slot) { ++ vma->uksm_vma_slot = NULL; ++ return; ++ } ++ ++ vma->uksm_vma_slot = slot; ++ vma->vm_flags |= VM_MERGEABLE; ++ slot->vma = vma; ++ slot->mm = vma->vm_mm; ++ slot->ctime_j = jiffies; ++ slot->pages = vma_pages(vma); ++ spin_lock(&vma_slot_list_lock); ++ list_add_tail(&slot->slot_list, &vma_slot_new); ++ spin_unlock(&vma_slot_list_lock); ++} ++ ++/* ++ * Called after vma is unlinked from its mm ++ */ ++void uksm_remove_vma(struct vm_area_struct *vma) ++{ ++ struct vma_slot *slot; ++ ++ if (!vma->uksm_vma_slot) ++ return; ++ ++ slot = vma->uksm_vma_slot; ++ spin_lock(&vma_slot_list_lock); ++ if (slot_in_uksm(slot)) { ++ /** ++ * This slot has been added by ksmd, so move to the del list ++ * waiting ksmd to free it. ++ */ ++ list_add_tail(&slot->slot_list, &vma_slot_del); ++ } else { ++ /** ++ * It's still on new list. It's ok to free slot directly. 
++ */ ++ list_del(&slot->slot_list); ++ free_vma_slot(slot); ++ } ++ spin_unlock(&vma_slot_list_lock); ++ vma->uksm_vma_slot = NULL; ++} ++ ++/* 32/3 < they < 32/2 */ ++#define shiftl 8 ++#define shiftr 12 ++ ++#define HASH_FROM_TO(from, to) \ ++for (index = from; index < to; index++) { \ ++ pos = random_nums[index]; \ ++ hash += key[pos]; \ ++ hash += (hash << shiftl); \ ++ hash ^= (hash >> shiftr); \ ++} ++ ++ ++#define HASH_FROM_DOWN_TO(from, to) \ ++for (index = from - 1; index >= to; index--) { \ ++ hash ^= (hash >> shiftr); \ ++ hash ^= (hash >> (shiftr*2)); \ ++ hash -= (hash << shiftl); \ ++ hash += (hash << (shiftl*2)); \ ++ pos = random_nums[index]; \ ++ hash -= key[pos]; \ ++} ++ ++/* ++ * The main random sample hash function. ++ */ ++static u32 random_sample_hash(void *addr, u32 hash_strength) ++{ ++ u32 hash = 0xdeadbeef; ++ int index, pos, loop = hash_strength; ++ u32 *key = (u32 *)addr; ++ ++ if (loop > HASH_STRENGTH_FULL) ++ loop = HASH_STRENGTH_FULL; ++ ++ HASH_FROM_TO(0, loop); ++ ++ if (hash_strength > HASH_STRENGTH_FULL) { ++ loop = hash_strength - HASH_STRENGTH_FULL; ++ HASH_FROM_TO(0, loop); ++ } ++ ++ return hash; ++} ++ ++ ++/** ++ * It's used when hash strength is adjusted ++ * ++ * @addr The page's virtual address ++ * @from The original hash strength ++ * @to The hash strength changed to ++ * @hash The hash value generated with "from" hash value ++ * ++ * return the hash value ++ */ ++static u32 delta_hash(void *addr, int from, int to, u32 hash) ++{ ++ u32 *key = (u32 *)addr; ++ int index, pos; /* make sure they are int type */ ++ ++ if (to > from) { ++ if (from >= HASH_STRENGTH_FULL) { ++ from -= HASH_STRENGTH_FULL; ++ to -= HASH_STRENGTH_FULL; ++ HASH_FROM_TO(from, to); ++ } else if (to <= HASH_STRENGTH_FULL) { ++ HASH_FROM_TO(from, to); ++ } else { ++ HASH_FROM_TO(from, HASH_STRENGTH_FULL); ++ HASH_FROM_TO(0, to - HASH_STRENGTH_FULL); ++ } ++ } else { ++ if (from <= HASH_STRENGTH_FULL) { ++ HASH_FROM_DOWN_TO(from, to); ++ } else if (to >= HASH_STRENGTH_FULL) { ++ from -= HASH_STRENGTH_FULL; ++ to -= HASH_STRENGTH_FULL; ++ HASH_FROM_DOWN_TO(from, to); ++ } else { ++ HASH_FROM_DOWN_TO(from - HASH_STRENGTH_FULL, 0); ++ HASH_FROM_DOWN_TO(HASH_STRENGTH_FULL, to); ++ } ++ } ++ ++ return hash; ++} ++ ++ ++ ++ ++#define CAN_OVERFLOW_U64(x, delta) (U64_MAX - (x) < (delta)) ++ ++/** ++ * ++ * Called when: rshash_pos or rshash_neg is about to overflow or a scan round ++ * has finished. ++ * ++ * return 0 if no page has been scanned since last call, 1 otherwise. 
++ */ ++static inline int encode_benefit(void) ++{ ++ u64 scanned_delta, pos_delta, neg_delta; ++ unsigned long base = benefit.base; ++ ++ scanned_delta = uksm_pages_scanned - uksm_pages_scanned_last; ++ ++ if (!scanned_delta) ++ return 0; ++ ++ scanned_delta >>= base; ++ pos_delta = rshash_pos >> base; ++ neg_delta = rshash_neg >> base; ++ ++ if (CAN_OVERFLOW_U64(benefit.pos, pos_delta) || ++ CAN_OVERFLOW_U64(benefit.neg, neg_delta) || ++ CAN_OVERFLOW_U64(benefit.scanned, scanned_delta)) { ++ benefit.scanned >>= 1; ++ benefit.neg >>= 1; ++ benefit.pos >>= 1; ++ benefit.base++; ++ scanned_delta >>= 1; ++ pos_delta >>= 1; ++ neg_delta >>= 1; ++ } ++ ++ benefit.pos += pos_delta; ++ benefit.neg += neg_delta; ++ benefit.scanned += scanned_delta; ++ ++ BUG_ON(!benefit.scanned); ++ ++ rshash_pos = rshash_neg = 0; ++ uksm_pages_scanned_last = uksm_pages_scanned; ++ ++ return 1; ++} ++ ++static inline void reset_benefit(void) ++{ ++ benefit.pos = 0; ++ benefit.neg = 0; ++ benefit.base = 0; ++ benefit.scanned = 0; ++} ++ ++static inline void inc_rshash_pos(unsigned long delta) ++{ ++ if (CAN_OVERFLOW_U64(rshash_pos, delta)) ++ encode_benefit(); ++ ++ rshash_pos += delta; ++} ++ ++static inline void inc_rshash_neg(unsigned long delta) ++{ ++ if (CAN_OVERFLOW_U64(rshash_neg, delta)) ++ encode_benefit(); ++ ++ rshash_neg += delta; ++} ++ ++ ++static inline u32 page_hash(struct page *page, unsigned long hash_strength, ++ int cost_accounting) ++{ ++ u32 val; ++ unsigned long delta; ++ ++ void *addr = kmap_atomic(page, KM_USER0); ++ ++ val = random_sample_hash(addr, hash_strength); ++ kunmap_atomic(addr, KM_USER0); ++ ++ if (cost_accounting) { ++ if (HASH_STRENGTH_FULL > hash_strength) ++ delta = HASH_STRENGTH_FULL - hash_strength; ++ else ++ delta = 0; ++ ++ inc_rshash_pos(delta); ++ } ++ ++ return val; ++} ++ ++static int memcmp_pages(struct page *page1, struct page *page2, ++ int cost_accounting) ++{ ++ char *addr1, *addr2; ++ int ret; ++ ++ addr1 = kmap_atomic(page1, KM_USER0); ++ addr2 = kmap_atomic(page2, KM_USER1); ++ ret = memcmp(addr1, addr2, PAGE_SIZE); ++ kunmap_atomic(addr2, KM_USER1); ++ kunmap_atomic(addr1, KM_USER0); ++ ++ if (cost_accounting) ++ inc_rshash_neg(memcmp_cost); ++ ++ return ret; ++} ++ ++static inline int pages_identical(struct page *page1, struct page *page2) ++{ ++ return !memcmp_pages(page1, page2, 0); ++} ++ ++static inline int is_page_full_zero(struct page *page) ++{ ++ char *addr; ++ int ret; ++ ++ addr = kmap_atomic(page, KM_USER0); ++ ret = is_full_zero(addr, PAGE_SIZE); ++ kunmap_atomic(addr, KM_USER0); ++ ++ return ret; ++} ++ ++static int write_protect_page(struct vm_area_struct *vma, struct page *page, ++ pte_t *orig_pte, pte_t *old_pte) ++{ ++ struct mm_struct *mm = vma->vm_mm; ++ unsigned long addr; ++ pte_t *ptep; ++ spinlock_t *ptl; ++ int swapped; ++ int err = -EFAULT; ++ ++ addr = page_address_in_vma(page, vma); ++ if (addr == -EFAULT) ++ goto out; ++ ++ BUG_ON(PageTransCompound(page)); ++ ptep = page_check_address(page, mm, addr, &ptl, 0); ++ if (!ptep) ++ goto out; ++ ++ if (old_pte) ++ *old_pte = *ptep; ++ ++ if (pte_write(*ptep) || pte_dirty(*ptep)) { ++ pte_t entry; ++ ++ swapped = PageSwapCache(page); ++ flush_cache_page(vma, addr, page_to_pfn(page)); ++ /* ++ * Ok this is tricky, when get_user_pages_fast() run it doesnt ++ * take any lock, therefore the check that we are going to make ++ * with the pagecount against the mapcount is racey and ++ * O_DIRECT can happen right after the check. 
++ * So we clear the pte and flush the tlb before the check ++ * this assure us that no O_DIRECT can happen after the check ++ * or in the middle of the check. ++ */ ++ entry = ptep_clear_flush(vma, addr, ptep); ++ /* ++ * Check that no O_DIRECT or similar I/O is in progress on the ++ * page ++ */ ++ if (page_mapcount(page) + 1 + swapped != page_count(page)) { ++ set_pte_at(mm, addr, ptep, entry); ++ goto out_unlock; ++ } ++ if (pte_dirty(entry)) ++ set_page_dirty(page); ++ entry = pte_mkclean(pte_wrprotect(entry)); ++ set_pte_at_notify(mm, addr, ptep, entry); ++ } ++ *orig_pte = *ptep; ++ err = 0; ++ ++out_unlock: ++ pte_unmap_unlock(ptep, ptl); ++out: ++ return err; ++} ++ ++#define MERGE_ERR_PGERR 1 /* the page is invalid cannot continue */ ++#define MERGE_ERR_COLLI 2 /* there is a collision */ ++#define MERGE_ERR_COLLI_MAX 3 /* collision at the max hash strength */ ++#define MERGE_ERR_CHANGED 4 /* the page has changed since last hash */ ++ ++ ++/** ++ * replace_page - replace page in vma by new ksm page ++ * @vma: vma that holds the pte pointing to page ++ * @page: the page we are replacing by kpage ++ * @kpage: the ksm page we replace page by ++ * @orig_pte: the original value of the pte ++ * ++ * Returns 0 on success, MERGE_ERR_PGERR on failure. ++ */ ++static int replace_page(struct vm_area_struct *vma, struct page *page, ++ struct page *kpage, pte_t orig_pte) ++{ ++ struct mm_struct *mm = vma->vm_mm; ++ pgd_t *pgd; ++ pud_t *pud; ++ pmd_t *pmd; ++ pte_t *ptep; ++ spinlock_t *ptl; ++ pte_t entry; ++ ++ unsigned long addr; ++ int err = MERGE_ERR_PGERR; ++ ++ addr = page_address_in_vma(page, vma); ++ if (addr == -EFAULT) ++ goto out; ++ ++ pgd = pgd_offset(mm, addr); ++ if (!pgd_present(*pgd)) ++ goto out; ++ ++ pud = pud_offset(pgd, addr); ++ if (!pud_present(*pud)) ++ goto out; ++ ++ pmd = pmd_offset(pud, addr); ++ BUG_ON(pmd_trans_huge(*pmd)); ++ if (!pmd_present(*pmd)) ++ goto out; ++ ++ ptep = pte_offset_map_lock(mm, pmd, addr, &ptl); ++ if (!pte_same(*ptep, orig_pte)) { ++ pte_unmap_unlock(ptep, ptl); ++ goto out; ++ } ++ ++ flush_cache_page(vma, addr, pte_pfn(*ptep)); ++ ptep_clear_flush(vma, addr, ptep); ++ entry = mk_pte(kpage, vma->vm_page_prot); ++ ++ /* special treatment is needed for zero_page */ ++ if ((page_to_pfn(kpage) == uksm_zero_pfn) || ++ (page_to_pfn(kpage) == zero_pfn)) ++ entry = pte_mkspecial(entry); ++ else { ++ get_page(kpage); ++ page_add_anon_rmap(kpage, vma, addr); ++ } ++ ++ set_pte_at_notify(mm, addr, ptep, entry); ++ ++ page_remove_rmap(page); ++ if (!page_mapped(page)) ++ try_to_free_swap(page); ++ put_page(page); ++ ++ pte_unmap_unlock(ptep, ptl); ++ err = 0; ++out: ++ return err; ++} ++ ++ ++/** ++ * Fully hash a page with HASH_STRENGTH_MAX return a non-zero hash value. The ++ * zero hash value at HASH_STRENGTH_MAX is used to indicated that its ++ * hash_max member has not been calculated. 
++ * ++ * @page The page needs to be hashed ++ * @hash_old The hash value calculated with current hash strength ++ * ++ * return the new hash value calculated at HASH_STRENGTH_MAX ++ */ ++static inline u32 page_hash_max(struct page *page, u32 hash_old) ++{ ++ u32 hash_max = 0; ++ void *addr; ++ ++ addr = kmap_atomic(page, KM_USER0); ++ hash_max = delta_hash(addr, hash_strength, ++ HASH_STRENGTH_MAX, hash_old); ++ ++ kunmap_atomic(addr, KM_USER0); ++ ++ if (!hash_max) ++ hash_max = 1; ++ ++ inc_rshash_neg(HASH_STRENGTH_MAX - hash_strength); ++ return hash_max; ++} ++ ++/* ++ * We compare the hash again, to ensure that it is really a hash collision ++ * instead of being caused by page write. ++ */ ++static inline int check_collision(struct rmap_item *rmap_item, ++ u32 hash) ++{ ++ int err; ++ struct page *page = rmap_item->page; ++ ++ /* if this rmap_item has already been hash_maxed, then the collision ++ * must appears in the second-level rbtree search. In this case we check ++ * if its hash_max value has been changed. Otherwise, the collision ++ * happens in the first-level rbtree search, so we check against it's ++ * current hash value. ++ */ ++ if (rmap_item->hash_max) { ++ inc_rshash_neg(memcmp_cost); ++ inc_rshash_neg(HASH_STRENGTH_MAX - hash_strength); ++ ++ if (rmap_item->hash_max == page_hash_max(page, hash)) ++ err = MERGE_ERR_COLLI; ++ else ++ err = MERGE_ERR_CHANGED; ++ } else { ++ inc_rshash_neg(memcmp_cost + hash_strength); ++ ++ if (page_hash(page, hash_strength, 0) == hash) ++ err = MERGE_ERR_COLLI; ++ else ++ err = MERGE_ERR_CHANGED; ++ } ++ ++ return err; ++} ++ ++static struct page *page_trans_compound_anon(struct page *page) ++{ ++ if (PageTransCompound(page)) { ++ struct page *head = compound_trans_head(page); ++ /* ++ * head may actually be splitted and freed from under ++ * us but it's ok here. ++ */ ++ if (PageAnon(head)) ++ return head; ++ } ++ return NULL; ++} ++ ++static int page_trans_compound_anon_split(struct page *page) ++{ ++ int ret = 0; ++ struct page *transhuge_head = page_trans_compound_anon(page); ++ if (transhuge_head) { ++ /* Get the reference on the head to split it. */ ++ if (get_page_unless_zero(transhuge_head)) { ++ /* ++ * Recheck we got the reference while the head ++ * was still anonymous. ++ */ ++ if (PageAnon(transhuge_head)) ++ ret = split_huge_page(transhuge_head); ++ else ++ /* ++ * Retry later if split_huge_page run ++ * from under us. ++ */ ++ ret = 1; ++ put_page(transhuge_head); ++ } else ++ /* Retry later if split_huge_page run from under us. */ ++ ret = 1; ++ } ++ return ret; ++} ++ ++/** ++ * Try to merge a rmap_item.page with a kpage in stable node. kpage must ++ * already be a ksm page. ++ * ++ * @return 0 if the pages were merged, -EFAULT otherwise. ++ */ ++static int try_to_merge_with_uksm_page(struct rmap_item *rmap_item, ++ struct page *kpage, u32 hash) ++{ ++ struct vm_area_struct *vma = rmap_item->slot->vma; ++ struct mm_struct *mm = vma->vm_mm; ++ pte_t orig_pte = __pte(0); ++ int err = MERGE_ERR_PGERR; ++ struct page *page; ++ ++ if (uksm_test_exit(mm)) ++ goto out; ++ ++ page = rmap_item->page; ++ ++ if (page == kpage) { /* ksm page forked */ ++ err = 0; ++ goto out; ++ } ++ ++ if (PageTransCompound(page) && page_trans_compound_anon_split(page)) ++ goto out; ++ BUG_ON(PageTransCompound(page)); ++ ++ if (!PageAnon(page) || !PageKsm(kpage)) ++ goto out; ++ ++ /* ++ * We need the page lock to read a stable PageSwapCache in ++ * write_protect_page(). 
We use trylock_page() instead of ++ * lock_page() because we don't want to wait here - we ++ * prefer to continue scanning and merging different pages, ++ * then come back to this page when it is unlocked. ++ */ ++ if (!trylock_page(page)) ++ goto out; ++ /* ++ * If this anonymous page is mapped only here, its pte may need ++ * to be write-protected. If it's mapped elsewhere, all of its ++ * ptes are necessarily already write-protected. But in either ++ * case, we need to lock and check page_count is not raised. ++ */ ++ if (write_protect_page(vma, page, &orig_pte, NULL) == 0) { ++ if (pages_identical(page, kpage)) ++ err = replace_page(vma, page, kpage, orig_pte); ++ else ++ err = check_collision(rmap_item, hash); ++ } ++ ++ if ((vma->vm_flags & VM_LOCKED) && kpage && !err) { ++ munlock_vma_page(page); ++ if (!PageMlocked(kpage)) { ++ unlock_page(page); ++ lock_page(kpage); ++ mlock_vma_page(kpage); ++ page = kpage; /* for final unlock */ ++ } ++ } ++ ++ unlock_page(page); ++out: ++ return err; ++} ++ ++ ++ ++/** ++ * If two pages fail to merge in try_to_merge_two_pages, then we have a chance ++ * to restore a page mapping that has been changed in try_to_merge_two_pages. ++ * ++ * @return 0 on success. ++ */ ++static int restore_uksm_page_pte(struct vm_area_struct *vma, unsigned long addr, ++ pte_t orig_pte, pte_t wprt_pte) ++{ ++ struct mm_struct *mm = vma->vm_mm; ++ pgd_t *pgd; ++ pud_t *pud; ++ pmd_t *pmd; ++ pte_t *ptep; ++ spinlock_t *ptl; ++ ++ int err = -EFAULT; ++ ++ pgd = pgd_offset(mm, addr); ++ if (!pgd_present(*pgd)) ++ goto out; ++ ++ pud = pud_offset(pgd, addr); ++ if (!pud_present(*pud)) ++ goto out; ++ ++ pmd = pmd_offset(pud, addr); ++ if (!pmd_present(*pmd)) ++ goto out; ++ ++ ptep = pte_offset_map_lock(mm, pmd, addr, &ptl); ++ if (!pte_same(*ptep, wprt_pte)) { ++ /* already copied, let it be */ ++ pte_unmap_unlock(ptep, ptl); ++ goto out; ++ } ++ ++ /* ++ * Good boy, still here. When we still get the ksm page, it does not ++ * return to the free page pool, there is no way that a pte was changed ++ * to other page and gets back to this page. And remind that ksm page ++ * do not reuse in do_wp_page(). So it's safe to restore the original ++ * pte. ++ */ ++ flush_cache_page(vma, addr, pte_pfn(*ptep)); ++ ptep_clear_flush(vma, addr, ptep); ++ set_pte_at_notify(mm, addr, ptep, orig_pte); ++ ++ pte_unmap_unlock(ptep, ptl); ++ err = 0; ++out: ++ return err; ++} ++ ++/** ++ * try_to_merge_two_pages() - take two identical pages and prepare ++ * them to be merged into one page(rmap_item->page) ++ * ++ * @return 0 if we successfully merged two identical pages into ++ * one ksm page. MERGE_ERR_COLLI if it's only a hash collision ++ * search in rbtree. MERGE_ERR_CHANGED if rmap_item has been ++ * changed since it's hashed. MERGE_ERR_PGERR otherwise. 
++ * ++ */ ++static int try_to_merge_two_pages(struct rmap_item *rmap_item, ++ struct rmap_item *tree_rmap_item, ++ u32 hash) ++{ ++ pte_t orig_pte1 = __pte(0), orig_pte2 = __pte(0); ++ pte_t wprt_pte1 = __pte(0), wprt_pte2 = __pte(0); ++ struct vm_area_struct *vma1 = rmap_item->slot->vma; ++ struct vm_area_struct *vma2 = tree_rmap_item->slot->vma; ++ struct page *page = rmap_item->page; ++ struct page *tree_page = tree_rmap_item->page; ++ int err = MERGE_ERR_PGERR; ++ struct address_space *saved_mapping; ++ ++ ++ if (rmap_item->page == tree_rmap_item->page) ++ goto out; ++ ++ if (PageTransCompound(page) && page_trans_compound_anon_split(page)) ++ goto out; ++ BUG_ON(PageTransCompound(page)); ++ ++ if (PageTransCompound(tree_page) && page_trans_compound_anon_split(tree_page)) ++ goto out; ++ BUG_ON(PageTransCompound(tree_page)); ++ ++ if (!PageAnon(page) || !PageAnon(tree_page)) ++ goto out; ++ ++ if (!trylock_page(page)) ++ goto out; ++ ++ ++ if (write_protect_page(vma1, page, &wprt_pte1, &orig_pte1) != 0) { ++ unlock_page(page); ++ goto out; ++ } ++ ++ /* ++ * While we hold page lock, upgrade page from ++ * PageAnon+anon_vma to PageKsm+NULL stable_node: ++ * stable_tree_insert() will update stable_node. ++ */ ++ saved_mapping = page->mapping; ++ set_page_stable_node(page, NULL); ++ mark_page_accessed(page); ++ unlock_page(page); ++ ++ if (!trylock_page(tree_page)) ++ goto restore_out; ++ ++ if (write_protect_page(vma2, tree_page, &wprt_pte2, &orig_pte2) != 0) { ++ unlock_page(tree_page); ++ goto restore_out; ++ } ++ ++ if (pages_identical(page, tree_page)) { ++ err = replace_page(vma2, tree_page, page, wprt_pte2); ++ if (err) { ++ unlock_page(tree_page); ++ goto restore_out; ++ } ++ ++ if ((vma2->vm_flags & VM_LOCKED)) { ++ munlock_vma_page(tree_page); ++ if (!PageMlocked(page)) { ++ unlock_page(tree_page); ++ lock_page(page); ++ mlock_vma_page(page); ++ tree_page = page; /* for final unlock */ ++ } ++ } ++ ++ unlock_page(tree_page); ++ ++ goto out; /* success */ ++ ++ } else { ++ if (tree_rmap_item->hash_max && ++ tree_rmap_item->hash_max == rmap_item->hash_max) { ++ err = MERGE_ERR_COLLI_MAX; ++ } else if (page_hash(page, hash_strength, 0) == ++ page_hash(tree_page, hash_strength, 0)) { ++ inc_rshash_neg(memcmp_cost + hash_strength * 2); ++ err = MERGE_ERR_COLLI; ++ } else { ++ err = MERGE_ERR_CHANGED; ++ } ++ ++ unlock_page(tree_page); ++ } ++ ++restore_out: ++ lock_page(page); ++ if (!restore_uksm_page_pte(vma1, get_rmap_addr(rmap_item), ++ orig_pte1, wprt_pte1)) ++ page->mapping = saved_mapping; ++ ++ unlock_page(page); ++out: ++ return err; ++} ++ ++static inline int hash_cmp(u32 new_val, u32 node_val) ++{ ++ if (new_val > node_val) ++ return 1; ++ else if (new_val < node_val) ++ return -1; ++ else ++ return 0; ++} ++ ++static inline u32 rmap_item_hash_max(struct rmap_item *item, u32 hash) ++{ ++ u32 hash_max = item->hash_max; ++ ++ if (!hash_max) { ++ hash_max = page_hash_max(item->page, hash); ++ ++ item->hash_max = hash_max; ++ } ++ ++ return hash_max; ++} ++ ++ ++ ++/** ++ * stable_tree_search() - search the stable tree for a page ++ * ++ * @item: the rmap_item we are comparing with ++ * @hash: the hash value of this item->page already calculated ++ * ++ * @return the page we have found, NULL otherwise. The page returned has ++ * been gotten. 
++ */ ++static struct page *stable_tree_search(struct rmap_item *item, u32 hash) ++{ ++ struct rb_node *node = root_stable_treep->rb_node; ++ struct tree_node *tree_node; ++ unsigned long hash_max; ++ struct page *page = item->page; ++ struct stable_node *stable_node; ++ ++ stable_node = page_stable_node(page); ++ if (stable_node) { ++ /* ksm page forked, that is ++ * if (PageKsm(page) && !in_stable_tree(rmap_item)) ++ * it's actually gotten once outside. ++ */ ++ get_page(page); ++ return page; ++ } ++ ++ while (node) { ++ int cmp; ++ ++ tree_node = rb_entry(node, struct tree_node, node); ++ ++ cmp = hash_cmp(hash, tree_node->hash); ++ ++ if (cmp < 0) ++ node = node->rb_left; ++ else if (cmp > 0) ++ node = node->rb_right; ++ else ++ break; ++ } ++ ++ if (!node) ++ return NULL; ++ ++ if (tree_node->count == 1) { ++ stable_node = rb_entry(tree_node->sub_root.rb_node, ++ struct stable_node, node); ++ BUG_ON(!stable_node); ++ ++ goto get_page_out; ++ } ++ ++ /* ++ * ok, we have to search the second ++ * level subtree, hash the page to a ++ * full strength. ++ */ ++ node = tree_node->sub_root.rb_node; ++ BUG_ON(!node); ++ hash_max = rmap_item_hash_max(item, hash); ++ ++ while (node) { ++ int cmp; ++ ++ stable_node = rb_entry(node, struct stable_node, node); ++ ++ cmp = hash_cmp(hash_max, stable_node->hash_max); ++ ++ if (cmp < 0) ++ node = node->rb_left; ++ else if (cmp > 0) ++ node = node->rb_right; ++ else ++ goto get_page_out; ++ } ++ ++ return NULL; ++ ++get_page_out: ++ page = get_uksm_page(stable_node, 1, 1); ++ return page; ++} ++ ++static int try_merge_rmap_item(struct rmap_item *item, ++ struct page *kpage, ++ struct page *tree_page) ++{ ++ spinlock_t *ptl; ++ pte_t *ptep; ++ unsigned long addr; ++ struct vm_area_struct *vma = item->slot->vma; ++ ++ addr = get_rmap_addr(item); ++ ptep = page_check_address(kpage, vma->vm_mm, addr, &ptl, 0); ++ if (!ptep) ++ return 0; ++ ++ if (pte_write(*ptep)) { ++ /* has changed, abort! */ ++ pte_unmap_unlock(ptep, ptl); ++ return 0; ++ } ++ ++ get_page(tree_page); ++ page_add_anon_rmap(tree_page, vma, addr); ++ ++ flush_cache_page(vma, addr, pte_pfn(*ptep)); ++ ptep_clear_flush(vma, addr, ptep); ++ set_pte_at_notify(vma->vm_mm, addr, ptep, ++ mk_pte(tree_page, vma->vm_page_prot)); ++ ++ page_remove_rmap(kpage); ++ put_page(kpage); ++ ++ pte_unmap_unlock(ptep, ptl); ++ ++ return 1; ++} ++ ++/** ++ * try_to_merge_with_stable_page() - when two rmap_items need to be inserted ++ * into stable tree, the page was found to be identical to a stable ksm page, ++ * this is the last chance we can merge them into one. ++ * ++ * @item1: the rmap_item holding the page which we wanted to insert ++ * into stable tree. 
++ * @item2: the other rmap_item we found when unstable tree search ++ * @oldpage: the page currently mapped by the two rmap_items ++ * @tree_page: the page we found identical in stable tree node ++ * @success1: return if item1 is successfully merged ++ * @success2: return if item2 is successfully merged ++ */ ++static void try_merge_with_stable(struct rmap_item *item1, ++ struct rmap_item *item2, ++ struct page **kpage, ++ struct page *tree_page, ++ int *success1, int *success2) ++{ ++ struct vm_area_struct *vma1 = item1->slot->vma; ++ struct vm_area_struct *vma2 = item2->slot->vma; ++ *success1 = 0; ++ *success2 = 0; ++ ++ if (unlikely(*kpage == tree_page)) { ++ /* I don't think this can really happen */ ++ printk(KERN_WARNING "UKSM: unexpected condition detected in " ++ "try_merge_with_stable() -- *kpage == tree_page !\n"); ++ *success1 = 1; ++ *success2 = 1; ++ return; ++ } ++ ++ if (!PageAnon(*kpage) || !PageKsm(*kpage)) ++ goto failed; ++ ++ if (!trylock_page(tree_page)) ++ goto failed; ++ ++ /* If the oldpage is still ksm and still pointed ++ * to in the right place, and still write protected, ++ * we are confident it's not changed, no need to ++ * memcmp anymore. ++ * be ware, we cannot take nested pte locks, ++ * deadlock risk. ++ */ ++ if (!try_merge_rmap_item(item1, *kpage, tree_page)) ++ goto unlock_failed; ++ ++ /* ok, then vma2, remind that pte1 already set */ ++ if (!try_merge_rmap_item(item2, *kpage, tree_page)) ++ goto success_1; ++ ++ *success2 = 1; ++success_1: ++ *success1 = 1; ++ ++ ++ if ((*success1 && vma1->vm_flags & VM_LOCKED) || ++ (*success2 && vma2->vm_flags & VM_LOCKED)) { ++ munlock_vma_page(*kpage); ++ if (!PageMlocked(tree_page)) ++ mlock_vma_page(tree_page); ++ } ++ ++ /* ++ * We do not need oldpage any more in the caller, so can break the lock ++ * now. ++ */ ++ unlock_page(*kpage); ++ *kpage = tree_page; /* Get unlocked outside. 
*/ ++ return; ++ ++unlock_failed: ++ unlock_page(tree_page); ++failed: ++ return; ++} ++ ++static inline void stable_node_hash_max(struct stable_node *node, ++ struct page *page, u32 hash) ++{ ++ u32 hash_max = node->hash_max; ++ ++ if (!hash_max) { ++ hash_max = page_hash_max(page, hash); ++ node->hash_max = hash_max; ++ } ++} ++ ++static inline ++struct stable_node *new_stable_node(struct tree_node *tree_node, ++ struct page *kpage, u32 hash_max) ++{ ++ struct stable_node *new_stable_node; ++ ++ new_stable_node = alloc_stable_node(); ++ if (!new_stable_node) ++ return NULL; ++ ++ new_stable_node->kpfn = page_to_pfn(kpage); ++ new_stable_node->hash_max = hash_max; ++ new_stable_node->tree_node = tree_node; ++ set_page_stable_node(kpage, new_stable_node); ++ ++ return new_stable_node; ++} ++ ++static inline ++struct stable_node *first_level_insert(struct tree_node *tree_node, ++ struct rmap_item *rmap_item, ++ struct rmap_item *tree_rmap_item, ++ struct page **kpage, u32 hash, ++ int *success1, int *success2) ++{ ++ int cmp; ++ struct page *tree_page; ++ u32 hash_max = 0; ++ struct stable_node *stable_node, *new_snode; ++ struct rb_node *parent = NULL, **new; ++ ++ /* this tree node contains no sub-tree yet */ ++ stable_node = rb_entry(tree_node->sub_root.rb_node, ++ struct stable_node, node); ++ ++ tree_page = get_uksm_page(stable_node, 1, 0); ++ if (tree_page) { ++ cmp = memcmp_pages(*kpage, tree_page, 1); ++ if (!cmp) { ++ try_merge_with_stable(rmap_item, tree_rmap_item, kpage, ++ tree_page, success1, success2); ++ put_page(tree_page); ++ if (!*success1 && !*success2) ++ goto failed; ++ ++ return stable_node; ++ ++ } else { ++ /* ++ * collision in first level try to create a subtree. ++ * A new node need to be created. ++ */ ++ put_page(tree_page); ++ ++ stable_node_hash_max(stable_node, tree_page, ++ tree_node->hash); ++ hash_max = rmap_item_hash_max(rmap_item, hash); ++ cmp = hash_cmp(hash_max, stable_node->hash_max); ++ ++ parent = &stable_node->node; ++ if (cmp < 0) { ++ new = &parent->rb_left; ++ } else if (cmp > 0) { ++ new = &parent->rb_right; ++ } else { ++ goto failed; ++ } ++ } ++ ++ } else { ++ /* the only stable_node deleted, we reuse its tree_node. 
++ */ ++ parent = NULL; ++ new = &tree_node->sub_root.rb_node; ++ } ++ ++ new_snode = new_stable_node(tree_node, *kpage, hash_max); ++ if (!new_snode) ++ goto failed; ++ ++ rb_link_node(&new_snode->node, parent, new); ++ rb_insert_color(&new_snode->node, &tree_node->sub_root); ++ tree_node->count++; ++ *success1 = *success2 = 1; ++ ++ return new_snode; ++ ++failed: ++ return NULL; ++} ++ ++static inline ++struct stable_node *stable_subtree_insert(struct tree_node *tree_node, ++ struct rmap_item *rmap_item, ++ struct rmap_item *tree_rmap_item, ++ struct page **kpage, u32 hash, ++ int *success1, int *success2) ++{ ++ struct page *tree_page; ++ u32 hash_max; ++ struct stable_node *stable_node, *new_snode; ++ struct rb_node *parent, **new; ++ ++research: ++ parent = NULL; ++ new = &tree_node->sub_root.rb_node; ++ BUG_ON(!*new); ++ hash_max = rmap_item_hash_max(rmap_item, hash); ++ while (*new) { ++ int cmp; ++ ++ stable_node = rb_entry(*new, struct stable_node, node); ++ ++ cmp = hash_cmp(hash_max, stable_node->hash_max); ++ ++ if (cmp < 0) { ++ parent = *new; ++ new = &parent->rb_left; ++ } else if (cmp > 0) { ++ parent = *new; ++ new = &parent->rb_right; ++ } else { ++ tree_page = get_uksm_page(stable_node, 1, 0); ++ if (tree_page) { ++ cmp = memcmp_pages(*kpage, tree_page, 1); ++ if (!cmp) { ++ try_merge_with_stable(rmap_item, ++ tree_rmap_item, kpage, ++ tree_page, success1, success2); ++ ++ put_page(tree_page); ++ if (!*success1 && !*success2) ++ goto failed; ++ /* ++ * successfully merged with a stable ++ * node ++ */ ++ return stable_node; ++ } else { ++ put_page(tree_page); ++ goto failed; ++ } ++ } else { ++ /* ++ * stable node may be deleted, ++ * and subtree maybe ++ * restructed, cannot ++ * continue, research it. ++ */ ++ if (tree_node->count) { ++ goto research; ++ } else { ++ /* reuse the tree node*/ ++ parent = NULL; ++ new = &tree_node->sub_root.rb_node; ++ } ++ } ++ } ++ } ++ ++ new_snode = new_stable_node(tree_node, *kpage, hash_max); ++ if (!new_snode) ++ goto failed; ++ ++ rb_link_node(&new_snode->node, parent, new); ++ rb_insert_color(&new_snode->node, &tree_node->sub_root); ++ tree_node->count++; ++ *success1 = *success2 = 1; ++ ++ return new_snode; ++ ++failed: ++ return NULL; ++} ++ ++ ++/** ++ * stable_tree_insert() - try to insert a merged page in unstable tree to ++ * the stable tree ++ * ++ * @kpage: the page need to be inserted ++ * @hash: the current hash of this page ++ * @rmap_item: the rmap_item being scanned ++ * @tree_rmap_item: the rmap_item found on unstable tree ++ * @success1: return if rmap_item is merged ++ * @success2: return if tree_rmap_item is merged ++ * ++ * @return the stable_node on stable tree if at least one ++ * rmap_item is inserted into stable tree, NULL ++ * otherwise. 
++ */ ++static struct stable_node * ++stable_tree_insert(struct page **kpage, u32 hash, ++ struct rmap_item *rmap_item, ++ struct rmap_item *tree_rmap_item, ++ int *success1, int *success2) ++{ ++ struct rb_node **new = &root_stable_treep->rb_node; ++ struct rb_node *parent = NULL; ++ struct stable_node *stable_node; ++ struct tree_node *tree_node; ++ u32 hash_max = 0; ++ ++ *success1 = *success2 = 0; ++ ++ while (*new) { ++ int cmp; ++ ++ tree_node = rb_entry(*new, struct tree_node, node); ++ ++ cmp = hash_cmp(hash, tree_node->hash); ++ ++ if (cmp < 0) { ++ parent = *new; ++ new = &parent->rb_left; ++ } else if (cmp > 0) { ++ parent = *new; ++ new = &parent->rb_right; ++ } else ++ break; ++ } ++ ++ if (*new) { ++ if (tree_node->count == 1) { ++ stable_node = first_level_insert(tree_node, rmap_item, ++ tree_rmap_item, kpage, ++ hash, success1, success2); ++ } else { ++ stable_node = stable_subtree_insert(tree_node, ++ rmap_item, tree_rmap_item, kpage, ++ hash, success1, success2); ++ } ++ } else { ++ ++ /* no tree node found */ ++ tree_node = alloc_tree_node(stable_tree_node_listp); ++ if (!tree_node) { ++ stable_node = NULL; ++ goto out; ++ } ++ ++ stable_node = new_stable_node(tree_node, *kpage, hash_max); ++ if (!stable_node) { ++ free_tree_node(tree_node); ++ goto out; ++ } ++ ++ tree_node->hash = hash; ++ rb_link_node(&tree_node->node, parent, new); ++ rb_insert_color(&tree_node->node, root_stable_treep); ++ parent = NULL; ++ new = &tree_node->sub_root.rb_node; ++ ++ rb_link_node(&stable_node->node, parent, new); ++ rb_insert_color(&stable_node->node, &tree_node->sub_root); ++ tree_node->count++; ++ *success1 = *success2 = 1; ++ } ++ ++out: ++ return stable_node; ++} ++ ++ ++/** ++ * get_tree_rmap_item_page() - try to get the page and lock the mmap_sem ++ * ++ * @return 0 on success, -EBUSY if unable to lock the mmap_sem, ++ * -EINVAL if the page mapping has been changed. ++ */ ++static inline int get_tree_rmap_item_page(struct rmap_item *tree_rmap_item) ++{ ++ int err; ++ ++ err = get_mergeable_page_lock_mmap(tree_rmap_item); ++ ++ if (err == -EINVAL) { ++ /* its page map has been changed, remove it */ ++ remove_rmap_item_from_tree(tree_rmap_item); ++ } ++ ++ /* The page is gotten and mmap_sem is locked now. */ ++ return err; ++} ++ ++ ++/** ++ * unstable_tree_search_insert() - search an unstable tree rmap_item with the ++ * same hash value. 
Get its page and trylock the mmap_sem ++ */ ++static inline ++struct rmap_item *unstable_tree_search_insert(struct rmap_item *rmap_item, ++ u32 hash) ++ ++{ ++ struct rb_node **new = &root_unstable_tree.rb_node; ++ struct rb_node *parent = NULL; ++ struct tree_node *tree_node; ++ u32 hash_max; ++ struct rmap_item *tree_rmap_item; ++ ++ while (*new) { ++ int cmp; ++ ++ tree_node = rb_entry(*new, struct tree_node, node); ++ ++ cmp = hash_cmp(hash, tree_node->hash); ++ ++ if (cmp < 0) { ++ parent = *new; ++ new = &parent->rb_left; ++ } else if (cmp > 0) { ++ parent = *new; ++ new = &parent->rb_right; ++ } else ++ break; ++ } ++ ++ if (*new) { ++ /* got the tree_node */ ++ if (tree_node->count == 1) { ++ tree_rmap_item = rb_entry(tree_node->sub_root.rb_node, ++ struct rmap_item, node); ++ BUG_ON(!tree_rmap_item); ++ ++ goto get_page_out; ++ } ++ ++ /* well, search the collision subtree */ ++ new = &tree_node->sub_root.rb_node; ++ BUG_ON(!*new); ++ hash_max = rmap_item_hash_max(rmap_item, hash); ++ ++ while (*new) { ++ int cmp; ++ ++ tree_rmap_item = rb_entry(*new, struct rmap_item, ++ node); ++ ++ cmp = hash_cmp(hash_max, tree_rmap_item->hash_max); ++ parent = *new; ++ if (cmp < 0) ++ new = &parent->rb_left; ++ else if (cmp > 0) ++ new = &parent->rb_right; ++ else ++ goto get_page_out; ++ } ++ } else { ++ /* alloc a new tree_node */ ++ tree_node = alloc_tree_node(&unstable_tree_node_list); ++ if (!tree_node) ++ return NULL; ++ ++ tree_node->hash = hash; ++ rb_link_node(&tree_node->node, parent, new); ++ rb_insert_color(&tree_node->node, &root_unstable_tree); ++ parent = NULL; ++ new = &tree_node->sub_root.rb_node; ++ } ++ ++ /* did not found even in sub-tree */ ++ rmap_item->tree_node = tree_node; ++ rmap_item->address |= UNSTABLE_FLAG; ++ rmap_item->hash_round = uksm_hash_round; ++ rb_link_node(&rmap_item->node, parent, new); ++ rb_insert_color(&rmap_item->node, &tree_node->sub_root); ++ ++ uksm_pages_unshared++; ++ return NULL; ++ ++get_page_out: ++ if (tree_rmap_item->page == rmap_item->page) ++ return NULL; ++ ++ if (get_tree_rmap_item_page(tree_rmap_item)) ++ return NULL; ++ ++ return tree_rmap_item; ++} ++ ++static void hold_anon_vma(struct rmap_item *rmap_item, ++ struct anon_vma *anon_vma) ++{ ++ rmap_item->anon_vma = anon_vma; ++ get_anon_vma(anon_vma); ++} ++ ++ ++/** ++ * stable_tree_append() - append a rmap_item to a stable node. Deduplication ++ * ratio statistics is done in this function. 
++ * ++ */ ++static void stable_tree_append(struct rmap_item *rmap_item, ++ struct stable_node *stable_node, int logdedup) ++{ ++ struct node_vma *node_vma = NULL, *new_node_vma; ++ struct hlist_node *hlist = NULL, *cont_p = NULL; ++ unsigned long key = (unsigned long)rmap_item->slot; ++ unsigned long factor = rmap_item->slot->rung->step; ++ ++ BUG_ON(!stable_node); ++ rmap_item->address |= STABLE_FLAG; ++ ++ if (hlist_empty(&stable_node->hlist)) { ++ uksm_pages_shared++; ++ goto node_vma_new; ++ } else { ++ uksm_pages_sharing++; ++ } ++ ++ hlist_for_each_entry(node_vma, hlist, &stable_node->hlist, hlist) { ++ if (node_vma->key >= key) ++ break; ++ ++ if (logdedup) { ++ node_vma->slot->pages_bemerged += factor; ++ if (list_empty(&node_vma->slot->dedup_list)) ++ list_add(&node_vma->slot->dedup_list, ++ &vma_slot_dedup); ++ } ++ } ++ ++ if (node_vma) { ++ if (node_vma->key == key) { ++ cont_p = hlist->next; ++ goto node_vma_ok; ++ } else if (node_vma->key > key) { ++ cont_p = hlist; ++ } ++ } ++ ++node_vma_new: ++ /* no same vma already in node, alloc a new node_vma */ ++ new_node_vma = alloc_node_vma(); ++ BUG_ON(!new_node_vma); ++ new_node_vma->head = stable_node; ++ new_node_vma->slot = rmap_item->slot; ++ ++ if (!node_vma) { ++ hlist_add_head(&new_node_vma->hlist, &stable_node->hlist); ++ } else if (node_vma->key != key) { ++ if (node_vma->key < key) ++ hlist_add_after(&node_vma->hlist, &new_node_vma->hlist); ++ else { ++ hlist_add_before(&new_node_vma->hlist, ++ &node_vma->hlist); ++ } ++ ++ } ++ node_vma = new_node_vma; ++ ++node_vma_ok: /* ok, ready to add to the list */ ++ rmap_item->head = node_vma; ++ hlist_add_head(&rmap_item->hlist, &node_vma->rmap_hlist); ++ hold_anon_vma(rmap_item, rmap_item->slot->vma->anon_vma); ++ if (logdedup) { ++ rmap_item->slot->pages_merged++; ++ if (cont_p) { ++ hlist_for_each_entry_continue(node_vma, ++ cont_p, hlist) { ++ node_vma->slot->pages_bemerged += factor; ++ if (list_empty(&node_vma->slot->dedup_list)) ++ list_add(&node_vma->slot->dedup_list, ++ &vma_slot_dedup); ++ } ++ } ++ } ++} ++ ++/* ++ * We use break_ksm to break COW on a ksm page: it's a stripped down ++ * ++ * if (get_user_pages(current, mm, addr, 1, 1, 1, &page, NULL) == 1) ++ * put_page(page); ++ * ++ * but taking great care only to touch a ksm page, in a VM_MERGEABLE vma, ++ * in case the application has unmapped and remapped mm,addr meanwhile. ++ * Could a ksm page appear anywhere else? Actually yes, in a VM_PFNMAP ++ * mmap of /dev/mem or /dev/kmem, where we would not want to touch it. ++ */ ++static int break_ksm(struct vm_area_struct *vma, unsigned long addr) ++{ ++ struct page *page; ++ int ret = 0; ++ ++ do { ++ cond_resched(); ++ page = follow_page(vma, addr, FOLL_GET); ++ if (IS_ERR_OR_NULL(page)) ++ break; ++ if (PageKsm(page)) { ++ ret = handle_mm_fault(vma->vm_mm, vma, addr, ++ FAULT_FLAG_WRITE); ++ } else ++ ret = VM_FAULT_WRITE; ++ put_page(page); ++ } while (!(ret & (VM_FAULT_WRITE | VM_FAULT_SIGBUS | VM_FAULT_OOM))); ++ /* ++ * We must loop because handle_mm_fault() may back out if there's ++ * any difficulty e.g. if pte accessed bit gets updated concurrently. ++ * ++ * VM_FAULT_WRITE is what we have been hoping for: it indicates that ++ * COW has been broken, even if the vma does not permit VM_WRITE; ++ * but note that a concurrent fault might break PageKsm for us. ++ * ++ * VM_FAULT_SIGBUS could occur if we race with truncation of the ++ * backing file, which also invalidates anonymous pages: that's ++ * okay, that truncation will have unmapped the PageKsm for us. 
++ * ++ * VM_FAULT_OOM: at the time of writing (late July 2009), setting ++ * aside mem_cgroup limits, VM_FAULT_OOM would only be set if the ++ * current task has TIF_MEMDIE set, and will be OOM killed on return ++ * to user; and ksmd, having no mm, would never be chosen for that. ++ * ++ * But if the mm is in a limited mem_cgroup, then the fault may fail ++ * with VM_FAULT_OOM even if the current task is not TIF_MEMDIE; and ++ * even ksmd can fail in this way - though it's usually breaking ksm ++ * just to undo a merge it made a moment before, so unlikely to oom. ++ * ++ * That's a pity: we might therefore have more kernel pages allocated ++ * than we're counting as nodes in the stable tree; but uksm_do_scan ++ * will retry to break_cow on each pass, so should recover the page ++ * in due course. The important thing is to not let VM_MERGEABLE ++ * be cleared while any such pages might remain in the area. ++ */ ++ return (ret & VM_FAULT_OOM) ? -ENOMEM : 0; ++} ++ ++static void break_cow(struct rmap_item *rmap_item) ++{ ++ struct vm_area_struct *vma = rmap_item->slot->vma; ++ struct mm_struct *mm = vma->vm_mm; ++ unsigned long addr = get_rmap_addr(rmap_item); ++ ++ if (uksm_test_exit(mm)) ++ goto out; ++ ++ break_ksm(vma, addr); ++out: ++ return; ++} ++ ++/* ++ * Though it's very tempting to unmerge in_stable_tree(rmap_item)s rather ++ * than check every pte of a given vma, the locking doesn't quite work for ++ * that - an rmap_item is assigned to the stable tree after inserting ksm ++ * page and upping mmap_sem. Nor does it fit with the way we skip dup'ing ++ * rmap_items from parent to child at fork time (so as not to waste time ++ * if exit comes before the next scan reaches it). ++ * ++ * Similarly, although we'd like to remove rmap_items (so updating counts ++ * and freeing memory) when unmerging an area, it's easier to leave that ++ * to the next pass of ksmd - consider, for example, how ksmd might be ++ * in cmp_and_merge_page on one of the rmap_items we would be removing. 
++ */ ++inline int unmerge_uksm_pages(struct vm_area_struct *vma, ++ unsigned long start, unsigned long end) ++{ ++ unsigned long addr; ++ int err = 0; ++ ++ for (addr = start; addr < end && !err; addr += PAGE_SIZE) { ++ if (uksm_test_exit(vma->vm_mm)) ++ break; ++ if (signal_pending(current)) ++ err = -ERESTARTSYS; ++ else ++ err = break_ksm(vma, addr); ++ } ++ return err; ++} ++ ++static inline void inc_uksm_pages_scanned(void) ++{ ++ u64 delta; ++ ++ ++ if (uksm_pages_scanned == U64_MAX) { ++ encode_benefit(); ++ ++ delta = uksm_pages_scanned >> pages_scanned_base; ++ ++ if (CAN_OVERFLOW_U64(pages_scanned_stored, delta)) { ++ pages_scanned_stored >>= 1; ++ delta >>= 1; ++ pages_scanned_base++; ++ } ++ ++ pages_scanned_stored += delta; ++ ++ uksm_pages_scanned = uksm_pages_scanned_last = 0; ++ } ++ ++ uksm_pages_scanned++; ++} ++ ++static inline int find_zero_page_hash(int strength, u32 hash) ++{ ++ return (zero_hash_table[strength] == hash); ++} ++ ++static ++int cmp_and_merge_zero_page(struct vm_area_struct *vma, struct page *page) ++{ ++ struct page *zero_page = empty_uksm_zero_page; ++ struct mm_struct *mm = vma->vm_mm; ++ pte_t orig_pte = __pte(0); ++ int err = -EFAULT; ++ ++ if (uksm_test_exit(mm)) ++ goto out; ++ ++ if (PageTransCompound(page) && page_trans_compound_anon_split(page)) ++ goto out; ++ BUG_ON(PageTransCompound(page)); ++ ++ if (!PageAnon(page)) ++ goto out; ++ ++ if (!trylock_page(page)) ++ goto out; ++ ++ if (write_protect_page(vma, page, &orig_pte, 0) == 0) { ++ if (is_page_full_zero(page)) ++ err = replace_page(vma, page, zero_page, orig_pte); ++ } ++ ++ unlock_page(page); ++out: ++ return err; ++} ++ ++/* ++ * cmp_and_merge_page() - first see if page can be merged into the stable ++ * tree; if not, compare hash to previous and if it's the same, see if page ++ * can be inserted into the unstable tree, or merged with a page already there ++ * and both transferred to the stable tree. ++ * ++ * @page: the page that we are searching identical page to. ++ * @rmap_item: the reverse mapping into the virtual address of this page ++ */ ++static void cmp_and_merge_page(struct rmap_item *rmap_item, u32 hash) ++{ ++ struct rmap_item *tree_rmap_item; ++ struct page *page; ++ struct page *kpage = NULL; ++ u32 hash_max; ++ int err; ++ unsigned int success1, success2; ++ struct stable_node *snode; ++ int cmp; ++ struct rb_node *parent = NULL, **new; ++ ++ remove_rmap_item_from_tree(rmap_item); ++ page = rmap_item->page; ++ ++ /* We first start with searching the page inside the stable tree */ ++ kpage = stable_tree_search(rmap_item, hash); ++ if (kpage) { ++ err = try_to_merge_with_uksm_page(rmap_item, kpage, ++ hash); ++ if (!err) { ++ /* ++ * The page was successfully merged, add ++ * its rmap_item to the stable tree. ++ * page lock is needed because it's ++ * racing with try_to_unmap_ksm(), etc. ++ */ ++ lock_page(kpage); ++ snode = page_stable_node(kpage); ++ stable_tree_append(rmap_item, snode, 1); ++ unlock_page(kpage); ++ put_page(kpage); ++ return; /* success */ ++ } ++ put_page(kpage); ++ ++ /* ++ * if it's a collision and it has been search in sub-rbtree ++ * (hash_max != 0), we want to abort, because if it is ++ * successfully merged in unstable tree, the collision trends to ++ * happen again. 
++ */ ++ if (err == MERGE_ERR_COLLI && rmap_item->hash_max) ++ return; ++ } ++ ++ tree_rmap_item = ++ unstable_tree_search_insert(rmap_item, hash); ++ if (tree_rmap_item) { ++ err = try_to_merge_two_pages(rmap_item, tree_rmap_item, hash); ++ /* ++ * As soon as we merge this page, we want to remove the ++ * rmap_item of the page we have merged with from the unstable ++ * tree, and insert it instead as new node in the stable tree. ++ */ ++ if (!err) { ++ kpage = page; ++ remove_rmap_item_from_tree(tree_rmap_item); ++ lock_page(kpage); ++ snode = stable_tree_insert(&kpage, hash, ++ rmap_item, tree_rmap_item, ++ &success1, &success2); ++ ++ /* ++ * Do not log dedup for tree item, it's not counted as ++ * scanned in this round. ++ */ ++ if (success2) ++ stable_tree_append(tree_rmap_item, snode, 0); ++ ++ /* ++ * The order of these two stable append is important: ++ * we are scanning rmap_item. ++ */ ++ if (success1) ++ stable_tree_append(rmap_item, snode, 1); ++ ++ /* ++ * The original kpage may be unlocked inside ++ * stable_tree_insert() already. This page ++ * should be unlocked before doing ++ * break_cow(). ++ */ ++ unlock_page(kpage); ++ ++ if (!success1) ++ break_cow(rmap_item); ++ ++ if (!success2) ++ break_cow(tree_rmap_item); ++ ++ } else if (err == MERGE_ERR_COLLI) { ++ BUG_ON(tree_rmap_item->tree_node->count > 1); ++ ++ rmap_item_hash_max(tree_rmap_item, ++ tree_rmap_item->tree_node->hash); ++ ++ hash_max = rmap_item_hash_max(rmap_item, hash); ++ cmp = hash_cmp(hash_max, tree_rmap_item->hash_max); ++ parent = &tree_rmap_item->node; ++ if (cmp < 0) ++ new = &parent->rb_left; ++ else if (cmp > 0) ++ new = &parent->rb_right; ++ else ++ goto put_up_out; ++ ++ rmap_item->tree_node = tree_rmap_item->tree_node; ++ rmap_item->address |= UNSTABLE_FLAG; ++ rmap_item->hash_round = uksm_hash_round; ++ rb_link_node(&rmap_item->node, parent, new); ++ rb_insert_color(&rmap_item->node, ++ &tree_rmap_item->tree_node->sub_root); ++ rmap_item->tree_node->count++; ++ } else { ++ /* ++ * either one of the page has changed or they collide ++ * at the max hash, we consider them as ill items. 
++ */ ++ remove_rmap_item_from_tree(tree_rmap_item); ++ } ++put_up_out: ++ put_page(tree_rmap_item->page); ++ up_read(&tree_rmap_item->slot->vma->vm_mm->mmap_sem); ++ } ++} ++ ++ ++ ++ ++static inline unsigned long get_pool_index(struct vma_slot *slot, ++ unsigned long index) ++{ ++ unsigned long pool_index; ++ ++ pool_index = (sizeof(struct rmap_list_entry *) * index) >> PAGE_SHIFT; ++ if (pool_index >= slot->pool_size) ++ BUG(); ++ return pool_index; ++} ++ ++static inline unsigned long index_page_offset(unsigned long index) ++{ ++ return offset_in_page(sizeof(struct rmap_list_entry *) * index); ++} ++ ++static inline ++struct rmap_list_entry *get_rmap_list_entry(struct vma_slot *slot, ++ unsigned long index, int need_alloc) ++{ ++ unsigned long pool_index; ++ struct page *page; ++ void *addr; ++ ++ ++ pool_index = get_pool_index(slot, index); ++ if (!slot->rmap_list_pool[pool_index]) { ++ if (!need_alloc) ++ return NULL; ++ ++ page = alloc_page(GFP_KERNEL | __GFP_ZERO); ++ if (!page) ++ return NULL; ++ ++ slot->rmap_list_pool[pool_index] = page; ++ } ++ ++ addr = kmap(slot->rmap_list_pool[pool_index]); ++ addr += index_page_offset(index); ++ ++ return addr; ++} ++ ++static inline void put_rmap_list_entry(struct vma_slot *slot, ++ unsigned long index) ++{ ++ unsigned long pool_index; ++ ++ pool_index = get_pool_index(slot, index); ++ BUG_ON(!slot->rmap_list_pool[pool_index]); ++ kunmap(slot->rmap_list_pool[pool_index]); ++} ++ ++static inline int entry_is_new(struct rmap_list_entry *entry) ++{ ++ return !entry->item; ++} ++ ++static inline unsigned long get_index_orig_addr(struct vma_slot *slot, ++ unsigned long index) ++{ ++ return slot->vma->vm_start + (index << PAGE_SHIFT); ++} ++ ++static inline unsigned long get_entry_address(struct rmap_list_entry *entry) ++{ ++ unsigned long addr; ++ ++ if (is_addr(entry->addr)) ++ addr = get_clean_addr(entry->addr); ++ else if (entry->item) ++ addr = get_rmap_addr(entry->item); ++ else ++ BUG(); ++ ++ return addr; ++} ++ ++static inline struct rmap_item *get_entry_item(struct rmap_list_entry *entry) ++{ ++ if (is_addr(entry->addr)) ++ return NULL; ++ ++ return entry->item; ++} ++ ++static inline void inc_rmap_list_pool_count(struct vma_slot *slot, ++ unsigned long index) ++{ ++ unsigned long pool_index; ++ ++ pool_index = get_pool_index(slot, index); ++ BUG_ON(!slot->rmap_list_pool[pool_index]); ++ slot->pool_counts[pool_index]++; ++} ++ ++static inline void dec_rmap_list_pool_count(struct vma_slot *slot, ++ unsigned long index) ++{ ++ unsigned long pool_index; ++ ++ pool_index = get_pool_index(slot, index); ++ BUG_ON(!slot->rmap_list_pool[pool_index]); ++ BUG_ON(!slot->pool_counts[pool_index]); ++ slot->pool_counts[pool_index]--; ++} ++ ++static inline int entry_has_rmap(struct rmap_list_entry *entry) ++{ ++ return !is_addr(entry->addr) && entry->item; ++} ++ ++static inline void swap_entries(struct rmap_list_entry *entry1, ++ unsigned long index1, ++ struct rmap_list_entry *entry2, ++ unsigned long index2) ++{ ++ struct rmap_list_entry tmp; ++ ++ /* swapping two new entries is meaningless */ ++ BUG_ON(entry_is_new(entry1) && entry_is_new(entry2)); ++ ++ tmp = *entry1; ++ *entry1 = *entry2; ++ *entry2 = tmp; ++ ++ if (entry_has_rmap(entry1)) ++ entry1->item->entry_index = index1; ++ ++ if (entry_has_rmap(entry2)) ++ entry2->item->entry_index = index2; ++ ++ if (entry_has_rmap(entry1) && !entry_has_rmap(entry2)) { ++ inc_rmap_list_pool_count(entry1->item->slot, index1); ++ dec_rmap_list_pool_count(entry1->item->slot, index2); ++ } else if 
(!entry_has_rmap(entry1) && entry_has_rmap(entry2)) { ++ inc_rmap_list_pool_count(entry2->item->slot, index2); ++ dec_rmap_list_pool_count(entry2->item->slot, index1); ++ } ++} ++ ++static inline void free_entry_item(struct rmap_list_entry *entry) ++{ ++ unsigned long index; ++ struct rmap_item *item; ++ ++ if (!is_addr(entry->addr)) { ++ BUG_ON(!entry->item); ++ item = entry->item; ++ entry->addr = get_rmap_addr(item); ++ set_is_addr(entry->addr); ++ index = item->entry_index; ++ remove_rmap_item_from_tree(item); ++ dec_rmap_list_pool_count(item->slot, index); ++ free_rmap_item(item); ++ } ++} ++ ++static inline int pool_entry_boundary(unsigned long index) ++{ ++ unsigned long linear_addr; ++ ++ linear_addr = sizeof(struct rmap_list_entry *) * index; ++ return index && !offset_in_page(linear_addr); ++} ++ ++static inline void try_free_last_pool(struct vma_slot *slot, ++ unsigned long index) ++{ ++ unsigned long pool_index; ++ ++ pool_index = get_pool_index(slot, index); ++ if (slot->rmap_list_pool[pool_index] && ++ !slot->pool_counts[pool_index]) { ++ __free_page(slot->rmap_list_pool[pool_index]); ++ slot->rmap_list_pool[pool_index] = NULL; ++ slot->flags |= UKSM_SLOT_NEED_SORT; ++ } ++ ++} ++ ++static inline unsigned long vma_item_index(struct vm_area_struct *vma, ++ struct rmap_item *item) ++{ ++ return (get_rmap_addr(item) - vma->vm_start) >> PAGE_SHIFT; ++} ++ ++static int within_same_pool(struct vma_slot *slot, ++ unsigned long i, unsigned long j) ++{ ++ unsigned long pool_i, pool_j; ++ ++ pool_i = get_pool_index(slot, i); ++ pool_j = get_pool_index(slot, j); ++ ++ return (pool_i == pool_j); ++} ++ ++static void sort_rmap_entry_list(struct vma_slot *slot) ++{ ++ unsigned long i, j; ++ struct rmap_list_entry *entry, *swap_entry; ++ ++ entry = get_rmap_list_entry(slot, 0, 0); ++ for (i = 0; i < slot->pages; ) { ++ ++ if (!entry) ++ goto skip_whole_pool; ++ ++ if (entry_is_new(entry)) ++ goto next_entry; ++ ++ if (is_addr(entry->addr)) { ++ entry->addr = 0; ++ goto next_entry; ++ } ++ ++ j = vma_item_index(slot->vma, entry->item); ++ if (j == i) ++ goto next_entry; ++ ++ if (within_same_pool(slot, i, j)) ++ swap_entry = entry + j - i; ++ else ++ swap_entry = get_rmap_list_entry(slot, j, 1); ++ ++ swap_entries(entry, i, swap_entry, j); ++ if (!within_same_pool(slot, i, j)) ++ put_rmap_list_entry(slot, j); ++ continue; ++ ++skip_whole_pool: ++ i += PAGE_SIZE / sizeof(*entry); ++ if (i < slot->pages) ++ entry = get_rmap_list_entry(slot, i, 0); ++ continue; ++ ++next_entry: ++ if (i >= slot->pages - 1 || ++ !within_same_pool(slot, i, i + 1)) { ++ put_rmap_list_entry(slot, i); ++ if (i + 1 < slot->pages) ++ entry = get_rmap_list_entry(slot, i + 1, 0); ++ } else ++ entry++; ++ i++; ++ continue; ++ } ++ ++ /* free empty pool entries which contain no rmap_item */ ++ /* CAN be simplied to based on only pool_counts when bug freed !!!!! 
*/ ++ for (i = 0; i < slot->pool_size; i++) { ++ unsigned char has_rmap; ++ void *addr; ++ ++ if (!slot->rmap_list_pool[i]) ++ continue; ++ ++ has_rmap = 0; ++ addr = kmap(slot->rmap_list_pool[i]); ++ BUG_ON(!addr); ++ for (j = 0; j < PAGE_SIZE / sizeof(*entry); j++) { ++ entry = (struct rmap_list_entry *)addr + j; ++ if (is_addr(entry->addr)) ++ continue; ++ if (!entry->item) ++ continue; ++ has_rmap = 1; ++ } ++ kunmap(slot->rmap_list_pool[i]); ++ if (!has_rmap) { ++ BUG_ON(slot->pool_counts[i]); ++ __free_page(slot->rmap_list_pool[i]); ++ slot->rmap_list_pool[i] = NULL; ++ } ++ } ++ ++ slot->flags &= ~UKSM_SLOT_NEED_SORT; ++} ++ ++/* ++ * vma_fully_scanned() - if all the pages in this slot have been scanned. ++ */ ++static inline int vma_fully_scanned(struct vma_slot *slot) ++{ ++ return slot->pages_scanned == slot->pages; ++} ++ ++/** ++ * get_next_rmap_item() - Get the next rmap_item in a vma_slot according to ++ * its random permutation. This function is embedded with the random ++ * permutation index management code. ++ */ ++static struct rmap_item *get_next_rmap_item(struct vma_slot *slot, u32 *hash) ++{ ++ unsigned long rand_range, addr, swap_index, scan_index; ++ struct rmap_item *item = NULL; ++ struct rmap_list_entry *scan_entry, *swap_entry = NULL; ++ struct page *page; ++ ++ scan_index = swap_index = slot->pages_scanned % slot->pages; ++ ++ if (pool_entry_boundary(scan_index)) ++ try_free_last_pool(slot, scan_index - 1); ++ ++ if (vma_fully_scanned(slot)) { ++ if (slot->flags & UKSM_SLOT_NEED_SORT) ++ slot->flags |= UKSM_SLOT_NEED_RERAND; ++ else ++ slot->flags &= ~UKSM_SLOT_NEED_RERAND; ++ if (slot->flags & UKSM_SLOT_NEED_SORT) ++ sort_rmap_entry_list(slot); ++ } ++ ++ scan_entry = get_rmap_list_entry(slot, scan_index, 1); ++ if (!scan_entry) ++ return NULL; ++ ++ if (entry_is_new(scan_entry)) { ++ scan_entry->addr = get_index_orig_addr(slot, scan_index); ++ set_is_addr(scan_entry->addr); ++ } ++ ++ if (slot->flags & UKSM_SLOT_NEED_RERAND) { ++ rand_range = slot->pages - scan_index; ++ BUG_ON(!rand_range); ++ swap_index = scan_index + (random32() % rand_range); ++ } ++ ++ if (swap_index != scan_index) { ++ swap_entry = get_rmap_list_entry(slot, swap_index, 1); ++ if (entry_is_new(swap_entry)) { ++ swap_entry->addr = get_index_orig_addr(slot, ++ swap_index); ++ set_is_addr(swap_entry->addr); ++ } ++ swap_entries(scan_entry, scan_index, swap_entry, swap_index); ++ } ++ ++ addr = get_entry_address(scan_entry); ++ item = get_entry_item(scan_entry); ++ BUG_ON(addr > slot->vma->vm_end || addr < slot->vma->vm_start); ++ ++ page = follow_page(slot->vma, addr, FOLL_GET); ++ if (IS_ERR_OR_NULL(page)) ++ goto nopage; ++ ++ if (!PageAnon(page) && !page_trans_compound_anon(page)) ++ goto putpage; ++ ++ /*check is zero_page pfn or uksm_zero_page*/ ++ if ((page_to_pfn(page) == zero_pfn) ++ || (page_to_pfn(page) == uksm_zero_pfn)) ++ goto putpage; ++ ++ flush_anon_page(slot->vma, page, addr); ++ flush_dcache_page(page); ++ ++ ++ *hash = page_hash(page, hash_strength, 1); ++ inc_uksm_pages_scanned(); ++ /*if the page content all zero, re-map to zero-page*/ ++ if (find_zero_page_hash(hash_strength, *hash)) { ++ if (!cmp_and_merge_zero_page(slot->vma, page)) { ++ slot->pages_merged++; ++ __inc_zone_page_state(page, NR_UKSM_ZERO_PAGES); ++ dec_mm_counter(slot->mm, MM_ANONPAGES); ++ ++ /* For full-zero pages, no need to create rmap item */ ++ goto putpage; ++ } else { ++ inc_rshash_neg(memcmp_cost / 2); ++ } ++ } ++ ++ if (!item) { ++ item = alloc_rmap_item(); ++ if (item) { ++ /* It has already 
been zeroed */ ++ item->slot = slot; ++ item->address = addr; ++ item->entry_index = scan_index; ++ scan_entry->item = item; ++ inc_rmap_list_pool_count(slot, scan_index); ++ } else ++ goto putpage; ++ } ++ ++ BUG_ON(item->slot != slot); ++ /* the page may have changed */ ++ item->page = page; ++ put_rmap_list_entry(slot, scan_index); ++ if (swap_entry) ++ put_rmap_list_entry(slot, swap_index); ++ return item; ++ ++putpage: ++ put_page(page); ++ page = NULL; ++nopage: ++ /* no page, store addr back and free rmap_item if possible */ ++ free_entry_item(scan_entry); ++ put_rmap_list_entry(slot, scan_index); ++ if (swap_entry) ++ put_rmap_list_entry(slot, swap_index); ++ return NULL; ++} ++ ++static inline int in_stable_tree(struct rmap_item *rmap_item) ++{ ++ return rmap_item->address & STABLE_FLAG; ++} ++ ++/** ++ * scan_vma_one_page() - scan the next page in a vma_slot. Called with ++ * mmap_sem locked. ++ */ ++static noinline void scan_vma_one_page(struct vma_slot *slot) ++{ ++ u32 hash; ++ struct mm_struct *mm; ++ struct rmap_item *rmap_item = NULL; ++ struct vm_area_struct *vma = slot->vma; ++ ++ mm = vma->vm_mm; ++ BUG_ON(!mm); ++ BUG_ON(!slot); ++ ++ rmap_item = get_next_rmap_item(slot, &hash); ++ if (!rmap_item) ++ goto out1; ++ ++ if (PageKsm(rmap_item->page) && in_stable_tree(rmap_item)) ++ goto out2; ++ ++ cmp_and_merge_page(rmap_item, hash); ++out2: ++ put_page(rmap_item->page); ++out1: ++ slot->pages_scanned++; ++ if (slot->fully_scanned_round != fully_scanned_round) ++ scanned_virtual_pages++; ++ ++ if (vma_fully_scanned(slot)) ++ slot->fully_scanned_round = fully_scanned_round; ++} ++ ++static inline unsigned long rung_get_pages(struct scan_rung *rung) ++{ ++ struct slot_tree_node *node; ++ ++ if (!rung->vma_root.rnode) ++ return 0; ++ ++ node = container_of(rung->vma_root.rnode, struct slot_tree_node, snode); ++ ++ return node->size; ++} ++ ++#define RUNG_SAMPLED_MIN 3 ++ ++static inline ++void uksm_calc_rung_step(struct scan_rung *rung, ++ unsigned long page_time, unsigned long ratio) ++{ ++ unsigned long sampled, pages; ++ ++ /* will be fully scanned ? */ ++ if (!rung->cover_msecs) { ++ rung->step = 1; ++ return; ++ } ++ ++ sampled = rung->cover_msecs * (NSEC_PER_MSEC / TIME_RATIO_SCALE) ++ * ratio / page_time; ++ ++ /* ++ * Before we finsish a scan round and expensive per-round jobs, ++ * we need to have a chance to estimate the per page time. So ++ * the sampled number can not be too small. 
++ */ ++ if (sampled < RUNG_SAMPLED_MIN) ++ sampled = RUNG_SAMPLED_MIN; ++ ++ pages = rung_get_pages(rung); ++ if (likely(pages > sampled)) ++ rung->step = pages / sampled; ++ else ++ rung->step = 1; ++} ++ ++static inline int step_need_recalc(struct scan_rung *rung) ++{ ++ unsigned long pages, stepmax; ++ ++ pages = rung_get_pages(rung); ++ stepmax = pages / RUNG_SAMPLED_MIN; ++ ++ return pages && (rung->step > pages || ++ (stepmax && rung->step > stepmax)); ++} ++ ++static inline ++void reset_current_scan(struct scan_rung *rung, int finished, int step_recalc) ++{ ++ struct vma_slot *slot; ++ ++ if (finished) ++ rung->flags |= UKSM_RUNG_ROUND_FINISHED; ++ ++ if (step_recalc || step_need_recalc(rung)) { ++ uksm_calc_rung_step(rung, uksm_ema_page_time, rung->cpu_ratio); ++ BUG_ON(step_need_recalc(rung)); ++ } ++ ++ slot_iter_index = random32() % rung->step; ++ BUG_ON(!rung->vma_root.rnode); ++ slot = sradix_tree_next(&rung->vma_root, NULL, 0, slot_iter); ++ BUG_ON(!slot); ++ ++ rung->current_scan = slot; ++ rung->current_offset = slot_iter_index; ++} ++ ++static inline struct sradix_tree_root *slot_get_root(struct vma_slot *slot) ++{ ++ return &slot->rung->vma_root; ++} ++ ++/* ++ * return if resetted. ++ */ ++static int advance_current_scan(struct scan_rung *rung) ++{ ++ unsigned short n; ++ struct vma_slot *slot, *next = NULL; ++ ++ BUG_ON(!rung->vma_root.num); ++ ++ slot = rung->current_scan; ++ n = (slot->pages - rung->current_offset) % rung->step; ++ slot_iter_index = rung->step - n; ++ next = sradix_tree_next(&rung->vma_root, slot->snode, ++ slot->sindex, slot_iter); ++ ++ if (next) { ++ rung->current_offset = slot_iter_index; ++ rung->current_scan = next; ++ return 0; ++ } else { ++ reset_current_scan(rung, 1, 0); ++ return 1; ++ } ++} ++ ++static inline void rung_rm_slot(struct vma_slot *slot) ++{ ++ struct scan_rung *rung = slot->rung; ++ struct sradix_tree_root *root; ++ ++ if (rung->current_scan == slot) ++ advance_current_scan(rung); ++ ++ root = slot_get_root(slot); ++ sradix_tree_delete_from_leaf(root, slot->snode, slot->sindex); ++ slot->snode = NULL; ++ if (step_need_recalc(rung)) { ++ uksm_calc_rung_step(rung, uksm_ema_page_time, rung->cpu_ratio); ++ BUG_ON(step_need_recalc(rung)); ++ } ++ ++ /* In case advance_current_scan loop back to this slot again */ ++ if (rung->vma_root.num && rung->current_scan == slot) ++ reset_current_scan(slot->rung, 1, 0); ++} ++ ++static inline void rung_add_new_slots(struct scan_rung *rung, ++ struct vma_slot **slots, unsigned long num) ++{ ++ int err; ++ struct vma_slot *slot; ++ unsigned long i; ++ struct sradix_tree_root *root = &rung->vma_root; ++ ++ err = sradix_tree_enter(root, (void **)slots, num); ++ BUG_ON(err); ++ ++ for (i = 0; i < num; i++) { ++ slot = slots[i]; ++ slot->rung = rung; ++ BUG_ON(vma_fully_scanned(slot)); ++ } ++ ++ if (rung->vma_root.num == num) ++ reset_current_scan(rung, 0, 1); ++} ++ ++static inline int rung_add_one_slot(struct scan_rung *rung, ++ struct vma_slot *slot) ++{ ++ int err; ++ ++ err = sradix_tree_enter(&rung->vma_root, (void **)&slot, 1); ++ if (err) ++ return err; ++ ++ slot->rung = rung; ++ if (rung->vma_root.num == 1) ++ reset_current_scan(rung, 0, 1); ++ ++ return 0; ++} ++ ++/* ++ * Return true if the slot is deleted from its rung. 
++ */ ++static inline int vma_rung_enter(struct vma_slot *slot, struct scan_rung *rung) ++{ ++ struct scan_rung *old_rung = slot->rung; ++ int err; ++ ++ if (old_rung == rung) ++ return 0; ++ ++ rung_rm_slot(slot); ++ err = rung_add_one_slot(rung, slot); ++ if (err) { ++ err = rung_add_one_slot(old_rung, slot); ++ WARN_ON(err); /* OOPS, badly OOM, we lost this slot */ ++ } ++ ++ return 1; ++} ++ ++static inline int vma_rung_up(struct vma_slot *slot) ++{ ++ struct scan_rung *rung; ++ ++ rung = slot->rung; ++ if (slot->rung != &uksm_scan_ladder[SCAN_LADDER_SIZE-1]) ++ rung++; ++ ++ return vma_rung_enter(slot, rung); ++} ++ ++static inline int vma_rung_down(struct vma_slot *slot) ++{ ++ struct scan_rung *rung; ++ ++ rung = slot->rung; ++ if (slot->rung != &uksm_scan_ladder[0]) ++ rung--; ++ ++ return vma_rung_enter(slot, rung); ++} ++ ++/** ++ * cal_dedup_ratio() - Calculate the deduplication ratio for this slot. ++ */ ++static unsigned long cal_dedup_ratio(struct vma_slot *slot) ++{ ++ unsigned long ret; ++ ++ BUG_ON(slot->pages_scanned == slot->last_scanned); ++ ++ ret = slot->pages_merged; ++ ++ /* Thrashing area filtering */ ++ if (ret && uksm_thrash_threshold) { ++ if (slot->pages_cowed * 100 / slot->pages_merged ++ > uksm_thrash_threshold) { ++ ret = 0; ++ } else { ++ ret = slot->pages_merged - slot->pages_cowed; ++ } ++ } ++ ++ return ret; ++} ++ ++/** ++ * cal_dedup_ratio() - Calculate the deduplication ratio for this slot. ++ */ ++static unsigned long cal_dedup_ratio_old(struct vma_slot *slot) ++{ ++ unsigned long ret; ++ unsigned long pages_scanned; ++ ++ pages_scanned = slot->pages_scanned; ++ if (!pages_scanned) { ++ if (uksm_thrash_threshold) ++ return 0; ++ else ++ pages_scanned = slot->pages_scanned; ++ } ++ ++ ret = slot->pages_bemerged * 100 / pages_scanned; ++ ++ /* Thrashing area filtering */ ++ if (ret && uksm_thrash_threshold) { ++ if (slot->pages_cowed * 100 / slot->pages_bemerged ++ > uksm_thrash_threshold) { ++ ret = 0; ++ } else { ++ ret = slot->pages_bemerged - slot->pages_cowed; ++ } ++ } ++ ++ return ret; ++} ++ ++/** ++ * stable_node_reinsert() - When the hash_strength has been adjusted, the ++ * stable tree need to be restructured, this is the function re-inserting the ++ * stable node. 
++ */ ++static inline void stable_node_reinsert(struct stable_node *new_node, ++ struct page *page, ++ struct rb_root *root_treep, ++ struct list_head *tree_node_listp, ++ u32 hash) ++{ ++ struct rb_node **new = &root_treep->rb_node; ++ struct rb_node *parent = NULL; ++ struct stable_node *stable_node; ++ struct tree_node *tree_node; ++ struct page *tree_page; ++ int cmp; ++ ++ while (*new) { ++ int cmp; ++ ++ tree_node = rb_entry(*new, struct tree_node, node); ++ ++ cmp = hash_cmp(hash, tree_node->hash); ++ ++ if (cmp < 0) { ++ parent = *new; ++ new = &parent->rb_left; ++ } else if (cmp > 0) { ++ parent = *new; ++ new = &parent->rb_right; ++ } else ++ break; ++ } ++ ++ if (*new) { ++ /* find a stable tree node with same first level hash value */ ++ stable_node_hash_max(new_node, page, hash); ++ if (tree_node->count == 1) { ++ stable_node = rb_entry(tree_node->sub_root.rb_node, ++ struct stable_node, node); ++ tree_page = get_uksm_page(stable_node, 1, 0); ++ if (tree_page) { ++ stable_node_hash_max(stable_node, ++ tree_page, hash); ++ put_page(tree_page); ++ ++ /* prepare for stable node insertion */ ++ ++ cmp = hash_cmp(new_node->hash_max, ++ stable_node->hash_max); ++ parent = &stable_node->node; ++ if (cmp < 0) ++ new = &parent->rb_left; ++ else if (cmp > 0) ++ new = &parent->rb_right; ++ else ++ goto failed; ++ ++ goto add_node; ++ } else { ++ /* the only stable_node deleted, the tree node ++ * was not deleted. ++ */ ++ goto tree_node_reuse; ++ } ++ } ++ ++ /* well, search the collision subtree */ ++ new = &tree_node->sub_root.rb_node; ++ parent = NULL; ++ BUG_ON(!*new); ++ while (*new) { ++ int cmp; ++ ++ stable_node = rb_entry(*new, struct stable_node, node); ++ ++ cmp = hash_cmp(new_node->hash_max, ++ stable_node->hash_max); ++ ++ if (cmp < 0) { ++ parent = *new; ++ new = &parent->rb_left; ++ } else if (cmp > 0) { ++ parent = *new; ++ new = &parent->rb_right; ++ } else { ++ /* oh, no, still a collision */ ++ goto failed; ++ } ++ } ++ ++ goto add_node; ++ } ++ ++ /* no tree node found */ ++ tree_node = alloc_tree_node(tree_node_listp); ++ if (!tree_node) { ++ printk(KERN_ERR "UKSM: memory allocation error!\n"); ++ goto failed; ++ } else { ++ tree_node->hash = hash; ++ rb_link_node(&tree_node->node, parent, new); ++ rb_insert_color(&tree_node->node, root_treep); ++ ++tree_node_reuse: ++ /* prepare for stable node insertion */ ++ parent = NULL; ++ new = &tree_node->sub_root.rb_node; ++ } ++ ++add_node: ++ rb_link_node(&new_node->node, parent, new); ++ rb_insert_color(&new_node->node, &tree_node->sub_root); ++ new_node->tree_node = tree_node; ++ tree_node->count++; ++ return; ++ ++failed: ++ /* This can only happen when two nodes have collided ++ * in two levels. ++ */ ++ new_node->tree_node = NULL; ++ return; ++} ++ ++static inline void free_all_tree_nodes(struct list_head *list) ++{ ++ struct tree_node *node, *tmp; ++ ++ list_for_each_entry_safe(node, tmp, list, all_list) { ++ free_tree_node(node); ++ } ++} ++ ++/** ++ * stable_tree_delta_hash() - Delta hash the stable tree from previous hash ++ * strength to the current hash_strength. It re-structures the hole tree. 
++ */ ++static inline void stable_tree_delta_hash(u32 prev_hash_strength) ++{ ++ struct stable_node *node, *tmp; ++ struct rb_root *root_new_treep; ++ struct list_head *new_tree_node_listp; ++ ++ stable_tree_index = (stable_tree_index + 1) % 2; ++ root_new_treep = &root_stable_tree[stable_tree_index]; ++ new_tree_node_listp = &stable_tree_node_list[stable_tree_index]; ++ *root_new_treep = RB_ROOT; ++ BUG_ON(!list_empty(new_tree_node_listp)); ++ ++ /* ++ * we need to be safe, the node could be removed by get_uksm_page() ++ */ ++ list_for_each_entry_safe(node, tmp, &stable_node_list, all_list) { ++ void *addr; ++ struct page *node_page; ++ u32 hash; ++ ++ /* ++ * We are completely re-structuring the stable nodes to a new ++ * stable tree. We don't want to touch the old tree unlinks and ++ * old tree_nodes. The old tree_nodes will be freed at once. ++ */ ++ node_page = get_uksm_page(node, 0, 0); ++ if (!node_page) ++ continue; ++ ++ if (node->tree_node) { ++ hash = node->tree_node->hash; ++ ++ addr = kmap_atomic(node_page, KM_USER0); ++ ++ hash = delta_hash(addr, prev_hash_strength, ++ hash_strength, hash); ++ kunmap_atomic(addr, KM_USER0); ++ } else { ++ /* ++ *it was not inserted to rbtree due to collision in last ++ *round scan. ++ */ ++ hash = page_hash(node_page, hash_strength, 0); ++ } ++ ++ stable_node_reinsert(node, node_page, root_new_treep, ++ new_tree_node_listp, hash); ++ put_page(node_page); ++ } ++ ++ root_stable_treep = root_new_treep; ++ free_all_tree_nodes(stable_tree_node_listp); ++ BUG_ON(!list_empty(stable_tree_node_listp)); ++ stable_tree_node_listp = new_tree_node_listp; ++} ++ ++static inline void inc_hash_strength(unsigned long delta) ++{ ++ hash_strength += 1 << delta; ++ if (hash_strength > HASH_STRENGTH_MAX) ++ hash_strength = HASH_STRENGTH_MAX; ++} ++ ++static inline void dec_hash_strength(unsigned long delta) ++{ ++ unsigned long change = 1 << delta; ++ ++ if (hash_strength <= change + 1) ++ hash_strength = 1; ++ else ++ hash_strength -= change; ++} ++ ++static inline void inc_hash_strength_delta(void) ++{ ++ hash_strength_delta++; ++ if (hash_strength_delta > HASH_STRENGTH_DELTA_MAX) ++ hash_strength_delta = HASH_STRENGTH_DELTA_MAX; ++} ++ ++/* ++static inline unsigned long get_current_neg_ratio(void) ++{ ++ if (!rshash_pos || rshash_neg > rshash_pos) ++ return 100; ++ ++ return div64_u64(100 * rshash_neg , rshash_pos); ++} ++*/ ++ ++static inline unsigned long get_current_neg_ratio(void) ++{ ++ u64 pos = benefit.pos; ++ u64 neg = benefit.neg; ++ ++ if (!neg) ++ return 0; ++ ++ if (!pos || neg > pos) ++ return 100; ++ ++ if (neg > div64_u64(U64_MAX, 100)) ++ pos = div64_u64(pos, 100); ++ else ++ neg *= 100; ++ ++ return div64_u64(neg, pos); ++} ++ ++static inline unsigned long get_current_benefit(void) ++{ ++ u64 pos = benefit.pos; ++ u64 neg = benefit.neg; ++ u64 scanned = benefit.scanned; ++ ++ if (neg > pos) ++ return 0; ++ ++ return div64_u64((pos - neg), scanned); ++} ++ ++static inline int judge_rshash_direction(void) ++{ ++ u64 current_neg_ratio, stable_benefit; ++ u64 current_benefit, delta = 0; ++ int ret = STILL; ++ ++ /* Try to probe a value after the boot, and in case the system ++ are still for a long time. 
*/ ++ if ((fully_scanned_round & 0xFFULL) == 10) { ++ ret = OBSCURE; ++ goto out; ++ } ++ ++ current_neg_ratio = get_current_neg_ratio(); ++ ++ if (current_neg_ratio == 0) { ++ rshash_neg_cont_zero++; ++ if (rshash_neg_cont_zero > 2) ++ return GO_DOWN; ++ else ++ return STILL; ++ } ++ rshash_neg_cont_zero = 0; ++ ++ if (current_neg_ratio > 90) { ++ ret = GO_UP; ++ goto out; ++ } ++ ++ current_benefit = get_current_benefit(); ++ stable_benefit = rshash_state.stable_benefit; ++ ++ if (!stable_benefit) { ++ ret = OBSCURE; ++ goto out; ++ } ++ ++ if (current_benefit > stable_benefit) ++ delta = current_benefit - stable_benefit; ++ else if (current_benefit < stable_benefit) ++ delta = stable_benefit - current_benefit; ++ ++ delta = div64_u64(100 * delta , stable_benefit); ++ ++ if (delta > 50) { ++ rshash_cont_obscure++; ++ if (rshash_cont_obscure > 2) ++ return OBSCURE; ++ else ++ return STILL; ++ } ++ ++out: ++ rshash_cont_obscure = 0; ++ return ret; ++} ++ ++/** ++ * rshash_adjust() - The main function to control the random sampling state ++ * machine for hash strength adapting. ++ * ++ * return true if hash_strength has changed. ++ */ ++static inline int rshash_adjust(void) ++{ ++ unsigned long prev_hash_strength = hash_strength; ++ ++ if (!encode_benefit()) ++ return 0; ++ ++ switch (rshash_state.state) { ++ case RSHASH_STILL: ++ switch (judge_rshash_direction()) { ++ case GO_UP: ++ if (rshash_state.pre_direct == GO_DOWN) ++ hash_strength_delta = 0; ++ ++ inc_hash_strength(hash_strength_delta); ++ inc_hash_strength_delta(); ++ rshash_state.stable_benefit = get_current_benefit(); ++ rshash_state.pre_direct = GO_UP; ++ break; ++ ++ case GO_DOWN: ++ if (rshash_state.pre_direct == GO_UP) ++ hash_strength_delta = 0; ++ ++ dec_hash_strength(hash_strength_delta); ++ inc_hash_strength_delta(); ++ rshash_state.stable_benefit = get_current_benefit(); ++ rshash_state.pre_direct = GO_DOWN; ++ break; ++ ++ case OBSCURE: ++ rshash_state.stable_point = hash_strength; ++ rshash_state.turn_point_down = hash_strength; ++ rshash_state.turn_point_up = hash_strength; ++ rshash_state.turn_benefit_down = get_current_benefit(); ++ rshash_state.turn_benefit_up = get_current_benefit(); ++ rshash_state.lookup_window_index = 0; ++ rshash_state.state = RSHASH_TRYDOWN; ++ dec_hash_strength(hash_strength_delta); ++ inc_hash_strength_delta(); ++ break; ++ ++ case STILL: ++ break; ++ default: ++ BUG(); ++ } ++ break; ++ ++ case RSHASH_TRYDOWN: ++ if (rshash_state.lookup_window_index++ % 5 == 0) ++ rshash_state.below_count = 0; ++ ++ if (get_current_benefit() < rshash_state.stable_benefit) ++ rshash_state.below_count++; ++ else if (get_current_benefit() > ++ rshash_state.turn_benefit_down) { ++ rshash_state.turn_point_down = hash_strength; ++ rshash_state.turn_benefit_down = get_current_benefit(); ++ } ++ ++ if (rshash_state.below_count >= 3 || ++ judge_rshash_direction() == GO_UP || ++ hash_strength == 1) { ++ hash_strength = rshash_state.stable_point; ++ hash_strength_delta = 0; ++ inc_hash_strength(hash_strength_delta); ++ inc_hash_strength_delta(); ++ rshash_state.lookup_window_index = 0; ++ rshash_state.state = RSHASH_TRYUP; ++ hash_strength_delta = 0; ++ } else { ++ dec_hash_strength(hash_strength_delta); ++ inc_hash_strength_delta(); ++ } ++ break; ++ ++ case RSHASH_TRYUP: ++ if (rshash_state.lookup_window_index++ % 5 == 0) ++ rshash_state.below_count = 0; ++ ++ if (get_current_benefit() < rshash_state.turn_benefit_down) ++ rshash_state.below_count++; ++ else if (get_current_benefit() > rshash_state.turn_benefit_up) 
{ ++ rshash_state.turn_point_up = hash_strength; ++ rshash_state.turn_benefit_up = get_current_benefit(); ++ } ++ ++ if (rshash_state.below_count >= 3 || ++ judge_rshash_direction() == GO_DOWN || ++ hash_strength == HASH_STRENGTH_MAX) { ++ hash_strength = rshash_state.turn_benefit_up > ++ rshash_state.turn_benefit_down ? ++ rshash_state.turn_point_up : ++ rshash_state.turn_point_down; ++ ++ rshash_state.state = RSHASH_PRE_STILL; ++ } else { ++ inc_hash_strength(hash_strength_delta); ++ inc_hash_strength_delta(); ++ } ++ ++ break; ++ ++ case RSHASH_NEW: ++ case RSHASH_PRE_STILL: ++ rshash_state.stable_benefit = get_current_benefit(); ++ rshash_state.state = RSHASH_STILL; ++ hash_strength_delta = 0; ++ break; ++ default: ++ BUG(); ++ } ++ ++ /* rshash_neg = rshash_pos = 0; */ ++ reset_benefit(); ++ ++ if (prev_hash_strength != hash_strength) ++ stable_tree_delta_hash(prev_hash_strength); ++ ++ return prev_hash_strength != hash_strength; ++} ++ ++/** ++ * round_update_ladder() - The main function to do update of all the ++ * adjustments whenever a scan round is finished. ++ */ ++static noinline void round_update_ladder(void) ++{ ++ int i; ++ unsigned long dedup; ++ struct vma_slot *slot, *tmp_slot; ++ ++ for (i = 0; i < SCAN_LADDER_SIZE; i++) { ++ uksm_scan_ladder[i].flags &= ~UKSM_RUNG_ROUND_FINISHED; ++ } ++ ++ list_for_each_entry_safe(slot, tmp_slot, &vma_slot_dedup, dedup_list) { ++ ++ /* slot may be rung_rm_slot() when mm exits */ ++ if (slot->snode) { ++ dedup = cal_dedup_ratio_old(slot); ++ if (dedup && dedup >= uksm_abundant_threshold) ++ vma_rung_up(slot); ++ } ++ ++ slot->pages_bemerged = 0; ++ slot->pages_cowed = 0; ++ ++ list_del_init(&slot->dedup_list); ++ } ++} ++ ++static void uksm_del_vma_slot(struct vma_slot *slot) ++{ ++ int i, j; ++ struct rmap_list_entry *entry; ++ ++ if (slot->snode) { ++ /* ++ * In case it just failed when entering the rung, it's not ++ * necessary. 
++ */ ++ rung_rm_slot(slot); ++ } ++ ++ if (!list_empty(&slot->dedup_list)) ++ list_del(&slot->dedup_list); ++ ++ if (!slot->rmap_list_pool || !slot->pool_counts) { ++ /* In case it OOMed in uksm_vma_enter() */ ++ goto out; ++ } ++ ++ for (i = 0; i < slot->pool_size; i++) { ++ void *addr; ++ ++ if (!slot->rmap_list_pool[i]) ++ continue; ++ ++ addr = kmap(slot->rmap_list_pool[i]); ++ for (j = 0; j < PAGE_SIZE / sizeof(*entry); j++) { ++ entry = (struct rmap_list_entry *)addr + j; ++ if (is_addr(entry->addr)) ++ continue; ++ if (!entry->item) ++ continue; ++ ++ remove_rmap_item_from_tree(entry->item); ++ free_rmap_item(entry->item); ++ slot->pool_counts[i]--; ++ } ++ BUG_ON(slot->pool_counts[i]); ++ kunmap(slot->rmap_list_pool[i]); ++ __free_page(slot->rmap_list_pool[i]); ++ } ++ kfree(slot->rmap_list_pool); ++ kfree(slot->pool_counts); ++ ++out: ++ slot->rung = NULL; ++ BUG_ON(uksm_pages_total < slot->pages); ++ if (slot->flags & UKSM_SLOT_IN_UKSM) ++ uksm_pages_total -= slot->pages; ++ ++ if (slot->fully_scanned_round == fully_scanned_round) ++ scanned_virtual_pages -= slot->pages; ++ else ++ scanned_virtual_pages -= slot->pages_scanned; ++ free_vma_slot(slot); ++} ++ ++ ++#define SPIN_LOCK_PERIOD 32 ++static struct vma_slot *cleanup_slots[SPIN_LOCK_PERIOD]; ++static inline void cleanup_vma_slots(void) ++{ ++ struct vma_slot *slot; ++ int i; ++ ++ i = 0; ++ spin_lock(&vma_slot_list_lock); ++ while (!list_empty(&vma_slot_del)) { ++ slot = list_entry(vma_slot_del.next, ++ struct vma_slot, slot_list); ++ list_del(&slot->slot_list); ++ cleanup_slots[i++] = slot; ++ if (i == SPIN_LOCK_PERIOD) { ++ spin_unlock(&vma_slot_list_lock); ++ while (--i >= 0) ++ uksm_del_vma_slot(cleanup_slots[i]); ++ i = 0; ++ spin_lock(&vma_slot_list_lock); ++ } ++ } ++ spin_unlock(&vma_slot_list_lock); ++ ++ while (--i >= 0) ++ uksm_del_vma_slot(cleanup_slots[i]); ++} ++ ++/* ++*exponential moving average formula ++*/ ++static inline unsigned long ema(unsigned long curr, unsigned long last_ema) ++{ ++ /* ++ * For a very high burst, even the ema cannot work well, a false very ++ * high per-page time estimation can result in feedback in very high ++ * overhead of context switch and rung update -- this will then lead ++ * to higher per-page time, this may not converge. ++ * ++ * Instead, we try to approach this value in a binary manner. ++ */ ++ if (curr > last_ema * 10) ++ return last_ema * 2; ++ ++ return (EMA_ALPHA * curr + (100 - EMA_ALPHA) * last_ema) / 100; ++} ++ ++/* ++ * convert cpu ratio in 1/TIME_RATIO_SCALE configured by user to ++ * nanoseconds based on current uksm_sleep_jiffies. ++ */ ++static inline unsigned long cpu_ratio_to_nsec(unsigned int ratio) ++{ ++ return NSEC_PER_USEC * jiffies_to_usecs(uksm_sleep_jiffies) / ++ (TIME_RATIO_SCALE - ratio) * ratio; ++} ++ ++ ++static inline unsigned long rung_real_ratio(int cpu_time_ratio) ++{ ++ unsigned long ret; ++ ++ BUG_ON(!cpu_time_ratio); ++ ++ if (cpu_time_ratio > 0) ++ ret = cpu_time_ratio; ++ else ++ ret = (unsigned long)(-cpu_time_ratio) * ++ uksm_max_cpu_percentage / 100UL; ++ ++ return ret ?
ret : 1; ++} ++ ++static noinline void uksm_calc_scan_pages(void) ++{ ++ struct scan_rung *ladder = uksm_scan_ladder; ++ unsigned long sleep_usecs, nsecs; ++ unsigned long ratio; ++ int i; ++ unsigned long per_page; ++ ++ if (uksm_ema_page_time > 100000 || ++ (((unsigned long) uksm_eval_round & (256UL - 1)) == 0UL)) ++ uksm_ema_page_time = UKSM_PAGE_TIME_DEFAULT; ++ ++ per_page = uksm_ema_page_time; ++ BUG_ON(!per_page); ++ ++ /* ++ * For every 8 eval round, we try to probe a uksm_sleep_jiffies value ++ * based on saved user input. ++ */ ++ if (((unsigned long) uksm_eval_round & (8UL - 1)) == 0UL) ++ uksm_sleep_jiffies = uksm_sleep_saved; ++ ++ /* We require a rung scan at least 1 page in a period. */ ++ nsecs = per_page; ++ ratio = rung_real_ratio(ladder[0].cpu_ratio); ++ if (cpu_ratio_to_nsec(ratio) < nsecs) { ++ sleep_usecs = nsecs * (TIME_RATIO_SCALE - ratio) / ratio ++ / NSEC_PER_USEC; ++ uksm_sleep_jiffies = usecs_to_jiffies(sleep_usecs) + 1; ++ } ++ ++ for (i = 0; i < SCAN_LADDER_SIZE; i++) { ++ ratio = rung_real_ratio(ladder[i].cpu_ratio); ++ ladder[i].pages_to_scan = cpu_ratio_to_nsec(ratio) / ++ per_page; ++ BUG_ON(!ladder[i].pages_to_scan); ++ uksm_calc_rung_step(&ladder[i], per_page, ratio); ++ } ++} ++ ++/* ++ * From the scan time of this round (ns) to next expected min sleep time ++ * (ms), be careful of the possible overflows. ratio is taken from ++ * rung_real_ratio() ++ */ ++static inline ++unsigned int scan_time_to_sleep(unsigned long long scan_time, unsigned long ratio) ++{ ++ scan_time >>= 20; /* to msec level now */ ++ BUG_ON(scan_time > (ULONG_MAX / TIME_RATIO_SCALE)); ++ ++ return (unsigned int) ((unsigned long) scan_time * ++ (TIME_RATIO_SCALE - ratio) / ratio); ++} ++ ++#define __round_mask(x, y) ((__typeof__(x))((y)-1)) ++#define round_up(x, y) ((((x)-1) | __round_mask(x, y))+1) ++ ++static inline unsigned long vma_pool_size(struct vma_slot *slot) ++{ ++ return round_up(sizeof(struct rmap_list_entry) * slot->pages, ++ PAGE_SIZE) >> PAGE_SHIFT; ++} ++ ++static void uksm_vma_enter(struct vma_slot **slots, unsigned long num) ++{ ++ struct scan_rung *rung; ++ unsigned long pool_size, i; ++ struct vma_slot *slot; ++ int failed; ++ ++ rung = &uksm_scan_ladder[0]; ++ ++ failed = 0; ++ for (i = 0; i < num; i++) { ++ slot = slots[i]; ++ ++ pool_size = vma_pool_size(slot); ++ slot->rmap_list_pool = kzalloc(sizeof(struct page *) * ++ pool_size, GFP_KERNEL); ++ if (!slot->rmap_list_pool) ++ break; ++ ++ slot->pool_counts = kzalloc(sizeof(unsigned int) * pool_size, ++ GFP_KERNEL); ++ if (!slot->pool_counts) { ++ kfree(slot->rmap_list_pool); ++ break; ++ } ++ ++ slot->pool_size = pool_size; ++ BUG_ON(CAN_OVERFLOW_U64(uksm_pages_total, slot->pages)); ++ slot->flags |= UKSM_SLOT_IN_UKSM; ++ uksm_pages_total += slot->pages; ++ } ++ ++ if (i) ++ rung_add_new_slots(rung, slots, i); ++ ++ return; ++} ++ ++static struct vma_slot *batch_slots[SLOT_TREE_NODE_STORE_SIZE]; ++ ++static void uksm_enter_all_slots(void) ++{ ++ struct vma_slot *slot; ++ unsigned long index; ++ struct list_head empty_vma_list; ++ int i; ++ ++ i = 0; ++ index = 0; ++ INIT_LIST_HEAD(&empty_vma_list); ++ ++ spin_lock(&vma_slot_list_lock); ++ while (!list_empty(&vma_slot_new)) { ++ slot = list_entry(vma_slot_new.next, ++ struct vma_slot, slot_list); ++ ++ if (!slot->vma->anon_vma) { ++ list_move(&slot->slot_list, &empty_vma_list); ++ } else if (vma_can_enter(slot->vma)) { ++ batch_slots[index++] = slot; ++ list_del_init(&slot->slot_list); ++ } else { ++ list_move(&slot->slot_list, &vma_slot_noadd); ++ } ++ ++ if 
(++i == SPIN_LOCK_PERIOD || ++ (index && !(index % SLOT_TREE_NODE_STORE_SIZE))) { ++ spin_unlock(&vma_slot_list_lock); ++ ++ if (index && !(index % SLOT_TREE_NODE_STORE_SIZE)) { ++ uksm_vma_enter(batch_slots, index); ++ index = 0; ++ } ++ i = 0; ++ cond_resched(); ++ spin_lock(&vma_slot_list_lock); ++ } ++ } ++ ++ list_splice(&empty_vma_list, &vma_slot_new); ++ ++ spin_unlock(&vma_slot_list_lock); ++ ++ if (index) ++ uksm_vma_enter(batch_slots, index); ++ ++} ++ ++static inline int rung_round_finished(struct scan_rung *rung) ++{ ++ return rung->flags & UKSM_RUNG_ROUND_FINISHED; ++} ++ ++static inline void judge_slot(struct vma_slot *slot) ++{ ++ struct scan_rung *rung = slot->rung; ++ unsigned long dedup; ++ int deleted; ++ ++ dedup = cal_dedup_ratio(slot); ++ if (vma_fully_scanned(slot) && uksm_thrash_threshold) ++ deleted = vma_rung_enter(slot, &uksm_scan_ladder[0]); ++ else if (dedup && dedup >= uksm_abundant_threshold) ++ deleted = vma_rung_up(slot); ++ else ++ deleted = vma_rung_down(slot); ++ ++ slot->pages_merged = 0; ++ slot->pages_cowed = 0; ++ ++ if (vma_fully_scanned(slot)) ++ slot->pages_scanned = 0; ++ ++ slot->last_scanned = slot->pages_scanned; ++ ++ /* If its deleted in above, then rung was already advanced. */ ++ if (!deleted) ++ advance_current_scan(rung); ++} ++ ++ ++static inline int hash_round_finished(void) ++{ ++ if (scanned_virtual_pages > (uksm_pages_total >> 2)) { ++ scanned_virtual_pages = 0; ++ if (uksm_pages_scanned) ++ fully_scanned_round++; ++ ++ return 1; ++ } else { ++ return 0; ++ } ++} ++ ++#define UKSM_MMSEM_BATCH 5 ++/** ++ * uksm_do_scan() - the main worker function. ++ */ ++static noinline void uksm_do_scan(void) ++{ ++ struct vma_slot *slot, *iter; ++ struct mm_struct *busy_mm; ++ unsigned char round_finished, all_rungs_emtpy; ++ int i, err, mmsem_batch; ++ unsigned long pcost; ++ long long delta_exec; ++ unsigned long vpages, max_cpu_ratio; ++ unsigned long long start_time, end_time, scan_time; ++ unsigned int expected_jiffies; ++ ++ might_sleep(); ++ ++ vpages = 0; ++ ++ start_time = task_sched_runtime(current); ++ max_cpu_ratio = 0; ++ mmsem_batch = 0; ++ ++ for (i = 0; i < SCAN_LADDER_SIZE;) { ++ struct scan_rung *rung = &uksm_scan_ladder[i]; ++ unsigned long ratio; ++ ++ if (!rung->pages_to_scan) { ++ i++; ++ continue; ++ } ++ ++ if (!rung->vma_root.num) { ++ rung->pages_to_scan = 0; ++ i++; ++ continue; ++ } ++ ++ ratio = rung_real_ratio(rung->cpu_ratio); ++ if (ratio > max_cpu_ratio) ++ max_cpu_ratio = ratio; ++ ++ /* ++ * Do not consider rung_round_finished() here, just used up the ++ * rung->pages_to_scan quota. 
++ */ ++ while (rung->pages_to_scan && rung->vma_root.num && ++ likely(!freezing(current))) { ++ int reset = 0; ++ ++ slot = rung->current_scan; ++ ++ BUG_ON(vma_fully_scanned(slot)); ++ ++ if (mmsem_batch) { ++ err = 0; ++ } else { ++ err = try_down_read_slot_mmap_sem(slot); ++ } ++ ++ if (err == -ENOENT) { ++rm_slot: ++ rung_rm_slot(slot); ++ continue; ++ } ++ ++ busy_mm = slot->mm; ++ ++ if (err == -EBUSY) { ++ /* skip other vmas on the same mm */ ++ do { ++ reset = advance_current_scan(rung); ++ iter = rung->current_scan; ++ if (iter->vma->vm_mm != busy_mm) ++ break; ++ } while (!reset); ++ ++ if (iter->vma->vm_mm != busy_mm) { ++ continue; ++ } else { ++ /* scan round finished */ ++ break; ++ } ++ } ++ ++ BUG_ON(!vma_can_enter(slot->vma)); ++ if (uksm_test_exit(slot->vma->vm_mm)) { ++ mmsem_batch = 0; ++ up_read(&slot->vma->vm_mm->mmap_sem); ++ goto rm_slot; ++ } ++ ++ if (mmsem_batch) ++ mmsem_batch--; ++ else ++ mmsem_batch = UKSM_MMSEM_BATCH; ++ ++ /* Ok, we have taken the mmap_sem, ready to scan */ ++ scan_vma_one_page(slot); ++ rung->pages_to_scan--; ++ vpages++; ++ ++ if (rung->current_offset + rung->step > slot->pages - 1 ++ || vma_fully_scanned(slot)) { ++ up_read(&slot->vma->vm_mm->mmap_sem); ++ judge_slot(slot); ++ mmsem_batch = 0; ++ } else { ++ rung->current_offset += rung->step; ++ if (!mmsem_batch) ++ up_read(&slot->vma->vm_mm->mmap_sem); ++ } ++ ++ cond_resched(); ++ } ++ ++ if (mmsem_batch) { ++ up_read(&slot->vma->vm_mm->mmap_sem); ++ mmsem_batch = 0; ++ } ++ ++ if (freezing(current)) ++ break; ++ ++ cond_resched(); ++ } ++ end_time = task_sched_runtime(current); ++ delta_exec = end_time - start_time; ++ ++ if (freezing(current)) ++ return; ++ ++ cleanup_vma_slots(); ++ uksm_enter_all_slots(); ++ ++ round_finished = 1; ++ all_rungs_emtpy = 1; ++ for (i = 0; i < SCAN_LADDER_SIZE; i++) { ++ struct scan_rung *rung = &uksm_scan_ladder[i]; ++ ++ if (rung->vma_root.num) { ++ all_rungs_emtpy = 0; ++ if (!rung_round_finished(rung)) ++ round_finished = 0; ++ } ++ } ++ ++ if (all_rungs_emtpy) ++ round_finished = 0; ++ ++ if (round_finished) { ++ round_update_ladder(); ++ uksm_eval_round++; ++ ++ if (hash_round_finished() && rshash_adjust()) { ++ /* Reset the unstable root iff hash strength changed */ ++ uksm_hash_round++; ++ root_unstable_tree = RB_ROOT; ++ free_all_tree_nodes(&unstable_tree_node_list); ++ } ++ ++ /* ++ * A number of pages can hang around indefinitely on per-cpu ++ * pagevecs, raised page count preventing write_protect_page ++ * from merging them. Though it doesn't really matter much, ++ * it is puzzling to see some stuck in pages_volatile until ++ * other activity jostles them out, and they also prevented ++ * LTP's KSM test from succeeding deterministically; so drain ++ * them here (here rather than on entry to uksm_do_scan(), ++ * so we don't IPI too often when pages_to_scan is set low).
++ */ ++ lru_add_drain_all(); ++ } ++ ++ ++ if (vpages && delta_exec > 0) { ++ pcost = (unsigned long) delta_exec / vpages; ++ if (likely(uksm_ema_page_time)) ++ uksm_ema_page_time = ema(pcost, uksm_ema_page_time); ++ else ++ uksm_ema_page_time = pcost; ++ } ++ ++ uksm_calc_scan_pages(); ++ uksm_sleep_real = uksm_sleep_jiffies; ++ /* in case of radical cpu bursts, apply the upper bound */ ++ end_time = task_sched_runtime(current); ++ if (max_cpu_ratio && end_time > start_time) { ++ scan_time = end_time - start_time; ++ expected_jiffies = msecs_to_jiffies( ++ scan_time_to_sleep(scan_time, max_cpu_ratio)); ++ ++ if (expected_jiffies > uksm_sleep_real) ++ uksm_sleep_real = expected_jiffies; ++ ++ /* We have a 1 second up bound for responsiveness. */ ++ if (jiffies_to_msecs(uksm_sleep_real) > MSEC_PER_SEC) ++ uksm_sleep_real = msecs_to_jiffies(1000); ++ } ++ ++ return; ++} ++ ++static int ksmd_should_run(void) ++{ ++ return uksm_run & UKSM_RUN_MERGE; ++} ++ ++static int uksm_scan_thread(void *nothing) ++{ ++ set_freezable(); ++ set_user_nice(current, 5); ++ ++ while (!kthread_should_stop()) { ++ mutex_lock(&uksm_thread_mutex); ++ if (ksmd_should_run()) { ++ uksm_do_scan(); ++ } ++ mutex_unlock(&uksm_thread_mutex); ++ ++ try_to_freeze(); ++ ++ if (ksmd_should_run()) { ++ schedule_timeout_interruptible(uksm_sleep_real); ++ uksm_sleep_times++; ++ } else { ++ wait_event_freezable(uksm_thread_wait, ++ ksmd_should_run() || kthread_should_stop()); ++ } ++ } ++ return 0; ++} ++ ++int page_referenced_ksm(struct page *page, struct mem_cgroup *memcg, ++ unsigned long *vm_flags) ++{ ++ struct stable_node *stable_node; ++ struct node_vma *node_vma; ++ struct rmap_item *rmap_item; ++ struct hlist_node *hlist, *rmap_hlist; ++ unsigned int mapcount = page_mapcount(page); ++ int referenced = 0; ++ int search_new_forks = 0; ++ unsigned long address; ++ ++ VM_BUG_ON(!PageKsm(page)); ++ VM_BUG_ON(!PageLocked(page)); ++ ++ stable_node = page_stable_node(page); ++ if (!stable_node) ++ return 0; ++ ++ ++again: ++ hlist_for_each_entry(node_vma, hlist, &stable_node->hlist, hlist) { ++ hlist_for_each_entry(rmap_item, rmap_hlist, ++ &node_vma->rmap_hlist, hlist) { ++ struct anon_vma *anon_vma = rmap_item->anon_vma; ++ struct anon_vma_chain *vmac; ++ struct vm_area_struct *vma; ++ ++ anon_vma_lock(anon_vma); ++ list_for_each_entry(vmac, &anon_vma->head, ++ same_anon_vma) { ++ vma = vmac->vma; ++ address = get_rmap_addr(rmap_item); ++ ++ if (address < vma->vm_start || ++ address >= vma->vm_end) ++ continue; ++ /* ++ * Initially we examine only the vma which ++ * covers this rmap_item; but later, if there ++ * is still work to do, we examine covering ++ * vmas in other mms: in case they were forked ++ * from the original since ksmd passed. 
++ */ ++ if ((rmap_item->slot->vma == vma) == ++ search_new_forks) ++ continue; ++ ++ if (memcg && ++ !mm_match_cgroup(vma->vm_mm, memcg)) ++ continue; ++ ++ referenced += ++ page_referenced_one(page, vma, ++ address, &mapcount, vm_flags); ++ if (!search_new_forks || !mapcount) ++ break; ++ } ++ ++ anon_vma_unlock(anon_vma); ++ if (!mapcount) ++ goto out; ++ } ++ } ++ if (!search_new_forks++) ++ goto again; ++out: ++ return referenced; ++} ++ ++int try_to_unmap_ksm(struct page *page, enum ttu_flags flags) ++{ ++ struct stable_node *stable_node; ++ struct node_vma *node_vma; ++ struct hlist_node *hlist, *rmap_hlist; ++ struct rmap_item *rmap_item; ++ int ret = SWAP_AGAIN; ++ int search_new_forks = 0; ++ unsigned long address; ++ ++ VM_BUG_ON(!PageKsm(page)); ++ VM_BUG_ON(!PageLocked(page)); ++ ++ stable_node = page_stable_node(page); ++ if (!stable_node) ++ return SWAP_FAIL; ++again: ++ hlist_for_each_entry(node_vma, hlist, &stable_node->hlist, hlist) { ++ hlist_for_each_entry(rmap_item, rmap_hlist, ++ &node_vma->rmap_hlist, hlist) { ++ struct anon_vma *anon_vma = rmap_item->anon_vma; ++ struct anon_vma_chain *vmac; ++ struct vm_area_struct *vma; ++ ++ anon_vma_lock(anon_vma); ++ list_for_each_entry(vmac, &anon_vma->head, ++ same_anon_vma) { ++ vma = vmac->vma; ++ address = get_rmap_addr(rmap_item); ++ ++ if (address < vma->vm_start || ++ address >= vma->vm_end) ++ continue; ++ /* ++ * Initially we examine only the vma which ++ * covers this rmap_item; but later, if there ++ * is still work to do, we examine covering ++ * vmas in other mms: in case they were forked ++ * from the original since ksmd passed. ++ */ ++ if ((rmap_item->slot->vma == vma) == ++ search_new_forks) ++ continue; ++ ++ ret = try_to_unmap_one(page, vma, ++ address, flags); ++ if (ret != SWAP_AGAIN || !page_mapped(page)) { ++ anon_vma_unlock(anon_vma); ++ goto out; ++ } ++ } ++ anon_vma_unlock(anon_vma); ++ } ++ } ++ if (!search_new_forks++) ++ goto again; ++out: ++ return ret; ++} ++ ++#ifdef CONFIG_MIGRATION ++int rmap_walk_ksm(struct page *page, int (*rmap_one)(struct page *, ++ struct vm_area_struct *, unsigned long, void *), void *arg) ++{ ++ struct stable_node *stable_node; ++ struct node_vma *node_vma; ++ struct hlist_node *hlist, *rmap_hlist; ++ struct rmap_item *rmap_item; ++ int ret = SWAP_AGAIN; ++ int search_new_forks = 0; ++ unsigned long address; ++ ++ VM_BUG_ON(!PageKsm(page)); ++ VM_BUG_ON(!PageLocked(page)); ++ ++ stable_node = page_stable_node(page); ++ if (!stable_node) ++ return ret; ++again: ++ hlist_for_each_entry(node_vma, hlist, &stable_node->hlist, hlist) { ++ hlist_for_each_entry(rmap_item, rmap_hlist, ++ &node_vma->rmap_hlist, hlist) { ++ struct anon_vma *anon_vma = rmap_item->anon_vma; ++ struct anon_vma_chain *vmac; ++ struct vm_area_struct *vma; ++ ++ anon_vma_lock(anon_vma); ++ list_for_each_entry(vmac, &anon_vma->head, ++ same_anon_vma) { ++ vma = vmac->vma; ++ address = get_rmap_addr(rmap_item); ++ ++ if (address < vma->vm_start || ++ address >= vma->vm_end) ++ continue; ++ ++ if ((rmap_item->slot->vma == vma) == ++ search_new_forks) ++ continue; ++ ++ ret = rmap_one(page, vma, address, arg); ++ if (ret != SWAP_AGAIN) { ++ anon_vma_unlock(anon_vma); ++ goto out; ++ } ++ } ++ anon_vma_unlock(anon_vma); ++ } ++ } ++ if (!search_new_forks++) ++ goto again; ++out: ++ return ret; ++} ++ ++/* Common ksm interface but may be specific to uksm */ ++void ksm_migrate_page(struct page *newpage, struct page *oldpage) ++{ ++ struct stable_node *stable_node; ++ ++ VM_BUG_ON(!PageLocked(oldpage)); ++ 
VM_BUG_ON(!PageLocked(newpage)); ++ VM_BUG_ON(newpage->mapping != oldpage->mapping); ++ ++ stable_node = page_stable_node(newpage); ++ if (stable_node) { ++ VM_BUG_ON(stable_node->kpfn != page_to_pfn(oldpage)); ++ stable_node->kpfn = page_to_pfn(newpage); ++ } ++} ++#endif /* CONFIG_MIGRATION */ ++ ++#ifdef CONFIG_MEMORY_HOTREMOVE ++static struct stable_node *uksm_check_stable_tree(unsigned long start_pfn, ++ unsigned long end_pfn) ++{ ++ struct rb_node *node; ++ ++ for (node = rb_first(root_stable_treep); node; node = rb_next(node)) { ++ struct stable_node *stable_node; ++ ++ stable_node = rb_entry(node, struct stable_node, node); ++ if (stable_node->kpfn >= start_pfn && ++ stable_node->kpfn < end_pfn) ++ return stable_node; ++ } ++ return NULL; ++} ++ ++static int uksm_memory_callback(struct notifier_block *self, ++ unsigned long action, void *arg) ++{ ++ struct memory_notify *mn = arg; ++ struct stable_node *stable_node; ++ ++ switch (action) { ++ case MEM_GOING_OFFLINE: ++ /* ++ * Keep it very simple for now: just lock out ksmd and ++ * MADV_UNMERGEABLE while any memory is going offline. ++ * mutex_lock_nested() is necessary because lockdep was alarmed ++ * that here we take uksm_thread_mutex inside notifier chain ++ * mutex, and later take notifier chain mutex inside ++ * uksm_thread_mutex to unlock it. But that's safe because both ++ * are inside mem_hotplug_mutex. ++ */ ++ mutex_lock_nested(&uksm_thread_mutex, SINGLE_DEPTH_NESTING); ++ break; ++ ++ case MEM_OFFLINE: ++ /* ++ * Most of the work is done by page migration; but there might ++ * be a few stable_nodes left over, still pointing to struct ++ * pages which have been offlined: prune those from the tree. ++ */ ++ while ((stable_node = uksm_check_stable_tree(mn->start_pfn, ++ mn->start_pfn + mn->nr_pages)) != NULL) ++ remove_node_from_stable_tree(stable_node, 1, 1); ++ /* fallthrough */ ++ ++ case MEM_CANCEL_OFFLINE: ++ mutex_unlock(&uksm_thread_mutex); ++ break; ++ } ++ return NOTIFY_OK; ++} ++#endif /* CONFIG_MEMORY_HOTREMOVE */ ++ ++#ifdef CONFIG_SYSFS ++/* ++ * This all compiles without CONFIG_SYSFS, but is a waste of space. 
++ */ ++ ++#define UKSM_ATTR_RO(_name) \ ++ static struct kobj_attribute _name##_attr = __ATTR_RO(_name) ++#define UKSM_ATTR(_name) \ ++ static struct kobj_attribute _name##_attr = \ ++ __ATTR(_name, 0644, _name##_show, _name##_store) ++ ++static ssize_t max_cpu_percentage_show(struct kobject *kobj, ++ struct kobj_attribute *attr, char *buf) ++{ ++ return sprintf(buf, "%u\n", uksm_max_cpu_percentage); ++} ++ ++static ssize_t max_cpu_percentage_store(struct kobject *kobj, ++ struct kobj_attribute *attr, ++ const char *buf, size_t count) ++{ ++ unsigned long max_cpu_percentage; ++ int err; ++ ++ err = strict_strtoul(buf, 10, &max_cpu_percentage); ++ if (err || max_cpu_percentage > 100) ++ return -EINVAL; ++ ++ if (max_cpu_percentage == 100) ++ max_cpu_percentage = 99; ++ else if (max_cpu_percentage < 10) ++ max_cpu_percentage = 10; ++ ++ uksm_max_cpu_percentage = max_cpu_percentage; ++ ++ return count; ++} ++UKSM_ATTR(max_cpu_percentage); ++ ++static ssize_t sleep_millisecs_show(struct kobject *kobj, ++ struct kobj_attribute *attr, char *buf) ++{ ++ return sprintf(buf, "%u\n", jiffies_to_msecs(uksm_sleep_jiffies)); ++} ++ ++static ssize_t sleep_millisecs_store(struct kobject *kobj, ++ struct kobj_attribute *attr, ++ const char *buf, size_t count) ++{ ++ unsigned long msecs; ++ int err; ++ ++ err = strict_strtoul(buf, 10, &msecs); ++ if (err || msecs > MSEC_PER_SEC) ++ return -EINVAL; ++ ++ uksm_sleep_jiffies = msecs_to_jiffies(msecs); ++ uksm_sleep_saved = uksm_sleep_jiffies; ++ ++ return count; ++} ++UKSM_ATTR(sleep_millisecs); ++ ++ ++static ssize_t cpu_governor_show(struct kobject *kobj, ++ struct kobj_attribute *attr, char *buf) ++{ ++ int n = sizeof(uksm_cpu_governor_str) / sizeof(char *); ++ int i; ++ ++ buf[0] = '\0'; ++ for (i = 0; i < n ; i++) { ++ if (uksm_cpu_governor == i) ++ strcat(buf, "["); ++ ++ strcat(buf, uksm_cpu_governor_str[i]); ++ ++ if (uksm_cpu_governor == i) ++ strcat(buf, "]"); ++ ++ strcat(buf, " "); ++ } ++ strcat(buf, "\n"); ++ ++ return strlen(buf); ++} ++ ++static inline void init_performance_values(void) ++{ ++ int i; ++ struct scan_rung *rung; ++ struct uksm_cpu_preset_s *preset = uksm_cpu_preset + uksm_cpu_governor; ++ ++ ++ for (i = 0; i < SCAN_LADDER_SIZE; i++) { ++ rung = uksm_scan_ladder + i; ++ rung->cpu_ratio = preset->cpu_ratio[i]; ++ rung->cover_msecs = preset->cover_msecs[i]; ++ } ++ ++ uksm_max_cpu_percentage = preset->max_cpu; ++} ++ ++static ssize_t cpu_governor_store(struct kobject *kobj, ++ struct kobj_attribute *attr, ++ const char *buf, size_t count) ++{ ++ int n = sizeof(uksm_cpu_governor_str) / sizeof(char *); ++ ++ for (n--; n >=0 ; n--) { ++ if (!strncmp(buf, uksm_cpu_governor_str[n], ++ strlen(uksm_cpu_governor_str[n]))) ++ break; ++ } ++ ++ if (n < 0) ++ return -EINVAL; ++ else ++ uksm_cpu_governor = n; ++ ++ init_performance_values(); ++ ++ return count; ++} ++UKSM_ATTR(cpu_governor); ++ ++static ssize_t run_show(struct kobject *kobj, struct kobj_attribute *attr, ++ char *buf) ++{ ++ return sprintf(buf, "%u\n", uksm_run); ++} ++ ++static ssize_t run_store(struct kobject *kobj, struct kobj_attribute *attr, ++ const char *buf, size_t count) ++{ ++ int err; ++ unsigned long flags; ++ ++ err = strict_strtoul(buf, 10, &flags); ++ if (err || flags > UINT_MAX) ++ return -EINVAL; ++ if (flags > UKSM_RUN_MERGE) ++ return -EINVAL; ++ ++ mutex_lock(&uksm_thread_mutex); ++ if (uksm_run != flags) { ++ uksm_run = flags; ++ } ++ mutex_unlock(&uksm_thread_mutex); ++ ++ if (flags & UKSM_RUN_MERGE) ++ wake_up_interruptible(&uksm_thread_wait); ++ ++ 
return count; ++} ++UKSM_ATTR(run); ++ ++static ssize_t abundant_threshold_show(struct kobject *kobj, ++ struct kobj_attribute *attr, char *buf) ++{ ++ return sprintf(buf, "%u\n", uksm_abundant_threshold); ++} ++ ++static ssize_t abundant_threshold_store(struct kobject *kobj, ++ struct kobj_attribute *attr, ++ const char *buf, size_t count) ++{ ++ int err; ++ unsigned long flags; ++ ++ err = strict_strtoul(buf, 10, &flags); ++ if (err || flags > 99) ++ return -EINVAL; ++ ++ uksm_abundant_threshold = flags; ++ ++ return count; ++} ++UKSM_ATTR(abundant_threshold); ++ ++static ssize_t thrash_threshold_show(struct kobject *kobj, ++ struct kobj_attribute *attr, char *buf) ++{ ++ return sprintf(buf, "%u\n", uksm_thrash_threshold); ++} ++ ++static ssize_t thrash_threshold_store(struct kobject *kobj, ++ struct kobj_attribute *attr, ++ const char *buf, size_t count) ++{ ++ int err; ++ unsigned long flags; ++ ++ err = strict_strtoul(buf, 10, &flags); ++ if (err || flags > 99) ++ return -EINVAL; ++ ++ uksm_thrash_threshold = flags; ++ ++ return count; ++} ++UKSM_ATTR(thrash_threshold); ++ ++static ssize_t cpu_ratios_show(struct kobject *kobj, ++ struct kobj_attribute *attr, char *buf) ++{ ++ int i, size; ++ struct scan_rung *rung; ++ char *p = buf; ++ ++ for (i = 0; i < SCAN_LADDER_SIZE; i++) { ++ rung = &uksm_scan_ladder[i]; ++ ++ if (rung->cpu_ratio > 0) ++ size = sprintf(p, "%d ", rung->cpu_ratio); ++ else ++ size = sprintf(p, "MAX/%d ", ++ TIME_RATIO_SCALE / -rung->cpu_ratio); ++ ++ p += size; ++ } ++ ++ *p++ = '\n'; ++ *p = '\0'; ++ ++ return p - buf; ++} ++ ++static ssize_t cpu_ratios_store(struct kobject *kobj, ++ struct kobj_attribute *attr, ++ const char *buf, size_t count) ++{ ++ int i, cpuratios[SCAN_LADDER_SIZE], err; ++ unsigned long value; ++ struct scan_rung *rung; ++ char *p, *end = NULL; ++ ++ p = kzalloc(count, GFP_KERNEL); ++ if (!p) ++ return -ENOMEM; ++ ++ memcpy(p, buf, count); ++ ++ for (i = 0; i < SCAN_LADDER_SIZE; i++) { ++ if (i != SCAN_LADDER_SIZE -1) { ++ end = strchr(p, ' '); ++ if (!end) ++ return -EINVAL; ++ ++ *end = '\0'; ++ } ++ ++ if (strstr(p, "MAX/")) { ++ p = strchr(p, '/') + 1; ++ err = strict_strtoul(p, 10, &value); ++ if (err || value > TIME_RATIO_SCALE || !value) ++ return -EINVAL; ++ ++ cpuratios[i] = - (int) (TIME_RATIO_SCALE / value); ++ } else { ++ err = strict_strtoul(p, 10, &value); ++ if (err || value > TIME_RATIO_SCALE || !value) ++ return -EINVAL; ++ ++ cpuratios[i] = value; ++ } ++ ++ p = end + 1; ++ } ++ ++ for (i = 0; i < SCAN_LADDER_SIZE; i++) { ++ rung = &uksm_scan_ladder[i]; ++ ++ rung->cpu_ratio = cpuratios[i]; ++ } ++ ++ return count; ++} ++UKSM_ATTR(cpu_ratios); ++ ++static ssize_t eval_intervals_show(struct kobject *kobj, ++ struct kobj_attribute *attr, char *buf) ++{ ++ int i, size; ++ struct scan_rung *rung; ++ char *p = buf; ++ ++ for (i = 0; i < SCAN_LADDER_SIZE; i++) { ++ rung = &uksm_scan_ladder[i]; ++ size = sprintf(p, "%u ", rung->cover_msecs); ++ p += size; ++ } ++ ++ *p++ = '\n'; ++ *p = '\0'; ++ ++ return p - buf; ++} ++ ++static ssize_t eval_intervals_store(struct kobject *kobj, ++ struct kobj_attribute *attr, ++ const char *buf, size_t count) ++{ ++ int i, err; ++ unsigned long values[SCAN_LADDER_SIZE]; ++ struct scan_rung *rung; ++ char *p, *end = NULL; ++ ++ p = kzalloc(count, GFP_KERNEL); ++ if (!p) ++ return -ENOMEM; ++ ++ memcpy(p, buf, count); ++ ++ for (i = 0; i < SCAN_LADDER_SIZE; i++) { ++ if (i != SCAN_LADDER_SIZE -1) { ++ end = strchr(p, ' '); ++ if (!end) ++ return -EINVAL; ++ ++ *end = '\0'; ++ } ++ ++ err = 
strict_strtoul(p, 10, &values[i]); ++ if (err) ++ return -EINVAL; ++ ++ p = end + 1; ++ } ++ ++ for (i = 0; i < SCAN_LADDER_SIZE; i++) { ++ rung = &uksm_scan_ladder[i]; ++ ++ rung->cover_msecs = values[i]; ++ } ++ ++ return count; ++} ++UKSM_ATTR(eval_intervals); ++ ++static ssize_t ema_per_page_time_show(struct kobject *kobj, ++ struct kobj_attribute *attr, char *buf) ++{ ++ return sprintf(buf, "%lu\n", uksm_ema_page_time); ++} ++UKSM_ATTR_RO(ema_per_page_time); ++ ++static ssize_t pages_shared_show(struct kobject *kobj, ++ struct kobj_attribute *attr, char *buf) ++{ ++ return sprintf(buf, "%lu\n", uksm_pages_shared); ++} ++UKSM_ATTR_RO(pages_shared); ++ ++static ssize_t pages_sharing_show(struct kobject *kobj, ++ struct kobj_attribute *attr, char *buf) ++{ ++ return sprintf(buf, "%lu\n", uksm_pages_sharing); ++} ++UKSM_ATTR_RO(pages_sharing); ++ ++static ssize_t pages_unshared_show(struct kobject *kobj, ++ struct kobj_attribute *attr, char *buf) ++{ ++ return sprintf(buf, "%lu\n", uksm_pages_unshared); ++} ++UKSM_ATTR_RO(pages_unshared); ++ ++static ssize_t full_scans_show(struct kobject *kobj, ++ struct kobj_attribute *attr, char *buf) ++{ ++ return sprintf(buf, "%llu\n", fully_scanned_round); ++} ++UKSM_ATTR_RO(full_scans); ++ ++static ssize_t pages_scanned_show(struct kobject *kobj, ++ struct kobj_attribute *attr, char *buf) ++{ ++ unsigned long base = 0; ++ u64 delta, ret; ++ ++ if (pages_scanned_stored) { ++ base = pages_scanned_base; ++ ret = pages_scanned_stored; ++ delta = uksm_pages_scanned >> base; ++ if (CAN_OVERFLOW_U64(ret, delta)) { ++ ret >>= 1; ++ delta >>= 1; ++ base++; ++ ret += delta; ++ } ++ } else { ++ ret = uksm_pages_scanned; ++ } ++ ++ while (ret > ULONG_MAX) { ++ ret >>= 1; ++ base++; ++ } ++ ++ if (base) ++ return sprintf(buf, "%lu * 2^%lu\n", (unsigned long)ret, base); ++ else ++ return sprintf(buf, "%lu\n", (unsigned long)ret); ++} ++UKSM_ATTR_RO(pages_scanned); ++ ++static ssize_t hash_strength_show(struct kobject *kobj, ++ struct kobj_attribute *attr, char *buf) ++{ ++ return sprintf(buf, "%lu\n", hash_strength); ++} ++UKSM_ATTR_RO(hash_strength); ++ ++static ssize_t sleep_times_show(struct kobject *kobj, ++ struct kobj_attribute *attr, char *buf) ++{ ++ return sprintf(buf, "%llu\n", uksm_sleep_times); ++} ++UKSM_ATTR_RO(sleep_times); ++ ++ ++static struct attribute *uksm_attrs[] = { ++ &max_cpu_percentage_attr.attr, ++ &sleep_millisecs_attr.attr, ++ &cpu_governor_attr.attr, ++ &run_attr.attr, ++ &ema_per_page_time_attr.attr, ++ &pages_shared_attr.attr, ++ &pages_sharing_attr.attr, ++ &pages_unshared_attr.attr, ++ &full_scans_attr.attr, ++ &pages_scanned_attr.attr, ++ &hash_strength_attr.attr, ++ &sleep_times_attr.attr, ++ &thrash_threshold_attr.attr, ++ &abundant_threshold_attr.attr, ++ &cpu_ratios_attr.attr, ++ &eval_intervals_attr.attr, ++ NULL, ++}; ++ ++static struct attribute_group uksm_attr_group = { ++ .attrs = uksm_attrs, ++ .name = "uksm", ++}; ++#endif /* CONFIG_SYSFS */ ++ ++static inline void init_scan_ladder(void) ++{ ++ int i; ++ struct scan_rung *rung; ++ ++ for (i = 0; i < SCAN_LADDER_SIZE; i++) { ++ rung = uksm_scan_ladder + i; ++ slot_tree_init_root(&rung->vma_root); ++ } ++ ++ init_performance_values(); ++ uksm_calc_scan_pages(); ++} ++ ++static inline int cal_positive_negative_costs(void) ++{ ++ struct page *p1, *p2; ++ unsigned char *addr1, *addr2; ++ unsigned long i, time_start, hash_cost; ++ unsigned long loopnum = 0; ++ ++ /*IMPORTANT: volatile is needed to prevent over-optimization by gcc. 
*/ ++ volatile u32 hash; ++ volatile int ret; ++ ++ p1 = alloc_page(GFP_KERNEL); ++ if (!p1) ++ return -ENOMEM; ++ ++ p2 = alloc_page(GFP_KERNEL); ++ if (!p2) ++ return -ENOMEM; ++ ++ addr1 = kmap_atomic(p1, KM_USER0); ++ addr2 = kmap_atomic(p2, KM_USER1); ++ memset(addr1, random32(), PAGE_SIZE); ++ memcpy(addr2, addr1, PAGE_SIZE); ++ ++ /* make sure that the two pages differ in last byte */ ++ addr2[PAGE_SIZE-1] = ~addr2[PAGE_SIZE-1]; ++ kunmap_atomic(addr2, KM_USER1); ++ kunmap_atomic(addr1, KM_USER0); ++ ++ time_start = jiffies; ++ while (jiffies - time_start < 100) { ++ for (i = 0; i < 100; i++) ++ hash = page_hash(p1, HASH_STRENGTH_FULL, 0); ++ loopnum += 100; ++ } ++ hash_cost = (jiffies - time_start); ++ ++ time_start = jiffies; ++ for (i = 0; i < loopnum; i++) ++ ret = pages_identical(p1, p2); ++ memcmp_cost = HASH_STRENGTH_FULL * (jiffies - time_start); ++ memcmp_cost /= hash_cost; ++ printk(KERN_INFO "UKSM: relative memcmp_cost = %lu " ++ "hash=%u cmp_ret=%d.\n", ++ memcmp_cost, hash, ret); ++ ++ __free_page(p1); ++ __free_page(p2); ++ return 0; ++} ++ ++static int init_zeropage_hash_table(void) ++{ ++ struct page *page; ++ char *addr; ++ int i; ++ ++ page = alloc_page(GFP_KERNEL); ++ if (!page) ++ return -ENOMEM; ++ ++ addr = kmap_atomic(page, KM_USER0); ++ memset(addr, 0, PAGE_SIZE); ++ kunmap_atomic(addr, KM_USER0); ++ ++ zero_hash_table = kmalloc(HASH_STRENGTH_MAX * sizeof(u32), ++ GFP_KERNEL); ++ if (!zero_hash_table) ++ return -ENOMEM; ++ ++ for (i = 0; i < HASH_STRENGTH_MAX; i++) ++ zero_hash_table[i] = page_hash(page, i, 0); ++ ++ __free_page(page); ++ ++ return 0; ++} ++ ++static inline int init_random_sampling(void) ++{ ++ unsigned long i; ++ random_nums = kmalloc(PAGE_SIZE, GFP_KERNEL); ++ if (!random_nums) ++ return -ENOMEM; ++ ++ for (i = 0; i < HASH_STRENGTH_FULL; i++) ++ random_nums[i] = i; ++ ++ for (i = 0; i < HASH_STRENGTH_FULL; i++) { ++ unsigned long rand_range, swap_index, tmp; ++ ++ rand_range = HASH_STRENGTH_FULL - i; ++ swap_index = i + random32() % rand_range; ++ tmp = random_nums[i]; ++ random_nums[i] = random_nums[swap_index]; ++ random_nums[swap_index] = tmp; ++ } ++ ++ rshash_state.state = RSHASH_NEW; ++ rshash_state.below_count = 0; ++ rshash_state.lookup_window_index = 0; ++ ++ return cal_positive_negative_costs(); ++} ++ ++static int __init uksm_slab_init(void) ++{ ++ rmap_item_cache = UKSM_KMEM_CACHE(rmap_item, 0); ++ if (!rmap_item_cache) ++ goto out; ++ ++ stable_node_cache = UKSM_KMEM_CACHE(stable_node, 0); ++ if (!stable_node_cache) ++ goto out_free1; ++ ++ node_vma_cache = UKSM_KMEM_CACHE(node_vma, 0); ++ if (!node_vma_cache) ++ goto out_free2; ++ ++ vma_slot_cache = UKSM_KMEM_CACHE(vma_slot, 0); ++ if (!vma_slot_cache) ++ goto out_free3; ++ ++ tree_node_cache = UKSM_KMEM_CACHE(tree_node, 0); ++ if (!tree_node_cache) ++ goto out_free4; ++ ++ return 0; ++ ++out_free4: ++ kmem_cache_destroy(vma_slot_cache); ++out_free3: ++ kmem_cache_destroy(node_vma_cache); ++out_free2: ++ kmem_cache_destroy(stable_node_cache); ++out_free1: ++ kmem_cache_destroy(rmap_item_cache); ++out: ++ return -ENOMEM; ++} ++ ++static void __init uksm_slab_free(void) ++{ ++ kmem_cache_destroy(stable_node_cache); ++ kmem_cache_destroy(rmap_item_cache); ++ kmem_cache_destroy(node_vma_cache); ++ kmem_cache_destroy(vma_slot_cache); ++ kmem_cache_destroy(tree_node_cache); ++} ++ ++/* Common interface to ksm, different to it. 
*/ ++int ksm_madvise(struct vm_area_struct *vma, unsigned long start, ++ unsigned long end, int advice, unsigned long *vm_flags) ++{ ++ int err; ++ ++ switch (advice) { ++ case MADV_MERGEABLE: ++ return 0; /* just ignore the advice */ ++ ++ case MADV_UNMERGEABLE: ++ if (!(*vm_flags & VM_MERGEABLE)) ++ return 0; /* just ignore the advice */ ++ ++ if (vma->anon_vma) { ++ err = unmerge_uksm_pages(vma, start, end); ++ if (err) ++ return err; ++ } ++ ++ uksm_remove_vma(vma); ++ *vm_flags &= ~VM_MERGEABLE; ++ break; ++ } ++ ++ return 0; ++} ++ ++/* Common interface to ksm, actually the same. */ ++struct page *ksm_does_need_to_copy(struct page *page, ++ struct vm_area_struct *vma, unsigned long address) ++{ ++ struct page *new_page; ++ ++ new_page = alloc_page_vma(GFP_HIGHUSER_MOVABLE, vma, address); ++ if (new_page) { ++ copy_user_highpage(new_page, page, address, vma); ++ ++ SetPageDirty(new_page); ++ __SetPageUptodate(new_page); ++ SetPageSwapBacked(new_page); ++ __set_page_locked(new_page); ++ ++ if (page_evictable(new_page, vma)) ++ lru_cache_add_lru(new_page, LRU_ACTIVE_ANON); ++ else ++ add_page_to_unevictable_list(new_page); ++ } ++ ++ return new_page; ++} ++ ++static int __init uksm_init(void) ++{ ++ struct task_struct *uksm_thread; ++ int err; ++ ++ uksm_sleep_jiffies = msecs_to_jiffies(100); ++ uksm_sleep_saved = uksm_sleep_jiffies; ++ ++ slot_tree_init(); ++ init_scan_ladder(); ++ ++ ++ err = init_random_sampling(); ++ if (err) ++ goto out_free2; ++ ++ err = uksm_slab_init(); ++ if (err) ++ goto out_free1; ++ ++ err = init_zeropage_hash_table(); ++ if (err) ++ goto out_free0; ++ ++ uksm_thread = kthread_run(uksm_scan_thread, NULL, "uksmd"); ++ if (IS_ERR(uksm_thread)) { ++ printk(KERN_ERR "uksm: creating kthread failed\n"); ++ err = PTR_ERR(uksm_thread); ++ goto out_free; ++ } ++ ++#ifdef CONFIG_SYSFS ++ err = sysfs_create_group(mm_kobj, &uksm_attr_group); ++ if (err) { ++ printk(KERN_ERR "uksm: register sysfs failed\n"); ++ kthread_stop(uksm_thread); ++ goto out_free; ++ } ++#else ++ uksm_run = UKSM_RUN_MERGE; /* no way for user to start it */ ++ ++#endif /* CONFIG_SYSFS */ ++ ++#ifdef CONFIG_MEMORY_HOTREMOVE ++ /* ++ * Choose a high priority since the callback takes uksm_thread_mutex: ++ * later callbacks could only be taking locks which nest within that. 
++ */ ++ hotplug_memory_notifier(uksm_memory_callback, 100); ++#endif ++ return 0; ++ ++out_free: ++ kfree(zero_hash_table); ++out_free0: ++ uksm_slab_free(); ++out_free1: ++ kfree(random_nums); ++out_free2: ++ kfree(uksm_scan_ladder); ++ return err; ++} ++ ++#ifdef MODULE ++module_init(uksm_init) ++#else ++late_initcall(uksm_init); ++#endif ++ +diff --git a/mm/vmstat.c b/mm/vmstat.c +index 8fd603b..63d43d9 100644 +--- a/mm/vmstat.c ++++ b/mm/vmstat.c +@@ -719,6 +719,9 @@ const char * const vmstat_text[] = { + "numa_other", + #endif + "nr_anon_transparent_hugepages", ++#ifdef CONFIG_UKSM ++ "nr_uksm_zero_pages", ++#endif + "nr_dirty_threshold", + "nr_dirty_background_threshold", + diff --git a/3.2.34/v3.2-ARM-kirkwood-Add-support-for-Buffalo-LS-VL.patch b/3.2.34/v3.2-ARM-kirkwood-Add-support-for-Buffalo-LS-VL.patch new file mode 100644 index 0000000..a26d2b1 --- /dev/null +++ b/3.2.34/v3.2-ARM-kirkwood-Add-support-for-Buffalo-LS-VL.patch @@ -0,0 +1,381 @@ +diff -uNr linux-3.2.33-go.orig/arch/arm/mach-kirkwood/Kconfig linux-3.2.33-go/arch/arm/mach-kirkwood/Kconfig +--- linux-3.2.33-go.orig/arch/arm/mach-kirkwood/Kconfig 2012-11-14 21:20:22.326388580 +0100 ++++ linux-3.2.33-go/arch/arm/mach-kirkwood/Kconfig 2012-11-14 21:21:02.353908681 +0100 +@@ -136,6 +136,12 @@ + Say 'Y' here if you want your kernel to support the + Buffalo LS-XHL Series. + ++config MACH_LSVL ++ bool "Buffalo LS-VL Series" ++ help ++ Say 'Y' here if you want your kernel to support the ++ Buffalo LS-VL Series. ++ + endmenu + + endif +diff -uNr linux-3.2.33-go.orig/arch/arm/mach-kirkwood/lsvl-setup.c linux-3.2.33-go/arch/arm/mach-kirkwood/lsvl-setup.c +--- linux-3.2.33-go.orig/arch/arm/mach-kirkwood/lsvl-setup.c 1970-01-01 01:00:00.000000000 +0100 ++++ linux-3.2.33-go/arch/arm/mach-kirkwood/lsvl-setup.c 2012-11-14 21:22:54.158568343 +0100 +@@ -0,0 +1,340 @@ ++/* ++ * arch/arm/mach-kirkwood/lsvl-setup.c ++ * ++ * Buffalo LS-VL Series Setup ++ * ++ * This file is licensed under the terms of the GNU General Public ++ * License version 2. This program is licensed "as is" without any ++ * warranty of any kind, whether express or implied. 
++ */ ++ ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include "common.h" ++#include "mpp.h" ++ ++/***************************************************************************** ++ * 512KB SPI Flash on BOOT Device ++ ****************************************************************************/ ++static struct mtd_partition lsvl_partitions[] = { ++ { ++ .name = "u-boot", ++ .size = 0x80000, ++ .offset = 0x00000, ++ .mask_flags = MTD_WRITEABLE, /* force read-only */ ++ } ++}; ++ ++static struct flash_platform_data lsvl_spi_slave_data = { ++ .type = "m25p40-nonjedec", ++ .parts = lsvl_partitions, ++ .nr_parts = ARRAY_SIZE(lsvl_partitions), ++}; ++ ++static struct spi_board_info __initdata lsvl_spi_slave_info[] = { ++ { ++ .modalias = "m25p80", ++ .platform_data = &lsvl_spi_slave_data, ++ .irq = -1, ++ .max_speed_hz = 20000000, ++ .bus_num = 0, ++ .chip_select = 0, ++ } ++}; ++ ++/***************************************************************************** ++ * Ethernet ++ ****************************************************************************/ ++static struct mv643xx_eth_platform_data lsvl_ge00_data = { ++ .phy_addr = MV643XX_ETH_PHY_ADDR(0), ++}; ++ ++/***************************************************************************** ++ * SATA ++ ****************************************************************************/ ++static struct mv_sata_platform_data lsvl_sata_data = { ++ .n_ports = 1, ++}; ++ ++/***************************************************************************** ++ * LEDs attached to GPIO ++ ****************************************************************************/ ++#define LSVL_GPIO_LED_ALARM 36 ++#define LSVL_GPIO_LED_FUNC_RED 37 ++#define LSVL_GPIO_LED_INFO 38 ++#define LSVL_GPIO_LED_FUNC_BLUE 39 ++#define LSVL_GPIO_LED_PWR 40 ++ ++static struct gpio_led lsvl_led_pins[] = { ++ { ++ .name = "alarm:red", ++ .gpio = LSVL_GPIO_LED_ALARM, ++ }, ++ { ++ .name = "func:red:bottom", ++ .gpio = LSVL_GPIO_LED_FUNC_RED, ++ }, ++ { ++ .name = "info:amber", ++ .gpio = LSVL_GPIO_LED_INFO, ++ }, ++ { ++ .name = "func:blue:bottom", ++ .gpio = LSVL_GPIO_LED_FUNC_BLUE, ++ }, ++ ++ { ++ .name = "power:blue", ++ .default_trigger = "default-on", ++ .gpio = LSVL_GPIO_LED_PWR, ++ .active_low = 1, ++ }, ++}; ++ ++static struct gpio_led_platform_data lsvl_led_data = { ++ .leds = lsvl_led_pins, ++ .num_leds = ARRAY_SIZE(lsvl_led_pins), ++}; ++ ++static struct platform_device lsvl_leds = { ++ .name = "leds-gpio", ++ .id = -1, ++ .dev = { ++ .platform_data = &lsvl_led_data, ++ } ++}; ++ ++/***************************************************************************** ++ * General Setup ++ ****************************************************************************/ ++#define LSVL_GPIO_HDD_POWER 8 ++#define LSVL_GPIO_USB_POWER 12 ++ ++/***************************************************************************** ++ * GPIO Attached Keys ++ ****************************************************************************/ ++/*#define LSVL_GPIO_KEY_FUNC 45 ++#define LSVL_GPIO_KEY_POWER 46 ++#define LSVL_GPIO_KEY_AUTOPOWER 47 ++#define LSVL_SW_POWER 0x00 ++#define LSVL_SW_AUTOPOWER 0x01 ++#define LSVL_SW_FUNC 0x02 ++ ++static struct gpio_keys_button lsvl_buttons[] = { ++ { ++ .type = EV_SW, ++ .code = LSVL_SW_POWER, ++ .gpio = LSVL_GPIO_KEY_POWER, ++ .desc = "Power-on Switch", ++ .active_low = 1, ++ }, { ++ .type = EV_SW, ++ .code = LSVL_SW_AUTOPOWER, 
++ .gpio = LSVL_GPIO_KEY_AUTOPOWER, ++ .desc = "Power-auto Switch", ++ .active_low = 1, ++ }, { ++ .type = EV_SW, ++ .code = LSVL_SW_FUNC, ++ .gpio = LSVL_GPIO_KEY_FUNC, ++ .desc = "Function Button", ++ .active_low = 1, ++ }, ++}; ++ ++static struct gpio_keys_platform_data lsvl_button_data = { ++ .buttons = lsvl_buttons, ++ .nbuttons = ARRAY_SIZE(lsvl_buttons), ++}; ++ ++static struct platform_device lsvl_button_device = { ++ .name = "gpio-keys", ++ .id = -1, ++ .num_resources = 0, ++ .dev = { ++ .platform_data = &lsvl_button_data, ++ }, ++}; ++*/ ++ ++/***************************************************************************** ++ * GPIO Fan ++ ****************************************************************************/ ++#define LSVL_GPIO_FAN_HIGH 16 ++#define LSVL_GPIO_FAN_LOW 17 ++#define LSVL_GPIO_FAN_LOCK 43 ++ ++static struct gpio_fan_alarm lsvl_alarm = { ++ .gpio = LSVL_GPIO_FAN_LOCK, ++}; ++ ++static struct gpio_fan_speed lsvl_speeds[] = { ++ { ++ .rpm = 0, ++ .ctrl_val = 3, ++ }, { ++ .rpm = 1500, ++ .ctrl_val = 1, ++ }, { ++ .rpm = 3250, ++ .ctrl_val = 2, ++ }, { ++ .rpm = 5000, ++ .ctrl_val = 0, ++ } ++}; ++ ++static int lsvl_gpio_list[] = { ++ LSVL_GPIO_FAN_HIGH, LSVL_GPIO_FAN_LOW, ++}; ++ ++static struct gpio_fan_platform_data lsvl_fan_data = { ++ .num_ctrl = ARRAY_SIZE(lsvl_gpio_list), ++ .ctrl = lsvl_gpio_list, ++ .alarm = &lsvl_alarm, ++ .num_speed = ARRAY_SIZE(lsvl_speeds), ++ .speed = lsvl_speeds, ++}; ++ ++static struct platform_device lsvl_fan_device = { ++ .name = "gpio-fan", ++ .id = -1, ++ .num_resources = 0, ++ .dev = { ++ .platform_data = &lsvl_fan_data, ++ }, ++}; ++ ++/***************************************************************************** ++ * GPIO Data ++ ****************************************************************************/ ++ ++static unsigned int lsvl_mpp_config[] __initdata = { ++ MPP0_NF_IO2, ++ MPP1_NF_IO3, ++ MPP2_NF_IO4, ++ MPP3_NF_IO5, ++ MPP4_NF_IO6, ++ MPP5_NF_IO7, ++ MPP6_SYSRST_OUTn, ++ MPP7_SPI_SCn, ++ MPP8_GPIO, /* HDD Power */ ++ MPP9_GPIO, ++ MPP10_UART0_TXD, ++ MPP11_UART0_RXD, ++ MPP12_GPO, /* USB VBUS EN */ ++ MPP13_GPIO, ++ MPP14_GPIO, ++ MPP15_GPIO, ++ MPP16_GPIO, /* FAN HIGH: on:0, off:1 */ ++ MPP17_GPIO, /* FAN LOW: on:0, off:1 */ ++ MPP18_NF_IO0, ++ MPP19_NF_IO1, ++ MPP20_GPIO, ++ MPP21_GPIO, ++ MPP22_GPIO, ++ MPP23_GPIO, ++ MPP24_GPIO, ++ MPP25_GPIO, ++ MPP26_GPIO, ++ MPP27_GPIO, ++ MPP28_GPIO, ++ MPP29_GPIO, ++ MPP30_GPIO, ++ MPP31_GPIO, ++ MPP32_GPIO, ++ MPP33_GPO, ++ MPP34_GPIO, ++ MPP35_GPIO, ++ MPP36_GPIO, /* ALARM LED */ ++ MPP37_GPIO, /* FUNC RED LED */ ++ MPP38_GPIO, /* INFO LED */ ++ MPP39_GPIO, /* FUNC LED */ ++ MPP40_GPIO, /* POWER LED */ ++ MPP41_GPIO, ++ MPP42_GPIO, ++ MPP43_GPIO, /* FAN LOCK */ ++ MPP44_GPIO, ++ MPP45_GPIO, /* FUNC SW */ ++ MPP46_GPIO, /* POWER SW */ ++ MPP47_GPIO, /* POWER AUTO SW */ ++ MPP48_GPIO, /* UART EN */ ++ MPP49_GPIO, ++ 0 ++}; ++ ++/***************************************************************************** ++ * LS-VL specific power off method: reboot ++ ****************************************************************************/ ++/* ++ * On the LS-VL, the shutdown process is following: ++ * - Userland monitors key events until the power switch goes to off position ++ * - The board reboots ++ * - U-boot starts and goes into an idle mode waiting for the user ++ * to move the switch to ON position ++ * ++ */ ++ ++static void lsvl_power_off(void) ++{ ++ arm_machine_restart('h', NULL); ++} ++ ++static void __init lsvl_init(void) ++{ ++ /* ++ * Basic setup. 
Needs to be called early. ++ */ ++ kirkwood_init(); ++ kirkwood_mpp_conf(lsvl_mpp_config); ++ ++ /* ++ * Configure peripherals. ++ */ ++ kirkwood_uart0_init(); ++ kirkwood_ehci_init(); ++ kirkwood_ge00_init(&lsvl_ge00_data); ++ kirkwood_sata_init(&lsvl_sata_data); ++ kirkwood_spi_init(); ++ ++ platform_device_register(&lsvl_leds); ++// platform_device_register(&lsvl_button_device); ++ platform_device_register(&lsvl_fan_device); ++ ++ spi_register_board_info(lsvl_spi_slave_info, ++ ARRAY_SIZE(lsvl_spi_slave_info)); ++ ++ /* usb power on */ ++ gpio_set_value(LSVL_GPIO_USB_POWER, 1); ++ ++ /* register power-off method */ ++ pm_power_off = lsvl_power_off; ++ ++ pr_info("%s: finished\n", __func__); ++} ++ ++MACHINE_START(LSVL, "Buffalo LS-VL Series") ++ .atag_offset = 0x100, ++ .init_machine = lsvl_init, ++ .map_io = kirkwood_map_io, ++ .init_early = kirkwood_init_early, ++ .init_irq = kirkwood_init_irq, ++ .timer = &kirkwood_timer, ++MACHINE_END ++ +diff -uNr linux-3.2.33-go.orig/arch/arm/mach-kirkwood/Makefile linux-3.2.33-go/arch/arm/mach-kirkwood/Makefile +--- linux-3.2.33-go.orig/arch/arm/mach-kirkwood/Makefile 2012-11-14 21:20:22.326388580 +0100 ++++ linux-3.2.33-go/arch/arm/mach-kirkwood/Makefile 2012-11-14 21:22:20.882968794 +0100 +@@ -19,5 +19,6 @@ + obj-$(CONFIG_MACH_NET5BIG_V2) += netxbig_v2-setup.o lacie_v2-common.o + obj-$(CONFIG_MACH_T5325) += t5325-setup.o + obj-$(CONFIG_MACH_LSXHL) += lsxhl-setup.o ++obj-$(CONFIG_MACH_LSVL) += lsvl-setup.o + + obj-$(CONFIG_CPU_IDLE) += cpuidle.o +diff -uNr linux-3.2.33-go.orig/arch/arm/tools/mach-types linux-3.2.33-go/arch/arm/tools/mach-types +--- linux-3.2.33-go.orig/arch/arm/tools/mach-types 2012-11-14 21:20:22.348388327 +0100 ++++ linux-3.2.33-go/arch/arm/tools/mach-types 2012-11-14 21:21:02.356908648 +0100 +@@ -118,6 +118,7 @@ + omap_osk MACH_OMAP_OSK OMAP_OSK 515 + tosa MACH_TOSA TOSA 520 + avila MACH_AVILA AVILA 526 ++lsvl MACH_LSVL LSVL 5277 + edb9302 MACH_EDB9302 EDB9302 538 + husky MACH_HUSKY HUSKY 543 + shepherd MACH_SHEPHERD SHEPHERD 545 diff --git a/3.2.34/v3.2-ARM-kirkwood-Add-support-for-Buffalo-LS-WVL.patch b/3.2.34/v3.2-ARM-kirkwood-Add-support-for-Buffalo-LS-WVL.patch new file mode 100644 index 0000000..7d272d7 --- /dev/null +++ b/3.2.34/v3.2-ARM-kirkwood-Add-support-for-Buffalo-LS-WVL.patch @@ -0,0 +1,538 @@ +diff -uNr linux-3.2.34-go.orig/arch/arm/mach-kirkwood/Kconfig linux-3.2.34-go/arch/arm/mach-kirkwood/Kconfig +--- linux-3.2.34-go.orig/arch/arm/mach-kirkwood/Kconfig 2012-11-19 21:03:42.654743005 +0100 ++++ linux-3.2.34-go/arch/arm/mach-kirkwood/Kconfig 2012-11-19 21:04:02.744505974 +0100 +@@ -148,6 +148,12 @@ + Say 'Y' here if you want your kernel to support the + Buffalo LS-CHLv2 Series. + ++config MACH_LSWVL ++ bool "Buffalo LS-WVL Series" ++ help ++ Say 'Y' here if you want your kernel to support the ++ Buffalo LS-WVL/E-AP NAS ++ + endmenu + + endif +diff -uNr linux-3.2.34-go.orig/arch/arm/mach-kirkwood/lswvl-setup.c linux-3.2.34-go/arch/arm/mach-kirkwood/lswvl-setup.c +--- linux-3.2.34-go.orig/arch/arm/mach-kirkwood/lswvl-setup.c 1970-01-01 01:00:00.000000000 +0100 ++++ linux-3.2.34-go/arch/arm/mach-kirkwood/lswvl-setup.c 2012-11-19 21:04:02.745505962 +0100 +@@ -0,0 +1,366 @@ ++/* ++ * arch/arm/mach-kirkwood/lswvl-setup.c ++ * ++ * Buffalo LS-WVL Series Setup ++ * ++ * This file is licensed under the terms of the GNU General Public ++ * License version 2. This program is licensed "as is" without any ++ * warranty of any kind, whether express or implied. 
++ */ ++ ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include "common.h" ++#include "mpp.h" ++ ++ ++/***************************************************************************** ++ * 512MB NAND Flash on Device bus CS0 ++ ****************************************************************************/ ++static struct mtd_partition lswvl_nand_parts[] = { ++ { ++ .name = "boot", ++ .offset = 0, ++ .size = 16 * 1024 * 1024, ++ }, { ++ .name = "rootfs", ++ .offset = MTDPART_OFS_NXTBLK, ++ .size = 488 * 1024 * 1024, ++ }, { ++ .name = "reserve", ++ .offset = MTDPART_OFS_NXTBLK, ++ .size = MTDPART_SIZ_FULL, ++ }, ++}; ++ ++/***************************************************************************** ++ * 512KB NOR Flash on BOOT Device ++ ****************************************************************************/ ++static struct mtd_partition lswvl_partitions[] = { ++ { ++ .name = "u-boot", ++ .size = 0x80000, ++ .offset = 0x00000, ++ .mask_flags = MTD_WRITEABLE, /* force read-only */ ++ }, ++}; ++ ++static struct flash_platform_data lswvl_spi_slave_data = { ++ .parts = lswvl_partitions, ++ .nr_parts = ARRAY_SIZE(lswvl_partitions), ++}; ++ ++static struct spi_board_info __initdata lswvl_spi_slave_info[] = { ++ { ++ .modalias = "m25p80", ++ .platform_data = &lswvl_spi_slave_data, ++ .irq = -1, ++ .max_speed_hz = 20000000, ++ .bus_num = 0, ++ .chip_select = 0, ++ }, ++}; ++ ++/***************************************************************************** ++ * Ethernet ++ ****************************************************************************/ ++static struct mv643xx_eth_platform_data lswvl_ge00_data = { ++ .phy_addr = MV643XX_ETH_PHY_ADDR(0), ++}; ++ ++/***************************************************************************** ++ * SATA ++ ****************************************************************************/ ++static struct mv_sata_platform_data lswvl_sata_data = { ++ .n_ports = 2, ++}; ++ ++/***************************************************************************** ++ * LEDs attached to GPIO ++ ****************************************************************************/ ++#define LSWVL_GPIO_LED_HDDERR0 34 ++#define LSWVL_GPIO_LED_HDDERR1 35 ++#define LSWVL_GPIO_LED_ALARM 36 ++#define LSWVL_GPIO_LED_FUNC_RED 37 ++#define LSWVL_GPIO_LED_INFO 38 ++#define LSWVL_GPIO_LED_FUNC_BLUE 39 ++#define LSWVL_GPIO_LED_PWR 40 ++ ++static struct gpio_led lswvl_led_pins[] = { ++ { ++ .name = "lswvl:hdderr:0", ++ .gpio = LSWVL_GPIO_LED_HDDERR0, ++ }, { ++ .name = "lswvl:hdderr:1", ++ .gpio = LSWVL_GPIO_LED_HDDERR1, ++ }, { ++ .name = "lswvl:alarm:red", ++ .gpio = LSWVL_GPIO_LED_ALARM, ++ }, { ++ .name = "lswvl:func:red", ++ .gpio = LSWVL_GPIO_LED_FUNC_RED, ++ }, { ++ .name = "lswvl:info:amber", ++ .gpio = LSWVL_GPIO_LED_INFO, ++ }, { ++ .name = "lswvl:func:blue", ++ .gpio = LSWVL_GPIO_LED_FUNC_BLUE, ++ }, { ++ .name = "lswvl:power:blue", ++ .default_trigger = "default-on", ++ .gpio = LSWVL_GPIO_LED_PWR, ++ .active_low = 1, ++ }, ++}; ++ ++static struct gpio_led_platform_data lswvl_led_data = { ++ .leds = lswvl_led_pins, ++ .num_leds = ARRAY_SIZE(lswvl_led_pins), ++}; ++ ++static struct platform_device lswvl_leds = { ++ .name = "leds-gpio", ++ .id = -1, ++ .dev = { ++ .platform_data = &lswvl_led_data, ++ } ++}; ++ ++/***************************************************************************** ++ * General Setup ++ 
****************************************************************************/ ++#define LSWVL_GPIO_HDD0_POWER 8 ++#define LSWVL_GPIO_HDD1_POWER 9 ++#define LSWVL_GPIO_USB_POWER 12 ++ ++/***************************************************************************** ++ * GPIO Attached Keys ++ ****************************************************************************/ ++#define LSWVL_GPIO_KEY_FUNC 45 ++#define LSWVL_GPIO_KEY_POWER 46 ++#define LSWVL_GPIO_KEY_AUTOPOWER 47 ++#define LSWVL_SW_POWER 0x00 ++#define LSWVL_SW_AUTOPOWER 0x01 ++#define LSWVL_SW_FUNC 0x02 ++ ++static struct gpio_keys_button lswvl_buttons[] = { ++ { ++ .type = EV_KEY, ++ .code = BTN_1, ++ .gpio = LSWVL_GPIO_KEY_POWER, ++ .desc = "power-on", ++ .active_low = 1, ++ }, { ++ .type = EV_KEY, ++ .code = BTN_2, ++ .gpio = LSWVL_GPIO_KEY_AUTOPOWER, ++ .desc = "power-auto", ++ .active_low = 1, ++ }, { ++ .type = EV_KEY, ++ .code = BTN_0, ++ .gpio = LSWVL_GPIO_KEY_FUNC, ++ .desc = "function", ++ .active_low = 1, ++ }, ++}; ++ ++static struct gpio_keys_platform_data lswvl_button_data = { ++ .buttons = lswvl_buttons, ++ .nbuttons = ARRAY_SIZE(lswvl_buttons), ++}; ++ ++static struct platform_device lswvl_button_device = { ++ .name = "gpio-keys", ++ .id = -1, ++ .num_resources = 0, ++ .dev = { ++ .platform_data = &lswvl_button_data, ++ }, ++}; ++ ++/***************************************************************************** ++ * GPIO Fan ++ ****************************************************************************/ ++#define LSWVL_GPIO_FAN_HIGH 16 ++#define LSWVL_GPIO_FAN_LOW 17 ++#define LSWVL_GPIO_FAN_LOCK 43 ++ ++static struct gpio_fan_alarm lswvl_alarm = { ++ .gpio = LSWVL_GPIO_FAN_LOCK, ++}; ++ ++static struct gpio_fan_speed lswvl_speeds[] = { ++ { ++ .rpm = 0, ++ .ctrl_val = 3, ++ }, { ++ .rpm = 1500, ++ .ctrl_val = 1, ++ }, { ++ .rpm = 3250, ++ .ctrl_val = 2, ++ }, { ++ .rpm = 5000, ++ .ctrl_val = 0, ++ } ++}; ++ ++static int lswvl_gpio_list[] = { ++ LSWVL_GPIO_FAN_HIGH, LSWVL_GPIO_FAN_LOW, ++}; ++ ++static struct gpio_fan_platform_data lswvl_fan_data = { ++ .num_ctrl = ARRAY_SIZE(lswvl_gpio_list), ++ .ctrl = lswvl_gpio_list, ++ .alarm = &lswvl_alarm, ++ .num_speed = ARRAY_SIZE(lswvl_speeds), ++ .speed = lswvl_speeds, ++}; ++ ++static struct platform_device lswvl_fan_device = { ++ .name = "gpio-fan", ++ .id = -1, ++ .num_resources = 0, ++ .dev = { ++ .platform_data = &lswvl_fan_data, ++ }, ++}; ++ ++/***************************************************************************** ++ * GPIO Data ++ ****************************************************************************/ ++ ++static unsigned int lswvl_mpp_config[] __initdata = { ++ MPP0_NF_IO2, ++ MPP1_NF_IO3, ++ MPP2_NF_IO4, ++ MPP3_NF_IO5, ++ MPP4_NF_IO6, ++ MPP5_NF_IO7, ++ MPP6_SYSRST_OUTn, ++ MPP7_SPI_SCn, ++ MPP8_GPIO, /* HDD Power */ ++ MPP9_GPIO, /* HDD Power */ ++ MPP10_UART0_TXD, ++ MPP11_UART0_RXD, ++ MPP12_GPO, /* USB VBUS EN */ ++ MPP13_GPIO, ++ MPP14_GPIO, ++ MPP15_GPIO, ++ MPP16_GPIO, /* FAN HIGH: on:0, off:1 */ ++ MPP17_GPIO, /* FAN LOW: on:0, off:1 */ ++ MPP18_NF_IO0, ++ MPP19_NF_IO1, ++ MPP20_GPIO, ++ MPP21_GPIO, ++ MPP22_GPIO, ++ MPP23_GPIO, ++ MPP24_GPIO, ++ MPP25_GPIO, ++ MPP26_GPIO, ++ MPP27_GPIO, ++ MPP28_GPIO, ++ MPP29_GPIO, ++ MPP30_GPIO, ++ MPP31_GPIO, ++ MPP32_GPIO, ++ MPP33_GPO, ++ MPP34_GPIO, /*HDD ERROR LED 0*/ ++ MPP35_GPIO, /*HDD ERROR LED 1*/ ++ MPP36_GPIO, /* ALARM LED */ ++ MPP37_GPIO, /* FUNC RED LED */ ++ MPP38_GPIO, /* INFO LED */ ++ MPP39_GPIO, /* FUNC LED */ ++ MPP40_GPIO, /* POWER LED */ ++ MPP41_GPIO, ++ MPP42_GPIO, ++ MPP43_GPIO, 
/* FAN LOCK */ ++ MPP44_GPIO, ++ MPP45_GPIO, /* FUNC SW */ ++ MPP46_GPIO, /* POWER SW */ ++ MPP47_GPIO, /* POWER AUTO SW */ ++ MPP48_GPIO, /* UART EN */ ++ MPP49_GPIO, ++ 0 ++}; ++ ++/***************************************************************************** ++ * LS-WVL specific power off method: reboot ++ ****************************************************************************/ ++/* ++ * On the LS-WVL, the shutdown process is following: ++ * - Userland monitors key events until the power switch goes to off position ++ * - The board reboots ++ * - U-boot starts and goes into an idle mode waiting for the user ++ * to move the switch to ON position ++ * ++ */ ++ ++static void lswvl_power_off(void) ++{ ++ arm_machine_restart('h', NULL); //kirkwood_restart('h', NULL); ++} ++ ++static void __init lswvl_init(void) ++{ ++ /* ++ * Basic setup. Needs to be called early. ++ */ ++ kirkwood_init(); ++ kirkwood_mpp_conf(lswvl_mpp_config); ++ ++ /* ++ * Configure peripherals. ++ */ ++ kirkwood_ge00_init(&lswvl_ge00_data); ++ kirkwood_uart0_init(); ++ kirkwood_uart1_init(); ++ kirkwood_ehci_init(); ++ kirkwood_sata_init(&lswvl_sata_data); ++ ++ spi_register_board_info(lswvl_spi_slave_info, ++ ARRAY_SIZE(lswvl_spi_slave_info)); ++ kirkwood_spi_init(); ++ kirkwood_nand_init(ARRAY_AND_SIZE(lswvl_nand_parts), 25); ++ ++ platform_device_register(&lswvl_leds); ++ platform_device_register(&lswvl_button_device); ++ platform_device_register(&lswvl_fan_device); ++ ++ /* usb power on */ ++ gpio_set_value(LSWVL_GPIO_USB_POWER, 1); ++ ++ /* register power-off method */ ++ pm_power_off = lswvl_power_off; ++ ++ pr_info("%s: finished\n", __func__); ++} ++ ++MACHINE_START(LSWVL, "Buffalo LS-WVL Series") ++ .atag_offset = 0x100, ++ .map_io = kirkwood_map_io, ++ .init_early = kirkwood_init_early, ++ .init_irq = kirkwood_init_irq, ++ .timer = &kirkwood_timer, ++ .init_machine = lswvl_init, ++ // .restart = kirkwood_restart, ++MACHINE_END ++ +diff -uNr linux-3.2.34-go.orig/arch/arm/mach-kirkwood/Makefile linux-3.2.34-go/arch/arm/mach-kirkwood/Makefile +--- linux-3.2.34-go.orig/arch/arm/mach-kirkwood/Makefile 2012-11-19 21:03:42.653743017 +0100 ++++ linux-3.2.34-go/arch/arm/mach-kirkwood/Makefile 2012-11-19 21:04:42.686036907 +0100 +@@ -21,5 +21,6 @@ + obj-$(CONFIG_MACH_LINKSTATION_CHLV2) += lschlv2-setup.o + obj-$(CONFIG_MACH_LSXHL) += lsxhl-setup.o + obj-$(CONFIG_MACH_LSVL) += lsvl-setup.o ++obj-$(CONFIG_MACH_LSWVL) += lswvl-setup.o + + obj-$(CONFIG_CPU_IDLE) += cpuidle.o +diff -uNr linux-3.2.34-go.orig/arch/arm/plat-orion/mpp.c linux-3.2.34-go/arch/arm/plat-orion/mpp.c +--- linux-3.2.34-go.orig/arch/arm/plat-orion/mpp.c 2012-11-19 21:03:42.766741717 +0100 ++++ linux-3.2.34-go/arch/arm/plat-orion/mpp.c 2012-11-19 21:04:02.747505938 +0100 +@@ -15,6 +15,7 @@ + #include + #include + #include ++#include + + /* Address of the ith MPP control register */ + static __init unsigned long mpp_ctrl_addr(unsigned int i, +@@ -75,3 +76,37 @@ + } + printk("\n"); + } ++ ++#ifdef CONFIG_MACH_LSWVL ++ ++static u32 boot_mpp_value = 0x21111111; ++/* ++ * change MPP[3:1] to SPI mode ++ */ ++void lswvl_setup_spi_mpp(void) ++{ ++ u32 spival = 0; ++ u32 bootval = 0; ++ ++ spival = 0x00002220; ++ boot_mpp_value = bootval = readl(mpp_ctrl_addr(0, DEV_BUS_VIRT_BASE)); ++ bootval &= 0xffff000f; ++ writel(spival | bootval, mpp_ctrl_addr(0, DEV_BUS_VIRT_BASE)); ++} ++ ++/* ++ * change back MPP[3:1] to default configuration ++ */ ++void lswvl_reset_mpp(void) ++{ ++ u32 spival = 0; ++ u32 bootval = 0; ++ ++ spival = readl(mpp_ctrl_addr(0, 
DEV_BUS_VIRT_BASE)); ++ spival &= 0xffff000f; ++ bootval = boot_mpp_value & ~0xffff000f; ++ writel(spival | bootval, mpp_ctrl_addr(0, DEV_BUS_VIRT_BASE)); ++} ++ ++#endif ++ +diff -uNr linux-3.2.34-go.orig/arch/arm/tools/mach-types linux-3.2.34-go/arch/arm/tools/mach-types +--- linux-3.2.34-go.orig/arch/arm/tools/mach-types 2012-11-19 21:03:42.675742765 +0100 ++++ linux-3.2.34-go/arch/arm/tools/mach-types 2012-11-19 21:22:29.653445807 +0100 +@@ -119,6 +119,7 @@ + tosa MACH_TOSA TOSA 520 + avila MACH_AVILA AVILA 526 + lsvl MACH_LSVL LSVL 5277 ++lswvl MACH_LSWVL LSWVL 5278 + edb9302 MACH_EDB9302 EDB9302 538 + husky MACH_HUSKY HUSKY 543 + shepherd MACH_SHEPHERD SHEPHERD 545 +diff -uNr linux-3.2.34-go.orig/drivers/spi/spi-orion.c linux-3.2.34-go/drivers/spi/spi-orion.c +--- linux-3.2.34-go.orig/drivers/spi/spi-orion.c 2012-11-19 21:03:41.809752734 +0100 ++++ linux-3.2.34-go/drivers/spi/spi-orion.c 2012-11-19 21:20:55.123558883 +0100 +@@ -19,6 +19,12 @@ + #include + #include + #include ++#include ++ ++#ifdef CONFIG_MACH_LSWVL ++void lswvl_setup_spi_mpp(void); ++void lswvl_reset_mpp(void); ++#endif + + #define DRIVER_NAME "orion_spi" + +@@ -141,6 +147,9 @@ + unsigned int bits_per_word = spi->bits_per_word; + int rc; + ++#ifdef CONFIG_MACH_LSWVL ++ lswvl_setup_spi_mpp(); ++#endif + orion_spi = spi_master_get_devdata(spi->master); + + if ((t != NULL) && t->speed_hz) +@@ -153,15 +162,37 @@ + if (rc) + return rc; + ++#ifdef CONFIG_MACH_LSWVL ++ rc = orion_spi_set_transfer_size(orion_spi, bits_per_word); ++ lswvl_reset_mpp(); ++ return rc; ++#else + return orion_spi_set_transfer_size(orion_spi, bits_per_word); ++#endif + } + + static void orion_spi_set_cs(struct orion_spi *orion_spi, int enable) + { + if (enable) ++#ifdef CONFIG_MACH_LSWVL ++ { ++ lswvl_setup_spi_mpp(); ++ udelay(1); ++ orion_spi_setbits(orion_spi, ORION_SPI_IF_CTRL_REG, 0x1); ++ } ++#else + orion_spi_setbits(orion_spi, ORION_SPI_IF_CTRL_REG, 0x1); ++#endif + else + orion_spi_clrbits(orion_spi, ORION_SPI_IF_CTRL_REG, 0x1); ++#ifdef CONFIG_MACH_LSWVL ++ { ++ orion_spi_clrbits(orion_spi, ORION_SPI_IF_CTRL_REG, 0x1); ++ lswvl_reset_mpp(); ++ } ++#else ++ orion_spi_clrbits(orion_spi, ORION_SPI_IF_CTRL_REG, 0x1); ++#endif + } + + static inline int orion_spi_wait_till_ready(struct orion_spi *orion_spi) +@@ -361,8 +392,17 @@ + + /* Fix ac timing if required. 
*/ + if (orion_spi->spi_info->enable_clock_fix) ++#ifdef CONFIG_MACH_LSWVL ++ { ++ lswvl_setup_spi_mpp(); ++ orion_spi_setbits(orion_spi, ORION_SPI_IF_CONFIG_REG, ++ (1 << 14)); ++ lswvl_reset_mpp(); ++ } ++#else + orion_spi_setbits(orion_spi, ORION_SPI_IF_CONFIG_REG, + (1 << 14)); ++#endif + + if ((spi->max_speed_hz == 0) + || (spi->max_speed_hz > orion_spi->max_speed)) diff --git a/3.2.34/vserver-3.2.34-vs2.3.2.15.patch b/3.2.34/vserver-3.2.34-vs2.3.2.15.patch new file mode 100644 index 0000000..7fdd459 --- /dev/null +++ b/3.2.34/vserver-3.2.34-vs2.3.2.15.patch @@ -0,0 +1,26125 @@ +diff -NurpP --minimal linux-3.2.34/Documentation/vserver/debug.txt linux-3.2.34-vs2.3.2.15/Documentation/vserver/debug.txt +--- linux-3.2.34/Documentation/vserver/debug.txt 1970-01-01 01:00:00.000000000 +0100 ++++ linux-3.2.34-vs2.3.2.15/Documentation/vserver/debug.txt 2011-12-05 19:33:02.000000000 +0100 +@@ -0,0 +1,154 @@ ++ ++debug_cvirt: ++ ++ 2 4 "vx_map_tgid: %p/%llx: %d -> %d" ++ "vx_rmap_tgid: %p/%llx: %d -> %d" ++ ++debug_dlim: ++ ++ 0 1 "ALLOC (%p,#%d)%c inode (%d)" ++ "FREE (%p,#%d)%c inode" ++ 1 2 "ALLOC (%p,#%d)%c %lld bytes (%d)" ++ "FREE (%p,#%d)%c %lld bytes" ++ 2 4 "ADJUST: %lld,%lld on %ld,%ld [mult=%d]" ++ 3 8 "ext3_has_free_blocks(%p): %lu<%lu+1, %c, %u!=%u r=%d" ++ "ext3_has_free_blocks(%p): free=%lu, root=%lu" ++ "rcu_free_dl_info(%p)" ++ 4 10 "alloc_dl_info(%p,%d) = %p" ++ "dealloc_dl_info(%p)" ++ "get_dl_info(%p[#%d.%d])" ++ "put_dl_info(%p[#%d.%d])" ++ 5 20 "alloc_dl_info(%p,%d)*" ++ 6 40 "__hash_dl_info: %p[#%d]" ++ "__unhash_dl_info: %p[#%d]" ++ 7 80 "locate_dl_info(%p,#%d) = %p" ++ ++debug_misc: ++ ++ 0 1 "destroy_dqhash: %p [#0x%08x] c=%d" ++ "new_dqhash: %p [#0x%08x]" ++ "vroot[%d]_clr_dev: dev=%p[%lu,%d:%d]" ++ "vroot[%d]_get_real_bdev: dev=%p[%lu,%d:%d]" ++ "vroot[%d]_set_dev: dev=%p[%lu,%d:%d]" ++ "vroot_get_real_bdev not set" ++ 1 2 "cow_break_link(»%s«)" ++ "temp copy »%s«" ++ 2 4 "dentry_open(new): %p" ++ "dentry_open(old): %p" ++ "lookup_create(new): %p" ++ "old path »%s«" ++ "path_lookup(old): %d" ++ "vfs_create(new): %d" ++ "vfs_rename: %d" ++ "vfs_sendfile: %d" ++ 3 8 "fput(new_file=%p[#%d])" ++ "fput(old_file=%p[#%d])" ++ 4 10 "vx_info_kill(%p[#%d],%d,%d) = %d" ++ "vx_info_kill(%p[#%d],%d,%d)*" ++ 5 20 "vs_reboot(%p[#%d],%d)" ++ 6 40 "dropping task %p[#%u,%u] for %p[#%u,%u]" ++ ++debug_net: ++ ++ 2 4 "nx_addr_conflict(%p,%p) %d.%d,%d.%d" ++ 3 8 "inet_bind(%p) %d.%d.%d.%d, %d.%d.%d.%d, %d.%d.%d.%d" ++ "inet_bind(%p)* %p,%p;%lx %d.%d.%d.%d" ++ 4 10 "ip_route_connect(%p) %p,%p;%lx" ++ 5 20 "__addr_in_socket(%p,%d.%d.%d.%d) %p:%d.%d.%d.%d %p;%lx" ++ 6 40 "sk,egf: %p [#%d] (from %d)" ++ "sk,egn: %p [#%d] (from %d)" ++ "sk,req: %p [#%d] (from %d)" ++ "sk: %p [#%d] (from %d)" ++ "tw: %p [#%d] (from %d)" ++ 7 80 "__sock_recvmsg: %p[%p,%p,%p;%d]:%d/%d" ++ "__sock_sendmsg: %p[%p,%p,%p;%d]:%d/%d" ++ ++debug_nid: ++ ++ 0 1 "__lookup_nx_info(#%u): %p[#%u]" ++ "alloc_nx_info(%d) = %p" ++ "create_nx_info(%d) (dynamic rejected)" ++ "create_nx_info(%d) = %p (already there)" ++ "create_nx_info(%d) = %p (new)" ++ "dealloc_nx_info(%p)" ++ 1 2 "alloc_nx_info(%d)*" ++ "create_nx_info(%d)*" ++ 2 4 "get_nx_info(%p[#%d.%d])" ++ "put_nx_info(%p[#%d.%d])" ++ 3 8 "claim_nx_info(%p[#%d.%d.%d]) %p" ++ "clr_nx_info(%p[#%d.%d])" ++ "init_nx_info(%p[#%d.%d])" ++ "release_nx_info(%p[#%d.%d.%d]) %p" ++ "set_nx_info(%p[#%d.%d])" ++ 4 10 "__hash_nx_info: %p[#%d]" ++ "__nx_dynamic_id: [#%d]" ++ "__unhash_nx_info: %p[#%d.%d.%d]" ++ 5 20 "moved task %p into nxi:%p[#%d]" ++ "nx_migrate_task(%p,%p[#%d.%d.%d])" 
++ "task_get_nx_info(%p)" ++ 6 40 "nx_clear_persistent(%p[#%d])" ++ ++debug_quota: ++ ++ 0 1 "quota_sync_dqh(%p,%d) discard inode %p" ++ 1 2 "quota_sync_dqh(%p,%d)" ++ "sync_dquots(%p,%d)" ++ "sync_dquots_dqh(%p,%d)" ++ 3 8 "do_quotactl(%p,%d,cmd=%d,id=%d,%p)" ++ ++debug_switch: ++ ++ 0 1 "vc: VCMD_%02d_%d[%d], %d,%p [%d,%d,%x,%x]" ++ 1 2 "vc: VCMD_%02d_%d[%d] = %08lx(%ld) [%d,%d]" ++ 4 10 "%s: (%s %s) returned %s with %d" ++ ++debug_tag: ++ ++ 7 80 "dx_parse_tag(»%s«): %d:#%d" ++ "dx_propagate_tag(%p[#%lu.%d]): %d,%d" ++ ++debug_xid: ++ ++ 0 1 "__lookup_vx_info(#%u): %p[#%u]" ++ "alloc_vx_info(%d) = %p" ++ "alloc_vx_info(%d)*" ++ "create_vx_info(%d) (dynamic rejected)" ++ "create_vx_info(%d) = %p (already there)" ++ "create_vx_info(%d) = %p (new)" ++ "dealloc_vx_info(%p)" ++ "loc_vx_info(%d) = %p (found)" ++ "loc_vx_info(%d) = %p (new)" ++ "loc_vx_info(%d) = %p (not available)" ++ 1 2 "create_vx_info(%d)*" ++ "loc_vx_info(%d)*" ++ 2 4 "get_vx_info(%p[#%d.%d])" ++ "put_vx_info(%p[#%d.%d])" ++ 3 8 "claim_vx_info(%p[#%d.%d.%d]) %p" ++ "clr_vx_info(%p[#%d.%d])" ++ "init_vx_info(%p[#%d.%d])" ++ "release_vx_info(%p[#%d.%d.%d]) %p" ++ "set_vx_info(%p[#%d.%d])" ++ 4 10 "__hash_vx_info: %p[#%d]" ++ "__unhash_vx_info: %p[#%d.%d.%d]" ++ "__vx_dynamic_id: [#%d]" ++ 5 20 "enter_vx_info(%p[#%d],%p) %p[#%d,%p]" ++ "leave_vx_info(%p[#%d,%p]) %p[#%d,%p]" ++ "moved task %p into vxi:%p[#%d]" ++ "task_get_vx_info(%p)" ++ "vx_migrate_task(%p,%p[#%d.%d])" ++ 6 40 "vx_clear_persistent(%p[#%d])" ++ "vx_exit_init(%p[#%d],%p[#%d,%d,%d])" ++ "vx_set_init(%p[#%d],%p[#%d,%d,%d])" ++ "vx_set_persistent(%p[#%d])" ++ "vx_set_reaper(%p[#%d],%p[#%d,%d])" ++ 7 80 "vx_child_reaper(%p[#%u,%u]) = %p[#%u,%u]" ++ ++ ++debug_limit: ++ ++ n 2^n "vx_acc_cres[%5d,%s,%2d]: %5d%s" ++ "vx_cres_avail[%5d,%s,%2d]: %5ld > %5d + %5d" ++ ++ m 2^m "vx_acc_page[%5d,%s,%2d]: %5d%s" ++ "vx_acc_pages[%5d,%s,%2d]: %5d += %5d" ++ "vx_pages_avail[%5d,%s,%2d]: %5ld > %5d + %5d" +diff -NurpP --minimal linux-3.2.34/arch/alpha/Kconfig linux-3.2.34-vs2.3.2.15/arch/alpha/Kconfig +--- linux-3.2.34/arch/alpha/Kconfig 2012-01-09 16:13:54.000000000 +0100 ++++ linux-3.2.34-vs2.3.2.15/arch/alpha/Kconfig 2011-12-05 19:33:02.000000000 +0100 +@@ -665,6 +665,8 @@ config DUMMY_CONSOLE + depends on VGA_HOSE + default y + ++source "kernel/vserver/Kconfig" ++ + source "security/Kconfig" + + source "crypto/Kconfig" +diff -NurpP --minimal linux-3.2.34/arch/alpha/kernel/entry.S linux-3.2.34-vs2.3.2.15/arch/alpha/kernel/entry.S +--- linux-3.2.34/arch/alpha/kernel/entry.S 2010-10-21 13:06:45.000000000 +0200 ++++ linux-3.2.34-vs2.3.2.15/arch/alpha/kernel/entry.S 2011-12-05 19:33:02.000000000 +0100 +@@ -860,24 +860,15 @@ sys_getxgid: + .globl sys_getxpid + .ent sys_getxpid + sys_getxpid: ++ lda $sp, -16($sp) ++ stq $26, 0($sp) + .prologue 0 +- ldq $2, TI_TASK($8) + +- /* See linux/kernel/timer.c sys_getppid for discussion +- about this loop. 
*/ +- ldq $3, TASK_GROUP_LEADER($2) +- ldq $4, TASK_REAL_PARENT($3) +- ldl $0, TASK_TGID($2) +-1: ldl $1, TASK_TGID($4) +-#ifdef CONFIG_SMP +- mov $4, $5 +- mb +- ldq $3, TASK_GROUP_LEADER($2) +- ldq $4, TASK_REAL_PARENT($3) +- cmpeq $4, $5, $5 +- beq $5, 1b +-#endif +- stq $1, 80($sp) ++ lda $16, 96($sp) ++ jsr $26, do_getxpid ++ ldq $26, 0($sp) ++ ++ lda $sp, 16($sp) + ret + .end sys_getxpid + +diff -NurpP --minimal linux-3.2.34/arch/alpha/kernel/ptrace.c linux-3.2.34-vs2.3.2.15/arch/alpha/kernel/ptrace.c +--- linux-3.2.34/arch/alpha/kernel/ptrace.c 2011-01-05 21:48:40.000000000 +0100 ++++ linux-3.2.34-vs2.3.2.15/arch/alpha/kernel/ptrace.c 2011-12-05 19:33:02.000000000 +0100 +@@ -13,6 +13,7 @@ + #include + #include + #include ++#include + + #include + #include +diff -NurpP --minimal linux-3.2.34/arch/alpha/kernel/systbls.S linux-3.2.34-vs2.3.2.15/arch/alpha/kernel/systbls.S +--- linux-3.2.34/arch/alpha/kernel/systbls.S 2012-01-09 16:13:54.000000000 +0100 ++++ linux-3.2.34-vs2.3.2.15/arch/alpha/kernel/systbls.S 2011-12-05 19:33:02.000000000 +0100 +@@ -446,7 +446,7 @@ sys_call_table: + .quad sys_stat64 /* 425 */ + .quad sys_lstat64 + .quad sys_fstat64 +- .quad sys_ni_syscall /* sys_vserver */ ++ .quad sys_vserver /* sys_vserver */ + .quad sys_ni_syscall /* sys_mbind */ + .quad sys_ni_syscall /* sys_get_mempolicy */ + .quad sys_ni_syscall /* sys_set_mempolicy */ +diff -NurpP --minimal linux-3.2.34/arch/alpha/kernel/traps.c linux-3.2.34-vs2.3.2.15/arch/alpha/kernel/traps.c +--- linux-3.2.34/arch/alpha/kernel/traps.c 2010-10-21 13:06:46.000000000 +0200 ++++ linux-3.2.34-vs2.3.2.15/arch/alpha/kernel/traps.c 2011-12-05 19:33:02.000000000 +0100 +@@ -183,7 +183,8 @@ die_if_kernel(char * str, struct pt_regs + #ifdef CONFIG_SMP + printk("CPU %d ", hard_smp_processor_id()); + #endif +- printk("%s(%d): %s %ld\n", current->comm, task_pid_nr(current), str, err); ++ printk("%s(%d[#%u]): %s %ld\n", current->comm, ++ task_pid_nr(current), current->xid, str, err); + dik_show_regs(regs, r9_15); + add_taint(TAINT_DIE); + dik_show_trace((unsigned long *)(regs+1)); +diff -NurpP --minimal linux-3.2.34/arch/arm/Kconfig linux-3.2.34-vs2.3.2.15/arch/arm/Kconfig +--- linux-3.2.34/arch/arm/Kconfig 2012-11-18 18:42:07.000000000 +0100 ++++ linux-3.2.34-vs2.3.2.15/arch/arm/Kconfig 2012-10-22 12:59:45.000000000 +0200 +@@ -2252,6 +2252,8 @@ source "fs/Kconfig" + + source "arch/arm/Kconfig.debug" + ++source "kernel/vserver/Kconfig" ++ + source "security/Kconfig" + + source "crypto/Kconfig" +diff -NurpP --minimal linux-3.2.34/arch/arm/kernel/calls.S linux-3.2.34-vs2.3.2.15/arch/arm/kernel/calls.S +--- linux-3.2.34/arch/arm/kernel/calls.S 2012-01-09 16:13:54.000000000 +0100 ++++ linux-3.2.34-vs2.3.2.15/arch/arm/kernel/calls.S 2011-12-05 19:33:02.000000000 +0100 +@@ -322,7 +322,7 @@ + /* 310 */ CALL(sys_request_key) + CALL(sys_keyctl) + CALL(ABI(sys_semtimedop, sys_oabi_semtimedop)) +-/* vserver */ CALL(sys_ni_syscall) ++ CALL(sys_vserver) + CALL(sys_ioprio_set) + /* 315 */ CALL(sys_ioprio_get) + CALL(sys_inotify_init) +diff -NurpP --minimal linux-3.2.34/arch/arm/kernel/process.c linux-3.2.34-vs2.3.2.15/arch/arm/kernel/process.c +--- linux-3.2.34/arch/arm/kernel/process.c 2012-11-18 18:42:07.000000000 +0100 ++++ linux-3.2.34-vs2.3.2.15/arch/arm/kernel/process.c 2012-08-13 12:40:51.000000000 +0200 +@@ -322,7 +322,8 @@ void __show_regs(struct pt_regs *regs) + void show_regs(struct pt_regs * regs) + { + printk("\n"); +- printk("Pid: %d, comm: %20s\n", task_pid_nr(current), current->comm); ++ printk("Pid: %d[#%u], comm: %20s\n", 
++ task_pid_nr(current), current->xid, current->comm); + __show_regs(regs); + dump_stack(); + } +diff -NurpP --minimal linux-3.2.34/arch/arm/kernel/traps.c linux-3.2.34-vs2.3.2.15/arch/arm/kernel/traps.c +--- linux-3.2.34/arch/arm/kernel/traps.c 2012-11-18 18:42:07.000000000 +0100 ++++ linux-3.2.34-vs2.3.2.15/arch/arm/kernel/traps.c 2012-10-22 12:59:46.000000000 +0200 +@@ -244,8 +244,8 @@ static int __die(const char *str, int er + + print_modules(); + __show_regs(regs); +- printk(KERN_EMERG "Process %.*s (pid: %d, stack limit = 0x%p)\n", +- TASK_COMM_LEN, tsk->comm, task_pid_nr(tsk), thread + 1); ++ printk(KERN_EMERG "Process %.*s (pid: %d:#%u, stack limit = 0x%p)\n", ++ TASK_COMM_LEN, tsk->comm, task_pid_nr(tsk), tsk->xid, thread + 1); + + if (!user_mode(regs) || in_interrupt()) { + dump_mem(KERN_EMERG, "Stack: ", regs->ARM_sp, +diff -NurpP --minimal linux-3.2.34/arch/cris/Kconfig linux-3.2.34-vs2.3.2.15/arch/cris/Kconfig +--- linux-3.2.34/arch/cris/Kconfig 2012-01-09 16:14:01.000000000 +0100 ++++ linux-3.2.34-vs2.3.2.15/arch/cris/Kconfig 2011-12-05 19:33:02.000000000 +0100 +@@ -678,6 +678,8 @@ source "drivers/staging/Kconfig" + + source "arch/cris/Kconfig.debug" + ++source "kernel/vserver/Kconfig" ++ + source "security/Kconfig" + + source "crypto/Kconfig" +diff -NurpP --minimal linux-3.2.34/arch/frv/kernel/kernel_thread.S linux-3.2.34-vs2.3.2.15/arch/frv/kernel/kernel_thread.S +--- linux-3.2.34/arch/frv/kernel/kernel_thread.S 2008-12-25 00:26:37.000000000 +0100 ++++ linux-3.2.34-vs2.3.2.15/arch/frv/kernel/kernel_thread.S 2011-12-05 19:33:02.000000000 +0100 +@@ -37,7 +37,7 @@ kernel_thread: + + # start by forking the current process, but with shared VM + setlos.p #__NR_clone,gr7 ; syscall number +- ori gr10,#CLONE_VM,gr8 ; first syscall arg [clone_flags] ++ ori gr10,#CLONE_KT,gr8 ; first syscall arg [clone_flags] + sethi.p #0xe4e4,gr9 ; second syscall arg [newsp] + setlo #0xe4e4,gr9 + setlos.p #0,gr10 ; third syscall arg [parent_tidptr] +diff -NurpP --minimal linux-3.2.34/arch/h8300/Kconfig linux-3.2.34-vs2.3.2.15/arch/h8300/Kconfig +--- linux-3.2.34/arch/h8300/Kconfig 2012-01-09 16:14:01.000000000 +0100 ++++ linux-3.2.34-vs2.3.2.15/arch/h8300/Kconfig 2011-12-05 19:33:02.000000000 +0100 +@@ -213,6 +213,8 @@ source "fs/Kconfig" + + source "arch/h8300/Kconfig.debug" + ++source "kernel/vserver/Kconfig" ++ + source "security/Kconfig" + + source "crypto/Kconfig" +diff -NurpP --minimal linux-3.2.34/arch/ia64/Kconfig linux-3.2.34-vs2.3.2.15/arch/ia64/Kconfig +--- linux-3.2.34/arch/ia64/Kconfig 2012-01-09 16:14:01.000000000 +0100 ++++ linux-3.2.34-vs2.3.2.15/arch/ia64/Kconfig 2011-12-05 19:33:02.000000000 +0100 +@@ -657,6 +657,8 @@ source "fs/Kconfig" + + source "arch/ia64/Kconfig.debug" + ++source "kernel/vserver/Kconfig" ++ + source "security/Kconfig" + + source "crypto/Kconfig" +diff -NurpP --minimal linux-3.2.34/arch/ia64/kernel/entry.S linux-3.2.34-vs2.3.2.15/arch/ia64/kernel/entry.S +--- linux-3.2.34/arch/ia64/kernel/entry.S 2012-11-18 18:42:08.000000000 +0100 ++++ linux-3.2.34-vs2.3.2.15/arch/ia64/kernel/entry.S 2012-05-21 18:54:41.000000000 +0200 +@@ -1714,7 +1714,7 @@ sys_call_table: + data8 sys_mq_notify + data8 sys_mq_getsetattr + data8 sys_kexec_load +- data8 sys_ni_syscall // reserved for vserver ++ data8 sys_vserver + data8 sys_waitid // 1270 + data8 sys_add_key + data8 sys_request_key +diff -NurpP --minimal linux-3.2.34/arch/ia64/kernel/process.c linux-3.2.34-vs2.3.2.15/arch/ia64/kernel/process.c +--- linux-3.2.34/arch/ia64/kernel/process.c 2011-03-15 18:06:39.000000000 +0100 ++++ 
linux-3.2.34-vs2.3.2.15/arch/ia64/kernel/process.c 2011-12-05 19:33:02.000000000 +0100 +@@ -109,8 +109,8 @@ show_regs (struct pt_regs *regs) + unsigned long ip = regs->cr_iip + ia64_psr(regs)->ri; + + print_modules(); +- printk("\nPid: %d, CPU %d, comm: %20s\n", task_pid_nr(current), +- smp_processor_id(), current->comm); ++ printk("\nPid: %d[#%u], CPU %d, comm: %20s\n", task_pid_nr(current), ++ current->xid, smp_processor_id(), current->comm); + printk("psr : %016lx ifs : %016lx ip : [<%016lx>] %s (%s)\n", + regs->cr_ipsr, regs->cr_ifs, ip, print_tainted(), + init_utsname()->release); +diff -NurpP --minimal linux-3.2.34/arch/ia64/kernel/ptrace.c linux-3.2.34-vs2.3.2.15/arch/ia64/kernel/ptrace.c +--- linux-3.2.34/arch/ia64/kernel/ptrace.c 2011-01-05 21:48:59.000000000 +0100 ++++ linux-3.2.34-vs2.3.2.15/arch/ia64/kernel/ptrace.c 2011-12-05 19:33:02.000000000 +0100 +@@ -21,6 +21,7 @@ + #include + #include + #include ++#include + + #include + #include +diff -NurpP --minimal linux-3.2.34/arch/ia64/kernel/traps.c linux-3.2.34-vs2.3.2.15/arch/ia64/kernel/traps.c +--- linux-3.2.34/arch/ia64/kernel/traps.c 2010-07-07 18:31:01.000000000 +0200 ++++ linux-3.2.34-vs2.3.2.15/arch/ia64/kernel/traps.c 2011-12-05 19:33:02.000000000 +0100 +@@ -59,8 +59,9 @@ die (const char *str, struct pt_regs *re + put_cpu(); + + if (++die.lock_owner_depth < 3) { +- printk("%s[%d]: %s %ld [%d]\n", +- current->comm, task_pid_nr(current), str, err, ++die_counter); ++ printk("%s[%d[#%u]]: %s %ld [%d]\n", ++ current->comm, task_pid_nr(current), current->xid, ++ str, err, ++die_counter); + if (notify_die(DIE_OOPS, str, regs, err, 255, SIGSEGV) + != NOTIFY_STOP) + show_regs(regs); +@@ -323,8 +324,9 @@ handle_fpu_swa (int fp_fault, struct pt_ + if ((last.count & 15) < 5 && (ia64_fetchadd(1, &last.count, acq) & 15) < 5) { + last.time = current_jiffies + 5 * HZ; + printk(KERN_WARNING +- "%s(%d): floating-point assist fault at ip %016lx, isr %016lx\n", +- current->comm, task_pid_nr(current), regs->cr_iip + ia64_psr(regs)->ri, isr); ++ "%s(%d[#%u]): floating-point assist fault at ip %016lx, isr %016lx\n", ++ current->comm, task_pid_nr(current), current->xid, ++ regs->cr_iip + ia64_psr(regs)->ri, isr); + } + } + } +diff -NurpP --minimal linux-3.2.34/arch/m32r/kernel/traps.c linux-3.2.34-vs2.3.2.15/arch/m32r/kernel/traps.c +--- linux-3.2.34/arch/m32r/kernel/traps.c 2011-10-24 18:44:58.000000000 +0200 ++++ linux-3.2.34-vs2.3.2.15/arch/m32r/kernel/traps.c 2011-12-05 19:33:02.000000000 +0100 +@@ -196,8 +196,9 @@ static void show_registers(struct pt_reg + } else { + printk("SPI: %08lx\n", sp); + } +- printk("Process %s (pid: %d, process nr: %d, stackpage=%08lx)", +- current->comm, task_pid_nr(current), 0xffff & i, 4096+(unsigned long)current); ++ printk("Process %s (pid: %d[#%u], process nr: %d, stackpage=%08lx)", ++ current->comm, task_pid_nr(current), current->xid, ++ 0xffff & i, 4096+(unsigned long)current); + + /* + * When in-kernel, we also print out the stack and code at the +diff -NurpP --minimal linux-3.2.34/arch/m68k/Kconfig linux-3.2.34-vs2.3.2.15/arch/m68k/Kconfig +--- linux-3.2.34/arch/m68k/Kconfig 2012-01-09 16:14:03.000000000 +0100 ++++ linux-3.2.34-vs2.3.2.15/arch/m68k/Kconfig 2011-12-05 19:33:02.000000000 +0100 +@@ -135,6 +135,8 @@ source "fs/Kconfig" + + source "arch/m68k/Kconfig.debug" + ++source "kernel/vserver/Kconfig" ++ + source "security/Kconfig" + + source "crypto/Kconfig" +diff -NurpP --minimal linux-3.2.34/arch/mips/Kconfig linux-3.2.34-vs2.3.2.15/arch/mips/Kconfig +--- linux-3.2.34/arch/mips/Kconfig 2012-01-09 
16:14:04.000000000 +0100 ++++ linux-3.2.34-vs2.3.2.15/arch/mips/Kconfig 2011-12-05 19:33:02.000000000 +0100 +@@ -2478,6 +2478,8 @@ source "fs/Kconfig" + + source "arch/mips/Kconfig.debug" + ++source "kernel/vserver/Kconfig" ++ + source "security/Kconfig" + + source "crypto/Kconfig" +diff -NurpP --minimal linux-3.2.34/arch/mips/kernel/ptrace.c linux-3.2.34-vs2.3.2.15/arch/mips/kernel/ptrace.c +--- linux-3.2.34/arch/mips/kernel/ptrace.c 2011-07-22 11:17:36.000000000 +0200 ++++ linux-3.2.34-vs2.3.2.15/arch/mips/kernel/ptrace.c 2011-12-05 19:33:02.000000000 +0100 +@@ -25,6 +25,7 @@ + #include + #include + #include ++#include + + #include + #include +@@ -263,6 +264,9 @@ long arch_ptrace(struct task_struct *chi + void __user *datavp = (void __user *) data; + unsigned long __user *datalp = (void __user *) data; + ++ if (!vx_check(vx_task_xid(child), VS_WATCH_P | VS_IDENT)) ++ goto out; ++ + switch (request) { + /* when I and D space are separate, these will need to be fixed. */ + case PTRACE_PEEKTEXT: /* read word at location addr. */ +diff -NurpP --minimal linux-3.2.34/arch/mips/kernel/scall32-o32.S linux-3.2.34-vs2.3.2.15/arch/mips/kernel/scall32-o32.S +--- linux-3.2.34/arch/mips/kernel/scall32-o32.S 2012-01-09 16:14:05.000000000 +0100 ++++ linux-3.2.34-vs2.3.2.15/arch/mips/kernel/scall32-o32.S 2011-12-05 19:33:02.000000000 +0100 +@@ -523,7 +523,7 @@ einval: li v0, -ENOSYS + sys sys_mq_timedreceive 5 + sys sys_mq_notify 2 /* 4275 */ + sys sys_mq_getsetattr 3 +- sys sys_ni_syscall 0 /* sys_vserver */ ++ sys sys_vserver 3 + sys sys_waitid 5 + sys sys_ni_syscall 0 /* available, was setaltroot */ + sys sys_add_key 5 /* 4280 */ +diff -NurpP --minimal linux-3.2.34/arch/mips/kernel/scall64-64.S linux-3.2.34-vs2.3.2.15/arch/mips/kernel/scall64-64.S +--- linux-3.2.34/arch/mips/kernel/scall64-64.S 2012-01-09 16:14:05.000000000 +0100 ++++ linux-3.2.34-vs2.3.2.15/arch/mips/kernel/scall64-64.S 2011-12-05 19:33:02.000000000 +0100 +@@ -362,7 +362,7 @@ sys_call_table: + PTR sys_mq_timedreceive + PTR sys_mq_notify + PTR sys_mq_getsetattr /* 5235 */ +- PTR sys_ni_syscall /* sys_vserver */ ++ PTR sys_vserver + PTR sys_waitid + PTR sys_ni_syscall /* available, was setaltroot */ + PTR sys_add_key +diff -NurpP --minimal linux-3.2.34/arch/mips/kernel/scall64-n32.S linux-3.2.34-vs2.3.2.15/arch/mips/kernel/scall64-n32.S +--- linux-3.2.34/arch/mips/kernel/scall64-n32.S 2012-01-09 16:14:05.000000000 +0100 ++++ linux-3.2.34-vs2.3.2.15/arch/mips/kernel/scall64-n32.S 2011-12-05 19:33:02.000000000 +0100 +@@ -361,7 +361,7 @@ EXPORT(sysn32_call_table) + PTR compat_sys_mq_timedreceive + PTR compat_sys_mq_notify + PTR compat_sys_mq_getsetattr +- PTR sys_ni_syscall /* 6240, sys_vserver */ ++ PTR sys32_vserver /* 6240 */ + PTR compat_sys_waitid + PTR sys_ni_syscall /* available, was setaltroot */ + PTR sys_add_key +diff -NurpP --minimal linux-3.2.34/arch/mips/kernel/scall64-o32.S linux-3.2.34-vs2.3.2.15/arch/mips/kernel/scall64-o32.S +--- linux-3.2.34/arch/mips/kernel/scall64-o32.S 2012-01-09 16:14:05.000000000 +0100 ++++ linux-3.2.34-vs2.3.2.15/arch/mips/kernel/scall64-o32.S 2011-12-05 19:33:02.000000000 +0100 +@@ -480,7 +480,7 @@ sys_call_table: + PTR compat_sys_mq_timedreceive + PTR compat_sys_mq_notify /* 4275 */ + PTR compat_sys_mq_getsetattr +- PTR sys_ni_syscall /* sys_vserver */ ++ PTR sys32_vserver + PTR sys_32_waitid + PTR sys_ni_syscall /* available, was setaltroot */ + PTR sys_add_key /* 4280 */ +diff -NurpP --minimal linux-3.2.34/arch/mips/kernel/traps.c linux-3.2.34-vs2.3.2.15/arch/mips/kernel/traps.c +--- 
linux-3.2.34/arch/mips/kernel/traps.c 2012-01-09 16:14:05.000000000 +0100 ++++ linux-3.2.34-vs2.3.2.15/arch/mips/kernel/traps.c 2011-12-05 19:33:02.000000000 +0100 +@@ -343,9 +343,10 @@ void show_registers(struct pt_regs *regs + + __show_regs(regs); + print_modules(); +- printk("Process %s (pid: %d, threadinfo=%p, task=%p, tls=%0*lx)\n", +- current->comm, current->pid, current_thread_info(), current, +- field, current_thread_info()->tp_value); ++ printk("Process %s (pid: %d:#%u, threadinfo=%p, task=%p, tls=%0*lx)\n", ++ current->comm, task_pid_nr(current), current->xid, ++ current_thread_info(), current, ++ field, current_thread_info()->tp_value); + if (cpu_has_userlocal) { + unsigned long tls; + +diff -NurpP --minimal linux-3.2.34/arch/parisc/Kconfig linux-3.2.34-vs2.3.2.15/arch/parisc/Kconfig +--- linux-3.2.34/arch/parisc/Kconfig 2012-01-09 16:14:05.000000000 +0100 ++++ linux-3.2.34-vs2.3.2.15/arch/parisc/Kconfig 2011-12-05 19:33:02.000000000 +0100 +@@ -278,6 +278,8 @@ source "fs/Kconfig" + + source "arch/parisc/Kconfig.debug" + ++source "kernel/vserver/Kconfig" ++ + source "security/Kconfig" + + source "crypto/Kconfig" +diff -NurpP --minimal linux-3.2.34/arch/parisc/kernel/syscall_table.S linux-3.2.34-vs2.3.2.15/arch/parisc/kernel/syscall_table.S +--- linux-3.2.34/arch/parisc/kernel/syscall_table.S 2011-10-24 18:45:00.000000000 +0200 ++++ linux-3.2.34-vs2.3.2.15/arch/parisc/kernel/syscall_table.S 2011-12-05 19:33:02.000000000 +0100 +@@ -361,7 +361,7 @@ + ENTRY_COMP(mbind) /* 260 */ + ENTRY_COMP(get_mempolicy) + ENTRY_COMP(set_mempolicy) +- ENTRY_SAME(ni_syscall) /* 263: reserved for vserver */ ++ ENTRY_DIFF(vserver) + ENTRY_SAME(add_key) + ENTRY_SAME(request_key) /* 265 */ + ENTRY_SAME(keyctl) +diff -NurpP --minimal linux-3.2.34/arch/parisc/kernel/traps.c linux-3.2.34-vs2.3.2.15/arch/parisc/kernel/traps.c +--- linux-3.2.34/arch/parisc/kernel/traps.c 2011-10-24 18:45:00.000000000 +0200 ++++ linux-3.2.34-vs2.3.2.15/arch/parisc/kernel/traps.c 2011-12-05 19:33:02.000000000 +0100 +@@ -236,8 +236,9 @@ void die_if_kernel(char *str, struct pt_ + if (err == 0) + return; /* STFU */ + +- printk(KERN_CRIT "%s (pid %d): %s (code %ld) at " RFMT "\n", +- current->comm, task_pid_nr(current), str, err, regs->iaoq[0]); ++ printk(KERN_CRIT "%s (pid %d:#%u): %s (code %ld) at " RFMT "\n", ++ current->comm, task_pid_nr(current), current->xid, ++ str, err, regs->iaoq[0]); + #ifdef PRINT_USER_FAULTS + /* XXX for debugging only */ + show_regs(regs); +@@ -270,8 +271,8 @@ void die_if_kernel(char *str, struct pt_ + pdc_console_restart(); + + if (err) +- printk(KERN_CRIT "%s (pid %d): %s (code %ld)\n", +- current->comm, task_pid_nr(current), str, err); ++ printk(KERN_CRIT "%s (pid %d:#%u): %s (code %ld)\n", ++ current->comm, task_pid_nr(current), current->xid, str, err); + + /* Wot's wrong wif bein' racy? 
*/ + if (current->thread.flags & PARISC_KERNEL_DEATH) { +diff -NurpP --minimal linux-3.2.34/arch/parisc/mm/fault.c linux-3.2.34-vs2.3.2.15/arch/parisc/mm/fault.c +--- linux-3.2.34/arch/parisc/mm/fault.c 2010-08-02 16:52:06.000000000 +0200 ++++ linux-3.2.34-vs2.3.2.15/arch/parisc/mm/fault.c 2011-12-05 19:33:02.000000000 +0100 +@@ -237,8 +237,9 @@ bad_area: + + #ifdef PRINT_USER_FAULTS + printk(KERN_DEBUG "\n"); +- printk(KERN_DEBUG "do_page_fault() pid=%d command='%s' type=%lu address=0x%08lx\n", +- task_pid_nr(tsk), tsk->comm, code, address); ++ printk(KERN_DEBUG "do_page_fault() pid=%d:#%u " ++ "command='%s' type=%lu address=0x%08lx\n", ++ task_pid_nr(tsk), tsk->xid, tsk->comm, code, address); + if (vma) { + printk(KERN_DEBUG "vm_start = 0x%08lx, vm_end = 0x%08lx\n", + vma->vm_start, vma->vm_end); +diff -NurpP --minimal linux-3.2.34/arch/powerpc/Kconfig linux-3.2.34-vs2.3.2.15/arch/powerpc/Kconfig +--- linux-3.2.34/arch/powerpc/Kconfig 2012-01-09 16:14:05.000000000 +0100 ++++ linux-3.2.34-vs2.3.2.15/arch/powerpc/Kconfig 2011-12-05 19:33:02.000000000 +0100 +@@ -960,6 +960,8 @@ source "lib/Kconfig" + + source "arch/powerpc/Kconfig.debug" + ++source "kernel/vserver/Kconfig" ++ + source "security/Kconfig" + + config KEYS_COMPAT +diff -NurpP --minimal linux-3.2.34/arch/powerpc/include/asm/unistd.h linux-3.2.34-vs2.3.2.15/arch/powerpc/include/asm/unistd.h +--- linux-3.2.34/arch/powerpc/include/asm/unistd.h 2012-01-09 16:14:05.000000000 +0100 ++++ linux-3.2.34-vs2.3.2.15/arch/powerpc/include/asm/unistd.h 2011-12-05 19:33:02.000000000 +0100 +@@ -275,7 +275,7 @@ + #endif + #define __NR_rtas 255 + #define __NR_sys_debug_setcontext 256 +-/* Number 257 is reserved for vserver */ ++#define __NR_vserver 257 + #define __NR_migrate_pages 258 + #define __NR_mbind 259 + #define __NR_get_mempolicy 260 +diff -NurpP --minimal linux-3.2.34/arch/powerpc/kernel/process.c linux-3.2.34-vs2.3.2.15/arch/powerpc/kernel/process.c +--- linux-3.2.34/arch/powerpc/kernel/process.c 2012-11-18 18:42:08.000000000 +0100 ++++ linux-3.2.34-vs2.3.2.15/arch/powerpc/kernel/process.c 2012-10-22 12:59:46.000000000 +0200 +@@ -640,8 +640,9 @@ void show_regs(struct pt_regs * regs) + #else + printk("DAR: "REG", DSISR: %08lx\n", regs->dar, regs->dsisr); + #endif +- printk("TASK = %p[%d] '%s' THREAD: %p", +- current, task_pid_nr(current), current->comm, task_thread_info(current)); ++ printk("TASK = %p[%d,#%u] '%s' THREAD: %p", ++ current, task_pid_nr(current), current->xid, ++ current->comm, task_thread_info(current)); + + #ifdef CONFIG_SMP + printk(" CPU: %d", raw_smp_processor_id()); +diff -NurpP --minimal linux-3.2.34/arch/powerpc/kernel/traps.c linux-3.2.34-vs2.3.2.15/arch/powerpc/kernel/traps.c +--- linux-3.2.34/arch/powerpc/kernel/traps.c 2012-11-18 18:42:08.000000000 +0100 ++++ linux-3.2.34-vs2.3.2.15/arch/powerpc/kernel/traps.c 2012-10-22 12:59:46.000000000 +0200 +@@ -1083,8 +1083,9 @@ void nonrecoverable_exception(struct pt_ + + void trace_syscall(struct pt_regs *regs) + { +- printk("Task: %p(%d), PC: %08lX/%08lX, Syscall: %3ld, Result: %s%ld %s\n", +- current, task_pid_nr(current), regs->nip, regs->link, regs->gpr[0], ++ printk("Task: %p(%d[#%u]), PC: %08lX/%08lX, Syscall: %3ld, Result: %s%ld %s\n", ++ current, task_pid_nr(current), current->xid, ++ regs->nip, regs->link, regs->gpr[0], + regs->ccr&0x10000000?"Error=":"", regs->gpr[3], print_tainted()); + } + +diff -NurpP --minimal linux-3.2.34/arch/s390/Kconfig linux-3.2.34-vs2.3.2.15/arch/s390/Kconfig +--- linux-3.2.34/arch/s390/Kconfig 2012-11-18 18:42:08.000000000 +0100 ++++ 
linux-3.2.34-vs2.3.2.15/arch/s390/Kconfig 2012-04-24 16:50:48.000000000 +0200 +@@ -643,6 +643,8 @@ source "fs/Kconfig" + + source "arch/s390/Kconfig.debug" + ++source "kernel/vserver/Kconfig" ++ + source "security/Kconfig" + + source "crypto/Kconfig" +diff -NurpP --minimal linux-3.2.34/arch/s390/include/asm/tlb.h linux-3.2.34-vs2.3.2.15/arch/s390/include/asm/tlb.h +--- linux-3.2.34/arch/s390/include/asm/tlb.h 2012-11-18 18:42:08.000000000 +0100 ++++ linux-3.2.34-vs2.3.2.15/arch/s390/include/asm/tlb.h 2012-04-24 16:50:48.000000000 +0200 +@@ -24,6 +24,7 @@ + #include + #include + #include ++ + #include + #include + #include +diff -NurpP --minimal linux-3.2.34/arch/s390/include/asm/unistd.h linux-3.2.34-vs2.3.2.15/arch/s390/include/asm/unistd.h +--- linux-3.2.34/arch/s390/include/asm/unistd.h 2012-01-09 16:14:06.000000000 +0100 ++++ linux-3.2.34-vs2.3.2.15/arch/s390/include/asm/unistd.h 2011-12-05 19:33:02.000000000 +0100 +@@ -202,7 +202,7 @@ + #define __NR_clock_gettime (__NR_timer_create+6) + #define __NR_clock_getres (__NR_timer_create+7) + #define __NR_clock_nanosleep (__NR_timer_create+8) +-/* Number 263 is reserved for vserver */ ++#define __NR_vserver 263 + #define __NR_statfs64 265 + #define __NR_fstatfs64 266 + #define __NR_remap_file_pages 267 +diff -NurpP --minimal linux-3.2.34/arch/s390/kernel/ptrace.c linux-3.2.34-vs2.3.2.15/arch/s390/kernel/ptrace.c +--- linux-3.2.34/arch/s390/kernel/ptrace.c 2012-11-18 18:42:08.000000000 +0100 ++++ linux-3.2.34-vs2.3.2.15/arch/s390/kernel/ptrace.c 2012-03-14 10:25:26.000000000 +0100 +@@ -20,6 +20,7 @@ + #include + #include + #include ++#include + #include + #include + #include +diff -NurpP --minimal linux-3.2.34/arch/s390/kernel/syscalls.S linux-3.2.34-vs2.3.2.15/arch/s390/kernel/syscalls.S +--- linux-3.2.34/arch/s390/kernel/syscalls.S 2012-01-09 16:14:06.000000000 +0100 ++++ linux-3.2.34-vs2.3.2.15/arch/s390/kernel/syscalls.S 2011-12-05 19:33:02.000000000 +0100 +@@ -271,7 +271,7 @@ SYSCALL(sys_clock_settime,sys_clock_sett + SYSCALL(sys_clock_gettime,sys_clock_gettime,sys32_clock_gettime_wrapper) /* 260 */ + SYSCALL(sys_clock_getres,sys_clock_getres,sys32_clock_getres_wrapper) + SYSCALL(sys_clock_nanosleep,sys_clock_nanosleep,sys32_clock_nanosleep_wrapper) +-NI_SYSCALL /* reserved for vserver */ ++SYSCALL(sys_vserver,sys_vserver,sys32_vserver) + SYSCALL(sys_s390_fadvise64_64,sys_ni_syscall,sys32_fadvise64_64_wrapper) + SYSCALL(sys_statfs64,sys_statfs64,compat_sys_statfs64_wrapper) + SYSCALL(sys_fstatfs64,sys_fstatfs64,compat_sys_fstatfs64_wrapper) +diff -NurpP --minimal linux-3.2.34/arch/sh/Kconfig linux-3.2.34-vs2.3.2.15/arch/sh/Kconfig +--- linux-3.2.34/arch/sh/Kconfig 2012-01-09 16:14:07.000000000 +0100 ++++ linux-3.2.34-vs2.3.2.15/arch/sh/Kconfig 2011-12-05 19:33:02.000000000 +0100 +@@ -901,6 +901,8 @@ source "fs/Kconfig" + + source "arch/sh/Kconfig.debug" + ++source "kernel/vserver/Kconfig" ++ + source "security/Kconfig" + + source "crypto/Kconfig" +diff -NurpP --minimal linux-3.2.34/arch/sh/kernel/irq.c linux-3.2.34-vs2.3.2.15/arch/sh/kernel/irq.c +--- linux-3.2.34/arch/sh/kernel/irq.c 2011-07-22 11:17:41.000000000 +0200 ++++ linux-3.2.34-vs2.3.2.15/arch/sh/kernel/irq.c 2011-12-05 19:33:02.000000000 +0100 +@@ -14,6 +14,7 @@ + #include + #include + #include ++// #include + #include + #include + #include +diff -NurpP --minimal linux-3.2.34/arch/sparc/Kconfig linux-3.2.34-vs2.3.2.15/arch/sparc/Kconfig +--- linux-3.2.34/arch/sparc/Kconfig 2012-11-18 18:42:08.000000000 +0100 ++++ linux-3.2.34-vs2.3.2.15/arch/sparc/Kconfig 2012-06-14 
20:45:24.000000000 +0200 +@@ -598,6 +598,8 @@ source "fs/Kconfig" + + source "arch/sparc/Kconfig.debug" + ++source "kernel/vserver/Kconfig" ++ + source "security/Kconfig" + + source "crypto/Kconfig" +diff -NurpP --minimal linux-3.2.34/arch/sparc/include/asm/unistd.h linux-3.2.34-vs2.3.2.15/arch/sparc/include/asm/unistd.h +--- linux-3.2.34/arch/sparc/include/asm/unistd.h 2012-01-09 16:14:07.000000000 +0100 ++++ linux-3.2.34-vs2.3.2.15/arch/sparc/include/asm/unistd.h 2011-12-05 19:33:02.000000000 +0100 +@@ -335,7 +335,7 @@ + #define __NR_timer_getoverrun 264 + #define __NR_timer_delete 265 + #define __NR_timer_create 266 +-/* #define __NR_vserver 267 Reserved for VSERVER */ ++#define __NR_vserver 267 + #define __NR_io_setup 268 + #define __NR_io_destroy 269 + #define __NR_io_submit 270 +diff -NurpP --minimal linux-3.2.34/arch/sparc/kernel/systbls_32.S linux-3.2.34-vs2.3.2.15/arch/sparc/kernel/systbls_32.S +--- linux-3.2.34/arch/sparc/kernel/systbls_32.S 2012-01-09 16:14:09.000000000 +0100 ++++ linux-3.2.34-vs2.3.2.15/arch/sparc/kernel/systbls_32.S 2011-12-05 19:33:02.000000000 +0100 +@@ -70,7 +70,7 @@ sys_call_table: + /*250*/ .long sys_mremap, sys_sysctl, sys_getsid, sys_fdatasync, sys_ni_syscall + /*255*/ .long sys_sync_file_range, sys_clock_settime, sys_clock_gettime, sys_clock_getres, sys_clock_nanosleep + /*260*/ .long sys_sched_getaffinity, sys_sched_setaffinity, sys_timer_settime, sys_timer_gettime, sys_timer_getoverrun +-/*265*/ .long sys_timer_delete, sys_timer_create, sys_nis_syscall, sys_io_setup, sys_io_destroy ++/*265*/ .long sys_timer_delete, sys_timer_create, sys_vserver, sys_io_setup, sys_io_destroy + /*270*/ .long sys_io_submit, sys_io_cancel, sys_io_getevents, sys_mq_open, sys_mq_unlink + /*275*/ .long sys_mq_timedsend, sys_mq_timedreceive, sys_mq_notify, sys_mq_getsetattr, sys_waitid + /*280*/ .long sys_tee, sys_add_key, sys_request_key, sys_keyctl, sys_openat +diff -NurpP --minimal linux-3.2.34/arch/sparc/kernel/systbls_64.S linux-3.2.34-vs2.3.2.15/arch/sparc/kernel/systbls_64.S +--- linux-3.2.34/arch/sparc/kernel/systbls_64.S 2012-11-18 18:42:08.000000000 +0100 ++++ linux-3.2.34-vs2.3.2.15/arch/sparc/kernel/systbls_64.S 2012-06-14 20:45:24.000000000 +0200 +@@ -71,7 +71,7 @@ sys_call_table32: + /*250*/ .word sys_mremap, compat_sys_sysctl, sys32_getsid, sys_fdatasync, sys_nis_syscall + .word sys32_sync_file_range, compat_sys_clock_settime, compat_sys_clock_gettime, compat_sys_clock_getres, sys32_clock_nanosleep + /*260*/ .word compat_sys_sched_getaffinity, compat_sys_sched_setaffinity, sys32_timer_settime, compat_sys_timer_gettime, sys_timer_getoverrun +- .word sys_timer_delete, compat_sys_timer_create, sys_ni_syscall, compat_sys_io_setup, sys_io_destroy ++ .word sys_timer_delete, compat_sys_timer_create, sys32_vserver, compat_sys_io_setup, sys_io_destroy + /*270*/ .word sys32_io_submit, sys_io_cancel, compat_sys_io_getevents, sys32_mq_open, sys_mq_unlink + .word compat_sys_mq_timedsend, compat_sys_mq_timedreceive, compat_sys_mq_notify, compat_sys_mq_getsetattr, compat_sys_waitid + /*280*/ .word sys32_tee, sys_add_key, sys_request_key, compat_sys_keyctl, compat_sys_openat +@@ -148,7 +148,7 @@ sys_call_table: + /*250*/ .word sys_64_mremap, sys_sysctl, sys_getsid, sys_fdatasync, sys_nis_syscall + .word sys_sync_file_range, sys_clock_settime, sys_clock_gettime, sys_clock_getres, sys_clock_nanosleep + /*260*/ .word sys_sched_getaffinity, sys_sched_setaffinity, sys_timer_settime, sys_timer_gettime, sys_timer_getoverrun +- .word sys_timer_delete, sys_timer_create, 
sys_ni_syscall, sys_io_setup, sys_io_destroy ++ .word sys_timer_delete, sys_timer_create, sys_vserver, sys_io_setup, sys_io_destroy + /*270*/ .word sys_io_submit, sys_io_cancel, sys_io_getevents, sys_mq_open, sys_mq_unlink + .word sys_mq_timedsend, sys_mq_timedreceive, sys_mq_notify, sys_mq_getsetattr, sys_waitid + /*280*/ .word sys_tee, sys_add_key, sys_request_key, sys_keyctl, sys_openat +diff -NurpP --minimal linux-3.2.34/arch/um/Kconfig.rest linux-3.2.34-vs2.3.2.15/arch/um/Kconfig.rest +--- linux-3.2.34/arch/um/Kconfig.rest 2012-01-09 16:14:09.000000000 +0100 ++++ linux-3.2.34-vs2.3.2.15/arch/um/Kconfig.rest 2011-12-05 19:33:02.000000000 +0100 +@@ -12,6 +12,8 @@ source "arch/um/Kconfig.net" + + source "fs/Kconfig" + ++source "kernel/vserver/Kconfig" ++ + source "security/Kconfig" + + source "crypto/Kconfig" +diff -NurpP --minimal linux-3.2.34/arch/um/include/shared/kern_constants.h linux-3.2.34-vs2.3.2.15/arch/um/include/shared/kern_constants.h +--- linux-3.2.34/arch/um/include/shared/kern_constants.h 1970-01-01 01:00:00.000000000 +0100 ++++ linux-3.2.34-vs2.3.2.15/arch/um/include/shared/kern_constants.h 2011-12-05 19:33:02.000000000 +0100 +@@ -0,0 +1 @@ ++#include "../../../../include/generated/asm-offsets.h" +diff -NurpP --minimal linux-3.2.34/arch/um/include/shared/user_constants.h linux-3.2.34-vs2.3.2.15/arch/um/include/shared/user_constants.h +--- linux-3.2.34/arch/um/include/shared/user_constants.h 1970-01-01 01:00:00.000000000 +0100 ++++ linux-3.2.34-vs2.3.2.15/arch/um/include/shared/user_constants.h 2011-12-05 19:33:02.000000000 +0100 +@@ -0,0 +1,40 @@ ++/* ++ * DO NOT MODIFY. ++ * ++ * This file was generated by arch/um/Makefile ++ * ++ */ ++ ++#define HOST_SC_CR2 176 /* offsetof(struct sigcontext, cr2) # */ ++#define HOST_SC_ERR 152 /* offsetof(struct sigcontext, err) # */ ++#define HOST_SC_TRAPNO 160 /* offsetof(struct sigcontext, trapno) # */ ++#define HOST_FP_SIZE 64 /* sizeof(struct _fpstate) / sizeof(unsigned long) # */ ++#define HOST_RBX 5 /* RBX # */ ++#define HOST_RCX 11 /* RCX # */ ++#define HOST_RDI 14 /* RDI # */ ++#define HOST_RSI 13 /* RSI # */ ++#define HOST_RDX 12 /* RDX # */ ++#define HOST_RBP 4 /* RBP # */ ++#define HOST_RAX 10 /* RAX # */ ++#define HOST_R8 9 /* R8 # */ ++#define HOST_R9 8 /* R9 # */ ++#define HOST_R10 7 /* R10 # */ ++#define HOST_R11 6 /* R11 # */ ++#define HOST_R12 3 /* R12 # */ ++#define HOST_R13 2 /* R13 # */ ++#define HOST_R14 1 /* R14 # */ ++#define HOST_R15 0 /* R15 # */ ++#define HOST_ORIG_RAX 15 /* ORIG_RAX # */ ++#define HOST_CS 17 /* CS # */ ++#define HOST_SS 20 /* SS # */ ++#define HOST_EFLAGS 18 /* EFLAGS # */ ++#define HOST_IP 16 /* RIP # */ ++#define HOST_SP 19 /* RSP # */ ++#define UM_FRAME_SIZE 216 /* sizeof(struct user_regs_struct) # */ ++#define UM_POLLIN 1 /* POLLIN # */ ++#define UM_POLLPRI 2 /* POLLPRI # */ ++#define UM_POLLOUT 4 /* POLLOUT # */ ++#define UM_PROT_READ 1 /* PROT_READ # */ ++#define UM_PROT_WRITE 2 /* PROT_WRITE # */ ++#define UM_PROT_EXEC 4 /* PROT_EXEC # */ ++ +diff -NurpP --minimal linux-3.2.34/arch/x86/Kconfig linux-3.2.34-vs2.3.2.15/arch/x86/Kconfig +--- linux-3.2.34/arch/x86/Kconfig 2012-01-09 16:14:10.000000000 +0100 ++++ linux-3.2.34-vs2.3.2.15/arch/x86/Kconfig 2011-12-15 01:11:29.000000000 +0100 +@@ -2170,6 +2170,8 @@ source "fs/Kconfig" + + source "arch/x86/Kconfig.debug" + ++source "kernel/vserver/Kconfig" ++ + source "security/Kconfig" + + source "crypto/Kconfig" +diff -NurpP --minimal linux-3.2.34/arch/x86/ia32/ia32entry.S linux-3.2.34-vs2.3.2.15/arch/x86/ia32/ia32entry.S +--- 
linux-3.2.34/arch/x86/ia32/ia32entry.S 2012-01-09 16:14:10.000000000 +0100 ++++ linux-3.2.34-vs2.3.2.15/arch/x86/ia32/ia32entry.S 2011-12-05 19:33:02.000000000 +0100 +@@ -776,7 +776,7 @@ ia32_sys_call_table: + .quad sys_tgkill /* 270 */ + .quad compat_sys_utimes + .quad sys32_fadvise64_64 +- .quad quiet_ni_syscall /* sys_vserver */ ++ .quad sys32_vserver + .quad sys_mbind + .quad compat_sys_get_mempolicy /* 275 */ + .quad sys_set_mempolicy +diff -NurpP --minimal linux-3.2.34/arch/x86/include/asm/unistd_64.h linux-3.2.34-vs2.3.2.15/arch/x86/include/asm/unistd_64.h +--- linux-3.2.34/arch/x86/include/asm/unistd_64.h 2012-01-09 16:14:11.000000000 +0100 ++++ linux-3.2.34-vs2.3.2.15/arch/x86/include/asm/unistd_64.h 2011-12-05 19:33:02.000000000 +0100 +@@ -535,7 +535,7 @@ __SYSCALL(__NR_tgkill, sys_tgkill) + #define __NR_utimes 235 + __SYSCALL(__NR_utimes, sys_utimes) + #define __NR_vserver 236 +-__SYSCALL(__NR_vserver, sys_ni_syscall) ++__SYSCALL(__NR_vserver, sys_vserver) + #define __NR_mbind 237 + __SYSCALL(__NR_mbind, sys_mbind) + #define __NR_set_mempolicy 238 +diff -NurpP --minimal linux-3.2.34/arch/x86/kernel/syscall_table_32.S linux-3.2.34-vs2.3.2.15/arch/x86/kernel/syscall_table_32.S +--- linux-3.2.34/arch/x86/kernel/syscall_table_32.S 2012-01-09 16:14:11.000000000 +0100 ++++ linux-3.2.34-vs2.3.2.15/arch/x86/kernel/syscall_table_32.S 2011-12-05 19:33:02.000000000 +0100 +@@ -272,7 +272,7 @@ ENTRY(sys_call_table) + .long sys_tgkill /* 270 */ + .long sys_utimes + .long sys_fadvise64_64 +- .long sys_ni_syscall /* sys_vserver */ ++ .long sys_vserver + .long sys_mbind + .long sys_get_mempolicy + .long sys_set_mempolicy +diff -NurpP --minimal linux-3.2.34/drivers/block/Kconfig linux-3.2.34-vs2.3.2.15/drivers/block/Kconfig +--- linux-3.2.34/drivers/block/Kconfig 2011-10-24 18:45:08.000000000 +0200 ++++ linux-3.2.34-vs2.3.2.15/drivers/block/Kconfig 2011-12-05 19:33:02.000000000 +0100 +@@ -288,6 +288,13 @@ config BLK_DEV_CRYPTOLOOP + + source "drivers/block/drbd/Kconfig" + ++config BLK_DEV_VROOT ++ tristate "Virtual Root device support" ++ depends on QUOTACTL ++ ---help--- ++ Saying Y here will allow you to use quota/fs ioctls on a shared ++ partition within a virtual server without compromising security. 
++ + config BLK_DEV_NBD + tristate "Network block device support" + depends on NET +diff -NurpP --minimal linux-3.2.34/drivers/block/Makefile linux-3.2.34-vs2.3.2.15/drivers/block/Makefile +--- linux-3.2.34/drivers/block/Makefile 2011-07-22 11:17:44.000000000 +0200 ++++ linux-3.2.34-vs2.3.2.15/drivers/block/Makefile 2011-12-05 19:33:02.000000000 +0100 +@@ -34,6 +34,7 @@ obj-$(CONFIG_VIODASD) += viodasd.o + obj-$(CONFIG_BLK_DEV_SX8) += sx8.o + obj-$(CONFIG_BLK_DEV_UB) += ub.o + obj-$(CONFIG_BLK_DEV_HD) += hd.o ++obj-$(CONFIG_BLK_DEV_VROOT) += vroot.o + + obj-$(CONFIG_XEN_BLKDEV_FRONTEND) += xen-blkfront.o + obj-$(CONFIG_XEN_BLKDEV_BACKEND) += xen-blkback/ +diff -NurpP --minimal linux-3.2.34/drivers/block/loop.c linux-3.2.34-vs2.3.2.15/drivers/block/loop.c +--- linux-3.2.34/drivers/block/loop.c 2012-01-09 16:14:15.000000000 +0100 ++++ linux-3.2.34-vs2.3.2.15/drivers/block/loop.c 2012-01-09 16:19:31.000000000 +0100 +@@ -77,6 +77,7 @@ + #include + #include + #include ++#include + + #include + +@@ -868,6 +869,7 @@ static int loop_set_fd(struct loop_devic + lo->lo_blocksize = lo_blocksize; + lo->lo_device = bdev; + lo->lo_flags = lo_flags; ++ lo->lo_xid = vx_current_xid(); + lo->lo_backing_file = file; + lo->transfer = transfer_none; + lo->ioctl = NULL; +@@ -1000,6 +1002,7 @@ static int loop_clr_fd(struct loop_devic + lo->lo_sizelimit = 0; + lo->lo_encrypt_key_size = 0; + lo->lo_thread = NULL; ++ lo->lo_xid = 0; + memset(lo->lo_encrypt_key, 0, LO_KEY_SIZE); + memset(lo->lo_crypt_name, 0, LO_NAME_SIZE); + memset(lo->lo_file_name, 0, LO_NAME_SIZE); +@@ -1041,7 +1044,7 @@ loop_set_status(struct loop_device *lo, + + if (lo->lo_encrypt_key_size && + lo->lo_key_owner != uid && +- !capable(CAP_SYS_ADMIN)) ++ !vx_capable(CAP_SYS_ADMIN, VXC_ADMIN_CLOOP)) + return -EPERM; + if (lo->lo_state != Lo_bound) + return -ENXIO; +@@ -1131,7 +1134,8 @@ loop_get_status(struct loop_device *lo, + memcpy(info->lo_crypt_name, lo->lo_crypt_name, LO_NAME_SIZE); + info->lo_encrypt_type = + lo->lo_encryption ? lo->lo_encryption->number : 0; +- if (lo->lo_encrypt_key_size && capable(CAP_SYS_ADMIN)) { ++ if (lo->lo_encrypt_key_size && ++ vx_capable(CAP_SYS_ADMIN, VXC_ADMIN_CLOOP)) { + info->lo_encrypt_key_size = lo->lo_encrypt_key_size; + memcpy(info->lo_encrypt_key, lo->lo_encrypt_key, + lo->lo_encrypt_key_size); +@@ -1491,6 +1495,11 @@ static int lo_open(struct block_device * + goto out; + } + ++ if (!vx_check(lo->lo_xid, VS_IDENT|VS_HOSTID|VS_ADMIN_P)) { ++ err = -EACCES; ++ goto out; ++ } ++ + mutex_lock(&lo->lo_ctl_mutex); + lo->lo_refcnt++; + mutex_unlock(&lo->lo_ctl_mutex); +diff -NurpP --minimal linux-3.2.34/drivers/block/vroot.c linux-3.2.34-vs2.3.2.15/drivers/block/vroot.c +--- linux-3.2.34/drivers/block/vroot.c 1970-01-01 01:00:00.000000000 +0100 ++++ linux-3.2.34-vs2.3.2.15/drivers/block/vroot.c 2011-12-07 00:05:16.000000000 +0100 +@@ -0,0 +1,291 @@ ++/* ++ * linux/drivers/block/vroot.c ++ * ++ * written by Herbert Pötzl, 9/11/2002 ++ * ported to 2.6.10 by Herbert Pötzl, 30/12/2004 ++ * ++ * based on the loop.c code by Theodore Ts'o. ++ * ++ * Copyright (C) 2002-2007 by Herbert Pötzl. ++ * Redistribution of this file is permitted under the ++ * GNU General Public License. 
++ * ++ */ ++ ++#include ++#include ++#include ++#include ++#include ++#include ++ ++#include ++#include ++ ++ ++static int max_vroot = 8; ++ ++static struct vroot_device *vroot_dev; ++static struct gendisk **disks; ++ ++ ++static int vroot_set_dev( ++ struct vroot_device *vr, ++ struct block_device *bdev, ++ unsigned int arg) ++{ ++ struct block_device *real_bdev; ++ struct file *file; ++ struct inode *inode; ++ int error; ++ ++ error = -EBUSY; ++ if (vr->vr_state != Vr_unbound) ++ goto out; ++ ++ error = -EBADF; ++ file = fget(arg); ++ if (!file) ++ goto out; ++ ++ error = -EINVAL; ++ inode = file->f_dentry->d_inode; ++ ++ ++ if (S_ISBLK(inode->i_mode)) { ++ real_bdev = inode->i_bdev; ++ vr->vr_device = real_bdev; ++ __iget(real_bdev->bd_inode); ++ } else ++ goto out_fput; ++ ++ vxdprintk(VXD_CBIT(misc, 0), ++ "vroot[%d]_set_dev: dev=" VXF_DEV, ++ vr->vr_number, VXD_DEV(real_bdev)); ++ ++ vr->vr_state = Vr_bound; ++ error = 0; ++ ++ out_fput: ++ fput(file); ++ out: ++ return error; ++} ++ ++static int vroot_clr_dev( ++ struct vroot_device *vr, ++ struct block_device *bdev) ++{ ++ struct block_device *real_bdev; ++ ++ if (vr->vr_state != Vr_bound) ++ return -ENXIO; ++ if (vr->vr_refcnt > 1) /* we needed one fd for the ioctl */ ++ return -EBUSY; ++ ++ real_bdev = vr->vr_device; ++ ++ vxdprintk(VXD_CBIT(misc, 0), ++ "vroot[%d]_clr_dev: dev=" VXF_DEV, ++ vr->vr_number, VXD_DEV(real_bdev)); ++ ++ bdput(real_bdev); ++ vr->vr_state = Vr_unbound; ++ vr->vr_device = NULL; ++ return 0; ++} ++ ++ ++static int vr_ioctl(struct block_device *bdev, fmode_t mode, ++ unsigned int cmd, unsigned long arg) ++{ ++ struct vroot_device *vr = bdev->bd_disk->private_data; ++ int err; ++ ++ down(&vr->vr_ctl_mutex); ++ switch (cmd) { ++ case VROOT_SET_DEV: ++ err = vroot_set_dev(vr, bdev, arg); ++ break; ++ case VROOT_CLR_DEV: ++ err = vroot_clr_dev(vr, bdev); ++ break; ++ default: ++ err = -EINVAL; ++ break; ++ } ++ up(&vr->vr_ctl_mutex); ++ return err; ++} ++ ++static int vr_open(struct block_device *bdev, fmode_t mode) ++{ ++ struct vroot_device *vr = bdev->bd_disk->private_data; ++ ++ down(&vr->vr_ctl_mutex); ++ vr->vr_refcnt++; ++ up(&vr->vr_ctl_mutex); ++ return 0; ++} ++ ++static int vr_release(struct gendisk *disk, fmode_t mode) ++{ ++ struct vroot_device *vr = disk->private_data; ++ ++ down(&vr->vr_ctl_mutex); ++ --vr->vr_refcnt; ++ up(&vr->vr_ctl_mutex); ++ return 0; ++} ++ ++static struct block_device_operations vr_fops = { ++ .owner = THIS_MODULE, ++ .open = vr_open, ++ .release = vr_release, ++ .ioctl = vr_ioctl, ++}; ++ ++static void vroot_make_request(struct request_queue *q, struct bio *bio) ++{ ++ printk("vroot_make_request %p, %p\n", q, bio); ++ bio_io_error(bio); ++} ++ ++struct block_device *__vroot_get_real_bdev(struct block_device *bdev) ++{ ++ struct inode *inode = bdev->bd_inode; ++ struct vroot_device *vr; ++ struct block_device *real_bdev; ++ int minor = iminor(inode); ++ ++ vr = &vroot_dev[minor]; ++ real_bdev = vr->vr_device; ++ ++ vxdprintk(VXD_CBIT(misc, 0), ++ "vroot[%d]_get_real_bdev: dev=" VXF_DEV, ++ vr->vr_number, VXD_DEV(real_bdev)); ++ ++ if (vr->vr_state != Vr_bound) ++ return ERR_PTR(-ENXIO); ++ ++ __iget(real_bdev->bd_inode); ++ return real_bdev; ++} ++ ++ ++ ++/* ++ * And now the modules code and kernel interface. 
++ */ ++ ++module_param(max_vroot, int, 0); ++ ++MODULE_PARM_DESC(max_vroot, "Maximum number of vroot devices (1-256)"); ++MODULE_LICENSE("GPL"); ++MODULE_ALIAS_BLOCKDEV_MAJOR(VROOT_MAJOR); ++ ++MODULE_AUTHOR ("Herbert Pötzl"); ++MODULE_DESCRIPTION ("Virtual Root Device Mapper"); ++ ++ ++int __init vroot_init(void) ++{ ++ int err, i; ++ ++ if (max_vroot < 1 || max_vroot > 256) { ++ max_vroot = MAX_VROOT_DEFAULT; ++ printk(KERN_WARNING "vroot: invalid max_vroot " ++ "(must be between 1 and 256), " ++ "using default (%d)\n", max_vroot); ++ } ++ ++ if (register_blkdev(VROOT_MAJOR, "vroot")) ++ return -EIO; ++ ++ err = -ENOMEM; ++ vroot_dev = kmalloc(max_vroot * sizeof(struct vroot_device), GFP_KERNEL); ++ if (!vroot_dev) ++ goto out_mem1; ++ memset(vroot_dev, 0, max_vroot * sizeof(struct vroot_device)); ++ ++ disks = kmalloc(max_vroot * sizeof(struct gendisk *), GFP_KERNEL); ++ if (!disks) ++ goto out_mem2; ++ ++ for (i = 0; i < max_vroot; i++) { ++ disks[i] = alloc_disk(1); ++ if (!disks[i]) ++ goto out_mem3; ++ disks[i]->queue = blk_alloc_queue(GFP_KERNEL); ++ if (!disks[i]->queue) ++ goto out_mem3; ++ blk_queue_make_request(disks[i]->queue, vroot_make_request); ++ } ++ ++ for (i = 0; i < max_vroot; i++) { ++ struct vroot_device *vr = &vroot_dev[i]; ++ struct gendisk *disk = disks[i]; ++ ++ memset(vr, 0, sizeof(*vr)); ++ sema_init(&vr->vr_ctl_mutex, 1); ++ vr->vr_number = i; ++ disk->major = VROOT_MAJOR; ++ disk->first_minor = i; ++ disk->fops = &vr_fops; ++ sprintf(disk->disk_name, "vroot%d", i); ++ disk->private_data = vr; ++ } ++ ++ err = register_vroot_grb(&__vroot_get_real_bdev); ++ if (err) ++ goto out_mem3; ++ ++ for (i = 0; i < max_vroot; i++) ++ add_disk(disks[i]); ++ printk(KERN_INFO "vroot: loaded (max %d devices)\n", max_vroot); ++ return 0; ++ ++out_mem3: ++ while (i--) ++ put_disk(disks[i]); ++ kfree(disks); ++out_mem2: ++ kfree(vroot_dev); ++out_mem1: ++ unregister_blkdev(VROOT_MAJOR, "vroot"); ++ printk(KERN_ERR "vroot: ran out of memory\n"); ++ return err; ++} ++ ++void vroot_exit(void) ++{ ++ int i; ++ ++ if (unregister_vroot_grb(&__vroot_get_real_bdev)) ++ printk(KERN_WARNING "vroot: cannot unregister grb\n"); ++ ++ for (i = 0; i < max_vroot; i++) { ++ del_gendisk(disks[i]); ++ put_disk(disks[i]); ++ } ++ unregister_blkdev(VROOT_MAJOR, "vroot"); ++ ++ kfree(disks); ++ kfree(vroot_dev); ++} ++ ++module_init(vroot_init); ++module_exit(vroot_exit); ++ ++#ifndef MODULE ++ ++static int __init max_vroot_setup(char *str) ++{ ++ max_vroot = simple_strtol(str, NULL, 0); ++ return 1; ++} ++ ++__setup("max_vroot=", max_vroot_setup); ++ ++#endif ++ +diff -NurpP --minimal linux-3.2.34/drivers/infiniband/Kconfig linux-3.2.34-vs2.3.2.15/drivers/infiniband/Kconfig +--- linux-3.2.34/drivers/infiniband/Kconfig 2011-07-22 11:17:45.000000000 +0200 ++++ linux-3.2.34-vs2.3.2.15/drivers/infiniband/Kconfig 2012-02-15 03:26:22.000000000 +0100 +@@ -39,7 +39,7 @@ config INFINIBAND_USER_MEM + config INFINIBAND_ADDR_TRANS + bool + depends on INET +- depends on !(INFINIBAND = y && IPV6 = m) ++ depends on !(INFINIBAND = y && IPV6 = y) + default y + + source "drivers/infiniband/hw/mthca/Kconfig" +diff -NurpP --minimal linux-3.2.34/drivers/infiniband/core/addr.c linux-3.2.34-vs2.3.2.15/drivers/infiniband/core/addr.c +--- linux-3.2.34/drivers/infiniband/core/addr.c 2012-01-09 16:14:19.000000000 +0100 ++++ linux-3.2.34-vs2.3.2.15/drivers/infiniband/core/addr.c 2011-12-05 19:33:02.000000000 +0100 +@@ -255,7 +255,7 @@ static int addr6_resolve(struct sockaddr + + if (ipv6_addr_any(&fl6.saddr)) { + ret = 
ipv6_dev_get_saddr(&init_net, ip6_dst_idev(dst)->dev, +- &fl6.daddr, 0, &fl6.saddr); ++ &fl6.daddr, 0, &fl6.saddr, NULL); + if (ret) + goto put; + +diff -NurpP --minimal linux-3.2.34/drivers/md/dm-ioctl.c linux-3.2.34-vs2.3.2.15/drivers/md/dm-ioctl.c +--- linux-3.2.34/drivers/md/dm-ioctl.c 2012-11-18 18:42:11.000000000 +0100 ++++ linux-3.2.34-vs2.3.2.15/drivers/md/dm-ioctl.c 2012-03-14 10:24:05.000000000 +0100 +@@ -16,6 +16,7 @@ + #include + #include + #include ++#include + + #include + +@@ -106,7 +107,8 @@ static struct hash_cell *__get_name_cell + unsigned int h = hash_str(str); + + list_for_each_entry (hc, _name_buckets + h, name_list) +- if (!strcmp(hc->name, str)) { ++ if (vx_check(dm_get_xid(hc->md), VS_WATCH_P | VS_IDENT) && ++ !strcmp(hc->name, str)) { + dm_get(hc->md); + return hc; + } +@@ -120,7 +122,8 @@ static struct hash_cell *__get_uuid_cell + unsigned int h = hash_str(str); + + list_for_each_entry (hc, _uuid_buckets + h, uuid_list) +- if (!strcmp(hc->uuid, str)) { ++ if (vx_check(dm_get_xid(hc->md), VS_WATCH_P | VS_IDENT) && ++ !strcmp(hc->uuid, str)) { + dm_get(hc->md); + return hc; + } +@@ -131,13 +134,15 @@ static struct hash_cell *__get_uuid_cell + static struct hash_cell *__get_dev_cell(uint64_t dev) + { + struct mapped_device *md; +- struct hash_cell *hc; ++ struct hash_cell *hc = NULL; + + md = dm_get_md(huge_decode_dev(dev)); + if (!md) + return NULL; + +- hc = dm_get_mdptr(md); ++ if (vx_check(dm_get_xid(md), VS_WATCH_P | VS_IDENT)) ++ hc = dm_get_mdptr(md); ++ + if (!hc) { + dm_put(md); + return NULL; +@@ -445,6 +450,9 @@ typedef int (*ioctl_fn)(struct dm_ioctl + + static int remove_all(struct dm_ioctl *param, size_t param_size) + { ++ if (!vx_check(0, VS_ADMIN)) ++ return -EPERM; ++ + dm_hash_remove_all(1); + param->data_size = 0; + return 0; +@@ -492,6 +500,8 @@ static int list_devices(struct dm_ioctl + */ + for (i = 0; i < NUM_BUCKETS; i++) { + list_for_each_entry (hc, _name_buckets + i, name_list) { ++ if (!vx_check(dm_get_xid(hc->md), VS_WATCH_P | VS_IDENT)) ++ continue; + needed += sizeof(struct dm_name_list); + needed += strlen(hc->name) + 1; + needed += ALIGN_MASK; +@@ -515,6 +525,8 @@ static int list_devices(struct dm_ioctl + */ + for (i = 0; i < NUM_BUCKETS; i++) { + list_for_each_entry (hc, _name_buckets + i, name_list) { ++ if (!vx_check(dm_get_xid(hc->md), VS_WATCH_P | VS_IDENT)) ++ continue; + if (old_nl) + old_nl->next = (uint32_t) ((void *) nl - + (void *) old_nl); +@@ -1615,8 +1627,8 @@ static int ctl_ioctl(uint command, struc + ioctl_fn fn = NULL; + size_t input_param_size; + +- /* only root can play with this */ +- if (!capable(CAP_SYS_ADMIN)) ++ /* only root and certain contexts can play with this */ ++ if (!vx_capable(CAP_SYS_ADMIN, VXC_ADMIN_MAPPER)) + return -EACCES; + + if (_IOC_TYPE(command) != DM_IOCTL) +diff -NurpP --minimal linux-3.2.34/drivers/md/dm.c linux-3.2.34-vs2.3.2.15/drivers/md/dm.c +--- linux-3.2.34/drivers/md/dm.c 2012-11-18 18:42:12.000000000 +0100 ++++ linux-3.2.34-vs2.3.2.15/drivers/md/dm.c 2012-10-22 12:59:48.000000000 +0200 +@@ -20,6 +20,7 @@ + #include + #include + #include ++#include + + #include + +@@ -132,6 +133,7 @@ struct mapped_device { + rwlock_t map_lock; + atomic_t holders; + atomic_t open_count; ++ xid_t xid; + + unsigned long flags; + +@@ -344,6 +346,7 @@ int dm_deleting_md(struct mapped_device + static int dm_blk_open(struct block_device *bdev, fmode_t mode) + { + struct mapped_device *md; ++ int ret = -ENXIO; + + spin_lock(&_minor_lock); + +@@ -352,18 +355,19 @@ static int dm_blk_open(struct block_devi + 
goto out; + + if (test_bit(DMF_FREEING, &md->flags) || +- dm_deleting_md(md)) { +- md = NULL; ++ dm_deleting_md(md)) ++ goto out; ++ ++ ret = -EACCES; ++ if (!vx_check(md->xid, VS_IDENT|VS_HOSTID)) + goto out; +- } + + dm_get(md); + atomic_inc(&md->open_count); +- ++ ret = 0; + out: + spin_unlock(&_minor_lock); +- +- return md ? 0 : -ENXIO; ++ return ret; + } + + static int dm_blk_close(struct gendisk *disk, fmode_t mode) +@@ -584,6 +588,14 @@ int dm_set_geometry(struct mapped_device + return 0; + } + ++/* ++ * Get the xid associated with a dm device ++ */ ++xid_t dm_get_xid(struct mapped_device *md) ++{ ++ return md->xid; ++} ++ + /*----------------------------------------------------------------- + * CRUD START: + * A more elegant soln is in the works that uses the queue +@@ -1870,6 +1882,7 @@ static struct mapped_device *alloc_dev(i + INIT_LIST_HEAD(&md->uevent_list); + spin_lock_init(&md->uevent_lock); + ++ md->xid = vx_current_xid(); + md->queue = blk_alloc_queue(GFP_KERNEL); + if (!md->queue) + goto bad_queue; +diff -NurpP --minimal linux-3.2.34/drivers/md/dm.h linux-3.2.34-vs2.3.2.15/drivers/md/dm.h +--- linux-3.2.34/drivers/md/dm.h 2012-01-09 16:14:21.000000000 +0100 ++++ linux-3.2.34-vs2.3.2.15/drivers/md/dm.h 2011-12-05 19:33:02.000000000 +0100 +@@ -41,6 +41,8 @@ struct dm_dev_internal { + struct dm_table; + struct dm_md_mempools; + ++xid_t dm_get_xid(struct mapped_device *md); ++ + /*----------------------------------------------------------------- + * Internal table functions. + *---------------------------------------------------------------*/ +diff -NurpP --minimal linux-3.2.34/drivers/net/tun.c linux-3.2.34-vs2.3.2.15/drivers/net/tun.c +--- linux-3.2.34/drivers/net/tun.c 2012-11-18 18:42:14.000000000 +0100 ++++ linux-3.2.34-vs2.3.2.15/drivers/net/tun.c 2012-09-01 11:10:32.000000000 +0200 +@@ -64,6 +64,7 @@ + #include + #include + #include ++#include + #include + #include + #include +@@ -121,6 +122,7 @@ struct tun_struct { + unsigned int flags; + uid_t owner; + gid_t group; ++ nid_t nid; + + struct net_device *dev; + u32 set_features; +@@ -909,6 +911,7 @@ static void tun_setup(struct net_device + + tun->owner = -1; + tun->group = -1; ++ tun->nid = current->nid; + + dev->ethtool_ops = &tun_ethtool_ops; + dev->destructor = tun_free_netdev; +@@ -1059,7 +1062,7 @@ static int tun_set_iff(struct net *net, + + if (((tun->owner != -1 && cred->euid != tun->owner) || + (tun->group != -1 && !in_egroup_p(tun->group))) && +- !capable(CAP_NET_ADMIN)) ++ !cap_raised(current_cap(), CAP_NET_ADMIN)) + return -EPERM; + err = security_tun_dev_attach(tun->socket.sk); + if (err < 0) +@@ -1073,7 +1076,7 @@ static int tun_set_iff(struct net *net, + char *name; + unsigned long flags = 0; + +- if (!capable(CAP_NET_ADMIN)) ++ if (!nx_capable(CAP_NET_ADMIN, NXC_TUN_CREATE)) + return -EPERM; + err = security_tun_dev_create(); + if (err < 0) +@@ -1141,6 +1144,9 @@ static int tun_set_iff(struct net *net, + + sk->sk_destruct = tun_sock_destruct; + ++ if (!nx_check(tun->nid, VS_IDENT | VS_HOSTID | VS_ADMIN_P)) ++ return -EPERM; ++ + err = tun_attach(tun, file); + if (err < 0) + goto failed; +@@ -1324,6 +1330,16 @@ static long __tun_chr_ioctl(struct file + tun_debug(KERN_INFO, tun, "group set to %d\n", tun->group); + break; + ++ case TUNSETNID: ++ if (!capable(CAP_CONTEXT)) ++ return -EPERM; ++ ++ /* Set nid owner of the device */ ++ tun->nid = (nid_t) arg; ++ ++ tun_debug(KERN_INFO, tun, "nid owner set to %u\n", tun->nid); ++ break; ++ + case TUNSETLINK: + /* Only allow setting the type when the interface is 
down */ + if (tun->dev->flags & IFF_UP) { +diff -NurpP --minimal linux-3.2.34/drivers/tty/sysrq.c linux-3.2.34-vs2.3.2.15/drivers/tty/sysrq.c +--- linux-3.2.34/drivers/tty/sysrq.c 2011-05-22 16:17:44.000000000 +0200 ++++ linux-3.2.34-vs2.3.2.15/drivers/tty/sysrq.c 2011-12-05 19:33:02.000000000 +0100 +@@ -41,6 +41,7 @@ + #include + #include + #include ++#include + + #include + #include +@@ -395,6 +396,21 @@ static struct sysrq_key_op sysrq_unrt_op + .enable_mask = SYSRQ_ENABLE_RTNICE, + }; + ++ ++#ifdef CONFIG_VSERVER_DEBUG ++static void sysrq_handle_vxinfo(int key) ++{ ++ dump_vx_info_inactive((key == 'x') ? 0 : 1); ++} ++ ++static struct sysrq_key_op sysrq_showvxinfo_op = { ++ .handler = sysrq_handle_vxinfo, ++ .help_msg = "conteXt", ++ .action_msg = "Show Context Info", ++ .enable_mask = SYSRQ_ENABLE_DUMP, ++}; ++#endif ++ + /* Key Operations table and lock */ + static DEFINE_SPINLOCK(sysrq_key_table_lock); + +@@ -449,7 +465,11 @@ static struct sysrq_key_op *sysrq_key_ta + NULL, /* v */ + &sysrq_showstate_blocked_op, /* w */ + /* x: May be registered on ppc/powerpc for xmon */ ++#ifdef CONFIG_VSERVER_DEBUG ++ &sysrq_showvxinfo_op, /* x */ ++#else + NULL, /* x */ ++#endif + /* y: May be registered on sparc64 for global register dump */ + NULL, /* y */ + &sysrq_ftrace_dump_op, /* z */ +@@ -464,6 +484,8 @@ static int sysrq_key_table_key2index(int + retval = key - '0'; + else if ((key >= 'a') && (key <= 'z')) + retval = key + 10 - 'a'; ++ else if ((key >= 'A') && (key <= 'Z')) ++ retval = key + 10 - 'A'; + else + retval = -1; + return retval; +diff -NurpP --minimal linux-3.2.34/drivers/tty/tty_io.c linux-3.2.34-vs2.3.2.15/drivers/tty/tty_io.c +--- linux-3.2.34/drivers/tty/tty_io.c 2012-01-09 16:14:48.000000000 +0100 ++++ linux-3.2.34-vs2.3.2.15/drivers/tty/tty_io.c 2011-12-05 19:33:02.000000000 +0100 +@@ -105,6 +105,7 @@ + + #include + #include ++#include + + #undef TTY_DEBUG_HANGUP + +@@ -2080,7 +2081,8 @@ static int tiocsti(struct tty_struct *tt + char ch, mbz = 0; + struct tty_ldisc *ld; + +- if ((current->signal->tty != tty) && !capable(CAP_SYS_ADMIN)) ++ if (((current->signal->tty != tty) && ++ !vx_capable(CAP_SYS_ADMIN, VXC_TIOCSTI))) + return -EPERM; + if (get_user(ch, p)) + return -EFAULT; +@@ -2368,6 +2370,7 @@ static int tiocspgrp(struct tty_struct * + return -ENOTTY; + if (get_user(pgrp_nr, p)) + return -EFAULT; ++ pgrp_nr = vx_rmap_pid(pgrp_nr); + if (pgrp_nr < 0) + return -EINVAL; + rcu_read_lock(); +diff -NurpP --minimal linux-3.2.34/fs/attr.c linux-3.2.34-vs2.3.2.15/fs/attr.c +--- linux-3.2.34/fs/attr.c 2012-11-18 18:42:20.000000000 +0100 ++++ linux-3.2.34-vs2.3.2.15/fs/attr.c 2012-06-14 20:45:24.000000000 +0200 +@@ -14,6 +14,9 @@ + #include + #include + #include ++#include ++#include ++#include + + /** + * inode_change_ok - check if attribute changes to an inode are allowed +@@ -74,6 +77,10 @@ int inode_change_ok(const struct inode * + return -EPERM; + } + ++ /* check for inode tag permission */ ++ if (dx_permission(inode, MAY_WRITE)) ++ return -EACCES; ++ + return 0; + } + EXPORT_SYMBOL(inode_change_ok); +@@ -144,6 +151,8 @@ void setattr_copy(struct inode *inode, c + inode->i_uid = attr->ia_uid; + if (ia_valid & ATTR_GID) + inode->i_gid = attr->ia_gid; ++ if ((ia_valid & ATTR_TAG) && IS_TAGGED(inode)) ++ inode->i_tag = attr->ia_tag; + if (ia_valid & ATTR_ATIME) + inode->i_atime = timespec_trunc(attr->ia_atime, + inode->i_sb->s_time_gran); +@@ -171,7 +180,8 @@ int notify_change(struct dentry * dentry + struct timespec now; + unsigned int ia_valid = attr->ia_valid; + +- if 
(ia_valid & (ATTR_MODE | ATTR_UID | ATTR_GID | ATTR_TIMES_SET)) { ++ if (ia_valid & (ATTR_MODE | ATTR_UID | ATTR_GID | ++ ATTR_TAG | ATTR_TIMES_SET)) { + if (IS_IMMUTABLE(inode) || IS_APPEND(inode)) + return -EPERM; + } +diff -NurpP --minimal linux-3.2.34/fs/block_dev.c linux-3.2.34-vs2.3.2.15/fs/block_dev.c +--- linux-3.2.34/fs/block_dev.c 2012-11-18 18:42:20.000000000 +0100 ++++ linux-3.2.34-vs2.3.2.15/fs/block_dev.c 2012-06-14 20:45:24.000000000 +0200 +@@ -25,6 +25,7 @@ + #include + #include + #include ++#include + #include + #include "internal.h" + +@@ -563,6 +564,7 @@ struct block_device *bdget(dev_t dev) + bdev->bd_invalidated = 0; + inode->i_mode = S_IFBLK; + inode->i_rdev = dev; ++ inode->i_mdev = dev; + inode->i_bdev = bdev; + inode->i_data.a_ops = &def_blk_aops; + mapping_set_gfp_mask(&inode->i_data, GFP_USER); +@@ -609,6 +611,11 @@ EXPORT_SYMBOL(bdput); + static struct block_device *bd_acquire(struct inode *inode) + { + struct block_device *bdev; ++ dev_t mdev; ++ ++ if (!vs_map_blkdev(inode->i_rdev, &mdev, DATTR_OPEN)) ++ return NULL; ++ inode->i_mdev = mdev; + + spin_lock(&bdev_lock); + bdev = inode->i_bdev; +@@ -619,7 +626,7 @@ static struct block_device *bd_acquire(s + } + spin_unlock(&bdev_lock); + +- bdev = bdget(inode->i_rdev); ++ bdev = bdget(mdev); + if (bdev) { + spin_lock(&bdev_lock); + if (!inode->i_bdev) { +diff -NurpP --minimal linux-3.2.34/fs/btrfs/ctree.h linux-3.2.34-vs2.3.2.15/fs/btrfs/ctree.h +--- linux-3.2.34/fs/btrfs/ctree.h 2012-11-18 18:42:20.000000000 +0100 ++++ linux-3.2.34-vs2.3.2.15/fs/btrfs/ctree.h 2012-05-15 18:16:52.000000000 +0200 +@@ -643,11 +643,14 @@ struct btrfs_inode_item { + /* modification sequence number for NFS */ + __le64 sequence; + ++ __le16 tag; + /* + * a little future expansion, for more than this we can + * just grow the inode item and version it + */ +- __le64 reserved[4]; ++ __le16 reserved16; ++ __le32 reserved32; ++ __le64 reserved[3]; + struct btrfs_timespec atime; + struct btrfs_timespec ctime; + struct btrfs_timespec mtime; +@@ -1414,6 +1417,8 @@ struct btrfs_ioctl_defrag_range_args { + #define BTRFS_MOUNT_INODE_MAP_CACHE (1 << 17) + #define BTRFS_MOUNT_RECOVERY (1 << 18) + ++#define BTRFS_MOUNT_TAGGED (1 << 24) ++ + #define btrfs_clear_opt(o, opt) ((o) &= ~BTRFS_MOUNT_##opt) + #define btrfs_set_opt(o, opt) ((o) |= BTRFS_MOUNT_##opt) + #define btrfs_test_opt(root, opt) ((root)->fs_info->mount_opt & \ +@@ -1621,6 +1626,7 @@ BTRFS_SETGET_FUNCS(inode_block_group, st + BTRFS_SETGET_FUNCS(inode_nlink, struct btrfs_inode_item, nlink, 32); + BTRFS_SETGET_FUNCS(inode_uid, struct btrfs_inode_item, uid, 32); + BTRFS_SETGET_FUNCS(inode_gid, struct btrfs_inode_item, gid, 32); ++BTRFS_SETGET_FUNCS(inode_tag, struct btrfs_inode_item, tag, 16); + BTRFS_SETGET_FUNCS(inode_mode, struct btrfs_inode_item, mode, 32); + BTRFS_SETGET_FUNCS(inode_rdev, struct btrfs_inode_item, rdev, 64); + BTRFS_SETGET_FUNCS(inode_flags, struct btrfs_inode_item, flags, 64); +@@ -1674,6 +1680,10 @@ BTRFS_SETGET_FUNCS(extent_flags, struct + + BTRFS_SETGET_FUNCS(extent_refs_v0, struct btrfs_extent_item_v0, refs, 32); + ++#define BTRFS_INODE_IXUNLINK (1 << 24) ++#define BTRFS_INODE_BARRIER (1 << 25) ++#define BTRFS_INODE_COW (1 << 26) ++ + + BTRFS_SETGET_FUNCS(tree_block_level, struct btrfs_tree_block_info, level, 8); + +@@ -2730,6 +2740,7 @@ extern const struct dentry_operations bt + long btrfs_ioctl(struct file *file, unsigned int cmd, unsigned long arg); + void btrfs_update_iflags(struct inode *inode); + void btrfs_inherit_iflags(struct inode *inode, struct inode 
*dir); ++int btrfs_sync_flags(struct inode *inode, int, int); + int btrfs_defrag_file(struct inode *inode, struct file *file, + struct btrfs_ioctl_defrag_range_args *range, + u64 newer_than, unsigned long max_pages); +diff -NurpP --minimal linux-3.2.34/fs/btrfs/disk-io.c linux-3.2.34-vs2.3.2.15/fs/btrfs/disk-io.c +--- linux-3.2.34/fs/btrfs/disk-io.c 2012-11-18 18:42:21.000000000 +0100 ++++ linux-3.2.34-vs2.3.2.15/fs/btrfs/disk-io.c 2012-08-13 12:40:51.000000000 +0200 +@@ -2104,6 +2104,9 @@ struct btrfs_root *open_ctree(struct sup + goto fail_alloc; + } + ++ if (btrfs_test_opt(tree_root, TAGGED)) ++ sb->s_flags |= MS_TAGGED; ++ + features = btrfs_super_incompat_flags(disk_super) & + ~BTRFS_FEATURE_INCOMPAT_SUPP; + if (features) { +diff -NurpP --minimal linux-3.2.34/fs/btrfs/inode.c linux-3.2.34-vs2.3.2.15/fs/btrfs/inode.c +--- linux-3.2.34/fs/btrfs/inode.c 2012-01-09 16:14:53.000000000 +0100 ++++ linux-3.2.34-vs2.3.2.15/fs/btrfs/inode.c 2012-01-09 16:19:51.000000000 +0100 +@@ -39,6 +39,7 @@ + #include + #include + #include ++#include + #include "compat.h" + #include "ctree.h" + #include "disk-io.h" +@@ -2332,6 +2333,8 @@ static void btrfs_read_locked_inode(stru + struct btrfs_key location; + int maybe_acls; + u32 rdev; ++ uid_t uid; ++ gid_t gid; + int ret; + bool filled = false; + +@@ -2359,8 +2362,13 @@ static void btrfs_read_locked_inode(stru + struct btrfs_inode_item); + inode->i_mode = btrfs_inode_mode(leaf, inode_item); + set_nlink(inode, btrfs_inode_nlink(leaf, inode_item)); +- inode->i_uid = btrfs_inode_uid(leaf, inode_item); +- inode->i_gid = btrfs_inode_gid(leaf, inode_item); ++ ++ uid = btrfs_inode_uid(leaf, inode_item); ++ gid = btrfs_inode_gid(leaf, inode_item); ++ inode->i_uid = INOTAG_UID(DX_TAG(inode), uid, gid); ++ inode->i_gid = INOTAG_GID(DX_TAG(inode), uid, gid); ++ inode->i_tag = INOTAG_TAG(DX_TAG(inode), uid, gid, ++ btrfs_inode_tag(leaf, inode_item)); + btrfs_i_size_write(inode, btrfs_inode_size(leaf, inode_item)); + + tspec = btrfs_inode_atime(inode_item); +@@ -2438,8 +2446,14 @@ static void fill_inode_item(struct btrfs + struct btrfs_inode_item *item, + struct inode *inode) + { +- btrfs_set_inode_uid(leaf, item, inode->i_uid); +- btrfs_set_inode_gid(leaf, item, inode->i_gid); ++ uid_t uid = TAGINO_UID(DX_TAG(inode), inode->i_uid, inode->i_tag); ++ gid_t gid = TAGINO_GID(DX_TAG(inode), inode->i_gid, inode->i_tag); ++ ++ btrfs_set_inode_uid(leaf, item, uid); ++ btrfs_set_inode_gid(leaf, item, gid); ++#ifdef CONFIG_TAGGING_INTERN ++ btrfs_set_inode_tag(leaf, item, inode->i_tag); ++#endif + btrfs_set_inode_size(leaf, item, BTRFS_I(inode)->disk_i_size); + btrfs_set_inode_mode(leaf, item, inode->i_mode); + btrfs_set_inode_nlink(leaf, item, inode->i_nlink); +@@ -7377,11 +7391,13 @@ static const struct inode_operations btr + .listxattr = btrfs_listxattr, + .removexattr = btrfs_removexattr, + .permission = btrfs_permission, ++ .sync_flags = btrfs_sync_flags, + .get_acl = btrfs_get_acl, + }; + static const struct inode_operations btrfs_dir_ro_inode_operations = { + .lookup = btrfs_lookup, + .permission = btrfs_permission, ++ .sync_flags = btrfs_sync_flags, + .get_acl = btrfs_get_acl, + }; + +diff -NurpP --minimal linux-3.2.34/fs/btrfs/ioctl.c linux-3.2.34-vs2.3.2.15/fs/btrfs/ioctl.c +--- linux-3.2.34/fs/btrfs/ioctl.c 2012-01-09 16:14:53.000000000 +0100 ++++ linux-3.2.34-vs2.3.2.15/fs/btrfs/ioctl.c 2012-01-09 16:19:31.000000000 +0100 +@@ -71,10 +71,13 @@ static unsigned int btrfs_flags_to_ioctl + { + unsigned int iflags = 0; + +- if (flags & BTRFS_INODE_SYNC) +- iflags |= 
FS_SYNC_FL; + if (flags & BTRFS_INODE_IMMUTABLE) + iflags |= FS_IMMUTABLE_FL; ++ if (flags & BTRFS_INODE_IXUNLINK) ++ iflags |= FS_IXUNLINK_FL; ++ ++ if (flags & BTRFS_INODE_SYNC) ++ iflags |= FS_SYNC_FL; + if (flags & BTRFS_INODE_APPEND) + iflags |= FS_APPEND_FL; + if (flags & BTRFS_INODE_NODUMP) +@@ -91,28 +94,78 @@ static unsigned int btrfs_flags_to_ioctl + else if (flags & BTRFS_INODE_NOCOMPRESS) + iflags |= FS_NOCOMP_FL; + ++ if (flags & BTRFS_INODE_BARRIER) ++ iflags |= FS_BARRIER_FL; ++ if (flags & BTRFS_INODE_COW) ++ iflags |= FS_COW_FL; + return iflags; + } + + /* +- * Update inode->i_flags based on the btrfs internal flags. ++ * Update inode->i_(v)flags based on the btrfs internal flags. + */ + void btrfs_update_iflags(struct inode *inode) + { + struct btrfs_inode *ip = BTRFS_I(inode); + +- inode->i_flags &= ~(S_SYNC|S_APPEND|S_IMMUTABLE|S_NOATIME|S_DIRSYNC); ++ inode->i_flags &= ~(S_IMMUTABLE | S_IXUNLINK | ++ S_SYNC | S_APPEND | S_NOATIME | S_DIRSYNC); + +- if (ip->flags & BTRFS_INODE_SYNC) +- inode->i_flags |= S_SYNC; + if (ip->flags & BTRFS_INODE_IMMUTABLE) + inode->i_flags |= S_IMMUTABLE; ++ if (ip->flags & BTRFS_INODE_IXUNLINK) ++ inode->i_flags |= S_IXUNLINK; ++ ++ if (ip->flags & BTRFS_INODE_SYNC) ++ inode->i_flags |= S_SYNC; + if (ip->flags & BTRFS_INODE_APPEND) + inode->i_flags |= S_APPEND; + if (ip->flags & BTRFS_INODE_NOATIME) + inode->i_flags |= S_NOATIME; + if (ip->flags & BTRFS_INODE_DIRSYNC) + inode->i_flags |= S_DIRSYNC; ++ ++ inode->i_vflags &= ~(V_BARRIER | V_COW); ++ ++ if (ip->flags & BTRFS_INODE_BARRIER) ++ inode->i_vflags |= V_BARRIER; ++ if (ip->flags & BTRFS_INODE_COW) ++ inode->i_vflags |= V_COW; ++} ++ ++/* ++ * Update btrfs internal flags from inode->i_(v)flags. ++ */ ++void btrfs_update_flags(struct inode *inode) ++{ ++ struct btrfs_inode *ip = BTRFS_I(inode); ++ ++ unsigned int flags = inode->i_flags; ++ unsigned int vflags = inode->i_vflags; ++ ++ ip->flags &= ~(BTRFS_INODE_SYNC | BTRFS_INODE_APPEND | ++ BTRFS_INODE_IMMUTABLE | BTRFS_INODE_IXUNLINK | ++ BTRFS_INODE_NOATIME | BTRFS_INODE_DIRSYNC | ++ BTRFS_INODE_BARRIER | BTRFS_INODE_COW); ++ ++ if (flags & S_IMMUTABLE) ++ ip->flags |= BTRFS_INODE_IMMUTABLE; ++ if (flags & S_IXUNLINK) ++ ip->flags |= BTRFS_INODE_IXUNLINK; ++ ++ if (flags & S_SYNC) ++ ip->flags |= BTRFS_INODE_SYNC; ++ if (flags & S_APPEND) ++ ip->flags |= BTRFS_INODE_APPEND; ++ if (flags & S_NOATIME) ++ ip->flags |= BTRFS_INODE_NOATIME; ++ if (flags & S_DIRSYNC) ++ ip->flags |= BTRFS_INODE_DIRSYNC; ++ ++ if (vflags & V_BARRIER) ++ ip->flags |= BTRFS_INODE_BARRIER; ++ if (vflags & V_COW) ++ ip->flags |= BTRFS_INODE_COW; + } + + /* +@@ -128,6 +181,7 @@ void btrfs_inherit_iflags(struct inode * + return; + + flags = BTRFS_I(dir)->flags; ++ flags &= ~BTRFS_INODE_BARRIER; + + if (flags & BTRFS_INODE_NOCOMPRESS) { + BTRFS_I(inode)->flags &= ~BTRFS_INODE_COMPRESS; +@@ -143,6 +197,30 @@ void btrfs_inherit_iflags(struct inode * + btrfs_update_iflags(inode); + } + ++int btrfs_sync_flags(struct inode *inode, int flags, int vflags) ++{ ++ struct btrfs_inode *ip = BTRFS_I(inode); ++ struct btrfs_root *root = ip->root; ++ struct btrfs_trans_handle *trans; ++ int ret; ++ ++ trans = btrfs_join_transaction(root); ++ BUG_ON(!trans); ++ ++ inode->i_flags = flags; ++ inode->i_vflags = vflags; ++ btrfs_update_flags(inode); ++ ++ ret = btrfs_update_inode(trans, root, inode); ++ BUG_ON(ret); ++ ++ btrfs_update_iflags(inode); ++ inode->i_ctime = CURRENT_TIME; ++ btrfs_end_transaction(trans, root); ++ ++ return 0; ++} ++ + static int 
btrfs_ioctl_getflags(struct file *file, void __user *arg) + { + struct btrfs_inode *ip = BTRFS_I(file->f_path.dentry->d_inode); +@@ -194,7 +272,8 @@ static int btrfs_ioctl_setflags(struct f + + flags = btrfs_mask_flags(inode->i_mode, flags); + oldflags = btrfs_flags_to_ioctl(ip->flags); +- if ((flags ^ oldflags) & (FS_APPEND_FL | FS_IMMUTABLE_FL)) { ++ if ((flags ^ oldflags) & (FS_APPEND_FL | ++ FS_IMMUTABLE_FL | FS_IXUNLINK_FL)) { + if (!capable(CAP_LINUX_IMMUTABLE)) { + ret = -EPERM; + goto out_unlock; +@@ -205,14 +284,19 @@ static int btrfs_ioctl_setflags(struct f + if (ret) + goto out_unlock; + +- if (flags & FS_SYNC_FL) +- ip->flags |= BTRFS_INODE_SYNC; +- else +- ip->flags &= ~BTRFS_INODE_SYNC; + if (flags & FS_IMMUTABLE_FL) + ip->flags |= BTRFS_INODE_IMMUTABLE; + else + ip->flags &= ~BTRFS_INODE_IMMUTABLE; ++ if (flags & FS_IXUNLINK_FL) ++ ip->flags |= BTRFS_INODE_IXUNLINK; ++ else ++ ip->flags &= ~BTRFS_INODE_IXUNLINK; ++ ++ if (flags & FS_SYNC_FL) ++ ip->flags |= BTRFS_INODE_SYNC; ++ else ++ ip->flags &= ~BTRFS_INODE_SYNC; + if (flags & FS_APPEND_FL) + ip->flags |= BTRFS_INODE_APPEND; + else +diff -NurpP --minimal linux-3.2.34/fs/btrfs/super.c linux-3.2.34-vs2.3.2.15/fs/btrfs/super.c +--- linux-3.2.34/fs/btrfs/super.c 2012-01-09 16:14:53.000000000 +0100 ++++ linux-3.2.34-vs2.3.2.15/fs/btrfs/super.c 2012-01-09 16:19:31.000000000 +0100 +@@ -165,7 +165,8 @@ enum { + Opt_notreelog, Opt_ratio, Opt_flushoncommit, Opt_discard, + Opt_space_cache, Opt_clear_cache, Opt_user_subvol_rm_allowed, + Opt_enospc_debug, Opt_subvolrootid, Opt_defrag, +- Opt_inode_cache, Opt_no_space_cache, Opt_recovery, Opt_err, ++ Opt_inode_cache, Opt_no_space_cache, Opt_recovery, ++ Opt_tag, Opt_notag, Opt_tagid, Opt_err, + }; + + static match_table_t tokens = { +@@ -200,6 +201,9 @@ static match_table_t tokens = { + {Opt_inode_cache, "inode_cache"}, + {Opt_no_space_cache, "nospace_cache"}, + {Opt_recovery, "recovery"}, ++ {Opt_tag, "tag"}, ++ {Opt_notag, "notag"}, ++ {Opt_tagid, "tagid=%u"}, + {Opt_err, NULL}, + }; + +@@ -398,6 +402,22 @@ int btrfs_parse_options(struct btrfs_roo + printk(KERN_INFO "btrfs: enabling auto recovery"); + btrfs_set_opt(info->mount_opt, RECOVERY); + break; ++#ifndef CONFIG_TAGGING_NONE ++ case Opt_tag: ++ printk(KERN_INFO "btrfs: use tagging\n"); ++ btrfs_set_opt(info->mount_opt, TAGGED); ++ break; ++ case Opt_notag: ++ printk(KERN_INFO "btrfs: disabled tagging\n"); ++ btrfs_clear_opt(info->mount_opt, TAGGED); ++ break; ++#endif ++#ifdef CONFIG_PROPAGATE ++ case Opt_tagid: ++ /* use args[0] */ ++ btrfs_set_opt(info->mount_opt, TAGGED); ++ break; ++#endif + case Opt_err: + printk(KERN_INFO "btrfs: unrecognized mount option " + "'%s'\n", p); +@@ -985,6 +1005,12 @@ static int btrfs_remount(struct super_bl + if (ret) + return -EINVAL; + ++ if (btrfs_test_opt(root, TAGGED) && !(sb->s_flags & MS_TAGGED)) { ++ printk("btrfs: %s: tagging not permitted on remount.\n", ++ sb->s_id); ++ return -EINVAL; ++ } ++ + if ((*flags & MS_RDONLY) == (sb->s_flags & MS_RDONLY)) + return 0; + +diff -NurpP --minimal linux-3.2.34/fs/char_dev.c linux-3.2.34-vs2.3.2.15/fs/char_dev.c +--- linux-3.2.34/fs/char_dev.c 2011-03-15 18:07:31.000000000 +0100 ++++ linux-3.2.34-vs2.3.2.15/fs/char_dev.c 2011-12-05 19:33:02.000000000 +0100 +@@ -21,6 +21,8 @@ + #include + #include + #include ++#include ++#include + + #include "internal.h" + +@@ -371,14 +373,21 @@ static int chrdev_open(struct inode *ino + struct cdev *p; + struct cdev *new = NULL; + int ret = 0; ++ dev_t mdev; ++ ++ if (!vs_map_chrdev(inode->i_rdev, &mdev, 
DATTR_OPEN)) ++ return -EPERM; ++ inode->i_mdev = mdev; + + spin_lock(&cdev_lock); + p = inode->i_cdev; + if (!p) { + struct kobject *kobj; + int idx; ++ + spin_unlock(&cdev_lock); +- kobj = kobj_lookup(cdev_map, inode->i_rdev, &idx); ++ ++ kobj = kobj_lookup(cdev_map, mdev, &idx); + if (!kobj) + return -ENXIO; + new = container_of(kobj, struct cdev, kobj); +diff -NurpP --minimal linux-3.2.34/fs/dcache.c linux-3.2.34-vs2.3.2.15/fs/dcache.c +--- linux-3.2.34/fs/dcache.c 2012-11-18 18:42:21.000000000 +0100 ++++ linux-3.2.34-vs2.3.2.15/fs/dcache.c 2012-10-22 12:59:51.000000000 +0200 +@@ -37,6 +37,7 @@ + #include + #include + #include ++#include + #include "internal.h" + + /* +@@ -539,6 +540,8 @@ int d_invalidate(struct dentry * dentry) + spin_lock(&dentry->d_lock); + } + ++ vx_dentry_dec(dentry); ++ + /* + * Somebody else still using it? + * +@@ -568,6 +571,7 @@ EXPORT_SYMBOL(d_invalidate); + static inline void __dget_dlock(struct dentry *dentry) + { + dentry->d_count++; ++ vx_dentry_inc(dentry); + } + + static inline void __dget(struct dentry *dentry) +@@ -1196,6 +1200,9 @@ struct dentry *__d_alloc(struct super_bl + struct dentry *dentry; + char *dname; + ++ if (!vx_dentry_avail(1)) ++ return NULL; ++ + dentry = kmem_cache_alloc(dentry_cache, GFP_KERNEL); + if (!dentry) + return NULL; +@@ -1218,6 +1225,7 @@ struct dentry *__d_alloc(struct super_bl + + dentry->d_count = 1; + dentry->d_flags = 0; ++ vx_dentry_inc(dentry); + spin_lock_init(&dentry->d_lock); + seqcount_init(&dentry->d_seq); + dentry->d_inode = NULL; +@@ -1876,6 +1884,7 @@ struct dentry *__d_lookup(struct dentry + } + + dentry->d_count++; ++ vx_dentry_inc(dentry); + found = dentry; + spin_unlock(&dentry->d_lock); + break; +diff -NurpP --minimal linux-3.2.34/fs/devpts/inode.c linux-3.2.34-vs2.3.2.15/fs/devpts/inode.c +--- linux-3.2.34/fs/devpts/inode.c 2012-01-09 16:14:54.000000000 +0100 ++++ linux-3.2.34-vs2.3.2.15/fs/devpts/inode.c 2011-12-05 21:23:19.000000000 +0100 +@@ -25,6 +25,7 @@ + #include + #include + #include ++#include + + #define DEVPTS_DEFAULT_MODE 0600 + /* +@@ -36,6 +37,20 @@ + #define DEVPTS_DEFAULT_PTMX_MODE 0000 + #define PTMX_MINOR 2 + ++static int devpts_permission(struct inode *inode, int mask) ++{ ++ int ret = -EACCES; ++ ++ /* devpts is xid tagged */ ++ if (vx_check((xid_t)inode->i_tag, VS_WATCH_P | VS_IDENT)) ++ ret = generic_permission(inode, mask); ++ return ret; ++} ++ ++static struct inode_operations devpts_file_inode_operations = { ++ .permission = devpts_permission, ++}; ++ + extern int pty_limit; /* Config limit on Unix98 ptys */ + static DEFINE_MUTEX(allocated_ptys_lock); + +@@ -263,6 +278,34 @@ static int devpts_show_options(struct se + return 0; + } + ++static int devpts_filter(struct dentry *de) ++{ ++ xid_t xid = 0; ++ ++ /* devpts is xid tagged */ ++ if (de && de->d_inode) ++ xid = (xid_t)de->d_inode->i_tag; ++#ifdef CONFIG_VSERVER_WARN_DEVPTS ++ else ++ vxwprintk_task(1, "devpts " VS_Q("%.*s") " without inode.", ++ de->d_name.len, de->d_name.name); ++#endif ++ return vx_check(xid, VS_WATCH_P | VS_IDENT); ++} ++ ++static int devpts_readdir(struct file * filp, void * dirent, filldir_t filldir) ++{ ++ return dcache_readdir_filter(filp, dirent, filldir, devpts_filter); ++} ++ ++static struct file_operations devpts_dir_operations = { ++ .open = dcache_dir_open, ++ .release = dcache_dir_close, ++ .llseek = dcache_dir_lseek, ++ .read = generic_read_dir, ++ .readdir = devpts_readdir, ++}; ++ + static const struct super_operations devpts_sops = { + .statfs = simple_statfs, + .remount_fs = 
devpts_remount, +@@ -306,8 +349,10 @@ devpts_fill_super(struct super_block *s, + inode->i_mtime = inode->i_atime = inode->i_ctime = CURRENT_TIME; + inode->i_mode = S_IFDIR | S_IRUGO | S_IXUGO | S_IWUSR; + inode->i_op = &simple_dir_inode_operations; +- inode->i_fop = &simple_dir_operations; ++ inode->i_fop = &devpts_dir_operations; + set_nlink(inode, 2); ++ /* devpts is xid tagged */ ++ inode->i_tag = (tag_t)vx_current_xid(); + + s->s_root = d_alloc_root(inode); + if (s->s_root) +@@ -494,6 +539,9 @@ int devpts_pty_new(struct inode *ptmx_in + inode->i_gid = opts->setgid ? opts->gid : current_fsgid(); + inode->i_mtime = inode->i_atime = inode->i_ctime = CURRENT_TIME; + init_special_inode(inode, S_IFCHR|opts->mode, device); ++ /* devpts is xid tagged */ ++ inode->i_tag = (tag_t)vx_current_xid(); ++ inode->i_op = &devpts_file_inode_operations; + inode->i_private = tty; + tty->driver_data = inode; + +diff -NurpP --minimal linux-3.2.34/fs/ext2/balloc.c linux-3.2.34-vs2.3.2.15/fs/ext2/balloc.c +--- linux-3.2.34/fs/ext2/balloc.c 2012-01-09 16:14:54.000000000 +0100 ++++ linux-3.2.34-vs2.3.2.15/fs/ext2/balloc.c 2011-12-05 19:33:02.000000000 +0100 +@@ -701,7 +701,6 @@ ext2_try_to_allocate(struct super_block + start = 0; + end = EXT2_BLOCKS_PER_GROUP(sb); + } +- + BUG_ON(start > EXT2_BLOCKS_PER_GROUP(sb)); + + repeat: +diff -NurpP --minimal linux-3.2.34/fs/ext2/ext2.h linux-3.2.34-vs2.3.2.15/fs/ext2/ext2.h +--- linux-3.2.34/fs/ext2/ext2.h 2012-01-09 16:14:54.000000000 +0100 ++++ linux-3.2.34-vs2.3.2.15/fs/ext2/ext2.h 2011-12-05 19:33:02.000000000 +0100 +@@ -126,6 +126,7 @@ extern void ext2_set_inode_flags(struct + extern void ext2_get_inode_flags(struct ext2_inode_info *); + extern int ext2_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo, + u64 start, u64 len); ++extern int ext2_sync_flags(struct inode *, int, int); + + /* ioctl.c */ + extern long ext2_ioctl(struct file *, unsigned int, unsigned long); +diff -NurpP --minimal linux-3.2.34/fs/ext2/file.c linux-3.2.34-vs2.3.2.15/fs/ext2/file.c +--- linux-3.2.34/fs/ext2/file.c 2011-10-24 18:45:27.000000000 +0200 ++++ linux-3.2.34-vs2.3.2.15/fs/ext2/file.c 2011-12-05 19:33:02.000000000 +0100 +@@ -104,4 +104,5 @@ const struct inode_operations ext2_file_ + .setattr = ext2_setattr, + .get_acl = ext2_get_acl, + .fiemap = ext2_fiemap, ++ .sync_flags = ext2_sync_flags, + }; +diff -NurpP --minimal linux-3.2.34/fs/ext2/ialloc.c linux-3.2.34-vs2.3.2.15/fs/ext2/ialloc.c +--- linux-3.2.34/fs/ext2/ialloc.c 2012-01-09 16:14:54.000000000 +0100 ++++ linux-3.2.34-vs2.3.2.15/fs/ext2/ialloc.c 2011-12-05 19:33:02.000000000 +0100 +@@ -17,6 +17,7 @@ + #include + #include + #include ++#include + #include "ext2.h" + #include "xattr.h" + #include "acl.h" +@@ -549,6 +550,7 @@ got: + inode->i_mode = mode; + inode->i_uid = current_fsuid(); + inode->i_gid = dir->i_gid; ++ inode->i_tag = dx_current_fstag(sb); + } else + inode_init_owner(inode, dir, mode); + +diff -NurpP --minimal linux-3.2.34/fs/ext2/inode.c linux-3.2.34-vs2.3.2.15/fs/ext2/inode.c +--- linux-3.2.34/fs/ext2/inode.c 2012-01-09 16:14:54.000000000 +0100 ++++ linux-3.2.34-vs2.3.2.15/fs/ext2/inode.c 2011-12-05 21:24:12.000000000 +0100 +@@ -32,6 +32,7 @@ + #include + #include + #include ++#include + #include "ext2.h" + #include "acl.h" + #include "xip.h" +@@ -1167,7 +1168,7 @@ static void ext2_truncate_blocks(struct + return; + if (ext2_inode_is_fast_symlink(inode)) + return; +- if (IS_APPEND(inode) || IS_IMMUTABLE(inode)) ++ if (IS_APPEND(inode) || IS_IXORUNLINK(inode)) + return; + 
__ext2_truncate_blocks(inode, offset); + } +@@ -1258,36 +1259,61 @@ void ext2_set_inode_flags(struct inode * + { + unsigned int flags = EXT2_I(inode)->i_flags; + +- inode->i_flags &= ~(S_SYNC|S_APPEND|S_IMMUTABLE|S_NOATIME|S_DIRSYNC); ++ inode->i_flags &= ~(S_IMMUTABLE | S_IXUNLINK | ++ S_SYNC | S_APPEND | S_NOATIME | S_DIRSYNC); ++ ++ ++ if (flags & EXT2_IMMUTABLE_FL) ++ inode->i_flags |= S_IMMUTABLE; ++ if (flags & EXT2_IXUNLINK_FL) ++ inode->i_flags |= S_IXUNLINK; ++ + if (flags & EXT2_SYNC_FL) + inode->i_flags |= S_SYNC; + if (flags & EXT2_APPEND_FL) + inode->i_flags |= S_APPEND; +- if (flags & EXT2_IMMUTABLE_FL) +- inode->i_flags |= S_IMMUTABLE; + if (flags & EXT2_NOATIME_FL) + inode->i_flags |= S_NOATIME; + if (flags & EXT2_DIRSYNC_FL) + inode->i_flags |= S_DIRSYNC; ++ ++ inode->i_vflags &= ~(V_BARRIER | V_COW); ++ ++ if (flags & EXT2_BARRIER_FL) ++ inode->i_vflags |= V_BARRIER; ++ if (flags & EXT2_COW_FL) ++ inode->i_vflags |= V_COW; + } + + /* Propagate flags from i_flags to EXT2_I(inode)->i_flags */ + void ext2_get_inode_flags(struct ext2_inode_info *ei) + { + unsigned int flags = ei->vfs_inode.i_flags; ++ unsigned int vflags = ei->vfs_inode.i_vflags; ++ ++ ei->i_flags &= ~(EXT2_SYNC_FL | EXT2_APPEND_FL | ++ EXT2_IMMUTABLE_FL | EXT2_IXUNLINK_FL | ++ EXT2_NOATIME_FL | EXT2_DIRSYNC_FL | ++ EXT2_BARRIER_FL | EXT2_COW_FL); ++ ++ if (flags & S_IMMUTABLE) ++ ei->i_flags |= EXT2_IMMUTABLE_FL; ++ if (flags & S_IXUNLINK) ++ ei->i_flags |= EXT2_IXUNLINK_FL; + +- ei->i_flags &= ~(EXT2_SYNC_FL|EXT2_APPEND_FL| +- EXT2_IMMUTABLE_FL|EXT2_NOATIME_FL|EXT2_DIRSYNC_FL); + if (flags & S_SYNC) + ei->i_flags |= EXT2_SYNC_FL; + if (flags & S_APPEND) + ei->i_flags |= EXT2_APPEND_FL; +- if (flags & S_IMMUTABLE) +- ei->i_flags |= EXT2_IMMUTABLE_FL; + if (flags & S_NOATIME) + ei->i_flags |= EXT2_NOATIME_FL; + if (flags & S_DIRSYNC) + ei->i_flags |= EXT2_DIRSYNC_FL; ++ ++ if (vflags & V_BARRIER) ++ ei->i_flags |= EXT2_BARRIER_FL; ++ if (vflags & V_COW) ++ ei->i_flags |= EXT2_COW_FL; + } + + struct inode *ext2_iget (struct super_block *sb, unsigned long ino) +@@ -1297,6 +1323,8 @@ struct inode *ext2_iget (struct super_bl + struct ext2_inode *raw_inode; + struct inode *inode; + long ret = -EIO; ++ uid_t uid; ++ gid_t gid; + int n; + + inode = iget_locked(sb, ino); +@@ -1315,12 +1343,16 @@ struct inode *ext2_iget (struct super_bl + } + + inode->i_mode = le16_to_cpu(raw_inode->i_mode); +- inode->i_uid = (uid_t)le16_to_cpu(raw_inode->i_uid_low); +- inode->i_gid = (gid_t)le16_to_cpu(raw_inode->i_gid_low); ++ uid = (uid_t)le16_to_cpu(raw_inode->i_uid_low); ++ gid = (gid_t)le16_to_cpu(raw_inode->i_gid_low); + if (!(test_opt (inode->i_sb, NO_UID32))) { +- inode->i_uid |= le16_to_cpu(raw_inode->i_uid_high) << 16; +- inode->i_gid |= le16_to_cpu(raw_inode->i_gid_high) << 16; ++ uid |= le16_to_cpu(raw_inode->i_uid_high) << 16; ++ gid |= le16_to_cpu(raw_inode->i_gid_high) << 16; + } ++ inode->i_uid = INOTAG_UID(DX_TAG(inode), uid, gid); ++ inode->i_gid = INOTAG_GID(DX_TAG(inode), uid, gid); ++ inode->i_tag = INOTAG_TAG(DX_TAG(inode), uid, gid, ++ le16_to_cpu(raw_inode->i_raw_tag)); + set_nlink(inode, le16_to_cpu(raw_inode->i_links_count)); + inode->i_size = le32_to_cpu(raw_inode->i_size); + inode->i_atime.tv_sec = (signed)le32_to_cpu(raw_inode->i_atime); +@@ -1418,8 +1450,8 @@ static int __ext2_write_inode(struct ino + struct ext2_inode_info *ei = EXT2_I(inode); + struct super_block *sb = inode->i_sb; + ino_t ino = inode->i_ino; +- uid_t uid = inode->i_uid; +- gid_t gid = inode->i_gid; ++ uid_t uid = 
TAGINO_UID(DX_TAG(inode), inode->i_uid, inode->i_tag); ++ gid_t gid = TAGINO_GID(DX_TAG(inode), inode->i_gid, inode->i_tag); + struct buffer_head * bh; + struct ext2_inode * raw_inode = ext2_get_inode(sb, ino, &bh); + int n; +@@ -1455,6 +1487,9 @@ static int __ext2_write_inode(struct ino + raw_inode->i_uid_high = 0; + raw_inode->i_gid_high = 0; + } ++#ifdef CONFIG_TAGGING_INTERN ++ raw_inode->i_raw_tag = cpu_to_le16(inode->i_tag); ++#endif + raw_inode->i_links_count = cpu_to_le16(inode->i_nlink); + raw_inode->i_size = cpu_to_le32(inode->i_size); + raw_inode->i_atime = cpu_to_le32(inode->i_atime.tv_sec); +@@ -1535,7 +1570,8 @@ int ext2_setattr(struct dentry *dentry, + if (is_quota_modification(inode, iattr)) + dquot_initialize(inode); + if ((iattr->ia_valid & ATTR_UID && iattr->ia_uid != inode->i_uid) || +- (iattr->ia_valid & ATTR_GID && iattr->ia_gid != inode->i_gid)) { ++ (iattr->ia_valid & ATTR_GID && iattr->ia_gid != inode->i_gid) || ++ (iattr->ia_valid & ATTR_TAG && iattr->ia_tag != inode->i_tag)) { + error = dquot_transfer(inode, iattr); + if (error) + return error; +diff -NurpP --minimal linux-3.2.34/fs/ext2/ioctl.c linux-3.2.34-vs2.3.2.15/fs/ext2/ioctl.c +--- linux-3.2.34/fs/ext2/ioctl.c 2011-05-22 16:17:51.000000000 +0200 ++++ linux-3.2.34-vs2.3.2.15/fs/ext2/ioctl.c 2011-12-05 19:33:02.000000000 +0100 +@@ -17,6 +17,16 @@ + #include + + ++int ext2_sync_flags(struct inode *inode, int flags, int vflags) ++{ ++ inode->i_flags = flags; ++ inode->i_vflags = vflags; ++ ext2_get_inode_flags(EXT2_I(inode)); ++ inode->i_ctime = CURRENT_TIME_SEC; ++ mark_inode_dirty(inode); ++ return 0; ++} ++ + long ext2_ioctl(struct file *filp, unsigned int cmd, unsigned long arg) + { + struct inode *inode = filp->f_dentry->d_inode; +@@ -51,6 +61,11 @@ long ext2_ioctl(struct file *filp, unsig + + flags = ext2_mask_flags(inode->i_mode, flags); + ++ if (IS_BARRIER(inode)) { ++ vxwprintk_task(1, "messing with the barrier."); ++ return -EACCES; ++ } ++ + mutex_lock(&inode->i_mutex); + /* Is it quota file? Do not allow user to mess with it */ + if (IS_NOQUOTA(inode)) { +@@ -66,7 +81,9 @@ long ext2_ioctl(struct file *filp, unsig + * + * This test looks nicer. 
Thanks to Pauline Middelink + */ +- if ((flags ^ oldflags) & (EXT2_APPEND_FL | EXT2_IMMUTABLE_FL)) { ++ if ((oldflags & EXT2_IMMUTABLE_FL) || ++ ((flags ^ oldflags) & (EXT2_APPEND_FL | ++ EXT2_IMMUTABLE_FL | EXT2_IXUNLINK_FL))) { + if (!capable(CAP_LINUX_IMMUTABLE)) { + mutex_unlock(&inode->i_mutex); + ret = -EPERM; +@@ -74,7 +91,7 @@ long ext2_ioctl(struct file *filp, unsig + } + } + +- flags = flags & EXT2_FL_USER_MODIFIABLE; ++ flags &= EXT2_FL_USER_MODIFIABLE; + flags |= oldflags & ~EXT2_FL_USER_MODIFIABLE; + ei->i_flags = flags; + mutex_unlock(&inode->i_mutex); +diff -NurpP --minimal linux-3.2.34/fs/ext2/namei.c linux-3.2.34-vs2.3.2.15/fs/ext2/namei.c +--- linux-3.2.34/fs/ext2/namei.c 2011-10-24 18:45:27.000000000 +0200 ++++ linux-3.2.34-vs2.3.2.15/fs/ext2/namei.c 2011-12-05 19:33:02.000000000 +0100 +@@ -32,6 +32,7 @@ + + #include + #include ++#include + #include "ext2.h" + #include "xattr.h" + #include "acl.h" +@@ -73,6 +74,7 @@ static struct dentry *ext2_lookup(struct + (unsigned long) ino); + return ERR_PTR(-EIO); + } ++ dx_propagate_tag(nd, inode); + } + return d_splice_alias(inode, dentry); + } +@@ -408,6 +410,7 @@ const struct inode_operations ext2_dir_i + .removexattr = generic_removexattr, + #endif + .setattr = ext2_setattr, ++ .sync_flags = ext2_sync_flags, + .get_acl = ext2_get_acl, + }; + +diff -NurpP --minimal linux-3.2.34/fs/ext2/super.c linux-3.2.34-vs2.3.2.15/fs/ext2/super.c +--- linux-3.2.34/fs/ext2/super.c 2012-01-09 16:14:54.000000000 +0100 ++++ linux-3.2.34-vs2.3.2.15/fs/ext2/super.c 2011-12-05 19:33:02.000000000 +0100 +@@ -394,7 +394,8 @@ enum { + Opt_err_ro, Opt_nouid32, Opt_nocheck, Opt_debug, + Opt_oldalloc, Opt_orlov, Opt_nobh, Opt_user_xattr, Opt_nouser_xattr, + Opt_acl, Opt_noacl, Opt_xip, Opt_ignore, Opt_err, Opt_quota, +- Opt_usrquota, Opt_grpquota, Opt_reservation, Opt_noreservation ++ Opt_usrquota, Opt_grpquota, Opt_reservation, Opt_noreservation, ++ Opt_tag, Opt_notag, Opt_tagid + }; + + static const match_table_t tokens = { +@@ -422,6 +423,9 @@ static const match_table_t tokens = { + {Opt_acl, "acl"}, + {Opt_noacl, "noacl"}, + {Opt_xip, "xip"}, ++ {Opt_tag, "tag"}, ++ {Opt_notag, "notag"}, ++ {Opt_tagid, "tagid=%u"}, + {Opt_grpquota, "grpquota"}, + {Opt_ignore, "noquota"}, + {Opt_quota, "quota"}, +@@ -492,6 +496,20 @@ static int parse_options(char *options, + case Opt_nouid32: + set_opt (sbi->s_mount_opt, NO_UID32); + break; ++#ifndef CONFIG_TAGGING_NONE ++ case Opt_tag: ++ set_opt (sbi->s_mount_opt, TAGGED); ++ break; ++ case Opt_notag: ++ clear_opt (sbi->s_mount_opt, TAGGED); ++ break; ++#endif ++#ifdef CONFIG_PROPAGATE ++ case Opt_tagid: ++ /* use args[0] */ ++ set_opt (sbi->s_mount_opt, TAGGED); ++ break; ++#endif + case Opt_nocheck: + clear_opt (sbi->s_mount_opt, CHECK); + break; +@@ -850,6 +868,8 @@ static int ext2_fill_super(struct super_ + if (!parse_options((char *) data, sb)) + goto failed_mount; + ++ if (EXT2_SB(sb)->s_mount_opt & EXT2_MOUNT_TAGGED) ++ sb->s_flags |= MS_TAGGED; + sb->s_flags = (sb->s_flags & ~MS_POSIXACL) | + ((EXT2_SB(sb)->s_mount_opt & EXT2_MOUNT_POSIX_ACL) ? + MS_POSIXACL : 0); +@@ -1224,6 +1244,14 @@ static int ext2_remount (struct super_bl + goto restore_opts; + } + ++ if ((sbi->s_mount_opt & EXT2_MOUNT_TAGGED) && ++ !(sb->s_flags & MS_TAGGED)) { ++ printk("EXT2-fs: %s: tagging not permitted on remount.\n", ++ sb->s_id); ++ err = -EINVAL; ++ goto restore_opts; ++ } ++ + sb->s_flags = (sb->s_flags & ~MS_POSIXACL) | + ((sbi->s_mount_opt & EXT2_MOUNT_POSIX_ACL) ? 
MS_POSIXACL : 0); + +diff -NurpP --minimal linux-3.2.34/fs/ext3/file.c linux-3.2.34-vs2.3.2.15/fs/ext3/file.c +--- linux-3.2.34/fs/ext3/file.c 2011-10-24 18:45:27.000000000 +0200 ++++ linux-3.2.34-vs2.3.2.15/fs/ext3/file.c 2011-12-05 19:33:02.000000000 +0100 +@@ -80,5 +80,6 @@ const struct inode_operations ext3_file_ + #endif + .get_acl = ext3_get_acl, + .fiemap = ext3_fiemap, ++ .sync_flags = ext3_sync_flags, + }; + +diff -NurpP --minimal linux-3.2.34/fs/ext3/ialloc.c linux-3.2.34-vs2.3.2.15/fs/ext3/ialloc.c +--- linux-3.2.34/fs/ext3/ialloc.c 2012-11-18 18:42:21.000000000 +0100 ++++ linux-3.2.34-vs2.3.2.15/fs/ext3/ialloc.c 2012-06-14 20:45:24.000000000 +0200 +@@ -23,6 +23,7 @@ + #include + #include + #include ++#include + #include + + #include +@@ -496,6 +497,7 @@ got: + inode->i_mode = mode; + inode->i_uid = current_fsuid(); + inode->i_gid = dir->i_gid; ++ inode->i_tag = dx_current_fstag(sb); + } else + inode_init_owner(inode, dir, mode); + +diff -NurpP --minimal linux-3.2.34/fs/ext3/inode.c linux-3.2.34-vs2.3.2.15/fs/ext3/inode.c +--- linux-3.2.34/fs/ext3/inode.c 2012-11-18 18:42:21.000000000 +0100 ++++ linux-3.2.34-vs2.3.2.15/fs/ext3/inode.c 2012-10-22 12:59:51.000000000 +0200 +@@ -38,6 +38,7 @@ + #include + #include + #include ++#include + #include + #include "xattr.h" + #include "acl.h" +@@ -2852,36 +2853,60 @@ void ext3_set_inode_flags(struct inode * + { + unsigned int flags = EXT3_I(inode)->i_flags; + +- inode->i_flags &= ~(S_SYNC|S_APPEND|S_IMMUTABLE|S_NOATIME|S_DIRSYNC); ++ inode->i_flags &= ~(S_IMMUTABLE | S_IXUNLINK | ++ S_SYNC | S_APPEND | S_NOATIME | S_DIRSYNC); ++ ++ if (flags & EXT3_IMMUTABLE_FL) ++ inode->i_flags |= S_IMMUTABLE; ++ if (flags & EXT3_IXUNLINK_FL) ++ inode->i_flags |= S_IXUNLINK; ++ + if (flags & EXT3_SYNC_FL) + inode->i_flags |= S_SYNC; + if (flags & EXT3_APPEND_FL) + inode->i_flags |= S_APPEND; +- if (flags & EXT3_IMMUTABLE_FL) +- inode->i_flags |= S_IMMUTABLE; + if (flags & EXT3_NOATIME_FL) + inode->i_flags |= S_NOATIME; + if (flags & EXT3_DIRSYNC_FL) + inode->i_flags |= S_DIRSYNC; ++ ++ inode->i_vflags &= ~(V_BARRIER | V_COW); ++ ++ if (flags & EXT3_BARRIER_FL) ++ inode->i_vflags |= V_BARRIER; ++ if (flags & EXT3_COW_FL) ++ inode->i_vflags |= V_COW; + } + + /* Propagate flags from i_flags to EXT3_I(inode)->i_flags */ + void ext3_get_inode_flags(struct ext3_inode_info *ei) + { + unsigned int flags = ei->vfs_inode.i_flags; ++ unsigned int vflags = ei->vfs_inode.i_vflags; ++ ++ ei->i_flags &= ~(EXT3_SYNC_FL | EXT3_APPEND_FL | ++ EXT3_IMMUTABLE_FL | EXT3_IXUNLINK_FL | ++ EXT3_NOATIME_FL | EXT3_DIRSYNC_FL | ++ EXT3_BARRIER_FL | EXT3_COW_FL); ++ ++ if (flags & S_IMMUTABLE) ++ ei->i_flags |= EXT3_IMMUTABLE_FL; ++ if (flags & S_IXUNLINK) ++ ei->i_flags |= EXT3_IXUNLINK_FL; + +- ei->i_flags &= ~(EXT3_SYNC_FL|EXT3_APPEND_FL| +- EXT3_IMMUTABLE_FL|EXT3_NOATIME_FL|EXT3_DIRSYNC_FL); + if (flags & S_SYNC) + ei->i_flags |= EXT3_SYNC_FL; + if (flags & S_APPEND) + ei->i_flags |= EXT3_APPEND_FL; +- if (flags & S_IMMUTABLE) +- ei->i_flags |= EXT3_IMMUTABLE_FL; + if (flags & S_NOATIME) + ei->i_flags |= EXT3_NOATIME_FL; + if (flags & S_DIRSYNC) + ei->i_flags |= EXT3_DIRSYNC_FL; ++ ++ if (vflags & V_BARRIER) ++ ei->i_flags |= EXT3_BARRIER_FL; ++ if (vflags & V_COW) ++ ei->i_flags |= EXT3_COW_FL; + } + + struct inode *ext3_iget(struct super_block *sb, unsigned long ino) +@@ -2895,6 +2920,8 @@ struct inode *ext3_iget(struct super_blo + transaction_t *transaction; + long ret; + int block; ++ uid_t uid; ++ gid_t gid; + + inode = iget_locked(sb, ino); + if (!inode) +@@ -2911,12 
+2938,16 @@ struct inode *ext3_iget(struct super_blo + bh = iloc.bh; + raw_inode = ext3_raw_inode(&iloc); + inode->i_mode = le16_to_cpu(raw_inode->i_mode); +- inode->i_uid = (uid_t)le16_to_cpu(raw_inode->i_uid_low); +- inode->i_gid = (gid_t)le16_to_cpu(raw_inode->i_gid_low); ++ uid = (uid_t)le16_to_cpu(raw_inode->i_uid_low); ++ gid = (gid_t)le16_to_cpu(raw_inode->i_gid_low); + if(!(test_opt (inode->i_sb, NO_UID32))) { +- inode->i_uid |= le16_to_cpu(raw_inode->i_uid_high) << 16; +- inode->i_gid |= le16_to_cpu(raw_inode->i_gid_high) << 16; ++ uid |= le16_to_cpu(raw_inode->i_uid_high) << 16; ++ gid |= le16_to_cpu(raw_inode->i_gid_high) << 16; + } ++ inode->i_uid = INOTAG_UID(DX_TAG(inode), uid, gid); ++ inode->i_gid = INOTAG_GID(DX_TAG(inode), uid, gid); ++ inode->i_tag = INOTAG_TAG(DX_TAG(inode), uid, gid, ++ le16_to_cpu(raw_inode->i_raw_tag)); + set_nlink(inode, le16_to_cpu(raw_inode->i_links_count)); + inode->i_size = le32_to_cpu(raw_inode->i_size); + inode->i_atime.tv_sec = (signed)le32_to_cpu(raw_inode->i_atime); +@@ -3071,6 +3102,8 @@ static int ext3_do_update_inode(handle_t + struct ext3_inode *raw_inode = ext3_raw_inode(iloc); + struct ext3_inode_info *ei = EXT3_I(inode); + struct buffer_head *bh = iloc->bh; ++ uid_t uid = TAGINO_UID(DX_TAG(inode), inode->i_uid, inode->i_tag); ++ gid_t gid = TAGINO_GID(DX_TAG(inode), inode->i_gid, inode->i_tag); + int err = 0, rc, block; + int need_datasync = 0; + __le32 disksize; +@@ -3087,29 +3120,32 @@ again: + ext3_get_inode_flags(ei); + raw_inode->i_mode = cpu_to_le16(inode->i_mode); + if(!(test_opt(inode->i_sb, NO_UID32))) { +- raw_inode->i_uid_low = cpu_to_le16(low_16_bits(inode->i_uid)); +- raw_inode->i_gid_low = cpu_to_le16(low_16_bits(inode->i_gid)); ++ raw_inode->i_uid_low = cpu_to_le16(low_16_bits(uid)); ++ raw_inode->i_gid_low = cpu_to_le16(low_16_bits(gid)); + /* + * Fix up interoperability with old kernels. 
Otherwise, old inodes get + * re-used with the upper 16 bits of the uid/gid intact + */ + if(!ei->i_dtime) { + raw_inode->i_uid_high = +- cpu_to_le16(high_16_bits(inode->i_uid)); ++ cpu_to_le16(high_16_bits(uid)); + raw_inode->i_gid_high = +- cpu_to_le16(high_16_bits(inode->i_gid)); ++ cpu_to_le16(high_16_bits(gid)); + } else { + raw_inode->i_uid_high = 0; + raw_inode->i_gid_high = 0; + } + } else { + raw_inode->i_uid_low = +- cpu_to_le16(fs_high2lowuid(inode->i_uid)); ++ cpu_to_le16(fs_high2lowuid(uid)); + raw_inode->i_gid_low = +- cpu_to_le16(fs_high2lowgid(inode->i_gid)); ++ cpu_to_le16(fs_high2lowgid(gid)); + raw_inode->i_uid_high = 0; + raw_inode->i_gid_high = 0; + } ++#ifdef CONFIG_TAGGING_INTERN ++ raw_inode->i_raw_tag = cpu_to_le16(inode->i_tag); ++#endif + raw_inode->i_links_count = cpu_to_le16(inode->i_nlink); + disksize = cpu_to_le32(ei->i_disksize); + if (disksize != raw_inode->i_size) { +@@ -3278,7 +3314,8 @@ int ext3_setattr(struct dentry *dentry, + if (is_quota_modification(inode, attr)) + dquot_initialize(inode); + if ((ia_valid & ATTR_UID && attr->ia_uid != inode->i_uid) || +- (ia_valid & ATTR_GID && attr->ia_gid != inode->i_gid)) { ++ (ia_valid & ATTR_GID && attr->ia_gid != inode->i_gid) || ++ (ia_valid & ATTR_TAG && attr->ia_tag != inode->i_tag)) { + handle_t *handle; + + /* (user+group)*(old+new) structure, inode write (sb, +@@ -3300,6 +3337,8 @@ int ext3_setattr(struct dentry *dentry, + inode->i_uid = attr->ia_uid; + if (attr->ia_valid & ATTR_GID) + inode->i_gid = attr->ia_gid; ++ if ((attr->ia_valid & ATTR_TAG) && IS_TAGGED(inode)) ++ inode->i_tag = attr->ia_tag; + error = ext3_mark_inode_dirty(handle, inode); + ext3_journal_stop(handle); + } +diff -NurpP --minimal linux-3.2.34/fs/ext3/ioctl.c linux-3.2.34-vs2.3.2.15/fs/ext3/ioctl.c +--- linux-3.2.34/fs/ext3/ioctl.c 2012-01-09 16:14:54.000000000 +0100 ++++ linux-3.2.34-vs2.3.2.15/fs/ext3/ioctl.c 2011-12-05 19:33:02.000000000 +0100 +@@ -8,6 +8,7 @@ + */ + + #include ++#include + #include + #include + #include +@@ -17,6 +18,34 @@ + #include + #include + ++ ++int ext3_sync_flags(struct inode *inode, int flags, int vflags) ++{ ++ handle_t *handle = NULL; ++ struct ext3_iloc iloc; ++ int err; ++ ++ handle = ext3_journal_start(inode, 1); ++ if (IS_ERR(handle)) ++ return PTR_ERR(handle); ++ ++ if (IS_SYNC(inode)) ++ handle->h_sync = 1; ++ err = ext3_reserve_inode_write(handle, inode, &iloc); ++ if (err) ++ goto flags_err; ++ ++ inode->i_flags = flags; ++ inode->i_vflags = vflags; ++ ext3_get_inode_flags(EXT3_I(inode)); ++ inode->i_ctime = CURRENT_TIME_SEC; ++ ++ err = ext3_mark_iloc_dirty(handle, inode, &iloc); ++flags_err: ++ ext3_journal_stop(handle); ++ return err; ++} ++ + long ext3_ioctl(struct file *filp, unsigned int cmd, unsigned long arg) + { + struct inode *inode = filp->f_dentry->d_inode; +@@ -50,6 +79,11 @@ long ext3_ioctl(struct file *filp, unsig + + flags = ext3_mask_flags(inode->i_mode, flags); + ++ if (IS_BARRIER(inode)) { ++ vxwprintk_task(1, "messing with the barrier."); ++ return -EACCES; ++ } ++ + mutex_lock(&inode->i_mutex); + + /* Is it quota file? Do not allow user to mess with it */ +@@ -68,7 +102,9 @@ long ext3_ioctl(struct file *filp, unsig + * + * This test looks nicer. 
Thanks to Pauline Middelink + */ +- if ((flags ^ oldflags) & (EXT3_APPEND_FL | EXT3_IMMUTABLE_FL)) { ++ if ((oldflags & EXT3_IMMUTABLE_FL) || ++ ((flags ^ oldflags) & (EXT3_APPEND_FL | ++ EXT3_IMMUTABLE_FL | EXT3_IXUNLINK_FL))) { + if (!capable(CAP_LINUX_IMMUTABLE)) + goto flags_out; + } +@@ -93,7 +129,7 @@ long ext3_ioctl(struct file *filp, unsig + if (err) + goto flags_err; + +- flags = flags & EXT3_FL_USER_MODIFIABLE; ++ flags &= EXT3_FL_USER_MODIFIABLE; + flags |= oldflags & ~EXT3_FL_USER_MODIFIABLE; + ei->i_flags = flags; + +diff -NurpP --minimal linux-3.2.34/fs/ext3/namei.c linux-3.2.34-vs2.3.2.15/fs/ext3/namei.c +--- linux-3.2.34/fs/ext3/namei.c 2012-01-09 16:14:54.000000000 +0100 ++++ linux-3.2.34-vs2.3.2.15/fs/ext3/namei.c 2011-12-05 19:33:02.000000000 +0100 +@@ -36,6 +36,7 @@ + #include + #include + #include ++#include + #include + + #include "namei.h" +@@ -925,6 +926,7 @@ restart: + ll_rw_block(READ | REQ_META | REQ_PRIO, + 1, &bh); + } ++ dx_propagate_tag(nd, inode); + } + if ((bh = bh_use[ra_ptr++]) == NULL) + goto next; +@@ -2535,6 +2537,7 @@ const struct inode_operations ext3_dir_i + .listxattr = ext3_listxattr, + .removexattr = generic_removexattr, + #endif ++ .sync_flags = ext3_sync_flags, + .get_acl = ext3_get_acl, + }; + +diff -NurpP --minimal linux-3.2.34/fs/ext3/super.c linux-3.2.34-vs2.3.2.15/fs/ext3/super.c +--- linux-3.2.34/fs/ext3/super.c 2012-01-09 16:14:54.000000000 +0100 ++++ linux-3.2.34-vs2.3.2.15/fs/ext3/super.c 2011-12-05 19:33:02.000000000 +0100 +@@ -831,7 +831,8 @@ enum { + Opt_usrjquota, Opt_grpjquota, Opt_offusrjquota, Opt_offgrpjquota, + Opt_jqfmt_vfsold, Opt_jqfmt_vfsv0, Opt_jqfmt_vfsv1, Opt_quota, + Opt_noquota, Opt_ignore, Opt_barrier, Opt_nobarrier, Opt_err, +- Opt_resize, Opt_usrquota, Opt_grpquota ++ Opt_resize, Opt_usrquota, Opt_grpquota, ++ Opt_tag, Opt_notag, Opt_tagid + }; + + static const match_table_t tokens = { +@@ -888,6 +889,9 @@ static const match_table_t tokens = { + {Opt_barrier, "barrier"}, + {Opt_nobarrier, "nobarrier"}, + {Opt_resize, "resize"}, ++ {Opt_tag, "tag"}, ++ {Opt_notag, "notag"}, ++ {Opt_tagid, "tagid=%u"}, + {Opt_err, NULL}, + }; + +@@ -1040,6 +1044,20 @@ static int parse_options (char *options, + case Opt_nouid32: + set_opt (sbi->s_mount_opt, NO_UID32); + break; ++#ifndef CONFIG_TAGGING_NONE ++ case Opt_tag: ++ set_opt (sbi->s_mount_opt, TAGGED); ++ break; ++ case Opt_notag: ++ clear_opt (sbi->s_mount_opt, TAGGED); ++ break; ++#endif ++#ifdef CONFIG_PROPAGATE ++ case Opt_tagid: ++ /* use args[0] */ ++ set_opt (sbi->s_mount_opt, TAGGED); ++ break; ++#endif + case Opt_nocheck: + clear_opt (sbi->s_mount_opt, CHECK); + break; +@@ -1738,6 +1756,9 @@ static int ext3_fill_super (struct super + NULL, 0)) + goto failed_mount; + ++ if (EXT3_SB(sb)->s_mount_opt & EXT3_MOUNT_TAGGED) ++ sb->s_flags |= MS_TAGGED; ++ + sb->s_flags = (sb->s_flags & ~MS_POSIXACL) | + (test_opt(sb, POSIX_ACL) ? MS_POSIXACL : 0); + +@@ -2619,6 +2640,14 @@ static int ext3_remount (struct super_bl + if (test_opt(sb, ABORT)) + ext3_abort(sb, __func__, "Abort forced by user"); + ++ if ((sbi->s_mount_opt & EXT3_MOUNT_TAGGED) && ++ !(sb->s_flags & MS_TAGGED)) { ++ printk("EXT3-fs: %s: tagging not permitted on remount.\n", ++ sb->s_id); ++ err = -EINVAL; ++ goto restore_opts; ++ } ++ + sb->s_flags = (sb->s_flags & ~MS_POSIXACL) | + (test_opt(sb, POSIX_ACL) ? 
MS_POSIXACL : 0); + +diff -NurpP --minimal linux-3.2.34/fs/ext4/ext4.h linux-3.2.34-vs2.3.2.15/fs/ext4/ext4.h +--- linux-3.2.34/fs/ext4/ext4.h 2012-11-18 18:42:21.000000000 +0100 ++++ linux-3.2.34-vs2.3.2.15/fs/ext4/ext4.h 2012-08-13 12:40:51.000000000 +0200 +@@ -373,8 +373,12 @@ struct flex_groups { + #define EXT4_EXTENTS_FL 0x00080000 /* Inode uses extents */ + #define EXT4_EA_INODE_FL 0x00200000 /* Inode used for large EA */ + #define EXT4_EOFBLOCKS_FL 0x00400000 /* Blocks allocated beyond EOF */ ++#define EXT4_IXUNLINK_FL 0x08000000 /* Immutable invert on unlink */ + #define EXT4_RESERVED_FL 0x80000000 /* reserved for ext4 lib */ + ++#define EXT4_BARRIER_FL 0x04000000 /* Barrier for chroot() */ ++#define EXT4_COW_FL 0x20000000 /* Copy on Write marker */ ++ + #define EXT4_FL_USER_VISIBLE 0x004BDFFF /* User visible flags */ + #define EXT4_FL_USER_MODIFIABLE 0x004B80FF /* User modifiable flags */ + +@@ -634,7 +638,8 @@ struct ext4_inode { + __le16 l_i_file_acl_high; + __le16 l_i_uid_high; /* these 2 fields */ + __le16 l_i_gid_high; /* were reserved2[0] */ +- __u32 l_i_reserved2; ++ __le16 l_i_tag; /* Context Tag */ ++ __u16 l_i_reserved2; + } linux2; + struct { + __le16 h_i_reserved1; /* Obsoleted fragment number/size which are removed in ext4 */ +@@ -752,6 +757,7 @@ do { \ + #define i_gid_low i_gid + #define i_uid_high osd2.linux2.l_i_uid_high + #define i_gid_high osd2.linux2.l_i_gid_high ++#define i_raw_tag osd2.linux2.l_i_tag + #define i_reserved2 osd2.linux2.l_i_reserved2 + + #elif defined(__GNU__) +@@ -928,6 +934,7 @@ struct ext4_inode_info { + #define EXT4_MOUNT_POSIX_ACL 0x08000 /* POSIX Access Control Lists */ + #define EXT4_MOUNT_NO_AUTO_DA_ALLOC 0x10000 /* No auto delalloc mapping */ + #define EXT4_MOUNT_BARRIER 0x20000 /* Use block barriers */ ++#define EXT4_MOUNT_TAGGED 0x40000 /* Enable Context Tags */ + #define EXT4_MOUNT_QUOTA 0x80000 /* Some quota option set */ + #define EXT4_MOUNT_USRQUOTA 0x100000 /* "old" user quota */ + #define EXT4_MOUNT_GRPQUOTA 0x200000 /* "old" group quota */ +@@ -2269,6 +2276,7 @@ extern int ext4_map_blocks(handle_t *han + struct ext4_map_blocks *map, int flags); + extern int ext4_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo, + __u64 start, __u64 len); ++extern int ext4_sync_flags(struct inode *, int, int); + /* move_extent.c */ + extern int ext4_move_extents(struct file *o_filp, struct file *d_filp, + __u64 start_orig, __u64 start_donor, +diff -NurpP --minimal linux-3.2.34/fs/ext4/file.c linux-3.2.34-vs2.3.2.15/fs/ext4/file.c +--- linux-3.2.34/fs/ext4/file.c 2012-01-09 16:14:54.000000000 +0100 ++++ linux-3.2.34-vs2.3.2.15/fs/ext4/file.c 2011-12-05 19:33:02.000000000 +0100 +@@ -258,5 +258,6 @@ const struct inode_operations ext4_file_ + #endif + .get_acl = ext4_get_acl, + .fiemap = ext4_fiemap, ++ .sync_flags = ext4_sync_flags, + }; + +diff -NurpP --minimal linux-3.2.34/fs/ext4/ialloc.c linux-3.2.34-vs2.3.2.15/fs/ext4/ialloc.c +--- linux-3.2.34/fs/ext4/ialloc.c 2012-11-18 18:42:21.000000000 +0100 ++++ linux-3.2.34-vs2.3.2.15/fs/ext4/ialloc.c 2012-08-13 12:40:51.000000000 +0200 +@@ -22,6 +22,7 @@ + #include + #include + #include ++#include + #include + + #include "ext4.h" +@@ -860,6 +861,7 @@ got: + inode->i_mode = mode; + inode->i_uid = current_fsuid(); + inode->i_gid = dir->i_gid; ++ inode->i_tag = dx_current_fstag(sb); + } else + inode_init_owner(inode, dir, mode); + +diff -NurpP --minimal linux-3.2.34/fs/ext4/inode.c linux-3.2.34-vs2.3.2.15/fs/ext4/inode.c +--- linux-3.2.34/fs/ext4/inode.c 2012-11-18 18:42:21.000000000 +0100 
++++ linux-3.2.34-vs2.3.2.15/fs/ext4/inode.c 2012-10-22 12:59:51.000000000 +0200 +@@ -38,6 +38,7 @@ + #include + #include + #include ++#include + + #include "ext4_jbd2.h" + #include "xattr.h" +@@ -3697,41 +3698,64 @@ void ext4_set_inode_flags(struct inode * + { + unsigned int flags = EXT4_I(inode)->i_flags; + +- inode->i_flags &= ~(S_SYNC|S_APPEND|S_IMMUTABLE|S_NOATIME|S_DIRSYNC); ++ inode->i_flags &= ~(S_IMMUTABLE | S_IXUNLINK | ++ S_SYNC | S_APPEND | S_NOATIME | S_DIRSYNC); ++ ++ if (flags & EXT4_IMMUTABLE_FL) ++ inode->i_flags |= S_IMMUTABLE; ++ if (flags & EXT4_IXUNLINK_FL) ++ inode->i_flags |= S_IXUNLINK; ++ + if (flags & EXT4_SYNC_FL) + inode->i_flags |= S_SYNC; + if (flags & EXT4_APPEND_FL) + inode->i_flags |= S_APPEND; +- if (flags & EXT4_IMMUTABLE_FL) +- inode->i_flags |= S_IMMUTABLE; + if (flags & EXT4_NOATIME_FL) + inode->i_flags |= S_NOATIME; + if (flags & EXT4_DIRSYNC_FL) + inode->i_flags |= S_DIRSYNC; ++ ++ inode->i_vflags &= ~(V_BARRIER | V_COW); ++ ++ if (flags & EXT4_BARRIER_FL) ++ inode->i_vflags |= V_BARRIER; ++ if (flags & EXT4_COW_FL) ++ inode->i_vflags |= V_COW; + } + + /* Propagate flags from i_flags to EXT4_I(inode)->i_flags */ + void ext4_get_inode_flags(struct ext4_inode_info *ei) + { +- unsigned int vfs_fl; ++ unsigned int vfs_fl, vfs_vf; + unsigned long old_fl, new_fl; + + do { + vfs_fl = ei->vfs_inode.i_flags; ++ vfs_vf = ei->vfs_inode.i_vflags; + old_fl = ei->i_flags; + new_fl = old_fl & ~(EXT4_SYNC_FL|EXT4_APPEND_FL| + EXT4_IMMUTABLE_FL|EXT4_NOATIME_FL| +- EXT4_DIRSYNC_FL); ++ EXT4_DIRSYNC_FL|EXT4_BARRIER_FL| ++ EXT4_COW_FL); ++ ++ if (vfs_fl & S_IMMUTABLE) ++ new_fl |= EXT4_IMMUTABLE_FL; ++ if (vfs_fl & S_IXUNLINK) ++ new_fl |= EXT4_IXUNLINK_FL; ++ + if (vfs_fl & S_SYNC) + new_fl |= EXT4_SYNC_FL; + if (vfs_fl & S_APPEND) + new_fl |= EXT4_APPEND_FL; +- if (vfs_fl & S_IMMUTABLE) +- new_fl |= EXT4_IMMUTABLE_FL; + if (vfs_fl & S_NOATIME) + new_fl |= EXT4_NOATIME_FL; + if (vfs_fl & S_DIRSYNC) + new_fl |= EXT4_DIRSYNC_FL; ++ ++ if (vfs_vf & V_BARRIER) ++ new_fl |= EXT4_BARRIER_FL; ++ if (vfs_vf & V_COW) ++ new_fl |= EXT4_COW_FL; + } while (cmpxchg(&ei->i_flags, old_fl, new_fl) != old_fl); + } + +@@ -3767,6 +3791,8 @@ struct inode *ext4_iget(struct super_blo + journal_t *journal = EXT4_SB(sb)->s_journal; + long ret; + int block; ++ uid_t uid; ++ gid_t gid; + + inode = iget_locked(sb, ino); + if (!inode) +@@ -3782,12 +3808,16 @@ struct inode *ext4_iget(struct super_blo + goto bad_inode; + raw_inode = ext4_raw_inode(&iloc); + inode->i_mode = le16_to_cpu(raw_inode->i_mode); +- inode->i_uid = (uid_t)le16_to_cpu(raw_inode->i_uid_low); +- inode->i_gid = (gid_t)le16_to_cpu(raw_inode->i_gid_low); ++ uid = (uid_t)le16_to_cpu(raw_inode->i_uid_low); ++ gid = (gid_t)le16_to_cpu(raw_inode->i_gid_low); + if (!(test_opt(inode->i_sb, NO_UID32))) { +- inode->i_uid |= le16_to_cpu(raw_inode->i_uid_high) << 16; +- inode->i_gid |= le16_to_cpu(raw_inode->i_gid_high) << 16; ++ uid |= le16_to_cpu(raw_inode->i_uid_high) << 16; ++ gid |= le16_to_cpu(raw_inode->i_gid_high) << 16; + } ++ inode->i_uid = INOTAG_UID(DX_TAG(inode), uid, gid); ++ inode->i_gid = INOTAG_GID(DX_TAG(inode), uid, gid); ++ inode->i_tag = INOTAG_TAG(DX_TAG(inode), uid, gid, ++ le16_to_cpu(raw_inode->i_raw_tag)); + set_nlink(inode, le16_to_cpu(raw_inode->i_links_count)); + + ext4_clear_state_flags(ei); /* Only relevant on 32-bit archs */ +@@ -4006,6 +4036,8 @@ static int ext4_do_update_inode(handle_t + struct ext4_inode *raw_inode = ext4_raw_inode(iloc); + struct ext4_inode_info *ei = EXT4_I(inode); + struct buffer_head 
*bh = iloc->bh; ++ uid_t uid = TAGINO_UID(DX_TAG(inode), inode->i_uid, inode->i_tag); ++ gid_t gid = TAGINO_GID(DX_TAG(inode), inode->i_gid, inode->i_tag); + int err = 0, rc, block; + int need_datasync = 0; + +@@ -4017,29 +4049,32 @@ static int ext4_do_update_inode(handle_t + ext4_get_inode_flags(ei); + raw_inode->i_mode = cpu_to_le16(inode->i_mode); + if (!(test_opt(inode->i_sb, NO_UID32))) { +- raw_inode->i_uid_low = cpu_to_le16(low_16_bits(inode->i_uid)); +- raw_inode->i_gid_low = cpu_to_le16(low_16_bits(inode->i_gid)); ++ raw_inode->i_uid_low = cpu_to_le16(low_16_bits(uid)); ++ raw_inode->i_gid_low = cpu_to_le16(low_16_bits(gid)); + /* + * Fix up interoperability with old kernels. Otherwise, old inodes get + * re-used with the upper 16 bits of the uid/gid intact + */ + if (!ei->i_dtime) { + raw_inode->i_uid_high = +- cpu_to_le16(high_16_bits(inode->i_uid)); ++ cpu_to_le16(high_16_bits(uid)); + raw_inode->i_gid_high = +- cpu_to_le16(high_16_bits(inode->i_gid)); ++ cpu_to_le16(high_16_bits(gid)); + } else { + raw_inode->i_uid_high = 0; + raw_inode->i_gid_high = 0; + } + } else { + raw_inode->i_uid_low = +- cpu_to_le16(fs_high2lowuid(inode->i_uid)); ++ cpu_to_le16(fs_high2lowuid(uid)); + raw_inode->i_gid_low = +- cpu_to_le16(fs_high2lowgid(inode->i_gid)); ++ cpu_to_le16(fs_high2lowgid(gid)); + raw_inode->i_uid_high = 0; + raw_inode->i_gid_high = 0; + } ++#ifdef CONFIG_TAGGING_INTERN ++ raw_inode->i_raw_tag = cpu_to_le16(inode->i_tag); ++#endif + raw_inode->i_links_count = cpu_to_le16(inode->i_nlink); + + EXT4_INODE_SET_XTIME(i_ctime, inode, raw_inode); +@@ -4228,7 +4263,8 @@ int ext4_setattr(struct dentry *dentry, + if (is_quota_modification(inode, attr)) + dquot_initialize(inode); + if ((ia_valid & ATTR_UID && attr->ia_uid != inode->i_uid) || +- (ia_valid & ATTR_GID && attr->ia_gid != inode->i_gid)) { ++ (ia_valid & ATTR_GID && attr->ia_gid != inode->i_gid) || ++ (ia_valid & ATTR_TAG && attr->ia_tag != inode->i_tag)) { + handle_t *handle; + + /* (user+group)*(old+new) structure, inode write (sb, +@@ -4250,6 +4286,8 @@ int ext4_setattr(struct dentry *dentry, + inode->i_uid = attr->ia_uid; + if (attr->ia_valid & ATTR_GID) + inode->i_gid = attr->ia_gid; ++ if ((attr->ia_valid & ATTR_TAG) && IS_TAGGED(inode)) ++ inode->i_tag = attr->ia_tag; + error = ext4_mark_inode_dirty(handle, inode); + ext4_journal_stop(handle); + } +diff -NurpP --minimal linux-3.2.34/fs/ext4/ioctl.c linux-3.2.34-vs2.3.2.15/fs/ext4/ioctl.c +--- linux-3.2.34/fs/ext4/ioctl.c 2012-11-18 18:42:21.000000000 +0100 ++++ linux-3.2.34-vs2.3.2.15/fs/ext4/ioctl.c 2012-06-14 20:45:24.000000000 +0200 +@@ -14,10 +14,39 @@ + #include + #include + #include ++#include + #include + #include "ext4_jbd2.h" + #include "ext4.h" + ++ ++int ext4_sync_flags(struct inode *inode, int flags, int vflags) ++{ ++ handle_t *handle = NULL; ++ struct ext4_iloc iloc; ++ int err; ++ ++ handle = ext4_journal_start(inode, 1); ++ if (IS_ERR(handle)) ++ return PTR_ERR(handle); ++ ++ if (IS_SYNC(inode)) ++ ext4_handle_sync(handle); ++ err = ext4_reserve_inode_write(handle, inode, &iloc); ++ if (err) ++ goto flags_err; ++ ++ inode->i_flags = flags; ++ inode->i_vflags = vflags; ++ ext4_get_inode_flags(EXT4_I(inode)); ++ inode->i_ctime = ext4_current_time(inode); ++ ++ err = ext4_mark_iloc_dirty(handle, inode, &iloc); ++flags_err: ++ ext4_journal_stop(handle); ++ return err; ++} ++ + long ext4_ioctl(struct file *filp, unsigned int cmd, unsigned long arg) + { + struct inode *inode = filp->f_dentry->d_inode; +@@ -51,6 +80,11 @@ long ext4_ioctl(struct file *filp, 
unsig + + flags = ext4_mask_flags(inode->i_mode, flags); + ++ if (IS_BARRIER(inode)) { ++ vxwprintk_task(1, "messing with the barrier."); ++ return -EACCES; ++ } ++ + err = -EPERM; + mutex_lock(&inode->i_mutex); + /* Is it quota file? Do not allow user to mess with it */ +@@ -68,7 +102,9 @@ long ext4_ioctl(struct file *filp, unsig + * + * This test looks nicer. Thanks to Pauline Middelink + */ +- if ((flags ^ oldflags) & (EXT4_APPEND_FL | EXT4_IMMUTABLE_FL)) { ++ if ((oldflags & EXT4_IMMUTABLE_FL) || ++ ((flags ^ oldflags) & (EXT4_APPEND_FL | ++ EXT4_IMMUTABLE_FL | EXT4_IXUNLINK_FL))) { + if (!capable(CAP_LINUX_IMMUTABLE)) + goto flags_out; + } +diff -NurpP --minimal linux-3.2.34/fs/ext4/namei.c linux-3.2.34-vs2.3.2.15/fs/ext4/namei.c +--- linux-3.2.34/fs/ext4/namei.c 2012-11-18 18:42:21.000000000 +0100 ++++ linux-3.2.34-vs2.3.2.15/fs/ext4/namei.c 2012-10-22 12:59:51.000000000 +0200 +@@ -34,6 +34,7 @@ + #include + #include + #include ++#include + #include "ext4.h" + #include "ext4_jbd2.h" + +@@ -925,6 +926,7 @@ restart: + ll_rw_block(READ | REQ_META | REQ_PRIO, + 1, &bh); + } ++ dx_propagate_tag(nd, inode); + } + if ((bh = bh_use[ra_ptr++]) == NULL) + goto next; +@@ -2591,6 +2593,7 @@ const struct inode_operations ext4_dir_i + #endif + .get_acl = ext4_get_acl, + .fiemap = ext4_fiemap, ++ .sync_flags = ext4_sync_flags, + }; + + const struct inode_operations ext4_special_inode_operations = { +diff -NurpP --minimal linux-3.2.34/fs/ext4/super.c linux-3.2.34-vs2.3.2.15/fs/ext4/super.c +--- linux-3.2.34/fs/ext4/super.c 2012-11-18 18:42:21.000000000 +0100 ++++ linux-3.2.34-vs2.3.2.15/fs/ext4/super.c 2012-09-16 18:25:50.000000000 +0200 +@@ -1336,6 +1336,7 @@ enum { + Opt_inode_readahead_blks, Opt_journal_ioprio, + Opt_dioread_nolock, Opt_dioread_lock, + Opt_discard, Opt_nodiscard, Opt_init_itable, Opt_noinit_itable, ++ Opt_tag, Opt_notag, Opt_tagid + }; + + static const match_table_t tokens = { +@@ -1411,6 +1412,9 @@ static const match_table_t tokens = { + {Opt_init_itable, "init_itable=%u"}, + {Opt_init_itable, "init_itable"}, + {Opt_noinit_itable, "noinit_itable"}, ++ {Opt_tag, "tag"}, ++ {Opt_notag, "notag"}, ++ {Opt_tagid, "tagid=%u"}, + {Opt_err, NULL}, + }; + +@@ -1579,6 +1583,20 @@ static int parse_options(char *options, + case Opt_nouid32: + set_opt(sb, NO_UID32); + break; ++#ifndef CONFIG_TAGGING_NONE ++ case Opt_tag: ++ set_opt(sb, TAGGED); ++ break; ++ case Opt_notag: ++ clear_opt(sb, TAGGED); ++ break; ++#endif ++#ifdef CONFIG_PROPAGATE ++ case Opt_tagid: ++ /* use args[0] */ ++ set_opt(sb, TAGGED); ++ break; ++#endif + case Opt_debug: + set_opt(sb, DEBUG); + break; +@@ -3376,6 +3394,9 @@ static int ext4_fill_super(struct super_ + } + } + ++ if (EXT4_SB(sb)->s_mount_opt & EXT4_MOUNT_TAGGED) ++ sb->s_flags |= MS_TAGGED; ++ + sb->s_flags = (sb->s_flags & ~MS_POSIXACL) | + (test_opt(sb, POSIX_ACL) ? MS_POSIXACL : 0); + +@@ -4538,6 +4559,14 @@ static int ext4_remount(struct super_blo + if (sbi->s_mount_flags & EXT4_MF_FS_ABORTED) + ext4_abort(sb, "Abort forced by user"); + ++ if ((sbi->s_mount_opt & EXT4_MOUNT_TAGGED) && ++ !(sb->s_flags & MS_TAGGED)) { ++ printk("EXT4-fs: %s: tagging not permitted on remount.\n", ++ sb->s_id); ++ err = -EINVAL; ++ goto restore_opts; ++ } ++ + sb->s_flags = (sb->s_flags & ~MS_POSIXACL) | + (test_opt(sb, POSIX_ACL) ? 
MS_POSIXACL : 0); + +diff -NurpP --minimal linux-3.2.34/fs/fcntl.c linux-3.2.34-vs2.3.2.15/fs/fcntl.c +--- linux-3.2.34/fs/fcntl.c 2011-05-22 16:17:52.000000000 +0200 ++++ linux-3.2.34-vs2.3.2.15/fs/fcntl.c 2011-12-05 19:33:02.000000000 +0100 +@@ -20,6 +20,7 @@ + #include + #include + #include ++#include + + #include + #include +@@ -103,6 +104,8 @@ SYSCALL_DEFINE3(dup3, unsigned int, oldf + + if (tofree) + filp_close(tofree, files); ++ else ++ vx_openfd_inc(newfd); /* fd was unused */ + + return newfd; + +@@ -447,6 +450,8 @@ SYSCALL_DEFINE3(fcntl, unsigned int, fd, + filp = fget_raw(fd); + if (!filp) + goto out; ++ if (!vx_files_avail(1)) ++ goto out; + + if (unlikely(filp->f_mode & FMODE_PATH)) { + if (!check_fcntl_cmd(cmd)) { +diff -NurpP --minimal linux-3.2.34/fs/file.c linux-3.2.34-vs2.3.2.15/fs/file.c +--- linux-3.2.34/fs/file.c 2011-05-22 16:17:52.000000000 +0200 ++++ linux-3.2.34-vs2.3.2.15/fs/file.c 2011-12-05 19:33:02.000000000 +0100 +@@ -21,6 +21,7 @@ + #include + #include + #include ++#include + + struct fdtable_defer { + spinlock_t lock; +@@ -359,6 +360,8 @@ struct files_struct *dup_fd(struct files + struct file *f = *old_fds++; + if (f) { + get_file(f); ++ /* TODO: sum it first for check and performance */ ++ vx_openfd_inc(open_files - i); + } else { + /* + * The fd may be claimed in the fd bitmap but not yet +@@ -466,6 +469,7 @@ repeat: + else + FD_CLR(fd, fdt->close_on_exec); + error = fd; ++ vx_openfd_inc(fd); + #if 1 + /* Sanity check */ + if (rcu_dereference_raw(fdt->fd[fd]) != NULL) { +diff -NurpP --minimal linux-3.2.34/fs/file_table.c linux-3.2.34-vs2.3.2.15/fs/file_table.c +--- linux-3.2.34/fs/file_table.c 2011-10-24 18:45:27.000000000 +0200 ++++ linux-3.2.34-vs2.3.2.15/fs/file_table.c 2011-12-05 19:33:02.000000000 +0100 +@@ -24,6 +24,8 @@ + #include + #include + #include ++#include ++#include + + #include + +@@ -135,6 +137,8 @@ struct file *get_empty_filp(void) + spin_lock_init(&f->f_lock); + eventpoll_init_file(f); + /* f->f_version: 0 */ ++ f->f_xid = vx_current_xid(); ++ vx_files_inc(f); + return f; + + over: +@@ -253,6 +257,8 @@ static void __fput(struct file *file) + } + fops_put(file->f_op); + put_pid(file->f_owner.pid); ++ vx_files_dec(file); ++ file->f_xid = 0; + file_sb_list_del(file); + if ((file->f_mode & (FMODE_READ | FMODE_WRITE)) == FMODE_READ) + i_readcount_dec(inode); +@@ -383,6 +389,8 @@ void put_filp(struct file *file) + { + if (atomic_long_dec_and_test(&file->f_count)) { + security_file_free(file); ++ vx_files_dec(file); ++ file->f_xid = 0; + file_sb_list_del(file); + file_free(file); + } +diff -NurpP --minimal linux-3.2.34/fs/fs_struct.c linux-3.2.34-vs2.3.2.15/fs/fs_struct.c +--- linux-3.2.34/fs/fs_struct.c 2011-03-15 18:07:31.000000000 +0100 ++++ linux-3.2.34-vs2.3.2.15/fs/fs_struct.c 2011-12-05 19:33:02.000000000 +0100 +@@ -4,6 +4,7 @@ + #include + #include + #include ++#include + #include "internal.h" + + static inline void path_get_longterm(struct path *path) +@@ -96,6 +97,7 @@ void free_fs_struct(struct fs_struct *fs + { + path_put_longterm(&fs->root); + path_put_longterm(&fs->pwd); ++ atomic_dec(&vs_global_fs); + kmem_cache_free(fs_cachep, fs); + } + +@@ -135,6 +137,7 @@ struct fs_struct *copy_fs_struct(struct + fs->pwd = old->pwd; + path_get_longterm(&fs->pwd); + spin_unlock(&old->lock); ++ atomic_inc(&vs_global_fs); + } + return fs; + } +diff -NurpP --minimal linux-3.2.34/fs/gfs2/file.c linux-3.2.34-vs2.3.2.15/fs/gfs2/file.c +--- linux-3.2.34/fs/gfs2/file.c 2012-01-09 16:14:54.000000000 +0100 ++++ linux-3.2.34-vs2.3.2.15/fs/gfs2/file.c 
2011-12-05 19:33:02.000000000 +0100 +@@ -143,6 +143,9 @@ static const u32 fsflags_to_gfs2[32] = { + [7] = GFS2_DIF_NOATIME, + [12] = GFS2_DIF_EXHASH, + [14] = GFS2_DIF_INHERIT_JDATA, ++ [27] = GFS2_DIF_IXUNLINK, ++ [26] = GFS2_DIF_BARRIER, ++ [29] = GFS2_DIF_COW, + }; + + static const u32 gfs2_to_fsflags[32] = { +@@ -152,6 +155,9 @@ static const u32 gfs2_to_fsflags[32] = { + [gfs2fl_NoAtime] = FS_NOATIME_FL, + [gfs2fl_ExHash] = FS_INDEX_FL, + [gfs2fl_InheritJdata] = FS_JOURNAL_DATA_FL, ++ [gfs2fl_IXUnlink] = FS_IXUNLINK_FL, ++ [gfs2fl_Barrier] = FS_BARRIER_FL, ++ [gfs2fl_Cow] = FS_COW_FL, + }; + + static int gfs2_get_flags(struct file *filp, u32 __user *ptr) +@@ -182,12 +188,18 @@ void gfs2_set_inode_flags(struct inode * + { + struct gfs2_inode *ip = GFS2_I(inode); + unsigned int flags = inode->i_flags; ++ unsigned int vflags = inode->i_vflags; ++ ++ flags &= ~(S_IMMUTABLE | S_IXUNLINK | ++ S_SYNC | S_APPEND | S_NOATIME | S_DIRSYNC | S_NOSEC); + +- flags &= ~(S_SYNC|S_APPEND|S_IMMUTABLE|S_NOATIME|S_DIRSYNC|S_NOSEC); + if ((ip->i_eattr == 0) && !is_sxid(inode->i_mode)) + inode->i_flags |= S_NOSEC; + if (ip->i_diskflags & GFS2_DIF_IMMUTABLE) + flags |= S_IMMUTABLE; ++ if (ip->i_diskflags & GFS2_DIF_IXUNLINK) ++ flags |= S_IXUNLINK; ++ + if (ip->i_diskflags & GFS2_DIF_APPENDONLY) + flags |= S_APPEND; + if (ip->i_diskflags & GFS2_DIF_NOATIME) +@@ -195,6 +207,43 @@ void gfs2_set_inode_flags(struct inode * + if (ip->i_diskflags & GFS2_DIF_SYNC) + flags |= S_SYNC; + inode->i_flags = flags; ++ ++ vflags &= ~(V_BARRIER | V_COW); ++ ++ if (ip->i_diskflags & GFS2_DIF_BARRIER) ++ vflags |= V_BARRIER; ++ if (ip->i_diskflags & GFS2_DIF_COW) ++ vflags |= V_COW; ++ inode->i_vflags = vflags; ++} ++ ++void gfs2_get_inode_flags(struct inode *inode) ++{ ++ struct gfs2_inode *ip = GFS2_I(inode); ++ unsigned int flags = inode->i_flags; ++ unsigned int vflags = inode->i_vflags; ++ ++ ip->i_diskflags &= ~(GFS2_DIF_APPENDONLY | ++ GFS2_DIF_NOATIME | GFS2_DIF_SYNC | ++ GFS2_DIF_IMMUTABLE | GFS2_DIF_IXUNLINK | ++ GFS2_DIF_BARRIER | GFS2_DIF_COW); ++ ++ if (flags & S_IMMUTABLE) ++ ip->i_diskflags |= GFS2_DIF_IMMUTABLE; ++ if (flags & S_IXUNLINK) ++ ip->i_diskflags |= GFS2_DIF_IXUNLINK; ++ ++ if (flags & S_APPEND) ++ ip->i_diskflags |= GFS2_DIF_APPENDONLY; ++ if (flags & S_NOATIME) ++ ip->i_diskflags |= GFS2_DIF_NOATIME; ++ if (flags & S_SYNC) ++ ip->i_diskflags |= GFS2_DIF_SYNC; ++ ++ if (vflags & V_BARRIER) ++ ip->i_diskflags |= GFS2_DIF_BARRIER; ++ if (vflags & V_COW) ++ ip->i_diskflags |= GFS2_DIF_COW; + } + + /* Flags that can be set by user space */ +@@ -306,6 +355,37 @@ static int gfs2_set_flags(struct file *f + return do_gfs2_set_flags(filp, gfsflags, ~GFS2_DIF_JDATA); + } + ++int gfs2_sync_flags(struct inode *inode, int flags, int vflags) ++{ ++ struct gfs2_inode *ip = GFS2_I(inode); ++ struct gfs2_sbd *sdp = GFS2_SB(inode); ++ struct buffer_head *bh; ++ struct gfs2_holder gh; ++ int error; ++ ++ error = gfs2_glock_nq_init(ip->i_gl, LM_ST_EXCLUSIVE, 0, &gh); ++ if (error) ++ return error; ++ error = gfs2_trans_begin(sdp, RES_DINODE, 0); ++ if (error) ++ goto out; ++ error = gfs2_meta_inode_buffer(ip, &bh); ++ if (error) ++ goto out_trans_end; ++ gfs2_trans_add_bh(ip->i_gl, bh, 1); ++ inode->i_flags = flags; ++ inode->i_vflags = vflags; ++ gfs2_get_inode_flags(inode); ++ gfs2_dinode_out(ip, bh->b_data); ++ brelse(bh); ++ gfs2_set_aops(inode); ++out_trans_end: ++ gfs2_trans_end(sdp); ++out: ++ gfs2_glock_dq_uninit(&gh); ++ return error; ++} ++ + static long gfs2_ioctl(struct file *filp, unsigned int cmd, 
unsigned long arg) + { + switch(cmd) { +diff -NurpP --minimal linux-3.2.34/fs/gfs2/inode.h linux-3.2.34-vs2.3.2.15/fs/gfs2/inode.h +--- linux-3.2.34/fs/gfs2/inode.h 2012-01-09 16:14:54.000000000 +0100 ++++ linux-3.2.34-vs2.3.2.15/fs/gfs2/inode.h 2011-12-05 19:33:02.000000000 +0100 +@@ -120,6 +120,7 @@ extern const struct file_operations gfs2 + extern const struct file_operations gfs2_dir_fops_nolock; + + extern void gfs2_set_inode_flags(struct inode *inode); ++extern int gfs2_sync_flags(struct inode *inode, int flags, int vflags); + + #ifdef CONFIG_GFS2_FS_LOCKING_DLM + extern const struct file_operations gfs2_file_fops; +diff -NurpP --minimal linux-3.2.34/fs/inode.c linux-3.2.34-vs2.3.2.15/fs/inode.c +--- linux-3.2.34/fs/inode.c 2012-01-09 16:14:54.000000000 +0100 ++++ linux-3.2.34-vs2.3.2.15/fs/inode.c 2011-12-05 19:33:02.000000000 +0100 +@@ -26,6 +26,7 @@ + #include + #include + #include /* for inode_has_buffers */ ++#include + #include "internal.h" + + /* +@@ -137,6 +138,9 @@ int inode_init_always(struct super_block + struct address_space *const mapping = &inode->i_data; + + inode->i_sb = sb; ++ ++ /* essential because of inode slab reuse */ ++ inode->i_tag = 0; + inode->i_blkbits = sb->s_blocksize_bits; + inode->i_flags = 0; + atomic_set(&inode->i_count, 1); +@@ -158,6 +162,7 @@ int inode_init_always(struct super_block + inode->i_bdev = NULL; + inode->i_cdev = NULL; + inode->i_rdev = 0; ++ inode->i_mdev = 0; + inode->dirtied_when = 0; + + if (security_inode_alloc(inode)) +@@ -399,6 +404,8 @@ void __insert_inode_hash(struct inode *i + } + EXPORT_SYMBOL(__insert_inode_hash); + ++EXPORT_SYMBOL_GPL(__iget); ++ + /** + * __remove_inode_hash - remove an inode from the hash + * @inode: inode to unhash +@@ -1626,9 +1633,11 @@ void init_special_inode(struct inode *in + if (S_ISCHR(mode)) { + inode->i_fop = &def_chr_fops; + inode->i_rdev = rdev; ++ inode->i_mdev = rdev; + } else if (S_ISBLK(mode)) { + inode->i_fop = &def_blk_fops; + inode->i_rdev = rdev; ++ inode->i_mdev = rdev; + } else if (S_ISFIFO(mode)) + inode->i_fop = &def_fifo_fops; + else if (S_ISSOCK(mode)) +@@ -1657,6 +1666,7 @@ void inode_init_owner(struct inode *inod + } else + inode->i_gid = current_fsgid(); + inode->i_mode = mode; ++ inode->i_tag = dx_current_fstag(inode->i_sb); + } + EXPORT_SYMBOL(inode_init_owner); + +diff -NurpP --minimal linux-3.2.34/fs/ioctl.c linux-3.2.34-vs2.3.2.15/fs/ioctl.c +--- linux-3.2.34/fs/ioctl.c 2011-05-22 16:17:52.000000000 +0200 ++++ linux-3.2.34-vs2.3.2.15/fs/ioctl.c 2011-12-05 19:33:02.000000000 +0100 +@@ -15,6 +15,9 @@ + #include + #include + #include ++#include ++#include ++#include + + #include + +diff -NurpP --minimal linux-3.2.34/fs/ioprio.c linux-3.2.34-vs2.3.2.15/fs/ioprio.c +--- linux-3.2.34/fs/ioprio.c 2012-01-09 16:14:54.000000000 +0100 ++++ linux-3.2.34-vs2.3.2.15/fs/ioprio.c 2011-12-05 19:33:02.000000000 +0100 +@@ -28,6 +28,7 @@ + #include + #include + #include ++#include + + int set_task_ioprio(struct task_struct *task, int ioprio) + { +@@ -120,6 +121,8 @@ SYSCALL_DEFINE3(ioprio_set, int, which, + else + pgrp = find_vpid(who); + do_each_pid_thread(pgrp, PIDTYPE_PGID, p) { ++ if (!vx_check(p->xid, VS_ADMIN_P | VS_IDENT)) ++ continue; + ret = set_task_ioprio(p, ioprio); + if (ret) + break; +@@ -209,6 +212,8 @@ SYSCALL_DEFINE2(ioprio_get, int, which, + else + pgrp = find_vpid(who); + do_each_pid_thread(pgrp, PIDTYPE_PGID, p) { ++ if (!vx_check(p->xid, VS_ADMIN_P | VS_IDENT)) ++ continue; + tmpio = get_task_ioprio(p); + if (tmpio < 0) + continue; +diff -NurpP --minimal 
linux-3.2.34/fs/jfs/file.c linux-3.2.34-vs2.3.2.15/fs/jfs/file.c +--- linux-3.2.34/fs/jfs/file.c 2011-10-24 18:45:27.000000000 +0200 ++++ linux-3.2.34-vs2.3.2.15/fs/jfs/file.c 2011-12-05 19:33:02.000000000 +0100 +@@ -109,7 +109,8 @@ int jfs_setattr(struct dentry *dentry, s + if (is_quota_modification(inode, iattr)) + dquot_initialize(inode); + if ((iattr->ia_valid & ATTR_UID && iattr->ia_uid != inode->i_uid) || +- (iattr->ia_valid & ATTR_GID && iattr->ia_gid != inode->i_gid)) { ++ (iattr->ia_valid & ATTR_GID && iattr->ia_gid != inode->i_gid) || ++ (iattr->ia_valid & ATTR_TAG && iattr->ia_tag != inode->i_tag)) { + rc = dquot_transfer(inode, iattr); + if (rc) + return rc; +@@ -142,6 +143,7 @@ const struct inode_operations jfs_file_i + #ifdef CONFIG_JFS_POSIX_ACL + .get_acl = jfs_get_acl, + #endif ++ .sync_flags = jfs_sync_flags, + }; + + const struct file_operations jfs_file_operations = { +diff -NurpP --minimal linux-3.2.34/fs/jfs/ioctl.c linux-3.2.34-vs2.3.2.15/fs/jfs/ioctl.c +--- linux-3.2.34/fs/jfs/ioctl.c 2011-05-22 16:17:52.000000000 +0200 ++++ linux-3.2.34-vs2.3.2.15/fs/jfs/ioctl.c 2011-12-05 19:33:02.000000000 +0100 +@@ -11,6 +11,7 @@ + #include + #include + #include ++#include + #include + #include + +@@ -52,6 +53,16 @@ static long jfs_map_ext2(unsigned long f + } + + ++int jfs_sync_flags(struct inode *inode, int flags, int vflags) ++{ ++ inode->i_flags = flags; ++ inode->i_vflags = vflags; ++ jfs_get_inode_flags(JFS_IP(inode)); ++ inode->i_ctime = CURRENT_TIME_SEC; ++ mark_inode_dirty(inode); ++ return 0; ++} ++ + long jfs_ioctl(struct file *filp, unsigned int cmd, unsigned long arg) + { + struct inode *inode = filp->f_dentry->d_inode; +@@ -85,6 +96,11 @@ long jfs_ioctl(struct file *filp, unsign + if (!S_ISDIR(inode->i_mode)) + flags &= ~JFS_DIRSYNC_FL; + ++ if (IS_BARRIER(inode)) { ++ vxwprintk_task(1, "messing with the barrier."); ++ return -EACCES; ++ } ++ + /* Is it quota file? Do not allow user to mess with it */ + if (IS_NOQUOTA(inode)) { + err = -EPERM; +@@ -102,8 +118,8 @@ long jfs_ioctl(struct file *filp, unsign + * the relevant capability. 
+ */ + if ((oldflags & JFS_IMMUTABLE_FL) || +- ((flags ^ oldflags) & +- (JFS_APPEND_FL | JFS_IMMUTABLE_FL))) { ++ ((flags ^ oldflags) & (JFS_APPEND_FL | ++ JFS_IMMUTABLE_FL | JFS_IXUNLINK_FL))) { + if (!capable(CAP_LINUX_IMMUTABLE)) { + mutex_unlock(&inode->i_mutex); + err = -EPERM; +@@ -111,7 +127,7 @@ long jfs_ioctl(struct file *filp, unsign + } + } + +- flags = flags & JFS_FL_USER_MODIFIABLE; ++ flags &= JFS_FL_USER_MODIFIABLE; + flags |= oldflags & ~JFS_FL_USER_MODIFIABLE; + jfs_inode->mode2 = flags; + +diff -NurpP --minimal linux-3.2.34/fs/jfs/jfs_dinode.h linux-3.2.34-vs2.3.2.15/fs/jfs/jfs_dinode.h +--- linux-3.2.34/fs/jfs/jfs_dinode.h 2008-12-25 00:26:37.000000000 +0100 ++++ linux-3.2.34-vs2.3.2.15/fs/jfs/jfs_dinode.h 2011-12-05 19:33:02.000000000 +0100 +@@ -161,9 +161,13 @@ struct dinode { + + #define JFS_APPEND_FL 0x01000000 /* writes to file may only append */ + #define JFS_IMMUTABLE_FL 0x02000000 /* Immutable file */ ++#define JFS_IXUNLINK_FL 0x08000000 /* Immutable invert on unlink */ + +-#define JFS_FL_USER_VISIBLE 0x03F80000 +-#define JFS_FL_USER_MODIFIABLE 0x03F80000 ++#define JFS_BARRIER_FL 0x04000000 /* Barrier for chroot() */ ++#define JFS_COW_FL 0x20000000 /* Copy on Write marker */ ++ ++#define JFS_FL_USER_VISIBLE 0x07F80000 ++#define JFS_FL_USER_MODIFIABLE 0x07F80000 + #define JFS_FL_INHERIT 0x03C80000 + + /* These are identical to EXT[23]_IOC_GETFLAGS/SETFLAGS */ +diff -NurpP --minimal linux-3.2.34/fs/jfs/jfs_filsys.h linux-3.2.34-vs2.3.2.15/fs/jfs/jfs_filsys.h +--- linux-3.2.34/fs/jfs/jfs_filsys.h 2008-12-25 00:26:37.000000000 +0100 ++++ linux-3.2.34-vs2.3.2.15/fs/jfs/jfs_filsys.h 2011-12-05 19:33:02.000000000 +0100 +@@ -263,6 +263,7 @@ + #define JFS_NAME_MAX 255 + #define JFS_PATH_MAX BPSIZE + ++#define JFS_TAGGED 0x00800000 /* Context Tagging */ + + /* + * file system state (superblock state) +diff -NurpP --minimal linux-3.2.34/fs/jfs/jfs_imap.c linux-3.2.34-vs2.3.2.15/fs/jfs/jfs_imap.c +--- linux-3.2.34/fs/jfs/jfs_imap.c 2012-01-09 16:14:54.000000000 +0100 ++++ linux-3.2.34-vs2.3.2.15/fs/jfs/jfs_imap.c 2011-12-05 19:33:02.000000000 +0100 +@@ -46,6 +46,7 @@ + #include + #include + #include ++#include + + #include "jfs_incore.h" + #include "jfs_inode.h" +@@ -3058,6 +3059,8 @@ static int copy_from_dinode(struct dinod + { + struct jfs_inode_info *jfs_ip = JFS_IP(ip); + struct jfs_sb_info *sbi = JFS_SBI(ip->i_sb); ++ uid_t uid; ++ gid_t gid; + + jfs_ip->fileset = le32_to_cpu(dip->di_fileset); + jfs_ip->mode2 = le32_to_cpu(dip->di_mode); +@@ -3078,14 +3081,18 @@ static int copy_from_dinode(struct dinod + } + set_nlink(ip, le32_to_cpu(dip->di_nlink)); + +- jfs_ip->saved_uid = le32_to_cpu(dip->di_uid); ++ uid = le32_to_cpu(dip->di_uid); ++ gid = le32_to_cpu(dip->di_gid); ++ ip->i_tag = INOTAG_TAG(DX_TAG(ip), uid, gid, 0); ++ ++ jfs_ip->saved_uid = INOTAG_UID(DX_TAG(ip), uid, gid); + if (sbi->uid == -1) + ip->i_uid = jfs_ip->saved_uid; + else { + ip->i_uid = sbi->uid; + } + +- jfs_ip->saved_gid = le32_to_cpu(dip->di_gid); ++ jfs_ip->saved_gid = INOTAG_GID(DX_TAG(ip), uid, gid); + if (sbi->gid == -1) + ip->i_gid = jfs_ip->saved_gid; + else { +@@ -3150,14 +3157,12 @@ static void copy_to_dinode(struct dinode + dip->di_size = cpu_to_le64(ip->i_size); + dip->di_nblocks = cpu_to_le64(PBLK2LBLK(ip->i_sb, ip->i_blocks)); + dip->di_nlink = cpu_to_le32(ip->i_nlink); +- if (sbi->uid == -1) +- dip->di_uid = cpu_to_le32(ip->i_uid); +- else +- dip->di_uid = cpu_to_le32(jfs_ip->saved_uid); +- if (sbi->gid == -1) +- dip->di_gid = cpu_to_le32(ip->i_gid); +- else +- dip->di_gid = 
cpu_to_le32(jfs_ip->saved_gid); ++ ++ dip->di_uid = cpu_to_le32(TAGINO_UID(DX_TAG(ip), ++ (sbi->uid == -1) ? ip->i_uid : jfs_ip->saved_uid, ip->i_tag)); ++ dip->di_gid = cpu_to_le32(TAGINO_GID(DX_TAG(ip), ++ (sbi->gid == -1) ? ip->i_gid : jfs_ip->saved_gid, ip->i_tag)); ++ + jfs_get_inode_flags(jfs_ip); + /* + * mode2 is only needed for storing the higher order bits. +diff -NurpP --minimal linux-3.2.34/fs/jfs/jfs_inode.c linux-3.2.34-vs2.3.2.15/fs/jfs/jfs_inode.c +--- linux-3.2.34/fs/jfs/jfs_inode.c 2012-01-09 16:14:54.000000000 +0100 ++++ linux-3.2.34-vs2.3.2.15/fs/jfs/jfs_inode.c 2011-12-05 19:33:02.000000000 +0100 +@@ -18,6 +18,7 @@ + + #include + #include ++#include + #include "jfs_incore.h" + #include "jfs_inode.h" + #include "jfs_filsys.h" +@@ -30,29 +31,46 @@ void jfs_set_inode_flags(struct inode *i + { + unsigned int flags = JFS_IP(inode)->mode2; + +- inode->i_flags &= ~(S_IMMUTABLE | S_APPEND | +- S_NOATIME | S_DIRSYNC | S_SYNC); ++ inode->i_flags &= ~(S_IMMUTABLE | S_IXUNLINK | ++ S_SYNC | S_APPEND | S_NOATIME | S_DIRSYNC); + + if (flags & JFS_IMMUTABLE_FL) + inode->i_flags |= S_IMMUTABLE; ++ if (flags & JFS_IXUNLINK_FL) ++ inode->i_flags |= S_IXUNLINK; ++ ++ if (flags & JFS_SYNC_FL) ++ inode->i_flags |= S_SYNC; + if (flags & JFS_APPEND_FL) + inode->i_flags |= S_APPEND; + if (flags & JFS_NOATIME_FL) + inode->i_flags |= S_NOATIME; + if (flags & JFS_DIRSYNC_FL) + inode->i_flags |= S_DIRSYNC; +- if (flags & JFS_SYNC_FL) +- inode->i_flags |= S_SYNC; ++ ++ inode->i_vflags &= ~(V_BARRIER | V_COW); ++ ++ if (flags & JFS_BARRIER_FL) ++ inode->i_vflags |= V_BARRIER; ++ if (flags & JFS_COW_FL) ++ inode->i_vflags |= V_COW; + } + + void jfs_get_inode_flags(struct jfs_inode_info *jfs_ip) + { + unsigned int flags = jfs_ip->vfs_inode.i_flags; ++ unsigned int vflags = jfs_ip->vfs_inode.i_vflags; ++ ++ jfs_ip->mode2 &= ~(JFS_IMMUTABLE_FL | JFS_IXUNLINK_FL | ++ JFS_APPEND_FL | JFS_NOATIME_FL | ++ JFS_DIRSYNC_FL | JFS_SYNC_FL | ++ JFS_BARRIER_FL | JFS_COW_FL); + +- jfs_ip->mode2 &= ~(JFS_IMMUTABLE_FL | JFS_APPEND_FL | JFS_NOATIME_FL | +- JFS_DIRSYNC_FL | JFS_SYNC_FL); + if (flags & S_IMMUTABLE) + jfs_ip->mode2 |= JFS_IMMUTABLE_FL; ++ if (flags & S_IXUNLINK) ++ jfs_ip->mode2 |= JFS_IXUNLINK_FL; ++ + if (flags & S_APPEND) + jfs_ip->mode2 |= JFS_APPEND_FL; + if (flags & S_NOATIME) +@@ -61,6 +79,11 @@ void jfs_get_inode_flags(struct jfs_inod + jfs_ip->mode2 |= JFS_DIRSYNC_FL; + if (flags & S_SYNC) + jfs_ip->mode2 |= JFS_SYNC_FL; ++ ++ if (vflags & V_BARRIER) ++ jfs_ip->mode2 |= JFS_BARRIER_FL; ++ if (vflags & V_COW) ++ jfs_ip->mode2 |= JFS_COW_FL; + } + + /* +diff -NurpP --minimal linux-3.2.34/fs/jfs/jfs_inode.h linux-3.2.34-vs2.3.2.15/fs/jfs/jfs_inode.h +--- linux-3.2.34/fs/jfs/jfs_inode.h 2011-10-24 18:45:27.000000000 +0200 ++++ linux-3.2.34-vs2.3.2.15/fs/jfs/jfs_inode.h 2011-12-05 19:33:02.000000000 +0100 +@@ -39,6 +39,7 @@ extern struct dentry *jfs_fh_to_dentry(s + extern struct dentry *jfs_fh_to_parent(struct super_block *sb, struct fid *fid, + int fh_len, int fh_type); + extern void jfs_set_inode_flags(struct inode *); ++extern int jfs_sync_flags(struct inode *, int, int); + extern int jfs_get_block(struct inode *, sector_t, struct buffer_head *, int); + extern int jfs_setattr(struct dentry *, struct iattr *); + +diff -NurpP --minimal linux-3.2.34/fs/jfs/namei.c linux-3.2.34-vs2.3.2.15/fs/jfs/namei.c +--- linux-3.2.34/fs/jfs/namei.c 2012-01-09 16:14:54.000000000 +0100 ++++ linux-3.2.34-vs2.3.2.15/fs/jfs/namei.c 2011-12-05 19:33:02.000000000 +0100 +@@ -22,6 +22,7 @@ + #include + #include + 
#include ++#include + #include "jfs_incore.h" + #include "jfs_superblock.h" + #include "jfs_inode.h" +@@ -1474,6 +1475,7 @@ static struct dentry *jfs_lookup(struct + jfs_err("jfs_lookup: iget failed on inum %d", (uint)inum); + } + ++ dx_propagate_tag(nd, ip); + return d_splice_alias(ip, dentry); + } + +@@ -1538,6 +1540,7 @@ const struct inode_operations jfs_dir_in + #ifdef CONFIG_JFS_POSIX_ACL + .get_acl = jfs_get_acl, + #endif ++ .sync_flags = jfs_sync_flags, + }; + + const struct file_operations jfs_dir_operations = { +diff -NurpP --minimal linux-3.2.34/fs/jfs/super.c linux-3.2.34-vs2.3.2.15/fs/jfs/super.c +--- linux-3.2.34/fs/jfs/super.c 2012-01-09 16:14:54.000000000 +0100 ++++ linux-3.2.34-vs2.3.2.15/fs/jfs/super.c 2011-12-05 19:33:02.000000000 +0100 +@@ -198,7 +198,8 @@ static void jfs_put_super(struct super_b + enum { + Opt_integrity, Opt_nointegrity, Opt_iocharset, Opt_resize, + Opt_resize_nosize, Opt_errors, Opt_ignore, Opt_err, Opt_quota, +- Opt_usrquota, Opt_grpquota, Opt_uid, Opt_gid, Opt_umask ++ Opt_usrquota, Opt_grpquota, Opt_uid, Opt_gid, Opt_umask, ++ Opt_tag, Opt_notag, Opt_tagid + }; + + static const match_table_t tokens = { +@@ -208,6 +209,10 @@ static const match_table_t tokens = { + {Opt_resize, "resize=%u"}, + {Opt_resize_nosize, "resize"}, + {Opt_errors, "errors=%s"}, ++ {Opt_tag, "tag"}, ++ {Opt_notag, "notag"}, ++ {Opt_tagid, "tagid=%u"}, ++ {Opt_tag, "tagxid"}, + {Opt_ignore, "noquota"}, + {Opt_ignore, "quota"}, + {Opt_usrquota, "usrquota"}, +@@ -342,6 +347,20 @@ static int parse_options(char *options, + } + break; + } ++#ifndef CONFIG_TAGGING_NONE ++ case Opt_tag: ++ *flag |= JFS_TAGGED; ++ break; ++ case Opt_notag: ++ *flag &= JFS_TAGGED; ++ break; ++#endif ++#ifdef CONFIG_PROPAGATE ++ case Opt_tagid: ++ /* use args[0] */ ++ *flag |= JFS_TAGGED; ++ break; ++#endif + default: + printk("jfs: Unrecognized mount option \"%s\" " + " or missing value\n", p); +@@ -373,6 +392,12 @@ static int jfs_remount(struct super_bloc + return -EINVAL; + } + ++ if ((flag & JFS_TAGGED) && !(sb->s_flags & MS_TAGGED)) { ++ printk(KERN_ERR "JFS: %s: tagging not permitted on remount.\n", ++ sb->s_id); ++ return -EINVAL; ++ } ++ + if (newLVSize) { + if (sb->s_flags & MS_RDONLY) { + printk(KERN_ERR +@@ -455,6 +480,9 @@ static int jfs_fill_super(struct super_b + #ifdef CONFIG_JFS_POSIX_ACL + sb->s_flags |= MS_POSIXACL; + #endif ++ /* map mount option tagxid */ ++ if (sbi->flag & JFS_TAGGED) ++ sb->s_flags |= MS_TAGGED; + + if (newLVSize) { + printk(KERN_ERR "resize option for remount only\n"); +diff -NurpP --minimal linux-3.2.34/fs/libfs.c linux-3.2.34-vs2.3.2.15/fs/libfs.c +--- linux-3.2.34/fs/libfs.c 2012-01-09 16:14:54.000000000 +0100 ++++ linux-3.2.34-vs2.3.2.15/fs/libfs.c 2011-12-05 19:33:02.000000000 +0100 +@@ -135,7 +135,8 @@ static inline unsigned char dt_type(stru + * both impossible due to the lock on directory. 
+ */ + +-int dcache_readdir(struct file * filp, void * dirent, filldir_t filldir) ++static inline int do_dcache_readdir_filter(struct file *filp, ++ void *dirent, filldir_t filldir, int (*filter)(struct dentry *dentry)) + { + struct dentry *dentry = filp->f_path.dentry; + struct dentry *cursor = filp->private_data; +@@ -166,6 +167,8 @@ int dcache_readdir(struct file * filp, v + for (p=q->next; p != &dentry->d_subdirs; p=p->next) { + struct dentry *next; + next = list_entry(p, struct dentry, d_u.d_child); ++ if (filter && !filter(next)) ++ continue; + spin_lock_nested(&next->d_lock, DENTRY_D_LOCK_NESTED); + if (!simple_positive(next)) { + spin_unlock(&next->d_lock); +@@ -192,6 +195,17 @@ int dcache_readdir(struct file * filp, v + return 0; + } + ++int dcache_readdir(struct file *filp, void *dirent, filldir_t filldir) ++{ ++ return do_dcache_readdir_filter(filp, dirent, filldir, NULL); ++} ++ ++int dcache_readdir_filter(struct file *filp, void *dirent, filldir_t filldir, ++ int (*filter)(struct dentry *)) ++{ ++ return do_dcache_readdir_filter(filp, dirent, filldir, filter); ++} ++ + ssize_t generic_read_dir(struct file *filp, char __user *buf, size_t siz, loff_t *ppos) + { + return -EISDIR; +@@ -977,6 +991,7 @@ EXPORT_SYMBOL(dcache_dir_close); + EXPORT_SYMBOL(dcache_dir_lseek); + EXPORT_SYMBOL(dcache_dir_open); + EXPORT_SYMBOL(dcache_readdir); ++EXPORT_SYMBOL(dcache_readdir_filter); + EXPORT_SYMBOL(generic_read_dir); + EXPORT_SYMBOL(mount_pseudo); + EXPORT_SYMBOL(simple_write_begin); +diff -NurpP --minimal linux-3.2.34/fs/locks.c linux-3.2.34-vs2.3.2.15/fs/locks.c +--- linux-3.2.34/fs/locks.c 2012-11-18 18:42:21.000000000 +0100 ++++ linux-3.2.34-vs2.3.2.15/fs/locks.c 2012-08-13 12:40:51.000000000 +0200 +@@ -126,6 +126,8 @@ + #include + #include + #include ++#include ++#include + + #include + +@@ -184,11 +186,17 @@ static void locks_init_lock_heads(struct + /* Allocate an empty lock structure. 
*/ + struct file_lock *locks_alloc_lock(void) + { +- struct file_lock *fl = kmem_cache_zalloc(filelock_cache, GFP_KERNEL); ++ struct file_lock *fl; + +- if (fl) +- locks_init_lock_heads(fl); ++ if (!vx_locks_avail(1)) ++ return NULL; + ++ fl = kmem_cache_zalloc(filelock_cache, GFP_KERNEL); ++ ++ if (fl) { ++ locks_init_lock_heads(fl); ++ fl->fl_xid = -1; ++ } + return fl; + } + EXPORT_SYMBOL_GPL(locks_alloc_lock); +@@ -216,6 +224,7 @@ void locks_free_lock(struct file_lock *f + BUG_ON(!list_empty(&fl->fl_block)); + BUG_ON(!list_empty(&fl->fl_link)); + ++ vx_locks_dec(fl); + locks_release_private(fl); + kmem_cache_free(filelock_cache, fl); + } +@@ -225,6 +234,7 @@ void locks_init_lock(struct file_lock *f + { + memset(fl, 0, sizeof(struct file_lock)); + locks_init_lock_heads(fl); ++ fl->fl_xid = -1; + } + + EXPORT_SYMBOL(locks_init_lock); +@@ -265,6 +275,7 @@ void locks_copy_lock(struct file_lock *n + new->fl_file = fl->fl_file; + new->fl_ops = fl->fl_ops; + new->fl_lmops = fl->fl_lmops; ++ new->fl_xid = fl->fl_xid; + + locks_copy_private(new, fl); + } +@@ -303,6 +314,11 @@ static int flock_make_lock(struct file * + fl->fl_flags = FL_FLOCK; + fl->fl_type = type; + fl->fl_end = OFFSET_MAX; ++ ++ vxd_assert(filp->f_xid == vx_current_xid(), ++ "f_xid(%d) == current(%d)", filp->f_xid, vx_current_xid()); ++ fl->fl_xid = filp->f_xid; ++ vx_locks_inc(fl); + + *lock = fl; + return 0; +@@ -452,6 +468,7 @@ static int lease_init(struct file *filp, + + fl->fl_owner = current->files; + fl->fl_pid = current->tgid; ++ fl->fl_xid = vx_current_xid(); + + fl->fl_file = filp; + fl->fl_flags = FL_LEASE; +@@ -471,6 +488,11 @@ static struct file_lock *lease_alloc(str + if (fl == NULL) + return ERR_PTR(error); + ++ fl->fl_xid = vx_current_xid(); ++ if (filp) ++ vxd_assert(filp->f_xid == fl->fl_xid, ++ "f_xid(%d) == fl_xid(%d)", filp->f_xid, fl->fl_xid); ++ vx_locks_inc(fl); + error = lease_init(filp, type, fl); + if (error) { + locks_free_lock(fl); +@@ -773,6 +795,7 @@ static int flock_lock_file(struct file * + lock_flocks(); + } + ++ new_fl->fl_xid = -1; + find_conflict: + for_each_lock(inode, before) { + struct file_lock *fl = *before; +@@ -793,6 +816,7 @@ find_conflict: + goto out; + locks_copy_lock(new_fl, request); + locks_insert_lock(before, new_fl); ++ vx_locks_inc(new_fl); + new_fl = NULL; + error = 0; + +@@ -803,7 +827,8 @@ out: + return error; + } + +-static int __posix_lock_file(struct inode *inode, struct file_lock *request, struct file_lock *conflock) ++static int __posix_lock_file(struct inode *inode, struct file_lock *request, ++ struct file_lock *conflock, xid_t xid) + { + struct file_lock *fl; + struct file_lock *new_fl = NULL; +@@ -813,6 +838,8 @@ static int __posix_lock_file(struct inod + struct file_lock **before; + int error, added = 0; + ++ vxd_assert(xid == vx_current_xid(), ++ "xid(%d) == current(%d)", xid, vx_current_xid()); + /* + * We may need two file_lock structures for this operation, + * so we get them in advance to avoid races. 
+@@ -823,7 +850,11 @@ static int __posix_lock_file(struct inod + (request->fl_type != F_UNLCK || + request->fl_start != 0 || request->fl_end != OFFSET_MAX)) { + new_fl = locks_alloc_lock(); ++ new_fl->fl_xid = xid; ++ vx_locks_inc(new_fl); + new_fl2 = locks_alloc_lock(); ++ new_fl2->fl_xid = xid; ++ vx_locks_inc(new_fl2); + } + + lock_flocks(); +@@ -1022,7 +1053,8 @@ static int __posix_lock_file(struct inod + int posix_lock_file(struct file *filp, struct file_lock *fl, + struct file_lock *conflock) + { +- return __posix_lock_file(filp->f_path.dentry->d_inode, fl, conflock); ++ return __posix_lock_file(filp->f_path.dentry->d_inode, ++ fl, conflock, filp->f_xid); + } + EXPORT_SYMBOL(posix_lock_file); + +@@ -1112,7 +1144,7 @@ int locks_mandatory_area(int read_write, + fl.fl_end = offset + count - 1; + + for (;;) { +- error = __posix_lock_file(inode, &fl, NULL); ++ error = __posix_lock_file(inode, &fl, NULL, filp->f_xid); + if (error != FILE_LOCK_DEFERRED) + break; + error = wait_event_interruptible(fl.fl_wait, !fl.fl_next); +@@ -1407,6 +1439,7 @@ int generic_add_lease(struct file *filp, + goto out; + + locks_insert_lock(before, lease); ++ vx_locks_inc(lease); + return 0; + + out: +@@ -1847,6 +1880,11 @@ int fcntl_setlk(unsigned int fd, struct + if (file_lock == NULL) + return -ENOLCK; + ++ vxd_assert(filp->f_xid == vx_current_xid(), ++ "f_xid(%d) == current(%d)", filp->f_xid, vx_current_xid()); ++ file_lock->fl_xid = filp->f_xid; ++ vx_locks_inc(file_lock); ++ + /* + * This might block, so we do it before checking the inode. + */ +@@ -1965,6 +2003,11 @@ int fcntl_setlk64(unsigned int fd, struc + if (file_lock == NULL) + return -ENOLCK; + ++ vxd_assert(filp->f_xid == vx_current_xid(), ++ "f_xid(%d) == current(%d)", filp->f_xid, vx_current_xid()); ++ file_lock->fl_xid = filp->f_xid; ++ vx_locks_inc(file_lock); ++ + /* + * This might block, so we do it before checking the inode. 
+ */ +@@ -2230,8 +2273,11 @@ static int locks_show(struct seq_file *f + + lock_get_status(f, fl, *((loff_t *)f->private), ""); + +- list_for_each_entry(bfl, &fl->fl_block, fl_block) ++ list_for_each_entry(bfl, &fl->fl_block, fl_block) { ++ if (!vx_check(fl->fl_xid, VS_WATCH_P | VS_IDENT)) ++ continue; + lock_get_status(f, bfl, *((loff_t *)f->private), " ->"); ++ } + + return 0; + } +diff -NurpP --minimal linux-3.2.34/fs/namei.c linux-3.2.34-vs2.3.2.15/fs/namei.c +--- linux-3.2.34/fs/namei.c 2012-11-18 18:42:21.000000000 +0100 ++++ linux-3.2.34-vs2.3.2.15/fs/namei.c 2012-03-23 14:48:48.000000000 +0100 +@@ -33,6 +33,14 @@ + #include + #include + #include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include + #include + + #include "internal.h" +@@ -222,6 +230,89 @@ static int check_acl(struct inode *inode + return -EAGAIN; + } + ++static inline int dx_barrier(const struct inode *inode) ++{ ++ if (IS_BARRIER(inode) && !vx_check(0, VS_ADMIN | VS_WATCH)) { ++ vxwprintk_task(1, "did hit the barrier."); ++ return 1; ++ } ++ return 0; ++} ++ ++static int __dx_permission(const struct inode *inode, int mask) ++{ ++ if (dx_barrier(inode)) ++ return -EACCES; ++ ++ if (inode->i_sb->s_magic == DEVPTS_SUPER_MAGIC) { ++ /* devpts is xid tagged */ ++ if (S_ISDIR(inode->i_mode) || ++ vx_check((xid_t)inode->i_tag, VS_IDENT | VS_WATCH_P)) ++ return 0; ++ ++ /* just pretend we didn't find anything */ ++ return -ENOENT; ++ } ++ else if (inode->i_sb->s_magic == PROC_SUPER_MAGIC) { ++ struct proc_dir_entry *de = PDE(inode); ++ ++ if (de && !vx_hide_check(0, de->vx_flags)) ++ goto out; ++ ++ if ((mask & (MAY_WRITE | MAY_APPEND))) { ++ struct pid *pid; ++ struct task_struct *tsk; ++ ++ if (vx_check(0, VS_ADMIN | VS_WATCH_P) || ++ vx_flags(VXF_STATE_SETUP, 0)) ++ return 0; ++ ++ pid = PROC_I(inode)->pid; ++ if (!pid) ++ goto out; ++ ++ rcu_read_lock(); ++ tsk = pid_task(pid, PIDTYPE_PID); ++ vxdprintk(VXD_CBIT(tag, 0), "accessing %p[#%u]", ++ tsk, (tsk ? vx_task_xid(tsk) : 0)); ++ if (tsk && ++ vx_check(vx_task_xid(tsk), VS_IDENT | VS_WATCH_P)) { ++ rcu_read_unlock(); ++ return 0; ++ } ++ rcu_read_unlock(); ++ } ++ else { ++ /* FIXME: Should we block some entries here? */ ++ return 0; ++ } ++ } ++ else { ++ if (dx_notagcheck(inode->i_sb) || ++ dx_check(inode->i_tag, DX_HOSTID | DX_ADMIN | DX_WATCH | ++ DX_IDENT)) ++ return 0; ++ } ++ ++out: ++ return -EACCES; ++} ++ ++int dx_permission(const struct inode *inode, int mask) ++{ ++ int ret = __dx_permission(inode, mask); ++ if (unlikely(ret)) { ++#ifndef CONFIG_VSERVER_WARN_DEVPTS ++ if (inode->i_sb->s_magic != DEVPTS_SUPER_MAGIC) ++#endif ++ vxwprintk_task(1, ++ "denied [0x%x] access to inode %s:%p[#%d,%lu]", ++ mask, inode->i_sb->s_id, inode, inode->i_tag, ++ inode->i_ino); ++ } ++ return ret; ++} ++ + /* + * This does the basic permission checking + */ +@@ -357,10 +448,14 @@ int inode_permission(struct inode *inode + /* + * Nobody gets write access to an immutable file. + */ +- if (IS_IMMUTABLE(inode)) ++ if (IS_IMMUTABLE(inode) && !IS_COW(inode)) + return -EACCES; + } + ++ retval = dx_permission(inode, mask); ++ if (retval) ++ return retval; ++ + retval = do_inode_permission(inode, mask); + if (retval) + return retval; +@@ -1037,7 +1132,8 @@ static void follow_dotdot(struct nameida + + if (nd->path.dentry == nd->root.dentry && + nd->path.mnt == nd->root.mnt) { +- break; ++ /* for sane '/' avoid follow_mount() */ ++ return; + } + if (nd->path.dentry != nd->path.mnt->mnt_root) { + /* rare case of legitimate dget_parent()... 
*/ +@@ -1148,6 +1244,9 @@ static int do_lookup(struct nameidata *n + } + if (unlikely(d_need_lookup(dentry))) + goto unlazy; ++ ++ /* FIXME: check dx permission */ ++ + path->mnt = mnt; + path->dentry = dentry; + if (unlikely(!__follow_mount_rcu(nd, path, inode))) +@@ -1209,6 +1308,8 @@ retry: + } + } + ++ /* FIXME: check dx permission */ ++ + path->mnt = mnt; + path->dentry = dentry; + err = follow_managed(path, nd->flags); +@@ -1903,7 +2004,7 @@ static int may_delete(struct inode *dir, + if (IS_APPEND(dir)) + return -EPERM; + if (check_sticky(dir, victim->d_inode)||IS_APPEND(victim->d_inode)|| +- IS_IMMUTABLE(victim->d_inode) || IS_SWAPFILE(victim->d_inode)) ++ IS_IXORUNLINK(victim->d_inode) || IS_SWAPFILE(victim->d_inode)) + return -EPERM; + if (isdir) { + if (!S_ISDIR(victim->d_inode->i_mode)) +@@ -1983,19 +2084,25 @@ int vfs_create(struct inode *dir, struct + { + int error = may_create(dir, dentry); + +- if (error) ++ if (error) { ++ vxdprintk(VXD_CBIT(misc, 3), "may_create failed with %d", error); + return error; ++ } + + if (!dir->i_op->create) + return -EACCES; /* shouldn't it be ENOSYS? */ + mode &= S_IALLUGO; + mode |= S_IFREG; + error = security_inode_create(dir, dentry, mode); +- if (error) ++ if (error) { ++ vxdprintk(VXD_CBIT(misc, 3), "security_inode_create failed with %d", error); + return error; ++ } + error = dir->i_op->create(dir, dentry, mode, nd); + if (!error) + fsnotify_create(dir, dentry); ++ else ++ vxdprintk(VXD_CBIT(misc, 3), "i_op->create failed with %d", error); + return error; + } + +@@ -2030,6 +2137,15 @@ static int may_open(struct path *path, i + break; + } + ++#ifdef CONFIG_VSERVER_COWBL ++ if (IS_COW(inode) && ++ ((flag & O_ACCMODE) != O_RDONLY)) { ++ if (IS_COW_LINK(inode)) ++ return -EMLINK; ++ inode->i_flags &= ~(S_IXUNLINK|S_IMMUTABLE); ++ mark_inode_dirty(inode); ++ } ++#endif + error = inode_permission(inode, acc_mode); + if (error) + return error; +@@ -2254,6 +2370,16 @@ ok: + } + common: + error = may_open(&nd->path, acc_mode, open_flag); ++#ifdef CONFIG_VSERVER_COWBL ++ if (error == -EMLINK) { ++ struct dentry *dentry; ++ dentry = cow_break_link(pathname); ++ if (IS_ERR(dentry)) ++ error = PTR_ERR(dentry); ++ else ++ dput(dentry); ++ } ++#endif + if (error) + goto exit; + filp = nameidata_to_filp(nd); +@@ -2296,6 +2422,7 @@ static struct file *path_openat(int dfd, + struct path path; + int error; + ++restart: + filp = get_empty_filp(); + if (!filp) + return ERR_PTR(-ENFILE); +@@ -2333,6 +2460,17 @@ static struct file *path_openat(int dfd, + filp = do_last(nd, &path, op, pathname); + put_link(nd, &link, cookie); + } ++ ++#ifdef CONFIG_VSERVER_COWBL ++ if (filp == ERR_PTR(-EMLINK)) { ++ if (nd->root.mnt && !(nd->flags & LOOKUP_ROOT)) ++ path_put(&nd->root); ++ if (base) ++ fput(base); ++ release_open_intent(nd); ++ goto restart; ++ } ++#endif + out: + if (nd->root.mnt && !(nd->flags & LOOKUP_ROOT)) + path_put(&nd->root); +@@ -2422,6 +2560,11 @@ struct dentry *kern_path_create(int dfd, + goto fail; + } + *path = nd.path; ++ vxdprintk(VXD_CBIT(misc, 3), "kern_path_create path.dentry = %p (%.*s), dentry = %p (%.*s), d_inode = %p", ++ path->dentry, path->dentry->d_name.len, ++ path->dentry->d_name.name, dentry, ++ dentry->d_name.len, dentry->d_name.name, ++ path->dentry->d_inode); + return dentry; + eexist: + dput(dentry); +@@ -2903,7 +3046,7 @@ int vfs_link(struct dentry *old_dentry, + /* + * A link to an append-only or immutable file cannot be created. 
+ */ +- if (IS_APPEND(inode) || IS_IMMUTABLE(inode)) ++ if (IS_APPEND(inode) || IS_IXORUNLINK(inode)) + return -EPERM; + if (!dir->i_op->link) + return -EPERM; +@@ -3284,6 +3427,227 @@ int vfs_follow_link(struct nameidata *nd + return __vfs_follow_link(nd, link); + } + ++ ++#ifdef CONFIG_VSERVER_COWBL ++ ++static inline ++long do_cow_splice(struct file *in, struct file *out, size_t len) ++{ ++ loff_t ppos = 0; ++ ++ return do_splice_direct(in, &ppos, out, len, 0); ++} ++ ++struct dentry *cow_break_link(const char *pathname) ++{ ++ int ret, mode, pathlen, redo = 0; ++ struct nameidata old_nd, dir_nd; ++ struct path old_path, dir_path; ++ struct dentry *dir, *old_dentry, *new_dentry = NULL; ++ struct file *old_file; ++ struct file *new_file; ++ char *to, *path, pad='\251'; ++ loff_t size; ++ ++ vxdprintk(VXD_CBIT(misc, 1), ++ "cow_break_link(" VS_Q("%s") ")", pathname); ++ path = kmalloc(PATH_MAX, GFP_KERNEL); ++ ret = -ENOMEM; ++ if (!path) ++ goto out; ++ ++ /* old_nd will have refs to dentry and mnt */ ++ ret = do_path_lookup(AT_FDCWD, pathname, LOOKUP_FOLLOW, &old_nd); ++ vxdprintk(VXD_CBIT(misc, 2), ++ "do_path_lookup(old): %d [r=%d]", ++ ret, mnt_get_count(old_nd.path.mnt)); ++ if (ret < 0) ++ goto out_free_path; ++ ++ old_path = old_nd.path; ++ old_dentry = old_path.dentry; ++ mode = old_dentry->d_inode->i_mode; ++ ++ to = d_path(&old_path, path, PATH_MAX-2); ++ pathlen = strlen(to); ++ vxdprintk(VXD_CBIT(misc, 2), ++ "old path " VS_Q("%s") " [%p:" VS_Q("%.*s") ":%d]", to, ++ old_dentry, ++ old_dentry->d_name.len, old_dentry->d_name.name, ++ old_dentry->d_name.len); ++ ++ to[pathlen + 1] = 0; ++retry: ++ new_dentry = NULL; ++ to[pathlen] = pad--; ++ ret = -ELOOP; ++ if (pad <= '\240') ++ goto out_rel_old; ++ ++ vxdprintk(VXD_CBIT(misc, 1), "temp copy " VS_Q("%s"), to); ++ /* dir_nd will have refs to dentry and mnt */ ++ ret = do_path_lookup(AT_FDCWD, to, ++ LOOKUP_PARENT | LOOKUP_OPEN | LOOKUP_CREATE, &dir_nd); ++ vxdprintk(VXD_CBIT(misc, 2), "do_path_lookup(new): %d", ret); ++ if (ret < 0) ++ goto retry; ++ ++ /* this puppy downs the dir inode mutex if successful */ ++ new_dentry = kern_path_create(AT_FDCWD, to, &dir_path, 0); ++ if (!new_dentry || IS_ERR(new_dentry)) { ++ path_put(&dir_nd.path); ++ vxdprintk(VXD_CBIT(misc, 2), ++ "kern_path_create(new) failed with %ld", ++ PTR_ERR(new_dentry)); ++ goto retry; ++ } ++ path_put(&dir_path); ++ vxdprintk(VXD_CBIT(misc, 2), ++ "kern_path_create(new): %p [" VS_Q("%.*s") ":%d]", ++ new_dentry, ++ new_dentry->d_name.len, new_dentry->d_name.name, ++ new_dentry->d_name.len); ++ ++ dir = dir_nd.path.dentry; ++ ++ ret = vfs_create(dir->d_inode, new_dentry, mode, &dir_nd); ++ vxdprintk(VXD_CBIT(misc, 2), ++ "vfs_create(new): %d", ret); ++ if (ret == -EEXIST) { ++ mutex_unlock(&dir->d_inode->i_mutex); ++ path_put(&dir_nd.path); ++ dput(new_dentry); ++ goto retry; ++ } ++ else if (ret < 0) ++ goto out_unlock_new; ++ ++ /* drop out early, ret passes ENOENT */ ++ ret = -ENOENT; ++ if ((redo = d_unhashed(old_dentry))) ++ goto out_unlock_new; ++ ++ path_get(&old_path); ++ /* this one cleans up the dentry/mnt in case of failure */ ++ old_file = dentry_open(old_dentry, old_path.mnt, ++ O_RDONLY, current_cred()); ++ vxdprintk(VXD_CBIT(misc, 2), ++ "dentry_open(old): %p", old_file); ++ if (IS_ERR(old_file)) { ++ ret = PTR_ERR(old_file); ++ goto out_unlock_new; ++ } ++ ++ dget(new_dentry); ++ mntget(old_path.mnt); ++ /* this one cleans up the dentry/mnt in case of failure */ ++ new_file = dentry_open(new_dentry, old_path.mnt, ++ O_WRONLY, current_cred()); 
++ vxdprintk(VXD_CBIT(misc, 2), ++ "dentry_open(new): %p", new_file); ++ if (IS_ERR(new_file)) { ++ ret = PTR_ERR(new_file); ++ goto out_fput_old; ++ } ++ ++ size = i_size_read(old_file->f_dentry->d_inode); ++ ret = do_cow_splice(old_file, new_file, size); ++ vxdprintk(VXD_CBIT(misc, 2), "do_splice_direct: %d", ret); ++ if (ret < 0) { ++ goto out_fput_both; ++ } else if (ret < size) { ++ ret = -ENOSPC; ++ goto out_fput_both; ++ } else { ++ struct inode *old_inode = old_dentry->d_inode; ++ struct inode *new_inode = new_dentry->d_inode; ++ struct iattr attr = { ++ .ia_uid = old_inode->i_uid, ++ .ia_gid = old_inode->i_gid, ++ .ia_valid = ATTR_UID | ATTR_GID ++ }; ++ ++ setattr_copy(new_inode, &attr); ++ mark_inode_dirty(new_inode); ++ } ++ ++ mutex_lock(&old_dentry->d_inode->i_sb->s_vfs_rename_mutex); ++ ++ /* drop out late */ ++ ret = -ENOENT; ++ if ((redo = d_unhashed(old_dentry))) ++ goto out_unlock; ++ ++ vxdprintk(VXD_CBIT(misc, 2), ++ "vfs_rename: [" VS_Q("%*s") ":%d] -> [" VS_Q("%*s") ":%d]", ++ new_dentry->d_name.len, new_dentry->d_name.name, ++ new_dentry->d_name.len, ++ old_dentry->d_name.len, old_dentry->d_name.name, ++ old_dentry->d_name.len); ++ ret = vfs_rename(dir_nd.path.dentry->d_inode, new_dentry, ++ old_dentry->d_parent->d_inode, old_dentry); ++ vxdprintk(VXD_CBIT(misc, 2), "vfs_rename: %d", ret); ++ ++out_unlock: ++ mutex_unlock(&old_dentry->d_inode->i_sb->s_vfs_rename_mutex); ++ ++out_fput_both: ++ vxdprintk(VXD_CBIT(misc, 3), ++ "fput(new_file=%p[#%ld])", new_file, ++ atomic_long_read(&new_file->f_count)); ++ fput(new_file); ++ ++out_fput_old: ++ vxdprintk(VXD_CBIT(misc, 3), ++ "fput(old_file=%p[#%ld])", old_file, ++ atomic_long_read(&old_file->f_count)); ++ fput(old_file); ++ ++out_unlock_new: ++ mutex_unlock(&dir->d_inode->i_mutex); ++ if (!ret) ++ goto out_redo; ++ ++ /* error path cleanup */ ++ vfs_unlink(dir->d_inode, new_dentry); ++ ++out_redo: ++ if (!redo) ++ goto out_rel_both; ++ /* lookup dentry once again */ ++ /* old_nd.path is freed as old_path in out_rel_old */ ++ ret = do_path_lookup(AT_FDCWD, pathname, LOOKUP_FOLLOW, &old_nd); ++ if (ret) ++ goto out_rel_both; ++ ++ dput(new_dentry); ++ new_dentry = old_nd.path.dentry; ++ vxdprintk(VXD_CBIT(misc, 2), ++ "do_path_lookup(redo): %p [" VS_Q("%.*s") ":%d]", ++ new_dentry, ++ new_dentry->d_name.len, new_dentry->d_name.name, ++ new_dentry->d_name.len); ++ dget(new_dentry); ++ ++out_rel_both: ++ path_put(&dir_nd.path); ++out_rel_old: ++ path_put(&old_path); ++out_free_path: ++ kfree(path); ++out: ++ if (ret) { ++ dput(new_dentry); ++ new_dentry = ERR_PTR(ret); ++ } ++ vxdprintk(VXD_CBIT(misc, 3), ++ "cow_break_link returning with %p [r=%d]", ++ new_dentry, mnt_get_count(old_nd.path.mnt)); ++ return new_dentry; ++} ++ ++#endif ++ + /* get the link contents into pagecache */ + static char *page_getlink(struct dentry * dentry, struct page **ppage) + { +diff -NurpP --minimal linux-3.2.34/fs/namespace.c linux-3.2.34-vs2.3.2.15/fs/namespace.c +--- linux-3.2.34/fs/namespace.c 2012-11-18 18:42:21.000000000 +0100 ++++ linux-3.2.34-vs2.3.2.15/fs/namespace.c 2012-06-14 20:45:24.000000000 +0200 +@@ -31,6 +31,11 @@ + #include + #include + #include ++#include ++#include ++#include ++#include ++#include + #include + #include + #include "pnode.h" +@@ -679,6 +684,10 @@ vfs_kern_mount(struct file_system_type * + if (!type) + return ERR_PTR(-ENODEV); + ++ if ((type->fs_flags & FS_BINARY_MOUNTDATA) && ++ !vx_capable(CAP_SYS_ADMIN, VXC_BINARY_MOUNT)) ++ return ERR_PTR(-EPERM); ++ + mnt = alloc_vfsmnt(name); + if (!mnt) + return 
ERR_PTR(-ENOMEM); +@@ -724,6 +733,7 @@ static struct vfsmount *clone_mnt(struct + mnt->mnt_root = dget(root); + mnt->mnt_mountpoint = mnt->mnt_root; + mnt->mnt_parent = mnt; ++ mnt->mnt_tag = old->mnt_tag; + + if (flag & CL_SLAVE) { + list_add(&mnt->mnt_slave, &old->mnt_slave_list); +@@ -852,6 +862,31 @@ static inline void mangle(struct seq_fil + seq_escape(m, s, " \t\n\\"); + } + ++static int mnt_is_reachable(struct vfsmount *mnt) ++{ ++ struct path root; ++ struct dentry *point; ++ int ret; ++ ++ if (mnt == mnt->mnt_ns->root) ++ return 1; ++ ++ br_read_lock(vfsmount_lock); ++ root = current->fs->root; ++ point = root.dentry; ++ ++ while ((mnt != mnt->mnt_parent) && (mnt != root.mnt)) { ++ point = mnt->mnt_mountpoint; ++ mnt = mnt->mnt_parent; ++ } ++ ++ ret = (mnt == root.mnt) && is_subdir(point, root.dentry); ++ ++ br_read_unlock(vfsmount_lock); ++ ++ return ret; ++} ++ + /* + * Simple .show_options callback for filesystems which don't want to + * implement more complex mount option showing. +@@ -954,6 +989,8 @@ static int show_sb_opts(struct seq_file + { MS_SYNCHRONOUS, ",sync" }, + { MS_DIRSYNC, ",dirsync" }, + { MS_MANDLOCK, ",mand" }, ++ { MS_TAGGED, ",tag" }, ++ { MS_NOTAGCHECK, ",notagcheck" }, + { 0, NULL } + }; + const struct proc_fs_info *fs_infop; +@@ -1000,16 +1037,26 @@ static int show_vfsmnt(struct seq_file * + int err = 0; + struct path mnt_path = { .dentry = mnt->mnt_root, .mnt = mnt }; + +- if (mnt->mnt_sb->s_op->show_devname) { +- err = mnt->mnt_sb->s_op->show_devname(m, mnt); +- if (err) +- goto out; ++ if (vx_flags(VXF_HIDE_MOUNT, 0)) ++ return SEQ_SKIP; ++ if (!mnt_is_reachable(mnt) && !vx_check(0, VS_WATCH_P)) ++ return SEQ_SKIP; ++ ++ if (!vx_check(0, VS_ADMIN|VS_WATCH) && ++ mnt == current->fs->root.mnt) { ++ seq_puts(m, "/dev/root / "); + } else { +- mangle(m, mnt->mnt_devname ? mnt->mnt_devname : "none"); ++ if (mnt->mnt_sb->s_op->show_devname) { ++ err = mnt->mnt_sb->s_op->show_devname(m, mnt); ++ if (err) ++ goto out; ++ } else { ++ mangle(m, mnt->mnt_devname ? mnt->mnt_devname : "none"); ++ } ++ seq_putc(m, ' '); ++ seq_path(m, &mnt_path, " \t\n\\"); ++ seq_putc(m, ' '); + } +- seq_putc(m, ' '); +- seq_path(m, &mnt_path, " \t\n\\"); +- seq_putc(m, ' '); + show_type(m, mnt->mnt_sb); + seq_puts(m, __mnt_is_readonly(mnt) ? 
" ro" : " rw"); + err = show_sb_opts(m, mnt->mnt_sb); +@@ -1039,6 +1086,11 @@ static int show_mountinfo(struct seq_fil + struct path root = p->root; + int err = 0; + ++ if (vx_flags(VXF_HIDE_MOUNT, 0)) ++ return SEQ_SKIP; ++ if (!mnt_is_reachable(mnt) && !vx_check(0, VS_WATCH_P)) ++ return SEQ_SKIP; ++ + seq_printf(m, "%i %i %u:%u ", mnt->mnt_id, mnt->mnt_parent->mnt_id, + MAJOR(sb->s_dev), MINOR(sb->s_dev)); + if (sb->s_op->show_path) +@@ -1104,22 +1156,32 @@ static int show_vfsstat(struct seq_file + struct path mnt_path = { .dentry = mnt->mnt_root, .mnt = mnt }; + int err = 0; + +- /* device */ +- if (mnt->mnt_sb->s_op->show_devname) { +- seq_puts(m, "device "); +- err = mnt->mnt_sb->s_op->show_devname(m, mnt); ++ if (vx_flags(VXF_HIDE_MOUNT, 0)) ++ return SEQ_SKIP; ++ if (!mnt_is_reachable(mnt) && !vx_check(0, VS_WATCH_P)) ++ return SEQ_SKIP; ++ ++ if (!vx_check(0, VS_ADMIN|VS_WATCH) && ++ mnt == current->fs->root.mnt) { ++ seq_puts(m, "device /dev/root mounted on / "); + } else { +- if (mnt->mnt_devname) { ++ /* device */ ++ if (mnt->mnt_sb->s_op->show_devname) { + seq_puts(m, "device "); +- mangle(m, mnt->mnt_devname); +- } else +- seq_puts(m, "no device"); +- } ++ err = mnt->mnt_sb->s_op->show_devname(m, mnt); ++ } else { ++ if (mnt->mnt_devname) { ++ seq_puts(m, "device "); ++ mangle(m, mnt->mnt_devname); ++ } else ++ seq_puts(m, "no device"); ++ } + +- /* mount point */ +- seq_puts(m, " mounted on "); +- seq_path(m, &mnt_path, " \t\n\\"); +- seq_putc(m, ' '); ++ /* mount point */ ++ seq_puts(m, " mounted on "); ++ seq_path(m, &mnt_path, " \t\n\\"); ++ seq_putc(m, ' '); ++ } + + /* file system type */ + seq_puts(m, "with fstype "); +@@ -1379,7 +1441,7 @@ SYSCALL_DEFINE2(umount, char __user *, n + goto dput_and_out; + + retval = -EPERM; +- if (!capable(CAP_SYS_ADMIN)) ++ if (!vx_capable(CAP_SYS_ADMIN, VXC_SECURE_MOUNT)) + goto dput_and_out; + + retval = do_umount(path.mnt, flags); +@@ -1405,7 +1467,7 @@ SYSCALL_DEFINE1(oldumount, char __user * + + static int mount_is_safe(struct path *path) + { +- if (capable(CAP_SYS_ADMIN)) ++ if (vx_capable(CAP_SYS_ADMIN, VXC_SECURE_MOUNT)) + return 0; + return -EPERM; + #ifdef notyet +@@ -1715,7 +1777,7 @@ static int do_change_type(struct path *p + int type; + int err = 0; + +- if (!capable(CAP_SYS_ADMIN)) ++ if (!vx_capable(CAP_SYS_ADMIN, VXC_NAMESPACE)) + return -EPERM; + + if (path->dentry != path->mnt->mnt_root) +@@ -1731,6 +1793,7 @@ static int do_change_type(struct path *p + if (err) + goto out_unlock; + } ++ // mnt->mnt_flags = mnt_flags; + + br_write_lock(vfsmount_lock); + for (m = mnt; m; m = (recurse ? next_mnt(m, mnt) : NULL)) +@@ -1746,12 +1809,14 @@ static int do_change_type(struct path *p + * do loopback mount. + */ + static int do_loopback(struct path *path, char *old_name, +- int recurse) ++ tag_t tag, unsigned long flags, int mnt_flags) + { + LIST_HEAD(umount_list); + struct path old_path; + struct vfsmount *mnt = NULL; + int err = mount_is_safe(path); ++ int recurse = flags & MS_REC; ++ + if (err) + return err; + if (!old_name || !*old_name) +@@ -1817,12 +1882,12 @@ static int change_mount_flags(struct vfs + * on it - tough luck. 
+ */ + static int do_remount(struct path *path, int flags, int mnt_flags, +- void *data) ++ void *data, xid_t xid) + { + int err; + struct super_block *sb = path->mnt->mnt_sb; + +- if (!capable(CAP_SYS_ADMIN)) ++ if (!vx_capable(CAP_SYS_ADMIN, VXC_SECURE_REMOUNT)) + return -EPERM; + + if (!check_mnt(path->mnt)) +@@ -1870,7 +1935,7 @@ static int do_move_mount(struct path *pa + struct path old_path, parent_path; + struct vfsmount *p; + int err = 0; +- if (!capable(CAP_SYS_ADMIN)) ++ if (!vx_capable(CAP_SYS_ADMIN, VXC_SECURE_MOUNT)) + return -EPERM; + if (!old_name || !*old_name) + return -EINVAL; +@@ -2021,7 +2086,7 @@ static int do_new_mount(struct path *pat + return -EINVAL; + + /* we need capabilities... */ +- if (!capable(CAP_SYS_ADMIN)) ++ if (!vx_capable(CAP_SYS_ADMIN, VXC_SECURE_MOUNT)) + return -EPERM; + + mnt = do_kern_mount(type, flags, name, data); +@@ -2290,6 +2355,7 @@ long do_mount(char *dev_name, char *dir_ + struct path path; + int retval = 0; + int mnt_flags = 0; ++ tag_t tag = 0; + + /* Discard magic */ + if ((flags & MS_MGC_MSK) == MS_MGC_VAL) +@@ -2317,6 +2383,12 @@ long do_mount(char *dev_name, char *dir_ + if (!(flags & MS_NOATIME)) + mnt_flags |= MNT_RELATIME; + ++ if (dx_parse_tag(data_page, &tag, 1, &mnt_flags, &flags)) { ++ /* FIXME: bind and re-mounts get the tag flag? */ ++ if (flags & (MS_BIND|MS_REMOUNT)) ++ flags |= MS_TAGID; ++ } ++ + /* Separate the per-mountpoint flags */ + if (flags & MS_NOSUID) + mnt_flags |= MNT_NOSUID; +@@ -2333,15 +2405,17 @@ long do_mount(char *dev_name, char *dir_ + if (flags & MS_RDONLY) + mnt_flags |= MNT_READONLY; + ++ if (!capable(CAP_SYS_ADMIN)) ++ mnt_flags |= MNT_NODEV; + flags &= ~(MS_NOSUID | MS_NOEXEC | MS_NODEV | MS_ACTIVE | MS_BORN | + MS_NOATIME | MS_NODIRATIME | MS_RELATIME| MS_KERNMOUNT | + MS_STRICTATIME); + + if (flags & MS_REMOUNT) + retval = do_remount(&path, flags & ~MS_REMOUNT, mnt_flags, +- data_page); ++ data_page, tag); + else if (flags & MS_BIND) +- retval = do_loopback(&path, dev_name, flags & MS_REC); ++ retval = do_loopback(&path, dev_name, tag, flags, mnt_flags); + else if (flags & (MS_SHARED | MS_PRIVATE | MS_SLAVE | MS_UNBINDABLE)) + retval = do_change_type(&path, flags); + else if (flags & MS_MOVE) +@@ -2441,6 +2515,7 @@ static struct mnt_namespace *dup_mnt_ns( + q = next_mnt(q, new_ns->root); + } + up_write(&namespace_sem); ++ atomic_inc(&vs_global_mnt_ns); + + if (rootmnt) + mntput(rootmnt); +@@ -2612,9 +2687,10 @@ SYSCALL_DEFINE2(pivot_root, const char _ + goto out3; + + error = -EINVAL; +- if (IS_MNT_SHARED(old.mnt) || ++ if ((IS_MNT_SHARED(old.mnt) || + IS_MNT_SHARED(new.mnt->mnt_parent) || +- IS_MNT_SHARED(root.mnt->mnt_parent)) ++ IS_MNT_SHARED(root.mnt->mnt_parent)) && ++ !vx_flags(VXF_STATE_SETUP, 0)) + goto out4; + if (!check_mnt(root.mnt) || !check_mnt(new.mnt)) + goto out4; +@@ -2746,6 +2822,7 @@ void put_mnt_ns(struct mnt_namespace *ns + br_write_unlock(vfsmount_lock); + up_write(&namespace_sem); + release_mounts(&umount_list); ++ atomic_dec(&vs_global_mnt_ns); + kfree(ns); + } + EXPORT_SYMBOL(put_mnt_ns); +diff -NurpP --minimal linux-3.2.34/fs/nfs/client.c linux-3.2.34-vs2.3.2.15/fs/nfs/client.c +--- linux-3.2.34/fs/nfs/client.c 2012-01-09 16:14:54.000000000 +0100 ++++ linux-3.2.34-vs2.3.2.15/fs/nfs/client.c 2011-12-05 19:33:02.000000000 +0100 +@@ -779,6 +779,9 @@ static int nfs_init_server_rpcclient(str + if (server->flags & NFS_MOUNT_SOFT) + server->client->cl_softrtry = 1; + ++ server->client->cl_tag = 0; ++ if (server->flags & NFS_MOUNT_TAGGED) ++ server->client->cl_tag = 1; + return 
0; + } + +@@ -953,6 +956,10 @@ static void nfs_server_set_fsinfo(struct + server->acdirmin = server->acdirmax = 0; + } + ++ /* FIXME: needs fsinfo ++ if (server->flags & NFS_MOUNT_TAGGED) ++ sb->s_flags |= MS_TAGGED; */ ++ + server->maxfilesize = fsinfo->maxfilesize; + + server->time_delta = fsinfo->time_delta; +diff -NurpP --minimal linux-3.2.34/fs/nfs/dir.c linux-3.2.34-vs2.3.2.15/fs/nfs/dir.c +--- linux-3.2.34/fs/nfs/dir.c 2012-11-18 18:42:21.000000000 +0100 ++++ linux-3.2.34-vs2.3.2.15/fs/nfs/dir.c 2012-09-16 18:25:50.000000000 +0200 +@@ -35,6 +35,7 @@ + #include + #include + #include ++#include + + #include "delegation.h" + #include "iostat.h" +@@ -1311,6 +1312,7 @@ static struct dentry *nfs_lookup(struct + if (IS_ERR(res)) + goto out_unblock_sillyrename; + ++ dx_propagate_tag(nd, inode); + no_entry: + res = d_materialise_unique(dentry, inode); + if (res != NULL) { +diff -NurpP --minimal linux-3.2.34/fs/nfs/inode.c linux-3.2.34-vs2.3.2.15/fs/nfs/inode.c +--- linux-3.2.34/fs/nfs/inode.c 2012-11-18 18:42:21.000000000 +0100 ++++ linux-3.2.34-vs2.3.2.15/fs/nfs/inode.c 2012-10-22 12:59:52.000000000 +0200 +@@ -38,6 +38,7 @@ + #include + #include + #include ++#include + + #include + #include +@@ -273,6 +274,8 @@ nfs_fhget(struct super_block *sb, struct + if (inode->i_state & I_NEW) { + struct nfs_inode *nfsi = NFS_I(inode); + unsigned long now = jiffies; ++ uid_t uid; ++ gid_t gid; + + /* We set i_ino for the few things that still rely on it, + * such as stat(2) */ +@@ -321,8 +324,8 @@ nfs_fhget(struct super_block *sb, struct + inode->i_version = 0; + inode->i_size = 0; + clear_nlink(inode); +- inode->i_uid = -2; +- inode->i_gid = -2; ++ uid = -2; ++ gid = -2; + inode->i_blocks = 0; + memset(nfsi->cookieverf, 0, sizeof(nfsi->cookieverf)); + +@@ -359,13 +362,13 @@ nfs_fhget(struct super_block *sb, struct + else if (nfs_server_capable(inode, NFS_CAP_NLINK)) + nfsi->cache_validity |= NFS_INO_INVALID_ATTR; + if (fattr->valid & NFS_ATTR_FATTR_OWNER) +- inode->i_uid = fattr->uid; ++ uid = fattr->uid; + else if (nfs_server_capable(inode, NFS_CAP_OWNER)) + nfsi->cache_validity |= NFS_INO_INVALID_ATTR + | NFS_INO_INVALID_ACCESS + | NFS_INO_INVALID_ACL; + if (fattr->valid & NFS_ATTR_FATTR_GROUP) +- inode->i_gid = fattr->gid; ++ gid = fattr->gid; + else if (nfs_server_capable(inode, NFS_CAP_OWNER_GROUP)) + nfsi->cache_validity |= NFS_INO_INVALID_ATTR + | NFS_INO_INVALID_ACCESS +@@ -378,6 +381,11 @@ nfs_fhget(struct super_block *sb, struct + */ + inode->i_blocks = nfs_calc_block_size(fattr->du.nfs3.used); + } ++ inode->i_uid = INOTAG_UID(DX_TAG(inode), uid, gid); ++ inode->i_gid = INOTAG_GID(DX_TAG(inode), uid, gid); ++ inode->i_tag = INOTAG_TAG(DX_TAG(inode), uid, gid, 0); ++ /* maybe fattr->xid someday */ ++ + nfsi->attrtimeo = NFS_MINATTRTIMEO(inode); + nfsi->attrtimeo_timestamp = now; + nfsi->access_cache = RB_ROOT; +@@ -494,6 +502,8 @@ void nfs_setattr_update_inode(struct ino + inode->i_uid = attr->ia_uid; + if ((attr->ia_valid & ATTR_GID) != 0) + inode->i_gid = attr->ia_gid; ++ if ((attr->ia_valid & ATTR_TAG) && IS_TAGGED(inode)) ++ inode->i_tag = attr->ia_tag; + NFS_I(inode)->cache_validity |= NFS_INO_INVALID_ACCESS|NFS_INO_INVALID_ACL; + spin_unlock(&inode->i_lock); + } +@@ -943,6 +953,9 @@ static int nfs_check_inode_attributes(st + struct nfs_inode *nfsi = NFS_I(inode); + loff_t cur_size, new_isize; + unsigned long invalid = 0; ++ uid_t uid; ++ gid_t gid; ++ tag_t tag; + + + /* Has the inode gone and changed behind our back? 
*/ +@@ -966,13 +979,18 @@ static int nfs_check_inode_attributes(st + invalid |= NFS_INO_INVALID_ATTR|NFS_INO_REVAL_PAGECACHE; + } + ++ uid = INOTAG_UID(DX_TAG(inode), fattr->uid, fattr->gid); ++ gid = INOTAG_GID(DX_TAG(inode), fattr->uid, fattr->gid); ++ tag = INOTAG_TAG(DX_TAG(inode), fattr->uid, fattr->gid, 0); ++ + /* Have any file permissions changed? */ + if ((fattr->valid & NFS_ATTR_FATTR_MODE) && (inode->i_mode & S_IALLUGO) != (fattr->mode & S_IALLUGO)) + invalid |= NFS_INO_INVALID_ATTR | NFS_INO_INVALID_ACCESS | NFS_INO_INVALID_ACL; +- if ((fattr->valid & NFS_ATTR_FATTR_OWNER) && inode->i_uid != fattr->uid) ++ if ((fattr->valid & NFS_ATTR_FATTR_OWNER) && uid != fattr->uid) + invalid |= NFS_INO_INVALID_ATTR | NFS_INO_INVALID_ACCESS | NFS_INO_INVALID_ACL; +- if ((fattr->valid & NFS_ATTR_FATTR_GROUP) && inode->i_gid != fattr->gid) ++ if ((fattr->valid & NFS_ATTR_FATTR_GROUP) && gid != fattr->gid) + invalid |= NFS_INO_INVALID_ATTR | NFS_INO_INVALID_ACCESS | NFS_INO_INVALID_ACL; ++ /* maybe check for tag too? */ + + /* Has the link count changed? */ + if ((fattr->valid & NFS_ATTR_FATTR_NLINK) && inode->i_nlink != fattr->nlink) +@@ -1207,6 +1225,9 @@ static int nfs_update_inode(struct inode + unsigned long invalid = 0; + unsigned long now = jiffies; + unsigned long save_cache_validity; ++ uid_t uid; ++ gid_t gid; ++ tag_t tag; + + dfprintk(VFS, "NFS: %s(%s/%ld ct=%d info=0x%x)\n", + __func__, inode->i_sb->s_id, inode->i_ino, +@@ -1314,6 +1335,9 @@ static int nfs_update_inode(struct inode + | NFS_INO_REVAL_PAGECACHE + | NFS_INO_REVAL_FORCED); + ++ uid = TAGINO_UID(DX_TAG(inode), inode->i_uid, inode->i_tag); ++ gid = TAGINO_GID(DX_TAG(inode), inode->i_gid, inode->i_tag); ++ tag = inode->i_tag; + + if (fattr->valid & NFS_ATTR_FATTR_ATIME) + memcpy(&inode->i_atime, &fattr->atime, sizeof(inode->i_atime)); +@@ -1335,9 +1359,9 @@ static int nfs_update_inode(struct inode + | NFS_INO_REVAL_FORCED); + + if (fattr->valid & NFS_ATTR_FATTR_OWNER) { +- if (inode->i_uid != fattr->uid) { ++ if (uid != fattr->uid) { + invalid |= NFS_INO_INVALID_ATTR|NFS_INO_INVALID_ACCESS|NFS_INO_INVALID_ACL; +- inode->i_uid = fattr->uid; ++ uid = fattr->uid; + } + } else if (server->caps & NFS_CAP_OWNER) + invalid |= save_cache_validity & (NFS_INO_INVALID_ATTR +@@ -1346,9 +1370,9 @@ static int nfs_update_inode(struct inode + | NFS_INO_REVAL_FORCED); + + if (fattr->valid & NFS_ATTR_FATTR_GROUP) { +- if (inode->i_gid != fattr->gid) { ++ if (gid != fattr->gid) { + invalid |= NFS_INO_INVALID_ATTR|NFS_INO_INVALID_ACCESS|NFS_INO_INVALID_ACL; +- inode->i_gid = fattr->gid; ++ gid = fattr->gid; + } + } else if (server->caps & NFS_CAP_OWNER_GROUP) + invalid |= save_cache_validity & (NFS_INO_INVALID_ATTR +@@ -1356,6 +1380,10 @@ static int nfs_update_inode(struct inode + | NFS_INO_INVALID_ACL + | NFS_INO_REVAL_FORCED); + ++ inode->i_uid = INOTAG_UID(DX_TAG(inode), uid, gid); ++ inode->i_gid = INOTAG_GID(DX_TAG(inode), uid, gid); ++ inode->i_tag = INOTAG_TAG(DX_TAG(inode), uid, gid, tag); ++ + if (fattr->valid & NFS_ATTR_FATTR_NLINK) { + if (inode->i_nlink != fattr->nlink) { + invalid |= NFS_INO_INVALID_ATTR; +diff -NurpP --minimal linux-3.2.34/fs/nfs/nfs3xdr.c linux-3.2.34-vs2.3.2.15/fs/nfs/nfs3xdr.c +--- linux-3.2.34/fs/nfs/nfs3xdr.c 2011-03-15 18:07:32.000000000 +0100 ++++ linux-3.2.34-vs2.3.2.15/fs/nfs/nfs3xdr.c 2011-12-05 19:33:02.000000000 +0100 +@@ -20,6 +20,7 @@ + #include + #include + #include ++#include + #include "internal.h" + + #define NFSDBG_FACILITY NFSDBG_XDR +@@ -562,7 +563,8 @@ static __be32 
*xdr_decode_nfstime3(__be3 + * set_mtime mtime; + * }; + */ +-static void encode_sattr3(struct xdr_stream *xdr, const struct iattr *attr) ++static void encode_sattr3(struct xdr_stream *xdr, ++ const struct iattr *attr, int tag) + { + u32 nbytes; + __be32 *p; +@@ -594,15 +596,19 @@ static void encode_sattr3(struct xdr_str + } else + *p++ = xdr_zero; + +- if (attr->ia_valid & ATTR_UID) { ++ if (attr->ia_valid & ATTR_UID || ++ (tag && (attr->ia_valid & ATTR_TAG))) { + *p++ = xdr_one; +- *p++ = cpu_to_be32(attr->ia_uid); ++ *p++ = cpu_to_be32(TAGINO_UID(tag, ++ attr->ia_uid, attr->ia_tag)); + } else + *p++ = xdr_zero; + +- if (attr->ia_valid & ATTR_GID) { ++ if (attr->ia_valid & ATTR_GID || ++ (tag && (attr->ia_valid & ATTR_TAG))) { + *p++ = xdr_one; +- *p++ = cpu_to_be32(attr->ia_gid); ++ *p++ = cpu_to_be32(TAGINO_GID(tag, ++ attr->ia_gid, attr->ia_tag)); + } else + *p++ = xdr_zero; + +@@ -878,7 +884,7 @@ static void nfs3_xdr_enc_setattr3args(st + const struct nfs3_sattrargs *args) + { + encode_nfs_fh3(xdr, args->fh); +- encode_sattr3(xdr, args->sattr); ++ encode_sattr3(xdr, args->sattr, req->rq_task->tk_client->cl_tag); + encode_sattrguard3(xdr, args); + } + +@@ -1028,13 +1034,13 @@ static void nfs3_xdr_enc_write3args(stru + * }; + */ + static void encode_createhow3(struct xdr_stream *xdr, +- const struct nfs3_createargs *args) ++ const struct nfs3_createargs *args, int tag) + { + encode_uint32(xdr, args->createmode); + switch (args->createmode) { + case NFS3_CREATE_UNCHECKED: + case NFS3_CREATE_GUARDED: +- encode_sattr3(xdr, args->sattr); ++ encode_sattr3(xdr, args->sattr, tag); + break; + case NFS3_CREATE_EXCLUSIVE: + encode_createverf3(xdr, args->verifier); +@@ -1049,7 +1055,7 @@ static void nfs3_xdr_enc_create3args(str + const struct nfs3_createargs *args) + { + encode_diropargs3(xdr, args->fh, args->name, args->len); +- encode_createhow3(xdr, args); ++ encode_createhow3(xdr, args, req->rq_task->tk_client->cl_tag); + } + + /* +@@ -1065,7 +1071,7 @@ static void nfs3_xdr_enc_mkdir3args(stru + const struct nfs3_mkdirargs *args) + { + encode_diropargs3(xdr, args->fh, args->name, args->len); +- encode_sattr3(xdr, args->sattr); ++ encode_sattr3(xdr, args->sattr, req->rq_task->tk_client->cl_tag); + } + + /* +@@ -1082,9 +1088,9 @@ static void nfs3_xdr_enc_mkdir3args(stru + * }; + */ + static void encode_symlinkdata3(struct xdr_stream *xdr, +- const struct nfs3_symlinkargs *args) ++ const struct nfs3_symlinkargs *args, int tag) + { +- encode_sattr3(xdr, args->sattr); ++ encode_sattr3(xdr, args->sattr, tag); + encode_nfspath3(xdr, args->pages, args->pathlen); + } + +@@ -1093,7 +1099,7 @@ static void nfs3_xdr_enc_symlink3args(st + const struct nfs3_symlinkargs *args) + { + encode_diropargs3(xdr, args->fromfh, args->fromname, args->fromlen); +- encode_symlinkdata3(xdr, args); ++ encode_symlinkdata3(xdr, args, req->rq_task->tk_client->cl_tag); + } + + /* +@@ -1121,24 +1127,24 @@ static void nfs3_xdr_enc_symlink3args(st + * }; + */ + static void encode_devicedata3(struct xdr_stream *xdr, +- const struct nfs3_mknodargs *args) ++ const struct nfs3_mknodargs *args, int tag) + { +- encode_sattr3(xdr, args->sattr); ++ encode_sattr3(xdr, args->sattr, tag); + encode_specdata3(xdr, args->rdev); + } + + static void encode_mknoddata3(struct xdr_stream *xdr, +- const struct nfs3_mknodargs *args) ++ const struct nfs3_mknodargs *args, int tag) + { + encode_ftype3(xdr, args->type); + switch (args->type) { + case NF3CHR: + case NF3BLK: +- encode_devicedata3(xdr, args); ++ encode_devicedata3(xdr, args, tag); + break; 
+ case NF3SOCK: + case NF3FIFO: +- encode_sattr3(xdr, args->sattr); ++ encode_sattr3(xdr, args->sattr, tag); + break; + case NF3REG: + case NF3DIR: +@@ -1153,7 +1159,7 @@ static void nfs3_xdr_enc_mknod3args(stru + const struct nfs3_mknodargs *args) + { + encode_diropargs3(xdr, args->fh, args->name, args->len); +- encode_mknoddata3(xdr, args); ++ encode_mknoddata3(xdr, args, req->rq_task->tk_client->cl_tag); + } + + /* +diff -NurpP --minimal linux-3.2.34/fs/nfs/super.c linux-3.2.34-vs2.3.2.15/fs/nfs/super.c +--- linux-3.2.34/fs/nfs/super.c 2012-11-18 18:42:22.000000000 +0100 ++++ linux-3.2.34-vs2.3.2.15/fs/nfs/super.c 2012-11-18 21:11:16.000000000 +0100 +@@ -53,6 +53,7 @@ + #include + #include + #include ++#include + + #include + #include +@@ -87,6 +88,7 @@ enum { + Opt_sharecache, Opt_nosharecache, + Opt_resvport, Opt_noresvport, + Opt_fscache, Opt_nofscache, ++ Opt_tag, Opt_notag, + + /* Mount options that take integer arguments */ + Opt_port, +@@ -100,6 +102,7 @@ enum { + Opt_mountvers, + Opt_nfsvers, + Opt_minorversion, ++ Opt_tagid, + + /* Mount options that take string arguments */ + Opt_sec, Opt_proto, Opt_mountproto, Opt_mounthost, +@@ -180,6 +183,10 @@ static const match_table_t nfs_mount_opt + { Opt_fscache_uniq, "fsc=%s" }, + { Opt_local_lock, "local_lock=%s" }, + ++ { Opt_tag, "tag" }, ++ { Opt_notag, "notag" }, ++ { Opt_tagid, "tagid=%u" }, ++ + { Opt_err, NULL } + }; + +@@ -650,6 +657,7 @@ static void nfs_show_mount_options(struc + { NFS_MOUNT_NORDIRPLUS, ",nordirplus", "" }, + { NFS_MOUNT_UNSHARED, ",nosharecache", "" }, + { NFS_MOUNT_NORESVPORT, ",noresvport", "" }, ++ { NFS_MOUNT_TAGGED, ",tag", "" }, + { 0, NULL, NULL } + }; + const struct proc_nfs_info *nfs_infop; +@@ -1217,6 +1225,14 @@ static int nfs_parse_mount_options(char + kfree(mnt->fscache_uniq); + mnt->fscache_uniq = NULL; + break; ++#ifndef CONFIG_TAGGING_NONE ++ case Opt_tag: ++ mnt->flags |= NFS_MOUNT_TAGGED; ++ break; ++ case Opt_notag: ++ mnt->flags &= ~NFS_MOUNT_TAGGED; ++ break; ++#endif + + /* + * options that take numeric values +@@ -1323,6 +1339,12 @@ static int nfs_parse_mount_options(char + goto out_invalid_value; + mnt->minorversion = option; + break; ++#ifdef CONFIG_PROPAGATE ++ case Opt_tagid: ++ /* use args[0] */ ++ nfs_data.flags |= NFS_MOUNT_TAGGED; ++ break; ++#endif + + /* + * options that take text values +diff -NurpP --minimal linux-3.2.34/fs/nfsd/auth.c linux-3.2.34-vs2.3.2.15/fs/nfsd/auth.c +--- linux-3.2.34/fs/nfsd/auth.c 2010-02-25 11:52:05.000000000 +0100 ++++ linux-3.2.34-vs2.3.2.15/fs/nfsd/auth.c 2011-12-05 19:33:02.000000000 +0100 +@@ -1,6 +1,7 @@ + /* Copyright (C) 1995, 1996 Olaf Kirch */ + + #include ++#include + #include "nfsd.h" + #include "auth.h" + +@@ -36,6 +37,9 @@ int nfsd_setuser(struct svc_rqst *rqstp, + + new->fsuid = rqstp->rq_cred.cr_uid; + new->fsgid = rqstp->rq_cred.cr_gid; ++ /* FIXME: this desperately needs a tag :) ++ new->xid = (xid_t)INOTAG_TAG(DX_TAG_NFSD, cred.cr_uid, cred.cr_gid, 0); ++ */ + + rqgi = rqstp->rq_cred.cr_group_info; + +diff -NurpP --minimal linux-3.2.34/fs/nfsd/nfs3xdr.c linux-3.2.34-vs2.3.2.15/fs/nfsd/nfs3xdr.c +--- linux-3.2.34/fs/nfsd/nfs3xdr.c 2012-11-18 18:42:22.000000000 +0100 ++++ linux-3.2.34-vs2.3.2.15/fs/nfsd/nfs3xdr.c 2012-05-15 18:16:52.000000000 +0200 +@@ -7,6 +7,7 @@ + */ + + #include ++#include + #include "xdr3.h" + #include "auth.h" + +@@ -95,6 +96,8 @@ static __be32 * + decode_sattr3(__be32 *p, struct iattr *iap) + { + u32 tmp; ++ uid_t uid = 0; ++ gid_t gid = 0; + + iap->ia_valid = 0; + +@@ -104,12 +107,15 @@ 
decode_sattr3(__be32 *p, struct iattr *i + } + if (*p++) { + iap->ia_valid |= ATTR_UID; +- iap->ia_uid = ntohl(*p++); ++ uid = ntohl(*p++); + } + if (*p++) { + iap->ia_valid |= ATTR_GID; +- iap->ia_gid = ntohl(*p++); ++ gid = ntohl(*p++); + } ++ iap->ia_uid = INOTAG_UID(DX_TAG_NFSD, uid, gid); ++ iap->ia_gid = INOTAG_GID(DX_TAG_NFSD, uid, gid); ++ iap->ia_tag = INOTAG_TAG(DX_TAG_NFSD, uid, gid, 0); + if (*p++) { + u64 newsize; + +@@ -165,8 +171,12 @@ encode_fattr3(struct svc_rqst *rqstp, __ + *p++ = htonl(nfs3_ftypes[(stat->mode & S_IFMT) >> 12]); + *p++ = htonl((u32) stat->mode); + *p++ = htonl((u32) stat->nlink); +- *p++ = htonl((u32) nfsd_ruid(rqstp, stat->uid)); +- *p++ = htonl((u32) nfsd_rgid(rqstp, stat->gid)); ++ *p++ = htonl((u32) nfsd_ruid(rqstp, ++ TAGINO_UID(0 /* FIXME: DX_TAG(dentry->d_inode) */, ++ stat->uid, stat->tag))); ++ *p++ = htonl((u32) nfsd_rgid(rqstp, ++ TAGINO_GID(0 /* FIXME: DX_TAG(dentry->d_inode) */, ++ stat->gid, stat->tag))); + if (S_ISLNK(stat->mode) && stat->size > NFS3_MAXPATHLEN) { + p = xdr_encode_hyper(p, (u64) NFS3_MAXPATHLEN); + } else { +diff -NurpP --minimal linux-3.2.34/fs/nfsd/nfs4xdr.c linux-3.2.34-vs2.3.2.15/fs/nfsd/nfs4xdr.c +--- linux-3.2.34/fs/nfsd/nfs4xdr.c 2012-11-18 18:42:22.000000000 +0100 ++++ linux-3.2.34-vs2.3.2.15/fs/nfsd/nfs4xdr.c 2012-08-13 12:40:51.000000000 +0200 +@@ -46,6 +46,7 @@ + #include + #include + #include ++#include + + #include "idmap.h" + #include "acl.h" +@@ -2328,14 +2329,18 @@ out_acl: + WRITE32(stat.nlink); + } + if (bmval1 & FATTR4_WORD1_OWNER) { +- status = nfsd4_encode_user(rqstp, stat.uid, &p, &buflen); ++ status = nfsd4_encode_user(rqstp, ++ TAGINO_UID(DX_TAG(dentry->d_inode), ++ stat.uid, stat.tag), &p, &buflen); + if (status == nfserr_resource) + goto out_resource; + if (status) + goto out; + } + if (bmval1 & FATTR4_WORD1_OWNER_GROUP) { +- status = nfsd4_encode_group(rqstp, stat.gid, &p, &buflen); ++ status = nfsd4_encode_group(rqstp, ++ TAGINO_GID(DX_TAG(dentry->d_inode), ++ stat.gid, stat.tag), &p, &buflen); + if (status == nfserr_resource) + goto out_resource; + if (status) +diff -NurpP --minimal linux-3.2.34/fs/nfsd/nfsxdr.c linux-3.2.34-vs2.3.2.15/fs/nfsd/nfsxdr.c +--- linux-3.2.34/fs/nfsd/nfsxdr.c 2011-05-22 16:17:53.000000000 +0200 ++++ linux-3.2.34-vs2.3.2.15/fs/nfsd/nfsxdr.c 2011-12-05 19:33:02.000000000 +0100 +@@ -6,6 +6,7 @@ + + #include "xdr.h" + #include "auth.h" ++#include + + #define NFSDDBG_FACILITY NFSDDBG_XDR + +@@ -88,6 +89,8 @@ static __be32 * + decode_sattr(__be32 *p, struct iattr *iap) + { + u32 tmp, tmp1; ++ uid_t uid = 0; ++ gid_t gid = 0; + + iap->ia_valid = 0; + +@@ -101,12 +104,15 @@ decode_sattr(__be32 *p, struct iattr *ia + } + if ((tmp = ntohl(*p++)) != (u32)-1) { + iap->ia_valid |= ATTR_UID; +- iap->ia_uid = tmp; ++ uid = tmp; + } + if ((tmp = ntohl(*p++)) != (u32)-1) { + iap->ia_valid |= ATTR_GID; +- iap->ia_gid = tmp; ++ gid = tmp; + } ++ iap->ia_uid = INOTAG_UID(DX_TAG_NFSD, uid, gid); ++ iap->ia_gid = INOTAG_GID(DX_TAG_NFSD, uid, gid); ++ iap->ia_tag = INOTAG_TAG(DX_TAG_NFSD, uid, gid, 0); + if ((tmp = ntohl(*p++)) != (u32)-1) { + iap->ia_valid |= ATTR_SIZE; + iap->ia_size = tmp; +@@ -151,8 +157,10 @@ encode_fattr(struct svc_rqst *rqstp, __b + *p++ = htonl(nfs_ftypes[type >> 12]); + *p++ = htonl((u32) stat->mode); + *p++ = htonl((u32) stat->nlink); +- *p++ = htonl((u32) nfsd_ruid(rqstp, stat->uid)); +- *p++ = htonl((u32) nfsd_rgid(rqstp, stat->gid)); ++ *p++ = htonl((u32) nfsd_ruid(rqstp, ++ TAGINO_UID(DX_TAG(dentry->d_inode), stat->uid, stat->tag))); ++ *p++ = htonl((u32) 
nfsd_rgid(rqstp, ++ TAGINO_GID(DX_TAG(dentry->d_inode), stat->gid, stat->tag))); + + if (S_ISLNK(type) && stat->size > NFS_MAXPATHLEN) { + *p++ = htonl(NFS_MAXPATHLEN); +diff -NurpP --minimal linux-3.2.34/fs/ocfs2/dlmglue.c linux-3.2.34-vs2.3.2.15/fs/ocfs2/dlmglue.c +--- linux-3.2.34/fs/ocfs2/dlmglue.c 2012-01-09 16:14:55.000000000 +0100 ++++ linux-3.2.34-vs2.3.2.15/fs/ocfs2/dlmglue.c 2011-12-05 19:33:02.000000000 +0100 +@@ -2047,6 +2047,7 @@ static void __ocfs2_stuff_meta_lvb(struc + lvb->lvb_iclusters = cpu_to_be32(oi->ip_clusters); + lvb->lvb_iuid = cpu_to_be32(inode->i_uid); + lvb->lvb_igid = cpu_to_be32(inode->i_gid); ++ lvb->lvb_itag = cpu_to_be16(inode->i_tag); + lvb->lvb_imode = cpu_to_be16(inode->i_mode); + lvb->lvb_inlink = cpu_to_be16(inode->i_nlink); + lvb->lvb_iatime_packed = +@@ -2097,6 +2098,7 @@ static void ocfs2_refresh_inode_from_lvb + + inode->i_uid = be32_to_cpu(lvb->lvb_iuid); + inode->i_gid = be32_to_cpu(lvb->lvb_igid); ++ inode->i_tag = be16_to_cpu(lvb->lvb_itag); + inode->i_mode = be16_to_cpu(lvb->lvb_imode); + set_nlink(inode, be16_to_cpu(lvb->lvb_inlink)); + ocfs2_unpack_timespec(&inode->i_atime, +diff -NurpP --minimal linux-3.2.34/fs/ocfs2/dlmglue.h linux-3.2.34-vs2.3.2.15/fs/ocfs2/dlmglue.h +--- linux-3.2.34/fs/ocfs2/dlmglue.h 2010-10-21 13:07:50.000000000 +0200 ++++ linux-3.2.34-vs2.3.2.15/fs/ocfs2/dlmglue.h 2011-12-05 19:33:02.000000000 +0100 +@@ -46,7 +46,8 @@ struct ocfs2_meta_lvb { + __be16 lvb_inlink; + __be32 lvb_iattr; + __be32 lvb_igeneration; +- __be32 lvb_reserved2; ++ __be16 lvb_itag; ++ __be16 lvb_reserved2; + }; + + #define OCFS2_QINFO_LVB_VERSION 1 +diff -NurpP --minimal linux-3.2.34/fs/ocfs2/file.c linux-3.2.34-vs2.3.2.15/fs/ocfs2/file.c +--- linux-3.2.34/fs/ocfs2/file.c 2012-11-18 18:42:22.000000000 +0100 ++++ linux-3.2.34-vs2.3.2.15/fs/ocfs2/file.c 2012-08-13 12:40:51.000000000 +0200 +@@ -1123,7 +1123,7 @@ int ocfs2_setattr(struct dentry *dentry, + attr->ia_valid &= ~ATTR_SIZE; + + #define OCFS2_VALID_ATTRS (ATTR_ATIME | ATTR_MTIME | ATTR_CTIME | ATTR_SIZE \ +- | ATTR_GID | ATTR_UID | ATTR_MODE) ++ | ATTR_GID | ATTR_UID | ATTR_TAG | ATTR_MODE) + if (!(attr->ia_valid & OCFS2_VALID_ATTRS)) + return 0; + +diff -NurpP --minimal linux-3.2.34/fs/ocfs2/inode.c linux-3.2.34-vs2.3.2.15/fs/ocfs2/inode.c +--- linux-3.2.34/fs/ocfs2/inode.c 2012-01-09 16:14:55.000000000 +0100 ++++ linux-3.2.34-vs2.3.2.15/fs/ocfs2/inode.c 2011-12-05 19:33:02.000000000 +0100 +@@ -28,6 +28,7 @@ + #include + #include + #include ++#include + + #include + +@@ -78,11 +79,13 @@ void ocfs2_set_inode_flags(struct inode + { + unsigned int flags = OCFS2_I(inode)->ip_attr; + +- inode->i_flags &= ~(S_IMMUTABLE | ++ inode->i_flags &= ~(S_IMMUTABLE | S_IXUNLINK | + S_SYNC | S_APPEND | S_NOATIME | S_DIRSYNC); + + if (flags & OCFS2_IMMUTABLE_FL) + inode->i_flags |= S_IMMUTABLE; ++ if (flags & OCFS2_IXUNLINK_FL) ++ inode->i_flags |= S_IXUNLINK; + + if (flags & OCFS2_SYNC_FL) + inode->i_flags |= S_SYNC; +@@ -92,25 +95,44 @@ void ocfs2_set_inode_flags(struct inode + inode->i_flags |= S_NOATIME; + if (flags & OCFS2_DIRSYNC_FL) + inode->i_flags |= S_DIRSYNC; ++ ++ inode->i_vflags &= ~(V_BARRIER | V_COW); ++ ++ if (flags & OCFS2_BARRIER_FL) ++ inode->i_vflags |= V_BARRIER; ++ if (flags & OCFS2_COW_FL) ++ inode->i_vflags |= V_COW; + } + + /* Propagate flags from i_flags to OCFS2_I(inode)->ip_attr */ + void ocfs2_get_inode_flags(struct ocfs2_inode_info *oi) + { + unsigned int flags = oi->vfs_inode.i_flags; ++ unsigned int vflags = oi->vfs_inode.i_vflags; ++ ++ oi->ip_attr &= ~(OCFS2_SYNC_FL | 
OCFS2_APPEND_FL | ++ OCFS2_IMMUTABLE_FL | OCFS2_IXUNLINK_FL | ++ OCFS2_NOATIME_FL | OCFS2_DIRSYNC_FL | ++ OCFS2_BARRIER_FL | OCFS2_COW_FL); ++ ++ if (flags & S_IMMUTABLE) ++ oi->ip_attr |= OCFS2_IMMUTABLE_FL; ++ if (flags & S_IXUNLINK) ++ oi->ip_attr |= OCFS2_IXUNLINK_FL; + +- oi->ip_attr &= ~(OCFS2_SYNC_FL|OCFS2_APPEND_FL| +- OCFS2_IMMUTABLE_FL|OCFS2_NOATIME_FL|OCFS2_DIRSYNC_FL); + if (flags & S_SYNC) + oi->ip_attr |= OCFS2_SYNC_FL; + if (flags & S_APPEND) + oi->ip_attr |= OCFS2_APPEND_FL; +- if (flags & S_IMMUTABLE) +- oi->ip_attr |= OCFS2_IMMUTABLE_FL; + if (flags & S_NOATIME) + oi->ip_attr |= OCFS2_NOATIME_FL; + if (flags & S_DIRSYNC) + oi->ip_attr |= OCFS2_DIRSYNC_FL; ++ ++ if (vflags & V_BARRIER) ++ oi->ip_attr |= OCFS2_BARRIER_FL; ++ if (vflags & V_COW) ++ oi->ip_attr |= OCFS2_COW_FL; + } + + struct inode *ocfs2_ilookup(struct super_block *sb, u64 blkno) +@@ -241,6 +263,8 @@ void ocfs2_populate_inode(struct inode * + struct super_block *sb; + struct ocfs2_super *osb; + int use_plocks = 1; ++ uid_t uid; ++ gid_t gid; + + sb = inode->i_sb; + osb = OCFS2_SB(sb); +@@ -269,8 +293,12 @@ void ocfs2_populate_inode(struct inode * + inode->i_generation = le32_to_cpu(fe->i_generation); + inode->i_rdev = huge_decode_dev(le64_to_cpu(fe->id1.dev1.i_rdev)); + inode->i_mode = le16_to_cpu(fe->i_mode); +- inode->i_uid = le32_to_cpu(fe->i_uid); +- inode->i_gid = le32_to_cpu(fe->i_gid); ++ uid = le32_to_cpu(fe->i_uid); ++ gid = le32_to_cpu(fe->i_gid); ++ inode->i_uid = INOTAG_UID(DX_TAG(inode), uid, gid); ++ inode->i_gid = INOTAG_GID(DX_TAG(inode), uid, gid); ++ inode->i_tag = INOTAG_TAG(DX_TAG(inode), uid, gid, ++ /* le16_to_cpu(raw_inode->i_raw_tag)i */ 0); + + /* Fast symlinks will have i_size but no allocated clusters. */ + if (S_ISLNK(inode->i_mode) && !fe->i_clusters) +diff -NurpP --minimal linux-3.2.34/fs/ocfs2/inode.h linux-3.2.34-vs2.3.2.15/fs/ocfs2/inode.h +--- linux-3.2.34/fs/ocfs2/inode.h 2012-01-09 16:14:55.000000000 +0100 ++++ linux-3.2.34-vs2.3.2.15/fs/ocfs2/inode.h 2011-12-05 19:33:02.000000000 +0100 +@@ -154,6 +154,7 @@ struct buffer_head *ocfs2_bread(struct i + + void ocfs2_set_inode_flags(struct inode *inode); + void ocfs2_get_inode_flags(struct ocfs2_inode_info *oi); ++int ocfs2_sync_flags(struct inode *inode, int, int); + + static inline blkcnt_t ocfs2_inode_sector_count(struct inode *inode) + { +diff -NurpP --minimal linux-3.2.34/fs/ocfs2/ioctl.c linux-3.2.34-vs2.3.2.15/fs/ocfs2/ioctl.c +--- linux-3.2.34/fs/ocfs2/ioctl.c 2012-01-09 16:14:55.000000000 +0100 ++++ linux-3.2.34-vs2.3.2.15/fs/ocfs2/ioctl.c 2011-12-05 19:33:02.000000000 +0100 +@@ -78,7 +78,41 @@ static int ocfs2_get_inode_attr(struct i + return status; + } + +-static int ocfs2_set_inode_attr(struct inode *inode, unsigned flags, ++int ocfs2_sync_flags(struct inode *inode, int flags, int vflags) ++{ ++ struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); ++ struct buffer_head *bh = NULL; ++ handle_t *handle = NULL; ++ int status; ++ ++ status = ocfs2_inode_lock(inode, &bh, 1); ++ if (status < 0) { ++ mlog_errno(status); ++ return status; ++ } ++ handle = ocfs2_start_trans(osb, OCFS2_INODE_UPDATE_CREDITS); ++ if (IS_ERR(handle)) { ++ status = PTR_ERR(handle); ++ mlog_errno(status); ++ goto bail_unlock; ++ } ++ ++ inode->i_flags = flags; ++ inode->i_vflags = vflags; ++ ocfs2_get_inode_flags(OCFS2_I(inode)); ++ ++ status = ocfs2_mark_inode_dirty(handle, inode, bh); ++ if (status < 0) ++ mlog_errno(status); ++ ++ ocfs2_commit_trans(osb, handle); ++bail_unlock: ++ ocfs2_inode_unlock(inode, 1); ++ brelse(bh); ++ return status; ++} 
++ ++int ocfs2_set_inode_attr(struct inode *inode, unsigned flags, + unsigned mask) + { + struct ocfs2_inode_info *ocfs2_inode = OCFS2_I(inode); +@@ -103,6 +137,11 @@ static int ocfs2_set_inode_attr(struct i + if (!S_ISDIR(inode->i_mode)) + flags &= ~OCFS2_DIRSYNC_FL; + ++ if (IS_BARRIER(inode)) { ++ vxwprintk_task(1, "messing with the barrier."); ++ goto bail_unlock; ++ } ++ + handle = ocfs2_start_trans(osb, OCFS2_INODE_UPDATE_CREDITS); + if (IS_ERR(handle)) { + status = PTR_ERR(handle); +@@ -881,6 +920,7 @@ bail: + return status; + } + ++ + long ocfs2_ioctl(struct file *filp, unsigned int cmd, unsigned long arg) + { + struct inode *inode = filp->f_path.dentry->d_inode; +diff -NurpP --minimal linux-3.2.34/fs/ocfs2/namei.c linux-3.2.34-vs2.3.2.15/fs/ocfs2/namei.c +--- linux-3.2.34/fs/ocfs2/namei.c 2012-01-09 16:14:55.000000000 +0100 ++++ linux-3.2.34-vs2.3.2.15/fs/ocfs2/namei.c 2011-12-05 19:33:02.000000000 +0100 +@@ -41,6 +41,7 @@ + #include + #include + #include ++#include + + #include + +@@ -475,6 +476,7 @@ static int __ocfs2_mknod_locked(struct i + struct ocfs2_dinode *fe = NULL; + struct ocfs2_extent_list *fel; + u16 feat; ++ tag_t tag; + + *new_fe_bh = NULL; + +@@ -512,8 +514,11 @@ static int __ocfs2_mknod_locked(struct i + fe->i_suballoc_loc = cpu_to_le64(suballoc_loc); + fe->i_suballoc_bit = cpu_to_le16(suballoc_bit); + fe->i_suballoc_slot = cpu_to_le16(inode_ac->ac_alloc_slot); +- fe->i_uid = cpu_to_le32(inode->i_uid); +- fe->i_gid = cpu_to_le32(inode->i_gid); ++ ++ tag = dx_current_fstag(osb->sb); ++ fe->i_uid = cpu_to_le32(TAGINO_UID(DX_TAG(inode), inode->i_uid, tag)); ++ fe->i_gid = cpu_to_le32(TAGINO_GID(DX_TAG(inode), inode->i_gid, tag)); ++ inode->i_tag = tag; + fe->i_mode = cpu_to_le16(inode->i_mode); + if (S_ISCHR(inode->i_mode) || S_ISBLK(inode->i_mode)) + fe->id1.dev1.i_rdev = cpu_to_le64(huge_encode_dev(dev)); +diff -NurpP --minimal linux-3.2.34/fs/ocfs2/ocfs2.h linux-3.2.34-vs2.3.2.15/fs/ocfs2/ocfs2.h +--- linux-3.2.34/fs/ocfs2/ocfs2.h 2012-01-09 16:14:55.000000000 +0100 ++++ linux-3.2.34-vs2.3.2.15/fs/ocfs2/ocfs2.h 2011-12-05 19:33:02.000000000 +0100 +@@ -272,6 +272,7 @@ enum ocfs2_mount_options + writes */ + OCFS2_MOUNT_HB_NONE = 1 << 13, /* No heartbeat */ + OCFS2_MOUNT_HB_GLOBAL = 1 << 14, /* Global heartbeat */ ++ OCFS2_MOUNT_TAGGED = 1 << 15, /* use tagging */ + }; + + #define OCFS2_OSB_SOFT_RO 0x0001 +diff -NurpP --minimal linux-3.2.34/fs/ocfs2/ocfs2_fs.h linux-3.2.34-vs2.3.2.15/fs/ocfs2/ocfs2_fs.h +--- linux-3.2.34/fs/ocfs2/ocfs2_fs.h 2011-05-22 16:17:53.000000000 +0200 ++++ linux-3.2.34-vs2.3.2.15/fs/ocfs2/ocfs2_fs.h 2011-12-05 19:33:02.000000000 +0100 +@@ -266,6 +266,11 @@ + #define OCFS2_TOPDIR_FL FS_TOPDIR_FL /* Top of directory hierarchies*/ + #define OCFS2_RESERVED_FL FS_RESERVED_FL /* reserved for ext2 lib */ + ++#define OCFS2_IXUNLINK_FL FS_IXUNLINK_FL /* Immutable invert on unlink */ ++ ++#define OCFS2_BARRIER_FL FS_BARRIER_FL /* Barrier for chroot() */ ++#define OCFS2_COW_FL FS_COW_FL /* Copy on Write marker */ ++ + #define OCFS2_FL_VISIBLE FS_FL_USER_VISIBLE /* User visible flags */ + #define OCFS2_FL_MODIFIABLE FS_FL_USER_MODIFIABLE /* User modifiable flags */ + +diff -NurpP --minimal linux-3.2.34/fs/ocfs2/super.c linux-3.2.34-vs2.3.2.15/fs/ocfs2/super.c +--- linux-3.2.34/fs/ocfs2/super.c 2012-01-09 16:14:55.000000000 +0100 ++++ linux-3.2.34-vs2.3.2.15/fs/ocfs2/super.c 2011-12-05 19:33:02.000000000 +0100 +@@ -185,6 +185,7 @@ enum { + Opt_coherency_full, + Opt_resv_level, + Opt_dir_resv_level, ++ Opt_tag, Opt_notag, Opt_tagid, + Opt_err, + }; + +@@ 
-216,6 +217,9 @@ static const match_table_t tokens = { + {Opt_coherency_full, "coherency=full"}, + {Opt_resv_level, "resv_level=%u"}, + {Opt_dir_resv_level, "dir_resv_level=%u"}, ++ {Opt_tag, "tag"}, ++ {Opt_notag, "notag"}, ++ {Opt_tagid, "tagid=%u"}, + {Opt_err, NULL} + }; + +@@ -663,6 +667,13 @@ static int ocfs2_remount(struct super_bl + goto out; + } + ++ if ((osb->s_mount_opt & OCFS2_MOUNT_TAGGED) != ++ (parsed_options.mount_opt & OCFS2_MOUNT_TAGGED)) { ++ ret = -EINVAL; ++ mlog(ML_ERROR, "Cannot change tagging on remount\n"); ++ goto out; ++ } ++ + /* We're going to/from readonly mode. */ + if ((*flags & MS_RDONLY) != (sb->s_flags & MS_RDONLY)) { + /* Disable quota accounting before remounting RO */ +@@ -1178,6 +1189,9 @@ static int ocfs2_fill_super(struct super + + ocfs2_complete_mount_recovery(osb); + ++ if (osb->s_mount_opt & OCFS2_MOUNT_TAGGED) ++ sb->s_flags |= MS_TAGGED; ++ + if (ocfs2_mount_local(osb)) + snprintf(nodestr, sizeof(nodestr), "local"); + else +@@ -1507,6 +1521,20 @@ static int ocfs2_parse_options(struct su + option < OCFS2_MAX_RESV_LEVEL) + mopt->dir_resv_level = option; + break; ++#ifndef CONFIG_TAGGING_NONE ++ case Opt_tag: ++ mopt->mount_opt |= OCFS2_MOUNT_TAGGED; ++ break; ++ case Opt_notag: ++ mopt->mount_opt &= ~OCFS2_MOUNT_TAGGED; ++ break; ++#endif ++#ifdef CONFIG_PROPAGATE ++ case Opt_tagid: ++ /* use args[0] */ ++ mopt->mount_opt |= OCFS2_MOUNT_TAGGED; ++ break; ++#endif + default: + mlog(ML_ERROR, + "Unrecognized mount option \"%s\" " +diff -NurpP --minimal linux-3.2.34/fs/open.c linux-3.2.34-vs2.3.2.15/fs/open.c +--- linux-3.2.34/fs/open.c 2012-11-18 18:42:22.000000000 +0100 ++++ linux-3.2.34-vs2.3.2.15/fs/open.c 2012-09-16 18:25:50.000000000 +0200 +@@ -30,6 +30,11 @@ + #include + #include + #include ++#include ++#include ++#include ++#include ++#include + + #include "internal.h" + +@@ -74,6 +79,12 @@ static long do_sys_truncate(const char _ + error = user_path(pathname, &path); + if (error) + goto out; ++ ++#ifdef CONFIG_VSERVER_COWBL ++ error = cow_check_and_break(&path); ++ if (error) ++ goto dput_and_out; ++#endif + inode = path.dentry->d_inode; + + /* For directories it's -EISDIR, for other non-regulars - -EINVAL */ +@@ -489,6 +500,10 @@ SYSCALL_DEFINE3(fchmodat, int, dfd, cons + + error = user_path_at(dfd, filename, LOOKUP_FOLLOW, &path); + if (!error) { ++#ifdef CONFIG_VSERVER_COWBL ++ error = cow_check_and_break(&path); ++ if (!error) ++#endif + error = chmod_common(&path, mode); + path_put(&path); + } +@@ -509,11 +524,11 @@ static int chown_common(struct path *pat + newattrs.ia_valid = ATTR_CTIME; + if (user != (uid_t) -1) { + newattrs.ia_valid |= ATTR_UID; +- newattrs.ia_uid = user; ++ newattrs.ia_uid = dx_map_uid(user); + } + if (group != (gid_t) -1) { + newattrs.ia_valid |= ATTR_GID; +- newattrs.ia_gid = group; ++ newattrs.ia_gid = dx_map_gid(group); + } + if (!S_ISDIR(inode->i_mode)) + newattrs.ia_valid |= +@@ -538,6 +553,10 @@ SYSCALL_DEFINE3(chown, const char __user + error = mnt_want_write(path.mnt); + if (error) + goto out_release; ++#ifdef CONFIG_VSERVER_COWBL ++ error = cow_check_and_break(&path); ++ if (!error) ++#endif + error = chown_common(&path, user, group); + mnt_drop_write(path.mnt); + out_release: +@@ -565,6 +584,10 @@ SYSCALL_DEFINE5(fchownat, int, dfd, cons + error = mnt_want_write(path.mnt); + if (error) + goto out_release; ++#ifdef CONFIG_VSERVER_COWBL ++ error = cow_check_and_break(&path); ++ if (!error) ++#endif + error = chown_common(&path, user, group); + mnt_drop_write(path.mnt); + out_release: +@@ -584,6 +607,10 @@ 
SYSCALL_DEFINE3(lchown, const char __use + error = mnt_want_write(path.mnt); + if (error) + goto out_release; ++#ifdef CONFIG_VSERVER_COWBL ++ error = cow_check_and_break(&path); ++ if (!error) ++#endif + error = chown_common(&path, user, group); + mnt_drop_write(path.mnt); + out_release: +@@ -839,6 +866,7 @@ static void __put_unused_fd(struct files + __FD_CLR(fd, fdt->open_fds); + if (fd < files->next_fd) + files->next_fd = fd; ++ vx_openfd_dec(fd); + } + + void put_unused_fd(unsigned int fd) +diff -NurpP --minimal linux-3.2.34/fs/proc/array.c linux-3.2.34-vs2.3.2.15/fs/proc/array.c +--- linux-3.2.34/fs/proc/array.c 2011-10-24 18:45:27.000000000 +0200 ++++ linux-3.2.34-vs2.3.2.15/fs/proc/array.c 2011-12-05 19:33:02.000000000 +0100 +@@ -81,6 +81,8 @@ + #include + #include + #include ++#include ++#include + + #include + #include +@@ -170,6 +172,9 @@ static inline void task_state(struct seq + rcu_read_lock(); + ppid = pid_alive(p) ? + task_tgid_nr_ns(rcu_dereference(p->real_parent), ns) : 0; ++ if (unlikely(vx_current_initpid(p->pid))) ++ ppid = 0; ++ + tpid = 0; + if (pid_alive(p)) { + struct task_struct *tracer = ptrace_parent(p); +@@ -287,7 +292,7 @@ static inline void task_sig(struct seq_f + } + + static void render_cap_t(struct seq_file *m, const char *header, +- kernel_cap_t *a) ++ struct vx_info *vxi, kernel_cap_t *a) + { + unsigned __capi; + +@@ -312,10 +317,11 @@ static inline void task_cap(struct seq_f + cap_bset = cred->cap_bset; + rcu_read_unlock(); + +- render_cap_t(m, "CapInh:\t", &cap_inheritable); +- render_cap_t(m, "CapPrm:\t", &cap_permitted); +- render_cap_t(m, "CapEff:\t", &cap_effective); +- render_cap_t(m, "CapBnd:\t", &cap_bset); ++ /* FIXME: maybe move the p->vx_info masking to __task_cred() ? */ ++ render_cap_t(m, "CapInh:\t", p->vx_info, &cap_inheritable); ++ render_cap_t(m, "CapPrm:\t", p->vx_info, &cap_permitted); ++ render_cap_t(m, "CapEff:\t", p->vx_info, &cap_effective); ++ render_cap_t(m, "CapBnd:\t", p->vx_info, &cap_bset); + } + + static inline void task_context_switch_counts(struct seq_file *m, +@@ -337,6 +343,42 @@ static void task_cpus_allowed(struct seq + seq_putc(m, '\n'); + } + ++int proc_pid_nsproxy(struct seq_file *m, struct pid_namespace *ns, ++ struct pid *pid, struct task_struct *task) ++{ ++ seq_printf(m, "Proxy:\t%p(%c)\n" ++ "Count:\t%u\n" ++ "uts:\t%p(%c)\n" ++ "ipc:\t%p(%c)\n" ++ "mnt:\t%p(%c)\n" ++ "pid:\t%p(%c)\n" ++ "net:\t%p(%c)\n", ++ task->nsproxy, ++ (task->nsproxy == init_task.nsproxy ? 'I' : '-'), ++ atomic_read(&task->nsproxy->count), ++ task->nsproxy->uts_ns, ++ (task->nsproxy->uts_ns == init_task.nsproxy->uts_ns ? 'I' : '-'), ++ task->nsproxy->ipc_ns, ++ (task->nsproxy->ipc_ns == init_task.nsproxy->ipc_ns ? 'I' : '-'), ++ task->nsproxy->mnt_ns, ++ (task->nsproxy->mnt_ns == init_task.nsproxy->mnt_ns ? 'I' : '-'), ++ task->nsproxy->pid_ns, ++ (task->nsproxy->pid_ns == init_task.nsproxy->pid_ns ? 'I' : '-'), ++ task->nsproxy->net_ns, ++ (task->nsproxy->net_ns == init_task.nsproxy->net_ns ? 
'I' : '-')); ++ return 0; ++} ++ ++void task_vs_id(struct seq_file *m, struct task_struct *task) ++{ ++ if (task_vx_flags(task, VXF_HIDE_VINFO, 0)) ++ return; ++ ++ seq_printf(m, "VxID: %d\n", vx_task_xid(task)); ++ seq_printf(m, "NxID: %d\n", nx_task_nid(task)); ++} ++ ++ + int proc_pid_status(struct seq_file *m, struct pid_namespace *ns, + struct pid *pid, struct task_struct *task) + { +@@ -353,6 +395,7 @@ int proc_pid_status(struct seq_file *m, + task_cap(m, task); + task_cpus_allowed(m, task); + cpuset_task_status_allowed(m, task); ++ task_vs_id(m, task); + task_context_switch_counts(m, task); + return 0; + } +@@ -462,6 +505,17 @@ static int do_task_stat(struct seq_file + /* convert nsec -> ticks */ + start_time = nsec_to_clock_t(start_time); + ++ /* fixup start time for virt uptime */ ++ if (vx_flags(VXF_VIRT_UPTIME, 0)) { ++ unsigned long long bias = ++ current->vx_info->cvirt.bias_clock; ++ ++ if (start_time > bias) ++ start_time -= bias; ++ else ++ start_time = 0; ++ } ++ + seq_printf(m, "%d (%s) %c %d %d %d %d %d %u %lu \ + %lu %lu %lu %lu %lu %ld %ld %ld %ld %d 0 %llu %lu %ld %lu %lu %lu %lu %lu \ + %lu %lu %lu %lu %lu %lu %lu %lu %d %d %u %u %llu %lu %ld\n", +diff -NurpP --minimal linux-3.2.34/fs/proc/base.c linux-3.2.34-vs2.3.2.15/fs/proc/base.c +--- linux-3.2.34/fs/proc/base.c 2012-11-18 18:42:22.000000000 +0100 ++++ linux-3.2.34-vs2.3.2.15/fs/proc/base.c 2012-02-15 03:26:56.000000000 +0100 +@@ -83,6 +83,8 @@ + #include + #include + #include ++#include ++#include + #ifdef CONFIG_HARDWALL + #include + #endif +@@ -1021,11 +1023,16 @@ static ssize_t oom_adjust_write(struct f + goto err_task_lock; + } + +- if (oom_adjust < task->signal->oom_adj && !capable(CAP_SYS_RESOURCE)) { ++ if (oom_adjust < task->signal->oom_adj && ++ !vx_capable(CAP_SYS_RESOURCE, VXC_OOM_ADJUST)) { + err = -EACCES; + goto err_sighand; + } + ++ /* prevent guest processes from circumventing the oom killer */ ++ if (vx_current_xid() && (oom_adjust == OOM_DISABLE)) ++ oom_adjust = OOM_ADJUST_MIN; ++ + /* + * Warn that /proc/pid/oom_adj is deprecated, see + * Documentation/feature-removal-schedule.txt. +@@ -1180,7 +1187,7 @@ static ssize_t proc_loginuid_write(struc + ssize_t length; + uid_t loginuid; + +- if (!capable(CAP_AUDIT_CONTROL)) ++ if (!vx_capable(CAP_AUDIT_CONTROL, VXC_AUDIT_CONTROL)) + return -EPERM; + + rcu_read_lock(); +@@ -1627,6 +1634,8 @@ struct inode *proc_pid_make_inode(struct + inode->i_gid = cred->egid; + rcu_read_unlock(); + } ++ /* procfs is xid tagged */ ++ inode->i_tag = (tag_t)vx_task_xid(task); + security_task_to_inode(task, inode); + + out: +@@ -1663,6 +1672,8 @@ int pid_getattr(struct vfsmount *mnt, st + + /* dentry stuff */ + ++static unsigned name_to_int(struct dentry *dentry); ++ + /* + * Exceptional case: normally we are not allowed to unhash a busy + * directory. In this case, however, we can do it - no aliasing problems +@@ -1691,6 +1702,12 @@ int pid_revalidate(struct dentry *dentry + task = get_proc_task(inode); + + if (task) { ++ unsigned pid = name_to_int(dentry); ++ ++ if (pid != ~0U && pid != vx_map_pid(task->pid)) { ++ put_task_struct(task); ++ goto drop; ++ } + if ((inode->i_mode == (S_IFDIR|S_IRUGO|S_IXUGO)) || + task_dumpable(task)) { + rcu_read_lock(); +@@ -1707,6 +1724,7 @@ int pid_revalidate(struct dentry *dentry + put_task_struct(task); + return 1; + } ++drop: + d_drop(dentry); + return 0; + } +@@ -2196,6 +2214,13 @@ static struct dentry *proc_pident_lookup + if (!task) + goto out_no_task; + ++ /* TODO: maybe we can come up with a generic approach? 
*/ ++ if (task_vx_flags(task, VXF_HIDE_VINFO, 0) && ++ (dentry->d_name.len == 5) && ++ (!memcmp(dentry->d_name.name, "vinfo", 5) || ++ !memcmp(dentry->d_name.name, "ninfo", 5))) ++ goto out; ++ + /* + * Yes, it does not scale. And it should not. Don't add + * new entries into /proc// without very good reasons. +@@ -2581,7 +2606,7 @@ out_iput: + static struct dentry *proc_base_lookup(struct inode *dir, struct dentry *dentry) + { + struct dentry *error; +- struct task_struct *task = get_proc_task(dir); ++ struct task_struct *task = get_proc_task_real(dir); + const struct pid_entry *p, *last; + + error = ERR_PTR(-ENOENT); +@@ -2688,6 +2713,9 @@ static int proc_pid_personality(struct s + static const struct file_operations proc_task_operations; + static const struct inode_operations proc_task_inode_operations; + ++extern int proc_pid_vx_info(struct task_struct *, char *); ++extern int proc_pid_nx_info(struct task_struct *, char *); ++ + static const struct pid_entry tgid_base_stuff[] = { + DIR("task", S_IRUGO|S_IXUGO, proc_task_inode_operations, proc_task_operations), + DIR("fd", S_IRUSR|S_IXUSR, proc_fd_inode_operations, proc_fd_operations), +@@ -2751,6 +2779,8 @@ static const struct pid_entry tgid_base_ + #ifdef CONFIG_CGROUPS + REG("cgroup", S_IRUGO, proc_cgroup_operations), + #endif ++ INF("vinfo", S_IRUGO, proc_pid_vx_info), ++ INF("ninfo", S_IRUGO, proc_pid_nx_info), + INF("oom_score", S_IRUGO, proc_oom_score), + REG("oom_adj", S_IRUGO|S_IWUSR, proc_oom_adjust_operations), + REG("oom_score_adj", S_IRUGO|S_IWUSR, proc_oom_score_adj_operations), +@@ -2770,6 +2800,7 @@ static const struct pid_entry tgid_base_ + #ifdef CONFIG_HARDWALL + INF("hardwall", S_IRUGO, proc_pid_hardwall), + #endif ++ ONE("nsproxy", S_IRUGO, proc_pid_nsproxy), + }; + + static int proc_tgid_base_readdir(struct file * filp, +@@ -2962,7 +2993,7 @@ retry: + iter.task = NULL; + pid = find_ge_pid(iter.tgid, ns); + if (pid) { +- iter.tgid = pid_nr_ns(pid, ns); ++ iter.tgid = pid_unmapped_nr_ns(pid, ns); + iter.task = pid_task(pid, PIDTYPE_PID); + /* What we to know is if the pid we have find is the + * pid of a thread_group_leader. 
Testing for task +@@ -2992,7 +3023,7 @@ static int proc_pid_fill_cache(struct fi + struct tgid_iter iter) + { + char name[PROC_NUMBUF]; +- int len = snprintf(name, sizeof(name), "%d", iter.tgid); ++ int len = snprintf(name, sizeof(name), "%d", vx_map_tgid(iter.tgid)); + return proc_fill_cache(filp, dirent, filldir, name, len, + proc_pid_instantiate, iter.task, NULL); + } +@@ -3009,7 +3040,7 @@ int proc_pid_readdir(struct file * filp, + goto out_no_task; + nr = filp->f_pos - FIRST_PROCESS_ENTRY; + +- reaper = get_proc_task(filp->f_path.dentry->d_inode); ++ reaper = get_proc_task_real(filp->f_path.dentry->d_inode); + if (!reaper) + goto out_no_task; + +@@ -3026,6 +3057,8 @@ int proc_pid_readdir(struct file * filp, + iter.task; + iter.tgid += 1, iter = next_tgid(ns, iter)) { + filp->f_pos = iter.tgid + TGID_OFFSET; ++ if (!vx_proc_task_visible(iter.task)) ++ continue; + if (proc_pid_fill_cache(filp, dirent, filldir, iter) < 0) { + put_task_struct(iter.task); + goto out; +@@ -3179,6 +3212,8 @@ static struct dentry *proc_task_lookup(s + tid = name_to_int(dentry); + if (tid == ~0U) + goto out; ++ if (vx_current_initpid(tid)) ++ goto out; + + ns = dentry->d_sb->s_fs_info; + rcu_read_lock(); +diff -NurpP --minimal linux-3.2.34/fs/proc/generic.c linux-3.2.34-vs2.3.2.15/fs/proc/generic.c +--- linux-3.2.34/fs/proc/generic.c 2012-01-09 16:14:55.000000000 +0100 ++++ linux-3.2.34-vs2.3.2.15/fs/proc/generic.c 2011-12-05 19:33:02.000000000 +0100 +@@ -22,6 +22,7 @@ + #include + #include + #include ++#include + #include + + #include "internal.h" +@@ -424,11 +425,15 @@ struct dentry *proc_lookup_de(struct pro + for (de = de->subdir; de ; de = de->next) { + if (de->namelen != dentry->d_name.len) + continue; ++ if (!vx_hide_check(0, de->vx_flags)) ++ continue; + if (!memcmp(dentry->d_name.name, de->name, de->namelen)) { + pde_get(de); + spin_unlock(&proc_subdir_lock); + error = -EINVAL; + inode = proc_get_inode(dir->i_sb, de); ++ /* generic proc entries belong to the host */ ++ inode->i_tag = 0; + goto out_unlock; + } + } +@@ -506,6 +511,8 @@ int proc_readdir_de(struct proc_dir_entr + + /* filldir passes info to user space */ + pde_get(de); ++ if (!vx_hide_check(0, de->vx_flags)) ++ goto skip; + spin_unlock(&proc_subdir_lock); + if (filldir(dirent, de->name, de->namelen, filp->f_pos, + de->low_ino, de->mode >> 12) < 0) { +@@ -513,6 +520,7 @@ int proc_readdir_de(struct proc_dir_entr + goto out; + } + spin_lock(&proc_subdir_lock); ++ skip: + filp->f_pos++; + next = de->next; + pde_put(de); +@@ -626,6 +634,7 @@ static struct proc_dir_entry *__proc_cre + ent->nlink = nlink; + atomic_set(&ent->count, 1); + ent->pde_users = 0; ++ ent->vx_flags = IATTR_PROC_DEFAULT; + spin_lock_init(&ent->pde_unload_lock); + ent->pde_unload_completion = NULL; + INIT_LIST_HEAD(&ent->pde_openers); +@@ -649,7 +658,8 @@ struct proc_dir_entry *proc_symlink(cons + kfree(ent->data); + kfree(ent); + ent = NULL; +- } ++ } else ++ ent->vx_flags = IATTR_PROC_SYMLINK; + } else { + kfree(ent); + ent = NULL; +diff -NurpP --minimal linux-3.2.34/fs/proc/inode.c linux-3.2.34-vs2.3.2.15/fs/proc/inode.c +--- linux-3.2.34/fs/proc/inode.c 2012-01-09 16:14:55.000000000 +0100 ++++ linux-3.2.34-vs2.3.2.15/fs/proc/inode.c 2011-12-05 19:33:02.000000000 +0100 +@@ -442,6 +442,8 @@ struct inode *proc_get_inode(struct supe + inode->i_uid = de->uid; + inode->i_gid = de->gid; + } ++ if (de->vx_flags) ++ PROC_I(inode)->vx_flags = de->vx_flags; + if (de->size) + inode->i_size = de->size; + if (de->nlink) +diff -NurpP --minimal linux-3.2.34/fs/proc/internal.h 
linux-3.2.34-vs2.3.2.15/fs/proc/internal.h +--- linux-3.2.34/fs/proc/internal.h 2011-07-22 11:18:06.000000000 +0200 ++++ linux-3.2.34-vs2.3.2.15/fs/proc/internal.h 2011-12-05 19:33:02.000000000 +0100 +@@ -10,6 +10,7 @@ + */ + + #include ++#include + + extern struct proc_dir_entry proc_root; + #ifdef CONFIG_PROC_SYSCTL +@@ -51,6 +52,9 @@ extern int proc_pid_status(struct seq_fi + struct pid *pid, struct task_struct *task); + extern int proc_pid_statm(struct seq_file *m, struct pid_namespace *ns, + struct pid *pid, struct task_struct *task); ++extern int proc_pid_nsproxy(struct seq_file *m, struct pid_namespace *ns, ++ struct pid *pid, struct task_struct *task); ++ + extern loff_t mem_lseek(struct file *file, loff_t offset, int orig); + + extern const struct file_operations proc_maps_operations; +@@ -76,11 +80,16 @@ static inline struct pid *proc_pid(struc + return PROC_I(inode)->pid; + } + +-static inline struct task_struct *get_proc_task(struct inode *inode) ++static inline struct task_struct *get_proc_task_real(struct inode *inode) + { + return get_pid_task(proc_pid(inode), PIDTYPE_PID); + } + ++static inline struct task_struct *get_proc_task(struct inode *inode) ++{ ++ return vx_get_proc_task(inode, proc_pid(inode)); ++} ++ + static inline int proc_fd(struct inode *inode) + { + return PROC_I(inode)->fd; +diff -NurpP --minimal linux-3.2.34/fs/proc/loadavg.c linux-3.2.34-vs2.3.2.15/fs/proc/loadavg.c +--- linux-3.2.34/fs/proc/loadavg.c 2009-09-10 15:26:23.000000000 +0200 ++++ linux-3.2.34-vs2.3.2.15/fs/proc/loadavg.c 2011-12-05 19:33:02.000000000 +0100 +@@ -12,15 +12,27 @@ + + static int loadavg_proc_show(struct seq_file *m, void *v) + { ++ unsigned long running; ++ unsigned int threads; + unsigned long avnrun[3]; + + get_avenrun(avnrun, FIXED_1/200, 0); + ++ if (vx_flags(VXF_VIRT_LOAD, 0)) { ++ struct vx_info *vxi = current_vx_info(); ++ ++ running = atomic_read(&vxi->cvirt.nr_running); ++ threads = atomic_read(&vxi->cvirt.nr_threads); ++ } else { ++ running = nr_running(); ++ threads = nr_threads; ++ } ++ + seq_printf(m, "%lu.%02lu %lu.%02lu %lu.%02lu %ld/%d %d\n", + LOAD_INT(avnrun[0]), LOAD_FRAC(avnrun[0]), + LOAD_INT(avnrun[1]), LOAD_FRAC(avnrun[1]), + LOAD_INT(avnrun[2]), LOAD_FRAC(avnrun[2]), +- nr_running(), nr_threads, ++ running, threads, + task_active_pid_ns(current)->last_pid); + return 0; + } +diff -NurpP --minimal linux-3.2.34/fs/proc/meminfo.c linux-3.2.34-vs2.3.2.15/fs/proc/meminfo.c +--- linux-3.2.34/fs/proc/meminfo.c 2012-01-09 16:14:55.000000000 +0100 ++++ linux-3.2.34-vs2.3.2.15/fs/proc/meminfo.c 2011-12-15 01:11:32.000000000 +0100 +@@ -39,7 +39,8 @@ static int meminfo_proc_show(struct seq_ + allowed = ((totalram_pages - hugetlb_total_pages()) + * sysctl_overcommit_ratio / 100) + total_swap_pages; + +- cached = global_page_state(NR_FILE_PAGES) - ++ cached = vx_flags(VXF_VIRT_MEM, 0) ? 
++ vx_vsi_cached(&i) : global_page_state(NR_FILE_PAGES) - + total_swapcache_pages - i.bufferram; + if (cached < 0) + cached = 0; +diff -NurpP --minimal linux-3.2.34/fs/proc/root.c linux-3.2.34-vs2.3.2.15/fs/proc/root.c +--- linux-3.2.34/fs/proc/root.c 2012-01-09 16:14:55.000000000 +0100 ++++ linux-3.2.34-vs2.3.2.15/fs/proc/root.c 2012-01-09 16:19:31.000000000 +0100 +@@ -18,9 +18,14 @@ + #include + #include + #include ++#include + + #include "internal.h" + ++struct proc_dir_entry *proc_virtual; ++ ++extern void proc_vx_init(void); ++ + static int proc_test_super(struct super_block *sb, void *data) + { + return sb->s_fs_info == data; +@@ -123,6 +128,7 @@ void __init proc_root_init(void) + #endif + proc_mkdir("bus", NULL); + proc_sys_init(); ++ proc_vx_init(); + } + + static int proc_root_getattr(struct vfsmount *mnt, struct dentry *dentry, struct kstat *stat +@@ -190,6 +196,7 @@ struct proc_dir_entry proc_root = { + .proc_iops = &proc_root_inode_operations, + .proc_fops = &proc_root_operations, + .parent = &proc_root, ++ .vx_flags = IATTR_ADMIN | IATTR_WATCH, + .name = "/proc", + }; + +diff -NurpP --minimal linux-3.2.34/fs/proc/stat.c linux-3.2.34-vs2.3.2.15/fs/proc/stat.c +--- linux-3.2.34/fs/proc/stat.c 2012-11-18 18:42:22.000000000 +0100 ++++ linux-3.2.34-vs2.3.2.15/fs/proc/stat.c 2012-11-06 18:08:24.000000000 +0100 +@@ -9,6 +9,7 @@ + #include + #include + #include ++#include + #include + #include + +@@ -72,6 +73,10 @@ static int show_stat(struct seq_file *p, + irq = softirq = steal = cputime64_zero; + guest = guest_nice = cputime64_zero; + getboottime(&boottime); ++ ++ if (vx_flags(VXF_VIRT_UPTIME, 0)) ++ vx_vsi_boottime(&boottime); ++ + jif = boottime.tv_sec; + + for_each_possible_cpu(i) { +diff -NurpP --minimal linux-3.2.34/fs/proc/uptime.c linux-3.2.34-vs2.3.2.15/fs/proc/uptime.c +--- linux-3.2.34/fs/proc/uptime.c 2012-11-18 18:42:22.000000000 +0100 ++++ linux-3.2.34-vs2.3.2.15/fs/proc/uptime.c 2012-01-26 09:03:19.000000000 +0100 +@@ -5,6 +5,7 @@ + #include + #include + #include ++#include + #include + + static int uptime_proc_show(struct seq_file *m, void *v) +@@ -25,6 +26,10 @@ static int uptime_proc_show(struct seq_f + nsec = cputime64_to_jiffies64(idletime) * TICK_NSEC; + idle.tv_sec = div_u64_rem(nsec, NSEC_PER_SEC, &rem); + idle.tv_nsec = rem; ++ ++ if (vx_flags(VXF_VIRT_UPTIME, 0)) ++ vx_vsi_uptime(&uptime, &idle); ++ + seq_printf(m, "%lu.%02lu %lu.%02lu\n", + (unsigned long) uptime.tv_sec, + (uptime.tv_nsec / (NSEC_PER_SEC / 100)), +diff -NurpP --minimal linux-3.2.34/fs/quota/dquot.c linux-3.2.34-vs2.3.2.15/fs/quota/dquot.c +--- linux-3.2.34/fs/quota/dquot.c 2011-07-22 11:18:06.000000000 +0200 ++++ linux-3.2.34-vs2.3.2.15/fs/quota/dquot.c 2011-12-05 19:33:02.000000000 +0100 +@@ -1548,6 +1548,9 @@ int __dquot_alloc_space(struct inode *in + int reserve = flags & DQUOT_SPACE_RESERVE; + int nofail = flags & DQUOT_SPACE_NOFAIL; + ++ if ((ret = dl_alloc_space(inode, number))) ++ return ret; ++ + /* + * First test before acquiring mutex - solves deadlocks when we + * re-enter the quota code and are already holding the mutex +@@ -1602,6 +1605,9 @@ int dquot_alloc_inode(const struct inode + int cnt, ret = 0; + char warntype[MAXQUOTAS]; + ++ if ((ret = dl_alloc_inode(inode))) ++ return ret; ++ + /* First test before acquiring mutex - solves deadlocks when we + * re-enter the quota code and are already holding the mutex */ + if (!dquot_active(inode)) +@@ -1672,6 +1678,8 @@ void __dquot_free_space(struct inode *in + char warntype[MAXQUOTAS]; + int reserve = flags & DQUOT_SPACE_RESERVE; + 
++ dl_free_space(inode, number); ++ + /* First test before acquiring mutex - solves deadlocks when we + * re-enter the quota code and are already holding the mutex */ + if (!dquot_active(inode)) { +@@ -1710,6 +1718,8 @@ void dquot_free_inode(const struct inode + unsigned int cnt; + char warntype[MAXQUOTAS]; + ++ dl_free_inode(inode); ++ + /* First test before acquiring mutex - solves deadlocks when we + * re-enter the quota code and are already holding the mutex */ + if (!dquot_active(inode)) +diff -NurpP --minimal linux-3.2.34/fs/quota/quota.c linux-3.2.34-vs2.3.2.15/fs/quota/quota.c +--- linux-3.2.34/fs/quota/quota.c 2012-01-09 16:14:55.000000000 +0100 ++++ linux-3.2.34-vs2.3.2.15/fs/quota/quota.c 2011-12-05 19:33:02.000000000 +0100 +@@ -8,6 +8,7 @@ + #include + #include + #include ++#include + #include + #include + #include +@@ -38,7 +39,7 @@ static int check_quotactl_permission(str + break; + /*FALLTHROUGH*/ + default: +- if (!capable(CAP_SYS_ADMIN)) ++ if (!vx_capable(CAP_SYS_ADMIN, VXC_QUOTA_CTL)) + return -EPERM; + } + +@@ -293,6 +294,46 @@ static int do_quotactl(struct super_bloc + } + } + ++#if defined(CONFIG_BLK_DEV_VROOT) || defined(CONFIG_BLK_DEV_VROOT_MODULE) ++ ++#include ++#include ++#include ++#include ++#include ++ ++static vroot_grb_func *vroot_get_real_bdev = NULL; ++ ++static DEFINE_SPINLOCK(vroot_grb_lock); ++ ++int register_vroot_grb(vroot_grb_func *func) { ++ int ret = -EBUSY; ++ ++ spin_lock(&vroot_grb_lock); ++ if (!vroot_get_real_bdev) { ++ vroot_get_real_bdev = func; ++ ret = 0; ++ } ++ spin_unlock(&vroot_grb_lock); ++ return ret; ++} ++EXPORT_SYMBOL(register_vroot_grb); ++ ++int unregister_vroot_grb(vroot_grb_func *func) { ++ int ret = -EINVAL; ++ ++ spin_lock(&vroot_grb_lock); ++ if (vroot_get_real_bdev) { ++ vroot_get_real_bdev = NULL; ++ ret = 0; ++ } ++ spin_unlock(&vroot_grb_lock); ++ return ret; ++} ++EXPORT_SYMBOL(unregister_vroot_grb); ++ ++#endif ++ + /* + * look up a superblock on which quota ops will be performed + * - use the name of a block device to find the superblock thereon +@@ -310,6 +351,22 @@ static struct super_block *quotactl_bloc + putname(tmp); + if (IS_ERR(bdev)) + return ERR_CAST(bdev); ++#if defined(CONFIG_BLK_DEV_VROOT) || defined(CONFIG_BLK_DEV_VROOT_MODULE) ++ if (bdev && bdev->bd_inode && ++ imajor(bdev->bd_inode) == VROOT_MAJOR) { ++ struct block_device *bdnew = (void *)-EINVAL; ++ ++ if (vroot_get_real_bdev) ++ bdnew = vroot_get_real_bdev(bdev); ++ else ++ vxdprintk(VXD_CBIT(misc, 0), ++ "vroot_get_real_bdev not set"); ++ bdput(bdev); ++ if (IS_ERR(bdnew)) ++ return ERR_PTR(PTR_ERR(bdnew)); ++ bdev = bdnew; ++ } ++#endif + sb = get_super(bdev); + bdput(bdev); + if (!sb) +diff -NurpP --minimal linux-3.2.34/fs/reiserfs/file.c linux-3.2.34-vs2.3.2.15/fs/reiserfs/file.c +--- linux-3.2.34/fs/reiserfs/file.c 2011-10-24 18:45:27.000000000 +0200 ++++ linux-3.2.34-vs2.3.2.15/fs/reiserfs/file.c 2011-12-05 19:33:02.000000000 +0100 +@@ -319,5 +319,6 @@ const struct inode_operations reiserfs_f + .listxattr = reiserfs_listxattr, + .removexattr = reiserfs_removexattr, + .permission = reiserfs_permission, ++ .sync_flags = reiserfs_sync_flags, + .get_acl = reiserfs_get_acl, + }; +diff -NurpP --minimal linux-3.2.34/fs/reiserfs/inode.c linux-3.2.34-vs2.3.2.15/fs/reiserfs/inode.c +--- linux-3.2.34/fs/reiserfs/inode.c 2012-11-18 18:42:22.000000000 +0100 ++++ linux-3.2.34-vs2.3.2.15/fs/reiserfs/inode.c 2012-11-06 18:08:24.000000000 +0100 +@@ -18,6 +18,7 @@ + #include + #include + #include ++#include + + int reiserfs_commit_write(struct file *f, struct 
page *page, + unsigned from, unsigned to); +@@ -1131,6 +1132,8 @@ static void init_inode(struct inode *ino + struct buffer_head *bh; + struct item_head *ih; + __u32 rdev; ++ uid_t uid; ++ gid_t gid; + //int version = ITEM_VERSION_1; + + bh = PATH_PLAST_BUFFER(path); +@@ -1151,12 +1154,13 @@ static void init_inode(struct inode *ino + (struct stat_data_v1 *)B_I_PITEM(bh, ih); + unsigned long blocks; + ++ uid = sd_v1_uid(sd); ++ gid = sd_v1_gid(sd); ++ + set_inode_item_key_version(inode, KEY_FORMAT_3_5); + set_inode_sd_version(inode, STAT_DATA_V1); + inode->i_mode = sd_v1_mode(sd); + set_nlink(inode, sd_v1_nlink(sd)); +- inode->i_uid = sd_v1_uid(sd); +- inode->i_gid = sd_v1_gid(sd); + inode->i_size = sd_v1_size(sd); + inode->i_atime.tv_sec = sd_v1_atime(sd); + inode->i_mtime.tv_sec = sd_v1_mtime(sd); +@@ -1198,11 +1202,12 @@ static void init_inode(struct inode *ino + // (directories and symlinks) + struct stat_data *sd = (struct stat_data *)B_I_PITEM(bh, ih); + ++ uid = sd_v2_uid(sd); ++ gid = sd_v2_gid(sd); ++ + inode->i_mode = sd_v2_mode(sd); + set_nlink(inode, sd_v2_nlink(sd)); +- inode->i_uid = sd_v2_uid(sd); + inode->i_size = sd_v2_size(sd); +- inode->i_gid = sd_v2_gid(sd); + inode->i_mtime.tv_sec = sd_v2_mtime(sd); + inode->i_atime.tv_sec = sd_v2_atime(sd); + inode->i_ctime.tv_sec = sd_v2_ctime(sd); +@@ -1232,6 +1237,10 @@ static void init_inode(struct inode *ino + sd_attrs_to_i_attrs(sd_v2_attrs(sd), inode); + } + ++ inode->i_uid = INOTAG_UID(DX_TAG(inode), uid, gid); ++ inode->i_gid = INOTAG_GID(DX_TAG(inode), uid, gid); ++ inode->i_tag = INOTAG_TAG(DX_TAG(inode), uid, gid, 0); ++ + pathrelse(path); + if (S_ISREG(inode->i_mode)) { + inode->i_op = &reiserfs_file_inode_operations; +@@ -1254,13 +1263,15 @@ static void init_inode(struct inode *ino + static void inode2sd(void *sd, struct inode *inode, loff_t size) + { + struct stat_data *sd_v2 = (struct stat_data *)sd; ++ uid_t uid = TAGINO_UID(DX_TAG(inode), inode->i_uid, inode->i_tag); ++ gid_t gid = TAGINO_GID(DX_TAG(inode), inode->i_gid, inode->i_tag); + __u16 flags; + ++ set_sd_v2_uid(sd_v2, uid); ++ set_sd_v2_gid(sd_v2, gid); + set_sd_v2_mode(sd_v2, inode->i_mode); + set_sd_v2_nlink(sd_v2, inode->i_nlink); +- set_sd_v2_uid(sd_v2, inode->i_uid); + set_sd_v2_size(sd_v2, size); +- set_sd_v2_gid(sd_v2, inode->i_gid); + set_sd_v2_mtime(sd_v2, inode->i_mtime.tv_sec); + set_sd_v2_atime(sd_v2, inode->i_atime.tv_sec); + set_sd_v2_ctime(sd_v2, inode->i_ctime.tv_sec); +@@ -2872,14 +2883,19 @@ int reiserfs_commit_write(struct file *f + void sd_attrs_to_i_attrs(__u16 sd_attrs, struct inode *inode) + { + if (reiserfs_attrs(inode->i_sb)) { +- if (sd_attrs & REISERFS_SYNC_FL) +- inode->i_flags |= S_SYNC; +- else +- inode->i_flags &= ~S_SYNC; + if (sd_attrs & REISERFS_IMMUTABLE_FL) + inode->i_flags |= S_IMMUTABLE; + else + inode->i_flags &= ~S_IMMUTABLE; ++ if (sd_attrs & REISERFS_IXUNLINK_FL) ++ inode->i_flags |= S_IXUNLINK; ++ else ++ inode->i_flags &= ~S_IXUNLINK; ++ ++ if (sd_attrs & REISERFS_SYNC_FL) ++ inode->i_flags |= S_SYNC; ++ else ++ inode->i_flags &= ~S_SYNC; + if (sd_attrs & REISERFS_APPEND_FL) + inode->i_flags |= S_APPEND; + else +@@ -2892,6 +2908,15 @@ void sd_attrs_to_i_attrs(__u16 sd_attrs, + REISERFS_I(inode)->i_flags |= i_nopack_mask; + else + REISERFS_I(inode)->i_flags &= ~i_nopack_mask; ++ ++ if (sd_attrs & REISERFS_BARRIER_FL) ++ inode->i_vflags |= V_BARRIER; ++ else ++ inode->i_vflags &= ~V_BARRIER; ++ if (sd_attrs & REISERFS_COW_FL) ++ inode->i_vflags |= V_COW; ++ else ++ inode->i_vflags &= ~V_COW; + } + } + +@@ -2902,6 
+2927,11 @@ void i_attrs_to_sd_attrs(struct inode *i + *sd_attrs |= REISERFS_IMMUTABLE_FL; + else + *sd_attrs &= ~REISERFS_IMMUTABLE_FL; ++ if (inode->i_flags & S_IXUNLINK) ++ *sd_attrs |= REISERFS_IXUNLINK_FL; ++ else ++ *sd_attrs &= ~REISERFS_IXUNLINK_FL; ++ + if (inode->i_flags & S_SYNC) + *sd_attrs |= REISERFS_SYNC_FL; + else +@@ -2914,6 +2944,15 @@ void i_attrs_to_sd_attrs(struct inode *i + *sd_attrs |= REISERFS_NOTAIL_FL; + else + *sd_attrs &= ~REISERFS_NOTAIL_FL; ++ ++ if (inode->i_vflags & V_BARRIER) ++ *sd_attrs |= REISERFS_BARRIER_FL; ++ else ++ *sd_attrs &= ~REISERFS_BARRIER_FL; ++ if (inode->i_vflags & V_COW) ++ *sd_attrs |= REISERFS_COW_FL; ++ else ++ *sd_attrs &= ~REISERFS_COW_FL; + } + } + +@@ -3159,7 +3198,8 @@ int reiserfs_setattr(struct dentry *dent + } + + if ((ia_valid & ATTR_UID && attr->ia_uid != inode->i_uid) || +- (ia_valid & ATTR_GID && attr->ia_gid != inode->i_gid)) { ++ (ia_valid & ATTR_GID && attr->ia_gid != inode->i_gid) || ++ (ia_valid & ATTR_TAG && attr->ia_tag != inode->i_tag)) { + struct reiserfs_transaction_handle th; + int jbegin_count = + 2 * +@@ -3188,6 +3228,9 @@ int reiserfs_setattr(struct dentry *dent + inode->i_uid = attr->ia_uid; + if (attr->ia_valid & ATTR_GID) + inode->i_gid = attr->ia_gid; ++ if ((attr->ia_valid & ATTR_TAG) && ++ IS_TAGGED(inode)) ++ inode->i_tag = attr->ia_tag; + mark_inode_dirty(inode); + error = journal_end(&th, inode->i_sb, jbegin_count); + if (error) +diff -NurpP --minimal linux-3.2.34/fs/reiserfs/ioctl.c linux-3.2.34-vs2.3.2.15/fs/reiserfs/ioctl.c +--- linux-3.2.34/fs/reiserfs/ioctl.c 2011-05-22 16:17:53.000000000 +0200 ++++ linux-3.2.34-vs2.3.2.15/fs/reiserfs/ioctl.c 2011-12-05 19:33:02.000000000 +0100 +@@ -11,6 +11,21 @@ + #include + #include + ++ ++int reiserfs_sync_flags(struct inode *inode, int flags, int vflags) ++{ ++ __u16 sd_attrs = 0; ++ ++ inode->i_flags = flags; ++ inode->i_vflags = vflags; ++ ++ i_attrs_to_sd_attrs(inode, &sd_attrs); ++ REISERFS_I(inode)->i_attrs = sd_attrs; ++ inode->i_ctime = CURRENT_TIME_SEC; ++ mark_inode_dirty(inode); ++ return 0; ++} ++ + /* + * reiserfs_ioctl - handler for ioctl for inode + * supported commands: +@@ -22,7 +37,7 @@ + long reiserfs_ioctl(struct file *filp, unsigned int cmd, unsigned long arg) + { + struct inode *inode = filp->f_path.dentry->d_inode; +- unsigned int flags; ++ unsigned int flags, oldflags; + int err = 0; + + reiserfs_write_lock(inode->i_sb); +@@ -47,6 +62,7 @@ long reiserfs_ioctl(struct file *filp, u + + flags = REISERFS_I(inode)->i_attrs; + i_attrs_to_sd_attrs(inode, (__u16 *) & flags); ++ flags &= REISERFS_FL_USER_VISIBLE; + err = put_user(flags, (int __user *)arg); + break; + case REISERFS_IOC_SETFLAGS:{ +@@ -67,6 +83,10 @@ long reiserfs_ioctl(struct file *filp, u + err = -EFAULT; + goto setflags_out; + } ++ if (IS_BARRIER(inode)) { ++ vxwprintk_task(1, "messing with the barrier."); ++ return -EACCES; ++ } + /* + * Is it quota file? 
Do not allow user to mess with it + */ +@@ -91,6 +111,10 @@ long reiserfs_ioctl(struct file *filp, u + goto setflags_out; + } + } ++ ++ oldflags = REISERFS_I(inode)->i_attrs; ++ flags &= REISERFS_FL_USER_MODIFIABLE; ++ flags |= oldflags & ~REISERFS_FL_USER_MODIFIABLE; + sd_attrs_to_i_attrs(flags, inode); + REISERFS_I(inode)->i_attrs = flags; + inode->i_ctime = CURRENT_TIME_SEC; +diff -NurpP --minimal linux-3.2.34/fs/reiserfs/namei.c linux-3.2.34-vs2.3.2.15/fs/reiserfs/namei.c +--- linux-3.2.34/fs/reiserfs/namei.c 2012-01-09 16:14:55.000000000 +0100 ++++ linux-3.2.34-vs2.3.2.15/fs/reiserfs/namei.c 2011-12-05 19:33:02.000000000 +0100 +@@ -18,6 +18,7 @@ + #include + #include + #include ++#include + + #define INC_DIR_INODE_NLINK(i) if (i->i_nlink != 1) { inc_nlink(i); if (i->i_nlink >= REISERFS_LINK_MAX) set_nlink(i, 1); } + #define DEC_DIR_INODE_NLINK(i) if (i->i_nlink != 1) drop_nlink(i); +@@ -362,6 +363,7 @@ static struct dentry *reiserfs_lookup(st + if (retval == IO_ERROR) { + return ERR_PTR(-EIO); + } ++ dx_propagate_tag(nd, inode); + + return d_splice_alias(inode, dentry); + } +diff -NurpP --minimal linux-3.2.34/fs/reiserfs/super.c linux-3.2.34-vs2.3.2.15/fs/reiserfs/super.c +--- linux-3.2.34/fs/reiserfs/super.c 2012-11-18 18:42:22.000000000 +0100 ++++ linux-3.2.34-vs2.3.2.15/fs/reiserfs/super.c 2012-01-18 02:58:07.000000000 +0100 +@@ -903,6 +903,14 @@ static int reiserfs_parse_options(struct + {"user_xattr",.setmask = 1 << REISERFS_UNSUPPORTED_OPT}, + {"nouser_xattr",.clrmask = 1 << REISERFS_UNSUPPORTED_OPT}, + #endif ++#ifndef CONFIG_TAGGING_NONE ++ {"tagxid",.setmask = 1 << REISERFS_TAGGED}, ++ {"tag",.setmask = 1 << REISERFS_TAGGED}, ++ {"notag",.clrmask = 1 << REISERFS_TAGGED}, ++#endif ++#ifdef CONFIG_PROPAGATE ++ {"tag",.arg_required = 'T',.values = NULL}, ++#endif + #ifdef CONFIG_REISERFS_FS_POSIX_ACL + {"acl",.setmask = 1 << REISERFS_POSIXACL}, + {"noacl",.clrmask = 1 << REISERFS_POSIXACL}, +@@ -1213,6 +1221,14 @@ static int reiserfs_remount(struct super + handle_quota_files(s, qf_names, &qfmt); + #endif + ++ if ((mount_options & (1 << REISERFS_TAGGED)) && ++ !(s->s_flags & MS_TAGGED)) { ++ reiserfs_warning(s, "super-vs01", ++ "reiserfs: tagging not permitted on remount."); ++ err = -EINVAL; ++ goto out_err; ++ } ++ + handle_attrs(s); + + /* Add options that are safe here */ +@@ -1696,6 +1712,10 @@ static int reiserfs_fill_super(struct su + goto error; + } + ++ /* map mount option tagxid */ ++ if (REISERFS_SB(s)->s_mount_opt & (1 << REISERFS_TAGGED)) ++ s->s_flags |= MS_TAGGED; ++ + rs = SB_DISK_SUPER_BLOCK(s); + /* Let's do basic sanity check to verify that underlying device is not + smaller than the filesystem. 
If the check fails then abort and scream, +diff -NurpP --minimal linux-3.2.34/fs/reiserfs/xattr.c linux-3.2.34-vs2.3.2.15/fs/reiserfs/xattr.c +--- linux-3.2.34/fs/reiserfs/xattr.c 2011-10-24 18:45:27.000000000 +0200 ++++ linux-3.2.34-vs2.3.2.15/fs/reiserfs/xattr.c 2011-12-05 19:33:02.000000000 +0100 +@@ -40,6 +40,7 @@ + #include + #include + #include ++#include + #include + #include + #include +diff -NurpP --minimal linux-3.2.34/fs/stat.c linux-3.2.34-vs2.3.2.15/fs/stat.c +--- linux-3.2.34/fs/stat.c 2012-11-18 18:42:22.000000000 +0100 ++++ linux-3.2.34-vs2.3.2.15/fs/stat.c 2012-10-22 12:59:52.000000000 +0200 +@@ -26,6 +26,7 @@ void generic_fillattr(struct inode *inod + stat->nlink = inode->i_nlink; + stat->uid = inode->i_uid; + stat->gid = inode->i_gid; ++ stat->tag = inode->i_tag; + stat->rdev = inode->i_rdev; + stat->size = i_size_read(inode); + stat->atime = inode->i_atime; +diff -NurpP --minimal linux-3.2.34/fs/statfs.c linux-3.2.34-vs2.3.2.15/fs/statfs.c +--- linux-3.2.34/fs/statfs.c 2012-01-09 16:14:55.000000000 +0100 ++++ linux-3.2.34-vs2.3.2.15/fs/statfs.c 2011-12-05 19:33:02.000000000 +0100 +@@ -7,6 +7,8 @@ + #include + #include + #include ++#include ++#include + + static int flags_by_mnt(int mnt_flags) + { +@@ -59,6 +61,8 @@ int statfs_by_dentry(struct dentry *dent + retval = dentry->d_sb->s_op->statfs(dentry, buf); + if (retval == 0 && buf->f_frsize == 0) + buf->f_frsize = buf->f_bsize; ++ if (!vx_check(0, VS_ADMIN|VS_WATCH)) ++ vx_vsi_statfs(dentry->d_sb, buf); + return retval; + } + +diff -NurpP --minimal linux-3.2.34/fs/super.c linux-3.2.34-vs2.3.2.15/fs/super.c +--- linux-3.2.34/fs/super.c 2012-11-18 18:42:22.000000000 +0100 ++++ linux-3.2.34-vs2.3.2.15/fs/super.c 2012-06-14 20:45:24.000000000 +0200 +@@ -32,6 +32,9 @@ + #include + #include + #include ++#include ++#include ++#include + #include "internal.h" + + +@@ -1100,6 +1103,13 @@ mount_fs(struct file_system_type *type, + WARN_ON(sb->s_bdi == &default_backing_dev_info); + sb->s_flags |= MS_BORN; + ++ error = -EPERM; ++ if (!vx_capable(CAP_SYS_ADMIN, VXC_BINARY_MOUNT) && ++ !sb->s_bdev && ++ (sb->s_magic != PROC_SUPER_MAGIC) && ++ (sb->s_magic != DEVPTS_SUPER_MAGIC)) ++ goto out_sb; ++ + error = security_sb_kern_mount(sb, flags, secdata); + if (error) + goto out_sb; +diff -NurpP --minimal linux-3.2.34/fs/sysfs/mount.c linux-3.2.34-vs2.3.2.15/fs/sysfs/mount.c +--- linux-3.2.34/fs/sysfs/mount.c 2011-07-22 11:18:06.000000000 +0200 ++++ linux-3.2.34-vs2.3.2.15/fs/sysfs/mount.c 2011-12-05 19:33:02.000000000 +0100 +@@ -47,7 +47,7 @@ static int sysfs_fill_super(struct super + + sb->s_blocksize = PAGE_CACHE_SIZE; + sb->s_blocksize_bits = PAGE_CACHE_SHIFT; +- sb->s_magic = SYSFS_MAGIC; ++ sb->s_magic = SYSFS_SUPER_MAGIC; + sb->s_op = &sysfs_ops; + sb->s_time_gran = 1; + +diff -NurpP --minimal linux-3.2.34/fs/utimes.c linux-3.2.34-vs2.3.2.15/fs/utimes.c +--- linux-3.2.34/fs/utimes.c 2011-05-22 16:17:54.000000000 +0200 ++++ linux-3.2.34-vs2.3.2.15/fs/utimes.c 2011-12-05 19:33:02.000000000 +0100 +@@ -8,6 +8,8 @@ + #include + #include + #include ++#include ++#include + #include + #include + +@@ -52,12 +54,18 @@ static int utimes_common(struct path *pa + { + int error; + struct iattr newattrs; +- struct inode *inode = path->dentry->d_inode; ++ struct inode *inode; + + error = mnt_want_write(path->mnt); + if (error) + goto out; + ++ error = cow_check_and_break(path); ++ if (error) ++ goto mnt_drop_write_and_out; ++ ++ inode = path->dentry->d_inode; ++ + if (times && times[0].tv_nsec == UTIME_NOW && + times[1].tv_nsec == UTIME_NOW) + 
times = NULL; +diff -NurpP --minimal linux-3.2.34/fs/xattr.c linux-3.2.34-vs2.3.2.15/fs/xattr.c +--- linux-3.2.34/fs/xattr.c 2012-01-09 16:14:55.000000000 +0100 ++++ linux-3.2.34-vs2.3.2.15/fs/xattr.c 2011-12-05 19:33:02.000000000 +0100 +@@ -19,6 +19,7 @@ + #include + #include + #include ++#include + #include + + +@@ -50,7 +51,7 @@ xattr_permission(struct inode *inode, co + * The trusted.* namespace can only be accessed by privileged users. + */ + if (!strncmp(name, XATTR_TRUSTED_PREFIX, XATTR_TRUSTED_PREFIX_LEN)) { +- if (!capable(CAP_SYS_ADMIN)) ++ if (!vx_capable(CAP_SYS_ADMIN, VXC_FS_TRUSTED)) + return (mask & MAY_WRITE) ? -EPERM : -ENODATA; + return 0; + } +diff -NurpP --minimal linux-3.2.34/fs/xfs/xfs_dinode.h linux-3.2.34-vs2.3.2.15/fs/xfs/xfs_dinode.h +--- linux-3.2.34/fs/xfs/xfs_dinode.h 2011-10-24 18:45:31.000000000 +0200 ++++ linux-3.2.34-vs2.3.2.15/fs/xfs/xfs_dinode.h 2011-12-05 19:33:02.000000000 +0100 +@@ -51,7 +51,9 @@ typedef struct xfs_dinode { + __be32 di_nlink; /* number of links to file */ + __be16 di_projid_lo; /* lower part of owner's project id */ + __be16 di_projid_hi; /* higher part owner's project id */ +- __u8 di_pad[6]; /* unused, zeroed space */ ++ __u8 di_pad[2]; /* unused, zeroed space */ ++ __be16 di_tag; /* context tagging */ ++ __be16 di_vflags; /* vserver specific flags */ + __be16 di_flushiter; /* incremented on flush */ + xfs_timestamp_t di_atime; /* time last accessed */ + xfs_timestamp_t di_mtime; /* time last modified */ +@@ -184,6 +186,8 @@ static inline void xfs_dinode_put_rdev(s + #define XFS_DIFLAG_EXTSZINHERIT_BIT 12 /* inherit inode extent size */ + #define XFS_DIFLAG_NODEFRAG_BIT 13 /* do not reorganize/defragment */ + #define XFS_DIFLAG_FILESTREAM_BIT 14 /* use filestream allocator */ ++#define XFS_DIFLAG_IXUNLINK_BIT 15 /* Immutable inver on unlink */ ++ + #define XFS_DIFLAG_REALTIME (1 << XFS_DIFLAG_REALTIME_BIT) + #define XFS_DIFLAG_PREALLOC (1 << XFS_DIFLAG_PREALLOC_BIT) + #define XFS_DIFLAG_NEWRTBM (1 << XFS_DIFLAG_NEWRTBM_BIT) +@@ -199,6 +203,7 @@ static inline void xfs_dinode_put_rdev(s + #define XFS_DIFLAG_EXTSZINHERIT (1 << XFS_DIFLAG_EXTSZINHERIT_BIT) + #define XFS_DIFLAG_NODEFRAG (1 << XFS_DIFLAG_NODEFRAG_BIT) + #define XFS_DIFLAG_FILESTREAM (1 << XFS_DIFLAG_FILESTREAM_BIT) ++#define XFS_DIFLAG_IXUNLINK (1 << XFS_DIFLAG_IXUNLINK_BIT) + + #ifdef CONFIG_XFS_RT + #define XFS_IS_REALTIME_INODE(ip) ((ip)->i_d.di_flags & XFS_DIFLAG_REALTIME) +@@ -211,6 +216,10 @@ static inline void xfs_dinode_put_rdev(s + XFS_DIFLAG_IMMUTABLE | XFS_DIFLAG_APPEND | XFS_DIFLAG_SYNC | \ + XFS_DIFLAG_NOATIME | XFS_DIFLAG_NODUMP | XFS_DIFLAG_RTINHERIT | \ + XFS_DIFLAG_PROJINHERIT | XFS_DIFLAG_NOSYMLINKS | XFS_DIFLAG_EXTSIZE | \ +- XFS_DIFLAG_EXTSZINHERIT | XFS_DIFLAG_NODEFRAG | XFS_DIFLAG_FILESTREAM) ++ XFS_DIFLAG_EXTSZINHERIT | XFS_DIFLAG_NODEFRAG | XFS_DIFLAG_FILESTREAM | \ ++ XFS_DIFLAG_IXUNLINK) ++ ++#define XFS_DIVFLAG_BARRIER 0x01 ++#define XFS_DIVFLAG_COW 0x02 + + #endif /* __XFS_DINODE_H__ */ +diff -NurpP --minimal linux-3.2.34/fs/xfs/xfs_fs.h linux-3.2.34-vs2.3.2.15/fs/xfs/xfs_fs.h +--- linux-3.2.34/fs/xfs/xfs_fs.h 2011-10-24 18:45:31.000000000 +0200 ++++ linux-3.2.34-vs2.3.2.15/fs/xfs/xfs_fs.h 2011-12-05 19:33:02.000000000 +0100 +@@ -67,6 +67,9 @@ struct fsxattr { + #define XFS_XFLAG_EXTSZINHERIT 0x00001000 /* inherit inode extent size */ + #define XFS_XFLAG_NODEFRAG 0x00002000 /* do not defragment */ + #define XFS_XFLAG_FILESTREAM 0x00004000 /* use filestream allocator */ ++#define XFS_XFLAG_IXUNLINK 0x00008000 /* immutable invert on unlink */ 
++#define XFS_XFLAG_BARRIER 0x10000000 /* chroot() barrier */ ++#define XFS_XFLAG_COW 0x20000000 /* copy on write mark */ + #define XFS_XFLAG_HASATTR 0x80000000 /* no DIFLAG for this */ + + /* +@@ -302,7 +305,8 @@ typedef struct xfs_bstat { + #define bs_projid bs_projid_lo /* (previously just bs_projid) */ + __u16 bs_forkoff; /* inode fork offset in bytes */ + __u16 bs_projid_hi; /* higher part of project id */ +- unsigned char bs_pad[10]; /* pad space, unused */ ++ unsigned char bs_pad[8]; /* pad space, unused */ ++ __u16 bs_tag; /* context tagging */ + __u32 bs_dmevmask; /* DMIG event mask */ + __u16 bs_dmstate; /* DMIG state info */ + __u16 bs_aextents; /* attribute number of extents */ +diff -NurpP --minimal linux-3.2.34/fs/xfs/xfs_ialloc.c linux-3.2.34-vs2.3.2.15/fs/xfs/xfs_ialloc.c +--- linux-3.2.34/fs/xfs/xfs_ialloc.c 2012-01-09 16:14:55.000000000 +0100 ++++ linux-3.2.34-vs2.3.2.15/fs/xfs/xfs_ialloc.c 2011-12-05 19:33:02.000000000 +0100 +@@ -37,7 +37,6 @@ + #include "xfs_error.h" + #include "xfs_bmap.h" + +- + /* + * Allocation group level functions. + */ +diff -NurpP --minimal linux-3.2.34/fs/xfs/xfs_inode.c linux-3.2.34-vs2.3.2.15/fs/xfs/xfs_inode.c +--- linux-3.2.34/fs/xfs/xfs_inode.c 2012-01-09 16:14:55.000000000 +0100 ++++ linux-3.2.34-vs2.3.2.15/fs/xfs/xfs_inode.c 2011-12-15 01:11:32.000000000 +0100 +@@ -236,6 +236,7 @@ xfs_inotobp( + return 0; + } + ++#include + + /* + * This routine is called to map an inode to the buffer containing +@@ -634,15 +635,25 @@ xfs_iformat_btree( + STATIC void + xfs_dinode_from_disk( + xfs_icdinode_t *to, +- xfs_dinode_t *from) ++ xfs_dinode_t *from, ++ int tagged) + { ++ uint32_t uid, gid, tag; ++ + to->di_magic = be16_to_cpu(from->di_magic); + to->di_mode = be16_to_cpu(from->di_mode); + to->di_version = from ->di_version; + to->di_format = from->di_format; + to->di_onlink = be16_to_cpu(from->di_onlink); +- to->di_uid = be32_to_cpu(from->di_uid); +- to->di_gid = be32_to_cpu(from->di_gid); ++ ++ uid = be32_to_cpu(from->di_uid); ++ gid = be32_to_cpu(from->di_gid); ++ tag = be16_to_cpu(from->di_tag); ++ ++ to->di_uid = INOTAG_UID(tagged, uid, gid); ++ to->di_gid = INOTAG_GID(tagged, uid, gid); ++ to->di_tag = INOTAG_TAG(tagged, uid, gid, tag); ++ + to->di_nlink = be32_to_cpu(from->di_nlink); + to->di_projid_lo = be16_to_cpu(from->di_projid_lo); + to->di_projid_hi = be16_to_cpu(from->di_projid_hi); +@@ -664,21 +675,26 @@ xfs_dinode_from_disk( + to->di_dmevmask = be32_to_cpu(from->di_dmevmask); + to->di_dmstate = be16_to_cpu(from->di_dmstate); + to->di_flags = be16_to_cpu(from->di_flags); ++ to->di_vflags = be16_to_cpu(from->di_vflags); + to->di_gen = be32_to_cpu(from->di_gen); + } + + void + xfs_dinode_to_disk( + xfs_dinode_t *to, +- xfs_icdinode_t *from) ++ xfs_icdinode_t *from, ++ int tagged) + { + to->di_magic = cpu_to_be16(from->di_magic); + to->di_mode = cpu_to_be16(from->di_mode); + to->di_version = from ->di_version; + to->di_format = from->di_format; + to->di_onlink = cpu_to_be16(from->di_onlink); +- to->di_uid = cpu_to_be32(from->di_uid); +- to->di_gid = cpu_to_be32(from->di_gid); ++ ++ to->di_uid = cpu_to_be32(TAGINO_UID(tagged, from->di_uid, from->di_tag)); ++ to->di_gid = cpu_to_be32(TAGINO_GID(tagged, from->di_gid, from->di_tag)); ++ to->di_tag = cpu_to_be16(TAGINO_TAG(tagged, from->di_tag)); ++ + to->di_nlink = cpu_to_be32(from->di_nlink); + to->di_projid_lo = cpu_to_be16(from->di_projid_lo); + to->di_projid_hi = cpu_to_be16(from->di_projid_hi); +@@ -700,12 +716,14 @@ xfs_dinode_to_disk( + to->di_dmevmask = 
cpu_to_be32(from->di_dmevmask); + to->di_dmstate = cpu_to_be16(from->di_dmstate); + to->di_flags = cpu_to_be16(from->di_flags); ++ to->di_vflags = cpu_to_be16(from->di_vflags); + to->di_gen = cpu_to_be32(from->di_gen); + } + + STATIC uint + _xfs_dic2xflags( +- __uint16_t di_flags) ++ __uint16_t di_flags, ++ __uint16_t di_vflags) + { + uint flags = 0; + +@@ -716,6 +734,8 @@ _xfs_dic2xflags( + flags |= XFS_XFLAG_PREALLOC; + if (di_flags & XFS_DIFLAG_IMMUTABLE) + flags |= XFS_XFLAG_IMMUTABLE; ++ if (di_flags & XFS_DIFLAG_IXUNLINK) ++ flags |= XFS_XFLAG_IXUNLINK; + if (di_flags & XFS_DIFLAG_APPEND) + flags |= XFS_XFLAG_APPEND; + if (di_flags & XFS_DIFLAG_SYNC) +@@ -740,6 +760,10 @@ _xfs_dic2xflags( + flags |= XFS_XFLAG_FILESTREAM; + } + ++ if (di_vflags & XFS_DIVFLAG_BARRIER) ++ flags |= FS_BARRIER_FL; ++ if (di_vflags & XFS_DIVFLAG_COW) ++ flags |= FS_COW_FL; + return flags; + } + +@@ -749,7 +773,7 @@ xfs_ip2xflags( + { + xfs_icdinode_t *dic = &ip->i_d; + +- return _xfs_dic2xflags(dic->di_flags) | ++ return _xfs_dic2xflags(dic->di_flags, dic->di_vflags) | + (XFS_IFORK_Q(ip) ? XFS_XFLAG_HASATTR : 0); + } + +@@ -757,7 +781,8 @@ uint + xfs_dic2xflags( + xfs_dinode_t *dip) + { +- return _xfs_dic2xflags(be16_to_cpu(dip->di_flags)) | ++ return _xfs_dic2xflags(be16_to_cpu(dip->di_flags), ++ be16_to_cpu(dip->di_vflags)) | + (XFS_DFORK_Q(dip) ? XFS_XFLAG_HASATTR : 0); + } + +@@ -790,7 +815,6 @@ xfs_iread( + if (error) + return error; + dip = (xfs_dinode_t *)xfs_buf_offset(bp, ip->i_imap.im_boffset); +- + /* + * If we got something that isn't an inode it means someone + * (nfs or dmi) has a stale handle. +@@ -813,7 +837,8 @@ xfs_iread( + * Otherwise, just get the truly permanent information. + */ + if (dip->di_mode) { +- xfs_dinode_from_disk(&ip->i_d, dip); ++ xfs_dinode_from_disk(&ip->i_d, dip, ++ mp->m_flags & XFS_MOUNT_TAGGED); + error = xfs_iformat(ip, dip); + if (error) { + #ifdef DEBUG +@@ -1008,6 +1033,7 @@ xfs_ialloc( + ASSERT(ip->i_d.di_nlink == nlink); + ip->i_d.di_uid = current_fsuid(); + ip->i_d.di_gid = current_fsgid(); ++ ip->i_d.di_tag = current_fstag(&ip->i_vnode); + xfs_set_projid(ip, prid); + memset(&(ip->i_d.di_pad[0]), 0, sizeof(ip->i_d.di_pad)); + +@@ -1068,6 +1094,7 @@ xfs_ialloc( + ip->i_d.di_dmevmask = 0; + ip->i_d.di_dmstate = 0; + ip->i_d.di_flags = 0; ++ ip->i_d.di_vflags = 0; + flags = XFS_ILOG_CORE; + switch (mode & S_IFMT) { + case S_IFIFO: +@@ -1842,6 +1869,7 @@ xfs_ifree( + } + ip->i_d.di_mode = 0; /* mark incore inode as free */ + ip->i_d.di_flags = 0; ++ ip->i_d.di_vflags = 0; + ip->i_d.di_dmevmask = 0; + ip->i_d.di_forkoff = 0; /* mark the attr fork not in use */ + ip->i_df.if_ext_max = +@@ -2723,7 +2751,8 @@ xfs_iflush_int( + * because if the inode is dirty at all the core must + * be. 
+ */ +- xfs_dinode_to_disk(dip, &ip->i_d); ++ xfs_dinode_to_disk(dip, &ip->i_d, ++ mp->m_flags & XFS_MOUNT_TAGGED); + + /* Wrap, we never let the log put out DI_MAX_FLUSH */ + if (ip->i_d.di_flushiter == DI_MAX_FLUSH) +diff -NurpP --minimal linux-3.2.34/fs/xfs/xfs_inode.h linux-3.2.34-vs2.3.2.15/fs/xfs/xfs_inode.h +--- linux-3.2.34/fs/xfs/xfs_inode.h 2012-01-09 16:14:55.000000000 +0100 ++++ linux-3.2.34-vs2.3.2.15/fs/xfs/xfs_inode.h 2011-12-15 01:11:32.000000000 +0100 +@@ -135,7 +135,9 @@ typedef struct xfs_icdinode { + __uint32_t di_nlink; /* number of links to file */ + __uint16_t di_projid_lo; /* lower part of owner's project id */ + __uint16_t di_projid_hi; /* higher part of owner's project id */ +- __uint8_t di_pad[6]; /* unused, zeroed space */ ++ __uint8_t di_pad[2]; /* unused, zeroed space */ ++ __uint16_t di_tag; /* context tagging */ ++ __uint16_t di_vflags; /* vserver specific flags */ + __uint16_t di_flushiter; /* incremented on flush */ + xfs_ictimestamp_t di_atime; /* time last accessed */ + xfs_ictimestamp_t di_mtime; /* time last modified */ +@@ -536,7 +538,7 @@ int xfs_itobp(struct xfs_mount *, struc + int xfs_iread(struct xfs_mount *, struct xfs_trans *, + struct xfs_inode *, uint); + void xfs_dinode_to_disk(struct xfs_dinode *, +- struct xfs_icdinode *); ++ struct xfs_icdinode *, int); + void xfs_idestroy_fork(struct xfs_inode *, int); + void xfs_idata_realloc(struct xfs_inode *, int, int); + void xfs_iroot_realloc(struct xfs_inode *, int, int); +diff -NurpP --minimal linux-3.2.34/fs/xfs/xfs_ioctl.c linux-3.2.34-vs2.3.2.15/fs/xfs/xfs_ioctl.c +--- linux-3.2.34/fs/xfs/xfs_ioctl.c 2012-01-09 16:14:55.000000000 +0100 ++++ linux-3.2.34-vs2.3.2.15/fs/xfs/xfs_ioctl.c 2011-12-05 19:33:02.000000000 +0100 +@@ -28,7 +28,7 @@ + #include "xfs_bmap_btree.h" + #include "xfs_dinode.h" + #include "xfs_inode.h" +-#include "xfs_ioctl.h" ++// #include "xfs_ioctl.h" + #include "xfs_rtalloc.h" + #include "xfs_itable.h" + #include "xfs_error.h" +@@ -748,6 +748,10 @@ xfs_merge_ioc_xflags( + xflags |= XFS_XFLAG_IMMUTABLE; + else + xflags &= ~XFS_XFLAG_IMMUTABLE; ++ if (flags & FS_IXUNLINK_FL) ++ xflags |= XFS_XFLAG_IXUNLINK; ++ else ++ xflags &= ~XFS_XFLAG_IXUNLINK; + if (flags & FS_APPEND_FL) + xflags |= XFS_XFLAG_APPEND; + else +@@ -776,6 +780,8 @@ xfs_di2lxflags( + + if (di_flags & XFS_DIFLAG_IMMUTABLE) + flags |= FS_IMMUTABLE_FL; ++ if (di_flags & XFS_DIFLAG_IXUNLINK) ++ flags |= FS_IXUNLINK_FL; + if (di_flags & XFS_DIFLAG_APPEND) + flags |= FS_APPEND_FL; + if (di_flags & XFS_DIFLAG_SYNC) +@@ -836,6 +842,8 @@ xfs_set_diflags( + di_flags = (ip->i_d.di_flags & XFS_DIFLAG_PREALLOC); + if (xflags & XFS_XFLAG_IMMUTABLE) + di_flags |= XFS_DIFLAG_IMMUTABLE; ++ if (xflags & XFS_XFLAG_IXUNLINK) ++ di_flags |= XFS_DIFLAG_IXUNLINK; + if (xflags & XFS_XFLAG_APPEND) + di_flags |= XFS_DIFLAG_APPEND; + if (xflags & XFS_XFLAG_SYNC) +@@ -878,6 +886,10 @@ xfs_diflags_to_linux( + inode->i_flags |= S_IMMUTABLE; + else + inode->i_flags &= ~S_IMMUTABLE; ++ if (xflags & XFS_XFLAG_IXUNLINK) ++ inode->i_flags |= S_IXUNLINK; ++ else ++ inode->i_flags &= ~S_IXUNLINK; + if (xflags & XFS_XFLAG_APPEND) + inode->i_flags |= S_APPEND; + else +@@ -1370,10 +1382,18 @@ xfs_file_ioctl( + case XFS_IOC_FSGETXATTRA: + return xfs_ioc_fsgetxattr(ip, 1, arg); + case XFS_IOC_FSSETXATTR: ++ if (IS_BARRIER(inode)) { ++ vxwprintk_task(1, "messing with the barrier."); ++ return -XFS_ERROR(EACCES); ++ } + return xfs_ioc_fssetxattr(ip, filp, arg); + case XFS_IOC_GETXFLAGS: + return xfs_ioc_getxflags(ip, arg); + case XFS_IOC_SETXFLAGS: ++ if 
(IS_BARRIER(inode)) { ++ vxwprintk_task(1, "messing with the barrier."); ++ return -XFS_ERROR(EACCES); ++ } + return xfs_ioc_setxflags(ip, filp, arg); + + case XFS_IOC_FSSETDM: { +diff -NurpP --minimal linux-3.2.34/fs/xfs/xfs_ioctl.h linux-3.2.34-vs2.3.2.15/fs/xfs/xfs_ioctl.h +--- linux-3.2.34/fs/xfs/xfs_ioctl.h 2011-10-24 18:45:31.000000000 +0200 ++++ linux-3.2.34-vs2.3.2.15/fs/xfs/xfs_ioctl.h 2011-12-05 19:33:02.000000000 +0100 +@@ -70,6 +70,12 @@ xfs_handle_to_dentry( + void __user *uhandle, + u32 hlen); + ++extern int ++xfs_sync_flags( ++ struct inode *inode, ++ int flags, ++ int vflags); ++ + extern long + xfs_file_ioctl( + struct file *filp, +diff -NurpP --minimal linux-3.2.34/fs/xfs/xfs_iops.c linux-3.2.34-vs2.3.2.15/fs/xfs/xfs_iops.c +--- linux-3.2.34/fs/xfs/xfs_iops.c 2012-01-09 16:14:55.000000000 +0100 ++++ linux-3.2.34-vs2.3.2.15/fs/xfs/xfs_iops.c 2011-12-05 19:33:02.000000000 +0100 +@@ -30,6 +30,7 @@ + #include "xfs_bmap_btree.h" + #include "xfs_dinode.h" + #include "xfs_inode.h" ++#include "xfs_ioctl.h" + #include "xfs_bmap.h" + #include "xfs_rtalloc.h" + #include "xfs_error.h" +@@ -49,6 +50,7 @@ + #include + #include + #include ++#include + + /* + * Bring the timestamps in the XFS inode uptodate. +@@ -474,6 +476,7 @@ xfs_vn_getattr( + stat->nlink = ip->i_d.di_nlink; + stat->uid = ip->i_d.di_uid; + stat->gid = ip->i_d.di_gid; ++ stat->tag = ip->i_d.di_tag; + stat->ino = ip->i_ino; + stat->atime = inode->i_atime; + stat->mtime = inode->i_mtime; +@@ -1039,6 +1042,7 @@ static const struct inode_operations xfs + .removexattr = generic_removexattr, + .listxattr = xfs_vn_listxattr, + .fiemap = xfs_vn_fiemap, ++ .sync_flags = xfs_sync_flags, + }; + + static const struct inode_operations xfs_dir_inode_operations = { +@@ -1064,6 +1068,7 @@ static const struct inode_operations xfs + .getxattr = generic_getxattr, + .removexattr = generic_removexattr, + .listxattr = xfs_vn_listxattr, ++ .sync_flags = xfs_sync_flags, + }; + + static const struct inode_operations xfs_dir_ci_inode_operations = { +@@ -1113,6 +1118,10 @@ xfs_diflags_to_iflags( + inode->i_flags |= S_IMMUTABLE; + else + inode->i_flags &= ~S_IMMUTABLE; ++ if (ip->i_d.di_flags & XFS_DIFLAG_IXUNLINK) ++ inode->i_flags |= S_IXUNLINK; ++ else ++ inode->i_flags &= ~S_IXUNLINK; + if (ip->i_d.di_flags & XFS_DIFLAG_APPEND) + inode->i_flags |= S_APPEND; + else +@@ -1125,6 +1134,15 @@ xfs_diflags_to_iflags( + inode->i_flags |= S_NOATIME; + else + inode->i_flags &= ~S_NOATIME; ++ ++ if (ip->i_d.di_vflags & XFS_DIVFLAG_BARRIER) ++ inode->i_vflags |= V_BARRIER; ++ else ++ inode->i_vflags &= ~V_BARRIER; ++ if (ip->i_d.di_vflags & XFS_DIVFLAG_COW) ++ inode->i_vflags |= V_COW; ++ else ++ inode->i_vflags &= ~V_COW; + } + + /* +@@ -1156,6 +1174,7 @@ xfs_setup_inode( + set_nlink(inode, ip->i_d.di_nlink); + inode->i_uid = ip->i_d.di_uid; + inode->i_gid = ip->i_d.di_gid; ++ inode->i_tag = ip->i_d.di_tag; + + switch (inode->i_mode & S_IFMT) { + case S_IFBLK: +diff -NurpP --minimal linux-3.2.34/fs/xfs/xfs_itable.c linux-3.2.34-vs2.3.2.15/fs/xfs/xfs_itable.c +--- linux-3.2.34/fs/xfs/xfs_itable.c 2011-05-22 16:17:54.000000000 +0200 ++++ linux-3.2.34-vs2.3.2.15/fs/xfs/xfs_itable.c 2011-12-05 19:33:02.000000000 +0100 +@@ -98,6 +98,7 @@ xfs_bulkstat_one_int( + buf->bs_mode = dic->di_mode; + buf->bs_uid = dic->di_uid; + buf->bs_gid = dic->di_gid; ++ buf->bs_tag = dic->di_tag; + buf->bs_size = dic->di_size; + + /* +diff -NurpP --minimal linux-3.2.34/fs/xfs/xfs_linux.h linux-3.2.34-vs2.3.2.15/fs/xfs/xfs_linux.h +--- linux-3.2.34/fs/xfs/xfs_linux.h 2011-10-24 
18:45:31.000000000 +0200 ++++ linux-3.2.34-vs2.3.2.15/fs/xfs/xfs_linux.h 2011-12-05 19:33:02.000000000 +0100 +@@ -121,6 +121,7 @@ + + #define current_cpu() (raw_smp_processor_id()) + #define current_pid() (current->pid) ++#define current_fstag(vp) (dx_current_fstag((vp)->i_sb)) + #define current_test_flags(f) (current->flags & (f)) + #define current_set_flags_nested(sp, f) \ + (*(sp) = current->flags, current->flags |= (f)) +diff -NurpP --minimal linux-3.2.34/fs/xfs/xfs_log_recover.c linux-3.2.34-vs2.3.2.15/fs/xfs/xfs_log_recover.c +--- linux-3.2.34/fs/xfs/xfs_log_recover.c 2012-11-18 18:42:22.000000000 +0100 ++++ linux-3.2.34-vs2.3.2.15/fs/xfs/xfs_log_recover.c 2012-11-18 21:11:16.000000000 +0100 +@@ -2344,7 +2344,8 @@ xlog_recover_inode_pass2( + } + + /* The core is in in-core format */ +- xfs_dinode_to_disk(dip, item->ri_buf[1].i_addr); ++ xfs_dinode_to_disk(dip, item->ri_buf[1].i_addr, ++ mp->m_flags & XFS_MOUNT_TAGGED); + + /* the rest is in on-disk format */ + if (item->ri_buf[1].i_len > sizeof(struct xfs_icdinode)) { +diff -NurpP --minimal linux-3.2.34/fs/xfs/xfs_mount.h linux-3.2.34-vs2.3.2.15/fs/xfs/xfs_mount.h +--- linux-3.2.34/fs/xfs/xfs_mount.h 2011-10-24 18:45:31.000000000 +0200 ++++ linux-3.2.34-vs2.3.2.15/fs/xfs/xfs_mount.h 2011-12-05 19:33:02.000000000 +0100 +@@ -249,6 +249,7 @@ typedef struct xfs_mount { + allocator */ + #define XFS_MOUNT_NOATTR2 (1ULL << 25) /* disable use of attr2 format */ + ++#define XFS_MOUNT_TAGGED (1ULL << 31) /* context tagging */ + + /* + * Default minimum read and write sizes. +diff -NurpP --minimal linux-3.2.34/fs/xfs/xfs_super.c linux-3.2.34-vs2.3.2.15/fs/xfs/xfs_super.c +--- linux-3.2.34/fs/xfs/xfs_super.c 2012-01-09 16:14:55.000000000 +0100 ++++ linux-3.2.34-vs2.3.2.15/fs/xfs/xfs_super.c 2012-01-09 16:19:31.000000000 +0100 +@@ -113,6 +113,9 @@ mempool_t *xfs_ioend_pool; + #define MNTOPT_NODELAYLOG "nodelaylog" /* Delayed logging disabled */ + #define MNTOPT_DISCARD "discard" /* Discard unused blocks */ + #define MNTOPT_NODISCARD "nodiscard" /* Do not discard unused blocks */ ++#define MNTOPT_TAGXID "tagxid" /* context tagging for inodes */ ++#define MNTOPT_TAGGED "tag" /* context tagging for inodes */ ++#define MNTOPT_NOTAGTAG "notag" /* do not use context tagging */ + + /* + * Table driven mount option parser. +@@ -121,10 +124,14 @@ mempool_t *xfs_ioend_pool; + * in the future, too. 
+ */ + enum { ++ Opt_tag, Opt_notag, + Opt_barrier, Opt_nobarrier, Opt_err + }; + + static const match_table_t tokens = { ++ {Opt_tag, "tagxid"}, ++ {Opt_tag, "tag"}, ++ {Opt_notag, "notag"}, + {Opt_barrier, "barrier"}, + {Opt_nobarrier, "nobarrier"}, + {Opt_err, NULL} +@@ -374,6 +381,19 @@ xfs_parseargs( + } else if (!strcmp(this_char, "irixsgid")) { + xfs_warn(mp, + "irixsgid is now a sysctl(2) variable, option is deprecated."); ++#ifndef CONFIG_TAGGING_NONE ++ } else if (!strcmp(this_char, MNTOPT_TAGGED)) { ++ mp->m_flags |= XFS_MOUNT_TAGGED; ++ } else if (!strcmp(this_char, MNTOPT_NOTAGTAG)) { ++ mp->m_flags &= ~XFS_MOUNT_TAGGED; ++ } else if (!strcmp(this_char, MNTOPT_TAGXID)) { ++ mp->m_flags |= XFS_MOUNT_TAGGED; ++#endif ++#ifdef CONFIG_PROPAGATE ++ } else if (!strcmp(this_char, MNTOPT_TAGGED)) { ++ /* use value */ ++ mp->m_flags |= XFS_MOUNT_TAGGED; ++#endif + } else { + xfs_warn(mp, "unknown mount option [%s].", this_char); + return EINVAL; +@@ -1138,6 +1158,16 @@ xfs_fs_remount( + case Opt_nobarrier: + mp->m_flags &= ~XFS_MOUNT_BARRIER; + break; ++ case Opt_tag: ++ if (!(sb->s_flags & MS_TAGGED)) { ++ printk(KERN_INFO ++ "XFS: %s: tagging not permitted on remount.\n", ++ sb->s_id); ++ return -EINVAL; ++ } ++ break; ++ case Opt_notag: ++ break; + default: + /* + * Logically we would return an error here to prevent +@@ -1353,6 +1383,9 @@ xfs_fs_fill_super( + if (error) + goto out_free_sb; + ++ if (mp->m_flags & XFS_MOUNT_TAGGED) ++ sb->s_flags |= MS_TAGGED; ++ + /* + * we must configure the block size in the superblock before we run the + * full mount process as the mount process can lookup and cache inodes. +diff -NurpP --minimal linux-3.2.34/fs/xfs/xfs_vnodeops.c linux-3.2.34-vs2.3.2.15/fs/xfs/xfs_vnodeops.c +--- linux-3.2.34/fs/xfs/xfs_vnodeops.c 2012-11-18 18:42:22.000000000 +0100 ++++ linux-3.2.34-vs2.3.2.15/fs/xfs/xfs_vnodeops.c 2012-03-14 10:19:18.000000000 +0100 +@@ -106,6 +106,77 @@ xfs_readlink_bmap( + return error; + } + ++ ++STATIC void ++xfs_get_inode_flags( ++ xfs_inode_t *ip) ++{ ++ struct inode *inode = VFS_I(ip); ++ unsigned int flags = inode->i_flags; ++ unsigned int vflags = inode->i_vflags; ++ ++ if (flags & S_IMMUTABLE) ++ ip->i_d.di_flags |= XFS_DIFLAG_IMMUTABLE; ++ else ++ ip->i_d.di_flags &= ~XFS_DIFLAG_IMMUTABLE; ++ if (flags & S_IXUNLINK) ++ ip->i_d.di_flags |= XFS_DIFLAG_IXUNLINK; ++ else ++ ip->i_d.di_flags &= ~XFS_DIFLAG_IXUNLINK; ++ ++ if (vflags & V_BARRIER) ++ ip->i_d.di_vflags |= XFS_DIVFLAG_BARRIER; ++ else ++ ip->i_d.di_vflags &= ~XFS_DIVFLAG_BARRIER; ++ if (vflags & V_COW) ++ ip->i_d.di_vflags |= XFS_DIVFLAG_COW; ++ else ++ ip->i_d.di_vflags &= ~XFS_DIVFLAG_COW; ++} ++ ++int ++xfs_sync_flags( ++ struct inode *inode, ++ int flags, ++ int vflags) ++{ ++ struct xfs_inode *ip = XFS_I(inode); ++ struct xfs_mount *mp = ip->i_mount; ++ struct xfs_trans *tp; ++ unsigned int lock_flags = 0; ++ int code; ++ ++ tp = xfs_trans_alloc(mp, XFS_TRANS_SETATTR_NOT_SIZE); ++ code = xfs_trans_reserve(tp, 0, XFS_ICHANGE_LOG_RES(mp), 0, 0, 0); ++ if (code) ++ goto error_out; ++ ++ xfs_ilock(ip, XFS_ILOCK_EXCL); ++ xfs_trans_ijoin(tp, ip, 0); ++ ++ inode->i_flags = flags; ++ inode->i_vflags = vflags; ++ xfs_get_inode_flags(ip); ++ ++ xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE); ++ xfs_trans_ichgtime(tp, ip, XFS_ICHGTIME_CHG); ++ ++ XFS_STATS_INC(xs_ig_attrchg); ++ ++ if (mp->m_flags & XFS_MOUNT_WSYNC) ++ xfs_trans_set_sync(tp); ++ code = xfs_trans_commit(tp, 0); ++ xfs_iunlock(ip, XFS_ILOCK_EXCL); ++ return code; ++ ++error_out: ++ xfs_trans_cancel(tp, 0); ++ if 
(lock_flags) ++ xfs_iunlock(ip, XFS_ILOCK_EXCL); ++ return code; ++} ++ ++ + int + xfs_readlink( + xfs_inode_t *ip, +diff -NurpP --minimal linux-3.2.34/include/linux/Kbuild linux-3.2.34-vs2.3.2.15/include/linux/Kbuild +--- linux-3.2.34/include/linux/Kbuild 2012-11-18 18:42:22.000000000 +0100 ++++ linux-3.2.34-vs2.3.2.15/include/linux/Kbuild 2012-08-13 12:40:51.000000000 +0200 +@@ -17,6 +17,7 @@ header-y += netfilter_bridge/ + header-y += netfilter_ipv4/ + header-y += netfilter_ipv6/ + header-y += usb/ ++header-y += vserver/ + header-y += wimax/ + + objhdr-y += version.h +diff -NurpP --minimal linux-3.2.34/include/linux/capability.h linux-3.2.34-vs2.3.2.15/include/linux/capability.h +--- linux-3.2.34/include/linux/capability.h 2012-01-09 16:14:56.000000000 +0100 ++++ linux-3.2.34-vs2.3.2.15/include/linux/capability.h 2011-12-05 19:33:02.000000000 +0100 +@@ -280,6 +280,7 @@ struct cpu_vfs_cap_data { + arbitrary SCSI commands */ + /* Allow setting encryption key on loopback filesystem */ + /* Allow setting zone reclaim policy */ ++/* Allow the selection of a security context */ + + #define CAP_SYS_ADMIN 21 + +@@ -363,7 +364,12 @@ struct cpu_vfs_cap_data { + + #define CAP_LAST_CAP CAP_WAKE_ALARM + +-#define cap_valid(x) ((x) >= 0 && (x) <= CAP_LAST_CAP) ++/* Allow context manipulations */ ++/* Allow changing context info on files */ ++ ++#define CAP_CONTEXT 63 ++ ++#define cap_valid(x) ((x) >= 0 && ((x) <= CAP_LAST_CAP || (x) == CAP_CONTEXT)) + + /* + * Bit location of each capability (used by user-space library and kernel) +diff -NurpP --minimal linux-3.2.34/include/linux/cred.h linux-3.2.34-vs2.3.2.15/include/linux/cred.h +--- linux-3.2.34/include/linux/cred.h 2011-10-24 18:45:31.000000000 +0200 ++++ linux-3.2.34-vs2.3.2.15/include/linux/cred.h 2011-12-05 19:33:02.000000000 +0100 +@@ -156,6 +156,7 @@ extern void exit_creds(struct task_struc + extern int copy_creds(struct task_struct *, unsigned long); + extern const struct cred *get_task_cred(struct task_struct *); + extern struct cred *cred_alloc_blank(void); ++extern struct cred *__prepare_creds(const struct cred *); + extern struct cred *prepare_creds(void); + extern struct cred *prepare_exec_creds(void); + extern int commit_creds(struct cred *); +@@ -209,6 +210,31 @@ static inline void validate_process_cred + } + #endif + ++static inline void set_cred_subscribers(struct cred *cred, int n) ++{ ++#ifdef CONFIG_DEBUG_CREDENTIALS ++ atomic_set(&cred->subscribers, n); ++#endif ++} ++ ++static inline int read_cred_subscribers(const struct cred *cred) ++{ ++#ifdef CONFIG_DEBUG_CREDENTIALS ++ return atomic_read(&cred->subscribers); ++#else ++ return 0; ++#endif ++} ++ ++static inline void alter_cred_subscribers(const struct cred *_cred, int n) ++{ ++#ifdef CONFIG_DEBUG_CREDENTIALS ++ struct cred *cred = (struct cred *) _cred; ++ ++ atomic_add(n, &cred->subscribers); ++#endif ++} ++ + /** + * get_new_cred - Get a reference on a new set of credentials + * @cred: The new credentials to reference +diff -NurpP --minimal linux-3.2.34/include/linux/devpts_fs.h linux-3.2.34-vs2.3.2.15/include/linux/devpts_fs.h +--- linux-3.2.34/include/linux/devpts_fs.h 2008-12-25 00:26:37.000000000 +0100 ++++ linux-3.2.34-vs2.3.2.15/include/linux/devpts_fs.h 2011-12-05 19:33:02.000000000 +0100 +@@ -45,5 +45,4 @@ static inline void devpts_pty_kill(struc + + #endif + +- + #endif /* _LINUX_DEVPTS_FS_H */ +diff -NurpP --minimal linux-3.2.34/include/linux/ext2_fs.h linux-3.2.34-vs2.3.2.15/include/linux/ext2_fs.h +--- linux-3.2.34/include/linux/ext2_fs.h 2012-01-09 
16:14:56.000000000 +0100 ++++ linux-3.2.34-vs2.3.2.15/include/linux/ext2_fs.h 2011-12-05 19:33:02.000000000 +0100 +@@ -190,8 +190,12 @@ struct ext2_group_desc + #define EXT2_NOTAIL_FL FS_NOTAIL_FL /* file tail should not be merged */ + #define EXT2_DIRSYNC_FL FS_DIRSYNC_FL /* dirsync behaviour (directories only) */ + #define EXT2_TOPDIR_FL FS_TOPDIR_FL /* Top of directory hierarchies*/ ++#define EXT2_IXUNLINK_FL FS_IXUNLINK_FL /* Immutable invert on unlink */ + #define EXT2_RESERVED_FL FS_RESERVED_FL /* reserved for ext2 lib */ + ++#define EXT2_BARRIER_FL FS_BARRIER_FL /* Barrier for chroot() */ ++#define EXT2_COW_FL FS_COW_FL /* Copy on Write marker */ ++ + #define EXT2_FL_USER_VISIBLE FS_FL_USER_VISIBLE /* User visible flags */ + #define EXT2_FL_USER_MODIFIABLE FS_FL_USER_MODIFIABLE /* User modifiable flags */ + +@@ -275,7 +279,8 @@ struct ext2_inode { + __u16 i_pad1; + __le16 l_i_uid_high; /* these 2 fields */ + __le16 l_i_gid_high; /* were reserved2[0] */ +- __u32 l_i_reserved2; ++ __le16 l_i_tag; /* Context Tag */ ++ __u16 l_i_reserved2; + } linux2; + struct { + __u8 h_i_frag; /* Fragment number */ +@@ -304,6 +309,7 @@ struct ext2_inode { + #define i_gid_low i_gid + #define i_uid_high osd2.linux2.l_i_uid_high + #define i_gid_high osd2.linux2.l_i_gid_high ++#define i_raw_tag osd2.linux2.l_i_tag + #define i_reserved2 osd2.linux2.l_i_reserved2 + #endif + +@@ -348,6 +354,7 @@ struct ext2_inode { + #define EXT2_MOUNT_USRQUOTA 0x020000 /* user quota */ + #define EXT2_MOUNT_GRPQUOTA 0x040000 /* group quota */ + #define EXT2_MOUNT_RESERVATION 0x080000 /* Preallocation */ ++#define EXT2_MOUNT_TAGGED (1<<24) /* Enable Context Tags */ + + + #define clear_opt(o, opt) o &= ~EXT2_MOUNT_##opt +diff -NurpP --minimal linux-3.2.34/include/linux/ext3_fs.h linux-3.2.34-vs2.3.2.15/include/linux/ext3_fs.h +--- linux-3.2.34/include/linux/ext3_fs.h 2012-01-09 16:14:56.000000000 +0100 ++++ linux-3.2.34-vs2.3.2.15/include/linux/ext3_fs.h 2011-12-05 19:33:02.000000000 +0100 +@@ -173,10 +173,14 @@ struct ext3_group_desc + #define EXT3_NOTAIL_FL 0x00008000 /* file tail should not be merged */ + #define EXT3_DIRSYNC_FL 0x00010000 /* dirsync behaviour (directories only) */ + #define EXT3_TOPDIR_FL 0x00020000 /* Top of directory hierarchies*/ ++#define EXT3_IXUNLINK_FL 0x08000000 /* Immutable invert on unlink */ + #define EXT3_RESERVED_FL 0x80000000 /* reserved for ext3 lib */ + +-#define EXT3_FL_USER_VISIBLE 0x0003DFFF /* User visible flags */ +-#define EXT3_FL_USER_MODIFIABLE 0x000380FF /* User modifiable flags */ ++#define EXT3_BARRIER_FL 0x04000000 /* Barrier for chroot() */ ++#define EXT3_COW_FL 0x20000000 /* Copy on Write marker */ ++ ++#define EXT3_FL_USER_VISIBLE 0x0103DFFF /* User visible flags */ ++#define EXT3_FL_USER_MODIFIABLE 0x010380FF /* User modifiable flags */ + + /* Flags that should be inherited by new inodes from their parent. 
*/ + #define EXT3_FL_INHERITED (EXT3_SECRM_FL | EXT3_UNRM_FL | EXT3_COMPR_FL |\ +@@ -312,7 +316,8 @@ struct ext3_inode { + __u16 i_pad1; + __le16 l_i_uid_high; /* these 2 fields */ + __le16 l_i_gid_high; /* were reserved2[0] */ +- __u32 l_i_reserved2; ++ __le16 l_i_tag; /* Context Tag */ ++ __u16 l_i_reserved2; + } linux2; + struct { + __u8 h_i_frag; /* Fragment number */ +@@ -343,6 +348,7 @@ struct ext3_inode { + #define i_gid_low i_gid + #define i_uid_high osd2.linux2.l_i_uid_high + #define i_gid_high osd2.linux2.l_i_gid_high ++#define i_raw_tag osd2.linux2.l_i_tag + #define i_reserved2 osd2.linux2.l_i_reserved2 + + #elif defined(__GNU__) +@@ -405,6 +411,7 @@ struct ext3_inode { + #define EXT3_MOUNT_GRPQUOTA 0x200000 /* "old" group quota */ + #define EXT3_MOUNT_DATA_ERR_ABORT 0x400000 /* Abort on file data write + * error in ordered mode */ ++#define EXT3_MOUNT_TAGGED (1<<24) /* Enable Context Tags */ + + /* Compatibility, for having both ext2_fs.h and ext3_fs.h included at once */ + #ifndef _LINUX_EXT2_FS_H +@@ -918,6 +925,7 @@ extern void ext3_get_inode_flags(struct + extern void ext3_set_aops(struct inode *inode); + extern int ext3_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo, + u64 start, u64 len); ++extern int ext3_sync_flags(struct inode *, int, int); + + /* ioctl.c */ + extern long ext3_ioctl(struct file *, unsigned int, unsigned long); +diff -NurpP --minimal linux-3.2.34/include/linux/fs.h linux-3.2.34-vs2.3.2.15/include/linux/fs.h +--- linux-3.2.34/include/linux/fs.h 2012-11-18 18:42:23.000000000 +0100 ++++ linux-3.2.34-vs2.3.2.15/include/linux/fs.h 2012-08-13 12:40:51.000000000 +0200 +@@ -210,6 +210,9 @@ struct inodes_stat_t { + #define MS_KERNMOUNT (1<<22) /* this is a kern_mount call */ + #define MS_I_VERSION (1<<23) /* Update inode I_version field */ + #define MS_STRICTATIME (1<<24) /* Always perform atime updates */ ++#define MS_TAGGED (1<<25) /* use generic inode tagging */ ++#define MS_TAGID (1<<26) /* use specific tag for this mount */ ++#define MS_NOTAGCHECK (1<<27) /* don't check tags */ + #define MS_NOSEC (1<<28) + #define MS_BORN (1<<29) + #define MS_ACTIVE (1<<30) +@@ -241,6 +244,14 @@ struct inodes_stat_t { + #define S_IMA 1024 /* Inode has an associated IMA struct */ + #define S_AUTOMOUNT 2048 /* Automount/referral quasi-directory */ + #define S_NOSEC 4096 /* no suid or xattr security attributes */ ++#define S_IXUNLINK 8192 /* Immutable Invert on unlink */ ++ ++/* Linux-VServer related Inode flags */ ++ ++#define V_VALID 1 ++#define V_XATTR 2 ++#define V_BARRIER 4 /* Barrier for chroot() */ ++#define V_COW 8 /* Copy on Write */ + + /* + * Note that nosuid etc flags are inode-specific: setting some file-system +@@ -263,12 +274,15 @@ struct inodes_stat_t { + #define IS_DIRSYNC(inode) (__IS_FLG(inode, MS_SYNCHRONOUS|MS_DIRSYNC) || \ + ((inode)->i_flags & (S_SYNC|S_DIRSYNC))) + #define IS_MANDLOCK(inode) __IS_FLG(inode, MS_MANDLOCK) +-#define IS_NOATIME(inode) __IS_FLG(inode, MS_RDONLY|MS_NOATIME) +-#define IS_I_VERSION(inode) __IS_FLG(inode, MS_I_VERSION) ++#define IS_NOATIME(inode) __IS_FLG(inode, MS_RDONLY|MS_NOATIME) ++#define IS_I_VERSION(inode) __IS_FLG(inode, MS_I_VERSION) ++#define IS_TAGGED(inode) __IS_FLG(inode, MS_TAGGED) + + #define IS_NOQUOTA(inode) ((inode)->i_flags & S_NOQUOTA) + #define IS_APPEND(inode) ((inode)->i_flags & S_APPEND) + #define IS_IMMUTABLE(inode) ((inode)->i_flags & S_IMMUTABLE) ++#define IS_IXUNLINK(inode) ((inode)->i_flags & S_IXUNLINK) ++#define IS_IXORUNLINK(inode) ((IS_IXUNLINK(inode) ? 
S_IMMUTABLE : 0) ^ IS_IMMUTABLE(inode)) + #define IS_POSIXACL(inode) __IS_FLG(inode, MS_POSIXACL) + + #define IS_DEADDIR(inode) ((inode)->i_flags & S_DEAD) +@@ -279,6 +293,16 @@ struct inodes_stat_t { + #define IS_AUTOMOUNT(inode) ((inode)->i_flags & S_AUTOMOUNT) + #define IS_NOSEC(inode) ((inode)->i_flags & S_NOSEC) + ++#define IS_BARRIER(inode) (S_ISDIR((inode)->i_mode) && ((inode)->i_vflags & V_BARRIER)) ++ ++#ifdef CONFIG_VSERVER_COWBL ++# define IS_COW(inode) (IS_IXUNLINK(inode) && IS_IMMUTABLE(inode)) ++# define IS_COW_LINK(inode) (S_ISREG((inode)->i_mode) && ((inode)->i_nlink > 1)) ++#else ++# define IS_COW(inode) (0) ++# define IS_COW_LINK(inode) (0) ++#endif ++ + /* the read-only stuff doesn't really belong here, but any other place is + probably as bad and I don't want to create yet another include file. */ + +@@ -364,11 +388,14 @@ struct inodes_stat_t { + #define FS_EXTENT_FL 0x00080000 /* Extents */ + #define FS_DIRECTIO_FL 0x00100000 /* Use direct i/o */ + #define FS_NOCOW_FL 0x00800000 /* Do not cow file */ ++#define FS_IXUNLINK_FL 0x08000000 /* Immutable invert on unlink */ + #define FS_RESERVED_FL 0x80000000 /* reserved for ext2 lib */ + +-#define FS_FL_USER_VISIBLE 0x0003DFFF /* User visible flags */ +-#define FS_FL_USER_MODIFIABLE 0x000380FF /* User modifiable flags */ ++#define FS_BARRIER_FL 0x04000000 /* Barrier for chroot() */ ++#define FS_COW_FL 0x20000000 /* Copy on Write marker */ + ++#define FS_FL_USER_VISIBLE 0x0103DFFF /* User visible flags */ ++#define FS_FL_USER_MODIFIABLE 0x010380FF /* User modifiable flags */ + + #define SYNC_FILE_RANGE_WAIT_BEFORE 1 + #define SYNC_FILE_RANGE_WRITE 2 +@@ -449,6 +476,7 @@ typedef void (dio_iodone_t)(struct kiocb + #define ATTR_KILL_PRIV (1 << 14) + #define ATTR_OPEN (1 << 15) /* Truncating from open(O_TRUNC) */ + #define ATTR_TIMES_SET (1 << 16) ++#define ATTR_TAG (1 << 17) + + /* + * This is the Inode Attributes structure, used for notify_change(). It +@@ -464,6 +492,7 @@ struct iattr { + umode_t ia_mode; + uid_t ia_uid; + gid_t ia_gid; ++ tag_t ia_tag; + loff_t ia_size; + struct timespec ia_atime; + struct timespec ia_mtime; +@@ -477,6 +506,9 @@ struct iattr { + struct file *ia_file; + }; + ++#define ATTR_FLAG_BARRIER 512 /* Barrier for chroot() */ ++#define ATTR_FLAG_IXUNLINK 1024 /* Immutable invert on unlink */ ++ + /* + * Includes for diskquotas. 
+ */ +@@ -755,7 +787,9 @@ struct inode { + unsigned short i_opflags; + uid_t i_uid; + gid_t i_gid; +- unsigned int i_flags; ++ tag_t i_tag; ++ unsigned short i_flags; ++ unsigned short i_vflags; + + #ifdef CONFIG_FS_POSIX_ACL + struct posix_acl *i_acl; +@@ -784,6 +818,7 @@ struct inode { + unsigned int __i_nlink; + }; + dev_t i_rdev; ++ dev_t i_mdev; + struct timespec i_atime; + struct timespec i_mtime; + struct timespec i_ctime; +@@ -921,12 +956,12 @@ static inline void i_size_write(struct i + + static inline unsigned iminor(const struct inode *inode) + { +- return MINOR(inode->i_rdev); ++ return MINOR(inode->i_mdev); + } + + static inline unsigned imajor(const struct inode *inode) + { +- return MAJOR(inode->i_rdev); ++ return MAJOR(inode->i_mdev); + } + + extern struct block_device *I_BDEV(struct inode *inode); +@@ -993,6 +1028,7 @@ struct file { + loff_t f_pos; + struct fown_struct f_owner; + const struct cred *f_cred; ++ xid_t f_xid; + struct file_ra_state f_ra; + + u64 f_version; +@@ -1140,6 +1176,7 @@ struct file_lock { + struct file *fl_file; + loff_t fl_start; + loff_t fl_end; ++ xid_t fl_xid; + + struct fasync_struct * fl_fasync; /* for lease break notifications */ + /* for lease breaks: */ +@@ -1645,6 +1682,7 @@ struct inode_operations { + ssize_t (*getxattr) (struct dentry *, const char *, void *, size_t); + ssize_t (*listxattr) (struct dentry *, char *, size_t); + int (*removexattr) (struct dentry *, const char *); ++ int (*sync_flags) (struct inode *, int, int); + void (*truncate_range)(struct inode *, loff_t, loff_t); + int (*fiemap)(struct inode *, struct fiemap_extent_info *, u64 start, + u64 len); +@@ -1664,6 +1702,7 @@ extern ssize_t vfs_readv(struct file *, + unsigned long, loff_t *); + extern ssize_t vfs_writev(struct file *, const struct iovec __user *, + unsigned long, loff_t *); ++ssize_t vfs_sendfile(struct file *, struct file *, loff_t *, size_t, loff_t); + + struct super_operations { + struct inode *(*alloc_inode)(struct super_block *sb); +@@ -2552,6 +2591,7 @@ extern int dcache_dir_open(struct inode + extern int dcache_dir_close(struct inode *, struct file *); + extern loff_t dcache_dir_lseek(struct file *, loff_t, int); + extern int dcache_readdir(struct file *, void *, filldir_t); ++extern int dcache_readdir_filter(struct file *, void *, filldir_t, int (*)(struct dentry *)); + extern int simple_setattr(struct dentry *, struct iattr *); + extern int simple_getattr(struct vfsmount *, struct dentry *, struct kstat *); + extern int simple_statfs(struct dentry *, struct kstatfs *); +diff -NurpP --minimal linux-3.2.34/include/linux/gfs2_ondisk.h linux-3.2.34-vs2.3.2.15/include/linux/gfs2_ondisk.h +--- linux-3.2.34/include/linux/gfs2_ondisk.h 2010-07-07 18:31:55.000000000 +0200 ++++ linux-3.2.34-vs2.3.2.15/include/linux/gfs2_ondisk.h 2011-12-05 19:33:02.000000000 +0100 +@@ -211,6 +211,9 @@ enum { + gfs2fl_NoAtime = 7, + gfs2fl_Sync = 8, + gfs2fl_System = 9, ++ gfs2fl_IXUnlink = 16, ++ gfs2fl_Barrier = 17, ++ gfs2fl_Cow = 18, + gfs2fl_TruncInProg = 29, + gfs2fl_InheritDirectio = 30, + gfs2fl_InheritJdata = 31, +@@ -227,6 +230,9 @@ enum { + #define GFS2_DIF_NOATIME 0x00000080 + #define GFS2_DIF_SYNC 0x00000100 + #define GFS2_DIF_SYSTEM 0x00000200 /* New in gfs2 */ ++#define GFS2_DIF_IXUNLINK 0x00010000 ++#define GFS2_DIF_BARRIER 0x00020000 ++#define GFS2_DIF_COW 0x00040000 + #define GFS2_DIF_TRUNC_IN_PROG 0x20000000 /* New in gfs2 */ + #define GFS2_DIF_INHERIT_DIRECTIO 0x40000000 + #define GFS2_DIF_INHERIT_JDATA 0x80000000 +diff -NurpP --minimal 
linux-3.2.34/include/linux/if_tun.h linux-3.2.34-vs2.3.2.15/include/linux/if_tun.h +--- linux-3.2.34/include/linux/if_tun.h 2010-08-02 16:52:54.000000000 +0200 ++++ linux-3.2.34-vs2.3.2.15/include/linux/if_tun.h 2011-12-05 19:33:02.000000000 +0100 +@@ -53,6 +53,7 @@ + #define TUNDETACHFILTER _IOW('T', 214, struct sock_fprog) + #define TUNGETVNETHDRSZ _IOR('T', 215, int) + #define TUNSETVNETHDRSZ _IOW('T', 216, int) ++#define TUNSETNID _IOW('T', 217, int) + + /* TUNSETIFF ifr flags */ + #define IFF_TUN 0x0001 +diff -NurpP --minimal linux-3.2.34/include/linux/init_task.h linux-3.2.34-vs2.3.2.15/include/linux/init_task.h +--- linux-3.2.34/include/linux/init_task.h 2012-11-18 18:42:23.000000000 +0100 ++++ linux-3.2.34-vs2.3.2.15/include/linux/init_task.h 2012-08-13 12:42:19.000000000 +0200 +@@ -211,6 +211,10 @@ extern struct task_group root_task_group + INIT_TRACE_RECURSION \ + INIT_TASK_RCU_PREEMPT(tsk) \ + INIT_CPUSET_SEQ \ ++ .xid = 0, \ ++ .vx_info = NULL, \ ++ .nid = 0, \ ++ .nx_info = NULL, \ + } + + +diff -NurpP --minimal linux-3.2.34/include/linux/ipc.h linux-3.2.34-vs2.3.2.15/include/linux/ipc.h +--- linux-3.2.34/include/linux/ipc.h 2009-12-03 20:02:55.000000000 +0100 ++++ linux-3.2.34-vs2.3.2.15/include/linux/ipc.h 2011-12-05 19:33:02.000000000 +0100 +@@ -91,6 +91,7 @@ struct kern_ipc_perm + key_t key; + uid_t uid; + gid_t gid; ++ xid_t xid; + uid_t cuid; + gid_t cgid; + mode_t mode; +diff -NurpP --minimal linux-3.2.34/include/linux/ipc_namespace.h linux-3.2.34-vs2.3.2.15/include/linux/ipc_namespace.h +--- linux-3.2.34/include/linux/ipc_namespace.h 2011-10-24 18:45:32.000000000 +0200 ++++ linux-3.2.34-vs2.3.2.15/include/linux/ipc_namespace.h 2011-12-05 19:33:02.000000000 +0100 +@@ -101,7 +101,8 @@ static inline int mq_init_ns(struct ipc_ + + #if defined(CONFIG_IPC_NS) + extern struct ipc_namespace *copy_ipcs(unsigned long flags, +- struct task_struct *tsk); ++ struct ipc_namespace *old_ns, ++ struct user_namespace *user_ns); + static inline struct ipc_namespace *get_ipc_ns(struct ipc_namespace *ns) + { + if (ns) +@@ -112,12 +113,13 @@ static inline struct ipc_namespace *get_ + extern void put_ipc_ns(struct ipc_namespace *ns); + #else + static inline struct ipc_namespace *copy_ipcs(unsigned long flags, +- struct task_struct *tsk) ++ struct ipc_namespace *old_ns, ++ struct user_namespace *user_ns) + { + if (flags & CLONE_NEWIPC) + return ERR_PTR(-EINVAL); + +- return tsk->nsproxy->ipc_ns; ++ return old_ns; + } + + static inline struct ipc_namespace *get_ipc_ns(struct ipc_namespace *ns) +diff -NurpP --minimal linux-3.2.34/include/linux/loop.h linux-3.2.34-vs2.3.2.15/include/linux/loop.h +--- linux-3.2.34/include/linux/loop.h 2012-01-09 16:14:58.000000000 +0100 ++++ linux-3.2.34-vs2.3.2.15/include/linux/loop.h 2011-12-05 19:33:02.000000000 +0100 +@@ -45,6 +45,7 @@ struct loop_device { + struct loop_func_table *lo_encryption; + __u32 lo_init[2]; + uid_t lo_key_owner; /* Who set the key */ ++ xid_t lo_xid; + int (*ioctl)(struct loop_device *, int cmd, + unsigned long arg); + +diff -NurpP --minimal linux-3.2.34/include/linux/magic.h linux-3.2.34-vs2.3.2.15/include/linux/magic.h +--- linux-3.2.34/include/linux/magic.h 2012-01-09 16:14:58.000000000 +0100 ++++ linux-3.2.34-vs2.3.2.15/include/linux/magic.h 2011-12-05 19:33:02.000000000 +0100 +@@ -3,7 +3,7 @@ + + #define ADFS_SUPER_MAGIC 0xadf5 + #define AFFS_SUPER_MAGIC 0xadff +-#define AFS_SUPER_MAGIC 0x5346414F ++#define AFS_SUPER_MAGIC 0x5346414F + #define AUTOFS_SUPER_MAGIC 0x0187 + #define CODA_SUPER_MAGIC 0x73757245 + #define CRAMFS_MAGIC 
0x28cd3d45 /* some random number */ +@@ -41,6 +41,7 @@ + #define NFS_SUPER_MAGIC 0x6969 + #define OPENPROM_SUPER_MAGIC 0x9fa1 + #define PROC_SUPER_MAGIC 0x9fa0 ++#define DEVPTS_SUPER_MAGIC 0x1cd1 + #define QNX4_SUPER_MAGIC 0x002f /* qnx4 fs detection */ + + #define REISERFS_SUPER_MAGIC 0x52654973 /* used by gcc */ +diff -NurpP --minimal linux-3.2.34/include/linux/major.h linux-3.2.34-vs2.3.2.15/include/linux/major.h +--- linux-3.2.34/include/linux/major.h 2009-09-10 15:26:25.000000000 +0200 ++++ linux-3.2.34-vs2.3.2.15/include/linux/major.h 2011-12-05 19:33:02.000000000 +0100 +@@ -15,6 +15,7 @@ + #define HD_MAJOR IDE0_MAJOR + #define PTY_SLAVE_MAJOR 3 + #define TTY_MAJOR 4 ++#define VROOT_MAJOR 4 + #define TTYAUX_MAJOR 5 + #define LP_MAJOR 6 + #define VCS_MAJOR 7 +diff -NurpP --minimal linux-3.2.34/include/linux/memcontrol.h linux-3.2.34-vs2.3.2.15/include/linux/memcontrol.h +--- linux-3.2.34/include/linux/memcontrol.h 2012-11-18 18:42:23.000000000 +0100 ++++ linux-3.2.34-vs2.3.2.15/include/linux/memcontrol.h 2012-01-26 08:52:10.000000000 +0100 +@@ -85,6 +85,13 @@ extern struct mem_cgroup *try_get_mem_cg + extern struct mem_cgroup *mem_cgroup_from_task(struct task_struct *p); + extern struct mem_cgroup *try_get_mem_cgroup_from_mm(struct mm_struct *mm); + ++extern u64 mem_cgroup_res_read_u64(struct mem_cgroup *mem, int member); ++extern u64 mem_cgroup_memsw_read_u64(struct mem_cgroup *mem, int member); ++ ++extern s64 mem_cgroup_stat_read_cache(struct mem_cgroup *mem); ++extern s64 mem_cgroup_stat_read_anon(struct mem_cgroup *mem); ++extern s64 mem_cgroup_stat_read_mapped(struct mem_cgroup *mem); ++ + static inline + int mm_match_cgroup(const struct mm_struct *mm, const struct mem_cgroup *cgroup) + { +diff -NurpP --minimal linux-3.2.34/include/linux/mm_types.h linux-3.2.34-vs2.3.2.15/include/linux/mm_types.h +--- linux-3.2.34/include/linux/mm_types.h 2012-01-09 16:14:58.000000000 +0100 ++++ linux-3.2.34-vs2.3.2.15/include/linux/mm_types.h 2011-12-05 19:33:02.000000000 +0100 +@@ -344,6 +344,7 @@ struct mm_struct { + + /* Architecture-specific MM context */ + mm_context_t context; ++ struct vx_info *mm_vx_info; + + /* Swap token stuff */ + /* +diff -NurpP --minimal linux-3.2.34/include/linux/mmzone.h linux-3.2.34-vs2.3.2.15/include/linux/mmzone.h +--- linux-3.2.34/include/linux/mmzone.h 2012-11-18 18:42:23.000000000 +0100 ++++ linux-3.2.34-vs2.3.2.15/include/linux/mmzone.h 2012-08-13 12:40:51.000000000 +0200 +@@ -675,6 +675,13 @@ typedef struct pglist_data { + __pgdat->node_start_pfn + __pgdat->node_spanned_pages;\ + }) + ++#define node_start_pfn(nid) (NODE_DATA(nid)->node_start_pfn) ++ ++#define node_end_pfn(nid) ({\ ++ pg_data_t *__pgdat = NODE_DATA(nid);\ ++ __pgdat->node_start_pfn + __pgdat->node_spanned_pages;\ ++}) ++ + #include + + extern struct mutex zonelists_mutex; +diff -NurpP --minimal linux-3.2.34/include/linux/mount.h linux-3.2.34-vs2.3.2.15/include/linux/mount.h +--- linux-3.2.34/include/linux/mount.h 2011-10-24 18:45:32.000000000 +0200 ++++ linux-3.2.34-vs2.3.2.15/include/linux/mount.h 2011-12-05 19:33:02.000000000 +0100 +@@ -52,6 +52,9 @@ struct mnt_pcp { + int mnt_writers; + }; + ++#define MNT_TAGID 0x10000 ++#define MNT_NOTAG 0x20000 ++ + struct vfsmount { + struct list_head mnt_hash; + struct vfsmount *mnt_parent; /* fs we are mounted on */ +@@ -86,6 +89,7 @@ struct vfsmount { + int mnt_expiry_mark; /* true if marked for expiry */ + int mnt_pinned; + int mnt_ghosts; ++ tag_t mnt_tag; /* tagging used for vfsmount */ + }; + + struct file; /* forward dec */ +diff -NurpP 
--minimal linux-3.2.34/include/linux/net.h linux-3.2.34-vs2.3.2.15/include/linux/net.h +--- linux-3.2.34/include/linux/net.h 2011-07-22 11:18:11.000000000 +0200 ++++ linux-3.2.34-vs2.3.2.15/include/linux/net.h 2011-12-05 19:33:02.000000000 +0100 +@@ -72,6 +72,7 @@ struct net; + #define SOCK_NOSPACE 2 + #define SOCK_PASSCRED 3 + #define SOCK_PASSSEC 4 ++#define SOCK_USER_SOCKET 5 + + #ifndef ARCH_HAS_SOCKET_TYPES + /** +diff -NurpP --minimal linux-3.2.34/include/linux/netdevice.h linux-3.2.34-vs2.3.2.15/include/linux/netdevice.h +--- linux-3.2.34/include/linux/netdevice.h 2012-11-18 18:42:23.000000000 +0100 ++++ linux-3.2.34-vs2.3.2.15/include/linux/netdevice.h 2012-10-22 12:59:52.000000000 +0200 +@@ -1641,6 +1641,7 @@ extern void netdev_resync_ops(struct ne + + extern struct net_device *dev_get_by_index(struct net *net, int ifindex); + extern struct net_device *__dev_get_by_index(struct net *net, int ifindex); ++extern struct net_device *dev_get_by_index_real_rcu(struct net *net, int ifindex); + extern struct net_device *dev_get_by_index_rcu(struct net *net, int ifindex); + extern int dev_restart(struct net_device *dev); + #ifdef CONFIG_NETPOLL_TRAP +diff -NurpP --minimal linux-3.2.34/include/linux/nfs_mount.h linux-3.2.34-vs2.3.2.15/include/linux/nfs_mount.h +--- linux-3.2.34/include/linux/nfs_mount.h 2011-01-05 21:50:31.000000000 +0100 ++++ linux-3.2.34-vs2.3.2.15/include/linux/nfs_mount.h 2011-12-05 19:33:02.000000000 +0100 +@@ -63,7 +63,8 @@ struct nfs_mount_data { + #define NFS_MOUNT_SECFLAVOUR 0x2000 /* 5 */ + #define NFS_MOUNT_NORDIRPLUS 0x4000 /* 5 */ + #define NFS_MOUNT_UNSHARED 0x8000 /* 5 */ +-#define NFS_MOUNT_FLAGMASK 0xFFFF ++#define NFS_MOUNT_TAGGED 0x10000 /* context tagging */ ++#define NFS_MOUNT_FLAGMASK 0x1FFFF + + /* The following are for internal use only */ + #define NFS_MOUNT_LOOKUP_CACHE_NONEG 0x10000 +diff -NurpP --minimal linux-3.2.34/include/linux/nsproxy.h linux-3.2.34-vs2.3.2.15/include/linux/nsproxy.h +--- linux-3.2.34/include/linux/nsproxy.h 2011-10-24 18:45:32.000000000 +0200 ++++ linux-3.2.34-vs2.3.2.15/include/linux/nsproxy.h 2011-12-05 19:33:02.000000000 +0100 +@@ -3,6 +3,7 @@ + + #include + #include ++#include + + struct mnt_namespace; + struct uts_namespace; +@@ -63,6 +64,7 @@ static inline struct nsproxy *task_nspro + } + + int copy_namespaces(unsigned long flags, struct task_struct *tsk); ++struct nsproxy *copy_nsproxy(struct nsproxy *orig); + void exit_task_namespaces(struct task_struct *tsk); + void switch_task_namespaces(struct task_struct *tsk, struct nsproxy *new); + void free_nsproxy(struct nsproxy *ns); +@@ -70,16 +72,26 @@ int unshare_nsproxy_namespaces(unsigned + struct fs_struct *); + int __init nsproxy_cache_init(void); + +-static inline void put_nsproxy(struct nsproxy *ns) ++#define get_nsproxy(n) __get_nsproxy(n, __FILE__, __LINE__) ++ ++static inline void __get_nsproxy(struct nsproxy *ns, ++ const char *_file, int _line) + { +- if (atomic_dec_and_test(&ns->count)) { +- free_nsproxy(ns); +- } ++ vxlprintk(VXD_CBIT(space, 0), "get_nsproxy(%p[%u])", ++ ns, atomic_read(&ns->count), _file, _line); ++ atomic_inc(&ns->count); + } + +-static inline void get_nsproxy(struct nsproxy *ns) ++#define put_nsproxy(n) __put_nsproxy(n, __FILE__, __LINE__) ++ ++static inline void __put_nsproxy(struct nsproxy *ns, ++ const char *_file, int _line) + { +- atomic_inc(&ns->count); ++ vxlprintk(VXD_CBIT(space, 0), "put_nsproxy(%p[%u])", ++ ns, atomic_read(&ns->count), _file, _line); ++ if (atomic_dec_and_test(&ns->count)) { ++ free_nsproxy(ns); ++ } + } + + 
#endif +diff -NurpP --minimal linux-3.2.34/include/linux/pid.h linux-3.2.34-vs2.3.2.15/include/linux/pid.h +--- linux-3.2.34/include/linux/pid.h 2011-07-22 11:18:11.000000000 +0200 ++++ linux-3.2.34-vs2.3.2.15/include/linux/pid.h 2011-12-05 19:33:02.000000000 +0100 +@@ -8,7 +8,8 @@ enum pid_type + PIDTYPE_PID, + PIDTYPE_PGID, + PIDTYPE_SID, +- PIDTYPE_MAX ++ PIDTYPE_MAX, ++ PIDTYPE_REALPID + }; + + /* +@@ -171,6 +172,7 @@ static inline pid_t pid_nr(struct pid *p + } + + pid_t pid_nr_ns(struct pid *pid, struct pid_namespace *ns); ++pid_t pid_unmapped_nr_ns(struct pid *pid, struct pid_namespace *ns); + pid_t pid_vnr(struct pid *pid); + + #define do_each_pid_task(pid, type, task) \ +diff -NurpP --minimal linux-3.2.34/include/linux/proc_fs.h linux-3.2.34-vs2.3.2.15/include/linux/proc_fs.h +--- linux-3.2.34/include/linux/proc_fs.h 2011-10-24 18:45:32.000000000 +0200 ++++ linux-3.2.34-vs2.3.2.15/include/linux/proc_fs.h 2011-12-05 19:33:02.000000000 +0100 +@@ -54,6 +54,7 @@ struct proc_dir_entry { + nlink_t nlink; + uid_t uid; + gid_t gid; ++ int vx_flags; + loff_t size; + const struct inode_operations *proc_iops; + /* +@@ -252,12 +253,18 @@ extern const struct proc_ns_operations n + extern const struct proc_ns_operations utsns_operations; + extern const struct proc_ns_operations ipcns_operations; + ++struct vx_info; ++struct nx_info; ++ + union proc_op { + int (*proc_get_link)(struct inode *, struct path *); + int (*proc_read)(struct task_struct *task, char *page); + int (*proc_show)(struct seq_file *m, + struct pid_namespace *ns, struct pid *pid, + struct task_struct *task); ++ int (*proc_vs_read)(char *page); ++ int (*proc_vxi_read)(struct vx_info *vxi, char *page); ++ int (*proc_nxi_read)(struct nx_info *nxi, char *page); + }; + + struct ctl_table_header; +@@ -265,6 +272,7 @@ struct ctl_table; + + struct proc_inode { + struct pid *pid; ++ int vx_flags; + int fd; + union proc_op op; + struct proc_dir_entry *pde; +diff -NurpP --minimal linux-3.2.34/include/linux/quotaops.h linux-3.2.34-vs2.3.2.15/include/linux/quotaops.h +--- linux-3.2.34/include/linux/quotaops.h 2012-01-09 16:14:58.000000000 +0100 ++++ linux-3.2.34-vs2.3.2.15/include/linux/quotaops.h 2011-12-05 19:33:02.000000000 +0100 +@@ -8,6 +8,7 @@ + #define _LINUX_QUOTAOPS_ + + #include ++#include + + #define DQUOT_SPACE_WARN 0x1 + #define DQUOT_SPACE_RESERVE 0x2 +@@ -204,11 +205,12 @@ static inline void dquot_drop(struct ino + + static inline int dquot_alloc_inode(const struct inode *inode) + { +- return 0; ++ return dl_alloc_inode(inode); + } + + static inline void dquot_free_inode(const struct inode *inode) + { ++ dl_free_inode(inode); + } + + static inline int dquot_transfer(struct inode *inode, struct iattr *iattr) +@@ -219,6 +221,10 @@ static inline int dquot_transfer(struct + static inline int __dquot_alloc_space(struct inode *inode, qsize_t number, + int flags) + { ++ int ret = 0; ++ ++ if ((ret = dl_alloc_space(inode, number))) ++ return ret; + if (!(flags & DQUOT_SPACE_RESERVE)) + inode_add_bytes(inode, number); + return 0; +@@ -229,6 +235,7 @@ static inline void __dquot_free_space(st + { + if (!(flags & DQUOT_SPACE_RESERVE)) + inode_sub_bytes(inode, number); ++ dl_free_space(inode, number); + } + + static inline int dquot_claim_space_nodirty(struct inode *inode, qsize_t number) +diff -NurpP --minimal linux-3.2.34/include/linux/reboot.h linux-3.2.34-vs2.3.2.15/include/linux/reboot.h +--- linux-3.2.34/include/linux/reboot.h 2011-10-24 18:45:32.000000000 +0200 ++++ linux-3.2.34-vs2.3.2.15/include/linux/reboot.h 2011-12-05 
19:33:02.000000000 +0100 +@@ -33,6 +33,7 @@ + #define LINUX_REBOOT_CMD_RESTART2 0xA1B2C3D4 + #define LINUX_REBOOT_CMD_SW_SUSPEND 0xD000FCE2 + #define LINUX_REBOOT_CMD_KEXEC 0x45584543 ++#define LINUX_REBOOT_CMD_OOM 0xDEADBEEF + + + #ifdef __KERNEL__ +diff -NurpP --minimal linux-3.2.34/include/linux/reiserfs_fs.h linux-3.2.34-vs2.3.2.15/include/linux/reiserfs_fs.h +--- linux-3.2.34/include/linux/reiserfs_fs.h 2011-10-24 18:45:32.000000000 +0200 ++++ linux-3.2.34-vs2.3.2.15/include/linux/reiserfs_fs.h 2011-12-05 19:33:02.000000000 +0100 +@@ -976,6 +976,11 @@ struct stat_data_v1 { + #define REISERFS_COMPR_FL FS_COMPR_FL + #define REISERFS_NOTAIL_FL FS_NOTAIL_FL + ++/* unfortunately reiserfs sdattr is only 16 bit */ ++#define REISERFS_IXUNLINK_FL (FS_IXUNLINK_FL >> 16) ++#define REISERFS_BARRIER_FL (FS_BARRIER_FL >> 16) ++#define REISERFS_COW_FL (FS_COW_FL >> 16) ++ + /* persistent flags that file inherits from the parent directory */ + #define REISERFS_INHERIT_MASK ( REISERFS_IMMUTABLE_FL | \ + REISERFS_SYNC_FL | \ +@@ -985,6 +990,9 @@ struct stat_data_v1 { + REISERFS_COMPR_FL | \ + REISERFS_NOTAIL_FL ) + ++#define REISERFS_FL_USER_VISIBLE 0x80FF ++#define REISERFS_FL_USER_MODIFIABLE 0x80FF ++ + /* Stat Data on disk (reiserfs version of UFS disk inode minus the + address blocks) */ + struct stat_data { +@@ -2073,6 +2081,7 @@ static inline void reiserfs_update_sd(st + void sd_attrs_to_i_attrs(__u16 sd_attrs, struct inode *inode); + void i_attrs_to_sd_attrs(struct inode *inode, __u16 * sd_attrs); + int reiserfs_setattr(struct dentry *dentry, struct iattr *attr); ++int reiserfs_sync_flags(struct inode *inode, int, int); + + int __reiserfs_write_begin(struct page *page, unsigned from, unsigned len); + +diff -NurpP --minimal linux-3.2.34/include/linux/reiserfs_fs_sb.h linux-3.2.34-vs2.3.2.15/include/linux/reiserfs_fs_sb.h +--- linux-3.2.34/include/linux/reiserfs_fs_sb.h 2010-02-25 11:52:07.000000000 +0100 ++++ linux-3.2.34-vs2.3.2.15/include/linux/reiserfs_fs_sb.h 2011-12-05 19:33:02.000000000 +0100 +@@ -476,6 +476,7 @@ enum reiserfs_mount_options { + REISERFS_EXPOSE_PRIVROOT, + REISERFS_BARRIER_NONE, + REISERFS_BARRIER_FLUSH, ++ REISERFS_TAGGED, + + /* Actions on error */ + REISERFS_ERROR_PANIC, +diff -NurpP --minimal linux-3.2.34/include/linux/sched.h linux-3.2.34-vs2.3.2.15/include/linux/sched.h +--- linux-3.2.34/include/linux/sched.h 2012-11-18 18:42:23.000000000 +0100 ++++ linux-3.2.34-vs2.3.2.15/include/linux/sched.h 2012-08-13 12:40:51.000000000 +0200 +@@ -1407,6 +1407,14 @@ struct task_struct { + #endif + seccomp_t seccomp; + ++/* vserver context data */ ++ struct vx_info *vx_info; ++ struct nx_info *nx_info; ++ ++ xid_t xid; ++ nid_t nid; ++ tag_t tag; ++ + /* Thread group tracking */ + u32 parent_exec_id; + u32 self_exec_id; +@@ -1655,6 +1663,11 @@ struct pid_namespace; + pid_t __task_pid_nr_ns(struct task_struct *task, enum pid_type type, + struct pid_namespace *ns); + ++#include ++#include ++#include ++#include ++ + static inline pid_t task_pid_nr(struct task_struct *tsk) + { + return tsk->pid; +@@ -1668,7 +1681,8 @@ static inline pid_t task_pid_nr_ns(struc + + static inline pid_t task_pid_vnr(struct task_struct *tsk) + { +- return __task_pid_nr_ns(tsk, PIDTYPE_PID, NULL); ++ // return __task_pid_nr_ns(tsk, PIDTYPE_PID, NULL); ++ return vx_map_pid(__task_pid_nr_ns(tsk, PIDTYPE_PID, NULL)); + } + + +@@ -1681,7 +1695,7 @@ pid_t task_tgid_nr_ns(struct task_struct + + static inline pid_t task_tgid_vnr(struct task_struct *tsk) + { +- return pid_vnr(task_tgid(tsk)); ++ return 
vx_map_tgid(pid_vnr(task_tgid(tsk))); + } + + +diff -NurpP --minimal linux-3.2.34/include/linux/shmem_fs.h linux-3.2.34-vs2.3.2.15/include/linux/shmem_fs.h +--- linux-3.2.34/include/linux/shmem_fs.h 2012-11-18 18:42:23.000000000 +0100 ++++ linux-3.2.34-vs2.3.2.15/include/linux/shmem_fs.h 2012-01-26 08:52:10.000000000 +0100 +@@ -8,6 +8,9 @@ + + /* inode in-kernel data */ + ++#define TMPFS_SUPER_MAGIC 0x01021994 ++ ++ + struct shmem_inode_info { + spinlock_t lock; + unsigned long flags; +diff -NurpP --minimal linux-3.2.34/include/linux/stat.h linux-3.2.34-vs2.3.2.15/include/linux/stat.h +--- linux-3.2.34/include/linux/stat.h 2008-12-25 00:26:37.000000000 +0100 ++++ linux-3.2.34-vs2.3.2.15/include/linux/stat.h 2011-12-05 19:33:02.000000000 +0100 +@@ -66,6 +66,7 @@ struct kstat { + unsigned int nlink; + uid_t uid; + gid_t gid; ++ tag_t tag; + dev_t rdev; + loff_t size; + struct timespec atime; +diff -NurpP --minimal linux-3.2.34/include/linux/sunrpc/auth.h linux-3.2.34-vs2.3.2.15/include/linux/sunrpc/auth.h +--- linux-3.2.34/include/linux/sunrpc/auth.h 2011-10-24 18:45:32.000000000 +0200 ++++ linux-3.2.34-vs2.3.2.15/include/linux/sunrpc/auth.h 2011-12-05 19:33:02.000000000 +0100 +@@ -25,6 +25,7 @@ + struct auth_cred { + uid_t uid; + gid_t gid; ++ tag_t tag; + struct group_info *group_info; + unsigned char machine_cred : 1; + }; +diff -NurpP --minimal linux-3.2.34/include/linux/sunrpc/clnt.h linux-3.2.34-vs2.3.2.15/include/linux/sunrpc/clnt.h +--- linux-3.2.34/include/linux/sunrpc/clnt.h 2012-01-09 16:14:58.000000000 +0100 ++++ linux-3.2.34-vs2.3.2.15/include/linux/sunrpc/clnt.h 2011-12-05 19:33:02.000000000 +0100 +@@ -50,7 +50,8 @@ struct rpc_clnt { + unsigned int cl_softrtry : 1,/* soft timeouts */ + cl_discrtry : 1,/* disconnect before retry */ + cl_autobind : 1,/* use getport() */ +- cl_chatty : 1;/* be verbose */ ++ cl_chatty : 1,/* be verbose */ ++ cl_tag : 1;/* context tagging */ + + struct rpc_rtt * cl_rtt; /* RTO estimator data */ + const struct rpc_timeout *cl_timeout; /* Timeout strategy */ +diff -NurpP --minimal linux-3.2.34/include/linux/syscalls.h linux-3.2.34-vs2.3.2.15/include/linux/syscalls.h +--- linux-3.2.34/include/linux/syscalls.h 2012-01-09 16:14:58.000000000 +0100 ++++ linux-3.2.34-vs2.3.2.15/include/linux/syscalls.h 2011-12-05 19:33:02.000000000 +0100 +@@ -483,6 +483,8 @@ asmlinkage long sys_symlink(const char _ + asmlinkage long sys_unlink(const char __user *pathname); + asmlinkage long sys_rename(const char __user *oldname, + const char __user *newname); ++asmlinkage long sys_copyfile(const char __user *from, const char __user *to, ++ umode_t mode); + asmlinkage long sys_chmod(const char __user *filename, mode_t mode); + asmlinkage long sys_fchmod(unsigned int fd, mode_t mode); + +diff -NurpP --minimal linux-3.2.34/include/linux/sysctl.h linux-3.2.34-vs2.3.2.15/include/linux/sysctl.h +--- linux-3.2.34/include/linux/sysctl.h 2012-01-09 16:14:58.000000000 +0100 ++++ linux-3.2.34-vs2.3.2.15/include/linux/sysctl.h 2011-12-05 19:33:02.000000000 +0100 +@@ -60,6 +60,7 @@ enum + CTL_ABI=9, /* Binary emulation */ + CTL_CPU=10, /* CPU stuff (speed scaling, etc) */ + CTL_ARLAN=254, /* arlan wireless driver */ ++ CTL_VSERVER=4242, /* Linux-VServer debug */ + CTL_S390DBF=5677, /* s390 debug */ + CTL_SUNRPC=7249, /* sunrpc debug */ + CTL_PM=9899, /* frv power management */ +@@ -94,6 +95,7 @@ enum + + KERN_PANIC=15, /* int: panic timeout */ + KERN_REALROOTDEV=16, /* real root device to mount after initrd */ ++ KERN_VSHELPER=17, /* string: path to vshelper policy agent */ + + 
KERN_SPARC_REBOOT=21, /* reboot command on Sparc */ + KERN_CTLALTDEL=22, /* int: allow ctl-alt-del to reboot */ +diff -NurpP --minimal linux-3.2.34/include/linux/sysfs.h linux-3.2.34-vs2.3.2.15/include/linux/sysfs.h +--- linux-3.2.34/include/linux/sysfs.h 2012-01-09 16:14:58.000000000 +0100 ++++ linux-3.2.34-vs2.3.2.15/include/linux/sysfs.h 2011-12-05 19:33:02.000000000 +0100 +@@ -19,6 +19,8 @@ + #include + #include + ++#define SYSFS_SUPER_MAGIC 0x62656572 ++ + struct kobject; + struct module; + enum kobj_ns_type; +diff -NurpP --minimal linux-3.2.34/include/linux/time.h linux-3.2.34-vs2.3.2.15/include/linux/time.h +--- linux-3.2.34/include/linux/time.h 2012-11-18 18:42:23.000000000 +0100 ++++ linux-3.2.34-vs2.3.2.15/include/linux/time.h 2012-10-22 12:59:52.000000000 +0200 +@@ -281,6 +281,9 @@ static __always_inline void timespec_add + a->tv_sec += __iter_div_u64_rem(a->tv_nsec + ns, NSEC_PER_SEC, &ns); + a->tv_nsec = ns; + } ++ ++#include ++ + #endif /* __KERNEL__ */ + + #define NFDBITS __NFDBITS +diff -NurpP --minimal linux-3.2.34/include/linux/types.h linux-3.2.34-vs2.3.2.15/include/linux/types.h +--- linux-3.2.34/include/linux/types.h 2012-01-09 16:14:59.000000000 +0100 ++++ linux-3.2.34-vs2.3.2.15/include/linux/types.h 2011-12-05 19:33:02.000000000 +0100 +@@ -40,6 +40,9 @@ typedef __kernel_uid32_t uid_t; + typedef __kernel_gid32_t gid_t; + typedef __kernel_uid16_t uid16_t; + typedef __kernel_gid16_t gid16_t; ++typedef unsigned int xid_t; ++typedef unsigned int nid_t; ++typedef unsigned int tag_t; + + typedef unsigned long uintptr_t; + +diff -NurpP --minimal linux-3.2.34/include/linux/utsname.h linux-3.2.34-vs2.3.2.15/include/linux/utsname.h +--- linux-3.2.34/include/linux/utsname.h 2012-01-09 16:14:59.000000000 +0100 ++++ linux-3.2.34-vs2.3.2.15/include/linux/utsname.h 2011-12-05 19:33:02.000000000 +0100 +@@ -62,7 +62,8 @@ static inline void get_uts_ns(struct uts + } + + extern struct uts_namespace *copy_utsname(unsigned long flags, +- struct task_struct *tsk); ++ struct uts_namespace *old_ns, ++ struct user_namespace *user_ns); + extern void free_uts_ns(struct kref *kref); + + static inline void put_uts_ns(struct uts_namespace *ns) +@@ -79,12 +80,13 @@ static inline void put_uts_ns(struct uts + } + + static inline struct uts_namespace *copy_utsname(unsigned long flags, +- struct task_struct *tsk) ++ struct uts_namespace *old_ns, ++ struct user_namespace *user_ns) + { + if (flags & CLONE_NEWUTS) + return ERR_PTR(-EINVAL); + +- return tsk->nsproxy->uts_ns; ++ return old_ns; + } + #endif + +diff -NurpP --minimal linux-3.2.34/include/linux/vroot.h linux-3.2.34-vs2.3.2.15/include/linux/vroot.h +--- linux-3.2.34/include/linux/vroot.h 1970-01-01 01:00:00.000000000 +0100 ++++ linux-3.2.34-vs2.3.2.15/include/linux/vroot.h 2011-12-05 19:33:02.000000000 +0100 +@@ -0,0 +1,51 @@ ++ ++/* ++ * include/linux/vroot.h ++ * ++ * written by Herbert Pötzl, 9/11/2002 ++ * ported to 2.6 by Herbert Pötzl, 30/12/2004 ++ * ++ * Copyright (C) 2002-2007 by Herbert Pötzl. ++ * Redistribution of this file is permitted under the ++ * GNU General Public License. 
++ */ ++ ++#ifndef _LINUX_VROOT_H ++#define _LINUX_VROOT_H ++ ++ ++#ifdef __KERNEL__ ++ ++/* Possible states of device */ ++enum { ++ Vr_unbound, ++ Vr_bound, ++}; ++ ++struct vroot_device { ++ int vr_number; ++ int vr_refcnt; ++ ++ struct semaphore vr_ctl_mutex; ++ struct block_device *vr_device; ++ int vr_state; ++}; ++ ++ ++typedef struct block_device *(vroot_grb_func)(struct block_device *); ++ ++extern int register_vroot_grb(vroot_grb_func *); ++extern int unregister_vroot_grb(vroot_grb_func *); ++ ++#endif /* __KERNEL__ */ ++ ++#define MAX_VROOT_DEFAULT 8 ++ ++/* ++ * IOCTL commands --- we will commandeer 0x56 ('V') ++ */ ++ ++#define VROOT_SET_DEV 0x5600 ++#define VROOT_CLR_DEV 0x5601 ++ ++#endif /* _LINUX_VROOT_H */ +diff -NurpP --minimal linux-3.2.34/include/linux/vs_base.h linux-3.2.34-vs2.3.2.15/include/linux/vs_base.h +--- linux-3.2.34/include/linux/vs_base.h 1970-01-01 01:00:00.000000000 +0100 ++++ linux-3.2.34-vs2.3.2.15/include/linux/vs_base.h 2011-12-05 19:33:02.000000000 +0100 +@@ -0,0 +1,10 @@ ++#ifndef _VS_BASE_H ++#define _VS_BASE_H ++ ++#include "vserver/base.h" ++#include "vserver/check.h" ++#include "vserver/debug.h" ++ ++#else ++#warning duplicate inclusion ++#endif +diff -NurpP --minimal linux-3.2.34/include/linux/vs_context.h linux-3.2.34-vs2.3.2.15/include/linux/vs_context.h +--- linux-3.2.34/include/linux/vs_context.h 1970-01-01 01:00:00.000000000 +0100 ++++ linux-3.2.34-vs2.3.2.15/include/linux/vs_context.h 2011-12-05 19:33:02.000000000 +0100 +@@ -0,0 +1,242 @@ ++#ifndef _VS_CONTEXT_H ++#define _VS_CONTEXT_H ++ ++#include "vserver/base.h" ++#include "vserver/check.h" ++#include "vserver/context.h" ++#include "vserver/history.h" ++#include "vserver/debug.h" ++ ++#include ++ ++ ++#define get_vx_info(i) __get_vx_info(i, __FILE__, __LINE__, __HERE__) ++ ++static inline struct vx_info *__get_vx_info(struct vx_info *vxi, ++ const char *_file, int _line, void *_here) ++{ ++ if (!vxi) ++ return NULL; ++ ++ vxlprintk(VXD_CBIT(xid, 2), "get_vx_info(%p[#%d.%d])", ++ vxi, vxi ? vxi->vx_id : 0, ++ vxi ? atomic_read(&vxi->vx_usecnt) : 0, ++ _file, _line); ++ __vxh_get_vx_info(vxi, _here); ++ ++ atomic_inc(&vxi->vx_usecnt); ++ return vxi; ++} ++ ++ ++extern void free_vx_info(struct vx_info *); ++ ++#define put_vx_info(i) __put_vx_info(i, __FILE__, __LINE__, __HERE__) ++ ++static inline void __put_vx_info(struct vx_info *vxi, ++ const char *_file, int _line, void *_here) ++{ ++ if (!vxi) ++ return; ++ ++ vxlprintk(VXD_CBIT(xid, 2), "put_vx_info(%p[#%d.%d])", ++ vxi, vxi ? vxi->vx_id : 0, ++ vxi ? atomic_read(&vxi->vx_usecnt) : 0, ++ _file, _line); ++ __vxh_put_vx_info(vxi, _here); ++ ++ if (atomic_dec_and_test(&vxi->vx_usecnt)) ++ free_vx_info(vxi); ++} ++ ++ ++#define init_vx_info(p, i) \ ++ __init_vx_info(p, i, __FILE__, __LINE__, __HERE__) ++ ++static inline void __init_vx_info(struct vx_info **vxp, struct vx_info *vxi, ++ const char *_file, int _line, void *_here) ++{ ++ if (vxi) { ++ vxlprintk(VXD_CBIT(xid, 3), ++ "init_vx_info(%p[#%d.%d])", ++ vxi, vxi ? vxi->vx_id : 0, ++ vxi ? atomic_read(&vxi->vx_usecnt) : 0, ++ _file, _line); ++ __vxh_init_vx_info(vxi, vxp, _here); ++ ++ atomic_inc(&vxi->vx_usecnt); ++ } ++ *vxp = vxi; ++} ++ ++ ++#define set_vx_info(p, i) \ ++ __set_vx_info(p, i, __FILE__, __LINE__, __HERE__) ++ ++static inline void __set_vx_info(struct vx_info **vxp, struct vx_info *vxi, ++ const char *_file, int _line, void *_here) ++{ ++ struct vx_info *vxo; ++ ++ if (!vxi) ++ return; ++ ++ vxlprintk(VXD_CBIT(xid, 3), "set_vx_info(%p[#%d.%d])", ++ vxi, vxi ? 
vxi->vx_id : 0, ++ vxi ? atomic_read(&vxi->vx_usecnt) : 0, ++ _file, _line); ++ __vxh_set_vx_info(vxi, vxp, _here); ++ ++ atomic_inc(&vxi->vx_usecnt); ++ vxo = xchg(vxp, vxi); ++ BUG_ON(vxo); ++} ++ ++ ++#define clr_vx_info(p) __clr_vx_info(p, __FILE__, __LINE__, __HERE__) ++ ++static inline void __clr_vx_info(struct vx_info **vxp, ++ const char *_file, int _line, void *_here) ++{ ++ struct vx_info *vxo; ++ ++ vxo = xchg(vxp, NULL); ++ if (!vxo) ++ return; ++ ++ vxlprintk(VXD_CBIT(xid, 3), "clr_vx_info(%p[#%d.%d])", ++ vxo, vxo ? vxo->vx_id : 0, ++ vxo ? atomic_read(&vxo->vx_usecnt) : 0, ++ _file, _line); ++ __vxh_clr_vx_info(vxo, vxp, _here); ++ ++ if (atomic_dec_and_test(&vxo->vx_usecnt)) ++ free_vx_info(vxo); ++} ++ ++ ++#define claim_vx_info(v, p) \ ++ __claim_vx_info(v, p, __FILE__, __LINE__, __HERE__) ++ ++static inline void __claim_vx_info(struct vx_info *vxi, ++ struct task_struct *task, ++ const char *_file, int _line, void *_here) ++{ ++ vxlprintk(VXD_CBIT(xid, 3), "claim_vx_info(%p[#%d.%d.%d]) %p", ++ vxi, vxi ? vxi->vx_id : 0, ++ vxi ? atomic_read(&vxi->vx_usecnt) : 0, ++ vxi ? atomic_read(&vxi->vx_tasks) : 0, ++ task, _file, _line); ++ __vxh_claim_vx_info(vxi, task, _here); ++ ++ atomic_inc(&vxi->vx_tasks); ++} ++ ++ ++extern void unhash_vx_info(struct vx_info *); ++ ++#define release_vx_info(v, p) \ ++ __release_vx_info(v, p, __FILE__, __LINE__, __HERE__) ++ ++static inline void __release_vx_info(struct vx_info *vxi, ++ struct task_struct *task, ++ const char *_file, int _line, void *_here) ++{ ++ vxlprintk(VXD_CBIT(xid, 3), "release_vx_info(%p[#%d.%d.%d]) %p", ++ vxi, vxi ? vxi->vx_id : 0, ++ vxi ? atomic_read(&vxi->vx_usecnt) : 0, ++ vxi ? atomic_read(&vxi->vx_tasks) : 0, ++ task, _file, _line); ++ __vxh_release_vx_info(vxi, task, _here); ++ ++ might_sleep(); ++ ++ if (atomic_dec_and_test(&vxi->vx_tasks)) ++ unhash_vx_info(vxi); ++} ++ ++ ++#define task_get_vx_info(p) \ ++ __task_get_vx_info(p, __FILE__, __LINE__, __HERE__) ++ ++static inline struct vx_info *__task_get_vx_info(struct task_struct *p, ++ const char *_file, int _line, void *_here) ++{ ++ struct vx_info *vxi; ++ ++ task_lock(p); ++ vxlprintk(VXD_CBIT(xid, 5), "task_get_vx_info(%p)", ++ p, _file, _line); ++ vxi = __get_vx_info(p->vx_info, _file, _line, _here); ++ task_unlock(p); ++ return vxi; ++} ++ ++ ++static inline void __wakeup_vx_info(struct vx_info *vxi) ++{ ++ if (waitqueue_active(&vxi->vx_wait)) ++ wake_up_interruptible(&vxi->vx_wait); ++} ++ ++ ++#define enter_vx_info(v, s) __enter_vx_info(v, s, __FILE__, __LINE__) ++ ++static inline void __enter_vx_info(struct vx_info *vxi, ++ struct vx_info_save *vxis, const char *_file, int _line) ++{ ++ vxlprintk(VXD_CBIT(xid, 5), "enter_vx_info(%p[#%d],%p) %p[#%d,%p]", ++ vxi, vxi ? vxi->vx_id : 0, vxis, current, ++ current->xid, current->vx_info, _file, _line); ++ vxis->vxi = xchg(&current->vx_info, vxi); ++ vxis->xid = current->xid; ++ current->xid = vxi ?
vxi->vx_id : 0; ++} ++ ++#define leave_vx_info(s) __leave_vx_info(s, __FILE__, __LINE__) ++ ++static inline void __leave_vx_info(struct vx_info_save *vxis, ++ const char *_file, int _line) ++{ ++ vxlprintk(VXD_CBIT(xid, 5), "leave_vx_info(%p[#%d,%p]) %p[#%d,%p]", ++ vxis, vxis->xid, vxis->vxi, current, ++ current->xid, current->vx_info, _file, _line); ++ (void)xchg(&current->vx_info, vxis->vxi); ++ current->xid = vxis->xid; ++} ++ ++ ++static inline void __enter_vx_admin(struct vx_info_save *vxis) ++{ ++ vxis->vxi = xchg(&current->vx_info, NULL); ++ vxis->xid = xchg(&current->xid, (xid_t)0); ++} ++ ++static inline void __leave_vx_admin(struct vx_info_save *vxis) ++{ ++ (void)xchg(&current->xid, vxis->xid); ++ (void)xchg(&current->vx_info, vxis->vxi); ++} ++ ++#define task_is_init(p) \ ++ __task_is_init(p, __FILE__, __LINE__, __HERE__) ++ ++static inline int __task_is_init(struct task_struct *p, ++ const char *_file, int _line, void *_here) ++{ ++ int is_init = is_global_init(p); ++ ++ task_lock(p); ++ if (p->vx_info) ++ is_init = p->vx_info->vx_initpid == p->pid; ++ task_unlock(p); ++ return is_init; ++} ++ ++extern void exit_vx_info(struct task_struct *, int); ++extern void exit_vx_info_early(struct task_struct *, int); ++ ++ ++#else ++#warning duplicate inclusion ++#endif +diff -NurpP --minimal linux-3.2.34/include/linux/vs_cowbl.h linux-3.2.34-vs2.3.2.15/include/linux/vs_cowbl.h +--- linux-3.2.34/include/linux/vs_cowbl.h 1970-01-01 01:00:00.000000000 +0100 ++++ linux-3.2.34-vs2.3.2.15/include/linux/vs_cowbl.h 2011-12-05 19:33:02.000000000 +0100 +@@ -0,0 +1,48 @@ ++#ifndef _VS_COWBL_H ++#define _VS_COWBL_H ++ ++#include ++#include ++#include ++#include ++ ++extern struct dentry *cow_break_link(const char *pathname); ++ ++static inline int cow_check_and_break(struct path *path) ++{ ++ struct inode *inode = path->dentry->d_inode; ++ int error = 0; ++ ++ /* do we need this check?
*/ ++ if (IS_RDONLY(inode)) ++ return -EROFS; ++ ++ if (IS_COW(inode)) { ++ if (IS_COW_LINK(inode)) { ++ struct dentry *new_dentry, *old_dentry = path->dentry; ++ char *pp, *buf; ++ ++ buf = kmalloc(PATH_MAX, GFP_KERNEL); ++ if (!buf) { ++ return -ENOMEM; ++ } ++ pp = d_path(path, buf, PATH_MAX); ++ new_dentry = cow_break_link(pp); ++ kfree(buf); ++ if (!IS_ERR(new_dentry)) { ++ path->dentry = new_dentry; ++ dput(old_dentry); ++ } else ++ error = PTR_ERR(new_dentry); ++ } else { ++ inode->i_flags &= ~(S_IXUNLINK | S_IMMUTABLE); ++ inode->i_ctime = CURRENT_TIME; ++ mark_inode_dirty(inode); ++ } ++ } ++ return error; ++} ++ ++#else ++#warning duplicate inclusion ++#endif +diff -NurpP --minimal linux-3.2.34/include/linux/vs_cvirt.h linux-3.2.34-vs2.3.2.15/include/linux/vs_cvirt.h +--- linux-3.2.34/include/linux/vs_cvirt.h 1970-01-01 01:00:00.000000000 +0100 ++++ linux-3.2.34-vs2.3.2.15/include/linux/vs_cvirt.h 2011-12-05 19:33:02.000000000 +0100 +@@ -0,0 +1,50 @@ ++#ifndef _VS_CVIRT_H ++#define _VS_CVIRT_H ++ ++#include "vserver/cvirt.h" ++#include "vserver/context.h" ++#include "vserver/base.h" ++#include "vserver/check.h" ++#include "vserver/debug.h" ++ ++ ++static inline void vx_activate_task(struct task_struct *p) ++{ ++ struct vx_info *vxi; ++ ++ if ((vxi = p->vx_info)) { ++ vx_update_load(vxi); ++ atomic_inc(&vxi->cvirt.nr_running); ++ } ++} ++ ++static inline void vx_deactivate_task(struct task_struct *p) ++{ ++ struct vx_info *vxi; ++ ++ if ((vxi = p->vx_info)) { ++ vx_update_load(vxi); ++ atomic_dec(&vxi->cvirt.nr_running); ++ } ++} ++ ++static inline void vx_uninterruptible_inc(struct task_struct *p) ++{ ++ struct vx_info *vxi; ++ ++ if ((vxi = p->vx_info)) ++ atomic_inc(&vxi->cvirt.nr_uninterruptible); ++} ++ ++static inline void vx_uninterruptible_dec(struct task_struct *p) ++{ ++ struct vx_info *vxi; ++ ++ if ((vxi = p->vx_info)) ++ atomic_dec(&vxi->cvirt.nr_uninterruptible); ++} ++ ++ ++#else ++#warning duplicate inclusion ++#endif +diff -NurpP --minimal linux-3.2.34/include/linux/vs_device.h linux-3.2.34-vs2.3.2.15/include/linux/vs_device.h +--- linux-3.2.34/include/linux/vs_device.h 1970-01-01 01:00:00.000000000 +0100 ++++ linux-3.2.34-vs2.3.2.15/include/linux/vs_device.h 2011-12-05 19:33:02.000000000 +0100 +@@ -0,0 +1,45 @@ ++#ifndef _VS_DEVICE_H ++#define _VS_DEVICE_H ++ ++#include "vserver/base.h" ++#include "vserver/device.h" ++#include "vserver/debug.h" ++ ++ ++#ifdef CONFIG_VSERVER_DEVICE ++ ++int vs_map_device(struct vx_info *, dev_t, dev_t *, umode_t); ++ ++#define vs_device_perm(v, d, m, p) \ ++ ((vs_map_device(current_vx_info(), d, NULL, m) & (p)) == (p)) ++ ++#else ++ ++static inline ++int vs_map_device(struct vx_info *vxi, ++ dev_t device, dev_t *target, umode_t mode) ++{ ++ if (target) ++ *target = device; ++ return ~0; ++} ++ ++#define vs_device_perm(v, d, m, p) ((p) == (p)) ++ ++#endif ++ ++ ++#define vs_map_chrdev(d, t, p) \ ++ ((vs_map_device(current_vx_info(), d, t, S_IFCHR) & (p)) == (p)) ++#define vs_map_blkdev(d, t, p) \ ++ ((vs_map_device(current_vx_info(), d, t, S_IFBLK) & (p)) == (p)) ++ ++#define vs_chrdev_perm(d, p) \ ++ vs_device_perm(current_vx_info(), d, S_IFCHR, p) ++#define vs_blkdev_perm(d, p) \ ++ vs_device_perm(current_vx_info(), d, S_IFBLK, p) ++ ++ ++#else ++#warning duplicate inclusion ++#endif +diff -NurpP --minimal linux-3.2.34/include/linux/vs_dlimit.h linux-3.2.34-vs2.3.2.15/include/linux/vs_dlimit.h +--- linux-3.2.34/include/linux/vs_dlimit.h 1970-01-01 01:00:00.000000000 +0100 ++++ linux-3.2.34-vs2.3.2.15/include/linux/vs_dlimit.h 
2011-12-05 19:33:02.000000000 +0100 +@@ -0,0 +1,215 @@ ++#ifndef _VS_DLIMIT_H ++#define _VS_DLIMIT_H ++ ++#include ++ ++#include "vserver/dlimit.h" ++#include "vserver/base.h" ++#include "vserver/debug.h" ++ ++ ++#define get_dl_info(i) __get_dl_info(i, __FILE__, __LINE__) ++ ++static inline struct dl_info *__get_dl_info(struct dl_info *dli, ++ const char *_file, int _line) ++{ ++ if (!dli) ++ return NULL; ++ vxlprintk(VXD_CBIT(dlim, 4), "get_dl_info(%p[#%d.%d])", ++ dli, dli ? dli->dl_tag : 0, ++ dli ? atomic_read(&dli->dl_usecnt) : 0, ++ _file, _line); ++ atomic_inc(&dli->dl_usecnt); ++ return dli; ++} ++ ++ ++#define free_dl_info(i) \ ++ call_rcu(&(i)->dl_rcu, rcu_free_dl_info) ++ ++#define put_dl_info(i) __put_dl_info(i, __FILE__, __LINE__) ++ ++static inline void __put_dl_info(struct dl_info *dli, ++ const char *_file, int _line) ++{ ++ if (!dli) ++ return; ++ vxlprintk(VXD_CBIT(dlim, 4), "put_dl_info(%p[#%d.%d])", ++ dli, dli ? dli->dl_tag : 0, ++ dli ? atomic_read(&dli->dl_usecnt) : 0, ++ _file, _line); ++ if (atomic_dec_and_test(&dli->dl_usecnt)) ++ free_dl_info(dli); ++} ++ ++ ++#define __dlimit_char(d) ((d) ? '*' : ' ') ++ ++static inline int __dl_alloc_space(struct super_block *sb, ++ tag_t tag, dlsize_t nr, const char *file, int line) ++{ ++ struct dl_info *dli = NULL; ++ int ret = 0; ++ ++ if (nr == 0) ++ goto out; ++ dli = locate_dl_info(sb, tag); ++ if (!dli) ++ goto out; ++ ++ spin_lock(&dli->dl_lock); ++ ret = (dli->dl_space_used + nr > dli->dl_space_total); ++ if (!ret) ++ dli->dl_space_used += nr; ++ spin_unlock(&dli->dl_lock); ++ put_dl_info(dli); ++out: ++ vxlprintk(VXD_CBIT(dlim, 1), ++ "ALLOC (%p,#%d)%c %lld bytes (%d)", ++ sb, tag, __dlimit_char(dli), (long long)nr, ++ ret, file, line); ++ return ret ? -ENOSPC : 0; ++} ++ ++static inline void __dl_free_space(struct super_block *sb, ++ tag_t tag, dlsize_t nr, const char *_file, int _line) ++{ ++ struct dl_info *dli = NULL; ++ ++ if (nr == 0) ++ goto out; ++ dli = locate_dl_info(sb, tag); ++ if (!dli) ++ goto out; ++ ++ spin_lock(&dli->dl_lock); ++ if (dli->dl_space_used > nr) ++ dli->dl_space_used -= nr; ++ else ++ dli->dl_space_used = 0; ++ spin_unlock(&dli->dl_lock); ++ put_dl_info(dli); ++out: ++ vxlprintk(VXD_CBIT(dlim, 1), ++ "FREE (%p,#%d)%c %lld bytes", ++ sb, tag, __dlimit_char(dli), (long long)nr, ++ _file, _line); ++} ++ ++static inline int __dl_alloc_inode(struct super_block *sb, ++ tag_t tag, const char *_file, int _line) ++{ ++ struct dl_info *dli; ++ int ret = 0; ++ ++ dli = locate_dl_info(sb, tag); ++ if (!dli) ++ goto out; ++ ++ spin_lock(&dli->dl_lock); ++ dli->dl_inodes_used++; ++ ret = (dli->dl_inodes_used > dli->dl_inodes_total); ++ spin_unlock(&dli->dl_lock); ++ put_dl_info(dli); ++out: ++ vxlprintk(VXD_CBIT(dlim, 0), ++ "ALLOC (%p,#%d)%c inode (%d)", ++ sb, tag, __dlimit_char(dli), ret, _file, _line); ++ return ret ? 
-ENOSPC : 0; ++} ++ ++static inline void __dl_free_inode(struct super_block *sb, ++ tag_t tag, const char *_file, int _line) ++{ ++ struct dl_info *dli; ++ ++ dli = locate_dl_info(sb, tag); ++ if (!dli) ++ goto out; ++ ++ spin_lock(&dli->dl_lock); ++ if (dli->dl_inodes_used > 1) ++ dli->dl_inodes_used--; ++ else ++ dli->dl_inodes_used = 0; ++ spin_unlock(&dli->dl_lock); ++ put_dl_info(dli); ++out: ++ vxlprintk(VXD_CBIT(dlim, 0), ++ "FREE (%p,#%d)%c inode", ++ sb, tag, __dlimit_char(dli), _file, _line); ++} ++ ++static inline void __dl_adjust_block(struct super_block *sb, tag_t tag, ++ unsigned long long *free_blocks, unsigned long long *root_blocks, ++ const char *_file, int _line) ++{ ++ struct dl_info *dli; ++ uint64_t broot, bfree; ++ ++ dli = locate_dl_info(sb, tag); ++ if (!dli) ++ return; ++ ++ spin_lock(&dli->dl_lock); ++ broot = (dli->dl_space_total - ++ (dli->dl_space_total >> 10) * dli->dl_nrlmult) ++ >> sb->s_blocksize_bits; ++ bfree = (dli->dl_space_total - dli->dl_space_used) ++ >> sb->s_blocksize_bits; ++ spin_unlock(&dli->dl_lock); ++ ++ vxlprintk(VXD_CBIT(dlim, 2), ++ "ADJUST: %lld,%lld on %lld,%lld [mult=%d]", ++ (long long)bfree, (long long)broot, ++ *free_blocks, *root_blocks, dli->dl_nrlmult, ++ _file, _line); ++ if (free_blocks) { ++ if (*free_blocks > bfree) ++ *free_blocks = bfree; ++ } ++ if (root_blocks) { ++ if (*root_blocks > broot) ++ *root_blocks = broot; ++ } ++ put_dl_info(dli); ++} ++ ++#define dl_prealloc_space(in, bytes) \ ++ __dl_alloc_space((in)->i_sb, (in)->i_tag, (dlsize_t)(bytes), \ ++ __FILE__, __LINE__ ) ++ ++#define dl_alloc_space(in, bytes) \ ++ __dl_alloc_space((in)->i_sb, (in)->i_tag, (dlsize_t)(bytes), \ ++ __FILE__, __LINE__ ) ++ ++#define dl_reserve_space(in, bytes) \ ++ __dl_alloc_space((in)->i_sb, (in)->i_tag, (dlsize_t)(bytes), \ ++ __FILE__, __LINE__ ) ++ ++#define dl_claim_space(in, bytes) (0) ++ ++#define dl_release_space(in, bytes) \ ++ __dl_free_space((in)->i_sb, (in)->i_tag, (dlsize_t)(bytes), \ ++ __FILE__, __LINE__ ) ++ ++#define dl_free_space(in, bytes) \ ++ __dl_free_space((in)->i_sb, (in)->i_tag, (dlsize_t)(bytes), \ ++ __FILE__, __LINE__ ) ++ ++ ++ ++#define dl_alloc_inode(in) \ ++ __dl_alloc_inode((in)->i_sb, (in)->i_tag, __FILE__, __LINE__ ) ++ ++#define dl_free_inode(in) \ ++ __dl_free_inode((in)->i_sb, (in)->i_tag, __FILE__, __LINE__ ) ++ ++ ++#define dl_adjust_block(sb, tag, fb, rb) \ ++ __dl_adjust_block(sb, tag, fb, rb, __FILE__, __LINE__ ) ++ ++ ++#else ++#warning duplicate inclusion ++#endif +diff -NurpP --minimal linux-3.2.34/include/linux/vs_inet.h linux-3.2.34-vs2.3.2.15/include/linux/vs_inet.h +--- linux-3.2.34/include/linux/vs_inet.h 1970-01-01 01:00:00.000000000 +0100 ++++ linux-3.2.34-vs2.3.2.15/include/linux/vs_inet.h 2012-02-15 03:03:53.000000000 +0100 +@@ -0,0 +1,353 @@ ++#ifndef _VS_INET_H ++#define _VS_INET_H ++ ++#include "vserver/base.h" ++#include "vserver/network.h" ++#include "vserver/debug.h" ++ ++#define IPI_LOOPBACK htonl(INADDR_LOOPBACK) ++ ++#define NXAV4(a) NIPQUAD((a)->ip[0]), NIPQUAD((a)->ip[1]), \ ++ NIPQUAD((a)->mask), (a)->type ++#define NXAV4_FMT "[" NIPQUAD_FMT "-" NIPQUAD_FMT "/" NIPQUAD_FMT ":%04x]" ++ ++#define NIPQUAD(addr) \ ++ ((unsigned char *)&addr)[0], \ ++ ((unsigned char *)&addr)[1], \ ++ ((unsigned char *)&addr)[2], \ ++ ((unsigned char *)&addr)[3] ++ ++#define NIPQUAD_FMT "%u.%u.%u.%u" ++ ++ ++static inline ++int v4_addr_match(struct nx_addr_v4 *nxa, __be32 addr, uint16_t tmask) ++{ ++ __be32 ip = nxa->ip[0].s_addr; ++ __be32 mask = nxa->mask.s_addr; ++ __be32 bcast = ip | 
~mask; ++ int ret = 0; ++ ++ switch (nxa->type & tmask) { ++ case NXA_TYPE_MASK: ++ ret = (ip == (addr & mask)); ++ break; ++ case NXA_TYPE_ADDR: ++ ret = 3; ++ if (addr == ip) ++ break; ++ /* fall through to broadcast */ ++ case NXA_MOD_BCAST: ++ ret = ((tmask & NXA_MOD_BCAST) && (addr == bcast)); ++ break; ++ case NXA_TYPE_RANGE: ++ ret = ((nxa->ip[0].s_addr <= addr) && ++ (nxa->ip[1].s_addr > addr)); ++ break; ++ case NXA_TYPE_ANY: ++ ret = 2; ++ break; ++ } ++ ++ vxdprintk(VXD_CBIT(net, 0), ++ "v4_addr_match(%p" NXAV4_FMT "," NIPQUAD_FMT ",%04x) = %d", ++ nxa, NXAV4(nxa), NIPQUAD(addr), tmask, ret); ++ return ret; ++} ++ ++static inline ++int v4_addr_in_nx_info(struct nx_info *nxi, __be32 addr, uint16_t tmask) ++{ ++ struct nx_addr_v4 *nxa; ++ int ret = 1; ++ ++ if (!nxi) ++ goto out; ++ ++ ret = 2; ++ /* allow 127.0.0.1 when remapping lback */ ++ if ((tmask & NXA_LOOPBACK) && ++ (addr == IPI_LOOPBACK) && ++ nx_info_flags(nxi, NXF_LBACK_REMAP, 0)) ++ goto out; ++ ret = 3; ++ /* check for lback address */ ++ if ((tmask & NXA_MOD_LBACK) && ++ (nxi->v4_lback.s_addr == addr)) ++ goto out; ++ ret = 4; ++ /* check for broadcast address */ ++ if ((tmask & NXA_MOD_BCAST) && ++ (nxi->v4_bcast.s_addr == addr)) ++ goto out; ++ ret = 5; ++ /* check for v4 addresses */ ++ for (nxa = &nxi->v4; nxa; nxa = nxa->next) ++ if (v4_addr_match(nxa, addr, tmask)) ++ goto out; ++ ret = 0; ++out: ++ vxdprintk(VXD_CBIT(net, 0), ++ "v4_addr_in_nx_info(%p[#%u]," NIPQUAD_FMT ",%04x) = %d", ++ nxi, nxi ? nxi->nx_id : 0, NIPQUAD(addr), tmask, ret); ++ return ret; ++} ++ ++static inline ++int v4_nx_addr_match(struct nx_addr_v4 *nxa, struct nx_addr_v4 *addr, uint16_t mask) ++{ ++ /* FIXME: needs full range checks */ ++ return v4_addr_match(nxa, addr->ip[0].s_addr, mask); ++} ++ ++static inline ++int v4_nx_addr_in_nx_info(struct nx_info *nxi, struct nx_addr_v4 *nxa, uint16_t mask) ++{ ++ struct nx_addr_v4 *ptr; ++ ++ for (ptr = &nxi->v4; ptr; ptr = ptr->next) ++ if (v4_nx_addr_match(ptr, nxa, mask)) ++ return 1; ++ return 0; ++} ++ ++#include ++ ++/* ++ * Check if a given address matches for a socket ++ * ++ * nxi: the socket's nx_info if any ++ * addr: to be verified address ++ */ ++static inline ++int v4_sock_addr_match ( ++ struct nx_info *nxi, ++ struct inet_sock *inet, ++ __be32 addr) ++{ ++ __be32 saddr = inet->inet_rcv_saddr; ++ __be32 bcast = nxi ? 
nxi->v4_bcast.s_addr : INADDR_BROADCAST; ++ ++ if (addr && (saddr == addr || bcast == addr)) ++ return 1; ++ if (!saddr) ++ return v4_addr_in_nx_info(nxi, addr, NXA_MASK_BIND); ++ return 0; ++} ++ ++ ++/* inet related checks and helpers */ ++ ++ ++struct in_ifaddr; ++struct net_device; ++struct sock; ++ ++#ifdef CONFIG_INET ++ ++#include ++#include ++#include ++#include ++ ++ ++int dev_in_nx_info(struct net_device *, struct nx_info *); ++int v4_dev_in_nx_info(struct net_device *, struct nx_info *); ++int nx_v4_addr_conflict(struct nx_info *, struct nx_info *); ++ ++ ++/* ++ * check if address is covered by socket ++ * ++ * sk: the socket to check against ++ * addr: the address in question (must be != 0) ++ */ ++ ++static inline ++int __v4_addr_match_socket(const struct sock *sk, struct nx_addr_v4 *nxa) ++{ ++ struct nx_info *nxi = sk->sk_nx_info; ++ __be32 saddr = sk_rcv_saddr(sk); ++ ++ vxdprintk(VXD_CBIT(net, 5), ++ "__v4_addr_in_socket(%p," NXAV4_FMT ") %p:" NIPQUAD_FMT " %p;%lx", ++ sk, NXAV4(nxa), nxi, NIPQUAD(saddr), sk->sk_socket, ++ (sk->sk_socket?sk->sk_socket->flags:0)); ++ ++ if (saddr) { /* direct address match */ ++ return v4_addr_match(nxa, saddr, -1); ++ } else if (nxi) { /* match against nx_info */ ++ return v4_nx_addr_in_nx_info(nxi, nxa, -1); ++ } else { /* unrestricted any socket */ ++ return 1; ++ } ++} ++ ++ ++ ++static inline ++int nx_dev_visible(struct nx_info *nxi, struct net_device *dev) ++{ ++ vxdprintk(VXD_CBIT(net, 1), ++ "nx_dev_visible(%p[#%u],%p " VS_Q("%s") ") %d", ++ nxi, nxi ? nxi->nx_id : 0, dev, dev->name, ++ nxi ? dev_in_nx_info(dev, nxi) : 0); ++ ++ if (!nx_info_flags(nxi, NXF_HIDE_NETIF, 0)) ++ return 1; ++ if (dev_in_nx_info(dev, nxi)) ++ return 1; ++ return 0; ++} ++ ++ ++static inline ++int v4_ifa_in_nx_info(struct in_ifaddr *ifa, struct nx_info *nxi) ++{ ++ if (!nxi) ++ return 1; ++ if (!ifa) ++ return 0; ++ return v4_addr_in_nx_info(nxi, ifa->ifa_local, NXA_MASK_SHOW); ++} ++ ++static inline ++int nx_v4_ifa_visible(struct nx_info *nxi, struct in_ifaddr *ifa) ++{ ++ vxdprintk(VXD_CBIT(net, 1), "nx_v4_ifa_visible(%p[#%u],%p) %d", ++ nxi, nxi ? nxi->nx_id : 0, ifa, ++ nxi ? v4_ifa_in_nx_info(ifa, nxi) : 0); ++ ++ if (!nx_info_flags(nxi, NXF_HIDE_NETIF, 0)) ++ return 1; ++ if (v4_ifa_in_nx_info(ifa, nxi)) ++ return 1; ++ return 0; ++} ++ ++ ++struct nx_v4_sock_addr { ++ __be32 saddr; /* Address used for validation */ ++ __be32 baddr; /* Address used for socket bind */ ++}; ++ ++static inline ++int v4_map_sock_addr(struct inet_sock *inet, struct sockaddr_in *addr, ++ struct nx_v4_sock_addr *nsa) ++{ ++ struct sock *sk = &inet->sk; ++ struct nx_info *nxi = sk->sk_nx_info; ++ __be32 saddr = addr->sin_addr.s_addr; ++ __be32 baddr = saddr; ++ ++ vxdprintk(VXD_CBIT(net, 3), ++ "inet_bind(%p)* %p,%p;%lx " NIPQUAD_FMT, ++ sk, sk->sk_nx_info, sk->sk_socket, ++ (sk->sk_socket ? 
sk->sk_socket->flags : 0), ++ NIPQUAD(saddr)); ++ ++ if (nxi) { ++ if (saddr == INADDR_ANY) { ++ if (nx_info_flags(nxi, NXF_SINGLE_IP, 0)) ++ baddr = nxi->v4.ip[0].s_addr; ++ } else if (saddr == IPI_LOOPBACK) { ++ if (nx_info_flags(nxi, NXF_LBACK_REMAP, 0)) ++ baddr = nxi->v4_lback.s_addr; ++ } else if (!ipv4_is_multicast(saddr) || ++ !nx_info_ncaps(nxi, NXC_MULTICAST)) { ++ /* normal address bind */ ++ if (!v4_addr_in_nx_info(nxi, saddr, NXA_MASK_BIND)) ++ return -EADDRNOTAVAIL; ++ } ++ } ++ ++ vxdprintk(VXD_CBIT(net, 3), ++ "inet_bind(%p) " NIPQUAD_FMT ", " NIPQUAD_FMT, ++ sk, NIPQUAD(saddr), NIPQUAD(baddr)); ++ ++ nsa->saddr = saddr; ++ nsa->baddr = baddr; ++ return 0; ++} ++ ++static inline ++void v4_set_sock_addr(struct inet_sock *inet, struct nx_v4_sock_addr *nsa) ++{ ++ inet->inet_saddr = nsa->baddr; ++ inet->inet_rcv_saddr = nsa->baddr; ++} ++ ++ ++/* ++ * helper to simplify inet_lookup_listener ++ * ++ * nxi: the socket's nx_info if any ++ * addr: to be verified address ++ * saddr: socket address ++ */ ++static inline int v4_inet_addr_match ( ++ struct nx_info *nxi, ++ __be32 addr, ++ __be32 saddr) ++{ ++ if (addr && (saddr == addr)) ++ return 1; ++ if (!saddr) ++ return nxi ? v4_addr_in_nx_info(nxi, addr, NXA_MASK_BIND) : 1; ++ return 0; ++} ++ ++static inline __be32 nx_map_sock_lback(struct nx_info *nxi, __be32 addr) ++{ ++ if (nx_info_flags(nxi, NXF_HIDE_LBACK, 0) && ++ (addr == nxi->v4_lback.s_addr)) ++ return IPI_LOOPBACK; ++ return addr; ++} ++ ++static inline ++int nx_info_has_v4(struct nx_info *nxi) ++{ ++ if (!nxi) ++ return 1; ++ if (NX_IPV4(nxi)) ++ return 1; ++ if (nx_info_flags(nxi, NXF_LBACK_REMAP, 0)) ++ return 1; ++ return 0; ++} ++ ++#else /* CONFIG_INET */ ++ ++static inline ++int nx_dev_visible(struct nx_info *n, struct net_device *d) ++{ ++ return 1; ++} ++ ++static inline ++int nx_v4_addr_conflict(struct nx_info *n, uint32_t a, const struct sock *s) ++{ ++ return 1; ++} ++ ++static inline ++int v4_ifa_in_nx_info(struct in_ifaddr *a, struct nx_info *n) ++{ ++ return 1; ++} ++ ++static inline ++int nx_info_has_v4(struct nx_info *nxi) ++{ ++ return 0; ++} ++ ++#endif /* CONFIG_INET */ ++ ++#define current_nx_info_has_v4() \ ++ nx_info_has_v4(current_nx_info()) ++ ++#else ++// #warning duplicate inclusion ++#endif +diff -NurpP --minimal linux-3.2.34/include/linux/vs_inet6.h linux-3.2.34-vs2.3.2.15/include/linux/vs_inet6.h +--- linux-3.2.34/include/linux/vs_inet6.h 1970-01-01 01:00:00.000000000 +0100 ++++ linux-3.2.34-vs2.3.2.15/include/linux/vs_inet6.h 2011-12-05 19:33:02.000000000 +0100 +@@ -0,0 +1,246 @@ ++#ifndef _VS_INET6_H ++#define _VS_INET6_H ++ ++#include "vserver/base.h" ++#include "vserver/network.h" ++#include "vserver/debug.h" ++ ++#include ++ ++#define NXAV6(a) &(a)->ip, &(a)->mask, (a)->prefix, (a)->type ++#define NXAV6_FMT "[%pI6/%pI6/%d:%04x]" ++ ++ ++#ifdef CONFIG_IPV6 ++ ++static inline ++int v6_addr_match(struct nx_addr_v6 *nxa, ++ const struct in6_addr *addr, uint16_t mask) ++{ ++ int ret = 0; ++ ++ switch (nxa->type & mask) { ++ case NXA_TYPE_MASK: ++ ret = ipv6_masked_addr_cmp(&nxa->ip, &nxa->mask, addr); ++ break; ++ case NXA_TYPE_ADDR: ++ ret = ipv6_addr_equal(&nxa->ip, addr); ++ break; ++ case NXA_TYPE_ANY: ++ ret = 1; ++ break; ++ } ++ vxdprintk(VXD_CBIT(net, 0), ++ "v6_addr_match(%p" NXAV6_FMT ",%pI6,%04x) = %d", ++ nxa, NXAV6(nxa), addr, mask, ret); ++ return ret; ++} ++ ++static inline ++int v6_addr_in_nx_info(struct nx_info *nxi, ++ const struct in6_addr *addr, uint16_t mask) ++{ ++ struct nx_addr_v6 *nxa; ++ int ret = 1; ++ ++ if 
(!nxi) ++ goto out; ++ for (nxa = &nxi->v6; nxa; nxa = nxa->next) ++ if (v6_addr_match(nxa, addr, mask)) ++ goto out; ++ ret = 0; ++out: ++ vxdprintk(VXD_CBIT(net, 0), ++ "v6_addr_in_nx_info(%p[#%u],%pI6,%04x) = %d", ++ nxi, nxi ? nxi->nx_id : 0, addr, mask, ret); ++ return ret; ++} ++ ++static inline ++int v6_nx_addr_match(struct nx_addr_v6 *nxa, struct nx_addr_v6 *addr, uint16_t mask) ++{ ++ /* FIXME: needs full range checks */ ++ return v6_addr_match(nxa, &addr->ip, mask); ++} ++ ++static inline ++int v6_nx_addr_in_nx_info(struct nx_info *nxi, struct nx_addr_v6 *nxa, uint16_t mask) ++{ ++ struct nx_addr_v6 *ptr; ++ ++ for (ptr = &nxi->v6; ptr; ptr = ptr->next) ++ if (v6_nx_addr_match(ptr, nxa, mask)) ++ return 1; ++ return 0; ++} ++ ++ ++/* ++ * Check if a given address matches for a socket ++ * ++ * nxi: the socket's nx_info if any ++ * addr: to be verified address ++ */ ++static inline ++int v6_sock_addr_match ( ++ struct nx_info *nxi, ++ struct inet_sock *inet, ++ struct in6_addr *addr) ++{ ++ struct sock *sk = &inet->sk; ++ struct in6_addr *saddr = inet6_rcv_saddr(sk); ++ ++ if (!ipv6_addr_any(addr) && ++ ipv6_addr_equal(saddr, addr)) ++ return 1; ++ if (ipv6_addr_any(saddr)) ++ return v6_addr_in_nx_info(nxi, addr, -1); ++ return 0; ++} ++ ++/* ++ * check if address is covered by socket ++ * ++ * sk: the socket to check against ++ * addr: the address in question (must be != 0) ++ */ ++ ++static inline ++int __v6_addr_match_socket(const struct sock *sk, struct nx_addr_v6 *nxa) ++{ ++ struct nx_info *nxi = sk->sk_nx_info; ++ struct in6_addr *saddr = inet6_rcv_saddr(sk); ++ ++ vxdprintk(VXD_CBIT(net, 5), ++ "__v6_addr_in_socket(%p," NXAV6_FMT ") %p:%pI6 %p;%lx", ++ sk, NXAV6(nxa), nxi, saddr, sk->sk_socket, ++ (sk->sk_socket?sk->sk_socket->flags:0)); ++ ++ if (!ipv6_addr_any(saddr)) { /* direct address match */ ++ return v6_addr_match(nxa, saddr, -1); ++ } else if (nxi) { /* match against nx_info */ ++ return v6_nx_addr_in_nx_info(nxi, nxa, -1); ++ } else { /* unrestricted any socket */ ++ return 1; ++ } ++} ++ ++ ++/* inet related checks and helpers */ ++ ++ ++struct in_ifaddr; ++struct net_device; ++struct sock; ++ ++ ++#include ++#include ++#include ++ ++ ++int dev_in_nx_info(struct net_device *, struct nx_info *); ++int v6_dev_in_nx_info(struct net_device *, struct nx_info *); ++int nx_v6_addr_conflict(struct nx_info *, struct nx_info *); ++ ++ ++ ++static inline ++int v6_ifa_in_nx_info(struct inet6_ifaddr *ifa, struct nx_info *nxi) ++{ ++ if (!nxi) ++ return 1; ++ if (!ifa) ++ return 0; ++ return v6_addr_in_nx_info(nxi, &ifa->addr, -1); ++} ++ ++static inline ++int nx_v6_ifa_visible(struct nx_info *nxi, struct inet6_ifaddr *ifa) ++{ ++ vxdprintk(VXD_CBIT(net, 1), "nx_v6_ifa_visible(%p[#%u],%p) %d", ++ nxi, nxi ? nxi->nx_id : 0, ifa, ++ nxi ? 
v6_ifa_in_nx_info(ifa, nxi) : 0); ++ ++ if (!nx_info_flags(nxi, NXF_HIDE_NETIF, 0)) ++ return 1; ++ if (v6_ifa_in_nx_info(ifa, nxi)) ++ return 1; ++ return 0; ++} ++ ++ ++struct nx_v6_sock_addr { ++ struct in6_addr saddr; /* Address used for validation */ ++ struct in6_addr baddr; /* Address used for socket bind */ ++}; ++ ++static inline ++int v6_map_sock_addr(struct inet_sock *inet, struct sockaddr_in6 *addr, ++ struct nx_v6_sock_addr *nsa) ++{ ++ // struct sock *sk = &inet->sk; ++ // struct nx_info *nxi = sk->sk_nx_info; ++ struct in6_addr saddr = addr->sin6_addr; ++ struct in6_addr baddr = saddr; ++ ++ nsa->saddr = saddr; ++ nsa->baddr = baddr; ++ return 0; ++} ++ ++static inline ++void v6_set_sock_addr(struct inet_sock *inet, struct nx_v6_sock_addr *nsa) ++{ ++ // struct sock *sk = &inet->sk; ++ // struct in6_addr *saddr = inet6_rcv_saddr(sk); ++ ++ // *saddr = nsa->baddr; ++ // inet->inet_saddr = nsa->baddr; ++} ++ ++static inline ++int nx_info_has_v6(struct nx_info *nxi) ++{ ++ if (!nxi) ++ return 1; ++ if (NX_IPV6(nxi)) ++ return 1; ++ return 0; ++} ++ ++#else /* CONFIG_IPV6 */ ++ ++static inline ++int nx_v6_dev_visible(struct nx_info *n, struct net_device *d) ++{ ++ return 1; ++} ++ ++ ++static inline ++int nx_v6_addr_conflict(struct nx_info *n, uint32_t a, const struct sock *s) ++{ ++ return 1; ++} ++ ++static inline ++int v6_ifa_in_nx_info(struct in_ifaddr *a, struct nx_info *n) ++{ ++ return 1; ++} ++ ++static inline ++int nx_info_has_v6(struct nx_info *nxi) ++{ ++ return 0; ++} ++ ++#endif /* CONFIG_IPV6 */ ++ ++#define current_nx_info_has_v6() \ ++ nx_info_has_v6(current_nx_info()) ++ ++#else ++#warning duplicate inclusion ++#endif +diff -NurpP --minimal linux-3.2.34/include/linux/vs_limit.h linux-3.2.34-vs2.3.2.15/include/linux/vs_limit.h +--- linux-3.2.34/include/linux/vs_limit.h 1970-01-01 01:00:00.000000000 +0100 ++++ linux-3.2.34-vs2.3.2.15/include/linux/vs_limit.h 2011-12-05 19:33:02.000000000 +0100 +@@ -0,0 +1,140 @@ ++#ifndef _VS_LIMIT_H ++#define _VS_LIMIT_H ++ ++#include "vserver/limit.h" ++#include "vserver/base.h" ++#include "vserver/context.h" ++#include "vserver/debug.h" ++#include "vserver/context.h" ++#include "vserver/limit_int.h" ++ ++ ++#define vx_acc_cres(v, d, p, r) \ ++ __vx_acc_cres(v, r, d, p, __FILE__, __LINE__) ++ ++#define vx_acc_cres_cond(x, d, p, r) \ ++ __vx_acc_cres(((x) == vx_current_xid()) ? current_vx_info() : 0, \ ++ r, d, p, __FILE__, __LINE__) ++ ++ ++#define vx_add_cres(v, a, p, r) \ ++ __vx_add_cres(v, r, a, p, __FILE__, __LINE__) ++#define vx_sub_cres(v, a, p, r) vx_add_cres(v, -(a), p, r) ++ ++#define vx_add_cres_cond(x, a, p, r) \ ++ __vx_add_cres(((x) == vx_current_xid()) ? 
current_vx_info() : 0, \ ++ r, a, p, __FILE__, __LINE__) ++#define vx_sub_cres_cond(x, a, p, r) vx_add_cres_cond(x, -(a), p, r) ++ ++ ++/* process and file limits */ ++ ++#define vx_nproc_inc(p) \ ++ vx_acc_cres((p)->vx_info, 1, p, RLIMIT_NPROC) ++ ++#define vx_nproc_dec(p) \ ++ vx_acc_cres((p)->vx_info,-1, p, RLIMIT_NPROC) ++ ++#define vx_files_inc(f) \ ++ vx_acc_cres_cond((f)->f_xid, 1, f, RLIMIT_NOFILE) ++ ++#define vx_files_dec(f) \ ++ vx_acc_cres_cond((f)->f_xid,-1, f, RLIMIT_NOFILE) ++ ++#define vx_locks_inc(l) \ ++ vx_acc_cres_cond((l)->fl_xid, 1, l, RLIMIT_LOCKS) ++ ++#define vx_locks_dec(l) \ ++ vx_acc_cres_cond((l)->fl_xid,-1, l, RLIMIT_LOCKS) ++ ++#define vx_openfd_inc(f) \ ++ vx_acc_cres(current_vx_info(), 1, (void *)(long)(f), VLIMIT_OPENFD) ++ ++#define vx_openfd_dec(f) \ ++ vx_acc_cres(current_vx_info(),-1, (void *)(long)(f), VLIMIT_OPENFD) ++ ++ ++#define vx_cres_avail(v, n, r) \ ++ __vx_cres_avail(v, r, n, __FILE__, __LINE__) ++ ++ ++#define vx_nproc_avail(n) \ ++ vx_cres_avail(current_vx_info(), n, RLIMIT_NPROC) ++ ++#define vx_files_avail(n) \ ++ vx_cres_avail(current_vx_info(), n, RLIMIT_NOFILE) ++ ++#define vx_locks_avail(n) \ ++ vx_cres_avail(current_vx_info(), n, RLIMIT_LOCKS) ++ ++#define vx_openfd_avail(n) \ ++ vx_cres_avail(current_vx_info(), n, VLIMIT_OPENFD) ++ ++ ++/* dentry limits */ ++ ++#define vx_dentry_inc(d) do { \ ++ if ((d)->d_count == 1) \ ++ vx_acc_cres(current_vx_info(), 1, d, VLIMIT_DENTRY); \ ++ } while (0) ++ ++#define vx_dentry_dec(d) do { \ ++ if ((d)->d_count == 0) \ ++ vx_acc_cres(current_vx_info(),-1, d, VLIMIT_DENTRY); \ ++ } while (0) ++ ++#define vx_dentry_avail(n) \ ++ vx_cres_avail(current_vx_info(), n, VLIMIT_DENTRY) ++ ++ ++/* socket limits */ ++ ++#define vx_sock_inc(s) \ ++ vx_acc_cres((s)->sk_vx_info, 1, s, VLIMIT_NSOCK) ++ ++#define vx_sock_dec(s) \ ++ vx_acc_cres((s)->sk_vx_info,-1, s, VLIMIT_NSOCK) ++ ++#define vx_sock_avail(n) \ ++ vx_cres_avail(current_vx_info(), n, VLIMIT_NSOCK) ++ ++ ++/* ipc resource limits */ ++ ++#define vx_ipcmsg_add(v, u, a) \ ++ vx_add_cres(v, a, u, RLIMIT_MSGQUEUE) ++ ++#define vx_ipcmsg_sub(v, u, a) \ ++ vx_sub_cres(v, a, u, RLIMIT_MSGQUEUE) ++ ++#define vx_ipcmsg_avail(v, a) \ ++ vx_cres_avail(v, a, RLIMIT_MSGQUEUE) ++ ++ ++#define vx_ipcshm_add(v, k, a) \ ++ vx_add_cres(v, a, (void *)(long)(k), VLIMIT_SHMEM) ++ ++#define vx_ipcshm_sub(v, k, a) \ ++ vx_sub_cres(v, a, (void *)(long)(k), VLIMIT_SHMEM) ++ ++#define vx_ipcshm_avail(v, a) \ ++ vx_cres_avail(v, a, VLIMIT_SHMEM) ++ ++ ++#define vx_semary_inc(a) \ ++ vx_acc_cres(current_vx_info(), 1, a, VLIMIT_SEMARY) ++ ++#define vx_semary_dec(a) \ ++ vx_acc_cres(current_vx_info(), -1, a, VLIMIT_SEMARY) ++ ++ ++#define vx_nsems_add(a,n) \ ++ vx_add_cres(current_vx_info(), n, a, VLIMIT_NSEMS) ++ ++#define vx_nsems_sub(a,n) \ ++ vx_sub_cres(current_vx_info(), n, a, VLIMIT_NSEMS) ++ ++ ++#else ++#warning duplicate inclusion ++#endif +diff -NurpP --minimal linux-3.2.34/include/linux/vs_network.h linux-3.2.34-vs2.3.2.15/include/linux/vs_network.h +--- linux-3.2.34/include/linux/vs_network.h 1970-01-01 01:00:00.000000000 +0100 ++++ linux-3.2.34-vs2.3.2.15/include/linux/vs_network.h 2011-12-05 19:33:02.000000000 +0100 +@@ -0,0 +1,169 @@ ++#ifndef _NX_VS_NETWORK_H ++#define _NX_VS_NETWORK_H ++ ++#include "vserver/context.h" ++#include "vserver/network.h" ++#include "vserver/base.h" ++#include "vserver/check.h" ++#include "vserver/debug.h" ++ ++#include ++ ++ ++#define get_nx_info(i) __get_nx_info(i, __FILE__, __LINE__) ++ ++static inline struct nx_info 
*__get_nx_info(struct nx_info *nxi, ++ const char *_file, int _line) ++{ ++ if (!nxi) ++ return NULL; ++ ++ vxlprintk(VXD_CBIT(nid, 2), "get_nx_info(%p[#%d.%d])", ++ nxi, nxi ? nxi->nx_id : 0, ++ nxi ? atomic_read(&nxi->nx_usecnt) : 0, ++ _file, _line); ++ ++ atomic_inc(&nxi->nx_usecnt); ++ return nxi; ++} ++ ++ ++extern void free_nx_info(struct nx_info *); ++ ++#define put_nx_info(i) __put_nx_info(i, __FILE__, __LINE__) ++ ++static inline void __put_nx_info(struct nx_info *nxi, const char *_file, int _line) ++{ ++ if (!nxi) ++ return; ++ ++ vxlprintk(VXD_CBIT(nid, 2), "put_nx_info(%p[#%d.%d])", ++ nxi, nxi ? nxi->nx_id : 0, ++ nxi ? atomic_read(&nxi->nx_usecnt) : 0, ++ _file, _line); ++ ++ if (atomic_dec_and_test(&nxi->nx_usecnt)) ++ free_nx_info(nxi); ++} ++ ++ ++#define init_nx_info(p, i) __init_nx_info(p, i, __FILE__, __LINE__) ++ ++static inline void __init_nx_info(struct nx_info **nxp, struct nx_info *nxi, ++ const char *_file, int _line) ++{ ++ if (nxi) { ++ vxlprintk(VXD_CBIT(nid, 3), ++ "init_nx_info(%p[#%d.%d])", ++ nxi, nxi ? nxi->nx_id : 0, ++ nxi ? atomic_read(&nxi->nx_usecnt) : 0, ++ _file, _line); ++ ++ atomic_inc(&nxi->nx_usecnt); ++ } ++ *nxp = nxi; ++} ++ ++ ++#define set_nx_info(p, i) __set_nx_info(p, i, __FILE__, __LINE__) ++ ++static inline void __set_nx_info(struct nx_info **nxp, struct nx_info *nxi, ++ const char *_file, int _line) ++{ ++ struct nx_info *nxo; ++ ++ if (!nxi) ++ return; ++ ++ vxlprintk(VXD_CBIT(nid, 3), "set_nx_info(%p[#%d.%d])", ++ nxi, nxi ? nxi->nx_id : 0, ++ nxi ? atomic_read(&nxi->nx_usecnt) : 0, ++ _file, _line); ++ ++ atomic_inc(&nxi->nx_usecnt); ++ nxo = xchg(nxp, nxi); ++ BUG_ON(nxo); ++} ++ ++#define clr_nx_info(p) __clr_nx_info(p, __FILE__, __LINE__) ++ ++static inline void __clr_nx_info(struct nx_info **nxp, ++ const char *_file, int _line) ++{ ++ struct nx_info *nxo; ++ ++ nxo = xchg(nxp, NULL); ++ if (!nxo) ++ return; ++ ++ vxlprintk(VXD_CBIT(nid, 3), "clr_nx_info(%p[#%d.%d])", ++ nxo, nxo ? nxo->nx_id : 0, ++ nxo ? atomic_read(&nxo->nx_usecnt) : 0, ++ _file, _line); ++ ++ if (atomic_dec_and_test(&nxo->nx_usecnt)) ++ free_nx_info(nxo); ++} ++ ++ ++#define claim_nx_info(v, p) __claim_nx_info(v, p, __FILE__, __LINE__) ++ ++static inline void __claim_nx_info(struct nx_info *nxi, ++ struct task_struct *task, const char *_file, int _line) ++{ ++ vxlprintk(VXD_CBIT(nid, 3), "claim_nx_info(%p[#%d.%d.%d]) %p", ++ nxi, nxi ? nxi->nx_id : 0, ++ nxi?atomic_read(&nxi->nx_usecnt):0, ++ nxi?atomic_read(&nxi->nx_tasks):0, ++ task, _file, _line); ++ ++ atomic_inc(&nxi->nx_tasks); ++} ++ ++ ++extern void unhash_nx_info(struct nx_info *); ++ ++#define release_nx_info(v, p) __release_nx_info(v, p, __FILE__, __LINE__) ++ ++static inline void __release_nx_info(struct nx_info *nxi, ++ struct task_struct *task, const char *_file, int _line) ++{ ++ vxlprintk(VXD_CBIT(nid, 3), "release_nx_info(%p[#%d.%d.%d]) %p", ++ nxi, nxi ? nxi->nx_id : 0, ++ nxi ? atomic_read(&nxi->nx_usecnt) : 0, ++ nxi ? 
atomic_read(&nxi->nx_tasks) : 0, ++ task, _file, _line); ++ ++ might_sleep(); ++ ++ if (atomic_dec_and_test(&nxi->nx_tasks)) ++ unhash_nx_info(nxi); ++} ++ ++ ++#define task_get_nx_info(i) __task_get_nx_info(i, __FILE__, __LINE__) ++ ++static __inline__ struct nx_info *__task_get_nx_info(struct task_struct *p, ++ const char *_file, int _line) ++{ ++ struct nx_info *nxi; ++ ++ task_lock(p); ++ vxlprintk(VXD_CBIT(nid, 5), "task_get_nx_info(%p)", ++ p, _file, _line); ++ nxi = __get_nx_info(p->nx_info, _file, _line); ++ task_unlock(p); ++ return nxi; ++} ++ ++ ++static inline void exit_nx_info(struct task_struct *p) ++{ ++ if (p->nx_info) ++ release_nx_info(p->nx_info, p); ++} ++ ++ ++#else ++#warning duplicate inclusion ++#endif +diff -NurpP --minimal linux-3.2.34/include/linux/vs_pid.h linux-3.2.34-vs2.3.2.15/include/linux/vs_pid.h +--- linux-3.2.34/include/linux/vs_pid.h 1970-01-01 01:00:00.000000000 +0100 ++++ linux-3.2.34-vs2.3.2.15/include/linux/vs_pid.h 2011-12-05 19:33:02.000000000 +0100 +@@ -0,0 +1,50 @@ ++#ifndef _VS_PID_H ++#define _VS_PID_H ++ ++#include "vserver/base.h" ++#include "vserver/check.h" ++#include "vserver/context.h" ++#include "vserver/debug.h" ++#include "vserver/pid.h" ++#include ++ ++ ++#define VXF_FAKE_INIT (VXF_INFO_INIT | VXF_STATE_INIT) ++ ++static inline ++int vx_proc_task_visible(struct task_struct *task) ++{ ++ if ((task->pid == 1) && ++ !vx_flags(VXF_FAKE_INIT, VXF_FAKE_INIT)) ++ /* show a blend through init */ ++ goto visible; ++ if (vx_check(vx_task_xid(task), VS_WATCH | VS_IDENT)) ++ goto visible; ++ return 0; ++visible: ++ return 1; ++} ++ ++#define find_task_by_real_pid(pid) find_task_by_pid_ns(pid, &init_pid_ns) ++ ++ ++static inline ++struct task_struct *vx_get_proc_task(struct inode *inode, struct pid *pid) ++{ ++ struct task_struct *task = get_pid_task(pid, PIDTYPE_PID); ++ ++ if (task && !vx_proc_task_visible(task)) { ++ vxdprintk(VXD_CBIT(misc, 6), ++ "dropping task (get) %p[#%u,%u] for %p[#%u,%u]", ++ task, task->xid, task->pid, ++ current, current->xid, current->pid); ++ put_task_struct(task); ++ task = NULL; ++ } ++ return task; ++} ++ ++ ++#else ++#warning duplicate inclusion ++#endif +diff -NurpP --minimal linux-3.2.34/include/linux/vs_sched.h linux-3.2.34-vs2.3.2.15/include/linux/vs_sched.h +--- linux-3.2.34/include/linux/vs_sched.h 1970-01-01 01:00:00.000000000 +0100 ++++ linux-3.2.34-vs2.3.2.15/include/linux/vs_sched.h 2011-12-05 19:33:02.000000000 +0100 +@@ -0,0 +1,40 @@ ++#ifndef _VS_SCHED_H ++#define _VS_SCHED_H ++ ++#include "vserver/base.h" ++#include "vserver/context.h" ++#include "vserver/sched.h" ++ ++ ++#define MAX_PRIO_BIAS 20 ++#define MIN_PRIO_BIAS -20 ++ ++static inline ++int vx_adjust_prio(struct task_struct *p, int prio, int max_user) ++{ ++ struct vx_info *vxi = p->vx_info; ++ ++ if (vxi) ++ prio += vx_cpu(vxi, sched_pc).prio_bias; ++ return prio; ++} ++ ++static inline void vx_account_user(struct vx_info *vxi, ++ cputime_t cputime, int nice) ++{ ++ if (!vxi) ++ return; ++ vx_cpu(vxi, sched_pc).user_ticks += cputime; ++} ++ ++static inline void vx_account_system(struct vx_info *vxi, ++ cputime_t cputime, int idle) ++{ ++ if (!vxi) ++ return; ++ vx_cpu(vxi, sched_pc).sys_ticks += cputime; ++} ++ ++#else ++#warning duplicate inclusion ++#endif +diff -NurpP --minimal linux-3.2.34/include/linux/vs_socket.h linux-3.2.34-vs2.3.2.15/include/linux/vs_socket.h +--- linux-3.2.34/include/linux/vs_socket.h 1970-01-01 01:00:00.000000000 +0100 ++++ linux-3.2.34-vs2.3.2.15/include/linux/vs_socket.h 2011-12-05 19:33:02.000000000 +0100 +@@ 
-0,0 +1,67 @@ ++#ifndef _VS_SOCKET_H ++#define _VS_SOCKET_H ++ ++#include "vserver/debug.h" ++#include "vserver/base.h" ++#include "vserver/cacct.h" ++#include "vserver/context.h" ++#include "vserver/tag.h" ++ ++ ++/* socket accounting */ ++ ++#include ++ ++static inline int vx_sock_type(int family) ++{ ++ switch (family) { ++ case PF_UNSPEC: ++ return VXA_SOCK_UNSPEC; ++ case PF_UNIX: ++ return VXA_SOCK_UNIX; ++ case PF_INET: ++ return VXA_SOCK_INET; ++ case PF_INET6: ++ return VXA_SOCK_INET6; ++ case PF_PACKET: ++ return VXA_SOCK_PACKET; ++ default: ++ return VXA_SOCK_OTHER; ++ } ++} ++ ++#define vx_acc_sock(v, f, p, s) \ ++ __vx_acc_sock(v, f, p, s, __FILE__, __LINE__) ++ ++static inline void __vx_acc_sock(struct vx_info *vxi, ++ int family, int pos, int size, char *file, int line) ++{ ++ if (vxi) { ++ int type = vx_sock_type(family); ++ ++ atomic_long_inc(&vxi->cacct.sock[type][pos].count); ++ atomic_long_add(size, &vxi->cacct.sock[type][pos].total); ++ } ++} ++ ++#define vx_sock_recv(sk, s) \ ++ vx_acc_sock((sk)->sk_vx_info, (sk)->sk_family, 0, s) ++#define vx_sock_send(sk, s) \ ++ vx_acc_sock((sk)->sk_vx_info, (sk)->sk_family, 1, s) ++#define vx_sock_fail(sk, s) \ ++ vx_acc_sock((sk)->sk_vx_info, (sk)->sk_family, 2, s) ++ ++ ++#define sock_vx_init(s) do { \ ++ (s)->sk_xid = 0; \ ++ (s)->sk_vx_info = NULL; \ ++ } while (0) ++ ++#define sock_nx_init(s) do { \ ++ (s)->sk_nid = 0; \ ++ (s)->sk_nx_info = NULL; \ ++ } while (0) ++ ++#else ++#warning duplicate inclusion ++#endif +diff -NurpP --minimal linux-3.2.34/include/linux/vs_tag.h linux-3.2.34-vs2.3.2.15/include/linux/vs_tag.h +--- linux-3.2.34/include/linux/vs_tag.h 1970-01-01 01:00:00.000000000 +0100 ++++ linux-3.2.34-vs2.3.2.15/include/linux/vs_tag.h 2011-12-05 19:33:02.000000000 +0100 +@@ -0,0 +1,47 @@ ++#ifndef _VS_TAG_H ++#define _VS_TAG_H ++ ++#include ++ ++/* check conditions */ ++ ++#define DX_ADMIN 0x0001 ++#define DX_WATCH 0x0002 ++#define DX_HOSTID 0x0008 ++ ++#define DX_IDENT 0x0010 ++ ++#define DX_ARG_MASK 0x0010 ++ ++ ++#define dx_task_tag(t) ((t)->tag) ++ ++#define dx_current_tag() dx_task_tag(current) ++ ++#define dx_check(c, m) __dx_check(dx_current_tag(), c, m) ++ ++#define dx_weak_check(c, m) ((m) ? 
dx_check(c, m) : 1) ++ ++ ++/* ++ * check current context for ADMIN/WATCH and ++ * optionally against supplied argument ++ */ ++static inline int __dx_check(tag_t cid, tag_t id, unsigned int mode) ++{ ++ if (mode & DX_ARG_MASK) { ++ if ((mode & DX_IDENT) && (id == cid)) ++ return 1; ++ } ++ return (((mode & DX_ADMIN) && (cid == 0)) || ++ ((mode & DX_WATCH) && (cid == 1)) || ++ ((mode & DX_HOSTID) && (id == 0))); ++} ++ ++struct inode; ++int dx_permission(const struct inode *inode, int mask); ++ ++ ++#else ++#warning duplicate inclusion ++#endif +diff -NurpP --minimal linux-3.2.34/include/linux/vs_time.h linux-3.2.34-vs2.3.2.15/include/linux/vs_time.h +--- linux-3.2.34/include/linux/vs_time.h 1970-01-01 01:00:00.000000000 +0100 ++++ linux-3.2.34-vs2.3.2.15/include/linux/vs_time.h 2011-12-05 19:33:02.000000000 +0100 +@@ -0,0 +1,19 @@ ++#ifndef _VS_TIME_H ++#define _VS_TIME_H ++ ++ ++/* time faking stuff */ ++ ++#ifdef CONFIG_VSERVER_VTIME ++ ++extern void vx_adjust_timespec(struct timespec *ts); ++extern int vx_settimeofday(const struct timespec *ts); ++ ++#else ++#define vx_adjust_timespec(t) do { } while (0) ++#define vx_settimeofday(t) do_settimeofday(t) ++#endif ++ ++#else ++#warning duplicate inclusion ++#endif +diff -NurpP --minimal linux-3.2.34/include/linux/vserver/Kbuild linux-3.2.34-vs2.3.2.15/include/linux/vserver/Kbuild +--- linux-3.2.34/include/linux/vserver/Kbuild 1970-01-01 01:00:00.000000000 +0100 ++++ linux-3.2.34-vs2.3.2.15/include/linux/vserver/Kbuild 2011-12-05 19:33:02.000000000 +0100 +@@ -0,0 +1,8 @@ ++ ++header-y += context_cmd.h network_cmd.h space_cmd.h \ ++ cacct_cmd.h cvirt_cmd.h limit_cmd.h dlimit_cmd.h \ ++ inode_cmd.h tag_cmd.h sched_cmd.h signal_cmd.h \ ++ debug_cmd.h device_cmd.h ++ ++header-y += switch.h network.h monitor.h inode.h device.h ++ +diff -NurpP --minimal linux-3.2.34/include/linux/vserver/base.h linux-3.2.34-vs2.3.2.15/include/linux/vserver/base.h +--- linux-3.2.34/include/linux/vserver/base.h 1970-01-01 01:00:00.000000000 +0100 ++++ linux-3.2.34-vs2.3.2.15/include/linux/vserver/base.h 2011-12-05 19:33:02.000000000 +0100 +@@ -0,0 +1,178 @@ ++#ifndef _VX_BASE_H ++#define _VX_BASE_H ++ ++ ++/* context state changes */ ++ ++enum { ++ VSC_STARTUP = 1, ++ VSC_SHUTDOWN, ++ ++ VSC_NETUP, ++ VSC_NETDOWN, ++}; ++ ++ ++ ++#define vx_task_xid(t) ((t)->xid) ++ ++#define vx_current_xid() vx_task_xid(current) ++ ++#define current_vx_info() (current->vx_info) ++ ++ ++#define nx_task_nid(t) ((t)->nid) ++ ++#define nx_current_nid() nx_task_nid(current) ++ ++#define current_nx_info() (current->nx_info) ++ ++ ++/* generic flag merging */ ++ ++#define vs_check_flags(v, m, f) (((v) & (m)) ^ (f)) ++ ++#define vs_mask_flags(v, f, m) (((v) & ~(m)) | ((f) & (m))) ++ ++#define vs_mask_mask(v, f, m) (((v) & ~(m)) | ((v) & (f) & (m))) ++ ++#define vs_check_bit(v, n) ((v) & (1LL << (n))) ++ ++ ++/* context flags */ ++ ++#define __vx_flags(v) ((v) ? (v)->vx_flags : 0) ++ ++#define vx_current_flags() __vx_flags(current_vx_info()) ++ ++#define vx_info_flags(v, m, f) \ ++ vs_check_flags(__vx_flags(v), m, f) ++ ++#define task_vx_flags(t, m, f) \ ++ ((t) && vx_info_flags((t)->vx_info, m, f)) ++ ++#define vx_flags(m, f) vx_info_flags(current_vx_info(), m, f) ++ ++ ++/* context caps */ ++ ++#define __vx_ccaps(v) ((v) ? (v)->vx_ccaps : 0) ++ ++#define vx_current_ccaps() __vx_ccaps(current_vx_info()) ++ ++#define vx_info_ccaps(v, c) (__vx_ccaps(v) & (c)) ++ ++#define vx_ccaps(c) vx_info_ccaps(current_vx_info(), (c)) ++ ++ ++ ++/* network flags */ ++ ++#define __nx_flags(n) ((n) ? 
(n)->nx_flags : 0) ++ ++#define nx_current_flags() __nx_flags(current_nx_info()) ++ ++#define nx_info_flags(n, m, f) \ ++ vs_check_flags(__nx_flags(n), m, f) ++ ++#define task_nx_flags(t, m, f) \ ++ ((t) && nx_info_flags((t)->nx_info, m, f)) ++ ++#define nx_flags(m, f) nx_info_flags(current_nx_info(), m, f) ++ ++ ++/* network caps */ ++ ++#define __nx_ncaps(n) ((n) ? (n)->nx_ncaps : 0) ++ ++#define nx_current_ncaps() __nx_ncaps(current_nx_info()) ++ ++#define nx_info_ncaps(n, c) (__nx_ncaps(n) & (c)) ++ ++#define nx_ncaps(c) nx_info_ncaps(current_nx_info(), c) ++ ++ ++/* context mask capabilities */ ++ ++#define __vx_mcaps(v) ((v) ? (v)->vx_ccaps >> 32UL : ~0 ) ++ ++#define vx_info_mcaps(v, c) (__vx_mcaps(v) & (c)) ++ ++#define vx_mcaps(c) vx_info_mcaps(current_vx_info(), c) ++ ++ ++/* context bcap mask */ ++ ++#define __vx_bcaps(v) ((v)->vx_bcaps) ++ ++#define vx_current_bcaps() __vx_bcaps(current_vx_info()) ++ ++ ++/* mask given bcaps */ ++ ++#define vx_info_mbcaps(v, c) ((v) ? cap_intersect(__vx_bcaps(v), c) : c) ++ ++#define vx_mbcaps(c) vx_info_mbcaps(current_vx_info(), c) ++ ++ ++/* masked cap_bset */ ++ ++#define vx_info_cap_bset(v) vx_info_mbcaps(v, current->cap_bset) ++ ++#define vx_current_cap_bset() vx_info_cap_bset(current_vx_info()) ++ ++#if 0 ++#define vx_info_mbcap(v, b) \ ++ (!vx_info_flags(v, VXF_STATE_SETUP, 0) ? \ ++ vx_info_bcaps(v, b) : (b)) ++ ++#define task_vx_mbcap(t, b) \ ++ vx_info_mbcap((t)->vx_info, (t)->b) ++ ++#define vx_mbcap(b) task_vx_mbcap(current, b) ++#endif ++ ++#define vx_cap_raised(v, c, f) cap_raised(vx_info_mbcaps(v, c), f) ++ ++#define vx_capable(b, c) (capable(b) || \ ++ (cap_raised(current_cap(), b) && vx_ccaps(c))) ++ ++#define vx_ns_capable(n, b, c) (ns_capable(n, b) || \ ++ (cap_raised(current_cap(), b) && vx_ccaps(c))) ++ ++#define nx_capable(b, c) (capable(b) || \ ++ (cap_raised(current_cap(), b) && nx_ncaps(c))) ++ ++#define vx_task_initpid(t, n) \ ++ ((t)->vx_info && \ ++ ((t)->vx_info->vx_initpid == (n))) ++ ++#define vx_current_initpid(n) vx_task_initpid(current, n) ++ ++ ++/* context unshare mask */ ++ ++#define __vx_umask(v) ((v)->vx_umask) ++ ++#define vx_current_umask() __vx_umask(current_vx_info()) ++ ++#define vx_can_unshare(b, f) (capable(b) || \ ++ (cap_raised(current_cap(), b) && \ ++ !((f) & ~vx_current_umask()))) ++ ++ ++#define __vx_wmask(v) ((v)->vx_wmask) ++ ++#define vx_current_wmask() __vx_wmask(current_vx_info()) ++ ++ ++#define __vx_state(v) ((v) ? ((v)->vx_state) : 0) ++ ++#define vx_info_state(v, m) (__vx_state(v) & (m)) ++ ++ ++#define __nx_state(n) ((n) ? 
((n)->nx_state) : 0) ++ ++#define nx_info_state(n, m) (__nx_state(n) & (m)) ++ ++#endif +diff -NurpP --minimal linux-3.2.34/include/linux/vserver/cacct.h linux-3.2.34-vs2.3.2.15/include/linux/vserver/cacct.h +--- linux-3.2.34/include/linux/vserver/cacct.h 1970-01-01 01:00:00.000000000 +0100 ++++ linux-3.2.34-vs2.3.2.15/include/linux/vserver/cacct.h 2011-12-05 19:33:02.000000000 +0100 +@@ -0,0 +1,15 @@ ++#ifndef _VX_CACCT_H ++#define _VX_CACCT_H ++ ++ ++enum sock_acc_field { ++ VXA_SOCK_UNSPEC = 0, ++ VXA_SOCK_UNIX, ++ VXA_SOCK_INET, ++ VXA_SOCK_INET6, ++ VXA_SOCK_PACKET, ++ VXA_SOCK_OTHER, ++ VXA_SOCK_SIZE /* array size */ ++}; ++ ++#endif /* _VX_CACCT_H */ +diff -NurpP --minimal linux-3.2.34/include/linux/vserver/cacct_cmd.h linux-3.2.34-vs2.3.2.15/include/linux/vserver/cacct_cmd.h +--- linux-3.2.34/include/linux/vserver/cacct_cmd.h 1970-01-01 01:00:00.000000000 +0100 ++++ linux-3.2.34-vs2.3.2.15/include/linux/vserver/cacct_cmd.h 2011-12-05 19:33:02.000000000 +0100 +@@ -0,0 +1,23 @@ ++#ifndef _VX_CACCT_CMD_H ++#define _VX_CACCT_CMD_H ++ ++ ++/* virtual host info name commands */ ++ ++#define VCMD_sock_stat VC_CMD(VSTAT, 5, 0) ++ ++struct vcmd_sock_stat_v0 { ++ uint32_t field; ++ uint32_t count[3]; ++ uint64_t total[3]; ++}; ++ ++ ++#ifdef __KERNEL__ ++ ++#include ++ ++extern int vc_sock_stat(struct vx_info *, void __user *); ++ ++#endif /* __KERNEL__ */ ++#endif /* _VX_CACCT_CMD_H */ +diff -NurpP --minimal linux-3.2.34/include/linux/vserver/cacct_def.h linux-3.2.34-vs2.3.2.15/include/linux/vserver/cacct_def.h +--- linux-3.2.34/include/linux/vserver/cacct_def.h 1970-01-01 01:00:00.000000000 +0100 ++++ linux-3.2.34-vs2.3.2.15/include/linux/vserver/cacct_def.h 2011-12-05 19:33:02.000000000 +0100 +@@ -0,0 +1,43 @@ ++#ifndef _VX_CACCT_DEF_H ++#define _VX_CACCT_DEF_H ++ ++#include ++#include ++ ++ ++struct _vx_sock_acc { ++ atomic_long_t count; ++ atomic_long_t total; ++}; ++ ++/* context sub struct */ ++ ++struct _vx_cacct { ++ struct _vx_sock_acc sock[VXA_SOCK_SIZE][3]; ++ atomic_t slab[8]; ++ atomic_t page[6][8]; ++}; ++ ++#ifdef CONFIG_VSERVER_DEBUG ++ ++static inline void __dump_vx_cacct(struct _vx_cacct *cacct) ++{ ++ int i, j; ++ ++ printk("\t_vx_cacct:"); ++ for (i = 0; i < 6; i++) { ++ struct _vx_sock_acc *ptr = cacct->sock[i]; ++ ++ printk("\t [%d] =", i); ++ for (j = 0; j < 3; j++) { ++ printk(" [%d] = %8lu, %8lu", j, ++ atomic_long_read(&ptr[j].count), ++ atomic_long_read(&ptr[j].total)); ++ } ++ printk("\n"); ++ } ++} ++ ++#endif ++ ++#endif /* _VX_CACCT_DEF_H */ +diff -NurpP --minimal linux-3.2.34/include/linux/vserver/cacct_int.h linux-3.2.34-vs2.3.2.15/include/linux/vserver/cacct_int.h +--- linux-3.2.34/include/linux/vserver/cacct_int.h 1970-01-01 01:00:00.000000000 +0100 ++++ linux-3.2.34-vs2.3.2.15/include/linux/vserver/cacct_int.h 2011-12-05 19:33:02.000000000 +0100 +@@ -0,0 +1,21 @@ ++#ifndef _VX_CACCT_INT_H ++#define _VX_CACCT_INT_H ++ ++ ++#ifdef __KERNEL__ ++ ++static inline ++unsigned long vx_sock_count(struct _vx_cacct *cacct, int type, int pos) ++{ ++ return atomic_long_read(&cacct->sock[type][pos].count); ++} ++ ++ ++static inline ++unsigned long vx_sock_total(struct _vx_cacct *cacct, int type, int pos) ++{ ++ return atomic_long_read(&cacct->sock[type][pos].total); ++} ++ ++#endif /* __KERNEL__ */ ++#endif /* _VX_CACCT_INT_H */ +diff -NurpP --minimal linux-3.2.34/include/linux/vserver/check.h linux-3.2.34-vs2.3.2.15/include/linux/vserver/check.h +--- linux-3.2.34/include/linux/vserver/check.h 1970-01-01 01:00:00.000000000 +0100 ++++ 
linux-3.2.34-vs2.3.2.15/include/linux/vserver/check.h 2011-12-05 19:33:02.000000000 +0100 +@@ -0,0 +1,89 @@ ++#ifndef _VS_CHECK_H ++#define _VS_CHECK_H ++ ++ ++#define MAX_S_CONTEXT 65535 /* Arbitrary limit */ ++ ++#ifdef CONFIG_VSERVER_DYNAMIC_IDS ++#define MIN_D_CONTEXT 49152 /* dynamic contexts start here */ ++#else ++#define MIN_D_CONTEXT 65536 ++#endif ++ ++/* check conditions */ ++ ++#define VS_ADMIN 0x0001 ++#define VS_WATCH 0x0002 ++#define VS_HIDE 0x0004 ++#define VS_HOSTID 0x0008 ++ ++#define VS_IDENT 0x0010 ++#define VS_EQUIV 0x0020 ++#define VS_PARENT 0x0040 ++#define VS_CHILD 0x0080 ++ ++#define VS_ARG_MASK 0x00F0 ++ ++#define VS_DYNAMIC 0x0100 ++#define VS_STATIC 0x0200 ++ ++#define VS_ATR_MASK 0x0F00 ++ ++#ifdef CONFIG_VSERVER_PRIVACY ++#define VS_ADMIN_P (0) ++#define VS_WATCH_P (0) ++#else ++#define VS_ADMIN_P VS_ADMIN ++#define VS_WATCH_P VS_WATCH ++#endif ++ ++#define VS_HARDIRQ 0x1000 ++#define VS_SOFTIRQ 0x2000 ++#define VS_IRQ 0x4000 ++ ++#define VS_IRQ_MASK 0xF000 ++ ++#include ++ ++/* ++ * check current context for ADMIN/WATCH and ++ * optionally against supplied argument ++ */ ++static inline int __vs_check(int cid, int id, unsigned int mode) ++{ ++ if (mode & VS_ARG_MASK) { ++ if ((mode & VS_IDENT) && (id == cid)) ++ return 1; ++ } ++ if (mode & VS_ATR_MASK) { ++ if ((mode & VS_DYNAMIC) && ++ (id >= MIN_D_CONTEXT) && ++ (id <= MAX_S_CONTEXT)) ++ return 1; ++ if ((mode & VS_STATIC) && ++ (id > 1) && (id < MIN_D_CONTEXT)) ++ return 1; ++ } ++ if (mode & VS_IRQ_MASK) { ++ if ((mode & VS_IRQ) && unlikely(in_interrupt())) ++ return 1; ++ if ((mode & VS_HARDIRQ) && unlikely(in_irq())) ++ return 1; ++ if ((mode & VS_SOFTIRQ) && unlikely(in_softirq())) ++ return 1; ++ } ++ return (((mode & VS_ADMIN) && (cid == 0)) || ++ ((mode & VS_WATCH) && (cid == 1)) || ++ ((mode & VS_HOSTID) && (id == 0))); ++} ++ ++#define vx_check(c, m) __vs_check(vx_current_xid(), c, (m) | VS_IRQ) ++ ++#define vx_weak_check(c, m) ((m) ? vx_check(c, m) : 1) ++ ++ ++#define nx_check(c, m) __vs_check(nx_current_nid(), c, m) ++ ++#define nx_weak_check(c, m) ((m) ? 
nx_check(c, m) : 1) ++ ++#endif +diff -NurpP --minimal linux-3.2.34/include/linux/vserver/context.h linux-3.2.34-vs2.3.2.15/include/linux/vserver/context.h +--- linux-3.2.34/include/linux/vserver/context.h 1970-01-01 01:00:00.000000000 +0100 ++++ linux-3.2.34-vs2.3.2.15/include/linux/vserver/context.h 2011-12-05 19:33:02.000000000 +0100 +@@ -0,0 +1,188 @@ ++#ifndef _VX_CONTEXT_H ++#define _VX_CONTEXT_H ++ ++#include ++#include ++ ++ ++/* context flags */ ++ ++#define VXF_INFO_SCHED 0x00000002 ++#define VXF_INFO_NPROC 0x00000004 ++#define VXF_INFO_PRIVATE 0x00000008 ++ ++#define VXF_INFO_INIT 0x00000010 ++#define VXF_INFO_HIDE 0x00000020 ++#define VXF_INFO_ULIMIT 0x00000040 ++#define VXF_INFO_NSPACE 0x00000080 ++ ++#define VXF_SCHED_HARD 0x00000100 ++#define VXF_SCHED_PRIO 0x00000200 ++#define VXF_SCHED_PAUSE 0x00000400 ++ ++#define VXF_VIRT_MEM 0x00010000 ++#define VXF_VIRT_UPTIME 0x00020000 ++#define VXF_VIRT_CPU 0x00040000 ++#define VXF_VIRT_LOAD 0x00080000 ++#define VXF_VIRT_TIME 0x00100000 ++ ++#define VXF_HIDE_MOUNT 0x01000000 ++/* was VXF_HIDE_NETIF 0x02000000 */ ++#define VXF_HIDE_VINFO 0x04000000 ++ ++#define VXF_STATE_SETUP (1ULL << 32) ++#define VXF_STATE_INIT (1ULL << 33) ++#define VXF_STATE_ADMIN (1ULL << 34) ++ ++#define VXF_SC_HELPER (1ULL << 36) ++#define VXF_REBOOT_KILL (1ULL << 37) ++#define VXF_PERSISTENT (1ULL << 38) ++ ++#define VXF_FORK_RSS (1ULL << 48) ++#define VXF_PROLIFIC (1ULL << 49) ++ ++#define VXF_IGNEG_NICE (1ULL << 52) ++ ++#define VXF_ONE_TIME (0x0007ULL << 32) ++ ++#define VXF_INIT_SET (VXF_STATE_SETUP | VXF_STATE_INIT | VXF_STATE_ADMIN) ++ ++ ++/* context migration */ ++ ++#define VXM_SET_INIT 0x00000001 ++#define VXM_SET_REAPER 0x00000002 ++ ++/* context caps */ ++ ++#define VXC_SET_UTSNAME 0x00000001 ++#define VXC_SET_RLIMIT 0x00000002 ++#define VXC_FS_SECURITY 0x00000004 ++#define VXC_FS_TRUSTED 0x00000008 ++#define VXC_TIOCSTI 0x00000010 ++ ++/* was VXC_RAW_ICMP 0x00000100 */ ++#define VXC_SYSLOG 0x00001000 ++#define VXC_OOM_ADJUST 0x00002000 ++#define VXC_AUDIT_CONTROL 0x00004000 ++ ++#define VXC_SECURE_MOUNT 0x00010000 ++#define VXC_SECURE_REMOUNT 0x00020000 ++#define VXC_BINARY_MOUNT 0x00040000 ++ ++#define VXC_QUOTA_CTL 0x00100000 ++#define VXC_ADMIN_MAPPER 0x00200000 ++#define VXC_ADMIN_CLOOP 0x00400000 ++ ++#define VXC_KTHREAD 0x01000000 ++#define VXC_NAMESPACE 0x02000000 ++ ++ ++#ifdef __KERNEL__ ++ ++#include ++#include ++#include ++ ++#include "limit_def.h" ++#include "sched_def.h" ++#include "cvirt_def.h" ++#include "cacct_def.h" ++#include "device_def.h" ++ ++#define VX_SPACES 2 ++ ++struct _vx_info_pc { ++ struct _vx_sched_pc sched_pc; ++ struct _vx_cvirt_pc cvirt_pc; ++}; ++ ++struct _vx_space { ++ unsigned long vx_nsmask; /* assignment mask */ ++ struct nsproxy *vx_nsproxy; /* private namespaces */ ++ struct fs_struct *vx_fs; /* private namespace fs */ ++ const struct cred *vx_cred; /* task credentials */ ++}; ++ ++struct vx_info { ++ struct hlist_node vx_hlist; /* linked list of contexts */ ++ xid_t vx_id; /* context id */ ++ atomic_t vx_usecnt; /* usage count */ ++ atomic_t vx_tasks; /* tasks count */ ++ struct vx_info *vx_parent; /* parent context */ ++ int vx_state; /* context state */ ++ ++ struct _vx_space space[VX_SPACES]; /* namespace store */ ++ ++ uint64_t vx_flags; /* context flags */ ++ uint64_t vx_ccaps; /* context caps (vserver) */ ++ uint64_t vx_umask; /* unshare mask (guest) */ ++ uint64_t vx_wmask; /* warn mask (guest) */ ++ kernel_cap_t vx_bcaps; /* bounding caps (system) */ ++ ++ struct task_struct *vx_reaper; /* guest 
reaper process */ ++ pid_t vx_initpid; /* PID of guest init */ ++ int64_t vx_badness_bias; /* OOM points bias */ ++ ++ struct _vx_limit limit; /* vserver limits */ ++ struct _vx_sched sched; /* vserver scheduler */ ++ struct _vx_cvirt cvirt; /* virtual/bias stuff */ ++ struct _vx_cacct cacct; /* context accounting */ ++ ++ struct _vx_device dmap; /* default device map targets */ ++ ++#ifndef CONFIG_SMP ++ struct _vx_info_pc info_pc; /* per cpu data */ ++#else ++ struct _vx_info_pc *ptr_pc; /* per cpu array */ ++#endif ++ ++ wait_queue_head_t vx_wait; /* context exit waitqueue */ ++ int reboot_cmd; /* last sys_reboot() cmd */ ++ int exit_code; /* last process exit code */ ++ ++ char vx_name[65]; /* vserver name */ ++}; ++ ++#ifndef CONFIG_SMP ++#define vx_ptr_pc(vxi) (&(vxi)->info_pc) ++#define vx_per_cpu(vxi, v, id) vx_ptr_pc(vxi)->v ++#else ++#define vx_ptr_pc(vxi) ((vxi)->ptr_pc) ++#define vx_per_cpu(vxi, v, id) per_cpu_ptr(vx_ptr_pc(vxi), id)->v ++#endif ++ ++#define vx_cpu(vxi, v) vx_per_cpu(vxi, v, smp_processor_id()) ++ ++ ++struct vx_info_save { ++ struct vx_info *vxi; ++ xid_t xid; ++}; ++ ++ ++/* status flags */ ++ ++#define VXS_HASHED 0x0001 ++#define VXS_PAUSED 0x0010 ++#define VXS_SHUTDOWN 0x0100 ++#define VXS_HELPER 0x1000 ++#define VXS_RELEASED 0x8000 ++ ++ ++extern void claim_vx_info(struct vx_info *, struct task_struct *); ++extern void release_vx_info(struct vx_info *, struct task_struct *); ++ ++extern struct vx_info *lookup_vx_info(int); ++extern struct vx_info *lookup_or_create_vx_info(int); ++ ++extern int get_xid_list(int, unsigned int *, int); ++extern int xid_is_hashed(xid_t); ++ ++extern int vx_migrate_task(struct task_struct *, struct vx_info *, int); ++ ++extern long vs_state_change(struct vx_info *, unsigned int); ++ ++ ++#endif /* __KERNEL__ */ ++#endif /* _VX_CONTEXT_H */ +diff -NurpP --minimal linux-3.2.34/include/linux/vserver/context_cmd.h linux-3.2.34-vs2.3.2.15/include/linux/vserver/context_cmd.h +--- linux-3.2.34/include/linux/vserver/context_cmd.h 1970-01-01 01:00:00.000000000 +0100 ++++ linux-3.2.34-vs2.3.2.15/include/linux/vserver/context_cmd.h 2011-12-05 19:33:02.000000000 +0100 +@@ -0,0 +1,162 @@ ++#ifndef _VX_CONTEXT_CMD_H ++#define _VX_CONTEXT_CMD_H ++ ++ ++/* vinfo commands */ ++ ++#define VCMD_task_xid VC_CMD(VINFO, 1, 0) ++ ++#ifdef __KERNEL__ ++extern int vc_task_xid(uint32_t); ++ ++#endif /* __KERNEL__ */ ++ ++#define VCMD_vx_info VC_CMD(VINFO, 5, 0) ++ ++struct vcmd_vx_info_v0 { ++ uint32_t xid; ++ uint32_t initpid; ++ /* more to come */ ++}; ++ ++#ifdef __KERNEL__ ++extern int vc_vx_info(struct vx_info *, void __user *); ++ ++#endif /* __KERNEL__ */ ++ ++#define VCMD_ctx_stat VC_CMD(VSTAT, 0, 0) ++ ++struct vcmd_ctx_stat_v0 { ++ uint32_t usecnt; ++ uint32_t tasks; ++ /* more to come */ ++}; ++ ++#ifdef __KERNEL__ ++extern int vc_ctx_stat(struct vx_info *, void __user *); ++ ++#endif /* __KERNEL__ */ ++ ++/* context commands */ ++ ++#define VCMD_ctx_create_v0 VC_CMD(VPROC, 1, 0) ++#define VCMD_ctx_create VC_CMD(VPROC, 1, 1) ++ ++struct vcmd_ctx_create { ++ uint64_t flagword; ++}; ++ ++#define VCMD_ctx_migrate_v0 VC_CMD(PROCMIG, 1, 0) ++#define VCMD_ctx_migrate VC_CMD(PROCMIG, 1, 1) ++ ++struct vcmd_ctx_migrate { ++ uint64_t flagword; ++}; ++ ++#ifdef __KERNEL__ ++extern int vc_ctx_create(uint32_t, void __user *); ++extern int vc_ctx_migrate(struct vx_info *, void __user *); ++ ++#endif /* __KERNEL__ */ ++ ++ ++/* flag commands */ ++ ++#define VCMD_get_cflags VC_CMD(FLAGS, 1, 0) ++#define VCMD_set_cflags VC_CMD(FLAGS, 2, 0) ++ ++struct 
vcmd_ctx_flags_v0 { ++ uint64_t flagword; ++ uint64_t mask; ++}; ++ ++#ifdef __KERNEL__ ++extern int vc_get_cflags(struct vx_info *, void __user *); ++extern int vc_set_cflags(struct vx_info *, void __user *); ++ ++#endif /* __KERNEL__ */ ++ ++ ++/* context caps commands */ ++ ++#define VCMD_get_ccaps VC_CMD(FLAGS, 3, 1) ++#define VCMD_set_ccaps VC_CMD(FLAGS, 4, 1) ++ ++struct vcmd_ctx_caps_v1 { ++ uint64_t ccaps; ++ uint64_t cmask; ++}; ++ ++#ifdef __KERNEL__ ++extern int vc_get_ccaps(struct vx_info *, void __user *); ++extern int vc_set_ccaps(struct vx_info *, void __user *); ++ ++#endif /* __KERNEL__ */ ++ ++ ++/* bcaps commands */ ++ ++#define VCMD_get_bcaps VC_CMD(FLAGS, 9, 0) ++#define VCMD_set_bcaps VC_CMD(FLAGS, 10, 0) ++ ++struct vcmd_bcaps { ++ uint64_t bcaps; ++ uint64_t bmask; ++}; ++ ++#ifdef __KERNEL__ ++extern int vc_get_bcaps(struct vx_info *, void __user *); ++extern int vc_set_bcaps(struct vx_info *, void __user *); ++ ++#endif /* __KERNEL__ */ ++ ++ ++/* umask commands */ ++ ++#define VCMD_get_umask VC_CMD(FLAGS, 13, 0) ++#define VCMD_set_umask VC_CMD(FLAGS, 14, 0) ++ ++struct vcmd_umask { ++ uint64_t umask; ++ uint64_t mask; ++}; ++ ++#ifdef __KERNEL__ ++extern int vc_get_umask(struct vx_info *, void __user *); ++extern int vc_set_umask(struct vx_info *, void __user *); ++ ++#endif /* __KERNEL__ */ ++ ++ ++/* wmask commands */ ++ ++#define VCMD_get_wmask VC_CMD(FLAGS, 15, 0) ++#define VCMD_set_wmask VC_CMD(FLAGS, 16, 0) ++ ++struct vcmd_wmask { ++ uint64_t wmask; ++ uint64_t mask; ++}; ++ ++#ifdef __KERNEL__ ++extern int vc_get_wmask(struct vx_info *, void __user *); ++extern int vc_set_wmask(struct vx_info *, void __user *); ++ ++#endif /* __KERNEL__ */ ++ ++ ++/* OOM badness */ ++ ++#define VCMD_get_badness VC_CMD(MEMCTRL, 5, 0) ++#define VCMD_set_badness VC_CMD(MEMCTRL, 6, 0) ++ ++struct vcmd_badness_v0 { ++ int64_t bias; ++}; ++ ++#ifdef __KERNEL__ ++extern int vc_get_badness(struct vx_info *, void __user *); ++extern int vc_set_badness(struct vx_info *, void __user *); ++ ++#endif /* __KERNEL__ */ ++#endif /* _VX_CONTEXT_CMD_H */ +diff -NurpP --minimal linux-3.2.34/include/linux/vserver/cvirt.h linux-3.2.34-vs2.3.2.15/include/linux/vserver/cvirt.h +--- linux-3.2.34/include/linux/vserver/cvirt.h 1970-01-01 01:00:00.000000000 +0100 ++++ linux-3.2.34-vs2.3.2.15/include/linux/vserver/cvirt.h 2012-04-24 00:44:36.000000000 +0200 +@@ -0,0 +1,22 @@ ++#ifndef _VX_CVIRT_H ++#define _VX_CVIRT_H ++ ++ ++#ifdef __KERNEL__ ++ ++struct timespec; ++ ++void vx_vsi_boottime(struct timespec *); ++ ++void vx_vsi_uptime(struct timespec *, struct timespec *); ++ ++ ++struct vx_info; ++ ++void vx_update_load(struct vx_info *); ++ ++ ++int vx_do_syslog(int, char __user *, int); ++ ++#endif /* __KERNEL__ */ ++#endif /* _VX_CVIRT_H */ +diff -NurpP --minimal linux-3.2.34/include/linux/vserver/cvirt_cmd.h linux-3.2.34-vs2.3.2.15/include/linux/vserver/cvirt_cmd.h +--- linux-3.2.34/include/linux/vserver/cvirt_cmd.h 1970-01-01 01:00:00.000000000 +0100 ++++ linux-3.2.34-vs2.3.2.15/include/linux/vserver/cvirt_cmd.h 2011-12-05 19:33:02.000000000 +0100 +@@ -0,0 +1,53 @@ ++#ifndef _VX_CVIRT_CMD_H ++#define _VX_CVIRT_CMD_H ++ ++ ++/* virtual host info name commands */ ++ ++#define VCMD_set_vhi_name VC_CMD(VHOST, 1, 0) ++#define VCMD_get_vhi_name VC_CMD(VHOST, 2, 0) ++ ++struct vcmd_vhi_name_v0 { ++ uint32_t field; ++ char name[65]; ++}; ++ ++ ++enum vhi_name_field { ++ VHIN_CONTEXT = 0, ++ VHIN_SYSNAME, ++ VHIN_NODENAME, ++ VHIN_RELEASE, ++ VHIN_VERSION, ++ VHIN_MACHINE, ++ VHIN_DOMAINNAME, ++}; ++ ++ 
++#ifdef __KERNEL__ ++ ++#include ++ ++extern int vc_set_vhi_name(struct vx_info *, void __user *); ++extern int vc_get_vhi_name(struct vx_info *, void __user *); ++ ++#endif /* __KERNEL__ */ ++ ++#define VCMD_virt_stat VC_CMD(VSTAT, 3, 0) ++ ++struct vcmd_virt_stat_v0 { ++ uint64_t offset; ++ uint64_t uptime; ++ uint32_t nr_threads; ++ uint32_t nr_running; ++ uint32_t nr_uninterruptible; ++ uint32_t nr_onhold; ++ uint32_t nr_forks; ++ uint32_t load[3]; ++}; ++ ++#ifdef __KERNEL__ ++extern int vc_virt_stat(struct vx_info *, void __user *); ++ ++#endif /* __KERNEL__ */ ++#endif /* _VX_CVIRT_CMD_H */ +diff -NurpP --minimal linux-3.2.34/include/linux/vserver/cvirt_def.h linux-3.2.34-vs2.3.2.15/include/linux/vserver/cvirt_def.h +--- linux-3.2.34/include/linux/vserver/cvirt_def.h 1970-01-01 01:00:00.000000000 +0100 ++++ linux-3.2.34-vs2.3.2.15/include/linux/vserver/cvirt_def.h 2011-12-05 19:33:02.000000000 +0100 +@@ -0,0 +1,80 @@ ++#ifndef _VX_CVIRT_DEF_H ++#define _VX_CVIRT_DEF_H ++ ++#include ++#include ++#include ++#include ++#include ++ ++ ++struct _vx_usage_stat { ++ uint64_t user; ++ uint64_t nice; ++ uint64_t system; ++ uint64_t softirq; ++ uint64_t irq; ++ uint64_t idle; ++ uint64_t iowait; ++}; ++ ++struct _vx_syslog { ++ wait_queue_head_t log_wait; ++ spinlock_t logbuf_lock; /* lock for the log buffer */ ++ ++ unsigned long log_start; /* next char to be read by syslog() */ ++ unsigned long con_start; /* next char to be sent to consoles */ ++ unsigned long log_end; /* most-recently-written-char + 1 */ ++ unsigned long logged_chars; /* #chars since last read+clear operation */ ++ ++ char log_buf[1024]; ++}; ++ ++ ++/* context sub struct */ ++ ++struct _vx_cvirt { ++ atomic_t nr_threads; /* number of current threads */ ++ atomic_t nr_running; /* number of running threads */ ++ atomic_t nr_uninterruptible; /* number of uninterruptible threads */ ++ ++ atomic_t nr_onhold; /* processes on hold */ ++ uint32_t onhold_last; /* jiffies when put on hold */ ++ ++ struct timespec bias_ts; /* time offset to the host */ ++ struct timespec bias_idle; ++ struct timespec bias_uptime; /* context creation point */ ++ uint64_t bias_clock; /* offset in clock_t */ ++ ++ spinlock_t load_lock; /* lock for the load averages */ ++ atomic_t load_updates; /* nr of load updates done so far */ ++ uint32_t load_last; /* last time load was calculated */ ++ uint32_t load[3]; /* load averages 1,5,15 */ ++ ++ atomic_t total_forks; /* number of forks so far */ ++ ++ struct _vx_syslog syslog; ++}; ++ ++struct _vx_cvirt_pc { ++ struct _vx_usage_stat cpustat; ++}; ++ ++ ++#ifdef CONFIG_VSERVER_DEBUG ++ ++static inline void __dump_vx_cvirt(struct _vx_cvirt *cvirt) ++{ ++ printk("\t_vx_cvirt:\n"); ++ printk("\t threads: %4d, %4d, %4d, %4d\n", ++ atomic_read(&cvirt->nr_threads), ++ atomic_read(&cvirt->nr_running), ++ atomic_read(&cvirt->nr_uninterruptible), ++ atomic_read(&cvirt->nr_onhold)); ++ /* add rest here */ ++ printk("\t total_forks = %d\n", atomic_read(&cvirt->total_forks)); ++} ++ ++#endif ++ ++#endif /* _VX_CVIRT_DEF_H */ +diff -NurpP --minimal linux-3.2.34/include/linux/vserver/debug.h linux-3.2.34-vs2.3.2.15/include/linux/vserver/debug.h +--- linux-3.2.34/include/linux/vserver/debug.h 1970-01-01 01:00:00.000000000 +0100 ++++ linux-3.2.34-vs2.3.2.15/include/linux/vserver/debug.h 2011-12-05 19:33:02.000000000 +0100 +@@ -0,0 +1,145 @@ ++#ifndef _VX_DEBUG_H ++#define _VX_DEBUG_H ++ ++ ++#define VXD_CBIT(n, m) (vs_debug_ ## n & (1 << (m))) ++#define VXD_CMIN(n, m) (vs_debug_ ## n > (m)) ++#define VXD_MASK(n, m) 
(vs_debug_ ## n & (m)) ++ ++#define VXD_DEV(d) (d), (d)->bd_inode->i_ino, \ ++ imajor((d)->bd_inode), iminor((d)->bd_inode) ++#define VXF_DEV "%p[%lu,%d:%d]" ++ ++#if defined(CONFIG_QUOTES_UTF8) ++#define VS_Q_LQM "\xc2\xbb" ++#define VS_Q_RQM "\xc2\xab" ++#elif defined(CONFIG_QUOTES_ASCII) ++#define VS_Q_LQM "\x27" ++#define VS_Q_RQM "\x27" ++#else ++#define VS_Q_LQM "\xbb" ++#define VS_Q_RQM "\xab" ++#endif ++ ++#define VS_Q(f) VS_Q_LQM f VS_Q_RQM ++ ++ ++#define vxd_path(p) \ ++ ({ static char _buffer[PATH_MAX]; \ ++ d_path(p, _buffer, sizeof(_buffer)); }) ++ ++#define vxd_cond_path(n) \ ++ ((n) ? vxd_path(&(n)->path) : "" ) ++ ++ ++#ifdef CONFIG_VSERVER_DEBUG ++ ++extern unsigned int vs_debug_switch; ++extern unsigned int vs_debug_xid; ++extern unsigned int vs_debug_nid; ++extern unsigned int vs_debug_tag; ++extern unsigned int vs_debug_net; ++extern unsigned int vs_debug_limit; ++extern unsigned int vs_debug_cres; ++extern unsigned int vs_debug_dlim; ++extern unsigned int vs_debug_quota; ++extern unsigned int vs_debug_cvirt; ++extern unsigned int vs_debug_space; ++extern unsigned int vs_debug_perm; ++extern unsigned int vs_debug_misc; ++ ++ ++#define VX_LOGLEVEL "vxD: " ++#define VX_PROC_FMT "%p: " ++#define VX_PROCESS current ++ ++#define vxdprintk(c, f, x...) \ ++ do { \ ++ if (c) \ ++ printk(VX_LOGLEVEL VX_PROC_FMT f "\n", \ ++ VX_PROCESS , ##x); \ ++ } while (0) ++ ++#define vxlprintk(c, f, x...) \ ++ do { \ ++ if (c) \ ++ printk(VX_LOGLEVEL f " @%s:%d\n", x); \ ++ } while (0) ++ ++#define vxfprintk(c, f, x...) \ ++ do { \ ++ if (c) \ ++ printk(VX_LOGLEVEL f " %s@%s:%d\n", x); \ ++ } while (0) ++ ++ ++struct vx_info; ++ ++void dump_vx_info(struct vx_info *, int); ++void dump_vx_info_inactive(int); ++ ++#else /* CONFIG_VSERVER_DEBUG */ ++ ++#define vs_debug_switch 0 ++#define vs_debug_xid 0 ++#define vs_debug_nid 0 ++#define vs_debug_tag 0 ++#define vs_debug_net 0 ++#define vs_debug_limit 0 ++#define vs_debug_cres 0 ++#define vs_debug_dlim 0 ++#define vs_debug_quota 0 ++#define vs_debug_cvirt 0 ++#define vs_debug_space 0 ++#define vs_debug_perm 0 ++#define vs_debug_misc 0 ++ ++#define vxdprintk(x...) do { } while (0) ++#define vxlprintk(x...) do { } while (0) ++#define vxfprintk(x...) do { } while (0) ++ ++#endif /* CONFIG_VSERVER_DEBUG */ ++ ++ ++#ifdef CONFIG_VSERVER_WARN ++ ++#define VX_WARNLEVEL KERN_WARNING "vxW: " ++#define VX_WARN_TASK "[" VS_Q("%s") ",%u:#%u|%u|%u] " ++#define VX_WARN_XID "[xid #%u] " ++#define VX_WARN_NID "[nid #%u] " ++#define VX_WARN_TAG "[tag #%u] " ++ ++#define vxwprintk(c, f, x...) \ ++ do { \ ++ if (c) \ ++ printk(VX_WARNLEVEL f "\n", ##x); \ ++ } while (0) ++ ++#else /* CONFIG_VSERVER_WARN */ ++ ++#define vxwprintk(x...) do { } while (0) ++ ++#endif /* CONFIG_VSERVER_WARN */ ++ ++#define vxwprintk_task(c, f, x...) \ ++ vxwprintk(c, VX_WARN_TASK f, \ ++ current->comm, current->pid, \ ++ current->xid, current->nid, current->tag, ##x) ++#define vxwprintk_xid(c, f, x...) \ ++ vxwprintk(c, VX_WARN_XID f, current->xid, x) ++#define vxwprintk_nid(c, f, x...) \ ++ vxwprintk(c, VX_WARN_NID f, current->nid, x) ++#define vxwprintk_tag(c, f, x...) \ ++ vxwprintk(c, VX_WARN_TAG f, current->tag, x) ++ ++#ifdef CONFIG_VSERVER_DEBUG ++#define vxd_assert_lock(l) assert_spin_locked(l) ++#define vxd_assert(c, f, x...) vxlprintk(!(c), \ ++ "assertion [" f "] failed.", ##x, __FILE__, __LINE__) ++#else ++#define vxd_assert_lock(l) do { } while (0) ++#define vxd_assert(c, f, x...) 
do { } while (0) ++#endif ++ ++ ++#endif /* _VX_DEBUG_H */ +diff -NurpP --minimal linux-3.2.34/include/linux/vserver/debug_cmd.h linux-3.2.34-vs2.3.2.15/include/linux/vserver/debug_cmd.h +--- linux-3.2.34/include/linux/vserver/debug_cmd.h 1970-01-01 01:00:00.000000000 +0100 ++++ linux-3.2.34-vs2.3.2.15/include/linux/vserver/debug_cmd.h 2011-12-05 19:33:02.000000000 +0100 +@@ -0,0 +1,58 @@ ++#ifndef _VX_DEBUG_CMD_H ++#define _VX_DEBUG_CMD_H ++ ++ ++/* debug commands */ ++ ++#define VCMD_dump_history VC_CMD(DEBUG, 1, 0) ++ ++#define VCMD_read_history VC_CMD(DEBUG, 5, 0) ++#define VCMD_read_monitor VC_CMD(DEBUG, 6, 0) ++ ++struct vcmd_read_history_v0 { ++ uint32_t index; ++ uint32_t count; ++ char __user *data; ++}; ++ ++struct vcmd_read_monitor_v0 { ++ uint32_t index; ++ uint32_t count; ++ char __user *data; ++}; ++ ++ ++#ifdef __KERNEL__ ++ ++#ifdef CONFIG_COMPAT ++ ++#include ++ ++struct vcmd_read_history_v0_x32 { ++ uint32_t index; ++ uint32_t count; ++ compat_uptr_t data_ptr; ++}; ++ ++struct vcmd_read_monitor_v0_x32 { ++ uint32_t index; ++ uint32_t count; ++ compat_uptr_t data_ptr; ++}; ++ ++#endif /* CONFIG_COMPAT */ ++ ++extern int vc_dump_history(uint32_t); ++ ++extern int vc_read_history(uint32_t, void __user *); ++extern int vc_read_monitor(uint32_t, void __user *); ++ ++#ifdef CONFIG_COMPAT ++ ++extern int vc_read_history_x32(uint32_t, void __user *); ++extern int vc_read_monitor_x32(uint32_t, void __user *); ++ ++#endif /* CONFIG_COMPAT */ ++ ++#endif /* __KERNEL__ */ ++#endif /* _VX_DEBUG_CMD_H */ +diff -NurpP --minimal linux-3.2.34/include/linux/vserver/device.h linux-3.2.34-vs2.3.2.15/include/linux/vserver/device.h +--- linux-3.2.34/include/linux/vserver/device.h 1970-01-01 01:00:00.000000000 +0100 ++++ linux-3.2.34-vs2.3.2.15/include/linux/vserver/device.h 2011-12-05 19:33:02.000000000 +0100 +@@ -0,0 +1,15 @@ ++#ifndef _VX_DEVICE_H ++#define _VX_DEVICE_H ++ ++ ++#define DATTR_CREATE 0x00000001 ++#define DATTR_OPEN 0x00000002 ++ ++#define DATTR_REMAP 0x00000010 ++ ++#define DATTR_MASK 0x00000013 ++ ++ ++#else /* _VX_DEVICE_H */ ++#warning duplicate inclusion ++#endif /* _VX_DEVICE_H */ +diff -NurpP --minimal linux-3.2.34/include/linux/vserver/device_cmd.h linux-3.2.34-vs2.3.2.15/include/linux/vserver/device_cmd.h +--- linux-3.2.34/include/linux/vserver/device_cmd.h 1970-01-01 01:00:00.000000000 +0100 ++++ linux-3.2.34-vs2.3.2.15/include/linux/vserver/device_cmd.h 2011-12-05 19:33:02.000000000 +0100 +@@ -0,0 +1,44 @@ ++#ifndef _VX_DEVICE_CMD_H ++#define _VX_DEVICE_CMD_H ++ ++ ++/* device vserver commands */ ++ ++#define VCMD_set_mapping VC_CMD(DEVICE, 1, 0) ++#define VCMD_unset_mapping VC_CMD(DEVICE, 2, 0) ++ ++struct vcmd_set_mapping_v0 { ++ const char __user *device; ++ const char __user *target; ++ uint32_t flags; ++}; ++ ++ ++#ifdef __KERNEL__ ++ ++#ifdef CONFIG_COMPAT ++ ++#include ++ ++struct vcmd_set_mapping_v0_x32 { ++ compat_uptr_t device_ptr; ++ compat_uptr_t target_ptr; ++ uint32_t flags; ++}; ++ ++#endif /* CONFIG_COMPAT */ ++ ++#include ++ ++extern int vc_set_mapping(struct vx_info *, void __user *); ++extern int vc_unset_mapping(struct vx_info *, void __user *); ++ ++#ifdef CONFIG_COMPAT ++ ++extern int vc_set_mapping_x32(struct vx_info *, void __user *); ++extern int vc_unset_mapping_x32(struct vx_info *, void __user *); ++ ++#endif /* CONFIG_COMPAT */ ++ ++#endif /* __KERNEL__ */ ++#endif /* _VX_DEVICE_CMD_H */ +diff -NurpP --minimal linux-3.2.34/include/linux/vserver/device_def.h linux-3.2.34-vs2.3.2.15/include/linux/vserver/device_def.h +--- 
linux-3.2.34/include/linux/vserver/device_def.h 1970-01-01 01:00:00.000000000 +0100 ++++ linux-3.2.34-vs2.3.2.15/include/linux/vserver/device_def.h 2011-12-05 19:33:02.000000000 +0100 +@@ -0,0 +1,17 @@ ++#ifndef _VX_DEVICE_DEF_H ++#define _VX_DEVICE_DEF_H ++ ++#include ++ ++struct vx_dmap_target { ++ dev_t target; ++ uint32_t flags; ++}; ++ ++struct _vx_device { ++#ifdef CONFIG_VSERVER_DEVICE ++ struct vx_dmap_target targets[2]; ++#endif ++}; ++ ++#endif /* _VX_DEVICE_DEF_H */ +diff -NurpP --minimal linux-3.2.34/include/linux/vserver/dlimit.h linux-3.2.34-vs2.3.2.15/include/linux/vserver/dlimit.h +--- linux-3.2.34/include/linux/vserver/dlimit.h 1970-01-01 01:00:00.000000000 +0100 ++++ linux-3.2.34-vs2.3.2.15/include/linux/vserver/dlimit.h 2011-12-05 19:33:02.000000000 +0100 +@@ -0,0 +1,54 @@ ++#ifndef _VX_DLIMIT_H ++#define _VX_DLIMIT_H ++ ++#include "switch.h" ++ ++ ++#ifdef __KERNEL__ ++ ++/* keep in sync with CDLIM_INFINITY */ ++ ++#define DLIM_INFINITY (~0ULL) ++ ++#include ++#include ++ ++struct super_block; ++ ++struct dl_info { ++ struct hlist_node dl_hlist; /* linked list of contexts */ ++ struct rcu_head dl_rcu; /* the rcu head */ ++ tag_t dl_tag; /* context tag */ ++ atomic_t dl_usecnt; /* usage count */ ++ atomic_t dl_refcnt; /* reference count */ ++ ++ struct super_block *dl_sb; /* associated superblock */ ++ ++ spinlock_t dl_lock; /* protect the values */ ++ ++ unsigned long long dl_space_used; /* used space in bytes */ ++ unsigned long long dl_space_total; /* maximum space in bytes */ ++ unsigned long dl_inodes_used; /* used inodes */ ++ unsigned long dl_inodes_total; /* maximum inodes */ ++ ++ unsigned int dl_nrlmult; /* non root limit mult */ ++}; ++ ++struct rcu_head; ++ ++extern void rcu_free_dl_info(struct rcu_head *); ++extern void unhash_dl_info(struct dl_info *); ++ ++extern struct dl_info *locate_dl_info(struct super_block *, tag_t); ++ ++ ++struct kstatfs; ++ ++extern void vx_vsi_statfs(struct super_block *, struct kstatfs *); ++ ++typedef uint64_t dlsize_t; ++ ++#endif /* __KERNEL__ */ ++#else /* _VX_DLIMIT_H */ ++#warning duplicate inclusion ++#endif /* _VX_DLIMIT_H */ +diff -NurpP --minimal linux-3.2.34/include/linux/vserver/dlimit_cmd.h linux-3.2.34-vs2.3.2.15/include/linux/vserver/dlimit_cmd.h +--- linux-3.2.34/include/linux/vserver/dlimit_cmd.h 1970-01-01 01:00:00.000000000 +0100 ++++ linux-3.2.34-vs2.3.2.15/include/linux/vserver/dlimit_cmd.h 2011-12-05 19:33:02.000000000 +0100 +@@ -0,0 +1,109 @@ ++#ifndef _VX_DLIMIT_CMD_H ++#define _VX_DLIMIT_CMD_H ++ ++ ++/* dlimit vserver commands */ ++ ++#define VCMD_add_dlimit VC_CMD(DLIMIT, 1, 0) ++#define VCMD_rem_dlimit VC_CMD(DLIMIT, 2, 0) ++ ++#define VCMD_set_dlimit VC_CMD(DLIMIT, 5, 0) ++#define VCMD_get_dlimit VC_CMD(DLIMIT, 6, 0) ++ ++struct vcmd_ctx_dlimit_base_v0 { ++ const char __user *name; ++ uint32_t flags; ++}; ++ ++struct vcmd_ctx_dlimit_v0 { ++ const char __user *name; ++ uint32_t space_used; /* used space in kbytes */ ++ uint32_t space_total; /* maximum space in kbytes */ ++ uint32_t inodes_used; /* used inodes */ ++ uint32_t inodes_total; /* maximum inodes */ ++ uint32_t reserved; /* reserved for root in % */ ++ uint32_t flags; ++}; ++ ++#define CDLIM_UNSET ((uint32_t)0UL) ++#define CDLIM_INFINITY ((uint32_t)~0UL) ++#define CDLIM_KEEP ((uint32_t)~1UL) ++ ++#define DLIME_UNIT 0 ++#define DLIME_KILO 1 ++#define DLIME_MEGA 2 ++#define DLIME_GIGA 3 ++ ++#define DLIMF_SHIFT 0x10 ++ ++#define DLIMS_USED 0 ++#define DLIMS_TOTAL 2 ++ ++static inline ++uint64_t dlimit_space_32to64(uint32_t val, uint32_t flags, 
int shift) ++{ ++ int exp = (flags & DLIMF_SHIFT) ? ++ (flags >> shift) & DLIME_GIGA : DLIME_KILO; ++ return ((uint64_t)val) << (10 * exp); ++} ++ ++static inline ++uint32_t dlimit_space_64to32(uint64_t val, uint32_t *flags, int shift) ++{ ++ int exp = 0; ++ ++ if (*flags & DLIMF_SHIFT) { ++ while (val > (1LL << 32) && (exp < 3)) { ++ val >>= 10; ++ exp++; ++ } ++ *flags &= ~(DLIME_GIGA << shift); ++ *flags |= exp << shift; ++ } else ++ val >>= 10; ++ return val; ++} ++ ++#ifdef __KERNEL__ ++ ++#ifdef CONFIG_COMPAT ++ ++#include ++ ++struct vcmd_ctx_dlimit_base_v0_x32 { ++ compat_uptr_t name_ptr; ++ uint32_t flags; ++}; ++ ++struct vcmd_ctx_dlimit_v0_x32 { ++ compat_uptr_t name_ptr; ++ uint32_t space_used; /* used space in kbytes */ ++ uint32_t space_total; /* maximum space in kbytes */ ++ uint32_t inodes_used; /* used inodes */ ++ uint32_t inodes_total; /* maximum inodes */ ++ uint32_t reserved; /* reserved for root in % */ ++ uint32_t flags; ++}; ++ ++#endif /* CONFIG_COMPAT */ ++ ++#include ++ ++extern int vc_add_dlimit(uint32_t, void __user *); ++extern int vc_rem_dlimit(uint32_t, void __user *); ++ ++extern int vc_set_dlimit(uint32_t, void __user *); ++extern int vc_get_dlimit(uint32_t, void __user *); ++ ++#ifdef CONFIG_COMPAT ++ ++extern int vc_add_dlimit_x32(uint32_t, void __user *); ++extern int vc_rem_dlimit_x32(uint32_t, void __user *); ++ ++extern int vc_set_dlimit_x32(uint32_t, void __user *); ++extern int vc_get_dlimit_x32(uint32_t, void __user *); ++ ++#endif /* CONFIG_COMPAT */ ++ ++#endif /* __KERNEL__ */ ++#endif /* _VX_DLIMIT_CMD_H */ +diff -NurpP --minimal linux-3.2.34/include/linux/vserver/global.h linux-3.2.34-vs2.3.2.15/include/linux/vserver/global.h +--- linux-3.2.34/include/linux/vserver/global.h 1970-01-01 01:00:00.000000000 +0100 ++++ linux-3.2.34-vs2.3.2.15/include/linux/vserver/global.h 2011-12-05 19:33:02.000000000 +0100 +@@ -0,0 +1,19 @@ ++#ifndef _VX_GLOBAL_H ++#define _VX_GLOBAL_H ++ ++ ++extern atomic_t vx_global_ctotal; ++extern atomic_t vx_global_cactive; ++ ++extern atomic_t nx_global_ctotal; ++extern atomic_t nx_global_cactive; ++ ++extern atomic_t vs_global_nsproxy; ++extern atomic_t vs_global_fs; ++extern atomic_t vs_global_mnt_ns; ++extern atomic_t vs_global_uts_ns; ++extern atomic_t vs_global_user_ns; ++extern atomic_t vs_global_pid_ns; ++ ++ ++#endif /* _VX_GLOBAL_H */ +diff -NurpP --minimal linux-3.2.34/include/linux/vserver/history.h linux-3.2.34-vs2.3.2.15/include/linux/vserver/history.h +--- linux-3.2.34/include/linux/vserver/history.h 1970-01-01 01:00:00.000000000 +0100 ++++ linux-3.2.34-vs2.3.2.15/include/linux/vserver/history.h 2011-12-05 19:33:02.000000000 +0100 +@@ -0,0 +1,197 @@ ++#ifndef _VX_HISTORY_H ++#define _VX_HISTORY_H ++ ++ ++enum { ++ VXH_UNUSED = 0, ++ VXH_THROW_OOPS = 1, ++ ++ VXH_GET_VX_INFO, ++ VXH_PUT_VX_INFO, ++ VXH_INIT_VX_INFO, ++ VXH_SET_VX_INFO, ++ VXH_CLR_VX_INFO, ++ VXH_CLAIM_VX_INFO, ++ VXH_RELEASE_VX_INFO, ++ VXH_ALLOC_VX_INFO, ++ VXH_DEALLOC_VX_INFO, ++ VXH_HASH_VX_INFO, ++ VXH_UNHASH_VX_INFO, ++ VXH_LOC_VX_INFO, ++ VXH_LOOKUP_VX_INFO, ++ VXH_CREATE_VX_INFO, ++}; ++ ++struct _vxhe_vxi { ++ struct vx_info *ptr; ++ unsigned xid; ++ unsigned usecnt; ++ unsigned tasks; ++}; ++ ++struct _vxhe_set_clr { ++ void *data; ++}; ++ ++struct _vxhe_loc_lookup { ++ unsigned arg; ++}; ++ ++struct _vx_hist_entry { ++ void *loc; ++ unsigned short seq; ++ unsigned short type; ++ struct _vxhe_vxi vxi; ++ union { ++ struct _vxhe_set_clr sc; ++ struct _vxhe_loc_lookup ll; ++ }; ++}; ++ ++#ifdef CONFIG_VSERVER_HISTORY ++ ++extern 
unsigned volatile int vxh_active; ++ ++struct _vx_hist_entry *vxh_advance(void *loc); ++ ++ ++static inline ++void __vxh_copy_vxi(struct _vx_hist_entry *entry, struct vx_info *vxi) ++{ ++ entry->vxi.ptr = vxi; ++ if (vxi) { ++ entry->vxi.usecnt = atomic_read(&vxi->vx_usecnt); ++ entry->vxi.tasks = atomic_read(&vxi->vx_tasks); ++ entry->vxi.xid = vxi->vx_id; ++ } ++} ++ ++ ++#define __HERE__ current_text_addr() ++ ++#define __VXH_BODY(__type, __data, __here) \ ++ struct _vx_hist_entry *entry; \ ++ \ ++ preempt_disable(); \ ++ entry = vxh_advance(__here); \ ++ __data; \ ++ entry->type = __type; \ ++ preempt_enable(); ++ ++ ++ /* pass vxi only */ ++ ++#define __VXH_SMPL \ ++ __vxh_copy_vxi(entry, vxi) ++ ++static inline ++void __vxh_smpl(struct vx_info *vxi, int __type, void *__here) ++{ ++ __VXH_BODY(__type, __VXH_SMPL, __here) ++} ++ ++ /* pass vxi and data (void *) */ ++ ++#define __VXH_DATA \ ++ __vxh_copy_vxi(entry, vxi); \ ++ entry->sc.data = data ++ ++static inline ++void __vxh_data(struct vx_info *vxi, void *data, ++ int __type, void *__here) ++{ ++ __VXH_BODY(__type, __VXH_DATA, __here) ++} ++ ++ /* pass vxi and arg (long) */ ++ ++#define __VXH_LONG \ ++ __vxh_copy_vxi(entry, vxi); \ ++ entry->ll.arg = arg ++ ++static inline ++void __vxh_long(struct vx_info *vxi, long arg, ++ int __type, void *__here) ++{ ++ __VXH_BODY(__type, __VXH_LONG, __here) ++} ++ ++ ++static inline ++void __vxh_throw_oops(void *__here) ++{ ++ __VXH_BODY(VXH_THROW_OOPS, {}, __here); ++ /* prevent further acquisition */ ++ vxh_active = 0; ++} ++ ++ ++#define vxh_throw_oops() __vxh_throw_oops(__HERE__); ++ ++#define __vxh_get_vx_info(v, h) __vxh_smpl(v, VXH_GET_VX_INFO, h); ++#define __vxh_put_vx_info(v, h) __vxh_smpl(v, VXH_PUT_VX_INFO, h); ++ ++#define __vxh_init_vx_info(v, d, h) \ ++ __vxh_data(v, d, VXH_INIT_VX_INFO, h); ++#define __vxh_set_vx_info(v, d, h) \ ++ __vxh_data(v, d, VXH_SET_VX_INFO, h); ++#define __vxh_clr_vx_info(v, d, h) \ ++ __vxh_data(v, d, VXH_CLR_VX_INFO, h); ++ ++#define __vxh_claim_vx_info(v, d, h) \ ++ __vxh_data(v, d, VXH_CLAIM_VX_INFO, h); ++#define __vxh_release_vx_info(v, d, h) \ ++ __vxh_data(v, d, VXH_RELEASE_VX_INFO, h); ++ ++#define vxh_alloc_vx_info(v) \ ++ __vxh_smpl(v, VXH_ALLOC_VX_INFO, __HERE__); ++#define vxh_dealloc_vx_info(v) \ ++ __vxh_smpl(v, VXH_DEALLOC_VX_INFO, __HERE__); ++ ++#define vxh_hash_vx_info(v) \ ++ __vxh_smpl(v, VXH_HASH_VX_INFO, __HERE__); ++#define vxh_unhash_vx_info(v) \ ++ __vxh_smpl(v, VXH_UNHASH_VX_INFO, __HERE__); ++ ++#define vxh_loc_vx_info(v, l) \ ++ __vxh_long(v, l, VXH_LOC_VX_INFO, __HERE__); ++#define vxh_lookup_vx_info(v, l) \ ++ __vxh_long(v, l, VXH_LOOKUP_VX_INFO, __HERE__); ++#define vxh_create_vx_info(v, l) \ ++ __vxh_long(v, l, VXH_CREATE_VX_INFO, __HERE__); ++ ++extern void vxh_dump_history(void); ++ ++ ++#else /* CONFIG_VSERVER_HISTORY */ ++ ++#define __HERE__ 0 ++ ++#define vxh_throw_oops() do { } while (0) ++ ++#define __vxh_get_vx_info(v, h) do { } while (0) ++#define __vxh_put_vx_info(v, h) do { } while (0) ++ ++#define __vxh_init_vx_info(v, d, h) do { } while (0) ++#define __vxh_set_vx_info(v, d, h) do { } while (0) ++#define __vxh_clr_vx_info(v, d, h) do { } while (0) ++ ++#define __vxh_claim_vx_info(v, d, h) do { } while (0) ++#define __vxh_release_vx_info(v, d, h) do { } while (0) ++ ++#define vxh_alloc_vx_info(v) do { } while (0) ++#define vxh_dealloc_vx_info(v) do { } while (0) ++ ++#define vxh_hash_vx_info(v) do { } while (0) ++#define vxh_unhash_vx_info(v) do { } while (0) ++ ++#define vxh_loc_vx_info(v, l) do { } while (0) 
++#define vxh_lookup_vx_info(v, l) do { } while (0) ++#define vxh_create_vx_info(v, l) do { } while (0) ++ ++#define vxh_dump_history() do { } while (0) ++ ++ ++#endif /* CONFIG_VSERVER_HISTORY */ ++ ++#endif /* _VX_HISTORY_H */ +diff -NurpP --minimal linux-3.2.34/include/linux/vserver/inode.h linux-3.2.34-vs2.3.2.15/include/linux/vserver/inode.h +--- linux-3.2.34/include/linux/vserver/inode.h 1970-01-01 01:00:00.000000000 +0100 ++++ linux-3.2.34-vs2.3.2.15/include/linux/vserver/inode.h 2011-12-05 19:33:02.000000000 +0100 +@@ -0,0 +1,39 @@ ++#ifndef _VX_INODE_H ++#define _VX_INODE_H ++ ++ ++#define IATTR_TAG 0x01000000 ++ ++#define IATTR_ADMIN 0x00000001 ++#define IATTR_WATCH 0x00000002 ++#define IATTR_HIDE 0x00000004 ++#define IATTR_FLAGS 0x00000007 ++ ++#define IATTR_BARRIER 0x00010000 ++#define IATTR_IXUNLINK 0x00020000 ++#define IATTR_IMMUTABLE 0x00040000 ++#define IATTR_COW 0x00080000 ++ ++#ifdef __KERNEL__ ++ ++ ++#ifdef CONFIG_VSERVER_PROC_SECURE ++#define IATTR_PROC_DEFAULT ( IATTR_ADMIN | IATTR_HIDE ) ++#define IATTR_PROC_SYMLINK ( IATTR_ADMIN ) ++#else ++#define IATTR_PROC_DEFAULT ( IATTR_ADMIN ) ++#define IATTR_PROC_SYMLINK ( IATTR_ADMIN ) ++#endif ++ ++#define vx_hide_check(c, m) (((m) & IATTR_HIDE) ? vx_check(c, m) : 1) ++ ++#endif /* __KERNEL__ */ ++ ++/* inode ioctls */ ++ ++#define FIOC_GETXFLG _IOR('x', 5, long) ++#define FIOC_SETXFLG _IOW('x', 6, long) ++ ++#else /* _VX_INODE_H */ ++#warning duplicate inclusion ++#endif /* _VX_INODE_H */ +diff -NurpP --minimal linux-3.2.34/include/linux/vserver/inode_cmd.h linux-3.2.34-vs2.3.2.15/include/linux/vserver/inode_cmd.h +--- linux-3.2.34/include/linux/vserver/inode_cmd.h 1970-01-01 01:00:00.000000000 +0100 ++++ linux-3.2.34-vs2.3.2.15/include/linux/vserver/inode_cmd.h 2011-12-05 19:33:02.000000000 +0100 +@@ -0,0 +1,59 @@ ++#ifndef _VX_INODE_CMD_H ++#define _VX_INODE_CMD_H ++ ++ ++/* inode vserver commands */ ++ ++#define VCMD_get_iattr VC_CMD(INODE, 1, 1) ++#define VCMD_set_iattr VC_CMD(INODE, 2, 1) ++ ++#define VCMD_fget_iattr VC_CMD(INODE, 3, 0) ++#define VCMD_fset_iattr VC_CMD(INODE, 4, 0) ++ ++struct vcmd_ctx_iattr_v1 { ++ const char __user *name; ++ uint32_t tag; ++ uint32_t flags; ++ uint32_t mask; ++}; ++ ++struct vcmd_ctx_fiattr_v0 { ++ uint32_t tag; ++ uint32_t flags; ++ uint32_t mask; ++}; ++ ++ ++#ifdef __KERNEL__ ++ ++ ++#ifdef CONFIG_COMPAT ++ ++#include ++ ++struct vcmd_ctx_iattr_v1_x32 { ++ compat_uptr_t name_ptr; ++ uint32_t tag; ++ uint32_t flags; ++ uint32_t mask; ++}; ++ ++#endif /* CONFIG_COMPAT */ ++ ++#include ++ ++extern int vc_get_iattr(void __user *); ++extern int vc_set_iattr(void __user *); ++ ++extern int vc_fget_iattr(uint32_t, void __user *); ++extern int vc_fset_iattr(uint32_t, void __user *); ++ ++#ifdef CONFIG_COMPAT ++ ++extern int vc_get_iattr_x32(void __user *); ++extern int vc_set_iattr_x32(void __user *); ++ ++#endif /* CONFIG_COMPAT */ ++ ++#endif /* __KERNEL__ */ ++#endif /* _VX_INODE_CMD_H */ +diff -NurpP --minimal linux-3.2.34/include/linux/vserver/limit.h linux-3.2.34-vs2.3.2.15/include/linux/vserver/limit.h +--- linux-3.2.34/include/linux/vserver/limit.h 1970-01-01 01:00:00.000000000 +0100 ++++ linux-3.2.34-vs2.3.2.15/include/linux/vserver/limit.h 2011-12-05 19:33:02.000000000 +0100 +@@ -0,0 +1,71 @@ ++#ifndef _VX_LIMIT_H ++#define _VX_LIMIT_H ++ ++#define VLIMIT_NSOCK 16 ++#define VLIMIT_OPENFD 17 ++#define VLIMIT_ANON 18 ++#define VLIMIT_SHMEM 19 ++#define VLIMIT_SEMARY 20 ++#define VLIMIT_NSEMS 21 ++#define VLIMIT_DENTRY 22 ++#define VLIMIT_MAPPED 23 ++ ++ ++#ifdef __KERNEL__ ++ 
++#define VLIM_NOCHECK ((1L << VLIMIT_DENTRY) | (1L << RLIMIT_RSS)) ++ ++/* keep in sync with CRLIM_INFINITY */ ++ ++#define VLIM_INFINITY (~0ULL) ++ ++#include ++#include ++ ++#ifndef RLIM_INFINITY ++#warning RLIM_INFINITY is undefined ++#endif ++ ++#define __rlim_val(l, r, v) ((l)->res[r].v) ++ ++#define __rlim_soft(l, r) __rlim_val(l, r, soft) ++#define __rlim_hard(l, r) __rlim_val(l, r, hard) ++ ++#define __rlim_rcur(l, r) __rlim_val(l, r, rcur) ++#define __rlim_rmin(l, r) __rlim_val(l, r, rmin) ++#define __rlim_rmax(l, r) __rlim_val(l, r, rmax) ++ ++#define __rlim_lhit(l, r) __rlim_val(l, r, lhit) ++#define __rlim_hit(l, r) atomic_inc(&__rlim_lhit(l, r)) ++ ++typedef atomic_long_t rlim_atomic_t; ++typedef unsigned long rlim_t; ++ ++#define __rlim_get(l, r) atomic_long_read(&__rlim_rcur(l, r)) ++#define __rlim_set(l, r, v) atomic_long_set(&__rlim_rcur(l, r), v) ++#define __rlim_inc(l, r) atomic_long_inc(&__rlim_rcur(l, r)) ++#define __rlim_dec(l, r) atomic_long_dec(&__rlim_rcur(l, r)) ++#define __rlim_add(l, r, v) atomic_long_add(v, &__rlim_rcur(l, r)) ++#define __rlim_sub(l, r, v) atomic_long_sub(v, &__rlim_rcur(l, r)) ++ ++ ++#if (RLIM_INFINITY == VLIM_INFINITY) ++#define VX_VLIM(r) ((long long)(long)(r)) ++#define VX_RLIM(v) ((rlim_t)(v)) ++#else ++#define VX_VLIM(r) (((r) == RLIM_INFINITY) \ ++ ? VLIM_INFINITY : (long long)(r)) ++#define VX_RLIM(v) (((v) == VLIM_INFINITY) \ ++ ? RLIM_INFINITY : (rlim_t)(v)) ++#endif ++ ++struct sysinfo; ++ ++void vx_vsi_meminfo(struct sysinfo *); ++void vx_vsi_swapinfo(struct sysinfo *); ++long vx_vsi_cached(struct sysinfo *); ++ ++#define NUM_LIMITS 24 ++ ++#endif /* __KERNEL__ */ ++#endif /* _VX_LIMIT_H */ +diff -NurpP --minimal linux-3.2.34/include/linux/vserver/limit_cmd.h linux-3.2.34-vs2.3.2.15/include/linux/vserver/limit_cmd.h +--- linux-3.2.34/include/linux/vserver/limit_cmd.h 1970-01-01 01:00:00.000000000 +0100 ++++ linux-3.2.34-vs2.3.2.15/include/linux/vserver/limit_cmd.h 2011-12-05 19:33:02.000000000 +0100 +@@ -0,0 +1,71 @@ ++#ifndef _VX_LIMIT_CMD_H ++#define _VX_LIMIT_CMD_H ++ ++ ++/* rlimit vserver commands */ ++ ++#define VCMD_get_rlimit VC_CMD(RLIMIT, 1, 0) ++#define VCMD_set_rlimit VC_CMD(RLIMIT, 2, 0) ++#define VCMD_get_rlimit_mask VC_CMD(RLIMIT, 3, 0) ++#define VCMD_reset_hits VC_CMD(RLIMIT, 7, 0) ++#define VCMD_reset_minmax VC_CMD(RLIMIT, 9, 0) ++ ++struct vcmd_ctx_rlimit_v0 { ++ uint32_t id; ++ uint64_t minimum; ++ uint64_t softlimit; ++ uint64_t maximum; ++}; ++ ++struct vcmd_ctx_rlimit_mask_v0 { ++ uint32_t minimum; ++ uint32_t softlimit; ++ uint32_t maximum; ++}; ++ ++#define VCMD_rlimit_stat VC_CMD(VSTAT, 1, 0) ++ ++struct vcmd_rlimit_stat_v0 { ++ uint32_t id; ++ uint32_t hits; ++ uint64_t value; ++ uint64_t minimum; ++ uint64_t maximum; ++}; ++ ++#define CRLIM_UNSET (0ULL) ++#define CRLIM_INFINITY (~0ULL) ++#define CRLIM_KEEP (~1ULL) ++ ++#ifdef __KERNEL__ ++ ++#ifdef CONFIG_IA32_EMULATION ++ ++struct vcmd_ctx_rlimit_v0_x32 { ++ uint32_t id; ++ uint64_t minimum; ++ uint64_t softlimit; ++ uint64_t maximum; ++} __attribute__ ((packed)); ++ ++#endif /* CONFIG_IA32_EMULATION */ ++ ++#include ++ ++extern int vc_get_rlimit_mask(uint32_t, void __user *); ++extern int vc_get_rlimit(struct vx_info *, void __user *); ++extern int vc_set_rlimit(struct vx_info *, void __user *); ++extern int vc_reset_hits(struct vx_info *, void __user *); ++extern int vc_reset_minmax(struct vx_info *, void __user *); ++ ++extern int vc_rlimit_stat(struct vx_info *, void __user *); ++ ++#ifdef CONFIG_IA32_EMULATION ++ ++extern int 
vc_get_rlimit_x32(struct vx_info *, void __user *); ++extern int vc_set_rlimit_x32(struct vx_info *, void __user *); ++ ++#endif /* CONFIG_IA32_EMULATION */ ++ ++#endif /* __KERNEL__ */ ++#endif /* _VX_LIMIT_CMD_H */ +diff -NurpP --minimal linux-3.2.34/include/linux/vserver/limit_def.h linux-3.2.34-vs2.3.2.15/include/linux/vserver/limit_def.h +--- linux-3.2.34/include/linux/vserver/limit_def.h 1970-01-01 01:00:00.000000000 +0100 ++++ linux-3.2.34-vs2.3.2.15/include/linux/vserver/limit_def.h 2011-12-05 19:33:02.000000000 +0100 +@@ -0,0 +1,47 @@ ++#ifndef _VX_LIMIT_DEF_H ++#define _VX_LIMIT_DEF_H ++ ++#include ++#include ++ ++#include "limit.h" ++ ++ ++struct _vx_res_limit { ++ rlim_t soft; /* Context soft limit */ ++ rlim_t hard; /* Context hard limit */ ++ ++ rlim_atomic_t rcur; /* Current value */ ++ rlim_t rmin; /* Context minimum */ ++ rlim_t rmax; /* Context maximum */ ++ ++ atomic_t lhit; /* Limit hits */ ++}; ++ ++/* context sub struct */ ++ ++struct _vx_limit { ++ struct _vx_res_limit res[NUM_LIMITS]; ++}; ++ ++#ifdef CONFIG_VSERVER_DEBUG ++ ++static inline void __dump_vx_limit(struct _vx_limit *limit) ++{ ++ int i; ++ ++ printk("\t_vx_limit:"); ++ for (i = 0; i < NUM_LIMITS; i++) { ++ printk("\t [%2d] = %8lu %8lu/%8lu, %8ld/%8ld, %8d\n", ++ i, (unsigned long)__rlim_get(limit, i), ++ (unsigned long)__rlim_rmin(limit, i), ++ (unsigned long)__rlim_rmax(limit, i), ++ (long)__rlim_soft(limit, i), ++ (long)__rlim_hard(limit, i), ++ atomic_read(&__rlim_lhit(limit, i))); ++ } ++} ++ ++#endif ++ ++#endif /* _VX_LIMIT_DEF_H */ +diff -NurpP --minimal linux-3.2.34/include/linux/vserver/limit_int.h linux-3.2.34-vs2.3.2.15/include/linux/vserver/limit_int.h +--- linux-3.2.34/include/linux/vserver/limit_int.h 1970-01-01 01:00:00.000000000 +0100 ++++ linux-3.2.34-vs2.3.2.15/include/linux/vserver/limit_int.h 2011-12-05 19:33:02.000000000 +0100 +@@ -0,0 +1,198 @@ ++#ifndef _VX_LIMIT_INT_H ++#define _VX_LIMIT_INT_H ++ ++#include "context.h" ++ ++#ifdef __KERNEL__ ++ ++#define VXD_RCRES_COND(r) VXD_CBIT(cres, r) ++#define VXD_RLIMIT_COND(r) VXD_CBIT(limit, r) ++ ++extern const char *vlimit_name[NUM_LIMITS]; ++ ++static inline void __vx_acc_cres(struct vx_info *vxi, ++ int res, int dir, void *_data, char *_file, int _line) ++{ ++ if (VXD_RCRES_COND(res)) ++ vxlprintk(1, "vx_acc_cres[%5d,%s,%2d]: %5ld%s (%p)", ++ (vxi ? vxi->vx_id : -1), vlimit_name[res], res, ++ (vxi ? (long)__rlim_get(&vxi->limit, res) : 0), ++ (dir > 0) ? "++" : "--", _data, _file, _line); ++ if (!vxi) ++ return; ++ ++ if (dir > 0) ++ __rlim_inc(&vxi->limit, res); ++ else ++ __rlim_dec(&vxi->limit, res); ++} ++ ++static inline void __vx_add_cres(struct vx_info *vxi, ++ int res, int amount, void *_data, char *_file, int _line) ++{ ++ if (VXD_RCRES_COND(res)) ++ vxlprintk(1, "vx_add_cres[%5d,%s,%2d]: %5ld += %5d (%p)", ++ (vxi ? vxi->vx_id : -1), vlimit_name[res], res, ++ (vxi ? 
(long)__rlim_get(&vxi->limit, res) : 0), ++ amount, _data, _file, _line); ++ if (amount == 0) ++ return; ++ if (!vxi) ++ return; ++ __rlim_add(&vxi->limit, res, amount); ++} ++ ++static inline ++int __vx_cres_adjust_max(struct _vx_limit *limit, int res, rlim_t value) ++{ ++ int cond = (value > __rlim_rmax(limit, res)); ++ ++ if (cond) ++ __rlim_rmax(limit, res) = value; ++ return cond; ++} ++ ++static inline ++int __vx_cres_adjust_min(struct _vx_limit *limit, int res, rlim_t value) ++{ ++ int cond = (value < __rlim_rmin(limit, res)); ++ ++ if (cond) ++ __rlim_rmin(limit, res) = value; ++ return cond; ++} ++ ++static inline ++void __vx_cres_fixup(struct _vx_limit *limit, int res, rlim_t value) ++{ ++ if (!__vx_cres_adjust_max(limit, res, value)) ++ __vx_cres_adjust_min(limit, res, value); ++} ++ ++ ++/* return values: ++ +1 ... no limit hit ++ -1 ... over soft limit ++ 0 ... over hard limit */ ++ ++static inline int __vx_cres_avail(struct vx_info *vxi, ++ int res, int num, char *_file, int _line) ++{ ++ struct _vx_limit *limit; ++ rlim_t value; ++ ++ if (VXD_RLIMIT_COND(res)) ++ vxlprintk(1, "vx_cres_avail[%5d,%s,%2d]: %5ld/%5ld > %5ld + %5d", ++ (vxi ? vxi->vx_id : -1), vlimit_name[res], res, ++ (vxi ? (long)__rlim_soft(&vxi->limit, res) : -1), ++ (vxi ? (long)__rlim_hard(&vxi->limit, res) : -1), ++ (vxi ? (long)__rlim_get(&vxi->limit, res) : 0), ++ num, _file, _line); ++ if (!vxi) ++ return 1; ++ ++ limit = &vxi->limit; ++ value = __rlim_get(limit, res); ++ ++ if (!__vx_cres_adjust_max(limit, res, value)) ++ __vx_cres_adjust_min(limit, res, value); ++ ++ if (num == 0) ++ return 1; ++ ++ if (__rlim_soft(limit, res) == RLIM_INFINITY) ++ return -1; ++ if (value + num <= __rlim_soft(limit, res)) ++ return -1; ++ ++ if (__rlim_hard(limit, res) == RLIM_INFINITY) ++ return 1; ++ if (value + num <= __rlim_hard(limit, res)) ++ return 1; ++ ++ __rlim_hit(limit, res); ++ return 0; ++} ++ ++ ++static const int VLA_RSS[] = { RLIMIT_RSS, VLIMIT_ANON, VLIMIT_MAPPED, 0 }; ++ ++static inline ++rlim_t __vx_cres_array_sum(struct _vx_limit *limit, const int *array) ++{ ++ rlim_t value, sum = 0; ++ int res; ++ ++ while ((res = *array++)) { ++ value = __rlim_get(limit, res); ++ __vx_cres_fixup(limit, res, value); ++ sum += value; ++ } ++ return sum; ++} ++ ++static inline ++rlim_t __vx_cres_array_fixup(struct _vx_limit *limit, const int *array) ++{ ++ rlim_t value = __vx_cres_array_sum(limit, array + 1); ++ int res = *array; ++ ++ if (value == __rlim_get(limit, res)) ++ return value; ++ ++ __rlim_set(limit, res, value); ++ /* now adjust min/max */ ++ if (!__vx_cres_adjust_max(limit, res, value)) ++ __vx_cres_adjust_min(limit, res, value); ++ ++ return value; ++} ++ ++static inline int __vx_cres_array_avail(struct vx_info *vxi, ++ const int *array, int num, char *_file, int _line) ++{ ++ struct _vx_limit *limit; ++ rlim_t value = 0; ++ int res; ++ ++ if (num == 0) ++ return 1; ++ if (!vxi) ++ return 1; ++ ++ limit = &vxi->limit; ++ res = *array; ++ value = __vx_cres_array_sum(limit, array + 1); ++ ++ __rlim_set(limit, res, value); ++ __vx_cres_fixup(limit, res, value); ++ ++ return __vx_cres_avail(vxi, res, num, _file, _line); ++} ++ ++ ++static inline void vx_limit_fixup(struct _vx_limit *limit, int id) ++{ ++ rlim_t value; ++ int res; ++ ++ /* complex resources first */ ++ if ((id < 0) || (id == RLIMIT_RSS)) ++ __vx_cres_array_fixup(limit, VLA_RSS); ++ ++ for (res = 0; res < NUM_LIMITS; res++) { ++ if ((id > 0) && (res != id)) ++ continue; ++ ++ value = __rlim_get(limit, res); ++ __vx_cres_fixup(limit, res, 
value); ++ ++ /* not supposed to happen, maybe warn? */ ++ if (__rlim_rmax(limit, res) > __rlim_hard(limit, res)) ++ __rlim_rmax(limit, res) = __rlim_hard(limit, res); ++ } ++} ++ ++ ++#endif /* __KERNEL__ */ ++#endif /* _VX_LIMIT_INT_H */ +diff -NurpP --minimal linux-3.2.34/include/linux/vserver/monitor.h linux-3.2.34-vs2.3.2.15/include/linux/vserver/monitor.h +--- linux-3.2.34/include/linux/vserver/monitor.h 1970-01-01 01:00:00.000000000 +0100 ++++ linux-3.2.34-vs2.3.2.15/include/linux/vserver/monitor.h 2011-12-05 19:33:02.000000000 +0100 +@@ -0,0 +1,96 @@ ++#ifndef _VX_MONITOR_H ++#define _VX_MONITOR_H ++ ++#include ++ ++enum { ++ VXM_UNUSED = 0, ++ ++ VXM_SYNC = 0x10, ++ ++ VXM_UPDATE = 0x20, ++ VXM_UPDATE_1, ++ VXM_UPDATE_2, ++ ++ VXM_RQINFO_1 = 0x24, ++ VXM_RQINFO_2, ++ ++ VXM_ACTIVATE = 0x40, ++ VXM_DEACTIVATE, ++ VXM_IDLE, ++ ++ VXM_HOLD = 0x44, ++ VXM_UNHOLD, ++ ++ VXM_MIGRATE = 0x48, ++ VXM_RESCHED, ++ ++ /* all other bits are flags */ ++ VXM_SCHED = 0x80, ++}; ++ ++struct _vxm_update_1 { ++ uint32_t tokens_max; ++ uint32_t fill_rate; ++ uint32_t interval; ++}; ++ ++struct _vxm_update_2 { ++ uint32_t tokens_min; ++ uint32_t fill_rate; ++ uint32_t interval; ++}; ++ ++struct _vxm_rqinfo_1 { ++ uint16_t running; ++ uint16_t onhold; ++ uint16_t iowait; ++ uint16_t uintr; ++ uint32_t idle_tokens; ++}; ++ ++struct _vxm_rqinfo_2 { ++ uint32_t norm_time; ++ uint32_t idle_time; ++ uint32_t idle_skip; ++}; ++ ++struct _vxm_sched { ++ uint32_t tokens; ++ uint32_t norm_time; ++ uint32_t idle_time; ++}; ++ ++struct _vxm_task { ++ uint16_t pid; ++ uint16_t state; ++}; ++ ++struct _vxm_event { ++ uint32_t jif; ++ union { ++ uint32_t seq; ++ uint32_t sec; ++ }; ++ union { ++ uint32_t tokens; ++ uint32_t nsec; ++ struct _vxm_task tsk; ++ }; ++}; ++ ++struct _vx_mon_entry { ++ uint16_t type; ++ uint16_t xid; ++ union { ++ struct _vxm_event ev; ++ struct _vxm_sched sd; ++ struct _vxm_update_1 u1; ++ struct _vxm_update_2 u2; ++ struct _vxm_rqinfo_1 q1; ++ struct _vxm_rqinfo_2 q2; ++ }; ++}; ++ ++ ++#endif /* _VX_MONITOR_H */ +diff -NurpP --minimal linux-3.2.34/include/linux/vserver/network.h linux-3.2.34-vs2.3.2.15/include/linux/vserver/network.h +--- linux-3.2.34/include/linux/vserver/network.h 1970-01-01 01:00:00.000000000 +0100 ++++ linux-3.2.34-vs2.3.2.15/include/linux/vserver/network.h 2011-12-05 19:33:02.000000000 +0100 +@@ -0,0 +1,148 @@ ++#ifndef _VX_NETWORK_H ++#define _VX_NETWORK_H ++ ++#include ++ ++ ++#define MAX_N_CONTEXT 65535 /* Arbitrary limit */ ++ ++ ++/* network flags */ ++ ++#define NXF_INFO_PRIVATE 0x00000008 ++ ++#define NXF_SINGLE_IP 0x00000100 ++#define NXF_LBACK_REMAP 0x00000200 ++#define NXF_LBACK_ALLOW 0x00000400 ++ ++#define NXF_HIDE_NETIF 0x02000000 ++#define NXF_HIDE_LBACK 0x04000000 ++ ++#define NXF_STATE_SETUP (1ULL << 32) ++#define NXF_STATE_ADMIN (1ULL << 34) ++ ++#define NXF_SC_HELPER (1ULL << 36) ++#define NXF_PERSISTENT (1ULL << 38) ++ ++#define NXF_ONE_TIME (0x0005ULL << 32) ++ ++ ++#define NXF_INIT_SET (__nxf_init_set()) ++ ++static inline uint64_t __nxf_init_set(void) { ++ return NXF_STATE_ADMIN ++#ifdef CONFIG_VSERVER_AUTO_LBACK ++ | NXF_LBACK_REMAP ++ | NXF_HIDE_LBACK ++#endif ++#ifdef CONFIG_VSERVER_AUTO_SINGLE ++ | NXF_SINGLE_IP ++#endif ++ | NXF_HIDE_NETIF; ++} ++ ++ ++/* network caps */ ++ ++#define NXC_TUN_CREATE 0x00000001 ++ ++#define NXC_RAW_ICMP 0x00000100 ++ ++#define NXC_MULTICAST 0x00001000 ++ ++ ++/* address types */ ++ ++#define NXA_TYPE_IPV4 0x0001 ++#define NXA_TYPE_IPV6 0x0002 ++ ++#define NXA_TYPE_NONE 0x0000 ++#define NXA_TYPE_ANY 0x00FF 
++ ++#define NXA_TYPE_ADDR 0x0010 ++#define NXA_TYPE_MASK 0x0020 ++#define NXA_TYPE_RANGE 0x0040 ++ ++#define NXA_MASK_ALL (NXA_TYPE_ADDR | NXA_TYPE_MASK | NXA_TYPE_RANGE) ++ ++#define NXA_MOD_BCAST 0x0100 ++#define NXA_MOD_LBACK 0x0200 ++ ++#define NXA_LOOPBACK 0x1000 ++ ++#define NXA_MASK_BIND (NXA_MASK_ALL | NXA_MOD_BCAST | NXA_MOD_LBACK) ++#define NXA_MASK_SHOW (NXA_MASK_ALL | NXA_LOOPBACK) ++ ++#ifdef __KERNEL__ ++ ++#include ++#include ++#include ++#include ++#include ++#include ++ ++struct nx_addr_v4 { ++ struct nx_addr_v4 *next; ++ struct in_addr ip[2]; ++ struct in_addr mask; ++ uint16_t type; ++ uint16_t flags; ++}; ++ ++struct nx_addr_v6 { ++ struct nx_addr_v6 *next; ++ struct in6_addr ip; ++ struct in6_addr mask; ++ uint32_t prefix; ++ uint16_t type; ++ uint16_t flags; ++}; ++ ++struct nx_info { ++ struct hlist_node nx_hlist; /* linked list of nxinfos */ ++ nid_t nx_id; /* vnet id */ ++ atomic_t nx_usecnt; /* usage count */ ++ atomic_t nx_tasks; /* tasks count */ ++ int nx_state; /* context state */ ++ ++ uint64_t nx_flags; /* network flag word */ ++ uint64_t nx_ncaps; /* network capabilities */ ++ ++ struct in_addr v4_lback; /* Loopback address */ ++ struct in_addr v4_bcast; /* Broadcast address */ ++ struct nx_addr_v4 v4; /* First/Single ipv4 address */ ++#ifdef CONFIG_IPV6 ++ struct nx_addr_v6 v6; /* First/Single ipv6 address */ ++#endif ++ char nx_name[65]; /* network context name */ ++}; ++ ++ ++/* status flags */ ++ ++#define NXS_HASHED 0x0001 ++#define NXS_SHUTDOWN 0x0100 ++#define NXS_RELEASED 0x8000 ++ ++extern struct nx_info *lookup_nx_info(int); ++ ++extern int get_nid_list(int, unsigned int *, int); ++extern int nid_is_hashed(nid_t); ++ ++extern int nx_migrate_task(struct task_struct *, struct nx_info *); ++ ++extern long vs_net_change(struct nx_info *, unsigned int); ++ ++struct sock; ++ ++ ++#define NX_IPV4(n) ((n)->v4.type != NXA_TYPE_NONE) ++#ifdef CONFIG_IPV6 ++#define NX_IPV6(n) ((n)->v6.type != NXA_TYPE_NONE) ++#else ++#define NX_IPV6(n) (0) ++#endif ++ ++#endif /* __KERNEL__ */ ++#endif /* _VX_NETWORK_H */ +diff -NurpP --minimal linux-3.2.34/include/linux/vserver/network_cmd.h linux-3.2.34-vs2.3.2.15/include/linux/vserver/network_cmd.h +--- linux-3.2.34/include/linux/vserver/network_cmd.h 1970-01-01 01:00:00.000000000 +0100 ++++ linux-3.2.34-vs2.3.2.15/include/linux/vserver/network_cmd.h 2011-12-05 19:33:02.000000000 +0100 +@@ -0,0 +1,164 @@ ++#ifndef _VX_NETWORK_CMD_H ++#define _VX_NETWORK_CMD_H ++ ++ ++/* vinfo commands */ ++ ++#define VCMD_task_nid VC_CMD(VINFO, 2, 0) ++ ++#ifdef __KERNEL__ ++extern int vc_task_nid(uint32_t); ++ ++#endif /* __KERNEL__ */ ++ ++#define VCMD_nx_info VC_CMD(VINFO, 6, 0) ++ ++struct vcmd_nx_info_v0 { ++ uint32_t nid; ++ /* more to come */ ++}; ++ ++#ifdef __KERNEL__ ++extern int vc_nx_info(struct nx_info *, void __user *); ++ ++#endif /* __KERNEL__ */ ++ ++#include ++#include ++ ++#define VCMD_net_create_v0 VC_CMD(VNET, 1, 0) ++#define VCMD_net_create VC_CMD(VNET, 1, 1) ++ ++struct vcmd_net_create { ++ uint64_t flagword; ++}; ++ ++#define VCMD_net_migrate VC_CMD(NETMIG, 1, 0) ++ ++#define VCMD_net_add VC_CMD(NETALT, 1, 0) ++#define VCMD_net_remove VC_CMD(NETALT, 2, 0) ++ ++struct vcmd_net_addr_v0 { ++ uint16_t type; ++ uint16_t count; ++ struct in_addr ip[4]; ++ struct in_addr mask[4]; ++}; ++ ++#define VCMD_net_add_ipv4_v1 VC_CMD(NETALT, 1, 1) ++#define VCMD_net_rem_ipv4_v1 VC_CMD(NETALT, 2, 1) ++ ++struct vcmd_net_addr_ipv4_v1 { ++ uint16_t type; ++ uint16_t flags; ++ struct in_addr ip; ++ struct in_addr mask; ++}; ++ 
++#define VCMD_net_add_ipv4 VC_CMD(NETALT, 1, 2) ++#define VCMD_net_rem_ipv4 VC_CMD(NETALT, 2, 2) ++ ++struct vcmd_net_addr_ipv4_v2 { ++ uint16_t type; ++ uint16_t flags; ++ struct in_addr ip; ++ struct in_addr ip2; ++ struct in_addr mask; ++}; ++ ++#define VCMD_net_add_ipv6 VC_CMD(NETALT, 3, 1) ++#define VCMD_net_remove_ipv6 VC_CMD(NETALT, 4, 1) ++ ++struct vcmd_net_addr_ipv6_v1 { ++ uint16_t type; ++ uint16_t flags; ++ uint32_t prefix; ++ struct in6_addr ip; ++ struct in6_addr mask; ++}; ++ ++#define VCMD_add_match_ipv4 VC_CMD(NETALT, 5, 0) ++#define VCMD_get_match_ipv4 VC_CMD(NETALT, 6, 0) ++ ++struct vcmd_match_ipv4_v0 { ++ uint16_t type; ++ uint16_t flags; ++ uint16_t parent; ++ uint16_t prefix; ++ struct in_addr ip; ++ struct in_addr ip2; ++ struct in_addr mask; ++}; ++ ++#define VCMD_add_match_ipv6 VC_CMD(NETALT, 7, 0) ++#define VCMD_get_match_ipv6 VC_CMD(NETALT, 8, 0) ++ ++struct vcmd_match_ipv6_v0 { ++ uint16_t type; ++ uint16_t flags; ++ uint16_t parent; ++ uint16_t prefix; ++ struct in6_addr ip; ++ struct in6_addr ip2; ++ struct in6_addr mask; ++}; ++ ++ ++#ifdef __KERNEL__ ++extern int vc_net_create(uint32_t, void __user *); ++extern int vc_net_migrate(struct nx_info *, void __user *); ++ ++extern int vc_net_add(struct nx_info *, void __user *); ++extern int vc_net_remove(struct nx_info *, void __user *); ++ ++extern int vc_net_add_ipv4_v1(struct nx_info *, void __user *); ++extern int vc_net_add_ipv4(struct nx_info *, void __user *); ++ ++extern int vc_net_rem_ipv4_v1(struct nx_info *, void __user *); ++extern int vc_net_rem_ipv4(struct nx_info *, void __user *); ++ ++extern int vc_net_add_ipv6(struct nx_info *, void __user *); ++extern int vc_net_remove_ipv6(struct nx_info *, void __user *); ++ ++extern int vc_add_match_ipv4(struct nx_info *, void __user *); ++extern int vc_get_match_ipv4(struct nx_info *, void __user *); ++ ++extern int vc_add_match_ipv6(struct nx_info *, void __user *); ++extern int vc_get_match_ipv6(struct nx_info *, void __user *); ++ ++#endif /* __KERNEL__ */ ++ ++ ++/* flag commands */ ++ ++#define VCMD_get_nflags VC_CMD(FLAGS, 5, 0) ++#define VCMD_set_nflags VC_CMD(FLAGS, 6, 0) ++ ++struct vcmd_net_flags_v0 { ++ uint64_t flagword; ++ uint64_t mask; ++}; ++ ++#ifdef __KERNEL__ ++extern int vc_get_nflags(struct nx_info *, void __user *); ++extern int vc_set_nflags(struct nx_info *, void __user *); ++ ++#endif /* __KERNEL__ */ ++ ++ ++/* network caps commands */ ++ ++#define VCMD_get_ncaps VC_CMD(FLAGS, 7, 0) ++#define VCMD_set_ncaps VC_CMD(FLAGS, 8, 0) ++ ++struct vcmd_net_caps_v0 { ++ uint64_t ncaps; ++ uint64_t cmask; ++}; ++ ++#ifdef __KERNEL__ ++extern int vc_get_ncaps(struct nx_info *, void __user *); ++extern int vc_set_ncaps(struct nx_info *, void __user *); ++ ++#endif /* __KERNEL__ */ ++#endif /* _VX_CONTEXT_CMD_H */ +diff -NurpP --minimal linux-3.2.34/include/linux/vserver/percpu.h linux-3.2.34-vs2.3.2.15/include/linux/vserver/percpu.h +--- linux-3.2.34/include/linux/vserver/percpu.h 1970-01-01 01:00:00.000000000 +0100 ++++ linux-3.2.34-vs2.3.2.15/include/linux/vserver/percpu.h 2011-12-05 19:33:02.000000000 +0100 +@@ -0,0 +1,14 @@ ++#ifndef _VX_PERCPU_H ++#define _VX_PERCPU_H ++ ++#include "cvirt_def.h" ++#include "sched_def.h" ++ ++struct _vx_percpu { ++ struct _vx_cvirt_pc cvirt; ++ struct _vx_sched_pc sched; ++}; ++ ++#define PERCPU_PERCTX (sizeof(struct _vx_percpu)) ++ ++#endif /* _VX_PERCPU_H */ +diff -NurpP --minimal linux-3.2.34/include/linux/vserver/pid.h linux-3.2.34-vs2.3.2.15/include/linux/vserver/pid.h +--- 
linux-3.2.34/include/linux/vserver/pid.h 1970-01-01 01:00:00.000000000 +0100 ++++ linux-3.2.34-vs2.3.2.15/include/linux/vserver/pid.h 2011-12-05 19:33:02.000000000 +0100 +@@ -0,0 +1,51 @@ ++#ifndef _VSERVER_PID_H ++#define _VSERVER_PID_H ++ ++/* pid faking stuff */ ++ ++#define vx_info_map_pid(v, p) \ ++ __vx_info_map_pid((v), (p), __func__, __FILE__, __LINE__) ++#define vx_info_map_tgid(v,p) vx_info_map_pid(v,p) ++#define vx_map_pid(p) vx_info_map_pid(current_vx_info(), p) ++#define vx_map_tgid(p) vx_map_pid(p) ++ ++static inline int __vx_info_map_pid(struct vx_info *vxi, int pid, ++ const char *func, const char *file, int line) ++{ ++ if (vx_info_flags(vxi, VXF_INFO_INIT, 0)) { ++ vxfprintk(VXD_CBIT(cvirt, 2), ++ "vx_map_tgid: %p/%llx: %d -> %d", ++ vxi, (long long)vxi->vx_flags, pid, ++ (pid && pid == vxi->vx_initpid) ? 1 : pid, ++ func, file, line); ++ if (pid == 0) ++ return 0; ++ if (pid == vxi->vx_initpid) ++ return 1; ++ } ++ return pid; ++} ++ ++#define vx_info_rmap_pid(v, p) \ ++ __vx_info_rmap_pid((v), (p), __func__, __FILE__, __LINE__) ++#define vx_rmap_pid(p) vx_info_rmap_pid(current_vx_info(), p) ++#define vx_rmap_tgid(p) vx_rmap_pid(p) ++ ++static inline int __vx_info_rmap_pid(struct vx_info *vxi, int pid, ++ const char *func, const char *file, int line) ++{ ++ if (vx_info_flags(vxi, VXF_INFO_INIT, 0)) { ++ vxfprintk(VXD_CBIT(cvirt, 2), ++ "vx_rmap_tgid: %p/%llx: %d -> %d", ++ vxi, (long long)vxi->vx_flags, pid, ++ (pid == 1) ? vxi->vx_initpid : pid, ++ func, file, line); ++ if ((pid == 1) && vxi->vx_initpid) ++ return vxi->vx_initpid; ++ if (pid == vxi->vx_initpid) ++ return ~0U; ++ } ++ return pid; ++} ++ ++#endif +diff -NurpP --minimal linux-3.2.34/include/linux/vserver/sched.h linux-3.2.34-vs2.3.2.15/include/linux/vserver/sched.h +--- linux-3.2.34/include/linux/vserver/sched.h 1970-01-01 01:00:00.000000000 +0100 ++++ linux-3.2.34-vs2.3.2.15/include/linux/vserver/sched.h 2011-12-05 19:33:02.000000000 +0100 +@@ -0,0 +1,23 @@ ++#ifndef _VX_SCHED_H ++#define _VX_SCHED_H ++ ++ ++#ifdef __KERNEL__ ++ ++struct timespec; ++ ++void vx_vsi_uptime(struct timespec *, struct timespec *); ++ ++ ++struct vx_info; ++ ++void vx_update_load(struct vx_info *); ++ ++ ++void vx_update_sched_param(struct _vx_sched *sched, ++ struct _vx_sched_pc *sched_pc); ++ ++#endif /* __KERNEL__ */ ++#else /* _VX_SCHED_H */ ++#warning duplicate inclusion ++#endif /* _VX_SCHED_H */ +diff -NurpP --minimal linux-3.2.34/include/linux/vserver/sched_cmd.h linux-3.2.34-vs2.3.2.15/include/linux/vserver/sched_cmd.h +--- linux-3.2.34/include/linux/vserver/sched_cmd.h 1970-01-01 01:00:00.000000000 +0100 ++++ linux-3.2.34-vs2.3.2.15/include/linux/vserver/sched_cmd.h 2011-12-05 19:33:02.000000000 +0100 +@@ -0,0 +1,21 @@ ++#ifndef _VX_SCHED_CMD_H ++#define _VX_SCHED_CMD_H ++ ++ ++struct vcmd_prio_bias { ++ int32_t cpu_id; ++ int32_t prio_bias; ++}; ++ ++#define VCMD_set_prio_bias VC_CMD(SCHED, 4, 0) ++#define VCMD_get_prio_bias VC_CMD(SCHED, 5, 0) ++ ++#ifdef __KERNEL__ ++ ++#include ++ ++extern int vc_set_prio_bias(struct vx_info *, void __user *); ++extern int vc_get_prio_bias(struct vx_info *, void __user *); ++ ++#endif /* __KERNEL__ */ ++#endif /* _VX_SCHED_CMD_H */ +diff -NurpP --minimal linux-3.2.34/include/linux/vserver/sched_def.h linux-3.2.34-vs2.3.2.15/include/linux/vserver/sched_def.h +--- linux-3.2.34/include/linux/vserver/sched_def.h 1970-01-01 01:00:00.000000000 +0100 ++++ linux-3.2.34-vs2.3.2.15/include/linux/vserver/sched_def.h 2011-12-05 19:33:02.000000000 +0100 +@@ -0,0 +1,38 @@ ++#ifndef 
_VX_SCHED_DEF_H ++#define _VX_SCHED_DEF_H ++ ++#include ++#include ++#include ++#include ++#include ++ ++ ++/* context sub struct */ ++ ++struct _vx_sched { ++ int prio_bias; /* bias offset for priority */ ++ ++ cpumask_t update; /* CPUs which should update */ ++}; ++ ++struct _vx_sched_pc { ++ int prio_bias; /* bias offset for priority */ ++ ++ uint64_t user_ticks; /* token tick events */ ++ uint64_t sys_ticks; /* token tick events */ ++ uint64_t hold_ticks; /* token ticks paused */ ++}; ++ ++ ++#ifdef CONFIG_VSERVER_DEBUG ++ ++static inline void __dump_vx_sched(struct _vx_sched *sched) ++{ ++ printk("\t_vx_sched:\n"); ++ printk("\t priority = %4d\n", sched->prio_bias); ++} ++ ++#endif ++ ++#endif /* _VX_SCHED_DEF_H */ +diff -NurpP --minimal linux-3.2.34/include/linux/vserver/signal.h linux-3.2.34-vs2.3.2.15/include/linux/vserver/signal.h +--- linux-3.2.34/include/linux/vserver/signal.h 1970-01-01 01:00:00.000000000 +0100 ++++ linux-3.2.34-vs2.3.2.15/include/linux/vserver/signal.h 2011-12-05 19:33:02.000000000 +0100 +@@ -0,0 +1,14 @@ ++#ifndef _VX_SIGNAL_H ++#define _VX_SIGNAL_H ++ ++ ++#ifdef __KERNEL__ ++ ++struct vx_info; ++ ++int vx_info_kill(struct vx_info *, int, int); ++ ++#endif /* __KERNEL__ */ ++#else /* _VX_SIGNAL_H */ ++#warning duplicate inclusion ++#endif /* _VX_SIGNAL_H */ +diff -NurpP --minimal linux-3.2.34/include/linux/vserver/signal_cmd.h linux-3.2.34-vs2.3.2.15/include/linux/vserver/signal_cmd.h +--- linux-3.2.34/include/linux/vserver/signal_cmd.h 1970-01-01 01:00:00.000000000 +0100 ++++ linux-3.2.34-vs2.3.2.15/include/linux/vserver/signal_cmd.h 2011-12-05 19:33:02.000000000 +0100 +@@ -0,0 +1,43 @@ ++#ifndef _VX_SIGNAL_CMD_H ++#define _VX_SIGNAL_CMD_H ++ ++ ++/* signalling vserver commands */ ++ ++#define VCMD_ctx_kill VC_CMD(PROCTRL, 1, 0) ++#define VCMD_wait_exit VC_CMD(EVENT, 99, 0) ++ ++struct vcmd_ctx_kill_v0 { ++ int32_t pid; ++ int32_t sig; ++}; ++ ++struct vcmd_wait_exit_v0 { ++ int32_t reboot_cmd; ++ int32_t exit_code; ++}; ++ ++#ifdef __KERNEL__ ++ ++extern int vc_ctx_kill(struct vx_info *, void __user *); ++extern int vc_wait_exit(struct vx_info *, void __user *); ++ ++#endif /* __KERNEL__ */ ++ ++/* process alteration commands */ ++ ++#define VCMD_get_pflags VC_CMD(PROCALT, 5, 0) ++#define VCMD_set_pflags VC_CMD(PROCALT, 6, 0) ++ ++struct vcmd_pflags_v0 { ++ uint32_t flagword; ++ uint32_t mask; ++}; ++ ++#ifdef __KERNEL__ ++ ++extern int vc_get_pflags(uint32_t pid, void __user *); ++extern int vc_set_pflags(uint32_t pid, void __user *); ++ ++#endif /* __KERNEL__ */ ++#endif /* _VX_SIGNAL_CMD_H */ +diff -NurpP --minimal linux-3.2.34/include/linux/vserver/space.h linux-3.2.34-vs2.3.2.15/include/linux/vserver/space.h +--- linux-3.2.34/include/linux/vserver/space.h 1970-01-01 01:00:00.000000000 +0100 ++++ linux-3.2.34-vs2.3.2.15/include/linux/vserver/space.h 2011-12-05 19:33:02.000000000 +0100 +@@ -0,0 +1,12 @@ ++#ifndef _VX_SPACE_H ++#define _VX_SPACE_H ++ ++#include ++ ++struct vx_info; ++ ++int vx_set_space(struct vx_info *vxi, unsigned long mask, unsigned index); ++ ++#else /* _VX_SPACE_H */ ++#warning duplicate inclusion ++#endif /* _VX_SPACE_H */ +diff -NurpP --minimal linux-3.2.34/include/linux/vserver/space_cmd.h linux-3.2.34-vs2.3.2.15/include/linux/vserver/space_cmd.h +--- linux-3.2.34/include/linux/vserver/space_cmd.h 1970-01-01 01:00:00.000000000 +0100 ++++ linux-3.2.34-vs2.3.2.15/include/linux/vserver/space_cmd.h 2011-12-05 19:33:02.000000000 +0100 +@@ -0,0 +1,38 @@ ++#ifndef _VX_SPACE_CMD_H ++#define _VX_SPACE_CMD_H ++ ++ ++#define 
VCMD_enter_space_v0 VC_CMD(PROCALT, 1, 0) ++#define VCMD_enter_space_v1 VC_CMD(PROCALT, 1, 1) ++#define VCMD_enter_space VC_CMD(PROCALT, 1, 2) ++ ++#define VCMD_set_space_v0 VC_CMD(PROCALT, 3, 0) ++#define VCMD_set_space_v1 VC_CMD(PROCALT, 3, 1) ++#define VCMD_set_space VC_CMD(PROCALT, 3, 2) ++ ++#define VCMD_get_space_mask_v0 VC_CMD(PROCALT, 4, 0) ++ ++#define VCMD_get_space_mask VC_CMD(VSPACE, 0, 1) ++#define VCMD_get_space_default VC_CMD(VSPACE, 1, 0) ++ ++ ++struct vcmd_space_mask_v1 { ++ uint64_t mask; ++}; ++ ++struct vcmd_space_mask_v2 { ++ uint64_t mask; ++ uint32_t index; ++}; ++ ++ ++#ifdef __KERNEL__ ++ ++extern int vc_enter_space_v1(struct vx_info *, void __user *); ++extern int vc_set_space_v1(struct vx_info *, void __user *); ++extern int vc_enter_space(struct vx_info *, void __user *); ++extern int vc_set_space(struct vx_info *, void __user *); ++extern int vc_get_space_mask(void __user *, int); ++ ++#endif /* __KERNEL__ */ ++#endif /* _VX_SPACE_CMD_H */ +diff -NurpP --minimal linux-3.2.34/include/linux/vserver/switch.h linux-3.2.34-vs2.3.2.15/include/linux/vserver/switch.h +--- linux-3.2.34/include/linux/vserver/switch.h 1970-01-01 01:00:00.000000000 +0100 ++++ linux-3.2.34-vs2.3.2.15/include/linux/vserver/switch.h 2011-12-05 19:33:02.000000000 +0100 +@@ -0,0 +1,98 @@ ++#ifndef _VX_SWITCH_H ++#define _VX_SWITCH_H ++ ++#include ++ ++ ++#define VC_CATEGORY(c) (((c) >> 24) & 0x3F) ++#define VC_COMMAND(c) (((c) >> 16) & 0xFF) ++#define VC_VERSION(c) ((c) & 0xFFF) ++ ++#define VC_CMD(c, i, v) ((((VC_CAT_ ## c) & 0x3F) << 24) \ ++ | (((i) & 0xFF) << 16) | ((v) & 0xFFF)) ++ ++/* ++ ++ Syscall Matrix V2.8 ++ ++ |VERSION|CREATE |MODIFY |MIGRATE|CONTROL|EXPERIM| |SPECIAL|SPECIAL| ++ |STATS |DESTROY|ALTER |CHANGE |LIMIT |TEST | | | | ++ |INFO |SETUP | |MOVE | | | | | | ++ -------+-------+-------+-------+-------+-------+-------+ +-------+-------+ ++ SYSTEM |VERSION|VSETUP |VHOST | | | | |DEVICE | | ++ HOST | 00| 01| 02| 03| 04| 05| | 06| 07| ++ -------+-------+-------+-------+-------+-------+-------+ +-------+-------+ ++ CPU | |VPROC |PROCALT|PROCMIG|PROCTRL| | |SCHED. 
| | ++ PROCESS| 08| 09| 10| 11| 12| 13| | 14| 15| ++ -------+-------+-------+-------+-------+-------+-------+ +-------+-------+ ++ MEMORY | | | | |MEMCTRL| | |SWAP | | ++ | 16| 17| 18| 19| 20| 21| | 22| 23| ++ -------+-------+-------+-------+-------+-------+-------+ +-------+-------+ ++ NETWORK| |VNET |NETALT |NETMIG |NETCTL | | |SERIAL | | ++ | 24| 25| 26| 27| 28| 29| | 30| 31| ++ -------+-------+-------+-------+-------+-------+-------+ +-------+-------+ ++ DISK | | | |TAGMIG |DLIMIT | | |INODE | | ++ VFS | 32| 33| 34| 35| 36| 37| | 38| 39| ++ -------+-------+-------+-------+-------+-------+-------+ +-------+-------+ ++ OTHER |VSTAT | | | | | | |VINFO | | ++ | 40| 41| 42| 43| 44| 45| | 46| 47| ++ =======+=======+=======+=======+=======+=======+=======+ +=======+=======+ ++ SPECIAL|EVENT | | | |FLAGS | | |VSPACE | | ++ | 48| 49| 50| 51| 52| 53| | 54| 55| ++ -------+-------+-------+-------+-------+-------+-------+ +-------+-------+ ++ SPECIAL|DEBUG | | | |RLIMIT |SYSCALL| | |COMPAT | ++ | 56| 57| 58| 59| 60|TEST 61| | 62| 63| ++ -------+-------+-------+-------+-------+-------+-------+ +-------+-------+ ++ ++*/ ++ ++#define VC_CAT_VERSION 0 ++ ++#define VC_CAT_VSETUP 1 ++#define VC_CAT_VHOST 2 ++ ++#define VC_CAT_DEVICE 6 ++ ++#define VC_CAT_VPROC 9 ++#define VC_CAT_PROCALT 10 ++#define VC_CAT_PROCMIG 11 ++#define VC_CAT_PROCTRL 12 ++ ++#define VC_CAT_SCHED 14 ++#define VC_CAT_MEMCTRL 20 ++ ++#define VC_CAT_VNET 25 ++#define VC_CAT_NETALT 26 ++#define VC_CAT_NETMIG 27 ++#define VC_CAT_NETCTRL 28 ++ ++#define VC_CAT_TAGMIG 35 ++#define VC_CAT_DLIMIT 36 ++#define VC_CAT_INODE 38 ++ ++#define VC_CAT_VSTAT 40 ++#define VC_CAT_VINFO 46 ++#define VC_CAT_EVENT 48 ++ ++#define VC_CAT_FLAGS 52 ++#define VC_CAT_VSPACE 54 ++#define VC_CAT_DEBUG 56 ++#define VC_CAT_RLIMIT 60 ++ ++#define VC_CAT_SYSTEST 61 ++#define VC_CAT_COMPAT 63 ++ ++/* query version */ ++ ++#define VCMD_get_version VC_CMD(VERSION, 0, 0) ++#define VCMD_get_vci VC_CMD(VERSION, 1, 0) ++ ++ ++#ifdef __KERNEL__ ++ ++#include ++ ++#endif /* __KERNEL__ */ ++ ++#endif /* _VX_SWITCH_H */ ++ +diff -NurpP --minimal linux-3.2.34/include/linux/vserver/tag.h linux-3.2.34-vs2.3.2.15/include/linux/vserver/tag.h +--- linux-3.2.34/include/linux/vserver/tag.h 1970-01-01 01:00:00.000000000 +0100 ++++ linux-3.2.34-vs2.3.2.15/include/linux/vserver/tag.h 2011-12-05 19:33:02.000000000 +0100 +@@ -0,0 +1,143 @@ ++#ifndef _DX_TAG_H ++#define _DX_TAG_H ++ ++#include ++ ++ ++#define DX_TAG(in) (IS_TAGGED(in)) ++ ++ ++#ifdef CONFIG_TAG_NFSD ++#define DX_TAG_NFSD 1 ++#else ++#define DX_TAG_NFSD 0 ++#endif ++ ++ ++#ifdef CONFIG_TAGGING_NONE ++ ++#define MAX_UID 0xFFFFFFFF ++#define MAX_GID 0xFFFFFFFF ++ ++#define INOTAG_TAG(cond, uid, gid, tag) (0) ++ ++#define TAGINO_UID(cond, uid, tag) (uid) ++#define TAGINO_GID(cond, gid, tag) (gid) ++ ++#endif ++ ++ ++#ifdef CONFIG_TAGGING_GID16 ++ ++#define MAX_UID 0xFFFFFFFF ++#define MAX_GID 0x0000FFFF ++ ++#define INOTAG_TAG(cond, uid, gid, tag) \ ++ ((cond) ? (((gid) >> 16) & 0xFFFF) : 0) ++ ++#define TAGINO_UID(cond, uid, tag) (uid) ++#define TAGINO_GID(cond, gid, tag) \ ++ ((cond) ? (((gid) & 0xFFFF) | ((tag) << 16)) : (gid)) ++ ++#endif ++ ++ ++#ifdef CONFIG_TAGGING_ID24 ++ ++#define MAX_UID 0x00FFFFFF ++#define MAX_GID 0x00FFFFFF ++ ++#define INOTAG_TAG(cond, uid, gid, tag) \ ++ ((cond) ? ((((uid) >> 16) & 0xFF00) | (((gid) >> 24) & 0xFF)) : 0) ++ ++#define TAGINO_UID(cond, uid, tag) \ ++ ((cond) ? (((uid) & 0xFFFFFF) | (((tag) & 0xFF00) << 16)) : (uid)) ++#define TAGINO_GID(cond, gid, tag) \ ++ ((cond) ? 
(((gid) & 0xFFFFFF) | (((tag) & 0x00FF) << 24)) : (gid)) ++ ++#endif ++ ++ ++#ifdef CONFIG_TAGGING_UID16 ++ ++#define MAX_UID 0x0000FFFF ++#define MAX_GID 0xFFFFFFFF ++ ++#define INOTAG_TAG(cond, uid, gid, tag) \ ++ ((cond) ? (((uid) >> 16) & 0xFFFF) : 0) ++ ++#define TAGINO_UID(cond, uid, tag) \ ++ ((cond) ? (((uid) & 0xFFFF) | ((tag) << 16)) : (uid)) ++#define TAGINO_GID(cond, gid, tag) (gid) ++ ++#endif ++ ++ ++#ifdef CONFIG_TAGGING_INTERN ++ ++#define MAX_UID 0xFFFFFFFF ++#define MAX_GID 0xFFFFFFFF ++ ++#define INOTAG_TAG(cond, uid, gid, tag) \ ++ ((cond) ? (tag) : 0) ++ ++#define TAGINO_UID(cond, uid, tag) (uid) ++#define TAGINO_GID(cond, gid, tag) (gid) ++ ++#endif ++ ++ ++#ifndef CONFIG_TAGGING_NONE ++#define dx_current_fstag(sb) \ ++ ((sb)->s_flags & MS_TAGGED ? dx_current_tag() : 0) ++#else ++#define dx_current_fstag(sb) (0) ++#endif ++ ++#ifndef CONFIG_TAGGING_INTERN ++#define TAGINO_TAG(cond, tag) (0) ++#else ++#define TAGINO_TAG(cond, tag) ((cond) ? (tag) : 0) ++#endif ++ ++#define INOTAG_UID(cond, uid, gid) \ ++ ((cond) ? ((uid) & MAX_UID) : (uid)) ++#define INOTAG_GID(cond, uid, gid) \ ++ ((cond) ? ((gid) & MAX_GID) : (gid)) ++ ++ ++static inline uid_t dx_map_uid(uid_t uid) ++{ ++ if ((uid > MAX_UID) && (uid != -1)) ++ uid = -2; ++ return (uid & MAX_UID); ++} ++ ++static inline gid_t dx_map_gid(gid_t gid) ++{ ++ if ((gid > MAX_GID) && (gid != -1)) ++ gid = -2; ++ return (gid & MAX_GID); ++} ++ ++struct peer_tag { ++ int32_t xid; ++ int32_t nid; ++}; ++ ++#define dx_notagcheck(sb) ((sb) && ((sb)->s_flags & MS_NOTAGCHECK)) ++ ++int dx_parse_tag(char *string, tag_t *tag, int remove, int *mnt_flags, ++ unsigned long *flags); ++ ++#ifdef CONFIG_PROPAGATE ++ ++void __dx_propagate_tag(struct nameidata *nd, struct inode *inode); ++ ++#define dx_propagate_tag(n, i) __dx_propagate_tag(n, i) ++ ++#else ++#define dx_propagate_tag(n, i) do { } while (0) ++#endif ++ ++#endif /* _DX_TAG_H */ +diff -NurpP --minimal linux-3.2.34/include/linux/vserver/tag_cmd.h linux-3.2.34-vs2.3.2.15/include/linux/vserver/tag_cmd.h +--- linux-3.2.34/include/linux/vserver/tag_cmd.h 1970-01-01 01:00:00.000000000 +0100 ++++ linux-3.2.34-vs2.3.2.15/include/linux/vserver/tag_cmd.h 2011-12-05 19:33:02.000000000 +0100 +@@ -0,0 +1,22 @@ ++#ifndef _VX_TAG_CMD_H ++#define _VX_TAG_CMD_H ++ ++ ++/* vinfo commands */ ++ ++#define VCMD_task_tag VC_CMD(VINFO, 3, 0) ++ ++#ifdef __KERNEL__ ++extern int vc_task_tag(uint32_t); ++ ++#endif /* __KERNEL__ */ ++ ++/* context commands */ ++ ++#define VCMD_tag_migrate VC_CMD(TAGMIG, 1, 0) ++ ++#ifdef __KERNEL__ ++extern int vc_tag_migrate(uint32_t); ++ ++#endif /* __KERNEL__ */ ++#endif /* _VX_TAG_CMD_H */ +diff -NurpP --minimal linux-3.2.34/include/net/addrconf.h linux-3.2.34-vs2.3.2.15/include/net/addrconf.h +--- linux-3.2.34/include/net/addrconf.h 2012-01-09 16:14:59.000000000 +0100 ++++ linux-3.2.34-vs2.3.2.15/include/net/addrconf.h 2011-12-05 19:33:02.000000000 +0100 +@@ -80,7 +80,8 @@ extern int ipv6_dev_get_saddr(struct n + struct net_device *dev, + const struct in6_addr *daddr, + unsigned int srcprefs, +- struct in6_addr *saddr); ++ struct in6_addr *saddr, ++ struct nx_info *nxi); + extern int ipv6_get_lladdr(struct net_device *dev, + struct in6_addr *addr, + unsigned char banned_flags); +diff -NurpP --minimal linux-3.2.34/include/net/af_unix.h linux-3.2.34-vs2.3.2.15/include/net/af_unix.h +--- linux-3.2.34/include/net/af_unix.h 2011-07-22 11:18:11.000000000 +0200 ++++ linux-3.2.34-vs2.3.2.15/include/net/af_unix.h 2011-12-05 19:33:02.000000000 +0100 +@@ -4,6 +4,7 @@ + 
#include + #include + #include ++#include + #include + + extern void unix_inflight(struct file *fp); +diff -NurpP --minimal linux-3.2.34/include/net/inet_timewait_sock.h linux-3.2.34-vs2.3.2.15/include/net/inet_timewait_sock.h +--- linux-3.2.34/include/net/inet_timewait_sock.h 2012-01-09 16:14:59.000000000 +0100 ++++ linux-3.2.34-vs2.3.2.15/include/net/inet_timewait_sock.h 2011-12-05 19:33:02.000000000 +0100 +@@ -112,6 +112,10 @@ struct inet_timewait_sock { + #define tw_net __tw_common.skc_net + #define tw_daddr __tw_common.skc_daddr + #define tw_rcv_saddr __tw_common.skc_rcv_saddr ++#define tw_xid __tw_common.skc_xid ++#define tw_vx_info __tw_common.skc_vx_info ++#define tw_nid __tw_common.skc_nid ++#define tw_nx_info __tw_common.skc_nx_info + int tw_timeout; + volatile unsigned char tw_substate; + unsigned char tw_rcv_wscale; +diff -NurpP --minimal linux-3.2.34/include/net/ip6_route.h linux-3.2.34-vs2.3.2.15/include/net/ip6_route.h +--- linux-3.2.34/include/net/ip6_route.h 2011-07-22 11:18:11.000000000 +0200 ++++ linux-3.2.34-vs2.3.2.15/include/net/ip6_route.h 2011-12-05 19:33:02.000000000 +0100 +@@ -86,7 +86,8 @@ extern int ip6_route_get_saddr(struct + struct rt6_info *rt, + const struct in6_addr *daddr, + unsigned int prefs, +- struct in6_addr *saddr); ++ struct in6_addr *saddr, ++ struct nx_info *nxi); + + extern struct rt6_info *rt6_lookup(struct net *net, + const struct in6_addr *daddr, +diff -NurpP --minimal linux-3.2.34/include/net/route.h linux-3.2.34-vs2.3.2.15/include/net/route.h +--- linux-3.2.34/include/net/route.h 2012-11-18 18:42:23.000000000 +0100 ++++ linux-3.2.34-vs2.3.2.15/include/net/route.h 2012-03-01 21:39:38.000000000 +0100 +@@ -202,6 +202,9 @@ static inline void ip_rt_put(struct rtab + dst_release(&rt->dst); + } + ++#include ++#include ++ + #define IPTOS_RT_MASK (IPTOS_TOS_MASK & ~3) + + extern const __u8 ip_tos2prio[16]; +@@ -253,6 +256,9 @@ static inline void ip_route_connect_init + protocol, flow_flags, dst, src, dport, sport); + } + ++extern struct rtable *ip_v4_find_src(struct net *net, struct nx_info *, ++ struct flowi4 *); ++ + static inline struct rtable *ip_route_connect(struct flowi4 *fl4, + __be32 dst, __be32 src, u32 tos, + int oif, u8 protocol, +@@ -261,11 +267,25 @@ static inline struct rtable *ip_route_co + { + struct net *net = sock_net(sk); + struct rtable *rt; ++ struct nx_info *nx_info = current_nx_info(); + + ip_route_connect_init(fl4, dst, src, tos, oif, protocol, + sport, dport, sk, can_sleep); + +- if (!dst || !src) { ++ if (sk) ++ nx_info = sk->sk_nx_info; ++ ++ vxdprintk(VXD_CBIT(net, 4), ++ "ip_route_connect(%p) %p,%p;%lx", ++ sk, nx_info, sk->sk_socket, ++ (sk->sk_socket?sk->sk_socket->flags:0)); ++ ++ rt = ip_v4_find_src(net, nx_info, fl4); ++ if (IS_ERR(rt)) ++ return rt; ++ ip_rt_put(rt); ++ ++ if (!fl4->daddr || !fl4->saddr) { + rt = __ip_route_output_key(net, fl4); + if (IS_ERR(rt)) + return rt; +diff -NurpP --minimal linux-3.2.34/include/net/sock.h linux-3.2.34-vs2.3.2.15/include/net/sock.h +--- linux-3.2.34/include/net/sock.h 2012-11-18 18:42:23.000000000 +0100 ++++ linux-3.2.34-vs2.3.2.15/include/net/sock.h 2012-10-22 12:59:52.000000000 +0200 +@@ -149,6 +149,10 @@ struct sock_common { + #ifdef CONFIG_NET_NS + struct net *skc_net; + #endif ++ xid_t skc_xid; ++ struct vx_info *skc_vx_info; ++ nid_t skc_nid; ++ struct nx_info *skc_nx_info; + /* + * fields between dontcopy_begin/dontcopy_end + * are not copied in sock_copy() +@@ -257,6 +261,10 @@ struct sock { + #define sk_bind_node __sk_common.skc_bind_node + #define sk_prot 
__sk_common.skc_prot + #define sk_net __sk_common.skc_net ++#define sk_xid __sk_common.skc_xid ++#define sk_vx_info __sk_common.skc_vx_info ++#define sk_nid __sk_common.skc_nid ++#define sk_nx_info __sk_common.skc_nx_info + socket_lock_t sk_lock; + struct sk_buff_head sk_receive_queue; + /* +diff -NurpP --minimal linux-3.2.34/init/Kconfig linux-3.2.34-vs2.3.2.15/init/Kconfig +--- linux-3.2.34/init/Kconfig 2012-01-09 16:14:59.000000000 +0100 ++++ linux-3.2.34-vs2.3.2.15/init/Kconfig 2011-12-05 19:33:02.000000000 +0100 +@@ -574,6 +574,7 @@ config HAVE_UNSTABLE_SCHED_CLOCK + menuconfig CGROUPS + boolean "Control Group support" + depends on EVENTFD ++ default y + help + This option adds support for grouping sets of processes together, for + use with process control subsystems such as Cpusets, CFS, memory +@@ -802,6 +803,7 @@ config IPC_NS + config USER_NS + bool "User namespace (EXPERIMENTAL)" + depends on EXPERIMENTAL ++ depends on VSERVER_DISABLED + default y + help + This allows containers, i.e. vservers, to use user namespaces +diff -NurpP --minimal linux-3.2.34/init/main.c linux-3.2.34-vs2.3.2.15/init/main.c +--- linux-3.2.34/init/main.c 2012-11-18 18:42:23.000000000 +0100 ++++ linux-3.2.34-vs2.3.2.15/init/main.c 2012-06-14 20:45:24.000000000 +0200 +@@ -68,6 +68,7 @@ + #include + #include + #include ++#include + + #include + #include +diff -NurpP --minimal linux-3.2.34/ipc/mqueue.c linux-3.2.34-vs2.3.2.15/ipc/mqueue.c +--- linux-3.2.34/ipc/mqueue.c 2012-01-09 16:14:59.000000000 +0100 ++++ linux-3.2.34-vs2.3.2.15/ipc/mqueue.c 2012-01-09 16:19:31.000000000 +0100 +@@ -33,6 +33,8 @@ + #include + #include + #include ++#include ++#include + + #include + #include "util.h" +@@ -66,6 +68,7 @@ struct mqueue_inode_info { + struct sigevent notify; + struct pid* notify_owner; + struct user_struct *user; /* user who created, for accounting */ ++ struct vx_info *vxi; + struct sock *notify_sock; + struct sk_buff *notify_cookie; + +@@ -128,6 +131,7 @@ static struct inode *mqueue_get_inode(st + if (S_ISREG(mode)) { + struct mqueue_inode_info *info; + struct task_struct *p = current; ++ struct vx_info *vxi = p->vx_info; + unsigned long mq_bytes, mq_msg_tblsz; + + inode->i_fop = &mqueue_file_operations; +@@ -141,6 +145,7 @@ static struct inode *mqueue_get_inode(st + info->notify_owner = NULL; + info->qsize = 0; + info->user = NULL; /* set when all is ok */ ++ info->vxi = NULL; + memset(&info->attr, 0, sizeof(info->attr)); + info->attr.mq_maxmsg = ipc_ns->mq_msg_max; + info->attr.mq_msgsize = ipc_ns->mq_msgsize_max; +@@ -158,17 +163,20 @@ static struct inode *mqueue_get_inode(st + + spin_lock(&mq_lock); + if (u->mq_bytes + mq_bytes < u->mq_bytes || +- u->mq_bytes + mq_bytes > task_rlimit(p, RLIMIT_MSGQUEUE)) { ++ u->mq_bytes + mq_bytes > task_rlimit(p, RLIMIT_MSGQUEUE) || ++ !vx_ipcmsg_avail(vxi, mq_bytes)) { + spin_unlock(&mq_lock); + /* mqueue_evict_inode() releases info->messages */ + ret = -EMFILE; + goto out_inode; + } + u->mq_bytes += mq_bytes; ++ vx_ipcmsg_add(vxi, u, mq_bytes); + spin_unlock(&mq_lock); + + /* all is ok */ + info->user = get_uid(u); ++ info->vxi = get_vx_info(vxi); + } else if (S_ISDIR(mode)) { + inc_nlink(inode); + /* Some things misbehave if size == 0 on a directory */ +@@ -278,8 +286,11 @@ static void mqueue_evict_inode(struct in + + info->attr.mq_msgsize); + user = info->user; + if (user) { ++ struct vx_info *vxi = info->vxi; ++ + spin_lock(&mq_lock); + user->mq_bytes -= mq_bytes; ++ vx_ipcmsg_sub(vxi, user, mq_bytes); + /* + * get_ns_from_inode() ensures that the + * (ipc_ns = 
sb->s_fs_info) is either a valid ipc_ns +@@ -289,6 +300,7 @@ static void mqueue_evict_inode(struct in + if (ipc_ns) + ipc_ns->mq_queues_count--; + spin_unlock(&mq_lock); ++ put_vx_info(vxi); + free_uid(user); + } + if (ipc_ns) +diff -NurpP --minimal linux-3.2.34/ipc/msg.c linux-3.2.34-vs2.3.2.15/ipc/msg.c +--- linux-3.2.34/ipc/msg.c 2011-05-22 16:17:59.000000000 +0200 ++++ linux-3.2.34-vs2.3.2.15/ipc/msg.c 2011-12-05 19:33:02.000000000 +0100 +@@ -37,6 +37,7 @@ + #include + #include + #include ++#include + + #include + #include +@@ -190,6 +191,7 @@ static int newque(struct ipc_namespace * + + msq->q_perm.mode = msgflg & S_IRWXUGO; + msq->q_perm.key = key; ++ msq->q_perm.xid = vx_current_xid(); + + msq->q_perm.security = NULL; + retval = security_msg_queue_alloc(msq); +diff -NurpP --minimal linux-3.2.34/ipc/namespace.c linux-3.2.34-vs2.3.2.15/ipc/namespace.c +--- linux-3.2.34/ipc/namespace.c 2011-07-22 11:18:12.000000000 +0200 ++++ linux-3.2.34-vs2.3.2.15/ipc/namespace.c 2011-12-05 19:33:02.000000000 +0100 +@@ -13,11 +13,12 @@ + #include + #include + #include ++#include ++#include + + #include "util.h" + +-static struct ipc_namespace *create_ipc_ns(struct task_struct *tsk, +- struct ipc_namespace *old_ns) ++static struct ipc_namespace *create_ipc_ns(struct user_namespace *user_ns) + { + struct ipc_namespace *ns; + int err; +@@ -46,19 +47,18 @@ static struct ipc_namespace *create_ipc_ + ipcns_notify(IPCNS_CREATED); + register_ipcns_notifier(ns); + +- ns->user_ns = get_user_ns(task_cred_xxx(tsk, user)->user_ns); ++ ns->user_ns = get_user_ns(user_ns); + + return ns; + } + + struct ipc_namespace *copy_ipcs(unsigned long flags, +- struct task_struct *tsk) ++ struct ipc_namespace *old_ns, ++ struct user_namespace *user_ns) + { +- struct ipc_namespace *ns = tsk->nsproxy->ipc_ns; +- + if (!(flags & CLONE_NEWIPC)) +- return get_ipc_ns(ns); +- return create_ipc_ns(tsk, ns); ++ return get_ipc_ns(old_ns); ++ return create_ipc_ns(user_ns); + } + + /* +diff -NurpP --minimal linux-3.2.34/ipc/sem.c linux-3.2.34-vs2.3.2.15/ipc/sem.c +--- linux-3.2.34/ipc/sem.c 2012-01-09 16:14:59.000000000 +0100 ++++ linux-3.2.34-vs2.3.2.15/ipc/sem.c 2011-12-05 19:33:02.000000000 +0100 +@@ -86,6 +86,8 @@ + #include + #include + #include ++#include ++#include + + #include + #include "util.h" +@@ -306,6 +308,7 @@ static int newary(struct ipc_namespace * + + sma->sem_perm.mode = (semflg & S_IRWXUGO); + sma->sem_perm.key = key; ++ sma->sem_perm.xid = vx_current_xid(); + + sma->sem_perm.security = NULL; + retval = security_sem_alloc(sma); +@@ -321,6 +324,9 @@ static int newary(struct ipc_namespace * + return id; + } + ns->used_sems += nsems; ++ /* FIXME: obsoleted? */ ++ vx_semary_inc(sma); ++ vx_nsems_add(sma, nsems); + + sma->sem_base = (struct sem *) &sma[1]; + +@@ -770,6 +776,9 @@ static void freeary(struct ipc_namespace + + wake_up_sem_queue_do(&tasks); + ns->used_sems -= sma->sem_nsems; ++ /* FIXME: obsoleted? 
*/ ++ vx_nsems_sub(sma, sma->sem_nsems); ++ vx_semary_dec(sma); + security_sem_free(sma); + ipc_rcu_putref(sma); + } +diff -NurpP --minimal linux-3.2.34/ipc/shm.c linux-3.2.34-vs2.3.2.15/ipc/shm.c +--- linux-3.2.34/ipc/shm.c 2012-11-18 18:42:23.000000000 +0100 ++++ linux-3.2.34-vs2.3.2.15/ipc/shm.c 2012-01-26 08:52:10.000000000 +0100 +@@ -39,6 +39,8 @@ + #include + #include + #include ++#include ++#include + + #include + +@@ -187,7 +189,12 @@ static void shm_open(struct vm_area_stru + */ + static void shm_destroy(struct ipc_namespace *ns, struct shmid_kernel *shp) + { +- ns->shm_tot -= (shp->shm_segsz + PAGE_SIZE - 1) >> PAGE_SHIFT; ++ struct vx_info *vxi = lookup_vx_info(shp->shm_perm.xid); ++ int numpages = (shp->shm_segsz + PAGE_SIZE - 1) >> PAGE_SHIFT; ++ ++ vx_ipcshm_sub(vxi, shp, numpages); ++ ns->shm_tot -= numpages; ++ + shm_rmid(ns, shp); + shm_unlock(shp); + if (!is_file_hugepages(shp->shm_file)) +@@ -197,6 +204,7 @@ static void shm_destroy(struct ipc_names + shp->mlock_user); + fput (shp->shm_file); + security_shm_free(shp); ++ put_vx_info(vxi); + ipc_rcu_putref(shp); + } + +@@ -462,11 +470,15 @@ static int newseg(struct ipc_namespace * + if (ns->shm_tot + numpages > ns->shm_ctlall) + return -ENOSPC; + ++ if (!vx_ipcshm_avail(current_vx_info(), numpages)) ++ return -ENOSPC; ++ + shp = ipc_rcu_alloc(sizeof(*shp)); + if (!shp) + return -ENOMEM; + + shp->shm_perm.key = key; ++ shp->shm_perm.xid = vx_current_xid(); + shp->shm_perm.mode = (shmflg & S_IRWXUGO); + shp->mlock_user = NULL; + +@@ -521,6 +533,7 @@ static int newseg(struct ipc_namespace * + ns->shm_tot += numpages; + error = shp->shm_perm.id; + shm_unlock(shp); ++ vx_ipcshm_add(current_vx_info(), key, numpages); + return error; + + no_id: +diff -NurpP --minimal linux-3.2.34/kernel/Makefile linux-3.2.34-vs2.3.2.15/kernel/Makefile +--- linux-3.2.34/kernel/Makefile 2012-01-09 16:14:59.000000000 +0100 ++++ linux-3.2.34-vs2.3.2.15/kernel/Makefile 2011-12-05 19:33:02.000000000 +0100 +@@ -24,6 +24,7 @@ CFLAGS_REMOVE_sched_clock.o = -pg + CFLAGS_REMOVE_irq_work.o = -pg + endif + ++obj-y += vserver/ + obj-$(CONFIG_FREEZER) += freezer.o + obj-$(CONFIG_PROFILING) += profile.o + obj-$(CONFIG_SYSCTL_SYSCALL_CHECK) += sysctl_check.o +diff -NurpP --minimal linux-3.2.34/kernel/capability.c linux-3.2.34-vs2.3.2.15/kernel/capability.c +--- linux-3.2.34/kernel/capability.c 2012-01-09 16:14:59.000000000 +0100 ++++ linux-3.2.34-vs2.3.2.15/kernel/capability.c 2011-12-05 19:33:02.000000000 +0100 +@@ -15,6 +15,7 @@ + #include + #include + #include ++#include + #include + + /* +@@ -116,6 +117,7 @@ static int cap_validate_magic(cap_user_h + return 0; + } + ++ + /* + * The only thing that can change the capabilities of the current + * process is the current process. 
As such, we can't be in this code +@@ -340,6 +342,8 @@ bool has_capability_noaudit(struct task_ + return (ret == 0); + } + ++#include ++ + /** + * capable - Determine if the current task has a superior capability in effect + * @cap: The capability to be tested for +diff -NurpP --minimal linux-3.2.34/kernel/compat.c linux-3.2.34-vs2.3.2.15/kernel/compat.c +--- linux-3.2.34/kernel/compat.c 2012-11-18 18:42:23.000000000 +0100 ++++ linux-3.2.34-vs2.3.2.15/kernel/compat.c 2012-06-14 20:45:24.000000000 +0200 +@@ -1002,7 +1002,7 @@ asmlinkage long compat_sys_stime(compat_ + if (err) + return err; + +- do_settimeofday(&tv); ++ vx_settimeofday(&tv); + return 0; + } + +diff -NurpP --minimal linux-3.2.34/kernel/cred.c linux-3.2.34-vs2.3.2.15/kernel/cred.c +--- linux-3.2.34/kernel/cred.c 2012-11-18 18:42:23.000000000 +0100 ++++ linux-3.2.34-vs2.3.2.15/kernel/cred.c 2012-04-16 12:14:54.000000000 +0200 +@@ -61,31 +61,6 @@ struct cred init_cred = { + #endif + }; + +-static inline void set_cred_subscribers(struct cred *cred, int n) +-{ +-#ifdef CONFIG_DEBUG_CREDENTIALS +- atomic_set(&cred->subscribers, n); +-#endif +-} +- +-static inline int read_cred_subscribers(const struct cred *cred) +-{ +-#ifdef CONFIG_DEBUG_CREDENTIALS +- return atomic_read(&cred->subscribers); +-#else +- return 0; +-#endif +-} +- +-static inline void alter_cred_subscribers(const struct cred *_cred, int n) +-{ +-#ifdef CONFIG_DEBUG_CREDENTIALS +- struct cred *cred = (struct cred *) _cred; +- +- atomic_add(n, &cred->subscribers); +-#endif +-} +- + /* + * Dispose of the shared task group credentials + */ +@@ -281,21 +256,16 @@ error: + * + * Call commit_creds() or abort_creds() to clean up. + */ +-struct cred *prepare_creds(void) ++struct cred *__prepare_creds(const struct cred *old) + { +- struct task_struct *task = current; +- const struct cred *old; + struct cred *new; + +- validate_process_creds(); +- + new = kmem_cache_alloc(cred_jar, GFP_KERNEL); + if (!new) + return NULL; + + kdebug("prepare_creds() alloc %p", new); + +- old = task->cred; + memcpy(new, old, sizeof(struct cred)); + + atomic_set(&new->usage, 1); +@@ -322,6 +292,13 @@ error: + abort_creds(new); + return NULL; + } ++ ++struct cred *prepare_creds(void) ++{ ++ validate_process_creds(); ++ ++ return __prepare_creds(current->cred); ++} + EXPORT_SYMBOL(prepare_creds); + + /* +diff -NurpP --minimal linux-3.2.34/kernel/exit.c linux-3.2.34-vs2.3.2.15/kernel/exit.c +--- linux-3.2.34/kernel/exit.c 2012-11-18 18:42:24.000000000 +0100 ++++ linux-3.2.34-vs2.3.2.15/kernel/exit.c 2012-10-22 12:59:52.000000000 +0200 +@@ -48,6 +48,10 @@ + #include + #include + #include ++#include ++#include ++#include ++#include + #include + #include + #include +@@ -480,9 +484,11 @@ static void close_files(struct files_str + filp_close(file, files); + cond_resched(); + } ++ vx_openfd_dec(i); + } + i++; + set >>= 1; ++ cond_resched(); + } + } + } +@@ -987,6 +993,9 @@ NORET_TYPE void do_exit(long code) + */ + ptrace_put_breakpoints(tsk); + ++ /* needs to stay before exit_notify() */ ++ exit_vx_info_early(tsk, code); ++ + exit_notify(tsk, group_dead); + #ifdef CONFIG_NUMA + task_lock(tsk); +@@ -1017,6 +1026,10 @@ NORET_TYPE void do_exit(long code) + + validate_creds_for_do_exit(tsk); + ++ /* needs to stay after exit_notify() */ ++ exit_vx_info(tsk, code); ++ exit_nx_info(tsk); ++ + preempt_disable(); + exit_rcu(); + +@@ -1038,6 +1051,7 @@ NORET_TYPE void do_exit(long code) + /* causes final put_task_struct in finish_task_switch(). 
*/ + tsk->state = TASK_DEAD; + schedule(); ++ printk("bad task: %p [%lx]\n", current, current->state); + BUG(); + /* Avoid "noreturn function does return". */ + for (;;) +diff -NurpP --minimal linux-3.2.34/kernel/fork.c linux-3.2.34-vs2.3.2.15/kernel/fork.c +--- linux-3.2.34/kernel/fork.c 2012-11-18 18:42:24.000000000 +0100 ++++ linux-3.2.34-vs2.3.2.15/kernel/fork.c 2012-08-13 12:40:51.000000000 +0200 +@@ -68,6 +68,9 @@ + #include + #include + #include ++#include ++#include ++#include + + #include + #include +@@ -167,6 +170,8 @@ void free_task(struct task_struct *tsk) + account_kernel_stack(tsk->stack, -1); + free_thread_info(tsk->stack); + rt_mutex_debug_task_free(tsk); ++ clr_vx_info(&tsk->vx_info); ++ clr_nx_info(&tsk->nx_info); + ftrace_graph_exit_task(tsk); + free_task_struct(tsk); + } +@@ -503,6 +508,7 @@ static struct mm_struct *mm_init(struct + if (likely(!mm_alloc_pgd(mm))) { + mm->def_flags = 0; + mmu_notifier_mm_init(mm); ++ set_vx_info(&mm->mm_vx_info, p->vx_info); + return mm; + } + +@@ -540,6 +546,7 @@ void __mmdrop(struct mm_struct *mm) + #ifdef CONFIG_TRANSPARENT_HUGEPAGE + VM_BUG_ON(mm->pmd_huge_pte); + #endif ++ clr_vx_info(&mm->mm_vx_info); + free_mm(mm); + } + EXPORT_SYMBOL_GPL(__mmdrop); +@@ -727,6 +734,7 @@ struct mm_struct *dup_mm(struct task_str + goto fail_nomem; + + memcpy(mm, oldmm, sizeof(*mm)); ++ mm->mm_vx_info = NULL; + mm_init_cpumask(mm); + + /* Initializing for Swap token stuff */ +@@ -770,6 +778,7 @@ fail_nocontext: + * If init_new_context() failed, we cannot use mmput() to free the mm + * because it calls destroy_context() + */ ++ clr_vx_info(&mm->mm_vx_info); + mm_free_pgd(mm); + free_mm(mm); + return NULL; +@@ -1058,6 +1067,8 @@ static struct task_struct *copy_process( + int retval; + struct task_struct *p; + int cgroup_callbacks_done = 0; ++ struct vx_info *vxi; ++ struct nx_info *nxi; + + if ((clone_flags & (CLONE_NEWNS|CLONE_FS)) == (CLONE_NEWNS|CLONE_FS)) + return ERR_PTR(-EINVAL); +@@ -1104,7 +1115,12 @@ static struct task_struct *copy_process( + DEBUG_LOCKS_WARN_ON(!p->hardirqs_enabled); + DEBUG_LOCKS_WARN_ON(!p->softirqs_enabled); + #endif ++ init_vx_info(&p->vx_info, current_vx_info()); ++ init_nx_info(&p->nx_info, current_nx_info()); ++ + retval = -EAGAIN; ++ if (!vx_nproc_avail(1)) ++ goto bad_fork_free; + if (atomic_read(&p->real_cred->user->processes) >= + task_rlimit(p, RLIMIT_NPROC)) { + if (!capable(CAP_SYS_ADMIN) && !capable(CAP_SYS_RESOURCE) && +@@ -1377,6 +1393,18 @@ static struct task_struct *copy_process( + + total_forks++; + spin_unlock(¤t->sighand->siglock); ++ ++ /* p is copy of current */ ++ vxi = p->vx_info; ++ if (vxi) { ++ claim_vx_info(vxi, p); ++ atomic_inc(&vxi->cvirt.nr_threads); ++ atomic_inc(&vxi->cvirt.total_forks); ++ vx_nproc_inc(p); ++ } ++ nxi = p->nx_info; ++ if (nxi) ++ claim_nx_info(nxi, p); + write_unlock_irq(&tasklist_lock); + proc_fork_connector(p); + cgroup_post_fork(p); +diff -NurpP --minimal linux-3.2.34/kernel/kthread.c linux-3.2.34-vs2.3.2.15/kernel/kthread.c +--- linux-3.2.34/kernel/kthread.c 2012-01-09 16:15:00.000000000 +0100 ++++ linux-3.2.34-vs2.3.2.15/kernel/kthread.c 2011-12-05 19:33:02.000000000 +0100 +@@ -16,6 +16,7 @@ + #include + #include + #include ++#include + #include + + static DEFINE_SPINLOCK(kthread_create_lock); +diff -NurpP --minimal linux-3.2.34/kernel/nsproxy.c linux-3.2.34-vs2.3.2.15/kernel/nsproxy.c +--- linux-3.2.34/kernel/nsproxy.c 2012-01-09 16:15:00.000000000 +0100 ++++ linux-3.2.34-vs2.3.2.15/kernel/nsproxy.c 2011-12-05 19:33:02.000000000 +0100 +@@ -20,6 +20,8 @@ + #include + 
#include + #include ++#include ++#include + #include + #include + #include +@@ -46,8 +48,11 @@ static inline struct nsproxy *create_nsp + struct nsproxy *nsproxy; + + nsproxy = kmem_cache_alloc(nsproxy_cachep, GFP_KERNEL); +- if (nsproxy) ++ if (nsproxy) { + atomic_set(&nsproxy->count, 1); ++ atomic_inc(&vs_global_nsproxy); ++ } ++ vxdprintk(VXD_CBIT(space, 2), "create_nsproxy = %p[1]", nsproxy); + return nsproxy; + } + +@@ -56,8 +61,11 @@ static inline struct nsproxy *create_nsp + * Return the newly created nsproxy. Do not attach this to the task, + * leave it to the caller to do proper locking and attach it to task. + */ +-static struct nsproxy *create_new_namespaces(unsigned long flags, +- struct task_struct *tsk, struct fs_struct *new_fs) ++static struct nsproxy *unshare_namespaces(unsigned long flags, ++ struct nsproxy *orig, ++ struct fs_struct *new_fs, ++ struct user_namespace *new_user, ++ struct pid_namespace *new_pid) + { + struct nsproxy *new_nsp; + int err; +@@ -66,31 +74,31 @@ static struct nsproxy *create_new_namesp + if (!new_nsp) + return ERR_PTR(-ENOMEM); + +- new_nsp->mnt_ns = copy_mnt_ns(flags, tsk->nsproxy->mnt_ns, new_fs); ++ new_nsp->mnt_ns = copy_mnt_ns(flags, orig->mnt_ns, new_fs); + if (IS_ERR(new_nsp->mnt_ns)) { + err = PTR_ERR(new_nsp->mnt_ns); + goto out_ns; + } + +- new_nsp->uts_ns = copy_utsname(flags, tsk); ++ new_nsp->uts_ns = copy_utsname(flags, orig->uts_ns, new_user); + if (IS_ERR(new_nsp->uts_ns)) { + err = PTR_ERR(new_nsp->uts_ns); + goto out_uts; + } + +- new_nsp->ipc_ns = copy_ipcs(flags, tsk); ++ new_nsp->ipc_ns = copy_ipcs(flags, orig->ipc_ns, new_user); + if (IS_ERR(new_nsp->ipc_ns)) { + err = PTR_ERR(new_nsp->ipc_ns); + goto out_ipc; + } + +- new_nsp->pid_ns = copy_pid_ns(flags, task_active_pid_ns(tsk)); ++ new_nsp->pid_ns = copy_pid_ns(flags, new_pid); + if (IS_ERR(new_nsp->pid_ns)) { + err = PTR_ERR(new_nsp->pid_ns); + goto out_pid; + } + +- new_nsp->net_ns = copy_net_ns(flags, tsk->nsproxy->net_ns); ++ new_nsp->net_ns = copy_net_ns(flags, orig->net_ns); + if (IS_ERR(new_nsp->net_ns)) { + err = PTR_ERR(new_nsp->net_ns); + goto out_net; +@@ -115,6 +123,40 @@ out_ns: + return ERR_PTR(err); + } + ++static struct nsproxy *create_new_namespaces(unsigned long flags, ++ struct task_struct *tsk, struct fs_struct *new_fs) ++{ ++ return unshare_namespaces(flags, tsk->nsproxy, ++ new_fs, task_cred_xxx(tsk, user)->user_ns, ++ task_active_pid_ns(tsk)); ++} ++ ++/* ++ * copies the nsproxy, setting refcount to 1, and grabbing a ++ * reference to all contained namespaces. ++ */ ++struct nsproxy *copy_nsproxy(struct nsproxy *orig) ++{ ++ struct nsproxy *ns = create_nsproxy(); ++ ++ if (ns) { ++ memcpy(ns, orig, sizeof(struct nsproxy)); ++ atomic_set(&ns->count, 1); ++ ++ if (ns->mnt_ns) ++ get_mnt_ns(ns->mnt_ns); ++ if (ns->uts_ns) ++ get_uts_ns(ns->uts_ns); ++ if (ns->ipc_ns) ++ get_ipc_ns(ns->ipc_ns); ++ if (ns->pid_ns) ++ get_pid_ns(ns->pid_ns); ++ if (ns->net_ns) ++ get_net(ns->net_ns); ++ } ++ return ns; ++} ++ + /* + * called from clone. This now handles copy for nsproxy and all + * namespaces therein. 
+@@ -122,9 +164,12 @@ out_ns: + int copy_namespaces(unsigned long flags, struct task_struct *tsk) + { + struct nsproxy *old_ns = tsk->nsproxy; +- struct nsproxy *new_ns; ++ struct nsproxy *new_ns = NULL; + int err = 0; + ++ vxdprintk(VXD_CBIT(space, 7), "copy_namespaces(0x%08lx,%p[%p])", ++ flags, tsk, old_ns); ++ + if (!old_ns) + return 0; + +@@ -134,7 +179,7 @@ int copy_namespaces(unsigned long flags, + CLONE_NEWPID | CLONE_NEWNET))) + return 0; + +- if (!capable(CAP_SYS_ADMIN)) { ++ if (!vx_can_unshare(CAP_SYS_ADMIN, flags)) { + err = -EPERM; + goto out; + } +@@ -161,6 +206,9 @@ int copy_namespaces(unsigned long flags, + + out: + put_nsproxy(old_ns); ++ vxdprintk(VXD_CBIT(space, 3), ++ "copy_namespaces(0x%08lx,%p[%p]) = %d [%p]", ++ flags, tsk, old_ns, err, new_ns); + return err; + } + +@@ -174,7 +222,9 @@ void free_nsproxy(struct nsproxy *ns) + put_ipc_ns(ns->ipc_ns); + if (ns->pid_ns) + put_pid_ns(ns->pid_ns); +- put_net(ns->net_ns); ++ if (ns->net_ns) ++ put_net(ns->net_ns); ++ atomic_dec(&vs_global_nsproxy); + kmem_cache_free(nsproxy_cachep, ns); + } + +@@ -187,11 +237,15 @@ int unshare_nsproxy_namespaces(unsigned + { + int err = 0; + ++ vxdprintk(VXD_CBIT(space, 4), ++ "unshare_nsproxy_namespaces(0x%08lx,[%p])", ++ unshare_flags, current->nsproxy); ++ + if (!(unshare_flags & (CLONE_NEWNS | CLONE_NEWUTS | CLONE_NEWIPC | + CLONE_NEWNET))) + return 0; + +- if (!capable(CAP_SYS_ADMIN)) ++ if (!vx_can_unshare(CAP_SYS_ADMIN, unshare_flags)) + return -EPERM; + + *new_nsp = create_new_namespaces(unshare_flags, current, +diff -NurpP --minimal linux-3.2.34/kernel/pid.c linux-3.2.34-vs2.3.2.15/kernel/pid.c +--- linux-3.2.34/kernel/pid.c 2012-01-09 16:15:00.000000000 +0100 ++++ linux-3.2.34-vs2.3.2.15/kernel/pid.c 2011-12-05 19:43:14.000000000 +0100 +@@ -36,6 +36,7 @@ + #include + #include + #include ++#include + + #define pid_hashfn(nr, ns) \ + hash_long((unsigned long)nr + (unsigned long)ns, pidhash_shift) +@@ -342,7 +343,7 @@ EXPORT_SYMBOL_GPL(find_pid_ns); + + struct pid *find_vpid(int nr) + { +- return find_pid_ns(nr, current->nsproxy->pid_ns); ++ return find_pid_ns(vx_rmap_pid(nr), current->nsproxy->pid_ns); + } + EXPORT_SYMBOL_GPL(find_vpid); + +@@ -402,6 +403,9 @@ void transfer_pid(struct task_struct *ol + struct task_struct *pid_task(struct pid *pid, enum pid_type type) + { + struct task_struct *result = NULL; ++ ++ if (type == PIDTYPE_REALPID) ++ type = PIDTYPE_PID; + if (pid) { + struct hlist_node *first; + first = rcu_dereference_check(hlist_first_rcu(&pid->tasks[type]), +@@ -421,7 +425,7 @@ struct task_struct *find_task_by_pid_ns( + rcu_lockdep_assert(rcu_read_lock_held(), + "find_task_by_pid_ns() needs rcu_read_lock()" + " protection"); +- return pid_task(find_pid_ns(nr, ns), PIDTYPE_PID); ++ return pid_task(find_pid_ns(vx_rmap_pid(nr), ns), PIDTYPE_PID); + } + + struct task_struct *find_task_by_vpid(pid_t vnr) +@@ -465,7 +469,7 @@ struct pid *find_get_pid(pid_t nr) + } + EXPORT_SYMBOL_GPL(find_get_pid); + +-pid_t pid_nr_ns(struct pid *pid, struct pid_namespace *ns) ++pid_t pid_unmapped_nr_ns(struct pid *pid, struct pid_namespace *ns) + { + struct upid *upid; + pid_t nr = 0; +@@ -478,6 +482,11 @@ pid_t pid_nr_ns(struct pid *pid, struct + return nr; + } + ++pid_t pid_nr_ns(struct pid *pid, struct pid_namespace *ns) ++{ ++ return vx_map_pid(pid_unmapped_nr_ns(pid, ns)); ++} ++ + pid_t pid_vnr(struct pid *pid) + { + return pid_nr_ns(pid, current->nsproxy->pid_ns); +diff -NurpP --minimal linux-3.2.34/kernel/pid_namespace.c linux-3.2.34-vs2.3.2.15/kernel/pid_namespace.c +--- 
linux-3.2.34/kernel/pid_namespace.c 2011-05-22 16:17:59.000000000 +0200 ++++ linux-3.2.34-vs2.3.2.15/kernel/pid_namespace.c 2011-12-05 19:33:02.000000000 +0100 +@@ -15,6 +15,7 @@ + #include + #include + #include ++#include + + #define BITS_PER_PAGE (PAGE_SIZE*8) + +@@ -88,6 +89,7 @@ static struct pid_namespace *create_pid_ + goto out_free_map; + + kref_init(&ns->kref); ++ atomic_inc(&vs_global_pid_ns); + ns->level = level; + ns->parent = get_pid_ns(parent_pid_ns); + +@@ -119,6 +121,7 @@ static void destroy_pid_namespace(struct + + for (i = 0; i < PIDMAP_ENTRIES; i++) + kfree(ns->pidmap[i].page); ++ atomic_dec(&vs_global_pid_ns); + kmem_cache_free(pid_ns_cachep, ns); + } + +diff -NurpP --minimal linux-3.2.34/kernel/posix-timers.c linux-3.2.34-vs2.3.2.15/kernel/posix-timers.c +--- linux-3.2.34/kernel/posix-timers.c 2012-01-09 16:15:00.000000000 +0100 ++++ linux-3.2.34-vs2.3.2.15/kernel/posix-timers.c 2011-12-05 19:44:00.000000000 +0100 +@@ -47,6 +47,7 @@ + #include + #include + #include ++#include + + /* + * Management arrays for POSIX timers. Timers are kept in slab memory +@@ -340,6 +341,7 @@ int posix_timer_event(struct k_itimer *t + { + struct task_struct *task; + int shared, ret = -1; ++ + /* + * FIXME: if ->sigq is queued we can race with + * dequeue_signal()->do_schedule_next_timer(). +@@ -356,10 +358,18 @@ int posix_timer_event(struct k_itimer *t + rcu_read_lock(); + task = pid_task(timr->it_pid, PIDTYPE_PID); + if (task) { ++ struct vx_info_save vxis; ++ struct vx_info *vxi; ++ ++ vxi = get_vx_info(task->vx_info); ++ enter_vx_info(vxi, &vxis); + shared = !(timr->it_sigev_notify & SIGEV_THREAD_ID); + ret = send_sigqueue(timr->sigq, task, shared); ++ leave_vx_info(&vxis); ++ put_vx_info(vxi); + } + rcu_read_unlock(); ++ + /* If we failed to send the signal the timer stops. 
*/ + return ret > 0; + } +diff -NurpP --minimal linux-3.2.34/kernel/printk.c linux-3.2.34-vs2.3.2.15/kernel/printk.c +--- linux-3.2.34/kernel/printk.c 2012-01-09 16:15:00.000000000 +0100 ++++ linux-3.2.34-vs2.3.2.15/kernel/printk.c 2011-12-15 01:11:33.000000000 +0100 +@@ -41,6 +41,7 @@ + #include + #include + #include ++#include + + #include + +@@ -314,7 +315,7 @@ static int check_syslog_permissions(int + return 0; + + if (syslog_action_restricted(type)) { +- if (capable(CAP_SYSLOG)) ++ if (vx_capable(CAP_SYSLOG, VXC_SYSLOG)) + return 0; + /* For historical reasons, accept CAP_SYS_ADMIN too, with a warning */ + if (capable(CAP_SYS_ADMIN)) { +@@ -344,12 +345,9 @@ int do_syslog(int type, char __user *buf + if (error) + return error; + +- switch (type) { +- case SYSLOG_ACTION_CLOSE: /* Close log */ +- break; +- case SYSLOG_ACTION_OPEN: /* Open log */ +- break; +- case SYSLOG_ACTION_READ: /* Read from log */ ++ if ((type == SYSLOG_ACTION_READ) || ++ (type == SYSLOG_ACTION_READ_ALL) || ++ (type == SYSLOG_ACTION_READ_CLEAR)) { + error = -EINVAL; + if (!buf || len < 0) + goto out; +@@ -360,6 +358,16 @@ int do_syslog(int type, char __user *buf + error = -EFAULT; + goto out; + } ++ } ++ if (!vx_check(0, VS_ADMIN|VS_WATCH)) ++ return vx_do_syslog(type, buf, len); ++ ++ switch (type) { ++ case SYSLOG_ACTION_CLOSE: /* Close log */ ++ break; ++ case SYSLOG_ACTION_OPEN: /* Open log */ ++ break; ++ case SYSLOG_ACTION_READ: /* Read from log */ + error = wait_event_interruptible(log_wait, + (log_start - log_end)); + if (error) +@@ -386,16 +394,6 @@ int do_syslog(int type, char __user *buf + /* FALL THRU */ + /* Read last kernel messages */ + case SYSLOG_ACTION_READ_ALL: +- error = -EINVAL; +- if (!buf || len < 0) +- goto out; +- error = 0; +- if (!len) +- goto out; +- if (!access_ok(VERIFY_WRITE, buf, len)) { +- error = -EFAULT; +- goto out; +- } + count = len; + if (count > log_buf_len) + count = log_buf_len; +diff -NurpP --minimal linux-3.2.34/kernel/ptrace.c linux-3.2.34-vs2.3.2.15/kernel/ptrace.c +--- linux-3.2.34/kernel/ptrace.c 2012-01-09 16:15:00.000000000 +0100 ++++ linux-3.2.34-vs2.3.2.15/kernel/ptrace.c 2012-01-09 16:19:31.000000000 +0100 +@@ -22,6 +22,7 @@ + #include + #include + #include ++#include + #include + #include + +@@ -209,6 +210,11 @@ ok: + dumpable = get_dumpable(task->mm); + if (!dumpable && !task_ns_capable(task, CAP_SYS_PTRACE)) + return -EPERM; ++ if (!vx_check(task->xid, VS_ADMIN_P|VS_WATCH_P|VS_IDENT)) ++ return -EPERM; ++ if (!vx_check(task->xid, VS_IDENT) && ++ !task_vx_flags(task, VXF_STATE_ADMIN, 0)) ++ return -EACCES; + + return security_ptrace_access_check(task, mode); + } +diff -NurpP --minimal linux-3.2.34/kernel/sched.c linux-3.2.34-vs2.3.2.15/kernel/sched.c +--- linux-3.2.34/kernel/sched.c 2012-11-18 18:42:24.000000000 +0100 ++++ linux-3.2.34-vs2.3.2.15/kernel/sched.c 2012-10-22 12:59:52.000000000 +0200 +@@ -72,6 +72,8 @@ + #include + #include + #include ++#include ++#include + + #include + #include +@@ -3460,9 +3462,17 @@ EXPORT_SYMBOL(avenrun); /* should be rem + */ + void get_avenrun(unsigned long *loads, unsigned long offset, int shift) + { +- loads[0] = (avenrun[0] + offset) << shift; +- loads[1] = (avenrun[1] + offset) << shift; +- loads[2] = (avenrun[2] + offset) << shift; ++ if (vx_flags(VXF_VIRT_LOAD, 0)) { ++ struct vx_info *vxi = current_vx_info(); ++ ++ loads[0] = (vxi->cvirt.load[0] + offset) << shift; ++ loads[1] = (vxi->cvirt.load[1] + offset) << shift; ++ loads[2] = (vxi->cvirt.load[2] + offset) << shift; ++ } else { ++ loads[0] = (avenrun[0] + offset) 
<< shift; ++ loads[1] = (avenrun[1] + offset) << shift; ++ loads[2] = (avenrun[2] + offset) << shift; ++ } + } + + static long calc_load_fold_active(struct rq *this_rq) +@@ -4054,16 +4064,19 @@ void account_user_time(struct task_struc + cputime_t cputime_scaled) + { + struct cpu_usage_stat *cpustat = &kstat_this_cpu.cpustat; ++ struct vx_info *vxi = p->vx_info; /* p is _always_ current */ + cputime64_t tmp; ++ int nice = (TASK_NICE(p) > 0); + + /* Add user time to process. */ + p->utime = cputime_add(p->utime, cputime); + p->utimescaled = cputime_add(p->utimescaled, cputime_scaled); ++ vx_account_user(vxi, cputime, nice); + account_group_user_time(p, cputime); + + /* Add user time to cpustat. */ + tmp = cputime_to_cputime64(cputime); +- if (TASK_NICE(p) > 0) ++ if (nice) + cpustat->nice = cputime64_add(cpustat->nice, tmp); + else + cpustat->user = cputime64_add(cpustat->user, tmp); +@@ -4115,10 +4128,12 @@ void __account_system_time(struct task_s + cputime_t cputime_scaled, cputime64_t *target_cputime64) + { + cputime64_t tmp = cputime_to_cputime64(cputime); ++ struct vx_info *vxi = p->vx_info; /* p is _always_ current */ + + /* Add system time to process. */ + p->stime = cputime_add(p->stime, cputime); + p->stimescaled = cputime_add(p->stimescaled, cputime_scaled); ++ vx_account_system(vxi, cputime, 0 /* do we have idle time? */); + account_group_system_time(p, cputime); + + /* Add system time to cpustat. */ +@@ -5324,7 +5339,7 @@ SYSCALL_DEFINE1(nice, int, increment) + nice = 19; + + if (increment < 0 && !can_nice(current, nice)) +- return -EPERM; ++ return vx_flags(VXF_IGNEG_NICE, 0) ? 0 : -EPERM; + + retval = security_task_setnice(current, nice); + if (retval) +diff -NurpP --minimal linux-3.2.34/kernel/sched_fair.c linux-3.2.34-vs2.3.2.15/kernel/sched_fair.c +--- linux-3.2.34/kernel/sched_fair.c 2012-11-18 18:42:24.000000000 +0100 ++++ linux-3.2.34-vs2.3.2.15/kernel/sched_fair.c 2012-08-13 12:40:51.000000000 +0200 +@@ -1014,6 +1014,8 @@ enqueue_entity(struct cfs_rq *cfs_rq, st + __enqueue_entity(cfs_rq, se); + se->on_rq = 1; + ++ if (entity_is_task(se)) ++ vx_activate_task(task_of(se)); + if (cfs_rq->nr_running == 1) { + list_add_leaf_cfs_rq(cfs_rq); + check_enqueue_throttle(cfs_rq); +@@ -1094,6 +1096,8 @@ dequeue_entity(struct cfs_rq *cfs_rq, st + if (se != cfs_rq->curr) + __dequeue_entity(cfs_rq, se); + se->on_rq = 0; ++ if (entity_is_task(se)) ++ vx_deactivate_task(task_of(se)); + update_cfs_load(cfs_rq, 0); + account_entity_dequeue(cfs_rq, se); + +diff -NurpP --minimal linux-3.2.34/kernel/signal.c linux-3.2.34-vs2.3.2.15/kernel/signal.c +--- linux-3.2.34/kernel/signal.c 2012-11-18 18:42:24.000000000 +0100 ++++ linux-3.2.34-vs2.3.2.15/kernel/signal.c 2012-05-15 18:16:52.000000000 +0200 +@@ -28,6 +28,8 @@ + #include + #include + #include ++#include ++#include + #define CREATE_TRACE_POINTS + #include + +@@ -789,9 +791,18 @@ static int check_kill_permission(int sig + struct pid *sid; + int error; + ++ vxdprintk(VXD_CBIT(misc, 7), ++ "check_kill_permission(%d,%p,%p[#%u,%u])", ++ sig, info, t, vx_task_xid(t), t->pid); ++ + if (!valid_signal(sig)) + return -EINVAL; + ++/* FIXME: needed? if so, why? 
++ if ((info != SEND_SIG_NOINFO) && ++ (is_si_special(info) || !si_fromuser(info))) ++ goto skip; */ ++ + if (!si_fromuser(info)) + return 0; + +@@ -815,6 +826,20 @@ static int check_kill_permission(int sig + } + } + ++ error = -EPERM; ++ if (t->pid == 1 && current->xid) ++ return error; ++ ++ error = -ESRCH; ++ /* FIXME: we shouldn't return ESRCH ever, to avoid ++ loops, maybe ENOENT or EACCES? */ ++ if (!vx_check(vx_task_xid(t), VS_WATCH_P | VS_IDENT)) { ++ vxdprintk(current->xid || VXD_CBIT(misc, 7), ++ "signal %d[%p] xid mismatch %p[#%u,%u] xid=#%u", ++ sig, info, t, vx_task_xid(t), t->pid, current->xid); ++ return error; ++ } ++/* skip: */ + return security_task_kill(t, info, sig, 0); + } + +@@ -1319,7 +1344,7 @@ int kill_pid_info(int sig, struct siginf + rcu_read_lock(); + retry: + p = pid_task(pid, PIDTYPE_PID); +- if (p) { ++ if (p && vx_check(vx_task_xid(p), VS_IDENT)) { + error = group_send_sig_info(sig, info, p); + if (unlikely(error == -ESRCH)) + /* +@@ -1369,7 +1394,7 @@ int kill_pid_info_as_cred(int sig, struc + + rcu_read_lock(); + p = pid_task(pid, PIDTYPE_PID); +- if (!p) { ++ if (!p || !vx_check(vx_task_xid(p), VS_IDENT)) { + ret = -ESRCH; + goto out_unlock; + } +@@ -1421,8 +1446,10 @@ static int kill_something_info(int sig, + struct task_struct * p; + + for_each_process(p) { +- if (task_pid_vnr(p) > 1 && +- !same_thread_group(p, current)) { ++ if (vx_check(vx_task_xid(p), VS_ADMIN|VS_IDENT) && ++ task_pid_vnr(p) > 1 && ++ !same_thread_group(p, current) && ++ !vx_current_initpid(p->pid)) { + int err = group_send_sig_info(sig, info, p); + ++count; + if (err != -EPERM) +@@ -2264,6 +2291,11 @@ relock: + !sig_kernel_only(signr)) + continue; + ++ /* virtual init is protected against user signals */ ++ if ((info->si_code == SI_USER) && ++ vx_current_initpid(current->pid)) ++ continue; ++ + if (sig_kernel_stop(signr)) { + /* + * The default action is to stop all threads in +diff -NurpP --minimal linux-3.2.34/kernel/softirq.c linux-3.2.34-vs2.3.2.15/kernel/softirq.c +--- linux-3.2.34/kernel/softirq.c 2012-01-09 16:15:00.000000000 +0100 ++++ linux-3.2.34-vs2.3.2.15/kernel/softirq.c 2011-12-05 19:33:02.000000000 +0100 +@@ -24,6 +24,7 @@ + #include + #include + #include ++#include + + #define CREATE_TRACE_POINTS + #include +diff -NurpP --minimal linux-3.2.34/kernel/sys.c linux-3.2.34-vs2.3.2.15/kernel/sys.c +--- linux-3.2.34/kernel/sys.c 2012-11-18 18:42:24.000000000 +0100 ++++ linux-3.2.34-vs2.3.2.15/kernel/sys.c 2012-11-06 18:08:24.000000000 +0100 +@@ -45,6 +45,7 @@ + #include + #include + #include ++#include + + #include + /* Move somewhere else to avoid recompiling? 
*/ +@@ -155,7 +156,10 @@ static int set_one_prio(struct task_stru + goto out; + } + if (niceval < task_nice(p) && !can_nice(p, niceval)) { +- error = -EACCES; ++ if (vx_flags(VXF_IGNEG_NICE, 0)) ++ error = 0; ++ else ++ error = -EACCES; + goto out; + } + no_nice = security_task_setnice(p, niceval); +@@ -205,6 +209,8 @@ SYSCALL_DEFINE3(setpriority, int, which, + else + pgrp = task_pgrp(current); + do_each_pid_thread(pgrp, PIDTYPE_PGID, p) { ++ if (!vx_check(p->xid, VS_ADMIN_P | VS_IDENT)) ++ continue; + error = set_one_prio(p, niceval, error); + } while_each_pid_thread(pgrp, PIDTYPE_PGID, p); + break; +@@ -268,6 +274,8 @@ SYSCALL_DEFINE2(getpriority, int, which, + else + pgrp = task_pgrp(current); + do_each_pid_thread(pgrp, PIDTYPE_PGID, p) { ++ if (!vx_check(p->xid, VS_ADMIN_P | VS_IDENT)) ++ continue; + niceval = 20 - task_nice(p); + if (niceval > retval) + retval = niceval; +@@ -419,6 +427,8 @@ EXPORT_SYMBOL_GPL(kernel_power_off); + + static DEFINE_MUTEX(reboot_mutex); + ++long vs_reboot(unsigned int, void __user *); ++ + /* + * Reboot system call: for obvious reasons only root may call it, + * and even root needs to set up some magic numbers in the registers +@@ -451,6 +461,9 @@ SYSCALL_DEFINE4(reboot, int, magic1, int + if ((cmd == LINUX_REBOOT_CMD_POWER_OFF) && !pm_power_off) + cmd = LINUX_REBOOT_CMD_HALT; + ++ if (!vx_check(0, VS_ADMIN|VS_WATCH)) ++ return vs_reboot(cmd, arg); ++ + mutex_lock(&reboot_mutex); + switch (cmd) { + case LINUX_REBOOT_CMD_RESTART: +@@ -1276,7 +1289,8 @@ SYSCALL_DEFINE2(sethostname, char __user + int errno; + char tmp[__NEW_UTS_LEN]; + +- if (!ns_capable(current->nsproxy->uts_ns->user_ns, CAP_SYS_ADMIN)) ++ if (!vx_ns_capable(current->nsproxy->uts_ns->user_ns, ++ CAP_SYS_ADMIN, VXC_SET_UTSNAME)) + return -EPERM; + + if (len < 0 || len > __NEW_UTS_LEN) +@@ -1327,7 +1341,8 @@ SYSCALL_DEFINE2(setdomainname, char __us + int errno; + char tmp[__NEW_UTS_LEN]; + +- if (!ns_capable(current->nsproxy->uts_ns->user_ns, CAP_SYS_ADMIN)) ++ if (!vx_ns_capable(current->nsproxy->uts_ns->user_ns, ++ CAP_SYS_ADMIN, VXC_SET_UTSNAME)) + return -EPERM; + if (len < 0 || len > __NEW_UTS_LEN) + return -EINVAL; +@@ -1446,7 +1461,7 @@ int do_prlimit(struct task_struct *tsk, + /* Keep the capable check against init_user_ns until + cgroups can contain all limits */ + if (new_rlim->rlim_max > rlim->rlim_max && +- !capable(CAP_SYS_RESOURCE)) ++ !vx_capable(CAP_SYS_RESOURCE, VXC_SET_RLIMIT)) + retval = -EPERM; + if (!retval) + retval = security_task_setrlimit(tsk->group_leader, +@@ -1500,7 +1515,8 @@ static int check_prlimit_permission(stru + cred->gid == tcred->sgid && + cred->gid == tcred->gid)) + return 0; +- if (ns_capable(tcred->user->user_ns, CAP_SYS_RESOURCE)) ++ if (vx_ns_capable(tcred->user->user_ns, ++ CAP_SYS_RESOURCE, VXC_SET_RLIMIT)) + return 0; + + return -EPERM; +diff -NurpP --minimal linux-3.2.34/kernel/sysctl.c linux-3.2.34-vs2.3.2.15/kernel/sysctl.c +--- linux-3.2.34/kernel/sysctl.c 2012-11-18 18:42:24.000000000 +0100 ++++ linux-3.2.34-vs2.3.2.15/kernel/sysctl.c 2012-04-16 12:14:54.000000000 +0200 +@@ -76,6 +76,7 @@ + #if defined(CONFIG_PROVE_LOCKING) || defined(CONFIG_LOCK_STAT) + #include + #endif ++extern char vshelper_path[]; + #ifdef CONFIG_CHR_DEV_SG + #include + #endif +@@ -572,6 +573,13 @@ static struct ctl_table kern_table[] = { + .proc_handler = proc_dostring, + }, + #endif ++ { ++ .procname = "vshelper", ++ .data = &vshelper_path, ++ .maxlen = 256, ++ .mode = 0644, ++ .proc_handler = &proc_dostring, ++ }, + #ifdef CONFIG_CHR_DEV_SG + { + .procname = 
"sg-big-buff", +diff -NurpP --minimal linux-3.2.34/kernel/sysctl_binary.c linux-3.2.34-vs2.3.2.15/kernel/sysctl_binary.c +--- linux-3.2.34/kernel/sysctl_binary.c 2012-01-09 16:15:00.000000000 +0100 ++++ linux-3.2.34-vs2.3.2.15/kernel/sysctl_binary.c 2012-01-09 16:19:31.000000000 +0100 +@@ -73,6 +73,7 @@ static const struct bin_table bin_kern_t + + { CTL_INT, KERN_PANIC, "panic" }, + { CTL_INT, KERN_REALROOTDEV, "real-root-dev" }, ++ { CTL_STR, KERN_VSHELPER, "vshelper" }, + + { CTL_STR, KERN_SPARC_REBOOT, "reboot-cmd" }, + { CTL_INT, KERN_CTLALTDEL, "ctrl-alt-del" }, +diff -NurpP --minimal linux-3.2.34/kernel/time/timekeeping.c linux-3.2.34-vs2.3.2.15/kernel/time/timekeeping.c +--- linux-3.2.34/kernel/time/timekeeping.c 2012-11-18 18:42:24.000000000 +0100 ++++ linux-3.2.34-vs2.3.2.15/kernel/time/timekeeping.c 2012-11-06 18:08:24.000000000 +0100 +@@ -253,6 +253,7 @@ void getnstimeofday(struct timespec *ts) + } while (read_seqretry(&xtime_lock, seq)); + + timespec_add_ns(ts, nsecs); ++ vx_adjust_timespec(ts); + } + + EXPORT_SYMBOL(getnstimeofday); +diff -NurpP --minimal linux-3.2.34/kernel/time.c linux-3.2.34-vs2.3.2.15/kernel/time.c +--- linux-3.2.34/kernel/time.c 2012-01-09 16:15:00.000000000 +0100 ++++ linux-3.2.34-vs2.3.2.15/kernel/time.c 2011-12-05 19:33:02.000000000 +0100 +@@ -92,7 +92,7 @@ SYSCALL_DEFINE1(stime, time_t __user *, + if (err) + return err; + +- do_settimeofday(&tv); ++ vx_settimeofday(&tv); + return 0; + } + +@@ -177,7 +177,7 @@ int do_sys_settimeofday(const struct tim + /* SMP safe, again the code in arch/foo/time.c should + * globally block out interrupts when it runs. + */ +- return do_settimeofday(tv); ++ return vx_settimeofday(tv); + } + return 0; + } +diff -NurpP --minimal linux-3.2.34/kernel/timer.c linux-3.2.34-vs2.3.2.15/kernel/timer.c +--- linux-3.2.34/kernel/timer.c 2012-11-18 18:42:24.000000000 +0100 ++++ linux-3.2.34-vs2.3.2.15/kernel/timer.c 2012-11-06 18:08:24.000000000 +0100 +@@ -40,6 +40,10 @@ + #include + #include + #include ++#include ++#include ++#include ++#include + + #include + #include +@@ -1338,12 +1342,6 @@ SYSCALL_DEFINE1(alarm, unsigned int, sec + + #endif + +-#ifndef __alpha__ +- +-/* +- * The Alpha uses getxpid, getxuid, and getxgid instead. Maybe this +- * should be moved into arch/i386 instead? +- */ + + /** + * sys_getpid - return the thread group id of the current process +@@ -1372,10 +1370,23 @@ SYSCALL_DEFINE0(getppid) + rcu_read_lock(); + pid = task_tgid_vnr(rcu_dereference(current->real_parent)); + rcu_read_unlock(); ++ return vx_map_pid(pid); ++} + +- return pid; ++#ifdef __alpha__ ++ ++/* ++ * The Alpha uses getxpid, getxuid, and getxgid instead. 
++ */ ++ ++asmlinkage long do_getxpid(long *ppid) ++{ ++ *ppid = sys_getppid(); ++ return sys_getpid(); + } + ++#else /* _alpha_ */ ++ + SYSCALL_DEFINE0(getuid) + { + /* Only we change this so SMP safe */ +diff -NurpP --minimal linux-3.2.34/kernel/user_namespace.c linux-3.2.34-vs2.3.2.15/kernel/user_namespace.c +--- linux-3.2.34/kernel/user_namespace.c 2012-01-09 16:15:00.000000000 +0100 ++++ linux-3.2.34-vs2.3.2.15/kernel/user_namespace.c 2011-12-05 19:33:02.000000000 +0100 +@@ -11,6 +11,7 @@ + #include + #include + #include ++#include + + static struct kmem_cache *user_ns_cachep __read_mostly; + +@@ -33,6 +34,7 @@ int create_user_ns(struct cred *new) + return -ENOMEM; + + kref_init(&ns->kref); ++ atomic_inc(&vs_global_user_ns); + + for (n = 0; n < UIDHASH_SZ; ++n) + INIT_HLIST_HEAD(ns->uidhash_table + n); +@@ -81,6 +83,8 @@ void free_user_ns(struct kref *kref) + struct user_namespace *ns = + container_of(kref, struct user_namespace, kref); + ++ /* FIXME: maybe move into destroyer? */ ++ atomic_dec(&vs_global_user_ns); + INIT_WORK(&ns->destroyer, free_user_ns_work); + schedule_work(&ns->destroyer); + } +diff -NurpP --minimal linux-3.2.34/kernel/utsname.c linux-3.2.34-vs2.3.2.15/kernel/utsname.c +--- linux-3.2.34/kernel/utsname.c 2012-01-09 16:15:00.000000000 +0100 ++++ linux-3.2.34-vs2.3.2.15/kernel/utsname.c 2011-12-05 19:33:02.000000000 +0100 +@@ -16,14 +16,17 @@ + #include + #include + #include ++#include + + static struct uts_namespace *create_uts_ns(void) + { + struct uts_namespace *uts_ns; + + uts_ns = kmalloc(sizeof(struct uts_namespace), GFP_KERNEL); +- if (uts_ns) ++ if (uts_ns) { + kref_init(&uts_ns->kref); ++ atomic_inc(&vs_global_uts_ns); ++ } + return uts_ns; + } + +@@ -32,8 +35,8 @@ static struct uts_namespace *create_uts_ + * @old_ns: namespace to clone + * Return NULL on error (failure to kmalloc), new ns otherwise + */ +-static struct uts_namespace *clone_uts_ns(struct task_struct *tsk, +- struct uts_namespace *old_ns) ++static struct uts_namespace *clone_uts_ns(struct uts_namespace *old_ns, ++ struct user_namespace *old_user) + { + struct uts_namespace *ns; + +@@ -43,7 +46,7 @@ static struct uts_namespace *clone_uts_n + + down_read(&uts_sem); + memcpy(&ns->name, &old_ns->name, sizeof(ns->name)); +- ns->user_ns = get_user_ns(task_cred_xxx(tsk, user)->user_ns); ++ ns->user_ns = get_user_ns(old_user); + up_read(&uts_sem); + return ns; + } +@@ -55,9 +58,9 @@ static struct uts_namespace *clone_uts_n + * versa. 
+ */
+ struct uts_namespace *copy_utsname(unsigned long flags,
+- struct task_struct *tsk)
++ struct uts_namespace *old_ns,
++ struct user_namespace *user_ns)
+ {
+- struct uts_namespace *old_ns = tsk->nsproxy->uts_ns;
+ struct uts_namespace *new_ns;
+
+ BUG_ON(!old_ns);
+@@ -66,7 +69,7 @@ struct uts_namespace *copy_utsname(unsig
+ if (!(flags & CLONE_NEWUTS))
+ return old_ns;
+
+- new_ns = clone_uts_ns(tsk, old_ns);
++ new_ns = clone_uts_ns(old_ns, user_ns);
+
+ put_uts_ns(old_ns);
+ return new_ns;
+@@ -78,6 +81,7 @@ void free_uts_ns(struct kref *kref)
+
+ ns = container_of(kref, struct uts_namespace, kref);
+ put_user_ns(ns->user_ns);
++ atomic_dec(&vs_global_uts_ns);
+ kfree(ns);
+ }
+
+diff -NurpP --minimal linux-3.2.34/kernel/vserver/Kconfig linux-3.2.34-vs2.3.2.15/kernel/vserver/Kconfig
+--- linux-3.2.34/kernel/vserver/Kconfig 1970-01-01 01:00:00.000000000 +0100
++++ linux-3.2.34-vs2.3.2.15/kernel/vserver/Kconfig 2011-12-15 01:52:48.000000000 +0100
+@@ -0,0 +1,224 @@
++#
++# Linux VServer configuration
++#
++
++menu "Linux VServer"
++
++config VSERVER_AUTO_LBACK
++ bool "Automatically Assign Loopback IP"
++ default y
++ help
++ Automatically assign a guest-specific loopback
++ IP and add it to the kernel network stack on
++ startup.
++
++config VSERVER_AUTO_SINGLE
++ bool "Automatic Single IP Special Casing"
++ depends on EXPERIMENTAL
++ default y
++ help
++ This allows network contexts with a single IP to
++ automatically remap 0.0.0.0 bindings to that IP,
++ avoiding further network checks and improving
++ performance.
++
++ (note: such guests do not allow changing the IP
++ on the fly and do not show loopback addresses)
++
++config VSERVER_COWBL
++ bool "Enable COW Immutable Link Breaking"
++ default y
++ help
++ This enables the COW (Copy-On-Write) link break code.
++ It allows you to treat unified files like normal files
++ when writing to them (which will implicitly break the
++ link and create a copy of the unified file).
++
++config VSERVER_VTIME
++ bool "Enable Virtualized Guest Time"
++ depends on EXPERIMENTAL
++ default n
++ help
++ This enables per-guest time offsets to allow for
++ adjusting the system clock individually per guest.
++ This adds some overhead to the time functions and
++ therefore should not be enabled without good reason.
++
++config VSERVER_DEVICE
++ bool "Enable Guest Device Mapping"
++ depends on EXPERIMENTAL
++ default n
++ help
++ This enables generic device remapping.
++
++config VSERVER_PROC_SECURE
++ bool "Enable Proc Security"
++ depends on PROC_FS
++ default y
++ help
++ This configures ProcFS security to initially hide
++ non-process entries for all contexts except the main and
++ spectator context (i.e. for all guests), which is a secure
++ default.
++
++ (note: on 1.2x the entries were visible by default)
++
++choice
++ prompt "Persistent Inode Tagging"
++ default TAGGING_ID24
++ help
++ This adds persistent context information to filesystems
++ mounted with the tagxid option. Tagging is a requirement
++ for per-context disk limits and per-context quota.
++
++
++config TAGGING_NONE
++ bool "Disabled"
++ help
++ do not store per-context information in inodes.
++
++config TAGGING_UID16
++ bool "UID16/GID32"
++ help
++ reduces UID to 16 bit, but leaves GID at 32 bit.
++
++config TAGGING_GID16
++ bool "UID32/GID16"
++ help
++ reduces GID to 16 bit, but leaves UID at 32 bit.
++
++config TAGGING_ID24
++ bool "UID24/GID24"
++ help
++ uses the upper 8 bits of UID and GID for XID tagging,
++ which leaves 24 bits for UID/GID each and should be
++ more than sufficient for normal use.
++
++config TAGGING_INTERN
++ bool "UID32/GID32"
++ help
++ This uses otherwise reserved inode fields in the on-disk
++ representation, which limits its use to a few
++ filesystems (currently ext2 and ext3).
++
++endchoice
++
++config TAG_NFSD
++ bool "Tag NFSD User Auth and Files"
++ default n
++ help
++ Enable this if you want the in-kernel NFS
++ server to use the tagging specified above
++ (this will require patched clients too).
++
++config VSERVER_PRIVACY
++ bool "Honor Privacy Aspects of Guests"
++ default n
++ help
++ When enabled, most context checks will disallow
++ access to structures assigned to a specific context,
++ like ptys or loop devices.
++
++config VSERVER_CONTEXTS
++ int "Maximum number of Contexts (1-65533)" if EMBEDDED
++ range 1 65533
++ default "768" if 64BIT
++ default "256"
++ help
++ This setting will optimize certain data structures
++ and memory allocations according to the expected
++ maximum.
++
++ note: this is not a strict upper limit.
++
++config VSERVER_WARN
++ bool "VServer Warnings"
++ default y
++ help
++ This enables various runtime warnings, which will
++ notify about potential manipulation attempts or
++ resource shortage. It is generally considered a
++ good idea to have this enabled.
++
++config VSERVER_WARN_DEVPTS
++ bool "VServer DevPTS Warnings"
++ depends on VSERVER_WARN
++ default y
++ help
++ This enables DevPTS-related warnings, issued when a
++ process inside a context tries to look up or access
++ a dynamic pts from the host or a different context.
++
++config VSERVER_DEBUG
++ bool "VServer Debugging Code"
++ default n
++ help
++ Set this to yes if you want to be able to activate
++ debugging output at runtime. It adds a very small
++ overhead to all vserver-related functions and
++ increases the kernel size by about 20k.
++
++config VSERVER_HISTORY
++ bool "VServer History Tracing"
++ depends on VSERVER_DEBUG
++ default n
++ help
++ Set this to yes if you want to record the history of
++ linux-vserver activities, so they can be replayed in
++ the event of a kernel panic or oops.
++
++config VSERVER_HISTORY_SIZE
++ int "Per-CPU History Size (32-65536)"
++ depends on VSERVER_HISTORY
++ range 32 65536
++ default 64
++ help
++ This allows you to specify the number of entries in
++ the per-CPU history buffer.
++
++choice
++ prompt "Quotes used in debug and warn messages"
++ default QUOTES_ISO8859
++
++config QUOTES_ISO8859
++ bool "Extended ASCII (ISO 8859) angle quotes"
++ help
++ This uses the extended ASCII characters \xbb
++ and \xab for quoting file and process names.
++
++config QUOTES_UTF8
++ bool "UTF-8 angle quotes"
++ help
++ This uses the UTF-8 sequences for angle
++ quotes to quote file and process names.
++
++config QUOTES_ASCII
++ bool "ASCII single quotes"
++ help
++ This uses the ASCII single quote character
++ (\x27) to quote file and process names.
++ ++endchoice ++ ++endmenu ++ ++ ++config VSERVER ++ bool ++ default y ++ select NAMESPACES ++ select UTS_NS ++ select IPC_NS ++# select USER_NS ++ select SYSVIPC ++ ++config VSERVER_SECURITY ++ bool ++ depends on SECURITY ++ default y ++ select SECURITY_CAPABILITIES ++ ++config VSERVER_DISABLED ++ bool ++ default n ++ +diff -NurpP --minimal linux-3.2.34/kernel/vserver/Makefile linux-3.2.34-vs2.3.2.15/kernel/vserver/Makefile +--- linux-3.2.34/kernel/vserver/Makefile 1970-01-01 01:00:00.000000000 +0100 ++++ linux-3.2.34-vs2.3.2.15/kernel/vserver/Makefile 2011-12-05 19:33:02.000000000 +0100 +@@ -0,0 +1,18 @@ ++# ++# Makefile for the Linux vserver routines. ++# ++ ++ ++obj-y += vserver.o ++ ++vserver-y := switch.o context.o space.o sched.o network.o inode.o \ ++ limit.o cvirt.o cacct.o signal.o helper.o init.o \ ++ dlimit.o tag.o ++ ++vserver-$(CONFIG_INET) += inet.o ++vserver-$(CONFIG_PROC_FS) += proc.o ++vserver-$(CONFIG_VSERVER_DEBUG) += sysctl.o debug.o ++vserver-$(CONFIG_VSERVER_HISTORY) += history.o ++vserver-$(CONFIG_VSERVER_MONITOR) += monitor.o ++vserver-$(CONFIG_VSERVER_DEVICE) += device.o ++ +diff -NurpP --minimal linux-3.2.34/kernel/vserver/cacct.c linux-3.2.34-vs2.3.2.15/kernel/vserver/cacct.c +--- linux-3.2.34/kernel/vserver/cacct.c 1970-01-01 01:00:00.000000000 +0100 ++++ linux-3.2.34-vs2.3.2.15/kernel/vserver/cacct.c 2011-12-05 19:33:02.000000000 +0100 +@@ -0,0 +1,42 @@ ++/* ++ * linux/kernel/vserver/cacct.c ++ * ++ * Virtual Server: Context Accounting ++ * ++ * Copyright (C) 2006-2007 Herbert Pötzl ++ * ++ * V0.01 added accounting stats ++ * ++ */ ++ ++#include ++#include ++#include ++#include ++ ++#include ++#include ++ ++ ++int vc_sock_stat(struct vx_info *vxi, void __user *data) ++{ ++ struct vcmd_sock_stat_v0 vc_data; ++ int j, field; ++ ++ if (copy_from_user(&vc_data, data, sizeof(vc_data))) ++ return -EFAULT; ++ ++ field = vc_data.field; ++ if ((field < 0) || (field >= VXA_SOCK_SIZE)) ++ return -EINVAL; ++ ++ for (j = 0; j < 3; j++) { ++ vc_data.count[j] = vx_sock_count(&vxi->cacct, field, j); ++ vc_data.total[j] = vx_sock_total(&vxi->cacct, field, j); ++ } ++ ++ if (copy_to_user(data, &vc_data, sizeof(vc_data))) ++ return -EFAULT; ++ return 0; ++} ++ +diff -NurpP --minimal linux-3.2.34/kernel/vserver/cacct_init.h linux-3.2.34-vs2.3.2.15/kernel/vserver/cacct_init.h +--- linux-3.2.34/kernel/vserver/cacct_init.h 1970-01-01 01:00:00.000000000 +0100 ++++ linux-3.2.34-vs2.3.2.15/kernel/vserver/cacct_init.h 2011-12-05 19:33:02.000000000 +0100 +@@ -0,0 +1,25 @@ ++ ++ ++static inline void vx_info_init_cacct(struct _vx_cacct *cacct) ++{ ++ int i, j; ++ ++ ++ for (i = 0; i < VXA_SOCK_SIZE; i++) { ++ for (j = 0; j < 3; j++) { ++ atomic_long_set(&cacct->sock[i][j].count, 0); ++ atomic_long_set(&cacct->sock[i][j].total, 0); ++ } ++ } ++ for (i = 0; i < 8; i++) ++ atomic_set(&cacct->slab[i], 0); ++ for (i = 0; i < 5; i++) ++ for (j = 0; j < 4; j++) ++ atomic_set(&cacct->page[i][j], 0); ++} ++ ++static inline void vx_info_exit_cacct(struct _vx_cacct *cacct) ++{ ++ return; ++} ++ +diff -NurpP --minimal linux-3.2.34/kernel/vserver/cacct_proc.h linux-3.2.34-vs2.3.2.15/kernel/vserver/cacct_proc.h +--- linux-3.2.34/kernel/vserver/cacct_proc.h 1970-01-01 01:00:00.000000000 +0100 ++++ linux-3.2.34-vs2.3.2.15/kernel/vserver/cacct_proc.h 2011-12-05 19:33:02.000000000 +0100 +@@ -0,0 +1,53 @@ ++#ifndef _VX_CACCT_PROC_H ++#define _VX_CACCT_PROC_H ++ ++#include ++ ++ ++#define VX_SOCKA_TOP \ ++ "Type\t recv #/bytes\t\t send #/bytes\t\t fail #/bytes\n" ++ ++static inline int 
vx_info_proc_cacct(struct _vx_cacct *cacct, char *buffer) ++{ ++ int i, j, length = 0; ++ static char *type[VXA_SOCK_SIZE] = { ++ "UNSPEC", "UNIX", "INET", "INET6", "PACKET", "OTHER" ++ }; ++ ++ length += sprintf(buffer + length, VX_SOCKA_TOP); ++ for (i = 0; i < VXA_SOCK_SIZE; i++) { ++ length += sprintf(buffer + length, "%s:", type[i]); ++ for (j = 0; j < 3; j++) { ++ length += sprintf(buffer + length, ++ "\t%10lu/%-10lu", ++ vx_sock_count(cacct, i, j), ++ vx_sock_total(cacct, i, j)); ++ } ++ buffer[length++] = '\n'; ++ } ++ ++ length += sprintf(buffer + length, "\n"); ++ length += sprintf(buffer + length, ++ "slab:\t %8u %8u %8u %8u\n", ++ atomic_read(&cacct->slab[1]), ++ atomic_read(&cacct->slab[4]), ++ atomic_read(&cacct->slab[0]), ++ atomic_read(&cacct->slab[2])); ++ ++ length += sprintf(buffer + length, "\n"); ++ for (i = 0; i < 5; i++) { ++ length += sprintf(buffer + length, ++ "page[%d]: %8u %8u %8u %8u\t %8u %8u %8u %8u\n", i, ++ atomic_read(&cacct->page[i][0]), ++ atomic_read(&cacct->page[i][1]), ++ atomic_read(&cacct->page[i][2]), ++ atomic_read(&cacct->page[i][3]), ++ atomic_read(&cacct->page[i][4]), ++ atomic_read(&cacct->page[i][5]), ++ atomic_read(&cacct->page[i][6]), ++ atomic_read(&cacct->page[i][7])); ++ } ++ return length; ++} ++ ++#endif /* _VX_CACCT_PROC_H */ +diff -NurpP --minimal linux-3.2.34/kernel/vserver/context.c linux-3.2.34-vs2.3.2.15/kernel/vserver/context.c +--- linux-3.2.34/kernel/vserver/context.c 1970-01-01 01:00:00.000000000 +0100 ++++ linux-3.2.34-vs2.3.2.15/kernel/vserver/context.c 2012-06-27 05:01:29.000000000 +0200 +@@ -0,0 +1,1119 @@ ++/* ++ * linux/kernel/vserver/context.c ++ * ++ * Virtual Server: Context Support ++ * ++ * Copyright (C) 2003-2011 Herbert Pötzl ++ * ++ * V0.01 context helper ++ * V0.02 vx_ctx_kill syscall command ++ * V0.03 replaced context_info calls ++ * V0.04 redesign of struct (de)alloc ++ * V0.05 rlimit basic implementation ++ * V0.06 task_xid and info commands ++ * V0.07 context flags and caps ++ * V0.08 switch to RCU based hash ++ * V0.09 revert to non RCU for now ++ * V0.10 and back to working RCU hash ++ * V0.11 and back to locking again ++ * V0.12 referenced context store ++ * V0.13 separate per cpu data ++ * V0.14 changed vcmds to vxi arg ++ * V0.15 added context stat ++ * V0.16 have __create claim() the vxi ++ * V0.17 removed older and legacy stuff ++ * V0.18 added user credentials ++ * V0.19 added warn mask ++ * ++ */ ++ ++#include ++#include ++#include ++#include ++#include ++ ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++ ++#include ++#include ++#include ++#include ++ ++#include "cvirt_init.h" ++#include "cacct_init.h" ++#include "limit_init.h" ++#include "sched_init.h" ++ ++ ++atomic_t vx_global_ctotal = ATOMIC_INIT(0); ++atomic_t vx_global_cactive = ATOMIC_INIT(0); ++ ++ ++/* now inactive context structures */ ++ ++static struct hlist_head vx_info_inactive = HLIST_HEAD_INIT; ++ ++static DEFINE_SPINLOCK(vx_info_inactive_lock); ++ ++ ++/* __alloc_vx_info() ++ ++ * allocate an initialized vx_info struct ++ * doesn't make it visible (hash) */ ++ ++static struct vx_info *__alloc_vx_info(xid_t xid) ++{ ++ struct vx_info *new = NULL; ++ int cpu, index; ++ ++ vxdprintk(VXD_CBIT(xid, 0), "alloc_vx_info(%d)*", xid); ++ ++ /* would this benefit from a slab cache? 
*/ ++ new = kmalloc(sizeof(struct vx_info), GFP_KERNEL); ++ if (!new) ++ return 0; ++ ++ memset(new, 0, sizeof(struct vx_info)); ++#ifdef CONFIG_SMP ++ new->ptr_pc = alloc_percpu(struct _vx_info_pc); ++ if (!new->ptr_pc) ++ goto error; ++#endif ++ new->vx_id = xid; ++ INIT_HLIST_NODE(&new->vx_hlist); ++ atomic_set(&new->vx_usecnt, 0); ++ atomic_set(&new->vx_tasks, 0); ++ new->vx_parent = NULL; ++ new->vx_state = 0; ++ init_waitqueue_head(&new->vx_wait); ++ ++ /* prepare reaper */ ++ get_task_struct(init_pid_ns.child_reaper); ++ new->vx_reaper = init_pid_ns.child_reaper; ++ new->vx_badness_bias = 0; ++ ++ /* rest of init goes here */ ++ vx_info_init_limit(&new->limit); ++ vx_info_init_sched(&new->sched); ++ vx_info_init_cvirt(&new->cvirt); ++ vx_info_init_cacct(&new->cacct); ++ ++ /* per cpu data structures */ ++ for_each_possible_cpu(cpu) { ++ vx_info_init_sched_pc( ++ &vx_per_cpu(new, sched_pc, cpu), cpu); ++ vx_info_init_cvirt_pc( ++ &vx_per_cpu(new, cvirt_pc, cpu), cpu); ++ } ++ ++ new->vx_flags = VXF_INIT_SET; ++ new->vx_bcaps = CAP_FULL_SET; // maybe ~CAP_SETPCAP ++ new->vx_ccaps = 0; ++ new->vx_umask = 0; ++ new->vx_wmask = 0; ++ ++ new->reboot_cmd = 0; ++ new->exit_code = 0; ++ ++ // preconfig spaces ++ for (index = 0; index < VX_SPACES; index++) { ++ struct _vx_space *space = &new->space[index]; ++ ++ // filesystem ++ spin_lock(&init_fs.lock); ++ init_fs.users++; ++ spin_unlock(&init_fs.lock); ++ space->vx_fs = &init_fs; ++ ++ /* FIXME: do we want defaults? */ ++ // space->vx_real_cred = 0; ++ // space->vx_cred = 0; ++ } ++ ++ ++ vxdprintk(VXD_CBIT(xid, 0), ++ "alloc_vx_info(%d) = %p", xid, new); ++ vxh_alloc_vx_info(new); ++ atomic_inc(&vx_global_ctotal); ++ return new; ++#ifdef CONFIG_SMP ++error: ++ kfree(new); ++ return 0; ++#endif ++} ++ ++/* __dealloc_vx_info() ++ ++ * final disposal of vx_info */ ++ ++static void __dealloc_vx_info(struct vx_info *vxi) ++{ ++#ifdef CONFIG_VSERVER_WARN ++ struct vx_info_save vxis; ++ int cpu; ++#endif ++ vxdprintk(VXD_CBIT(xid, 0), ++ "dealloc_vx_info(%p)", vxi); ++ vxh_dealloc_vx_info(vxi); ++ ++#ifdef CONFIG_VSERVER_WARN ++ enter_vx_info(vxi, &vxis); ++ vx_info_exit_limit(&vxi->limit); ++ vx_info_exit_sched(&vxi->sched); ++ vx_info_exit_cvirt(&vxi->cvirt); ++ vx_info_exit_cacct(&vxi->cacct); ++ ++ for_each_possible_cpu(cpu) { ++ vx_info_exit_sched_pc( ++ &vx_per_cpu(vxi, sched_pc, cpu), cpu); ++ vx_info_exit_cvirt_pc( ++ &vx_per_cpu(vxi, cvirt_pc, cpu), cpu); ++ } ++ leave_vx_info(&vxis); ++#endif ++ ++ vxi->vx_id = -1; ++ vxi->vx_state |= VXS_RELEASED; ++ ++#ifdef CONFIG_SMP ++ free_percpu(vxi->ptr_pc); ++#endif ++ kfree(vxi); ++ atomic_dec(&vx_global_ctotal); ++} ++ ++static void __shutdown_vx_info(struct vx_info *vxi) ++{ ++ struct nsproxy *nsproxy; ++ struct fs_struct *fs; ++ struct cred *cred; ++ int index, kill; ++ ++ might_sleep(); ++ ++ vxi->vx_state |= VXS_SHUTDOWN; ++ vs_state_change(vxi, VSC_SHUTDOWN); ++ ++ for (index = 0; index < VX_SPACES; index++) { ++ struct _vx_space *space = &vxi->space[index]; ++ ++ nsproxy = xchg(&space->vx_nsproxy, NULL); ++ if (nsproxy) ++ put_nsproxy(nsproxy); ++ ++ fs = xchg(&space->vx_fs, NULL); ++ spin_lock(&fs->lock); ++ kill = !--fs->users; ++ spin_unlock(&fs->lock); ++ if (kill) ++ free_fs_struct(fs); ++ ++ cred = (struct cred *)xchg(&space->vx_cred, NULL); ++ if (cred) ++ abort_creds(cred); ++ } ++} ++ ++/* exported stuff */ ++ ++void free_vx_info(struct vx_info *vxi) ++{ ++ unsigned long flags; ++ unsigned index; ++ ++ /* check for reference counts first */ ++ 
BUG_ON(atomic_read(&vxi->vx_usecnt)); ++ BUG_ON(atomic_read(&vxi->vx_tasks)); ++ ++ /* context must not be hashed */ ++ BUG_ON(vx_info_state(vxi, VXS_HASHED)); ++ ++ /* context shutdown is mandatory */ ++ BUG_ON(!vx_info_state(vxi, VXS_SHUTDOWN)); ++ ++ /* spaces check */ ++ for (index = 0; index < VX_SPACES; index++) { ++ struct _vx_space *space = &vxi->space[index]; ++ ++ BUG_ON(space->vx_nsproxy); ++ BUG_ON(space->vx_fs); ++ // BUG_ON(space->vx_real_cred); ++ // BUG_ON(space->vx_cred); ++ } ++ ++ spin_lock_irqsave(&vx_info_inactive_lock, flags); ++ hlist_del(&vxi->vx_hlist); ++ spin_unlock_irqrestore(&vx_info_inactive_lock, flags); ++ ++ __dealloc_vx_info(vxi); ++} ++ ++ ++/* hash table for vx_info hash */ ++ ++#define VX_HASH_SIZE 13 ++ ++static struct hlist_head vx_info_hash[VX_HASH_SIZE] = ++ { [0 ... VX_HASH_SIZE-1] = HLIST_HEAD_INIT }; ++ ++static DEFINE_SPINLOCK(vx_info_hash_lock); ++ ++ ++static inline unsigned int __hashval(xid_t xid) ++{ ++ return (xid % VX_HASH_SIZE); ++} ++ ++ ++ ++/* __hash_vx_info() ++ ++ * add the vxi to the global hash table ++ * requires the hash_lock to be held */ ++ ++static inline void __hash_vx_info(struct vx_info *vxi) ++{ ++ struct hlist_head *head; ++ ++ vxd_assert_lock(&vx_info_hash_lock); ++ vxdprintk(VXD_CBIT(xid, 4), ++ "__hash_vx_info: %p[#%d]", vxi, vxi->vx_id); ++ vxh_hash_vx_info(vxi); ++ ++ /* context must not be hashed */ ++ BUG_ON(vx_info_state(vxi, VXS_HASHED)); ++ ++ vxi->vx_state |= VXS_HASHED; ++ head = &vx_info_hash[__hashval(vxi->vx_id)]; ++ hlist_add_head(&vxi->vx_hlist, head); ++ atomic_inc(&vx_global_cactive); ++} ++ ++/* __unhash_vx_info() ++ ++ * remove the vxi from the global hash table ++ * requires the hash_lock to be held */ ++ ++static inline void __unhash_vx_info(struct vx_info *vxi) ++{ ++ unsigned long flags; ++ ++ vxd_assert_lock(&vx_info_hash_lock); ++ vxdprintk(VXD_CBIT(xid, 4), ++ "__unhash_vx_info: %p[#%d.%d.%d]", vxi, vxi->vx_id, ++ atomic_read(&vxi->vx_usecnt), atomic_read(&vxi->vx_tasks)); ++ vxh_unhash_vx_info(vxi); ++ ++ /* context must be hashed */ ++ BUG_ON(!vx_info_state(vxi, VXS_HASHED)); ++ /* but without tasks */ ++ BUG_ON(atomic_read(&vxi->vx_tasks)); ++ ++ vxi->vx_state &= ~VXS_HASHED; ++ hlist_del_init(&vxi->vx_hlist); ++ spin_lock_irqsave(&vx_info_inactive_lock, flags); ++ hlist_add_head(&vxi->vx_hlist, &vx_info_inactive); ++ spin_unlock_irqrestore(&vx_info_inactive_lock, flags); ++ atomic_dec(&vx_global_cactive); ++} ++ ++ ++/* __lookup_vx_info() ++ ++ * requires the hash_lock to be held ++ * doesn't increment the vx_refcnt */ ++ ++static inline struct vx_info *__lookup_vx_info(xid_t xid) ++{ ++ struct hlist_head *head = &vx_info_hash[__hashval(xid)]; ++ struct hlist_node *pos; ++ struct vx_info *vxi; ++ ++ vxd_assert_lock(&vx_info_hash_lock); ++ hlist_for_each(pos, head) { ++ vxi = hlist_entry(pos, struct vx_info, vx_hlist); ++ ++ if (vxi->vx_id == xid) ++ goto found; ++ } ++ vxi = NULL; ++found: ++ vxdprintk(VXD_CBIT(xid, 0), ++ "__lookup_vx_info(#%u): %p[#%u]", ++ xid, vxi, vxi ? 
vxi->vx_id : 0); ++ vxh_lookup_vx_info(vxi, xid); ++ return vxi; ++} ++ ++ ++/* __create_vx_info() ++ ++ * create the requested context ++ * get(), claim() and hash it */ ++ ++static struct vx_info *__create_vx_info(int id) ++{ ++ struct vx_info *new, *vxi = NULL; ++ ++ vxdprintk(VXD_CBIT(xid, 1), "create_vx_info(%d)*", id); ++ ++ if (!(new = __alloc_vx_info(id))) ++ return ERR_PTR(-ENOMEM); ++ ++ /* required to make dynamic xids unique */ ++ spin_lock(&vx_info_hash_lock); ++ ++ /* static context requested */ ++ if ((vxi = __lookup_vx_info(id))) { ++ vxdprintk(VXD_CBIT(xid, 0), ++ "create_vx_info(%d) = %p (already there)", id, vxi); ++ if (vx_info_flags(vxi, VXF_STATE_SETUP, 0)) ++ vxi = ERR_PTR(-EBUSY); ++ else ++ vxi = ERR_PTR(-EEXIST); ++ goto out_unlock; ++ } ++ /* new context */ ++ vxdprintk(VXD_CBIT(xid, 0), ++ "create_vx_info(%d) = %p (new)", id, new); ++ claim_vx_info(new, NULL); ++ __hash_vx_info(get_vx_info(new)); ++ vxi = new, new = NULL; ++ ++out_unlock: ++ spin_unlock(&vx_info_hash_lock); ++ vxh_create_vx_info(IS_ERR(vxi) ? NULL : vxi, id); ++ if (new) ++ __dealloc_vx_info(new); ++ return vxi; ++} ++ ++ ++/* exported stuff */ ++ ++ ++void unhash_vx_info(struct vx_info *vxi) ++{ ++ spin_lock(&vx_info_hash_lock); ++ __unhash_vx_info(vxi); ++ spin_unlock(&vx_info_hash_lock); ++ __shutdown_vx_info(vxi); ++ __wakeup_vx_info(vxi); ++} ++ ++ ++/* lookup_vx_info() ++ ++ * search for a vx_info and get() it ++ * negative id means current */ ++ ++struct vx_info *lookup_vx_info(int id) ++{ ++ struct vx_info *vxi = NULL; ++ ++ if (id < 0) { ++ vxi = get_vx_info(current_vx_info()); ++ } else if (id > 1) { ++ spin_lock(&vx_info_hash_lock); ++ vxi = get_vx_info(__lookup_vx_info(id)); ++ spin_unlock(&vx_info_hash_lock); ++ } ++ return vxi; ++} ++ ++/* xid_is_hashed() ++ ++ * verify that xid is still hashed */ ++ ++int xid_is_hashed(xid_t xid) ++{ ++ int hashed; ++ ++ spin_lock(&vx_info_hash_lock); ++ hashed = (__lookup_vx_info(xid) != NULL); ++ spin_unlock(&vx_info_hash_lock); ++ return hashed; ++} ++ ++#ifdef CONFIG_PROC_FS ++ ++/* get_xid_list() ++ ++ * get a subset of hashed xids for proc ++ * assumes size is at least one */ ++ ++int get_xid_list(int index, unsigned int *xids, int size) ++{ ++ int hindex, nr_xids = 0; ++ ++ /* only show current and children */ ++ if (!vx_check(0, VS_ADMIN | VS_WATCH)) { ++ if (index > 0) ++ return 0; ++ xids[nr_xids] = vx_current_xid(); ++ return 1; ++ } ++ ++ for (hindex = 0; hindex < VX_HASH_SIZE; hindex++) { ++ struct hlist_head *head = &vx_info_hash[hindex]; ++ struct hlist_node *pos; ++ ++ spin_lock(&vx_info_hash_lock); ++ hlist_for_each(pos, head) { ++ struct vx_info *vxi; ++ ++ if (--index > 0) ++ continue; ++ ++ vxi = hlist_entry(pos, struct vx_info, vx_hlist); ++ xids[nr_xids] = vxi->vx_id; ++ if (++nr_xids >= size) { ++ spin_unlock(&vx_info_hash_lock); ++ goto out; ++ } ++ } ++ /* keep the lock time short */ ++ spin_unlock(&vx_info_hash_lock); ++ } ++out: ++ return nr_xids; ++} ++#endif ++ ++#ifdef CONFIG_VSERVER_DEBUG ++ ++void dump_vx_info_inactive(int level) ++{ ++ struct hlist_node *entry, *next; ++ ++ hlist_for_each_safe(entry, next, &vx_info_inactive) { ++ struct vx_info *vxi = ++ list_entry(entry, struct vx_info, vx_hlist); ++ ++ dump_vx_info(vxi, level); ++ } ++} ++ ++#endif ++ ++#if 0 ++int vx_migrate_user(struct task_struct *p, struct vx_info *vxi) ++{ ++ struct user_struct *new_user, *old_user; ++ ++ if (!p || !vxi) ++ BUG(); ++ ++ if (vx_info_flags(vxi, VXF_INFO_PRIVATE, 0)) ++ return -EACCES; ++ ++ new_user = alloc_uid(vxi->vx_id, 
p->uid); ++ if (!new_user) ++ return -ENOMEM; ++ ++ old_user = p->user; ++ if (new_user != old_user) { ++ atomic_inc(&new_user->processes); ++ atomic_dec(&old_user->processes); ++ p->user = new_user; ++ } ++ free_uid(old_user); ++ return 0; ++} ++#endif ++ ++#if 0 ++void vx_mask_cap_bset(struct vx_info *vxi, struct task_struct *p) ++{ ++ // p->cap_effective &= vxi->vx_cap_bset; ++ p->cap_effective = ++ cap_intersect(p->cap_effective, vxi->cap_bset); ++ // p->cap_inheritable &= vxi->vx_cap_bset; ++ p->cap_inheritable = ++ cap_intersect(p->cap_inheritable, vxi->cap_bset); ++ // p->cap_permitted &= vxi->vx_cap_bset; ++ p->cap_permitted = ++ cap_intersect(p->cap_permitted, vxi->cap_bset); ++} ++#endif ++ ++ ++#include ++#include ++ ++static int vx_openfd_task(struct task_struct *tsk) ++{ ++ struct files_struct *files = tsk->files; ++ struct fdtable *fdt; ++ const unsigned long *bptr; ++ int count, total; ++ ++ /* no rcu_read_lock() because of spin_lock() */ ++ spin_lock(&files->file_lock); ++ fdt = files_fdtable(files); ++ bptr = fdt->open_fds->fds_bits; ++ count = fdt->max_fds / (sizeof(unsigned long) * 8); ++ for (total = 0; count > 0; count--) { ++ if (*bptr) ++ total += hweight_long(*bptr); ++ bptr++; ++ } ++ spin_unlock(&files->file_lock); ++ return total; ++} ++ ++ ++/* for *space compatibility */ ++ ++asmlinkage long sys_unshare(unsigned long); ++ ++/* ++ * migrate task to new context ++ * gets vxi, puts old_vxi on change ++ * optionally unshares namespaces (hack) ++ */ ++ ++int vx_migrate_task(struct task_struct *p, struct vx_info *vxi, int unshare) ++{ ++ struct vx_info *old_vxi; ++ int ret = 0; ++ ++ if (!p || !vxi) ++ BUG(); ++ ++ vxdprintk(VXD_CBIT(xid, 5), ++ "vx_migrate_task(%p,%p[#%d.%d])", p, vxi, ++ vxi->vx_id, atomic_read(&vxi->vx_usecnt)); ++ ++ if (vx_info_flags(vxi, VXF_INFO_PRIVATE, 0) && ++ !vx_info_flags(vxi, VXF_STATE_SETUP, 0)) ++ return -EACCES; ++ ++ if (vx_info_state(vxi, VXS_SHUTDOWN)) ++ return -EFAULT; ++ ++ old_vxi = task_get_vx_info(p); ++ if (old_vxi == vxi) ++ goto out; ++ ++// if (!(ret = vx_migrate_user(p, vxi))) { ++ { ++ int openfd; ++ ++ task_lock(p); ++ openfd = vx_openfd_task(p); ++ ++ if (old_vxi) { ++ atomic_dec(&old_vxi->cvirt.nr_threads); ++ atomic_dec(&old_vxi->cvirt.nr_running); ++ __rlim_dec(&old_vxi->limit, RLIMIT_NPROC); ++ /* FIXME: what about the struct files here? */ ++ __rlim_sub(&old_vxi->limit, VLIMIT_OPENFD, openfd); ++ /* account for the executable */ ++ __rlim_dec(&old_vxi->limit, VLIMIT_DENTRY); ++ } ++ atomic_inc(&vxi->cvirt.nr_threads); ++ atomic_inc(&vxi->cvirt.nr_running); ++ __rlim_inc(&vxi->limit, RLIMIT_NPROC); ++ /* FIXME: what about the struct files here? 
*/ ++ __rlim_add(&vxi->limit, VLIMIT_OPENFD, openfd); ++ /* account for the executable */ ++ __rlim_inc(&vxi->limit, VLIMIT_DENTRY); ++ ++ if (old_vxi) { ++ release_vx_info(old_vxi, p); ++ clr_vx_info(&p->vx_info); ++ } ++ claim_vx_info(vxi, p); ++ set_vx_info(&p->vx_info, vxi); ++ p->xid = vxi->vx_id; ++ ++ vxdprintk(VXD_CBIT(xid, 5), ++ "moved task %p into vxi:%p[#%d]", ++ p, vxi, vxi->vx_id); ++ ++ // vx_mask_cap_bset(vxi, p); ++ task_unlock(p); ++ ++ /* hack for *spaces to provide compatibility */ ++ if (unshare) { ++ struct nsproxy *old_nsp, *new_nsp; ++ ++ ret = unshare_nsproxy_namespaces( ++ CLONE_NEWUTS | CLONE_NEWIPC | CLONE_NEWUSER, ++ &new_nsp, NULL); ++ if (ret) ++ goto out; ++ ++ old_nsp = xchg(&p->nsproxy, new_nsp); ++ vx_set_space(vxi, ++ CLONE_NEWUTS | CLONE_NEWIPC | CLONE_NEWUSER, 0); ++ put_nsproxy(old_nsp); ++ } ++ } ++out: ++ put_vx_info(old_vxi); ++ return ret; ++} ++ ++int vx_set_reaper(struct vx_info *vxi, struct task_struct *p) ++{ ++ struct task_struct *old_reaper; ++ struct vx_info *reaper_vxi; ++ ++ if (!vxi) ++ return -EINVAL; ++ ++ vxdprintk(VXD_CBIT(xid, 6), ++ "vx_set_reaper(%p[#%d],%p[#%d,%d])", ++ vxi, vxi->vx_id, p, p->xid, p->pid); ++ ++ old_reaper = vxi->vx_reaper; ++ if (old_reaper == p) ++ return 0; ++ ++ reaper_vxi = task_get_vx_info(p); ++ if (reaper_vxi && reaper_vxi != vxi) { ++ vxwprintk(1, ++ "Unsuitable reaper [" VS_Q("%s") ",%u:#%u] " ++ "for [xid #%u]", ++ p->comm, p->pid, p->xid, vx_current_xid()); ++ goto out; ++ } ++ ++ /* set new child reaper */ ++ get_task_struct(p); ++ vxi->vx_reaper = p; ++ put_task_struct(old_reaper); ++out: ++ put_vx_info(reaper_vxi); ++ return 0; ++} ++ ++int vx_set_init(struct vx_info *vxi, struct task_struct *p) ++{ ++ if (!vxi) ++ return -EINVAL; ++ ++ vxdprintk(VXD_CBIT(xid, 6), ++ "vx_set_init(%p[#%d],%p[#%d,%d,%d])", ++ vxi, vxi->vx_id, p, p->xid, p->pid, p->tgid); ++ ++ vxi->vx_flags &= ~VXF_STATE_INIT; ++ // vxi->vx_initpid = p->tgid; ++ vxi->vx_initpid = p->pid; ++ return 0; ++} ++ ++void vx_exit_init(struct vx_info *vxi, struct task_struct *p, int code) ++{ ++ vxdprintk(VXD_CBIT(xid, 6), ++ "vx_exit_init(%p[#%d],%p[#%d,%d,%d])", ++ vxi, vxi->vx_id, p, p->xid, p->pid, p->tgid); ++ ++ vxi->exit_code = code; ++ vxi->vx_initpid = 0; ++} ++ ++ ++void vx_set_persistent(struct vx_info *vxi) ++{ ++ vxdprintk(VXD_CBIT(xid, 6), ++ "vx_set_persistent(%p[#%d])", vxi, vxi->vx_id); ++ ++ get_vx_info(vxi); ++ claim_vx_info(vxi, NULL); ++} ++ ++void vx_clear_persistent(struct vx_info *vxi) ++{ ++ vxdprintk(VXD_CBIT(xid, 6), ++ "vx_clear_persistent(%p[#%d])", vxi, vxi->vx_id); ++ ++ release_vx_info(vxi, NULL); ++ put_vx_info(vxi); ++} ++ ++void vx_update_persistent(struct vx_info *vxi) ++{ ++ if (vx_info_flags(vxi, VXF_PERSISTENT, 0)) ++ vx_set_persistent(vxi); ++ else ++ vx_clear_persistent(vxi); ++} ++ ++ ++/* task must be current or locked */ ++ ++void exit_vx_info(struct task_struct *p, int code) ++{ ++ struct vx_info *vxi = p->vx_info; ++ ++ if (vxi) { ++ atomic_dec(&vxi->cvirt.nr_threads); ++ vx_nproc_dec(p); ++ ++ vxi->exit_code = code; ++ release_vx_info(vxi, p); ++ } ++} ++ ++void exit_vx_info_early(struct task_struct *p, int code) ++{ ++ struct vx_info *vxi = p->vx_info; ++ ++ if (vxi) { ++ if (vxi->vx_initpid == p->pid) ++ vx_exit_init(vxi, p, code); ++ if (vxi->vx_reaper == p) ++ vx_set_reaper(vxi, init_pid_ns.child_reaper); ++ } ++} ++ ++ ++/* vserver syscall commands below here */ ++ ++/* taks xid and vx_info functions */ ++ ++#include ++ ++ ++int vc_task_xid(uint32_t id) ++{ ++ xid_t xid; ++ ++ if (id) { ++ 
struct task_struct *tsk; ++ ++ rcu_read_lock(); ++ tsk = find_task_by_real_pid(id); ++ xid = (tsk) ? tsk->xid : -ESRCH; ++ rcu_read_unlock(); ++ } else ++ xid = vx_current_xid(); ++ return xid; ++} ++ ++ ++int vc_vx_info(struct vx_info *vxi, void __user *data) ++{ ++ struct vcmd_vx_info_v0 vc_data; ++ ++ vc_data.xid = vxi->vx_id; ++ vc_data.initpid = vxi->vx_initpid; ++ ++ if (copy_to_user(data, &vc_data, sizeof(vc_data))) ++ return -EFAULT; ++ return 0; ++} ++ ++ ++int vc_ctx_stat(struct vx_info *vxi, void __user *data) ++{ ++ struct vcmd_ctx_stat_v0 vc_data; ++ ++ vc_data.usecnt = atomic_read(&vxi->vx_usecnt); ++ vc_data.tasks = atomic_read(&vxi->vx_tasks); ++ ++ if (copy_to_user(data, &vc_data, sizeof(vc_data))) ++ return -EFAULT; ++ return 0; ++} ++ ++ ++/* context functions */ ++ ++int vc_ctx_create(uint32_t xid, void __user *data) ++{ ++ struct vcmd_ctx_create vc_data = { .flagword = VXF_INIT_SET }; ++ struct vx_info *new_vxi; ++ int ret; ++ ++ if (data && copy_from_user(&vc_data, data, sizeof(vc_data))) ++ return -EFAULT; ++ ++ if ((xid > MAX_S_CONTEXT) || (xid < 2)) ++ return -EINVAL; ++ ++ new_vxi = __create_vx_info(xid); ++ if (IS_ERR(new_vxi)) ++ return PTR_ERR(new_vxi); ++ ++ /* initial flags */ ++ new_vxi->vx_flags = vc_data.flagword; ++ ++ ret = -ENOEXEC; ++ if (vs_state_change(new_vxi, VSC_STARTUP)) ++ goto out; ++ ++ ret = vx_migrate_task(current, new_vxi, (!data)); ++ if (ret) ++ goto out; ++ ++ /* return context id on success */ ++ ret = new_vxi->vx_id; ++ ++ /* get a reference for persistent contexts */ ++ if ((vc_data.flagword & VXF_PERSISTENT)) ++ vx_set_persistent(new_vxi); ++out: ++ release_vx_info(new_vxi, NULL); ++ put_vx_info(new_vxi); ++ return ret; ++} ++ ++ ++int vc_ctx_migrate(struct vx_info *vxi, void __user *data) ++{ ++ struct vcmd_ctx_migrate vc_data = { .flagword = 0 }; ++ int ret; ++ ++ if (data && copy_from_user(&vc_data, data, sizeof(vc_data))) ++ return -EFAULT; ++ ++ ret = vx_migrate_task(current, vxi, 0); ++ if (ret) ++ return ret; ++ if (vc_data.flagword & VXM_SET_INIT) ++ ret = vx_set_init(vxi, current); ++ if (ret) ++ return ret; ++ if (vc_data.flagword & VXM_SET_REAPER) ++ ret = vx_set_reaper(vxi, current); ++ return ret; ++} ++ ++ ++int vc_get_cflags(struct vx_info *vxi, void __user *data) ++{ ++ struct vcmd_ctx_flags_v0 vc_data; ++ ++ vc_data.flagword = vxi->vx_flags; ++ ++ /* special STATE flag handling */ ++ vc_data.mask = vs_mask_flags(~0ULL, vxi->vx_flags, VXF_ONE_TIME); ++ ++ if (copy_to_user(data, &vc_data, sizeof(vc_data))) ++ return -EFAULT; ++ return 0; ++} ++ ++int vc_set_cflags(struct vx_info *vxi, void __user *data) ++{ ++ struct vcmd_ctx_flags_v0 vc_data; ++ uint64_t mask, trigger; ++ ++ if (copy_from_user(&vc_data, data, sizeof(vc_data))) ++ return -EFAULT; ++ ++ /* special STATE flag handling */ ++ mask = vs_mask_mask(vc_data.mask, vxi->vx_flags, VXF_ONE_TIME); ++ trigger = (mask & vxi->vx_flags) ^ (mask & vc_data.flagword); ++ ++ if (vxi == current_vx_info()) { ++ /* if (trigger & VXF_STATE_SETUP) ++ vx_mask_cap_bset(vxi, current); */ ++ if (trigger & VXF_STATE_INIT) { ++ int ret; ++ ++ ret = vx_set_init(vxi, current); ++ if (ret) ++ return ret; ++ ret = vx_set_reaper(vxi, current); ++ if (ret) ++ return ret; ++ } ++ } ++ ++ vxi->vx_flags = vs_mask_flags(vxi->vx_flags, ++ vc_data.flagword, mask); ++ if (trigger & VXF_PERSISTENT) ++ vx_update_persistent(vxi); ++ ++ return 0; ++} ++ ++ ++static inline uint64_t caps_from_cap_t(kernel_cap_t c) ++{ ++ uint64_t v = c.cap[0] | ((uint64_t)c.cap[1] << 32); ++ ++ // 
printk("caps_from_cap_t(%08x:%08x) = %016llx\n", c.cap[1], c.cap[0], v); ++ return v; ++} ++ ++static inline kernel_cap_t cap_t_from_caps(uint64_t v) ++{ ++ kernel_cap_t c = __cap_empty_set; ++ ++ c.cap[0] = v & 0xFFFFFFFF; ++ c.cap[1] = (v >> 32) & 0xFFFFFFFF; ++ ++ // printk("cap_t_from_caps(%016llx) = %08x:%08x\n", v, c.cap[1], c.cap[0]); ++ return c; ++} ++ ++ ++static int do_get_caps(struct vx_info *vxi, uint64_t *bcaps, uint64_t *ccaps) ++{ ++ if (bcaps) ++ *bcaps = caps_from_cap_t(vxi->vx_bcaps); ++ if (ccaps) ++ *ccaps = vxi->vx_ccaps; ++ ++ return 0; ++} ++ ++int vc_get_ccaps(struct vx_info *vxi, void __user *data) ++{ ++ struct vcmd_ctx_caps_v1 vc_data; ++ int ret; ++ ++ ret = do_get_caps(vxi, NULL, &vc_data.ccaps); ++ if (ret) ++ return ret; ++ vc_data.cmask = ~0ULL; ++ ++ if (copy_to_user(data, &vc_data, sizeof(vc_data))) ++ return -EFAULT; ++ return 0; ++} ++ ++static int do_set_caps(struct vx_info *vxi, ++ uint64_t bcaps, uint64_t bmask, uint64_t ccaps, uint64_t cmask) ++{ ++ uint64_t bcold = caps_from_cap_t(vxi->vx_bcaps); ++ ++#if 0 ++ printk("do_set_caps(%16llx, %16llx, %16llx, %16llx)\n", ++ bcaps, bmask, ccaps, cmask); ++#endif ++ vxi->vx_bcaps = cap_t_from_caps( ++ vs_mask_flags(bcold, bcaps, bmask)); ++ vxi->vx_ccaps = vs_mask_flags(vxi->vx_ccaps, ccaps, cmask); ++ ++ return 0; ++} ++ ++int vc_set_ccaps(struct vx_info *vxi, void __user *data) ++{ ++ struct vcmd_ctx_caps_v1 vc_data; ++ ++ if (copy_from_user(&vc_data, data, sizeof(vc_data))) ++ return -EFAULT; ++ ++ return do_set_caps(vxi, 0, 0, vc_data.ccaps, vc_data.cmask); ++} ++ ++int vc_get_bcaps(struct vx_info *vxi, void __user *data) ++{ ++ struct vcmd_bcaps vc_data; ++ int ret; ++ ++ ret = do_get_caps(vxi, &vc_data.bcaps, NULL); ++ if (ret) ++ return ret; ++ vc_data.bmask = ~0ULL; ++ ++ if (copy_to_user(data, &vc_data, sizeof(vc_data))) ++ return -EFAULT; ++ return 0; ++} ++ ++int vc_set_bcaps(struct vx_info *vxi, void __user *data) ++{ ++ struct vcmd_bcaps vc_data; ++ ++ if (copy_from_user(&vc_data, data, sizeof(vc_data))) ++ return -EFAULT; ++ ++ return do_set_caps(vxi, vc_data.bcaps, vc_data.bmask, 0, 0); ++} ++ ++ ++int vc_get_umask(struct vx_info *vxi, void __user *data) ++{ ++ struct vcmd_umask vc_data; ++ ++ vc_data.umask = vxi->vx_umask; ++ vc_data.mask = ~0ULL; ++ ++ if (copy_to_user(data, &vc_data, sizeof(vc_data))) ++ return -EFAULT; ++ return 0; ++} ++ ++int vc_set_umask(struct vx_info *vxi, void __user *data) ++{ ++ struct vcmd_umask vc_data; ++ ++ if (copy_from_user(&vc_data, data, sizeof(vc_data))) ++ return -EFAULT; ++ ++ vxi->vx_umask = vs_mask_flags(vxi->vx_umask, ++ vc_data.umask, vc_data.mask); ++ return 0; ++} ++ ++ ++int vc_get_wmask(struct vx_info *vxi, void __user *data) ++{ ++ struct vcmd_wmask vc_data; ++ ++ vc_data.wmask = vxi->vx_wmask; ++ vc_data.mask = ~0ULL; ++ ++ if (copy_to_user(data, &vc_data, sizeof(vc_data))) ++ return -EFAULT; ++ return 0; ++} ++ ++int vc_set_wmask(struct vx_info *vxi, void __user *data) ++{ ++ struct vcmd_wmask vc_data; ++ ++ if (copy_from_user(&vc_data, data, sizeof(vc_data))) ++ return -EFAULT; ++ ++ vxi->vx_wmask = vs_mask_flags(vxi->vx_wmask, ++ vc_data.wmask, vc_data.mask); ++ return 0; ++} ++ ++ ++int vc_get_badness(struct vx_info *vxi, void __user *data) ++{ ++ struct vcmd_badness_v0 vc_data; ++ ++ vc_data.bias = vxi->vx_badness_bias; ++ ++ if (copy_to_user(data, &vc_data, sizeof(vc_data))) ++ return -EFAULT; ++ return 0; ++} ++ ++int vc_set_badness(struct vx_info *vxi, void __user *data) ++{ ++ struct vcmd_badness_v0 vc_data; ++ ++ if 
(copy_from_user(&vc_data, data, sizeof(vc_data))) ++ return -EFAULT; ++ ++ vxi->vx_badness_bias = vc_data.bias; ++ return 0; ++} ++ ++#include ++ ++EXPORT_SYMBOL_GPL(free_vx_info); ++ +diff -NurpP --minimal linux-3.2.34/kernel/vserver/cvirt.c linux-3.2.34-vs2.3.2.15/kernel/vserver/cvirt.c +--- linux-3.2.34/kernel/vserver/cvirt.c 1970-01-01 01:00:00.000000000 +0100 ++++ linux-3.2.34-vs2.3.2.15/kernel/vserver/cvirt.c 2012-04-24 00:43:39.000000000 +0200 +@@ -0,0 +1,313 @@ ++/* ++ * linux/kernel/vserver/cvirt.c ++ * ++ * Virtual Server: Context Virtualization ++ * ++ * Copyright (C) 2004-2007 Herbert Pötzl ++ * ++ * V0.01 broken out from limit.c ++ * V0.02 added utsname stuff ++ * V0.03 changed vcmds to vxi arg ++ * ++ */ ++ ++#include ++#include ++#include ++#include ++#include ++ ++#include ++ ++ ++void vx_vsi_boottime(struct timespec *boottime) ++{ ++ struct vx_info *vxi = current_vx_info(); ++ ++ set_normalized_timespec(boottime, ++ boottime->tv_sec + vxi->cvirt.bias_uptime.tv_sec, ++ boottime->tv_nsec + vxi->cvirt.bias_uptime.tv_nsec); ++ return; ++} ++ ++void vx_vsi_uptime(struct timespec *uptime, struct timespec *idle) ++{ ++ struct vx_info *vxi = current_vx_info(); ++ ++ set_normalized_timespec(uptime, ++ uptime->tv_sec - vxi->cvirt.bias_uptime.tv_sec, ++ uptime->tv_nsec - vxi->cvirt.bias_uptime.tv_nsec); ++ if (!idle) ++ return; ++ set_normalized_timespec(idle, ++ idle->tv_sec - vxi->cvirt.bias_idle.tv_sec, ++ idle->tv_nsec - vxi->cvirt.bias_idle.tv_nsec); ++ return; ++} ++ ++uint64_t vx_idle_jiffies(void) ++{ ++ return init_task.utime + init_task.stime; ++} ++ ++ ++ ++static inline uint32_t __update_loadavg(uint32_t load, ++ int wsize, int delta, int n) ++{ ++ unsigned long long calc, prev; ++ ++ /* just set it to n */ ++ if (unlikely(delta >= wsize)) ++ return (n << FSHIFT); ++ ++ calc = delta * n; ++ calc <<= FSHIFT; ++ prev = (wsize - delta); ++ prev *= load; ++ calc += prev; ++ do_div(calc, wsize); ++ return calc; ++} ++ ++ ++void vx_update_load(struct vx_info *vxi) ++{ ++ uint32_t now, last, delta; ++ unsigned int nr_running, nr_uninterruptible; ++ unsigned int total; ++ unsigned long flags; ++ ++ spin_lock_irqsave(&vxi->cvirt.load_lock, flags); ++ ++ now = jiffies; ++ last = vxi->cvirt.load_last; ++ delta = now - last; ++ ++ if (delta < 5*HZ) ++ goto out; ++ ++ nr_running = atomic_read(&vxi->cvirt.nr_running); ++ nr_uninterruptible = atomic_read(&vxi->cvirt.nr_uninterruptible); ++ total = nr_running + nr_uninterruptible; ++ ++ vxi->cvirt.load[0] = __update_loadavg(vxi->cvirt.load[0], ++ 60*HZ, delta, total); ++ vxi->cvirt.load[1] = __update_loadavg(vxi->cvirt.load[1], ++ 5*60*HZ, delta, total); ++ vxi->cvirt.load[2] = __update_loadavg(vxi->cvirt.load[2], ++ 15*60*HZ, delta, total); ++ ++ vxi->cvirt.load_last = now; ++out: ++ atomic_inc(&vxi->cvirt.load_updates); ++ spin_unlock_irqrestore(&vxi->cvirt.load_lock, flags); ++} ++ ++ ++/* ++ * Commands to do_syslog: ++ * ++ * 0 -- Close the log. Currently a NOP. ++ * 1 -- Open the log. Currently a NOP. ++ * 2 -- Read from the log. ++ * 3 -- Read all messages remaining in the ring buffer. ++ * 4 -- Read and clear all messages remaining in the ring buffer ++ * 5 -- Clear ring buffer. 
++ * 6 -- Disable printk's to console ++ * 7 -- Enable printk's to console ++ * 8 -- Set level of messages printed to console ++ * 9 -- Return number of unread characters in the log buffer ++ * 10 -- Return size of the log buffer ++ */ ++int vx_do_syslog(int type, char __user *buf, int len) ++{ ++ int error = 0; ++ int do_clear = 0; ++ struct vx_info *vxi = current_vx_info(); ++ struct _vx_syslog *log; ++ ++ if (!vxi) ++ return -EINVAL; ++ log = &vxi->cvirt.syslog; ++ ++ switch (type) { ++ case 0: /* Close log */ ++ case 1: /* Open log */ ++ break; ++ case 2: /* Read from log */ ++ error = wait_event_interruptible(log->log_wait, ++ (log->log_start - log->log_end)); ++ if (error) ++ break; ++ spin_lock_irq(&log->logbuf_lock); ++ spin_unlock_irq(&log->logbuf_lock); ++ break; ++ case 4: /* Read/clear last kernel messages */ ++ do_clear = 1; ++ /* fall through */ ++ case 3: /* Read last kernel messages */ ++ return 0; ++ ++ case 5: /* Clear ring buffer */ ++ return 0; ++ ++ case 6: /* Disable logging to console */ ++ case 7: /* Enable logging to console */ ++ case 8: /* Set level of messages printed to console */ ++ break; ++ ++ case 9: /* Number of chars in the log buffer */ ++ return 0; ++ case 10: /* Size of the log buffer */ ++ return 0; ++ default: ++ error = -EINVAL; ++ break; ++ } ++ return error; ++} ++ ++ ++/* virtual host info names */ ++ ++static char *vx_vhi_name(struct vx_info *vxi, int id) ++{ ++ struct nsproxy *nsproxy; ++ struct uts_namespace *uts; ++ ++ if (id == VHIN_CONTEXT) ++ return vxi->vx_name; ++ ++ nsproxy = vxi->space[0].vx_nsproxy; ++ if (!nsproxy) ++ return NULL; ++ ++ uts = nsproxy->uts_ns; ++ if (!uts) ++ return NULL; ++ ++ switch (id) { ++ case VHIN_SYSNAME: ++ return uts->name.sysname; ++ case VHIN_NODENAME: ++ return uts->name.nodename; ++ case VHIN_RELEASE: ++ return uts->name.release; ++ case VHIN_VERSION: ++ return uts->name.version; ++ case VHIN_MACHINE: ++ return uts->name.machine; ++ case VHIN_DOMAINNAME: ++ return uts->name.domainname; ++ default: ++ return NULL; ++ } ++ return NULL; ++} ++ ++int vc_set_vhi_name(struct vx_info *vxi, void __user *data) ++{ ++ struct vcmd_vhi_name_v0 vc_data; ++ char *name; ++ ++ if (copy_from_user(&vc_data, data, sizeof(vc_data))) ++ return -EFAULT; ++ ++ name = vx_vhi_name(vxi, vc_data.field); ++ if (!name) ++ return -EINVAL; ++ ++ memcpy(name, vc_data.name, 65); ++ return 0; ++} ++ ++int vc_get_vhi_name(struct vx_info *vxi, void __user *data) ++{ ++ struct vcmd_vhi_name_v0 vc_data; ++ char *name; ++ ++ if (copy_from_user(&vc_data, data, sizeof(vc_data))) ++ return -EFAULT; ++ ++ name = vx_vhi_name(vxi, vc_data.field); ++ if (!name) ++ return -EINVAL; ++ ++ memcpy(vc_data.name, name, 65); ++ if (copy_to_user(data, &vc_data, sizeof(vc_data))) ++ return -EFAULT; ++ return 0; ++} ++ ++ ++int vc_virt_stat(struct vx_info *vxi, void __user *data) ++{ ++ struct vcmd_virt_stat_v0 vc_data; ++ struct _vx_cvirt *cvirt = &vxi->cvirt; ++ struct timespec uptime; ++ ++ do_posix_clock_monotonic_gettime(&uptime); ++ set_normalized_timespec(&uptime, ++ uptime.tv_sec - cvirt->bias_uptime.tv_sec, ++ uptime.tv_nsec - cvirt->bias_uptime.tv_nsec); ++ ++ vc_data.offset = timespec_to_ns(&cvirt->bias_ts); ++ vc_data.uptime = timespec_to_ns(&uptime); ++ vc_data.nr_threads = atomic_read(&cvirt->nr_threads); ++ vc_data.nr_running = atomic_read(&cvirt->nr_running); ++ vc_data.nr_uninterruptible = atomic_read(&cvirt->nr_uninterruptible); ++ vc_data.nr_onhold = atomic_read(&cvirt->nr_onhold); ++ vc_data.nr_forks = atomic_read(&cvirt->total_forks); ++ 
vc_data.load[0] = cvirt->load[0]; ++ vc_data.load[1] = cvirt->load[1]; ++ vc_data.load[2] = cvirt->load[2]; ++ ++ if (copy_to_user(data, &vc_data, sizeof(vc_data))) ++ return -EFAULT; ++ return 0; ++} ++ ++ ++#ifdef CONFIG_VSERVER_VTIME ++ ++/* virtualized time base */ ++ ++void vx_adjust_timespec(struct timespec *ts) ++{ ++ struct vx_info *vxi; ++ ++ if (!vx_flags(VXF_VIRT_TIME, 0)) ++ return; ++ ++ vxi = current_vx_info(); ++ ts->tv_sec += vxi->cvirt.bias_ts.tv_sec; ++ ts->tv_nsec += vxi->cvirt.bias_ts.tv_nsec; ++ ++ if (ts->tv_nsec >= NSEC_PER_SEC) { ++ ts->tv_sec++; ++ ts->tv_nsec -= NSEC_PER_SEC; ++ } else if (ts->tv_nsec < 0) { ++ ts->tv_sec--; ++ ts->tv_nsec += NSEC_PER_SEC; ++ } ++} ++ ++int vx_settimeofday(const struct timespec *ts) ++{ ++ struct timespec ats, delta; ++ struct vx_info *vxi; ++ ++ if (!vx_flags(VXF_VIRT_TIME, 0)) ++ return do_settimeofday(ts); ++ ++ getnstimeofday(&ats); ++ delta = timespec_sub(*ts, ats); ++ ++ vxi = current_vx_info(); ++ vxi->cvirt.bias_ts = timespec_add(vxi->cvirt.bias_ts, delta); ++ return 0; ++} ++ ++#endif ++ +diff -NurpP --minimal linux-3.2.34/kernel/vserver/cvirt_init.h linux-3.2.34-vs2.3.2.15/kernel/vserver/cvirt_init.h +--- linux-3.2.34/kernel/vserver/cvirt_init.h 1970-01-01 01:00:00.000000000 +0100 ++++ linux-3.2.34-vs2.3.2.15/kernel/vserver/cvirt_init.h 2011-12-05 19:33:02.000000000 +0100 +@@ -0,0 +1,70 @@ ++ ++ ++extern uint64_t vx_idle_jiffies(void); ++ ++static inline void vx_info_init_cvirt(struct _vx_cvirt *cvirt) ++{ ++ uint64_t idle_jiffies = vx_idle_jiffies(); ++ uint64_t nsuptime; ++ ++ do_posix_clock_monotonic_gettime(&cvirt->bias_uptime); ++ nsuptime = (unsigned long long)cvirt->bias_uptime.tv_sec ++ * NSEC_PER_SEC + cvirt->bias_uptime.tv_nsec; ++ cvirt->bias_clock = nsec_to_clock_t(nsuptime); ++ cvirt->bias_ts.tv_sec = 0; ++ cvirt->bias_ts.tv_nsec = 0; ++ ++ jiffies_to_timespec(idle_jiffies, &cvirt->bias_idle); ++ atomic_set(&cvirt->nr_threads, 0); ++ atomic_set(&cvirt->nr_running, 0); ++ atomic_set(&cvirt->nr_uninterruptible, 0); ++ atomic_set(&cvirt->nr_onhold, 0); ++ ++ spin_lock_init(&cvirt->load_lock); ++ cvirt->load_last = jiffies; ++ atomic_set(&cvirt->load_updates, 0); ++ cvirt->load[0] = 0; ++ cvirt->load[1] = 0; ++ cvirt->load[2] = 0; ++ atomic_set(&cvirt->total_forks, 0); ++ ++ spin_lock_init(&cvirt->syslog.logbuf_lock); ++ init_waitqueue_head(&cvirt->syslog.log_wait); ++ cvirt->syslog.log_start = 0; ++ cvirt->syslog.log_end = 0; ++ cvirt->syslog.con_start = 0; ++ cvirt->syslog.logged_chars = 0; ++} ++ ++static inline ++void vx_info_init_cvirt_pc(struct _vx_cvirt_pc *cvirt_pc, int cpu) ++{ ++ // cvirt_pc->cpustat = { 0 }; ++} ++ ++static inline void vx_info_exit_cvirt(struct _vx_cvirt *cvirt) ++{ ++#ifdef CONFIG_VSERVER_WARN ++ int value; ++#endif ++ vxwprintk_xid((value = atomic_read(&cvirt->nr_threads)), ++ "!!! cvirt: %p[nr_threads] = %d on exit.", ++ cvirt, value); ++ vxwprintk_xid((value = atomic_read(&cvirt->nr_running)), ++ "!!! cvirt: %p[nr_running] = %d on exit.", ++ cvirt, value); ++ vxwprintk_xid((value = atomic_read(&cvirt->nr_uninterruptible)), ++ "!!! cvirt: %p[nr_uninterruptible] = %d on exit.", ++ cvirt, value); ++ vxwprintk_xid((value = atomic_read(&cvirt->nr_onhold)), ++ "!!! 
cvirt: %p[nr_onhold] = %d on exit.", ++ cvirt, value); ++ return; ++} ++ ++static inline ++void vx_info_exit_cvirt_pc(struct _vx_cvirt_pc *cvirt_pc, int cpu) ++{ ++ return; ++} ++ +diff -NurpP --minimal linux-3.2.34/kernel/vserver/cvirt_proc.h linux-3.2.34-vs2.3.2.15/kernel/vserver/cvirt_proc.h +--- linux-3.2.34/kernel/vserver/cvirt_proc.h 1970-01-01 01:00:00.000000000 +0100 ++++ linux-3.2.34-vs2.3.2.15/kernel/vserver/cvirt_proc.h 2011-12-05 19:33:02.000000000 +0100 +@@ -0,0 +1,135 @@ ++#ifndef _VX_CVIRT_PROC_H ++#define _VX_CVIRT_PROC_H ++ ++#include ++#include ++#include ++#include ++#include ++ ++ ++static inline ++int vx_info_proc_nsproxy(struct nsproxy *nsproxy, char *buffer) ++{ ++ struct mnt_namespace *ns; ++ struct uts_namespace *uts; ++ struct ipc_namespace *ipc; ++ struct path path; ++ char *pstr, *root; ++ int length = 0; ++ ++ if (!nsproxy) ++ goto out; ++ ++ length += sprintf(buffer + length, ++ "NSProxy:\t%p [%p,%p,%p]\n", ++ nsproxy, nsproxy->mnt_ns, ++ nsproxy->uts_ns, nsproxy->ipc_ns); ++ ++ ns = nsproxy->mnt_ns; ++ if (!ns) ++ goto skip_ns; ++ ++ pstr = kmalloc(PATH_MAX, GFP_KERNEL); ++ if (!pstr) ++ goto skip_ns; ++ ++ path.mnt = ns->root; ++ path.dentry = ns->root->mnt_root; ++ root = d_path(&path, pstr, PATH_MAX - 2); ++ length += sprintf(buffer + length, ++ "Namespace:\t%p [#%u]\n" ++ "RootPath:\t%s\n", ++ ns, atomic_read(&ns->count), ++ root); ++ kfree(pstr); ++skip_ns: ++ ++ uts = nsproxy->uts_ns; ++ if (!uts) ++ goto skip_uts; ++ ++ length += sprintf(buffer + length, ++ "SysName:\t%.*s\n" ++ "NodeName:\t%.*s\n" ++ "Release:\t%.*s\n" ++ "Version:\t%.*s\n" ++ "Machine:\t%.*s\n" ++ "DomainName:\t%.*s\n", ++ __NEW_UTS_LEN, uts->name.sysname, ++ __NEW_UTS_LEN, uts->name.nodename, ++ __NEW_UTS_LEN, uts->name.release, ++ __NEW_UTS_LEN, uts->name.version, ++ __NEW_UTS_LEN, uts->name.machine, ++ __NEW_UTS_LEN, uts->name.domainname); ++skip_uts: ++ ++ ipc = nsproxy->ipc_ns; ++ if (!ipc) ++ goto skip_ipc; ++ ++ length += sprintf(buffer + length, ++ "SEMS:\t\t%d %d %d %d %d\n" ++ "MSG:\t\t%d %d %d\n" ++ "SHM:\t\t%lu %lu %d %d\n", ++ ipc->sem_ctls[0], ipc->sem_ctls[1], ++ ipc->sem_ctls[2], ipc->sem_ctls[3], ++ ipc->used_sems, ++ ipc->msg_ctlmax, ipc->msg_ctlmnb, ipc->msg_ctlmni, ++ (unsigned long)ipc->shm_ctlmax, ++ (unsigned long)ipc->shm_ctlall, ++ ipc->shm_ctlmni, ipc->shm_tot); ++skip_ipc: ++out: ++ return length; ++} ++ ++ ++#include ++ ++#define LOAD_INT(x) ((x) >> FSHIFT) ++#define LOAD_FRAC(x) LOAD_INT(((x) & (FIXED_1 - 1)) * 100) ++ ++static inline ++int vx_info_proc_cvirt(struct _vx_cvirt *cvirt, char *buffer) ++{ ++ int length = 0; ++ int a, b, c; ++ ++ length += sprintf(buffer + length, ++ "BiasUptime:\t%lu.%02lu\n", ++ (unsigned long)cvirt->bias_uptime.tv_sec, ++ (cvirt->bias_uptime.tv_nsec / (NSEC_PER_SEC / 100))); ++ ++ a = cvirt->load[0] + (FIXED_1 / 200); ++ b = cvirt->load[1] + (FIXED_1 / 200); ++ c = cvirt->load[2] + (FIXED_1 / 200); ++ length += sprintf(buffer + length, ++ "nr_threads:\t%d\n" ++ "nr_running:\t%d\n" ++ "nr_unintr:\t%d\n" ++ "nr_onhold:\t%d\n" ++ "load_updates:\t%d\n" ++ "loadavg:\t%d.%02d %d.%02d %d.%02d\n" ++ "total_forks:\t%d\n", ++ atomic_read(&cvirt->nr_threads), ++ atomic_read(&cvirt->nr_running), ++ atomic_read(&cvirt->nr_uninterruptible), ++ atomic_read(&cvirt->nr_onhold), ++ atomic_read(&cvirt->load_updates), ++ LOAD_INT(a), LOAD_FRAC(a), ++ LOAD_INT(b), LOAD_FRAC(b), ++ LOAD_INT(c), LOAD_FRAC(c), ++ atomic_read(&cvirt->total_forks)); ++ return length; ++} ++ ++static inline ++int vx_info_proc_cvirt_pc(struct _vx_cvirt_pc *cvirt_pc, 
++ char *buffer, int cpu) ++{ ++ int length = 0; ++ return length; ++} ++ ++#endif /* _VX_CVIRT_PROC_H */ +diff -NurpP --minimal linux-3.2.34/kernel/vserver/debug.c linux-3.2.34-vs2.3.2.15/kernel/vserver/debug.c +--- linux-3.2.34/kernel/vserver/debug.c 1970-01-01 01:00:00.000000000 +0100 ++++ linux-3.2.34-vs2.3.2.15/kernel/vserver/debug.c 2011-12-05 19:33:02.000000000 +0100 +@@ -0,0 +1,32 @@ ++/* ++ * kernel/vserver/debug.c ++ * ++ * Copyright (C) 2005-2007 Herbert Pötzl ++ * ++ * V0.01 vx_info dump support ++ * ++ */ ++ ++#include ++ ++#include ++ ++ ++void dump_vx_info(struct vx_info *vxi, int level) ++{ ++ printk("vx_info %p[#%d, %d.%d, %4x]\n", vxi, vxi->vx_id, ++ atomic_read(&vxi->vx_usecnt), ++ atomic_read(&vxi->vx_tasks), ++ vxi->vx_state); ++ if (level > 0) { ++ __dump_vx_limit(&vxi->limit); ++ __dump_vx_sched(&vxi->sched); ++ __dump_vx_cvirt(&vxi->cvirt); ++ __dump_vx_cacct(&vxi->cacct); ++ } ++ printk("---\n"); ++} ++ ++ ++EXPORT_SYMBOL_GPL(dump_vx_info); ++ +diff -NurpP --minimal linux-3.2.34/kernel/vserver/device.c linux-3.2.34-vs2.3.2.15/kernel/vserver/device.c +--- linux-3.2.34/kernel/vserver/device.c 1970-01-01 01:00:00.000000000 +0100 ++++ linux-3.2.34-vs2.3.2.15/kernel/vserver/device.c 2011-12-05 19:33:02.000000000 +0100 +@@ -0,0 +1,443 @@ ++/* ++ * linux/kernel/vserver/device.c ++ * ++ * Linux-VServer: Device Support ++ * ++ * Copyright (C) 2006 Herbert Pötzl ++ * Copyright (C) 2007 Daniel Hokka Zakrisson ++ * ++ * V0.01 device mapping basics ++ * V0.02 added defaults ++ * ++ */ ++ ++#include ++#include ++#include ++#include ++#include ++ ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++ ++ ++#define DMAP_HASH_BITS 4 ++ ++ ++struct vs_mapping { ++ union { ++ struct hlist_node hlist; ++ struct list_head list; ++ } u; ++#define dm_hlist u.hlist ++#define dm_list u.list ++ xid_t xid; ++ dev_t device; ++ struct vx_dmap_target target; ++}; ++ ++ ++static struct hlist_head dmap_main_hash[1 << DMAP_HASH_BITS]; ++ ++static DEFINE_SPINLOCK(dmap_main_hash_lock); ++ ++static struct vx_dmap_target dmap_defaults[2] = { ++ { .flags = DATTR_OPEN }, ++ { .flags = DATTR_OPEN }, ++}; ++ ++ ++struct kmem_cache *dmap_cachep __read_mostly; ++ ++int __init dmap_cache_init(void) ++{ ++ dmap_cachep = kmem_cache_create("dmap_cache", ++ sizeof(struct vs_mapping), 0, ++ SLAB_HWCACHE_ALIGN|SLAB_PANIC, NULL); ++ return 0; ++} ++ ++__initcall(dmap_cache_init); ++ ++ ++static inline unsigned int __hashval(dev_t dev, int bits) ++{ ++ return hash_long((unsigned long)dev, bits); ++} ++ ++ ++/* __hash_mapping() ++ * add the mapping to the hash table ++ */ ++static inline void __hash_mapping(struct vx_info *vxi, struct vs_mapping *vdm) ++{ ++ spinlock_t *hash_lock = &dmap_main_hash_lock; ++ struct hlist_head *head, *hash = dmap_main_hash; ++ int device = vdm->device; ++ ++ spin_lock(hash_lock); ++ vxdprintk(VXD_CBIT(misc, 8), "__hash_mapping: %p[#%d] %08x:%08x", ++ vxi, vxi ? 
vxi->vx_id : 0, device, vdm->target.target); ++ ++ head = &hash[__hashval(device, DMAP_HASH_BITS)]; ++ hlist_add_head(&vdm->dm_hlist, head); ++ spin_unlock(hash_lock); ++} ++ ++ ++static inline int __mode_to_default(umode_t mode) ++{ ++ switch (mode) { ++ case S_IFBLK: ++ return 0; ++ case S_IFCHR: ++ return 1; ++ default: ++ BUG(); ++ } ++} ++ ++ ++/* __set_default() ++ * set a default ++ */ ++static inline void __set_default(struct vx_info *vxi, umode_t mode, ++ struct vx_dmap_target *vdmt) ++{ ++ spinlock_t *hash_lock = &dmap_main_hash_lock; ++ spin_lock(hash_lock); ++ ++ if (vxi) ++ vxi->dmap.targets[__mode_to_default(mode)] = *vdmt; ++ else ++ dmap_defaults[__mode_to_default(mode)] = *vdmt; ++ ++ ++ spin_unlock(hash_lock); ++ ++ vxdprintk(VXD_CBIT(misc, 8), "__set_default: %p[#%u] %08x %04x", ++ vxi, vxi ? vxi->vx_id : 0, vdmt->target, vdmt->flags); ++} ++ ++ ++/* __remove_default() ++ * remove a default ++ */ ++static inline int __remove_default(struct vx_info *vxi, umode_t mode) ++{ ++ spinlock_t *hash_lock = &dmap_main_hash_lock; ++ spin_lock(hash_lock); ++ ++ if (vxi) ++ vxi->dmap.targets[__mode_to_default(mode)].flags = 0; ++ else /* remove == reset */ ++ dmap_defaults[__mode_to_default(mode)].flags = DATTR_OPEN | mode; ++ ++ spin_unlock(hash_lock); ++ return 0; ++} ++ ++ ++/* __find_mapping() ++ * find a mapping in the hash table ++ * ++ * caller must hold hash_lock ++ */ ++static inline int __find_mapping(xid_t xid, dev_t device, umode_t mode, ++ struct vs_mapping **local, struct vs_mapping **global) ++{ ++ struct hlist_head *hash = dmap_main_hash; ++ struct hlist_head *head = &hash[__hashval(device, DMAP_HASH_BITS)]; ++ struct hlist_node *pos; ++ struct vs_mapping *vdm; ++ ++ *local = NULL; ++ if (global) ++ *global = NULL; ++ ++ hlist_for_each(pos, head) { ++ vdm = hlist_entry(pos, struct vs_mapping, dm_hlist); ++ ++ if ((vdm->device == device) && ++ !((vdm->target.flags ^ mode) & S_IFMT)) { ++ if (vdm->xid == xid) { ++ *local = vdm; ++ return 1; ++ } else if (global && vdm->xid == 0) ++ *global = vdm; ++ } ++ } ++ ++ if (global && *global) ++ return 0; ++ else ++ return -ENOENT; ++} ++ ++ ++/* __lookup_mapping() ++ * find a mapping and store the result in target and flags ++ */ ++static inline int __lookup_mapping(struct vx_info *vxi, ++ dev_t device, dev_t *target, int *flags, umode_t mode) ++{ ++ spinlock_t *hash_lock = &dmap_main_hash_lock; ++ struct vs_mapping *vdm, *global; ++ struct vx_dmap_target *vdmt; ++ int ret = 0; ++ xid_t xid = vxi->vx_id; ++ int index; ++ ++ spin_lock(hash_lock); ++ if (__find_mapping(xid, device, mode, &vdm, &global) > 0) { ++ ret = 1; ++ vdmt = &vdm->target; ++ goto found; ++ } ++ ++ index = __mode_to_default(mode); ++ if (vxi && vxi->dmap.targets[index].flags) { ++ ret = 2; ++ vdmt = &vxi->dmap.targets[index]; ++ } else if (global) { ++ ret = 3; ++ vdmt = &global->target; ++ goto found; ++ } else { ++ ret = 4; ++ vdmt = &dmap_defaults[index]; ++ } ++ ++found: ++ if (target && (vdmt->flags & DATTR_REMAP)) ++ *target = vdmt->target; ++ else if (target) ++ *target = device; ++ if (flags) ++ *flags = vdmt->flags; ++ ++ spin_unlock(hash_lock); ++ ++ return ret; ++} ++ ++ ++/* __remove_mapping() ++ * remove a mapping from the hash table ++ */ ++static inline int __remove_mapping(struct vx_info *vxi, dev_t device, ++ umode_t mode) ++{ ++ spinlock_t *hash_lock = &dmap_main_hash_lock; ++ struct vs_mapping *vdm = NULL; ++ int ret = 0; ++ ++ spin_lock(hash_lock); ++ ++ ret = __find_mapping((vxi ? 
vxi->vx_id : 0), device, mode, &vdm, ++ NULL); ++ vxdprintk(VXD_CBIT(misc, 8), "__remove_mapping: %p[#%d] %08x %04x", ++ vxi, vxi ? vxi->vx_id : 0, device, mode); ++ if (ret < 0) ++ goto out; ++ hlist_del(&vdm->dm_hlist); ++ ++out: ++ spin_unlock(hash_lock); ++ if (vdm) ++ kmem_cache_free(dmap_cachep, vdm); ++ return ret; ++} ++ ++ ++ ++int vs_map_device(struct vx_info *vxi, ++ dev_t device, dev_t *target, umode_t mode) ++{ ++ int ret, flags = DATTR_MASK; ++ ++ if (!vxi) { ++ if (target) ++ *target = device; ++ goto out; ++ } ++ ret = __lookup_mapping(vxi, device, target, &flags, mode); ++ vxdprintk(VXD_CBIT(misc, 8), "vs_map_device: %08x target: %08x flags: %04x mode: %04x mapped=%d", ++ device, target ? *target : 0, flags, mode, ret); ++out: ++ return (flags & DATTR_MASK); ++} ++ ++ ++ ++static int do_set_mapping(struct vx_info *vxi, ++ dev_t device, dev_t target, int flags, umode_t mode) ++{ ++ if (device) { ++ struct vs_mapping *new; ++ ++ new = kmem_cache_alloc(dmap_cachep, GFP_KERNEL); ++ if (!new) ++ return -ENOMEM; ++ ++ INIT_HLIST_NODE(&new->dm_hlist); ++ new->device = device; ++ new->target.target = target; ++ new->target.flags = flags | mode; ++ new->xid = (vxi ? vxi->vx_id : 0); ++ ++ vxdprintk(VXD_CBIT(misc, 8), "do_set_mapping: %08x target: %08x flags: %04x", device, target, flags); ++ __hash_mapping(vxi, new); ++ } else { ++ struct vx_dmap_target new = { ++ .target = target, ++ .flags = flags | mode, ++ }; ++ __set_default(vxi, mode, &new); ++ } ++ return 0; ++} ++ ++ ++static int do_unset_mapping(struct vx_info *vxi, ++ dev_t device, dev_t target, int flags, umode_t mode) ++{ ++ int ret = -EINVAL; ++ ++ if (device) { ++ ret = __remove_mapping(vxi, device, mode); ++ if (ret < 0) ++ goto out; ++ } else { ++ ret = __remove_default(vxi, mode); ++ if (ret < 0) ++ goto out; ++ } ++ ++out: ++ return ret; ++} ++ ++ ++static inline int __user_device(const char __user *name, dev_t *dev, ++ umode_t *mode) ++{ ++ struct nameidata nd; ++ int ret; ++ ++ if (!name) { ++ *dev = 0; ++ return 0; ++ } ++ ret = user_lpath(name, &nd.path); ++ if (ret) ++ return ret; ++ if (nd.path.dentry->d_inode) { ++ *dev = nd.path.dentry->d_inode->i_rdev; ++ *mode = nd.path.dentry->d_inode->i_mode; ++ } ++ path_put(&nd.path); ++ return 0; ++} ++ ++static inline int __mapping_mode(dev_t device, dev_t target, ++ umode_t device_mode, umode_t target_mode, umode_t *mode) ++{ ++ if (device) ++ *mode = device_mode & S_IFMT; ++ else if (target) ++ *mode = target_mode & S_IFMT; ++ else ++ return -EINVAL; ++ ++ /* if both given, device and target mode have to match */ ++ if (device && target && ++ ((device_mode ^ target_mode) & S_IFMT)) ++ return -EINVAL; ++ return 0; ++} ++ ++ ++static inline int do_mapping(struct vx_info *vxi, const char __user *device_path, ++ const char __user *target_path, int flags, int set) ++{ ++ dev_t device = ~0, target = ~0; ++ umode_t device_mode = 0, target_mode = 0, mode; ++ int ret; ++ ++ ret = __user_device(device_path, &device, &device_mode); ++ if (ret) ++ return ret; ++ ret = __user_device(target_path, &target, &target_mode); ++ if (ret) ++ return ret; ++ ++ ret = __mapping_mode(device, target, ++ device_mode, target_mode, &mode); ++ if (ret) ++ return ret; ++ ++ if (set) ++ return do_set_mapping(vxi, device, target, ++ flags, mode); ++ else ++ return do_unset_mapping(vxi, device, target, ++ flags, mode); ++} ++ ++ ++int vc_set_mapping(struct vx_info *vxi, void __user *data) ++{ ++ struct vcmd_set_mapping_v0 vc_data; ++ ++ if (copy_from_user(&vc_data, data, sizeof(vc_data))) ++ 
return -EFAULT; ++ ++ return do_mapping(vxi, vc_data.device, vc_data.target, ++ vc_data.flags, 1); ++} ++ ++int vc_unset_mapping(struct vx_info *vxi, void __user *data) ++{ ++ struct vcmd_set_mapping_v0 vc_data; ++ ++ if (copy_from_user(&vc_data, data, sizeof(vc_data))) ++ return -EFAULT; ++ ++ return do_mapping(vxi, vc_data.device, vc_data.target, ++ vc_data.flags, 0); ++} ++ ++ ++#ifdef CONFIG_COMPAT ++ ++int vc_set_mapping_x32(struct vx_info *vxi, void __user *data) ++{ ++ struct vcmd_set_mapping_v0_x32 vc_data; ++ ++ if (copy_from_user(&vc_data, data, sizeof(vc_data))) ++ return -EFAULT; ++ ++ return do_mapping(vxi, compat_ptr(vc_data.device_ptr), ++ compat_ptr(vc_data.target_ptr), vc_data.flags, 1); ++} ++ ++int vc_unset_mapping_x32(struct vx_info *vxi, void __user *data) ++{ ++ struct vcmd_set_mapping_v0_x32 vc_data; ++ ++ if (copy_from_user(&vc_data, data, sizeof(vc_data))) ++ return -EFAULT; ++ ++ return do_mapping(vxi, compat_ptr(vc_data.device_ptr), ++ compat_ptr(vc_data.target_ptr), vc_data.flags, 0); ++} ++ ++#endif /* CONFIG_COMPAT */ ++ ++ +diff -NurpP --minimal linux-3.2.34/kernel/vserver/dlimit.c linux-3.2.34-vs2.3.2.15/kernel/vserver/dlimit.c +--- linux-3.2.34/kernel/vserver/dlimit.c 1970-01-01 01:00:00.000000000 +0100 ++++ linux-3.2.34-vs2.3.2.15/kernel/vserver/dlimit.c 2011-12-05 19:33:02.000000000 +0100 +@@ -0,0 +1,531 @@ ++/* ++ * linux/kernel/vserver/dlimit.c ++ * ++ * Virtual Server: Context Disk Limits ++ * ++ * Copyright (C) 2004-2009 Herbert Pötzl ++ * ++ * V0.01 initial version ++ * V0.02 compat32 splitup ++ * V0.03 extended interface ++ * ++ */ ++ ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++// #include ++ ++#include ++ ++/* __alloc_dl_info() ++ ++ * allocate an initialized dl_info struct ++ * doesn't make it visible (hash) */ ++ ++static struct dl_info *__alloc_dl_info(struct super_block *sb, tag_t tag) ++{ ++ struct dl_info *new = NULL; ++ ++ vxdprintk(VXD_CBIT(dlim, 5), ++ "alloc_dl_info(%p,%d)*", sb, tag); ++ ++ /* would this benefit from a slab cache? 
*/ ++ new = kmalloc(sizeof(struct dl_info), GFP_KERNEL); ++ if (!new) ++ return 0; ++ ++ memset(new, 0, sizeof(struct dl_info)); ++ new->dl_tag = tag; ++ new->dl_sb = sb; ++ // INIT_RCU_HEAD(&new->dl_rcu); ++ INIT_HLIST_NODE(&new->dl_hlist); ++ spin_lock_init(&new->dl_lock); ++ atomic_set(&new->dl_refcnt, 0); ++ atomic_set(&new->dl_usecnt, 0); ++ ++ /* rest of init goes here */ ++ ++ vxdprintk(VXD_CBIT(dlim, 4), ++ "alloc_dl_info(%p,%d) = %p", sb, tag, new); ++ return new; ++} ++ ++/* __dealloc_dl_info() ++ ++ * final disposal of dl_info */ ++ ++static void __dealloc_dl_info(struct dl_info *dli) ++{ ++ vxdprintk(VXD_CBIT(dlim, 4), ++ "dealloc_dl_info(%p)", dli); ++ ++ dli->dl_hlist.next = LIST_POISON1; ++ dli->dl_tag = -1; ++ dli->dl_sb = 0; ++ ++ BUG_ON(atomic_read(&dli->dl_usecnt)); ++ BUG_ON(atomic_read(&dli->dl_refcnt)); ++ ++ kfree(dli); ++} ++ ++ ++/* hash table for dl_info hash */ ++ ++#define DL_HASH_SIZE 13 ++ ++struct hlist_head dl_info_hash[DL_HASH_SIZE]; ++ ++static DEFINE_SPINLOCK(dl_info_hash_lock); ++ ++ ++static inline unsigned int __hashval(struct super_block *sb, tag_t tag) ++{ ++ return ((tag ^ (unsigned long)sb) % DL_HASH_SIZE); ++} ++ ++ ++ ++/* __hash_dl_info() ++ ++ * add the dli to the global hash table ++ * requires the hash_lock to be held */ ++ ++static inline void __hash_dl_info(struct dl_info *dli) ++{ ++ struct hlist_head *head; ++ ++ vxdprintk(VXD_CBIT(dlim, 6), ++ "__hash_dl_info: %p[#%d]", dli, dli->dl_tag); ++ get_dl_info(dli); ++ head = &dl_info_hash[__hashval(dli->dl_sb, dli->dl_tag)]; ++ hlist_add_head_rcu(&dli->dl_hlist, head); ++} ++ ++/* __unhash_dl_info() ++ ++ * remove the dli from the global hash table ++ * requires the hash_lock to be held */ ++ ++static inline void __unhash_dl_info(struct dl_info *dli) ++{ ++ vxdprintk(VXD_CBIT(dlim, 6), ++ "__unhash_dl_info: %p[#%d]", dli, dli->dl_tag); ++ hlist_del_rcu(&dli->dl_hlist); ++ put_dl_info(dli); ++} ++ ++ ++/* __lookup_dl_info() ++ ++ * requires the rcu_read_lock() ++ * doesn't increment the dl_refcnt */ ++ ++static inline struct dl_info *__lookup_dl_info(struct super_block *sb, tag_t tag) ++{ ++ struct hlist_head *head = &dl_info_hash[__hashval(sb, tag)]; ++ struct hlist_node *pos; ++ struct dl_info *dli; ++ ++ hlist_for_each_entry_rcu(dli, pos, head, dl_hlist) { ++ ++ if (dli->dl_tag == tag && dli->dl_sb == sb) { ++ return dli; ++ } ++ } ++ return NULL; ++} ++ ++ ++struct dl_info *locate_dl_info(struct super_block *sb, tag_t tag) ++{ ++ struct dl_info *dli; ++ ++ rcu_read_lock(); ++ dli = get_dl_info(__lookup_dl_info(sb, tag)); ++ vxdprintk(VXD_CBIT(dlim, 7), ++ "locate_dl_info(%p,#%d) = %p", sb, tag, dli); ++ rcu_read_unlock(); ++ return dli; ++} ++ ++void rcu_free_dl_info(struct rcu_head *head) ++{ ++ struct dl_info *dli = container_of(head, struct dl_info, dl_rcu); ++ int usecnt, refcnt; ++ ++ BUG_ON(!dli || !head); ++ ++ usecnt = atomic_read(&dli->dl_usecnt); ++ BUG_ON(usecnt < 0); ++ ++ refcnt = atomic_read(&dli->dl_refcnt); ++ BUG_ON(refcnt < 0); ++ ++ vxdprintk(VXD_CBIT(dlim, 3), ++ "rcu_free_dl_info(%p)", dli); ++ if (!usecnt) ++ __dealloc_dl_info(dli); ++ else ++ printk("!!! 
rcu didn't free\n"); ++} ++ ++ ++ ++ ++static int do_addrem_dlimit(uint32_t id, const char __user *name, ++ uint32_t flags, int add) ++{ ++ struct path path; ++ int ret; ++ ++ ret = user_lpath(name, &path); ++ if (!ret) { ++ struct super_block *sb; ++ struct dl_info *dli; ++ ++ ret = -EINVAL; ++ if (!path.dentry->d_inode) ++ goto out_release; ++ if (!(sb = path.dentry->d_inode->i_sb)) ++ goto out_release; ++ ++ if (add) { ++ dli = __alloc_dl_info(sb, id); ++ spin_lock(&dl_info_hash_lock); ++ ++ ret = -EEXIST; ++ if (__lookup_dl_info(sb, id)) ++ goto out_unlock; ++ __hash_dl_info(dli); ++ dli = NULL; ++ } else { ++ spin_lock(&dl_info_hash_lock); ++ dli = __lookup_dl_info(sb, id); ++ ++ ret = -ESRCH; ++ if (!dli) ++ goto out_unlock; ++ __unhash_dl_info(dli); ++ } ++ ret = 0; ++ out_unlock: ++ spin_unlock(&dl_info_hash_lock); ++ if (add && dli) ++ __dealloc_dl_info(dli); ++ out_release: ++ path_put(&path); ++ } ++ return ret; ++} ++ ++int vc_add_dlimit(uint32_t id, void __user *data) ++{ ++ struct vcmd_ctx_dlimit_base_v0 vc_data; ++ ++ if (copy_from_user(&vc_data, data, sizeof(vc_data))) ++ return -EFAULT; ++ ++ return do_addrem_dlimit(id, vc_data.name, vc_data.flags, 1); ++} ++ ++int vc_rem_dlimit(uint32_t id, void __user *data) ++{ ++ struct vcmd_ctx_dlimit_base_v0 vc_data; ++ ++ if (copy_from_user(&vc_data, data, sizeof(vc_data))) ++ return -EFAULT; ++ ++ return do_addrem_dlimit(id, vc_data.name, vc_data.flags, 0); ++} ++ ++#ifdef CONFIG_COMPAT ++ ++int vc_add_dlimit_x32(uint32_t id, void __user *data) ++{ ++ struct vcmd_ctx_dlimit_base_v0_x32 vc_data; ++ ++ if (copy_from_user(&vc_data, data, sizeof(vc_data))) ++ return -EFAULT; ++ ++ return do_addrem_dlimit(id, ++ compat_ptr(vc_data.name_ptr), vc_data.flags, 1); ++} ++ ++int vc_rem_dlimit_x32(uint32_t id, void __user *data) ++{ ++ struct vcmd_ctx_dlimit_base_v0_x32 vc_data; ++ ++ if (copy_from_user(&vc_data, data, sizeof(vc_data))) ++ return -EFAULT; ++ ++ return do_addrem_dlimit(id, ++ compat_ptr(vc_data.name_ptr), vc_data.flags, 0); ++} ++ ++#endif /* CONFIG_COMPAT */ ++ ++ ++static inline ++int do_set_dlimit(uint32_t id, const char __user *name, ++ uint32_t space_used, uint32_t space_total, ++ uint32_t inodes_used, uint32_t inodes_total, ++ uint32_t reserved, uint32_t flags) ++{ ++ struct path path; ++ int ret; ++ ++ ret = user_lpath(name, &path); ++ if (!ret) { ++ struct super_block *sb; ++ struct dl_info *dli; ++ ++ ret = -EINVAL; ++ if (!path.dentry->d_inode) ++ goto out_release; ++ if (!(sb = path.dentry->d_inode->i_sb)) ++ goto out_release; ++ ++ /* sanity checks */ ++ if ((reserved != CDLIM_KEEP && ++ reserved > 100) || ++ (inodes_used != CDLIM_KEEP && ++ inodes_used > inodes_total) || ++ (space_used != CDLIM_KEEP && ++ space_used > space_total)) ++ goto out_release; ++ ++ ret = -ESRCH; ++ dli = locate_dl_info(sb, id); ++ if (!dli) ++ goto out_release; ++ ++ spin_lock(&dli->dl_lock); ++ ++ if (inodes_used != CDLIM_KEEP) ++ dli->dl_inodes_used = inodes_used; ++ if (inodes_total != CDLIM_KEEP) ++ dli->dl_inodes_total = inodes_total; ++ if (space_used != CDLIM_KEEP) ++ dli->dl_space_used = dlimit_space_32to64( ++ space_used, flags, DLIMS_USED); ++ ++ if (space_total == CDLIM_INFINITY) ++ dli->dl_space_total = DLIM_INFINITY; ++ else if (space_total != CDLIM_KEEP) ++ dli->dl_space_total = dlimit_space_32to64( ++ space_total, flags, DLIMS_TOTAL); ++ ++ if (reserved != CDLIM_KEEP) ++ dli->dl_nrlmult = (1 << 10) * (100 - reserved) / 100; ++ ++ spin_unlock(&dli->dl_lock); ++ ++ put_dl_info(dli); ++ ret = 0; ++ ++ out_release: ++ 
path_put(&path); ++ } ++ return ret; ++} ++ ++int vc_set_dlimit(uint32_t id, void __user *data) ++{ ++ struct vcmd_ctx_dlimit_v0 vc_data; ++ ++ if (copy_from_user(&vc_data, data, sizeof(vc_data))) ++ return -EFAULT; ++ ++ return do_set_dlimit(id, vc_data.name, ++ vc_data.space_used, vc_data.space_total, ++ vc_data.inodes_used, vc_data.inodes_total, ++ vc_data.reserved, vc_data.flags); ++} ++ ++#ifdef CONFIG_COMPAT ++ ++int vc_set_dlimit_x32(uint32_t id, void __user *data) ++{ ++ struct vcmd_ctx_dlimit_v0_x32 vc_data; ++ ++ if (copy_from_user(&vc_data, data, sizeof(vc_data))) ++ return -EFAULT; ++ ++ return do_set_dlimit(id, compat_ptr(vc_data.name_ptr), ++ vc_data.space_used, vc_data.space_total, ++ vc_data.inodes_used, vc_data.inodes_total, ++ vc_data.reserved, vc_data.flags); ++} ++ ++#endif /* CONFIG_COMPAT */ ++ ++ ++static inline ++int do_get_dlimit(uint32_t id, const char __user *name, ++ uint32_t *space_used, uint32_t *space_total, ++ uint32_t *inodes_used, uint32_t *inodes_total, ++ uint32_t *reserved, uint32_t *flags) ++{ ++ struct path path; ++ int ret; ++ ++ ret = user_lpath(name, &path); ++ if (!ret) { ++ struct super_block *sb; ++ struct dl_info *dli; ++ ++ ret = -EINVAL; ++ if (!path.dentry->d_inode) ++ goto out_release; ++ if (!(sb = path.dentry->d_inode->i_sb)) ++ goto out_release; ++ ++ ret = -ESRCH; ++ dli = locate_dl_info(sb, id); ++ if (!dli) ++ goto out_release; ++ ++ spin_lock(&dli->dl_lock); ++ *inodes_used = dli->dl_inodes_used; ++ *inodes_total = dli->dl_inodes_total; ++ ++ *space_used = dlimit_space_64to32( ++ dli->dl_space_used, flags, DLIMS_USED); ++ ++ if (dli->dl_space_total == DLIM_INFINITY) ++ *space_total = CDLIM_INFINITY; ++ else ++ *space_total = dlimit_space_64to32( ++ dli->dl_space_total, flags, DLIMS_TOTAL); ++ ++ *reserved = 100 - ((dli->dl_nrlmult * 100 + 512) >> 10); ++ spin_unlock(&dli->dl_lock); ++ ++ put_dl_info(dli); ++ ret = -EFAULT; ++ ++ ret = 0; ++ out_release: ++ path_put(&path); ++ } ++ return ret; ++} ++ ++ ++int vc_get_dlimit(uint32_t id, void __user *data) ++{ ++ struct vcmd_ctx_dlimit_v0 vc_data; ++ int ret; ++ ++ if (copy_from_user(&vc_data, data, sizeof(vc_data))) ++ return -EFAULT; ++ ++ ret = do_get_dlimit(id, vc_data.name, ++ &vc_data.space_used, &vc_data.space_total, ++ &vc_data.inodes_used, &vc_data.inodes_total, ++ &vc_data.reserved, &vc_data.flags); ++ if (ret) ++ return ret; ++ ++ if (copy_to_user(data, &vc_data, sizeof(vc_data))) ++ return -EFAULT; ++ return 0; ++} ++ ++#ifdef CONFIG_COMPAT ++ ++int vc_get_dlimit_x32(uint32_t id, void __user *data) ++{ ++ struct vcmd_ctx_dlimit_v0_x32 vc_data; ++ int ret; ++ ++ if (copy_from_user(&vc_data, data, sizeof(vc_data))) ++ return -EFAULT; ++ ++ ret = do_get_dlimit(id, compat_ptr(vc_data.name_ptr), ++ &vc_data.space_used, &vc_data.space_total, ++ &vc_data.inodes_used, &vc_data.inodes_total, ++ &vc_data.reserved, &vc_data.flags); ++ if (ret) ++ return ret; ++ ++ if (copy_to_user(data, &vc_data, sizeof(vc_data))) ++ return -EFAULT; ++ return 0; ++} ++ ++#endif /* CONFIG_COMPAT */ ++ ++ ++void vx_vsi_statfs(struct super_block *sb, struct kstatfs *buf) ++{ ++ struct dl_info *dli; ++ __u64 blimit, bfree, bavail; ++ __u32 ifree; ++ ++ dli = locate_dl_info(sb, dx_current_tag()); ++ if (!dli) ++ return; ++ ++ spin_lock(&dli->dl_lock); ++ if (dli->dl_inodes_total == (unsigned long)DLIM_INFINITY) ++ goto no_ilim; ++ ++ /* reduce max inodes available to limit */ ++ if (buf->f_files > dli->dl_inodes_total) ++ buf->f_files = dli->dl_inodes_total; ++ ++ ifree = dli->dl_inodes_total - 
dli->dl_inodes_used; ++ /* reduce free inodes to min */ ++ if (ifree < buf->f_ffree) ++ buf->f_ffree = ifree; ++ ++no_ilim: ++ if (dli->dl_space_total == DLIM_INFINITY) ++ goto no_blim; ++ ++ blimit = dli->dl_space_total >> sb->s_blocksize_bits; ++ ++ if (dli->dl_space_total < dli->dl_space_used) ++ bfree = 0; ++ else ++ bfree = (dli->dl_space_total - dli->dl_space_used) ++ >> sb->s_blocksize_bits; ++ ++ bavail = ((dli->dl_space_total >> 10) * dli->dl_nrlmult); ++ if (bavail < dli->dl_space_used) ++ bavail = 0; ++ else ++ bavail = (bavail - dli->dl_space_used) ++ >> sb->s_blocksize_bits; ++ ++ /* reduce max space available to limit */ ++ if (buf->f_blocks > blimit) ++ buf->f_blocks = blimit; ++ ++ /* reduce free space to min */ ++ if (bfree < buf->f_bfree) ++ buf->f_bfree = bfree; ++ ++ /* reduce avail space to min */ ++ if (bavail < buf->f_bavail) ++ buf->f_bavail = bavail; ++ ++no_blim: ++ spin_unlock(&dli->dl_lock); ++ put_dl_info(dli); ++ ++ return; ++} ++ ++#include ++ ++EXPORT_SYMBOL_GPL(locate_dl_info); ++EXPORT_SYMBOL_GPL(rcu_free_dl_info); ++ +diff -NurpP --minimal linux-3.2.34/kernel/vserver/helper.c linux-3.2.34-vs2.3.2.15/kernel/vserver/helper.c +--- linux-3.2.34/kernel/vserver/helper.c 1970-01-01 01:00:00.000000000 +0100 ++++ linux-3.2.34-vs2.3.2.15/kernel/vserver/helper.c 2012-09-16 18:26:07.000000000 +0200 +@@ -0,0 +1,229 @@ ++/* ++ * linux/kernel/vserver/helper.c ++ * ++ * Virtual Context Support ++ * ++ * Copyright (C) 2004-2007 Herbert Pötzl ++ * ++ * V0.01 basic helper ++ * ++ */ ++ ++#include ++#include ++#include ++#include ++#include ++ ++ ++char vshelper_path[255] = "/sbin/vshelper"; ++ ++static int vshelper_init(struct subprocess_info *info, struct cred *new_cred) ++{ ++ current->flags &= ~PF_THREAD_BOUND; ++ return 0; ++} ++ ++static int do_vshelper(char *name, char *argv[], char *envp[], int sync) ++{ ++ int ret; ++ ++ if ((ret = call_usermodehelper_fns(name, argv, envp, ++ sync ? UMH_WAIT_PROC : UMH_WAIT_EXEC, ++ vshelper_init, NULL, NULL))) { ++ printk(KERN_WARNING "%s: (%s %s) returned %s with %d\n", ++ name, argv[1], argv[2], ++ sync ? "sync" : "async", ret); ++ } ++ vxdprintk(VXD_CBIT(switch, 4), ++ "%s: (%s %s) returned %s with %d", ++ name, argv[1], argv[2], sync ? "sync" : "async", ret); ++ return ret; ++} ++ ++/* ++ * vshelper path is set via /proc/sys ++ * invoked by vserver sys_reboot(), with ++ * the following arguments ++ * ++ * argv [0] = vshelper_path; ++ * argv [1] = action: "restart", "halt", "poweroff", ... 
++ * argv [2] = context identifier ++ * ++ * envp [*] = type-specific parameters ++ */ ++ ++long vs_reboot_helper(struct vx_info *vxi, int cmd, void __user *arg) ++{ ++ char id_buf[8], cmd_buf[16]; ++ char uid_buf[16], pid_buf[16]; ++ int ret; ++ ++ char *argv[] = {vshelper_path, NULL, id_buf, 0}; ++ char *envp[] = {"HOME=/", "TERM=linux", ++ "PATH=/sbin:/usr/sbin:/bin:/usr/bin", ++ uid_buf, pid_buf, cmd_buf, 0}; ++ ++ if (vx_info_state(vxi, VXS_HELPER)) ++ return -EAGAIN; ++ vxi->vx_state |= VXS_HELPER; ++ ++ snprintf(id_buf, sizeof(id_buf), "%d", vxi->vx_id); ++ ++ snprintf(cmd_buf, sizeof(cmd_buf), "VS_CMD=%08x", cmd); ++ snprintf(uid_buf, sizeof(uid_buf), "VS_UID=%d", current_uid()); ++ snprintf(pid_buf, sizeof(pid_buf), "VS_PID=%d", current->pid); ++ ++ switch (cmd) { ++ case LINUX_REBOOT_CMD_RESTART: ++ argv[1] = "restart"; ++ break; ++ ++ case LINUX_REBOOT_CMD_HALT: ++ argv[1] = "halt"; ++ break; ++ ++ case LINUX_REBOOT_CMD_POWER_OFF: ++ argv[1] = "poweroff"; ++ break; ++ ++ case LINUX_REBOOT_CMD_SW_SUSPEND: ++ argv[1] = "swsusp"; ++ break; ++ ++ case LINUX_REBOOT_CMD_OOM: ++ argv[1] = "oom"; ++ break; ++ ++ default: ++ vxi->vx_state &= ~VXS_HELPER; ++ return 0; ++ } ++ ++ ret = do_vshelper(vshelper_path, argv, envp, 0); ++ vxi->vx_state &= ~VXS_HELPER; ++ __wakeup_vx_info(vxi); ++ return (ret) ? -EPERM : 0; ++} ++ ++ ++long vs_reboot(unsigned int cmd, void __user *arg) ++{ ++ struct vx_info *vxi = current_vx_info(); ++ long ret = 0; ++ ++ vxdprintk(VXD_CBIT(misc, 5), ++ "vs_reboot(%p[#%d],%u)", ++ vxi, vxi ? vxi->vx_id : 0, cmd); ++ ++ ret = vs_reboot_helper(vxi, cmd, arg); ++ if (ret) ++ return ret; ++ ++ vxi->reboot_cmd = cmd; ++ if (vx_info_flags(vxi, VXF_REBOOT_KILL, 0)) { ++ switch (cmd) { ++ case LINUX_REBOOT_CMD_RESTART: ++ case LINUX_REBOOT_CMD_HALT: ++ case LINUX_REBOOT_CMD_POWER_OFF: ++ vx_info_kill(vxi, 0, SIGKILL); ++ vx_info_kill(vxi, 1, SIGKILL); ++ default: ++ break; ++ } ++ } ++ return 0; ++} ++ ++long vs_oom_action(unsigned int cmd) ++{ ++ struct vx_info *vxi = current_vx_info(); ++ long ret = 0; ++ ++ vxdprintk(VXD_CBIT(misc, 5), ++ "vs_oom_action(%p[#%d],%u)", ++ vxi, vxi ? 
vxi->vx_id : 0, cmd); ++ ++ ret = vs_reboot_helper(vxi, cmd, NULL); ++ if (ret) ++ return ret; ++ ++ vxi->reboot_cmd = cmd; ++ if (vx_info_flags(vxi, VXF_REBOOT_KILL, 0)) { ++ vx_info_kill(vxi, 0, SIGKILL); ++ vx_info_kill(vxi, 1, SIGKILL); ++ } ++ return 0; ++} ++ ++/* ++ * argv [0] = vshelper_path; ++ * argv [1] = action: "startup", "shutdown" ++ * argv [2] = context identifier ++ * ++ * envp [*] = type-specific parameters ++ */ ++ ++long vs_state_change(struct vx_info *vxi, unsigned int cmd) ++{ ++ char id_buf[8], cmd_buf[16]; ++ char *argv[] = {vshelper_path, NULL, id_buf, 0}; ++ char *envp[] = {"HOME=/", "TERM=linux", ++ "PATH=/sbin:/usr/sbin:/bin:/usr/bin", cmd_buf, 0}; ++ ++ if (!vx_info_flags(vxi, VXF_SC_HELPER, 0)) ++ return 0; ++ ++ snprintf(id_buf, sizeof(id_buf), "%d", vxi->vx_id); ++ snprintf(cmd_buf, sizeof(cmd_buf), "VS_CMD=%08x", cmd); ++ ++ switch (cmd) { ++ case VSC_STARTUP: ++ argv[1] = "startup"; ++ break; ++ case VSC_SHUTDOWN: ++ argv[1] = "shutdown"; ++ break; ++ default: ++ return 0; ++ } ++ ++ return do_vshelper(vshelper_path, argv, envp, 1); ++} ++ ++ ++/* ++ * argv [0] = vshelper_path; ++ * argv [1] = action: "netup", "netdown" ++ * argv [2] = context identifier ++ * ++ * envp [*] = type-specific parameters ++ */ ++ ++long vs_net_change(struct nx_info *nxi, unsigned int cmd) ++{ ++ char id_buf[8], cmd_buf[16]; ++ char *argv[] = {vshelper_path, NULL, id_buf, 0}; ++ char *envp[] = {"HOME=/", "TERM=linux", ++ "PATH=/sbin:/usr/sbin:/bin:/usr/bin", cmd_buf, 0}; ++ ++ if (!nx_info_flags(nxi, NXF_SC_HELPER, 0)) ++ return 0; ++ ++ snprintf(id_buf, sizeof(id_buf), "%d", nxi->nx_id); ++ snprintf(cmd_buf, sizeof(cmd_buf), "VS_CMD=%08x", cmd); ++ ++ switch (cmd) { ++ case VSC_NETUP: ++ argv[1] = "netup"; ++ break; ++ case VSC_NETDOWN: ++ argv[1] = "netdown"; ++ break; ++ default: ++ return 0; ++ } ++ ++ return do_vshelper(vshelper_path, argv, envp, 1); ++} ++ +diff -NurpP --minimal linux-3.2.34/kernel/vserver/history.c linux-3.2.34-vs2.3.2.15/kernel/vserver/history.c +--- linux-3.2.34/kernel/vserver/history.c 1970-01-01 01:00:00.000000000 +0100 ++++ linux-3.2.34-vs2.3.2.15/kernel/vserver/history.c 2011-12-05 19:33:02.000000000 +0100 +@@ -0,0 +1,258 @@ ++/* ++ * kernel/vserver/history.c ++ * ++ * Virtual Context History Backtrace ++ * ++ * Copyright (C) 2004-2007 Herbert Pötzl ++ * ++ * V0.01 basic structure ++ * V0.02 hash/unhash and trace ++ * V0.03 preemption fixes ++ * ++ */ ++ ++#include ++#include ++ ++#include ++#include ++#include ++#include ++ ++ ++#ifdef CONFIG_VSERVER_HISTORY ++#define VXH_SIZE CONFIG_VSERVER_HISTORY_SIZE ++#else ++#define VXH_SIZE 64 ++#endif ++ ++struct _vx_history { ++ unsigned int counter; ++ ++ struct _vx_hist_entry entry[VXH_SIZE + 1]; ++}; ++ ++ ++DEFINE_PER_CPU(struct _vx_history, vx_history_buffer); ++ ++unsigned volatile int vxh_active = 1; ++ ++static atomic_t sequence = ATOMIC_INIT(0); ++ ++ ++/* vxh_advance() ++ ++ * requires disabled preemption */ ++ ++struct _vx_hist_entry *vxh_advance(void *loc) ++{ ++ unsigned int cpu = smp_processor_id(); ++ struct _vx_history *hist = &per_cpu(vx_history_buffer, cpu); ++ struct _vx_hist_entry *entry; ++ unsigned int index; ++ ++ index = vxh_active ? 
(hist->counter++ % VXH_SIZE) : VXH_SIZE; ++ entry = &hist->entry[index]; ++ ++ entry->seq = atomic_inc_return(&sequence); ++ entry->loc = loc; ++ return entry; ++} ++ ++EXPORT_SYMBOL_GPL(vxh_advance); ++ ++ ++#define VXH_LOC_FMTS "(#%04x,*%d):%p" ++ ++#define VXH_LOC_ARGS(e) (e)->seq, cpu, (e)->loc ++ ++ ++#define VXH_VXI_FMTS "%p[#%d,%d.%d]" ++ ++#define VXH_VXI_ARGS(e) (e)->vxi.ptr, \ ++ (e)->vxi.ptr ? (e)->vxi.xid : 0, \ ++ (e)->vxi.ptr ? (e)->vxi.usecnt : 0, \ ++ (e)->vxi.ptr ? (e)->vxi.tasks : 0 ++ ++void vxh_dump_entry(struct _vx_hist_entry *e, unsigned cpu) ++{ ++ switch (e->type) { ++ case VXH_THROW_OOPS: ++ printk( VXH_LOC_FMTS " oops \n", VXH_LOC_ARGS(e)); ++ break; ++ ++ case VXH_GET_VX_INFO: ++ case VXH_PUT_VX_INFO: ++ printk( VXH_LOC_FMTS " %s_vx_info " VXH_VXI_FMTS "\n", ++ VXH_LOC_ARGS(e), ++ (e->type == VXH_GET_VX_INFO) ? "get" : "put", ++ VXH_VXI_ARGS(e)); ++ break; ++ ++ case VXH_INIT_VX_INFO: ++ case VXH_SET_VX_INFO: ++ case VXH_CLR_VX_INFO: ++ printk( VXH_LOC_FMTS " %s_vx_info " VXH_VXI_FMTS " @%p\n", ++ VXH_LOC_ARGS(e), ++ (e->type == VXH_INIT_VX_INFO) ? "init" : ++ ((e->type == VXH_SET_VX_INFO) ? "set" : "clr"), ++ VXH_VXI_ARGS(e), e->sc.data); ++ break; ++ ++ case VXH_CLAIM_VX_INFO: ++ case VXH_RELEASE_VX_INFO: ++ printk( VXH_LOC_FMTS " %s_vx_info " VXH_VXI_FMTS " @%p\n", ++ VXH_LOC_ARGS(e), ++ (e->type == VXH_CLAIM_VX_INFO) ? "claim" : "release", ++ VXH_VXI_ARGS(e), e->sc.data); ++ break; ++ ++ case VXH_ALLOC_VX_INFO: ++ case VXH_DEALLOC_VX_INFO: ++ printk( VXH_LOC_FMTS " %s_vx_info " VXH_VXI_FMTS "\n", ++ VXH_LOC_ARGS(e), ++ (e->type == VXH_ALLOC_VX_INFO) ? "alloc" : "dealloc", ++ VXH_VXI_ARGS(e)); ++ break; ++ ++ case VXH_HASH_VX_INFO: ++ case VXH_UNHASH_VX_INFO: ++ printk( VXH_LOC_FMTS " __%s_vx_info " VXH_VXI_FMTS "\n", ++ VXH_LOC_ARGS(e), ++ (e->type == VXH_HASH_VX_INFO) ? "hash" : "unhash", ++ VXH_VXI_ARGS(e)); ++ break; ++ ++ case VXH_LOC_VX_INFO: ++ case VXH_LOOKUP_VX_INFO: ++ case VXH_CREATE_VX_INFO: ++ printk( VXH_LOC_FMTS " __%s_vx_info [#%d] -> " VXH_VXI_FMTS "\n", ++ VXH_LOC_ARGS(e), ++ (e->type == VXH_CREATE_VX_INFO) ? "create" : ++ ((e->type == VXH_LOC_VX_INFO) ? "loc" : "lookup"), ++ e->ll.arg, VXH_VXI_ARGS(e)); ++ break; ++ } ++} ++ ++static void __vxh_dump_history(void) ++{ ++ unsigned int i, cpu; ++ ++ printk("History:\tSEQ: %8x\tNR_CPUS: %d\n", ++ atomic_read(&sequence), NR_CPUS); ++ ++ for (i = 0; i < VXH_SIZE; i++) { ++ for_each_online_cpu(cpu) { ++ struct _vx_history *hist = ++ &per_cpu(vx_history_buffer, cpu); ++ unsigned int index = (hist->counter - i) % VXH_SIZE; ++ struct _vx_hist_entry *entry = &hist->entry[index]; ++ ++ vxh_dump_entry(entry, cpu); ++ } ++ } ++} ++ ++void vxh_dump_history(void) ++{ ++ vxh_active = 0; ++#ifdef CONFIG_SMP ++ local_irq_enable(); ++ smp_send_stop(); ++ local_irq_disable(); ++#endif ++ __vxh_dump_history(); ++} ++ ++ ++/* vserver syscall commands below here */ ++ ++ ++int vc_dump_history(uint32_t id) ++{ ++ vxh_active = 0; ++ __vxh_dump_history(); ++ vxh_active = 1; ++ ++ return 0; ++} ++ ++ ++int do_read_history(struct __user _vx_hist_entry *data, ++ int cpu, uint32_t *index, uint32_t *count) ++{ ++ int pos, ret = 0; ++ struct _vx_history *hist = &per_cpu(vx_history_buffer, cpu); ++ int end = hist->counter; ++ int start = end - VXH_SIZE + 2; ++ int idx = *index; ++ ++ /* special case: get current pos */ ++ if (!*count) { ++ *index = end; ++ return 0; ++ } ++ ++ /* have we lost some data? 
*/ ++ if (idx < start) ++ idx = start; ++ ++ for (pos = 0; (pos < *count) && (idx < end); pos++, idx++) { ++ struct _vx_hist_entry *entry = ++ &hist->entry[idx % VXH_SIZE]; ++ ++ /* send entry to userspace */ ++ ret = copy_to_user(&data[pos], entry, sizeof(*entry)); ++ if (ret) ++ break; ++ } ++ /* save new index and count */ ++ *index = idx; ++ *count = pos; ++ return ret ? ret : (*index < end); ++} ++ ++int vc_read_history(uint32_t id, void __user *data) ++{ ++ struct vcmd_read_history_v0 vc_data; ++ int ret; ++ ++ if (id >= NR_CPUS) ++ return -EINVAL; ++ ++ if (copy_from_user(&vc_data, data, sizeof(vc_data))) ++ return -EFAULT; ++ ++ ret = do_read_history((struct __user _vx_hist_entry *)vc_data.data, ++ id, &vc_data.index, &vc_data.count); ++ ++ if (copy_to_user(data, &vc_data, sizeof(vc_data))) ++ return -EFAULT; ++ return ret; ++} ++ ++#ifdef CONFIG_COMPAT ++ ++int vc_read_history_x32(uint32_t id, void __user *data) ++{ ++ struct vcmd_read_history_v0_x32 vc_data; ++ int ret; ++ ++ if (id >= NR_CPUS) ++ return -EINVAL; ++ ++ if (copy_from_user(&vc_data, data, sizeof(vc_data))) ++ return -EFAULT; ++ ++ ret = do_read_history((struct __user _vx_hist_entry *) ++ compat_ptr(vc_data.data_ptr), ++ id, &vc_data.index, &vc_data.count); ++ ++ if (copy_to_user(data, &vc_data, sizeof(vc_data))) ++ return -EFAULT; ++ return ret; ++} ++ ++#endif /* CONFIG_COMPAT */ ++ +diff -NurpP --minimal linux-3.2.34/kernel/vserver/inet.c linux-3.2.34-vs2.3.2.15/kernel/vserver/inet.c +--- linux-3.2.34/kernel/vserver/inet.c 1970-01-01 01:00:00.000000000 +0100 ++++ linux-3.2.34-vs2.3.2.15/kernel/vserver/inet.c 2011-12-15 01:33:09.000000000 +0100 +@@ -0,0 +1,226 @@ ++ ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++ ++ ++int nx_v4_addr_conflict(struct nx_info *nxi1, struct nx_info *nxi2) ++{ ++ int ret = 0; ++ ++ if (!nxi1 || !nxi2 || nxi1 == nxi2) ++ ret = 1; ++ else { ++ struct nx_addr_v4 *ptr; ++ ++ for (ptr = &nxi1->v4; ptr; ptr = ptr->next) { ++ if (v4_nx_addr_in_nx_info(nxi2, ptr, -1)) { ++ ret = 1; ++ break; ++ } ++ } ++ } ++ ++ vxdprintk(VXD_CBIT(net, 2), ++ "nx_v4_addr_conflict(%p,%p): %d", ++ nxi1, nxi2, ret); ++ ++ return ret; ++} ++ ++ ++#ifdef CONFIG_IPV6 ++ ++int nx_v6_addr_conflict(struct nx_info *nxi1, struct nx_info *nxi2) ++{ ++ int ret = 0; ++ ++ if (!nxi1 || !nxi2 || nxi1 == nxi2) ++ ret = 1; ++ else { ++ struct nx_addr_v6 *ptr; ++ ++ for (ptr = &nxi1->v6; ptr; ptr = ptr->next) { ++ if (v6_nx_addr_in_nx_info(nxi2, ptr, -1)) { ++ ret = 1; ++ break; ++ } ++ } ++ } ++ ++ vxdprintk(VXD_CBIT(net, 2), ++ "nx_v6_addr_conflict(%p,%p): %d", ++ nxi1, nxi2, ret); ++ ++ return ret; ++} ++ ++#endif ++ ++int v4_dev_in_nx_info(struct net_device *dev, struct nx_info *nxi) ++{ ++ struct in_device *in_dev; ++ struct in_ifaddr **ifap; ++ struct in_ifaddr *ifa; ++ int ret = 0; ++ ++ if (!dev) ++ goto out; ++ in_dev = in_dev_get(dev); ++ if (!in_dev) ++ goto out; ++ ++ for (ifap = &in_dev->ifa_list; (ifa = *ifap) != NULL; ++ ifap = &ifa->ifa_next) { ++ if (v4_addr_in_nx_info(nxi, ifa->ifa_local, NXA_MASK_SHOW)) { ++ ret = 1; ++ break; ++ } ++ } ++ in_dev_put(in_dev); ++out: ++ return ret; ++} ++ ++ ++#ifdef CONFIG_IPV6 ++ ++int v6_dev_in_nx_info(struct net_device *dev, struct nx_info *nxi) ++{ ++ struct inet6_dev *in_dev; ++ struct inet6_ifaddr *ifa; ++ int ret = 0; ++ ++ if (!dev) ++ goto out; ++ in_dev = in6_dev_get(dev); ++ if (!in_dev) ++ goto out; ++ ++ // for (ifap = &in_dev->addr_list; (ifa = *ifap) != NULL; ++ list_for_each_entry(ifa, &in_dev->addr_list, if_list) { ++ 
if (v6_addr_in_nx_info(nxi, &ifa->addr, -1)) { ++ ret = 1; ++ break; ++ } ++ } ++ in6_dev_put(in_dev); ++out: ++ return ret; ++} ++ ++#endif ++ ++int dev_in_nx_info(struct net_device *dev, struct nx_info *nxi) ++{ ++ int ret = 1; ++ ++ if (!nxi) ++ goto out; ++ if (nxi->v4.type && v4_dev_in_nx_info(dev, nxi)) ++ goto out; ++#ifdef CONFIG_IPV6 ++ ret = 2; ++ if (nxi->v6.type && v6_dev_in_nx_info(dev, nxi)) ++ goto out; ++#endif ++ ret = 0; ++out: ++ vxdprintk(VXD_CBIT(net, 3), ++ "dev_in_nx_info(%p,%p[#%d]) = %d", ++ dev, nxi, nxi ? nxi->nx_id : 0, ret); ++ return ret; ++} ++ ++struct rtable *ip_v4_find_src(struct net *net, struct nx_info *nxi, ++ struct flowi4 *fl4) ++{ ++ struct rtable *rt; ++ ++ if (!nxi) ++ return NULL; ++ ++ /* FIXME: handle lback only case */ ++ if (!NX_IPV4(nxi)) ++ return ERR_PTR(-EPERM); ++ ++ vxdprintk(VXD_CBIT(net, 4), ++ "ip_v4_find_src(%p[#%u]) " NIPQUAD_FMT " -> " NIPQUAD_FMT, ++ nxi, nxi ? nxi->nx_id : 0, ++ NIPQUAD(fl4->saddr), NIPQUAD(fl4->daddr)); ++ ++ /* single IP is unconditional */ ++ if (nx_info_flags(nxi, NXF_SINGLE_IP, 0) && ++ (fl4->saddr == INADDR_ANY)) ++ fl4->saddr = nxi->v4.ip[0].s_addr; ++ ++ if (fl4->saddr == INADDR_ANY) { ++ struct nx_addr_v4 *ptr; ++ __be32 found = 0; ++ ++ rt = __ip_route_output_key(net, fl4); ++ if (!IS_ERR(rt)) { ++ found = fl4->saddr; ++ ip_rt_put(rt); ++ vxdprintk(VXD_CBIT(net, 4), ++ "ip_v4_find_src(%p[#%u]) rok[%u]: " NIPQUAD_FMT, ++ nxi, nxi ? nxi->nx_id : 0, fl4->flowi4_oif, NIPQUAD(found)); ++ if (v4_addr_in_nx_info(nxi, found, NXA_MASK_BIND)) ++ goto found; ++ } ++ ++ for (ptr = &nxi->v4; ptr; ptr = ptr->next) { ++ __be32 primary = ptr->ip[0].s_addr; ++ __be32 mask = ptr->mask.s_addr; ++ __be32 neta = primary & mask; ++ ++ vxdprintk(VXD_CBIT(net, 4), "ip_v4_find_src(%p[#%u]) chk: " ++ NIPQUAD_FMT "/" NIPQUAD_FMT "/" NIPQUAD_FMT, ++ nxi, nxi ? nxi->nx_id : 0, NIPQUAD(primary), ++ NIPQUAD(mask), NIPQUAD(neta)); ++ if ((found & mask) != neta) ++ continue; ++ ++ fl4->saddr = primary; ++ rt = __ip_route_output_key(net, fl4); ++ vxdprintk(VXD_CBIT(net, 4), ++ "ip_v4_find_src(%p[#%u]) rok[%u]: " NIPQUAD_FMT, ++ nxi, nxi ? nxi->nx_id : 0, fl4->flowi4_oif, NIPQUAD(primary)); ++ if (!IS_ERR(rt)) { ++ found = fl4->saddr; ++ ip_rt_put(rt); ++ if (found == primary) ++ goto found; ++ } ++ } ++ /* still no source ip? */ ++ found = ipv4_is_loopback(fl4->daddr) ++ ? 
IPI_LOOPBACK : nxi->v4.ip[0].s_addr; ++ found: ++ /* assign src ip to flow */ ++ fl4->saddr = found; ++ ++ } else { ++ if (!v4_addr_in_nx_info(nxi, fl4->saddr, NXA_MASK_BIND)) ++ return ERR_PTR(-EPERM); ++ } ++ ++ if (nx_info_flags(nxi, NXF_LBACK_REMAP, 0)) { ++ if (ipv4_is_loopback(fl4->daddr)) ++ fl4->daddr = nxi->v4_lback.s_addr; ++ if (ipv4_is_loopback(fl4->saddr)) ++ fl4->saddr = nxi->v4_lback.s_addr; ++ } else if (ipv4_is_loopback(fl4->daddr) && ++ !nx_info_flags(nxi, NXF_LBACK_ALLOW, 0)) ++ return ERR_PTR(-EPERM); ++ ++ return NULL; ++} ++ ++EXPORT_SYMBOL_GPL(ip_v4_find_src); ++ +diff -NurpP --minimal linux-3.2.34/kernel/vserver/init.c linux-3.2.34-vs2.3.2.15/kernel/vserver/init.c +--- linux-3.2.34/kernel/vserver/init.c 1970-01-01 01:00:00.000000000 +0100 ++++ linux-3.2.34-vs2.3.2.15/kernel/vserver/init.c 2011-12-05 19:33:02.000000000 +0100 +@@ -0,0 +1,45 @@ ++/* ++ * linux/kernel/init.c ++ * ++ * Virtual Server Init ++ * ++ * Copyright (C) 2004-2007 Herbert Pötzl ++ * ++ * V0.01 basic structure ++ * ++ */ ++ ++#include ++ ++int vserver_register_sysctl(void); ++void vserver_unregister_sysctl(void); ++ ++ ++static int __init init_vserver(void) ++{ ++ int ret = 0; ++ ++#ifdef CONFIG_VSERVER_DEBUG ++ vserver_register_sysctl(); ++#endif ++ return ret; ++} ++ ++ ++static void __exit exit_vserver(void) ++{ ++ ++#ifdef CONFIG_VSERVER_DEBUG ++ vserver_unregister_sysctl(); ++#endif ++ return; ++} ++ ++/* FIXME: GFP_ZONETYPES gone ++long vx_slab[GFP_ZONETYPES]; */ ++long vx_area; ++ ++ ++module_init(init_vserver); ++module_exit(exit_vserver); ++ +diff -NurpP --minimal linux-3.2.34/kernel/vserver/inode.c linux-3.2.34-vs2.3.2.15/kernel/vserver/inode.c +--- linux-3.2.34/kernel/vserver/inode.c 1970-01-01 01:00:00.000000000 +0100 ++++ linux-3.2.34-vs2.3.2.15/kernel/vserver/inode.c 2011-12-06 23:56:11.000000000 +0100 +@@ -0,0 +1,437 @@ ++/* ++ * linux/kernel/vserver/inode.c ++ * ++ * Virtual Server: File System Support ++ * ++ * Copyright (C) 2004-2007 Herbert Pötzl ++ * ++ * V0.01 separated from vcontext V0.05 ++ * V0.02 moved to tag (instead of xid) ++ * ++ */ ++ ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++ ++#include ++ ++ ++static int __vc_get_iattr(struct inode *in, uint32_t *tag, uint32_t *flags, uint32_t *mask) ++{ ++ struct proc_dir_entry *entry; ++ ++ if (!in || !in->i_sb) ++ return -ESRCH; ++ ++ *flags = IATTR_TAG ++ | (IS_IMMUTABLE(in) ? IATTR_IMMUTABLE : 0) ++ | (IS_IXUNLINK(in) ? IATTR_IXUNLINK : 0) ++ | (IS_BARRIER(in) ? IATTR_BARRIER : 0) ++ | (IS_COW(in) ? IATTR_COW : 0); ++ *mask = IATTR_IXUNLINK | IATTR_IMMUTABLE | IATTR_COW; ++ ++ if (S_ISDIR(in->i_mode)) ++ *mask |= IATTR_BARRIER; ++ ++ if (IS_TAGGED(in)) { ++ *tag = in->i_tag; ++ *mask |= IATTR_TAG; ++ } ++ ++ switch (in->i_sb->s_magic) { ++ case PROC_SUPER_MAGIC: ++ entry = PROC_I(in)->pde; ++ ++ /* check for specific inodes? 
*/ ++ if (entry) ++ *mask |= IATTR_FLAGS; ++ if (entry) ++ *flags |= (entry->vx_flags & IATTR_FLAGS); ++ else ++ *flags |= (PROC_I(in)->vx_flags & IATTR_FLAGS); ++ break; ++ ++ case DEVPTS_SUPER_MAGIC: ++ *tag = in->i_tag; ++ *mask |= IATTR_TAG; ++ break; ++ ++ default: ++ break; ++ } ++ return 0; ++} ++ ++int vc_get_iattr(void __user *data) ++{ ++ struct path path; ++ struct vcmd_ctx_iattr_v1 vc_data = { .tag = -1 }; ++ int ret; ++ ++ if (copy_from_user(&vc_data, data, sizeof(vc_data))) ++ return -EFAULT; ++ ++ ret = user_lpath(vc_data.name, &path); ++ if (!ret) { ++ ret = __vc_get_iattr(path.dentry->d_inode, ++ &vc_data.tag, &vc_data.flags, &vc_data.mask); ++ path_put(&path); ++ } ++ if (ret) ++ return ret; ++ ++ if (copy_to_user(data, &vc_data, sizeof(vc_data))) ++ ret = -EFAULT; ++ return ret; ++} ++ ++#ifdef CONFIG_COMPAT ++ ++int vc_get_iattr_x32(void __user *data) ++{ ++ struct path path; ++ struct vcmd_ctx_iattr_v1_x32 vc_data = { .tag = -1 }; ++ int ret; ++ ++ if (copy_from_user(&vc_data, data, sizeof(vc_data))) ++ return -EFAULT; ++ ++ ret = user_lpath(compat_ptr(vc_data.name_ptr), &path); ++ if (!ret) { ++ ret = __vc_get_iattr(path.dentry->d_inode, ++ &vc_data.tag, &vc_data.flags, &vc_data.mask); ++ path_put(&path); ++ } ++ if (ret) ++ return ret; ++ ++ if (copy_to_user(data, &vc_data, sizeof(vc_data))) ++ ret = -EFAULT; ++ return ret; ++} ++ ++#endif /* CONFIG_COMPAT */ ++ ++ ++int vc_fget_iattr(uint32_t fd, void __user *data) ++{ ++ struct file *filp; ++ struct vcmd_ctx_fiattr_v0 vc_data = { .tag = -1 }; ++ int ret; ++ ++ if (copy_from_user(&vc_data, data, sizeof(vc_data))) ++ return -EFAULT; ++ ++ filp = fget(fd); ++ if (!filp || !filp->f_dentry || !filp->f_dentry->d_inode) ++ return -EBADF; ++ ++ ret = __vc_get_iattr(filp->f_dentry->d_inode, ++ &vc_data.tag, &vc_data.flags, &vc_data.mask); ++ ++ fput(filp); ++ ++ if (copy_to_user(data, &vc_data, sizeof(vc_data))) ++ ret = -EFAULT; ++ return ret; ++} ++ ++ ++static int __vc_set_iattr(struct dentry *de, uint32_t *tag, uint32_t *flags, uint32_t *mask) ++{ ++ struct inode *in = de->d_inode; ++ int error = 0, is_proc = 0, has_tag = 0; ++ struct iattr attr = { 0 }; ++ ++ if (!in || !in->i_sb) ++ return -ESRCH; ++ ++ is_proc = (in->i_sb->s_magic == PROC_SUPER_MAGIC); ++ if ((*mask & IATTR_FLAGS) && !is_proc) ++ return -EINVAL; ++ ++ has_tag = IS_TAGGED(in) || ++ (in->i_sb->s_magic == DEVPTS_SUPER_MAGIC); ++ if ((*mask & IATTR_TAG) && !has_tag) ++ return -EINVAL; ++ ++ mutex_lock(&in->i_mutex); ++ if (*mask & IATTR_TAG) { ++ attr.ia_tag = *tag; ++ attr.ia_valid |= ATTR_TAG; ++ } ++ ++ if (*mask & IATTR_FLAGS) { ++ struct proc_dir_entry *entry = PROC_I(in)->pde; ++ unsigned int iflags = PROC_I(in)->vx_flags; ++ ++ iflags = (iflags & ~(*mask & IATTR_FLAGS)) ++ | (*flags & IATTR_FLAGS); ++ PROC_I(in)->vx_flags = iflags; ++ if (entry) ++ entry->vx_flags = iflags; ++ } ++ ++ if (*mask & (IATTR_IMMUTABLE | IATTR_IXUNLINK | ++ IATTR_BARRIER | IATTR_COW)) { ++ int iflags = in->i_flags; ++ int vflags = in->i_vflags; ++ ++ if (*mask & IATTR_IMMUTABLE) { ++ if (*flags & IATTR_IMMUTABLE) ++ iflags |= S_IMMUTABLE; ++ else ++ iflags &= ~S_IMMUTABLE; ++ } ++ if (*mask & IATTR_IXUNLINK) { ++ if (*flags & IATTR_IXUNLINK) ++ iflags |= S_IXUNLINK; ++ else ++ iflags &= ~S_IXUNLINK; ++ } ++ if (S_ISDIR(in->i_mode) && (*mask & IATTR_BARRIER)) { ++ if (*flags & IATTR_BARRIER) ++ vflags |= V_BARRIER; ++ else ++ vflags &= ~V_BARRIER; ++ } ++ if (S_ISREG(in->i_mode) && (*mask & IATTR_COW)) { ++ if (*flags & IATTR_COW) ++ vflags |= V_COW; ++ else ++ vflags &= 
~V_COW; ++ } ++ if (in->i_op && in->i_op->sync_flags) { ++ error = in->i_op->sync_flags(in, iflags, vflags); ++ if (error) ++ goto out; ++ } ++ } ++ ++ if (attr.ia_valid) { ++ if (in->i_op && in->i_op->setattr) ++ error = in->i_op->setattr(de, &attr); ++ else { ++ error = inode_change_ok(in, &attr); ++ if (!error) { ++ setattr_copy(in, &attr); ++ mark_inode_dirty(in); ++ } ++ } ++ } ++ ++out: ++ mutex_unlock(&in->i_mutex); ++ return error; ++} ++ ++int vc_set_iattr(void __user *data) ++{ ++ struct path path; ++ struct vcmd_ctx_iattr_v1 vc_data; ++ int ret; ++ ++ if (!capable(CAP_LINUX_IMMUTABLE)) ++ return -EPERM; ++ if (copy_from_user(&vc_data, data, sizeof(vc_data))) ++ return -EFAULT; ++ ++ ret = user_lpath(vc_data.name, &path); ++ if (!ret) { ++ ret = __vc_set_iattr(path.dentry, ++ &vc_data.tag, &vc_data.flags, &vc_data.mask); ++ path_put(&path); ++ } ++ ++ if (copy_to_user(data, &vc_data, sizeof(vc_data))) ++ ret = -EFAULT; ++ return ret; ++} ++ ++#ifdef CONFIG_COMPAT ++ ++int vc_set_iattr_x32(void __user *data) ++{ ++ struct path path; ++ struct vcmd_ctx_iattr_v1_x32 vc_data; ++ int ret; ++ ++ if (!capable(CAP_LINUX_IMMUTABLE)) ++ return -EPERM; ++ if (copy_from_user(&vc_data, data, sizeof(vc_data))) ++ return -EFAULT; ++ ++ ret = user_lpath(compat_ptr(vc_data.name_ptr), &path); ++ if (!ret) { ++ ret = __vc_set_iattr(path.dentry, ++ &vc_data.tag, &vc_data.flags, &vc_data.mask); ++ path_put(&path); ++ } ++ ++ if (copy_to_user(data, &vc_data, sizeof(vc_data))) ++ ret = -EFAULT; ++ return ret; ++} ++ ++#endif /* CONFIG_COMPAT */ ++ ++int vc_fset_iattr(uint32_t fd, void __user *data) ++{ ++ struct file *filp; ++ struct vcmd_ctx_fiattr_v0 vc_data; ++ int ret; ++ ++ if (!capable(CAP_LINUX_IMMUTABLE)) ++ return -EPERM; ++ if (copy_from_user(&vc_data, data, sizeof(vc_data))) ++ return -EFAULT; ++ ++ filp = fget(fd); ++ if (!filp || !filp->f_dentry || !filp->f_dentry->d_inode) ++ return -EBADF; ++ ++ ret = __vc_set_iattr(filp->f_dentry, &vc_data.tag, ++ &vc_data.flags, &vc_data.mask); ++ ++ fput(filp); ++ ++ if (copy_to_user(data, &vc_data, sizeof(vc_data))) ++ return -EFAULT; ++ return ret; ++} ++ ++ ++enum { Opt_notagcheck, Opt_tag, Opt_notag, Opt_tagid, Opt_err }; ++ ++static match_table_t tokens = { ++ {Opt_notagcheck, "notagcheck"}, ++#ifdef CONFIG_PROPAGATE ++ {Opt_notag, "notag"}, ++ {Opt_tag, "tag"}, ++ {Opt_tagid, "tagid=%u"}, ++#endif ++ {Opt_err, NULL} ++}; ++ ++ ++static void __dx_parse_remove(char *string, char *opt) ++{ ++ char *p = strstr(string, opt); ++ char *q = p; ++ ++ if (p) { ++ while (*q != '\0' && *q != ',') ++ q++; ++ while (*q) ++ *p++ = *q++; ++ while (*p) ++ *p++ = '\0'; ++ } ++} ++ ++int dx_parse_tag(char *string, tag_t *tag, int remove, int *mnt_flags, ++ unsigned long *flags) ++{ ++ int set = 0; ++ substring_t args[MAX_OPT_ARGS]; ++ int token; ++ char *s, *p, *opts; ++#if defined(CONFIG_PROPAGATE) || defined(CONFIG_VSERVER_DEBUG) ++ int option = 0; ++#endif ++ ++ if (!string) ++ return 0; ++ s = kstrdup(string, GFP_KERNEL | GFP_ATOMIC); ++ if (!s) ++ return 0; ++ ++ opts = s; ++ while ((p = strsep(&opts, ",")) != NULL) { ++ token = match_token(p, tokens, args); ++ ++ switch (token) { ++#ifdef CONFIG_PROPAGATE ++ case Opt_tag: ++ if (tag) ++ *tag = 0; ++ if (remove) ++ __dx_parse_remove(s, "tag"); ++ *mnt_flags |= MNT_TAGID; ++ set |= MNT_TAGID; ++ break; ++ case Opt_notag: ++ if (remove) ++ __dx_parse_remove(s, "notag"); ++ *mnt_flags |= MNT_NOTAG; ++ set |= MNT_NOTAG; ++ break; ++ case Opt_tagid: ++ if (tag && !match_int(args, &option)) ++ *tag = option; ++ if 
(remove) ++ __dx_parse_remove(s, "tagid"); ++ *mnt_flags |= MNT_TAGID; ++ set |= MNT_TAGID; ++ break; ++#endif /* CONFIG_PROPAGATE */ ++ case Opt_notagcheck: ++ if (remove) ++ __dx_parse_remove(s, "notagcheck"); ++ *flags |= MS_NOTAGCHECK; ++ set |= MS_NOTAGCHECK; ++ break; ++ } ++ vxdprintk(VXD_CBIT(tag, 7), ++ "dx_parse_tag(" VS_Q("%s") "): %d:#%d", ++ p, token, option); ++ } ++ if (set) ++ strcpy(string, s); ++ kfree(s); ++ return set; ++} ++ ++#ifdef CONFIG_PROPAGATE ++ ++void __dx_propagate_tag(struct nameidata *nd, struct inode *inode) ++{ ++ tag_t new_tag = 0; ++ struct vfsmount *mnt; ++ int propagate; ++ ++ if (!nd) ++ return; ++ mnt = nd->path.mnt; ++ if (!mnt) ++ return; ++ ++ propagate = (mnt->mnt_flags & MNT_TAGID); ++ if (propagate) ++ new_tag = mnt->mnt_tag; ++ ++ vxdprintk(VXD_CBIT(tag, 7), ++ "dx_propagate_tag(%p[#%lu.%d]): %d,%d", ++ inode, inode->i_ino, inode->i_tag, ++ new_tag, (propagate) ? 1 : 0); ++ ++ if (propagate) ++ inode->i_tag = new_tag; ++} ++ ++#include ++ ++EXPORT_SYMBOL_GPL(__dx_propagate_tag); ++ ++#endif /* CONFIG_PROPAGATE */ ++ +diff -NurpP --minimal linux-3.2.34/kernel/vserver/limit.c linux-3.2.34-vs2.3.2.15/kernel/vserver/limit.c +--- linux-3.2.34/kernel/vserver/limit.c 1970-01-01 01:00:00.000000000 +0100 ++++ linux-3.2.34-vs2.3.2.15/kernel/vserver/limit.c 2012-08-13 15:49:40.000000000 +0200 +@@ -0,0 +1,345 @@ ++/* ++ * linux/kernel/vserver/limit.c ++ * ++ * Virtual Server: Context Limits ++ * ++ * Copyright (C) 2004-2010 Herbert Pötzl ++ * ++ * V0.01 broken out from vcontext V0.05 ++ * V0.02 changed vcmds to vxi arg ++ * V0.03 added memory cgroup support ++ * ++ */ ++ ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++ ++#include ++ ++ ++const char *vlimit_name[NUM_LIMITS] = { ++ [RLIMIT_CPU] = "CPU", ++ [RLIMIT_NPROC] = "NPROC", ++ [RLIMIT_NOFILE] = "NOFILE", ++ [RLIMIT_LOCKS] = "LOCKS", ++ [RLIMIT_SIGPENDING] = "SIGP", ++ [RLIMIT_MSGQUEUE] = "MSGQ", ++ ++ [VLIMIT_NSOCK] = "NSOCK", ++ [VLIMIT_OPENFD] = "OPENFD", ++ [VLIMIT_SHMEM] = "SHMEM", ++ [VLIMIT_DENTRY] = "DENTRY", ++}; ++ ++EXPORT_SYMBOL_GPL(vlimit_name); ++ ++#define MASK_ENTRY(x) (1 << (x)) ++ ++const struct vcmd_ctx_rlimit_mask_v0 vlimit_mask = { ++ /* minimum */ ++ 0 ++ , /* softlimit */ ++ 0 ++ , /* maximum */ ++ MASK_ENTRY( RLIMIT_NPROC ) | ++ MASK_ENTRY( RLIMIT_NOFILE ) | ++ MASK_ENTRY( RLIMIT_LOCKS ) | ++ MASK_ENTRY( RLIMIT_MSGQUEUE ) | ++ ++ MASK_ENTRY( VLIMIT_NSOCK ) | ++ MASK_ENTRY( VLIMIT_OPENFD ) | ++ MASK_ENTRY( VLIMIT_SHMEM ) | ++ MASK_ENTRY( VLIMIT_DENTRY ) | ++ 0 ++}; ++ /* accounting only */ ++uint32_t account_mask = ++ MASK_ENTRY( VLIMIT_SEMARY ) | ++ MASK_ENTRY( VLIMIT_NSEMS ) | ++ MASK_ENTRY( VLIMIT_MAPPED ) | ++ 0; ++ ++ ++static int is_valid_vlimit(int id) ++{ ++ uint32_t mask = vlimit_mask.minimum | ++ vlimit_mask.softlimit | vlimit_mask.maximum; ++ return mask & (1 << id); ++} ++ ++static int is_accounted_vlimit(int id) ++{ ++ if (is_valid_vlimit(id)) ++ return 1; ++ return account_mask & (1 << id); ++} ++ ++ ++static inline uint64_t vc_get_soft(struct vx_info *vxi, int id) ++{ ++ rlim_t limit = __rlim_soft(&vxi->limit, id); ++ return VX_VLIM(limit); ++} ++ ++static inline uint64_t vc_get_hard(struct vx_info *vxi, int id) ++{ ++ rlim_t limit = __rlim_hard(&vxi->limit, id); ++ return VX_VLIM(limit); ++} ++ ++static int do_get_rlimit(struct vx_info *vxi, uint32_t id, ++ uint64_t *minimum, uint64_t *softlimit, uint64_t *maximum) ++{ ++ if (!is_valid_vlimit(id)) ++ return -EINVAL; ++ ++ if (minimum) ++ *minimum = CRLIM_UNSET; ++ if (softlimit) 
++ *softlimit = vc_get_soft(vxi, id); ++ if (maximum) ++ *maximum = vc_get_hard(vxi, id); ++ return 0; ++} ++ ++int vc_get_rlimit(struct vx_info *vxi, void __user *data) ++{ ++ struct vcmd_ctx_rlimit_v0 vc_data; ++ int ret; ++ ++ if (copy_from_user(&vc_data, data, sizeof(vc_data))) ++ return -EFAULT; ++ ++ ret = do_get_rlimit(vxi, vc_data.id, ++ &vc_data.minimum, &vc_data.softlimit, &vc_data.maximum); ++ if (ret) ++ return ret; ++ ++ if (copy_to_user(data, &vc_data, sizeof(vc_data))) ++ return -EFAULT; ++ return 0; ++} ++ ++static int do_set_rlimit(struct vx_info *vxi, uint32_t id, ++ uint64_t minimum, uint64_t softlimit, uint64_t maximum) ++{ ++ if (!is_valid_vlimit(id)) ++ return -EINVAL; ++ ++ if (maximum != CRLIM_KEEP) ++ __rlim_hard(&vxi->limit, id) = VX_RLIM(maximum); ++ if (softlimit != CRLIM_KEEP) ++ __rlim_soft(&vxi->limit, id) = VX_RLIM(softlimit); ++ ++ /* clamp soft limit */ ++ if (__rlim_soft(&vxi->limit, id) > __rlim_hard(&vxi->limit, id)) ++ __rlim_soft(&vxi->limit, id) = __rlim_hard(&vxi->limit, id); ++ ++ return 0; ++} ++ ++int vc_set_rlimit(struct vx_info *vxi, void __user *data) ++{ ++ struct vcmd_ctx_rlimit_v0 vc_data; ++ ++ if (copy_from_user(&vc_data, data, sizeof(vc_data))) ++ return -EFAULT; ++ ++ return do_set_rlimit(vxi, vc_data.id, ++ vc_data.minimum, vc_data.softlimit, vc_data.maximum); ++} ++ ++#ifdef CONFIG_IA32_EMULATION ++ ++int vc_set_rlimit_x32(struct vx_info *vxi, void __user *data) ++{ ++ struct vcmd_ctx_rlimit_v0_x32 vc_data; ++ ++ if (copy_from_user(&vc_data, data, sizeof(vc_data))) ++ return -EFAULT; ++ ++ return do_set_rlimit(vxi, vc_data.id, ++ vc_data.minimum, vc_data.softlimit, vc_data.maximum); ++} ++ ++int vc_get_rlimit_x32(struct vx_info *vxi, void __user *data) ++{ ++ struct vcmd_ctx_rlimit_v0_x32 vc_data; ++ int ret; ++ ++ if (copy_from_user(&vc_data, data, sizeof(vc_data))) ++ return -EFAULT; ++ ++ ret = do_get_rlimit(vxi, vc_data.id, ++ &vc_data.minimum, &vc_data.softlimit, &vc_data.maximum); ++ if (ret) ++ return ret; ++ ++ if (copy_to_user(data, &vc_data, sizeof(vc_data))) ++ return -EFAULT; ++ return 0; ++} ++ ++#endif /* CONFIG_IA32_EMULATION */ ++ ++ ++int vc_get_rlimit_mask(uint32_t id, void __user *data) ++{ ++ if (copy_to_user(data, &vlimit_mask, sizeof(vlimit_mask))) ++ return -EFAULT; ++ return 0; ++} ++ ++ ++static inline void vx_reset_hits(struct _vx_limit *limit) ++{ ++ int lim; ++ ++ for (lim = 0; lim < NUM_LIMITS; lim++) { ++ atomic_set(&__rlim_lhit(limit, lim), 0); ++ } ++} ++ ++int vc_reset_hits(struct vx_info *vxi, void __user *data) ++{ ++ vx_reset_hits(&vxi->limit); ++ return 0; ++} ++ ++static inline void vx_reset_minmax(struct _vx_limit *limit) ++{ ++ rlim_t value; ++ int lim; ++ ++ for (lim = 0; lim < NUM_LIMITS; lim++) { ++ value = __rlim_get(limit, lim); ++ __rlim_rmax(limit, lim) = value; ++ __rlim_rmin(limit, lim) = value; ++ } ++} ++ ++int vc_reset_minmax(struct vx_info *vxi, void __user *data) ++{ ++ vx_reset_minmax(&vxi->limit); ++ return 0; ++} ++ ++ ++int vc_rlimit_stat(struct vx_info *vxi, void __user *data) ++{ ++ struct vcmd_rlimit_stat_v0 vc_data; ++ struct _vx_limit *limit = &vxi->limit; ++ int id; ++ ++ if (copy_from_user(&vc_data, data, sizeof(vc_data))) ++ return -EFAULT; ++ ++ id = vc_data.id; ++ if (!is_accounted_vlimit(id)) ++ return -EINVAL; ++ ++ vx_limit_fixup(limit, id); ++ vc_data.hits = atomic_read(&__rlim_lhit(limit, id)); ++ vc_data.value = __rlim_get(limit, id); ++ vc_data.minimum = __rlim_rmin(limit, id); ++ vc_data.maximum = __rlim_rmax(limit, id); ++ ++ if (copy_to_user(data, &vc_data, 
sizeof(vc_data))) ++ return -EFAULT; ++ return 0; ++} ++ ++ ++void vx_vsi_meminfo(struct sysinfo *val) ++{ ++#ifdef CONFIG_CGROUP_MEM_RES_CTLR ++ struct mem_cgroup *mcg; ++ u64 res_limit, res_usage; ++ ++ rcu_read_lock(); ++ mcg = mem_cgroup_from_task(current); ++ if (!mcg) ++ goto out; ++ ++ res_limit = mem_cgroup_res_read_u64(mcg, RES_LIMIT); ++ res_usage = mem_cgroup_res_read_u64(mcg, RES_USAGE); ++ ++ if (res_limit != RESOURCE_MAX) ++ val->totalram = (res_limit >> PAGE_SHIFT); ++ val->freeram = val->totalram - (res_usage >> PAGE_SHIFT); ++ val->bufferram = 0; ++ val->totalhigh = 0; ++ val->freehigh = 0; ++out: ++ rcu_read_unlock(); ++#endif /* CONFIG_CGROUP_MEM_RES_CTLR */ ++ return; ++} ++ ++void vx_vsi_swapinfo(struct sysinfo *val) ++{ ++#ifdef CONFIG_CGROUP_MEM_RES_CTLR ++#ifdef CONFIG_CGROUP_MEM_RES_CTLR_SWAP ++ struct mem_cgroup *mcg; ++ u64 res_limit, res_usage, memsw_limit, memsw_usage; ++ s64 swap_limit, swap_usage; ++ ++ rcu_read_lock(); ++ mcg = mem_cgroup_from_task(current); ++ if (!mcg) ++ goto out; ++ ++ res_limit = mem_cgroup_res_read_u64(mcg, RES_LIMIT); ++ res_usage = mem_cgroup_res_read_u64(mcg, RES_USAGE); ++ memsw_limit = mem_cgroup_memsw_read_u64(mcg, RES_LIMIT); ++ memsw_usage = mem_cgroup_memsw_read_u64(mcg, RES_USAGE); ++ ++ /* memory unlimited */ ++ if (res_limit == RESOURCE_MAX) ++ goto out; ++ ++ swap_limit = memsw_limit - res_limit; ++ /* we have a swap limit? */ ++ if (memsw_limit != RESOURCE_MAX) ++ val->totalswap = swap_limit >> PAGE_SHIFT; ++ ++ /* calculate swap part */ ++ swap_usage = (memsw_usage > res_usage) ? ++ memsw_usage - res_usage : 0; ++ ++ /* total shown minus usage gives free swap */ ++ val->freeswap = (swap_usage < swap_limit) ? ++ val->totalswap - (swap_usage >> PAGE_SHIFT) : 0; ++out: ++ rcu_read_unlock(); ++#else /* !CONFIG_CGROUP_MEM_RES_CTLR_SWAP */ ++ val->totalswap = 0; ++ val->freeswap = 0; ++#endif /* !CONFIG_CGROUP_MEM_RES_CTLR_SWAP */ ++#endif /* CONFIG_CGROUP_MEM_RES_CTLR */ ++ return; ++} ++ ++long vx_vsi_cached(struct sysinfo *val) ++{ ++ long cache = 0; ++#ifdef CONFIG_CGROUP_MEM_RES_CTLR ++ struct mem_cgroup *mcg; ++ ++ rcu_read_lock(); ++ mcg = mem_cgroup_from_task(current); ++ if (!mcg) ++ goto out; ++ ++ cache = mem_cgroup_stat_read_cache(mcg); ++out: ++ rcu_read_unlock(); ++#endif ++ return cache; ++} ++ +diff -NurpP --minimal linux-3.2.34/kernel/vserver/limit_init.h linux-3.2.34-vs2.3.2.15/kernel/vserver/limit_init.h +--- linux-3.2.34/kernel/vserver/limit_init.h 1970-01-01 01:00:00.000000000 +0100 ++++ linux-3.2.34-vs2.3.2.15/kernel/vserver/limit_init.h 2011-12-05 19:33:02.000000000 +0100 +@@ -0,0 +1,31 @@ ++ ++ ++static inline void vx_info_init_limit(struct _vx_limit *limit) ++{ ++ int lim; ++ ++ for (lim = 0; lim < NUM_LIMITS; lim++) { ++ __rlim_soft(limit, lim) = RLIM_INFINITY; ++ __rlim_hard(limit, lim) = RLIM_INFINITY; ++ __rlim_set(limit, lim, 0); ++ atomic_set(&__rlim_lhit(limit, lim), 0); ++ __rlim_rmin(limit, lim) = 0; ++ __rlim_rmax(limit, lim) = 0; ++ } ++} ++ ++static inline void vx_info_exit_limit(struct _vx_limit *limit) ++{ ++ rlim_t value; ++ int lim; ++ ++ for (lim = 0; lim < NUM_LIMITS; lim++) { ++ if ((1 << lim) & VLIM_NOCHECK) ++ continue; ++ value = __rlim_get(limit, lim); ++ vxwprintk_xid(value, ++ "!!! 
limit: %p[%s,%d] = %ld on exit.", ++ limit, vlimit_name[lim], lim, (long)value); ++ } ++} ++ +diff -NurpP --minimal linux-3.2.34/kernel/vserver/limit_proc.h linux-3.2.34-vs2.3.2.15/kernel/vserver/limit_proc.h +--- linux-3.2.34/kernel/vserver/limit_proc.h 1970-01-01 01:00:00.000000000 +0100 ++++ linux-3.2.34-vs2.3.2.15/kernel/vserver/limit_proc.h 2011-12-05 19:33:02.000000000 +0100 +@@ -0,0 +1,57 @@ ++#ifndef _VX_LIMIT_PROC_H ++#define _VX_LIMIT_PROC_H ++ ++#include ++ ++ ++#define VX_LIMIT_FMT ":\t%8ld\t%8ld/%8ld\t%8lld/%8lld\t%6d\n" ++#define VX_LIMIT_TOP \ ++ "Limit\t current\t min/max\t\t soft/hard\t\thits\n" ++ ++#define VX_LIMIT_ARG(r) \ ++ (unsigned long)__rlim_get(limit, r), \ ++ (unsigned long)__rlim_rmin(limit, r), \ ++ (unsigned long)__rlim_rmax(limit, r), \ ++ VX_VLIM(__rlim_soft(limit, r)), \ ++ VX_VLIM(__rlim_hard(limit, r)), \ ++ atomic_read(&__rlim_lhit(limit, r)) ++ ++static inline int vx_info_proc_limit(struct _vx_limit *limit, char *buffer) ++{ ++ vx_limit_fixup(limit, -1); ++ return sprintf(buffer, VX_LIMIT_TOP ++ "PROC" VX_LIMIT_FMT ++ "VM" VX_LIMIT_FMT ++ "VML" VX_LIMIT_FMT ++ "RSS" VX_LIMIT_FMT ++ "ANON" VX_LIMIT_FMT ++ "RMAP" VX_LIMIT_FMT ++ "FILES" VX_LIMIT_FMT ++ "OFD" VX_LIMIT_FMT ++ "LOCKS" VX_LIMIT_FMT ++ "SOCK" VX_LIMIT_FMT ++ "MSGQ" VX_LIMIT_FMT ++ "SHM" VX_LIMIT_FMT ++ "SEMA" VX_LIMIT_FMT ++ "SEMS" VX_LIMIT_FMT ++ "DENT" VX_LIMIT_FMT, ++ VX_LIMIT_ARG(RLIMIT_NPROC), ++ VX_LIMIT_ARG(RLIMIT_AS), ++ VX_LIMIT_ARG(RLIMIT_MEMLOCK), ++ VX_LIMIT_ARG(RLIMIT_RSS), ++ VX_LIMIT_ARG(VLIMIT_ANON), ++ VX_LIMIT_ARG(VLIMIT_MAPPED), ++ VX_LIMIT_ARG(RLIMIT_NOFILE), ++ VX_LIMIT_ARG(VLIMIT_OPENFD), ++ VX_LIMIT_ARG(RLIMIT_LOCKS), ++ VX_LIMIT_ARG(VLIMIT_NSOCK), ++ VX_LIMIT_ARG(RLIMIT_MSGQUEUE), ++ VX_LIMIT_ARG(VLIMIT_SHMEM), ++ VX_LIMIT_ARG(VLIMIT_SEMARY), ++ VX_LIMIT_ARG(VLIMIT_NSEMS), ++ VX_LIMIT_ARG(VLIMIT_DENTRY)); ++} ++ ++#endif /* _VX_LIMIT_PROC_H */ ++ ++ +diff -NurpP --minimal linux-3.2.34/kernel/vserver/network.c linux-3.2.34-vs2.3.2.15/kernel/vserver/network.c +--- linux-3.2.34/kernel/vserver/network.c 1970-01-01 01:00:00.000000000 +0100 ++++ linux-3.2.34-vs2.3.2.15/kernel/vserver/network.c 2011-12-05 19:33:02.000000000 +0100 +@@ -0,0 +1,912 @@ ++/* ++ * linux/kernel/vserver/network.c ++ * ++ * Virtual Server: Network Support ++ * ++ * Copyright (C) 2003-2007 Herbert Pötzl ++ * ++ * V0.01 broken out from vcontext V0.05 ++ * V0.02 cleaned up implementation ++ * V0.03 added equiv nx commands ++ * V0.04 switch to RCU based hash ++ * V0.05 and back to locking again ++ * V0.06 changed vcmds to nxi arg ++ * V0.07 have __create claim() the nxi ++ * ++ */ ++ ++#include ++#include ++#include ++ ++#include ++#include ++#include ++ ++ ++atomic_t nx_global_ctotal = ATOMIC_INIT(0); ++atomic_t nx_global_cactive = ATOMIC_INIT(0); ++ ++static struct kmem_cache *nx_addr_v4_cachep = NULL; ++static struct kmem_cache *nx_addr_v6_cachep = NULL; ++ ++ ++static int __init init_network(void) ++{ ++ nx_addr_v4_cachep = kmem_cache_create("nx_v4_addr_cache", ++ sizeof(struct nx_addr_v4), 0, ++ SLAB_HWCACHE_ALIGN|SLAB_PANIC, NULL); ++ nx_addr_v6_cachep = kmem_cache_create("nx_v6_addr_cache", ++ sizeof(struct nx_addr_v6), 0, ++ SLAB_HWCACHE_ALIGN|SLAB_PANIC, NULL); ++ return 0; ++} ++ ++ ++/* __alloc_nx_addr_v4() */ ++ ++static inline struct nx_addr_v4 *__alloc_nx_addr_v4(void) ++{ ++ struct nx_addr_v4 *nxa = kmem_cache_alloc( ++ nx_addr_v4_cachep, GFP_KERNEL); ++ ++ if (!IS_ERR(nxa)) ++ memset(nxa, 0, sizeof(*nxa)); ++ return nxa; ++} ++ ++/* __dealloc_nx_addr_v4() */ ++ ++static inline void 
__dealloc_nx_addr_v4(struct nx_addr_v4 *nxa) ++{ ++ kmem_cache_free(nx_addr_v4_cachep, nxa); ++} ++ ++/* __dealloc_nx_addr_v4_all() */ ++ ++static inline void __dealloc_nx_addr_v4_all(struct nx_addr_v4 *nxa) ++{ ++ while (nxa) { ++ struct nx_addr_v4 *next = nxa->next; ++ ++ __dealloc_nx_addr_v4(nxa); ++ nxa = next; ++ } ++} ++ ++ ++#ifdef CONFIG_IPV6 ++ ++/* __alloc_nx_addr_v6() */ ++ ++static inline struct nx_addr_v6 *__alloc_nx_addr_v6(void) ++{ ++ struct nx_addr_v6 *nxa = kmem_cache_alloc( ++ nx_addr_v6_cachep, GFP_KERNEL); ++ ++ if (!IS_ERR(nxa)) ++ memset(nxa, 0, sizeof(*nxa)); ++ return nxa; ++} ++ ++/* __dealloc_nx_addr_v6() */ ++ ++static inline void __dealloc_nx_addr_v6(struct nx_addr_v6 *nxa) ++{ ++ kmem_cache_free(nx_addr_v6_cachep, nxa); ++} ++ ++/* __dealloc_nx_addr_v6_all() */ ++ ++static inline void __dealloc_nx_addr_v6_all(struct nx_addr_v6 *nxa) ++{ ++ while (nxa) { ++ struct nx_addr_v6 *next = nxa->next; ++ ++ __dealloc_nx_addr_v6(nxa); ++ nxa = next; ++ } ++} ++ ++#endif /* CONFIG_IPV6 */ ++ ++/* __alloc_nx_info() ++ ++ * allocate an initialized nx_info struct ++ * doesn't make it visible (hash) */ ++ ++static struct nx_info *__alloc_nx_info(nid_t nid) ++{ ++ struct nx_info *new = NULL; ++ ++ vxdprintk(VXD_CBIT(nid, 1), "alloc_nx_info(%d)*", nid); ++ ++ /* would this benefit from a slab cache? */ ++ new = kmalloc(sizeof(struct nx_info), GFP_KERNEL); ++ if (!new) ++ return 0; ++ ++ memset(new, 0, sizeof(struct nx_info)); ++ new->nx_id = nid; ++ INIT_HLIST_NODE(&new->nx_hlist); ++ atomic_set(&new->nx_usecnt, 0); ++ atomic_set(&new->nx_tasks, 0); ++ new->nx_state = 0; ++ ++ new->nx_flags = NXF_INIT_SET; ++ ++ /* rest of init goes here */ ++ ++ new->v4_lback.s_addr = htonl(INADDR_LOOPBACK); ++ new->v4_bcast.s_addr = htonl(INADDR_BROADCAST); ++ ++ vxdprintk(VXD_CBIT(nid, 0), ++ "alloc_nx_info(%d) = %p", nid, new); ++ atomic_inc(&nx_global_ctotal); ++ return new; ++} ++ ++/* __dealloc_nx_info() ++ ++ * final disposal of nx_info */ ++ ++static void __dealloc_nx_info(struct nx_info *nxi) ++{ ++ vxdprintk(VXD_CBIT(nid, 0), ++ "dealloc_nx_info(%p)", nxi); ++ ++ nxi->nx_hlist.next = LIST_POISON1; ++ nxi->nx_id = -1; ++ ++ BUG_ON(atomic_read(&nxi->nx_usecnt)); ++ BUG_ON(atomic_read(&nxi->nx_tasks)); ++ ++ __dealloc_nx_addr_v4_all(nxi->v4.next); ++ ++ nxi->nx_state |= NXS_RELEASED; ++ kfree(nxi); ++ atomic_dec(&nx_global_ctotal); ++} ++ ++static void __shutdown_nx_info(struct nx_info *nxi) ++{ ++ nxi->nx_state |= NXS_SHUTDOWN; ++ vs_net_change(nxi, VSC_NETDOWN); ++} ++ ++/* exported stuff */ ++ ++void free_nx_info(struct nx_info *nxi) ++{ ++ /* context shutdown is mandatory */ ++ BUG_ON(nxi->nx_state != NXS_SHUTDOWN); ++ ++ /* context must not be hashed */ ++ BUG_ON(nxi->nx_state & NXS_HASHED); ++ ++ BUG_ON(atomic_read(&nxi->nx_usecnt)); ++ BUG_ON(atomic_read(&nxi->nx_tasks)); ++ ++ __dealloc_nx_info(nxi); ++} ++ ++ ++void __nx_set_lback(struct nx_info *nxi) ++{ ++ int nid = nxi->nx_id; ++ __be32 lback = htonl(INADDR_LOOPBACK ^ ((nid & 0xFFFF) << 8)); ++ ++ nxi->v4_lback.s_addr = lback; ++} ++ ++extern int __nx_inet_add_lback(__be32 addr); ++extern int __nx_inet_del_lback(__be32 addr); ++ ++ ++/* hash table for nx_info hash */ ++ ++#define NX_HASH_SIZE 13 ++ ++struct hlist_head nx_info_hash[NX_HASH_SIZE]; ++ ++static DEFINE_SPINLOCK(nx_info_hash_lock); ++ ++ ++static inline unsigned int __hashval(nid_t nid) ++{ ++ return (nid % NX_HASH_SIZE); ++} ++ ++ ++ ++/* __hash_nx_info() ++ ++ * add the nxi to the global hash table ++ * requires the hash_lock to be held */ ++ ++static inline 
void __hash_nx_info(struct nx_info *nxi) ++{ ++ struct hlist_head *head; ++ ++ vxd_assert_lock(&nx_info_hash_lock); ++ vxdprintk(VXD_CBIT(nid, 4), ++ "__hash_nx_info: %p[#%d]", nxi, nxi->nx_id); ++ ++ /* context must not be hashed */ ++ BUG_ON(nx_info_state(nxi, NXS_HASHED)); ++ ++ nxi->nx_state |= NXS_HASHED; ++ head = &nx_info_hash[__hashval(nxi->nx_id)]; ++ hlist_add_head(&nxi->nx_hlist, head); ++ atomic_inc(&nx_global_cactive); ++} ++ ++/* __unhash_nx_info() ++ ++ * remove the nxi from the global hash table ++ * requires the hash_lock to be held */ ++ ++static inline void __unhash_nx_info(struct nx_info *nxi) ++{ ++ vxd_assert_lock(&nx_info_hash_lock); ++ vxdprintk(VXD_CBIT(nid, 4), ++ "__unhash_nx_info: %p[#%d.%d.%d]", nxi, nxi->nx_id, ++ atomic_read(&nxi->nx_usecnt), atomic_read(&nxi->nx_tasks)); ++ ++ /* context must be hashed */ ++ BUG_ON(!nx_info_state(nxi, NXS_HASHED)); ++ /* but without tasks */ ++ BUG_ON(atomic_read(&nxi->nx_tasks)); ++ ++ nxi->nx_state &= ~NXS_HASHED; ++ hlist_del(&nxi->nx_hlist); ++ atomic_dec(&nx_global_cactive); ++} ++ ++ ++/* __lookup_nx_info() ++ ++ * requires the hash_lock to be held ++ * doesn't increment the nx_refcnt */ ++ ++static inline struct nx_info *__lookup_nx_info(nid_t nid) ++{ ++ struct hlist_head *head = &nx_info_hash[__hashval(nid)]; ++ struct hlist_node *pos; ++ struct nx_info *nxi; ++ ++ vxd_assert_lock(&nx_info_hash_lock); ++ hlist_for_each(pos, head) { ++ nxi = hlist_entry(pos, struct nx_info, nx_hlist); ++ ++ if (nxi->nx_id == nid) ++ goto found; ++ } ++ nxi = NULL; ++found: ++ vxdprintk(VXD_CBIT(nid, 0), ++ "__lookup_nx_info(#%u): %p[#%u]", ++ nid, nxi, nxi ? nxi->nx_id : 0); ++ return nxi; ++} ++ ++ ++/* __create_nx_info() ++ ++ * create the requested context ++ * get(), claim() and hash it */ ++ ++static struct nx_info *__create_nx_info(int id) ++{ ++ struct nx_info *new, *nxi = NULL; ++ ++ vxdprintk(VXD_CBIT(nid, 1), "create_nx_info(%d)*", id); ++ ++ if (!(new = __alloc_nx_info(id))) ++ return ERR_PTR(-ENOMEM); ++ ++ /* required to make dynamic xids unique */ ++ spin_lock(&nx_info_hash_lock); ++ ++ /* static context requested */ ++ if ((nxi = __lookup_nx_info(id))) { ++ vxdprintk(VXD_CBIT(nid, 0), ++ "create_nx_info(%d) = %p (already there)", id, nxi); ++ if (nx_info_flags(nxi, NXF_STATE_SETUP, 0)) ++ nxi = ERR_PTR(-EBUSY); ++ else ++ nxi = ERR_PTR(-EEXIST); ++ goto out_unlock; ++ } ++ /* new context */ ++ vxdprintk(VXD_CBIT(nid, 0), ++ "create_nx_info(%d) = %p (new)", id, new); ++ claim_nx_info(new, NULL); ++ __nx_set_lback(new); ++ __hash_nx_info(get_nx_info(new)); ++ nxi = new, new = NULL; ++ ++out_unlock: ++ spin_unlock(&nx_info_hash_lock); ++ if (new) ++ __dealloc_nx_info(new); ++ return nxi; ++} ++ ++ ++ ++/* exported stuff */ ++ ++ ++void unhash_nx_info(struct nx_info *nxi) ++{ ++ __shutdown_nx_info(nxi); ++ spin_lock(&nx_info_hash_lock); ++ __unhash_nx_info(nxi); ++ spin_unlock(&nx_info_hash_lock); ++} ++ ++/* lookup_nx_info() ++ ++ * search for a nx_info and get() it ++ * negative id means current */ ++ ++struct nx_info *lookup_nx_info(int id) ++{ ++ struct nx_info *nxi = NULL; ++ ++ if (id < 0) { ++ nxi = get_nx_info(current_nx_info()); ++ } else if (id > 1) { ++ spin_lock(&nx_info_hash_lock); ++ nxi = get_nx_info(__lookup_nx_info(id)); ++ spin_unlock(&nx_info_hash_lock); ++ } ++ return nxi; ++} ++ ++/* nid_is_hashed() ++ ++ * verify that nid is still hashed */ ++ ++int nid_is_hashed(nid_t nid) ++{ ++ int hashed; ++ ++ spin_lock(&nx_info_hash_lock); ++ hashed = (__lookup_nx_info(nid) != NULL); ++ 
spin_unlock(&nx_info_hash_lock); ++ return hashed; ++} ++ ++ ++#ifdef CONFIG_PROC_FS ++ ++/* get_nid_list() ++ ++ * get a subset of hashed nids for proc ++ * assumes size is at least one */ ++ ++int get_nid_list(int index, unsigned int *nids, int size) ++{ ++ int hindex, nr_nids = 0; ++ ++ /* only show current and children */ ++ if (!nx_check(0, VS_ADMIN | VS_WATCH)) { ++ if (index > 0) ++ return 0; ++ nids[nr_nids] = nx_current_nid(); ++ return 1; ++ } ++ ++ for (hindex = 0; hindex < NX_HASH_SIZE; hindex++) { ++ struct hlist_head *head = &nx_info_hash[hindex]; ++ struct hlist_node *pos; ++ ++ spin_lock(&nx_info_hash_lock); ++ hlist_for_each(pos, head) { ++ struct nx_info *nxi; ++ ++ if (--index > 0) ++ continue; ++ ++ nxi = hlist_entry(pos, struct nx_info, nx_hlist); ++ nids[nr_nids] = nxi->nx_id; ++ if (++nr_nids >= size) { ++ spin_unlock(&nx_info_hash_lock); ++ goto out; ++ } ++ } ++ /* keep the lock time short */ ++ spin_unlock(&nx_info_hash_lock); ++ } ++out: ++ return nr_nids; ++} ++#endif ++ ++ ++/* ++ * migrate task to new network ++ * gets nxi, puts old_nxi on change ++ */ ++ ++int nx_migrate_task(struct task_struct *p, struct nx_info *nxi) ++{ ++ struct nx_info *old_nxi; ++ int ret = 0; ++ ++ if (!p || !nxi) ++ BUG(); ++ ++ vxdprintk(VXD_CBIT(nid, 5), ++ "nx_migrate_task(%p,%p[#%d.%d.%d])", ++ p, nxi, nxi->nx_id, ++ atomic_read(&nxi->nx_usecnt), ++ atomic_read(&nxi->nx_tasks)); ++ ++ if (nx_info_flags(nxi, NXF_INFO_PRIVATE, 0) && ++ !nx_info_flags(nxi, NXF_STATE_SETUP, 0)) ++ return -EACCES; ++ ++ if (nx_info_state(nxi, NXS_SHUTDOWN)) ++ return -EFAULT; ++ ++ /* maybe disallow this completely? */ ++ old_nxi = task_get_nx_info(p); ++ if (old_nxi == nxi) ++ goto out; ++ ++ task_lock(p); ++ if (old_nxi) ++ clr_nx_info(&p->nx_info); ++ claim_nx_info(nxi, p); ++ set_nx_info(&p->nx_info, nxi); ++ p->nid = nxi->nx_id; ++ task_unlock(p); ++ ++ vxdprintk(VXD_CBIT(nid, 5), ++ "moved task %p into nxi:%p[#%d]", ++ p, nxi, nxi->nx_id); ++ ++ if (old_nxi) ++ release_nx_info(old_nxi, p); ++ ret = 0; ++out: ++ put_nx_info(old_nxi); ++ return ret; ++} ++ ++ ++void nx_set_persistent(struct nx_info *nxi) ++{ ++ vxdprintk(VXD_CBIT(nid, 6), ++ "nx_set_persistent(%p[#%d])", nxi, nxi->nx_id); ++ ++ get_nx_info(nxi); ++ claim_nx_info(nxi, NULL); ++} ++ ++void nx_clear_persistent(struct nx_info *nxi) ++{ ++ vxdprintk(VXD_CBIT(nid, 6), ++ "nx_clear_persistent(%p[#%d])", nxi, nxi->nx_id); ++ ++ release_nx_info(nxi, NULL); ++ put_nx_info(nxi); ++} ++ ++void nx_update_persistent(struct nx_info *nxi) ++{ ++ if (nx_info_flags(nxi, NXF_PERSISTENT, 0)) ++ nx_set_persistent(nxi); ++ else ++ nx_clear_persistent(nxi); ++} ++ ++/* vserver syscall commands below here */ ++ ++/* taks nid and nx_info functions */ ++ ++#include ++ ++ ++int vc_task_nid(uint32_t id) ++{ ++ nid_t nid; ++ ++ if (id) { ++ struct task_struct *tsk; ++ ++ rcu_read_lock(); ++ tsk = find_task_by_real_pid(id); ++ nid = (tsk) ? 
tsk->nid : -ESRCH; ++ rcu_read_unlock(); ++ } else ++ nid = nx_current_nid(); ++ return nid; ++} ++ ++ ++int vc_nx_info(struct nx_info *nxi, void __user *data) ++{ ++ struct vcmd_nx_info_v0 vc_data; ++ ++ vc_data.nid = nxi->nx_id; ++ ++ if (copy_to_user(data, &vc_data, sizeof(vc_data))) ++ return -EFAULT; ++ return 0; ++} ++ ++ ++/* network functions */ ++ ++int vc_net_create(uint32_t nid, void __user *data) ++{ ++ struct vcmd_net_create vc_data = { .flagword = NXF_INIT_SET }; ++ struct nx_info *new_nxi; ++ int ret; ++ ++ if (data && copy_from_user(&vc_data, data, sizeof(vc_data))) ++ return -EFAULT; ++ ++ if ((nid > MAX_S_CONTEXT) || (nid < 2)) ++ return -EINVAL; ++ ++ new_nxi = __create_nx_info(nid); ++ if (IS_ERR(new_nxi)) ++ return PTR_ERR(new_nxi); ++ ++ /* initial flags */ ++ new_nxi->nx_flags = vc_data.flagword; ++ ++ ret = -ENOEXEC; ++ if (vs_net_change(new_nxi, VSC_NETUP)) ++ goto out; ++ ++ ret = nx_migrate_task(current, new_nxi); ++ if (ret) ++ goto out; ++ ++ /* return context id on success */ ++ ret = new_nxi->nx_id; ++ ++ /* get a reference for persistent contexts */ ++ if ((vc_data.flagword & NXF_PERSISTENT)) ++ nx_set_persistent(new_nxi); ++out: ++ release_nx_info(new_nxi, NULL); ++ put_nx_info(new_nxi); ++ return ret; ++} ++ ++ ++int vc_net_migrate(struct nx_info *nxi, void __user *data) ++{ ++ return nx_migrate_task(current, nxi); ++} ++ ++ ++ ++int do_add_v4_addr(struct nx_info *nxi, __be32 ip, __be32 ip2, __be32 mask, ++ uint16_t type, uint16_t flags) ++{ ++ struct nx_addr_v4 *nxa = &nxi->v4; ++ ++ if (NX_IPV4(nxi)) { ++ /* locate last entry */ ++ for (; nxa->next; nxa = nxa->next); ++ nxa->next = __alloc_nx_addr_v4(); ++ nxa = nxa->next; ++ ++ if (IS_ERR(nxa)) ++ return PTR_ERR(nxa); ++ } ++ ++ if (nxi->v4.next) ++ /* remove single ip for ip list */ ++ nxi->nx_flags &= ~NXF_SINGLE_IP; ++ ++ nxa->ip[0].s_addr = ip; ++ nxa->ip[1].s_addr = ip2; ++ nxa->mask.s_addr = mask; ++ nxa->type = type; ++ nxa->flags = flags; ++ return 0; ++} ++ ++int do_remove_v4_addr(struct nx_info *nxi, __be32 ip, __be32 ip2, __be32 mask, ++ uint16_t type, uint16_t flags) ++{ ++ struct nx_addr_v4 *nxa = &nxi->v4; ++ ++ switch (type) { ++/* case NXA_TYPE_ADDR: ++ break; */ ++ ++ case NXA_TYPE_ANY: ++ __dealloc_nx_addr_v4_all(xchg(&nxa->next, NULL)); ++ memset(nxa, 0, sizeof(*nxa)); ++ break; ++ ++ default: ++ return -EINVAL; ++ } ++ return 0; ++} ++ ++ ++int vc_net_add(struct nx_info *nxi, void __user *data) ++{ ++ struct vcmd_net_addr_v0 vc_data; ++ int index, ret = 0; ++ ++ if (data && copy_from_user(&vc_data, data, sizeof(vc_data))) ++ return -EFAULT; ++ ++ switch (vc_data.type) { ++ case NXA_TYPE_IPV4: ++ if ((vc_data.count < 1) || (vc_data.count > 4)) ++ return -EINVAL; ++ ++ index = 0; ++ while (index < vc_data.count) { ++ ret = do_add_v4_addr(nxi, vc_data.ip[index].s_addr, 0, ++ vc_data.mask[index].s_addr, NXA_TYPE_ADDR, 0); ++ if (ret) ++ return ret; ++ index++; ++ } ++ ret = index; ++ break; ++ ++ case NXA_TYPE_IPV4|NXA_MOD_BCAST: ++ nxi->v4_bcast = vc_data.ip[0]; ++ ret = 1; ++ break; ++ ++ case NXA_TYPE_IPV4|NXA_MOD_LBACK: ++ nxi->v4_lback = vc_data.ip[0]; ++ ret = 1; ++ break; ++ ++ default: ++ ret = -EINVAL; ++ break; ++ } ++ return ret; ++} ++ ++int vc_net_remove(struct nx_info *nxi, void __user *data) ++{ ++ struct vcmd_net_addr_v0 vc_data; ++ ++ if (data && copy_from_user(&vc_data, data, sizeof(vc_data))) ++ return -EFAULT; ++ ++ switch (vc_data.type) { ++ case NXA_TYPE_ANY: ++ __dealloc_nx_addr_v4_all(xchg(&nxi->v4.next, NULL)); ++ memset(&nxi->v4, 0, sizeof(nxi->v4)); ++ break; 
++ ++ default: ++ return -EINVAL; ++ } ++ return 0; ++} ++ ++ ++int vc_net_add_ipv4_v1(struct nx_info *nxi, void __user *data) ++{ ++ struct vcmd_net_addr_ipv4_v1 vc_data; ++ ++ if (data && copy_from_user(&vc_data, data, sizeof(vc_data))) ++ return -EFAULT; ++ ++ switch (vc_data.type) { ++ case NXA_TYPE_ADDR: ++ case NXA_TYPE_MASK: ++ return do_add_v4_addr(nxi, vc_data.ip.s_addr, 0, ++ vc_data.mask.s_addr, vc_data.type, vc_data.flags); ++ ++ case NXA_TYPE_ADDR | NXA_MOD_BCAST: ++ nxi->v4_bcast = vc_data.ip; ++ break; ++ ++ case NXA_TYPE_ADDR | NXA_MOD_LBACK: ++ nxi->v4_lback = vc_data.ip; ++ break; ++ ++ default: ++ return -EINVAL; ++ } ++ return 0; ++} ++ ++int vc_net_add_ipv4(struct nx_info *nxi, void __user *data) ++{ ++ struct vcmd_net_addr_ipv4_v2 vc_data; ++ ++ if (data && copy_from_user(&vc_data, data, sizeof(vc_data))) ++ return -EFAULT; ++ ++ switch (vc_data.type) { ++ case NXA_TYPE_ADDR: ++ case NXA_TYPE_MASK: ++ case NXA_TYPE_RANGE: ++ return do_add_v4_addr(nxi, vc_data.ip.s_addr, vc_data.ip2.s_addr, ++ vc_data.mask.s_addr, vc_data.type, vc_data.flags); ++ ++ case NXA_TYPE_ADDR | NXA_MOD_BCAST: ++ nxi->v4_bcast = vc_data.ip; ++ break; ++ ++ case NXA_TYPE_ADDR | NXA_MOD_LBACK: ++ nxi->v4_lback = vc_data.ip; ++ break; ++ ++ default: ++ return -EINVAL; ++ } ++ return 0; ++} ++ ++int vc_net_rem_ipv4_v1(struct nx_info *nxi, void __user *data) ++{ ++ struct vcmd_net_addr_ipv4_v1 vc_data; ++ ++ if (data && copy_from_user(&vc_data, data, sizeof(vc_data))) ++ return -EFAULT; ++ ++ return do_remove_v4_addr(nxi, vc_data.ip.s_addr, 0, ++ vc_data.mask.s_addr, vc_data.type, vc_data.flags); ++} ++ ++int vc_net_rem_ipv4(struct nx_info *nxi, void __user *data) ++{ ++ struct vcmd_net_addr_ipv4_v2 vc_data; ++ ++ if (data && copy_from_user(&vc_data, data, sizeof(vc_data))) ++ return -EFAULT; ++ ++ return do_remove_v4_addr(nxi, vc_data.ip.s_addr, vc_data.ip2.s_addr, ++ vc_data.mask.s_addr, vc_data.type, vc_data.flags); ++} ++ ++#ifdef CONFIG_IPV6 ++ ++int do_add_v6_addr(struct nx_info *nxi, ++ struct in6_addr *ip, struct in6_addr *mask, ++ uint32_t prefix, uint16_t type, uint16_t flags) ++{ ++ struct nx_addr_v6 *nxa = &nxi->v6; ++ ++ if (NX_IPV6(nxi)) { ++ /* locate last entry */ ++ for (; nxa->next; nxa = nxa->next); ++ nxa->next = __alloc_nx_addr_v6(); ++ nxa = nxa->next; ++ ++ if (IS_ERR(nxa)) ++ return PTR_ERR(nxa); ++ } ++ ++ nxa->ip = *ip; ++ nxa->mask = *mask; ++ nxa->prefix = prefix; ++ nxa->type = type; ++ nxa->flags = flags; ++ return 0; ++} ++ ++ ++int vc_net_add_ipv6(struct nx_info *nxi, void __user *data) ++{ ++ struct vcmd_net_addr_ipv6_v1 vc_data; ++ ++ if (data && copy_from_user(&vc_data, data, sizeof(vc_data))) ++ return -EFAULT; ++ ++ switch (vc_data.type) { ++ case NXA_TYPE_ADDR: ++ memset(&vc_data.mask, ~0, sizeof(vc_data.mask)); ++ /* fallthrough */ ++ case NXA_TYPE_MASK: ++ return do_add_v6_addr(nxi, &vc_data.ip, &vc_data.mask, ++ vc_data.prefix, vc_data.type, vc_data.flags); ++ default: ++ return -EINVAL; ++ } ++ return 0; ++} ++ ++int vc_net_remove_ipv6(struct nx_info *nxi, void __user *data) ++{ ++ struct vcmd_net_addr_ipv6_v1 vc_data; ++ ++ if (data && copy_from_user(&vc_data, data, sizeof(vc_data))) ++ return -EFAULT; ++ ++ switch (vc_data.type) { ++ case NXA_TYPE_ANY: ++ __dealloc_nx_addr_v6_all(xchg(&nxi->v6.next, NULL)); ++ memset(&nxi->v6, 0, sizeof(nxi->v6)); ++ break; ++ ++ default: ++ return -EINVAL; ++ } ++ return 0; ++} ++ ++#endif /* CONFIG_IPV6 */ ++ ++ ++int vc_get_nflags(struct nx_info *nxi, void __user *data) ++{ ++ struct vcmd_net_flags_v0 vc_data; ++ ++ 
vc_data.flagword = nxi->nx_flags; ++ ++ /* special STATE flag handling */ ++ vc_data.mask = vs_mask_flags(~0ULL, nxi->nx_flags, NXF_ONE_TIME); ++ ++ if (copy_to_user(data, &vc_data, sizeof(vc_data))) ++ return -EFAULT; ++ return 0; ++} ++ ++int vc_set_nflags(struct nx_info *nxi, void __user *data) ++{ ++ struct vcmd_net_flags_v0 vc_data; ++ uint64_t mask, trigger; ++ ++ if (copy_from_user(&vc_data, data, sizeof(vc_data))) ++ return -EFAULT; ++ ++ /* special STATE flag handling */ ++ mask = vs_mask_mask(vc_data.mask, nxi->nx_flags, NXF_ONE_TIME); ++ trigger = (mask & nxi->nx_flags) ^ (mask & vc_data.flagword); ++ ++ nxi->nx_flags = vs_mask_flags(nxi->nx_flags, ++ vc_data.flagword, mask); ++ if (trigger & NXF_PERSISTENT) ++ nx_update_persistent(nxi); ++ ++ return 0; ++} ++ ++int vc_get_ncaps(struct nx_info *nxi, void __user *data) ++{ ++ struct vcmd_net_caps_v0 vc_data; ++ ++ vc_data.ncaps = nxi->nx_ncaps; ++ vc_data.cmask = ~0ULL; ++ ++ if (copy_to_user(data, &vc_data, sizeof(vc_data))) ++ return -EFAULT; ++ return 0; ++} ++ ++int vc_set_ncaps(struct nx_info *nxi, void __user *data) ++{ ++ struct vcmd_net_caps_v0 vc_data; ++ ++ if (copy_from_user(&vc_data, data, sizeof(vc_data))) ++ return -EFAULT; ++ ++ nxi->nx_ncaps = vs_mask_flags(nxi->nx_ncaps, ++ vc_data.ncaps, vc_data.cmask); ++ return 0; ++} ++ ++ ++#include ++ ++module_init(init_network); ++ ++EXPORT_SYMBOL_GPL(free_nx_info); ++EXPORT_SYMBOL_GPL(unhash_nx_info); ++ +diff -NurpP --minimal linux-3.2.34/kernel/vserver/proc.c linux-3.2.34-vs2.3.2.15/kernel/vserver/proc.c +--- linux-3.2.34/kernel/vserver/proc.c 1970-01-01 01:00:00.000000000 +0100 ++++ linux-3.2.34-vs2.3.2.15/kernel/vserver/proc.c 2011-12-06 23:19:52.000000000 +0100 +@@ -0,0 +1,1103 @@ ++/* ++ * linux/kernel/vserver/proc.c ++ * ++ * Virtual Context Support ++ * ++ * Copyright (C) 2003-2011 Herbert Pötzl ++ * ++ * V0.01 basic structure ++ * V0.02 adaptation vs1.3.0 ++ * V0.03 proc permissions ++ * V0.04 locking/generic ++ * V0.05 next generation procfs ++ * V0.06 inode validation ++ * V0.07 generic rewrite vid ++ * V0.08 remove inode type ++ * V0.09 added u/wmask info ++ * ++ */ ++ ++#include ++#include ++#include ++#include ++ ++#include ++#include ++#include ++ ++#include ++#include ++#include ++#include ++ ++#include ++ ++#include "cvirt_proc.h" ++#include "cacct_proc.h" ++#include "limit_proc.h" ++#include "sched_proc.h" ++#include "vci_config.h" ++ ++ ++static inline char *print_cap_t(char *buffer, kernel_cap_t *c) ++{ ++ unsigned __capi; ++ ++ CAP_FOR_EACH_U32(__capi) { ++ buffer += sprintf(buffer, "%08x", ++ c->cap[(_KERNEL_CAPABILITY_U32S-1) - __capi]); ++ } ++ return buffer; ++} ++ ++ ++static struct proc_dir_entry *proc_virtual; ++ ++static struct proc_dir_entry *proc_virtnet; ++ ++ ++/* first the actual feeds */ ++ ++ ++static int proc_vci(char *buffer) ++{ ++ return sprintf(buffer, ++ "VCIVersion:\t%04x:%04x\n" ++ "VCISyscall:\t%d\n" ++ "VCIKernel:\t%08x\n", ++ VCI_VERSION >> 16, ++ VCI_VERSION & 0xFFFF, ++ __NR_vserver, ++ vci_kernel_config()); ++} ++ ++static int proc_virtual_info(char *buffer) ++{ ++ return proc_vci(buffer); ++} ++ ++static int proc_virtual_status(char *buffer) ++{ ++ return sprintf(buffer, ++ "#CTotal:\t%d\n" ++ "#CActive:\t%d\n" ++ "#NSProxy:\t%d\t%d %d %d %d %d %d\n" ++ "#InitTask:\t%d\t%d %d\n", ++ atomic_read(&vx_global_ctotal), ++ atomic_read(&vx_global_cactive), ++ atomic_read(&vs_global_nsproxy), ++ atomic_read(&vs_global_fs), ++ atomic_read(&vs_global_mnt_ns), ++ atomic_read(&vs_global_uts_ns), ++ atomic_read(&nr_ipc_ns), ++ 
atomic_read(&vs_global_user_ns), ++ atomic_read(&vs_global_pid_ns), ++ atomic_read(&init_task.usage), ++ atomic_read(&init_task.nsproxy->count), ++ init_task.fs->users); ++} ++ ++ ++int proc_vxi_info(struct vx_info *vxi, char *buffer) ++{ ++ int length; ++ ++ length = sprintf(buffer, ++ "ID:\t%d\n" ++ "Info:\t%p\n" ++ "Init:\t%d\n" ++ "OOM:\t%lld\n", ++ vxi->vx_id, ++ vxi, ++ vxi->vx_initpid, ++ vxi->vx_badness_bias); ++ return length; ++} ++ ++int proc_vxi_status(struct vx_info *vxi, char *buffer) ++{ ++ char *orig = buffer; ++ ++ buffer += sprintf(buffer, ++ "UseCnt:\t%d\n" ++ "Tasks:\t%d\n" ++ "Flags:\t%016llx\n", ++ atomic_read(&vxi->vx_usecnt), ++ atomic_read(&vxi->vx_tasks), ++ (unsigned long long)vxi->vx_flags); ++ ++ buffer += sprintf(buffer, "BCaps:\t"); ++ buffer = print_cap_t(buffer, &vxi->vx_bcaps); ++ buffer += sprintf(buffer, "\n"); ++ ++ buffer += sprintf(buffer, ++ "CCaps:\t%016llx\n" ++ "Umask:\t%16llx\n" ++ "Wmask:\t%16llx\n" ++ "Spaces:\t%08lx %08lx\n", ++ (unsigned long long)vxi->vx_ccaps, ++ (unsigned long long)vxi->vx_umask, ++ (unsigned long long)vxi->vx_wmask, ++ vxi->space[0].vx_nsmask, vxi->space[1].vx_nsmask); ++ return buffer - orig; ++} ++ ++int proc_vxi_limit(struct vx_info *vxi, char *buffer) ++{ ++ return vx_info_proc_limit(&vxi->limit, buffer); ++} ++ ++int proc_vxi_sched(struct vx_info *vxi, char *buffer) ++{ ++ int cpu, length; ++ ++ length = vx_info_proc_sched(&vxi->sched, buffer); ++ for_each_online_cpu(cpu) { ++ length += vx_info_proc_sched_pc( ++ &vx_per_cpu(vxi, sched_pc, cpu), ++ buffer + length, cpu); ++ } ++ return length; ++} ++ ++int proc_vxi_nsproxy0(struct vx_info *vxi, char *buffer) ++{ ++ return vx_info_proc_nsproxy(vxi->space[0].vx_nsproxy, buffer); ++} ++ ++int proc_vxi_nsproxy1(struct vx_info *vxi, char *buffer) ++{ ++ return vx_info_proc_nsproxy(vxi->space[1].vx_nsproxy, buffer); ++} ++ ++int proc_vxi_cvirt(struct vx_info *vxi, char *buffer) ++{ ++ int cpu, length; ++ ++ vx_update_load(vxi); ++ length = vx_info_proc_cvirt(&vxi->cvirt, buffer); ++ for_each_online_cpu(cpu) { ++ length += vx_info_proc_cvirt_pc( ++ &vx_per_cpu(vxi, cvirt_pc, cpu), ++ buffer + length, cpu); ++ } ++ return length; ++} ++ ++int proc_vxi_cacct(struct vx_info *vxi, char *buffer) ++{ ++ return vx_info_proc_cacct(&vxi->cacct, buffer); ++} ++ ++ ++static int proc_virtnet_info(char *buffer) ++{ ++ return proc_vci(buffer); ++} ++ ++static int proc_virtnet_status(char *buffer) ++{ ++ return sprintf(buffer, ++ "#CTotal:\t%d\n" ++ "#CActive:\t%d\n", ++ atomic_read(&nx_global_ctotal), ++ atomic_read(&nx_global_cactive)); ++} ++ ++int proc_nxi_info(struct nx_info *nxi, char *buffer) ++{ ++ struct nx_addr_v4 *v4a; ++#ifdef CONFIG_IPV6 ++ struct nx_addr_v6 *v6a; ++#endif ++ int length, i; ++ ++ length = sprintf(buffer, ++ "ID:\t%d\n" ++ "Info:\t%p\n" ++ "Bcast:\t" NIPQUAD_FMT "\n" ++ "Lback:\t" NIPQUAD_FMT "\n", ++ nxi->nx_id, ++ nxi, ++ NIPQUAD(nxi->v4_bcast.s_addr), ++ NIPQUAD(nxi->v4_lback.s_addr)); ++ ++ if (!NX_IPV4(nxi)) ++ goto skip_v4; ++ for (i = 0, v4a = &nxi->v4; v4a; i++, v4a = v4a->next) ++ length += sprintf(buffer + length, "%d:\t" NXAV4_FMT "\n", ++ i, NXAV4(v4a)); ++skip_v4: ++#ifdef CONFIG_IPV6 ++ if (!NX_IPV6(nxi)) ++ goto skip_v6; ++ for (i = 0, v6a = &nxi->v6; v6a; i++, v6a = v6a->next) ++ length += sprintf(buffer + length, "%d:\t" NXAV6_FMT "\n", ++ i, NXAV6(v6a)); ++skip_v6: ++#endif ++ return length; ++} ++ ++int proc_nxi_status(struct nx_info *nxi, char *buffer) ++{ ++ int length; ++ ++ length = sprintf(buffer, ++ "UseCnt:\t%d\n" ++ "Tasks:\t%d\n" ++ 
"Flags:\t%016llx\n" ++ "NCaps:\t%016llx\n", ++ atomic_read(&nxi->nx_usecnt), ++ atomic_read(&nxi->nx_tasks), ++ (unsigned long long)nxi->nx_flags, ++ (unsigned long long)nxi->nx_ncaps); ++ return length; ++} ++ ++ ++ ++/* here the inode helpers */ ++ ++struct vs_entry { ++ int len; ++ char *name; ++ mode_t mode; ++ struct inode_operations *iop; ++ struct file_operations *fop; ++ union proc_op op; ++}; ++ ++static struct inode *vs_proc_make_inode(struct super_block *sb, struct vs_entry *p) ++{ ++ struct inode *inode = new_inode(sb); ++ ++ if (!inode) ++ goto out; ++ ++ inode->i_mode = p->mode; ++ if (p->iop) ++ inode->i_op = p->iop; ++ if (p->fop) ++ inode->i_fop = p->fop; ++ ++ set_nlink(inode, (p->mode & S_IFDIR) ? 2 : 1); ++ inode->i_flags |= S_IMMUTABLE; ++ ++ inode->i_mtime = inode->i_atime = inode->i_ctime = CURRENT_TIME; ++ ++ inode->i_uid = 0; ++ inode->i_gid = 0; ++ inode->i_tag = 0; ++out: ++ return inode; ++} ++ ++static struct dentry *vs_proc_instantiate(struct inode *dir, ++ struct dentry *dentry, int id, void *ptr) ++{ ++ struct vs_entry *p = ptr; ++ struct inode *inode = vs_proc_make_inode(dir->i_sb, p); ++ struct dentry *error = ERR_PTR(-EINVAL); ++ ++ if (!inode) ++ goto out; ++ ++ PROC_I(inode)->op = p->op; ++ PROC_I(inode)->fd = id; ++ d_add(dentry, inode); ++ error = NULL; ++out: ++ return error; ++} ++ ++/* Lookups */ ++ ++typedef struct dentry *instantiate_t(struct inode *, struct dentry *, int, void *); ++ ++/* ++ * Fill a directory entry. ++ * ++ * If possible create the dcache entry and derive our inode number and ++ * file type from dcache entry. ++ * ++ * Since all of the proc inode numbers are dynamically generated, the inode ++ * numbers do not exist until the inode is cache. This means creating the ++ * the dcache entry in readdir is necessary to keep the inode numbers ++ * reported by readdir in sync with the inode numbers reported ++ * by stat. 
++ */ ++static int proc_fill_cache(struct file *filp, void *dirent, filldir_t filldir, ++ char *name, int len, instantiate_t instantiate, int id, void *ptr) ++{ ++ struct dentry *child, *dir = filp->f_dentry; ++ struct inode *inode; ++ struct qstr qname; ++ ino_t ino = 0; ++ unsigned type = DT_UNKNOWN; ++ ++ qname.name = name; ++ qname.len = len; ++ qname.hash = full_name_hash(name, len); ++ ++ child = d_lookup(dir, &qname); ++ if (!child) { ++ struct dentry *new; ++ new = d_alloc(dir, &qname); ++ if (new) { ++ child = instantiate(dir->d_inode, new, id, ptr); ++ if (child) ++ dput(new); ++ else ++ child = new; ++ } ++ } ++ if (!child || IS_ERR(child) || !child->d_inode) ++ goto end_instantiate; ++ inode = child->d_inode; ++ if (inode) { ++ ino = inode->i_ino; ++ type = inode->i_mode >> 12; ++ } ++ dput(child); ++end_instantiate: ++ if (!ino) ++ ino = find_inode_number(dir, &qname); ++ if (!ino) ++ ino = 1; ++ return filldir(dirent, name, len, filp->f_pos, ino, type); ++} ++ ++ ++ ++/* get and revalidate vx_info/xid */ ++ ++static inline ++struct vx_info *get_proc_vx_info(struct inode *inode) ++{ ++ return lookup_vx_info(PROC_I(inode)->fd); ++} ++ ++static int proc_xid_revalidate(struct dentry *dentry, struct nameidata *nd) ++{ ++ struct inode *inode = dentry->d_inode; ++ xid_t xid = PROC_I(inode)->fd; ++ ++ if (!xid || xid_is_hashed(xid)) ++ return 1; ++ d_drop(dentry); ++ return 0; ++} ++ ++ ++/* get and revalidate nx_info/nid */ ++ ++static int proc_nid_revalidate(struct dentry *dentry, struct nameidata *nd) ++{ ++ struct inode *inode = dentry->d_inode; ++ nid_t nid = PROC_I(inode)->fd; ++ ++ if (!nid || nid_is_hashed(nid)) ++ return 1; ++ d_drop(dentry); ++ return 0; ++} ++ ++ ++ ++#define PROC_BLOCK_SIZE (PAGE_SIZE - 1024) ++ ++static ssize_t proc_vs_info_read(struct file *file, char __user *buf, ++ size_t count, loff_t *ppos) ++{ ++ struct inode *inode = file->f_dentry->d_inode; ++ unsigned long page; ++ ssize_t length = 0; ++ ++ if (count > PROC_BLOCK_SIZE) ++ count = PROC_BLOCK_SIZE; ++ ++ /* fade that out as soon as stable */ ++ WARN_ON(PROC_I(inode)->fd); ++ ++ if (!(page = __get_free_page(GFP_KERNEL))) ++ return -ENOMEM; ++ ++ BUG_ON(!PROC_I(inode)->op.proc_vs_read); ++ length = PROC_I(inode)->op.proc_vs_read((char *)page); ++ ++ if (length >= 0) ++ length = simple_read_from_buffer(buf, count, ppos, ++ (char *)page, length); ++ ++ free_page(page); ++ return length; ++} ++ ++static ssize_t proc_vx_info_read(struct file *file, char __user *buf, ++ size_t count, loff_t *ppos) ++{ ++ struct inode *inode = file->f_dentry->d_inode; ++ struct vx_info *vxi = NULL; ++ xid_t xid = PROC_I(inode)->fd; ++ unsigned long page; ++ ssize_t length = 0; ++ ++ if (count > PROC_BLOCK_SIZE) ++ count = PROC_BLOCK_SIZE; ++ ++ /* fade that out as soon as stable */ ++ WARN_ON(!xid); ++ vxi = lookup_vx_info(xid); ++ if (!vxi) ++ goto out; ++ ++ length = -ENOMEM; ++ if (!(page = __get_free_page(GFP_KERNEL))) ++ goto out_put; ++ ++ BUG_ON(!PROC_I(inode)->op.proc_vxi_read); ++ length = PROC_I(inode)->op.proc_vxi_read(vxi, (char *)page); ++ ++ if (length >= 0) ++ length = simple_read_from_buffer(buf, count, ppos, ++ (char *)page, length); ++ ++ free_page(page); ++out_put: ++ put_vx_info(vxi); ++out: ++ return length; ++} ++ ++static ssize_t proc_nx_info_read(struct file *file, char __user *buf, ++ size_t count, loff_t *ppos) ++{ ++ struct inode *inode = file->f_dentry->d_inode; ++ struct nx_info *nxi = NULL; ++ nid_t nid = PROC_I(inode)->fd; ++ unsigned long page; ++ ssize_t length = 0; ++ ++ if (count > 
PROC_BLOCK_SIZE) ++ count = PROC_BLOCK_SIZE; ++ ++ /* fade that out as soon as stable */ ++ WARN_ON(!nid); ++ nxi = lookup_nx_info(nid); ++ if (!nxi) ++ goto out; ++ ++ length = -ENOMEM; ++ if (!(page = __get_free_page(GFP_KERNEL))) ++ goto out_put; ++ ++ BUG_ON(!PROC_I(inode)->op.proc_nxi_read); ++ length = PROC_I(inode)->op.proc_nxi_read(nxi, (char *)page); ++ ++ if (length >= 0) ++ length = simple_read_from_buffer(buf, count, ppos, ++ (char *)page, length); ++ ++ free_page(page); ++out_put: ++ put_nx_info(nxi); ++out: ++ return length; ++} ++ ++ ++ ++/* here comes the lower level */ ++ ++ ++#define NOD(NAME, MODE, IOP, FOP, OP) { \ ++ .len = sizeof(NAME) - 1, \ ++ .name = (NAME), \ ++ .mode = MODE, \ ++ .iop = IOP, \ ++ .fop = FOP, \ ++ .op = OP, \ ++} ++ ++ ++#define DIR(NAME, MODE, OTYPE) \ ++ NOD(NAME, (S_IFDIR | (MODE)), \ ++ &proc_ ## OTYPE ## _inode_operations, \ ++ &proc_ ## OTYPE ## _file_operations, { } ) ++ ++#define INF(NAME, MODE, OTYPE) \ ++ NOD(NAME, (S_IFREG | (MODE)), NULL, \ ++ &proc_vs_info_file_operations, \ ++ { .proc_vs_read = &proc_##OTYPE } ) ++ ++#define VINF(NAME, MODE, OTYPE) \ ++ NOD(NAME, (S_IFREG | (MODE)), NULL, \ ++ &proc_vx_info_file_operations, \ ++ { .proc_vxi_read = &proc_##OTYPE } ) ++ ++#define NINF(NAME, MODE, OTYPE) \ ++ NOD(NAME, (S_IFREG | (MODE)), NULL, \ ++ &proc_nx_info_file_operations, \ ++ { .proc_nxi_read = &proc_##OTYPE } ) ++ ++ ++static struct file_operations proc_vs_info_file_operations = { ++ .read = proc_vs_info_read, ++}; ++ ++static struct file_operations proc_vx_info_file_operations = { ++ .read = proc_vx_info_read, ++}; ++ ++static struct dentry_operations proc_xid_dentry_operations = { ++ .d_revalidate = proc_xid_revalidate, ++}; ++ ++static struct vs_entry vx_base_stuff[] = { ++ VINF("info", S_IRUGO, vxi_info), ++ VINF("status", S_IRUGO, vxi_status), ++ VINF("limit", S_IRUGO, vxi_limit), ++ VINF("sched", S_IRUGO, vxi_sched), ++ VINF("nsproxy", S_IRUGO, vxi_nsproxy0), ++ VINF("nsproxy1",S_IRUGO, vxi_nsproxy1), ++ VINF("cvirt", S_IRUGO, vxi_cvirt), ++ VINF("cacct", S_IRUGO, vxi_cacct), ++ {} ++}; ++ ++ ++ ++ ++static struct dentry *proc_xid_instantiate(struct inode *dir, ++ struct dentry *dentry, int id, void *ptr) ++{ ++ dentry->d_op = &proc_xid_dentry_operations; ++ return vs_proc_instantiate(dir, dentry, id, ptr); ++} ++ ++static struct dentry *proc_xid_lookup(struct inode *dir, ++ struct dentry *dentry, struct nameidata *nd) ++{ ++ struct vs_entry *p = vx_base_stuff; ++ struct dentry *error = ERR_PTR(-ENOENT); ++ ++ for (; p->name; p++) { ++ if (p->len != dentry->d_name.len) ++ continue; ++ if (!memcmp(dentry->d_name.name, p->name, p->len)) ++ break; ++ } ++ if (!p->name) ++ goto out; ++ ++ error = proc_xid_instantiate(dir, dentry, PROC_I(dir)->fd, p); ++out: ++ return error; ++} ++ ++static int proc_xid_readdir(struct file *filp, ++ void *dirent, filldir_t filldir) ++{ ++ struct dentry *dentry = filp->f_dentry; ++ struct inode *inode = dentry->d_inode; ++ struct vs_entry *p = vx_base_stuff; ++ int size = sizeof(vx_base_stuff) / sizeof(struct vs_entry); ++ int pos, index; ++ u64 ino; ++ ++ pos = filp->f_pos; ++ switch (pos) { ++ case 0: ++ ino = inode->i_ino; ++ if (filldir(dirent, ".", 1, pos, ino, DT_DIR) < 0) ++ goto out; ++ pos++; ++ /* fall through */ ++ case 1: ++ ino = parent_ino(dentry); ++ if (filldir(dirent, "..", 2, pos, ino, DT_DIR) < 0) ++ goto out; ++ pos++; ++ /* fall through */ ++ default: ++ index = pos - 2; ++ if (index >= size) ++ goto out; ++ for (p += index; p->name; p++) { ++ if (proc_fill_cache(filp, 
dirent, filldir, p->name, p->len, ++ vs_proc_instantiate, PROC_I(inode)->fd, p)) ++ goto out; ++ pos++; ++ } ++ } ++out: ++ filp->f_pos = pos; ++ return 1; ++} ++ ++ ++ ++static struct file_operations proc_nx_info_file_operations = { ++ .read = proc_nx_info_read, ++}; ++ ++static struct dentry_operations proc_nid_dentry_operations = { ++ .d_revalidate = proc_nid_revalidate, ++}; ++ ++static struct vs_entry nx_base_stuff[] = { ++ NINF("info", S_IRUGO, nxi_info), ++ NINF("status", S_IRUGO, nxi_status), ++ {} ++}; ++ ++ ++static struct dentry *proc_nid_instantiate(struct inode *dir, ++ struct dentry *dentry, int id, void *ptr) ++{ ++ dentry->d_op = &proc_nid_dentry_operations; ++ return vs_proc_instantiate(dir, dentry, id, ptr); ++} ++ ++static struct dentry *proc_nid_lookup(struct inode *dir, ++ struct dentry *dentry, struct nameidata *nd) ++{ ++ struct vs_entry *p = nx_base_stuff; ++ struct dentry *error = ERR_PTR(-ENOENT); ++ ++ for (; p->name; p++) { ++ if (p->len != dentry->d_name.len) ++ continue; ++ if (!memcmp(dentry->d_name.name, p->name, p->len)) ++ break; ++ } ++ if (!p->name) ++ goto out; ++ ++ error = proc_nid_instantiate(dir, dentry, PROC_I(dir)->fd, p); ++out: ++ return error; ++} ++ ++static int proc_nid_readdir(struct file *filp, ++ void *dirent, filldir_t filldir) ++{ ++ struct dentry *dentry = filp->f_dentry; ++ struct inode *inode = dentry->d_inode; ++ struct vs_entry *p = nx_base_stuff; ++ int size = sizeof(nx_base_stuff) / sizeof(struct vs_entry); ++ int pos, index; ++ u64 ino; ++ ++ pos = filp->f_pos; ++ switch (pos) { ++ case 0: ++ ino = inode->i_ino; ++ if (filldir(dirent, ".", 1, pos, ino, DT_DIR) < 0) ++ goto out; ++ pos++; ++ /* fall through */ ++ case 1: ++ ino = parent_ino(dentry); ++ if (filldir(dirent, "..", 2, pos, ino, DT_DIR) < 0) ++ goto out; ++ pos++; ++ /* fall through */ ++ default: ++ index = pos - 2; ++ if (index >= size) ++ goto out; ++ for (p += index; p->name; p++) { ++ if (proc_fill_cache(filp, dirent, filldir, p->name, p->len, ++ vs_proc_instantiate, PROC_I(inode)->fd, p)) ++ goto out; ++ pos++; ++ } ++ } ++out: ++ filp->f_pos = pos; ++ return 1; ++} ++ ++ ++#define MAX_MULBY10 ((~0U - 9) / 10) ++ ++static inline int atovid(const char *str, int len) ++{ ++ int vid, c; ++ ++ vid = 0; ++ while (len-- > 0) { ++ c = *str - '0'; ++ str++; ++ if (c > 9) ++ return -1; ++ if (vid >= MAX_MULBY10) ++ return -1; ++ vid *= 10; ++ vid += c; ++ if (!vid) ++ return -1; ++ } ++ return vid; ++} ++ ++/* now the upper level (virtual) */ ++ ++ ++static struct file_operations proc_xid_file_operations = { ++ .read = generic_read_dir, ++ .readdir = proc_xid_readdir, ++}; ++ ++static struct inode_operations proc_xid_inode_operations = { ++ .lookup = proc_xid_lookup, ++}; ++ ++static struct vs_entry vx_virtual_stuff[] = { ++ INF("info", S_IRUGO, virtual_info), ++ INF("status", S_IRUGO, virtual_status), ++ DIR(NULL, S_IRUGO | S_IXUGO, xid), ++}; ++ ++ ++static struct dentry *proc_virtual_lookup(struct inode *dir, ++ struct dentry *dentry, struct nameidata *nd) ++{ ++ struct vs_entry *p = vx_virtual_stuff; ++ struct dentry *error = ERR_PTR(-ENOENT); ++ int id = 0; ++ ++ for (; p->name; p++) { ++ if (p->len != dentry->d_name.len) ++ continue; ++ if (!memcmp(dentry->d_name.name, p->name, p->len)) ++ break; ++ } ++ if (p->name) ++ goto instantiate; ++ ++ id = atovid(dentry->d_name.name, dentry->d_name.len); ++ if ((id < 0) || !xid_is_hashed(id)) ++ goto out; ++ ++instantiate: ++ error = proc_xid_instantiate(dir, dentry, id, p); ++out: ++ return error; ++} ++ ++static struct 
file_operations proc_nid_file_operations = { ++ .read = generic_read_dir, ++ .readdir = proc_nid_readdir, ++}; ++ ++static struct inode_operations proc_nid_inode_operations = { ++ .lookup = proc_nid_lookup, ++}; ++ ++static struct vs_entry nx_virtnet_stuff[] = { ++ INF("info", S_IRUGO, virtnet_info), ++ INF("status", S_IRUGO, virtnet_status), ++ DIR(NULL, S_IRUGO | S_IXUGO, nid), ++}; ++ ++ ++static struct dentry *proc_virtnet_lookup(struct inode *dir, ++ struct dentry *dentry, struct nameidata *nd) ++{ ++ struct vs_entry *p = nx_virtnet_stuff; ++ struct dentry *error = ERR_PTR(-ENOENT); ++ int id = 0; ++ ++ for (; p->name; p++) { ++ if (p->len != dentry->d_name.len) ++ continue; ++ if (!memcmp(dentry->d_name.name, p->name, p->len)) ++ break; ++ } ++ if (p->name) ++ goto instantiate; ++ ++ id = atovid(dentry->d_name.name, dentry->d_name.len); ++ if ((id < 0) || !nid_is_hashed(id)) ++ goto out; ++ ++instantiate: ++ error = proc_nid_instantiate(dir, dentry, id, p); ++out: ++ return error; ++} ++ ++ ++#define PROC_MAXVIDS 32 ++ ++int proc_virtual_readdir(struct file *filp, ++ void *dirent, filldir_t filldir) ++{ ++ struct dentry *dentry = filp->f_dentry; ++ struct inode *inode = dentry->d_inode; ++ struct vs_entry *p = vx_virtual_stuff; ++ int size = sizeof(vx_virtual_stuff) / sizeof(struct vs_entry); ++ int pos, index; ++ unsigned int xid_array[PROC_MAXVIDS]; ++ char buf[PROC_NUMBUF]; ++ unsigned int nr_xids, i; ++ u64 ino; ++ ++ pos = filp->f_pos; ++ switch (pos) { ++ case 0: ++ ino = inode->i_ino; ++ if (filldir(dirent, ".", 1, pos, ino, DT_DIR) < 0) ++ goto out; ++ pos++; ++ /* fall through */ ++ case 1: ++ ino = parent_ino(dentry); ++ if (filldir(dirent, "..", 2, pos, ino, DT_DIR) < 0) ++ goto out; ++ pos++; ++ /* fall through */ ++ default: ++ index = pos - 2; ++ if (index >= size) ++ goto entries; ++ for (p += index; p->name; p++) { ++ if (proc_fill_cache(filp, dirent, filldir, p->name, p->len, ++ vs_proc_instantiate, 0, p)) ++ goto out; ++ pos++; ++ } ++ entries: ++ index = pos - size; ++ p = &vx_virtual_stuff[size - 1]; ++ nr_xids = get_xid_list(index, xid_array, PROC_MAXVIDS); ++ for (i = 0; i < nr_xids; i++) { ++ int n, xid = xid_array[i]; ++ unsigned int j = PROC_NUMBUF; ++ ++ n = xid; ++ do ++ buf[--j] = '0' + (n % 10); ++ while (n /= 10); ++ ++ if (proc_fill_cache(filp, dirent, filldir, ++ buf + j, PROC_NUMBUF - j, ++ vs_proc_instantiate, xid, p)) ++ goto out; ++ pos++; ++ } ++ } ++out: ++ filp->f_pos = pos; ++ return 0; ++} ++ ++static int proc_virtual_getattr(struct vfsmount *mnt, ++ struct dentry *dentry, struct kstat *stat) ++{ ++ struct inode *inode = dentry->d_inode; ++ ++ generic_fillattr(inode, stat); ++ stat->nlink = 2 + atomic_read(&vx_global_cactive); ++ return 0; ++} ++ ++static struct file_operations proc_virtual_dir_operations = { ++ .read = generic_read_dir, ++ .readdir = proc_virtual_readdir, ++}; ++ ++static struct inode_operations proc_virtual_dir_inode_operations = { ++ .getattr = proc_virtual_getattr, ++ .lookup = proc_virtual_lookup, ++}; ++ ++ ++ ++ ++ ++int proc_virtnet_readdir(struct file *filp, ++ void *dirent, filldir_t filldir) ++{ ++ struct dentry *dentry = filp->f_dentry; ++ struct inode *inode = dentry->d_inode; ++ struct vs_entry *p = nx_virtnet_stuff; ++ int size = sizeof(nx_virtnet_stuff) / sizeof(struct vs_entry); ++ int pos, index; ++ unsigned int nid_array[PROC_MAXVIDS]; ++ char buf[PROC_NUMBUF]; ++ unsigned int nr_nids, i; ++ u64 ino; ++ ++ pos = filp->f_pos; ++ switch (pos) { ++ case 0: ++ ino = inode->i_ino; ++ if (filldir(dirent, ".", 1, 
pos, ino, DT_DIR) < 0) ++ goto out; ++ pos++; ++ /* fall through */ ++ case 1: ++ ino = parent_ino(dentry); ++ if (filldir(dirent, "..", 2, pos, ino, DT_DIR) < 0) ++ goto out; ++ pos++; ++ /* fall through */ ++ default: ++ index = pos - 2; ++ if (index >= size) ++ goto entries; ++ for (p += index; p->name; p++) { ++ if (proc_fill_cache(filp, dirent, filldir, p->name, p->len, ++ vs_proc_instantiate, 0, p)) ++ goto out; ++ pos++; ++ } ++ entries: ++ index = pos - size; ++ p = &nx_virtnet_stuff[size - 1]; ++ nr_nids = get_nid_list(index, nid_array, PROC_MAXVIDS); ++ for (i = 0; i < nr_nids; i++) { ++ int n, nid = nid_array[i]; ++ unsigned int j = PROC_NUMBUF; ++ ++ n = nid; ++ do ++ buf[--j] = '0' + (n % 10); ++ while (n /= 10); ++ ++ if (proc_fill_cache(filp, dirent, filldir, ++ buf + j, PROC_NUMBUF - j, ++ vs_proc_instantiate, nid, p)) ++ goto out; ++ pos++; ++ } ++ } ++out: ++ filp->f_pos = pos; ++ return 0; ++} ++ ++static int proc_virtnet_getattr(struct vfsmount *mnt, ++ struct dentry *dentry, struct kstat *stat) ++{ ++ struct inode *inode = dentry->d_inode; ++ ++ generic_fillattr(inode, stat); ++ stat->nlink = 2 + atomic_read(&nx_global_cactive); ++ return 0; ++} ++ ++static struct file_operations proc_virtnet_dir_operations = { ++ .read = generic_read_dir, ++ .readdir = proc_virtnet_readdir, ++}; ++ ++static struct inode_operations proc_virtnet_dir_inode_operations = { ++ .getattr = proc_virtnet_getattr, ++ .lookup = proc_virtnet_lookup, ++}; ++ ++ ++ ++void proc_vx_init(void) ++{ ++ struct proc_dir_entry *ent; ++ ++ ent = proc_mkdir("virtual", 0); ++ if (ent) { ++ ent->proc_fops = &proc_virtual_dir_operations; ++ ent->proc_iops = &proc_virtual_dir_inode_operations; ++ } ++ proc_virtual = ent; ++ ++ ent = proc_mkdir("virtnet", 0); ++ if (ent) { ++ ent->proc_fops = &proc_virtnet_dir_operations; ++ ent->proc_iops = &proc_virtnet_dir_inode_operations; ++ } ++ proc_virtnet = ent; ++} ++ ++ ++ ++ ++/* per pid info */ ++ ++ ++int proc_pid_vx_info(struct task_struct *p, char *buffer) ++{ ++ struct vx_info *vxi; ++ char *orig = buffer; ++ ++ buffer += sprintf(buffer, "XID:\t%d\n", vx_task_xid(p)); ++ ++ vxi = task_get_vx_info(p); ++ if (!vxi) ++ goto out; ++ ++ buffer += sprintf(buffer, "BCaps:\t"); ++ buffer = print_cap_t(buffer, &vxi->vx_bcaps); ++ buffer += sprintf(buffer, "\n"); ++ buffer += sprintf(buffer, "CCaps:\t%016llx\n", ++ (unsigned long long)vxi->vx_ccaps); ++ buffer += sprintf(buffer, "CFlags:\t%016llx\n", ++ (unsigned long long)vxi->vx_flags); ++ buffer += sprintf(buffer, "CIPid:\t%d\n", vxi->vx_initpid); ++ ++ put_vx_info(vxi); ++out: ++ return buffer - orig; ++} ++ ++ ++int proc_pid_nx_info(struct task_struct *p, char *buffer) ++{ ++ struct nx_info *nxi; ++ struct nx_addr_v4 *v4a; ++#ifdef CONFIG_IPV6 ++ struct nx_addr_v6 *v6a; ++#endif ++ char *orig = buffer; ++ int i; ++ ++ buffer += sprintf(buffer, "NID:\t%d\n", nx_task_nid(p)); ++ ++ nxi = task_get_nx_info(p); ++ if (!nxi) ++ goto out; ++ ++ buffer += sprintf(buffer, "NCaps:\t%016llx\n", ++ (unsigned long long)nxi->nx_ncaps); ++ buffer += sprintf(buffer, "NFlags:\t%016llx\n", ++ (unsigned long long)nxi->nx_flags); ++ ++ buffer += sprintf(buffer, ++ "V4Root[bcast]:\t" NIPQUAD_FMT "\n", ++ NIPQUAD(nxi->v4_bcast.s_addr)); ++ buffer += sprintf (buffer, ++ "V4Root[lback]:\t" NIPQUAD_FMT "\n", ++ NIPQUAD(nxi->v4_lback.s_addr)); ++ if (!NX_IPV4(nxi)) ++ goto skip_v4; ++ for (i = 0, v4a = &nxi->v4; v4a; i++, v4a = v4a->next) ++ buffer += sprintf(buffer, "V4Root[%d]:\t" NXAV4_FMT "\n", ++ i, NXAV4(v4a)); ++skip_v4: ++#ifdef 
CONFIG_IPV6 ++ if (!NX_IPV6(nxi)) ++ goto skip_v6; ++ for (i = 0, v6a = &nxi->v6; v6a; i++, v6a = v6a->next) ++ buffer += sprintf(buffer, "V6Root[%d]:\t" NXAV6_FMT "\n", ++ i, NXAV6(v6a)); ++skip_v6: ++#endif ++ put_nx_info(nxi); ++out: ++ return buffer - orig; ++} ++ +diff -NurpP --minimal linux-3.2.34/kernel/vserver/sched.c linux-3.2.34-vs2.3.2.15/kernel/vserver/sched.c +--- linux-3.2.34/kernel/vserver/sched.c 1970-01-01 01:00:00.000000000 +0100 ++++ linux-3.2.34-vs2.3.2.15/kernel/vserver/sched.c 2011-12-05 19:33:02.000000000 +0100 +@@ -0,0 +1,82 @@ ++/* ++ * linux/kernel/vserver/sched.c ++ * ++ * Virtual Server: Scheduler Support ++ * ++ * Copyright (C) 2004-2010 Herbert Pötzl ++ * ++ * V0.01 adapted Sam Vilains version to 2.6.3 ++ * V0.02 removed legacy interface ++ * V0.03 changed vcmds to vxi arg ++ * V0.04 removed older and legacy interfaces ++ * V0.05 removed scheduler code/commands ++ * ++ */ ++ ++#include ++#include ++#include ++ ++#include ++ ++ ++void vx_update_sched_param(struct _vx_sched *sched, ++ struct _vx_sched_pc *sched_pc) ++{ ++ sched_pc->prio_bias = sched->prio_bias; ++} ++ ++static int do_set_prio_bias(struct vx_info *vxi, struct vcmd_prio_bias *data) ++{ ++ int cpu; ++ ++ if (data->prio_bias > MAX_PRIO_BIAS) ++ data->prio_bias = MAX_PRIO_BIAS; ++ if (data->prio_bias < MIN_PRIO_BIAS) ++ data->prio_bias = MIN_PRIO_BIAS; ++ ++ if (data->cpu_id != ~0) { ++ vxi->sched.update = cpumask_of_cpu(data->cpu_id); ++ cpus_and(vxi->sched.update, cpu_online_map, ++ vxi->sched.update); ++ } else ++ vxi->sched.update = cpu_online_map; ++ ++ for_each_cpu_mask(cpu, vxi->sched.update) ++ vx_update_sched_param(&vxi->sched, ++ &vx_per_cpu(vxi, sched_pc, cpu)); ++ return 0; ++} ++ ++int vc_set_prio_bias(struct vx_info *vxi, void __user *data) ++{ ++ struct vcmd_prio_bias vc_data; ++ ++ if (copy_from_user(&vc_data, data, sizeof(vc_data))) ++ return -EFAULT; ++ ++ return do_set_prio_bias(vxi, &vc_data); ++} ++ ++int vc_get_prio_bias(struct vx_info *vxi, void __user *data) ++{ ++ struct vcmd_prio_bias vc_data; ++ struct _vx_sched_pc *pcd; ++ int cpu; ++ ++ if (copy_from_user(&vc_data, data, sizeof(vc_data))) ++ return -EFAULT; ++ ++ cpu = vc_data.cpu_id; ++ ++ if (!cpu_possible(cpu)) ++ return -EINVAL; ++ ++ pcd = &vx_per_cpu(vxi, sched_pc, cpu); ++ vc_data.prio_bias = pcd->prio_bias; ++ ++ if (copy_to_user(data, &vc_data, sizeof(vc_data))) ++ return -EFAULT; ++ return 0; ++} ++ +diff -NurpP --minimal linux-3.2.34/kernel/vserver/sched_init.h linux-3.2.34-vs2.3.2.15/kernel/vserver/sched_init.h +--- linux-3.2.34/kernel/vserver/sched_init.h 1970-01-01 01:00:00.000000000 +0100 ++++ linux-3.2.34-vs2.3.2.15/kernel/vserver/sched_init.h 2011-12-05 19:33:02.000000000 +0100 +@@ -0,0 +1,27 @@ ++ ++static inline void vx_info_init_sched(struct _vx_sched *sched) ++{ ++ /* scheduling; hard code starting values as constants */ ++ sched->prio_bias = 0; ++} ++ ++static inline ++void vx_info_init_sched_pc(struct _vx_sched_pc *sched_pc, int cpu) ++{ ++ sched_pc->prio_bias = 0; ++ ++ sched_pc->user_ticks = 0; ++ sched_pc->sys_ticks = 0; ++ sched_pc->hold_ticks = 0; ++} ++ ++static inline void vx_info_exit_sched(struct _vx_sched *sched) ++{ ++ return; ++} ++ ++static inline ++void vx_info_exit_sched_pc(struct _vx_sched_pc *sched_pc, int cpu) ++{ ++ return; ++} +diff -NurpP --minimal linux-3.2.34/kernel/vserver/sched_proc.h linux-3.2.34-vs2.3.2.15/kernel/vserver/sched_proc.h +--- linux-3.2.34/kernel/vserver/sched_proc.h 1970-01-01 01:00:00.000000000 +0100 ++++ linux-3.2.34-vs2.3.2.15/kernel/vserver/sched_proc.h 
2011-12-05 19:33:02.000000000 +0100 +@@ -0,0 +1,32 @@ ++#ifndef _VX_SCHED_PROC_H ++#define _VX_SCHED_PROC_H ++ ++ ++static inline ++int vx_info_proc_sched(struct _vx_sched *sched, char *buffer) ++{ ++ int length = 0; ++ ++ length += sprintf(buffer, ++ "PrioBias:\t%8d\n", ++ sched->prio_bias); ++ return length; ++} ++ ++static inline ++int vx_info_proc_sched_pc(struct _vx_sched_pc *sched_pc, ++ char *buffer, int cpu) ++{ ++ int length = 0; ++ ++ length += sprintf(buffer + length, ++ "cpu %d: %lld %lld %lld", cpu, ++ (unsigned long long)sched_pc->user_ticks, ++ (unsigned long long)sched_pc->sys_ticks, ++ (unsigned long long)sched_pc->hold_ticks); ++ length += sprintf(buffer + length, ++ " %d\n", sched_pc->prio_bias); ++ return length; ++} ++ ++#endif /* _VX_SCHED_PROC_H */ +diff -NurpP --minimal linux-3.2.34/kernel/vserver/signal.c linux-3.2.34-vs2.3.2.15/kernel/vserver/signal.c +--- linux-3.2.34/kernel/vserver/signal.c 1970-01-01 01:00:00.000000000 +0100 ++++ linux-3.2.34-vs2.3.2.15/kernel/vserver/signal.c 2011-12-05 19:33:02.000000000 +0100 +@@ -0,0 +1,134 @@ ++/* ++ * linux/kernel/vserver/signal.c ++ * ++ * Virtual Server: Signal Support ++ * ++ * Copyright (C) 2003-2007 Herbert Pötzl ++ * ++ * V0.01 broken out from vcontext V0.05 ++ * V0.02 changed vcmds to vxi arg ++ * V0.03 adjusted siginfo for kill ++ * ++ */ ++ ++#include ++ ++#include ++#include ++#include ++ ++ ++int vx_info_kill(struct vx_info *vxi, int pid, int sig) ++{ ++ int retval, count = 0; ++ struct task_struct *p; ++ struct siginfo *sip = SEND_SIG_PRIV; ++ ++ retval = -ESRCH; ++ vxdprintk(VXD_CBIT(misc, 4), ++ "vx_info_kill(%p[#%d],%d,%d)*", ++ vxi, vxi->vx_id, pid, sig); ++ read_lock(&tasklist_lock); ++ switch (pid) { ++ case 0: ++ case -1: ++ for_each_process(p) { ++ int err = 0; ++ ++ if (vx_task_xid(p) != vxi->vx_id || p->pid <= 1 || ++ (pid && vxi->vx_initpid == p->pid)) ++ continue; ++ ++ err = group_send_sig_info(sig, sip, p); ++ ++count; ++ if (err != -EPERM) ++ retval = err; ++ } ++ break; ++ ++ case 1: ++ if (vxi->vx_initpid) { ++ pid = vxi->vx_initpid; ++ /* for now, only SIGINT to private init ... */ ++ if (!vx_info_flags(vxi, VXF_STATE_ADMIN, 0) && ++ /* ... 
as long as there are tasks left */ ++ (atomic_read(&vxi->vx_tasks) > 1)) ++ sig = SIGINT; ++ } ++ /* fallthrough */ ++ default: ++ rcu_read_lock(); ++ p = find_task_by_real_pid(pid); ++ rcu_read_unlock(); ++ if (p) { ++ if (vx_task_xid(p) == vxi->vx_id) ++ retval = group_send_sig_info(sig, sip, p); ++ } ++ break; ++ } ++ read_unlock(&tasklist_lock); ++ vxdprintk(VXD_CBIT(misc, 4), ++ "vx_info_kill(%p[#%d],%d,%d,%ld) = %d", ++ vxi, vxi->vx_id, pid, sig, (long)sip, retval); ++ return retval; ++} ++ ++int vc_ctx_kill(struct vx_info *vxi, void __user *data) ++{ ++ struct vcmd_ctx_kill_v0 vc_data; ++ ++ if (copy_from_user(&vc_data, data, sizeof(vc_data))) ++ return -EFAULT; ++ ++ /* special check to allow guest shutdown */ ++ if (!vx_info_flags(vxi, VXF_STATE_ADMIN, 0) && ++ /* forbid killall pid=0 when init is present */ ++ (((vc_data.pid < 1) && vxi->vx_initpid) || ++ (vc_data.pid > 1))) ++ return -EACCES; ++ ++ return vx_info_kill(vxi, vc_data.pid, vc_data.sig); ++} ++ ++ ++static int __wait_exit(struct vx_info *vxi) ++{ ++ DECLARE_WAITQUEUE(wait, current); ++ int ret = 0; ++ ++ add_wait_queue(&vxi->vx_wait, &wait); ++ set_current_state(TASK_INTERRUPTIBLE); ++ ++wait: ++ if (vx_info_state(vxi, ++ VXS_SHUTDOWN | VXS_HASHED | VXS_HELPER) == VXS_SHUTDOWN) ++ goto out; ++ if (signal_pending(current)) { ++ ret = -ERESTARTSYS; ++ goto out; ++ } ++ schedule(); ++ goto wait; ++ ++out: ++ set_current_state(TASK_RUNNING); ++ remove_wait_queue(&vxi->vx_wait, &wait); ++ return ret; ++} ++ ++ ++ ++int vc_wait_exit(struct vx_info *vxi, void __user *data) ++{ ++ struct vcmd_wait_exit_v0 vc_data; ++ int ret; ++ ++ ret = __wait_exit(vxi); ++ vc_data.reboot_cmd = vxi->reboot_cmd; ++ vc_data.exit_code = vxi->exit_code; ++ ++ if (copy_to_user(data, &vc_data, sizeof(vc_data))) ++ ret = -EFAULT; ++ return ret; ++} ++ +diff -NurpP --minimal linux-3.2.34/kernel/vserver/space.c linux-3.2.34-vs2.3.2.15/kernel/vserver/space.c +--- linux-3.2.34/kernel/vserver/space.c 1970-01-01 01:00:00.000000000 +0100 ++++ linux-3.2.34-vs2.3.2.15/kernel/vserver/space.c 2011-12-05 19:33:02.000000000 +0100 +@@ -0,0 +1,435 @@ ++/* ++ * linux/kernel/vserver/space.c ++ * ++ * Virtual Server: Context Space Support ++ * ++ * Copyright (C) 2003-2010 Herbert Pötzl ++ * ++ * V0.01 broken out from context.c 0.07 ++ * V0.02 added task locking for namespace ++ * V0.03 broken out vx_enter_namespace ++ * V0.04 added *space support and commands ++ * V0.05 added credential support ++ * ++ */ ++ ++#include ++#include ++#include ++#include ++#include ++#include ++ ++#include ++#include ++#include ++ ++atomic_t vs_global_nsproxy = ATOMIC_INIT(0); ++atomic_t vs_global_fs = ATOMIC_INIT(0); ++atomic_t vs_global_mnt_ns = ATOMIC_INIT(0); ++atomic_t vs_global_uts_ns = ATOMIC_INIT(0); ++atomic_t vs_global_user_ns = ATOMIC_INIT(0); ++atomic_t vs_global_pid_ns = ATOMIC_INIT(0); ++ ++ ++/* namespace functions */ ++ ++#include ++#include ++#include ++#include ++#include ++ ++ ++static const struct vcmd_space_mask_v1 space_mask_v0 = { ++ .mask = CLONE_FS | ++ CLONE_NEWNS | ++#ifdef CONFIG_UTS_NS ++ CLONE_NEWUTS | ++#endif ++#ifdef CONFIG_IPC_NS ++ CLONE_NEWIPC | ++#endif ++#ifdef CONFIG_USER_NS ++ CLONE_NEWUSER | ++#endif ++ 0 ++}; ++ ++static const struct vcmd_space_mask_v1 space_mask = { ++ .mask = CLONE_FS | ++ CLONE_NEWNS | ++#ifdef CONFIG_UTS_NS ++ CLONE_NEWUTS | ++#endif ++#ifdef CONFIG_IPC_NS ++ CLONE_NEWIPC | ++#endif ++#ifdef CONFIG_USER_NS ++ CLONE_NEWUSER | ++#endif ++#ifdef CONFIG_PID_NS ++ CLONE_NEWPID | ++#endif ++#ifdef CONFIG_NET_NS ++ CLONE_NEWNET | 
++#endif ++ 0 ++}; ++ ++static const struct vcmd_space_mask_v1 default_space_mask = { ++ .mask = CLONE_FS | ++ CLONE_NEWNS | ++#ifdef CONFIG_UTS_NS ++ CLONE_NEWUTS | ++#endif ++#ifdef CONFIG_IPC_NS ++ CLONE_NEWIPC | ++#endif ++#ifdef CONFIG_USER_NS ++ CLONE_NEWUSER | ++#endif ++#ifdef CONFIG_PID_NS ++// CLONE_NEWPID | ++#endif ++ 0 ++}; ++ ++/* ++ * build a new nsproxy mix ++ * assumes that both proxies are 'const' ++ * does not touch nsproxy refcounts ++ * will hold a reference on the result. ++ */ ++ ++struct nsproxy *vs_mix_nsproxy(struct nsproxy *old_nsproxy, ++ struct nsproxy *new_nsproxy, unsigned long mask) ++{ ++ struct mnt_namespace *old_ns; ++ struct uts_namespace *old_uts; ++ struct ipc_namespace *old_ipc; ++#ifdef CONFIG_PID_NS ++ struct pid_namespace *old_pid; ++#endif ++#ifdef CONFIG_NET_NS ++ struct net *old_net; ++#endif ++ struct nsproxy *nsproxy; ++ ++ nsproxy = copy_nsproxy(old_nsproxy); ++ if (!nsproxy) ++ goto out; ++ ++ if (mask & CLONE_NEWNS) { ++ old_ns = nsproxy->mnt_ns; ++ nsproxy->mnt_ns = new_nsproxy->mnt_ns; ++ if (nsproxy->mnt_ns) ++ get_mnt_ns(nsproxy->mnt_ns); ++ } else ++ old_ns = NULL; ++ ++ if (mask & CLONE_NEWUTS) { ++ old_uts = nsproxy->uts_ns; ++ nsproxy->uts_ns = new_nsproxy->uts_ns; ++ if (nsproxy->uts_ns) ++ get_uts_ns(nsproxy->uts_ns); ++ } else ++ old_uts = NULL; ++ ++ if (mask & CLONE_NEWIPC) { ++ old_ipc = nsproxy->ipc_ns; ++ nsproxy->ipc_ns = new_nsproxy->ipc_ns; ++ if (nsproxy->ipc_ns) ++ get_ipc_ns(nsproxy->ipc_ns); ++ } else ++ old_ipc = NULL; ++ ++#ifdef CONFIG_PID_NS ++ if (mask & CLONE_NEWPID) { ++ old_pid = nsproxy->pid_ns; ++ nsproxy->pid_ns = new_nsproxy->pid_ns; ++ if (nsproxy->pid_ns) ++ get_pid_ns(nsproxy->pid_ns); ++ } else ++ old_pid = NULL; ++#endif ++#ifdef CONFIG_NET_NS ++ if (mask & CLONE_NEWNET) { ++ old_net = nsproxy->net_ns; ++ nsproxy->net_ns = new_nsproxy->net_ns; ++ if (nsproxy->net_ns) ++ get_net(nsproxy->net_ns); ++ } else ++ old_net = NULL; ++#endif ++ if (old_ns) ++ put_mnt_ns(old_ns); ++ if (old_uts) ++ put_uts_ns(old_uts); ++ if (old_ipc) ++ put_ipc_ns(old_ipc); ++#ifdef CONFIG_PID_NS ++ if (old_pid) ++ put_pid_ns(old_pid); ++#endif ++#ifdef CONFIG_NET_NS ++ if (old_net) ++ put_net(old_net); ++#endif ++out: ++ return nsproxy; ++} ++ ++ ++/* ++ * merge two nsproxy structs into a new one. ++ * will hold a reference on the result. ++ */ ++ ++static inline ++struct nsproxy *__vs_merge_nsproxy(struct nsproxy *old, ++ struct nsproxy *proxy, unsigned long mask) ++{ ++ struct nsproxy null_proxy = { .mnt_ns = NULL }; ++ ++ if (!proxy) ++ return NULL; ++ ++ if (mask) { ++ /* vs_mix_nsproxy returns with reference */ ++ return vs_mix_nsproxy(old ? 
old : &null_proxy, ++ proxy, mask); ++ } ++ get_nsproxy(proxy); ++ return proxy; ++} ++ ++ ++int vx_enter_space(struct vx_info *vxi, unsigned long mask, unsigned index) ++{ ++ struct nsproxy *proxy, *proxy_cur, *proxy_new; ++ struct fs_struct *fs_cur, *fs = NULL; ++ struct _vx_space *space; ++ int ret, kill = 0; ++ ++ vxdprintk(VXD_CBIT(space, 8), "vx_enter_space(%p[#%u],0x%08lx,%d)", ++ vxi, vxi->vx_id, mask, index); ++ ++ if (vx_info_flags(vxi, VXF_INFO_PRIVATE, 0)) ++ return -EACCES; ++ ++ if (index >= VX_SPACES) ++ return -EINVAL; ++ ++ space = &vxi->space[index]; ++ ++ if (!mask) ++ mask = space->vx_nsmask; ++ ++ if ((mask & space->vx_nsmask) != mask) ++ return -EINVAL; ++ ++ if (mask & CLONE_FS) { ++ fs = copy_fs_struct(space->vx_fs); ++ if (!fs) ++ return -ENOMEM; ++ } ++ proxy = space->vx_nsproxy; ++ ++ vxdprintk(VXD_CBIT(space, 9), ++ "vx_enter_space(%p[#%u],0x%08lx,%d) -> (%p,%p)", ++ vxi, vxi->vx_id, mask, index, proxy, fs); ++ ++ task_lock(current); ++ fs_cur = current->fs; ++ ++ if (mask & CLONE_FS) { ++ spin_lock(&fs_cur->lock); ++ current->fs = fs; ++ kill = !--fs_cur->users; ++ spin_unlock(&fs_cur->lock); ++ } ++ ++ proxy_cur = current->nsproxy; ++ get_nsproxy(proxy_cur); ++ task_unlock(current); ++ ++ if (kill) ++ free_fs_struct(fs_cur); ++ ++ proxy_new = __vs_merge_nsproxy(proxy_cur, proxy, mask); ++ if (IS_ERR(proxy_new)) { ++ ret = PTR_ERR(proxy_new); ++ goto out_put; ++ } ++ ++ proxy_new = xchg(¤t->nsproxy, proxy_new); ++ ++ if (mask & CLONE_NEWUSER) { ++ struct cred *cred; ++ ++ vxdprintk(VXD_CBIT(space, 10), ++ "vx_enter_space(%p[#%u],%p) cred (%p,%p)", ++ vxi, vxi->vx_id, space->vx_cred, ++ current->real_cred, current->cred); ++ ++ if (space->vx_cred) { ++ cred = __prepare_creds(space->vx_cred); ++ if (cred) ++ commit_creds(cred); ++ } ++ } ++ ++ ret = 0; ++ ++ if (proxy_new) ++ put_nsproxy(proxy_new); ++out_put: ++ if (proxy_cur) ++ put_nsproxy(proxy_cur); ++ return ret; ++} ++ ++ ++int vx_set_space(struct vx_info *vxi, unsigned long mask, unsigned index) ++{ ++ struct nsproxy *proxy_vxi, *proxy_cur, *proxy_new; ++ struct fs_struct *fs_vxi, *fs; ++ struct _vx_space *space; ++ int ret, kill = 0; ++ ++ vxdprintk(VXD_CBIT(space, 8), "vx_set_space(%p[#%u],0x%08lx,%d)", ++ vxi, vxi->vx_id, mask, index); ++ ++ if ((mask & space_mask.mask) != mask) ++ return -EINVAL; ++ ++ if (index >= VX_SPACES) ++ return -EINVAL; ++ ++ space = &vxi->space[index]; ++ ++ proxy_vxi = space->vx_nsproxy; ++ fs_vxi = space->vx_fs; ++ ++ if (mask & CLONE_FS) { ++ fs = copy_fs_struct(current->fs); ++ if (!fs) ++ return -ENOMEM; ++ } ++ ++ task_lock(current); ++ ++ if (mask & CLONE_FS) { ++ spin_lock(&fs_vxi->lock); ++ space->vx_fs = fs; ++ kill = !--fs_vxi->users; ++ spin_unlock(&fs_vxi->lock); ++ } ++ ++ proxy_cur = current->nsproxy; ++ get_nsproxy(proxy_cur); ++ task_unlock(current); ++ ++ if (kill) ++ free_fs_struct(fs_vxi); ++ ++ proxy_new = __vs_merge_nsproxy(proxy_vxi, proxy_cur, mask); ++ if (IS_ERR(proxy_new)) { ++ ret = PTR_ERR(proxy_new); ++ goto out_put; ++ } ++ ++ proxy_new = xchg(&space->vx_nsproxy, proxy_new); ++ space->vx_nsmask |= mask; ++ ++ if (mask & CLONE_NEWUSER) { ++ struct cred *cred; ++ ++ vxdprintk(VXD_CBIT(space, 10), ++ "vx_set_space(%p[#%u],%p) cred (%p,%p)", ++ vxi, vxi->vx_id, space->vx_cred, ++ current->real_cred, current->cred); ++ ++ cred = prepare_creds(); ++ cred = (struct cred *)xchg(&space->vx_cred, cred); ++ if (cred) ++ abort_creds(cred); ++ } ++ ++ ret = 0; ++ ++ if (proxy_new) ++ put_nsproxy(proxy_new); ++out_put: ++ if (proxy_cur) ++ 
put_nsproxy(proxy_cur); ++ return ret; ++} ++ ++ ++int vc_enter_space_v1(struct vx_info *vxi, void __user *data) ++{ ++ struct vcmd_space_mask_v1 vc_data = { .mask = 0 }; ++ ++ if (data && copy_from_user(&vc_data, data, sizeof(vc_data))) ++ return -EFAULT; ++ ++ return vx_enter_space(vxi, vc_data.mask, 0); ++} ++ ++int vc_enter_space(struct vx_info *vxi, void __user *data) ++{ ++ struct vcmd_space_mask_v2 vc_data = { .mask = 0 }; ++ ++ if (data && copy_from_user(&vc_data, data, sizeof(vc_data))) ++ return -EFAULT; ++ ++ if (vc_data.index >= VX_SPACES) ++ return -EINVAL; ++ ++ return vx_enter_space(vxi, vc_data.mask, vc_data.index); ++} ++ ++int vc_set_space_v1(struct vx_info *vxi, void __user *data) ++{ ++ struct vcmd_space_mask_v1 vc_data = { .mask = 0 }; ++ ++ if (data && copy_from_user(&vc_data, data, sizeof(vc_data))) ++ return -EFAULT; ++ ++ return vx_set_space(vxi, vc_data.mask, 0); ++} ++ ++int vc_set_space(struct vx_info *vxi, void __user *data) ++{ ++ struct vcmd_space_mask_v2 vc_data = { .mask = 0 }; ++ ++ if (data && copy_from_user(&vc_data, data, sizeof(vc_data))) ++ return -EFAULT; ++ ++ if (vc_data.index >= VX_SPACES) ++ return -EINVAL; ++ ++ return vx_set_space(vxi, vc_data.mask, vc_data.index); ++} ++ ++int vc_get_space_mask(void __user *data, int type) ++{ ++ const struct vcmd_space_mask_v1 *mask; ++ ++ if (type == 0) ++ mask = &space_mask_v0; ++ else if (type == 1) ++ mask = &space_mask; ++ else ++ mask = &default_space_mask; ++ ++ vxdprintk(VXD_CBIT(space, 10), ++ "vc_get_space_mask(%d) = %08llx", type, mask->mask); ++ ++ if (copy_to_user(data, mask, sizeof(*mask))) ++ return -EFAULT; ++ return 0; ++} ++ +diff -NurpP --minimal linux-3.2.34/kernel/vserver/switch.c linux-3.2.34-vs2.3.2.15/kernel/vserver/switch.c +--- linux-3.2.34/kernel/vserver/switch.c 1970-01-01 01:00:00.000000000 +0100 ++++ linux-3.2.34-vs2.3.2.15/kernel/vserver/switch.c 2011-12-05 19:33:02.000000000 +0100 +@@ -0,0 +1,556 @@ ++/* ++ * linux/kernel/vserver/switch.c ++ * ++ * Virtual Server: Syscall Switch ++ * ++ * Copyright (C) 2003-2011 Herbert Pötzl ++ * ++ * V0.01 syscall switch ++ * V0.02 added signal to context ++ * V0.03 added rlimit functions ++ * V0.04 added iattr, task/xid functions ++ * V0.05 added debug/history stuff ++ * V0.06 added compat32 layer ++ * V0.07 vcmd args and perms ++ * V0.08 added status commands ++ * V0.09 added tag commands ++ * V0.10 added oom bias ++ * V0.11 added device commands ++ * V0.12 added warn mask ++ * ++ */ ++ ++#include ++#include ++#include ++ ++#include "vci_config.h" ++ ++ ++static inline ++int vc_get_version(uint32_t id) ++{ ++ return VCI_VERSION; ++} ++ ++static inline ++int vc_get_vci(uint32_t id) ++{ ++ return vci_kernel_config(); ++} ++ ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++ ++#include ++#include ++ ++ ++#ifdef CONFIG_COMPAT ++#define __COMPAT(name, id, data, compat) \ ++ (compat) ? name ## _x32(id, data) : name(id, data) ++#define __COMPAT_NO_ID(name, data, compat) \ ++ (compat) ? 
name ## _x32(data) : name(data) ++#else ++#define __COMPAT(name, id, data, compat) \ ++ name(id, data) ++#define __COMPAT_NO_ID(name, data, compat) \ ++ name(data) ++#endif ++ ++ ++static inline ++long do_vcmd(uint32_t cmd, uint32_t id, ++ struct vx_info *vxi, struct nx_info *nxi, ++ void __user *data, int compat) ++{ ++ switch (cmd) { ++ ++ case VCMD_get_version: ++ return vc_get_version(id); ++ case VCMD_get_vci: ++ return vc_get_vci(id); ++ ++ case VCMD_task_xid: ++ return vc_task_xid(id); ++ case VCMD_vx_info: ++ return vc_vx_info(vxi, data); ++ ++ case VCMD_task_nid: ++ return vc_task_nid(id); ++ case VCMD_nx_info: ++ return vc_nx_info(nxi, data); ++ ++ case VCMD_task_tag: ++ return vc_task_tag(id); ++ ++ case VCMD_set_space_v1: ++ return vc_set_space_v1(vxi, data); ++ /* this is version 2 */ ++ case VCMD_set_space: ++ return vc_set_space(vxi, data); ++ ++ case VCMD_get_space_mask_v0: ++ return vc_get_space_mask(data, 0); ++ /* this is version 1 */ ++ case VCMD_get_space_mask: ++ return vc_get_space_mask(data, 1); ++ ++ case VCMD_get_space_default: ++ return vc_get_space_mask(data, -1); ++ ++ case VCMD_set_umask: ++ return vc_set_umask(vxi, data); ++ ++ case VCMD_get_umask: ++ return vc_get_umask(vxi, data); ++ ++ case VCMD_set_wmask: ++ return vc_set_wmask(vxi, data); ++ ++ case VCMD_get_wmask: ++ return vc_get_wmask(vxi, data); ++#ifdef CONFIG_IA32_EMULATION ++ case VCMD_get_rlimit: ++ return __COMPAT(vc_get_rlimit, vxi, data, compat); ++ case VCMD_set_rlimit: ++ return __COMPAT(vc_set_rlimit, vxi, data, compat); ++#else ++ case VCMD_get_rlimit: ++ return vc_get_rlimit(vxi, data); ++ case VCMD_set_rlimit: ++ return vc_set_rlimit(vxi, data); ++#endif ++ case VCMD_get_rlimit_mask: ++ return vc_get_rlimit_mask(id, data); ++ case VCMD_reset_hits: ++ return vc_reset_hits(vxi, data); ++ case VCMD_reset_minmax: ++ return vc_reset_minmax(vxi, data); ++ ++ case VCMD_get_vhi_name: ++ return vc_get_vhi_name(vxi, data); ++ case VCMD_set_vhi_name: ++ return vc_set_vhi_name(vxi, data); ++ ++ case VCMD_ctx_stat: ++ return vc_ctx_stat(vxi, data); ++ case VCMD_virt_stat: ++ return vc_virt_stat(vxi, data); ++ case VCMD_sock_stat: ++ return vc_sock_stat(vxi, data); ++ case VCMD_rlimit_stat: ++ return vc_rlimit_stat(vxi, data); ++ ++ case VCMD_set_cflags: ++ return vc_set_cflags(vxi, data); ++ case VCMD_get_cflags: ++ return vc_get_cflags(vxi, data); ++ ++ /* this is version 1 */ ++ case VCMD_set_ccaps: ++ return vc_set_ccaps(vxi, data); ++ /* this is version 1 */ ++ case VCMD_get_ccaps: ++ return vc_get_ccaps(vxi, data); ++ case VCMD_set_bcaps: ++ return vc_set_bcaps(vxi, data); ++ case VCMD_get_bcaps: ++ return vc_get_bcaps(vxi, data); ++ ++ case VCMD_set_badness: ++ return vc_set_badness(vxi, data); ++ case VCMD_get_badness: ++ return vc_get_badness(vxi, data); ++ ++ case VCMD_set_nflags: ++ return vc_set_nflags(nxi, data); ++ case VCMD_get_nflags: ++ return vc_get_nflags(nxi, data); ++ ++ case VCMD_set_ncaps: ++ return vc_set_ncaps(nxi, data); ++ case VCMD_get_ncaps: ++ return vc_get_ncaps(nxi, data); ++ ++ case VCMD_set_prio_bias: ++ return vc_set_prio_bias(vxi, data); ++ case VCMD_get_prio_bias: ++ return vc_get_prio_bias(vxi, data); ++ case VCMD_add_dlimit: ++ return __COMPAT(vc_add_dlimit, id, data, compat); ++ case VCMD_rem_dlimit: ++ return __COMPAT(vc_rem_dlimit, id, data, compat); ++ case VCMD_set_dlimit: ++ return __COMPAT(vc_set_dlimit, id, data, compat); ++ case VCMD_get_dlimit: ++ return __COMPAT(vc_get_dlimit, id, data, compat); ++ ++ case VCMD_ctx_kill: ++ return vc_ctx_kill(vxi, 
data); ++ ++ case VCMD_wait_exit: ++ return vc_wait_exit(vxi, data); ++ ++ case VCMD_get_iattr: ++ return __COMPAT_NO_ID(vc_get_iattr, data, compat); ++ case VCMD_set_iattr: ++ return __COMPAT_NO_ID(vc_set_iattr, data, compat); ++ ++ case VCMD_fget_iattr: ++ return vc_fget_iattr(id, data); ++ case VCMD_fset_iattr: ++ return vc_fset_iattr(id, data); ++ ++ case VCMD_enter_space_v0: ++ return vc_enter_space_v1(vxi, NULL); ++ case VCMD_enter_space_v1: ++ return vc_enter_space_v1(vxi, data); ++ /* this is version 2 */ ++ case VCMD_enter_space: ++ return vc_enter_space(vxi, data); ++ ++ case VCMD_ctx_create_v0: ++ return vc_ctx_create(id, NULL); ++ case VCMD_ctx_create: ++ return vc_ctx_create(id, data); ++ case VCMD_ctx_migrate_v0: ++ return vc_ctx_migrate(vxi, NULL); ++ case VCMD_ctx_migrate: ++ return vc_ctx_migrate(vxi, data); ++ ++ case VCMD_net_create_v0: ++ return vc_net_create(id, NULL); ++ case VCMD_net_create: ++ return vc_net_create(id, data); ++ case VCMD_net_migrate: ++ return vc_net_migrate(nxi, data); ++ ++ case VCMD_tag_migrate: ++ return vc_tag_migrate(id); ++ ++ case VCMD_net_add: ++ return vc_net_add(nxi, data); ++ case VCMD_net_remove: ++ return vc_net_remove(nxi, data); ++ ++ case VCMD_net_add_ipv4_v1: ++ return vc_net_add_ipv4_v1(nxi, data); ++ /* this is version 2 */ ++ case VCMD_net_add_ipv4: ++ return vc_net_add_ipv4(nxi, data); ++ ++ case VCMD_net_rem_ipv4_v1: ++ return vc_net_rem_ipv4_v1(nxi, data); ++ /* this is version 2 */ ++ case VCMD_net_rem_ipv4: ++ return vc_net_rem_ipv4(nxi, data); ++#ifdef CONFIG_IPV6 ++ case VCMD_net_add_ipv6: ++ return vc_net_add_ipv6(nxi, data); ++ case VCMD_net_remove_ipv6: ++ return vc_net_remove_ipv6(nxi, data); ++#endif ++/* case VCMD_add_match_ipv4: ++ return vc_add_match_ipv4(nxi, data); ++ case VCMD_get_match_ipv4: ++ return vc_get_match_ipv4(nxi, data); ++#ifdef CONFIG_IPV6 ++ case VCMD_add_match_ipv6: ++ return vc_add_match_ipv6(nxi, data); ++ case VCMD_get_match_ipv6: ++ return vc_get_match_ipv6(nxi, data); ++#endif */ ++ ++#ifdef CONFIG_VSERVER_DEVICE ++ case VCMD_set_mapping: ++ return __COMPAT(vc_set_mapping, vxi, data, compat); ++ case VCMD_unset_mapping: ++ return __COMPAT(vc_unset_mapping, vxi, data, compat); ++#endif ++#ifdef CONFIG_VSERVER_HISTORY ++ case VCMD_dump_history: ++ return vc_dump_history(id); ++ case VCMD_read_history: ++ return __COMPAT(vc_read_history, id, data, compat); ++#endif ++ default: ++ vxwprintk_task(1, "unimplemented VCMD_%02d_%d[%d]", ++ VC_CATEGORY(cmd), VC_COMMAND(cmd), VC_VERSION(cmd)); ++ } ++ return -ENOSYS; ++} ++ ++ ++#define __VCMD(vcmd, _perm, _args, _flags) \ ++ case VCMD_ ## vcmd: perm = _perm; \ ++ args = _args; flags = _flags; break ++ ++ ++#define VCA_NONE 0x00 ++#define VCA_VXI 0x01 ++#define VCA_NXI 0x02 ++ ++#define VCF_NONE 0x00 ++#define VCF_INFO 0x01 ++#define VCF_ADMIN 0x02 ++#define VCF_ARES 0x06 /* includes admin */ ++#define VCF_SETUP 0x08 ++ ++#define VCF_ZIDOK 0x10 /* zero id okay */ ++ ++ ++static inline ++long do_vserver(uint32_t cmd, uint32_t id, void __user *data, int compat) ++{ ++ long ret; ++ int permit = -1, state = 0; ++ int perm = -1, args = 0, flags = 0; ++ struct vx_info *vxi = NULL; ++ struct nx_info *nxi = NULL; ++ ++ switch (cmd) { ++ /* unpriviledged commands */ ++ __VCMD(get_version, 0, VCA_NONE, 0); ++ __VCMD(get_vci, 0, VCA_NONE, 0); ++ __VCMD(get_rlimit_mask, 0, VCA_NONE, 0); ++ __VCMD(get_space_mask_v0,0, VCA_NONE, 0); ++ __VCMD(get_space_mask, 0, VCA_NONE, 0); ++ __VCMD(get_space_default,0, VCA_NONE, 0); ++ ++ /* info commands */ ++ __VCMD(task_xid, 
2, VCA_NONE, 0); ++ __VCMD(reset_hits, 2, VCA_VXI, 0); ++ __VCMD(reset_minmax, 2, VCA_VXI, 0); ++ __VCMD(vx_info, 3, VCA_VXI, VCF_INFO); ++ __VCMD(get_bcaps, 3, VCA_VXI, VCF_INFO); ++ __VCMD(get_ccaps, 3, VCA_VXI, VCF_INFO); ++ __VCMD(get_cflags, 3, VCA_VXI, VCF_INFO); ++ __VCMD(get_umask, 3, VCA_VXI, VCF_INFO); ++ __VCMD(get_wmask, 3, VCA_VXI, VCF_INFO); ++ __VCMD(get_badness, 3, VCA_VXI, VCF_INFO); ++ __VCMD(get_vhi_name, 3, VCA_VXI, VCF_INFO); ++ __VCMD(get_rlimit, 3, VCA_VXI, VCF_INFO); ++ ++ __VCMD(ctx_stat, 3, VCA_VXI, VCF_INFO); ++ __VCMD(virt_stat, 3, VCA_VXI, VCF_INFO); ++ __VCMD(sock_stat, 3, VCA_VXI, VCF_INFO); ++ __VCMD(rlimit_stat, 3, VCA_VXI, VCF_INFO); ++ ++ __VCMD(task_nid, 2, VCA_NONE, 0); ++ __VCMD(nx_info, 3, VCA_NXI, VCF_INFO); ++ __VCMD(get_ncaps, 3, VCA_NXI, VCF_INFO); ++ __VCMD(get_nflags, 3, VCA_NXI, VCF_INFO); ++ ++ __VCMD(task_tag, 2, VCA_NONE, 0); ++ ++ __VCMD(get_iattr, 2, VCA_NONE, 0); ++ __VCMD(fget_iattr, 2, VCA_NONE, 0); ++ __VCMD(get_dlimit, 3, VCA_NONE, VCF_INFO); ++ __VCMD(get_prio_bias, 3, VCA_VXI, VCF_INFO); ++ ++ /* lower admin commands */ ++ __VCMD(wait_exit, 4, VCA_VXI, VCF_INFO); ++ __VCMD(ctx_create_v0, 5, VCA_NONE, 0); ++ __VCMD(ctx_create, 5, VCA_NONE, 0); ++ __VCMD(ctx_migrate_v0, 5, VCA_VXI, VCF_ADMIN); ++ __VCMD(ctx_migrate, 5, VCA_VXI, VCF_ADMIN); ++ __VCMD(enter_space_v0, 5, VCA_VXI, VCF_ADMIN); ++ __VCMD(enter_space_v1, 5, VCA_VXI, VCF_ADMIN); ++ __VCMD(enter_space, 5, VCA_VXI, VCF_ADMIN); ++ ++ __VCMD(net_create_v0, 5, VCA_NONE, 0); ++ __VCMD(net_create, 5, VCA_NONE, 0); ++ __VCMD(net_migrate, 5, VCA_NXI, VCF_ADMIN); ++ ++ __VCMD(tag_migrate, 5, VCA_NONE, VCF_ADMIN); ++ ++ /* higher admin commands */ ++ __VCMD(ctx_kill, 6, VCA_VXI, VCF_ARES); ++ __VCMD(set_space_v1, 7, VCA_VXI, VCF_ARES | VCF_SETUP); ++ __VCMD(set_space, 7, VCA_VXI, VCF_ARES | VCF_SETUP); ++ ++ __VCMD(set_ccaps, 7, VCA_VXI, VCF_ARES | VCF_SETUP); ++ __VCMD(set_bcaps, 7, VCA_VXI, VCF_ARES | VCF_SETUP); ++ __VCMD(set_cflags, 7, VCA_VXI, VCF_ARES | VCF_SETUP); ++ __VCMD(set_umask, 7, VCA_VXI, VCF_ARES | VCF_SETUP); ++ __VCMD(set_wmask, 7, VCA_VXI, VCF_ARES | VCF_SETUP); ++ __VCMD(set_badness, 7, VCA_VXI, VCF_ARES | VCF_SETUP); ++ ++ __VCMD(set_vhi_name, 7, VCA_VXI, VCF_ARES | VCF_SETUP); ++ __VCMD(set_rlimit, 7, VCA_VXI, VCF_ARES | VCF_SETUP); ++ __VCMD(set_prio_bias, 7, VCA_VXI, VCF_ARES | VCF_SETUP); ++ ++ __VCMD(set_ncaps, 7, VCA_NXI, VCF_ARES | VCF_SETUP); ++ __VCMD(set_nflags, 7, VCA_NXI, VCF_ARES | VCF_SETUP); ++ __VCMD(net_add, 8, VCA_NXI, VCF_ARES | VCF_SETUP); ++ __VCMD(net_remove, 8, VCA_NXI, VCF_ARES | VCF_SETUP); ++ __VCMD(net_add_ipv4_v1, 8, VCA_NXI, VCF_ARES | VCF_SETUP); ++ __VCMD(net_rem_ipv4_v1, 8, VCA_NXI, VCF_ARES | VCF_SETUP); ++ __VCMD(net_add_ipv4, 8, VCA_NXI, VCF_ARES | VCF_SETUP); ++ __VCMD(net_rem_ipv4, 8, VCA_NXI, VCF_ARES | VCF_SETUP); ++#ifdef CONFIG_IPV6 ++ __VCMD(net_add_ipv6, 8, VCA_NXI, VCF_ARES | VCF_SETUP); ++ __VCMD(net_remove_ipv6, 8, VCA_NXI, VCF_ARES | VCF_SETUP); ++#endif ++ __VCMD(set_iattr, 7, VCA_NONE, 0); ++ __VCMD(fset_iattr, 7, VCA_NONE, 0); ++ __VCMD(set_dlimit, 7, VCA_NONE, VCF_ARES); ++ __VCMD(add_dlimit, 8, VCA_NONE, VCF_ARES); ++ __VCMD(rem_dlimit, 8, VCA_NONE, VCF_ARES); ++ ++#ifdef CONFIG_VSERVER_DEVICE ++ __VCMD(set_mapping, 8, VCA_VXI, VCF_ARES|VCF_ZIDOK); ++ __VCMD(unset_mapping, 8, VCA_VXI, VCF_ARES|VCF_ZIDOK); ++#endif ++ /* debug level admin commands */ ++#ifdef CONFIG_VSERVER_HISTORY ++ __VCMD(dump_history, 9, VCA_NONE, 0); ++ __VCMD(read_history, 9, VCA_NONE, 0); ++#endif ++ ++ default: ++ perm = -1; ++ } ++ ++ 
vxdprintk(VXD_CBIT(switch, 0), ++ "vc: VCMD_%02d_%d[%d], %d,%p [%d,%d,%x,%x]", ++ VC_CATEGORY(cmd), VC_COMMAND(cmd), ++ VC_VERSION(cmd), id, data, compat, ++ perm, args, flags); ++ ++ ret = -ENOSYS; ++ if (perm < 0) ++ goto out; ++ ++ state = 1; ++ if (!capable(CAP_CONTEXT)) ++ goto out; ++ ++ state = 2; ++ /* moved here from the individual commands */ ++ ret = -EPERM; ++ if ((perm > 1) && !capable(CAP_SYS_ADMIN)) ++ goto out; ++ ++ state = 3; ++ /* vcmd involves resource management */ ++ ret = -EPERM; ++ if ((flags & VCF_ARES) && !capable(CAP_SYS_RESOURCE)) ++ goto out; ++ ++ state = 4; ++ /* various legacy exceptions */ ++ switch (cmd) { ++ /* will go away when spectator is a cap */ ++ case VCMD_ctx_migrate_v0: ++ case VCMD_ctx_migrate: ++ if (id == 1) { ++ current->xid = 1; ++ ret = 1; ++ goto out; ++ } ++ break; ++ ++ /* will go away when spectator is a cap */ ++ case VCMD_net_migrate: ++ if (id == 1) { ++ current->nid = 1; ++ ret = 1; ++ goto out; ++ } ++ break; ++ } ++ ++ /* vcmds are fine by default */ ++ permit = 1; ++ ++ /* admin type vcmds require admin ... */ ++ if (flags & VCF_ADMIN) ++ permit = vx_check(0, VS_ADMIN) ? 1 : 0; ++ ++ /* ... but setup type vcmds override that */ ++ if (!permit && (flags & VCF_SETUP)) ++ permit = vx_flags(VXF_STATE_SETUP, 0) ? 2 : 0; ++ ++ state = 5; ++ ret = -EPERM; ++ if (!permit) ++ goto out; ++ ++ state = 6; ++ if (!id && (flags & VCF_ZIDOK)) ++ goto skip_id; ++ ++ ret = -ESRCH; ++ if (args & VCA_VXI) { ++ vxi = lookup_vx_info(id); ++ if (!vxi) ++ goto out; ++ ++ if ((flags & VCF_ADMIN) && ++ /* special case kill for shutdown */ ++ (cmd != VCMD_ctx_kill) && ++ /* can context be administrated? */ ++ !vx_info_flags(vxi, VXF_STATE_ADMIN, 0)) { ++ ret = -EACCES; ++ goto out_vxi; ++ } ++ } ++ state = 7; ++ if (args & VCA_NXI) { ++ nxi = lookup_nx_info(id); ++ if (!nxi) ++ goto out_vxi; ++ ++ if ((flags & VCF_ADMIN) && ++ /* can context be administrated? 
*/ ++ !nx_info_flags(nxi, NXF_STATE_ADMIN, 0)) { ++ ret = -EACCES; ++ goto out_nxi; ++ } ++ } ++skip_id: ++ state = 8; ++ ret = do_vcmd(cmd, id, vxi, nxi, data, compat); ++ ++out_nxi: ++ if ((args & VCA_NXI) && nxi) ++ put_nx_info(nxi); ++out_vxi: ++ if ((args & VCA_VXI) && vxi) ++ put_vx_info(vxi); ++out: ++ vxdprintk(VXD_CBIT(switch, 1), ++ "vc: VCMD_%02d_%d[%d] = %08lx(%ld) [%d,%d]", ++ VC_CATEGORY(cmd), VC_COMMAND(cmd), ++ VC_VERSION(cmd), ret, ret, state, permit); ++ return ret; ++} ++ ++asmlinkage long ++sys_vserver(uint32_t cmd, uint32_t id, void __user *data) ++{ ++ return do_vserver(cmd, id, data, 0); ++} ++ ++#ifdef CONFIG_COMPAT ++ ++asmlinkage long ++sys32_vserver(uint32_t cmd, uint32_t id, void __user *data) ++{ ++ return do_vserver(cmd, id, data, 1); ++} ++ ++#endif /* CONFIG_COMPAT */ +diff -NurpP --minimal linux-3.2.34/kernel/vserver/sysctl.c linux-3.2.34-vs2.3.2.15/kernel/vserver/sysctl.c +--- linux-3.2.34/kernel/vserver/sysctl.c 1970-01-01 01:00:00.000000000 +0100 ++++ linux-3.2.34-vs2.3.2.15/kernel/vserver/sysctl.c 2011-12-05 19:33:02.000000000 +0100 +@@ -0,0 +1,247 @@ ++/* ++ * kernel/vserver/sysctl.c ++ * ++ * Virtual Context Support ++ * ++ * Copyright (C) 2004-2007 Herbert Pötzl ++ * ++ * V0.01 basic structure ++ * ++ */ ++ ++#include ++#include ++#include ++#include ++#include ++ ++enum { ++ CTL_DEBUG_ERROR = 0, ++ CTL_DEBUG_SWITCH = 1, ++ CTL_DEBUG_XID, ++ CTL_DEBUG_NID, ++ CTL_DEBUG_TAG, ++ CTL_DEBUG_NET, ++ CTL_DEBUG_LIMIT, ++ CTL_DEBUG_CRES, ++ CTL_DEBUG_DLIM, ++ CTL_DEBUG_QUOTA, ++ CTL_DEBUG_CVIRT, ++ CTL_DEBUG_SPACE, ++ CTL_DEBUG_PERM, ++ CTL_DEBUG_MISC, ++}; ++ ++ ++unsigned int vs_debug_switch = 0; ++unsigned int vs_debug_xid = 0; ++unsigned int vs_debug_nid = 0; ++unsigned int vs_debug_tag = 0; ++unsigned int vs_debug_net = 0; ++unsigned int vs_debug_limit = 0; ++unsigned int vs_debug_cres = 0; ++unsigned int vs_debug_dlim = 0; ++unsigned int vs_debug_quota = 0; ++unsigned int vs_debug_cvirt = 0; ++unsigned int vs_debug_space = 0; ++unsigned int vs_debug_perm = 0; ++unsigned int vs_debug_misc = 0; ++ ++ ++static struct ctl_table_header *vserver_table_header; ++static ctl_table vserver_root_table[]; ++ ++ ++void vserver_register_sysctl(void) ++{ ++ if (!vserver_table_header) { ++ vserver_table_header = register_sysctl_table(vserver_root_table); ++ } ++ ++} ++ ++void vserver_unregister_sysctl(void) ++{ ++ if (vserver_table_header) { ++ unregister_sysctl_table(vserver_table_header); ++ vserver_table_header = NULL; ++ } ++} ++ ++ ++static int proc_dodebug(ctl_table *table, int write, ++ void __user *buffer, size_t *lenp, loff_t *ppos) ++{ ++ char tmpbuf[20], *p, c; ++ unsigned int value; ++ size_t left, len; ++ ++ if ((*ppos && !write) || !*lenp) { ++ *lenp = 0; ++ return 0; ++ } ++ ++ left = *lenp; ++ ++ if (write) { ++ if (!access_ok(VERIFY_READ, buffer, left)) ++ return -EFAULT; ++ p = (char *)buffer; ++ while (left && __get_user(c, p) >= 0 && isspace(c)) ++ left--, p++; ++ if (!left) ++ goto done; ++ ++ if (left > sizeof(tmpbuf) - 1) ++ return -EINVAL; ++ if (copy_from_user(tmpbuf, p, left)) ++ return -EFAULT; ++ tmpbuf[left] = '\0'; ++ ++ for (p = tmpbuf, value = 0; '0' <= *p && *p <= '9'; p++, left--) ++ value = 10 * value + (*p - '0'); ++ if (*p && !isspace(*p)) ++ return -EINVAL; ++ while (left && isspace(*p)) ++ left--, p++; ++ *(unsigned int *)table->data = value; ++ } else { ++ if (!access_ok(VERIFY_WRITE, buffer, left)) ++ return -EFAULT; ++ len = sprintf(tmpbuf, "%d", *(unsigned int *)table->data); ++ if (len > left) ++ len = left; ++ if 
(__copy_to_user(buffer, tmpbuf, len)) ++ return -EFAULT; ++ if ((left -= len) > 0) { ++ if (put_user('\n', (char *)buffer + len)) ++ return -EFAULT; ++ left--; ++ } ++ } ++ ++done: ++ *lenp -= left; ++ *ppos += *lenp; ++ return 0; ++} ++ ++static int zero; ++ ++#define CTL_ENTRY(ctl, name) \ ++ { \ ++ .procname = #name, \ ++ .data = &vs_ ## name, \ ++ .maxlen = sizeof(int), \ ++ .mode = 0644, \ ++ .proc_handler = &proc_dodebug, \ ++ .extra1 = &zero, \ ++ .extra2 = &zero, \ ++ } ++ ++static ctl_table vserver_debug_table[] = { ++ CTL_ENTRY(CTL_DEBUG_SWITCH, debug_switch), ++ CTL_ENTRY(CTL_DEBUG_XID, debug_xid), ++ CTL_ENTRY(CTL_DEBUG_NID, debug_nid), ++ CTL_ENTRY(CTL_DEBUG_TAG, debug_tag), ++ CTL_ENTRY(CTL_DEBUG_NET, debug_net), ++ CTL_ENTRY(CTL_DEBUG_LIMIT, debug_limit), ++ CTL_ENTRY(CTL_DEBUG_CRES, debug_cres), ++ CTL_ENTRY(CTL_DEBUG_DLIM, debug_dlim), ++ CTL_ENTRY(CTL_DEBUG_QUOTA, debug_quota), ++ CTL_ENTRY(CTL_DEBUG_CVIRT, debug_cvirt), ++ CTL_ENTRY(CTL_DEBUG_SPACE, debug_space), ++ CTL_ENTRY(CTL_DEBUG_PERM, debug_perm), ++ CTL_ENTRY(CTL_DEBUG_MISC, debug_misc), ++ { 0 } ++}; ++ ++static ctl_table vserver_root_table[] = { ++ { ++ .procname = "vserver", ++ .mode = 0555, ++ .child = vserver_debug_table ++ }, ++ { 0 } ++}; ++ ++ ++static match_table_t tokens = { ++ { CTL_DEBUG_SWITCH, "switch=%x" }, ++ { CTL_DEBUG_XID, "xid=%x" }, ++ { CTL_DEBUG_NID, "nid=%x" }, ++ { CTL_DEBUG_TAG, "tag=%x" }, ++ { CTL_DEBUG_NET, "net=%x" }, ++ { CTL_DEBUG_LIMIT, "limit=%x" }, ++ { CTL_DEBUG_CRES, "cres=%x" }, ++ { CTL_DEBUG_DLIM, "dlim=%x" }, ++ { CTL_DEBUG_QUOTA, "quota=%x" }, ++ { CTL_DEBUG_CVIRT, "cvirt=%x" }, ++ { CTL_DEBUG_SPACE, "space=%x" }, ++ { CTL_DEBUG_PERM, "perm=%x" }, ++ { CTL_DEBUG_MISC, "misc=%x" }, ++ { CTL_DEBUG_ERROR, NULL } ++}; ++ ++#define HANDLE_CASE(id, name, val) \ ++ case CTL_DEBUG_ ## id: \ ++ vs_debug_ ## name = val; \ ++ printk("vs_debug_" #name "=0x%x\n", val); \ ++ break ++ ++ ++static int __init vs_debug_setup(char *str) ++{ ++ char *p; ++ int token; ++ ++ printk("vs_debug_setup(%s)\n", str); ++ while ((p = strsep(&str, ",")) != NULL) { ++ substring_t args[MAX_OPT_ARGS]; ++ unsigned int value; ++ ++ if (!*p) ++ continue; ++ ++ token = match_token(p, tokens, args); ++ value = (token > 0) ? 
simple_strtoul(args[0].from, NULL, 0) : 0; ++ ++ switch (token) { ++ HANDLE_CASE(SWITCH, switch, value); ++ HANDLE_CASE(XID, xid, value); ++ HANDLE_CASE(NID, nid, value); ++ HANDLE_CASE(TAG, tag, value); ++ HANDLE_CASE(NET, net, value); ++ HANDLE_CASE(LIMIT, limit, value); ++ HANDLE_CASE(CRES, cres, value); ++ HANDLE_CASE(DLIM, dlim, value); ++ HANDLE_CASE(QUOTA, quota, value); ++ HANDLE_CASE(CVIRT, cvirt, value); ++ HANDLE_CASE(SPACE, space, value); ++ HANDLE_CASE(PERM, perm, value); ++ HANDLE_CASE(MISC, misc, value); ++ default: ++ return -EINVAL; ++ break; ++ } ++ } ++ return 1; ++} ++ ++__setup("vsdebug=", vs_debug_setup); ++ ++ ++ ++EXPORT_SYMBOL_GPL(vs_debug_switch); ++EXPORT_SYMBOL_GPL(vs_debug_xid); ++EXPORT_SYMBOL_GPL(vs_debug_nid); ++EXPORT_SYMBOL_GPL(vs_debug_net); ++EXPORT_SYMBOL_GPL(vs_debug_limit); ++EXPORT_SYMBOL_GPL(vs_debug_cres); ++EXPORT_SYMBOL_GPL(vs_debug_dlim); ++EXPORT_SYMBOL_GPL(vs_debug_quota); ++EXPORT_SYMBOL_GPL(vs_debug_cvirt); ++EXPORT_SYMBOL_GPL(vs_debug_space); ++EXPORT_SYMBOL_GPL(vs_debug_perm); ++EXPORT_SYMBOL_GPL(vs_debug_misc); ++ +diff -NurpP --minimal linux-3.2.34/kernel/vserver/tag.c linux-3.2.34-vs2.3.2.15/kernel/vserver/tag.c +--- linux-3.2.34/kernel/vserver/tag.c 1970-01-01 01:00:00.000000000 +0100 ++++ linux-3.2.34-vs2.3.2.15/kernel/vserver/tag.c 2011-12-05 19:33:02.000000000 +0100 +@@ -0,0 +1,63 @@ ++/* ++ * linux/kernel/vserver/tag.c ++ * ++ * Virtual Server: Shallow Tag Space ++ * ++ * Copyright (C) 2007 Herbert Pötzl ++ * ++ * V0.01 basic implementation ++ * ++ */ ++ ++#include ++#include ++#include ++#include ++ ++#include ++ ++ ++int dx_migrate_task(struct task_struct *p, tag_t tag) ++{ ++ if (!p) ++ BUG(); ++ ++ vxdprintk(VXD_CBIT(tag, 5), ++ "dx_migrate_task(%p[#%d],#%d)", p, p->tag, tag); ++ ++ task_lock(p); ++ p->tag = tag; ++ task_unlock(p); ++ ++ vxdprintk(VXD_CBIT(tag, 5), ++ "moved task %p into [#%d]", p, tag); ++ return 0; ++} ++ ++/* vserver syscall commands below here */ ++ ++/* taks xid and vx_info functions */ ++ ++ ++int vc_task_tag(uint32_t id) ++{ ++ tag_t tag; ++ ++ if (id) { ++ struct task_struct *tsk; ++ rcu_read_lock(); ++ tsk = find_task_by_real_pid(id); ++ tag = (tsk) ? 
tsk->tag : -ESRCH; ++ rcu_read_unlock(); ++ } else ++ tag = dx_current_tag(); ++ return tag; ++} ++ ++ ++int vc_tag_migrate(uint32_t tag) ++{ ++ return dx_migrate_task(current, tag & 0xFFFF); ++} ++ ++ +diff -NurpP --minimal linux-3.2.34/kernel/vserver/vci_config.h linux-3.2.34-vs2.3.2.15/kernel/vserver/vci_config.h +--- linux-3.2.34/kernel/vserver/vci_config.h 1970-01-01 01:00:00.000000000 +0100 ++++ linux-3.2.34-vs2.3.2.15/kernel/vserver/vci_config.h 2011-12-05 19:33:02.000000000 +0100 +@@ -0,0 +1,76 @@ ++ ++/* interface version */ ++ ++#define VCI_VERSION 0x00020308 ++ ++ ++enum { ++ VCI_KCBIT_NO_DYNAMIC = 0, ++ ++ VCI_KCBIT_PROC_SECURE = 4, ++ /* VCI_KCBIT_HARDCPU = 5, */ ++ /* VCI_KCBIT_IDLELIMIT = 6, */ ++ /* VCI_KCBIT_IDLETIME = 7, */ ++ ++ VCI_KCBIT_COWBL = 8, ++ VCI_KCBIT_FULLCOWBL = 9, ++ VCI_KCBIT_SPACES = 10, ++ VCI_KCBIT_NETV2 = 11, ++ VCI_KCBIT_MEMCG = 12, ++ ++ VCI_KCBIT_DEBUG = 16, ++ VCI_KCBIT_HISTORY = 20, ++ VCI_KCBIT_TAGGED = 24, ++ VCI_KCBIT_PPTAG = 28, ++ ++ VCI_KCBIT_MORE = 31, ++}; ++ ++ ++static inline uint32_t vci_kernel_config(void) ++{ ++ return ++ (1 << VCI_KCBIT_NO_DYNAMIC) | ++ ++ /* configured features */ ++#ifdef CONFIG_VSERVER_PROC_SECURE ++ (1 << VCI_KCBIT_PROC_SECURE) | ++#endif ++#ifdef CONFIG_VSERVER_COWBL ++ (1 << VCI_KCBIT_COWBL) | ++ (1 << VCI_KCBIT_FULLCOWBL) | ++#endif ++ (1 << VCI_KCBIT_SPACES) | ++ (1 << VCI_KCBIT_NETV2) | ++#ifdef CONFIG_CGROUP_MEM_RES_CTLR ++ (1 << VCI_KCBIT_MEMCG) | ++#endif ++ ++ /* debug options */ ++#ifdef CONFIG_VSERVER_DEBUG ++ (1 << VCI_KCBIT_DEBUG) | ++#endif ++#ifdef CONFIG_VSERVER_HISTORY ++ (1 << VCI_KCBIT_HISTORY) | ++#endif ++ ++ /* inode context tagging */ ++#if defined(CONFIG_TAGGING_NONE) ++ (0 << VCI_KCBIT_TAGGED) | ++#elif defined(CONFIG_TAGGING_UID16) ++ (1 << VCI_KCBIT_TAGGED) | ++#elif defined(CONFIG_TAGGING_GID16) ++ (2 << VCI_KCBIT_TAGGED) | ++#elif defined(CONFIG_TAGGING_ID24) ++ (3 << VCI_KCBIT_TAGGED) | ++#elif defined(CONFIG_TAGGING_INTERN) ++ (4 << VCI_KCBIT_TAGGED) | ++#elif defined(CONFIG_TAGGING_RUNTIME) ++ (5 << VCI_KCBIT_TAGGED) | ++#else ++ (7 << VCI_KCBIT_TAGGED) | ++#endif ++ (1 << VCI_KCBIT_PPTAG) | ++ 0; ++} ++ +diff -NurpP --minimal linux-3.2.34/mm/memcontrol.c linux-3.2.34-vs2.3.2.15/mm/memcontrol.c +--- linux-3.2.34/mm/memcontrol.c 2012-11-18 18:42:24.000000000 +0100 ++++ linux-3.2.34-vs2.3.2.15/mm/memcontrol.c 2012-06-14 20:45:24.000000000 +0200 +@@ -766,6 +766,31 @@ struct mem_cgroup *mem_cgroup_from_task( + struct mem_cgroup, css); + } + ++u64 mem_cgroup_res_read_u64(struct mem_cgroup *mem, int member) ++{ ++ return res_counter_read_u64(&mem->res, member); ++} ++ ++u64 mem_cgroup_memsw_read_u64(struct mem_cgroup *mem, int member) ++{ ++ return res_counter_read_u64(&mem->memsw, member); ++} ++ ++s64 mem_cgroup_stat_read_cache(struct mem_cgroup *mem) ++{ ++ return mem_cgroup_read_stat(mem, MEM_CGROUP_STAT_CACHE); ++} ++ ++s64 mem_cgroup_stat_read_anon(struct mem_cgroup *mem) ++{ ++ return mem_cgroup_read_stat(mem, MEM_CGROUP_STAT_RSS); ++} ++ ++s64 mem_cgroup_stat_read_mapped(struct mem_cgroup *mem) ++{ ++ return mem_cgroup_read_stat(mem, MEM_CGROUP_STAT_FILE_MAPPED); ++} ++ + struct mem_cgroup *try_get_mem_cgroup_from_mm(struct mm_struct *mm) + { + struct mem_cgroup *memcg = NULL; +diff -NurpP --minimal linux-3.2.34/mm/oom_kill.c linux-3.2.34-vs2.3.2.15/mm/oom_kill.c +--- linux-3.2.34/mm/oom_kill.c 2012-01-09 16:15:00.000000000 +0100 ++++ linux-3.2.34-vs2.3.2.15/mm/oom_kill.c 2012-01-09 16:19:31.000000000 +0100 +@@ -33,6 +33,8 @@ + #include + #include + #include ++#include ++#include 
+ + int sysctl_panic_on_oom; + int sysctl_oom_kill_allocating_task; +@@ -148,11 +150,18 @@ struct task_struct *find_lock_task_mm(st + static bool oom_unkillable_task(struct task_struct *p, + const struct mem_cgroup *mem, const nodemask_t *nodemask) + { +- if (is_global_init(p)) ++ unsigned xid = vx_current_xid(); ++ ++ /* skip the init task, global and per guest */ ++ if (task_is_init(p)) + return true; + if (p->flags & PF_KTHREAD) + return true; + ++ /* skip other guest and host processes if oom in guest */ ++ if (xid && vx_task_xid(p) != xid) ++ return true; ++ + /* When mem_cgroup_out_of_memory() and p is not member of the group */ + if (mem && !task_in_mem_cgroup(p, mem)) + return true; +@@ -440,8 +449,8 @@ static int oom_kill_task(struct task_str + /* mm cannot be safely dereferenced after task_unlock(p) */ + mm = p->mm; + +- pr_err("Killed process %d (%s) total-vm:%lukB, anon-rss:%lukB, file-rss:%lukB\n", +- task_pid_nr(p), p->comm, K(p->mm->total_vm), ++ pr_err("Killed process %d:#%u (%s) total-vm:%lukB, anon-rss:%lukB, file-rss:%lukB\n", ++ task_pid_nr(p), p->xid, p->comm, K(p->mm->total_vm), + K(get_mm_counter(p->mm, MM_ANONPAGES)), + K(get_mm_counter(p->mm, MM_FILEPAGES))); + task_unlock(p); +@@ -499,8 +508,8 @@ static int oom_kill_process(struct task_ + } + + task_lock(p); +- pr_err("%s: Kill process %d (%s) score %d or sacrifice child\n", +- message, task_pid_nr(p), p->comm, points); ++ pr_err("%s: Kill process %d:#%u (%s) score %d or sacrifice child\n", ++ message, task_pid_nr(p), p->xid, p->comm, points); + task_unlock(p); + + /* +@@ -601,6 +610,8 @@ int unregister_oom_notifier(struct notif + } + EXPORT_SYMBOL_GPL(unregister_oom_notifier); + ++long vs_oom_action(unsigned int); ++ + /* + * Try to acquire the OOM killer lock for the zones in zonelist. 
Returns zero + * if a parallel OOM killing is already taking place that includes a zone in +@@ -759,7 +770,12 @@ retry: + if (!p) { + dump_header(NULL, gfp_mask, order, NULL, mpol_mask); + read_unlock(&tasklist_lock); +- panic("Out of memory and no killable processes...\n"); ++ ++ /* avoid panic for guest OOM */ ++ if (current->xid) ++ vs_oom_action(LINUX_REBOOT_CMD_OOM); ++ else ++ panic("Out of memory and no killable processes...\n"); + } + + if (oom_kill_process(p, gfp_mask, order, points, totalpages, NULL, +diff -NurpP --minimal linux-3.2.34/mm/page_alloc.c linux-3.2.34-vs2.3.2.15/mm/page_alloc.c +--- linux-3.2.34/mm/page_alloc.c 2012-11-18 18:42:24.000000000 +0100 ++++ linux-3.2.34-vs2.3.2.15/mm/page_alloc.c 2012-10-22 12:59:53.000000000 +0200 +@@ -57,6 +57,8 @@ + #include + #include + #include ++#include ++#include + + #include + #include +@@ -2527,6 +2529,9 @@ void si_meminfo(struct sysinfo *val) + val->totalhigh = totalhigh_pages; + val->freehigh = nr_free_highpages(); + val->mem_unit = PAGE_SIZE; ++ ++ if (vx_flags(VXF_VIRT_MEM, 0)) ++ vx_vsi_meminfo(val); + } + + EXPORT_SYMBOL(si_meminfo); +@@ -2547,6 +2552,9 @@ void si_meminfo_node(struct sysinfo *val + val->freehigh = 0; + #endif + val->mem_unit = PAGE_SIZE; ++ ++ if (vx_flags(VXF_VIRT_MEM, 0)) ++ vx_vsi_meminfo(val); + } + #endif + +diff -NurpP --minimal linux-3.2.34/mm/pgtable-generic.c linux-3.2.34-vs2.3.2.15/mm/pgtable-generic.c +--- linux-3.2.34/mm/pgtable-generic.c 2011-03-15 18:07:42.000000000 +0100 ++++ linux-3.2.34-vs2.3.2.15/mm/pgtable-generic.c 2011-12-05 19:33:02.000000000 +0100 +@@ -6,6 +6,8 @@ + * Copyright (C) 2010 Linus Torvalds + */ + ++#include ++ + #include + #include + #include +diff -NurpP --minimal linux-3.2.34/mm/shmem.c linux-3.2.34-vs2.3.2.15/mm/shmem.c +--- linux-3.2.34/mm/shmem.c 2012-11-18 18:42:24.000000000 +0100 ++++ linux-3.2.34-vs2.3.2.15/mm/shmem.c 2012-11-06 18:08:24.000000000 +0100 +@@ -1461,7 +1461,7 @@ static int shmem_statfs(struct dentry *d + { + struct shmem_sb_info *sbinfo = SHMEM_SB(dentry->d_sb); + +- buf->f_type = TMPFS_MAGIC; ++ buf->f_type = TMPFS_SUPER_MAGIC; + buf->f_bsize = PAGE_CACHE_SIZE; + buf->f_namelen = NAME_MAX; + if (sbinfo->max_blocks) { +@@ -2220,7 +2220,7 @@ int shmem_fill_super(struct super_block + sb->s_maxbytes = MAX_LFS_FILESIZE; + sb->s_blocksize = PAGE_CACHE_SIZE; + sb->s_blocksize_bits = PAGE_CACHE_SHIFT; +- sb->s_magic = TMPFS_MAGIC; ++ sb->s_magic = TMPFS_SUPER_MAGIC; + sb->s_op = &shmem_ops; + sb->s_time_gran = 1; + #ifdef CONFIG_TMPFS_XATTR +diff -NurpP --minimal linux-3.2.34/mm/slab.c linux-3.2.34-vs2.3.2.15/mm/slab.c +--- linux-3.2.34/mm/slab.c 2012-11-18 18:42:24.000000000 +0100 ++++ linux-3.2.34-vs2.3.2.15/mm/slab.c 2012-10-22 12:59:53.000000000 +0200 +@@ -411,6 +411,8 @@ static void kmem_list3_init(struct kmem_ + #define STATS_INC_FREEMISS(x) do { } while (0) + #endif + ++#include "slab_vs.h" ++ + #if DEBUG + + /* +@@ -3400,6 +3402,7 @@ retry: + + obj = slab_get_obj(cachep, slabp, nodeid); + check_slabp(cachep, slabp); ++ vx_slab_alloc(cachep, flags); + l3->free_objects--; + /* move slabp to correct slabp list: */ + list_del(&slabp->list); +@@ -3477,6 +3480,7 @@ __cache_alloc_node(struct kmem_cache *ca + /* ___cache_alloc_node can fall back to other nodes */ + ptr = ____cache_alloc_node(cachep, flags, nodeid); + out: ++ vx_slab_alloc(cachep, flags); + local_irq_restore(save_flags); + ptr = cache_alloc_debugcheck_after(cachep, flags, ptr, caller); + kmemleak_alloc_recursive(ptr, obj_size(cachep), 1, cachep->flags, +@@ -3664,6 +3668,7 @@ static inline 
void __cache_free(struct k + check_irq_off(); + kmemleak_free_recursive(objp, cachep->flags); + objp = cache_free_debugcheck(cachep, objp, caller); ++ vx_slab_free(cachep); + + kmemcheck_slab_free(cachep, objp, obj_size(cachep)); + +diff -NurpP --minimal linux-3.2.34/mm/slab_vs.h linux-3.2.34-vs2.3.2.15/mm/slab_vs.h +--- linux-3.2.34/mm/slab_vs.h 1970-01-01 01:00:00.000000000 +0100 ++++ linux-3.2.34-vs2.3.2.15/mm/slab_vs.h 2011-12-05 19:33:02.000000000 +0100 +@@ -0,0 +1,29 @@ ++ ++#include ++ ++#include ++ ++static inline ++void vx_slab_alloc(struct kmem_cache *cachep, gfp_t flags) ++{ ++ int what = gfp_zone(cachep->gfpflags); ++ struct vx_info *vxi = current_vx_info(); ++ ++ if (!vxi) ++ return; ++ ++ atomic_add(cachep->buffer_size, &vxi->cacct.slab[what]); ++} ++ ++static inline ++void vx_slab_free(struct kmem_cache *cachep) ++{ ++ int what = gfp_zone(cachep->gfpflags); ++ struct vx_info *vxi = current_vx_info(); ++ ++ if (!vxi) ++ return; ++ ++ atomic_sub(cachep->buffer_size, &vxi->cacct.slab[what]); ++} ++ +diff -NurpP --minimal linux-3.2.34/mm/swapfile.c linux-3.2.34-vs2.3.2.15/mm/swapfile.c +--- linux-3.2.34/mm/swapfile.c 2012-11-18 18:42:24.000000000 +0100 ++++ linux-3.2.34-vs2.3.2.15/mm/swapfile.c 2012-06-22 19:03:16.000000000 +0200 +@@ -36,6 +36,7 @@ + #include + #include + #include ++#include + + static bool swap_count_continued(struct swap_info_struct *, pgoff_t, + unsigned char); +@@ -1751,6 +1752,16 @@ static int swap_show(struct seq_file *sw + + if (si == SEQ_START_TOKEN) { + seq_puts(swap,"Filename\t\t\t\tType\t\tSize\tUsed\tPriority\n"); ++ if (vx_flags(VXF_VIRT_MEM, 0)) { ++ struct sysinfo si; ++ ++ vx_vsi_swapinfo(&si); ++ if (si.totalswap < (1 << 10)) ++ return 0; ++ seq_printf(swap, "%s\t\t\t\t\t%s\t%lu\t%lu\t%d\n", ++ "hdv0", "partition", si.totalswap >> 10, ++ (si.totalswap - si.freeswap) >> 10, -1); ++ } + return 0; + } + +@@ -2170,6 +2181,8 @@ void si_swapinfo(struct sysinfo *val) + val->freeswap = nr_swap_pages + nr_to_be_unused; + val->totalswap = total_swap_pages + nr_to_be_unused; + spin_unlock(&swap_lock); ++ if (vx_flags(VXF_VIRT_MEM, 0)) ++ vx_vsi_swapinfo(val); + } + + /* +diff -NurpP --minimal linux-3.2.34/net/bridge/br_multicast.c linux-3.2.34-vs2.3.2.15/net/bridge/br_multicast.c +--- linux-3.2.34/net/bridge/br_multicast.c 2012-11-18 18:42:24.000000000 +0100 ++++ linux-3.2.34-vs2.3.2.15/net/bridge/br_multicast.c 2012-05-15 18:16:52.000000000 +0200 +@@ -445,7 +445,7 @@ static struct sk_buff *br_ip6_multicast_ + ip6h->hop_limit = 1; + ipv6_addr_set(&ip6h->daddr, htonl(0xff020000), 0, 0, htonl(1)); + if (ipv6_dev_get_saddr(dev_net(br->dev), br->dev, &ip6h->daddr, 0, +- &ip6h->saddr)) { ++ &ip6h->saddr, NULL)) { + kfree_skb(skb); + return NULL; + } +diff -NurpP --minimal linux-3.2.34/net/core/dev.c linux-3.2.34-vs2.3.2.15/net/core/dev.c +--- linux-3.2.34/net/core/dev.c 2012-11-18 18:42:24.000000000 +0100 ++++ linux-3.2.34-vs2.3.2.15/net/core/dev.c 2012-11-18 21:11:16.000000000 +0100 +@@ -127,6 +127,7 @@ + #include + #include + #include ++#include + #include + #include + #include +@@ -623,7 +624,8 @@ struct net_device *__dev_get_by_name(str + struct hlist_head *head = dev_name_hash(net, name); + + hlist_for_each_entry(dev, p, head, name_hlist) +- if (!strncmp(dev->name, name, IFNAMSIZ)) ++ if (!strncmp(dev->name, name, IFNAMSIZ) && ++ nx_dev_visible(current_nx_info(), dev)) + return dev; + + return NULL; +@@ -649,7 +651,8 @@ struct net_device *dev_get_by_name_rcu(s + struct hlist_head *head = dev_name_hash(net, name); + + hlist_for_each_entry_rcu(dev, p, head, 
name_hlist) +- if (!strncmp(dev->name, name, IFNAMSIZ)) ++ if (!strncmp(dev->name, name, IFNAMSIZ) && ++ nx_dev_visible(current_nx_info(), dev)) + return dev; + + return NULL; +@@ -700,7 +703,8 @@ struct net_device *__dev_get_by_index(st + struct hlist_head *head = dev_index_hash(net, ifindex); + + hlist_for_each_entry(dev, p, head, index_hlist) +- if (dev->ifindex == ifindex) ++ if ((dev->ifindex == ifindex) && ++ nx_dev_visible(current_nx_info(), dev)) + return dev; + + return NULL; +@@ -718,7 +722,7 @@ EXPORT_SYMBOL(__dev_get_by_index); + * about locking. The caller must hold RCU lock. + */ + +-struct net_device *dev_get_by_index_rcu(struct net *net, int ifindex) ++struct net_device *dev_get_by_index_real_rcu(struct net *net, int ifindex) + { + struct hlist_node *p; + struct net_device *dev; +@@ -730,6 +734,16 @@ struct net_device *dev_get_by_index_rcu( + + return NULL; + } ++EXPORT_SYMBOL(dev_get_by_index_real_rcu); ++ ++struct net_device *dev_get_by_index_rcu(struct net *net, int ifindex) ++{ ++ struct net_device *dev = dev_get_by_index_real_rcu(net, ifindex); ++ ++ if (nx_dev_visible(current_nx_info(), dev)) ++ return dev; ++ return NULL; ++} + EXPORT_SYMBOL(dev_get_by_index_rcu); + + +@@ -778,7 +792,8 @@ struct net_device *dev_getbyhwaddr_rcu(s + + for_each_netdev_rcu(net, dev) + if (dev->type == type && +- !memcmp(dev->dev_addr, ha, dev->addr_len)) ++ !memcmp(dev->dev_addr, ha, dev->addr_len) && ++ nx_dev_visible(current_nx_info(), dev)) + return dev; + + return NULL; +@@ -790,9 +805,11 @@ struct net_device *__dev_getfirstbyhwtyp + struct net_device *dev; + + ASSERT_RTNL(); +- for_each_netdev(net, dev) +- if (dev->type == type) ++ for_each_netdev(net, dev) { ++ if ((dev->type == type) && ++ nx_dev_visible(current_nx_info(), dev)) + return dev; ++ } + + return NULL; + } +@@ -910,6 +927,8 @@ static int __dev_alloc_name(struct net * + continue; + if (i < 0 || i >= max_netdevices) + continue; ++ if (!nx_dev_visible(current_nx_info(), d)) ++ continue; + + /* avoid cases where sscanf is not exact inverse of printf */ + snprintf(buf, IFNAMSIZ, name, i); +@@ -4071,6 +4090,8 @@ static int dev_ifconf(struct net *net, c + + total = 0; + for_each_netdev(net, dev) { ++ if (!nx_dev_visible(current_nx_info(), dev)) ++ continue; + for (i = 0; i < NPROTO; i++) { + if (gifconf_list[i]) { + int done; +@@ -4173,6 +4194,10 @@ static void dev_seq_printf_stats(struct + struct rtnl_link_stats64 temp; + const struct rtnl_link_stats64 *stats = dev_get_stats(dev, &temp); + ++ /* device visible inside network context? 
*/ ++ if (!nx_dev_visible(current_nx_info(), dev)) ++ return; ++ + seq_printf(seq, "%6s: %7llu %7llu %4llu %4llu %4llu %5llu %10llu %9llu " + "%8llu %7llu %4llu %4llu %4llu %5llu %7llu %10llu\n", + dev->name, stats->rx_bytes, stats->rx_packets, +diff -NurpP --minimal linux-3.2.34/net/core/rtnetlink.c linux-3.2.34-vs2.3.2.15/net/core/rtnetlink.c +--- linux-3.2.34/net/core/rtnetlink.c 2012-11-18 18:42:24.000000000 +0100 ++++ linux-3.2.34-vs2.3.2.15/net/core/rtnetlink.c 2012-11-18 21:11:16.000000000 +0100 +@@ -1076,6 +1076,8 @@ static int rtnl_dump_ifinfo(struct sk_bu + hlist_for_each_entry_rcu(dev, node, head, index_hlist) { + if (idx < s_idx) + goto cont; ++ if (!nx_dev_visible(skb->sk->sk_nx_info, dev)) ++ continue; + if (rtnl_fill_ifinfo(skb, dev, RTM_NEWLINK, + NETLINK_CB(cb->skb).pid, + cb->nlh->nlmsg_seq, 0, +@@ -1958,6 +1960,9 @@ void rtmsg_ifinfo(int type, struct net_d + int err = -ENOBUFS; + size_t if_info_size; + ++ if (!nx_dev_visible(current_nx_info(), dev)) ++ return; ++ + skb = nlmsg_new((if_info_size = if_nlmsg_size(dev, 0)), GFP_KERNEL); + if (skb == NULL) + goto errout; +diff -NurpP --minimal linux-3.2.34/net/core/sock.c linux-3.2.34-vs2.3.2.15/net/core/sock.c +--- linux-3.2.34/net/core/sock.c 2012-11-18 18:42:24.000000000 +0100 ++++ linux-3.2.34-vs2.3.2.15/net/core/sock.c 2012-10-22 12:59:53.000000000 +0200 +@@ -127,6 +127,10 @@ + #include + + #include ++#include ++#include ++#include ++#include + + #include + +@@ -1067,6 +1071,8 @@ static struct sock *sk_prot_alloc(struct + goto out_free_sec; + sk_tx_queue_clear(sk); + } ++ sock_vx_init(sk); ++ sock_nx_init(sk); + + return sk; + +@@ -1166,6 +1172,11 @@ static void __sk_free(struct sock *sk) + put_cred(sk->sk_peer_cred); + put_pid(sk->sk_peer_pid); + put_net(sock_net(sk)); ++ vx_sock_dec(sk); ++ clr_vx_info(&sk->sk_vx_info); ++ sk->sk_xid = -1; ++ clr_nx_info(&sk->sk_nx_info); ++ sk->sk_nid = -1; + sk_prot_free(sk->sk_prot_creator, sk); + } + +@@ -1213,6 +1224,8 @@ struct sock *sk_clone(const struct sock + + /* SANITY */ + get_net(sock_net(newsk)); ++ sock_vx_init(newsk); ++ sock_nx_init(newsk); + sk_node_init(&newsk->sk_node); + sock_lock_init(newsk); + bh_lock_sock(newsk); +@@ -1269,6 +1282,12 @@ struct sock *sk_clone(const struct sock + smp_wmb(); + atomic_set(&newsk->sk_refcnt, 2); + ++ set_vx_info(&newsk->sk_vx_info, sk->sk_vx_info); ++ newsk->sk_xid = sk->sk_xid; ++ vx_sock_inc(newsk); ++ set_nx_info(&newsk->sk_nx_info, sk->sk_nx_info); ++ newsk->sk_nid = sk->sk_nid; ++ + /* + * Increment the counter in the same struct proto as the master + * sock (sk_refcnt_debug_inc uses newsk->sk_prot->socks, that +@@ -2019,6 +2038,12 @@ void sock_init_data(struct socket *sock, + + sk->sk_stamp = ktime_set(-1L, 0); + ++ set_vx_info(&sk->sk_vx_info, current_vx_info()); ++ sk->sk_xid = vx_current_xid(); ++ vx_sock_inc(sk); ++ set_nx_info(&sk->sk_nx_info, current_nx_info()); ++ sk->sk_nid = nx_current_nid(); ++ + /* + * Before updating sk_refcnt, we must commit prior changes to memory + * (Documentation/RCU/rculist_nulls.txt for details) +diff -NurpP --minimal linux-3.2.34/net/ipv4/af_inet.c linux-3.2.34-vs2.3.2.15/net/ipv4/af_inet.c +--- linux-3.2.34/net/ipv4/af_inet.c 2012-01-09 16:15:03.000000000 +0100 ++++ linux-3.2.34-vs2.3.2.15/net/ipv4/af_inet.c 2012-02-15 03:03:53.000000000 +0100 +@@ -117,6 +117,7 @@ + #ifdef CONFIG_IP_MROUTE + #include + #endif ++#include + + + /* The inetsw table contains everything that inet_create needs to +@@ -326,9 +327,13 @@ lookup_protocol: + } + + err = -EPERM; ++ if ((protocol == IPPROTO_ICMP) && ++ 
nx_capable(CAP_NET_RAW, NXC_RAW_ICMP)) ++ goto override; ++ + if (sock->type == SOCK_RAW && !kern && !capable(CAP_NET_RAW)) + goto out_rcu_unlock; +- ++override: + err = -EAFNOSUPPORT; + if (!inet_netns_ok(net, protocol)) + goto out_rcu_unlock; +@@ -452,6 +457,7 @@ int inet_bind(struct socket *sock, struc + struct sockaddr_in *addr = (struct sockaddr_in *)uaddr; + struct sock *sk = sock->sk; + struct inet_sock *inet = inet_sk(sk); ++ struct nx_v4_sock_addr nsa; + unsigned short snum; + int chk_addr_ret; + int err; +@@ -475,7 +481,11 @@ int inet_bind(struct socket *sock, struc + goto out; + } + +- chk_addr_ret = inet_addr_type(sock_net(sk), addr->sin_addr.s_addr); ++ err = v4_map_sock_addr(inet, addr, &nsa); ++ if (err) ++ goto out; ++ ++ chk_addr_ret = inet_addr_type(sock_net(sk), nsa.saddr); + + /* Not specified by any standard per-se, however it breaks too + * many applications when removed. It is unfortunate since +@@ -487,7 +497,7 @@ int inet_bind(struct socket *sock, struc + err = -EADDRNOTAVAIL; + if (!sysctl_ip_nonlocal_bind && + !(inet->freebind || inet->transparent) && +- addr->sin_addr.s_addr != htonl(INADDR_ANY) && ++ nsa.saddr != htonl(INADDR_ANY) && + chk_addr_ret != RTN_LOCAL && + chk_addr_ret != RTN_MULTICAST && + chk_addr_ret != RTN_BROADCAST) +@@ -512,7 +522,7 @@ int inet_bind(struct socket *sock, struc + if (sk->sk_state != TCP_CLOSE || inet->inet_num) + goto out_release_sock; + +- inet->inet_rcv_saddr = inet->inet_saddr = addr->sin_addr.s_addr; ++ v4_set_sock_addr(inet, &nsa); + if (chk_addr_ret == RTN_MULTICAST || chk_addr_ret == RTN_BROADCAST) + inet->inet_saddr = 0; /* Use device */ + +@@ -715,11 +725,13 @@ int inet_getname(struct socket *sock, st + peer == 1)) + return -ENOTCONN; + sin->sin_port = inet->inet_dport; +- sin->sin_addr.s_addr = inet->inet_daddr; ++ sin->sin_addr.s_addr = ++ nx_map_sock_lback(sk->sk_nx_info, inet->inet_daddr); + } else { + __be32 addr = inet->inet_rcv_saddr; + if (!addr) + addr = inet->inet_saddr; ++ addr = nx_map_sock_lback(sk->sk_nx_info, addr); + sin->sin_port = inet->inet_sport; + sin->sin_addr.s_addr = addr; + } +diff -NurpP --minimal linux-3.2.34/net/ipv4/arp.c linux-3.2.34-vs2.3.2.15/net/ipv4/arp.c +--- linux-3.2.34/net/ipv4/arp.c 2012-11-18 18:42:25.000000000 +0100 ++++ linux-3.2.34-vs2.3.2.15/net/ipv4/arp.c 2012-03-01 21:39:38.000000000 +0100 +@@ -1333,6 +1333,7 @@ static void arp_format_neigh_entry(struc + struct net_device *dev = n->dev; + int hatype = dev->type; + ++ /* FIXME: check for network context */ + read_lock(&n->lock); + /* Convert hardware address to XX:XX:XX:XX ... form. */ + #if defined(CONFIG_AX25) || defined(CONFIG_AX25_MODULE) +@@ -1364,6 +1365,7 @@ static void arp_format_pneigh_entry(stru + int hatype = dev ? dev->type : 0; + char tbuf[16]; + ++ /* FIXME: check for network context */ + sprintf(tbuf, "%pI4", n->key); + seq_printf(seq, "%-16s 0x%-10x0x%-10x%s * %s\n", + tbuf, hatype, ATF_PUBL | ATF_PERM, "00:00:00:00:00:00", +diff -NurpP --minimal linux-3.2.34/net/ipv4/devinet.c linux-3.2.34-vs2.3.2.15/net/ipv4/devinet.c +--- linux-3.2.34/net/ipv4/devinet.c 2012-11-18 18:42:25.000000000 +0100 ++++ linux-3.2.34-vs2.3.2.15/net/ipv4/devinet.c 2012-02-07 03:14:01.000000000 +0100 +@@ -518,6 +518,7 @@ struct in_device *inetdev_by_index(struc + } + EXPORT_SYMBOL(inetdev_by_index); + ++ + /* Called only from RTNL semaphored context. No locks. 
*/ + + struct in_ifaddr *inet_ifa_byprefix(struct in_device *in_dev, __be32 prefix, +@@ -759,6 +760,8 @@ int devinet_ioctl(struct net *net, unsig + + in_dev = __in_dev_get_rtnl(dev); + if (in_dev) { ++ struct nx_info *nxi = current_nx_info(); ++ + if (tryaddrmatch) { + /* Matthias Andree */ + /* compare label and address (4.4BSD style) */ +@@ -767,6 +770,8 @@ int devinet_ioctl(struct net *net, unsig + This is checked above. */ + for (ifap = &in_dev->ifa_list; (ifa = *ifap) != NULL; + ifap = &ifa->ifa_next) { ++ if (!nx_v4_ifa_visible(nxi, ifa)) ++ continue; + if (!strcmp(ifr.ifr_name, ifa->ifa_label) && + sin_orig.sin_addr.s_addr == + ifa->ifa_local) { +@@ -779,9 +784,12 @@ int devinet_ioctl(struct net *net, unsig + comparing just the label */ + if (!ifa) { + for (ifap = &in_dev->ifa_list; (ifa = *ifap) != NULL; +- ifap = &ifa->ifa_next) ++ ifap = &ifa->ifa_next) { ++ if (!nx_v4_ifa_visible(nxi, ifa)) ++ continue; + if (!strcmp(ifr.ifr_name, ifa->ifa_label)) + break; ++ } + } + } + +@@ -934,6 +942,8 @@ static int inet_gifconf(struct net_devic + goto out; + + for (ifa = in_dev->ifa_list; ifa; ifa = ifa->ifa_next) { ++ if (!nx_v4_ifa_visible(current_nx_info(), ifa)) ++ continue; + if (!buf) { + done += sizeof(ifr); + continue; +@@ -1294,6 +1304,7 @@ static int inet_dump_ifaddr(struct sk_bu + struct net_device *dev; + struct in_device *in_dev; + struct in_ifaddr *ifa; ++ struct sock *sk = skb->sk; + struct hlist_head *head; + struct hlist_node *node; + +@@ -1316,6 +1327,8 @@ static int inet_dump_ifaddr(struct sk_bu + + for (ifa = in_dev->ifa_list, ip_idx = 0; ifa; + ifa = ifa->ifa_next, ip_idx++) { ++ if (sk && !nx_v4_ifa_visible(sk->sk_nx_info, ifa)) ++ continue; + if (ip_idx < s_ip_idx) + continue; + if (inet_fill_ifaddr(skb, ifa, +diff -NurpP --minimal linux-3.2.34/net/ipv4/fib_trie.c linux-3.2.34-vs2.3.2.15/net/ipv4/fib_trie.c +--- linux-3.2.34/net/ipv4/fib_trie.c 2012-11-18 18:42:25.000000000 +0100 ++++ linux-3.2.34-vs2.3.2.15/net/ipv4/fib_trie.c 2012-06-14 20:45:24.000000000 +0200 +@@ -2557,6 +2557,7 @@ static int fib_route_seq_show(struct seq + || fa->fa_type == RTN_MULTICAST) + continue; + ++ /* FIXME: check for network context? 
*/ + if (fi) + seq_printf(seq, + "%s\t%08X\t%08X\t%04X\t%d\t%u\t" +diff -NurpP --minimal linux-3.2.34/net/ipv4/inet_connection_sock.c linux-3.2.34-vs2.3.2.15/net/ipv4/inet_connection_sock.c +--- linux-3.2.34/net/ipv4/inet_connection_sock.c 2011-07-22 11:18:13.000000000 +0200 ++++ linux-3.2.34-vs2.3.2.15/net/ipv4/inet_connection_sock.c 2012-02-07 03:13:38.000000000 +0100 +@@ -52,6 +52,37 @@ void inet_get_local_port_range(int *low, + } + EXPORT_SYMBOL(inet_get_local_port_range); + ++int ipv4_rcv_saddr_equal(const struct sock *sk1, const struct sock *sk2) ++{ ++ __be32 sk1_rcv_saddr = sk_rcv_saddr(sk1), ++ sk2_rcv_saddr = sk_rcv_saddr(sk2); ++ ++ if (inet_v6_ipv6only(sk2)) ++ return 0; ++ ++ if (sk1_rcv_saddr && ++ sk2_rcv_saddr && ++ sk1_rcv_saddr == sk2_rcv_saddr) ++ return 1; ++ ++ if (sk1_rcv_saddr && ++ !sk2_rcv_saddr && ++ v4_addr_in_nx_info(sk2->sk_nx_info, sk1_rcv_saddr, NXA_MASK_BIND)) ++ return 1; ++ ++ if (sk2_rcv_saddr && ++ !sk1_rcv_saddr && ++ v4_addr_in_nx_info(sk1->sk_nx_info, sk2_rcv_saddr, NXA_MASK_BIND)) ++ return 1; ++ ++ if (!sk1_rcv_saddr && ++ !sk2_rcv_saddr && ++ nx_v4_addr_conflict(sk1->sk_nx_info, sk2->sk_nx_info)) ++ return 1; ++ ++ return 0; ++} ++ + int inet_csk_bind_conflict(const struct sock *sk, + const struct inet_bind_bucket *tb) + { +@@ -74,9 +105,7 @@ int inet_csk_bind_conflict(const struct + sk->sk_bound_dev_if == sk2->sk_bound_dev_if)) { + if (!reuse || !sk2->sk_reuse || + sk2->sk_state == TCP_LISTEN) { +- const __be32 sk2_rcv_saddr = sk_rcv_saddr(sk2); +- if (!sk2_rcv_saddr || !sk_rcv_saddr(sk) || +- sk2_rcv_saddr == sk_rcv_saddr(sk)) ++ if (ipv4_rcv_saddr_equal(sk, sk2)) + break; + } + } +diff -NurpP --minimal linux-3.2.34/net/ipv4/inet_diag.c linux-3.2.34-vs2.3.2.15/net/ipv4/inet_diag.c +--- linux-3.2.34/net/ipv4/inet_diag.c 2012-01-09 16:15:03.000000000 +0100 ++++ linux-3.2.34-vs2.3.2.15/net/ipv4/inet_diag.c 2012-02-15 03:03:53.000000000 +0100 +@@ -33,6 +33,8 @@ + #include + + #include ++#include ++#include + + static const struct inet_diag_handler **inet_diag_table; + +@@ -119,8 +121,10 @@ static int inet_csk_diag_fill(struct soc + + r->id.idiag_sport = inet->inet_sport; + r->id.idiag_dport = inet->inet_dport; +- r->id.idiag_src[0] = inet->inet_rcv_saddr; +- r->id.idiag_dst[0] = inet->inet_daddr; ++ r->id.idiag_src[0] = nx_map_sock_lback(sk->sk_nx_info, ++ inet->inet_rcv_saddr); ++ r->id.idiag_dst[0] = nx_map_sock_lback(sk->sk_nx_info, ++ inet->inet_daddr); + + /* IPv6 dual-stack sockets use inet->tos for IPv4 connections, + * hence this needs to be included regardless of socket family. 
+@@ -214,8 +218,8 @@ static int inet_twsk_diag_fill(struct in + r->id.idiag_cookie[1] = (u32)(((unsigned long)tw >> 31) >> 1); + r->id.idiag_sport = tw->tw_sport; + r->id.idiag_dport = tw->tw_dport; +- r->id.idiag_src[0] = tw->tw_rcv_saddr; +- r->id.idiag_dst[0] = tw->tw_daddr; ++ r->id.idiag_src[0] = nx_map_sock_lback(tw->tw_nx_info, tw->tw_rcv_saddr); ++ r->id.idiag_dst[0] = nx_map_sock_lback(tw->tw_nx_info, tw->tw_daddr); + r->idiag_state = tw->tw_substate; + r->idiag_timer = 3; + r->idiag_expires = DIV_ROUND_UP(tmo * 1000, HZ); +@@ -272,6 +276,7 @@ static int inet_diag_get_exact(struct sk + err = -EINVAL; + + if (req->idiag_family == AF_INET) { ++ /* TODO: lback */ + sk = inet_lookup(&init_net, hashinfo, req->id.idiag_dst[0], + req->id.idiag_dport, req->id.idiag_src[0], + req->id.idiag_sport, req->id.idiag_if); +@@ -514,6 +519,7 @@ static int inet_csk_diag_dump(struct soc + } else + #endif + { ++ /* TODO: lback */ + entry.saddr = &inet->inet_rcv_saddr; + entry.daddr = &inet->inet_daddr; + } +@@ -552,6 +558,7 @@ static int inet_twsk_diag_dump(struct in + } else + #endif + { ++ /* TODO: lback */ + entry.saddr = &tw->tw_rcv_saddr; + entry.daddr = &tw->tw_daddr; + } +@@ -598,8 +605,8 @@ static int inet_diag_fill_req(struct sk_ + + r->id.idiag_sport = inet->inet_sport; + r->id.idiag_dport = ireq->rmt_port; +- r->id.idiag_src[0] = ireq->loc_addr; +- r->id.idiag_dst[0] = ireq->rmt_addr; ++ r->id.idiag_src[0] = nx_map_sock_lback(sk->sk_nx_info, ireq->loc_addr); ++ r->id.idiag_dst[0] = nx_map_sock_lback(sk->sk_nx_info, ireq->rmt_addr); + r->idiag_expires = jiffies_to_msecs(tmo); + r->idiag_rqueue = 0; + r->idiag_wqueue = 0; +@@ -670,6 +677,7 @@ static int inet_diag_dump_reqs(struct sk + continue; + + if (bc) { ++ /* TODO: lback */ + entry.saddr = + #if defined(CONFIG_IPV6) || defined (CONFIG_IPV6_MODULE) + (entry.family == AF_INET6) ? +@@ -740,6 +748,8 @@ static int inet_diag_dump(struct sk_buff + sk_nulls_for_each(sk, node, &ilb->head) { + struct inet_sock *inet = inet_sk(sk); + ++ if (!nx_check(sk->sk_nid, VS_WATCH_P | VS_IDENT)) ++ continue; + if (num < s_num) { + num++; + continue; +@@ -806,6 +816,8 @@ skip_listen_ht: + sk_nulls_for_each(sk, node, &head->chain) { + struct inet_sock *inet = inet_sk(sk); + ++ if (!nx_check(sk->sk_nid, VS_WATCH_P | VS_IDENT)) ++ continue; + if (num < s_num) + goto next_normal; + if (!(r->idiag_states & (1 << sk->sk_state))) +@@ -830,6 +842,8 @@ next_normal: + inet_twsk_for_each(tw, node, + &head->twchain) { + ++ if (!nx_check(tw->tw_nid, VS_WATCH_P | VS_IDENT)) ++ continue; + if (num < s_num) + goto next_dying; + if (r->id.idiag_sport != tw->tw_sport && +diff -NurpP --minimal linux-3.2.34/net/ipv4/inet_hashtables.c linux-3.2.34-vs2.3.2.15/net/ipv4/inet_hashtables.c +--- linux-3.2.34/net/ipv4/inet_hashtables.c 2011-10-24 18:45:34.000000000 +0200 ++++ linux-3.2.34-vs2.3.2.15/net/ipv4/inet_hashtables.c 2011-12-05 19:33:02.000000000 +0100 +@@ -22,6 +22,7 @@ + #include + #include + #include ++#include + #include + + /* +@@ -156,6 +157,11 @@ static inline int compute_score(struct s + if (rcv_saddr != daddr) + return -1; + score += 2; ++ } else { ++ /* block non nx_info ips */ ++ if (!v4_addr_in_nx_info(sk->sk_nx_info, ++ daddr, NXA_MASK_BIND)) ++ return -1; + } + if (sk->sk_bound_dev_if) { + if (sk->sk_bound_dev_if != dif) +@@ -173,7 +179,6 @@ static inline int compute_score(struct s + * wildcarded during the search since they can never be otherwise. 
+ */ + +- + struct sock *__inet_lookup_listener(struct net *net, + struct inet_hashinfo *hashinfo, + const __be32 daddr, const unsigned short hnum, +@@ -196,6 +201,7 @@ begin: + hiscore = score; + } + } ++ + /* + * if the nulls value we got at the end of this lookup is + * not the expected one, we must restart lookup. +diff -NurpP --minimal linux-3.2.34/net/ipv4/netfilter/nf_nat_helper.c linux-3.2.34-vs2.3.2.15/net/ipv4/netfilter/nf_nat_helper.c +--- linux-3.2.34/net/ipv4/netfilter/nf_nat_helper.c 2011-07-22 11:18:13.000000000 +0200 ++++ linux-3.2.34-vs2.3.2.15/net/ipv4/netfilter/nf_nat_helper.c 2011-12-05 19:33:02.000000000 +0100 +@@ -20,6 +20,7 @@ + #include + + #include ++#include + #include + #include + #include +diff -NurpP --minimal linux-3.2.34/net/ipv4/netfilter.c linux-3.2.34-vs2.3.2.15/net/ipv4/netfilter.c +--- linux-3.2.34/net/ipv4/netfilter.c 2012-01-09 16:15:03.000000000 +0100 ++++ linux-3.2.34-vs2.3.2.15/net/ipv4/netfilter.c 2012-02-07 03:13:38.000000000 +0100 +@@ -6,7 +6,7 @@ + #include + #include + #include +-#include ++// #include + #include + #include + #include +diff -NurpP --minimal linux-3.2.34/net/ipv4/raw.c linux-3.2.34-vs2.3.2.15/net/ipv4/raw.c +--- linux-3.2.34/net/ipv4/raw.c 2012-11-18 18:42:25.000000000 +0100 ++++ linux-3.2.34-vs2.3.2.15/net/ipv4/raw.c 2012-10-22 12:59:53.000000000 +0200 +@@ -118,7 +118,7 @@ static struct sock *__raw_v4_lookup(stru + + if (net_eq(sock_net(sk), net) && inet->inet_num == num && + !(inet->inet_daddr && inet->inet_daddr != raddr) && +- !(inet->inet_rcv_saddr && inet->inet_rcv_saddr != laddr) && ++ v4_sock_addr_match(sk->sk_nx_info, inet, laddr) && + !(sk->sk_bound_dev_if && sk->sk_bound_dev_if != dif)) + goto found; /* gotcha */ + } +@@ -388,6 +388,12 @@ static int raw_send_hdrinc(struct sock * + icmp_out_count(net, ((struct icmphdr *) + skb_transport_header(skb))->type); + ++ err = -EPERM; ++ if (!nx_check(0, VS_ADMIN) && !capable(CAP_NET_RAW) && ++ sk->sk_nx_info && ++ !v4_addr_in_nx_info(sk->sk_nx_info, iph->saddr, NXA_MASK_BIND)) ++ goto error_free; ++ + err = NF_HOOK(NFPROTO_IPV4, NF_INET_LOCAL_OUT, skb, NULL, + rt->dst.dev, dst_output); + if (err > 0) +@@ -575,6 +581,16 @@ static int raw_sendmsg(struct kiocb *ioc + goto done; + } + ++ if (sk->sk_nx_info) { ++ rt = ip_v4_find_src(sock_net(sk), sk->sk_nx_info, &fl4); ++ if (IS_ERR(rt)) { ++ err = PTR_ERR(rt); ++ rt = NULL; ++ goto done; ++ } ++ ip_rt_put(rt); ++ } ++ + security_sk_classify_flow(sk, flowi4_to_flowi(&fl4)); + rt = ip_route_output_flow(sock_net(sk), &fl4, sk); + if (IS_ERR(rt)) { +@@ -651,17 +667,19 @@ static int raw_bind(struct sock *sk, str + { + struct inet_sock *inet = inet_sk(sk); + struct sockaddr_in *addr = (struct sockaddr_in *) uaddr; ++ struct nx_v4_sock_addr nsa = { 0 }; + int ret = -EINVAL; + int chk_addr_ret; + + if (sk->sk_state != TCP_CLOSE || addr_len < sizeof(struct sockaddr_in)) + goto out; +- chk_addr_ret = inet_addr_type(sock_net(sk), addr->sin_addr.s_addr); ++ v4_map_sock_addr(inet, addr, &nsa); ++ chk_addr_ret = inet_addr_type(sock_net(sk), nsa.saddr); + ret = -EADDRNOTAVAIL; +- if (addr->sin_addr.s_addr && chk_addr_ret != RTN_LOCAL && ++ if (nsa.saddr && chk_addr_ret != RTN_LOCAL && + chk_addr_ret != RTN_MULTICAST && chk_addr_ret != RTN_BROADCAST) + goto out; +- inet->inet_rcv_saddr = inet->inet_saddr = addr->sin_addr.s_addr; ++ v4_set_sock_addr(inet, &nsa); + if (chk_addr_ret == RTN_MULTICAST || chk_addr_ret == RTN_BROADCAST) + inet->inet_saddr = 0; /* Use device */ + sk_dst_reset(sk); +@@ -713,7 +731,8 @@ static int raw_recvmsg(struct kiocb 
*ioc + /* Copy the address. */ + if (sin) { + sin->sin_family = AF_INET; +- sin->sin_addr.s_addr = ip_hdr(skb)->saddr; ++ sin->sin_addr.s_addr = ++ nx_map_sock_lback(sk->sk_nx_info, ip_hdr(skb)->saddr); + sin->sin_port = 0; + memset(&sin->sin_zero, 0, sizeof(sin->sin_zero)); + } +@@ -909,7 +928,8 @@ static struct sock *raw_get_first(struct + struct hlist_node *node; + + sk_for_each(sk, node, &state->h->ht[state->bucket]) +- if (sock_net(sk) == seq_file_net(seq)) ++ if ((sock_net(sk) == seq_file_net(seq)) && ++ nx_check(sk->sk_nid, VS_WATCH_P | VS_IDENT)) + goto found; + } + sk = NULL; +@@ -925,7 +945,8 @@ static struct sock *raw_get_next(struct + sk = sk_next(sk); + try_again: + ; +- } while (sk && sock_net(sk) != seq_file_net(seq)); ++ } while (sk && ((sock_net(sk) != seq_file_net(seq)) || ++ !nx_check(sk->sk_nid, VS_WATCH_P | VS_IDENT))); + + if (!sk && ++state->bucket < RAW_HTABLE_SIZE) { + sk = sk_head(&state->h->ht[state->bucket]); +diff -NurpP --minimal linux-3.2.34/net/ipv4/route.c linux-3.2.34-vs2.3.2.15/net/ipv4/route.c +--- linux-3.2.34/net/ipv4/route.c 2012-01-09 16:15:04.000000000 +0100 ++++ linux-3.2.34-vs2.3.2.15/net/ipv4/route.c 2012-02-07 03:13:38.000000000 +0100 +@@ -2709,7 +2709,7 @@ static struct rtable *ip_route_output_sl + + + if (fl4->flowi4_oif) { +- dev_out = dev_get_by_index_rcu(net, fl4->flowi4_oif); ++ dev_out = dev_get_by_index_real_rcu(net, fl4->flowi4_oif); + rth = ERR_PTR(-ENODEV); + if (dev_out == NULL) + goto out; +diff -NurpP --minimal linux-3.2.34/net/ipv4/tcp.c linux-3.2.34-vs2.3.2.15/net/ipv4/tcp.c +--- linux-3.2.34/net/ipv4/tcp.c 2012-11-18 18:42:25.000000000 +0100 ++++ linux-3.2.34-vs2.3.2.15/net/ipv4/tcp.c 2012-11-18 21:11:16.000000000 +0100 +@@ -266,6 +266,7 @@ + #include + #include + #include ++#include + + #include + #include +diff -NurpP --minimal linux-3.2.34/net/ipv4/tcp_ipv4.c linux-3.2.34-vs2.3.2.15/net/ipv4/tcp_ipv4.c +--- linux-3.2.34/net/ipv4/tcp_ipv4.c 2012-11-18 18:42:25.000000000 +0100 ++++ linux-3.2.34-vs2.3.2.15/net/ipv4/tcp_ipv4.c 2012-11-06 18:08:24.000000000 +0100 +@@ -2033,6 +2033,12 @@ static void *listening_get_next(struct s + req = req->dl_next; + while (1) { + while (req) { ++ vxdprintk(VXD_CBIT(net, 6), ++ "sk,req: %p [#%d] (from %d)", req->sk, ++ (req->sk)?req->sk->sk_nid:0, nx_current_nid()); ++ if (req->sk && ++ !nx_check(req->sk->sk_nid, VS_WATCH_P | VS_IDENT)) ++ continue; + if (req->rsk_ops->family == st->family) { + cur = req; + goto out; +@@ -2057,6 +2063,10 @@ get_req: + } + get_sk: + sk_nulls_for_each_from(sk, node) { ++ vxdprintk(VXD_CBIT(net, 6), "sk: %p [#%d] (from %d)", ++ sk, sk->sk_nid, nx_current_nid()); ++ if (!nx_check(sk->sk_nid, VS_WATCH_P | VS_IDENT)) ++ continue; + if (!net_eq(sock_net(sk), net)) + continue; + if (sk->sk_family == st->family) { +@@ -2133,6 +2143,11 @@ static void *established_get_first(struc + + spin_lock_bh(lock); + sk_nulls_for_each(sk, node, &tcp_hashinfo.ehash[st->bucket].chain) { ++ vxdprintk(VXD_CBIT(net, 6), ++ "sk,egf: %p [#%d] (from %d)", ++ sk, sk->sk_nid, nx_current_nid()); ++ if (!nx_check(sk->sk_nid, VS_WATCH_P | VS_IDENT)) ++ continue; + if (sk->sk_family != st->family || + !net_eq(sock_net(sk), net)) { + continue; +@@ -2143,6 +2158,11 @@ static void *established_get_first(struc + st->state = TCP_SEQ_STATE_TIME_WAIT; + inet_twsk_for_each(tw, node, + &tcp_hashinfo.ehash[st->bucket].twchain) { ++ vxdprintk(VXD_CBIT(net, 6), ++ "tw: %p [#%d] (from %d)", ++ tw, tw->tw_nid, nx_current_nid()); ++ if (!nx_check(tw->tw_nid, VS_WATCH_P | VS_IDENT)) ++ continue; + if (tw->tw_family 
!= st->family || + !net_eq(twsk_net(tw), net)) { + continue; +@@ -2172,7 +2192,9 @@ static void *established_get_next(struct + tw = cur; + tw = tw_next(tw); + get_tw: +- while (tw && (tw->tw_family != st->family || !net_eq(twsk_net(tw), net))) { ++ while (tw && (tw->tw_family != st->family || ++ !net_eq(twsk_net(tw), net) || ++ !nx_check(tw->tw_nid, VS_WATCH_P | VS_IDENT))) { + tw = tw_next(tw); + } + if (tw) { +@@ -2196,6 +2218,11 @@ get_tw: + sk = sk_nulls_next(sk); + + sk_nulls_for_each_from(sk, node) { ++ vxdprintk(VXD_CBIT(net, 6), ++ "sk,egn: %p [#%d] (from %d)", ++ sk, sk->sk_nid, nx_current_nid()); ++ if (!nx_check(sk->sk_nid, VS_WATCH_P | VS_IDENT)) ++ continue; + if (sk->sk_family == st->family && net_eq(sock_net(sk), net)) + goto found; + } +@@ -2401,9 +2428,9 @@ static void get_openreq4(const struct so + seq_printf(f, "%4d: %08X:%04X %08X:%04X" + " %02X %08X:%08X %02X:%08lX %08X %5d %8d %u %d %pK%n", + i, +- ireq->loc_addr, ++ nx_map_sock_lback(current_nx_info(), ireq->loc_addr), + ntohs(inet_sk(sk)->inet_sport), +- ireq->rmt_addr, ++ nx_map_sock_lback(current_nx_info(), ireq->rmt_addr), + ntohs(ireq->rmt_port), + TCP_SYN_RECV, + 0, 0, /* could print option size, but that is af dependent. */ +@@ -2425,8 +2452,8 @@ static void get_tcp4_sock(struct sock *s + const struct tcp_sock *tp = tcp_sk(sk); + const struct inet_connection_sock *icsk = inet_csk(sk); + const struct inet_sock *inet = inet_sk(sk); +- __be32 dest = inet->inet_daddr; +- __be32 src = inet->inet_rcv_saddr; ++ __be32 dest = nx_map_sock_lback(current_nx_info(), inet->inet_daddr); ++ __be32 src = nx_map_sock_lback(current_nx_info(), inet->inet_rcv_saddr); + __u16 destp = ntohs(inet->inet_dport); + __u16 srcp = ntohs(inet->inet_sport); + int rx_queue; +@@ -2483,8 +2510,8 @@ static void get_timewait4_sock(const str + if (ttd < 0) + ttd = 0; + +- dest = tw->tw_daddr; +- src = tw->tw_rcv_saddr; ++ dest = nx_map_sock_lback(current_nx_info(), tw->tw_daddr); ++ src = nx_map_sock_lback(current_nx_info(), tw->tw_rcv_saddr); + destp = ntohs(tw->tw_dport); + srcp = ntohs(tw->tw_sport); + +diff -NurpP --minimal linux-3.2.34/net/ipv4/tcp_minisocks.c linux-3.2.34-vs2.3.2.15/net/ipv4/tcp_minisocks.c +--- linux-3.2.34/net/ipv4/tcp_minisocks.c 2012-01-09 16:15:04.000000000 +0100 ++++ linux-3.2.34-vs2.3.2.15/net/ipv4/tcp_minisocks.c 2011-12-05 19:33:02.000000000 +0100 +@@ -23,6 +23,9 @@ + #include + #include + #include ++#include ++#include ++#include + #include + #include + #include +@@ -336,6 +339,11 @@ void tcp_time_wait(struct sock *sk, int + tcptw->tw_ts_recent = tp->rx_opt.ts_recent; + tcptw->tw_ts_recent_stamp = tp->rx_opt.ts_recent_stamp; + ++ tw->tw_xid = sk->sk_xid; ++ tw->tw_vx_info = NULL; ++ tw->tw_nid = sk->sk_nid; ++ tw->tw_nx_info = NULL; ++ + #if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) + if (tw->tw_family == PF_INET6) { + struct ipv6_pinfo *np = inet6_sk(sk); +diff -NurpP --minimal linux-3.2.34/net/ipv4/udp.c linux-3.2.34-vs2.3.2.15/net/ipv4/udp.c +--- linux-3.2.34/net/ipv4/udp.c 2012-01-09 16:15:04.000000000 +0100 ++++ linux-3.2.34-vs2.3.2.15/net/ipv4/udp.c 2012-06-26 19:07:00.000000000 +0200 +@@ -297,14 +297,7 @@ fail: + } + EXPORT_SYMBOL(udp_lib_get_port); + +-static int ipv4_rcv_saddr_equal(const struct sock *sk1, const struct sock *sk2) +-{ +- struct inet_sock *inet1 = inet_sk(sk1), *inet2 = inet_sk(sk2); +- +- return (!ipv6_only_sock(sk2) && +- (!inet1->inet_rcv_saddr || !inet2->inet_rcv_saddr || +- inet1->inet_rcv_saddr == inet2->inet_rcv_saddr)); +-} ++extern int ipv4_rcv_saddr_equal(const struct 
sock *, const struct sock *); + + static unsigned int udp4_portaddr_hash(struct net *net, __be32 saddr, + unsigned int port) +@@ -339,6 +332,11 @@ static inline int compute_score(struct s + if (inet->inet_rcv_saddr != daddr) + return -1; + score += 2; ++ } else { ++ /* block non nx_info ips */ ++ if (!v4_addr_in_nx_info(sk->sk_nx_info, ++ daddr, NXA_MASK_BIND)) ++ return -1; + } + if (inet->inet_daddr) { + if (inet->inet_daddr != saddr) +@@ -442,6 +440,7 @@ exact_match: + return result; + } + ++ + /* UDP is nearly always wildcards out the wazoo, it makes no sense to try + * harder than this. -DaveM + */ +@@ -487,6 +486,11 @@ begin: + sk_nulls_for_each_rcu(sk, node, &hslot->head) { + score = compute_score(sk, net, saddr, hnum, sport, + daddr, dport, dif); ++ /* FIXME: disabled? ++ if (score == 9) { ++ result = sk; ++ break; ++ } else */ + if (score > badness) { + result = sk; + badness = score; +@@ -500,6 +504,7 @@ begin: + if (get_nulls_value(node) != slot) + goto begin; + ++ + if (result) { + if (unlikely(!atomic_inc_not_zero_hint(&result->sk_refcnt, 2))) + result = NULL; +@@ -509,6 +514,7 @@ begin: + goto begin; + } + } ++ + rcu_read_unlock(); + return result; + } +@@ -551,8 +557,7 @@ static inline struct sock *udp_v4_mcast_ + udp_sk(s)->udp_port_hash != hnum || + (inet->inet_daddr && inet->inet_daddr != rmt_addr) || + (inet->inet_dport != rmt_port && inet->inet_dport) || +- (inet->inet_rcv_saddr && +- inet->inet_rcv_saddr != loc_addr) || ++ !v4_sock_addr_match(sk->sk_nx_info, inet, loc_addr) || + ipv6_only_sock(s) || + (s->sk_bound_dev_if && s->sk_bound_dev_if != dif)) + continue; +@@ -930,6 +935,16 @@ int udp_sendmsg(struct kiocb *iocb, stru + inet_sk_flowi_flags(sk)|FLOWI_FLAG_CAN_SLEEP, + faddr, saddr, dport, inet->inet_sport); + ++ if (sk->sk_nx_info) { ++ rt = ip_v4_find_src(net, sk->sk_nx_info, fl4); ++ if (IS_ERR(rt)) { ++ err = PTR_ERR(rt); ++ rt = NULL; ++ goto out; ++ } ++ ip_rt_put(rt); ++ } ++ + security_sk_classify_flow(sk, flowi4_to_flowi(fl4)); + rt = ip_route_output_flow(net, fl4, sk); + if (IS_ERR(rt)) { +@@ -1228,7 +1243,8 @@ try_again: + if (sin) { + sin->sin_family = AF_INET; + sin->sin_port = udp_hdr(skb)->source; +- sin->sin_addr.s_addr = ip_hdr(skb)->saddr; ++ sin->sin_addr.s_addr = nx_map_sock_lback( ++ skb->sk->sk_nx_info, ip_hdr(skb)->saddr); + memset(sin->sin_zero, 0, sizeof(sin->sin_zero)); + } + if (inet->cmsg_flags) +@@ -1974,6 +1990,8 @@ static struct sock *udp_get_first(struct + sk_nulls_for_each(sk, node, &hslot->head) { + if (!net_eq(sock_net(sk), net)) + continue; ++ if (!nx_check(sk->sk_nid, VS_WATCH_P | VS_IDENT)) ++ continue; + if (sk->sk_family == state->family) + goto found; + } +@@ -1991,7 +2009,9 @@ static struct sock *udp_get_next(struct + + do { + sk = sk_nulls_next(sk); +- } while (sk && (!net_eq(sock_net(sk), net) || sk->sk_family != state->family)); ++ } while (sk && (!net_eq(sock_net(sk), net) || ++ sk->sk_family != state->family || ++ !nx_check(sk->sk_nid, VS_WATCH_P | VS_IDENT))); + + if (!sk) { + if (state->bucket <= state->udp_table->mask) +@@ -2087,8 +2107,8 @@ static void udp4_format_sock(struct sock + int bucket, int *len) + { + struct inet_sock *inet = inet_sk(sp); +- __be32 dest = inet->inet_daddr; +- __be32 src = inet->inet_rcv_saddr; ++ __be32 dest = nx_map_sock_lback(current_nx_info(), inet->inet_daddr); ++ __be32 src = nx_map_sock_lback(current_nx_info(), inet->inet_rcv_saddr); + __u16 destp = ntohs(inet->inet_dport); + __u16 srcp = ntohs(inet->inet_sport); + +diff -NurpP --minimal linux-3.2.34/net/ipv6/Kconfig 
linux-3.2.34-vs2.3.2.15/net/ipv6/Kconfig +--- linux-3.2.34/net/ipv6/Kconfig 2010-08-02 16:52:59.000000000 +0200 ++++ linux-3.2.34-vs2.3.2.15/net/ipv6/Kconfig 2011-12-05 19:33:02.000000000 +0100 +@@ -4,8 +4,8 @@ + + # IPv6 as module will cause a CRASH if you try to unload it + menuconfig IPV6 +- tristate "The IPv6 protocol" +- default m ++ bool "The IPv6 protocol" ++ default n + ---help--- + This is complemental support for the IP version 6. + You will still be able to do traditional IPv4 networking as well. +diff -NurpP --minimal linux-3.2.34/net/ipv6/addrconf.c linux-3.2.34-vs2.3.2.15/net/ipv6/addrconf.c +--- linux-3.2.34/net/ipv6/addrconf.c 2012-11-18 18:42:25.000000000 +0100 ++++ linux-3.2.34-vs2.3.2.15/net/ipv6/addrconf.c 2012-10-22 12:59:53.000000000 +0200 +@@ -88,6 +88,8 @@ + #include + #include + #include ++#include ++#include + + /* Set to 3 to get tracing... */ + #define ACONF_DEBUG 2 +@@ -1111,7 +1113,7 @@ out: + + int ipv6_dev_get_saddr(struct net *net, struct net_device *dst_dev, + const struct in6_addr *daddr, unsigned int prefs, +- struct in6_addr *saddr) ++ struct in6_addr *saddr, struct nx_info *nxi) + { + struct ipv6_saddr_score scores[2], + *score = &scores[0], *hiscore = &scores[1]; +@@ -1183,6 +1185,8 @@ int ipv6_dev_get_saddr(struct net *net, + dev->name); + continue; + } ++ if (!v6_addr_in_nx_info(nxi, &score->ifa->addr, -1)) ++ continue; + + score->rule = -1; + bitmap_zero(score->scorebits, IPV6_SADDR_RULE_MAX); +@@ -3155,7 +3159,10 @@ static void if6_seq_stop(struct seq_file + static int if6_seq_show(struct seq_file *seq, void *v) + { + struct inet6_ifaddr *ifp = (struct inet6_ifaddr *)v; +- seq_printf(seq, "%pi6 %02x %02x %02x %02x %8s\n", ++ ++ if (nx_check(0, VS_ADMIN|VS_WATCH) || ++ v6_addr_in_nx_info(current_nx_info(), &ifp->addr, -1)) ++ seq_printf(seq, "%pi6 %02x %02x %02x %02x %8s\n", + &ifp->addr, + ifp->idev->dev->ifindex, + ifp->prefix_len, +@@ -3661,6 +3668,11 @@ static int in6_dump_addrs(struct inet6_d + struct ifacaddr6 *ifaca; + int err = 1; + int ip_idx = *p_ip_idx; ++ struct nx_info *nxi = skb->sk ? skb->sk->sk_nx_info : NULL; ++ ++ /* disable ipv6 on non v6 guests */ ++ if (nxi && !nx_info_has_v6(nxi)) ++ return skb->len; + + read_lock_bh(&idev->lock); + switch (type) { +@@ -3671,6 +3683,8 @@ static int in6_dump_addrs(struct inet6_d + list_for_each_entry(ifa, &idev->addr_list, if_list) { + if (++ip_idx < s_ip_idx) + continue; ++ if (!v6_addr_in_nx_info(nxi, &ifa->addr, -1)) ++ continue; + err = inet6_fill_ifaddr(skb, ifa, + NETLINK_CB(cb->skb).pid, + cb->nlh->nlmsg_seq, +@@ -3687,6 +3701,8 @@ static int in6_dump_addrs(struct inet6_d + ifmca = ifmca->next, ip_idx++) { + if (ip_idx < s_ip_idx) + continue; ++ if (!v6_addr_in_nx_info(nxi, &ifmca->mca_addr, -1)) ++ continue; + err = inet6_fill_ifmcaddr(skb, ifmca, + NETLINK_CB(cb->skb).pid, + cb->nlh->nlmsg_seq, +@@ -3702,6 +3718,8 @@ static int in6_dump_addrs(struct inet6_d + ifaca = ifaca->aca_next, ip_idx++) { + if (ip_idx < s_ip_idx) + continue; ++ if (!v6_addr_in_nx_info(nxi, &ifaca->aca_addr, -1)) ++ continue; + err = inet6_fill_ifacaddr(skb, ifaca, + NETLINK_CB(cb->skb).pid, + cb->nlh->nlmsg_seq, +@@ -4087,6 +4105,11 @@ static int inet6_dump_ifinfo(struct sk_b + struct inet6_dev *idev; + struct hlist_head *head; + struct hlist_node *node; ++ struct nx_info *nxi = skb->sk ? skb->sk->sk_nx_info : NULL; ++ ++ /* FIXME: maybe disable ipv6 on non v6 guests? 
++ if (skb->sk && skb->sk->sk_vx_info) ++ return skb->len; */ + + s_h = cb->args[0]; + s_idx = cb->args[1]; +@@ -4098,6 +4121,8 @@ static int inet6_dump_ifinfo(struct sk_b + hlist_for_each_entry_rcu(dev, node, head, index_hlist) { + if (idx < s_idx) + goto cont; ++ if (!v6_dev_in_nx_info(dev, nxi)) ++ goto cont; + idev = __in6_dev_get(dev); + if (!idev) + goto cont; +diff -NurpP --minimal linux-3.2.34/net/ipv6/af_inet6.c linux-3.2.34-vs2.3.2.15/net/ipv6/af_inet6.c +--- linux-3.2.34/net/ipv6/af_inet6.c 2011-10-24 18:45:34.000000000 +0200 ++++ linux-3.2.34-vs2.3.2.15/net/ipv6/af_inet6.c 2012-09-01 11:40:43.000000000 +0200 +@@ -42,6 +42,8 @@ + #include + #include + #include ++#include ++#include + + #include + #include +@@ -160,9 +162,12 @@ lookup_protocol: + } + + err = -EPERM; ++ if ((protocol == IPPROTO_ICMPV6) && ++ nx_capable(CAP_NET_RAW, NXC_RAW_ICMP)) ++ goto override; + if (sock->type == SOCK_RAW && !kern && !capable(CAP_NET_RAW)) + goto out_rcu_unlock; +- ++override: + sock->ops = answer->ops; + answer_prot = answer->prot; + answer_no_check = answer->no_check; +@@ -261,6 +266,7 @@ int inet6_bind(struct socket *sock, stru + struct inet_sock *inet = inet_sk(sk); + struct ipv6_pinfo *np = inet6_sk(sk); + struct net *net = sock_net(sk); ++ struct nx_v6_sock_addr nsa; + __be32 v4addr = 0; + unsigned short snum; + int addr_type = 0; +@@ -276,6 +282,10 @@ int inet6_bind(struct socket *sock, stru + if (addr->sin6_family != AF_INET6) + return -EAFNOSUPPORT; + ++ err = v6_map_sock_addr(inet, addr, &nsa); ++ if (err) ++ return err; ++ + addr_type = ipv6_addr_type(&addr->sin6_addr); + if ((addr_type & IPV6_ADDR_MULTICAST) && sock->type == SOCK_STREAM) + return -EINVAL; +@@ -307,6 +317,7 @@ int inet6_bind(struct socket *sock, stru + /* Reproduce AF_INET checks to make the bindings consistent */ + v4addr = addr->sin6_addr.s6_addr32[3]; + chk_addr_ret = inet_addr_type(net, v4addr); ++ + if (!sysctl_ip_nonlocal_bind && + !(inet->freebind || inet->transparent) && + v4addr != htonl(INADDR_ANY) && +@@ -316,6 +327,10 @@ int inet6_bind(struct socket *sock, stru + err = -EADDRNOTAVAIL; + goto out; + } ++ if (!v4_addr_in_nx_info(sk->sk_nx_info, v4addr, NXA_MASK_BIND)) { ++ err = -EADDRNOTAVAIL; ++ goto out; ++ } + } else { + if (addr_type != IPV6_ADDR_ANY) { + struct net_device *dev = NULL; +@@ -342,6 +357,11 @@ int inet6_bind(struct socket *sock, stru + } + } + ++ if (!v6_addr_in_nx_info(sk->sk_nx_info, &addr->sin6_addr, -1)) { ++ err = -EADDRNOTAVAIL; ++ goto out_unlock; ++ } ++ + /* ipv4 addr of the socket is invalid. Only the + * unspecified and mapped address have a v4 equivalent. + */ +@@ -358,6 +378,9 @@ int inet6_bind(struct socket *sock, stru + } + } + ++ /* what's that for? */ ++ v6_set_sock_addr(inet, &nsa); ++ + inet->inet_rcv_saddr = v4addr; + inet->inet_saddr = v4addr; + +@@ -459,9 +482,11 @@ int inet6_getname(struct socket *sock, s + return -ENOTCONN; + sin->sin6_port = inet->inet_dport; + ipv6_addr_copy(&sin->sin6_addr, &np->daddr); ++ /* FIXME: remap lback? */ + if (np->sndflow) + sin->sin6_flowinfo = np->flow_label; + } else { ++ /* FIXME: remap lback? 
*/ + if (ipv6_addr_any(&np->rcv_saddr)) + ipv6_addr_copy(&sin->sin6_addr, &np->saddr); + else +diff -NurpP --minimal linux-3.2.34/net/ipv6/datagram.c linux-3.2.34-vs2.3.2.15/net/ipv6/datagram.c +--- linux-3.2.34/net/ipv6/datagram.c 2012-01-09 16:15:04.000000000 +0100 ++++ linux-3.2.34-vs2.3.2.15/net/ipv6/datagram.c 2011-12-05 19:33:02.000000000 +0100 +@@ -642,7 +642,7 @@ int datagram_send_ctl(struct net *net, s + + rcu_read_lock(); + if (fl6->flowi6_oif) { +- dev = dev_get_by_index_rcu(net, fl6->flowi6_oif); ++ dev = dev_get_by_index_real_rcu(net, fl6->flowi6_oif); + if (!dev) { + rcu_read_unlock(); + return -ENODEV; +diff -NurpP --minimal linux-3.2.34/net/ipv6/fib6_rules.c linux-3.2.34-vs2.3.2.15/net/ipv6/fib6_rules.c +--- linux-3.2.34/net/ipv6/fib6_rules.c 2012-01-09 16:15:04.000000000 +0100 ++++ linux-3.2.34-vs2.3.2.15/net/ipv6/fib6_rules.c 2011-12-05 19:33:02.000000000 +0100 +@@ -91,7 +91,7 @@ static int fib6_rule_action(struct fib_r + ip6_dst_idev(&rt->dst)->dev, + &flp6->daddr, + rt6_flags2srcprefs(flags), +- &saddr)) ++ &saddr, NULL)) + goto again; + if (!ipv6_prefix_equal(&saddr, &r->src.addr, + r->src.plen)) +diff -NurpP --minimal linux-3.2.34/net/ipv6/inet6_hashtables.c linux-3.2.34-vs2.3.2.15/net/ipv6/inet6_hashtables.c +--- linux-3.2.34/net/ipv6/inet6_hashtables.c 2011-10-24 18:45:34.000000000 +0200 ++++ linux-3.2.34-vs2.3.2.15/net/ipv6/inet6_hashtables.c 2011-12-05 19:33:02.000000000 +0100 +@@ -16,6 +16,7 @@ + + #include + #include ++#include + + #include + #include +@@ -83,7 +84,6 @@ struct sock *__inet6_lookup_established( + unsigned int slot = hash & hashinfo->ehash_mask; + struct inet_ehash_bucket *head = &hashinfo->ehash[slot]; + +- + rcu_read_lock(); + begin: + sk_nulls_for_each_rcu(sk, node, &head->chain) { +@@ -95,7 +95,7 @@ begin: + sock_put(sk); + goto begin; + } +- goto out; ++ goto out; + } + } + if (get_nulls_value(node) != slot) +@@ -141,6 +141,9 @@ static inline int compute_score(struct s + if (!ipv6_addr_equal(&np->rcv_saddr, daddr)) + return -1; + score++; ++ } else { ++ if (!v6_addr_in_nx_info(sk->sk_nx_info, daddr, -1)) ++ return -1; + } + if (sk->sk_bound_dev_if) { + if (sk->sk_bound_dev_if != dif) +diff -NurpP --minimal linux-3.2.34/net/ipv6/ip6_output.c linux-3.2.34-vs2.3.2.15/net/ipv6/ip6_output.c +--- linux-3.2.34/net/ipv6/ip6_output.c 2012-11-18 18:42:25.000000000 +0100 ++++ linux-3.2.34-vs2.3.2.15/net/ipv6/ip6_output.c 2012-06-14 20:45:24.000000000 +0200 +@@ -963,7 +963,8 @@ static int ip6_dst_lookup_tail(struct so + struct rt6_info *rt = (struct rt6_info *) *dst; + err = ip6_route_get_saddr(net, rt, &fl6->daddr, + sk ? inet6_sk(sk)->srcprefs : 0, +- &fl6->saddr); ++ &fl6->saddr, ++ sk ? 
sk->sk_nx_info : NULL); + if (err) + goto out_err_release; + } +diff -NurpP --minimal linux-3.2.34/net/ipv6/ndisc.c linux-3.2.34-vs2.3.2.15/net/ipv6/ndisc.c +--- linux-3.2.34/net/ipv6/ndisc.c 2012-11-18 18:42:25.000000000 +0100 ++++ linux-3.2.34-vs2.3.2.15/net/ipv6/ndisc.c 2012-11-18 21:11:16.000000000 +0100 +@@ -588,7 +588,7 @@ static void ndisc_send_na(struct net_dev + } else { + if (ipv6_dev_get_saddr(dev_net(dev), dev, daddr, + inet6_sk(dev_net(dev)->ipv6.ndisc_sk)->srcprefs, +- &tmpaddr)) ++ &tmpaddr, NULL)) + return; + src_addr = &tmpaddr; + } +diff -NurpP --minimal linux-3.2.34/net/ipv6/raw.c linux-3.2.34-vs2.3.2.15/net/ipv6/raw.c +--- linux-3.2.34/net/ipv6/raw.c 2012-11-18 18:42:25.000000000 +0100 ++++ linux-3.2.34-vs2.3.2.15/net/ipv6/raw.c 2012-10-22 12:59:53.000000000 +0200 +@@ -30,6 +30,7 @@ + #include + #include + #include ++#include + #include + #include + #include +@@ -284,6 +285,13 @@ static int rawv6_bind(struct sock *sk, s + goto out_unlock; + } + ++ if (!v6_addr_in_nx_info(sk->sk_nx_info, &addr->sin6_addr, -1)) { ++ err = -EADDRNOTAVAIL; ++ if (dev) ++ dev_put(dev); ++ goto out; ++ } ++ + /* ipv4 addr of the socket is invalid. Only the + * unspecified and mapped address have a v4 equivalent. + */ +diff -NurpP --minimal linux-3.2.34/net/ipv6/route.c linux-3.2.34-vs2.3.2.15/net/ipv6/route.c +--- linux-3.2.34/net/ipv6/route.c 2012-11-18 18:42:25.000000000 +0100 ++++ linux-3.2.34-vs2.3.2.15/net/ipv6/route.c 2012-11-18 21:11:16.000000000 +0100 +@@ -55,6 +55,7 @@ + #include + #include + #include ++#include + + #include + +@@ -2094,15 +2095,17 @@ int ip6_route_get_saddr(struct net *net, + struct rt6_info *rt, + const struct in6_addr *daddr, + unsigned int prefs, +- struct in6_addr *saddr) ++ struct in6_addr *saddr, ++ struct nx_info *nxi) + { + struct inet6_dev *idev = ip6_dst_idev((struct dst_entry*)rt); + int err = 0; +- if (rt->rt6i_prefsrc.plen) ++ if (rt->rt6i_prefsrc.plen && (!nxi || ++ v6_addr_in_nx_info(nxi, &rt->rt6i_prefsrc.addr, NXA_TYPE_ADDR))) + ipv6_addr_copy(saddr, &rt->rt6i_prefsrc.addr); + else + err = ipv6_dev_get_saddr(net, idev ? idev->dev : NULL, +- daddr, prefs, saddr); ++ daddr, prefs, saddr, nxi); + return err; + } + +@@ -2432,7 +2435,8 @@ static int rt6_fill_node(struct net *net + NLA_PUT_U32(skb, RTA_IIF, iif); + } else if (dst) { + struct in6_addr saddr_buf; +- if (ip6_route_get_saddr(net, rt, dst, 0, &saddr_buf) == 0) ++ if (ip6_route_get_saddr(net, rt, dst, 0, &saddr_buf, ++ (skb->sk ? skb->sk->sk_nx_info : NULL)) == 0) + NLA_PUT(skb, RTA_PREFSRC, 16, &saddr_buf); + } + +@@ -2639,6 +2643,7 @@ static int rt6_info_route(struct rt6_inf + struct seq_file *m = p_arg; + struct neighbour *n; + ++ /* FIXME: check for network context? */ + seq_printf(m, "%pi6 %02x ", &rt->rt6i_dst.addr, rt->rt6i_dst.plen); + + #ifdef CONFIG_IPV6_SUBTREES +diff -NurpP --minimal linux-3.2.34/net/ipv6/tcp_ipv6.c linux-3.2.34-vs2.3.2.15/net/ipv6/tcp_ipv6.c +--- linux-3.2.34/net/ipv6/tcp_ipv6.c 2012-11-18 18:42:25.000000000 +0100 ++++ linux-3.2.34-vs2.3.2.15/net/ipv6/tcp_ipv6.c 2012-11-06 18:08:24.000000000 +0100 +@@ -70,6 +70,7 @@ + + #include + #include ++#include + + static void tcp_v6_send_reset(struct sock *sk, struct sk_buff *skb); + static void tcp_v6_reqsk_send_ack(struct sock *sk, struct sk_buff *skb, +@@ -162,8 +163,15 @@ static int tcp_v6_connect(struct sock *s + * connect() to INADDR_ANY means loopback (BSD'ism). 
+ */ + +- if(ipv6_addr_any(&usin->sin6_addr)) +- usin->sin6_addr.s6_addr[15] = 0x1; ++ if(ipv6_addr_any(&usin->sin6_addr)) { ++ struct nx_info *nxi = sk->sk_nx_info; ++ ++ if (nxi && nx_info_has_v6(nxi)) ++ /* FIXME: remap lback? */ ++ usin->sin6_addr = nxi->v6.ip; ++ else ++ usin->sin6_addr.s6_addr[15] = 0x1; ++ } + + addr_type = ipv6_addr_type(&usin->sin6_addr); + +diff -NurpP --minimal linux-3.2.34/net/ipv6/udp.c linux-3.2.34-vs2.3.2.15/net/ipv6/udp.c +--- linux-3.2.34/net/ipv6/udp.c 2012-01-09 16:15:04.000000000 +0100 ++++ linux-3.2.34-vs2.3.2.15/net/ipv6/udp.c 2011-12-15 01:11:37.000000000 +0100 +@@ -45,41 +45,67 @@ + #include + #include + #include ++#include + + #include + #include + #include "udp_impl.h" + +-int ipv6_rcv_saddr_equal(const struct sock *sk, const struct sock *sk2) ++int ipv6_rcv_saddr_equal(const struct sock *sk1, const struct sock *sk2) + { +- const struct in6_addr *sk_rcv_saddr6 = &inet6_sk(sk)->rcv_saddr; ++ const struct in6_addr *sk1_rcv_saddr6 = &inet6_sk(sk1)->rcv_saddr; + const struct in6_addr *sk2_rcv_saddr6 = inet6_rcv_saddr(sk2); +- __be32 sk1_rcv_saddr = sk_rcv_saddr(sk); ++ __be32 sk1_rcv_saddr = sk_rcv_saddr(sk1); + __be32 sk2_rcv_saddr = sk_rcv_saddr(sk2); +- int sk_ipv6only = ipv6_only_sock(sk); ++ int sk1_ipv6only = ipv6_only_sock(sk1); + int sk2_ipv6only = inet_v6_ipv6only(sk2); +- int addr_type = ipv6_addr_type(sk_rcv_saddr6); ++ int addr_type = ipv6_addr_type(sk1_rcv_saddr6); + int addr_type2 = sk2_rcv_saddr6 ? ipv6_addr_type(sk2_rcv_saddr6) : IPV6_ADDR_MAPPED; + + /* if both are mapped, treat as IPv4 */ +- if (addr_type == IPV6_ADDR_MAPPED && addr_type2 == IPV6_ADDR_MAPPED) +- return (!sk2_ipv6only && ++ if (addr_type == IPV6_ADDR_MAPPED && addr_type2 == IPV6_ADDR_MAPPED) { ++ if (!sk2_ipv6only && + (!sk1_rcv_saddr || !sk2_rcv_saddr || +- sk1_rcv_saddr == sk2_rcv_saddr)); ++ sk1_rcv_saddr == sk2_rcv_saddr)) ++ goto vs_v4; ++ else ++ return 0; ++ } + + if (addr_type2 == IPV6_ADDR_ANY && + !(sk2_ipv6only && addr_type == IPV6_ADDR_MAPPED)) +- return 1; ++ goto vs; + + if (addr_type == IPV6_ADDR_ANY && +- !(sk_ipv6only && addr_type2 == IPV6_ADDR_MAPPED)) +- return 1; ++ !(sk1_ipv6only && addr_type2 == IPV6_ADDR_MAPPED)) ++ goto vs; + + if (sk2_rcv_saddr6 && +- ipv6_addr_equal(sk_rcv_saddr6, sk2_rcv_saddr6)) +- return 1; ++ ipv6_addr_equal(sk1_rcv_saddr6, sk2_rcv_saddr6)) ++ goto vs; + + return 0; ++ ++vs_v4: ++ if (!sk1_rcv_saddr && !sk2_rcv_saddr) ++ return nx_v4_addr_conflict(sk1->sk_nx_info, sk2->sk_nx_info); ++ if (!sk2_rcv_saddr) ++ return v4_addr_in_nx_info(sk1->sk_nx_info, sk2_rcv_saddr, -1); ++ if (!sk1_rcv_saddr) ++ return v4_addr_in_nx_info(sk2->sk_nx_info, sk1_rcv_saddr, -1); ++ return 1; ++vs: ++ if (addr_type2 == IPV6_ADDR_ANY && addr_type == IPV6_ADDR_ANY) ++ return nx_v6_addr_conflict(sk1->sk_nx_info, sk2->sk_nx_info); ++ else if (addr_type2 == IPV6_ADDR_ANY) ++ return v6_addr_in_nx_info(sk2->sk_nx_info, sk1_rcv_saddr6, -1); ++ else if (addr_type == IPV6_ADDR_ANY) { ++ if (addr_type2 == IPV6_ADDR_MAPPED) ++ return nx_v4_addr_conflict(sk1->sk_nx_info, sk2->sk_nx_info); ++ else ++ return v6_addr_in_nx_info(sk1->sk_nx_info, sk2_rcv_saddr6, -1); ++ } ++ return 1; + } + + static unsigned int udp6_portaddr_hash(struct net *net, +@@ -143,6 +169,10 @@ static inline int compute_score(struct s + if (!ipv6_addr_equal(&np->rcv_saddr, daddr)) + return -1; + score++; ++ } else { ++ /* block non nx_info ips */ ++ if (!v6_addr_in_nx_info(sk->sk_nx_info, daddr, -1)) ++ return -1; + } + if (!ipv6_addr_any(&np->daddr)) { + if (!ipv6_addr_equal(&np->daddr, 
saddr)) +diff -NurpP --minimal linux-3.2.34/net/ipv6/xfrm6_policy.c linux-3.2.34-vs2.3.2.15/net/ipv6/xfrm6_policy.c +--- linux-3.2.34/net/ipv6/xfrm6_policy.c 2011-07-22 11:18:13.000000000 +0200 ++++ linux-3.2.34-vs2.3.2.15/net/ipv6/xfrm6_policy.c 2011-12-05 19:33:02.000000000 +0100 +@@ -63,7 +63,7 @@ static int xfrm6_get_saddr(struct net *n + dev = ip6_dst_idev(dst)->dev; + ipv6_dev_get_saddr(dev_net(dev), dev, + (struct in6_addr *)&daddr->a6, 0, +- (struct in6_addr *)&saddr->a6); ++ (struct in6_addr *)&saddr->a6, NULL); + dst_release(dst); + return 0; + } +diff -NurpP --minimal linux-3.2.34/net/netfilter/ipvs/ip_vs_xmit.c linux-3.2.34-vs2.3.2.15/net/netfilter/ipvs/ip_vs_xmit.c +--- linux-3.2.34/net/netfilter/ipvs/ip_vs_xmit.c 2012-01-09 16:15:04.000000000 +0100 ++++ linux-3.2.34-vs2.3.2.15/net/netfilter/ipvs/ip_vs_xmit.c 2012-02-07 03:13:38.000000000 +0100 +@@ -226,7 +226,7 @@ __ip_vs_route_output_v6(struct net *net, + return dst; + if (ipv6_addr_any(&fl6.saddr) && + ipv6_dev_get_saddr(net, ip6_dst_idev(dst)->dev, +- &fl6.daddr, 0, &fl6.saddr) < 0) ++ &fl6.daddr, 0, &fl6.saddr, NULL) < 0) + goto out_err; + if (do_xfrm) { + dst = xfrm_lookup(net, dst, flowi6_to_flowi(&fl6), NULL, 0); +diff -NurpP --minimal linux-3.2.34/net/netlink/af_netlink.c linux-3.2.34-vs2.3.2.15/net/netlink/af_netlink.c +--- linux-3.2.34/net/netlink/af_netlink.c 2012-11-18 18:42:25.000000000 +0100 ++++ linux-3.2.34-vs2.3.2.15/net/netlink/af_netlink.c 2012-11-18 21:11:16.000000000 +0100 +@@ -55,6 +55,9 @@ + #include + #include + #include ++#include ++#include ++#include + + #include + #include +@@ -1926,6 +1929,8 @@ static struct sock *netlink_seq_socket_i + sk_for_each(s, node, &hash->table[j]) { + if (sock_net(s) != seq_file_net(seq)) + continue; ++ if (!nx_check(s->sk_nid, VS_WATCH_P | VS_IDENT)) ++ continue; + if (off == pos) { + iter->link = i; + iter->hash_idx = j; +@@ -1960,7 +1965,8 @@ static void *netlink_seq_next(struct seq + s = v; + do { + s = sk_next(s); +- } while (s && sock_net(s) != seq_file_net(seq)); ++ } while (s && (sock_net(s) != seq_file_net(seq) || ++ !nx_check(s->sk_nid, VS_WATCH_P | VS_IDENT))); + if (s) + return s; + +@@ -1972,7 +1978,8 @@ static void *netlink_seq_next(struct seq + + for (; j <= hash->mask; j++) { + s = sk_head(&hash->table[j]); +- while (s && sock_net(s) != seq_file_net(seq)) ++ while (s && (sock_net(s) != seq_file_net(seq) || ++ !nx_check(s->sk_nid, VS_WATCH_P | VS_IDENT))) + s = sk_next(s); + if (s) { + iter->link = i; +diff -NurpP --minimal linux-3.2.34/net/socket.c linux-3.2.34-vs2.3.2.15/net/socket.c +--- linux-3.2.34/net/socket.c 2012-11-18 18:42:25.000000000 +0100 ++++ linux-3.2.34-vs2.3.2.15/net/socket.c 2012-10-22 12:59:53.000000000 +0200 +@@ -98,6 +98,10 @@ + + #include + #include ++#include ++#include ++#include ++#include + + #include + #include +@@ -546,6 +550,7 @@ static inline int __sock_sendmsg_nosec(s + struct msghdr *msg, size_t size) + { + struct sock_iocb *si = kiocb_to_siocb(iocb); ++ size_t len; + + sock_update_classid(sock->sk); + +@@ -554,7 +559,22 @@ static inline int __sock_sendmsg_nosec(s + si->msg = msg; + si->size = size; + +- return sock->ops->sendmsg(iocb, sock, msg, size); ++ len = sock->ops->sendmsg(iocb, sock, msg, size); ++ if (sock->sk) { ++ if (len == size) ++ vx_sock_send(sock->sk, size); ++ else ++ vx_sock_fail(sock->sk, size); ++ } ++ vxdprintk(VXD_CBIT(net, 7), ++ "__sock_sendmsg: %p[%p,%p,%p;%d/%d]:%d/%zu", ++ sock, sock->sk, ++ (sock->sk)?sock->sk->sk_nx_info:0, ++ (sock->sk)?sock->sk->sk_vx_info:0, ++ (sock->sk)?sock->sk->sk_xid:0, 
++ (sock->sk)?sock->sk->sk_nid:0, ++ (unsigned int)size, len); ++ return len; + } + + static inline int __sock_sendmsg(struct kiocb *iocb, struct socket *sock, +@@ -694,6 +714,7 @@ static inline int __sock_recvmsg_nosec(s + struct msghdr *msg, size_t size, int flags) + { + struct sock_iocb *si = kiocb_to_siocb(iocb); ++ int len; + + sock_update_classid(sock->sk); + +@@ -703,7 +724,18 @@ static inline int __sock_recvmsg_nosec(s + si->size = size; + si->flags = flags; + +- return sock->ops->recvmsg(iocb, sock, msg, size, flags); ++ len = sock->ops->recvmsg(iocb, sock, msg, size, flags); ++ if ((len >= 0) && sock->sk) ++ vx_sock_recv(sock->sk, len); ++ vxdprintk(VXD_CBIT(net, 7), ++ "__sock_recvmsg: %p[%p,%p,%p;%d/%d]:%d/%d", ++ sock, sock->sk, ++ (sock->sk)?sock->sk->sk_nx_info:0, ++ (sock->sk)?sock->sk->sk_vx_info:0, ++ (sock->sk)?sock->sk->sk_xid:0, ++ (sock->sk)?sock->sk->sk_nid:0, ++ (unsigned int)size, len); ++ return len; + } + + static inline int __sock_recvmsg(struct kiocb *iocb, struct socket *sock, +@@ -1188,6 +1220,13 @@ int __sock_create(struct net *net, int f + if (type < 0 || type >= SOCK_MAX) + return -EINVAL; + ++ if (!nx_check(0, VS_ADMIN)) { ++ if (family == PF_INET && !current_nx_info_has_v4()) ++ return -EAFNOSUPPORT; ++ if (family == PF_INET6 && !current_nx_info_has_v6()) ++ return -EAFNOSUPPORT; ++ } ++ + /* Compatibility. + + This uglymoron is moved from INET layer to here to avoid +@@ -1323,6 +1362,7 @@ SYSCALL_DEFINE3(socket, int, family, int + if (retval < 0) + goto out; + ++ set_bit(SOCK_USER_SOCKET, &sock->flags); + retval = sock_map_fd(sock, flags & (O_CLOEXEC | O_NONBLOCK)); + if (retval < 0) + goto out_release; +@@ -1364,10 +1404,12 @@ SYSCALL_DEFINE4(socketpair, int, family, + err = sock_create(family, type, protocol, &sock1); + if (err < 0) + goto out; ++ set_bit(SOCK_USER_SOCKET, &sock1->flags); + + err = sock_create(family, type, protocol, &sock2); + if (err < 0) + goto out_release_1; ++ set_bit(SOCK_USER_SOCKET, &sock2->flags); + + err = sock1->ops->socketpair(sock1, sock2); + if (err < 0) +diff -NurpP --minimal linux-3.2.34/net/sunrpc/auth.c linux-3.2.34-vs2.3.2.15/net/sunrpc/auth.c +--- linux-3.2.34/net/sunrpc/auth.c 2011-10-24 18:45:34.000000000 +0200 ++++ linux-3.2.34-vs2.3.2.15/net/sunrpc/auth.c 2011-12-05 19:33:02.000000000 +0100 +@@ -14,6 +14,7 @@ + #include + #include + #include ++#include + + #ifdef RPC_DEBUG + # define RPCDBG_FACILITY RPCDBG_AUTH +@@ -427,6 +428,7 @@ rpcauth_lookupcred(struct rpc_auth *auth + memset(&acred, 0, sizeof(acred)); + acred.uid = cred->fsuid; + acred.gid = cred->fsgid; ++ acred.tag = dx_current_tag(); + acred.group_info = get_group_info(((struct cred *)cred)->group_info); + + ret = auth->au_ops->lookup_cred(auth, &acred, flags); +@@ -467,6 +469,7 @@ rpcauth_bind_root_cred(struct rpc_task * + struct auth_cred acred = { + .uid = 0, + .gid = 0, ++ .tag = dx_current_tag(), + }; + + dprintk("RPC: %5u looking up %s cred\n", +diff -NurpP --minimal linux-3.2.34/net/sunrpc/auth_unix.c linux-3.2.34-vs2.3.2.15/net/sunrpc/auth_unix.c +--- linux-3.2.34/net/sunrpc/auth_unix.c 2012-01-09 16:15:04.000000000 +0100 ++++ linux-3.2.34-vs2.3.2.15/net/sunrpc/auth_unix.c 2011-12-05 19:33:02.000000000 +0100 +@@ -12,12 +12,14 @@ + #include + #include + #include ++#include + + #define NFS_NGROUPS 16 + + struct unx_cred { + struct rpc_cred uc_base; + gid_t uc_gid; ++ tag_t uc_tag; + gid_t uc_gids[NFS_NGROUPS]; + }; + #define uc_uid uc_base.cr_uid +@@ -78,6 +80,7 @@ unx_create_cred(struct rpc_auth *auth, s + groups = NFS_NGROUPS; + + cred->uc_gid 
= acred->gid; ++ cred->uc_tag = acred->tag; + for (i = 0; i < groups; i++) + cred->uc_gids[i] = GROUP_AT(acred->group_info, i); + if (i < NFS_NGROUPS) +@@ -119,7 +122,9 @@ unx_match(struct auth_cred *acred, struc + unsigned int i; + + +- if (cred->uc_uid != acred->uid || cred->uc_gid != acred->gid) ++ if (cred->uc_uid != acred->uid || ++ cred->uc_gid != acred->gid || ++ cred->uc_tag != acred->tag) + return 0; + + if (acred->group_info != NULL) +@@ -145,7 +150,7 @@ unx_marshal(struct rpc_task *task, __be3 + struct rpc_clnt *clnt = task->tk_client; + struct unx_cred *cred = container_of(task->tk_rqstp->rq_cred, struct unx_cred, uc_base); + __be32 *base, *hold; +- int i; ++ int i, tag; + + *p++ = htonl(RPC_AUTH_UNIX); + base = p++; +@@ -155,9 +160,12 @@ unx_marshal(struct rpc_task *task, __be3 + * Copy the UTS nodename captured when the client was created. + */ + p = xdr_encode_array(p, clnt->cl_nodename, clnt->cl_nodelen); ++ tag = task->tk_client->cl_tag; + +- *p++ = htonl((u32) cred->uc_uid); +- *p++ = htonl((u32) cred->uc_gid); ++ *p++ = htonl((u32) TAGINO_UID(tag, ++ cred->uc_uid, cred->uc_tag)); ++ *p++ = htonl((u32) TAGINO_GID(tag, ++ cred->uc_gid, cred->uc_tag)); + hold = p++; + for (i = 0; i < 16 && cred->uc_gids[i] != (gid_t) NOGROUP; i++) + *p++ = htonl((u32) cred->uc_gids[i]); +diff -NurpP --minimal linux-3.2.34/net/sunrpc/clnt.c linux-3.2.34-vs2.3.2.15/net/sunrpc/clnt.c +--- linux-3.2.34/net/sunrpc/clnt.c 2012-11-18 18:42:25.000000000 +0100 ++++ linux-3.2.34-vs2.3.2.15/net/sunrpc/clnt.c 2012-06-14 20:45:24.000000000 +0200 +@@ -31,6 +31,7 @@ + #include + #include + #include ++#include + + #include + #include +@@ -361,6 +362,9 @@ struct rpc_clnt *rpc_create(struct rpc_c + if (!(args->flags & RPC_CLNT_CREATE_QUIET)) + clnt->cl_chatty = 1; + ++ /* TODO: handle RPC_CLNT_CREATE_TAGGED ++ if (args->flags & RPC_CLNT_CREATE_TAGGED) ++ clnt->cl_tag = 1; */ + return clnt; + } + EXPORT_SYMBOL_GPL(rpc_create); +diff -NurpP --minimal linux-3.2.34/net/unix/af_unix.c linux-3.2.34-vs2.3.2.15/net/unix/af_unix.c +--- linux-3.2.34/net/unix/af_unix.c 2012-11-18 18:42:25.000000000 +0100 ++++ linux-3.2.34-vs2.3.2.15/net/unix/af_unix.c 2012-10-22 12:59:54.000000000 +0200 +@@ -114,6 +114,8 @@ + #include + #include + #include ++#include ++#include + + static struct hlist_head unix_socket_table[UNIX_HASH_SIZE + 1]; + static DEFINE_SPINLOCK(unix_table_lock); +@@ -258,6 +260,8 @@ static struct sock *__unix_find_socket_b + if (!net_eq(sock_net(s), net)) + continue; + ++ if (!nx_check(s->sk_nid, VS_WATCH_P | VS_IDENT)) ++ continue; + if (u->addr->len == len && + !memcmp(u->addr->name, sunname, len)) + goto found; +@@ -2219,6 +2223,8 @@ static struct sock *unix_seq_idx(struct + for (s = first_unix_socket(&iter->i); s; s = next_unix_socket(&iter->i, s)) { + if (sock_net(s) != seq_file_net(seq)) + continue; ++ if (!nx_check(s->sk_nid, VS_WATCH_P | VS_IDENT)) ++ continue; + if (off == pos) + return s; + ++off; +@@ -2243,7 +2249,8 @@ static void *unix_seq_next(struct seq_fi + sk = first_unix_socket(&iter->i); + else + sk = next_unix_socket(&iter->i, sk); +- while (sk && (sock_net(sk) != seq_file_net(seq))) ++ while (sk && (sock_net(sk) != seq_file_net(seq) || ++ !nx_check(sk->sk_nid, VS_WATCH_P | VS_IDENT))) + sk = next_unix_socket(&iter->i, sk); + return sk; + } +diff -NurpP --minimal linux-3.2.34/scripts/checksyscalls.sh linux-3.2.34-vs2.3.2.15/scripts/checksyscalls.sh +--- linux-3.2.34/scripts/checksyscalls.sh 2011-03-15 18:07:46.000000000 +0100 ++++ linux-3.2.34-vs2.3.2.15/scripts/checksyscalls.sh 2011-12-05 
19:33:02.000000000 +0100 +@@ -193,7 +193,6 @@ cat << EOF + #define __IGNORE_afs_syscall + #define __IGNORE_getpmsg + #define __IGNORE_putpmsg +-#define __IGNORE_vserver + EOF + } + +diff -NurpP --minimal linux-3.2.34/security/commoncap.c linux-3.2.34-vs2.3.2.15/security/commoncap.c +--- linux-3.2.34/security/commoncap.c 2012-11-18 18:42:26.000000000 +0100 ++++ linux-3.2.34-vs2.3.2.15/security/commoncap.c 2012-04-24 16:50:48.000000000 +0200 +@@ -63,6 +63,7 @@ int cap_netlink_recv(struct sk_buff *skb + return -EPERM; + return 0; + } ++ + EXPORT_SYMBOL(cap_netlink_recv); + + /** +@@ -84,14 +85,20 @@ EXPORT_SYMBOL(cap_netlink_recv); + int cap_capable(struct task_struct *tsk, const struct cred *cred, + struct user_namespace *targ_ns, int cap, int audit) + { ++ struct vx_info *vxi = tsk->vx_info; ++ + for (;;) { + /* The creator of the user namespace has all caps. */ + if (targ_ns != &init_user_ns && targ_ns->creator == cred->user) + return 0; + + /* Do we have the necessary capabilities? */ +- if (targ_ns == cred->user->user_ns) +- return cap_raised(cred->cap_effective, cap) ? 0 : -EPERM; ++ if (targ_ns == cred->user->user_ns) { ++ if (vx_info_flags(vxi, VXF_STATE_SETUP, 0) && ++ cap_raised(cred->cap_effective, cap)) ++ return 0; ++ return vx_cap_raised(vxi, cred->cap_effective, cap) ? 0 : -EPERM; ++ } + + /* Have we tried all of the parent namespaces? */ + if (targ_ns == &init_user_ns) +@@ -621,7 +628,7 @@ int cap_inode_setxattr(struct dentry *de + + if (!strncmp(name, XATTR_SECURITY_PREFIX, + sizeof(XATTR_SECURITY_PREFIX) - 1) && +- !capable(CAP_SYS_ADMIN)) ++ !vx_capable(CAP_SYS_ADMIN, VXC_FS_SECURITY)) + return -EPERM; + return 0; + } +@@ -647,7 +654,7 @@ int cap_inode_removexattr(struct dentry + + if (!strncmp(name, XATTR_SECURITY_PREFIX, + sizeof(XATTR_SECURITY_PREFIX) - 1) && +- !capable(CAP_SYS_ADMIN)) ++ !vx_capable(CAP_SYS_ADMIN, VXC_FS_SECURITY)) + return -EPERM; + return 0; + } +diff -NurpP --minimal linux-3.2.34/security/selinux/hooks.c linux-3.2.34-vs2.3.2.15/security/selinux/hooks.c +--- linux-3.2.34/security/selinux/hooks.c 2012-01-09 16:15:05.000000000 +0100 ++++ linux-3.2.34-vs2.3.2.15/security/selinux/hooks.c 2011-12-05 19:33:02.000000000 +0100 +@@ -67,7 +67,6 @@ + #include + #include + #include /* for Unix socket types */ +-#include /* for Unix socket types */ + #include + #include + #include diff --git a/3.2.34/wrapfs-v3.2.2-45-ga5296eb.patch b/3.2.34/wrapfs-v3.2.2-45-ga5296eb.patch new file mode 100644 index 0000000..ebe5788 --- /dev/null +++ b/3.2.34/wrapfs-v3.2.2-45-ga5296eb.patch @@ -0,0 +1,2084 @@ +diff --git a/Documentation/filesystems/00-INDEX b/Documentation/filesystems/00-INDEX +index 8c624a1..b8822ed 100644 +--- a/Documentation/filesystems/00-INDEX ++++ b/Documentation/filesystems/00-INDEX +@@ -114,6 +114,8 @@ vfat.txt + - info on using the VFAT filesystem used in Windows NT and Windows 95 + vfs.txt + - overview of the Virtual File System ++wrapfs.txt ++ - info and mount options for the stackable wrapper file system + xfs.txt + - info and mount options for the XFS filesystem. 
+ xip.txt +diff --git a/Documentation/filesystems/wrapfs.txt b/Documentation/filesystems/wrapfs.txt +new file mode 100644 +index 0000000..f61879a +--- /dev/null ++++ b/Documentation/filesystems/wrapfs.txt +@@ -0,0 +1,165 @@ ++Wrapfs: a null-layer (aka wrapper) stackable file system ++ ++Maintainer: Erez Zadok ++Web Site: ++ ++------------------------------------------------------------------------------ ++MOTIVATION: ++ ++Wrapfs is a small null-layer stackable file system, similar to BSD's Nullfs. ++Wrapfs is small, under 1800 lines of code. Compare that to, say, eCryptfs ++and Unionfs, each of which are over 10,000 LoC. As such, Wrapfs is simple ++and easy to read and understand. Wrapfs is useful for several reasons: ++ ++1. Many people like to experiment with in-kernel file system ideas as a ++ prototype; Wrapfs is an ideal small template from which one could modify ++ the code to create new file system functionality incrementally. ++ ++2. As a platform to test and debug generic stacking problems in other Linux ++ stackable file systems (e.g., ecryptfs). ++ ++3. As a way to test VFS enhancements to better support stacking in Linux. ++ ++4. Wrapfs is a very useful instructional tool, often used as a starting ++ point for course assignments, for people who want a small example of who ++ the Linux VFS works, or for those who want to learn to write new Linux ++ file systems. ++ ++Various versions of Wrapfs appeared as part of the "fistgen" package since ++1994, and have been used by numerous users world-wide. For a more detailed ++history of Wrapfs, and list of most of its known users, see the section ++marked "HISTORY" below. ++ ++------------------------------------------------------------------------------ ++OPERATION: ++ ++This is a brief description of how Wrapfs operates. For more information, ++see the full paper published in Linux Expo 1999, titled "A Stackable File ++System Interface For Linux": ++ ++ ++ ++The basic function of a stackable file system is to pass an operation and ++its arguments to the lower-level file system. For every VFS object (inode, ++dentry, file, superblock, etc.), Wrapfs keeps a one-to-one mapping of a ++Wrapfs-level object to the lower one. We call the Wrapfs object the "upper" ++one, and the one below we call the "lower" one. Wrapfs stores these ++mappings as simple pointers inside the private field of the existing VFS ++objects (e.g., dentry->d_fsdata, sb->s_fs_info, and a container for inodes). ++ ++There are two kinds of stackable operations: those that create new VFS ++objects and those that don't. ++ ++The following distilled code snippet shows a method which doesn't create a ++new object. The method just has to pass it to the lower layer and propagate ++any errors back up the VFS: ++ ++int wrapfs_unlink(struct inode *dir, struct dentry *dentry) ++{ ++ int err; ++ struct inode *lower_dir; ++ struct dentry *lower_dentry; ++ lower_dir = get_lower_inode(dir); ++ lower_dentry = get_lower_dentry(dentry); ++ err = lower_dir->i_op->unlink(lower_dir, lower_dentry); ++ return err; ++} ++ ++The following code snippet shows a method which creates a new object. 
After ++a lower object gets created, Wrapfs has to also create its own object, and ++make the pointer connections between the upper and lower objects (the latter ++is done via a helper routine called "interpose"): ++ ++int wrapfs_create(struct inode *dir, struct dentry *dentry, int mode) ++{ ++ int err; ++ struct dentry *lower_dentry; ++ struct inode *lower_dir; ++ lower_dir = wrapfs_lower_inode(dir); ++ lower_dentry = wrapfs_lower_dentry(dentry); ++ err = vfs_create(lower_dir, lower_dentry, mode); ++ if (!err) ++ err = wrapfs_interpose(dentry, dir->i_sb); ++ return err; ++} ++ ++The wrapfs_unlink code snippet above can be easily modified to change the ++behavior of unlink(2). For example, if an ->unlink operation is changed to ++->rename, this could become the basis for an "undo" file system; or if the ++lower_dentry's name gets encrypted before calling the lower ->unlink, this ++could be part of an encryption file system. ++ ++------------------------------------------------------------------------------ ++USAGE: ++ ++First, you have to have some pre-existing directory already mounted from any ++other file system, say /some/lower/path. Then, to mount wrapfs in ++/mnt/wrapfs, on that lower directory, issue this command: ++ ++# mount -t wrapfs /some/lower/path /mnt/wrapfs ++ ++To access the files via Wrapfs, use the mount point /mnt/wrapfs. ++ ++------------------------------------------------------------------------------ ++CAVEATS: ++ ++Stacking on NFS. Wrapfs has been tested with LTP, racer, fsx, parallel ++compile, and more. It's been tested on top of ext2, ext3, xfs, reiserfs, ++and tmpfs -- and passed all tests. However, on top of nfs3, wrapfs has to ++treat silly-deleted files as if they don't exist: in ->unlink, if we try to ++vfs_unlink an NFS silly-deleted file, NFS returns EBUSY; so we simply ignore ++it and return 0 (success) to the VFS. NFS will delete this file later on ++anyway. As the VFS also has special handling for silly-deleted files, this ++isn't unusual. A cleaner way to handle this in the future is if the VFS ++were to handle silly-deleted (aka "delayed-delete") files entirely at the ++VFS. ++ ++------------------------------------------------------------------------------ ++HISTORY: ++ ++Wrapfs was developed initially in 1994 for Linux 2.1, as part of Erez ++Zadok's graduate work at Columbia University. It was designed to be a ++flexible null-layer, pass-through, stackable file system, from which other ++file systems would be developed and even instantiated automatically using a ++high-level language. One of the first file systems developed from Wrapfs ++was a simple encryption file system called Cryptfs (eCryptfs is based on ++Cryptfs). Other examples include Gzipfs, a stackable compression file ++system, and Unionfs, a stackable unification file system. Wrapfs was ++integrated into a larger package called fistgen (see www.filesystems.org), ++and ported to FreeBSD and Solaris. Wrapfs and fistgen continued to be ++maintained for newer versions of kernels, but remained largely standalone ++until recently: this release of Wrapfs for Linux represents a clean version ++written from scratch. ++ ++Over the past 15+ years, versions of Wrapfs had been used by many users and ++companies. At one point or another, the following groups have used stacking ++code based on Wrapfs. ++ ++1. PROJECTS: eCryptfs, Unionfs, mini_fo, Aufs, FindFS, StoreCompress, ++ TestFS, ToPAS, and MFS. ++ ++2. 
COMPANIES AND RESEARCH LABS: Bell Labs's Plan 9 group, EMC, ++ Hewlett-Packard, IBM Research Almaden, IBM Research Austin, Red Hat, ++ SuSE, Sun Microsystems, Veritas, Booyaka, CalSoft (India), Computer Farm, ++ Deutsche Bank (Germany), DreamWorks LLC, Eli Lilly and Company, FAME ++ Information Services, GMX AG (Germany), IBM global services (India), IDA ++ Center for Communications Research, Indra Networks, Inc., Kavi ++ Corporation, Mendepie, Mitsubishi Electric (Japan), Mobile-Mind, Monster ++ Labs, Morning Network (Russia), NeST Technologies, Packet General ++ Networks, Inc., Outstep Technologies, Reflective Systems Group, River ++ Styx Internet, SARAI Net, Saint-Petersburg Official Web Site (Russia), ++ Shadow Island Games, TISCover (Germany), Trymedia Systems, Uber Admin, ++ Videsh Sanchar Nigam Limited (India), Wanadoo (France), and iNsu ++ Innovations. ++ ++3. UNIVERSITIES: Georgia Institute of Technology, Stanford University, UC ++ Berkeley, UCLA, University of Maryland, College Park, University of ++ Michigan, Ben Gurion University (Israel), Clarkson University, Clemson ++ University, Deutsches Elektronen Synchrotron (Germany), Electronics and ++ Telecommunications Research Institute (South Korea), Indian Institute of ++ Technology (India), National Taiwan University, Pune University (India), ++ The College of William \& Mary, Trinity College (Ireland), Universitaet ++ Frankfurt am Main (Germany), University Hospital Olomouc (Czech ++ Republic), and University of Salermo (Italy). ++ ++------------------------------------------------------------------------------ +diff --git a/MAINTAINERS b/MAINTAINERS +index f986e7d..1aecfdb 100644 +--- a/MAINTAINERS ++++ b/MAINTAINERS +@@ -7374,6 +7374,16 @@ F: include/linux/workqueue.h + F: kernel/workqueue.c + F: Documentation/workqueue.txt + ++WRAPFS ++P: Erez Zadok ++M: ezk@cs.sunysb.edu ++L: wrapfs@filesystems.org ++W: http://wrapfs.filesystems.org/ ++T: git git.kernel.org/pub/scm/linux/kernel/git/ezk/wrapfs.git ++S: Maintained ++F: Documentation/filesystems/wrapfs.txt ++F: fs/wrapfs/ ++ + X.25 NETWORK LAYER + M: Andrew Hendry + L: linux-x25@vger.kernel.org +diff --git a/fs/Kconfig b/fs/Kconfig +index 6ad58a5..73699ed 100644 +--- a/fs/Kconfig ++++ b/fs/Kconfig +@@ -194,6 +194,7 @@ if MISC_FILESYSTEMS + source "fs/adfs/Kconfig" + source "fs/affs/Kconfig" + source "fs/ecryptfs/Kconfig" ++source "fs/wrapfs/Kconfig" + source "fs/hfs/Kconfig" + source "fs/hfsplus/Kconfig" + source "fs/befs/Kconfig" +diff --git a/fs/Makefile b/fs/Makefile +index d2c3353..36daa5e 100644 +--- a/fs/Makefile ++++ b/fs/Makefile +@@ -82,6 +82,7 @@ obj-$(CONFIG_ISO9660_FS) += isofs/ + obj-$(CONFIG_HFSPLUS_FS) += hfsplus/ # Before hfs to find wrapped HFS+ + obj-$(CONFIG_HFS_FS) += hfs/ + obj-$(CONFIG_ECRYPT_FS) += ecryptfs/ ++obj-$(CONFIG_WRAP_FS) += wrapfs/ + obj-$(CONFIG_VXFS_FS) += freevxfs/ + obj-$(CONFIG_NFS_FS) += nfs/ + obj-$(CONFIG_EXPORTFS) += exportfs/ +diff --git a/fs/wrapfs/Kconfig b/fs/wrapfs/Kconfig +new file mode 100644 +index 0000000..d790ccd +--- /dev/null ++++ b/fs/wrapfs/Kconfig +@@ -0,0 +1,9 @@ ++config WRAP_FS ++ tristate "Wrapfs stackable file system (EXPERIMENTAL)" ++ depends on EXPERIMENTAL ++ help ++ Wrapfs is a stackable file system which simply passes its ++ operations to the lower layer. It is designed as a useful ++ template for developing or debugging other stackable file systems, ++ and more (see Documentation/filesystems/wrapfs.txt). See ++ for details. 
+diff --git a/fs/wrapfs/Makefile b/fs/wrapfs/Makefile +new file mode 100644 +index 0000000..f318d11 +--- /dev/null ++++ b/fs/wrapfs/Makefile +@@ -0,0 +1,7 @@ ++WRAPFS_VERSION="0.1" ++ ++EXTRA_CFLAGS += -DWRAPFS_VERSION=\"$(WRAPFS_VERSION)\" ++ ++obj-$(CONFIG_WRAP_FS) += wrapfs.o ++ ++wrapfs-y := dentry.o file.o inode.o main.o super.o lookup.o mmap.o +diff --git a/fs/wrapfs/dentry.c b/fs/wrapfs/dentry.c +new file mode 100644 +index 0000000..b173153 +--- /dev/null ++++ b/fs/wrapfs/dentry.c +@@ -0,0 +1,52 @@ ++/* ++ * Copyright (c) 1998-2011 Erez Zadok ++ * Copyright (c) 2009 Shrikar Archak ++ * Copyright (c) 2003-2011 Stony Brook University ++ * Copyright (c) 2003-2011 The Research Foundation of SUNY ++ * ++ * This program is free software; you can redistribute it and/or modify ++ * it under the terms of the GNU General Public License version 2 as ++ * published by the Free Software Foundation. ++ */ ++ ++#include "wrapfs.h" ++ ++/* ++ * returns: -ERRNO if error (returned to user) ++ * 0: tell VFS to invalidate dentry ++ * 1: dentry is valid ++ */ ++static int wrapfs_d_revalidate(struct dentry *dentry, struct nameidata *nd) ++{ ++ struct path lower_path, saved_path; ++ struct dentry *lower_dentry; ++ int err = 1; ++ ++ if (nd && nd->flags & LOOKUP_RCU) ++ return -ECHILD; ++ ++ wrapfs_get_lower_path(dentry, &lower_path); ++ lower_dentry = lower_path.dentry; ++ if (!lower_dentry->d_op || !lower_dentry->d_op->d_revalidate) ++ goto out; ++ pathcpy(&saved_path, &nd->path); ++ pathcpy(&nd->path, &lower_path); ++ err = lower_dentry->d_op->d_revalidate(lower_dentry, nd); ++ pathcpy(&nd->path, &saved_path); ++out: ++ wrapfs_put_lower_path(dentry, &lower_path); ++ return err; ++} ++ ++static void wrapfs_d_release(struct dentry *dentry) ++{ ++ /* release and reset the lower paths */ ++ wrapfs_put_reset_lower_path(dentry); ++ free_dentry_private_data(dentry); ++ return; ++} ++ ++const struct dentry_operations wrapfs_dops = { ++ .d_revalidate = wrapfs_d_revalidate, ++ .d_release = wrapfs_d_release, ++}; +diff --git a/fs/wrapfs/file.c b/fs/wrapfs/file.c +new file mode 100644 +index 0000000..7a7fe1e +--- /dev/null ++++ b/fs/wrapfs/file.c +@@ -0,0 +1,298 @@ ++/* ++ * Copyright (c) 1998-2011 Erez Zadok ++ * Copyright (c) 2009 Shrikar Archak ++ * Copyright (c) 2003-2011 Stony Brook University ++ * Copyright (c) 2003-2011 The Research Foundation of SUNY ++ * ++ * This program is free software; you can redistribute it and/or modify ++ * it under the terms of the GNU General Public License version 2 as ++ * published by the Free Software Foundation. 
++ */ ++ ++#include "wrapfs.h" ++ ++static ssize_t wrapfs_read(struct file *file, char __user *buf, ++ size_t count, loff_t *ppos) ++{ ++ int err; ++ struct file *lower_file; ++ struct dentry *dentry = file->f_path.dentry; ++ ++ lower_file = wrapfs_lower_file(file); ++ err = vfs_read(lower_file, buf, count, ppos); ++ /* update our inode atime upon a successful lower read */ ++ if (err >= 0) ++ fsstack_copy_attr_atime(dentry->d_inode, ++ lower_file->f_path.dentry->d_inode); ++ ++ return err; ++} ++ ++static ssize_t wrapfs_write(struct file *file, const char __user *buf, ++ size_t count, loff_t *ppos) ++{ ++ int err = 0; ++ struct file *lower_file; ++ struct dentry *dentry = file->f_path.dentry; ++ ++ lower_file = wrapfs_lower_file(file); ++ err = vfs_write(lower_file, buf, count, ppos); ++ /* update our inode times+sizes upon a successful lower write */ ++ if (err >= 0) { ++ fsstack_copy_inode_size(dentry->d_inode, ++ lower_file->f_path.dentry->d_inode); ++ fsstack_copy_attr_times(dentry->d_inode, ++ lower_file->f_path.dentry->d_inode); ++ } ++ ++ return err; ++} ++ ++static int wrapfs_readdir(struct file *file, void *dirent, filldir_t filldir) ++{ ++ int err = 0; ++ struct file *lower_file = NULL; ++ struct dentry *dentry = file->f_path.dentry; ++ ++ lower_file = wrapfs_lower_file(file); ++ err = vfs_readdir(lower_file, filldir, dirent); ++ file->f_pos = lower_file->f_pos; ++ if (err >= 0) /* copy the atime */ ++ fsstack_copy_attr_atime(dentry->d_inode, ++ lower_file->f_path.dentry->d_inode); ++ return err; ++} ++ ++static long wrapfs_unlocked_ioctl(struct file *file, unsigned int cmd, ++ unsigned long arg) ++{ ++ long err = -ENOTTY; ++ struct file *lower_file; ++ ++ lower_file = wrapfs_lower_file(file); ++ ++ /* XXX: use vfs_ioctl if/when VFS exports it */ ++ if (!lower_file || !lower_file->f_op) ++ goto out; ++ if (lower_file->f_op->unlocked_ioctl) ++ err = lower_file->f_op->unlocked_ioctl(lower_file, cmd, arg); ++ ++out: ++ return err; ++} ++ ++#ifdef CONFIG_COMPAT ++static long wrapfs_compat_ioctl(struct file *file, unsigned int cmd, ++ unsigned long arg) ++{ ++ long err = -ENOTTY; ++ struct file *lower_file; ++ ++ lower_file = wrapfs_lower_file(file); ++ ++ /* XXX: use vfs_ioctl if/when VFS exports it */ ++ if (!lower_file || !lower_file->f_op) ++ goto out; ++ if (lower_file->f_op->compat_ioctl) ++ err = lower_file->f_op->compat_ioctl(lower_file, cmd, arg); ++ ++out: ++ return err; ++} ++#endif ++ ++static int wrapfs_mmap(struct file *file, struct vm_area_struct *vma) ++{ ++ int err = 0; ++ bool willwrite; ++ struct file *lower_file; ++ const struct vm_operations_struct *saved_vm_ops = NULL; ++ ++ /* this might be deferred to mmap's writepage */ ++ willwrite = ((vma->vm_flags | VM_SHARED | VM_WRITE) == vma->vm_flags); ++ ++ /* ++ * File systems which do not implement ->writepage may use ++ * generic_file_readonly_mmap as their ->mmap op. If you call ++ * generic_file_readonly_mmap with VM_WRITE, you'd get an -EINVAL. ++ * But we cannot call the lower ->mmap op, so we can't tell that ++ * writeable mappings won't work. Therefore, our only choice is to ++ * check if the lower file system supports the ->writepage, and if ++ * not, return EINVAL (the same error that ++ * generic_file_readonly_mmap returns in that case). 
++ */ ++ lower_file = wrapfs_lower_file(file); ++ if (willwrite && !lower_file->f_mapping->a_ops->writepage) { ++ err = -EINVAL; ++ printk(KERN_ERR "wrapfs: lower file system does not " ++ "support writeable mmap\n"); ++ goto out; ++ } ++ ++ /* ++ * find and save lower vm_ops. ++ * ++ * XXX: the VFS should have a cleaner way of finding the lower vm_ops ++ */ ++ if (!WRAPFS_F(file)->lower_vm_ops) { ++ err = lower_file->f_op->mmap(lower_file, vma); ++ if (err) { ++ printk(KERN_ERR "wrapfs: lower mmap failed %d\n", err); ++ goto out; ++ } ++ saved_vm_ops = vma->vm_ops; /* save: came from lower ->mmap */ ++ err = do_munmap(current->mm, vma->vm_start, ++ vma->vm_end - vma->vm_start); ++ if (err) { ++ printk(KERN_ERR "wrapfs: do_munmap failed %d\n", err); ++ goto out; ++ } ++ } ++ ++ /* ++ * Next 3 lines are all I need from generic_file_mmap. I definitely ++ * don't want its test for ->readpage which returns -ENOEXEC. ++ */ ++ file_accessed(file); ++ vma->vm_ops = &wrapfs_vm_ops; ++ vma->vm_flags |= VM_CAN_NONLINEAR; ++ ++ file->f_mapping->a_ops = &wrapfs_aops; /* set our aops */ ++ if (!WRAPFS_F(file)->lower_vm_ops) /* save for our ->fault */ ++ WRAPFS_F(file)->lower_vm_ops = saved_vm_ops; ++ ++out: ++ return err; ++} ++ ++static int wrapfs_open(struct inode *inode, struct file *file) ++{ ++ int err = 0; ++ struct file *lower_file = NULL; ++ struct path lower_path; ++ ++ /* don't open unhashed/deleted files */ ++ if (d_unhashed(file->f_path.dentry)) { ++ err = -ENOENT; ++ goto out_err; ++ } ++ ++ file->private_data = ++ kzalloc(sizeof(struct wrapfs_file_info), GFP_KERNEL); ++ if (!WRAPFS_F(file)) { ++ err = -ENOMEM; ++ goto out_err; ++ } ++ ++ /* open lower object and link wrapfs's file struct to lower's */ ++ wrapfs_get_lower_path(file->f_path.dentry, &lower_path); ++ lower_file = dentry_open(lower_path.dentry, lower_path.mnt, ++ file->f_flags, current_cred()); ++ if (IS_ERR(lower_file)) { ++ err = PTR_ERR(lower_file); ++ lower_file = wrapfs_lower_file(file); ++ if (lower_file) { ++ wrapfs_set_lower_file(file, NULL); ++ fput(lower_file); /* fput calls dput for lower_dentry */ ++ } ++ } else { ++ wrapfs_set_lower_file(file, lower_file); ++ } ++ ++ if (err) ++ kfree(WRAPFS_F(file)); ++ else ++ fsstack_copy_attr_all(inode, wrapfs_lower_inode(inode)); ++out_err: ++ return err; ++} ++ ++static int wrapfs_flush(struct file *file, fl_owner_t id) ++{ ++ int err = 0; ++ struct file *lower_file = NULL; ++ ++ lower_file = wrapfs_lower_file(file); ++ if (lower_file && lower_file->f_op && lower_file->f_op->flush) ++ err = lower_file->f_op->flush(lower_file, id); ++ ++ return err; ++} ++ ++/* release all lower object references & free the file info structure */ ++static int wrapfs_file_release(struct inode *inode, struct file *file) ++{ ++ struct file *lower_file; ++ ++ lower_file = wrapfs_lower_file(file); ++ if (lower_file) { ++ wrapfs_set_lower_file(file, NULL); ++ fput(lower_file); ++ } ++ ++ kfree(WRAPFS_F(file)); ++ return 0; ++} ++ ++static int wrapfs_fsync(struct file *file, loff_t start, loff_t end, ++ int datasync) ++{ ++ int err; ++ struct file *lower_file; ++ struct path lower_path; ++ struct dentry *dentry = file->f_path.dentry; ++ ++ err = generic_file_fsync(file, start, end, datasync); ++ if (err) ++ goto out; ++ lower_file = wrapfs_lower_file(file); ++ wrapfs_get_lower_path(dentry, &lower_path); ++ err = vfs_fsync_range(lower_file, start, end, datasync); ++ wrapfs_put_lower_path(dentry, &lower_path); ++out: ++ return err; ++} ++ ++static int wrapfs_fasync(int fd, struct file *file, int flag) 
++{ ++ int err = 0; ++ struct file *lower_file = NULL; ++ ++ lower_file = wrapfs_lower_file(file); ++ if (lower_file->f_op && lower_file->f_op->fasync) ++ err = lower_file->f_op->fasync(fd, lower_file, flag); ++ ++ return err; ++} ++ ++const struct file_operations wrapfs_main_fops = { ++ .llseek = generic_file_llseek, ++ .read = wrapfs_read, ++ .write = wrapfs_write, ++ .unlocked_ioctl = wrapfs_unlocked_ioctl, ++#ifdef CONFIG_COMPAT ++ .compat_ioctl = wrapfs_compat_ioctl, ++#endif ++ .mmap = wrapfs_mmap, ++ .open = wrapfs_open, ++ .flush = wrapfs_flush, ++ .release = wrapfs_file_release, ++ .fsync = wrapfs_fsync, ++ .fasync = wrapfs_fasync, ++}; ++ ++/* trimmed directory options */ ++const struct file_operations wrapfs_dir_fops = { ++ .llseek = generic_file_llseek, ++ .read = generic_read_dir, ++ .readdir = wrapfs_readdir, ++ .unlocked_ioctl = wrapfs_unlocked_ioctl, ++#ifdef CONFIG_COMPAT ++ .compat_ioctl = wrapfs_compat_ioctl, ++#endif ++ .open = wrapfs_open, ++ .release = wrapfs_file_release, ++ .flush = wrapfs_flush, ++ .fsync = wrapfs_fsync, ++ .fasync = wrapfs_fasync, ++}; +diff --git a/fs/wrapfs/inode.c b/fs/wrapfs/inode.c +new file mode 100644 +index 0000000..1dc3645 +--- /dev/null ++++ b/fs/wrapfs/inode.c +@@ -0,0 +1,514 @@ ++/* ++ * Copyright (c) 1998-2011 Erez Zadok ++ * Copyright (c) 2009 Shrikar Archak ++ * Copyright (c) 2003-2011 Stony Brook University ++ * Copyright (c) 2003-2011 The Research Foundation of SUNY ++ * ++ * This program is free software; you can redistribute it and/or modify ++ * it under the terms of the GNU General Public License version 2 as ++ * published by the Free Software Foundation. ++ */ ++ ++#include "wrapfs.h" ++ ++static int wrapfs_create(struct inode *dir, struct dentry *dentry, ++ int mode, struct nameidata *nd) ++{ ++ int err = 0; ++ struct dentry *lower_dentry; ++ struct dentry *lower_parent_dentry = NULL; ++ struct path lower_path, saved_path; ++ ++ wrapfs_get_lower_path(dentry, &lower_path); ++ lower_dentry = lower_path.dentry; ++ lower_parent_dentry = lock_parent(lower_dentry); ++ ++ err = mnt_want_write(lower_path.mnt); ++ if (err) ++ goto out_unlock; ++ ++ pathcpy(&saved_path, &nd->path); ++ pathcpy(&nd->path, &lower_path); ++ err = vfs_create(lower_parent_dentry->d_inode, lower_dentry, mode, nd); ++ pathcpy(&nd->path, &saved_path); ++ if (err) ++ goto out; ++ ++ err = wrapfs_interpose(dentry, dir->i_sb, &lower_path); ++ if (err) ++ goto out; ++ fsstack_copy_attr_times(dir, wrapfs_lower_inode(dir)); ++ fsstack_copy_inode_size(dir, lower_parent_dentry->d_inode); ++ ++out: ++ mnt_drop_write(lower_path.mnt); ++out_unlock: ++ unlock_dir(lower_parent_dentry); ++ wrapfs_put_lower_path(dentry, &lower_path); ++ return err; ++} ++ ++static int wrapfs_link(struct dentry *old_dentry, struct inode *dir, ++ struct dentry *new_dentry) ++{ ++ struct dentry *lower_old_dentry; ++ struct dentry *lower_new_dentry; ++ struct dentry *lower_dir_dentry; ++ u64 file_size_save; ++ int err; ++ struct path lower_old_path, lower_new_path; ++ ++ file_size_save = i_size_read(old_dentry->d_inode); ++ wrapfs_get_lower_path(old_dentry, &lower_old_path); ++ wrapfs_get_lower_path(new_dentry, &lower_new_path); ++ lower_old_dentry = lower_old_path.dentry; ++ lower_new_dentry = lower_new_path.dentry; ++ lower_dir_dentry = lock_parent(lower_new_dentry); ++ ++ err = mnt_want_write(lower_new_path.mnt); ++ if (err) ++ goto out_unlock; ++ ++ err = vfs_link(lower_old_dentry, lower_dir_dentry->d_inode, ++ lower_new_dentry); ++ if (err || !lower_new_dentry->d_inode) ++ goto out; ++ ++ 
err = wrapfs_interpose(new_dentry, dir->i_sb, &lower_new_path); ++ if (err) ++ goto out; ++ fsstack_copy_attr_times(dir, lower_new_dentry->d_inode); ++ fsstack_copy_inode_size(dir, lower_new_dentry->d_inode); ++ set_nlink(old_dentry->d_inode, ++ wrapfs_lower_inode(old_dentry->d_inode)->i_nlink); ++ i_size_write(new_dentry->d_inode, file_size_save); ++out: ++ mnt_drop_write(lower_new_path.mnt); ++out_unlock: ++ unlock_dir(lower_dir_dentry); ++ wrapfs_put_lower_path(old_dentry, &lower_old_path); ++ wrapfs_put_lower_path(new_dentry, &lower_new_path); ++ return err; ++} ++ ++static int wrapfs_unlink(struct inode *dir, struct dentry *dentry) ++{ ++ int err; ++ struct dentry *lower_dentry; ++ struct inode *lower_dir_inode = wrapfs_lower_inode(dir); ++ struct dentry *lower_dir_dentry; ++ struct path lower_path; ++ ++ wrapfs_get_lower_path(dentry, &lower_path); ++ lower_dentry = lower_path.dentry; ++ dget(lower_dentry); ++ lower_dir_dentry = lock_parent(lower_dentry); ++ ++ err = mnt_want_write(lower_path.mnt); ++ if (err) ++ goto out_unlock; ++ err = vfs_unlink(lower_dir_inode, lower_dentry); ++ ++ /* ++ * Note: unlinking on top of NFS can cause silly-renamed files. ++ * Trying to delete such files results in EBUSY from NFS ++ * below. Silly-renamed files will get deleted by NFS later on, so ++ * we just need to detect them here and treat such EBUSY errors as ++ * if the upper file was successfully deleted. ++ */ ++ if (err == -EBUSY && lower_dentry->d_flags & DCACHE_NFSFS_RENAMED) ++ err = 0; ++ if (err) ++ goto out; ++ fsstack_copy_attr_times(dir, lower_dir_inode); ++ fsstack_copy_inode_size(dir, lower_dir_inode); ++ set_nlink(dentry->d_inode, ++ wrapfs_lower_inode(dentry->d_inode)->i_nlink); ++ dentry->d_inode->i_ctime = dir->i_ctime; ++ d_drop(dentry); /* this is needed, else LTP fails (VFS won't do it) */ ++out: ++ mnt_drop_write(lower_path.mnt); ++out_unlock: ++ unlock_dir(lower_dir_dentry); ++ dput(lower_dentry); ++ wrapfs_put_lower_path(dentry, &lower_path); ++ return err; ++} ++ ++static int wrapfs_symlink(struct inode *dir, struct dentry *dentry, ++ const char *symname) ++{ ++ int err = 0; ++ struct dentry *lower_dentry; ++ struct dentry *lower_parent_dentry = NULL; ++ struct path lower_path; ++ ++ wrapfs_get_lower_path(dentry, &lower_path); ++ lower_dentry = lower_path.dentry; ++ lower_parent_dentry = lock_parent(lower_dentry); ++ ++ err = mnt_want_write(lower_path.mnt); ++ if (err) ++ goto out_unlock; ++ err = vfs_symlink(lower_parent_dentry->d_inode, lower_dentry, symname); ++ if (err) ++ goto out; ++ err = wrapfs_interpose(dentry, dir->i_sb, &lower_path); ++ if (err) ++ goto out; ++ fsstack_copy_attr_times(dir, wrapfs_lower_inode(dir)); ++ fsstack_copy_inode_size(dir, lower_parent_dentry->d_inode); ++ ++out: ++ mnt_drop_write(lower_path.mnt); ++out_unlock: ++ unlock_dir(lower_parent_dentry); ++ wrapfs_put_lower_path(dentry, &lower_path); ++ return err; ++} ++ ++static int wrapfs_mkdir(struct inode *dir, struct dentry *dentry, int mode) ++{ ++ int err = 0; ++ struct dentry *lower_dentry; ++ struct dentry *lower_parent_dentry = NULL; ++ struct path lower_path; ++ ++ wrapfs_get_lower_path(dentry, &lower_path); ++ lower_dentry = lower_path.dentry; ++ lower_parent_dentry = lock_parent(lower_dentry); ++ ++ err = mnt_want_write(lower_path.mnt); ++ if (err) ++ goto out_unlock; ++ err = vfs_mkdir(lower_parent_dentry->d_inode, lower_dentry, mode); ++ if (err) ++ goto out; ++ ++ err = wrapfs_interpose(dentry, dir->i_sb, &lower_path); ++ if (err) ++ goto out; ++ ++ fsstack_copy_attr_times(dir, 
wrapfs_lower_inode(dir)); ++ fsstack_copy_inode_size(dir, lower_parent_dentry->d_inode); ++ /* update number of links on parent directory */ ++ set_nlink(dir, wrapfs_lower_inode(dir)->i_nlink); ++ ++out: ++ mnt_drop_write(lower_path.mnt); ++out_unlock: ++ unlock_dir(lower_parent_dentry); ++ wrapfs_put_lower_path(dentry, &lower_path); ++ return err; ++} ++ ++static int wrapfs_rmdir(struct inode *dir, struct dentry *dentry) ++{ ++ struct dentry *lower_dentry; ++ struct dentry *lower_dir_dentry; ++ int err; ++ struct path lower_path; ++ ++ wrapfs_get_lower_path(dentry, &lower_path); ++ lower_dentry = lower_path.dentry; ++ lower_dir_dentry = lock_parent(lower_dentry); ++ ++ err = mnt_want_write(lower_path.mnt); ++ if (err) ++ goto out_unlock; ++ err = vfs_rmdir(lower_dir_dentry->d_inode, lower_dentry); ++ if (err) ++ goto out; ++ ++ d_drop(dentry); /* drop our dentry on success (why not VFS's job?) */ ++ if (dentry->d_inode) ++ clear_nlink(dentry->d_inode); ++ fsstack_copy_attr_times(dir, lower_dir_dentry->d_inode); ++ fsstack_copy_inode_size(dir, lower_dir_dentry->d_inode); ++ set_nlink(dir, lower_dir_dentry->d_inode->i_nlink); ++ ++out: ++ mnt_drop_write(lower_path.mnt); ++out_unlock: ++ unlock_dir(lower_dir_dentry); ++ wrapfs_put_lower_path(dentry, &lower_path); ++ return err; ++} ++ ++static int wrapfs_mknod(struct inode *dir, struct dentry *dentry, int mode, ++ dev_t dev) ++{ ++ int err = 0; ++ struct dentry *lower_dentry; ++ struct dentry *lower_parent_dentry = NULL; ++ struct path lower_path; ++ ++ wrapfs_get_lower_path(dentry, &lower_path); ++ lower_dentry = lower_path.dentry; ++ lower_parent_dentry = lock_parent(lower_dentry); ++ ++ err = mnt_want_write(lower_path.mnt); ++ if (err) ++ goto out_unlock; ++ err = vfs_mknod(lower_parent_dentry->d_inode, lower_dentry, mode, dev); ++ if (err) ++ goto out; ++ ++ err = wrapfs_interpose(dentry, dir->i_sb, &lower_path); ++ if (err) ++ goto out; ++ fsstack_copy_attr_times(dir, wrapfs_lower_inode(dir)); ++ fsstack_copy_inode_size(dir, lower_parent_dentry->d_inode); ++ ++out: ++ mnt_drop_write(lower_path.mnt); ++out_unlock: ++ unlock_dir(lower_parent_dentry); ++ wrapfs_put_lower_path(dentry, &lower_path); ++ return err; ++} ++ ++/* ++ * The locking rules in wrapfs_rename are complex. We could use a simpler ++ * superblock-level name-space lock for renames and copy-ups. 
++ */ ++static int wrapfs_rename(struct inode *old_dir, struct dentry *old_dentry, ++ struct inode *new_dir, struct dentry *new_dentry) ++{ ++ int err = 0; ++ struct dentry *lower_old_dentry = NULL; ++ struct dentry *lower_new_dentry = NULL; ++ struct dentry *lower_old_dir_dentry = NULL; ++ struct dentry *lower_new_dir_dentry = NULL; ++ struct dentry *trap = NULL; ++ struct path lower_old_path, lower_new_path; ++ ++ wrapfs_get_lower_path(old_dentry, &lower_old_path); ++ wrapfs_get_lower_path(new_dentry, &lower_new_path); ++ lower_old_dentry = lower_old_path.dentry; ++ lower_new_dentry = lower_new_path.dentry; ++ lower_old_dir_dentry = dget_parent(lower_old_dentry); ++ lower_new_dir_dentry = dget_parent(lower_new_dentry); ++ ++ trap = lock_rename(lower_old_dir_dentry, lower_new_dir_dentry); ++ /* source should not be ancestor of target */ ++ if (trap == lower_old_dentry) { ++ err = -EINVAL; ++ goto out; ++ } ++ /* target should not be ancestor of source */ ++ if (trap == lower_new_dentry) { ++ err = -ENOTEMPTY; ++ goto out; ++ } ++ ++ err = mnt_want_write(lower_old_path.mnt); ++ if (err) ++ goto out; ++ err = mnt_want_write(lower_new_path.mnt); ++ if (err) ++ goto out_drop_old_write; ++ ++ err = vfs_rename(lower_old_dir_dentry->d_inode, lower_old_dentry, ++ lower_new_dir_dentry->d_inode, lower_new_dentry); ++ if (err) ++ goto out_err; ++ ++ fsstack_copy_attr_all(new_dir, lower_new_dir_dentry->d_inode); ++ fsstack_copy_inode_size(new_dir, lower_new_dir_dentry->d_inode); ++ if (new_dir != old_dir) { ++ fsstack_copy_attr_all(old_dir, ++ lower_old_dir_dentry->d_inode); ++ fsstack_copy_inode_size(old_dir, ++ lower_old_dir_dentry->d_inode); ++ } ++ ++out_err: ++ mnt_drop_write(lower_new_path.mnt); ++out_drop_old_write: ++ mnt_drop_write(lower_old_path.mnt); ++out: ++ unlock_rename(lower_old_dir_dentry, lower_new_dir_dentry); ++ dput(lower_old_dir_dentry); ++ dput(lower_new_dir_dentry); ++ wrapfs_put_lower_path(old_dentry, &lower_old_path); ++ wrapfs_put_lower_path(new_dentry, &lower_new_path); ++ return err; ++} ++ ++static int wrapfs_readlink(struct dentry *dentry, char __user *buf, int bufsiz) ++{ ++ int err; ++ struct dentry *lower_dentry; ++ struct path lower_path; ++ ++ wrapfs_get_lower_path(dentry, &lower_path); ++ lower_dentry = lower_path.dentry; ++ if (!lower_dentry->d_inode->i_op || ++ !lower_dentry->d_inode->i_op->readlink) { ++ err = -EINVAL; ++ goto out; ++ } ++ ++ err = lower_dentry->d_inode->i_op->readlink(lower_dentry, ++ buf, bufsiz); ++ if (err < 0) ++ goto out; ++ fsstack_copy_attr_atime(dentry->d_inode, lower_dentry->d_inode); ++ ++out: ++ wrapfs_put_lower_path(dentry, &lower_path); ++ return err; ++} ++ ++static void *wrapfs_follow_link(struct dentry *dentry, struct nameidata *nd) ++{ ++ char *buf; ++ int len = PAGE_SIZE, err; ++ mm_segment_t old_fs; ++ ++ /* This is freed by the put_link method assuming a successful call. 
*/ ++ buf = kmalloc(len, GFP_KERNEL); ++ if (!buf) { ++ buf = ERR_PTR(-ENOMEM); ++ goto out; ++ } ++ ++ /* read the symlink, and then we will follow it */ ++ old_fs = get_fs(); ++ set_fs(KERNEL_DS); ++ err = wrapfs_readlink(dentry, buf, len); ++ set_fs(old_fs); ++ if (err < 0) { ++ kfree(buf); ++ buf = ERR_PTR(err); ++ } else { ++ buf[err] = '\0'; ++ } ++out: ++ nd_set_link(nd, buf); ++ return NULL; ++} ++ ++/* this @nd *IS* still used */ ++static void wrapfs_put_link(struct dentry *dentry, struct nameidata *nd, ++ void *cookie) ++{ ++ char *buf = nd_get_link(nd); ++ if (!IS_ERR(buf)) /* free the char* */ ++ kfree(buf); ++} ++ ++static int wrapfs_permission(struct inode *inode, int mask) ++{ ++ struct inode *lower_inode; ++ int err; ++ ++ lower_inode = wrapfs_lower_inode(inode); ++ err = inode_permission(lower_inode, mask); ++ return err; ++} ++ ++static int wrapfs_setattr(struct dentry *dentry, struct iattr *ia) ++{ ++ int err = 0; ++ struct dentry *lower_dentry; ++ struct inode *inode; ++ struct inode *lower_inode; ++ struct path lower_path; ++ struct iattr lower_ia; ++ ++ inode = dentry->d_inode; ++ ++ /* ++ * Check if user has permission to change inode. We don't check if ++ * this user can change the lower inode: that should happen when ++ * calling notify_change on the lower inode. ++ */ ++ err = inode_change_ok(inode, ia); ++ if (err) ++ goto out_err; ++ ++ wrapfs_get_lower_path(dentry, &lower_path); ++ lower_dentry = lower_path.dentry; ++ lower_inode = wrapfs_lower_inode(inode); ++ ++ /* prepare our own lower struct iattr (with the lower file) */ ++ memcpy(&lower_ia, ia, sizeof(lower_ia)); ++ if (ia->ia_valid & ATTR_FILE) ++ lower_ia.ia_file = wrapfs_lower_file(ia->ia_file); ++ ++ /* ++ * If shrinking, first truncate upper level to cancel writing dirty ++ * pages beyond the new eof; and also if its' maxbytes is more ++ * limiting (fail with -EFBIG before making any change to the lower ++ * level). There is no need to vmtruncate the upper level ++ * afterwards in the other cases: we fsstack_copy_inode_size from ++ * the lower level. ++ */ ++ if (ia->ia_valid & ATTR_SIZE) { ++ err = inode_newsize_ok(inode, ia->ia_size); ++ if (err) ++ goto out; ++ truncate_setsize(inode, ia->ia_size); ++ } ++ ++ /* ++ * mode change is for clearing setuid/setgid bits. Allow lower fs ++ * to interpret this in its own way. ++ */ ++ if (lower_ia.ia_valid & (ATTR_KILL_SUID | ATTR_KILL_SGID)) ++ lower_ia.ia_valid &= ~ATTR_MODE; ++ ++ /* notify the (possibly copied-up) lower inode */ ++ /* ++ * Note: we use lower_dentry->d_inode, because lower_inode may be ++ * unlinked (no inode->i_sb and i_ino==0. This happens if someone ++ * tries to open(), unlink(), then ftruncate() a file. ++ */ ++ mutex_lock(&lower_dentry->d_inode->i_mutex); ++ err = notify_change(lower_dentry, &lower_ia); /* note: lower_ia */ ++ mutex_unlock(&lower_dentry->d_inode->i_mutex); ++ if (err) ++ goto out; ++ ++ /* get attributes from the lower inode */ ++ fsstack_copy_attr_all(inode, lower_inode); ++ /* ++ * Not running fsstack_copy_inode_size(inode, lower_inode), because ++ * VFS should update our inode size, and notify_change on ++ * lower_inode should update its size. 
++ */ ++ ++out: ++ wrapfs_put_lower_path(dentry, &lower_path); ++out_err: ++ return err; ++} ++ ++const struct inode_operations wrapfs_symlink_iops = { ++ .readlink = wrapfs_readlink, ++ .permission = wrapfs_permission, ++ .follow_link = wrapfs_follow_link, ++ .setattr = wrapfs_setattr, ++ .put_link = wrapfs_put_link, ++}; ++ ++const struct inode_operations wrapfs_dir_iops = { ++ .create = wrapfs_create, ++ .lookup = wrapfs_lookup, ++ .link = wrapfs_link, ++ .unlink = wrapfs_unlink, ++ .symlink = wrapfs_symlink, ++ .mkdir = wrapfs_mkdir, ++ .rmdir = wrapfs_rmdir, ++ .mknod = wrapfs_mknod, ++ .rename = wrapfs_rename, ++ .permission = wrapfs_permission, ++ .setattr = wrapfs_setattr, ++}; ++ ++const struct inode_operations wrapfs_main_iops = { ++ .permission = wrapfs_permission, ++ .setattr = wrapfs_setattr, ++}; +diff --git a/fs/wrapfs/lookup.c b/fs/wrapfs/lookup.c +new file mode 100644 +index 0000000..325b2ba +--- /dev/null ++++ b/fs/wrapfs/lookup.c +@@ -0,0 +1,304 @@ ++/* ++ * Copyright (c) 1998-2011 Erez Zadok ++ * Copyright (c) 2009 Shrikar Archak ++ * Copyright (c) 2003-2011 Stony Brook University ++ * Copyright (c) 2003-2011 The Research Foundation of SUNY ++ * ++ * This program is free software; you can redistribute it and/or modify ++ * it under the terms of the GNU General Public License version 2 as ++ * published by the Free Software Foundation. ++ */ ++ ++#include "wrapfs.h" ++ ++/* The dentry cache is just so we have properly sized dentries */ ++static struct kmem_cache *wrapfs_dentry_cachep; ++ ++int wrapfs_init_dentry_cache(void) ++{ ++ wrapfs_dentry_cachep = ++ kmem_cache_create("wrapfs_dentry", ++ sizeof(struct wrapfs_dentry_info), ++ 0, SLAB_RECLAIM_ACCOUNT, NULL); ++ ++ return wrapfs_dentry_cachep ? 0 : -ENOMEM; ++} ++ ++void wrapfs_destroy_dentry_cache(void) ++{ ++ if (wrapfs_dentry_cachep) ++ kmem_cache_destroy(wrapfs_dentry_cachep); ++} ++ ++void free_dentry_private_data(struct dentry *dentry) ++{ ++ if (!dentry || !dentry->d_fsdata) ++ return; ++ kmem_cache_free(wrapfs_dentry_cachep, dentry->d_fsdata); ++ dentry->d_fsdata = NULL; ++} ++ ++/* allocate new dentry private data */ ++int new_dentry_private_data(struct dentry *dentry) ++{ ++ struct wrapfs_dentry_info *info = WRAPFS_D(dentry); ++ ++ /* use zalloc to init dentry_info.lower_path */ ++ info = kmem_cache_zalloc(wrapfs_dentry_cachep, GFP_ATOMIC); ++ if (!info) ++ return -ENOMEM; ++ ++ spin_lock_init(&info->lock); ++ dentry->d_fsdata = info; ++ ++ return 0; ++} ++ ++static int wrapfs_inode_test(struct inode *inode, void *candidate_lower_inode) ++{ ++ struct inode *current_lower_inode = wrapfs_lower_inode(inode); ++ if (current_lower_inode == (struct inode *)candidate_lower_inode) ++ return 1; /* found a match */ ++ else ++ return 0; /* no match */ ++} ++ ++static int wrapfs_inode_set(struct inode *inode, void *lower_inode) ++{ ++ /* we do actual inode initialization in wrapfs_iget */ ++ return 0; ++} ++ ++struct inode *wrapfs_iget(struct super_block *sb, struct inode *lower_inode) ++{ ++ struct wrapfs_inode_info *info; ++ struct inode *inode; /* the new inode to return */ ++ int err; ++ ++ inode = iget5_locked(sb, /* our superblock */ ++ /* ++ * hashval: we use inode number, but we can ++ * also use "(unsigned long)lower_inode" ++ * instead. 
++ */ ++ lower_inode->i_ino, /* hashval */ ++ wrapfs_inode_test, /* inode comparison function */ ++ wrapfs_inode_set, /* inode init function */ ++ lower_inode); /* data passed to test+set fxns */ ++ if (!inode) { ++ err = -EACCES; ++ iput(lower_inode); ++ return ERR_PTR(err); ++ } ++ /* if found a cached inode, then just return it */ ++ if (!(inode->i_state & I_NEW)) ++ return inode; ++ ++ /* initialize new inode */ ++ info = WRAPFS_I(inode); ++ ++ inode->i_ino = lower_inode->i_ino; ++ if (!igrab(lower_inode)) { ++ err = -ESTALE; ++ return ERR_PTR(err); ++ } ++ wrapfs_set_lower_inode(inode, lower_inode); ++ ++ inode->i_version++; ++ ++ /* use different set of inode ops for symlinks & directories */ ++ if (S_ISDIR(lower_inode->i_mode)) ++ inode->i_op = &wrapfs_dir_iops; ++ else if (S_ISLNK(lower_inode->i_mode)) ++ inode->i_op = &wrapfs_symlink_iops; ++ else ++ inode->i_op = &wrapfs_main_iops; ++ ++ /* use different set of file ops for directories */ ++ if (S_ISDIR(lower_inode->i_mode)) ++ inode->i_fop = &wrapfs_dir_fops; ++ else ++ inode->i_fop = &wrapfs_main_fops; ++ ++ inode->i_mapping->a_ops = &wrapfs_aops; ++ ++ inode->i_atime.tv_sec = 0; ++ inode->i_atime.tv_nsec = 0; ++ inode->i_mtime.tv_sec = 0; ++ inode->i_mtime.tv_nsec = 0; ++ inode->i_ctime.tv_sec = 0; ++ inode->i_ctime.tv_nsec = 0; ++ ++ /* properly initialize special inodes */ ++ if (S_ISBLK(lower_inode->i_mode) || S_ISCHR(lower_inode->i_mode) || ++ S_ISFIFO(lower_inode->i_mode) || S_ISSOCK(lower_inode->i_mode)) ++ init_special_inode(inode, lower_inode->i_mode, ++ lower_inode->i_rdev); ++ ++ /* all well, copy inode attributes */ ++ fsstack_copy_attr_all(inode, lower_inode); ++ fsstack_copy_inode_size(inode, lower_inode); ++ ++ unlock_new_inode(inode); ++ return inode; ++} ++ ++/* ++ * Connect a wrapfs inode dentry/inode with several lower ones. This is ++ * the classic stackable file system "vnode interposition" action. ++ * ++ * @dentry: wrapfs's dentry which interposes on lower one ++ * @sb: wrapfs's super_block ++ * @lower_path: the lower path (caller does path_get/put) ++ */ ++int wrapfs_interpose(struct dentry *dentry, struct super_block *sb, ++ struct path *lower_path) ++{ ++ int err = 0; ++ struct inode *inode; ++ struct inode *lower_inode; ++ struct super_block *lower_sb; ++ ++ lower_inode = lower_path->dentry->d_inode; ++ lower_sb = wrapfs_lower_super(sb); ++ ++ /* check that the lower file system didn't cross a mount point */ ++ if (lower_inode->i_sb != lower_sb) { ++ err = -EXDEV; ++ goto out; ++ } ++ ++ /* ++ * We allocate our new inode below by calling wrapfs_iget, ++ * which will initialize some of the new inode's fields ++ */ ++ ++ /* inherit lower inode number for wrapfs's inode */ ++ inode = wrapfs_iget(sb, lower_inode); ++ if (IS_ERR(inode)) { ++ err = PTR_ERR(inode); ++ goto out; ++ } ++ ++ d_add(dentry, inode); ++ ++out: ++ return err; ++} ++ ++/* ++ * Main driver function for wrapfs's lookup. ++ * ++ * Returns: NULL (ok), ERR_PTR if an error occurred. ++ * Fills in lower_parent_path with on success. 
++ */ ++static struct dentry *__wrapfs_lookup(struct dentry *dentry, int flags, ++ struct path *lower_parent_path) ++{ ++ int err = 0; ++ struct vfsmount *lower_dir_mnt; ++ struct dentry *lower_dir_dentry = NULL; ++ struct dentry *lower_dentry; ++ const char *name; ++ struct path lower_path; ++ struct qstr this; ++ ++ /* must initialize dentry operations */ ++ d_set_d_op(dentry, &wrapfs_dops); ++ ++ if (IS_ROOT(dentry)) ++ goto out; ++ ++ name = dentry->d_name.name; ++ ++ /* now start the actual lookup procedure */ ++ lower_dir_dentry = lower_parent_path->dentry; ++ lower_dir_mnt = lower_parent_path->mnt; ++ ++ /* Use vfs_path_lookup to check if the dentry exists or not */ ++ err = vfs_path_lookup(lower_dir_dentry, lower_dir_mnt, name, 0, ++ &lower_path); ++ ++ /* no error: handle positive dentries */ ++ if (!err) { ++ wrapfs_set_lower_path(dentry, &lower_path); ++ err = wrapfs_interpose(dentry, dentry->d_sb, &lower_path); ++ if (err) /* path_put underlying path on error */ ++ wrapfs_put_reset_lower_path(dentry); ++ goto out; ++ } ++ ++ /* ++ * We don't consider ENOENT an error, and we want to return a ++ * negative dentry. ++ */ ++ if (err && err != -ENOENT) ++ goto out; ++ ++ /* instatiate a new negative dentry */ ++ this.name = name; ++ this.len = strlen(name); ++ this.hash = full_name_hash(this.name, this.len); ++ lower_dentry = d_lookup(lower_dir_dentry, &this); ++ if (lower_dentry) ++ goto setup_lower; ++ ++ lower_dentry = d_alloc(lower_dir_dentry, &this); ++ if (!lower_dentry) { ++ err = -ENOMEM; ++ goto out; ++ } ++ d_add(lower_dentry, NULL); /* instantiate and hash */ ++ ++setup_lower: ++ lower_path.dentry = lower_dentry; ++ lower_path.mnt = mntget(lower_dir_mnt); ++ wrapfs_set_lower_path(dentry, &lower_path); ++ ++ /* ++ * If the intent is to create a file, then don't return an error, so ++ * the VFS will continue the process of making this negative dentry ++ * into a positive one. ++ */ ++ if (flags & (LOOKUP_CREATE|LOOKUP_RENAME_TARGET)) ++ err = 0; ++ ++out: ++ return ERR_PTR(err); ++} ++ ++struct dentry *wrapfs_lookup(struct inode *dir, struct dentry *dentry, ++ struct nameidata *nd) ++{ ++ struct dentry *ret, *parent; ++ struct path lower_parent_path; ++ int err = 0; ++ ++ BUG_ON(!nd); ++ parent = dget_parent(dentry); ++ ++ wrapfs_get_lower_path(parent, &lower_parent_path); ++ ++ /* allocate dentry private data. We free it in ->d_release */ ++ err = new_dentry_private_data(dentry); ++ if (err) { ++ ret = ERR_PTR(err); ++ goto out; ++ } ++ ret = __wrapfs_lookup(dentry, nd->flags, &lower_parent_path); ++ if (IS_ERR(ret)) ++ goto out; ++ if (ret) ++ dentry = ret; ++ if (dentry->d_inode) ++ fsstack_copy_attr_times(dentry->d_inode, ++ wrapfs_lower_inode(dentry->d_inode)); ++ /* update parent directory's atime */ ++ fsstack_copy_attr_atime(parent->d_inode, ++ wrapfs_lower_inode(parent->d_inode)); ++ ++out: ++ wrapfs_put_lower_path(parent, &lower_parent_path); ++ dput(parent); ++ return ret; ++} +diff --git a/fs/wrapfs/main.c b/fs/wrapfs/main.c +new file mode 100644 +index 0000000..130aca6 +--- /dev/null ++++ b/fs/wrapfs/main.c +@@ -0,0 +1,173 @@ ++/* ++ * Copyright (c) 1998-2011 Erez Zadok ++ * Copyright (c) 2009 Shrikar Archak ++ * Copyright (c) 2003-2011 Stony Brook University ++ * Copyright (c) 2003-2011 The Research Foundation of SUNY ++ * ++ * This program is free software; you can redistribute it and/or modify ++ * it under the terms of the GNU General Public License version 2 as ++ * published by the Free Software Foundation. 
++ */ ++ ++#include "wrapfs.h" ++#include ++ ++/* ++ * There is no need to lock the wrapfs_super_info's rwsem as there is no ++ * way anyone can have a reference to the superblock at this point in time. ++ */ ++static int wrapfs_read_super(struct super_block *sb, void *raw_data, int silent) ++{ ++ int err = 0; ++ struct super_block *lower_sb; ++ struct path lower_path; ++ char *dev_name = (char *) raw_data; ++ struct inode *inode; ++ ++ if (!dev_name) { ++ printk(KERN_ERR ++ "wrapfs: read_super: missing dev_name argument\n"); ++ err = -EINVAL; ++ goto out; ++ } ++ ++ /* parse lower path */ ++ err = kern_path(dev_name, LOOKUP_FOLLOW | LOOKUP_DIRECTORY, ++ &lower_path); ++ if (err) { ++ printk(KERN_ERR "wrapfs: error accessing " ++ "lower directory '%s'\n", dev_name); ++ goto out; ++ } ++ ++ /* allocate superblock private data */ ++ sb->s_fs_info = kzalloc(sizeof(struct wrapfs_sb_info), GFP_KERNEL); ++ if (!WRAPFS_SB(sb)) { ++ printk(KERN_CRIT "wrapfs: read_super: out of memory\n"); ++ err = -ENOMEM; ++ goto out_free; ++ } ++ ++ /* set the lower superblock field of upper superblock */ ++ lower_sb = lower_path.dentry->d_sb; ++ atomic_inc(&lower_sb->s_active); ++ wrapfs_set_lower_super(sb, lower_sb); ++ ++ /* inherit maxbytes from lower file system */ ++ sb->s_maxbytes = lower_sb->s_maxbytes; ++ ++ /* ++ * Our c/m/atime granularity is 1 ns because we may stack on file ++ * systems whose granularity is as good. ++ */ ++ sb->s_time_gran = 1; ++ ++ sb->s_op = &wrapfs_sops; ++ ++ /* get a new inode and allocate our root dentry */ ++ inode = wrapfs_iget(sb, lower_path.dentry->d_inode); ++ if (IS_ERR(inode)) { ++ err = PTR_ERR(inode); ++ goto out_sput; ++ } ++ sb->s_root = d_alloc_root(inode); ++ if (!sb->s_root) { ++ err = -ENOMEM; ++ goto out_iput; ++ } ++ d_set_d_op(sb->s_root, &wrapfs_dops); ++ ++ /* link the upper and lower dentries */ ++ sb->s_root->d_fsdata = NULL; ++ err = new_dentry_private_data(sb->s_root); ++ if (err) ++ goto out_freeroot; ++ ++ /* if get here: cannot have error */ ++ ++ /* set the lower dentries for s_root */ ++ wrapfs_set_lower_path(sb->s_root, &lower_path); ++ ++ /* ++ * No need to call interpose because we already have a positive ++ * dentry, which was instantiated by d_alloc_root. Just need to ++ * d_rehash it. 
++ */ ++ d_rehash(sb->s_root); ++ if (!silent) ++ printk(KERN_INFO ++ "wrapfs: mounted on top of %s type %s\n", ++ dev_name, lower_sb->s_type->name); ++ goto out; /* all is well */ ++ ++ /* no longer needed: free_dentry_private_data(sb->s_root); */ ++out_freeroot: ++ dput(sb->s_root); ++out_iput: ++ iput(inode); ++out_sput: ++ /* drop refs we took earlier */ ++ atomic_dec(&lower_sb->s_active); ++ kfree(WRAPFS_SB(sb)); ++ sb->s_fs_info = NULL; ++out_free: ++ path_put(&lower_path); ++ ++out: ++ return err; ++} ++ ++struct dentry *wrapfs_mount(struct file_system_type *fs_type, int flags, ++ const char *dev_name, void *raw_data) ++{ ++ void *lower_path_name = (void *) dev_name; ++ ++ return mount_nodev(fs_type, flags, lower_path_name, ++ wrapfs_read_super); ++} ++ ++static struct file_system_type wrapfs_fs_type = { ++ .owner = THIS_MODULE, ++ .name = WRAPFS_NAME, ++ .mount = wrapfs_mount, ++ .kill_sb = generic_shutdown_super, ++ .fs_flags = FS_REVAL_DOT, ++}; ++ ++static int __init init_wrapfs_fs(void) ++{ ++ int err; ++ ++ pr_info("Registering wrapfs " WRAPFS_VERSION "\n"); ++ ++ err = wrapfs_init_inode_cache(); ++ if (err) ++ goto out; ++ err = wrapfs_init_dentry_cache(); ++ if (err) ++ goto out; ++ err = register_filesystem(&wrapfs_fs_type); ++out: ++ if (err) { ++ wrapfs_destroy_inode_cache(); ++ wrapfs_destroy_dentry_cache(); ++ } ++ return err; ++} ++ ++static void __exit exit_wrapfs_fs(void) ++{ ++ wrapfs_destroy_inode_cache(); ++ wrapfs_destroy_dentry_cache(); ++ unregister_filesystem(&wrapfs_fs_type); ++ pr_info("Completed wrapfs module unload\n"); ++} ++ ++MODULE_AUTHOR("Erez Zadok, Filesystems and Storage Lab, Stony Brook University" ++ " (http://www.fsl.cs.sunysb.edu/)"); ++MODULE_DESCRIPTION("Wrapfs " WRAPFS_VERSION ++ " (http://wrapfs.filesystems.org/)"); ++MODULE_LICENSE("GPL"); ++ ++module_init(init_wrapfs_fs); ++module_exit(exit_wrapfs_fs); +diff --git a/fs/wrapfs/mmap.c b/fs/wrapfs/mmap.c +new file mode 100644 +index 0000000..c224fc3 +--- /dev/null ++++ b/fs/wrapfs/mmap.c +@@ -0,0 +1,53 @@ ++/* ++ * Copyright (c) 1998-2011 Erez Zadok ++ * Copyright (c) 2009 Shrikar Archak ++ * Copyright (c) 2003-2011 Stony Brook University ++ * Copyright (c) 2003-2011 The Research Foundation of SUNY ++ * ++ * This program is free software; you can redistribute it and/or modify ++ * it under the terms of the GNU General Public License version 2 as ++ * published by the Free Software Foundation. ++ */ ++ ++#include "wrapfs.h" ++ ++static int wrapfs_fault(struct vm_area_struct *vma, struct vm_fault *vmf) ++{ ++ int err; ++ struct file *file, *lower_file; ++ const struct vm_operations_struct *lower_vm_ops; ++ struct vm_area_struct lower_vma; ++ ++ memcpy(&lower_vma, vma, sizeof(struct vm_area_struct)); ++ file = lower_vma.vm_file; ++ lower_vm_ops = WRAPFS_F(file)->lower_vm_ops; ++ BUG_ON(!lower_vm_ops); ++ ++ lower_file = wrapfs_lower_file(file); ++ /* ++ * XXX: vm_ops->fault may be called in parallel. Because we have to ++ * resort to temporarily changing the vma->vm_file to point to the ++ * lower file, a concurrent invocation of wrapfs_fault could see a ++ * different value. In this workaround, we keep a different copy of ++ * the vma structure in our stack, so we never expose a different ++ * value of the vma->vm_file called to us, even temporarily. A ++ * better fix would be to change the calling semantics of ->fault to ++ * take an explicit file pointer. 
++ */ ++ lower_vma.vm_file = lower_file; ++ err = lower_vm_ops->fault(&lower_vma, vmf); ++ return err; ++} ++ ++/* ++ * XXX: the default address_space_ops for wrapfs is empty. We cannot set ++ * our inode->i_mapping->a_ops to NULL because too many code paths expect ++ * the a_ops vector to be non-NULL. ++ */ ++const struct address_space_operations wrapfs_aops = { ++ /* empty on purpose */ ++}; ++ ++const struct vm_operations_struct wrapfs_vm_ops = { ++ .fault = wrapfs_fault, ++}; +diff --git a/fs/wrapfs/super.c b/fs/wrapfs/super.c +new file mode 100644 +index 0000000..89d277d +--- /dev/null ++++ b/fs/wrapfs/super.c +@@ -0,0 +1,168 @@ ++/* ++ * Copyright (c) 1998-2011 Erez Zadok ++ * Copyright (c) 2009 Shrikar Archak ++ * Copyright (c) 2003-2011 Stony Brook University ++ * Copyright (c) 2003-2011 The Research Foundation of SUNY ++ * ++ * This program is free software; you can redistribute it and/or modify ++ * it under the terms of the GNU General Public License version 2 as ++ * published by the Free Software Foundation. ++ */ ++ ++#include "wrapfs.h" ++ ++/* ++ * The inode cache is used with alloc_inode for both our inode info and the ++ * vfs inode. ++ */ ++static struct kmem_cache *wrapfs_inode_cachep; ++ ++/* final actions when unmounting a file system */ ++static void wrapfs_put_super(struct super_block *sb) ++{ ++ struct wrapfs_sb_info *spd; ++ struct super_block *s; ++ ++ spd = WRAPFS_SB(sb); ++ if (!spd) ++ return; ++ ++ /* decrement lower super references */ ++ s = wrapfs_lower_super(sb); ++ wrapfs_set_lower_super(sb, NULL); ++ atomic_dec(&s->s_active); ++ ++ kfree(spd); ++ sb->s_fs_info = NULL; ++} ++ ++static int wrapfs_statfs(struct dentry *dentry, struct kstatfs *buf) ++{ ++ int err; ++ struct path lower_path; ++ ++ wrapfs_get_lower_path(dentry, &lower_path); ++ err = vfs_statfs(&lower_path, buf); ++ wrapfs_put_lower_path(dentry, &lower_path); ++ ++ /* set return buf to our f/s to avoid confusing user-level utils */ ++ buf->f_type = WRAPFS_SUPER_MAGIC; ++ ++ return err; ++} ++ ++/* ++ * @flags: numeric mount options ++ * @options: mount options string ++ */ ++static int wrapfs_remount_fs(struct super_block *sb, int *flags, char *options) ++{ ++ int err = 0; ++ ++ /* ++ * The VFS will take care of "ro" and "rw" flags among others. We ++ * can safely accept a few flags (RDONLY, MANDLOCK), and honor ++ * SILENT, but anything else left over is an error. ++ */ ++ if ((*flags & ~(MS_RDONLY | MS_MANDLOCK | MS_SILENT)) != 0) { ++ printk(KERN_ERR ++ "wrapfs: remount flags 0x%x unsupported\n", *flags); ++ err = -EINVAL; ++ } ++ ++ return err; ++} ++ ++/* ++ * Called by iput() when the inode reference count reached zero ++ * and the inode is not hashed anywhere. Used to clear anything ++ * that needs to be, before the inode is completely destroyed and put ++ * on the inode free list. ++ */ ++static void wrapfs_evict_inode(struct inode *inode) ++{ ++ struct inode *lower_inode; ++ ++ truncate_inode_pages(&inode->i_data, 0); ++ end_writeback(inode); ++ /* ++ * Decrement a reference to a lower_inode, which was incremented ++ * by our read_inode when it was created initially. 
++ */ ++ lower_inode = wrapfs_lower_inode(inode); ++ wrapfs_set_lower_inode(inode, NULL); ++ iput(lower_inode); ++} ++ ++static struct inode *wrapfs_alloc_inode(struct super_block *sb) ++{ ++ struct wrapfs_inode_info *i; ++ ++ i = kmem_cache_alloc(wrapfs_inode_cachep, GFP_KERNEL); ++ if (!i) ++ return NULL; ++ ++ /* memset everything up to the inode to 0 */ ++ memset(i, 0, offsetof(struct wrapfs_inode_info, vfs_inode)); ++ ++ i->vfs_inode.i_version = 1; ++ return &i->vfs_inode; ++} ++ ++static void wrapfs_destroy_inode(struct inode *inode) ++{ ++ kmem_cache_free(wrapfs_inode_cachep, WRAPFS_I(inode)); ++} ++ ++/* wrapfs inode cache constructor */ ++static void init_once(void *obj) ++{ ++ struct wrapfs_inode_info *i = obj; ++ ++ inode_init_once(&i->vfs_inode); ++} ++ ++int wrapfs_init_inode_cache(void) ++{ ++ int err = 0; ++ ++ wrapfs_inode_cachep = ++ kmem_cache_create("wrapfs_inode_cache", ++ sizeof(struct wrapfs_inode_info), 0, ++ SLAB_RECLAIM_ACCOUNT, init_once); ++ if (!wrapfs_inode_cachep) ++ err = -ENOMEM; ++ return err; ++} ++ ++/* wrapfs inode cache destructor */ ++void wrapfs_destroy_inode_cache(void) ++{ ++ if (wrapfs_inode_cachep) ++ kmem_cache_destroy(wrapfs_inode_cachep); ++} ++ ++/* ++ * Used only in nfs, to kill any pending RPC tasks, so that subsequent ++ * code can actually succeed and won't leave tasks that need handling. ++ */ ++static void wrapfs_umount_begin(struct super_block *sb) ++{ ++ struct super_block *lower_sb; ++ ++ lower_sb = wrapfs_lower_super(sb); ++ if (lower_sb && lower_sb->s_op && lower_sb->s_op->umount_begin) ++ lower_sb->s_op->umount_begin(lower_sb); ++} ++ ++const struct super_operations wrapfs_sops = { ++ .put_super = wrapfs_put_super, ++ .statfs = wrapfs_statfs, ++ .remount_fs = wrapfs_remount_fs, ++ .evict_inode = wrapfs_evict_inode, ++ .umount_begin = wrapfs_umount_begin, ++ .show_options = generic_show_options, ++ .alloc_inode = wrapfs_alloc_inode, ++ .destroy_inode = wrapfs_destroy_inode, ++ .drop_inode = generic_delete_inode, ++}; +diff --git a/fs/wrapfs/wrapfs.h b/fs/wrapfs/wrapfs.h +new file mode 100644 +index 0000000..25b5795 +--- /dev/null ++++ b/fs/wrapfs/wrapfs.h +@@ -0,0 +1,204 @@ ++/* ++ * Copyright (c) 1998-2011 Erez Zadok ++ * Copyright (c) 2009 Shrikar Archak ++ * Copyright (c) 2003-2011 Stony Brook University ++ * Copyright (c) 2003-2011 The Research Foundation of SUNY ++ * ++ * This program is free software; you can redistribute it and/or modify ++ * it under the terms of the GNU General Public License version 2 as ++ * published by the Free Software Foundation. 
++ */ ++ ++#ifndef _WRAPFS_H_ ++#define _WRAPFS_H_ ++ ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++ ++/* the file system name */ ++#define WRAPFS_NAME "wrapfs" ++ ++/* wrapfs root inode number */ ++#define WRAPFS_ROOT_INO 1 ++ ++/* useful for tracking code reachability */ ++#define UDBG printk(KERN_DEFAULT "DBG:%s:%s:%d\n", __FILE__, __func__, __LINE__) ++ ++/* operations vectors defined in specific files */ ++extern const struct file_operations wrapfs_main_fops; ++extern const struct file_operations wrapfs_dir_fops; ++extern const struct inode_operations wrapfs_main_iops; ++extern const struct inode_operations wrapfs_dir_iops; ++extern const struct inode_operations wrapfs_symlink_iops; ++extern const struct super_operations wrapfs_sops; ++extern const struct dentry_operations wrapfs_dops; ++extern const struct address_space_operations wrapfs_aops, wrapfs_dummy_aops; ++extern const struct vm_operations_struct wrapfs_vm_ops; ++ ++extern int wrapfs_init_inode_cache(void); ++extern void wrapfs_destroy_inode_cache(void); ++extern int wrapfs_init_dentry_cache(void); ++extern void wrapfs_destroy_dentry_cache(void); ++extern int new_dentry_private_data(struct dentry *dentry); ++extern void free_dentry_private_data(struct dentry *dentry); ++extern struct dentry *wrapfs_lookup(struct inode *dir, struct dentry *dentry, ++ struct nameidata *nd); ++extern struct inode *wrapfs_iget(struct super_block *sb, ++ struct inode *lower_inode); ++extern int wrapfs_interpose(struct dentry *dentry, struct super_block *sb, ++ struct path *lower_path); ++ ++/* file private data */ ++struct wrapfs_file_info { ++ struct file *lower_file; ++ const struct vm_operations_struct *lower_vm_ops; ++}; ++ ++/* wrapfs inode data in memory */ ++struct wrapfs_inode_info { ++ struct inode *lower_inode; ++ struct inode vfs_inode; ++}; ++ ++/* wrapfs dentry data in memory */ ++struct wrapfs_dentry_info { ++ spinlock_t lock; /* protects lower_path */ ++ struct path lower_path; ++}; ++ ++/* wrapfs super-block data in memory */ ++struct wrapfs_sb_info { ++ struct super_block *lower_sb; ++}; ++ ++/* ++ * inode to private data ++ * ++ * Since we use containers and the struct inode is _inside_ the ++ * wrapfs_inode_info structure, WRAPFS_I will always (given a non-NULL ++ * inode pointer), return a valid non-NULL pointer. ++ */ ++static inline struct wrapfs_inode_info *WRAPFS_I(const struct inode *inode) ++{ ++ return container_of(inode, struct wrapfs_inode_info, vfs_inode); ++} ++ ++/* dentry to private data */ ++#define WRAPFS_D(dent) ((struct wrapfs_dentry_info *)(dent)->d_fsdata) ++ ++/* superblock to private data */ ++#define WRAPFS_SB(super) ((struct wrapfs_sb_info *)(super)->s_fs_info) ++ ++/* file to private Data */ ++#define WRAPFS_F(file) ((struct wrapfs_file_info *)((file)->private_data)) ++ ++/* file to lower file */ ++static inline struct file *wrapfs_lower_file(const struct file *f) ++{ ++ return WRAPFS_F(f)->lower_file; ++} ++ ++static inline void wrapfs_set_lower_file(struct file *f, struct file *val) ++{ ++ WRAPFS_F(f)->lower_file = val; ++} ++ ++/* inode to lower inode. 
*/ ++static inline struct inode *wrapfs_lower_inode(const struct inode *i) ++{ ++ return WRAPFS_I(i)->lower_inode; ++} ++ ++static inline void wrapfs_set_lower_inode(struct inode *i, struct inode *val) ++{ ++ WRAPFS_I(i)->lower_inode = val; ++} ++ ++/* superblock to lower superblock */ ++static inline struct super_block *wrapfs_lower_super( ++ const struct super_block *sb) ++{ ++ return WRAPFS_SB(sb)->lower_sb; ++} ++ ++static inline void wrapfs_set_lower_super(struct super_block *sb, ++ struct super_block *val) ++{ ++ WRAPFS_SB(sb)->lower_sb = val; ++} ++ ++/* path based (dentry/mnt) macros */ ++static inline void pathcpy(struct path *dst, const struct path *src) ++{ ++ dst->dentry = src->dentry; ++ dst->mnt = src->mnt; ++} ++/* Returns struct path. Caller must path_put it. */ ++static inline void wrapfs_get_lower_path(const struct dentry *dent, ++ struct path *lower_path) ++{ ++ spin_lock(&WRAPFS_D(dent)->lock); ++ pathcpy(lower_path, &WRAPFS_D(dent)->lower_path); ++ path_get(lower_path); ++ spin_unlock(&WRAPFS_D(dent)->lock); ++ return; ++} ++static inline void wrapfs_put_lower_path(const struct dentry *dent, ++ struct path *lower_path) ++{ ++ path_put(lower_path); ++ return; ++} ++static inline void wrapfs_set_lower_path(const struct dentry *dent, ++ struct path *lower_path) ++{ ++ spin_lock(&WRAPFS_D(dent)->lock); ++ pathcpy(&WRAPFS_D(dent)->lower_path, lower_path); ++ spin_unlock(&WRAPFS_D(dent)->lock); ++ return; ++} ++static inline void wrapfs_reset_lower_path(const struct dentry *dent) ++{ ++ spin_lock(&WRAPFS_D(dent)->lock); ++ WRAPFS_D(dent)->lower_path.dentry = NULL; ++ WRAPFS_D(dent)->lower_path.mnt = NULL; ++ spin_unlock(&WRAPFS_D(dent)->lock); ++ return; ++} ++static inline void wrapfs_put_reset_lower_path(const struct dentry *dent) ++{ ++ struct path lower_path; ++ spin_lock(&WRAPFS_D(dent)->lock); ++ pathcpy(&lower_path, &WRAPFS_D(dent)->lower_path); ++ WRAPFS_D(dent)->lower_path.dentry = NULL; ++ WRAPFS_D(dent)->lower_path.mnt = NULL; ++ spin_unlock(&WRAPFS_D(dent)->lock); ++ path_put(&lower_path); ++ return; ++} ++ ++/* locking helpers */ ++static inline struct dentry *lock_parent(struct dentry *dentry) ++{ ++ struct dentry *dir = dget_parent(dentry); ++ mutex_lock_nested(&dir->d_inode->i_mutex, I_MUTEX_PARENT); ++ return dir; ++} ++ ++static inline void unlock_dir(struct dentry *dir) ++{ ++ mutex_unlock(&dir->d_inode->i_mutex); ++ dput(dir); ++} ++#endif /* not _WRAPFS_H_ */ +diff --git a/include/linux/magic.h b/include/linux/magic.h +index 2d4beab..8ef0170 100644 +--- a/include/linux/magic.h ++++ b/include/linux/magic.h +@@ -50,6 +50,8 @@ + #define REISER2FS_SUPER_MAGIC_STRING "ReIsEr2Fs" + #define REISER2FS_JR_SUPER_MAGIC_STRING "ReIsEr3Fs" + ++#define WRAPFS_SUPER_MAGIC 0xb550ca10 ++ + #define SMB_SUPER_MAGIC 0x517B + #define USBDEVICE_SUPER_MAGIC 0x9fa2 + #define CGROUP_SUPER_MAGIC 0x27e0eb diff --git a/3.3.8/0001-AppArmor-compatibility-patch-for-v5-network-controll.patch b/3.3.8/0001-AppArmor-compatibility-patch-for-v5-network-controll.patch new file mode 100644 index 0000000..00c8712 --- /dev/null +++ b/3.3.8/0001-AppArmor-compatibility-patch-for-v5-network-controll.patch @@ -0,0 +1,553 @@ +From dc13dec93dbd04bfa7a9ba67df1b8ed3431d8d48 Mon Sep 17 00:00:00 2001 +From: John Johansen +Date: Wed, 10 Aug 2011 22:02:39 -0700 +Subject: [PATCH 1/3] AppArmor: compatibility patch for v5 network controll + +Add compatibility for v5 network rules. 
+ +Signed-off-by: John Johansen +--- + include/linux/lsm_audit.h | 4 + + security/apparmor/Makefile | 19 ++++- + security/apparmor/include/net.h | 40 +++++++++ + security/apparmor/include/policy.h | 3 + + security/apparmor/lsm.c | 112 +++++++++++++++++++++++ + security/apparmor/net.c | 170 ++++++++++++++++++++++++++++++++++++ + security/apparmor/policy.c | 1 + + security/apparmor/policy_unpack.c | 48 ++++++++++- + 8 files changed, 394 insertions(+), 3 deletions(-) + create mode 100644 security/apparmor/include/net.h + create mode 100644 security/apparmor/net.c + +diff --git a/include/linux/lsm_audit.h b/include/linux/lsm_audit.h +index 88e78de..c63979a 100644 +--- a/include/linux/lsm_audit.h ++++ b/include/linux/lsm_audit.h +@@ -124,6 +124,10 @@ struct common_audit_data { + u32 denied; + uid_t ouid; + } fs; ++ struct { ++ int type, protocol; ++ struct sock *sk; ++ } net; + }; + } apparmor_audit_data; + #endif +diff --git a/security/apparmor/Makefile b/security/apparmor/Makefile +index 2dafe50..7cefef9 100644 +--- a/security/apparmor/Makefile ++++ b/security/apparmor/Makefile +@@ -4,9 +4,9 @@ obj-$(CONFIG_SECURITY_APPARMOR) += apparmor.o + + apparmor-y := apparmorfs.o audit.o capability.o context.o ipc.o lib.o match.o \ + path.o domain.o policy.o policy_unpack.o procattr.o lsm.o \ +- resource.o sid.o file.o ++ resource.o sid.o file.o net.o + +-clean-files := capability_names.h rlim_names.h ++clean-files := capability_names.h rlim_names.h af_names.h + + + # Build a lower case string table of capability names +@@ -44,9 +44,24 @@ cmd_make-rlim = echo "static const char *rlim_names[] = {" > $@ ;\ + sed -r -n "s/^\# ?define[ \t]+(RLIMIT_[A-Z0-9_]+).*/\1,/p" $< >> $@ ;\ + echo "};" >> $@ + ++# Build a lower case string table of address family names. ++# Transform lines from ++# #define AF_INET 2 /* Internet IP Protocol */ ++# to ++# [2] = "inet", ++quiet_cmd_make-af = GEN $@ ++cmd_make-af = echo "static const char *address_family_names[] = {" > $@ ;\ ++ sed $< >> $@ -r -n -e "/AF_MAX/d" -e "/AF_LOCAL/d" -e \ ++ 's/^\#define[ \t]+AF_([A-Z0-9_]+)[ \t]+([0-9]+).*/[\2] = "\L\1",/p';\ ++ echo "};" >> $@ ++ ++ + $(obj)/capability.o : $(obj)/capability_names.h + $(obj)/resource.o : $(obj)/rlim_names.h ++$(obj)/net.o : $(obj)/af_names.h + $(obj)/capability_names.h : $(srctree)/include/linux/capability.h + $(call cmd,make-caps) + $(obj)/rlim_names.h : $(srctree)/include/asm-generic/resource.h + $(call cmd,make-rlim) ++$(obj)/af_names.h : $(srctree)/include/linux/socket.h ++ $(call cmd,make-af) +\ No newline at end of file +diff --git a/security/apparmor/include/net.h b/security/apparmor/include/net.h +new file mode 100644 +index 0000000..3c7d599 +--- /dev/null ++++ b/security/apparmor/include/net.h +@@ -0,0 +1,40 @@ ++/* ++ * AppArmor security module ++ * ++ * This file contains AppArmor network mediation definitions. ++ * ++ * Copyright (C) 1998-2008 Novell/SUSE ++ * Copyright 2009-2010 Canonical Ltd. ++ * ++ * This program is free software; you can redistribute it and/or ++ * modify it under the terms of the GNU General Public License as ++ * published by the Free Software Foundation, version 2 of the ++ * License. 
++ */ ++ ++#ifndef __AA_NET_H ++#define __AA_NET_H ++ ++#include ++ ++/* struct aa_net - network confinement data ++ * @allowed: basic network families permissions ++ * @audit_network: which network permissions to force audit ++ * @quiet_network: which network permissions to quiet rejects ++ */ ++struct aa_net { ++ u16 allow[AF_MAX]; ++ u16 audit[AF_MAX]; ++ u16 quiet[AF_MAX]; ++}; ++ ++extern int aa_net_perm(int op, struct aa_profile *profile, u16 family, ++ int type, int protocol, struct sock *sk); ++extern int aa_revalidate_sk(int op, struct sock *sk); ++ ++static inline void aa_free_net_rules(struct aa_net *new) ++{ ++ /* NOP */ ++} ++ ++#endif /* __AA_NET_H */ +diff --git a/security/apparmor/include/policy.h b/security/apparmor/include/policy.h +index aeda5cf..6776929 100644 +--- a/security/apparmor/include/policy.h ++++ b/security/apparmor/include/policy.h +@@ -27,6 +27,7 @@ + #include "capability.h" + #include "domain.h" + #include "file.h" ++#include "net.h" + #include "resource.h" + + extern const char *profile_mode_names[]; +@@ -145,6 +146,7 @@ struct aa_namespace { + * @size: the memory consumed by this profiles rules + * @file: The set of rules governing basic file access and domain transitions + * @caps: capabilities for the profile ++ * @net: network controls for the profile + * @rlimits: rlimits for the profile + * + * The AppArmor profile contains the basic confinement data. Each profile +@@ -181,6 +183,7 @@ struct aa_profile { + + struct aa_file_rules file; + struct aa_caps caps; ++ struct aa_net net; + struct aa_rlimit rlimits; + }; + +diff --git a/security/apparmor/lsm.c b/security/apparmor/lsm.c +index 3d2fd14..aa293ae 100644 +--- a/security/apparmor/lsm.c ++++ b/security/apparmor/lsm.c +@@ -32,6 +32,7 @@ + #include "include/context.h" + #include "include/file.h" + #include "include/ipc.h" ++#include "include/net.h" + #include "include/path.h" + #include "include/policy.h" + #include "include/procattr.h" +@@ -621,6 +622,104 @@ static int apparmor_task_setrlimit(struct task_struct *task, + return error; + } + ++static int apparmor_socket_create(int family, int type, int protocol, int kern) ++{ ++ struct aa_profile *profile; ++ int error = 0; ++ ++ if (kern) ++ return 0; ++ ++ profile = __aa_current_profile(); ++ if (!unconfined(profile)) ++ error = aa_net_perm(OP_CREATE, profile, family, type, protocol, ++ NULL); ++ return error; ++} ++ ++static int apparmor_socket_bind(struct socket *sock, ++ struct sockaddr *address, int addrlen) ++{ ++ struct sock *sk = sock->sk; ++ ++ return aa_revalidate_sk(OP_BIND, sk); ++} ++ ++static int apparmor_socket_connect(struct socket *sock, ++ struct sockaddr *address, int addrlen) ++{ ++ struct sock *sk = sock->sk; ++ ++ return aa_revalidate_sk(OP_CONNECT, sk); ++} ++ ++static int apparmor_socket_listen(struct socket *sock, int backlog) ++{ ++ struct sock *sk = sock->sk; ++ ++ return aa_revalidate_sk(OP_LISTEN, sk); ++} ++ ++static int apparmor_socket_accept(struct socket *sock, struct socket *newsock) ++{ ++ struct sock *sk = sock->sk; ++ ++ return aa_revalidate_sk(OP_ACCEPT, sk); ++} ++ ++static int apparmor_socket_sendmsg(struct socket *sock, ++ struct msghdr *msg, int size) ++{ ++ struct sock *sk = sock->sk; ++ ++ return aa_revalidate_sk(OP_SENDMSG, sk); ++} ++ ++static int apparmor_socket_recvmsg(struct socket *sock, ++ struct msghdr *msg, int size, int flags) ++{ ++ struct sock *sk = sock->sk; ++ ++ return aa_revalidate_sk(OP_RECVMSG, sk); ++} ++ ++static int apparmor_socket_getsockname(struct socket *sock) ++{ ++ struct sock *sk = 
sock->sk; ++ ++ return aa_revalidate_sk(OP_GETSOCKNAME, sk); ++} ++ ++static int apparmor_socket_getpeername(struct socket *sock) ++{ ++ struct sock *sk = sock->sk; ++ ++ return aa_revalidate_sk(OP_GETPEERNAME, sk); ++} ++ ++static int apparmor_socket_getsockopt(struct socket *sock, int level, ++ int optname) ++{ ++ struct sock *sk = sock->sk; ++ ++ return aa_revalidate_sk(OP_GETSOCKOPT, sk); ++} ++ ++static int apparmor_socket_setsockopt(struct socket *sock, int level, ++ int optname) ++{ ++ struct sock *sk = sock->sk; ++ ++ return aa_revalidate_sk(OP_SETSOCKOPT, sk); ++} ++ ++static int apparmor_socket_shutdown(struct socket *sock, int how) ++{ ++ struct sock *sk = sock->sk; ++ ++ return aa_revalidate_sk(OP_SOCK_SHUTDOWN, sk); ++} ++ + static struct security_operations apparmor_ops = { + .name = "apparmor", + +@@ -652,6 +751,19 @@ static struct security_operations apparmor_ops = { + .getprocattr = apparmor_getprocattr, + .setprocattr = apparmor_setprocattr, + ++ .socket_create = apparmor_socket_create, ++ .socket_bind = apparmor_socket_bind, ++ .socket_connect = apparmor_socket_connect, ++ .socket_listen = apparmor_socket_listen, ++ .socket_accept = apparmor_socket_accept, ++ .socket_sendmsg = apparmor_socket_sendmsg, ++ .socket_recvmsg = apparmor_socket_recvmsg, ++ .socket_getsockname = apparmor_socket_getsockname, ++ .socket_getpeername = apparmor_socket_getpeername, ++ .socket_getsockopt = apparmor_socket_getsockopt, ++ .socket_setsockopt = apparmor_socket_setsockopt, ++ .socket_shutdown = apparmor_socket_shutdown, ++ + .cred_alloc_blank = apparmor_cred_alloc_blank, + .cred_free = apparmor_cred_free, + .cred_prepare = apparmor_cred_prepare, +diff --git a/security/apparmor/net.c b/security/apparmor/net.c +new file mode 100644 +index 0000000..1765901 +--- /dev/null ++++ b/security/apparmor/net.c +@@ -0,0 +1,170 @@ ++/* ++ * AppArmor security module ++ * ++ * This file contains AppArmor network mediation ++ * ++ * Copyright (C) 1998-2008 Novell/SUSE ++ * Copyright 2009-2010 Canonical Ltd. ++ * ++ * This program is free software; you can redistribute it and/or ++ * modify it under the terms of the GNU General Public License as ++ * published by the Free Software Foundation, version 2 of the ++ * License. 
++ */ ++ ++#include "include/apparmor.h" ++#include "include/audit.h" ++#include "include/context.h" ++#include "include/net.h" ++#include "include/policy.h" ++ ++#include "af_names.h" ++ ++static const char *sock_type_names[] = { ++ "unknown(0)", ++ "stream", ++ "dgram", ++ "raw", ++ "rdm", ++ "seqpacket", ++ "dccp", ++ "unknown(7)", ++ "unknown(8)", ++ "unknown(9)", ++ "packet", ++}; ++ ++/* audit callback for net specific fields */ ++static void audit_cb(struct audit_buffer *ab, void *va) ++{ ++ struct common_audit_data *sa = va; ++ ++ audit_log_format(ab, " family="); ++ if (address_family_names[sa->u.net.family]) { ++ audit_log_string(ab, address_family_names[sa->u.net.family]); ++ } else { ++ audit_log_format(ab, " \"unknown(%d)\"", sa->u.net.family); ++ } ++ ++ audit_log_format(ab, " sock_type="); ++ if (sock_type_names[sa->aad.net.type]) { ++ audit_log_string(ab, sock_type_names[sa->aad.net.type]); ++ } else { ++ audit_log_format(ab, "\"unknown(%d)\"", sa->aad.net.type); ++ } ++ ++ audit_log_format(ab, " protocol=%d", sa->aad.net.protocol); ++} ++ ++/** ++ * audit_net - audit network access ++ * @profile: profile being enforced (NOT NULL) ++ * @op: operation being checked ++ * @family: network family ++ * @type: network type ++ * @protocol: network protocol ++ * @sk: socket auditing is being applied to ++ * @error: error code for failure else 0 ++ * ++ * Returns: %0 or sa->error else other errorcode on failure ++ */ ++static int audit_net(struct aa_profile *profile, int op, u16 family, int type, ++ int protocol, struct sock *sk, int error) ++{ ++ int audit_type = AUDIT_APPARMOR_AUTO; ++ struct common_audit_data sa; ++ if (sk) { ++ COMMON_AUDIT_DATA_INIT(&sa, NET); ++ } else { ++ COMMON_AUDIT_DATA_INIT(&sa, NONE); ++ } ++ /* todo fill in socket addr info */ ++ ++ sa.aad.op = op, ++ sa.u.net.family = family; ++ sa.u.net.sk = sk; ++ sa.aad.net.type = type; ++ sa.aad.net.protocol = protocol; ++ sa.aad.error = error; ++ ++ if (likely(!sa.aad.error)) { ++ u16 audit_mask = profile->net.audit[sa.u.net.family]; ++ if (likely((AUDIT_MODE(profile) != AUDIT_ALL) && ++ !(1 << sa.aad.net.type & audit_mask))) ++ return 0; ++ audit_type = AUDIT_APPARMOR_AUDIT; ++ } else { ++ u16 quiet_mask = profile->net.quiet[sa.u.net.family]; ++ u16 kill_mask = 0; ++ u16 denied = (1 << sa.aad.net.type) & ~quiet_mask; ++ ++ if (denied & kill_mask) ++ audit_type = AUDIT_APPARMOR_KILL; ++ ++ if ((denied & quiet_mask) && ++ AUDIT_MODE(profile) != AUDIT_NOQUIET && ++ AUDIT_MODE(profile) != AUDIT_ALL) ++ return COMPLAIN_MODE(profile) ? 0 : sa.aad.error; ++ } ++ ++ return aa_audit(audit_type, profile, GFP_KERNEL, &sa, audit_cb); ++} ++ ++/** ++ * aa_net_perm - very course network access check ++ * @op: operation being checked ++ * @profile: profile being enforced (NOT NULL) ++ * @family: network family ++ * @type: network type ++ * @protocol: network protocol ++ * ++ * Returns: %0 else error if permission denied ++ */ ++int aa_net_perm(int op, struct aa_profile *profile, u16 family, int type, ++ int protocol, struct sock *sk) ++{ ++ u16 family_mask; ++ int error; ++ ++ if ((family < 0) || (family >= AF_MAX)) ++ return -EINVAL; ++ ++ if ((type < 0) || (type >= SOCK_MAX)) ++ return -EINVAL; ++ ++ /* unix domain and netlink sockets are handled by ipc */ ++ if (family == AF_UNIX || family == AF_NETLINK) ++ return 0; ++ ++ family_mask = profile->net.allow[family]; ++ ++ error = (family_mask & (1 << type)) ? 
0 : -EACCES; ++ ++ return audit_net(profile, op, family, type, protocol, sk, error); ++} ++ ++/** ++ * aa_revalidate_sk - Revalidate access to a sock ++ * @op: operation being checked ++ * @sk: sock being revalidated (NOT NULL) ++ * ++ * Returns: %0 else error if permission denied ++ */ ++int aa_revalidate_sk(int op, struct sock *sk) ++{ ++ struct aa_profile *profile; ++ int error = 0; ++ ++ /* aa_revalidate_sk should not be called from interrupt context ++ * don't mediate these calls as they are not task related ++ */ ++ if (in_interrupt()) ++ return 0; ++ ++ profile = __aa_current_profile(); ++ if (!unconfined(profile)) ++ error = aa_net_perm(op, profile, sk->sk_family, sk->sk_type, ++ sk->sk_protocol, sk); ++ ++ return error; ++} +diff --git a/security/apparmor/policy.c b/security/apparmor/policy.c +index 4f0eade..4d5ce13 100644 +--- a/security/apparmor/policy.c ++++ b/security/apparmor/policy.c +@@ -745,6 +745,7 @@ static void free_profile(struct aa_profile *profile) + + aa_free_file_rules(&profile->file); + aa_free_cap_rules(&profile->caps); ++ aa_free_net_rules(&profile->net); + aa_free_rlimit_rules(&profile->rlimits); + + aa_free_sid(profile->sid); +diff --git a/security/apparmor/policy_unpack.c b/security/apparmor/policy_unpack.c +index d6d9a57..f4874c4 100644 +--- a/security/apparmor/policy_unpack.c ++++ b/security/apparmor/policy_unpack.c +@@ -190,6 +190,19 @@ fail: + return 0; + } + ++static bool unpack_u16(struct aa_ext *e, u16 *data, const char *name) ++{ ++ if (unpack_nameX(e, AA_U16, name)) { ++ if (!inbounds(e, sizeof(u16))) ++ return 0; ++ if (data) ++ *data = le16_to_cpu(get_unaligned((u16 *) e->pos)); ++ e->pos += sizeof(u16); ++ return 1; ++ } ++ return 0; ++} ++ + static bool unpack_u32(struct aa_ext *e, u32 *data, const char *name) + { + if (unpack_nameX(e, AA_U32, name)) { +@@ -468,7 +481,8 @@ static struct aa_profile *unpack_profile(struct aa_ext *e) + { + struct aa_profile *profile = NULL; + const char *name = NULL; +- int error = -EPROTO; ++ size_t size = 0; ++ int i, error = -EPROTO; + kernel_cap_t tmpcap; + u32 tmp; + +@@ -559,6 +573,38 @@ static struct aa_profile *unpack_profile(struct aa_ext *e) + if (!unpack_rlimits(e, profile)) + goto fail; + ++ size = unpack_array(e, "net_allowed_af"); ++ if (size) { ++ ++ for (i = 0; i < size; i++) { ++ /* discard extraneous rules that this kernel will ++ * never request ++ */ ++ if (i >= AF_MAX) { ++ u16 tmp; ++ if (!unpack_u16(e, &tmp, NULL) || ++ !unpack_u16(e, &tmp, NULL) || ++ !unpack_u16(e, &tmp, NULL)) ++ goto fail; ++ continue; ++ } ++ if (!unpack_u16(e, &profile->net.allow[i], NULL)) ++ goto fail; ++ if (!unpack_u16(e, &profile->net.audit[i], NULL)) ++ goto fail; ++ if (!unpack_u16(e, &profile->net.quiet[i], NULL)) ++ goto fail; ++ } ++ if (!unpack_nameX(e, AA_ARRAYEND, NULL)) ++ goto fail; ++ /* ++ * allow unix domain and netlink sockets they are handled ++ * by IPC ++ */ ++ } ++ profile->net.allow[AF_UNIX] = 0xffff; ++ profile->net.allow[AF_NETLINK] = 0xffff; ++ + /* get file rules */ + profile->file.dfa = unpack_dfa(e); + if (IS_ERR(profile->file.dfa)) { +-- +1.7.5.4 + diff --git a/3.3.8/0001-block-cgroups-kconfig-build-bits-for-BFQ-v5-3.3.patch b/3.3.8/0001-block-cgroups-kconfig-build-bits-for-BFQ-v5-3.3.patch new file mode 100644 index 0000000..0ea5a23 --- /dev/null +++ b/3.3.8/0001-block-cgroups-kconfig-build-bits-for-BFQ-v5-3.3.patch @@ -0,0 +1,99 @@ +From 2ddeb856c43f60139fc5c8e2ed9bc350b9bed590 Mon Sep 17 00:00:00 2001 +From: Arianna Avanzini +Date: Sat, 4 Feb 2012 10:55:51 +0100 +Subject: [PATCH 1/2] 
block: cgroups, kconfig, build bits for BFQ-v5-3.3 + +Update Kconfig.iosched to include kernel configuration options +for BFQ. Add a Kconfig option and do the related Makefile changes +to compile the scheduler. Also let the cgroups subsystem know about +the BFQ I/O controller. + +Signed-off-by: Fabio Checconi +Signed-off-by: Paolo Valente +Signed-off-by: Arianna Avanzini +--- + block/Kconfig.iosched | 26 ++++++++++++++++++++++++++ + block/Makefile | 1 + + include/linux/cgroup_subsys.h | 6 ++++++ + 3 files changed, 33 insertions(+) + +diff --git a/block/Kconfig.iosched b/block/Kconfig.iosched +index 3199b76..5905452 100644 +--- a/block/Kconfig.iosched ++++ b/block/Kconfig.iosched +@@ -43,6 +43,28 @@ config CFQ_GROUP_IOSCHED + ---help--- + Enable group IO scheduling in CFQ. + ++config IOSCHED_BFQ ++ tristate "BFQ I/O scheduler" ++ depends on EXPERIMENTAL ++ default n ++ ---help--- ++ The BFQ I/O scheduler tries to distribute bandwidth among ++ all processes according to their weights. ++ It aims at distributing the bandwidth as desired, independently of ++ the disk parameters and with any workload. It also tries to ++ guarantee low latency to interactive and soft real-time ++ applications. If compiled built-in (saying Y here), BFQ can ++ be configured to support hierarchical scheduling. ++ ++config CGROUP_BFQIO ++ bool "BFQ hierarchical scheduling support" ++ depends on CGROUPS && IOSCHED_BFQ=y ++ default n ++ ---help--- ++ Enable hierarchical scheduling in BFQ, using the cgroups ++ filesystem interface. The name of the subsystem will be ++ bfqio. ++ + choice + prompt "Default I/O scheduler" + default DEFAULT_CFQ +@@ -56,6 +78,9 @@ choice + config DEFAULT_CFQ + bool "CFQ" if IOSCHED_CFQ=y + ++ config DEFAULT_BFQ ++ bool "BFQ" if IOSCHED_BFQ=y ++ + config DEFAULT_NOOP + bool "No-op" + +@@ -65,6 +90,7 @@ config DEFAULT_IOSCHED + string + default "deadline" if DEFAULT_DEADLINE + default "cfq" if DEFAULT_CFQ ++ default "bfq" if DEFAULT_BFQ + default "noop" if DEFAULT_NOOP + + endmenu +diff --git a/block/Makefile b/block/Makefile +index 39b76ba..c0d20fa 100644 +--- a/block/Makefile ++++ b/block/Makefile +@@ -15,6 +15,7 @@ obj-$(CONFIG_BLK_DEV_THROTTLING) += blk-throttle.o + obj-$(CONFIG_IOSCHED_NOOP) += noop-iosched.o + obj-$(CONFIG_IOSCHED_DEADLINE) += deadline-iosched.o + obj-$(CONFIG_IOSCHED_CFQ) += cfq-iosched.o ++obj-$(CONFIG_IOSCHED_BFQ) += bfq-iosched.o + + obj-$(CONFIG_BLOCK_COMPAT) += compat_ioctl.o + obj-$(CONFIG_BLK_DEV_INTEGRITY) += blk-integrity.o +diff --git a/include/linux/cgroup_subsys.h b/include/linux/cgroup_subsys.h +index 0bd390c..cbf22b1 100644 +--- a/include/linux/cgroup_subsys.h ++++ b/include/linux/cgroup_subsys.h +@@ -72,3 +72,9 @@ SUBSYS(net_prio) + #endif + + /* */ ++ ++#ifdef CONFIG_CGROUP_BFQIO ++SUBSYS(bfqio) ++#endif ++ ++/* */ +-- +1.7.10.4 + diff --git a/3.3.8/0002-AppArmor-compatibility-patch-for-v5-interface.patch b/3.3.8/0002-AppArmor-compatibility-patch-for-v5-interface.patch new file mode 100644 index 0000000..10d4640 --- /dev/null +++ b/3.3.8/0002-AppArmor-compatibility-patch-for-v5-interface.patch @@ -0,0 +1,391 @@ +From a2515f25ad5a7833ddc5a032d34eee6a5ddee3a2 Mon Sep 17 00:00:00 2001 +From: John Johansen +Date: Wed, 10 Aug 2011 22:02:40 -0700 +Subject: [PATCH 2/3] AppArmor: compatibility patch for v5 interface + +Signed-off-by: John Johansen +--- + security/apparmor/Kconfig | 9 + + security/apparmor/Makefile | 1 + + security/apparmor/apparmorfs-24.c | 287 ++++++++++++++++++++++++++++++++ + security/apparmor/apparmorfs.c | 18 ++- + 
security/apparmor/include/apparmorfs.h | 6 + + 5 files changed, 319 insertions(+), 2 deletions(-) + create mode 100644 security/apparmor/apparmorfs-24.c + +diff --git a/security/apparmor/Kconfig b/security/apparmor/Kconfig +index 9b9013b..51ebf96 100644 +--- a/security/apparmor/Kconfig ++++ b/security/apparmor/Kconfig +@@ -29,3 +29,12 @@ config SECURITY_APPARMOR_BOOTPARAM_VALUE + boot. + + If you are unsure how to answer this question, answer 1. ++ ++config SECURITY_APPARMOR_COMPAT_24 ++ bool "Enable AppArmor 2.4 compatability" ++ depends on SECURITY_APPARMOR ++ default y ++ help ++ This option enables compatability with AppArmor 2.4. It is ++ recommended if compatability with older versions of AppArmor ++ is desired. +diff --git a/security/apparmor/Makefile b/security/apparmor/Makefile +index 7cefef9..0bb604b 100644 +--- a/security/apparmor/Makefile ++++ b/security/apparmor/Makefile +@@ -5,6 +5,7 @@ obj-$(CONFIG_SECURITY_APPARMOR) += apparmor.o + apparmor-y := apparmorfs.o audit.o capability.o context.o ipc.o lib.o match.o \ + path.o domain.o policy.o policy_unpack.o procattr.o lsm.o \ + resource.o sid.o file.o net.o ++apparmor-$(CONFIG_SECURITY_APPARMOR_COMPAT_24) += apparmorfs-24.o + + clean-files := capability_names.h rlim_names.h af_names.h + +diff --git a/security/apparmor/apparmorfs-24.c b/security/apparmor/apparmorfs-24.c +new file mode 100644 +index 0000000..dc8c744 +--- /dev/null ++++ b/security/apparmor/apparmorfs-24.c +@@ -0,0 +1,287 @@ ++/* ++ * AppArmor security module ++ * ++ * This file contains AppArmor /sys/kernel/secrutiy/apparmor interface functions ++ * ++ * Copyright (C) 1998-2008 Novell/SUSE ++ * Copyright 2009-2010 Canonical Ltd. ++ * ++ * This program is free software; you can redistribute it and/or ++ * modify it under the terms of the GNU General Public License as ++ * published by the Free Software Foundation, version 2 of the ++ * License. ++ * ++ * ++ * This file contain functions providing an interface for <= AppArmor 2.4 ++ * compatibility. It is dependent on CONFIG_SECURITY_APPARMOR_COMPAT_24 ++ * being set (see Makefile). ++ */ ++ ++#include ++#include ++#include ++#include ++#include ++#include ++ ++#include "include/apparmor.h" ++#include "include/audit.h" ++#include "include/context.h" ++#include "include/policy.h" ++ ++ ++/* apparmor/matching */ ++static ssize_t aa_matching_read(struct file *file, char __user *buf, ++ size_t size, loff_t *ppos) ++{ ++ const char matching[] = "pattern=aadfa audit perms=crwxamlk/ " ++ "user::other"; ++ ++ return simple_read_from_buffer(buf, size, ppos, matching, ++ sizeof(matching) - 1); ++} ++ ++const struct file_operations aa_fs_matching_fops = { ++ .read = aa_matching_read, ++}; ++ ++/* apparmor/features */ ++static ssize_t aa_features_read(struct file *file, char __user *buf, ++ size_t size, loff_t *ppos) ++{ ++ const char features[] = "file=3.1 capability=2.0 network=1.0 " ++ "change_hat=1.5 change_profile=1.1 " "aanamespaces=1.1 rlimit=1.1"; ++ ++ return simple_read_from_buffer(buf, size, ppos, features, ++ sizeof(features) - 1); ++} ++ ++const struct file_operations aa_fs_features_fops = { ++ .read = aa_features_read, ++}; ++ ++/** ++ * __next_namespace - find the next namespace to list ++ * @root: root namespace to stop search at (NOT NULL) ++ * @ns: current ns position (NOT NULL) ++ * ++ * Find the next namespace from @ns under @root and handle all locking needed ++ * while switching current namespace. 
++ * ++ * Returns: next namespace or NULL if at last namespace under @root ++ * NOTE: will not unlock root->lock ++ */ ++static struct aa_namespace *__next_namespace(struct aa_namespace *root, ++ struct aa_namespace *ns) ++{ ++ struct aa_namespace *parent; ++ ++ /* is next namespace a child */ ++ if (!list_empty(&ns->sub_ns)) { ++ struct aa_namespace *next; ++ next = list_first_entry(&ns->sub_ns, typeof(*ns), base.list); ++ read_lock(&next->lock); ++ return next; ++ } ++ ++ /* check if the next ns is a sibling, parent, gp, .. */ ++ parent = ns->parent; ++ while (parent) { ++ read_unlock(&ns->lock); ++ list_for_each_entry_continue(ns, &parent->sub_ns, base.list) { ++ read_lock(&ns->lock); ++ return ns; ++ } ++ if (parent == root) ++ return NULL; ++ ns = parent; ++ parent = parent->parent; ++ } ++ ++ return NULL; ++} ++ ++/** ++ * __first_profile - find the first profile in a namespace ++ * @root: namespace that is root of profiles being displayed (NOT NULL) ++ * @ns: namespace to start in (NOT NULL) ++ * ++ * Returns: unrefcounted profile or NULL if no profile ++ */ ++static struct aa_profile *__first_profile(struct aa_namespace *root, ++ struct aa_namespace *ns) ++{ ++ for ( ; ns; ns = __next_namespace(root, ns)) { ++ if (!list_empty(&ns->base.profiles)) ++ return list_first_entry(&ns->base.profiles, ++ struct aa_profile, base.list); ++ } ++ return NULL; ++} ++ ++/** ++ * __next_profile - step to the next profile in a profile tree ++ * @profile: current profile in tree (NOT NULL) ++ * ++ * Perform a depth first taversal on the profile tree in a namespace ++ * ++ * Returns: next profile or NULL if done ++ * Requires: profile->ns.lock to be held ++ */ ++static struct aa_profile *__next_profile(struct aa_profile *p) ++{ ++ struct aa_profile *parent; ++ struct aa_namespace *ns = p->ns; ++ ++ /* is next profile a child */ ++ if (!list_empty(&p->base.profiles)) ++ return list_first_entry(&p->base.profiles, typeof(*p), ++ base.list); ++ ++ /* is next profile a sibling, parent sibling, gp, subling, .. 
*/ ++ parent = p->parent; ++ while (parent) { ++ list_for_each_entry_continue(p, &parent->base.profiles, ++ base.list) ++ return p; ++ p = parent; ++ parent = parent->parent; ++ } ++ ++ /* is next another profile in the namespace */ ++ list_for_each_entry_continue(p, &ns->base.profiles, base.list) ++ return p; ++ ++ return NULL; ++} ++ ++/** ++ * next_profile - step to the next profile in where ever it may be ++ * @root: root namespace (NOT NULL) ++ * @profile: current profile (NOT NULL) ++ * ++ * Returns: next profile or NULL if there isn't one ++ */ ++static struct aa_profile *next_profile(struct aa_namespace *root, ++ struct aa_profile *profile) ++{ ++ struct aa_profile *next = __next_profile(profile); ++ if (next) ++ return next; ++ ++ /* finished all profiles in namespace move to next namespace */ ++ return __first_profile(root, __next_namespace(root, profile->ns)); ++} ++ ++/** ++ * p_start - start a depth first traversal of profile tree ++ * @f: seq_file to fill ++ * @pos: current position ++ * ++ * Returns: first profile under current namespace or NULL if none found ++ * ++ * acquires first ns->lock ++ */ ++static void *p_start(struct seq_file *f, loff_t *pos) ++ __acquires(root->lock) ++{ ++ struct aa_profile *profile = NULL; ++ struct aa_namespace *root = aa_current_profile()->ns; ++ loff_t l = *pos; ++ f->private = aa_get_namespace(root); ++ ++ ++ /* find the first profile */ ++ read_lock(&root->lock); ++ profile = __first_profile(root, root); ++ ++ /* skip to position */ ++ for (; profile && l > 0; l--) ++ profile = next_profile(root, profile); ++ ++ return profile; ++} ++ ++/** ++ * p_next - read the next profile entry ++ * @f: seq_file to fill ++ * @p: profile previously returned ++ * @pos: current position ++ * ++ * Returns: next profile after @p or NULL if none ++ * ++ * may acquire/release locks in namespace tree as necessary ++ */ ++static void *p_next(struct seq_file *f, void *p, loff_t *pos) ++{ ++ struct aa_profile *profile = p; ++ struct aa_namespace *root = f->private; ++ (*pos)++; ++ ++ return next_profile(root, profile); ++} ++ ++/** ++ * p_stop - stop depth first traversal ++ * @f: seq_file we are filling ++ * @p: the last profile writen ++ * ++ * Release all locking done by p_start/p_next on namespace tree ++ */ ++static void p_stop(struct seq_file *f, void *p) ++ __releases(root->lock) ++{ ++ struct aa_profile *profile = p; ++ struct aa_namespace *root = f->private, *ns; ++ ++ if (profile) { ++ for (ns = profile->ns; ns && ns != root; ns = ns->parent) ++ read_unlock(&ns->lock); ++ } ++ read_unlock(&root->lock); ++ aa_put_namespace(root); ++} ++ ++/** ++ * seq_show_profile - show a profile entry ++ * @f: seq_file to file ++ * @p: current position (profile) (NOT NULL) ++ * ++ * Returns: error on failure ++ */ ++static int seq_show_profile(struct seq_file *f, void *p) ++{ ++ struct aa_profile *profile = (struct aa_profile *)p; ++ struct aa_namespace *root = f->private; ++ ++ if (profile->ns != root) ++ seq_printf(f, ":%s://", aa_ns_name(root, profile->ns)); ++ seq_printf(f, "%s (%s)\n", profile->base.hname, ++ COMPLAIN_MODE(profile) ? 
"complain" : "enforce"); ++ ++ return 0; ++} ++ ++static const struct seq_operations aa_fs_profiles_op = { ++ .start = p_start, ++ .next = p_next, ++ .stop = p_stop, ++ .show = seq_show_profile, ++}; ++ ++static int profiles_open(struct inode *inode, struct file *file) ++{ ++ return seq_open(file, &aa_fs_profiles_op); ++} ++ ++static int profiles_release(struct inode *inode, struct file *file) ++{ ++ return seq_release(inode, file); ++} ++ ++const struct file_operations aa_fs_profiles_fops = { ++ .open = profiles_open, ++ .read = seq_read, ++ .llseek = seq_lseek, ++ .release = profiles_release, ++}; +diff --git a/security/apparmor/apparmorfs.c b/security/apparmor/apparmorfs.c +index 0848292..28c52ac 100644 +--- a/security/apparmor/apparmorfs.c ++++ b/security/apparmor/apparmorfs.c +@@ -187,7 +187,11 @@ void __init aa_destroy_aafs(void) + aafs_remove(".remove"); + aafs_remove(".replace"); + aafs_remove(".load"); +- ++#ifdef CONFIG_SECURITY_APPARMOR_COMPAT_24 ++ aafs_remove("profiles"); ++ aafs_remove("matching"); ++ aafs_remove("features"); ++#endif + securityfs_remove(aa_fs_dentry); + aa_fs_dentry = NULL; + } +@@ -218,7 +222,17 @@ int __init aa_create_aafs(void) + aa_fs_dentry = NULL; + goto error; + } +- ++#ifdef CONFIG_SECURITY_APPARMOR_COMPAT_24 ++ error = aafs_create("matching", 0444, &aa_fs_matching_fops); ++ if (error) ++ goto error; ++ error = aafs_create("features", 0444, &aa_fs_features_fops); ++ if (error) ++ goto error; ++#endif ++ error = aafs_create("profiles", 0440, &aa_fs_profiles_fops); ++ if (error) ++ goto error; + error = aafs_create(".load", 0640, &aa_fs_profile_load); + if (error) + goto error; +diff --git a/security/apparmor/include/apparmorfs.h b/security/apparmor/include/apparmorfs.h +index cb1e93a..14f955c 100644 +--- a/security/apparmor/include/apparmorfs.h ++++ b/security/apparmor/include/apparmorfs.h +@@ -17,4 +17,10 @@ + + extern void __init aa_destroy_aafs(void); + ++#ifdef CONFIG_SECURITY_APPARMOR_COMPAT_24 ++extern const struct file_operations aa_fs_matching_fops; ++extern const struct file_operations aa_fs_features_fops; ++extern const struct file_operations aa_fs_profiles_fops; ++#endif ++ + #endif /* __AA_APPARMORFS_H */ +-- +1.7.5.4 + diff --git a/3.3.8/0002-block-introduce-the-BFQ-v5-I-O-sched-for-3.3.patch b/3.3.8/0002-block-introduce-the-BFQ-v5-I-O-sched-for-3.3.patch new file mode 100644 index 0000000..0f6b038 --- /dev/null +++ b/3.3.8/0002-block-introduce-the-BFQ-v5-I-O-sched-for-3.3.patch @@ -0,0 +1,5624 @@ +From bb6938c4f08c3c5a537175c887d5cc4e14e804a2 Mon Sep 17 00:00:00 2001 +From: Arianna Avanzini +Date: Sun, 5 Feb 2012 01:04:27 +0100 +Subject: [PATCH 2/2] block: introduce the BFQ-v5 I/O sched for 3.3 + +Add the BFQ-v5 I/O scheduler to 3.3. +The general structure is borrowed from CFQ, as much code. A (bfq_)queue is +associated to each task doing I/O on a device, and each time a scheduling +decision has to be taken a queue is selected and it is served until it expires. + + - Slices are given in the service domain: tasks are assigned budgets, + measured in number of sectors. Once got the disk, a task must + however consume its assigned budget within a configurable maximum time + (by default, the maximum possible value of the budgets is automatically + computed to comply with this timeout). This allows the desired latency + vs "throughput boosting" tradeoff to be set. 
+ + - Budgets are scheduled according to a variant of WF2Q+, implemented + using an augmented rb-tree to take eligibility into account while + preserving an O(log N) overall complexity. + + - A low-latency tunable is provided; if enabled, both interactive and soft + real-time applications are guaranteed very low latency. + + - Latency guarantees are preserved also in presence of NCQ. + + - High throughput with flash-based devices, while still preserving + latency guarantees. + + - Useful features borrowed from CFQ: cooperating-queues merging (with + some additional optimizations with respect to the original CFQ version), + static fallback queue for OOM. + + - BFQ supports full hierarchical scheduling, exporting a cgroups + interface. Each node has a full scheduler, so each group can + be assigned its own ioprio and an ioprio_class. + + - If the cgroups interface is used, weights can be explictly assigned, + otherwise ioprio values are mapped to weights using the relation + weight = IOPRIO_BE_NR - ioprio. + + - ioprio classes are served in strict priority order, i.e., lower + priority queues are not served as long as there are higher priority + queues. Among queues in the same class the bandwidth is distributed + in proportion to the weights of each queue. A very thin extra bandwidth + is however guaranteed to the Idle class, to prevent it from starving. + +Signed-off-by: Paolo Valente +Signed-off-by: Arianna Avanzini +--- + block/bfq-cgroup.c | 841 +++++++++++++++ + block/bfq-ioc.c | 34 + + block/bfq-iosched.c | 2985 +++++++++++++++++++++++++++++++++++++++++++++++++++ + block/bfq-sched.c | 1070 ++++++++++++++++++ + block/bfq.h | 599 +++++++++++ + 5 files changed, 5529 insertions(+) + create mode 100644 block/bfq-cgroup.c + create mode 100644 block/bfq-ioc.c + create mode 100644 block/bfq-iosched.c + create mode 100644 block/bfq-sched.c + create mode 100644 block/bfq.h + +diff --git a/block/bfq-cgroup.c block/bfq-cgroup.c +new file mode 100644 +index 0000000..6bb907d +--- /dev/null ++++ block/bfq-cgroup.c +@@ -0,0 +1,841 @@ ++/* ++ * BFQ: CGROUPS support. ++ * ++ * Based on ideas and code from CFQ: ++ * Copyright (C) 2003 Jens Axboe ++ * ++ * Copyright (C) 2008 Fabio Checconi ++ * Paolo Valente ++ * ++ * Licensed under the GPL-2 as detailed in the accompanying COPYING.BFQ file. ++ */ ++ ++#ifdef CONFIG_CGROUP_BFQIO ++static struct bfqio_cgroup bfqio_root_cgroup = { ++ .weight = BFQ_DEFAULT_GRP_WEIGHT, ++ .ioprio = BFQ_DEFAULT_GRP_IOPRIO, ++ .ioprio_class = BFQ_DEFAULT_GRP_CLASS, ++}; ++ ++static inline void bfq_init_entity(struct bfq_entity *entity, ++ struct bfq_group *bfqg) ++{ ++ entity->weight = entity->new_weight; ++ entity->orig_weight = entity->new_weight; ++ entity->ioprio = entity->new_ioprio; ++ entity->ioprio_class = entity->new_ioprio_class; ++ entity->parent = bfqg->my_entity; ++ entity->sched_data = &bfqg->sched_data; ++} ++ ++static struct bfqio_cgroup *cgroup_to_bfqio(struct cgroup *cgroup) ++{ ++ return container_of(cgroup_subsys_state(cgroup, bfqio_subsys_id), ++ struct bfqio_cgroup, css); ++} ++ ++/* ++ * Search the bfq_group for bfqd into the hash table (by now only a list) ++ * of bgrp. Must be called under rcu_read_lock(). 
++ */ ++static struct bfq_group *bfqio_lookup_group(struct bfqio_cgroup *bgrp, ++ struct bfq_data *bfqd) ++{ ++ struct bfq_group *bfqg; ++ struct hlist_node *n; ++ void *key; ++ ++ hlist_for_each_entry_rcu(bfqg, n, &bgrp->group_data, group_node) { ++ key = rcu_dereference(bfqg->bfqd); ++ if (key == bfqd) ++ return bfqg; ++ } ++ ++ return NULL; ++} ++ ++static inline void bfq_group_init_entity(struct bfqio_cgroup *bgrp, ++ struct bfq_group *bfqg) ++{ ++ struct bfq_entity *entity = &bfqg->entity; ++ ++ entity->weight = entity->new_weight = bgrp->weight; ++ entity->orig_weight = entity->new_weight; ++ entity->ioprio = entity->new_ioprio = bgrp->ioprio; ++ entity->ioprio_class = entity->new_ioprio_class = bgrp->ioprio_class; ++ entity->ioprio_changed = 1; ++ entity->my_sched_data = &bfqg->sched_data; ++} ++ ++static inline void bfq_group_set_parent(struct bfq_group *bfqg, ++ struct bfq_group *parent) ++{ ++ struct bfq_entity *entity; ++ ++ BUG_ON(parent == NULL); ++ BUG_ON(bfqg == NULL); ++ ++ entity = &bfqg->entity; ++ entity->parent = parent->my_entity; ++ entity->sched_data = &parent->sched_data; ++} ++ ++/** ++ * bfq_group_chain_alloc - allocate a chain of groups. ++ * @bfqd: queue descriptor. ++ * @cgroup: the leaf cgroup this chain starts from. ++ * ++ * Allocate a chain of groups starting from the one belonging to ++ * @cgroup up to the root cgroup. Stop if a cgroup on the chain ++ * to the root has already an allocated group on @bfqd. ++ */ ++static struct bfq_group *bfq_group_chain_alloc(struct bfq_data *bfqd, ++ struct cgroup *cgroup) ++{ ++ struct bfqio_cgroup *bgrp; ++ struct bfq_group *bfqg, *prev = NULL, *leaf = NULL; ++ ++ for (; cgroup != NULL; cgroup = cgroup->parent) { ++ bgrp = cgroup_to_bfqio(cgroup); ++ ++ bfqg = bfqio_lookup_group(bgrp, bfqd); ++ if (bfqg != NULL) { ++ /* ++ * All the cgroups in the path from there to the ++ * root must have a bfq_group for bfqd, so we don't ++ * need any more allocations. ++ */ ++ break; ++ } ++ ++ bfqg = kzalloc(sizeof(*bfqg), GFP_ATOMIC); ++ if (bfqg == NULL) ++ goto cleanup; ++ ++ bfq_group_init_entity(bgrp, bfqg); ++ bfqg->my_entity = &bfqg->entity; ++ ++ if (leaf == NULL) { ++ leaf = bfqg; ++ prev = leaf; ++ } else { ++ bfq_group_set_parent(prev, bfqg); ++ /* ++ * Build a list of allocated nodes using the bfqd ++ * filed, that is still unused and will be initialized ++ * only after the node will be connected. ++ */ ++ prev->bfqd = bfqg; ++ prev = bfqg; ++ } ++ } ++ ++ return leaf; ++ ++cleanup: ++ while (leaf != NULL) { ++ prev = leaf; ++ leaf = leaf->bfqd; ++ kfree(prev); ++ } ++ ++ return NULL; ++} ++ ++/** ++ * bfq_group_chain_link - link an allocatd group chain to a cgroup hierarchy. ++ * @bfqd: the queue descriptor. ++ * @cgroup: the leaf cgroup to start from. ++ * @leaf: the leaf group (to be associated to @cgroup). ++ * ++ * Try to link a chain of groups to a cgroup hierarchy, connecting the ++ * nodes bottom-up, so we can be sure that when we find a cgroup in the ++ * hierarchy that already as a group associated to @bfqd all the nodes ++ * in the path to the root cgroup have one too. ++ * ++ * On locking: the queue lock protects the hierarchy (there is a hierarchy ++ * per device) while the bfqio_cgroup lock protects the list of groups ++ * belonging to the same cgroup. 
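++ *
++ * Each node is published in its bfqio_cgroup's group_data list and in
++ * bfqd->group_list while holding bgrp->lock; the walk then moves up to
++ * the parent cgroup, following the temporary chain that
++ * bfq_group_chain_alloc() threaded through the not-yet-valid bfqd
++ * pointers of the new nodes.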
++ */ ++static void bfq_group_chain_link(struct bfq_data *bfqd, struct cgroup *cgroup, ++ struct bfq_group *leaf) ++{ ++ struct bfqio_cgroup *bgrp; ++ struct bfq_group *bfqg, *next, *prev = NULL; ++ unsigned long flags; ++ ++ assert_spin_locked(bfqd->queue->queue_lock); ++ ++ for (; cgroup != NULL && leaf != NULL; cgroup = cgroup->parent) { ++ bgrp = cgroup_to_bfqio(cgroup); ++ next = leaf->bfqd; ++ ++ bfqg = bfqio_lookup_group(bgrp, bfqd); ++ BUG_ON(bfqg != NULL); ++ ++ spin_lock_irqsave(&bgrp->lock, flags); ++ ++ rcu_assign_pointer(leaf->bfqd, bfqd); ++ hlist_add_head_rcu(&leaf->group_node, &bgrp->group_data); ++ hlist_add_head(&leaf->bfqd_node, &bfqd->group_list); ++ ++ spin_unlock_irqrestore(&bgrp->lock, flags); ++ ++ prev = leaf; ++ leaf = next; ++ } ++ ++ BUG_ON(cgroup == NULL && leaf != NULL); ++ if (cgroup != NULL && prev != NULL) { ++ bgrp = cgroup_to_bfqio(cgroup); ++ bfqg = bfqio_lookup_group(bgrp, bfqd); ++ bfq_group_set_parent(prev, bfqg); ++ } ++} ++ ++/** ++ * bfq_find_alloc_group - return the group associated to @bfqd in @cgroup. ++ * @bfqd: queue descriptor. ++ * @cgroup: cgroup being searched for. ++ * ++ * Return a group associated to @bfqd in @cgroup, allocating one if ++ * necessary. When a group is returned all the cgroups in the path ++ * to the root have a group associated to @bfqd. ++ * ++ * If the allocation fails, return the root group: this breaks guarantees ++ * but is a safe fallbak. If this loss becames a problem it can be ++ * mitigated using the equivalent weight (given by the product of the ++ * weights of the groups in the path from @group to the root) in the ++ * root scheduler. ++ * ++ * We allocate all the missing nodes in the path from the leaf cgroup ++ * to the root and we connect the nodes only after all the allocations ++ * have been successful. ++ */ ++static struct bfq_group *bfq_find_alloc_group(struct bfq_data *bfqd, ++ struct cgroup *cgroup) ++{ ++ struct bfqio_cgroup *bgrp = cgroup_to_bfqio(cgroup); ++ struct bfq_group *bfqg; ++ ++ bfqg = bfqio_lookup_group(bgrp, bfqd); ++ if (bfqg != NULL) ++ return bfqg; ++ ++ bfqg = bfq_group_chain_alloc(bfqd, cgroup); ++ if (bfqg != NULL) ++ bfq_group_chain_link(bfqd, cgroup, bfqg); ++ else ++ bfqg = bfqd->root_group; ++ ++ return bfqg; ++} ++ ++/** ++ * bfq_bfqq_move - migrate @bfqq to @bfqg. ++ * @bfqd: queue descriptor. ++ * @bfqq: the queue to move. ++ * @entity: @bfqq's entity. ++ * @bfqg: the group to move to. ++ * ++ * Move @bfqq to @bfqg, deactivating it from its old group and reactivating ++ * it on the new one. Avoid putting the entity on the old group idle tree. ++ * ++ * Must be called under the queue lock; the cgroup owning @bfqg must ++ * not disappear (by now this just means that we are called under ++ * rcu_read_lock()). ++ */ ++static void bfq_bfqq_move(struct bfq_data *bfqd, struct bfq_queue *bfqq, ++ struct bfq_entity *entity, struct bfq_group *bfqg) ++{ ++ int busy, resume; ++ ++ busy = bfq_bfqq_busy(bfqq); ++ resume = !RB_EMPTY_ROOT(&bfqq->sort_list); ++ ++ BUG_ON(resume && !entity->on_st); ++ BUG_ON(busy && !resume && entity->on_st && bfqq != bfqd->active_queue); ++ ++ if (busy) { ++ BUG_ON(atomic_read(&bfqq->ref) < 2); ++ ++ if (!resume) ++ bfq_del_bfqq_busy(bfqd, bfqq, 0); ++ else ++ bfq_deactivate_bfqq(bfqd, bfqq, 0); ++ } else if (entity->on_st) ++ bfq_put_idle_entity(bfq_entity_service_tree(entity), entity); ++ ++ /* ++ * Here we use a reference to bfqg. 
We don't need a refcounter ++ * as the cgroup reference will not be dropped, so that its ++ * destroy() callback will not be invoked. ++ */ ++ entity->parent = bfqg->my_entity; ++ entity->sched_data = &bfqg->sched_data; ++ ++ if (busy && resume) ++ bfq_activate_bfqq(bfqd, bfqq); ++} ++ ++/** ++ * __bfq_bic_change_cgroup - move @bic to @cgroup. ++ * @bfqd: the queue descriptor. ++ * @bic: the bic to move. ++ * @cgroup: the cgroup to move to. ++ * ++ * Move bic to cgroup, assuming that bfqd->queue is locked; the caller ++ * has to make sure that the reference to cgroup is valid across the call. ++ * ++ * NOTE: an alternative approach might have been to store the current ++ * cgroup in bfqq and getting a reference to it, reducing the lookup ++ * time here, at the price of slightly more complex code. ++ */ ++static struct bfq_group *__bfq_bic_change_cgroup(struct bfq_data *bfqd, ++ struct bfq_io_cq *bic, ++ struct cgroup *cgroup) ++{ ++ struct bfq_queue *async_bfqq = bic_to_bfqq(bic, 0); ++ struct bfq_queue *sync_bfqq = bic_to_bfqq(bic, 1); ++ struct bfq_entity *entity; ++ struct bfq_group *bfqg; ++ struct bfqio_cgroup *bgrp; ++ ++ bgrp = cgroup_to_bfqio(cgroup); ++ ++ bfqg = bfq_find_alloc_group(bfqd, cgroup); ++ if (async_bfqq != NULL) { ++ entity = &async_bfqq->entity; ++ ++ if (entity->sched_data != &bfqg->sched_data) { ++ bic_set_bfqq(bic, NULL, 0); ++ bfq_log_bfqq(bfqd, async_bfqq, ++ "bic_change_group: %p %d", ++ async_bfqq, atomic_read(&async_bfqq->ref)); ++ bfq_put_queue(async_bfqq); ++ } ++ } ++ ++ if (sync_bfqq != NULL) { ++ entity = &sync_bfqq->entity; ++ if (entity->sched_data != &bfqg->sched_data) ++ bfq_bfqq_move(bfqd, sync_bfqq, entity, bfqg); ++ } ++ ++ return bfqg; ++} ++ ++/** ++ * bfq_bic_change_cgroup - move @bic to @cgroup. ++ * @bic: the bic being migrated. ++ * @cgroup: the destination cgroup. ++ * ++ * When the task owning @bic is moved to @cgroup, @bic is immediately ++ * moved into its new parent group. ++ */ ++static void bfq_bic_change_cgroup(struct bfq_io_cq *bic, ++ struct cgroup *cgroup) ++{ ++ struct bfq_data *bfqd; ++ unsigned long uninitialized_var(flags); ++ ++ bfqd = bfq_get_bfqd_locked(&(bic->icq.q->elevator->elevator_data), &flags); ++ if (bfqd != NULL) { ++ __bfq_bic_change_cgroup(bfqd, bic, cgroup); ++ bfq_put_bfqd_unlock(bfqd, &flags); ++ } ++} ++ ++/** ++ * bfq_bic_update_cgroup - update the cgroup of @bic. ++ * @bic: the @bic to update. ++ * ++ * Make sure that @bic is enqueued in the cgroup of the current task. ++ * We need this in addition to moving bics during the cgroup attach ++ * phase because the task owning @bic could be at its first disk ++ * access or we may end up in the root cgroup as the result of a ++ * memory allocation failure and here we try to move to the right ++ * group. ++ * ++ * Must be called under the queue lock. It is safe to use the returned ++ * value even after the rcu_read_unlock() as the migration/destruction ++ * paths act under the queue lock too. IOW it is impossible to race with ++ * group migration/destruction and end up with an invalid group as: ++ * a) here cgroup has not yet been destroyed, nor its destroy callback ++ * has started execution, as current holds a reference to it, ++ * b) if it is destroyed after rcu_read_unlock() [after current is ++ * migrated to a different cgroup] its attach() callback will have ++ * taken care of remove all the references to the old cgroup data. 
++ */ ++static struct bfq_group *bfq_bic_update_cgroup(struct bfq_io_cq *bic) ++{ ++ struct bfq_data *bfqd = bic_to_bfqd(bic); ++ struct bfq_group *bfqg; ++ struct cgroup *cgroup; ++ ++ BUG_ON(bfqd == NULL); ++ ++ rcu_read_lock(); ++ cgroup = task_cgroup(current, bfqio_subsys_id); ++ bfqg = __bfq_bic_change_cgroup(bfqd, bic, cgroup); ++ rcu_read_unlock(); ++ ++ return bfqg; ++} ++ ++/** ++ * bfq_flush_idle_tree - deactivate any entity on the idle tree of @st. ++ * @st: the service tree being flushed. ++ */ ++static inline void bfq_flush_idle_tree(struct bfq_service_tree *st) ++{ ++ struct bfq_entity *entity = st->first_idle; ++ ++ for (; entity != NULL; entity = st->first_idle) ++ __bfq_deactivate_entity(entity, 0); ++} ++ ++/** ++ * bfq_reparent_leaf_entity - move leaf entity to the root_group. ++ * @bfqd: the device data structure with the root group. ++ * @entity: the entity to move. ++ */ ++static inline void bfq_reparent_leaf_entity(struct bfq_data *bfqd, ++ struct bfq_entity *entity) ++{ ++ struct bfq_queue *bfqq = bfq_entity_to_bfqq(entity); ++ ++ BUG_ON(bfqq == NULL); ++ bfq_bfqq_move(bfqd, bfqq, entity, bfqd->root_group); ++ return; ++} ++ ++/** ++ * bfq_reparent_active_entities - move to the root group all active entities. ++ * @bfqd: the device data structure with the root group. ++ * @bfqg: the group to move from. ++ * @st: the service tree with the entities. ++ * ++ * Needs queue_lock to be taken and reference to be valid over the call. ++ */ ++static inline void bfq_reparent_active_entities(struct bfq_data *bfqd, ++ struct bfq_group *bfqg, ++ struct bfq_service_tree *st) ++{ ++ struct rb_root *active = &st->active; ++ struct bfq_entity *entity = NULL; ++ ++ if (!RB_EMPTY_ROOT(&st->active)) ++ entity = bfq_entity_of(rb_first(active)); ++ ++ for (; entity != NULL ; entity = bfq_entity_of(rb_first(active))) ++ bfq_reparent_leaf_entity(bfqd, entity); ++ ++ if (bfqg->sched_data.active_entity != NULL) ++ bfq_reparent_leaf_entity(bfqd, bfqg->sched_data.active_entity); ++ ++ return; ++} ++ ++/** ++ * bfq_destroy_group - destroy @bfqg. ++ * @bgrp: the bfqio_cgroup containing @bfqg. ++ * @bfqg: the group being destroyed. ++ * ++ * Destroy @bfqg, making sure that it is not referenced from its parent. ++ */ ++static void bfq_destroy_group(struct bfqio_cgroup *bgrp, struct bfq_group *bfqg) ++{ ++ struct bfq_data *bfqd; ++ struct bfq_service_tree *st; ++ struct bfq_entity *entity = bfqg->my_entity; ++ unsigned long uninitialized_var(flags); ++ int i; ++ ++ hlist_del(&bfqg->group_node); ++ ++ /* ++ * Empty all service_trees belonging to this group before deactivating ++ * the group itself. ++ */ ++ for (i = 0; i < BFQ_IOPRIO_CLASSES; i++) { ++ st = bfqg->sched_data.service_tree + i; ++ ++ /* ++ * The idle tree may still contain bfq_queues belonging ++ * to exited task because they never migrated to a different ++ * cgroup from the one being destroyed now. Noone else ++ * can access them so it's safe to act without any lock. ++ */ ++ bfq_flush_idle_tree(st); ++ ++ /* ++ * It may happen that some queues are still active ++ * (busy) upon group destruction (if the corresponding ++ * processes have been forced to terminate). We move ++ * all the leaf entities corresponding to these queues ++ * to the root_group. ++ * Also, it may happen that the group has an entity ++ * under service, which is disconnected from the active ++ * tree: it must be moved, too. ++ * There is no need to put the sync queues, as the ++ * scheduler has taken no reference. 
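++ * The bfqd pointer is re-read through bfq_get_bfqd_locked() because
++ * the device may be going away concurrently (see the race comment
++ * further below); if it is already gone the reparenting is skipped.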
++ */ ++ bfqd = bfq_get_bfqd_locked(&bfqg->bfqd, &flags); ++ if (bfqd != NULL) { ++ bfq_reparent_active_entities(bfqd, bfqg, st); ++ bfq_put_bfqd_unlock(bfqd, &flags); ++ } ++ BUG_ON(!RB_EMPTY_ROOT(&st->active)); ++ BUG_ON(!RB_EMPTY_ROOT(&st->idle)); ++ } ++ BUG_ON(bfqg->sched_data.next_active != NULL); ++ BUG_ON(bfqg->sched_data.active_entity != NULL); ++ ++ /* ++ * We may race with device destruction, take extra care when ++ * dereferencing bfqg->bfqd. ++ */ ++ bfqd = bfq_get_bfqd_locked(&bfqg->bfqd, &flags); ++ if (bfqd != NULL) { ++ hlist_del(&bfqg->bfqd_node); ++ __bfq_deactivate_entity(entity, 0); ++ bfq_put_async_queues(bfqd, bfqg); ++ bfq_put_bfqd_unlock(bfqd, &flags); ++ } ++ BUG_ON(entity->tree != NULL); ++ ++ /* ++ * No need to defer the kfree() to the end of the RCU grace ++ * period: we are called from the destroy() callback of our ++ * cgroup, so we can be sure that noone is a) still using ++ * this cgroup or b) doing lookups in it. ++ */ ++ kfree(bfqg); ++} ++ ++/** ++ * bfq_disconnect_groups - diconnect @bfqd from all its groups. ++ * @bfqd: the device descriptor being exited. ++ * ++ * When the device exits we just make sure that no lookup can return ++ * the now unused group structures. They will be deallocated on cgroup ++ * destruction. ++ */ ++static void bfq_disconnect_groups(struct bfq_data *bfqd) ++{ ++ struct hlist_node *pos, *n; ++ struct bfq_group *bfqg; ++ ++ bfq_log(bfqd, "disconnect_groups beginning") ; ++ hlist_for_each_entry_safe(bfqg, pos, n, &bfqd->group_list, bfqd_node) { ++ hlist_del(&bfqg->bfqd_node); ++ ++ __bfq_deactivate_entity(bfqg->my_entity, 0); ++ ++ /* ++ * Don't remove from the group hash, just set an ++ * invalid key. No lookups can race with the ++ * assignment as bfqd is being destroyed; this ++ * implies also that new elements cannot be added ++ * to the list. ++ */ ++ rcu_assign_pointer(bfqg->bfqd, NULL); ++ ++ bfq_log(bfqd, "disconnect_groups: put async for group %p", ++ bfqg) ; ++ bfq_put_async_queues(bfqd, bfqg); ++ } ++} ++ ++static inline void bfq_free_root_group(struct bfq_data *bfqd) ++{ ++ struct bfqio_cgroup *bgrp = &bfqio_root_cgroup; ++ struct bfq_group *bfqg = bfqd->root_group; ++ ++ bfq_put_async_queues(bfqd, bfqg); ++ ++ spin_lock_irq(&bgrp->lock); ++ hlist_del_rcu(&bfqg->group_node); ++ spin_unlock_irq(&bgrp->lock); ++ ++ /* ++ * No need to synchronize_rcu() here: since the device is gone ++ * there cannot be any read-side access to its root_group. 
++ */ ++ kfree(bfqg); ++} ++ ++static struct bfq_group *bfq_alloc_root_group(struct bfq_data *bfqd, int node) ++{ ++ struct bfq_group *bfqg; ++ struct bfqio_cgroup *bgrp; ++ int i; ++ ++ bfqg = kmalloc_node(sizeof(*bfqg), GFP_KERNEL | __GFP_ZERO, node); ++ if (bfqg == NULL) ++ return NULL; ++ ++ bfqg->entity.parent = NULL; ++ for (i = 0; i < BFQ_IOPRIO_CLASSES; i++) ++ bfqg->sched_data.service_tree[i] = BFQ_SERVICE_TREE_INIT; ++ ++ bgrp = &bfqio_root_cgroup; ++ spin_lock_irq(&bgrp->lock); ++ rcu_assign_pointer(bfqg->bfqd, bfqd); ++ hlist_add_head_rcu(&bfqg->group_node, &bgrp->group_data); ++ spin_unlock_irq(&bgrp->lock); ++ ++ return bfqg; ++} ++ ++#define SHOW_FUNCTION(__VAR) \ ++static u64 bfqio_cgroup_##__VAR##_read(struct cgroup *cgroup, \ ++ struct cftype *cftype) \ ++{ \ ++ struct bfqio_cgroup *bgrp; \ ++ u64 ret; \ ++ \ ++ if (!cgroup_lock_live_group(cgroup)) \ ++ return -ENODEV; \ ++ \ ++ bgrp = cgroup_to_bfqio(cgroup); \ ++ spin_lock_irq(&bgrp->lock); \ ++ ret = bgrp->__VAR; \ ++ spin_unlock_irq(&bgrp->lock); \ ++ \ ++ cgroup_unlock(); \ ++ \ ++ return ret; \ ++} ++ ++SHOW_FUNCTION(weight); ++SHOW_FUNCTION(ioprio); ++SHOW_FUNCTION(ioprio_class); ++#undef SHOW_FUNCTION ++ ++#define STORE_FUNCTION(__VAR, __MIN, __MAX) \ ++static int bfqio_cgroup_##__VAR##_write(struct cgroup *cgroup, \ ++ struct cftype *cftype, \ ++ u64 val) \ ++{ \ ++ struct bfqio_cgroup *bgrp; \ ++ struct bfq_group *bfqg; \ ++ struct hlist_node *n; \ ++ \ ++ if (val < (__MIN) || val > (__MAX)) \ ++ return -EINVAL; \ ++ \ ++ if (!cgroup_lock_live_group(cgroup)) \ ++ return -ENODEV; \ ++ \ ++ bgrp = cgroup_to_bfqio(cgroup); \ ++ \ ++ spin_lock_irq(&bgrp->lock); \ ++ bgrp->__VAR = (unsigned short)val; \ ++ hlist_for_each_entry(bfqg, n, &bgrp->group_data, group_node) { \ ++ bfqg->entity.new_##__VAR = (unsigned short)val; \ ++ smp_wmb(); \ ++ bfqg->entity.ioprio_changed = 1; \ ++ } \ ++ spin_unlock_irq(&bgrp->lock); \ ++ \ ++ cgroup_unlock(); \ ++ \ ++ return 0; \ ++} ++ ++STORE_FUNCTION(weight, BFQ_MIN_WEIGHT, BFQ_MAX_WEIGHT); ++STORE_FUNCTION(ioprio, 0, IOPRIO_BE_NR - 1); ++STORE_FUNCTION(ioprio_class, IOPRIO_CLASS_RT, IOPRIO_CLASS_IDLE); ++#undef STORE_FUNCTION ++ ++static struct cftype bfqio_files[] = { ++ { ++ .name = "weight", ++ .read_u64 = bfqio_cgroup_weight_read, ++ .write_u64 = bfqio_cgroup_weight_write, ++ }, ++ { ++ .name = "ioprio", ++ .read_u64 = bfqio_cgroup_ioprio_read, ++ .write_u64 = bfqio_cgroup_ioprio_write, ++ }, ++ { ++ .name = "ioprio_class", ++ .read_u64 = bfqio_cgroup_ioprio_class_read, ++ .write_u64 = bfqio_cgroup_ioprio_class_write, ++ }, ++}; ++ ++static int bfqio_populate(struct cgroup_subsys *subsys, struct cgroup *cgroup) ++{ ++ return cgroup_add_files(cgroup, subsys, bfqio_files, ++ ARRAY_SIZE(bfqio_files)); ++} ++ ++static struct cgroup_subsys_state *bfqio_create(struct cgroup_subsys *subsys, ++ struct cgroup *cgroup) ++{ ++ struct bfqio_cgroup *bgrp; ++ ++ if (cgroup->parent != NULL) { ++ bgrp = kzalloc(sizeof(*bgrp), GFP_KERNEL); ++ if (bgrp == NULL) ++ return ERR_PTR(-ENOMEM); ++ } else ++ bgrp = &bfqio_root_cgroup; ++ ++ spin_lock_init(&bgrp->lock); ++ INIT_HLIST_HEAD(&bgrp->group_data); ++ bgrp->ioprio = BFQ_DEFAULT_GRP_IOPRIO; ++ bgrp->ioprio_class = BFQ_DEFAULT_GRP_CLASS; ++ ++ return &bgrp->css; ++} ++ ++/* ++ * We cannot support shared io contexts, as we have no mean to support ++ * two tasks with the same ioc in two different groups without major rework ++ * of the main bic/bfqq data structures. 
By now we allow a task to change ++ * its cgroup only if it's the only owner of its ioc; the drawback of this ++ * behavior is that a group containing a task that forked using CLONE_IO ++ * will not be destroyed until the tasks sharing the ioc die. ++ */ ++static int bfqio_can_attach(struct cgroup_subsys *subsys, struct cgroup *cgroup, ++ struct cgroup_taskset *tset) ++{ ++ struct task_struct *task; ++ struct io_context *ioc; ++ int ret = 0; ++ ++ cgroup_taskset_for_each(task, cgroup, tset) { ++ /* task_lock() is needed to avoid races with exit_io_context() */ ++ task_lock(task); ++ ioc = task->io_context; ++ if (ioc != NULL && atomic_read(&ioc->nr_tasks) > 1) ++ /* ++ * ioc == NULL means that the task is either too young or ++ * exiting: if it has still no ioc the ioc can't be shared, ++ * if the task is exiting the attach will fail anyway, no ++ * matter what we return here. ++ */ ++ ret = -EINVAL; ++ task_unlock(task); ++ if (ret) ++ break; ++ } ++ ++ return ret; ++} ++ ++static void bfqio_attach(struct cgroup_subsys *subsys, struct cgroup *cgroup, ++ struct cgroup_taskset *tset) ++{ ++ struct task_struct *task; ++ struct io_context *ioc; ++ struct io_cq *icq; ++ struct hlist_node *n; ++ ++ /* ++ * IMPORTANT NOTE: The move of more than one process at a time to a ++ * new group has not yet been tested. ++ */ ++ cgroup_taskset_for_each(task, cgroup, tset) { ++ ioc = get_task_io_context(task, GFP_ATOMIC, NUMA_NO_NODE); ++ if (ioc) { ++ /* ++ * Handle cgroup change here. ++ */ ++ rcu_read_lock(); ++ hlist_for_each_entry_rcu(icq, n, &ioc->icq_list, ioc_node) ++ if (!strncmp(icq->q->elevator->type->elevator_name, ++ "bfq", ELV_NAME_MAX)) ++ bfq_bic_change_cgroup(icq_to_bic(icq), ++ cgroup); ++ rcu_read_unlock(); ++ put_io_context(ioc); ++ } ++ } ++} ++ ++static void bfqio_destroy(struct cgroup_subsys *subsys, struct cgroup *cgroup) ++{ ++ struct bfqio_cgroup *bgrp = cgroup_to_bfqio(cgroup); ++ struct hlist_node *n, *tmp; ++ struct bfq_group *bfqg; ++ ++ /* ++ * Since we are destroying the cgroup, there are no more tasks ++ * referencing it, and all the RCU grace periods that may have ++ * referenced it are ended (as the destruction of the parent ++ * cgroup is RCU-safe); bgrp->group_data will not be accessed by ++ * anything else and we don't need any synchronization. 
++ */ ++ hlist_for_each_entry_safe(bfqg, n, tmp, &bgrp->group_data, group_node) ++ bfq_destroy_group(bgrp, bfqg); ++ ++ BUG_ON(!hlist_empty(&bgrp->group_data)); ++ ++ kfree(bgrp); ++} ++ ++struct cgroup_subsys bfqio_subsys = { ++ .name = "bfqio", ++ .create = bfqio_create, ++ .can_attach = bfqio_can_attach, ++ .attach = bfqio_attach, ++ .destroy = bfqio_destroy, ++ .populate = bfqio_populate, ++ .subsys_id = bfqio_subsys_id, ++}; ++#else ++static inline void bfq_init_entity(struct bfq_entity *entity, ++ struct bfq_group *bfqg) ++{ ++ entity->weight = entity->new_weight; ++ entity->orig_weight = entity->new_weight; ++ entity->ioprio = entity->new_ioprio; ++ entity->ioprio_class = entity->new_ioprio_class; ++ entity->sched_data = &bfqg->sched_data; ++} ++ ++static inline struct bfq_group * ++bfq_bic_update_cgroup(struct bfq_io_cq *bic) ++{ ++ struct bfq_data *bfqd = bic_to_bfqd(bic); ++ return bfqd->root_group; ++} ++ ++static inline void bfq_bfqq_move(struct bfq_data *bfqd, ++ struct bfq_queue *bfqq, ++ struct bfq_entity *entity, ++ struct bfq_group *bfqg) ++{ ++} ++ ++static inline void bfq_disconnect_groups(struct bfq_data *bfqd) ++{ ++ bfq_put_async_queues(bfqd, bfqd->root_group); ++} ++ ++static inline void bfq_free_root_group(struct bfq_data *bfqd) ++{ ++ kfree(bfqd->root_group); ++} ++ ++static struct bfq_group *bfq_alloc_root_group(struct bfq_data *bfqd, int node) ++{ ++ struct bfq_group *bfqg; ++ int i; ++ ++ bfqg = kmalloc_node(sizeof(*bfqg), GFP_KERNEL | __GFP_ZERO, node); ++ if (bfqg == NULL) ++ return NULL; ++ ++ for (i = 0; i < BFQ_IOPRIO_CLASSES; i++) ++ bfqg->sched_data.service_tree[i] = BFQ_SERVICE_TREE_INIT; ++ ++ return bfqg; ++} ++#endif +diff --git a/block/bfq-ioc.c block/bfq-ioc.c +new file mode 100644 +index 0000000..af791d2 +--- /dev/null ++++ block/bfq-ioc.c +@@ -0,0 +1,34 @@ ++/* ++ * BFQ: I/O context handling. ++ * ++ * Based on ideas and code from CFQ: ++ * Copyright (C) 2003 Jens Axboe ++ * ++ * Copyright (C) 2008 Fabio Checconi ++ * Paolo Valente ++ */ ++ ++/** ++ * icq_to_bic - convert iocontext queue structure to bfq_io_cq. ++ * @icq: the iocontext queue. ++ */ ++static inline struct bfq_io_cq *icq_to_bic(struct io_cq *icq) ++{ ++ /* bic->icq is the first member, %NULL will convert to %NULL */ ++ return container_of(icq, struct bfq_io_cq, icq); ++} ++ ++/** ++ * bfq_bic_lookup - search into @ioc a bic associated to @bfqd. ++ * @bfqd: the lookup key. ++ * @ioc: the io_context of the process doing I/O. ++ * ++ * Queue lock must be held. ++ */ ++static inline struct bfq_io_cq *bfq_bic_lookup(struct bfq_data *bfqd, ++ struct io_context *ioc) ++{ ++ if(ioc) ++ return icq_to_bic(ioc_lookup_icq(ioc, bfqd->queue)); ++ return NULL; ++} +diff --git a/block/bfq-iosched.c block/bfq-iosched.c +new file mode 100644 +index 0000000..4122afd +--- /dev/null ++++ block/bfq-iosched.c +@@ -0,0 +1,2985 @@ ++/* ++ * BFQ, or Budget Fair Queueing, disk scheduler. ++ * ++ * Based on ideas and code from CFQ: ++ * Copyright (C) 2003 Jens Axboe ++ * ++ * Copyright (C) 2008 Fabio Checconi ++ * Paolo Valente ++ * ++ * Licensed under the GPL-2 as detailed in the accompanying COPYING.BFQ file. ++ * ++ * BFQ is a proportional share disk scheduling algorithm based on the ++ * slice-by-slice service scheme of CFQ. But BFQ assigns budgets, ++ * measured in number of sectors, to tasks instead of time slices. ++ * The disk is not granted to the active task for a given time slice, ++ * but until it has exahusted its assigned budget. 
This change from ++ * the time to the service domain allows BFQ to distribute the disk ++ * bandwidth among tasks as desired, without any distortion due to ++ * ZBR, workload fluctuations or other factors. BFQ uses an ad hoc ++ * internal scheduler, called B-WF2Q+, to schedule tasks according to ++ * their budgets. Thanks to this accurate scheduler, BFQ can afford ++ * to assign high budgets to disk-bound non-seeky tasks (to boost the ++ * throughput), and yet guarantee low latencies to interactive and ++ * soft real-time applications. ++ * ++ * BFQ has been introduced in [1], where the interested reader can ++ * find an accurate description of the algorithm, the bandwidth ++ * distribution and latency guarantees it provides, plus formal proofs ++ * of all the properties. With respect to the algorithm presented in ++ * the paper, this implementation adds several little heuristics, and ++ * a hierarchical extension, based on H-WF2Q+. ++ * ++ * B-WF2Q+ is based on WF2Q+, that is described in [2], together with ++ * H-WF2Q+, while the augmented tree used to implement B-WF2Q+ with O(log N) ++ * complexity derives from the one introduced with EEVDF in [3]. ++ * ++ * [1] P. Valente and F. Checconi, ``High Throughput Disk Scheduling ++ * with Deterministic Guarantees on Bandwidth Distribution,'', ++ * IEEE Transactions on Computer, May 2010. ++ * ++ * http://algo.ing.unimo.it/people/paolo/disk_sched/bfq-techreport.pdf ++ * ++ * [2] Jon C.R. Bennett and H. Zhang, ``Hierarchical Packet Fair Queueing ++ * Algorithms,'' IEEE/ACM Transactions on Networking, 5(5):675-689, ++ * Oct 1997. ++ * ++ * http://www.cs.cmu.edu/~hzhang/papers/TON-97-Oct.ps.gz ++ * ++ * [3] I. Stoica and H. Abdel-Wahab, ``Earliest Eligible Virtual Deadline ++ * First: A Flexible and Accurate Mechanism for Proportional Share ++ * Resource Allocation,'' technical report. ++ * ++ * http://www.cs.berkeley.edu/~istoica/papers/eevdf-tr-95.pdf ++ */ ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include "bfq.h" ++#include "blk.h" ++ ++/* Max number of dispatches in one round of service. */ ++static const int bfq_quantum = 4; ++ ++/* Expiration time of sync (0) and async (1) requests, in jiffies. */ ++static const int bfq_fifo_expire[2] = { HZ / 4, HZ / 8 }; ++ ++/* Maximum backwards seek, in KiB. */ ++static const int bfq_back_max = 16 * 1024; ++ ++/* Penalty of a backwards seek, in number of sectors. */ ++static const int bfq_back_penalty = 2; ++ ++/* Idling period duration, in jiffies. */ ++static int bfq_slice_idle = HZ / 125; ++ ++/* Default maximum budget values, in sectors and number of requests. */ ++static const int bfq_default_max_budget = 16 * 1024; ++static const int bfq_max_budget_async_rq = 4; ++ ++/* ++ * Async to sync throughput distribution is controlled as follows: ++ * when an async request is served, the entity is charged the number ++ * of sectors of the request, multipled by the factor below ++ */ ++static const int bfq_async_charge_factor = 10; ++ ++/* Default timeout values, in jiffies, approximating CFQ defaults. */ ++static const int bfq_timeout_sync = HZ / 8; ++static int bfq_timeout_async = HZ / 25; ++ ++struct kmem_cache *bfq_pool; ++ ++/* Below this threshold (in ms), we consider thinktime immediate. */ ++#define BFQ_MIN_TT 2 ++ ++/* hw_tag detection: parallel requests threshold and min samples needed. 
*/ ++#define BFQ_HW_QUEUE_THRESHOLD 4 ++#define BFQ_HW_QUEUE_SAMPLES 32 ++ ++#define BFQQ_SEEK_THR (sector_t)(8 * 1024) ++#define BFQQ_SEEKY(bfqq) ((bfqq)->seek_mean > BFQQ_SEEK_THR) ++ ++/* Min samples used for peak rate estimation (for autotuning). */ ++#define BFQ_PEAK_RATE_SAMPLES 32 ++ ++/* Shift used for peak rate fixed precision calculations. */ ++#define BFQ_RATE_SHIFT 16 ++ ++/* ++ * The duration of the weight raising for interactive applications is ++ * computed automatically (as default behaviour), using the following ++ * formula: duration = (R / r) * T, where r is the peak rate of the ++ * disk, and R and T are two reference parameters. In particular, R is ++ * the peak rate of a reference disk, and T is about the maximum time ++ * for starting popular large applications on that disk, under BFQ and ++ * while reading two files in parallel. Finally, BFQ uses two ++ * different pairs (R, T) depending on whether the disk is rotational ++ * or non-rotational. ++ */ ++#define T_rot (msecs_to_jiffies(5500)) ++#define T_nonrot (msecs_to_jiffies(2000)) ++/* Next two quantities are in sectors/usec, left-shifted by BFQ_RATE_SHIFT */ ++#define R_rot 17415 ++#define R_nonrot 34791 ++ ++#define BFQ_SERVICE_TREE_INIT ((struct bfq_service_tree) \ ++ { RB_ROOT, RB_ROOT, NULL, NULL, 0, 0 }) ++ ++#define RQ_BIC(rq) ((struct bfq_io_cq *) (rq)->elv.priv[0]) ++#define RQ_BFQQ(rq) ((rq)->elv.priv[1]) ++ ++#include "bfq-ioc.c" ++#include "bfq-sched.c" ++#include "bfq-cgroup.c" ++ ++#define bfq_class_idle(bfqq) ((bfqq)->entity.ioprio_class ==\ ++ IOPRIO_CLASS_IDLE) ++#define bfq_class_rt(bfqq) ((bfqq)->entity.ioprio_class ==\ ++ IOPRIO_CLASS_RT) ++ ++#define bfq_sample_valid(samples) ((samples) > 80) ++ ++/* ++ * We regard a request as SYNC, if either it's a read or has the SYNC bit ++ * set (in which case it could also be a direct WRITE). ++ */ ++static inline int bfq_bio_sync(struct bio *bio) ++{ ++ if (bio_data_dir(bio) == READ || (bio->bi_rw & REQ_SYNC)) ++ return 1; ++ ++ return 0; ++} ++ ++/* ++ * Scheduler run of queue, if there are requests pending and no one in the ++ * driver that will restart queueing. ++ */ ++static inline void bfq_schedule_dispatch(struct bfq_data *bfqd) ++{ ++ if (bfqd->queued != 0) { ++ bfq_log(bfqd, "schedule dispatch"); ++ kblockd_schedule_work(bfqd->queue, &bfqd->unplug_work); ++ } ++} ++ ++/* ++ * Lifted from AS - choose which of rq1 and rq2 that is best served now. ++ * We choose the request that is closesr to the head right now. Distance ++ * behind the head is penalized and only allowed to a certain extent. ++ */ ++static struct request *bfq_choose_req(struct bfq_data *bfqd, ++ struct request *rq1, ++ struct request *rq2, ++ sector_t last) ++{ ++ sector_t s1, s2, d1 = 0, d2 = 0; ++ unsigned long back_max; ++#define BFQ_RQ1_WRAP 0x01 /* request 1 wraps */ ++#define BFQ_RQ2_WRAP 0x02 /* request 2 wraps */ ++ unsigned wrap = 0; /* bit mask: requests behind the disk head? */ ++ ++ if (rq1 == NULL || rq1 == rq2) ++ return rq2; ++ if (rq2 == NULL) ++ return rq1; ++ ++ if (rq_is_sync(rq1) && !rq_is_sync(rq2)) ++ return rq1; ++ else if (rq_is_sync(rq2) && !rq_is_sync(rq1)) ++ return rq2; ++ if ((rq1->cmd_flags & REQ_META) && !(rq2->cmd_flags & REQ_META)) ++ return rq1; ++ else if ((rq2->cmd_flags & REQ_META) && !(rq1->cmd_flags & REQ_META)) ++ return rq2; ++ ++ s1 = blk_rq_pos(rq1); ++ s2 = blk_rq_pos(rq2); ++ ++ /* ++ * By definition, 1KiB is 2 sectors. 
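++ * The bfq_back_max tunable is expressed in KiB, so the multiplication
++ * by two below yields the same limit in 512-byte sectors.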
++ */ ++ back_max = bfqd->bfq_back_max * 2; ++ ++ /* ++ * Strict one way elevator _except_ in the case where we allow ++ * short backward seeks which are biased as twice the cost of a ++ * similar forward seek. ++ */ ++ if (s1 >= last) ++ d1 = s1 - last; ++ else if (s1 + back_max >= last) ++ d1 = (last - s1) * bfqd->bfq_back_penalty; ++ else ++ wrap |= BFQ_RQ1_WRAP; ++ ++ if (s2 >= last) ++ d2 = s2 - last; ++ else if (s2 + back_max >= last) ++ d2 = (last - s2) * bfqd->bfq_back_penalty; ++ else ++ wrap |= BFQ_RQ2_WRAP; ++ ++ /* Found required data */ ++ ++ /* ++ * By doing switch() on the bit mask "wrap" we avoid having to ++ * check two variables for all permutations: --> faster! ++ */ ++ switch (wrap) { ++ case 0: /* common case for CFQ: rq1 and rq2 not wrapped */ ++ if (d1 < d2) ++ return rq1; ++ else if (d2 < d1) ++ return rq2; ++ else { ++ if (s1 >= s2) ++ return rq1; ++ else ++ return rq2; ++ } ++ ++ case BFQ_RQ2_WRAP: ++ return rq1; ++ case BFQ_RQ1_WRAP: ++ return rq2; ++ case (BFQ_RQ1_WRAP|BFQ_RQ2_WRAP): /* both rqs wrapped */ ++ default: ++ /* ++ * Since both rqs are wrapped, ++ * start with the one that's further behind head ++ * (--> only *one* back seek required), ++ * since back seek takes more time than forward. ++ */ ++ if (s1 <= s2) ++ return rq1; ++ else ++ return rq2; ++ } ++} ++ ++static struct bfq_queue * ++bfq_rq_pos_tree_lookup(struct bfq_data *bfqd, struct rb_root *root, ++ sector_t sector, struct rb_node **ret_parent, ++ struct rb_node ***rb_link) ++{ ++ struct rb_node **p, *parent; ++ struct bfq_queue *bfqq = NULL; ++ ++ parent = NULL; ++ p = &root->rb_node; ++ while (*p) { ++ struct rb_node **n; ++ ++ parent = *p; ++ bfqq = rb_entry(parent, struct bfq_queue, pos_node); ++ ++ /* ++ * Sort strictly based on sector. Smallest to the left, ++ * largest to the right. ++ */ ++ if (sector > blk_rq_pos(bfqq->next_rq)) ++ n = &(*p)->rb_right; ++ else if (sector < blk_rq_pos(bfqq->next_rq)) ++ n = &(*p)->rb_left; ++ else ++ break; ++ p = n; ++ bfqq = NULL; ++ } ++ ++ *ret_parent = parent; ++ if (rb_link) ++ *rb_link = p; ++ ++ bfq_log(bfqd, "rq_pos_tree_lookup %llu: returning %d", ++ (long long unsigned)sector, ++ bfqq != NULL ? 
bfqq->pid : 0); ++ ++ return bfqq; ++} ++ ++static void bfq_rq_pos_tree_add(struct bfq_data *bfqd, struct bfq_queue *bfqq) ++{ ++ struct rb_node **p, *parent; ++ struct bfq_queue *__bfqq; ++ ++ if (bfqq->pos_root != NULL) { ++ rb_erase(&bfqq->pos_node, bfqq->pos_root); ++ bfqq->pos_root = NULL; ++ } ++ ++ if (bfq_class_idle(bfqq)) ++ return; ++ if (!bfqq->next_rq) ++ return; ++ ++ bfqq->pos_root = &bfqd->rq_pos_tree; ++ __bfqq = bfq_rq_pos_tree_lookup(bfqd, bfqq->pos_root, ++ blk_rq_pos(bfqq->next_rq), &parent, &p); ++ if (__bfqq == NULL) { ++ rb_link_node(&bfqq->pos_node, parent, p); ++ rb_insert_color(&bfqq->pos_node, bfqq->pos_root); ++ } else ++ bfqq->pos_root = NULL; ++} ++ ++static struct request *bfq_find_next_rq(struct bfq_data *bfqd, ++ struct bfq_queue *bfqq, ++ struct request *last) ++{ ++ struct rb_node *rbnext = rb_next(&last->rb_node); ++ struct rb_node *rbprev = rb_prev(&last->rb_node); ++ struct request *next = NULL, *prev = NULL; ++ ++ BUG_ON(RB_EMPTY_NODE(&last->rb_node)); ++ ++ if (rbprev != NULL) ++ prev = rb_entry_rq(rbprev); ++ ++ if (rbnext != NULL) ++ next = rb_entry_rq(rbnext); ++ else { ++ rbnext = rb_first(&bfqq->sort_list); ++ if (rbnext && rbnext != &last->rb_node) ++ next = rb_entry_rq(rbnext); ++ } ++ ++ return bfq_choose_req(bfqd, next, prev, blk_rq_pos(last)); ++} ++ ++static void bfq_del_rq_rb(struct request *rq) ++{ ++ struct bfq_queue *bfqq = RQ_BFQQ(rq); ++ struct bfq_data *bfqd = bfqq->bfqd; ++ const int sync = rq_is_sync(rq); ++ ++ BUG_ON(bfqq->queued[sync] == 0); ++ bfqq->queued[sync]--; ++ bfqd->queued--; ++ ++ elv_rb_del(&bfqq->sort_list, rq); ++ ++ if (RB_EMPTY_ROOT(&bfqq->sort_list)) { ++ if (bfq_bfqq_busy(bfqq) && bfqq != bfqd->active_queue) ++ bfq_del_bfqq_busy(bfqd, bfqq, 1); ++ /* ++ * Remove queue from request-position tree as it is empty. ++ */ ++ if (bfqq->pos_root != NULL) { ++ rb_erase(&bfqq->pos_node, bfqq->pos_root); ++ bfqq->pos_root = NULL; ++ } ++ } ++} ++ ++/* see the definition of bfq_async_charge_factor for details */ ++static inline unsigned long bfq_serv_to_charge(struct request *rq, ++ struct bfq_queue *bfqq) ++{ ++ return blk_rq_sectors(rq) * ++ (1 + ((!bfq_bfqq_sync(bfqq)) * (bfqq->raising_coeff == 1) * ++ bfq_async_charge_factor)); ++} ++ ++/** ++ * bfq_updated_next_req - update the queue after a new next_rq selection. ++ * @bfqd: the device data the queue belongs to. ++ * @bfqq: the queue to update. ++ * ++ * If the first request of a queue changes we make sure that the queue ++ * has enough budget to serve at least its first request (if the ++ * request has grown). We do this because if the queue has not enough ++ * budget for its first request, it has to go through two dispatch ++ * rounds to actually get it dispatched. ++ */ ++static void bfq_updated_next_req(struct bfq_data *bfqd, ++ struct bfq_queue *bfqq) ++{ ++ struct bfq_entity *entity = &bfqq->entity; ++ struct bfq_service_tree *st = bfq_entity_service_tree(entity); ++ struct request *next_rq = bfqq->next_rq; ++ unsigned long new_budget; ++ ++ if (next_rq == NULL) ++ return; ++ ++ if (bfqq == bfqd->active_queue) ++ /* ++ * In order not to break guarantees, budgets cannot be ++ * changed after an entity has been selected. 
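++ * The in-service queue therefore keeps the budget it was selected
++ * with, and we return without updating anything.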
++ */ ++ return; ++ ++ BUG_ON(entity->tree != &st->active); ++ BUG_ON(entity == entity->sched_data->active_entity); ++ ++ new_budget = max_t(unsigned long, bfqq->max_budget, ++ bfq_serv_to_charge(next_rq, bfqq)); ++ entity->budget = new_budget; ++ bfq_log_bfqq(bfqd, bfqq, "updated next rq: new budget %lu", new_budget); ++ bfq_activate_bfqq(bfqd, bfqq); ++} ++ ++static inline unsigned int bfq_wrais_duration(struct bfq_data *bfqd) ++{ ++ u64 dur; ++ ++ if (bfqd->bfq_raising_max_time > 0) ++ return bfqd->bfq_raising_max_time; ++ ++ dur = bfqd->RT_prod; ++ do_div(dur, bfqd->peak_rate); ++ ++ return dur; ++} ++ ++static void bfq_add_rq_rb(struct request *rq) ++{ ++ struct bfq_queue *bfqq = RQ_BFQQ(rq); ++ struct bfq_entity *entity = &bfqq->entity; ++ struct bfq_data *bfqd = bfqq->bfqd; ++ struct request *next_rq, *prev; ++ unsigned long old_raising_coeff = bfqq->raising_coeff; ++ int idle_for_long_time = bfqq->budget_timeout + ++ bfqd->bfq_raising_min_idle_time < jiffies; ++ ++ bfq_log_bfqq(bfqd, bfqq, "add_rq_rb %d", rq_is_sync(rq)); ++ bfqq->queued[rq_is_sync(rq)]++; ++ bfqd->queued++; ++ ++ elv_rb_add(&bfqq->sort_list, rq); ++ ++ /* ++ * Check if this request is a better next-serve candidate. ++ */ ++ prev = bfqq->next_rq; ++ next_rq = bfq_choose_req(bfqd, bfqq->next_rq, rq, bfqd->last_position); ++ BUG_ON(next_rq == NULL); ++ bfqq->next_rq = next_rq; ++ ++ /* ++ * Adjust priority tree position, if next_rq changes. ++ */ ++ if (prev != bfqq->next_rq) ++ bfq_rq_pos_tree_add(bfqd, bfqq); ++ ++ if (!bfq_bfqq_busy(bfqq)) { ++ int soft_rt = bfqd->bfq_raising_max_softrt_rate > 0 && ++ bfqq->soft_rt_next_start < jiffies; ++ entity->budget = max_t(unsigned long, bfqq->max_budget, ++ bfq_serv_to_charge(next_rq, bfqq)); ++ ++ if (! bfqd->low_latency) ++ goto add_bfqq_busy; ++ ++ /* ++ * If the queue is not being boosted and has been idle ++ * for enough time, start a weight-raising period ++ */ ++ if(old_raising_coeff == 1 && (idle_for_long_time || soft_rt)) { ++ bfqq->raising_coeff = bfqd->bfq_raising_coeff; ++ if (idle_for_long_time) ++ bfqq->raising_cur_max_time = ++ bfq_wrais_duration(bfqd); ++ else ++ bfqq->raising_cur_max_time = ++ bfqd->bfq_raising_rt_max_time; ++ bfq_log_bfqq(bfqd, bfqq, ++ "wrais starting at %llu msec," ++ "rais_max_time %u", ++ bfqq->last_rais_start_finish, ++ jiffies_to_msecs(bfqq-> ++ raising_cur_max_time)); ++ } else if (old_raising_coeff > 1) { ++ if (idle_for_long_time) ++ bfqq->raising_cur_max_time = ++ bfq_wrais_duration(bfqd); ++ else if (bfqq->raising_cur_max_time == ++ bfqd->bfq_raising_rt_max_time && ++ !soft_rt) { ++ bfqq->raising_coeff = 1; ++ bfq_log_bfqq(bfqd, bfqq, ++ "wrais ending at %llu msec," ++ "rais_max_time %u", ++ bfqq->last_rais_start_finish, ++ jiffies_to_msecs(bfqq-> ++ raising_cur_max_time)); ++ } ++ } ++ if (old_raising_coeff != bfqq->raising_coeff) ++ entity->ioprio_changed = 1; ++add_bfqq_busy: ++ bfq_add_bfqq_busy(bfqd, bfqq); ++ } else { ++ if(bfqd->low_latency && old_raising_coeff == 1 && ++ !rq_is_sync(rq) && ++ bfqq->last_rais_start_finish + ++ bfqd->bfq_raising_min_inter_arr_async < jiffies) { ++ bfqq->raising_coeff = bfqd->bfq_raising_coeff; ++ bfqq->raising_cur_max_time = bfq_wrais_duration(bfqd); ++ ++ entity->ioprio_changed = 1; ++ bfq_log_bfqq(bfqd, bfqq, ++ "non-idle wrais starting at %llu msec," ++ "rais_max_time %u", ++ bfqq->last_rais_start_finish, ++ jiffies_to_msecs(bfqq-> ++ raising_cur_max_time)); ++ } ++ bfq_updated_next_req(bfqd, bfqq); ++ } ++ ++ if(bfqd->low_latency && ++ (old_raising_coeff == 1 || bfqq->raising_coeff == 
1 || ++ idle_for_long_time)) ++ bfqq->last_rais_start_finish = jiffies; ++} ++ ++static void bfq_reposition_rq_rb(struct bfq_queue *bfqq, struct request *rq) ++{ ++ elv_rb_del(&bfqq->sort_list, rq); ++ bfqq->queued[rq_is_sync(rq)]--; ++ bfqq->bfqd->queued--; ++ bfq_add_rq_rb(rq); ++} ++ ++static struct request *bfq_find_rq_fmerge(struct bfq_data *bfqd, ++ struct bio *bio) ++{ ++ struct task_struct *tsk = current; ++ struct bfq_io_cq *bic; ++ struct bfq_queue *bfqq; ++ ++ bic = bfq_bic_lookup(bfqd, tsk->io_context); ++ if (bic == NULL) ++ return NULL; ++ ++ bfqq = bic_to_bfqq(bic, bfq_bio_sync(bio)); ++ if (bfqq != NULL) { ++ sector_t sector = bio->bi_sector + bio_sectors(bio); ++ ++ return elv_rb_find(&bfqq->sort_list, sector); ++ } ++ ++ return NULL; ++} ++ ++static void bfq_activate_request(struct request_queue *q, struct request *rq) ++{ ++ struct bfq_data *bfqd = q->elevator->elevator_data; ++ ++ bfqd->rq_in_driver++; ++ bfqd->last_position = blk_rq_pos(rq) + blk_rq_sectors(rq); ++ bfq_log(bfqd, "activate_request: new bfqd->last_position %llu", ++ (long long unsigned)bfqd->last_position); ++} ++ ++static void bfq_deactivate_request(struct request_queue *q, struct request *rq) ++{ ++ struct bfq_data *bfqd = q->elevator->elevator_data; ++ ++ WARN_ON(bfqd->rq_in_driver == 0); ++ bfqd->rq_in_driver--; ++} ++ ++static void bfq_remove_request(struct request *rq) ++{ ++ struct bfq_queue *bfqq = RQ_BFQQ(rq); ++ struct bfq_data *bfqd = bfqq->bfqd; ++ ++ if (bfqq->next_rq == rq) { ++ bfqq->next_rq = bfq_find_next_rq(bfqd, bfqq, rq); ++ bfq_updated_next_req(bfqd, bfqq); ++ } ++ ++ list_del_init(&rq->queuelist); ++ bfq_del_rq_rb(rq); ++ ++ if (rq->cmd_flags & REQ_META) { ++ WARN_ON(bfqq->meta_pending == 0); ++ bfqq->meta_pending--; ++ } ++} ++ ++static int bfq_merge(struct request_queue *q, struct request **req, ++ struct bio *bio) ++{ ++ struct bfq_data *bfqd = q->elevator->elevator_data; ++ struct request *__rq; ++ ++ __rq = bfq_find_rq_fmerge(bfqd, bio); ++ if (__rq != NULL && elv_rq_merge_ok(__rq, bio)) { ++ *req = __rq; ++ return ELEVATOR_FRONT_MERGE; ++ } ++ ++ return ELEVATOR_NO_MERGE; ++} ++ ++static void bfq_merged_request(struct request_queue *q, struct request *req, ++ int type) ++{ ++ if (type == ELEVATOR_FRONT_MERGE) { ++ struct bfq_queue *bfqq = RQ_BFQQ(req); ++ ++ bfq_reposition_rq_rb(bfqq, req); ++ } ++} ++ ++static void bfq_merged_requests(struct request_queue *q, struct request *rq, ++ struct request *next) ++{ ++ struct bfq_queue *bfqq = RQ_BFQQ(rq); ++ ++ /* ++ * Reposition in fifo if next is older than rq. ++ */ ++ if (!list_empty(&rq->queuelist) && !list_empty(&next->queuelist) && ++ time_before(rq_fifo_time(next), rq_fifo_time(rq))) { ++ list_move(&rq->queuelist, &next->queuelist); ++ rq_set_fifo_time(rq, rq_fifo_time(next)); ++ } ++ ++ if (bfqq->next_rq == next) ++ bfqq->next_rq = rq; ++ ++ bfq_remove_request(next); ++} ++ ++static int bfq_allow_merge(struct request_queue *q, struct request *rq, ++ struct bio *bio) ++{ ++ struct bfq_data *bfqd = q->elevator->elevator_data; ++ struct bfq_io_cq *bic; ++ struct bfq_queue *bfqq; ++ ++ /* ++ * Disallow merge of a sync bio into an async request. ++ */ ++ if (bfq_bio_sync(bio) && !rq_is_sync(rq)) ++ return 0; ++ ++ /* ++ * Lookup the bfqq that this bio will be queued with. Allow ++ * merge only if rq is queued there. ++ * Queue lock is held here. 
++ */ ++ bic = bfq_bic_lookup(bfqd, current->io_context); ++ if (bic == NULL) ++ return 0; ++ ++ bfqq = bic_to_bfqq(bic, bfq_bio_sync(bio)); ++ return bfqq == RQ_BFQQ(rq); ++} ++ ++static void __bfq_set_active_queue(struct bfq_data *bfqd, ++ struct bfq_queue *bfqq) ++{ ++ if (bfqq != NULL) { ++ bfq_mark_bfqq_must_alloc(bfqq); ++ bfq_mark_bfqq_budget_new(bfqq); ++ bfq_clear_bfqq_fifo_expire(bfqq); ++ ++ bfqd->budgets_assigned = (bfqd->budgets_assigned*7 + 256) / 8; ++ ++ bfq_log_bfqq(bfqd, bfqq, "set_active_queue, cur-budget = %lu", ++ bfqq->entity.budget); ++ } ++ ++ bfqd->active_queue = bfqq; ++} ++ ++/* ++ * Get and set a new active queue for service. ++ */ ++static struct bfq_queue *bfq_set_active_queue(struct bfq_data *bfqd, ++ struct bfq_queue *bfqq) ++{ ++ if (!bfqq) ++ bfqq = bfq_get_next_queue(bfqd); ++ else ++ bfq_get_next_queue_forced(bfqd, bfqq); ++ ++ __bfq_set_active_queue(bfqd, bfqq); ++ return bfqq; ++} ++ ++static inline sector_t bfq_dist_from_last(struct bfq_data *bfqd, ++ struct request *rq) ++{ ++ if (blk_rq_pos(rq) >= bfqd->last_position) ++ return blk_rq_pos(rq) - bfqd->last_position; ++ else ++ return bfqd->last_position - blk_rq_pos(rq); ++} ++ ++/* ++ * Return true if bfqq has no request pending and rq is close enough to ++ * bfqd->last_position, or if rq is closer to bfqd->last_position than ++ * bfqq->next_rq ++ */ ++static inline int bfq_rq_close(struct bfq_data *bfqd, struct request *rq) ++{ ++ return bfq_dist_from_last(bfqd, rq) <= BFQQ_SEEK_THR; ++} ++ ++static struct bfq_queue *bfqq_close(struct bfq_data *bfqd) ++{ ++ struct rb_root *root = &bfqd->rq_pos_tree; ++ struct rb_node *parent, *node; ++ struct bfq_queue *__bfqq; ++ sector_t sector = bfqd->last_position; ++ ++ if (RB_EMPTY_ROOT(root)) ++ return NULL; ++ ++ /* ++ * First, if we find a request starting at the end of the last ++ * request, choose it. ++ */ ++ __bfqq = bfq_rq_pos_tree_lookup(bfqd, root, sector, &parent, NULL); ++ if (__bfqq != NULL) ++ return __bfqq; ++ ++ /* ++ * If the exact sector wasn't found, the parent of the NULL leaf ++ * will contain the closest sector (rq_pos_tree sorted by next_request ++ * position). ++ */ ++ __bfqq = rb_entry(parent, struct bfq_queue, pos_node); ++ if (bfq_rq_close(bfqd, __bfqq->next_rq)) ++ return __bfqq; ++ ++ if (blk_rq_pos(__bfqq->next_rq) < sector) ++ node = rb_next(&__bfqq->pos_node); ++ else ++ node = rb_prev(&__bfqq->pos_node); ++ if (node == NULL) ++ return NULL; ++ ++ __bfqq = rb_entry(node, struct bfq_queue, pos_node); ++ if (bfq_rq_close(bfqd, __bfqq->next_rq)) ++ return __bfqq; ++ ++ return NULL; ++} ++ ++/* ++ * bfqd - obvious ++ * cur_bfqq - passed in so that we don't decide that the current queue ++ * is closely cooperating with itself. ++ * ++ * We are assuming that cur_bfqq has dispatched at least one request, ++ * and that bfqd->last_position reflects a position on the disk associated ++ * with the I/O issued by cur_bfqq. ++ */ ++static struct bfq_queue *bfq_close_cooperator(struct bfq_data *bfqd, ++ struct bfq_queue *cur_bfqq) ++{ ++ struct bfq_queue *bfqq; ++ ++ if (bfq_class_idle(cur_bfqq)) ++ return NULL; ++ if (!bfq_bfqq_sync(cur_bfqq)) ++ return NULL; ++ if (BFQQ_SEEKY(cur_bfqq)) ++ return NULL; ++ ++ /* If device has only one backlogged bfq_queue, don't search. */ ++ if (bfqd->busy_queues == 1) ++ return NULL; ++ ++ /* ++ * We should notice if some of the queues are cooperating, e.g. ++ * working closely on the same area of the disk. In that case, ++ * we can group them together and don't waste time idling. 
++ */ ++ bfqq = bfqq_close(bfqd); ++ if (bfqq == NULL || bfqq == cur_bfqq) ++ return NULL; ++ ++ /* ++ * Do not merge queues from different bfq_groups. ++ */ ++ if (bfqq->entity.parent != cur_bfqq->entity.parent) ++ return NULL; ++ ++ /* ++ * It only makes sense to merge sync queues. ++ */ ++ if (!bfq_bfqq_sync(bfqq)) ++ return NULL; ++ if (BFQQ_SEEKY(bfqq)) ++ return NULL; ++ ++ /* ++ * Do not merge queues of different priority classes. ++ */ ++ if (bfq_class_rt(bfqq) != bfq_class_rt(cur_bfqq)) ++ return NULL; ++ ++ return bfqq; ++} ++ ++/* ++ * If enough samples have been computed, return the current max budget ++ * stored in bfqd, which is dynamically updated according to the ++ * estimated disk peak rate; otherwise return the default max budget ++ */ ++static inline unsigned long bfq_max_budget(struct bfq_data *bfqd) ++{ ++ if (bfqd->budgets_assigned < 194) ++ return bfq_default_max_budget; ++ else ++ return bfqd->bfq_max_budget; ++} ++ ++/* ++ * Return min budget, which is a fraction of the current or default ++ * max budget (trying with 1/32) ++ */ ++static inline unsigned long bfq_min_budget(struct bfq_data *bfqd) ++{ ++ if (bfqd->budgets_assigned < 194) ++ return bfq_default_max_budget; ++ else ++ return bfqd->bfq_max_budget / 32; ++} ++ ++/* ++ * Decides whether idling should be done for given device and ++ * given active queue. ++ */ ++static inline bool bfq_queue_nonrot_noidle(struct bfq_data *bfqd, ++ struct bfq_queue *active_bfqq) ++{ ++ if (active_bfqq == NULL) ++ return false; ++ /* ++ * If device is SSD it has no seek penalty, disable idling; but ++ * do so only if: ++ * - device does not support queuing, otherwise we still have ++ * a problem with sync vs async workloads; ++ * - the queue is not weight-raised, to preserve guarantees. ++ */ ++ return (blk_queue_nonrot(bfqd->queue) && bfqd->hw_tag && ++ active_bfqq->raising_coeff == 1); ++} ++ ++static void bfq_arm_slice_timer(struct bfq_data *bfqd) ++{ ++ struct bfq_queue *bfqq = bfqd->active_queue; ++ struct bfq_io_cq *bic; ++ unsigned long sl; ++ ++ WARN_ON(!RB_EMPTY_ROOT(&bfqq->sort_list)); ++ ++ if (bfq_queue_nonrot_noidle(bfqd, bfqq)) ++ return; ++ ++ /* Idling is disabled, either manually or by past process history. */ ++ if (bfqd->bfq_slice_idle == 0 || !bfq_bfqq_idle_window(bfqq)) ++ return; ++ ++ /* Tasks have exited, don't wait. */ ++ bic = bfqd->active_bic; ++ if (bic == NULL || atomic_read(&bic->icq.ioc->nr_tasks) == 0) ++ return; ++ ++ bfq_mark_bfqq_wait_request(bfqq); ++ ++ /* ++ * We don't want to idle for seeks, but we do want to allow ++ * fair distribution of slice time for a process doing back-to-back ++ * seeks. So allow a little bit of time for him to submit a new rq. ++ * ++ * To prevent processes with (partly) seeky workloads from ++ * being too ill-treated, grant them a small fraction of the ++ * assigned budget before reducing the waiting time to ++ * BFQ_MIN_TT. This happened to help reduce latency. ++ */ ++ sl = bfqd->bfq_slice_idle; ++ if (bfq_sample_valid(bfqq->seek_samples) && BFQQ_SEEKY(bfqq) && ++ bfqq->entity.service > bfq_max_budget(bfqd) / 8 && ++ bfqq->raising_coeff == 1) ++ sl = min(sl, msecs_to_jiffies(BFQ_MIN_TT)); ++ else if (bfqq->raising_coeff > 1) ++ sl = sl * 3; ++ bfqd->last_idling_start = ktime_get(); ++ mod_timer(&bfqd->idle_slice_timer, jiffies + sl); ++ bfq_log(bfqd, "arm idle: %u/%u ms", ++ jiffies_to_msecs(sl), jiffies_to_msecs(bfqd->bfq_slice_idle)); ++} ++ ++/* ++ * Set the maximum time for the active queue to consume its ++ * budget. 
This prevents seeky processes from lowering the disk ++ * throughput (always guaranteed with a time slice scheme as in CFQ). ++ */ ++static void bfq_set_budget_timeout(struct bfq_data *bfqd) ++{ ++ struct bfq_queue *bfqq = bfqd->active_queue; ++ unsigned int timeout_coeff; ++ if (bfqq->raising_cur_max_time == bfqd->bfq_raising_rt_max_time) ++ timeout_coeff = 1; ++ else ++ timeout_coeff = bfqq->entity.weight / bfqq->entity.orig_weight; ++ ++ bfqd->last_budget_start = ktime_get(); ++ ++ bfq_clear_bfqq_budget_new(bfqq); ++ bfqq->budget_timeout = jiffies + ++ bfqd->bfq_timeout[bfq_bfqq_sync(bfqq)] * timeout_coeff; ++ ++ bfq_log_bfqq(bfqd, bfqq, "set budget_timeout %u", ++ jiffies_to_msecs(bfqd->bfq_timeout[bfq_bfqq_sync(bfqq)] * ++ timeout_coeff)); ++} ++ ++/* ++ * Move request from internal lists to the request queue dispatch list. ++ */ ++static void bfq_dispatch_insert(struct request_queue *q, struct request *rq) ++{ ++ struct bfq_data *bfqd = q->elevator->elevator_data; ++ struct bfq_queue *bfqq = RQ_BFQQ(rq); ++ ++ bfq_remove_request(rq); ++ bfqq->dispatched++; ++ elv_dispatch_sort(q, rq); ++ ++ if (bfq_bfqq_sync(bfqq)) ++ bfqd->sync_flight++; ++} ++ ++/* ++ * Return expired entry, or NULL to just start from scratch in rbtree. ++ */ ++static struct request *bfq_check_fifo(struct bfq_queue *bfqq) ++{ ++ struct request *rq = NULL; ++ ++ if (bfq_bfqq_fifo_expire(bfqq)) ++ return NULL; ++ ++ bfq_mark_bfqq_fifo_expire(bfqq); ++ ++ if (list_empty(&bfqq->fifo)) ++ return NULL; ++ ++ rq = rq_entry_fifo(bfqq->fifo.next); ++ ++ if (time_before(jiffies, rq_fifo_time(rq))) ++ return NULL; ++ ++ return rq; ++} ++ ++/* ++ * Must be called with the queue_lock held. ++ */ ++static int bfqq_process_refs(struct bfq_queue *bfqq) ++{ ++ int process_refs, io_refs; ++ ++ io_refs = bfqq->allocated[READ] + bfqq->allocated[WRITE]; ++ process_refs = atomic_read(&bfqq->ref) - io_refs - bfqq->entity.on_st; ++ BUG_ON(process_refs < 0); ++ return process_refs; ++} ++ ++static void bfq_setup_merge(struct bfq_queue *bfqq, struct bfq_queue *new_bfqq) ++{ ++ int process_refs, new_process_refs; ++ struct bfq_queue *__bfqq; ++ ++ /* ++ * If there are no process references on the new_bfqq, then it is ++ * unsafe to follow the ->new_bfqq chain as other bfqq's in the chain ++ * may have dropped their last reference (not just their last process ++ * reference). ++ */ ++ if (!bfqq_process_refs(new_bfqq)) ++ return; ++ ++ /* Avoid a circular list and skip interim queue merges. */ ++ while ((__bfqq = new_bfqq->new_bfqq)) { ++ if (__bfqq == bfqq) ++ return; ++ new_bfqq = __bfqq; ++ } ++ ++ process_refs = bfqq_process_refs(bfqq); ++ new_process_refs = bfqq_process_refs(new_bfqq); ++ /* ++ * If the process for the bfqq has gone away, there is no ++ * sense in merging the queues. ++ */ ++ if (process_refs == 0 || new_process_refs == 0) ++ return; ++ ++ /* ++ * Merge in the direction of the lesser amount of work. 
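++ * That is, below the queue with fewer process references is chained, ++ * via its new_bfqq pointer, to the queue with more references, which ++ * also gains the corresponding reference count.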
++ */ ++ if (new_process_refs >= process_refs) { ++ bfqq->new_bfqq = new_bfqq; ++ atomic_add(process_refs, &new_bfqq->ref); ++ } else { ++ new_bfqq->new_bfqq = bfqq; ++ atomic_add(new_process_refs, &bfqq->ref); ++ } ++ bfq_log_bfqq(bfqq->bfqd, bfqq, "scheduling merge with queue %d", ++ new_bfqq->pid); ++} ++ ++static inline unsigned long bfq_bfqq_budget_left(struct bfq_queue *bfqq) ++{ ++ struct bfq_entity *entity = &bfqq->entity; ++ return entity->budget - entity->service; ++} ++ ++static void __bfq_bfqq_expire(struct bfq_data *bfqd, struct bfq_queue *bfqq) ++{ ++ BUG_ON(bfqq != bfqd->active_queue); ++ ++ __bfq_bfqd_reset_active(bfqd); ++ ++ if (RB_EMPTY_ROOT(&bfqq->sort_list)) { ++ bfq_del_bfqq_busy(bfqd, bfqq, 1); ++ /* ++ * overloading budget_timeout field to store when ++ * the queue remains with no backlog, used by ++ * the weight-raising mechanism ++ */ ++ bfqq->budget_timeout = jiffies; ++ } else { ++ bfq_activate_bfqq(bfqd, bfqq); ++ /* ++ * Resort priority tree of potential close cooperators. ++ */ ++ bfq_rq_pos_tree_add(bfqd, bfqq); ++ } ++ ++ /* ++ * If this bfqq is shared between multiple processes, check ++ * to make sure that those processes are still issuing I/Os ++ * within the mean seek distance. If not, it may be time to ++ * break the queues apart again. ++ */ ++ if (bfq_bfqq_coop(bfqq) && BFQQ_SEEKY(bfqq)) ++ bfq_mark_bfqq_split_coop(bfqq); ++} ++ ++/** ++ * __bfq_bfqq_recalc_budget - try to adapt the budget to the @bfqq behavior. ++ * @bfqd: device data. ++ * @bfqq: queue to update. ++ * @reason: reason for expiration. ++ * ++ * Handle the feedback on @bfqq budget. See the body for detailed ++ * comments. ++ */ ++static void __bfq_bfqq_recalc_budget(struct bfq_data *bfqd, ++ struct bfq_queue *bfqq, ++ enum bfqq_expiration reason) ++{ ++ struct request *next_rq; ++ unsigned long budget, min_budget; ++ ++ budget = bfqq->max_budget; ++ min_budget = bfq_min_budget(bfqd); ++ ++ BUG_ON(bfqq != bfqd->active_queue); ++ ++ bfq_log_bfqq(bfqd, bfqq, "recalc_budg: last budg %lu, budg left %lu", ++ bfqq->entity.budget, bfq_bfqq_budget_left(bfqq)); ++ bfq_log_bfqq(bfqd, bfqq, "recalc_budg: last max_budg %lu, min budg %lu", ++ budget, bfq_min_budget(bfqd)); ++ bfq_log_bfqq(bfqd, bfqq, "recalc_budg: sync %d, seeky %d", ++ bfq_bfqq_sync(bfqq), BFQQ_SEEKY(bfqd->active_queue)); ++ ++ if (bfq_bfqq_sync(bfqq)) { ++ switch (reason) { ++ /* ++ * Caveat: in all the following cases we trade latency ++ * for throughput. ++ */ ++ case BFQ_BFQQ_TOO_IDLE: ++ /* ++ * This is the only case where we may reduce ++ * the budget: if there are no requests of the ++ * process still waiting for completion, then ++ * we assume (tentatively) that the timer has ++ * expired because the batch of requests of ++ * the process could have been served with a ++ * smaller budget. Hence, betting that the ++ * process will behave in the same way when it ++ * becomes backlogged again, we reduce its ++ * next budget. As long as we guess right, ++ * this budget cut reduces the latency ++ * experienced by the process. ++ * ++ * However, if there are still outstanding ++ * requests, then the process may have not yet ++ * issued its next request just because it is ++ * still waiting for the completion of some of ++ * the still outstanding ones. So in this ++ * subcase we do not reduce its budget, on the ++ * contrary we increase it to possibly boost ++ * the throughput, as discussed in the ++ * comments to the BUDGET_TIMEOUT case. 
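++ * ++ * Concretely, in the code below the budget is doubled (capped at ++ * bfqd->bfq_max_budget) when requests are still outstanding, and is ++ * otherwise reduced by four times the minimum budget, but never ++ * below the minimum budget itself.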
++ */ ++ if (bfqq->dispatched > 0) /* still outstanding reqs */ ++ budget = min(budget * 2, bfqd->bfq_max_budget); ++ else { ++ if (budget > 5 * min_budget) ++ budget -= 4 * min_budget; ++ else ++ budget = min_budget; ++ } ++ break; ++ case BFQ_BFQQ_BUDGET_TIMEOUT: ++ /* ++ * We double the budget here because: 1) it ++ * gives the chance to boost the throughput if ++ * this is not a seeky process (which may have ++ * bumped into this timeout because of, e.g., ++ * ZBR), 2) together with charge_full_budget ++ * it helps give seeky processes higher ++ * timestamps, and hence be served less ++ * frequently. ++ */ ++ budget = min(budget * 2, bfqd->bfq_max_budget); ++ break; ++ case BFQ_BFQQ_BUDGET_EXHAUSTED: ++ /* ++ * The process still has backlog, and did not ++ * let either the budget timeout or the disk ++ * idling timeout expire. Hence it is not ++ * seeky, has a short thinktime and may be ++ * happy with a higher budget too. So ++ * definitely increase the budget of this good ++ * candidate to boost the disk throughput. ++ */ ++ budget = min(budget * 4, bfqd->bfq_max_budget); ++ break; ++ case BFQ_BFQQ_NO_MORE_REQUESTS: ++ /* ++ * Leave the budget unchanged. ++ */ ++ default: ++ return; ++ } ++ } else /* async queue */ ++ /* async queues always get the maximum possible budget ++ * (their ability to dispatch is limited by ++ * @bfqd->bfq_max_budget_async_rq). ++ */ ++ budget = bfqd->bfq_max_budget; ++ ++ bfqq->max_budget = budget; ++ ++ if (bfqd->budgets_assigned >= 194 && bfqd->bfq_user_max_budget == 0 && ++ bfqq->max_budget > bfqd->bfq_max_budget) ++ bfqq->max_budget = bfqd->bfq_max_budget; ++ ++ /* ++ * Make sure that we have enough budget for the next request. ++ * Since the finish time of the bfqq must be kept in sync with ++ * the budget, be sure to call __bfq_bfqq_expire() after the ++ * update. ++ */ ++ next_rq = bfqq->next_rq; ++ if (next_rq != NULL) ++ bfqq->entity.budget = max_t(unsigned long, bfqq->max_budget, ++ bfq_serv_to_charge(next_rq, bfqq)); ++ else ++ bfqq->entity.budget = bfqq->max_budget; ++ ++ bfq_log_bfqq(bfqd, bfqq, "head sect: %u, new budget %lu", ++ next_rq != NULL ? blk_rq_sectors(next_rq) : 0, ++ bfqq->entity.budget); ++} ++ ++static unsigned long bfq_calc_max_budget(u64 peak_rate, u64 timeout) ++{ ++ unsigned long max_budget; ++ ++ /* ++ * The max_budget calculated when autotuning is equal to the ++ * number of sectors transferred in timeout_sync at the ++ * estimated peak rate. ++ */ ++ max_budget = (unsigned long)(peak_rate * 1000 * ++ timeout >> BFQ_RATE_SHIFT); ++ ++ return max_budget; ++} ++ ++/* ++ * In addition to updating the peak rate, checks whether the process ++ * is "slow", and returns 1 if so. This slow flag is used, in addition ++ * to the budget timeout, to reduce the amount of service provided to ++ * seeky processes, and hence reduce their chances to lower the ++ * throughput. See the code for more details. ++ */ ++static int bfq_update_peak_rate(struct bfq_data *bfqd, struct bfq_queue *bfqq, ++ int compensate, enum bfqq_expiration reason) ++{ ++ u64 bw, usecs, expected, timeout; ++ ktime_t delta; ++ int update = 0; ++ ++ if (!bfq_bfqq_sync(bfqq) || bfq_bfqq_budget_new(bfqq)) ++ return 0; ++ ++ if (compensate) ++ delta = bfqd->last_idling_start; ++ else ++ delta = ktime_get(); ++ delta = ktime_sub(delta, bfqd->last_budget_start); ++ usecs = ktime_to_us(delta); ++ ++ /* Don't trust short/unrealistic values. */ ++ if (usecs < 100 || usecs >= LONG_MAX) ++ return 0; ++ ++ /* ++ * Calculate the bandwidth for the last slice. 
We use a 64 bit ++ * value to store the peak rate, in sectors per usec in fixed ++ * point math. We do so to have enough precision in the estimate ++ * and to avoid overflows. ++ */ ++ bw = (u64)bfqq->entity.service << BFQ_RATE_SHIFT; ++ do_div(bw, (unsigned long)usecs); ++ ++ timeout = jiffies_to_msecs(bfqd->bfq_timeout[BLK_RW_SYNC]); ++ ++ /* ++ * Use only long (> 20ms) intervals to filter out spikes for ++ * the peak rate estimation. ++ */ ++ if (usecs > 20000) { ++ if (bw > bfqd->peak_rate || ++ (!BFQQ_SEEKY(bfqq) && ++ reason == BFQ_BFQQ_BUDGET_TIMEOUT)) { ++ bfq_log(bfqd, "measured bw =%llu", bw); ++ /* ++ * To smooth oscillations use a low-pass filter with ++ * alpha=7/8, i.e., ++ * new_rate = (7/8) * old_rate + (1/8) * bw ++ */ ++ do_div(bw, 8); ++ bfqd->peak_rate *= 7; ++ do_div(bfqd->peak_rate, 8); ++ bfqd->peak_rate += bw; ++ update = 1; ++ bfq_log(bfqd, "new peak_rate=%llu", bfqd->peak_rate); ++ } ++ ++ update |= bfqd->peak_rate_samples == BFQ_PEAK_RATE_SAMPLES - 1; ++ ++ if (bfqd->peak_rate_samples < BFQ_PEAK_RATE_SAMPLES) ++ bfqd->peak_rate_samples++; ++ ++ if (bfqd->peak_rate_samples == BFQ_PEAK_RATE_SAMPLES && ++ update && bfqd->bfq_user_max_budget == 0) { ++ bfqd->bfq_max_budget = ++ bfq_calc_max_budget(bfqd->peak_rate, timeout); ++ bfq_log(bfqd, "new max_budget=%lu", ++ bfqd->bfq_max_budget); ++ } ++ } ++ ++ /* ++ * If the process has been served for a too short time ++ * interval to let its possible sequential accesses prevail on ++ * the initial seek time needed to move the disk head on the ++ * first sector it requested, then give the process a chance ++ * and for the moment return false. ++ */ ++ if (bfqq->entity.budget <= bfq_max_budget(bfqd) / 8) ++ return 0; ++ ++ /* ++ * A process is considered ``slow'' (i.e., seeky, so that we ++ * cannot treat it fairly in the service domain, as it would ++ * slow down too much the other processes) if, when a slice ++ * ends for whatever reason, it has received service at a ++ * rate that would not be high enough to complete the budget ++ * before the budget timeout expiration. ++ */ ++ expected = bw * 1000 * timeout >> BFQ_RATE_SHIFT; ++ ++ /* ++ * Caveat: processes doing IO in the slower disk zones will ++ * tend to be slow(er) even if not seeky. And the estimated ++ * peak rate will actually be an average over the disk ++ * surface. Hence, to not be too harsh with unlucky processes, ++ * we keep a budget/3 margin of safety before declaring a ++ * process slow. ++ */ ++ return expected > (4 * bfqq->entity.budget) / 3; ++} ++ ++/** ++ * bfq_bfqq_expire - expire a queue. ++ * @bfqd: device owning the queue. ++ * @bfqq: the queue to expire. ++ * @compensate: if true, compensate for the time spent idling. ++ * @reason: the reason causing the expiration. ++ * ++ * ++ * If the process associated to the queue is slow (i.e., seeky), or in ++ * case of budget timeout, or, finally, if it is async, we ++ * artificially charge it an entire budget (independently of the ++ * actual service it received). As a consequence, the queue will get ++ * higher timestamps than the correct ones upon reactivation, and ++ * hence it will be rescheduled as if it had received more service ++ * than what it actually received. In the end, this class of processes ++ * will receive less service in proportion to how slowly they consume ++ * their budgets (and hence how seriously they tend to lower the ++ * throughput). 
++ * ++ * In contrast, when a queue expires because it has been idling for ++ * too much or because it exhausted its budget, we do not touch the ++ * amount of service it has received. Hence when the queue will be ++ * reactivated and its timestamps updated, the latter will be in sync ++ * with the actual service received by the queue until expiration. ++ * ++ * Charging a full budget to the first type of queues and the exact ++ * service to the others has the effect of using the WF2Q+ policy to ++ * schedule the former on a timeslice basis, without violating the ++ * service domain guarantees of the latter. ++ */ ++static void bfq_bfqq_expire(struct bfq_data *bfqd, ++ struct bfq_queue *bfqq, ++ int compensate, ++ enum bfqq_expiration reason) ++{ ++ int slow; ++ BUG_ON(bfqq != bfqd->active_queue); ++ ++ /* Update disk peak rate for autotuning and check whether the ++ * process is slow (see bfq_update_peak_rate). ++ */ ++ slow = bfq_update_peak_rate(bfqd, bfqq, compensate, reason); ++ ++ /* ++ * As above explained, 'punish' slow (i.e., seeky), timed-out ++ * and async queues, to favor sequential sync workloads. ++ * ++ * Processes doing IO in the slower disk zones will tend to be ++ * slow(er) even if not seeky. Hence, since the estimated peak ++ * rate is actually an average over the disk surface, these ++ * processes may timeout just for bad luck. To avoid punishing ++ * them we do not charge a full budget to a process that ++ * succeeded in consuming at least 2/3 of its budget. ++ */ ++ if (slow || (reason == BFQ_BFQQ_BUDGET_TIMEOUT && ++ bfq_bfqq_budget_left(bfqq) >= bfqq->entity.budget / 3)) ++ bfq_bfqq_charge_full_budget(bfqq); ++ ++ if (bfqd->low_latency && bfqq->raising_coeff == 1) ++ bfqq->last_rais_start_finish = jiffies; ++ ++ if (bfqd->low_latency && bfqd->bfq_raising_max_softrt_rate > 0) { ++ if(reason != BFQ_BFQQ_BUDGET_TIMEOUT) ++ bfqq->soft_rt_next_start = ++ jiffies + ++ HZ * bfqq->entity.service / ++ bfqd->bfq_raising_max_softrt_rate; ++ else ++ bfqq->soft_rt_next_start = -1; /* infinity */ ++ } ++ bfq_log_bfqq(bfqd, bfqq, ++ "expire (%d, slow %d, num_disp %d, idle_win %d)", reason, slow, ++ bfqq->dispatched, bfq_bfqq_idle_window(bfqq)); ++ ++ /* Increase, decrease or leave budget unchanged according to reason */ ++ __bfq_bfqq_recalc_budget(bfqd, bfqq, reason); ++ __bfq_bfqq_expire(bfqd, bfqq); ++} ++ ++/* ++ * Budget timeout is not implemented through a dedicated timer, but ++ * just checked on request arrivals and completions, as well as on ++ * idle timer expirations. ++ */ ++static int bfq_bfqq_budget_timeout(struct bfq_queue *bfqq) ++{ ++ if (bfq_bfqq_budget_new(bfqq)) ++ return 0; ++ ++ if (time_before(jiffies, bfqq->budget_timeout)) ++ return 0; ++ ++ return 1; ++} ++ ++/* ++ * If we expire a queue that is waiting for the arrival of a new ++ * request, we may prevent the fictitious timestamp backshifting that ++ * allows the guarantees of the queue to be preserved (see [1] for ++ * this tricky aspect). Hence we return true only if this condition ++ * does not hold, or if the queue is slow enough to deserve only to be ++ * kicked off for preserving a high throughput. 
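++ * ++ * In code terms: expire for budget timeout only once the budget ++ * timeout has actually expired, and only if either the queue is not ++ * waiting for a new request or it still has at least a third of its ++ * budget left (i.e., it has been consuming its budget slowly).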
++*/ ++static inline int bfq_may_expire_for_budg_timeout(struct bfq_queue *bfqq) ++{ ++ bfq_log_bfqq(bfqq->bfqd, bfqq, ++ "may_budget_timeout: wr %d left %d timeout %d", ++ bfq_bfqq_wait_request(bfqq), ++ bfq_bfqq_budget_left(bfqq) >= bfqq->entity.budget / 3, ++ bfq_bfqq_budget_timeout(bfqq)); ++ ++ return (!bfq_bfqq_wait_request(bfqq) || ++ bfq_bfqq_budget_left(bfqq) >= bfqq->entity.budget / 3) ++ && ++ bfq_bfqq_budget_timeout(bfqq); ++} ++ ++/* ++ * Select a queue for service. If we have a current active queue, ++ * check whether to continue servicing it, or retrieve and set a new one. ++ */ ++static struct bfq_queue *bfq_select_queue(struct bfq_data *bfqd) ++{ ++ struct bfq_queue *bfqq, *new_bfqq = NULL; ++ struct request *next_rq; ++ enum bfqq_expiration reason = BFQ_BFQQ_BUDGET_TIMEOUT; ++ ++ bfqq = bfqd->active_queue; ++ if (bfqq == NULL) ++ goto new_queue; ++ ++ bfq_log_bfqq(bfqd, bfqq, "select_queue: already active queue"); ++ ++ /* ++ * If another queue has a request waiting within our mean seek ++ * distance, let it run. The expire code will check for close ++ * cooperators and put the close queue at the front of the ++ * service tree. If possible, merge the expiring queue with the ++ * new bfqq. ++ */ ++ new_bfqq = bfq_close_cooperator(bfqd, bfqq); ++ if (new_bfqq != NULL && bfqq->new_bfqq == NULL) ++ bfq_setup_merge(bfqq, new_bfqq); ++ ++ if (bfq_may_expire_for_budg_timeout(bfqq)) ++ goto expire; ++ ++ next_rq = bfqq->next_rq; ++ /* ++ * If bfqq has requests queued and it has enough budget left to ++ * serve them, keep the queue, otherwise expire it. ++ */ ++ if (next_rq != NULL) { ++ if (bfq_serv_to_charge(next_rq, bfqq) > ++ bfq_bfqq_budget_left(bfqq)) { ++ reason = BFQ_BFQQ_BUDGET_EXHAUSTED; ++ goto expire; ++ } else { ++ /* ++ * The idle timer may be pending because we may not ++ * disable disk idling even when a new request arrives ++ */ ++ if (timer_pending(&bfqd->idle_slice_timer)) { ++ /* ++ * If we get here: 1) at least a new request ++ * has arrived but we have not disabled the ++ * timer because the request was too small, ++ * 2) then the block layer has unplugged the ++ * device, causing the dispatch to be invoked. ++ * ++ * Since the device is unplugged, now the ++ * requests are probably large enough to ++ * provide a reasonable throughput. ++ * So we disable idling. ++ */ ++ bfq_clear_bfqq_wait_request(bfqq); ++ del_timer(&bfqd->idle_slice_timer); ++ } ++ if (new_bfqq == NULL) ++ goto keep_queue; ++ else ++ goto expire; ++ } ++ } ++ ++ /* ++ * No requests pending. If there is no cooperator, and the active ++ * queue still has requests in flight or is idling for a new request, ++ * then keep it. ++ */ ++ if (new_bfqq == NULL && (timer_pending(&bfqd->idle_slice_timer) || ++ (bfqq->dispatched != 0 && bfq_bfqq_idle_window(bfqq) && ++ !bfq_queue_nonrot_noidle(bfqd, bfqq)))) { ++ bfqq = NULL; ++ goto keep_queue; ++ } else if (new_bfqq != NULL && timer_pending(&bfqd->idle_slice_timer)) { ++ /* ++ * Expiring the queue because there is a close cooperator, ++ * cancel timer. ++ */ ++ bfq_clear_bfqq_wait_request(bfqq); ++ del_timer(&bfqd->idle_slice_timer); ++ } ++ ++ reason = BFQ_BFQQ_NO_MORE_REQUESTS; ++expire: ++ bfq_bfqq_expire(bfqd, bfqq, 0, reason); ++new_queue: ++ bfqq = bfq_set_active_queue(bfqd, new_bfqq); ++ bfq_log(bfqd, "select_queue: new queue %d returned", ++ bfqq != NULL ? 
bfqq->pid : 0); ++keep_queue: ++ return bfqq; ++} ++ ++static void update_raising_data(struct bfq_data *bfqd, struct bfq_queue *bfqq) ++{ ++ if (bfqq->raising_coeff > 1) { /* queue is being boosted */ ++ struct bfq_entity *entity = &bfqq->entity; ++ ++ bfq_log_bfqq(bfqd, bfqq, ++ "raising period dur %u/%u msec, " ++ "old raising coeff %u, w %d(%d)", ++ jiffies_to_msecs(jiffies - ++ bfqq->last_rais_start_finish), ++ jiffies_to_msecs(bfqq->raising_cur_max_time), ++ bfqq->raising_coeff, ++ bfqq->entity.weight, bfqq->entity.orig_weight); ++ ++ BUG_ON(bfqq != bfqd->active_queue && entity->weight != ++ entity->orig_weight * bfqq->raising_coeff); ++ if(entity->ioprio_changed) ++ bfq_log_bfqq(bfqd, bfqq, ++ "WARN: pending prio change"); ++ /* ++ * If too much time has elapsed from the beginning ++ * of this weight-raising period and process is not soft ++ * real-time, stop it ++ */ ++ if (jiffies - bfqq->last_rais_start_finish > ++ bfqq->raising_cur_max_time) { ++ int soft_rt = bfqd->bfq_raising_max_softrt_rate > 0 && ++ bfqq->soft_rt_next_start < jiffies; ++ ++ bfqq->last_rais_start_finish = jiffies; ++ if (soft_rt) ++ bfqq->raising_cur_max_time = ++ bfqd->bfq_raising_rt_max_time; ++ else { ++ bfq_log_bfqq(bfqd, bfqq, ++ "wrais ending at %llu msec," ++ "rais_max_time %u", ++ bfqq->last_rais_start_finish, ++ jiffies_to_msecs(bfqq-> ++ raising_cur_max_time)); ++ bfqq->raising_coeff = 1; ++ entity->ioprio_changed = 1; ++ __bfq_entity_update_weight_prio( ++ bfq_entity_service_tree(entity), ++ entity); ++ } ++ } ++ } ++} ++ ++ ++/* ++ * Dispatch one request from bfqq, moving it to the request queue ++ * dispatch list. ++ */ ++static int bfq_dispatch_request(struct bfq_data *bfqd, ++ struct bfq_queue *bfqq) ++{ ++ int dispatched = 0; ++ struct request *rq; ++ unsigned long service_to_charge; ++ ++ BUG_ON(RB_EMPTY_ROOT(&bfqq->sort_list)); ++ ++ /* Follow expired path, else get first next available. */ ++ rq = bfq_check_fifo(bfqq); ++ if (rq == NULL) ++ rq = bfqq->next_rq; ++ service_to_charge = bfq_serv_to_charge(rq, bfqq); ++ ++ if (service_to_charge > bfq_bfqq_budget_left(bfqq)) { ++ /* ++ * This may happen if the next rq is chosen ++ * in fifo order instead of sector order. ++ * The budget is properly dimensioned ++ * to be always sufficient to serve the next request ++ * only if it is chosen in sector order. The reason is ++ * that it would be quite inefficient and little useful ++ * to always make sure that the budget is large enough ++ * to serve even the possible next rq in fifo order. ++ * In fact, requests are seldom served in fifo order. ++ * ++ * Expire the queue for budget exhaustion, and ++ * make sure that the next act_budget is enough ++ * to serve the next request, even if it comes ++ * from the fifo expired path. ++ */ ++ bfqq->next_rq = rq; ++ /* ++ * Since this dispatch is failed, make sure that ++ * a new one will be performed ++ */ ++ if (!bfqd->rq_in_driver) ++ bfq_schedule_dispatch(bfqd); ++ goto expire; ++ } ++ ++ /* Finally, insert request into driver dispatch list. 
*/ ++ bfq_bfqq_served(bfqq, service_to_charge); ++ bfq_dispatch_insert(bfqd->queue, rq); ++ ++ update_raising_data(bfqd, bfqq); ++ ++ bfq_log_bfqq(bfqd, bfqq, "dispatched %u sec req (%llu), " ++ "budg left %lu", ++ blk_rq_sectors(rq), ++ (long long unsigned)blk_rq_pos(rq), ++ bfq_bfqq_budget_left(bfqq)); ++ ++ dispatched++; ++ ++ if (bfqd->active_bic == NULL) { ++ atomic_long_inc(&RQ_BIC(rq)->icq.ioc->refcount); ++ bfqd->active_bic = RQ_BIC(rq); ++ } ++ ++ if (bfqd->busy_queues > 1 && ((!bfq_bfqq_sync(bfqq) && ++ dispatched >= bfqd->bfq_max_budget_async_rq) || ++ bfq_class_idle(bfqq))) ++ goto expire; ++ ++ return dispatched; ++ ++expire: ++ bfq_bfqq_expire(bfqd, bfqq, 0, BFQ_BFQQ_BUDGET_EXHAUSTED); ++ return dispatched; ++} ++ ++static int __bfq_forced_dispatch_bfqq(struct bfq_queue *bfqq) ++{ ++ int dispatched = 0; ++ ++ while (bfqq->next_rq != NULL) { ++ bfq_dispatch_insert(bfqq->bfqd->queue, bfqq->next_rq); ++ dispatched++; ++ } ++ ++ BUG_ON(!list_empty(&bfqq->fifo)); ++ return dispatched; ++} ++ ++/* ++ * Drain our current requests. Used for barriers and when switching ++ * io schedulers on-the-fly. ++ */ ++static int bfq_forced_dispatch(struct bfq_data *bfqd) ++{ ++ struct bfq_queue *bfqq, *n; ++ struct bfq_service_tree *st; ++ int dispatched = 0; ++ ++ bfqq = bfqd->active_queue; ++ if (bfqq != NULL) ++ __bfq_bfqq_expire(bfqd, bfqq); ++ ++ /* ++ * Loop through classes, and be careful to leave the scheduler ++ * in a consistent state, as feedback mechanisms and vtime ++ * updates cannot be disabled during the process. ++ */ ++ list_for_each_entry_safe(bfqq, n, &bfqd->active_list, bfqq_list) { ++ st = bfq_entity_service_tree(&bfqq->entity); ++ ++ dispatched += __bfq_forced_dispatch_bfqq(bfqq); ++ bfqq->max_budget = bfq_max_budget(bfqd); ++ ++ bfq_forget_idle(st); ++ } ++ ++ BUG_ON(bfqd->busy_queues != 0); ++ ++ return dispatched; ++} ++ ++static int bfq_dispatch_requests(struct request_queue *q, int force) ++{ ++ struct bfq_data *bfqd = q->elevator->elevator_data; ++ struct bfq_queue *bfqq; ++ int max_dispatch; ++ ++ bfq_log(bfqd, "dispatch requests: %d busy queues", bfqd->busy_queues); ++ if (bfqd->busy_queues == 0) ++ return 0; ++ ++ if (unlikely(force)) ++ return bfq_forced_dispatch(bfqd); ++ ++ if((bfqq = bfq_select_queue(bfqd)) == NULL) ++ return 0; ++ ++ max_dispatch = bfqd->bfq_quantum; ++ if (bfq_class_idle(bfqq)) ++ max_dispatch = 1; ++ ++ if (!bfq_bfqq_sync(bfqq)) ++ max_dispatch = bfqd->bfq_max_budget_async_rq; ++ ++ if (bfqq->dispatched >= max_dispatch) { ++ if (bfqd->busy_queues > 1) ++ return 0; ++ if (bfqq->dispatched >= 4 * max_dispatch) ++ return 0; ++ } ++ ++ if (bfqd->sync_flight != 0 && !bfq_bfqq_sync(bfqq)) ++ return 0; ++ ++ bfq_clear_bfqq_wait_request(bfqq); ++ BUG_ON(timer_pending(&bfqd->idle_slice_timer)); ++ ++ if (! bfq_dispatch_request(bfqd, bfqq)) ++ return 0; ++ ++ bfq_log_bfqq(bfqd, bfqq, "dispatched one request of %d" ++ "(max_disp %d)", bfqq->pid, max_dispatch); ++ ++ return 1; ++} ++ ++/* ++ * Task holds one reference to the queue, dropped when task exits. Each rq ++ * in-flight on this queue also holds a reference, dropped when rq is freed. ++ * ++ * Queue lock must be held here. 
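++ * Dropping the last reference removes the queue and returns it to the ++ * bfq_pool slab cache.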
++ */ ++static void bfq_put_queue(struct bfq_queue *bfqq) ++{ ++ struct bfq_data *bfqd = bfqq->bfqd; ++ ++ BUG_ON(atomic_read(&bfqq->ref) <= 0); ++ ++ bfq_log_bfqq(bfqd, bfqq, "put_queue: %p %d", bfqq, ++ atomic_read(&bfqq->ref)); ++ if (!atomic_dec_and_test(&bfqq->ref)) ++ return; ++ ++ BUG_ON(rb_first(&bfqq->sort_list) != NULL); ++ BUG_ON(bfqq->allocated[READ] + bfqq->allocated[WRITE] != 0); ++ BUG_ON(bfqq->entity.tree != NULL); ++ BUG_ON(bfq_bfqq_busy(bfqq)); ++ BUG_ON(bfqd->active_queue == bfqq); ++ ++ bfq_log_bfqq(bfqd, bfqq, "put_queue: %p freed", bfqq); ++ ++ kmem_cache_free(bfq_pool, bfqq); ++} ++ ++static void bfq_put_cooperator(struct bfq_queue *bfqq) ++{ ++ struct bfq_queue *__bfqq, *next; ++ ++ /* ++ * If this queue was scheduled to merge with another queue, be ++ * sure to drop the reference taken on that queue (and others in ++ * the merge chain). See bfq_setup_merge and bfq_merge_bfqqs. ++ */ ++ __bfqq = bfqq->new_bfqq; ++ while (__bfqq) { ++ if (__bfqq == bfqq) { ++ WARN(1, "bfqq->new_bfqq loop detected.\n"); ++ break; ++ } ++ next = __bfqq->new_bfqq; ++ bfq_put_queue(__bfqq); ++ __bfqq = next; ++ } ++} ++ ++static void bfq_exit_bfqq(struct bfq_data *bfqd, struct bfq_queue *bfqq) ++{ ++ if (bfqq == bfqd->active_queue) { ++ __bfq_bfqq_expire(bfqd, bfqq); ++ bfq_schedule_dispatch(bfqd); ++ } ++ ++ bfq_log_bfqq(bfqd, bfqq, "exit_bfqq: %p, %d", bfqq, ++ atomic_read(&bfqq->ref)); ++ ++ bfq_put_cooperator(bfqq); ++ ++ bfq_put_queue(bfqq); ++} ++ ++static void bfq_init_icq(struct io_cq *icq) ++{ ++ struct bfq_io_cq *bic = icq_to_bic(icq); ++ ++ bic->ttime.last_end_request = jiffies; ++} ++ ++static void bfq_exit_icq(struct io_cq *icq) ++{ ++ struct bfq_io_cq *bic = icq_to_bic(icq); ++ struct bfq_data *bfqd = bic_to_bfqd(bic); ++ ++ if (bic->bfqq[BLK_RW_ASYNC]) { ++ bfq_exit_bfqq(bfqd, bic->bfqq[BLK_RW_ASYNC]); ++ bic->bfqq[BLK_RW_ASYNC] = NULL; ++ } ++ ++ if (bic->bfqq[BLK_RW_SYNC]) { ++ bfq_exit_bfqq(bfqd, bic->bfqq[BLK_RW_SYNC]); ++ bic->bfqq[BLK_RW_SYNC] = NULL; ++ } ++} ++ ++/* ++ * Update the entity prio values; note that the new values will not ++ * be used until the next (re)activation. ++ */ ++static void bfq_init_prio_data(struct bfq_queue *bfqq, struct io_context *ioc) ++{ ++ struct task_struct *tsk = current; ++ int ioprio_class; ++ ++ if (!bfq_bfqq_prio_changed(bfqq)) ++ return; ++ ++ ioprio_class = IOPRIO_PRIO_CLASS(ioc->ioprio); ++ switch (ioprio_class) { ++ default: ++ printk(KERN_ERR "bfq: bad prio %x\n", ioprio_class); ++ case IOPRIO_CLASS_NONE: ++ /* ++ * No prio set, inherit CPU scheduling settings. ++ */ ++ bfqq->entity.new_ioprio = task_nice_ioprio(tsk); ++ bfqq->entity.new_ioprio_class = task_nice_ioclass(tsk); ++ break; ++ case IOPRIO_CLASS_RT: ++ bfqq->entity.new_ioprio = task_ioprio(ioc); ++ bfqq->entity.new_ioprio_class = IOPRIO_CLASS_RT; ++ break; ++ case IOPRIO_CLASS_BE: ++ bfqq->entity.new_ioprio = task_ioprio(ioc); ++ bfqq->entity.new_ioprio_class = IOPRIO_CLASS_BE; ++ break; ++ case IOPRIO_CLASS_IDLE: ++ bfqq->entity.new_ioprio_class = IOPRIO_CLASS_IDLE; ++ bfqq->entity.new_ioprio = 7; ++ bfq_clear_bfqq_idle_window(bfqq); ++ break; ++ } ++ ++ bfqq->entity.ioprio_changed = 1; ++ ++ /* ++ * Keep track of original prio settings in case we have to temporarily ++ * elevate the priority of this queue. 
++ */ ++ bfqq->org_ioprio = bfqq->entity.new_ioprio; ++ bfq_clear_bfqq_prio_changed(bfqq); ++} ++ ++static void bfq_changed_ioprio(struct io_context *ioc, ++ struct bfq_io_cq *bic) ++{ ++ struct bfq_data *bfqd; ++ struct bfq_queue *bfqq, *new_bfqq; ++ struct bfq_group *bfqg; ++ unsigned long uninitialized_var(flags); ++ ++ bfqd = bfq_get_bfqd_locked(&(bic->icq.q->elevator->elevator_data), &flags); ++ if (unlikely(bfqd == NULL)) ++ return; ++ ++ bfqq = bic->bfqq[BLK_RW_ASYNC]; ++ if (bfqq != NULL) { ++ bfqg = container_of(bfqq->entity.sched_data, struct bfq_group, ++ sched_data); ++ new_bfqq = bfq_get_queue(bfqd, bfqg, BLK_RW_ASYNC, bic->icq.ioc, ++ GFP_ATOMIC); ++ if (new_bfqq != NULL) { ++ bic->bfqq[BLK_RW_ASYNC] = new_bfqq; ++ bfq_log_bfqq(bfqd, bfqq, ++ "changed_ioprio: bfqq %p %d", ++ bfqq, atomic_read(&bfqq->ref)); ++ bfq_put_queue(bfqq); ++ } ++ } ++ ++ bfqq = bic->bfqq[BLK_RW_SYNC]; ++ if (bfqq != NULL) ++ bfq_mark_bfqq_prio_changed(bfqq); ++ ++ bfq_put_bfqd_unlock(bfqd, &flags); ++} ++ ++static void bfq_init_bfqq(struct bfq_data *bfqd, struct bfq_queue *bfqq, ++ pid_t pid, int is_sync) ++{ ++ RB_CLEAR_NODE(&bfqq->entity.rb_node); ++ INIT_LIST_HEAD(&bfqq->fifo); ++ ++ atomic_set(&bfqq->ref, 0); ++ bfqq->bfqd = bfqd; ++ ++ bfq_mark_bfqq_prio_changed(bfqq); ++ ++ if (is_sync) { ++ if (!bfq_class_idle(bfqq)) ++ bfq_mark_bfqq_idle_window(bfqq); ++ bfq_mark_bfqq_sync(bfqq); ++ } ++ ++ /* Tentative initial value to trade off between thr and lat */ ++ bfqq->max_budget = (2 * bfq_max_budget(bfqd)) / 3; ++ bfqq->pid = pid; ++ ++ bfqq->raising_coeff = 1; ++ bfqq->last_rais_start_finish = 0; ++ bfqq->soft_rt_next_start = -1; ++} ++ ++static struct bfq_queue *bfq_find_alloc_queue(struct bfq_data *bfqd, ++ struct bfq_group *bfqg, ++ int is_sync, ++ struct io_context *ioc, ++ gfp_t gfp_mask) ++{ ++ struct bfq_queue *bfqq, *new_bfqq = NULL; ++ struct bfq_io_cq *bic; ++ ++retry: ++ bic = bfq_bic_lookup(bfqd, ioc); ++ /* bic always exists here */ ++ bfqq = bic_to_bfqq(bic, is_sync); ++ ++ /* ++ * Always try a new alloc if we fall back to the OOM bfqq ++ * originally, since it should just be a temporary situation. 
++ */ ++ if (bfqq == NULL || bfqq == &bfqd->oom_bfqq) { ++ bfqq = NULL; ++ if (new_bfqq != NULL) { ++ bfqq = new_bfqq; ++ new_bfqq = NULL; ++ } else if (gfp_mask & __GFP_WAIT) { ++ spin_unlock_irq(bfqd->queue->queue_lock); ++ new_bfqq = kmem_cache_alloc_node(bfq_pool, ++ gfp_mask | __GFP_ZERO, ++ bfqd->queue->node); ++ spin_lock_irq(bfqd->queue->queue_lock); ++ if (new_bfqq != NULL) ++ goto retry; ++ } else { ++ bfqq = kmem_cache_alloc_node(bfq_pool, ++ gfp_mask | __GFP_ZERO, ++ bfqd->queue->node); ++ } ++ ++ if (bfqq != NULL) { ++ bfq_init_bfqq(bfqd, bfqq, current->pid, is_sync); ++ bfq_log_bfqq(bfqd, bfqq, "allocated"); ++ } else { ++ bfqq = &bfqd->oom_bfqq; ++ bfq_log_bfqq(bfqd, bfqq, "using oom bfqq"); ++ } ++ ++ bfq_init_prio_data(bfqq, ioc); ++ bfq_init_entity(&bfqq->entity, bfqg); ++ } ++ ++ if (new_bfqq != NULL) ++ kmem_cache_free(bfq_pool, new_bfqq); ++ ++ return bfqq; ++} ++ ++static struct bfq_queue **bfq_async_queue_prio(struct bfq_data *bfqd, ++ struct bfq_group *bfqg, ++ int ioprio_class, int ioprio) ++{ ++ switch (ioprio_class) { ++ case IOPRIO_CLASS_RT: ++ return &bfqg->async_bfqq[0][ioprio]; ++ case IOPRIO_CLASS_BE: ++ return &bfqg->async_bfqq[1][ioprio]; ++ case IOPRIO_CLASS_IDLE: ++ return &bfqg->async_idle_bfqq; ++ default: ++ BUG(); ++ } ++} ++ ++static struct bfq_queue *bfq_get_queue(struct bfq_data *bfqd, ++ struct bfq_group *bfqg, int is_sync, ++ struct io_context *ioc, gfp_t gfp_mask) ++{ ++ const int ioprio = task_ioprio(ioc); ++ const int ioprio_class = task_ioprio_class(ioc); ++ struct bfq_queue **async_bfqq = NULL; ++ struct bfq_queue *bfqq = NULL; ++ ++ if (!is_sync) { ++ async_bfqq = bfq_async_queue_prio(bfqd, bfqg, ioprio_class, ++ ioprio); ++ bfqq = *async_bfqq; ++ } ++ ++ if (bfqq == NULL) ++ bfqq = bfq_find_alloc_queue(bfqd, bfqg, is_sync, ioc, gfp_mask); ++ ++ /* ++ * Pin the queue now that it's allocated, scheduler exit will prune it. ++ */ ++ if (!is_sync && *async_bfqq == NULL) { ++ atomic_inc(&bfqq->ref); ++ bfq_log_bfqq(bfqd, bfqq, "get_queue, bfqq not in async: %p, %d", ++ bfqq, atomic_read(&bfqq->ref)); ++ *async_bfqq = bfqq; ++ } ++ ++ atomic_inc(&bfqq->ref); ++ bfq_log_bfqq(bfqd, bfqq, "get_queue, at end: %p, %d", bfqq, ++ atomic_read(&bfqq->ref)); ++ return bfqq; ++} ++ ++static void bfq_update_io_thinktime(struct bfq_data *bfqd, ++ struct bfq_io_cq *bic) ++{ ++ unsigned long elapsed = jiffies - bic->ttime.last_end_request; ++ unsigned long ttime = min(elapsed, 2UL * bfqd->bfq_slice_idle); ++ ++ bic->ttime.ttime_samples = (7*bic->ttime.ttime_samples + 256) / 8; ++ bic->ttime.ttime_total = (7*bic->ttime.ttime_total + 256*ttime) / 8; ++ bic->ttime.ttime_mean = (bic->ttime.ttime_total + 128) / bic->ttime.ttime_samples; ++} ++ ++static void bfq_update_io_seektime(struct bfq_data *bfqd, ++ struct bfq_queue *bfqq, ++ struct request *rq) ++{ ++ sector_t sdist; ++ u64 total; ++ ++ if (bfqq->last_request_pos < blk_rq_pos(rq)) ++ sdist = blk_rq_pos(rq) - bfqq->last_request_pos; ++ else ++ sdist = bfqq->last_request_pos - blk_rq_pos(rq); ++ ++ /* ++ * Don't allow the seek distance to get too large from the ++ * odd fragment, pagein, etc. 
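++ * The (7/8, 1/8) exponential averages computed below then make ++ * seek_mean track the recent seek pattern while further damping ++ * isolated outliers.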
++ */ ++ if (bfqq->seek_samples == 0) /* first request, not really a seek */ ++ sdist = 0; ++ else if (bfqq->seek_samples <= 60) /* second & third seek */ ++ sdist = min(sdist, (bfqq->seek_mean * 4) + 2*1024*1024); ++ else ++ sdist = min(sdist, (bfqq->seek_mean * 4) + 2*1024*64); ++ ++ bfqq->seek_samples = (7*bfqq->seek_samples + 256) / 8; ++ bfqq->seek_total = (7*bfqq->seek_total + (u64)256*sdist) / 8; ++ total = bfqq->seek_total + (bfqq->seek_samples/2); ++ do_div(total, bfqq->seek_samples); ++ if (bfq_bfqq_coop(bfqq)) { ++ /* ++ * If the mean seektime increases for a (non-seeky) shared ++ * queue, some cooperator is likely to be idling too much. ++ * On the contrary, if it decreases, some cooperator has ++ * probably woken up. ++ * ++ */ ++ if ((sector_t)total < bfqq->seek_mean) ++ bfq_mark_bfqq_some_coop_idle(bfqq); ++ else if ((sector_t)total > bfqq->seek_mean) ++ bfq_clear_bfqq_some_coop_idle(bfqq); ++ } ++ bfqq->seek_mean = (sector_t)total; ++ ++ bfq_log_bfqq(bfqd, bfqq, "dist=%llu mean=%llu", (u64)sdist, ++ (u64)bfqq->seek_mean); ++} ++ ++/* ++ * Disable idle window if the process thinks too long or seeks so much that ++ * it doesn't matter. ++ */ ++static void bfq_update_idle_window(struct bfq_data *bfqd, ++ struct bfq_queue *bfqq, ++ struct bfq_io_cq *bic) ++{ ++ int enable_idle; ++ ++ /* Don't idle for async or idle io prio class. */ ++ if (!bfq_bfqq_sync(bfqq) || bfq_class_idle(bfqq)) ++ return; ++ ++ enable_idle = bfq_bfqq_idle_window(bfqq); ++ ++ if (atomic_read(&bic->icq.ioc->nr_tasks) == 0 || ++ bfqd->bfq_slice_idle == 0 || ++ (bfqd->hw_tag && BFQQ_SEEKY(bfqq) && ++ bfqq->raising_coeff == 1)) ++ enable_idle = 0; ++ else if (bfq_sample_valid(bic->ttime.ttime_samples)) { ++ if (bic->ttime.ttime_mean > bfqd->bfq_slice_idle && ++ bfqq->raising_coeff == 1) ++ enable_idle = 0; ++ else ++ enable_idle = 1; ++ } ++ bfq_log_bfqq(bfqd, bfqq, "update_idle_window: enable_idle %d", ++ enable_idle); ++ ++ if (enable_idle) ++ bfq_mark_bfqq_idle_window(bfqq); ++ else ++ bfq_clear_bfqq_idle_window(bfqq); ++} ++ ++/* ++ * Called when a new fs request (rq) is added to bfqq. Check if there's ++ * something we should do about it. ++ */ ++static void bfq_rq_enqueued(struct bfq_data *bfqd, struct bfq_queue *bfqq, ++ struct request *rq) ++{ ++ struct bfq_io_cq *bic = RQ_BIC(rq); ++ ++ if (rq->cmd_flags & REQ_META) ++ bfqq->meta_pending++; ++ ++ bfq_update_io_thinktime(bfqd, bic); ++ bfq_update_io_seektime(bfqd, bfqq, rq); ++ if (bfqq->entity.service > bfq_max_budget(bfqd) / 8 || ++ !BFQQ_SEEKY(bfqq)) ++ bfq_update_idle_window(bfqd, bfqq, bic); ++ ++ bfq_log_bfqq(bfqd, bfqq, ++ "rq_enqueued: idle_window=%d (seeky %d, mean %llu)", ++ bfq_bfqq_idle_window(bfqq), BFQQ_SEEKY(bfqq), ++ (long long unsigned)bfqq->seek_mean); ++ ++ bfqq->last_request_pos = blk_rq_pos(rq) + blk_rq_sectors(rq); ++ ++ if (bfqq == bfqd->active_queue) { ++ /* ++ * If there is just this request queued and the request ++ * is small, just exit. ++ * In this way, if the disk is being idled to wait for a new ++ * request from the active queue, we avoid unplugging the ++ * device now. ++ * ++ * By doing so, we avoid committing the disk ++ * to serving just a small request. On the contrary, we wait for ++ * the block layer to decide when to unplug the device: ++ * hopefully, new requests will be merged to this ++ * one quickly, then the device will be unplugged ++ * and larger requests will be dispatched. 
++ */ ++ if (bfqq->queued[rq_is_sync(rq)] == 1 && ++ blk_rq_sectors(rq) < 32) { ++ return; ++ } ++ if (bfq_bfqq_wait_request(bfqq)) { ++ /* ++ * If we are waiting for a request for this queue, let ++ * it rip immediately and flag that we must not expire ++ * this queue just now. ++ */ ++ bfq_clear_bfqq_wait_request(bfqq); ++ del_timer(&bfqd->idle_slice_timer); ++ /* ++ * Here we can safely expire the queue, in ++ * case of budget timeout, without wasting ++ * guarantees ++ */ ++ if (bfq_bfqq_budget_timeout(bfqq)) ++ bfq_bfqq_expire(bfqd, bfqq, 0, ++ BFQ_BFQQ_BUDGET_TIMEOUT); ++ __blk_run_queue(bfqd->queue); ++ } ++ } ++} ++ ++static void bfq_insert_request(struct request_queue *q, struct request *rq) ++{ ++ struct bfq_data *bfqd = q->elevator->elevator_data; ++ struct bfq_queue *bfqq = RQ_BFQQ(rq); ++ ++ assert_spin_locked(bfqd->queue->queue_lock); ++ bfq_init_prio_data(bfqq, RQ_BIC(rq)->icq.ioc); ++ ++ bfq_add_rq_rb(rq); ++ ++ rq_set_fifo_time(rq, jiffies + bfqd->bfq_fifo_expire[rq_is_sync(rq)]); ++ list_add_tail(&rq->queuelist, &bfqq->fifo); ++ ++ bfq_rq_enqueued(bfqd, bfqq, rq); ++} ++ ++static void bfq_update_hw_tag(struct bfq_data *bfqd) ++{ ++ bfqd->max_rq_in_driver = max(bfqd->max_rq_in_driver, ++ bfqd->rq_in_driver); ++ ++ if (bfqd->hw_tag == 1) ++ return; ++ ++ /* ++ * This sample is valid if the number of outstanding requests ++ * is large enough to allow a queueing behavior. Note that the ++ * sum is not exact, as it's not taking into account deactivated ++ * requests. ++ */ ++ if (bfqd->rq_in_driver + bfqd->queued < BFQ_HW_QUEUE_THRESHOLD) ++ return; ++ ++ if (bfqd->hw_tag_samples++ < BFQ_HW_QUEUE_SAMPLES) ++ return; ++ ++ bfqd->hw_tag = bfqd->max_rq_in_driver > BFQ_HW_QUEUE_THRESHOLD; ++ bfqd->max_rq_in_driver = 0; ++ bfqd->hw_tag_samples = 0; ++} ++ ++static void bfq_completed_request(struct request_queue *q, struct request *rq) ++{ ++ struct bfq_queue *bfqq = RQ_BFQQ(rq); ++ struct bfq_data *bfqd = bfqq->bfqd; ++ const int sync = rq_is_sync(rq); ++ ++ bfq_log_bfqq(bfqd, bfqq, "completed %u sects req (%d)", ++ blk_rq_sectors(rq), sync); ++ ++ bfq_update_hw_tag(bfqd); ++ ++ WARN_ON(!bfqd->rq_in_driver); ++ WARN_ON(!bfqq->dispatched); ++ bfqd->rq_in_driver--; ++ bfqq->dispatched--; ++ ++ if (bfq_bfqq_sync(bfqq)) ++ bfqd->sync_flight--; ++ ++ if (sync) ++ RQ_BIC(rq)->ttime.last_end_request = jiffies; ++ ++ /* ++ * If this is the active queue, check if it needs to be expired, ++ * or if we want to idle in case it has no pending requests. 
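++ * (In the code below, idling is armed only when the completed request ++ * was sync, the queue's sort_list is empty, there is no close ++ * cooperator, and either no request is left in the driver or the queue ++ * is being weight-raised.)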
++ */ ++ if (bfqd->active_queue == bfqq) { ++ if (bfq_bfqq_budget_new(bfqq)) ++ bfq_set_budget_timeout(bfqd); ++ ++ /* Idling is disabled also for cooperation issues: ++ * 1) there is a close cooperator for the queue, or ++ * 2) the queue is shared and some cooperator is likely ++ * to be idle (in this case, by not arming the idle timer, ++ * we try to slow down the queue, to prevent the zones ++ * of the disk accessed by the active cooperators to become ++ * too distant from the zone that will be accessed by the ++ * currently idle cooperators) ++ */ ++ if (bfq_may_expire_for_budg_timeout(bfqq)) ++ bfq_bfqq_expire(bfqd, bfqq, 0, BFQ_BFQQ_BUDGET_TIMEOUT); ++ else if (sync && ++ (bfqd->rq_in_driver == 0 || ++ bfqq->raising_coeff > 1) ++ && RB_EMPTY_ROOT(&bfqq->sort_list) ++ && !bfq_close_cooperator(bfqd, bfqq) ++ && (!bfq_bfqq_coop(bfqq) || ++ !bfq_bfqq_some_coop_idle(bfqq))) ++ bfq_arm_slice_timer(bfqd); ++ } ++ ++ if (!bfqd->rq_in_driver) ++ bfq_schedule_dispatch(bfqd); ++} ++ ++static inline int __bfq_may_queue(struct bfq_queue *bfqq) ++{ ++ if (bfq_bfqq_wait_request(bfqq) && bfq_bfqq_must_alloc(bfqq)) { ++ bfq_clear_bfqq_must_alloc(bfqq); ++ return ELV_MQUEUE_MUST; ++ } ++ ++ return ELV_MQUEUE_MAY; ++} ++ ++static int bfq_may_queue(struct request_queue *q, int rw) ++{ ++ struct bfq_data *bfqd = q->elevator->elevator_data; ++ struct task_struct *tsk = current; ++ struct bfq_io_cq *bic; ++ struct bfq_queue *bfqq; ++ ++ /* ++ * Don't force setup of a queue from here, as a call to may_queue ++ * does not necessarily imply that a request actually will be queued. ++ * So just lookup a possibly existing queue, or return 'may queue' ++ * if that fails. ++ */ ++ bic = bfq_bic_lookup(bfqd, tsk->io_context); ++ if (bic == NULL) ++ return ELV_MQUEUE_MAY; ++ ++ bfqq = bic_to_bfqq(bic, rw_is_sync(rw)); ++ if (bfqq != NULL) { ++ bfq_init_prio_data(bfqq, bic->icq.ioc); ++ ++ return __bfq_may_queue(bfqq); ++ } ++ ++ return ELV_MQUEUE_MAY; ++} ++ ++/* ++ * Queue lock held here. ++ */ ++static void bfq_put_request(struct request *rq) ++{ ++ struct bfq_queue *bfqq = RQ_BFQQ(rq); ++ ++ if (bfqq != NULL) { ++ const int rw = rq_data_dir(rq); ++ ++ BUG_ON(!bfqq->allocated[rw]); ++ bfqq->allocated[rw]--; ++ ++ rq->elv.priv[0] = NULL; ++ rq->elv.priv[1] = NULL; ++ ++ bfq_log_bfqq(bfqq->bfqd, bfqq, "put_request %p, %d", ++ bfqq, atomic_read(&bfqq->ref)); ++ bfq_put_queue(bfqq); ++ } ++} ++ ++static struct bfq_queue * ++bfq_merge_bfqqs(struct bfq_data *bfqd, struct bfq_io_cq *bic, ++ struct bfq_queue *bfqq) ++{ ++ bfq_log_bfqq(bfqd, bfqq, "merging with queue %lu", ++ (long unsigned)bfqq->new_bfqq->pid); ++ bic_set_bfqq(bic, bfqq->new_bfqq, 1); ++ bfq_mark_bfqq_coop(bfqq->new_bfqq); ++ bfq_put_queue(bfqq); ++ return bic_to_bfqq(bic, 1); ++} ++ ++/* ++ * Returns NULL if a new bfqq should be allocated, or the old bfqq if this ++ * was the last process referring to said bfqq. ++ */ ++static struct bfq_queue * ++bfq_split_bfqq(struct bfq_io_cq *bic, struct bfq_queue *bfqq) ++{ ++ bfq_log_bfqq(bfqq->bfqd, bfqq, "splitting queue"); ++ if (bfqq_process_refs(bfqq) == 1) { ++ bfqq->pid = current->pid; ++ bfq_clear_bfqq_some_coop_idle(bfqq); ++ bfq_clear_bfqq_coop(bfqq); ++ bfq_clear_bfqq_split_coop(bfqq); ++ return bfqq; ++ } ++ ++ bic_set_bfqq(bic, NULL, 1); ++ ++ bfq_put_cooperator(bfqq); ++ ++ bfq_put_queue(bfqq); ++ return NULL; ++} ++ ++/* ++ * Allocate bfq data structures associated with this request. 
++ */ ++static int bfq_set_request(struct request_queue *q, struct request *rq, ++ gfp_t gfp_mask) ++{ ++ struct bfq_data *bfqd = q->elevator->elevator_data; ++ struct bfq_io_cq *bic = icq_to_bic(rq->elv.icq); ++ const int rw = rq_data_dir(rq); ++ const int is_sync = rq_is_sync(rq); ++ struct bfq_queue *bfqq; ++ struct bfq_group *bfqg; ++ unsigned long flags; ++ ++ /* handle changed prio notifications; cgroup change is handled separately */ ++ if (unlikely(icq_get_changed(&bic->icq) & ICQ_IOPRIO_CHANGED)) ++ bfq_changed_ioprio(bic->icq.ioc, bic); ++ ++ might_sleep_if(gfp_mask & __GFP_WAIT); ++ ++ spin_lock_irqsave(q->queue_lock, flags); ++ ++ if (bic == NULL) ++ goto queue_fail; ++ ++ bfqg = bfq_bic_update_cgroup(bic); ++ ++new_queue: ++ bfqq = bic_to_bfqq(bic, is_sync); ++ if (bfqq == NULL || bfqq == &bfqd->oom_bfqq) { ++ bfqq = bfq_get_queue(bfqd, bfqg, is_sync, bic->icq.ioc, gfp_mask); ++ bic_set_bfqq(bic, bfqq, is_sync); ++ } else { ++ /* ++ * If the queue was seeky for too long, break it apart. ++ */ ++ if (bfq_bfqq_coop(bfqq) && bfq_bfqq_split_coop(bfqq)) { ++ bfq_log_bfqq(bfqd, bfqq, "breaking apart bfqq"); ++ bfqq = bfq_split_bfqq(bic, bfqq); ++ if (!bfqq) ++ goto new_queue; ++ } ++ ++ /* ++ * Check to see if this queue is scheduled to merge with ++ * another closely cooperating queue. The merging of queues ++ * happens here as it must be done in process context. ++ * The reference on new_bfqq was taken in merge_bfqqs. ++ */ ++ if (bfqq->new_bfqq != NULL) ++ bfqq = bfq_merge_bfqqs(bfqd, bic, bfqq); ++ } ++ ++ bfqq->allocated[rw]++; ++ atomic_inc(&bfqq->ref); ++ bfq_log_bfqq(bfqd, bfqq, "set_request: bfqq %p, %d", bfqq, ++ atomic_read(&bfqq->ref)); ++ ++ rq->elv.priv[0] = bic; ++ rq->elv.priv[1] = bfqq; ++ ++ spin_unlock_irqrestore(q->queue_lock, flags); ++ ++ return 0; ++ ++queue_fail: ++ bfq_schedule_dispatch(bfqd); ++ spin_unlock_irqrestore(q->queue_lock, flags); ++ ++ return 1; ++} ++ ++static void bfq_kick_queue(struct work_struct *work) ++{ ++ struct bfq_data *bfqd = ++ container_of(work, struct bfq_data, unplug_work); ++ struct request_queue *q = bfqd->queue; ++ ++ spin_lock_irq(q->queue_lock); ++ __blk_run_queue(q); ++ spin_unlock_irq(q->queue_lock); ++} ++ ++/* ++ * Handler of the expiration of the timer running if the active_queue ++ * is idling inside its time slice. ++ */ ++static void bfq_idle_slice_timer(unsigned long data) ++{ ++ struct bfq_data *bfqd = (struct bfq_data *)data; ++ struct bfq_queue *bfqq; ++ unsigned long flags; ++ enum bfqq_expiration reason; ++ ++ spin_lock_irqsave(bfqd->queue->queue_lock, flags); ++ ++ bfqq = bfqd->active_queue; ++ /* ++ * Theoretical race here: active_queue can be NULL or different ++ * from the queue that was idling if the timer handler spins on ++ * the queue_lock and a new request arrives for the current ++ * queue and there is a full dispatch cycle that changes the ++ * active_queue. This can hardly happen, but in the worst case ++ * we just expire a queue too early. 
++ */ ++ if (bfqq != NULL) { ++ bfq_log_bfqq(bfqd, bfqq, "slice_timer expired"); ++ if (bfq_bfqq_budget_timeout(bfqq)) ++ /* ++ * Also here the queue can be safely expired ++ * for budget timeout without wasting ++ * guarantees ++ */ ++ reason = BFQ_BFQQ_BUDGET_TIMEOUT; ++ else if (bfqq->queued[0] == 0 && bfqq->queued[1] == 0) ++ /* ++ * The queue may not be empty upon timer expiration, ++ * because we may not disable the timer when the first ++ * request of the active queue arrives during ++ * disk idling ++ */ ++ reason = BFQ_BFQQ_TOO_IDLE; ++ else ++ goto schedule_dispatch; ++ ++ bfq_bfqq_expire(bfqd, bfqq, 1, reason); ++ } ++ ++schedule_dispatch: ++ bfq_schedule_dispatch(bfqd); ++ ++ spin_unlock_irqrestore(bfqd->queue->queue_lock, flags); ++} ++ ++static void bfq_shutdown_timer_wq(struct bfq_data *bfqd) ++{ ++ del_timer_sync(&bfqd->idle_slice_timer); ++ cancel_work_sync(&bfqd->unplug_work); ++} ++ ++static inline void __bfq_put_async_bfqq(struct bfq_data *bfqd, ++ struct bfq_queue **bfqq_ptr) ++{ ++ struct bfq_group *root_group = bfqd->root_group; ++ struct bfq_queue *bfqq = *bfqq_ptr; ++ ++ bfq_log(bfqd, "put_async_bfqq: %p", bfqq); ++ if (bfqq != NULL) { ++ bfq_bfqq_move(bfqd, bfqq, &bfqq->entity, root_group); ++ bfq_log_bfqq(bfqd, bfqq, "put_async_bfqq: putting %p, %d", ++ bfqq, atomic_read(&bfqq->ref)); ++ bfq_put_queue(bfqq); ++ *bfqq_ptr = NULL; ++ } ++} ++ ++/* ++ * Release all the bfqg references to its async queues. If we are ++ * deallocating the group these queues may still contain requests, so ++ * we reparent them to the root cgroup (i.e., the only one that will ++ * exist for sure untill all the requests on a device are gone). ++ */ ++static void bfq_put_async_queues(struct bfq_data *bfqd, struct bfq_group *bfqg) ++{ ++ int i, j; ++ ++ for (i = 0; i < 2; i++) ++ for (j = 0; j < IOPRIO_BE_NR; j++) ++ __bfq_put_async_bfqq(bfqd, &bfqg->async_bfqq[i][j]); ++ ++ __bfq_put_async_bfqq(bfqd, &bfqg->async_idle_bfqq); ++} ++ ++static void bfq_exit_queue(struct elevator_queue *e) ++{ ++ struct bfq_data *bfqd = e->elevator_data; ++ struct request_queue *q = bfqd->queue; ++ struct bfq_queue *bfqq, *n; ++ ++ bfq_shutdown_timer_wq(bfqd); ++ ++ spin_lock_irq(q->queue_lock); ++ ++ BUG_ON(bfqd->active_queue != NULL); ++ list_for_each_entry_safe(bfqq, n, &bfqd->idle_list, bfqq_list) ++ bfq_deactivate_bfqq(bfqd, bfqq, 0); ++ ++ bfq_disconnect_groups(bfqd); ++ spin_unlock_irq(q->queue_lock); ++ ++ bfq_shutdown_timer_wq(bfqd); ++ ++ synchronize_rcu(); ++ ++ BUG_ON(timer_pending(&bfqd->idle_slice_timer)); ++ ++ bfq_free_root_group(bfqd); ++ kfree(bfqd); ++} ++ ++static void *bfq_init_queue(struct request_queue *q) ++{ ++ struct bfq_group *bfqg; ++ struct bfq_data *bfqd; ++ ++ bfqd = kmalloc_node(sizeof(*bfqd), GFP_KERNEL | __GFP_ZERO, q->node); ++ if (bfqd == NULL) ++ return NULL; ++ ++ /* ++ * Our fallback bfqq if bfq_find_alloc_queue() runs into OOM issues. ++ * Grab a permanent reference to it, so that the normal code flow ++ * will not attempt to free it. 
++ */ ++ bfq_init_bfqq(bfqd, &bfqd->oom_bfqq, 1, 0); ++ atomic_inc(&bfqd->oom_bfqq.ref); ++ ++ bfqd->queue = q; ++ ++ bfqg = bfq_alloc_root_group(bfqd, q->node); ++ if (bfqg == NULL) { ++ kfree(bfqd); ++ return NULL; ++ } ++ ++ bfqd->root_group = bfqg; ++ ++ init_timer(&bfqd->idle_slice_timer); ++ bfqd->idle_slice_timer.function = bfq_idle_slice_timer; ++ bfqd->idle_slice_timer.data = (unsigned long)bfqd; ++ ++ bfqd->rq_pos_tree = RB_ROOT; ++ ++ INIT_WORK(&bfqd->unplug_work, bfq_kick_queue); ++ ++ INIT_LIST_HEAD(&bfqd->active_list); ++ INIT_LIST_HEAD(&bfqd->idle_list); ++ ++ bfqd->hw_tag = -1; ++ ++ bfqd->bfq_max_budget = bfq_default_max_budget; ++ ++ bfqd->bfq_quantum = bfq_quantum; ++ bfqd->bfq_fifo_expire[0] = bfq_fifo_expire[0]; ++ bfqd->bfq_fifo_expire[1] = bfq_fifo_expire[1]; ++ bfqd->bfq_back_max = bfq_back_max; ++ bfqd->bfq_back_penalty = bfq_back_penalty; ++ bfqd->bfq_slice_idle = bfq_slice_idle; ++ bfqd->bfq_class_idle_last_service = 0; ++ bfqd->bfq_max_budget_async_rq = bfq_max_budget_async_rq; ++ bfqd->bfq_timeout[BLK_RW_ASYNC] = bfq_timeout_async; ++ bfqd->bfq_timeout[BLK_RW_SYNC] = bfq_timeout_sync; ++ ++ bfqd->low_latency = true; ++ ++ bfqd->bfq_raising_coeff = 20; ++ bfqd->bfq_raising_rt_max_time = msecs_to_jiffies(300); ++ bfqd->bfq_raising_max_time = 0; ++ bfqd->bfq_raising_min_idle_time = msecs_to_jiffies(2000); ++ bfqd->bfq_raising_min_inter_arr_async = msecs_to_jiffies(500); ++ bfqd->bfq_raising_max_softrt_rate = 7000; ++ ++ /* Initially estimate the device's peak rate as the reference rate */ ++ if (blk_queue_nonrot(bfqd->queue)) { ++ bfqd->RT_prod = R_nonrot * T_nonrot; ++ bfqd->peak_rate = R_nonrot; ++ } else { ++ bfqd->RT_prod = R_rot * T_rot; ++ bfqd->peak_rate = R_rot; ++ } ++ ++ return bfqd; ++} ++ ++static void bfq_slab_kill(void) ++{ ++ if (bfq_pool != NULL) ++ kmem_cache_destroy(bfq_pool); ++} ++ ++static int __init bfq_slab_setup(void) ++{ ++ bfq_pool = KMEM_CACHE(bfq_queue, 0); ++ if (bfq_pool == NULL) ++ return -ENOMEM; ++ return 0; ++} ++ ++static ssize_t bfq_var_show(unsigned int var, char *page) ++{ ++ return sprintf(page, "%d\n", var); ++} ++ ++static ssize_t bfq_var_store(unsigned long *var, const char *page, size_t count) ++{ ++ unsigned long new_val; ++ int ret = strict_strtoul(page, 10, &new_val); ++ ++ if (ret == 0) ++ *var = new_val; ++ ++ return count; ++} ++ ++static ssize_t bfq_raising_max_time_show(struct elevator_queue *e, char *page) ++{ ++ struct bfq_data *bfqd = e->elevator_data; ++ return sprintf(page, "%d\n", bfqd->bfq_raising_max_time > 0 ? 
++ bfqd->bfq_raising_max_time : ++ bfq_wrais_duration(bfqd)); ++} ++ ++static ssize_t bfq_weights_show(struct elevator_queue *e, char *page) ++{ ++ struct bfq_queue *bfqq; ++ struct bfq_data *bfqd = e->elevator_data; ++ ssize_t num_char = 0; ++ ++ num_char += sprintf(page + num_char, "Active:\n"); ++ list_for_each_entry(bfqq, &bfqd->active_list, bfqq_list) { ++ num_char += sprintf(page + num_char, ++ "pid%d: weight %hu, dur %d/%u\n", ++ bfqq->pid, ++ bfqq->entity.weight, ++ jiffies_to_msecs(jiffies - ++ bfqq->last_rais_start_finish), ++ jiffies_to_msecs(bfqq->raising_cur_max_time)); ++ } ++ num_char += sprintf(page + num_char, "Idle:\n"); ++ list_for_each_entry(bfqq, &bfqd->idle_list, bfqq_list) { ++ num_char += sprintf(page + num_char, ++ "pid%d: weight %hu, dur %d/%u\n", ++ bfqq->pid, ++ bfqq->entity.weight, ++ jiffies_to_msecs(jiffies - ++ bfqq->last_rais_start_finish), ++ jiffies_to_msecs(bfqq->raising_cur_max_time)); ++ } ++ return num_char; ++} ++ ++#define SHOW_FUNCTION(__FUNC, __VAR, __CONV) \ ++static ssize_t __FUNC(struct elevator_queue *e, char *page) \ ++{ \ ++ struct bfq_data *bfqd = e->elevator_data; \ ++ unsigned int __data = __VAR; \ ++ if (__CONV) \ ++ __data = jiffies_to_msecs(__data); \ ++ return bfq_var_show(__data, (page)); \ ++} ++SHOW_FUNCTION(bfq_quantum_show, bfqd->bfq_quantum, 0); ++SHOW_FUNCTION(bfq_fifo_expire_sync_show, bfqd->bfq_fifo_expire[1], 1); ++SHOW_FUNCTION(bfq_fifo_expire_async_show, bfqd->bfq_fifo_expire[0], 1); ++SHOW_FUNCTION(bfq_back_seek_max_show, bfqd->bfq_back_max, 0); ++SHOW_FUNCTION(bfq_back_seek_penalty_show, bfqd->bfq_back_penalty, 0); ++SHOW_FUNCTION(bfq_slice_idle_show, bfqd->bfq_slice_idle, 1); ++SHOW_FUNCTION(bfq_max_budget_show, bfqd->bfq_user_max_budget, 0); ++SHOW_FUNCTION(bfq_max_budget_async_rq_show, bfqd->bfq_max_budget_async_rq, 0); ++SHOW_FUNCTION(bfq_timeout_sync_show, bfqd->bfq_timeout[BLK_RW_SYNC], 1); ++SHOW_FUNCTION(bfq_timeout_async_show, bfqd->bfq_timeout[BLK_RW_ASYNC], 1); ++SHOW_FUNCTION(bfq_low_latency_show, bfqd->low_latency, 0); ++SHOW_FUNCTION(bfq_raising_coeff_show, bfqd->bfq_raising_coeff, 0); ++SHOW_FUNCTION(bfq_raising_rt_max_time_show, bfqd->bfq_raising_rt_max_time, 1); ++SHOW_FUNCTION(bfq_raising_min_idle_time_show, bfqd->bfq_raising_min_idle_time, ++ 1); ++SHOW_FUNCTION(bfq_raising_min_inter_arr_async_show, ++ bfqd->bfq_raising_min_inter_arr_async, ++ 1); ++SHOW_FUNCTION(bfq_raising_max_softrt_rate_show, ++ bfqd->bfq_raising_max_softrt_rate, 0); ++#undef SHOW_FUNCTION ++ ++#define STORE_FUNCTION(__FUNC, __PTR, MIN, MAX, __CONV) \ ++static ssize_t \ ++__FUNC(struct elevator_queue *e, const char *page, size_t count) \ ++{ \ ++ struct bfq_data *bfqd = e->elevator_data; \ ++ unsigned long __data; \ ++ int ret = bfq_var_store(&__data, (page), count); \ ++ if (__data < (MIN)) \ ++ __data = (MIN); \ ++ else if (__data > (MAX)) \ ++ __data = (MAX); \ ++ if (__CONV) \ ++ *(__PTR) = msecs_to_jiffies(__data); \ ++ else \ ++ *(__PTR) = __data; \ ++ return ret; \ ++} ++STORE_FUNCTION(bfq_quantum_store, &bfqd->bfq_quantum, 1, INT_MAX, 0); ++STORE_FUNCTION(bfq_fifo_expire_sync_store, &bfqd->bfq_fifo_expire[1], 1, ++ INT_MAX, 1); ++STORE_FUNCTION(bfq_fifo_expire_async_store, &bfqd->bfq_fifo_expire[0], 1, ++ INT_MAX, 1); ++STORE_FUNCTION(bfq_back_seek_max_store, &bfqd->bfq_back_max, 0, INT_MAX, 0); ++STORE_FUNCTION(bfq_back_seek_penalty_store, &bfqd->bfq_back_penalty, 1, ++ INT_MAX, 0); ++STORE_FUNCTION(bfq_slice_idle_store, &bfqd->bfq_slice_idle, 0, INT_MAX, 1); ++STORE_FUNCTION(bfq_max_budget_async_rq_store, 
&bfqd->bfq_max_budget_async_rq, ++ 1, INT_MAX, 0); ++STORE_FUNCTION(bfq_timeout_async_store, &bfqd->bfq_timeout[BLK_RW_ASYNC], 0, ++ INT_MAX, 1); ++STORE_FUNCTION(bfq_raising_coeff_store, &bfqd->bfq_raising_coeff, 1, ++ INT_MAX, 0); ++STORE_FUNCTION(bfq_raising_max_time_store, &bfqd->bfq_raising_max_time, 0, ++ INT_MAX, 1); ++STORE_FUNCTION(bfq_raising_rt_max_time_store, &bfqd->bfq_raising_rt_max_time, 0, ++ INT_MAX, 1); ++STORE_FUNCTION(bfq_raising_min_idle_time_store, ++ &bfqd->bfq_raising_min_idle_time, 0, INT_MAX, 1); ++STORE_FUNCTION(bfq_raising_min_inter_arr_async_store, ++ &bfqd->bfq_raising_min_inter_arr_async, 0, INT_MAX, 1); ++STORE_FUNCTION(bfq_raising_max_softrt_rate_store, ++ &bfqd->bfq_raising_max_softrt_rate, 0, INT_MAX, 0); ++#undef STORE_FUNCTION ++ ++/* do nothing for the moment */ ++static ssize_t bfq_weights_store(struct elevator_queue *e, ++ const char *page, size_t count) ++{ ++ return count; ++} ++ ++static inline unsigned long bfq_estimated_max_budget(struct bfq_data *bfqd) ++{ ++ u64 timeout = jiffies_to_msecs(bfqd->bfq_timeout[BLK_RW_SYNC]); ++ ++ if (bfqd->peak_rate_samples >= BFQ_PEAK_RATE_SAMPLES) ++ return bfq_calc_max_budget(bfqd->peak_rate, timeout); ++ else ++ return bfq_default_max_budget; ++} ++ ++static ssize_t bfq_max_budget_store(struct elevator_queue *e, ++ const char *page, size_t count) ++{ ++ struct bfq_data *bfqd = e->elevator_data; ++ unsigned long __data; ++ int ret = bfq_var_store(&__data, (page), count); ++ ++ if (__data == 0) ++ bfqd->bfq_max_budget = bfq_estimated_max_budget(bfqd); ++ else { ++ if (__data > INT_MAX) ++ __data = INT_MAX; ++ bfqd->bfq_max_budget = __data; ++ } ++ ++ bfqd->bfq_user_max_budget = __data; ++ ++ return ret; ++} ++ ++static ssize_t bfq_timeout_sync_store(struct elevator_queue *e, ++ const char *page, size_t count) ++{ ++ struct bfq_data *bfqd = e->elevator_data; ++ unsigned long __data; ++ int ret = bfq_var_store(&__data, (page), count); ++ ++ if (__data < 1) ++ __data = 1; ++ else if (__data > INT_MAX) ++ __data = INT_MAX; ++ ++ bfqd->bfq_timeout[BLK_RW_SYNC] = msecs_to_jiffies(__data); ++ if (bfqd->bfq_user_max_budget == 0) ++ bfqd->bfq_max_budget = bfq_estimated_max_budget(bfqd); ++ ++ return ret; ++} ++ ++static ssize_t bfq_low_latency_store(struct elevator_queue *e, ++ const char *page, size_t count) ++{ ++ struct bfq_data *bfqd = e->elevator_data; ++ unsigned long __data; ++ int ret = bfq_var_store(&__data, (page), count); ++ ++ if (__data > 1) ++ __data = 1; ++ bfqd->low_latency = __data; ++ ++ return ret; ++} ++ ++#define BFQ_ATTR(name) \ ++ __ATTR(name, S_IRUGO|S_IWUSR, bfq_##name##_show, bfq_##name##_store) ++ ++static struct elv_fs_entry bfq_attrs[] = { ++ BFQ_ATTR(quantum), ++ BFQ_ATTR(fifo_expire_sync), ++ BFQ_ATTR(fifo_expire_async), ++ BFQ_ATTR(back_seek_max), ++ BFQ_ATTR(back_seek_penalty), ++ BFQ_ATTR(slice_idle), ++ BFQ_ATTR(max_budget), ++ BFQ_ATTR(max_budget_async_rq), ++ BFQ_ATTR(timeout_sync), ++ BFQ_ATTR(timeout_async), ++ BFQ_ATTR(low_latency), ++ BFQ_ATTR(raising_coeff), ++ BFQ_ATTR(raising_max_time), ++ BFQ_ATTR(raising_rt_max_time), ++ BFQ_ATTR(raising_min_idle_time), ++ BFQ_ATTR(raising_min_inter_arr_async), ++ BFQ_ATTR(raising_max_softrt_rate), ++ BFQ_ATTR(weights), ++ __ATTR_NULL ++}; ++ ++static struct elevator_type iosched_bfq = { ++ .ops = { ++ .elevator_merge_fn = bfq_merge, ++ .elevator_merged_fn = bfq_merged_request, ++ .elevator_merge_req_fn = bfq_merged_requests, ++ .elevator_allow_merge_fn = bfq_allow_merge, ++ .elevator_dispatch_fn = bfq_dispatch_requests, ++ 
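/*
 * Editor's aside (not part of the patch): a user-space sketch of the policy
 * implemented by bfq_max_budget_store() and bfq_estimated_max_budget() above.
 * Writing 0 re-enables auto-tuning, where the budget is derived from the
 * observed peak rate and the sync timeout (approximated here as rate * timeout;
 * the in-kernel bfq_calc_max_budget() applies its own scaling); any other value
 * is clamped to INT_MAX. All names and constants below are hypothetical.
 */
#include <stdio.h>
#include <limits.h>

#define PEAK_RATE_SAMPLES_NEEDED 32
#define DEFAULT_MAX_BUDGET 16384UL

struct sched_tunables {
	unsigned long peak_rate;	/* estimated peak transfer rate */
	int peak_rate_samples;		/* samples behind the estimate */
	unsigned long timeout_sync;	/* budget timeout for sync queues */
	unsigned long max_budget;	/* effective value used by the scheduler */
	unsigned long user_max_budget;	/* raw value written by the user, 0 = auto */
};

static unsigned long estimated_max_budget(const struct sched_tunables *t)
{
	/* Simplified stand-in for bfq_calc_max_budget(): rate * timeout. */
	if (t->peak_rate_samples >= PEAK_RATE_SAMPLES_NEEDED)
		return t->peak_rate * t->timeout_sync;
	return DEFAULT_MAX_BUDGET;
}

static void store_max_budget(struct sched_tunables *t, unsigned long val)
{
	if (val == 0)
		t->max_budget = estimated_max_budget(t);		/* auto-tune */
	else
		t->max_budget = val > INT_MAX ? INT_MAX : val;		/* clamp */
	t->user_max_budget = val;
}

int main(void)
{
	struct sched_tunables t = { .peak_rate = 200, .peak_rate_samples = 40,
				    .timeout_sync = 125 };

	store_max_budget(&t, 0);
	printf("auto:  %lu\n", t.max_budget);	/* 200 * 125 = 25000 */
	store_max_budget(&t, 5000);
	printf("fixed: %lu\n", t.max_budget);	/* 5000 */
	return 0;
}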
.elevator_add_req_fn = bfq_insert_request, ++ .elevator_activate_req_fn = bfq_activate_request, ++ .elevator_deactivate_req_fn = bfq_deactivate_request, ++ .elevator_completed_req_fn = bfq_completed_request, ++ .elevator_former_req_fn = elv_rb_former_request, ++ .elevator_latter_req_fn = elv_rb_latter_request, ++ .elevator_init_icq_fn = bfq_init_icq, ++ .elevator_exit_icq_fn = bfq_exit_icq, ++ .elevator_set_req_fn = bfq_set_request, ++ .elevator_put_req_fn = bfq_put_request, ++ .elevator_may_queue_fn = bfq_may_queue, ++ .elevator_init_fn = bfq_init_queue, ++ .elevator_exit_fn = bfq_exit_queue, ++ }, ++ .icq_size = sizeof(struct bfq_io_cq), ++ .icq_align = __alignof__(struct bfq_io_cq), ++ .elevator_attrs = bfq_attrs, ++ .elevator_name = "bfq", ++ .elevator_owner = THIS_MODULE, ++}; ++ ++static int __init bfq_init(void) ++{ ++ /* ++ * Can be 0 on HZ < 1000 setups. ++ */ ++ if (bfq_slice_idle == 0) ++ bfq_slice_idle = 1; ++ ++ if (bfq_timeout_async == 0) ++ bfq_timeout_async = 1; ++ ++ if (bfq_slab_setup()) ++ return -ENOMEM; ++ ++ elv_register(&iosched_bfq); ++ ++ return 0; ++} ++ ++static void __exit bfq_exit(void) ++{ ++ elv_unregister(&iosched_bfq); ++ bfq_slab_kill(); ++} ++ ++module_init(bfq_init); ++module_exit(bfq_exit); ++ ++MODULE_AUTHOR("Fabio Checconi, Paolo Valente"); ++MODULE_LICENSE("GPL"); ++MODULE_DESCRIPTION("Budget Fair Queueing IO scheduler"); +diff --git a/block/bfq-sched.c block/bfq-sched.c +new file mode 100644 +index 0000000..87bea97 +--- /dev/null ++++ block/bfq-sched.c +@@ -0,0 +1,1070 @@ ++/* ++ * BFQ: Hierarchical B-WF2Q+ scheduler. ++ * ++ * Based on ideas and code from CFQ: ++ * Copyright (C) 2003 Jens Axboe ++ * ++ * Copyright (C) 2008 Fabio Checconi ++ * Paolo Valente ++ */ ++ ++#ifdef CONFIG_CGROUP_BFQIO ++#define for_each_entity(entity) \ ++ for (; entity != NULL; entity = entity->parent) ++ ++#define for_each_entity_safe(entity, parent) \ ++ for (; entity && ({ parent = entity->parent; 1; }); entity = parent) ++ ++static struct bfq_entity *bfq_lookup_next_entity(struct bfq_sched_data *sd, ++ int extract, ++ struct bfq_data *bfqd); ++ ++static inline void bfq_update_budget(struct bfq_entity *next_active) ++{ ++ struct bfq_entity *bfqg_entity; ++ struct bfq_group *bfqg; ++ struct bfq_sched_data *group_sd; ++ ++ BUG_ON(next_active == NULL); ++ ++ group_sd = next_active->sched_data; ++ ++ bfqg = container_of(group_sd, struct bfq_group, sched_data); ++ /* ++ * bfq_group's my_entity field is not NULL only if the group ++ * is not the root group. We must not touch the root entity ++ * as it must never become an active entity. ++ */ ++ bfqg_entity = bfqg->my_entity; ++ if (bfqg_entity != NULL) ++ bfqg_entity->budget = next_active->budget; ++} ++ ++static int bfq_update_next_active(struct bfq_sched_data *sd) ++{ ++ struct bfq_entity *next_active; ++ ++ if (sd->active_entity != NULL) ++ /* will update/requeue at the end of service */ ++ return 0; ++ ++ /* ++ * NOTE: this can be improved in many ways, such as returning ++ * 1 (and thus propagating upwards the update) only when the ++ * budget changes, or caching the bfqq that will be scheduled ++ * next from this subtree. By now we worry more about ++ * correctness than about performance... 
++ */ ++ next_active = bfq_lookup_next_entity(sd, 0, NULL); ++ sd->next_active = next_active; ++ ++ if (next_active != NULL) ++ bfq_update_budget(next_active); ++ ++ return 1; ++} ++ ++static inline void bfq_check_next_active(struct bfq_sched_data *sd, ++ struct bfq_entity *entity) ++{ ++ BUG_ON(sd->next_active != entity); ++} ++#else ++#define for_each_entity(entity) \ ++ for (; entity != NULL; entity = NULL) ++ ++#define for_each_entity_safe(entity, parent) \ ++ for (parent = NULL; entity != NULL; entity = parent) ++ ++static inline int bfq_update_next_active(struct bfq_sched_data *sd) ++{ ++ return 0; ++} ++ ++static inline void bfq_check_next_active(struct bfq_sched_data *sd, ++ struct bfq_entity *entity) ++{ ++} ++ ++static inline void bfq_update_budget(struct bfq_entity *next_active) ++{ ++} ++#endif ++ ++/* ++ * Shift for timestamp calculations. This actually limits the maximum ++ * service allowed in one timestamp delta (small shift values increase it), ++ * the maximum total weight that can be used for the queues in the system ++ * (big shift values increase it), and the period of virtual time wraparounds. ++ */ ++#define WFQ_SERVICE_SHIFT 22 ++ ++/** ++ * bfq_gt - compare two timestamps. ++ * @a: first ts. ++ * @b: second ts. ++ * ++ * Return @a > @b, dealing with wrapping correctly. ++ */ ++static inline int bfq_gt(u64 a, u64 b) ++{ ++ return (s64)(a - b) > 0; ++} ++ ++static inline struct bfq_queue *bfq_entity_to_bfqq(struct bfq_entity *entity) ++{ ++ struct bfq_queue *bfqq = NULL; ++ ++ BUG_ON(entity == NULL); ++ ++ if (entity->my_sched_data == NULL) ++ bfqq = container_of(entity, struct bfq_queue, entity); ++ ++ return bfqq; ++} ++ ++ ++/** ++ * bfq_delta - map service into the virtual time domain. ++ * @service: amount of service. ++ * @weight: scale factor (weight of an entity or weight sum). ++ */ ++static inline u64 bfq_delta(unsigned long service, ++ unsigned long weight) ++{ ++ u64 d = (u64)service << WFQ_SERVICE_SHIFT; ++ ++ do_div(d, weight); ++ return d; ++} ++ ++/** ++ * bfq_calc_finish - assign the finish time to an entity. ++ * @entity: the entity to act upon. ++ * @service: the service to be charged to the entity. ++ */ ++static inline void bfq_calc_finish(struct bfq_entity *entity, ++ unsigned long service) ++{ ++ struct bfq_queue *bfqq = bfq_entity_to_bfqq(entity); ++ ++ BUG_ON(entity->weight == 0); ++ ++ entity->finish = entity->start + ++ bfq_delta(service, entity->weight); ++ ++ if (bfqq != NULL) { ++ bfq_log_bfqq(bfqq->bfqd, bfqq, ++ "calc_finish: serv %lu, w %d", ++ service, entity->weight); ++ bfq_log_bfqq(bfqq->bfqd, bfqq, ++ "calc_finish: start %llu, finish %llu, delta %llu", ++ entity->start, entity->finish, ++ bfq_delta(service, entity->weight)); ++ } ++} ++ ++/** ++ * bfq_entity_of - get an entity from a node. ++ * @node: the node field of the entity. ++ * ++ * Convert a node pointer to the relative entity. This is used only ++ * to simplify the logic of some functions and not as the generic ++ * conversion mechanism because, e.g., in the tree walking functions, ++ * the check for a %NULL value would be redundant. ++ */ ++static inline struct bfq_entity *bfq_entity_of(struct rb_node *node) ++{ ++ struct bfq_entity *entity = NULL; ++ ++ if (node != NULL) ++ entity = rb_entry(node, struct bfq_entity, rb_node); ++ ++ return entity; ++} ++ ++/** ++ * bfq_extract - remove an entity from a tree. ++ * @root: the tree root. ++ * @entity: the entity to remove. 
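/*
 * Editor's aside (not part of the patch): bfq_gt() and bfq_delta() above rely
 * on two small tricks that a standalone program can demonstrate. Timestamps
 * are compared through a signed difference, so the ordering survives 64-bit
 * wraparound, and service is mapped into the virtual time domain by a
 * fixed-point division by the weight, delta = (service << WFQ_SERVICE_SHIFT)
 * / weight, which is also how bfq_calc_finish() obtains F_i = S_i +
 * budget/weight.
 */
#include <stdio.h>
#include <stdint.h>

#define WFQ_SERVICE_SHIFT 22

static int ts_gt(uint64_t a, uint64_t b)
{
	return (int64_t)(a - b) > 0;	/* true iff a is "after" b, modulo 2^64 */
}

static uint64_t vt_delta(unsigned long service, unsigned long weight)
{
	return ((uint64_t)service << WFQ_SERVICE_SHIFT) / weight;
}

int main(void)
{
	uint64_t just_before_wrap = UINT64_MAX - 5, just_after_wrap = 10;
	uint64_t start = 1000;

	/* 10 is "later" than UINT64_MAX - 5 once wraparound is accounted for. */
	printf("gt=%d\n", ts_gt(just_after_wrap, just_before_wrap));

	/* The same amount of service costs less virtual time to a heavier entity. */
	printf("finish(w=1)=%llu\n",
	       (unsigned long long)(start + vt_delta(4096, 1)));
	printf("finish(w=8)=%llu\n",
	       (unsigned long long)(start + vt_delta(4096, 8)));
	return 0;
}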
++ */ ++static inline void bfq_extract(struct rb_root *root, ++ struct bfq_entity *entity) ++{ ++ BUG_ON(entity->tree != root); ++ ++ entity->tree = NULL; ++ rb_erase(&entity->rb_node, root); ++} ++ ++/** ++ * bfq_idle_extract - extract an entity from the idle tree. ++ * @st: the service tree of the owning @entity. ++ * @entity: the entity being removed. ++ */ ++static void bfq_idle_extract(struct bfq_service_tree *st, ++ struct bfq_entity *entity) ++{ ++ struct bfq_queue *bfqq = bfq_entity_to_bfqq(entity); ++ struct rb_node *next; ++ ++ BUG_ON(entity->tree != &st->idle); ++ ++ if (entity == st->first_idle) { ++ next = rb_next(&entity->rb_node); ++ st->first_idle = bfq_entity_of(next); ++ } ++ ++ if (entity == st->last_idle) { ++ next = rb_prev(&entity->rb_node); ++ st->last_idle = bfq_entity_of(next); ++ } ++ ++ bfq_extract(&st->idle, entity); ++ ++ if (bfqq != NULL) ++ list_del(&bfqq->bfqq_list); ++} ++ ++/** ++ * bfq_insert - generic tree insertion. ++ * @root: tree root. ++ * @entity: entity to insert. ++ * ++ * This is used for the idle and the active tree, since they are both ++ * ordered by finish time. ++ */ ++static void bfq_insert(struct rb_root *root, struct bfq_entity *entity) ++{ ++ struct bfq_entity *entry; ++ struct rb_node **node = &root->rb_node; ++ struct rb_node *parent = NULL; ++ ++ BUG_ON(entity->tree != NULL); ++ ++ while (*node != NULL) { ++ parent = *node; ++ entry = rb_entry(parent, struct bfq_entity, rb_node); ++ ++ if (bfq_gt(entry->finish, entity->finish)) ++ node = &parent->rb_left; ++ else ++ node = &parent->rb_right; ++ } ++ ++ rb_link_node(&entity->rb_node, parent, node); ++ rb_insert_color(&entity->rb_node, root); ++ ++ entity->tree = root; ++} ++ ++/** ++ * bfq_update_min - update the min_start field of a entity. ++ * @entity: the entity to update. ++ * @node: one of its children. ++ * ++ * This function is called when @entity may store an invalid value for ++ * min_start due to updates to the active tree. The function assumes ++ * that the subtree rooted at @node (which may be its left or its right ++ * child) has a valid min_start value. ++ */ ++static inline void bfq_update_min(struct bfq_entity *entity, ++ struct rb_node *node) ++{ ++ struct bfq_entity *child; ++ ++ if (node != NULL) { ++ child = rb_entry(node, struct bfq_entity, rb_node); ++ if (bfq_gt(entity->min_start, child->min_start)) ++ entity->min_start = child->min_start; ++ } ++} ++ ++/** ++ * bfq_update_active_node - recalculate min_start. ++ * @node: the node to update. ++ * ++ * @node may have changed position or one of its children may have moved, ++ * this function updates its min_start value. The left and right subtrees ++ * are assumed to hold a correct min_start value. ++ */ ++static inline void bfq_update_active_node(struct rb_node *node) ++{ ++ struct bfq_entity *entity = rb_entry(node, struct bfq_entity, rb_node); ++ ++ entity->min_start = entity->start; ++ bfq_update_min(entity, node->rb_right); ++ bfq_update_min(entity, node->rb_left); ++} ++ ++/** ++ * bfq_update_active_tree - update min_start for the whole active tree. ++ * @node: the starting node. ++ * ++ * @node must be the deepest modified node after an update. This function ++ * updates its min_start using the values held by its children, assuming ++ * that they did not change, and then updates all the nodes that may have ++ * changed in the path to the root. The only nodes that may have changed ++ * are the ones in the path or their siblings. 
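/*
 * Editor's aside (not part of the patch): the active tree is ordered by finish
 * time, and bfq_update_min()/bfq_update_active_node() above additionally cache
 * in each node the minimum start time of its subtree. That cached value is
 * what later lets the scheduler skip whole subtrees containing no eligible
 * entity (min_start > vtime). A toy, array-based binary tree is enough to show
 * the bottom-up propagation; everything below is illustrative.
 */
#include <stdio.h>

#define NODES 7

struct toy_node {
	unsigned long long start;	/* S_i of this entity */
	unsigned long long min_start;	/* min start over the subtree rooted here */
};

/* Implicit binary tree: the children of node i are 2*i+1 and 2*i+2. */
static void update_min_start(struct toy_node *n, int i)
{
	int l = 2 * i + 1, r = 2 * i + 2;

	n[i].min_start = n[i].start;
	if (l < NODES && n[l].min_start < n[i].min_start)
		n[i].min_start = n[l].min_start;
	if (r < NODES && n[r].min_start < n[i].min_start)
		n[i].min_start = n[r].min_start;
}

int main(void)
{
	struct toy_node n[NODES] = {
		{ 50 }, { 20 }, { 70 }, { 90 }, { 10 }, { 60 }, { 80 },
	};
	int i;

	/* Bottom-up pass, mirroring bfq_update_active_tree() walking to the root. */
	for (i = NODES - 1; i >= 0; i--)
		update_min_start(n, i);

	for (i = 0; i < NODES; i++)
		printf("node %d: start=%llu min_start=%llu\n",
		       i, n[i].start, n[i].min_start);
	return 0;
}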
++ */ ++static void bfq_update_active_tree(struct rb_node *node) ++{ ++ struct rb_node *parent; ++ ++up: ++ bfq_update_active_node(node); ++ ++ parent = rb_parent(node); ++ if (parent == NULL) ++ return; ++ ++ if (node == parent->rb_left && parent->rb_right != NULL) ++ bfq_update_active_node(parent->rb_right); ++ else if (parent->rb_left != NULL) ++ bfq_update_active_node(parent->rb_left); ++ ++ node = parent; ++ goto up; ++} ++ ++/** ++ * bfq_active_insert - insert an entity in the active tree of its group/device. ++ * @st: the service tree of the entity. ++ * @entity: the entity being inserted. ++ * ++ * The active tree is ordered by finish time, but an extra key is kept ++ * per each node, containing the minimum value for the start times of ++ * its children (and the node itself), so it's possible to search for ++ * the eligible node with the lowest finish time in logarithmic time. ++ */ ++static void bfq_active_insert(struct bfq_service_tree *st, ++ struct bfq_entity *entity) ++{ ++ struct bfq_queue *bfqq = bfq_entity_to_bfqq(entity); ++ struct rb_node *node = &entity->rb_node; ++ ++ bfq_insert(&st->active, entity); ++ ++ if (node->rb_left != NULL) ++ node = node->rb_left; ++ else if (node->rb_right != NULL) ++ node = node->rb_right; ++ ++ bfq_update_active_tree(node); ++ ++ if (bfqq != NULL) ++ list_add(&bfqq->bfqq_list, &bfqq->bfqd->active_list); ++} ++ ++/** ++ * bfq_ioprio_to_weight - calc a weight from an ioprio. ++ * @ioprio: the ioprio value to convert. ++ */ ++static unsigned short bfq_ioprio_to_weight(int ioprio) ++{ ++ WARN_ON(ioprio < 0 || ioprio >= IOPRIO_BE_NR); ++ return IOPRIO_BE_NR - ioprio; ++} ++ ++/** ++ * bfq_weight_to_ioprio - calc an ioprio from a weight. ++ * @weight: the weight value to convert. ++ * ++ * To preserve as mush as possible the old only-ioprio user interface, ++ * 0 is used as an escape ioprio value for weights (numerically) equal or ++ * larger than IOPRIO_BE_NR ++ */ ++static unsigned short bfq_weight_to_ioprio(int weight) ++{ ++ WARN_ON(weight < BFQ_MIN_WEIGHT || weight > BFQ_MAX_WEIGHT); ++ return IOPRIO_BE_NR - weight < 0 ? 0 : IOPRIO_BE_NR - weight; ++} ++ ++static inline void bfq_get_entity(struct bfq_entity *entity) ++{ ++ struct bfq_queue *bfqq = bfq_entity_to_bfqq(entity); ++ struct bfq_sched_data *sd; ++ ++ if (bfqq != NULL) { ++ sd = entity->sched_data; ++ atomic_inc(&bfqq->ref); ++ bfq_log_bfqq(bfqq->bfqd, bfqq, "get_entity: %p %d", ++ bfqq, atomic_read(&bfqq->ref)); ++ } ++} ++ ++/** ++ * bfq_find_deepest - find the deepest node that an extraction can modify. ++ * @node: the node being removed. ++ * ++ * Do the first step of an extraction in an rb tree, looking for the ++ * node that will replace @node, and returning the deepest node that ++ * the following modifications to the tree can touch. If @node is the ++ * last node in the tree return %NULL. ++ */ ++static struct rb_node *bfq_find_deepest(struct rb_node *node) ++{ ++ struct rb_node *deepest; ++ ++ if (node->rb_right == NULL && node->rb_left == NULL) ++ deepest = rb_parent(node); ++ else if (node->rb_right == NULL) ++ deepest = node->rb_left; ++ else if (node->rb_left == NULL) ++ deepest = node->rb_right; ++ else { ++ deepest = rb_next(node); ++ if (deepest->rb_right != NULL) ++ deepest = deepest->rb_right; ++ else if (rb_parent(deepest) != node) ++ deepest = rb_parent(deepest); ++ } ++ ++ return deepest; ++} ++ ++/** ++ * bfq_active_extract - remove an entity from the active tree. ++ * @st: the service_tree containing the tree. ++ * @entity: the entity being removed. 
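/*
 * Editor's aside (not part of the patch): the ioprio/weight conversion above is
 * a simple affine mapping around IOPRIO_BE_NR, with ioprio 0 acting as an
 * escape value for weights that have no ioprio equivalent;
 * __bfq_entity_update_weight_prio() later multiplies the resulting original
 * weight by raising_coeff while a queue is weight-raised. The snippet below
 * just replays that arithmetic in user space.
 */
#include <stdio.h>

#define IOPRIO_BE_NR 8

static unsigned short ioprio_to_weight(int ioprio)
{
	return IOPRIO_BE_NR - ioprio;		/* ioprio 0..7 -> weight 8..1 */
}

static unsigned short weight_to_ioprio(int weight)
{
	/* Weights >= IOPRIO_BE_NR cannot be represented: escape to ioprio 0. */
	return IOPRIO_BE_NR - weight < 0 ? 0 : IOPRIO_BE_NR - weight;
}

int main(void)
{
	unsigned int raising_coeff = 20;	/* the patch's default boost factor */
	int ioprio;

	for (ioprio = 0; ioprio < IOPRIO_BE_NR; ioprio++) {
		unsigned int w = ioprio_to_weight(ioprio);

		printf("ioprio %d -> weight %u (raised: %u) -> ioprio %u\n",
		       ioprio, w, w * raising_coeff,
		       (unsigned int)weight_to_ioprio(w));
	}
	printf("weight 100 -> ioprio %u (escape value)\n",
	       (unsigned int)weight_to_ioprio(100));
	return 0;
}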
++ */ ++static void bfq_active_extract(struct bfq_service_tree *st, ++ struct bfq_entity *entity) ++{ ++ struct bfq_queue *bfqq = bfq_entity_to_bfqq(entity); ++ struct rb_node *node; ++ ++ node = bfq_find_deepest(&entity->rb_node); ++ bfq_extract(&st->active, entity); ++ ++ if (node != NULL) ++ bfq_update_active_tree(node); ++ ++ if (bfqq != NULL) ++ list_del(&bfqq->bfqq_list); ++} ++ ++/** ++ * bfq_idle_insert - insert an entity into the idle tree. ++ * @st: the service tree containing the tree. ++ * @entity: the entity to insert. ++ */ ++static void bfq_idle_insert(struct bfq_service_tree *st, ++ struct bfq_entity *entity) ++{ ++ struct bfq_queue *bfqq = bfq_entity_to_bfqq(entity); ++ struct bfq_entity *first_idle = st->first_idle; ++ struct bfq_entity *last_idle = st->last_idle; ++ ++ if (first_idle == NULL || bfq_gt(first_idle->finish, entity->finish)) ++ st->first_idle = entity; ++ if (last_idle == NULL || bfq_gt(entity->finish, last_idle->finish)) ++ st->last_idle = entity; ++ ++ bfq_insert(&st->idle, entity); ++ ++ if (bfqq != NULL) ++ list_add(&bfqq->bfqq_list, &bfqq->bfqd->idle_list); ++} ++ ++/** ++ * bfq_forget_entity - remove an entity from the wfq trees. ++ * @st: the service tree. ++ * @entity: the entity being removed. ++ * ++ * Update the device status and forget everything about @entity, putting ++ * the device reference to it, if it is a queue. Entities belonging to ++ * groups are not refcounted. ++ */ ++static void bfq_forget_entity(struct bfq_service_tree *st, ++ struct bfq_entity *entity) ++{ ++ struct bfq_queue *bfqq = bfq_entity_to_bfqq(entity); ++ struct bfq_sched_data *sd; ++ ++ BUG_ON(!entity->on_st); ++ ++ entity->on_st = 0; ++ st->wsum -= entity->weight; ++ if (bfqq != NULL) { ++ sd = entity->sched_data; ++ bfq_log_bfqq(bfqq->bfqd, bfqq, "forget_entity: %p %d", ++ bfqq, atomic_read(&bfqq->ref)); ++ bfq_put_queue(bfqq); ++ } ++} ++ ++/** ++ * bfq_put_idle_entity - release the idle tree ref of an entity. ++ * @st: service tree for the entity. ++ * @entity: the entity being released. ++ */ ++static void bfq_put_idle_entity(struct bfq_service_tree *st, ++ struct bfq_entity *entity) ++{ ++ bfq_idle_extract(st, entity); ++ bfq_forget_entity(st, entity); ++} ++ ++/** ++ * bfq_forget_idle - update the idle tree if necessary. ++ * @st: the service tree to act upon. ++ * ++ * To preserve the global O(log N) complexity we only remove one entry here; ++ * as the idle tree will not grow indefinitely this can be done safely. ++ */ ++static void bfq_forget_idle(struct bfq_service_tree *st) ++{ ++ struct bfq_entity *first_idle = st->first_idle; ++ struct bfq_entity *last_idle = st->last_idle; ++ ++ if (RB_EMPTY_ROOT(&st->active) && last_idle != NULL && ++ !bfq_gt(last_idle->finish, st->vtime)) { ++ /* ++ * Forget the whole idle tree, increasing the vtime past ++ * the last finish time of idle entities. 
++ */ ++ st->vtime = last_idle->finish; ++ } ++ ++ if (first_idle != NULL && !bfq_gt(first_idle->finish, st->vtime)) ++ bfq_put_idle_entity(st, first_idle); ++} ++ ++static struct bfq_service_tree * ++__bfq_entity_update_weight_prio(struct bfq_service_tree *old_st, ++ struct bfq_entity *entity) ++{ ++ struct bfq_service_tree *new_st = old_st; ++ ++ if (entity->ioprio_changed) { ++ struct bfq_queue *bfqq = bfq_entity_to_bfqq(entity); ++ ++ BUG_ON(old_st->wsum < entity->weight); ++ old_st->wsum -= entity->weight; ++ ++ if (entity->new_weight != entity->orig_weight) { ++ entity->orig_weight = entity->new_weight; ++ entity->ioprio = ++ bfq_weight_to_ioprio(entity->orig_weight); ++ } else if (entity->new_ioprio != entity->ioprio) { ++ entity->ioprio = entity->new_ioprio; ++ entity->orig_weight = ++ bfq_ioprio_to_weight(entity->ioprio); ++ } else ++ entity->new_weight = entity->orig_weight = ++ bfq_ioprio_to_weight(entity->ioprio); ++ ++ entity->ioprio_class = entity->new_ioprio_class; ++ entity->ioprio_changed = 0; ++ ++ /* ++ * NOTE: here we may be changing the weight too early, ++ * this will cause unfairness. The correct approach ++ * would have required additional complexity to defer ++ * weight changes to the proper time instants (i.e., ++ * when entity->finish <= old_st->vtime). ++ */ ++ new_st = bfq_entity_service_tree(entity); ++ entity->weight = entity->orig_weight * ++ (bfqq != NULL ? bfqq->raising_coeff : 1); ++ new_st->wsum += entity->weight; ++ ++ if (new_st != old_st) ++ entity->start = new_st->vtime; ++ } ++ ++ return new_st; ++} ++ ++/** ++ * bfq_bfqq_served - update the scheduler status after selection for service. ++ * @bfqq: the queue being served. ++ * @served: bytes to transfer. ++ * ++ * NOTE: this can be optimized, as the timestamps of upper level entities ++ * are synchronized every time a new bfqq is selected for service. By now, ++ * we keep it to better check consistency. ++ */ ++static void bfq_bfqq_served(struct bfq_queue *bfqq, unsigned long served) ++{ ++ struct bfq_entity *entity = &bfqq->entity; ++ struct bfq_service_tree *st; ++ ++ for_each_entity(entity) { ++ st = bfq_entity_service_tree(entity); ++ ++ entity->service += served; ++ BUG_ON(entity->service > entity->budget); ++ BUG_ON(st->wsum == 0); ++ ++ st->vtime += bfq_delta(served, st->wsum); ++ bfq_forget_idle(st); ++ } ++ bfq_log_bfqq(bfqq->bfqd, bfqq, "bfqq_served %lu secs", served); ++} ++ ++/** ++ * bfq_bfqq_charge_full_budget - set the service to the entity budget. ++ * @bfqq: the queue that needs a service update. ++ * ++ * When it's not possible to be fair in the service domain, because ++ * a queue is not consuming its budget fast enough (the meaning of ++ * fast depends on the timeout parameter), we charge it a full ++ * budget. In this way we should obtain a sort of time-domain ++ * fairness among all the seeky/slow queues. ++ */ ++static inline void bfq_bfqq_charge_full_budget(struct bfq_queue *bfqq) ++{ ++ struct bfq_entity *entity = &bfqq->entity; ++ ++ bfq_log_bfqq(bfqq->bfqd, bfqq, "charge_full_budget"); ++ ++ bfq_bfqq_served(bfqq, entity->budget - entity->service); ++} ++ ++/** ++ * __bfq_activate_entity - activate an entity. ++ * @entity: the entity being activated. ++ * ++ * Called whenever an entity is activated, i.e., it is not active and one ++ * of its children receives a new request, or has to be reactivated due to ++ * budget exhaustion. It uses the current budget of the entity (and the ++ * service received if @entity is active) of the queue to calculate its ++ * timestamps. 
++ */ ++static void __bfq_activate_entity(struct bfq_entity *entity) ++{ ++ struct bfq_sched_data *sd = entity->sched_data; ++ struct bfq_service_tree *st = bfq_entity_service_tree(entity); ++ ++ if (entity == sd->active_entity) { ++ BUG_ON(entity->tree != NULL); ++ /* ++ * If we are requeueing the current entity we have ++ * to take care of not charging to it service it has ++ * not received. ++ */ ++ bfq_calc_finish(entity, entity->service); ++ entity->start = entity->finish; ++ sd->active_entity = NULL; ++ } else if (entity->tree == &st->active) { ++ /* ++ * Requeueing an entity due to a change of some ++ * next_active entity below it. We reuse the old ++ * start time. ++ */ ++ bfq_active_extract(st, entity); ++ } else if (entity->tree == &st->idle) { ++ /* ++ * Must be on the idle tree, bfq_idle_extract() will ++ * check for that. ++ */ ++ bfq_idle_extract(st, entity); ++ entity->start = bfq_gt(st->vtime, entity->finish) ? ++ st->vtime : entity->finish; ++ } else { ++ /* ++ * The finish time of the entity may be invalid, and ++ * it is in the past for sure, otherwise the queue ++ * would have been on the idle tree. ++ */ ++ entity->start = st->vtime; ++ st->wsum += entity->weight; ++ bfq_get_entity(entity); ++ ++ BUG_ON(entity->on_st); ++ entity->on_st = 1; ++ } ++ ++ st = __bfq_entity_update_weight_prio(st, entity); ++ bfq_calc_finish(entity, entity->budget); ++ bfq_active_insert(st, entity); ++} ++ ++/** ++ * bfq_activate_entity - activate an entity and its ancestors if necessary. ++ * @entity: the entity to activate. ++ * ++ * Activate @entity and all the entities on the path from it to the root. ++ */ ++static void bfq_activate_entity(struct bfq_entity *entity) ++{ ++ struct bfq_sched_data *sd; ++ ++ for_each_entity(entity) { ++ __bfq_activate_entity(entity); ++ ++ sd = entity->sched_data; ++ if (!bfq_update_next_active(sd)) ++ /* ++ * No need to propagate the activation to the ++ * upper entities, as they will be updated when ++ * the active entity is rescheduled. ++ */ ++ break; ++ } ++} ++ ++/** ++ * __bfq_deactivate_entity - deactivate an entity from its service tree. ++ * @entity: the entity to deactivate. ++ * @requeue: if false, the entity will not be put into the idle tree. ++ * ++ * Deactivate an entity, independently from its previous state. If the ++ * entity was not on a service tree just return, otherwise if it is on ++ * any scheduler tree, extract it from that tree, and if necessary ++ * and if the caller did not specify @requeue, put it on the idle tree. ++ * ++ * Return %1 if the caller should update the entity hierarchy, i.e., ++ * if the entity was under service or if it was the next_active for ++ * its sched_data; return %0 otherwise. 
++ */ ++static int __bfq_deactivate_entity(struct bfq_entity *entity, int requeue) ++{ ++ struct bfq_sched_data *sd = entity->sched_data; ++ struct bfq_service_tree *st = bfq_entity_service_tree(entity); ++ int was_active = entity == sd->active_entity; ++ int ret = 0; ++ ++ if (!entity->on_st) ++ return 0; ++ ++ BUG_ON(was_active && entity->tree != NULL); ++ ++ if (was_active) { ++ bfq_calc_finish(entity, entity->service); ++ sd->active_entity = NULL; ++ } else if (entity->tree == &st->active) ++ bfq_active_extract(st, entity); ++ else if (entity->tree == &st->idle) ++ bfq_idle_extract(st, entity); ++ else if (entity->tree != NULL) ++ BUG(); ++ ++ if (was_active || sd->next_active == entity) ++ ret = bfq_update_next_active(sd); ++ ++ if (!requeue || !bfq_gt(entity->finish, st->vtime)) ++ bfq_forget_entity(st, entity); ++ else ++ bfq_idle_insert(st, entity); ++ ++ BUG_ON(sd->active_entity == entity); ++ BUG_ON(sd->next_active == entity); ++ ++ return ret; ++} ++ ++/** ++ * bfq_deactivate_entity - deactivate an entity. ++ * @entity: the entity to deactivate. ++ * @requeue: true if the entity can be put on the idle tree ++ */ ++static void bfq_deactivate_entity(struct bfq_entity *entity, int requeue) ++{ ++ struct bfq_sched_data *sd; ++ struct bfq_entity *parent; ++ ++ for_each_entity_safe(entity, parent) { ++ sd = entity->sched_data; ++ ++ if (!__bfq_deactivate_entity(entity, requeue)) ++ /* ++ * The parent entity is still backlogged, and ++ * we don't need to update it as it is still ++ * under service. ++ */ ++ break; ++ ++ if (sd->next_active != NULL) ++ /* ++ * The parent entity is still backlogged and ++ * the budgets on the path towards the root ++ * need to be updated. ++ */ ++ goto update; ++ ++ /* ++ * If we reach there the parent is no more backlogged and ++ * we want to propagate the dequeue upwards. ++ */ ++ requeue = 1; ++ } ++ ++ return; ++ ++update: ++ entity = parent; ++ for_each_entity(entity) { ++ __bfq_activate_entity(entity); ++ ++ sd = entity->sched_data; ++ if (!bfq_update_next_active(sd)) ++ break; ++ } ++} ++ ++/** ++ * bfq_update_vtime - update vtime if necessary. ++ * @st: the service tree to act upon. ++ * ++ * If necessary update the service tree vtime to have at least one ++ * eligible entity, skipping to its start time. Assumes that the ++ * active tree of the device is not empty. ++ * ++ * NOTE: this hierarchical implementation updates vtimes quite often, ++ * we may end up with reactivated tasks getting timestamps after a ++ * vtime skip done because we needed a ->first_active entity on some ++ * intermediate node. ++ */ ++static void bfq_update_vtime(struct bfq_service_tree *st) ++{ ++ struct bfq_entity *entry; ++ struct rb_node *node = st->active.rb_node; ++ ++ entry = rb_entry(node, struct bfq_entity, rb_node); ++ if (bfq_gt(entry->min_start, st->vtime)) { ++ st->vtime = entry->min_start; ++ bfq_forget_idle(st); ++ } ++} ++ ++/** ++ * bfq_first_active - find the eligible entity with the smallest finish time ++ * @st: the service tree to select from. ++ * ++ * This function searches the first schedulable entity, starting from the ++ * root of the tree and going on the left every time on this side there is ++ * a subtree with at least one eligible (start >= vtime) entity. The path ++ * on the right is followed only if a) the left subtree contains no eligible ++ * entities and b) no eligible entity has been found yet. 
++ */ ++static struct bfq_entity *bfq_first_active_entity(struct bfq_service_tree *st) ++{ ++ struct bfq_entity *entry, *first = NULL; ++ struct rb_node *node = st->active.rb_node; ++ ++ while (node != NULL) { ++ entry = rb_entry(node, struct bfq_entity, rb_node); ++left: ++ if (!bfq_gt(entry->start, st->vtime)) ++ first = entry; ++ ++ BUG_ON(bfq_gt(entry->min_start, st->vtime)); ++ ++ if (node->rb_left != NULL) { ++ entry = rb_entry(node->rb_left, ++ struct bfq_entity, rb_node); ++ if (!bfq_gt(entry->min_start, st->vtime)) { ++ node = node->rb_left; ++ goto left; ++ } ++ } ++ if (first != NULL) ++ break; ++ node = node->rb_right; ++ } ++ ++ BUG_ON(first == NULL && !RB_EMPTY_ROOT(&st->active)); ++ return first; ++} ++ ++/** ++ * __bfq_lookup_next_entity - return the first eligible entity in @st. ++ * @st: the service tree. ++ * ++ * Update the virtual time in @st and return the first eligible entity ++ * it contains. ++ */ ++static struct bfq_entity *__bfq_lookup_next_entity(struct bfq_service_tree *st, ++ bool force) ++{ ++ struct bfq_entity *entity, *new_next_active = NULL; ++ ++ if (RB_EMPTY_ROOT(&st->active)) ++ return NULL; ++ ++ bfq_update_vtime(st); ++ entity = bfq_first_active_entity(st); ++ BUG_ON(bfq_gt(entity->start, st->vtime)); ++ ++ /* ++ * If the chosen entity does not match with the sched_data's ++ * next_active and we are forcedly serving the IDLE priority ++ * class tree, bubble up budget update. ++ */ ++ if (unlikely(force && entity != entity->sched_data->next_active)) { ++ new_next_active = entity; ++ for_each_entity(new_next_active) ++ bfq_update_budget(new_next_active); ++ } ++ ++ return entity; ++} ++ ++/** ++ * bfq_lookup_next_entity - return the first eligible entity in @sd. ++ * @sd: the sched_data. ++ * @extract: if true the returned entity will be also extracted from @sd. ++ * ++ * NOTE: since we cache the next_active entity at each level of the ++ * hierarchy, the complexity of the lookup can be decreased with ++ * absolutely no effort just returning the cached next_active value; ++ * we prefer to do full lookups to test the consistency of * the data ++ * structures. ++ */ ++static struct bfq_entity *bfq_lookup_next_entity(struct bfq_sched_data *sd, ++ int extract, ++ struct bfq_data *bfqd) ++{ ++ struct bfq_service_tree *st = sd->service_tree; ++ struct bfq_entity *entity; ++ int i=0; ++ ++ BUG_ON(sd->active_entity != NULL); ++ ++ if (bfqd != NULL && ++ jiffies - bfqd->bfq_class_idle_last_service > BFQ_CL_IDLE_TIMEOUT) { ++ entity = __bfq_lookup_next_entity(st + BFQ_IOPRIO_CLASSES - 1, true); ++ if (entity != NULL) { ++ i = BFQ_IOPRIO_CLASSES - 1; ++ bfqd->bfq_class_idle_last_service = jiffies; ++ sd->next_active = entity; ++ } ++ } ++ for (; i < BFQ_IOPRIO_CLASSES; i++) { ++ entity = __bfq_lookup_next_entity(st + i, false); ++ if (entity != NULL) { ++ if (extract) { ++ bfq_check_next_active(sd, entity); ++ bfq_active_extract(st + i, entity); ++ sd->active_entity = entity; ++ sd->next_active = NULL; ++ } ++ break; ++ } ++ } ++ ++ return entity; ++} ++ ++/* ++ * Get next queue for service. 
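/*
 * Editor's aside (not part of the patch): the policy implemented by
 * bfq_first_active_entity() above is "among the eligible entities, i.e. those
 * with start <= vtime, pick the one with the smallest finish time". The
 * augmented rb-tree makes that O(log N); a brute-force scan over an array
 * expresses the same selection rule and can be used to sanity-check the idea.
 */
#include <stdio.h>

struct toy_entity {
	unsigned long long start, finish;
};

/* Return the index of the eligible entity with minimum finish, or -1. */
static int pick_next(const struct toy_entity *e, int n, unsigned long long vtime)
{
	int i, best = -1;

	for (i = 0; i < n; i++) {
		if (e[i].start > vtime)			/* not eligible yet */
			continue;
		if (best < 0 || e[i].finish < e[best].finish)
			best = i;
	}
	return best;
}

int main(void)
{
	struct toy_entity e[] = {
		{ .start = 0,  .finish = 90 },
		{ .start = 10, .finish = 40 },
		{ .start = 60, .finish = 20 },	/* smallest finish, but not yet eligible */
	};
	int n = sizeof(e) / sizeof(e[0]);

	printf("vtime=15 -> entity %d\n", pick_next(e, n, 15));	/* 1 */
	printf("vtime=70 -> entity %d\n", pick_next(e, n, 70));	/* 2 */
	return 0;
}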
++ */ ++static struct bfq_queue *bfq_get_next_queue(struct bfq_data *bfqd) ++{ ++ struct bfq_entity *entity = NULL; ++ struct bfq_sched_data *sd; ++ struct bfq_queue *bfqq; ++ ++ BUG_ON(bfqd->active_queue != NULL); ++ ++ if (bfqd->busy_queues == 0) ++ return NULL; ++ ++ sd = &bfqd->root_group->sched_data; ++ for (; sd != NULL; sd = entity->my_sched_data) { ++ entity = bfq_lookup_next_entity(sd, 1, bfqd); ++ BUG_ON(entity == NULL); ++ entity->service = 0; ++ } ++ ++ bfqq = bfq_entity_to_bfqq(entity); ++ BUG_ON(bfqq == NULL); ++ ++ return bfqq; ++} ++ ++/* ++ * Forced extraction of the given queue. ++ */ ++static void bfq_get_next_queue_forced(struct bfq_data *bfqd, ++ struct bfq_queue *bfqq) ++{ ++ struct bfq_entity *entity; ++ struct bfq_sched_data *sd; ++ ++ BUG_ON(bfqd->active_queue != NULL); ++ ++ entity = &bfqq->entity; ++ /* ++ * Bubble up extraction/update from the leaf to the root. ++ */ ++ for_each_entity(entity) { ++ sd = entity->sched_data; ++ bfq_update_budget(entity); ++ bfq_update_vtime(bfq_entity_service_tree(entity)); ++ bfq_active_extract(bfq_entity_service_tree(entity), entity); ++ sd->active_entity = entity; ++ sd->next_active = NULL; ++ entity->service = 0; ++ } ++ ++ return; ++} ++ ++static void __bfq_bfqd_reset_active(struct bfq_data *bfqd) ++{ ++ if (bfqd->active_bic != NULL) { ++ put_io_context(bfqd->active_bic->icq.ioc); ++ bfqd->active_bic = NULL; ++ } ++ ++ bfqd->active_queue = NULL; ++ del_timer(&bfqd->idle_slice_timer); ++} ++ ++static void bfq_deactivate_bfqq(struct bfq_data *bfqd, struct bfq_queue *bfqq, ++ int requeue) ++{ ++ struct bfq_entity *entity = &bfqq->entity; ++ ++ if (bfqq == bfqd->active_queue) ++ __bfq_bfqd_reset_active(bfqd); ++ ++ bfq_deactivate_entity(entity, requeue); ++} ++ ++static void bfq_activate_bfqq(struct bfq_data *bfqd, struct bfq_queue *bfqq) ++{ ++ struct bfq_entity *entity = &bfqq->entity; ++ ++ bfq_activate_entity(entity); ++} ++ ++/* ++ * Called when the bfqq no longer has requests pending, remove it from ++ * the service tree. ++ */ ++static void bfq_del_bfqq_busy(struct bfq_data *bfqd, struct bfq_queue *bfqq, ++ int requeue) ++{ ++ BUG_ON(!bfq_bfqq_busy(bfqq)); ++ BUG_ON(!RB_EMPTY_ROOT(&bfqq->sort_list)); ++ ++ bfq_log_bfqq(bfqd, bfqq, "del from busy"); ++ ++ bfq_clear_bfqq_busy(bfqq); ++ ++ BUG_ON(bfqd->busy_queues == 0); ++ bfqd->busy_queues--; ++ ++ bfq_deactivate_bfqq(bfqd, bfqq, requeue); ++} ++ ++/* ++ * Called when an inactive queue receives a new request. ++ */ ++static void bfq_add_bfqq_busy(struct bfq_data *bfqd, struct bfq_queue *bfqq) ++{ ++ BUG_ON(bfq_bfqq_busy(bfqq)); ++ BUG_ON(bfqq == bfqd->active_queue); ++ ++ bfq_log_bfqq(bfqd, bfqq, "add to busy"); ++ ++ bfq_activate_bfqq(bfqd, bfqq); ++ ++ bfq_mark_bfqq_busy(bfqq); ++ bfqd->busy_queues++; ++} +diff --git a/block/bfq.h block/bfq.h +new file mode 100644 +index 0000000..f487c32 +--- /dev/null ++++ block/bfq.h +@@ -0,0 +1,599 @@ ++/* ++ * BFQ-v5 for 3.3.0: data structures and common functions prototypes. 
++ * ++ * Based on ideas and code from CFQ: ++ * Copyright (C) 2003 Jens Axboe ++ * ++ * Copyright (C) 2008 Fabio Checconi ++ * Paolo Valente ++ */ ++ ++#ifndef _BFQ_H ++#define _BFQ_H ++ ++#include ++#include ++#include ++#include ++ ++#define BFQ_IOPRIO_CLASSES 3 ++#define BFQ_CL_IDLE_TIMEOUT HZ/5 ++ ++#define BFQ_MIN_WEIGHT 1 ++#define BFQ_MAX_WEIGHT 1000 ++ ++#define BFQ_DEFAULT_GRP_WEIGHT 10 ++#define BFQ_DEFAULT_GRP_IOPRIO 0 ++#define BFQ_DEFAULT_GRP_CLASS IOPRIO_CLASS_BE ++ ++struct bfq_entity; ++ ++/** ++ * struct bfq_service_tree - per ioprio_class service tree. ++ * @active: tree for active entities (i.e., those backlogged). ++ * @idle: tree for idle entities (i.e., those not backlogged, with V <= F_i). ++ * @first_idle: idle entity with minimum F_i. ++ * @last_idle: idle entity with maximum F_i. ++ * @vtime: scheduler virtual time. ++ * @wsum: scheduler weight sum; active and idle entities contribute to it. ++ * ++ * Each service tree represents a B-WF2Q+ scheduler on its own. Each ++ * ioprio_class has its own independent scheduler, and so its own ++ * bfq_service_tree. All the fields are protected by the queue lock ++ * of the containing bfqd. ++ */ ++struct bfq_service_tree { ++ struct rb_root active; ++ struct rb_root idle; ++ ++ struct bfq_entity *first_idle; ++ struct bfq_entity *last_idle; ++ ++ u64 vtime; ++ unsigned long wsum; ++}; ++ ++/** ++ * struct bfq_sched_data - multi-class scheduler. ++ * @active_entity: entity under service. ++ * @next_active: head-of-the-line entity in the scheduler. ++ * @service_tree: array of service trees, one per ioprio_class. ++ * ++ * bfq_sched_data is the basic scheduler queue. It supports three ++ * ioprio_classes, and can be used either as a toplevel queue or as ++ * an intermediate queue on a hierarchical setup. ++ * @next_active points to the active entity of the sched_data service ++ * trees that will be scheduled next. ++ * ++ * The supported ioprio_classes are the same as in CFQ, in descending ++ * priority order, IOPRIO_CLASS_RT, IOPRIO_CLASS_BE, IOPRIO_CLASS_IDLE. ++ * Requests from higher priority queues are served before all the ++ * requests from lower priority queues; among requests of the same ++ * queue requests are served according to B-WF2Q+. ++ * All the fields are protected by the queue lock of the containing bfqd. ++ */ ++struct bfq_sched_data { ++ struct bfq_entity *active_entity; ++ struct bfq_entity *next_active; ++ struct bfq_service_tree service_tree[BFQ_IOPRIO_CLASSES]; ++}; ++ ++/** ++ * struct bfq_entity - schedulable entity. ++ * @rb_node: service_tree member. ++ * @on_st: flag, true if the entity is on a tree (either the active or ++ * the idle one of its service_tree). ++ * @finish: B-WF2Q+ finish timestamp (aka F_i). ++ * @start: B-WF2Q+ start timestamp (aka S_i). ++ * @tree: tree the entity is enqueued into; %NULL if not on a tree. ++ * @min_start: minimum start time of the (active) subtree rooted at ++ * this entity; used for O(log N) lookups into active trees. ++ * @service: service received during the last round of service. ++ * @budget: budget used to calculate F_i; F_i = S_i + @budget / @weight. ++ * @weight: weight of the queue ++ * @parent: parent entity, for hierarchical scheduling. ++ * @my_sched_data: for non-leaf nodes in the cgroup hierarchy, the ++ * associated scheduler queue, %NULL on leaf nodes. ++ * @sched_data: the scheduler queue this entity belongs to. ++ * @ioprio: the ioprio in use. ++ * @new_weight: when a weight change is requested, the new weight value. 
++ * @orig_weight: original weight, used to implement weight boosting ++ * @new_ioprio: when an ioprio change is requested, the new ioprio value. ++ * @ioprio_class: the ioprio_class in use. ++ * @new_ioprio_class: when an ioprio_class change is requested, the new ++ * ioprio_class value. ++ * @ioprio_changed: flag, true when the user requested a weight, ioprio or ++ * ioprio_class change. ++ * ++ * A bfq_entity is used to represent either a bfq_queue (leaf node in the ++ * cgroup hierarchy) or a bfq_group into the upper level scheduler. Each ++ * entity belongs to the sched_data of the parent group in the cgroup ++ * hierarchy. Non-leaf entities have also their own sched_data, stored ++ * in @my_sched_data. ++ * ++ * Each entity stores independently its priority values; this would ++ * allow different weights on different devices, but this ++ * functionality is not exported to userspace by now. Priorities and ++ * weights are updated lazily, first storing the new values into the ++ * new_* fields, then setting the @ioprio_changed flag. As soon as ++ * there is a transition in the entity state that allows the priority ++ * update to take place the effective and the requested priority ++ * values are synchronized. ++ * ++ * Unless cgroups are used, the weight value is calculated from the ++ * ioprio to export the same interface as CFQ. When dealing with ++ * ``well-behaved'' queues (i.e., queues that do not spend too much ++ * time to consume their budget and have true sequential behavior, and ++ * when there are no external factors breaking anticipation) the ++ * relative weights at each level of the cgroups hierarchy should be ++ * guaranteed. All the fields are protected by the queue lock of the ++ * containing bfqd. ++ */ ++struct bfq_entity { ++ struct rb_node rb_node; ++ ++ int on_st; ++ ++ u64 finish; ++ u64 start; ++ ++ struct rb_root *tree; ++ ++ u64 min_start; ++ ++ unsigned long service, budget; ++ unsigned short weight, new_weight; ++ unsigned short orig_weight; ++ ++ struct bfq_entity *parent; ++ ++ struct bfq_sched_data *my_sched_data; ++ struct bfq_sched_data *sched_data; ++ ++ unsigned short ioprio, new_ioprio; ++ unsigned short ioprio_class, new_ioprio_class; ++ ++ int ioprio_changed; ++}; ++ ++struct bfq_group; ++ ++/** ++ * struct bfq_queue - leaf schedulable entity. ++ * @ref: reference counter. ++ * @bfqd: parent bfq_data. ++ * @new_bfqq: shared bfq_queue if queue is cooperating with ++ * one or more other queues. ++ * @pos_node: request-position tree member (see bfq_data's @rq_pos_tree). ++ * @pos_root: request-position tree root (see bfq_data's @rq_pos_tree). ++ * @sort_list: sorted list of pending requests. ++ * @next_rq: if fifo isn't expired, next request to serve. ++ * @queued: nr of requests queued in @sort_list. ++ * @allocated: currently allocated requests. ++ * @meta_pending: pending metadata requests. ++ * @fifo: fifo list of requests in sort_list. ++ * @entity: entity representing this queue in the scheduler. ++ * @max_budget: maximum budget allowed from the feedback mechanism. ++ * @budget_timeout: budget expiration (in jiffies). ++ * @dispatched: number of requests on the dispatch list or inside driver. ++ * @org_ioprio: saved ioprio during boosted periods. ++ * @flags: status flags. ++ * @bfqq_list: node for active/idle bfqq list inside our bfqd. 
++ * @seek_samples: number of seeks sampled ++ * @seek_total: sum of the distances of the seeks sampled ++ * @seek_mean: mean seek distance ++ * @last_request_pos: position of the last request enqueued ++ * @pid: pid of the process owning the queue, used for logging purposes. ++ * @last_rais_start_time: last (idle -> weight-raised) transition attempt ++ * @raising_cur_max_time: current max raising time for this queue ++ * ++ * A bfq_queue is a leaf request queue; it can be associated to an io_context ++ * or more (if it is an async one). @cgroup holds a reference to the ++ * cgroup, to be sure that it does not disappear while a bfqq still ++ * references it (mostly to avoid races between request issuing and task ++ * migration followed by cgroup distruction). ++ * All the fields are protected by the queue lock of the containing bfqd. ++ */ ++struct bfq_queue { ++ atomic_t ref; ++ struct bfq_data *bfqd; ++ ++ /* fields for cooperating queues handling */ ++ struct bfq_queue *new_bfqq; ++ struct rb_node pos_node; ++ struct rb_root *pos_root; ++ ++ struct rb_root sort_list; ++ struct request *next_rq; ++ int queued[2]; ++ int allocated[2]; ++ int meta_pending; ++ struct list_head fifo; ++ ++ struct bfq_entity entity; ++ ++ unsigned long max_budget; ++ unsigned long budget_timeout; ++ ++ int dispatched; ++ ++ unsigned short org_ioprio; ++ ++ unsigned int flags; ++ ++ struct list_head bfqq_list; ++ ++ unsigned int seek_samples; ++ u64 seek_total; ++ sector_t seek_mean; ++ sector_t last_request_pos; ++ ++ pid_t pid; ++ ++ /* weight-raising fields */ ++ unsigned int raising_cur_max_time; ++ u64 last_rais_start_finish, soft_rt_next_start; ++ unsigned int raising_coeff; ++}; ++ ++/** ++ * struct bfq_ttime - per process thinktime stats. ++ * @ttime_total: total process thinktime ++ * @ttime_samples: number of thinktime samples ++ * @ttime_mean: average process thinktime ++ */ ++struct bfq_ttime { ++ unsigned long last_end_request; ++ ++ unsigned long ttime_total; ++ unsigned long ttime_samples; ++ unsigned long ttime_mean; ++}; ++ ++/** ++ * struct bfq_io_cq - per (request_queue, io_context) structure. ++ * @icq: associated io_cq structure ++ * @bfqq: array of two process queues, the sync and the async ++ * @ttime: associated @bfq_ttime struct ++ */ ++struct bfq_io_cq { ++ struct io_cq icq; /* must be the first member */ ++ struct bfq_queue *bfqq[2]; ++ struct bfq_ttime ttime; ++}; ++ ++/** ++ * struct bfq_data - per device data structure. ++ * @queue: request queue for the managed device. ++ * @root_group: root bfq_group for the device. ++ * @rq_pos_tree: rbtree sorted by next_request position, ++ * used when determining if two or more queues ++ * have interleaving requests (see bfq_close_cooperator). ++ * @busy_queues: number of bfq_queues containing requests (including the ++ * queue under service, even if it is idling). ++ * @queued: number of queued requests. ++ * @rq_in_driver: number of requests dispatched and waiting for completion. ++ * @sync_flight: number of sync requests in the driver. ++ * @max_rq_in_driver: max number of reqs in driver in the last @hw_tag_samples ++ * completed requests . ++ * @hw_tag_samples: nr of samples used to calculate hw_tag. ++ * @hw_tag: flag set to one if the driver is showing a queueing behavior. ++ * @budgets_assigned: number of budgets assigned. ++ * @idle_slice_timer: timer set when idling for the next sequential request ++ * from the queue under service. ++ * @unplug_work: delayed work to restart dispatching on the request queue. 
++ * @active_queue: bfq_queue under service.
++ * @active_bic: bfq_io_cq (bic) associated with the @active_queue.
++ * @last_position: on-disk position of the last served request.
++ * @last_budget_start: beginning of the last budget.
++ * @last_idling_start: beginning of the last idle slice.
++ * @peak_rate: peak transfer rate observed for a budget.
++ * @peak_rate_samples: number of samples used to calculate @peak_rate.
++ * @bfq_max_budget: maximum budget allotted to a bfq_queue before rescheduling.
++ * @group_list: list of all the bfq_groups active on the device.
++ * @active_list: list of all the bfq_queues active on the device.
++ * @idle_list: list of all the bfq_queues idle on the device.
++ * @bfq_quantum: max number of requests dispatched per dispatch round.
++ * @bfq_fifo_expire: timeout for async/sync requests; when it expires
++ * requests are served in fifo order.
++ * @bfq_back_penalty: weight of backward seeks wrt forward ones.
++ * @bfq_back_max: maximum allowed backward seek.
++ * @bfq_slice_idle: maximum idling time.
++ * @bfq_user_max_budget: user-configured max budget value (0 for auto-tuning).
++ * @bfq_max_budget_async_rq: maximum budget (in nr of requests) allotted to
++ * async queues.
++ * @bfq_timeout: timeout for bfq_queues to consume their budget; used to
++ * prevent seeky queues from imposing long latencies on well
++ * behaved ones (this also implies that seeky queues cannot
++ * receive guarantees in the service domain; after a timeout
++ * they are charged for the whole allocated budget, to try
++ * to preserve a behavior reasonably fair among them, but
++ * without service-domain guarantees).
++ * @bfq_raising_coeff: Maximum factor by which the weight of a boosted
++ * queue is multiplied
++ * @bfq_raising_max_time: maximum duration of a weight-raising period (jiffies)
++ * @bfq_raising_rt_max_time: maximum duration for soft real-time processes
++ * @bfq_raising_min_idle_time: minimum idle period after which weight-raising
++ * may be reactivated for a queue (in jiffies)
++ * @bfq_raising_min_inter_arr_async: minimum period between request arrivals
++ * after which weight-raising may be
++ * reactivated for an already busy queue
++ * (in jiffies)
++ * @bfq_raising_max_softrt_rate: max service-rate for a soft real-time queue,
++ * sectors per second
++ * @RT_prod: cached value of the product R*T used for computing the maximum
++ * duration of the weight raising automatically
++ * @oom_bfqq: fallback dummy bfqq for extreme OOM conditions
++ *
++ * All the fields are protected by the @queue lock.
++ */ ++struct bfq_data { ++ struct request_queue *queue; ++ ++ struct bfq_group *root_group; ++ ++ struct rb_root rq_pos_tree; ++ ++ int busy_queues; ++ int queued; ++ int rq_in_driver; ++ int sync_flight; ++ ++ int max_rq_in_driver; ++ int hw_tag_samples; ++ int hw_tag; ++ ++ int budgets_assigned; ++ ++ struct timer_list idle_slice_timer; ++ struct work_struct unplug_work; ++ ++ struct bfq_queue *active_queue; ++ struct bfq_io_cq *active_bic; ++ ++ sector_t last_position; ++ ++ ktime_t last_budget_start; ++ ktime_t last_idling_start; ++ int peak_rate_samples; ++ u64 peak_rate; ++ unsigned long bfq_max_budget; ++ ++ struct hlist_head group_list; ++ struct list_head active_list; ++ struct list_head idle_list; ++ ++ unsigned int bfq_quantum; ++ unsigned int bfq_fifo_expire[2]; ++ unsigned int bfq_back_penalty; ++ unsigned int bfq_back_max; ++ unsigned int bfq_slice_idle; ++ u64 bfq_class_idle_last_service; ++ ++ unsigned int bfq_user_max_budget; ++ unsigned int bfq_max_budget_async_rq; ++ unsigned int bfq_timeout[2]; ++ ++ bool low_latency; ++ ++ /* parameters of the low_latency heuristics */ ++ unsigned int bfq_raising_coeff; ++ unsigned int bfq_raising_max_time; ++ unsigned int bfq_raising_rt_max_time; ++ unsigned int bfq_raising_min_idle_time; ++ unsigned int bfq_raising_min_inter_arr_async; ++ unsigned int bfq_raising_max_softrt_rate; ++ u64 RT_prod; ++ ++ struct bfq_queue oom_bfqq; ++}; ++ ++enum bfqq_state_flags { ++ BFQ_BFQQ_FLAG_busy = 0, /* has requests or is under service */ ++ BFQ_BFQQ_FLAG_wait_request, /* waiting for a request */ ++ BFQ_BFQQ_FLAG_must_alloc, /* must be allowed rq alloc */ ++ BFQ_BFQQ_FLAG_fifo_expire, /* FIFO checked in this slice */ ++ BFQ_BFQQ_FLAG_idle_window, /* slice idling enabled */ ++ BFQ_BFQQ_FLAG_prio_changed, /* task priority has changed */ ++ BFQ_BFQQ_FLAG_sync, /* synchronous queue */ ++ BFQ_BFQQ_FLAG_budget_new, /* no completion with this budget */ ++ BFQ_BFQQ_FLAG_coop, /* bfqq is shared */ ++ BFQ_BFQQ_FLAG_split_coop, /* shared bfqq will be splitted */ ++ BFQ_BFQQ_FLAG_some_coop_idle, /* some cooperator is inactive */ ++}; ++ ++#define BFQ_BFQQ_FNS(name) \ ++static inline void bfq_mark_bfqq_##name(struct bfq_queue *bfqq) \ ++{ \ ++ (bfqq)->flags |= (1 << BFQ_BFQQ_FLAG_##name); \ ++} \ ++static inline void bfq_clear_bfqq_##name(struct bfq_queue *bfqq) \ ++{ \ ++ (bfqq)->flags &= ~(1 << BFQ_BFQQ_FLAG_##name); \ ++} \ ++static inline int bfq_bfqq_##name(const struct bfq_queue *bfqq) \ ++{ \ ++ return ((bfqq)->flags & (1 << BFQ_BFQQ_FLAG_##name)) != 0; \ ++} ++ ++BFQ_BFQQ_FNS(busy); ++BFQ_BFQQ_FNS(wait_request); ++BFQ_BFQQ_FNS(must_alloc); ++BFQ_BFQQ_FNS(fifo_expire); ++BFQ_BFQQ_FNS(idle_window); ++BFQ_BFQQ_FNS(prio_changed); ++BFQ_BFQQ_FNS(sync); ++BFQ_BFQQ_FNS(budget_new); ++BFQ_BFQQ_FNS(coop); ++BFQ_BFQQ_FNS(split_coop); ++BFQ_BFQQ_FNS(some_coop_idle); ++#undef BFQ_BFQQ_FNS ++ ++/* Logging facilities. */ ++#define bfq_log_bfqq(bfqd, bfqq, fmt, args...) \ ++ blk_add_trace_msg((bfqd)->queue, "bfq%d " fmt, (bfqq)->pid, ##args) ++ ++#define bfq_log(bfqd, fmt, args...) \ ++ blk_add_trace_msg((bfqd)->queue, "bfq " fmt, ##args) ++ ++/* Expiration reasons. */ ++enum bfqq_expiration { ++ BFQ_BFQQ_TOO_IDLE = 0, /* queue has been idling for too long */ ++ BFQ_BFQQ_BUDGET_TIMEOUT, /* budget took too long to be used */ ++ BFQ_BFQQ_BUDGET_EXHAUSTED, /* budget consumed */ ++ BFQ_BFQQ_NO_MORE_REQUESTS, /* the queue has no more requests */ ++}; ++ ++#ifdef CONFIG_CGROUP_BFQIO ++/** ++ * struct bfq_group - per (device, cgroup) data structure. 
++ * @entity: schedulable entity to insert into the parent group sched_data. ++ * @sched_data: own sched_data, to contain child entities (they may be ++ * both bfq_queues and bfq_groups). ++ * @group_node: node to be inserted into the bfqio_cgroup->group_data ++ * list of the containing cgroup's bfqio_cgroup. ++ * @bfqd_node: node to be inserted into the @bfqd->group_list list ++ * of the groups active on the same device; used for cleanup. ++ * @bfqd: the bfq_data for the device this group acts upon. ++ * @async_bfqq: array of async queues for all the tasks belonging to ++ * the group, one queue per ioprio value per ioprio_class, ++ * except for the idle class that has only one queue. ++ * @async_idle_bfqq: async queue for the idle class (ioprio is ignored). ++ * @my_entity: pointer to @entity, %NULL for the toplevel group; used ++ * to avoid too many special cases during group creation/migration. ++ * ++ * Each (device, cgroup) pair has its own bfq_group, i.e., for each cgroup ++ * there is a set of bfq_groups, each one collecting the lower-level ++ * entities belonging to the group that are acting on the same device. ++ * ++ * Locking works as follows: ++ * o @group_node is protected by the bfqio_cgroup lock, and is accessed ++ * via RCU from its readers. ++ * o @bfqd is protected by the queue lock, RCU is used to access it ++ * from the readers. ++ * o All the other fields are protected by the @bfqd queue lock. ++ */ ++struct bfq_group { ++ struct bfq_entity entity; ++ struct bfq_sched_data sched_data; ++ ++ struct hlist_node group_node; ++ struct hlist_node bfqd_node; ++ ++ void *bfqd; ++ ++ struct bfq_queue *async_bfqq[2][IOPRIO_BE_NR]; ++ struct bfq_queue *async_idle_bfqq; ++ ++ struct bfq_entity *my_entity; ++}; ++ ++/** ++ * struct bfqio_cgroup - bfq cgroup data structure. ++ * @css: subsystem state for bfq in the containing cgroup. ++ * @weight: cgroup weight. ++ * @ioprio: cgroup ioprio. ++ * @ioprio_class: cgroup ioprio_class. ++ * @lock: spinlock that protects @ioprio, @ioprio_class and @group_data. ++ * @group_data: list containing the bfq_group belonging to this cgroup. ++ * ++ * @group_data is accessed using RCU, with @lock protecting the updates, ++ * @ioprio and @ioprio_class are protected by @lock. ++ */ ++struct bfqio_cgroup { ++ struct cgroup_subsys_state css; ++ ++ unsigned short weight, ioprio, ioprio_class; ++ ++ spinlock_t lock; ++ struct hlist_head group_data; ++}; ++#else ++struct bfq_group { ++ struct bfq_sched_data sched_data; ++ ++ struct bfq_queue *async_bfqq[2][IOPRIO_BE_NR]; ++ struct bfq_queue *async_idle_bfqq; ++}; ++#endif ++ ++static inline struct bfq_service_tree * ++bfq_entity_service_tree(struct bfq_entity *entity) ++{ ++ struct bfq_sched_data *sched_data = entity->sched_data; ++ unsigned int idx = entity->ioprio_class - 1; ++ ++ BUG_ON(idx >= BFQ_IOPRIO_CLASSES); ++ BUG_ON(sched_data == NULL); ++ ++ return sched_data->service_tree + idx; ++} ++ ++static inline struct bfq_queue *bic_to_bfqq(struct bfq_io_cq *bic, ++ int is_sync) ++{ ++ return bic->bfqq[!!is_sync]; ++} ++ ++static inline void bic_set_bfqq(struct bfq_io_cq *bic, ++ struct bfq_queue *bfqq, int is_sync) ++{ ++ bic->bfqq[!!is_sync] = bfqq; ++} ++ ++static inline struct bfq_data *bic_to_bfqd(struct bfq_io_cq *bic) ++{ ++ return bic->icq.q->elevator->elevator_data; ++} ++ ++/** ++ * bfq_get_bfqd_locked - get a lock to a bfqd using a RCU protected pointer. ++ * @ptr: a pointer to a bfqd. ++ * @flags: storage for the flags to be saved. 
++ * ++ * This function allows bfqg->bfqd to be protected by the ++ * queue lock of the bfqd they reference; the pointer is dereferenced ++ * under RCU, so the storage for bfqd is assured to be safe as long ++ * as the RCU read side critical section does not end. After the ++ * bfqd->queue->queue_lock is taken the pointer is rechecked, to be ++ * sure that no other writer accessed it. If we raced with a writer, ++ * the function returns NULL, with the queue unlocked, otherwise it ++ * returns the dereferenced pointer, with the queue locked. ++ */ ++static inline struct bfq_data *bfq_get_bfqd_locked(void **ptr, ++ unsigned long *flags) ++{ ++ struct bfq_data *bfqd; ++ ++ rcu_read_lock(); ++ bfqd = rcu_dereference(*(struct bfq_data **)ptr); ++ ++ if (bfqd != NULL) { ++ spin_lock_irqsave(bfqd->queue->queue_lock, *flags); ++ if (*ptr == bfqd) ++ goto out; ++ spin_unlock_irqrestore(bfqd->queue->queue_lock, *flags); ++ } ++ ++ bfqd = NULL; ++out: ++ rcu_read_unlock(); ++ return bfqd; ++} ++ ++static inline void bfq_put_bfqd_unlock(struct bfq_data *bfqd, ++ unsigned long *flags) ++{ ++ spin_unlock_irqrestore(bfqd->queue->queue_lock, *flags); ++} ++ ++static void bfq_changed_ioprio(struct io_context *ioc, ++ struct bfq_io_cq *bic); ++static void bfq_put_queue(struct bfq_queue *bfqq); ++static void bfq_dispatch_insert(struct request_queue *q, struct request *rq); ++static struct bfq_queue *bfq_get_queue(struct bfq_data *bfqd, ++ struct bfq_group *bfqg, int is_sync, ++ struct io_context *ioc, gfp_t gfp_mask); ++static void bfq_put_async_queues(struct bfq_data *bfqd, struct bfq_group *bfqg); ++static void bfq_exit_bfqq(struct bfq_data *bfqd, struct bfq_queue *bfqq); ++#endif +-- +1.7.10.4 + diff --git a/3.3.8/0003-AppArmor-Allow-dfa-backward-compatibility-with-broke.patch b/3.3.8/0003-AppArmor-Allow-dfa-backward-compatibility-with-broke.patch new file mode 100644 index 0000000..be32585 --- /dev/null +++ b/3.3.8/0003-AppArmor-Allow-dfa-backward-compatibility-with-broke.patch @@ -0,0 +1,69 @@ +From 7a10d093f9779f42cb8d6affcb6a4436d3ebd6d3 Mon Sep 17 00:00:00 2001 +From: John Johansen +Date: Wed, 10 Aug 2011 22:02:41 -0700 +Subject: [PATCH 3/3] AppArmor: Allow dfa backward compatibility with broken + userspace + +The apparmor_parser when compiling policy could generate invalid dfas +that did not have sufficient padding to avoid invalid references, when +used by the kernel. The kernels check to verify the next/check table +size was broken meaning invalid dfas were being created by userspace +and not caught. + +To remain compatible with old tools that are not fixed, pad the loaded +dfas next/check table. The dfa's themselves are valid except for the +high padding for potentially invalid transitions (high bounds error), +which have a maximimum is 256 entries. So just allocate an extra null filled +256 entries for the next/check tables. This will guarentee all bounds +are good and invalid transitions go to the null (0) state. 
+ +Signed-off-by: John Johansen +--- + security/apparmor/match.c | 17 +++++++++++++++++ + 1 files changed, 17 insertions(+), 0 deletions(-) + +diff --git a/security/apparmor/match.c b/security/apparmor/match.c +index 94de6b4..081491e 100644 +--- a/security/apparmor/match.c ++++ b/security/apparmor/match.c +@@ -57,8 +57,17 @@ static struct table_header *unpack_table(char *blob, size_t bsize) + if (bsize < tsize) + goto out; + ++ /* Pad table allocation for next/check by 256 entries to remain ++ * backwards compatible with old (buggy) tools and remain safe without ++ * run time checks ++ */ ++ if (th.td_id == YYTD_ID_NXT || th.td_id == YYTD_ID_CHK) ++ tsize += 256 * th.td_flags; ++ + table = kvmalloc(tsize); + if (table) { ++ /* ensure the pad is clear, else there will be errors */ ++ memset(table, 0, tsize); + *table = th; + if (th.td_flags == YYTD_DATA8) + UNPACK_ARRAY(table->td_data, blob, th.td_lolen, +@@ -134,11 +143,19 @@ static int verify_dfa(struct aa_dfa *dfa, int flags) + goto out; + + if (flags & DFA_FLAG_VERIFY_STATES) { ++ int warning = 0; + for (i = 0; i < state_count; i++) { + if (DEFAULT_TABLE(dfa)[i] >= state_count) + goto out; + /* TODO: do check that DEF state recursion terminates */ + if (BASE_TABLE(dfa)[i] + 255 >= trans_count) { ++ if (warning) ++ continue; ++ printk(KERN_WARNING "AppArmor DFA next/check " ++ "upper bounds error fixed, upgrade " ++ "user space tools \n"); ++ warning = 1; ++ } else if (BASE_TABLE(dfa)[i] >= trans_count) { + printk(KERN_ERR "AppArmor DFA next/check upper " + "bounds error\n"); + goto out; +-- +1.7.5.4 + diff --git a/3.3.8/01patch-2.6.33_atopcnt.patch b/3.3.8/01patch-2.6.33_atopcnt.patch new file mode 100644 index 0000000..28bf733 --- /dev/null +++ b/3.3.8/01patch-2.6.33_atopcnt.patch @@ -0,0 +1,174 @@ +diff --git a/block/blk-core.c b/block/blk-core.c +index d1a9a0a..8b54acb 100644 +--- a/block/blk-core.c ++++ b/block/blk-core.c +@@ -73,6 +73,17 @@ static void drive_stat_acct(struct request *rq, int new_io) + part_inc_in_flight(part, rw); + } + ++ switch (rw) { /* ATOP */ ++ case READ: /* ATOP */ ++ current->group_leader->stat.dsk_rio += new_io; /* ATOP */ ++ current->group_leader->stat.dsk_rsz += blk_rq_sectors(rq); /* ATOP */ ++ break; /* ATOP */ ++ case WRITE: /* ATOP */ ++ current->group_leader->stat.dsk_wio += new_io; /* ATOP */ ++ current->group_leader->stat.dsk_wsz += blk_rq_sectors(rq); /* ATOP */ ++ break; /* ATOP */ ++ } /* ATOP */ ++ + part_stat_unlock(); + } + +diff --git a/fs/proc/array.c b/fs/proc/array.c +index 13b5d07..cac522e 100644 +--- a/fs/proc/array.c ++++ b/fs/proc/array.c +@@ -515,6 +515,25 @@ static int do_task_stat(struct seq_file *m, struct pid_namespace *ns, + (unsigned long long)delayacct_blkio_ticks(task), + cputime_to_clock_t(gtime), + cputime_to_clock_t(cgtime)); ++ ++ seq_printf(m, /* ATOP */ ++ "%lu %llu %lu %llu %lu %llu %lu " /* ATOP */ ++ "%llu %lu %llu %lu %llu %lu %lu\n", /* ATOP */ ++ task->stat.dsk_rio, /* ATOP */ ++ task->stat.dsk_rsz, /* ATOP */ ++ task->stat.dsk_wio, /* ATOP */ ++ task->stat.dsk_wsz, /* ATOP */ ++ task->stat.tcp_snd, /* ATOP */ ++ task->stat.tcp_ssz, /* ATOP */ ++ task->stat.tcp_rcv, /* ATOP */ ++ task->stat.tcp_rsz, /* ATOP */ ++ task->stat.udp_snd, /* ATOP */ ++ task->stat.udp_ssz, /* ATOP */ ++ task->stat.udp_rcv, /* ATOP */ ++ task->stat.udp_rsz, /* ATOP */ ++ task->stat.raw_snd, /* ATOP */ ++ task->stat.raw_rcv); /* ATOP */ ++ + if (mm) + mmput(mm); + return 0; +diff --git a/include/linux/sched.h b/include/linux/sched.h +index 78efe7c..22391bf 100644 +--- 
a/include/linux/sched.h ++++ b/include/linux/sched.h +@@ -1512,6 +1512,17 @@ struct task_struct { + #endif + atomic_t fs_excl; /* holding fs exclusive resources */ + struct rcu_head rcu; ++ ++ struct { /* ATOP */ ++ unsigned long dsk_rio, dsk_wio; /* ATOP */ ++ unsigned long long dsk_rsz, dsk_wsz; /* ATOP */ ++ unsigned long tcp_snd, tcp_rcv; /* ATOP */ ++ unsigned long long tcp_ssz, tcp_rsz; /* ATOP */ ++ unsigned long udp_snd, udp_rcv; /* ATOP */ ++ unsigned long long udp_ssz, udp_rsz; /* ATOP */ ++ unsigned long raw_snd, raw_rcv; /* ATOP */ ++ } stat; /* ATOP */ ++ + + /* + * cache last used pipe for splice +diff --git a/kernel/acct.c b/kernel/acct.c +index a6605ca..d5df53a 100644 +--- a/kernel/acct.c ++++ b/kernel/acct.c +@@ -565,7 +565,7 @@ static void do_acct_process(struct bsd_acct_struct *acct, + ac.ac_exitcode = pacct->ac_exitcode; + spin_unlock_irq(¤t->sighand->siglock); + ac.ac_io = encode_comp_t(0 /* current->io_usage */); /* %% */ +- ac.ac_rw = encode_comp_t(ac.ac_io / 1024); ++ ac.ac_rw = encode_comp_t(current->stat.dsk_rio + current->stat.dsk_wio); /* ATOP */ + ac.ac_swaps = encode_comp_t(0); + + /* +diff --git a/kernel/fork.c b/kernel/fork.c +index f88bd98..bab2085 100644 +--- a/kernel/fork.c ++++ b/kernel/fork.c +@@ -683,6 +683,14 @@ static int copy_mm(unsigned long clone_flags, struct task_struct * tsk) + + tsk->min_flt = tsk->maj_flt = 0; + tsk->nvcsw = tsk->nivcsw = 0; ++ tsk->stat.dsk_rio = tsk->stat.dsk_wio = 0; /* ATOP */ ++ tsk->stat.dsk_rsz = tsk->stat.dsk_wsz = 0; /* ATOP */ ++ tsk->stat.tcp_snd = tsk->stat.tcp_rcv = 0; /* ATOP */ ++ tsk->stat.tcp_ssz = tsk->stat.tcp_rsz = 0; /* ATOP */ ++ tsk->stat.udp_snd = tsk->stat.udp_rcv = 0; /* ATOP */ ++ tsk->stat.udp_ssz = tsk->stat.udp_rsz = 0; /* ATOP */ ++ tsk->stat.raw_snd = tsk->stat.raw_rcv = 0; /* ATOP */ ++ + #ifdef CONFIG_DETECT_HUNG_TASK + tsk->last_switch_count = tsk->nvcsw + tsk->nivcsw; + #endif +diff --git a/net/socket.c b/net/socket.c +index 769c386..3ba19f6 100644 +--- a/net/socket.c ++++ b/net/socket.c +@@ -547,10 +547,28 @@ static inline int __sock_sendmsg(struct kiocb *iocb, struct socket *sock, + si->size = size; + + err = security_socket_sendmsg(sock, msg, size); +- if (err) +- return err; +- +- return sock->ops->sendmsg(iocb, sock, msg, size); ++ if (!err) ++ err = sock->ops->sendmsg(iocb, sock, msg, size); ++ ++ if (err >= 0 && sock->sk) { /* ATOP */ ++ switch (sock->sk->sk_family) { /* ATOP */ ++ case PF_INET: /* ATOP */ ++ case PF_INET6: /* ATOP */ ++ switch (sock->sk->sk_type) { /* ATOP */ ++ case SOCK_STREAM: /* ATOP */ ++ current->group_leader->stat.tcp_snd++; /* ATOP */ ++ current->group_leader->stat.tcp_ssz+=size;/* ATOP */ ++ break; /* ATOP */ ++ case SOCK_DGRAM: /* ATOP */ ++ current->group_leader->stat.udp_snd++; /* ATOP */ ++ current->group_leader->stat.udp_ssz+=size;/* ATOP */ ++ break; /* ATOP */ ++ case SOCK_RAW: /* ATOP */ ++ current->group_leader->stat.raw_snd++; /* ATOP */ ++ } /* ATOP */ ++ } /* ATOP */ ++ } /* ATOP */ ++ return err; + } + + int sock_sendmsg(struct socket *sock, struct msghdr *msg, size_t size) +@@ -682,7 +700,29 @@ static inline int __sock_recvmsg(struct kiocb *iocb, struct socket *sock, + { + int err = security_socket_recvmsg(sock, msg, size, flags); + +- return err ?: __sock_recvmsg_nosec(iocb, sock, msg, size, flags); ++ if (!err) ++ err = __sock_recvmsg_nosec(iocb, sock, msg, size, flags); ++ ++ if (err >= 0 && sock->sk) { /* ATOP */ ++ switch (sock->sk->sk_family) { /* ATOP */ ++ case PF_INET: /* ATOP */ ++ case PF_INET6: /* ATOP */ ++ switch 
(sock->sk->sk_type) { /* ATOP */ ++ case SOCK_STREAM: /* ATOP */ ++ current->group_leader->stat.tcp_rcv++; /* ATOP */ ++ current->group_leader->stat.tcp_rsz+=err; /* ATOP */ ++ break; /* ATOP */ ++ case SOCK_DGRAM: /* ATOP */ ++ current->group_leader->stat.udp_rcv++; /* ATOP */ ++ current->group_leader->stat.udp_rsz+=err; /* ATOP */ ++ break; /* ATOP */ ++ case SOCK_RAW: /* ATOP */ ++ current->group_leader->stat.raw_rcv++; /* ATOP */ ++ break; /* ATOP */ ++ } /* ATOP */ ++ } /* ATOP */ ++ } /* ATOP */ ++ return err; + } + + int sock_recvmsg(struct socket *sock, struct msghdr *msg, diff --git a/3.3.8/02patch-2.6.33_atopacct.patch b/3.3.8/02patch-2.6.33_atopacct.patch new file mode 100644 index 0000000..74e6a1c --- /dev/null +++ b/3.3.8/02patch-2.6.33_atopacct.patch @@ -0,0 +1,125 @@ +Index: linux-2.6.28/include/linux/acct.h +=================================================================== +--- linux-2.6.28.orig/include/linux/acct.h 2009-01-14 13:02:24.000000000 +0100 ++++ linux-2.6.28/include/linux/acct.h 2009-01-14 13:03:33.000000000 +0100 +@@ -97,6 +97,54 @@ + char ac_comm[ACCT_COMM]; /* Command Name */ + }; + ++struct acct_atop ++{ ++ char ac_flag; /* Flags */ ++ char ac_version; /* Always set to ACCT_VERSION */ ++ __u32 ac_pid; /* Process ID */ ++ __u32 ac_ppid; /* Parent Process ID */ ++ __u16 ac_uid16; /* LSB of Real User ID */ ++ __u16 ac_gid16; /* LSB of Real Group ID */ ++ __u16 ac_tty; /* Control Terminal */ ++ __u32 ac_btime; /* Process Creation Time */ ++ comp_t ac_utime; /* User Time */ ++ comp_t ac_stime; /* System Time */ ++ comp_t ac_etime; /* Elapsed Time */ ++ comp_t ac_mem; /* Virtual Memory */ ++ comp_t ac_rss; /* Resident Memory */ ++ comp_t ac_io; /* Chars Transferred */ ++ comp_t ac_rw; /* Blocks Read or Written */ ++ comp_t ac_bread; /* Blocks Read */ ++ comp_t ac_bwrite; /* Blocks Written */ ++ comp2_t ac_dskrsz; /* Cum. blocks read */ ++ comp2_t ac_dskwsz; /* Cum. blocks written */ ++ comp_t ac_tcpsnd; /* TCP send requests */ ++ comp_t ac_tcprcv; /* TCP recv requests */ ++ comp2_t ac_tcpssz; /* TCP cum. length */ ++ comp2_t ac_tcprsz; /* TCP cum. length */ ++ comp_t ac_udpsnd; /* UDP send requests */ ++ comp_t ac_udprcv; /* UDP recv requests */ ++ comp2_t ac_udpssz; /* UDP cum. length */ ++ comp2_t ac_udprsz; /* UDP cum. length */ ++ comp_t ac_rawsnd; /* RAW send requests */ ++ comp_t ac_rawrcv; /* RAW recv requests */ ++ comp_t ac_minflt; /* Minor Pagefaults */ ++ comp_t ac_majflt; /* Major Pagefaults */ ++ comp_t ac_swaps; /* Number of Swaps */ ++/* m68k had no padding here. 
*/ ++#if !defined(CONFIG_M68K) || !defined(__KERNEL__) ++ __u16 ac_ahz; /* AHZ */ ++#endif ++ __u32 ac_exitcode; /* Exitcode */ ++ char ac_comm[ACCT_COMM + 1]; /* Command Name */ ++ __u8 ac_etime_hi; /* Elapsed Time MSB */ ++ __u16 ac_etime_lo; /* Elapsed Time LSB */ ++ __u32 ac_uid; /* Real User ID */ ++ __u32 ac_gid; /* Real Group ID */ ++}; ++ ++ ++ + /* + * accounting flags + */ +@@ -146,7 +194,13 @@ + * 5: new binary incompatible format (128 bytes, second half) + * + */ ++#define CONFIG_PROCESS_ACCT_ATOP + ++#ifdef CONFIG_PROCESS_ACCT_ATOP ++#define ACCT_VERSION 6 ++#define AHZ (USER_HZ) ++typedef struct acct_atop acct_t; ++#else + #ifdef CONFIG_BSD_PROCESS_ACCT_V3 + #define ACCT_VERSION 3 + #define AHZ 100 +@@ -160,6 +214,7 @@ + #define AHZ (USER_HZ) + typedef struct acct acct_t; + #endif ++#endif + + #else + #define ACCT_VERSION 2 +Index: linux-2.6.28/kernel/acct.c +=================================================================== +--- linux-2.6.28.orig/kernel/acct.c 2009-01-14 13:03:31.000000000 +0100 ++++ linux-2.6.28/kernel/acct.c 2009-01-14 13:03:33.000000000 +0100 +@@ -405,7 +405,7 @@ + return exp; + } + +-#if ACCT_VERSION==1 || ACCT_VERSION==2 ++#if ACCT_VERSION==1 || ACCT_VERSION==2 || ACCT_VERSION==6 + /* + * encode an u64 into a comp2_t (24 bits) + * +@@ -552,6 +552,30 @@ + ac.ac_ppid = task_tgid_nr_ns(rcu_dereference(current->real_parent), ns); + rcu_read_unlock(); + #endif ++#if ACCT_VERSION==6 /* ATOP */ ++ ac.ac_pid = current->pid; ++ ac.ac_ppid = current->parent->pid; ++ ac.ac_uid16 = ac.ac_uid; ++ ac.ac_gid16 = ac.ac_gid; ++ ac.ac_ahz = AHZ; ++ ac.ac_bread = encode_comp_t(current->stat.dsk_rio); ++ ac.ac_bwrite = encode_comp_t(current->stat.dsk_wio); ++ ac.ac_dskrsz = encode_comp2_t(current->stat.dsk_rsz); ++ ac.ac_dskwsz = encode_comp2_t(current->stat.dsk_wsz); ++ ac.ac_tcpsnd = encode_comp_t(current->stat.tcp_snd); ++ ac.ac_tcprcv = encode_comp_t(current->stat.tcp_rcv); ++ ac.ac_tcpssz = encode_comp2_t(current->stat.tcp_ssz); ++ ac.ac_tcprsz = encode_comp2_t(current->stat.tcp_rsz); ++ ac.ac_udpsnd = encode_comp_t(current->stat.udp_snd); ++ ac.ac_udprcv = encode_comp_t(current->stat.udp_rcv); ++ ac.ac_udpssz = encode_comp2_t(current->stat.udp_ssz); ++ ac.ac_udprsz = encode_comp2_t(current->stat.udp_rsz); ++ ac.ac_rawsnd = encode_comp_t(current->stat.raw_snd); ++ ac.ac_rawrcv = encode_comp_t(current->stat.raw_rcv); ++ ac.ac_rss = current->mm ? ++ encode_comp_t(get_mm_rss(current->mm)<<(PAGE_SHIFT-10)) : ++ encode_comp_t(0); ++#endif + + spin_lock_irq(¤t->sighand->siglock); + tty = current->signal->tty; /* Safe as we hold the siglock */ diff --git a/3.3.8/3.3-ck1.patch b/3.3.8/3.3-ck1.patch new file mode 100644 index 0000000..9c58ff0 --- /dev/null +++ b/3.3.8/3.3-ck1.patch @@ -0,0 +1,8782 @@ +Index: linux-3.3-ck1/arch/powerpc/platforms/cell/spufs/sched.c +=================================================================== +--- linux-3.3-ck1.orig/arch/powerpc/platforms/cell/spufs/sched.c 2012-03-24 19:30:00.013420381 +1100 ++++ linux-3.3-ck1/arch/powerpc/platforms/cell/spufs/sched.c 2012-03-24 19:30:29.038925740 +1100 +@@ -63,11 +63,6 @@ static struct timer_list spusched_timer; + static struct timer_list spuloadavg_timer; + + /* +- * Priority of a normal, non-rt, non-niced'd process (aka nice level 0). +- */ +-#define NORMAL_PRIO 120 +- +-/* + * Frequency of the spu scheduler tick. By default we do one SPU scheduler + * tick for every 10 CPU scheduler ticks. 
+ */ +Index: linux-3.3-ck1/Documentation/scheduler/sched-BFS.txt +=================================================================== +--- /dev/null 1970-01-01 00:00:00.000000000 +0000 ++++ linux-3.3-ck1/Documentation/scheduler/sched-BFS.txt 2012-03-24 19:30:29.038925740 +1100 +@@ -0,0 +1,347 @@ ++BFS - The Brain Fuck Scheduler by Con Kolivas. ++ ++Goals. ++ ++The goal of the Brain Fuck Scheduler, referred to as BFS from here on, is to ++completely do away with the complex designs of the past for the cpu process ++scheduler and instead implement one that is very simple in basic design. ++The main focus of BFS is to achieve excellent desktop interactivity and ++responsiveness without heuristics and tuning knobs that are difficult to ++understand, impossible to model and predict the effect of, and when tuned to ++one workload cause massive detriment to another. ++ ++ ++Design summary. ++ ++BFS is best described as a single runqueue, O(n) lookup, earliest effective ++virtual deadline first design, loosely based on EEVDF (earliest eligible virtual ++deadline first) and my previous Staircase Deadline scheduler. Each component ++shall be described in order to understand the significance of, and reasoning for ++it. The codebase when the first stable version was released was approximately ++9000 lines less code than the existing mainline linux kernel scheduler (in ++2.6.31). This does not even take into account the removal of documentation and ++the cgroups code that is not used. ++ ++Design reasoning. ++ ++The single runqueue refers to the queued but not running processes for the ++entire system, regardless of the number of CPUs. The reason for going back to ++a single runqueue design is that once multiple runqueues are introduced, ++per-CPU or otherwise, there will be complex interactions as each runqueue will ++be responsible for the scheduling latency and fairness of the tasks only on its ++own runqueue, and to achieve fairness and low latency across multiple CPUs, any ++advantage in throughput of having CPU local tasks causes other disadvantages. ++This is due to requiring a very complex balancing system to at best achieve some ++semblance of fairness across CPUs and can only maintain relatively low latency ++for tasks bound to the same CPUs, not across them. To increase said fairness ++and latency across CPUs, the advantage of local runqueue locking, which makes ++for better scalability, is lost due to having to grab multiple locks. ++ ++A significant feature of BFS is that all accounting is done purely based on CPU ++used and nowhere is sleep time used in any way to determine entitlement or ++interactivity. Interactivity "estimators" that use some kind of sleep/run ++algorithm are doomed to fail to detect all interactive tasks, and to falsely tag ++tasks that aren't interactive as being so. The reason for this is that it is ++close to impossible to determine that when a task is sleeping, whether it is ++doing it voluntarily, as in a userspace application waiting for input in the ++form of a mouse click or otherwise, or involuntarily, because it is waiting for ++another thread, process, I/O, kernel activity or whatever. Thus, such an ++estimator will introduce corner cases, and more heuristics will be required to ++cope with those corner cases, introducing more corner cases and failed ++interactivity detection and so on. 
Interactivity in BFS is built into the design ++by virtue of the fact that tasks that are waking up have not used up their quota ++of CPU time, and have earlier effective deadlines, thereby making it very likely ++they will preempt any CPU bound task of equivalent nice level. See below for ++more information on the virtual deadline mechanism. Even if they do not preempt ++a running task, because the rr interval is guaranteed to have a bound upper ++limit on how long a task will wait for, it will be scheduled within a timeframe ++that will not cause visible interface jitter. ++ ++ ++Design details. ++ ++Task insertion. ++ ++BFS inserts tasks into each relevant queue as an O(1) insertion into a double ++linked list. On insertion, *every* running queue is checked to see if the newly ++queued task can run on any idle queue, or preempt the lowest running task on the ++system. This is how the cross-CPU scheduling of BFS achieves significantly lower ++latency per extra CPU the system has. In this case the lookup is, in the worst ++case scenario, O(n) where n is the number of CPUs on the system. ++ ++Data protection. ++ ++BFS has one single lock protecting the process local data of every task in the ++global queue. Thus every insertion, removal and modification of task data in the ++global runqueue needs to grab the global lock. However, once a task is taken by ++a CPU, the CPU has its own local data copy of the running process' accounting ++information which only that CPU accesses and modifies (such as during a ++timer tick) thus allowing the accounting data to be updated lockless. Once a ++CPU has taken a task to run, it removes it from the global queue. Thus the ++global queue only ever has, at most, ++ ++ (number of tasks requesting cpu time) - (number of logical CPUs) + 1 ++ ++tasks in the global queue. This value is relevant for the time taken to look up ++tasks during scheduling. This will increase if many tasks with CPU affinity set ++in their policy to limit which CPUs they're allowed to run on if they outnumber ++the number of CPUs. The +1 is because when rescheduling a task, the CPU's ++currently running task is put back on the queue. Lookup will be described after ++the virtual deadline mechanism is explained. ++ ++Virtual deadline. ++ ++The key to achieving low latency, scheduling fairness, and "nice level" ++distribution in BFS is entirely in the virtual deadline mechanism. The one ++tunable in BFS is the rr_interval, or "round robin interval". This is the ++maximum time two SCHED_OTHER (or SCHED_NORMAL, the common scheduling policy) ++tasks of the same nice level will be running for, or looking at it the other ++way around, the longest duration two tasks of the same nice level will be ++delayed for. When a task requests cpu time, it is given a quota (time_slice) ++equal to the rr_interval and a virtual deadline. The virtual deadline is ++offset from the current time in jiffies by this equation: ++ ++ jiffies + (prio_ratio * rr_interval) ++ ++The prio_ratio is determined as a ratio compared to the baseline of nice -20 ++and increases by 10% per nice level. The deadline is a virtual one only in that ++no guarantee is placed that a task will actually be scheduled by this time, but ++it is used to compare which task should go next. There are three components to ++how a task is next chosen. First is time_slice expiration. If a task runs out ++of its time_slice, it is descheduled, the time_slice is refilled, and the ++deadline reset to that formula above. 
Second is sleep, where a task no longer ++is requesting CPU for whatever reason. The time_slice and deadline are _not_ ++adjusted in this case and are just carried over for when the task is next ++scheduled. Third is preemption, and that is when a newly waking task is deemed ++higher priority than a currently running task on any cpu by virtue of the fact ++that it has an earlier virtual deadline than the currently running task. The ++earlier deadline is the key to which task is next chosen for the first and ++second cases. Once a task is descheduled, it is put back on the queue, and an ++O(n) lookup of all queued-but-not-running tasks is done to determine which has ++the earliest deadline and that task is chosen to receive CPU next. ++ ++The CPU proportion of different nice tasks works out to be approximately the ++ ++ (prio_ratio difference)^2 ++ ++The reason it is squared is that a task's deadline does not change while it is ++running unless it runs out of time_slice. Thus, even if the time actually ++passes the deadline of another task that is queued, it will not get CPU time ++unless the current running task deschedules, and the time "base" (jiffies) is ++constantly moving. ++ ++Task lookup. ++ ++BFS has 103 priority queues. 100 of these are dedicated to the static priority ++of realtime tasks, and the remaining 3 are, in order of best to worst priority, ++SCHED_ISO (isochronous), SCHED_NORMAL, and SCHED_IDLEPRIO (idle priority ++scheduling). When a task of these priorities is queued, a bitmap of running ++priorities is set showing which of these priorities has tasks waiting for CPU ++time. When a CPU is made to reschedule, the lookup for the next task to get ++CPU time is performed in the following way: ++ ++First the bitmap is checked to see what static priority tasks are queued. If ++any realtime priorities are found, the corresponding queue is checked and the ++first task listed there is taken (provided CPU affinity is suitable) and lookup ++is complete. If the priority corresponds to a SCHED_ISO task, they are also ++taken in FIFO order (as they behave like SCHED_RR). If the priority corresponds ++to either SCHED_NORMAL or SCHED_IDLEPRIO, then the lookup becomes O(n). At this ++stage, every task in the runlist that corresponds to that priority is checked ++to see which has the earliest set deadline, and (provided it has suitable CPU ++affinity) it is taken off the runqueue and given the CPU. If a task has an ++expired deadline, it is taken and the rest of the lookup aborted (as they are ++chosen in FIFO order). ++ ++Thus, the lookup is O(n) in the worst case only, where n is as described ++earlier, as tasks may be chosen before the whole task list is looked over. ++ ++ ++Scalability. ++ ++The major limitations of BFS will be that of scalability, as the separate ++runqueue designs will have less lock contention as the number of CPUs rises. ++However they do not scale linearly even with separate runqueues as multiple ++runqueues will need to be locked concurrently on such designs to be able to ++achieve fair CPU balancing, to try and achieve some sort of nice-level fairness ++across CPUs, and to achieve low enough latency for tasks on a busy CPU when ++other CPUs would be more suited. BFS has the advantage that it requires no ++balancing algorithm whatsoever, as balancing occurs by proxy simply because ++all CPUs draw off the global runqueue, in priority and deadline order. 
Despite
++the fact that scalability is _not_ the prime concern of BFS, it both shows very
++good scalability to smaller numbers of CPUs and is likely a more scalable design
++at these numbers of CPUs.
++
++It also has some very low overhead scalability features built into the design
++when it has been deemed their overhead is so marginal that they're worth adding.
++The first is the local copy of the running process' data to the CPU it's running
++on to allow that data to be updated lockless where possible. Then there is
++deference paid to the last CPU a task was running on, by trying that CPU first
++when looking for an idle CPU to use the next time it's scheduled. Finally there
++is the notion of "sticky" tasks that are flagged when they are involuntarily
++descheduled, meaning they still want further CPU time. This sticky flag is
++used to bias heavily against those tasks being scheduled on a different CPU
++unless that CPU would be otherwise idle. When a cpu frequency governor is used
++that scales with CPU load, such as ondemand, sticky tasks are not scheduled
++on a different CPU at all, preferring instead to go idle. This means the CPU
++they were bound to is more likely to increase its speed while the other CPU
++will go idle, thus speeding up total task execution time and likely decreasing
++power usage. This is the only scenario where BFS will allow a CPU to go idle
++in preference to scheduling a task on the earliest available spare CPU.
++
++The real cost of migrating a task from one CPU to another is entirely dependent
++on the cache footprint of the task, how cache intensive the task is, how long
++it's been running on that CPU to take up the bulk of its cache, how big the CPU
++cache is, how fast and how layered the CPU cache is, how fast a context switch
++is... and so on. In other words, it's close to random in the real world where we
++do more than just one sole workload. The only thing we can be sure of is that
++it's not free. So BFS uses the principle that an idle CPU is a wasted CPU and
++utilising idle CPUs is more important than cache locality, and cache locality
++only plays a part after that.
++
++When choosing an idle CPU for a waking task, the cache locality is determined
++according to where the task last ran and then idle CPUs are ranked from best
++to worst to choose the most suitable idle CPU based on cache locality, NUMA
++node locality and hyperthread sibling busyness. They are chosen in the
++following preference (if idle):
++
++* Same core, idle or busy cache, idle threads
++* Other core, same cache, idle or busy cache, idle threads.
++* Same node, other CPU, idle cache, idle threads.
++* Same node, other CPU, busy cache, idle threads.
++* Same core, busy threads.
++* Other core, same cache, busy threads.
++* Same node, other CPU, busy threads.
++* Other node, other CPU, idle cache, idle threads.
++* Other node, other CPU, busy cache, idle threads.
++* Other node, other CPU, busy threads.
++
++This shows the SMT or "hyperthread" awareness in the design as well which will
++choose a real idle core first before a logical SMT sibling which already has
++tasks on the physical CPU.
++
++Early benchmarking of BFS suggested scalability dropped off at the 16 CPU mark.
++However this benchmarking was performed on an earlier design that was far less
++scalable than the current one so it's hard to know how scalable it is in terms
++of both CPUs (due to the global runqueue) and heavily loaded machines (due to
++O(n) lookup) at this stage. 
Note that in terms of scalability, the number of ++_logical_ CPUs matters, not the number of _physical_ CPUs. Thus, a dual (2x) ++quad core (4X) hyperthreaded (2X) machine is effectively a 16X. Newer benchmark ++results are very promising indeed, without needing to tweak any knobs, features ++or options. Benchmark contributions are most welcome. ++ ++ ++Features ++ ++As the initial prime target audience for BFS was the average desktop user, it ++was designed to not need tweaking, tuning or have features set to obtain benefit ++from it. Thus the number of knobs and features has been kept to an absolute ++minimum and should not require extra user input for the vast majority of cases. ++There are precisely 2 tunables, and 2 extra scheduling policies. The rr_interval ++and iso_cpu tunables, and the SCHED_ISO and SCHED_IDLEPRIO policies. In addition ++to this, BFS also uses sub-tick accounting. What BFS does _not_ now feature is ++support for CGROUPS. The average user should neither need to know what these ++are, nor should they need to be using them to have good desktop behaviour. ++ ++rr_interval ++ ++There is only one "scheduler" tunable, the round robin interval. This can be ++accessed in ++ ++ /proc/sys/kernel/rr_interval ++ ++The value is in milliseconds, and the default value is set to 6ms. Valid values ++are from 1 to 1000. Decreasing the value will decrease latencies at the cost of ++decreasing throughput, while increasing it will improve throughput, but at the ++cost of worsening latencies. The accuracy of the rr interval is limited by HZ ++resolution of the kernel configuration. Thus, the worst case latencies are ++usually slightly higher than this actual value. BFS uses "dithering" to try and ++minimise the effect the Hz limitation has. The default value of 6 is not an ++arbitrary one. It is based on the fact that humans can detect jitter at ++approximately 7ms, so aiming for much lower latencies is pointless under most ++circumstances. It is worth noting this fact when comparing the latency ++performance of BFS to other schedulers. Worst case latencies being higher than ++7ms are far worse than average latencies not being in the microsecond range. ++Experimentation has shown that rr intervals being increased up to 300 can ++improve throughput but beyond that, scheduling noise from elsewhere prevents ++further demonstrable throughput. ++ ++Isochronous scheduling. ++ ++Isochronous scheduling is a unique scheduling policy designed to provide ++near-real-time performance to unprivileged (ie non-root) users without the ++ability to starve the machine indefinitely. Isochronous tasks (which means ++"same time") are set using, for example, the schedtool application like so: ++ ++ schedtool -I -e amarok ++ ++This will start the audio application "amarok" as SCHED_ISO. How SCHED_ISO works ++is that it has a priority level between true realtime tasks and SCHED_NORMAL ++which would allow them to preempt all normal tasks, in a SCHED_RR fashion (ie, ++if multiple SCHED_ISO tasks are running, they purely round robin at rr_interval ++rate). However if ISO tasks run for more than a tunable finite amount of time, ++they are then demoted back to SCHED_NORMAL scheduling. This finite amount of ++time is the percentage of _total CPU_ available across the machine, configurable ++as a percentage in the following "resource handling" tunable (as opposed to a ++scheduler tunable): ++ ++ /proc/sys/kernel/iso_cpu ++ ++and is set to 70% by default. 
It is calculated over a rolling 5 second average.
++Because it is the total CPU available, it means that on a multi CPU machine, it
++is possible to have an ISO task running as realtime scheduling indefinitely on
++just one CPU, as the other CPUs will be available. Setting this to 100 is the
++equivalent of giving all users SCHED_RR access and setting it to 0 removes the
++ability to run any pseudo-realtime tasks.
++
++A feature of BFS is that it detects when an application tries to obtain a
++realtime policy (SCHED_RR or SCHED_FIFO) and the caller does not have the
++appropriate privileges to use those policies. When it detects this, it will
++give the task SCHED_ISO policy instead. Thus it is transparent to the user.
++Because some applications constantly set their policy as well as their nice
++level, there is potential for them to undo the override specified by the user
++on the command line of setting the policy to SCHED_ISO. To counter this, once
++a task has been set to SCHED_ISO policy, it needs superuser privileges to set
++it back to SCHED_NORMAL. This will ensure the task remains ISO and all child
++processes and threads will also inherit the ISO policy.
++
++Idleprio scheduling.
++
++Idleprio scheduling is a scheduling policy designed to give out CPU to a task
++_only_ when the CPU would be otherwise idle. The idea behind this is to allow
++ultra low priority tasks to be run in the background that have virtually no
++effect on the foreground tasks. This is ideally suited to distributed computing
++clients (like setiathome, folding, mprime etc) but can also be used to start
++a video encode or so on without any slowdown of other tasks. To avoid this
++policy from grabbing shared resources and holding them indefinitely, if it
++detects a state where the task is waiting on I/O, the machine is about to
++suspend to ram and so on, it will transiently schedule them as SCHED_NORMAL. As
++per the Isochronous task management, once a task has been scheduled as IDLEPRIO,
++it cannot be put back to SCHED_NORMAL without superuser privileges. Tasks can
++be set to start as SCHED_IDLEPRIO with the schedtool command like so:
++
++ schedtool -D -e ./mprime
++
++Subtick accounting.
++
++It is surprisingly difficult to get accurate CPU accounting, and in many cases,
++the accounting is done by simply determining what is happening at the precise
++moment a timer tick fires off. This becomes increasingly inaccurate as the
++timer tick frequency (HZ) is lowered. It is possible to create an application
++which uses almost 100% CPU, yet by being descheduled at the right time, records
++zero CPU usage. While the main problem with this is that there are possible
++security implications, it is also difficult to determine how much CPU a task
++really does use. BFS tries to use the sub-tick accounting from the TSC clock,
++where possible, to determine real CPU usage. This is not entirely reliable, but
++is far more likely to produce accurate CPU usage data than the existing designs
++and will not show tasks as consuming no CPU usage when they actually are. Thus,
++the amount of CPU reported as being used by BFS will more accurately represent
++how much CPU the task itself is using (as is shown for example by the 'time'
++application), so the reported values may be quite different to other schedulers.
++Values reported as the 'load' are more prone to problems with this design, but
++per process values are closer to real usage. 
When comparing throughput of BFS ++to other designs, it is important to compare the actual completed work in terms ++of total wall clock time taken and total work done, rather than the reported ++"cpu usage". ++ ++ ++Con Kolivas Tue, 5 Apr 2011 +Index: linux-3.3-ck1/Documentation/sysctl/kernel.txt +=================================================================== +--- linux-3.3-ck1.orig/Documentation/sysctl/kernel.txt 2012-03-24 19:30:00.012420362 +1100 ++++ linux-3.3-ck1/Documentation/sysctl/kernel.txt 2012-03-24 19:30:29.039925758 +1100 +@@ -33,6 +33,7 @@ show up in /proc/sys/kernel: + - domainname + - hostname + - hotplug ++- iso_cpu + - kptr_restrict + - kstack_depth_to_print [ X86 only ] + - l2cr [ PPC only ] +@@ -59,6 +60,7 @@ show up in /proc/sys/kernel: + - randomize_va_space + - real-root-dev ==> Documentation/initrd.txt + - reboot-cmd [ SPARC only ] ++- rr_interval + - rtsig-max + - rtsig-nr + - sem +@@ -301,6 +303,16 @@ kernel stack. + + ============================================================== + ++iso_cpu: (BFS CPU scheduler only). ++ ++This sets the percentage cpu that the unprivileged SCHED_ISO tasks can ++run effectively at realtime priority, averaged over a rolling five ++seconds over the -whole- system, meaning all cpus. ++ ++Set to 70 (percent) by default. ++ ++============================================================== ++ + l2cr: (PPC only) + + This flag controls the L2 cache of G3 processor boards. If +@@ -517,6 +529,20 @@ rebooting. ??? + + ============================================================== + ++rr_interval: (BFS CPU scheduler only) ++ ++This is the smallest duration that any cpu process scheduling unit ++will run for. Increasing this value can increase throughput of cpu ++bound tasks substantially but at the expense of increased latencies ++overall. Conversely decreasing it will decrease average and maximum ++latencies but at the expense of throughput. This value is in ++milliseconds and the default value chosen depends on the number of ++cpus available at scheduler initialisation with a minimum of 6. ++ ++Valid values are from 1-1000. ++ ++============================================================== ++ + rtsig-max & rtsig-nr: + + The file rtsig-max can be used to tune the maximum number +Index: linux-3.3-ck1/fs/proc/base.c +=================================================================== +--- linux-3.3-ck1.orig/fs/proc/base.c 2012-03-24 19:30:00.013420381 +1100 ++++ linux-3.3-ck1/fs/proc/base.c 2012-03-24 19:30:29.039925758 +1100 +@@ -342,7 +342,7 @@ static int proc_pid_stack(struct seq_fil + static int proc_pid_schedstat(struct task_struct *task, char *buffer) + { + return sprintf(buffer, "%llu %llu %lu\n", +- (unsigned long long)task->se.sum_exec_runtime, ++ (unsigned long long)tsk_seruntime(task), + (unsigned long long)task->sched_info.run_delay, + task->sched_info.pcount); + } +Index: linux-3.3-ck1/include/linux/init_task.h +=================================================================== +--- linux-3.3-ck1.orig/include/linux/init_task.h 2012-03-24 19:30:00.013420381 +1100 ++++ linux-3.3-ck1/include/linux/init_task.h 2012-03-24 19:30:29.039925758 +1100 +@@ -125,12 +125,70 @@ extern struct cred init_cred; + # define INIT_PERF_EVENTS(tsk) + #endif + +-#define INIT_TASK_COMM "swapper" +- + /* + * INIT_TASK is used to set up the first task table, touch at + * your own risk!. 
Base=0, limit=0x1fffff (=2MB) + */ ++#ifdef CONFIG_SCHED_BFS ++#define INIT_TASK_COMM "BFS" ++#define INIT_TASK(tsk) \ ++{ \ ++ .state = 0, \ ++ .stack = &init_thread_info, \ ++ .usage = ATOMIC_INIT(2), \ ++ .flags = PF_KTHREAD, \ ++ .prio = NORMAL_PRIO, \ ++ .static_prio = MAX_PRIO-20, \ ++ .normal_prio = NORMAL_PRIO, \ ++ .deadline = 0, \ ++ .policy = SCHED_NORMAL, \ ++ .cpus_allowed = CPU_MASK_ALL, \ ++ .mm = NULL, \ ++ .active_mm = &init_mm, \ ++ .run_list = LIST_HEAD_INIT(tsk.run_list), \ ++ .time_slice = HZ, \ ++ .tasks = LIST_HEAD_INIT(tsk.tasks), \ ++ INIT_PUSHABLE_TASKS(tsk) \ ++ .ptraced = LIST_HEAD_INIT(tsk.ptraced), \ ++ .ptrace_entry = LIST_HEAD_INIT(tsk.ptrace_entry), \ ++ .real_parent = &tsk, \ ++ .parent = &tsk, \ ++ .children = LIST_HEAD_INIT(tsk.children), \ ++ .sibling = LIST_HEAD_INIT(tsk.sibling), \ ++ .group_leader = &tsk, \ ++ RCU_INIT_POINTER(.real_cred, &init_cred), \ ++ RCU_INIT_POINTER(.cred, &init_cred), \ ++ .comm = INIT_TASK_COMM, \ ++ .thread = INIT_THREAD, \ ++ .fs = &init_fs, \ ++ .files = &init_files, \ ++ .signal = &init_signals, \ ++ .sighand = &init_sighand, \ ++ .nsproxy = &init_nsproxy, \ ++ .pending = { \ ++ .list = LIST_HEAD_INIT(tsk.pending.list), \ ++ .signal = {{0}}}, \ ++ .blocked = {{0}}, \ ++ .alloc_lock = __SPIN_LOCK_UNLOCKED(tsk.alloc_lock), \ ++ .journal_info = NULL, \ ++ .cpu_timers = INIT_CPU_TIMERS(tsk.cpu_timers), \ ++ .pi_lock = __RAW_SPIN_LOCK_UNLOCKED(tsk.pi_lock), \ ++ .timer_slack_ns = 50000, /* 50 usec default slack */ \ ++ .pids = { \ ++ [PIDTYPE_PID] = INIT_PID_LINK(PIDTYPE_PID), \ ++ [PIDTYPE_PGID] = INIT_PID_LINK(PIDTYPE_PGID), \ ++ [PIDTYPE_SID] = INIT_PID_LINK(PIDTYPE_SID), \ ++ }, \ ++ INIT_IDS \ ++ INIT_PERF_EVENTS(tsk) \ ++ INIT_TRACE_IRQFLAGS \ ++ INIT_LOCKDEP \ ++ INIT_FTRACE_GRAPH \ ++ INIT_TRACE_RECURSION \ ++ INIT_TASK_RCU_PREEMPT(tsk) \ ++} ++#else /* CONFIG_SCHED_BFS */ ++#define INIT_TASK_COMM "swapper" + #define INIT_TASK(tsk) \ + { \ + .state = 0, \ +@@ -193,7 +251,7 @@ extern struct cred init_cred; + INIT_TRACE_RECURSION \ + INIT_TASK_RCU_PREEMPT(tsk) \ + } +- ++#endif /* CONFIG_SCHED_BFS */ + + #define INIT_CPU_TIMERS(cpu_timers) \ + { \ +Index: linux-3.3-ck1/include/linux/ioprio.h +=================================================================== +--- linux-3.3-ck1.orig/include/linux/ioprio.h 2012-03-24 19:30:00.013420381 +1100 ++++ linux-3.3-ck1/include/linux/ioprio.h 2012-03-24 19:30:29.039925758 +1100 +@@ -64,6 +64,8 @@ static inline int task_ioprio_class(stru + + static inline int task_nice_ioprio(struct task_struct *task) + { ++ if (iso_task(task)) ++ return 0; + return (task_nice(task) + 20) / 5; + } + +Index: linux-3.3-ck1/include/linux/sched.h +=================================================================== +--- linux-3.3-ck1.orig/include/linux/sched.h 2012-03-24 19:30:00.013420381 +1100 ++++ linux-3.3-ck1/include/linux/sched.h 2012-03-24 19:34:53.640769520 +1100 +@@ -37,8 +37,15 @@ + #define SCHED_FIFO 1 + #define SCHED_RR 2 + #define SCHED_BATCH 3 +-/* SCHED_ISO: reserved but not implemented yet */ ++/* SCHED_ISO: Implemented on BFS only */ + #define SCHED_IDLE 5 ++#define SCHED_IDLEPRIO SCHED_IDLE ++#ifdef CONFIG_SCHED_BFS ++#define SCHED_ISO 4 ++#define SCHED_MAX (SCHED_IDLEPRIO) ++#define SCHED_RANGE(policy) ((policy) <= SCHED_MAX) ++#endif ++ + /* Can be ORed in to make sure the process is reverted back to SCHED_NORMAL on fork */ + #define SCHED_RESET_ON_FORK 0x40000000 + +@@ -269,8 +276,6 @@ extern asmlinkage void schedule_tail(str + extern void init_idle(struct task_struct *idle, int 
cpu); + extern void init_idle_bootup_task(struct task_struct *idle); + +-extern int runqueue_is_locked(int cpu); +- + #if defined(CONFIG_SMP) && defined(CONFIG_NO_HZ) + extern void select_nohz_load_balancer(int stop_tick); + extern void set_cpu_sd_state_idle(void); +@@ -1243,15 +1248,31 @@ struct task_struct { + + #ifdef CONFIG_SMP + struct llist_node wake_entry; +- int on_cpu; + #endif +- int on_rq; ++#if defined(CONFIG_SMP) || defined(CONFIG_SCHED_BFS) ++ bool on_cpu; ++#endif ++#ifndef CONFIG_SCHED_BFS ++ bool on_rq; ++#endif + + int prio, static_prio, normal_prio; + unsigned int rt_priority; ++#ifdef CONFIG_SCHED_BFS ++ int time_slice; ++ u64 deadline; ++ struct list_head run_list; ++ u64 last_ran; ++ u64 sched_time; /* sched_clock time spent running */ ++#ifdef CONFIG_SMP ++ bool sticky; /* Soft affined flag */ ++#endif ++ unsigned long rt_timeout; ++#else /* CONFIG_SCHED_BFS */ + const struct sched_class *sched_class; + struct sched_entity se; + struct sched_rt_entity rt; ++#endif + + #ifdef CONFIG_PREEMPT_NOTIFIERS + /* list of struct preempt_notifier: */ +@@ -1358,6 +1379,9 @@ struct task_struct { + int __user *clear_child_tid; /* CLONE_CHILD_CLEARTID */ + + cputime_t utime, stime, utimescaled, stimescaled; ++#ifdef CONFIG_SCHED_BFS ++ unsigned long utime_pc, stime_pc; ++#endif + cputime_t gtime; + #ifndef CONFIG_VIRT_CPU_ACCOUNTING + cputime_t prev_utime, prev_stime; +@@ -1592,6 +1616,64 @@ struct task_struct { + #endif + }; + ++#ifdef CONFIG_SCHED_BFS ++bool grunqueue_is_locked(void); ++void grq_unlock_wait(void); ++void cpu_scaling(int cpu); ++void cpu_nonscaling(int cpu); ++bool above_background_load(void); ++#define tsk_seruntime(t) ((t)->sched_time) ++#define tsk_rttimeout(t) ((t)->rt_timeout) ++ ++static inline void tsk_cpus_current(struct task_struct *p) ++{ ++} ++ ++static inline int runqueue_is_locked(int cpu) ++{ ++ return grunqueue_is_locked(); ++} ++ ++void print_scheduler_version(void); ++ ++static inline bool iso_task(struct task_struct *p) ++{ ++ return (p->policy == SCHED_ISO); ++} ++#else /* CFS */ ++extern int runqueue_is_locked(int cpu); ++static inline void cpu_scaling(int cpu) ++{ ++} ++ ++static inline void cpu_nonscaling(int cpu) ++{ ++} ++#define tsk_seruntime(t) ((t)->se.sum_exec_runtime) ++#define tsk_rttimeout(t) ((t)->rt.timeout) ++ ++static inline void tsk_cpus_current(struct task_struct *p) ++{ ++ p->rt.nr_cpus_allowed = current->rt.nr_cpus_allowed; ++} ++ ++static inline void print_scheduler_version(void) ++{ ++ printk(KERN_INFO"CFS CPU scheduler.\n"); ++} ++ ++static inline bool iso_task(struct task_struct *p) ++{ ++ return false; ++} ++ ++/* Anyone feel like implementing this? */ ++static inline bool above_background_load(void) ++{ ++ return false; ++} ++#endif /* CONFIG_SCHED_BFS */ ++ + /* Future-safe accessor for struct task_struct's cpus_allowed. 
*/ + #define tsk_cpus_allowed(tsk) (&(tsk)->cpus_allowed) + +@@ -1609,10 +1691,20 @@ struct task_struct { + */ + + #define MAX_USER_RT_PRIO 100 +-#define MAX_RT_PRIO MAX_USER_RT_PRIO ++#define MAX_RT_PRIO (MAX_USER_RT_PRIO + 1) ++#define DEFAULT_PRIO (MAX_RT_PRIO + 20) + ++#ifdef CONFIG_SCHED_BFS ++#define PRIO_RANGE (40) ++#define MAX_PRIO (MAX_RT_PRIO + PRIO_RANGE) ++#define ISO_PRIO (MAX_RT_PRIO) ++#define NORMAL_PRIO (MAX_RT_PRIO + 1) ++#define IDLE_PRIO (MAX_RT_PRIO + 2) ++#define PRIO_LIMIT ((IDLE_PRIO) + 1) ++#else /* CONFIG_SCHED_BFS */ + #define MAX_PRIO (MAX_RT_PRIO + 40) +-#define DEFAULT_PRIO (MAX_RT_PRIO + 20) ++#define NORMAL_PRIO DEFAULT_PRIO ++#endif /* CONFIG_SCHED_BFS */ + + static inline int rt_prio(int prio) + { +@@ -1976,7 +2068,7 @@ extern unsigned long long + task_sched_runtime(struct task_struct *task); + + /* sched_exec is called by processes performing an exec */ +-#ifdef CONFIG_SMP ++#if defined(CONFIG_SMP) && !defined(CONFIG_SCHED_BFS) + extern void sched_exec(void); + #else + #define sched_exec() {} +@@ -2668,7 +2760,7 @@ static inline unsigned int task_cpu(cons + return 0; + } + +-static inline void set_task_cpu(struct task_struct *p, unsigned int cpu) ++static inline void set_task_cpu(struct task_struct *p, int cpu) + { + } + +Index: linux-3.3-ck1/init/Kconfig +=================================================================== +--- linux-3.3-ck1.orig/init/Kconfig 2012-03-24 19:30:00.013420381 +1100 ++++ linux-3.3-ck1/init/Kconfig 2012-03-24 19:30:29.040925775 +1100 +@@ -29,6 +29,19 @@ config IRQ_WORK + + menu "General setup" + ++config SCHED_BFS ++ bool "BFS cpu scheduler" ++ ---help--- ++ The Brain Fuck CPU Scheduler for excellent interactivity and ++ responsiveness on the desktop and solid scalability on normal ++ hardware. Not recommended for 4096 CPUs. ++ ++ Currently incompatible with the Group CPU scheduler, and RCU TORTURE ++ TEST so these options are disabled. ++ ++ Say Y here. ++ default y ++ + config EXPERIMENTAL + bool "Prompt for development and/or incomplete code/drivers" + ---help--- +@@ -640,6 +653,7 @@ config PROC_PID_CPUSET + + config CGROUP_CPUACCT + bool "Simple CPU accounting cgroup subsystem" ++ depends on !SCHED_BFS + help + Provides a simple Resource Controller for monitoring the + total CPU consumed by the tasks in a cgroup. 
+@@ -727,6 +741,7 @@ config CGROUP_PERF + + menuconfig CGROUP_SCHED + bool "Group CPU scheduler" ++ depends on !SCHED_BFS + default n + help + This feature lets CPU scheduler recognize task groups and control CPU +@@ -863,6 +878,7 @@ endif # NAMESPACES + + config SCHED_AUTOGROUP + bool "Automatic process group scheduling" ++ depends on !SCHED_BFS + select EVENTFD + select CGROUPS + select CGROUP_SCHED +Index: linux-3.3-ck1/init/main.c +=================================================================== +--- linux-3.3-ck1.orig/init/main.c 2012-03-24 19:30:00.013420381 +1100 ++++ linux-3.3-ck1/init/main.c 2012-03-24 19:30:29.041925792 +1100 +@@ -757,6 +757,7 @@ static noinline int init_post(void) + system_state = SYSTEM_RUNNING; + numa_default_policy(); + ++ print_scheduler_version(); + + current->signal->flags |= SIGNAL_UNKILLABLE; + +Index: linux-3.3-ck1/kernel/delayacct.c +=================================================================== +--- linux-3.3-ck1.orig/kernel/delayacct.c 2012-03-24 19:30:00.014420399 +1100 ++++ linux-3.3-ck1/kernel/delayacct.c 2012-03-24 19:30:29.041925792 +1100 +@@ -130,7 +130,7 @@ int __delayacct_add_tsk(struct taskstats + */ + t1 = tsk->sched_info.pcount; + t2 = tsk->sched_info.run_delay; +- t3 = tsk->se.sum_exec_runtime; ++ t3 = tsk_seruntime(tsk); + + d->cpu_count += t1; + +Index: linux-3.3-ck1/kernel/exit.c +=================================================================== +--- linux-3.3-ck1.orig/kernel/exit.c 2012-03-24 19:30:00.014420399 +1100 ++++ linux-3.3-ck1/kernel/exit.c 2012-03-24 19:30:29.041925792 +1100 +@@ -132,7 +132,7 @@ static void __exit_signal(struct task_st + sig->inblock += task_io_get_inblock(tsk); + sig->oublock += task_io_get_oublock(tsk); + task_io_accounting_add(&sig->ioac, &tsk->ioac); +- sig->sum_sched_runtime += tsk->se.sum_exec_runtime; ++ sig->sum_sched_runtime += tsk_seruntime(tsk); + } + + sig->nr_threads--; +Index: linux-3.3-ck1/kernel/posix-cpu-timers.c +=================================================================== +--- linux-3.3-ck1.orig/kernel/posix-cpu-timers.c 2012-03-24 19:30:00.014420399 +1100 ++++ linux-3.3-ck1/kernel/posix-cpu-timers.c 2012-03-24 19:30:29.042925809 +1100 +@@ -495,7 +495,7 @@ static void cleanup_timers(struct list_h + void posix_cpu_timers_exit(struct task_struct *tsk) + { + cleanup_timers(tsk->cpu_timers, +- tsk->utime, tsk->stime, tsk->se.sum_exec_runtime); ++ tsk->utime, tsk->stime, tsk_seruntime(tsk)); + + } + void posix_cpu_timers_exit_group(struct task_struct *tsk) +@@ -504,7 +504,7 @@ void posix_cpu_timers_exit_group(struct + + cleanup_timers(tsk->signal->cpu_timers, + tsk->utime + sig->utime, tsk->stime + sig->stime, +- tsk->se.sum_exec_runtime + sig->sum_sched_runtime); ++ tsk_seruntime(tsk) + sig->sum_sched_runtime); + } + + static void clear_dead_task(struct k_itimer *timer, union cpu_time_count now) +@@ -934,7 +934,7 @@ static void check_thread_timers(struct t + struct cpu_timer_list *t = list_first_entry(timers, + struct cpu_timer_list, + entry); +- if (!--maxfire || tsk->se.sum_exec_runtime < t->expires.sched) { ++ if (!--maxfire || tsk_seruntime(tsk) < t->expires.sched) { + tsk->cputime_expires.sched_exp = t->expires.sched; + break; + } +@@ -951,7 +951,7 @@ static void check_thread_timers(struct t + ACCESS_ONCE(sig->rlim[RLIMIT_RTTIME].rlim_max); + + if (hard != RLIM_INFINITY && +- tsk->rt.timeout > DIV_ROUND_UP(hard, USEC_PER_SEC/HZ)) { ++ tsk_rttimeout(tsk) > DIV_ROUND_UP(hard, USEC_PER_SEC/HZ)) { + /* + * At the hard limit, we just die. 
+ * No need to calculate anything else now. +@@ -959,7 +959,7 @@ static void check_thread_timers(struct t + __group_send_sig_info(SIGKILL, SEND_SIG_PRIV, tsk); + return; + } +- if (tsk->rt.timeout > DIV_ROUND_UP(soft, USEC_PER_SEC/HZ)) { ++ if (tsk_rttimeout(tsk) > DIV_ROUND_UP(soft, USEC_PER_SEC/HZ)) { + /* + * At the soft limit, send a SIGXCPU every second. + */ +@@ -1252,7 +1252,7 @@ static inline int fastpath_timer_check(s + struct task_cputime task_sample = { + .utime = tsk->utime, + .stime = tsk->stime, +- .sum_exec_runtime = tsk->se.sum_exec_runtime ++ .sum_exec_runtime = tsk_seruntime(tsk) + }; + + if (task_cputime_expired(&task_sample, &tsk->cputime_expires)) +Index: linux-3.3-ck1/kernel/sysctl.c +=================================================================== +--- linux-3.3-ck1.orig/kernel/sysctl.c 2012-03-24 19:30:00.013420381 +1100 ++++ linux-3.3-ck1/kernel/sysctl.c 2012-03-24 19:30:29.042925809 +1100 +@@ -121,7 +121,12 @@ static int __maybe_unused one = 1; + static int __maybe_unused two = 2; + static int __maybe_unused three = 3; + static unsigned long one_ul = 1; +-static int one_hundred = 100; ++static int __maybe_unused one_hundred = 100; ++#ifdef CONFIG_SCHED_BFS ++extern int rr_interval; ++extern int sched_iso_cpu; ++static int __read_mostly one_thousand = 1000; ++#endif + #ifdef CONFIG_PRINTK + static int ten_thousand = 10000; + #endif +@@ -251,7 +256,7 @@ static struct ctl_table root_table[] = { + { } + }; + +-#ifdef CONFIG_SCHED_DEBUG ++#if defined(CONFIG_SCHED_DEBUG) && !defined(CONFIG_SCHED_BFS) + static int min_sched_granularity_ns = 100000; /* 100 usecs */ + static int max_sched_granularity_ns = NSEC_PER_SEC; /* 1 second */ + static int min_wakeup_granularity_ns; /* 0 usecs */ +@@ -266,6 +271,7 @@ static int max_extfrag_threshold = 1000; + #endif + + static struct ctl_table kern_table[] = { ++#ifndef CONFIG_SCHED_BFS + { + .procname = "sched_child_runs_first", + .data = &sysctl_sched_child_runs_first, +@@ -383,6 +389,7 @@ static struct ctl_table kern_table[] = { + .extra1 = &one, + }, + #endif ++#endif /* !CONFIG_SCHED_BFS */ + #ifdef CONFIG_PROVE_LOCKING + { + .procname = "prove_locking", +@@ -850,6 +857,26 @@ static struct ctl_table kern_table[] = { + .proc_handler = proc_dointvec, + }, + #endif ++#ifdef CONFIG_SCHED_BFS ++ { ++ .procname = "rr_interval", ++ .data = &rr_interval, ++ .maxlen = sizeof (int), ++ .mode = 0644, ++ .proc_handler = &proc_dointvec_minmax, ++ .extra1 = &one, ++ .extra2 = &one_thousand, ++ }, ++ { ++ .procname = "iso_cpu", ++ .data = &sched_iso_cpu, ++ .maxlen = sizeof (int), ++ .mode = 0644, ++ .proc_handler = &proc_dointvec_minmax, ++ .extra1 = &zero, ++ .extra2 = &one_hundred, ++ }, ++#endif + #if defined(CONFIG_S390) && defined(CONFIG_SMP) + { + .procname = "spin_retry", +Index: linux-3.3-ck1/lib/Kconfig.debug +=================================================================== +--- linux-3.3-ck1.orig/lib/Kconfig.debug 2012-03-24 19:30:00.012420362 +1100 ++++ linux-3.3-ck1/lib/Kconfig.debug 2012-03-24 19:30:29.042925809 +1100 +@@ -875,7 +875,7 @@ config BOOT_PRINTK_DELAY + + config RCU_TORTURE_TEST + tristate "torture tests for RCU" +- depends on DEBUG_KERNEL ++ depends on DEBUG_KERNEL && !SCHED_BFS + default n + help + This option provides a kernel module that runs torture tests +Index: linux-3.3-ck1/include/linux/jiffies.h +=================================================================== +--- linux-3.3-ck1.orig/include/linux/jiffies.h 2012-03-24 19:30:00.012420362 +1100 ++++ linux-3.3-ck1/include/linux/jiffies.h 2012-03-24 
19:30:29.043925827 +1100 +@@ -164,7 +164,7 @@ static inline u64 get_jiffies_64(void) + * Have the 32 bit jiffies value wrap 5 minutes after boot + * so jiffies wrap bugs show up earlier. + */ +-#define INITIAL_JIFFIES ((unsigned long)(unsigned int) (-300*HZ)) ++#define INITIAL_JIFFIES ((unsigned long)(unsigned int) (-10*HZ)) + + /* + * Change timeval to jiffies, trying to avoid the +Index: linux-3.3-ck1/drivers/cpufreq/cpufreq.c +=================================================================== +--- linux-3.3-ck1.orig/drivers/cpufreq/cpufreq.c 2012-03-24 19:30:00.012420362 +1100 ++++ linux-3.3-ck1/drivers/cpufreq/cpufreq.c 2012-03-24 19:30:29.043925827 +1100 +@@ -28,6 +28,7 @@ + #include + #include + #include ++#include + #include + + #include +@@ -1445,6 +1446,12 @@ int __cpufreq_driver_target(struct cpufr + target_freq, relation); + if (cpu_online(policy->cpu) && cpufreq_driver->target) + retval = cpufreq_driver->target(policy, target_freq, relation); ++ if (likely(retval != -EINVAL)) { ++ if (target_freq == policy->max) ++ cpu_nonscaling(policy->cpu); ++ else ++ cpu_scaling(policy->cpu); ++ } + + return retval; + } +Index: linux-3.3-ck1/drivers/cpufreq/cpufreq_ondemand.c +=================================================================== +--- linux-3.3-ck1.orig/drivers/cpufreq/cpufreq_ondemand.c 2012-03-24 19:30:00.012420362 +1100 ++++ linux-3.3-ck1/drivers/cpufreq/cpufreq_ondemand.c 2012-03-24 19:30:29.043925827 +1100 +@@ -28,8 +28,8 @@ + * It helps to keep variable names smaller, simpler + */ + +-#define DEF_FREQUENCY_DOWN_DIFFERENTIAL (10) +-#define DEF_FREQUENCY_UP_THRESHOLD (80) ++#define DEF_FREQUENCY_DOWN_DIFFERENTIAL (26) ++#define DEF_FREQUENCY_UP_THRESHOLD (63) + #define DEF_SAMPLING_DOWN_FACTOR (1) + #define MAX_SAMPLING_DOWN_FACTOR (100000) + #define MICRO_FREQUENCY_DOWN_DIFFERENTIAL (3) +@@ -416,10 +416,10 @@ static void dbs_check_cpu(struct cpu_dbs + + /* + * Every sampling_rate, we check, if current idle time is less +- * than 20% (default), then we try to increase frequency ++ * than 37% (default), then we try to increase frequency + * Every sampling_rate, we look for a the lowest + * frequency which can sustain the load while keeping idle time over +- * 30%. If such a frequency exist, we try to decrease to this frequency. ++ * 63%. If such a frequency exist, we try to decrease to this frequency. + * + * Any frequency increase takes it to the maximum frequency. + * Frequency reduction happens at minimum steps of +Index: linux-3.3-ck1/drivers/cpufreq/cpufreq_conservative.c +=================================================================== +--- linux-3.3-ck1.orig/drivers/cpufreq/cpufreq_conservative.c 2012-03-24 19:30:00.012420362 +1100 ++++ linux-3.3-ck1/drivers/cpufreq/cpufreq_conservative.c 2012-03-24 19:30:29.043925827 +1100 +@@ -29,8 +29,8 @@ + * It helps to keep variable names smaller, simpler + */ + +-#define DEF_FREQUENCY_UP_THRESHOLD (80) +-#define DEF_FREQUENCY_DOWN_THRESHOLD (20) ++#define DEF_FREQUENCY_UP_THRESHOLD (63) ++#define DEF_FREQUENCY_DOWN_THRESHOLD (26) + + /* + * The polling frequency of this governor depends on the capability of +Index: linux-3.3-ck1/arch/x86/Kconfig +=================================================================== +--- linux-3.3-ck1.orig/arch/x86/Kconfig 2012-03-24 19:30:00.013420381 +1100 ++++ linux-3.3-ck1/arch/x86/Kconfig 2012-03-24 19:34:53.659769871 +1100 +@@ -806,15 +806,7 @@ config SCHED_MC + increased overhead in some places. If unsure say N here. 
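The cpufreq_ondemand and cpufreq_conservative hunks a little further up retune the default thresholds (up threshold 80 -> 63, down differential/threshold to 26), which is what the rewritten comments mean by raising the frequency when idle falls below 37% and only scaling down when idle stays above 63%. A tiny standalone sketch (plain C, not governor code) showing how those two numbers fall out of the new defaults:

#include <stdio.h>

int main(void)
{
    int up_threshold = 63;       /* DEF_FREQUENCY_UP_THRESHOLD in the patch */
    int down_differential = 26;  /* DEF_FREQUENCY_DOWN_DIFFERENTIAL */

    /* Frequency is raised once busy time exceeds up_threshold percent,
     * i.e. once idle time drops below this: */
    printf("raise frequency when idle < %d%%\n", 100 - up_threshold);

    /* When scaling down, the governor targets a load of
     * (up_threshold - down_differential) percent, i.e. idle above: */
    printf("scale down only while idle > %d%%\n",
           100 - (up_threshold - down_differential));
    return 0;
}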
+ + config IRQ_TIME_ACCOUNTING +- bool "Fine granularity task level IRQ time accounting" +- default n +- ---help--- +- Select this option to enable fine granularity task irq time +- accounting. This is done by reading a timestamp on each +- transitions between softirq and hardirq state, so there can be a +- small performance impact. +- +- If in doubt, say N here. ++ def_bool y + + source "kernel/Kconfig.preempt" + +@@ -1112,7 +1104,7 @@ endchoice + + choice + depends on EXPERIMENTAL +- prompt "Memory split" if EXPERT ++ prompt "Memory split" + default VMSPLIT_3G + depends on X86_32 + ---help--- +@@ -1132,17 +1124,17 @@ choice + option alone! + + config VMSPLIT_3G +- bool "3G/1G user/kernel split" ++ bool "Default 896MB lowmem (3G/1G user/kernel split)" + config VMSPLIT_3G_OPT + depends on !X86_PAE +- bool "3G/1G user/kernel split (for full 1G low memory)" ++ bool "1GB lowmem (3G/1G user/kernel split)" + config VMSPLIT_2G +- bool "2G/2G user/kernel split" ++ bool "2GB lowmem (2G/2G user/kernel split)" + config VMSPLIT_2G_OPT + depends on !X86_PAE +- bool "2G/2G user/kernel split (for full 2G low memory)" ++ bool "2GB lowmem (2G/2G user/kernel split)" + config VMSPLIT_1G +- bool "1G/3G user/kernel split" ++ bool "3GB lowmem (1G/3G user/kernel split)" + endchoice + + config PAGE_OFFSET +Index: linux-3.3-ck1/kernel/sched/bfs.c +=================================================================== +--- /dev/null 1970-01-01 00:00:00.000000000 +0000 ++++ linux-3.3-ck1/kernel/sched/bfs.c 2012-03-24 19:30:29.047925897 +1100 +@@ -0,0 +1,7251 @@ ++/* ++ * kernel/sched/bfs.c, was kernel/sched.c ++ * ++ * Kernel scheduler and related syscalls ++ * ++ * Copyright (C) 1991-2002 Linus Torvalds ++ * ++ * 1996-12-23 Modified by Dave Grothe to fix bugs in semaphores and ++ * make semaphores SMP safe ++ * 1998-11-19 Implemented schedule_timeout() and related stuff ++ * by Andrea Arcangeli ++ * 2002-01-04 New ultra-scalable O(1) scheduler by Ingo Molnar: ++ * hybrid priority-list and round-robin design with ++ * an array-switch method of distributing timeslices ++ * and per-CPU runqueues. Cleanups and useful suggestions ++ * by Davide Libenzi, preemptible kernel bits by Robert Love. ++ * 2003-09-03 Interactivity tuning by Con Kolivas. ++ * 2004-04-02 Scheduler domains code by Nick Piggin ++ * 2007-04-15 Work begun on replacing all interactivity tuning with a ++ * fair scheduling design by Con Kolivas. ++ * 2007-05-05 Load balancing (smp-nice) and other improvements ++ * by Peter Williams ++ * 2007-05-06 Interactivity improvements to CFS by Mike Galbraith ++ * 2007-07-01 Group scheduling enhancements by Srivatsa Vaddagiri ++ * 2007-11-29 RT balancing improvements by Steven Rostedt, Gregory Haskins, ++ * Thomas Gleixner, Mike Kravetz ++ * now Brainfuck deadline scheduling policy by Con Kolivas deletes ++ * a whole lot of those previous things. 
++ */ ++ ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++ ++#include ++#include ++#include ++#ifdef CONFIG_PARAVIRT ++#include ++#endif ++ ++#include "cpupri.h" ++#include "../workqueue_sched.h" ++ ++#define CREATE_TRACE_POINTS ++#include ++ ++#define rt_prio(prio) unlikely((prio) < MAX_RT_PRIO) ++#define rt_task(p) rt_prio((p)->prio) ++#define rt_queue(rq) rt_prio((rq)->rq_prio) ++#define batch_task(p) (unlikely((p)->policy == SCHED_BATCH)) ++#define is_rt_policy(policy) ((policy) == SCHED_FIFO || \ ++ (policy) == SCHED_RR) ++#define has_rt_policy(p) unlikely(is_rt_policy((p)->policy)) ++#define idleprio_task(p) unlikely((p)->policy == SCHED_IDLEPRIO) ++#define iso_task(p) unlikely((p)->policy == SCHED_ISO) ++#define iso_queue(rq) unlikely((rq)->rq_policy == SCHED_ISO) ++#define rq_running_iso(rq) ((rq)->rq_prio == ISO_PRIO) ++ ++#define ISO_PERIOD ((5 * HZ * grq.noc) + 1) ++ ++/* ++ * Convert user-nice values [ -20 ... 0 ... 19 ] ++ * to static priority [ MAX_RT_PRIO..MAX_PRIO-1 ], ++ * and back. ++ */ ++#define NICE_TO_PRIO(nice) (MAX_RT_PRIO + (nice) + 20) ++#define PRIO_TO_NICE(prio) ((prio) - MAX_RT_PRIO - 20) ++#define TASK_NICE(p) PRIO_TO_NICE((p)->static_prio) ++ ++/* ++ * 'User priority' is the nice value converted to something we ++ * can work with better when scaling various scheduler parameters, ++ * it's a [ 0 ... 39 ] range. ++ */ ++#define USER_PRIO(p) ((p) - MAX_RT_PRIO) ++#define TASK_USER_PRIO(p) USER_PRIO((p)->static_prio) ++#define MAX_USER_PRIO (USER_PRIO(MAX_PRIO)) ++#define SCHED_PRIO(p) ((p) + MAX_RT_PRIO) ++#define STOP_PRIO (MAX_RT_PRIO - 1) ++ ++/* ++ * Some helpers for converting to/from various scales. Use shifts to get ++ * approximate multiples of ten for less overhead. ++ */ ++#define JIFFIES_TO_NS(TIME) ((TIME) * (1000000000 / HZ)) ++#define JIFFY_NS (1000000000 / HZ) ++#define HALF_JIFFY_NS (1000000000 / HZ / 2) ++#define HALF_JIFFY_US (1000000 / HZ / 2) ++#define MS_TO_NS(TIME) ((TIME) << 20) ++#define MS_TO_US(TIME) ((TIME) << 10) ++#define NS_TO_MS(TIME) ((TIME) >> 20) ++#define NS_TO_US(TIME) ((TIME) >> 10) ++ ++#define RESCHED_US (100) /* Reschedule if less than this many μs left */ ++ ++void print_scheduler_version(void) ++{ ++ printk(KERN_INFO "BFS CPU scheduler v0.420 by Con Kolivas.\n"); ++} ++ ++/* ++ * This is the time all tasks within the same priority round robin. ++ * Value is in ms and set to a minimum of 6ms. Scales with number of cpus. ++ * Tunable via /proc interface. ++ */ ++int rr_interval __read_mostly = 6; ++ ++/* ++ * sched_iso_cpu - sysctl which determines the cpu percentage SCHED_ISO tasks ++ * are allowed to run five seconds as real time tasks. This is the total over ++ * all online cpus. ++ */ ++int sched_iso_cpu __read_mostly = 70; ++ ++/* ++ * The relative length of deadline for each priority(nice) level. ++ */ ++static int prio_ratios[PRIO_RANGE] __read_mostly; ++ ++/* ++ * The quota handed out to tasks of all priority levels when refilling their ++ * time_slice. ++ */ ++static inline int timeslice(void) ++{ ++ return MS_TO_US(rr_interval); ++} ++ ++/* ++ * The global runqueue data that all CPUs work off. 
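The scale helpers just above deliberately use shifts of 10 and 20 in place of multiplying or dividing by 1000 and 1000000, so a "millisecond" becomes 1024 microseconds and the error stays within a few percent; with the default rr_interval of 6 ms, timeslice() therefore hands out 6144 us per refill. A standalone sketch (userspace, illustrative only) of that arithmetic:

#include <stdio.h>

#define MS_TO_US(t)  ((t) << 10)   /* ~ * 1000, actually * 1024 */
#define MS_TO_NS(t)  ((t) << 20)   /* ~ * 1000000 */
#define NS_TO_US(t)  ((t) >> 10)

int main(void)
{
    int rr_interval = 6;  /* default rr_interval from the patch, in ms */

    printf("default timeslice : %d us (nominally %d us)\n",
           MS_TO_US(rr_interval), rr_interval * 1000);      /* 6144 vs 6000 */
    printf("6 ms as 'ns'      : %d (nominally 6000000)\n", MS_TO_NS(6));
    printf("back to 'us'      : %d\n", NS_TO_US(MS_TO_NS(6)));
    return 0;
}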
Data is protected either ++ * by the global grq lock, or the discrete lock that precedes the data in this ++ * struct. ++ */ ++struct global_rq { ++ raw_spinlock_t lock; ++ unsigned long nr_running; ++ unsigned long nr_uninterruptible; ++ unsigned long long nr_switches; ++ struct list_head queue[PRIO_LIMIT]; ++ DECLARE_BITMAP(prio_bitmap, PRIO_LIMIT + 1); ++#ifdef CONFIG_SMP ++ unsigned long qnr; /* queued not running */ ++ cpumask_t cpu_idle_map; ++ bool idle_cpus; ++#endif ++ int noc; /* num_online_cpus stored and updated when it changes */ ++ u64 niffies; /* Nanosecond jiffies */ ++ unsigned long last_jiffy; /* Last jiffy we updated niffies */ ++ ++ raw_spinlock_t iso_lock; ++ int iso_ticks; ++ bool iso_refractory; ++}; ++ ++#ifdef CONFIG_SMP ++ ++/* ++ * We add the notion of a root-domain which will be used to define per-domain ++ * variables. Each exclusive cpuset essentially defines an island domain by ++ * fully partitioning the member cpus from any other cpuset. Whenever a new ++ * exclusive cpuset is created, we also create and attach a new root-domain ++ * object. ++ * ++ */ ++struct root_domain { ++ atomic_t refcount; ++ atomic_t rto_count; ++ struct rcu_head rcu; ++ cpumask_var_t span; ++ cpumask_var_t online; ++ ++ /* ++ * The "RT overload" flag: it gets set if a CPU has more than ++ * one runnable RT task. ++ */ ++ cpumask_var_t rto_mask; ++ struct cpupri cpupri; ++}; ++ ++/* ++ * By default the system creates a single root-domain with all cpus as ++ * members (mimicking the global state we have today). ++ */ ++static struct root_domain def_root_domain; ++ ++#endif /* CONFIG_SMP */ ++ ++/* There can be only one */ ++static struct global_rq grq; ++ ++/* ++ * This is the main, per-CPU runqueue data structure. ++ * This data should only be modified by the local cpu. ++ */ ++struct rq { ++#ifdef CONFIG_SMP ++#ifdef CONFIG_NO_HZ ++ u64 nohz_stamp; ++ unsigned char in_nohz_recently; ++#endif ++#endif ++ ++ struct task_struct *curr, *idle, *stop; ++ struct mm_struct *prev_mm; ++ ++ /* Stored data about rq->curr to work outside grq lock */ ++ u64 rq_deadline; ++ unsigned int rq_policy; ++ int rq_time_slice; ++ u64 rq_last_ran; ++ int rq_prio; ++ bool rq_running; /* There is a task running */ ++ ++ /* Accurate timekeeping data */ ++ u64 timekeep_clock; ++ unsigned long user_pc, nice_pc, irq_pc, softirq_pc, system_pc, ++ iowait_pc, idle_pc; ++ long account_pc; ++ atomic_t nr_iowait; ++ ++#ifdef CONFIG_SMP ++ int cpu; /* cpu of this runqueue */ ++ bool online; ++ bool scaling; /* This CPU is managed by a scaling CPU freq governor */ ++ struct task_struct *sticky_task; ++ ++ struct root_domain *rd; ++ struct sched_domain *sd; ++ int *cpu_locality; /* CPU relative cache distance */ ++#ifdef CONFIG_SCHED_SMT ++ bool (*siblings_idle)(int cpu); ++ /* See if all smt siblings are idle */ ++ cpumask_t smt_siblings; ++#endif ++#ifdef CONFIG_SCHED_MC ++ bool (*cache_idle)(int cpu); ++ /* See if all cache siblings are idle */ ++ cpumask_t cache_siblings; ++#endif ++ u64 last_niffy; /* Last time this RQ updated grq.niffies */ ++#endif ++#ifdef CONFIG_IRQ_TIME_ACCOUNTING ++ u64 prev_irq_time; ++#endif ++#ifdef CONFIG_PARAVIRT ++ u64 prev_steal_time; ++#endif ++#ifdef CONFIG_PARAVIRT_TIME_ACCOUNTING ++ u64 prev_steal_time_rq; ++#endif ++ ++ u64 clock, old_clock, last_tick; ++ u64 clock_task; ++ bool dither; ++ ++#ifdef CONFIG_SCHEDSTATS ++ ++ /* latency stats */ ++ struct sched_info rq_sched_info; ++ unsigned long long rq_cpu_time; ++ /* could above be rq->cfs_rq.exec_clock + rq->rt_rq.rt_runtime ? 
*/ ++ ++ /* sys_sched_yield() stats */ ++ unsigned int yld_count; ++ ++ /* schedule() stats */ ++ unsigned int sched_switch; ++ unsigned int sched_count; ++ unsigned int sched_goidle; ++ ++ /* try_to_wake_up() stats */ ++ unsigned int ttwu_count; ++ unsigned int ttwu_local; ++#endif ++}; ++ ++DEFINE_PER_CPU_SHARED_ALIGNED(struct rq, runqueues); ++static DEFINE_MUTEX(sched_hotcpu_mutex); ++ ++#ifdef CONFIG_SMP ++/* ++ * sched_domains_mutex serialises calls to init_sched_domains, ++ * detach_destroy_domains and partition_sched_domains. ++ */ ++static DEFINE_MUTEX(sched_domains_mutex); ++ ++/* ++ * By default the system creates a single root-domain with all cpus as ++ * members (mimicking the global state we have today). ++ */ ++static struct root_domain def_root_domain; ++ ++int __weak arch_sd_sibling_asym_packing(void) ++{ ++ return 0*SD_ASYM_PACKING; ++} ++#endif ++ ++#define rcu_dereference_check_sched_domain(p) \ ++ rcu_dereference_check((p), \ ++ lockdep_is_held(&sched_domains_mutex)) ++ ++/* ++ * The domain tree (rq->sd) is protected by RCU's quiescent state transition. ++ * See detach_destroy_domains: synchronize_sched for details. ++ * ++ * The domain tree of any CPU may only be accessed from within ++ * preempt-disabled sections. ++ */ ++#define for_each_domain(cpu, __sd) \ ++ for (__sd = rcu_dereference_check_sched_domain(cpu_rq(cpu)->sd); __sd; __sd = __sd->parent) ++ ++static inline void update_rq_clock(struct rq *rq); ++ ++/* ++ * Sanity check should sched_clock return bogus values. We make sure it does ++ * not appear to go backwards, and use jiffies to determine the maximum and ++ * minimum it could possibly have increased, and round down to the nearest ++ * jiffy when it falls outside this. ++ */ ++static inline void niffy_diff(s64 *niff_diff, int jiff_diff) ++{ ++ unsigned long min_diff, max_diff; ++ ++ if (jiff_diff > 1) ++ min_diff = JIFFIES_TO_NS(jiff_diff - 1); ++ else ++ min_diff = 1; ++ /* Round up to the nearest tick for maximum */ ++ max_diff = JIFFIES_TO_NS(jiff_diff + 1); ++ ++ if (unlikely(*niff_diff < min_diff || *niff_diff > max_diff)) ++ *niff_diff = min_diff; ++} ++ ++#ifdef CONFIG_SMP ++#define cpu_rq(cpu) (&per_cpu(runqueues, (cpu))) ++#define this_rq() (&__get_cpu_var(runqueues)) ++#define task_rq(p) cpu_rq(task_cpu(p)) ++#define cpu_curr(cpu) (cpu_rq(cpu)->curr) ++static inline int cpu_of(struct rq *rq) ++{ ++ return rq->cpu; ++} ++ ++/* ++ * Niffies are a globally increasing nanosecond counter. Whenever a runqueue ++ * clock is updated with the grq.lock held, it is an opportunity to update the ++ * niffies value. Any CPU can update it by adding how much its clock has ++ * increased since it last updated niffies, minus any added niffies by other ++ * CPUs. 
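niffy_diff() above is the guard that keeps the niffies counter sane when sched_clock() misbehaves: the measured nanosecond delta must lie within one jiffy either side of what the elapsed jiffies allow, otherwise it is thrown away and replaced by the minimum credible value. A standalone restatement (userspace, with HZ assumed to be 1000 purely for the example):

#include <stdio.h>
#include <stdint.h>

#define HZ               1000
#define JIFFIES_TO_NS(t) ((uint64_t)(t) * (1000000000 / HZ))

static void niffy_diff(int64_t *niff_diff, int jiff_diff)
{
    uint64_t min_diff, max_diff;

    if (jiff_diff > 1)
        min_diff = JIFFIES_TO_NS(jiff_diff - 1);
    else
        min_diff = 1;
    max_diff = JIFFIES_TO_NS(jiff_diff + 1);  /* round up one tick */

    if (*niff_diff < (int64_t)min_diff || *niff_diff > (int64_t)max_diff)
        *niff_diff = min_diff;
}

int main(void)
{
    int64_t sane = 3500000, bogus = -42;  /* ns deltas over ~3 jiffies */

    niffy_diff(&sane, 3);
    niffy_diff(&bogus, 3);
    printf("plausible delta kept : %lld ns\n", (long long)sane);   /* 3500000 */
    printf("bogus delta clamped  : %lld ns\n", (long long)bogus);  /* 2000000 */
    return 0;
}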
++ */ ++static inline void update_clocks(struct rq *rq) ++{ ++ s64 ndiff; ++ long jdiff; ++ ++ update_rq_clock(rq); ++ ndiff = rq->clock - rq->old_clock; ++ /* old_clock is only updated when we are updating niffies */ ++ rq->old_clock = rq->clock; ++ ndiff -= grq.niffies - rq->last_niffy; ++ jdiff = jiffies - grq.last_jiffy; ++ niffy_diff(&ndiff, jdiff); ++ grq.last_jiffy += jdiff; ++ grq.niffies += ndiff; ++ rq->last_niffy = grq.niffies; ++} ++#else /* CONFIG_SMP */ ++static struct rq *uprq; ++#define cpu_rq(cpu) (uprq) ++#define this_rq() (uprq) ++#define task_rq(p) (uprq) ++#define cpu_curr(cpu) ((uprq)->curr) ++static inline int cpu_of(struct rq *rq) ++{ ++ return 0; ++} ++ ++static inline void update_clocks(struct rq *rq) ++{ ++ s64 ndiff; ++ long jdiff; ++ ++ update_rq_clock(rq); ++ ndiff = rq->clock - rq->old_clock; ++ rq->old_clock = rq->clock; ++ jdiff = jiffies - grq.last_jiffy; ++ niffy_diff(&ndiff, jdiff); ++ grq.last_jiffy += jdiff; ++ grq.niffies += ndiff; ++} ++#endif ++#define raw_rq() (&__raw_get_cpu_var(runqueues)) ++ ++#include "stats.h" ++ ++#ifndef prepare_arch_switch ++# define prepare_arch_switch(next) do { } while (0) ++#endif ++#ifndef finish_arch_switch ++# define finish_arch_switch(prev) do { } while (0) ++#endif ++ ++/* ++ * All common locking functions performed on grq.lock. rq->clock is local to ++ * the CPU accessing it so it can be modified just with interrupts disabled ++ * when we're not updating niffies. ++ * Looking up task_rq must be done under grq.lock to be safe. ++ */ ++static void update_rq_clock_task(struct rq *rq, s64 delta); ++ ++static inline void update_rq_clock(struct rq *rq) ++{ ++ s64 delta = sched_clock_cpu(cpu_of(rq)) - rq->clock; ++ ++ rq->clock += delta; ++ update_rq_clock_task(rq, delta); ++} ++ ++static inline bool task_running(struct task_struct *p) ++{ ++ return p->on_cpu; ++} ++ ++static inline void grq_lock(void) ++ __acquires(grq.lock) ++{ ++ raw_spin_lock(&grq.lock); ++} ++ ++static inline void grq_unlock(void) ++ __releases(grq.lock) ++{ ++ raw_spin_unlock(&grq.lock); ++} ++ ++static inline void grq_lock_irq(void) ++ __acquires(grq.lock) ++{ ++ raw_spin_lock_irq(&grq.lock); ++} ++ ++static inline void time_lock_grq(struct rq *rq) ++ __acquires(grq.lock) ++{ ++ grq_lock(); ++ update_clocks(rq); ++} ++ ++static inline void grq_unlock_irq(void) ++ __releases(grq.lock) ++{ ++ raw_spin_unlock_irq(&grq.lock); ++} ++ ++static inline void grq_lock_irqsave(unsigned long *flags) ++ __acquires(grq.lock) ++{ ++ raw_spin_lock_irqsave(&grq.lock, *flags); ++} ++ ++static inline void grq_unlock_irqrestore(unsigned long *flags) ++ __releases(grq.lock) ++{ ++ raw_spin_unlock_irqrestore(&grq.lock, *flags); ++} ++ ++static inline struct rq ++*task_grq_lock(struct task_struct *p, unsigned long *flags) ++ __acquires(grq.lock) ++{ ++ grq_lock_irqsave(flags); ++ return task_rq(p); ++} ++ ++static inline struct rq ++*time_task_grq_lock(struct task_struct *p, unsigned long *flags) ++ __acquires(grq.lock) ++{ ++ struct rq *rq = task_grq_lock(p, flags); ++ update_clocks(rq); ++ return rq; ++} ++ ++static inline struct rq *task_grq_lock_irq(struct task_struct *p) ++ __acquires(grq.lock) ++{ ++ grq_lock_irq(); ++ return task_rq(p); ++} ++ ++static inline void time_task_grq_lock_irq(struct task_struct *p) ++ __acquires(grq.lock) ++{ ++ struct rq *rq = task_grq_lock_irq(p); ++ update_clocks(rq); ++} ++ ++static inline void task_grq_unlock_irq(void) ++ __releases(grq.lock) ++{ ++ grq_unlock_irq(); ++} ++ ++static inline void task_grq_unlock(unsigned long 
*flags) ++ __releases(grq.lock) ++{ ++ grq_unlock_irqrestore(flags); ++} ++ ++/** ++ * grunqueue_is_locked ++ * ++ * Returns true if the global runqueue is locked. ++ * This interface allows printk to be called with the runqueue lock ++ * held and know whether or not it is OK to wake up the klogd. ++ */ ++bool grunqueue_is_locked(void) ++{ ++ return raw_spin_is_locked(&grq.lock); ++} ++ ++void grq_unlock_wait(void) ++ __releases(grq.lock) ++{ ++ smp_mb(); /* spin-unlock-wait is not a full memory barrier */ ++ raw_spin_unlock_wait(&grq.lock); ++} ++ ++static inline void time_grq_lock(struct rq *rq, unsigned long *flags) ++ __acquires(grq.lock) ++{ ++ local_irq_save(*flags); ++ time_lock_grq(rq); ++} ++ ++static inline struct rq *__task_grq_lock(struct task_struct *p) ++ __acquires(grq.lock) ++{ ++ grq_lock(); ++ return task_rq(p); ++} ++ ++static inline void __task_grq_unlock(void) ++ __releases(grq.lock) ++{ ++ grq_unlock(); ++} ++ ++/* ++ * Look for any tasks *anywhere* that are running nice 0 or better. We do ++ * this lockless for overhead reasons since the occasional wrong result ++ * is harmless. ++ */ ++bool above_background_load(void) ++{ ++ int cpu; ++ ++ for_each_online_cpu(cpu) { ++ struct task_struct *cpu_curr = cpu_rq(cpu)->curr; ++ ++ if (unlikely(!cpu_curr)) ++ continue; ++ if (PRIO_TO_NICE(cpu_curr->static_prio) < 1) { ++ return true; ++ } ++ } ++ return false; ++} ++ ++#ifndef __ARCH_WANT_UNLOCKED_CTXSW ++static inline void prepare_lock_switch(struct rq *rq, struct task_struct *next) ++{ ++} ++ ++static inline void finish_lock_switch(struct rq *rq, struct task_struct *prev) ++{ ++#ifdef CONFIG_DEBUG_SPINLOCK ++ /* this is a valid case when another task releases the spinlock */ ++ grq.lock.owner = current; ++#endif ++ /* ++ * If we are tracking spinlock dependencies then we have to ++ * fix up the runqueue lock - which gets 'carried over' from ++ * prev into current: ++ */ ++ spin_acquire(&grq.lock.dep_map, 0, 0, _THIS_IP_); ++ ++ grq_unlock_irq(); ++} ++ ++#else /* __ARCH_WANT_UNLOCKED_CTXSW */ ++ ++static inline void prepare_lock_switch(struct rq *rq, struct task_struct *next) ++{ ++#ifdef __ARCH_WANT_INTERRUPTS_ON_CTXSW ++ grq_unlock_irq(); ++#else ++ grq_unlock(); ++#endif ++} ++ ++static inline void finish_lock_switch(struct rq *rq, struct task_struct *prev) ++{ ++ smp_wmb(); ++#ifndef __ARCH_WANT_INTERRUPTS_ON_CTXSW ++ local_irq_enable(); ++#endif ++} ++#endif /* __ARCH_WANT_UNLOCKED_CTXSW */ ++ ++static inline bool deadline_before(u64 deadline, u64 time) ++{ ++ return (deadline < time); ++} ++ ++static inline bool deadline_after(u64 deadline, u64 time) ++{ ++ return (deadline > time); ++} ++ ++/* ++ * A task that is queued but not running will be on the grq run list. ++ * A task that is not running or queued will not be on the grq run list. ++ * A task that is currently running will have ->on_cpu set but not on the ++ * grq run list. ++ */ ++static inline bool task_queued(struct task_struct *p) ++{ ++ return (!list_empty(&p->run_list)); ++} ++ ++/* ++ * Removing from the global runqueue. Enter with grq locked. ++ */ ++static void dequeue_task(struct task_struct *p) ++{ ++ list_del_init(&p->run_list); ++ if (list_empty(grq.queue + p->prio)) ++ __clear_bit(p->prio, grq.prio_bitmap); ++} ++ ++/* ++ * To determine if it's safe for a task of SCHED_IDLEPRIO to actually run as ++ * an idle task, we ensure none of the following conditions are met. 
++ */ ++static bool idleprio_suitable(struct task_struct *p) ++{ ++ return (!freezing(p) && !signal_pending(p) && ++ !(task_contributes_to_load(p)) && !(p->flags & (PF_EXITING))); ++} ++ ++/* ++ * To determine if a task of SCHED_ISO can run in pseudo-realtime, we check ++ * that the iso_refractory flag is not set. ++ */ ++static bool isoprio_suitable(void) ++{ ++ return !grq.iso_refractory; ++} ++ ++/* ++ * Adding to the global runqueue. Enter with grq locked. ++ */ ++static void enqueue_task(struct task_struct *p) ++{ ++ if (!rt_task(p)) { ++ /* Check it hasn't gotten rt from PI */ ++ if ((idleprio_task(p) && idleprio_suitable(p)) || ++ (iso_task(p) && isoprio_suitable())) ++ p->prio = p->normal_prio; ++ else ++ p->prio = NORMAL_PRIO; ++ } ++ __set_bit(p->prio, grq.prio_bitmap); ++ list_add_tail(&p->run_list, grq.queue + p->prio); ++ sched_info_queued(p); ++} ++ ++/* Only idle task does this as a real time task*/ ++static inline void enqueue_task_head(struct task_struct *p) ++{ ++ __set_bit(p->prio, grq.prio_bitmap); ++ list_add(&p->run_list, grq.queue + p->prio); ++ sched_info_queued(p); ++} ++ ++static inline void requeue_task(struct task_struct *p) ++{ ++ sched_info_queued(p); ++} ++ ++/* ++ * Returns the relative length of deadline all compared to the shortest ++ * deadline which is that of nice -20. ++ */ ++static inline int task_prio_ratio(struct task_struct *p) ++{ ++ return prio_ratios[TASK_USER_PRIO(p)]; ++} ++ ++/* ++ * task_timeslice - all tasks of all priorities get the exact same timeslice ++ * length. CPU distribution is handled by giving different deadlines to ++ * tasks of different priorities. Use 128 as the base value for fast shifts. ++ */ ++static inline int task_timeslice(struct task_struct *p) ++{ ++ return (rr_interval * task_prio_ratio(p) / 128); ++} ++ ++#ifdef CONFIG_SMP ++/* ++ * qnr is the "queued but not running" count which is the total number of ++ * tasks on the global runqueue list waiting for cpu time but not actually ++ * currently running on a cpu. ++ */ ++static inline void inc_qnr(void) ++{ ++ grq.qnr++; ++} ++ ++static inline void dec_qnr(void) ++{ ++ grq.qnr--; ++} ++ ++static inline int queued_notrunning(void) ++{ ++ return grq.qnr; ++} ++ ++/* ++ * The cpu_idle_map stores a bitmap of all the CPUs currently idle to ++ * allow easy lookup of whether any suitable idle CPUs are available. ++ * It's cheaper to maintain a binary yes/no if there are any idle CPUs on the ++ * idle_cpus variable than to do a full bitmask check when we are busy. ++ */ ++static inline void set_cpuidle_map(int cpu) ++{ ++ if (likely(cpu_online(cpu))) { ++ cpu_set(cpu, grq.cpu_idle_map); ++ grq.idle_cpus = true; ++ } ++} ++ ++static inline void clear_cpuidle_map(int cpu) ++{ ++ cpu_clear(cpu, grq.cpu_idle_map); ++ if (cpus_empty(grq.cpu_idle_map)) ++ grq.idle_cpus = false; ++} ++ ++static bool suitable_idle_cpus(struct task_struct *p) ++{ ++ if (!grq.idle_cpus) ++ return false; ++ return (cpus_intersects(p->cpus_allowed, grq.cpu_idle_map)); ++} ++ ++#define CPUIDLE_DIFF_THREAD (1) ++#define CPUIDLE_DIFF_CORE (2) ++#define CPUIDLE_CACHE_BUSY (4) ++#define CPUIDLE_DIFF_CPU (8) ++#define CPUIDLE_THREAD_BUSY (16) ++#define CPUIDLE_DIFF_NODE (32) ++ ++static void resched_task(struct task_struct *p); ++ ++/* ++ * The best idle CPU is chosen according to the CPUIDLE ranking above where the ++ * lowest value would give the most suitable CPU to schedule p onto next. 
The ++ * order works out to be the following: ++ * ++ * Same core, idle or busy cache, idle or busy threads ++ * Other core, same cache, idle or busy cache, idle threads. ++ * Same node, other CPU, idle cache, idle threads. ++ * Same node, other CPU, busy cache, idle threads. ++ * Other core, same cache, busy threads. ++ * Same node, other CPU, busy threads. ++ * Other node, other CPU, idle cache, idle threads. ++ * Other node, other CPU, busy cache, idle threads. ++ * Other node, other CPU, busy threads. ++ */ ++static void ++resched_best_mask(int best_cpu, struct rq *rq, cpumask_t *tmpmask) ++{ ++ unsigned int best_ranking = CPUIDLE_DIFF_NODE | CPUIDLE_THREAD_BUSY | ++ CPUIDLE_DIFF_CPU | CPUIDLE_CACHE_BUSY | CPUIDLE_DIFF_CORE | ++ CPUIDLE_DIFF_THREAD; ++ int cpu_tmp; ++ ++ if (cpu_isset(best_cpu, *tmpmask)) ++ goto out; ++ ++ for_each_cpu_mask(cpu_tmp, *tmpmask) { ++ unsigned int ranking; ++ struct rq *tmp_rq; ++ ++ ranking = 0; ++ tmp_rq = cpu_rq(cpu_tmp); ++ ++#ifdef CONFIG_NUMA ++ if (rq->cpu_locality[cpu_tmp] > 3) ++ ranking |= CPUIDLE_DIFF_NODE; ++ else ++#endif ++ if (rq->cpu_locality[cpu_tmp] > 2) ++ ranking |= CPUIDLE_DIFF_CPU; ++#ifdef CONFIG_SCHED_MC ++ if (rq->cpu_locality[cpu_tmp] == 2) ++ ranking |= CPUIDLE_DIFF_CORE; ++ if (!(tmp_rq->cache_idle(cpu_tmp))) ++ ranking |= CPUIDLE_CACHE_BUSY; ++#endif ++#ifdef CONFIG_SCHED_SMT ++ if (rq->cpu_locality[cpu_tmp] == 1) ++ ranking |= CPUIDLE_DIFF_THREAD; ++ if (!(tmp_rq->siblings_idle(cpu_tmp))) ++ ranking |= CPUIDLE_THREAD_BUSY; ++#endif ++ if (ranking < best_ranking) { ++ best_cpu = cpu_tmp; ++ best_ranking = ranking; ++ } ++ } ++out: ++ resched_task(cpu_rq(best_cpu)->curr); ++} ++ ++static void resched_best_idle(struct task_struct *p) ++{ ++ cpumask_t tmpmask; ++ ++ cpus_and(tmpmask, p->cpus_allowed, grq.cpu_idle_map); ++ resched_best_mask(task_cpu(p), task_rq(p), &tmpmask); ++} ++ ++static inline void resched_suitable_idle(struct task_struct *p) ++{ ++ if (suitable_idle_cpus(p)) ++ resched_best_idle(p); ++} ++/* ++ * Flags to tell us whether this CPU is running a CPU frequency governor that ++ * has slowed its speed or not. No locking required as the very rare wrongly ++ * read value would be harmless. ++ */ ++void cpu_scaling(int cpu) ++{ ++ cpu_rq(cpu)->scaling = true; ++} ++ ++void cpu_nonscaling(int cpu) ++{ ++ cpu_rq(cpu)->scaling = false; ++} ++ ++static inline bool scaling_rq(struct rq *rq) ++{ ++ return rq->scaling; ++} ++ ++static inline int locality_diff(struct task_struct *p, struct rq *rq) ++{ ++ return rq->cpu_locality[task_cpu(p)]; ++} ++#else /* CONFIG_SMP */ ++static inline void inc_qnr(void) ++{ ++} ++ ++static inline void dec_qnr(void) ++{ ++} ++ ++static inline int queued_notrunning(void) ++{ ++ return grq.nr_running; ++} ++ ++static inline void set_cpuidle_map(int cpu) ++{ ++} ++ ++static inline void clear_cpuidle_map(int cpu) ++{ ++} ++ ++static inline bool suitable_idle_cpus(struct task_struct *p) ++{ ++ return uprq->curr == uprq->idle; ++} ++ ++static inline void resched_suitable_idle(struct task_struct *p) ++{ ++} ++ ++void cpu_scaling(int __unused) ++{ ++} ++ ++void cpu_nonscaling(int __unused) ++{ ++} ++ ++/* ++ * Although CPUs can scale in UP, there is nowhere else for tasks to go so this ++ * always returns 0. 
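resched_best_mask() above scores each idle candidate CPU by OR-ing penalty bits together and the lowest score wins, which is how the preference order listed in its comment falls out of the CPUIDLE_* values (1 for a sibling thread up to 32 for another node). A small standalone sketch; the three sample rankings are invented for the example:

#include <stdio.h>

#define CPUIDLE_DIFF_THREAD  (1)
#define CPUIDLE_DIFF_CORE    (2)
#define CPUIDLE_CACHE_BUSY   (4)
#define CPUIDLE_DIFF_CPU     (8)
#define CPUIDLE_THREAD_BUSY  (16)
#define CPUIDLE_DIFF_NODE    (32)

int main(void)
{
    /* Hypothetical penalties for three idle CPUs relative to the waker: */
    unsigned int ranking[3] = {
        CPUIDLE_DIFF_THREAD,                       /* SMT sibling, all idle */
        CPUIDLE_DIFF_CORE | CPUIDLE_CACHE_BUSY,    /* other core, busy cache */
        CPUIDLE_DIFF_NODE | CPUIDLE_DIFF_CPU,      /* other node entirely */
    };
    unsigned int best = ~0u;
    int cpu, best_cpu = -1;

    for (cpu = 0; cpu < 3; cpu++) {
        if (ranking[cpu] < best) {
            best = ranking[cpu];
            best_cpu = cpu;
        }
    }
    printf("best idle CPU: %d (ranking %u)\n", best_cpu, best);  /* 0 (1) */
    return 0;
}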
++ */ ++static inline bool scaling_rq(struct rq *rq) ++{ ++ return false; ++} ++ ++static inline int locality_diff(struct task_struct *p, struct rq *rq) ++{ ++ return 0; ++} ++#endif /* CONFIG_SMP */ ++EXPORT_SYMBOL_GPL(cpu_scaling); ++EXPORT_SYMBOL_GPL(cpu_nonscaling); ++ ++/* ++ * activate_idle_task - move idle task to the _front_ of runqueue. ++ */ ++static inline void activate_idle_task(struct task_struct *p) ++{ ++ enqueue_task_head(p); ++ grq.nr_running++; ++ inc_qnr(); ++} ++ ++static inline int normal_prio(struct task_struct *p) ++{ ++ if (has_rt_policy(p)) ++ return MAX_RT_PRIO - 1 - p->rt_priority; ++ if (idleprio_task(p)) ++ return IDLE_PRIO; ++ if (iso_task(p)) ++ return ISO_PRIO; ++ return NORMAL_PRIO; ++} ++ ++/* ++ * Calculate the current priority, i.e. the priority ++ * taken into account by the scheduler. This value might ++ * be boosted by RT tasks as it will be RT if the task got ++ * RT-boosted. If not then it returns p->normal_prio. ++ */ ++static int effective_prio(struct task_struct *p) ++{ ++ p->normal_prio = normal_prio(p); ++ /* ++ * If we are RT tasks or we were boosted to RT priority, ++ * keep the priority unchanged. Otherwise, update priority ++ * to the normal priority: ++ */ ++ if (!rt_prio(p->prio)) ++ return p->normal_prio; ++ return p->prio; ++} ++ ++/* ++ * activate_task - move a task to the runqueue. Enter with grq locked. ++ */ ++static void activate_task(struct task_struct *p, struct rq *rq) ++{ ++ update_clocks(rq); ++ ++ /* ++ * Sleep time is in units of nanosecs, so shift by 20 to get a ++ * milliseconds-range estimation of the amount of time that the task ++ * spent sleeping: ++ */ ++ if (unlikely(prof_on == SLEEP_PROFILING)) { ++ if (p->state == TASK_UNINTERRUPTIBLE) ++ profile_hits(SLEEP_PROFILING, (void *)get_wchan(p), ++ (rq->clock - p->last_ran) >> 20); ++ } ++ ++ p->prio = effective_prio(p); ++ if (task_contributes_to_load(p)) ++ grq.nr_uninterruptible--; ++ enqueue_task(p); ++ grq.nr_running++; ++ inc_qnr(); ++} ++ ++static inline void clear_sticky(struct task_struct *p); ++ ++/* ++ * deactivate_task - If it's running, it's not on the grq and we can just ++ * decrement the nr_running. Enter with grq locked. ++ */ ++static inline void deactivate_task(struct task_struct *p) ++{ ++ if (task_contributes_to_load(p)) ++ grq.nr_uninterruptible++; ++ grq.nr_running--; ++ clear_sticky(p); ++} ++ ++#ifdef CONFIG_SMP ++void set_task_cpu(struct task_struct *p, unsigned int cpu) ++{ ++#ifdef CONFIG_LOCKDEP ++ /* ++ * The caller should hold grq lock. ++ */ ++ WARN_ON_ONCE(debug_locks && !lockdep_is_held(&grq.lock)); ++#endif ++ trace_sched_migrate_task(p, cpu); ++ if (task_cpu(p) != cpu) ++ perf_sw_event(PERF_COUNT_SW_CPU_MIGRATIONS, 1, NULL, 0); ++ ++ /* ++ * After ->cpu is set up to a new value, task_grq_lock(p, ...) can be ++ * successfully executed on another CPU. We must ensure that updates of ++ * per-task data have been completed by this moment. ++ */ ++ smp_wmb(); ++ task_thread_info(p)->cpu = cpu; ++} ++ ++static inline void clear_sticky(struct task_struct *p) ++{ ++ p->sticky = false; ++} ++ ++static inline bool task_sticky(struct task_struct *p) ++{ ++ return p->sticky; ++} ++ ++/* Reschedule the best idle CPU that is not this one. 
*/ ++static void ++resched_closest_idle(struct rq *rq, int cpu, struct task_struct *p) ++{ ++ cpumask_t tmpmask; ++ ++ cpus_and(tmpmask, p->cpus_allowed, grq.cpu_idle_map); ++ cpu_clear(cpu, tmpmask); ++ if (cpus_empty(tmpmask)) ++ return; ++ resched_best_mask(cpu, rq, &tmpmask); ++} ++ ++/* ++ * We set the sticky flag on a task that is descheduled involuntarily meaning ++ * it is awaiting further CPU time. If the last sticky task is still sticky ++ * but unlucky enough to not be the next task scheduled, we unstick it and try ++ * to find it an idle CPU. Realtime tasks do not stick to minimise their ++ * latency at all times. ++ */ ++static inline void ++swap_sticky(struct rq *rq, int cpu, struct task_struct *p) ++{ ++ if (rq->sticky_task) { ++ if (rq->sticky_task == p) { ++ p->sticky = true; ++ return; ++ } ++ if (task_sticky(rq->sticky_task)) { ++ clear_sticky(rq->sticky_task); ++ resched_closest_idle(rq, cpu, rq->sticky_task); ++ } ++ } ++ if (!rt_task(p)) { ++ p->sticky = true; ++ rq->sticky_task = p; ++ } else { ++ resched_closest_idle(rq, cpu, p); ++ rq->sticky_task = NULL; ++ } ++} ++ ++static inline void unstick_task(struct rq *rq, struct task_struct *p) ++{ ++ rq->sticky_task = NULL; ++ clear_sticky(p); ++} ++#else ++static inline void clear_sticky(struct task_struct *p) ++{ ++} ++ ++static inline bool task_sticky(struct task_struct *p) ++{ ++ return false; ++} ++ ++static inline void ++swap_sticky(struct rq *rq, int cpu, struct task_struct *p) ++{ ++} ++ ++static inline void unstick_task(struct rq *rq, struct task_struct *p) ++{ ++} ++#endif ++ ++/* ++ * Move a task off the global queue and take it to a cpu for it will ++ * become the running task. ++ */ ++static inline void take_task(int cpu, struct task_struct *p) ++{ ++ set_task_cpu(p, cpu); ++ dequeue_task(p); ++ clear_sticky(p); ++ dec_qnr(); ++} ++ ++/* ++ * Returns a descheduling task to the grq runqueue unless it is being ++ * deactivated. ++ */ ++static inline void return_task(struct task_struct *p, bool deactivate) ++{ ++ if (deactivate) ++ deactivate_task(p); ++ else { ++ inc_qnr(); ++ enqueue_task(p); ++ } ++} ++ ++/* ++ * resched_task - mark a task 'to be rescheduled now'. ++ * ++ * On UP this means the setting of the need_resched flag, on SMP it ++ * might also involve a cross-CPU call to trigger the scheduler on ++ * the target CPU. ++ */ ++#ifdef CONFIG_SMP ++ ++#ifndef tsk_is_polling ++#define tsk_is_polling(t) test_tsk_thread_flag(t, TIF_POLLING_NRFLAG) ++#endif ++ ++static void resched_task(struct task_struct *p) ++{ ++ int cpu; ++ ++ assert_raw_spin_locked(&grq.lock); ++ ++ if (unlikely(test_tsk_thread_flag(p, TIF_NEED_RESCHED))) ++ return; ++ ++ set_tsk_thread_flag(p, TIF_NEED_RESCHED); ++ ++ cpu = task_cpu(p); ++ if (cpu == smp_processor_id()) ++ return; ++ ++ /* NEED_RESCHED must be visible before we test polling */ ++ smp_mb(); ++ if (!tsk_is_polling(p)) ++ smp_send_reschedule(cpu); ++} ++ ++#else ++static inline void resched_task(struct task_struct *p) ++{ ++ assert_raw_spin_locked(&grq.lock); ++ set_tsk_need_resched(p); ++} ++#endif ++ ++/** ++ * task_curr - is this task currently executing on a CPU? ++ * @p: the task in question. ++ */ ++inline int task_curr(const struct task_struct *p) ++{ ++ return cpu_curr(task_cpu(p)) == p; ++} ++ ++#ifdef CONFIG_SMP ++struct migration_req { ++ struct task_struct *task; ++ int dest_cpu; ++}; ++ ++/* ++ * wait_task_inactive - wait for a thread to unschedule. ++ * ++ * If @match_state is nonzero, it's the @p->state value just checked and ++ * not expected to change. 
If it changes, i.e. @p might have woken up, ++ * then return zero. When we succeed in waiting for @p to be off its CPU, ++ * we return a positive number (its total switch count). If a second call ++ * a short while later returns the same number, the caller can be sure that ++ * @p has remained unscheduled the whole time. ++ * ++ * The caller must ensure that the task *will* unschedule sometime soon, ++ * else this function might spin for a *long* time. This function can't ++ * be called with interrupts off, or it may introduce deadlock with ++ * smp_call_function() if an IPI is sent by the same process we are ++ * waiting to become inactive. ++ */ ++unsigned long wait_task_inactive(struct task_struct *p, long match_state) ++{ ++ unsigned long flags; ++ bool running, on_rq; ++ unsigned long ncsw; ++ struct rq *rq; ++ ++ for (;;) { ++ /* ++ * We do the initial early heuristics without holding ++ * any task-queue locks at all. We'll only try to get ++ * the runqueue lock when things look like they will ++ * work out! In the unlikely event rq is dereferenced ++ * since we're lockless, grab it again. ++ */ ++#ifdef CONFIG_SMP ++retry_rq: ++ rq = task_rq(p); ++ if (unlikely(!rq)) ++ goto retry_rq; ++#else /* CONFIG_SMP */ ++ rq = task_rq(p); ++#endif ++ /* ++ * If the task is actively running on another CPU ++ * still, just relax and busy-wait without holding ++ * any locks. ++ * ++ * NOTE! Since we don't hold any locks, it's not ++ * even sure that "rq" stays as the right runqueue! ++ * But we don't care, since this will return false ++ * if the runqueue has changed and p is actually now ++ * running somewhere else! ++ */ ++ while (task_running(p) && p == rq->curr) { ++ if (match_state && unlikely(p->state != match_state)) ++ return 0; ++ cpu_relax(); ++ } ++ ++ /* ++ * Ok, time to look more closely! We need the grq ++ * lock now, to be *sure*. If we're wrong, we'll ++ * just go back and repeat. ++ */ ++ rq = task_grq_lock(p, &flags); ++ trace_sched_wait_task(p); ++ running = task_running(p); ++ on_rq = task_queued(p); ++ ncsw = 0; ++ if (!match_state || p->state == match_state) ++ ncsw = p->nvcsw | LONG_MIN; /* sets MSB */ ++ task_grq_unlock(&flags); ++ ++ /* ++ * If it changed from the expected state, bail out now. ++ */ ++ if (unlikely(!ncsw)) ++ break; ++ ++ /* ++ * Was it really running after all now that we ++ * checked with the proper locks actually held? ++ * ++ * Oops. Go back and try again.. ++ */ ++ if (unlikely(running)) { ++ cpu_relax(); ++ continue; ++ } ++ ++ /* ++ * It's not enough that it's not actively running, ++ * it must be off the runqueue _entirely_, and not ++ * preempted! ++ * ++ * So if it was still runnable (but just not actively ++ * running right now), it's preempted, and we should ++ * yield - it could be a while. ++ */ ++ if (unlikely(on_rq)) { ++ ktime_t to = ktime_set(0, NSEC_PER_SEC / HZ); ++ ++ set_current_state(TASK_UNINTERRUPTIBLE); ++ schedule_hrtimeout(&to, HRTIMER_MODE_REL); ++ continue; ++ } ++ ++ /* ++ * Ahh, all good. It wasn't running, and it wasn't ++ * runnable, which means that it will never become ++ * running in the future either. We're all done! ++ */ ++ break; ++ } ++ ++ return ncsw; ++} ++ ++/*** ++ * kick_process - kick a running thread to enter/exit the kernel ++ * @p: the to-be-kicked thread ++ * ++ * Cause a process which is running on another CPU to enter ++ * kernel-mode, without any delay. (to get signals handled.) 
++ * ++ * NOTE: this function doesn't have to take the runqueue lock, ++ * because all it wants to ensure is that the remote task enters ++ * the kernel. If the IPI races and the task has been migrated ++ * to another CPU then no harm is done and the purpose has been ++ * achieved as well. ++ */ ++void kick_process(struct task_struct *p) ++{ ++ int cpu; ++ ++ preempt_disable(); ++ cpu = task_cpu(p); ++ if ((cpu != smp_processor_id()) && task_curr(p)) ++ smp_send_reschedule(cpu); ++ preempt_enable(); ++} ++EXPORT_SYMBOL_GPL(kick_process); ++#endif ++ ++#define rq_idle(rq) ((rq)->rq_prio == PRIO_LIMIT) ++ ++/* ++ * RT tasks preempt purely on priority. SCHED_NORMAL tasks preempt on the ++ * basis of earlier deadlines. SCHED_IDLEPRIO don't preempt anything else or ++ * between themselves, they cooperatively multitask. An idle rq scores as ++ * prio PRIO_LIMIT so it is always preempted. ++ */ ++static inline bool ++can_preempt(struct task_struct *p, int prio, u64 deadline) ++{ ++ /* Better static priority RT task or better policy preemption */ ++ if (p->prio < prio) ++ return true; ++ if (p->prio > prio) ++ return false; ++ /* SCHED_NORMAL, BATCH and ISO will preempt based on deadline */ ++ if (!deadline_before(p->deadline, deadline)) ++ return false; ++ return true; ++} ++ ++#ifdef CONFIG_SMP ++#ifdef CONFIG_HOTPLUG_CPU ++/* ++ * Check to see if there is a task that is affined only to offline CPUs but ++ * still wants runtime. This happens to kernel threads during suspend/halt and ++ * disabling of CPUs. ++ */ ++static inline bool online_cpus(struct task_struct *p) ++{ ++ return (likely(cpus_intersects(cpu_online_map, p->cpus_allowed))); ++} ++#else /* CONFIG_HOTPLUG_CPU */ ++/* All available CPUs are always online without hotplug. */ ++static inline bool online_cpus(struct task_struct *p) ++{ ++ return true; ++} ++#endif ++ ++/* ++ * Check to see if p can run on cpu, and if not, whether there are any online ++ * CPUs it can run on instead. ++ */ ++static inline bool needs_other_cpu(struct task_struct *p, int cpu) ++{ ++ if (unlikely(!cpu_isset(cpu, p->cpus_allowed))) ++ return true; ++ return false; ++} ++ ++/* ++ * When all else is equal, still prefer this_rq. ++ */ ++static void try_preempt(struct task_struct *p, struct rq *this_rq) ++{ ++ struct rq *highest_prio_rq = NULL; ++ int cpu, highest_prio; ++ u64 latest_deadline; ++ cpumask_t tmp; ++ ++ /* ++ * We clear the sticky flag here because for a task to have called ++ * try_preempt with the sticky flag enabled means some complicated ++ * re-scheduling has occurred and we should ignore the sticky flag. 
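can_preempt() above is the whole preemption policy in two steps: a strictly better (numerically lower) priority always wins, and between equal priorities the task with the earlier virtual deadline wins. A standalone sketch of that rule; types are simplified, the prio value 102 is BFS's NORMAL_PRIO from the earlier sched.h hunk, and the deadlines are made up:

#include <stdio.h>
#include <stdint.h>
#include <stdbool.h>

struct fake_task { int prio; uint64_t deadline; };

static bool can_preempt(const struct fake_task *p, int prio, uint64_t deadline)
{
    if (p->prio < prio)            /* better (lower) priority always wins */
        return true;
    if (p->prio > prio)
        return false;
    return p->deadline < deadline; /* same priority: earlier deadline wins */
}

int main(void)
{
    struct fake_task waker = { .prio = 102, .deadline = 5000 };

    printf("vs prio 103           : %d\n", can_preempt(&waker, 103, 9000)); /* 1 */
    printf("vs prio 102, dl 4000  : %d\n", can_preempt(&waker, 102, 4000)); /* 0 */
    printf("vs prio 102, dl 6000  : %d\n", can_preempt(&waker, 102, 6000)); /* 1 */
    return 0;
}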
++ */ ++ clear_sticky(p); ++ ++ if (suitable_idle_cpus(p)) { ++ resched_best_idle(p); ++ return; ++ } ++ ++ /* IDLEPRIO tasks never preempt anything but idle */ ++ if (p->policy == SCHED_IDLEPRIO) ++ return; ++ ++ if (likely(online_cpus(p))) ++ cpus_and(tmp, cpu_online_map, p->cpus_allowed); ++ else ++ return; ++ ++ highest_prio = latest_deadline = 0; ++ ++ for_each_cpu_mask(cpu, tmp) { ++ struct rq *rq; ++ int rq_prio; ++ ++ rq = cpu_rq(cpu); ++ rq_prio = rq->rq_prio; ++ if (rq_prio < highest_prio) ++ continue; ++ ++ if (rq_prio > highest_prio || ++ deadline_after(rq->rq_deadline, latest_deadline)) { ++ latest_deadline = rq->rq_deadline; ++ highest_prio = rq_prio; ++ highest_prio_rq = rq; ++ } ++ } ++ ++ if (likely(highest_prio_rq)) { ++ if (can_preempt(p, highest_prio, highest_prio_rq->rq_deadline)) ++ resched_task(highest_prio_rq->curr); ++ } ++} ++#else /* CONFIG_SMP */ ++static inline bool needs_other_cpu(struct task_struct *p, int cpu) ++{ ++ return false; ++} ++ ++static void try_preempt(struct task_struct *p, struct rq *this_rq) ++{ ++ if (p->policy == SCHED_IDLEPRIO) ++ return; ++ if (can_preempt(p, uprq->rq_prio, uprq->rq_deadline)) ++ resched_task(uprq->curr); ++} ++#endif /* CONFIG_SMP */ ++ ++static void ++ttwu_stat(struct task_struct *p, int cpu, int wake_flags) ++{ ++#ifdef CONFIG_SCHEDSTATS ++ struct rq *rq = this_rq(); ++ ++#ifdef CONFIG_SMP ++ int this_cpu = smp_processor_id(); ++ ++ if (cpu == this_cpu) ++ schedstat_inc(rq, ttwu_local); ++ else { ++ struct sched_domain *sd; ++ ++ rcu_read_lock(); ++ for_each_domain(this_cpu, sd) { ++ if (cpumask_test_cpu(cpu, sched_domain_span(sd))) { ++ schedstat_inc(sd, ttwu_wake_remote); ++ break; ++ } ++ } ++ rcu_read_unlock(); ++ } ++ ++#endif /* CONFIG_SMP */ ++ ++ schedstat_inc(rq, ttwu_count); ++#endif /* CONFIG_SCHEDSTATS */ ++} ++ ++static inline void ttwu_activate(struct task_struct *p, struct rq *rq, ++ bool is_sync) ++{ ++ activate_task(p, rq); ++ ++ /* ++ * Sync wakeups (i.e. those types of wakeups where the waker ++ * has indicated that it will leave the CPU in short order) ++ * don't trigger a preemption if there are no idle cpus, ++ * instead waiting for current to deschedule. ++ */ ++ if (!is_sync || suitable_idle_cpus(p)) ++ try_preempt(p, rq); ++} ++ ++static inline void ttwu_post_activation(struct task_struct *p, struct rq *rq, ++ bool success) ++{ ++ trace_sched_wakeup(p, success); ++ p->state = TASK_RUNNING; ++ ++ /* ++ * if a worker is waking up, notify workqueue. Note that on BFS, we ++ * don't really know what cpu it will be, so we fake it for ++ * wq_worker_waking_up :/ ++ */ ++ if ((p->flags & PF_WQ_WORKER) && success) ++ wq_worker_waking_up(p, cpu_of(rq)); ++} ++ ++#ifdef CONFIG_SMP ++void scheduler_ipi(void) ++{ ++} ++#endif /* CONFIG_SMP */ ++ ++/*** ++ * try_to_wake_up - wake up a thread ++ * @p: the thread to be awakened ++ * @state: the mask of task states that can be woken ++ * @wake_flags: wake modifier flags (WF_*) ++ * ++ * Put it on the run-queue if it's not already there. The "current" ++ * thread is always on the run-queue (except when the actual ++ * re-schedule is in progress), and as such you're allowed to do ++ * the simpler "current->state = TASK_RUNNING" to mark yourself ++ * runnable without the overhead of this. ++ * ++ * Returns %true if @p was woken up, %false if it was already running ++ * or @state didn't match @p's state. 
++ */ ++static bool try_to_wake_up(struct task_struct *p, unsigned int state, ++ int wake_flags) ++{ ++ bool success = false; ++ unsigned long flags; ++ struct rq *rq; ++ int cpu; ++ ++ get_cpu(); ++ ++ /* This barrier is undocumented, probably for p->state? */ ++ smp_wmb(); ++ ++ /* ++ * No need to do time_lock_grq as we only need to update the rq clock ++ * if we activate the task ++ */ ++ rq = task_grq_lock(p, &flags); ++ cpu = task_cpu(p); ++ ++ /* state is a volatile long, no idea why */ ++ if (!((unsigned int)p->state & state)) ++ goto out_unlock; ++ ++ if (task_queued(p) || task_running(p)) ++ goto out_running; ++ ++ ttwu_activate(p, rq, wake_flags & WF_SYNC); ++ success = true; ++ ++out_running: ++ ttwu_post_activation(p, rq, success); ++out_unlock: ++ task_grq_unlock(&flags); ++ ++ ttwu_stat(p, cpu, wake_flags); ++ ++ put_cpu(); ++ ++ return success; ++} ++ ++/** ++ * try_to_wake_up_local - try to wake up a local task with grq lock held ++ * @p: the thread to be awakened ++ * ++ * Put @p on the run-queue if it's not already there. The caller must ++ * ensure that grq is locked and @p is not the current task. ++ * grq stays locked over invocation. ++ */ ++static void try_to_wake_up_local(struct task_struct *p) ++{ ++ struct rq *rq = task_rq(p); ++ bool success = false; ++ ++ lockdep_assert_held(&grq.lock); ++ ++ if (!(p->state & TASK_NORMAL)) ++ return; ++ ++ if (!task_queued(p)) { ++ if (likely(!task_running(p))) { ++ schedstat_inc(rq, ttwu_count); ++ schedstat_inc(rq, ttwu_local); ++ } ++ ttwu_activate(p, rq, false); ++ ttwu_stat(p, smp_processor_id(), 0); ++ success = true; ++ } ++ ttwu_post_activation(p, rq, success); ++} ++ ++/** ++ * wake_up_process - Wake up a specific process ++ * @p: The process to be woken up. ++ * ++ * Attempt to wake up the nominated process and move it to the set of runnable ++ * processes. Returns 1 if the process was woken up, 0 if it was already ++ * running. ++ * ++ * It may be assumed that this function implies a write memory barrier before ++ * changing the task state if and only if any tasks are woken up. ++ */ ++int wake_up_process(struct task_struct *p) ++{ ++ return try_to_wake_up(p, TASK_ALL, 0); ++} ++EXPORT_SYMBOL(wake_up_process); ++ ++int wake_up_state(struct task_struct *p, unsigned int state) ++{ ++ return try_to_wake_up(p, state, 0); ++} ++ ++static void time_slice_expired(struct task_struct *p); ++ ++/* ++ * Perform scheduler related setup for a newly forked process p. ++ * p is forked by current. ++ */ ++void sched_fork(struct task_struct *p) ++{ ++ struct task_struct *curr; ++ int cpu = get_cpu(); ++ struct rq *rq; ++ ++#ifdef CONFIG_PREEMPT_NOTIFIERS ++ INIT_HLIST_HEAD(&p->preempt_notifiers); ++#endif ++ /* ++ * We mark the process as running here. This guarantees that ++ * nobody will actually run it, and a signal or other external ++ * event cannot wake it up and insert it on the runqueue either. ++ */ ++ p->state = TASK_RUNNING; ++ set_task_cpu(p, cpu); ++ ++ /* Should be reset in fork.c but done here for ease of bfs patching */ ++ p->sched_time = p->stime_pc = p->utime_pc = 0; ++ ++ /* ++ * Revert to default priority/policy on fork if requested. ++ */ ++ if (unlikely(p->sched_reset_on_fork)) { ++ if (p->policy == SCHED_FIFO || p->policy == SCHED_RR) { ++ p->policy = SCHED_NORMAL; ++ p->normal_prio = normal_prio(p); ++ } ++ ++ if (PRIO_TO_NICE(p->static_prio) < 0) { ++ p->static_prio = NICE_TO_PRIO(0); ++ p->normal_prio = p->static_prio; ++ } ++ ++ /* ++ * We don't need the reset flag anymore after the fork.
It has ++ * fulfilled its duty: ++ */ ++ p->sched_reset_on_fork = 0; ++ } ++ ++ curr = current; ++ /* ++ * Make sure we do not leak PI boosting priority to the child. ++ */ ++ p->prio = curr->normal_prio; ++ ++ INIT_LIST_HEAD(&p->run_list); ++#if defined(CONFIG_SCHEDSTATS) || defined(CONFIG_TASK_DELAY_ACCT) ++ if (unlikely(sched_info_on())) ++ memset(&p->sched_info, 0, sizeof(p->sched_info)); ++#endif ++ ++ p->on_cpu = false; ++ clear_sticky(p); ++ ++#ifdef CONFIG_PREEMPT_COUNT ++ /* Want to start with kernel preemption disabled. */ ++ task_thread_info(p)->preempt_count = 1; ++#endif ++ if (unlikely(p->policy == SCHED_FIFO)) ++ goto out; ++ /* ++ * Share the timeslice between parent and child, thus the ++ * total amount of pending timeslices in the system doesn't change, ++ * resulting in more scheduling fairness. If it's negative, it won't ++ * matter since that's the same as being 0. current's time_slice is ++ * actually in rq_time_slice when it's running, as is its last_ran ++ * value. rq->rq_deadline is only modified within schedule() so it ++ * is always equal to current->deadline. ++ */ ++ rq = task_grq_lock_irq(curr); ++ if (likely(rq->rq_time_slice >= RESCHED_US * 2)) { ++ rq->rq_time_slice /= 2; ++ p->time_slice = rq->rq_time_slice; ++ } else { ++ /* ++ * Forking task has run out of timeslice. Reschedule it and ++ * start its child with a new time slice and deadline. The ++ * child will end up running first because its deadline will ++ * be slightly earlier. ++ */ ++ rq->rq_time_slice = 0; ++ set_tsk_need_resched(curr); ++ time_slice_expired(p); ++ } ++ p->last_ran = rq->rq_last_ran; ++ task_grq_unlock_irq(); ++out: ++ put_cpu(); ++} ++ ++/* ++ * wake_up_new_task - wake up a newly created task for the first time. ++ * ++ * This function will do some initial scheduler statistics housekeeping ++ * that must be done for every newly created context, then puts the task ++ * on the runqueue and wakes it. ++ */ ++void wake_up_new_task(struct task_struct *p) ++{ ++ struct task_struct *parent; ++ unsigned long flags; ++ struct rq *rq; ++ ++ rq = task_grq_lock(p, &flags); ++ p->state = TASK_RUNNING; ++ parent = p->parent; ++ /* Unnecessary but small chance that the parent changed CPU */ ++ set_task_cpu(p, task_cpu(parent)); ++ activate_task(p, rq); ++ trace_sched_wakeup_new(p, 1); ++ if (rq->curr == parent && !suitable_idle_cpus(p)) { ++ /* ++ * The VM isn't cloned, so we're in a good position to ++ * do child-runs-first in anticipation of an exec. This ++ * usually avoids a lot of COW overhead. ++ */ ++ resched_task(parent); ++ } else ++ try_preempt(p, rq); ++ task_grq_unlock(&flags); ++} ++ ++#ifdef CONFIG_PREEMPT_NOTIFIERS ++ ++/** ++ * preempt_notifier_register - tell me when current is being preempted & rescheduled ++ * @notifier: notifier struct to register ++ */ ++void preempt_notifier_register(struct preempt_notifier *notifier) ++{ ++ hlist_add_head(¬ifier->link, ¤t->preempt_notifiers); ++} ++EXPORT_SYMBOL_GPL(preempt_notifier_register); ++ ++/** ++ * preempt_notifier_unregister - no longer interested in preemption notifications ++ * @notifier: notifier struct to unregister ++ * ++ * This is safe to call from within a preemption notifier. 
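sched_fork() above splits the parent's remaining timeslice in half with the child, so forking never creates CPU time out of thin air; only when the parent has less than 2 * RESCHED_US left is it rescheduled and the child started with a fresh slice and deadline. A standalone sketch of that split (simplified: the refill value is approximated here by the default 6 ms slice):

#include <stdio.h>

#define RESCHED_US   100          /* from the patch */
#define DEF_SLICE_US (6 << 10)    /* timeslice() with the default rr_interval = 6 */

int main(void)
{
    int parent_slice = DEF_SLICE_US;
    int child_slice;

    if (parent_slice >= RESCHED_US * 2) {
        parent_slice /= 2;           /* rq->rq_time_slice /= 2 */
        child_slice = parent_slice;  /* p->time_slice = rq->rq_time_slice */
    } else {
        parent_slice = 0;            /* parent is marked for reschedule */
        child_slice = DEF_SLICE_US;  /* child refilled, new deadline */
    }
    printf("parent keeps %d us, child gets %d us\n", parent_slice, child_slice);
    return 0;
}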
++ */ ++void preempt_notifier_unregister(struct preempt_notifier *notifier) ++{ ++ hlist_del(&notifier->link); ++} ++EXPORT_SYMBOL_GPL(preempt_notifier_unregister); ++ ++static void fire_sched_in_preempt_notifiers(struct task_struct *curr) ++{ ++ struct preempt_notifier *notifier; ++ struct hlist_node *node; ++ ++ hlist_for_each_entry(notifier, node, &curr->preempt_notifiers, link) ++ notifier->ops->sched_in(notifier, raw_smp_processor_id()); ++} ++ ++static void ++fire_sched_out_preempt_notifiers(struct task_struct *curr, ++ struct task_struct *next) ++{ ++ struct preempt_notifier *notifier; ++ struct hlist_node *node; ++ ++ hlist_for_each_entry(notifier, node, &curr->preempt_notifiers, link) ++ notifier->ops->sched_out(notifier, next); ++} ++ ++#else /* !CONFIG_PREEMPT_NOTIFIERS */ ++ ++static void fire_sched_in_preempt_notifiers(struct task_struct *curr) ++{ ++} ++ ++static void ++fire_sched_out_preempt_notifiers(struct task_struct *curr, ++ struct task_struct *next) ++{ ++} ++ ++#endif /* CONFIG_PREEMPT_NOTIFIERS */ ++ ++/** ++ * prepare_task_switch - prepare to switch tasks ++ * @rq: the runqueue preparing to switch ++ * @next: the task we are going to switch to. ++ * ++ * This is called with the rq lock held and interrupts off. It must ++ * be paired with a subsequent finish_task_switch after the context ++ * switch. ++ * ++ * prepare_task_switch sets up locking and calls architecture specific ++ * hooks. ++ */ ++static inline void ++prepare_task_switch(struct rq *rq, struct task_struct *prev, ++ struct task_struct *next) ++{ ++ sched_info_switch(prev, next); ++ perf_event_task_sched_out(prev, next); ++ fire_sched_out_preempt_notifiers(prev, next); ++ prepare_lock_switch(rq, next); ++ prepare_arch_switch(next); ++ trace_sched_switch(prev, next); ++} ++ ++/** ++ * finish_task_switch - clean up after a task-switch ++ * @rq: runqueue associated with task-switch ++ * @prev: the thread we just switched away from. ++ * ++ * finish_task_switch must be called after the context switch, paired ++ * with a prepare_task_switch call before the context switch. ++ * finish_task_switch will reconcile locking set up by prepare_task_switch, ++ * and do any other architecture-specific cleanup actions. ++ * ++ * Note that we may have delayed dropping an mm in context_switch(). If ++ * so, we finish that here outside of the runqueue lock. (Doing it ++ * with the lock held can cause deadlocks; see schedule() for ++ * details.) ++ */ ++static inline void finish_task_switch(struct rq *rq, struct task_struct *prev) ++ __releases(grq.lock) ++{ ++ struct mm_struct *mm = rq->prev_mm; ++ long prev_state; ++ ++ rq->prev_mm = NULL; ++ ++ /* ++ * A task struct has one reference for the use as "current". ++ * If a task dies, then it sets TASK_DEAD in tsk->state and calls ++ * schedule one last time. The schedule call will never return, and ++ * the scheduled task must drop that reference. ++ * The test for TASK_DEAD must occur while the runqueue locks are ++ * still held, otherwise prev could be scheduled on another cpu, die ++ * there before we look at prev->state, and then the reference would ++ * be dropped twice.
++ * Manfred Spraul ++ */ ++ prev_state = prev->state; ++ finish_arch_switch(prev); ++#ifdef __ARCH_WANT_INTERRUPTS_ON_CTXSW ++ local_irq_disable(); ++#endif /* __ARCH_WANT_INTERRUPTS_ON_CTXSW */ ++ perf_event_task_sched_in(prev, current); ++#ifdef __ARCH_WANT_INTERRUPTS_ON_CTXSW ++ local_irq_enable(); ++#endif /* __ARCH_WANT_INTERRUPTS_ON_CTXSW */ ++ finish_lock_switch(rq, prev); ++ ++ fire_sched_in_preempt_notifiers(current); ++ if (mm) ++ mmdrop(mm); ++ if (unlikely(prev_state == TASK_DEAD)) { ++ /* ++ * Remove function-return probe instances associated with this ++ * task and put them back on the free list. ++ */ ++ kprobe_flush_task(prev); ++ put_task_struct(prev); ++ } ++} ++ ++/** ++ * schedule_tail - first thing a freshly forked thread must call. ++ * @prev: the thread we just switched away from. ++ */ ++asmlinkage void schedule_tail(struct task_struct *prev) ++ __releases(grq.lock) ++{ ++ struct rq *rq = this_rq(); ++ ++ finish_task_switch(rq, prev); ++#ifdef __ARCH_WANT_UNLOCKED_CTXSW ++ /* In this case, finish_task_switch does not reenable preemption */ ++ preempt_enable(); ++#endif ++ if (current->set_child_tid) ++ put_user(current->pid, current->set_child_tid); ++} ++ ++/* ++ * context_switch - switch to the new MM and the new ++ * thread's register state. ++ */ ++static inline void ++context_switch(struct rq *rq, struct task_struct *prev, ++ struct task_struct *next) ++{ ++ struct mm_struct *mm, *oldmm; ++ ++ prepare_task_switch(rq, prev, next); ++ ++ mm = next->mm; ++ oldmm = prev->active_mm; ++ /* ++ * For paravirt, this is coupled with an exit in switch_to to ++ * combine the page table reload and the switch backend into ++ * one hypercall. ++ */ ++ arch_start_context_switch(prev); ++ ++ if (!mm) { ++ next->active_mm = oldmm; ++ atomic_inc(&oldmm->mm_count); ++ enter_lazy_tlb(oldmm, next); ++ } else ++ switch_mm(oldmm, mm, next); ++ ++ if (!prev->mm) { ++ prev->active_mm = NULL; ++ rq->prev_mm = oldmm; ++ } ++ /* ++ * Since the runqueue lock will be released by the next ++ * task (which is an invalid locking op but in the case ++ * of the scheduler it's an obvious special-case), so we ++ * do an early lockdep release here: ++ */ ++#ifndef __ARCH_WANT_UNLOCKED_CTXSW ++ spin_release(&grq.lock.dep_map, 1, _THIS_IP_); ++#endif ++ ++ /* Here we just switch the register state and the stack. */ ++ switch_to(prev, next, prev); ++ ++ barrier(); ++ /* ++ * this_rq must be evaluated again because prev may have moved ++ * CPUs since it called schedule(), thus the 'rq' on its stack ++ * frame will be invalid. ++ */ ++ finish_task_switch(this_rq(), prev); ++} ++ ++/* ++ * nr_running, nr_uninterruptible and nr_context_switches: ++ * ++ * externally visible scheduler statistics: current number of runnable ++ * threads, current number of uninterruptible-sleeping threads, total ++ * number of context switches performed since bootup. All are measured ++ * without grabbing the grq lock but the occasional inaccurate result ++ * doesn't matter so long as it's positive. 
++ */ ++unsigned long nr_running(void) ++{ ++ long nr = grq.nr_running; ++ ++ if (unlikely(nr < 0)) ++ nr = 0; ++ return (unsigned long)nr; ++} ++ ++unsigned long nr_uninterruptible(void) ++{ ++ long nu = grq.nr_uninterruptible; ++ ++ if (unlikely(nu < 0)) ++ nu = 0; ++ return nu; ++} ++ ++unsigned long long nr_context_switches(void) ++{ ++ long long ns = grq.nr_switches; ++ ++ /* This is of course impossible */ ++ if (unlikely(ns < 0)) ++ ns = 1; ++ return (unsigned long long)ns; ++} ++ ++unsigned long nr_iowait(void) ++{ ++ unsigned long i, sum = 0; ++ ++ for_each_possible_cpu(i) ++ sum += atomic_read(&cpu_rq(i)->nr_iowait); ++ ++ return sum; ++} ++ ++unsigned long nr_iowait_cpu(int cpu) ++{ ++ struct rq *this = cpu_rq(cpu); ++ return atomic_read(&this->nr_iowait); ++} ++ ++unsigned long nr_active(void) ++{ ++ return nr_running() + nr_uninterruptible(); ++} ++ ++/* Beyond a task running on this CPU, load is equal everywhere on BFS */ ++unsigned long this_cpu_load(void) ++{ ++ return this_rq()->rq_running + ++ ((queued_notrunning() + nr_uninterruptible()) / grq.noc); ++} ++ ++/* Variables and functions for calc_load */ ++static unsigned long calc_load_update; ++unsigned long avenrun[3]; ++EXPORT_SYMBOL(avenrun); ++ ++/** ++ * get_avenrun - get the load average array ++ * @loads: pointer to dest load array ++ * @offset: offset to add ++ * @shift: shift count to shift the result left ++ * ++ * These values are estimates at best, so no need for locking. ++ */ ++void get_avenrun(unsigned long *loads, unsigned long offset, int shift) ++{ ++ loads[0] = (avenrun[0] + offset) << shift; ++ loads[1] = (avenrun[1] + offset) << shift; ++ loads[2] = (avenrun[2] + offset) << shift; ++} ++ ++static unsigned long ++calc_load(unsigned long load, unsigned long exp, unsigned long active) ++{ ++ load *= exp; ++ load += active * (FIXED_1 - exp); ++ return load >> FSHIFT; ++} ++ ++/* ++ * calc_load - update the avenrun load estimates every LOAD_FREQ seconds. ++ */ ++void calc_global_load(unsigned long ticks) ++{ ++ long active; ++ ++ if (time_before(jiffies, calc_load_update)) ++ return; ++ active = nr_active() * FIXED_1; ++ ++ avenrun[0] = calc_load(avenrun[0], EXP_1, active); ++ avenrun[1] = calc_load(avenrun[1], EXP_5, active); ++ avenrun[2] = calc_load(avenrun[2], EXP_15, active); ++ ++ calc_load_update = jiffies + LOAD_FREQ; ++} ++ ++DEFINE_PER_CPU(struct kernel_stat, kstat); ++DEFINE_PER_CPU(struct kernel_cpustat, kernel_cpustat); ++ ++EXPORT_PER_CPU_SYMBOL(kstat); ++EXPORT_PER_CPU_SYMBOL(kernel_cpustat); ++ ++#ifdef CONFIG_IRQ_TIME_ACCOUNTING ++ ++/* ++ * There are no locks covering percpu hardirq/softirq time. ++ * They are only modified in account_system_vtime, on corresponding CPU ++ * with interrupts disabled. So, writes are safe. ++ * They are read and saved off onto struct rq in update_rq_clock(). ++ * This may result in other CPU reading this CPU's irq time and can ++ * race with irq/account_system_vtime on this CPU. We would either get old ++ * or new value with a side effect of accounting a slice of irq time to wrong ++ * task when irq is in progress while we read rq->clock. That is a worthy ++ * compromise in place of having locks on each irq in account_system_time. 
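The avenrun update above is a fixed-point exponentially-weighted moving average. As an editorial illustration (not part of the patch), the following userspace sketch reproduces the calc_load() arithmetic with the mainline constants FSHIFT = 11, FIXED_1 = 1 << FSHIFT and EXP_1 = 1884 (the 1-minute decay factor), showing how a constant number of active tasks pulls the 1-minute average toward that value over successive LOAD_FREQ intervals.

/* Illustrative userspace model of the calc_load() arithmetic above.
 * FSHIFT/FIXED_1/EXP_1 match the mainline definitions; the "3 runnable
 * tasks" scenario is made up for the demonstration. */
#include <stdio.h>

#define FSHIFT  11
#define FIXED_1 (1UL << FSHIFT)   /* 1.0 in fixed point */
#define EXP_1   1884              /* 1-minute decay factor in fixed point */

static unsigned long calc_load(unsigned long load, unsigned long exp,
                               unsigned long active)
{
        load *= exp;
        load += active * (FIXED_1 - exp);
        return load >> FSHIFT;
}

int main(void)
{
        unsigned long avenrun0 = 0;              /* 1-minute average */
        unsigned long active = 3 * FIXED_1;      /* pretend 3 runnable tasks */
        int i;

        /* One iteration per LOAD_FREQ (~5s); the average converges on 3.00 */
        for (i = 1; i <= 24; i++) {
                avenrun0 = calc_load(avenrun0, EXP_1, active);
                printf("after %2d updates: %lu.%02lu\n", i,
                       avenrun0 >> FSHIFT,
                       (avenrun0 & (FIXED_1 - 1)) * 100 / FIXED_1);
        }
        return 0;
}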
++ */ ++static DEFINE_PER_CPU(u64, cpu_hardirq_time); ++static DEFINE_PER_CPU(u64, cpu_softirq_time); ++ ++static DEFINE_PER_CPU(u64, irq_start_time); ++static int sched_clock_irqtime; ++ ++void enable_sched_clock_irqtime(void) ++{ ++ sched_clock_irqtime = 1; ++} ++ ++void disable_sched_clock_irqtime(void) ++{ ++ sched_clock_irqtime = 0; ++} ++ ++#ifndef CONFIG_64BIT ++static DEFINE_PER_CPU(seqcount_t, irq_time_seq); ++ ++static inline void irq_time_write_begin(void) ++{ ++ __this_cpu_inc(irq_time_seq.sequence); ++ smp_wmb(); ++} ++ ++static inline void irq_time_write_end(void) ++{ ++ smp_wmb(); ++ __this_cpu_inc(irq_time_seq.sequence); ++} ++ ++static inline u64 irq_time_read(int cpu) ++{ ++ u64 irq_time; ++ unsigned seq; ++ ++ do { ++ seq = read_seqcount_begin(&per_cpu(irq_time_seq, cpu)); ++ irq_time = per_cpu(cpu_softirq_time, cpu) + ++ per_cpu(cpu_hardirq_time, cpu); ++ } while (read_seqcount_retry(&per_cpu(irq_time_seq, cpu), seq)); ++ ++ return irq_time; ++} ++#else /* CONFIG_64BIT */ ++static inline void irq_time_write_begin(void) ++{ ++} ++ ++static inline void irq_time_write_end(void) ++{ ++} ++ ++static inline u64 irq_time_read(int cpu) ++{ ++ return per_cpu(cpu_softirq_time, cpu) + per_cpu(cpu_hardirq_time, cpu); ++} ++#endif /* CONFIG_64BIT */ ++ ++/* ++ * Called before incrementing preempt_count on {soft,}irq_enter ++ * and before decrementing preempt_count on {soft,}irq_exit. ++ */ ++void account_system_vtime(struct task_struct *curr) ++{ ++ unsigned long flags; ++ s64 delta; ++ int cpu; ++ ++ if (!sched_clock_irqtime) ++ return; ++ ++ local_irq_save(flags); ++ ++ cpu = smp_processor_id(); ++ delta = sched_clock_cpu(cpu) - __this_cpu_read(irq_start_time); ++ __this_cpu_add(irq_start_time, delta); ++ ++ irq_time_write_begin(); ++ /* ++ * We do not account for softirq time from ksoftirqd here. ++ * We want to continue accounting softirq time to ksoftirqd thread ++ * in that case, so as not to confuse scheduler with a special task ++ * that do not consume any time, but still wants to run. ++ */ ++ if (hardirq_count()) ++ __this_cpu_add(cpu_hardirq_time, delta); ++ else if (in_serving_softirq() && curr != this_cpu_ksoftirqd()) ++ __this_cpu_add(cpu_softirq_time, delta); ++ ++ irq_time_write_end(); ++ local_irq_restore(flags); ++} ++EXPORT_SYMBOL_GPL(account_system_vtime); ++ ++#endif /* CONFIG_IRQ_TIME_ACCOUNTING */ ++ ++#ifdef CONFIG_PARAVIRT ++static inline u64 steal_ticks(u64 steal) ++{ ++ if (unlikely(steal > NSEC_PER_SEC)) ++ return div_u64(steal, TICK_NSEC); ++ ++ return __iter_div_u64_rem(steal, TICK_NSEC, &steal); ++} ++#endif ++ ++static void update_rq_clock_task(struct rq *rq, s64 delta) ++{ ++#ifdef CONFIG_IRQ_TIME_ACCOUNTING ++ s64 irq_delta = irq_time_read(cpu_of(rq)) - rq->prev_irq_time; ++ ++ /* ++ * Since irq_time is only updated on {soft,}irq_exit, we might run into ++ * this case when a previous update_rq_clock() happened inside a ++ * {soft,}irq region. ++ * ++ * When this happens, we stop ->clock_task and only update the ++ * prev_irq_time stamp to account for the part that fit, so that a next ++ * update will consume the rest. This ensures ->clock_task is ++ * monotonic. ++ * ++ * It does however cause some slight miss-attribution of {soft,}irq ++ * time, a more accurate solution would be to update the irq_time using ++ * the current rq->clock timestamp, except that would require using ++ * atomic ops. 
++ */ ++ if (irq_delta > delta) ++ irq_delta = delta; ++ ++ rq->prev_irq_time += irq_delta; ++ delta -= irq_delta; ++#endif ++#ifdef CONFIG_PARAVIRT_TIME_ACCOUNTING ++ if (static_branch((&paravirt_steal_rq_enabled))) { ++ u64 st, steal = paravirt_steal_clock(cpu_of(rq)); ++ ++ steal -= rq->prev_steal_time_rq; ++ ++ if (unlikely(steal > delta)) ++ steal = delta; ++ ++ st = steal_ticks(steal); ++ steal = st * TICK_NSEC; ++ ++ rq->prev_steal_time_rq += steal; ++ ++ delta -= steal; ++ } ++#endif ++ ++ rq->clock_task += delta; ++} ++ ++#ifndef nsecs_to_cputime ++# define nsecs_to_cputime(__nsecs) nsecs_to_jiffies(__nsecs) ++#endif ++ ++#ifdef CONFIG_IRQ_TIME_ACCOUNTING ++static void irqtime_account_hi_si(void) ++{ ++ u64 *cpustat = kcpustat_this_cpu->cpustat; ++ u64 latest_ns; ++ ++ latest_ns = nsecs_to_cputime64(this_cpu_read(cpu_hardirq_time)); ++ if (latest_ns > cpustat[CPUTIME_IRQ]) ++ cpustat[CPUTIME_IRQ] += (__force u64)cputime_one_jiffy; ++ ++ latest_ns = nsecs_to_cputime64(this_cpu_read(cpu_softirq_time)); ++ if (latest_ns > cpustat[CPUTIME_SOFTIRQ]) ++ cpustat[CPUTIME_SOFTIRQ] += (__force u64)cputime_one_jiffy; ++} ++#else /* CONFIG_IRQ_TIME_ACCOUNTING */ ++ ++#define sched_clock_irqtime (0) ++ ++static inline void irqtime_account_hi_si(void) ++{ ++} ++#endif /* CONFIG_IRQ_TIME_ACCOUNTING */ ++ ++static __always_inline bool steal_account_process_tick(void) ++{ ++#ifdef CONFIG_PARAVIRT ++ if (static_branch(&paravirt_steal_enabled)) { ++ u64 steal, st = 0; ++ ++ steal = paravirt_steal_clock(smp_processor_id()); ++ steal -= this_rq()->prev_steal_time; ++ ++ st = steal_ticks(steal); ++ this_rq()->prev_steal_time += st * TICK_NSEC; ++ ++ account_steal_time(st); ++ return st; ++ } ++#endif ++ return false; ++} ++ ++/* ++ * On each tick, see what percentage of that tick was attributed to each ++ * component and add the percentage to the _pc values. Once a _pc value has ++ * accumulated one tick's worth, account for that. This means the total ++ * percentage of load components will always be 128 (pseudo 100) per tick.
++ */ ++static void pc_idle_time(struct rq *rq, unsigned long pc) ++{ ++ u64 *cpustat = kcpustat_this_cpu->cpustat; ++ ++ if (atomic_read(&rq->nr_iowait) > 0) { ++ rq->iowait_pc += pc; ++ if (rq->iowait_pc >= 128) { ++ rq->iowait_pc %= 128; ++ cpustat[CPUTIME_IOWAIT] += (__force u64)cputime_one_jiffy; ++ } ++ } else { ++ rq->idle_pc += pc; ++ if (rq->idle_pc >= 128) { ++ rq->idle_pc %= 128; ++ cpustat[CPUTIME_IDLE] += (__force u64)cputime_one_jiffy; ++ } ++ } ++} ++ ++static void ++pc_system_time(struct rq *rq, struct task_struct *p, int hardirq_offset, ++ unsigned long pc, unsigned long ns) ++{ ++ u64 *cpustat = kcpustat_this_cpu->cpustat; ++ cputime_t one_jiffy_scaled = cputime_to_scaled(cputime_one_jiffy); ++ ++ p->stime_pc += pc; ++ if (p->stime_pc >= 128) { ++ p->stime_pc %= 128; ++ p->stime += (__force u64)cputime_one_jiffy; ++ p->stimescaled += one_jiffy_scaled; ++ account_group_system_time(p, cputime_one_jiffy); ++ acct_update_integrals(p); ++ } ++ p->sched_time += ns; ++ ++ if (hardirq_count() - hardirq_offset) { ++ rq->irq_pc += pc; ++ if (rq->irq_pc >= 128) { ++ rq->irq_pc %= 128; ++ cpustat[CPUTIME_IRQ] += (__force u64)cputime_one_jiffy; ++ } ++ } else if (in_serving_softirq()) { ++ rq->softirq_pc += pc; ++ if (rq->softirq_pc >= 128) { ++ rq->softirq_pc %= 128; ++ cpustat[CPUTIME_SOFTIRQ] += (__force u64)cputime_one_jiffy; ++ } ++ } else { ++ rq->system_pc += pc; ++ if (rq->system_pc >= 128) { ++ rq->system_pc %= 128; ++ cpustat[CPUTIME_SYSTEM] += (__force u64)cputime_one_jiffy; ++ } ++ } ++} ++ ++static void pc_user_time(struct rq *rq, struct task_struct *p, ++ unsigned long pc, unsigned long ns) ++{ ++ u64 *cpustat = kcpustat_this_cpu->cpustat; ++ cputime_t one_jiffy_scaled = cputime_to_scaled(cputime_one_jiffy); ++ ++ p->utime_pc += pc; ++ if (p->utime_pc >= 128) { ++ p->utime_pc %= 128; ++ p->utime += (__force u64)cputime_one_jiffy; ++ p->utimescaled += one_jiffy_scaled; ++ account_group_user_time(p, cputime_one_jiffy); ++ acct_update_integrals(p); ++ } ++ p->sched_time += ns; ++ ++ if (this_cpu_ksoftirqd() == p) { ++ /* ++ * ksoftirqd time do not get accounted in cpu_softirq_time. ++ * So, we have to handle it separately here. ++ */ ++ rq->softirq_pc += pc; ++ if (rq->softirq_pc >= 128) { ++ rq->softirq_pc %= 128; ++ cpustat[CPUTIME_SOFTIRQ] += (__force u64)cputime_one_jiffy; ++ } ++ } ++ ++ if (TASK_NICE(p) > 0 || idleprio_task(p)) { ++ rq->nice_pc += pc; ++ if (rq->nice_pc >= 128) { ++ rq->nice_pc %= 128; ++ cpustat[CPUTIME_NICE] += (__force u64)cputime_one_jiffy; ++ } ++ } else { ++ rq->user_pc += pc; ++ if (rq->user_pc >= 128) { ++ rq->user_pc %= 128; ++ cpustat[CPUTIME_USER] += (__force u64)cputime_one_jiffy; ++ } ++ } ++} ++ ++/* ++ * Convert nanoseconds to pseudo percentage of one tick. Use 128 for fast ++ * shifts instead of 100 ++ */ ++#define NS_TO_PC(NS) (NS * 128 / JIFFY_NS) ++ ++/* ++ * This is called on clock ticks and on context switches. ++ * Bank in p->sched_time the ns elapsed since the last tick or switch. ++ * CPU scheduler quota accounting is also performed here in microseconds. 
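The accounting above works in 1/128ths of a tick rather than true percent so the conversions are shifts instead of divisions by 100. Purely as an illustration (HZ = 1000, i.e. JIFFY_NS = 1,000,000, is an assumption here), the sketch below mirrors NS_TO_PC() and the "accumulate until a full tick, then bank one jiffy" pattern used by pc_user_time(), pc_system_time() and pc_idle_time().

/* Userspace illustration of the pseudo-percentage accounting: elapsed
 * nanoseconds are converted to 1/128ths of a tick and a whole jiffy is
 * banked each time 128 of them accumulate.  HZ=1000 is assumed. */
#include <stdio.h>

#define JIFFY_NS        1000000UL               /* one tick at the assumed HZ=1000 */
#define NS_TO_PC(ns)    ((ns) * 128 / JIFFY_NS)

int main(void)
{
        unsigned long utime_pc = 0;      /* fractional user time, in 1/128 tick */
        unsigned long utime_jiffies = 0; /* whole jiffies banked so far */
        unsigned long slices[] = { 300000, 450000, 700000, 250000, 900000 };
        unsigned int i;

        for (i = 0; i < sizeof(slices) / sizeof(slices[0]); i++) {
                utime_pc += NS_TO_PC(slices[i]);
                if (utime_pc >= 128) {          /* a full tick's worth accumulated */
                        utime_pc %= 128;
                        utime_jiffies++;
                }
                printf("ran %6lu ns -> %3lu/128 pending, %lu jiffies banked\n",
                       slices[i], utime_pc, utime_jiffies);
        }
        return 0;
}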
++ */ ++static void ++update_cpu_clock(struct rq *rq, struct task_struct *p, bool tick) ++{ ++ long account_ns = rq->clock - rq->timekeep_clock; ++ struct task_struct *idle = rq->idle; ++ unsigned long account_pc; ++ ++ if (unlikely(account_ns < 0)) ++ account_ns = 0; ++ ++ account_pc = NS_TO_PC(account_ns); ++ ++ if (tick) { ++ int user_tick; ++ ++ /* Accurate tick timekeeping */ ++ rq->account_pc += account_pc - 128; ++ if (rq->account_pc < 0) { ++ /* ++ * Small errors in micro accounting may not make the ++ * accounting add up to 128 each tick so we keep track ++ * of the percentage and round it up when less than 128 ++ */ ++ account_pc += -rq->account_pc; ++ rq->account_pc = 0; ++ } ++ if (steal_account_process_tick()) ++ goto ts_account; ++ ++ user_tick = user_mode(get_irq_regs()); ++ ++ if (user_tick) ++ pc_user_time(rq, p, account_pc, account_ns); ++ else if (p != idle || (irq_count() != HARDIRQ_OFFSET)) ++ pc_system_time(rq, p, HARDIRQ_OFFSET, ++ account_pc, account_ns); ++ else ++ pc_idle_time(rq, account_pc); ++ ++ if (sched_clock_irqtime) ++ irqtime_account_hi_si(); ++ } else { ++ /* Accurate subtick timekeeping */ ++ rq->account_pc += account_pc; ++ if (p == idle) ++ pc_idle_time(rq, account_pc); ++ else ++ pc_user_time(rq, p, account_pc, account_ns); ++ } ++ ++ts_account: ++ /* time_slice accounting is done in usecs to avoid overflow on 32bit */ ++ if (rq->rq_policy != SCHED_FIFO && p != idle) { ++ s64 time_diff = rq->clock - rq->rq_last_ran; ++ ++ niffy_diff(&time_diff, 1); ++ rq->rq_time_slice -= NS_TO_US(time_diff); ++ } ++ rq->rq_last_ran = rq->timekeep_clock = rq->clock; ++} ++ ++/* ++ * Return any ns on the sched_clock that have not yet been accounted in ++ * @p in case that task is currently running. ++ * ++ * Called with task_grq_lock() held. ++ */ ++static u64 do_task_delta_exec(struct task_struct *p, struct rq *rq) ++{ ++ u64 ns = 0; ++ ++ if (p == rq->curr) { ++ update_clocks(rq); ++ ns = rq->clock_task - rq->rq_last_ran; ++ if (unlikely((s64)ns < 0)) ++ ns = 0; ++ } ++ ++ return ns; ++} ++ ++unsigned long long task_delta_exec(struct task_struct *p) ++{ ++ unsigned long flags; ++ struct rq *rq; ++ u64 ns; ++ ++ rq = task_grq_lock(p, &flags); ++ ns = do_task_delta_exec(p, rq); ++ task_grq_unlock(&flags); ++ ++ return ns; ++} ++ ++/* ++ * Return accounted runtime for the task. ++ * In case the task is currently running, return the runtime plus current's ++ * pending runtime that have not been accounted yet. ++ */ ++unsigned long long task_sched_runtime(struct task_struct *p) ++{ ++ unsigned long flags; ++ struct rq *rq; ++ u64 ns; ++ ++ rq = task_grq_lock(p, &flags); ++ ns = p->sched_time + do_task_delta_exec(p, rq); ++ task_grq_unlock(&flags); ++ ++ return ns; ++} ++ ++/* Compatibility crap for removal */ ++void account_user_time(struct task_struct *p, cputime_t cputime, ++ cputime_t cputime_scaled) ++{ ++} ++ ++void account_idle_time(cputime_t cputime) ++{ ++} ++ ++/* ++ * Account guest cpu time to a process. ++ * @p: the process that the cpu time gets accounted to ++ * @cputime: the cpu time spent in virtual machine since the last update ++ * @cputime_scaled: cputime scaled by cpu frequency ++ */ ++static void account_guest_time(struct task_struct *p, cputime_t cputime, ++ cputime_t cputime_scaled) ++{ ++ u64 *cpustat = kcpustat_this_cpu->cpustat; ++ ++ /* Add guest time to process. 
*/ ++ p->utime += (__force u64)cputime; ++ p->utimescaled += (__force u64)cputime_scaled; ++ account_group_user_time(p, cputime); ++ p->gtime += (__force u64)cputime; ++ ++ /* Add guest time to cpustat. */ ++ if (TASK_NICE(p) > 0) { ++ cpustat[CPUTIME_NICE] += (__force u64)cputime; ++ cpustat[CPUTIME_GUEST_NICE] += (__force u64)cputime; ++ } else { ++ cpustat[CPUTIME_USER] += (__force u64)cputime; ++ cpustat[CPUTIME_GUEST] += (__force u64)cputime; ++ } ++} ++ ++/* ++ * Account system cpu time to a process and desired cpustat field ++ * @p: the process that the cpu time gets accounted to ++ * @cputime: the cpu time spent in kernel space since the last update ++ * @cputime_scaled: cputime scaled by cpu frequency ++ * @target_cputime64: pointer to cpustat field that has to be updated ++ */ ++static inline ++void __account_system_time(struct task_struct *p, cputime_t cputime, ++ cputime_t cputime_scaled, cputime64_t *target_cputime64) ++{ ++ /* Add system time to process. */ ++ p->stime += (__force u64)cputime; ++ p->stimescaled += (__force u64)cputime_scaled; ++ account_group_system_time(p, cputime); ++ ++ /* Add system time to cpustat. */ ++ *target_cputime64 += (__force u64)cputime; ++ ++ /* Account for system time used */ ++ acct_update_integrals(p); ++} ++ ++/* ++ * Account system cpu time to a process. ++ * @p: the process that the cpu time gets accounted to ++ * @hardirq_offset: the offset to subtract from hardirq_count() ++ * @cputime: the cpu time spent in kernel space since the last update ++ * @cputime_scaled: cputime scaled by cpu frequency ++ * This is for guest only now. ++ */ ++void account_system_time(struct task_struct *p, int hardirq_offset, ++ cputime_t cputime, cputime_t cputime_scaled) ++{ ++ ++ if ((p->flags & PF_VCPU) && (irq_count() - hardirq_offset == 0)) ++ account_guest_time(p, cputime, cputime_scaled); ++} ++ ++/* ++ * Account for involuntary wait time. ++ * @steal: the cpu time spent in involuntary wait ++ */ ++void account_steal_time(cputime_t cputime) ++{ ++ u64 *cpustat = kcpustat_this_cpu->cpustat; ++ ++ cpustat[CPUTIME_STEAL] += (__force u64)cputime; ++} ++ ++/* ++ * Account for idle time. ++ * @cputime: the cpu time spent in idle wait ++ */ ++static void account_idle_times(cputime_t cputime) ++{ ++ u64 *cpustat = kcpustat_this_cpu->cpustat; ++ struct rq *rq = this_rq(); ++ ++ if (atomic_read(&rq->nr_iowait) > 0) ++ cpustat[CPUTIME_IOWAIT] += (__force u64)cputime; ++ else ++ cpustat[CPUTIME_IDLE] += (__force u64)cputime; ++} ++ ++#ifndef CONFIG_VIRT_CPU_ACCOUNTING ++ ++void account_process_tick(struct task_struct *p, int user_tick) ++{ ++} ++ ++/* ++ * Account multiple ticks of steal time. ++ * @p: the process from which the cpu time has been stolen ++ * @ticks: number of stolen ticks ++ */ ++void account_steal_ticks(unsigned long ticks) ++{ ++ account_steal_time(jiffies_to_cputime(ticks)); ++} ++ ++/* ++ * Account multiple ticks of idle time. ++ * @ticks: number of stolen ticks ++ */ ++void account_idle_ticks(unsigned long ticks) ++{ ++ account_idle_times(jiffies_to_cputime(ticks)); ++} ++#endif ++ ++static inline void grq_iso_lock(void) ++ __acquires(grq.iso_lock) ++{ ++ raw_spin_lock(&grq.iso_lock); ++} ++ ++static inline void grq_iso_unlock(void) ++ __releases(grq.iso_lock) ++{ ++ raw_spin_unlock(&grq.iso_lock); ++} ++ ++/* ++ * Functions to test for when SCHED_ISO tasks have used their allocated ++ * quota as real time scheduling and convert them back to SCHED_NORMAL. 
++ * Where possible, the data is tested lockless, to avoid grabbing iso_lock ++ * because the occasional inaccurate result won't matter. However the ++ * tick data is only ever modified under lock. iso_refractory is only simply ++ * set to 0 or 1 so it's not worth grabbing the lock yet again for that. ++ */ ++static bool set_iso_refractory(void) ++{ ++ grq.iso_refractory = true; ++ return grq.iso_refractory; ++} ++ ++static bool clear_iso_refractory(void) ++{ ++ grq.iso_refractory = false; ++ return grq.iso_refractory; ++} ++ ++/* ++ * Test if SCHED_ISO tasks have run longer than their alloted period as RT ++ * tasks and set the refractory flag if necessary. There is 10% hysteresis ++ * for unsetting the flag. 115/128 is ~90/100 as a fast shift instead of a ++ * slow division. ++ */ ++static bool test_ret_isorefractory(struct rq *rq) ++{ ++ if (likely(!grq.iso_refractory)) { ++ if (grq.iso_ticks > ISO_PERIOD * sched_iso_cpu) ++ return set_iso_refractory(); ++ } else { ++ if (grq.iso_ticks < ISO_PERIOD * (sched_iso_cpu * 115 / 128)) ++ return clear_iso_refractory(); ++ } ++ return grq.iso_refractory; ++} ++ ++static void iso_tick(void) ++{ ++ grq_iso_lock(); ++ grq.iso_ticks += 100; ++ grq_iso_unlock(); ++} ++ ++/* No SCHED_ISO task was running so decrease rq->iso_ticks */ ++static inline void no_iso_tick(void) ++{ ++ if (grq.iso_ticks) { ++ grq_iso_lock(); ++ grq.iso_ticks -= grq.iso_ticks / ISO_PERIOD + 1; ++ if (unlikely(grq.iso_refractory && grq.iso_ticks < ++ ISO_PERIOD * (sched_iso_cpu * 115 / 128))) ++ clear_iso_refractory(); ++ grq_iso_unlock(); ++ } ++} ++ ++/* This manages tasks that have run out of timeslice during a scheduler_tick */ ++static void task_running_tick(struct rq *rq) ++{ ++ struct task_struct *p; ++ ++ /* ++ * If a SCHED_ISO task is running we increment the iso_ticks. In ++ * order to prevent SCHED_ISO tasks from causing starvation in the ++ * presence of true RT tasks we account those as iso_ticks as well. ++ */ ++ if ((rt_queue(rq) || (iso_queue(rq) && !grq.iso_refractory))) { ++ if (grq.iso_ticks <= (ISO_PERIOD * 128) - 128) ++ iso_tick(); ++ } else ++ no_iso_tick(); ++ ++ if (iso_queue(rq)) { ++ if (unlikely(test_ret_isorefractory(rq))) { ++ if (rq_running_iso(rq)) { ++ /* ++ * SCHED_ISO task is running as RT and limit ++ * has been hit. Force it to reschedule as ++ * SCHED_NORMAL by zeroing its time_slice ++ */ ++ rq->rq_time_slice = 0; ++ } ++ } ++ } ++ ++ /* SCHED_FIFO tasks never run out of timeslice. */ ++ if (rq->rq_policy == SCHED_FIFO) ++ return; ++ /* ++ * Tasks that were scheduled in the first half of a tick are not ++ * allowed to run into the 2nd half of the next tick if they will ++ * run out of time slice in the interim. Otherwise, if they have ++ * less than RESCHED_US μs of time slice left they will be rescheduled. ++ */ ++ if (rq->dither) { ++ if (rq->rq_time_slice > HALF_JIFFY_US) ++ return; ++ else ++ rq->rq_time_slice = 0; ++ } else if (rq->rq_time_slice >= RESCHED_US) ++ return; ++ ++ /* p->time_slice < RESCHED_US. We only modify task_struct under grq lock */ ++ p = rq->curr; ++ grq_lock(); ++ requeue_task(p); ++ set_tsk_need_resched(p); ++ grq_unlock(); ++} ++ ++void wake_up_idle_cpu(int cpu); ++ ++/* ++ * This function gets called by the timer code, with HZ frequency. ++ * We call it with interrupts disabled. The data modified is all ++ * local to struct rq so we don't need to grab grq lock. 
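The refractory test above compares the decaying iso_ticks count against a budget of ISO_PERIOD * sched_iso_cpu and only clears the flag again below roughly 90% of that budget (115/128). The figures in the sketch below (HZ = 1000, ISO_PERIOD = 5 * HZ + 1, and sched_iso_cpu = 70) are assumed values used only to make the two thresholds and the hysteresis gap between them concrete.

/* Illustration of the SCHED_ISO refractory thresholds.  HZ, ISO_PERIOD and
 * sched_iso_cpu below are assumed values for the demo; in the scheduler,
 * iso_tick() adds 100 per ISO tick and no_iso_tick() decays the count. */
#include <stdio.h>

#define HZ          1000
#define ISO_PERIOD  ((5 * HZ) + 1)

int main(void)
{
        int sched_iso_cpu = 70;                          /* assumed %cpu budget */
        long set_thresh   = (long)ISO_PERIOD * sched_iso_cpu;
        long clear_thresh = (long)ISO_PERIOD * (sched_iso_cpu * 115 / 128);

        printf("refractory set when iso_ticks > %ld\n", set_thresh);
        printf("refractory cleared when iso_ticks < %ld (%d%% cpu, ~90%% of the %d%% budget)\n",
               clear_thresh, sched_iso_cpu * 115 / 128, sched_iso_cpu);
        printf("hysteresis gap: %ld\n", set_thresh - clear_thresh);
        return 0;
}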
++ */ ++void scheduler_tick(void) ++{ ++ int cpu __maybe_unused = smp_processor_id(); ++ struct rq *rq = cpu_rq(cpu); ++ ++ sched_clock_tick(); ++ /* grq lock not grabbed, so only update rq clock */ ++ update_rq_clock(rq); ++ update_cpu_clock(rq, rq->curr, true); ++ if (!rq_idle(rq)) ++ task_running_tick(rq); ++ else ++ no_iso_tick(); ++ rq->last_tick = rq->clock; ++ perf_event_task_tick(); ++} ++ ++notrace unsigned long get_parent_ip(unsigned long addr) ++{ ++ if (in_lock_functions(addr)) { ++ addr = CALLER_ADDR2; ++ if (in_lock_functions(addr)) ++ addr = CALLER_ADDR3; ++ } ++ return addr; ++} ++ ++#if defined(CONFIG_PREEMPT) && (defined(CONFIG_DEBUG_PREEMPT) || \ ++ defined(CONFIG_PREEMPT_TRACER)) ++void __kprobes add_preempt_count(int val) ++{ ++#ifdef CONFIG_DEBUG_PREEMPT ++ /* ++ * Underflow? ++ */ ++ if (DEBUG_LOCKS_WARN_ON((preempt_count() < 0))) ++ return; ++#endif ++ preempt_count() += val; ++#ifdef CONFIG_DEBUG_PREEMPT ++ /* ++ * Spinlock count overflowing soon? ++ */ ++ DEBUG_LOCKS_WARN_ON((preempt_count() & PREEMPT_MASK) >= ++ PREEMPT_MASK - 10); ++#endif ++ if (preempt_count() == val) ++ trace_preempt_off(CALLER_ADDR0, get_parent_ip(CALLER_ADDR1)); ++} ++EXPORT_SYMBOL(add_preempt_count); ++ ++void __kprobes sub_preempt_count(int val) ++{ ++#ifdef CONFIG_DEBUG_PREEMPT ++ /* ++ * Underflow? ++ */ ++ if (DEBUG_LOCKS_WARN_ON(val > preempt_count())) ++ return; ++ /* ++ * Is the spinlock portion underflowing? ++ */ ++ if (DEBUG_LOCKS_WARN_ON((val < PREEMPT_MASK) && ++ !(preempt_count() & PREEMPT_MASK))) ++ return; ++#endif ++ ++ if (preempt_count() == val) ++ trace_preempt_on(CALLER_ADDR0, get_parent_ip(CALLER_ADDR1)); ++ preempt_count() -= val; ++} ++EXPORT_SYMBOL(sub_preempt_count); ++#endif ++ ++/* ++ * Deadline is "now" in niffies + (offset by priority). Setting the deadline ++ * is the key to everything. It distributes cpu fairly amongst tasks of the ++ * same nice value, it proportions cpu according to nice level, it means the ++ * task that last woke up the longest ago has the earliest deadline, thus ++ * ensuring that interactive tasks get low latency on wake up. The CPU ++ * proportion works out to the square of the virtual deadline difference, so ++ * this equation will give nice 19 3% CPU compared to nice 0. ++ */ ++static inline u64 prio_deadline_diff(int user_prio) ++{ ++ return (prio_ratios[user_prio] * rr_interval * (MS_TO_NS(1) / 128)); ++} ++ ++static inline u64 task_deadline_diff(struct task_struct *p) ++{ ++ return prio_deadline_diff(TASK_USER_PRIO(p)); ++} ++ ++static inline u64 static_deadline_diff(int static_prio) ++{ ++ return prio_deadline_diff(USER_PRIO(static_prio)); ++} ++ ++static inline int longest_deadline_diff(void) ++{ ++ return prio_deadline_diff(39); ++} ++ ++static inline int ms_longest_deadline_diff(void) ++{ ++ return NS_TO_MS(longest_deadline_diff()); ++} ++ ++/* ++ * The time_slice is only refilled when it is empty and that is when we set a ++ * new deadline. ++ */ ++static void time_slice_expired(struct task_struct *p) ++{ ++ p->time_slice = timeslice(); ++ p->deadline = grq.niffies + task_deadline_diff(p); ++} ++ ++/* ++ * Timeslices below RESCHED_US are considered as good as expired as there's no ++ * point rescheduling when there's so little time left. SCHED_BATCH tasks ++ * have been flagged be not latency sensitive and likely to be fully CPU ++ * bound so every time they're rescheduled they have their time_slice ++ * refilled, but get a new later deadline to have little effect on ++ * SCHED_NORMAL tasks. 
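prio_deadline_diff() above scales rr_interval by a per-nice ratio, so each nice level pushes a task's virtual deadline further into the future. The sketch below is illustrative only: it assumes rr_interval = 6 ms and a prio_ratios table built in the usual BFS fashion of growing each level by 10% (ratio[0] = 128, ratio[n] = ratio[n-1] * 11 / 10), and prints how much later a nice +19 task's deadline lands compared with nice 0.

/* Rough illustration of how the virtual deadline offset grows with nice
 * level.  rr_interval = 6ms and the 10%-per-level ratio table are assumed;
 * the kernel works in niffies (nanoseconds). */
#include <stdio.h>

#define PRIO_RANGE   40          /* nice -20 .. +19 maps to user prio 0..39 */
#define MS_TO_NS(x)  ((x) * 1000000ULL)

int main(void)
{
        unsigned long long prio_ratios[PRIO_RANGE];
        unsigned long long rr_interval = 6;      /* ms, assumed default */
        int i;

        prio_ratios[0] = 128;
        for (i = 1; i < PRIO_RANGE; i++)
                prio_ratios[i] = prio_ratios[i - 1] * 11 / 10;

        for (i = 0; i < PRIO_RANGE; i += 13) {
                /* same shape as prio_deadline_diff(): ratio * rr_interval * 1ms/128 */
                unsigned long long diff_ns =
                        prio_ratios[i] * rr_interval * (MS_TO_NS(1) / 128);
                printf("user prio %2d (nice %+3d): deadline offset %llu ms\n",
                       i, i - 20, diff_ns / MS_TO_NS(1));
        }
        return 0;
}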
++ ++ */ ++static inline void check_deadline(struct task_struct *p) ++{ ++ if (p->time_slice < RESCHED_US || batch_task(p)) ++ time_slice_expired(p); ++} ++ ++#define BITOP_WORD(nr) ((nr) / BITS_PER_LONG) ++ ++/* ++ * Scheduler queue bitmap specific find next bit. ++ */ ++static inline unsigned long ++next_sched_bit(const unsigned long *addr, unsigned long offset) ++{ ++ const unsigned long *p; ++ unsigned long result; ++ unsigned long size; ++ unsigned long tmp; ++ ++ size = PRIO_LIMIT; ++ if (offset >= size) ++ return size; ++ ++ p = addr + BITOP_WORD(offset); ++ result = offset & ~(BITS_PER_LONG-1); ++ size -= result; ++ offset %= BITS_PER_LONG; ++ if (offset) { ++ tmp = *(p++); ++ tmp &= (~0UL << offset); ++ if (size < BITS_PER_LONG) ++ goto found_first; ++ if (tmp) ++ goto found_middle; ++ size -= BITS_PER_LONG; ++ result += BITS_PER_LONG; ++ } ++ while (size & ~(BITS_PER_LONG-1)) { ++ if ((tmp = *(p++))) ++ goto found_middle; ++ result += BITS_PER_LONG; ++ size -= BITS_PER_LONG; ++ } ++ if (!size) ++ return result; ++ tmp = *p; ++ ++found_first: ++ tmp &= (~0UL >> (BITS_PER_LONG - size)); ++ if (tmp == 0UL) /* Are any bits set? */ ++ return result + size; /* Nope. */ ++found_middle: ++ return result + __ffs(tmp); ++} ++ ++/* ++ * O(n) lookup of all tasks in the global runqueue. The real brainfuck ++ * of lock contention and O(n). It's not really O(n) as only the queued, ++ * but not running tasks are scanned, and is O(n) queued in the worst case ++ * scenario only because the right task can be found before scanning all of ++ * them. ++ * Tasks are selected in this order: ++ * Real time tasks are selected purely by their static priority and in the ++ * order they were queued, so the lowest value idx, and the first queued task ++ * of that priority value is chosen. ++ * If no real time tasks are found, the SCHED_ISO priority is checked, and ++ * all SCHED_ISO tasks have the same priority value, so they're selected by ++ * the earliest deadline value. ++ * If no SCHED_ISO tasks are found, SCHED_NORMAL tasks are selected by the ++ * earliest deadline. ++ * Finally if no SCHED_NORMAL tasks are found, SCHED_IDLEPRIO tasks are ++ * selected by the earliest deadline. ++ */ ++static inline struct ++task_struct *earliest_deadline_task(struct rq *rq, int cpu, struct task_struct *idle) ++{ ++ struct task_struct *edt = NULL; ++ unsigned long idx = -1; ++ ++ do { ++ struct list_head *queue; ++ struct task_struct *p; ++ u64 earliest_deadline; ++ ++ idx = next_sched_bit(grq.prio_bitmap, ++idx); ++ if (idx >= PRIO_LIMIT) ++ return idle; ++ queue = grq.queue + idx; ++ ++ if (idx < MAX_RT_PRIO) { ++ /* We found an rt task */ ++ list_for_each_entry(p, queue, run_list) { ++ /* Make sure cpu affinity is ok */ ++ if (needs_other_cpu(p, cpu)) ++ continue; ++ edt = p; ++ goto out_take; ++ } ++ /* ++ * None of the RT tasks at this priority can run on ++ * this cpu ++ */ ++ continue; ++ } ++ ++ /* ++ * No rt tasks. Find the earliest deadline task. Now we're in ++ * O(n) territory. ++ */ ++ earliest_deadline = ~0ULL; ++ list_for_each_entry(p, queue, run_list) { ++ u64 dl; ++ ++ /* Make sure cpu affinity is ok */ ++ if (needs_other_cpu(p, cpu)) ++ continue; ++ ++ /* ++ * Soft affinity happens here by not scheduling a task ++ * with its sticky flag set that ran on a different CPU ++ * last when the CPU is scaling, or by greatly biasing ++ * against its deadline when not, based on cpu cache ++ * locality. 
++ */ ++ if (task_sticky(p) && task_rq(p) != rq) { ++ if (scaling_rq(rq)) ++ continue; ++ dl = p->deadline << locality_diff(p, rq); ++ } else ++ dl = p->deadline; ++ ++ if (deadline_before(dl, earliest_deadline)) { ++ earliest_deadline = dl; ++ edt = p; ++ } ++ } ++ } while (!edt); ++ ++out_take: ++ take_task(cpu, edt); ++ return edt; ++} ++ ++ ++/* ++ * Print scheduling while atomic bug: ++ */ ++static noinline void __schedule_bug(struct task_struct *prev) ++{ ++ struct pt_regs *regs = get_irq_regs(); ++ ++ printk(KERN_ERR "BUG: scheduling while atomic: %s/%d/0x%08x\n", ++ prev->comm, prev->pid, preempt_count()); ++ ++ debug_show_held_locks(prev); ++ print_modules(); ++ if (irqs_disabled()) ++ print_irqtrace_events(prev); ++ ++ if (regs) ++ show_regs(regs); ++ else ++ dump_stack(); ++} ++ ++/* ++ * Various schedule()-time debugging checks and statistics: ++ */ ++static inline void schedule_debug(struct task_struct *prev) ++{ ++ /* ++ * Test if we are atomic. Since do_exit() needs to call into ++ * schedule() atomically, we ignore that path for now. ++ * Otherwise, whine if we are scheduling when we should not be. ++ */ ++ if (unlikely(in_atomic_preempt_off() && !prev->exit_state)) ++ __schedule_bug(prev); ++ rcu_sleep_check(); ++ ++ profile_hit(SCHED_PROFILING, __builtin_return_address(0)); ++ ++ schedstat_inc(this_rq(), sched_count); ++} ++ ++/* ++ * The currently running task's information is all stored in rq local data ++ * which is only modified by the local CPU, thereby allowing the data to be ++ * changed without grabbing the grq lock. ++ */ ++static inline void set_rq_task(struct rq *rq, struct task_struct *p) ++{ ++ rq->rq_time_slice = p->time_slice; ++ rq->rq_deadline = p->deadline; ++ rq->rq_last_ran = p->last_ran = rq->clock; ++ rq->rq_policy = p->policy; ++ rq->rq_prio = p->prio; ++ if (p != rq->idle) ++ rq->rq_running = true; ++ else ++ rq->rq_running = false; ++} ++ ++static void reset_rq_task(struct rq *rq, struct task_struct *p) ++{ ++ rq->rq_policy = p->policy; ++ rq->rq_prio = p->prio; ++} ++ ++/* ++ * schedule() is the main scheduler function. ++ */ ++asmlinkage void __sched schedule(void) ++{ ++ struct task_struct *prev, *next, *idle; ++ unsigned long *switch_count; ++ bool deactivate; ++ struct rq *rq; ++ int cpu; ++ ++need_resched: ++ preempt_disable(); ++ ++ cpu = smp_processor_id(); ++ rq = cpu_rq(cpu); ++ rcu_note_context_switch(cpu); ++ prev = rq->curr; ++ ++ deactivate = false; ++ schedule_debug(prev); ++ ++ grq_lock_irq(); ++ ++ switch_count = &prev->nivcsw; ++ if (prev->state && !(preempt_count() & PREEMPT_ACTIVE)) { ++ if (unlikely(signal_pending_state(prev->state, prev))) { ++ prev->state = TASK_RUNNING; ++ } else { ++ deactivate = true; ++ /* ++ * If a worker is going to sleep, notify and ++ * ask workqueue whether it wants to wake up a ++ * task to maintain concurrency. If so, wake ++ * up the task. ++ */ ++ if (prev->flags & PF_WQ_WORKER) { ++ struct task_struct *to_wakeup; ++ ++ to_wakeup = wq_worker_sleeping(prev, cpu); ++ if (to_wakeup) { ++ /* This shouldn't happen, but does */ ++ if (unlikely(to_wakeup == prev)) ++ deactivate = false; ++ else ++ try_to_wake_up_local(to_wakeup); ++ } ++ } ++ } ++ switch_count = &prev->nvcsw; ++ } ++ ++ /* ++ * If we are going to sleep and we have plugged IO queued, make ++ * sure to submit it to avoid deadlocks. 
++ */ ++ if (unlikely(deactivate && blk_needs_flush_plug(prev))) { ++ grq_unlock_irq(); ++ preempt_enable_no_resched(); ++ blk_schedule_flush_plug(prev); ++ goto need_resched; ++ } ++ ++ update_clocks(rq); ++ update_cpu_clock(rq, prev, false); ++ if (rq->clock - rq->last_tick > HALF_JIFFY_NS) ++ rq->dither = false; ++ else ++ rq->dither = true; ++ ++ clear_tsk_need_resched(prev); ++ ++ idle = rq->idle; ++ if (idle != prev) { ++ /* Update all the information stored on struct rq */ ++ prev->time_slice = rq->rq_time_slice; ++ prev->deadline = rq->rq_deadline; ++ check_deadline(prev); ++ prev->last_ran = rq->clock; ++ ++ /* Task changed affinity off this CPU */ ++ if (needs_other_cpu(prev, cpu)) ++ resched_suitable_idle(prev); ++ else if (!deactivate) { ++ if (!queued_notrunning()) { ++ /* ++ * We now know prev is the only thing that is ++ * awaiting CPU so we can bypass rechecking for ++ * the earliest deadline task and just run it ++ * again. ++ */ ++ set_rq_task(rq, prev); ++ grq_unlock_irq(); ++ goto rerun_prev_unlocked; ++ } else ++ swap_sticky(rq, cpu, prev); ++ } ++ return_task(prev, deactivate); ++ } ++ ++ if (unlikely(!queued_notrunning())) { ++ /* ++ * This CPU is now truly idle as opposed to when idle is ++ * scheduled as a high priority task in its own right. ++ */ ++ next = idle; ++ schedstat_inc(rq, sched_goidle); ++ set_cpuidle_map(cpu); ++ } else { ++ next = earliest_deadline_task(rq, cpu, idle); ++ if (likely(next->prio != PRIO_LIMIT)) ++ clear_cpuidle_map(cpu); ++ else ++ set_cpuidle_map(cpu); ++ } ++ ++ if (likely(prev != next)) { ++ /* ++ * Don't stick tasks when a real time task is going to run as ++ * they may literally get stuck. ++ */ ++ if (rt_task(next)) ++ unstick_task(rq, prev); ++ set_rq_task(rq, next); ++ grq.nr_switches++; ++ prev->on_cpu = false; ++ next->on_cpu = true; ++ rq->curr = next; ++ ++*switch_count; ++ ++ context_switch(rq, prev, next); /* unlocks the grq */ ++ /* ++ * The context switch have flipped the stack from under us ++ * and restored the local variables which were saved when ++ * this task called schedule() in the past. prev == current ++ * is still correct, but it can be moved to another cpu/rq. ++ */ ++ cpu = smp_processor_id(); ++ rq = cpu_rq(cpu); ++ idle = rq->idle; ++ } else ++ grq_unlock_irq(); ++ ++rerun_prev_unlocked: ++ preempt_enable_no_resched(); ++ if (unlikely(need_resched())) ++ goto need_resched; ++} ++EXPORT_SYMBOL(schedule); ++ ++#ifdef CONFIG_MUTEX_SPIN_ON_OWNER ++ ++static inline bool owner_running(struct mutex *lock, struct task_struct *owner) ++{ ++ if (lock->owner != owner) ++ return false; ++ ++ /* ++ * Ensure we emit the owner->on_cpu, dereference _after_ checking ++ * lock->owner still matches owner, if that fails, owner might ++ * point to free()d memory, if it still matches, the rcu_read_lock() ++ * ensures the memory stays valid. ++ */ ++ barrier(); ++ ++ return owner->on_cpu; ++} ++ ++/* ++ * Look out! "owner" is an entirely speculative pointer ++ * access and not reliable. ++ */ ++int mutex_spin_on_owner(struct mutex *lock, struct task_struct *owner) ++{ ++ rcu_read_lock(); ++ while (owner_running(lock, owner)) { ++ if (need_resched()) ++ break; ++ ++ arch_mutex_cpu_relax(); ++ } ++ rcu_read_unlock(); ++ ++ /* ++ * We break out the loop above on need_resched() and when the ++ * owner changed, which is a sign for heavy contention. Return ++ * success only when lock->owner is NULL. 
++ */ ++ return lock->owner == NULL; ++} ++#endif ++ ++#ifdef CONFIG_PREEMPT ++/* ++ * this is the entry point to schedule() from in-kernel preemption ++ * off of preempt_enable. Kernel preemptions off return from interrupt ++ * occur there and call schedule directly. ++ */ ++asmlinkage void __sched notrace preempt_schedule(void) ++{ ++ struct thread_info *ti = current_thread_info(); ++ ++ /* ++ * If there is a non-zero preempt_count or interrupts are disabled, ++ * we do not want to preempt the current task. Just return.. ++ */ ++ if (likely(ti->preempt_count || irqs_disabled())) ++ return; ++ ++ do { ++ add_preempt_count_notrace(PREEMPT_ACTIVE); ++ schedule(); ++ sub_preempt_count_notrace(PREEMPT_ACTIVE); ++ ++ /* ++ * Check again in case we missed a preemption opportunity ++ * between schedule and now. ++ */ ++ barrier(); ++ } while (need_resched()); ++} ++EXPORT_SYMBOL(preempt_schedule); ++ ++/* ++ * this is the entry point to schedule() from kernel preemption ++ * off of irq context. ++ * Note, that this is called and return with irqs disabled. This will ++ * protect us against recursive calling from irq. ++ */ ++asmlinkage void __sched preempt_schedule_irq(void) ++{ ++ struct thread_info *ti = current_thread_info(); ++ ++ /* Catch callers which need to be fixed */ ++ BUG_ON(ti->preempt_count || !irqs_disabled()); ++ ++ do { ++ add_preempt_count(PREEMPT_ACTIVE); ++ local_irq_enable(); ++ schedule(); ++ local_irq_disable(); ++ sub_preempt_count(PREEMPT_ACTIVE); ++ ++ /* ++ * Check again in case we missed a preemption opportunity ++ * between schedule and now. ++ */ ++ barrier(); ++ } while (need_resched()); ++} ++ ++#endif /* CONFIG_PREEMPT */ ++ ++int default_wake_function(wait_queue_t *curr, unsigned mode, int wake_flags, ++ void *key) ++{ ++ return try_to_wake_up(curr->private, mode, wake_flags); ++} ++EXPORT_SYMBOL(default_wake_function); ++ ++/* ++ * The core wakeup function. Non-exclusive wakeups (nr_exclusive == 0) just ++ * wake everything up. If it's an exclusive wakeup (nr_exclusive == small +ve ++ * number) then we wake all the non-exclusive tasks and one exclusive task. ++ * ++ * There are circumstances in which we can try to wake a task which has already ++ * started to run but is not in state TASK_RUNNING. try_to_wake_up() returns ++ * zero in this (rare) case, and we handle it by continuing to scan the queue. ++ */ ++static void __wake_up_common(wait_queue_head_t *q, unsigned int mode, ++ int nr_exclusive, int wake_flags, void *key) ++{ ++ struct list_head *tmp, *next; ++ ++ list_for_each_safe(tmp, next, &q->task_list) { ++ wait_queue_t *curr = list_entry(tmp, wait_queue_t, task_list); ++ unsigned int flags = curr->flags; ++ ++ if (curr->func(curr, mode, wake_flags, key) && ++ (flags & WQ_FLAG_EXCLUSIVE) && !--nr_exclusive) ++ break; ++ } ++} ++ ++/** ++ * __wake_up - wake up threads blocked on a waitqueue. ++ * @q: the waitqueue ++ * @mode: which threads ++ * @nr_exclusive: how many wake-one or wake-many threads to wake up ++ * @key: is directly passed to the wakeup function ++ * ++ * It may be assumed that this function implies a write memory barrier before ++ * changing the task state if and only if any tasks are woken up. 
++ */ ++void __wake_up(wait_queue_head_t *q, unsigned int mode, ++ int nr_exclusive, void *key) ++{ ++ unsigned long flags; ++ ++ spin_lock_irqsave(&q->lock, flags); ++ __wake_up_common(q, mode, nr_exclusive, 0, key); ++ spin_unlock_irqrestore(&q->lock, flags); ++} ++EXPORT_SYMBOL(__wake_up); ++ ++/* ++ * Same as __wake_up but called with the spinlock in wait_queue_head_t held. ++ */ ++void __wake_up_locked(wait_queue_head_t *q, unsigned int mode) ++{ ++ __wake_up_common(q, mode, 1, 0, NULL); ++} ++EXPORT_SYMBOL_GPL(__wake_up_locked); ++ ++void __wake_up_locked_key(wait_queue_head_t *q, unsigned int mode, void *key) ++{ ++ __wake_up_common(q, mode, 1, 0, key); ++} ++EXPORT_SYMBOL_GPL(__wake_up_locked_key); ++ ++/** ++ * __wake_up_sync_key - wake up threads blocked on a waitqueue. ++ * @q: the waitqueue ++ * @mode: which threads ++ * @nr_exclusive: how many wake-one or wake-many threads to wake up ++ * @key: opaque value to be passed to wakeup targets ++ * ++ * The sync wakeup differs that the waker knows that it will schedule ++ * away soon, so while the target thread will be woken up, it will not ++ * be migrated to another CPU - ie. the two threads are 'synchronised' ++ * with each other. This can prevent needless bouncing between CPUs. ++ * ++ * On UP it can prevent extra preemption. ++ * ++ * It may be assumed that this function implies a write memory barrier before ++ * changing the task state if and only if any tasks are woken up. ++ */ ++void __wake_up_sync_key(wait_queue_head_t *q, unsigned int mode, ++ int nr_exclusive, void *key) ++{ ++ unsigned long flags; ++ int wake_flags = WF_SYNC; ++ ++ if (unlikely(!q)) ++ return; ++ ++ if (unlikely(!nr_exclusive)) ++ wake_flags = 0; ++ ++ spin_lock_irqsave(&q->lock, flags); ++ __wake_up_common(q, mode, nr_exclusive, wake_flags, key); ++ spin_unlock_irqrestore(&q->lock, flags); ++} ++EXPORT_SYMBOL_GPL(__wake_up_sync_key); ++ ++/** ++ * __wake_up_sync - wake up threads blocked on a waitqueue. ++ * @q: the waitqueue ++ * @mode: which threads ++ * @nr_exclusive: how many wake-one or wake-many threads to wake up ++ * ++ * The sync wakeup differs that the waker knows that it will schedule ++ * away soon, so while the target thread will be woken up, it will not ++ * be migrated to another CPU - ie. the two threads are 'synchronised' ++ * with each other. This can prevent needless bouncing between CPUs. ++ * ++ * On UP it can prevent extra preemption. ++ */ ++void __wake_up_sync(wait_queue_head_t *q, unsigned int mode, int nr_exclusive) ++{ ++ unsigned long flags; ++ int sync = 1; ++ ++ if (unlikely(!q)) ++ return; ++ ++ if (unlikely(!nr_exclusive)) ++ sync = 0; ++ ++ spin_lock_irqsave(&q->lock, flags); ++ __wake_up_common(q, mode, nr_exclusive, sync, NULL); ++ spin_unlock_irqrestore(&q->lock, flags); ++} ++EXPORT_SYMBOL_GPL(__wake_up_sync); /* For internal use only */ ++ ++/** ++ * complete: - signals a single thread waiting on this completion ++ * @x: holds the state of this particular completion ++ * ++ * This will wake up a single thread waiting on this completion. Threads will be ++ * awakened in the same order in which they were queued. ++ * ++ * See also complete_all(), wait_for_completion() and related routines. ++ * ++ * It may be assumed that this function implies a write memory barrier before ++ * changing the task state if and only if any tasks are woken up. 
++ */ ++void complete(struct completion *x) ++{ ++ unsigned long flags; ++ ++ spin_lock_irqsave(&x->wait.lock, flags); ++ x->done++; ++ __wake_up_common(&x->wait, TASK_NORMAL, 1, 0, NULL); ++ spin_unlock_irqrestore(&x->wait.lock, flags); ++} ++EXPORT_SYMBOL(complete); ++ ++/** ++ * complete_all: - signals all threads waiting on this completion ++ * @x: holds the state of this particular completion ++ * ++ * This will wake up all threads waiting on this particular completion event. ++ * ++ * It may be assumed that this function implies a write memory barrier before ++ * changing the task state if and only if any tasks are woken up. ++ */ ++void complete_all(struct completion *x) ++{ ++ unsigned long flags; ++ ++ spin_lock_irqsave(&x->wait.lock, flags); ++ x->done += UINT_MAX/2; ++ __wake_up_common(&x->wait, TASK_NORMAL, 0, 0, NULL); ++ spin_unlock_irqrestore(&x->wait.lock, flags); ++} ++EXPORT_SYMBOL(complete_all); ++ ++static inline long __sched ++do_wait_for_common(struct completion *x, long timeout, int state) ++{ ++ if (!x->done) { ++ DECLARE_WAITQUEUE(wait, current); ++ ++ __add_wait_queue_tail_exclusive(&x->wait, &wait); ++ do { ++ if (signal_pending_state(state, current)) { ++ timeout = -ERESTARTSYS; ++ break; ++ } ++ __set_current_state(state); ++ spin_unlock_irq(&x->wait.lock); ++ timeout = schedule_timeout(timeout); ++ spin_lock_irq(&x->wait.lock); ++ } while (!x->done && timeout); ++ __remove_wait_queue(&x->wait, &wait); ++ if (!x->done) ++ return timeout; ++ } ++ x->done--; ++ return timeout ?: 1; ++} ++ ++static long __sched ++wait_for_common(struct completion *x, long timeout, int state) ++{ ++ might_sleep(); ++ ++ spin_lock_irq(&x->wait.lock); ++ timeout = do_wait_for_common(x, timeout, state); ++ spin_unlock_irq(&x->wait.lock); ++ return timeout; ++} ++ ++/** ++ * wait_for_completion: - waits for completion of a task ++ * @x: holds the state of this particular completion ++ * ++ * This waits to be signaled for completion of a specific task. It is NOT ++ * interruptible and there is no timeout. ++ * ++ * See also similar routines (i.e. wait_for_completion_timeout()) with timeout ++ * and interrupt capability. Also see complete(). ++ */ ++void __sched wait_for_completion(struct completion *x) ++{ ++ wait_for_common(x, MAX_SCHEDULE_TIMEOUT, TASK_UNINTERRUPTIBLE); ++} ++EXPORT_SYMBOL(wait_for_completion); ++ ++/** ++ * wait_for_completion_timeout: - waits for completion of a task (w/timeout) ++ * @x: holds the state of this particular completion ++ * @timeout: timeout value in jiffies ++ * ++ * This waits for either a completion of a specific task to be signaled or for a ++ * specified timeout to expire. The timeout is in jiffies. It is not ++ * interruptible. ++ * ++ * The return value is 0 if timed out, and positive (at least 1, or number of ++ * jiffies left till timeout) if completed. ++ */ ++unsigned long __sched ++wait_for_completion_timeout(struct completion *x, unsigned long timeout) ++{ ++ return wait_for_common(x, timeout, TASK_UNINTERRUPTIBLE); ++} ++EXPORT_SYMBOL(wait_for_completion_timeout); ++ ++/** ++ * wait_for_completion_interruptible: - waits for completion of a task (w/intr) ++ * @x: holds the state of this particular completion ++ * ++ * This waits for completion of a specific task to be signaled. It is ++ * interruptible. ++ * ++ * The return value is -ERESTARTSYS if interrupted, 0 if completed. 
++ */ ++int __sched wait_for_completion_interruptible(struct completion *x) ++{ ++ long t = wait_for_common(x, MAX_SCHEDULE_TIMEOUT, TASK_INTERRUPTIBLE); ++ if (t == -ERESTARTSYS) ++ return t; ++ return 0; ++} ++EXPORT_SYMBOL(wait_for_completion_interruptible); ++ ++/** ++ * wait_for_completion_interruptible_timeout: - waits for completion (w/(to,intr)) ++ * @x: holds the state of this particular completion ++ * @timeout: timeout value in jiffies ++ * ++ * This waits for either a completion of a specific task to be signaled or for a ++ * specified timeout to expire. It is interruptible. The timeout is in jiffies. ++ * ++ * The return value is -ERESTARTSYS if interrupted, 0 if timed out, ++ * positive (at least 1, or number of jiffies left till timeout) if completed. ++ */ ++long __sched ++wait_for_completion_interruptible_timeout(struct completion *x, ++ unsigned long timeout) ++{ ++ return wait_for_common(x, timeout, TASK_INTERRUPTIBLE); ++} ++EXPORT_SYMBOL(wait_for_completion_interruptible_timeout); ++ ++/** ++ * wait_for_completion_killable: - waits for completion of a task (killable) ++ * @x: holds the state of this particular completion ++ * ++ * This waits to be signaled for completion of a specific task. It can be ++ * interrupted by a kill signal. ++ * ++ * The return value is -ERESTARTSYS if interrupted, 0 if timed out, ++ * positive (at least 1, or number of jiffies left till timeout) if completed. ++ */ ++int __sched wait_for_completion_killable(struct completion *x) ++{ ++ long t = wait_for_common(x, MAX_SCHEDULE_TIMEOUT, TASK_KILLABLE); ++ if (t == -ERESTARTSYS) ++ return t; ++ return 0; ++} ++EXPORT_SYMBOL(wait_for_completion_killable); ++ ++/** ++ * wait_for_completion_killable_timeout: - waits for completion of a task (w/(to,killable)) ++ * @x: holds the state of this particular completion ++ * @timeout: timeout value in jiffies ++ * ++ * This waits for either a completion of a specific task to be ++ * signaled or for a specified timeout to expire. It can be ++ * interrupted by a kill signal. The timeout is in jiffies. ++ */ ++long __sched ++wait_for_completion_killable_timeout(struct completion *x, ++ unsigned long timeout) ++{ ++ return wait_for_common(x, timeout, TASK_KILLABLE); ++} ++EXPORT_SYMBOL(wait_for_completion_killable_timeout); ++ ++/** ++ * try_wait_for_completion - try to decrement a completion without blocking ++ * @x: completion structure ++ * ++ * Returns: 0 if a decrement cannot be done without blocking ++ * 1 if a decrement succeeded. ++ * ++ * If a completion is being used as a counting completion, ++ * attempt to decrement the counter without blocking. This ++ * enables us to avoid waiting if the resource the completion ++ * is protecting is not available. ++ */ ++bool try_wait_for_completion(struct completion *x) ++{ ++ unsigned long flags; ++ int ret = 1; ++ ++ spin_lock_irqsave(&x->wait.lock, flags); ++ if (!x->done) ++ ret = 0; ++ else ++ x->done--; ++ spin_unlock_irqrestore(&x->wait.lock, flags); ++ return ret; ++} ++EXPORT_SYMBOL(try_wait_for_completion); ++ ++/** ++ * completion_done - Test to see if a completion has any waiters ++ * @x: completion structure ++ * ++ * Returns: 0 if there are waiters (wait_for_completion() in progress) ++ * 1 if there are no waiters. 
++ * ++ */ ++bool completion_done(struct completion *x) ++{ ++ unsigned long flags; ++ int ret = 1; ++ ++ spin_lock_irqsave(&x->wait.lock, flags); ++ if (!x->done) ++ ret = 0; ++ spin_unlock_irqrestore(&x->wait.lock, flags); ++ return ret; ++} ++EXPORT_SYMBOL(completion_done); ++ ++static long __sched ++sleep_on_common(wait_queue_head_t *q, int state, long timeout) ++{ ++ unsigned long flags; ++ wait_queue_t wait; ++ ++ init_waitqueue_entry(&wait, current); ++ ++ __set_current_state(state); ++ ++ spin_lock_irqsave(&q->lock, flags); ++ __add_wait_queue(q, &wait); ++ spin_unlock(&q->lock); ++ timeout = schedule_timeout(timeout); ++ spin_lock_irq(&q->lock); ++ __remove_wait_queue(q, &wait); ++ spin_unlock_irqrestore(&q->lock, flags); ++ ++ return timeout; ++} ++ ++void __sched interruptible_sleep_on(wait_queue_head_t *q) ++{ ++ sleep_on_common(q, TASK_INTERRUPTIBLE, MAX_SCHEDULE_TIMEOUT); ++} ++EXPORT_SYMBOL(interruptible_sleep_on); ++ ++long __sched ++interruptible_sleep_on_timeout(wait_queue_head_t *q, long timeout) ++{ ++ return sleep_on_common(q, TASK_INTERRUPTIBLE, timeout); ++} ++EXPORT_SYMBOL(interruptible_sleep_on_timeout); ++ ++void __sched sleep_on(wait_queue_head_t *q) ++{ ++ sleep_on_common(q, TASK_UNINTERRUPTIBLE, MAX_SCHEDULE_TIMEOUT); ++} ++EXPORT_SYMBOL(sleep_on); ++ ++long __sched sleep_on_timeout(wait_queue_head_t *q, long timeout) ++{ ++ return sleep_on_common(q, TASK_UNINTERRUPTIBLE, timeout); ++} ++EXPORT_SYMBOL(sleep_on_timeout); ++ ++#ifdef CONFIG_RT_MUTEXES ++ ++/* ++ * rt_mutex_setprio - set the current priority of a task ++ * @p: task ++ * @prio: prio value (kernel-internal form) ++ * ++ * This function changes the 'effective' priority of a task. It does ++ * not touch ->normal_prio like __setscheduler(). ++ * ++ * Used by the rt_mutex code to implement priority inheritance logic. ++ */ ++void rt_mutex_setprio(struct task_struct *p, int prio) ++{ ++ unsigned long flags; ++ int queued, oldprio; ++ struct rq *rq; ++ ++ BUG_ON(prio < 0 || prio > MAX_PRIO); ++ ++ rq = task_grq_lock(p, &flags); ++ ++ trace_sched_pi_setprio(p, prio); ++ oldprio = p->prio; ++ queued = task_queued(p); ++ if (queued) ++ dequeue_task(p); ++ p->prio = prio; ++ if (task_running(p) && prio > oldprio) ++ resched_task(p); ++ if (queued) { ++ enqueue_task(p); ++ try_preempt(p, rq); ++ } ++ ++ task_grq_unlock(&flags); ++} ++ ++#endif ++ ++/* ++ * Adjust the deadline for when the priority is to change, before it's ++ * changed. ++ */ ++static inline void adjust_deadline(struct task_struct *p, int new_prio) ++{ ++ p->deadline += static_deadline_diff(new_prio) - task_deadline_diff(p); ++} ++ ++void set_user_nice(struct task_struct *p, long nice) ++{ ++ int queued, new_static, old_static; ++ unsigned long flags; ++ struct rq *rq; ++ ++ if (TASK_NICE(p) == nice || nice < -20 || nice > 19) ++ return; ++ new_static = NICE_TO_PRIO(nice); ++ /* ++ * We have to be careful, if called from sys_setpriority(), ++ * the task might be in the middle of scheduling on another CPU. 
++ */ ++ rq = time_task_grq_lock(p, &flags); ++ /* ++ * The RT priorities are set via sched_setscheduler(), but we still ++ * allow the 'normal' nice value to be set - but as expected ++ * it wont have any effect on scheduling until the task is ++ * not SCHED_NORMAL/SCHED_BATCH: ++ */ ++ if (has_rt_policy(p)) { ++ p->static_prio = new_static; ++ goto out_unlock; ++ } ++ queued = task_queued(p); ++ if (queued) ++ dequeue_task(p); ++ ++ adjust_deadline(p, new_static); ++ old_static = p->static_prio; ++ p->static_prio = new_static; ++ p->prio = effective_prio(p); ++ ++ if (queued) { ++ enqueue_task(p); ++ if (new_static < old_static) ++ try_preempt(p, rq); ++ } else if (task_running(p)) { ++ reset_rq_task(rq, p); ++ if (old_static < new_static) ++ resched_task(p); ++ } ++out_unlock: ++ task_grq_unlock(&flags); ++} ++EXPORT_SYMBOL(set_user_nice); ++ ++/* ++ * can_nice - check if a task can reduce its nice value ++ * @p: task ++ * @nice: nice value ++ */ ++int can_nice(const struct task_struct *p, const int nice) ++{ ++ /* convert nice value [19,-20] to rlimit style value [1,40] */ ++ int nice_rlim = 20 - nice; ++ ++ return (nice_rlim <= task_rlimit(p, RLIMIT_NICE) || ++ capable(CAP_SYS_NICE)); ++} ++ ++#ifdef __ARCH_WANT_SYS_NICE ++ ++/* ++ * sys_nice - change the priority of the current process. ++ * @increment: priority increment ++ * ++ * sys_setpriority is a more generic, but much slower function that ++ * does similar things. ++ */ ++SYSCALL_DEFINE1(nice, int, increment) ++{ ++ long nice, retval; ++ ++ /* ++ * Setpriority might change our priority at the same moment. ++ * We don't have to worry. Conceptually one call occurs first ++ * and we have a single winner. ++ */ ++ if (increment < -40) ++ increment = -40; ++ if (increment > 40) ++ increment = 40; ++ ++ nice = TASK_NICE(current) + increment; ++ if (nice < -20) ++ nice = -20; ++ if (nice > 19) ++ nice = 19; ++ ++ if (increment < 0 && !can_nice(current, nice)) ++ return -EPERM; ++ ++ retval = security_task_setnice(current, nice); ++ if (retval) ++ return retval; ++ ++ set_user_nice(current, nice); ++ return 0; ++} ++ ++#endif ++ ++/** ++ * task_prio - return the priority value of a given task. ++ * @p: the task in question. ++ * ++ * This is the priority value as seen by users in /proc. ++ * RT tasks are offset by -100. Normal tasks are centered around 1, value goes ++ * from 0 (SCHED_ISO) up to 82 (nice +19 SCHED_IDLEPRIO). ++ */ ++int task_prio(const struct task_struct *p) ++{ ++ int delta, prio = p->prio - MAX_RT_PRIO; ++ ++ /* rt tasks and iso tasks */ ++ if (prio <= 0) ++ goto out; ++ ++ /* Convert to ms to avoid overflows */ ++ delta = NS_TO_MS(p->deadline - grq.niffies); ++ delta = delta * 40 / ms_longest_deadline_diff(); ++ if (delta > 0 && delta <= 80) ++ prio += delta; ++ if (idleprio_task(p)) ++ prio += 40; ++out: ++ return prio; ++} ++ ++/** ++ * task_nice - return the nice value of a given task. ++ * @p: the task in question. ++ */ ++int task_nice(const struct task_struct *p) ++{ ++ return TASK_NICE(p); ++} ++EXPORT_SYMBOL_GPL(task_nice); ++ ++/** ++ * idle_cpu - is a given cpu idle currently? ++ * @cpu: the processor in question. ++ */ ++int idle_cpu(int cpu) ++{ ++ return cpu_curr(cpu) == cpu_rq(cpu)->idle; ++} ++ ++/** ++ * idle_task - return the idle task for a given cpu. ++ * @cpu: the processor in question. ++ */ ++struct task_struct *idle_task(int cpu) ++{ ++ return cpu_rq(cpu)->idle; ++} ++ ++/** ++ * find_process_by_pid - find a process with a matching PID value. ++ * @pid: the pid in question. 
++ */ ++static inline struct task_struct *find_process_by_pid(pid_t pid) ++{ ++ return pid ? find_task_by_vpid(pid) : current; ++} ++ ++/* Actually do priority change: must hold grq lock. */ ++static void ++__setscheduler(struct task_struct *p, struct rq *rq, int policy, int prio) ++{ ++ int oldrtprio, oldprio; ++ ++ p->policy = policy; ++ oldrtprio = p->rt_priority; ++ p->rt_priority = prio; ++ p->normal_prio = normal_prio(p); ++ oldprio = p->prio; ++ /* we are holding p->pi_lock already */ ++ p->prio = rt_mutex_getprio(p); ++ if (task_running(p)) { ++ reset_rq_task(rq, p); ++ /* Resched only if we might now be preempted */ ++ if (p->prio > oldprio || p->rt_priority > oldrtprio) ++ resched_task(p); ++ } ++} ++ ++/* ++ * check the target process has a UID that matches the current process's ++ */ ++static bool check_same_owner(struct task_struct *p) ++{ ++ const struct cred *cred = current_cred(), *pcred; ++ bool match; ++ ++ rcu_read_lock(); ++ pcred = __task_cred(p); ++ if (cred->user->user_ns == pcred->user->user_ns) ++ match = (cred->euid == pcred->euid || ++ cred->euid == pcred->uid); ++ else ++ match = false; ++ rcu_read_unlock(); ++ return match; ++} ++ ++static int __sched_setscheduler(struct task_struct *p, int policy, ++ const struct sched_param *param, bool user) ++{ ++ struct sched_param zero_param = { .sched_priority = 0 }; ++ int queued, retval, oldpolicy = -1; ++ unsigned long flags, rlim_rtprio = 0; ++ int reset_on_fork; ++ struct rq *rq; ++ ++ /* may grab non-irq protected spin_locks */ ++ BUG_ON(in_interrupt()); ++ ++ if (is_rt_policy(policy) && !capable(CAP_SYS_NICE)) { ++ unsigned long lflags; ++ ++ if (!lock_task_sighand(p, &lflags)) ++ return -ESRCH; ++ rlim_rtprio = task_rlimit(p, RLIMIT_RTPRIO); ++ unlock_task_sighand(p, &lflags); ++ if (rlim_rtprio) ++ goto recheck; ++ /* ++ * If the caller requested an RT policy without having the ++ * necessary rights, we downgrade the policy to SCHED_ISO. ++ * We also set the parameter to zero to pass the checks. ++ */ ++ policy = SCHED_ISO; ++ param = &zero_param; ++ } ++recheck: ++ /* double check policy once rq lock held */ ++ if (policy < 0) { ++ reset_on_fork = p->sched_reset_on_fork; ++ policy = oldpolicy = p->policy; ++ } else { ++ reset_on_fork = !!(policy & SCHED_RESET_ON_FORK); ++ policy &= ~SCHED_RESET_ON_FORK; ++ ++ if (!SCHED_RANGE(policy)) ++ return -EINVAL; ++ } ++ ++ /* ++ * Valid priorities for SCHED_FIFO and SCHED_RR are ++ * 1..MAX_USER_RT_PRIO-1, valid priority for SCHED_NORMAL and ++ * SCHED_BATCH is 0. 
++ */ ++ if (param->sched_priority < 0 || ++ (p->mm && param->sched_priority > MAX_USER_RT_PRIO - 1) || ++ (!p->mm && param->sched_priority > MAX_RT_PRIO - 1)) ++ return -EINVAL; ++ if (is_rt_policy(policy) != (param->sched_priority != 0)) ++ return -EINVAL; ++ ++ /* ++ * Allow unprivileged RT tasks to decrease priority: ++ */ ++ if (user && !capable(CAP_SYS_NICE)) { ++ if (is_rt_policy(policy)) { ++ unsigned long rlim_rtprio = ++ task_rlimit(p, RLIMIT_RTPRIO); ++ ++ /* can't set/change the rt policy */ ++ if (policy != p->policy && !rlim_rtprio) ++ return -EPERM; ++ ++ /* can't increase priority */ ++ if (param->sched_priority > p->rt_priority && ++ param->sched_priority > rlim_rtprio) ++ return -EPERM; ++ } else { ++ switch (p->policy) { ++ /* ++ * Can only downgrade policies but not back to ++ * SCHED_NORMAL ++ */ ++ case SCHED_ISO: ++ if (policy == SCHED_ISO) ++ goto out; ++ if (policy == SCHED_NORMAL) ++ return -EPERM; ++ break; ++ case SCHED_BATCH: ++ if (policy == SCHED_BATCH) ++ goto out; ++ if (policy != SCHED_IDLEPRIO) ++ return -EPERM; ++ break; ++ case SCHED_IDLEPRIO: ++ if (policy == SCHED_IDLEPRIO) ++ goto out; ++ return -EPERM; ++ default: ++ break; ++ } ++ } ++ ++ /* can't change other user's priorities */ ++ if (!check_same_owner(p)) ++ return -EPERM; ++ ++ /* Normal users shall not reset the sched_reset_on_fork flag */ ++ if (p->sched_reset_on_fork && !reset_on_fork) ++ return -EPERM; ++ } ++ ++ if (user) { ++ retval = security_task_setscheduler(p); ++ if (retval) ++ return retval; ++ } ++ ++ /* ++ * make sure no PI-waiters arrive (or leave) while we are ++ * changing the priority of the task: ++ */ ++ raw_spin_lock_irqsave(&p->pi_lock, flags); ++ /* ++ * To be able to change p->policy safely, the grunqueue lock must be ++ * held. ++ */ ++ rq = __task_grq_lock(p); ++ ++ /* ++ * Changing the policy of the stop threads its a very bad idea ++ */ ++ if (p == rq->stop) { ++ __task_grq_unlock(); ++ raw_spin_unlock_irqrestore(&p->pi_lock, flags); ++ return -EINVAL; ++ } ++ ++ /* ++ * If not changing anything there's no need to proceed further: ++ */ ++ if (unlikely(policy == p->policy && (!is_rt_policy(policy) || ++ param->sched_priority == p->rt_priority))) { ++ ++ __task_grq_unlock(); ++ raw_spin_unlock_irqrestore(&p->pi_lock, flags); ++ return 0; ++ } ++ ++ /* recheck policy now with rq lock held */ ++ if (unlikely(oldpolicy != -1 && oldpolicy != p->policy)) { ++ policy = oldpolicy = -1; ++ __task_grq_unlock(); ++ raw_spin_unlock_irqrestore(&p->pi_lock, flags); ++ goto recheck; ++ } ++ update_clocks(rq); ++ p->sched_reset_on_fork = reset_on_fork; ++ ++ queued = task_queued(p); ++ if (queued) ++ dequeue_task(p); ++ __setscheduler(p, rq, policy, param->sched_priority); ++ if (queued) { ++ enqueue_task(p); ++ try_preempt(p, rq); ++ } ++ __task_grq_unlock(); ++ raw_spin_unlock_irqrestore(&p->pi_lock, flags); ++ ++ rt_mutex_adjust_pi(p); ++out: ++ return 0; ++} ++ ++/** ++ * sched_setscheduler - change the scheduling policy and/or RT priority of a thread. ++ * @p: the task in question. ++ * @policy: new policy. ++ * @param: structure containing the new RT priority. ++ * ++ * NOTE that the task may be already dead. ++ */ ++int sched_setscheduler(struct task_struct *p, int policy, ++ const struct sched_param *param) ++{ ++ return __sched_setscheduler(p, policy, param, true); ++} ++ ++EXPORT_SYMBOL_GPL(sched_setscheduler); ++ ++/** ++ * sched_setscheduler_nocheck - change the scheduling policy and/or RT priority of a thread from kernelspace. ++ * @p: the task in question. 
++ * @policy: new policy. ++ * @param: structure containing the new RT priority. ++ * ++ * Just like sched_setscheduler, only don't bother checking if the ++ * current context has permission. For example, this is needed in ++ * stop_machine(): we create temporary high priority worker threads, ++ * but our caller might not have that capability. ++ */ ++int sched_setscheduler_nocheck(struct task_struct *p, int policy, ++ const struct sched_param *param) ++{ ++ return __sched_setscheduler(p, policy, param, false); ++} ++ ++static int ++do_sched_setscheduler(pid_t pid, int policy, struct sched_param __user *param) ++{ ++ struct sched_param lparam; ++ struct task_struct *p; ++ int retval; ++ ++ if (!param || pid < 0) ++ return -EINVAL; ++ if (copy_from_user(&lparam, param, sizeof(struct sched_param))) ++ return -EFAULT; ++ ++ rcu_read_lock(); ++ retval = -ESRCH; ++ p = find_process_by_pid(pid); ++ if (p != NULL) ++ retval = sched_setscheduler(p, policy, &lparam); ++ rcu_read_unlock(); ++ ++ return retval; ++} ++ ++/** ++ * sys_sched_setscheduler - set/change the scheduler policy and RT priority ++ * @pid: the pid in question. ++ * @policy: new policy. ++ * @param: structure containing the new RT priority. ++ */ ++asmlinkage long sys_sched_setscheduler(pid_t pid, int policy, ++ struct sched_param __user *param) ++{ ++ /* negative values for policy are not valid */ ++ if (policy < 0) ++ return -EINVAL; ++ ++ return do_sched_setscheduler(pid, policy, param); ++} ++ ++/** ++ * sys_sched_setparam - set/change the RT priority of a thread ++ * @pid: the pid in question. ++ * @param: structure containing the new RT priority. ++ */ ++SYSCALL_DEFINE2(sched_setparam, pid_t, pid, struct sched_param __user *, param) ++{ ++ return do_sched_setscheduler(pid, -1, param); ++} ++ ++/** ++ * sys_sched_getscheduler - get the policy (scheduling class) of a thread ++ * @pid: the pid in question. ++ */ ++SYSCALL_DEFINE1(sched_getscheduler, pid_t, pid) ++{ ++ struct task_struct *p; ++ int retval = -EINVAL; ++ ++ if (pid < 0) ++ goto out_nounlock; ++ ++ retval = -ESRCH; ++ rcu_read_lock(); ++ p = find_process_by_pid(pid); ++ if (p) { ++ retval = security_task_getscheduler(p); ++ if (!retval) ++ retval = p->policy; ++ } ++ rcu_read_unlock(); ++ ++out_nounlock: ++ return retval; ++} ++ ++/** ++ * sys_sched_getscheduler - get the RT priority of a thread ++ * @pid: the pid in question. ++ * @param: structure containing the RT priority. ++ */ ++SYSCALL_DEFINE2(sched_getparam, pid_t, pid, struct sched_param __user *, param) ++{ ++ struct sched_param lp; ++ struct task_struct *p; ++ int retval = -EINVAL; ++ ++ if (!param || pid < 0) ++ goto out_nounlock; ++ ++ rcu_read_lock(); ++ p = find_process_by_pid(pid); ++ retval = -ESRCH; ++ if (!p) ++ goto out_unlock; ++ ++ retval = security_task_getscheduler(p); ++ if (retval) ++ goto out_unlock; ++ ++ lp.sched_priority = p->rt_priority; ++ rcu_read_unlock(); ++ ++ /* ++ * This one might sleep, we cannot do it with a spinlock held ... ++ */ ++ retval = copy_to_user(param, &lp, sizeof(*param)) ? 
-EFAULT : 0; ++ ++out_nounlock: ++ return retval; ++ ++out_unlock: ++ rcu_read_unlock(); ++ return retval; ++} ++ ++long sched_setaffinity(pid_t pid, const struct cpumask *in_mask) ++{ ++ cpumask_var_t cpus_allowed, new_mask; ++ struct task_struct *p; ++ int retval; ++ ++ get_online_cpus(); ++ rcu_read_lock(); ++ ++ p = find_process_by_pid(pid); ++ if (!p) { ++ rcu_read_unlock(); ++ put_online_cpus(); ++ return -ESRCH; ++ } ++ ++ /* Prevent p going away */ ++ get_task_struct(p); ++ rcu_read_unlock(); ++ ++ if (!alloc_cpumask_var(&cpus_allowed, GFP_KERNEL)) { ++ retval = -ENOMEM; ++ goto out_put_task; ++ } ++ if (!alloc_cpumask_var(&new_mask, GFP_KERNEL)) { ++ retval = -ENOMEM; ++ goto out_free_cpus_allowed; ++ } ++ retval = -EPERM; ++ if (!check_same_owner(p) && !ns_capable(task_user_ns(p), CAP_SYS_NICE)) ++ goto out_unlock; ++ ++ retval = security_task_setscheduler(p); ++ if (retval) ++ goto out_unlock; ++ ++ cpuset_cpus_allowed(p, cpus_allowed); ++ cpumask_and(new_mask, in_mask, cpus_allowed); ++again: ++ retval = set_cpus_allowed_ptr(p, new_mask); ++ ++ if (!retval) { ++ cpuset_cpus_allowed(p, cpus_allowed); ++ if (!cpumask_subset(new_mask, cpus_allowed)) { ++ /* ++ * We must have raced with a concurrent cpuset ++ * update. Just reset the cpus_allowed to the ++ * cpuset's cpus_allowed ++ */ ++ cpumask_copy(new_mask, cpus_allowed); ++ goto again; ++ } ++ } ++out_unlock: ++ free_cpumask_var(new_mask); ++out_free_cpus_allowed: ++ free_cpumask_var(cpus_allowed); ++out_put_task: ++ put_task_struct(p); ++ put_online_cpus(); ++ return retval; ++} ++ ++static int get_user_cpu_mask(unsigned long __user *user_mask_ptr, unsigned len, ++ cpumask_t *new_mask) ++{ ++ if (len < sizeof(cpumask_t)) { ++ memset(new_mask, 0, sizeof(cpumask_t)); ++ } else if (len > sizeof(cpumask_t)) { ++ len = sizeof(cpumask_t); ++ } ++ return copy_from_user(new_mask, user_mask_ptr, len) ? 
-EFAULT : 0; ++} ++ ++ ++/** ++ * sys_sched_setaffinity - set the cpu affinity of a process ++ * @pid: pid of the process ++ * @len: length in bytes of the bitmask pointed to by user_mask_ptr ++ * @user_mask_ptr: user-space pointer to the new cpu mask ++ */ ++SYSCALL_DEFINE3(sched_setaffinity, pid_t, pid, unsigned int, len, ++ unsigned long __user *, user_mask_ptr) ++{ ++ cpumask_var_t new_mask; ++ int retval; ++ ++ if (!alloc_cpumask_var(&new_mask, GFP_KERNEL)) ++ return -ENOMEM; ++ ++ retval = get_user_cpu_mask(user_mask_ptr, len, new_mask); ++ if (retval == 0) ++ retval = sched_setaffinity(pid, new_mask); ++ free_cpumask_var(new_mask); ++ return retval; ++} ++ ++long sched_getaffinity(pid_t pid, cpumask_t *mask) ++{ ++ struct task_struct *p; ++ unsigned long flags; ++ int retval; ++ ++ get_online_cpus(); ++ rcu_read_lock(); ++ ++ retval = -ESRCH; ++ p = find_process_by_pid(pid); ++ if (!p) ++ goto out_unlock; ++ ++ retval = security_task_getscheduler(p); ++ if (retval) ++ goto out_unlock; ++ ++ grq_lock_irqsave(&flags); ++ cpumask_and(mask, tsk_cpus_allowed(p), cpu_online_mask); ++ grq_unlock_irqrestore(&flags); ++ ++out_unlock: ++ rcu_read_unlock(); ++ put_online_cpus(); ++ ++ return retval; ++} ++ ++/** ++ * sys_sched_getaffinity - get the cpu affinity of a process ++ * @pid: pid of the process ++ * @len: length in bytes of the bitmask pointed to by user_mask_ptr ++ * @user_mask_ptr: user-space pointer to hold the current cpu mask ++ */ ++SYSCALL_DEFINE3(sched_getaffinity, pid_t, pid, unsigned int, len, ++ unsigned long __user *, user_mask_ptr) ++{ ++ int ret; ++ cpumask_var_t mask; ++ ++ if ((len * BITS_PER_BYTE) < nr_cpu_ids) ++ return -EINVAL; ++ if (len & (sizeof(unsigned long)-1)) ++ return -EINVAL; ++ ++ if (!alloc_cpumask_var(&mask, GFP_KERNEL)) ++ return -ENOMEM; ++ ++ ret = sched_getaffinity(pid, mask); ++ if (ret == 0) { ++ size_t retlen = min_t(size_t, len, cpumask_size()); ++ ++ if (copy_to_user(user_mask_ptr, mask, retlen)) ++ ret = -EFAULT; ++ else ++ ret = retlen; ++ } ++ free_cpumask_var(mask); ++ ++ return ret; ++} ++ ++/** ++ * sys_sched_yield - yield the current processor to other threads. ++ * ++ * This function yields the current CPU to other tasks. It does this by ++ * scheduling away the current task. If it still has the earliest deadline ++ * it will be scheduled again as the next task. ++ */ ++SYSCALL_DEFINE0(sched_yield) ++{ ++ struct task_struct *p; ++ ++ p = current; ++ grq_lock_irq(); ++ schedstat_inc(task_rq(p), yld_count); ++ requeue_task(p); ++ ++ /* ++ * Since we are going to call schedule() anyway, there's ++ * no need to preempt or enable interrupts: ++ */ ++ __release(grq.lock); ++ spin_release(&grq.lock.dep_map, 1, _THIS_IP_); ++ do_raw_spin_unlock(&grq.lock); ++ preempt_enable_no_resched(); ++ ++ schedule(); ++ ++ return 0; ++} ++ ++static inline bool should_resched(void) ++{ ++ return need_resched() && !(preempt_count() & PREEMPT_ACTIVE); ++} ++ ++static void __cond_resched(void) ++{ ++ /* NOT a real fix but will make voluntary preempt work. 馬鹿ãªäº‹ */ ++ if (unlikely(system_state != SYSTEM_RUNNING)) ++ return; ++ ++ add_preempt_count(PREEMPT_ACTIVE); ++ schedule(); ++ sub_preempt_count(PREEMPT_ACTIVE); ++} ++ ++int __sched _cond_resched(void) ++{ ++ if (should_resched()) { ++ __cond_resched(); ++ return 1; ++ } ++ return 0; ++} ++EXPORT_SYMBOL(_cond_resched); ++ ++/* ++ * __cond_resched_lock() - if a reschedule is pending, drop the given lock, ++ * call schedule, and on return reacquire the lock. 
++ * ++ * This works OK both with and without CONFIG_PREEMPT. We do strange low-level ++ * operations here to prevent schedule() from being called twice (once via ++ * spin_unlock(), once by hand). ++ */ ++int __cond_resched_lock(spinlock_t *lock) ++{ ++ int resched = should_resched(); ++ int ret = 0; ++ ++ lockdep_assert_held(lock); ++ ++ if (spin_needbreak(lock) || resched) { ++ spin_unlock(lock); ++ if (resched) ++ __cond_resched(); ++ else ++ cpu_relax(); ++ ret = 1; ++ spin_lock(lock); ++ } ++ return ret; ++} ++EXPORT_SYMBOL(__cond_resched_lock); ++ ++int __sched __cond_resched_softirq(void) ++{ ++ BUG_ON(!in_softirq()); ++ ++ if (should_resched()) { ++ local_bh_enable(); ++ __cond_resched(); ++ local_bh_disable(); ++ return 1; ++ } ++ return 0; ++} ++EXPORT_SYMBOL(__cond_resched_softirq); ++ ++/** ++ * yield - yield the current processor to other threads. ++ * ++ * This is a shortcut for kernel-space yielding - it marks the ++ * thread runnable and calls sys_sched_yield(). ++ */ ++void __sched yield(void) ++{ ++ set_current_state(TASK_RUNNING); ++ sys_sched_yield(); ++} ++EXPORT_SYMBOL(yield); ++ ++/** ++ * yield_to - yield the current processor to another thread in ++ * your thread group, or accelerate that thread toward the ++ * processor it's on. ++ * @p: target task ++ * @preempt: whether task preemption is allowed or not ++ * ++ * It's the caller's job to ensure that the target task struct ++ * can't go away on us before we can do any checks. ++ * ++ * Returns true if we indeed boosted the target task. ++ */ ++bool __sched yield_to(struct task_struct *p, bool preempt) ++{ ++ unsigned long flags; ++ bool yielded = 0; ++ struct rq *rq; ++ ++ rq = this_rq(); ++ grq_lock_irqsave(&flags); ++ if (task_running(p) || p->state) ++ goto out_unlock; ++ yielded = 1; ++ if (p->deadline > rq->rq_deadline) ++ p->deadline = rq->rq_deadline; ++ p->time_slice += rq->rq_time_slice; ++ rq->rq_time_slice = 0; ++ if (p->time_slice > timeslice()) ++ p->time_slice = timeslice(); ++ set_tsk_need_resched(rq->curr); ++out_unlock: ++ grq_unlock_irqrestore(&flags); ++ ++ if (yielded) ++ schedule(); ++ return yielded; ++} ++EXPORT_SYMBOL_GPL(yield_to); ++ ++/* ++ * This task is about to go to sleep on IO. Increment rq->nr_iowait so ++ * that process accounting knows that this is a task in IO wait state. ++ * ++ * But don't do that if it is a deliberate, throttling IO wait (this task ++ * has set its backing_dev_info: the queue against which it should throttle) ++ */ ++void __sched io_schedule(void) ++{ ++ struct rq *rq = raw_rq(); ++ ++ delayacct_blkio_start(); ++ atomic_inc(&rq->nr_iowait); ++ blk_flush_plug(current); ++ current->in_iowait = 1; ++ schedule(); ++ current->in_iowait = 0; ++ atomic_dec(&rq->nr_iowait); ++ delayacct_blkio_end(); ++} ++EXPORT_SYMBOL(io_schedule); ++ ++long __sched io_schedule_timeout(long timeout) ++{ ++ struct rq *rq = raw_rq(); ++ long ret; ++ ++ delayacct_blkio_start(); ++ atomic_inc(&rq->nr_iowait); ++ blk_flush_plug(current); ++ current->in_iowait = 1; ++ ret = schedule_timeout(timeout); ++ current->in_iowait = 0; ++ atomic_dec(&rq->nr_iowait); ++ delayacct_blkio_end(); ++ return ret; ++} ++ ++/** ++ * sys_sched_get_priority_max - return maximum RT priority. ++ * @policy: scheduling class. ++ * ++ * this syscall returns the maximum rt_priority that can be used ++ * by a given scheduling class. 
++ */ ++SYSCALL_DEFINE1(sched_get_priority_max, int, policy) ++{ ++ int ret = -EINVAL; ++ ++ switch (policy) { ++ case SCHED_FIFO: ++ case SCHED_RR: ++ ret = MAX_USER_RT_PRIO-1; ++ break; ++ case SCHED_NORMAL: ++ case SCHED_BATCH: ++ case SCHED_ISO: ++ case SCHED_IDLEPRIO: ++ ret = 0; ++ break; ++ } ++ return ret; ++} ++ ++/** ++ * sys_sched_get_priority_min - return minimum RT priority. ++ * @policy: scheduling class. ++ * ++ * this syscall returns the minimum rt_priority that can be used ++ * by a given scheduling class. ++ */ ++SYSCALL_DEFINE1(sched_get_priority_min, int, policy) ++{ ++ int ret = -EINVAL; ++ ++ switch (policy) { ++ case SCHED_FIFO: ++ case SCHED_RR: ++ ret = 1; ++ break; ++ case SCHED_NORMAL: ++ case SCHED_BATCH: ++ case SCHED_ISO: ++ case SCHED_IDLEPRIO: ++ ret = 0; ++ break; ++ } ++ return ret; ++} ++ ++/** ++ * sys_sched_rr_get_interval - return the default timeslice of a process. ++ * @pid: pid of the process. ++ * @interval: userspace pointer to the timeslice value. ++ * ++ * this syscall writes the default timeslice value of a given process ++ * into the user-space timespec buffer. A value of '0' means infinity. ++ */ ++SYSCALL_DEFINE2(sched_rr_get_interval, pid_t, pid, ++ struct timespec __user *, interval) ++{ ++ struct task_struct *p; ++ unsigned int time_slice; ++ unsigned long flags; ++ int retval; ++ struct timespec t; ++ ++ if (pid < 0) ++ return -EINVAL; ++ ++ retval = -ESRCH; ++ rcu_read_lock(); ++ p = find_process_by_pid(pid); ++ if (!p) ++ goto out_unlock; ++ ++ retval = security_task_getscheduler(p); ++ if (retval) ++ goto out_unlock; ++ ++ grq_lock_irqsave(&flags); ++ time_slice = p->policy == SCHED_FIFO ? 0 : MS_TO_NS(task_timeslice(p)); ++ grq_unlock_irqrestore(&flags); ++ ++ rcu_read_unlock(); ++ t = ns_to_timespec(time_slice); ++ retval = copy_to_user(interval, &t, sizeof(t)) ? -EFAULT : 0; ++ return retval; ++ ++out_unlock: ++ rcu_read_unlock(); ++ return retval; ++} ++ ++static const char stat_nam[] = TASK_STATE_TO_CHAR_STR; ++ ++void sched_show_task(struct task_struct *p) ++{ ++ unsigned long free = 0; ++ unsigned state; ++ ++ state = p->state ? __ffs(p->state) + 1 : 0; ++ printk(KERN_INFO "%-15.15s %c", p->comm, ++ state < sizeof(stat_nam) - 1 ? 
stat_nam[state] : '?'); ++#if BITS_PER_LONG == 32 ++ if (state == TASK_RUNNING) ++ printk(KERN_CONT " running "); ++ else ++ printk(KERN_CONT " %08lx ", thread_saved_pc(p)); ++#else ++ if (state == TASK_RUNNING) ++ printk(KERN_CONT " running task "); ++ else ++ printk(KERN_CONT " %016lx ", thread_saved_pc(p)); ++#endif ++#ifdef CONFIG_DEBUG_STACK_USAGE ++ free = stack_not_used(p); ++#endif ++ printk(KERN_CONT "%5lu %5d %6d 0x%08lx\n", free, ++ task_pid_nr(p), task_pid_nr(p->real_parent), ++ (unsigned long)task_thread_info(p)->flags); ++ ++ show_stack(p, NULL); ++} ++ ++void show_state_filter(unsigned long state_filter) ++{ ++ struct task_struct *g, *p; ++ ++#if BITS_PER_LONG == 32 ++ printk(KERN_INFO ++ " task PC stack pid father\n"); ++#else ++ printk(KERN_INFO ++ " task PC stack pid father\n"); ++#endif ++ rcu_read_lock(); ++ do_each_thread(g, p) { ++ /* ++ * reset the NMI-timeout, listing all files on a slow ++ * console might take a lot of time: ++ */ ++ touch_nmi_watchdog(); ++ if (!state_filter || (p->state & state_filter)) ++ sched_show_task(p); ++ } while_each_thread(g, p); ++ ++ touch_all_softlockup_watchdogs(); ++ ++ rcu_read_unlock(); ++ /* ++ * Only show locks if all tasks are dumped: ++ */ ++ if (!state_filter) ++ debug_show_all_locks(); ++} ++ ++#ifdef CONFIG_SMP ++void do_set_cpus_allowed(struct task_struct *p, const struct cpumask *new_mask) ++{ ++ cpumask_copy(tsk_cpus_allowed(p), new_mask); ++} ++#endif ++ ++/** ++ * init_idle - set up an idle thread for a given CPU ++ * @idle: task in question ++ * @cpu: cpu the idle task belongs to ++ * ++ * NOTE: this function does not set the idle thread's NEED_RESCHED ++ * flag, to make booting more robust. ++ */ ++void init_idle(struct task_struct *idle, int cpu) ++{ ++ struct rq *rq = cpu_rq(cpu); ++ unsigned long flags; ++ ++ time_grq_lock(rq, &flags); ++ idle->last_ran = rq->clock; ++ idle->state = TASK_RUNNING; ++ /* Setting prio to illegal value shouldn't matter when never queued */ ++ idle->prio = PRIO_LIMIT; ++ set_rq_task(rq, idle); ++ do_set_cpus_allowed(idle, &cpumask_of_cpu(cpu)); ++ /* Silence PROVE_RCU */ ++ rcu_read_lock(); ++ set_task_cpu(idle, cpu); ++ rcu_read_unlock(); ++ rq->curr = rq->idle = idle; ++ idle->on_cpu = 1; ++ grq_unlock_irqrestore(&flags); ++ ++ /* Set the preempt count _outside_ the spinlocks! */ ++ task_thread_info(idle)->preempt_count = 0; ++ ++ ftrace_graph_init_idle_task(idle, cpu); ++#if defined(CONFIG_SMP) ++ sprintf(idle->comm, "%s/%d", INIT_TASK_COMM, cpu); ++#endif ++} ++ ++#ifdef CONFIG_SMP ++#ifdef CONFIG_NO_HZ ++void select_nohz_load_balancer(int stop_tick) ++{ ++} ++ ++void set_cpu_sd_state_idle(void) {} ++#if defined(CONFIG_SCHED_MC) || defined(CONFIG_SCHED_SMT) ++/** ++ * lowest_flag_domain - Return lowest sched_domain containing flag. ++ * @cpu: The cpu whose lowest level of sched domain is to ++ * be returned. ++ * @flag: The flag to check for the lowest sched_domain ++ * for the given cpu. ++ * ++ * Returns the lowest sched_domain of a cpu which contains the given flag. ++ */ ++static inline struct sched_domain *lowest_flag_domain(int cpu, int flag) ++{ ++ struct sched_domain *sd; ++ ++ for_each_domain(cpu, sd) ++ if (sd && (sd->flags & flag)) ++ break; ++ ++ return sd; ++} ++ ++/** ++ * for_each_flag_domain - Iterates over sched_domains containing the flag. ++ * @cpu: The cpu whose domains we're iterating over. ++ * @sd: variable holding the value of the power_savings_sd ++ * for cpu. ++ * @flag: The flag to filter the sched_domains to be iterated. 
++ * ++ * Iterates over all the scheduler domains for a given cpu that has the 'flag' ++ * set, starting from the lowest sched_domain to the highest. ++ */ ++#define for_each_flag_domain(cpu, sd, flag) \ ++ for (sd = lowest_flag_domain(cpu, flag); \ ++ (sd && (sd->flags & flag)); sd = sd->parent) ++ ++#endif /* (CONFIG_SCHED_MC || CONFIG_SCHED_SMT) */ ++ ++static inline void resched_cpu(int cpu) ++{ ++ unsigned long flags; ++ ++ grq_lock_irqsave(&flags); ++ resched_task(cpu_curr(cpu)); ++ grq_unlock_irqrestore(&flags); ++} ++ ++/* ++ * In the semi idle case, use the nearest busy cpu for migrating timers ++ * from an idle cpu. This is good for power-savings. ++ * ++ * We don't do similar optimization for completely idle system, as ++ * selecting an idle cpu will add more delays to the timers than intended ++ * (as that cpu's timer base may not be uptodate wrt jiffies etc). ++ */ ++int get_nohz_timer_target(void) ++{ ++ int cpu = smp_processor_id(); ++ int i; ++ struct sched_domain *sd; ++ ++ rcu_read_lock(); ++ for_each_domain(cpu, sd) { ++ for_each_cpu(i, sched_domain_span(sd)) { ++ if (!idle_cpu(i)) ++ cpu = i; ++ goto unlock; ++ } ++ } ++unlock: ++ rcu_read_unlock(); ++ return cpu; ++} ++ ++/* ++ * When add_timer_on() enqueues a timer into the timer wheel of an ++ * idle CPU then this timer might expire before the next timer event ++ * which is scheduled to wake up that CPU. In case of a completely ++ * idle system the next event might even be infinite time into the ++ * future. wake_up_idle_cpu() ensures that the CPU is woken up and ++ * leaves the inner idle loop so the newly added timer is taken into ++ * account when the CPU goes back to idle and evaluates the timer ++ * wheel for the next timer event. ++ */ ++void wake_up_idle_cpu(int cpu) ++{ ++ struct task_struct *idle; ++ struct rq *rq; ++ ++ if (cpu == smp_processor_id()) ++ return; ++ ++ rq = cpu_rq(cpu); ++ idle = rq->idle; ++ ++ /* ++ * This is safe, as this function is called with the timer ++ * wheel base lock of (cpu) held. When the CPU is on the way ++ * to idle and has not yet set rq->curr to idle then it will ++ * be serialised on the timer wheel base lock and take the new ++ * timer into account automatically. ++ */ ++ if (unlikely(rq->curr != idle)) ++ return; ++ ++ /* ++ * We can set TIF_RESCHED on the idle task of the other CPU ++ * lockless. The worst case is that the other CPU runs the ++ * idle task through an additional NOOP schedule() ++ */ ++ set_tsk_need_resched(idle); ++ ++ /* NEED_RESCHED must be visible before we test polling */ ++ smp_mb(); ++ if (!tsk_is_polling(idle)) ++ smp_send_reschedule(cpu); ++} ++ ++#endif /* CONFIG_NO_HZ */ ++ ++/* ++ * Change a given task's CPU affinity. Migrate the thread to a ++ * proper CPU and schedule it away if the CPU it's executing on ++ * is removed from the allowed bitmask. ++ * ++ * NOTE: the caller must have a valid reference to the task, the ++ * task must not exit() & deallocate itself prematurely. The ++ * call is not atomic; no spinlocks may be held. 
++ */ ++int set_cpus_allowed_ptr(struct task_struct *p, const struct cpumask *new_mask) ++{ ++ bool running_wrong = false; ++ bool queued = false; ++ unsigned long flags; ++ struct rq *rq; ++ int ret = 0; ++ ++ rq = task_grq_lock(p, &flags); ++ ++ if (cpumask_equal(tsk_cpus_allowed(p), new_mask)) ++ goto out; ++ ++ if (!cpumask_intersects(new_mask, cpu_active_mask)) { ++ ret = -EINVAL; ++ goto out; ++ } ++ ++ if (unlikely((p->flags & PF_THREAD_BOUND) && p != current)) { ++ ret = -EINVAL; ++ goto out; ++ } ++ ++ queued = task_queued(p); ++ ++ do_set_cpus_allowed(p, new_mask); ++ ++ /* Can the task run on the task's current CPU? If so, we're done */ ++ if (cpumask_test_cpu(task_cpu(p), new_mask)) ++ goto out; ++ ++ if (task_running(p)) { ++ /* Task is running on the wrong cpu now, reschedule it. */ ++ if (rq == this_rq()) { ++ set_tsk_need_resched(p); ++ running_wrong = true; ++ } else ++ resched_task(p); ++ } else ++ set_task_cpu(p, cpumask_any_and(cpu_active_mask, new_mask)); ++ ++out: ++ if (queued) ++ try_preempt(p, rq); ++ task_grq_unlock(&flags); ++ ++ if (running_wrong) ++ _cond_resched(); ++ ++ return ret; ++} ++EXPORT_SYMBOL_GPL(set_cpus_allowed_ptr); ++ ++#ifdef CONFIG_HOTPLUG_CPU ++/* Run through task list and find tasks affined to just the dead cpu, then ++ * allocate a new affinity */ ++static void break_sole_affinity(int src_cpu, struct task_struct *idle) ++{ ++ struct task_struct *p, *t; ++ ++ do_each_thread(t, p) { ++ if (p != idle && !online_cpus(p)) { ++ cpumask_copy(tsk_cpus_allowed(p), cpu_possible_mask); ++ /* ++ * Don't tell them about moving exiting tasks or ++ * kernel threads (both mm NULL), since they never ++ * leave kernel. ++ */ ++ if (p->mm && printk_ratelimit()) { ++ printk(KERN_INFO "process %d (%s) no " ++ "longer affine to cpu %d\n", ++ task_pid_nr(p), p->comm, src_cpu); ++ } ++ } ++ clear_sticky(p); ++ } while_each_thread(t, p); ++} ++ ++/* ++ * Schedules idle task to be the next runnable task on current CPU. ++ * It does so by boosting its priority to highest possible. ++ * Used by CPU offline code. ++ */ ++void sched_idle_next(struct rq *rq, int this_cpu, struct task_struct *idle) ++{ ++ /* cpu has to be offline */ ++ BUG_ON(cpu_online(this_cpu)); ++ ++ __setscheduler(idle, rq, SCHED_FIFO, STOP_PRIO); ++ ++ activate_idle_task(idle); ++ set_tsk_need_resched(rq->curr); ++} ++ ++/* ++ * Ensures that the idle task is using init_mm right before its cpu goes ++ * offline. ++ */ ++void idle_task_exit(void) ++{ ++ struct mm_struct *mm = current->active_mm; ++ ++ BUG_ON(cpu_online(smp_processor_id())); ++ ++ if (mm != &init_mm) ++ switch_mm(mm, &init_mm, current); ++ mmdrop(mm); ++} ++#endif /* CONFIG_HOTPLUG_CPU */ ++void sched_set_stop_task(int cpu, struct task_struct *stop) ++{ ++ struct sched_param stop_param = { .sched_priority = STOP_PRIO }; ++ struct sched_param start_param = { .sched_priority = MAX_USER_RT_PRIO - 1 }; ++ struct task_struct *old_stop = cpu_rq(cpu)->stop; ++ ++ if (stop) { ++ /* ++ * Make it appear like a SCHED_FIFO task, its something ++ * userspace knows about and won't get confused about. ++ * ++ * Also, it will make PI more or less work without too ++ * much confusion -- but then, stop work should not ++ * rely on PI working anyway. ++ */ ++ sched_setscheduler_nocheck(stop, SCHED_FIFO, &stop_param); ++ } ++ ++ cpu_rq(cpu)->stop = stop; ++ ++ if (old_stop) { ++ /* ++ * Reset it back to a normal rt scheduling prio so that ++ * it can die in pieces. 
++ */ ++ sched_setscheduler_nocheck(old_stop, SCHED_FIFO, &start_param); ++ } ++} ++ ++ ++#if defined(CONFIG_SCHED_DEBUG) && defined(CONFIG_SYSCTL) ++ ++static struct ctl_table sd_ctl_dir[] = { ++ { ++ .procname = "sched_domain", ++ .mode = 0555, ++ }, ++ {} ++}; ++ ++static struct ctl_table sd_ctl_root[] = { ++ { ++ .procname = "kernel", ++ .mode = 0555, ++ .child = sd_ctl_dir, ++ }, ++ {} ++}; ++ ++static struct ctl_table *sd_alloc_ctl_entry(int n) ++{ ++ struct ctl_table *entry = ++ kcalloc(n, sizeof(struct ctl_table), GFP_KERNEL); ++ ++ return entry; ++} ++ ++static void sd_free_ctl_entry(struct ctl_table **tablep) ++{ ++ struct ctl_table *entry; ++ ++ /* ++ * In the intermediate directories, both the child directory and ++ * procname are dynamically allocated and could fail but the mode ++ * will always be set. In the lowest directory the names are ++ * static strings and all have proc handlers. ++ */ ++ for (entry = *tablep; entry->mode; entry++) { ++ if (entry->child) ++ sd_free_ctl_entry(&entry->child); ++ if (entry->proc_handler == NULL) ++ kfree(entry->procname); ++ } ++ ++ kfree(*tablep); ++ *tablep = NULL; ++} ++ ++static void ++set_table_entry(struct ctl_table *entry, ++ const char *procname, void *data, int maxlen, ++ mode_t mode, proc_handler *proc_handler) ++{ ++ entry->procname = procname; ++ entry->data = data; ++ entry->maxlen = maxlen; ++ entry->mode = mode; ++ entry->proc_handler = proc_handler; ++} ++ ++static struct ctl_table * ++sd_alloc_ctl_domain_table(struct sched_domain *sd) ++{ ++ struct ctl_table *table = sd_alloc_ctl_entry(13); ++ ++ if (table == NULL) ++ return NULL; ++ ++ set_table_entry(&table[0], "min_interval", &sd->min_interval, ++ sizeof(long), 0644, proc_doulongvec_minmax); ++ set_table_entry(&table[1], "max_interval", &sd->max_interval, ++ sizeof(long), 0644, proc_doulongvec_minmax); ++ set_table_entry(&table[2], "busy_idx", &sd->busy_idx, ++ sizeof(int), 0644, proc_dointvec_minmax); ++ set_table_entry(&table[3], "idle_idx", &sd->idle_idx, ++ sizeof(int), 0644, proc_dointvec_minmax); ++ set_table_entry(&table[4], "newidle_idx", &sd->newidle_idx, ++ sizeof(int), 0644, proc_dointvec_minmax); ++ set_table_entry(&table[5], "wake_idx", &sd->wake_idx, ++ sizeof(int), 0644, proc_dointvec_minmax); ++ set_table_entry(&table[6], "forkexec_idx", &sd->forkexec_idx, ++ sizeof(int), 0644, proc_dointvec_minmax); ++ set_table_entry(&table[7], "busy_factor", &sd->busy_factor, ++ sizeof(int), 0644, proc_dointvec_minmax); ++ set_table_entry(&table[8], "imbalance_pct", &sd->imbalance_pct, ++ sizeof(int), 0644, proc_dointvec_minmax); ++ set_table_entry(&table[9], "cache_nice_tries", ++ &sd->cache_nice_tries, ++ sizeof(int), 0644, proc_dointvec_minmax); ++ set_table_entry(&table[10], "flags", &sd->flags, ++ sizeof(int), 0644, proc_dointvec_minmax); ++ set_table_entry(&table[11], "name", sd->name, ++ CORENAME_MAX_SIZE, 0444, proc_dostring); ++ /* &table[12] is terminator */ ++ ++ return table; ++} ++ ++static ctl_table *sd_alloc_ctl_cpu_table(int cpu) ++{ ++ struct ctl_table *entry, *table; ++ struct sched_domain *sd; ++ int domain_num = 0, i; ++ char buf[32]; ++ ++ for_each_domain(cpu, sd) ++ domain_num++; ++ entry = table = sd_alloc_ctl_entry(domain_num + 1); ++ if (table == NULL) ++ return NULL; ++ ++ i = 0; ++ for_each_domain(cpu, sd) { ++ snprintf(buf, 32, "domain%d", i); ++ entry->procname = kstrdup(buf, GFP_KERNEL); ++ entry->mode = 0555; ++ entry->child = sd_alloc_ctl_domain_table(sd); ++ entry++; ++ i++; ++ } ++ return table; ++} ++ ++static struct 
ctl_table_header *sd_sysctl_header; ++static void register_sched_domain_sysctl(void) ++{ ++ int i, cpu_num = num_possible_cpus(); ++ struct ctl_table *entry = sd_alloc_ctl_entry(cpu_num + 1); ++ char buf[32]; ++ ++ WARN_ON(sd_ctl_dir[0].child); ++ sd_ctl_dir[0].child = entry; ++ ++ if (entry == NULL) ++ return; ++ ++ for_each_possible_cpu(i) { ++ snprintf(buf, 32, "cpu%d", i); ++ entry->procname = kstrdup(buf, GFP_KERNEL); ++ entry->mode = 0555; ++ entry->child = sd_alloc_ctl_cpu_table(i); ++ entry++; ++ } ++ ++ WARN_ON(sd_sysctl_header); ++ sd_sysctl_header = register_sysctl_table(sd_ctl_root); ++} ++ ++/* may be called multiple times per register */ ++static void unregister_sched_domain_sysctl(void) ++{ ++ if (sd_sysctl_header) ++ unregister_sysctl_table(sd_sysctl_header); ++ sd_sysctl_header = NULL; ++ if (sd_ctl_dir[0].child) ++ sd_free_ctl_entry(&sd_ctl_dir[0].child); ++} ++#else ++static void register_sched_domain_sysctl(void) ++{ ++} ++static void unregister_sched_domain_sysctl(void) ++{ ++} ++#endif ++ ++static void set_rq_online(struct rq *rq) ++{ ++ if (!rq->online) { ++ cpumask_set_cpu(cpu_of(rq), rq->rd->online); ++ rq->online = true; ++ } ++} ++ ++static void set_rq_offline(struct rq *rq) ++{ ++ if (rq->online) { ++ cpumask_clear_cpu(cpu_of(rq), rq->rd->online); ++ rq->online = false; ++ } ++} ++ ++/* ++ * migration_call - callback that gets triggered when a CPU is added. ++ */ ++static int __cpuinit ++migration_call(struct notifier_block *nfb, unsigned long action, void *hcpu) ++{ ++ int cpu = (long)hcpu; ++ unsigned long flags; ++ struct rq *rq = cpu_rq(cpu); ++#ifdef CONFIG_HOTPLUG_CPU ++ struct task_struct *idle = rq->idle; ++#endif ++ ++ switch (action & ~CPU_TASKS_FROZEN) { ++ ++ case CPU_UP_PREPARE: ++ break; ++ ++ case CPU_ONLINE: ++ /* Update our root-domain */ ++ grq_lock_irqsave(&flags); ++ if (rq->rd) { ++ BUG_ON(!cpumask_test_cpu(cpu, rq->rd->span)); ++ ++ set_rq_online(rq); ++ } ++ grq.noc = num_online_cpus(); ++ grq_unlock_irqrestore(&flags); ++ break; ++ ++#ifdef CONFIG_HOTPLUG_CPU ++ case CPU_DEAD: ++ /* Idle task back to normal (off runqueue, low prio) */ ++ grq_lock_irq(); ++ return_task(idle, true); ++ idle->static_prio = MAX_PRIO; ++ __setscheduler(idle, rq, SCHED_NORMAL, 0); ++ idle->prio = PRIO_LIMIT; ++ set_rq_task(rq, idle); ++ update_clocks(rq); ++ grq_unlock_irq(); ++ break; ++ ++ case CPU_DYING: ++ /* Update our root-domain */ ++ grq_lock_irqsave(&flags); ++ sched_idle_next(rq, cpu, idle); ++ if (rq->rd) { ++ BUG_ON(!cpumask_test_cpu(cpu, rq->rd->span)); ++ set_rq_offline(rq); ++ } ++ break_sole_affinity(cpu, idle); ++ grq.noc = num_online_cpus(); ++ grq_unlock_irqrestore(&flags); ++ break; ++#endif ++ } ++ return NOTIFY_OK; ++} ++ ++/* ++ * Register at high priority so that task migration (migrate_all_tasks) ++ * happens before everything else. This has to be lower priority than ++ * the notifier in the perf_counter subsystem, though. 
++ */ ++static struct notifier_block __cpuinitdata migration_notifier = { ++ .notifier_call = migration_call, ++ .priority = CPU_PRI_MIGRATION, ++}; ++ ++static int __cpuinit sched_cpu_active(struct notifier_block *nfb, ++ unsigned long action, void *hcpu) ++{ ++ switch (action & ~CPU_TASKS_FROZEN) { ++ case CPU_ONLINE: ++ case CPU_DOWN_FAILED: ++ set_cpu_active((long)hcpu, true); ++ return NOTIFY_OK; ++ default: ++ return NOTIFY_DONE; ++ } ++} ++ ++static int __cpuinit sched_cpu_inactive(struct notifier_block *nfb, ++ unsigned long action, void *hcpu) ++{ ++ switch (action & ~CPU_TASKS_FROZEN) { ++ case CPU_DOWN_PREPARE: ++ set_cpu_active((long)hcpu, false); ++ return NOTIFY_OK; ++ default: ++ return NOTIFY_DONE; ++ } ++} ++ ++int __init migration_init(void) ++{ ++ void *cpu = (void *)(long)smp_processor_id(); ++ int err; ++ ++ /* Initialise migration for the boot CPU */ ++ err = migration_call(&migration_notifier, CPU_UP_PREPARE, cpu); ++ BUG_ON(err == NOTIFY_BAD); ++ migration_call(&migration_notifier, CPU_ONLINE, cpu); ++ register_cpu_notifier(&migration_notifier); ++ ++ /* Register cpu active notifiers */ ++ cpu_notifier(sched_cpu_active, CPU_PRI_SCHED_ACTIVE); ++ cpu_notifier(sched_cpu_inactive, CPU_PRI_SCHED_INACTIVE); ++ ++ return 0; ++} ++early_initcall(migration_init); ++#endif ++ ++#ifdef CONFIG_SMP ++ ++static cpumask_var_t sched_domains_tmpmask; /* sched_domains_mutex */ ++ ++#ifdef CONFIG_SCHED_DEBUG ++ ++static __read_mostly int sched_domain_debug_enabled; ++ ++static int __init sched_domain_debug_setup(char *str) ++{ ++ sched_domain_debug_enabled = 1; ++ ++ return 0; ++} ++early_param("sched_debug", sched_domain_debug_setup); ++ ++static int sched_domain_debug_one(struct sched_domain *sd, int cpu, int level, ++ struct cpumask *groupmask) ++{ ++ struct sched_group *group = sd->groups; ++ char str[256]; ++ ++ cpulist_scnprintf(str, sizeof(str), sched_domain_span(sd)); ++ cpumask_clear(groupmask); ++ ++ printk(KERN_DEBUG "%*s domain %d: ", level, "", level); ++ ++ if (!(sd->flags & SD_LOAD_BALANCE)) { ++ printk("does not load-balance\n"); ++ if (sd->parent) ++ printk(KERN_ERR "ERROR: !SD_LOAD_BALANCE domain" ++ " has parent"); ++ return -1; ++ } ++ ++ printk(KERN_CONT "span %s level %s\n", str, sd->name); ++ ++ if (!cpumask_test_cpu(cpu, sched_domain_span(sd))) { ++ printk(KERN_ERR "ERROR: domain->span does not contain " ++ "CPU%d\n", cpu); ++ } ++ if (!cpumask_test_cpu(cpu, sched_group_cpus(group))) { ++ printk(KERN_ERR "ERROR: domain->groups does not contain" ++ " CPU%d\n", cpu); ++ } ++ ++ printk(KERN_DEBUG "%*s groups:", level + 1, ""); ++ do { ++ if (!group) { ++ printk("\n"); ++ printk(KERN_ERR "ERROR: group is NULL\n"); ++ break; ++ } ++ ++ if (!group->sgp->power) { ++ printk(KERN_CONT "\n"); ++ printk(KERN_ERR "ERROR: domain->cpu_power not " ++ "set\n"); ++ break; ++ } ++ ++ if (!cpumask_weight(sched_group_cpus(group))) { ++ printk(KERN_CONT "\n"); ++ printk(KERN_ERR "ERROR: empty group\n"); ++ break; ++ } ++ ++ if (cpumask_intersects(groupmask, sched_group_cpus(group))) { ++ printk(KERN_CONT "\n"); ++ printk(KERN_ERR "ERROR: repeated CPUs\n"); ++ break; ++ } ++ ++ cpumask_or(groupmask, groupmask, sched_group_cpus(group)); ++ ++ cpulist_scnprintf(str, sizeof(str), sched_group_cpus(group)); ++ ++ printk(KERN_CONT " %s", str); ++ if (group->sgp->power != SCHED_POWER_SCALE) { ++ printk(KERN_CONT " (cpu_power = %d)", ++ group->sgp->power); ++ } ++ ++ group = group->next; ++ } while (group != sd->groups); ++ printk(KERN_CONT "\n"); ++ ++ if 
(!cpumask_equal(sched_domain_span(sd), groupmask)) ++ printk(KERN_ERR "ERROR: groups don't span domain->span\n"); ++ ++ if (sd->parent && ++ !cpumask_subset(groupmask, sched_domain_span(sd->parent))) ++ printk(KERN_ERR "ERROR: parent span is not a superset " ++ "of domain->span\n"); ++ return 0; ++} ++ ++static void sched_domain_debug(struct sched_domain *sd, int cpu) ++{ ++ int level = 0; ++ ++ if (!sched_domain_debug_enabled) ++ return; ++ ++ if (!sd) { ++ printk(KERN_DEBUG "CPU%d attaching NULL sched-domain.\n", cpu); ++ return; ++ } ++ ++ printk(KERN_DEBUG "CPU%d attaching sched-domain:\n", cpu); ++ ++ for (;;) { ++ if (sched_domain_debug_one(sd, cpu, level, sched_domains_tmpmask)) ++ break; ++ level++; ++ sd = sd->parent; ++ if (!sd) ++ break; ++ } ++} ++#else /* !CONFIG_SCHED_DEBUG */ ++# define sched_domain_debug(sd, cpu) do { } while (0) ++#endif /* CONFIG_SCHED_DEBUG */ ++ ++static int sd_degenerate(struct sched_domain *sd) ++{ ++ if (cpumask_weight(sched_domain_span(sd)) == 1) ++ return 1; ++ ++ /* Following flags need at least 2 groups */ ++ if (sd->flags & (SD_LOAD_BALANCE | ++ SD_BALANCE_NEWIDLE | ++ SD_BALANCE_FORK | ++ SD_BALANCE_EXEC | ++ SD_SHARE_CPUPOWER | ++ SD_SHARE_PKG_RESOURCES)) { ++ if (sd->groups != sd->groups->next) ++ return 0; ++ } ++ ++ /* Following flags don't use groups */ ++ if (sd->flags & (SD_WAKE_AFFINE)) ++ return 0; ++ ++ return 1; ++} ++ ++static int ++sd_parent_degenerate(struct sched_domain *sd, struct sched_domain *parent) ++{ ++ unsigned long cflags = sd->flags, pflags = parent->flags; ++ ++ if (sd_degenerate(parent)) ++ return 1; ++ ++ if (!cpumask_equal(sched_domain_span(sd), sched_domain_span(parent))) ++ return 0; ++ ++ /* Flags needing groups don't count if only 1 group in parent */ ++ if (parent->groups == parent->groups->next) { ++ pflags &= ~(SD_LOAD_BALANCE | ++ SD_BALANCE_NEWIDLE | ++ SD_BALANCE_FORK | ++ SD_BALANCE_EXEC | ++ SD_SHARE_CPUPOWER | ++ SD_SHARE_PKG_RESOURCES); ++ if (nr_node_ids == 1) ++ pflags &= ~SD_SERIALIZE; ++ } ++ if (~cflags & pflags) ++ return 0; ++ ++ return 1; ++} ++ ++static void free_rootdomain(struct rcu_head *rcu) ++{ ++ struct root_domain *rd = container_of(rcu, struct root_domain, rcu); ++ ++ cpupri_cleanup(&rd->cpupri); ++ free_cpumask_var(rd->rto_mask); ++ free_cpumask_var(rd->online); ++ free_cpumask_var(rd->span); ++ kfree(rd); ++} ++ ++static void rq_attach_root(struct rq *rq, struct root_domain *rd) ++{ ++ struct root_domain *old_rd = NULL; ++ unsigned long flags; ++ ++ grq_lock_irqsave(&flags); ++ ++ if (rq->rd) { ++ old_rd = rq->rd; ++ ++ if (cpumask_test_cpu(rq->cpu, old_rd->online)) ++ set_rq_offline(rq); ++ ++ cpumask_clear_cpu(rq->cpu, old_rd->span); ++ ++ /* ++ * If we dont want to free the old_rt yet then ++ * set old_rd to NULL to skip the freeing later ++ * in this function: ++ */ ++ if (!atomic_dec_and_test(&old_rd->refcount)) ++ old_rd = NULL; ++ } ++ ++ atomic_inc(&rd->refcount); ++ rq->rd = rd; ++ ++ cpumask_set_cpu(rq->cpu, rd->span); ++ if (cpumask_test_cpu(rq->cpu, cpu_active_mask)) ++ set_rq_online(rq); ++ ++ grq_unlock_irqrestore(&flags); ++ ++ if (old_rd) ++ call_rcu_sched(&old_rd->rcu, free_rootdomain); ++} ++ ++static int init_rootdomain(struct root_domain *rd) ++{ ++ memset(rd, 0, sizeof(*rd)); ++ ++ if (!alloc_cpumask_var(&rd->span, GFP_KERNEL)) ++ goto out; ++ if (!alloc_cpumask_var(&rd->online, GFP_KERNEL)) ++ goto free_span; ++ if (!alloc_cpumask_var(&rd->rto_mask, GFP_KERNEL)) ++ goto free_online; ++ ++ if (cpupri_init(&rd->cpupri) != 0) ++ goto free_rto_mask; ++ return 0; ++ 
++free_rto_mask: ++ free_cpumask_var(rd->rto_mask); ++free_online: ++ free_cpumask_var(rd->online); ++free_span: ++ free_cpumask_var(rd->span); ++out: ++ return -ENOMEM; ++} ++ ++static void init_defrootdomain(void) ++{ ++ init_rootdomain(&def_root_domain); ++ ++ atomic_set(&def_root_domain.refcount, 1); ++} ++ ++static struct root_domain *alloc_rootdomain(void) ++{ ++ struct root_domain *rd; ++ ++ rd = kmalloc(sizeof(*rd), GFP_KERNEL); ++ if (!rd) ++ return NULL; ++ ++ if (init_rootdomain(rd) != 0) { ++ kfree(rd); ++ return NULL; ++ } ++ ++ return rd; ++} ++ ++static void free_sched_groups(struct sched_group *sg, int free_sgp) ++{ ++ struct sched_group *tmp, *first; ++ ++ if (!sg) ++ return; ++ ++ first = sg; ++ do { ++ tmp = sg->next; ++ ++ if (free_sgp && atomic_dec_and_test(&sg->sgp->ref)) ++ kfree(sg->sgp); ++ ++ kfree(sg); ++ sg = tmp; ++ } while (sg != first); ++} ++ ++static void free_sched_domain(struct rcu_head *rcu) ++{ ++ struct sched_domain *sd = container_of(rcu, struct sched_domain, rcu); ++ ++ /* ++ * If its an overlapping domain it has private groups, iterate and ++ * nuke them all. ++ */ ++ if (sd->flags & SD_OVERLAP) { ++ free_sched_groups(sd->groups, 1); ++ } else if (atomic_dec_and_test(&sd->groups->ref)) { ++ kfree(sd->groups->sgp); ++ kfree(sd->groups); ++ } ++ kfree(sd); ++} ++ ++static void destroy_sched_domain(struct sched_domain *sd, int cpu) ++{ ++ call_rcu(&sd->rcu, free_sched_domain); ++} ++ ++static void destroy_sched_domains(struct sched_domain *sd, int cpu) ++{ ++ for (; sd; sd = sd->parent) ++ destroy_sched_domain(sd, cpu); ++} ++ ++/* ++ * Attach the domain 'sd' to 'cpu' as its base domain. Callers must ++ * hold the hotplug lock. ++ */ ++static void ++cpu_attach_domain(struct sched_domain *sd, struct root_domain *rd, int cpu) ++{ ++ struct rq *rq = cpu_rq(cpu); ++ struct sched_domain *tmp; ++ ++ /* Remove the sched domains which do not contribute to scheduling. */ ++ for (tmp = sd; tmp; ) { ++ struct sched_domain *parent = tmp->parent; ++ if (!parent) ++ break; ++ ++ if (sd_parent_degenerate(tmp, parent)) { ++ tmp->parent = parent->parent; ++ if (parent->parent) ++ parent->parent->child = tmp; ++ destroy_sched_domain(parent, cpu); ++ } else ++ tmp = tmp->parent; ++ } ++ ++ if (sd && sd_degenerate(sd)) { ++ tmp = sd; ++ sd = sd->parent; ++ destroy_sched_domain(tmp, cpu); ++ if (sd) ++ sd->child = NULL; ++ } ++ ++ sched_domain_debug(sd, cpu); ++ ++ rq_attach_root(rq, rd); ++ tmp = rq->sd; ++ rcu_assign_pointer(rq->sd, sd); ++ destroy_sched_domains(tmp, cpu); ++} ++ ++/* cpus with isolated domains */ ++static cpumask_var_t cpu_isolated_map; ++ ++/* Setup the mask of cpus configured for isolated domains */ ++static int __init isolated_cpu_setup(char *str) ++{ ++ alloc_bootmem_cpumask_var(&cpu_isolated_map); ++ cpulist_parse(str, cpu_isolated_map); ++ return 1; ++} ++ ++__setup("isolcpus=", isolated_cpu_setup); ++ ++#define SD_NODES_PER_DOMAIN 16 ++ ++#ifdef CONFIG_NUMA ++ ++/** ++ * find_next_best_node - find the next node to include in a sched_domain ++ * @node: node whose sched_domain we're building ++ * @used_nodes: nodes already in the sched_domain ++ * ++ * Find the next node to include in a given scheduling domain. Simply ++ * finds the closest node not already in the @used_nodes map. ++ * ++ * Should use nodemask_t. 
++ */ ++static int find_next_best_node(int node, nodemask_t *used_nodes) ++{ ++ int i, n, val, min_val, best_node = -1; ++ ++ min_val = INT_MAX; ++ ++ for (i = 0; i < nr_node_ids; i++) { ++ /* Start at @node */ ++ n = (node + i) % nr_node_ids; ++ ++ if (!nr_cpus_node(n)) ++ continue; ++ ++ /* Skip already used nodes */ ++ if (node_isset(n, *used_nodes)) ++ continue; ++ ++ /* Simple min distance search */ ++ val = node_distance(node, n); ++ ++ if (val < min_val) { ++ min_val = val; ++ best_node = n; ++ } ++ } ++ ++ if (best_node != -1) ++ node_set(best_node, *used_nodes); ++ return best_node; ++} ++ ++/** ++ * sched_domain_node_span - get a cpumask for a node's sched_domain ++ * @node: node whose cpumask we're constructing ++ * @span: resulting cpumask ++ * ++ * Given a node, construct a good cpumask for its sched_domain to span. It ++ * should be one that prevents unnecessary balancing, but also spreads tasks ++ * out optimally. ++ */ ++static void sched_domain_node_span(int node, struct cpumask *span) ++{ ++ nodemask_t used_nodes; ++ int i; ++ ++ cpumask_clear(span); ++ nodes_clear(used_nodes); ++ ++ cpumask_or(span, span, cpumask_of_node(node)); ++ node_set(node, used_nodes); ++ ++ for (i = 1; i < SD_NODES_PER_DOMAIN; i++) { ++ int next_node = find_next_best_node(node, &used_nodes); ++ if (next_node < 0) ++ break; ++ cpumask_or(span, span, cpumask_of_node(next_node)); ++ } ++} ++ ++static const struct cpumask *cpu_node_mask(int cpu) ++{ ++ lockdep_assert_held(&sched_domains_mutex); ++ ++ sched_domain_node_span(cpu_to_node(cpu), sched_domains_tmpmask); ++ ++ return sched_domains_tmpmask; ++} ++ ++static const struct cpumask *cpu_allnodes_mask(int cpu) ++{ ++ return cpu_possible_mask; ++} ++#endif /* CONFIG_NUMA */ ++ ++static const struct cpumask *cpu_cpu_mask(int cpu) ++{ ++ return cpumask_of_node(cpu_to_node(cpu)); ++} ++ ++int sched_smt_power_savings = 0, sched_mc_power_savings = 0; ++ ++struct sd_data { ++ struct sched_domain **__percpu sd; ++ struct sched_group **__percpu sg; ++ struct sched_group_power **__percpu sgp; ++}; ++ ++struct s_data { ++ struct sched_domain ** __percpu sd; ++ struct root_domain *rd; ++}; ++ ++enum s_alloc { ++ sa_rootdomain, ++ sa_sd, ++ sa_sd_storage, ++ sa_none, ++}; ++ ++struct sched_domain_topology_level; ++ ++typedef struct sched_domain *(*sched_domain_init_f)(struct sched_domain_topology_level *tl, int cpu); ++typedef const struct cpumask *(*sched_domain_mask_f)(int cpu); ++ ++#define SDTL_OVERLAP 0x01 ++ ++struct sched_domain_topology_level { ++ sched_domain_init_f init; ++ sched_domain_mask_f mask; ++ int flags; ++ struct sd_data data; ++}; ++ ++static int ++build_overlap_sched_groups(struct sched_domain *sd, int cpu) ++{ ++ struct sched_group *first = NULL, *last = NULL, *groups = NULL, *sg; ++ const struct cpumask *span = sched_domain_span(sd); ++ struct cpumask *covered = sched_domains_tmpmask; ++ struct sd_data *sdd = sd->private; ++ struct sched_domain *child; ++ int i; ++ ++ cpumask_clear(covered); ++ ++ for_each_cpu(i, span) { ++ struct cpumask *sg_span; ++ ++ if (cpumask_test_cpu(i, covered)) ++ continue; ++ ++ sg = kzalloc_node(sizeof(struct sched_group) + cpumask_size(), ++ GFP_KERNEL, cpu_to_node(i)); ++ ++ if (!sg) ++ goto fail; ++ ++ sg_span = sched_group_cpus(sg); ++ ++ child = *per_cpu_ptr(sdd->sd, i); ++ if (child->child) { ++ child = child->child; ++ cpumask_copy(sg_span, sched_domain_span(child)); ++ } else ++ cpumask_set_cpu(i, sg_span); ++ ++ cpumask_or(covered, covered, sg_span); ++ ++ sg->sgp = *per_cpu_ptr(sdd->sgp, 
cpumask_first(sg_span)); ++ atomic_inc(&sg->sgp->ref); ++ ++ if (cpumask_test_cpu(cpu, sg_span)) ++ groups = sg; ++ ++ if (!first) ++ first = sg; ++ if (last) ++ last->next = sg; ++ last = sg; ++ last->next = first; ++ } ++ sd->groups = groups; ++ ++ return 0; ++ ++fail: ++ free_sched_groups(first, 0); ++ ++ return -ENOMEM; ++} ++ ++static int get_group(int cpu, struct sd_data *sdd, struct sched_group **sg) ++{ ++ struct sched_domain *sd = *per_cpu_ptr(sdd->sd, cpu); ++ struct sched_domain *child = sd->child; ++ ++ if (child) ++ cpu = cpumask_first(sched_domain_span(child)); ++ ++ if (sg) { ++ *sg = *per_cpu_ptr(sdd->sg, cpu); ++ (*sg)->sgp = *per_cpu_ptr(sdd->sgp, cpu); ++ atomic_set(&(*sg)->sgp->ref, 1); /* for claim_allocations */ ++ } ++ ++ return cpu; ++} ++ ++/* ++ * build_sched_groups will build a circular linked list of the groups ++ * covered by the given span, and will set each group's ->cpumask correctly, ++ * and ->cpu_power to 0. ++ * ++ * Assumes the sched_domain tree is fully constructed ++ */ ++static int ++build_sched_groups(struct sched_domain *sd, int cpu) ++{ ++ struct sched_group *first = NULL, *last = NULL; ++ struct sd_data *sdd = sd->private; ++ const struct cpumask *span = sched_domain_span(sd); ++ struct cpumask *covered; ++ int i; ++ ++ get_group(cpu, sdd, &sd->groups); ++ atomic_inc(&sd->groups->ref); ++ ++ if (cpu != cpumask_first(sched_domain_span(sd))) ++ return 0; ++ ++ lockdep_assert_held(&sched_domains_mutex); ++ covered = sched_domains_tmpmask; ++ ++ cpumask_clear(covered); ++ ++ for_each_cpu(i, span) { ++ struct sched_group *sg; ++ int group = get_group(i, sdd, &sg); ++ int j; ++ ++ if (cpumask_test_cpu(i, covered)) ++ continue; ++ ++ cpumask_clear(sched_group_cpus(sg)); ++ sg->sgp->power = 0; ++ ++ for_each_cpu(j, span) { ++ if (get_group(j, sdd, NULL) != group) ++ continue; ++ ++ cpumask_set_cpu(j, covered); ++ cpumask_set_cpu(j, sched_group_cpus(sg)); ++ } ++ ++ if (!first) ++ first = sg; ++ if (last) ++ last->next = sg; ++ last = sg; ++ } ++ last->next = first; ++ ++ return 0; ++} ++ ++/* ++ * Initializers for schedule domains ++ * Non-inlined to reduce accumulated stack pressure in build_sched_domains() ++ */ ++ ++#ifdef CONFIG_SCHED_DEBUG ++# define SD_INIT_NAME(sd, type) sd->name = #type ++#else ++# define SD_INIT_NAME(sd, type) do { } while (0) ++#endif ++ ++#define SD_INIT_FUNC(type) \ ++static noinline struct sched_domain * \ ++sd_init_##type(struct sched_domain_topology_level *tl, int cpu) \ ++{ \ ++ struct sched_domain *sd = *per_cpu_ptr(tl->data.sd, cpu); \ ++ *sd = SD_##type##_INIT; \ ++ SD_INIT_NAME(sd, type); \ ++ sd->private = &tl->data; \ ++ return sd; \ ++} ++ ++SD_INIT_FUNC(CPU) ++#ifdef CONFIG_NUMA ++ SD_INIT_FUNC(ALLNODES) ++ SD_INIT_FUNC(NODE) ++#endif ++#ifdef CONFIG_SCHED_SMT ++ SD_INIT_FUNC(SIBLING) ++#endif ++#ifdef CONFIG_SCHED_MC ++ SD_INIT_FUNC(MC) ++#endif ++#ifdef CONFIG_SCHED_BOOK ++ SD_INIT_FUNC(BOOK) ++#endif ++ ++static int default_relax_domain_level = -1; ++int sched_domain_level_max; ++ ++static int __init setup_relax_domain_level(char *str) ++{ ++ unsigned long val; ++ ++ val = simple_strtoul(str, NULL, 0); ++ if (val < sched_domain_level_max) ++ default_relax_domain_level = val; ++ ++ return 1; ++} ++__setup("relax_domain_level=", setup_relax_domain_level); ++ ++static void set_domain_attribute(struct sched_domain *sd, ++ struct sched_domain_attr *attr) ++{ ++ int request; ++ ++ if (!attr || attr->relax_domain_level < 0) { ++ if (default_relax_domain_level < 0) ++ return; ++ else ++ request = 
default_relax_domain_level; ++ } else ++ request = attr->relax_domain_level; ++ if (request < sd->level) { ++ /* turn off idle balance on this domain */ ++ sd->flags &= ~(SD_BALANCE_WAKE|SD_BALANCE_NEWIDLE); ++ } else { ++ /* turn on idle balance on this domain */ ++ sd->flags |= (SD_BALANCE_WAKE|SD_BALANCE_NEWIDLE); ++ } ++} ++ ++static void __sdt_free(const struct cpumask *cpu_map); ++static int __sdt_alloc(const struct cpumask *cpu_map); ++ ++static void __free_domain_allocs(struct s_data *d, enum s_alloc what, ++ const struct cpumask *cpu_map) ++{ ++ switch (what) { ++ case sa_rootdomain: ++ if (!atomic_read(&d->rd->refcount)) ++ free_rootdomain(&d->rd->rcu); /* fall through */ ++ case sa_sd: ++ free_percpu(d->sd); /* fall through */ ++ case sa_sd_storage: ++ __sdt_free(cpu_map); /* fall through */ ++ case sa_none: ++ break; ++ } ++} ++ ++static enum s_alloc __visit_domain_allocation_hell(struct s_data *d, ++ const struct cpumask *cpu_map) ++{ ++ memset(d, 0, sizeof(*d)); ++ ++ if (__sdt_alloc(cpu_map)) ++ return sa_sd_storage; ++ d->sd = alloc_percpu(struct sched_domain *); ++ if (!d->sd) ++ return sa_sd_storage; ++ d->rd = alloc_rootdomain(); ++ if (!d->rd) ++ return sa_sd; ++ return sa_rootdomain; ++} ++ ++/* ++ * NULL the sd_data elements we've used to build the sched_domain and ++ * sched_group structure so that the subsequent __free_domain_allocs() ++ * will not free the data we're using. ++ */ ++static void claim_allocations(int cpu, struct sched_domain *sd) ++{ ++ struct sd_data *sdd = sd->private; ++ ++ WARN_ON_ONCE(*per_cpu_ptr(sdd->sd, cpu) != sd); ++ *per_cpu_ptr(sdd->sd, cpu) = NULL; ++ ++ if (atomic_read(&(*per_cpu_ptr(sdd->sg, cpu))->ref)) ++ *per_cpu_ptr(sdd->sg, cpu) = NULL; ++ ++ if (atomic_read(&(*per_cpu_ptr(sdd->sgp, cpu))->ref)) ++ *per_cpu_ptr(sdd->sgp, cpu) = NULL; ++} ++ ++#ifdef CONFIG_SCHED_SMT ++static const struct cpumask *cpu_smt_mask(int cpu) ++{ ++ return topology_thread_cpumask(cpu); ++} ++#endif ++ ++/* ++ * Topology list, bottom-up. 
++ */ ++static struct sched_domain_topology_level default_topology[] = { ++#ifdef CONFIG_SCHED_SMT ++ { sd_init_SIBLING, cpu_smt_mask, }, ++#endif ++#ifdef CONFIG_SCHED_MC ++ { sd_init_MC, cpu_coregroup_mask, }, ++#endif ++#ifdef CONFIG_SCHED_BOOK ++ { sd_init_BOOK, cpu_book_mask, }, ++#endif ++ { sd_init_CPU, cpu_cpu_mask, }, ++#ifdef CONFIG_NUMA ++ { sd_init_NODE, cpu_node_mask, SDTL_OVERLAP, }, ++ { sd_init_ALLNODES, cpu_allnodes_mask, }, ++#endif ++ { NULL, }, ++}; ++ ++static struct sched_domain_topology_level *sched_domain_topology = default_topology; ++ ++static int __sdt_alloc(const struct cpumask *cpu_map) ++{ ++ struct sched_domain_topology_level *tl; ++ int j; ++ ++ for (tl = sched_domain_topology; tl->init; tl++) { ++ struct sd_data *sdd = &tl->data; ++ ++ sdd->sd = alloc_percpu(struct sched_domain *); ++ if (!sdd->sd) ++ return -ENOMEM; ++ ++ sdd->sg = alloc_percpu(struct sched_group *); ++ if (!sdd->sg) ++ return -ENOMEM; ++ ++ sdd->sgp = alloc_percpu(struct sched_group_power *); ++ if (!sdd->sgp) ++ return -ENOMEM; ++ ++ for_each_cpu(j, cpu_map) { ++ struct sched_domain *sd; ++ struct sched_group *sg; ++ struct sched_group_power *sgp; ++ ++ sd = kzalloc_node(sizeof(struct sched_domain) + cpumask_size(), ++ GFP_KERNEL, cpu_to_node(j)); ++ if (!sd) ++ return -ENOMEM; ++ ++ *per_cpu_ptr(sdd->sd, j) = sd; ++ ++ sg = kzalloc_node(sizeof(struct sched_group) + cpumask_size(), ++ GFP_KERNEL, cpu_to_node(j)); ++ if (!sg) ++ return -ENOMEM; ++ ++ *per_cpu_ptr(sdd->sg, j) = sg; ++ ++ sgp = kzalloc_node(sizeof(struct sched_group_power), ++ GFP_KERNEL, cpu_to_node(j)); ++ if (!sgp) ++ return -ENOMEM; ++ ++ *per_cpu_ptr(sdd->sgp, j) = sgp; ++ } ++ } ++ ++ return 0; ++} ++ ++static void __sdt_free(const struct cpumask *cpu_map) ++{ ++ struct sched_domain_topology_level *tl; ++ int j; ++ ++ for (tl = sched_domain_topology; tl->init; tl++) { ++ struct sd_data *sdd = &tl->data; ++ ++ for_each_cpu(j, cpu_map) { ++ struct sched_domain *sd = *per_cpu_ptr(sdd->sd, j); ++ if (sd && (sd->flags & SD_OVERLAP)) ++ free_sched_groups(sd->groups, 0); ++ kfree(*per_cpu_ptr(sdd->sd, j)); ++ kfree(*per_cpu_ptr(sdd->sg, j)); ++ kfree(*per_cpu_ptr(sdd->sgp, j)); ++ } ++ free_percpu(sdd->sd); ++ free_percpu(sdd->sg); ++ free_percpu(sdd->sgp); ++ } ++} ++ ++struct sched_domain *build_sched_domain(struct sched_domain_topology_level *tl, ++ struct s_data *d, const struct cpumask *cpu_map, ++ struct sched_domain_attr *attr, struct sched_domain *child, ++ int cpu) ++{ ++ struct sched_domain *sd = tl->init(tl, cpu); ++ if (!sd) ++ return child; ++ ++ set_domain_attribute(sd, attr); ++ cpumask_and(sched_domain_span(sd), cpu_map, tl->mask(cpu)); ++ if (child) { ++ sd->level = child->level + 1; ++ sched_domain_level_max = max(sched_domain_level_max, sd->level); ++ child->parent = sd; ++ } ++ sd->child = child; ++ ++ return sd; ++} ++ ++/* ++ * Build sched domains for a given set of cpus and attach the sched domains ++ * to the individual cpus ++ */ ++static int build_sched_domains(const struct cpumask *cpu_map, ++ struct sched_domain_attr *attr) ++{ ++ enum s_alloc alloc_state = sa_none; ++ struct sched_domain *sd; ++ struct s_data d; ++ int i, ret = -ENOMEM; ++ ++ alloc_state = __visit_domain_allocation_hell(&d, cpu_map); ++ if (alloc_state != sa_rootdomain) ++ goto error; ++ ++ /* Set up domains for cpus specified by the cpu_map. 
*/ ++ for_each_cpu(i, cpu_map) { ++ struct sched_domain_topology_level *tl; ++ ++ sd = NULL; ++ for (tl = sched_domain_topology; tl->init; tl++) { ++ sd = build_sched_domain(tl, &d, cpu_map, attr, sd, i); ++ if (tl->flags & SDTL_OVERLAP) ++ sd->flags |= SD_OVERLAP; ++ if (cpumask_equal(cpu_map, sched_domain_span(sd))) ++ break; ++ } ++ ++ while (sd->child) ++ sd = sd->child; ++ ++ *per_cpu_ptr(d.sd, i) = sd; ++ } ++ ++ /* Build the groups for the domains */ ++ for_each_cpu(i, cpu_map) { ++ for (sd = *per_cpu_ptr(d.sd, i); sd; sd = sd->parent) { ++ sd->span_weight = cpumask_weight(sched_domain_span(sd)); ++ if (sd->flags & SD_OVERLAP) { ++ if (build_overlap_sched_groups(sd, i)) ++ goto error; ++ } else { ++ if (build_sched_groups(sd, i)) ++ goto error; ++ } ++ } ++ } ++ ++ /* Calculate CPU power for physical packages and nodes */ ++ for (i = nr_cpumask_bits-1; i >= 0; i--) { ++ if (!cpumask_test_cpu(i, cpu_map)) ++ continue; ++ ++ for (sd = *per_cpu_ptr(d.sd, i); sd; sd = sd->parent) { ++ claim_allocations(i, sd); ++ } ++ } ++ ++ /* Attach the domains */ ++ rcu_read_lock(); ++ for_each_cpu(i, cpu_map) { ++ sd = *per_cpu_ptr(d.sd, i); ++ cpu_attach_domain(sd, d.rd, i); ++ } ++ rcu_read_unlock(); ++ ++ ret = 0; ++error: ++ __free_domain_allocs(&d, alloc_state, cpu_map); ++ return ret; ++} ++ ++static cpumask_var_t *doms_cur; /* current sched domains */ ++static int ndoms_cur; /* number of sched domains in 'doms_cur' */ ++static struct sched_domain_attr *dattr_cur; ++ /* attribues of custom domains in 'doms_cur' */ ++ ++/* ++ * Special case: If a kmalloc of a doms_cur partition (array of ++ * cpumask) fails, then fallback to a single sched domain, ++ * as determined by the single cpumask fallback_doms. ++ */ ++static cpumask_var_t fallback_doms; ++ ++/* ++ * arch_update_cpu_topology lets virtualized architectures update the ++ * cpu core maps. It is supposed to return 1 if the topology changed ++ * or 0 if it stayed the same. ++ */ ++int __attribute__((weak)) arch_update_cpu_topology(void) ++{ ++ return 0; ++} ++ ++cpumask_var_t *alloc_sched_domains(unsigned int ndoms) ++{ ++ int i; ++ cpumask_var_t *doms; ++ ++ doms = kmalloc(sizeof(*doms) * ndoms, GFP_KERNEL); ++ if (!doms) ++ return NULL; ++ for (i = 0; i < ndoms; i++) { ++ if (!alloc_cpumask_var(&doms[i], GFP_KERNEL)) { ++ free_sched_domains(doms, i); ++ return NULL; ++ } ++ } ++ return doms; ++} ++ ++void free_sched_domains(cpumask_var_t doms[], unsigned int ndoms) ++{ ++ unsigned int i; ++ for (i = 0; i < ndoms; i++) ++ free_cpumask_var(doms[i]); ++ kfree(doms); ++} ++ ++/* ++ * Set up scheduler domains and groups. Callers must hold the hotplug lock. ++ * For now this just excludes isolated cpus, but could be used to ++ * exclude other special cases in the future. 
++ */ ++static int init_sched_domains(const struct cpumask *cpu_map) ++{ ++ int err; ++ ++ arch_update_cpu_topology(); ++ ndoms_cur = 1; ++ doms_cur = alloc_sched_domains(ndoms_cur); ++ if (!doms_cur) ++ doms_cur = &fallback_doms; ++ cpumask_andnot(doms_cur[0], cpu_map, cpu_isolated_map); ++ dattr_cur = NULL; ++ err = build_sched_domains(doms_cur[0], NULL); ++ register_sched_domain_sysctl(); ++ ++ return err; ++} ++ ++/* ++ * Detach sched domains from a group of cpus specified in cpu_map ++ * These cpus will now be attached to the NULL domain ++ */ ++static void detach_destroy_domains(const struct cpumask *cpu_map) ++{ ++ int i; ++ ++ rcu_read_lock(); ++ for_each_cpu(i, cpu_map) ++ cpu_attach_domain(NULL, &def_root_domain, i); ++ rcu_read_unlock(); ++} ++ ++/* handle null as "default" */ ++static int dattrs_equal(struct sched_domain_attr *cur, int idx_cur, ++ struct sched_domain_attr *new, int idx_new) ++{ ++ struct sched_domain_attr tmp; ++ ++ /* fast path */ ++ if (!new && !cur) ++ return 1; ++ ++ tmp = SD_ATTR_INIT; ++ return !memcmp(cur ? (cur + idx_cur) : &tmp, ++ new ? (new + idx_new) : &tmp, ++ sizeof(struct sched_domain_attr)); ++} ++ ++/* ++ * Partition sched domains as specified by the 'ndoms_new' ++ * cpumasks in the array doms_new[] of cpumasks. This compares ++ * doms_new[] to the current sched domain partitioning, doms_cur[]. ++ * It destroys each deleted domain and builds each new domain. ++ * ++ * 'doms_new' is an array of cpumask_var_t's of length 'ndoms_new'. ++ * The masks don't intersect (don't overlap.) We should setup one ++ * sched domain for each mask. CPUs not in any of the cpumasks will ++ * not be load balanced. If the same cpumask appears both in the ++ * current 'doms_cur' domains and in the new 'doms_new', we can leave ++ * it as it is. ++ * ++ * The passed in 'doms_new' should be allocated using ++ * alloc_sched_domains. This routine takes ownership of it and will ++ * free_sched_domains it when done with it. If the caller failed the ++ * alloc call, then it can pass in doms_new == NULL && ndoms_new == 1, ++ * and partition_sched_domains() will fallback to the single partition ++ * 'fallback_doms', it also forces the domains to be rebuilt. ++ * ++ * If doms_new == NULL it will be replaced with cpu_online_mask. ++ * ndoms_new == 0 is a special case for destroying existing domains, ++ * and it will not create the default domain. ++ * ++ * Call with hotplug lock held ++ */ ++void partition_sched_domains(int ndoms_new, cpumask_var_t doms_new[], ++ struct sched_domain_attr *dattr_new) ++{ ++ int i, j, n; ++ int new_topology; ++ ++ mutex_lock(&sched_domains_mutex); ++ ++ /* always unregister in case we don't destroy any domains */ ++ unregister_sched_domain_sysctl(); ++ ++ /* Let architecture update cpu core mappings. */ ++ new_topology = arch_update_cpu_topology(); ++ ++ n = doms_new ? 
ndoms_new : 0; ++ ++ /* Destroy deleted domains */ ++ for (i = 0; i < ndoms_cur; i++) { ++ for (j = 0; j < n && !new_topology; j++) { ++ if (cpumask_equal(doms_cur[i], doms_new[j]) ++ && dattrs_equal(dattr_cur, i, dattr_new, j)) ++ goto match1; ++ } ++ /* no match - a current sched domain not in new doms_new[] */ ++ detach_destroy_domains(doms_cur[i]); ++match1: ++ ; ++ } ++ ++ if (doms_new == NULL) { ++ ndoms_cur = 0; ++ doms_new = &fallback_doms; ++ cpumask_andnot(doms_new[0], cpu_active_mask, cpu_isolated_map); ++ WARN_ON_ONCE(dattr_new); ++ } ++ ++ /* Build new domains */ ++ for (i = 0; i < ndoms_new; i++) { ++ for (j = 0; j < ndoms_cur && !new_topology; j++) { ++ if (cpumask_equal(doms_new[i], doms_cur[j]) ++ && dattrs_equal(dattr_new, i, dattr_cur, j)) ++ goto match2; ++ } ++ /* no match - add a new doms_new */ ++ build_sched_domains(doms_new[i], dattr_new ? dattr_new + i : NULL); ++match2: ++ ; ++ } ++ ++ /* Remember the new sched domains */ ++ if (doms_cur != &fallback_doms) ++ free_sched_domains(doms_cur, ndoms_cur); ++ kfree(dattr_cur); /* kfree(NULL) is safe */ ++ doms_cur = doms_new; ++ dattr_cur = dattr_new; ++ ndoms_cur = ndoms_new; ++ ++ register_sched_domain_sysctl(); ++ ++ mutex_unlock(&sched_domains_mutex); ++} ++ ++#if defined(CONFIG_SCHED_MC) || defined(CONFIG_SCHED_SMT) ++static void reinit_sched_domains(void) ++{ ++ get_online_cpus(); ++ ++ /* Destroy domains first to force the rebuild */ ++ partition_sched_domains(0, NULL, NULL); ++ ++ rebuild_sched_domains(); ++ put_online_cpus(); ++} ++ ++static ssize_t sched_power_savings_store(const char *buf, size_t count, int smt) ++{ ++ unsigned int level = 0; ++ ++ if (sscanf(buf, "%u", &level) != 1) ++ return -EINVAL; ++ ++ /* ++ * level is always be positive so don't check for ++ * level < POWERSAVINGS_BALANCE_NONE which is 0 ++ * What happens on 0 or 1 byte write, ++ * need to check for count as well? 
++ */ ++ ++ if (level >= MAX_POWERSAVINGS_BALANCE_LEVELS) ++ return -EINVAL; ++ ++ if (smt) ++ sched_smt_power_savings = level; ++ else ++ sched_mc_power_savings = level; ++ ++ reinit_sched_domains(); ++ ++ return count; ++} ++ ++#ifdef CONFIG_SCHED_MC ++static ssize_t sched_mc_power_savings_show(struct device *dev, ++ struct device_attribute *attr, ++ char *buf) ++{ ++ return sprintf(buf, "%u\n", sched_mc_power_savings); ++} ++static ssize_t sched_mc_power_savings_store(struct device *dev, ++ struct device_attribute *attr, ++ const char *buf, size_t count) ++{ ++ return sched_power_savings_store(buf, count, 0); ++} ++static DEVICE_ATTR(sched_mc_power_savings, 0644, ++ sched_mc_power_savings_show, ++ sched_mc_power_savings_store); ++#endif ++ ++#ifdef CONFIG_SCHED_SMT ++static ssize_t sched_smt_power_savings_show(struct device *dev, ++ struct device_attribute *attr, ++ char *buf) ++{ ++ return sprintf(buf, "%u\n", sched_smt_power_savings); ++} ++static ssize_t sched_smt_power_savings_store(struct device *dev, ++ struct device_attribute *attr, ++ const char *buf, size_t count) ++{ ++ return sched_power_savings_store(buf, count, 1); ++} ++static DEVICE_ATTR(sched_smt_power_savings, 0644, ++ sched_smt_power_savings_show, ++ sched_smt_power_savings_store); ++#endif ++ ++int __init sched_create_sysfs_power_savings_entries(struct device *dev) ++{ ++ int err = 0; ++ ++#ifdef CONFIG_SCHED_SMT ++ if (smt_capable()) ++ err = device_create_file(dev, &dev_attr_sched_smt_power_savings); ++#endif ++#ifdef CONFIG_SCHED_MC ++ if (!err && mc_capable()) ++ err = device_create_file(dev, &dev_attr_sched_mc_power_savings); ++#endif ++ return err; ++} ++#endif /* CONFIG_SCHED_MC || CONFIG_SCHED_SMT */ ++ ++/* ++ * Update cpusets according to cpu_active mask. If cpusets are ++ * disabled, cpuset_update_active_cpus() becomes a simple wrapper ++ * around partition_sched_domains(). ++ */ ++static int cpuset_cpu_active(struct notifier_block *nfb, unsigned long action, ++ void *hcpu) ++{ ++ switch (action & ~CPU_TASKS_FROZEN) { ++ case CPU_ONLINE: ++ case CPU_DOWN_FAILED: ++ cpuset_update_active_cpus(); ++ return NOTIFY_OK; ++ default: ++ return NOTIFY_DONE; ++ } ++} ++ ++static int cpuset_cpu_inactive(struct notifier_block *nfb, unsigned long action, ++ void *hcpu) ++{ ++ switch (action & ~CPU_TASKS_FROZEN) { ++ case CPU_DOWN_PREPARE: ++ cpuset_update_active_cpus(); ++ return NOTIFY_OK; ++ default: ++ return NOTIFY_DONE; ++ } ++} ++ ++#if defined(CONFIG_SCHED_SMT) || defined(CONFIG_SCHED_MC) ++/* ++ * Cheaper version of the below functions in case support for SMT and MC is ++ * compiled in but CPUs have no siblings. 
++ */ ++static bool sole_cpu_idle(int cpu) ++{ ++ return rq_idle(cpu_rq(cpu)); ++} ++#endif ++#ifdef CONFIG_SCHED_SMT ++/* All this CPU's SMT siblings are idle */ ++static bool siblings_cpu_idle(int cpu) ++{ ++ return cpumask_subset(&(cpu_rq(cpu)->smt_siblings), ++ &grq.cpu_idle_map); ++} ++#endif ++#ifdef CONFIG_SCHED_MC ++/* All this CPU's shared cache siblings are idle */ ++static bool cache_cpu_idle(int cpu) ++{ ++ return cpumask_subset(&(cpu_rq(cpu)->cache_siblings), ++ &grq.cpu_idle_map); ++} ++#endif ++ ++enum sched_domain_level { ++ SD_LV_NONE = 0, ++ SD_LV_SIBLING, ++ SD_LV_MC, ++ SD_LV_BOOK, ++ SD_LV_CPU, ++ SD_LV_NODE, ++ SD_LV_ALLNODES, ++ SD_LV_MAX ++}; ++ ++void __init sched_init_smp(void) ++{ ++ struct sched_domain *sd; ++ int cpu; ++ ++ cpumask_var_t non_isolated_cpus; ++ ++ alloc_cpumask_var(&non_isolated_cpus, GFP_KERNEL); ++ alloc_cpumask_var(&fallback_doms, GFP_KERNEL); ++ ++ get_online_cpus(); ++ mutex_lock(&sched_domains_mutex); ++ init_sched_domains(cpu_active_mask); ++ cpumask_andnot(non_isolated_cpus, cpu_possible_mask, cpu_isolated_map); ++ if (cpumask_empty(non_isolated_cpus)) ++ cpumask_set_cpu(smp_processor_id(), non_isolated_cpus); ++ mutex_unlock(&sched_domains_mutex); ++ put_online_cpus(); ++ ++ hotcpu_notifier(cpuset_cpu_active, CPU_PRI_CPUSET_ACTIVE); ++ hotcpu_notifier(cpuset_cpu_inactive, CPU_PRI_CPUSET_INACTIVE); ++ ++ /* Move init over to a non-isolated CPU */ ++ if (set_cpus_allowed_ptr(current, non_isolated_cpus) < 0) ++ BUG(); ++ free_cpumask_var(non_isolated_cpus); ++ ++ grq_lock_irq(); ++ /* ++ * Set up the relative cache distance of each online cpu from each ++ * other in a simple array for quick lookup. Locality is determined ++ * by the closest sched_domain that CPUs are separated by. CPUs with ++ * shared cache in SMT and MC are treated as local. Separate CPUs ++ * (within the same package or physically) within the same node are ++ * treated as not local. CPUs not even in the same domain (different ++ * nodes) are treated as very distant. ++ */ ++ for_each_online_cpu(cpu) { ++ struct rq *rq = cpu_rq(cpu); ++ for_each_domain(cpu, sd) { ++ int locality, other_cpu; ++ ++#ifdef CONFIG_SCHED_SMT ++ if (sd->level == SD_LV_SIBLING) { ++ for_each_cpu_mask(other_cpu, *sched_domain_span(sd)) ++ cpumask_set_cpu(other_cpu, &rq->smt_siblings); ++ } ++#endif ++#ifdef CONFIG_SCHED_MC ++ if (sd->level == SD_LV_MC) { ++ for_each_cpu_mask(other_cpu, *sched_domain_span(sd)) ++ cpumask_set_cpu(other_cpu, &rq->cache_siblings); ++ } ++#endif ++ if (sd->level <= SD_LV_SIBLING) ++ locality = 1; ++ else if (sd->level <= SD_LV_MC) ++ locality = 2; ++ else if (sd->level <= SD_LV_NODE) ++ locality = 3; ++ else ++ continue; ++ ++ for_each_cpu_mask(other_cpu, *sched_domain_span(sd)) { ++ if (locality < rq->cpu_locality[other_cpu]) ++ rq->cpu_locality[other_cpu] = locality; ++ } ++ } ++ ++/* ++ * Each runqueue has its own function in case it doesn't have ++ * siblings of its own allowing mixed topologies. 
++ */ ++#ifdef CONFIG_SCHED_SMT ++ if (cpus_weight(rq->smt_siblings) > 1) ++ rq->siblings_idle = siblings_cpu_idle; ++#endif ++#ifdef CONFIG_SCHED_MC ++ if (cpus_weight(rq->cache_siblings) > 1) ++ rq->cache_idle = cache_cpu_idle; ++#endif ++ } ++ grq_unlock_irq(); ++} ++#else ++void __init sched_init_smp(void) ++{ ++} ++#endif /* CONFIG_SMP */ ++ ++unsigned int sysctl_timer_migration = 1; ++ ++int in_sched_functions(unsigned long addr) ++{ ++ return in_lock_functions(addr) || ++ (addr >= (unsigned long)__sched_text_start ++ && addr < (unsigned long)__sched_text_end); ++} ++ ++void __init sched_init(void) ++{ ++ int i; ++ struct rq *rq; ++ ++ prio_ratios[0] = 128; ++ for (i = 1 ; i < PRIO_RANGE ; i++) ++ prio_ratios[i] = prio_ratios[i - 1] * 11 / 10; ++ ++ raw_spin_lock_init(&grq.lock); ++ grq.nr_running = grq.nr_uninterruptible = grq.nr_switches = 0; ++ grq.niffies = 0; ++ grq.last_jiffy = jiffies; ++ raw_spin_lock_init(&grq.iso_lock); ++ grq.iso_ticks = 0; ++ grq.iso_refractory = false; ++ grq.noc = 1; ++#ifdef CONFIG_SMP ++ init_defrootdomain(); ++ grq.qnr = grq.idle_cpus = 0; ++ cpumask_clear(&grq.cpu_idle_map); ++#else ++ uprq = &per_cpu(runqueues, 0); ++#endif ++ for_each_possible_cpu(i) { ++ rq = cpu_rq(i); ++ rq->user_pc = rq->nice_pc = rq->softirq_pc = rq->system_pc = ++ rq->iowait_pc = rq->idle_pc = 0; ++ rq->dither = false; ++#ifdef CONFIG_SMP ++ rq->sticky_task = NULL; ++ rq->last_niffy = 0; ++ rq->sd = NULL; ++ rq->rd = NULL; ++ rq->online = false; ++ rq->cpu = i; ++ rq_attach_root(rq, &def_root_domain); ++#endif ++ atomic_set(&rq->nr_iowait, 0); ++ } ++ ++#ifdef CONFIG_SMP ++ nr_cpu_ids = i; ++ /* ++ * Set the base locality for cpu cache distance calculation to ++ * "distant" (3). Make sure the distance from a CPU to itself is 0. ++ */ ++ for_each_possible_cpu(i) { ++ int j; ++ ++ rq = cpu_rq(i); ++#ifdef CONFIG_SCHED_SMT ++ cpumask_clear(&rq->smt_siblings); ++ cpumask_set_cpu(i, &rq->smt_siblings); ++ rq->siblings_idle = sole_cpu_idle; ++ cpumask_set_cpu(i, &rq->smt_siblings); ++#endif ++#ifdef CONFIG_SCHED_MC ++ cpumask_clear(&rq->cache_siblings); ++ cpumask_set_cpu(i, &rq->cache_siblings); ++ rq->cache_idle = sole_cpu_idle; ++ cpumask_set_cpu(i, &rq->cache_siblings); ++#endif ++ rq->cpu_locality = kmalloc(nr_cpu_ids * sizeof(int *), GFP_ATOMIC); ++ for_each_possible_cpu(j) { ++ if (i == j) ++ rq->cpu_locality[j] = 0; ++ else ++ rq->cpu_locality[j] = 4; ++ } ++ } ++#endif ++ ++ for (i = 0; i < PRIO_LIMIT; i++) ++ INIT_LIST_HEAD(grq.queue + i); ++ /* delimiter for bitsearch */ ++ __set_bit(PRIO_LIMIT, grq.prio_bitmap); ++ ++#ifdef CONFIG_PREEMPT_NOTIFIERS ++ INIT_HLIST_HEAD(&init_task.preempt_notifiers); ++#endif ++ ++#ifdef CONFIG_RT_MUTEXES ++ plist_head_init(&init_task.pi_waiters); ++#endif ++ ++ /* ++ * The boot idle thread does lazy MMU switching as well: ++ */ ++ atomic_inc(&init_mm.mm_count); ++ enter_lazy_tlb(&init_mm, current); ++ ++ /* ++ * Make us the idle thread. Technically, schedule() should not be ++ * called from this thread, however somewhere below it might be, ++ * but because we are the idle thread, we just pick up running again ++ * when this runqueue becomes "idle". 
++ */ ++ init_idle(current, smp_processor_id()); ++ ++#ifdef CONFIG_SMP ++ zalloc_cpumask_var(&sched_domains_tmpmask, GFP_NOWAIT); ++ /* May be allocated at isolcpus cmdline parse time */ ++ if (cpu_isolated_map == NULL) ++ zalloc_cpumask_var(&cpu_isolated_map, GFP_NOWAIT); ++#endif /* SMP */ ++} ++ ++#ifdef CONFIG_DEBUG_ATOMIC_SLEEP ++static inline int preempt_count_equals(int preempt_offset) ++{ ++ int nested = (preempt_count() & ~PREEMPT_ACTIVE) + rcu_preempt_depth(); ++ ++ return (nested == preempt_offset); ++} ++ ++void __might_sleep(const char *file, int line, int preempt_offset) ++{ ++ static unsigned long prev_jiffy; /* ratelimiting */ ++ ++ rcu_sleep_check(); /* WARN_ON_ONCE() by default, no rate limit reqd. */ ++ if ((preempt_count_equals(preempt_offset) && !irqs_disabled()) || ++ system_state != SYSTEM_RUNNING || oops_in_progress) ++ return; ++ if (time_before(jiffies, prev_jiffy + HZ) && prev_jiffy) ++ return; ++ prev_jiffy = jiffies; ++ ++ printk(KERN_ERR ++ "BUG: sleeping function called from invalid context at %s:%d\n", ++ file, line); ++ printk(KERN_ERR ++ "in_atomic(): %d, irqs_disabled(): %d, pid: %d, name: %s\n", ++ in_atomic(), irqs_disabled(), ++ current->pid, current->comm); ++ ++ debug_show_held_locks(current); ++ if (irqs_disabled()) ++ print_irqtrace_events(current); ++ dump_stack(); ++} ++EXPORT_SYMBOL(__might_sleep); ++#endif ++ ++#ifdef CONFIG_MAGIC_SYSRQ ++void normalize_rt_tasks(void) ++{ ++ struct task_struct *g, *p; ++ unsigned long flags; ++ struct rq *rq; ++ int queued; ++ ++ read_lock_irq(&tasklist_lock); ++ ++ do_each_thread(g, p) { ++ if (!rt_task(p) && !iso_task(p)) ++ continue; ++ ++ raw_spin_lock_irqsave(&p->pi_lock, flags); ++ rq = __task_grq_lock(p); ++ ++ queued = task_queued(p); ++ if (queued) ++ dequeue_task(p); ++ __setscheduler(p, rq, SCHED_NORMAL, 0); ++ if (queued) { ++ enqueue_task(p); ++ try_preempt(p, rq); ++ } ++ ++ __task_grq_unlock(); ++ raw_spin_unlock_irqrestore(&p->pi_lock, flags); ++ } while_each_thread(g, p); ++ ++ read_unlock_irq(&tasklist_lock); ++} ++#endif /* CONFIG_MAGIC_SYSRQ */ ++ ++#if defined(CONFIG_IA64) || defined(CONFIG_KGDB_KDB) ++/* ++ * These functions are only useful for the IA64 MCA handling, or kdb. ++ * ++ * They can only be called when the whole system has been ++ * stopped - every CPU needs to be quiescent, and no scheduling ++ * activity can take place. Using them for anything else would ++ * be a serious bug, and as a result, they aren't even visible ++ * under any other configuration. ++ */ ++ ++/** ++ * curr_task - return the current task for a given cpu. ++ * @cpu: the processor in question. ++ * ++ * ONLY VALID WHEN THE WHOLE SYSTEM IS STOPPED! ++ */ ++struct task_struct *curr_task(int cpu) ++{ ++ return cpu_curr(cpu); ++} ++ ++#endif /* defined(CONFIG_IA64) || defined(CONFIG_KGDB_KDB) */ ++ ++#ifdef CONFIG_IA64 ++/** ++ * set_curr_task - set the current task for a given cpu. ++ * @cpu: the processor in question. ++ * @p: the task pointer to set. ++ * ++ * Description: This function must only be used when non-maskable interrupts ++ * are serviced on a separate stack. It allows the architecture to switch the ++ * notion of the current task on a cpu in a non-blocking manner. This function ++ * must be called with all CPU's synchronised, and interrupts disabled, the ++ * and caller must save the original value of the current task (see ++ * curr_task() above) and restore that value before reenabling interrupts and ++ * re-starting the system. ++ * ++ * ONLY VALID WHEN THE WHOLE SYSTEM IS STOPPED! 
++ */ ++void set_curr_task(int cpu, struct task_struct *p) ++{ ++ cpu_curr(cpu) = p; ++} ++ ++#endif ++ ++/* ++ * Use precise platform statistics if available: ++ */ ++#ifdef CONFIG_VIRT_CPU_ACCOUNTING ++void task_times(struct task_struct *p, cputime_t *ut, cputime_t *st) ++{ ++ *ut = p->utime; ++ *st = p->stime; ++} ++ ++void thread_group_times(struct task_struct *p, cputime_t *ut, cputime_t *st) ++{ ++ struct task_cputime cputime; ++ ++ thread_group_cputime(p, &cputime); ++ ++ *ut = cputime.utime; ++ *st = cputime.stime; ++} ++#else ++ ++void task_times(struct task_struct *p, cputime_t *ut, cputime_t *st) ++{ ++ cputime_t rtime, utime = p->utime, total = utime + p->stime; ++ ++ rtime = nsecs_to_cputime(p->sched_time); ++ ++ if (total) { ++ u64 temp; ++ ++ temp = (u64)(rtime * utime); ++ do_div(temp, total); ++ utime = (cputime_t)temp; ++ } else ++ utime = rtime; ++ ++ /* ++ * Compare with previous values, to keep monotonicity: ++ */ ++ p->prev_utime = max(p->prev_utime, utime); ++ p->prev_stime = max(p->prev_stime, (rtime - p->prev_utime)); ++ ++ *ut = p->prev_utime; ++ *st = p->prev_stime; ++} ++ ++/* ++ * Must be called with siglock held. ++ */ ++void thread_group_times(struct task_struct *p, cputime_t *ut, cputime_t *st) ++{ ++ struct signal_struct *sig = p->signal; ++ struct task_cputime cputime; ++ cputime_t rtime, utime, total; ++ ++ thread_group_cputime(p, &cputime); ++ ++ total = cputime.utime + cputime.stime; ++ rtime = nsecs_to_cputime(cputime.sum_exec_runtime); ++ ++ if (total) { ++ u64 temp; ++ ++ temp = (u64)(rtime * cputime.utime); ++ do_div(temp, total); ++ utime = (cputime_t)temp; ++ } else ++ utime = rtime; ++ ++ sig->prev_utime = max(sig->prev_utime, utime); ++ sig->prev_stime = max(sig->prev_stime, (rtime - sig->prev_utime)); ++ ++ *ut = sig->prev_utime; ++ *st = sig->prev_stime; ++} ++#endif ++ ++inline cputime_t task_gtime(struct task_struct *p) ++{ ++ return p->gtime; ++} ++ ++void __cpuinit init_idle_bootup_task(struct task_struct *idle) ++{} ++ ++#ifdef CONFIG_SCHED_DEBUG ++void proc_sched_show_task(struct task_struct *p, struct seq_file *m) ++{} ++ ++void proc_sched_set_task(struct task_struct *p) ++{} ++#endif ++ ++#ifdef CONFIG_SMP ++unsigned long default_scale_freq_power(struct sched_domain *sd, int cpu) ++{ ++ return SCHED_LOAD_SCALE; ++} ++ ++unsigned long default_scale_smt_power(struct sched_domain *sd, int cpu) ++{ ++ unsigned long weight = cpumask_weight(sched_domain_span(sd)); ++ unsigned long smt_gain = sd->smt_gain; ++ ++ smt_gain /= weight; ++ ++ return smt_gain; ++} ++#endif +Index: linux-3.3-ck1/kernel/sched/Makefile +=================================================================== +--- linux-3.3-ck1.orig/kernel/sched/Makefile 2012-03-24 19:30:00.014420399 +1100 ++++ linux-3.3-ck1/kernel/sched/Makefile 2012-03-24 19:30:29.047925897 +1100 +@@ -11,10 +11,14 @@ ifneq ($(CONFIG_SCHED_OMIT_FRAME_POINTER + CFLAGS_core.o := $(PROFILING) -fno-omit-frame-pointer + endif + ++ifdef CONFIG_SCHED_BFS ++obj-y += bfs.o clock.o ++else + obj-y += core.o clock.o idle_task.o fair.o rt.o stop_task.o +-obj-$(CONFIG_SMP) += cpupri.o + obj-$(CONFIG_SCHED_AUTOGROUP) += auto_group.o +-obj-$(CONFIG_SCHEDSTATS) += stats.o + obj-$(CONFIG_SCHED_DEBUG) += debug.o ++endif ++obj-$(CONFIG_SMP) += cpupri.o ++obj-$(CONFIG_SCHEDSTATS) += stats.o + + +Index: linux-3.3-ck1/mm/vmscan.c +=================================================================== +--- linux-3.3-ck1.orig/mm/vmscan.c 2012-03-24 19:29:59.987419928 +1100 ++++ linux-3.3-ck1/mm/vmscan.c 2012-03-24 19:34:53.640769520 
+1100 +@@ -153,7 +153,7 @@ struct mem_cgroup_zone { + /* + * From 0 .. 100. Higher means more swappy. + */ +-int vm_swappiness = 60; ++int vm_swappiness = 10; + long vm_total_pages; /* The total number of pages which the VM controls */ + + static LIST_HEAD(shrinker_list); +@@ -999,7 +999,7 @@ cull_mlocked: + + activate_locked: + /* Not a candidate for swapping, so reclaim swap space. */ +- if (PageSwapCache(page) && vm_swap_full()) ++ if (PageSwapCache(page)) + try_to_free_swap(page); + VM_BUG_ON(PageActive(page)); + SetPageActive(page); +@@ -2206,6 +2206,35 @@ static inline bool compaction_ready(stru + } + + /* ++ * Helper functions to adjust nice level of kswapd, based on the priority of ++ * the task (p) that called it. If it is already higher priority we do not ++ * demote its nice level since it is still working on behalf of a higher ++ * priority task. With kernel threads we leave it at nice 0. ++ * ++ * We don't ever run kswapd real time, so if a real time task calls kswapd we ++ * set it to highest SCHED_NORMAL priority. ++ */ ++static inline int effective_sc_prio(struct task_struct *p) ++{ ++ if (likely(p->mm)) { ++ if (rt_task(p)) ++ return -20; ++ if (p->policy == SCHED_IDLEPRIO) ++ return 19; ++ return task_nice(p); ++ } ++ return 0; ++} ++ ++static void set_kswapd_nice(struct task_struct *kswapd, int active) ++{ ++ long nice = effective_sc_prio(current); ++ ++ if (task_nice(kswapd) > nice || !active) ++ set_user_nice(kswapd, nice); ++} ++ ++/* + * This is the direct reclaim path, for page-allocating processes. We only + * try to reclaim pages from zones which will satisfy the caller's allocation + * request. +@@ -3090,6 +3119,7 @@ static int kswapd(void *p) + void wakeup_kswapd(struct zone *zone, int order, enum zone_type classzone_idx) + { + pg_data_t *pgdat; ++ int active; + + if (!populated_zone(zone)) + return; +@@ -3101,7 +3131,9 @@ void wakeup_kswapd(struct zone *zone, in + pgdat->kswapd_max_order = order; + pgdat->classzone_idx = min(pgdat->classzone_idx, classzone_idx); + } +- if (!waitqueue_active(&pgdat->kswapd_wait)) ++ active = waitqueue_active(&pgdat->kswapd_wait); ++ set_kswapd_nice(pgdat->kswapd, active); ++ if (!active) + return; + if (zone_watermark_ok_safe(zone, order, low_wmark_pages(zone), 0, 0)) + return; +Index: linux-3.3-ck1/include/linux/swap.h +=================================================================== +--- linux-3.3-ck1.orig/include/linux/swap.h 2012-03-24 19:29:59.953419335 +1100 ++++ linux-3.3-ck1/include/linux/swap.h 2012-03-24 19:30:29.779938643 +1100 +@@ -201,7 +201,7 @@ struct swap_list_t { + int next; /* swapfile to be used next */ + }; + +-/* Swap 50% full? Release swapcache more aggressively.. */ ++/* Swap 50% full? 
*/ + #define vm_swap_full() (nr_swap_pages*2 < total_swap_pages) + + /* linux/mm/page_alloc.c */ +@@ -351,9 +351,10 @@ extern void grab_swap_token(struct mm_st + extern void __put_swap_token(struct mm_struct *); + extern void disable_swap_token(struct mem_cgroup *memcg); + ++/* Only allow swap token to have effect if swap is full */ + static inline int has_swap_token(struct mm_struct *mm) + { +- return (mm == swap_token_mm); ++ return (mm == swap_token_mm && vm_swap_full()); + } + + static inline void put_swap_token(struct mm_struct *mm) +Index: linux-3.3-ck1/mm/memory.c +=================================================================== +--- linux-3.3-ck1.orig/mm/memory.c 2012-03-24 19:29:59.916418690 +1100 ++++ linux-3.3-ck1/mm/memory.c 2012-03-24 19:30:29.780938660 +1100 +@@ -3003,7 +3003,7 @@ static int do_swap_page(struct mm_struct + mem_cgroup_commit_charge_swapin(page, ptr); + + swap_free(entry); +- if (vm_swap_full() || (vma->vm_flags & VM_LOCKED) || PageMlocked(page)) ++ if ((vma->vm_flags & VM_LOCKED) || PageMlocked(page)) + try_to_free_swap(page); + unlock_page(page); + if (swapcache) { +Index: linux-3.3-ck1/mm/swapfile.c +=================================================================== +--- linux-3.3-ck1.orig/mm/swapfile.c 2012-03-24 19:29:59.915418673 +1100 ++++ linux-3.3-ck1/mm/swapfile.c 2012-03-24 19:30:29.780938660 +1100 +@@ -288,7 +288,7 @@ checks: + scan_base = offset = si->lowest_bit; + + /* reuse swap entry of cache-only swap if not busy. */ +- if (vm_swap_full() && si->swap_map[offset] == SWAP_HAS_CACHE) { ++ if (si->swap_map[offset] == SWAP_HAS_CACHE) { + int swap_was_freed; + spin_unlock(&swap_lock); + swap_was_freed = __try_to_reclaim_swap(si, offset); +@@ -377,7 +377,7 @@ scan: + spin_lock(&swap_lock); + goto checks; + } +- if (vm_swap_full() && si->swap_map[offset] == SWAP_HAS_CACHE) { ++ if (si->swap_map[offset] == SWAP_HAS_CACHE) { + spin_lock(&swap_lock); + goto checks; + } +@@ -392,7 +392,7 @@ scan: + spin_lock(&swap_lock); + goto checks; + } +- if (vm_swap_full() && si->swap_map[offset] == SWAP_HAS_CACHE) { ++ if (si->swap_map[offset] == SWAP_HAS_CACHE) { + spin_lock(&swap_lock); + goto checks; + } +@@ -706,8 +706,7 @@ int free_swap_and_cache(swp_entry_t entr + * Not mapped elsewhere, or swap space full? Free it! + * Also recheck PageSwapCache now page is locked (above). 
+ */ +- if (PageSwapCache(page) && !PageWriteback(page) && +- (!page_mapped(page) || vm_swap_full())) { ++ if (PageSwapCache(page) && !PageWriteback(page)) { + delete_from_swap_cache(page); + SetPageDirty(page); + } +Index: linux-3.3-ck1/mm/page-writeback.c +=================================================================== +--- linux-3.3-ck1.orig/mm/page-writeback.c 2012-03-24 19:31:32.335037467 +1100 ++++ linux-3.3-ck1/mm/page-writeback.c 2012-03-24 19:34:53.649769687 +1100 +@@ -65,7 +65,7 @@ static long ratelimit_pages = 32; + /* + * Start background writeback (via writeback threads) at this percentage + */ +-int dirty_background_ratio = 10; ++int dirty_background_ratio = 1; + + /* + * dirty_background_bytes starts at 0 (disabled) so that it is a function of +@@ -82,7 +82,7 @@ int vm_highmem_is_dirtyable; + /* + * The generator of dirty data starts writeback at this percentage + */ +-int vm_dirty_ratio = 20; ++int vm_dirty_ratio = 1; + + /* + * vm_dirty_bytes starts at 0 (disabled) so that it is a function of +Index: linux-3.3-ck1/kernel/Kconfig.hz +=================================================================== +--- linux-3.3-ck1.orig/kernel/Kconfig.hz 2012-03-24 19:31:32.301036837 +1100 ++++ linux-3.3-ck1/kernel/Kconfig.hz 2012-03-24 19:34:53.685770353 +1100 +@@ -4,7 +4,7 @@ + + choice + prompt "Timer frequency" +- default HZ_250 ++ default HZ_1000 + help + Allows the configuration of the timer frequency. It is customary + to have the timer interrupt run at 1000 Hz but 100 Hz may be more +@@ -23,13 +23,14 @@ choice + with lots of processors that may show reduced performance if + too many timer interrupts are occurring. + +- config HZ_250 ++ config HZ_250_NODEFAULT + bool "250 HZ" + help +- 250 Hz is a good compromise choice allowing server performance +- while also showing good interactive responsiveness even +- on SMP and NUMA systems. If you are going to be using NTSC video +- or multimedia, selected 300Hz instead. ++ 250 HZ is a lousy compromise choice allowing server interactivity ++ while also showing desktop throughput and no extra power saving on ++ laptops. No good for anything. ++ ++ Recommend 100 or 1000 instead. + + config HZ_300 + bool "300 HZ" +@@ -43,16 +44,82 @@ choice + bool "1000 HZ" + help + 1000 Hz is the preferred choice for desktop systems and other +- systems requiring fast interactive responses to events. ++ systems requiring fast interactive responses to events. Laptops ++ can also benefit from this choice without sacrificing battery life ++ if dynticks is also enabled. ++ ++ config HZ_1500 ++ bool "1500 HZ" ++ help ++ 1500 Hz is an insane value to use to run broken software that is Hz ++ limited. ++ ++ Being over 1000, driver breakage is likely. ++ ++ config HZ_2000 ++ bool "2000 HZ" ++ help ++ 2000 Hz is an insane value to use to run broken software that is Hz ++ limited. ++ ++ Being over 1000, driver breakage is likely. ++ ++ config HZ_3000 ++ bool "3000 HZ" ++ help ++ 3000 Hz is an insane value to use to run broken software that is Hz ++ limited. ++ ++ Being over 1000, driver breakage is likely. ++ ++ config HZ_4000 ++ bool "4000 HZ" ++ help ++ 4000 Hz is an insane value to use to run broken software that is Hz ++ limited. ++ ++ Being over 1000, driver breakage is likely. ++ ++ config HZ_5000 ++ bool "5000 HZ" ++ help ++ 5000 Hz is an obscene value to use to run broken software that is Hz ++ limited. ++ ++ Being over 1000, driver breakage is likely. 
++ ++ config HZ_7500 ++ bool "7500 HZ" ++ help ++ 7500 Hz is an obscene value to use to run broken software that is Hz ++ limited. ++ ++ Being over 1000, driver breakage is likely. ++ ++ config HZ_10000 ++ bool "10000 HZ" ++ help ++ 10000 Hz is an obscene value to use to run broken software that is Hz ++ limited. ++ ++ Being over 1000, driver breakage is likely. ++ + + endchoice + + config HZ + int + default 100 if HZ_100 +- default 250 if HZ_250 ++ default 250 if HZ_250_NODEFAULT + default 300 if HZ_300 + default 1000 if HZ_1000 ++ default 1500 if HZ_1500 ++ default 2000 if HZ_2000 ++ default 3000 if HZ_3000 ++ default 4000 if HZ_4000 ++ default 5000 if HZ_5000 ++ default 7500 if HZ_7500 ++ default 10000 if HZ_10000 + + config SCHED_HRTICK + def_bool HIGH_RES_TIMERS && (!SMP || USE_GENERIC_SMP_HELPERS) +Index: linux-3.3-ck1/arch/x86/kernel/cpu/proc.c +=================================================================== +--- linux-3.3-ck1.orig/arch/x86/kernel/cpu/proc.c 2012-03-24 19:31:32.230035518 +1100 ++++ linux-3.3-ck1/arch/x86/kernel/cpu/proc.c 2012-03-24 19:34:53.684770335 +1100 +@@ -109,7 +109,7 @@ static int show_cpuinfo(struct seq_file + + seq_printf(m, "\nbogomips\t: %lu.%02lu\n", + c->loops_per_jiffy/(500000/HZ), +- (c->loops_per_jiffy/(5000/HZ)) % 100); ++ (c->loops_per_jiffy * 10 /(50000/HZ)) % 100); + + #ifdef CONFIG_X86_64 + if (c->x86_tlbsize > 0) +Index: linux-3.3-ck1/arch/x86/kernel/smpboot.c +=================================================================== +--- linux-3.3-ck1.orig/arch/x86/kernel/smpboot.c 2012-03-24 19:31:32.230035518 +1100 ++++ linux-3.3-ck1/arch/x86/kernel/smpboot.c 2012-03-24 19:34:53.685770353 +1100 +@@ -436,7 +436,7 @@ static void impress_friends(void) + "Total of %d processors activated (%lu.%02lu BogoMIPS).\n", + num_online_cpus(), + bogosum/(500000/HZ), +- (bogosum/(5000/HZ))%100); ++ (bogosum * 10/(50000/HZ))%100); + + pr_debug("Before bogocount - setting activated=1.\n"); + } +Index: linux-3.3-ck1/include/linux/nfsd/stats.h +=================================================================== +--- linux-3.3-ck1.orig/include/linux/nfsd/stats.h 2012-03-24 19:31:32.229035499 +1100 ++++ linux-3.3-ck1/include/linux/nfsd/stats.h 2012-03-24 19:34:53.685770353 +1100 +@@ -11,8 +11,8 @@ + + #include + +-/* thread usage wraps very million seconds (approx one fortnight) */ +-#define NFSD_USAGE_WRAP (HZ*1000000) ++/* thread usage wraps every one hundred thousand seconds (approx one day) */ ++#define NFSD_USAGE_WRAP (HZ*100000) + + #ifdef __KERNEL__ + +Index: linux-3.3-ck1/include/net/inet_timewait_sock.h +=================================================================== +--- linux-3.3-ck1.orig/include/net/inet_timewait_sock.h 2012-03-24 19:31:32.229035499 +1100 ++++ linux-3.3-ck1/include/net/inet_timewait_sock.h 2012-03-24 19:34:53.685770353 +1100 +@@ -38,8 +38,8 @@ struct inet_hashinfo; + * If time > 4sec, it is "slow" path, no recycling is required, + * so that we select tick to get range about 4 seconds. 
+ */ +-#if HZ <= 16 || HZ > 4096 +-# error Unsupported: HZ <= 16 or HZ > 4096 ++#if HZ <= 16 || HZ > 16384 ++# error Unsupported: HZ <= 16 or HZ > 16384 + #elif HZ <= 32 + # define INET_TWDR_RECYCLE_TICK (5 + 2 - INET_TWDR_RECYCLE_SLOTS_LOG) + #elif HZ <= 64 +@@ -54,8 +54,12 @@ struct inet_hashinfo; + # define INET_TWDR_RECYCLE_TICK (10 + 2 - INET_TWDR_RECYCLE_SLOTS_LOG) + #elif HZ <= 2048 + # define INET_TWDR_RECYCLE_TICK (11 + 2 - INET_TWDR_RECYCLE_SLOTS_LOG) +-#else ++#elif HZ <= 4096 + # define INET_TWDR_RECYCLE_TICK (12 + 2 - INET_TWDR_RECYCLE_SLOTS_LOG) ++#elif HZ <= 8192 ++# define INET_TWDR_RECYCLE_TICK (13 + 2 - INET_TWDR_RECYCLE_SLOTS_LOG) ++#else ++# define INET_TWDR_RECYCLE_TICK (14 + 2 - INET_TWDR_RECYCLE_SLOTS_LOG) + #endif + + /* TIME_WAIT reaping mechanism. */ +Index: linux-3.3-ck1/init/calibrate.c +=================================================================== +--- linux-3.3-ck1.orig/init/calibrate.c 2012-03-24 19:31:32.230035518 +1100 ++++ linux-3.3-ck1/init/calibrate.c 2012-03-24 19:34:53.685770353 +1100 +@@ -293,7 +293,7 @@ void __cpuinit calibrate_delay(void) + if (!printed) + pr_cont("%lu.%02lu BogoMIPS (lpj=%lu)\n", + lpj/(500000/HZ), +- (lpj/(5000/HZ)) % 100, lpj); ++ (lpj * 10 /(50000 / HZ)) % 100, lpj); + + loops_per_jiffy = lpj; + printed = true; +Index: linux-3.3-ck1/kernel/Kconfig.preempt +=================================================================== +--- linux-3.3-ck1.orig/kernel/Kconfig.preempt 2012-03-24 19:31:32.213035203 +1100 ++++ linux-3.3-ck1/kernel/Kconfig.preempt 2012-03-24 19:34:53.694770519 +1100 +@@ -1,7 +1,7 @@ + + choice + prompt "Preemption Model" +- default PREEMPT_NONE ++ default PREEMPT + + config PREEMPT_NONE + bool "No Forced Preemption (Server)" +@@ -17,7 +17,7 @@ config PREEMPT_NONE + latencies. + + config PREEMPT_VOLUNTARY +- bool "Voluntary Kernel Preemption (Desktop)" ++ bool "Voluntary Kernel Preemption (Nothing)" + help + This option reduces the latency of the kernel by adding more + "explicit preemption points" to the kernel code. These new +@@ -31,7 +31,8 @@ config PREEMPT_VOLUNTARY + applications to run more 'smoothly' even when the system is + under load. + +- Select this if you are building a kernel for a desktop system. ++ Select this for no system in particular (choose Preemptible ++ instead on a desktop if you know what's good for you). + + config PREEMPT + bool "Preemptible Kernel (Low-Latency Desktop)" +Index: linux-3.3-ck1/Makefile +=================================================================== +--- linux-3.3-ck1.orig/Makefile 2012-03-24 19:31:32.197034905 +1100 ++++ linux-3.3-ck1/Makefile 2012-03-24 19:34:53.703770687 +1100 +@@ -10,6 +10,10 @@ NAME = Saber-toothed Squirrel + # Comments in this file are targeted only to the developer, do not + # expect to learn how to build the kernel reading this file. 
+ ++CKVERSION = -ck1 ++CKNAME = BFS Powered ++EXTRAVERSION := $(EXTRAVERSION)$(CKVERSION) ++ + # Do not: + # o use make's built-in rules and variables + # (this increases performance and avoids hard-to-debug behaviour); diff --git a/3.3.8/3rd-3rdparty-1.0-tree.patch b/3.3.8/3rd-3rdparty-1.0-tree.patch new file mode 100644 index 0000000..2a6ed72 --- /dev/null +++ b/3.3.8/3rd-3rdparty-1.0-tree.patch @@ -0,0 +1,181 @@ + + 3rdparty/mkbuild.pl | 92 +++++++++++++++++++++++++++++++++++++++++++++ + Documentation/3rdparty.txt | 76 +++++++++++++++++++++++++++++++++++++ + 2 files changed, 168 insertions(+) + +diff -Nurp linux-2.6.37/3rdparty/mkbuild.pl 3rdparty/mkbuild.pl +--- linux-2.6.37/3rdparty/mkbuild.pl 1970-01-01 02:00:00.000000000 +0200 ++++ 3rdparty/mkbuild.pl 2004-04-23 14:59:03.000000000 +0300 +@@ -0,0 +1,92 @@ ++#!/usr/bin/perl -w ++# ++# Version 1.0 ++# ++# Copyright 2001 Jeff Garzik ++# Copyright 2002 Juan Quintela ++# Copyright 2003 Nicolas Planel ++# ++# This software may be used and distributed according to the terms ++# of the GNU General Public License, incorporated herein by reference. ++# ++# ++# Run "mkbuild.pl" ++# ++# This program generates the following files ++# Makefile ++# Makefile.drivers ++# Config.in ++# using the information in the subdirs of this directory. ++# ++# subdirs need to have: ++# a Config.in file ++# a Makefile with a O_TARGET/L_TARGET targets ++# The config.in should set a CONFIG_ to m/y. ++ ++use strict; ++ ++opendir(THISDIR, "."); ++# get dirs without . and .. garbage ++my (@modules) = grep(!/\.\.?$/,grep(-d, readdir(THISDIR))); ++closedir(THISDIR); ++ ++generate_kconfig(@modules); ++generate_makefile(@modules); ++exit(0); ++ ++########################################################################## ++ ++sub generate_makefile { ++ my (@modules) = @_; ++ ++ local *F; ++ open F, "> Makefile" or die "Cannot create new Makefile: $!\n"; ++ print F <<'EOM'; ++# ++# THIS IS AN AUTOMATICALLY GENERATED FILE. DO NOT EDIT. ++# ++ ++EOM ++ printf F "obj- := 3rdparty.o # Dummy rule to force built-in.o to be made\n"; ++ printf F "obj-\$(%s) += %s\n", to_CONFIG($_), $_ . '/' foreach @modules; ++} ++ ++sub generate_kconfig { ++ my (@modules) = @_; ++ ++ local *F; ++ open F, "> Kconfig" or die "Cannot create Kconfig: $!\n"; ++ print F <<"EOM"; ++# ++# THIS IS AN AUTOMATICALLY GENERATED FILE. DO NOT EDIT. ++# ++ ++menu "Unofficial 3rd party kernel additions" ++ ++EOM ++ ++ foreach (@modules) { ++ die "No Kconfig in $_.\n" if ! -r "$_/Kconfig"; ++ print F "source 3rdparty/$_/Kconfig\n"; ++ } ++ print F "\n\nendmenu\n"; ++} ++ ++sub to_CONFIG { ++ local $_ = $_[0]; ++ tr/a-z/A-Z/; ++ s/[\-\. ]/_/g; ++ "CONFIG_$_"; ++} ++ ++sub find_target { ++ my ($module_dir) = @_; ++ ++ local *F; ++ open(F, "$module_dir/Makefile") or die "$module_dir/Makefile: $!\n"; ++ while () { ++ chomp; ++ return $1 if (/[LO]_TARGET.*:=\s+(\S+)/); ++ } ++} ++ +diff -Nurp linux-2.6.37/Documentation/3rdparty.txt Documentation/3rdparty.txt +--- linux-2.6.37/Documentation/3rdparty.txt 1970-01-01 02:00:00.000000000 +0200 ++++ Documentation/3rdparty.txt 2003-11-22 01:07:26.000000000 +0200 +@@ -0,0 +1,76 @@ ++ ++Third-Party Kernel Source Module Support, or ++an easy way to add modules to your kernel build. ++ ++ ++ ++Vendors quite often add additional drivers and features to the kernel ++which require nothing more than modifying Kconfig, Makefile, and ++adding one or more files to a sub-directory. As a single discrete task, ++this is not a problem. 
However, using patches to add modules to the ++kernel very often results in patch conflicts, resulting in needless time ++wastage as developers regenerate an otherwise working kernel patch. ++ ++This is designed as a solution to these problems. It is NOT designed as ++a replacement for the kernel build system, but merely as a tool for ++vendors and system administrators to ease the pain of patch management. ++ ++The key feature of this system is the distinct lack of patches. Drivers ++are installed via unpacking a tarball. ++ ++ ++ ++Adding a directory to the build (usually from a tarball) ++-------------------------------------------------------- ++If a directory exists inside the 3rdparty sub-directory that contains a ++proper Makefile, it can be added to the build. It also needs a ++Kconfig file. ++ ++ cd /usr/src/linux-2.4.3/3rdparty ++ bzcat /tmp/my-driver2.tar.bz2 | tar xf - # creates "my2" dir ++ ++ ++Limitations ++----------- ++There are some limitations to this system. This system is only ++designed to support a very common case. If you find yourself running ++into limitations (kernel build experts can spot them right off), ++then you should probably be patching the kernel instead of using ++mkbuild.pl for that particular module. ++ ++FIXME: actually list the limitations ++ ++ ++ ++Other notes ++----------- ++Link order is controlled by the order of mkbuild.pl executions. ++ ++"make mrproper" will erase Makefile.meta, and empty Kconfig, Makefile, ++and Makefile.drivers. ++ ++IMPORTANT NOTE: Because this feature modifies the kernel's makefiles and ++configuration system, you MUST complete all mkbuild.pl runs before ++running any "make" command. ++ ++Building in the 3rdparty dir ++---------------------------- ++ ++If you use modules that: ++ - are contained in one subdir with the name of the module ++ - has a Makefile ++ - has a Kconfig file ++ ++The system calls the ./mkbuild.pl script. It will search for ++subdirectories, and will try to build each of them as a module. ++Things to note: ++ ++ The dependencies will be done in a module called: ++ ++ 3rdparty// ++ ++depending of CONFIG_. ++ ++ is the value of O_TARGET/L_TARGET. 
++ ++ diff --git a/3.3.8/3rd-3rdparty-button_hotplug-0.4.1.patch b/3.3.8/3rd-3rdparty-button_hotplug-0.4.1.patch new file mode 100644 index 0000000..a7b4a15 --- /dev/null +++ b/3.3.8/3rd-3rdparty-button_hotplug-0.4.1.patch @@ -0,0 +1,372 @@ +Submitted By: Mario Fetka (mario dot fetka at gmail dot com) +Date: 2012-11-18 +Initial Package Version: 3.2.33 +Origin: openwtr.org packages/system/button-hotplug +Upstream Status: unknown +Description: create uevents from button usage + +diff -Naur linux-3.2.33-go.orig/3rdparty/button_hotplug/Kconfig 3rdparty/button_hotplug/Kconfig +--- linux-3.2.33-go.orig/3rdparty/button_hotplug/Kconfig 1970-01-01 00:00:00.000000000 +0000 ++++ 3rdparty/button_hotplug/Kconfig 2012-11-18 14:45:26.000000000 +0000 +@@ -0,0 +1,2 @@ ++config BUTTON_HOTPLUG ++ tristate "Button Hotplug driver" +diff -Naur linux-3.2.33-go.orig/3rdparty/button_hotplug/Makefile 3rdparty/button_hotplug/Makefile +--- linux-3.2.33-go.orig/3rdparty/button_hotplug/Makefile 1970-01-01 00:00:00.000000000 +0000 ++++ 3rdparty/button_hotplug/Makefile 2012-11-18 14:45:26.000000000 +0000 +@@ -0,0 +1 @@ ++obj-${CONFIG_BUTTON_HOTPLUG} += button-hotplug.o +\ No newline at end of file +diff -Naur linux-3.2.33-go.orig/3rdparty/button_hotplug/button-hotplug.c 3rdparty/button_hotplug/button-hotplug.c +--- linux-3.2.33-go.orig/3rdparty/button_hotplug/button-hotplug.c 1970-01-01 00:00:00.000000000 +0000 ++++ 3rdparty/button_hotplug/button-hotplug.c 2012-11-18 14:45:26.000000000 +0000 +@@ -0,0 +1,349 @@ ++/* ++ * Button Hotplug driver ++ * ++ * Copyright (C) 2008-2010 Gabor Juhos ++ * ++ * Based on the diag.c - GPIO interface driver for Broadcom boards ++ * Copyright (C) 2006 Mike Baker , ++ * Copyright (C) 2006-2007 Felix Fietkau ++ * Copyright (C) 2008 Andy Boyett ++ * ++ * This program is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 as published ++ * by the Free Software Foundation. ++ */ ++ ++#include ++#include ++#include ++#include ++ ++#include ++#include ++#include ++#include ++ ++#define DRV_NAME "button-hotplug" ++#define DRV_VERSION "0.4.1" ++#define DRV_DESC "Button Hotplug driver" ++ ++#define BH_SKB_SIZE 2048 ++ ++#define PFX DRV_NAME ": " ++ ++#undef BH_DEBUG ++ ++#ifdef BH_DEBUG ++#define BH_DBG(fmt, args...) printk(KERN_DEBUG "%s: " fmt, DRV_NAME, ##args ) ++#else ++#define BH_DBG(fmt, args...) do {} while (0) ++#endif ++ ++#define BH_ERR(fmt, args...) 
printk(KERN_ERR "%s: " fmt, DRV_NAME, ##args ) ++ ++#ifndef BIT_MASK ++#define BIT_MASK(nr) (1UL << ((nr) % BITS_PER_LONG)) ++#endif ++ ++struct bh_priv { ++ unsigned long *seen; ++ struct input_handle handle; ++}; ++ ++struct bh_event { ++ const char *name; ++ char *action; ++ unsigned long seen; ++ ++ struct sk_buff *skb; ++ struct work_struct work; ++}; ++ ++struct bh_map { ++ unsigned int code; ++ const char *name; ++}; ++ ++extern u64 uevent_next_seqnum(void); ++ ++#define BH_MAP(_code, _name) \ ++ { \ ++ .code = (_code), \ ++ .name = (_name), \ ++ } ++ ++static struct bh_map button_map[] = { ++ BH_MAP(BTN_0, "BTN_0"), ++ BH_MAP(BTN_1, "BTN_1"), ++ BH_MAP(BTN_2, "BTN_2"), ++ BH_MAP(BTN_3, "BTN_3"), ++ BH_MAP(BTN_4, "BTN_4"), ++ BH_MAP(BTN_5, "BTN_5"), ++ BH_MAP(BTN_6, "BTN_6"), ++ BH_MAP(BTN_7, "BTN_7"), ++ BH_MAP(BTN_8, "BTN_8"), ++ BH_MAP(BTN_9, "BTN_9"), ++ BH_MAP(KEY_RESTART, "reset"), ++#ifdef KEY_WPS_BUTTON ++ BH_MAP(KEY_WPS_BUTTON, "wps"), ++#endif /* KEY_WPS_BUTTON */ ++}; ++ ++/* -------------------------------------------------------------------------*/ ++ ++static int bh_event_add_var(struct bh_event *event, int argv, ++ const char *format, ...) ++{ ++ static char buf[128]; ++ char *s; ++ va_list args; ++ int len; ++ ++ if (argv) ++ return 0; ++ ++ va_start(args, format); ++ len = vsnprintf(buf, sizeof(buf), format, args); ++ va_end(args); ++ ++ if (len >= sizeof(buf)) { ++ BH_ERR("buffer size too small\n"); ++ WARN_ON(1); ++ return -ENOMEM; ++ } ++ ++ s = skb_put(event->skb, len + 1); ++ strcpy(s, buf); ++ ++ BH_DBG("added variable '%s'\n", s); ++ ++ return 0; ++} ++ ++static int button_hotplug_fill_event(struct bh_event *event) ++{ ++ int ret; ++ ++ ret = bh_event_add_var(event, 0, "HOME=%s", "/"); ++ if (ret) ++ return ret; ++ ++ ret = bh_event_add_var(event, 0, "PATH=%s", ++ "/sbin:/bin:/usr/sbin:/usr/bin"); ++ if (ret) ++ return ret; ++ ++ ret = bh_event_add_var(event, 0, "SUBSYSTEM=%s", "button"); ++ if (ret) ++ return ret; ++ ++ ret = bh_event_add_var(event, 0, "ACTION=%s", event->action); ++ if (ret) ++ return ret; ++ ++ ret = bh_event_add_var(event, 0, "BUTTON=%s", event->name); ++ if (ret) ++ return ret; ++ ++ ret = bh_event_add_var(event, 0, "SEEN=%ld", event->seen); ++ if (ret) ++ return ret; ++ ++ ret = bh_event_add_var(event, 0, "SEQNUM=%llu", uevent_next_seqnum()); ++ ++ return ret; ++} ++ ++static void button_hotplug_work(struct work_struct *work) ++{ ++ struct bh_event *event = container_of(work, struct bh_event, work); ++ int ret = 0; ++ ++ event->skb = alloc_skb(BH_SKB_SIZE, GFP_KERNEL); ++ if (!event->skb) ++ goto out_free_event; ++ ++ ret = bh_event_add_var(event, 0, "%s@", event->action); ++ if (ret) ++ goto out_free_skb; ++ ++ ret = button_hotplug_fill_event(event); ++ if (ret) ++ goto out_free_skb; ++ ++ NETLINK_CB(event->skb).dst_group = 1; ++ broadcast_uevent(event->skb, 0, 1, GFP_KERNEL); ++ ++ out_free_skb: ++ if (ret) { ++ BH_ERR("work error %d\n", ret); ++ kfree_skb(event->skb); ++ } ++ out_free_event: ++ kfree(event); ++} ++ ++static int button_hotplug_create_event(const char *name, unsigned long seen, ++ int pressed) ++{ ++ struct bh_event *event; ++ ++ BH_DBG("create event, name=%s, seen=%lu, pressed=%d\n", ++ name, seen, pressed); ++ ++ event = kzalloc(sizeof(*event), GFP_KERNEL); ++ if (!event) ++ return -ENOMEM; ++ ++ event->name = name; ++ event->seen = seen; ++ event->action = pressed ? 
"pressed" : "released"; ++ ++ INIT_WORK(&event->work, (void *)(void *)button_hotplug_work); ++ schedule_work(&event->work); ++ ++ return 0; ++} ++ ++/* -------------------------------------------------------------------------*/ ++ ++#ifdef CONFIG_HOTPLUG ++static int button_get_index(unsigned int code) ++{ ++ int i; ++ ++ for (i = 0; i < ARRAY_SIZE(button_map); i++) ++ if (button_map[i].code == code) ++ return i; ++ ++ return -1; ++} ++static void button_hotplug_event(struct input_handle *handle, ++ unsigned int type, unsigned int code, int value) ++{ ++ struct bh_priv *priv = handle->private; ++ unsigned long seen = jiffies; ++ int btn; ++ ++ BH_DBG("event type=%u, code=%u, value=%d\n", type, code, value); ++ ++ if (type != EV_KEY) ++ return; ++ ++ btn = button_get_index(code); ++ if (btn < 0) ++ return; ++ ++ button_hotplug_create_event(button_map[btn].name, ++ (seen - priv->seen[btn]) / HZ, value); ++ priv->seen[btn] = seen; ++} ++#else ++static void button_hotplug_event(struct input_handle *handle, ++ unsigned int type, unsigned int code, int value) ++{ ++} ++#endif /* CONFIG_HOTPLUG */ ++ ++static int button_hotplug_connect(struct input_handler *handler, ++ struct input_dev *dev, const struct input_device_id *id) ++{ ++ struct bh_priv *priv; ++ int ret; ++ int i; ++ ++ for (i = 0; i < ARRAY_SIZE(button_map); i++) ++ if (test_bit(button_map[i].code, dev->keybit)) ++ break; ++ ++ if (i == ARRAY_SIZE(button_map)) ++ return -ENODEV; ++ ++ priv = kzalloc(sizeof(*priv) + ++ (sizeof(unsigned long) * ARRAY_SIZE(button_map)), ++ GFP_KERNEL); ++ if (!priv) ++ return -ENOMEM; ++ ++ priv->seen = (unsigned long *) &priv[1]; ++ priv->handle.private = priv; ++ priv->handle.dev = dev; ++ priv->handle.handler = handler; ++ priv->handle.name = DRV_NAME; ++ ++ ret = input_register_handle(&priv->handle); ++ if (ret) ++ goto err_free_priv; ++ ++ ret = input_open_device(&priv->handle); ++ if (ret) ++ goto err_unregister_handle; ++ ++ BH_DBG("connected to %s\n", dev->name); ++ ++ return 0; ++ ++ err_unregister_handle: ++ input_unregister_handle(&priv->handle); ++ ++ err_free_priv: ++ kfree(priv); ++ return ret; ++} ++ ++static void button_hotplug_disconnect(struct input_handle *handle) ++{ ++ struct bh_priv *priv = handle->private; ++ ++ input_close_device(handle); ++ input_unregister_handle(handle); ++ ++ kfree(priv); ++} ++ ++static const struct input_device_id button_hotplug_ids[] = { ++ { ++ .flags = INPUT_DEVICE_ID_MATCH_EVBIT, ++ .evbit = { BIT_MASK(EV_KEY) }, ++ }, ++ { ++ /* Terminating entry */ ++ }, ++}; ++ ++MODULE_DEVICE_TABLE(input, button_hotplug_ids); ++ ++static struct input_handler button_hotplug_handler = { ++ .event = button_hotplug_event, ++ .connect = button_hotplug_connect, ++ .disconnect = button_hotplug_disconnect, ++ .name = DRV_NAME, ++ .id_table = button_hotplug_ids, ++}; ++ ++/* -------------------------------------------------------------------------*/ ++ ++static int __init button_hotplug_init(void) ++{ ++ int ret; ++ ++ printk(KERN_INFO DRV_DESC " version " DRV_VERSION "\n"); ++ ret = input_register_handler(&button_hotplug_handler); ++ if (ret) ++ BH_ERR("unable to register input handler\n"); ++ ++ return ret; ++} ++module_init(button_hotplug_init); ++ ++static void __exit button_hotplug_exit(void) ++{ ++ input_unregister_handler(&button_hotplug_handler); ++} ++module_exit(button_hotplug_exit); ++ ++MODULE_DESCRIPTION(DRV_DESC); ++MODULE_VERSION(DRV_VERSION); ++MODULE_AUTHOR("Gabor Juhos "); ++MODULE_LICENSE("GPL v2"); ++ diff --git 
a/3.3.8/3rd-3rdparty-gpio_button_hotplug-0.1.patch b/3.3.8/3rd-3rdparty-gpio_button_hotplug-0.1.patch new file mode 100644 index 0000000..6b2e78e --- /dev/null +++ b/3.3.8/3rd-3rdparty-gpio_button_hotplug-0.1.patch @@ -0,0 +1,472 @@ +Submitted By: Mario Fetka (mario dot fetka at gmail dot com) +Date: 2012-11-18 +Initial Package Version: 3.2.33 +Origin: openwrt.org packages/system/gpio-button-hotplug +Upstream Status: unknown +Description: gpio button uevent + +diff -Naur linux-3.2.33-go.orig/3rdparty/gpio_button_hotplug/Kconfig 3rdparty/gpio_button_hotplug/Kconfig +--- linux-3.2.33-go.orig/3rdparty/gpio_button_hotplug/Kconfig 1970-01-01 00:00:00.000000000 +0000 ++++ 3rdparty/gpio_button_hotplug/Kconfig 2012-11-18 18:41:43.048939468 +0000 +@@ -0,0 +1,2 @@ ++config GPIO_BUTTON_HOTPLUG ++ tristate "GPIO Button Hotplug driver" +diff -Naur linux-3.2.33-go.orig/3rdparty/gpio_button_hotplug/Makefile 3rdparty/gpio_button_hotplug/Makefile +--- linux-3.2.33-go.orig/3rdparty/gpio_button_hotplug/Makefile 1970-01-01 00:00:00.000000000 +0000 ++++ 3rdparty/gpio_button_hotplug/Makefile 2012-11-18 14:45:26.000000000 +0000 +@@ -0,0 +1 @@ ++obj-${CONFIG_GPIO_BUTTON_HOTPLUG} += gpio-button-hotplug.o +diff -Naur linux-3.2.33-go.orig/3rdparty/gpio_button_hotplug/gpio-button-hotplug.c 3rdparty/gpio_button_hotplug/gpio-button-hotplug.c +--- linux-3.2.33-go.orig/3rdparty/gpio_button_hotplug/gpio-button-hotplug.c 1970-01-01 00:00:00.000000000 +0000 ++++ 3rdparty/gpio_button_hotplug/gpio-button-hotplug.c 2012-11-18 14:45:26.000000000 +0000 +@@ -0,0 +1,450 @@ ++/* ++ * GPIO Button Hotplug driver ++ * ++ * Copyright (C) 2012 Felix Fietkau ++ * Copyright (C) 2008-2010 Gabor Juhos ++ * ++ * Based on the diag.c - GPIO interface driver for Broadcom boards ++ * Copyright (C) 2006 Mike Baker , ++ * Copyright (C) 2006-2007 Felix Fietkau ++ * Copyright (C) 2008 Andy Boyett ++ * ++ * This program is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 as published ++ * by the Free Software Foundation. ++ */ ++ ++#include ++#include ++#include ++ ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++ ++#define DRV_NAME "gpio-keys-polled" ++ ++#define BH_SKB_SIZE 2048 ++ ++#define PFX DRV_NAME ": " ++ ++#undef BH_DEBUG ++ ++#ifdef BH_DEBUG ++#define BH_DBG(fmt, args...) printk(KERN_DEBUG "%s: " fmt, DRV_NAME, ##args ) ++#else ++#define BH_DBG(fmt, args...) do {} while (0) ++#endif ++ ++#define BH_ERR(fmt, args...) 
printk(KERN_ERR "%s: " fmt, DRV_NAME, ##args ) ++ ++struct bh_priv { ++ unsigned long seen; ++}; ++ ++struct bh_event { ++ const char *name; ++ char *action; ++ unsigned long seen; ++ ++ struct sk_buff *skb; ++ struct work_struct work; ++}; ++ ++struct bh_map { ++ unsigned int code; ++ const char *name; ++}; ++ ++struct gpio_keys_button_data { ++ struct delayed_work work; ++ struct bh_priv bh; ++ int last_state; ++ int count; ++ int threshold; ++ int can_sleep; ++}; ++ ++extern u64 uevent_next_seqnum(void); ++ ++#define BH_MAP(_code, _name) \ ++ { \ ++ .code = (_code), \ ++ .name = (_name), \ ++ } ++ ++static struct bh_map button_map[] = { ++ BH_MAP(BTN_0, "BTN_0"), ++ BH_MAP(BTN_1, "BTN_1"), ++ BH_MAP(BTN_2, "BTN_2"), ++ BH_MAP(BTN_3, "BTN_3"), ++ BH_MAP(BTN_4, "BTN_4"), ++ BH_MAP(BTN_5, "BTN_5"), ++ BH_MAP(BTN_6, "BTN_6"), ++ BH_MAP(BTN_7, "BTN_7"), ++ BH_MAP(BTN_8, "BTN_8"), ++ BH_MAP(BTN_9, "BTN_9"), ++ BH_MAP(KEY_RESTART, "reset"), ++#ifdef KEY_WPS_BUTTON ++ BH_MAP(KEY_WPS_BUTTON, "wps"), ++#endif /* KEY_WPS_BUTTON */ ++}; ++ ++/* -------------------------------------------------------------------------*/ ++ ++static int bh_event_add_var(struct bh_event *event, int argv, ++ const char *format, ...) ++{ ++ static char buf[128]; ++ char *s; ++ va_list args; ++ int len; ++ ++ if (argv) ++ return 0; ++ ++ va_start(args, format); ++ len = vsnprintf(buf, sizeof(buf), format, args); ++ va_end(args); ++ ++ if (len >= sizeof(buf)) { ++ BH_ERR("buffer size too small\n"); ++ WARN_ON(1); ++ return -ENOMEM; ++ } ++ ++ s = skb_put(event->skb, len + 1); ++ strcpy(s, buf); ++ ++ BH_DBG("added variable '%s'\n", s); ++ ++ return 0; ++} ++ ++static int button_hotplug_fill_event(struct bh_event *event) ++{ ++ int ret; ++ ++ ret = bh_event_add_var(event, 0, "HOME=%s", "/"); ++ if (ret) ++ return ret; ++ ++ ret = bh_event_add_var(event, 0, "PATH=%s", ++ "/sbin:/bin:/usr/sbin:/usr/bin"); ++ if (ret) ++ return ret; ++ ++ ret = bh_event_add_var(event, 0, "SUBSYSTEM=%s", "button"); ++ if (ret) ++ return ret; ++ ++ ret = bh_event_add_var(event, 0, "ACTION=%s", event->action); ++ if (ret) ++ return ret; ++ ++ ret = bh_event_add_var(event, 0, "BUTTON=%s", event->name); ++ if (ret) ++ return ret; ++ ++ ret = bh_event_add_var(event, 0, "SEEN=%ld", event->seen); ++ if (ret) ++ return ret; ++ ++ ret = bh_event_add_var(event, 0, "SEQNUM=%llu", uevent_next_seqnum()); ++ ++ return ret; ++} ++ ++static void button_hotplug_work(struct work_struct *work) ++{ ++ struct bh_event *event = container_of(work, struct bh_event, work); ++ int ret = 0; ++ ++ event->skb = alloc_skb(BH_SKB_SIZE, GFP_KERNEL); ++ if (!event->skb) ++ goto out_free_event; ++ ++ ret = bh_event_add_var(event, 0, "%s@", event->action); ++ if (ret) ++ goto out_free_skb; ++ ++ ret = button_hotplug_fill_event(event); ++ if (ret) ++ goto out_free_skb; ++ ++ NETLINK_CB(event->skb).dst_group = 1; ++ broadcast_uevent(event->skb, 0, 1, GFP_KERNEL); ++ ++ out_free_skb: ++ if (ret) { ++ BH_ERR("work error %d\n", ret); ++ kfree_skb(event->skb); ++ } ++ out_free_event: ++ kfree(event); ++} ++ ++static int button_hotplug_create_event(const char *name, unsigned long seen, ++ int pressed) ++{ ++ struct bh_event *event; ++ ++ BH_DBG("create event, name=%s, seen=%lu, pressed=%d\n", ++ name, seen, pressed); ++ ++ event = kzalloc(sizeof(*event), GFP_KERNEL); ++ if (!event) ++ return -ENOMEM; ++ ++ event->name = name; ++ event->seen = seen; ++ event->action = pressed ? 
"pressed" : "released"; ++ ++ INIT_WORK(&event->work, (void *)(void *)button_hotplug_work); ++ schedule_work(&event->work); ++ ++ return 0; ++} ++ ++/* -------------------------------------------------------------------------*/ ++ ++#ifdef CONFIG_HOTPLUG ++static int button_get_index(unsigned int code) ++{ ++ int i; ++ ++ for (i = 0; i < ARRAY_SIZE(button_map); i++) ++ if (button_map[i].code == code) ++ return i; ++ ++ return -1; ++} ++static void button_hotplug_event(struct gpio_keys_button_data *data, ++ unsigned int type, unsigned int code, int value) ++{ ++ struct bh_priv *priv = &data->bh; ++ unsigned long seen = jiffies; ++ int btn; ++ ++ BH_DBG("event type=%u, code=%u, value=%d\n", type, code, value); ++ ++ if (type != EV_KEY) ++ return; ++ ++ btn = button_get_index(code); ++ if (btn < 0) ++ return; ++ ++ button_hotplug_create_event(button_map[btn].name, ++ (seen - priv->seen) / HZ, value); ++ priv->seen = seen; ++} ++#else ++static void button_hotplug_event(struct gpio_keys_button_data *data, ++ unsigned int type, unsigned int code, int value) ++{ ++} ++#endif /* CONFIG_HOTPLUG */ ++ ++struct gpio_keys_polled_dev { ++ struct delayed_work work; ++ ++ struct device *dev; ++ struct gpio_keys_platform_data *pdata; ++ struct gpio_keys_button_data data[0]; ++}; ++ ++static void gpio_keys_polled_check_state(struct gpio_keys_button *button, ++ struct gpio_keys_button_data *bdata) ++{ ++ int state; ++ ++ if (bdata->can_sleep) ++ state = !!gpio_get_value_cansleep(button->gpio); ++ else ++ state = !!gpio_get_value(button->gpio); ++ ++ state = !!(state ^ button->active_low); ++ if (state != bdata->last_state) { ++ unsigned int type = button->type ?: EV_KEY; ++ ++ button_hotplug_event(bdata, type, button->code, state); ++ bdata->count = 0; ++ bdata->last_state = state; ++ } ++} ++ ++static void gpio_keys_polled_queue_work(struct gpio_keys_polled_dev *bdev) ++{ ++ struct gpio_keys_platform_data *pdata = bdev->pdata; ++ unsigned long delay = msecs_to_jiffies(pdata->poll_interval); ++ ++ if (delay >= HZ) ++ delay = round_jiffies_relative(delay); ++ schedule_delayed_work(&bdev->work, delay); ++} ++ ++static void gpio_keys_polled_poll(struct work_struct *work) ++{ ++ struct gpio_keys_polled_dev *bdev = ++ container_of(work, struct gpio_keys_polled_dev, work.work); ++ struct gpio_keys_platform_data *pdata = bdev->pdata; ++ int i; ++ ++ for (i = 0; i < bdev->pdata->nbuttons; i++) { ++ struct gpio_keys_button_data *bdata = &bdev->data[i]; ++ ++ if (bdata->count < bdata->threshold) ++ bdata->count++; ++ else ++ gpio_keys_polled_check_state(&pdata->buttons[i], bdata); ++ } ++ gpio_keys_polled_queue_work(bdev); ++} ++ ++static void __devinit gpio_keys_polled_open(struct gpio_keys_polled_dev *bdev) ++{ ++ struct gpio_keys_platform_data *pdata = bdev->pdata; ++ int i; ++ ++ if (pdata->enable) ++ pdata->enable(bdev->dev); ++ ++ /* report initial state of the buttons */ ++ for (i = 0; i < pdata->nbuttons; i++) ++ gpio_keys_polled_check_state(&pdata->buttons[i], &bdev->data[i]); ++ ++ gpio_keys_polled_queue_work(bdev); ++} ++ ++static void __devexit gpio_keys_polled_close(struct gpio_keys_polled_dev *bdev) ++{ ++ struct gpio_keys_platform_data *pdata = bdev->pdata; ++ ++ cancel_delayed_work_sync(&bdev->work); ++ ++ if (pdata->disable) ++ pdata->disable(bdev->dev); ++} ++ ++static int __devinit gpio_keys_polled_probe(struct platform_device *pdev) ++{ ++ struct gpio_keys_platform_data *pdata = pdev->dev.platform_data; ++ struct device *dev = &pdev->dev; ++ struct gpio_keys_polled_dev *bdev; ++ int error; ++ int 
i; ++ ++ if (!pdata || !pdata->poll_interval) ++ return -EINVAL; ++ ++ bdev = kzalloc(sizeof(struct gpio_keys_polled_dev) + ++ pdata->nbuttons * sizeof(struct gpio_keys_button_data), ++ GFP_KERNEL); ++ if (!bdev) { ++ dev_err(dev, "no memory for private data\n"); ++ return -ENOMEM; ++ } ++ ++ for (i = 0; i < pdata->nbuttons; i++) { ++ struct gpio_keys_button *button = &pdata->buttons[i]; ++ struct gpio_keys_button_data *bdata = &bdev->data[i]; ++ unsigned int gpio = button->gpio; ++ ++ if (button->wakeup) { ++ dev_err(dev, DRV_NAME " does not support wakeup\n"); ++ error = -EINVAL; ++ goto err_free_gpio; ++ } ++ ++ error = gpio_request(gpio, ++ button->desc ? button->desc : DRV_NAME); ++ if (error) { ++ dev_err(dev, "unable to claim gpio %u, err=%d\n", ++ gpio, error); ++ goto err_free_gpio; ++ } ++ ++ error = gpio_direction_input(gpio); ++ if (error) { ++ dev_err(dev, ++ "unable to set direction on gpio %u, err=%d\n", ++ gpio, error); ++ goto err_free_gpio; ++ } ++ ++ bdata->can_sleep = gpio_cansleep(gpio); ++ bdata->last_state = 0; ++ bdata->threshold = DIV_ROUND_UP(button->debounce_interval, ++ pdata->poll_interval); ++ } ++ ++ bdev->dev = &pdev->dev; ++ bdev->pdata = pdata; ++ platform_set_drvdata(pdev, bdev); ++ ++ INIT_DELAYED_WORK(&bdev->work, gpio_keys_polled_poll); ++ ++ gpio_keys_polled_open(bdev); ++ ++ return 0; ++ ++err_free_gpio: ++ while (--i >= 0) ++ gpio_free(pdata->buttons[i].gpio); ++ ++ kfree(bdev); ++ platform_set_drvdata(pdev, NULL); ++ ++ return error; ++} ++ ++static int __devexit gpio_keys_polled_remove(struct platform_device *pdev) ++{ ++ struct gpio_keys_polled_dev *bdev = platform_get_drvdata(pdev); ++ struct gpio_keys_platform_data *pdata = bdev->pdata; ++ int i = pdata->nbuttons; ++ ++ gpio_keys_polled_close(bdev); ++ ++ while (--i >= 0) ++ gpio_free(pdata->buttons[i].gpio); ++ ++ kfree(bdev); ++ platform_set_drvdata(pdev, NULL); ++ ++ return 0; ++} ++ ++static struct platform_driver gpio_keys_polled_driver = { ++ .probe = gpio_keys_polled_probe, ++ .remove = __devexit_p(gpio_keys_polled_remove), ++ .driver = { ++ .name = DRV_NAME, ++ .owner = THIS_MODULE, ++ }, ++}; ++ ++static int __init gpio_keys_polled_init(void) ++{ ++ return platform_driver_register(&gpio_keys_polled_driver); ++} ++ ++static void __exit gpio_keys_polled_exit(void) ++{ ++ platform_driver_unregister(&gpio_keys_polled_driver); ++} ++ ++module_init(gpio_keys_polled_init); ++module_exit(gpio_keys_polled_exit); ++ ++MODULE_AUTHOR("Gabor Juhos "); ++MODULE_AUTHOR("Felix Fietkau "); ++MODULE_DESCRIPTION("Polled GPIO Buttons hotplug driver"); ++MODULE_LICENSE("GPL v2"); ++MODULE_ALIAS("platform:" DRV_NAME); diff --git a/3.3.8/3rd-3rdparty-gpio_event_drv-0.1.patch b/3.3.8/3rd-3rdparty-gpio_event_drv-0.1.patch new file mode 100644 index 0000000..3a75e28 --- /dev/null +++ b/3.3.8/3rd-3rdparty-gpio_event_drv-0.1.patch @@ -0,0 +1,1354 @@ +Submitted By: Mario Fetka (mario dot fetka at gmail dot com) +Date: 2012-11-18 +Initial Package Version: 3.2.33 +Origin: http://wiki.gumstix.org/index.php?title=GPIO_Event_Driver +Upstream Status: unknown +Description: The gpio-event driver consists of a loadable kernel module, +which registers an interrupt handler, along with an example user-mode program, +which allows the settings to be manipulated and changes to be reported. 
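Note: the description above mentions an example user-mode program, but only the kernel module and its header are carried in this patch. The following is a minimal, hypothetical sketch (not part of the patch) of how such a program could use the ioctl interface declared in gpio-event-drv.h further below; the device path /dev/gpio-event, the GPIO number 17 and the 20 ms debounce value are assumptions made for illustration.

/* Hypothetical user-mode sketch for the gpio-event driver (not part of this patch). */
#include <fcntl.h>
#include <stdio.h>
#include <string.h>
#include <sys/ioctl.h>
#include <unistd.h>

#include "gpio-event-drv.h"   /* GPIO_EVENT_IOCTL_MONITOR_GPIO, GPIO_EventMonitor_t */

int main(void)
{
    GPIO_EventMonitor_t mon;
    char line[64];
    ssize_t n;
    int fd = open("/dev/gpio-event", O_RDONLY);   /* device node created via udev (assumed path) */

    if (fd < 0) {
        perror("open /dev/gpio-event");
        return 1;
    }

    memset(&mon, 0, sizeof(mon));
    mon.gpio = 17;                      /* example GPIO number (assumption) */
    mon.onOff = 1;                      /* 1 = start monitoring */
    mon.edgeType = GPIO_EventBothEdges;
    mon.debounceMilliSec = 20;          /* example debounce time (assumption) */

    if (ioctl(fd, GPIO_EVENT_IOCTL_MONITOR_GPIO, &mon) < 0) {
        perror("GPIO_EVENT_IOCTL_MONITOR_GPIO");
        return 1;
    }

    /* the default read mode is ASCII: "nn E ssssssss.uuuuuu", one line per edge */
    while ((n = read(fd, line, sizeof(line) - 1)) > 0) {
        line[n] = '\0';
        fputs(line, stdout);
    }

    close(fd);
    return 0;
}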
+ +diff -Naur linux-3.2.33-go.orig/3rdparty/gpio_event_drv/Kconfig 3rdparty/gpio_event_drv/Kconfig +--- linux-3.2.33-go.orig/3rdparty/gpio_event_drv/Kconfig 1970-01-01 00:00:00.000000000 +0000 ++++ 3rdparty/gpio_event_drv/Kconfig 2012-11-18 19:03:08.020733547 +0000 +@@ -0,0 +1,2 @@ ++config GPIO_EVENT_DRV ++ tristate "GPIO Event Driver (requires userspace app)" +diff -Naur linux-3.2.33-go.orig/3rdparty/gpio_event_drv/Makefile 3rdparty/gpio_event_drv/Makefile +--- linux-3.2.33-go.orig/3rdparty/gpio_event_drv/Makefile 1970-01-01 00:00:00.000000000 +0000 ++++ 3rdparty/gpio_event_drv/Makefile 2012-11-18 19:02:20.409297191 +0000 +@@ -0,0 +1 @@ ++obj-${CONFIG_GPIO_EVENT_DRV} += gpio-event-drv.o +\ No newline at end of file +diff -Naur linux-3.2.33-go.orig/3rdparty/gpio_event_drv/gpio-event-drv.c 3rdparty/gpio_event_drv/gpio-event-drv.c +--- linux-3.2.33-go.orig/3rdparty/gpio_event_drv/gpio-event-drv.c 1970-01-01 00:00:00.000000000 +0000 ++++ 3rdparty/gpio_event_drv/gpio-event-drv.c 2012-11-18 10:24:14.000000000 +0000 +@@ -0,0 +1,1210 @@ ++/**************************************************************************** ++* ++* Copyright (c) 2006 Dave Hylands ++* ++* This program is free software; you can redistribute it and/or modify ++* it under the terms of the GNU General Public License version 2 as ++* published by the Free Software Foundation. ++* ++* Alternatively, this software may be distributed under the terms of BSD ++* license. ++* ++* See README and COPYING for more details. ++* ++**************************************************************************** ++* ++* This driver allows multiple GPIO pins to be monitored and allows a user ++* mode program to be notified when the pin changes. ++* ++****************************************************************************/ ++ ++/* ---- Include Files ---------------------------------------------------- */ ++ ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++ ++#include ++#include ++ ++#include ++ ++#include "gpio-event-drv.h" ++ ++/* ---- Public Variables ------------------------------------------------- */ ++/* ---- Private Constants and Types -------------------------------------- */ ++ ++#define GPIO_EVENT_DEV_NAME "gpio-event" ++ ++#define DEBUG_ENABLED 1 ++ ++#if DEBUG_ENABLED ++# define DEBUG( flag, fmt, args... ) do { if ( gDebug ## flag ) printk( "%s: " fmt, __FUNCTION__ , ## args ); } while (0) ++#else ++# define DEBUG( flag, fmt, args... 
) ++#endif ++ ++/* ---- Private Variables ------------------------------------------------ */ ++ ++static char gBanner[] __initdata = KERN_INFO "GPIO Event Monitor 0.1 Compiled: " __DATE__ " at " __TIME__ "\n"; ++ ++static int gDebugTrace = 0; ++static int gDebugIoctl = 0; ++static int gDebugError = 1; ++static int gLostEvents = 0; ++ ++static struct ctl_table_header *gSysCtlHeader; ++ ++#if ( LINUX_VERSION_CODE >= KERNEL_VERSION( 2, 6, 33 )) ++#define CTL_NAME(x) ++#else ++#define CTL_NAME(x) .ctl_name = x, ++#endif ++ ++static struct ctl_table gSysCtlSample[] = ++{ ++ { ++ CTL_NAME(1) ++ .procname = "lost-events", ++ .data = &gLostEvents, ++ .maxlen = sizeof( int ), ++ .mode = 0644, ++ .proc_handler = &proc_dointvec ++ }, ++ { ++ CTL_NAME(101) ++ .procname = "debug-trace", ++ .data = &gDebugTrace, ++ .maxlen = sizeof( int ), ++ .mode = 0644, ++ .proc_handler = &proc_dointvec ++ }, ++ { ++ CTL_NAME(102) ++ .procname = "debug-ioctl", ++ .data = &gDebugIoctl, ++ .maxlen = sizeof( int ), ++ .mode = 0644, ++ .proc_handler = &proc_dointvec ++ }, ++ { ++ CTL_NAME(103) ++ .procname = "debug-error", ++ .data = &gDebugError, ++ .maxlen = sizeof( int ), ++ .mode = 0644, ++ .proc_handler = &proc_dointvec ++ }, ++ { 0 } ++}; ++ ++static struct ctl_table gSysCtl[] = ++{ ++ { ++ CTL_NAME(CTL_GPIO_EVENT) ++ .procname = "gpio-event", ++ .mode = 0555, ++ .child = gSysCtlSample ++ }, ++ { 0 } ++}; ++ ++/* ++ * An instance of GPIO_FileData_t is maintained for file open ++ */ ++ ++#define GPIO_EVENT_QUEUE_LEN 20 ++ ++// GPIO_EVENT_BUFFER_SIZE needs to be big enough to hold the ASCII version ++// of the GPIO_Event_t as well as the binary version of the GPIO_Event_t ++ ++#define GPIO_EVENT_BUFFER_SIZE 32 ++ ++typedef struct ++{ ++ struct list_head list; ++ wait_queue_head_t waitQueue; ++ ++ spinlock_t queueLock; ++ GPIO_Event_t queueData[ GPIO_EVENT_QUEUE_LEN ]; ++ volatile int getIndex; ++ volatile int putIndex; ++ volatile int numEvents; ++ ++ GPIO_EventReadMode_t readMode; ++ ++ char buffer[ GPIO_EVENT_BUFFER_SIZE ]; ++ int bufBytes; ++ ++} GPIO_FileData_t; ++ ++/* ++ * An instance of GPIO_PinData_t is maintained for each GPIO line which is ++ * monitored, ++ */ ++ ++typedef enum ++{ ++ PIN_LOW = 0, // Matches level of GPIO line ++ PIN_HIGH = 1, ++ PIN_BOUNCING_LOW, ++ PIN_BOUNCING_HIGH, ++} PinState_t; ++ ++typedef struct ++{ ++ struct list_head list; // list of all pins ++ ++ int gpio; // The gpio line being monitored ++ ++ // We maintain two lists, a global list of pins, and a list associated with each open ++ ++ ++ struct timer_list debounceTimer; // Timer to wake u up after an edge ++ uint8_t debounceMilliSec; // debounce time in milliseconds ++ char devName[ 16 ]; // gpio xx event ++ ++ GPIO_EventEdgeType_t edgeType; // Type of edge(s) we're looking for. ++ ++ PinState_t pinState; // Was the GPIO line low or high? 
++ ++} GPIO_PinData_t; ++ ++static volatile int gReportLostEvents = 1; ++ ++static struct class *gGpioEventClass = NULL; ++static struct cdev gGpioEventCDev; ++static dev_t gGpioEventDevNum = 0; ++ ++static DEFINE_SPINLOCK( gFileListLock ); ++static DEFINE_SPINLOCK( gPinListLock ); ++ ++static LIST_HEAD( gFileList ); ++static LIST_HEAD( gPinList ); ++ ++static struct proc_dir_entry *gProcGpioEvent; ++static struct proc_dir_entry *gProcPins; ++ ++ ++/* ---- Private Function Prototypes -------------------------------------- */ ++/* ---- Functions -------------------------------------------------------- */ ++ ++typedef struct ++{ ++ unsigned long flags; ++ struct list_head *list; ++ ++} pin_seq_t; ++ ++/**************************************************************************** ++* ++* pin_seq_start ++* ++* seq_file iterator which goes through the pins being monitored ++* ++****************************************************************************/ ++ ++static void *pin_seq_start( struct seq_file *s, loff_t *pos ) ++{ ++ pin_seq_t *ps; ++ loff_t i; ++ ++ s->private = NULL; ++ ++ if (( ps = kcalloc( 1, sizeof( pin_seq_t ), GFP_KERNEL )) == NULL ) ++ { ++ return ERR_PTR( -ENOMEM ); ++ } ++ s->private = ps; ++ ++ spin_lock_irqsave( &gPinListLock, ps->flags ); ++ ++ if ( list_empty( &gPinList )) ++ { ++ DEBUG( Trace, "list_empty\n" ); ++ return NULL; ++ } ++ ps->list = gPinList.next; ++ ++ for ( i = 0; i < *pos; i++ ) ++ { ++ if ( list_is_last( ps->list, &gPinList )) ++ { ++ DEBUG( Trace, "No item @ %llu\n", i + 1 ); ++ return NULL; ++ } ++ ps->list = ps->list->next; ++ } ++ ++ ++ DEBUG( Trace, "ps->list = 0x%08lx, *pos = %llu\n", (long)ps->list, *pos ); ++ ++ return ps->list; ++ ++} // pin_seq_start ++ ++/**************************************************************************** ++* ++* pin_seq_show ++* ++* seq_file iterator which goes through the pins being monitored ++* ++****************************************************************************/ ++ ++static int pin_seq_show( struct seq_file *s, void *v ) ++{ ++ GPIO_PinData_t *pin = list_entry( v, GPIO_PinData_t, list ); ++ char *edgeTypeStr; ++ ++ DEBUG( Trace, "v = 0x%08lx\n", (long)v ); ++ ++ switch ( pin->edgeType ) ++ { ++ case GPIO_EventRisingEdge: edgeTypeStr = "Rising "; break; ++ case GPIO_EventFallingEdge: edgeTypeStr = "Falling"; break; ++ case GPIO_EventBothEdges: edgeTypeStr = "Both "; break; ++ default: edgeTypeStr = "Unknown"; break; ++ } ++ ++ seq_printf( s, "GPIO: %3d Edge: %s Debounce: %d msec\n", pin->gpio, edgeTypeStr, pin->debounceMilliSec ); ++ ++ return 0; ++ ++} // pin_seq_show ++ ++/**************************************************************************** ++* ++* pin_seq_next ++* ++* seq_file iterator which goes through the pins being monitored ++* ++****************************************************************************/ ++ ++static void *pin_seq_next( struct seq_file *s, void *v, loff_t *pos ) ++{ ++ pin_seq_t *ps = s->private; ++ ++ DEBUG( Trace, "v = 0x%08lx *pos = %llu\n", (long)v, *pos ); ++ ++ if ( list_is_last( ps->list, &gPinList )) ++ { ++ DEBUG( Trace, "ps->list = 0x%08lx (end of list)\n", (long)ps->list ); ++ ++ return NULL; ++ } ++ (*pos)++; ++ ps->list = ps->list->next; ++ ++ DEBUG( Trace, "ps->list = 0x%08lx\n", (long)ps->list ); ++ ++ return ps->list; ++ ++} // pin_seq_next ++ ++/**************************************************************************** ++* ++* pin_seq_stop ++* ++* seq_file iterator which goes through the pins being monitored ++* 
++****************************************************************************/ ++ ++static void pin_seq_stop( struct seq_file *s, void *v ) ++{ ++ pin_seq_t *ps = s->private; ++ ++ DEBUG( Trace, "v = 0x%08lx\n", (long)v ); ++ ++ if ( ps != NULL ) ++ { ++ spin_unlock_irqrestore( &gPinListLock, ps->flags ); ++ kfree( ps ); ++ } ++ ++} // pin_seq_stop ++ ++/**************************************************************************** ++* ++* pin_seq_ops ++* ++* Ties all of the pin_seq_xxx routines together. ++* ++****************************************************************************/ ++ ++static struct seq_operations pin_seq_ops = ++{ ++ .start = pin_seq_start, ++ .next = pin_seq_next, ++ .stop = pin_seq_stop, ++ .show = pin_seq_show ++}; ++ ++/**************************************************************************** ++* ++* pins_proc_open ++* ++* Open method for /proc/gpio-event/pin ++* ++****************************************************************************/ ++ ++static int pins_proc_open( struct inode *inode, struct file *file ) ++{ ++ DEBUG( Trace, "called\n" ); ++ ++ return seq_open( file, &pin_seq_ops ); ++} ++ ++/**************************************************************************** ++* ++* pin_proc_ops ++* ++* File operations for our /proc/gpio-event/pins file ++* ++****************************************************************************/ ++ ++static struct file_operations pins_proc_ops = ++{ ++ .owner = THIS_MODULE, ++ .open = pins_proc_open, ++ .read = seq_read, ++ .llseek = seq_lseek, ++ .release = seq_release ++}; ++ ++ ++ ++/**************************************************************************** ++* ++* find_pin ++* ++* Searches the list to see if 'gpio' is currently being monitored. ++* ++****************************************************************************/ ++ ++static GPIO_PinData_t *find_pin( int gpio ) ++{ ++ struct list_head *pin; ++ ++ assert_spin_locked( &gPinListLock ); ++ ++ list_for_each( pin, &gPinList ) ++ { ++ GPIO_PinData_t *pinData = list_entry( pin, GPIO_PinData_t, list ); ++ ++ if ( pinData->gpio == gpio ) ++ { ++ return pinData; ++ } ++ } ++ ++ return NULL; ++ ++} // find_pin ++ ++/**************************************************************************** ++* ++* gpio_event_queue_event ++* ++* Queues an sample event from the bottom half to the top half. This ++* function queues up the event on every file that's open. ++* ++****************************************************************************/ ++ ++static void gpio_event_queue_event( const GPIO_Event_t *gpioEvent ) ++{ ++ unsigned long flags; ++ struct list_head *file; ++ ++ DEBUG( Trace, "gpio %d:%c@%ld.%06ld\n", ++ gpioEvent->gpio, ++ gpioEvent->edgeType == GPIO_EventRisingEdge ? 'R' : 'F', ++ gpioEvent->time.tv_sec, ++ gpioEvent->time.tv_usec ); ++ ++ // Queue up the event on all of the open files ++ // ++ // This function is only called from the ISR, with interrupts already ++ // disabled. 
++ ++ spin_lock_irqsave( &gFileListLock, flags ); ++ ++ list_for_each( file, &gFileList ) ++ { ++ GPIO_FileData_t *fileData = list_entry( file, GPIO_FileData_t, list ); ++ ++ spin_lock( &fileData->queueLock ); ++ { ++ if ( fileData->numEvents >= GPIO_EVENT_QUEUE_LEN ) ++ { ++ // Queue is full - Only report first event lost ++ ++ if ( gReportLostEvents ) ++ { ++ printk( KERN_ERR "GPIO Event: event lost due to queue full\n" ); ++ gReportLostEvents = 0; ++ } ++ gLostEvents++; ++ } ++ else ++ { ++ fileData->queueData[ fileData->putIndex++ ] = *gpioEvent; ++ if ( fileData->putIndex >= GPIO_EVENT_QUEUE_LEN ) ++ { ++ fileData->putIndex = 0; ++ } ++ fileData->numEvents++; ++ } ++ } ++ spin_unlock( &fileData->queueLock ); ++ ++ wake_up_interruptible( &fileData->waitQueue ); ++ } ++ spin_unlock_irqrestore( &gFileListLock, flags ); ++ ++} // gpio_event_queue_event ++ ++/**************************************************************************** ++* ++* gpio_event_dequeue_event ++* ++* Removes an event from the queue ++* ++****************************************************************************/ ++ ++static int gpio_event_dequeue_event( GPIO_FileData_t *fileData, GPIO_Event_t *gpioEvent ) ++{ ++ unsigned long flags; ++ int eventAvailable = 0; ++ ++ spin_lock_irqsave( &fileData->queueLock, flags ); ++ { ++ if ( fileData->numEvents > 0 ) ++ { ++ *gpioEvent = fileData->queueData[ fileData->getIndex++ ]; ++ if ( fileData->getIndex >= GPIO_EVENT_QUEUE_LEN ) ++ { ++ fileData->getIndex = 0; ++ } ++ fileData->numEvents--; ++ ++ eventAvailable = 1; ++ ++ if ( fileData->numEvents == 0 ) ++ { ++ // Since somebody is reading the queue now, indicate that we ++ // can report lost events again ++ ++ gReportLostEvents = 1; ++ } ++ } ++ } ++ spin_unlock_irqrestore( &fileData->queueLock, flags ); ++ ++ DEBUG( Trace, "gpio %d:%c@%ld.%06ld\n", ++ gpioEvent->gpio, ++ gpioEvent->edgeType == GPIO_EventRisingEdge ? 'R' : 'F', ++ gpioEvent->time.tv_sec, ++ gpioEvent->time.tv_usec ); ++ ++ return eventAvailable; ++ ++} // gpio_event_dequeue_event ++ ++/**************************************************************************** ++* ++* gpio_event_irq ++* ++****************************************************************************/ ++ ++static irqreturn_t gpio_event_irq( int irq, void *dev_id ) ++{ ++ GPIO_PinData_t *pinData = (GPIO_PinData_t *)dev_id; ++ GPIO_Event_t gpioEvent; ++ int currLevel = gpio_get_value( pinData->gpio ); ++ ++ // We're called with interrupts disabled. ++ ++ (void)irq; ++ ++ do_gettimeofday( &gpioEvent.time ); ++ gpioEvent.gpio = pinData->gpio; ++ ++ if ( pinData->debounceMilliSec == 0 ) ++ { ++ // We assume that this is a clean signal ++ ++ pinData->pinState = (PinState_t)currLevel; ++ ++ if ( pinData->edgeType == GPIO_EventBothEdges ) ++ { ++ // There's no register to tell which edge just occurred. So we ++ // assume that it just changed into its current level. ++ ++ if ( currLevel ) ++ { ++ // Pin is currently high, so this must be a rising edge ++ ++ gpioEvent.edgeType = GPIO_EventRisingEdge; ++ } ++ else ++ { ++ // Pin is currently low, so this must be a falling edge ++ ++ gpioEvent.edgeType = GPIO_EventFallingEdge; ++ } ++ } ++ else ++ { ++ // If we're only monitoring one type of edge, then that's the one ++ // that happened. ++ ++ gpioEvent.edgeType = pinData->edgeType; ++ } ++ gpio_event_queue_event( &gpioEvent ); ++ } ++ else ++ { ++ gpioEvent.edgeType = 0; ++ ++ // If we need to debounce, then we need to monitor both edges, and ++ // use the debounce timer to figure out the real state. 
So we don't ++ // actually know which edge we just got. We use a state machine ++ // to track things. ++ ++ switch ( pinData->pinState ) ++ { ++ case PIN_LOW: ++ { ++ pinData->pinState = PIN_BOUNCING_HIGH; ++ gpioEvent.edgeType = GPIO_EventRisingEdge; ++ break; ++ } ++ ++ case PIN_HIGH: ++ { ++ pinData->pinState = PIN_BOUNCING_LOW; ++ gpioEvent.edgeType = GPIO_EventFallingEdge; ++ break; ++ } ++ ++ default: ++ { ++ break; ++ } ++ } ++ ++ if (( pinData->edgeType & gpioEvent.edgeType ) != 0 ) ++ { ++ // This is an edge that the user is interested in - send it along. ++ ++ gpio_event_queue_event( &gpioEvent ); ++ } ++ ++ // Disable interrupts for our gpio to allow debounce to occur. The ++ // timer will re-enable the interrupt. ++ ++ disable_irq_nosync( irq ); ++ ++ // Since we have no idea when in the current jiffy that the edge ++ // occurred, we add 1 to the calculation to guarantee at least one ++ // whole jiffy. ++ ++ mod_timer( &pinData->debounceTimer, jiffies + msecs_to_jiffies( pinData->debounceMilliSec ) + 1 ); ++ } ++ ++ return IRQ_HANDLED; ++ ++} // gpio_event_irq ++ ++/**************************************************************************** ++* ++* gpio_event_timer ++* ++****************************************************************************/ ++ ++void gpio_event_timer( unsigned long data ) ++{ ++ GPIO_PinData_t *pinData = (GPIO_PinData_t *)data; ++ ++ // This function is called when the debounce timer for a gpio expires. ++ // We record the state of the pin so that we can figure out what the ++ // next edge will be. ++ ++ pinData->pinState = ( gpio_get_value( pinData->gpio ) != 0 ); ++ ++ // Turn interrupts back on so we can catch the next edge ++ ++ enable_irq( gpio_to_irq( pinData->gpio )); ++ ++} // gpio_event_timer ++ ++/**************************************************************************** ++* ++* gpio_event_monitor ++* ++****************************************************************************/ ++ ++static int gpio_event_monitor( GPIO_EventMonitor_t *monitor ) ++{ ++ int rc = 0; ++ unsigned long flags; ++ GPIO_PinData_t *pinData; ++ unsigned long irqFlags; ++ ++ spin_lock_irqsave( &gPinListLock, flags ); ++ ++ if ( monitor->onOff ) ++ { ++ // Check to make sure we aren't already monitoring the gpio ++ ++ if (( pinData = find_pin( monitor->gpio )) != NULL ) ++ { ++ // We are already monitoring the pin. Unmonitor the pin and then ++ // proceed. ++ ++ monitor->onOff = 0; ++ ++ spin_unlock_irqrestore( &gPinListLock, flags ); ++ gpio_event_monitor( monitor ); ++ spin_lock_irqsave( &gPinListLock, flags ); ++ } ++ ++ if (( pinData = kcalloc( 1, sizeof( *pinData ), GFP_KERNEL )) == NULL ) ++ { ++ DEBUG( Error, "GPIO %d: Out of memory\n", monitor->gpio ); ++ rc = -ENOMEM; ++ goto out; ++ } ++ ++ INIT_LIST_HEAD( &pinData->list ); ++ ++ snprintf( pinData->devName, sizeof( pinData->devName ), "gpio %d event", monitor->gpio ); ++ ++ // Note: ++ // Calling request_irq will automatically set the pin to be an input. ++ ++ irqFlags = 0; ++ ++ if ( monitor->debounceMilliSec == 0 ) ++ { ++ // A clean signal is being presented, so we can just look for ++ // a particular edge ++ ++ if (( monitor->edgeType & GPIO_EventRisingEdge ) != 0 ) ++ { ++ irqFlags |= IRQF_TRIGGER_RISING; ++ } ++ if (( monitor->edgeType & GPIO_EventFallingEdge ) != 0 ) ++ { ++ irqFlags |= IRQF_TRIGGER_FALLING; ++ } ++ } ++ else ++ { ++ // Since we need to debounce, we need to look for both types of ++ // edges, since we get both types of edges whenever a bounce ++ // happens. 
++ ++ irqFlags |= IRQF_TRIGGER_RISING; ++ irqFlags |= IRQF_TRIGGER_FALLING; ++ } ++ ++ if (( rc = request_irq( gpio_to_irq( monitor->gpio ), gpio_event_irq, irqFlags, pinData->devName, pinData )) != 0 ) ++ { ++ DEBUG( Error, "Unable to register irq for GPIO %d\n", monitor->gpio ); ++ kfree( pinData ); ++ goto out; ++ } ++ ++ pinData->gpio = monitor->gpio; ++ pinData->edgeType = monitor->edgeType; ++ pinData->debounceMilliSec = monitor->debounceMilliSec; ++ ++ init_timer( &pinData->debounceTimer ); ++ ++ pinData->debounceTimer.data = (unsigned long)pinData; ++ pinData->debounceTimer.function = gpio_event_timer; ++ ++ list_add_tail( &pinData->list, &gPinList ); ++ ++ if ( gpio_get_value( pinData->gpio ) == 0 ) ++ { ++ pinData->pinState = PIN_LOW; ++ } ++ else ++ { ++ pinData->pinState = PIN_HIGH; ++ } ++ } ++ else ++ { ++ if (( pinData = find_pin( monitor->gpio )) == NULL ) ++ { ++ DEBUG( Error, "GPIO %d isn't being monitored\n", monitor->gpio ); ++ rc = -ENXIO; ++ goto out; ++ } ++ ++ // We've found the gpio being monitored - turn things off. ++ ++ free_irq( gpio_to_irq( pinData->gpio ), pinData ); ++ ++ del_timer_sync( &pinData->debounceTimer ); ++ list_del( &pinData->list ); ++ ++ kfree( pinData ); ++ } ++ ++out: ++ ++ spin_unlock_irqrestore( &gPinListLock, flags ); ++ ++ return rc; ++ ++} // gpio_event_monitor ++ ++/**************************************************************************** ++* ++* gpio_event_ioctl ++* ++* Called to process ioctl requests ++* ++*****************************************************************************/ ++ ++long gpio_event_ioctl( struct file *file, unsigned int cmd, unsigned long arg ) ++{ ++ GPIO_FileData_t *fileData; ++ ++ DEBUG( Trace, "type: '%c' cmd: 0x%x\n", _IOC_TYPE( cmd ), _IOC_NR( cmd )); ++ ++ fileData = file->private_data; ++ ++ switch ( cmd ) ++ { ++ case GPIO_EVENT_IOCTL_MONITOR_GPIO: ++ { ++ GPIO_EventMonitor_t monitor; ++ ++ if ( copy_from_user( &monitor, (void *)arg, sizeof( monitor )) != 0 ) ++ { ++ return -EFAULT; ++ } ++ return gpio_event_monitor( &monitor ); ++ } ++ ++ case GPIO_EVENT_IOCTL_SET_READ_MODE: ++ { ++ fileData->readMode = (GPIO_EventReadMode_t)arg; ++ break; ++ } ++ ++ case TCGETS: ++ { ++ // When cat opens this device, we get this ioctl ++ return -ENOTTY; ++ } ++ ++ default: ++ { ++ DEBUG( Error, "Unrecognized ioctl: '0x%x'\n", cmd ); ++ return -ENOTTY; ++ } ++ } ++ ++ return 0; ++ ++} // gpio_event_ioctl ++ ++/**************************************************************************** ++* ++* gpio_event_open ++* ++****************************************************************************/ ++ ++static int gpio_event_open( struct inode *inode, struct file *file ) ++{ ++ unsigned long flags; ++ GPIO_FileData_t *fileData; ++ ++ DEBUG( Trace, "gpio_event_open called, major = %d, minor = %d\n", MAJOR( inode->i_rdev ), MINOR( inode->i_rdev )); ++ ++ // Allocate a per-open data structure ++ ++ if (( fileData = kcalloc( 1, sizeof( *fileData ), GFP_KERNEL )) == NULL ) ++ { ++ return -ENOMEM; ++ } ++ ++ INIT_LIST_HEAD( &fileData->list ); ++ ++ init_waitqueue_head( &fileData->waitQueue ); ++ ++ spin_lock_init( &fileData->queueLock ); ++ ++ fileData->getIndex = 0; ++ fileData->putIndex = 0; ++ fileData->numEvents = 0; ++ fileData->bufBytes = 0; ++ ++ fileData->readMode = GPIO_EventReadModeAscii; ++ ++ file->private_data = fileData; ++ ++ spin_lock_irqsave( &gFileListLock, flags ); ++ { ++ list_add_tail( &fileData->list, &gFileList ); ++ } ++ spin_unlock_irqrestore( &gFileListLock, flags ); ++ ++ return 0; ++ ++} // 
gpio_event_open ++ ++/**************************************************************************** ++* ++* gpio_event_read ++* ++****************************************************************************/ ++ ++static ssize_t gpio_event_read( struct file *file, char *buffer, size_t spaceRemaining, loff_t *ppos ) ++{ ++ int rc; ++ ssize_t bytesCopied = 0; ++ ssize_t bytesToCopy; ++ GPIO_FileData_t *fileData = file->private_data; ++ ++ DEBUG( Trace, "gpio_event_read called, major = %d, minor = %d\n", MAJOR( file->f_dentry->d_inode->i_rdev ), MINOR( file->f_dentry->d_inode->i_rdev )); ++ ++ if ( spaceRemaining == 0 ) ++ { ++ return 0; ++ } ++ ++ // First of all, return any unread data from the previous call ++ ++ if ( fileData->bufBytes > 0 ) ++ { ++ if ( spaceRemaining < fileData->bufBytes ) ++ { ++ bytesCopied = spaceRemaining; ++ } ++ else ++ { ++ bytesCopied = fileData->bufBytes; ++ } ++ ++ if ( copy_to_user( &buffer[0], &fileData->buffer[0], bytesCopied ) != 0 ) ++ { ++ return -EFAULT; ++ } ++ if ( fileData->bufBytes > bytesCopied ) ++ { ++ memmove( &fileData->buffer[ 0 ], &fileData->buffer[ bytesCopied ], fileData->bufBytes - bytesCopied ); ++ } ++ fileData->bufBytes -= bytesCopied; ++ ++ if ( fileData->bufBytes > 0 ) ++ { ++ // We copied some data, but not all of it. Return early. ++ ++ return bytesCopied; ++ } ++ } ++ ++ do ++ { ++ if ((( file->f_flags & O_NONBLOCK ) != 0 ) && ( fileData->numEvents == 0 )) ++ { ++ // File was opened non-blocking and no more data is available ++ // We don't want to wait for an event, so exit from the loop ++ ++ break; ++ } ++ ++ rc = wait_event_interruptible( fileData->waitQueue, ( fileData->numEvents > 0 )); ++ if ( rc != 0 ) ++ { ++ return rc; ++ } ++ ++ if ( fileData->readMode == GPIO_EventReadModeBinary ) ++ { ++ gpio_event_dequeue_event( fileData, (GPIO_Event_t *)&fileData->buffer[0] ); ++ ++ fileData->bufBytes = sizeof( GPIO_Event_t ); ++ ++ } ++ else ++ { ++ GPIO_Event_t gpioEvent; ++ ++ gpio_event_dequeue_event( fileData, &gpioEvent ); ++ ++ // ASCII Mode output: ++ // ++ // nn E tttttttt.tttttt ++ // ++ // Where nn is the base-10 GPIO number ++ // E is R or F (for rising or falling edge) ++ // tttttttt.tttttt is the timestamp with microsecond resolution ++ ++ fileData->bufBytes = snprintf( fileData->buffer, sizeof( fileData->buffer ), ++ "%2d %c %ld.%06ld\n", ++ gpioEvent.gpio, ++ (( gpioEvent.edgeType == GPIO_EventRisingEdge ) ? 'R' : 'F' ), ++ gpioEvent.time.tv_sec, ++ gpioEvent.time.tv_usec ); ++ } ++ ++ if ( spaceRemaining >= fileData->bufBytes ) ++ { ++ bytesToCopy = fileData->bufBytes; ++ } ++ else ++ { ++ bytesToCopy = spaceRemaining; ++ } ++ ++ if ( copy_to_user( &buffer[ bytesCopied ], &fileData->buffer[0], bytesToCopy ) != 0 ) ++ { ++ return -EFAULT; ++ } ++ spaceRemaining -= bytesToCopy; ++ bytesCopied += bytesToCopy; ++ fileData->bufBytes -= bytesToCopy; ++ ++ if ( fileData->bufBytes > 0 ) ++ { ++ // We couldn't copy all of the data out of the buffer. Move the ++ // remaining data to the beginning of the buffer and exit. ++ ++ memmove( &fileData->buffer[ 0 ], &fileData->buffer[ bytesToCopy ], fileData->bufBytes ); ++ return bytesCopied; ++ } ++ } while (( fileData->numEvents > 0 ) && ( spaceRemaining > 0 )); ++ ++ if ((( file->f_flags & O_NONBLOCK ) != 0 ) && ( bytesCopied == 0 )) ++ { ++ // File was opened non-blocking and we didn't copy any data. 
++ ++ return -EAGAIN; ++ } ++ ++ return bytesCopied; ++ ++} // gpio_event_read ++ ++/**************************************************************************** ++* ++* gpio_event_poll - used by select & poll ++* ++****************************************************************************/ ++ ++static unsigned int gpio_event_poll(struct file *file, poll_table *wait) ++{ ++ unsigned long flags; ++ GPIO_FileData_t *fileData = file->private_data; ++ unsigned int mask = 0; ++ ++ poll_wait( file, &fileData->waitQueue, wait ); ++ ++ spin_lock_irqsave( &fileData->queueLock, flags ); ++ { ++ if (( fileData->bufBytes > 0 ) || ( fileData->numEvents > 0 )) ++ { ++ mask |= POLLIN | POLLRDNORM; // readable ++ } ++ } ++ spin_unlock_irqrestore( &fileData->queueLock, flags ); ++ ++ return mask; ++ ++} // gpio_event_poll ++ ++/**************************************************************************** ++* ++* gpio_event_release ++* ++****************************************************************************/ ++ ++static int gpio_event_release( struct inode *inode, struct file *file ) ++{ ++ unsigned long flags; ++ GPIO_FileData_t *fileData = file->private_data; ++ ++ DEBUG( Trace, "gpio_event_release called\n" ); ++ ++ spin_lock_irqsave( &gFileListLock, flags ); ++ { ++ list_del( &fileData->list ); ++ } ++ spin_unlock_irqrestore( &gFileListLock, flags ); ++ ++ kfree( fileData ); ++ ++ return 0; ++ ++} // gpio_event_release ++ ++/**************************************************************************** ++* ++* File Operations (these are the device driver entry points) ++* ++****************************************************************************/ ++ ++struct file_operations gpio_event_fops = ++{ ++ owner: THIS_MODULE, ++ unlocked_ioctl: gpio_event_ioctl, ++ open: gpio_event_open, ++ poll: gpio_event_poll, ++ release: gpio_event_release, ++ read: gpio_event_read, ++}; ++ ++/**************************************************************************** ++* ++* gpio_event_init ++* ++* Called to perform module initialization when the module is loaded ++* ++****************************************************************************/ ++ ++static int __init gpio_event_init( void ) ++{ ++ int rc; ++ ++ DEBUG( Trace, "called\n" ); ++ ++ printk( gBanner ); ++ ++ // Get a major number ++ ++ if (( rc = alloc_chrdev_region( &gGpioEventDevNum, 0, 1, GPIO_EVENT_DEV_NAME )) < 0 ) ++ { ++ printk( KERN_WARNING "sample: Unable to allocate major, err: %d\n", rc ); ++ return rc; ++ } ++ DEBUG( Trace, "allocated major:%d minor:%d\n", MAJOR( gGpioEventDevNum ), MINOR( gGpioEventDevNum )); ++ ++ // Register our proc entries. ++ ++ gProcGpioEvent = create_proc_entry( "gpio-event", S_IFDIR | S_IRUGO | S_IXUGO, NULL ); ++ if ( gProcGpioEvent == NULL ) ++ { ++ return -ENOMEM; ++ } ++ gProcPins = create_proc_entry( "pins", 0444, gProcGpioEvent ); ++ if ( gProcPins != NULL ) ++ { ++ gProcPins->proc_fops = &pins_proc_ops; ++ } ++ ++#if ( LINUX_VERSION_CODE <= KERNEL_VERSION( 2, 6, 20 )) ++ gSysCtlHeader = register_sysctl_table( gSysCtl, 0 ); ++ if ( gSysCtlHeader != NULL ) ++ { ++ gSysCtlHeader->ctl_table->child->de->owner = THIS_MODULE; ++ } ++#else ++ gSysCtlHeader = register_sysctl_table( gSysCtl ); ++#endif ++ ++ // Register our device. The device becomes "active" as soon as cdev_add ++ // is called. 
++ ++ cdev_init( &gGpioEventCDev, &gpio_event_fops ); ++ gGpioEventCDev.owner = THIS_MODULE; ++ ++ if (( rc = cdev_add( &gGpioEventCDev, gGpioEventDevNum, 1 )) != 0 ) ++ { ++ printk( KERN_WARNING "sample: cdev_add failed: %d\n", rc ); ++ return rc; ++ } ++ ++ // Create a class, so that udev will make the /dev entry ++ ++ gGpioEventClass = class_create( THIS_MODULE, GPIO_EVENT_DEV_NAME ); ++ if ( IS_ERR( gGpioEventClass )) ++ { ++ printk( KERN_WARNING "sample: Unable to create class\n" ); ++ return -1; ++ } ++ ++ device_create( gGpioEventClass, NULL, gGpioEventDevNum, NULL, GPIO_EVENT_DEV_NAME ); ++ ++ return 0; ++ ++} // gpio_event_init ++ ++/**************************************************************************** ++* ++* gpio_event_exit ++* ++* Called to perform module cleanup when the module is unloaded. ++* ++****************************************************************************/ ++ ++static void __exit gpio_event_exit( void ) ++{ ++ struct list_head *next; ++ struct list_head *pin; ++ GPIO_EventMonitor_t monitor; ++ ++ DEBUG( Trace, "called\n" ); ++ ++ // If there are any pins which are currently being monitored, then we ++ // need to unmonitor them. ++ ++ memset( &monitor, 0, sizeof( monitor )); ++ ++ list_for_each_safe( pin, next, &gPinList ) ++ { ++ GPIO_PinData_t *pinData = list_entry( pin, GPIO_PinData_t, list ); ++ ++ monitor.gpio = pinData->gpio; ++ ++ gpio_event_monitor( &monitor ); ++ } ++ ++ // Deregister our driver ++ ++ device_destroy( gGpioEventClass, gGpioEventDevNum ); ++ class_destroy( gGpioEventClass ); ++ ++ cdev_del( &gGpioEventCDev ); ++ ++ if ( gSysCtlHeader != NULL ) ++ { ++ unregister_sysctl_table( gSysCtlHeader ); ++ } ++ remove_proc_entry( "pins", gProcGpioEvent ); ++ remove_proc_entry( "gpio-event", NULL ); ++ ++ unregister_chrdev_region( gGpioEventDevNum, 1 ); ++ ++} // gpio_event_exit ++ ++/****************************************************************************/ ++ ++module_init(gpio_event_init); ++module_exit(gpio_event_exit); ++ ++MODULE_AUTHOR("Dave Hylands"); ++MODULE_DESCRIPTION("GPIO Event Driver"); ++MODULE_LICENSE("Dual BSD/GPL"); ++ +diff -Naur linux-3.2.33-go.orig/3rdparty/gpio_event_drv/gpio-event-drv.h 3rdparty/gpio_event_drv/gpio-event-drv.h +--- linux-3.2.33-go.orig/3rdparty/gpio_event_drv/gpio-event-drv.h 1970-01-01 00:00:00.000000000 +0000 ++++ 3rdparty/gpio_event_drv/gpio-event-drv.h 2012-11-18 10:24:14.000000000 +0000 +@@ -0,0 +1,115 @@ ++/**************************************************************************** ++* ++* Copyright (c) 2006 Dave Hylands ++* ++* This program is free software; you can redistribute it and/or modify ++* it under the terms of the GNU General Public License version 2 as ++* published by the Free Software Foundation. ++* ++* Alternatively, this software may be distributed under the terms of BSD ++* license. ++* ++* See README and COPYING for more details. ++* ++**************************************************************************** ++* ++* This driver allows multiple GPIO pins to be monitored and allows a user ++* mode program to be notified when the pin changes. 
++* ++****************************************************************************/ ++ ++#if !defined( GPIO_EVENT_DRV_H ) ++#define GPIO_EVENT_DRV_H ++ ++/* ---- Include Files ----------------------------------------------------- */ ++ ++#if defined( __KERNEL__ ) ++# include ++# include ++# include ++#else ++# include ++# include ++# include ++#endif ++ ++ ++/* ---- Constants and Types ----------------------------------------------- */ ++ ++// The ioctl "magic" is just some character value which is used to help ++// detect when incorrect ioctl values are sent down to a driver. ++ ++#define GPIO_EVENT_IOCTL_MAGIC 'G' ++ ++/** ++ * Deefines for each of the ioctl commands. Note that since we want to reduce ++ * the possibility that a user mode program gets out of sync with a given ++ * driver, we explicitly assign a value to each enumeration. This makes ++ * it more difficult to stick new ioctl's in the middle of the list. ++ */ ++ ++typedef enum ++{ ++ GPIO_EVENT_CMD_FIRST = 0x80, ++ ++ GPIO_EVENT_CMD_MONITOR_GPIO = 0x80, ++ GPIO_EVENT_CMD_SET_READ_MODE = 0x81, ++ ++ /* Insert new ioctls here */ ++ ++ GPIO_EVENT_CMD_LAST, ++ ++} GPIO_EVENT_CMD; ++ ++typedef enum ++{ ++ GPIO_EventRisingEdge = 0x01, ++ GPIO_EventFallingEdge = 0x02, ++ GPIO_EventBothEdges = GPIO_EventRisingEdge | GPIO_EventFallingEdge, ++ ++} GPIO_EventEdgeType_t; ++ ++typedef struct ++{ ++ uint8_t gpio; // gpio to monitor ++ uint8_t onOff; // 0 = stop monitoring, 1 = start monitoring ++ GPIO_EventEdgeType_t edgeType; // Monitor rising/falling/both edges? ++ uint8_t debounceMilliSec; // debounce time in milliseconds ++ ++} GPIO_EventMonitor_t; ++ ++typedef enum ++{ ++ GPIO_EventReadModeAscii = 0x00, // Reads return ASCII data (default) ++ GPIO_EventReadModeBinary = 0x01, // Reads return Binary data ++ ++} GPIO_EventReadMode_t; ++ ++/* ++ * Definitions for the actual ioctl commands ++ */ ++ ++#define GPIO_EVENT_IOCTL_MONITOR_GPIO _IOW( GPIO_EVENT_IOCTL_MAGIC, GPIO_EVENT_CMD_MONITOR_GPIO, GPIO_EventMonitor_t ) // arg is GPIO_EventMonitor * ++#define GPIO_EVENT_IOCTL_SET_READ_MODE _IO( GPIO_EVENT_IOCTL_MAGIC, GPIO_EVENT_CMD_SET_READ_MODE ) // arg is int ++ ++/* ++ * Definitions for sysctl. The top level define has to be unique system wide. 
++ * The kernel defines values 1 thru about 10 (see include/linunx/sysctl.h) ++ */ ++ ++#define CTL_GPIO_EVENT 0x47504576 // 'GPEv' in hex form ++ ++/* ++ * Reads return GPIO_Event_t structures ++ */ ++ ++typedef struct ++{ ++ uint8_t gpio; // GPIO that this event is for ++ GPIO_EventEdgeType_t edgeType; // Type of edge detected ++ struct timeval time; // Time the event occurred ++ ++} GPIO_Event_t; ++ ++#endif // GPIO_EVENT_DRV_H ++ diff --git a/3.3.8/3rd-3rdparty-merge.patch b/3.3.8/3rd-3rdparty-merge.patch new file mode 100644 index 0000000..dff4679 --- /dev/null +++ b/3.3.8/3rd-3rdparty-merge.patch @@ -0,0 +1,156 @@ +diff -uNr linux-3.2.33-go.orig/arch/alpha/Kconfig linux-3.2.33-go/arch/alpha/Kconfig +--- linux-3.2.33-go.orig/arch/alpha/Kconfig 2012-11-15 22:08:02.768806792 +0100 ++++ linux-3.2.33-go/arch/alpha/Kconfig 2012-11-15 22:08:29.937483632 +0100 +@@ -673,3 +673,4 @@ + + source "lib/Kconfig" + ++source "3rdparty/Kconfig" +diff -uNr linux-3.2.33-go.orig/arch/arm/Kconfig linux-3.2.33-go/arch/arm/Kconfig +--- linux-3.2.33-go.orig/arch/arm/Kconfig 2012-11-15 22:07:59.952839378 +0100 ++++ linux-3.2.33-go/arch/arm/Kconfig 2012-11-15 22:14:01.950566716 +0100 +@@ -2259,3 +2259,5 @@ + source "crypto/Kconfig" + + source "lib/Kconfig" ++ ++source "3rdparty/Kconfig" +diff -uNr linux-3.2.33-go.orig/arch/ia64/Kconfig linux-3.2.33-go/arch/ia64/Kconfig +--- linux-3.2.33-go.orig/arch/ia64/Kconfig 2012-11-15 22:08:00.893828523 +0100 ++++ linux-3.2.33-go/arch/ia64/Kconfig 2012-11-15 22:08:29.938483621 +0100 +@@ -669,3 +669,5 @@ + + config IOMMU_HELPER + def_bool (IA64_HP_ZX1 || IA64_HP_ZX1_SWIOTLB || IA64_GENERIC || SWIOTLB) ++ ++source "3rdparty/Kconfig" +diff -uNr linux-3.2.33-go.orig/arch/mips/Kconfig linux-3.2.33-go/arch/mips/Kconfig +--- linux-3.2.33-go.orig/arch/mips/Kconfig 2012-11-15 22:08:02.698807597 +0100 ++++ linux-3.2.33-go/arch/mips/Kconfig 2012-11-15 22:08:29.939483610 +0100 +@@ -2485,3 +2485,5 @@ + source "crypto/Kconfig" + + source "lib/Kconfig" ++ ++source "3rdparty/Kconfig" +diff -uNr linux-3.2.33-go.orig/arch/powerpc/Kconfig linux-3.2.33-go/arch/powerpc/Kconfig +--- linux-3.2.33-go.orig/arch/powerpc/Kconfig 2012-11-15 22:08:01.893816938 +0100 ++++ linux-3.2.33-go/arch/powerpc/Kconfig 2012-11-15 22:08:29.940483598 +0100 +@@ -980,3 +980,5 @@ + bool + + source "arch/powerpc/kvm/Kconfig" ++ ++source "3rdparty/Kconfig" +diff -uNr linux-3.2.33-go.orig/arch/sparc/Kconfig linux-3.2.33-go/arch/sparc/Kconfig +--- linux-3.2.33-go.orig/arch/sparc/Kconfig 2012-11-15 22:08:00.130837331 +0100 ++++ linux-3.2.33-go/arch/sparc/Kconfig 2012-11-15 22:08:29.941483586 +0100 +@@ -605,3 +605,5 @@ + source "crypto/Kconfig" + + source "lib/Kconfig" ++ ++source "3rdparty/Kconfig" +diff -uNr linux-3.2.33-go.orig/arch/x86/Kconfig linux-3.2.33-go/arch/x86/Kconfig +--- linux-3.2.33-go.orig/arch/x86/Kconfig 2012-11-15 22:08:00.435833823 +0100 ++++ linux-3.2.33-go/arch/x86/Kconfig 2012-11-15 22:08:29.945483540 +0100 +@@ -2179,3 +2179,5 @@ + source "arch/x86/kvm/Kconfig" + + source "lib/Kconfig" ++ ++source "3rdparty/Kconfig" +diff -uNr linux-3.2.33-go.orig/Makefile linux-3.2.33-go/Makefile +--- linux-3.2.33-go.orig/Makefile 2012-11-15 22:08:03.435799123 +0100 ++++ linux-3.2.33-go/Makefile 2012-11-15 22:08:29.946483529 +0100 +@@ -507,7 +507,7 @@ + + # Objects we will link into vmlinux / subdirs we need to visit + init-y := init/ +-drivers-y := drivers/ sound/ firmware/ ++drivers-y := drivers/ sound/ firmware/ 3rdparty/ + net-y := net/ + libs-y := lib/ + core-y := usr/ +diff -uNr 
linux-3.2.33-go.orig/scripts/kconfig/Makefile linux-3.2.33-go/scripts/kconfig/Makefile +--- linux-3.2.33-go.orig/scripts/kconfig/Makefile 2012-11-15 22:07:58.064861094 +0100 ++++ linux-3.2.33-go/scripts/kconfig/Makefile 2012-11-15 22:08:55.603180188 +0100 +@@ -11,29 +11,29 @@ + Kconfig := Kconfig + endif + +-xconfig: $(obj)/qconf ++xconfig: $(obj)/qconf 3rdparty/Makefile + $< $(Kconfig) + +-gconfig: $(obj)/gconf ++gconfig: $(obj)/gconf 3rdparty/Makefile + $< $(Kconfig) + +-menuconfig: $(obj)/mconf ++menuconfig: $(obj)/mconf 3rdparty/Makefile + $< $(Kconfig) + +-config: $(obj)/conf ++config: $(obj)/conf 3rdparty/Makefile + $< --oldaskconfig $(Kconfig) + +-nconfig: $(obj)/nconf ++nconfig: $(obj)/nconf 3rdparty/Makefile + $< $(Kconfig) + +-oldconfig: $(obj)/conf ++oldconfig: $(obj)/conf 3rdparty/Makefile + $< --$@ $(Kconfig) + +-silentoldconfig: $(obj)/conf ++silentoldconfig: $(obj)/conf 3rdparty/Makefile + $(Q)mkdir -p include/generated + $< --$@ $(Kconfig) + +-localyesconfig localmodconfig: $(obj)/streamline_config.pl $(obj)/conf ++localyesconfig localmodconfig: $(obj)/streamline_config.pl $(obj)/conf 3rdparty/Makefile + $(Q)mkdir -p include/generated + $(Q)perl $< --$@ $(srctree) $(Kconfig) > .tmp.config + $(Q)if [ -f .config ]; then \ +@@ -90,18 +90,18 @@ + *) cat $(CLONECONFIG) > .config.running ;; \ + esac && \ + echo -e "Cloning configuration file $(CLONECONFIG)\n" +- $(Q)$< --defconfig=.config.running arch/$(SRCARCH)/Kconfig ++ $(Q)$< --defconfig=.config.running arch/$(SRCARCH)/Kconfig 3rdparty/Makefile + + + PHONY += listnewconfig oldnoconfig savedefconfig defconfig + +-listnewconfig oldnoconfig: $(obj)/conf ++listnewconfig oldnoconfig: $(obj)/conf 3rdparty/Makefile + $< --$@ $(Kconfig) + +-savedefconfig: $(obj)/conf ++savedefconfig: $(obj)/conf 3rdparty/Makefile + $< --$@=defconfig $(Kconfig) + +-defconfig: $(obj)/conf ++defconfig: $(obj)/conf 3rdparty/Makefile + ifeq ($(KBUILD_DEFCONFIG),) + $< --defconfig $(Kconfig) + else +@@ -109,7 +109,7 @@ + $(Q)$< --defconfig=arch/$(SRCARCH)/configs/$(KBUILD_DEFCONFIG) $(Kconfig) + endif + +-%_defconfig: $(obj)/conf ++%_defconfig: $(obj)/conf 3rdparty/Makefile + $(Q)$< --defconfig=arch/$(SRCARCH)/configs/$@ $(Kconfig) + + # Help text used by make help +@@ -186,6 +186,8 @@ + gconf-target := 1 + endif + ++3rdparty/Makefile: ++ pushd $(srctree)/3rdparty ; $(PERL) ./mkbuild.pl ; popd + + ifeq ($(qconf-target),1) + hostprogs-y += qconf diff --git a/3.3.8/3rd-3rdparty-netatop-0.1.1.patch b/3.3.8/3rd-3rdparty-netatop-0.1.1.patch new file mode 100644 index 0000000..a06a77d --- /dev/null +++ b/3.3.8/3rd-3rdparty-netatop-0.1.1.patch @@ -0,0 +1,1769 @@ +diff -uNr linux-3.2.33-go.orig/3rdparty/netatop/Kconfig 3rdparty/netatop/Kconfig +--- linux-3.2.33-go.orig/3rdparty/netatop/Kconfig 1970-01-01 01:00:00.000000000 +0100 ++++ 3rdparty/netatop/Kconfig 2012-11-15 22:48:00.753390796 +0100 +@@ -0,0 +1,8 @@ ++config NETATOP ++ tristate "Netatop kernel module" ++ help ++ The optional kernel module netatop can be loaded to gather statistics ++ about the TCP and UDP packets that have been transmitted/received ++ per process and per thread ++ ++ If unsure, see you again in six months. +diff -uNr linux-3.2.33-go.orig/3rdparty/netatop/Makefile 3rdparty/netatop/Makefile +--- linux-3.2.33-go.orig/3rdparty/netatop/Makefile 1970-01-01 01:00:00.000000000 +0100 ++++ 3rdparty/netatop/Makefile 2012-11-15 22:50:01.332957868 +0100 +@@ -0,0 +1,5 @@ ++# ++# THIS IS AN AUTOMATICALLY GENERATED FILE. DO NOT EDIT. 
++# ++ ++obj-$(CONFIG_NETATOP) += netatop.o +diff -uNr linux-3.2.33-go.orig/3rdparty/netatop/netatop.c 3rdparty/netatop/netatop.c +--- linux-3.2.33-go.orig/3rdparty/netatop/netatop.c 1970-01-01 01:00:00.000000000 +0100 ++++ 3rdparty/netatop/netatop.c 2012-11-15 22:57:52.989419565 +0100 +@@ -0,0 +1,1687 @@ ++/* ++** This module uses the netfilter interface to maintain statistics ++** about the network traffic per task, on level of thread group ++** and individual thread. ++** ++** General setup ++** ------------- ++** Once the module is active, it is called for every packet that is ++** transmitted by a local process and every packet that is received ++** from an interface. Not only the packets that contain the user data ++** are passed but also the TCP related protocol packets (SYN, ACK, ...). ++** ++** When the module discovers a packet for a connection (TCP) or local ++** port (UDP) that is new, it creates a sockinfo structure. As soon as ++** possible the sockinfo struct will be connected to a taskinfo struct ++** that represents the proces or thread that is related to the socket. ++** However, the task can only be determined when a packet is transmitted, ++** i.e. the module is called during system call handling in the context ++** of the transmitting process. At that moment the tgid (process) and ++** pid (thread) can be obtained from the process administration to ++** be stored in the module's own taskinfo structs (one for the process, ++** one for the thread). ++** For the time that the sockinfo struct can not be related to a taskinfo ++** struct (e.g. when only packets are received), counters are maintained ++** temporarily in the sockinfo struct. As soon as a related taskinfo struct ++** is discovered when the task transmits, counters will be maintained in ++** the taskinfo struct itself. ++** When packets are only received for a socket (e.g. another machine is ++** sending UDP packets to the local machine) while the local task ++** never responds, no match to a process can be made and the packets ++** remain unidentified by the netatop module. At least one packet should ++** have been sent by a local process to be able to match packets for such ++** socket. ++** In the file /proc/netatop counters can be found that show the total ++** number of packets sent/received and how many of these packets were ++** unidentified (i.e. not accounted to a process/thread). ++** ++** Garbage collection ++** ------------------ ++** The module uses a garbage collector to cleanup the unused sockinfo ++** structs if connections do not exist any more (TCP) or have not been ++** used for some time (TCP/UDP). ++** Furthermore, the garbage collector checks if the taskinfo structs ++** still represent existing processes or threads. If not, the taskinfo struct ++** is destroyed (in case of a thread) or it is moved to a separate list of ++** finished processes (in case of a process). Analysis programs can read ++** the taskinfo of such finished process. When the taskinfo of a finished ++** process is not read within 15 seconds, the taskinfo will be destroyed. ++** ++** A garbage collector cycle can be triggered by issueing a getsockopt ++** call from an analysis program (e.g. atop). Apart from that, a time-based ++** garbage collector cycle is issued anyhow every 15 seconds. ++** ++** Interface with user mode ++** ------------------------ ++** Programs can open an IP socket and use the getsockopt() system call ++** to issue commands to this module. 
With the command ATOP_GETCNT_TGID ++** the current counters can be obtained on process level (thread group) ++** and with the command ATOP_GETCNT_PID the counters on thread level. ++** For both commands, the tgid/pid has to be passed of the required thread ++** (group). When the required thread (group) does not exist, an errno ESRCH ++** is given. ++** ++** The command ATOP_GETCNT_EXIT can be issued to obtain the counters of ++** an exited process. As stated above, such command has to be issued ++** within 15 seconds after a process has been declared 'finished' by ++** the garbage collector. Whenever this command is issued and no exited ++** process is in the exitlist, the requesting process is blocked until ++** an exited process is available. ++** ++** The command NETATOP_FORCE_GC activates the garbage collector of the ++** netatop module to determine if sockinfo's of old connections/ports ++** can be destroyed and if taskinfo's of exited processes can be ++** The command NETATOP_EMPTY_EXIT can be issued to wait until the exitlist ++** with the taskinfo's of exited processes is empty. ++** ---------------------------------------------------------------------- ++** Copyright (C) 2012 Gerlof Langeveld (gerlof.langeveld@atoptool.nl) ++** ++** This program is free software; you can redistribute it and/or modify ++** it under the terms of the GNU General Public License version 2 as ++** published by the Free Software Foundation. ++*/ ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++ ++#include "netatop.h" ++#include "netatopversion.h" ++ ++MODULE_LICENSE("GPL"); ++MODULE_AUTHOR("Gerlof Langeveld "); ++MODULE_DESCRIPTION("Per-task network statistics"); ++MODULE_VERSION(NETATOPVERSION); ++ ++#define GCINTERVAL (HZ*15) // interval garbage collector (jiffies) ++#define GCMAXUDP (HZ*16) // max inactivity for UDP (jiffies) ++#define GCMAXTCP (HZ*1800) // max inactivity for TCP (jiffies) ++#define GCMAXUNREF (HZ*60) // max time without taskref (jiffies) ++ ++#define SILIMIT (2048*1024) // maximum memory for sockinfo structs ++#define TILIMIT (1024*1024) // maximum memory for taskinfo structs ++ ++#define NF_IP_PRE_ROUTING 0 ++#define NF_IP_LOCAL_IN 1 ++#define NF_IP_FORWARD 2 ++#define NF_IP_LOCAL_OUT 3 ++#define NF_IP_POST_ROUTING 4 ++ ++/* ++** struct that maintains statistics about the network ++** traffic caused per thread or thread group ++*/ ++struct chainer { ++ void *next; ++ void *prev; ++}; ++ ++struct taskinfobucket; ++ ++struct taskinfo { ++ struct chainer ch; ++ ++ pid_t id; // tgid or pid ++ char type; // 'g' (thread group) or ++ // 't' (thread) ++ unsigned char state; // see below ++ char command[COMLEN]; ++ unsigned long btime; // start time of process ++ unsigned long long exittime; // time inserted in exitlist ++ ++ struct taskcount tc; ++}; ++ ++// state values above ++#define CHECKED 1 // verified that task still exists ++#define INDELETE 2 // task exited but still in hash list ++#define FINISHED 3 // task on exit list ++ ++/* ++** hash tables to find a particular thread group or thread ++*/ ++#define TBUCKS 1024 // must be multiple of 2! 
++#define THASH(x, t) (((x)+t)&(TBUCKS-1)) ++ ++struct taskinfobucket { ++ struct chainer ch; ++ spinlock_t lock; ++} thash[TBUCKS]; ++ ++static unsigned long nrt; // current number of taskinfo allocated ++static unsigned long nrt_ovf; // no taskinfo allocated due to overflow ++static DEFINE_SPINLOCK(nrtlock); ++ ++ ++static struct taskinfo *exithead; // linked list of exited processes ++static struct taskinfo *exittail; ++static DEFINE_SPINLOCK(exitlock); ++ ++static DECLARE_WAIT_QUEUE_HEAD(exitlist_filled); ++static DECLARE_WAIT_QUEUE_HEAD(exitlist_empty); ++ ++static unsigned long nre; // current number of taskinfo on exitlist ++ ++/* ++** structs that uniquely identify a TCP connection (host endian format) ++*/ ++struct tcpv4_ident { ++ uint32_t laddr; /* local IP address */ ++ uint32_t raddr; /* remote IP address */ ++ uint16_t lport; /* local port number */ ++ uint16_t rport; /* remote port number */ ++}; ++ ++struct tcpv6_ident { ++ struct in6_addr laddr; /* local IP address */ ++ struct in6_addr raddr; /* remote IP address */ ++ uint16_t lport; /* local port number */ ++ uint16_t rport; /* remote port number */ ++}; ++ ++/* ++** struct to maintain the reference from a socket ++** to a thread and thread-group ++*/ ++struct sockinfo { ++ struct chainer ch; ++ ++ unsigned char last_state; // last known state of socket ++ uint8_t proto; // protocol ++ ++ union keydef { ++ uint16_t udp; // UDP ident (only portnumber) ++ struct tcpv4_ident tcp4; // TCP connection ident IPv4 ++ struct tcpv6_ident tcp6; // TCP connection ident IPv6 ++ } key; ++ ++ struct taskinfo *tgp; // ref to thread group ++ struct taskinfo *thp; // ref to thread (or NULL) ++ ++ short tgh; // hash number of thread group ++ short thh; // hash number of thread ++ ++ unsigned long sndpacks; // temporary counters in case ++ unsigned long sndbytes; // no relation to process is ++ unsigned long rcvpacks; // known yet ++ unsigned long rcvbytes; ++ ++ unsigned long long lastact; // last updated (jiffies) ++}; ++ ++/* ++** hash table to find a socket reference ++*/ ++#define SBUCKS 1024 // must be multiple of 2! 
++#define SHASHTCP4(x) (((x).raddr+(x).lport+(x).rport)&(SBUCKS-1)) ++#define SHASHUDP(x) ((x)&(SBUCKS-1)) ++ ++struct { ++ struct chainer ch; ++ spinlock_t lock; ++} shash[SBUCKS]; ++ ++static unsigned long nrs; // current number sockinfo allocated ++static unsigned long nrs_ovf; // no sockinfo allocated due to overflow ++static DEFINE_SPINLOCK(nrslock); ++ ++/* ++** various static counters ++*/ ++static unsigned long icmpsndbytes; ++static unsigned long icmpsndpacks; ++static unsigned long icmprcvbytes; ++static unsigned long icmprcvpacks; ++ ++static unsigned long tcpsndpacks; ++static unsigned long tcprcvpacks; ++static unsigned long udpsndpacks; ++static unsigned long udprcvpacks; ++static unsigned long unidentudpsndpacks; ++static unsigned long unidentudprcvpacks; ++static unsigned long unidenttcpsndpacks; ++static unsigned long unidenttcprcvpacks; ++ ++static unsigned long unknownproto; ++ ++static struct timer_list timer; ++static DEFINE_SPINLOCK(gclock); ++static unsigned long long gclast; // last garbage collection (jiffies) ++ ++static struct timespec boottime; ++ ++/* ++** function prototypes ++*/ ++static void analyze_tcpv4_packet(struct sk_buff *, ++ const struct net_device *, int, char, ++ struct iphdr *, void *); ++ ++static void analyze_udp_packet(struct sk_buff *, ++ const struct net_device *, int, char, ++ struct iphdr *, void *); ++ ++static int sock2task(char, struct sockinfo *, ++ struct taskinfo **, short *, ++ struct sk_buff *, const struct net_device *, ++ int, char); ++ ++static void update_taskcounters(struct sk_buff *, ++ const struct net_device *, ++ struct taskinfo *, char); ++ ++static void update_sockcounters(struct sk_buff *, ++ const struct net_device *, ++ struct sockinfo *, char); ++ ++static void sock2task_sync(struct sk_buff *, ++ struct sockinfo *, struct taskinfo *); ++ ++static void register_unident(struct sockinfo *); ++ ++static int calc_reallen(struct sk_buff *, ++ const struct net_device *); ++ ++static void get_tcpv4_ident(struct iphdr *, void *, ++ char, union keydef *); ++ ++static struct sockinfo *find_sockinfo(int, union keydef *, int, int); ++static struct sockinfo *make_sockinfo(int, union keydef *, int, int); ++ ++static void wipesockinfo(void); ++static void wipetaskinfo(void); ++static void wipetaskexit(void); ++ ++static void garbage_collector(void); ++static void gcperiodic(unsigned long unused); ++static void gctaskexit(void); ++static void gcsockinfo(void); ++static void gctaskinfo(void); ++ ++static void move_taskinfo(struct taskinfo *); ++static void delete_taskinfo(struct taskinfo *); ++static void delete_sockinfo(struct sockinfo *); ++ ++static struct taskinfo *get_taskinfo(pid_t, char); ++ ++static int getsockopt(struct sock *, int, void *, int *); ++ ++/* ++** hook definitions ++*/ ++static struct nf_hook_ops hookin_ipv4; ++static struct nf_hook_ops hookout_ipv4; ++ ++/* ++** getsockopt definitions for communication with user space ++*/ ++static struct nf_sockopt_ops sockopts = { ++ .pf = PF_INET, ++ .get_optmin = NETATOP_BASE_CTL, ++ .get_optmax = NETATOP_BASE_CTL+6, ++ .get = getsockopt, ++ .owner = THIS_MODULE, ++}; ++ ++/* ++** hook function to be called for every incoming local packet ++*/ ++static unsigned int ++ipv4_hookin(unsigned int hooknum, ++ struct sk_buff *skb, ++ const struct net_device *in, ++ const struct net_device *out, ++ int (*okfn)(struct sk_buff *)) ++{ ++ struct iphdr *iph; ++ void *trh; ++ ++ if (skb == NULL) // useless socket buffer? 
++ return NF_ACCEPT; ++ ++ /* ++ ** get pointer to IP header and transport header ++ */ ++ iph = (struct iphdr *)skb_network_header(skb); ++ trh = ((char *)iph + (iph->ihl * 4)); ++ ++ /* ++ ** react on protocol number ++ */ ++ switch (iph->protocol) { ++ case IPPROTO_TCP: ++ tcprcvpacks++; ++ analyze_tcpv4_packet(skb, in, 0, 'i', iph, trh); ++ break; ++ ++ case IPPROTO_UDP: ++ udprcvpacks++; ++ analyze_udp_packet(skb, in, 0, 'i', iph, trh); ++ break; ++ ++ case IPPROTO_ICMP: ++ icmprcvpacks++; ++ icmprcvbytes += skb->len + in->hard_header_len + 4; ++ break; ++ ++ default: ++ unknownproto++; ++ } ++ ++ // accept every packet after stats gathering ++ return NF_ACCEPT; ++} ++ ++/* ++** hook function to be called for every outgoing local packet ++*/ ++static unsigned int ++ipv4_hookout(unsigned int hooknum, ++ struct sk_buff *skb, ++ const struct net_device *in, ++ const struct net_device *out, ++ int (*okfn)(struct sk_buff *)) ++{ ++ int in_syscall = !in_interrupt(); ++ struct iphdr *iph; ++ void *trh; ++ ++ if (skb == NULL) // useless socket buffer? ++ return NF_ACCEPT; ++ ++ /* ++ ** get pointer to IP header and transport header ++ */ ++ iph = (struct iphdr *)skb_network_header(skb); ++ trh = skb_transport_header(skb); ++ ++ /* ++ ** react on protocol number ++ */ ++ switch (iph->protocol) { ++ case IPPROTO_TCP: ++ tcpsndpacks++; ++ analyze_tcpv4_packet(skb, out, in_syscall, 'o', iph, trh); ++ break; ++ ++ case IPPROTO_UDP: ++ udpsndpacks++; ++ analyze_udp_packet(skb, out, in_syscall, 'o', iph, trh); ++ break; ++ ++ case IPPROTO_ICMP: ++ icmpsndpacks++; ++ icmpsndbytes += skb->len + out->hard_header_len + 4; ++ break; ++ ++ default: ++ unknownproto++; ++ } ++ ++ // accept every packet after stats gathering ++ return NF_ACCEPT; ++} ++ ++/* ++** generic function (for input and output) to analyze the current packet ++*/ ++static void ++analyze_tcpv4_packet(struct sk_buff *skb, ++ const struct net_device *ndev, // interface description ++ int in_syscall, // called during system call? 
++ char direction, // incoming ('i') or outgoing ('o') ++ struct iphdr *iph, void *trh) ++{ ++ union keydef key; ++ struct sockinfo *sip; ++ int bs; // hash bucket for sockinfo ++ unsigned long sflags; ++ ++ /* ++ ** determine tcpv4_ident that identifies this TCP packet ++ ** and calculate hash bucket in sockinfo hash ++ */ ++ get_tcpv4_ident(iph, trh, direction, &key); ++ ++ /* ++ ** check if we have seen this tcpv4_ident before with a ++ ** corresponding thread and thread group ++ */ ++ bs = SHASHTCP4(key.tcp4); ++ ++ spin_lock_irqsave(&shash[bs].lock, sflags); ++ ++ if ( (sip = find_sockinfo(IPPROTO_TCP, &key, sizeof key.tcp4, bs)) ++ == NULL) { ++ // no sockinfo yet: create one ++ if ( (sip = make_sockinfo(IPPROTO_TCP, &key, ++ sizeof key.tcp4, bs)) == NULL) { ++ if (direction == 'i') ++ unidenttcprcvpacks++; ++ else ++ unidenttcpsndpacks++; ++ goto unlocks; ++ } ++ } ++ ++ if (skb->sk) ++ sip->last_state = skb->sk->sk_state; ++ ++ /* ++ ** if needed (re)connect the sockinfo to a taskinfo and update ++ ** the counters ++ */ ++ ++ // connect to thread group and update ++ if (sock2task('g', sip, &sip->tgp, &sip->tgh, ++ skb, ndev, in_syscall, direction)) { ++ // connect to thread and update ++ (void) sock2task('t', sip, &sip->thp, &sip->thh, ++ skb, ndev, in_syscall, direction); ++ } ++ ++unlocks: ++ spin_unlock_irqrestore(&shash[bs].lock, sflags); ++} ++ ++ ++/* ++** generic function (for input and output) to analyze the current packet ++*/ ++static void ++analyze_udp_packet(struct sk_buff *skb, ++ const struct net_device *ndev, // interface description ++ int in_syscall, // called during system call? ++ char direction, // incoming ('i') or outgoing ('o') ++ struct iphdr *iph, void *trh) ++{ ++ struct udphdr *udph = (struct udphdr *)trh; ++ uint16_t udplocal = (direction == 'i' ? 
++ ntohs(udph->dest) : ntohs(udph->source)); ++ int bs; // hash bucket for sockinfo ++ ++ union keydef key; ++ struct sockinfo *sip; ++ unsigned long sflags; ++ ++ /* ++ ** check if we have seen this local UDP port before with a ++ ** corresponding thread and thread group ++ */ ++ key.udp = udplocal; ++ bs = SHASHUDP(udplocal); ++ ++ spin_lock_irqsave(&shash[bs].lock, sflags); ++ ++ if ( (sip = find_sockinfo(IPPROTO_UDP, &key, sizeof key.udp, bs)) ++ == NULL) { ++ // no sockinfo yet: create one ++ if ( (sip = make_sockinfo(IPPROTO_UDP, &key, ++ sizeof key.udp, bs)) == NULL) { ++ if (direction == 'i') ++ unidentudprcvpacks++; ++ else ++ unidentudpsndpacks++; ++ goto unlocks; ++ } ++ } ++ ++ /* ++ ** if needed (re)connect the sockinfo to a taskinfo and update ++ ** the counters ++ */ ++ ++ // connect to thread group and update ++ if (sock2task('g', sip, &sip->tgp, &sip->tgh, ++ skb, ndev, in_syscall, direction)) { ++ // connect to thread and update ++ (void) sock2task('t', sip, &sip->thp, &sip->thh, ++ skb, ndev, in_syscall, direction); ++ } ++ ++unlocks: ++ spin_unlock_irqrestore(&shash[bs].lock, sflags); ++} ++ ++/* ++** connect the sockinfo to the correct taskinfo and update the counters ++*/ ++static int ++sock2task(char idtype, struct sockinfo *sip, struct taskinfo **tipp, ++ short *hash, struct sk_buff *skb, const struct net_device *ndev, ++ int in_syscall, char direction) ++{ ++ pid_t curid; ++ unsigned long tflags; ++ ++ if (*tipp == NULL) { ++ /* ++ ** no taskinfo connected yet for this reference from ++ ** sockinfo; to connect to a taskinfo, we must ++ ** be in system call handling now --> verify ++ */ ++ if (!in_syscall) { ++ if (idtype == 'g') ++ update_sockcounters(skb, ndev, sip, direction); ++ ++ return 0; // failed ++ } ++ ++ /* ++ ** try to find existing taskinfo or create new taskinfo ++ */ ++ curid = (idtype == 'g' ? current->tgid : current->pid); ++ ++ *hash = THASH(curid, idtype); // calc hashQ ++ ++ spin_lock_irqsave(&thash[*hash].lock, tflags); ++ ++ if ( (*tipp = get_taskinfo(curid, idtype)) == NULL) { ++ /* ++ ** not possible to connect ++ */ ++ spin_unlock_irqrestore(&thash[*hash].lock, tflags); ++ ++ if (idtype == 'g') ++ update_sockcounters(skb, ndev, sip, direction); ++ ++ return 0; // failed ++ } ++ ++ /* ++ ** new connection made: ++ ** update task counters with sock counters ++ */ ++ sock2task_sync(skb, sip, *tipp); ++ } else { ++ /* ++ ** already related to thread group or thread ++ ** lock existing task ++ */ ++ spin_lock_irqsave(&thash[*hash].lock, tflags); ++ ++ /* ++ ** check if socket has been passed to another process in the ++ ** meantime, like programs as xinetd use to do ++ ** if so, connect sockinfo to the new task ++ */ ++ if (in_syscall) { ++ curid = (idtype == 'g' ? 
current->tgid : current->pid); ++ ++ if ((*tipp)->id != curid) { ++ spin_unlock_irqrestore(&thash[*hash].lock, ++ tflags); ++ *hash = THASH(curid, idtype); ++ ++ spin_lock_irqsave(&thash[*hash].lock, tflags); ++ ++ if ( (*tipp = get_taskinfo(curid, idtype)) ++ == NULL) { ++ spin_unlock_irqrestore( ++ &thash[*hash].lock, tflags); ++ return 0; ++ } ++ } ++ } ++ } ++ ++ update_taskcounters(skb, ndev, *tipp, direction); ++ ++ spin_unlock_irqrestore(&thash[*hash].lock, tflags); ++ ++ return 1; ++} ++ ++/* ++** update the statistics of a particular thread group or thread ++*/ ++static void ++update_taskcounters(struct sk_buff *skb, const struct net_device *ndev, ++ struct taskinfo *tip, char direction) ++{ ++ struct iphdr *iph = (struct iphdr *)skb_network_header(skb); ++ int reallen = calc_reallen(skb, ndev); ++ ++ switch (iph->protocol) { ++ case IPPROTO_TCP: ++ if (direction == 'i') { ++ tip->tc.tcprcvpacks++; ++ tip->tc.tcprcvbytes += reallen; ++ } else { ++ tip->tc.tcpsndpacks++; ++ tip->tc.tcpsndbytes += reallen; ++ } ++ break; ++ ++ case IPPROTO_UDP: ++ if (direction == 'i') { ++ tip->tc.udprcvpacks++; ++ tip->tc.udprcvbytes += reallen; ++ } else { ++ tip->tc.udpsndpacks++; ++ tip->tc.udpsndbytes += reallen; ++ } ++ } ++} ++ ++/* ++** update the statistics of a sockinfo without a connected task ++*/ ++static void ++update_sockcounters(struct sk_buff *skb, const struct net_device *ndev, ++ struct sockinfo *sip, char direction) ++{ ++ int reallen = calc_reallen(skb, ndev); ++ ++ if (direction == 'i') { ++ sip->rcvpacks++; ++ sip->rcvbytes += reallen; ++ } else { ++ sip->sndpacks++; ++ sip->sndbytes += reallen; ++ } ++} ++ ++/* ++** add the temporary counters in the sockinfo to the new connected task ++*/ ++static void ++sock2task_sync(struct sk_buff *skb, struct sockinfo *sip, struct taskinfo *tip) ++{ ++ struct iphdr *iph = (struct iphdr *)skb_network_header(skb); ++ ++ switch (iph->protocol) { ++ case IPPROTO_TCP: ++ tip->tc.tcprcvpacks += sip->rcvpacks; ++ tip->tc.tcprcvbytes += sip->rcvbytes; ++ tip->tc.tcpsndpacks += sip->sndpacks; ++ tip->tc.tcpsndbytes += sip->sndbytes; ++ break; ++ ++ case IPPROTO_UDP: ++ tip->tc.udprcvpacks += sip->rcvpacks; ++ tip->tc.udprcvbytes += sip->rcvbytes; ++ tip->tc.udpsndpacks += sip->sndpacks; ++ tip->tc.udpsndbytes += sip->sndbytes; ++ } ++} ++ ++static void ++register_unident(struct sockinfo *sip) ++{ ++ switch (sip->proto) { ++ case IPPROTO_TCP: ++ unidenttcprcvpacks += sip->rcvpacks; ++ unidenttcpsndpacks += sip->sndpacks; ++ break; ++ ++ case IPPROTO_UDP: ++ unidentudprcvpacks += sip->rcvpacks; ++ unidentudpsndpacks += sip->sndpacks; ++ } ++} ++ ++/* ++** calculate the number of bytes that are really sent or received ++*/ ++static int ++calc_reallen(struct sk_buff *skb, const struct net_device *ndev) ++{ ++ /* ++ ** calculate the real load of this packet on the network: ++ ** ++ ** - length of IP header, TCP/UDP header and data (skb->len) ++ ** ++ ** since packet assembly/disassembly is done by the IP layer ++ ** (we get an input packet that has been assembled already and ++ ** an output packet that still has to be assembled), additional ++ ** IP headers/interface headers and interface headers have ++ ** to be calculated for packets that are larger than the mtu ++ ** ++ ** - interface header length + 4 bytes crc ++ */ ++ int reallen = skb->len; ++ ++ if (reallen > ndev->mtu) ++ reallen += (reallen / ndev->mtu) * ++ (sizeof(struct iphdr) + ndev->hard_header_len + 4); ++ ++ reallen += ndev->hard_header_len + 4; ++ ++ return reallen; ++} ++ ++/* ++** 
find the tcpv4_ident for the current packet, represented by ++** the skb_buff ++*/ ++static void ++get_tcpv4_ident(struct iphdr *iph, void *trh, char direction, union keydef *key) ++{ ++ struct tcphdr *tcph = (struct tcphdr *)trh; ++ ++ memset(key, 0, sizeof *key); // important for memcmp later on ++ ++ /* ++ ** determine local/remote IP address and ++ ** determine local/remote port number ++ */ ++ switch (direction) { ++ case 'i': // incoming packet ++ key->tcp4.laddr = ntohl(iph->daddr); ++ key->tcp4.raddr = ntohl(iph->saddr); ++ key->tcp4.lport = ntohs(tcph->dest); ++ key->tcp4.rport = ntohs(tcph->source); ++ break; ++ ++ case 'o': // outgoing packet ++ key->tcp4.laddr = ntohl(iph->saddr); ++ key->tcp4.raddr = ntohl(iph->daddr); ++ key->tcp4.lport = ntohs(tcph->source); ++ key->tcp4.rport = ntohs(tcph->dest); ++ } ++} ++ ++/* ++** search for the sockinfo holding the given address info ++** the appropriate hash bucket must have been locked before calling ++*/ ++static struct sockinfo * ++find_sockinfo(int proto, union keydef *identp, int identsz, int hash) ++{ ++ struct sockinfo *sip = shash[hash].ch.next; ++ ++ /* ++ ** search for appropriate struct ++ */ ++ while (sip != (void *)&shash[hash].ch) { ++ if ( memcmp(&sip->key, identp, identsz) == 0 && ++ sip->proto == proto) { ++ sip->lastact = jiffies_64; ++ return sip; ++ } ++ ++ sip = sip->ch.next; ++ } ++ ++ return NULL; // not existing ++} ++ ++/* ++** create a new sockinfo and fill ++** the appropriate hash bucket must have been locked before calling ++*/ ++static struct sockinfo * ++make_sockinfo(int proto, union keydef *identp, int identsz, int hash) ++{ ++ struct sockinfo *sip; ++ unsigned long flags; ++ ++ /* ++ ** check if the threshold of memory used for sockinfo structs ++ ** is reached to avoid that a fork bomb of processes opening ++ ** a socket leads to memory overload ++ */ ++ if ( (nrs+1) * sizeof(struct sockinfo) > SILIMIT) { ++ spin_lock_irqsave(&nrslock, flags); ++ nrs_ovf++; ++ spin_unlock_irqrestore(&nrslock, flags); ++ return NULL; ++ } ++ ++ if ( (sip = kzalloc(sizeof *sip, GFP_ATOMIC)) == NULL) ++ return NULL; ++ ++ spin_lock_irqsave(&nrslock, flags); ++ nrs++; ++ spin_unlock_irqrestore(&nrslock, flags); ++ ++ /* ++ ** insert new struct in doubly linked list ++ */ ++ sip->ch.next = &shash[hash].ch; ++ sip->ch.prev = shash[hash].ch.prev; ++ ((struct sockinfo *)shash[hash].ch.prev)->ch.next = sip; ++ shash[hash].ch.prev = sip; ++ ++ sip->proto = proto; ++ sip->lastact = jiffies_64; ++ sip->key = *identp; ++ ++ return sip; ++} ++ ++/* ++** search the taskinfo structure holding the info about the given id/type ++** if such taskinfo is not yet present, create a new one ++*/ ++static struct taskinfo * ++get_taskinfo(pid_t id, char type) ++{ ++ int bt = THASH(id, type); ++ struct taskinfo *tip = thash[bt].ch.next; ++ unsigned long tflags; ++ ++ /* ++ ** search if id exists already ++ */ ++ while (tip != (void *)&thash[bt].ch) { ++ if (tip->id == id && tip->type == type) ++ return tip; ++ ++ tip = tip->ch.next; ++ } ++ ++ /* ++ ** check if the threshold of memory used for taskinfo structs ++ ** is reached to avoid that a fork bomb of processes opening ++ ** a socket lead to memory overload ++ */ ++ if ( (nre+nrt+1) * sizeof(struct taskinfo) > TILIMIT) { ++ spin_lock_irqsave(&nrtlock, tflags); ++ nrt_ovf++; ++ spin_unlock_irqrestore(&nrtlock, tflags); ++ return NULL; ++ } ++ ++ /* ++ ** id not known yet ++ ** add new entry to hash list ++ */ ++ if ( (tip = kzalloc(sizeof *tip, GFP_ATOMIC)) == NULL) ++ return NULL; ++ ++ 
spin_lock_irqsave(&nrtlock, tflags); ++ nrt++; ++ spin_unlock_irqrestore(&nrtlock, tflags); ++ ++ /* ++ ** insert new struct in doubly linked list ++ ** and fill values ++ */ ++ tip->ch.next = &thash[bt].ch; ++ tip->ch.prev = thash[bt].ch.prev; ++ ((struct taskinfo *)thash[bt].ch.prev)->ch.next = tip; ++ thash[bt].ch.prev = tip; ++ ++ tip->id = id; ++ tip->type = type; ++ ++ tip->btime = current->real_start_time.tv_sec + boottime.tv_sec; ++ ++ if (current->real_start_time.tv_nsec + boottime.tv_nsec > NSEC_PER_SEC) ++ tip->btime++; ++ ++ strncpy(tip->command, current->comm, COMLEN); ++ ++ return tip; ++} ++ ++/* ++** function that runs every second to see if a ++** time-based garbage collection cycle has to be ++** forced (i.e. if no process forces it) ++*/ ++static void ++gcperiodic(unsigned long unused) ++{ ++ if (jiffies_64 >= gclast + GCINTERVAL) ++ garbage_collector(); ++ ++ /* ++ ** set timer for next second ++ */ ++ timer.expires = jiffies_64 + HZ; ++ timer.function = gcperiodic; ++ add_timer(&timer); ++} ++ ++/* ++** garbage collector that removes: ++** - exited tasks that are not by user mode programs ++** - sockinfo's that are not used any more ++** - taskinfo's that do not exist any more ++** ++** a lock avoids that the garbage collector runs several times in parallel ++*/ ++static void ++garbage_collector(void) ++{ ++ unsigned long flags; ++ ++ spin_lock_irqsave(&gclock, flags); ++ ++ if (jiffies_64 < gclast + (HZ/2)) { // maximum 2 GC cycles per second ++ spin_unlock_irqrestore(&gclock, flags); ++ return; ++ } ++ ++ gctaskexit(); // remove remaining taskinfo structs from exit list ++ ++ gcsockinfo(); // clean up sockinfo structs in shash list ++ ++ gctaskinfo(); // clean up taskinfo structs in thash list ++ ++ gclast = jiffies_64; ++ ++ spin_unlock_irqrestore(&gclock, flags); ++} ++ ++/* ++** tasks in the exitlist can be read by a user mode process for a limited ++** amount of time; this function removes all taskinfo structures that have ++** not been read within that period of time ++** notice that exited processes are chained to the tail, so the oldest ++** can be found at the head ++*/ ++static void ++gctaskexit() ++{ ++ unsigned long flags; ++ struct taskinfo *tip; ++ ++ spin_lock_irqsave(&exitlock, flags); ++ ++ for (tip=exithead; tip;) { ++ if (jiffies_64 < tip->exittime + GCINTERVAL) ++ break; ++ ++ // remove taskinfo from exitlist ++ exithead = tip->ch.next; ++ kfree(tip); ++ nre--; ++ tip = exithead; ++ } ++ ++ /* ++ ** if list empty now, then exithead and exittail both NULL ++ ** wakeup waiters for emptylist ++ */ ++ if (nre == 0) { ++ exittail = NULL; ++ wake_up_interruptible(&exitlist_empty); ++ } ++ ++ spin_unlock_irqrestore(&exitlock, flags); ++} ++ ++/* ++** cleanup sockinfo structures that are connected to finished processes ++*/ ++static void ++gcsockinfo() ++{ ++ int i; ++ struct sockinfo *sip, *sipsave; ++ unsigned long sflags, tflags; ++ ++ /* ++ ** go through all sockinfo hash buckets ++ */ ++ for (i=0; i < SBUCKS; i++) { ++ if (shash[i].ch.next == (void *)&shash[i].ch) ++ continue; // quick return without lock ++ ++ spin_lock_irqsave(&shash[i].lock, sflags); ++ ++ sip = shash[i].ch.next; ++ ++ /* ++ ** search all sockinfo structs chained in one bucket ++ */ ++ while (sip != (void *)&shash[i].ch) { ++ /* ++ ** TCP connections that were not in ++ ** state ESTABLISHED or LISTEN can be ++ ** eliminated ++ */ ++ if (sip->proto == IPPROTO_TCP) { ++ switch (sip->last_state) { ++ case TCP_ESTABLISHED: ++ case TCP_LISTEN: ++ break; ++ ++ default: ++ sipsave = 
sip->ch.next; ++ delete_sockinfo(sip); ++ sip = sipsave; ++ continue; ++ } ++ } ++ ++ /* ++ ** check if this sockinfo has no relation ++ ** for a while with a thread group ++ ** if so, delete the sockinfo ++ */ ++ if (sip->tgp == NULL) { ++ if (sip->lastact + GCMAXUNREF < jiffies_64) { ++ register_unident(sip); ++ sipsave = sip->ch.next; ++ delete_sockinfo(sip); ++ sip = sipsave; ++ } else { ++ sip = sip->ch.next; ++ } ++ continue; ++ } ++ ++ /* ++ ** check if referred thread group is ++ ** already marked as 'indelete' during this ++ ** sockinfo search ++ ** if so, delete this sockinfo ++ */ ++ spin_lock_irqsave(&thash[sip->tgh].lock, tflags); ++ ++ if (sip->tgp->state == INDELETE) { ++ spin_unlock_irqrestore(&thash[sip->tgh].lock, ++ tflags); ++ sipsave = sip->ch.next; ++ delete_sockinfo(sip); ++ sip = sipsave; ++ continue; ++ } ++ ++ /* ++ ** check if referred thread group still exists; ++ ** this step will be skipped if we already verified ++ ** the existance of the thread group earlier during ++ ** this garbage collection cycle ++ */ ++ if (sip->tgp->state != CHECKED) { ++ /* ++ ** connected thread group not yet verified ++ ** during this cycle, so check if it still ++ ** exists ++ ** if not, mark the thread group as 'indelete' ++ ** (it can not be deleted right now because ++ ** we might find other sockinfo's referring ++ ** to this thread group during the current ++ ** cycle) and delete this sockinfo ++ ** if the thread group exists, just mark ++ ** it as 'checked' for this cycle ++ */ ++ if (find_vpid(sip->tgp->id) == NULL) { ++ sip->tgp->state = INDELETE; ++ spin_unlock_irqrestore( ++ &thash[sip->tgh].lock, tflags); ++ ++ sipsave = sip->ch.next; ++ delete_sockinfo(sip); ++ sip = sipsave; ++ continue; ++ } else { ++ sip->tgp->state = CHECKED; ++ } ++ } ++ ++ spin_unlock_irqrestore(&thash[sip->tgh].lock, tflags); ++ ++ /* ++ ** check if this sockinfo has a relation with a thread ++ ** if not, skip further handling of this sockinfo ++ */ ++ if (sip->thp == NULL) { ++ sip = sip->ch.next; ++ continue; ++ } ++ ++ /* ++ ** check if referred thread is already marked ++ ** as 'indelete' during this sockinfo search ++ ** if so, break connection ++ */ ++ spin_lock_irqsave(&thash[sip->thh].lock, tflags); ++ ++ if (sip->thp->state == INDELETE) { ++ spin_unlock_irqrestore(&thash[sip->thh].lock, ++ tflags); ++ sip->thp = NULL; ++ sip = sip->ch.next; ++ continue; ++ } ++ ++ /* ++ ** check if referred thread is already checked ++ ** during this sockinfo search ++ */ ++ if (sip->thp->state == CHECKED) { ++ spin_unlock_irqrestore(&thash[sip->thh].lock, ++ tflags); ++ sip = sip->ch.next; ++ continue; ++ } ++ ++ /* ++ ** connected thread not yet verified ++ ** check if it still exists ++ ** if not, mark it as 'indelete' and break connection ++ ** if thread exists, mark it 'checked' ++ */ ++ if (find_vpid(sip->thp->id) == NULL) { ++ sip->thp->state = INDELETE; ++ sip->thp = NULL; ++ } else { ++ sip->thp->state = CHECKED; ++ } ++ ++ spin_unlock_irqrestore(&thash[sip->thh].lock, tflags); ++ ++ /* ++ ** check if a TCP port has not been used ++ ** for some time --> destroy even if the thread ++ ** (group) is still there ++ */ ++ if (sip->proto == IPPROTO_TCP && ++ sip->lastact + GCMAXTCP < jiffies_64) { ++ sipsave = sip->ch.next; ++ delete_sockinfo(sip); ++ sip = sipsave; ++ continue; ++ } ++ ++ /* ++ ** check if a UDP port has not been used ++ ** for some time --> destroy even if the thread ++ ** (group) is still there ++ ** e.g. 
outgoing DNS requests (to remote port 53) are ++ ** issued every time with another source port being ++ ** a new object that should not be kept too long; ++ ** local well-known ports are useful to keep ++ */ ++ if (sip->proto == IPPROTO_UDP && ++ sip->lastact + GCMAXUDP < jiffies_64 && ++ sip->key.udp > 1024) { ++ sipsave = sip->ch.next; ++ delete_sockinfo(sip); ++ sip = sipsave; ++ continue; ++ } ++ ++ sip = sip->ch.next; ++ } ++ ++ spin_unlock_irqrestore(&shash[i].lock, sflags); ++ } ++} ++ ++/* ++** remove taskinfo structures of finished tasks from hash list ++*/ ++static void ++gctaskinfo() ++{ ++ int i; ++ struct taskinfo *tip, *tipsave; ++ unsigned long tflags; ++ ++ /* ++ ** go through all taskinfo hash buckets ++ */ ++ for (i=0; i < TBUCKS; i++) { ++ if (thash[i].ch.next == (void *)&thash[i].ch) ++ continue; // quick return without lock ++ ++ spin_lock_irqsave(&thash[i].lock, tflags); ++ ++ tip = thash[i].ch.next; ++ ++ /* ++ ** check all taskinfo structs chained to this bucket ++ */ ++ while (tip != (void *)&thash[i].ch) { ++ switch (tip->state) { ++ /* ++ ** remove INDELETE tasks from the hash buckets ++ ** -- move thread group to exitlist ++ ** -- destroy thread right away ++ */ ++ case INDELETE: ++ tipsave = tip->ch.next; ++ ++ if (tip->type == 'g') ++ move_taskinfo(tip); // thread group ++ else ++ delete_taskinfo(tip); // thread ++ ++ tip = tipsave; ++ break; ++ ++ case CHECKED: ++ tip->state = 0; ++ tip = tip->ch.next; ++ break; ++ ++ default: // not checked yet ++ if (find_vpid(tip->id) == NULL) { ++ tipsave = tip->ch.next; ++ ++ if (tip->type == 'g') ++ move_taskinfo(tip); ++ else ++ delete_taskinfo(tip); ++ ++ tip = tipsave; ++ } else { ++ tip = tip->ch.next; ++ } ++ } ++ } ++ ++ spin_unlock_irqrestore(&thash[i].lock, tflags); ++ } ++} ++ ++ ++/* ++** remove all sockinfo structs ++*/ ++static void ++wipesockinfo() ++{ ++ struct sockinfo *sip, *sipsave; ++ int i; ++ unsigned long sflags; ++ ++ for (i=0; i < SBUCKS; i++) { ++ spin_lock_irqsave(&shash[i].lock, sflags); ++ ++ sip = shash[i].ch.next; ++ ++ /* ++ ** free all structs chained in one bucket ++ */ ++ while (sip != (void *)&shash[i].ch) { ++ sipsave = sip->ch.next; ++ delete_sockinfo(sip); ++ sip = sipsave; ++ } ++ ++ spin_unlock_irqrestore(&shash[i].lock, sflags); ++ } ++} ++ ++/* ++** remove all taskinfo structs from hash list ++*/ ++static void ++wipetaskinfo() ++{ ++ struct taskinfo *tip, *tipsave; ++ int i; ++ unsigned long tflags; ++ ++ for (i=0; i < TBUCKS; i++) { ++ spin_lock_irqsave(&thash[i].lock, tflags); ++ ++ tip = thash[i].ch.next; ++ ++ /* ++ ** free all structs chained in one bucket ++ */ ++ while (tip != (void *)&thash[i].ch) { ++ tipsave = tip->ch.next; ++ delete_taskinfo(tip); ++ tip = tipsave; ++ } ++ ++ spin_unlock_irqrestore(&thash[i].lock, tflags); ++ } ++} ++ ++/* ++** remove all taskinfo structs from exit list ++*/ ++static void ++wipetaskexit() ++{ ++ gctaskexit(); ++} ++ ++/* ++** move one taskinfo struct from hash bucket to exitlist ++*/ ++static void ++move_taskinfo(struct taskinfo *tip) ++{ ++ unsigned long flags; ++ ++ /* ++ ** remove from hash list ++ */ ++ ((struct taskinfo *)tip->ch.next)->ch.prev = tip->ch.prev; ++ ((struct taskinfo *)tip->ch.prev)->ch.next = tip->ch.next; ++ ++ spin_lock_irqsave(&nrtlock, flags); ++ nrt--; ++ spin_unlock_irqrestore(&nrtlock, flags); ++ ++ /* ++ ** add to exitlist ++ */ ++ tip->ch.next = NULL; ++ tip->state = FINISHED; ++ tip->exittime = jiffies_64; ++ ++ spin_lock_irqsave(&exitlock, flags); ++ ++ if (exittail) { // list filled? 
++ exittail->ch.next = tip; ++ exittail = tip; ++ } else { // list empty ++ exithead = exittail = tip; ++ } ++ ++ nre++; ++ ++ wake_up_interruptible(&exitlist_filled); ++ ++ spin_unlock_irqrestore(&exitlock, flags); ++} ++ ++/* ++** remove one taskinfo struct for the hash bucket chain ++*/ ++static void ++delete_taskinfo(struct taskinfo *tip) ++{ ++ unsigned long flags; ++ ++ ((struct taskinfo *)tip->ch.next)->ch.prev = tip->ch.prev; ++ ((struct taskinfo *)tip->ch.prev)->ch.next = tip->ch.next; ++ ++ kfree(tip); ++ ++ spin_lock_irqsave(&nrtlock, flags); ++ nrt--; ++ spin_unlock_irqrestore(&nrtlock, flags); ++} ++ ++/* ++** remove one sockinfo struct for the hash bucket chain ++*/ ++static void ++delete_sockinfo(struct sockinfo *sip) ++{ ++ unsigned long flags; ++ ++ ((struct sockinfo *)sip->ch.next)->ch.prev = sip->ch.prev; ++ ((struct sockinfo *)sip->ch.prev)->ch.next = sip->ch.next; ++ ++ kfree(sip); ++ ++ spin_lock_irqsave(&nrslock, flags); ++ nrs--; ++ spin_unlock_irqrestore(&nrslock, flags); ++} ++ ++/* ++** read function for /proc/netatop ++*/ ++static int ++netatop_read_proc(char *buf, char **start, off_t offset, ++ int count, int *eof, void *data) ++{ ++ return sprintf(buf, "tcpsndpacks: %9lu (unident: %9lu)\n" ++ "tcprcvpacks: %9lu (unident: %9lu)\n" ++ "udpsndpacks: %9lu (unident: %9lu)\n" ++ "udprcvpacks: %9lu (unident: %9lu)\n\n" ++ "icmpsndpacks: %9lu\n" ++ "icmprcvpacks: %9lu\n\n" ++ "#sockinfo: %9lu (overflow: %8lu)\n" ++ "#taskinfo: %9lu (overflow: %8lu)\n" ++ "#taskexit: %9lu\n", ++ tcpsndpacks, unidenttcpsndpacks, ++ tcprcvpacks, unidenttcprcvpacks, ++ udpsndpacks, unidentudpsndpacks, ++ udprcvpacks, unidentudprcvpacks, ++ icmpsndpacks, icmprcvpacks, ++ nrs, nrs_ovf, ++ nrt, nrt_ovf, ++ nre); ++} ++ ++/* ++** called when user spce issues system call getsockopt() ++*/ ++static int ++getsockopt(struct sock *sk, int cmd, void __user *user, int *len) ++{ ++ int bt; ++ struct taskinfo *tip; ++ char tasktype = 't'; ++ struct netpertask npt; ++ unsigned long tflags; ++ ++ /* ++ ** verify the proper privileges ++ */ ++ if (!capable(CAP_NET_ADMIN)) ++ return -EPERM; ++ ++ /* ++ ** react on command ++ */ ++ switch (cmd) { ++ case NETATOP_PROBE: ++ break; ++ ++ case NETATOP_FORCE_GC: ++ garbage_collector(); ++ break; ++ ++ case NETATOP_EMPTY_EXIT: ++ while (nre > 0) { ++ if (wait_event_interruptible(exitlist_empty, nre == 0)) ++ return -ERESTARTSYS; ++ } ++ break; ++ ++ case NETATOP_GETCNT_EXIT: ++ if (nre == 0) ++ wake_up_interruptible(&exitlist_empty); ++ ++ if (*len < sizeof(pid_t)) ++ return -EINVAL; ++ ++ if (*len > sizeof npt) ++ *len = sizeof npt; ++ ++ spin_lock_irqsave(&exitlock, tflags); ++ ++ /* ++ ** check if an exited process is present ++ ** if not, wait for it... 
++ */ ++ while (nre == 0) { ++ spin_unlock_irqrestore(&exitlock, tflags); ++ ++ if ( wait_event_interruptible(exitlist_filled, nre > 0)) ++ return -ERESTARTSYS; ++ ++ spin_lock_irqsave(&exitlock, tflags); ++ } ++ ++ /* ++ ** get first eprocess from exitlist and remove it from there ++ */ ++ tip = exithead; ++ ++ if ( (exithead = tip->ch.next) == NULL) ++ exittail = NULL; ++ ++ nre--; ++ ++ spin_unlock_irqrestore(&exitlock, tflags); ++ ++ /* ++ ** pass relevant info to user mode ++ ** and free taskinfo struct ++ */ ++ npt.id = tip->id; ++ npt.tc = tip->tc; ++ npt.btime = tip->btime; ++ memcpy(npt.command, tip->command, COMLEN); ++ ++ if (copy_to_user(user, &npt, *len) != 0) ++ return -EFAULT; ++ ++ kfree(tip); ++ ++ return 0; ++ ++ case NETATOP_GETCNT_TGID: ++ tasktype = 'g'; ++ ++ case NETATOP_GETCNT_PID: ++ if (*len < sizeof(pid_t)) ++ return -EINVAL; ++ ++ if (*len > sizeof npt) ++ *len = sizeof npt; ++ ++ if (copy_from_user(&npt, user, *len) != 0) ++ return -EFAULT; ++ ++ /* ++ ** search requested id in taskinfo hash ++ */ ++ bt = THASH(npt.id, tasktype); // calculate hash ++ ++ if (thash[bt].ch.next == (void *)&thash[bt].ch) ++ return -ESRCH; // quick return without lock ++ ++ spin_lock_irqsave(&thash[bt].lock, tflags); ++ ++ tip = thash[bt].ch.next; ++ ++ while (tip != (void *)&thash[bt].ch) { ++ // is this the one? ++ if (tip->id == npt.id && tip->type == tasktype) { ++ /* ++ ** found: copy results to user space ++ */ ++ memcpy(npt.command, tip->command, COMLEN); ++ npt.tc = tip->tc; ++ npt.btime = tip->btime; ++ ++ spin_unlock_irqrestore(&thash[bt].lock, tflags); ++ ++ if (copy_to_user(user, &npt, *len) != 0) ++ return -EFAULT; ++ else ++ return 0; ++ } ++ ++ tip = tip->ch.next; ++ } ++ ++ spin_unlock_irqrestore(&thash[bt].lock, tflags); ++ return -ESRCH; ++ ++ default: ++ printk(KERN_INFO "unknown getsockopt command %d\n", cmd); ++ return -EINVAL; ++ } ++ ++ return 0; ++} ++ ++/* ++** called when module loaded ++*/ ++int ++init_module() ++{ ++ int i; ++ ++ /* ++ ** initialize various admi ++ */ ++ for (i=0; i < TBUCKS; i++) { ++ thash[i].ch.next = &thash[i].ch; ++ thash[i].ch.prev = &thash[i].ch; ++ spin_lock_init(&thash[i].lock); ++ } ++ ++ for (i=0; i < SBUCKS; i++) { ++ shash[i].ch.next = &shash[i].ch; ++ shash[i].ch.prev = &shash[i].ch; ++ spin_lock_init(&shash[i].lock); ++ } ++ ++ getboottime(&boottime); ++ ++ /* ++ ** register getsockopt for user space communication ++ */ ++ if (nf_register_sockopt(&sockopts) < 0) ++ return -1; ++ ++ /* ++ ** prepare hooks and register ++ */ ++ hookin_ipv4.hooknum = NF_IP_LOCAL_IN; // input packs ++ hookin_ipv4.hook = ipv4_hookin; // func to call ++ hookin_ipv4.pf = PF_INET; // IPV4 packets ++ hookin_ipv4.priority = NF_IP_PRI_FIRST; // highest prio ++ ++ hookout_ipv4.hooknum = NF_IP_LOCAL_OUT; // output packs ++ hookout_ipv4.hook = ipv4_hookout; // func to call ++ hookout_ipv4.pf = PF_INET; // IPV4 packets ++ hookout_ipv4.priority = NF_IP_PRI_FIRST; // highest prio ++ ++ nf_register_hook(&hookin_ipv4); // register hook ++ nf_register_hook(&hookout_ipv4); // register hook ++ ++ /* ++ ** create a /proc-entry to produce status-info on request ++ */ ++ create_proc_read_entry("netatop", 0444, NULL, netatop_read_proc, NULL); ++ ++ /* ++ ** activate timer for periodic call of garbage collector ++ */ ++ init_timer(&timer); ++ ++ timer.expires = jiffies_64 + HZ; ++ timer.function = gcperiodic; ++ add_timer(&timer); ++ ++ return 0; // return success ++} ++ ++/* ++** called when module unloaded ++*/ ++void ++cleanup_module() ++{ ++ 
nf_unregister_hook(&hookin_ipv4); ++ nf_unregister_hook(&hookout_ipv4); ++ ++ remove_proc_entry("netatop", NULL); ++ ++ del_timer(&timer); ++ ++ nf_unregister_sockopt(&sockopts); ++ ++ /* ++ ** destroy allocated stats ++ */ ++ wipesockinfo(); ++ wipetaskinfo(); ++ wipetaskexit(); ++} +diff -uNr linux-3.2.33-go.orig/3rdparty/netatop/netatop.h 3rdparty/netatop/netatop.h +--- linux-3.2.33-go.orig/3rdparty/netatop/netatop.h 1970-01-01 01:00:00.000000000 +0100 ++++ 3rdparty/netatop/netatop.h 2012-11-12 18:08:29.000000000 +0100 +@@ -0,0 +1,47 @@ ++#define COMLEN 16 ++ ++struct taskcount { ++ unsigned long long tcpsndpacks; ++ unsigned long long tcpsndbytes; ++ unsigned long long tcprcvpacks; ++ unsigned long long tcprcvbytes; ++ ++ unsigned long long udpsndpacks; ++ unsigned long long udpsndbytes; ++ unsigned long long udprcvpacks; ++ unsigned long long udprcvbytes; ++ ++ /* space for future extensions */ ++}; ++ ++struct netpertask { ++ pid_t id; // tgid or tid (depending on command) ++ unsigned long btime; ++ char command[COMLEN]; ++ ++ struct taskcount tc; ++}; ++ ++ ++/* ++** getsocktop commands ++*/ ++#define NETATOP_BASE_CTL 15661 ++ ++// just probe if the netatop module is active ++#define NETATOP_PROBE (NETATOP_BASE_CTL) ++ ++// force garbage collection to make finished processes available ++#define NETATOP_FORCE_GC (NETATOP_BASE_CTL+1) ++ ++// wait until all finished processes are read (blocks until done) ++#define NETATOP_EMPTY_EXIT (NETATOP_BASE_CTL+2) ++ ++// get info for finished process (blocks until available) ++#define NETATOP_GETCNT_EXIT (NETATOP_BASE_CTL+3) ++ ++// get counters for thread group (i.e. process): input is 'id' (pid) ++#define NETATOP_GETCNT_TGID (NETATOP_BASE_CTL+4) ++ ++// get counters for thread: input is 'id' (tid) ++#define NETATOP_GETCNT_PID (NETATOP_BASE_CTL+5) +diff -uNr linux-3.2.33-go.orig/3rdparty/netatop/netatopversion.h 3rdparty/netatop/netatopversion.h +--- linux-3.2.33-go.orig/3rdparty/netatop/netatopversion.h 1970-01-01 01:00:00.000000000 +0100 ++++ 3rdparty/netatop/netatopversion.h 2012-11-12 18:08:29.000000000 +0100 +@@ -0,0 +1,2 @@ ++#define NETATOPVERSION "0.1.1" ++#define NETATOPDATE "2012/11/12 18:08:23" diff --git a/3.3.8/600-netfilter_layer7_2.22.patch b/3.3.8/600-netfilter_layer7_2.22.patch new file mode 100644 index 0000000..f305559 --- /dev/null +++ b/3.3.8/600-netfilter_layer7_2.22.patch @@ -0,0 +1,2142 @@ +--- a/net/netfilter/Kconfig ++++ b/net/netfilter/Kconfig +@@ -1053,6 +1053,27 @@ config NETFILTER_XT_MATCH_STATE + + To compile it as a module, choose M here. If unsure, say N. + ++config NETFILTER_XT_MATCH_LAYER7 ++ tristate '"layer7" match support' ++ depends on NETFILTER_XTABLES ++ depends on EXPERIMENTAL && (IP_NF_CONNTRACK || NF_CONNTRACK) ++ depends on NETFILTER_ADVANCED ++ help ++ Say Y if you want to be able to classify connections (and their ++ packets) based on regular expression matching of their application ++ layer data. This is one way to classify applications such as ++ peer-to-peer filesharing systems that do not always use the same ++ port. ++ ++ To compile it as a module, choose M here. If unsure, say N. ++ ++config NETFILTER_XT_MATCH_LAYER7_DEBUG ++ bool 'Layer 7 debugging output' ++ depends on NETFILTER_XT_MATCH_LAYER7 ++ help ++ Say Y to get lots of debugging output. 
++ ++ + config NETFILTER_XT_MATCH_STATISTIC + tristate '"statistic" match support' + depends on NETFILTER_ADVANCED +--- a/net/netfilter/Makefile ++++ b/net/netfilter/Makefile +@@ -105,6 +105,7 @@ obj-$(CONFIG_NETFILTER_XT_MATCH_RECENT) + obj-$(CONFIG_NETFILTER_XT_MATCH_SCTP) += xt_sctp.o + obj-$(CONFIG_NETFILTER_XT_MATCH_SOCKET) += xt_socket.o + obj-$(CONFIG_NETFILTER_XT_MATCH_STATE) += xt_state.o ++obj-$(CONFIG_NETFILTER_XT_MATCH_LAYER7) += xt_layer7.o + obj-$(CONFIG_NETFILTER_XT_MATCH_STATISTIC) += xt_statistic.o + obj-$(CONFIG_NETFILTER_XT_MATCH_STRING) += xt_string.o + obj-$(CONFIG_NETFILTER_XT_MATCH_TCPMSS) += xt_tcpmss.o +--- /dev/null ++++ b/net/netfilter/xt_layer7.c +@@ -0,0 +1,666 @@ ++/* ++ Kernel module to match application layer (OSI layer 7) data in connections. ++ ++ http://l7-filter.sf.net ++ ++ (C) 2003-2009 Matthew Strait and Ethan Sommer. ++ ++ This program is free software; you can redistribute it and/or ++ modify it under the terms of the GNU General Public License ++ as published by the Free Software Foundation; either version ++ 2 of the License, or (at your option) any later version. ++ http://www.gnu.org/licenses/gpl.txt ++ ++ Based on ipt_string.c (C) 2000 Emmanuel Roger , ++ xt_helper.c (C) 2002 Harald Welte and cls_layer7.c (C) 2003 Matthew Strait, ++ Ethan Sommer, Justin Levandoski. ++*/ ++ ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#if LINUX_VERSION_CODE >= KERNEL_VERSION(2, 6, 27) ++#include ++#include ++#endif ++#include ++#include ++#include ++#include ++ ++#include "regexp/regexp.c" ++ ++MODULE_LICENSE("GPL"); ++MODULE_AUTHOR("Matthew Strait , Ethan Sommer "); ++MODULE_DESCRIPTION("iptables application layer match module"); ++MODULE_ALIAS("ipt_layer7"); ++MODULE_VERSION("2.21"); ++ ++static int maxdatalen = 2048; // this is the default ++module_param(maxdatalen, int, 0444); ++MODULE_PARM_DESC(maxdatalen, "maximum bytes of data looked at by l7-filter"); ++#ifdef CONFIG_NETFILTER_XT_MATCH_LAYER7_DEBUG ++ #define DPRINTK(format,args...) printk(format,##args) ++#else ++ #define DPRINTK(format,args...) ++#endif ++ ++/* Number of packets whose data we look at. ++This can be modified through /proc/net/layer7_numpackets */ ++static int num_packets = 10; ++ ++static struct pattern_cache { ++ char * regex_string; ++ regexp * pattern; ++ struct pattern_cache * next; ++} * first_pattern_cache = NULL; ++ ++DEFINE_SPINLOCK(l7_lock); ++ ++static int total_acct_packets(struct nf_conn *ct) ++{ ++#if LINUX_VERSION_CODE <= KERNEL_VERSION(2, 6, 26) ++ BUG_ON(ct == NULL); ++ return (ct->counters[IP_CT_DIR_ORIGINAL].packets + ct->counters[IP_CT_DIR_REPLY].packets); ++#else ++ struct nf_conn_counter *acct; ++ ++ BUG_ON(ct == NULL); ++ acct = nf_conn_acct_find(ct); ++ if (!acct) ++ return 0; ++ return (atomic64_read(&acct[IP_CT_DIR_ORIGINAL].packets) + atomic64_read(&acct[IP_CT_DIR_REPLY].packets)); ++#endif ++} ++ ++#ifdef CONFIG_IP_NF_MATCH_LAYER7_DEBUG ++/* Converts an unfriendly string into a friendly one by ++replacing unprintables with periods and all whitespace with " ". 
*/ ++static char * friendly_print(unsigned char * s) ++{ ++ char * f = kmalloc(strlen(s) + 1, GFP_ATOMIC); ++ int i; ++ ++ if(!f) { ++ if (net_ratelimit()) ++ printk(KERN_ERR "layer7: out of memory in " ++ "friendly_print, bailing.\n"); ++ return NULL; ++ } ++ ++ for(i = 0; i < strlen(s); i++){ ++ if(isprint(s[i]) && s[i] < 128) f[i] = s[i]; ++ else if(isspace(s[i])) f[i] = ' '; ++ else f[i] = '.'; ++ } ++ f[i] = '\0'; ++ return f; ++} ++ ++static char dec2hex(int i) ++{ ++ switch (i) { ++ case 0 ... 9: ++ return (i + '0'); ++ break; ++ case 10 ... 15: ++ return (i - 10 + 'a'); ++ break; ++ default: ++ if (net_ratelimit()) ++ printk("layer7: Problem in dec2hex\n"); ++ return '\0'; ++ } ++} ++ ++static char * hex_print(unsigned char * s) ++{ ++ char * g = kmalloc(strlen(s)*3 + 1, GFP_ATOMIC); ++ int i; ++ ++ if(!g) { ++ if (net_ratelimit()) ++ printk(KERN_ERR "layer7: out of memory in hex_print, " ++ "bailing.\n"); ++ return NULL; ++ } ++ ++ for(i = 0; i < strlen(s); i++) { ++ g[i*3 ] = dec2hex(s[i]/16); ++ g[i*3 + 1] = dec2hex(s[i]%16); ++ g[i*3 + 2] = ' '; ++ } ++ g[i*3] = '\0'; ++ ++ return g; ++} ++#endif // DEBUG ++ ++/* Use instead of regcomp. As we expect to be seeing the same regexps over and ++over again, it make sense to cache the results. */ ++static regexp * compile_and_cache(const char * regex_string, ++ const char * protocol) ++{ ++ struct pattern_cache * node = first_pattern_cache; ++ struct pattern_cache * last_pattern_cache = first_pattern_cache; ++ struct pattern_cache * tmp; ++ unsigned int len; ++ ++ while (node != NULL) { ++ if (!strcmp(node->regex_string, regex_string)) ++ return node->pattern; ++ ++ last_pattern_cache = node;/* points at the last non-NULL node */ ++ node = node->next; ++ } ++ ++ /* If we reach the end of the list, then we have not yet cached ++ the pattern for this regex. Let's do that now. ++ Be paranoid about running out of memory to avoid list corruption. */ ++ tmp = kmalloc(sizeof(struct pattern_cache), GFP_ATOMIC); ++ ++ if(!tmp) { ++ if (net_ratelimit()) ++ printk(KERN_ERR "layer7: out of memory in " ++ "compile_and_cache, bailing.\n"); ++ return NULL; ++ } ++ ++ tmp->regex_string = kmalloc(strlen(regex_string) + 1, GFP_ATOMIC); ++ tmp->pattern = kmalloc(sizeof(struct regexp), GFP_ATOMIC); ++ tmp->next = NULL; ++ ++ if(!tmp->regex_string || !tmp->pattern) { ++ if (net_ratelimit()) ++ printk(KERN_ERR "layer7: out of memory in " ++ "compile_and_cache, bailing.\n"); ++ kfree(tmp->regex_string); ++ kfree(tmp->pattern); ++ kfree(tmp); ++ return NULL; ++ } ++ ++ /* Ok. The new node is all ready now. */ ++ node = tmp; ++ ++ if(first_pattern_cache == NULL) /* list is empty */ ++ first_pattern_cache = node; /* make node the beginning */ ++ else ++ last_pattern_cache->next = node; /* attach node to the end */ ++ ++ /* copy the string and compile the regex */ ++ len = strlen(regex_string); ++ DPRINTK("About to compile this: \"%s\"\n", regex_string); ++ node->pattern = regcomp((char *)regex_string, &len); ++ if ( !node->pattern ) { ++ if (net_ratelimit()) ++ printk(KERN_ERR "layer7: Error compiling regexp " ++ "\"%s\" (%s)\n", ++ regex_string, protocol); ++ /* pattern is now cached as NULL, so we won't try again. 
*/ ++ } ++ ++ strcpy(node->regex_string, regex_string); ++ return node->pattern; ++} ++ ++static int can_handle(const struct sk_buff *skb) ++{ ++ if(!ip_hdr(skb)) /* not IP */ ++ return 0; ++ if(ip_hdr(skb)->protocol != IPPROTO_TCP && ++ ip_hdr(skb)->protocol != IPPROTO_UDP && ++ ip_hdr(skb)->protocol != IPPROTO_ICMP) ++ return 0; ++ return 1; ++} ++ ++/* Returns offset the into the skb->data that the application data starts */ ++static int app_data_offset(const struct sk_buff *skb) ++{ ++ /* In case we are ported somewhere (ebtables?) where ip_hdr(skb) ++ isn't set, this can be gotten from 4*(skb->data[0] & 0x0f) as well. */ ++ int ip_hl = 4*ip_hdr(skb)->ihl; ++ ++ if( ip_hdr(skb)->protocol == IPPROTO_TCP ) { ++ /* 12 == offset into TCP header for the header length field. ++ Can't get this with skb->h.th->doff because the tcphdr ++ struct doesn't get set when routing (this is confirmed to be ++ true in Netfilter as well as QoS.) */ ++ int tcp_hl = 4*(skb->data[ip_hl + 12] >> 4); ++ ++ return ip_hl + tcp_hl; ++ } else if( ip_hdr(skb)->protocol == IPPROTO_UDP ) { ++ return ip_hl + 8; /* UDP header is always 8 bytes */ ++ } else if( ip_hdr(skb)->protocol == IPPROTO_ICMP ) { ++ return ip_hl + 8; /* ICMP header is 8 bytes */ ++ } else { ++ if (net_ratelimit()) ++ printk(KERN_ERR "layer7: tried to handle unknown " ++ "protocol!\n"); ++ return ip_hl + 8; /* something reasonable */ ++ } ++} ++ ++/* handles whether there's a match when we aren't appending data anymore */ ++static int match_no_append(struct nf_conn * conntrack, ++ struct nf_conn * master_conntrack, ++ enum ip_conntrack_info ctinfo, ++ enum ip_conntrack_info master_ctinfo, ++ const struct xt_layer7_info * info) ++{ ++ /* If we're in here, throw the app data away */ ++ if(master_conntrack->layer7.app_data != NULL) { ++ ++ #ifdef CONFIG_IP_NF_MATCH_LAYER7_DEBUG ++ if(!master_conntrack->layer7.app_proto) { ++ char * f = ++ friendly_print(master_conntrack->layer7.app_data); ++ char * g = ++ hex_print(master_conntrack->layer7.app_data); ++ DPRINTK("\nl7-filter gave up after %d bytes " ++ "(%d packets):\n%s\n", ++ strlen(f), total_acct_packets(master_conntrack), f); ++ kfree(f); ++ DPRINTK("In hex: %s\n", g); ++ kfree(g); ++ } ++ #endif ++ ++ kfree(master_conntrack->layer7.app_data); ++ master_conntrack->layer7.app_data = NULL; /* don't free again */ ++ } ++ ++ if(master_conntrack->layer7.app_proto){ ++ /* Here child connections set their .app_proto (for /proc) */ ++ if(!conntrack->layer7.app_proto) { ++ conntrack->layer7.app_proto = ++ kmalloc(strlen(master_conntrack->layer7.app_proto)+1, ++ GFP_ATOMIC); ++ if(!conntrack->layer7.app_proto){ ++ if (net_ratelimit()) ++ printk(KERN_ERR "layer7: out of memory " ++ "in match_no_append, " ++ "bailing.\n"); ++ return 1; ++ } ++ strcpy(conntrack->layer7.app_proto, ++ master_conntrack->layer7.app_proto); ++ } ++ ++ return (!strcmp(master_conntrack->layer7.app_proto, ++ info->protocol)); ++ } ++ else { ++ /* If not classified, set to "unknown" to distinguish from ++ connections that are still being tested. */ ++ master_conntrack->layer7.app_proto = ++ kmalloc(strlen("unknown")+1, GFP_ATOMIC); ++ if(!master_conntrack->layer7.app_proto){ ++ if (net_ratelimit()) ++ printk(KERN_ERR "layer7: out of memory in " ++ "match_no_append, bailing.\n"); ++ return 1; ++ } ++ strcpy(master_conntrack->layer7.app_proto, "unknown"); ++ return 0; ++ } ++} ++ ++/* add the new app data to the conntrack. Return number of bytes added. 
*/ ++static int add_data(struct nf_conn * master_conntrack, ++ char * app_data, int appdatalen) ++{ ++ int length = 0, i; ++ int oldlength = master_conntrack->layer7.app_data_len; ++ ++ /* This is a fix for a race condition by Deti Fliegl. However, I'm not ++ clear on whether the race condition exists or whether this really ++ fixes it. I might just be being dense... Anyway, if it's not really ++ a fix, all it does is waste a very small amount of time. */ ++ if(!master_conntrack->layer7.app_data) return 0; ++ ++ /* Strip nulls. Make everything lower case (our regex lib doesn't ++ do case insensitivity). Add it to the end of the current data. */ ++ for(i = 0; i < maxdatalen-oldlength-1 && ++ i < appdatalen; i++) { ++ if(app_data[i] != '\0') { ++ /* the kernel version of tolower mungs 'upper ascii' */ ++ master_conntrack->layer7.app_data[length+oldlength] = ++ isascii(app_data[i])? ++ tolower(app_data[i]) : app_data[i]; ++ length++; ++ } ++ } ++ ++ master_conntrack->layer7.app_data[length+oldlength] = '\0'; ++ master_conntrack->layer7.app_data_len = length + oldlength; ++ ++ return length; ++} ++ ++/* taken from drivers/video/modedb.c */ ++static int my_atoi(const char *s) ++{ ++ int val = 0; ++ ++ for (;; s++) { ++ switch (*s) { ++ case '0'...'9': ++ val = 10*val+(*s-'0'); ++ break; ++ default: ++ return val; ++ } ++ } ++} ++ ++/* write out num_packets to userland. */ ++static int layer7_read_proc(char* page, char ** start, off_t off, int count, ++ int* eof, void * data) ++{ ++ if(num_packets > 99 && net_ratelimit()) ++ printk(KERN_ERR "layer7: NOT REACHED. num_packets too big\n"); ++ ++ page[0] = num_packets/10 + '0'; ++ page[1] = num_packets%10 + '0'; ++ page[2] = '\n'; ++ page[3] = '\0'; ++ ++ *eof=1; ++ ++ return 3; ++} ++ ++/* Read in num_packets from userland */ ++static int layer7_write_proc(struct file* file, const char* buffer, ++ unsigned long count, void *data) ++{ ++ char * foo = kmalloc(count, GFP_ATOMIC); ++ ++ if(!foo){ ++ if (net_ratelimit()) ++ printk(KERN_ERR "layer7: out of memory, bailing. " ++ "num_packets unchanged.\n"); ++ return count; ++ } ++ ++ if(copy_from_user(foo, buffer, count)) { ++ return -EFAULT; ++ } ++ ++ ++ num_packets = my_atoi(foo); ++ kfree (foo); ++ ++ /* This has an arbitrary limit to make the math easier. I'm lazy. ++ But anyway, 99 is a LOT! If you want more, you're doing it wrong! */ ++ if(num_packets > 99) { ++ printk(KERN_WARNING "layer7: num_packets can't be > 99.\n"); ++ num_packets = 99; ++ } else if(num_packets < 1) { ++ printk(KERN_WARNING "layer7: num_packets can't be < 1.\n"); ++ num_packets = 1; ++ } ++ ++ return count; ++} ++ ++static bool ++#if LINUX_VERSION_CODE >= KERNEL_VERSION(2, 6, 28) ++match(const struct sk_buff *skbin, const struct xt_match_param *par) ++#else ++match(const struct sk_buff *skbin, ++ const struct net_device *in, ++ const struct net_device *out, ++ const struct xt_match *match, ++ const void *matchinfo, ++ int offset, ++ unsigned int protoff, ++ bool *hotdrop) ++#endif ++{ ++ /* sidestep const without getting a compiler warning... */ ++ struct sk_buff * skb = (struct sk_buff *)skbin; ++ ++ const struct xt_layer7_info * info = ++ #if LINUX_VERSION_CODE >= KERNEL_VERSION(2, 6, 28) ++ par->matchinfo; ++ #else ++ matchinfo; ++ #endif ++ ++ enum ip_conntrack_info master_ctinfo, ctinfo; ++ struct nf_conn *master_conntrack, *conntrack; ++ unsigned char * app_data; ++ unsigned int pattern_result, appdatalen; ++ regexp * comppattern; ++ ++ /* Be paranoid/incompetent - lock the entire match function. 
*/ ++ spin_lock_bh(&l7_lock); ++ ++ if(!can_handle(skb)){ ++ DPRINTK("layer7: This is some protocol I can't handle.\n"); ++ spin_unlock_bh(&l7_lock); ++ return info->invert; ++ } ++ ++ /* Treat parent & all its children together as one connection, except ++ for the purpose of setting conntrack->layer7.app_proto in the actual ++ connection. This makes /proc/net/ip_conntrack more satisfying. */ ++ if(!(conntrack = nf_ct_get(skb, &ctinfo)) || ++ !(master_conntrack=nf_ct_get(skb,&master_ctinfo))){ ++ DPRINTK("layer7: couldn't get conntrack.\n"); ++ spin_unlock_bh(&l7_lock); ++ return info->invert; ++ } ++ ++ /* Try to get a master conntrack (and its master etc) for FTP, etc. */ ++ while (master_ct(master_conntrack) != NULL) ++ master_conntrack = master_ct(master_conntrack); ++ ++ /* if we've classified it or seen too many packets */ ++ if(total_acct_packets(master_conntrack) > num_packets || ++ master_conntrack->layer7.app_proto) { ++ ++ pattern_result = match_no_append(conntrack, master_conntrack, ++ ctinfo, master_ctinfo, info); ++ ++ /* skb->cb[0] == seen. Don't do things twice if there are ++ multiple l7 rules. I'm not sure that using cb for this purpose ++ is correct, even though it says "put your private variables ++ there". But it doesn't look like it is being used for anything ++ else in the skbs that make it here. */ ++ skb->cb[0] = 1; /* marking it seen here's probably irrelevant */ ++ ++ spin_unlock_bh(&l7_lock); ++ return (pattern_result ^ info->invert); ++ } ++ ++ if(skb_is_nonlinear(skb)){ ++ if(skb_linearize(skb) != 0){ ++ if (net_ratelimit()) ++ printk(KERN_ERR "layer7: failed to linearize " ++ "packet, bailing.\n"); ++ spin_unlock_bh(&l7_lock); ++ return info->invert; ++ } ++ } ++ ++ /* now that the skb is linearized, it's safe to set these. */ ++ app_data = skb->data + app_data_offset(skb); ++ appdatalen = skb_tail_pointer(skb) - app_data; ++ ++ /* the return value gets checked later, when we're ready to use it */ ++ comppattern = compile_and_cache(info->pattern, info->protocol); ++ ++ /* On the first packet of a connection, allocate space for app data */ ++ if(total_acct_packets(master_conntrack) == 1 && !skb->cb[0] && ++ !master_conntrack->layer7.app_data){ ++ master_conntrack->layer7.app_data = ++ kmalloc(maxdatalen, GFP_ATOMIC); ++ if(!master_conntrack->layer7.app_data){ ++ if (net_ratelimit()) ++ printk(KERN_ERR "layer7: out of memory in " ++ "match, bailing.\n"); ++ spin_unlock_bh(&l7_lock); ++ return info->invert; ++ } ++ ++ master_conntrack->layer7.app_data[0] = '\0'; ++ } ++ ++ /* Can be here, but unallocated, if numpackets is increased near ++ the beginning of a connection */ ++ if(master_conntrack->layer7.app_data == NULL){ ++ spin_unlock_bh(&l7_lock); ++ return info->invert; /* unmatched */ ++ } ++ ++ if(!skb->cb[0]){ ++ int newbytes; ++ newbytes = add_data(master_conntrack, app_data, appdatalen); ++ ++ if(newbytes == 0) { /* didn't add any data */ ++ skb->cb[0] = 1; ++ /* Didn't match before, not going to match now */ ++ spin_unlock_bh(&l7_lock); ++ return info->invert; ++ } ++ } ++ ++ /* If looking for "unknown", then never match. "Unknown" means that ++ we've given up; we're still trying with these packets. */ ++ if(!strcmp(info->protocol, "unknown")) { ++ pattern_result = 0; ++ /* If looking for "unset", then always match. "Unset" means that we ++ haven't yet classified the connection. 
*/ ++ } else if(!strcmp(info->protocol, "unset")) { ++ pattern_result = 2; ++ DPRINTK("layer7: matched unset: not yet classified " ++ "(%d/%d packets)\n", ++ total_acct_packets(master_conntrack), num_packets); ++ /* If the regexp failed to compile, don't bother running it */ ++ } else if(comppattern && ++ regexec(comppattern, master_conntrack->layer7.app_data)){ ++ DPRINTK("layer7: matched %s\n", info->protocol); ++ pattern_result = 1; ++ } else pattern_result = 0; ++ ++ if(pattern_result == 1) { ++ master_conntrack->layer7.app_proto = ++ kmalloc(strlen(info->protocol)+1, GFP_ATOMIC); ++ if(!master_conntrack->layer7.app_proto){ ++ if (net_ratelimit()) ++ printk(KERN_ERR "layer7: out of memory in " ++ "match, bailing.\n"); ++ spin_unlock_bh(&l7_lock); ++ return (pattern_result ^ info->invert); ++ } ++ strcpy(master_conntrack->layer7.app_proto, info->protocol); ++ } else if(pattern_result > 1) { /* cleanup from "unset" */ ++ pattern_result = 1; ++ } ++ ++ /* mark the packet seen */ ++ skb->cb[0] = 1; ++ ++ spin_unlock_bh(&l7_lock); ++ return (pattern_result ^ info->invert); ++} ++ ++// load nf_conntrack_ipv4 ++#if LINUX_VERSION_CODE >= KERNEL_VERSION(2, 6, 28) ++static bool check(const struct xt_mtchk_param *par) ++{ ++ if (nf_ct_l3proto_try_module_get(par->match->family) < 0) { ++ printk(KERN_WARNING "can't load conntrack support for " ++ "proto=%d\n", par->match->family); ++#else ++static bool check(const char *tablename, const void *inf, ++ const struct xt_match *match, void *matchinfo, ++ unsigned int hook_mask) ++{ ++ if (nf_ct_l3proto_try_module_get(match->family) < 0) { ++ printk(KERN_WARNING "can't load conntrack support for " ++ "proto=%d\n", match->family); ++#endif ++ return 0; ++ } ++ return 1; ++} ++ ++ ++#if LINUX_VERSION_CODE >= KERNEL_VERSION(2, 6, 28) ++ static void destroy(const struct xt_mtdtor_param *par) ++ { ++ nf_ct_l3proto_module_put(par->match->family); ++ } ++#else ++ static void destroy(const struct xt_match *match, void *matchinfo) ++ { ++ nf_ct_l3proto_module_put(match->family); ++ } ++#endif ++ ++static struct xt_match xt_layer7_match[] __read_mostly = { ++{ ++ .name = "layer7", ++ .family = AF_INET, ++ .checkentry = check, ++ .match = match, ++ .destroy = destroy, ++ .matchsize = sizeof(struct xt_layer7_info), ++ .me = THIS_MODULE ++} ++}; ++ ++static void layer7_cleanup_proc(void) ++{ ++ remove_proc_entry("layer7_numpackets", init_net.proc_net); ++} ++ ++/* register the proc file */ ++static void layer7_init_proc(void) ++{ ++ struct proc_dir_entry* entry; ++ entry = create_proc_entry("layer7_numpackets", 0644, init_net.proc_net); ++ entry->read_proc = layer7_read_proc; ++ entry->write_proc = layer7_write_proc; ++} ++ ++static int __init xt_layer7_init(void) ++{ ++ need_conntrack(); ++ ++ layer7_init_proc(); ++ if(maxdatalen < 1) { ++ printk(KERN_WARNING "layer7: maxdatalen can't be < 1, " ++ "using 1\n"); ++ maxdatalen = 1; ++ } ++ /* This is not a hard limit. It's just here to prevent people from ++ bringing their slow machines to a grinding halt. 
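
The branch above boils down to a small decision table: a rule for "unknown" never matches once the classifier has data, a rule for "unset" matches while the connection is still unclassified (the value 2 is folded back to 1 before returning), and any other protocol name matches only if the cached regexp fires; the result is finally XOR-ed with the rule's invert flag. A stand-alone sketch of just that decision (helper names are made up):

    #include <stdbool.h>
    #include <stdio.h>
    #include <string.h>

    /* 0 = no match, 1 = match, 2 = "unset" rule on an unclassified connection. */
    static int layer7_pattern_result(const char *rule_proto, bool regexp_hit)
    {
            if (!strcmp(rule_proto, "unknown"))
                    return 0;               /* classifier has given up: never match */
            if (!strcmp(rule_proto, "unset"))
                    return 2;               /* not classified yet: always match     */
            return regexp_hit ? 1 : 0;      /* ordinary protocol: ask the regexp    */
    }

    static bool layer7_verdict(const char *rule_proto, bool regexp_hit, bool invert)
    {
            int r = layer7_pattern_result(rule_proto, regexp_hit);

            if (r > 1)
                    r = 1;                  /* fold the "unset" case back to 1 */
            return r ^ invert;
    }

    int main(void)
    {
            printf("proto \"http\", regexp hit:      %d\n",
                   layer7_verdict("http", true, false));
            printf("proto \"unset\", fresh conn:     %d\n",
                   layer7_verdict("unset", false, false));
            printf("proto \"unknown\", inverted:     %d\n",
                   layer7_verdict("unknown", false, true));
            return 0;
    }
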
*/ ++ else if(maxdatalen > 65536) { ++ printk(KERN_WARNING "layer7: maxdatalen can't be > 65536, " ++ "using 65536\n"); ++ maxdatalen = 65536; ++ } ++ return xt_register_matches(xt_layer7_match, ++ ARRAY_SIZE(xt_layer7_match)); ++} ++ ++static void __exit xt_layer7_fini(void) ++{ ++ layer7_cleanup_proc(); ++ xt_unregister_matches(xt_layer7_match, ARRAY_SIZE(xt_layer7_match)); ++} ++ ++module_init(xt_layer7_init); ++module_exit(xt_layer7_fini); +--- /dev/null ++++ b/net/netfilter/regexp/regexp.c +@@ -0,0 +1,1197 @@ ++/* ++ * regcomp and regexec -- regsub and regerror are elsewhere ++ * @(#)regexp.c 1.3 of 18 April 87 ++ * ++ * Copyright (c) 1986 by University of Toronto. ++ * Written by Henry Spencer. Not derived from licensed software. ++ * ++ * Permission is granted to anyone to use this software for any ++ * purpose on any computer system, and to redistribute it freely, ++ * subject to the following restrictions: ++ * ++ * 1. The author is not responsible for the consequences of use of ++ * this software, no matter how awful, even if they arise ++ * from defects in it. ++ * ++ * 2. The origin of this software must not be misrepresented, either ++ * by explicit claim or by omission. ++ * ++ * 3. Altered versions must be plainly marked as such, and must not ++ * be misrepresented as being the original software. ++ * ++ * Beware that some of this code is subtly aware of the way operator ++ * precedence is structured in regular expressions. Serious changes in ++ * regular-expression syntax might require a total rethink. ++ * ++ * This code was modified by Ethan Sommer to work within the kernel ++ * (it now uses kmalloc etc..) ++ * ++ * Modified slightly by Matthew Strait to use more modern C. ++ */ ++ ++#include "regexp.h" ++#include "regmagic.h" ++ ++/* added by ethan and matt. Lets it work in both kernel and user space. ++(So iptables can use it, for instance.) Yea, it goes both ways... */ ++#if __KERNEL__ ++ #define malloc(foo) kmalloc(foo,GFP_ATOMIC) ++#else ++ #define printk(format,args...) printf(format,##args) ++#endif ++ ++void regerror(char * s) ++{ ++ printk("<3>Regexp: %s\n", s); ++ /* NOTREACHED */ ++} ++ ++/* ++ * The "internal use only" fields in regexp.h are present to pass info from ++ * compile to execute that permits the execute phase to run lots faster on ++ * simple cases. They are: ++ * ++ * regstart char that must begin a match; '\0' if none obvious ++ * reganch is the match anchored (at beginning-of-line only)? ++ * regmust string (pointer into program) that match must include, or NULL ++ * regmlen length of regmust string ++ * ++ * Regstart and reganch permit very fast decisions on suitable starting points ++ * for a match, cutting down the work a lot. Regmust permits fast rejection ++ * of lines that cannot possibly match. The regmust tests are costly enough ++ * that regcomp() supplies a regmust only if the r.e. contains something ++ * potentially expensive (at present, the only such thing detected is * or + ++ * at the start of the r.e., which can involve a lot of backup). Regmlen is ++ * supplied because the test in regexec() needs it and regcomp() is computing ++ * it anyway. ++ */ ++ ++/* ++ * Structure for regexp "program". This is essentially a linear encoding ++ * of a nondeterministic finite-state machine (aka syntax charts or ++ * "railroad normal form" in parsing technology). Each node is an opcode ++ * plus a "next" pointer, possibly plus an operand. 
"Next" pointers of ++ * all nodes except BRANCH implement concatenation; a "next" pointer with ++ * a BRANCH on both ends of it is connecting two alternatives. (Here we ++ * have one of the subtle syntax dependencies: an individual BRANCH (as ++ * opposed to a collection of them) is never concatenated with anything ++ * because of operator precedence.) The operand of some types of node is ++ * a literal string; for others, it is a node leading into a sub-FSM. In ++ * particular, the operand of a BRANCH node is the first node of the branch. ++ * (NB this is *not* a tree structure: the tail of the branch connects ++ * to the thing following the set of BRANCHes.) The opcodes are: ++ */ ++ ++/* definition number opnd? meaning */ ++#define END 0 /* no End of program. */ ++#define BOL 1 /* no Match "" at beginning of line. */ ++#define EOL 2 /* no Match "" at end of line. */ ++#define ANY 3 /* no Match any one character. */ ++#define ANYOF 4 /* str Match any character in this string. */ ++#define ANYBUT 5 /* str Match any character not in this string. */ ++#define BRANCH 6 /* node Match this alternative, or the next... */ ++#define BACK 7 /* no Match "", "next" ptr points backward. */ ++#define EXACTLY 8 /* str Match this string. */ ++#define NOTHING 9 /* no Match empty string. */ ++#define STAR 10 /* node Match this (simple) thing 0 or more times. */ ++#define PLUS 11 /* node Match this (simple) thing 1 or more times. */ ++#define OPEN 20 /* no Mark this point in input as start of #n. */ ++ /* OPEN+1 is number 1, etc. */ ++#define CLOSE 30 /* no Analogous to OPEN. */ ++ ++/* ++ * Opcode notes: ++ * ++ * BRANCH The set of branches constituting a single choice are hooked ++ * together with their "next" pointers, since precedence prevents ++ * anything being concatenated to any individual branch. The ++ * "next" pointer of the last BRANCH in a choice points to the ++ * thing following the whole choice. This is also where the ++ * final "next" pointer of each individual branch points; each ++ * branch starts with the operand node of a BRANCH node. ++ * ++ * BACK Normal "next" pointers all implicitly point forward; BACK ++ * exists to make loop structures possible. ++ * ++ * STAR,PLUS '?', and complex '*' and '+', are implemented as circular ++ * BRANCH structures using BACK. Simple cases (one character ++ * per match) are implemented with STAR and PLUS for speed ++ * and to minimize recursive plunges. ++ * ++ * OPEN,CLOSE ...are numbered at compile time. ++ */ ++ ++/* ++ * A node is one char of opcode followed by two chars of "next" pointer. ++ * "Next" pointers are stored as two 8-bit pieces, high order first. The ++ * value is a positive offset from the opcode of the node containing it. ++ * An operand, if any, simply follows the node. (Note that much of the ++ * code generation knows about this implicit relationship.) ++ * ++ * Using two bytes for the "next" pointer is vast overkill for most things, ++ * but allows patterns to get big without disasters. ++ */ ++#define OP(p) (*(p)) ++#define NEXT(p) (((*((p)+1)&0377)<<8) + (*((p)+2)&0377)) ++#define OPERAND(p) ((p) + 3) ++ ++/* ++ * See regmagic.h for one further detail of program structure. ++ */ ++ ++ ++/* ++ * Utility definitions. 
++ */ ++#ifndef CHARBITS ++#define UCHARAT(p) ((int)*(unsigned char *)(p)) ++#else ++#define UCHARAT(p) ((int)*(p)&CHARBITS) ++#endif ++ ++#define FAIL(m) { regerror(m); return(NULL); } ++#define ISMULT(c) ((c) == '*' || (c) == '+' || (c) == '?') ++#define META "^$.[()|?+*\\" ++ ++/* ++ * Flags to be passed up and down. ++ */ ++#define HASWIDTH 01 /* Known never to match null string. */ ++#define SIMPLE 02 /* Simple enough to be STAR/PLUS operand. */ ++#define SPSTART 04 /* Starts with * or +. */ ++#define WORST 0 /* Worst case. */ ++ ++/* ++ * Global work variables for regcomp(). ++ */ ++struct match_globals { ++char *reginput; /* String-input pointer. */ ++char *regbol; /* Beginning of input, for ^ check. */ ++char **regstartp; /* Pointer to startp array. */ ++char **regendp; /* Ditto for endp. */ ++char *regparse; /* Input-scan pointer. */ ++int regnpar; /* () count. */ ++char regdummy; ++char *regcode; /* Code-emit pointer; ®dummy = don't. */ ++long regsize; /* Code size. */ ++}; ++ ++/* ++ * Forward declarations for regcomp()'s friends. ++ */ ++#ifndef STATIC ++#define STATIC static ++#endif ++STATIC char *reg(struct match_globals *g, int paren,int *flagp); ++STATIC char *regbranch(struct match_globals *g, int *flagp); ++STATIC char *regpiece(struct match_globals *g, int *flagp); ++STATIC char *regatom(struct match_globals *g, int *flagp); ++STATIC char *regnode(struct match_globals *g, char op); ++STATIC char *regnext(struct match_globals *g, char *p); ++STATIC void regc(struct match_globals *g, char b); ++STATIC void reginsert(struct match_globals *g, char op, char *opnd); ++STATIC void regtail(struct match_globals *g, char *p, char *val); ++STATIC void regoptail(struct match_globals *g, char *p, char *val); ++ ++ ++__kernel_size_t my_strcspn(const char *s1,const char *s2) ++{ ++ char *scan1; ++ char *scan2; ++ int count; ++ ++ count = 0; ++ for (scan1 = (char *)s1; *scan1 != '\0'; scan1++) { ++ for (scan2 = (char *)s2; *scan2 != '\0';) /* ++ moved down. */ ++ if (*scan1 == *scan2++) ++ return(count); ++ count++; ++ } ++ return(count); ++} ++ ++/* ++ - regcomp - compile a regular expression into internal code ++ * ++ * We can't allocate space until we know how big the compiled form will be, ++ * but we can't compile it (and thus know how big it is) until we've got a ++ * place to put the code. So we cheat: we compile it twice, once with code ++ * generation turned off and size counting turned on, and once "for real". ++ * This also means that we don't allocate space until we are sure that the ++ * thing really will compile successfully, and we never have to move the ++ * code and thus invalidate pointers into it. (Note that it has to be in ++ * one piece because free() must be able to free it all.) ++ * ++ * Beware that the optimization-preparation code in here knows about some ++ * of the structure of the compiled regexp. ++ */ ++regexp * ++regcomp(char *exp,int *patternsize) ++{ ++ register regexp *r; ++ register char *scan; ++ register char *longest; ++ register int len; ++ int flags; ++ struct match_globals g; ++ ++ /* commented out by ethan ++ extern char *malloc(); ++ */ ++ ++ if (exp == NULL) ++ FAIL("NULL argument"); ++ ++ /* First pass: determine size, legality. */ ++ g.regparse = exp; ++ g.regnpar = 1; ++ g.regsize = 0L; ++ g.regcode = &g.regdummy; ++ regc(&g, MAGIC); ++ if (reg(&g, 0, &flags) == NULL) ++ return(NULL); ++ ++ /* Small enough for pointer-storage convention? */ ++ if (g.regsize >= 32767L) /* Probably could be 65535L. 
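
The two-pass trick regcomp() uses below can be seen in isolation: during the first pass regcode points at a one-byte dummy, so every emit merely bumps regsize; the buffer is then allocated once and the second pass emits for real, which is why a compiled program never has to be moved. A minimal user-space sketch, with made-up emitter and demo-program names:

    #include <stdio.h>
    #include <stdlib.h>

    struct emitter {
            char  regdummy;         /* sink for the counting pass          */
            char *regcode;          /* &regdummy means: don't emit, count  */
            long  regsize;
    };

    static void regc_like(struct emitter *e, char b)
    {
            if (e->regcode != &e->regdummy)
                    *e->regcode++ = b;
            else
                    e->regsize++;
    }

    static void emit_program(struct emitter *e)
    {
            const char demo[] = { 6, 0, 9, 8, 'f', 'o', 'o', 0, 0, 0, 0 };

            for (size_t i = 0; i < sizeof(demo); i++)
                    regc_like(e, demo[i]);
    }

    int main(void)
    {
            struct emitter e = { .regsize = 0 };
            char *buf;

            e.regcode = &e.regdummy;        /* pass 1: count only          */
            emit_program(&e);

            buf = malloc(e.regsize);        /* exactly one allocation      */
            e.regcode = buf;                /* pass 2: emit for real       */
            emit_program(&e);

            printf("compiled into %ld bytes\n", e.regsize);
            free(buf);
            return 0;
    }
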
*/ ++ FAIL("regexp too big"); ++ ++ /* Allocate space. */ ++ *patternsize=sizeof(regexp) + (unsigned)g.regsize; ++ r = (regexp *)malloc(sizeof(regexp) + (unsigned)g.regsize); ++ if (r == NULL) ++ FAIL("out of space"); ++ ++ /* Second pass: emit code. */ ++ g.regparse = exp; ++ g.regnpar = 1; ++ g.regcode = r->program; ++ regc(&g, MAGIC); ++ if (reg(&g, 0, &flags) == NULL) ++ return(NULL); ++ ++ /* Dig out information for optimizations. */ ++ r->regstart = '\0'; /* Worst-case defaults. */ ++ r->reganch = 0; ++ r->regmust = NULL; ++ r->regmlen = 0; ++ scan = r->program+1; /* First BRANCH. */ ++ if (OP(regnext(&g, scan)) == END) { /* Only one top-level choice. */ ++ scan = OPERAND(scan); ++ ++ /* Starting-point info. */ ++ if (OP(scan) == EXACTLY) ++ r->regstart = *OPERAND(scan); ++ else if (OP(scan) == BOL) ++ r->reganch++; ++ ++ /* ++ * If there's something expensive in the r.e., find the ++ * longest literal string that must appear and make it the ++ * regmust. Resolve ties in favor of later strings, since ++ * the regstart check works with the beginning of the r.e. ++ * and avoiding duplication strengthens checking. Not a ++ * strong reason, but sufficient in the absence of others. ++ */ ++ if (flags&SPSTART) { ++ longest = NULL; ++ len = 0; ++ for (; scan != NULL; scan = regnext(&g, scan)) ++ if (OP(scan) == EXACTLY && strlen(OPERAND(scan)) >= len) { ++ longest = OPERAND(scan); ++ len = strlen(OPERAND(scan)); ++ } ++ r->regmust = longest; ++ r->regmlen = len; ++ } ++ } ++ ++ return(r); ++} ++ ++/* ++ - reg - regular expression, i.e. main body or parenthesized thing ++ * ++ * Caller must absorb opening parenthesis. ++ * ++ * Combining parenthesis handling with the base level of regular expression ++ * is a trifle forced, but the need to tie the tails of the branches to what ++ * follows makes it hard to avoid. ++ */ ++static char * ++reg(struct match_globals *g, int paren, int *flagp /* Parenthesized? */ ) ++{ ++ register char *ret; ++ register char *br; ++ register char *ender; ++ register int parno = 0; /* 0 makes gcc happy */ ++ int flags; ++ ++ *flagp = HASWIDTH; /* Tentatively. */ ++ ++ /* Make an OPEN node, if parenthesized. */ ++ if (paren) { ++ if (g->regnpar >= NSUBEXP) ++ FAIL("too many ()"); ++ parno = g->regnpar; ++ g->regnpar++; ++ ret = regnode(g, OPEN+parno); ++ } else ++ ret = NULL; ++ ++ /* Pick up the branches, linking them together. */ ++ br = regbranch(g, &flags); ++ if (br == NULL) ++ return(NULL); ++ if (ret != NULL) ++ regtail(g, ret, br); /* OPEN -> first. */ ++ else ++ ret = br; ++ if (!(flags&HASWIDTH)) ++ *flagp &= ~HASWIDTH; ++ *flagp |= flags&SPSTART; ++ while (*g->regparse == '|') { ++ g->regparse++; ++ br = regbranch(g, &flags); ++ if (br == NULL) ++ return(NULL); ++ regtail(g, ret, br); /* BRANCH -> BRANCH. */ ++ if (!(flags&HASWIDTH)) ++ *flagp &= ~HASWIDTH; ++ *flagp |= flags&SPSTART; ++ } ++ ++ /* Make a closing node, and hook it on the end. */ ++ ender = regnode(g, (paren) ? CLOSE+parno : END); ++ regtail(g, ret, ender); ++ ++ /* Hook the tails of the branches to the closing node. */ ++ for (br = ret; br != NULL; br = regnext(g, br)) ++ regoptail(g, br, ender); ++ ++ /* Check for proper termination. */ ++ if (paren && *g->regparse++ != ')') { ++ FAIL("unmatched ()"); ++ } else if (!paren && *g->regparse != '\0') { ++ if (*g->regparse == ')') { ++ FAIL("unmatched ()"); ++ } else ++ FAIL("junk on end"); /* "Can't happen". 
*/ ++ /* NOTREACHED */ ++ } ++ ++ return(ret); ++} ++ ++/* ++ - regbranch - one alternative of an | operator ++ * ++ * Implements the concatenation operator. ++ */ ++static char * ++regbranch(struct match_globals *g, int *flagp) ++{ ++ register char *ret; ++ register char *chain; ++ register char *latest; ++ int flags; ++ ++ *flagp = WORST; /* Tentatively. */ ++ ++ ret = regnode(g, BRANCH); ++ chain = NULL; ++ while (*g->regparse != '\0' && *g->regparse != '|' && *g->regparse != ')') { ++ latest = regpiece(g, &flags); ++ if (latest == NULL) ++ return(NULL); ++ *flagp |= flags&HASWIDTH; ++ if (chain == NULL) /* First piece. */ ++ *flagp |= flags&SPSTART; ++ else ++ regtail(g, chain, latest); ++ chain = latest; ++ } ++ if (chain == NULL) /* Loop ran zero times. */ ++ (void) regnode(g, NOTHING); ++ ++ return(ret); ++} ++ ++/* ++ - regpiece - something followed by possible [*+?] ++ * ++ * Note that the branching code sequences used for ? and the general cases ++ * of * and + are somewhat optimized: they use the same NOTHING node as ++ * both the endmarker for their branch list and the body of the last branch. ++ * It might seem that this node could be dispensed with entirely, but the ++ * endmarker role is not redundant. ++ */ ++static char * ++regpiece(struct match_globals *g, int *flagp) ++{ ++ register char *ret; ++ register char op; ++ register char *next; ++ int flags; ++ ++ ret = regatom(g, &flags); ++ if (ret == NULL) ++ return(NULL); ++ ++ op = *g->regparse; ++ if (!ISMULT(op)) { ++ *flagp = flags; ++ return(ret); ++ } ++ ++ if (!(flags&HASWIDTH) && op != '?') ++ FAIL("*+ operand could be empty"); ++ *flagp = (op != '+') ? (WORST|SPSTART) : (WORST|HASWIDTH); ++ ++ if (op == '*' && (flags&SIMPLE)) ++ reginsert(g, STAR, ret); ++ else if (op == '*') { ++ /* Emit x* as (x&|), where & means "self". */ ++ reginsert(g, BRANCH, ret); /* Either x */ ++ regoptail(g, ret, regnode(g, BACK)); /* and loop */ ++ regoptail(g, ret, ret); /* back */ ++ regtail(g, ret, regnode(g, BRANCH)); /* or */ ++ regtail(g, ret, regnode(g, NOTHING)); /* null. */ ++ } else if (op == '+' && (flags&SIMPLE)) ++ reginsert(g, PLUS, ret); ++ else if (op == '+') { ++ /* Emit x+ as x(&|), where & means "self". */ ++ next = regnode(g, BRANCH); /* Either */ ++ regtail(g, ret, next); ++ regtail(g, regnode(g, BACK), ret); /* loop back */ ++ regtail(g, next, regnode(g, BRANCH)); /* or */ ++ regtail(g, ret, regnode(g, NOTHING)); /* null. */ ++ } else if (op == '?') { ++ /* Emit x? as (x|) */ ++ reginsert(g, BRANCH, ret); /* Either x */ ++ regtail(g, ret, regnode(g, BRANCH)); /* or */ ++ next = regnode(g, NOTHING); /* null. */ ++ regtail(g, ret, next); ++ regoptail(g, ret, next); ++ } ++ g->regparse++; ++ if (ISMULT(*g->regparse)) ++ FAIL("nested *?+"); ++ ++ return(ret); ++} ++ ++/* ++ - regatom - the lowest level ++ * ++ * Optimization: gobbles an entire sequence of ordinary characters so that ++ * it can turn them into a single node, which is smaller to store and ++ * faster to run. Backslashed characters are exceptions, each becoming a ++ * separate node; the code is simpler that way and it's not worth fixing. ++ */ ++static char * ++regatom(struct match_globals *g, int *flagp) ++{ ++ register char *ret; ++ int flags; ++ ++ *flagp = WORST; /* Tentatively. 
*/ ++ ++ switch (*g->regparse++) { ++ case '^': ++ ret = regnode(g, BOL); ++ break; ++ case '$': ++ ret = regnode(g, EOL); ++ break; ++ case '.': ++ ret = regnode(g, ANY); ++ *flagp |= HASWIDTH|SIMPLE; ++ break; ++ case '[': { ++ register int class; ++ register int classend; ++ ++ if (*g->regparse == '^') { /* Complement of range. */ ++ ret = regnode(g, ANYBUT); ++ g->regparse++; ++ } else ++ ret = regnode(g, ANYOF); ++ if (*g->regparse == ']' || *g->regparse == '-') ++ regc(g, *g->regparse++); ++ while (*g->regparse != '\0' && *g->regparse != ']') { ++ if (*g->regparse == '-') { ++ g->regparse++; ++ if (*g->regparse == ']' || *g->regparse == '\0') ++ regc(g, '-'); ++ else { ++ class = UCHARAT(g->regparse-2)+1; ++ classend = UCHARAT(g->regparse); ++ if (class > classend+1) ++ FAIL("invalid [] range"); ++ for (; class <= classend; class++) ++ regc(g, class); ++ g->regparse++; ++ } ++ } else ++ regc(g, *g->regparse++); ++ } ++ regc(g, '\0'); ++ if (*g->regparse != ']') ++ FAIL("unmatched []"); ++ g->regparse++; ++ *flagp |= HASWIDTH|SIMPLE; ++ } ++ break; ++ case '(': ++ ret = reg(g, 1, &flags); ++ if (ret == NULL) ++ return(NULL); ++ *flagp |= flags&(HASWIDTH|SPSTART); ++ break; ++ case '\0': ++ case '|': ++ case ')': ++ FAIL("internal urp"); /* Supposed to be caught earlier. */ ++ break; ++ case '?': ++ case '+': ++ case '*': ++ FAIL("?+* follows nothing"); ++ break; ++ case '\\': ++ if (*g->regparse == '\0') ++ FAIL("trailing \\"); ++ ret = regnode(g, EXACTLY); ++ regc(g, *g->regparse++); ++ regc(g, '\0'); ++ *flagp |= HASWIDTH|SIMPLE; ++ break; ++ default: { ++ register int len; ++ register char ender; ++ ++ g->regparse--; ++ len = my_strcspn((const char *)g->regparse, (const char *)META); ++ if (len <= 0) ++ FAIL("internal disaster"); ++ ender = *(g->regparse+len); ++ if (len > 1 && ISMULT(ender)) ++ len--; /* Back off clear of ?+* operand. */ ++ *flagp |= HASWIDTH; ++ if (len == 1) ++ *flagp |= SIMPLE; ++ ret = regnode(g, EXACTLY); ++ while (len > 0) { ++ regc(g, *g->regparse++); ++ len--; ++ } ++ regc(g, '\0'); ++ } ++ break; ++ } ++ ++ return(ret); ++} ++ ++/* ++ - regnode - emit a node ++ */ ++static char * /* Location. */ ++regnode(struct match_globals *g, char op) ++{ ++ register char *ret; ++ register char *ptr; ++ ++ ret = g->regcode; ++ if (ret == &g->regdummy) { ++ g->regsize += 3; ++ return(ret); ++ } ++ ++ ptr = ret; ++ *ptr++ = op; ++ *ptr++ = '\0'; /* Null "next" pointer. */ ++ *ptr++ = '\0'; ++ g->regcode = ptr; ++ ++ return(ret); ++} ++ ++/* ++ - regc - emit (if appropriate) a byte of code ++ */ ++static void ++regc(struct match_globals *g, char b) ++{ ++ if (g->regcode != &g->regdummy) ++ *g->regcode++ = b; ++ else ++ g->regsize++; ++} ++ ++/* ++ - reginsert - insert an operator in front of already-emitted operand ++ * ++ * Means relocating the operand. ++ */ ++static void ++reginsert(struct match_globals *g, char op, char* opnd) ++{ ++ register char *src; ++ register char *dst; ++ register char *place; ++ ++ if (g->regcode == &g->regdummy) { ++ g->regsize += 3; ++ return; ++ } ++ ++ src = g->regcode; ++ g->regcode += 3; ++ dst = g->regcode; ++ while (src > opnd) ++ *--dst = *--src; ++ ++ place = opnd; /* Op node, where operand used to be. 
*/ ++ *place++ = op; ++ *place++ = '\0'; ++ *place++ = '\0'; ++} ++ ++/* ++ - regtail - set the next-pointer at the end of a node chain ++ */ ++static void ++regtail(struct match_globals *g, char *p, char *val) ++{ ++ register char *scan; ++ register char *temp; ++ register int offset; ++ ++ if (p == &g->regdummy) ++ return; ++ ++ /* Find last node. */ ++ scan = p; ++ for (;;) { ++ temp = regnext(g, scan); ++ if (temp == NULL) ++ break; ++ scan = temp; ++ } ++ ++ if (OP(scan) == BACK) ++ offset = scan - val; ++ else ++ offset = val - scan; ++ *(scan+1) = (offset>>8)&0377; ++ *(scan+2) = offset&0377; ++} ++ ++/* ++ - regoptail - regtail on operand of first argument; nop if operandless ++ */ ++static void ++regoptail(struct match_globals *g, char *p, char *val) ++{ ++ /* "Operandless" and "op != BRANCH" are synonymous in practice. */ ++ if (p == NULL || p == &g->regdummy || OP(p) != BRANCH) ++ return; ++ regtail(g, OPERAND(p), val); ++} ++ ++/* ++ * regexec and friends ++ */ ++ ++ ++/* ++ * Forwards. ++ */ ++STATIC int regtry(struct match_globals *g, regexp *prog, char *string); ++STATIC int regmatch(struct match_globals *g, char *prog); ++STATIC int regrepeat(struct match_globals *g, char *p); ++ ++#ifdef DEBUG ++int regnarrate = 0; ++void regdump(); ++STATIC char *regprop(char *op); ++#endif ++ ++/* ++ - regexec - match a regexp against a string ++ */ ++int ++regexec(regexp *prog, char *string) ++{ ++ register char *s; ++ struct match_globals g; ++ ++ /* Be paranoid... */ ++ if (prog == NULL || string == NULL) { ++ printk("<3>Regexp: NULL parameter\n"); ++ return(0); ++ } ++ ++ /* Check validity of program. */ ++ if (UCHARAT(prog->program) != MAGIC) { ++ printk("<3>Regexp: corrupted program\n"); ++ return(0); ++ } ++ ++ /* If there is a "must appear" string, look for it. */ ++ if (prog->regmust != NULL) { ++ s = string; ++ while ((s = strchr(s, prog->regmust[0])) != NULL) { ++ if (strncmp(s, prog->regmust, prog->regmlen) == 0) ++ break; /* Found it. */ ++ s++; ++ } ++ if (s == NULL) /* Not present. */ ++ return(0); ++ } ++ ++ /* Mark beginning of line for ^ . */ ++ g.regbol = string; ++ ++ /* Simplest case: anchored match need be tried only once. */ ++ if (prog->reganch) ++ return(regtry(&g, prog, string)); ++ ++ /* Messy cases: unanchored match. */ ++ s = string; ++ if (prog->regstart != '\0') ++ /* We know what char it must start with. */ ++ while ((s = strchr(s, prog->regstart)) != NULL) { ++ if (regtry(&g, prog, s)) ++ return(1); ++ s++; ++ } ++ else ++ /* We don't -- general case. */ ++ do { ++ if (regtry(&g, prog, s)) ++ return(1); ++ } while (*s++ != '\0'); ++ ++ /* Failure. */ ++ return(0); ++} ++ ++/* ++ - regtry - try match at specific point ++ */ ++static int /* 0 failure, 1 success */ ++regtry(struct match_globals *g, regexp *prog, char *string) ++{ ++ register int i; ++ register char **sp; ++ register char **ep; ++ ++ g->reginput = string; ++ g->regstartp = prog->startp; ++ g->regendp = prog->endp; ++ ++ sp = prog->startp; ++ ep = prog->endp; ++ for (i = NSUBEXP; i > 0; i--) { ++ *sp++ = NULL; ++ *ep++ = NULL; ++ } ++ if (regmatch(g, prog->program + 1)) { ++ prog->startp[0] = string; ++ prog->endp[0] = g->reginput; ++ return(1); ++ } else ++ return(0); ++} ++ ++/* ++ - regmatch - main matching routine ++ * ++ * Conceptually the strategy is simple: check to see whether the current ++ * node matches, call self recursively to see whether the rest matches, ++ * and then act accordingly. 
In practice we make some effort to avoid ++ * recursion, in particular by going through "ordinary" nodes (that don't ++ * need to know whether the rest of the match failed) by a loop instead of ++ * by recursion. ++ */ ++static int /* 0 failure, 1 success */ ++regmatch(struct match_globals *g, char *prog) ++{ ++ register char *scan = prog; /* Current node. */ ++ char *next; /* Next node. */ ++ ++#ifdef DEBUG ++ if (scan != NULL && regnarrate) ++ fprintf(stderr, "%s(\n", regprop(scan)); ++#endif ++ while (scan != NULL) { ++#ifdef DEBUG ++ if (regnarrate) ++ fprintf(stderr, "%s...\n", regprop(scan)); ++#endif ++ next = regnext(g, scan); ++ ++ switch (OP(scan)) { ++ case BOL: ++ if (g->reginput != g->regbol) ++ return(0); ++ break; ++ case EOL: ++ if (*g->reginput != '\0') ++ return(0); ++ break; ++ case ANY: ++ if (*g->reginput == '\0') ++ return(0); ++ g->reginput++; ++ break; ++ case EXACTLY: { ++ register int len; ++ register char *opnd; ++ ++ opnd = OPERAND(scan); ++ /* Inline the first character, for speed. */ ++ if (*opnd != *g->reginput) ++ return(0); ++ len = strlen(opnd); ++ if (len > 1 && strncmp(opnd, g->reginput, len) != 0) ++ return(0); ++ g->reginput += len; ++ } ++ break; ++ case ANYOF: ++ if (*g->reginput == '\0' || strchr(OPERAND(scan), *g->reginput) == NULL) ++ return(0); ++ g->reginput++; ++ break; ++ case ANYBUT: ++ if (*g->reginput == '\0' || strchr(OPERAND(scan), *g->reginput) != NULL) ++ return(0); ++ g->reginput++; ++ break; ++ case NOTHING: ++ case BACK: ++ break; ++ case OPEN+1: ++ case OPEN+2: ++ case OPEN+3: ++ case OPEN+4: ++ case OPEN+5: ++ case OPEN+6: ++ case OPEN+7: ++ case OPEN+8: ++ case OPEN+9: { ++ register int no; ++ register char *save; ++ ++ no = OP(scan) - OPEN; ++ save = g->reginput; ++ ++ if (regmatch(g, next)) { ++ /* ++ * Don't set startp if some later ++ * invocation of the same parentheses ++ * already has. ++ */ ++ if (g->regstartp[no] == NULL) ++ g->regstartp[no] = save; ++ return(1); ++ } else ++ return(0); ++ } ++ break; ++ case CLOSE+1: ++ case CLOSE+2: ++ case CLOSE+3: ++ case CLOSE+4: ++ case CLOSE+5: ++ case CLOSE+6: ++ case CLOSE+7: ++ case CLOSE+8: ++ case CLOSE+9: ++ { ++ register int no; ++ register char *save; ++ ++ no = OP(scan) - CLOSE; ++ save = g->reginput; ++ ++ if (regmatch(g, next)) { ++ /* ++ * Don't set endp if some later ++ * invocation of the same parentheses ++ * already has. ++ */ ++ if (g->regendp[no] == NULL) ++ g->regendp[no] = save; ++ return(1); ++ } else ++ return(0); ++ } ++ break; ++ case BRANCH: { ++ register char *save; ++ ++ if (OP(next) != BRANCH) /* No choice. */ ++ next = OPERAND(scan); /* Avoid recursion. */ ++ else { ++ do { ++ save = g->reginput; ++ if (regmatch(g, OPERAND(scan))) ++ return(1); ++ g->reginput = save; ++ scan = regnext(g, scan); ++ } while (scan != NULL && OP(scan) == BRANCH); ++ return(0); ++ /* NOTREACHED */ ++ } ++ } ++ break; ++ case STAR: ++ case PLUS: { ++ register char nextch; ++ register int no; ++ register char *save; ++ register int min; ++ ++ /* ++ * Lookahead to avoid useless match attempts ++ * when we know what character comes next. ++ */ ++ nextch = '\0'; ++ if (OP(next) == EXACTLY) ++ nextch = *OPERAND(next); ++ min = (OP(scan) == STAR) ? 0 : 1; ++ save = g->reginput; ++ no = regrepeat(g, OPERAND(scan)); ++ while (no >= min) { ++ /* If it could work, try it. */ ++ if (nextch == '\0' || *g->reginput == nextch) ++ if (regmatch(g, next)) ++ return(1); ++ /* Couldn't or didn't -- back up. 
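
The STAR/PLUS case above is the usual greedy strategy: regrepeat() grabs as many repetitions as possible and the loop then gives them back one at a time until the rest of the pattern matches. A stand-alone sketch for the simplest shape, a repeated character followed by a literal tail (the helper name is made up):

    #include <stdio.h>
    #include <string.h>

    /* Match c repeated zero or more times, then the literal tail, at the
     * start of s.  Greedy first, then back off, as in the STAR case. */
    static int match_star_then(const char *s, char c, const char *tail)
    {
            int no = 0, min = 0;

            while (s[no] == c)              /* regrepeat(): count repetitions */
                    no++;

            while (no >= min) {             /* back off until the tail fits   */
                    if (strncmp(s + no, tail, strlen(tail)) == 0)
                            return 1;
                    no--;
            }
            return 0;
    }

    int main(void)
    {
            printf("%d\n", match_star_then("aaab", 'a', "ab"));  /* 1 */
            printf("%d\n", match_star_then("aaab", 'a', "ba"));  /* 0 */
            return 0;
    }
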
*/ ++ no--; ++ g->reginput = save + no; ++ } ++ return(0); ++ } ++ break; ++ case END: ++ return(1); /* Success! */ ++ break; ++ default: ++ printk("<3>Regexp: memory corruption\n"); ++ return(0); ++ break; ++ } ++ ++ scan = next; ++ } ++ ++ /* ++ * We get here only if there's trouble -- normally "case END" is ++ * the terminating point. ++ */ ++ printk("<3>Regexp: corrupted pointers\n"); ++ return(0); ++} ++ ++/* ++ - regrepeat - repeatedly match something simple, report how many ++ */ ++static int ++regrepeat(struct match_globals *g, char *p) ++{ ++ register int count = 0; ++ register char *scan; ++ register char *opnd; ++ ++ scan = g->reginput; ++ opnd = OPERAND(p); ++ switch (OP(p)) { ++ case ANY: ++ count = strlen(scan); ++ scan += count; ++ break; ++ case EXACTLY: ++ while (*opnd == *scan) { ++ count++; ++ scan++; ++ } ++ break; ++ case ANYOF: ++ while (*scan != '\0' && strchr(opnd, *scan) != NULL) { ++ count++; ++ scan++; ++ } ++ break; ++ case ANYBUT: ++ while (*scan != '\0' && strchr(opnd, *scan) == NULL) { ++ count++; ++ scan++; ++ } ++ break; ++ default: /* Oh dear. Called inappropriately. */ ++ printk("<3>Regexp: internal foulup\n"); ++ count = 0; /* Best compromise. */ ++ break; ++ } ++ g->reginput = scan; ++ ++ return(count); ++} ++ ++/* ++ - regnext - dig the "next" pointer out of a node ++ */ ++static char* ++regnext(struct match_globals *g, char *p) ++{ ++ register int offset; ++ ++ if (p == &g->regdummy) ++ return(NULL); ++ ++ offset = NEXT(p); ++ if (offset == 0) ++ return(NULL); ++ ++ if (OP(p) == BACK) ++ return(p-offset); ++ else ++ return(p+offset); ++} ++ ++#ifdef DEBUG ++ ++STATIC char *regprop(); ++ ++/* ++ - regdump - dump a regexp onto stdout in vaguely comprehensible form ++ */ ++void ++regdump(regexp *r) ++{ ++ register char *s; ++ register char op = EXACTLY; /* Arbitrary non-END op. */ ++ register char *next; ++ /* extern char *strchr(); */ ++ ++ ++ s = r->program + 1; ++ while (op != END) { /* While that wasn't END last time... */ ++ op = OP(s); ++ printf("%2d%s", s-r->program, regprop(s)); /* Where, what. */ ++ next = regnext(s); ++ if (next == NULL) /* Next ptr. */ ++ printf("(0)"); ++ else ++ printf("(%d)", (s-r->program)+(next-s)); ++ s += 3; ++ if (op == ANYOF || op == ANYBUT || op == EXACTLY) { ++ /* Literal string, where present. */ ++ while (*s != '\0') { ++ putchar(*s); ++ s++; ++ } ++ s++; ++ } ++ putchar('\n'); ++ } ++ ++ /* Header fields of interest. 
*/ ++ if (r->regstart != '\0') ++ printf("start `%c' ", r->regstart); ++ if (r->reganch) ++ printf("anchored "); ++ if (r->regmust != NULL) ++ printf("must have \"%s\"", r->regmust); ++ printf("\n"); ++} ++ ++/* ++ - regprop - printable representation of opcode ++ */ ++static char * ++regprop(char *op) ++{ ++#define BUFLEN 50 ++ register char *p; ++ static char buf[BUFLEN]; ++ ++ strcpy(buf, ":"); ++ ++ switch (OP(op)) { ++ case BOL: ++ p = "BOL"; ++ break; ++ case EOL: ++ p = "EOL"; ++ break; ++ case ANY: ++ p = "ANY"; ++ break; ++ case ANYOF: ++ p = "ANYOF"; ++ break; ++ case ANYBUT: ++ p = "ANYBUT"; ++ break; ++ case BRANCH: ++ p = "BRANCH"; ++ break; ++ case EXACTLY: ++ p = "EXACTLY"; ++ break; ++ case NOTHING: ++ p = "NOTHING"; ++ break; ++ case BACK: ++ p = "BACK"; ++ break; ++ case END: ++ p = "END"; ++ break; ++ case OPEN+1: ++ case OPEN+2: ++ case OPEN+3: ++ case OPEN+4: ++ case OPEN+5: ++ case OPEN+6: ++ case OPEN+7: ++ case OPEN+8: ++ case OPEN+9: ++ snprintf(buf+strlen(buf),BUFLEN-strlen(buf), "OPEN%d", OP(op)-OPEN); ++ p = NULL; ++ break; ++ case CLOSE+1: ++ case CLOSE+2: ++ case CLOSE+3: ++ case CLOSE+4: ++ case CLOSE+5: ++ case CLOSE+6: ++ case CLOSE+7: ++ case CLOSE+8: ++ case CLOSE+9: ++ snprintf(buf+strlen(buf),BUFLEN-strlen(buf), "CLOSE%d", OP(op)-CLOSE); ++ p = NULL; ++ break; ++ case STAR: ++ p = "STAR"; ++ break; ++ case PLUS: ++ p = "PLUS"; ++ break; ++ default: ++ printk("<3>Regexp: corrupted opcode\n"); ++ break; ++ } ++ if (p != NULL) ++ strncat(buf, p, BUFLEN-strlen(buf)); ++ return(buf); ++} ++#endif ++ ++ +--- /dev/null ++++ b/net/netfilter/regexp/regexp.h +@@ -0,0 +1,41 @@ ++/* ++ * Definitions etc. for regexp(3) routines. ++ * ++ * Caveat: this is V8 regexp(3) [actually, a reimplementation thereof], ++ * not the System V one. ++ */ ++ ++#ifndef REGEXP_H ++#define REGEXP_H ++ ++ ++/* ++http://www.opensource.apple.com/darwinsource/10.3/expect-1/expect/expect.h , ++which contains a version of this library, says: ++ ++ * ++ * NSUBEXP must be at least 10, and no greater than 117 or the parser ++ * will not work properly. ++ * ++ ++However, it looks rather like this library is limited to 10. If you think ++otherwise, let us know. ++*/ ++ ++#define NSUBEXP 10 ++typedef struct regexp { ++ char *startp[NSUBEXP]; ++ char *endp[NSUBEXP]; ++ char regstart; /* Internal use only. */ ++ char reganch; /* Internal use only. */ ++ char *regmust; /* Internal use only. */ ++ int regmlen; /* Internal use only. */ ++ char program[1]; /* Unwarranted chumminess with compiler. */ ++} regexp; ++ ++regexp * regcomp(char *exp, int *patternsize); ++int regexec(regexp *prog, char *string); ++void regsub(regexp *prog, char *source, char *dest); ++void regerror(char *s); ++ ++#endif +--- /dev/null ++++ b/net/netfilter/regexp/regmagic.h +@@ -0,0 +1,5 @@ ++/* ++ * The first byte of the regexp internal "program" is actually this magic ++ * number; the start node begins in the second byte. ++ */ ++#define MAGIC 0234 +--- /dev/null ++++ b/net/netfilter/regexp/regsub.c +@@ -0,0 +1,95 @@ ++/* ++ * regsub ++ * @(#)regsub.c 1.3 of 2 April 86 ++ * ++ * Copyright (c) 1986 by University of Toronto. ++ * Written by Henry Spencer. Not derived from licensed software. ++ * ++ * Permission is granted to anyone to use this software for any ++ * purpose on any computer system, and to redistribute it freely, ++ * subject to the following restrictions: ++ * ++ * 1. The author is not responsible for the consequences of use of ++ * this software, no matter how awful, even if they arise ++ * from defects in it. 
++ * ++ * 2. The origin of this software must not be misrepresented, either ++ * by explicit claim or by omission. ++ * ++ * 3. Altered versions must be plainly marked as such, and must not ++ * be misrepresented as being the original software. ++ * ++ * ++ * This code was modified by Ethan Sommer to work within the kernel ++ * (it now uses kmalloc etc..) ++ * ++ */ ++#include "regexp.h" ++#include "regmagic.h" ++#include ++ ++ ++#ifndef CHARBITS ++#define UCHARAT(p) ((int)*(unsigned char *)(p)) ++#else ++#define UCHARAT(p) ((int)*(p)&CHARBITS) ++#endif ++ ++#if 0 ++//void regerror(char * s) ++//{ ++// printk("regexp(3): %s", s); ++// /* NOTREACHED */ ++//} ++#endif ++ ++/* ++ - regsub - perform substitutions after a regexp match ++ */ ++void ++regsub(regexp * prog, char * source, char * dest) ++{ ++ register char *src; ++ register char *dst; ++ register char c; ++ register int no; ++ register int len; ++ ++ /* Not necessary and gcc doesn't like it -MLS */ ++ /*extern char *strncpy();*/ ++ ++ if (prog == NULL || source == NULL || dest == NULL) { ++ regerror("NULL parm to regsub"); ++ return; ++ } ++ if (UCHARAT(prog->program) != MAGIC) { ++ regerror("damaged regexp fed to regsub"); ++ return; ++ } ++ ++ src = source; ++ dst = dest; ++ while ((c = *src++) != '\0') { ++ if (c == '&') ++ no = 0; ++ else if (c == '\\' && '0' <= *src && *src <= '9') ++ no = *src++ - '0'; ++ else ++ no = -1; ++ ++ if (no < 0) { /* Ordinary character. */ ++ if (c == '\\' && (*src == '\\' || *src == '&')) ++ c = *src++; ++ *dst++ = c; ++ } else if (prog->startp[no] != NULL && prog->endp[no] != NULL) { ++ len = prog->endp[no] - prog->startp[no]; ++ (void) strncpy(dst, prog->startp[no], len); ++ dst += len; ++ if (len != 0 && *(dst-1) == '\0') { /* strncpy hit NUL. */ ++ regerror("damaged match string"); ++ return; ++ } ++ } ++ } ++ *dst++ = '\0'; ++} +--- a/net/netfilter/nf_conntrack_core.c ++++ b/net/netfilter/nf_conntrack_core.c +@@ -214,6 +214,14 @@ destroy_conntrack(struct nf_conntrack *n + * too. */ + nf_ct_remove_expectations(ct); + ++ #if defined(CONFIG_NETFILTER_XT_MATCH_LAYER7) || defined(CONFIG_NETFILTER_XT_MATCH_LAYER7_MODULE) ++ if(ct->layer7.app_proto) ++ kfree(ct->layer7.app_proto); ++ if(ct->layer7.app_data) ++ kfree(ct->layer7.app_data); ++ #endif ++ ++ + /* We overload first tuple to link into unconfirmed list. */ + if (!nf_ct_is_confirmed(ct)) { + BUG_ON(hlist_nulls_unhashed(&ct->tuplehash[IP_CT_DIR_ORIGINAL].hnnode)); +--- a/net/netfilter/nf_conntrack_standalone.c ++++ b/net/netfilter/nf_conntrack_standalone.c +@@ -239,6 +239,12 @@ static int ct_seq_show(struct seq_file * + if (ct_show_delta_time(s, ct)) + goto release; + ++#if defined(CONFIG_NETFILTER_XT_MATCH_LAYER7) || defined(CONFIG_NETFILTER_XT_MATCH_LAYER7_MODULE) ++ if(ct->layer7.app_proto && ++ seq_printf(s, "l7proto=%s ", ct->layer7.app_proto)) ++ return -ENOSPC; ++#endif ++ + if (seq_printf(s, "use=%u\n", atomic_read(&ct->ct_general.use))) + goto release; + +--- a/include/net/netfilter/nf_conntrack.h ++++ b/include/net/netfilter/nf_conntrack.h +@@ -134,6 +134,22 @@ struct nf_conn { + struct net *ct_net; + #endif + ++#if defined(CONFIG_NETFILTER_XT_MATCH_LAYER7) || \ ++ defined(CONFIG_NETFILTER_XT_MATCH_LAYER7_MODULE) ++ struct { ++ /* ++ * e.g. "http". NULL before decision. "unknown" after decision ++ * if no match. ++ */ ++ char *app_proto; ++ /* ++ * application layer data so far. NULL after match decision. 
++ */ ++ char *app_data; ++ unsigned int app_data_len; ++ } layer7; ++#endif ++ + /* Storage reserved for other modules, must be the last member */ + union nf_conntrack_proto proto; + }; +--- /dev/null ++++ b/include/linux/netfilter/xt_layer7.h +@@ -0,0 +1,13 @@ ++#ifndef _XT_LAYER7_H ++#define _XT_LAYER7_H ++ ++#define MAX_PATTERN_LEN 8192 ++#define MAX_PROTOCOL_LEN 256 ++ ++struct xt_layer7_info { ++ char protocol[MAX_PROTOCOL_LEN]; ++ char pattern[MAX_PATTERN_LEN]; ++ u_int8_t invert; ++}; ++ ++#endif /* _XT_LAYER7_H */ +--- a/include/linux/netfilter/Kbuild ++++ b/include/linux/netfilter/Kbuild +@@ -49,6 +49,7 @@ header-y += xt_hashlimit.h + header-y += xt_helper.h + header-y += xt_iprange.h + header-y += xt_ipvs.h ++header-y += xt_layer7.h + header-y += xt_length.h + header-y += xt_limit.h + header-y += xt_mac.h diff --git a/3.3.8/601-netfilter_layer7_pktmatch.patch b/3.3.8/601-netfilter_layer7_pktmatch.patch new file mode 100644 index 0000000..f65e301 --- /dev/null +++ b/3.3.8/601-netfilter_layer7_pktmatch.patch @@ -0,0 +1,108 @@ +--- a/include/linux/netfilter/xt_layer7.h ++++ b/include/linux/netfilter/xt_layer7.h +@@ -8,6 +8,7 @@ struct xt_layer7_info { + char protocol[MAX_PROTOCOL_LEN]; + char pattern[MAX_PATTERN_LEN]; + u_int8_t invert; ++ u_int8_t pkt; + }; + + #endif /* _XT_LAYER7_H */ +--- a/net/netfilter/xt_layer7.c ++++ b/net/netfilter/xt_layer7.c +@@ -314,33 +314,35 @@ static int match_no_append(struct nf_con + } + + /* add the new app data to the conntrack. Return number of bytes added. */ +-static int add_data(struct nf_conn * master_conntrack, +- char * app_data, int appdatalen) ++static int add_datastr(char *target, int offset, char *app_data, int len) + { + int length = 0, i; +- int oldlength = master_conntrack->layer7.app_data_len; +- +- /* This is a fix for a race condition by Deti Fliegl. However, I'm not +- clear on whether the race condition exists or whether this really +- fixes it. I might just be being dense... Anyway, if it's not really +- a fix, all it does is waste a very small amount of time. */ +- if(!master_conntrack->layer7.app_data) return 0; ++ if (!target) return 0; + + /* Strip nulls. Make everything lower case (our regex lib doesn't + do case insensitivity). Add it to the end of the current data. */ +- for(i = 0; i < maxdatalen-oldlength-1 && +- i < appdatalen; i++) { ++ for(i = 0; i < maxdatalen-offset-1 && i < len; i++) { + if(app_data[i] != '\0') { + /* the kernel version of tolower mungs 'upper ascii' */ +- master_conntrack->layer7.app_data[length+oldlength] = ++ target[length+offset] = + isascii(app_data[i])? + tolower(app_data[i]) : app_data[i]; + length++; + } + } ++ target[length+offset] = '\0'; ++ ++ return length; ++} ++ ++/* add the new app data to the conntrack. Return number of bytes added. 
*/ ++static int add_data(struct nf_conn * master_conntrack, ++ char * app_data, int appdatalen) ++{ ++ int length; + +- master_conntrack->layer7.app_data[length+oldlength] = '\0'; +- master_conntrack->layer7.app_data_len = length + oldlength; ++ length = add_datastr(master_conntrack->layer7.app_data, master_conntrack->layer7.app_data_len, app_data, appdatalen); ++ master_conntrack->layer7.app_data_len += length; + + return length; + } +@@ -438,7 +440,7 @@ match(const struct sk_buff *skbin, + + enum ip_conntrack_info master_ctinfo, ctinfo; + struct nf_conn *master_conntrack, *conntrack; +- unsigned char * app_data; ++ unsigned char *app_data, *tmp_data; + unsigned int pattern_result, appdatalen; + regexp * comppattern; + +@@ -466,8 +468,8 @@ match(const struct sk_buff *skbin, + master_conntrack = master_ct(master_conntrack); + + /* if we've classified it or seen too many packets */ +- if(total_acct_packets(master_conntrack) > num_packets || +- master_conntrack->layer7.app_proto) { ++ if(!info->pkt && (total_acct_packets(master_conntrack) > num_packets || ++ master_conntrack->layer7.app_proto)) { + + pattern_result = match_no_append(conntrack, master_conntrack, + ctinfo, master_ctinfo, info); +@@ -500,6 +502,25 @@ match(const struct sk_buff *skbin, + /* the return value gets checked later, when we're ready to use it */ + comppattern = compile_and_cache(info->pattern, info->protocol); + ++ if (info->pkt) { ++ tmp_data = kmalloc(maxdatalen, GFP_ATOMIC); ++ if(!tmp_data){ ++ if (net_ratelimit()) ++ printk(KERN_ERR "layer7: out of memory in match, bailing.\n"); ++ return info->invert; ++ } ++ ++ tmp_data[0] = '\0'; ++ add_datastr(tmp_data, 0, app_data, appdatalen); ++ pattern_result = ((comppattern && regexec(comppattern, tmp_data)) ? 1 : 0); ++ ++ kfree(tmp_data); ++ tmp_data = NULL; ++ spin_unlock_bh(&l7_lock); ++ ++ return (pattern_result ^ info->invert); ++ } ++ + /* On the first packet of a connection, allocate space for app data */ + if(total_acct_packets(master_conntrack) == 1 && !skb->cb[0] && + !master_conntrack->layer7.app_data){ diff --git a/3.3.8/602-netfilter_layer7_match.patch b/3.3.8/602-netfilter_layer7_match.patch new file mode 100644 index 0000000..b2e48c8 --- /dev/null +++ b/3.3.8/602-netfilter_layer7_match.patch @@ -0,0 +1,51 @@ +--- a/net/netfilter/xt_layer7.c ++++ b/net/netfilter/xt_layer7.c +@@ -415,7 +415,9 @@ static int layer7_write_proc(struct file + } + + static bool +-#if LINUX_VERSION_CODE >= KERNEL_VERSION(2, 6, 28) ++#if LINUX_VERSION_CODE >= KERNEL_VERSION(2, 6, 35) ++match(const struct sk_buff *skbin, struct xt_action_param *par) ++#elif LINUX_VERSION_CODE >= KERNEL_VERSION(2, 6, 28) + match(const struct sk_buff *skbin, const struct xt_match_param *par) + #else + match(const struct sk_buff *skbin, +@@ -597,14 +599,19 @@ match(const struct sk_buff *skbin, + } + + // load nf_conntrack_ipv4 ++#if LINUX_VERSION_CODE >= KERNEL_VERSION(2, 6, 35) ++static int ++#else ++static bool ++#endif + #if LINUX_VERSION_CODE >= KERNEL_VERSION(2, 6, 28) +-static bool check(const struct xt_mtchk_param *par) ++check(const struct xt_mtchk_param *par) + { + if (nf_ct_l3proto_try_module_get(par->match->family) < 0) { + printk(KERN_WARNING "can't load conntrack support for " + "proto=%d\n", par->match->family); + #else +-static bool check(const char *tablename, const void *inf, ++check(const char *tablename, const void *inf, + const struct xt_match *match, void *matchinfo, + unsigned int hook_mask) + { +@@ -612,9 +619,15 @@ static bool check(const char *tablename, + printk(KERN_WARNING 
"can't load conntrack support for " + "proto=%d\n", match->family); + #endif ++#if LINUX_VERSION_CODE >= KERNEL_VERSION(2, 6, 35) ++ return -EINVAL; ++ } ++ return 0; ++#else + return 0; + } + return 1; ++#endif + } + + diff --git a/3.3.8/603-netfilter_layer7_2.6.36_fix.patch b/3.3.8/603-netfilter_layer7_2.6.36_fix.patch new file mode 100644 index 0000000..92a7200 --- /dev/null +++ b/3.3.8/603-netfilter_layer7_2.6.36_fix.patch @@ -0,0 +1,61 @@ +--- a/net/netfilter/Kconfig ++++ b/net/netfilter/Kconfig +@@ -857,6 +857,27 @@ config NETFILTER_XT_MATCH_IPVS + + If unsure, say N. + ++config NETFILTER_XT_MATCH_LAYER7 ++ tristate '"layer7" match support' ++ depends on EXPERIMENTAL ++ depends on NETFILTER_XTABLES ++ depends on NETFILTER_ADVANCED ++ depends on NF_CONNTRACK ++ help ++ Say Y if you want to be able to classify connections (and their ++ packets) based on regular expression matching of their application ++ layer data. This is one way to classify applications such as ++ peer-to-peer filesharing systems that do not always use the same ++ port. ++ ++ To compile it as a module, choose M here. If unsure, say N. ++ ++config NETFILTER_XT_MATCH_LAYER7_DEBUG ++ bool 'Layer 7 debugging output' ++ depends on NETFILTER_XT_MATCH_LAYER7 ++ help ++ Say Y to get lots of debugging output. ++ + config NETFILTER_XT_MATCH_LENGTH + tristate '"length" match support' + depends on NETFILTER_ADVANCED +@@ -1053,26 +1074,11 @@ config NETFILTER_XT_MATCH_STATE + + To compile it as a module, choose M here. If unsure, say N. + +-config NETFILTER_XT_MATCH_LAYER7 +- tristate '"layer7" match support' +- depends on NETFILTER_XTABLES +- depends on EXPERIMENTAL && (IP_NF_CONNTRACK || NF_CONNTRACK) +- depends on NETFILTER_ADVANCED +- help +- Say Y if you want to be able to classify connections (and their +- packets) based on regular expression matching of their application +- layer data. This is one way to classify applications such as +- peer-to-peer filesharing systems that do not always use the same +- port. +- +- To compile it as a module, choose M here. If unsure, say N. +- + config NETFILTER_XT_MATCH_LAYER7_DEBUG +- bool 'Layer 7 debugging output' +- depends on NETFILTER_XT_MATCH_LAYER7 +- help +- Say Y to get lots of debugging output. +- ++ bool 'Layer 7 debugging output' ++ depends on NETFILTER_XT_MATCH_LAYER7 ++ help ++ Say Y to get lots of debugging output. 
+ + config NETFILTER_XT_MATCH_STATISTIC + tristate '"statistic" match support' diff --git a/3.3.8/604-netfilter_cisco_794x_iphone.patch b/3.3.8/604-netfilter_cisco_794x_iphone.patch new file mode 100644 index 0000000..662a499 --- /dev/null +++ b/3.3.8/604-netfilter_cisco_794x_iphone.patch @@ -0,0 +1,118 @@ +--- a/include/linux/netfilter/nf_conntrack_sip.h ++++ b/include/linux/netfilter/nf_conntrack_sip.h +@@ -2,12 +2,15 @@ + #define __NF_CONNTRACK_SIP_H__ + #ifdef __KERNEL__ + ++#include ++ + #define SIP_PORT 5060 + #define SIP_TIMEOUT 3600 + + struct nf_ct_sip_master { + unsigned int register_cseq; + unsigned int invite_cseq; ++ __be16 forced_dport; + }; + + enum sip_expectation_classes { +--- a/net/ipv4/netfilter/nf_nat_sip.c ++++ b/net/ipv4/netfilter/nf_nat_sip.c +@@ -73,6 +73,7 @@ static int map_addr(struct sk_buff *skb, + enum ip_conntrack_info ctinfo; + struct nf_conn *ct = nf_ct_get(skb, &ctinfo); + enum ip_conntrack_dir dir = CTINFO2DIR(ctinfo); ++ struct nf_conn_help *help = nfct_help(ct); + char buffer[sizeof("nnn.nnn.nnn.nnn:nnnnn")]; + unsigned int buflen; + __be32 newaddr; +@@ -85,7 +86,8 @@ static int map_addr(struct sk_buff *skb, + } else if (ct->tuplehash[dir].tuple.dst.u3.ip == addr->ip && + ct->tuplehash[dir].tuple.dst.u.udp.port == port) { + newaddr = ct->tuplehash[!dir].tuple.src.u3.ip; +- newport = ct->tuplehash[!dir].tuple.src.u.udp.port; ++ newport = help->help.ct_sip_info.forced_dport ? : ++ ct->tuplehash[!dir].tuple.src.u.udp.port; + } else + return 1; + +@@ -121,6 +123,7 @@ static unsigned int ip_nat_sip(struct sk + enum ip_conntrack_info ctinfo; + struct nf_conn *ct = nf_ct_get(skb, &ctinfo); + enum ip_conntrack_dir dir = CTINFO2DIR(ctinfo); ++ struct nf_conn_help *help = nfct_help(ct); + unsigned int coff, matchoff, matchlen; + enum sip_header_types hdr; + union nf_inet_addr addr; +@@ -229,6 +232,20 @@ next: + !map_sip_addr(skb, dataoff, dptr, datalen, SIP_HDR_TO)) + return NF_DROP; + ++ /* Mangle destination port for Cisco phones, then fix up checksums */ ++ if (dir == IP_CT_DIR_REPLY && help->help.ct_sip_info.forced_dport) { ++ struct udphdr *uh; ++ ++ if (!skb_make_writable(skb, skb->len)) ++ return NF_DROP; ++ ++ uh = (struct udphdr *)(skb->data + ip_hdrlen(skb)); ++ uh->dest = help->help.ct_sip_info.forced_dport; ++ ++ if (!nf_nat_mangle_udp_packet(skb, ct, ctinfo, 0, 0, NULL, 0)) ++ return NF_DROP; ++ } ++ + return NF_ACCEPT; + } + +@@ -280,8 +297,10 @@ static unsigned int ip_nat_sip_expect(st + enum ip_conntrack_info ctinfo; + struct nf_conn *ct = nf_ct_get(skb, &ctinfo); + enum ip_conntrack_dir dir = CTINFO2DIR(ctinfo); ++ struct nf_conn_help *help = nfct_help(ct); + __be32 newip; + u_int16_t port; ++ __be16 srcport; + char buffer[sizeof("nnn.nnn.nnn.nnn:nnnnn")]; + unsigned buflen; + +@@ -294,8 +313,9 @@ static unsigned int ip_nat_sip_expect(st + /* If the signalling port matches the connection's source port in the + * original direction, try to use the destination port in the opposite + * direction. */ +- if (exp->tuple.dst.u.udp.port == +- ct->tuplehash[dir].tuple.src.u.udp.port) ++ srcport = help->help.ct_sip_info.forced_dport ? 
: ++ ct->tuplehash[dir].tuple.src.u.udp.port; ++ if (exp->tuple.dst.u.udp.port == srcport) + port = ntohs(ct->tuplehash[!dir].tuple.dst.u.udp.port); + else + port = ntohs(exp->tuple.dst.u.udp.port); +--- a/net/netfilter/nf_conntrack_sip.c ++++ b/net/netfilter/nf_conntrack_sip.c +@@ -1363,8 +1363,25 @@ static int process_sip_request(struct sk + { + enum ip_conntrack_info ctinfo; + struct nf_conn *ct = nf_ct_get(skb, &ctinfo); ++ struct nf_conn_help *help = nfct_help(ct); ++ enum ip_conntrack_dir dir = CTINFO2DIR(ctinfo); + unsigned int matchoff, matchlen; + unsigned int cseq, i; ++ union nf_inet_addr addr; ++ __be16 port; ++ ++ /* Many Cisco IP phones use a high source port for SIP requests, but ++ * listen for the response on port 5060. If we are the local ++ * router for one of these phones, save the port number from the ++ * Via: header so that nf_nat_sip can redirect the responses to ++ * the correct port. ++ */ ++ if (ct_sip_parse_header_uri(ct, *dptr, NULL, *datalen, ++ SIP_HDR_VIA_UDP, NULL, &matchoff, ++ &matchlen, &addr, &port) > 0 && ++ port != ct->tuplehash[dir].tuple.src.u.udp.port && ++ nf_inet_addr_cmp(&addr, &ct->tuplehash[dir].tuple.src.u3)) ++ help->help.ct_sip_info.forced_dport = port; + + for (i = 0; i < ARRAY_SIZE(sip_handlers); i++) { + const struct sip_handler *handler; diff --git a/3.3.8/610-netfilter_match_bypass_default_checks.patch b/3.3.8/610-netfilter_match_bypass_default_checks.patch new file mode 100644 index 0000000..51c9e09 --- /dev/null +++ b/3.3.8/610-netfilter_match_bypass_default_checks.patch @@ -0,0 +1,93 @@ +--- a/include/linux/netfilter_ipv4/ip_tables.h ++++ b/include/linux/netfilter_ipv4/ip_tables.h +@@ -93,6 +93,7 @@ struct ipt_ip { + #define IPT_F_FRAG 0x01 /* Set if rule is a fragment rule */ + #define IPT_F_GOTO 0x02 /* Set if jump is a goto */ + #define IPT_F_MASK 0x03 /* All possible flag bits mask. */ ++#define IPT_F_NO_DEF_MATCH 0x80 /* Internal: no default match rules present */ + + /* Values for "inv" field in struct ipt_ip. */ + #define IPT_INV_VIA_IN 0x01 /* Invert the sense of IN IFACE. 
*/ +--- a/net/ipv4/netfilter/ip_tables.c ++++ b/net/ipv4/netfilter/ip_tables.c +@@ -81,6 +81,9 @@ ip_packet_match(const struct iphdr *ip, + + #define FWINV(bool, invflg) ((bool) ^ !!(ipinfo->invflags & (invflg))) + ++ if (ipinfo->flags & IPT_F_NO_DEF_MATCH) ++ return true; ++ + if (FWINV((ip->saddr&ipinfo->smsk.s_addr) != ipinfo->src.s_addr, + IPT_INV_SRCIP) || + FWINV((ip->daddr&ipinfo->dmsk.s_addr) != ipinfo->dst.s_addr, +@@ -134,6 +137,29 @@ ip_packet_match(const struct iphdr *ip, + return true; + } + ++static void ++ip_checkdefault(struct ipt_ip *ip) ++{ ++ static const char iface_mask[IFNAMSIZ] = {}; ++ ++ if (ip->invflags || ip->flags & IPT_F_FRAG) ++ return; ++ ++ if (memcmp(ip->iniface_mask, iface_mask, IFNAMSIZ) != 0) ++ return; ++ ++ if (memcmp(ip->outiface_mask, iface_mask, IFNAMSIZ) != 0) ++ return; ++ ++ if (ip->smsk.s_addr || ip->dmsk.s_addr) ++ return; ++ ++ if (ip->proto) ++ return; ++ ++ ip->flags |= IPT_F_NO_DEF_MATCH; ++} ++ + static bool + ip_checkentry(const struct ipt_ip *ip) + { +@@ -561,7 +587,7 @@ static void cleanup_match(struct xt_entr + } + + static int +-check_entry(const struct ipt_entry *e, const char *name) ++check_entry(struct ipt_entry *e, const char *name) + { + const struct xt_entry_target *t; + +@@ -570,6 +596,8 @@ check_entry(const struct ipt_entry *e, c + return -EINVAL; + } + ++ ip_checkdefault(&e->ip); ++ + if (e->target_offset + sizeof(struct xt_entry_target) > + e->next_offset) + return -EINVAL; +@@ -931,6 +959,7 @@ copy_entries_to_user(unsigned int total_ + const struct xt_table_info *private = table->private; + int ret = 0; + const void *loc_cpu_entry; ++ u8 flags; + + counters = alloc_counters(table); + if (IS_ERR(counters)) +@@ -961,6 +990,14 @@ copy_entries_to_user(unsigned int total_ + ret = -EFAULT; + goto free_counters; + } ++ ++ flags = e->ip.flags & IPT_F_MASK; ++ if (copy_to_user(userptr + off ++ + offsetof(struct ipt_entry, ip.flags), ++ &flags, sizeof(flags)) != 0) { ++ ret = -EFAULT; ++ goto free_counters; ++ } + + for (i = sizeof(struct ipt_entry); + i < e->target_offset; diff --git a/3.3.8/611-netfilter_match_bypass_default_table.patch b/3.3.8/611-netfilter_match_bypass_default_table.patch new file mode 100644 index 0000000..3cf0e5a --- /dev/null +++ b/3.3.8/611-netfilter_match_bypass_default_table.patch @@ -0,0 +1,81 @@ +--- a/net/ipv4/netfilter/ip_tables.c ++++ b/net/ipv4/netfilter/ip_tables.c +@@ -310,6 +310,33 @@ struct ipt_entry *ipt_next_entry(const s + return (void *)entry + entry->next_offset; + } + ++static bool ++ipt_handle_default_rule(struct ipt_entry *e, unsigned int *verdict) ++{ ++ struct xt_entry_target *t; ++ struct xt_standard_target *st; ++ ++ if (e->target_offset != sizeof(struct ipt_entry)) ++ return false; ++ ++ if (!(e->ip.flags & IPT_F_NO_DEF_MATCH)) ++ return false; ++ ++ t = ipt_get_target(e); ++ if (t->u.kernel.target->target) ++ return false; ++ ++ st = (struct xt_standard_target *) t; ++ if (st->verdict == XT_RETURN) ++ return false; ++ ++ if (st->verdict >= 0) ++ return false; ++ ++ *verdict = (unsigned)(-st->verdict) - 1; ++ return true; ++} ++ + /* Returns one of the generic firewall policies, like NF_ACCEPT. */ + unsigned int + ipt_do_table(struct sk_buff *skb, +@@ -334,6 +361,25 @@ ipt_do_table(struct sk_buff *skb, + ip = ip_hdr(skb); + indev = in ? in->name : nulldevname; + outdev = out ? 
out->name : nulldevname; ++ ++ IP_NF_ASSERT(table->valid_hooks & (1 << hook)); ++ local_bh_disable(); ++ addend = xt_write_recseq_begin(); ++ private = table->private; ++ cpu = smp_processor_id(); ++ table_base = private->entries[cpu]; ++ jumpstack = (struct ipt_entry **)private->jumpstack[cpu]; ++ stackptr = per_cpu_ptr(private->stackptr, cpu); ++ origptr = *stackptr; ++ ++ e = get_entry(table_base, private->hook_entry[hook]); ++ if (ipt_handle_default_rule(e, &verdict)) { ++ ADD_COUNTER(e->counters, skb->len, 1); ++ xt_write_recseq_end(addend); ++ local_bh_enable(); ++ return verdict; ++ } ++ + /* We handle fragments by dealing with the first fragment as + * if it was a normal packet. All other fragments are treated + * normally, except that they will NEVER match rules that ask +@@ -348,18 +394,6 @@ ipt_do_table(struct sk_buff *skb, + acpar.family = NFPROTO_IPV4; + acpar.hooknum = hook; + +- IP_NF_ASSERT(table->valid_hooks & (1 << hook)); +- local_bh_disable(); +- addend = xt_write_recseq_begin(); +- private = table->private; +- cpu = smp_processor_id(); +- table_base = private->entries[cpu]; +- jumpstack = (struct ipt_entry **)private->jumpstack[cpu]; +- stackptr = per_cpu_ptr(private->stackptr, cpu); +- origptr = *stackptr; +- +- e = get_entry(table_base, private->hook_entry[hook]); +- + pr_debug("Entering %s(hook %u); sp at %u (UF %p)\n", + table->name, hook, origptr, + get_entry(table_base, private->underflow[hook])); diff --git a/3.3.8/612-netfilter_match_reduce_memory_access.patch b/3.3.8/612-netfilter_match_reduce_memory_access.patch new file mode 100644 index 0000000..f506165 --- /dev/null +++ b/3.3.8/612-netfilter_match_reduce_memory_access.patch @@ -0,0 +1,16 @@ +--- a/net/ipv4/netfilter/ip_tables.c ++++ b/net/ipv4/netfilter/ip_tables.c +@@ -84,9 +84,11 @@ ip_packet_match(const struct iphdr *ip, + if (ipinfo->flags & IPT_F_NO_DEF_MATCH) + return true; + +- if (FWINV((ip->saddr&ipinfo->smsk.s_addr) != ipinfo->src.s_addr, ++ if (FWINV(ipinfo->smsk.s_addr && ++ (ip->saddr&ipinfo->smsk.s_addr) != ipinfo->src.s_addr, + IPT_INV_SRCIP) || +- FWINV((ip->daddr&ipinfo->dmsk.s_addr) != ipinfo->dst.s_addr, ++ FWINV(ipinfo->dmsk.s_addr && ++ (ip->daddr&ipinfo->dmsk.s_addr) != ipinfo->dst.s_addr, + IPT_INV_DSTIP)) { + dprintf("Source or dest mismatch.\n"); + diff --git a/3.3.8/613-netfilter_optional_tcp_window_check.patch b/3.3.8/613-netfilter_optional_tcp_window_check.patch new file mode 100644 index 0000000..1c259d4 --- /dev/null +++ b/3.3.8/613-netfilter_optional_tcp_window_check.patch @@ -0,0 +1,36 @@ +--- a/net/netfilter/nf_conntrack_proto_tcp.c ++++ b/net/netfilter/nf_conntrack_proto_tcp.c +@@ -29,6 +29,9 @@ + #include + #include + ++/* Do not check the TCP window for incoming packets */ ++static int nf_ct_tcp_no_window_check __read_mostly = 1; ++ + /* "Be conservative in what you do, + be liberal in what you accept from others." + If it's non-zero, we mark only out of window RST segments as INVALID. */ +@@ -524,6 +527,9 @@ static bool tcp_in_window(const struct n + s16 receiver_offset; + bool res; + ++ if (nf_ct_tcp_no_window_check) ++ return true; ++ + /* + * Get the required data from the packet. 
+ */ +@@ -1321,6 +1327,13 @@ static struct ctl_table tcp_sysctl_table + .proc_handler = proc_dointvec, + }, + { ++ .procname = "nf_conntrack_tcp_no_window_check", ++ .data = &nf_ct_tcp_no_window_check, ++ .maxlen = sizeof(unsigned int), ++ .mode = 0644, ++ .proc_handler = proc_dointvec, ++ }, ++ { + .procname = "nf_conntrack_tcp_be_liberal", + .data = &nf_ct_tcp_be_liberal, + .maxlen = sizeof(unsigned int), diff --git a/3.3.8/620-sched_esfq.patch b/3.3.8/620-sched_esfq.patch new file mode 100644 index 0000000..1fdf09d --- /dev/null +++ b/3.3.8/620-sched_esfq.patch @@ -0,0 +1,791 @@ +--- a/include/linux/pkt_sched.h ++++ b/include/linux/pkt_sched.h +@@ -193,6 +193,33 @@ struct tc_sfq_xstats { + __s32 allot; + }; + ++/* ESFQ section */ ++ ++enum ++{ ++ /* traditional */ ++ TCA_SFQ_HASH_CLASSIC, ++ TCA_SFQ_HASH_DST, ++ TCA_SFQ_HASH_SRC, ++ TCA_SFQ_HASH_FWMARK, ++ /* conntrack */ ++ TCA_SFQ_HASH_CTORIGDST, ++ TCA_SFQ_HASH_CTORIGSRC, ++ TCA_SFQ_HASH_CTREPLDST, ++ TCA_SFQ_HASH_CTREPLSRC, ++ TCA_SFQ_HASH_CTNATCHG, ++}; ++ ++struct tc_esfq_qopt ++{ ++ unsigned quantum; /* Bytes per round allocated to flow */ ++ int perturb_period; /* Period of hash perturbation */ ++ __u32 limit; /* Maximal packets in queue */ ++ unsigned divisor; /* Hash divisor */ ++ unsigned flows; /* Maximal number of flows */ ++ unsigned hash_kind; /* Hash function to use for flow identification */ ++}; ++ + /* RED section */ + + enum { +--- a/net/sched/Kconfig ++++ b/net/sched/Kconfig +@@ -148,6 +148,37 @@ config NET_SCH_SFQ + To compile this code as a module, choose M here: the + module will be called sch_sfq. + ++config NET_SCH_ESFQ ++ tristate "Enhanced Stochastic Fairness Queueing (ESFQ)" ++ ---help--- ++ Say Y here if you want to use the Enhanced Stochastic Fairness ++ Queueing (ESFQ) packet scheduling algorithm for some of your network ++ devices or as a leaf discipline for a classful qdisc such as HTB or ++ CBQ (see the top of for details and ++ references to the SFQ algorithm). ++ ++ This is an enchanced SFQ version which allows you to control some ++ hardcoded values in the SFQ scheduler. ++ ++ ESFQ also adds control of the hash function used to identify packet ++ flows. The original SFQ discipline hashes by connection; ESFQ add ++ several other hashing methods, such as by src IP or by dst IP, which ++ can be more fair to users in some networking situations. ++ ++ To compile this code as a module, choose M here: the ++ module will be called sch_esfq. ++ ++config NET_SCH_ESFQ_NFCT ++ bool "Connection Tracking Hash Types" ++ depends on NET_SCH_ESFQ && NF_CONNTRACK ++ ---help--- ++ Say Y here to enable support for hashing based on netfilter connection ++ tracking information. This is useful for a router that is also using ++ NAT to connect privately-addressed hosts to the Internet. If you want ++ to provide fair distribution of upstream bandwidth, ESFQ must use ++ connection tracking information, since all outgoing packets will share ++ the same source address. 
++ + config NET_SCH_TEQL + tristate "True Link Equalizer (TEQL)" + ---help--- +--- a/net/sched/Makefile ++++ b/net/sched/Makefile +@@ -26,6 +26,7 @@ obj-$(CONFIG_NET_SCH_INGRESS) += sch_ing + obj-$(CONFIG_NET_SCH_DSMARK) += sch_dsmark.o + obj-$(CONFIG_NET_SCH_SFB) += sch_sfb.o + obj-$(CONFIG_NET_SCH_SFQ) += sch_sfq.o ++obj-$(CONFIG_NET_SCH_ESFQ) += sch_esfq.o + obj-$(CONFIG_NET_SCH_TBF) += sch_tbf.o + obj-$(CONFIG_NET_SCH_TEQL) += sch_teql.o + obj-$(CONFIG_NET_SCH_PRIO) += sch_prio.o +--- /dev/null ++++ b/net/sched/sch_esfq.c +@@ -0,0 +1,702 @@ ++/* ++ * net/sched/sch_esfq.c Extended Stochastic Fairness Queueing discipline. ++ * ++ * This program is free software; you can redistribute it and/or ++ * modify it under the terms of the GNU General Public License ++ * as published by the Free Software Foundation; either version ++ * 2 of the License, or (at your option) any later version. ++ * ++ * Authors: Alexey Kuznetsov, ++ * ++ * Changes: Alexander Atanasov, ++ * Added dynamic depth,limit,divisor,hash_kind options. ++ * Added dst and src hashes. ++ * ++ * Alexander Clouter, ++ * Ported ESFQ to Linux 2.6. ++ * ++ * Corey Hickey, ++ * Maintenance of the Linux 2.6 port. ++ * Added fwmark hash (thanks to Robert Kurjata). ++ * Added usage of jhash. ++ * Added conntrack support. ++ * Added ctnatchg hash (thanks to Ben Pfountz). ++ */ ++ ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#ifdef CONFIG_NET_SCH_ESFQ_NFCT ++#include ++#endif ++ ++/* Stochastic Fairness Queuing algorithm. ++ For more comments look at sch_sfq.c. ++ The difference is that you can change limit, depth, ++ hash table size and choose alternate hash types. ++ ++ classic: same as in sch_sfq.c ++ dst: destination IP address ++ src: source IP address ++ fwmark: netfilter mark value ++ ctorigdst: original destination IP address ++ ctorigsrc: original source IP address ++ ctrepldst: reply destination IP address ++ ctreplsrc: reply source IP ++ ++*/ ++ ++#define ESFQ_HEAD 0 ++#define ESFQ_TAIL 1 ++ ++/* This type should contain at least SFQ_DEPTH*2 values */ ++typedef unsigned int esfq_index; ++ ++struct esfq_head ++{ ++ esfq_index next; ++ esfq_index prev; ++}; ++ ++struct esfq_sched_data ++{ ++/* Parameters */ ++ int perturb_period; ++ unsigned quantum; /* Allotment per round: MUST BE >= MTU */ ++ int limit; ++ unsigned depth; ++ unsigned hash_divisor; ++ unsigned hash_kind; ++/* Variables */ ++ struct timer_list perturb_timer; ++ int perturbation; ++ esfq_index tail; /* Index of current slot in round */ ++ esfq_index max_depth; /* Maximal depth */ ++ ++ esfq_index *ht; /* Hash table */ ++ esfq_index *next; /* Active slots link */ ++ short *allot; /* Current allotment per slot */ ++ unsigned short *hash; /* Hash value indexed by slots */ ++ struct sk_buff_head *qs; /* Slot queue */ ++ struct esfq_head *dep; /* Linked list of slots, indexed by depth */ ++}; ++ ++/* This contains the info we will hash. 
*/ ++struct esfq_packet_info ++{ ++ u32 proto; /* protocol or port */ ++ u32 src; /* source from packet header */ ++ u32 dst; /* destination from packet header */ ++ u32 ctorigsrc; /* original source from conntrack */ ++ u32 ctorigdst; /* original destination from conntrack */ ++ u32 ctreplsrc; /* reply source from conntrack */ ++ u32 ctrepldst; /* reply destination from conntrack */ ++ u32 mark; /* netfilter mark (fwmark) */ ++}; ++ ++static __inline__ unsigned esfq_jhash_1word(struct esfq_sched_data *q,u32 a) ++{ ++ return jhash_1word(a, q->perturbation) & (q->hash_divisor-1); ++} ++ ++static __inline__ unsigned esfq_jhash_2words(struct esfq_sched_data *q, u32 a, u32 b) ++{ ++ return jhash_2words(a, b, q->perturbation) & (q->hash_divisor-1); ++} ++ ++static __inline__ unsigned esfq_jhash_3words(struct esfq_sched_data *q, u32 a, u32 b, u32 c) ++{ ++ return jhash_3words(a, b, c, q->perturbation) & (q->hash_divisor-1); ++} ++ ++static unsigned esfq_hash(struct esfq_sched_data *q, struct sk_buff *skb) ++{ ++ struct esfq_packet_info info; ++#ifdef CONFIG_NET_SCH_ESFQ_NFCT ++ enum ip_conntrack_info ctinfo; ++ struct nf_conn *ct = nf_ct_get(skb, &ctinfo); ++#endif ++ ++ switch (skb->protocol) { ++ case __constant_htons(ETH_P_IP): ++ { ++ struct iphdr *iph = ip_hdr(skb); ++ info.dst = iph->daddr; ++ info.src = iph->saddr; ++ if (!(iph->frag_off&htons(IP_MF|IP_OFFSET)) && ++ (iph->protocol == IPPROTO_TCP || ++ iph->protocol == IPPROTO_UDP || ++ iph->protocol == IPPROTO_SCTP || ++ iph->protocol == IPPROTO_DCCP || ++ iph->protocol == IPPROTO_ESP)) ++ info.proto = *(((u32*)iph) + iph->ihl); ++ else ++ info.proto = iph->protocol; ++ break; ++ } ++ case __constant_htons(ETH_P_IPV6): ++ { ++ struct ipv6hdr *iph = ipv6_hdr(skb); ++ /* Hash ipv6 addresses into a u32. This isn't ideal, ++ * but the code is simple. */ ++ info.dst = jhash2(iph->daddr.s6_addr32, 4, q->perturbation); ++ info.src = jhash2(iph->saddr.s6_addr32, 4, q->perturbation); ++ if (iph->nexthdr == IPPROTO_TCP || ++ iph->nexthdr == IPPROTO_UDP || ++ iph->nexthdr == IPPROTO_SCTP || ++ iph->nexthdr == IPPROTO_DCCP || ++ iph->nexthdr == IPPROTO_ESP) ++ info.proto = *(u32*)&iph[1]; ++ else ++ info.proto = iph->nexthdr; ++ break; ++ } ++ default: ++ info.dst = (u32)(unsigned long)skb_dst(skb); ++ info.src = (u32)(unsigned long)skb->sk; ++ info.proto = skb->protocol; ++ } ++ ++ info.mark = skb->mark; ++ ++#ifdef CONFIG_NET_SCH_ESFQ_NFCT ++ /* defaults if there is no conntrack info */ ++ info.ctorigsrc = info.src; ++ info.ctorigdst = info.dst; ++ info.ctreplsrc = info.dst; ++ info.ctrepldst = info.src; ++ /* collect conntrack info */ ++ if (ct && ct != &nf_conntrack_untracked) { ++ if (skb->protocol == __constant_htons(ETH_P_IP)) { ++ info.ctorigsrc = ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.src.u3.ip; ++ info.ctorigdst = ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.dst.u3.ip; ++ info.ctreplsrc = ct->tuplehash[IP_CT_DIR_REPLY].tuple.src.u3.ip; ++ info.ctrepldst = ct->tuplehash[IP_CT_DIR_REPLY].tuple.dst.u3.ip; ++ } ++ else if (skb->protocol == __constant_htons(ETH_P_IPV6)) { ++ /* Again, hash ipv6 addresses into a single u32. 
*/ ++ info.ctorigsrc = jhash2(ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.src.u3.ip6, 4, q->perturbation); ++ info.ctorigdst = jhash2(ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.dst.u3.ip6, 4, q->perturbation); ++ info.ctreplsrc = jhash2(ct->tuplehash[IP_CT_DIR_REPLY].tuple.src.u3.ip6, 4, q->perturbation); ++ info.ctrepldst = jhash2(ct->tuplehash[IP_CT_DIR_REPLY].tuple.dst.u3.ip6, 4, q->perturbation); ++ } ++ ++ } ++#endif ++ ++ switch(q->hash_kind) { ++ case TCA_SFQ_HASH_CLASSIC: ++ return esfq_jhash_3words(q, info.dst, info.src, info.proto); ++ case TCA_SFQ_HASH_DST: ++ return esfq_jhash_1word(q, info.dst); ++ case TCA_SFQ_HASH_SRC: ++ return esfq_jhash_1word(q, info.src); ++ case TCA_SFQ_HASH_FWMARK: ++ return esfq_jhash_1word(q, info.mark); ++#ifdef CONFIG_NET_SCH_ESFQ_NFCT ++ case TCA_SFQ_HASH_CTORIGDST: ++ return esfq_jhash_1word(q, info.ctorigdst); ++ case TCA_SFQ_HASH_CTORIGSRC: ++ return esfq_jhash_1word(q, info.ctorigsrc); ++ case TCA_SFQ_HASH_CTREPLDST: ++ return esfq_jhash_1word(q, info.ctrepldst); ++ case TCA_SFQ_HASH_CTREPLSRC: ++ return esfq_jhash_1word(q, info.ctreplsrc); ++ case TCA_SFQ_HASH_CTNATCHG: ++ { ++ if (info.ctorigdst == info.ctreplsrc) ++ return esfq_jhash_1word(q, info.ctorigsrc); ++ return esfq_jhash_1word(q, info.ctreplsrc); ++ } ++#endif ++ default: ++ if (net_ratelimit()) ++ printk(KERN_WARNING "ESFQ: Unknown hash method. Falling back to classic.\n"); ++ } ++ return esfq_jhash_3words(q, info.dst, info.src, info.proto); ++} ++ ++static inline void esfq_link(struct esfq_sched_data *q, esfq_index x) ++{ ++ esfq_index p, n; ++ int d = q->qs[x].qlen + q->depth; ++ ++ p = d; ++ n = q->dep[d].next; ++ q->dep[x].next = n; ++ q->dep[x].prev = p; ++ q->dep[p].next = q->dep[n].prev = x; ++} ++ ++static inline void esfq_dec(struct esfq_sched_data *q, esfq_index x) ++{ ++ esfq_index p, n; ++ ++ n = q->dep[x].next; ++ p = q->dep[x].prev; ++ q->dep[p].next = n; ++ q->dep[n].prev = p; ++ ++ if (n == p && q->max_depth == q->qs[x].qlen + 1) ++ q->max_depth--; ++ ++ esfq_link(q, x); ++} ++ ++static inline void esfq_inc(struct esfq_sched_data *q, esfq_index x) ++{ ++ esfq_index p, n; ++ int d; ++ ++ n = q->dep[x].next; ++ p = q->dep[x].prev; ++ q->dep[p].next = n; ++ q->dep[n].prev = p; ++ d = q->qs[x].qlen; ++ if (q->max_depth < d) ++ q->max_depth = d; ++ ++ esfq_link(q, x); ++} ++ ++static unsigned int esfq_drop(struct Qdisc *sch) ++{ ++ struct esfq_sched_data *q = qdisc_priv(sch); ++ esfq_index d = q->max_depth; ++ struct sk_buff *skb; ++ unsigned int len; ++ ++ /* Queue is full! Find the longest slot and ++ drop a packet from it */ ++ ++ if (d > 1) { ++ esfq_index x = q->dep[d+q->depth].next; ++ skb = q->qs[x].prev; ++ len = skb->len; ++ __skb_unlink(skb, &q->qs[x]); ++ kfree_skb(skb); ++ esfq_dec(q, x); ++ sch->q.qlen--; ++ sch->qstats.drops++; ++ sch->qstats.backlog -= len; ++ return len; ++ } ++ ++ if (d == 1) { ++ /* It is difficult to believe, but ALL THE SLOTS HAVE LENGTH 1. 
*/ ++ d = q->next[q->tail]; ++ q->next[q->tail] = q->next[d]; ++ q->allot[q->next[d]] += q->quantum; ++ skb = q->qs[d].prev; ++ len = skb->len; ++ __skb_unlink(skb, &q->qs[d]); ++ kfree_skb(skb); ++ esfq_dec(q, d); ++ sch->q.qlen--; ++ q->ht[q->hash[d]] = q->depth; ++ sch->qstats.drops++; ++ sch->qstats.backlog -= len; ++ return len; ++ } ++ ++ return 0; ++} ++ ++static void esfq_q_enqueue(struct sk_buff *skb, struct esfq_sched_data *q, unsigned int end) ++{ ++ unsigned hash = esfq_hash(q, skb); ++ unsigned depth = q->depth; ++ esfq_index x; ++ ++ x = q->ht[hash]; ++ if (x == depth) { ++ q->ht[hash] = x = q->dep[depth].next; ++ q->hash[x] = hash; ++ } ++ ++ if (end == ESFQ_TAIL) ++ __skb_queue_tail(&q->qs[x], skb); ++ else ++ __skb_queue_head(&q->qs[x], skb); ++ ++ esfq_inc(q, x); ++ if (q->qs[x].qlen == 1) { /* The flow is new */ ++ if (q->tail == depth) { /* It is the first flow */ ++ q->tail = x; ++ q->next[x] = x; ++ q->allot[x] = q->quantum; ++ } else { ++ q->next[x] = q->next[q->tail]; ++ q->next[q->tail] = x; ++ q->tail = x; ++ } ++ } ++} ++ ++static int esfq_enqueue(struct sk_buff *skb, struct Qdisc* sch) ++{ ++ struct esfq_sched_data *q = qdisc_priv(sch); ++ esfq_q_enqueue(skb, q, ESFQ_TAIL); ++ sch->qstats.backlog += skb->len; ++ if (++sch->q.qlen < q->limit-1) { ++ sch->bstats.bytes += skb->len; ++ sch->bstats.packets++; ++ return 0; ++ } ++ ++ sch->qstats.drops++; ++ esfq_drop(sch); ++ return NET_XMIT_CN; ++} ++ ++static struct sk_buff *esfq_peek(struct Qdisc* sch) ++{ ++ struct esfq_sched_data *q = qdisc_priv(sch); ++ esfq_index a; ++ ++ /* No active slots */ ++ if (q->tail == q->depth) ++ return NULL; ++ ++ a = q->next[q->tail]; ++ return skb_peek(&q->qs[a]); ++} ++ ++static struct sk_buff *esfq_q_dequeue(struct esfq_sched_data *q) ++{ ++ struct sk_buff *skb; ++ unsigned depth = q->depth; ++ esfq_index a, old_a; ++ ++ /* No active slots */ ++ if (q->tail == depth) ++ return NULL; ++ ++ a = old_a = q->next[q->tail]; ++ ++ /* Grab packet */ ++ skb = __skb_dequeue(&q->qs[a]); ++ esfq_dec(q, a); ++ ++ /* Is the slot empty? 
*/ ++ if (q->qs[a].qlen == 0) { ++ q->ht[q->hash[a]] = depth; ++ a = q->next[a]; ++ if (a == old_a) { ++ q->tail = depth; ++ return skb; ++ } ++ q->next[q->tail] = a; ++ q->allot[a] += q->quantum; ++ } else if ((q->allot[a] -= skb->len) <= 0) { ++ q->tail = a; ++ a = q->next[a]; ++ q->allot[a] += q->quantum; ++ } ++ ++ return skb; ++} ++ ++static struct sk_buff *esfq_dequeue(struct Qdisc* sch) ++{ ++ struct esfq_sched_data *q = qdisc_priv(sch); ++ struct sk_buff *skb; ++ ++ skb = esfq_q_dequeue(q); ++ if (skb == NULL) ++ return NULL; ++ sch->q.qlen--; ++ sch->qstats.backlog -= skb->len; ++ return skb; ++} ++ ++static void esfq_q_destroy(struct esfq_sched_data *q) ++{ ++ del_timer(&q->perturb_timer); ++ if(q->ht) ++ kfree(q->ht); ++ if(q->dep) ++ kfree(q->dep); ++ if(q->next) ++ kfree(q->next); ++ if(q->allot) ++ kfree(q->allot); ++ if(q->hash) ++ kfree(q->hash); ++ if(q->qs) ++ kfree(q->qs); ++} ++ ++static void esfq_destroy(struct Qdisc *sch) ++{ ++ struct esfq_sched_data *q = qdisc_priv(sch); ++ esfq_q_destroy(q); ++} ++ ++ ++static void esfq_reset(struct Qdisc* sch) ++{ ++ struct sk_buff *skb; ++ ++ while ((skb = esfq_dequeue(sch)) != NULL) ++ kfree_skb(skb); ++} ++ ++static void esfq_perturbation(unsigned long arg) ++{ ++ struct Qdisc *sch = (struct Qdisc*)arg; ++ struct esfq_sched_data *q = qdisc_priv(sch); ++ ++ q->perturbation = net_random()&0x1F; ++ ++ if (q->perturb_period) { ++ q->perturb_timer.expires = jiffies + q->perturb_period; ++ add_timer(&q->perturb_timer); ++ } ++} ++ ++static unsigned int esfq_check_hash(unsigned int kind) ++{ ++ switch (kind) { ++ case TCA_SFQ_HASH_CTORIGDST: ++ case TCA_SFQ_HASH_CTORIGSRC: ++ case TCA_SFQ_HASH_CTREPLDST: ++ case TCA_SFQ_HASH_CTREPLSRC: ++ case TCA_SFQ_HASH_CTNATCHG: ++#ifndef CONFIG_NET_SCH_ESFQ_NFCT ++ { ++ if (net_ratelimit()) ++ printk(KERN_WARNING "ESFQ: Conntrack hash types disabled in kernel config. Falling back to classic.\n"); ++ return TCA_SFQ_HASH_CLASSIC; ++ } ++#endif ++ case TCA_SFQ_HASH_CLASSIC: ++ case TCA_SFQ_HASH_DST: ++ case TCA_SFQ_HASH_SRC: ++ case TCA_SFQ_HASH_FWMARK: ++ return kind; ++ default: ++ { ++ if (net_ratelimit()) ++ printk(KERN_WARNING "ESFQ: Unknown hash type. Falling back to classic.\n"); ++ return TCA_SFQ_HASH_CLASSIC; ++ } ++ } ++} ++ ++static int esfq_q_init(struct esfq_sched_data *q, struct nlattr *opt) ++{ ++ struct tc_esfq_qopt *ctl = nla_data(opt); ++ esfq_index p = ~0U/2; ++ int i; ++ ++ if (opt && opt->nla_len < nla_attr_size(sizeof(*ctl))) ++ return -EINVAL; ++ ++ q->perturbation = 0; ++ q->hash_kind = TCA_SFQ_HASH_CLASSIC; ++ q->max_depth = 0; ++ if (opt == NULL) { ++ q->perturb_period = 0; ++ q->hash_divisor = 1024; ++ q->tail = q->limit = q->depth = 128; ++ ++ } else { ++ struct tc_esfq_qopt *ctl = nla_data(opt); ++ if (ctl->quantum) ++ q->quantum = ctl->quantum; ++ q->perturb_period = ctl->perturb_period*HZ; ++ q->hash_divisor = ctl->divisor ? : 1024; ++ q->tail = q->limit = q->depth = ctl->flows ? 
: 128; ++ ++ if ( q->depth > p - 1 ) ++ return -EINVAL; ++ ++ if (ctl->limit) ++ q->limit = min_t(u32, ctl->limit, q->depth); ++ ++ if (ctl->hash_kind) { ++ q->hash_kind = esfq_check_hash(ctl->hash_kind); ++ } ++ } ++ ++ q->ht = kmalloc(q->hash_divisor*sizeof(esfq_index), GFP_KERNEL); ++ if (!q->ht) ++ goto err_case; ++ q->dep = kmalloc((1+q->depth*2)*sizeof(struct esfq_head), GFP_KERNEL); ++ if (!q->dep) ++ goto err_case; ++ q->next = kmalloc(q->depth*sizeof(esfq_index), GFP_KERNEL); ++ if (!q->next) ++ goto err_case; ++ q->allot = kmalloc(q->depth*sizeof(short), GFP_KERNEL); ++ if (!q->allot) ++ goto err_case; ++ q->hash = kmalloc(q->depth*sizeof(unsigned short), GFP_KERNEL); ++ if (!q->hash) ++ goto err_case; ++ q->qs = kmalloc(q->depth*sizeof(struct sk_buff_head), GFP_KERNEL); ++ if (!q->qs) ++ goto err_case; ++ ++ for (i=0; i< q->hash_divisor; i++) ++ q->ht[i] = q->depth; ++ for (i=0; idepth; i++) { ++ skb_queue_head_init(&q->qs[i]); ++ q->dep[i+q->depth].next = i+q->depth; ++ q->dep[i+q->depth].prev = i+q->depth; ++ } ++ ++ for (i=0; idepth; i++) ++ esfq_link(q, i); ++ return 0; ++err_case: ++ esfq_q_destroy(q); ++ return -ENOBUFS; ++} ++ ++static int esfq_init(struct Qdisc *sch, struct nlattr *opt) ++{ ++ struct esfq_sched_data *q = qdisc_priv(sch); ++ int err; ++ ++ q->quantum = psched_mtu(qdisc_dev(sch)); /* default */ ++ if ((err = esfq_q_init(q, opt))) ++ return err; ++ ++ init_timer(&q->perturb_timer); ++ q->perturb_timer.data = (unsigned long)sch; ++ q->perturb_timer.function = esfq_perturbation; ++ if (q->perturb_period) { ++ q->perturb_timer.expires = jiffies + q->perturb_period; ++ add_timer(&q->perturb_timer); ++ } ++ ++ return 0; ++} ++ ++static int esfq_change(struct Qdisc *sch, struct nlattr *opt) ++{ ++ struct esfq_sched_data *q = qdisc_priv(sch); ++ struct esfq_sched_data new; ++ struct sk_buff *skb; ++ int err; ++ ++ /* set up new queue */ ++ memset(&new, 0, sizeof(struct esfq_sched_data)); ++ new.quantum = psched_mtu(qdisc_dev(sch)); /* default */ ++ if ((err = esfq_q_init(&new, opt))) ++ return err; ++ ++ /* copy all packets from the old queue to the new queue */ ++ sch_tree_lock(sch); ++ while ((skb = esfq_q_dequeue(q)) != NULL) ++ esfq_q_enqueue(skb, &new, ESFQ_TAIL); ++ ++ /* clean up the old queue */ ++ esfq_q_destroy(q); ++ ++ /* copy elements of the new queue into the old queue */ ++ q->perturb_period = new.perturb_period; ++ q->quantum = new.quantum; ++ q->limit = new.limit; ++ q->depth = new.depth; ++ q->hash_divisor = new.hash_divisor; ++ q->hash_kind = new.hash_kind; ++ q->tail = new.tail; ++ q->max_depth = new.max_depth; ++ q->ht = new.ht; ++ q->dep = new.dep; ++ q->next = new.next; ++ q->allot = new.allot; ++ q->hash = new.hash; ++ q->qs = new.qs; ++ ++ /* finish up */ ++ if (q->perturb_period) { ++ q->perturb_timer.expires = jiffies + q->perturb_period; ++ add_timer(&q->perturb_timer); ++ } else { ++ q->perturbation = 0; ++ } ++ sch_tree_unlock(sch); ++ return 0; ++} ++ ++static int esfq_dump(struct Qdisc *sch, struct sk_buff *skb) ++{ ++ struct esfq_sched_data *q = qdisc_priv(sch); ++ unsigned char *b = skb_tail_pointer(skb); ++ struct tc_esfq_qopt opt; ++ ++ opt.quantum = q->quantum; ++ opt.perturb_period = q->perturb_period/HZ; ++ ++ opt.limit = q->limit; ++ opt.divisor = q->hash_divisor; ++ opt.flows = q->depth; ++ opt.hash_kind = q->hash_kind; ++ ++ NLA_PUT(skb, TCA_OPTIONS, sizeof(opt), &opt); ++ ++ return skb->len; ++ ++nla_put_failure: ++ nlmsg_trim(skb, b); ++ return -1; ++} ++ ++static struct Qdisc_ops esfq_qdisc_ops = ++{ ++ .next = NULL, ++ 
.cl_ops = NULL, ++ .id = "esfq", ++ .priv_size = sizeof(struct esfq_sched_data), ++ .enqueue = esfq_enqueue, ++ .dequeue = esfq_dequeue, ++ .peek = esfq_peek, ++ .drop = esfq_drop, ++ .init = esfq_init, ++ .reset = esfq_reset, ++ .destroy = esfq_destroy, ++ .change = esfq_change, ++ .dump = esfq_dump, ++ .owner = THIS_MODULE, ++}; ++ ++static int __init esfq_module_init(void) ++{ ++ return register_qdisc(&esfq_qdisc_ops); ++} ++static void __exit esfq_module_exit(void) ++{ ++ unregister_qdisc(&esfq_qdisc_ops); ++} ++module_init(esfq_module_init) ++module_exit(esfq_module_exit) ++MODULE_LICENSE("GPL"); diff --git a/3.3.8/621-sched_act_connmark.patch b/3.3.8/621-sched_act_connmark.patch new file mode 100644 index 0000000..b6adce1 --- /dev/null +++ b/3.3.8/621-sched_act_connmark.patch @@ -0,0 +1,172 @@ +--- /dev/null ++++ b/net/sched/act_connmark.c +@@ -0,0 +1,137 @@ ++/* ++ * Copyright (c) 2011 Felix Fietkau ++ * ++ * This program is free software; you can redistribute it and/or modify it ++ * under the terms and conditions of the GNU General Public License, ++ * version 2, as published by the Free Software Foundation. ++ * ++ * This program is distributed in the hope it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for ++ * more details. ++ * ++ * You should have received a copy of the GNU General Public License along with ++ * this program; if not, write to the Free Software Foundation, Inc., 59 Temple ++ * Place - Suite 330, Boston, MA 02111-1307 USA. ++ */ ++ ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++ ++#include ++#include ++ ++#define TCA_ACT_CONNMARK 20 ++ ++#define CONNMARK_TAB_MASK 3 ++static struct tcf_common *tcf_connmark_ht[CONNMARK_TAB_MASK + 1]; ++static u32 connmark_idx_gen; ++static DEFINE_RWLOCK(connmark_lock); ++ ++static struct tcf_hashinfo connmark_hash_info = { ++ .htab = tcf_connmark_ht, ++ .hmask = CONNMARK_TAB_MASK, ++ .lock = &connmark_lock, ++}; ++ ++static int tcf_connmark(struct sk_buff *skb, const struct tc_action *a, ++ struct tcf_result *res) ++{ ++ struct nf_conn *c; ++ enum ip_conntrack_info ctinfo; ++ int proto; ++ int r; ++ ++ if (skb->protocol == htons(ETH_P_IP)) { ++ if (skb->len < sizeof(struct iphdr)) ++ goto out; ++ proto = PF_INET; ++ } else if (skb->protocol == htons(ETH_P_IPV6)) { ++ if (skb->len < sizeof(struct ipv6hdr)) ++ goto out; ++ proto = PF_INET6; ++ } else ++ goto out; ++ ++ r = nf_conntrack_in(dev_net(skb->dev), proto, NF_INET_PRE_ROUTING, skb); ++ if (r != NF_ACCEPT) ++ goto out; ++ ++ c = nf_ct_get(skb, &ctinfo); ++ if (!c) ++ goto out; ++ ++ skb->mark = c->mark; ++ nf_conntrack_put(skb->nfct); ++ skb->nfct = NULL; ++ ++out: ++ return TC_ACT_PIPE; ++} ++ ++static int tcf_connmark_init(struct nlattr *nla, struct nlattr *est, ++ struct tc_action *a, int ovr, int bind) ++{ ++ struct tcf_common *pc; ++ ++ pc = tcf_hash_create(0, est, a, sizeof(*pc), bind, ++ &connmark_idx_gen, &connmark_hash_info); ++ if (IS_ERR(pc)) ++ return PTR_ERR(pc); ++ ++ tcf_hash_insert(pc, &connmark_hash_info); ++ ++ return ACT_P_CREATED; ++} ++ ++static inline int tcf_connmark_cleanup(struct tc_action *a, int bind) ++{ ++ if (a->priv) ++ return tcf_hash_release(a->priv, bind, &connmark_hash_info); ++ return 0; ++} ++ ++static inline int tcf_connmark_dump(struct sk_buff *skb, struct tc_action *a, ++ int bind, int ref) ++{ ++ return skb->len; ++} ++ ++static 
struct tc_action_ops act_connmark_ops = { ++ .kind = "connmark", ++ .hinfo = &connmark_hash_info, ++ .type = TCA_ACT_CONNMARK, ++ .capab = TCA_CAP_NONE, ++ .owner = THIS_MODULE, ++ .act = tcf_connmark, ++ .dump = tcf_connmark_dump, ++ .cleanup = tcf_connmark_cleanup, ++ .init = tcf_connmark_init, ++ .walk = tcf_generic_walker, ++}; ++ ++MODULE_AUTHOR("Felix Fietkau "); ++MODULE_DESCRIPTION("Connection tracking mark restoring"); ++MODULE_LICENSE("GPL"); ++ ++static int __init connmark_init_module(void) ++{ ++ return tcf_register_action(&act_connmark_ops); ++} ++ ++static void __exit connmark_cleanup_module(void) ++{ ++ tcf_unregister_action(&act_connmark_ops); ++} ++ ++module_init(connmark_init_module); ++module_exit(connmark_cleanup_module); +--- a/net/sched/Kconfig ++++ b/net/sched/Kconfig +@@ -624,6 +624,19 @@ config NET_ACT_CSUM + To compile this code as a module, choose M here: the + module will be called act_csum. + ++config NET_ACT_CONNMARK ++ tristate "Connection Tracking Marking" ++ depends on NET_CLS_ACT ++ depends on NF_CONNTRACK ++ depends on NF_CONNTRACK_MARK ++ ---help--- ++ Say Y here to restore the connmark from a scheduler action ++ ++ If unsure, say N. ++ ++ To compile this code as a module, choose M here: the ++ module will be called act_connmark. ++ + config NET_CLS_IND + bool "Incoming device classification" + depends on NET_CLS_U32 || NET_CLS_FW +--- a/net/sched/Makefile ++++ b/net/sched/Makefile +@@ -16,6 +16,7 @@ obj-$(CONFIG_NET_ACT_PEDIT) += act_pedit + obj-$(CONFIG_NET_ACT_SIMP) += act_simple.o + obj-$(CONFIG_NET_ACT_SKBEDIT) += act_skbedit.o + obj-$(CONFIG_NET_ACT_CSUM) += act_csum.o ++obj-$(CONFIG_NET_ACT_CONNMARK) += act_connmark.o + obj-$(CONFIG_NET_SCH_FIFO) += sch_fifo.o + obj-$(CONFIG_NET_SCH_CBQ) += sch_cbq.o + obj-$(CONFIG_NET_SCH_HTB) += sch_htb.o diff --git a/3.3.8/910-kobject_uevent.patch b/3.3.8/910-kobject_uevent.patch new file mode 100644 index 0000000..aa9a40f --- /dev/null +++ b/3.3.8/910-kobject_uevent.patch @@ -0,0 +1,21 @@ +--- a/lib/kobject_uevent.c ++++ b/lib/kobject_uevent.c +@@ -50,6 +50,18 @@ static const char *kobject_actions[] = { + [KOBJ_OFFLINE] = "offline", + }; + ++u64 uevent_next_seqnum(void) ++{ ++ u64 seq; ++ ++ mutex_lock(&uevent_sock_mutex); ++ seq = ++uevent_seqnum; ++ mutex_unlock(&uevent_sock_mutex); ++ ++ return seq; ++} ++EXPORT_SYMBOL_GPL(uevent_next_seqnum); ++ + /** + * kobject_action_type - translate action string to numeric type + * diff --git a/3.3.8/911-kobject_add_broadcast_uevent.patch b/3.3.8/911-kobject_add_broadcast_uevent.patch new file mode 100644 index 0000000..104df13 --- /dev/null +++ b/3.3.8/911-kobject_add_broadcast_uevent.patch @@ -0,0 +1,85 @@ +--- a/include/linux/kobject.h ++++ b/include/linux/kobject.h +@@ -31,6 +31,8 @@ + #define UEVENT_NUM_ENVP 32 /* number of env pointers */ + #define UEVENT_BUFFER_SIZE 2048 /* buffer for the variables */ + ++struct sk_buff; ++ + /* path to the userspace helper executed on an event */ + extern char uevent_helper[]; + +@@ -213,6 +215,10 @@ int add_uevent_var(struct kobj_uevent_en + + int kobject_action_type(const char *buf, size_t count, + enum kobject_action *type); ++ ++int broadcast_uevent(struct sk_buff *skb, __u32 pid, __u32 group, ++ gfp_t allocation); ++ + #else + static inline int kobject_uevent(struct kobject *kobj, + enum kobject_action action) +@@ -229,6 +235,16 @@ int add_uevent_var(struct kobj_uevent_en + static inline int kobject_action_type(const char *buf, size_t count, + enum kobject_action *type) + { return -EINVAL; } ++ ++void kfree_skb(struct 
sk_buff *); ++ ++static inline int broadcast_uevent(struct sk_buff *skb, __u32 pid, __u32 group, ++ gfp_t allocation) ++{ ++ kfree_skb(skb); ++ return 0; ++} ++ + #endif + + #endif /* _KOBJECT_H_ */ +--- a/lib/kobject_uevent.c ++++ b/lib/kobject_uevent.c +@@ -381,6 +381,43 @@ int add_uevent_var(struct kobj_uevent_en + EXPORT_SYMBOL_GPL(add_uevent_var); + + #if defined(CONFIG_NET) ++int broadcast_uevent(struct sk_buff *skb, __u32 pid, __u32 group, ++ gfp_t allocation) ++{ ++ struct uevent_sock *ue_sk; ++ int err = 0; ++ ++ /* send netlink message */ ++ mutex_lock(&uevent_sock_mutex); ++ list_for_each_entry(ue_sk, &uevent_sock_list, list) { ++ struct sock *uevent_sock = ue_sk->sk; ++ struct sk_buff *skb2; ++ ++ skb2 = skb_clone(skb, allocation); ++ if (!skb2) ++ break; ++ ++ err = netlink_broadcast(uevent_sock, skb2, pid, group, ++ allocation); ++ if (err) ++ break; ++ } ++ mutex_unlock(&uevent_sock_mutex); ++ ++ kfree_skb(skb); ++ return err; ++} ++#else ++int broadcast_uevent(struct sk_buff *skb, __u32 pid, __u32 group, ++ gfp_t allocation) ++{ ++ kfree_skb(skb); ++ return 0; ++} ++#endif ++EXPORT_SYMBOL_GPL(broadcast_uevent); ++ ++#if defined(CONFIG_NET) + static int uevent_net_init(struct net *net) + { + struct uevent_sock *ue_sk; diff --git a/3.3.8/Add_CONFIG_VFAT_FS_DUALNAMES_option.patch b/3.3.8/Add_CONFIG_VFAT_FS_DUALNAMES_option.patch new file mode 100644 index 0000000..5e3cfe1 --- /dev/null +++ b/3.3.8/Add_CONFIG_VFAT_FS_DUALNAMES_option.patch @@ -0,0 +1,145 @@ +diff --git a/fs/fat/Kconfig b/fs/fat/Kconfig +index 182f9ff..907a5de 100644 +--- a/fs/fat/Kconfig ++++ b/fs/fat/Kconfig +@@ -74,6 +74,26 @@ config VFAT_FS + To compile this as a module, choose M here: the module will be called + vfat. + ++config VFAT_FS_DUALNAMES ++ bool "VFAT dual names support" ++ depends on VFAT_FS ++ help ++ This option provides support for dual filenames on VFAT filesystems. ++ If this option is disabled then file creation will either put ++ a short (8.3) name or a long name on the file, but never both. ++ The field where a shortname would normally go is filled with ++ invalid characters such that it cannot be considered a valid ++ short filename. ++ ++ That means that long filenames created with this option ++ disabled will not be accessible at all to operating systems ++ that do not understand the VFAT extensions. ++ ++ Users considering enabling this option should consider the implications ++ of any patents that may exist on dual filenames in VFAT. 
++ ++ If unsure, say N ++ + config FAT_DEFAULT_CODEPAGE + int "Default codepage for FAT" + depends on MSDOS_FS || VFAT_FS +diff --git a/fs/fat/dir.c b/fs/fat/dir.c +index 38ff75a..cd5d3ec 100644 +--- a/fs/fat/dir.c ++++ b/fs/fat/dir.c +@@ -415,14 +415,13 @@ + } + i += chl; + } +- if (!last_u) +- continue; +- +- /* Compare shortname */ +- bufuname[last_u] = 0x0000; +- len = fat_uni_to_x8(sb, bufuname, bufname, sizeof(bufname)); +- if (fat_name_match(sbi, name, name_len, bufname, len)) +- goto found; ++ if (last_u) { ++ /* Compare shortname */ ++ bufuname[last_u] = 0x0000; ++ len = fat_uni_to_x8(sb, bufuname, bufname, sizeof(bufname)); ++ if (fat_name_match(sbi, name, name_len, bufname, len)) ++ goto found; ++ } + + if (nr_slots) { + void *longname = unicode + FAT_MAX_UNI_CHARS; +diff --git a/fs/fat/namei_vfat.c b/fs/fat/namei_vfat.c +index 73471b7..894f44d 100644 +--- a/fs/fat/namei_vfat.c ++++ b/fs/fat/namei_vfat.c +@@ -22,6 +22,7 @@ + #include + #include + #include ++#include + #include "fat.h" + + /* +@@ -586,6 +587,59 @@ xlate_to_uni(const unsigned char *name, int len, unsigned char *outname, + return 0; + } + ++#ifndef CONFIG_VFAT_FS_DUALNAMES ++/* ++ * build a 11 byte 8.3 buffer which is not a short filename. We want 11 ++ * bytes which: ++ * - will be seen as a constant string to all APIs on Linux and Windows ++ * - cannot be matched with wildcard patterns ++ * - cannot be used to access the file ++ * - has a low probability of collision within a directory ++ * - has an invalid 3 byte extension ++ * - contains at least one non-space and non-nul byte ++ */ ++static void vfat_build_dummy_83_buffer(struct inode *dir, char *msdos_name) ++{ ++ u32 rand_num = random32() & 0x3FFFFFFF; ++ int i; ++ ++ /* a value of zero would leave us with only nul and spaces, ++ * which would not work with older linux systems ++ */ ++ if (rand_num == 0) ++ rand_num = 1; ++ ++ /* we start with a space followed by nul as spaces at the ++ * start of an entry are trimmed in FAT, which means that ++ * starting the 11 bytes with 0x20 0x00 gives us a value which ++ * cannot be used to access the file. It also means that the ++ * value as seen from all Windows and Linux APIs is a constant ++ */ ++ msdos_name[0] = ' '; ++ msdos_name[1] = 0; ++ ++ /* we use / and 2 nul bytes for the extension. 
These are ++ * invalid in FAT and mean that utilities that show the ++ * directory show no extension, but still work via the long ++ * name for old Linux kernels ++ */ ++ msdos_name[8] = '/'; ++ msdos_name[9] = 0; ++ msdos_name[10] = 0; ++ ++ /* ++ * fill the remaining 6 bytes with random invalid values ++ * This gives us a low collision rate, which means a low ++ * chance of problems with chkdsk.exe and WindowsXP ++ */ ++ for (i = 2; i < 8; i++) { ++ msdos_name[i] = rand_num & 0x1F; ++ rand_num >>= 5; ++ } ++} ++#endif ++ ++ + static int vfat_build_slots(struct inode *dir, const unsigned char *name, + int len, int is_dir, int cluster, + struct timespec *ts, +@@ -628,6 +682,11 @@ static int vfat_build_slots(struct inode *dir, const unsigned char *name, + goto shortname; + } + ++#ifndef CONFIG_VFAT_FS_DUALNAMES ++ vfat_build_dummy_83_buffer(dir, msdos_name); ++ lcase = 0; ++#endif ++ + /* build the entry of long file name */ + cksum = fat_checksum(msdos_name); + +-- +1.6.0.4 + + diff --git a/3.3.8/accessfs-3.2-0.26.patch b/3.3.8/accessfs-3.2-0.26.patch new file mode 100644 index 0000000..f36e634 --- /dev/null +++ b/3.3.8/accessfs-3.2-0.26.patch @@ -0,0 +1,1036 @@ +diff --git a/Documentation/filesystems/accessfs.txt b/Documentation/filesystems/accessfs.txt +new file mode 100644 +index 0000000..bf135b5 +--- /dev/null ++++ b/Documentation/filesystems/accessfs.txt +@@ -0,0 +1,41 @@ ++Accessfs is a permission managing filesystem. It allows to control access to ++system resources, based on file permissions. The recommended mount point for ++this file-system is /proc/access, which will appear automatically in the ++/proc filesystem. ++ ++Currently there are two modules using accessfs, userports and usercaps. ++ ++With userports, you will be able to control access to IP ports based ++on user-/groupid. ++ ++There's no need anymore to run internet daemons as root. You can ++individually configure which user/program can bind to protected ports ++(by default, below 1024). ++ ++For example, you can say, user www is allowed to bind to port 80 or ++user mail is allowed to bind to port 25. Then, you can run apache as ++user www and sendmail as user mail. Now, you don't have to rely on ++apache or sendmail giving up superuser rights to enhance security. ++ ++To use this option, you need to mount the access file system ++and do a chown on the appropriate ports: ++ ++# mount -t accessfs none /proc/access ++# chown www /proc/access/net/ip/bind/80 ++# chown mail /proc/access/net/ip/bind/25 ++ ++You can grant access to a group for individual ports as well. Just say: ++ ++# chgrp lp /proc/access/net/ip/bind/515 ++# chown g+x /proc/access/net/ip/bind/515 ++ ++With usercaps, you will be able to grant capabilities based on ++user-/groupid (root by default). 
++ ++For example you can create a group raw and change the capability ++net_raw to this group: ++ ++# chgrp raw /proc/access/capabilities/net_raw ++# chmod ug+x /proc/access/capabilities/net_raw ++# chgrp raw /sbin/ping ++# chmod u-s /sbin/ping; chmod g+s /sbin/ping +diff --git a/fs/Kconfig b/fs/Kconfig +index 5f4c45d..24f7348 100644 +--- a/fs/Kconfig ++++ b/fs/Kconfig +@@ -210,6 +210,7 @@ + # UBIFS File system configuration + source "fs/ubifs/Kconfig" + source "fs/logfs/Kconfig" ++source "fs/accessfs/Kconfig" + source "fs/cramfs/Kconfig" + source "fs/squashfs/Kconfig" + source "fs/freevxfs/Kconfig" +diff --git a/fs/Makefile b/fs/Makefile +index d2c3353..fea1cfc 100644 +--- a/fs/Makefile ++++ b/fs/Makefile +@@ -121,5 +121,6 @@ obj-$(CONFIG_OCFS2_FS) += ocfs2/ + obj-$(CONFIG_BTRFS_FS) += btrfs/ + obj-$(CONFIG_GFS2_FS) += gfs2/ + obj-y += exofs/ # Multiple modules ++obj-$(CONFIG_ACCESS_FS) += accessfs/ + obj-$(CONFIG_CEPH_FS) += ceph/ + obj-$(CONFIG_PSTORE) += pstore/ +diff --git a/fs/accessfs/Kconfig b/fs/accessfs/Kconfig +new file mode 100644 +index 0000000..539d6e9 +--- /dev/null ++++ b/fs/accessfs/Kconfig +@@ -0,0 +1,61 @@ ++config ACCESS_FS ++ tristate "Accessfs support (Experimental)" ++ depends on EXPERIMENTAL ++ default n ++ help ++ This is a new file system to manage permissions. It is not very ++ useful on its own. You need to enable other options below. ++ ++ If you're unsure, say N. ++ ++config ACCESSFS_USER_PORTS ++ tristate "User permission based IP ports" ++ depends on ACCESS_FS && INET ++ select NET_HOOKS ++ default n ++ help ++ If you say Y here, you will be able to control access to IP ports ++ based on user-/groupid. ++ ++ If you're unsure, say N. ++ ++config ACCESSFS_PROT_SOCK ++ int "Range of protected ports (1024-65536)" ++ depends on ACCESSFS_USER_PORTS ++ default 1024 ++ help ++ Here you can extend the range of protected ports. This is ++ from 1-1023 inclusive on normal unix systems. One use for this ++ could be to reserve ports for X11 (port 6000) or database ++ servers (port 3306 for mysql), so nobody else could grab this port. ++ The default permission for extended ports is --x--x--x. ++ ++ If you build this as a module, you can specify the range of ++ protected ports at module load time (max_prot_sock). ++ ++ If you're unsure, say 1024. ++ ++config ACCESSFS_IGNORE_NET_BIND_SERVICE ++ bool "Ignore CAP_NET_BIND_SERVICE capability" ++ depends on ACCESSFS_USER_PORTS ++ default n ++ help ++ This option lets you decide, wether a user with ++ CAP_NET_BIND_SERVICE capability is able to override ++ your userport configuration. ++ ++ If you build this as a module, you can specify this ++ option at module load time (ignore_net_bind_service). ++ ++ If you're unsure, say n. ++ ++config ACCESSFS_USER_CAPABILITIES ++ bool "User permission based capabilities" ++ depends on ACCESS_FS = y ++ select SECURITY ++ default n ++ help ++ If you say Y here, you will be able to grant capabilities based on ++ user-/groupid (root by default). ++ ++ If you're unsure, say N. +diff --git a/fs/accessfs/Makefile b/fs/accessfs/Makefile +new file mode 100644 +index 0000000..63a5647 +--- /dev/null ++++ b/fs/accessfs/Makefile +@@ -0,0 +1,11 @@ ++# ++# Makefile for the linux accessfs routines. 
++# ++ ++obj-$(CONFIG_ACCESS_FS) += accessfs.o ++obj-$(CONFIG_ACCESSFS_USER_CAPABILITIES) += usercaps.o ++obj-$(CONFIG_ACCESSFS_USER_PORTS) += userports.o ++ ++accessfs-objs := inode.o ++usercaps-objs := capabilities.o ++userports-objs := ip.o +diff --git a/fs/accessfs/capabilities.c b/fs/accessfs/capabilities.c +new file mode 100644 +index 0000000..1c43f36 +--- /dev/null ++++ b/fs/accessfs/capabilities.c +@@ -0,0 +1,109 @@ ++/* Copyright (c) 2002-2006 Olaf Dietsche ++ * ++ * User based capabilities for Linux. ++ */ ++ ++#include ++#include ++#include ++#include ++ ++/* perl -n -e 'print "\"", lc($1), "\",\n" if (m/^#define\s+CAP_(.+?)\s+\d+$/);' include/linux/capability.h */ ++static const char *names[] = { ++ "chown", ++ "dac_override", ++ "dac_read_search", ++ "fowner", ++ "fsetid", ++ "kill", ++ "setgid", ++ "setuid", ++ "setpcap", ++ "linux_immutable", ++ "net_bind_service", ++ "net_broadcast", ++ "net_admin", ++ "net_raw", ++ "ipc_lock", ++ "ipc_owner", ++ "sys_module", ++ "sys_rawio", ++ "sys_chroot", ++ "sys_ptrace", ++ "sys_pacct", ++ "sys_admin", ++ "sys_boot", ++ "sys_nice", ++ "sys_resource", ++ "sys_time", ++ "sys_tty_config", ++ "mknod", ++ "lease", ++ "audit_write", ++ "audit_control", ++ "setfcap", ++ "mac_override", ++ "mac_admin", ++ "syslog", ++ "wake_alarm", ++}; ++ ++static struct access_attr caps[ARRAY_SIZE(names)]; ++ ++static int accessfs_capable(struct task_struct *tsk, const struct cred *cred, struct user_namespace *ns, int cap, int audit) ++{ ++ if (accessfs_permitted(&caps[cap], MAY_EXEC)) { ++ /* capability granted */ ++ return 0; ++ } ++ ++ /* capability denied */ ++ return -EPERM; ++} ++ ++static struct security_operations accessfs_security_ops = { ++ .name = "usercaps", ++ .capable = accessfs_capable, ++}; ++ ++static void unregister_capabilities(struct accessfs_direntry *dir, int n) ++{ ++ int i; ++ for (i = 0; i < n; ++i) ++ accessfs_unregister(dir, names[i]); ++} ++ ++static int __init init_capabilities(void) ++{ ++ struct accessfs_direntry *dir; ++ int i, err; ++ dir = accessfs_make_dirpath("capabilities"); ++ if (dir == 0) ++ return -ENOTDIR; ++ ++ for (i = 0; i < ARRAY_SIZE(caps); ++i) { ++ caps[i].uid = 0; ++ caps[i].gid = 0; ++ caps[i].mode = S_IXUSR; ++ err = accessfs_register(dir, names[i], &caps[i]); ++ if (err) { ++ unregister_capabilities(dir, i); ++ return err; ++ } ++ } ++ ++ if (!security_module_enable(&accessfs_security_ops)) ++ return -EAGAIN; ++ ++ err = register_security(&accessfs_security_ops); ++ if (err != 0) ++ unregister_capabilities(dir, ARRAY_SIZE(names)); ++ ++ return err; ++} ++ ++security_initcall(init_capabilities); ++ ++MODULE_AUTHOR("Olaf Dietsche"); ++MODULE_DESCRIPTION("User based capabilities"); ++MODULE_LICENSE("GPL v2"); +diff --git a/fs/accessfs/inode.c b/fs/accessfs/inode.c +new file mode 100644 +index 0000000..a2247e2 +--- /dev/null ++++ b/fs/accessfs/inode.c +@@ -0,0 +1,431 @@ ++/* Copyright (c) 2001-2006 Olaf Dietsche ++ * ++ * Access permission filesystem for Linux. ++ * ++ * 2002 Ben Clifford, create mount point at /proc/access ++ * 2002 Ben Clifford, trying to make it work under 2.5.5-dj2 ++ * (see comments: BENC255 for reminders and todos) ++ * ++ * ++ * BENC255: the kernel doesn't lock BKL for us when entering methods ++ * (see Documentation/fs/porting.txt) ++ * Need to look at code here and see if we need either the BKL ++ * or our own lock - I think probably not. 
++ * ++ */ ++ ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++ ++#define ACCESSFS_MAGIC 0x3c1d36e7 ++ ++static struct proc_dir_entry *mountdir = NULL; ++ ++static DEFINE_MUTEX(accessfs_sem); ++ ++static struct inode_operations accessfs_inode_operations; ++static struct file_operations accessfs_dir_file_operations; ++static struct inode_operations accessfs_dir_inode_operations; ++ ++static inline void accessfs_readdir_aux(struct file *filp, ++ struct accessfs_direntry *dir, ++ int start, void *dirent, ++ filldir_t filldir) ++{ ++ struct list_head *list; ++ int i = 2; ++ list_for_each(list, &dir->children) { ++ struct accessfs_entry *de; ++ if (i++ < start) ++ continue; ++ ++ de = list_entry(list, struct accessfs_entry, siblings); ++ if (filldir(dirent, de->name, strlen(de->name), filp->f_pos, ++ de->ino, DT_UNKNOWN) < 0) ++ break; ++ ++ ++filp->f_pos; ++ } ++} ++ ++static int accessfs_readdir(struct file *filp, void *dirent, filldir_t filldir) ++{ ++ int i; ++ struct dentry *dentry = filp->f_dentry; ++ struct accessfs_direntry *dir; ++ ++ i = filp->f_pos; ++ switch (i) { ++ case 0: ++ if (filldir(dirent, ".", 1, i, dentry->d_inode->i_ino, ++ DT_DIR) < 0) ++ break; ++ ++ ++i; ++ ++filp->f_pos; ++ /* NO break; */ ++ case 1: ++ if (filldir(dirent, "..", 2, i, ++ dentry->d_parent->d_inode->i_ino, DT_DIR) < 0) ++ break; ++ ++ ++i; ++ ++filp->f_pos; ++ /* NO break; */ ++ default: ++ mutex_lock(&accessfs_sem); ++ dir = dentry->d_inode->i_private; ++ accessfs_readdir_aux(filp, dir, i, dirent, filldir); ++ mutex_unlock(&accessfs_sem); ++ break; ++ } ++ ++ return 0; ++} ++ ++static struct accessfs_entry *accessfs_lookup_entry(struct accessfs_entry *pe, ++ const char *name, int len) ++{ ++ struct list_head *list; ++ struct accessfs_direntry *dir; ++ if (!S_ISDIR(pe->attr->mode)) ++ return NULL; ++ ++ dir = (struct accessfs_direntry *) pe; ++ list_for_each(list, &dir->children) { ++ struct accessfs_entry *de = list_entry(list, struct accessfs_entry, siblings); ++ if (strncmp(de->name, name, len) == 0 && de->name[len] == 0) ++ return de; ++ } ++ ++ return NULL; ++} ++ ++static struct accessfs_direntry accessfs_rootdir = { ++ { "/", ++ LIST_HEAD_INIT(accessfs_rootdir.node.hash), ++ LIST_HEAD_INIT(accessfs_rootdir.node.siblings), ++ 1, &accessfs_rootdir.attr }, ++ NULL, LIST_HEAD_INIT(accessfs_rootdir.children), ++ { 0, 0, S_IFDIR | 0755 } ++}; ++ ++static void accessfs_init_inode(struct inode *inode, struct accessfs_entry *pe) ++{ ++ static const struct timespec epoch = {0, 0}; ++ inode->i_private = pe; ++ inode->i_uid = pe->attr->uid; ++ inode->i_gid = pe->attr->gid; ++ inode->i_mode = pe->attr->mode; ++/* ++ inode->i_blksize = PAGE_CACHE_SIZE; ++ inode->i_blocks = 0; ++ inode->i_rdev = NODEV; ++*/ ++ inode->i_atime = inode->i_mtime = inode->i_ctime = epoch; ++ switch (inode->i_mode & S_IFMT) { ++ case S_IFREG: ++ inode->i_op = &accessfs_inode_operations; ++ break; ++ case S_IFDIR: ++ inode->i_op = &accessfs_dir_inode_operations; ++ inode->i_fop = &accessfs_dir_file_operations; ++ break; ++ default: ++ BUG(); ++ break; ++ } ++} ++ ++static struct inode *accessfs_get_root_inode(struct super_block *sb) ++{ ++ struct inode *inode = new_inode(sb); ++ if (inode) { ++ mutex_lock(&accessfs_sem); ++/* inode->i_ino = accessfs_rootdir.node.ino; */ ++ accessfs_init_inode(inode, &accessfs_rootdir.node); ++ accessfs_rootdir.node.ino = inode->i_ino; ++ mutex_unlock(&accessfs_sem); ++ } ++ ++ return inode; ++} ++ ++static LIST_HEAD(hash); ++ 
++static int accessfs_node_init(struct accessfs_direntry *parent, ++ struct accessfs_entry *de, const char *name, ++ size_t len, struct access_attr *attr, mode_t mode) ++{ ++ static unsigned long ino = 1; ++ de->name = kmalloc(len + 1, GFP_KERNEL); ++ if (de->name == NULL) ++ return -ENOMEM; ++ ++ strncpy(de->name, name, len); ++ de->name[len] = 0; ++ de->ino = ++ino; ++ de->attr = attr; ++ de->attr->uid = 0; ++ de->attr->gid = 0; ++ de->attr->mode = mode; ++ ++ list_add_tail(&de->hash, &hash); ++ list_add_tail(&de->siblings, &parent->children); ++ return 0; ++} ++ ++static int accessfs_mknod(struct accessfs_direntry *dir, const char *name, ++ struct access_attr *attr) ++{ ++ struct accessfs_entry *pe; ++ pe = kmalloc(sizeof(struct accessfs_entry), GFP_KERNEL); ++ if (pe == NULL) ++ return -ENOMEM; ++ ++ accessfs_node_init(dir, pe, name, strlen(name), attr, ++ S_IFREG | attr->mode); ++ return 0; ++} ++ ++static struct accessfs_direntry *accessfs_mkdir(struct accessfs_direntry *parent, ++ const char *name, size_t len) ++{ ++ int err; ++ struct accessfs_direntry *dir; ++ dir = kmalloc(sizeof(struct accessfs_direntry), GFP_KERNEL); ++ if (dir == NULL) ++ return NULL; ++ ++ dir->parent = parent; ++ INIT_LIST_HEAD(&dir->children); ++ err = accessfs_node_init(parent, &dir->node, name, len, &dir->attr, ++ S_IFDIR | 0755); ++ if (err) { ++ kfree(dir); ++ dir = 0; ++ } ++ ++ return dir; ++} ++ ++struct accessfs_direntry *accessfs_make_dirpath(const char *name) ++{ ++ struct accessfs_direntry *dir = &accessfs_rootdir; ++ const char *slash; ++ mutex_lock(&accessfs_sem); ++ do { ++ struct accessfs_entry *de; ++ size_t len; ++ while (*name == '/') ++ ++name; ++ ++ slash = strchr(name, '/'); ++ len = slash ? slash - name : strlen(name); ++ de = accessfs_lookup_entry(&dir->node, name, len); ++ if (de == NULL) { ++ dir = accessfs_mkdir(dir, name, len); ++ } else if (S_ISDIR(de->attr->mode)) { ++ dir = (struct accessfs_direntry *) de; ++ } else { ++ dir = NULL; ++ } ++ ++ if (dir == NULL) ++ break; ++ ++ name = slash + 1; ++ } while (slash != NULL); ++ ++ mutex_unlock(&accessfs_sem); ++ return dir; ++} ++ ++static void accessfs_unlink(struct accessfs_entry *pe) ++{ ++ list_del_init(&pe->hash); ++ list_del_init(&pe->siblings); ++ kfree(pe->name); ++ kfree(pe); ++} ++ ++static int accessfs_notify_change(struct dentry *dentry, struct iattr *iattr) ++{ ++ struct accessfs_entry *pe; ++ struct inode *i = dentry->d_inode; ++ int err; ++ err = inode_change_ok(i, iattr); ++ if (err) ++ return err; ++ ++ setattr_copy(i, iattr); ++ ++ pe = (struct accessfs_entry *) i->i_private; ++ pe->attr->uid = i->i_uid; ++ pe->attr->gid = i->i_gid; ++ pe->attr->mode = i->i_mode; ++ return 0; ++} ++ ++static struct inode *accessfs_iget(struct super_block *sb, unsigned long ino) ++{ ++ struct list_head *list; ++ struct inode *inode = iget_locked(sb, ino); ++ if (!inode) ++ return ERR_PTR(-ENOMEM); ++ ++ if (!(inode->i_state & I_NEW)) ++ return inode; ++ ++ mutex_lock(&accessfs_sem); ++ list_for_each(list, &hash) { ++ struct accessfs_entry *pe; ++ pe = list_entry(list, struct accessfs_entry, hash); ++ if (pe->ino == ino) { ++ accessfs_init_inode(inode, pe); ++ break; ++ } ++ } ++ ++ mutex_unlock(&accessfs_sem); ++ return inode; ++} ++ ++static struct dentry *accessfs_lookup(struct inode *dir, struct dentry *dentry, ++ struct nameidata *nd) ++{ ++ struct inode *inode = NULL; ++ struct accessfs_entry *pe; ++ mutex_lock(&accessfs_sem); ++ pe = accessfs_lookup_entry(dir->i_private, dentry->d_name.name, ++ dentry->d_name.len); ++ 
mutex_unlock(&accessfs_sem); ++ if (pe) ++ inode = accessfs_iget(dir->i_sb, pe->ino); ++ ++ d_add(dentry, inode); ++ return NULL; ++} ++ ++static struct inode_operations accessfs_inode_operations = { ++ .setattr = accessfs_notify_change, ++}; ++ ++static struct inode_operations accessfs_dir_inode_operations = { ++ .lookup = accessfs_lookup, ++ .setattr = accessfs_notify_change, ++}; ++ ++static struct file_operations accessfs_dir_file_operations = { ++ .readdir = accessfs_readdir, ++}; ++ ++static struct super_operations accessfs_ops = { ++ .statfs = simple_statfs, ++}; ++ ++static int accessfs_fill_super(struct super_block *sb, void *data, int silent) ++{ ++ struct inode *inode; ++ struct dentry *root; ++ ++ sb->s_blocksize = PAGE_CACHE_SIZE; ++ sb->s_blocksize_bits = PAGE_CACHE_SHIFT; ++ sb->s_magic = ACCESSFS_MAGIC; ++ sb->s_op = &accessfs_ops; ++ inode = accessfs_get_root_inode(sb); ++ if (!inode) ++ return -ENOMEM; ++ ++ root = d_alloc_root(inode); ++ if (!root) { ++ iput(inode); ++ return -ENOMEM; ++ } ++ ++ sb->s_root = root; ++ return 0; ++} ++ ++static struct dentry *accessfs_mount(struct file_system_type *fs_type, ++ int flags, const char *dev_name, void *data) ++{ ++ return mount_single(fs_type, flags, data, accessfs_fill_super); ++} ++ ++int accessfs_permitted(struct access_attr *p, int mask) ++{ ++ mode_t mode = p->mode; ++ if (current_fsuid() == p->uid) ++ mode >>= 6; ++ else if (in_group_p(p->gid)) ++ mode >>= 3; ++ ++ return (mode & mask) == mask; ++} ++ ++int accessfs_register(struct accessfs_direntry *dir, const char *name, ++ struct access_attr *attr) ++{ ++ int err; ++ if (dir == 0) ++ return -EINVAL; ++ ++ mutex_lock(&accessfs_sem); ++ err = accessfs_mknod(dir, name, attr); ++ mutex_unlock(&accessfs_sem); ++ return err; ++} ++ ++void accessfs_unregister(struct accessfs_direntry *dir, const char *name) ++{ ++ struct accessfs_entry *pe; ++ mutex_lock(&accessfs_sem); ++ pe = accessfs_lookup_entry(&dir->node, name, strlen(name)); ++ if (pe) ++ accessfs_unlink(pe); ++ ++ mutex_unlock(&accessfs_sem); ++} ++ ++static struct file_system_type accessfs_fs_type = { ++ .owner = THIS_MODULE, ++ .name = "accessfs", ++ .mount = accessfs_mount, ++ .kill_sb = kill_anon_super, ++}; ++ ++static int __init init_accessfs_fs(void) ++{ ++ ++ /* create mount point for accessfs */ ++ mountdir = proc_mkdir("access", NULL); ++ return register_filesystem(&accessfs_fs_type); ++} ++ ++static void __exit exit_accessfs_fs(void) ++{ ++ unregister_filesystem(&accessfs_fs_type); ++ remove_proc_entry("access", NULL); ++} ++ ++module_init(init_accessfs_fs) ++module_exit(exit_accessfs_fs) ++ ++MODULE_AUTHOR("Olaf Dietsche"); ++MODULE_DESCRIPTION("Access Filesystem"); ++MODULE_LICENSE("GPL v2"); ++ ++EXPORT_SYMBOL(accessfs_permitted); ++EXPORT_SYMBOL(accessfs_make_dirpath); ++EXPORT_SYMBOL(accessfs_register); ++EXPORT_SYMBOL(accessfs_unregister); +diff --git a/fs/accessfs/ip.c b/fs/accessfs/ip.c +new file mode 100644 +index 0000000..bddd2f0 +--- /dev/null ++++ b/fs/accessfs/ip.c +@@ -0,0 +1,101 @@ ++/* Copyright (c) 2002-2006 Olaf Dietsche ++ * ++ * User permission based port access for Linux. 
++ */ ++ ++#include ++#include ++#include ++#include ++ ++static int max_prot_sock = CONFIG_ACCESSFS_PROT_SOCK; ++#ifndef CONFIG_ACCESSFS_IGNORE_NET_BIND_SERVICE ++#define CONFIG_ACCESSFS_IGNORE_NET_BIND_SERVICE 0 ++#endif ++static int ignore_net_bind_service = CONFIG_ACCESSFS_IGNORE_NET_BIND_SERVICE; ++static struct access_attr *bind_to_port; ++ ++static int accessfs_ip_prot_sock(struct socket *sock, ++ struct sockaddr *uaddr, int addr_len) ++{ ++ struct sockaddr_in *addr = (struct sockaddr_in *) uaddr; ++ unsigned short snum = ntohs(addr->sin_port); ++ if (snum && snum < max_prot_sock ++ && !accessfs_permitted(&bind_to_port[snum], MAY_EXEC) ++ && (ignore_net_bind_service || !capable(CAP_NET_BIND_SERVICE))) ++ return -EACCES; ++ ++ return 0; ++} ++ ++static int accessfs_ip6_prot_sock(struct socket *sock, ++ struct sockaddr *uaddr, int addr_len) ++{ ++ struct sockaddr_in6 *addr = (struct sockaddr_in6 *) uaddr; ++ unsigned short snum = ntohs(addr->sin6_port); ++ if (snum && snum < max_prot_sock ++ && !accessfs_permitted(&bind_to_port[snum], MAY_EXEC) ++ && !capable(CAP_NET_BIND_SERVICE)) ++ return -EACCES; ++ ++ return 0; ++} ++ ++static struct net_hook_operations ip_net_ops = { ++ .ip_prot_sock = accessfs_ip_prot_sock, ++ .ip6_prot_sock = accessfs_ip6_prot_sock, ++}; ++ ++static int __init init_ip(void) ++{ ++ struct accessfs_direntry *dir = accessfs_make_dirpath("net/ip/bind"); ++ int i; ++ ++ if (max_prot_sock < PROT_SOCK) ++ max_prot_sock = PROT_SOCK; ++ else if (max_prot_sock > 65536) ++ max_prot_sock = 65536; ++ ++ bind_to_port = kmalloc(max_prot_sock * sizeof(*bind_to_port), ++ GFP_KERNEL); ++ if (bind_to_port == 0) ++ return -ENOMEM; ++ ++ for (i = 1; i < max_prot_sock; ++i) { ++ char buf[sizeof("65536")]; ++ bind_to_port[i].uid = 0; ++ bind_to_port[i].gid = 0; ++ bind_to_port[i].mode = i < PROT_SOCK ? S_IXUSR : S_IXUGO; ++ sprintf(buf, "%d", i); ++ accessfs_register(dir, buf, &bind_to_port[i]); ++ } ++ ++ net_hooks_register(&ip_net_ops); ++ return 0; ++} ++ ++static void __exit exit_ip(void) ++{ ++ struct accessfs_direntry *dir = accessfs_make_dirpath("net/ip/bind"); ++ int i; ++ net_hooks_unregister(&ip_net_ops); ++ for (i = 1; i < max_prot_sock; ++i) { ++ char buf[sizeof("65536")]; ++ sprintf(buf, "%d", i); ++ accessfs_unregister(dir, buf); ++ } ++ ++ if (bind_to_port != NULL) ++ kfree(bind_to_port); ++} ++ ++module_init(init_ip) ++module_exit(exit_ip) ++ ++MODULE_AUTHOR("Olaf Dietsche"); ++MODULE_DESCRIPTION("User based IP ports permission"); ++MODULE_LICENSE("GPL v2"); ++module_param(max_prot_sock, int, 0444); ++MODULE_PARM_DESC(max_prot_sock, "Number of protected ports"); ++module_param(ignore_net_bind_service, bool, 0644); ++MODULE_PARM_DESC(ignore_net_bind_service, "Ignore CAP_NET_BIND_SERVICE capability"); +diff --git a/include/linux/accessfs_fs.h b/include/linux/accessfs_fs.h +new file mode 100644 +index 0000000..ecd914e +--- /dev/null ++++ b/include/linux/accessfs_fs.h +@@ -0,0 +1,42 @@ ++/* -*- mode: c -*- */ ++#ifndef __accessfs_fs_h_included__ ++#define __accessfs_fs_h_included__ 1 ++ ++/* Copyright (c) 2001 Olaf Dietsche ++ * ++ * Access permission filesystem for Linux. 
++ */ ++ ++#include ++#include ++#include ++#include ++#include ++ ++struct access_attr { ++ uid_t uid; ++ gid_t gid; ++ mode_t mode; ++}; ++ ++struct accessfs_entry { ++ char *name; ++ struct list_head hash; ++ struct list_head siblings; ++ ino_t ino; ++ struct access_attr *attr; ++}; ++ ++struct accessfs_direntry { ++ struct accessfs_entry node; ++ struct accessfs_direntry *parent; ++ struct list_head children; ++ struct access_attr attr; ++}; ++ ++extern int accessfs_permitted(struct access_attr *p, int mask); ++extern struct accessfs_direntry *accessfs_make_dirpath(const char *name); ++extern int accessfs_register(struct accessfs_direntry *dir, const char *name, struct access_attr *attr); ++extern void accessfs_unregister(struct accessfs_direntry *dir, const char *name); ++ ++#endif +diff --git a/include/net/sock.h b/include/net/sock.h +index 32e3937..5fa9348 100644 +--- a/include/net/sock.h ++++ b/include/net/sock.h +@@ -1860,4 +1860,47 @@ extern int sysctl_optmem_max; + extern __u32 sysctl_wmem_default; + extern __u32 sysctl_rmem_default; + ++/* Networking hooks */ ++extern int default_ip_prot_sock(struct socket *sock, struct sockaddr *uaddr, ++ int addr_len); ++extern int default_ip6_prot_sock(struct socket *sock, struct sockaddr *uaddr, ++ int addr_len); ++#ifdef CONFIG_NET_HOOKS ++struct net_hook_operations { ++ int (*ip_prot_sock)(struct socket *sock, ++ struct sockaddr *uaddr, int addr_len); ++ int (*ip6_prot_sock)(struct socket *sock, ++ struct sockaddr *uaddr, int addr_len); ++}; ++ ++extern struct net_hook_operations *net_ops; ++ ++extern void net_hooks_register(struct net_hook_operations *ops); ++extern void net_hooks_unregister(struct net_hook_operations *ops); ++ ++static inline int ip_prot_sock(struct socket *sock, struct sockaddr *uaddr, ++ int addr_len) ++{ ++ return net_ops->ip_prot_sock(sock, uaddr, addr_len); ++} ++ ++static inline int ip6_prot_sock(struct socket *sock, struct sockaddr *uaddr, ++ int addr_len) ++{ ++ return net_ops->ip6_prot_sock(sock, uaddr, addr_len); ++} ++#else ++static inline int ip_prot_sock(struct socket *sock, struct sockaddr *uaddr, ++ int addr_len) ++{ ++ return default_ip_prot_sock(sock, uaddr, addr_len); ++} ++ ++static inline int ip6_prot_sock(struct socket *sock, struct sockaddr *uaddr, ++ int addr_len) ++{ ++ return default_ip6_prot_sock(sock, uaddr, addr_len); ++} ++#endif ++ + #endif /* _SOCK_H */ +diff --git a/net/Kconfig b/net/Kconfig +index a073148..bb5fb42 100644 +--- a/net/Kconfig ++++ b/net/Kconfig +@@ -75,6 +75,18 @@ config INET + if INET + source "net/ipv4/Kconfig" + source "net/ipv6/Kconfig" ++ ++config NET_HOOKS ++ bool "IP: Networking hooks (Experimental)" ++ depends on INET && EXPERIMENTAL ++ default n ++ help ++ This option enables other kernel parts or modules to hook into the ++ networking area and provide fine grained control over the access to ++ IP ports. ++ ++ If you're unsure, say N. ++ + source "net/netlabel/Kconfig" + + endif # if INET +diff --git a/net/Makefile b/net/Makefile +index acdde49..4e5dc79 100644 +--- a/net/Makefile ++++ b/net/Makefile +@@ -61,6 +61,7 @@ + obj-$(CONFIG_IEEE802154) += ieee802154/ + obj-$(CONFIG_MAC802154) += mac802154/ + ++obj-$(CONFIG_NET) += hooks.o + ifeq ($(CONFIG_NET),y) + obj-$(CONFIG_SYSCTL) += sysctl_net.o + endif +diff --git a/net/hooks.c b/net/hooks.c +new file mode 100644 +index 0000000..33100e6 +--- /dev/null ++++ b/net/hooks.c +@@ -0,0 +1,55 @@ ++/* Copyright (c) 2002 Olaf Dietsche ++ * ++ * Networking hooks. Currently for IPv4 and IPv6 only. 
++ */ ++ ++#include ++#include ++#include ++#include ++ ++int default_ip_prot_sock(struct socket *sock, struct sockaddr *uaddr, int addr_len) ++{ ++ struct sockaddr_in *addr = (struct sockaddr_in *) uaddr; ++ unsigned short snum = ntohs(addr->sin_port); ++ if (snum && snum < PROT_SOCK && !capable(CAP_NET_BIND_SERVICE)) ++ return -EACCES; ++ ++ return 0; ++} ++ ++int default_ip6_prot_sock(struct socket *sock, struct sockaddr *uaddr, int addr_len) ++{ ++ struct sockaddr_in6 *addr = (struct sockaddr_in6 *) uaddr; ++ unsigned short snum = ntohs(addr->sin6_port); ++ if (snum && snum < PROT_SOCK && !capable(CAP_NET_BIND_SERVICE)) ++ return -EACCES; ++ ++ return 0; ++} ++ ++EXPORT_SYMBOL(default_ip_prot_sock); ++EXPORT_SYMBOL(default_ip6_prot_sock); ++ ++#ifdef CONFIG_NET_HOOKS ++static struct net_hook_operations default_net_ops = { ++ .ip_prot_sock = default_ip_prot_sock, ++ .ip6_prot_sock = default_ip6_prot_sock, ++}; ++ ++struct net_hook_operations *net_ops = &default_net_ops; ++ ++void net_hooks_register(struct net_hook_operations *ops) ++{ ++ net_ops = ops; ++} ++ ++void net_hooks_unregister(struct net_hook_operations *ops) ++{ ++ net_ops = &default_net_ops; ++} ++ ++EXPORT_SYMBOL(net_ops); ++EXPORT_SYMBOL(net_hooks_register); ++EXPORT_SYMBOL(net_hooks_unregister); ++#endif +diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c +index 1b5096a..9460a3c 100644 +--- a/net/ipv4/af_inet.c ++++ b/net/ipv4/af_inet.c +@@ -495,7 +495,7 @@ int inet_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len) + + snum = ntohs(addr->sin_port); + err = -EACCES; +- if (snum && snum < PROT_SOCK && !capable(CAP_NET_BIND_SERVICE)) ++ if (ip_prot_sock(sock, uaddr, addr_len)) + goto out; + + /* We keep a pair of addresses. rcv_saddr is the one +diff --git a/net/ipv6/af_inet6.c b/net/ipv6/af_inet6.c +index d27c797..154b1ec 100644 +--- a/net/ipv6/af_inet6.c ++++ b/net/ipv6/af_inet6.c +@@ -281,7 +281,7 @@ int inet6_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len) + return -EINVAL; + + snum = ntohs(addr->sin6_port); +- if (snum && snum < PROT_SOCK && !capable(CAP_NET_BIND_SERVICE)) ++ if (ip6_prot_sock(sock, uaddr, addr_len)) + return -EACCES; + + lock_sock(sk); + diff --git a/3.3.8/ata-prefer-ata-drivers-over-ide-drivers-when-both-are-built.patch b/3.3.8/ata-prefer-ata-drivers-over-ide-drivers-when-both-are-built.patch new file mode 100644 index 0000000..7af90e4 --- /dev/null +++ b/3.3.8/ata-prefer-ata-drivers-over-ide-drivers-when-both-are-built.patch @@ -0,0 +1,36 @@ +>From 9f04e51293b130474504216a477bb2a73cbf59e1 Mon Sep 17 00:00:00 2001 +From: Anssi Hannula +Date: Thu, 22 Mar 2012 22:29:11 +0200 +Subject: [PATCH] ata: prefer ata drivers over ide drivers when both are built + +Currently the old IDE drivers are preferred over ATA drivers when both +are built, since ide/ is listed first in drivers/Makefile and therefore +the IDE drivers end up before ATA drivers in modules.order which is used +by depmod/modprobe for module ordering. + +Change it so that ATA drivers are preferred over IDE driver by moving +the ide/ entry under ata/ in drivers/Makefile. 
+ +Signed-off-by: Anssi Hannula +--- + drivers/Makefile | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/drivers/Makefile b/drivers/Makefile +index 932e8bf..e8df3d0 100644 +--- a/drivers/Makefile ++++ b/drivers/Makefile +@@ -47,9 +47,9 @@ obj-$(CONFIG_PARPORT) += parport/ + obj-y += base/ block/ misc/ mfd/ nfc/ + obj-$(CONFIG_NUBUS) += nubus/ + obj-y += macintosh/ +-obj-$(CONFIG_IDE) += ide/ + obj-$(CONFIG_SCSI) += scsi/ + obj-$(CONFIG_ATA) += ata/ ++obj-$(CONFIG_IDE) += ide/ + obj-$(CONFIG_TARGET_CORE) += target/ + obj-$(CONFIG_MTD) += mtd/ + obj-$(CONFIG_SPI) += spi/ +-- +1.7.9.3 + diff --git a/3.3.8/aufs-3.x-rcN.patch b/3.3.8/aufs-3.x-rcN.patch new file mode 100644 index 0000000..176df5a --- /dev/null +++ b/3.3.8/aufs-3.x-rcN.patch @@ -0,0 +1,29364 @@ +aufs3.x-rcN kbuild patch + +diff --git a/fs/Kconfig b/fs/Kconfig +index d621f02..9b9694c 100644 +--- a/fs/Kconfig ++++ b/fs/Kconfig +@@ -215,6 +215,7 @@ source "fs/pstore/Kconfig" + source "fs/sysv/Kconfig" + source "fs/ufs/Kconfig" + source "fs/exofs/Kconfig" ++source "fs/aufs/Kconfig" + + endif # MISC_FILESYSTEMS + +diff --git a/fs/Makefile b/fs/Makefile +index 93804d4..cf3dcb9 100644 +--- a/fs/Makefile ++++ b/fs/Makefile +@@ -124,3 +124,4 @@ obj-$(CONFIG_GFS2_FS) += gfs2/ + obj-y += exofs/ # Multiple modules + obj-$(CONFIG_CEPH_FS) += ceph/ + obj-$(CONFIG_PSTORE) += pstore/ ++obj-$(CONFIG_AUFS_FS) += aufs/ +diff --git a/include/linux/Kbuild b/include/linux/Kbuild +index c94e717..fccb9df 100644 +--- a/include/linux/Kbuild ++++ b/include/linux/Kbuild +@@ -65,6 +65,7 @@ header-y += atmppp.h + header-y += atmsap.h + header-y += atmsvc.h + header-y += audit.h ++header-y += aufs_type.h + header-y += auto_fs.h + header-y += auto_fs4.h + header-y += auxvec.h +aufs3.x-rcN base patch + +diff --git a/fs/namei.c b/fs/namei.c +index e2ba628..fde8ea2 100644 +--- a/fs/namei.c ++++ b/fs/namei.c +@@ -1781,7 +1781,7 @@ static struct dentry *__lookup_hash(struct qstr *name, + * needs parent already locked. Doesn't follow mounts. + * SMP-safe. + */ +-static struct dentry *lookup_hash(struct nameidata *nd) ++struct dentry *lookup_hash(struct nameidata *nd) + { + return __lookup_hash(&nd->last, nd->path.dentry, nd); + } +diff --git a/fs/splice.c b/fs/splice.c +index 1ec0493..c599f73 100644 +--- a/fs/splice.c ++++ b/fs/splice.c +@@ -1084,8 +1084,8 @@ EXPORT_SYMBOL(generic_splice_sendpage); + /* + * Attempt to initiate a splice from pipe to file. + */ +-static long do_splice_from(struct pipe_inode_info *pipe, struct file *out, +- loff_t *ppos, size_t len, unsigned int flags) ++long do_splice_from(struct pipe_inode_info *pipe, struct file *out, ++ loff_t *ppos, size_t len, unsigned int flags) + { + ssize_t (*splice_write)(struct pipe_inode_info *, struct file *, + loff_t *, size_t, unsigned int); +@@ -1112,9 +1112,9 @@ static long do_splice_from(struct pipe_inode_info *pipe, struct file *out, + /* + * Attempt to initiate a splice from a file to a pipe. 
+ */ +-static long do_splice_to(struct file *in, loff_t *ppos, +- struct pipe_inode_info *pipe, size_t len, +- unsigned int flags) ++long do_splice_to(struct file *in, loff_t *ppos, ++ struct pipe_inode_info *pipe, size_t len, ++ unsigned int flags) + { + ssize_t (*splice_read)(struct file *, loff_t *, + struct pipe_inode_info *, size_t, unsigned int); +diff --git a/include/linux/namei.h b/include/linux/namei.h +index ffc0213..ef35a31 100644 +--- a/include/linux/namei.h ++++ b/include/linux/namei.h +@@ -85,6 +85,7 @@ extern int vfs_path_lookup(struct dentry *, struct vfsmount *, + extern struct file *lookup_instantiate_filp(struct nameidata *nd, struct dentry *dentry, + int (*open)(struct inode *, struct file *)); + ++extern struct dentry *lookup_hash(struct nameidata *nd); + extern struct dentry *lookup_one_len(const char *, struct dentry *, int); + + extern int follow_down_one(struct path *); +diff --git a/include/linux/splice.h b/include/linux/splice.h +index 26e5b61..3ffef2f 100644 +--- a/include/linux/splice.h ++++ b/include/linux/splice.h +@@ -91,4 +91,10 @@ extern void splice_shrink_spd(struct pipe_inode_info *, + extern void spd_release_page(struct splice_pipe_desc *, unsigned int); + + extern const struct pipe_buf_operations page_cache_pipe_buf_ops; ++ ++extern long do_splice_from(struct pipe_inode_info *pipe, struct file *out, ++ loff_t *ppos, size_t len, unsigned int flags); ++extern long do_splice_to(struct file *in, loff_t *ppos, ++ struct pipe_inode_info *pipe, size_t len, ++ unsigned int flags); + #endif +aufs3.x-rcN standalone patch + +diff --git a/fs/file_table.c b/fs/file_table.c +index 20002e3..6d792ad 100644 +--- a/fs/file_table.c ++++ b/fs/file_table.c +@@ -443,6 +443,8 @@ void file_sb_list_del(struct file *file) + } + } + ++EXPORT_SYMBOL(file_sb_list_del); ++ + #ifdef CONFIG_SMP + + /* +diff --git a/fs/inode.c b/fs/inode.c +index d3ebdbe..6db6251 100644 +--- a/fs/inode.c ++++ b/fs/inode.c +@@ -66,6 +66,7 @@ static struct hlist_head *inode_hashtable __read_mostly; + static __cacheline_aligned_in_smp DEFINE_SPINLOCK(inode_hash_lock); + + __cacheline_aligned_in_smp DEFINE_SPINLOCK(inode_sb_list_lock); ++EXPORT_SYMBOL(inode_sb_list_lock); + + /* + * Empty aops. Can be used for the cases where the user does not +diff --git a/fs/namei.c b/fs/namei.c +index fde8ea2..62f2302 100644 +--- a/fs/namei.c ++++ b/fs/namei.c +@@ -1785,6 +1785,7 @@ struct dentry *lookup_hash(struct nameidata *nd) + { + return __lookup_hash(&nd->last, nd->path.dentry, nd); + } ++EXPORT_SYMBOL(lookup_hash); + + /** + * lookup_one_len - filesystem helper to lookup single pathname component +diff --git a/fs/namespace.c b/fs/namespace.c +index e608199..38fcc2e 100644 +--- a/fs/namespace.c ++++ b/fs/namespace.c +@@ -1339,6 +1339,7 @@ int iterate_mounts(int (*f)(struct vfsmount *, void *), void *arg, + } + return 0; + } ++EXPORT_SYMBOL(iterate_mounts); + + static void cleanup_group_ids(struct mount *mnt, struct mount *end) + { +diff --git a/fs/notify/group.c b/fs/notify/group.c +index 63fc294..6f4adca 100644 +--- a/fs/notify/group.c ++++ b/fs/notify/group.c +@@ -22,6 +22,7 @@ + #include + #include + #include ++#include + + #include + #include "fsnotify.h" +@@ -70,6 +71,7 @@ void fsnotify_put_group(struct fsnotify_group *group) + if (atomic_dec_and_test(&group->refcnt)) + fsnotify_destroy_group(group); + } ++EXPORT_SYMBOL(fsnotify_put_group); + + /* + * Create a new fsnotify_group and hold a reference for the group returned. 
+@@ -102,3 +104,4 @@ struct fsnotify_group *fsnotify_alloc_group(const struct fsnotify_ops *ops) + + return group; + } ++EXPORT_SYMBOL(fsnotify_alloc_group); +diff --git a/fs/notify/mark.c b/fs/notify/mark.c +index f104d56..54f36db 100644 +--- a/fs/notify/mark.c ++++ b/fs/notify/mark.c +@@ -112,6 +112,7 @@ void fsnotify_put_mark(struct fsnotify_mark *mark) + if (atomic_dec_and_test(&mark->refcnt)) + mark->free_mark(mark); + } ++EXPORT_SYMBOL(fsnotify_put_mark); + + /* + * Any time a mark is getting freed we end up here. +@@ -191,6 +192,7 @@ void fsnotify_destroy_mark(struct fsnotify_mark *mark) + if (unlikely(atomic_dec_and_test(&group->num_marks))) + fsnotify_final_destroy_group(group); + } ++EXPORT_SYMBOL(fsnotify_destroy_mark); + + void fsnotify_set_mark_mask_locked(struct fsnotify_mark *mark, __u32 mask) + { +@@ -278,6 +280,7 @@ err: + + return ret; + } ++EXPORT_SYMBOL(fsnotify_add_mark); + + /* + * clear any marks in a group in which mark->flags & flags is true +@@ -333,6 +336,7 @@ void fsnotify_init_mark(struct fsnotify_mark *mark, + atomic_set(&mark->refcnt, 1); + mark->free_mark = free_mark; + } ++EXPORT_SYMBOL(fsnotify_init_mark); + + static int fsnotify_mark_destroy(void *ignored) + { +diff --git a/fs/open.c b/fs/open.c +index 77becc0..f634f02 100644 +--- a/fs/open.c ++++ b/fs/open.c +@@ -60,6 +60,7 @@ int do_truncate(struct dentry *dentry, loff_t length, unsigned int time_attrs, + mutex_unlock(&dentry->d_inode->i_mutex); + return ret; + } ++EXPORT_SYMBOL(do_truncate); + + static long do_sys_truncate(const char __user *pathname, loff_t length) + { +diff --git a/fs/splice.c b/fs/splice.c +index c599f73..00303ba 100644 +--- a/fs/splice.c ++++ b/fs/splice.c +@@ -1108,6 +1108,7 @@ long do_splice_from(struct pipe_inode_info *pipe, struct file *out, + + return splice_write(pipe, out, ppos, len, flags); + } ++EXPORT_SYMBOL(do_splice_from); + + /* + * Attempt to initiate a splice from a file to a pipe. 
+@@ -1134,6 +1135,7 @@ long do_splice_to(struct file *in, loff_t *ppos, + + return splice_read(in, ppos, pipe, len, flags); + } ++EXPORT_SYMBOL(do_splice_to); + + /** + * splice_direct_to_actor - splices data directly between two non-pipes +diff --git a/security/commoncap.c b/security/commoncap.c +index 7ce191e..19a95be 100644 +--- a/security/commoncap.c ++++ b/security/commoncap.c +@@ -965,3 +965,4 @@ int cap_file_mmap(struct file *file, unsigned long reqprot, + } + return ret; + } ++EXPORT_SYMBOL(cap_file_mmap); +diff --git a/security/device_cgroup.c b/security/device_cgroup.c +index 8b5b5d8..911850c 100644 +--- a/security/device_cgroup.c ++++ b/security/device_cgroup.c +@@ -7,6 +7,7 @@ + #include + #include + #include ++#include + #include + #include + #include +@@ -501,6 +502,7 @@ found: + + return -EPERM; + } ++EXPORT_SYMBOL(__devcgroup_inode_permission); + + int devcgroup_inode_mknod(int mode, dev_t dev) + { +diff --git a/security/security.c b/security/security.c +index d754249..1aa6154 100644 +--- a/security/security.c ++++ b/security/security.c +@@ -392,6 +392,7 @@ int security_path_rmdir(struct path *dir, struct dentry *dentry) + return 0; + return security_ops->path_rmdir(dir, dentry); + } ++EXPORT_SYMBOL(security_path_rmdir); + + int security_path_unlink(struct path *dir, struct dentry *dentry) + { +@@ -408,6 +409,7 @@ int security_path_symlink(struct path *dir, struct dentry *dentry, + return 0; + return security_ops->path_symlink(dir, dentry, old_name); + } ++EXPORT_SYMBOL(security_path_symlink); + + int security_path_link(struct dentry *old_dentry, struct path *new_dir, + struct dentry *new_dentry) +@@ -416,6 +418,7 @@ int security_path_link(struct dentry *old_dentry, struct path *new_dir, + return 0; + return security_ops->path_link(old_dentry, new_dir, new_dentry); + } ++EXPORT_SYMBOL(security_path_link); + + int security_path_rename(struct path *old_dir, struct dentry *old_dentry, + struct path *new_dir, struct dentry *new_dentry) +@@ -434,6 +437,7 @@ int security_path_truncate(struct path *path) + return 0; + return security_ops->path_truncate(path); + } ++EXPORT_SYMBOL(security_path_truncate); + + int security_path_chmod(struct path *path, umode_t mode) + { +@@ -441,6 +445,7 @@ int security_path_chmod(struct path *path, umode_t mode) + return 0; + return security_ops->path_chmod(path, mode); + } ++EXPORT_SYMBOL(security_path_chmod); + + int security_path_chown(struct path *path, uid_t uid, gid_t gid) + { +@@ -448,6 +453,7 @@ int security_path_chown(struct path *path, uid_t uid, gid_t gid) + return 0; + return security_ops->path_chown(path, uid, gid); + } ++EXPORT_SYMBOL(security_path_chown); + + int security_path_chroot(struct path *path) + { +@@ -524,6 +530,7 @@ int security_inode_readlink(struct dentry *dentry) + return 0; + return security_ops->inode_readlink(dentry); + } ++EXPORT_SYMBOL(security_inode_readlink); + + int security_inode_follow_link(struct dentry *dentry, struct nameidata *nd) + { +@@ -538,6 +545,7 @@ int security_inode_permission(struct inode *inode, int mask) + return 0; + return security_ops->inode_permission(inode, mask); + } ++EXPORT_SYMBOL(security_inode_permission); + + int security_inode_setattr(struct dentry *dentry, struct iattr *attr) + { +@@ -653,6 +661,7 @@ int security_file_permission(struct file *file, int mask) + + return fsnotify_perm(file, mask); + } ++EXPORT_SYMBOL(security_file_permission); + + int security_file_alloc(struct file *file) + { +@@ -680,6 +689,7 @@ int security_file_mmap(struct file *file, unsigned long reqprot, + return 
ret; + return ima_file_mmap(file, prot); + } ++EXPORT_SYMBOL(security_file_mmap); + + int security_file_mprotect(struct vm_area_struct *vma, unsigned long reqprot, + unsigned long prot) +--- /dev/null 2012-03-14 12:35:58.848999748 +0100 ++++ b/Documentation/ABI/testing/debugfs-aufs 2011-10-25 09:52:26.000000000 +0200 +@@ -0,0 +1,37 @@ ++What: /debug/aufs/si_/ ++Date: March 2009 ++Contact: J. R. Okajima ++Description: ++ Under /debug/aufs, a directory named si_ is created ++ per aufs mount, where is a unique id generated ++ internally. ++ ++What: /debug/aufs/si_/xib ++Date: March 2009 ++Contact: J. R. Okajima ++Description: ++ It shows the consumed blocks by xib (External Inode Number ++ Bitmap), its block size and file size. ++ When the aufs mount option 'noxino' is specified, it ++ will be empty. About XINO files, see the aufs manual. ++ ++What: /debug/aufs/si_/xino0, xino1 ... xinoN ++Date: March 2009 ++Contact: J. R. Okajima ++Description: ++ It shows the consumed blocks by xino (External Inode Number ++ Translation Table), its link count, block size and file ++ size. ++ When the aufs mount option 'noxino' is specified, it ++ will be empty. About XINO files, see the aufs manual. ++ ++What: /debug/aufs/si_/xigen ++Date: March 2009 ++Contact: J. R. Okajima ++Description: ++ It shows the consumed blocks by xigen (External Inode ++ Generation Table), its block size and file size. ++ If CONFIG_AUFS_EXPORT is disabled, this entry will not ++ be created. ++ When the aufs mount option 'noxino' is specified, it ++ will be empty. About XINO files, see the aufs manual. +--- /dev/null 2012-03-14 12:35:58.848999748 +0100 ++++ b/Documentation/ABI/testing/sysfs-aufs 2011-10-25 09:52:26.000000000 +0200 +@@ -0,0 +1,24 @@ ++What: /sys/fs/aufs/si_/ ++Date: March 2009 ++Contact: J. R. Okajima ++Description: ++ Under /sys/fs/aufs, a directory named si_ is created ++ per aufs mount, where is a unique id generated ++ internally. ++ ++What: /sys/fs/aufs/si_/br0, br1 ... brN ++Date: March 2009 ++Contact: J. R. Okajima ++Description: ++ It shows the abolute path of a member directory (which ++ is called branch) in aufs, and its permission. ++ ++What: /sys/fs/aufs/si_/xi_path ++Date: March 2009 ++Contact: J. R. Okajima ++Description: ++ It shows the abolute path of XINO (External Inode Number ++ Bitmap, Translation Table and Generation Table) file ++ even if it is the default path. ++ When the aufs mount option 'noxino' is specified, it ++ will be empty. About XINO files, see the aufs manual. +--- /dev/null 2012-03-14 12:35:58.848999748 +0100 ++++ b/fs/aufs/aufs.h 2012-03-20 17:31:17.000000000 +0100 +@@ -0,0 +1,60 @@ ++/* ++ * Copyright (C) 2005-2012 Junjiro R. Okajima ++ * ++ * This program, aufs is free software; you can redistribute it and/or modify ++ * it under the terms of the GNU General Public License as published by ++ * the Free Software Foundation; either version 2 of the License, or ++ * (at your option) any later version. ++ * ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ * GNU General Public License for more details. 
++ * ++ * You should have received a copy of the GNU General Public License ++ * along with this program; if not, write to the Free Software ++ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA ++ */ ++ ++/* ++ * all header files ++ */ ++ ++#ifndef __AUFS_H__ ++#define __AUFS_H__ ++ ++#ifdef __KERNEL__ ++ ++#define AuStub(type, name, body, ...) \ ++ static inline type name(__VA_ARGS__) { body; } ++ ++#define AuStubVoid(name, ...) \ ++ AuStub(void, name, , __VA_ARGS__) ++#define AuStubInt0(name, ...) \ ++ AuStub(int, name, return 0, __VA_ARGS__) ++ ++#include "debug.h" ++ ++#include "branch.h" ++#include "cpup.h" ++#include "dcsub.h" ++#include "dbgaufs.h" ++#include "dentry.h" ++#include "dir.h" ++#include "dynop.h" ++#include "file.h" ++#include "fstype.h" ++#include "inode.h" ++#include "loop.h" ++#include "module.h" ++#include "opts.h" ++#include "rwsem.h" ++#include "spl.h" ++#include "super.h" ++#include "sysaufs.h" ++#include "vfsub.h" ++#include "whout.h" ++#include "wkq.h" ++ ++#endif /* __KERNEL__ */ ++#endif /* __AUFS_H__ */ +--- /dev/null 2012-03-14 12:35:58.848999748 +0100 ++++ b/fs/aufs/branch.c 2012-03-20 17:31:17.000000000 +0100 +@@ -0,0 +1,1169 @@ ++/* ++ * Copyright (C) 2005-2012 Junjiro R. Okajima ++ * ++ * This program, aufs is free software; you can redistribute it and/or modify ++ * it under the terms of the GNU General Public License as published by ++ * the Free Software Foundation; either version 2 of the License, or ++ * (at your option) any later version. ++ * ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ * GNU General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public License ++ * along with this program; if not, write to the Free Software ++ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA ++ */ ++ ++/* ++ * branch management ++ */ ++ ++#include ++#include ++#include "aufs.h" ++ ++/* ++ * free a single branch ++ */ ++static void au_br_do_free(struct au_branch *br) ++{ ++ int i; ++ struct au_wbr *wbr; ++ struct au_dykey **key; ++ ++ au_hnotify_fin_br(br); ++ ++ if (br->br_xino.xi_file) ++ fput(br->br_xino.xi_file); ++ mutex_destroy(&br->br_xino.xi_nondir_mtx); ++ ++ AuDebugOn(atomic_read(&br->br_count)); ++ ++ wbr = br->br_wbr; ++ if (wbr) { ++ for (i = 0; i < AuBrWh_Last; i++) ++ dput(wbr->wbr_wh[i]); ++ AuDebugOn(atomic_read(&wbr->wbr_wh_running)); ++ AuRwDestroy(&wbr->wbr_wh_rwsem); ++ } ++ ++ key = br->br_dykey; ++ for (i = 0; i < AuBrDynOp; i++, key++) ++ if (*key) ++ au_dy_put(*key); ++ else ++ break; ++ ++ mntput(br->br_mnt); ++ kfree(wbr); ++ kfree(br); ++} ++ ++/* ++ * frees all branches ++ */ ++void au_br_free(struct au_sbinfo *sbinfo) ++{ ++ aufs_bindex_t bmax; ++ struct au_branch **br; ++ ++ AuRwMustWriteLock(&sbinfo->si_rwsem); ++ ++ bmax = sbinfo->si_bend + 1; ++ br = sbinfo->si_branch; ++ while (bmax--) ++ au_br_do_free(*br++); ++} ++ ++/* ++ * find the index of a branch which is specified by @br_id. 
++ */ ++int au_br_index(struct super_block *sb, aufs_bindex_t br_id) ++{ ++ aufs_bindex_t bindex, bend; ++ ++ bend = au_sbend(sb); ++ for (bindex = 0; bindex <= bend; bindex++) ++ if (au_sbr_id(sb, bindex) == br_id) ++ return bindex; ++ return -1; ++} ++ ++/* ---------------------------------------------------------------------- */ ++ ++/* ++ * add a branch ++ */ ++ ++static int test_overlap(struct super_block *sb, struct dentry *h_adding, ++ struct dentry *h_root) ++{ ++ if (unlikely(h_adding == h_root ++ || au_test_loopback_overlap(sb, h_adding))) ++ return 1; ++ if (h_adding->d_sb != h_root->d_sb) ++ return 0; ++ return au_test_subdir(h_adding, h_root) ++ || au_test_subdir(h_root, h_adding); ++} ++ ++/* ++ * returns a newly allocated branch. @new_nbranch is a number of branches ++ * after adding a branch. ++ */ ++static struct au_branch *au_br_alloc(struct super_block *sb, int new_nbranch, ++ int perm) ++{ ++ struct au_branch *add_branch; ++ struct dentry *root; ++ int err; ++ ++ err = -ENOMEM; ++ root = sb->s_root; ++ add_branch = kmalloc(sizeof(*add_branch), GFP_NOFS); ++ if (unlikely(!add_branch)) ++ goto out; ++ ++ err = au_hnotify_init_br(add_branch, perm); ++ if (unlikely(err)) ++ goto out_br; ++ ++ add_branch->br_wbr = NULL; ++ if (au_br_writable(perm)) { ++ /* may be freed separately at changing the branch permission */ ++ add_branch->br_wbr = kmalloc(sizeof(*add_branch->br_wbr), ++ GFP_NOFS); ++ if (unlikely(!add_branch->br_wbr)) ++ goto out_hnotify; ++ } ++ ++ err = au_sbr_realloc(au_sbi(sb), new_nbranch); ++ if (!err) ++ err = au_di_realloc(au_di(root), new_nbranch); ++ if (!err) ++ err = au_ii_realloc(au_ii(root->d_inode), new_nbranch); ++ if (!err) ++ return add_branch; /* success */ ++ ++ kfree(add_branch->br_wbr); ++ ++out_hnotify: ++ au_hnotify_fin_br(add_branch); ++out_br: ++ kfree(add_branch); ++out: ++ return ERR_PTR(err); ++} ++ ++/* ++ * test if the branch permission is legal or not. 
++ */ ++static int test_br(struct inode *inode, int brperm, char *path) ++{ ++ int err; ++ ++ err = (au_br_writable(brperm) && IS_RDONLY(inode)); ++ if (!err) ++ goto out; ++ ++ err = -EINVAL; ++ pr_err("write permission for readonly mount or inode, %s\n", path); ++ ++out: ++ return err; ++} ++ ++/* ++ * returns: ++ * 0: success, the caller will add it ++ * plus: success, it is already unified, the caller should ignore it ++ * minus: error ++ */ ++static int test_add(struct super_block *sb, struct au_opt_add *add, int remount) ++{ ++ int err; ++ aufs_bindex_t bend, bindex; ++ struct dentry *root; ++ struct inode *inode, *h_inode; ++ ++ root = sb->s_root; ++ bend = au_sbend(sb); ++ if (unlikely(bend >= 0 ++ && au_find_dbindex(root, add->path.dentry) >= 0)) { ++ err = 1; ++ if (!remount) { ++ err = -EINVAL; ++ pr_err("%s duplicated\n", add->pathname); ++ } ++ goto out; ++ } ++ ++ err = -ENOSPC; /* -E2BIG; */ ++ if (unlikely(AUFS_BRANCH_MAX <= add->bindex ++ || AUFS_BRANCH_MAX - 1 <= bend)) { ++ pr_err("number of branches exceeded %s\n", add->pathname); ++ goto out; ++ } ++ ++ err = -EDOM; ++ if (unlikely(add->bindex < 0 || bend + 1 < add->bindex)) { ++ pr_err("bad index %d\n", add->bindex); ++ goto out; ++ } ++ ++ inode = add->path.dentry->d_inode; ++ err = -ENOENT; ++ if (unlikely(!inode->i_nlink)) { ++ pr_err("no existence %s\n", add->pathname); ++ goto out; ++ } ++ ++ err = -EINVAL; ++ if (unlikely(inode->i_sb == sb)) { ++ pr_err("%s must be outside\n", add->pathname); ++ goto out; ++ } ++ ++ if (unlikely(au_test_fs_unsuppoted(inode->i_sb))) { ++ pr_err("unsupported filesystem, %s (%s)\n", ++ add->pathname, au_sbtype(inode->i_sb)); ++ goto out; ++ } ++ ++ err = test_br(add->path.dentry->d_inode, add->perm, add->pathname); ++ if (unlikely(err)) ++ goto out; ++ ++ if (bend < 0) ++ return 0; /* success */ ++ ++ err = -EINVAL; ++ for (bindex = 0; bindex <= bend; bindex++) ++ if (unlikely(test_overlap(sb, add->path.dentry, ++ au_h_dptr(root, bindex)))) { ++ pr_err("%s is overlapped\n", add->pathname); ++ goto out; ++ } ++ ++ err = 0; ++ if (au_opt_test(au_mntflags(sb), WARN_PERM)) { ++ h_inode = au_h_dptr(root, 0)->d_inode; ++ if ((h_inode->i_mode & S_IALLUGO) != (inode->i_mode & S_IALLUGO) ++ || h_inode->i_uid != inode->i_uid ++ || h_inode->i_gid != inode->i_gid) ++ pr_warning("uid/gid/perm %s %u/%u/0%o, %u/%u/0%o\n", ++ add->pathname, ++ inode->i_uid, inode->i_gid, ++ (inode->i_mode & S_IALLUGO), ++ h_inode->i_uid, h_inode->i_gid, ++ (h_inode->i_mode & S_IALLUGO)); ++ } ++ ++out: ++ return err; ++} ++ ++/* ++ * initialize or clean the whiteouts for an adding branch ++ */ ++static int au_br_init_wh(struct super_block *sb, struct au_branch *br, ++ int new_perm, struct dentry *h_root) ++{ ++ int err, old_perm; ++ aufs_bindex_t bindex; ++ struct mutex *h_mtx; ++ struct au_wbr *wbr; ++ struct au_hinode *hdir; ++ ++ wbr = br->br_wbr; ++ old_perm = br->br_perm; ++ br->br_perm = new_perm; ++ hdir = NULL; ++ h_mtx = NULL; ++ bindex = au_br_index(sb, br->br_id); ++ if (0 <= bindex) { ++ hdir = au_hi(sb->s_root->d_inode, bindex); ++ au_hn_imtx_lock_nested(hdir, AuLsc_I_PARENT); ++ } else { ++ h_mtx = &h_root->d_inode->i_mutex; ++ mutex_lock_nested(h_mtx, AuLsc_I_PARENT); ++ } ++ if (!wbr) ++ err = au_wh_init(h_root, br, sb); ++ else { ++ wbr_wh_write_lock(wbr); ++ err = au_wh_init(h_root, br, sb); ++ wbr_wh_write_unlock(wbr); ++ } ++ if (hdir) ++ au_hn_imtx_unlock(hdir); ++ else ++ mutex_unlock(h_mtx); ++ br->br_perm = old_perm; ++ ++ if (!err && wbr && !au_br_writable(new_perm)) { ++ kfree(wbr); ++ 
br->br_wbr = NULL; ++ } ++ ++ return err; ++} ++ ++static int au_wbr_init(struct au_branch *br, struct super_block *sb, ++ int perm, struct path *path) ++{ ++ int err; ++ struct kstatfs kst; ++ struct au_wbr *wbr; ++ struct dentry *h_dentry; ++ ++ wbr = br->br_wbr; ++ au_rw_init(&wbr->wbr_wh_rwsem); ++ memset(wbr->wbr_wh, 0, sizeof(wbr->wbr_wh)); ++ atomic_set(&wbr->wbr_wh_running, 0); ++ wbr->wbr_bytes = 0; ++ ++ /* ++ * a limit for rmdir/rename a dir ++ * cf. AUFS_MAX_NAMELEN in include/linux/aufs_type.h ++ */ ++ err = vfs_statfs(path, &kst); ++ if (unlikely(err)) ++ goto out; ++ err = -EINVAL; ++ h_dentry = path->dentry; ++ if (kst.f_namelen >= NAME_MAX) ++ err = au_br_init_wh(sb, br, perm, h_dentry); ++ else ++ pr_err("%.*s(%s), unsupported namelen %ld\n", ++ AuDLNPair(h_dentry), au_sbtype(h_dentry->d_sb), ++ kst.f_namelen); ++ ++out: ++ return err; ++} ++ ++/* intialize a new branch */ ++static int au_br_init(struct au_branch *br, struct super_block *sb, ++ struct au_opt_add *add) ++{ ++ int err; ++ ++ err = 0; ++ memset(&br->br_xino, 0, sizeof(br->br_xino)); ++ mutex_init(&br->br_xino.xi_nondir_mtx); ++ br->br_perm = add->perm; ++ br->br_mnt = add->path.mnt; /* set first, mntget() later */ ++ spin_lock_init(&br->br_dykey_lock); ++ memset(br->br_dykey, 0, sizeof(br->br_dykey)); ++ atomic_set(&br->br_count, 0); ++ br->br_xino_upper = AUFS_XINO_TRUNC_INIT; ++ atomic_set(&br->br_xino_running, 0); ++ br->br_id = au_new_br_id(sb); ++ AuDebugOn(br->br_id < 0); ++ ++ if (au_br_writable(add->perm)) { ++ err = au_wbr_init(br, sb, add->perm, &add->path); ++ if (unlikely(err)) ++ goto out_err; ++ } ++ ++ if (au_opt_test(au_mntflags(sb), XINO)) { ++ err = au_xino_br(sb, br, add->path.dentry->d_inode->i_ino, ++ au_sbr(sb, 0)->br_xino.xi_file, /*do_test*/1); ++ if (unlikely(err)) { ++ AuDebugOn(br->br_xino.xi_file); ++ goto out_err; ++ } ++ } ++ ++ sysaufs_br_init(br); ++ mntget(add->path.mnt); ++ goto out; /* success */ ++ ++out_err: ++ br->br_mnt = NULL; ++out: ++ return err; ++} ++ ++static void au_br_do_add_brp(struct au_sbinfo *sbinfo, aufs_bindex_t bindex, ++ struct au_branch *br, aufs_bindex_t bend, ++ aufs_bindex_t amount) ++{ ++ struct au_branch **brp; ++ ++ AuRwMustWriteLock(&sbinfo->si_rwsem); ++ ++ brp = sbinfo->si_branch + bindex; ++ memmove(brp + 1, brp, sizeof(*brp) * amount); ++ *brp = br; ++ sbinfo->si_bend++; ++ if (unlikely(bend < 0)) ++ sbinfo->si_bend = 0; ++} ++ ++static void au_br_do_add_hdp(struct au_dinfo *dinfo, aufs_bindex_t bindex, ++ aufs_bindex_t bend, aufs_bindex_t amount) ++{ ++ struct au_hdentry *hdp; ++ ++ AuRwMustWriteLock(&dinfo->di_rwsem); ++ ++ hdp = dinfo->di_hdentry + bindex; ++ memmove(hdp + 1, hdp, sizeof(*hdp) * amount); ++ au_h_dentry_init(hdp); ++ dinfo->di_bend++; ++ if (unlikely(bend < 0)) ++ dinfo->di_bstart = 0; ++} ++ ++static void au_br_do_add_hip(struct au_iinfo *iinfo, aufs_bindex_t bindex, ++ aufs_bindex_t bend, aufs_bindex_t amount) ++{ ++ struct au_hinode *hip; ++ ++ AuRwMustWriteLock(&iinfo->ii_rwsem); ++ ++ hip = iinfo->ii_hinode + bindex; ++ memmove(hip + 1, hip, sizeof(*hip) * amount); ++ hip->hi_inode = NULL; ++ au_hn_init(hip); ++ iinfo->ii_bend++; ++ if (unlikely(bend < 0)) ++ iinfo->ii_bstart = 0; ++} ++ ++static void au_br_do_add(struct super_block *sb, struct dentry *h_dentry, ++ struct au_branch *br, aufs_bindex_t bindex) ++{ ++ struct dentry *root; ++ struct inode *root_inode; ++ aufs_bindex_t bend, amount; ++ ++ root = sb->s_root; ++ root_inode = root->d_inode; ++ bend = au_sbend(sb); ++ amount = bend + 1 - bindex; ++ 
au_sbilist_lock(); ++ au_br_do_add_brp(au_sbi(sb), bindex, br, bend, amount); ++ au_br_do_add_hdp(au_di(root), bindex, bend, amount); ++ au_br_do_add_hip(au_ii(root_inode), bindex, bend, amount); ++ au_set_h_dptr(root, bindex, dget(h_dentry)); ++ au_set_h_iptr(root_inode, bindex, au_igrab(h_dentry->d_inode), ++ /*flags*/0); ++ au_sbilist_unlock(); ++} ++ ++int au_br_add(struct super_block *sb, struct au_opt_add *add, int remount) ++{ ++ int err; ++ aufs_bindex_t bend, add_bindex; ++ struct dentry *root, *h_dentry; ++ struct inode *root_inode; ++ struct au_branch *add_branch; ++ ++ root = sb->s_root; ++ root_inode = root->d_inode; ++ IMustLock(root_inode); ++ err = test_add(sb, add, remount); ++ if (unlikely(err < 0)) ++ goto out; ++ if (err) { ++ err = 0; ++ goto out; /* success */ ++ } ++ ++ bend = au_sbend(sb); ++ add_branch = au_br_alloc(sb, bend + 2, add->perm); ++ err = PTR_ERR(add_branch); ++ if (IS_ERR(add_branch)) ++ goto out; ++ ++ err = au_br_init(add_branch, sb, add); ++ if (unlikely(err)) { ++ au_br_do_free(add_branch); ++ goto out; ++ } ++ ++ add_bindex = add->bindex; ++ h_dentry = add->path.dentry; ++ if (!remount) ++ au_br_do_add(sb, h_dentry, add_branch, add_bindex); ++ else { ++ sysaufs_brs_del(sb, add_bindex); ++ au_br_do_add(sb, h_dentry, add_branch, add_bindex); ++ sysaufs_brs_add(sb, add_bindex); ++ } ++ ++ if (!add_bindex) { ++ au_cpup_attr_all(root_inode, /*force*/1); ++ sb->s_maxbytes = h_dentry->d_sb->s_maxbytes; ++ } else ++ au_add_nlink(root_inode, h_dentry->d_inode); ++ ++ /* ++ * this test/set prevents aufs from handling unnecesary notify events ++ * of xino files, in case of re-adding a writable branch which was ++ * once detached from aufs. ++ */ ++ if (au_xino_brid(sb) < 0 ++ && au_br_writable(add_branch->br_perm) ++ && !au_test_fs_bad_xino(h_dentry->d_sb) ++ && add_branch->br_xino.xi_file ++ && add_branch->br_xino.xi_file->f_dentry->d_parent == h_dentry) ++ au_xino_brid_set(sb, add_branch->br_id); ++ ++out: ++ return err; ++} ++ ++/* ---------------------------------------------------------------------- */ ++ ++/* ++ * delete a branch ++ */ ++ ++/* to show the line number, do not make it inlined function */ ++#define AuVerbose(do_info, fmt, ...) do { \ ++ if (do_info) \ ++ pr_info(fmt, ##__VA_ARGS__); \ ++} while (0) ++ ++static int au_test_ibusy(struct inode *inode, aufs_bindex_t bstart, ++ aufs_bindex_t bend) ++{ ++ return (inode && !S_ISDIR(inode->i_mode)) || bstart == bend; ++} ++ ++static int au_test_dbusy(struct dentry *dentry, aufs_bindex_t bstart, ++ aufs_bindex_t bend) ++{ ++ return au_test_ibusy(dentry->d_inode, bstart, bend); ++} ++ ++/* ++ * test if the branch is deletable or not. 
++ */ ++static int test_dentry_busy(struct dentry *root, aufs_bindex_t bindex, ++ unsigned int sigen, const unsigned int verbose) ++{ ++ int err, i, j, ndentry; ++ aufs_bindex_t bstart, bend; ++ struct au_dcsub_pages dpages; ++ struct au_dpage *dpage; ++ struct dentry *d; ++ ++ err = au_dpages_init(&dpages, GFP_NOFS); ++ if (unlikely(err)) ++ goto out; ++ err = au_dcsub_pages(&dpages, root, NULL, NULL); ++ if (unlikely(err)) ++ goto out_dpages; ++ ++ for (i = 0; !err && i < dpages.ndpage; i++) { ++ dpage = dpages.dpages + i; ++ ndentry = dpage->ndentry; ++ for (j = 0; !err && j < ndentry; j++) { ++ d = dpage->dentries[j]; ++ AuDebugOn(!d->d_count); ++ if (!au_digen_test(d, sigen)) { ++ di_read_lock_child(d, AuLock_IR); ++ if (unlikely(au_dbrange_test(d))) { ++ di_read_unlock(d, AuLock_IR); ++ continue; ++ } ++ } else { ++ di_write_lock_child(d); ++ if (unlikely(au_dbrange_test(d))) { ++ di_write_unlock(d); ++ continue; ++ } ++ err = au_reval_dpath(d, sigen); ++ if (!err) ++ di_downgrade_lock(d, AuLock_IR); ++ else { ++ di_write_unlock(d); ++ break; ++ } ++ } ++ ++ /* AuDbgDentry(d); */ ++ bstart = au_dbstart(d); ++ bend = au_dbend(d); ++ if (bstart <= bindex ++ && bindex <= bend ++ && au_h_dptr(d, bindex) ++ && au_test_dbusy(d, bstart, bend)) { ++ err = -EBUSY; ++ AuVerbose(verbose, "busy %.*s\n", AuDLNPair(d)); ++ AuDbgDentry(d); ++ } ++ di_read_unlock(d, AuLock_IR); ++ } ++ } ++ ++out_dpages: ++ au_dpages_free(&dpages); ++out: ++ return err; ++} ++ ++static int test_inode_busy(struct super_block *sb, aufs_bindex_t bindex, ++ unsigned int sigen, const unsigned int verbose) ++{ ++ int err; ++ unsigned long long max, ull; ++ struct inode *i, **array; ++ aufs_bindex_t bstart, bend; ++ ++ array = au_iarray_alloc(sb, &max); ++ err = PTR_ERR(array); ++ if (IS_ERR(array)) ++ goto out; ++ ++ err = 0; ++ AuDbg("b%d\n", bindex); ++ for (ull = 0; !err && ull < max; ull++) { ++ i = array[ull]; ++ if (i->i_ino == AUFS_ROOT_INO) ++ continue; ++ ++ /* AuDbgInode(i); */ ++ if (au_iigen(i) == sigen) ++ ii_read_lock_child(i); ++ else { ++ ii_write_lock_child(i); ++ err = au_refresh_hinode_self(i); ++ au_iigen_dec(i); ++ if (!err) ++ ii_downgrade_lock(i); ++ else { ++ ii_write_unlock(i); ++ break; ++ } ++ } ++ ++ bstart = au_ibstart(i); ++ bend = au_ibend(i); ++ if (bstart <= bindex ++ && bindex <= bend ++ && au_h_iptr(i, bindex) ++ && au_test_ibusy(i, bstart, bend)) { ++ err = -EBUSY; ++ AuVerbose(verbose, "busy i%lu\n", i->i_ino); ++ AuDbgInode(i); ++ } ++ ii_read_unlock(i); ++ } ++ au_iarray_free(array, max); ++ ++out: ++ return err; ++} ++ ++static int test_children_busy(struct dentry *root, aufs_bindex_t bindex, ++ const unsigned int verbose) ++{ ++ int err; ++ unsigned int sigen; ++ ++ sigen = au_sigen(root->d_sb); ++ DiMustNoWaiters(root); ++ IiMustNoWaiters(root->d_inode); ++ di_write_unlock(root); ++ err = test_dentry_busy(root, bindex, sigen, verbose); ++ if (!err) ++ err = test_inode_busy(root->d_sb, bindex, sigen, verbose); ++ di_write_lock_child(root); /* aufs_write_lock() calls ..._child() */ ++ ++ return err; ++} ++ ++static void au_br_do_del_brp(struct au_sbinfo *sbinfo, ++ const aufs_bindex_t bindex, ++ const aufs_bindex_t bend) ++{ ++ struct au_branch **brp, **p; ++ ++ AuRwMustWriteLock(&sbinfo->si_rwsem); ++ ++ brp = sbinfo->si_branch + bindex; ++ if (bindex < bend) ++ memmove(brp, brp + 1, sizeof(*brp) * (bend - bindex)); ++ sbinfo->si_branch[0 + bend] = NULL; ++ sbinfo->si_bend--; ++ ++ p = krealloc(sbinfo->si_branch, sizeof(*p) * bend, AuGFP_SBILIST); ++ if (p) ++ sbinfo->si_branch = 
p; ++ /* harmless error */ ++} ++ ++static void au_br_do_del_hdp(struct au_dinfo *dinfo, const aufs_bindex_t bindex, ++ const aufs_bindex_t bend) ++{ ++ struct au_hdentry *hdp, *p; ++ ++ AuRwMustWriteLock(&dinfo->di_rwsem); ++ ++ hdp = dinfo->di_hdentry; ++ if (bindex < bend) ++ memmove(hdp + bindex, hdp + bindex + 1, ++ sizeof(*hdp) * (bend - bindex)); ++ hdp[0 + bend].hd_dentry = NULL; ++ dinfo->di_bend--; ++ ++ p = krealloc(hdp, sizeof(*p) * bend, AuGFP_SBILIST); ++ if (p) ++ dinfo->di_hdentry = p; ++ /* harmless error */ ++} ++ ++static void au_br_do_del_hip(struct au_iinfo *iinfo, const aufs_bindex_t bindex, ++ const aufs_bindex_t bend) ++{ ++ struct au_hinode *hip, *p; ++ ++ AuRwMustWriteLock(&iinfo->ii_rwsem); ++ ++ hip = iinfo->ii_hinode + bindex; ++ if (bindex < bend) ++ memmove(hip, hip + 1, sizeof(*hip) * (bend - bindex)); ++ iinfo->ii_hinode[0 + bend].hi_inode = NULL; ++ au_hn_init(iinfo->ii_hinode + bend); ++ iinfo->ii_bend--; ++ ++ p = krealloc(iinfo->ii_hinode, sizeof(*p) * bend, AuGFP_SBILIST); ++ if (p) ++ iinfo->ii_hinode = p; ++ /* harmless error */ ++} ++ ++static void au_br_do_del(struct super_block *sb, aufs_bindex_t bindex, ++ struct au_branch *br) ++{ ++ aufs_bindex_t bend; ++ struct au_sbinfo *sbinfo; ++ struct dentry *root, *h_root; ++ struct inode *inode, *h_inode; ++ struct au_hinode *hinode; ++ ++ SiMustWriteLock(sb); ++ ++ root = sb->s_root; ++ inode = root->d_inode; ++ sbinfo = au_sbi(sb); ++ bend = sbinfo->si_bend; ++ ++ h_root = au_h_dptr(root, bindex); ++ hinode = au_hi(inode, bindex); ++ h_inode = au_igrab(hinode->hi_inode); ++ au_hiput(hinode); ++ ++ au_sbilist_lock(); ++ au_br_do_del_brp(sbinfo, bindex, bend); ++ au_br_do_del_hdp(au_di(root), bindex, bend); ++ au_br_do_del_hip(au_ii(inode), bindex, bend); ++ au_sbilist_unlock(); ++ ++ dput(h_root); ++ iput(h_inode); ++ au_br_do_free(br); ++} ++ ++int au_br_del(struct super_block *sb, struct au_opt_del *del, int remount) ++{ ++ int err, rerr, i; ++ unsigned int mnt_flags; ++ aufs_bindex_t bindex, bend, br_id; ++ unsigned char do_wh, verbose; ++ struct au_branch *br; ++ struct au_wbr *wbr; ++ ++ err = 0; ++ bindex = au_find_dbindex(sb->s_root, del->h_path.dentry); ++ if (bindex < 0) { ++ if (remount) ++ goto out; /* success */ ++ err = -ENOENT; ++ pr_err("%s no such branch\n", del->pathname); ++ goto out; ++ } ++ AuDbg("bindex b%d\n", bindex); ++ ++ err = -EBUSY; ++ mnt_flags = au_mntflags(sb); ++ verbose = !!au_opt_test(mnt_flags, VERBOSE); ++ bend = au_sbend(sb); ++ if (unlikely(!bend)) { ++ AuVerbose(verbose, "no more branches left\n"); ++ goto out; ++ } ++ br = au_sbr(sb, bindex); ++ i = atomic_read(&br->br_count); ++ if (unlikely(i)) { ++ AuVerbose(verbose, "%d file(s) opened\n", i); ++ goto out; ++ } ++ ++ wbr = br->br_wbr; ++ do_wh = wbr && (wbr->wbr_whbase || wbr->wbr_plink || wbr->wbr_orph); ++ if (do_wh) { ++ /* instead of WbrWhMustWriteLock(wbr) */ ++ SiMustWriteLock(sb); ++ for (i = 0; i < AuBrWh_Last; i++) { ++ dput(wbr->wbr_wh[i]); ++ wbr->wbr_wh[i] = NULL; ++ } ++ } ++ ++ err = test_children_busy(sb->s_root, bindex, verbose); ++ if (unlikely(err)) { ++ if (do_wh) ++ goto out_wh; ++ goto out; ++ } ++ ++ err = 0; ++ br_id = br->br_id; ++ if (!remount) ++ au_br_do_del(sb, bindex, br); ++ else { ++ sysaufs_brs_del(sb, bindex); ++ au_br_do_del(sb, bindex, br); ++ sysaufs_brs_add(sb, bindex); ++ } ++ ++ if (!bindex) { ++ au_cpup_attr_all(sb->s_root->d_inode, /*force*/1); ++ sb->s_maxbytes = au_sbr_sb(sb, 0)->s_maxbytes; ++ } else ++ au_sub_nlink(sb->s_root->d_inode, del->h_path.dentry->d_inode); 
++ if (au_opt_test(mnt_flags, PLINK)) ++ au_plink_half_refresh(sb, br_id); ++ ++ if (au_xino_brid(sb) == br_id) ++ au_xino_brid_set(sb, -1); ++ goto out; /* success */ ++ ++out_wh: ++ /* revert */ ++ rerr = au_br_init_wh(sb, br, br->br_perm, del->h_path.dentry); ++ if (rerr) ++ pr_warning("failed re-creating base whiteout, %s. (%d)\n", ++ del->pathname, rerr); ++out: ++ return err; ++} ++ ++/* ---------------------------------------------------------------------- */ ++ ++static int au_ibusy(struct super_block *sb, struct aufs_ibusy __user *arg) ++{ ++ int err; ++ aufs_bindex_t bstart, bend; ++ struct aufs_ibusy ibusy; ++ struct inode *inode, *h_inode; ++ ++ err = -EPERM; ++ if (unlikely(!capable(CAP_SYS_ADMIN))) ++ goto out; ++ ++ err = copy_from_user(&ibusy, arg, sizeof(ibusy)); ++ if (!err) ++ err = !access_ok(VERIFY_WRITE, &arg->h_ino, sizeof(arg->h_ino)); ++ if (unlikely(err)) { ++ err = -EFAULT; ++ AuTraceErr(err); ++ goto out; ++ } ++ ++ err = -EINVAL; ++ si_read_lock(sb, AuLock_FLUSH); ++ if (unlikely(ibusy.bindex < 0 || ibusy.bindex > au_sbend(sb))) ++ goto out_unlock; ++ ++ err = 0; ++ ibusy.h_ino = 0; /* invalid */ ++ inode = ilookup(sb, ibusy.ino); ++ if (!inode ++ || inode->i_ino == AUFS_ROOT_INO ++ || is_bad_inode(inode)) ++ goto out_unlock; ++ ++ ii_read_lock_child(inode); ++ bstart = au_ibstart(inode); ++ bend = au_ibend(inode); ++ if (bstart <= ibusy.bindex && ibusy.bindex <= bend) { ++ h_inode = au_h_iptr(inode, ibusy.bindex); ++ if (h_inode && au_test_ibusy(inode, bstart, bend)) ++ ibusy.h_ino = h_inode->i_ino; ++ } ++ ii_read_unlock(inode); ++ iput(inode); ++ ++out_unlock: ++ si_read_unlock(sb); ++ if (!err) { ++ err = __put_user(ibusy.h_ino, &arg->h_ino); ++ if (unlikely(err)) { ++ err = -EFAULT; ++ AuTraceErr(err); ++ } ++ } ++out: ++ return err; ++} ++ ++long au_ibusy_ioctl(struct file *file, unsigned long arg) ++{ ++ return au_ibusy(file->f_dentry->d_sb, (void __user *)arg); ++} ++ ++#ifdef CONFIG_COMPAT ++long au_ibusy_compat_ioctl(struct file *file, unsigned long arg) ++{ ++ return au_ibusy(file->f_dentry->d_sb, compat_ptr(arg)); ++} ++#endif ++ ++/* ---------------------------------------------------------------------- */ ++ ++/* ++ * change a branch permission ++ */ ++ ++static void au_warn_ima(void) ++{ ++#ifdef CONFIG_IMA ++ /* since it doesn't support mark_files_ro() */ ++ AuWarn1("RW -> RO makes IMA to produce wrong message\n"); ++#endif ++} ++ ++static int do_need_sigen_inc(int a, int b) ++{ ++ return au_br_whable(a) && !au_br_whable(b); ++} ++ ++static int need_sigen_inc(int old, int new) ++{ ++ return do_need_sigen_inc(old, new) ++ || do_need_sigen_inc(new, old); ++} ++ ++static unsigned long long au_farray_cb(void *a, ++ unsigned long long max __maybe_unused, ++ void *arg) ++{ ++ unsigned long long n; ++ struct file **p, *f; ++ struct super_block *sb = arg; ++ ++ n = 0; ++ p = a; ++ lg_global_lock(files_lglock); ++ do_file_list_for_each_entry(sb, f) { ++ if (au_fi(f) ++ && file_count(f) ++ && !special_file(f->f_dentry->d_inode->i_mode)) { ++ get_file(f); ++ *p++ = f; ++ n++; ++ AuDebugOn(n > max); ++ } ++ } while_file_list_for_each_entry; ++ lg_global_unlock(files_lglock); ++ ++ return n; ++} ++ ++static struct file **au_farray_alloc(struct super_block *sb, ++ unsigned long long *max) ++{ ++ *max = atomic_long_read(&au_sbi(sb)->si_nfiles); ++ return au_array_alloc(max, au_farray_cb, sb); ++} ++ ++static void au_farray_free(struct file **a, unsigned long long max) ++{ ++ unsigned long long ull; ++ ++ for (ull = 0; ull < max; ull++) ++ if (a[ull]) ++ 
fput(a[ull]); ++ au_array_free(a); ++} ++ ++static int au_br_mod_files_ro(struct super_block *sb, aufs_bindex_t bindex) ++{ ++ int err, do_warn; ++ unsigned int mnt_flags; ++ unsigned long long ull, max; ++ aufs_bindex_t br_id; ++ unsigned char verbose; ++ struct file *file, *hf, **array; ++ struct inode *inode; ++ struct au_hfile *hfile; ++ ++ mnt_flags = au_mntflags(sb); ++ verbose = !!au_opt_test(mnt_flags, VERBOSE); ++ ++ array = au_farray_alloc(sb, &max); ++ err = PTR_ERR(array); ++ if (IS_ERR(array)) ++ goto out; ++ ++ do_warn = 0; ++ br_id = au_sbr_id(sb, bindex); ++ for (ull = 0; ull < max; ull++) { ++ file = array[ull]; ++ ++ /* AuDbg("%.*s\n", AuDLNPair(file->f_dentry)); */ ++ fi_read_lock(file); ++ if (unlikely(au_test_mmapped(file))) { ++ err = -EBUSY; ++ AuVerbose(verbose, "mmapped %.*s\n", ++ AuDLNPair(file->f_dentry)); ++ AuDbgFile(file); ++ FiMustNoWaiters(file); ++ fi_read_unlock(file); ++ goto out_array; ++ } ++ ++ inode = file->f_dentry->d_inode; ++ hfile = &au_fi(file)->fi_htop; ++ hf = hfile->hf_file; ++ if (!S_ISREG(inode->i_mode) ++ || !(file->f_mode & FMODE_WRITE) ++ || hfile->hf_br->br_id != br_id ++ || !(hf->f_mode & FMODE_WRITE)) ++ array[ull] = NULL; ++ else { ++ do_warn = 1; ++ get_file(file); ++ } ++ ++ FiMustNoWaiters(file); ++ fi_read_unlock(file); ++ fput(file); ++ } ++ ++ err = 0; ++ if (do_warn) ++ au_warn_ima(); ++ ++ for (ull = 0; ull < max; ull++) { ++ file = array[ull]; ++ if (!file) ++ continue; ++ ++ /* todo: already flushed? */ ++ /* cf. fs/super.c:mark_files_ro() */ ++ /* fi_read_lock(file); */ ++ hfile = &au_fi(file)->fi_htop; ++ hf = hfile->hf_file; ++ /* fi_read_unlock(file); */ ++ spin_lock(&hf->f_lock); ++ hf->f_mode &= ~FMODE_WRITE; ++ spin_unlock(&hf->f_lock); ++ if (!file_check_writeable(hf)) { ++ file_release_write(hf); ++ mnt_drop_write(hf->f_vfsmnt); ++ } ++ } ++ ++out_array: ++ au_farray_free(array, max); ++out: ++ AuTraceErr(err); ++ return err; ++} ++ ++int au_br_mod(struct super_block *sb, struct au_opt_mod *mod, int remount, ++ int *do_refresh) ++{ ++ int err, rerr; ++ aufs_bindex_t bindex; ++ struct path path; ++ struct dentry *root; ++ struct au_branch *br; ++ ++ root = sb->s_root; ++ bindex = au_find_dbindex(root, mod->h_root); ++ if (bindex < 0) { ++ if (remount) ++ return 0; /* success */ ++ err = -ENOENT; ++ pr_err("%s no such branch\n", mod->path); ++ goto out; ++ } ++ AuDbg("bindex b%d\n", bindex); ++ ++ err = test_br(mod->h_root->d_inode, mod->perm, mod->path); ++ if (unlikely(err)) ++ goto out; ++ ++ br = au_sbr(sb, bindex); ++ if (br->br_perm == mod->perm) ++ return 0; /* success */ ++ ++ if (au_br_writable(br->br_perm)) { ++ /* remove whiteout base */ ++ err = au_br_init_wh(sb, br, mod->perm, mod->h_root); ++ if (unlikely(err)) ++ goto out; ++ ++ if (!au_br_writable(mod->perm)) { ++ /* rw --> ro, file might be mmapped */ ++ DiMustNoWaiters(root); ++ IiMustNoWaiters(root->d_inode); ++ di_write_unlock(root); ++ err = au_br_mod_files_ro(sb, bindex); ++ /* aufs_write_lock() calls ..._child() */ ++ di_write_lock_child(root); ++ ++ if (unlikely(err)) { ++ rerr = -ENOMEM; ++ br->br_wbr = kmalloc(sizeof(*br->br_wbr), ++ GFP_NOFS); ++ if (br->br_wbr) { ++ path.mnt = br->br_mnt; ++ path.dentry = mod->h_root; ++ rerr = au_wbr_init(br, sb, br->br_perm, ++ &path); ++ } ++ if (unlikely(rerr)) { ++ AuIOErr("nested error %d (%d)\n", ++ rerr, err); ++ br->br_perm = mod->perm; ++ } ++ } ++ } ++ } else if (au_br_writable(mod->perm)) { ++ /* ro --> rw */ ++ err = -ENOMEM; ++ br->br_wbr = kmalloc(sizeof(*br->br_wbr), GFP_NOFS); ++ if 
(br->br_wbr) { ++ path.mnt = br->br_mnt; ++ path.dentry = mod->h_root; ++ err = au_wbr_init(br, sb, mod->perm, &path); ++ if (unlikely(err)) { ++ kfree(br->br_wbr); ++ br->br_wbr = NULL; ++ } ++ } ++ } ++ ++ if (!err) { ++ *do_refresh |= need_sigen_inc(br->br_perm, mod->perm); ++ br->br_perm = mod->perm; ++ } ++ ++out: ++ AuTraceErr(err); ++ return err; ++} +--- /dev/null 2012-03-14 12:35:58.848999748 +0100 ++++ b/fs/aufs/branch.h 2012-03-20 17:31:17.000000000 +0100 +@@ -0,0 +1,230 @@ ++/* ++ * Copyright (C) 2005-2012 Junjiro R. Okajima ++ * ++ * This program, aufs is free software; you can redistribute it and/or modify ++ * it under the terms of the GNU General Public License as published by ++ * the Free Software Foundation; either version 2 of the License, or ++ * (at your option) any later version. ++ * ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ * GNU General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public License ++ * along with this program; if not, write to the Free Software ++ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA ++ */ ++ ++/* ++ * branch filesystems and xino for them ++ */ ++ ++#ifndef __AUFS_BRANCH_H__ ++#define __AUFS_BRANCH_H__ ++ ++#ifdef __KERNEL__ ++ ++#include ++#include "dynop.h" ++#include "rwsem.h" ++#include "super.h" ++ ++/* ---------------------------------------------------------------------- */ ++ ++/* a xino file */ ++struct au_xino_file { ++ struct file *xi_file; ++ struct mutex xi_nondir_mtx; ++ ++ /* todo: make xino files an array to support huge inode number */ ++ ++#ifdef CONFIG_DEBUG_FS ++ struct dentry *xi_dbgaufs; ++#endif ++}; ++ ++/* members for writable branch only */ ++enum {AuBrWh_BASE, AuBrWh_PLINK, AuBrWh_ORPH, AuBrWh_Last}; ++struct au_wbr { ++ struct au_rwsem wbr_wh_rwsem; ++ struct dentry *wbr_wh[AuBrWh_Last]; ++ atomic_t wbr_wh_running; ++#define wbr_whbase wbr_wh[AuBrWh_BASE] /* whiteout base */ ++#define wbr_plink wbr_wh[AuBrWh_PLINK] /* pseudo-link dir */ ++#define wbr_orph wbr_wh[AuBrWh_ORPH] /* dir for orphans */ ++ ++ /* mfs mode */ ++ unsigned long long wbr_bytes; ++}; ++ ++/* ext2 has 3 types of operations at least, ext3 has 4 */ ++#define AuBrDynOp (AuDyLast * 4) ++ ++/* protected by superblock rwsem */ ++struct au_branch { ++ struct au_xino_file br_xino; ++ ++ aufs_bindex_t br_id; ++ ++ int br_perm; ++ struct vfsmount *br_mnt; ++ spinlock_t br_dykey_lock; ++ struct au_dykey *br_dykey[AuBrDynOp]; ++ atomic_t br_count; ++ ++ struct au_wbr *br_wbr; ++ ++ /* xino truncation */ ++ blkcnt_t br_xino_upper; /* watermark in blocks */ ++ atomic_t br_xino_running; ++ ++#ifdef CONFIG_AUFS_HFSNOTIFY ++ struct fsnotify_group *br_hfsn_group; ++ struct fsnotify_ops br_hfsn_ops; ++#endif ++ ++#ifdef CONFIG_SYSFS ++ /* an entry under sysfs per mount-point */ ++ char br_name[8]; ++ struct attribute br_attr; ++#endif ++}; ++ ++/* ---------------------------------------------------------------------- */ ++ ++/* branch permissions and attributes */ ++#define AuBrPerm_RW 1 /* writable, hardlinkable wh */ ++#define AuBrPerm_RO (1 << 1) /* readonly */ ++#define AuBrPerm_RR (1 << 2) /* natively readonly */ ++#define AuBrPerm_Mask (AuBrPerm_RW | AuBrPerm_RO | AuBrPerm_RR) ++ ++#define AuBrRAttr_WH (1 << 3) /* whiteout-able */ ++ ++#define AuBrWAttr_NoLinkWH (1 << 4) /* un-hardlinkable whiteouts */ ++ ++static 
inline int au_br_writable(int brperm) ++{ ++ return brperm & AuBrPerm_RW; ++} ++ ++static inline int au_br_whable(int brperm) ++{ ++ return brperm & (AuBrPerm_RW | AuBrRAttr_WH); ++} ++ ++static inline int au_br_wh_linkable(int brperm) ++{ ++ return !(brperm & AuBrWAttr_NoLinkWH); ++} ++ ++static inline int au_br_rdonly(struct au_branch *br) ++{ ++ return ((br->br_mnt->mnt_sb->s_flags & MS_RDONLY) ++ || !au_br_writable(br->br_perm)) ++ ? -EROFS : 0; ++} ++ ++static inline int au_br_hnotifyable(int brperm __maybe_unused) ++{ ++#ifdef CONFIG_AUFS_HNOTIFY ++ return !(brperm & AuBrPerm_RR); ++#else ++ return 0; ++#endif ++} ++ ++/* ---------------------------------------------------------------------- */ ++ ++/* branch.c */ ++struct au_sbinfo; ++void au_br_free(struct au_sbinfo *sinfo); ++int au_br_index(struct super_block *sb, aufs_bindex_t br_id); ++struct au_opt_add; ++int au_br_add(struct super_block *sb, struct au_opt_add *add, int remount); ++struct au_opt_del; ++int au_br_del(struct super_block *sb, struct au_opt_del *del, int remount); ++long au_ibusy_ioctl(struct file *file, unsigned long arg); ++#ifdef CONFIG_COMPAT ++long au_ibusy_compat_ioctl(struct file *file, unsigned long arg); ++#endif ++struct au_opt_mod; ++int au_br_mod(struct super_block *sb, struct au_opt_mod *mod, int remount, ++ int *do_refresh); ++ ++/* xino.c */ ++static const loff_t au_loff_max = LLONG_MAX; ++ ++int au_xib_trunc(struct super_block *sb); ++ssize_t xino_fread(au_readf_t func, struct file *file, void *buf, size_t size, ++ loff_t *pos); ++ssize_t xino_fwrite(au_writef_t func, struct file *file, void *buf, size_t size, ++ loff_t *pos); ++struct file *au_xino_create2(struct file *base_file, struct file *copy_src); ++struct file *au_xino_create(struct super_block *sb, char *fname, int silent); ++ino_t au_xino_new_ino(struct super_block *sb); ++void au_xino_delete_inode(struct inode *inode, const int unlinked); ++int au_xino_write(struct super_block *sb, aufs_bindex_t bindex, ino_t h_ino, ++ ino_t ino); ++int au_xino_read(struct super_block *sb, aufs_bindex_t bindex, ino_t h_ino, ++ ino_t *ino); ++int au_xino_br(struct super_block *sb, struct au_branch *br, ino_t hino, ++ struct file *base_file, int do_test); ++int au_xino_trunc(struct super_block *sb, aufs_bindex_t bindex); ++ ++struct au_opt_xino; ++int au_xino_set(struct super_block *sb, struct au_opt_xino *xino, int remount); ++void au_xino_clr(struct super_block *sb); ++struct file *au_xino_def(struct super_block *sb); ++int au_xino_path(struct seq_file *seq, struct file *file); ++ ++/* ---------------------------------------------------------------------- */ ++ ++/* Superblock to branch */ ++static inline ++aufs_bindex_t au_sbr_id(struct super_block *sb, aufs_bindex_t bindex) ++{ ++ return au_sbr(sb, bindex)->br_id; ++} ++ ++static inline ++struct vfsmount *au_sbr_mnt(struct super_block *sb, aufs_bindex_t bindex) ++{ ++ return au_sbr(sb, bindex)->br_mnt; ++} ++ ++static inline ++struct super_block *au_sbr_sb(struct super_block *sb, aufs_bindex_t bindex) ++{ ++ return au_sbr_mnt(sb, bindex)->mnt_sb; ++} ++ ++static inline void au_sbr_put(struct super_block *sb, aufs_bindex_t bindex) ++{ ++ atomic_dec(&au_sbr(sb, bindex)->br_count); ++} ++ ++static inline int au_sbr_perm(struct super_block *sb, aufs_bindex_t bindex) ++{ ++ return au_sbr(sb, bindex)->br_perm; ++} ++ ++static inline int au_sbr_whable(struct super_block *sb, aufs_bindex_t bindex) ++{ ++ return au_br_whable(au_sbr_perm(sb, bindex)); ++} ++ ++/* 
---------------------------------------------------------------------- */ ++ ++/* ++ * wbr_wh_read_lock, wbr_wh_write_lock ++ * wbr_wh_read_unlock, wbr_wh_write_unlock, wbr_wh_downgrade_lock ++ */ ++AuSimpleRwsemFuncs(wbr_wh, struct au_wbr *wbr, &wbr->wbr_wh_rwsem); ++ ++#define WbrWhMustNoWaiters(wbr) AuRwMustNoWaiters(&wbr->wbr_wh_rwsem) ++#define WbrWhMustAnyLock(wbr) AuRwMustAnyLock(&wbr->wbr_wh_rwsem) ++#define WbrWhMustWriteLock(wbr) AuRwMustWriteLock(&wbr->wbr_wh_rwsem) ++ ++#endif /* __KERNEL__ */ ++#endif /* __AUFS_BRANCH_H__ */ +--- /dev/null 2012-03-14 12:35:58.848999748 +0100 ++++ b/fs/aufs/conf.mk 2011-10-25 09:52:26.000000000 +0200 +@@ -0,0 +1,38 @@ ++ ++AuConfStr = CONFIG_AUFS_FS=${CONFIG_AUFS_FS} ++ ++define AuConf ++ifdef ${1} ++AuConfStr += ${1}=${${1}} ++endif ++endef ++ ++AuConfAll = BRANCH_MAX_127 BRANCH_MAX_511 BRANCH_MAX_1023 BRANCH_MAX_32767 \ ++ SBILIST \ ++ HNOTIFY HFSNOTIFY \ ++ EXPORT INO_T_64 \ ++ RDU \ ++ PROC_MAP \ ++ SP_IATTR \ ++ SHWH \ ++ BR_RAMFS \ ++ BR_FUSE POLL \ ++ BR_HFSPLUS \ ++ BDEV_LOOP \ ++ DEBUG MAGIC_SYSRQ ++$(foreach i, ${AuConfAll}, \ ++ $(eval $(call AuConf,CONFIG_AUFS_${i}))) ++ ++AuConfName = ${obj}/conf.str ++${AuConfName}.tmp: FORCE ++ @echo ${AuConfStr} | tr ' ' '\n' | sed -e 's/^/"/' -e 's/$$/\\n"/' > $@ ++${AuConfName}: ${AuConfName}.tmp ++ @diff -q $< $@ > /dev/null 2>&1 || { \ ++ echo ' GEN ' $@; \ ++ cp -p $< $@; \ ++ } ++FORCE: ++clean-files += ${AuConfName} ${AuConfName}.tmp ++${obj}/sysfs.o: ${AuConfName} ++ ++-include ${srctree}/${src}/conf_priv.mk +--- /dev/null 2012-03-14 12:35:58.848999748 +0100 ++++ b/fs/aufs/cpup.c 2012-03-20 17:31:17.000000000 +0100 +@@ -0,0 +1,1084 @@ ++/* ++ * Copyright (C) 2005-2012 Junjiro R. Okajima ++ * ++ * This program, aufs is free software; you can redistribute it and/or modify ++ * it under the terms of the GNU General Public License as published by ++ * the Free Software Foundation; either version 2 of the License, or ++ * (at your option) any later version. ++ * ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ * GNU General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public License ++ * along with this program; if not, write to the Free Software ++ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA ++ */ ++ ++/* ++ * copy-up functions, see wbr_policy.c for copy-down ++ */ ++ ++#include ++#include ++#include "aufs.h" ++ ++void au_cpup_attr_flags(struct inode *dst, struct inode *src) ++{ ++ const unsigned int mask = S_DEAD | S_SWAPFILE | S_PRIVATE ++ | S_NOATIME | S_NOCMTIME; ++ ++ dst->i_flags |= src->i_flags & ~mask; ++ if (au_test_fs_notime(dst->i_sb)) ++ dst->i_flags |= S_NOATIME | S_NOCMTIME; ++} ++ ++void au_cpup_attr_timesizes(struct inode *inode) ++{ ++ struct inode *h_inode; ++ ++ h_inode = au_h_iptr(inode, au_ibstart(inode)); ++ fsstack_copy_attr_times(inode, h_inode); ++ fsstack_copy_inode_size(inode, h_inode); ++} ++ ++void au_cpup_attr_nlink(struct inode *inode, int force) ++{ ++ struct inode *h_inode; ++ struct super_block *sb; ++ aufs_bindex_t bindex, bend; ++ ++ sb = inode->i_sb; ++ bindex = au_ibstart(inode); ++ h_inode = au_h_iptr(inode, bindex); ++ if (!force ++ && !S_ISDIR(h_inode->i_mode) ++ && au_opt_test(au_mntflags(sb), PLINK) ++ && au_plink_test(inode)) ++ return; ++ ++ /* ++ * 0 can happen in revalidating. 
++ * h_inode->i_mutex is not held, but it is harmless since once i_nlink ++ * reaches 0, it will never become positive. ++ */ ++ vfsub_set_nlink(inode, h_inode->i_nlink); ++ ++ /* ++ * fewer nlink makes find(1) noisy, but larger nlink doesn't. ++ * it may includes whplink directory. ++ */ ++ if (S_ISDIR(h_inode->i_mode)) { ++ bend = au_ibend(inode); ++ for (bindex++; bindex <= bend; bindex++) { ++ h_inode = au_h_iptr(inode, bindex); ++ if (h_inode) ++ au_add_nlink(inode, h_inode); ++ } ++ } ++} ++ ++void au_cpup_attr_changeable(struct inode *inode) ++{ ++ struct inode *h_inode; ++ ++ h_inode = au_h_iptr(inode, au_ibstart(inode)); ++ inode->i_mode = h_inode->i_mode; ++ inode->i_uid = h_inode->i_uid; ++ inode->i_gid = h_inode->i_gid; ++ au_cpup_attr_timesizes(inode); ++ au_cpup_attr_flags(inode, h_inode); ++} ++ ++void au_cpup_igen(struct inode *inode, struct inode *h_inode) ++{ ++ struct au_iinfo *iinfo = au_ii(inode); ++ ++ IiMustWriteLock(inode); ++ ++ iinfo->ii_higen = h_inode->i_generation; ++ iinfo->ii_hsb1 = h_inode->i_sb; ++} ++ ++void au_cpup_attr_all(struct inode *inode, int force) ++{ ++ struct inode *h_inode; ++ ++ h_inode = au_h_iptr(inode, au_ibstart(inode)); ++ au_cpup_attr_changeable(inode); ++ if (inode->i_nlink > 0) ++ au_cpup_attr_nlink(inode, force); ++ inode->i_rdev = h_inode->i_rdev; ++ inode->i_blkbits = h_inode->i_blkbits; ++ au_cpup_igen(inode, h_inode); ++} ++ ++/* ---------------------------------------------------------------------- */ ++ ++/* Note: dt_dentry and dt_h_dentry are not dget/dput-ed */ ++ ++/* keep the timestamps of the parent dir when cpup */ ++void au_dtime_store(struct au_dtime *dt, struct dentry *dentry, ++ struct path *h_path) ++{ ++ struct inode *h_inode; ++ ++ dt->dt_dentry = dentry; ++ dt->dt_h_path = *h_path; ++ h_inode = h_path->dentry->d_inode; ++ dt->dt_atime = h_inode->i_atime; ++ dt->dt_mtime = h_inode->i_mtime; ++ /* smp_mb(); */ ++} ++ ++void au_dtime_revert(struct au_dtime *dt) ++{ ++ struct iattr attr; ++ int err; ++ ++ attr.ia_atime = dt->dt_atime; ++ attr.ia_mtime = dt->dt_mtime; ++ attr.ia_valid = ATTR_FORCE | ATTR_MTIME | ATTR_MTIME_SET ++ | ATTR_ATIME | ATTR_ATIME_SET; ++ ++ err = vfsub_notify_change(&dt->dt_h_path, &attr); ++ if (unlikely(err)) ++ pr_warning("restoring timestamps failed(%d). ignored\n", err); ++} ++ ++/* ---------------------------------------------------------------------- */ ++ ++static noinline_for_stack ++int cpup_iattr(struct dentry *dst, aufs_bindex_t bindex, struct dentry *h_src) ++{ ++ int err, sbits; ++ struct iattr ia; ++ struct path h_path; ++ struct inode *h_isrc, *h_idst; ++ ++ h_path.dentry = au_h_dptr(dst, bindex); ++ h_idst = h_path.dentry->d_inode; ++ h_path.mnt = au_sbr_mnt(dst->d_sb, bindex); ++ h_isrc = h_src->d_inode; ++ ia.ia_valid = ATTR_FORCE | ATTR_UID | ATTR_GID ++ | ATTR_ATIME | ATTR_MTIME ++ | ATTR_ATIME_SET | ATTR_MTIME_SET; ++ ia.ia_uid = h_isrc->i_uid; ++ ia.ia_gid = h_isrc->i_gid; ++ ia.ia_atime = h_isrc->i_atime; ++ ia.ia_mtime = h_isrc->i_mtime; ++ if (h_idst->i_mode != h_isrc->i_mode ++ && !S_ISLNK(h_idst->i_mode)) { ++ ia.ia_valid |= ATTR_MODE; ++ ia.ia_mode = h_isrc->i_mode; ++ } ++ sbits = !!(h_isrc->i_mode & (S_ISUID | S_ISGID)); ++ au_cpup_attr_flags(h_idst, h_isrc); ++ err = vfsub_notify_change(&h_path, &ia); ++ ++ /* is this nfs only? 
*/ ++ if (!err && sbits && au_test_nfs(h_path.dentry->d_sb)) { ++ ia.ia_valid = ATTR_FORCE | ATTR_MODE; ++ ia.ia_mode = h_isrc->i_mode; ++ err = vfsub_notify_change(&h_path, &ia); ++ } ++ ++ return err; ++} ++ ++/* ---------------------------------------------------------------------- */ ++ ++static int au_do_copy_file(struct file *dst, struct file *src, loff_t len, ++ char *buf, unsigned long blksize) ++{ ++ int err; ++ size_t sz, rbytes, wbytes; ++ unsigned char all_zero; ++ char *p, *zp; ++ struct mutex *h_mtx; ++ /* reduce stack usage */ ++ struct iattr *ia; ++ ++ zp = page_address(ZERO_PAGE(0)); ++ if (unlikely(!zp)) ++ return -ENOMEM; /* possible? */ ++ ++ err = 0; ++ all_zero = 0; ++ while (len) { ++ AuDbg("len %lld\n", len); ++ sz = blksize; ++ if (len < blksize) ++ sz = len; ++ ++ rbytes = 0; ++ /* todo: signal_pending? */ ++ while (!rbytes || err == -EAGAIN || err == -EINTR) { ++ rbytes = vfsub_read_k(src, buf, sz, &src->f_pos); ++ err = rbytes; ++ } ++ if (unlikely(err < 0)) ++ break; ++ ++ all_zero = 0; ++ if (len >= rbytes && rbytes == blksize) ++ all_zero = !memcmp(buf, zp, rbytes); ++ if (!all_zero) { ++ wbytes = rbytes; ++ p = buf; ++ while (wbytes) { ++ size_t b; ++ ++ b = vfsub_write_k(dst, p, wbytes, &dst->f_pos); ++ err = b; ++ /* todo: signal_pending? */ ++ if (unlikely(err == -EAGAIN || err == -EINTR)) ++ continue; ++ if (unlikely(err < 0)) ++ break; ++ wbytes -= b; ++ p += b; ++ } ++ } else { ++ loff_t res; ++ ++ AuLabel(hole); ++ res = vfsub_llseek(dst, rbytes, SEEK_CUR); ++ err = res; ++ if (unlikely(res < 0)) ++ break; ++ } ++ len -= rbytes; ++ err = 0; ++ } ++ ++ /* the last block may be a hole */ ++ if (!err && all_zero) { ++ AuLabel(last hole); ++ ++ err = 1; ++ if (au_test_nfs(dst->f_dentry->d_sb)) { ++ /* nfs requires this step to make last hole */ ++ /* is this only nfs? */ ++ do { ++ /* todo: signal_pending? */ ++ err = vfsub_write_k(dst, "\0", 1, &dst->f_pos); ++ } while (err == -EAGAIN || err == -EINTR); ++ if (err == 1) ++ dst->f_pos--; ++ } ++ ++ if (err == 1) { ++ ia = (void *)buf; ++ ia->ia_size = dst->f_pos; ++ ia->ia_valid = ATTR_SIZE | ATTR_FILE; ++ ia->ia_file = dst; ++ h_mtx = &dst->f_dentry->d_inode->i_mutex; ++ mutex_lock_nested(h_mtx, AuLsc_I_CHILD2); ++ err = vfsub_notify_change(&dst->f_path, ia); ++ mutex_unlock(h_mtx); ++ } ++ } ++ ++ return err; ++} ++ ++int au_copy_file(struct file *dst, struct file *src, loff_t len) ++{ ++ int err; ++ unsigned long blksize; ++ unsigned char do_kfree; ++ char *buf; ++ ++ err = -ENOMEM; ++ blksize = dst->f_dentry->d_sb->s_blocksize; ++ if (!blksize || PAGE_SIZE < blksize) ++ blksize = PAGE_SIZE; ++ AuDbg("blksize %lu\n", blksize); ++ do_kfree = (blksize != PAGE_SIZE && blksize >= sizeof(struct iattr *)); ++ if (do_kfree) ++ buf = kmalloc(blksize, GFP_NOFS); ++ else ++ buf = (void *)__get_free_page(GFP_NOFS); ++ if (unlikely(!buf)) ++ goto out; ++ ++ if (len > (1 << 22)) ++ AuDbg("copying a large file %lld\n", (long long)len); ++ ++ src->f_pos = 0; ++ dst->f_pos = 0; ++ err = au_do_copy_file(dst, src, len, buf, blksize); ++ if (do_kfree) ++ kfree(buf); ++ else ++ free_page((unsigned long)buf); ++ ++out: ++ return err; ++} ++ ++/* ++ * to support a sparse file which is opened with O_APPEND, ++ * we need to close the file. 
++ */ ++static int au_cp_regular(struct dentry *dentry, aufs_bindex_t bdst, ++ aufs_bindex_t bsrc, loff_t len) ++{ ++ int err, i; ++ enum { SRC, DST }; ++ struct { ++ aufs_bindex_t bindex; ++ unsigned int flags; ++ struct dentry *dentry; ++ struct file *file; ++ void *label, *label_file; ++ } *f, file[] = { ++ { ++ .bindex = bsrc, ++ .flags = O_RDONLY | O_NOATIME | O_LARGEFILE, ++ .file = NULL, ++ .label = &&out, ++ .label_file = &&out_src ++ }, ++ { ++ .bindex = bdst, ++ .flags = O_WRONLY | O_NOATIME | O_LARGEFILE, ++ .file = NULL, ++ .label = &&out_src, ++ .label_file = &&out_dst ++ } ++ }; ++ struct super_block *sb; ++ ++ /* bsrc branch can be ro/rw. */ ++ sb = dentry->d_sb; ++ f = file; ++ for (i = 0; i < 2; i++, f++) { ++ f->dentry = au_h_dptr(dentry, f->bindex); ++ f->file = au_h_open(dentry, f->bindex, f->flags, /*file*/NULL); ++ err = PTR_ERR(f->file); ++ if (IS_ERR(f->file)) ++ goto *f->label; ++ err = -EINVAL; ++ if (unlikely(!f->file->f_op)) ++ goto *f->label_file; ++ } ++ ++ /* try stopping to update while we copyup */ ++ IMustLock(file[SRC].dentry->d_inode); ++ err = au_copy_file(file[DST].file, file[SRC].file, len); ++ ++out_dst: ++ fput(file[DST].file); ++ au_sbr_put(sb, file[DST].bindex); ++out_src: ++ fput(file[SRC].file); ++ au_sbr_put(sb, file[SRC].bindex); ++out: ++ return err; ++} ++ ++static int au_do_cpup_regular(struct dentry *dentry, aufs_bindex_t bdst, ++ aufs_bindex_t bsrc, loff_t len, ++ struct inode *h_dir, struct path *h_path) ++{ ++ int err, rerr; ++ loff_t l; ++ ++ err = 0; ++ l = i_size_read(au_h_iptr(dentry->d_inode, bsrc)); ++ if (len == -1 || l < len) ++ len = l; ++ if (len) ++ err = au_cp_regular(dentry, bdst, bsrc, len); ++ if (!err) ++ goto out; /* success */ ++ ++ rerr = vfsub_unlink(h_dir, h_path, /*force*/0); ++ if (rerr) { ++ AuIOErr("failed unlinking cpup-ed %.*s(%d, %d)\n", ++ AuDLNPair(h_path->dentry), err, rerr); ++ err = -EIO; ++ } ++ ++out: ++ return err; ++} ++ ++static int au_do_cpup_symlink(struct path *h_path, struct dentry *h_src, ++ struct inode *h_dir) ++{ ++ int err, symlen; ++ mm_segment_t old_fs; ++ union { ++ char *k; ++ char __user *u; ++ } sym; ++ ++ err = -ENOSYS; ++ if (unlikely(!h_src->d_inode->i_op->readlink)) ++ goto out; ++ ++ err = -ENOMEM; ++ sym.k = __getname_gfp(GFP_NOFS); ++ if (unlikely(!sym.k)) ++ goto out; ++ ++ /* unnecessary to support mmap_sem since symlink is not mmap-able */ ++ old_fs = get_fs(); ++ set_fs(KERNEL_DS); ++ symlen = h_src->d_inode->i_op->readlink(h_src, sym.u, PATH_MAX); ++ err = symlen; ++ set_fs(old_fs); ++ ++ if (symlen > 0) { ++ sym.k[symlen] = 0; ++ err = vfsub_symlink(h_dir, h_path, sym.k); ++ } ++ __putname(sym.k); ++ ++out: ++ return err; ++} ++ ++/* return with the lower dst inode is locked */ ++static noinline_for_stack ++int cpup_entry(struct dentry *dentry, aufs_bindex_t bdst, ++ aufs_bindex_t bsrc, loff_t len, unsigned int flags, ++ struct dentry *dst_parent) ++{ ++ int err; ++ umode_t mode; ++ unsigned int mnt_flags; ++ unsigned char isdir; ++ const unsigned char do_dt = !!au_ftest_cpup(flags, DTIME); ++ struct au_dtime dt; ++ struct path h_path; ++ struct dentry *h_src, *h_dst, *h_parent; ++ struct inode *h_inode, *h_dir; ++ struct super_block *sb; ++ ++ /* bsrc branch can be ro/rw. 
*/ ++ h_src = au_h_dptr(dentry, bsrc); ++ h_inode = h_src->d_inode; ++ AuDebugOn(h_inode != au_h_iptr(dentry->d_inode, bsrc)); ++ ++ /* try stopping to be referenced while we are creating */ ++ h_dst = au_h_dptr(dentry, bdst); ++ h_parent = h_dst->d_parent; /* dir inode is locked */ ++ h_dir = h_parent->d_inode; ++ IMustLock(h_dir); ++ AuDebugOn(h_parent != h_dst->d_parent); ++ ++ sb = dentry->d_sb; ++ h_path.mnt = au_sbr_mnt(sb, bdst); ++ if (do_dt) { ++ h_path.dentry = h_parent; ++ au_dtime_store(&dt, dst_parent, &h_path); ++ } ++ h_path.dentry = h_dst; ++ ++ isdir = 0; ++ mode = h_inode->i_mode; ++ switch (mode & S_IFMT) { ++ case S_IFREG: ++ /* try stopping to update while we are referencing */ ++ IMustLock(h_inode); ++ err = vfsub_create(h_dir, &h_path, mode | S_IWUSR); ++ if (!err) ++ err = au_do_cpup_regular ++ (dentry, bdst, bsrc, len, ++ au_h_iptr(dst_parent->d_inode, bdst), &h_path); ++ break; ++ case S_IFDIR: ++ isdir = 1; ++ err = vfsub_mkdir(h_dir, &h_path, mode); ++ if (!err) { ++ /* ++ * strange behaviour from the users view, ++ * particularry setattr case ++ */ ++ if (au_ibstart(dst_parent->d_inode) == bdst) ++ au_cpup_attr_nlink(dst_parent->d_inode, ++ /*force*/1); ++ au_cpup_attr_nlink(dentry->d_inode, /*force*/1); ++ } ++ break; ++ case S_IFLNK: ++ err = au_do_cpup_symlink(&h_path, h_src, h_dir); ++ break; ++ case S_IFCHR: ++ case S_IFBLK: ++ AuDebugOn(!capable(CAP_MKNOD)); ++ /*FALLTHROUGH*/ ++ case S_IFIFO: ++ case S_IFSOCK: ++ err = vfsub_mknod(h_dir, &h_path, mode, h_inode->i_rdev); ++ break; ++ default: ++ AuIOErr("Unknown inode type 0%o\n", mode); ++ err = -EIO; ++ } ++ ++ mnt_flags = au_mntflags(sb); ++ if (!au_opt_test(mnt_flags, UDBA_NONE) ++ && !isdir ++ && au_opt_test(mnt_flags, XINO) ++ && h_inode->i_nlink == 1 ++ /* todo: unnecessary? */ ++ /* && dentry->d_inode->i_nlink == 1 */ ++ && bdst < bsrc ++ && !au_ftest_cpup(flags, KEEPLINO)) ++ au_xino_write(sb, bsrc, h_inode->i_ino, /*ino*/0); ++ /* ignore this error */ ++ ++ if (do_dt) ++ au_dtime_revert(&dt); ++ return err; ++} ++ ++/* ++ * copyup the @dentry from @bsrc to @bdst. ++ * the caller must set the both of lower dentries. ++ * @len is for truncating when it is -1 copyup the entire file. ++ * in link/rename cases, @dst_parent may be different from the real one. 
++ */ ++static int au_cpup_single(struct dentry *dentry, aufs_bindex_t bdst, ++ aufs_bindex_t bsrc, loff_t len, unsigned int flags, ++ struct dentry *dst_parent) ++{ ++ int err, rerr; ++ aufs_bindex_t old_ibstart; ++ unsigned char isdir, plink; ++ struct au_dtime dt; ++ struct path h_path; ++ struct dentry *h_src, *h_dst, *h_parent; ++ struct inode *dst_inode, *h_dir, *inode; ++ struct super_block *sb; ++ ++ AuDebugOn(bsrc <= bdst); ++ ++ sb = dentry->d_sb; ++ h_path.mnt = au_sbr_mnt(sb, bdst); ++ h_dst = au_h_dptr(dentry, bdst); ++ h_parent = h_dst->d_parent; /* dir inode is locked */ ++ h_dir = h_parent->d_inode; ++ IMustLock(h_dir); ++ ++ h_src = au_h_dptr(dentry, bsrc); ++ inode = dentry->d_inode; ++ ++ if (!dst_parent) ++ dst_parent = dget_parent(dentry); ++ else ++ dget(dst_parent); ++ ++ plink = !!au_opt_test(au_mntflags(sb), PLINK); ++ dst_inode = au_h_iptr(inode, bdst); ++ if (dst_inode) { ++ if (unlikely(!plink)) { ++ err = -EIO; ++ AuIOErr("hi%lu(i%lu) exists on b%d " ++ "but plink is disabled\n", ++ dst_inode->i_ino, inode->i_ino, bdst); ++ goto out; ++ } ++ ++ if (dst_inode->i_nlink) { ++ const int do_dt = au_ftest_cpup(flags, DTIME); ++ ++ h_src = au_plink_lkup(inode, bdst); ++ err = PTR_ERR(h_src); ++ if (IS_ERR(h_src)) ++ goto out; ++ if (unlikely(!h_src->d_inode)) { ++ err = -EIO; ++ AuIOErr("i%lu exists on a upper branch " ++ "but not pseudo-linked\n", ++ inode->i_ino); ++ dput(h_src); ++ goto out; ++ } ++ ++ if (do_dt) { ++ h_path.dentry = h_parent; ++ au_dtime_store(&dt, dst_parent, &h_path); ++ } ++ h_path.dentry = h_dst; ++ err = vfsub_link(h_src, h_dir, &h_path); ++ if (do_dt) ++ au_dtime_revert(&dt); ++ dput(h_src); ++ goto out; ++ } else ++ /* todo: cpup_wh_file? */ ++ /* udba work */ ++ au_update_ibrange(inode, /*do_put_zero*/1); ++ } ++ ++ old_ibstart = au_ibstart(inode); ++ err = cpup_entry(dentry, bdst, bsrc, len, flags, dst_parent); ++ if (unlikely(err)) ++ goto out; ++ dst_inode = h_dst->d_inode; ++ mutex_lock_nested(&dst_inode->i_mutex, AuLsc_I_CHILD2); ++ ++ err = cpup_iattr(dentry, bdst, h_src); ++ isdir = S_ISDIR(dst_inode->i_mode); ++ if (!err) { ++ if (bdst < old_ibstart) { ++ if (S_ISREG(inode->i_mode)) { ++ err = au_dy_iaop(inode, bdst, dst_inode); ++ if (unlikely(err)) ++ goto out_rev; ++ } ++ au_set_ibstart(inode, bdst); ++ } ++ au_set_h_iptr(inode, bdst, au_igrab(dst_inode), ++ au_hi_flags(inode, isdir)); ++ mutex_unlock(&dst_inode->i_mutex); ++ if (!isdir ++ && h_src->d_inode->i_nlink > 1 ++ && plink) ++ au_plink_append(inode, bdst, h_dst); ++ goto out; /* success */ ++ } ++ ++ /* revert */ ++out_rev: ++ h_path.dentry = h_parent; ++ mutex_unlock(&dst_inode->i_mutex); ++ au_dtime_store(&dt, dst_parent, &h_path); ++ h_path.dentry = h_dst; ++ if (!isdir) ++ rerr = vfsub_unlink(h_dir, &h_path, /*force*/0); ++ else ++ rerr = vfsub_rmdir(h_dir, &h_path); ++ au_dtime_revert(&dt); ++ if (rerr) { ++ AuIOErr("failed removing broken entry(%d, %d)\n", err, rerr); ++ err = -EIO; ++ } ++ ++out: ++ dput(dst_parent); ++ return err; ++} ++ ++struct au_cpup_single_args { ++ int *errp; ++ struct dentry *dentry; ++ aufs_bindex_t bdst, bsrc; ++ loff_t len; ++ unsigned int flags; ++ struct dentry *dst_parent; ++}; ++ ++static void au_call_cpup_single(void *args) ++{ ++ struct au_cpup_single_args *a = args; ++ *a->errp = au_cpup_single(a->dentry, a->bdst, a->bsrc, a->len, ++ a->flags, a->dst_parent); ++} ++ ++/* ++ * prevent SIGXFSZ in copy-up. ++ * testing CAP_MKNOD is for generic fs, ++ * but CAP_FSETID is for xfs only, currently. 
++ */ ++static int au_cpup_sio_test(struct super_block *sb, umode_t mode) ++{ ++ int do_sio; ++ ++ do_sio = 0; ++ if (!au_wkq_test() ++ && (!au_sbi(sb)->si_plink_maint_pid ++ || au_plink_maint(sb, AuLock_NOPLM))) { ++ switch (mode & S_IFMT) { ++ case S_IFREG: ++ /* no condition about RLIMIT_FSIZE and the file size */ ++ do_sio = 1; ++ break; ++ case S_IFCHR: ++ case S_IFBLK: ++ do_sio = !capable(CAP_MKNOD); ++ break; ++ } ++ if (!do_sio) ++ do_sio = ((mode & (S_ISUID | S_ISGID)) ++ && !capable(CAP_FSETID)); ++ } ++ ++ return do_sio; ++} ++ ++int au_sio_cpup_single(struct dentry *dentry, aufs_bindex_t bdst, ++ aufs_bindex_t bsrc, loff_t len, unsigned int flags, ++ struct dentry *dst_parent) ++{ ++ int err, wkq_err; ++ struct dentry *h_dentry; ++ ++ h_dentry = au_h_dptr(dentry, bsrc); ++ if (!au_cpup_sio_test(dentry->d_sb, h_dentry->d_inode->i_mode)) ++ err = au_cpup_single(dentry, bdst, bsrc, len, flags, ++ dst_parent); ++ else { ++ struct au_cpup_single_args args = { ++ .errp = &err, ++ .dentry = dentry, ++ .bdst = bdst, ++ .bsrc = bsrc, ++ .len = len, ++ .flags = flags, ++ .dst_parent = dst_parent ++ }; ++ wkq_err = au_wkq_wait(au_call_cpup_single, &args); ++ if (unlikely(wkq_err)) ++ err = wkq_err; ++ } ++ ++ return err; ++} ++ ++/* ++ * copyup the @dentry from the first active lower branch to @bdst, ++ * using au_cpup_single(). ++ */ ++static int au_cpup_simple(struct dentry *dentry, aufs_bindex_t bdst, loff_t len, ++ unsigned int flags) ++{ ++ int err; ++ aufs_bindex_t bsrc, bend; ++ ++ bend = au_dbend(dentry); ++ for (bsrc = bdst + 1; bsrc <= bend; bsrc++) ++ if (au_h_dptr(dentry, bsrc)) ++ break; ++ ++ err = au_lkup_neg(dentry, bdst); ++ if (!err) { ++ err = au_cpup_single(dentry, bdst, bsrc, len, flags, NULL); ++ if (!err) ++ return 0; /* success */ ++ ++ /* revert */ ++ au_set_h_dptr(dentry, bdst, NULL); ++ au_set_dbstart(dentry, bsrc); ++ } ++ ++ return err; ++} ++ ++struct au_cpup_simple_args { ++ int *errp; ++ struct dentry *dentry; ++ aufs_bindex_t bdst; ++ loff_t len; ++ unsigned int flags; ++}; ++ ++static void au_call_cpup_simple(void *args) ++{ ++ struct au_cpup_simple_args *a = args; ++ *a->errp = au_cpup_simple(a->dentry, a->bdst, a->len, a->flags); ++} ++ ++int au_sio_cpup_simple(struct dentry *dentry, aufs_bindex_t bdst, loff_t len, ++ unsigned int flags) ++{ ++ int err, wkq_err; ++ struct dentry *parent; ++ struct inode *h_dir; ++ ++ parent = dget_parent(dentry); ++ h_dir = au_h_iptr(parent->d_inode, bdst); ++ if (!au_test_h_perm_sio(h_dir, MAY_EXEC | MAY_WRITE) ++ && !au_cpup_sio_test(dentry->d_sb, dentry->d_inode->i_mode)) ++ err = au_cpup_simple(dentry, bdst, len, flags); ++ else { ++ struct au_cpup_simple_args args = { ++ .errp = &err, ++ .dentry = dentry, ++ .bdst = bdst, ++ .len = len, ++ .flags = flags ++ }; ++ wkq_err = au_wkq_wait(au_call_cpup_simple, &args); ++ if (unlikely(wkq_err)) ++ err = wkq_err; ++ } ++ ++ dput(parent); ++ return err; ++} ++ ++/* ---------------------------------------------------------------------- */ ++ ++/* ++ * copyup the deleted file for writing. 
++ */ ++static int au_do_cpup_wh(struct dentry *dentry, aufs_bindex_t bdst, ++ struct dentry *wh_dentry, struct file *file, ++ loff_t len) ++{ ++ int err; ++ aufs_bindex_t bstart; ++ struct au_dinfo *dinfo; ++ struct dentry *h_d_dst, *h_d_start; ++ struct au_hdentry *hdp; ++ ++ dinfo = au_di(dentry); ++ AuRwMustWriteLock(&dinfo->di_rwsem); ++ ++ bstart = dinfo->di_bstart; ++ hdp = dinfo->di_hdentry; ++ h_d_dst = hdp[0 + bdst].hd_dentry; ++ dinfo->di_bstart = bdst; ++ hdp[0 + bdst].hd_dentry = wh_dentry; ++ if (file) { ++ h_d_start = hdp[0 + bstart].hd_dentry; ++ hdp[0 + bstart].hd_dentry = au_hf_top(file)->f_dentry; ++ } ++ err = au_cpup_single(dentry, bdst, bstart, len, !AuCpup_DTIME, ++ /*h_parent*/NULL); ++ if (file) { ++ if (!err) ++ err = au_reopen_nondir(file); ++ hdp[0 + bstart].hd_dentry = h_d_start; ++ } ++ hdp[0 + bdst].hd_dentry = h_d_dst; ++ dinfo->di_bstart = bstart; ++ ++ return err; ++} ++ ++static int au_cpup_wh(struct dentry *dentry, aufs_bindex_t bdst, loff_t len, ++ struct file *file) ++{ ++ int err; ++ struct au_dtime dt; ++ struct dentry *parent, *h_parent, *wh_dentry; ++ struct au_branch *br; ++ struct path h_path; ++ ++ br = au_sbr(dentry->d_sb, bdst); ++ parent = dget_parent(dentry); ++ h_parent = au_h_dptr(parent, bdst); ++ wh_dentry = au_whtmp_lkup(h_parent, br, &dentry->d_name); ++ err = PTR_ERR(wh_dentry); ++ if (IS_ERR(wh_dentry)) ++ goto out; ++ ++ h_path.dentry = h_parent; ++ h_path.mnt = br->br_mnt; ++ au_dtime_store(&dt, parent, &h_path); ++ err = au_do_cpup_wh(dentry, bdst, wh_dentry, file, len); ++ if (unlikely(err)) ++ goto out_wh; ++ ++ dget(wh_dentry); ++ h_path.dentry = wh_dentry; ++ if (!S_ISDIR(wh_dentry->d_inode->i_mode)) ++ err = vfsub_unlink(h_parent->d_inode, &h_path, /*force*/0); ++ else ++ err = vfsub_rmdir(h_parent->d_inode, &h_path); ++ if (unlikely(err)) { ++ AuIOErr("failed remove copied-up tmp file %.*s(%d)\n", ++ AuDLNPair(wh_dentry), err); ++ err = -EIO; ++ } ++ au_dtime_revert(&dt); ++ au_set_hi_wh(dentry->d_inode, bdst, wh_dentry); ++ ++out_wh: ++ dput(wh_dentry); ++out: ++ dput(parent); ++ return err; ++} ++ ++struct au_cpup_wh_args { ++ int *errp; ++ struct dentry *dentry; ++ aufs_bindex_t bdst; ++ loff_t len; ++ struct file *file; ++}; ++ ++static void au_call_cpup_wh(void *args) ++{ ++ struct au_cpup_wh_args *a = args; ++ *a->errp = au_cpup_wh(a->dentry, a->bdst, a->len, a->file); ++} ++ ++int au_sio_cpup_wh(struct dentry *dentry, aufs_bindex_t bdst, loff_t len, ++ struct file *file) ++{ ++ int err, wkq_err; ++ struct dentry *parent, *h_orph, *h_parent, *h_dentry; ++ struct inode *dir, *h_dir, *h_tmpdir, *h_inode; ++ struct au_wbr *wbr; ++ ++ parent = dget_parent(dentry); ++ dir = parent->d_inode; ++ h_orph = NULL; ++ h_parent = NULL; ++ h_dir = au_igrab(au_h_iptr(dir, bdst)); ++ h_tmpdir = h_dir; ++ if (!h_dir->i_nlink) { ++ wbr = au_sbr(dentry->d_sb, bdst)->br_wbr; ++ h_orph = wbr->wbr_orph; ++ ++ h_parent = dget(au_h_dptr(parent, bdst)); ++ au_set_h_dptr(parent, bdst, dget(h_orph)); ++ h_tmpdir = h_orph->d_inode; ++ au_set_h_iptr(dir, bdst, au_igrab(h_tmpdir), /*flags*/0); ++ ++ /* this temporary unlock is safe */ ++ if (file) ++ h_dentry = au_hf_top(file)->f_dentry; ++ else ++ h_dentry = au_h_dptr(dentry, au_dbstart(dentry)); ++ h_inode = h_dentry->d_inode; ++ IMustLock(h_inode); ++ mutex_unlock(&h_inode->i_mutex); ++ mutex_lock_nested(&h_tmpdir->i_mutex, AuLsc_I_PARENT3); ++ mutex_lock_nested(&h_inode->i_mutex, AuLsc_I_CHILD); ++ /* todo: au_h_open_pre()? 
*/ ++ } ++ ++ if (!au_test_h_perm_sio(h_tmpdir, MAY_EXEC | MAY_WRITE) ++ && !au_cpup_sio_test(dentry->d_sb, dentry->d_inode->i_mode)) ++ err = au_cpup_wh(dentry, bdst, len, file); ++ else { ++ struct au_cpup_wh_args args = { ++ .errp = &err, ++ .dentry = dentry, ++ .bdst = bdst, ++ .len = len, ++ .file = file ++ }; ++ wkq_err = au_wkq_wait(au_call_cpup_wh, &args); ++ if (unlikely(wkq_err)) ++ err = wkq_err; ++ } ++ ++ if (h_orph) { ++ mutex_unlock(&h_tmpdir->i_mutex); ++ /* todo: au_h_open_post()? */ ++ au_set_h_iptr(dir, bdst, au_igrab(h_dir), /*flags*/0); ++ au_set_h_dptr(parent, bdst, h_parent); ++ } ++ iput(h_dir); ++ dput(parent); ++ ++ return err; ++} ++ ++/* ---------------------------------------------------------------------- */ ++ ++/* ++ * generic routine for both of copy-up and copy-down. ++ */ ++/* cf. revalidate function in file.c */ ++int au_cp_dirs(struct dentry *dentry, aufs_bindex_t bdst, ++ int (*cp)(struct dentry *dentry, aufs_bindex_t bdst, ++ struct dentry *h_parent, void *arg), ++ void *arg) ++{ ++ int err; ++ struct au_pin pin; ++ struct dentry *d, *parent, *h_parent, *real_parent; ++ ++ err = 0; ++ parent = dget_parent(dentry); ++ if (IS_ROOT(parent)) ++ goto out; ++ ++ au_pin_init(&pin, dentry, bdst, AuLsc_DI_PARENT2, AuLsc_I_PARENT2, ++ au_opt_udba(dentry->d_sb), AuPin_MNT_WRITE); ++ ++ /* do not use au_dpage */ ++ real_parent = parent; ++ while (1) { ++ dput(parent); ++ parent = dget_parent(dentry); ++ h_parent = au_h_dptr(parent, bdst); ++ if (h_parent) ++ goto out; /* success */ ++ ++ /* find top dir which is necessary to cpup */ ++ do { ++ d = parent; ++ dput(parent); ++ parent = dget_parent(d); ++ di_read_lock_parent3(parent, !AuLock_IR); ++ h_parent = au_h_dptr(parent, bdst); ++ di_read_unlock(parent, !AuLock_IR); ++ } while (!h_parent); ++ ++ if (d != real_parent) ++ di_write_lock_child3(d); ++ ++ /* somebody else might create while we were sleeping */ ++ if (!au_h_dptr(d, bdst) || !au_h_dptr(d, bdst)->d_inode) { ++ if (au_h_dptr(d, bdst)) ++ au_update_dbstart(d); ++ ++ au_pin_set_dentry(&pin, d); ++ err = au_do_pin(&pin); ++ if (!err) { ++ err = cp(d, bdst, h_parent, arg); ++ au_unpin(&pin); ++ } ++ } ++ ++ if (d != real_parent) ++ di_write_unlock(d); ++ if (unlikely(err)) ++ break; ++ } ++ ++out: ++ dput(parent); ++ return err; ++} ++ ++static int au_cpup_dir(struct dentry *dentry, aufs_bindex_t bdst, ++ struct dentry *h_parent __maybe_unused , ++ void *arg __maybe_unused) ++{ ++ return au_sio_cpup_simple(dentry, bdst, -1, AuCpup_DTIME); ++} ++ ++int au_cpup_dirs(struct dentry *dentry, aufs_bindex_t bdst) ++{ ++ return au_cp_dirs(dentry, bdst, au_cpup_dir, NULL); ++} ++ ++int au_test_and_cpup_dirs(struct dentry *dentry, aufs_bindex_t bdst) ++{ ++ int err; ++ struct dentry *parent; ++ struct inode *dir; ++ ++ parent = dget_parent(dentry); ++ dir = parent->d_inode; ++ err = 0; ++ if (au_h_iptr(dir, bdst)) ++ goto out; ++ ++ di_read_unlock(parent, AuLock_IR); ++ di_write_lock_parent(parent); ++ /* someone else might change our inode while we were sleeping */ ++ if (!au_h_iptr(dir, bdst)) ++ err = au_cpup_dirs(dentry, bdst); ++ di_downgrade_lock(parent, AuLock_IR); ++ ++out: ++ dput(parent); ++ return err; ++} +--- /dev/null 2012-03-14 12:35:58.848999748 +0100 ++++ b/fs/aufs/cpup.h 2012-03-20 17:31:17.000000000 +0100 +@@ -0,0 +1,81 @@ ++/* ++ * Copyright (C) 2005-2012 Junjiro R. 
Okajima ++ * ++ * This program, aufs is free software; you can redistribute it and/or modify ++ * it under the terms of the GNU General Public License as published by ++ * the Free Software Foundation; either version 2 of the License, or ++ * (at your option) any later version. ++ * ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ * GNU General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public License ++ * along with this program; if not, write to the Free Software ++ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA ++ */ ++ ++/* ++ * copy-up/down functions ++ */ ++ ++#ifndef __AUFS_CPUP_H__ ++#define __AUFS_CPUP_H__ ++ ++#ifdef __KERNEL__ ++ ++#include ++ ++struct inode; ++struct file; ++ ++void au_cpup_attr_flags(struct inode *dst, struct inode *src); ++void au_cpup_attr_timesizes(struct inode *inode); ++void au_cpup_attr_nlink(struct inode *inode, int force); ++void au_cpup_attr_changeable(struct inode *inode); ++void au_cpup_igen(struct inode *inode, struct inode *h_inode); ++void au_cpup_attr_all(struct inode *inode, int force); ++ ++/* ---------------------------------------------------------------------- */ ++ ++/* cpup flags */ ++#define AuCpup_DTIME 1 /* do dtime_store/revert */ ++#define AuCpup_KEEPLINO (1 << 1) /* do not clear the lower xino, ++ for link(2) */ ++#define au_ftest_cpup(flags, name) ((flags) & AuCpup_##name) ++#define au_fset_cpup(flags, name) \ ++ do { (flags) |= AuCpup_##name; } while (0) ++#define au_fclr_cpup(flags, name) \ ++ do { (flags) &= ~AuCpup_##name; } while (0) ++ ++int au_copy_file(struct file *dst, struct file *src, loff_t len); ++int au_sio_cpup_single(struct dentry *dentry, aufs_bindex_t bdst, ++ aufs_bindex_t bsrc, loff_t len, unsigned int flags, ++ struct dentry *dst_parent); ++int au_sio_cpup_simple(struct dentry *dentry, aufs_bindex_t bdst, loff_t len, ++ unsigned int flags); ++int au_sio_cpup_wh(struct dentry *dentry, aufs_bindex_t bdst, loff_t len, ++ struct file *file); ++ ++int au_cp_dirs(struct dentry *dentry, aufs_bindex_t bdst, ++ int (*cp)(struct dentry *dentry, aufs_bindex_t bdst, ++ struct dentry *h_parent, void *arg), ++ void *arg); ++int au_cpup_dirs(struct dentry *dentry, aufs_bindex_t bdst); ++int au_test_and_cpup_dirs(struct dentry *dentry, aufs_bindex_t bdst); ++ ++/* ---------------------------------------------------------------------- */ ++ ++/* keep timestamps when copyup */ ++struct au_dtime { ++ struct dentry *dt_dentry; ++ struct path dt_h_path; ++ struct timespec dt_atime, dt_mtime; ++}; ++void au_dtime_store(struct au_dtime *dt, struct dentry *dentry, ++ struct path *h_path); ++void au_dtime_revert(struct au_dtime *dt); ++ ++#endif /* __KERNEL__ */ ++#endif /* __AUFS_CPUP_H__ */ +--- /dev/null 2012-03-14 12:35:58.848999748 +0100 ++++ b/fs/aufs/dbgaufs.c 2012-03-20 17:31:17.000000000 +0100 +@@ -0,0 +1,334 @@ ++/* ++ * Copyright (C) 2005-2012 Junjiro R. Okajima ++ * ++ * This program, aufs is free software; you can redistribute it and/or modify ++ * it under the terms of the GNU General Public License as published by ++ * the Free Software Foundation; either version 2 of the License, or ++ * (at your option) any later version. 
++ * ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ * GNU General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public License ++ * along with this program; if not, write to the Free Software ++ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA ++ */ ++ ++/* ++ * debugfs interface ++ */ ++ ++#include ++#include "aufs.h" ++ ++#ifndef CONFIG_SYSFS ++#error DEBUG_FS depends upon SYSFS ++#endif ++ ++static struct dentry *dbgaufs; ++static const mode_t dbgaufs_mode = S_IRUSR | S_IRGRP | S_IROTH; ++ ++/* 20 is max digits length of ulong 64 */ ++struct dbgaufs_arg { ++ int n; ++ char a[20 * 4]; ++}; ++ ++/* ++ * common function for all XINO files ++ */ ++static int dbgaufs_xi_release(struct inode *inode __maybe_unused, ++ struct file *file) ++{ ++ kfree(file->private_data); ++ return 0; ++} ++ ++static int dbgaufs_xi_open(struct file *xf, struct file *file, int do_fcnt) ++{ ++ int err; ++ struct kstat st; ++ struct dbgaufs_arg *p; ++ ++ err = -ENOMEM; ++ p = kmalloc(sizeof(*p), GFP_NOFS); ++ if (unlikely(!p)) ++ goto out; ++ ++ err = 0; ++ p->n = 0; ++ file->private_data = p; ++ if (!xf) ++ goto out; ++ ++ err = vfs_getattr(xf->f_vfsmnt, xf->f_dentry, &st); ++ if (!err) { ++ if (do_fcnt) ++ p->n = snprintf ++ (p->a, sizeof(p->a), "%ld, %llux%lu %lld\n", ++ (long)file_count(xf), st.blocks, st.blksize, ++ (long long)st.size); ++ else ++ p->n = snprintf(p->a, sizeof(p->a), "%llux%lu %lld\n", ++ st.blocks, st.blksize, ++ (long long)st.size); ++ AuDebugOn(p->n >= sizeof(p->a)); ++ } else { ++ p->n = snprintf(p->a, sizeof(p->a), "err %d\n", err); ++ err = 0; ++ } ++ ++out: ++ return err; ++ ++} ++ ++static ssize_t dbgaufs_xi_read(struct file *file, char __user *buf, ++ size_t count, loff_t *ppos) ++{ ++ struct dbgaufs_arg *p; ++ ++ p = file->private_data; ++ return simple_read_from_buffer(buf, count, ppos, p->a, p->n); ++} ++ ++/* ---------------------------------------------------------------------- */ ++ ++static int dbgaufs_xib_open(struct inode *inode, struct file *file) ++{ ++ int err; ++ struct au_sbinfo *sbinfo; ++ struct super_block *sb; ++ ++ sbinfo = inode->i_private; ++ sb = sbinfo->si_sb; ++ si_noflush_read_lock(sb); ++ err = dbgaufs_xi_open(sbinfo->si_xib, file, /*do_fcnt*/0); ++ si_read_unlock(sb); ++ return err; ++} ++ ++static const struct file_operations dbgaufs_xib_fop = { ++ .owner = THIS_MODULE, ++ .open = dbgaufs_xib_open, ++ .release = dbgaufs_xi_release, ++ .read = dbgaufs_xi_read ++}; ++ ++/* ---------------------------------------------------------------------- */ ++ ++#define DbgaufsXi_PREFIX "xi" ++ ++static int dbgaufs_xino_open(struct inode *inode, struct file *file) ++{ ++ int err; ++ long l; ++ struct au_sbinfo *sbinfo; ++ struct super_block *sb; ++ struct file *xf; ++ struct qstr *name; ++ ++ err = -ENOENT; ++ xf = NULL; ++ name = &file->f_dentry->d_name; ++ if (unlikely(name->len < sizeof(DbgaufsXi_PREFIX) ++ || memcmp(name->name, DbgaufsXi_PREFIX, ++ sizeof(DbgaufsXi_PREFIX) - 1))) ++ goto out; ++ err = kstrtol(name->name + sizeof(DbgaufsXi_PREFIX) - 1, 10, &l); ++ if (unlikely(err)) ++ goto out; ++ ++ sbinfo = inode->i_private; ++ sb = sbinfo->si_sb; ++ si_noflush_read_lock(sb); ++ if (l <= au_sbend(sb)) { ++ xf = au_sbr(sb, (aufs_bindex_t)l)->br_xino.xi_file; ++ err = dbgaufs_xi_open(xf, file, /*do_fcnt*/1); ++ } else ++ err = -ENOENT; ++ 
si_read_unlock(sb); ++ ++out: ++ return err; ++} ++ ++static const struct file_operations dbgaufs_xino_fop = { ++ .owner = THIS_MODULE, ++ .open = dbgaufs_xino_open, ++ .release = dbgaufs_xi_release, ++ .read = dbgaufs_xi_read ++}; ++ ++void dbgaufs_brs_del(struct super_block *sb, aufs_bindex_t bindex) ++{ ++ aufs_bindex_t bend; ++ struct au_branch *br; ++ struct au_xino_file *xi; ++ ++ if (!au_sbi(sb)->si_dbgaufs) ++ return; ++ ++ bend = au_sbend(sb); ++ for (; bindex <= bend; bindex++) { ++ br = au_sbr(sb, bindex); ++ xi = &br->br_xino; ++ if (xi->xi_dbgaufs) { ++ debugfs_remove(xi->xi_dbgaufs); ++ xi->xi_dbgaufs = NULL; ++ } ++ } ++} ++ ++void dbgaufs_brs_add(struct super_block *sb, aufs_bindex_t bindex) ++{ ++ struct au_sbinfo *sbinfo; ++ struct dentry *parent; ++ struct au_branch *br; ++ struct au_xino_file *xi; ++ aufs_bindex_t bend; ++ char name[sizeof(DbgaufsXi_PREFIX) + 5]; /* "xi" bindex NULL */ ++ ++ sbinfo = au_sbi(sb); ++ parent = sbinfo->si_dbgaufs; ++ if (!parent) ++ return; ++ ++ bend = au_sbend(sb); ++ for (; bindex <= bend; bindex++) { ++ snprintf(name, sizeof(name), DbgaufsXi_PREFIX "%d", bindex); ++ br = au_sbr(sb, bindex); ++ xi = &br->br_xino; ++ AuDebugOn(xi->xi_dbgaufs); ++ xi->xi_dbgaufs = debugfs_create_file(name, dbgaufs_mode, parent, ++ sbinfo, &dbgaufs_xino_fop); ++ /* ignore an error */ ++ if (unlikely(!xi->xi_dbgaufs)) ++ AuWarn1("failed %s under debugfs\n", name); ++ } ++} ++ ++/* ---------------------------------------------------------------------- */ ++ ++#ifdef CONFIG_AUFS_EXPORT ++static int dbgaufs_xigen_open(struct inode *inode, struct file *file) ++{ ++ int err; ++ struct au_sbinfo *sbinfo; ++ struct super_block *sb; ++ ++ sbinfo = inode->i_private; ++ sb = sbinfo->si_sb; ++ si_noflush_read_lock(sb); ++ err = dbgaufs_xi_open(sbinfo->si_xigen, file, /*do_fcnt*/0); ++ si_read_unlock(sb); ++ return err; ++} ++ ++static const struct file_operations dbgaufs_xigen_fop = { ++ .owner = THIS_MODULE, ++ .open = dbgaufs_xigen_open, ++ .release = dbgaufs_xi_release, ++ .read = dbgaufs_xi_read ++}; ++ ++static int dbgaufs_xigen_init(struct au_sbinfo *sbinfo) ++{ ++ int err; ++ ++ /* ++ * This function is a dynamic '__init' fucntion actually, ++ * so the tiny check for si_rwsem is unnecessary. ++ */ ++ /* AuRwMustWriteLock(&sbinfo->si_rwsem); */ ++ ++ err = -EIO; ++ sbinfo->si_dbgaufs_xigen = debugfs_create_file ++ ("xigen", dbgaufs_mode, sbinfo->si_dbgaufs, sbinfo, ++ &dbgaufs_xigen_fop); ++ if (sbinfo->si_dbgaufs_xigen) ++ err = 0; ++ ++ return err; ++} ++#else ++static int dbgaufs_xigen_init(struct au_sbinfo *sbinfo) ++{ ++ return 0; ++} ++#endif /* CONFIG_AUFS_EXPORT */ ++ ++/* ---------------------------------------------------------------------- */ ++ ++void dbgaufs_si_fin(struct au_sbinfo *sbinfo) ++{ ++ /* ++ * This function is a dynamic '__init' fucntion actually, ++ * so the tiny check for si_rwsem is unnecessary. ++ */ ++ /* AuRwMustWriteLock(&sbinfo->si_rwsem); */ ++ ++ debugfs_remove_recursive(sbinfo->si_dbgaufs); ++ sbinfo->si_dbgaufs = NULL; ++ kobject_put(&sbinfo->si_kobj); ++} ++ ++int dbgaufs_si_init(struct au_sbinfo *sbinfo) ++{ ++ int err; ++ char name[SysaufsSiNameLen]; ++ ++ /* ++ * This function is a dynamic '__init' fucntion actually, ++ * so the tiny check for si_rwsem is unnecessary. 
++ */ ++ /* AuRwMustWriteLock(&sbinfo->si_rwsem); */ ++ ++ err = -ENOENT; ++ if (!dbgaufs) { ++ AuErr1("/debug/aufs is uninitialized\n"); ++ goto out; ++ } ++ ++ err = -EIO; ++ sysaufs_name(sbinfo, name); ++ sbinfo->si_dbgaufs = debugfs_create_dir(name, dbgaufs); ++ if (unlikely(!sbinfo->si_dbgaufs)) ++ goto out; ++ kobject_get(&sbinfo->si_kobj); ++ ++ sbinfo->si_dbgaufs_xib = debugfs_create_file ++ ("xib", dbgaufs_mode, sbinfo->si_dbgaufs, sbinfo, ++ &dbgaufs_xib_fop); ++ if (unlikely(!sbinfo->si_dbgaufs_xib)) ++ goto out_dir; ++ ++ err = dbgaufs_xigen_init(sbinfo); ++ if (!err) ++ goto out; /* success */ ++ ++out_dir: ++ dbgaufs_si_fin(sbinfo); ++out: ++ return err; ++} ++ ++/* ---------------------------------------------------------------------- */ ++ ++void dbgaufs_fin(void) ++{ ++ debugfs_remove(dbgaufs); ++} ++ ++int __init dbgaufs_init(void) ++{ ++ int err; ++ ++ err = -EIO; ++ dbgaufs = debugfs_create_dir(AUFS_NAME, NULL); ++ if (dbgaufs) ++ err = 0; ++ return err; ++} +--- /dev/null 2012-03-14 12:35:58.848999748 +0100 ++++ b/fs/aufs/dbgaufs.h 2012-03-20 17:31:17.000000000 +0100 +@@ -0,0 +1,49 @@ ++/* ++ * Copyright (C) 2005-2012 Junjiro R. Okajima ++ * ++ * This program, aufs is free software; you can redistribute it and/or modify ++ * it under the terms of the GNU General Public License as published by ++ * the Free Software Foundation; either version 2 of the License, or ++ * (at your option) any later version. ++ * ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ * GNU General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public License ++ * along with this program; if not, write to the Free Software ++ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA ++ */ ++ ++/* ++ * debugfs interface ++ */ ++ ++#ifndef __DBGAUFS_H__ ++#define __DBGAUFS_H__ ++ ++#ifdef __KERNEL__ ++ ++struct super_block; ++struct au_sbinfo; ++ ++#ifdef CONFIG_DEBUG_FS ++/* dbgaufs.c */ ++void dbgaufs_brs_del(struct super_block *sb, aufs_bindex_t bindex); ++void dbgaufs_brs_add(struct super_block *sb, aufs_bindex_t bindex); ++void dbgaufs_si_fin(struct au_sbinfo *sbinfo); ++int dbgaufs_si_init(struct au_sbinfo *sbinfo); ++void dbgaufs_fin(void); ++int __init dbgaufs_init(void); ++#else ++AuStubVoid(dbgaufs_brs_del, struct super_block *sb, aufs_bindex_t bindex) ++AuStubVoid(dbgaufs_brs_add, struct super_block *sb, aufs_bindex_t bindex) ++AuStubVoid(dbgaufs_si_fin, struct au_sbinfo *sbinfo) ++AuStubInt0(dbgaufs_si_init, struct au_sbinfo *sbinfo) ++AuStubVoid(dbgaufs_fin, void) ++AuStubInt0(__init dbgaufs_init, void) ++#endif /* CONFIG_DEBUG_FS */ ++ ++#endif /* __KERNEL__ */ ++#endif /* __DBGAUFS_H__ */ +--- /dev/null 2012-03-14 12:35:58.848999748 +0100 ++++ b/fs/aufs/dcsub.c 2012-03-20 17:31:17.000000000 +0100 +@@ -0,0 +1,243 @@ ++/* ++ * Copyright (C) 2005-2012 Junjiro R. Okajima ++ * ++ * This program, aufs is free software; you can redistribute it and/or modify ++ * it under the terms of the GNU General Public License as published by ++ * the Free Software Foundation; either version 2 of the License, or ++ * (at your option) any later version. ++ * ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the ++ * GNU General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public License ++ * along with this program; if not, write to the Free Software ++ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA ++ */ ++ ++/* ++ * sub-routines for dentry cache ++ */ ++ ++#include "aufs.h" ++ ++static void au_dpage_free(struct au_dpage *dpage) ++{ ++ int i; ++ struct dentry **p; ++ ++ p = dpage->dentries; ++ for (i = 0; i < dpage->ndentry; i++) ++ dput(*p++); ++ free_page((unsigned long)dpage->dentries); ++} ++ ++int au_dpages_init(struct au_dcsub_pages *dpages, gfp_t gfp) ++{ ++ int err; ++ void *p; ++ ++ err = -ENOMEM; ++ dpages->dpages = kmalloc(sizeof(*dpages->dpages), gfp); ++ if (unlikely(!dpages->dpages)) ++ goto out; ++ ++ p = (void *)__get_free_page(gfp); ++ if (unlikely(!p)) ++ goto out_dpages; ++ ++ dpages->dpages[0].ndentry = 0; ++ dpages->dpages[0].dentries = p; ++ dpages->ndpage = 1; ++ return 0; /* success */ ++ ++out_dpages: ++ kfree(dpages->dpages); ++out: ++ return err; ++} ++ ++void au_dpages_free(struct au_dcsub_pages *dpages) ++{ ++ int i; ++ struct au_dpage *p; ++ ++ p = dpages->dpages; ++ for (i = 0; i < dpages->ndpage; i++) ++ au_dpage_free(p++); ++ kfree(dpages->dpages); ++} ++ ++static int au_dpages_append(struct au_dcsub_pages *dpages, ++ struct dentry *dentry, gfp_t gfp) ++{ ++ int err, sz; ++ struct au_dpage *dpage; ++ void *p; ++ ++ dpage = dpages->dpages + dpages->ndpage - 1; ++ sz = PAGE_SIZE / sizeof(dentry); ++ if (unlikely(dpage->ndentry >= sz)) { ++ AuLabel(new dpage); ++ err = -ENOMEM; ++ sz = dpages->ndpage * sizeof(*dpages->dpages); ++ p = au_kzrealloc(dpages->dpages, sz, ++ sz + sizeof(*dpages->dpages), gfp); ++ if (unlikely(!p)) ++ goto out; ++ ++ dpages->dpages = p; ++ dpage = dpages->dpages + dpages->ndpage; ++ p = (void *)__get_free_page(gfp); ++ if (unlikely(!p)) ++ goto out; ++ ++ dpage->ndentry = 0; ++ dpage->dentries = p; ++ dpages->ndpage++; ++ } ++ ++ AuDebugOn(!dentry->d_count); ++ dpage->dentries[dpage->ndentry++] = dget_dlock(dentry); ++ return 0; /* success */ ++ ++out: ++ return err; ++} ++ ++int au_dcsub_pages(struct au_dcsub_pages *dpages, struct dentry *root, ++ au_dpages_test test, void *arg) ++{ ++ int err; ++ struct dentry *this_parent; ++ struct list_head *next; ++ struct super_block *sb = root->d_sb; ++ ++ err = 0; ++ write_seqlock(&rename_lock); ++ this_parent = root; ++ spin_lock(&this_parent->d_lock); ++repeat: ++ next = this_parent->d_subdirs.next; ++resume: ++ if (this_parent->d_sb == sb ++ && !IS_ROOT(this_parent) ++ && au_di(this_parent) ++ && this_parent->d_count ++ && (!test || test(this_parent, arg))) { ++ err = au_dpages_append(dpages, this_parent, GFP_ATOMIC); ++ if (unlikely(err)) ++ goto out; ++ } ++ ++ while (next != &this_parent->d_subdirs) { ++ struct list_head *tmp = next; ++ struct dentry *dentry = list_entry(tmp, struct dentry, ++ d_u.d_child); ++ ++ next = tmp->next; ++ spin_lock_nested(&dentry->d_lock, DENTRY_D_LOCK_NESTED); ++ if (dentry->d_count) { ++ if (!list_empty(&dentry->d_subdirs)) { ++ spin_unlock(&this_parent->d_lock); ++ spin_release(&dentry->d_lock.dep_map, 1, ++ _RET_IP_); ++ this_parent = dentry; ++ spin_acquire(&this_parent->d_lock.dep_map, 0, 1, ++ _RET_IP_); ++ goto repeat; ++ } ++ if (dentry->d_sb == sb ++ && au_di(dentry) ++ && (!test || test(dentry, arg))) ++ err = au_dpages_append(dpages, dentry, ++ GFP_ATOMIC); ++ } ++ spin_unlock(&dentry->d_lock); ++ if (unlikely(err)) ++ goto out; ++ } ++ ++ if (this_parent != root) { 
++ struct dentry *tmp; ++ struct dentry *child; ++ ++ tmp = this_parent->d_parent; ++ rcu_read_lock(); ++ spin_unlock(&this_parent->d_lock); ++ child = this_parent; ++ this_parent = tmp; ++ spin_lock(&this_parent->d_lock); ++ rcu_read_unlock(); ++ next = child->d_u.d_child.next; ++ goto resume; ++ } ++ ++out: ++ spin_unlock(&this_parent->d_lock); ++ write_sequnlock(&rename_lock); ++ return err; ++} ++ ++int au_dcsub_pages_rev(struct au_dcsub_pages *dpages, struct dentry *dentry, ++ int do_include, au_dpages_test test, void *arg) ++{ ++ int err; ++ ++ err = 0; ++ write_seqlock(&rename_lock); ++ spin_lock(&dentry->d_lock); ++ if (do_include ++ && dentry->d_count ++ && (!test || test(dentry, arg))) ++ err = au_dpages_append(dpages, dentry, GFP_ATOMIC); ++ spin_unlock(&dentry->d_lock); ++ if (unlikely(err)) ++ goto out; ++ ++ /* ++ * vfsmount_lock is unnecessary since this is a traverse in a single ++ * mount ++ */ ++ while (!IS_ROOT(dentry)) { ++ dentry = dentry->d_parent; /* rename_lock is locked */ ++ spin_lock(&dentry->d_lock); ++ if (dentry->d_count ++ && (!test || test(dentry, arg))) ++ err = au_dpages_append(dpages, dentry, GFP_ATOMIC); ++ spin_unlock(&dentry->d_lock); ++ if (unlikely(err)) ++ break; ++ } ++ ++out: ++ write_sequnlock(&rename_lock); ++ return err; ++} ++ ++static inline int au_dcsub_dpages_aufs(struct dentry *dentry, void *arg) ++{ ++ return au_di(dentry) && dentry->d_sb == arg; ++} ++ ++int au_dcsub_pages_rev_aufs(struct au_dcsub_pages *dpages, ++ struct dentry *dentry, int do_include) ++{ ++ return au_dcsub_pages_rev(dpages, dentry, do_include, ++ au_dcsub_dpages_aufs, dentry->d_sb); ++} ++ ++int au_test_subdir(struct dentry *d1, struct dentry *d2) ++{ ++ struct path path[2] = { ++ { ++ .dentry = d1 ++ }, ++ { ++ .dentry = d2 ++ } ++ }; ++ ++ return path_is_under(path + 0, path + 1); ++} +--- /dev/null 2012-03-14 12:35:58.848999748 +0100 ++++ b/fs/aufs/dcsub.h 2012-03-20 17:31:17.000000000 +0100 +@@ -0,0 +1,94 @@ ++/* ++ * Copyright (C) 2005-2012 Junjiro R. Okajima ++ * ++ * This program, aufs is free software; you can redistribute it and/or modify ++ * it under the terms of the GNU General Public License as published by ++ * the Free Software Foundation; either version 2 of the License, or ++ * (at your option) any later version. ++ * ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ * GNU General Public License for more details. 
++ * ++ * You should have received a copy of the GNU General Public License ++ * along with this program; if not, write to the Free Software ++ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA ++ */ ++ ++/* ++ * sub-routines for dentry cache ++ */ ++ ++#ifndef __AUFS_DCSUB_H__ ++#define __AUFS_DCSUB_H__ ++ ++#ifdef __KERNEL__ ++ ++#include ++#include ++ ++struct dentry; ++ ++struct au_dpage { ++ int ndentry; ++ struct dentry **dentries; ++}; ++ ++struct au_dcsub_pages { ++ int ndpage; ++ struct au_dpage *dpages; ++}; ++ ++/* ---------------------------------------------------------------------- */ ++ ++/* dcsub.c */ ++int au_dpages_init(struct au_dcsub_pages *dpages, gfp_t gfp); ++void au_dpages_free(struct au_dcsub_pages *dpages); ++typedef int (*au_dpages_test)(struct dentry *dentry, void *arg); ++int au_dcsub_pages(struct au_dcsub_pages *dpages, struct dentry *root, ++ au_dpages_test test, void *arg); ++int au_dcsub_pages_rev(struct au_dcsub_pages *dpages, struct dentry *dentry, ++ int do_include, au_dpages_test test, void *arg); ++int au_dcsub_pages_rev_aufs(struct au_dcsub_pages *dpages, ++ struct dentry *dentry, int do_include); ++int au_test_subdir(struct dentry *d1, struct dentry *d2); ++ ++/* ---------------------------------------------------------------------- */ ++ ++static inline int au_d_hashed_positive(struct dentry *d) ++{ ++ int err; ++ struct inode *inode = d->d_inode; ++ err = 0; ++ if (unlikely(d_unhashed(d) || !inode || !inode->i_nlink)) ++ err = -ENOENT; ++ return err; ++} ++ ++static inline int au_d_alive(struct dentry *d) ++{ ++ int err; ++ struct inode *inode; ++ err = 0; ++ if (!IS_ROOT(d)) ++ err = au_d_hashed_positive(d); ++ else { ++ inode = d->d_inode; ++ if (unlikely(d_unlinked(d) || !inode || !inode->i_nlink)) ++ err = -ENOENT; ++ } ++ return err; ++} ++ ++static inline int au_alive_dir(struct dentry *d) ++{ ++ int err; ++ err = au_d_alive(d); ++ if (unlikely(err || IS_DEADDIR(d->d_inode))) ++ err = -ENOENT; ++ return err; ++} ++ ++#endif /* __KERNEL__ */ ++#endif /* __AUFS_DCSUB_H__ */ +--- /dev/null 2012-03-14 12:35:58.848999748 +0100 ++++ b/fs/aufs/debug.c 2012-03-20 17:31:17.000000000 +0100 +@@ -0,0 +1,489 @@ ++/* ++ * Copyright (C) 2005-2012 Junjiro R. Okajima ++ * ++ * This program, aufs is free software; you can redistribute it and/or modify ++ * it under the terms of the GNU General Public License as published by ++ * the Free Software Foundation; either version 2 of the License, or ++ * (at your option) any later version. ++ * ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ * GNU General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public License ++ * along with this program; if not, write to the Free Software ++ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA ++ */ ++ ++/* ++ * debug print functions ++ */ ++ ++#include ++#include "aufs.h" ++ ++int aufs_debug; ++MODULE_PARM_DESC(debug, "debug print"); ++module_param_named(debug, aufs_debug, int, S_IRUGO | S_IWUSR | S_IWGRP); ++ ++char *au_plevel = KERN_DEBUG; ++#define dpri(fmt, ...) 
do { \ ++ if ((au_plevel \ ++ && strcmp(au_plevel, KERN_DEBUG)) \ ++ || au_debug_test()) \ ++ printk("%s" fmt, au_plevel, ##__VA_ARGS__); \ ++} while (0) ++ ++/* ---------------------------------------------------------------------- */ ++ ++void au_dpri_whlist(struct au_nhash *whlist) ++{ ++ unsigned long ul, n; ++ struct hlist_head *head; ++ struct au_vdir_wh *tpos; ++ struct hlist_node *pos; ++ ++ n = whlist->nh_num; ++ head = whlist->nh_head; ++ for (ul = 0; ul < n; ul++) { ++ hlist_for_each_entry(tpos, pos, head, wh_hash) ++ dpri("b%d, %.*s, %d\n", ++ tpos->wh_bindex, ++ tpos->wh_str.len, tpos->wh_str.name, ++ tpos->wh_str.len); ++ head++; ++ } ++} ++ ++void au_dpri_vdir(struct au_vdir *vdir) ++{ ++ unsigned long ul; ++ union au_vdir_deblk_p p; ++ unsigned char *o; ++ ++ if (!vdir || IS_ERR(vdir)) { ++ dpri("err %ld\n", PTR_ERR(vdir)); ++ return; ++ } ++ ++ dpri("deblk %u, nblk %lu, deblk %p, last{%lu, %p}, ver %lu\n", ++ vdir->vd_deblk_sz, vdir->vd_nblk, vdir->vd_deblk, ++ vdir->vd_last.ul, vdir->vd_last.p.deblk, vdir->vd_version); ++ for (ul = 0; ul < vdir->vd_nblk; ul++) { ++ p.deblk = vdir->vd_deblk[ul]; ++ o = p.deblk; ++ dpri("[%lu]: %p\n", ul, o); ++ } ++} ++ ++static int do_pri_inode(aufs_bindex_t bindex, struct inode *inode, int hn, ++ struct dentry *wh) ++{ ++ char *n = NULL; ++ int l = 0; ++ ++ if (!inode || IS_ERR(inode)) { ++ dpri("i%d: err %ld\n", bindex, PTR_ERR(inode)); ++ return -1; ++ } ++ ++ /* the type of i_blocks depends upon CONFIG_LSF */ ++ BUILD_BUG_ON(sizeof(inode->i_blocks) != sizeof(unsigned long) ++ && sizeof(inode->i_blocks) != sizeof(u64)); ++ if (wh) { ++ n = (void *)wh->d_name.name; ++ l = wh->d_name.len; ++ } ++ ++ dpri("i%d: %p, i%lu, %s, cnt %d, nl %u, 0%o, sz %llu, blk %llu," ++ " hn %d, ct %lld, np %lu, st 0x%lx, f 0x%x, v %llu, g %x%s%.*s\n", ++ bindex, inode, ++ inode->i_ino, inode->i_sb ? au_sbtype(inode->i_sb) : "??", ++ atomic_read(&inode->i_count), inode->i_nlink, inode->i_mode, ++ i_size_read(inode), (unsigned long long)inode->i_blocks, ++ hn, (long long)timespec_to_ns(&inode->i_ctime) & 0x0ffff, ++ inode->i_mapping ? inode->i_mapping->nrpages : 0, ++ inode->i_state, inode->i_flags, inode->i_version, ++ inode->i_generation, ++ l ? 
", wh " : "", l, n); ++ return 0; ++} ++ ++void au_dpri_inode(struct inode *inode) ++{ ++ struct au_iinfo *iinfo; ++ aufs_bindex_t bindex; ++ int err, hn; ++ ++ err = do_pri_inode(-1, inode, -1, NULL); ++ if (err || !au_test_aufs(inode->i_sb)) ++ return; ++ ++ iinfo = au_ii(inode); ++ if (!iinfo) ++ return; ++ dpri("i-1: bstart %d, bend %d, gen %d\n", ++ iinfo->ii_bstart, iinfo->ii_bend, au_iigen(inode)); ++ if (iinfo->ii_bstart < 0) ++ return; ++ hn = 0; ++ for (bindex = iinfo->ii_bstart; bindex <= iinfo->ii_bend; bindex++) { ++ hn = !!au_hn(iinfo->ii_hinode + bindex); ++ do_pri_inode(bindex, iinfo->ii_hinode[0 + bindex].hi_inode, hn, ++ iinfo->ii_hinode[0 + bindex].hi_whdentry); ++ } ++} ++ ++void au_dpri_dalias(struct inode *inode) ++{ ++ struct dentry *d; ++ ++ spin_lock(&inode->i_lock); ++ list_for_each_entry(d, &inode->i_dentry, d_alias) ++ au_dpri_dentry(d); ++ spin_unlock(&inode->i_lock); ++} ++ ++static int do_pri_dentry(aufs_bindex_t bindex, struct dentry *dentry) ++{ ++ struct dentry *wh = NULL; ++ int hn; ++ ++ if (!dentry || IS_ERR(dentry)) { ++ dpri("d%d: err %ld\n", bindex, PTR_ERR(dentry)); ++ return -1; ++ } ++ /* do not call dget_parent() here */ ++ /* note: access d_xxx without d_lock */ ++ dpri("d%d: %.*s?/%.*s, %s, cnt %d, flags 0x%x\n", ++ bindex, ++ AuDLNPair(dentry->d_parent), AuDLNPair(dentry), ++ dentry->d_sb ? au_sbtype(dentry->d_sb) : "??", ++ dentry->d_count, dentry->d_flags); ++ hn = -1; ++ if (bindex >= 0 && dentry->d_inode && au_test_aufs(dentry->d_sb)) { ++ struct au_iinfo *iinfo = au_ii(dentry->d_inode); ++ if (iinfo) { ++ hn = !!au_hn(iinfo->ii_hinode + bindex); ++ wh = iinfo->ii_hinode[0 + bindex].hi_whdentry; ++ } ++ } ++ do_pri_inode(bindex, dentry->d_inode, hn, wh); ++ return 0; ++} ++ ++void au_dpri_dentry(struct dentry *dentry) ++{ ++ struct au_dinfo *dinfo; ++ aufs_bindex_t bindex; ++ int err; ++ struct au_hdentry *hdp; ++ ++ err = do_pri_dentry(-1, dentry); ++ if (err || !au_test_aufs(dentry->d_sb)) ++ return; ++ ++ dinfo = au_di(dentry); ++ if (!dinfo) ++ return; ++ dpri("d-1: bstart %d, bend %d, bwh %d, bdiropq %d, gen %d\n", ++ dinfo->di_bstart, dinfo->di_bend, ++ dinfo->di_bwh, dinfo->di_bdiropq, au_digen(dentry)); ++ if (dinfo->di_bstart < 0) ++ return; ++ hdp = dinfo->di_hdentry; ++ for (bindex = dinfo->di_bstart; bindex <= dinfo->di_bend; bindex++) ++ do_pri_dentry(bindex, hdp[0 + bindex].hd_dentry); ++} ++ ++static int do_pri_file(aufs_bindex_t bindex, struct file *file) ++{ ++ char a[32]; ++ ++ if (!file || IS_ERR(file)) { ++ dpri("f%d: err %ld\n", bindex, PTR_ERR(file)); ++ return -1; ++ } ++ a[0] = 0; ++ if (bindex < 0 ++ && file->f_dentry ++ && au_test_aufs(file->f_dentry->d_sb) ++ && au_fi(file)) ++ snprintf(a, sizeof(a), ", gen %d, mmapped %d", ++ au_figen(file), atomic_read(&au_fi(file)->fi_mmapped)); ++ dpri("f%d: mode 0x%x, flags 0%o, cnt %ld, v %llu, pos %llu%s\n", ++ bindex, file->f_mode, file->f_flags, (long)file_count(file), ++ file->f_version, file->f_pos, a); ++ if (file->f_dentry) ++ do_pri_dentry(bindex, file->f_dentry); ++ return 0; ++} ++ ++void au_dpri_file(struct file *file) ++{ ++ struct au_finfo *finfo; ++ struct au_fidir *fidir; ++ struct au_hfile *hfile; ++ aufs_bindex_t bindex; ++ int err; ++ ++ err = do_pri_file(-1, file); ++ if (err || !file->f_dentry || !au_test_aufs(file->f_dentry->d_sb)) ++ return; ++ ++ finfo = au_fi(file); ++ if (!finfo) ++ return; ++ if (finfo->fi_btop < 0) ++ return; ++ fidir = finfo->fi_hdir; ++ if (!fidir) ++ do_pri_file(finfo->fi_btop, finfo->fi_htop.hf_file); ++ else ++ for (bindex = 
finfo->fi_btop; ++ bindex >= 0 && bindex <= fidir->fd_bbot; ++ bindex++) { ++ hfile = fidir->fd_hfile + bindex; ++ do_pri_file(bindex, hfile ? hfile->hf_file : NULL); ++ } ++} ++ ++static int do_pri_br(aufs_bindex_t bindex, struct au_branch *br) ++{ ++ struct vfsmount *mnt; ++ struct super_block *sb; ++ ++ if (!br || IS_ERR(br)) ++ goto out; ++ mnt = br->br_mnt; ++ if (!mnt || IS_ERR(mnt)) ++ goto out; ++ sb = mnt->mnt_sb; ++ if (!sb || IS_ERR(sb)) ++ goto out; ++ ++ dpri("s%d: {perm 0x%x, id %d, cnt %d, wbr %p}, " ++ "%s, dev 0x%02x%02x, flags 0x%lx, cnt %d, active %d, " ++ "xino %d\n", ++ bindex, br->br_perm, br->br_id, atomic_read(&br->br_count), ++ br->br_wbr, au_sbtype(sb), MAJOR(sb->s_dev), MINOR(sb->s_dev), ++ sb->s_flags, sb->s_count, ++ atomic_read(&sb->s_active), !!br->br_xino.xi_file); ++ return 0; ++ ++out: ++ dpri("s%d: err %ld\n", bindex, PTR_ERR(br)); ++ return -1; ++} ++ ++void au_dpri_sb(struct super_block *sb) ++{ ++ struct au_sbinfo *sbinfo; ++ aufs_bindex_t bindex; ++ int err; ++ /* to reuduce stack size */ ++ struct { ++ struct vfsmount mnt; ++ struct au_branch fake; ++ } *a; ++ ++ /* this function can be called from magic sysrq */ ++ a = kzalloc(sizeof(*a), GFP_ATOMIC); ++ if (unlikely(!a)) { ++ dpri("no memory\n"); ++ return; ++ } ++ ++ a->mnt.mnt_sb = sb; ++ a->fake.br_perm = 0; ++ a->fake.br_mnt = &a->mnt; ++ a->fake.br_xino.xi_file = NULL; ++ atomic_set(&a->fake.br_count, 0); ++ smp_mb(); /* atomic_set */ ++ err = do_pri_br(-1, &a->fake); ++ kfree(a); ++ dpri("dev 0x%x\n", sb->s_dev); ++ if (err || !au_test_aufs(sb)) ++ return; ++ ++ sbinfo = au_sbi(sb); ++ if (!sbinfo) ++ return; ++ dpri("nw %d, gen %u, kobj %d\n", ++ atomic_read(&sbinfo->si_nowait.nw_len), sbinfo->si_generation, ++ atomic_read(&sbinfo->si_kobj.kref.refcount)); ++ for (bindex = 0; bindex <= sbinfo->si_bend; bindex++) ++ do_pri_br(bindex, sbinfo->si_branch[0 + bindex]); ++} ++ ++/* ---------------------------------------------------------------------- */ ++ ++void au_dbg_sleep_jiffy(int jiffy) ++{ ++ while (jiffy) ++ jiffy = schedule_timeout_uninterruptible(jiffy); ++} ++ ++void au_dbg_iattr(struct iattr *ia) ++{ ++#define AuBit(name) if (ia->ia_valid & ATTR_ ## name) \ ++ dpri(#name "\n") ++ AuBit(MODE); ++ AuBit(UID); ++ AuBit(GID); ++ AuBit(SIZE); ++ AuBit(ATIME); ++ AuBit(MTIME); ++ AuBit(CTIME); ++ AuBit(ATIME_SET); ++ AuBit(MTIME_SET); ++ AuBit(FORCE); ++ AuBit(ATTR_FLAG); ++ AuBit(KILL_SUID); ++ AuBit(KILL_SGID); ++ AuBit(FILE); ++ AuBit(KILL_PRIV); ++ AuBit(OPEN); ++ AuBit(TIMES_SET); ++#undef AuBit ++ dpri("ia_file %p\n", ia->ia_file); ++} ++ ++/* ---------------------------------------------------------------------- */ ++ ++void __au_dbg_verify_dinode(struct dentry *dentry, const char *func, int line) ++{ ++ struct inode *h_inode, *inode = dentry->d_inode; ++ struct dentry *h_dentry; ++ aufs_bindex_t bindex, bend, bi; ++ ++ if (!inode /* || au_di(dentry)->di_lsc == AuLsc_DI_TMP */) ++ return; ++ ++ bend = au_dbend(dentry); ++ bi = au_ibend(inode); ++ if (bi < bend) ++ bend = bi; ++ bindex = au_dbstart(dentry); ++ bi = au_ibstart(inode); ++ if (bi > bindex) ++ bindex = bi; ++ ++ for (; bindex <= bend; bindex++) { ++ h_dentry = au_h_dptr(dentry, bindex); ++ if (!h_dentry) ++ continue; ++ h_inode = au_h_iptr(inode, bindex); ++ if (unlikely(h_inode != h_dentry->d_inode)) { ++ int old = au_debug_test(); ++ if (!old) ++ au_debug(1); ++ AuDbg("b%d, %s:%d\n", bindex, func, line); ++ AuDbgDentry(dentry); ++ AuDbgInode(inode); ++ if (!old) ++ au_debug(0); ++ BUG(); ++ } ++ } ++} ++ ++void 
au_dbg_verify_dir_parent(struct dentry *dentry, unsigned int sigen) ++{ ++ struct dentry *parent; ++ ++ parent = dget_parent(dentry); ++ AuDebugOn(!S_ISDIR(dentry->d_inode->i_mode)); ++ AuDebugOn(IS_ROOT(dentry)); ++ AuDebugOn(au_digen_test(parent, sigen)); ++ dput(parent); ++} ++ ++void au_dbg_verify_nondir_parent(struct dentry *dentry, unsigned int sigen) ++{ ++ struct dentry *parent; ++ struct inode *inode; ++ ++ parent = dget_parent(dentry); ++ inode = dentry->d_inode; ++ AuDebugOn(inode && S_ISDIR(dentry->d_inode->i_mode)); ++ AuDebugOn(au_digen_test(parent, sigen)); ++ dput(parent); ++} ++ ++void au_dbg_verify_gen(struct dentry *parent, unsigned int sigen) ++{ ++ int err, i, j; ++ struct au_dcsub_pages dpages; ++ struct au_dpage *dpage; ++ struct dentry **dentries; ++ ++ err = au_dpages_init(&dpages, GFP_NOFS); ++ AuDebugOn(err); ++ err = au_dcsub_pages_rev_aufs(&dpages, parent, /*do_include*/1); ++ AuDebugOn(err); ++ for (i = dpages.ndpage - 1; !err && i >= 0; i--) { ++ dpage = dpages.dpages + i; ++ dentries = dpage->dentries; ++ for (j = dpage->ndentry - 1; !err && j >= 0; j--) ++ AuDebugOn(au_digen_test(dentries[j], sigen)); ++ } ++ au_dpages_free(&dpages); ++} ++ ++void au_dbg_verify_kthread(void) ++{ ++ if (au_wkq_test()) { ++ au_dbg_blocked(); ++ /* ++ * It may be recursive, but udba=notify between two aufs mounts, ++ * where a single ro branch is shared, is not a problem. ++ */ ++ /* WARN_ON(1); */ ++ } ++} ++ ++/* ---------------------------------------------------------------------- */ ++ ++void au_debug_sbinfo_init(struct au_sbinfo *sbinfo __maybe_unused) ++{ ++#ifdef AuForceNoPlink ++ au_opt_clr(sbinfo->si_mntflags, PLINK); ++#endif ++#ifdef AuForceNoXino ++ au_opt_clr(sbinfo->si_mntflags, XINO); ++#endif ++#ifdef AuForceNoRefrof ++ au_opt_clr(sbinfo->si_mntflags, REFROF); ++#endif ++#ifdef AuForceHnotify ++ au_opt_set_udba(sbinfo->si_mntflags, UDBA_HNOTIFY); ++#endif ++#ifdef AuForceRd0 ++ sbinfo->si_rdblk = 0; ++ sbinfo->si_rdhash = 0; ++#endif ++} ++ ++int __init au_debug_init(void) ++{ ++ aufs_bindex_t bindex; ++ struct au_vdir_destr destr; ++ ++ bindex = -1; ++ AuDebugOn(bindex >= 0); ++ ++ destr.len = -1; ++ AuDebugOn(destr.len < NAME_MAX); ++ ++#ifdef CONFIG_4KSTACKS ++ pr_warning("CONFIG_4KSTACKS is defined.\n"); ++#endif ++ ++#ifdef AuForceNoBrs ++ sysaufs_brs = 0; ++#endif ++ ++ return 0; ++} +--- /dev/null 2012-03-14 12:35:58.848999748 +0100 ++++ b/fs/aufs/debug.h 2012-03-20 17:31:17.000000000 +0100 +@@ -0,0 +1,243 @@ ++/* ++ * Copyright (C) 2005-2012 Junjiro R. Okajima ++ * ++ * This program, aufs is free software; you can redistribute it and/or modify ++ * it under the terms of the GNU General Public License as published by ++ * the Free Software Foundation; either version 2 of the License, or ++ * (at your option) any later version. ++ * ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ * GNU General Public License for more details. 
++ * ++ * You should have received a copy of the GNU General Public License ++ * along with this program; if not, write to the Free Software ++ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA ++ */ ++ ++/* ++ * debug print functions ++ */ ++ ++#ifndef __AUFS_DEBUG_H__ ++#define __AUFS_DEBUG_H__ ++ ++#ifdef __KERNEL__ ++ ++#include ++#include ++#include ++#include ++ ++#ifdef CONFIG_AUFS_DEBUG ++#define AuDebugOn(a) BUG_ON(a) ++ ++/* module parameter */ ++extern int aufs_debug; ++static inline void au_debug(int n) ++{ ++ aufs_debug = n; ++ smp_mb(); ++} ++ ++static inline int au_debug_test(void) ++{ ++ return aufs_debug; ++} ++#else ++#define AuDebugOn(a) do {} while (0) ++AuStubVoid(au_debug, int n) ++AuStubInt0(au_debug_test, void) ++#endif /* CONFIG_AUFS_DEBUG */ ++ ++/* ---------------------------------------------------------------------- */ ++ ++/* debug print */ ++ ++#define AuDbg(fmt, ...) do { \ ++ if (au_debug_test()) \ ++ pr_debug("DEBUG: " fmt, ##__VA_ARGS__); \ ++} while (0) ++#define AuLabel(l) AuDbg(#l "\n") ++#define AuIOErr(fmt, ...) pr_err("I/O Error, " fmt, ##__VA_ARGS__) ++#define AuWarn1(fmt, ...) do { \ ++ static unsigned char _c; \ ++ if (!_c++) \ ++ pr_warning(fmt, ##__VA_ARGS__); \ ++} while (0) ++ ++#define AuErr1(fmt, ...) do { \ ++ static unsigned char _c; \ ++ if (!_c++) \ ++ pr_err(fmt, ##__VA_ARGS__); \ ++} while (0) ++ ++#define AuIOErr1(fmt, ...) do { \ ++ static unsigned char _c; \ ++ if (!_c++) \ ++ AuIOErr(fmt, ##__VA_ARGS__); \ ++} while (0) ++ ++#define AuUnsupportMsg "This operation is not supported." \ ++ " Please report this application to aufs-users ML." ++#define AuUnsupport(fmt, ...) do { \ ++ pr_err(AuUnsupportMsg "\n" fmt, ##__VA_ARGS__); \ ++ dump_stack(); \ ++} while (0) ++ ++#define AuTraceErr(e) do { \ ++ if (unlikely((e) < 0)) \ ++ AuDbg("err %d\n", (int)(e)); \ ++} while (0) ++ ++#define AuTraceErrPtr(p) do { \ ++ if (IS_ERR(p)) \ ++ AuDbg("err %ld\n", PTR_ERR(p)); \ ++} while (0) ++ ++/* dirty macros for debug print, use with "%.*s" and caution */ ++#define AuLNPair(qstr) (qstr)->len, (qstr)->name ++#define AuDLNPair(d) AuLNPair(&(d)->d_name) ++ ++/* ---------------------------------------------------------------------- */ ++ ++struct au_sbinfo; ++struct au_finfo; ++struct dentry; ++#ifdef CONFIG_AUFS_DEBUG ++extern char *au_plevel; ++struct au_nhash; ++void au_dpri_whlist(struct au_nhash *whlist); ++struct au_vdir; ++void au_dpri_vdir(struct au_vdir *vdir); ++struct inode; ++void au_dpri_inode(struct inode *inode); ++void au_dpri_dalias(struct inode *inode); ++void au_dpri_dentry(struct dentry *dentry); ++struct file; ++void au_dpri_file(struct file *filp); ++struct super_block; ++void au_dpri_sb(struct super_block *sb); ++ ++void au_dbg_sleep_jiffy(int jiffy); ++struct iattr; ++void au_dbg_iattr(struct iattr *ia); ++ ++#define au_dbg_verify_dinode(d) __au_dbg_verify_dinode(d, __func__, __LINE__) ++void __au_dbg_verify_dinode(struct dentry *dentry, const char *func, int line); ++void au_dbg_verify_dir_parent(struct dentry *dentry, unsigned int sigen); ++void au_dbg_verify_nondir_parent(struct dentry *dentry, unsigned int sigen); ++void au_dbg_verify_gen(struct dentry *parent, unsigned int sigen); ++void au_dbg_verify_kthread(void); ++ ++int __init au_debug_init(void); ++void au_debug_sbinfo_init(struct au_sbinfo *sbinfo); ++#define AuDbgWhlist(w) do { \ ++ AuDbg(#w "\n"); \ ++ au_dpri_whlist(w); \ ++} while (0) ++ ++#define AuDbgVdir(v) do { \ ++ AuDbg(#v "\n"); \ ++ au_dpri_vdir(v); \ ++} while (0) ++ 
++#define AuDbgInode(i) do { \ ++ AuDbg(#i "\n"); \ ++ au_dpri_inode(i); \ ++} while (0) ++ ++#define AuDbgDAlias(i) do { \ ++ AuDbg(#i "\n"); \ ++ au_dpri_dalias(i); \ ++} while (0) ++ ++#define AuDbgDentry(d) do { \ ++ AuDbg(#d "\n"); \ ++ au_dpri_dentry(d); \ ++} while (0) ++ ++#define AuDbgFile(f) do { \ ++ AuDbg(#f "\n"); \ ++ au_dpri_file(f); \ ++} while (0) ++ ++#define AuDbgSb(sb) do { \ ++ AuDbg(#sb "\n"); \ ++ au_dpri_sb(sb); \ ++} while (0) ++ ++#define AuDbgSleep(sec) do { \ ++ AuDbg("sleep %d sec\n", sec); \ ++ ssleep(sec); \ ++} while (0) ++ ++#define AuDbgSleepJiffy(jiffy) do { \ ++ AuDbg("sleep %d jiffies\n", jiffy); \ ++ au_dbg_sleep_jiffy(jiffy); \ ++} while (0) ++ ++#define AuDbgIAttr(ia) do { \ ++ AuDbg("ia_valid 0x%x\n", (ia)->ia_valid); \ ++ au_dbg_iattr(ia); \ ++} while (0) ++ ++#define AuDbgSym(addr) do { \ ++ char sym[KSYM_SYMBOL_LEN]; \ ++ sprint_symbol(sym, (unsigned long)addr); \ ++ AuDbg("%s\n", sym); \ ++} while (0) ++ ++#define AuInfoSym(addr) do { \ ++ char sym[KSYM_SYMBOL_LEN]; \ ++ sprint_symbol(sym, (unsigned long)addr); \ ++ AuInfo("%s\n", sym); \ ++} while (0) ++#else ++AuStubVoid(au_dbg_verify_dinode, struct dentry *dentry) ++AuStubVoid(au_dbg_verify_dir_parent, struct dentry *dentry, unsigned int sigen) ++AuStubVoid(au_dbg_verify_nondir_parent, struct dentry *dentry, ++ unsigned int sigen) ++AuStubVoid(au_dbg_verify_gen, struct dentry *parent, unsigned int sigen) ++AuStubVoid(au_dbg_verify_kthread, void) ++AuStubInt0(__init au_debug_init, void) ++AuStubVoid(au_debug_sbinfo_init, struct au_sbinfo *sbinfo) ++ ++#define AuDbgWhlist(w) do {} while (0) ++#define AuDbgVdir(v) do {} while (0) ++#define AuDbgInode(i) do {} while (0) ++#define AuDbgDAlias(i) do {} while (0) ++#define AuDbgDentry(d) do {} while (0) ++#define AuDbgFile(f) do {} while (0) ++#define AuDbgSb(sb) do {} while (0) ++#define AuDbgSleep(sec) do {} while (0) ++#define AuDbgSleepJiffy(jiffy) do {} while (0) ++#define AuDbgIAttr(ia) do {} while (0) ++#define AuDbgSym(addr) do {} while (0) ++#define AuInfoSym(addr) do {} while (0) ++#endif /* CONFIG_AUFS_DEBUG */ ++ ++/* ---------------------------------------------------------------------- */ ++ ++#ifdef CONFIG_AUFS_MAGIC_SYSRQ ++int __init au_sysrq_init(void); ++void au_sysrq_fin(void); ++ ++#ifdef CONFIG_HW_CONSOLE ++#define au_dbg_blocked() do { \ ++ WARN_ON(1); \ ++ handle_sysrq('w'); \ ++} while (0) ++#else ++AuStubVoid(au_dbg_blocked, void) ++#endif ++ ++#else ++AuStubInt0(__init au_sysrq_init, void) ++AuStubVoid(au_sysrq_fin, void) ++AuStubVoid(au_dbg_blocked, void) ++#endif /* CONFIG_AUFS_MAGIC_SYSRQ */ ++ ++#endif /* __KERNEL__ */ ++#endif /* __AUFS_DEBUG_H__ */ +--- /dev/null 2012-03-14 12:35:58.848999748 +0100 ++++ b/fs/aufs/dentry.c 2012-03-20 17:31:17.000000000 +0100 +@@ -0,0 +1,1140 @@ ++/* ++ * Copyright (C) 2005-2012 Junjiro R. Okajima ++ * ++ * This program, aufs is free software; you can redistribute it and/or modify ++ * it under the terms of the GNU General Public License as published by ++ * the Free Software Foundation; either version 2 of the License, or ++ * (at your option) any later version. ++ * ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ * GNU General Public License for more details. 
++ * ++ * You should have received a copy of the GNU General Public License ++ * along with this program; if not, write to the Free Software ++ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA ++ */ ++ ++/* ++ * lookup and dentry operations ++ */ ++ ++#include ++#include "aufs.h" ++ ++static void au_h_nd(struct nameidata *h_nd, struct nameidata *nd) ++{ ++ if (nd) { ++ *h_nd = *nd; ++ ++ /* ++ * gave up supporting LOOKUP_CREATE/OPEN for lower fs, ++ * due to whiteout and branch permission. ++ */ ++ h_nd->flags &= ~(/*LOOKUP_PARENT |*/ LOOKUP_OPEN | LOOKUP_CREATE ++ | LOOKUP_FOLLOW | LOOKUP_EXCL); ++ /* unnecessary? */ ++ h_nd->intent.open.file = NULL; ++ } else ++ memset(h_nd, 0, sizeof(*h_nd)); ++} ++ ++struct au_lkup_one_args { ++ struct dentry **errp; ++ struct qstr *name; ++ struct dentry *h_parent; ++ struct au_branch *br; ++ struct nameidata *nd; ++}; ++ ++struct dentry *au_lkup_one(struct qstr *name, struct dentry *h_parent, ++ struct au_branch *br, struct nameidata *nd) ++{ ++ struct dentry *h_dentry; ++ int err; ++ struct nameidata h_nd; ++ ++ if (au_test_fs_null_nd(h_parent->d_sb)) ++ return vfsub_lookup_one_len(name->name, h_parent, name->len); ++ ++ au_h_nd(&h_nd, nd); ++ h_nd.path.dentry = h_parent; ++ h_nd.path.mnt = br->br_mnt; ++ ++ err = vfsub_name_hash(name->name, &h_nd.last, name->len); ++ h_dentry = ERR_PTR(err); ++ if (!err) { ++ path_get(&h_nd.path); ++ h_dentry = vfsub_lookup_hash(&h_nd); ++ path_put(&h_nd.path); ++ } ++ ++ AuTraceErrPtr(h_dentry); ++ return h_dentry; ++} ++ ++static void au_call_lkup_one(void *args) ++{ ++ struct au_lkup_one_args *a = args; ++ *a->errp = au_lkup_one(a->name, a->h_parent, a->br, a->nd); ++} ++ ++#define AuLkup_ALLOW_NEG 1 ++#define au_ftest_lkup(flags, name) ((flags) & AuLkup_##name) ++#define au_fset_lkup(flags, name) \ ++ do { (flags) |= AuLkup_##name; } while (0) ++#define au_fclr_lkup(flags, name) \ ++ do { (flags) &= ~AuLkup_##name; } while (0) ++ ++struct au_do_lookup_args { ++ unsigned int flags; ++ mode_t type; ++ struct nameidata *nd; ++}; ++ ++/* ++ * returns positive/negative dentry, NULL or an error. ++ * NULL means whiteout-ed or not-found. 
++ */ ++static struct dentry* ++au_do_lookup(struct dentry *h_parent, struct dentry *dentry, ++ aufs_bindex_t bindex, struct qstr *wh_name, ++ struct au_do_lookup_args *args) ++{ ++ struct dentry *h_dentry; ++ struct inode *h_inode, *inode; ++ struct au_branch *br; ++ int wh_found, opq; ++ unsigned char wh_able; ++ const unsigned char allow_neg = !!au_ftest_lkup(args->flags, ALLOW_NEG); ++ ++ wh_found = 0; ++ br = au_sbr(dentry->d_sb, bindex); ++ wh_able = !!au_br_whable(br->br_perm); ++ if (wh_able) ++ wh_found = au_wh_test(h_parent, wh_name, br, /*try_sio*/0); ++ h_dentry = ERR_PTR(wh_found); ++ if (!wh_found) ++ goto real_lookup; ++ if (unlikely(wh_found < 0)) ++ goto out; ++ ++ /* We found a whiteout */ ++ /* au_set_dbend(dentry, bindex); */ ++ au_set_dbwh(dentry, bindex); ++ if (!allow_neg) ++ return NULL; /* success */ ++ ++real_lookup: ++ h_dentry = au_lkup_one(&dentry->d_name, h_parent, br, args->nd); ++ if (IS_ERR(h_dentry)) ++ goto out; ++ ++ h_inode = h_dentry->d_inode; ++ if (!h_inode) { ++ if (!allow_neg) ++ goto out_neg; ++ } else if (wh_found ++ || (args->type && args->type != (h_inode->i_mode & S_IFMT))) ++ goto out_neg; ++ ++ if (au_dbend(dentry) <= bindex) ++ au_set_dbend(dentry, bindex); ++ if (au_dbstart(dentry) < 0 || bindex < au_dbstart(dentry)) ++ au_set_dbstart(dentry, bindex); ++ au_set_h_dptr(dentry, bindex, h_dentry); ++ ++ inode = dentry->d_inode; ++ if (!h_inode || !S_ISDIR(h_inode->i_mode) || !wh_able ++ || (inode && !S_ISDIR(inode->i_mode))) ++ goto out; /* success */ ++ ++ mutex_lock_nested(&h_inode->i_mutex, AuLsc_I_CHILD); ++ opq = au_diropq_test(h_dentry, br); ++ mutex_unlock(&h_inode->i_mutex); ++ if (opq > 0) ++ au_set_dbdiropq(dentry, bindex); ++ else if (unlikely(opq < 0)) { ++ au_set_h_dptr(dentry, bindex, NULL); ++ h_dentry = ERR_PTR(opq); ++ } ++ goto out; ++ ++out_neg: ++ dput(h_dentry); ++ h_dentry = NULL; ++out: ++ return h_dentry; ++} ++ ++static int au_test_shwh(struct super_block *sb, const struct qstr *name) ++{ ++ if (unlikely(!au_opt_test(au_mntflags(sb), SHWH) ++ && !strncmp(name->name, AUFS_WH_PFX, AUFS_WH_PFX_LEN))) ++ return -EPERM; ++ return 0; ++} ++ ++/* ++ * returns the number of lower positive dentries, ++ * otherwise an error. ++ * can be called at unlinking with @type is zero. 
++ */ ++int au_lkup_dentry(struct dentry *dentry, aufs_bindex_t bstart, mode_t type, ++ struct nameidata *nd) ++{ ++ int npositive, err; ++ aufs_bindex_t bindex, btail, bdiropq; ++ unsigned char isdir; ++ struct qstr whname; ++ struct au_do_lookup_args args = { ++ .flags = 0, ++ .type = type, ++ .nd = nd ++ }; ++ const struct qstr *name = &dentry->d_name; ++ struct dentry *parent; ++ struct inode *inode; ++ ++ err = au_test_shwh(dentry->d_sb, name); ++ if (unlikely(err)) ++ goto out; ++ ++ err = au_wh_name_alloc(&whname, name); ++ if (unlikely(err)) ++ goto out; ++ ++ inode = dentry->d_inode; ++ isdir = !!(inode && S_ISDIR(inode->i_mode)); ++ if (!type) ++ au_fset_lkup(args.flags, ALLOW_NEG); ++ ++ npositive = 0; ++ parent = dget_parent(dentry); ++ btail = au_dbtaildir(parent); ++ for (bindex = bstart; bindex <= btail; bindex++) { ++ struct dentry *h_parent, *h_dentry; ++ struct inode *h_inode, *h_dir; ++ ++ h_dentry = au_h_dptr(dentry, bindex); ++ if (h_dentry) { ++ if (h_dentry->d_inode) ++ npositive++; ++ if (type != S_IFDIR) ++ break; ++ continue; ++ } ++ h_parent = au_h_dptr(parent, bindex); ++ if (!h_parent) ++ continue; ++ h_dir = h_parent->d_inode; ++ if (!h_dir || !S_ISDIR(h_dir->i_mode)) ++ continue; ++ ++ mutex_lock_nested(&h_dir->i_mutex, AuLsc_I_PARENT); ++ h_dentry = au_do_lookup(h_parent, dentry, bindex, &whname, ++ &args); ++ mutex_unlock(&h_dir->i_mutex); ++ err = PTR_ERR(h_dentry); ++ if (IS_ERR(h_dentry)) ++ goto out_parent; ++ au_fclr_lkup(args.flags, ALLOW_NEG); ++ ++ if (au_dbwh(dentry) >= 0) ++ break; ++ if (!h_dentry) ++ continue; ++ h_inode = h_dentry->d_inode; ++ if (!h_inode) ++ continue; ++ npositive++; ++ if (!args.type) ++ args.type = h_inode->i_mode & S_IFMT; ++ if (args.type != S_IFDIR) ++ break; ++ else if (isdir) { ++ /* the type of lower may be different */ ++ bdiropq = au_dbdiropq(dentry); ++ if (bdiropq >= 0 && bdiropq <= bindex) ++ break; ++ } ++ } ++ ++ if (npositive) { ++ AuLabel(positive); ++ au_update_dbstart(dentry); ++ } ++ err = npositive; ++ if (unlikely(!au_opt_test(au_mntflags(dentry->d_sb), UDBA_NONE) ++ && au_dbstart(dentry) < 0)) { ++ err = -EIO; ++ AuIOErr("both of real entry and whiteout found, %.*s, err %d\n", ++ AuDLNPair(dentry), err); ++ } ++ ++out_parent: ++ dput(parent); ++ kfree(whname.name); ++out: ++ return err; ++} ++ ++struct dentry *au_sio_lkup_one(struct qstr *name, struct dentry *parent, ++ struct au_branch *br) ++{ ++ struct dentry *dentry; ++ int wkq_err; ++ ++ if (!au_test_h_perm_sio(parent->d_inode, MAY_EXEC)) ++ dentry = au_lkup_one(name, parent, br, /*nd*/NULL); ++ else { ++ struct au_lkup_one_args args = { ++ .errp = &dentry, ++ .name = name, ++ .h_parent = parent, ++ .br = br, ++ .nd = NULL ++ }; ++ ++ wkq_err = au_wkq_wait(au_call_lkup_one, &args); ++ if (unlikely(wkq_err)) ++ dentry = ERR_PTR(wkq_err); ++ } ++ ++ return dentry; ++} ++ ++/* ++ * lookup @dentry on @bindex which should be negative. 
++ */ ++int au_lkup_neg(struct dentry *dentry, aufs_bindex_t bindex) ++{ ++ int err; ++ struct dentry *parent, *h_parent, *h_dentry; ++ ++ parent = dget_parent(dentry); ++ h_parent = au_h_dptr(parent, bindex); ++ h_dentry = au_sio_lkup_one(&dentry->d_name, h_parent, ++ au_sbr(dentry->d_sb, bindex)); ++ err = PTR_ERR(h_dentry); ++ if (IS_ERR(h_dentry)) ++ goto out; ++ if (unlikely(h_dentry->d_inode)) { ++ err = -EIO; ++ AuIOErr("%.*s should be negative on b%d.\n", ++ AuDLNPair(h_dentry), bindex); ++ dput(h_dentry); ++ goto out; ++ } ++ ++ err = 0; ++ if (bindex < au_dbstart(dentry)) ++ au_set_dbstart(dentry, bindex); ++ if (au_dbend(dentry) < bindex) ++ au_set_dbend(dentry, bindex); ++ au_set_h_dptr(dentry, bindex, h_dentry); ++ ++out: ++ dput(parent); ++ return err; ++} ++ ++/* ---------------------------------------------------------------------- */ ++ ++/* subset of struct inode */ ++struct au_iattr { ++ unsigned long i_ino; ++ /* unsigned int i_nlink; */ ++ uid_t i_uid; ++ gid_t i_gid; ++ u64 i_version; ++/* ++ loff_t i_size; ++ blkcnt_t i_blocks; ++*/ ++ umode_t i_mode; ++}; ++ ++static void au_iattr_save(struct au_iattr *ia, struct inode *h_inode) ++{ ++ ia->i_ino = h_inode->i_ino; ++ /* ia->i_nlink = h_inode->i_nlink; */ ++ ia->i_uid = h_inode->i_uid; ++ ia->i_gid = h_inode->i_gid; ++ ia->i_version = h_inode->i_version; ++/* ++ ia->i_size = h_inode->i_size; ++ ia->i_blocks = h_inode->i_blocks; ++*/ ++ ia->i_mode = (h_inode->i_mode & S_IFMT); ++} ++ ++static int au_iattr_test(struct au_iattr *ia, struct inode *h_inode) ++{ ++ return ia->i_ino != h_inode->i_ino ++ /* || ia->i_nlink != h_inode->i_nlink */ ++ || ia->i_uid != h_inode->i_uid ++ || ia->i_gid != h_inode->i_gid ++ || ia->i_version != h_inode->i_version ++/* ++ || ia->i_size != h_inode->i_size ++ || ia->i_blocks != h_inode->i_blocks ++*/ ++ || ia->i_mode != (h_inode->i_mode & S_IFMT); ++} ++ ++static int au_h_verify_dentry(struct dentry *h_dentry, struct dentry *h_parent, ++ struct au_branch *br) ++{ ++ int err; ++ struct au_iattr ia; ++ struct inode *h_inode; ++ struct dentry *h_d; ++ struct super_block *h_sb; ++ ++ err = 0; ++ memset(&ia, -1, sizeof(ia)); ++ h_sb = h_dentry->d_sb; ++ h_inode = h_dentry->d_inode; ++ if (h_inode) ++ au_iattr_save(&ia, h_inode); ++ else if (au_test_nfs(h_sb) || au_test_fuse(h_sb)) ++ /* nfs d_revalidate may return 0 for negative dentry */ ++ /* fuse d_revalidate always return 0 for negative dentry */ ++ goto out; ++ ++ /* main purpose is namei.c:cached_lookup() and d_revalidate */ ++ h_d = au_lkup_one(&h_dentry->d_name, h_parent, br, /*nd*/NULL); ++ err = PTR_ERR(h_d); ++ if (IS_ERR(h_d)) ++ goto out; ++ ++ err = 0; ++ if (unlikely(h_d != h_dentry ++ || h_d->d_inode != h_inode ++ || (h_inode && au_iattr_test(&ia, h_inode)))) ++ err = au_busy_or_stale(); ++ dput(h_d); ++ ++out: ++ AuTraceErr(err); ++ return err; ++} ++ ++int au_h_verify(struct dentry *h_dentry, unsigned int udba, struct inode *h_dir, ++ struct dentry *h_parent, struct au_branch *br) ++{ ++ int err; ++ ++ err = 0; ++ if (udba == AuOpt_UDBA_REVAL ++ && !au_test_fs_remote(h_dentry->d_sb)) { ++ IMustLock(h_dir); ++ err = (h_dentry->d_parent->d_inode != h_dir); ++ } else if (udba != AuOpt_UDBA_NONE) ++ err = au_h_verify_dentry(h_dentry, h_parent, br); ++ ++ return err; ++} ++ ++/* ---------------------------------------------------------------------- */ ++ ++static int au_do_refresh_hdentry(struct dentry *dentry, struct dentry *parent) ++{ ++ int err; ++ aufs_bindex_t new_bindex, bindex, bend, bwh, bdiropq; ++ struct au_hdentry tmp, 
*p, *q; ++ struct au_dinfo *dinfo; ++ struct super_block *sb; ++ ++ DiMustWriteLock(dentry); ++ ++ sb = dentry->d_sb; ++ dinfo = au_di(dentry); ++ bend = dinfo->di_bend; ++ bwh = dinfo->di_bwh; ++ bdiropq = dinfo->di_bdiropq; ++ p = dinfo->di_hdentry + dinfo->di_bstart; ++ for (bindex = dinfo->di_bstart; bindex <= bend; bindex++, p++) { ++ if (!p->hd_dentry) ++ continue; ++ ++ new_bindex = au_br_index(sb, p->hd_id); ++ if (new_bindex == bindex) ++ continue; ++ ++ if (dinfo->di_bwh == bindex) ++ bwh = new_bindex; ++ if (dinfo->di_bdiropq == bindex) ++ bdiropq = new_bindex; ++ if (new_bindex < 0) { ++ au_hdput(p); ++ p->hd_dentry = NULL; ++ continue; ++ } ++ ++ /* swap two lower dentries, and loop again */ ++ q = dinfo->di_hdentry + new_bindex; ++ tmp = *q; ++ *q = *p; ++ *p = tmp; ++ if (tmp.hd_dentry) { ++ bindex--; ++ p--; ++ } ++ } ++ ++ dinfo->di_bwh = -1; ++ if (bwh >= 0 && bwh <= au_sbend(sb) && au_sbr_whable(sb, bwh)) ++ dinfo->di_bwh = bwh; ++ ++ dinfo->di_bdiropq = -1; ++ if (bdiropq >= 0 ++ && bdiropq <= au_sbend(sb) ++ && au_sbr_whable(sb, bdiropq)) ++ dinfo->di_bdiropq = bdiropq; ++ ++ err = -EIO; ++ dinfo->di_bstart = -1; ++ dinfo->di_bend = -1; ++ bend = au_dbend(parent); ++ p = dinfo->di_hdentry; ++ for (bindex = 0; bindex <= bend; bindex++, p++) ++ if (p->hd_dentry) { ++ dinfo->di_bstart = bindex; ++ break; ++ } ++ ++ if (dinfo->di_bstart >= 0) { ++ p = dinfo->di_hdentry + bend; ++ for (bindex = bend; bindex >= 0; bindex--, p--) ++ if (p->hd_dentry) { ++ dinfo->di_bend = bindex; ++ err = 0; ++ break; ++ } ++ } ++ ++ return err; ++} ++ ++static void au_do_hide(struct dentry *dentry) ++{ ++ struct inode *inode; ++ ++ inode = dentry->d_inode; ++ if (inode) { ++ if (!S_ISDIR(inode->i_mode)) { ++ if (inode->i_nlink && !d_unhashed(dentry)) ++ drop_nlink(inode); ++ } else { ++ clear_nlink(inode); ++ /* stop next lookup */ ++ inode->i_flags |= S_DEAD; ++ } ++ smp_mb(); /* necessary? */ ++ } ++ d_drop(dentry); ++} ++ ++static int au_hide_children(struct dentry *parent) ++{ ++ int err, i, j, ndentry; ++ struct au_dcsub_pages dpages; ++ struct au_dpage *dpage; ++ struct dentry *dentry; ++ ++ err = au_dpages_init(&dpages, GFP_NOFS); ++ if (unlikely(err)) ++ goto out; ++ err = au_dcsub_pages(&dpages, parent, NULL, NULL); ++ if (unlikely(err)) ++ goto out_dpages; ++ ++ /* in reverse order */ ++ for (i = dpages.ndpage - 1; i >= 0; i--) { ++ dpage = dpages.dpages + i; ++ ndentry = dpage->ndentry; ++ for (j = ndentry - 1; j >= 0; j--) { ++ dentry = dpage->dentries[j]; ++ if (dentry != parent) ++ au_do_hide(dentry); ++ } ++ } ++ ++out_dpages: ++ au_dpages_free(&dpages); ++out: ++ return err; ++} ++ ++static void au_hide(struct dentry *dentry) ++{ ++ int err; ++ struct inode *inode; ++ ++ AuDbgDentry(dentry); ++ inode = dentry->d_inode; ++ if (inode && S_ISDIR(inode->i_mode)) { ++ /* shrink_dcache_parent(dentry); */ ++ err = au_hide_children(dentry); ++ if (unlikely(err)) ++ AuIOErr("%.*s, failed hiding children, ignored %d\n", ++ AuDLNPair(dentry), err); ++ } ++ au_do_hide(dentry); ++} ++ ++/* ++ * By adding a dirty branch, a cached dentry may be affected in various ways. 
++ * ++ * a dirty branch is added ++ * - on the top of layers ++ * - in the middle of layers ++ * - to the bottom of layers ++ * ++ * on the added branch there exists ++ * - a whiteout ++ * - a diropq ++ * - a same named entry ++ * + exist ++ * * negative --> positive ++ * * positive --> positive ++ * - type is unchanged ++ * - type is changed ++ * + doesn't exist ++ * * negative --> negative ++ * * positive --> negative (rejected by au_br_del() for non-dir case) ++ * - none ++ */ ++static int au_refresh_by_dinfo(struct dentry *dentry, struct au_dinfo *dinfo, ++ struct au_dinfo *tmp) ++{ ++ int err; ++ aufs_bindex_t bindex, bend; ++ struct { ++ struct dentry *dentry; ++ struct inode *inode; ++ mode_t mode; ++ } orig_h, tmp_h; ++ struct au_hdentry *hd; ++ struct inode *inode, *h_inode; ++ struct dentry *h_dentry; ++ ++ err = 0; ++ AuDebugOn(dinfo->di_bstart < 0); ++ orig_h.dentry = dinfo->di_hdentry[dinfo->di_bstart].hd_dentry; ++ orig_h.inode = orig_h.dentry->d_inode; ++ orig_h.mode = 0; ++ if (orig_h.inode) ++ orig_h.mode = orig_h.inode->i_mode & S_IFMT; ++ memset(&tmp_h, 0, sizeof(tmp_h)); ++ if (tmp->di_bstart >= 0) { ++ tmp_h.dentry = tmp->di_hdentry[tmp->di_bstart].hd_dentry; ++ tmp_h.inode = tmp_h.dentry->d_inode; ++ if (tmp_h.inode) ++ tmp_h.mode = tmp_h.inode->i_mode & S_IFMT; ++ } ++ ++ inode = dentry->d_inode; ++ if (!orig_h.inode) { ++ AuDbg("nagative originally\n"); ++ if (inode) { ++ au_hide(dentry); ++ goto out; ++ } ++ AuDebugOn(inode); ++ AuDebugOn(dinfo->di_bstart != dinfo->di_bend); ++ AuDebugOn(dinfo->di_bdiropq != -1); ++ ++ if (!tmp_h.inode) { ++ AuDbg("negative --> negative\n"); ++ /* should have only one negative lower */ ++ if (tmp->di_bstart >= 0 ++ && tmp->di_bstart < dinfo->di_bstart) { ++ AuDebugOn(tmp->di_bstart != tmp->di_bend); ++ AuDebugOn(dinfo->di_bstart != dinfo->di_bend); ++ au_set_h_dptr(dentry, dinfo->di_bstart, NULL); ++ au_di_cp(dinfo, tmp); ++ hd = tmp->di_hdentry + tmp->di_bstart; ++ au_set_h_dptr(dentry, tmp->di_bstart, ++ dget(hd->hd_dentry)); ++ } ++ au_dbg_verify_dinode(dentry); ++ } else { ++ AuDbg("negative --> positive\n"); ++ /* ++ * similar to the behaviour of creating with bypassing ++ * aufs. ++ * unhash it in order to force an error in the ++ * succeeding create operation. ++ * we should not set S_DEAD here. ++ */ ++ d_drop(dentry); ++ /* au_di_swap(tmp, dinfo); */ ++ au_dbg_verify_dinode(dentry); ++ } ++ } else { ++ AuDbg("positive originally\n"); ++ /* inode may be NULL */ ++ AuDebugOn(inode && (inode->i_mode & S_IFMT) != orig_h.mode); ++ if (!tmp_h.inode) { ++ AuDbg("positive --> negative\n"); ++ /* or bypassing aufs */ ++ au_hide(dentry); ++ if (tmp->di_bwh >= 0 && tmp->di_bwh <= dinfo->di_bstart) ++ dinfo->di_bwh = tmp->di_bwh; ++ if (inode) ++ err = au_refresh_hinode_self(inode); ++ au_dbg_verify_dinode(dentry); ++ } else if (orig_h.mode == tmp_h.mode) { ++ AuDbg("positive --> positive, same type\n"); ++ if (!S_ISDIR(orig_h.mode) ++ && dinfo->di_bstart > tmp->di_bstart) { ++ /* ++ * similar to the behaviour of removing and ++ * creating. 
++ */ ++ au_hide(dentry); ++ if (inode) ++ err = au_refresh_hinode_self(inode); ++ au_dbg_verify_dinode(dentry); ++ } else { ++ /* fill empty slots */ ++ if (dinfo->di_bstart > tmp->di_bstart) ++ dinfo->di_bstart = tmp->di_bstart; ++ if (dinfo->di_bend < tmp->di_bend) ++ dinfo->di_bend = tmp->di_bend; ++ dinfo->di_bwh = tmp->di_bwh; ++ dinfo->di_bdiropq = tmp->di_bdiropq; ++ hd = tmp->di_hdentry; ++ bend = dinfo->di_bend; ++ for (bindex = tmp->di_bstart; bindex <= bend; ++ bindex++) { ++ if (au_h_dptr(dentry, bindex)) ++ continue; ++ h_dentry = hd[bindex].hd_dentry; ++ if (!h_dentry) ++ continue; ++ h_inode = h_dentry->d_inode; ++ AuDebugOn(!h_inode); ++ AuDebugOn(orig_h.mode ++ != (h_inode->i_mode ++ & S_IFMT)); ++ au_set_h_dptr(dentry, bindex, ++ dget(h_dentry)); ++ } ++ err = au_refresh_hinode(inode, dentry); ++ au_dbg_verify_dinode(dentry); ++ } ++ } else { ++ AuDbg("positive --> positive, different type\n"); ++ /* similar to the behaviour of removing and creating */ ++ au_hide(dentry); ++ if (inode) ++ err = au_refresh_hinode_self(inode); ++ au_dbg_verify_dinode(dentry); ++ } ++ } ++ ++out: ++ return err; ++} ++ ++int au_refresh_dentry(struct dentry *dentry, struct dentry *parent) ++{ ++ int err, ebrange; ++ unsigned int sigen; ++ struct au_dinfo *dinfo, *tmp; ++ struct super_block *sb; ++ struct inode *inode; ++ ++ DiMustWriteLock(dentry); ++ AuDebugOn(IS_ROOT(dentry)); ++ AuDebugOn(!parent->d_inode); ++ ++ sb = dentry->d_sb; ++ inode = dentry->d_inode; ++ sigen = au_sigen(sb); ++ err = au_digen_test(parent, sigen); ++ if (unlikely(err)) ++ goto out; ++ ++ dinfo = au_di(dentry); ++ err = au_di_realloc(dinfo, au_sbend(sb) + 1); ++ if (unlikely(err)) ++ goto out; ++ ebrange = au_dbrange_test(dentry); ++ if (!ebrange) ++ ebrange = au_do_refresh_hdentry(dentry, parent); ++ ++ if (d_unhashed(dentry) || ebrange) { ++ AuDebugOn(au_dbstart(dentry) < 0 && au_dbend(dentry) >= 0); ++ if (inode) ++ err = au_refresh_hinode_self(inode); ++ au_dbg_verify_dinode(dentry); ++ if (!err) ++ goto out_dgen; /* success */ ++ goto out; ++ } ++ ++ /* temporary dinfo */ ++ AuDbgDentry(dentry); ++ err = -ENOMEM; ++ tmp = au_di_alloc(sb, AuLsc_DI_TMP); ++ if (unlikely(!tmp)) ++ goto out; ++ au_di_swap(tmp, dinfo); ++ /* returns the number of positive dentries */ ++ /* ++ * if current working dir is removed, it returns an error. ++ * but the dentry is legal. 
++ */ ++ err = au_lkup_dentry(dentry, /*bstart*/0, /*type*/0, /*nd*/NULL); ++ AuDbgDentry(dentry); ++ au_di_swap(tmp, dinfo); ++ if (err == -ENOENT) ++ err = 0; ++ if (err >= 0) { ++ /* compare/refresh by dinfo */ ++ AuDbgDentry(dentry); ++ err = au_refresh_by_dinfo(dentry, dinfo, tmp); ++ au_dbg_verify_dinode(dentry); ++ AuTraceErr(err); ++ } ++ au_rw_write_unlock(&tmp->di_rwsem); ++ au_di_free(tmp); ++ if (unlikely(err)) ++ goto out; ++ ++out_dgen: ++ au_update_digen(dentry); ++out: ++ if (unlikely(err && !(dentry->d_flags & DCACHE_NFSFS_RENAMED))) { ++ AuIOErr("failed refreshing %.*s, %d\n", ++ AuDLNPair(dentry), err); ++ AuDbgDentry(dentry); ++ } ++ AuTraceErr(err); ++ return err; ++} ++ ++static noinline_for_stack ++int au_do_h_d_reval(struct dentry *h_dentry, struct nameidata *nd, ++ struct dentry *dentry, aufs_bindex_t bindex) ++{ ++ int err, valid; ++ int (*reval)(struct dentry *, struct nameidata *); ++ ++ err = 0; ++ if (!(h_dentry->d_flags & DCACHE_OP_REVALIDATE)) ++ goto out; ++ reval = h_dentry->d_op->d_revalidate; ++ ++ AuDbg("b%d\n", bindex); ++ if (au_test_fs_null_nd(h_dentry->d_sb)) ++ /* it may return tri-state */ ++ valid = reval(h_dentry, NULL); ++ else { ++ struct nameidata h_nd; ++ int locked; ++ struct dentry *parent; ++ ++ au_h_nd(&h_nd, nd); ++ parent = nd->path.dentry; ++ locked = (nd && nd->path.dentry != dentry); ++ if (locked) ++ di_read_lock_parent(parent, AuLock_IR); ++ BUG_ON(bindex > au_dbend(parent)); ++ h_nd.path.dentry = au_h_dptr(parent, bindex); ++ BUG_ON(!h_nd.path.dentry); ++ h_nd.path.mnt = au_sbr(parent->d_sb, bindex)->br_mnt; ++ path_get(&h_nd.path); ++ valid = reval(h_dentry, &h_nd); ++ path_put(&h_nd.path); ++ if (locked) ++ di_read_unlock(parent, AuLock_IR); ++ } ++ ++ if (unlikely(valid < 0)) ++ err = valid; ++ else if (!valid) ++ err = -EINVAL; ++ ++out: ++ AuTraceErr(err); ++ return err; ++} ++ ++/* todo: remove this */ ++static int h_d_revalidate(struct dentry *dentry, struct inode *inode, ++ struct nameidata *nd, int do_udba) ++{ ++ int err; ++ umode_t mode, h_mode; ++ aufs_bindex_t bindex, btail, bstart, ibs, ibe; ++ unsigned char plus, unhashed, is_root, h_plus; ++ struct inode *h_inode, *h_cached_inode; ++ struct dentry *h_dentry; ++ struct qstr *name, *h_name; ++ ++ err = 0; ++ plus = 0; ++ mode = 0; ++ ibs = -1; ++ ibe = -1; ++ unhashed = !!d_unhashed(dentry); ++ is_root = !!IS_ROOT(dentry); ++ name = &dentry->d_name; ++ ++ /* ++ * Theoretically, REVAL test should be unnecessary in case of ++ * {FS,I}NOTIFY. ++ * But {fs,i}notify doesn't fire some necessary events, ++ * IN_ATTRIB for atime/nlink/pageio ++ * IN_DELETE for NFS dentry ++ * Let's do REVAL test too. 
++ */ ++ if (do_udba && inode) { ++ mode = (inode->i_mode & S_IFMT); ++ plus = (inode->i_nlink > 0); ++ ibs = au_ibstart(inode); ++ ibe = au_ibend(inode); ++ } ++ ++ bstart = au_dbstart(dentry); ++ btail = bstart; ++ if (inode && S_ISDIR(inode->i_mode)) ++ btail = au_dbtaildir(dentry); ++ for (bindex = bstart; bindex <= btail; bindex++) { ++ h_dentry = au_h_dptr(dentry, bindex); ++ if (!h_dentry) ++ continue; ++ ++ AuDbg("b%d, %.*s\n", bindex, AuDLNPair(h_dentry)); ++ spin_lock(&h_dentry->d_lock); ++ h_name = &h_dentry->d_name; ++ if (unlikely(do_udba ++ && !is_root ++ && (unhashed != !!d_unhashed(h_dentry) ++ || name->len != h_name->len ++ || memcmp(name->name, h_name->name, name->len)) ++ )) { ++ AuDbg("unhash 0x%x 0x%x, %.*s %.*s\n", ++ unhashed, d_unhashed(h_dentry), ++ AuDLNPair(dentry), AuDLNPair(h_dentry)); ++ spin_unlock(&h_dentry->d_lock); ++ goto err; ++ } ++ spin_unlock(&h_dentry->d_lock); ++ ++ err = au_do_h_d_reval(h_dentry, nd, dentry, bindex); ++ if (unlikely(err)) ++ /* do not goto err, to keep the errno */ ++ break; ++ ++ /* todo: plink too? */ ++ if (!do_udba) ++ continue; ++ ++ /* UDBA tests */ ++ h_inode = h_dentry->d_inode; ++ if (unlikely(!!inode != !!h_inode)) ++ goto err; ++ ++ h_plus = plus; ++ h_mode = mode; ++ h_cached_inode = h_inode; ++ if (h_inode) { ++ h_mode = (h_inode->i_mode & S_IFMT); ++ h_plus = (h_inode->i_nlink > 0); ++ } ++ if (inode && ibs <= bindex && bindex <= ibe) ++ h_cached_inode = au_h_iptr(inode, bindex); ++ ++ if (unlikely(plus != h_plus ++ || mode != h_mode ++ || h_cached_inode != h_inode)) ++ goto err; ++ continue; ++ ++ err: ++ err = -EINVAL; ++ break; ++ } ++ ++ return err; ++} ++ ++/* todo: consolidate with do_refresh() and au_reval_for_attr() */ ++static int simple_reval_dpath(struct dentry *dentry, unsigned int sigen) ++{ ++ int err; ++ struct dentry *parent; ++ ++ if (!au_digen_test(dentry, sigen)) ++ return 0; ++ ++ parent = dget_parent(dentry); ++ di_read_lock_parent(parent, AuLock_IR); ++ AuDebugOn(au_digen_test(parent, sigen)); ++ au_dbg_verify_gen(parent, sigen); ++ err = au_refresh_dentry(dentry, parent); ++ di_read_unlock(parent, AuLock_IR); ++ dput(parent); ++ AuTraceErr(err); ++ return err; ++} ++ ++int au_reval_dpath(struct dentry *dentry, unsigned int sigen) ++{ ++ int err; ++ struct dentry *d, *parent; ++ struct inode *inode; ++ ++ if (!au_ftest_si(au_sbi(dentry->d_sb), FAILED_REFRESH_DIR)) ++ return simple_reval_dpath(dentry, sigen); ++ ++ /* slow loop, keep it simple and stupid */ ++ /* cf: au_cpup_dirs() */ ++ err = 0; ++ parent = NULL; ++ while (au_digen_test(dentry, sigen)) { ++ d = dentry; ++ while (1) { ++ dput(parent); ++ parent = dget_parent(d); ++ if (!au_digen_test(parent, sigen)) ++ break; ++ d = parent; ++ } ++ ++ inode = d->d_inode; ++ if (d != dentry) ++ di_write_lock_child2(d); ++ ++ /* someone might update our dentry while we were sleeping */ ++ if (au_digen_test(d, sigen)) { ++ /* ++ * todo: consolidate with simple_reval_dpath(), ++ * do_refresh() and au_reval_for_attr(). ++ */ ++ di_read_lock_parent(parent, AuLock_IR); ++ err = au_refresh_dentry(d, parent); ++ di_read_unlock(parent, AuLock_IR); ++ } ++ ++ if (d != dentry) ++ di_write_unlock(d); ++ dput(parent); ++ if (unlikely(err)) ++ break; ++ } ++ ++ return err; ++} ++ ++/* ++ * if valid returns 1, otherwise 0. ++ */ ++static int aufs_d_revalidate(struct dentry *dentry, struct nameidata *nd) ++{ ++ int valid, err; ++ unsigned int sigen; ++ unsigned char do_udba; ++ struct super_block *sb; ++ struct inode *inode; ++ ++ /* todo: support rcu-walk? 
*/ ++ if (nd && (nd->flags & LOOKUP_RCU)) ++ return -ECHILD; ++ ++ valid = 0; ++ if (unlikely(!au_di(dentry))) ++ goto out; ++ ++ inode = dentry->d_inode; ++ if (inode && is_bad_inode(inode)) ++ goto out; ++ ++ valid = 1; ++ sb = dentry->d_sb; ++ /* ++ * todo: very ugly ++ * i_mutex of parent dir may be held, ++ * but we should not return 'invalid' due to busy. ++ */ ++ err = aufs_read_lock(dentry, AuLock_FLUSH | AuLock_DW | AuLock_NOPLM); ++ if (unlikely(err)) { ++ valid = err; ++ AuTraceErr(err); ++ goto out; ++ } ++ if (unlikely(au_dbrange_test(dentry))) { ++ err = -EINVAL; ++ AuTraceErr(err); ++ goto out_dgrade; ++ } ++ ++ sigen = au_sigen(sb); ++ if (au_digen_test(dentry, sigen)) { ++ AuDebugOn(IS_ROOT(dentry)); ++ err = au_reval_dpath(dentry, sigen); ++ if (unlikely(err)) { ++ AuTraceErr(err); ++ goto out_dgrade; ++ } ++ } ++ di_downgrade_lock(dentry, AuLock_IR); ++ ++ err = -EINVAL; ++ if (inode && (IS_DEADDIR(inode) || !inode->i_nlink)) ++ goto out_inval; ++ ++ do_udba = !au_opt_test(au_mntflags(sb), UDBA_NONE); ++ if (do_udba && inode) { ++ aufs_bindex_t bstart = au_ibstart(inode); ++ struct inode *h_inode; ++ ++ if (bstart >= 0) { ++ h_inode = au_h_iptr(inode, bstart); ++ if (h_inode && au_test_higen(inode, h_inode)) ++ goto out_inval; ++ } ++ } ++ ++ err = h_d_revalidate(dentry, inode, nd, do_udba); ++ if (unlikely(!err && do_udba && au_dbstart(dentry) < 0)) { ++ err = -EIO; ++ AuDbg("both of real entry and whiteout found, %.*s, err %d\n", ++ AuDLNPair(dentry), err); ++ } ++ goto out_inval; ++ ++out_dgrade: ++ di_downgrade_lock(dentry, AuLock_IR); ++out_inval: ++ aufs_read_unlock(dentry, AuLock_IR); ++ AuTraceErr(err); ++ valid = !err; ++out: ++ if (!valid) { ++ AuDbg("%.*s invalid, %d\n", AuDLNPair(dentry), valid); ++ d_drop(dentry); ++ } ++ return valid; ++} ++ ++static void aufs_d_release(struct dentry *dentry) ++{ ++ if (au_di(dentry)) { ++ au_di_fin(dentry); ++ au_hn_di_reinit(dentry); ++ } ++} ++ ++const struct dentry_operations aufs_dop = { ++ .d_revalidate = aufs_d_revalidate, ++ .d_release = aufs_d_release ++}; +--- /dev/null 2012-03-14 12:35:58.848999748 +0100 ++++ b/fs/aufs/dentry.h 2012-03-20 17:31:17.000000000 +0100 +@@ -0,0 +1,237 @@ ++/* ++ * Copyright (C) 2005-2012 Junjiro R. Okajima ++ * ++ * This program, aufs is free software; you can redistribute it and/or modify ++ * it under the terms of the GNU General Public License as published by ++ * the Free Software Foundation; either version 2 of the License, or ++ * (at your option) any later version. ++ * ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ * GNU General Public License for more details. 
++ * ++ * You should have received a copy of the GNU General Public License ++ * along with this program; if not, write to the Free Software ++ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA ++ */ ++ ++/* ++ * lookup and dentry operations ++ */ ++ ++#ifndef __AUFS_DENTRY_H__ ++#define __AUFS_DENTRY_H__ ++ ++#ifdef __KERNEL__ ++ ++#include ++#include "rwsem.h" ++ ++struct au_hdentry { ++ struct dentry *hd_dentry; ++ aufs_bindex_t hd_id; ++}; ++ ++struct au_dinfo { ++ atomic_t di_generation; ++ ++ struct au_rwsem di_rwsem; ++ aufs_bindex_t di_bstart, di_bend, di_bwh, di_bdiropq; ++ struct au_hdentry *di_hdentry; ++} ____cacheline_aligned_in_smp; ++ ++/* ---------------------------------------------------------------------- */ ++ ++/* dentry.c */ ++extern const struct dentry_operations aufs_dop; ++struct au_branch; ++struct dentry *au_lkup_one(struct qstr *name, struct dentry *h_parent, ++ struct au_branch *br, struct nameidata *nd); ++struct dentry *au_sio_lkup_one(struct qstr *name, struct dentry *parent, ++ struct au_branch *br); ++int au_h_verify(struct dentry *h_dentry, unsigned int udba, struct inode *h_dir, ++ struct dentry *h_parent, struct au_branch *br); ++ ++int au_lkup_dentry(struct dentry *dentry, aufs_bindex_t bstart, mode_t type, ++ struct nameidata *nd); ++int au_lkup_neg(struct dentry *dentry, aufs_bindex_t bindex); ++int au_refresh_dentry(struct dentry *dentry, struct dentry *parent); ++int au_reval_dpath(struct dentry *dentry, unsigned int sigen); ++ ++/* dinfo.c */ ++void au_di_init_once(void *_di); ++struct au_dinfo *au_di_alloc(struct super_block *sb, unsigned int lsc); ++void au_di_free(struct au_dinfo *dinfo); ++void au_di_swap(struct au_dinfo *a, struct au_dinfo *b); ++void au_di_cp(struct au_dinfo *dst, struct au_dinfo *src); ++int au_di_init(struct dentry *dentry); ++void au_di_fin(struct dentry *dentry); ++int au_di_realloc(struct au_dinfo *dinfo, int nbr); ++ ++void di_read_lock(struct dentry *d, int flags, unsigned int lsc); ++void di_read_unlock(struct dentry *d, int flags); ++void di_downgrade_lock(struct dentry *d, int flags); ++void di_write_lock(struct dentry *d, unsigned int lsc); ++void di_write_unlock(struct dentry *d); ++void di_write_lock2_child(struct dentry *d1, struct dentry *d2, int isdir); ++void di_write_lock2_parent(struct dentry *d1, struct dentry *d2, int isdir); ++void di_write_unlock2(struct dentry *d1, struct dentry *d2); ++ ++struct dentry *au_h_dptr(struct dentry *dentry, aufs_bindex_t bindex); ++struct dentry *au_h_d_alias(struct dentry *dentry, aufs_bindex_t bindex); ++aufs_bindex_t au_dbtail(struct dentry *dentry); ++aufs_bindex_t au_dbtaildir(struct dentry *dentry); ++ ++void au_set_h_dptr(struct dentry *dentry, aufs_bindex_t bindex, ++ struct dentry *h_dentry); ++int au_digen_test(struct dentry *dentry, unsigned int sigen); ++int au_dbrange_test(struct dentry *dentry); ++void au_update_digen(struct dentry *dentry); ++void au_update_dbrange(struct dentry *dentry, int do_put_zero); ++void au_update_dbstart(struct dentry *dentry); ++void au_update_dbend(struct dentry *dentry); ++int au_find_dbindex(struct dentry *dentry, struct dentry *h_dentry); ++ ++/* ---------------------------------------------------------------------- */ ++ ++static inline struct au_dinfo *au_di(struct dentry *dentry) ++{ ++ return dentry->d_fsdata; ++} ++ ++/* ---------------------------------------------------------------------- */ ++ ++/* lock subclass for dinfo */ ++enum { ++ AuLsc_DI_CHILD, /* child first */ ++ AuLsc_DI_CHILD2, /* 
rename(2), link(2), and cpup at hnotify */ ++ AuLsc_DI_CHILD3, /* copyup dirs */ ++ AuLsc_DI_PARENT, ++ AuLsc_DI_PARENT2, ++ AuLsc_DI_PARENT3, ++ AuLsc_DI_TMP /* temp for replacing dinfo */ ++}; ++ ++/* ++ * di_read_lock_child, di_write_lock_child, ++ * di_read_lock_child2, di_write_lock_child2, ++ * di_read_lock_child3, di_write_lock_child3, ++ * di_read_lock_parent, di_write_lock_parent, ++ * di_read_lock_parent2, di_write_lock_parent2, ++ * di_read_lock_parent3, di_write_lock_parent3, ++ */ ++#define AuReadLockFunc(name, lsc) \ ++static inline void di_read_lock_##name(struct dentry *d, int flags) \ ++{ di_read_lock(d, flags, AuLsc_DI_##lsc); } ++ ++#define AuWriteLockFunc(name, lsc) \ ++static inline void di_write_lock_##name(struct dentry *d) \ ++{ di_write_lock(d, AuLsc_DI_##lsc); } ++ ++#define AuRWLockFuncs(name, lsc) \ ++ AuReadLockFunc(name, lsc) \ ++ AuWriteLockFunc(name, lsc) ++ ++AuRWLockFuncs(child, CHILD); ++AuRWLockFuncs(child2, CHILD2); ++AuRWLockFuncs(child3, CHILD3); ++AuRWLockFuncs(parent, PARENT); ++AuRWLockFuncs(parent2, PARENT2); ++AuRWLockFuncs(parent3, PARENT3); ++ ++#undef AuReadLockFunc ++#undef AuWriteLockFunc ++#undef AuRWLockFuncs ++ ++#define DiMustNoWaiters(d) AuRwMustNoWaiters(&au_di(d)->di_rwsem) ++#define DiMustAnyLock(d) AuRwMustAnyLock(&au_di(d)->di_rwsem) ++#define DiMustWriteLock(d) AuRwMustWriteLock(&au_di(d)->di_rwsem) ++ ++/* ---------------------------------------------------------------------- */ ++ ++/* todo: memory barrier? */ ++static inline unsigned int au_digen(struct dentry *d) ++{ ++ return atomic_read(&au_di(d)->di_generation); ++} ++ ++static inline void au_h_dentry_init(struct au_hdentry *hdentry) ++{ ++ hdentry->hd_dentry = NULL; ++} ++ ++static inline void au_hdput(struct au_hdentry *hd) ++{ ++ if (hd) ++ dput(hd->hd_dentry); ++} ++ ++static inline aufs_bindex_t au_dbstart(struct dentry *dentry) ++{ ++ DiMustAnyLock(dentry); ++ return au_di(dentry)->di_bstart; ++} ++ ++static inline aufs_bindex_t au_dbend(struct dentry *dentry) ++{ ++ DiMustAnyLock(dentry); ++ return au_di(dentry)->di_bend; ++} ++ ++static inline aufs_bindex_t au_dbwh(struct dentry *dentry) ++{ ++ DiMustAnyLock(dentry); ++ return au_di(dentry)->di_bwh; ++} ++ ++static inline aufs_bindex_t au_dbdiropq(struct dentry *dentry) ++{ ++ DiMustAnyLock(dentry); ++ return au_di(dentry)->di_bdiropq; ++} ++ ++/* todo: hard/soft set? 
*/ ++static inline void au_set_dbstart(struct dentry *dentry, aufs_bindex_t bindex) ++{ ++ DiMustWriteLock(dentry); ++ au_di(dentry)->di_bstart = bindex; ++} ++ ++static inline void au_set_dbend(struct dentry *dentry, aufs_bindex_t bindex) ++{ ++ DiMustWriteLock(dentry); ++ au_di(dentry)->di_bend = bindex; ++} ++ ++static inline void au_set_dbwh(struct dentry *dentry, aufs_bindex_t bindex) ++{ ++ DiMustWriteLock(dentry); ++ /* dbwh can be outside of bstart - bend range */ ++ au_di(dentry)->di_bwh = bindex; ++} ++ ++static inline void au_set_dbdiropq(struct dentry *dentry, aufs_bindex_t bindex) ++{ ++ DiMustWriteLock(dentry); ++ au_di(dentry)->di_bdiropq = bindex; ++} ++ ++/* ---------------------------------------------------------------------- */ ++ ++#ifdef CONFIG_AUFS_HNOTIFY ++static inline void au_digen_dec(struct dentry *d) ++{ ++ atomic_dec(&au_di(d)->di_generation); ++} ++ ++static inline void au_hn_di_reinit(struct dentry *dentry) ++{ ++ dentry->d_fsdata = NULL; ++} ++#else ++AuStubVoid(au_hn_di_reinit, struct dentry *dentry __maybe_unused) ++#endif /* CONFIG_AUFS_HNOTIFY */ ++ ++#endif /* __KERNEL__ */ ++#endif /* __AUFS_DENTRY_H__ */ +--- /dev/null 2012-03-14 12:35:58.848999748 +0100 ++++ b/fs/aufs/dinfo.c 2012-03-20 17:31:17.000000000 +0100 +@@ -0,0 +1,543 @@ ++/* ++ * Copyright (C) 2005-2012 Junjiro R. Okajima ++ * ++ * This program, aufs is free software; you can redistribute it and/or modify ++ * it under the terms of the GNU General Public License as published by ++ * the Free Software Foundation; either version 2 of the License, or ++ * (at your option) any later version. ++ * ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ * GNU General Public License for more details. 
++ * ++ * You should have received a copy of the GNU General Public License ++ * along with this program; if not, write to the Free Software ++ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA ++ */ ++ ++/* ++ * dentry private data ++ */ ++ ++#include "aufs.h" ++ ++void au_di_init_once(void *_dinfo) ++{ ++ struct au_dinfo *dinfo = _dinfo; ++ static struct lock_class_key aufs_di; ++ ++ au_rw_init(&dinfo->di_rwsem); ++ au_rw_class(&dinfo->di_rwsem, &aufs_di); ++} ++ ++struct au_dinfo *au_di_alloc(struct super_block *sb, unsigned int lsc) ++{ ++ struct au_dinfo *dinfo; ++ int nbr, i; ++ ++ dinfo = au_cache_alloc_dinfo(); ++ if (unlikely(!dinfo)) ++ goto out; ++ ++ nbr = au_sbend(sb) + 1; ++ if (nbr <= 0) ++ nbr = 1; ++ dinfo->di_hdentry = kcalloc(nbr, sizeof(*dinfo->di_hdentry), GFP_NOFS); ++ if (dinfo->di_hdentry) { ++ au_rw_write_lock_nested(&dinfo->di_rwsem, lsc); ++ dinfo->di_bstart = -1; ++ dinfo->di_bend = -1; ++ dinfo->di_bwh = -1; ++ dinfo->di_bdiropq = -1; ++ for (i = 0; i < nbr; i++) ++ dinfo->di_hdentry[i].hd_id = -1; ++ goto out; ++ } ++ ++ au_cache_free_dinfo(dinfo); ++ dinfo = NULL; ++ ++out: ++ return dinfo; ++} ++ ++void au_di_free(struct au_dinfo *dinfo) ++{ ++ struct au_hdentry *p; ++ aufs_bindex_t bend, bindex; ++ ++ /* dentry may not be revalidated */ ++ bindex = dinfo->di_bstart; ++ if (bindex >= 0) { ++ bend = dinfo->di_bend; ++ p = dinfo->di_hdentry + bindex; ++ while (bindex++ <= bend) ++ au_hdput(p++); ++ } ++ kfree(dinfo->di_hdentry); ++ au_cache_free_dinfo(dinfo); ++} ++ ++void au_di_swap(struct au_dinfo *a, struct au_dinfo *b) ++{ ++ struct au_hdentry *p; ++ aufs_bindex_t bi; ++ ++ AuRwMustWriteLock(&a->di_rwsem); ++ AuRwMustWriteLock(&b->di_rwsem); ++ ++#define DiSwap(v, name) \ ++ do { \ ++ v = a->di_##name; \ ++ a->di_##name = b->di_##name; \ ++ b->di_##name = v; \ ++ } while (0) ++ ++ DiSwap(p, hdentry); ++ DiSwap(bi, bstart); ++ DiSwap(bi, bend); ++ DiSwap(bi, bwh); ++ DiSwap(bi, bdiropq); ++ /* smp_mb(); */ ++ ++#undef DiSwap ++} ++ ++void au_di_cp(struct au_dinfo *dst, struct au_dinfo *src) ++{ ++ AuRwMustWriteLock(&dst->di_rwsem); ++ AuRwMustWriteLock(&src->di_rwsem); ++ ++ dst->di_bstart = src->di_bstart; ++ dst->di_bend = src->di_bend; ++ dst->di_bwh = src->di_bwh; ++ dst->di_bdiropq = src->di_bdiropq; ++ /* smp_mb(); */ ++} ++ ++int au_di_init(struct dentry *dentry) ++{ ++ int err; ++ struct super_block *sb; ++ struct au_dinfo *dinfo; ++ ++ err = 0; ++ sb = dentry->d_sb; ++ dinfo = au_di_alloc(sb, AuLsc_DI_CHILD); ++ if (dinfo) { ++ atomic_set(&dinfo->di_generation, au_sigen(sb)); ++ /* smp_mb(); */ /* atomic_set */ ++ dentry->d_fsdata = dinfo; ++ } else ++ err = -ENOMEM; ++ ++ return err; ++} ++ ++void au_di_fin(struct dentry *dentry) ++{ ++ struct au_dinfo *dinfo; ++ ++ dinfo = au_di(dentry); ++ AuRwDestroy(&dinfo->di_rwsem); ++ au_di_free(dinfo); ++} ++ ++int au_di_realloc(struct au_dinfo *dinfo, int nbr) ++{ ++ int err, sz; ++ struct au_hdentry *hdp; ++ ++ AuRwMustWriteLock(&dinfo->di_rwsem); ++ ++ err = -ENOMEM; ++ sz = sizeof(*hdp) * (dinfo->di_bend + 1); ++ if (!sz) ++ sz = sizeof(*hdp); ++ hdp = au_kzrealloc(dinfo->di_hdentry, sz, sizeof(*hdp) * nbr, GFP_NOFS); ++ if (hdp) { ++ dinfo->di_hdentry = hdp; ++ err = 0; ++ } ++ ++ return err; ++} ++ ++/* ---------------------------------------------------------------------- */ ++ ++static void do_ii_write_lock(struct inode *inode, unsigned int lsc) ++{ ++ switch (lsc) { ++ case AuLsc_DI_CHILD: ++ ii_write_lock_child(inode); ++ break; ++ case AuLsc_DI_CHILD2: ++ 
ii_write_lock_child2(inode); ++ break; ++ case AuLsc_DI_CHILD3: ++ ii_write_lock_child3(inode); ++ break; ++ case AuLsc_DI_PARENT: ++ ii_write_lock_parent(inode); ++ break; ++ case AuLsc_DI_PARENT2: ++ ii_write_lock_parent2(inode); ++ break; ++ case AuLsc_DI_PARENT3: ++ ii_write_lock_parent3(inode); ++ break; ++ default: ++ BUG(); ++ } ++} ++ ++static void do_ii_read_lock(struct inode *inode, unsigned int lsc) ++{ ++ switch (lsc) { ++ case AuLsc_DI_CHILD: ++ ii_read_lock_child(inode); ++ break; ++ case AuLsc_DI_CHILD2: ++ ii_read_lock_child2(inode); ++ break; ++ case AuLsc_DI_CHILD3: ++ ii_read_lock_child3(inode); ++ break; ++ case AuLsc_DI_PARENT: ++ ii_read_lock_parent(inode); ++ break; ++ case AuLsc_DI_PARENT2: ++ ii_read_lock_parent2(inode); ++ break; ++ case AuLsc_DI_PARENT3: ++ ii_read_lock_parent3(inode); ++ break; ++ default: ++ BUG(); ++ } ++} ++ ++void di_read_lock(struct dentry *d, int flags, unsigned int lsc) ++{ ++ au_rw_read_lock_nested(&au_di(d)->di_rwsem, lsc); ++ if (d->d_inode) { ++ if (au_ftest_lock(flags, IW)) ++ do_ii_write_lock(d->d_inode, lsc); ++ else if (au_ftest_lock(flags, IR)) ++ do_ii_read_lock(d->d_inode, lsc); ++ } ++} ++ ++void di_read_unlock(struct dentry *d, int flags) ++{ ++ if (d->d_inode) { ++ if (au_ftest_lock(flags, IW)) { ++ au_dbg_verify_dinode(d); ++ ii_write_unlock(d->d_inode); ++ } else if (au_ftest_lock(flags, IR)) { ++ au_dbg_verify_dinode(d); ++ ii_read_unlock(d->d_inode); ++ } ++ } ++ au_rw_read_unlock(&au_di(d)->di_rwsem); ++} ++ ++void di_downgrade_lock(struct dentry *d, int flags) ++{ ++ if (d->d_inode && au_ftest_lock(flags, IR)) ++ ii_downgrade_lock(d->d_inode); ++ au_rw_dgrade_lock(&au_di(d)->di_rwsem); ++} ++ ++void di_write_lock(struct dentry *d, unsigned int lsc) ++{ ++ au_rw_write_lock_nested(&au_di(d)->di_rwsem, lsc); ++ if (d->d_inode) ++ do_ii_write_lock(d->d_inode, lsc); ++} ++ ++void di_write_unlock(struct dentry *d) ++{ ++ au_dbg_verify_dinode(d); ++ if (d->d_inode) ++ ii_write_unlock(d->d_inode); ++ au_rw_write_unlock(&au_di(d)->di_rwsem); ++} ++ ++void di_write_lock2_child(struct dentry *d1, struct dentry *d2, int isdir) ++{ ++ AuDebugOn(d1 == d2 ++ || d1->d_inode == d2->d_inode ++ || d1->d_sb != d2->d_sb); ++ ++ if (isdir && au_test_subdir(d1, d2)) { ++ di_write_lock_child(d1); ++ di_write_lock_child2(d2); ++ } else { ++ /* there should be no races */ ++ di_write_lock_child(d2); ++ di_write_lock_child2(d1); ++ } ++} ++ ++void di_write_lock2_parent(struct dentry *d1, struct dentry *d2, int isdir) ++{ ++ AuDebugOn(d1 == d2 ++ || d1->d_inode == d2->d_inode ++ || d1->d_sb != d2->d_sb); ++ ++ if (isdir && au_test_subdir(d1, d2)) { ++ di_write_lock_parent(d1); ++ di_write_lock_parent2(d2); ++ } else { ++ /* there should be no races */ ++ di_write_lock_parent(d2); ++ di_write_lock_parent2(d1); ++ } ++} ++ ++void di_write_unlock2(struct dentry *d1, struct dentry *d2) ++{ ++ di_write_unlock(d1); ++ if (d1->d_inode == d2->d_inode) ++ au_rw_write_unlock(&au_di(d2)->di_rwsem); ++ else ++ di_write_unlock(d2); ++} ++ ++/* ---------------------------------------------------------------------- */ ++ ++struct dentry *au_h_dptr(struct dentry *dentry, aufs_bindex_t bindex) ++{ ++ struct dentry *d; ++ ++ DiMustAnyLock(dentry); ++ ++ if (au_dbstart(dentry) < 0 || bindex < au_dbstart(dentry)) ++ return NULL; ++ AuDebugOn(bindex < 0); ++ d = au_di(dentry)->di_hdentry[0 + bindex].hd_dentry; ++ AuDebugOn(d && d->d_count <= 0); ++ return d; ++} ++ ++/* ++ * extended version of au_h_dptr(). 
++ * returns a hashed and positive h_dentry in bindex, NULL, or error. ++ */ ++struct dentry *au_h_d_alias(struct dentry *dentry, aufs_bindex_t bindex) ++{ ++ struct dentry *h_dentry; ++ struct inode *inode, *h_inode; ++ ++ inode = dentry->d_inode; ++ AuDebugOn(!inode); ++ ++ h_dentry = NULL; ++ if (au_dbstart(dentry) <= bindex ++ && bindex <= au_dbend(dentry)) ++ h_dentry = au_h_dptr(dentry, bindex); ++ if (h_dentry && !au_d_hashed_positive(h_dentry)) { ++ dget(h_dentry); ++ goto out; /* success */ ++ } ++ ++ AuDebugOn(bindex < au_ibstart(inode)); ++ AuDebugOn(au_ibend(inode) < bindex); ++ h_inode = au_h_iptr(inode, bindex); ++ h_dentry = d_find_alias(h_inode); ++ if (h_dentry) { ++ if (!IS_ERR(h_dentry)) { ++ if (!au_d_hashed_positive(h_dentry)) ++ goto out; /* success */ ++ dput(h_dentry); ++ } else ++ goto out; ++ } ++ ++ if (au_opt_test(au_mntflags(dentry->d_sb), PLINK)) { ++ h_dentry = au_plink_lkup(inode, bindex); ++ AuDebugOn(!h_dentry); ++ if (!IS_ERR(h_dentry)) { ++ if (!au_d_hashed_positive(h_dentry)) ++ goto out; /* success */ ++ dput(h_dentry); ++ h_dentry = NULL; ++ } ++ } ++ ++out: ++ AuDbgDentry(h_dentry); ++ return h_dentry; ++} ++ ++aufs_bindex_t au_dbtail(struct dentry *dentry) ++{ ++ aufs_bindex_t bend, bwh; ++ ++ bend = au_dbend(dentry); ++ if (0 <= bend) { ++ bwh = au_dbwh(dentry); ++ if (!bwh) ++ return bwh; ++ if (0 < bwh && bwh < bend) ++ return bwh - 1; ++ } ++ return bend; ++} ++ ++aufs_bindex_t au_dbtaildir(struct dentry *dentry) ++{ ++ aufs_bindex_t bend, bopq; ++ ++ bend = au_dbtail(dentry); ++ if (0 <= bend) { ++ bopq = au_dbdiropq(dentry); ++ if (0 <= bopq && bopq < bend) ++ bend = bopq; ++ } ++ return bend; ++} ++ ++/* ---------------------------------------------------------------------- */ ++ ++void au_set_h_dptr(struct dentry *dentry, aufs_bindex_t bindex, ++ struct dentry *h_dentry) ++{ ++ struct au_hdentry *hd = au_di(dentry)->di_hdentry + bindex; ++ struct au_branch *br; ++ ++ DiMustWriteLock(dentry); ++ ++ au_hdput(hd); ++ hd->hd_dentry = h_dentry; ++ if (h_dentry) { ++ br = au_sbr(dentry->d_sb, bindex); ++ hd->hd_id = br->br_id; ++ } ++} ++ ++int au_dbrange_test(struct dentry *dentry) ++{ ++ int err; ++ aufs_bindex_t bstart, bend; ++ ++ err = 0; ++ bstart = au_dbstart(dentry); ++ bend = au_dbend(dentry); ++ if (bstart >= 0) ++ AuDebugOn(bend < 0 && bstart > bend); ++ else { ++ err = -EIO; ++ AuDebugOn(bend >= 0); ++ } ++ ++ return err; ++} ++ ++int au_digen_test(struct dentry *dentry, unsigned int sigen) ++{ ++ int err; ++ ++ err = 0; ++ if (unlikely(au_digen(dentry) != sigen ++ || au_iigen_test(dentry->d_inode, sigen))) ++ err = -EIO; ++ ++ return err; ++} ++ ++void au_update_digen(struct dentry *dentry) ++{ ++ atomic_set(&au_di(dentry)->di_generation, au_sigen(dentry->d_sb)); ++ /* smp_mb(); */ /* atomic_set */ ++} ++ ++void au_update_dbrange(struct dentry *dentry, int do_put_zero) ++{ ++ struct au_dinfo *dinfo; ++ struct dentry *h_d; ++ struct au_hdentry *hdp; ++ ++ DiMustWriteLock(dentry); ++ ++ dinfo = au_di(dentry); ++ if (!dinfo || dinfo->di_bstart < 0) ++ return; ++ ++ hdp = dinfo->di_hdentry; ++ if (do_put_zero) { ++ aufs_bindex_t bindex, bend; ++ ++ bend = dinfo->di_bend; ++ for (bindex = dinfo->di_bstart; bindex <= bend; bindex++) { ++ h_d = hdp[0 + bindex].hd_dentry; ++ if (h_d && !h_d->d_inode) ++ au_set_h_dptr(dentry, bindex, NULL); ++ } ++ } ++ ++ dinfo->di_bstart = -1; ++ while (++dinfo->di_bstart <= dinfo->di_bend) ++ if (hdp[0 + dinfo->di_bstart].hd_dentry) ++ break; ++ if (dinfo->di_bstart > dinfo->di_bend) { ++ dinfo->di_bstart = 
-1; ++ dinfo->di_bend = -1; ++ return; ++ } ++ ++ dinfo->di_bend++; ++ while (0 <= --dinfo->di_bend) ++ if (hdp[0 + dinfo->di_bend].hd_dentry) ++ break; ++ AuDebugOn(dinfo->di_bstart > dinfo->di_bend || dinfo->di_bend < 0); ++} ++ ++void au_update_dbstart(struct dentry *dentry) ++{ ++ aufs_bindex_t bindex, bend; ++ struct dentry *h_dentry; ++ ++ bend = au_dbend(dentry); ++ for (bindex = au_dbstart(dentry); bindex <= bend; bindex++) { ++ h_dentry = au_h_dptr(dentry, bindex); ++ if (!h_dentry) ++ continue; ++ if (h_dentry->d_inode) { ++ au_set_dbstart(dentry, bindex); ++ return; ++ } ++ au_set_h_dptr(dentry, bindex, NULL); ++ } ++} ++ ++void au_update_dbend(struct dentry *dentry) ++{ ++ aufs_bindex_t bindex, bstart; ++ struct dentry *h_dentry; ++ ++ bstart = au_dbstart(dentry); ++ for (bindex = au_dbend(dentry); bindex >= bstart; bindex--) { ++ h_dentry = au_h_dptr(dentry, bindex); ++ if (!h_dentry) ++ continue; ++ if (h_dentry->d_inode) { ++ au_set_dbend(dentry, bindex); ++ return; ++ } ++ au_set_h_dptr(dentry, bindex, NULL); ++ } ++} ++ ++int au_find_dbindex(struct dentry *dentry, struct dentry *h_dentry) ++{ ++ aufs_bindex_t bindex, bend; ++ ++ bend = au_dbend(dentry); ++ for (bindex = au_dbstart(dentry); bindex <= bend; bindex++) ++ if (au_h_dptr(dentry, bindex) == h_dentry) ++ return bindex; ++ return -1; ++} +--- /dev/null 2012-03-14 12:35:58.848999748 +0100 ++++ b/fs/aufs/dir.c 2012-03-20 17:31:17.000000000 +0100 +@@ -0,0 +1,636 @@ ++/* ++ * Copyright (C) 2005-2012 Junjiro R. Okajima ++ * ++ * This program, aufs is free software; you can redistribute it and/or modify ++ * it under the terms of the GNU General Public License as published by ++ * the Free Software Foundation; either version 2 of the License, or ++ * (at your option) any later version. ++ * ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ * GNU General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public License ++ * along with this program; if not, write to the Free Software ++ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA ++ */ ++ ++/* ++ * directory operations ++ */ ++ ++#include ++#include "aufs.h" ++ ++void au_add_nlink(struct inode *dir, struct inode *h_dir) ++{ ++ unsigned int nlink; ++ ++ AuDebugOn(!S_ISDIR(dir->i_mode) || !S_ISDIR(h_dir->i_mode)); ++ ++ nlink = dir->i_nlink; ++ nlink += h_dir->i_nlink - 2; ++ if (h_dir->i_nlink < 2) ++ nlink += 2; ++ /* 0 can happen in revaliding */ ++ vfsub_set_nlink(dir, nlink); ++} ++ ++void au_sub_nlink(struct inode *dir, struct inode *h_dir) ++{ ++ unsigned int nlink; ++ ++ AuDebugOn(!S_ISDIR(dir->i_mode) || !S_ISDIR(h_dir->i_mode)); ++ ++ nlink = dir->i_nlink; ++ nlink -= h_dir->i_nlink - 2; ++ if (h_dir->i_nlink < 2) ++ nlink -= 2; ++ /* no vfsub version. 
nlink == 0 means the branch-fs is broken */ ++ set_nlink(dir, nlink); ++} ++ ++loff_t au_dir_size(struct file *file, struct dentry *dentry) ++{ ++ loff_t sz; ++ aufs_bindex_t bindex, bend; ++ struct file *h_file; ++ struct dentry *h_dentry; ++ ++ sz = 0; ++ if (file) { ++ AuDebugOn(!file->f_dentry); ++ AuDebugOn(!file->f_dentry->d_inode); ++ AuDebugOn(!S_ISDIR(file->f_dentry->d_inode->i_mode)); ++ ++ bend = au_fbend_dir(file); ++ for (bindex = au_fbstart(file); ++ bindex <= bend && sz < KMALLOC_MAX_SIZE; ++ bindex++) { ++ h_file = au_hf_dir(file, bindex); ++ if (h_file ++ && h_file->f_dentry ++ && h_file->f_dentry->d_inode) ++ sz += i_size_read(h_file->f_dentry->d_inode); ++ } ++ } else { ++ AuDebugOn(!dentry); ++ AuDebugOn(!dentry->d_inode); ++ AuDebugOn(!S_ISDIR(dentry->d_inode->i_mode)); ++ ++ bend = au_dbtaildir(dentry); ++ for (bindex = au_dbstart(dentry); ++ bindex <= bend && sz < KMALLOC_MAX_SIZE; ++ bindex++) { ++ h_dentry = au_h_dptr(dentry, bindex); ++ if (h_dentry && h_dentry->d_inode) ++ sz += i_size_read(h_dentry->d_inode); ++ } ++ } ++ if (sz < KMALLOC_MAX_SIZE) ++ sz = roundup_pow_of_two(sz); ++ if (sz > KMALLOC_MAX_SIZE) ++ sz = KMALLOC_MAX_SIZE; ++ else if (sz < NAME_MAX) { ++ BUILD_BUG_ON(AUFS_RDBLK_DEF < NAME_MAX); ++ sz = AUFS_RDBLK_DEF; ++ } ++ return sz; ++} ++ ++/* ---------------------------------------------------------------------- */ ++ ++static int reopen_dir(struct file *file) ++{ ++ int err; ++ unsigned int flags; ++ aufs_bindex_t bindex, btail, bstart; ++ struct dentry *dentry, *h_dentry; ++ struct file *h_file; ++ ++ /* open all lower dirs */ ++ dentry = file->f_dentry; ++ bstart = au_dbstart(dentry); ++ for (bindex = au_fbstart(file); bindex < bstart; bindex++) ++ au_set_h_fptr(file, bindex, NULL); ++ au_set_fbstart(file, bstart); ++ ++ btail = au_dbtaildir(dentry); ++ for (bindex = au_fbend_dir(file); btail < bindex; bindex--) ++ au_set_h_fptr(file, bindex, NULL); ++ au_set_fbend_dir(file, btail); ++ ++ flags = vfsub_file_flags(file); ++ for (bindex = bstart; bindex <= btail; bindex++) { ++ h_dentry = au_h_dptr(dentry, bindex); ++ if (!h_dentry) ++ continue; ++ h_file = au_hf_dir(file, bindex); ++ if (h_file) ++ continue; ++ ++ h_file = au_h_open(dentry, bindex, flags, file); ++ err = PTR_ERR(h_file); ++ if (IS_ERR(h_file)) ++ goto out; /* close all? */ ++ au_set_h_fptr(file, bindex, h_file); ++ } ++ au_update_figen(file); ++ /* todo: necessary? */ ++ /* file->f_ra = h_file->f_ra; */ ++ err = 0; ++ ++out: ++ return err; ++} ++ ++static int do_open_dir(struct file *file, int flags) ++{ ++ int err; ++ aufs_bindex_t bindex, btail; ++ struct dentry *dentry, *h_dentry; ++ struct file *h_file; ++ ++ FiMustWriteLock(file); ++ ++ dentry = file->f_dentry; ++ err = au_alive_dir(dentry); ++ if (unlikely(err)) ++ goto out; ++ ++ file->f_version = dentry->d_inode->i_version; ++ bindex = au_dbstart(dentry); ++ au_set_fbstart(file, bindex); ++ btail = au_dbtaildir(dentry); ++ au_set_fbend_dir(file, btail); ++ for (; !err && bindex <= btail; bindex++) { ++ h_dentry = au_h_dptr(dentry, bindex); ++ if (!h_dentry) ++ continue; ++ ++ h_file = au_h_open(dentry, bindex, flags, file); ++ if (IS_ERR(h_file)) { ++ err = PTR_ERR(h_file); ++ break; ++ } ++ au_set_h_fptr(file, bindex, h_file); ++ } ++ au_update_figen(file); ++ /* todo: necessary? 
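au_dir_size() above sums the i_size of every lower directory, rounds the total up to a power of two and clamps it between a small default read block (AUFS_RDBLK_DEF, itself at least NAME_MAX) and KMALLOC_MAX_SIZE, so the later vdir buffer fits in a single kmalloc. A minimal userspace sketch of just that rounding/clamping step follows; the two SKETCH_* limits are illustrative stand-ins for the kernel constants, not their real values.

/* Build: cc -o rdblk rdblk.c */
#include <limits.h>    /* NAME_MAX */
#include <stdio.h>

/* Illustrative stand-ins for KMALLOC_MAX_SIZE and AUFS_RDBLK_DEF. */
#define SKETCH_KMALLOC_MAX (4UL << 20)
#define SKETCH_RDBLK_DEF   512UL

/* Mirror the tail of au_dir_size(): round the summed size up to a power of
 * two, cap it at the kmalloc limit, and never go below the default block. */
static unsigned long rdblk_size(unsigned long sz)
{
    unsigned long r = 1;

    if (sz < SKETCH_KMALLOC_MAX)
        while (r < sz)
            r <<= 1;    /* roundup_pow_of_two() */
    else
        r = sz;
    if (r > SKETCH_KMALLOC_MAX)
        r = SKETCH_KMALLOC_MAX;
    else if (r < NAME_MAX)
        r = SKETCH_RDBLK_DEF;
    return r;
}

int main(void)
{
    unsigned long samples[] = { 0, 100, 4096, 70000, 10UL << 20 };
    unsigned int i;

    for (i = 0; i < sizeof(samples) / sizeof(samples[0]); i++)
        printf("%8lu -> %lu\n", samples[i], rdblk_size(samples[i]));
    return 0;
}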
*/ ++ /* file->f_ra = h_file->f_ra; */ ++ if (!err) ++ return 0; /* success */ ++ ++ /* close all */ ++ for (bindex = au_fbstart(file); bindex <= btail; bindex++) ++ au_set_h_fptr(file, bindex, NULL); ++ au_set_fbstart(file, -1); ++ au_set_fbend_dir(file, -1); ++ ++out: ++ return err; ++} ++ ++static int aufs_open_dir(struct inode *inode __maybe_unused, ++ struct file *file) ++{ ++ int err; ++ struct super_block *sb; ++ struct au_fidir *fidir; ++ ++ err = -ENOMEM; ++ sb = file->f_dentry->d_sb; ++ si_read_lock(sb, AuLock_FLUSH); ++ fidir = au_fidir_alloc(sb); ++ if (fidir) { ++ err = au_do_open(file, do_open_dir, fidir); ++ if (unlikely(err)) ++ kfree(fidir); ++ } ++ si_read_unlock(sb); ++ return err; ++} ++ ++static int aufs_release_dir(struct inode *inode __maybe_unused, ++ struct file *file) ++{ ++ struct au_vdir *vdir_cache; ++ struct au_finfo *finfo; ++ struct au_fidir *fidir; ++ aufs_bindex_t bindex, bend; ++ ++ finfo = au_fi(file); ++ fidir = finfo->fi_hdir; ++ if (fidir) { ++ /* remove me from sb->s_files */ ++ file_sb_list_del(file); ++ ++ vdir_cache = fidir->fd_vdir_cache; /* lock-free */ ++ if (vdir_cache) ++ au_vdir_free(vdir_cache); ++ ++ bindex = finfo->fi_btop; ++ if (bindex >= 0) { ++ /* ++ * calls fput() instead of filp_close(), ++ * since no dnotify or lock for the lower file. ++ */ ++ bend = fidir->fd_bbot; ++ for (; bindex <= bend; bindex++) ++ au_set_h_fptr(file, bindex, NULL); ++ } ++ kfree(fidir); ++ finfo->fi_hdir = NULL; ++ } ++ au_finfo_fin(file); ++ return 0; ++} ++ ++/* ---------------------------------------------------------------------- */ ++ ++static int au_do_flush_dir(struct file *file, fl_owner_t id) ++{ ++ int err; ++ aufs_bindex_t bindex, bend; ++ struct file *h_file; ++ ++ err = 0; ++ bend = au_fbend_dir(file); ++ for (bindex = au_fbstart(file); !err && bindex <= bend; bindex++) { ++ h_file = au_hf_dir(file, bindex); ++ if (h_file) ++ err = vfsub_flush(h_file, id); ++ } ++ return err; ++} ++ ++static int aufs_flush_dir(struct file *file, fl_owner_t id) ++{ ++ return au_do_flush(file, id, au_do_flush_dir); ++} ++ ++/* ---------------------------------------------------------------------- */ ++ ++static int au_do_fsync_dir_no_file(struct dentry *dentry, int datasync) ++{ ++ int err; ++ aufs_bindex_t bend, bindex; ++ struct inode *inode; ++ struct super_block *sb; ++ ++ err = 0; ++ sb = dentry->d_sb; ++ inode = dentry->d_inode; ++ IMustLock(inode); ++ bend = au_dbend(dentry); ++ for (bindex = au_dbstart(dentry); !err && bindex <= bend; bindex++) { ++ struct path h_path; ++ ++ if (au_test_ro(sb, bindex, inode)) ++ continue; ++ h_path.dentry = au_h_dptr(dentry, bindex); ++ if (!h_path.dentry) ++ continue; ++ ++ h_path.mnt = au_sbr_mnt(sb, bindex); ++ err = vfsub_fsync(NULL, &h_path, datasync); ++ } ++ ++ return err; ++} ++ ++static int au_do_fsync_dir(struct file *file, int datasync) ++{ ++ int err; ++ aufs_bindex_t bend, bindex; ++ struct file *h_file; ++ struct super_block *sb; ++ struct inode *inode; ++ ++ err = au_reval_and_lock_fdi(file, reopen_dir, /*wlock*/1); ++ if (unlikely(err)) ++ goto out; ++ ++ sb = file->f_dentry->d_sb; ++ inode = file->f_dentry->d_inode; ++ bend = au_fbend_dir(file); ++ for (bindex = au_fbstart(file); !err && bindex <= bend; bindex++) { ++ h_file = au_hf_dir(file, bindex); ++ if (!h_file || au_test_ro(sb, bindex, inode)) ++ continue; ++ ++ err = vfsub_fsync(h_file, &h_file->f_path, datasync); ++ } ++ ++out: ++ return err; ++} ++ ++/* ++ * @file may be NULL ++ */ ++static int aufs_fsync_dir(struct file *file, loff_t start, 
loff_t end, ++ int datasync) ++{ ++ int err; ++ struct dentry *dentry; ++ struct super_block *sb; ++ struct mutex *mtx; ++ ++ err = 0; ++ dentry = file->f_dentry; ++ mtx = &dentry->d_inode->i_mutex; ++ mutex_lock(mtx); ++ sb = dentry->d_sb; ++ si_noflush_read_lock(sb); ++ if (file) ++ err = au_do_fsync_dir(file, datasync); ++ else { ++ di_write_lock_child(dentry); ++ err = au_do_fsync_dir_no_file(dentry, datasync); ++ } ++ au_cpup_attr_timesizes(dentry->d_inode); ++ di_write_unlock(dentry); ++ if (file) ++ fi_write_unlock(file); ++ ++ si_read_unlock(sb); ++ mutex_unlock(mtx); ++ return err; ++} ++ ++/* ---------------------------------------------------------------------- */ ++ ++static int aufs_readdir(struct file *file, void *dirent, filldir_t filldir) ++{ ++ int err; ++ struct dentry *dentry; ++ struct inode *inode, *h_inode; ++ struct super_block *sb; ++ ++ dentry = file->f_dentry; ++ inode = dentry->d_inode; ++ IMustLock(inode); ++ ++ sb = dentry->d_sb; ++ si_read_lock(sb, AuLock_FLUSH); ++ err = au_reval_and_lock_fdi(file, reopen_dir, /*wlock*/1); ++ if (unlikely(err)) ++ goto out; ++ err = au_alive_dir(dentry); ++ if (!err) ++ err = au_vdir_init(file); ++ di_downgrade_lock(dentry, AuLock_IR); ++ if (unlikely(err)) ++ goto out_unlock; ++ ++ h_inode = au_h_iptr(inode, au_ibstart(inode)); ++ if (!au_test_nfsd()) { ++ err = au_vdir_fill_de(file, dirent, filldir); ++ fsstack_copy_attr_atime(inode, h_inode); ++ } else { ++ /* ++ * nfsd filldir may call lookup_one_len(), vfs_getattr(), ++ * encode_fh() and others. ++ */ ++ atomic_inc(&h_inode->i_count); ++ di_read_unlock(dentry, AuLock_IR); ++ si_read_unlock(sb); ++ err = au_vdir_fill_de(file, dirent, filldir); ++ fsstack_copy_attr_atime(inode, h_inode); ++ fi_write_unlock(file); ++ iput(h_inode); ++ ++ AuTraceErr(err); ++ return err; ++ } ++ ++out_unlock: ++ di_read_unlock(dentry, AuLock_IR); ++ fi_write_unlock(file); ++out: ++ si_read_unlock(sb); ++ return err; ++} ++ ++/* ---------------------------------------------------------------------- */ ++ ++#define AuTestEmpty_WHONLY 1 ++#define AuTestEmpty_CALLED (1 << 1) ++#define AuTestEmpty_SHWH (1 << 2) ++#define au_ftest_testempty(flags, name) ((flags) & AuTestEmpty_##name) ++#define au_fset_testempty(flags, name) \ ++ do { (flags) |= AuTestEmpty_##name; } while (0) ++#define au_fclr_testempty(flags, name) \ ++ do { (flags) &= ~AuTestEmpty_##name; } while (0) ++ ++#ifndef CONFIG_AUFS_SHWH ++#undef AuTestEmpty_SHWH ++#define AuTestEmpty_SHWH 0 ++#endif ++ ++struct test_empty_arg { ++ struct au_nhash *whlist; ++ unsigned int flags; ++ int err; ++ aufs_bindex_t bindex; ++}; ++ ++static int test_empty_cb(void *__arg, const char *__name, int namelen, ++ loff_t offset __maybe_unused, u64 ino, ++ unsigned int d_type) ++{ ++ struct test_empty_arg *arg = __arg; ++ char *name = (void *)__name; ++ ++ arg->err = 0; ++ au_fset_testempty(arg->flags, CALLED); ++ /* smp_mb(); */ ++ if (name[0] == '.' ++ && (namelen == 1 || (name[1] == '.' 
&& namelen == 2))) ++ goto out; /* success */ ++ ++ if (namelen <= AUFS_WH_PFX_LEN ++ || memcmp(name, AUFS_WH_PFX, AUFS_WH_PFX_LEN)) { ++ if (au_ftest_testempty(arg->flags, WHONLY) ++ && !au_nhash_test_known_wh(arg->whlist, name, namelen)) ++ arg->err = -ENOTEMPTY; ++ goto out; ++ } ++ ++ name += AUFS_WH_PFX_LEN; ++ namelen -= AUFS_WH_PFX_LEN; ++ if (!au_nhash_test_known_wh(arg->whlist, name, namelen)) ++ arg->err = au_nhash_append_wh ++ (arg->whlist, name, namelen, ino, d_type, arg->bindex, ++ au_ftest_testempty(arg->flags, SHWH)); ++ ++out: ++ /* smp_mb(); */ ++ AuTraceErr(arg->err); ++ return arg->err; ++} ++ ++static int do_test_empty(struct dentry *dentry, struct test_empty_arg *arg) ++{ ++ int err; ++ struct file *h_file; ++ ++ h_file = au_h_open(dentry, arg->bindex, ++ O_RDONLY | O_NONBLOCK | O_DIRECTORY | O_LARGEFILE, ++ /*file*/NULL); ++ err = PTR_ERR(h_file); ++ if (IS_ERR(h_file)) ++ goto out; ++ ++ err = 0; ++ if (!au_opt_test(au_mntflags(dentry->d_sb), UDBA_NONE) ++ && !h_file->f_dentry->d_inode->i_nlink) ++ goto out_put; ++ ++ do { ++ arg->err = 0; ++ au_fclr_testempty(arg->flags, CALLED); ++ /* smp_mb(); */ ++ err = vfsub_readdir(h_file, test_empty_cb, arg); ++ if (err >= 0) ++ err = arg->err; ++ } while (!err && au_ftest_testempty(arg->flags, CALLED)); ++ ++out_put: ++ fput(h_file); ++ au_sbr_put(dentry->d_sb, arg->bindex); ++out: ++ return err; ++} ++ ++struct do_test_empty_args { ++ int *errp; ++ struct dentry *dentry; ++ struct test_empty_arg *arg; ++}; ++ ++static void call_do_test_empty(void *args) ++{ ++ struct do_test_empty_args *a = args; ++ *a->errp = do_test_empty(a->dentry, a->arg); ++} ++ ++static int sio_test_empty(struct dentry *dentry, struct test_empty_arg *arg) ++{ ++ int err, wkq_err; ++ struct dentry *h_dentry; ++ struct inode *h_inode; ++ ++ h_dentry = au_h_dptr(dentry, arg->bindex); ++ h_inode = h_dentry->d_inode; ++ /* todo: i_mode changes anytime? 
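test_empty_cb() above decides, for each name the lower readdir hands back, whether it is "." or "..", an ordinary entry (which makes the directory non-empty in whiteout-only mode), or a whiteout whose prefix must be stripped before the hidden name is recorded in the nhash. The standalone sketch below reproduces only that classification; the ".wh." string is assumed here as the usual value behind AUFS_WH_PFX.

/* Build: cc -o whclass whclass.c */
#include <stdio.h>
#include <string.h>

/* Assumed whiteout prefix (AUFS_WH_PFX in the kernel source). */
#define WH_PFX     ".wh."
#define WH_PFX_LEN (sizeof(WH_PFX) - 1)

/* Classify one directory entry the way test_empty_cb() does:
 * 0 for "." and "..", 1 for an ordinary name, 2 for a whiteout
 * (reporting the hidden name it refers to). */
static int classify(const char *name, size_t len, const char **hidden)
{
    *hidden = NULL;
    if (name[0] == '.' && (len == 1 || (name[1] == '.' && len == 2)))
        return 0;
    if (len <= WH_PFX_LEN || memcmp(name, WH_PFX, WH_PFX_LEN))
        return 1;
    *hidden = name + WH_PFX_LEN;
    return 2;
}

int main(void)
{
    const char *samples[] = { ".", "..", "data.txt", ".wh.data.txt", ".wh." };
    const char *hidden;
    unsigned int i;

    for (i = 0; i < sizeof(samples) / sizeof(samples[0]); i++) {
        int t = classify(samples[i], strlen(samples[i]), &hidden);
        printf("%-14s -> %d%s%s\n", samples[i], t,
               hidden ? ", hides " : "", hidden ? hidden : "");
    }
    return 0;
}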
*/ ++ mutex_lock_nested(&h_inode->i_mutex, AuLsc_I_CHILD); ++ err = au_test_h_perm_sio(h_inode, MAY_EXEC | MAY_READ); ++ mutex_unlock(&h_inode->i_mutex); ++ if (!err) ++ err = do_test_empty(dentry, arg); ++ else { ++ struct do_test_empty_args args = { ++ .errp = &err, ++ .dentry = dentry, ++ .arg = arg ++ }; ++ unsigned int flags = arg->flags; ++ ++ wkq_err = au_wkq_wait(call_do_test_empty, &args); ++ if (unlikely(wkq_err)) ++ err = wkq_err; ++ arg->flags = flags; ++ } ++ ++ return err; ++} ++ ++int au_test_empty_lower(struct dentry *dentry) ++{ ++ int err; ++ unsigned int rdhash; ++ aufs_bindex_t bindex, bstart, btail; ++ struct au_nhash whlist; ++ struct test_empty_arg arg; ++ ++ SiMustAnyLock(dentry->d_sb); ++ ++ rdhash = au_sbi(dentry->d_sb)->si_rdhash; ++ if (!rdhash) ++ rdhash = au_rdhash_est(au_dir_size(/*file*/NULL, dentry)); ++ err = au_nhash_alloc(&whlist, rdhash, GFP_NOFS); ++ if (unlikely(err)) ++ goto out; ++ ++ arg.flags = 0; ++ arg.whlist = &whlist; ++ bstart = au_dbstart(dentry); ++ if (au_opt_test(au_mntflags(dentry->d_sb), SHWH)) ++ au_fset_testempty(arg.flags, SHWH); ++ arg.bindex = bstart; ++ err = do_test_empty(dentry, &arg); ++ if (unlikely(err)) ++ goto out_whlist; ++ ++ au_fset_testempty(arg.flags, WHONLY); ++ btail = au_dbtaildir(dentry); ++ for (bindex = bstart + 1; !err && bindex <= btail; bindex++) { ++ struct dentry *h_dentry; ++ ++ h_dentry = au_h_dptr(dentry, bindex); ++ if (h_dentry && h_dentry->d_inode) { ++ arg.bindex = bindex; ++ err = do_test_empty(dentry, &arg); ++ } ++ } ++ ++out_whlist: ++ au_nhash_wh_free(&whlist); ++out: ++ return err; ++} ++ ++int au_test_empty(struct dentry *dentry, struct au_nhash *whlist) ++{ ++ int err; ++ struct test_empty_arg arg; ++ aufs_bindex_t bindex, btail; ++ ++ err = 0; ++ arg.whlist = whlist; ++ arg.flags = AuTestEmpty_WHONLY; ++ if (au_opt_test(au_mntflags(dentry->d_sb), SHWH)) ++ au_fset_testempty(arg.flags, SHWH); ++ btail = au_dbtaildir(dentry); ++ for (bindex = au_dbstart(dentry); !err && bindex <= btail; bindex++) { ++ struct dentry *h_dentry; ++ ++ h_dentry = au_h_dptr(dentry, bindex); ++ if (h_dentry && h_dentry->d_inode) { ++ arg.bindex = bindex; ++ err = sio_test_empty(dentry, &arg); ++ } ++ } ++ ++ return err; ++} ++ ++/* ---------------------------------------------------------------------- */ ++ ++const struct file_operations aufs_dir_fop = { ++ .owner = THIS_MODULE, ++ .llseek = default_llseek, ++ .read = generic_read_dir, ++ .readdir = aufs_readdir, ++ .unlocked_ioctl = aufs_ioctl_dir, ++#ifdef CONFIG_COMPAT ++ .compat_ioctl = aufs_compat_ioctl_dir, ++#endif ++ .open = aufs_open_dir, ++ .release = aufs_release_dir, ++ .flush = aufs_flush_dir, ++ .fsync = aufs_fsync_dir ++}; +--- /dev/null 2012-03-14 12:35:58.848999748 +0100 ++++ b/fs/aufs/dir.h 2012-03-20 17:31:17.000000000 +0100 +@@ -0,0 +1,137 @@ ++/* ++ * Copyright (C) 2005-2012 Junjiro R. Okajima ++ * ++ * This program, aufs is free software; you can redistribute it and/or modify ++ * it under the terms of the GNU General Public License as published by ++ * the Free Software Foundation; either version 2 of the License, or ++ * (at your option) any later version. ++ * ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ * GNU General Public License for more details. 
++ * ++ * You should have received a copy of the GNU General Public License ++ * along with this program; if not, write to the Free Software ++ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA ++ */ ++ ++/* ++ * directory operations ++ */ ++ ++#ifndef __AUFS_DIR_H__ ++#define __AUFS_DIR_H__ ++ ++#ifdef __KERNEL__ ++ ++#include ++ ++/* ---------------------------------------------------------------------- */ ++ ++/* need to be faster and smaller */ ++ ++struct au_nhash { ++ unsigned int nh_num; ++ struct hlist_head *nh_head; ++}; ++ ++struct au_vdir_destr { ++ unsigned char len; ++ unsigned char name[0]; ++} __packed; ++ ++struct au_vdir_dehstr { ++ struct hlist_node hash; ++ struct au_vdir_destr *str; ++} ____cacheline_aligned_in_smp; ++ ++struct au_vdir_de { ++ ino_t de_ino; ++ unsigned char de_type; ++ /* caution: packed */ ++ struct au_vdir_destr de_str; ++} __packed; ++ ++struct au_vdir_wh { ++ struct hlist_node wh_hash; ++#ifdef CONFIG_AUFS_SHWH ++ ino_t wh_ino; ++ aufs_bindex_t wh_bindex; ++ unsigned char wh_type; ++#else ++ aufs_bindex_t wh_bindex; ++#endif ++ /* caution: packed */ ++ struct au_vdir_destr wh_str; ++} __packed; ++ ++union au_vdir_deblk_p { ++ unsigned char *deblk; ++ struct au_vdir_de *de; ++}; ++ ++struct au_vdir { ++ unsigned char **vd_deblk; ++ unsigned long vd_nblk; ++ struct { ++ unsigned long ul; ++ union au_vdir_deblk_p p; ++ } vd_last; ++ ++ unsigned long vd_version; ++ unsigned int vd_deblk_sz; ++ unsigned long vd_jiffy; ++} ____cacheline_aligned_in_smp; ++ ++/* ---------------------------------------------------------------------- */ ++ ++/* dir.c */ ++extern const struct file_operations aufs_dir_fop; ++void au_add_nlink(struct inode *dir, struct inode *h_dir); ++void au_sub_nlink(struct inode *dir, struct inode *h_dir); ++loff_t au_dir_size(struct file *file, struct dentry *dentry); ++int au_test_empty_lower(struct dentry *dentry); ++int au_test_empty(struct dentry *dentry, struct au_nhash *whlist); ++ ++/* vdir.c */ ++unsigned int au_rdhash_est(loff_t sz); ++int au_nhash_alloc(struct au_nhash *nhash, unsigned int num_hash, gfp_t gfp); ++void au_nhash_wh_free(struct au_nhash *whlist); ++int au_nhash_test_longer_wh(struct au_nhash *whlist, aufs_bindex_t btgt, ++ int limit); ++int au_nhash_test_known_wh(struct au_nhash *whlist, char *name, int nlen); ++int au_nhash_append_wh(struct au_nhash *whlist, char *name, int nlen, ino_t ino, ++ unsigned int d_type, aufs_bindex_t bindex, ++ unsigned char shwh); ++void au_vdir_free(struct au_vdir *vdir); ++int au_vdir_init(struct file *file); ++int au_vdir_fill_de(struct file *file, void *dirent, filldir_t filldir); ++ ++/* ioctl.c */ ++long aufs_ioctl_dir(struct file *file, unsigned int cmd, unsigned long arg); ++ ++#ifdef CONFIG_AUFS_RDU ++/* rdu.c */ ++long au_rdu_ioctl(struct file *file, unsigned int cmd, unsigned long arg); ++#ifdef CONFIG_COMPAT ++long au_rdu_compat_ioctl(struct file *file, unsigned int cmd, ++ unsigned long arg); ++#endif ++#else ++static inline long au_rdu_ioctl(struct file *file, unsigned int cmd, ++ unsigned long arg) ++{ ++ return -EINVAL; ++} ++#ifdef CONFIG_COMPAT ++static inline long au_rdu_compat_ioctl(struct file *file, unsigned int cmd, ++ unsigned long arg) ++{ ++ return -EINVAL; ++} ++#endif ++#endif ++ ++#endif /* __KERNEL__ */ ++#endif /* __AUFS_DIR_H__ */ +--- /dev/null 2012-03-14 12:35:58.848999748 +0100 ++++ b/fs/aufs/dynop.c 2012-03-20 17:31:17.000000000 +0100 +@@ -0,0 +1,377 @@ ++/* ++ * Copyright (C) 2010-2012 Junjiro R. 
Okajima ++ * ++ * This program, aufs is free software; you can redistribute it and/or modify ++ * it under the terms of the GNU General Public License as published by ++ * the Free Software Foundation; either version 2 of the License, or ++ * (at your option) any later version. ++ * ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ * GNU General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public License ++ * along with this program; if not, write to the Free Software ++ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA ++ */ ++ ++/* ++ * dynamically customizable operations for regular files ++ */ ++ ++#include "aufs.h" ++ ++#define DyPrSym(key) AuDbgSym(key->dk_op.dy_hop) ++ ++/* ++ * How large will these lists be? ++ * Usually just a few elements, 20-30 at most for each, I guess. ++ */ ++static struct au_splhead dynop[AuDyLast]; ++ ++static struct au_dykey *dy_gfind_get(struct au_splhead *spl, const void *h_op) ++{ ++ struct au_dykey *key, *tmp; ++ struct list_head *head; ++ ++ key = NULL; ++ head = &spl->head; ++ rcu_read_lock(); ++ list_for_each_entry_rcu(tmp, head, dk_list) ++ if (tmp->dk_op.dy_hop == h_op) { ++ key = tmp; ++ kref_get(&key->dk_kref); ++ break; ++ } ++ rcu_read_unlock(); ++ ++ return key; ++} ++ ++static struct au_dykey *dy_bradd(struct au_branch *br, struct au_dykey *key) ++{ ++ struct au_dykey **k, *found; ++ const void *h_op = key->dk_op.dy_hop; ++ int i; ++ ++ found = NULL; ++ k = br->br_dykey; ++ for (i = 0; i < AuBrDynOp; i++) ++ if (k[i]) { ++ if (k[i]->dk_op.dy_hop == h_op) { ++ found = k[i]; ++ break; ++ } ++ } else ++ break; ++ if (!found) { ++ spin_lock(&br->br_dykey_lock); ++ for (; i < AuBrDynOp; i++) ++ if (k[i]) { ++ if (k[i]->dk_op.dy_hop == h_op) { ++ found = k[i]; ++ break; ++ } ++ } else { ++ k[i] = key; ++ break; ++ } ++ spin_unlock(&br->br_dykey_lock); ++ BUG_ON(i == AuBrDynOp); /* expand the array */ ++ } ++ ++ return found; ++} ++ ++/* kref_get() if @key is already added */ ++static struct au_dykey *dy_gadd(struct au_splhead *spl, struct au_dykey *key) ++{ ++ struct au_dykey *tmp, *found; ++ struct list_head *head; ++ const void *h_op = key->dk_op.dy_hop; ++ ++ found = NULL; ++ head = &spl->head; ++ spin_lock(&spl->spin); ++ list_for_each_entry(tmp, head, dk_list) ++ if (tmp->dk_op.dy_hop == h_op) { ++ kref_get(&tmp->dk_kref); ++ found = tmp; ++ break; ++ } ++ if (!found) ++ list_add_rcu(&key->dk_list, head); ++ spin_unlock(&spl->spin); ++ ++ if (!found) ++ DyPrSym(key); ++ return found; ++} ++ ++static void dy_free_rcu(struct rcu_head *rcu) ++{ ++ struct au_dykey *key; ++ ++ key = container_of(rcu, struct au_dykey, dk_rcu); ++ DyPrSym(key); ++ kfree(key); ++} ++ ++static void dy_free(struct kref *kref) ++{ ++ struct au_dykey *key; ++ struct au_splhead *spl; ++ ++ key = container_of(kref, struct au_dykey, dk_kref); ++ spl = dynop + key->dk_op.dy_type; ++ au_spl_del_rcu(&key->dk_list, spl); ++ call_rcu(&key->dk_rcu, dy_free_rcu); ++} ++ ++void au_dy_put(struct au_dykey *key) ++{ ++ kref_put(&key->dk_kref, dy_free); ++} ++ ++/* ---------------------------------------------------------------------- */ ++ ++#define DyDbgSize(cnt, op) AuDebugOn(cnt != sizeof(op)/sizeof(void *)) ++ ++#ifdef CONFIG_AUFS_DEBUG ++#define DyDbgDeclare(cnt) unsigned int cnt = 0 ++#define DyDbgInc(cnt) do { cnt++; } while (0) ++#else ++#define 
DyDbgDeclare(cnt) do {} while (0) ++#define DyDbgInc(cnt) do {} while (0) ++#endif ++ ++#define DySet(func, dst, src, h_op, h_sb) do { \ ++ DyDbgInc(cnt); \ ++ if (h_op->func) { \ ++ if (src.func) \ ++ dst.func = src.func; \ ++ else \ ++ AuDbg("%s %s\n", au_sbtype(h_sb), #func); \ ++ } \ ++} while (0) ++ ++#define DySetForce(func, dst, src) do { \ ++ AuDebugOn(!src.func); \ ++ DyDbgInc(cnt); \ ++ dst.func = src.func; \ ++} while (0) ++ ++#define DySetAop(func) \ ++ DySet(func, dyaop->da_op, aufs_aop, h_aop, h_sb) ++#define DySetAopForce(func) \ ++ DySetForce(func, dyaop->da_op, aufs_aop) ++ ++static void dy_aop(struct au_dykey *key, const void *h_op, ++ struct super_block *h_sb __maybe_unused) ++{ ++ struct au_dyaop *dyaop = (void *)key; ++ const struct address_space_operations *h_aop = h_op; ++ DyDbgDeclare(cnt); ++ ++ AuDbg("%s\n", au_sbtype(h_sb)); ++ ++ DySetAop(writepage); ++ DySetAopForce(readpage); /* force */ ++ DySetAop(writepages); ++ DySetAop(set_page_dirty); ++ DySetAop(readpages); ++ DySetAop(write_begin); ++ DySetAop(write_end); ++ DySetAop(bmap); ++ DySetAop(invalidatepage); ++ DySetAop(releasepage); ++ DySetAop(freepage); ++ /* these two will be changed according to an aufs mount option */ ++ DySetAop(direct_IO); ++ DySetAop(get_xip_mem); ++ DySetAop(migratepage); ++ DySetAop(launder_page); ++ DySetAop(is_partially_uptodate); ++ DySetAop(error_remove_page); ++ ++ DyDbgSize(cnt, *h_aop); ++ dyaop->da_get_xip_mem = h_aop->get_xip_mem; ++} ++ ++/* ---------------------------------------------------------------------- */ ++ ++static void dy_bug(struct kref *kref) ++{ ++ BUG(); ++} ++ ++static struct au_dykey *dy_get(struct au_dynop *op, struct au_branch *br) ++{ ++ struct au_dykey *key, *old; ++ struct au_splhead *spl; ++ struct op { ++ unsigned int sz; ++ void (*set)(struct au_dykey *key, const void *h_op, ++ struct super_block *h_sb __maybe_unused); ++ }; ++ static const struct op a[] = { ++ [AuDy_AOP] = { ++ .sz = sizeof(struct au_dyaop), ++ .set = dy_aop ++ } ++ }; ++ const struct op *p; ++ ++ spl = dynop + op->dy_type; ++ key = dy_gfind_get(spl, op->dy_hop); ++ if (key) ++ goto out_add; /* success */ ++ ++ p = a + op->dy_type; ++ key = kzalloc(p->sz, GFP_NOFS); ++ if (unlikely(!key)) { ++ key = ERR_PTR(-ENOMEM); ++ goto out; ++ } ++ ++ key->dk_op.dy_hop = op->dy_hop; ++ kref_init(&key->dk_kref); ++ p->set(key, op->dy_hop, br->br_mnt->mnt_sb); ++ old = dy_gadd(spl, key); ++ if (old) { ++ kfree(key); ++ key = old; ++ } ++ ++out_add: ++ old = dy_bradd(br, key); ++ if (old) ++ /* its ref-count should never be zero here */ ++ kref_put(&key->dk_kref, dy_bug); ++out: ++ return key; ++} ++ ++/* ---------------------------------------------------------------------- */ ++/* ++ * Aufs prohibits O_DIRECT by defaut even if the branch supports it. ++ * This behaviour is neccessary to return an error from open(O_DIRECT) instead ++ * of the succeeding I/O. The dio mount option enables O_DIRECT and makes ++ * open(O_DIRECT) always succeed, but the succeeding I/O may return an error. ++ * See the aufs manual in detail. ++ * ++ * To keep this behaviour, aufs has to set NULL to ->get_xip_mem too, and the ++ * performance of fadvise() and madvise() may be affected. 
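dy_get() above resolves the race between two first users of the same lower address_space_operations in a lock-light way: look the key up lock-free under RCU, allocate a candidate on a miss, then re-check under the spinlock in dy_gadd() and, if somebody else won, free the candidate and take a reference on the winner. The userspace sketch below shows the same insert-or-reuse shape with a plain mutex and an integer refcount standing in for the RCU list and kref; names such as key_get() are invented for the illustration.

/* Build: cc -pthread -o dynkey dynkey.c */
#include <pthread.h>
#include <stdio.h>
#include <stdlib.h>

struct key {
    const void *hop;    /* host operation table this key wraps */
    int refcnt;
    struct key *next;
};

static struct key *keys;
static pthread_mutex_t keys_lock = PTHREAD_MUTEX_INITIALIZER;

/* Allocate a candidate first, then insert it under the lock or reuse and
 * reference an already-registered key for the same host op. */
static struct key *key_get(const void *hop)
{
    struct key *k, *cand;

    cand = calloc(1, sizeof(*cand));
    if (!cand)
        return NULL;
    cand->hop = hop;
    cand->refcnt = 1;

    pthread_mutex_lock(&keys_lock);
    for (k = keys; k; k = k->next)
        if (k->hop == hop)
            break;
    if (k)
        k->refcnt++;        /* reuse the winner */
    else {
        cand->next = keys;  /* first registration wins */
        keys = cand;
        k = cand;
    }
    pthread_mutex_unlock(&keys_lock);

    if (k != cand)
        free(cand);         /* lost the race, drop the candidate */
    return k;
}

int main(void)
{
    static int host_op;     /* stands in for a lower fs a_ops pointer */
    struct key *a = key_get(&host_op), *b = key_get(&host_op);

    if (!a || !b)
        return 1;
    printf("same key: %s, refcnt %d\n", a == b ? "yes" : "no", a->refcnt);
    return 0;
}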
++ */ ++static void dy_adx(struct au_dyaop *dyaop, int do_dx) ++{ ++ if (!do_dx) { ++ dyaop->da_op.direct_IO = NULL; ++ dyaop->da_op.get_xip_mem = NULL; ++ } else { ++ dyaop->da_op.direct_IO = aufs_aop.direct_IO; ++ dyaop->da_op.get_xip_mem = aufs_aop.get_xip_mem; ++ if (!dyaop->da_get_xip_mem) ++ dyaop->da_op.get_xip_mem = NULL; ++ } ++} ++ ++static struct au_dyaop *dy_aget(struct au_branch *br, ++ const struct address_space_operations *h_aop, ++ int do_dx) ++{ ++ struct au_dyaop *dyaop; ++ struct au_dynop op; ++ ++ op.dy_type = AuDy_AOP; ++ op.dy_haop = h_aop; ++ dyaop = (void *)dy_get(&op, br); ++ if (IS_ERR(dyaop)) ++ goto out; ++ dy_adx(dyaop, do_dx); ++ ++out: ++ return dyaop; ++} ++ ++int au_dy_iaop(struct inode *inode, aufs_bindex_t bindex, ++ struct inode *h_inode) ++{ ++ int err, do_dx; ++ struct super_block *sb; ++ struct au_branch *br; ++ struct au_dyaop *dyaop; ++ ++ AuDebugOn(!S_ISREG(h_inode->i_mode)); ++ IiMustWriteLock(inode); ++ ++ sb = inode->i_sb; ++ br = au_sbr(sb, bindex); ++ do_dx = !!au_opt_test(au_mntflags(sb), DIO); ++ dyaop = dy_aget(br, h_inode->i_mapping->a_ops, do_dx); ++ err = PTR_ERR(dyaop); ++ if (IS_ERR(dyaop)) ++ /* unnecessary to call dy_fput() */ ++ goto out; ++ ++ err = 0; ++ inode->i_mapping->a_ops = &dyaop->da_op; ++ ++out: ++ return err; ++} ++ ++/* ++ * Is it safe to replace a_ops during the inode/file is in operation? ++ * Yes, I hope so. ++ */ ++int au_dy_irefresh(struct inode *inode) ++{ ++ int err; ++ aufs_bindex_t bstart; ++ struct inode *h_inode; ++ ++ err = 0; ++ if (S_ISREG(inode->i_mode)) { ++ bstart = au_ibstart(inode); ++ h_inode = au_h_iptr(inode, bstart); ++ err = au_dy_iaop(inode, bstart, h_inode); ++ } ++ return err; ++} ++ ++void au_dy_arefresh(int do_dx) ++{ ++ struct au_splhead *spl; ++ struct list_head *head; ++ struct au_dykey *key; ++ ++ spl = dynop + AuDy_AOP; ++ head = &spl->head; ++ spin_lock(&spl->spin); ++ list_for_each_entry(key, head, dk_list) ++ dy_adx((void *)key, do_dx); ++ spin_unlock(&spl->spin); ++} ++ ++/* ---------------------------------------------------------------------- */ ++ ++void __init au_dy_init(void) ++{ ++ int i; ++ ++ /* make sure that 'struct au_dykey *' can be any type */ ++ BUILD_BUG_ON(offsetof(struct au_dyaop, da_key)); ++ ++ for (i = 0; i < AuDyLast; i++) ++ au_spl_init(dynop + i); ++} ++ ++void au_dy_fin(void) ++{ ++ int i; ++ ++ for (i = 0; i < AuDyLast; i++) ++ WARN_ON(!list_empty(&dynop[i].head)); ++} +--- /dev/null 2012-03-14 12:35:58.848999748 +0100 ++++ b/fs/aufs/dynop.h 2012-03-20 17:31:17.000000000 +0100 +@@ -0,0 +1,76 @@ ++/* ++ * Copyright (C) 2010-2012 Junjiro R. Okajima ++ * ++ * This program, aufs is free software; you can redistribute it and/or modify ++ * it under the terms of the GNU General Public License as published by ++ * the Free Software Foundation; either version 2 of the License, or ++ * (at your option) any later version. ++ * ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ * GNU General Public License for more details. 
++ * ++ * You should have received a copy of the GNU General Public License ++ * along with this program; if not, write to the Free Software ++ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA ++ */ ++ ++/* ++ * dynamically customizable operations (for regular files only) ++ */ ++ ++#ifndef __AUFS_DYNOP_H__ ++#define __AUFS_DYNOP_H__ ++ ++#ifdef __KERNEL__ ++ ++#include "inode.h" ++ ++enum {AuDy_AOP, AuDyLast}; ++ ++struct au_dynop { ++ int dy_type; ++ union { ++ const void *dy_hop; ++ const struct address_space_operations *dy_haop; ++ }; ++}; ++ ++struct au_dykey { ++ union { ++ struct list_head dk_list; ++ struct rcu_head dk_rcu; ++ }; ++ struct au_dynop dk_op; ++ ++ /* ++ * during I am in the branch local array, kref is gotten. when the ++ * branch is removed, kref is put. ++ */ ++ struct kref dk_kref; ++}; ++ ++/* stop unioning since their sizes are very different from each other */ ++struct au_dyaop { ++ struct au_dykey da_key; ++ struct address_space_operations da_op; /* not const */ ++ int (*da_get_xip_mem)(struct address_space *, pgoff_t, int, ++ void **, unsigned long *); ++}; ++ ++/* ---------------------------------------------------------------------- */ ++ ++/* dynop.c */ ++struct au_branch; ++void au_dy_put(struct au_dykey *key); ++int au_dy_iaop(struct inode *inode, aufs_bindex_t bindex, ++ struct inode *h_inode); ++int au_dy_irefresh(struct inode *inode); ++void au_dy_arefresh(int do_dio); ++ ++void __init au_dy_init(void); ++void au_dy_fin(void); ++ ++#endif /* __KERNEL__ */ ++#endif /* __AUFS_DYNOP_H__ */ +--- /dev/null 2012-03-14 12:35:58.848999748 +0100 ++++ b/fs/aufs/export.c 2012-03-20 17:31:17.000000000 +0100 +@@ -0,0 +1,803 @@ ++/* ++ * Copyright (C) 2005-2012 Junjiro R. Okajima ++ * ++ * This program, aufs is free software; you can redistribute it and/or modify ++ * it under the terms of the GNU General Public License as published by ++ * the Free Software Foundation; either version 2 of the License, or ++ * (at your option) any later version. ++ * ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ * GNU General Public License for more details. 
++ * ++ * You should have received a copy of the GNU General Public License ++ * along with this program; if not, write to the Free Software ++ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA ++ */ ++ ++/* ++ * export via nfs ++ */ ++ ++#include ++#include ++#include ++#include ++#include ++#include ++#include "../fs/mount.h" ++#include "aufs.h" ++ ++union conv { ++#ifdef CONFIG_AUFS_INO_T_64 ++ __u32 a[2]; ++#else ++ __u32 a[1]; ++#endif ++ ino_t ino; ++}; ++ ++static ino_t decode_ino(__u32 *a) ++{ ++ union conv u; ++ ++ BUILD_BUG_ON(sizeof(u.ino) != sizeof(u.a)); ++ u.a[0] = a[0]; ++#ifdef CONFIG_AUFS_INO_T_64 ++ u.a[1] = a[1]; ++#endif ++ return u.ino; ++} ++ ++static void encode_ino(__u32 *a, ino_t ino) ++{ ++ union conv u; ++ ++ u.ino = ino; ++ a[0] = u.a[0]; ++#ifdef CONFIG_AUFS_INO_T_64 ++ a[1] = u.a[1]; ++#endif ++} ++ ++/* NFS file handle */ ++enum { ++ Fh_br_id, ++ Fh_sigen, ++#ifdef CONFIG_AUFS_INO_T_64 ++ /* support 64bit inode number */ ++ Fh_ino1, ++ Fh_ino2, ++ Fh_dir_ino1, ++ Fh_dir_ino2, ++#else ++ Fh_ino1, ++ Fh_dir_ino1, ++#endif ++ Fh_igen, ++ Fh_h_type, ++ Fh_tail, ++ ++ Fh_ino = Fh_ino1, ++ Fh_dir_ino = Fh_dir_ino1 ++}; ++ ++static int au_test_anon(struct dentry *dentry) ++{ ++ /* note: read d_flags without d_lock */ ++ return !!(dentry->d_flags & DCACHE_DISCONNECTED); ++} ++ ++/* ---------------------------------------------------------------------- */ ++/* inode generation external table */ ++ ++void au_xigen_inc(struct inode *inode) ++{ ++ loff_t pos; ++ ssize_t sz; ++ __u32 igen; ++ struct super_block *sb; ++ struct au_sbinfo *sbinfo; ++ ++ sb = inode->i_sb; ++ AuDebugOn(!au_opt_test(au_mntflags(sb), XINO)); ++ ++ sbinfo = au_sbi(sb); ++ pos = inode->i_ino; ++ pos *= sizeof(igen); ++ igen = inode->i_generation + 1; ++ sz = xino_fwrite(sbinfo->si_xwrite, sbinfo->si_xigen, &igen, ++ sizeof(igen), &pos); ++ if (sz == sizeof(igen)) ++ return; /* success */ ++ ++ if (unlikely(sz >= 0)) ++ AuIOErr("xigen error (%zd)\n", sz); ++} ++ ++int au_xigen_new(struct inode *inode) ++{ ++ int err; ++ loff_t pos; ++ ssize_t sz; ++ struct super_block *sb; ++ struct au_sbinfo *sbinfo; ++ struct file *file; ++ ++ err = 0; ++ /* todo: dirty, at mount time */ ++ if (inode->i_ino == AUFS_ROOT_INO) ++ goto out; ++ sb = inode->i_sb; ++ SiMustAnyLock(sb); ++ if (unlikely(!au_opt_test(au_mntflags(sb), XINO))) ++ goto out; ++ ++ err = -EFBIG; ++ pos = inode->i_ino; ++ if (unlikely(au_loff_max / sizeof(inode->i_generation) - 1 < pos)) { ++ AuIOErr1("too large i%lld\n", pos); ++ goto out; ++ } ++ pos *= sizeof(inode->i_generation); ++ ++ err = 0; ++ sbinfo = au_sbi(sb); ++ file = sbinfo->si_xigen; ++ BUG_ON(!file); ++ ++ if (i_size_read(file->f_dentry->d_inode) ++ < pos + sizeof(inode->i_generation)) { ++ inode->i_generation = atomic_inc_return(&sbinfo->si_xigen_next); ++ sz = xino_fwrite(sbinfo->si_xwrite, file, &inode->i_generation, ++ sizeof(inode->i_generation), &pos); ++ } else ++ sz = xino_fread(sbinfo->si_xread, file, &inode->i_generation, ++ sizeof(inode->i_generation), &pos); ++ if (sz == sizeof(inode->i_generation)) ++ goto out; /* success */ ++ ++ err = sz; ++ if (unlikely(sz >= 0)) { ++ err = -EIO; ++ AuIOErr("xigen error (%zd)\n", sz); ++ } ++ ++out: ++ return err; ++} ++ ++int au_xigen_set(struct super_block *sb, struct file *base) ++{ ++ int err; ++ struct au_sbinfo *sbinfo; ++ struct file *file; ++ ++ SiMustWriteLock(sb); ++ ++ sbinfo = au_sbi(sb); ++ file = au_xino_create2(base, sbinfo->si_xigen); ++ err = PTR_ERR(file); ++ if (IS_ERR(file)) ++ goto 
out; ++ err = 0; ++ if (sbinfo->si_xigen) ++ fput(sbinfo->si_xigen); ++ sbinfo->si_xigen = file; ++ ++out: ++ return err; ++} ++ ++void au_xigen_clr(struct super_block *sb) ++{ ++ struct au_sbinfo *sbinfo; ++ ++ SiMustWriteLock(sb); ++ ++ sbinfo = au_sbi(sb); ++ if (sbinfo->si_xigen) { ++ fput(sbinfo->si_xigen); ++ sbinfo->si_xigen = NULL; ++ } ++} ++ ++/* ---------------------------------------------------------------------- */ ++ ++static struct dentry *decode_by_ino(struct super_block *sb, ino_t ino, ++ ino_t dir_ino) ++{ ++ struct dentry *dentry, *d; ++ struct inode *inode; ++ unsigned int sigen; ++ ++ dentry = NULL; ++ inode = ilookup(sb, ino); ++ if (!inode) ++ goto out; ++ ++ dentry = ERR_PTR(-ESTALE); ++ sigen = au_sigen(sb); ++ if (unlikely(is_bad_inode(inode) ++ || IS_DEADDIR(inode) ++ || sigen != au_iigen(inode))) ++ goto out_iput; ++ ++ dentry = NULL; ++ if (!dir_ino || S_ISDIR(inode->i_mode)) ++ dentry = d_find_alias(inode); ++ else { ++ spin_lock(&inode->i_lock); ++ list_for_each_entry(d, &inode->i_dentry, d_alias) { ++ spin_lock(&d->d_lock); ++ if (!au_test_anon(d) ++ && d->d_parent->d_inode->i_ino == dir_ino) { ++ dentry = dget_dlock(d); ++ spin_unlock(&d->d_lock); ++ break; ++ } ++ spin_unlock(&d->d_lock); ++ } ++ spin_unlock(&inode->i_lock); ++ } ++ if (unlikely(dentry && au_digen_test(dentry, sigen))) { ++ /* need to refresh */ ++ dput(dentry); ++ dentry = NULL; ++ } ++ ++out_iput: ++ iput(inode); ++out: ++ AuTraceErrPtr(dentry); ++ return dentry; ++} ++ ++/* ---------------------------------------------------------------------- */ ++ ++/* todo: dirty? */ ++/* if exportfs_decode_fh() passed vfsmount*, we could be happy */ ++ ++struct au_compare_mnt_args { ++ /* input */ ++ struct super_block *sb; ++ ++ /* output */ ++ struct vfsmount *mnt; ++}; ++ ++static int au_compare_mnt(struct vfsmount *mnt, void *arg) ++{ ++ struct au_compare_mnt_args *a = arg; ++ ++ if (mnt->mnt_sb != a->sb) ++ return 0; ++ a->mnt = mntget(mnt); ++ return 1; ++} ++ ++static struct vfsmount *au_mnt_get(struct super_block *sb) ++{ ++ int err; ++ struct path root; ++ struct au_compare_mnt_args args = { ++ .sb = sb ++ }; ++ ++ get_fs_root(current->fs, &root); ++ br_read_lock(vfsmount_lock); ++ err = iterate_mounts(au_compare_mnt, &args, root.mnt); ++ br_read_unlock(vfsmount_lock); ++ path_put(&root); ++ AuDebugOn(!err); ++ AuDebugOn(!args.mnt); ++ return args.mnt; ++} ++ ++struct au_nfsd_si_lock { ++ unsigned int sigen; ++ aufs_bindex_t bindex, br_id; ++ unsigned char force_lock; ++}; ++ ++static int si_nfsd_read_lock(struct super_block *sb, ++ struct au_nfsd_si_lock *nsi_lock) ++{ ++ int err; ++ aufs_bindex_t bindex; ++ ++ si_read_lock(sb, AuLock_FLUSH); ++ ++ /* branch id may be wrapped around */ ++ err = 0; ++ bindex = au_br_index(sb, nsi_lock->br_id); ++ if (bindex >= 0 && nsi_lock->sigen + AUFS_BRANCH_MAX > au_sigen(sb)) ++ goto out; /* success */ ++ ++ err = -ESTALE; ++ bindex = -1; ++ if (!nsi_lock->force_lock) ++ si_read_unlock(sb); ++ ++out: ++ nsi_lock->bindex = bindex; ++ return err; ++} ++ ++struct find_name_by_ino { ++ int called, found; ++ ino_t ino; ++ char *name; ++ int namelen; ++}; ++ ++static int ++find_name_by_ino(void *arg, const char *name, int namelen, loff_t offset, ++ u64 ino, unsigned int d_type) ++{ ++ struct find_name_by_ino *a = arg; ++ ++ a->called++; ++ if (a->ino != ino) ++ return 0; ++ ++ memcpy(a->name, name, namelen); ++ a->namelen = namelen; ++ a->found = 1; ++ return 1; ++} ++ ++static struct dentry *au_lkup_by_ino(struct path *path, ino_t ino, ++ struct 
au_nfsd_si_lock *nsi_lock) ++{ ++ struct dentry *dentry, *parent; ++ struct file *file; ++ struct inode *dir; ++ struct find_name_by_ino arg; ++ int err; ++ ++ parent = path->dentry; ++ if (nsi_lock) ++ si_read_unlock(parent->d_sb); ++ file = vfsub_dentry_open(path, au_dir_roflags); ++ dentry = (void *)file; ++ if (IS_ERR(file)) ++ goto out; ++ ++ dentry = ERR_PTR(-ENOMEM); ++ arg.name = __getname_gfp(GFP_NOFS); ++ if (unlikely(!arg.name)) ++ goto out_file; ++ arg.ino = ino; ++ arg.found = 0; ++ do { ++ arg.called = 0; ++ /* smp_mb(); */ ++ err = vfsub_readdir(file, find_name_by_ino, &arg); ++ } while (!err && !arg.found && arg.called); ++ dentry = ERR_PTR(err); ++ if (unlikely(err)) ++ goto out_name; ++ dentry = ERR_PTR(-ENOENT); ++ if (!arg.found) ++ goto out_name; ++ ++ /* do not call au_lkup_one() */ ++ dir = parent->d_inode; ++ mutex_lock(&dir->i_mutex); ++ dentry = vfsub_lookup_one_len(arg.name, parent, arg.namelen); ++ mutex_unlock(&dir->i_mutex); ++ AuTraceErrPtr(dentry); ++ if (IS_ERR(dentry)) ++ goto out_name; ++ AuDebugOn(au_test_anon(dentry)); ++ if (unlikely(!dentry->d_inode)) { ++ dput(dentry); ++ dentry = ERR_PTR(-ENOENT); ++ } ++ ++out_name: ++ __putname(arg.name); ++out_file: ++ fput(file); ++out: ++ if (unlikely(nsi_lock ++ && si_nfsd_read_lock(parent->d_sb, nsi_lock) < 0)) ++ if (!IS_ERR(dentry)) { ++ dput(dentry); ++ dentry = ERR_PTR(-ESTALE); ++ } ++ AuTraceErrPtr(dentry); ++ return dentry; ++} ++ ++static struct dentry *decode_by_dir_ino(struct super_block *sb, ino_t ino, ++ ino_t dir_ino, ++ struct au_nfsd_si_lock *nsi_lock) ++{ ++ struct dentry *dentry; ++ struct path path; ++ ++ if (dir_ino != AUFS_ROOT_INO) { ++ path.dentry = decode_by_ino(sb, dir_ino, 0); ++ dentry = path.dentry; ++ if (!path.dentry || IS_ERR(path.dentry)) ++ goto out; ++ AuDebugOn(au_test_anon(path.dentry)); ++ } else ++ path.dentry = dget(sb->s_root); ++ ++ path.mnt = au_mnt_get(sb); ++ dentry = au_lkup_by_ino(&path, ino, nsi_lock); ++ path_put(&path); ++ ++out: ++ AuTraceErrPtr(dentry); ++ return dentry; ++} ++ ++/* ---------------------------------------------------------------------- */ ++ ++static int h_acceptable(void *expv, struct dentry *dentry) ++{ ++ return 1; ++} ++ ++static char *au_build_path(struct dentry *h_parent, struct path *h_rootpath, ++ char *buf, int len, struct super_block *sb) ++{ ++ char *p; ++ int n; ++ struct path path; ++ ++ p = d_path(h_rootpath, buf, len); ++ if (IS_ERR(p)) ++ goto out; ++ n = strlen(p); ++ ++ path.mnt = h_rootpath->mnt; ++ path.dentry = h_parent; ++ p = d_path(&path, buf, len); ++ if (IS_ERR(p)) ++ goto out; ++ if (n != 1) ++ p += n; ++ ++ path.mnt = au_mnt_get(sb); ++ path.dentry = sb->s_root; ++ p = d_path(&path, buf, len - strlen(p)); ++ mntput(path.mnt); ++ if (IS_ERR(p)) ++ goto out; ++ if (n != 1) ++ p[strlen(p)] = '/'; ++ ++out: ++ AuTraceErrPtr(p); ++ return p; ++} ++ ++static ++struct dentry *decode_by_path(struct super_block *sb, ino_t ino, __u32 *fh, ++ int fh_len, struct au_nfsd_si_lock *nsi_lock) ++{ ++ struct dentry *dentry, *h_parent, *root; ++ struct super_block *h_sb; ++ char *pathname, *p; ++ struct vfsmount *h_mnt; ++ struct au_branch *br; ++ int err; ++ struct path path; ++ ++ br = au_sbr(sb, nsi_lock->bindex); ++ h_mnt = br->br_mnt; ++ h_sb = h_mnt->mnt_sb; ++ /* todo: call lower fh_to_dentry()? fh_to_parent()? 
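When only an inode number survives in the file handle, au_lkup_by_ino() above recovers the name by opening the parent directory and scanning it with the find_name_by_ino() callback, then doing an ordinary lookup on the recovered name. The short userspace program below performs the equivalent scan with opendir()/readdir(); it illustrates the idea only, not the in-kernel interfaces involved.

/* Build: cc -o byino byino.c ; run: ./byino <dir> <ino> */
#include <dirent.h>
#include <stdio.h>
#include <stdlib.h>

int main(int argc, char **argv)
{
    DIR *dir;
    struct dirent *de;
    unsigned long ino;

    if (argc != 3) {
        fprintf(stderr, "usage: %s <dir> <ino>\n", argv[0]);
        return 1;
    }
    ino = strtoul(argv[2], NULL, 0);
    dir = opendir(argv[1]);
    if (!dir) {
        perror("opendir");
        return 1;
    }
    /* Walk the directory stream until an entry's inode number matches,
     * just as find_name_by_ino() does in the kernel readdir callback. */
    while ((de = readdir(dir)) != NULL)
        if ((unsigned long)de->d_ino == ino) {
            printf("ino %lu is named \"%s\"\n", ino, de->d_name);
            break;
        }
    if (!de)
        printf("ino %lu not found in %s\n", ino, argv[1]);
    closedir(dir);
    return 0;
}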
*/ ++ h_parent = exportfs_decode_fh(h_mnt, (void *)(fh + Fh_tail), ++ fh_len - Fh_tail, fh[Fh_h_type], ++ h_acceptable, /*context*/NULL); ++ dentry = h_parent; ++ if (unlikely(!h_parent || IS_ERR(h_parent))) { ++ AuWarn1("%s decode_fh failed, %ld\n", ++ au_sbtype(h_sb), PTR_ERR(h_parent)); ++ goto out; ++ } ++ dentry = NULL; ++ if (unlikely(au_test_anon(h_parent))) { ++ AuWarn1("%s decode_fh returned a disconnected dentry\n", ++ au_sbtype(h_sb)); ++ goto out_h_parent; ++ } ++ ++ dentry = ERR_PTR(-ENOMEM); ++ pathname = (void *)__get_free_page(GFP_NOFS); ++ if (unlikely(!pathname)) ++ goto out_h_parent; ++ ++ root = sb->s_root; ++ path.mnt = h_mnt; ++ di_read_lock_parent(root, !AuLock_IR); ++ path.dentry = au_h_dptr(root, nsi_lock->bindex); ++ di_read_unlock(root, !AuLock_IR); ++ p = au_build_path(h_parent, &path, pathname, PAGE_SIZE, sb); ++ dentry = (void *)p; ++ if (IS_ERR(p)) ++ goto out_pathname; ++ ++ si_read_unlock(sb); ++ err = vfsub_kern_path(p, LOOKUP_FOLLOW | LOOKUP_DIRECTORY, &path); ++ dentry = ERR_PTR(err); ++ if (unlikely(err)) ++ goto out_relock; ++ ++ dentry = ERR_PTR(-ENOENT); ++ AuDebugOn(au_test_anon(path.dentry)); ++ if (unlikely(!path.dentry->d_inode)) ++ goto out_path; ++ ++ if (ino != path.dentry->d_inode->i_ino) ++ dentry = au_lkup_by_ino(&path, ino, /*nsi_lock*/NULL); ++ else ++ dentry = dget(path.dentry); ++ ++out_path: ++ path_put(&path); ++out_relock: ++ if (unlikely(si_nfsd_read_lock(sb, nsi_lock) < 0)) ++ if (!IS_ERR(dentry)) { ++ dput(dentry); ++ dentry = ERR_PTR(-ESTALE); ++ } ++out_pathname: ++ free_page((unsigned long)pathname); ++out_h_parent: ++ dput(h_parent); ++out: ++ AuTraceErrPtr(dentry); ++ return dentry; ++} ++ ++/* ---------------------------------------------------------------------- */ ++ ++static struct dentry * ++aufs_fh_to_dentry(struct super_block *sb, struct fid *fid, int fh_len, ++ int fh_type) ++{ ++ struct dentry *dentry; ++ __u32 *fh = fid->raw; ++ struct au_branch *br; ++ ino_t ino, dir_ino; ++ struct au_nfsd_si_lock nsi_lock = { ++ .force_lock = 0 ++ }; ++ ++ dentry = ERR_PTR(-ESTALE); ++ /* it should never happen, but the file handle is unreliable */ ++ if (unlikely(fh_len < Fh_tail)) ++ goto out; ++ nsi_lock.sigen = fh[Fh_sigen]; ++ nsi_lock.br_id = fh[Fh_br_id]; ++ ++ /* branch id may be wrapped around */ ++ br = NULL; ++ if (unlikely(si_nfsd_read_lock(sb, &nsi_lock))) ++ goto out; ++ nsi_lock.force_lock = 1; ++ ++ /* is this inode still cached? */ ++ ino = decode_ino(fh + Fh_ino); ++ /* it should never happen */ ++ if (unlikely(ino == AUFS_ROOT_INO)) ++ goto out; ++ ++ dir_ino = decode_ino(fh + Fh_dir_ino); ++ dentry = decode_by_ino(sb, ino, dir_ino); ++ if (IS_ERR(dentry)) ++ goto out_unlock; ++ if (dentry) ++ goto accept; ++ ++ /* is the parent dir cached? 
*/ ++ br = au_sbr(sb, nsi_lock.bindex); ++ atomic_inc(&br->br_count); ++ dentry = decode_by_dir_ino(sb, ino, dir_ino, &nsi_lock); ++ if (IS_ERR(dentry)) ++ goto out_unlock; ++ if (dentry) ++ goto accept; ++ ++ /* lookup path */ ++ dentry = decode_by_path(sb, ino, fh, fh_len, &nsi_lock); ++ if (IS_ERR(dentry)) ++ goto out_unlock; ++ if (unlikely(!dentry)) ++ /* todo?: make it ESTALE */ ++ goto out_unlock; ++ ++accept: ++ if (!au_digen_test(dentry, au_sigen(sb)) ++ && dentry->d_inode->i_generation == fh[Fh_igen]) ++ goto out_unlock; /* success */ ++ ++ dput(dentry); ++ dentry = ERR_PTR(-ESTALE); ++out_unlock: ++ if (br) ++ atomic_dec(&br->br_count); ++ si_read_unlock(sb); ++out: ++ AuTraceErrPtr(dentry); ++ return dentry; ++} ++ ++#if 0 /* reserved for future use */ ++/* support subtreecheck option */ ++static struct dentry *aufs_fh_to_parent(struct super_block *sb, struct fid *fid, ++ int fh_len, int fh_type) ++{ ++ struct dentry *parent; ++ __u32 *fh = fid->raw; ++ ino_t dir_ino; ++ ++ dir_ino = decode_ino(fh + Fh_dir_ino); ++ parent = decode_by_ino(sb, dir_ino, 0); ++ if (IS_ERR(parent)) ++ goto out; ++ if (!parent) ++ parent = decode_by_path(sb, au_br_index(sb, fh[Fh_br_id]), ++ dir_ino, fh, fh_len); ++ ++out: ++ AuTraceErrPtr(parent); ++ return parent; ++} ++#endif ++ ++/* ---------------------------------------------------------------------- */ ++ ++static int aufs_encode_fh(struct dentry *dentry, __u32 *fh, int *max_len, ++ int connectable) ++{ ++ int err; ++ aufs_bindex_t bindex, bend; ++ struct super_block *sb, *h_sb; ++ struct inode *inode; ++ struct dentry *parent, *h_parent; ++ struct au_branch *br; ++ ++ AuDebugOn(au_test_anon(dentry)); ++ ++ parent = NULL; ++ err = -ENOSPC; ++ if (unlikely(*max_len <= Fh_tail)) { ++ AuWarn1("NFSv2 client (max_len %d)?\n", *max_len); ++ goto out; ++ } ++ ++ err = FILEID_ROOT; ++ if (IS_ROOT(dentry)) { ++ AuDebugOn(dentry->d_inode->i_ino != AUFS_ROOT_INO); ++ goto out; ++ } ++ ++ h_parent = NULL; ++ err = aufs_read_lock(dentry, AuLock_FLUSH | AuLock_IR | AuLock_GEN); ++ if (unlikely(err)) ++ goto out; ++ ++ inode = dentry->d_inode; ++ AuDebugOn(!inode); ++ sb = dentry->d_sb; ++#ifdef CONFIG_AUFS_DEBUG ++ if (unlikely(!au_opt_test(au_mntflags(sb), XINO))) ++ AuWarn1("NFS-exporting requires xino\n"); ++#endif ++ err = -EIO; ++ parent = dget_parent(dentry); ++ di_read_lock_parent(parent, !AuLock_IR); ++ bend = au_dbtaildir(parent); ++ for (bindex = au_dbstart(parent); bindex <= bend; bindex++) { ++ h_parent = au_h_dptr(parent, bindex); ++ if (h_parent) { ++ dget(h_parent); ++ break; ++ } ++ } ++ if (unlikely(!h_parent)) ++ goto out_unlock; ++ ++ err = -EPERM; ++ br = au_sbr(sb, bindex); ++ h_sb = br->br_mnt->mnt_sb; ++ if (unlikely(!h_sb->s_export_op)) { ++ AuErr1("%s branch is not exportable\n", au_sbtype(h_sb)); ++ goto out_dput; ++ } ++ ++ fh[Fh_br_id] = br->br_id; ++ fh[Fh_sigen] = au_sigen(sb); ++ encode_ino(fh + Fh_ino, inode->i_ino); ++ encode_ino(fh + Fh_dir_ino, parent->d_inode->i_ino); ++ fh[Fh_igen] = inode->i_generation; ++ ++ *max_len -= Fh_tail; ++ fh[Fh_h_type] = exportfs_encode_fh(h_parent, (void *)(fh + Fh_tail), ++ max_len, ++ /*connectable or subtreecheck*/0); ++ err = fh[Fh_h_type]; ++ *max_len += Fh_tail; ++ /* todo: macros? 
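aufs_encode_fh() above lays the handle out as the branch id, the superblock generation, the inode and parent-directory inode numbers, the inode generation and the lower handle type, followed by whatever the lower filesystem's encode_fh() appends at Fh_tail. With CONFIG_AUFS_INO_T_64 an inode number spans two __u32 slots; the sketch below is a userspace round-trip of the encode_ino()/decode_ino() helpers for exactly that case.

/* Build: cc -o fhino fhino.c */
#include <stdint.h>
#include <stdio.h>

/* Same union trick as the patch: a 64-bit inode number viewed as two
 * 32-bit file-handle words (Fh_ino1/Fh_ino2, Fh_dir_ino1/Fh_dir_ino2). */
union conv {
    uint32_t a[2];
    uint64_t ino;
};

static void encode_ino(uint32_t *a, uint64_t ino)
{
    union conv u;

    u.ino = ino;
    a[0] = u.a[0];
    a[1] = u.a[1];
}

static uint64_t decode_ino(const uint32_t *a)
{
    union conv u;

    u.a[0] = a[0];
    u.a[1] = a[1];
    return u.ino;
}

int main(void)
{
    uint32_t fh[2];
    uint64_t ino = 0x123456789abcdef0ULL;

    encode_ino(fh, ino);
    printf("round trip %s (0x%llx)\n",
           decode_ino(fh) == ino ? "ok" : "broken",
           (unsigned long long)decode_ino(fh));
    return 0;
}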
*/ ++ if (err != 255) ++ err = 99; ++ else ++ AuWarn1("%s encode_fh failed\n", au_sbtype(h_sb)); ++ ++out_dput: ++ dput(h_parent); ++out_unlock: ++ di_read_unlock(parent, !AuLock_IR); ++ dput(parent); ++ aufs_read_unlock(dentry, AuLock_IR); ++out: ++ if (unlikely(err < 0)) ++ err = 255; ++ return err; ++} ++ ++/* ---------------------------------------------------------------------- */ ++ ++static int aufs_commit_metadata(struct inode *inode) ++{ ++ int err; ++ aufs_bindex_t bindex; ++ struct super_block *sb; ++ struct inode *h_inode; ++ int (*f)(struct inode *inode); ++ ++ sb = inode->i_sb; ++ si_read_lock(sb, AuLock_FLUSH | AuLock_NOPLMW); ++ ii_write_lock_child(inode); ++ bindex = au_ibstart(inode); ++ AuDebugOn(bindex < 0); ++ h_inode = au_h_iptr(inode, bindex); ++ ++ f = h_inode->i_sb->s_export_op->commit_metadata; ++ if (f) ++ err = f(h_inode); ++ else { ++ struct writeback_control wbc = { ++ .sync_mode = WB_SYNC_ALL, ++ .nr_to_write = 0 /* metadata only */ ++ }; ++ ++ err = sync_inode(h_inode, &wbc); ++ } ++ ++ au_cpup_attr_timesizes(inode); ++ ii_write_unlock(inode); ++ si_read_unlock(sb); ++ return err; ++} ++ ++/* ---------------------------------------------------------------------- */ ++ ++static struct export_operations aufs_export_op = { ++ .fh_to_dentry = aufs_fh_to_dentry, ++ /* .fh_to_parent = aufs_fh_to_parent, */ ++ .encode_fh = aufs_encode_fh, ++ .commit_metadata = aufs_commit_metadata ++}; ++ ++void au_export_init(struct super_block *sb) ++{ ++ struct au_sbinfo *sbinfo; ++ __u32 u; ++ ++ sb->s_export_op = &aufs_export_op; ++ sbinfo = au_sbi(sb); ++ sbinfo->si_xigen = NULL; ++ get_random_bytes(&u, sizeof(u)); ++ BUILD_BUG_ON(sizeof(u) != sizeof(int)); ++ atomic_set(&sbinfo->si_xigen_next, u); ++} +--- /dev/null 2012-03-14 12:35:58.848999748 +0100 ++++ b/fs/aufs/file.c 2012-03-20 17:31:17.000000000 +0100 +@@ -0,0 +1,676 @@ ++/* ++ * Copyright (C) 2005-2012 Junjiro R. Okajima ++ * ++ * This program, aufs is free software; you can redistribute it and/or modify ++ * it under the terms of the GNU General Public License as published by ++ * the Free Software Foundation; either version 2 of the License, or ++ * (at your option) any later version. ++ * ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ * GNU General Public License for more details. 
++ * ++ * You should have received a copy of the GNU General Public License ++ * along with this program; if not, write to the Free Software ++ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA ++ */ ++ ++/* ++ * handling file/dir, and address_space operation ++ */ ++ ++#ifdef CONFIG_AUFS_DEBUG ++#include ++#endif ++#include ++#include "aufs.h" ++ ++/* drop flags for writing */ ++unsigned int au_file_roflags(unsigned int flags) ++{ ++ flags &= ~(O_WRONLY | O_RDWR | O_APPEND | O_CREAT | O_TRUNC); ++ flags |= O_RDONLY | O_NOATIME; ++ return flags; ++} ++ ++/* common functions to regular file and dir */ ++struct file *au_h_open(struct dentry *dentry, aufs_bindex_t bindex, int flags, ++ struct file *file) ++{ ++ struct file *h_file; ++ struct dentry *h_dentry; ++ struct inode *h_inode; ++ struct super_block *sb; ++ struct au_branch *br; ++ struct path h_path; ++ int err, exec_flag; ++ ++ /* a race condition can happen between open and unlink/rmdir */ ++ h_file = ERR_PTR(-ENOENT); ++ h_dentry = au_h_dptr(dentry, bindex); ++ if (au_test_nfsd() && !h_dentry) ++ goto out; ++ h_inode = h_dentry->d_inode; ++ if (au_test_nfsd() && !h_inode) ++ goto out; ++ spin_lock(&h_dentry->d_lock); ++ err = (!d_unhashed(dentry) && d_unlinked(h_dentry)) ++ || !h_inode ++ /* || !dentry->d_inode->i_nlink */ ++ ; ++ spin_unlock(&h_dentry->d_lock); ++ if (unlikely(err)) ++ goto out; ++ ++ sb = dentry->d_sb; ++ br = au_sbr(sb, bindex); ++ h_file = ERR_PTR(-EACCES); ++ exec_flag = flags & __FMODE_EXEC; ++ if (exec_flag && (br->br_mnt->mnt_flags & MNT_NOEXEC)) ++ goto out; ++ ++ /* drop flags for writing */ ++ if (au_test_ro(sb, bindex, dentry->d_inode)) ++ flags = au_file_roflags(flags); ++ flags &= ~O_CREAT; ++ atomic_inc(&br->br_count); ++ h_path.dentry = h_dentry; ++ h_path.mnt = br->br_mnt; ++ if (!au_special_file(h_inode->i_mode)) ++ h_file = vfsub_dentry_open(&h_path, flags); ++ else { ++ /* this block depends upon the configuration */ ++ di_read_unlock(dentry, AuLock_IR); ++ fi_write_unlock(file); ++ si_read_unlock(sb); ++ h_file = vfsub_dentry_open(&h_path, flags); ++ si_noflush_read_lock(sb); ++ fi_write_lock(file); ++ di_read_lock_child(dentry, AuLock_IR); ++ } ++ if (IS_ERR(h_file)) ++ goto out_br; ++ ++ if (exec_flag) { ++ err = deny_write_access(h_file); ++ if (unlikely(err)) { ++ fput(h_file); ++ h_file = ERR_PTR(err); ++ goto out_br; ++ } ++ } ++ fsnotify_open(h_file); ++ goto out; /* success */ ++ ++out_br: ++ atomic_dec(&br->br_count); ++out: ++ return h_file; ++} ++ ++int au_do_open(struct file *file, int (*open)(struct file *file, int flags), ++ struct au_fidir *fidir) ++{ ++ int err; ++ struct dentry *dentry; ++ ++ err = au_finfo_init(file, fidir); ++ if (unlikely(err)) ++ goto out; ++ ++ dentry = file->f_dentry; ++ di_read_lock_child(dentry, AuLock_IR); ++ err = open(file, vfsub_file_flags(file)); ++ di_read_unlock(dentry, AuLock_IR); ++ ++ fi_write_unlock(file); ++ if (unlikely(err)) { ++ au_fi(file)->fi_hdir = NULL; ++ au_finfo_fin(file); ++ } ++ ++out: ++ return err; ++} ++ ++int au_reopen_nondir(struct file *file) ++{ ++ int err; ++ aufs_bindex_t bstart; ++ struct dentry *dentry; ++ struct file *h_file, *h_file_tmp; ++ ++ dentry = file->f_dentry; ++ AuDebugOn(au_special_file(dentry->d_inode->i_mode)); ++ bstart = au_dbstart(dentry); ++ h_file_tmp = NULL; ++ if (au_fbstart(file) == bstart) { ++ h_file = au_hf_top(file); ++ if (file->f_mode == h_file->f_mode) ++ return 0; /* success */ ++ h_file_tmp = h_file; ++ get_file(h_file_tmp); ++ au_set_h_fptr(file, bstart, NULL); 
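au_h_open() above falls back to au_file_roflags() whenever the chosen branch is read-only: every flag that implies writing is masked off and the lower file is opened read-only and without atime updates. The tiny userspace check below applies the same mask; _GNU_SOURCE is needed here only so <fcntl.h> exposes O_NOATIME.

/* Build: cc -o roflags roflags.c */
#define _GNU_SOURCE     /* for O_NOATIME */
#include <fcntl.h>
#include <stdio.h>

/* Same transformation as au_file_roflags(): drop the write-implying flags
 * and force a read-only, no-atime open of the lower file. */
static unsigned int file_roflags(unsigned int flags)
{
    flags &= ~(O_WRONLY | O_RDWR | O_APPEND | O_CREAT | O_TRUNC);
    flags |= O_RDONLY | O_NOATIME;
    return flags;
}

int main(void)
{
    unsigned int in = O_RDWR | O_CREAT | O_APPEND | O_NONBLOCK;
    unsigned int out = file_roflags(in);

    printf("0x%x -> 0x%x (writable bits %s)\n", in, out,
           out & (O_WRONLY | O_RDWR | O_APPEND | O_CREAT | O_TRUNC)
           ? "still set" : "cleared");
    return 0;
}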
++ } ++ AuDebugOn(au_fi(file)->fi_hdir); ++ AuDebugOn(au_fbstart(file) < bstart); ++ ++ h_file = au_h_open(dentry, bstart, vfsub_file_flags(file) & ~O_TRUNC, ++ file); ++ err = PTR_ERR(h_file); ++ if (IS_ERR(h_file)) ++ goto out; /* todo: close all? */ ++ ++ err = 0; ++ au_set_fbstart(file, bstart); ++ au_set_h_fptr(file, bstart, h_file); ++ au_update_figen(file); ++ /* todo: necessary? */ ++ /* file->f_ra = h_file->f_ra; */ ++ ++out: ++ if (h_file_tmp) ++ fput(h_file_tmp); ++ return err; ++} ++ ++/* ---------------------------------------------------------------------- */ ++ ++static int au_reopen_wh(struct file *file, aufs_bindex_t btgt, ++ struct dentry *hi_wh) ++{ ++ int err; ++ aufs_bindex_t bstart; ++ struct au_dinfo *dinfo; ++ struct dentry *h_dentry; ++ struct au_hdentry *hdp; ++ ++ dinfo = au_di(file->f_dentry); ++ AuRwMustWriteLock(&dinfo->di_rwsem); ++ ++ bstart = dinfo->di_bstart; ++ dinfo->di_bstart = btgt; ++ hdp = dinfo->di_hdentry; ++ h_dentry = hdp[0 + btgt].hd_dentry; ++ hdp[0 + btgt].hd_dentry = hi_wh; ++ err = au_reopen_nondir(file); ++ hdp[0 + btgt].hd_dentry = h_dentry; ++ dinfo->di_bstart = bstart; ++ ++ return err; ++} ++ ++static int au_ready_to_write_wh(struct file *file, loff_t len, ++ aufs_bindex_t bcpup) ++{ ++ int err; ++ struct inode *inode, *h_inode; ++ struct dentry *dentry, *h_dentry, *hi_wh; ++ ++ dentry = file->f_dentry; ++ au_update_dbstart(dentry); ++ inode = dentry->d_inode; ++ h_inode = NULL; ++ if (au_dbstart(dentry) <= bcpup && au_dbend(dentry) >= bcpup) { ++ h_dentry = au_h_dptr(dentry, bcpup); ++ if (h_dentry) ++ h_inode = h_dentry->d_inode; ++ } ++ hi_wh = au_hi_wh(inode, bcpup); ++ if (!hi_wh && !h_inode) ++ err = au_sio_cpup_wh(dentry, bcpup, len, file); ++ else ++ /* already copied-up after unlink */ ++ err = au_reopen_wh(file, bcpup, hi_wh); ++ ++ if (!err ++ && inode->i_nlink > 1 ++ && au_opt_test(au_mntflags(dentry->d_sb), PLINK)) ++ au_plink_append(inode, bcpup, au_h_dptr(dentry, bcpup)); ++ ++ return err; ++} ++ ++/* ++ * prepare the @file for writing. 
++ */ ++int au_ready_to_write(struct file *file, loff_t len, struct au_pin *pin) ++{ ++ int err; ++ aufs_bindex_t bstart, bcpup, dbstart; ++ struct dentry *dentry, *parent, *h_dentry; ++ struct inode *h_inode, *inode; ++ struct super_block *sb; ++ struct file *h_file; ++ ++ dentry = file->f_dentry; ++ sb = dentry->d_sb; ++ inode = dentry->d_inode; ++ AuDebugOn(au_special_file(inode->i_mode)); ++ bstart = au_fbstart(file); ++ err = au_test_ro(sb, bstart, inode); ++ if (!err && (au_hf_top(file)->f_mode & FMODE_WRITE)) { ++ err = au_pin(pin, dentry, bstart, AuOpt_UDBA_NONE, /*flags*/0); ++ goto out; ++ } ++ ++ /* need to cpup or reopen */ ++ parent = dget_parent(dentry); ++ di_write_lock_parent(parent); ++ err = AuWbrCopyup(au_sbi(sb), dentry); ++ bcpup = err; ++ if (unlikely(err < 0)) ++ goto out_dgrade; ++ err = 0; ++ ++ if (!d_unhashed(dentry) && !au_h_dptr(parent, bcpup)) { ++ err = au_cpup_dirs(dentry, bcpup); ++ if (unlikely(err)) ++ goto out_dgrade; ++ } ++ ++ err = au_pin(pin, dentry, bcpup, AuOpt_UDBA_NONE, ++ AuPin_DI_LOCKED | AuPin_MNT_WRITE); ++ if (unlikely(err)) ++ goto out_dgrade; ++ ++ h_dentry = au_hf_top(file)->f_dentry; ++ h_inode = h_dentry->d_inode; ++ dbstart = au_dbstart(dentry); ++ if (dbstart <= bcpup) { ++ h_dentry = au_h_dptr(dentry, bcpup); ++ AuDebugOn(!h_dentry); ++ h_inode = h_dentry->d_inode; ++ AuDebugOn(!h_inode); ++ bstart = bcpup; ++ } ++ ++ if (dbstart <= bcpup /* just reopen */ ++ || !d_unhashed(dentry) /* copyup and reopen */ ++ ) { ++ mutex_lock_nested(&h_inode->i_mutex, AuLsc_I_CHILD); ++ h_file = au_h_open_pre(dentry, bstart); ++ if (IS_ERR(h_file)) { ++ err = PTR_ERR(h_file); ++ h_file = NULL; ++ } else { ++ di_downgrade_lock(parent, AuLock_IR); ++ if (dbstart > bcpup) ++ err = au_sio_cpup_simple(dentry, bcpup, len, ++ AuCpup_DTIME); ++ if (!err) ++ err = au_reopen_nondir(file); ++ } ++ mutex_unlock(&h_inode->i_mutex); ++ au_h_open_post(dentry, bstart, h_file); ++ } else { /* copyup as wh and reopen */ ++ /* ++ * since writable hfsplus branch is not supported, ++ * h_open_pre/post() are unnecessary. 
++ */ ++ mutex_lock_nested(&h_inode->i_mutex, AuLsc_I_CHILD); ++ err = au_ready_to_write_wh(file, len, bcpup); ++ di_downgrade_lock(parent, AuLock_IR); ++ mutex_unlock(&h_inode->i_mutex); ++ } ++ ++ if (!err) { ++ au_pin_set_parent_lflag(pin, /*lflag*/0); ++ goto out_dput; /* success */ ++ } ++ au_unpin(pin); ++ goto out_unlock; ++ ++out_dgrade: ++ di_downgrade_lock(parent, AuLock_IR); ++out_unlock: ++ di_read_unlock(parent, AuLock_IR); ++out_dput: ++ dput(parent); ++out: ++ return err; ++} ++ ++/* ---------------------------------------------------------------------- */ ++ ++int au_do_flush(struct file *file, fl_owner_t id, ++ int (*flush)(struct file *file, fl_owner_t id)) ++{ ++ int err; ++ struct dentry *dentry; ++ struct super_block *sb; ++ struct inode *inode; ++ ++ dentry = file->f_dentry; ++ sb = dentry->d_sb; ++ inode = dentry->d_inode; ++ si_noflush_read_lock(sb); ++ fi_read_lock(file); ++ ii_read_lock_child(inode); ++ ++ err = flush(file, id); ++ au_cpup_attr_timesizes(inode); ++ ++ ii_read_unlock(inode); ++ fi_read_unlock(file); ++ si_read_unlock(sb); ++ return err; ++} ++ ++/* ---------------------------------------------------------------------- */ ++ ++static int au_file_refresh_by_inode(struct file *file, int *need_reopen) ++{ ++ int err; ++ aufs_bindex_t bstart; ++ struct au_pin pin; ++ struct au_finfo *finfo; ++ struct dentry *dentry, *parent, *hi_wh; ++ struct inode *inode; ++ struct super_block *sb; ++ ++ FiMustWriteLock(file); ++ ++ err = 0; ++ finfo = au_fi(file); ++ dentry = file->f_dentry; ++ sb = dentry->d_sb; ++ inode = dentry->d_inode; ++ bstart = au_ibstart(inode); ++ if (bstart == finfo->fi_btop || IS_ROOT(dentry)) ++ goto out; ++ ++ parent = dget_parent(dentry); ++ if (au_test_ro(sb, bstart, inode)) { ++ di_read_lock_parent(parent, !AuLock_IR); ++ err = AuWbrCopyup(au_sbi(sb), dentry); ++ bstart = err; ++ di_read_unlock(parent, !AuLock_IR); ++ if (unlikely(err < 0)) ++ goto out_parent; ++ err = 0; ++ } ++ ++ di_read_lock_parent(parent, AuLock_IR); ++ hi_wh = au_hi_wh(inode, bstart); ++ if (!S_ISDIR(inode->i_mode) ++ && au_opt_test(au_mntflags(sb), PLINK) ++ && au_plink_test(inode) ++ && !d_unhashed(dentry)) { ++ err = au_test_and_cpup_dirs(dentry, bstart); ++ if (unlikely(err)) ++ goto out_unlock; ++ ++ /* always superio. 
*/ ++ err = au_pin(&pin, dentry, bstart, AuOpt_UDBA_NONE, ++ AuPin_DI_LOCKED | AuPin_MNT_WRITE); ++ if (!err) ++ err = au_sio_cpup_simple(dentry, bstart, -1, ++ AuCpup_DTIME); ++ au_unpin(&pin); ++ } else if (hi_wh) { ++ /* already copied-up after unlink */ ++ err = au_reopen_wh(file, bstart, hi_wh); ++ *need_reopen = 0; ++ } ++ ++out_unlock: ++ di_read_unlock(parent, AuLock_IR); ++out_parent: ++ dput(parent); ++out: ++ return err; ++} ++ ++static void au_do_refresh_dir(struct file *file) ++{ ++ aufs_bindex_t bindex, bend, new_bindex, brid; ++ struct au_hfile *p, tmp, *q; ++ struct au_finfo *finfo; ++ struct super_block *sb; ++ struct au_fidir *fidir; ++ ++ FiMustWriteLock(file); ++ ++ sb = file->f_dentry->d_sb; ++ finfo = au_fi(file); ++ fidir = finfo->fi_hdir; ++ AuDebugOn(!fidir); ++ p = fidir->fd_hfile + finfo->fi_btop; ++ brid = p->hf_br->br_id; ++ bend = fidir->fd_bbot; ++ for (bindex = finfo->fi_btop; bindex <= bend; bindex++, p++) { ++ if (!p->hf_file) ++ continue; ++ ++ new_bindex = au_br_index(sb, p->hf_br->br_id); ++ if (new_bindex == bindex) ++ continue; ++ if (new_bindex < 0) { ++ au_set_h_fptr(file, bindex, NULL); ++ continue; ++ } ++ ++ /* swap two lower inode, and loop again */ ++ q = fidir->fd_hfile + new_bindex; ++ tmp = *q; ++ *q = *p; ++ *p = tmp; ++ if (tmp.hf_file) { ++ bindex--; ++ p--; ++ } ++ } ++ ++ p = fidir->fd_hfile; ++ if (!au_test_mmapped(file) && !d_unlinked(file->f_dentry)) { ++ bend = au_sbend(sb); ++ for (finfo->fi_btop = 0; finfo->fi_btop <= bend; ++ finfo->fi_btop++, p++) ++ if (p->hf_file) { ++ if (p->hf_file->f_dentry ++ && p->hf_file->f_dentry->d_inode) ++ break; ++ else ++ au_hfput(p, file); ++ } ++ } else { ++ bend = au_br_index(sb, brid); ++ for (finfo->fi_btop = 0; finfo->fi_btop < bend; ++ finfo->fi_btop++, p++) ++ if (p->hf_file) ++ au_hfput(p, file); ++ bend = au_sbend(sb); ++ } ++ ++ p = fidir->fd_hfile + bend; ++ for (fidir->fd_bbot = bend; fidir->fd_bbot >= finfo->fi_btop; ++ fidir->fd_bbot--, p--) ++ if (p->hf_file) { ++ if (p->hf_file->f_dentry ++ && p->hf_file->f_dentry->d_inode) ++ break; ++ else ++ au_hfput(p, file); ++ } ++ AuDebugOn(fidir->fd_bbot < finfo->fi_btop); ++} ++ ++/* ++ * after branch manipulating, refresh the file. 
++ */ ++static int refresh_file(struct file *file, int (*reopen)(struct file *file)) ++{ ++ int err, need_reopen; ++ aufs_bindex_t bend, bindex; ++ struct dentry *dentry; ++ struct au_finfo *finfo; ++ struct au_hfile *hfile; ++ ++ dentry = file->f_dentry; ++ finfo = au_fi(file); ++ if (!finfo->fi_hdir) { ++ hfile = &finfo->fi_htop; ++ AuDebugOn(!hfile->hf_file); ++ bindex = au_br_index(dentry->d_sb, hfile->hf_br->br_id); ++ AuDebugOn(bindex < 0); ++ if (bindex != finfo->fi_btop) ++ au_set_fbstart(file, bindex); ++ } else { ++ err = au_fidir_realloc(finfo, au_sbend(dentry->d_sb) + 1); ++ if (unlikely(err)) ++ goto out; ++ au_do_refresh_dir(file); ++ } ++ ++ err = 0; ++ need_reopen = 1; ++ if (!au_test_mmapped(file)) ++ err = au_file_refresh_by_inode(file, &need_reopen); ++ if (!err && need_reopen && !d_unlinked(dentry)) ++ err = reopen(file); ++ if (!err) { ++ au_update_figen(file); ++ goto out; /* success */ ++ } ++ ++ /* error, close all lower files */ ++ if (finfo->fi_hdir) { ++ bend = au_fbend_dir(file); ++ for (bindex = au_fbstart(file); bindex <= bend; bindex++) ++ au_set_h_fptr(file, bindex, NULL); ++ } ++ ++out: ++ return err; ++} ++ ++/* common function to regular file and dir */ ++int au_reval_and_lock_fdi(struct file *file, int (*reopen)(struct file *file), ++ int wlock) ++{ ++ int err; ++ unsigned int sigen, figen; ++ aufs_bindex_t bstart; ++ unsigned char pseudo_link; ++ struct dentry *dentry; ++ struct inode *inode; ++ ++ err = 0; ++ dentry = file->f_dentry; ++ inode = dentry->d_inode; ++ AuDebugOn(au_special_file(inode->i_mode)); ++ sigen = au_sigen(dentry->d_sb); ++ fi_write_lock(file); ++ figen = au_figen(file); ++ di_write_lock_child(dentry); ++ bstart = au_dbstart(dentry); ++ pseudo_link = (bstart != au_ibstart(inode)); ++ if (sigen == figen && !pseudo_link && au_fbstart(file) == bstart) { ++ if (!wlock) { ++ di_downgrade_lock(dentry, AuLock_IR); ++ fi_downgrade_lock(file); ++ } ++ goto out; /* success */ ++ } ++ ++ AuDbg("sigen %d, figen %d\n", sigen, figen); ++ if (au_digen_test(dentry, sigen)) { ++ err = au_reval_dpath(dentry, sigen); ++ AuDebugOn(!err && au_digen_test(dentry, sigen)); ++ } ++ ++ if (!err) ++ err = refresh_file(file, reopen); ++ if (!err) { ++ if (!wlock) { ++ di_downgrade_lock(dentry, AuLock_IR); ++ fi_downgrade_lock(file); ++ } ++ } else { ++ di_write_unlock(dentry); ++ fi_write_unlock(file); ++ } ++ ++out: ++ return err; ++} ++ ++/* ---------------------------------------------------------------------- */ ++ ++/* cf. aufs_nopage() */ ++/* for madvise(2) */ ++static int aufs_readpage(struct file *file __maybe_unused, struct page *page) ++{ ++ unlock_page(page); ++ return 0; ++} ++ ++/* it will never be called, but necessary to support O_DIRECT */ ++static ssize_t aufs_direct_IO(int rw, struct kiocb *iocb, ++ const struct iovec *iov, loff_t offset, ++ unsigned long nr_segs) ++{ BUG(); return 0; } ++ ++/* ++ * it will never be called, but madvise and fadvise behaves differently ++ * when get_xip_mem is defined ++ */ ++static int aufs_get_xip_mem(struct address_space *mapping, pgoff_t pgoff, ++ int create, void **kmem, unsigned long *pfn) ++{ BUG(); return 0; } ++ ++/* they will never be called. 
*/ ++#ifdef CONFIG_AUFS_DEBUG ++static int aufs_write_begin(struct file *file, struct address_space *mapping, ++ loff_t pos, unsigned len, unsigned flags, ++ struct page **pagep, void **fsdata) ++{ AuUnsupport(); return 0; } ++static int aufs_write_end(struct file *file, struct address_space *mapping, ++ loff_t pos, unsigned len, unsigned copied, ++ struct page *page, void *fsdata) ++{ AuUnsupport(); return 0; } ++static int aufs_writepage(struct page *page, struct writeback_control *wbc) ++{ AuUnsupport(); return 0; } ++ ++static int aufs_set_page_dirty(struct page *page) ++{ AuUnsupport(); return 0; } ++static void aufs_invalidatepage(struct page *page, unsigned long offset) ++{ AuUnsupport(); } ++static int aufs_releasepage(struct page *page, gfp_t gfp) ++{ AuUnsupport(); return 0; } ++static int aufs_migratepage(struct address_space *mapping, struct page *newpage, ++ struct page *page, enum migrate_mode mode) ++{ AuUnsupport(); return 0; } ++static int aufs_launder_page(struct page *page) ++{ AuUnsupport(); return 0; } ++static int aufs_is_partially_uptodate(struct page *page, ++ read_descriptor_t *desc, ++ unsigned long from) ++{ AuUnsupport(); return 0; } ++static int aufs_error_remove_page(struct address_space *mapping, ++ struct page *page) ++{ AuUnsupport(); return 0; } ++#endif /* CONFIG_AUFS_DEBUG */ ++ ++const struct address_space_operations aufs_aop = { ++ .readpage = aufs_readpage, ++ .direct_IO = aufs_direct_IO, ++ .get_xip_mem = aufs_get_xip_mem, ++#ifdef CONFIG_AUFS_DEBUG ++ .writepage = aufs_writepage, ++ /* no writepages, because of writepage */ ++ .set_page_dirty = aufs_set_page_dirty, ++ /* no readpages, because of readpage */ ++ .write_begin = aufs_write_begin, ++ .write_end = aufs_write_end, ++ /* no bmap, no block device */ ++ .invalidatepage = aufs_invalidatepage, ++ .releasepage = aufs_releasepage, ++ .migratepage = aufs_migratepage, ++ .launder_page = aufs_launder_page, ++ .is_partially_uptodate = aufs_is_partially_uptodate, ++ .error_remove_page = aufs_error_remove_page ++#endif /* CONFIG_AUFS_DEBUG */ ++}; +--- /dev/null 2012-03-14 12:35:58.848999748 +0100 ++++ b/fs/aufs/file.h 2012-03-20 17:31:17.000000000 +0100 +@@ -0,0 +1,298 @@ ++/* ++ * Copyright (C) 2005-2012 Junjiro R. Okajima ++ * ++ * This program, aufs is free software; you can redistribute it and/or modify ++ * it under the terms of the GNU General Public License as published by ++ * the Free Software Foundation; either version 2 of the License, or ++ * (at your option) any later version. ++ * ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ * GNU General Public License for more details. 
++ * ++ * You should have received a copy of the GNU General Public License ++ * along with this program; if not, write to the Free Software ++ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA ++ */ ++ ++/* ++ * file operations ++ */ ++ ++#ifndef __AUFS_FILE_H__ ++#define __AUFS_FILE_H__ ++ ++#ifdef __KERNEL__ ++ ++#include ++#include ++#include ++#include "rwsem.h" ++ ++struct au_branch; ++struct au_hfile { ++ struct file *hf_file; ++ struct au_branch *hf_br; ++}; ++ ++struct au_vdir; ++struct au_fidir { ++ aufs_bindex_t fd_bbot; ++ aufs_bindex_t fd_nent; ++ struct au_vdir *fd_vdir_cache; ++ struct au_hfile fd_hfile[]; ++}; ++ ++static inline int au_fidir_sz(int nent) ++{ ++ AuDebugOn(nent < 0); ++ return sizeof(struct au_fidir) + sizeof(struct au_hfile) * nent; ++} ++ ++struct au_finfo { ++ atomic_t fi_generation; ++ ++ struct au_rwsem fi_rwsem; ++ aufs_bindex_t fi_btop; ++ ++ /* do not union them */ ++ struct { /* for non-dir */ ++ struct au_hfile fi_htop; ++ atomic_t fi_mmapped; ++ }; ++ struct au_fidir *fi_hdir; /* for dir only */ ++} ____cacheline_aligned_in_smp; ++ ++/* ---------------------------------------------------------------------- */ ++ ++/* file.c */ ++extern const struct address_space_operations aufs_aop; ++unsigned int au_file_roflags(unsigned int flags); ++struct file *au_h_open(struct dentry *dentry, aufs_bindex_t bindex, int flags, ++ struct file *file); ++int au_do_open(struct file *file, int (*open)(struct file *file, int flags), ++ struct au_fidir *fidir); ++int au_reopen_nondir(struct file *file); ++struct au_pin; ++int au_ready_to_write(struct file *file, loff_t len, struct au_pin *pin); ++int au_reval_and_lock_fdi(struct file *file, int (*reopen)(struct file *file), ++ int wlock); ++int au_do_flush(struct file *file, fl_owner_t id, ++ int (*flush)(struct file *file, fl_owner_t id)); ++ ++/* poll.c */ ++#ifdef CONFIG_AUFS_POLL ++unsigned int aufs_poll(struct file *file, poll_table *wait); ++#endif ++ ++#ifdef CONFIG_AUFS_BR_HFSPLUS ++/* hfsplus.c */ ++struct file *au_h_open_pre(struct dentry *dentry, aufs_bindex_t bindex); ++void au_h_open_post(struct dentry *dentry, aufs_bindex_t bindex, ++ struct file *h_file); ++#else ++static inline ++struct file *au_h_open_pre(struct dentry *dentry, aufs_bindex_t bindex) ++{ ++ return NULL; ++} ++ ++AuStubVoid(au_h_open_post, struct dentry *dentry, aufs_bindex_t bindex, ++ struct file *h_file); ++#endif ++ ++/* f_op.c */ ++extern const struct file_operations aufs_file_fop; ++int au_do_open_nondir(struct file *file, int flags); ++int aufs_release_nondir(struct inode *inode __maybe_unused, struct file *file); ++ ++#ifdef CONFIG_AUFS_SP_IATTR ++/* f_op_sp.c */ ++int au_special_file(umode_t mode); ++void au_init_special_fop(struct inode *inode, umode_t mode, dev_t rdev); ++#else ++AuStubInt0(au_special_file, umode_t mode) ++static inline void au_init_special_fop(struct inode *inode, umode_t mode, ++ dev_t rdev) ++{ ++ init_special_inode(inode, mode, rdev); ++} ++#endif ++ ++/* finfo.c */ ++void au_hfput(struct au_hfile *hf, struct file *file); ++void au_set_h_fptr(struct file *file, aufs_bindex_t bindex, ++ struct file *h_file); ++ ++void au_update_figen(struct file *file); ++struct au_fidir *au_fidir_alloc(struct super_block *sb); ++int au_fidir_realloc(struct au_finfo *finfo, int nbr); ++ ++void au_fi_init_once(void *_fi); ++void au_finfo_fin(struct file *file); ++int au_finfo_init(struct file *file, struct au_fidir *fidir); ++ ++/* ioctl.c */ ++long aufs_ioctl_nondir(struct file *file, unsigned int 
cmd, unsigned long arg); ++#ifdef CONFIG_COMPAT ++long aufs_compat_ioctl_dir(struct file *file, unsigned int cmd, ++ unsigned long arg); ++#endif ++ ++/* ---------------------------------------------------------------------- */ ++ ++static inline struct au_finfo *au_fi(struct file *file) ++{ ++ return file->private_data; ++} ++ ++/* ---------------------------------------------------------------------- */ ++ ++/* ++ * fi_read_lock, fi_write_lock, ++ * fi_read_unlock, fi_write_unlock, fi_downgrade_lock ++ */ ++AuSimpleRwsemFuncs(fi, struct file *f, &au_fi(f)->fi_rwsem); ++ ++#define FiMustNoWaiters(f) AuRwMustNoWaiters(&au_fi(f)->fi_rwsem) ++#define FiMustAnyLock(f) AuRwMustAnyLock(&au_fi(f)->fi_rwsem) ++#define FiMustWriteLock(f) AuRwMustWriteLock(&au_fi(f)->fi_rwsem) ++ ++/* ---------------------------------------------------------------------- */ ++ ++/* todo: hard/soft set? */ ++static inline aufs_bindex_t au_fbstart(struct file *file) ++{ ++ FiMustAnyLock(file); ++ return au_fi(file)->fi_btop; ++} ++ ++static inline aufs_bindex_t au_fbend_dir(struct file *file) ++{ ++ FiMustAnyLock(file); ++ AuDebugOn(!au_fi(file)->fi_hdir); ++ return au_fi(file)->fi_hdir->fd_bbot; ++} ++ ++static inline struct au_vdir *au_fvdir_cache(struct file *file) ++{ ++ FiMustAnyLock(file); ++ AuDebugOn(!au_fi(file)->fi_hdir); ++ return au_fi(file)->fi_hdir->fd_vdir_cache; ++} ++ ++static inline void au_set_fbstart(struct file *file, aufs_bindex_t bindex) ++{ ++ FiMustWriteLock(file); ++ au_fi(file)->fi_btop = bindex; ++} ++ ++static inline void au_set_fbend_dir(struct file *file, aufs_bindex_t bindex) ++{ ++ FiMustWriteLock(file); ++ AuDebugOn(!au_fi(file)->fi_hdir); ++ au_fi(file)->fi_hdir->fd_bbot = bindex; ++} ++ ++static inline void au_set_fvdir_cache(struct file *file, ++ struct au_vdir *vdir_cache) ++{ ++ FiMustWriteLock(file); ++ AuDebugOn(!au_fi(file)->fi_hdir); ++ au_fi(file)->fi_hdir->fd_vdir_cache = vdir_cache; ++} ++ ++static inline struct file *au_hf_top(struct file *file) ++{ ++ FiMustAnyLock(file); ++ AuDebugOn(au_fi(file)->fi_hdir); ++ return au_fi(file)->fi_htop.hf_file; ++} ++ ++static inline struct file *au_hf_dir(struct file *file, aufs_bindex_t bindex) ++{ ++ FiMustAnyLock(file); ++ AuDebugOn(!au_fi(file)->fi_hdir); ++ return au_fi(file)->fi_hdir->fd_hfile[0 + bindex].hf_file; ++} ++ ++/* todo: memory barrier? 
*/ ++static inline unsigned int au_figen(struct file *f) ++{ ++ return atomic_read(&au_fi(f)->fi_generation); ++} ++ ++static inline void au_set_mmapped(struct file *f) ++{ ++ if (atomic_inc_return(&au_fi(f)->fi_mmapped)) ++ return; ++ pr_warning("fi_mmapped wrapped around\n"); ++ while (!atomic_inc_return(&au_fi(f)->fi_mmapped)) ++ ; ++} ++ ++static inline void au_unset_mmapped(struct file *f) ++{ ++ atomic_dec(&au_fi(f)->fi_mmapped); ++} ++ ++static inline int au_test_mmapped(struct file *f) ++{ ++ return atomic_read(&au_fi(f)->fi_mmapped); ++} ++ ++/* customize vma->vm_file */ ++ ++static inline void au_do_vm_file_reset(struct vm_area_struct *vma, ++ struct file *file) ++{ ++ struct file *f; ++ ++ f = vma->vm_file; ++ get_file(file); ++ vma->vm_file = file; ++ fput(f); ++} ++ ++#ifdef CONFIG_MMU ++#define AuDbgVmRegion(file, vma) do {} while (0) ++ ++static inline void au_vm_file_reset(struct vm_area_struct *vma, ++ struct file *file) ++{ ++ au_do_vm_file_reset(vma, file); ++} ++#else ++#define AuDbgVmRegion(file, vma) \ ++ AuDebugOn((vma)->vm_region && (vma)->vm_region->vm_file != (file)) ++ ++static inline void au_vm_file_reset(struct vm_area_struct *vma, ++ struct file *file) ++{ ++ struct file *f; ++ ++ au_do_vm_file_reset(vma, file); ++ f = vma->vm_region->vm_file; ++ get_file(file); ++ vma->vm_region->vm_file = file; ++ fput(f); ++} ++#endif /* CONFIG_MMU */ ++ ++/* handle vma->vm_prfile */ ++static inline void au_vm_prfile_set(struct vm_area_struct *vma, ++ struct file *file) ++{ ++#ifdef CONFIG_AUFS_PROC_MAP ++ get_file(file); ++ vma->vm_prfile = file; ++#ifndef CONFIG_MMU ++ get_file(file); ++ vma->vm_region->vm_prfile = file; ++#endif ++#endif ++} ++ ++#endif /* __KERNEL__ */ ++#endif /* __AUFS_FILE_H__ */ +--- /dev/null 2012-03-14 12:35:58.848999748 +0100 ++++ b/fs/aufs/finfo.c 2012-03-20 17:31:17.000000000 +0100 +@@ -0,0 +1,156 @@ ++/* ++ * Copyright (C) 2005-2012 Junjiro R. Okajima ++ * ++ * This program, aufs is free software; you can redistribute it and/or modify ++ * it under the terms of the GNU General Public License as published by ++ * the Free Software Foundation; either version 2 of the License, or ++ * (at your option) any later version. ++ * ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ * GNU General Public License for more details. 
++ * ++ * You should have received a copy of the GNU General Public License ++ * along with this program; if not, write to the Free Software ++ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA ++ */ ++ ++/* ++ * file private data ++ */ ++ ++#include "aufs.h" ++ ++void au_hfput(struct au_hfile *hf, struct file *file) ++{ ++ /* todo: direct access f_flags */ ++ if (vfsub_file_flags(file) & __FMODE_EXEC) ++ allow_write_access(hf->hf_file); ++ fput(hf->hf_file); ++ hf->hf_file = NULL; ++ atomic_dec(&hf->hf_br->br_count); ++ hf->hf_br = NULL; ++} ++ ++void au_set_h_fptr(struct file *file, aufs_bindex_t bindex, struct file *val) ++{ ++ struct au_finfo *finfo = au_fi(file); ++ struct au_hfile *hf; ++ struct au_fidir *fidir; ++ ++ fidir = finfo->fi_hdir; ++ if (!fidir) { ++ AuDebugOn(finfo->fi_btop != bindex); ++ hf = &finfo->fi_htop; ++ } else ++ hf = fidir->fd_hfile + bindex; ++ ++ if (hf && hf->hf_file) ++ au_hfput(hf, file); ++ if (val) { ++ FiMustWriteLock(file); ++ hf->hf_file = val; ++ hf->hf_br = au_sbr(file->f_dentry->d_sb, bindex); ++ } ++} ++ ++void au_update_figen(struct file *file) ++{ ++ atomic_set(&au_fi(file)->fi_generation, au_digen(file->f_dentry)); ++ /* smp_mb(); */ /* atomic_set */ ++} ++ ++/* ---------------------------------------------------------------------- */ ++ ++struct au_fidir *au_fidir_alloc(struct super_block *sb) ++{ ++ struct au_fidir *fidir; ++ int nbr; ++ ++ nbr = au_sbend(sb) + 1; ++ if (nbr < 2) ++ nbr = 2; /* initial allocate for 2 branches */ ++ fidir = kzalloc(au_fidir_sz(nbr), GFP_NOFS); ++ if (fidir) { ++ fidir->fd_bbot = -1; ++ fidir->fd_nent = nbr; ++ fidir->fd_vdir_cache = NULL; ++ } ++ ++ return fidir; ++} ++ ++int au_fidir_realloc(struct au_finfo *finfo, int nbr) ++{ ++ int err; ++ struct au_fidir *fidir, *p; ++ ++ AuRwMustWriteLock(&finfo->fi_rwsem); ++ fidir = finfo->fi_hdir; ++ AuDebugOn(!fidir); ++ ++ err = -ENOMEM; ++ p = au_kzrealloc(fidir, au_fidir_sz(fidir->fd_nent), au_fidir_sz(nbr), ++ GFP_NOFS); ++ if (p) { ++ p->fd_nent = nbr; ++ finfo->fi_hdir = p; ++ err = 0; ++ } ++ ++ return err; ++} ++ ++/* ---------------------------------------------------------------------- */ ++ ++void au_finfo_fin(struct file *file) ++{ ++ struct au_finfo *finfo; ++ ++ au_nfiles_dec(file->f_dentry->d_sb); ++ ++ finfo = au_fi(file); ++ AuDebugOn(finfo->fi_hdir); ++ AuRwDestroy(&finfo->fi_rwsem); ++ au_cache_free_finfo(finfo); ++} ++ ++void au_fi_init_once(void *_finfo) ++{ ++ struct au_finfo *finfo = _finfo; ++ static struct lock_class_key aufs_fi; ++ ++ au_rw_init(&finfo->fi_rwsem); ++ au_rw_class(&finfo->fi_rwsem, &aufs_fi); ++} ++ ++int au_finfo_init(struct file *file, struct au_fidir *fidir) ++{ ++ int err, lc_idx; ++ struct au_finfo *finfo; ++ struct dentry *dentry; ++ ++ err = -ENOMEM; ++ dentry = file->f_dentry; ++ finfo = au_cache_alloc_finfo(); ++ if (unlikely(!finfo)) ++ goto out; ++ ++ err = 0; ++ au_nfiles_inc(dentry->d_sb); ++ lc_idx = AuLcNonDir_FIINFO; ++ if (fidir) ++ lc_idx = AuLcDir_FIINFO; ++ au_rw_class(&finfo->fi_rwsem, au_lc_key + lc_idx); ++ au_rw_write_lock(&finfo->fi_rwsem); ++ finfo->fi_btop = -1; ++ finfo->fi_hdir = fidir; ++ atomic_set(&finfo->fi_generation, au_digen(dentry)); ++ /* smp_mb(); */ /* atomic_set */ ++ ++ file->private_data = finfo; ++ ++out: ++ return err; ++} +--- /dev/null 2012-03-14 12:35:58.848999748 +0100 ++++ b/fs/aufs/f_op.c 2012-03-20 17:31:17.000000000 +0100 +@@ -0,0 +1,729 @@ ++/* ++ * Copyright (C) 2005-2012 Junjiro R. 
Okajima ++ * ++ * This program, aufs is free software; you can redistribute it and/or modify ++ * it under the terms of the GNU General Public License as published by ++ * the Free Software Foundation; either version 2 of the License, or ++ * (at your option) any later version. ++ * ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ * GNU General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public License ++ * along with this program; if not, write to the Free Software ++ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA ++ */ ++ ++/* ++ * file and vm operations ++ */ ++ ++#include ++#include ++#include ++#include "aufs.h" ++ ++int au_do_open_nondir(struct file *file, int flags) ++{ ++ int err; ++ aufs_bindex_t bindex; ++ struct file *h_file; ++ struct dentry *dentry; ++ struct au_finfo *finfo; ++ ++ FiMustWriteLock(file); ++ ++ dentry = file->f_dentry; ++ err = au_d_alive(dentry); ++ if (unlikely(err)) ++ goto out; ++ ++ finfo = au_fi(file); ++ memset(&finfo->fi_htop, 0, sizeof(finfo->fi_htop)); ++ atomic_set(&finfo->fi_mmapped, 0); ++ bindex = au_dbstart(dentry); ++ h_file = au_h_open(dentry, bindex, flags, file); ++ if (IS_ERR(h_file)) ++ err = PTR_ERR(h_file); ++ else { ++ au_set_fbstart(file, bindex); ++ au_set_h_fptr(file, bindex, h_file); ++ au_update_figen(file); ++ /* todo: necessary? */ ++ /* file->f_ra = h_file->f_ra; */ ++ } ++ ++out: ++ return err; ++} ++ ++static int aufs_open_nondir(struct inode *inode __maybe_unused, ++ struct file *file) ++{ ++ int err; ++ struct super_block *sb; ++ ++ AuDbg("%.*s, f_flags 0x%x, f_mode 0x%x\n", ++ AuDLNPair(file->f_dentry), vfsub_file_flags(file), ++ file->f_mode); ++ ++ sb = file->f_dentry->d_sb; ++ si_read_lock(sb, AuLock_FLUSH); ++ err = au_do_open(file, au_do_open_nondir, /*fidir*/NULL); ++ si_read_unlock(sb); ++ return err; ++} ++ ++int aufs_release_nondir(struct inode *inode __maybe_unused, struct file *file) ++{ ++ struct au_finfo *finfo; ++ aufs_bindex_t bindex; ++ ++ finfo = au_fi(file); ++ bindex = finfo->fi_btop; ++ if (bindex >= 0) { ++ /* remove me from sb->s_files */ ++ file_sb_list_del(file); ++ au_set_h_fptr(file, bindex, NULL); ++ } ++ ++ au_finfo_fin(file); ++ return 0; ++} ++ ++/* ---------------------------------------------------------------------- */ ++ ++static int au_do_flush_nondir(struct file *file, fl_owner_t id) ++{ ++ int err; ++ struct file *h_file; ++ ++ err = 0; ++ h_file = au_hf_top(file); ++ if (h_file) ++ err = vfsub_flush(h_file, id); ++ return err; ++} ++ ++static int aufs_flush_nondir(struct file *file, fl_owner_t id) ++{ ++ return au_do_flush(file, id, au_do_flush_nondir); ++} ++ ++/* ---------------------------------------------------------------------- */ ++/* ++ * read and write functions acquire [fdi]_rwsem once, but release before ++ * mmap_sem. This is because to stop a race condition between mmap(2). ++ * Releasing these aufs-rwsem should be safe, no branch-mamagement (by keeping ++ * si_rwsem), no harmful copy-up should happen. Actually copy-up may happen in ++ * read functions after [fdi]_rwsem are released, but it should be harmless. 
++ */ ++ ++static ssize_t aufs_read(struct file *file, char __user *buf, size_t count, ++ loff_t *ppos) ++{ ++ ssize_t err; ++ struct dentry *dentry; ++ struct file *h_file; ++ struct super_block *sb; ++ ++ dentry = file->f_dentry; ++ sb = dentry->d_sb; ++ si_read_lock(sb, AuLock_FLUSH | AuLock_NOPLMW); ++ err = au_reval_and_lock_fdi(file, au_reopen_nondir, /*wlock*/0); ++ if (unlikely(err)) ++ goto out; ++ ++ h_file = au_hf_top(file); ++ get_file(h_file); ++ di_read_unlock(dentry, AuLock_IR); ++ fi_read_unlock(file); ++ ++ /* filedata may be obsoleted by concurrent copyup, but no problem */ ++ err = vfsub_read_u(h_file, buf, count, ppos); ++ /* todo: necessary? */ ++ /* file->f_ra = h_file->f_ra; */ ++ /* update without lock, I don't think it a problem */ ++ fsstack_copy_attr_atime(dentry->d_inode, h_file->f_dentry->d_inode); ++ fput(h_file); ++ ++out: ++ si_read_unlock(sb); ++ return err; ++} ++ ++/* ++ * todo: very ugly ++ * it locks both of i_mutex and si_rwsem for read in safe. ++ * if the plink maintenance mode continues forever (that is the problem), ++ * may loop forever. ++ */ ++static void au_mtx_and_read_lock(struct inode *inode) ++{ ++ int err; ++ struct super_block *sb = inode->i_sb; ++ ++ while (1) { ++ mutex_lock(&inode->i_mutex); ++ err = si_read_lock(sb, AuLock_FLUSH | AuLock_NOPLM); ++ if (!err) ++ break; ++ mutex_unlock(&inode->i_mutex); ++ si_read_lock(sb, AuLock_NOPLMW); ++ si_read_unlock(sb); ++ } ++} ++ ++static ssize_t aufs_write(struct file *file, const char __user *ubuf, ++ size_t count, loff_t *ppos) ++{ ++ ssize_t err; ++ struct au_pin pin; ++ struct dentry *dentry; ++ struct super_block *sb; ++ struct inode *inode; ++ struct file *h_file; ++ char __user *buf = (char __user *)ubuf; ++ ++ dentry = file->f_dentry; ++ sb = dentry->d_sb; ++ inode = dentry->d_inode; ++ au_mtx_and_read_lock(inode); ++ ++ err = au_reval_and_lock_fdi(file, au_reopen_nondir, /*wlock*/1); ++ if (unlikely(err)) ++ goto out; ++ ++ err = au_ready_to_write(file, -1, &pin); ++ di_downgrade_lock(dentry, AuLock_IR); ++ if (unlikely(err)) { ++ di_read_unlock(dentry, AuLock_IR); ++ fi_write_unlock(file); ++ goto out; ++ } ++ ++ h_file = au_hf_top(file); ++ get_file(h_file); ++ au_unpin(&pin); ++ di_read_unlock(dentry, AuLock_IR); ++ fi_write_unlock(file); ++ ++ err = vfsub_write_u(h_file, buf, count, ppos); ++ ii_write_lock_child(inode); ++ au_cpup_attr_timesizes(inode); ++ inode->i_mode = h_file->f_dentry->d_inode->i_mode; ++ ii_write_unlock(inode); ++ fput(h_file); ++ ++out: ++ si_read_unlock(sb); ++ mutex_unlock(&inode->i_mutex); ++ return err; ++} ++ ++static ssize_t au_do_aio(struct file *h_file, int rw, struct kiocb *kio, ++ const struct iovec *iov, unsigned long nv, loff_t pos) ++{ ++ ssize_t err; ++ struct file *file; ++ ssize_t (*func)(struct kiocb *, const struct iovec *, unsigned long, ++ loff_t); ++ ++ err = security_file_permission(h_file, rw); ++ if (unlikely(err)) ++ goto out; ++ ++ err = -ENOSYS; ++ func = NULL; ++ if (rw == MAY_READ) ++ func = h_file->f_op->aio_read; ++ else if (rw == MAY_WRITE) ++ func = h_file->f_op->aio_write; ++ if (func) { ++ file = kio->ki_filp; ++ kio->ki_filp = h_file; ++ lockdep_off(); ++ err = func(kio, iov, nv, pos); ++ lockdep_on(); ++ kio->ki_filp = file; ++ } else ++ /* currently there is no such fs */ ++ WARN_ON_ONCE(1); ++ ++out: ++ return err; ++} ++ ++static ssize_t aufs_aio_read(struct kiocb *kio, const struct iovec *iov, ++ unsigned long nv, loff_t pos) ++{ ++ ssize_t err; ++ struct file *file, *h_file; ++ struct dentry *dentry; ++ struct 
super_block *sb; ++ ++ file = kio->ki_filp; ++ dentry = file->f_dentry; ++ sb = dentry->d_sb; ++ si_read_lock(sb, AuLock_FLUSH | AuLock_NOPLMW); ++ err = au_reval_and_lock_fdi(file, au_reopen_nondir, /*wlock*/0); ++ if (unlikely(err)) ++ goto out; ++ ++ h_file = au_hf_top(file); ++ get_file(h_file); ++ di_read_unlock(dentry, AuLock_IR); ++ fi_read_unlock(file); ++ ++ err = au_do_aio(h_file, MAY_READ, kio, iov, nv, pos); ++ /* todo: necessary? */ ++ /* file->f_ra = h_file->f_ra; */ ++ /* update without lock, I don't think it a problem */ ++ fsstack_copy_attr_atime(dentry->d_inode, h_file->f_dentry->d_inode); ++ fput(h_file); ++ ++out: ++ si_read_unlock(sb); ++ return err; ++} ++ ++static ssize_t aufs_aio_write(struct kiocb *kio, const struct iovec *iov, ++ unsigned long nv, loff_t pos) ++{ ++ ssize_t err; ++ struct au_pin pin; ++ struct dentry *dentry; ++ struct inode *inode; ++ struct file *file, *h_file; ++ struct super_block *sb; ++ ++ file = kio->ki_filp; ++ dentry = file->f_dentry; ++ sb = dentry->d_sb; ++ inode = dentry->d_inode; ++ au_mtx_and_read_lock(inode); ++ ++ err = au_reval_and_lock_fdi(file, au_reopen_nondir, /*wlock*/1); ++ if (unlikely(err)) ++ goto out; ++ ++ err = au_ready_to_write(file, -1, &pin); ++ di_downgrade_lock(dentry, AuLock_IR); ++ if (unlikely(err)) { ++ di_read_unlock(dentry, AuLock_IR); ++ fi_write_unlock(file); ++ goto out; ++ } ++ ++ h_file = au_hf_top(file); ++ get_file(h_file); ++ au_unpin(&pin); ++ di_read_unlock(dentry, AuLock_IR); ++ fi_write_unlock(file); ++ ++ err = au_do_aio(h_file, MAY_WRITE, kio, iov, nv, pos); ++ ii_write_lock_child(inode); ++ au_cpup_attr_timesizes(inode); ++ inode->i_mode = h_file->f_dentry->d_inode->i_mode; ++ ii_write_unlock(inode); ++ fput(h_file); ++ ++out: ++ si_read_unlock(sb); ++ mutex_unlock(&inode->i_mutex); ++ return err; ++} ++ ++static ssize_t aufs_splice_read(struct file *file, loff_t *ppos, ++ struct pipe_inode_info *pipe, size_t len, ++ unsigned int flags) ++{ ++ ssize_t err; ++ struct file *h_file; ++ struct dentry *dentry; ++ struct super_block *sb; ++ ++ dentry = file->f_dentry; ++ sb = dentry->d_sb; ++ si_read_lock(sb, AuLock_FLUSH | AuLock_NOPLMW); ++ err = au_reval_and_lock_fdi(file, au_reopen_nondir, /*wlock*/0); ++ if (unlikely(err)) ++ goto out; ++ ++ err = -EINVAL; ++ h_file = au_hf_top(file); ++ get_file(h_file); ++ if (au_test_loopback_kthread()) { ++ au_warn_loopback(h_file->f_dentry->d_sb); ++ if (file->f_mapping != h_file->f_mapping) { ++ file->f_mapping = h_file->f_mapping; ++ smp_mb(); /* unnecessary? */ ++ } ++ } ++ di_read_unlock(dentry, AuLock_IR); ++ fi_read_unlock(file); ++ ++ err = vfsub_splice_to(h_file, ppos, pipe, len, flags); ++ /* todo: necessasry? 
*/ ++ /* file->f_ra = h_file->f_ra; */ ++ /* update without lock, I don't think it a problem */ ++ fsstack_copy_attr_atime(dentry->d_inode, h_file->f_dentry->d_inode); ++ fput(h_file); ++ ++out: ++ si_read_unlock(sb); ++ return err; ++} ++ ++static ssize_t ++aufs_splice_write(struct pipe_inode_info *pipe, struct file *file, loff_t *ppos, ++ size_t len, unsigned int flags) ++{ ++ ssize_t err; ++ struct au_pin pin; ++ struct dentry *dentry; ++ struct inode *inode; ++ struct file *h_file; ++ struct super_block *sb; ++ ++ dentry = file->f_dentry; ++ sb = dentry->d_sb; ++ inode = dentry->d_inode; ++ au_mtx_and_read_lock(inode); ++ ++ err = au_reval_and_lock_fdi(file, au_reopen_nondir, /*wlock*/1); ++ if (unlikely(err)) ++ goto out; ++ ++ err = au_ready_to_write(file, -1, &pin); ++ di_downgrade_lock(dentry, AuLock_IR); ++ if (unlikely(err)) { ++ di_read_unlock(dentry, AuLock_IR); ++ fi_write_unlock(file); ++ goto out; ++ } ++ ++ h_file = au_hf_top(file); ++ get_file(h_file); ++ au_unpin(&pin); ++ di_read_unlock(dentry, AuLock_IR); ++ fi_write_unlock(file); ++ ++ err = vfsub_splice_from(pipe, h_file, ppos, len, flags); ++ ii_write_lock_child(inode); ++ au_cpup_attr_timesizes(inode); ++ inode->i_mode = h_file->f_dentry->d_inode->i_mode; ++ ii_write_unlock(inode); ++ fput(h_file); ++ ++out: ++ si_read_unlock(sb); ++ mutex_unlock(&inode->i_mutex); ++ return err; ++} ++ ++/* ---------------------------------------------------------------------- */ ++ ++/* ++ * The locking order around current->mmap_sem. ++ * - in most and regular cases ++ * file I/O syscall -- aufs_read() or something ++ * -- si_rwsem for read -- mmap_sem ++ * (Note that [fdi]i_rwsem are released before mmap_sem). ++ * - in mmap case ++ * mmap(2) -- mmap_sem -- aufs_mmap() -- si_rwsem for read -- [fdi]i_rwsem ++ * This AB-BA order is definitly bad, but is not a problem since "si_rwsem for ++ * read" allows muliple processes to acquire it and [fdi]i_rwsem are not held in ++ * file I/O. Aufs needs to stop lockdep in aufs_mmap() though. ++ * It means that when aufs acquires si_rwsem for write, the process should never ++ * acquire mmap_sem. ++ * ++ * Actually aufs_readdir() holds [fdi]i_rwsem before mmap_sem, but this is not a ++ * problem either since any directory is not able to be mmap-ed. ++ * The similar scenario is applied to aufs_readlink() too. ++ */ ++ ++/* cf. linux/include/linux/mman.h: calc_vm_prot_bits() */ ++#define AuConv_VM_PROT(f, b) _calc_vm_trans(f, VM_##b, PROT_##b) ++ ++static unsigned long au_arch_prot_conv(unsigned long flags) ++{ ++ /* currently ppc64 only */ ++#ifdef CONFIG_PPC64 ++ /* cf. linux/arch/powerpc/include/asm/mman.h */ ++ AuDebugOn(arch_calc_vm_prot_bits(-1) != VM_SAO); ++ return AuConv_VM_PROT(flags, SAO); ++#else ++ AuDebugOn(arch_calc_vm_prot_bits(-1)); ++ return 0; ++#endif ++} ++ ++static unsigned long au_prot_conv(unsigned long flags) ++{ ++ return AuConv_VM_PROT(flags, READ) ++ | AuConv_VM_PROT(flags, WRITE) ++ | AuConv_VM_PROT(flags, EXEC) ++ | au_arch_prot_conv(flags); ++} ++ ++/* cf. 
linux/include/linux/mman.h: calc_vm_flag_bits() */ ++#define AuConv_VM_MAP(f, b) _calc_vm_trans(f, VM_##b, MAP_##b) ++ ++static unsigned long au_flag_conv(unsigned long flags) ++{ ++ return AuConv_VM_MAP(flags, GROWSDOWN) ++ | AuConv_VM_MAP(flags, DENYWRITE) ++ | AuConv_VM_MAP(flags, EXECUTABLE) ++ | AuConv_VM_MAP(flags, LOCKED); ++} ++ ++static int aufs_mmap(struct file *file, struct vm_area_struct *vma) ++{ ++ int err; ++ unsigned long prot; ++ aufs_bindex_t bstart; ++ const unsigned char wlock ++ = (file->f_mode & FMODE_WRITE) && (vma->vm_flags & VM_SHARED); ++ struct dentry *dentry; ++ struct super_block *sb; ++ struct file *h_file; ++ struct au_branch *br; ++ struct au_pin pin; ++ ++ AuDbgVmRegion(file, vma); ++ ++ dentry = file->f_dentry; ++ sb = dentry->d_sb; ++ lockdep_off(); ++ si_read_lock(sb, AuLock_NOPLMW); ++ err = au_reval_and_lock_fdi(file, au_reopen_nondir, /*wlock*/1); ++ if (unlikely(err)) ++ goto out; ++ ++ if (wlock) { ++ err = au_ready_to_write(file, -1, &pin); ++ di_write_unlock(dentry); ++ if (unlikely(err)) { ++ fi_write_unlock(file); ++ goto out; ++ } ++ au_unpin(&pin); ++ } else ++ di_write_unlock(dentry); ++ ++ bstart = au_fbstart(file); ++ br = au_sbr(sb, bstart); ++ h_file = au_hf_top(file); ++ get_file(h_file); ++ au_set_mmapped(file); ++ fi_write_unlock(file); ++ lockdep_on(); ++ ++ au_vm_file_reset(vma, h_file); ++ prot = au_prot_conv(vma->vm_flags); ++ err = security_file_mmap(h_file, /*reqprot*/prot, prot, ++ au_flag_conv(vma->vm_flags), vma->vm_start, 0); ++ if (!err) ++ err = h_file->f_op->mmap(h_file, vma); ++ if (unlikely(err)) ++ goto out_reset; ++ ++ au_vm_prfile_set(vma, file); ++ /* update without lock, I don't think it a problem */ ++ fsstack_copy_attr_atime(file->f_dentry->d_inode, ++ h_file->f_dentry->d_inode); ++ goto out_fput; /* success */ ++ ++out_reset: ++ au_unset_mmapped(file); ++ au_vm_file_reset(vma, file); ++out_fput: ++ fput(h_file); ++ lockdep_off(); ++out: ++ si_read_unlock(sb); ++ lockdep_on(); ++ AuTraceErr(err); ++ return err; ++} ++ ++/* ---------------------------------------------------------------------- */ ++ ++static int aufs_fsync_nondir(struct file *file, loff_t start, loff_t end, ++ int datasync) ++{ ++ int err; ++ struct au_pin pin; ++ struct dentry *dentry; ++ struct inode *inode; ++ struct file *h_file; ++ struct super_block *sb; ++ ++ dentry = file->f_dentry; ++ inode = dentry->d_inode; ++ sb = dentry->d_sb; ++ mutex_lock(&inode->i_mutex); ++ err = si_read_lock(sb, AuLock_FLUSH | AuLock_NOPLM); ++ if (unlikely(err)) ++ goto out; ++ ++ err = 0; /* -EBADF; */ /* posix? 
*/ ++ if (unlikely(!(file->f_mode & FMODE_WRITE))) ++ goto out_si; ++ err = au_reval_and_lock_fdi(file, au_reopen_nondir, /*wlock*/1); ++ if (unlikely(err)) ++ goto out_si; ++ ++ err = au_ready_to_write(file, -1, &pin); ++ di_downgrade_lock(dentry, AuLock_IR); ++ if (unlikely(err)) ++ goto out_unlock; ++ au_unpin(&pin); ++ ++ err = -EINVAL; ++ h_file = au_hf_top(file); ++ err = vfsub_fsync(h_file, &h_file->f_path, datasync); ++ au_cpup_attr_timesizes(inode); ++ ++out_unlock: ++ di_read_unlock(dentry, AuLock_IR); ++ fi_write_unlock(file); ++out_si: ++ si_read_unlock(sb); ++out: ++ mutex_unlock(&inode->i_mutex); ++ return err; ++} ++ ++/* no one supports this operation, currently */ ++#if 0 ++static int aufs_aio_fsync_nondir(struct kiocb *kio, int datasync) ++{ ++ int err; ++ struct au_pin pin; ++ struct dentry *dentry; ++ struct inode *inode; ++ struct file *file, *h_file; ++ ++ file = kio->ki_filp; ++ dentry = file->f_dentry; ++ inode = dentry->d_inode; ++ au_mtx_and_read_lock(inode); ++ ++ err = 0; /* -EBADF; */ /* posix? */ ++ if (unlikely(!(file->f_mode & FMODE_WRITE))) ++ goto out; ++ err = au_reval_and_lock_fdi(file, au_reopen_nondir, /*wlock*/1); ++ if (unlikely(err)) ++ goto out; ++ ++ err = au_ready_to_write(file, -1, &pin); ++ di_downgrade_lock(dentry, AuLock_IR); ++ if (unlikely(err)) ++ goto out_unlock; ++ au_unpin(&pin); ++ ++ err = -ENOSYS; ++ h_file = au_hf_top(file); ++ if (h_file->f_op && h_file->f_op->aio_fsync) { ++ struct dentry *h_d; ++ struct mutex *h_mtx; ++ ++ h_d = h_file->f_dentry; ++ h_mtx = &h_d->d_inode->i_mutex; ++ if (!is_sync_kiocb(kio)) { ++ get_file(h_file); ++ fput(file); ++ } ++ kio->ki_filp = h_file; ++ err = h_file->f_op->aio_fsync(kio, datasync); ++ mutex_lock_nested(h_mtx, AuLsc_I_CHILD); ++ if (!err) ++ vfsub_update_h_iattr(&h_file->f_path, /*did*/NULL); ++ /*ignore*/ ++ au_cpup_attr_timesizes(inode); ++ mutex_unlock(h_mtx); ++ } ++ ++out_unlock: ++ di_read_unlock(dentry, AuLock_IR); ++ fi_write_unlock(file); ++out: ++ si_read_unlock(inode->sb); ++ mutex_unlock(&inode->i_mutex); ++ return err; ++} ++#endif ++ ++static int aufs_fasync(int fd, struct file *file, int flag) ++{ ++ int err; ++ struct file *h_file; ++ struct dentry *dentry; ++ struct super_block *sb; ++ ++ dentry = file->f_dentry; ++ sb = dentry->d_sb; ++ si_read_lock(sb, AuLock_FLUSH | AuLock_NOPLMW); ++ err = au_reval_and_lock_fdi(file, au_reopen_nondir, /*wlock*/0); ++ if (unlikely(err)) ++ goto out; ++ ++ h_file = au_hf_top(file); ++ if (h_file->f_op && h_file->f_op->fasync) ++ err = h_file->f_op->fasync(fd, h_file, flag); ++ ++ di_read_unlock(dentry, AuLock_IR); ++ fi_read_unlock(file); ++ ++out: ++ si_read_unlock(sb); ++ return err; ++} ++ ++/* ---------------------------------------------------------------------- */ ++ ++/* no one supports this operation, currently */ ++#if 0 ++static ssize_t aufs_sendpage(struct file *file, struct page *page, int offset, ++ size_t len, loff_t *pos , int more) ++{ ++} ++#endif ++ ++/* ---------------------------------------------------------------------- */ ++ ++const struct file_operations aufs_file_fop = { ++ .owner = THIS_MODULE, ++ ++ .llseek = default_llseek, ++ ++ .read = aufs_read, ++ .write = aufs_write, ++ .aio_read = aufs_aio_read, ++ .aio_write = aufs_aio_write, ++#ifdef CONFIG_AUFS_POLL ++ .poll = aufs_poll, ++#endif ++ .unlocked_ioctl = aufs_ioctl_nondir, ++#ifdef CONFIG_COMPAT ++ .compat_ioctl = aufs_ioctl_nondir, /* same */ ++#endif ++ .mmap = aufs_mmap, ++ .open = aufs_open_nondir, ++ .flush = aufs_flush_nondir, ++ .release = 
aufs_release_nondir, ++ .fsync = aufs_fsync_nondir, ++ /* .aio_fsync = aufs_aio_fsync_nondir, */ ++ .fasync = aufs_fasync, ++ /* .sendpage = aufs_sendpage, */ ++ .splice_write = aufs_splice_write, ++ .splice_read = aufs_splice_read, ++#if 0 ++ .aio_splice_write = aufs_aio_splice_write, ++ .aio_splice_read = aufs_aio_splice_read ++#endif ++}; +--- /dev/null 2012-03-14 12:35:58.848999748 +0100 ++++ b/fs/aufs/f_op_sp.c 2012-03-20 17:31:17.000000000 +0100 +@@ -0,0 +1,298 @@ ++/* ++ * Copyright (C) 2005-2012 Junjiro R. Okajima ++ * ++ * This program, aufs is free software; you can redistribute it and/or modify ++ * it under the terms of the GNU General Public License as published by ++ * the Free Software Foundation; either version 2 of the License, or ++ * (at your option) any later version. ++ * ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ * GNU General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public License ++ * along with this program; if not, write to the Free Software ++ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA ++ */ ++ ++/* ++ * file operations for special files. ++ * while they exist in aufs virtually, ++ * their file I/O is handled out of aufs. ++ */ ++ ++#include "aufs.h" ++ ++static ssize_t aufs_aio_read_sp(struct kiocb *kio, const struct iovec *iov, ++ unsigned long nv, loff_t pos) ++{ ++ ssize_t err; ++ aufs_bindex_t bstart; ++ unsigned char wbr; ++ struct file *file, *h_file; ++ struct super_block *sb; ++ ++ file = kio->ki_filp; ++ sb = file->f_dentry->d_sb; ++ si_read_lock(sb, AuLock_FLUSH); ++ fi_read_lock(file); ++ bstart = au_fbstart(file); ++ h_file = au_hf_top(file); ++ fi_read_unlock(file); ++ wbr = !!au_br_writable(au_sbr(sb, bstart)->br_perm); ++ si_read_unlock(sb); ++ ++ /* do not change the file in kio */ ++ AuDebugOn(!h_file->f_op || !h_file->f_op->aio_read); ++ err = h_file->f_op->aio_read(kio, iov, nv, pos); ++ if (err > 0 && wbr) ++ file_accessed(h_file); ++ ++ return err; ++} ++ ++static ssize_t aufs_aio_write_sp(struct kiocb *kio, const struct iovec *iov, ++ unsigned long nv, loff_t pos) ++{ ++ ssize_t err; ++ aufs_bindex_t bstart; ++ unsigned char wbr; ++ struct super_block *sb; ++ struct file *file, *h_file; ++ ++ file = kio->ki_filp; ++ sb = file->f_dentry->d_sb; ++ si_read_lock(sb, AuLock_FLUSH); ++ fi_read_lock(file); ++ bstart = au_fbstart(file); ++ h_file = au_hf_top(file); ++ fi_read_unlock(file); ++ wbr = !!au_br_writable(au_sbr(sb, bstart)->br_perm); ++ si_read_unlock(sb); ++ ++ /* do not change the file in kio */ ++ AuDebugOn(!h_file->f_op || !h_file->f_op->aio_write); ++ err = h_file->f_op->aio_write(kio, iov, nv, pos); ++ if (err > 0 && wbr) ++ file_update_time(h_file); ++ ++ return err; ++} ++ ++/* ---------------------------------------------------------------------- */ ++ ++static int aufs_release_sp(struct inode *inode, struct file *file) ++{ ++ int err; ++ struct file *h_file; ++ ++ fi_read_lock(file); ++ h_file = au_hf_top(file); ++ fi_read_unlock(file); ++ /* close this fifo in aufs */ ++ err = h_file->f_op->release(inode, file); /* ignore */ ++ aufs_release_nondir(inode, file); /* ignore */ ++ return err; ++} ++ ++/* ---------------------------------------------------------------------- */ ++ ++/* currently, support only FIFO */ ++enum { ++ AuSp_FIFO, AuSp_FIFO_R, AuSp_FIFO_W, AuSp_FIFO_RW, ++ /* 
AuSp_SOCK, AuSp_CHR, AuSp_BLK, */ ++ AuSp_Last ++}; ++static int aufs_open_sp(struct inode *inode, struct file *file); ++static struct au_sp_fop { ++ int done; ++ struct file_operations fop; /* not 'const' */ ++ spinlock_t spin; ++} au_sp_fop[AuSp_Last] = { ++ [AuSp_FIFO] = { ++ .fop = { ++ .owner = THIS_MODULE, ++ .open = aufs_open_sp ++ } ++ } ++}; ++ ++static void au_init_fop_sp(struct file *file) ++{ ++ struct au_sp_fop *p; ++ int i; ++ struct file *h_file; ++ ++ p = au_sp_fop; ++ if (unlikely(!p->done)) { ++ /* initialize first time only */ ++ static DEFINE_SPINLOCK(spin); ++ ++ spin_lock(&spin); ++ if (!p->done) { ++ BUILD_BUG_ON(sizeof(au_sp_fop)/sizeof(*au_sp_fop) ++ != AuSp_Last); ++ for (i = 0; i < AuSp_Last; i++) ++ spin_lock_init(&p[i].spin); ++ p->done = 1; ++ } ++ spin_unlock(&spin); ++ } ++ ++ switch (file->f_mode & (FMODE_READ | FMODE_WRITE)) { ++ case FMODE_READ: ++ i = AuSp_FIFO_R; ++ break; ++ case FMODE_WRITE: ++ i = AuSp_FIFO_W; ++ break; ++ case FMODE_READ | FMODE_WRITE: ++ i = AuSp_FIFO_RW; ++ break; ++ default: ++ BUG(); ++ } ++ ++ p += i; ++ if (unlikely(!p->done)) { ++ /* initialize first time only */ ++ h_file = au_hf_top(file); ++ spin_lock(&p->spin); ++ if (!p->done) { ++ p->fop = *h_file->f_op; ++ p->fop.owner = THIS_MODULE; ++ if (p->fop.aio_read) ++ p->fop.aio_read = aufs_aio_read_sp; ++ if (p->fop.aio_write) ++ p->fop.aio_write = aufs_aio_write_sp; ++ p->fop.release = aufs_release_sp; ++ p->done = 1; ++ } ++ spin_unlock(&p->spin); ++ } ++ file->f_op = &p->fop; ++} ++ ++static int au_cpup_sp(struct dentry *dentry) ++{ ++ int err; ++ aufs_bindex_t bcpup; ++ struct au_pin pin; ++ struct au_wr_dir_args wr_dir_args = { ++ .force_btgt = -1, ++ .flags = 0 ++ }; ++ ++ AuDbg("%.*s\n", AuDLNPair(dentry)); ++ ++ di_read_unlock(dentry, AuLock_IR); ++ di_write_lock_child(dentry); ++ err = au_wr_dir(dentry, /*src_dentry*/NULL, &wr_dir_args); ++ if (unlikely(err < 0)) ++ goto out; ++ bcpup = err; ++ err = 0; ++ if (bcpup == au_dbstart(dentry)) ++ goto out; /* success */ ++ ++ err = au_pin(&pin, dentry, bcpup, au_opt_udba(dentry->d_sb), ++ AuPin_MNT_WRITE); ++ if (!err) { ++ err = au_sio_cpup_simple(dentry, bcpup, -1, AuCpup_DTIME); ++ au_unpin(&pin); ++ } ++ ++out: ++ di_downgrade_lock(dentry, AuLock_IR); ++ return err; ++} ++ ++static int au_do_open_sp(struct file *file, int flags) ++{ ++ int err; ++ struct dentry *dentry; ++ struct super_block *sb; ++ struct file *h_file; ++ struct inode *h_inode; ++ ++ dentry = file->f_dentry; ++ AuDbg("%.*s\n", AuDLNPair(dentry)); ++ ++ /* ++ * try copying-up. ++ * operate on the ro branch is not an error. 
++ */ ++ au_cpup_sp(dentry); /* ignore */ ++ ++ /* prepare h_file */ ++ err = au_do_open_nondir(file, vfsub_file_flags(file)); ++ if (unlikely(err)) ++ goto out; ++ ++ sb = dentry->d_sb; ++ h_file = au_hf_top(file); ++ h_inode = h_file->f_dentry->d_inode; ++ di_read_unlock(dentry, AuLock_IR); ++ fi_write_unlock(file); ++ si_read_unlock(sb); ++ /* open this fifo in aufs */ ++ err = h_inode->i_fop->open(file->f_dentry->d_inode, file); ++ si_noflush_read_lock(sb); ++ fi_write_lock(file); ++ di_read_lock_child(dentry, AuLock_IR); ++ if (!err) ++ au_init_fop_sp(file); ++ ++out: ++ return err; ++} ++ ++static int aufs_open_sp(struct inode *inode, struct file *file) ++{ ++ int err; ++ struct super_block *sb; ++ ++ sb = file->f_dentry->d_sb; ++ si_read_lock(sb, AuLock_FLUSH); ++ err = au_do_open(file, au_do_open_sp, /*fidir*/NULL); ++ si_read_unlock(sb); ++ return err; ++} ++ ++/* ---------------------------------------------------------------------- */ ++ ++void au_init_special_fop(struct inode *inode, umode_t mode, dev_t rdev) ++{ ++ init_special_inode(inode, mode, rdev); ++ ++ switch (mode & S_IFMT) { ++ case S_IFIFO: ++ inode->i_fop = &au_sp_fop[AuSp_FIFO].fop; ++ /*FALLTHROUGH*/ ++ case S_IFCHR: ++ case S_IFBLK: ++ case S_IFSOCK: ++ break; ++ default: ++ AuDebugOn(1); ++ } ++} ++ ++int au_special_file(umode_t mode) ++{ ++ int ret; ++ ++ ret = 0; ++ switch (mode & S_IFMT) { ++ case S_IFIFO: ++#if 0 ++ case S_IFCHR: ++ case S_IFBLK: ++ case S_IFSOCK: ++#endif ++ ret = 1; ++ } ++ ++ return ret; ++} +--- /dev/null 2012-03-14 12:35:58.848999748 +0100 ++++ b/fs/aufs/fstype.h 2012-03-20 17:31:17.000000000 +0100 +@@ -0,0 +1,496 @@ ++/* ++ * Copyright (C) 2005-2012 Junjiro R. Okajima ++ * ++ * This program, aufs is free software; you can redistribute it and/or modify ++ * it under the terms of the GNU General Public License as published by ++ * the Free Software Foundation; either version 2 of the License, or ++ * (at your option) any later version. ++ * ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ * GNU General Public License for more details. 
++ *
++ * You should have received a copy of the GNU General Public License
++ * along with this program; if not, write to the Free Software
++ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
++ */
++
++/*
++ * judging filesystem type
++ */
++
++#ifndef __AUFS_FSTYPE_H__
++#define __AUFS_FSTYPE_H__
++
++#ifdef __KERNEL__
++
++#include
++#include
++#include
++
++static inline int au_test_aufs(struct super_block *sb)
++{
++ return sb->s_magic == AUFS_SUPER_MAGIC;
++}
++
++static inline const char *au_sbtype(struct super_block *sb)
++{
++ return sb->s_type->name;
++}
++
++static inline int au_test_iso9660(struct super_block *sb __maybe_unused)
++{
++#if defined(CONFIG_ISO9660_FS) || defined(CONFIG_ISO9660_FS_MODULE)
++ return sb->s_magic == ISOFS_SUPER_MAGIC;
++#else
++ return 0;
++#endif
++}
++
++static inline int au_test_romfs(struct super_block *sb __maybe_unused)
++{
++#if defined(CONFIG_ROMFS_FS) || defined(CONFIG_ROMFS_FS_MODULE)
++ return sb->s_magic == ROMFS_MAGIC;
++#else
++ return 0;
++#endif
++}
++
++static inline int au_test_cramfs(struct super_block *sb __maybe_unused)
++{
++#if defined(CONFIG_CRAMFS) || defined(CONFIG_CRAMFS_MODULE)
++ return sb->s_magic == CRAMFS_MAGIC;
++#endif
++ return 0;
++}
++
++static inline int au_test_nfs(struct super_block *sb __maybe_unused)
++{
++#if defined(CONFIG_NFS_FS) || defined(CONFIG_NFS_FS_MODULE)
++ return sb->s_magic == NFS_SUPER_MAGIC;
++#else
++ return 0;
++#endif
++}
++
++static inline int au_test_fuse(struct super_block *sb __maybe_unused)
++{
++#if defined(CONFIG_FUSE_FS) || defined(CONFIG_FUSE_FS_MODULE)
++ return sb->s_magic == FUSE_SUPER_MAGIC;
++#else
++ return 0;
++#endif
++}
++
++static inline int au_test_xfs(struct super_block *sb __maybe_unused)
++{
++#if defined(CONFIG_XFS_FS) || defined(CONFIG_XFS_FS_MODULE)
++ return sb->s_magic == XFS_SB_MAGIC;
++#else
++ return 0;
++#endif
++}
++
++static inline int au_test_tmpfs(struct super_block *sb __maybe_unused)
++{
++#ifdef CONFIG_TMPFS
++ return sb->s_magic == TMPFS_MAGIC;
++#else
++ return 0;
++#endif
++}
++
++static inline int au_test_ecryptfs(struct super_block *sb __maybe_unused)
++{
++#if defined(CONFIG_ECRYPT_FS) || defined(CONFIG_ECRYPT_FS_MODULE)
++ return !strcmp(au_sbtype(sb), "ecryptfs");
++#else
++ return 0;
++#endif
++}
++
++static inline int au_test_smbfs(struct super_block *sb __maybe_unused)
++{
++#if defined(CONFIG_SMB_FS) || defined(CONFIG_SMB_FS_MODULE)
++ return sb->s_magic == SMB_SUPER_MAGIC;
++#else
++ return 0;
++#endif
++}
++
++static inline int au_test_ocfs2(struct super_block *sb __maybe_unused)
++{
++#if defined(CONFIG_OCFS2_FS) || defined(CONFIG_OCFS2_FS_MODULE)
++ return sb->s_magic == OCFS2_SUPER_MAGIC;
++#else
++ return 0;
++#endif
++}
++
++static inline int au_test_ocfs2_dlmfs(struct super_block *sb __maybe_unused)
++{
++#if defined(CONFIG_OCFS2_FS_O2CB) || defined(CONFIG_OCFS2_FS_O2CB_MODULE)
++ return sb->s_magic == DLMFS_MAGIC;
++#else
++ return 0;
++#endif
++}
++
++static inline int au_test_coda(struct super_block *sb __maybe_unused)
++{
++#if defined(CONFIG_CODA_FS) || defined(CONFIG_CODA_FS_MODULE)
++ return sb->s_magic == CODA_SUPER_MAGIC;
++#else
++ return 0;
++#endif
++}
++
++static inline int au_test_v9fs(struct super_block *sb __maybe_unused)
++{
++#if defined(CONFIG_9P_FS) || defined(CONFIG_9P_FS_MODULE)
++ return sb->s_magic == V9FS_MAGIC;
++#else
++ return 0;
++#endif
++}
++
++static inline int au_test_ext4(struct super_block *sb __maybe_unused)
++{
++#if defined(CONFIG_EXT4DEV_FS) || defined(CONFIG_EXT4DEV_FS_MODULE)
++ return sb->s_magic == EXT4_SUPER_MAGIC;
++#else
++ return 0;
++#endif
++}
++
++static inline int au_test_sysv(struct super_block *sb __maybe_unused)
++{
++#if defined(CONFIG_SYSV_FS) || defined(CONFIG_SYSV_FS_MODULE)
++ return !strcmp(au_sbtype(sb), "sysv");
++#else
++ return 0;
++#endif
++}
++
++static inline int au_test_ramfs(struct super_block *sb)
++{
++ return sb->s_magic == RAMFS_MAGIC;
++}
++
++static inline int au_test_ubifs(struct super_block *sb __maybe_unused)
++{
++#if defined(CONFIG_UBIFS_FS) || defined(CONFIG_UBIFS_FS_MODULE)
++ return sb->s_magic == UBIFS_SUPER_MAGIC;
++#else
++ return 0;
++#endif
++}
++
++static inline int au_test_procfs(struct super_block *sb __maybe_unused)
++{
++#ifdef CONFIG_PROC_FS
++ return sb->s_magic == PROC_SUPER_MAGIC;
++#else
++ return 0;
++#endif
++}
++
++static inline int au_test_sysfs(struct super_block *sb __maybe_unused)
++{
++#ifdef CONFIG_SYSFS
++ return sb->s_magic == SYSFS_MAGIC;
++#else
++ return 0;
++#endif
++}
++
++static inline int au_test_configfs(struct super_block *sb __maybe_unused)
++{
++#if defined(CONFIG_CONFIGFS_FS) || defined(CONFIG_CONFIGFS_FS_MODULE)
++ return sb->s_magic == CONFIGFS_MAGIC;
++#else
++ return 0;
++#endif
++}
++
++static inline int au_test_minix(struct super_block *sb __maybe_unused)
++{
++#if defined(CONFIG_MINIX_FS) || defined(CONFIG_MINIX_FS_MODULE)
++ return sb->s_magic == MINIX3_SUPER_MAGIC
++ || sb->s_magic == MINIX2_SUPER_MAGIC
++ || sb->s_magic == MINIX2_SUPER_MAGIC2
++ || sb->s_magic == MINIX_SUPER_MAGIC
++ || sb->s_magic == MINIX_SUPER_MAGIC2;
++#else
++ return 0;
++#endif
++}
++
++static inline int au_test_cifs(struct super_block *sb __maybe_unused)
++{
++#if defined(CONFIG_CIFS_FS) || defined(CONFIG_CIFS_FS_MODULE)
++ return sb->s_magic == CIFS_MAGIC_NUMBER;
++#else
++ return 0;
++#endif
++}
++
++static inline int au_test_fat(struct super_block *sb __maybe_unused)
++{
++#if defined(CONFIG_FAT_FS) || defined(CONFIG_FAT_FS_MODULE)
++ return sb->s_magic == MSDOS_SUPER_MAGIC;
++#else
++ return 0;
++#endif
++}
++
++static inline int au_test_msdos(struct super_block *sb)
++{
++ return au_test_fat(sb);
++}
++
++static inline int au_test_vfat(struct super_block *sb)
++{
++ return au_test_fat(sb);
++}
++
++static inline int au_test_securityfs(struct super_block *sb __maybe_unused)
++{
++#ifdef CONFIG_SECURITYFS
++ return sb->s_magic == SECURITYFS_MAGIC;
++#else
++ return 0;
++#endif
++}
++
++static inline int au_test_squashfs(struct super_block *sb __maybe_unused)
++{
++#if defined(CONFIG_SQUASHFS) || defined(CONFIG_SQUASHFS_MODULE)
++ return sb->s_magic == SQUASHFS_MAGIC;
++#else
++ return 0;
++#endif
++}
++
++static inline int au_test_btrfs(struct super_block *sb __maybe_unused)
++{
++#if defined(CONFIG_BTRFS_FS) || defined(CONFIG_BTRFS_FS_MODULE)
++ return sb->s_magic == BTRFS_SUPER_MAGIC;
++#else
++ return 0;
++#endif
++}
++
++static inline int au_test_xenfs(struct super_block *sb __maybe_unused)
++{
++#if defined(CONFIG_XENFS) || defined(CONFIG_XENFS_MODULE)
++ return sb->s_magic == XENFS_SUPER_MAGIC;
++#else
++ return 0;
++#endif
++}
++
++static inline int au_test_debugfs(struct super_block *sb __maybe_unused)
++{
++#ifdef CONFIG_DEBUG_FS
++ return sb->s_magic == DEBUGFS_MAGIC;
++#else
++ return 0;
++#endif
++}
++
++static inline int au_test_nilfs(struct super_block *sb __maybe_unused)
++{
++#if defined(CONFIG_NILFS) || defined(CONFIG_NILFS_MODULE)
++ return sb->s_magic == NILFS_SUPER_MAGIC;
++#else
++ return 0;
++#endif
++}
++
++static inline int
au_test_hfsplus(struct super_block *sb __maybe_unused) ++{ ++#if defined(CONFIG_HFSPLUS_FS) || defined(CONFIG_HFSPLUS_FS_MODULE) ++ return sb->s_magic == HFSPLUS_SUPER_MAGIC; ++#else ++ return 0; ++#endif ++} ++ ++/* ---------------------------------------------------------------------- */ ++/* ++ * they can't be an aufs branch. ++ */ ++static inline int au_test_fs_unsuppoted(struct super_block *sb) ++{ ++ return ++#ifndef CONFIG_AUFS_BR_RAMFS ++ au_test_ramfs(sb) || ++#endif ++ au_test_procfs(sb) ++ || au_test_sysfs(sb) ++ || au_test_configfs(sb) ++ || au_test_debugfs(sb) ++ || au_test_securityfs(sb) ++ || au_test_xenfs(sb) ++ || au_test_ecryptfs(sb) ++ /* || !strcmp(au_sbtype(sb), "unionfs") */ ++ || au_test_aufs(sb); /* will be supported in next version */ ++} ++ ++/* ++ * If the filesystem supports NFS-export, then it has to support NULL as ++ * a nameidata parameter for ->create(), ->lookup() and ->d_revalidate(). ++ * We can apply this principle when we handle a lower filesystem. ++ */ ++static inline int au_test_fs_null_nd(struct super_block *sb) ++{ ++ return !!sb->s_export_op; ++} ++ ++static inline int au_test_fs_remote(struct super_block *sb) ++{ ++ return !au_test_tmpfs(sb) ++#ifdef CONFIG_AUFS_BR_RAMFS ++ && !au_test_ramfs(sb) ++#endif ++ && !(sb->s_type->fs_flags & FS_REQUIRES_DEV); ++} ++ ++/* ---------------------------------------------------------------------- */ ++ ++/* ++ * Note: these functions (below) are created after reading ->getattr() in all ++ * filesystems under linux/fs. it means we have to do so in every update... ++ */ ++ ++/* ++ * some filesystems require getattr to refresh the inode attributes before ++ * referencing. ++ * in most cases, we can rely on the inode attribute in NFS (or every remote fs) ++ * and leave the work for d_revalidate() ++ */ ++static inline int au_test_fs_refresh_iattr(struct super_block *sb) ++{ ++ return au_test_nfs(sb) ++ || au_test_fuse(sb) ++ /* || au_test_smbfs(sb) */ /* untested */ ++ /* || au_test_ocfs2(sb) */ /* untested */ ++ /* || au_test_btrfs(sb) */ /* untested */ ++ /* || au_test_coda(sb) */ /* untested */ ++ /* || au_test_v9fs(sb) */ /* untested */ ++ ; ++} ++ ++/* ++ * filesystems which don't maintain i_size or i_blocks. ++ */ ++static inline int au_test_fs_bad_iattr_size(struct super_block *sb) ++{ ++ return au_test_xfs(sb) ++ || au_test_btrfs(sb) ++ || au_test_ubifs(sb) ++ || au_test_hfsplus(sb) /* maintained, but incorrect */ ++ /* || au_test_ext4(sb) */ /* untested */ ++ /* || au_test_ocfs2(sb) */ /* untested */ ++ /* || au_test_ocfs2_dlmfs(sb) */ /* untested */ ++ /* || au_test_sysv(sb) */ /* untested */ ++ /* || au_test_minix(sb) */ /* untested */ ++ ; ++} ++ ++/* ++ * filesystems which don't store the correct value in some of their inode ++ * attributes. ++ */ ++static inline int au_test_fs_bad_iattr(struct super_block *sb) ++{ ++ return au_test_fs_bad_iattr_size(sb) ++ /* || au_test_cifs(sb) */ /* untested */ ++ || au_test_fat(sb) ++ || au_test_msdos(sb) ++ || au_test_vfat(sb); ++} ++ ++/* they don't check i_nlink in link(2) */ ++static inline int au_test_fs_no_limit_nlink(struct super_block *sb) ++{ ++ return au_test_tmpfs(sb) ++#ifdef CONFIG_AUFS_BR_RAMFS ++ || au_test_ramfs(sb) ++#endif ++ || au_test_ubifs(sb) ++ || au_test_btrfs(sb) ++ || au_test_hfsplus(sb); ++} ++ ++/* ++ * filesystems which sets S_NOATIME and S_NOCMTIME. 
++ */ ++static inline int au_test_fs_notime(struct super_block *sb) ++{ ++ return au_test_nfs(sb) ++ || au_test_fuse(sb) ++ || au_test_ubifs(sb) ++ /* || au_test_cifs(sb) */ /* untested */ ++ ; ++} ++ ++/* ++ * filesystems which requires replacing i_mapping. ++ */ ++static inline int au_test_fs_bad_mapping(struct super_block *sb) ++{ ++ return au_test_fuse(sb) ++ || au_test_ubifs(sb); ++} ++ ++/* temporary support for i#1 in cramfs */ ++static inline int au_test_fs_unique_ino(struct inode *inode) ++{ ++ if (au_test_cramfs(inode->i_sb)) ++ return inode->i_ino != 1; ++ return 1; ++} ++ ++/* ---------------------------------------------------------------------- */ ++ ++/* ++ * the filesystem where the xino files placed must support i/o after unlink and ++ * maintain i_size and i_blocks. ++ */ ++static inline int au_test_fs_bad_xino(struct super_block *sb) ++{ ++ return au_test_fs_remote(sb) ++ || au_test_fs_bad_iattr_size(sb) ++#ifdef CONFIG_AUFS_BR_RAMFS ++ || !(au_test_ramfs(sb) || au_test_fs_null_nd(sb)) ++#else ++ || !au_test_fs_null_nd(sb) /* to keep xino code simple */ ++#endif ++ /* don't want unnecessary work for xino */ ++ || au_test_aufs(sb) ++ || au_test_ecryptfs(sb) ++ || au_test_nilfs(sb); ++} ++ ++static inline int au_test_fs_trunc_xino(struct super_block *sb) ++{ ++ return au_test_tmpfs(sb) ++ || au_test_ramfs(sb); ++} ++ ++/* ++ * test if the @sb is real-readonly. ++ */ ++static inline int au_test_fs_rr(struct super_block *sb) ++{ ++ return au_test_squashfs(sb) ++ || au_test_iso9660(sb) ++ || au_test_cramfs(sb) ++ || au_test_romfs(sb); ++} ++ ++#endif /* __KERNEL__ */ ++#endif /* __AUFS_FSTYPE_H__ */ +--- /dev/null 2012-03-14 12:35:58.848999748 +0100 ++++ b/fs/aufs/hfsnotify.c 2012-03-20 17:31:17.000000000 +0100 +@@ -0,0 +1,260 @@ ++/* ++ * Copyright (C) 2005-2012 Junjiro R. Okajima ++ * ++ * This program, aufs is free software; you can redistribute it and/or modify ++ * it under the terms of the GNU General Public License as published by ++ * the Free Software Foundation; either version 2 of the License, or ++ * (at your option) any later version. ++ * ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ * GNU General Public License for more details. 
++ * ++ * You should have received a copy of the GNU General Public License ++ * along with this program; if not, write to the Free Software ++ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA ++ */ ++ ++/* ++ * fsnotify for the lower directories ++ */ ++ ++#include "aufs.h" ++ ++/* FS_IN_IGNORED is unnecessary */ ++static const __u32 AuHfsnMask = (FS_MOVED_TO | FS_MOVED_FROM | FS_DELETE ++ | FS_CREATE | FS_EVENT_ON_CHILD); ++static DECLARE_WAIT_QUEUE_HEAD(au_hfsn_wq); ++static __cacheline_aligned_in_smp atomic64_t au_hfsn_ifree = ATOMIC64_INIT(0); ++ ++static void au_hfsn_free_mark(struct fsnotify_mark *mark) ++{ ++ struct au_hnotify *hn = container_of(mark, struct au_hnotify, ++ hn_mark); ++ AuDbg("here\n"); ++ au_cache_free_hnotify(hn); ++ smp_mb__before_atomic_dec(); ++ atomic64_dec(&au_hfsn_ifree); ++ wake_up(&au_hfsn_wq); ++} ++ ++static int au_hfsn_alloc(struct au_hinode *hinode) ++{ ++ struct au_hnotify *hn; ++ struct super_block *sb; ++ struct au_branch *br; ++ struct fsnotify_mark *mark; ++ aufs_bindex_t bindex; ++ ++ hn = hinode->hi_notify; ++ sb = hn->hn_aufs_inode->i_sb; ++ bindex = au_br_index(sb, hinode->hi_id); ++ br = au_sbr(sb, bindex); ++ mark = &hn->hn_mark; ++ fsnotify_init_mark(mark, au_hfsn_free_mark); ++ mark->mask = AuHfsnMask; ++ /* ++ * by udba rename or rmdir, aufs assign a new inode to the known ++ * h_inode, so specify 1 to allow dups. ++ */ ++ return fsnotify_add_mark(mark, br->br_hfsn_group, hinode->hi_inode, ++ /*mnt*/NULL, /*allow_dups*/1); ++} ++ ++static int au_hfsn_free(struct au_hinode *hinode, struct au_hnotify *hn) ++{ ++ struct fsnotify_mark *mark; ++ unsigned long long ull; ++ ++ ull = atomic64_inc_return(&au_hfsn_ifree); ++ BUG_ON(!ull); ++ ++ mark = &hn->hn_mark; ++ fsnotify_destroy_mark(mark); ++ fsnotify_put_mark(mark); ++ ++ /* free hn by myself */ ++ return 0; ++} ++ ++/* ---------------------------------------------------------------------- */ ++ ++static void au_hfsn_ctl(struct au_hinode *hinode, int do_set) ++{ ++ struct fsnotify_mark *mark; ++ ++ mark = &hinode->hi_notify->hn_mark; ++ spin_lock(&mark->lock); ++ if (do_set) { ++ AuDebugOn(mark->mask & AuHfsnMask); ++ mark->mask |= AuHfsnMask; ++ } else { ++ AuDebugOn(!(mark->mask & AuHfsnMask)); ++ mark->mask &= ~AuHfsnMask; ++ } ++ spin_unlock(&mark->lock); ++ /* fsnotify_recalc_inode_mask(hinode->hi_inode); */ ++} ++ ++/* ---------------------------------------------------------------------- */ ++ ++/* #define AuDbgHnotify */ ++#ifdef AuDbgHnotify ++static char *au_hfsn_name(u32 mask) ++{ ++#ifdef CONFIG_AUFS_DEBUG ++#define test_ret(flag) if (mask & flag) \ ++ return #flag; ++ test_ret(FS_ACCESS); ++ test_ret(FS_MODIFY); ++ test_ret(FS_ATTRIB); ++ test_ret(FS_CLOSE_WRITE); ++ test_ret(FS_CLOSE_NOWRITE); ++ test_ret(FS_OPEN); ++ test_ret(FS_MOVED_FROM); ++ test_ret(FS_MOVED_TO); ++ test_ret(FS_CREATE); ++ test_ret(FS_DELETE); ++ test_ret(FS_DELETE_SELF); ++ test_ret(FS_MOVE_SELF); ++ test_ret(FS_UNMOUNT); ++ test_ret(FS_Q_OVERFLOW); ++ test_ret(FS_IN_IGNORED); ++ test_ret(FS_IN_ISDIR); ++ test_ret(FS_IN_ONESHOT); ++ test_ret(FS_EVENT_ON_CHILD); ++ return ""; ++#undef test_ret ++#else ++ return "??"; ++#endif ++} ++#endif ++ ++/* ---------------------------------------------------------------------- */ ++ ++static int au_hfsn_handle_event(struct fsnotify_group *group, ++ struct fsnotify_mark *inode_mark, ++ struct fsnotify_mark *vfsmount_mark, ++ struct fsnotify_event *event) ++{ ++ int err; ++ struct au_hnotify *hnotify; ++ struct inode *h_dir, *h_inode; ++ 
__u32 mask; ++ struct qstr h_child_qstr = { ++ .name = event->file_name, ++ .len = event->name_len ++ }; ++ ++ AuDebugOn(event->data_type != FSNOTIFY_EVENT_INODE); ++ ++ err = 0; ++ /* if FS_UNMOUNT happens, there must be another bug */ ++ mask = event->mask; ++ AuDebugOn(mask & FS_UNMOUNT); ++ if (mask & (FS_IN_IGNORED | FS_UNMOUNT)) ++ goto out; ++ ++ h_dir = event->to_tell; ++ h_inode = event->inode; ++#ifdef AuDbgHnotify ++ au_debug(1); ++ if (1 || h_child_qstr.len != sizeof(AUFS_XINO_FNAME) - 1 ++ || strncmp(h_child_qstr.name, AUFS_XINO_FNAME, h_child_qstr.len)) { ++ AuDbg("i%lu, mask 0x%x %s, hcname %.*s, hi%lu\n", ++ h_dir->i_ino, mask, au_hfsn_name(mask), ++ AuLNPair(&h_child_qstr), h_inode ? h_inode->i_ino : 0); ++ /* WARN_ON(1); */ ++ } ++ au_debug(0); ++#endif ++ ++ AuDebugOn(!inode_mark); ++ hnotify = container_of(inode_mark, struct au_hnotify, hn_mark); ++ err = au_hnotify(h_dir, hnotify, mask, &h_child_qstr, h_inode); ++ ++out: ++ return err; ++} ++ ++/* isn't it waste to ask every registered 'group'? */ ++/* copied from linux/fs/notify/inotify/inotify_fsnotiry.c */ ++/* it should be exported to modules */ ++static bool au_hfsn_should_send_event(struct fsnotify_group *group, ++ struct inode *h_inode, ++ struct fsnotify_mark *inode_mark, ++ struct fsnotify_mark *vfsmount_mark, ++ __u32 mask, void *data, int data_type) ++{ ++ mask = (mask & ~FS_EVENT_ON_CHILD); ++ return inode_mark->mask & mask; ++} ++ ++static struct fsnotify_ops au_hfsn_ops = { ++ .should_send_event = au_hfsn_should_send_event, ++ .handle_event = au_hfsn_handle_event ++}; ++ ++/* ---------------------------------------------------------------------- */ ++ ++static void au_hfsn_fin_br(struct au_branch *br) ++{ ++ if (br->br_hfsn_group) ++ fsnotify_put_group(br->br_hfsn_group); ++} ++ ++static int au_hfsn_init_br(struct au_branch *br, int perm) ++{ ++ br->br_hfsn_group = NULL; ++ br->br_hfsn_ops = au_hfsn_ops; ++ return 0; ++} ++ ++static int au_hfsn_reset_br(unsigned int udba, struct au_branch *br, int perm) ++{ ++ int err; ++ ++ err = 0; ++ if (udba != AuOpt_UDBA_HNOTIFY ++ || !au_br_hnotifyable(perm)) { ++ au_hfsn_fin_br(br); ++ br->br_hfsn_group = NULL; ++ goto out; ++ } ++ ++ if (br->br_hfsn_group) ++ goto out; ++ ++ br->br_hfsn_group = fsnotify_alloc_group(&br->br_hfsn_ops); ++ if (IS_ERR(br->br_hfsn_group)) { ++ err = PTR_ERR(br->br_hfsn_group); ++ pr_err("fsnotify_alloc_group() failed, %d\n", err); ++ br->br_hfsn_group = NULL; ++ } ++ ++out: ++ AuTraceErr(err); ++ return err; ++} ++ ++/* ---------------------------------------------------------------------- */ ++ ++static void au_hfsn_fin(void) ++{ ++ AuDbg("au_hfsn_ifree %lld\n", (long long)atomic64_read(&au_hfsn_ifree)); ++ wait_event(au_hfsn_wq, !atomic64_read(&au_hfsn_ifree)); ++} ++ ++const struct au_hnotify_op au_hnotify_op = { ++ .ctl = au_hfsn_ctl, ++ .alloc = au_hfsn_alloc, ++ .free = au_hfsn_free, ++ ++ .fin = au_hfsn_fin, ++ ++ .reset_br = au_hfsn_reset_br, ++ .fin_br = au_hfsn_fin_br, ++ .init_br = au_hfsn_init_br ++}; +--- /dev/null 2012-03-14 12:35:58.848999748 +0100 ++++ b/fs/aufs/hfsplus.c 2012-03-20 17:31:17.000000000 +0100 +@@ -0,0 +1,57 @@ ++/* ++ * Copyright (C) 2010-2012 Junjiro R. Okajima ++ * ++ * This program, aufs is free software; you can redistribute it and/or modify ++ * it under the terms of the GNU General Public License as published by ++ * the Free Software Foundation; either version 2 of the License, or ++ * (at your option) any later version. 
++ * ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ * GNU General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public License ++ * along with this program; if not, write to the Free Software ++ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA ++ */ ++ ++/* ++ * special support for filesystems which aqucires an inode mutex ++ * at final closing a file, eg, hfsplus. ++ * ++ * This trick is very simple and stupid, just to open the file before really ++ * neceeary open to tell hfsplus that this is not the final closing. ++ * The caller should call au_h_open_pre() after acquiring the inode mutex, ++ * and au_h_open_post() after releasing it. ++ */ ++ ++#include "aufs.h" ++ ++struct file *au_h_open_pre(struct dentry *dentry, aufs_bindex_t bindex) ++{ ++ struct file *h_file; ++ struct dentry *h_dentry; ++ ++ h_dentry = au_h_dptr(dentry, bindex); ++ AuDebugOn(!h_dentry); ++ AuDebugOn(!h_dentry->d_inode); ++ IMustLock(h_dentry->d_inode); ++ ++ h_file = NULL; ++ if (au_test_hfsplus(h_dentry->d_sb) ++ && S_ISREG(h_dentry->d_inode->i_mode)) ++ h_file = au_h_open(dentry, bindex, ++ O_RDONLY | O_NOATIME | O_LARGEFILE, ++ /*file*/NULL); ++ return h_file; ++} ++ ++void au_h_open_post(struct dentry *dentry, aufs_bindex_t bindex, ++ struct file *h_file) ++{ ++ if (h_file) { ++ fput(h_file); ++ au_sbr_put(dentry->d_sb, bindex); ++ } ++} +--- /dev/null 2012-03-14 12:35:58.848999748 +0100 ++++ b/fs/aufs/hnotify.c 2012-03-20 17:31:17.000000000 +0100 +@@ -0,0 +1,712 @@ ++/* ++ * Copyright (C) 2005-2012 Junjiro R. Okajima ++ * ++ * This program, aufs is free software; you can redistribute it and/or modify ++ * it under the terms of the GNU General Public License as published by ++ * the Free Software Foundation; either version 2 of the License, or ++ * (at your option) any later version. ++ * ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ * GNU General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public License ++ * along with this program; if not, write to the Free Software ++ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA ++ */ ++ ++/* ++ * abstraction to notify the direct changes on lower directories ++ */ ++ ++#include "aufs.h" ++ ++int au_hn_alloc(struct au_hinode *hinode, struct inode *inode) ++{ ++ int err; ++ struct au_hnotify *hn; ++ ++ err = -ENOMEM; ++ hn = au_cache_alloc_hnotify(); ++ if (hn) { ++ hn->hn_aufs_inode = inode; ++ hinode->hi_notify = hn; ++ err = au_hnotify_op.alloc(hinode); ++ AuTraceErr(err); ++ if (unlikely(err)) { ++ hinode->hi_notify = NULL; ++ au_cache_free_hnotify(hn); ++ /* ++ * The upper dir was removed by udba, but the same named ++ * dir left. In this case, aufs assignes a new inode ++ * number and set the monitor again. ++ * For the lower dir, the old monitnor is still left. 
++ */ ++ if (err == -EEXIST) ++ err = 0; ++ } ++ } ++ ++ AuTraceErr(err); ++ return err; ++} ++ ++void au_hn_free(struct au_hinode *hinode) ++{ ++ struct au_hnotify *hn; ++ ++ hn = hinode->hi_notify; ++ if (hn) { ++ hinode->hi_notify = NULL; ++ if (au_hnotify_op.free(hinode, hn)) ++ au_cache_free_hnotify(hn); ++ } ++} ++ ++/* ---------------------------------------------------------------------- */ ++ ++void au_hn_ctl(struct au_hinode *hinode, int do_set) ++{ ++ if (hinode->hi_notify) ++ au_hnotify_op.ctl(hinode, do_set); ++} ++ ++void au_hn_reset(struct inode *inode, unsigned int flags) ++{ ++ aufs_bindex_t bindex, bend; ++ struct inode *hi; ++ struct dentry *iwhdentry; ++ ++ bend = au_ibend(inode); ++ for (bindex = au_ibstart(inode); bindex <= bend; bindex++) { ++ hi = au_h_iptr(inode, bindex); ++ if (!hi) ++ continue; ++ ++ /* mutex_lock_nested(&hi->i_mutex, AuLsc_I_CHILD); */ ++ iwhdentry = au_hi_wh(inode, bindex); ++ if (iwhdentry) ++ dget(iwhdentry); ++ au_igrab(hi); ++ au_set_h_iptr(inode, bindex, NULL, 0); ++ au_set_h_iptr(inode, bindex, au_igrab(hi), ++ flags & ~AuHi_XINO); ++ iput(hi); ++ dput(iwhdentry); ++ /* mutex_unlock(&hi->i_mutex); */ ++ } ++} ++ ++/* ---------------------------------------------------------------------- */ ++ ++static int hn_xino(struct inode *inode, struct inode *h_inode) ++{ ++ int err; ++ aufs_bindex_t bindex, bend, bfound, bstart; ++ struct inode *h_i; ++ ++ err = 0; ++ if (unlikely(inode->i_ino == AUFS_ROOT_INO)) { ++ pr_warning("branch root dir was changed\n"); ++ goto out; ++ } ++ ++ bfound = -1; ++ bend = au_ibend(inode); ++ bstart = au_ibstart(inode); ++#if 0 /* reserved for future use */ ++ if (bindex == bend) { ++ /* keep this ino in rename case */ ++ goto out; ++ } ++#endif ++ for (bindex = bstart; bindex <= bend; bindex++) ++ if (au_h_iptr(inode, bindex) == h_inode) { ++ bfound = bindex; ++ break; ++ } ++ if (bfound < 0) ++ goto out; ++ ++ for (bindex = bstart; bindex <= bend; bindex++) { ++ h_i = au_h_iptr(inode, bindex); ++ if (!h_i) ++ continue; ++ ++ err = au_xino_write(inode->i_sb, bindex, h_i->i_ino, /*ino*/0); ++ /* ignore this error */ ++ /* bad action? */ ++ } ++ ++ /* children inode number will be broken */ ++ ++out: ++ AuTraceErr(err); ++ return err; ++} ++ ++static int hn_gen_tree(struct dentry *dentry) ++{ ++ int err, i, j, ndentry; ++ struct au_dcsub_pages dpages; ++ struct au_dpage *dpage; ++ struct dentry **dentries; ++ ++ err = au_dpages_init(&dpages, GFP_NOFS); ++ if (unlikely(err)) ++ goto out; ++ err = au_dcsub_pages(&dpages, dentry, NULL, NULL); ++ if (unlikely(err)) ++ goto out_dpages; ++ ++ for (i = 0; i < dpages.ndpage; i++) { ++ dpage = dpages.dpages + i; ++ dentries = dpage->dentries; ++ ndentry = dpage->ndentry; ++ for (j = 0; j < ndentry; j++) { ++ struct dentry *d; ++ ++ d = dentries[j]; ++ if (IS_ROOT(d)) ++ continue; ++ ++ au_digen_dec(d); ++ if (d->d_inode) ++ /* todo: reset children xino? ++ cached children only? */ ++ au_iigen_dec(d->d_inode); ++ } ++ } ++ ++out_dpages: ++ au_dpages_free(&dpages); ++ ++#if 0 ++ /* discard children */ ++ dentry_unhash(dentry); ++ dput(dentry); ++#endif ++out: ++ return err; ++} ++ ++/* ++ * return 0 if processed. 
++ */ ++static int hn_gen_by_inode(char *name, unsigned int nlen, struct inode *inode, ++ const unsigned int isdir) ++{ ++ int err; ++ struct dentry *d; ++ struct qstr *dname; ++ ++ err = 1; ++ if (unlikely(inode->i_ino == AUFS_ROOT_INO)) { ++ pr_warning("branch root dir was changed\n"); ++ err = 0; ++ goto out; ++ } ++ ++ if (!isdir) { ++ AuDebugOn(!name); ++ au_iigen_dec(inode); ++ spin_lock(&inode->i_lock); ++ list_for_each_entry(d, &inode->i_dentry, d_alias) { ++ spin_lock(&d->d_lock); ++ dname = &d->d_name; ++ if (dname->len != nlen ++ && memcmp(dname->name, name, nlen)) { ++ spin_unlock(&d->d_lock); ++ continue; ++ } ++ err = 0; ++ au_digen_dec(d); ++ spin_unlock(&d->d_lock); ++ break; ++ } ++ spin_unlock(&inode->i_lock); ++ } else { ++ au_fset_si(au_sbi(inode->i_sb), FAILED_REFRESH_DIR); ++ d = d_find_alias(inode); ++ if (!d) { ++ au_iigen_dec(inode); ++ goto out; ++ } ++ ++ spin_lock(&d->d_lock); ++ dname = &d->d_name; ++ if (dname->len == nlen && !memcmp(dname->name, name, nlen)) { ++ spin_unlock(&d->d_lock); ++ err = hn_gen_tree(d); ++ spin_lock(&d->d_lock); ++ } ++ spin_unlock(&d->d_lock); ++ dput(d); ++ } ++ ++out: ++ AuTraceErr(err); ++ return err; ++} ++ ++static int hn_gen_by_name(struct dentry *dentry, const unsigned int isdir) ++{ ++ int err; ++ struct inode *inode; ++ ++ inode = dentry->d_inode; ++ if (IS_ROOT(dentry) ++ /* || (inode && inode->i_ino == AUFS_ROOT_INO) */ ++ ) { ++ pr_warning("branch root dir was changed\n"); ++ return 0; ++ } ++ ++ err = 0; ++ if (!isdir) { ++ au_digen_dec(dentry); ++ if (inode) ++ au_iigen_dec(inode); ++ } else { ++ au_fset_si(au_sbi(dentry->d_sb), FAILED_REFRESH_DIR); ++ if (inode) ++ err = hn_gen_tree(dentry); ++ } ++ ++ AuTraceErr(err); ++ return err; ++} ++ ++/* ---------------------------------------------------------------------- */ ++ ++/* hnotify job flags */ ++#define AuHnJob_XINO0 1 ++#define AuHnJob_GEN (1 << 1) ++#define AuHnJob_DIRENT (1 << 2) ++#define AuHnJob_ISDIR (1 << 3) ++#define AuHnJob_TRYXINO0 (1 << 4) ++#define AuHnJob_MNTPNT (1 << 5) ++#define au_ftest_hnjob(flags, name) ((flags) & AuHnJob_##name) ++#define au_fset_hnjob(flags, name) \ ++ do { (flags) |= AuHnJob_##name; } while (0) ++#define au_fclr_hnjob(flags, name) \ ++ do { (flags) &= ~AuHnJob_##name; } while (0) ++ ++enum { ++ AuHn_CHILD, ++ AuHn_PARENT, ++ AuHnLast ++}; ++ ++struct au_hnotify_args { ++ struct inode *h_dir, *dir, *h_child_inode; ++ u32 mask; ++ unsigned int flags[AuHnLast]; ++ unsigned int h_child_nlen; ++ char h_child_name[]; ++}; ++ ++struct hn_job_args { ++ unsigned int flags; ++ struct inode *inode, *h_inode, *dir, *h_dir; ++ struct dentry *dentry; ++ char *h_name; ++ int h_nlen; ++}; ++ ++static int hn_job(struct hn_job_args *a) ++{ ++ const unsigned int isdir = au_ftest_hnjob(a->flags, ISDIR); ++ ++ /* reset xino */ ++ if (au_ftest_hnjob(a->flags, XINO0) && a->inode) ++ hn_xino(a->inode, a->h_inode); /* ignore this error */ ++ ++ if (au_ftest_hnjob(a->flags, TRYXINO0) ++ && a->inode ++ && a->h_inode) { ++ mutex_lock_nested(&a->h_inode->i_mutex, AuLsc_I_CHILD); ++ if (!a->h_inode->i_nlink) ++ hn_xino(a->inode, a->h_inode); /* ignore this error */ ++ mutex_unlock(&a->h_inode->i_mutex); ++ } ++ ++ /* make the generation obsolete */ ++ if (au_ftest_hnjob(a->flags, GEN)) { ++ int err = -1; ++ if (a->inode) ++ err = hn_gen_by_inode(a->h_name, a->h_nlen, a->inode, ++ isdir); ++ if (err && a->dentry) ++ hn_gen_by_name(a->dentry, isdir); ++ /* ignore this error */ ++ } ++ ++ /* make dir entries obsolete */ ++ if (au_ftest_hnjob(a->flags, DIRENT) 
&& a->inode) { ++ struct au_vdir *vdir; ++ ++ vdir = au_ivdir(a->inode); ++ if (vdir) ++ vdir->vd_jiffy = 0; ++ /* IMustLock(a->inode); */ ++ /* a->inode->i_version++; */ ++ } ++ ++ /* can do nothing but warn */ ++ if (au_ftest_hnjob(a->flags, MNTPNT) ++ && a->dentry ++ && d_mountpoint(a->dentry)) ++ pr_warning("mount-point %.*s is removed or renamed\n", ++ AuDLNPair(a->dentry)); ++ ++ return 0; ++} ++ ++/* ---------------------------------------------------------------------- */ ++ ++static struct dentry *lookup_wlock_by_name(char *name, unsigned int nlen, ++ struct inode *dir) ++{ ++ struct dentry *dentry, *d, *parent; ++ struct qstr *dname; ++ ++ parent = d_find_alias(dir); ++ if (!parent) ++ return NULL; ++ ++ dentry = NULL; ++ spin_lock(&parent->d_lock); ++ list_for_each_entry(d, &parent->d_subdirs, d_u.d_child) { ++ /* AuDbg("%.*s\n", AuDLNPair(d)); */ ++ spin_lock_nested(&d->d_lock, DENTRY_D_LOCK_NESTED); ++ dname = &d->d_name; ++ if (dname->len != nlen || memcmp(dname->name, name, nlen)) ++ goto cont_unlock; ++ if (au_di(d)) ++ au_digen_dec(d); ++ else ++ goto cont_unlock; ++ if (d->d_count) { ++ dentry = dget_dlock(d); ++ spin_unlock(&d->d_lock); ++ break; ++ } ++ ++ cont_unlock: ++ spin_unlock(&d->d_lock); ++ } ++ spin_unlock(&parent->d_lock); ++ dput(parent); ++ ++ if (dentry) ++ di_write_lock_child(dentry); ++ ++ return dentry; ++} ++ ++static struct inode *lookup_wlock_by_ino(struct super_block *sb, ++ aufs_bindex_t bindex, ino_t h_ino) ++{ ++ struct inode *inode; ++ ino_t ino; ++ int err; ++ ++ inode = NULL; ++ err = au_xino_read(sb, bindex, h_ino, &ino); ++ if (!err && ino) ++ inode = ilookup(sb, ino); ++ if (!inode) ++ goto out; ++ ++ if (unlikely(inode->i_ino == AUFS_ROOT_INO)) { ++ pr_warning("wrong root branch\n"); ++ iput(inode); ++ inode = NULL; ++ goto out; ++ } ++ ++ ii_write_lock_child(inode); ++ ++out: ++ return inode; ++} ++ ++static void au_hn_bh(void *_args) ++{ ++ struct au_hnotify_args *a = _args; ++ struct super_block *sb; ++ aufs_bindex_t bindex, bend, bfound; ++ unsigned char xino, try_iput; ++ int err; ++ struct inode *inode; ++ ino_t h_ino; ++ struct hn_job_args args; ++ struct dentry *dentry; ++ struct au_sbinfo *sbinfo; ++ ++ AuDebugOn(!_args); ++ AuDebugOn(!a->h_dir); ++ AuDebugOn(!a->dir); ++ AuDebugOn(!a->mask); ++ AuDbg("mask 0x%x, i%lu, hi%lu, hci%lu\n", ++ a->mask, a->dir->i_ino, a->h_dir->i_ino, ++ a->h_child_inode ? a->h_child_inode->i_ino : 0); ++ ++ inode = NULL; ++ dentry = NULL; ++ /* ++ * do not lock a->dir->i_mutex here ++ * because of d_revalidate() may cause a deadlock. 
++ */ ++ sb = a->dir->i_sb; ++ AuDebugOn(!sb); ++ sbinfo = au_sbi(sb); ++ AuDebugOn(!sbinfo); ++ si_write_lock(sb, AuLock_NOPLMW); ++ ++ ii_read_lock_parent(a->dir); ++ bfound = -1; ++ bend = au_ibend(a->dir); ++ for (bindex = au_ibstart(a->dir); bindex <= bend; bindex++) ++ if (au_h_iptr(a->dir, bindex) == a->h_dir) { ++ bfound = bindex; ++ break; ++ } ++ ii_read_unlock(a->dir); ++ if (unlikely(bfound < 0)) ++ goto out; ++ ++ xino = !!au_opt_test(au_mntflags(sb), XINO); ++ h_ino = 0; ++ if (a->h_child_inode) ++ h_ino = a->h_child_inode->i_ino; ++ ++ if (a->h_child_nlen ++ && (au_ftest_hnjob(a->flags[AuHn_CHILD], GEN) ++ || au_ftest_hnjob(a->flags[AuHn_CHILD], MNTPNT))) ++ dentry = lookup_wlock_by_name(a->h_child_name, a->h_child_nlen, ++ a->dir); ++ try_iput = 0; ++ if (dentry) ++ inode = dentry->d_inode; ++ if (xino && !inode && h_ino ++ && (au_ftest_hnjob(a->flags[AuHn_CHILD], XINO0) ++ || au_ftest_hnjob(a->flags[AuHn_CHILD], TRYXINO0) ++ || au_ftest_hnjob(a->flags[AuHn_CHILD], GEN))) { ++ inode = lookup_wlock_by_ino(sb, bfound, h_ino); ++ try_iput = 1; ++ } ++ ++ args.flags = a->flags[AuHn_CHILD]; ++ args.dentry = dentry; ++ args.inode = inode; ++ args.h_inode = a->h_child_inode; ++ args.dir = a->dir; ++ args.h_dir = a->h_dir; ++ args.h_name = a->h_child_name; ++ args.h_nlen = a->h_child_nlen; ++ err = hn_job(&args); ++ if (dentry) { ++ if (au_di(dentry)) ++ di_write_unlock(dentry); ++ dput(dentry); ++ } ++ if (inode && try_iput) { ++ ii_write_unlock(inode); ++ iput(inode); ++ } ++ ++ ii_write_lock_parent(a->dir); ++ args.flags = a->flags[AuHn_PARENT]; ++ args.dentry = NULL; ++ args.inode = a->dir; ++ args.h_inode = a->h_dir; ++ args.dir = NULL; ++ args.h_dir = NULL; ++ args.h_name = NULL; ++ args.h_nlen = 0; ++ err = hn_job(&args); ++ ii_write_unlock(a->dir); ++ ++out: ++ iput(a->h_child_inode); ++ iput(a->h_dir); ++ iput(a->dir); ++ si_write_unlock(sb); ++ au_nwt_done(&sbinfo->si_nowait); ++ kfree(a); ++} ++ ++/* ---------------------------------------------------------------------- */ ++ ++int au_hnotify(struct inode *h_dir, struct au_hnotify *hnotify, u32 mask, ++ struct qstr *h_child_qstr, struct inode *h_child_inode) ++{ ++ int err, len; ++ unsigned int flags[AuHnLast], f; ++ unsigned char isdir, isroot, wh; ++ struct inode *dir; ++ struct au_hnotify_args *args; ++ char *p, *h_child_name; ++ ++ err = 0; ++ AuDebugOn(!hnotify || !hnotify->hn_aufs_inode); ++ dir = igrab(hnotify->hn_aufs_inode); ++ if (!dir) ++ goto out; ++ ++ isroot = (dir->i_ino == AUFS_ROOT_INO); ++ wh = 0; ++ h_child_name = (void *)h_child_qstr->name; ++ len = h_child_qstr->len; ++ if (h_child_name) { ++ if (len > AUFS_WH_PFX_LEN ++ && !memcmp(h_child_name, AUFS_WH_PFX, AUFS_WH_PFX_LEN)) { ++ h_child_name += AUFS_WH_PFX_LEN; ++ len -= AUFS_WH_PFX_LEN; ++ wh = 1; ++ } ++ } ++ ++ isdir = 0; ++ if (h_child_inode) ++ isdir = !!S_ISDIR(h_child_inode->i_mode); ++ flags[AuHn_PARENT] = AuHnJob_ISDIR; ++ flags[AuHn_CHILD] = 0; ++ if (isdir) ++ flags[AuHn_CHILD] = AuHnJob_ISDIR; ++ au_fset_hnjob(flags[AuHn_PARENT], DIRENT); ++ au_fset_hnjob(flags[AuHn_CHILD], GEN); ++ switch (mask & FS_EVENTS_POSS_ON_CHILD) { ++ case FS_MOVED_FROM: ++ case FS_MOVED_TO: ++ au_fset_hnjob(flags[AuHn_CHILD], XINO0); ++ au_fset_hnjob(flags[AuHn_CHILD], MNTPNT); ++ /*FALLTHROUGH*/ ++ case FS_CREATE: ++ AuDebugOn(!h_child_name || !h_child_inode); ++ break; ++ ++ case FS_DELETE: ++ /* ++ * aufs never be able to get this child inode. ++ * revalidation should be in d_revalidate() ++ * by checking i_nlink, i_generation or d_unhashed(). 
++ */ ++ AuDebugOn(!h_child_name); ++ au_fset_hnjob(flags[AuHn_CHILD], TRYXINO0); ++ au_fset_hnjob(flags[AuHn_CHILD], MNTPNT); ++ break; ++ ++ default: ++ AuDebugOn(1); ++ } ++ ++ if (wh) ++ h_child_inode = NULL; ++ ++ err = -ENOMEM; ++ /* iput() and kfree() will be called in au_hnotify() */ ++ args = kmalloc(sizeof(*args) + len + 1, GFP_NOFS); ++ if (unlikely(!args)) { ++ AuErr1("no memory\n"); ++ iput(dir); ++ goto out; ++ } ++ args->flags[AuHn_PARENT] = flags[AuHn_PARENT]; ++ args->flags[AuHn_CHILD] = flags[AuHn_CHILD]; ++ args->mask = mask; ++ args->dir = dir; ++ args->h_dir = igrab(h_dir); ++ if (h_child_inode) ++ h_child_inode = igrab(h_child_inode); /* can be NULL */ ++ args->h_child_inode = h_child_inode; ++ args->h_child_nlen = len; ++ if (len) { ++ p = (void *)args; ++ p += sizeof(*args); ++ memcpy(p, h_child_name, len); ++ p[len] = 0; ++ } ++ ++ f = 0; ++ if (!dir->i_nlink) ++ f = AuWkq_NEST; ++ err = au_wkq_nowait(au_hn_bh, args, dir->i_sb, f); ++ if (unlikely(err)) { ++ pr_err("wkq %d\n", err); ++ iput(args->h_child_inode); ++ iput(args->h_dir); ++ iput(args->dir); ++ kfree(args); ++ } ++ ++out: ++ return err; ++} ++ ++/* ---------------------------------------------------------------------- */ ++ ++int au_hnotify_reset_br(unsigned int udba, struct au_branch *br, int perm) ++{ ++ int err; ++ ++ AuDebugOn(!(udba & AuOptMask_UDBA)); ++ ++ err = 0; ++ if (au_hnotify_op.reset_br) ++ err = au_hnotify_op.reset_br(udba, br, perm); ++ ++ return err; ++} ++ ++int au_hnotify_init_br(struct au_branch *br, int perm) ++{ ++ int err; ++ ++ err = 0; ++ if (au_hnotify_op.init_br) ++ err = au_hnotify_op.init_br(br, perm); ++ ++ return err; ++} ++ ++void au_hnotify_fin_br(struct au_branch *br) ++{ ++ if (au_hnotify_op.fin_br) ++ au_hnotify_op.fin_br(br); ++} ++ ++static void au_hn_destroy_cache(void) ++{ ++ kmem_cache_destroy(au_cachep[AuCache_HNOTIFY]); ++ au_cachep[AuCache_HNOTIFY] = NULL; ++} ++ ++int __init au_hnotify_init(void) ++{ ++ int err; ++ ++ err = -ENOMEM; ++ au_cachep[AuCache_HNOTIFY] = AuCache(au_hnotify); ++ if (au_cachep[AuCache_HNOTIFY]) { ++ err = 0; ++ if (au_hnotify_op.init) ++ err = au_hnotify_op.init(); ++ if (unlikely(err)) ++ au_hn_destroy_cache(); ++ } ++ AuTraceErr(err); ++ return err; ++} ++ ++void au_hnotify_fin(void) ++{ ++ if (au_hnotify_op.fin) ++ au_hnotify_op.fin(); ++ /* cf. au_cache_fin() */ ++ if (au_cachep[AuCache_HNOTIFY]) ++ au_hn_destroy_cache(); ++} +--- /dev/null 2012-03-14 12:35:58.848999748 +0100 ++++ b/fs/aufs/iinfo.c 2012-03-20 17:31:17.000000000 +0100 +@@ -0,0 +1,264 @@ ++/* ++ * Copyright (C) 2005-2012 Junjiro R. Okajima ++ * ++ * This program, aufs is free software; you can redistribute it and/or modify ++ * it under the terms of the GNU General Public License as published by ++ * the Free Software Foundation; either version 2 of the License, or ++ * (at your option) any later version. ++ * ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ * GNU General Public License for more details. 
++ * ++ * You should have received a copy of the GNU General Public License ++ * along with this program; if not, write to the Free Software ++ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA ++ */ ++ ++/* ++ * inode private data ++ */ ++ ++#include "aufs.h" ++ ++struct inode *au_h_iptr(struct inode *inode, aufs_bindex_t bindex) ++{ ++ struct inode *h_inode; ++ ++ IiMustAnyLock(inode); ++ ++ h_inode = au_ii(inode)->ii_hinode[0 + bindex].hi_inode; ++ AuDebugOn(h_inode && atomic_read(&h_inode->i_count) <= 0); ++ return h_inode; ++} ++ ++/* todo: hard/soft set? */ ++void au_hiput(struct au_hinode *hinode) ++{ ++ au_hn_free(hinode); ++ dput(hinode->hi_whdentry); ++ iput(hinode->hi_inode); ++} ++ ++unsigned int au_hi_flags(struct inode *inode, int isdir) ++{ ++ unsigned int flags; ++ const unsigned int mnt_flags = au_mntflags(inode->i_sb); ++ ++ flags = 0; ++ if (au_opt_test(mnt_flags, XINO)) ++ au_fset_hi(flags, XINO); ++ if (isdir && au_opt_test(mnt_flags, UDBA_HNOTIFY)) ++ au_fset_hi(flags, HNOTIFY); ++ return flags; ++} ++ ++void au_set_h_iptr(struct inode *inode, aufs_bindex_t bindex, ++ struct inode *h_inode, unsigned int flags) ++{ ++ struct au_hinode *hinode; ++ struct inode *hi; ++ struct au_iinfo *iinfo = au_ii(inode); ++ ++ IiMustWriteLock(inode); ++ ++ hinode = iinfo->ii_hinode + bindex; ++ hi = hinode->hi_inode; ++ AuDebugOn(h_inode && atomic_read(&h_inode->i_count) <= 0); ++ ++ if (hi) ++ au_hiput(hinode); ++ hinode->hi_inode = h_inode; ++ if (h_inode) { ++ int err; ++ struct super_block *sb = inode->i_sb; ++ struct au_branch *br; ++ ++ AuDebugOn(inode->i_mode ++ && (h_inode->i_mode & S_IFMT) ++ != (inode->i_mode & S_IFMT)); ++ if (bindex == iinfo->ii_bstart) ++ au_cpup_igen(inode, h_inode); ++ br = au_sbr(sb, bindex); ++ hinode->hi_id = br->br_id; ++ if (au_ftest_hi(flags, XINO)) { ++ err = au_xino_write(sb, bindex, h_inode->i_ino, ++ inode->i_ino); ++ if (unlikely(err)) ++ AuIOErr1("failed au_xino_write() %d\n", err); ++ } ++ ++ if (au_ftest_hi(flags, HNOTIFY) ++ && au_br_hnotifyable(br->br_perm)) { ++ err = au_hn_alloc(hinode, inode); ++ if (unlikely(err)) ++ AuIOErr1("au_hn_alloc() %d\n", err); ++ } ++ } ++} ++ ++void au_set_hi_wh(struct inode *inode, aufs_bindex_t bindex, ++ struct dentry *h_wh) ++{ ++ struct au_hinode *hinode; ++ ++ IiMustWriteLock(inode); ++ ++ hinode = au_ii(inode)->ii_hinode + bindex; ++ AuDebugOn(hinode->hi_whdentry); ++ hinode->hi_whdentry = h_wh; ++} ++ ++void au_update_iigen(struct inode *inode) ++{ ++ atomic_set(&au_ii(inode)->ii_generation, au_sigen(inode->i_sb)); ++ /* smp_mb(); */ /* atomic_set */ ++} ++ ++/* it may be called at remount time, too */ ++void au_update_ibrange(struct inode *inode, int do_put_zero) ++{ ++ struct au_iinfo *iinfo; ++ aufs_bindex_t bindex, bend; ++ ++ iinfo = au_ii(inode); ++ if (!iinfo) ++ return; ++ ++ IiMustWriteLock(inode); ++ ++ if (do_put_zero && iinfo->ii_bstart >= 0) { ++ for (bindex = iinfo->ii_bstart; bindex <= iinfo->ii_bend; ++ bindex++) { ++ struct inode *h_i; ++ ++ h_i = iinfo->ii_hinode[0 + bindex].hi_inode; ++ if (h_i && !h_i->i_nlink) ++ au_set_h_iptr(inode, bindex, NULL, 0); ++ } ++ } ++ ++ iinfo->ii_bstart = -1; ++ iinfo->ii_bend = -1; ++ bend = au_sbend(inode->i_sb); ++ for (bindex = 0; bindex <= bend; bindex++) ++ if (iinfo->ii_hinode[0 + bindex].hi_inode) { ++ iinfo->ii_bstart = bindex; ++ break; ++ } ++ if (iinfo->ii_bstart >= 0) ++ for (bindex = bend; bindex >= iinfo->ii_bstart; bindex--) ++ if (iinfo->ii_hinode[0 + bindex].hi_inode) { ++ iinfo->ii_bend = bindex; ++ 
break; ++ } ++ AuDebugOn(iinfo->ii_bstart > iinfo->ii_bend); ++} ++ ++/* ---------------------------------------------------------------------- */ ++ ++void au_icntnr_init_once(void *_c) ++{ ++ struct au_icntnr *c = _c; ++ struct au_iinfo *iinfo = &c->iinfo; ++ static struct lock_class_key aufs_ii; ++ ++ au_rw_init(&iinfo->ii_rwsem); ++ au_rw_class(&iinfo->ii_rwsem, &aufs_ii); ++ inode_init_once(&c->vfs_inode); ++} ++ ++int au_iinfo_init(struct inode *inode) ++{ ++ struct au_iinfo *iinfo; ++ struct super_block *sb; ++ int nbr, i; ++ ++ sb = inode->i_sb; ++ iinfo = &(container_of(inode, struct au_icntnr, vfs_inode)->iinfo); ++ nbr = au_sbend(sb) + 1; ++ if (unlikely(nbr <= 0)) ++ nbr = 1; ++ iinfo->ii_hinode = kcalloc(nbr, sizeof(*iinfo->ii_hinode), GFP_NOFS); ++ if (iinfo->ii_hinode) { ++ au_ninodes_inc(sb); ++ for (i = 0; i < nbr; i++) ++ iinfo->ii_hinode[i].hi_id = -1; ++ ++ atomic_set(&iinfo->ii_generation, au_sigen(sb)); ++ /* smp_mb(); */ /* atomic_set */ ++ iinfo->ii_bstart = -1; ++ iinfo->ii_bend = -1; ++ iinfo->ii_vdir = NULL; ++ return 0; ++ } ++ return -ENOMEM; ++} ++ ++int au_ii_realloc(struct au_iinfo *iinfo, int nbr) ++{ ++ int err, sz; ++ struct au_hinode *hip; ++ ++ AuRwMustWriteLock(&iinfo->ii_rwsem); ++ ++ err = -ENOMEM; ++ sz = sizeof(*hip) * (iinfo->ii_bend + 1); ++ if (!sz) ++ sz = sizeof(*hip); ++ hip = au_kzrealloc(iinfo->ii_hinode, sz, sizeof(*hip) * nbr, GFP_NOFS); ++ if (hip) { ++ iinfo->ii_hinode = hip; ++ err = 0; ++ } ++ ++ return err; ++} ++ ++void au_iinfo_fin(struct inode *inode) ++{ ++ struct au_iinfo *iinfo; ++ struct au_hinode *hi; ++ struct super_block *sb; ++ aufs_bindex_t bindex, bend; ++ const unsigned char unlinked = !inode->i_nlink; ++ ++ iinfo = au_ii(inode); ++ /* bad_inode case */ ++ if (!iinfo) ++ return; ++ ++ sb = inode->i_sb; ++ au_ninodes_dec(sb); ++ if (si_pid_test(sb)) ++ au_xino_delete_inode(inode, unlinked); ++ else { ++ /* ++ * it is safe to hide the dependency between sbinfo and ++ * sb->s_umount. ++ */ ++ lockdep_off(); ++ si_noflush_read_lock(sb); ++ au_xino_delete_inode(inode, unlinked); ++ si_read_unlock(sb); ++ lockdep_on(); ++ } ++ ++ if (iinfo->ii_vdir) ++ au_vdir_free(iinfo->ii_vdir); ++ ++ bindex = iinfo->ii_bstart; ++ if (bindex >= 0) { ++ hi = iinfo->ii_hinode + bindex; ++ bend = iinfo->ii_bend; ++ while (bindex++ <= bend) { ++ if (hi->hi_inode) ++ au_hiput(hi); ++ hi++; ++ } ++ } ++ kfree(iinfo->ii_hinode); ++ iinfo->ii_hinode = NULL; ++ AuRwDestroy(&iinfo->ii_rwsem); ++} +--- /dev/null 2012-03-14 12:35:58.848999748 +0100 ++++ b/fs/aufs/inode.c 2012-03-20 17:31:17.000000000 +0100 +@@ -0,0 +1,471 @@ ++/* ++ * Copyright (C) 2005-2012 Junjiro R. Okajima ++ * ++ * This program, aufs is free software; you can redistribute it and/or modify ++ * it under the terms of the GNU General Public License as published by ++ * the Free Software Foundation; either version 2 of the License, or ++ * (at your option) any later version. ++ * ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ * GNU General Public License for more details. 
++ * ++ * You should have received a copy of the GNU General Public License ++ * along with this program; if not, write to the Free Software ++ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA ++ */ ++ ++/* ++ * inode functions ++ */ ++ ++#include "aufs.h" ++ ++struct inode *au_igrab(struct inode *inode) ++{ ++ if (inode) { ++ AuDebugOn(!atomic_read(&inode->i_count)); ++ ihold(inode); ++ } ++ return inode; ++} ++ ++static void au_refresh_hinode_attr(struct inode *inode, int do_version) ++{ ++ au_cpup_attr_all(inode, /*force*/0); ++ au_update_iigen(inode); ++ if (do_version) ++ inode->i_version++; ++} ++ ++static int au_ii_refresh(struct inode *inode, int *update) ++{ ++ int err, e; ++ umode_t type; ++ aufs_bindex_t bindex, new_bindex; ++ struct super_block *sb; ++ struct au_iinfo *iinfo; ++ struct au_hinode *p, *q, tmp; ++ ++ IiMustWriteLock(inode); ++ ++ *update = 0; ++ sb = inode->i_sb; ++ type = inode->i_mode & S_IFMT; ++ iinfo = au_ii(inode); ++ err = au_ii_realloc(iinfo, au_sbend(sb) + 1); ++ if (unlikely(err)) ++ goto out; ++ ++ AuDebugOn(iinfo->ii_bstart < 0); ++ p = iinfo->ii_hinode + iinfo->ii_bstart; ++ for (bindex = iinfo->ii_bstart; bindex <= iinfo->ii_bend; ++ bindex++, p++) { ++ if (!p->hi_inode) ++ continue; ++ ++ AuDebugOn(type != (p->hi_inode->i_mode & S_IFMT)); ++ new_bindex = au_br_index(sb, p->hi_id); ++ if (new_bindex == bindex) ++ continue; ++ ++ if (new_bindex < 0) { ++ *update = 1; ++ au_hiput(p); ++ p->hi_inode = NULL; ++ continue; ++ } ++ ++ if (new_bindex < iinfo->ii_bstart) ++ iinfo->ii_bstart = new_bindex; ++ if (iinfo->ii_bend < new_bindex) ++ iinfo->ii_bend = new_bindex; ++ /* swap two lower inode, and loop again */ ++ q = iinfo->ii_hinode + new_bindex; ++ tmp = *q; ++ *q = *p; ++ *p = tmp; ++ if (tmp.hi_inode) { ++ bindex--; ++ p--; ++ } ++ } ++ au_update_ibrange(inode, /*do_put_zero*/0); ++ e = au_dy_irefresh(inode); ++ if (unlikely(e && !err)) ++ err = e; ++ ++out: ++ AuTraceErr(err); ++ return err; ++} ++ ++int au_refresh_hinode_self(struct inode *inode) ++{ ++ int err, update; ++ ++ err = au_ii_refresh(inode, &update); ++ if (!err) ++ au_refresh_hinode_attr(inode, update && S_ISDIR(inode->i_mode)); ++ ++ AuTraceErr(err); ++ return err; ++} ++ ++int au_refresh_hinode(struct inode *inode, struct dentry *dentry) ++{ ++ int err, e, update; ++ unsigned int flags; ++ umode_t mode; ++ aufs_bindex_t bindex, bend; ++ unsigned char isdir; ++ struct au_hinode *p; ++ struct au_iinfo *iinfo; ++ ++ err = au_ii_refresh(inode, &update); ++ if (unlikely(err)) ++ goto out; ++ ++ update = 0; ++ iinfo = au_ii(inode); ++ p = iinfo->ii_hinode + iinfo->ii_bstart; ++ mode = (inode->i_mode & S_IFMT); ++ isdir = S_ISDIR(mode); ++ flags = au_hi_flags(inode, isdir); ++ bend = au_dbend(dentry); ++ for (bindex = au_dbstart(dentry); bindex <= bend; bindex++) { ++ struct inode *h_i; ++ struct dentry *h_d; ++ ++ h_d = au_h_dptr(dentry, bindex); ++ if (!h_d || !h_d->d_inode) ++ continue; ++ ++ AuDebugOn(mode != (h_d->d_inode->i_mode & S_IFMT)); ++ if (iinfo->ii_bstart <= bindex && bindex <= iinfo->ii_bend) { ++ h_i = au_h_iptr(inode, bindex); ++ if (h_i) { ++ if (h_i == h_d->d_inode) ++ continue; ++ err = -EIO; ++ break; ++ } ++ } ++ if (bindex < iinfo->ii_bstart) ++ iinfo->ii_bstart = bindex; ++ if (iinfo->ii_bend < bindex) ++ iinfo->ii_bend = bindex; ++ au_set_h_iptr(inode, bindex, au_igrab(h_d->d_inode), flags); ++ update = 1; ++ } ++ au_update_ibrange(inode, /*do_put_zero*/0); ++ e = au_dy_irefresh(inode); ++ if (unlikely(e && !err)) ++ err = e; ++ if (!err) 
++ au_refresh_hinode_attr(inode, update && isdir); ++ ++out: ++ AuTraceErr(err); ++ return err; ++} ++ ++static int set_inode(struct inode *inode, struct dentry *dentry) ++{ ++ int err; ++ unsigned int flags; ++ umode_t mode; ++ aufs_bindex_t bindex, bstart, btail; ++ unsigned char isdir; ++ struct dentry *h_dentry; ++ struct inode *h_inode; ++ struct au_iinfo *iinfo; ++ ++ IiMustWriteLock(inode); ++ ++ err = 0; ++ isdir = 0; ++ bstart = au_dbstart(dentry); ++ h_inode = au_h_dptr(dentry, bstart)->d_inode; ++ mode = h_inode->i_mode; ++ switch (mode & S_IFMT) { ++ case S_IFREG: ++ btail = au_dbtail(dentry); ++ inode->i_op = &aufs_iop; ++ inode->i_fop = &aufs_file_fop; ++ err = au_dy_iaop(inode, bstart, h_inode); ++ if (unlikely(err)) ++ goto out; ++ break; ++ case S_IFDIR: ++ isdir = 1; ++ btail = au_dbtaildir(dentry); ++ inode->i_op = &aufs_dir_iop; ++ inode->i_fop = &aufs_dir_fop; ++ break; ++ case S_IFLNK: ++ btail = au_dbtail(dentry); ++ inode->i_op = &aufs_symlink_iop; ++ break; ++ case S_IFBLK: ++ case S_IFCHR: ++ case S_IFIFO: ++ case S_IFSOCK: ++ btail = au_dbtail(dentry); ++ inode->i_op = &aufs_iop; ++ au_init_special_fop(inode, mode, h_inode->i_rdev); ++ break; ++ default: ++ AuIOErr("Unknown file type 0%o\n", mode); ++ err = -EIO; ++ goto out; ++ } ++ ++ /* do not set hnotify for whiteouted dirs (SHWH mode) */ ++ flags = au_hi_flags(inode, isdir); ++ if (au_opt_test(au_mntflags(dentry->d_sb), SHWH) ++ && au_ftest_hi(flags, HNOTIFY) ++ && dentry->d_name.len > AUFS_WH_PFX_LEN ++ && !memcmp(dentry->d_name.name, AUFS_WH_PFX, AUFS_WH_PFX_LEN)) ++ au_fclr_hi(flags, HNOTIFY); ++ iinfo = au_ii(inode); ++ iinfo->ii_bstart = bstart; ++ iinfo->ii_bend = btail; ++ for (bindex = bstart; bindex <= btail; bindex++) { ++ h_dentry = au_h_dptr(dentry, bindex); ++ if (h_dentry) ++ au_set_h_iptr(inode, bindex, ++ au_igrab(h_dentry->d_inode), flags); ++ } ++ au_cpup_attr_all(inode, /*force*/1); ++ ++out: ++ return err; ++} ++ ++/* ++ * successful returns with iinfo write_locked ++ * minus: errno ++ * zero: success, matched ++ * plus: no error, but unmatched ++ */ ++static int reval_inode(struct inode *inode, struct dentry *dentry) ++{ ++ int err; ++ aufs_bindex_t bindex, bend; ++ struct inode *h_inode, *h_dinode; ++ ++ /* ++ * before this function, if aufs got any iinfo lock, it must be only ++ * one, the parent dir. ++ * it can happen by UDBA and the obsoleted inode number. 
++ */ ++ err = -EIO; ++ if (unlikely(inode->i_ino == parent_ino(dentry))) ++ goto out; ++ ++ err = 1; ++ ii_write_lock_new_child(inode); ++ h_dinode = au_h_dptr(dentry, au_dbstart(dentry))->d_inode; ++ bend = au_ibend(inode); ++ for (bindex = au_ibstart(inode); bindex <= bend; bindex++) { ++ h_inode = au_h_iptr(inode, bindex); ++ if (h_inode && h_inode == h_dinode) { ++ err = 0; ++ if (au_iigen_test(inode, au_digen(dentry))) ++ err = au_refresh_hinode(inode, dentry); ++ break; ++ } ++ } ++ ++ if (unlikely(err)) ++ ii_write_unlock(inode); ++out: ++ return err; ++} ++ ++int au_ino(struct super_block *sb, aufs_bindex_t bindex, ino_t h_ino, ++ unsigned int d_type, ino_t *ino) ++{ ++ int err; ++ struct mutex *mtx; ++ ++ /* prevent hardlinked inode number from race condition */ ++ mtx = NULL; ++ if (d_type != DT_DIR) { ++ mtx = &au_sbr(sb, bindex)->br_xino.xi_nondir_mtx; ++ mutex_lock(mtx); ++ } ++ err = au_xino_read(sb, bindex, h_ino, ino); ++ if (unlikely(err)) ++ goto out; ++ ++ if (!*ino) { ++ err = -EIO; ++ *ino = au_xino_new_ino(sb); ++ if (unlikely(!*ino)) ++ goto out; ++ err = au_xino_write(sb, bindex, h_ino, *ino); ++ if (unlikely(err)) ++ goto out; ++ } ++ ++out: ++ if (mtx) ++ mutex_unlock(mtx); ++ return err; ++} ++ ++/* successful returns with iinfo write_locked */ ++/* todo: return with unlocked? */ ++struct inode *au_new_inode(struct dentry *dentry, int must_new) ++{ ++ struct inode *inode, *h_inode; ++ struct dentry *h_dentry; ++ struct super_block *sb; ++ struct mutex *mtx; ++ ino_t h_ino, ino; ++ int err; ++ aufs_bindex_t bstart; ++ ++ sb = dentry->d_sb; ++ bstart = au_dbstart(dentry); ++ h_dentry = au_h_dptr(dentry, bstart); ++ h_inode = h_dentry->d_inode; ++ h_ino = h_inode->i_ino; ++ ++ /* ++ * stop 'race'-ing between hardlinks under different ++ * parents. ++ */ ++ mtx = NULL; ++ if (!S_ISDIR(h_inode->i_mode)) ++ mtx = &au_sbr(sb, bstart)->br_xino.xi_nondir_mtx; ++ ++new_ino: ++ if (mtx) ++ mutex_lock(mtx); ++ err = au_xino_read(sb, bstart, h_ino, &ino); ++ inode = ERR_PTR(err); ++ if (unlikely(err)) ++ goto out; ++ ++ if (!ino) { ++ ino = au_xino_new_ino(sb); ++ if (unlikely(!ino)) { ++ inode = ERR_PTR(-EIO); ++ goto out; ++ } ++ } ++ ++ AuDbg("i%lu\n", (unsigned long)ino); ++ inode = au_iget_locked(sb, ino); ++ err = PTR_ERR(inode); ++ if (IS_ERR(inode)) ++ goto out; ++ ++ AuDbg("%lx, new %d\n", inode->i_state, !!(inode->i_state & I_NEW)); ++ if (inode->i_state & I_NEW) { ++ ii_write_lock_new_child(inode); ++ err = set_inode(inode, dentry); ++ if (!err) { ++ unlock_new_inode(inode); ++ goto out; /* success */ ++ } ++ ++ /* ++ * iget_failed() calls iput(), but we need to call ++ * ii_write_unlock() after iget_failed(). so dirty hack for ++ * i_count. ++ */ ++ atomic_inc(&inode->i_count); ++ iget_failed(inode); ++ ii_write_unlock(inode); ++ au_xino_write(sb, bstart, h_ino, /*ino*/0); ++ /* ignore this error */ ++ goto out_iput; ++ } else if (!must_new && !IS_DEADDIR(inode) && inode->i_nlink) { ++ /* ++ * horrible race condition between lookup, readdir and copyup ++ * (or something). 
++ */ ++ if (mtx) ++ mutex_unlock(mtx); ++ err = reval_inode(inode, dentry); ++ if (unlikely(err < 0)) { ++ mtx = NULL; ++ goto out_iput; ++ } ++ ++ if (!err) { ++ mtx = NULL; ++ goto out; /* success */ ++ } else if (mtx) ++ mutex_lock(mtx); ++ } ++ ++ if (unlikely(au_test_fs_unique_ino(h_dentry->d_inode))) ++ AuWarn1("Warning: Un-notified UDBA or repeatedly renamed dir," ++ " b%d, %s, %.*s, hi%lu, i%lu.\n", ++ bstart, au_sbtype(h_dentry->d_sb), AuDLNPair(dentry), ++ (unsigned long)h_ino, (unsigned long)ino); ++ ino = 0; ++ err = au_xino_write(sb, bstart, h_ino, /*ino*/0); ++ if (!err) { ++ iput(inode); ++ if (mtx) ++ mutex_unlock(mtx); ++ goto new_ino; ++ } ++ ++out_iput: ++ iput(inode); ++ inode = ERR_PTR(err); ++out: ++ if (mtx) ++ mutex_unlock(mtx); ++ return inode; ++} ++ ++/* ---------------------------------------------------------------------- */ ++ ++int au_test_ro(struct super_block *sb, aufs_bindex_t bindex, ++ struct inode *inode) ++{ ++ int err; ++ ++ err = au_br_rdonly(au_sbr(sb, bindex)); ++ ++ /* pseudo-link after flushed may happen out of bounds */ ++ if (!err ++ && inode ++ && au_ibstart(inode) <= bindex ++ && bindex <= au_ibend(inode)) { ++ /* ++ * permission check is unnecessary since vfsub routine ++ * will be called later ++ */ ++ struct inode *hi = au_h_iptr(inode, bindex); ++ if (hi) ++ err = IS_IMMUTABLE(hi) ? -EROFS : 0; ++ } ++ ++ return err; ++} ++ ++int au_test_h_perm(struct inode *h_inode, int mask) ++{ ++ if (!current_fsuid()) ++ return 0; ++ return inode_permission(h_inode, mask); ++} ++ ++int au_test_h_perm_sio(struct inode *h_inode, int mask) ++{ ++ if (au_test_nfs(h_inode->i_sb) ++ && (mask & MAY_WRITE) ++ && S_ISDIR(h_inode->i_mode)) ++ mask |= MAY_READ; /* force permission check */ ++ return au_test_h_perm(h_inode, mask); ++} +--- /dev/null 2012-03-14 12:35:58.848999748 +0100 ++++ b/fs/aufs/inode.h 2012-03-20 17:31:17.000000000 +0100 +@@ -0,0 +1,560 @@ ++/* ++ * Copyright (C) 2005-2012 Junjiro R. Okajima ++ * ++ * This program, aufs is free software; you can redistribute it and/or modify ++ * it under the terms of the GNU General Public License as published by ++ * the Free Software Foundation; either version 2 of the License, or ++ * (at your option) any later version. ++ * ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ * GNU General Public License for more details. 
++ * ++ * You should have received a copy of the GNU General Public License ++ * along with this program; if not, write to the Free Software ++ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA ++ */ ++ ++/* ++ * inode operations ++ */ ++ ++#ifndef __AUFS_INODE_H__ ++#define __AUFS_INODE_H__ ++ ++#ifdef __KERNEL__ ++ ++#include ++#include "rwsem.h" ++ ++struct vfsmount; ++ ++struct au_hnotify { ++#ifdef CONFIG_AUFS_HNOTIFY ++#ifdef CONFIG_AUFS_HFSNOTIFY ++ /* never use fsnotify_add_vfsmount_mark() */ ++ struct fsnotify_mark hn_mark; ++#endif ++ struct inode *hn_aufs_inode; /* no get/put */ ++#endif ++} ____cacheline_aligned_in_smp; ++ ++struct au_hinode { ++ struct inode *hi_inode; ++ aufs_bindex_t hi_id; ++#ifdef CONFIG_AUFS_HNOTIFY ++ struct au_hnotify *hi_notify; ++#endif ++ ++ /* reference to the copied-up whiteout with get/put */ ++ struct dentry *hi_whdentry; ++}; ++ ++struct au_vdir; ++struct au_iinfo { ++ atomic_t ii_generation; ++ struct super_block *ii_hsb1; /* no get/put */ ++ ++ struct au_rwsem ii_rwsem; ++ aufs_bindex_t ii_bstart, ii_bend; ++ __u32 ii_higen; ++ struct au_hinode *ii_hinode; ++ struct au_vdir *ii_vdir; ++}; ++ ++struct au_icntnr { ++ struct au_iinfo iinfo; ++ struct inode vfs_inode; ++} ____cacheline_aligned_in_smp; ++ ++/* au_pin flags */ ++#define AuPin_DI_LOCKED 1 ++#define AuPin_MNT_WRITE (1 << 1) ++#define au_ftest_pin(flags, name) ((flags) & AuPin_##name) ++#define au_fset_pin(flags, name) \ ++ do { (flags) |= AuPin_##name; } while (0) ++#define au_fclr_pin(flags, name) \ ++ do { (flags) &= ~AuPin_##name; } while (0) ++ ++struct au_pin { ++ /* input */ ++ struct dentry *dentry; ++ unsigned int udba; ++ unsigned char lsc_di, lsc_hi, flags; ++ aufs_bindex_t bindex; ++ ++ /* output */ ++ struct dentry *parent; ++ struct au_hinode *hdir; ++ struct vfsmount *h_mnt; ++}; ++ ++/* ---------------------------------------------------------------------- */ ++ ++static inline struct au_iinfo *au_ii(struct inode *inode) ++{ ++ struct au_iinfo *iinfo; ++ ++ iinfo = &(container_of(inode, struct au_icntnr, vfs_inode)->iinfo); ++ if (iinfo->ii_hinode) ++ return iinfo; ++ return NULL; /* debugging bad_inode case */ ++} ++ ++/* ---------------------------------------------------------------------- */ ++ ++/* inode.c */ ++struct inode *au_igrab(struct inode *inode); ++int au_refresh_hinode_self(struct inode *inode); ++int au_refresh_hinode(struct inode *inode, struct dentry *dentry); ++int au_ino(struct super_block *sb, aufs_bindex_t bindex, ino_t h_ino, ++ unsigned int d_type, ino_t *ino); ++struct inode *au_new_inode(struct dentry *dentry, int must_new); ++int au_test_ro(struct super_block *sb, aufs_bindex_t bindex, ++ struct inode *inode); ++int au_test_h_perm(struct inode *h_inode, int mask); ++int au_test_h_perm_sio(struct inode *h_inode, int mask); ++ ++static inline int au_wh_ino(struct super_block *sb, aufs_bindex_t bindex, ++ ino_t h_ino, unsigned int d_type, ino_t *ino) ++{ ++#ifdef CONFIG_AUFS_SHWH ++ return au_ino(sb, bindex, h_ino, d_type, ino); ++#else ++ return 0; ++#endif ++} ++ ++/* i_op.c */ ++extern struct inode_operations aufs_iop, aufs_symlink_iop, aufs_dir_iop; ++ ++/* au_wr_dir flags */ ++#define AuWrDir_ADD_ENTRY 1 ++#define AuWrDir_ISDIR (1 << 1) ++#define au_ftest_wrdir(flags, name) ((flags) & AuWrDir_##name) ++#define au_fset_wrdir(flags, name) \ ++ do { (flags) |= AuWrDir_##name; } while (0) ++#define au_fclr_wrdir(flags, name) \ ++ do { (flags) &= ~AuWrDir_##name; } while (0) ++ ++struct au_wr_dir_args { ++ aufs_bindex_t 
force_btgt; ++ unsigned char flags; ++}; ++int au_wr_dir(struct dentry *dentry, struct dentry *src_dentry, ++ struct au_wr_dir_args *args); ++ ++struct dentry *au_pinned_h_parent(struct au_pin *pin); ++void au_pin_init(struct au_pin *pin, struct dentry *dentry, ++ aufs_bindex_t bindex, int lsc_di, int lsc_hi, ++ unsigned int udba, unsigned char flags); ++int au_pin(struct au_pin *pin, struct dentry *dentry, aufs_bindex_t bindex, ++ unsigned int udba, unsigned char flags) __must_check; ++int au_do_pin(struct au_pin *pin) __must_check; ++void au_unpin(struct au_pin *pin); ++ ++/* i_op_add.c */ ++int au_may_add(struct dentry *dentry, aufs_bindex_t bindex, ++ struct dentry *h_parent, int isdir); ++int aufs_mknod(struct inode *dir, struct dentry *dentry, umode_t mode, ++ dev_t dev); ++int aufs_symlink(struct inode *dir, struct dentry *dentry, const char *symname); ++int aufs_create(struct inode *dir, struct dentry *dentry, umode_t mode, ++ struct nameidata *nd); ++int aufs_link(struct dentry *src_dentry, struct inode *dir, ++ struct dentry *dentry); ++int aufs_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode); ++ ++/* i_op_del.c */ ++int au_wr_dir_need_wh(struct dentry *dentry, int isdir, aufs_bindex_t *bcpup); ++int au_may_del(struct dentry *dentry, aufs_bindex_t bindex, ++ struct dentry *h_parent, int isdir); ++int aufs_unlink(struct inode *dir, struct dentry *dentry); ++int aufs_rmdir(struct inode *dir, struct dentry *dentry); ++ ++/* i_op_ren.c */ ++int au_wbr(struct dentry *dentry, aufs_bindex_t btgt); ++int aufs_rename(struct inode *src_dir, struct dentry *src_dentry, ++ struct inode *dir, struct dentry *dentry); ++ ++/* iinfo.c */ ++struct inode *au_h_iptr(struct inode *inode, aufs_bindex_t bindex); ++void au_hiput(struct au_hinode *hinode); ++void au_set_hi_wh(struct inode *inode, aufs_bindex_t bindex, ++ struct dentry *h_wh); ++unsigned int au_hi_flags(struct inode *inode, int isdir); ++ ++/* hinode flags */ ++#define AuHi_XINO 1 ++#define AuHi_HNOTIFY (1 << 1) ++#define au_ftest_hi(flags, name) ((flags) & AuHi_##name) ++#define au_fset_hi(flags, name) \ ++ do { (flags) |= AuHi_##name; } while (0) ++#define au_fclr_hi(flags, name) \ ++ do { (flags) &= ~AuHi_##name; } while (0) ++ ++#ifndef CONFIG_AUFS_HNOTIFY ++#undef AuHi_HNOTIFY ++#define AuHi_HNOTIFY 0 ++#endif ++ ++void au_set_h_iptr(struct inode *inode, aufs_bindex_t bindex, ++ struct inode *h_inode, unsigned int flags); ++ ++void au_update_iigen(struct inode *inode); ++void au_update_ibrange(struct inode *inode, int do_put_zero); ++ ++void au_icntnr_init_once(void *_c); ++int au_iinfo_init(struct inode *inode); ++void au_iinfo_fin(struct inode *inode); ++int au_ii_realloc(struct au_iinfo *iinfo, int nbr); ++ ++#ifdef CONFIG_PROC_FS ++/* plink.c */ ++int au_plink_maint(struct super_block *sb, int flags); ++void au_plink_maint_leave(struct au_sbinfo *sbinfo); ++int au_plink_maint_enter(struct super_block *sb); ++#ifdef CONFIG_AUFS_DEBUG ++void au_plink_list(struct super_block *sb); ++#else ++AuStubVoid(au_plink_list, struct super_block *sb) ++#endif ++int au_plink_test(struct inode *inode); ++struct dentry *au_plink_lkup(struct inode *inode, aufs_bindex_t bindex); ++void au_plink_append(struct inode *inode, aufs_bindex_t bindex, ++ struct dentry *h_dentry); ++void au_plink_put(struct super_block *sb, int verbose); ++void au_plink_clean(struct super_block *sb, int verbose); ++void au_plink_half_refresh(struct super_block *sb, aufs_bindex_t br_id); ++#else ++AuStubInt0(au_plink_maint, struct super_block *sb, int flags); 
++AuStubVoid(au_plink_maint_leave, struct au_sbinfo *sbinfo); ++AuStubInt0(au_plink_maint_enter, struct super_block *sb); ++AuStubVoid(au_plink_list, struct super_block *sb); ++AuStubInt0(au_plink_test, struct inode *inode); ++AuStub(struct dentry *, au_plink_lkup, return NULL, ++ struct inode *inode, aufs_bindex_t bindex); ++AuStubVoid(au_plink_append, struct inode *inode, aufs_bindex_t bindex, ++ struct dentry *h_dentry); ++AuStubVoid(au_plink_put, struct super_block *sb, int verbose); ++AuStubVoid(au_plink_clean, struct super_block *sb, int verbose); ++AuStubVoid(au_plink_half_refresh, struct super_block *sb, aufs_bindex_t br_id); ++#endif /* CONFIG_PROC_FS */ ++ ++/* ---------------------------------------------------------------------- */ ++ ++/* lock subclass for iinfo */ ++enum { ++ AuLsc_II_CHILD, /* child first */ ++ AuLsc_II_CHILD2, /* rename(2), link(2), and cpup at hnotify */ ++ AuLsc_II_CHILD3, /* copyup dirs */ ++ AuLsc_II_PARENT, /* see AuLsc_I_PARENT in vfsub.h */ ++ AuLsc_II_PARENT2, ++ AuLsc_II_PARENT3, /* copyup dirs */ ++ AuLsc_II_NEW_CHILD ++}; ++ ++/* ++ * ii_read_lock_child, ii_write_lock_child, ++ * ii_read_lock_child2, ii_write_lock_child2, ++ * ii_read_lock_child3, ii_write_lock_child3, ++ * ii_read_lock_parent, ii_write_lock_parent, ++ * ii_read_lock_parent2, ii_write_lock_parent2, ++ * ii_read_lock_parent3, ii_write_lock_parent3, ++ * ii_read_lock_new_child, ii_write_lock_new_child, ++ */ ++#define AuReadLockFunc(name, lsc) \ ++static inline void ii_read_lock_##name(struct inode *i) \ ++{ \ ++ au_rw_read_lock_nested(&au_ii(i)->ii_rwsem, AuLsc_II_##lsc); \ ++} ++ ++#define AuWriteLockFunc(name, lsc) \ ++static inline void ii_write_lock_##name(struct inode *i) \ ++{ \ ++ au_rw_write_lock_nested(&au_ii(i)->ii_rwsem, AuLsc_II_##lsc); \ ++} ++ ++#define AuRWLockFuncs(name, lsc) \ ++ AuReadLockFunc(name, lsc) \ ++ AuWriteLockFunc(name, lsc) ++ ++AuRWLockFuncs(child, CHILD); ++AuRWLockFuncs(child2, CHILD2); ++AuRWLockFuncs(child3, CHILD3); ++AuRWLockFuncs(parent, PARENT); ++AuRWLockFuncs(parent2, PARENT2); ++AuRWLockFuncs(parent3, PARENT3); ++AuRWLockFuncs(new_child, NEW_CHILD); ++ ++#undef AuReadLockFunc ++#undef AuWriteLockFunc ++#undef AuRWLockFuncs ++ ++/* ++ * ii_read_unlock, ii_write_unlock, ii_downgrade_lock ++ */ ++AuSimpleUnlockRwsemFuncs(ii, struct inode *i, &au_ii(i)->ii_rwsem); ++ ++#define IiMustNoWaiters(i) AuRwMustNoWaiters(&au_ii(i)->ii_rwsem) ++#define IiMustAnyLock(i) AuRwMustAnyLock(&au_ii(i)->ii_rwsem) ++#define IiMustWriteLock(i) AuRwMustWriteLock(&au_ii(i)->ii_rwsem) ++ ++/* ---------------------------------------------------------------------- */ ++ ++static inline void au_icntnr_init(struct au_icntnr *c) ++{ ++#ifdef CONFIG_AUFS_DEBUG ++ c->vfs_inode.i_mode = 0; ++#endif ++} ++ ++static inline unsigned int au_iigen(struct inode *inode) ++{ ++ return atomic_read(&au_ii(inode)->ii_generation); ++} ++ ++/* tiny test for inode number */ ++/* tmpfs generation is too rough */ ++static inline int au_test_higen(struct inode *inode, struct inode *h_inode) ++{ ++ struct au_iinfo *iinfo; ++ ++ iinfo = au_ii(inode); ++ AuRwMustAnyLock(&iinfo->ii_rwsem); ++ return !(iinfo->ii_hsb1 == h_inode->i_sb ++ && iinfo->ii_higen == h_inode->i_generation); ++} ++ ++static inline void au_iigen_dec(struct inode *inode) ++{ ++ atomic_dec(&au_ii(inode)->ii_generation); ++} ++ ++static inline int au_iigen_test(struct inode *inode, unsigned int sigen) ++{ ++ int err; ++ ++ err = 0; ++ if (unlikely(inode && au_iigen(inode) != sigen)) ++ err = -EIO; ++ ++ return err; ++} ++ 
++/* ---------------------------------------------------------------------- */ ++ ++static inline aufs_bindex_t au_ii_br_id(struct inode *inode, ++ aufs_bindex_t bindex) ++{ ++ IiMustAnyLock(inode); ++ return au_ii(inode)->ii_hinode[0 + bindex].hi_id; ++} ++ ++static inline aufs_bindex_t au_ibstart(struct inode *inode) ++{ ++ IiMustAnyLock(inode); ++ return au_ii(inode)->ii_bstart; ++} ++ ++static inline aufs_bindex_t au_ibend(struct inode *inode) ++{ ++ IiMustAnyLock(inode); ++ return au_ii(inode)->ii_bend; ++} ++ ++static inline struct au_vdir *au_ivdir(struct inode *inode) ++{ ++ IiMustAnyLock(inode); ++ return au_ii(inode)->ii_vdir; ++} ++ ++static inline struct dentry *au_hi_wh(struct inode *inode, aufs_bindex_t bindex) ++{ ++ IiMustAnyLock(inode); ++ return au_ii(inode)->ii_hinode[0 + bindex].hi_whdentry; ++} ++ ++static inline void au_set_ibstart(struct inode *inode, aufs_bindex_t bindex) ++{ ++ IiMustWriteLock(inode); ++ au_ii(inode)->ii_bstart = bindex; ++} ++ ++static inline void au_set_ibend(struct inode *inode, aufs_bindex_t bindex) ++{ ++ IiMustWriteLock(inode); ++ au_ii(inode)->ii_bend = bindex; ++} ++ ++static inline void au_set_ivdir(struct inode *inode, struct au_vdir *vdir) ++{ ++ IiMustWriteLock(inode); ++ au_ii(inode)->ii_vdir = vdir; ++} ++ ++static inline struct au_hinode *au_hi(struct inode *inode, aufs_bindex_t bindex) ++{ ++ IiMustAnyLock(inode); ++ return au_ii(inode)->ii_hinode + bindex; ++} ++ ++/* ---------------------------------------------------------------------- */ ++ ++static inline struct dentry *au_pinned_parent(struct au_pin *pin) ++{ ++ if (pin) ++ return pin->parent; ++ return NULL; ++} ++ ++static inline struct inode *au_pinned_h_dir(struct au_pin *pin) ++{ ++ if (pin && pin->hdir) ++ return pin->hdir->hi_inode; ++ return NULL; ++} ++ ++static inline struct au_hinode *au_pinned_hdir(struct au_pin *pin) ++{ ++ if (pin) ++ return pin->hdir; ++ return NULL; ++} ++ ++static inline void au_pin_set_dentry(struct au_pin *pin, struct dentry *dentry) ++{ ++ if (pin) ++ pin->dentry = dentry; ++} ++ ++static inline void au_pin_set_parent_lflag(struct au_pin *pin, ++ unsigned char lflag) ++{ ++ if (pin) { ++ if (lflag) ++ au_fset_pin(pin->flags, DI_LOCKED); ++ else ++ au_fclr_pin(pin->flags, DI_LOCKED); ++ } ++} ++ ++static inline void au_pin_set_parent(struct au_pin *pin, struct dentry *parent) ++{ ++ if (pin) { ++ dput(pin->parent); ++ pin->parent = dget(parent); ++ } ++} ++ ++/* ---------------------------------------------------------------------- */ ++ ++struct au_branch; ++#ifdef CONFIG_AUFS_HNOTIFY ++struct au_hnotify_op { ++ void (*ctl)(struct au_hinode *hinode, int do_set); ++ int (*alloc)(struct au_hinode *hinode); ++ ++ /* ++ * if it returns true, the the caller should free hinode->hi_notify, ++ * otherwise ->free() frees it. 
++ */ ++ int (*free)(struct au_hinode *hinode, ++ struct au_hnotify *hn) __must_check; ++ ++ void (*fin)(void); ++ int (*init)(void); ++ ++ int (*reset_br)(unsigned int udba, struct au_branch *br, int perm); ++ void (*fin_br)(struct au_branch *br); ++ int (*init_br)(struct au_branch *br, int perm); ++}; ++ ++/* hnotify.c */ ++int au_hn_alloc(struct au_hinode *hinode, struct inode *inode); ++void au_hn_free(struct au_hinode *hinode); ++void au_hn_ctl(struct au_hinode *hinode, int do_set); ++void au_hn_reset(struct inode *inode, unsigned int flags); ++int au_hnotify(struct inode *h_dir, struct au_hnotify *hnotify, u32 mask, ++ struct qstr *h_child_qstr, struct inode *h_child_inode); ++int au_hnotify_reset_br(unsigned int udba, struct au_branch *br, int perm); ++int au_hnotify_init_br(struct au_branch *br, int perm); ++void au_hnotify_fin_br(struct au_branch *br); ++int __init au_hnotify_init(void); ++void au_hnotify_fin(void); ++ ++/* hfsnotify.c */ ++extern const struct au_hnotify_op au_hnotify_op; ++ ++static inline ++void au_hn_init(struct au_hinode *hinode) ++{ ++ hinode->hi_notify = NULL; ++} ++ ++static inline struct au_hnotify *au_hn(struct au_hinode *hinode) ++{ ++ return hinode->hi_notify; ++} ++ ++#else ++static inline ++int au_hn_alloc(struct au_hinode *hinode __maybe_unused, ++ struct inode *inode __maybe_unused) ++{ ++ return -EOPNOTSUPP; ++} ++ ++static inline struct au_hnotify *au_hn(struct au_hinode *hinode) ++{ ++ return NULL; ++} ++ ++AuStubVoid(au_hn_free, struct au_hinode *hinode __maybe_unused) ++AuStubVoid(au_hn_ctl, struct au_hinode *hinode __maybe_unused, ++ int do_set __maybe_unused) ++AuStubVoid(au_hn_reset, struct inode *inode __maybe_unused, ++ unsigned int flags __maybe_unused) ++AuStubInt0(au_hnotify_reset_br, unsigned int udba __maybe_unused, ++ struct au_branch *br __maybe_unused, ++ int perm __maybe_unused) ++AuStubInt0(au_hnotify_init_br, struct au_branch *br __maybe_unused, ++ int perm __maybe_unused) ++AuStubVoid(au_hnotify_fin_br, struct au_branch *br __maybe_unused) ++AuStubInt0(__init au_hnotify_init, void) ++AuStubVoid(au_hnotify_fin, void) ++AuStubVoid(au_hn_init, struct au_hinode *hinode __maybe_unused) ++#endif /* CONFIG_AUFS_HNOTIFY */ ++ ++static inline void au_hn_suspend(struct au_hinode *hdir) ++{ ++ au_hn_ctl(hdir, /*do_set*/0); ++} ++ ++static inline void au_hn_resume(struct au_hinode *hdir) ++{ ++ au_hn_ctl(hdir, /*do_set*/1); ++} ++ ++static inline void au_hn_imtx_lock(struct au_hinode *hdir) ++{ ++ mutex_lock(&hdir->hi_inode->i_mutex); ++ au_hn_suspend(hdir); ++} ++ ++static inline void au_hn_imtx_lock_nested(struct au_hinode *hdir, ++ unsigned int sc __maybe_unused) ++{ ++ mutex_lock_nested(&hdir->hi_inode->i_mutex, sc); ++ au_hn_suspend(hdir); ++} ++ ++static inline void au_hn_imtx_unlock(struct au_hinode *hdir) ++{ ++ au_hn_resume(hdir); ++ mutex_unlock(&hdir->hi_inode->i_mutex); ++} ++ ++#endif /* __KERNEL__ */ ++#endif /* __AUFS_INODE_H__ */ +--- /dev/null 2012-03-14 12:35:58.848999748 +0100 ++++ b/fs/aufs/ioctl.c 2012-03-20 17:31:18.000000000 +0100 +@@ -0,0 +1,196 @@ ++/* ++ * Copyright (C) 2005-2012 Junjiro R. Okajima ++ * ++ * This program, aufs is free software; you can redistribute it and/or modify ++ * it under the terms of the GNU General Public License as published by ++ * the Free Software Foundation; either version 2 of the License, or ++ * (at your option) any later version. 
++ * ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ * GNU General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public License ++ * along with this program; if not, write to the Free Software ++ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA ++ */ ++ ++/* ++ * ioctl ++ * plink-management and readdir in userspace. ++ * assist the pathconf(3) wrapper library. ++ */ ++ ++#include "aufs.h" ++ ++static int au_wbr_fd(struct path *path, struct aufs_wbr_fd __user *arg) ++{ ++ int err, fd; ++ aufs_bindex_t wbi, bindex, bend; ++ struct file *h_file; ++ struct super_block *sb; ++ struct dentry *root; ++ struct au_branch *br; ++ struct aufs_wbr_fd wbrfd = { ++ .oflags = au_dir_roflags, ++ .brid = -1 ++ }; ++ const int valid = O_RDONLY | O_NONBLOCK | O_LARGEFILE | O_DIRECTORY ++ | O_NOATIME | O_CLOEXEC; ++ ++ AuDebugOn(wbrfd.oflags & ~valid); ++ ++ if (arg) { ++ err = copy_from_user(&wbrfd, arg, sizeof(wbrfd)); ++ if (unlikely(err)) { ++ err = -EFAULT; ++ goto out; ++ } ++ ++ err = -EINVAL; ++ AuDbg("wbrfd{0%o, %d}\n", wbrfd.oflags, wbrfd.brid); ++ wbrfd.oflags |= au_dir_roflags; ++ AuDbg("0%o\n", wbrfd.oflags); ++ if (unlikely(wbrfd.oflags & ~valid)) ++ goto out; ++ } ++ ++ fd = get_unused_fd(); ++ err = fd; ++ if (unlikely(fd < 0)) ++ goto out; ++ ++ h_file = ERR_PTR(-EINVAL); ++ wbi = 0; ++ br = NULL; ++ sb = path->dentry->d_sb; ++ root = sb->s_root; ++ aufs_read_lock(root, AuLock_IR); ++ bend = au_sbend(sb); ++ if (wbrfd.brid >= 0) { ++ wbi = au_br_index(sb, wbrfd.brid); ++ if (unlikely(wbi < 0 || wbi > bend)) ++ goto out_unlock; ++ } ++ ++ h_file = ERR_PTR(-ENOENT); ++ br = au_sbr(sb, wbi); ++ if (!au_br_writable(br->br_perm)) { ++ if (arg) ++ goto out_unlock; ++ ++ bindex = wbi + 1; ++ wbi = -1; ++ for (; bindex <= bend; bindex++) { ++ br = au_sbr(sb, bindex); ++ if (au_br_writable(br->br_perm)) { ++ wbi = bindex; ++ br = au_sbr(sb, wbi); ++ break; ++ } ++ } ++ } ++ AuDbg("wbi %d\n", wbi); ++ if (wbi >= 0) ++ h_file = au_h_open(root, wbi, wbrfd.oflags, NULL); ++ ++out_unlock: ++ aufs_read_unlock(root, AuLock_IR); ++ err = PTR_ERR(h_file); ++ if (IS_ERR(h_file)) ++ goto out_fd; ++ ++ atomic_dec(&br->br_count); /* cf. 
au_h_open() */ ++ fd_install(fd, h_file); ++ err = fd; ++ goto out; /* success */ ++ ++out_fd: ++ put_unused_fd(fd); ++out: ++ AuTraceErr(err); ++ return err; ++} ++ ++/* ---------------------------------------------------------------------- */ ++ ++long aufs_ioctl_dir(struct file *file, unsigned int cmd, unsigned long arg) ++{ ++ long err; ++ ++ switch (cmd) { ++ case AUFS_CTL_RDU: ++ case AUFS_CTL_RDU_INO: ++ err = au_rdu_ioctl(file, cmd, arg); ++ break; ++ ++ case AUFS_CTL_WBR_FD: ++ err = au_wbr_fd(&file->f_path, (void __user *)arg); ++ break; ++ ++ case AUFS_CTL_IBUSY: ++ err = au_ibusy_ioctl(file, arg); ++ break; ++ ++ default: ++ /* do not call the lower */ ++ AuDbg("0x%x\n", cmd); ++ err = -ENOTTY; ++ } ++ ++ AuTraceErr(err); ++ return err; ++} ++ ++long aufs_ioctl_nondir(struct file *file, unsigned int cmd, unsigned long arg) ++{ ++ long err; ++ ++ switch (cmd) { ++ case AUFS_CTL_WBR_FD: ++ err = au_wbr_fd(&file->f_path, (void __user *)arg); ++ break; ++ ++ default: ++ /* do not call the lower */ ++ AuDbg("0x%x\n", cmd); ++ err = -ENOTTY; ++ } ++ ++ AuTraceErr(err); ++ return err; ++} ++ ++#ifdef CONFIG_COMPAT ++long aufs_compat_ioctl_dir(struct file *file, unsigned int cmd, ++ unsigned long arg) ++{ ++ long err; ++ ++ switch (cmd) { ++ case AUFS_CTL_RDU: ++ case AUFS_CTL_RDU_INO: ++ err = au_rdu_compat_ioctl(file, cmd, arg); ++ break; ++ ++ case AUFS_CTL_IBUSY: ++ err = au_ibusy_compat_ioctl(file, arg); ++ break; ++ ++ default: ++ err = aufs_ioctl_dir(file, cmd, arg); ++ } ++ ++ AuTraceErr(err); ++ return err; ++} ++ ++#if 0 /* unused yet */ ++long aufs_compat_ioctl_nondir(struct file *file, unsigned int cmd, ++ unsigned long arg) ++{ ++ return aufs_ioctl_nondir(file, cmd, (unsigned long)compat_ptr(arg)); ++} ++#endif ++#endif +--- /dev/null 2012-03-14 12:35:58.848999748 +0100 ++++ b/fs/aufs/i_op_add.c 2012-03-20 17:31:17.000000000 +0100 +@@ -0,0 +1,712 @@ ++/* ++ * Copyright (C) 2005-2012 Junjiro R. Okajima ++ * ++ * This program, aufs is free software; you can redistribute it and/or modify ++ * it under the terms of the GNU General Public License as published by ++ * the Free Software Foundation; either version 2 of the License, or ++ * (at your option) any later version. ++ * ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ * GNU General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public License ++ * along with this program; if not, write to the Free Software ++ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA ++ */ ++ ++/* ++ * inode operations (add entry) ++ */ ++ ++#include "aufs.h" ++ ++/* ++ * final procedure of adding a new entry, except link(2). ++ * remove whiteout, instantiate, copyup the parent dir's times and size ++ * and update version. ++ * if it failed, re-create the removed whiteout. 
++ */ ++static int epilog(struct inode *dir, aufs_bindex_t bindex, ++ struct dentry *wh_dentry, struct dentry *dentry) ++{ ++ int err, rerr; ++ aufs_bindex_t bwh; ++ struct path h_path; ++ struct inode *inode, *h_dir; ++ struct dentry *wh; ++ ++ bwh = -1; ++ if (wh_dentry) { ++ h_dir = wh_dentry->d_parent->d_inode; /* dir inode is locked */ ++ IMustLock(h_dir); ++ AuDebugOn(au_h_iptr(dir, bindex) != h_dir); ++ bwh = au_dbwh(dentry); ++ h_path.dentry = wh_dentry; ++ h_path.mnt = au_sbr_mnt(dir->i_sb, bindex); ++ err = au_wh_unlink_dentry(au_h_iptr(dir, bindex), &h_path, ++ dentry); ++ if (unlikely(err)) ++ goto out; ++ } ++ ++ inode = au_new_inode(dentry, /*must_new*/1); ++ if (!IS_ERR(inode)) { ++ d_instantiate(dentry, inode); ++ dir = dentry->d_parent->d_inode; /* dir inode is locked */ ++ IMustLock(dir); ++ if (au_ibstart(dir) == au_dbstart(dentry)) ++ au_cpup_attr_timesizes(dir); ++ dir->i_version++; ++ return 0; /* success */ ++ } ++ ++ err = PTR_ERR(inode); ++ if (!wh_dentry) ++ goto out; ++ ++ /* revert */ ++ /* dir inode is locked */ ++ wh = au_wh_create(dentry, bwh, wh_dentry->d_parent); ++ rerr = PTR_ERR(wh); ++ if (IS_ERR(wh)) { ++ AuIOErr("%.*s reverting whiteout failed(%d, %d)\n", ++ AuDLNPair(dentry), err, rerr); ++ err = -EIO; ++ } else ++ dput(wh); ++ ++out: ++ return err; ++} ++ ++static int au_d_may_add(struct dentry *dentry) ++{ ++ int err; ++ ++ err = 0; ++ if (unlikely(d_unhashed(dentry))) ++ err = -ENOENT; ++ if (unlikely(dentry->d_inode)) ++ err = -EEXIST; ++ return err; ++} ++ ++/* ++ * simple tests for the adding inode operations. ++ * following the checks in vfs, plus the parent-child relationship. ++ */ ++int au_may_add(struct dentry *dentry, aufs_bindex_t bindex, ++ struct dentry *h_parent, int isdir) ++{ ++ int err; ++ umode_t h_mode; ++ struct dentry *h_dentry; ++ struct inode *h_inode; ++ ++ err = -ENAMETOOLONG; ++ if (unlikely(dentry->d_name.len > AUFS_MAX_NAMELEN)) ++ goto out; ++ ++ h_dentry = au_h_dptr(dentry, bindex); ++ h_inode = h_dentry->d_inode; ++ if (!dentry->d_inode) { ++ err = -EEXIST; ++ if (unlikely(h_inode)) ++ goto out; ++ } else { ++ /* rename(2) case */ ++ err = -EIO; ++ if (unlikely(!h_inode || !h_inode->i_nlink)) ++ goto out; ++ ++ h_mode = h_inode->i_mode; ++ if (!isdir) { ++ err = -EISDIR; ++ if (unlikely(S_ISDIR(h_mode))) ++ goto out; ++ } else if (unlikely(!S_ISDIR(h_mode))) { ++ err = -ENOTDIR; ++ goto out; ++ } ++ } ++ ++ err = 0; ++ /* expected parent dir is locked */ ++ if (unlikely(h_parent != h_dentry->d_parent)) ++ err = -EIO; ++ ++out: ++ AuTraceErr(err); ++ return err; ++} ++ ++/* ++ * initial procedure of adding a new entry. ++ * prepare writable branch and the parent dir, lock it, ++ * and lookup whiteout for the new entry. 
++ */ ++static struct dentry* ++lock_hdir_lkup_wh(struct dentry *dentry, struct au_dtime *dt, ++ struct dentry *src_dentry, struct au_pin *pin, ++ struct au_wr_dir_args *wr_dir_args) ++{ ++ struct dentry *wh_dentry, *h_parent; ++ struct super_block *sb; ++ struct au_branch *br; ++ int err; ++ unsigned int udba; ++ aufs_bindex_t bcpup; ++ ++ AuDbg("%.*s\n", AuDLNPair(dentry)); ++ ++ err = au_wr_dir(dentry, src_dentry, wr_dir_args); ++ bcpup = err; ++ wh_dentry = ERR_PTR(err); ++ if (unlikely(err < 0)) ++ goto out; ++ ++ sb = dentry->d_sb; ++ udba = au_opt_udba(sb); ++ err = au_pin(pin, dentry, bcpup, udba, ++ AuPin_DI_LOCKED | AuPin_MNT_WRITE); ++ wh_dentry = ERR_PTR(err); ++ if (unlikely(err)) ++ goto out; ++ ++ h_parent = au_pinned_h_parent(pin); ++ if (udba != AuOpt_UDBA_NONE ++ && au_dbstart(dentry) == bcpup) ++ err = au_may_add(dentry, bcpup, h_parent, ++ au_ftest_wrdir(wr_dir_args->flags, ISDIR)); ++ else if (unlikely(dentry->d_name.len > AUFS_MAX_NAMELEN)) ++ err = -ENAMETOOLONG; ++ wh_dentry = ERR_PTR(err); ++ if (unlikely(err)) ++ goto out_unpin; ++ ++ br = au_sbr(sb, bcpup); ++ if (dt) { ++ struct path tmp = { ++ .dentry = h_parent, ++ .mnt = br->br_mnt ++ }; ++ au_dtime_store(dt, au_pinned_parent(pin), &tmp); ++ } ++ ++ wh_dentry = NULL; ++ if (bcpup != au_dbwh(dentry)) ++ goto out; /* success */ ++ ++ wh_dentry = au_wh_lkup(h_parent, &dentry->d_name, br); ++ ++out_unpin: ++ if (IS_ERR(wh_dentry)) ++ au_unpin(pin); ++out: ++ return wh_dentry; ++} ++ ++/* ---------------------------------------------------------------------- */ ++ ++enum { Mknod, Symlink, Creat }; ++struct simple_arg { ++ int type; ++ union { ++ struct { ++ umode_t mode; ++ struct nameidata *nd; ++ } c; ++ struct { ++ const char *symname; ++ } s; ++ struct { ++ umode_t mode; ++ dev_t dev; ++ } m; ++ } u; ++}; ++ ++static int add_simple(struct inode *dir, struct dentry *dentry, ++ struct simple_arg *arg) ++{ ++ int err; ++ aufs_bindex_t bstart; ++ unsigned char created; ++ struct au_dtime dt; ++ struct au_pin pin; ++ struct path h_path; ++ struct dentry *wh_dentry, *parent; ++ struct inode *h_dir; ++ struct au_wr_dir_args wr_dir_args = { ++ .force_btgt = -1, ++ .flags = AuWrDir_ADD_ENTRY ++ }; ++ ++ AuDbg("%.*s\n", AuDLNPair(dentry)); ++ IMustLock(dir); ++ ++ parent = dentry->d_parent; /* dir inode is locked */ ++ err = aufs_read_lock(dentry, AuLock_DW | AuLock_GEN); ++ if (unlikely(err)) ++ goto out; ++ err = au_d_may_add(dentry); ++ if (unlikely(err)) ++ goto out_unlock; ++ di_write_lock_parent(parent); ++ wh_dentry = lock_hdir_lkup_wh(dentry, &dt, /*src_dentry*/NULL, &pin, ++ &wr_dir_args); ++ err = PTR_ERR(wh_dentry); ++ if (IS_ERR(wh_dentry)) ++ goto out_parent; ++ ++ bstart = au_dbstart(dentry); ++ h_path.dentry = au_h_dptr(dentry, bstart); ++ h_path.mnt = au_sbr_mnt(dentry->d_sb, bstart); ++ h_dir = au_pinned_h_dir(&pin); ++ switch (arg->type) { ++ case Creat: ++ err = vfsub_create(h_dir, &h_path, arg->u.c.mode); ++ break; ++ case Symlink: ++ err = vfsub_symlink(h_dir, &h_path, arg->u.s.symname); ++ break; ++ case Mknod: ++ err = vfsub_mknod(h_dir, &h_path, arg->u.m.mode, arg->u.m.dev); ++ break; ++ default: ++ BUG(); ++ } ++ created = !err; ++ if (!err) ++ err = epilog(dir, bstart, wh_dentry, dentry); ++ ++ /* revert */ ++ if (unlikely(created && err && h_path.dentry->d_inode)) { ++ int rerr; ++ rerr = vfsub_unlink(h_dir, &h_path, /*force*/0); ++ if (rerr) { ++ AuIOErr("%.*s revert failure(%d, %d)\n", ++ AuDLNPair(dentry), err, rerr); ++ err = -EIO; ++ } ++ au_dtime_revert(&dt); ++ } ++ ++ au_unpin(&pin); 
++ dput(wh_dentry); ++ ++out_parent: ++ di_write_unlock(parent); ++out_unlock: ++ if (unlikely(err)) { ++ au_update_dbstart(dentry); ++ d_drop(dentry); ++ } ++ aufs_read_unlock(dentry, AuLock_DW); ++out: ++ return err; ++} ++ ++int aufs_mknod(struct inode *dir, struct dentry *dentry, umode_t mode, ++ dev_t dev) ++{ ++ struct simple_arg arg = { ++ .type = Mknod, ++ .u.m = { ++ .mode = mode, ++ .dev = dev ++ } ++ }; ++ return add_simple(dir, dentry, &arg); ++} ++ ++int aufs_symlink(struct inode *dir, struct dentry *dentry, const char *symname) ++{ ++ struct simple_arg arg = { ++ .type = Symlink, ++ .u.s.symname = symname ++ }; ++ return add_simple(dir, dentry, &arg); ++} ++ ++int aufs_create(struct inode *dir, struct dentry *dentry, umode_t mode, ++ struct nameidata *nd) ++{ ++ struct simple_arg arg = { ++ .type = Creat, ++ .u.c = { ++ .mode = mode, ++ .nd = nd ++ } ++ }; ++ return add_simple(dir, dentry, &arg); ++} ++ ++/* ---------------------------------------------------------------------- */ ++ ++struct au_link_args { ++ aufs_bindex_t bdst, bsrc; ++ struct au_pin pin; ++ struct path h_path; ++ struct dentry *src_parent, *parent; ++}; ++ ++static int au_cpup_before_link(struct dentry *src_dentry, ++ struct au_link_args *a) ++{ ++ int err; ++ struct dentry *h_src_dentry; ++ struct mutex *h_mtx; ++ struct file *h_file; ++ ++ di_read_lock_parent(a->src_parent, AuLock_IR); ++ err = au_test_and_cpup_dirs(src_dentry, a->bdst); ++ if (unlikely(err)) ++ goto out; ++ ++ h_src_dentry = au_h_dptr(src_dentry, a->bsrc); ++ h_mtx = &h_src_dentry->d_inode->i_mutex; ++ err = au_pin(&a->pin, src_dentry, a->bdst, ++ au_opt_udba(src_dentry->d_sb), ++ AuPin_DI_LOCKED | AuPin_MNT_WRITE); ++ if (unlikely(err)) ++ goto out; ++ mutex_lock_nested(h_mtx, AuLsc_I_CHILD); ++ h_file = au_h_open_pre(src_dentry, a->bsrc); ++ if (IS_ERR(h_file)) { ++ err = PTR_ERR(h_file); ++ h_file = NULL; ++ } else ++ err = au_sio_cpup_simple(src_dentry, a->bdst, -1, ++ AuCpup_DTIME /* | AuCpup_KEEPLINO */); ++ mutex_unlock(h_mtx); ++ au_h_open_post(src_dentry, a->bsrc, h_file); ++ au_unpin(&a->pin); ++ ++out: ++ di_read_unlock(a->src_parent, AuLock_IR); ++ return err; ++} ++ ++static int au_cpup_or_link(struct dentry *src_dentry, struct au_link_args *a) ++{ ++ int err; ++ unsigned char plink; ++ struct inode *h_inode, *inode; ++ struct dentry *h_src_dentry; ++ struct super_block *sb; ++ struct file *h_file; ++ ++ plink = 0; ++ h_inode = NULL; ++ sb = src_dentry->d_sb; ++ inode = src_dentry->d_inode; ++ if (au_ibstart(inode) <= a->bdst) ++ h_inode = au_h_iptr(inode, a->bdst); ++ if (!h_inode || !h_inode->i_nlink) { ++ /* copyup src_dentry as the name of dentry. 
*/ ++ au_set_dbstart(src_dentry, a->bdst); ++ au_set_h_dptr(src_dentry, a->bdst, dget(a->h_path.dentry)); ++ h_inode = au_h_dptr(src_dentry, a->bsrc)->d_inode; ++ mutex_lock_nested(&h_inode->i_mutex, AuLsc_I_CHILD); ++ h_file = au_h_open_pre(src_dentry, a->bsrc); ++ if (IS_ERR(h_file)) { ++ err = PTR_ERR(h_file); ++ h_file = NULL; ++ } else ++ err = au_sio_cpup_single(src_dentry, a->bdst, a->bsrc, ++ -1, AuCpup_KEEPLINO, ++ a->parent); ++ mutex_unlock(&h_inode->i_mutex); ++ au_h_open_post(src_dentry, a->bsrc, h_file); ++ au_set_h_dptr(src_dentry, a->bdst, NULL); ++ au_set_dbstart(src_dentry, a->bsrc); ++ } else { ++ /* the inode of src_dentry already exists on a.bdst branch */ ++ h_src_dentry = d_find_alias(h_inode); ++ if (!h_src_dentry && au_plink_test(inode)) { ++ plink = 1; ++ h_src_dentry = au_plink_lkup(inode, a->bdst); ++ err = PTR_ERR(h_src_dentry); ++ if (IS_ERR(h_src_dentry)) ++ goto out; ++ ++ if (unlikely(!h_src_dentry->d_inode)) { ++ dput(h_src_dentry); ++ h_src_dentry = NULL; ++ } ++ ++ } ++ if (h_src_dentry) { ++ err = vfsub_link(h_src_dentry, au_pinned_h_dir(&a->pin), ++ &a->h_path); ++ dput(h_src_dentry); ++ } else { ++ AuIOErr("no dentry found for hi%lu on b%d\n", ++ h_inode->i_ino, a->bdst); ++ err = -EIO; ++ } ++ } ++ ++ if (!err && !plink) ++ au_plink_append(inode, a->bdst, a->h_path.dentry); ++ ++out: ++ AuTraceErr(err); ++ return err; ++} ++ ++int aufs_link(struct dentry *src_dentry, struct inode *dir, ++ struct dentry *dentry) ++{ ++ int err, rerr; ++ struct au_dtime dt; ++ struct au_link_args *a; ++ struct dentry *wh_dentry, *h_src_dentry; ++ struct inode *inode; ++ struct super_block *sb; ++ struct au_wr_dir_args wr_dir_args = { ++ /* .force_btgt = -1, */ ++ .flags = AuWrDir_ADD_ENTRY ++ }; ++ ++ IMustLock(dir); ++ inode = src_dentry->d_inode; ++ IMustLock(inode); ++ ++ err = -ENOMEM; ++ a = kzalloc(sizeof(*a), GFP_NOFS); ++ if (unlikely(!a)) ++ goto out; ++ ++ a->parent = dentry->d_parent; /* dir inode is locked */ ++ err = aufs_read_and_write_lock2(dentry, src_dentry, ++ AuLock_NOPLM | AuLock_GEN); ++ if (unlikely(err)) ++ goto out_kfree; ++ err = au_d_hashed_positive(src_dentry); ++ if (unlikely(err)) ++ goto out_unlock; ++ err = au_d_may_add(dentry); ++ if (unlikely(err)) ++ goto out_unlock; ++ ++ a->src_parent = dget_parent(src_dentry); ++ wr_dir_args.force_btgt = au_ibstart(inode); ++ ++ di_write_lock_parent(a->parent); ++ wr_dir_args.force_btgt = au_wbr(dentry, wr_dir_args.force_btgt); ++ wh_dentry = lock_hdir_lkup_wh(dentry, &dt, src_dentry, &a->pin, ++ &wr_dir_args); ++ err = PTR_ERR(wh_dentry); ++ if (IS_ERR(wh_dentry)) ++ goto out_parent; ++ ++ err = 0; ++ sb = dentry->d_sb; ++ a->bdst = au_dbstart(dentry); ++ a->h_path.dentry = au_h_dptr(dentry, a->bdst); ++ a->h_path.mnt = au_sbr_mnt(sb, a->bdst); ++ a->bsrc = au_ibstart(inode); ++ h_src_dentry = au_h_d_alias(src_dentry, a->bsrc); ++ if (!h_src_dentry) { ++ a->bsrc = au_dbstart(src_dentry); ++ h_src_dentry = au_h_d_alias(src_dentry, a->bsrc); ++ AuDebugOn(!h_src_dentry); ++ } else if (IS_ERR(h_src_dentry)) ++ goto out_parent; ++ ++ if (au_opt_test(au_mntflags(sb), PLINK)) { ++ if (a->bdst < a->bsrc ++ /* && h_src_dentry->d_sb != a->h_path.dentry->d_sb */) ++ err = au_cpup_or_link(src_dentry, a); ++ else ++ err = vfsub_link(h_src_dentry, au_pinned_h_dir(&a->pin), ++ &a->h_path); ++ dput(h_src_dentry); ++ } else { ++ /* ++ * copyup src_dentry to the branch we process, ++ * and then link(2) to it. 
++ */ ++ dput(h_src_dentry); ++ if (a->bdst < a->bsrc ++ /* && h_src_dentry->d_sb != a->h_path.dentry->d_sb */) { ++ au_unpin(&a->pin); ++ di_write_unlock(a->parent); ++ err = au_cpup_before_link(src_dentry, a); ++ di_write_lock_parent(a->parent); ++ if (!err) ++ err = au_pin(&a->pin, dentry, a->bdst, ++ au_opt_udba(sb), ++ AuPin_DI_LOCKED | AuPin_MNT_WRITE); ++ if (unlikely(err)) ++ goto out_wh; ++ } ++ if (!err) { ++ h_src_dentry = au_h_dptr(src_dentry, a->bdst); ++ err = -ENOENT; ++ if (h_src_dentry && h_src_dentry->d_inode) ++ err = vfsub_link(h_src_dentry, ++ au_pinned_h_dir(&a->pin), ++ &a->h_path); ++ } ++ } ++ if (unlikely(err)) ++ goto out_unpin; ++ ++ if (wh_dentry) { ++ a->h_path.dentry = wh_dentry; ++ err = au_wh_unlink_dentry(au_pinned_h_dir(&a->pin), &a->h_path, ++ dentry); ++ if (unlikely(err)) ++ goto out_revert; ++ } ++ ++ dir->i_version++; ++ if (au_ibstart(dir) == au_dbstart(dentry)) ++ au_cpup_attr_timesizes(dir); ++ inc_nlink(inode); ++ inode->i_ctime = dir->i_ctime; ++ d_instantiate(dentry, au_igrab(inode)); ++ if (d_unhashed(a->h_path.dentry)) ++ /* some filesystem calls d_drop() */ ++ d_drop(dentry); ++ goto out_unpin; /* success */ ++ ++out_revert: ++ rerr = vfsub_unlink(au_pinned_h_dir(&a->pin), &a->h_path, /*force*/0); ++ if (unlikely(rerr)) { ++ AuIOErr("%.*s reverting failed(%d, %d)\n", ++ AuDLNPair(dentry), err, rerr); ++ err = -EIO; ++ } ++ au_dtime_revert(&dt); ++out_unpin: ++ au_unpin(&a->pin); ++out_wh: ++ dput(wh_dentry); ++out_parent: ++ di_write_unlock(a->parent); ++ dput(a->src_parent); ++out_unlock: ++ if (unlikely(err)) { ++ au_update_dbstart(dentry); ++ d_drop(dentry); ++ } ++ aufs_read_and_write_unlock2(dentry, src_dentry); ++out_kfree: ++ kfree(a); ++out: ++ return err; ++} ++ ++int aufs_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode) ++{ ++ int err, rerr; ++ aufs_bindex_t bindex; ++ unsigned char diropq; ++ struct path h_path; ++ struct dentry *wh_dentry, *parent, *opq_dentry; ++ struct mutex *h_mtx; ++ struct super_block *sb; ++ struct { ++ struct au_pin pin; ++ struct au_dtime dt; ++ } *a; /* reduce the stack usage */ ++ struct au_wr_dir_args wr_dir_args = { ++ .force_btgt = -1, ++ .flags = AuWrDir_ADD_ENTRY | AuWrDir_ISDIR ++ }; ++ ++ IMustLock(dir); ++ ++ err = -ENOMEM; ++ a = kmalloc(sizeof(*a), GFP_NOFS); ++ if (unlikely(!a)) ++ goto out; ++ ++ err = aufs_read_lock(dentry, AuLock_DW | AuLock_GEN); ++ if (unlikely(err)) ++ goto out_free; ++ err = au_d_may_add(dentry); ++ if (unlikely(err)) ++ goto out_unlock; ++ ++ parent = dentry->d_parent; /* dir inode is locked */ ++ di_write_lock_parent(parent); ++ wh_dentry = lock_hdir_lkup_wh(dentry, &a->dt, /*src_dentry*/NULL, ++ &a->pin, &wr_dir_args); ++ err = PTR_ERR(wh_dentry); ++ if (IS_ERR(wh_dentry)) ++ goto out_parent; ++ ++ sb = dentry->d_sb; ++ bindex = au_dbstart(dentry); ++ h_path.dentry = au_h_dptr(dentry, bindex); ++ h_path.mnt = au_sbr_mnt(sb, bindex); ++ err = vfsub_mkdir(au_pinned_h_dir(&a->pin), &h_path, mode); ++ if (unlikely(err)) ++ goto out_unpin; ++ ++ /* make the dir opaque */ ++ diropq = 0; ++ h_mtx = &h_path.dentry->d_inode->i_mutex; ++ if (wh_dentry ++ || au_opt_test(au_mntflags(sb), ALWAYS_DIROPQ)) { ++ mutex_lock_nested(h_mtx, AuLsc_I_CHILD); ++ opq_dentry = au_diropq_create(dentry, bindex); ++ mutex_unlock(h_mtx); ++ err = PTR_ERR(opq_dentry); ++ if (IS_ERR(opq_dentry)) ++ goto out_dir; ++ dput(opq_dentry); ++ diropq = 1; ++ } ++ ++ err = epilog(dir, bindex, wh_dentry, dentry); ++ if (!err) { ++ inc_nlink(dir); ++ goto out_unpin; /* success */ ++ } ++ ++ /* 
revert */ ++ if (diropq) { ++ AuLabel(revert opq); ++ mutex_lock_nested(h_mtx, AuLsc_I_CHILD); ++ rerr = au_diropq_remove(dentry, bindex); ++ mutex_unlock(h_mtx); ++ if (rerr) { ++ AuIOErr("%.*s reverting diropq failed(%d, %d)\n", ++ AuDLNPair(dentry), err, rerr); ++ err = -EIO; ++ } ++ } ++ ++out_dir: ++ AuLabel(revert dir); ++ rerr = vfsub_rmdir(au_pinned_h_dir(&a->pin), &h_path); ++ if (rerr) { ++ AuIOErr("%.*s reverting dir failed(%d, %d)\n", ++ AuDLNPair(dentry), err, rerr); ++ err = -EIO; ++ } ++ au_dtime_revert(&a->dt); ++out_unpin: ++ au_unpin(&a->pin); ++ dput(wh_dentry); ++out_parent: ++ di_write_unlock(parent); ++out_unlock: ++ if (unlikely(err)) { ++ au_update_dbstart(dentry); ++ d_drop(dentry); ++ } ++ aufs_read_unlock(dentry, AuLock_DW); ++out_free: ++ kfree(a); ++out: ++ return err; ++} +--- /dev/null 2012-03-14 12:35:58.848999748 +0100 ++++ b/fs/aufs/i_op.c 2012-03-20 17:31:17.000000000 +0100 +@@ -0,0 +1,992 @@ ++/* ++ * Copyright (C) 2005-2012 Junjiro R. Okajima ++ * ++ * This program, aufs is free software; you can redistribute it and/or modify ++ * it under the terms of the GNU General Public License as published by ++ * the Free Software Foundation; either version 2 of the License, or ++ * (at your option) any later version. ++ * ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ * GNU General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public License ++ * along with this program; if not, write to the Free Software ++ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA ++ */ ++ ++/* ++ * inode operations (except add/del/rename) ++ */ ++ ++#include ++#include ++#include ++#include ++#include "aufs.h" ++ ++static int h_permission(struct inode *h_inode, int mask, ++ struct vfsmount *h_mnt, int brperm) ++{ ++ int err; ++ const unsigned char write_mask = !!(mask & (MAY_WRITE | MAY_APPEND)); ++ ++ err = -EACCES; ++ if ((write_mask && IS_IMMUTABLE(h_inode)) ++ || ((mask & MAY_EXEC) ++ && S_ISREG(h_inode->i_mode) ++ && ((h_mnt->mnt_flags & MNT_NOEXEC) ++ || !(h_inode->i_mode & S_IXUGO)))) ++ goto out; ++ ++ /* ++ * - skip the lower fs test in the case of write to ro branch. ++ * - nfs dir permission write check is optimized, but a policy for ++ * link/rename requires a real check. ++ */ ++ if ((write_mask && !au_br_writable(brperm)) ++ || (au_test_nfs(h_inode->i_sb) && S_ISDIR(h_inode->i_mode) ++ && write_mask && !(mask & MAY_READ)) ++ || !h_inode->i_op->permission) { ++ /* AuLabel(generic_permission); */ ++ err = generic_permission(h_inode, mask); ++ } else { ++ /* AuLabel(h_inode->permission); */ ++ err = h_inode->i_op->permission(h_inode, mask); ++ AuTraceErr(err); ++ } ++ ++ if (!err) ++ err = devcgroup_inode_permission(h_inode, mask); ++ if (!err) ++ err = security_inode_permission(h_inode, mask); ++ ++#if 0 ++ if (!err) { ++ /* todo: do we need to call ima_path_check()? 
*/ ++ struct path h_path = { ++ .dentry = ++ .mnt = h_mnt ++ }; ++ err = ima_path_check(&h_path, ++ mask & (MAY_READ | MAY_WRITE | MAY_EXEC), ++ IMA_COUNT_LEAVE); ++ } ++#endif ++ ++out: ++ return err; ++} ++ ++static int aufs_permission(struct inode *inode, int mask) ++{ ++ int err; ++ aufs_bindex_t bindex, bend; ++ const unsigned char isdir = !!S_ISDIR(inode->i_mode), ++ write_mask = !!(mask & (MAY_WRITE | MAY_APPEND)); ++ struct inode *h_inode; ++ struct super_block *sb; ++ struct au_branch *br; ++ ++ /* todo: support rcu-walk? */ ++ if (mask & MAY_NOT_BLOCK) ++ return -ECHILD; ++ ++ sb = inode->i_sb; ++ si_read_lock(sb, AuLock_FLUSH); ++ ii_read_lock_child(inode); ++#if 0 ++ err = au_iigen_test(inode, au_sigen(sb)); ++ if (unlikely(err)) ++ goto out; ++#endif ++ ++ if (!isdir || write_mask) { ++ err = au_busy_or_stale(); ++ h_inode = au_h_iptr(inode, au_ibstart(inode)); ++ if (unlikely(!h_inode ++ || (h_inode->i_mode & S_IFMT) ++ != (inode->i_mode & S_IFMT))) ++ goto out; ++ ++ err = 0; ++ bindex = au_ibstart(inode); ++ br = au_sbr(sb, bindex); ++ err = h_permission(h_inode, mask, br->br_mnt, br->br_perm); ++ if (write_mask ++ && !err ++ && !special_file(h_inode->i_mode)) { ++ /* test whether the upper writable branch exists */ ++ err = -EROFS; ++ for (; bindex >= 0; bindex--) ++ if (!au_br_rdonly(au_sbr(sb, bindex))) { ++ err = 0; ++ break; ++ } ++ } ++ goto out; ++ } ++ ++ /* non-write to dir */ ++ err = 0; ++ bend = au_ibend(inode); ++ for (bindex = au_ibstart(inode); !err && bindex <= bend; bindex++) { ++ h_inode = au_h_iptr(inode, bindex); ++ if (h_inode) { ++ err = au_busy_or_stale(); ++ if (unlikely(!S_ISDIR(h_inode->i_mode))) ++ break; ++ ++ br = au_sbr(sb, bindex); ++ err = h_permission(h_inode, mask, br->br_mnt, ++ br->br_perm); ++ } ++ } ++ ++out: ++ ii_read_unlock(inode); ++ si_read_unlock(sb); ++ return err; ++} ++ ++/* ---------------------------------------------------------------------- */ ++ ++static struct dentry *aufs_lookup(struct inode *dir, struct dentry *dentry, ++ struct nameidata *nd) ++{ ++ struct dentry *ret, *parent; ++ struct inode *inode; ++ struct super_block *sb; ++ int err, npositive, lc_idx; ++ ++ IMustLock(dir); ++ ++ sb = dir->i_sb; ++ err = si_read_lock(sb, AuLock_FLUSH | AuLock_NOPLM); ++ ret = ERR_PTR(err); ++ if (unlikely(err)) ++ goto out; ++ ++ ret = ERR_PTR(-ENAMETOOLONG); ++ if (unlikely(dentry->d_name.len > AUFS_MAX_NAMELEN)) ++ goto out_si; ++ err = au_di_init(dentry); ++ ret = ERR_PTR(err); ++ if (unlikely(err)) ++ goto out_si; ++ ++ inode = NULL; ++ npositive = 0; /* suppress a warning */ ++ parent = dentry->d_parent; /* dir inode is locked */ ++ di_read_lock_parent(parent, AuLock_IR); ++ err = au_alive_dir(parent); ++ if (!err) ++ err = au_digen_test(parent, au_sigen(sb)); ++ if (!err) { ++ npositive = au_lkup_dentry(dentry, au_dbstart(parent), ++ /*type*/0, nd); ++ err = npositive; ++ } ++ di_read_unlock(parent, AuLock_IR); ++ ret = ERR_PTR(err); ++ if (unlikely(err < 0)) ++ goto out_unlock; ++ ++ if (npositive) { ++ inode = au_new_inode(dentry, /*must_new*/0); ++ ret = (void *)inode; ++ } ++ if (IS_ERR(inode)) { ++ inode = NULL; ++ goto out_unlock; ++ } ++ ++ ret = d_splice_alias(inode, dentry); ++ if (unlikely(IS_ERR(ret) && inode)) { ++ ii_write_unlock(inode); ++ lc_idx = AuLcNonDir_IIINFO; ++ if (S_ISLNK(inode->i_mode)) ++ lc_idx = AuLcSymlink_IIINFO; ++ else if (S_ISDIR(inode->i_mode)) ++ lc_idx = AuLcDir_IIINFO; ++ au_rw_class(&au_ii(inode)->ii_rwsem, au_lc_key + lc_idx); ++ iput(inode); ++ } ++ ++out_unlock: ++ 
di_write_unlock(dentry); ++ if (unlikely(IS_ERR(ret) && inode)) { ++ lc_idx = AuLcNonDir_DIINFO; ++ if (S_ISLNK(inode->i_mode)) ++ lc_idx = AuLcSymlink_DIINFO; ++ else if (S_ISDIR(inode->i_mode)) ++ lc_idx = AuLcDir_DIINFO; ++ au_rw_class(&au_di(dentry)->di_rwsem, au_lc_key + lc_idx); ++ } ++out_si: ++ si_read_unlock(sb); ++out: ++ return ret; ++} ++ ++/* ---------------------------------------------------------------------- */ ++ ++static int au_wr_dir_cpup(struct dentry *dentry, struct dentry *parent, ++ const unsigned char add_entry, aufs_bindex_t bcpup, ++ aufs_bindex_t bstart) ++{ ++ int err; ++ struct dentry *h_parent; ++ struct inode *h_dir; ++ ++ if (add_entry) ++ IMustLock(parent->d_inode); ++ else ++ di_write_lock_parent(parent); ++ ++ err = 0; ++ if (!au_h_dptr(parent, bcpup)) { ++ if (bstart < bcpup) ++ err = au_cpdown_dirs(dentry, bcpup); ++ else ++ err = au_cpup_dirs(dentry, bcpup); ++ } ++ if (!err && add_entry) { ++ h_parent = au_h_dptr(parent, bcpup); ++ h_dir = h_parent->d_inode; ++ mutex_lock_nested(&h_dir->i_mutex, AuLsc_I_PARENT); ++ err = au_lkup_neg(dentry, bcpup); ++ /* todo: no unlock here */ ++ mutex_unlock(&h_dir->i_mutex); ++ ++ AuDbg("bcpup %d\n", bcpup); ++ if (!err) { ++ if (!dentry->d_inode) ++ au_set_h_dptr(dentry, bstart, NULL); ++ au_update_dbrange(dentry, /*do_put_zero*/0); ++ } ++ } ++ ++ if (!add_entry) ++ di_write_unlock(parent); ++ if (!err) ++ err = bcpup; /* success */ ++ ++ AuTraceErr(err); ++ return err; ++} ++ ++/* ++ * decide the branch and the parent dir where we will create a new entry. ++ * returns new bindex or an error. ++ * copyup the parent dir if needed. ++ */ ++int au_wr_dir(struct dentry *dentry, struct dentry *src_dentry, ++ struct au_wr_dir_args *args) ++{ ++ int err; ++ aufs_bindex_t bcpup, bstart, src_bstart; ++ const unsigned char add_entry = !!au_ftest_wrdir(args->flags, ++ ADD_ENTRY); ++ struct super_block *sb; ++ struct dentry *parent; ++ struct au_sbinfo *sbinfo; ++ ++ sb = dentry->d_sb; ++ sbinfo = au_sbi(sb); ++ parent = dget_parent(dentry); ++ bstart = au_dbstart(dentry); ++ bcpup = bstart; ++ if (args->force_btgt < 0) { ++ if (src_dentry) { ++ src_bstart = au_dbstart(src_dentry); ++ if (src_bstart < bstart) ++ bcpup = src_bstart; ++ } else if (add_entry) { ++ err = AuWbrCreate(sbinfo, dentry, ++ au_ftest_wrdir(args->flags, ISDIR)); ++ bcpup = err; ++ } ++ ++ if (bcpup < 0 || au_test_ro(sb, bcpup, dentry->d_inode)) { ++ if (add_entry) ++ err = AuWbrCopyup(sbinfo, dentry); ++ else { ++ if (!IS_ROOT(dentry)) { ++ di_read_lock_parent(parent, !AuLock_IR); ++ err = AuWbrCopyup(sbinfo, dentry); ++ di_read_unlock(parent, !AuLock_IR); ++ } else ++ err = AuWbrCopyup(sbinfo, dentry); ++ } ++ bcpup = err; ++ if (unlikely(err < 0)) ++ goto out; ++ } ++ } else { ++ bcpup = args->force_btgt; ++ AuDebugOn(au_test_ro(sb, bcpup, dentry->d_inode)); ++ } ++ ++ AuDbg("bstart %d, bcpup %d\n", bstart, bcpup); ++ err = bcpup; ++ if (bcpup == bstart) ++ goto out; /* success */ ++ ++ /* copyup the new parent into the branch we process */ ++ err = au_wr_dir_cpup(dentry, parent, add_entry, bcpup, bstart); ++ if (err >= 0) { ++ if (!dentry->d_inode) { ++ au_set_h_dptr(dentry, bstart, NULL); ++ au_set_dbstart(dentry, bcpup); ++ au_set_dbend(dentry, bcpup); ++ } ++ AuDebugOn(add_entry && !au_h_dptr(dentry, bcpup)); ++ } ++ ++out: ++ dput(parent); ++ return err; ++} ++ ++/* ---------------------------------------------------------------------- */ ++ ++struct dentry *au_pinned_h_parent(struct au_pin *pin) ++{ ++ if (pin && pin->parent) ++ return 
au_h_dptr(pin->parent, pin->bindex); ++ return NULL; ++} ++ ++void au_unpin(struct au_pin *p) ++{ ++ if (p->h_mnt && au_ftest_pin(p->flags, MNT_WRITE)) ++ mnt_drop_write(p->h_mnt); ++ if (!p->hdir) ++ return; ++ ++ au_hn_imtx_unlock(p->hdir); ++ if (!au_ftest_pin(p->flags, DI_LOCKED)) ++ di_read_unlock(p->parent, AuLock_IR); ++ iput(p->hdir->hi_inode); ++ dput(p->parent); ++ p->parent = NULL; ++ p->hdir = NULL; ++ p->h_mnt = NULL; ++} ++ ++int au_do_pin(struct au_pin *p) ++{ ++ int err; ++ struct super_block *sb; ++ struct dentry *h_dentry, *h_parent; ++ struct au_branch *br; ++ struct inode *h_dir; ++ ++ err = 0; ++ sb = p->dentry->d_sb; ++ br = au_sbr(sb, p->bindex); ++ if (IS_ROOT(p->dentry)) { ++ if (au_ftest_pin(p->flags, MNT_WRITE)) { ++ p->h_mnt = br->br_mnt; ++ err = mnt_want_write(p->h_mnt); ++ if (unlikely(err)) { ++ au_fclr_pin(p->flags, MNT_WRITE); ++ goto out_err; ++ } ++ } ++ goto out; ++ } ++ ++ h_dentry = NULL; ++ if (p->bindex <= au_dbend(p->dentry)) ++ h_dentry = au_h_dptr(p->dentry, p->bindex); ++ ++ p->parent = dget_parent(p->dentry); ++ if (!au_ftest_pin(p->flags, DI_LOCKED)) ++ di_read_lock(p->parent, AuLock_IR, p->lsc_di); ++ ++ h_dir = NULL; ++ h_parent = au_h_dptr(p->parent, p->bindex); ++ p->hdir = au_hi(p->parent->d_inode, p->bindex); ++ if (p->hdir) ++ h_dir = p->hdir->hi_inode; ++ ++ /* ++ * udba case, or ++ * if DI_LOCKED is not set, then p->parent may be different ++ * and h_parent can be NULL. ++ */ ++ if (unlikely(!p->hdir || !h_dir || !h_parent)) { ++ err = -EBUSY; ++ if (!au_ftest_pin(p->flags, DI_LOCKED)) ++ di_read_unlock(p->parent, AuLock_IR); ++ dput(p->parent); ++ p->parent = NULL; ++ goto out_err; ++ } ++ ++ au_igrab(h_dir); ++ au_hn_imtx_lock_nested(p->hdir, p->lsc_hi); ++ ++ if (unlikely(p->hdir->hi_inode != h_parent->d_inode)) { ++ err = -EBUSY; ++ goto out_unpin; ++ } ++ if (h_dentry) { ++ err = au_h_verify(h_dentry, p->udba, h_dir, h_parent, br); ++ if (unlikely(err)) { ++ au_fclr_pin(p->flags, MNT_WRITE); ++ goto out_unpin; ++ } ++ } ++ ++ if (au_ftest_pin(p->flags, MNT_WRITE)) { ++ p->h_mnt = br->br_mnt; ++ err = mnt_want_write(p->h_mnt); ++ if (unlikely(err)) { ++ au_fclr_pin(p->flags, MNT_WRITE); ++ goto out_unpin; ++ } ++ } ++ goto out; /* success */ ++ ++out_unpin: ++ au_unpin(p); ++out_err: ++ pr_err("err %d\n", err); ++ err = au_busy_or_stale(); ++out: ++ return err; ++} ++ ++void au_pin_init(struct au_pin *p, struct dentry *dentry, ++ aufs_bindex_t bindex, int lsc_di, int lsc_hi, ++ unsigned int udba, unsigned char flags) ++{ ++ p->dentry = dentry; ++ p->udba = udba; ++ p->lsc_di = lsc_di; ++ p->lsc_hi = lsc_hi; ++ p->flags = flags; ++ p->bindex = bindex; ++ ++ p->parent = NULL; ++ p->hdir = NULL; ++ p->h_mnt = NULL; ++} ++ ++int au_pin(struct au_pin *pin, struct dentry *dentry, aufs_bindex_t bindex, ++ unsigned int udba, unsigned char flags) ++{ ++ au_pin_init(pin, dentry, bindex, AuLsc_DI_PARENT, AuLsc_I_PARENT2, ++ udba, flags); ++ return au_do_pin(pin); ++} ++ ++/* ---------------------------------------------------------------------- */ ++ ++/* ++ * ->setattr() and ->getattr() are called in various cases. ++ * chmod, stat: dentry is revalidated. ++ * fchmod, fstat: file and dentry are not revalidated, additionally they may be ++ * unhashed. ++ * for ->setattr(), ia->ia_file is passed from ftruncate only. 
++ */ ++/* todo: consolidate with do_refresh() and simple_reval_dpath() */ ++static int au_reval_for_attr(struct dentry *dentry, unsigned int sigen) ++{ ++ int err; ++ struct inode *inode; ++ struct dentry *parent; ++ ++ err = 0; ++ inode = dentry->d_inode; ++ if (au_digen_test(dentry, sigen)) { ++ parent = dget_parent(dentry); ++ di_read_lock_parent(parent, AuLock_IR); ++ err = au_refresh_dentry(dentry, parent); ++ di_read_unlock(parent, AuLock_IR); ++ dput(parent); ++ } ++ ++ AuTraceErr(err); ++ return err; ++} ++ ++#define AuIcpup_DID_CPUP 1 ++#define au_ftest_icpup(flags, name) ((flags) & AuIcpup_##name) ++#define au_fset_icpup(flags, name) \ ++ do { (flags) |= AuIcpup_##name; } while (0) ++#define au_fclr_icpup(flags, name) \ ++ do { (flags) &= ~AuIcpup_##name; } while (0) ++ ++struct au_icpup_args { ++ unsigned char flags; ++ unsigned char pin_flags; ++ aufs_bindex_t btgt; ++ unsigned int udba; ++ struct au_pin pin; ++ struct path h_path; ++ struct inode *h_inode; ++}; ++ ++static int au_pin_and_icpup(struct dentry *dentry, struct iattr *ia, ++ struct au_icpup_args *a) ++{ ++ int err; ++ loff_t sz; ++ aufs_bindex_t bstart, ibstart; ++ struct dentry *hi_wh, *parent; ++ struct inode *inode; ++ struct file *h_file; ++ struct au_wr_dir_args wr_dir_args = { ++ .force_btgt = -1, ++ .flags = 0 ++ }; ++ ++ bstart = au_dbstart(dentry); ++ inode = dentry->d_inode; ++ if (S_ISDIR(inode->i_mode)) ++ au_fset_wrdir(wr_dir_args.flags, ISDIR); ++ /* plink or hi_wh() case */ ++ ibstart = au_ibstart(inode); ++ if (bstart != ibstart && !au_test_ro(inode->i_sb, ibstart, inode)) ++ wr_dir_args.force_btgt = ibstart; ++ err = au_wr_dir(dentry, /*src_dentry*/NULL, &wr_dir_args); ++ if (unlikely(err < 0)) ++ goto out; ++ a->btgt = err; ++ if (err != bstart) ++ au_fset_icpup(a->flags, DID_CPUP); ++ ++ err = 0; ++ a->pin_flags = AuPin_MNT_WRITE; ++ parent = NULL; ++ if (!IS_ROOT(dentry)) { ++ au_fset_pin(a->pin_flags, DI_LOCKED); ++ parent = dget_parent(dentry); ++ di_write_lock_parent(parent); ++ } ++ ++ err = au_pin(&a->pin, dentry, a->btgt, a->udba, a->pin_flags); ++ if (unlikely(err)) ++ goto out_parent; ++ ++ a->h_path.dentry = au_h_dptr(dentry, bstart); ++ a->h_inode = a->h_path.dentry->d_inode; ++ mutex_lock_nested(&a->h_inode->i_mutex, AuLsc_I_CHILD); ++ sz = -1; ++ if ((ia->ia_valid & ATTR_SIZE) && ia->ia_size < i_size_read(a->h_inode)) ++ sz = ia->ia_size; ++ ++ h_file = NULL; ++ hi_wh = NULL; ++ if (au_ftest_icpup(a->flags, DID_CPUP) && d_unlinked(dentry)) { ++ hi_wh = au_hi_wh(inode, a->btgt); ++ if (!hi_wh) { ++ err = au_sio_cpup_wh(dentry, a->btgt, sz, /*file*/NULL); ++ if (unlikely(err)) ++ goto out_unlock; ++ hi_wh = au_hi_wh(inode, a->btgt); ++ /* todo: revalidate hi_wh? 
*/ ++ } ++ } ++ ++ if (parent) { ++ au_pin_set_parent_lflag(&a->pin, /*lflag*/0); ++ di_downgrade_lock(parent, AuLock_IR); ++ dput(parent); ++ parent = NULL; ++ } ++ if (!au_ftest_icpup(a->flags, DID_CPUP)) ++ goto out; /* success */ ++ ++ if (!d_unhashed(dentry)) { ++ h_file = au_h_open_pre(dentry, bstart); ++ if (IS_ERR(h_file)) { ++ err = PTR_ERR(h_file); ++ h_file = NULL; ++ } else ++ err = au_sio_cpup_simple(dentry, a->btgt, sz, ++ AuCpup_DTIME); ++ if (!err) ++ a->h_path.dentry = au_h_dptr(dentry, a->btgt); ++ } else if (!hi_wh) ++ a->h_path.dentry = au_h_dptr(dentry, a->btgt); ++ else ++ a->h_path.dentry = hi_wh; /* do not dget here */ ++ ++out_unlock: ++ mutex_unlock(&a->h_inode->i_mutex); ++ au_h_open_post(dentry, bstart, h_file); ++ a->h_inode = a->h_path.dentry->d_inode; ++ if (!err) { ++ mutex_lock_nested(&a->h_inode->i_mutex, AuLsc_I_CHILD); ++ goto out; /* success */ ++ } ++ ++ au_unpin(&a->pin); ++out_parent: ++ if (parent) { ++ di_write_unlock(parent); ++ dput(parent); ++ } ++out: ++ return err; ++} ++ ++static int aufs_setattr(struct dentry *dentry, struct iattr *ia) ++{ ++ int err; ++ struct inode *inode; ++ struct super_block *sb; ++ struct file *file; ++ struct au_icpup_args *a; ++ ++ inode = dentry->d_inode; ++ IMustLock(inode); ++ ++ err = -ENOMEM; ++ a = kzalloc(sizeof(*a), GFP_NOFS); ++ if (unlikely(!a)) ++ goto out; ++ ++ if (ia->ia_valid & (ATTR_KILL_SUID | ATTR_KILL_SGID)) ++ ia->ia_valid &= ~ATTR_MODE; ++ ++ file = NULL; ++ sb = dentry->d_sb; ++ err = si_read_lock(sb, AuLock_FLUSH | AuLock_NOPLM); ++ if (unlikely(err)) ++ goto out_kfree; ++ ++ if (ia->ia_valid & ATTR_FILE) { ++ /* currently ftruncate(2) only */ ++ AuDebugOn(!S_ISREG(inode->i_mode)); ++ file = ia->ia_file; ++ err = au_reval_and_lock_fdi(file, au_reopen_nondir, /*wlock*/1); ++ if (unlikely(err)) ++ goto out_si; ++ ia->ia_file = au_hf_top(file); ++ a->udba = AuOpt_UDBA_NONE; ++ } else { ++ /* fchmod() doesn't pass ia_file */ ++ a->udba = au_opt_udba(sb); ++ di_write_lock_child(dentry); ++ /* no d_unlinked(), to set UDBA_NONE for root */ ++ if (d_unhashed(dentry)) ++ a->udba = AuOpt_UDBA_NONE; ++ if (a->udba != AuOpt_UDBA_NONE) { ++ AuDebugOn(IS_ROOT(dentry)); ++ err = au_reval_for_attr(dentry, au_sigen(sb)); ++ if (unlikely(err)) ++ goto out_dentry; ++ } ++ } ++ ++ err = au_pin_and_icpup(dentry, ia, a); ++ if (unlikely(err < 0)) ++ goto out_dentry; ++ if (au_ftest_icpup(a->flags, DID_CPUP)) { ++ ia->ia_file = NULL; ++ ia->ia_valid &= ~ATTR_FILE; ++ } ++ ++ a->h_path.mnt = au_sbr_mnt(sb, a->btgt); ++ if ((ia->ia_valid & (ATTR_MODE | ATTR_CTIME)) ++ == (ATTR_MODE | ATTR_CTIME)) { ++ err = security_path_chmod(&a->h_path, ia->ia_mode); ++ if (unlikely(err)) ++ goto out_unlock; ++ } else if ((ia->ia_valid & (ATTR_UID | ATTR_GID)) ++ && (ia->ia_valid & ATTR_CTIME)) { ++ err = security_path_chown(&a->h_path, ia->ia_uid, ia->ia_gid); ++ if (unlikely(err)) ++ goto out_unlock; ++ } ++ ++ if (ia->ia_valid & ATTR_SIZE) { ++ struct file *f; ++ ++ if (ia->ia_size < i_size_read(inode)) ++ /* unmap only */ ++ truncate_setsize(inode, ia->ia_size); ++ ++ f = NULL; ++ if (ia->ia_valid & ATTR_FILE) ++ f = ia->ia_file; ++ mutex_unlock(&a->h_inode->i_mutex); ++ err = vfsub_trunc(&a->h_path, ia->ia_size, ia->ia_valid, f); ++ mutex_lock_nested(&a->h_inode->i_mutex, AuLsc_I_CHILD); ++ } else ++ err = vfsub_notify_change(&a->h_path, ia); ++ if (!err) ++ au_cpup_attr_changeable(inode); ++ ++out_unlock: ++ mutex_unlock(&a->h_inode->i_mutex); ++ au_unpin(&a->pin); ++ if (unlikely(err)) ++ au_update_dbstart(dentry); 
++out_dentry: ++ di_write_unlock(dentry); ++ if (file) { ++ fi_write_unlock(file); ++ ia->ia_file = file; ++ ia->ia_valid |= ATTR_FILE; ++ } ++out_si: ++ si_read_unlock(sb); ++out_kfree: ++ kfree(a); ++out: ++ AuTraceErr(err); ++ return err; ++} ++ ++static void au_refresh_iattr(struct inode *inode, struct kstat *st, ++ unsigned int nlink) ++{ ++ unsigned int n; ++ ++ inode->i_mode = st->mode; ++ inode->i_uid = st->uid; ++ inode->i_gid = st->gid; ++ inode->i_atime = st->atime; ++ inode->i_mtime = st->mtime; ++ inode->i_ctime = st->ctime; ++ ++ au_cpup_attr_nlink(inode, /*force*/0); ++ if (S_ISDIR(inode->i_mode)) { ++ n = inode->i_nlink; ++ n -= nlink; ++ n += st->nlink; ++ /* 0 can happen */ ++ vfsub_set_nlink(inode, n); ++ } ++ ++ spin_lock(&inode->i_lock); ++ inode->i_blocks = st->blocks; ++ i_size_write(inode, st->size); ++ spin_unlock(&inode->i_lock); ++} ++ ++static int aufs_getattr(struct vfsmount *mnt __maybe_unused, ++ struct dentry *dentry, struct kstat *st) ++{ ++ int err; ++ unsigned int mnt_flags; ++ aufs_bindex_t bindex; ++ unsigned char udba_none, positive; ++ struct super_block *sb, *h_sb; ++ struct inode *inode; ++ struct vfsmount *h_mnt; ++ struct dentry *h_dentry; ++ ++ sb = dentry->d_sb; ++ inode = dentry->d_inode; ++ err = si_read_lock(sb, AuLock_FLUSH | AuLock_NOPLM); ++ if (unlikely(err)) ++ goto out; ++ mnt_flags = au_mntflags(sb); ++ udba_none = !!au_opt_test(mnt_flags, UDBA_NONE); ++ ++ /* support fstat(2) */ ++ if (!d_unlinked(dentry) && !udba_none) { ++ unsigned int sigen = au_sigen(sb); ++ err = au_digen_test(dentry, sigen); ++ if (!err) { ++ di_read_lock_child(dentry, AuLock_IR); ++ err = au_dbrange_test(dentry); ++ if (unlikely(err)) ++ goto out_unlock; ++ } else { ++ AuDebugOn(IS_ROOT(dentry)); ++ di_write_lock_child(dentry); ++ err = au_dbrange_test(dentry); ++ if (!err) ++ err = au_reval_for_attr(dentry, sigen); ++ di_downgrade_lock(dentry, AuLock_IR); ++ if (unlikely(err)) ++ goto out_unlock; ++ } ++ } else ++ di_read_lock_child(dentry, AuLock_IR); ++ ++ bindex = au_ibstart(inode); ++ h_mnt = au_sbr_mnt(sb, bindex); ++ h_sb = h_mnt->mnt_sb; ++ if (!au_test_fs_bad_iattr(h_sb) && udba_none) ++ goto out_fill; /* success */ ++ ++ h_dentry = NULL; ++ if (au_dbstart(dentry) == bindex) ++ h_dentry = dget(au_h_dptr(dentry, bindex)); ++ else if (au_opt_test(mnt_flags, PLINK) && au_plink_test(inode)) { ++ h_dentry = au_plink_lkup(inode, bindex); ++ if (IS_ERR(h_dentry)) ++ goto out_fill; /* pretending success */ ++ } ++ /* illegally overlapped or something */ ++ if (unlikely(!h_dentry)) ++ goto out_fill; /* pretending success */ ++ ++ positive = !!h_dentry->d_inode; ++ if (positive) ++ err = vfs_getattr(h_mnt, h_dentry, st); ++ dput(h_dentry); ++ if (!err) { ++ if (positive) ++ au_refresh_iattr(inode, st, h_dentry->d_inode->i_nlink); ++ goto out_fill; /* success */ ++ } ++ AuTraceErr(err); ++ goto out_unlock; ++ ++out_fill: ++ generic_fillattr(inode, st); ++out_unlock: ++ di_read_unlock(dentry, AuLock_IR); ++ si_read_unlock(sb); ++out: ++ AuTraceErr(err); ++ return err; ++} ++ ++/* ---------------------------------------------------------------------- */ ++ ++static int h_readlink(struct dentry *dentry, int bindex, char __user *buf, ++ int bufsiz) ++{ ++ int err; ++ struct super_block *sb; ++ struct dentry *h_dentry; ++ ++ err = -EINVAL; ++ h_dentry = au_h_dptr(dentry, bindex); ++ if (unlikely(!h_dentry->d_inode->i_op->readlink)) ++ goto out; ++ ++ err = security_inode_readlink(h_dentry); ++ if (unlikely(err)) ++ goto out; ++ ++ sb = dentry->d_sb; ++ if 
(!au_test_ro(sb, bindex, dentry->d_inode)) { ++ vfsub_touch_atime(au_sbr_mnt(sb, bindex), h_dentry); ++ fsstack_copy_attr_atime(dentry->d_inode, h_dentry->d_inode); ++ } ++ err = h_dentry->d_inode->i_op->readlink(h_dentry, buf, bufsiz); ++ ++out: ++ return err; ++} ++ ++static int aufs_readlink(struct dentry *dentry, char __user *buf, int bufsiz) ++{ ++ int err; ++ ++ err = aufs_read_lock(dentry, AuLock_IR | AuLock_GEN); ++ if (unlikely(err)) ++ goto out; ++ err = au_d_hashed_positive(dentry); ++ if (!err) ++ err = h_readlink(dentry, au_dbstart(dentry), buf, bufsiz); ++ aufs_read_unlock(dentry, AuLock_IR); ++ ++out: ++ return err; ++} ++ ++static void *aufs_follow_link(struct dentry *dentry, struct nameidata *nd) ++{ ++ int err; ++ mm_segment_t old_fs; ++ union { ++ char *k; ++ char __user *u; ++ } buf; ++ ++ err = -ENOMEM; ++ buf.k = __getname_gfp(GFP_NOFS); ++ if (unlikely(!buf.k)) ++ goto out; ++ ++ err = aufs_read_lock(dentry, AuLock_IR | AuLock_GEN); ++ if (unlikely(err)) ++ goto out_name; ++ ++ err = au_d_hashed_positive(dentry); ++ if (!err) { ++ old_fs = get_fs(); ++ set_fs(KERNEL_DS); ++ err = h_readlink(dentry, au_dbstart(dentry), buf.u, PATH_MAX); ++ set_fs(old_fs); ++ } ++ aufs_read_unlock(dentry, AuLock_IR); ++ ++ if (err >= 0) { ++ buf.k[err] = 0; ++ /* will be freed by put_link */ ++ nd_set_link(nd, buf.k); ++ return NULL; /* success */ ++ } ++ ++out_name: ++ __putname(buf.k); ++out: ++ path_put(&nd->path); ++ AuTraceErr(err); ++ return ERR_PTR(err); ++} ++ ++static void aufs_put_link(struct dentry *dentry __maybe_unused, ++ struct nameidata *nd, void *cookie __maybe_unused) ++{ ++ __putname(nd_get_link(nd)); ++} ++ ++/* ---------------------------------------------------------------------- */ ++ ++static void aufs_truncate_range(struct inode *inode __maybe_unused, ++ loff_t start __maybe_unused, ++ loff_t end __maybe_unused) ++{ ++ AuUnsupport(); ++} ++ ++/* ---------------------------------------------------------------------- */ ++ ++struct inode_operations aufs_symlink_iop = { ++ .permission = aufs_permission, ++ .setattr = aufs_setattr, ++ .getattr = aufs_getattr, ++ .readlink = aufs_readlink, ++ .follow_link = aufs_follow_link, ++ .put_link = aufs_put_link ++}; ++ ++struct inode_operations aufs_dir_iop = { ++ .create = aufs_create, ++ .lookup = aufs_lookup, ++ .link = aufs_link, ++ .unlink = aufs_unlink, ++ .symlink = aufs_symlink, ++ .mkdir = aufs_mkdir, ++ .rmdir = aufs_rmdir, ++ .mknod = aufs_mknod, ++ .rename = aufs_rename, ++ ++ .permission = aufs_permission, ++ .setattr = aufs_setattr, ++ .getattr = aufs_getattr ++}; ++ ++struct inode_operations aufs_iop = { ++ .permission = aufs_permission, ++ .setattr = aufs_setattr, ++ .getattr = aufs_getattr, ++ .truncate_range = aufs_truncate_range ++}; +--- /dev/null 2012-03-14 12:35:58.848999748 +0100 ++++ b/fs/aufs/i_op_del.c 2012-03-20 17:31:17.000000000 +0100 +@@ -0,0 +1,478 @@ ++/* ++ * Copyright (C) 2005-2012 Junjiro R. Okajima ++ * ++ * This program, aufs is free software; you can redistribute it and/or modify ++ * it under the terms of the GNU General Public License as published by ++ * the Free Software Foundation; either version 2 of the License, or ++ * (at your option) any later version. ++ * ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ * GNU General Public License for more details. 
++ * ++ * You should have received a copy of the GNU General Public License ++ * along with this program; if not, write to the Free Software ++ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA ++ */ ++ ++/* ++ * inode operations (del entry) ++ */ ++ ++#include "aufs.h" ++ ++/* ++ * decide if a new whiteout for @dentry is necessary or not. ++ * when it is necessary, prepare the parent dir for the upper branch whose ++ * branch index is @bcpup for creation. the actual creation of the whiteout will ++ * be done by the caller. ++ * return value: ++ * 0: wh is unnecessary ++ * plus: wh is necessary ++ * minus: error ++ */ ++int au_wr_dir_need_wh(struct dentry *dentry, int isdir, aufs_bindex_t *bcpup) ++{ ++ int need_wh, err; ++ aufs_bindex_t bstart; ++ struct super_block *sb; ++ ++ sb = dentry->d_sb; ++ bstart = au_dbstart(dentry); ++ if (*bcpup < 0) { ++ *bcpup = bstart; ++ if (au_test_ro(sb, bstart, dentry->d_inode)) { ++ err = AuWbrCopyup(au_sbi(sb), dentry); ++ *bcpup = err; ++ if (unlikely(err < 0)) ++ goto out; ++ } ++ } else ++ AuDebugOn(bstart < *bcpup ++ || au_test_ro(sb, *bcpup, dentry->d_inode)); ++ AuDbg("bcpup %d, bstart %d\n", *bcpup, bstart); ++ ++ if (*bcpup != bstart) { ++ err = au_cpup_dirs(dentry, *bcpup); ++ if (unlikely(err)) ++ goto out; ++ need_wh = 1; ++ } else { ++ struct au_dinfo *dinfo, *tmp; ++ ++ need_wh = -ENOMEM; ++ dinfo = au_di(dentry); ++ tmp = au_di_alloc(sb, AuLsc_DI_TMP); ++ if (tmp) { ++ au_di_cp(tmp, dinfo); ++ au_di_swap(tmp, dinfo); ++ /* returns the number of positive dentries */ ++ need_wh = au_lkup_dentry(dentry, bstart + 1, /*type*/0, ++ /*nd*/NULL); ++ au_di_swap(tmp, dinfo); ++ au_rw_write_unlock(&tmp->di_rwsem); ++ au_di_free(tmp); ++ } ++ } ++ AuDbg("need_wh %d\n", need_wh); ++ err = need_wh; ++ ++out: ++ return err; ++} ++ ++/* ++ * simple tests for the del-entry operations. ++ * following the checks in vfs, plus the parent-child relationship. ++ */ ++int au_may_del(struct dentry *dentry, aufs_bindex_t bindex, ++ struct dentry *h_parent, int isdir) ++{ ++ int err; ++ umode_t h_mode; ++ struct dentry *h_dentry, *h_latest; ++ struct inode *h_inode; ++ ++ h_dentry = au_h_dptr(dentry, bindex); ++ h_inode = h_dentry->d_inode; ++ if (dentry->d_inode) { ++ err = -ENOENT; ++ if (unlikely(!h_inode || !h_inode->i_nlink)) ++ goto out; ++ ++ h_mode = h_inode->i_mode; ++ if (!isdir) { ++ err = -EISDIR; ++ if (unlikely(S_ISDIR(h_mode))) ++ goto out; ++ } else if (unlikely(!S_ISDIR(h_mode))) { ++ err = -ENOTDIR; ++ goto out; ++ } ++ } else { ++ /* rename(2) case */ ++ err = -EIO; ++ if (unlikely(h_inode)) ++ goto out; ++ } ++ ++ err = -ENOENT; ++ /* expected parent dir is locked */ ++ if (unlikely(h_parent != h_dentry->d_parent)) ++ goto out; ++ err = 0; ++ ++ /* ++ * rmdir a dir may break the consistency on some filesystems. ++ * let's try a heavy test. ++ */ ++ err = -EACCES; ++ if (unlikely(au_test_h_perm(h_parent->d_inode, MAY_EXEC | MAY_WRITE))) ++ goto out; ++ ++ h_latest = au_sio_lkup_one(&dentry->d_name, h_parent, ++ au_sbr(dentry->d_sb, bindex)); ++ err = -EIO; ++ if (IS_ERR(h_latest)) ++ goto out; ++ if (h_latest == h_dentry) ++ err = 0; ++ dput(h_latest); ++ ++out: ++ return err; ++} ++ ++/* ++ * decide the branch where we operate for @dentry. the branch index will be set ++ * to @rbcpup. after deciding it, 'pin' it and store the timestamps of the parent ++ * dir for reverting. ++ * when a new whiteout is necessary, create it.
++ */ ++static struct dentry* ++lock_hdir_create_wh(struct dentry *dentry, int isdir, aufs_bindex_t *rbcpup, ++ struct au_dtime *dt, struct au_pin *pin) ++{ ++ struct dentry *wh_dentry; ++ struct super_block *sb; ++ struct path h_path; ++ int err, need_wh; ++ unsigned int udba; ++ aufs_bindex_t bcpup; ++ ++ need_wh = au_wr_dir_need_wh(dentry, isdir, rbcpup); ++ wh_dentry = ERR_PTR(need_wh); ++ if (unlikely(need_wh < 0)) ++ goto out; ++ ++ sb = dentry->d_sb; ++ udba = au_opt_udba(sb); ++ bcpup = *rbcpup; ++ err = au_pin(pin, dentry, bcpup, udba, ++ AuPin_DI_LOCKED | AuPin_MNT_WRITE); ++ wh_dentry = ERR_PTR(err); ++ if (unlikely(err)) ++ goto out; ++ ++ h_path.dentry = au_pinned_h_parent(pin); ++ if (udba != AuOpt_UDBA_NONE ++ && au_dbstart(dentry) == bcpup) { ++ err = au_may_del(dentry, bcpup, h_path.dentry, isdir); ++ wh_dentry = ERR_PTR(err); ++ if (unlikely(err)) ++ goto out_unpin; ++ } ++ ++ h_path.mnt = au_sbr_mnt(sb, bcpup); ++ au_dtime_store(dt, au_pinned_parent(pin), &h_path); ++ wh_dentry = NULL; ++ if (!need_wh) ++ goto out; /* success, no need to create whiteout */ ++ ++ wh_dentry = au_wh_create(dentry, bcpup, h_path.dentry); ++ if (IS_ERR(wh_dentry)) ++ goto out_unpin; ++ ++ /* returns with the parent locked and wh_dentry dget-ed */ ++ goto out; /* success */ ++ ++out_unpin: ++ au_unpin(pin); ++out: ++ return wh_dentry; ++} ++ ++/* ++ * when removing a dir, rename it to a unique temporary whiteout-ed name first ++ * in order to be revertible and save time for removing many child whiteouts ++ * under the dir. ++ * returns 1 when there are too many child whiteouts and the caller should remove ++ * them asynchronously. returns 0 when the number of children is small enough to ++ * remove now or the branch fs is a remote fs. ++ * otherwise returns an error. ++ */ ++static int renwh_and_rmdir(struct dentry *dentry, aufs_bindex_t bindex, ++ struct au_nhash *whlist, struct inode *dir) ++{ ++ int rmdir_later, err, dirwh; ++ struct dentry *h_dentry; ++ struct super_block *sb; ++ ++ sb = dentry->d_sb; ++ SiMustAnyLock(sb); ++ h_dentry = au_h_dptr(dentry, bindex); ++ err = au_whtmp_ren(h_dentry, au_sbr(sb, bindex)); ++ if (unlikely(err)) ++ goto out; ++ ++ /* stop monitoring */ ++ au_hn_free(au_hi(dentry->d_inode, bindex)); ++ ++ if (!au_test_fs_remote(h_dentry->d_sb)) { ++ dirwh = au_sbi(sb)->si_dirwh; ++ rmdir_later = (dirwh <= 1); ++ if (!rmdir_later) ++ rmdir_later = au_nhash_test_longer_wh(whlist, bindex, ++ dirwh); ++ if (rmdir_later) ++ return rmdir_later; ++ } ++ ++ err = au_whtmp_rmdir(dir, bindex, h_dentry, whlist); ++ if (unlikely(err)) { ++ AuIOErr("rmdir %.*s, b%d failed, %d. ignored\n", ++ AuDLNPair(h_dentry), bindex, err); ++ err = 0; ++ } ++ ++out: ++ AuTraceErr(err); ++ return err; ++} ++ ++/* ++ * final procedure for deleting an entry. ++ * maintain dentry and iattr. ++ */ ++static void epilog(struct inode *dir, struct dentry *dentry, ++ aufs_bindex_t bindex) ++{ ++ struct inode *inode; ++ ++ inode = dentry->d_inode; ++ d_drop(dentry); ++ inode->i_ctime = dir->i_ctime; ++ ++ if (au_ibstart(dir) == bindex) ++ au_cpup_attr_timesizes(dir); ++ dir->i_version++; ++} ++ ++/* ++ * when an error happened, remove the created whiteout and revert everything.
++ */ ++static int do_revert(int err, struct inode *dir, aufs_bindex_t bindex, ++ aufs_bindex_t bwh, struct dentry *wh_dentry, ++ struct dentry *dentry, struct au_dtime *dt) ++{ ++ int rerr; ++ struct path h_path = { ++ .dentry = wh_dentry, ++ .mnt = au_sbr_mnt(dir->i_sb, bindex) ++ }; ++ ++ rerr = au_wh_unlink_dentry(au_h_iptr(dir, bindex), &h_path, dentry); ++ if (!rerr) { ++ au_set_dbwh(dentry, bwh); ++ au_dtime_revert(dt); ++ return 0; ++ } ++ ++ AuIOErr("%.*s reverting whiteout failed(%d, %d)\n", ++ AuDLNPair(dentry), err, rerr); ++ return -EIO; ++} ++ ++/* ---------------------------------------------------------------------- */ ++ ++int aufs_unlink(struct inode *dir, struct dentry *dentry) ++{ ++ int err; ++ aufs_bindex_t bwh, bindex, bstart; ++ struct au_dtime dt; ++ struct au_pin pin; ++ struct path h_path; ++ struct inode *inode, *h_dir; ++ struct dentry *parent, *wh_dentry; ++ ++ IMustLock(dir); ++ ++ err = aufs_read_lock(dentry, AuLock_DW | AuLock_GEN); ++ if (unlikely(err)) ++ goto out; ++ err = au_d_hashed_positive(dentry); ++ if (unlikely(err)) ++ goto out_unlock; ++ inode = dentry->d_inode; ++ IMustLock(inode); ++ err = -EISDIR; ++ if (unlikely(S_ISDIR(inode->i_mode))) ++ goto out_unlock; /* possible? */ ++ ++ bstart = au_dbstart(dentry); ++ bwh = au_dbwh(dentry); ++ bindex = -1; ++ parent = dentry->d_parent; /* dir inode is locked */ ++ di_write_lock_parent(parent); ++ wh_dentry = lock_hdir_create_wh(dentry, /*isdir*/0, &bindex, &dt, &pin); ++ err = PTR_ERR(wh_dentry); ++ if (IS_ERR(wh_dentry)) ++ goto out_parent; ++ ++ h_path.mnt = au_sbr_mnt(dentry->d_sb, bstart); ++ h_path.dentry = au_h_dptr(dentry, bstart); ++ dget(h_path.dentry); ++ if (bindex == bstart) { ++ h_dir = au_pinned_h_dir(&pin); ++ err = vfsub_unlink(h_dir, &h_path, /*force*/0); ++ } else { ++ /* dir inode is locked */ ++ h_dir = wh_dentry->d_parent->d_inode; ++ IMustLock(h_dir); ++ err = 0; ++ } ++ ++ if (!err) { ++ vfsub_drop_nlink(inode); ++ epilog(dir, dentry, bindex); ++ ++ /* update target timestamps */ ++ if (bindex == bstart) { ++ vfsub_update_h_iattr(&h_path, /*did*/NULL); /*ignore*/ ++ inode->i_ctime = h_path.dentry->d_inode->i_ctime; ++ } else ++ /* todo: this timestamp may be reverted later */ ++ inode->i_ctime = h_dir->i_ctime; ++ goto out_unpin; /* success */ ++ } ++ ++ /* revert */ ++ if (wh_dentry) { ++ int rerr; ++ ++ rerr = do_revert(err, dir, bindex, bwh, wh_dentry, dentry, &dt); ++ if (rerr) ++ err = rerr; ++ } ++ ++out_unpin: ++ au_unpin(&pin); ++ dput(wh_dentry); ++ dput(h_path.dentry); ++out_parent: ++ di_write_unlock(parent); ++out_unlock: ++ aufs_read_unlock(dentry, AuLock_DW); ++out: ++ return err; ++} ++ ++int aufs_rmdir(struct inode *dir, struct dentry *dentry) ++{ ++ int err, rmdir_later; ++ aufs_bindex_t bwh, bindex, bstart; ++ struct au_dtime dt; ++ struct au_pin pin; ++ struct inode *inode; ++ struct dentry *parent, *wh_dentry, *h_dentry; ++ struct au_whtmp_rmdir *args; ++ ++ IMustLock(dir); ++ ++ err = aufs_read_lock(dentry, AuLock_DW | AuLock_FLUSH | AuLock_GEN); ++ if (unlikely(err)) ++ goto out; ++ err = au_alive_dir(dentry); ++ if (unlikely(err)) ++ goto out_unlock; ++ inode = dentry->d_inode; ++ IMustLock(inode); ++ err = -ENOTDIR; ++ if (unlikely(!S_ISDIR(inode->i_mode))) ++ goto out_unlock; /* possible? 
*/ ++ ++ err = -ENOMEM; ++ args = au_whtmp_rmdir_alloc(dir->i_sb, GFP_NOFS); ++ if (unlikely(!args)) ++ goto out_unlock; ++ ++ parent = dentry->d_parent; /* dir inode is locked */ ++ di_write_lock_parent(parent); ++ err = au_test_empty(dentry, &args->whlist); ++ if (unlikely(err)) ++ goto out_parent; ++ ++ bstart = au_dbstart(dentry); ++ bwh = au_dbwh(dentry); ++ bindex = -1; ++ wh_dentry = lock_hdir_create_wh(dentry, /*isdir*/1, &bindex, &dt, &pin); ++ err = PTR_ERR(wh_dentry); ++ if (IS_ERR(wh_dentry)) ++ goto out_parent; ++ ++ h_dentry = au_h_dptr(dentry, bstart); ++ dget(h_dentry); ++ rmdir_later = 0; ++ if (bindex == bstart) { ++ err = renwh_and_rmdir(dentry, bstart, &args->whlist, dir); ++ if (err > 0) { ++ rmdir_later = err; ++ err = 0; ++ } ++ } else { ++ /* stop monitoring */ ++ au_hn_free(au_hi(inode, bstart)); ++ ++ /* dir inode is locked */ ++ IMustLock(wh_dentry->d_parent->d_inode); ++ err = 0; ++ } ++ ++ if (!err) { ++ vfsub_dead_dir(inode); ++ au_set_dbdiropq(dentry, -1); ++ epilog(dir, dentry, bindex); ++ ++ if (rmdir_later) { ++ au_whtmp_kick_rmdir(dir, bstart, h_dentry, args); ++ args = NULL; ++ } ++ ++ goto out_unpin; /* success */ ++ } ++ ++ /* revert */ ++ AuLabel(revert); ++ if (wh_dentry) { ++ int rerr; ++ ++ rerr = do_revert(err, dir, bindex, bwh, wh_dentry, dentry, &dt); ++ if (rerr) ++ err = rerr; ++ } ++ ++out_unpin: ++ au_unpin(&pin); ++ dput(wh_dentry); ++ dput(h_dentry); ++out_parent: ++ di_write_unlock(parent); ++ if (args) ++ au_whtmp_rmdir_free(args); ++out_unlock: ++ aufs_read_unlock(dentry, AuLock_DW); ++out: ++ AuTraceErr(err); ++ return err; ++} +--- /dev/null 2012-03-14 12:35:58.848999748 +0100 ++++ b/fs/aufs/i_op_ren.c 2012-03-20 17:31:17.000000000 +0100 +@@ -0,0 +1,1026 @@ ++/* ++ * Copyright (C) 2005-2012 Junjiro R. Okajima ++ * ++ * This program, aufs is free software; you can redistribute it and/or modify ++ * it under the terms of the GNU General Public License as published by ++ * the Free Software Foundation; either version 2 of the License, or ++ * (at your option) any later version. ++ * ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ * GNU General Public License for more details. 
++ * ++ * You should have received a copy of the GNU General Public License ++ * along with this program; if not, write to the Free Software ++ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA ++ */ ++ ++/* ++ * inode operation (rename entry) ++ * todo: this is a crazy monster ++ */ ++ ++#include "aufs.h" ++ ++enum { AuSRC, AuDST, AuSrcDst }; ++enum { AuPARENT, AuCHILD, AuParentChild }; ++ ++#define AuRen_ISDIR 1 ++#define AuRen_ISSAMEDIR (1 << 1) ++#define AuRen_WHSRC (1 << 2) ++#define AuRen_WHDST (1 << 3) ++#define AuRen_MNT_WRITE (1 << 4) ++#define AuRen_DT_DSTDIR (1 << 5) ++#define AuRen_DIROPQ (1 << 6) ++#define AuRen_CPUP (1 << 7) ++#define au_ftest_ren(flags, name) ((flags) & AuRen_##name) ++#define au_fset_ren(flags, name) \ ++ do { (flags) |= AuRen_##name; } while (0) ++#define au_fclr_ren(flags, name) \ ++ do { (flags) &= ~AuRen_##name; } while (0) ++ ++struct au_ren_args { ++ struct { ++ struct dentry *dentry, *h_dentry, *parent, *h_parent, ++ *wh_dentry; ++ struct inode *dir, *inode; ++ struct au_hinode *hdir; ++ struct au_dtime dt[AuParentChild]; ++ aufs_bindex_t bstart; ++ } sd[AuSrcDst]; ++ ++#define src_dentry sd[AuSRC].dentry ++#define src_dir sd[AuSRC].dir ++#define src_inode sd[AuSRC].inode ++#define src_h_dentry sd[AuSRC].h_dentry ++#define src_parent sd[AuSRC].parent ++#define src_h_parent sd[AuSRC].h_parent ++#define src_wh_dentry sd[AuSRC].wh_dentry ++#define src_hdir sd[AuSRC].hdir ++#define src_h_dir sd[AuSRC].hdir->hi_inode ++#define src_dt sd[AuSRC].dt ++#define src_bstart sd[AuSRC].bstart ++ ++#define dst_dentry sd[AuDST].dentry ++#define dst_dir sd[AuDST].dir ++#define dst_inode sd[AuDST].inode ++#define dst_h_dentry sd[AuDST].h_dentry ++#define dst_parent sd[AuDST].parent ++#define dst_h_parent sd[AuDST].h_parent ++#define dst_wh_dentry sd[AuDST].wh_dentry ++#define dst_hdir sd[AuDST].hdir ++#define dst_h_dir sd[AuDST].hdir->hi_inode ++#define dst_dt sd[AuDST].dt ++#define dst_bstart sd[AuDST].bstart ++ ++ struct dentry *h_trap; ++ struct au_branch *br; ++ struct au_hinode *src_hinode; ++ struct path h_path; ++ struct au_nhash whlist; ++ aufs_bindex_t btgt, src_bwh, src_bdiropq; ++ ++ unsigned int flags; ++ ++ struct au_whtmp_rmdir *thargs; ++ struct dentry *h_dst; ++}; ++ ++/* ---------------------------------------------------------------------- */ ++ ++/* ++ * functions for reverting. ++ * when an error happened in a single rename system call, we should revert ++ * everything as if nothing happened. ++ * we don't need to revert the copied-up/down parent dirs since they are ++ * harmless. ++ */ ++ ++#define RevertFailure(fmt, ...)
do { \ ++ AuIOErr("revert failure: " fmt " (%d, %d)\n", \ ++ ##__VA_ARGS__, err, rerr); \ ++ err = -EIO; \ ++} while (0) ++ ++static void au_ren_rev_diropq(int err, struct au_ren_args *a) ++{ ++ int rerr; ++ ++ au_hn_imtx_lock_nested(a->src_hinode, AuLsc_I_CHILD); ++ rerr = au_diropq_remove(a->src_dentry, a->btgt); ++ au_hn_imtx_unlock(a->src_hinode); ++ au_set_dbdiropq(a->src_dentry, a->src_bdiropq); ++ if (rerr) ++ RevertFailure("remove diropq %.*s", AuDLNPair(a->src_dentry)); ++} ++ ++static void au_ren_rev_rename(int err, struct au_ren_args *a) ++{ ++ int rerr; ++ ++ a->h_path.dentry = au_lkup_one(&a->src_dentry->d_name, a->src_h_parent, ++ a->br, /*nd*/NULL); ++ rerr = PTR_ERR(a->h_path.dentry); ++ if (IS_ERR(a->h_path.dentry)) { ++ RevertFailure("au_lkup_one %.*s", AuDLNPair(a->src_dentry)); ++ return; ++ } ++ ++ rerr = vfsub_rename(a->dst_h_dir, ++ au_h_dptr(a->src_dentry, a->btgt), ++ a->src_h_dir, &a->h_path); ++ d_drop(a->h_path.dentry); ++ dput(a->h_path.dentry); ++ /* au_set_h_dptr(a->src_dentry, a->btgt, NULL); */ ++ if (rerr) ++ RevertFailure("rename %.*s", AuDLNPair(a->src_dentry)); ++} ++ ++static void au_ren_rev_cpup(int err, struct au_ren_args *a) ++{ ++ int rerr; ++ ++ a->h_path.dentry = a->dst_h_dentry; ++ rerr = vfsub_unlink(a->dst_h_dir, &a->h_path, /*force*/0); ++ au_set_h_dptr(a->src_dentry, a->btgt, NULL); ++ au_set_dbstart(a->src_dentry, a->src_bstart); ++ if (rerr) ++ RevertFailure("unlink %.*s", AuDLNPair(a->dst_h_dentry)); ++} ++ ++static void au_ren_rev_whtmp(int err, struct au_ren_args *a) ++{ ++ int rerr; ++ ++ a->h_path.dentry = au_lkup_one(&a->dst_dentry->d_name, a->dst_h_parent, ++ a->br, /*nd*/NULL); ++ rerr = PTR_ERR(a->h_path.dentry); ++ if (IS_ERR(a->h_path.dentry)) { ++ RevertFailure("lookup %.*s", AuDLNPair(a->dst_dentry)); ++ return; ++ } ++ if (a->h_path.dentry->d_inode) { ++ d_drop(a->h_path.dentry); ++ dput(a->h_path.dentry); ++ return; ++ } ++ ++ rerr = vfsub_rename(a->dst_h_dir, a->h_dst, a->dst_h_dir, &a->h_path); ++ d_drop(a->h_path.dentry); ++ dput(a->h_path.dentry); ++ if (!rerr) ++ au_set_h_dptr(a->dst_dentry, a->btgt, dget(a->h_dst)); ++ else ++ RevertFailure("rename %.*s", AuDLNPair(a->h_dst)); ++} ++ ++static void au_ren_rev_whsrc(int err, struct au_ren_args *a) ++{ ++ int rerr; ++ ++ a->h_path.dentry = a->src_wh_dentry; ++ rerr = au_wh_unlink_dentry(a->src_h_dir, &a->h_path, a->src_dentry); ++ au_set_dbwh(a->src_dentry, a->src_bwh); ++ if (rerr) ++ RevertFailure("unlink %.*s", AuDLNPair(a->src_wh_dentry)); ++} ++#undef RevertFailure ++ ++/* ---------------------------------------------------------------------- */ ++ ++/* ++ * when we have to copyup the renaming entry, do it with the rename-target name ++ * in order to minimize the cost (the later actual rename is unnecessary). ++ * otherwise rename it on the target branch. 
++ */ ++static int au_ren_or_cpup(struct au_ren_args *a) ++{ ++ int err; ++ struct dentry *d; ++ ++ d = a->src_dentry; ++ if (au_dbstart(d) == a->btgt) { ++ a->h_path.dentry = a->dst_h_dentry; ++ if (au_ftest_ren(a->flags, DIROPQ) ++ && au_dbdiropq(d) == a->btgt) ++ au_fclr_ren(a->flags, DIROPQ); ++ AuDebugOn(au_dbstart(d) != a->btgt); ++ err = vfsub_rename(a->src_h_dir, au_h_dptr(d, a->btgt), ++ a->dst_h_dir, &a->h_path); ++ } else { ++ struct mutex *h_mtx = &a->src_h_dentry->d_inode->i_mutex; ++ struct file *h_file; ++ ++ au_fset_ren(a->flags, CPUP); ++ mutex_lock_nested(h_mtx, AuLsc_I_CHILD); ++ au_set_dbstart(d, a->btgt); ++ au_set_h_dptr(d, a->btgt, dget(a->dst_h_dentry)); ++ h_file = au_h_open_pre(d, a->src_bstart); ++ if (IS_ERR(h_file)) { ++ err = PTR_ERR(h_file); ++ h_file = NULL; ++ } else ++ err = au_sio_cpup_single(d, a->btgt, a->src_bstart, -1, ++ !AuCpup_DTIME, a->dst_parent); ++ mutex_unlock(h_mtx); ++ au_h_open_post(d, a->src_bstart, h_file); ++ if (!err) { ++ d = a->dst_dentry; ++ au_set_h_dptr(d, a->btgt, NULL); ++ au_update_dbstart(d); ++ } else { ++ au_set_h_dptr(d, a->btgt, NULL); ++ au_set_dbstart(d, a->src_bstart); ++ } ++ } ++ if (!err && a->h_dst) ++ /* it will be set to dinfo later */ ++ dget(a->h_dst); ++ ++ return err; ++} ++ ++/* cf. aufs_rmdir() */ ++static int au_ren_del_whtmp(struct au_ren_args *a) ++{ ++ int err; ++ struct inode *dir; ++ ++ dir = a->dst_dir; ++ SiMustAnyLock(dir->i_sb); ++ if (!au_nhash_test_longer_wh(&a->whlist, a->btgt, ++ au_sbi(dir->i_sb)->si_dirwh) ++ || au_test_fs_remote(a->h_dst->d_sb)) { ++ err = au_whtmp_rmdir(dir, a->btgt, a->h_dst, &a->whlist); ++ if (unlikely(err)) ++ pr_warning("failed removing whtmp dir %.*s (%d), " ++ "ignored.\n", AuDLNPair(a->h_dst), err); ++ } else { ++ au_nhash_wh_free(&a->thargs->whlist); ++ a->thargs->whlist = a->whlist; ++ a->whlist.nh_num = 0; ++ au_whtmp_kick_rmdir(dir, a->btgt, a->h_dst, a->thargs); ++ dput(a->h_dst); ++ a->thargs = NULL; ++ } ++ ++ return 0; ++} ++ ++/* make it 'opaque' dir. 
*/ ++static int au_ren_diropq(struct au_ren_args *a) ++{ ++ int err; ++ struct dentry *diropq; ++ ++ err = 0; ++ a->src_bdiropq = au_dbdiropq(a->src_dentry); ++ a->src_hinode = au_hi(a->src_inode, a->btgt); ++ au_hn_imtx_lock_nested(a->src_hinode, AuLsc_I_CHILD); ++ diropq = au_diropq_create(a->src_dentry, a->btgt); ++ au_hn_imtx_unlock(a->src_hinode); ++ if (IS_ERR(diropq)) ++ err = PTR_ERR(diropq); ++ dput(diropq); ++ ++ return err; ++} ++ ++static int do_rename(struct au_ren_args *a) ++{ ++ int err; ++ struct dentry *d, *h_d; ++ ++ /* prepare workqueue args for asynchronous rmdir */ ++ h_d = a->dst_h_dentry; ++ if (au_ftest_ren(a->flags, ISDIR) && h_d->d_inode) { ++ err = -ENOMEM; ++ a->thargs = au_whtmp_rmdir_alloc(a->src_dentry->d_sb, GFP_NOFS); ++ if (unlikely(!a->thargs)) ++ goto out; ++ a->h_dst = dget(h_d); ++ } ++ ++ /* create whiteout for src_dentry */ ++ if (au_ftest_ren(a->flags, WHSRC)) { ++ a->src_bwh = au_dbwh(a->src_dentry); ++ AuDebugOn(a->src_bwh >= 0); ++ a->src_wh_dentry ++ = au_wh_create(a->src_dentry, a->btgt, a->src_h_parent); ++ err = PTR_ERR(a->src_wh_dentry); ++ if (IS_ERR(a->src_wh_dentry)) ++ goto out_thargs; ++ } ++ ++ /* lookup whiteout for dentry */ ++ if (au_ftest_ren(a->flags, WHDST)) { ++ h_d = au_wh_lkup(a->dst_h_parent, &a->dst_dentry->d_name, ++ a->br); ++ err = PTR_ERR(h_d); ++ if (IS_ERR(h_d)) ++ goto out_whsrc; ++ if (!h_d->d_inode) ++ dput(h_d); ++ else ++ a->dst_wh_dentry = h_d; ++ } ++ ++ /* rename dentry to tmpwh */ ++ if (a->thargs) { ++ err = au_whtmp_ren(a->dst_h_dentry, a->br); ++ if (unlikely(err)) ++ goto out_whdst; ++ ++ d = a->dst_dentry; ++ au_set_h_dptr(d, a->btgt, NULL); ++ err = au_lkup_neg(d, a->btgt); ++ if (unlikely(err)) ++ goto out_whtmp; ++ a->dst_h_dentry = au_h_dptr(d, a->btgt); ++ } ++ ++ /* cpup src */ ++ if (a->dst_h_dentry->d_inode && a->src_bstart != a->btgt) { ++ struct mutex *h_mtx = &a->src_h_dentry->d_inode->i_mutex; ++ struct file *h_file; ++ ++ mutex_lock_nested(h_mtx, AuLsc_I_CHILD); ++ AuDebugOn(au_dbstart(a->src_dentry) != a->src_bstart); ++ h_file = au_h_open_pre(a->src_dentry, a->src_bstart); ++ if (IS_ERR(h_file)) { ++ err = PTR_ERR(h_file); ++ h_file = NULL; ++ } else ++ err = au_sio_cpup_simple(a->src_dentry, a->btgt, -1, ++ !AuCpup_DTIME); ++ mutex_unlock(h_mtx); ++ au_h_open_post(a->src_dentry, a->src_bstart, h_file); ++ if (unlikely(err)) ++ goto out_whtmp; ++ } ++ ++ /* rename by vfs_rename or cpup */ ++ d = a->dst_dentry; ++ if (au_ftest_ren(a->flags, ISDIR) ++ && (a->dst_wh_dentry ++ || au_dbdiropq(d) == a->btgt ++ /* hide the lower to keep xino */ ++ || a->btgt < au_dbend(d) ++ || au_opt_test(au_mntflags(d->d_sb), ALWAYS_DIROPQ))) ++ au_fset_ren(a->flags, DIROPQ); ++ err = au_ren_or_cpup(a); ++ if (unlikely(err)) ++ /* leave the copied-up one */ ++ goto out_whtmp; ++ ++ /* make dir opaque */ ++ if (au_ftest_ren(a->flags, DIROPQ)) { ++ err = au_ren_diropq(a); ++ if (unlikely(err)) ++ goto out_rename; ++ } ++ ++ /* update target timestamps */ ++ AuDebugOn(au_dbstart(a->src_dentry) != a->btgt); ++ a->h_path.dentry = au_h_dptr(a->src_dentry, a->btgt); ++ vfsub_update_h_iattr(&a->h_path, /*did*/NULL); /*ignore*/ ++ a->src_inode->i_ctime = a->h_path.dentry->d_inode->i_ctime; ++ ++ /* remove whiteout for dentry */ ++ if (a->dst_wh_dentry) { ++ a->h_path.dentry = a->dst_wh_dentry; ++ err = au_wh_unlink_dentry(a->dst_h_dir, &a->h_path, ++ a->dst_dentry); ++ if (unlikely(err)) ++ goto out_diropq; ++ } ++ ++ /* remove whtmp */ ++ if (a->thargs) ++ au_ren_del_whtmp(a); /* ignore this error */ ++ ++ err = 0; ++ 
goto out_success; ++ ++out_diropq: ++ if (au_ftest_ren(a->flags, DIROPQ)) ++ au_ren_rev_diropq(err, a); ++out_rename: ++ if (!au_ftest_ren(a->flags, CPUP)) ++ au_ren_rev_rename(err, a); ++ else ++ au_ren_rev_cpup(err, a); ++ dput(a->h_dst); ++out_whtmp: ++ if (a->thargs) ++ au_ren_rev_whtmp(err, a); ++out_whdst: ++ dput(a->dst_wh_dentry); ++ a->dst_wh_dentry = NULL; ++out_whsrc: ++ if (a->src_wh_dentry) ++ au_ren_rev_whsrc(err, a); ++out_success: ++ dput(a->src_wh_dentry); ++ dput(a->dst_wh_dentry); ++out_thargs: ++ if (a->thargs) { ++ dput(a->h_dst); ++ au_whtmp_rmdir_free(a->thargs); ++ a->thargs = NULL; ++ } ++out: ++ return err; ++} ++ ++/* ---------------------------------------------------------------------- */ ++ ++/* ++ * test if @dentry dir can be rename destination or not. ++ * success means, it is a logically empty dir. ++ */ ++static int may_rename_dstdir(struct dentry *dentry, struct au_nhash *whlist) ++{ ++ return au_test_empty(dentry, whlist); ++} ++ ++/* ++ * test if @dentry dir can be rename source or not. ++ * if it can, return 0 and @children is filled. ++ * success means, ++ * - it is a logically empty dir. ++ * - or, it exists on writable branch and has no children including whiteouts ++ * on the lower branch. ++ */ ++static int may_rename_srcdir(struct dentry *dentry, aufs_bindex_t btgt) ++{ ++ int err; ++ unsigned int rdhash; ++ aufs_bindex_t bstart; ++ ++ bstart = au_dbstart(dentry); ++ if (bstart != btgt) { ++ struct au_nhash whlist; ++ ++ SiMustAnyLock(dentry->d_sb); ++ rdhash = au_sbi(dentry->d_sb)->si_rdhash; ++ if (!rdhash) ++ rdhash = au_rdhash_est(au_dir_size(/*file*/NULL, ++ dentry)); ++ err = au_nhash_alloc(&whlist, rdhash, GFP_NOFS); ++ if (unlikely(err)) ++ goto out; ++ err = au_test_empty(dentry, &whlist); ++ au_nhash_wh_free(&whlist); ++ goto out; ++ } ++ ++ if (bstart == au_dbtaildir(dentry)) ++ return 0; /* success */ ++ ++ err = au_test_empty_lower(dentry); ++ ++out: ++ if (err == -ENOTEMPTY) { ++ AuWarn1("renaming dir who has child(ren) on multiple branches," ++ " is not supported\n"); ++ err = -EXDEV; ++ } ++ return err; ++} ++ ++/* side effect: sets whlist and h_dentry */ ++static int au_ren_may_dir(struct au_ren_args *a) ++{ ++ int err; ++ unsigned int rdhash; ++ struct dentry *d; ++ ++ d = a->dst_dentry; ++ SiMustAnyLock(d->d_sb); ++ ++ err = 0; ++ if (au_ftest_ren(a->flags, ISDIR) && a->dst_inode) { ++ rdhash = au_sbi(d->d_sb)->si_rdhash; ++ if (!rdhash) ++ rdhash = au_rdhash_est(au_dir_size(/*file*/NULL, d)); ++ err = au_nhash_alloc(&a->whlist, rdhash, GFP_NOFS); ++ if (unlikely(err)) ++ goto out; ++ ++ au_set_dbstart(d, a->dst_bstart); ++ err = may_rename_dstdir(d, &a->whlist); ++ au_set_dbstart(d, a->btgt); ++ } ++ a->dst_h_dentry = au_h_dptr(d, au_dbstart(d)); ++ if (unlikely(err)) ++ goto out; ++ ++ d = a->src_dentry; ++ a->src_h_dentry = au_h_dptr(d, au_dbstart(d)); ++ if (au_ftest_ren(a->flags, ISDIR)) { ++ err = may_rename_srcdir(d, a->btgt); ++ if (unlikely(err)) { ++ au_nhash_wh_free(&a->whlist); ++ a->whlist.nh_num = 0; ++ } ++ } ++out: ++ return err; ++} ++ ++/* ---------------------------------------------------------------------- */ ++ ++/* ++ * simple tests for rename. ++ * following the checks in vfs, plus the parent-child relationship. 
++ */ ++static int au_may_ren(struct au_ren_args *a) ++{ ++ int err, isdir; ++ struct inode *h_inode; ++ ++ if (a->src_bstart == a->btgt) { ++ err = au_may_del(a->src_dentry, a->btgt, a->src_h_parent, ++ au_ftest_ren(a->flags, ISDIR)); ++ if (unlikely(err)) ++ goto out; ++ err = -EINVAL; ++ if (unlikely(a->src_h_dentry == a->h_trap)) ++ goto out; ++ } ++ ++ err = 0; ++ if (a->dst_bstart != a->btgt) ++ goto out; ++ ++ err = -ENOTEMPTY; ++ if (unlikely(a->dst_h_dentry == a->h_trap)) ++ goto out; ++ ++ err = -EIO; ++ h_inode = a->dst_h_dentry->d_inode; ++ isdir = !!au_ftest_ren(a->flags, ISDIR); ++ if (!a->dst_dentry->d_inode) { ++ if (unlikely(h_inode)) ++ goto out; ++ err = au_may_add(a->dst_dentry, a->btgt, a->dst_h_parent, ++ isdir); ++ } else { ++ if (unlikely(!h_inode || !h_inode->i_nlink)) ++ goto out; ++ err = au_may_del(a->dst_dentry, a->btgt, a->dst_h_parent, ++ isdir); ++ if (unlikely(err)) ++ goto out; ++ } ++ ++out: ++ if (unlikely(err == -ENOENT || err == -EEXIST)) ++ err = -EIO; ++ AuTraceErr(err); ++ return err; ++} ++ ++/* ---------------------------------------------------------------------- */ ++ ++/* ++ * locking order ++ * (VFS) ++ * - src_dir and dir by lock_rename() ++ * - inode if it exists ++ * (aufs) ++ * - lock all ++ * + src_dentry and dentry by aufs_read_and_write_lock2() which calls, ++ * + si_read_lock ++ * + di_write_lock2_child() ++ * + di_write_lock_child() ++ * + ii_write_lock_child() ++ * + di_write_lock_child2() ++ * + ii_write_lock_child2() ++ * + src_parent and parent ++ * + di_write_lock_parent() ++ * + ii_write_lock_parent() ++ * + di_write_lock_parent2() ++ * + ii_write_lock_parent2() ++ * + lower src_dir and dir by vfsub_lock_rename() ++ * + verify every relationship between child and parent. if any ++ * of them fails, unlock all and return -EBUSY.
++ */ ++static void au_ren_unlock(struct au_ren_args *a) ++{ ++ struct super_block *sb; ++ ++ sb = a->dst_dentry->d_sb; ++ if (au_ftest_ren(a->flags, MNT_WRITE)) ++ mnt_drop_write(a->br->br_mnt); ++ vfsub_unlock_rename(a->src_h_parent, a->src_hdir, ++ a->dst_h_parent, a->dst_hdir); ++} ++ ++static int au_ren_lock(struct au_ren_args *a) ++{ ++ int err; ++ unsigned int udba; ++ ++ err = 0; ++ a->src_h_parent = au_h_dptr(a->src_parent, a->btgt); ++ a->src_hdir = au_hi(a->src_dir, a->btgt); ++ a->dst_h_parent = au_h_dptr(a->dst_parent, a->btgt); ++ a->dst_hdir = au_hi(a->dst_dir, a->btgt); ++ a->h_trap = vfsub_lock_rename(a->src_h_parent, a->src_hdir, ++ a->dst_h_parent, a->dst_hdir); ++ udba = au_opt_udba(a->src_dentry->d_sb); ++ if (unlikely(a->src_hdir->hi_inode != a->src_h_parent->d_inode ++ || a->dst_hdir->hi_inode != a->dst_h_parent->d_inode)) ++ err = au_busy_or_stale(); ++ if (!err && au_dbstart(a->src_dentry) == a->btgt) ++ err = au_h_verify(a->src_h_dentry, udba, ++ a->src_h_parent->d_inode, a->src_h_parent, ++ a->br); ++ if (!err && au_dbstart(a->dst_dentry) == a->btgt) ++ err = au_h_verify(a->dst_h_dentry, udba, ++ a->dst_h_parent->d_inode, a->dst_h_parent, ++ a->br); ++ if (!err) { ++ err = mnt_want_write(a->br->br_mnt); ++ if (unlikely(err)) ++ goto out_unlock; ++ au_fset_ren(a->flags, MNT_WRITE); ++ goto out; /* success */ ++ } ++ ++ err = au_busy_or_stale(); ++ ++out_unlock: ++ au_ren_unlock(a); ++out: ++ return err; ++} ++ ++/* ---------------------------------------------------------------------- */ ++ ++static void au_ren_refresh_dir(struct au_ren_args *a) ++{ ++ struct inode *dir; ++ ++ dir = a->dst_dir; ++ dir->i_version++; ++ if (au_ftest_ren(a->flags, ISDIR)) { ++ /* is this updating defined in POSIX? */ ++ au_cpup_attr_timesizes(a->src_inode); ++ au_cpup_attr_nlink(dir, /*force*/1); ++ } ++ ++ if (au_ibstart(dir) == a->btgt) ++ au_cpup_attr_timesizes(dir); ++ ++ if (au_ftest_ren(a->flags, ISSAMEDIR)) ++ return; ++ ++ dir = a->src_dir; ++ dir->i_version++; ++ if (au_ftest_ren(a->flags, ISDIR)) ++ au_cpup_attr_nlink(dir, /*force*/1); ++ if (au_ibstart(dir) == a->btgt) ++ au_cpup_attr_timesizes(dir); ++} ++ ++static void au_ren_refresh(struct au_ren_args *a) ++{ ++ aufs_bindex_t bend, bindex; ++ struct dentry *d, *h_d; ++ struct inode *i, *h_i; ++ struct super_block *sb; ++ ++ d = a->dst_dentry; ++ d_drop(d); ++ if (a->h_dst) ++ /* already dget-ed by au_ren_or_cpup() */ ++ au_set_h_dptr(d, a->btgt, a->h_dst); ++ ++ i = a->dst_inode; ++ if (i) { ++ if (!au_ftest_ren(a->flags, ISDIR)) ++ vfsub_drop_nlink(i); ++ else { ++ vfsub_dead_dir(i); ++ au_cpup_attr_timesizes(i); ++ } ++ au_update_dbrange(d, /*do_put_zero*/1); ++ } else { ++ bend = a->btgt; ++ for (bindex = au_dbstart(d); bindex < bend; bindex++) ++ au_set_h_dptr(d, bindex, NULL); ++ bend = au_dbend(d); ++ for (bindex = a->btgt + 1; bindex <= bend; bindex++) ++ au_set_h_dptr(d, bindex, NULL); ++ au_update_dbrange(d, /*do_put_zero*/0); ++ } ++ ++ d = a->src_dentry; ++ au_set_dbwh(d, -1); ++ bend = au_dbend(d); ++ for (bindex = a->btgt + 1; bindex <= bend; bindex++) { ++ h_d = au_h_dptr(d, bindex); ++ if (h_d) ++ au_set_h_dptr(d, bindex, NULL); ++ } ++ au_set_dbend(d, a->btgt); ++ ++ sb = d->d_sb; ++ i = a->src_inode; ++ if (au_opt_test(au_mntflags(sb), PLINK) && au_plink_test(i)) ++ return; /* success */ ++ ++ bend = au_ibend(i); ++ for (bindex = a->btgt + 1; bindex <= bend; bindex++) { ++ h_i = au_h_iptr(i, bindex); ++ if (h_i) { ++ au_xino_write(sb, bindex, h_i->i_ino, /*ino*/0); ++ /* ignore this error */ ++ 
au_set_h_iptr(i, bindex, NULL, 0); ++ } ++ } ++ au_set_ibend(i, a->btgt); ++} ++ ++/* ---------------------------------------------------------------------- */ ++ ++/* mainly for link(2) and rename(2) */ ++int au_wbr(struct dentry *dentry, aufs_bindex_t btgt) ++{ ++ aufs_bindex_t bdiropq, bwh; ++ struct dentry *parent; ++ struct au_branch *br; ++ ++ parent = dentry->d_parent; ++ IMustLock(parent->d_inode); /* dir is locked */ ++ ++ bdiropq = au_dbdiropq(parent); ++ bwh = au_dbwh(dentry); ++ br = au_sbr(dentry->d_sb, btgt); ++ if (au_br_rdonly(br) ++ || (0 <= bdiropq && bdiropq < btgt) ++ || (0 <= bwh && bwh < btgt)) ++ btgt = -1; ++ ++ AuDbg("btgt %d\n", btgt); ++ return btgt; ++} ++ ++/* sets src_bstart, dst_bstart and btgt */ ++static int au_ren_wbr(struct au_ren_args *a) ++{ ++ int err; ++ struct au_wr_dir_args wr_dir_args = { ++ /* .force_btgt = -1, */ ++ .flags = AuWrDir_ADD_ENTRY ++ }; ++ ++ a->src_bstart = au_dbstart(a->src_dentry); ++ a->dst_bstart = au_dbstart(a->dst_dentry); ++ if (au_ftest_ren(a->flags, ISDIR)) ++ au_fset_wrdir(wr_dir_args.flags, ISDIR); ++ wr_dir_args.force_btgt = a->src_bstart; ++ if (a->dst_inode && a->dst_bstart < a->src_bstart) ++ wr_dir_args.force_btgt = a->dst_bstart; ++ wr_dir_args.force_btgt = au_wbr(a->dst_dentry, wr_dir_args.force_btgt); ++ err = au_wr_dir(a->dst_dentry, a->src_dentry, &wr_dir_args); ++ a->btgt = err; ++ ++ return err; ++} ++ ++static void au_ren_dt(struct au_ren_args *a) ++{ ++ a->h_path.dentry = a->src_h_parent; ++ au_dtime_store(a->src_dt + AuPARENT, a->src_parent, &a->h_path); ++ if (!au_ftest_ren(a->flags, ISSAMEDIR)) { ++ a->h_path.dentry = a->dst_h_parent; ++ au_dtime_store(a->dst_dt + AuPARENT, a->dst_parent, &a->h_path); ++ } ++ ++ au_fclr_ren(a->flags, DT_DSTDIR); ++ if (!au_ftest_ren(a->flags, ISDIR)) ++ return; ++ ++ a->h_path.dentry = a->src_h_dentry; ++ au_dtime_store(a->src_dt + AuCHILD, a->src_dentry, &a->h_path); ++ if (a->dst_h_dentry->d_inode) { ++ au_fset_ren(a->flags, DT_DSTDIR); ++ a->h_path.dentry = a->dst_h_dentry; ++ au_dtime_store(a->dst_dt + AuCHILD, a->dst_dentry, &a->h_path); ++ } ++} ++ ++static void au_ren_rev_dt(int err, struct au_ren_args *a) ++{ ++ struct dentry *h_d; ++ struct mutex *h_mtx; ++ ++ au_dtime_revert(a->src_dt + AuPARENT); ++ if (!au_ftest_ren(a->flags, ISSAMEDIR)) ++ au_dtime_revert(a->dst_dt + AuPARENT); ++ ++ if (au_ftest_ren(a->flags, ISDIR) && err != -EIO) { ++ h_d = a->src_dt[AuCHILD].dt_h_path.dentry; ++ h_mtx = &h_d->d_inode->i_mutex; ++ mutex_lock_nested(h_mtx, AuLsc_I_CHILD); ++ au_dtime_revert(a->src_dt + AuCHILD); ++ mutex_unlock(h_mtx); ++ ++ if (au_ftest_ren(a->flags, DT_DSTDIR)) { ++ h_d = a->dst_dt[AuCHILD].dt_h_path.dentry; ++ h_mtx = &h_d->d_inode->i_mutex; ++ mutex_lock_nested(h_mtx, AuLsc_I_CHILD); ++ au_dtime_revert(a->dst_dt + AuCHILD); ++ mutex_unlock(h_mtx); ++ } ++ } ++} ++ ++/* ---------------------------------------------------------------------- */ ++ ++int aufs_rename(struct inode *_src_dir, struct dentry *_src_dentry, ++ struct inode *_dst_dir, struct dentry *_dst_dentry) ++{ ++ int err, flags; ++ /* reduce stack space */ ++ struct au_ren_args *a; ++ ++ AuDbg("%.*s, %.*s\n", AuDLNPair(_src_dentry), AuDLNPair(_dst_dentry)); ++ IMustLock(_src_dir); ++ IMustLock(_dst_dir); ++ ++ err = -ENOMEM; ++ BUILD_BUG_ON(sizeof(*a) > PAGE_SIZE); ++ a = kzalloc(sizeof(*a), GFP_NOFS); ++ if (unlikely(!a)) ++ goto out; ++ ++ a->src_dir = _src_dir; ++ a->src_dentry = _src_dentry; ++ a->src_inode = a->src_dentry->d_inode; ++ a->src_parent = a->src_dentry->d_parent; /* dir inode 
is locked */ ++ a->dst_dir = _dst_dir; ++ a->dst_dentry = _dst_dentry; ++ a->dst_inode = a->dst_dentry->d_inode; ++ a->dst_parent = a->dst_dentry->d_parent; /* dir inode is locked */ ++ if (a->dst_inode) { ++ IMustLock(a->dst_inode); ++ au_igrab(a->dst_inode); ++ } ++ ++ err = -ENOTDIR; ++ flags = AuLock_FLUSH | AuLock_NOPLM | AuLock_GEN; ++ if (S_ISDIR(a->src_inode->i_mode)) { ++ au_fset_ren(a->flags, ISDIR); ++ if (unlikely(a->dst_inode && !S_ISDIR(a->dst_inode->i_mode))) ++ goto out_free; ++ err = aufs_read_and_write_lock2(a->dst_dentry, a->src_dentry, ++ AuLock_DIR | flags); ++ } else ++ err = aufs_read_and_write_lock2(a->dst_dentry, a->src_dentry, ++ flags); ++ if (unlikely(err)) ++ goto out_free; ++ ++ err = au_d_hashed_positive(a->src_dentry); ++ if (unlikely(err)) ++ goto out_unlock; ++ err = -ENOENT; ++ if (a->dst_inode) { ++ /* ++ * If it is a dir, the VFS unhashes dst_dentry before calling this ++ * function. It means we cannot rely upon d_unhashed(). ++ */ ++ if (unlikely(!a->dst_inode->i_nlink)) ++ goto out_unlock; ++ if (!S_ISDIR(a->dst_inode->i_mode)) { ++ err = au_d_hashed_positive(a->dst_dentry); ++ if (unlikely(err)) ++ goto out_unlock; ++ } else if (unlikely(IS_DEADDIR(a->dst_inode))) ++ goto out_unlock; ++ } else if (unlikely(d_unhashed(a->dst_dentry))) ++ goto out_unlock; ++ ++ /* ++ * is it possible? ++ * yes, it happened (in linux-3.3-rcN) but I don't know why. ++ * there may exist a problem somewhere else. ++ */ ++ err = -EINVAL; ++ if (unlikely(a->dst_parent->d_inode == a->src_dentry->d_inode)) ++ goto out_unlock; ++ ++ au_fset_ren(a->flags, ISSAMEDIR); /* temporary */ ++ di_write_lock_parent(a->dst_parent); ++ ++ /* which branch we process */ ++ err = au_ren_wbr(a); ++ if (unlikely(err < 0)) ++ goto out_parent; ++ a->br = au_sbr(a->dst_dentry->d_sb, a->btgt); ++ a->h_path.mnt = a->br->br_mnt; ++ ++ /* are they available to be renamed */ ++ err = au_ren_may_dir(a); ++ if (unlikely(err)) ++ goto out_children; ++ ++ /* prepare the writable parent dir on the same branch */ ++ if (a->dst_bstart == a->btgt) { ++ au_fset_ren(a->flags, WHDST); ++ } else { ++ err = au_cpup_dirs(a->dst_dentry, a->btgt); ++ if (unlikely(err)) ++ goto out_children; ++ } ++ ++ if (a->src_dir != a->dst_dir) { ++ /* ++ * this temporary unlock is safe, ++ * because both dir->i_mutex are locked.
++ */ ++ di_write_unlock(a->dst_parent); ++ di_write_lock_parent(a->src_parent); ++ err = au_wr_dir_need_wh(a->src_dentry, ++ au_ftest_ren(a->flags, ISDIR), ++ &a->btgt); ++ di_write_unlock(a->src_parent); ++ di_write_lock2_parent(a->src_parent, a->dst_parent, /*isdir*/1); ++ au_fclr_ren(a->flags, ISSAMEDIR); ++ } else ++ err = au_wr_dir_need_wh(a->src_dentry, ++ au_ftest_ren(a->flags, ISDIR), ++ &a->btgt); ++ if (unlikely(err < 0)) ++ goto out_children; ++ if (err) ++ au_fset_ren(a->flags, WHSRC); ++ ++ /* lock them all */ ++ err = au_ren_lock(a); ++ if (unlikely(err)) ++ goto out_children; ++ ++ if (!au_opt_test(au_mntflags(a->dst_dir->i_sb), UDBA_NONE)) ++ err = au_may_ren(a); ++ else if (unlikely(a->dst_dentry->d_name.len > AUFS_MAX_NAMELEN)) ++ err = -ENAMETOOLONG; ++ if (unlikely(err)) ++ goto out_hdir; ++ ++ /* store timestamps to be revertible */ ++ au_ren_dt(a); ++ ++ /* here we go */ ++ err = do_rename(a); ++ if (unlikely(err)) ++ goto out_dt; ++ ++ /* update dir attributes */ ++ au_ren_refresh_dir(a); ++ ++ /* dput/iput all lower dentries */ ++ au_ren_refresh(a); ++ ++ goto out_hdir; /* success */ ++ ++out_dt: ++ au_ren_rev_dt(err, a); ++out_hdir: ++ au_ren_unlock(a); ++out_children: ++ au_nhash_wh_free(&a->whlist); ++ if (err && a->dst_inode && a->dst_bstart != a->btgt) { ++ AuDbg("bstart %d, btgt %d\n", a->dst_bstart, a->btgt); ++ au_set_h_dptr(a->dst_dentry, a->btgt, NULL); ++ au_set_dbstart(a->dst_dentry, a->dst_bstart); ++ } ++out_parent: ++ if (!err) ++ d_move(a->src_dentry, a->dst_dentry); ++ else { ++ au_update_dbstart(a->dst_dentry); ++ if (!a->dst_inode) ++ d_drop(a->dst_dentry); ++ } ++ if (au_ftest_ren(a->flags, ISSAMEDIR)) ++ di_write_unlock(a->dst_parent); ++ else ++ di_write_unlock2(a->src_parent, a->dst_parent); ++out_unlock: ++ aufs_read_and_write_unlock2(a->dst_dentry, a->src_dentry); ++out_free: ++ iput(a->dst_inode); ++ if (a->thargs) ++ au_whtmp_rmdir_free(a->thargs); ++ kfree(a); ++out: ++ AuTraceErr(err); ++ return err; ++} +--- /dev/null 2012-03-14 12:35:58.848999748 +0100 ++++ b/fs/aufs/Kconfig 2011-10-25 09:52:26.000000000 +0200 +@@ -0,0 +1,203 @@ ++config AUFS_FS ++ tristate "Aufs (Advanced multi layered unification filesystem) support" ++ depends on EXPERIMENTAL ++ help ++ Aufs is a stackable unification filesystem such as Unionfs, ++ which unifies several directories and provides a merged single ++ directory. ++ In the early days, aufs was an entirely re-designed and ++ re-implemented Unionfs Version 1.x series. Introducing many ++ original ideas, approaches and improvements, it became totally ++ different from Unionfs while keeping the basic features. ++ ++if AUFS_FS ++choice ++ prompt "Maximum number of branches" ++ default AUFS_BRANCH_MAX_127 ++ help ++ Specifies the maximum number of branches (or member directories) ++ in a single aufs. The larger value consumes more system ++ resources and has a minor impact on performance. ++config AUFS_BRANCH_MAX_127 ++ bool "127" ++ help ++ Specifies the maximum number of branches (or member directories) ++ in a single aufs. The larger value consumes more system ++ resources and has a minor impact on performance. ++config AUFS_BRANCH_MAX_511 ++ bool "511" ++ help ++ Specifies the maximum number of branches (or member directories) ++ in a single aufs. The larger value consumes more system ++ resources and has a minor impact on performance. ++config AUFS_BRANCH_MAX_1023 ++ bool "1023" ++ help ++ Specifies the maximum number of branches (or member directories) ++ in a single aufs.
The larger value consumes more system ++ resources and has a minor impact on performance. ++config AUFS_BRANCH_MAX_32767 ++ bool "32767" ++ help ++ Specifies the maximum number of branches (or member directories) ++ in a single aufs. The larger value consumes more system ++ resources and has a minor impact on performance. ++endchoice ++ ++config AUFS_SBILIST ++ bool ++ depends on AUFS_MAGIC_SYSRQ || PROC_FS ++ default y ++ help ++ Automatic configuration for internal use. ++ When aufs supports Magic SysRq or /proc, this is enabled automatically. ++ ++config AUFS_HNOTIFY ++ bool "Detect direct branch access (bypassing aufs)" ++ help ++ If you want to modify files on branches directly, e.g. bypassing aufs, ++ and want aufs to fully detect the changes made to them, then enable this ++ option and use the 'udba=notify' mount option. ++ Currently there is only one available configuration, "fsnotify". ++ It will have a negative impact on performance. ++ See details in aufs.5. ++ ++choice ++ prompt "method" if AUFS_HNOTIFY ++ default AUFS_HFSNOTIFY ++config AUFS_HFSNOTIFY ++ bool "fsnotify" ++ select FSNOTIFY ++endchoice ++ ++config AUFS_EXPORT ++ bool "NFS-exportable aufs" ++ depends on EXPORTFS ++ help ++ If you want to export your mounted aufs via NFS, then enable this ++ option. There are several requirements for this configuration. ++ See details in aufs.5. ++ ++config AUFS_INO_T_64 ++ bool ++ depends on AUFS_EXPORT ++ depends on 64BIT && !(ALPHA || S390) ++ default y ++ help ++ Automatic configuration for internal use. ++ /* typedef unsigned long/int __kernel_ino_t */ ++ /* alpha and s390x are int */ ++ ++config AUFS_RDU ++ bool "Readdir in userspace" ++ help ++ Aufs has two methods to provide a merged view for a directory, ++ by a user-space library and by kernel-space natively. The latter ++ is always enabled but sometimes large and slow. ++ If you enable this option, install the library in the aufs2-util ++ package, and set some environment variables for your readdir(3), ++ then the work will be handled in user-space, which generally ++ shows better performance in most cases. ++ See details in aufs.5. ++ ++config AUFS_PROC_MAP ++ bool "support for /proc/maps and lsof(1)" ++ depends on PROC_FS ++ help ++ When you issue mmap(2) in aufs, it is actually a direct mmap(2) ++ call to the file on the branch fs since the file in aufs is ++ purely virtual. And the file path printed in /proc/maps (and ++ others) will be the path on the branch fs. In most cases, it ++ does no harm. But some utilities like lsof(1) may get confused since ++ the utility or user may expect the file path in aufs to be ++ printed. ++ To address this issue, aufs provides a patch which introduces a ++ new member called vm_prfile into struct vm_area_struct. The patch ++ is meaningless without enabling this configuration since nobody ++ sets the new vm_prfile member. ++ If you don't apply the patch, then enabling this configuration ++ will cause a compile error. ++ This approach is fragile since, if someone else makes changes ++ around vm_file, vm_prfile may not work anymore. As a ++ workaround for such a case, aufs provides this configuration. If you ++ disable it, then lsof(1) may produce an incorrect result but the ++ problem will be gone even if the aufs patch is applied (I hope). ++ ++config AUFS_SP_IATTR ++ bool "Respect the attributes (mtime/ctime mainly) of special files" ++ help ++ When you write something to a special file, some attributes of it ++ (mtime/ctime mainly) may be updated.
Generally such updates are ++ less important (actually some device drivers and NFS ignore ++ them). But some applications (such as test programs) require ++ such updates. If you need these updates, then enable this ++ configuration, which introduces some overhead. ++ Currently this configuration handles FIFO only. ++ ++config AUFS_SHWH ++ bool "Show whiteouts" ++ help ++ If you want to make the whiteouts in aufs visible, then enable ++ this option and specify the 'shwh' mount option. Although it may ++ sound like philosophy or something, technically it ++ simply shows the name of a whiteout while keeping its behaviour. ++ ++config AUFS_BR_RAMFS ++ bool "Ramfs (initramfs/rootfs) as an aufs branch" ++ help ++ If you want to use ramfs as an aufs branch fs, then enable this ++ option. Generally tmpfs is recommended. ++ Aufs prohibits them from being a branch fs by default, because ++ initramfs generally becomes unusable after switch_root or ++ something similar. If you set initramfs as an aufs branch and boot your ++ system by switch_root, you will easily meet a problem since the ++ files in initramfs may be inaccessible. ++ Unless you are going to use ramfs as an aufs branch fs without ++ switch_root or something, leave it N. ++ ++config AUFS_BR_FUSE ++ bool "Fuse fs as an aufs branch" ++ depends on FUSE_FS ++ select AUFS_POLL ++ help ++ If you want to use a fuse-based userspace filesystem as an aufs ++ branch fs, then enable this option. ++ It implements the internal poll(2) operation which is ++ implemented by fuse only (currently). ++ ++config AUFS_POLL ++ bool ++ help ++ Automatic configuration for internal use. ++ ++config AUFS_BR_HFSPLUS ++ bool "Hfsplus as an aufs branch" ++ depends on HFSPLUS_FS ++ default y ++ help ++ If you want to use hfsplus fs as an aufs branch fs, then enable ++ this option. This option introduces a small overhead when ++ copying up a file on hfsplus. ++ ++config AUFS_BDEV_LOOP ++ bool ++ depends on BLK_DEV_LOOP ++ default y ++ help ++ Automatic configuration for internal use. ++ Convert =[ym] into =y. ++ ++config AUFS_DEBUG ++ bool "Debug aufs" ++ help ++ Enable this to compile aufs internal debug code. ++ It will have a negative impact on performance. ++ ++config AUFS_MAGIC_SYSRQ ++ bool ++ depends on AUFS_DEBUG && MAGIC_SYSRQ ++ default y ++ help ++ Automatic configuration for internal use. ++ When aufs supports Magic SysRq, this is enabled automatically. ++endif +--- /dev/null 2012-03-14 12:35:58.848999748 +0100 ++++ b/fs/aufs/loop.c 2012-03-20 17:31:18.000000000 +0100 +@@ -0,0 +1,133 @@ ++/* ++ * Copyright (C) 2005-2012 Junjiro R. Okajima ++ * ++ * This program, aufs is free software; you can redistribute it and/or modify ++ * it under the terms of the GNU General Public License as published by ++ * the Free Software Foundation; either version 2 of the License, or ++ * (at your option) any later version. ++ * ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ * GNU General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public License ++ * along with this program; if not, write to the Free Software ++ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA ++ */ ++ ++/* ++ * support for loopback block device as a branch ++ */ ++ ++#include <linux/loop.h> ++#include "aufs.h" ++ ++/* ++ * test if two lower dentries have overlapping branches.
++ */ ++int au_test_loopback_overlap(struct super_block *sb, struct dentry *h_adding) ++{ ++ struct super_block *h_sb; ++ struct loop_device *l; ++ ++ h_sb = h_adding->d_sb; ++ if (MAJOR(h_sb->s_dev) != LOOP_MAJOR) ++ return 0; ++ ++ l = h_sb->s_bdev->bd_disk->private_data; ++ h_adding = l->lo_backing_file->f_dentry; ++ /* ++ * h_adding can be local NFS. ++ * in this case aufs cannot detect the loop. ++ */ ++ if (unlikely(h_adding->d_sb == sb)) ++ return 1; ++ return !!au_test_subdir(h_adding, sb->s_root); ++} ++ ++/* true if a kernel thread named 'loop[0-9].*' accesses a file */ ++int au_test_loopback_kthread(void) ++{ ++ int ret; ++ struct task_struct *tsk = current; ++ ++ ret = 0; ++ if (tsk->flags & PF_KTHREAD) { ++ const char c = tsk->comm[4]; ++ ret = ('0' <= c && c <= '9' ++ && !strncmp(tsk->comm, "loop", 4)); ++ } ++ ++ return ret; ++} ++ ++/* ---------------------------------------------------------------------- */ ++ ++#define au_warn_loopback_step 16 ++static int au_warn_loopback_nelem = au_warn_loopback_step; ++static unsigned long *au_warn_loopback_array; ++ ++void au_warn_loopback(struct super_block *h_sb) ++{ ++ int i, new_nelem; ++ unsigned long *a, magic; ++ static DEFINE_SPINLOCK(spin); ++ ++ magic = h_sb->s_magic; ++ spin_lock(&spin); ++ a = au_warn_loopback_array; ++ for (i = 0; i < au_warn_loopback_nelem && *a; i++) ++ if (a[i] == magic) { ++ spin_unlock(&spin); ++ return; ++ } ++ ++ /* h_sb is new to us, print it */ ++ if (i < au_warn_loopback_nelem) { ++ a[i] = magic; ++ goto pr; ++ } ++ ++ /* expand the array */ ++ new_nelem = au_warn_loopback_nelem + au_warn_loopback_step; ++ a = au_kzrealloc(au_warn_loopback_array, ++ au_warn_loopback_nelem * sizeof(unsigned long), ++ new_nelem * sizeof(unsigned long), GFP_ATOMIC); ++ if (a) { ++ au_warn_loopback_nelem = new_nelem; ++ au_warn_loopback_array = a; ++ a[i] = magic; ++ goto pr; ++ } ++ ++ spin_unlock(&spin); ++ AuWarn1("realloc failed, ignored\n"); ++ return; ++ ++pr: ++ spin_unlock(&spin); ++ pr_warning("you may want to try another patch for loopback file " ++ "on %s(0x%lx) branch\n", au_sbtype(h_sb), magic); ++} ++ ++int au_loopback_init(void) ++{ ++ int err; ++ struct super_block *sb __maybe_unused; ++ ++ AuDebugOn(sizeof(sb->s_magic) != sizeof(unsigned long)); ++ ++ err = 0; ++ au_warn_loopback_array = kcalloc(au_warn_loopback_step, ++ sizeof(unsigned long), GFP_NOFS); ++ if (unlikely(!au_warn_loopback_array)) ++ err = -ENOMEM; ++ ++ return err; ++} ++ ++void au_loopback_fin(void) ++{ ++ kfree(au_warn_loopback_array); ++} +--- /dev/null 2012-03-14 12:35:58.848999748 +0100 ++++ b/fs/aufs/loop.h 2012-03-20 17:31:18.000000000 +0100 +@@ -0,0 +1,50 @@ ++/* ++ * Copyright (C) 2005-2012 Junjiro R. Okajima ++ * ++ * This program, aufs is free software; you can redistribute it and/or modify ++ * it under the terms of the GNU General Public License as published by ++ * the Free Software Foundation; either version 2 of the License, or ++ * (at your option) any later version. ++ * ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ * GNU General Public License for more details. 
++ * ++ * You should have received a copy of the GNU General Public License ++ * along with this program; if not, write to the Free Software ++ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA ++ */ ++ ++/* ++ * support for loopback mount as a branch ++ */ ++ ++#ifndef __AUFS_LOOP_H__ ++#define __AUFS_LOOP_H__ ++ ++#ifdef __KERNEL__ ++ ++struct dentry; ++struct super_block; ++ ++#ifdef CONFIG_AUFS_BDEV_LOOP ++/* loop.c */ ++int au_test_loopback_overlap(struct super_block *sb, struct dentry *h_adding); ++int au_test_loopback_kthread(void); ++void au_warn_loopback(struct super_block *h_sb); ++ ++int au_loopback_init(void); ++void au_loopback_fin(void); ++#else ++AuStubInt0(au_test_loopback_overlap, struct super_block *sb, ++ struct dentry *h_adding) ++AuStubInt0(au_test_loopback_kthread, void) ++AuStubVoid(au_warn_loopback, struct super_block *h_sb) ++ ++AuStubInt0(au_loopback_init, void) ++AuStubVoid(au_loopback_fin, void) ++#endif /* BLK_DEV_LOOP */ ++ ++#endif /* __KERNEL__ */ ++#endif /* __AUFS_LOOP_H__ */ +--- /dev/null 2012-03-14 12:35:58.848999748 +0100 ++++ b/fs/aufs/magic.mk 2011-10-25 09:52:26.000000000 +0200 +@@ -0,0 +1,54 @@ ++ ++# defined in ${srctree}/fs/fuse/inode.c ++# tristate ++ifdef CONFIG_FUSE_FS ++ccflags-y += -DFUSE_SUPER_MAGIC=0x65735546 ++endif ++ ++# defined in ${srctree}/fs/ocfs2/ocfs2_fs.h ++# tristate ++ifdef CONFIG_OCFS2_FS ++ccflags-y += -DOCFS2_SUPER_MAGIC=0x7461636f ++endif ++ ++# defined in ${srctree}/fs/ocfs2/dlm/userdlm.h ++# tristate ++ifdef CONFIG_OCFS2_FS_O2CB ++ccflags-y += -DDLMFS_MAGIC=0x76a9f425 ++endif ++ ++# defined in ${srctree}/fs/cifs/cifsfs.c ++# tristate ++ifdef CONFIG_CIFS_FS ++ccflags-y += -DCIFS_MAGIC_NUMBER=0xFF534D42 ++endif ++ ++# defined in ${srctree}/fs/xfs/xfs_sb.h ++# tristate ++ifdef CONFIG_XFS_FS ++ccflags-y += -DXFS_SB_MAGIC=0x58465342 ++endif ++ ++# defined in ${srctree}/fs/configfs/mount.c ++# tristate ++ifdef CONFIG_CONFIGFS_FS ++ccflags-y += -DCONFIGFS_MAGIC=0x62656570 ++endif ++ ++# defined in ${srctree}/fs/9p/v9fs.h ++# tristate ++ifdef CONFIG_9P_FS ++ccflags-y += -DV9FS_MAGIC=0x01021997 ++endif ++ ++# defined in ${srctree}/fs/ubifs/ubifs.h ++# tristate ++ifdef CONFIG_UBIFS_FS ++ccflags-y += -DUBIFS_SUPER_MAGIC=0x24051905 ++endif ++ ++# defined in ${srctree}/fs/hfsplus/hfsplus_raw.h ++# tristate ++ifdef CONFIG_HFSPLUS_FS ++ccflags-y += -DHFSPLUS_SUPER_MAGIC=0x482b ++endif +--- /dev/null 2012-03-14 12:35:58.848999748 +0100 ++++ b/fs/aufs/Makefile 2012-03-20 17:31:17.000000000 +0100 +@@ -0,0 +1,42 @@ ++ ++include ${src}/magic.mk ++ifeq (${CONFIG_AUFS_FS},m) ++include ${src}/conf.mk ++endif ++-include ${src}/priv_def.mk ++ ++# cf. 
include/linux/kernel.h ++# enable pr_debug ++ccflags-y += -DDEBUG ++# sparse requires the full pathname ++ifdef M ++ccflags-y += -include ${M}/../../include/linux/aufs_type.h ++else ++ccflags-y += -include ${srctree}/include/linux/aufs_type.h ++endif ++ ++obj-$(CONFIG_AUFS_FS) += aufs.o ++aufs-y := module.o sbinfo.o super.o branch.o xino.o sysaufs.o opts.o \ ++ wkq.o vfsub.o dcsub.o \ ++ cpup.o whout.o wbr_policy.o \ ++ dinfo.o dentry.o \ ++ dynop.o \ ++ finfo.o file.o f_op.o \ ++ dir.o vdir.o \ ++ iinfo.o inode.o i_op.o i_op_add.o i_op_del.o i_op_ren.o \ ++ ioctl.o ++ ++# all are boolean ++aufs-$(CONFIG_PROC_FS) += procfs.o plink.o ++aufs-$(CONFIG_SYSFS) += sysfs.o ++aufs-$(CONFIG_DEBUG_FS) += dbgaufs.o ++aufs-$(CONFIG_AUFS_BDEV_LOOP) += loop.o ++aufs-$(CONFIG_AUFS_HNOTIFY) += hnotify.o ++aufs-$(CONFIG_AUFS_HFSNOTIFY) += hfsnotify.o ++aufs-$(CONFIG_AUFS_EXPORT) += export.o ++aufs-$(CONFIG_AUFS_POLL) += poll.o ++aufs-$(CONFIG_AUFS_RDU) += rdu.o ++aufs-$(CONFIG_AUFS_SP_IATTR) += f_op_sp.o ++aufs-$(CONFIG_AUFS_BR_HFSPLUS) += hfsplus.o ++aufs-$(CONFIG_AUFS_DEBUG) += debug.o ++aufs-$(CONFIG_AUFS_MAGIC_SYSRQ) += sysrq.o +--- /dev/null 2012-03-14 12:35:58.848999748 +0100 ++++ b/fs/aufs/module.c 2012-03-20 17:31:18.000000000 +0100 +@@ -0,0 +1,196 @@ ++/* ++ * Copyright (C) 2005-2012 Junjiro R. Okajima ++ * ++ * This program, aufs is free software; you can redistribute it and/or modify ++ * it under the terms of the GNU General Public License as published by ++ * the Free Software Foundation; either version 2 of the License, or ++ * (at your option) any later version. ++ * ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ * GNU General Public License for more details. 
++ * ++ * You should have received a copy of the GNU General Public License ++ * along with this program; if not, write to the Free Software ++ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA ++ */ ++ ++/* ++ * module global variables and operations ++ */ ++ ++#include ++#include ++#include "aufs.h" ++ ++void *au_kzrealloc(void *p, unsigned int nused, unsigned int new_sz, gfp_t gfp) ++{ ++ if (new_sz <= nused) ++ return p; ++ ++ p = krealloc(p, new_sz, gfp); ++ if (p) ++ memset(p + nused, 0, new_sz - nused); ++ return p; ++} ++ ++/* ---------------------------------------------------------------------- */ ++ ++/* ++ * aufs caches ++ */ ++struct kmem_cache *au_cachep[AuCache_Last]; ++static int __init au_cache_init(void) ++{ ++ au_cachep[AuCache_DINFO] = AuCacheCtor(au_dinfo, au_di_init_once); ++ if (au_cachep[AuCache_DINFO]) ++ /* SLAB_DESTROY_BY_RCU */ ++ au_cachep[AuCache_ICNTNR] = AuCacheCtor(au_icntnr, ++ au_icntnr_init_once); ++ if (au_cachep[AuCache_ICNTNR]) ++ au_cachep[AuCache_FINFO] = AuCacheCtor(au_finfo, ++ au_fi_init_once); ++ if (au_cachep[AuCache_FINFO]) ++ au_cachep[AuCache_VDIR] = AuCache(au_vdir); ++ if (au_cachep[AuCache_VDIR]) ++ au_cachep[AuCache_DEHSTR] = AuCache(au_vdir_dehstr); ++ if (au_cachep[AuCache_DEHSTR]) ++ return 0; ++ ++ return -ENOMEM; ++} ++ ++static void au_cache_fin(void) ++{ ++ int i; ++ ++ /* excluding AuCache_HNOTIFY */ ++ BUILD_BUG_ON(AuCache_HNOTIFY + 1 != AuCache_Last); ++ for (i = 0; i < AuCache_HNOTIFY; i++) ++ if (au_cachep[i]) { ++ kmem_cache_destroy(au_cachep[i]); ++ au_cachep[i] = NULL; ++ } ++} ++ ++/* ---------------------------------------------------------------------- */ ++ ++int au_dir_roflags; ++ ++#ifdef CONFIG_AUFS_SBILIST ++/* ++ * iterate_supers_type() doesn't protect us from ++ * remounting (branch management) ++ */ ++struct au_splhead au_sbilist; ++#endif ++ ++struct lock_class_key au_lc_key[AuLcKey_Last]; ++ ++/* ++ * functions for module interface. ++ */ ++MODULE_LICENSE("GPL"); ++/* MODULE_LICENSE("GPL v2"); */ ++MODULE_AUTHOR("Junjiro R. 
Okajima "); ++MODULE_DESCRIPTION(AUFS_NAME ++ " -- Advanced multi layered unification filesystem"); ++MODULE_VERSION(AUFS_VERSION); ++ ++/* this module parameter has no meaning when SYSFS is disabled */ ++int sysaufs_brs = 1; ++MODULE_PARM_DESC(brs, "use /fs/aufs/si_*/brN"); ++module_param_named(brs, sysaufs_brs, int, S_IRUGO); ++ ++/* ---------------------------------------------------------------------- */ ++ ++static char au_esc_chars[0x20 + 3]; /* 0x01-0x20, backslash, del, and NULL */ ++ ++int au_seq_path(struct seq_file *seq, struct path *path) ++{ ++ return seq_path(seq, path, au_esc_chars); ++} ++ ++/* ---------------------------------------------------------------------- */ ++ ++static int __init aufs_init(void) ++{ ++ int err, i; ++ char *p; ++ ++ p = au_esc_chars; ++ for (i = 1; i <= ' '; i++) ++ *p++ = i; ++ *p++ = '\\'; ++ *p++ = '\x7f'; ++ *p = 0; ++ ++ au_dir_roflags = au_file_roflags(O_DIRECTORY | O_LARGEFILE); ++ ++ au_sbilist_init(); ++ sysaufs_brs_init(); ++ au_debug_init(); ++ au_dy_init(); ++ err = sysaufs_init(); ++ if (unlikely(err)) ++ goto out; ++ err = au_procfs_init(); ++ if (unlikely(err)) ++ goto out_sysaufs; ++ err = au_wkq_init(); ++ if (unlikely(err)) ++ goto out_procfs; ++ err = au_loopback_init(); ++ if (unlikely(err)) ++ goto out_wkq; ++ err = au_hnotify_init(); ++ if (unlikely(err)) ++ goto out_loopback; ++ err = au_sysrq_init(); ++ if (unlikely(err)) ++ goto out_hin; ++ err = au_cache_init(); ++ if (unlikely(err)) ++ goto out_sysrq; ++ err = register_filesystem(&aufs_fs_type); ++ if (unlikely(err)) ++ goto out_cache; ++ /* since we define pr_fmt, call printk directly */ ++ printk(KERN_INFO AUFS_NAME " " AUFS_VERSION "\n"); ++ goto out; /* success */ ++ ++out_cache: ++ au_cache_fin(); ++out_sysrq: ++ au_sysrq_fin(); ++out_hin: ++ au_hnotify_fin(); ++out_loopback: ++ au_loopback_fin(); ++out_wkq: ++ au_wkq_fin(); ++out_procfs: ++ au_procfs_fin(); ++out_sysaufs: ++ sysaufs_fin(); ++ au_dy_fin(); ++out: ++ return err; ++} ++ ++static void __exit aufs_exit(void) ++{ ++ unregister_filesystem(&aufs_fs_type); ++ au_cache_fin(); ++ au_sysrq_fin(); ++ au_hnotify_fin(); ++ au_loopback_fin(); ++ au_wkq_fin(); ++ au_procfs_fin(); ++ sysaufs_fin(); ++ au_dy_fin(); ++} ++ ++module_init(aufs_init); ++module_exit(aufs_exit); +--- /dev/null 2012-03-14 12:35:58.848999748 +0100 ++++ b/fs/aufs/module.h 2012-03-20 17:31:18.000000000 +0100 +@@ -0,0 +1,105 @@ ++/* ++ * Copyright (C) 2005-2012 Junjiro R. Okajima ++ * ++ * This program, aufs is free software; you can redistribute it and/or modify ++ * it under the terms of the GNU General Public License as published by ++ * the Free Software Foundation; either version 2 of the License, or ++ * (at your option) any later version. ++ * ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ * GNU General Public License for more details. 
++ * ++ * You should have received a copy of the GNU General Public License ++ * along with this program; if not, write to the Free Software ++ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA ++ */ ++ ++/* ++ * module initialization and module-global ++ */ ++ ++#ifndef __AUFS_MODULE_H__ ++#define __AUFS_MODULE_H__ ++ ++#ifdef __KERNEL__ ++ ++#include <linux/slab.h> ++ ++struct path; ++struct seq_file; ++ ++/* module parameters */ ++extern int sysaufs_brs; ++ ++/* ---------------------------------------------------------------------- */ ++ ++extern int au_dir_roflags; ++ ++enum { ++ AuLcNonDir_FIINFO, ++ AuLcNonDir_DIINFO, ++ AuLcNonDir_IIINFO, ++ ++ AuLcDir_FIINFO, ++ AuLcDir_DIINFO, ++ AuLcDir_IIINFO, ++ ++ AuLcSymlink_DIINFO, ++ AuLcSymlink_IIINFO, ++ ++ AuLcKey_Last ++}; ++extern struct lock_class_key au_lc_key[AuLcKey_Last]; ++ ++void *au_kzrealloc(void *p, unsigned int nused, unsigned int new_sz, gfp_t gfp); ++int au_seq_path(struct seq_file *seq, struct path *path); ++ ++#ifdef CONFIG_PROC_FS ++/* procfs.c */ ++int __init au_procfs_init(void); ++void au_procfs_fin(void); ++#else ++AuStubInt0(au_procfs_init, void); ++AuStubVoid(au_procfs_fin, void); ++#endif ++ ++/* ---------------------------------------------------------------------- */ ++ ++/* kmem cache */ ++enum { ++ AuCache_DINFO, ++ AuCache_ICNTNR, ++ AuCache_FINFO, ++ AuCache_VDIR, ++ AuCache_DEHSTR, ++ AuCache_HNOTIFY, /* must be last */ ++ AuCache_Last ++}; ++ ++#define AuCacheFlags (SLAB_RECLAIM_ACCOUNT | SLAB_MEM_SPREAD) ++#define AuCache(type) KMEM_CACHE(type, AuCacheFlags) ++#define AuCacheCtor(type, ctor) \ ++ kmem_cache_create(#type, sizeof(struct type), \ ++ __alignof__(struct type), AuCacheFlags, ctor) ++ ++extern struct kmem_cache *au_cachep[]; ++ ++#define AuCacheFuncs(name, index) \ ++static inline struct au_##name *au_cache_alloc_##name(void) \ ++{ return kmem_cache_alloc(au_cachep[AuCache_##index], GFP_NOFS); } \ ++static inline void au_cache_free_##name(struct au_##name *p) \ ++{ kmem_cache_free(au_cachep[AuCache_##index], p); } ++ ++AuCacheFuncs(dinfo, DINFO); ++AuCacheFuncs(icntnr, ICNTNR); ++AuCacheFuncs(finfo, FINFO); ++AuCacheFuncs(vdir, VDIR); ++AuCacheFuncs(vdir_dehstr, DEHSTR); ++#ifdef CONFIG_AUFS_HNOTIFY ++AuCacheFuncs(hnotify, HNOTIFY); ++#endif ++ ++#endif /* __KERNEL__ */ ++#endif /* __AUFS_MODULE_H__ */ +--- /dev/null 2012-03-14 12:35:58.848999748 +0100 ++++ b/fs/aufs/opts.c 2012-03-20 17:31:18.000000000 +0100 +@@ -0,0 +1,1677 @@ ++/* ++ * Copyright (C) 2005-2012 Junjiro R. Okajima ++ * ++ * This program, aufs is free software; you can redistribute it and/or modify ++ * it under the terms of the GNU General Public License as published by ++ * the Free Software Foundation; either version 2 of the License, or ++ * (at your option) any later version. ++ * ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ * GNU General Public License for more details. 
++ * ++ * You should have received a copy of the GNU General Public License ++ * along with this program; if not, write to the Free Software ++ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA ++ */ ++ ++/* ++ * mount options/flags ++ */ ++ ++#include <linux/namei.h> ++#include <linux/types.h> /* a distribution requires */ ++#include <linux/parser.h> ++#include "aufs.h" ++ ++/* ---------------------------------------------------------------------- */ ++ ++enum { ++ Opt_br, ++ Opt_add, Opt_del, Opt_mod, Opt_reorder, Opt_append, Opt_prepend, ++ Opt_idel, Opt_imod, Opt_ireorder, ++ Opt_dirwh, Opt_rdcache, Opt_rdblk, Opt_rdhash, Opt_rendir, ++ Opt_rdblk_def, Opt_rdhash_def, ++ Opt_xino, Opt_zxino, Opt_noxino, ++ Opt_trunc_xino, Opt_trunc_xino_v, Opt_notrunc_xino, ++ Opt_trunc_xino_path, Opt_itrunc_xino, ++ Opt_trunc_xib, Opt_notrunc_xib, ++ Opt_shwh, Opt_noshwh, ++ Opt_plink, Opt_noplink, Opt_list_plink, ++ Opt_udba, ++ Opt_dio, Opt_nodio, ++ /* Opt_lock, Opt_unlock, */ ++ Opt_cmd, Opt_cmd_args, ++ Opt_diropq_a, Opt_diropq_w, ++ Opt_warn_perm, Opt_nowarn_perm, ++ Opt_wbr_copyup, Opt_wbr_create, ++ Opt_refrof, Opt_norefrof, ++ Opt_verbose, Opt_noverbose, ++ Opt_sum, Opt_nosum, Opt_wsum, ++ Opt_tail, Opt_ignore, Opt_ignore_silent, Opt_err ++}; ++ ++static match_table_t options = { ++ {Opt_br, "br=%s"}, ++ {Opt_br, "br:%s"}, ++ ++ {Opt_add, "add=%d:%s"}, ++ {Opt_add, "add:%d:%s"}, ++ {Opt_add, "ins=%d:%s"}, ++ {Opt_add, "ins:%d:%s"}, ++ {Opt_append, "append=%s"}, ++ {Opt_append, "append:%s"}, ++ {Opt_prepend, "prepend=%s"}, ++ {Opt_prepend, "prepend:%s"}, ++ ++ {Opt_del, "del=%s"}, ++ {Opt_del, "del:%s"}, ++ /* {Opt_idel, "idel:%d"}, */ ++ {Opt_mod, "mod=%s"}, ++ {Opt_mod, "mod:%s"}, ++ /* {Opt_imod, "imod:%d:%s"}, */ ++ ++ {Opt_dirwh, "dirwh=%d"}, ++ ++ {Opt_xino, "xino=%s"}, ++ {Opt_noxino, "noxino"}, ++ {Opt_trunc_xino, "trunc_xino"}, ++ {Opt_trunc_xino_v, "trunc_xino_v=%d:%d"}, ++ {Opt_notrunc_xino, "notrunc_xino"}, ++ {Opt_trunc_xino_path, "trunc_xino=%s"}, ++ {Opt_itrunc_xino, "itrunc_xino=%d"}, ++ /* {Opt_zxino, "zxino=%s"}, */ ++ {Opt_trunc_xib, "trunc_xib"}, ++ {Opt_notrunc_xib, "notrunc_xib"}, ++ ++#ifdef CONFIG_PROC_FS ++ {Opt_plink, "plink"}, ++#else ++ {Opt_ignore_silent, "plink"}, ++#endif ++ ++ {Opt_noplink, "noplink"}, ++ ++#ifdef CONFIG_AUFS_DEBUG ++ {Opt_list_plink, "list_plink"}, ++#endif ++ ++ {Opt_udba, "udba=%s"}, ++ ++ {Opt_dio, "dio"}, ++ {Opt_nodio, "nodio"}, ++ ++ {Opt_diropq_a, "diropq=always"}, ++ {Opt_diropq_a, "diropq=a"}, ++ {Opt_diropq_w, "diropq=whiteouted"}, ++ {Opt_diropq_w, "diropq=w"}, ++ ++ {Opt_warn_perm, "warn_perm"}, ++ {Opt_nowarn_perm, "nowarn_perm"}, ++ ++ /* keep them temporary */ ++ {Opt_ignore_silent, "coo=%s"}, ++ {Opt_ignore_silent, "nodlgt"}, ++ {Opt_ignore_silent, "nodirperm1"}, ++ {Opt_ignore_silent, "clean_plink"}, ++ ++#ifdef CONFIG_AUFS_SHWH ++ {Opt_shwh, "shwh"}, ++#endif ++ {Opt_noshwh, "noshwh"}, ++ ++ {Opt_rendir, "rendir=%d"}, ++ ++ {Opt_refrof, "refrof"}, ++ {Opt_norefrof, "norefrof"}, ++ ++ {Opt_verbose, "verbose"}, ++ {Opt_verbose, "v"}, ++ {Opt_noverbose, "noverbose"}, ++ {Opt_noverbose, "quiet"}, ++ {Opt_noverbose, "q"}, ++ {Opt_noverbose, "silent"}, ++ ++ {Opt_sum, "sum"}, ++ {Opt_nosum, "nosum"}, ++ {Opt_wsum, "wsum"}, ++ ++ {Opt_rdcache, "rdcache=%d"}, ++ {Opt_rdblk, "rdblk=%d"}, ++ {Opt_rdblk_def, "rdblk=def"}, ++ {Opt_rdhash, "rdhash=%d"}, ++ {Opt_rdhash_def, "rdhash=def"}, ++ ++ {Opt_wbr_create, "create=%s"}, ++ {Opt_wbr_create, "create_policy=%s"}, ++ {Opt_wbr_copyup, "cpup=%s"}, ++ {Opt_wbr_copyup, "copyup=%s"}, ++ {Opt_wbr_copyup, 
"copyup_policy=%s"}, ++ ++ /* internal use for the scripts */ ++ {Opt_ignore_silent, "si=%s"}, ++ ++ {Opt_br, "dirs=%s"}, ++ {Opt_ignore, "debug=%d"}, ++ {Opt_ignore, "delete=whiteout"}, ++ {Opt_ignore, "delete=all"}, ++ {Opt_ignore, "imap=%s"}, ++ ++ /* temporary workaround, due to old mount(8)? */ ++ {Opt_ignore_silent, "relatime"}, ++ ++ {Opt_err, NULL} ++}; ++ ++/* ---------------------------------------------------------------------- */ ++ ++static const char *au_parser_pattern(int val, struct match_token *token) ++{ ++ while (token->pattern) { ++ if (token->token == val) ++ return token->pattern; ++ token++; ++ } ++ BUG(); ++ return "??"; ++} ++ ++/* ---------------------------------------------------------------------- */ ++ ++static match_table_t brperm = { ++ {AuBrPerm_RO, AUFS_BRPERM_RO}, ++ {AuBrPerm_RR, AUFS_BRPERM_RR}, ++ {AuBrPerm_RW, AUFS_BRPERM_RW}, ++ {0, NULL} ++}; ++ ++static match_table_t brrattr = { ++ {AuBrRAttr_WH, AUFS_BRRATTR_WH}, ++ {0, NULL} ++}; ++ ++static match_table_t brwattr = { ++ {AuBrWAttr_NoLinkWH, AUFS_BRWATTR_NLWH}, ++ {0, NULL} ++}; ++ ++#define AuBrStr_LONGEST AUFS_BRPERM_RW "+" AUFS_BRWATTR_NLWH ++ ++static int br_attr_val(char *str, match_table_t table, substring_t args[]) ++{ ++ int attr, v; ++ char *p; ++ ++ attr = 0; ++ do { ++ p = strchr(str, '+'); ++ if (p) ++ *p = 0; ++ v = match_token(str, table, args); ++ if (v) ++ attr |= v; ++ else { ++ if (p) ++ *p = '+'; ++ pr_warning("ignored branch attribute %s\n", str); ++ break; ++ } ++ if (p) ++ str = p + 1; ++ } while (p); ++ ++ return attr; ++} ++ ++static int noinline_for_stack br_perm_val(char *perm) ++{ ++ int val; ++ char *p; ++ substring_t args[MAX_OPT_ARGS]; ++ ++ p = strchr(perm, '+'); ++ if (p) ++ *p = 0; ++ val = match_token(perm, brperm, args); ++ if (!val) { ++ if (p) ++ *p = '+'; ++ pr_warning("ignored branch permission %s\n", perm); ++ val = AuBrPerm_RO; ++ goto out; ++ } ++ if (!p) ++ goto out; ++ ++ switch (val) { ++ case AuBrPerm_RO: ++ case AuBrPerm_RR: ++ val |= br_attr_val(p + 1, brrattr, args); ++ break; ++ case AuBrPerm_RW: ++ val |= br_attr_val(p + 1, brwattr, args); ++ break; ++ } ++ ++out: ++ return val; ++} ++ ++/* Caller should free the return value */ ++char *au_optstr_br_perm(int brperm) ++{ ++ char *p, a[sizeof(AuBrStr_LONGEST)]; ++ int sz; ++ ++#define SetPerm(str) do { \ ++ sz = sizeof(str); \ ++ memcpy(a, str, sz); \ ++ p = a + sz - 1; \ ++ } while (0) ++ ++#define AppendAttr(flag, str) do { \ ++ if (brperm & flag) { \ ++ sz = sizeof(str); \ ++ *p++ = '+'; \ ++ memcpy(p, str, sz); \ ++ p += sz - 1; \ ++ } \ ++ } while (0) ++ ++ switch (brperm & AuBrPerm_Mask) { ++ case AuBrPerm_RO: ++ SetPerm(AUFS_BRPERM_RO); ++ break; ++ case AuBrPerm_RR: ++ SetPerm(AUFS_BRPERM_RR); ++ break; ++ case AuBrPerm_RW: ++ SetPerm(AUFS_BRPERM_RW); ++ break; ++ default: ++ AuDebugOn(1); ++ } ++ ++ AppendAttr(AuBrRAttr_WH, AUFS_BRRATTR_WH); ++ AppendAttr(AuBrWAttr_NoLinkWH, AUFS_BRWATTR_NLWH); ++ ++ AuDebugOn(strlen(a) >= sizeof(a)); ++ return kstrdup(a, GFP_NOFS); ++#undef SetPerm ++#undef AppendAttr ++} ++ ++/* ---------------------------------------------------------------------- */ ++ ++static match_table_t udbalevel = { ++ {AuOpt_UDBA_REVAL, "reval"}, ++ {AuOpt_UDBA_NONE, "none"}, ++#ifdef CONFIG_AUFS_HNOTIFY ++ {AuOpt_UDBA_HNOTIFY, "notify"}, /* abstraction */ ++#ifdef CONFIG_AUFS_HFSNOTIFY ++ {AuOpt_UDBA_HNOTIFY, "fsnotify"}, ++#endif ++#endif ++ {-1, NULL} ++}; ++ ++static int noinline_for_stack udba_val(char *str) ++{ ++ substring_t args[MAX_OPT_ARGS]; ++ ++ return 
match_token(str, udbalevel, args); ++} ++ ++const char *au_optstr_udba(int udba) ++{ ++ return au_parser_pattern(udba, (void *)udbalevel); ++} ++ ++/* ---------------------------------------------------------------------- */ ++ ++static match_table_t au_wbr_create_policy = { ++ {AuWbrCreate_TDP, "tdp"}, ++ {AuWbrCreate_TDP, "top-down-parent"}, ++ {AuWbrCreate_RR, "rr"}, ++ {AuWbrCreate_RR, "round-robin"}, ++ {AuWbrCreate_MFS, "mfs"}, ++ {AuWbrCreate_MFS, "most-free-space"}, ++ {AuWbrCreate_MFSV, "mfs:%d"}, ++ {AuWbrCreate_MFSV, "most-free-space:%d"}, ++ ++ {AuWbrCreate_MFSRR, "mfsrr:%d"}, ++ {AuWbrCreate_MFSRRV, "mfsrr:%d:%d"}, ++ {AuWbrCreate_PMFS, "pmfs"}, ++ {AuWbrCreate_PMFSV, "pmfs:%d"}, ++ ++ {-1, NULL} ++}; ++ ++/* ++ * cf. linux/lib/parser.c and cmdline.c ++ * gave up calling memparse() since it uses simple_strtoull() instead of ++ * kstrto...(). ++ */ ++static int noinline_for_stack ++au_match_ull(substring_t *s, unsigned long long *result) ++{ ++ int err; ++ unsigned int len; ++ char a[32]; ++ ++ err = -ERANGE; ++ len = s->to - s->from; ++ if (len + 1 <= sizeof(a)) { ++ memcpy(a, s->from, len); ++ a[len] = '\0'; ++ err = kstrtoull(a, 0, result); ++ } ++ return err; ++} ++ ++static int au_wbr_mfs_wmark(substring_t *arg, char *str, ++ struct au_opt_wbr_create *create) ++{ ++ int err; ++ unsigned long long ull; ++ ++ err = 0; ++ if (!au_match_ull(arg, &ull)) ++ create->mfsrr_watermark = ull; ++ else { ++ pr_err("bad integer in %s\n", str); ++ err = -EINVAL; ++ } ++ ++ return err; ++} ++ ++static int au_wbr_mfs_sec(substring_t *arg, char *str, ++ struct au_opt_wbr_create *create) ++{ ++ int n, err; ++ ++ err = 0; ++ if (!match_int(arg, &n) && 0 <= n && n <= AUFS_MFS_MAX_SEC) ++ create->mfs_second = n; ++ else { ++ pr_err("bad integer in %s\n", str); ++ err = -EINVAL; ++ } ++ ++ return err; ++} ++ ++static int noinline_for_stack ++au_wbr_create_val(char *str, struct au_opt_wbr_create *create) ++{ ++ int err, e; ++ substring_t args[MAX_OPT_ARGS]; ++ ++ err = match_token(str, au_wbr_create_policy, args); ++ create->wbr_create = err; ++ switch (err) { ++ case AuWbrCreate_MFSRRV: ++ e = au_wbr_mfs_wmark(&args[0], str, create); ++ if (!e) ++ e = au_wbr_mfs_sec(&args[1], str, create); ++ if (unlikely(e)) ++ err = e; ++ break; ++ case AuWbrCreate_MFSRR: ++ e = au_wbr_mfs_wmark(&args[0], str, create); ++ if (unlikely(e)) { ++ err = e; ++ break; ++ } ++ /*FALLTHROUGH*/ ++ case AuWbrCreate_MFS: ++ case AuWbrCreate_PMFS: ++ create->mfs_second = AUFS_MFS_DEF_SEC; ++ break; ++ case AuWbrCreate_MFSV: ++ case AuWbrCreate_PMFSV: ++ e = au_wbr_mfs_sec(&args[0], str, create); ++ if (unlikely(e)) ++ err = e; ++ break; ++ } ++ ++ return err; ++} ++ ++const char *au_optstr_wbr_create(int wbr_create) ++{ ++ return au_parser_pattern(wbr_create, (void *)au_wbr_create_policy); ++} ++ ++static match_table_t au_wbr_copyup_policy = { ++ {AuWbrCopyup_TDP, "tdp"}, ++ {AuWbrCopyup_TDP, "top-down-parent"}, ++ {AuWbrCopyup_BUP, "bup"}, ++ {AuWbrCopyup_BUP, "bottom-up-parent"}, ++ {AuWbrCopyup_BU, "bu"}, ++ {AuWbrCopyup_BU, "bottom-up"}, ++ {-1, NULL} ++}; ++ ++static int noinline_for_stack au_wbr_copyup_val(char *str) ++{ ++ substring_t args[MAX_OPT_ARGS]; ++ ++ return match_token(str, au_wbr_copyup_policy, args); ++} ++ ++const char *au_optstr_wbr_copyup(int wbr_copyup) ++{ ++ return au_parser_pattern(wbr_copyup, (void *)au_wbr_copyup_policy); ++} ++ ++/* ---------------------------------------------------------------------- */ ++ ++static const int lkup_dirflags = LOOKUP_FOLLOW | LOOKUP_DIRECTORY; ++ ++static void 
dump_opts(struct au_opts *opts) ++{ ++#ifdef CONFIG_AUFS_DEBUG ++ /* reduce stack space */ ++ union { ++ struct au_opt_add *add; ++ struct au_opt_del *del; ++ struct au_opt_mod *mod; ++ struct au_opt_xino *xino; ++ struct au_opt_xino_itrunc *xino_itrunc; ++ struct au_opt_wbr_create *create; ++ } u; ++ struct au_opt *opt; ++ ++ opt = opts->opt; ++ while (opt->type != Opt_tail) { ++ switch (opt->type) { ++ case Opt_add: ++ u.add = &opt->add; ++ AuDbg("add {b%d, %s, 0x%x, %p}\n", ++ u.add->bindex, u.add->pathname, u.add->perm, ++ u.add->path.dentry); ++ break; ++ case Opt_del: ++ case Opt_idel: ++ u.del = &opt->del; ++ AuDbg("del {%s, %p}\n", ++ u.del->pathname, u.del->h_path.dentry); ++ break; ++ case Opt_mod: ++ case Opt_imod: ++ u.mod = &opt->mod; ++ AuDbg("mod {%s, 0x%x, %p}\n", ++ u.mod->path, u.mod->perm, u.mod->h_root); ++ break; ++ case Opt_append: ++ u.add = &opt->add; ++ AuDbg("append {b%d, %s, 0x%x, %p}\n", ++ u.add->bindex, u.add->pathname, u.add->perm, ++ u.add->path.dentry); ++ break; ++ case Opt_prepend: ++ u.add = &opt->add; ++ AuDbg("prepend {b%d, %s, 0x%x, %p}\n", ++ u.add->bindex, u.add->pathname, u.add->perm, ++ u.add->path.dentry); ++ break; ++ case Opt_dirwh: ++ AuDbg("dirwh %d\n", opt->dirwh); ++ break; ++ case Opt_rdcache: ++ AuDbg("rdcache %d\n", opt->rdcache); ++ break; ++ case Opt_rdblk: ++ AuDbg("rdblk %u\n", opt->rdblk); ++ break; ++ case Opt_rdblk_def: ++ AuDbg("rdblk_def\n"); ++ break; ++ case Opt_rdhash: ++ AuDbg("rdhash %u\n", opt->rdhash); ++ break; ++ case Opt_rdhash_def: ++ AuDbg("rdhash_def\n"); ++ break; ++ case Opt_xino: ++ u.xino = &opt->xino; ++ AuDbg("xino {%s %.*s}\n", ++ u.xino->path, ++ AuDLNPair(u.xino->file->f_dentry)); ++ break; ++ case Opt_trunc_xino: ++ AuLabel(trunc_xino); ++ break; ++ case Opt_notrunc_xino: ++ AuLabel(notrunc_xino); ++ break; ++ case Opt_trunc_xino_path: ++ case Opt_itrunc_xino: ++ u.xino_itrunc = &opt->xino_itrunc; ++ AuDbg("trunc_xino %d\n", u.xino_itrunc->bindex); ++ break; ++ ++ case Opt_noxino: ++ AuLabel(noxino); ++ break; ++ case Opt_trunc_xib: ++ AuLabel(trunc_xib); ++ break; ++ case Opt_notrunc_xib: ++ AuLabel(notrunc_xib); ++ break; ++ case Opt_shwh: ++ AuLabel(shwh); ++ break; ++ case Opt_noshwh: ++ AuLabel(noshwh); ++ break; ++ case Opt_plink: ++ AuLabel(plink); ++ break; ++ case Opt_noplink: ++ AuLabel(noplink); ++ break; ++ case Opt_list_plink: ++ AuLabel(list_plink); ++ break; ++ case Opt_udba: ++ AuDbg("udba %d, %s\n", ++ opt->udba, au_optstr_udba(opt->udba)); ++ break; ++ case Opt_dio: ++ AuLabel(dio); ++ break; ++ case Opt_nodio: ++ AuLabel(nodio); ++ break; ++ case Opt_diropq_a: ++ AuLabel(diropq_a); ++ break; ++ case Opt_diropq_w: ++ AuLabel(diropq_w); ++ break; ++ case Opt_warn_perm: ++ AuLabel(warn_perm); ++ break; ++ case Opt_nowarn_perm: ++ AuLabel(nowarn_perm); ++ break; ++ case Opt_refrof: ++ AuLabel(refrof); ++ break; ++ case Opt_norefrof: ++ AuLabel(norefrof); ++ break; ++ case Opt_verbose: ++ AuLabel(verbose); ++ break; ++ case Opt_noverbose: ++ AuLabel(noverbose); ++ break; ++ case Opt_sum: ++ AuLabel(sum); ++ break; ++ case Opt_nosum: ++ AuLabel(nosum); ++ break; ++ case Opt_wsum: ++ AuLabel(wsum); ++ break; ++ case Opt_wbr_create: ++ u.create = &opt->wbr_create; ++ AuDbg("create %d, %s\n", u.create->wbr_create, ++ au_optstr_wbr_create(u.create->wbr_create)); ++ switch (u.create->wbr_create) { ++ case AuWbrCreate_MFSV: ++ case AuWbrCreate_PMFSV: ++ AuDbg("%d sec\n", u.create->mfs_second); ++ break; ++ case AuWbrCreate_MFSRR: ++ AuDbg("%llu watermark\n", ++ u.create->mfsrr_watermark); ++ break; 
++ case AuWbrCreate_MFSRRV: ++ AuDbg("%llu watermark, %d sec\n", ++ u.create->mfsrr_watermark, ++ u.create->mfs_second); ++ break; ++ } ++ break; ++ case Opt_wbr_copyup: ++ AuDbg("copyup %d, %s\n", opt->wbr_copyup, ++ au_optstr_wbr_copyup(opt->wbr_copyup)); ++ break; ++ default: ++ BUG(); ++ } ++ opt++; ++ } ++#endif ++} ++ ++void au_opts_free(struct au_opts *opts) ++{ ++ struct au_opt *opt; ++ ++ opt = opts->opt; ++ while (opt->type != Opt_tail) { ++ switch (opt->type) { ++ case Opt_add: ++ case Opt_append: ++ case Opt_prepend: ++ path_put(&opt->add.path); ++ break; ++ case Opt_del: ++ case Opt_idel: ++ path_put(&opt->del.h_path); ++ break; ++ case Opt_mod: ++ case Opt_imod: ++ dput(opt->mod.h_root); ++ break; ++ case Opt_xino: ++ fput(opt->xino.file); ++ break; ++ } ++ opt++; ++ } ++} ++ ++static int opt_add(struct au_opt *opt, char *opt_str, unsigned long sb_flags, ++ aufs_bindex_t bindex) ++{ ++ int err; ++ struct au_opt_add *add = &opt->add; ++ char *p; ++ ++ add->bindex = bindex; ++ add->perm = AuBrPerm_RO; ++ add->pathname = opt_str; ++ p = strchr(opt_str, '='); ++ if (p) { ++ *p++ = 0; ++ if (*p) ++ add->perm = br_perm_val(p); ++ } ++ ++ err = vfsub_kern_path(add->pathname, lkup_dirflags, &add->path); ++ if (!err) { ++ if (!p) { ++ add->perm = AuBrPerm_RO; ++ if (au_test_fs_rr(add->path.dentry->d_sb)) ++ add->perm = AuBrPerm_RR; ++ else if (!bindex && !(sb_flags & MS_RDONLY)) ++ add->perm = AuBrPerm_RW; ++ } ++ opt->type = Opt_add; ++ goto out; ++ } ++ pr_err("lookup failed %s (%d)\n", add->pathname, err); ++ err = -EINVAL; ++ ++out: ++ return err; ++} ++ ++static int au_opts_parse_del(struct au_opt_del *del, substring_t args[]) ++{ ++ int err; ++ ++ del->pathname = args[0].from; ++ AuDbg("del path %s\n", del->pathname); ++ ++ err = vfsub_kern_path(del->pathname, lkup_dirflags, &del->h_path); ++ if (unlikely(err)) ++ pr_err("lookup failed %s (%d)\n", del->pathname, err); ++ ++ return err; ++} ++ ++#if 0 /* reserved for future use */ ++static int au_opts_parse_idel(struct super_block *sb, aufs_bindex_t bindex, ++ struct au_opt_del *del, substring_t args[]) ++{ ++ int err; ++ struct dentry *root; ++ ++ err = -EINVAL; ++ root = sb->s_root; ++ aufs_read_lock(root, AuLock_FLUSH); ++ if (bindex < 0 || au_sbend(sb) < bindex) { ++ pr_err("out of bounds, %d\n", bindex); ++ goto out; ++ } ++ ++ err = 0; ++ del->h_path.dentry = dget(au_h_dptr(root, bindex)); ++ del->h_path.mnt = mntget(au_sbr_mnt(sb, bindex)); ++ ++out: ++ aufs_read_unlock(root, !AuLock_IR); ++ return err; ++} ++#endif ++ ++static int noinline_for_stack ++au_opts_parse_mod(struct au_opt_mod *mod, substring_t args[]) ++{ ++ int err; ++ struct path path; ++ char *p; ++ ++ err = -EINVAL; ++ mod->path = args[0].from; ++ p = strchr(mod->path, '='); ++ if (unlikely(!p)) { ++ pr_err("no permssion %s\n", args[0].from); ++ goto out; ++ } ++ ++ *p++ = 0; ++ err = vfsub_kern_path(mod->path, lkup_dirflags, &path); ++ if (unlikely(err)) { ++ pr_err("lookup failed %s (%d)\n", mod->path, err); ++ goto out; ++ } ++ ++ mod->perm = br_perm_val(p); ++ AuDbg("mod path %s, perm 0x%x, %s\n", mod->path, mod->perm, p); ++ mod->h_root = dget(path.dentry); ++ path_put(&path); ++ ++out: ++ return err; ++} ++ ++#if 0 /* reserved for future use */ ++static int au_opts_parse_imod(struct super_block *sb, aufs_bindex_t bindex, ++ struct au_opt_mod *mod, substring_t args[]) ++{ ++ int err; ++ struct dentry *root; ++ ++ err = -EINVAL; ++ root = sb->s_root; ++ aufs_read_lock(root, AuLock_FLUSH); ++ if (bindex < 0 || au_sbend(sb) < bindex) { ++ pr_err("out of 
bounds, %d\n", bindex); ++ goto out; ++ } ++ ++ err = 0; ++ mod->perm = br_perm_val(args[1].from); ++ AuDbg("mod path %s, perm 0x%x, %s\n", ++ mod->path, mod->perm, args[1].from); ++ mod->h_root = dget(au_h_dptr(root, bindex)); ++ ++out: ++ aufs_read_unlock(root, !AuLock_IR); ++ return err; ++} ++#endif ++ ++static int au_opts_parse_xino(struct super_block *sb, struct au_opt_xino *xino, ++ substring_t args[]) ++{ ++ int err; ++ struct file *file; ++ ++ file = au_xino_create(sb, args[0].from, /*silent*/0); ++ err = PTR_ERR(file); ++ if (IS_ERR(file)) ++ goto out; ++ ++ err = -EINVAL; ++ if (unlikely(file->f_dentry->d_sb == sb)) { ++ fput(file); ++ pr_err("%s must be outside\n", args[0].from); ++ goto out; ++ } ++ ++ err = 0; ++ xino->file = file; ++ xino->path = args[0].from; ++ ++out: ++ return err; ++} ++ ++static int noinline_for_stack ++au_opts_parse_xino_itrunc_path(struct super_block *sb, ++ struct au_opt_xino_itrunc *xino_itrunc, ++ substring_t args[]) ++{ ++ int err; ++ aufs_bindex_t bend, bindex; ++ struct path path; ++ struct dentry *root; ++ ++ err = vfsub_kern_path(args[0].from, lkup_dirflags, &path); ++ if (unlikely(err)) { ++ pr_err("lookup failed %s (%d)\n", args[0].from, err); ++ goto out; ++ } ++ ++ xino_itrunc->bindex = -1; ++ root = sb->s_root; ++ aufs_read_lock(root, AuLock_FLUSH); ++ bend = au_sbend(sb); ++ for (bindex = 0; bindex <= bend; bindex++) { ++ if (au_h_dptr(root, bindex) == path.dentry) { ++ xino_itrunc->bindex = bindex; ++ break; ++ } ++ } ++ aufs_read_unlock(root, !AuLock_IR); ++ path_put(&path); ++ ++ if (unlikely(xino_itrunc->bindex < 0)) { ++ pr_err("no such branch %s\n", args[0].from); ++ err = -EINVAL; ++ } ++ ++out: ++ return err; ++} ++ ++/* called without aufs lock */ ++int au_opts_parse(struct super_block *sb, char *str, struct au_opts *opts) ++{ ++ int err, n, token; ++ aufs_bindex_t bindex; ++ unsigned char skipped; ++ struct dentry *root; ++ struct au_opt *opt, *opt_tail; ++ char *opt_str; ++ /* reduce the stack space */ ++ union { ++ struct au_opt_xino_itrunc *xino_itrunc; ++ struct au_opt_wbr_create *create; ++ } u; ++ struct { ++ substring_t args[MAX_OPT_ARGS]; ++ } *a; ++ ++ err = -ENOMEM; ++ a = kmalloc(sizeof(*a), GFP_NOFS); ++ if (unlikely(!a)) ++ goto out; ++ ++ root = sb->s_root; ++ err = 0; ++ bindex = 0; ++ opt = opts->opt; ++ opt_tail = opt + opts->max_opt - 1; ++ opt->type = Opt_tail; ++ while (!err && (opt_str = strsep(&str, ",")) && *opt_str) { ++ err = -EINVAL; ++ skipped = 0; ++ token = match_token(opt_str, options, a->args); ++ switch (token) { ++ case Opt_br: ++ err = 0; ++ while (!err && (opt_str = strsep(&a->args[0].from, ":")) ++ && *opt_str) { ++ err = opt_add(opt, opt_str, opts->sb_flags, ++ bindex++); ++ if (unlikely(!err && ++opt > opt_tail)) { ++ err = -E2BIG; ++ break; ++ } ++ opt->type = Opt_tail; ++ skipped = 1; ++ } ++ break; ++ case Opt_add: ++ if (unlikely(match_int(&a->args[0], &n))) { ++ pr_err("bad integer in %s\n", opt_str); ++ break; ++ } ++ bindex = n; ++ err = opt_add(opt, a->args[1].from, opts->sb_flags, ++ bindex); ++ if (!err) ++ opt->type = token; ++ break; ++ case Opt_append: ++ err = opt_add(opt, a->args[0].from, opts->sb_flags, ++ /*dummy bindex*/1); ++ if (!err) ++ opt->type = token; ++ break; ++ case Opt_prepend: ++ err = opt_add(opt, a->args[0].from, opts->sb_flags, ++ /*bindex*/0); ++ if (!err) ++ opt->type = token; ++ break; ++ case Opt_del: ++ err = au_opts_parse_del(&opt->del, a->args); ++ if (!err) ++ opt->type = token; ++ break; ++#if 0 /* reserved for future use */ ++ case Opt_idel: ++ 
del->pathname = "(indexed)"; ++ if (unlikely(match_int(&args[0], &n))) { ++ pr_err("bad integer in %s\n", opt_str); ++ break; ++ } ++ err = au_opts_parse_idel(sb, n, &opt->del, a->args); ++ if (!err) ++ opt->type = token; ++ break; ++#endif ++ case Opt_mod: ++ err = au_opts_parse_mod(&opt->mod, a->args); ++ if (!err) ++ opt->type = token; ++ break; ++#ifdef IMOD /* reserved for future use */ ++ case Opt_imod: ++ u.mod->path = "(indexed)"; ++ if (unlikely(match_int(&a->args[0], &n))) { ++ pr_err("bad integer in %s\n", opt_str); ++ break; ++ } ++ err = au_opts_parse_imod(sb, n, &opt->mod, a->args); ++ if (!err) ++ opt->type = token; ++ break; ++#endif ++ case Opt_xino: ++ err = au_opts_parse_xino(sb, &opt->xino, a->args); ++ if (!err) ++ opt->type = token; ++ break; ++ ++ case Opt_trunc_xino_path: ++ err = au_opts_parse_xino_itrunc_path ++ (sb, &opt->xino_itrunc, a->args); ++ if (!err) ++ opt->type = token; ++ break; ++ ++ case Opt_itrunc_xino: ++ u.xino_itrunc = &opt->xino_itrunc; ++ if (unlikely(match_int(&a->args[0], &n))) { ++ pr_err("bad integer in %s\n", opt_str); ++ break; ++ } ++ u.xino_itrunc->bindex = n; ++ aufs_read_lock(root, AuLock_FLUSH); ++ if (n < 0 || au_sbend(sb) < n) { ++ pr_err("out of bounds, %d\n", n); ++ aufs_read_unlock(root, !AuLock_IR); ++ break; ++ } ++ aufs_read_unlock(root, !AuLock_IR); ++ err = 0; ++ opt->type = token; ++ break; ++ ++ case Opt_dirwh: ++ if (unlikely(match_int(&a->args[0], &opt->dirwh))) ++ break; ++ err = 0; ++ opt->type = token; ++ break; ++ ++ case Opt_rdcache: ++ if (unlikely(match_int(&a->args[0], &n))) { ++ pr_err("bad integer in %s\n", opt_str); ++ break; ++ } ++ if (unlikely(n > AUFS_RDCACHE_MAX)) { ++ pr_err("rdcache must be smaller than %d\n", ++ AUFS_RDCACHE_MAX); ++ break; ++ } ++ opt->rdcache = n; ++ err = 0; ++ opt->type = token; ++ break; ++ case Opt_rdblk: ++ if (unlikely(match_int(&a->args[0], &n) ++ || n < 0 ++ || n > KMALLOC_MAX_SIZE)) { ++ pr_err("bad integer in %s\n", opt_str); ++ break; ++ } ++ if (unlikely(n && n < NAME_MAX)) { ++ pr_err("rdblk must be larger than %d\n", ++ NAME_MAX); ++ break; ++ } ++ opt->rdblk = n; ++ err = 0; ++ opt->type = token; ++ break; ++ case Opt_rdhash: ++ if (unlikely(match_int(&a->args[0], &n) ++ || n < 0 ++ || n * sizeof(struct hlist_head) ++ > KMALLOC_MAX_SIZE)) { ++ pr_err("bad integer in %s\n", opt_str); ++ break; ++ } ++ opt->rdhash = n; ++ err = 0; ++ opt->type = token; ++ break; ++ ++ case Opt_trunc_xino: ++ case Opt_notrunc_xino: ++ case Opt_noxino: ++ case Opt_trunc_xib: ++ case Opt_notrunc_xib: ++ case Opt_shwh: ++ case Opt_noshwh: ++ case Opt_plink: ++ case Opt_noplink: ++ case Opt_list_plink: ++ case Opt_dio: ++ case Opt_nodio: ++ case Opt_diropq_a: ++ case Opt_diropq_w: ++ case Opt_warn_perm: ++ case Opt_nowarn_perm: ++ case Opt_refrof: ++ case Opt_norefrof: ++ case Opt_verbose: ++ case Opt_noverbose: ++ case Opt_sum: ++ case Opt_nosum: ++ case Opt_wsum: ++ case Opt_rdblk_def: ++ case Opt_rdhash_def: ++ err = 0; ++ opt->type = token; ++ break; ++ ++ case Opt_udba: ++ opt->udba = udba_val(a->args[0].from); ++ if (opt->udba >= 0) { ++ err = 0; ++ opt->type = token; ++ } else ++ pr_err("wrong value, %s\n", opt_str); ++ break; ++ ++ case Opt_wbr_create: ++ u.create = &opt->wbr_create; ++ u.create->wbr_create ++ = au_wbr_create_val(a->args[0].from, u.create); ++ if (u.create->wbr_create >= 0) { ++ err = 0; ++ opt->type = token; ++ } else ++ pr_err("wrong value, %s\n", opt_str); ++ break; ++ case Opt_wbr_copyup: ++ opt->wbr_copyup = au_wbr_copyup_val(a->args[0].from); ++ if 
(opt->wbr_copyup >= 0) { ++ err = 0; ++ opt->type = token; ++ } else ++ pr_err("wrong value, %s\n", opt_str); ++ break; ++ ++ case Opt_ignore: ++ pr_warning("ignored %s\n", opt_str); ++ /*FALLTHROUGH*/ ++ case Opt_ignore_silent: ++ skipped = 1; ++ err = 0; ++ break; ++ case Opt_err: ++ pr_err("unknown option %s\n", opt_str); ++ break; ++ } ++ ++ if (!err && !skipped) { ++ if (unlikely(++opt > opt_tail)) { ++ err = -E2BIG; ++ opt--; ++ opt->type = Opt_tail; ++ break; ++ } ++ opt->type = Opt_tail; ++ } ++ } ++ ++ kfree(a); ++ dump_opts(opts); ++ if (unlikely(err)) ++ au_opts_free(opts); ++ ++out: ++ return err; ++} ++ ++static int au_opt_wbr_create(struct super_block *sb, ++ struct au_opt_wbr_create *create) ++{ ++ int err; ++ struct au_sbinfo *sbinfo; ++ ++ SiMustWriteLock(sb); ++ ++ err = 1; /* handled */ ++ sbinfo = au_sbi(sb); ++ if (sbinfo->si_wbr_create_ops->fin) { ++ err = sbinfo->si_wbr_create_ops->fin(sb); ++ if (!err) ++ err = 1; ++ } ++ ++ sbinfo->si_wbr_create = create->wbr_create; ++ sbinfo->si_wbr_create_ops = au_wbr_create_ops + create->wbr_create; ++ switch (create->wbr_create) { ++ case AuWbrCreate_MFSRRV: ++ case AuWbrCreate_MFSRR: ++ sbinfo->si_wbr_mfs.mfsrr_watermark = create->mfsrr_watermark; ++ /*FALLTHROUGH*/ ++ case AuWbrCreate_MFS: ++ case AuWbrCreate_MFSV: ++ case AuWbrCreate_PMFS: ++ case AuWbrCreate_PMFSV: ++ sbinfo->si_wbr_mfs.mfs_expire ++ = msecs_to_jiffies(create->mfs_second * MSEC_PER_SEC); ++ break; ++ } ++ ++ if (sbinfo->si_wbr_create_ops->init) ++ sbinfo->si_wbr_create_ops->init(sb); /* ignore */ ++ ++ return err; ++} ++ ++/* ++ * returns, ++ * plus: processed without an error ++ * zero: unprocessed ++ */ ++static int au_opt_simple(struct super_block *sb, struct au_opt *opt, ++ struct au_opts *opts) ++{ ++ int err; ++ struct au_sbinfo *sbinfo; ++ ++ SiMustWriteLock(sb); ++ ++ err = 1; /* handled */ ++ sbinfo = au_sbi(sb); ++ switch (opt->type) { ++ case Opt_udba: ++ sbinfo->si_mntflags &= ~AuOptMask_UDBA; ++ sbinfo->si_mntflags |= opt->udba; ++ opts->given_udba |= opt->udba; ++ break; ++ ++ case Opt_plink: ++ au_opt_set(sbinfo->si_mntflags, PLINK); ++ break; ++ case Opt_noplink: ++ if (au_opt_test(sbinfo->si_mntflags, PLINK)) ++ au_plink_put(sb, /*verbose*/1); ++ au_opt_clr(sbinfo->si_mntflags, PLINK); ++ break; ++ case Opt_list_plink: ++ if (au_opt_test(sbinfo->si_mntflags, PLINK)) ++ au_plink_list(sb); ++ break; ++ ++ case Opt_dio: ++ au_opt_set(sbinfo->si_mntflags, DIO); ++ au_fset_opts(opts->flags, REFRESH_DYAOP); ++ break; ++ case Opt_nodio: ++ au_opt_clr(sbinfo->si_mntflags, DIO); ++ au_fset_opts(opts->flags, REFRESH_DYAOP); ++ break; ++ ++ case Opt_diropq_a: ++ au_opt_set(sbinfo->si_mntflags, ALWAYS_DIROPQ); ++ break; ++ case Opt_diropq_w: ++ au_opt_clr(sbinfo->si_mntflags, ALWAYS_DIROPQ); ++ break; ++ ++ case Opt_warn_perm: ++ au_opt_set(sbinfo->si_mntflags, WARN_PERM); ++ break; ++ case Opt_nowarn_perm: ++ au_opt_clr(sbinfo->si_mntflags, WARN_PERM); ++ break; ++ ++ case Opt_refrof: ++ au_opt_set(sbinfo->si_mntflags, REFROF); ++ break; ++ case Opt_norefrof: ++ au_opt_clr(sbinfo->si_mntflags, REFROF); ++ break; ++ ++ case Opt_verbose: ++ au_opt_set(sbinfo->si_mntflags, VERBOSE); ++ break; ++ case Opt_noverbose: ++ au_opt_clr(sbinfo->si_mntflags, VERBOSE); ++ break; ++ ++ case Opt_sum: ++ au_opt_set(sbinfo->si_mntflags, SUM); ++ break; ++ case Opt_wsum: ++ au_opt_clr(sbinfo->si_mntflags, SUM); ++ au_opt_set(sbinfo->si_mntflags, SUM_W); ++ case Opt_nosum: ++ au_opt_clr(sbinfo->si_mntflags, SUM); ++ au_opt_clr(sbinfo->si_mntflags, SUM_W); ++ break; ++ 
++ case Opt_wbr_create: ++ err = au_opt_wbr_create(sb, &opt->wbr_create); ++ break; ++ case Opt_wbr_copyup: ++ sbinfo->si_wbr_copyup = opt->wbr_copyup; ++ sbinfo->si_wbr_copyup_ops = au_wbr_copyup_ops + opt->wbr_copyup; ++ break; ++ ++ case Opt_dirwh: ++ sbinfo->si_dirwh = opt->dirwh; ++ break; ++ ++ case Opt_rdcache: ++ sbinfo->si_rdcache ++ = msecs_to_jiffies(opt->rdcache * MSEC_PER_SEC); ++ break; ++ case Opt_rdblk: ++ sbinfo->si_rdblk = opt->rdblk; ++ break; ++ case Opt_rdblk_def: ++ sbinfo->si_rdblk = AUFS_RDBLK_DEF; ++ break; ++ case Opt_rdhash: ++ sbinfo->si_rdhash = opt->rdhash; ++ break; ++ case Opt_rdhash_def: ++ sbinfo->si_rdhash = AUFS_RDHASH_DEF; ++ break; ++ ++ case Opt_shwh: ++ au_opt_set(sbinfo->si_mntflags, SHWH); ++ break; ++ case Opt_noshwh: ++ au_opt_clr(sbinfo->si_mntflags, SHWH); ++ break; ++ ++ case Opt_trunc_xino: ++ au_opt_set(sbinfo->si_mntflags, TRUNC_XINO); ++ break; ++ case Opt_notrunc_xino: ++ au_opt_clr(sbinfo->si_mntflags, TRUNC_XINO); ++ break; ++ ++ case Opt_trunc_xino_path: ++ case Opt_itrunc_xino: ++ err = au_xino_trunc(sb, opt->xino_itrunc.bindex); ++ if (!err) ++ err = 1; ++ break; ++ ++ case Opt_trunc_xib: ++ au_fset_opts(opts->flags, TRUNC_XIB); ++ break; ++ case Opt_notrunc_xib: ++ au_fclr_opts(opts->flags, TRUNC_XIB); ++ break; ++ ++ default: ++ err = 0; ++ break; ++ } ++ ++ return err; ++} ++ ++/* ++ * returns tri-state. ++ * plus: processed without an error ++ * zero: unprocessed ++ * minus: error ++ */ ++static int au_opt_br(struct super_block *sb, struct au_opt *opt, ++ struct au_opts *opts) ++{ ++ int err, do_refresh; ++ ++ err = 0; ++ switch (opt->type) { ++ case Opt_append: ++ opt->add.bindex = au_sbend(sb) + 1; ++ if (opt->add.bindex < 0) ++ opt->add.bindex = 0; ++ goto add; ++ case Opt_prepend: ++ opt->add.bindex = 0; ++ add: ++ case Opt_add: ++ err = au_br_add(sb, &opt->add, ++ au_ftest_opts(opts->flags, REMOUNT)); ++ if (!err) { ++ err = 1; ++ au_fset_opts(opts->flags, REFRESH); ++ } ++ break; ++ ++ case Opt_del: ++ case Opt_idel: ++ err = au_br_del(sb, &opt->del, ++ au_ftest_opts(opts->flags, REMOUNT)); ++ if (!err) { ++ err = 1; ++ au_fset_opts(opts->flags, TRUNC_XIB); ++ au_fset_opts(opts->flags, REFRESH); ++ } ++ break; ++ ++ case Opt_mod: ++ case Opt_imod: ++ err = au_br_mod(sb, &opt->mod, ++ au_ftest_opts(opts->flags, REMOUNT), ++ &do_refresh); ++ if (!err) { ++ err = 1; ++ if (do_refresh) ++ au_fset_opts(opts->flags, REFRESH); ++ } ++ break; ++ } ++ ++ return err; ++} ++ ++static int au_opt_xino(struct super_block *sb, struct au_opt *opt, ++ struct au_opt_xino **opt_xino, ++ struct au_opts *opts) ++{ ++ int err; ++ aufs_bindex_t bend, bindex; ++ struct dentry *root, *parent, *h_root; ++ ++ err = 0; ++ switch (opt->type) { ++ case Opt_xino: ++ err = au_xino_set(sb, &opt->xino, ++ !!au_ftest_opts(opts->flags, REMOUNT)); ++ if (unlikely(err)) ++ break; ++ ++ *opt_xino = &opt->xino; ++ au_xino_brid_set(sb, -1); ++ ++ /* safe d_parent access */ ++ parent = opt->xino.file->f_dentry->d_parent; ++ root = sb->s_root; ++ bend = au_sbend(sb); ++ for (bindex = 0; bindex <= bend; bindex++) { ++ h_root = au_h_dptr(root, bindex); ++ if (h_root == parent) { ++ au_xino_brid_set(sb, au_sbr_id(sb, bindex)); ++ break; ++ } ++ } ++ break; ++ ++ case Opt_noxino: ++ au_xino_clr(sb); ++ au_xino_brid_set(sb, -1); ++ *opt_xino = (void *)-1; ++ break; ++ } ++ ++ return err; ++} ++ ++int au_opts_verify(struct super_block *sb, unsigned long sb_flags, ++ unsigned int pending) ++{ ++ int err; ++ aufs_bindex_t bindex, bend; ++ unsigned char do_plink, skip, 
do_free; ++ struct au_branch *br; ++ struct au_wbr *wbr; ++ struct dentry *root; ++ struct inode *dir, *h_dir; ++ struct au_sbinfo *sbinfo; ++ struct au_hinode *hdir; ++ ++ SiMustAnyLock(sb); ++ ++ sbinfo = au_sbi(sb); ++ AuDebugOn(!(sbinfo->si_mntflags & AuOptMask_UDBA)); ++ ++ if (!(sb_flags & MS_RDONLY)) { ++ if (unlikely(!au_br_writable(au_sbr_perm(sb, 0)))) ++ pr_warning("first branch should be rw\n"); ++ if (unlikely(au_opt_test(sbinfo->si_mntflags, SHWH))) ++ pr_warning("shwh should be used with ro\n"); ++ } ++ ++ if (au_opt_test((sbinfo->si_mntflags | pending), UDBA_HNOTIFY) ++ && !au_opt_test(sbinfo->si_mntflags, XINO)) ++ pr_warning("udba=*notify requires xino\n"); ++ ++ err = 0; ++ root = sb->s_root; ++ dir = root->d_inode; ++ do_plink = !!au_opt_test(sbinfo->si_mntflags, PLINK); ++ bend = au_sbend(sb); ++ for (bindex = 0; !err && bindex <= bend; bindex++) { ++ skip = 0; ++ h_dir = au_h_iptr(dir, bindex); ++ br = au_sbr(sb, bindex); ++ do_free = 0; ++ ++ wbr = br->br_wbr; ++ if (wbr) ++ wbr_wh_read_lock(wbr); ++ ++ if (!au_br_writable(br->br_perm)) { ++ do_free = !!wbr; ++ skip = (!wbr ++ || (!wbr->wbr_whbase ++ && !wbr->wbr_plink ++ && !wbr->wbr_orph)); ++ } else if (!au_br_wh_linkable(br->br_perm)) { ++ /* skip = (!br->br_whbase && !br->br_orph); */ ++ skip = (!wbr || !wbr->wbr_whbase); ++ if (skip && wbr) { ++ if (do_plink) ++ skip = !!wbr->wbr_plink; ++ else ++ skip = !wbr->wbr_plink; ++ } ++ } else { ++ /* skip = (br->br_whbase && br->br_ohph); */ ++ skip = (wbr && wbr->wbr_whbase); ++ if (skip) { ++ if (do_plink) ++ skip = !!wbr->wbr_plink; ++ else ++ skip = !wbr->wbr_plink; ++ } ++ } ++ if (wbr) ++ wbr_wh_read_unlock(wbr); ++ ++ if (skip) ++ continue; ++ ++ hdir = au_hi(dir, bindex); ++ au_hn_imtx_lock_nested(hdir, AuLsc_I_PARENT); ++ if (wbr) ++ wbr_wh_write_lock(wbr); ++ err = au_wh_init(au_h_dptr(root, bindex), br, sb); ++ if (wbr) ++ wbr_wh_write_unlock(wbr); ++ au_hn_imtx_unlock(hdir); ++ ++ if (!err && do_free) { ++ kfree(wbr); ++ br->br_wbr = NULL; ++ } ++ } ++ ++ return err; ++} ++ ++int au_opts_mount(struct super_block *sb, struct au_opts *opts) ++{ ++ int err; ++ unsigned int tmp; ++ aufs_bindex_t bindex, bend; ++ struct au_opt *opt; ++ struct au_opt_xino *opt_xino, xino; ++ struct au_sbinfo *sbinfo; ++ struct au_branch *br; ++ ++ SiMustWriteLock(sb); ++ ++ err = 0; ++ opt_xino = NULL; ++ opt = opts->opt; ++ while (err >= 0 && opt->type != Opt_tail) ++ err = au_opt_simple(sb, opt++, opts); ++ if (err > 0) ++ err = 0; ++ else if (unlikely(err < 0)) ++ goto out; ++ ++ /* disable xino and udba temporary */ ++ sbinfo = au_sbi(sb); ++ tmp = sbinfo->si_mntflags; ++ au_opt_clr(sbinfo->si_mntflags, XINO); ++ au_opt_set_udba(sbinfo->si_mntflags, UDBA_REVAL); ++ ++ opt = opts->opt; ++ while (err >= 0 && opt->type != Opt_tail) ++ err = au_opt_br(sb, opt++, opts); ++ if (err > 0) ++ err = 0; ++ else if (unlikely(err < 0)) ++ goto out; ++ ++ bend = au_sbend(sb); ++ if (unlikely(bend < 0)) { ++ err = -EINVAL; ++ pr_err("no branches\n"); ++ goto out; ++ } ++ ++ if (au_opt_test(tmp, XINO)) ++ au_opt_set(sbinfo->si_mntflags, XINO); ++ opt = opts->opt; ++ while (!err && opt->type != Opt_tail) ++ err = au_opt_xino(sb, opt++, &opt_xino, opts); ++ if (unlikely(err)) ++ goto out; ++ ++ err = au_opts_verify(sb, sb->s_flags, tmp); ++ if (unlikely(err)) ++ goto out; ++ ++ /* restore xino */ ++ if (au_opt_test(tmp, XINO) && !opt_xino) { ++ xino.file = au_xino_def(sb); ++ err = PTR_ERR(xino.file); ++ if (IS_ERR(xino.file)) ++ goto out; ++ ++ err = au_xino_set(sb, &xino, /*remount*/0); ++ 
fput(xino.file); ++ if (unlikely(err)) ++ goto out; ++ } ++ ++ /* restore udba */ ++ tmp &= AuOptMask_UDBA; ++ sbinfo->si_mntflags &= ~AuOptMask_UDBA; ++ sbinfo->si_mntflags |= tmp; ++ bend = au_sbend(sb); ++ for (bindex = 0; bindex <= bend; bindex++) { ++ br = au_sbr(sb, bindex); ++ err = au_hnotify_reset_br(tmp, br, br->br_perm); ++ if (unlikely(err)) ++ AuIOErr("hnotify failed on br %d, %d, ignored\n", ++ bindex, err); ++ /* go on even if err */ ++ } ++ if (au_opt_test(tmp, UDBA_HNOTIFY)) { ++ struct inode *dir = sb->s_root->d_inode; ++ au_hn_reset(dir, au_hi_flags(dir, /*isdir*/1) & ~AuHi_XINO); ++ } ++ ++out: ++ return err; ++} ++ ++int au_opts_remount(struct super_block *sb, struct au_opts *opts) ++{ ++ int err, rerr; ++ struct inode *dir; ++ struct au_opt_xino *opt_xino; ++ struct au_opt *opt; ++ struct au_sbinfo *sbinfo; ++ ++ SiMustWriteLock(sb); ++ ++ dir = sb->s_root->d_inode; ++ sbinfo = au_sbi(sb); ++ err = 0; ++ opt_xino = NULL; ++ opt = opts->opt; ++ while (err >= 0 && opt->type != Opt_tail) { ++ err = au_opt_simple(sb, opt, opts); ++ if (!err) ++ err = au_opt_br(sb, opt, opts); ++ if (!err) ++ err = au_opt_xino(sb, opt, &opt_xino, opts); ++ opt++; ++ } ++ if (err > 0) ++ err = 0; ++ AuTraceErr(err); ++ /* go on even err */ ++ ++ rerr = au_opts_verify(sb, opts->sb_flags, /*pending*/0); ++ if (unlikely(rerr && !err)) ++ err = rerr; ++ ++ if (au_ftest_opts(opts->flags, TRUNC_XIB)) { ++ rerr = au_xib_trunc(sb); ++ if (unlikely(rerr && !err)) ++ err = rerr; ++ } ++ ++ /* will be handled by the caller */ ++ if (!au_ftest_opts(opts->flags, REFRESH) ++ && (opts->given_udba || au_opt_test(sbinfo->si_mntflags, XINO))) ++ au_fset_opts(opts->flags, REFRESH); ++ ++ AuDbg("status 0x%x\n", opts->flags); ++ return err; ++} ++ ++/* ---------------------------------------------------------------------- */ ++ ++unsigned int au_opt_udba(struct super_block *sb) ++{ ++ return au_mntflags(sb) & AuOptMask_UDBA; ++} +--- /dev/null 2012-03-14 12:35:58.848999748 +0100 ++++ b/fs/aufs/opts.h 2012-03-20 17:31:18.000000000 +0100 +@@ -0,0 +1,209 @@ ++/* ++ * Copyright (C) 2005-2012 Junjiro R. Okajima ++ * ++ * This program, aufs is free software; you can redistribute it and/or modify ++ * it under the terms of the GNU General Public License as published by ++ * the Free Software Foundation; either version 2 of the License, or ++ * (at your option) any later version. ++ * ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ * GNU General Public License for more details. 
++ * ++ * You should have received a copy of the GNU General Public License ++ * along with this program; if not, write to the Free Software ++ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA ++ */ ++ ++/* ++ * mount options/flags ++ */ ++ ++#ifndef __AUFS_OPTS_H__ ++#define __AUFS_OPTS_H__ ++ ++#ifdef __KERNEL__ ++ ++#include <linux/path.h> ++ ++struct file; ++struct super_block; ++ ++/* ---------------------------------------------------------------------- */ ++ ++/* mount flags */ ++#define AuOpt_XINO 1 /* external inode number bitmap ++ and translation table */ ++#define AuOpt_TRUNC_XINO (1 << 1) /* truncate xino files */ ++#define AuOpt_UDBA_NONE (1 << 2) /* users direct branch access */ ++#define AuOpt_UDBA_REVAL (1 << 3) ++#define AuOpt_UDBA_HNOTIFY (1 << 4) ++#define AuOpt_SHWH (1 << 5) /* show whiteout */ ++#define AuOpt_PLINK (1 << 6) /* pseudo-link */ ++#define AuOpt_DIRPERM1 (1 << 7) /* unimplemented */ ++#define AuOpt_REFROF (1 << 8) /* unimplemented */ ++#define AuOpt_ALWAYS_DIROPQ (1 << 9) /* policy to creating diropq */ ++#define AuOpt_SUM (1 << 10) /* summation for statfs(2) */ ++#define AuOpt_SUM_W (1 << 11) /* unimplemented */ ++#define AuOpt_WARN_PERM (1 << 12) /* warn when add-branch */ ++#define AuOpt_VERBOSE (1 << 13) /* busy inode when del-branch */ ++#define AuOpt_DIO (1 << 14) /* direct io */ ++ ++#ifndef CONFIG_AUFS_HNOTIFY ++#undef AuOpt_UDBA_HNOTIFY ++#define AuOpt_UDBA_HNOTIFY 0 ++#endif ++#ifndef CONFIG_AUFS_SHWH ++#undef AuOpt_SHWH ++#define AuOpt_SHWH 0 ++#endif ++ ++#define AuOpt_Def (AuOpt_XINO \ ++ | AuOpt_UDBA_REVAL \ ++ | AuOpt_PLINK \ ++ /* | AuOpt_DIRPERM1 */ \ ++ | AuOpt_WARN_PERM) ++#define AuOptMask_UDBA (AuOpt_UDBA_NONE \ ++ | AuOpt_UDBA_REVAL \ ++ | AuOpt_UDBA_HNOTIFY) ++ ++#define au_opt_test(flags, name) (flags & AuOpt_##name) ++#define au_opt_set(flags, name) do { \ ++ BUILD_BUG_ON(AuOpt_##name & AuOptMask_UDBA); \ ++ ((flags) |= AuOpt_##name); \ ++} while (0) ++#define au_opt_set_udba(flags, name) do { \ ++ (flags) &= ~AuOptMask_UDBA; \ ++ ((flags) |= AuOpt_##name); \ ++} while (0) ++#define au_opt_clr(flags, name) do { \ ++ ((flags) &= ~AuOpt_##name); \ ++} while (0) ++ ++static inline unsigned int au_opts_plink(unsigned int mntflags) ++{ ++#ifdef CONFIG_PROC_FS ++ return mntflags; ++#else ++ return mntflags & ~AuOpt_PLINK; ++#endif ++} ++ ++/* ---------------------------------------------------------------------- */ ++ ++/* policies to select one among multiple writable branches */ ++enum { ++ AuWbrCreate_TDP, /* top down parent */ ++ AuWbrCreate_RR, /* round robin */ ++ AuWbrCreate_MFS, /* most free space */ ++ AuWbrCreate_MFSV, /* mfs with seconds */ ++ AuWbrCreate_MFSRR, /* mfs then rr */ ++ AuWbrCreate_MFSRRV, /* mfs then rr with seconds */ ++ AuWbrCreate_PMFS, /* parent and mfs */ ++ AuWbrCreate_PMFSV, /* parent and mfs with seconds */ ++ ++ AuWbrCreate_Def = AuWbrCreate_TDP ++}; ++ ++enum { ++ AuWbrCopyup_TDP, /* top down parent */ ++ AuWbrCopyup_BUP, /* bottom up parent */ ++ AuWbrCopyup_BU, /* bottom up */ ++ ++ AuWbrCopyup_Def = AuWbrCopyup_TDP ++}; ++ ++/* ---------------------------------------------------------------------- */ ++ ++struct au_opt_add { ++ aufs_bindex_t bindex; ++ char *pathname; ++ int perm; ++ struct path path; ++}; ++ ++struct au_opt_del { ++ char *pathname; ++ struct path h_path; ++}; ++ ++struct au_opt_mod { ++ char *path; ++ int perm; ++ struct dentry *h_root; ++}; ++ ++struct au_opt_xino { ++ char *path; ++ struct file *file; ++}; ++ ++struct au_opt_xino_itrunc { ++ aufs_bindex_t bindex; ++}; 
++ ++struct au_opt_wbr_create { ++ int wbr_create; ++ int mfs_second; ++ unsigned long long mfsrr_watermark; ++}; ++ ++struct au_opt { ++ int type; ++ union { ++ struct au_opt_xino xino; ++ struct au_opt_xino_itrunc xino_itrunc; ++ struct au_opt_add add; ++ struct au_opt_del del; ++ struct au_opt_mod mod; ++ int dirwh; ++ int rdcache; ++ unsigned int rdblk; ++ unsigned int rdhash; ++ int udba; ++ struct au_opt_wbr_create wbr_create; ++ int wbr_copyup; ++ }; ++}; ++ ++/* opts flags */ ++#define AuOpts_REMOUNT 1 ++#define AuOpts_REFRESH (1 << 1) ++#define AuOpts_TRUNC_XIB (1 << 2) ++#define AuOpts_REFRESH_DYAOP (1 << 3) ++#define au_ftest_opts(flags, name) ((flags) & AuOpts_##name) ++#define au_fset_opts(flags, name) \ ++ do { (flags) |= AuOpts_##name; } while (0) ++#define au_fclr_opts(flags, name) \ ++ do { (flags) &= ~AuOpts_##name; } while (0) ++ ++struct au_opts { ++ struct au_opt *opt; ++ int max_opt; ++ ++ unsigned int given_udba; ++ unsigned int flags; ++ unsigned long sb_flags; ++}; ++ ++/* ---------------------------------------------------------------------- */ ++ ++char *au_optstr_br_perm(int brperm); ++const char *au_optstr_udba(int udba); ++const char *au_optstr_wbr_copyup(int wbr_copyup); ++const char *au_optstr_wbr_create(int wbr_create); ++ ++void au_opts_free(struct au_opts *opts); ++int au_opts_parse(struct super_block *sb, char *str, struct au_opts *opts); ++int au_opts_verify(struct super_block *sb, unsigned long sb_flags, ++ unsigned int pending); ++int au_opts_mount(struct super_block *sb, struct au_opts *opts); ++int au_opts_remount(struct super_block *sb, struct au_opts *opts); ++ ++unsigned int au_opt_udba(struct super_block *sb); ++ ++/* ---------------------------------------------------------------------- */ ++ ++#endif /* __KERNEL__ */ ++#endif /* __AUFS_OPTS_H__ */ +--- /dev/null 2012-03-14 12:35:58.848999748 +0100 ++++ b/fs/aufs/plink.c 2012-03-20 17:31:18.000000000 +0100 +@@ -0,0 +1,515 @@ ++/* ++ * Copyright (C) 2005-2012 Junjiro R. Okajima ++ * ++ * This program, aufs is free software; you can redistribute it and/or modify ++ * it under the terms of the GNU General Public License as published by ++ * the Free Software Foundation; either version 2 of the License, or ++ * (at your option) any later version. ++ * ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ * GNU General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public License ++ * along with this program; if not, write to the Free Software ++ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA ++ */ ++ ++/* ++ * pseudo-link ++ */ ++ ++#include "aufs.h" ++ ++/* ++ * the pseudo-link maintenance mode. ++ * during a user process maintains the pseudo-links, ++ * prohibit adding a new plink and branch manipulation. ++ * ++ * Flags ++ * NOPLM: ++ * For entry functions which will handle plink, and i_mutex is already held ++ * in VFS. ++ * They cannot wait and should return an error at once. ++ * Callers has to check the error. ++ * NOPLMW: ++ * For entry functions which will handle plink, but i_mutex is not held ++ * in VFS. ++ * They can wait the plink maintenance mode to finish. ++ * ++ * They behave like F_SETLK and F_SETLKW. ++ * If the caller never handle plink, then both flags are unnecessary. 
++ */ ++ ++int au_plink_maint(struct super_block *sb, int flags) ++{ ++ int err; ++ pid_t pid, ppid; ++ struct au_sbinfo *sbi; ++ ++ SiMustAnyLock(sb); ++ ++ err = 0; ++ if (!au_opt_test(au_mntflags(sb), PLINK)) ++ goto out; ++ ++ sbi = au_sbi(sb); ++ pid = sbi->si_plink_maint_pid; ++ if (!pid || pid == current->pid) ++ goto out; ++ ++ /* todo: it highly depends upon /sbin/mount.aufs */ ++ rcu_read_lock(); ++ ppid = task_pid_vnr(rcu_dereference(current->real_parent)); ++ rcu_read_unlock(); ++ if (pid == ppid) ++ goto out; ++ ++ if (au_ftest_lock(flags, NOPLMW)) { ++ /* if there is no i_mutex lock in VFS, we don't need to wait */ ++ /* AuDebugOn(!lockdep_depth(current)); */ ++ while (sbi->si_plink_maint_pid) { ++ si_read_unlock(sb); ++ /* gave up wake_up_bit() */ ++ wait_event(sbi->si_plink_wq, !sbi->si_plink_maint_pid); ++ ++ if (au_ftest_lock(flags, FLUSH)) ++ au_nwt_flush(&sbi->si_nowait); ++ si_noflush_read_lock(sb); ++ } ++ } else if (au_ftest_lock(flags, NOPLM)) { ++ AuDbg("ppid %d, pid %d\n", ppid, pid); ++ err = -EAGAIN; ++ } ++ ++out: ++ return err; ++} ++ ++void au_plink_maint_leave(struct au_sbinfo *sbinfo) ++{ ++ spin_lock(&sbinfo->si_plink_maint_lock); ++ sbinfo->si_plink_maint_pid = 0; ++ spin_unlock(&sbinfo->si_plink_maint_lock); ++ wake_up_all(&sbinfo->si_plink_wq); ++} ++ ++int au_plink_maint_enter(struct super_block *sb) ++{ ++ int err; ++ struct au_sbinfo *sbinfo; ++ ++ err = 0; ++ sbinfo = au_sbi(sb); ++ /* make sure i am the only one in this fs */ ++ si_write_lock(sb, AuLock_FLUSH); ++ if (au_opt_test(au_mntflags(sb), PLINK)) { ++ spin_lock(&sbinfo->si_plink_maint_lock); ++ if (!sbinfo->si_plink_maint_pid) ++ sbinfo->si_plink_maint_pid = current->pid; ++ else ++ err = -EBUSY; ++ spin_unlock(&sbinfo->si_plink_maint_lock); ++ } ++ si_write_unlock(sb); ++ ++ return err; ++} ++ ++/* ---------------------------------------------------------------------- */ ++ ++struct pseudo_link { ++ union { ++ struct list_head list; ++ struct rcu_head rcu; ++ }; ++ struct inode *inode; ++}; ++ ++#ifdef CONFIG_AUFS_DEBUG ++void au_plink_list(struct super_block *sb) ++{ ++ struct au_sbinfo *sbinfo; ++ struct list_head *plink_list; ++ struct pseudo_link *plink; ++ ++ SiMustAnyLock(sb); ++ ++ sbinfo = au_sbi(sb); ++ AuDebugOn(!au_opt_test(au_mntflags(sb), PLINK)); ++ AuDebugOn(au_plink_maint(sb, AuLock_NOPLM)); ++ ++ plink_list = &sbinfo->si_plink.head; ++ rcu_read_lock(); ++ list_for_each_entry_rcu(plink, plink_list, list) ++ AuDbg("%lu\n", plink->inode->i_ino); ++ rcu_read_unlock(); ++} ++#endif ++ ++/* is the inode pseudo-linked? */ ++int au_plink_test(struct inode *inode) ++{ ++ int found; ++ struct au_sbinfo *sbinfo; ++ struct list_head *plink_list; ++ struct pseudo_link *plink; ++ ++ sbinfo = au_sbi(inode->i_sb); ++ AuRwMustAnyLock(&sbinfo->si_rwsem); ++ AuDebugOn(!au_opt_test(au_mntflags(inode->i_sb), PLINK)); ++ AuDebugOn(au_plink_maint(inode->i_sb, AuLock_NOPLM)); ++ ++ found = 0; ++ plink_list = &sbinfo->si_plink.head; ++ rcu_read_lock(); ++ list_for_each_entry_rcu(plink, plink_list, list) ++ if (plink->inode == inode) { ++ found = 1; ++ break; ++ } ++ rcu_read_unlock(); ++ return found; ++} ++ ++/* ---------------------------------------------------------------------- */ ++ ++/* ++ * generate a name for plink. ++ * the file will be stored under AUFS_WH_PLINKDIR. 
++ */ ++/* 20 is max digits length of ulong 64 */ ++#define PLINK_NAME_LEN ((20 + 1) * 2) ++ ++static int plink_name(char *name, int len, struct inode *inode, ++ aufs_bindex_t bindex) ++{ ++ int rlen; ++ struct inode *h_inode; ++ ++ h_inode = au_h_iptr(inode, bindex); ++ rlen = snprintf(name, len, "%lu.%lu", inode->i_ino, h_inode->i_ino); ++ return rlen; ++} ++ ++struct au_do_plink_lkup_args { ++ struct dentry **errp; ++ struct qstr *tgtname; ++ struct dentry *h_parent; ++ struct au_branch *br; ++}; ++ ++static struct dentry *au_do_plink_lkup(struct qstr *tgtname, ++ struct dentry *h_parent, ++ struct au_branch *br) ++{ ++ struct dentry *h_dentry; ++ struct mutex *h_mtx; ++ ++ h_mtx = &h_parent->d_inode->i_mutex; ++ mutex_lock_nested(h_mtx, AuLsc_I_CHILD2); ++ h_dentry = au_lkup_one(tgtname, h_parent, br, /*nd*/NULL); ++ mutex_unlock(h_mtx); ++ return h_dentry; ++} ++ ++static void au_call_do_plink_lkup(void *args) ++{ ++ struct au_do_plink_lkup_args *a = args; ++ *a->errp = au_do_plink_lkup(a->tgtname, a->h_parent, a->br); ++} ++ ++/* lookup the plink-ed @inode under the branch at @bindex */ ++struct dentry *au_plink_lkup(struct inode *inode, aufs_bindex_t bindex) ++{ ++ struct dentry *h_dentry, *h_parent; ++ struct au_branch *br; ++ struct inode *h_dir; ++ int wkq_err; ++ char a[PLINK_NAME_LEN]; ++ struct qstr tgtname = { ++ .name = a ++ }; ++ ++ AuDebugOn(au_plink_maint(inode->i_sb, AuLock_NOPLM)); ++ ++ br = au_sbr(inode->i_sb, bindex); ++ h_parent = br->br_wbr->wbr_plink; ++ h_dir = h_parent->d_inode; ++ tgtname.len = plink_name(a, sizeof(a), inode, bindex); ++ ++ if (current_fsuid()) { ++ struct au_do_plink_lkup_args args = { ++ .errp = &h_dentry, ++ .tgtname = &tgtname, ++ .h_parent = h_parent, ++ .br = br ++ }; ++ ++ wkq_err = au_wkq_wait(au_call_do_plink_lkup, &args); ++ if (unlikely(wkq_err)) ++ h_dentry = ERR_PTR(wkq_err); ++ } else ++ h_dentry = au_do_plink_lkup(&tgtname, h_parent, br); ++ ++ return h_dentry; ++} ++ ++/* create a pseudo-link */ ++static int do_whplink(struct qstr *tgt, struct dentry *h_parent, ++ struct dentry *h_dentry, struct au_branch *br) ++{ ++ int err; ++ struct path h_path = { ++ .mnt = br->br_mnt ++ }; ++ struct inode *h_dir; ++ ++ h_dir = h_parent->d_inode; ++ mutex_lock_nested(&h_dir->i_mutex, AuLsc_I_CHILD2); ++again: ++ h_path.dentry = au_lkup_one(tgt, h_parent, br, /*nd*/NULL); ++ err = PTR_ERR(h_path.dentry); ++ if (IS_ERR(h_path.dentry)) ++ goto out; ++ ++ err = 0; ++ /* wh.plink dir is not monitored */ ++ /* todo: is it really safe? 
*/ ++ if (h_path.dentry->d_inode ++ && h_path.dentry->d_inode != h_dentry->d_inode) { ++ err = vfsub_unlink(h_dir, &h_path, /*force*/0); ++ dput(h_path.dentry); ++ h_path.dentry = NULL; ++ if (!err) ++ goto again; ++ } ++ if (!err && !h_path.dentry->d_inode) ++ err = vfsub_link(h_dentry, h_dir, &h_path); ++ dput(h_path.dentry); ++ ++out: ++ mutex_unlock(&h_dir->i_mutex); ++ return err; ++} ++ ++struct do_whplink_args { ++ int *errp; ++ struct qstr *tgt; ++ struct dentry *h_parent; ++ struct dentry *h_dentry; ++ struct au_branch *br; ++}; ++ ++static void call_do_whplink(void *args) ++{ ++ struct do_whplink_args *a = args; ++ *a->errp = do_whplink(a->tgt, a->h_parent, a->h_dentry, a->br); ++} ++ ++static int whplink(struct dentry *h_dentry, struct inode *inode, ++ aufs_bindex_t bindex, struct au_branch *br) ++{ ++ int err, wkq_err; ++ struct au_wbr *wbr; ++ struct dentry *h_parent; ++ struct inode *h_dir; ++ char a[PLINK_NAME_LEN]; ++ struct qstr tgtname = { ++ .name = a ++ }; ++ ++ wbr = au_sbr(inode->i_sb, bindex)->br_wbr; ++ h_parent = wbr->wbr_plink; ++ h_dir = h_parent->d_inode; ++ tgtname.len = plink_name(a, sizeof(a), inode, bindex); ++ ++ /* always superio. */ ++ if (current_fsuid()) { ++ struct do_whplink_args args = { ++ .errp = &err, ++ .tgt = &tgtname, ++ .h_parent = h_parent, ++ .h_dentry = h_dentry, ++ .br = br ++ }; ++ wkq_err = au_wkq_wait(call_do_whplink, &args); ++ if (unlikely(wkq_err)) ++ err = wkq_err; ++ } else ++ err = do_whplink(&tgtname, h_parent, h_dentry, br); ++ ++ return err; ++} ++ ++/* free a single plink */ ++static void do_put_plink(struct pseudo_link *plink, int do_del) ++{ ++ if (do_del) ++ list_del(&plink->list); ++ iput(plink->inode); ++ kfree(plink); ++} ++ ++static void do_put_plink_rcu(struct rcu_head *rcu) ++{ ++ struct pseudo_link *plink; ++ ++ plink = container_of(rcu, struct pseudo_link, rcu); ++ iput(plink->inode); ++ kfree(plink); ++} ++ ++/* ++ * create a new pseudo-link for @h_dentry on @bindex. ++ * the linked inode is held in aufs @inode. 
++ */ ++void au_plink_append(struct inode *inode, aufs_bindex_t bindex, ++ struct dentry *h_dentry) ++{ ++ struct super_block *sb; ++ struct au_sbinfo *sbinfo; ++ struct list_head *plink_list; ++ struct pseudo_link *plink, *tmp; ++ int found, err, cnt; ++ ++ sb = inode->i_sb; ++ sbinfo = au_sbi(sb); ++ AuDebugOn(!au_opt_test(au_mntflags(sb), PLINK)); ++ AuDebugOn(au_plink_maint(sb, AuLock_NOPLM)); ++ ++ cnt = 0; ++ found = 0; ++ plink_list = &sbinfo->si_plink.head; ++ rcu_read_lock(); ++ list_for_each_entry_rcu(plink, plink_list, list) { ++ cnt++; ++ if (plink->inode == inode) { ++ found = 1; ++ break; ++ } ++ } ++ rcu_read_unlock(); ++ if (found) ++ return; ++ ++ tmp = kmalloc(sizeof(*plink), GFP_NOFS); ++ if (tmp) ++ tmp->inode = au_igrab(inode); ++ else { ++ err = -ENOMEM; ++ goto out; ++ } ++ ++ spin_lock(&sbinfo->si_plink.spin); ++ list_for_each_entry(plink, plink_list, list) { ++ if (plink->inode == inode) { ++ found = 1; ++ break; ++ } ++ } ++ if (!found) ++ list_add_rcu(&tmp->list, plink_list); ++ spin_unlock(&sbinfo->si_plink.spin); ++ if (!found) { ++ cnt++; ++ WARN_ONCE(cnt > AUFS_PLINK_WARN, ++ "unexpectedly many pseudo links, %d\n", cnt); ++ err = whplink(h_dentry, inode, bindex, au_sbr(sb, bindex)); ++ } else { ++ do_put_plink(tmp, 0); ++ return; ++ } ++ ++out: ++ if (unlikely(err)) { ++ pr_warning("err %d, damaged pseudo link.\n", err); ++ if (tmp) { ++ au_spl_del_rcu(&tmp->list, &sbinfo->si_plink); ++ call_rcu(&tmp->rcu, do_put_plink_rcu); ++ } ++ } ++} ++ ++/* free all plinks */ ++void au_plink_put(struct super_block *sb, int verbose) ++{ ++ struct au_sbinfo *sbinfo; ++ struct list_head *plink_list; ++ struct pseudo_link *plink, *tmp; ++ ++ SiMustWriteLock(sb); ++ ++ sbinfo = au_sbi(sb); ++ AuDebugOn(!au_opt_test(au_mntflags(sb), PLINK)); ++ AuDebugOn(au_plink_maint(sb, AuLock_NOPLM)); ++ ++ plink_list = &sbinfo->si_plink.head; ++ /* no spin_lock since sbinfo is write-locked */ ++ WARN(verbose && !list_empty(plink_list), "pseudo-link is not flushed"); ++ list_for_each_entry_safe(plink, tmp, plink_list, list) ++ do_put_plink(plink, 0); ++ INIT_LIST_HEAD(plink_list); ++} ++ ++void au_plink_clean(struct super_block *sb, int verbose) ++{ ++ struct dentry *root; ++ ++ root = sb->s_root; ++ aufs_write_lock(root); ++ if (au_opt_test(au_mntflags(sb), PLINK)) ++ au_plink_put(sb, verbose); ++ aufs_write_unlock(root); ++} ++ ++/* free the plinks on a branch specified by @br_id */ ++void au_plink_half_refresh(struct super_block *sb, aufs_bindex_t br_id) ++{ ++ struct au_sbinfo *sbinfo; ++ struct list_head *plink_list; ++ struct pseudo_link *plink, *tmp; ++ struct inode *inode; ++ aufs_bindex_t bstart, bend, bindex; ++ unsigned char do_put; ++ ++ SiMustWriteLock(sb); ++ ++ sbinfo = au_sbi(sb); ++ AuDebugOn(!au_opt_test(au_mntflags(sb), PLINK)); ++ AuDebugOn(au_plink_maint(sb, AuLock_NOPLM)); ++ ++ plink_list = &sbinfo->si_plink.head; ++ /* no spin_lock since sbinfo is write-locked */ ++ list_for_each_entry_safe(plink, tmp, plink_list, list) { ++ do_put = 0; ++ inode = au_igrab(plink->inode); ++ ii_write_lock_child(inode); ++ bstart = au_ibstart(inode); ++ bend = au_ibend(inode); ++ if (bstart >= 0) { ++ for (bindex = bstart; bindex <= bend; bindex++) { ++ if (!au_h_iptr(inode, bindex) ++ || au_ii_br_id(inode, bindex) != br_id) ++ continue; ++ au_set_h_iptr(inode, bindex, NULL, 0); ++ do_put = 1; ++ break; ++ } ++ } else ++ do_put_plink(plink, 1); ++ ++ if (do_put) { ++ for (bindex = bstart; bindex <= bend; bindex++) ++ if (au_h_iptr(inode, bindex)) { ++ do_put = 0; ++ break; ++ } ++ if 
(do_put) ++ do_put_plink(plink, 1); ++ } ++ ii_write_unlock(inode); ++ iput(inode); ++ } ++} +--- /dev/null 2012-03-14 12:35:58.848999748 +0100 ++++ b/fs/aufs/poll.c 2012-03-20 17:31:18.000000000 +0100 +@@ -0,0 +1,56 @@ ++/* ++ * Copyright (C) 2005-2012 Junjiro R. Okajima ++ * ++ * This program, aufs is free software; you can redistribute it and/or modify ++ * it under the terms of the GNU General Public License as published by ++ * the Free Software Foundation; either version 2 of the License, or ++ * (at your option) any later version. ++ * ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ * GNU General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public License ++ * along with this program; if not, write to the Free Software ++ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA ++ */ ++ ++/* ++ * poll operation ++ * There is only one filesystem which implements ->poll operation, currently. ++ */ ++ ++#include "aufs.h" ++ ++unsigned int aufs_poll(struct file *file, poll_table *wait) ++{ ++ unsigned int mask; ++ int err; ++ struct file *h_file; ++ struct dentry *dentry; ++ struct super_block *sb; ++ ++ /* We should pretend an error happened. */ ++ mask = POLLERR /* | POLLIN | POLLOUT */; ++ dentry = file->f_dentry; ++ sb = dentry->d_sb; ++ si_read_lock(sb, AuLock_FLUSH | AuLock_NOPLMW); ++ err = au_reval_and_lock_fdi(file, au_reopen_nondir, /*wlock*/0); ++ if (unlikely(err)) ++ goto out; ++ ++ /* it is not an error if h_file has no operation */ ++ mask = DEFAULT_POLLMASK; ++ h_file = au_hf_top(file); ++ if (h_file->f_op && h_file->f_op->poll) ++ mask = h_file->f_op->poll(h_file, wait); ++ ++ di_read_unlock(dentry, AuLock_IR); ++ fi_read_unlock(file); ++ ++out: ++ si_read_unlock(sb); ++ AuTraceErr((int)mask); ++ return mask; ++} +--- /dev/null 2012-03-14 12:35:58.848999748 +0100 ++++ b/fs/aufs/procfs.c 2012-03-20 17:31:18.000000000 +0100 +@@ -0,0 +1,170 @@ ++/* ++ * Copyright (C) 2010-2012 Junjiro R. Okajima ++ * ++ * This program, aufs is free software; you can redistribute it and/or modify ++ * it under the terms of the GNU General Public License as published by ++ * the Free Software Foundation; either version 2 of the License, or ++ * (at your option) any later version. ++ * ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ * GNU General Public License for more details. 
++ *
++ * You should have received a copy of the GNU General Public License
++ * along with this program; if not, write to the Free Software
++ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
++ */
++
++/*
++ * procfs interfaces
++ */
++
++#include <linux/proc_fs.h>
++#include "aufs.h"
++
++static int au_procfs_plm_release(struct inode *inode, struct file *file)
++{
++ struct au_sbinfo *sbinfo;
++
++ sbinfo = file->private_data;
++ if (sbinfo) {
++ au_plink_maint_leave(sbinfo);
++ kobject_put(&sbinfo->si_kobj);
++ }
++
++ return 0;
++}
++
++static void au_procfs_plm_write_clean(struct file *file)
++{
++ struct au_sbinfo *sbinfo;
++
++ sbinfo = file->private_data;
++ if (sbinfo)
++ au_plink_clean(sbinfo->si_sb, /*verbose*/0);
++}
++
++static int au_procfs_plm_write_si(struct file *file, unsigned long id)
++{
++ int err;
++ struct super_block *sb;
++ struct au_sbinfo *sbinfo;
++
++ err = -EBUSY;
++ if (unlikely(file->private_data))
++ goto out;
++
++ sb = NULL;
++ /* don't use au_sbilist_lock() here */
++ spin_lock(&au_sbilist.spin);
++ list_for_each_entry(sbinfo, &au_sbilist.head, si_list)
++ if (id == sysaufs_si_id(sbinfo)) {
++ kobject_get(&sbinfo->si_kobj);
++ sb = sbinfo->si_sb;
++ break;
++ }
++ spin_unlock(&au_sbilist.spin);
++
++ err = -EINVAL;
++ if (unlikely(!sb))
++ goto out;
++
++ err = au_plink_maint_enter(sb);
++ if (!err)
++ /* keep kobject_get() */
++ file->private_data = sbinfo;
++ else
++ kobject_put(&sbinfo->si_kobj);
++out:
++ return err;
++}
++
++/*
++ * Accept a valid "si=xxxx" only.
++ * Once it is accepted successfully, accept "clean" too.
++ */
++static ssize_t au_procfs_plm_write(struct file *file, const char __user *ubuf,
++ size_t count, loff_t *ppos)
++{
++ ssize_t err;
++ unsigned long id;
++ /* last newline is allowed */
++ char buf[3 + sizeof(unsigned long) * 2 + 1];
++
++ err = -EACCES;
++ if (unlikely(!capable(CAP_SYS_ADMIN)))
++ goto out;
++
++ err = -EINVAL;
++ if (unlikely(count > sizeof(buf)))
++ goto out;
++
++ err = copy_from_user(buf, ubuf, count);
++ if (unlikely(err)) {
++ err = -EFAULT;
++ goto out;
++ }
++ buf[count] = 0;
++
++ err = -EINVAL;
++ if (!strcmp("clean", buf)) {
++ au_procfs_plm_write_clean(file);
++ goto out_success;
++ } else if (unlikely(strncmp("si=", buf, 3)))
++ goto out;
++
++ err = kstrtoul(buf + 3, 16, &id);
++ if (unlikely(err))
++ goto out;
++
++ err = au_procfs_plm_write_si(file, id);
++ if (unlikely(err))
++ goto out;
++
++out_success:
++ err = count; /* success */
++out:
++ return err;
++}
++
++static const struct file_operations au_procfs_plm_fop = {
++ .write = au_procfs_plm_write,
++ .release = au_procfs_plm_release,
++ .owner = THIS_MODULE
++};
++
++/* ---------------------------------------------------------------------- */
++
++static struct proc_dir_entry *au_procfs_dir;
++
++void au_procfs_fin(void)
++{
++ remove_proc_entry(AUFS_PLINK_MAINT_NAME, au_procfs_dir);
++ remove_proc_entry(AUFS_PLINK_MAINT_DIR, NULL);
++}
++
++int __init au_procfs_init(void)
++{
++ int err;
++ struct proc_dir_entry *entry;
++
++ err = -ENOMEM;
++ au_procfs_dir = proc_mkdir(AUFS_PLINK_MAINT_DIR, NULL);
++ if (unlikely(!au_procfs_dir))
++ goto out;
++
++ entry = proc_create(AUFS_PLINK_MAINT_NAME, S_IFREG | S_IWUSR,
++ au_procfs_dir, &au_procfs_plm_fop);
++ if (unlikely(!entry))
++ goto out_dir;
++
++ err = 0;
++ goto out; /* success */
++
++
++out_dir:
++ remove_proc_entry(AUFS_PLINK_MAINT_DIR, NULL);
++out:
++ return err;
++}
+--- /dev/null 2012-03-14 12:35:58.848999748 +0100
++++ b/fs/aufs/rdu.c 2012-03-20 17:31:18.000000000 +0100
+@@ -0,0 +1,383 @@
++/*
++ * Copyright (C) 2005-2012 Junjiro R. Okajima
++ *
++ * This program, aufs is free software; you can redistribute it and/or modify
++ * it under the terms of the GNU General Public License as published by
++ * the Free Software Foundation; either version 2 of the License, or
++ * (at your option) any later version.
++ *
++ * This program is distributed in the hope that it will be useful,
++ * but WITHOUT ANY WARRANTY; without even the implied warranty of
++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
++ * GNU General Public License for more details.
++ *
++ * You should have received a copy of the GNU General Public License
++ * along with this program; if not, write to the Free Software
++ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
++ */
++
++/*
++ * readdir in userspace.
++ */
++
++#include <linux/compat.h>
++#include <linux/fs_stack.h>
++#include <linux/security.h>
++#include "aufs.h"
++
++/* bits for struct aufs_rdu.flags */
++#define AuRdu_CALLED 1
++#define AuRdu_CONT (1 << 1)
++#define AuRdu_FULL (1 << 2)
++#define au_ftest_rdu(flags, name) ((flags) & AuRdu_##name)
++#define au_fset_rdu(flags, name) \
++ do { (flags) |= AuRdu_##name; } while (0)
++#define au_fclr_rdu(flags, name) \
++ do { (flags) &= ~AuRdu_##name; } while (0)
++
++struct au_rdu_arg {
++ struct aufs_rdu *rdu;
++ union au_rdu_ent_ul ent;
++ unsigned long end;
++
++ struct super_block *sb;
++ int err;
++};
++
++static int au_rdu_fill(void *__arg, const char *name, int nlen,
++ loff_t offset, u64 h_ino, unsigned int d_type)
++{
++ int err, len;
++ struct au_rdu_arg *arg = __arg;
++ struct aufs_rdu *rdu = arg->rdu;
++ struct au_rdu_ent ent;
++
++ err = 0;
++ arg->err = 0;
++ au_fset_rdu(rdu->cookie.flags, CALLED);
++ len = au_rdu_len(nlen);
++ if (arg->ent.ul + len < arg->end) {
++ ent.ino = h_ino;
++ ent.bindex = rdu->cookie.bindex;
++ ent.type = d_type;
++ ent.nlen = nlen;
++ if (unlikely(nlen > AUFS_MAX_NAMELEN))
++ ent.type = DT_UNKNOWN;
++
++ /* unnecessary to support mmap_sem since this is a dir */
++ err = -EFAULT;
++ if (copy_to_user(arg->ent.e, &ent, sizeof(ent)))
++ goto out;
++ if (copy_to_user(arg->ent.e->name, name, nlen))
++ goto out;
++ /* the terminating NULL */
++ if (__put_user(0, arg->ent.e->name + nlen))
++ goto out;
++ err = 0;
++ /* AuDbg("%p, %.*s\n", arg->ent.p, nlen, name); */
++ arg->ent.ul += len;
++ rdu->rent++;
++ } else {
++ err = -EFAULT;
++ au_fset_rdu(rdu->cookie.flags, FULL);
++ rdu->full = 1;
++ rdu->tail = arg->ent;
++ }
++
++out:
++ /* AuTraceErr(err); */
++ return err;
++}
++
++static int au_rdu_do(struct file *h_file, struct au_rdu_arg *arg)
++{
++ int err;
++ loff_t offset;
++ struct au_rdu_cookie *cookie = &arg->rdu->cookie;
++
++ offset = vfsub_llseek(h_file, cookie->h_pos, SEEK_SET);
++ err = offset;
++ if (unlikely(offset != cookie->h_pos))
++ goto out;
++
++ err = 0;
++ do {
++ arg->err = 0;
++ au_fclr_rdu(cookie->flags, CALLED);
++ /* smp_mb(); */
++ err = vfsub_readdir(h_file, au_rdu_fill, arg);
++ if (err >= 0)
++ err = arg->err;
++ } while (!err
++ && au_ftest_rdu(cookie->flags, CALLED)
++ && !au_ftest_rdu(cookie->flags, FULL));
++ cookie->h_pos = h_file->f_pos;
++
++out:
++ AuTraceErr(err);
++ return err;
++}
++
++static int au_rdu(struct file *file, struct aufs_rdu *rdu)
++{
++ int err;
++ aufs_bindex_t bend;
++ struct au_rdu_arg arg;
++ struct dentry *dentry;
++ struct inode *inode;
++ struct file *h_file;
++ struct au_rdu_cookie *cookie = &rdu->cookie;
++
++ err = !access_ok(VERIFY_WRITE, rdu->ent.e, rdu->sz);
++ if (unlikely(err)) {
++ err
= -EFAULT; ++ AuTraceErr(err); ++ goto out; ++ } ++ rdu->rent = 0; ++ rdu->tail = rdu->ent; ++ rdu->full = 0; ++ arg.rdu = rdu; ++ arg.ent = rdu->ent; ++ arg.end = arg.ent.ul; ++ arg.end += rdu->sz; ++ ++ err = -ENOTDIR; ++ if (unlikely(!file->f_op || !file->f_op->readdir)) ++ goto out; ++ ++ err = security_file_permission(file, MAY_READ); ++ AuTraceErr(err); ++ if (unlikely(err)) ++ goto out; ++ ++ dentry = file->f_dentry; ++ inode = dentry->d_inode; ++#if 1 ++ mutex_lock(&inode->i_mutex); ++#else ++ err = mutex_lock_killable(&inode->i_mutex); ++ AuTraceErr(err); ++ if (unlikely(err)) ++ goto out; ++#endif ++ ++ arg.sb = inode->i_sb; ++ err = si_read_lock(arg.sb, AuLock_FLUSH | AuLock_NOPLM); ++ if (unlikely(err)) ++ goto out_mtx; ++ err = au_alive_dir(dentry); ++ if (unlikely(err)) ++ goto out_si; ++ /* todo: reval? */ ++ fi_read_lock(file); ++ ++ err = -EAGAIN; ++ if (unlikely(au_ftest_rdu(cookie->flags, CONT) ++ && cookie->generation != au_figen(file))) ++ goto out_unlock; ++ ++ err = 0; ++ if (!rdu->blk) { ++ rdu->blk = au_sbi(arg.sb)->si_rdblk; ++ if (!rdu->blk) ++ rdu->blk = au_dir_size(file, /*dentry*/NULL); ++ } ++ bend = au_fbstart(file); ++ if (cookie->bindex < bend) ++ cookie->bindex = bend; ++ bend = au_fbend_dir(file); ++ /* AuDbg("b%d, b%d\n", cookie->bindex, bend); */ ++ for (; !err && cookie->bindex <= bend; ++ cookie->bindex++, cookie->h_pos = 0) { ++ h_file = au_hf_dir(file, cookie->bindex); ++ if (!h_file) ++ continue; ++ ++ au_fclr_rdu(cookie->flags, FULL); ++ err = au_rdu_do(h_file, &arg); ++ AuTraceErr(err); ++ if (unlikely(au_ftest_rdu(cookie->flags, FULL) || err)) ++ break; ++ } ++ AuDbg("rent %llu\n", rdu->rent); ++ ++ if (!err && !au_ftest_rdu(cookie->flags, CONT)) { ++ rdu->shwh = !!au_opt_test(au_sbi(arg.sb)->si_mntflags, SHWH); ++ au_fset_rdu(cookie->flags, CONT); ++ cookie->generation = au_figen(file); ++ } ++ ++ ii_read_lock_child(inode); ++ fsstack_copy_attr_atime(inode, au_h_iptr(inode, au_ibstart(inode))); ++ ii_read_unlock(inode); ++ ++out_unlock: ++ fi_read_unlock(file); ++out_si: ++ si_read_unlock(arg.sb); ++out_mtx: ++ mutex_unlock(&inode->i_mutex); ++out: ++ AuTraceErr(err); ++ return err; ++} ++ ++static int au_rdu_ino(struct file *file, struct aufs_rdu *rdu) ++{ ++ int err; ++ ino_t ino; ++ unsigned long long nent; ++ union au_rdu_ent_ul *u; ++ struct au_rdu_ent ent; ++ struct super_block *sb; ++ ++ err = 0; ++ nent = rdu->nent; ++ u = &rdu->ent; ++ sb = file->f_dentry->d_sb; ++ si_read_lock(sb, AuLock_FLUSH); ++ while (nent-- > 0) { ++ /* unnecessary to support mmap_sem since this is a dir */ ++ err = copy_from_user(&ent, u->e, sizeof(ent)); ++ if (!err) ++ err = !access_ok(VERIFY_WRITE, &u->e->ino, sizeof(ino)); ++ if (unlikely(err)) { ++ err = -EFAULT; ++ AuTraceErr(err); ++ break; ++ } ++ ++ /* AuDbg("b%d, i%llu\n", ent.bindex, ent.ino); */ ++ if (!ent.wh) ++ err = au_ino(sb, ent.bindex, ent.ino, ent.type, &ino); ++ else ++ err = au_wh_ino(sb, ent.bindex, ent.ino, ent.type, ++ &ino); ++ if (unlikely(err)) { ++ AuTraceErr(err); ++ break; ++ } ++ ++ err = __put_user(ino, &u->e->ino); ++ if (unlikely(err)) { ++ err = -EFAULT; ++ AuTraceErr(err); ++ break; ++ } ++ u->ul += au_rdu_len(ent.nlen); ++ } ++ si_read_unlock(sb); ++ ++ return err; ++} ++ ++/* ---------------------------------------------------------------------- */ ++ ++static int au_rdu_verify(struct aufs_rdu *rdu) ++{ ++ AuDbg("rdu{%llu, %p, %u | %u | %llu, %u, %u | " ++ "%llu, b%d, 0x%x, g%u}\n", ++ rdu->sz, rdu->ent.e, rdu->verify[AufsCtlRduV_SZ], ++ rdu->blk, ++ rdu->rent, rdu->shwh, 
rdu->full, ++ rdu->cookie.h_pos, rdu->cookie.bindex, rdu->cookie.flags, ++ rdu->cookie.generation); ++ ++ if (rdu->verify[AufsCtlRduV_SZ] == sizeof(*rdu)) ++ return 0; ++ ++ AuDbg("%u:%u\n", ++ rdu->verify[AufsCtlRduV_SZ], (unsigned int)sizeof(*rdu)); ++ return -EINVAL; ++} ++ ++long au_rdu_ioctl(struct file *file, unsigned int cmd, unsigned long arg) ++{ ++ long err, e; ++ struct aufs_rdu rdu; ++ void __user *p = (void __user *)arg; ++ ++ err = copy_from_user(&rdu, p, sizeof(rdu)); ++ if (unlikely(err)) { ++ err = -EFAULT; ++ AuTraceErr(err); ++ goto out; ++ } ++ err = au_rdu_verify(&rdu); ++ if (unlikely(err)) ++ goto out; ++ ++ switch (cmd) { ++ case AUFS_CTL_RDU: ++ err = au_rdu(file, &rdu); ++ if (unlikely(err)) ++ break; ++ ++ e = copy_to_user(p, &rdu, sizeof(rdu)); ++ if (unlikely(e)) { ++ err = -EFAULT; ++ AuTraceErr(err); ++ } ++ break; ++ case AUFS_CTL_RDU_INO: ++ err = au_rdu_ino(file, &rdu); ++ break; ++ ++ default: ++ /* err = -ENOTTY; */ ++ err = -EINVAL; ++ } ++ ++out: ++ AuTraceErr(err); ++ return err; ++} ++ ++#ifdef CONFIG_COMPAT ++long au_rdu_compat_ioctl(struct file *file, unsigned int cmd, unsigned long arg) ++{ ++ long err, e; ++ struct aufs_rdu rdu; ++ void __user *p = compat_ptr(arg); ++ ++ /* todo: get_user()? */ ++ err = copy_from_user(&rdu, p, sizeof(rdu)); ++ if (unlikely(err)) { ++ err = -EFAULT; ++ AuTraceErr(err); ++ goto out; ++ } ++ rdu.ent.e = compat_ptr(rdu.ent.ul); ++ err = au_rdu_verify(&rdu); ++ if (unlikely(err)) ++ goto out; ++ ++ switch (cmd) { ++ case AUFS_CTL_RDU: ++ err = au_rdu(file, &rdu); ++ if (unlikely(err)) ++ break; ++ ++ rdu.ent.ul = ptr_to_compat(rdu.ent.e); ++ rdu.tail.ul = ptr_to_compat(rdu.tail.e); ++ e = copy_to_user(p, &rdu, sizeof(rdu)); ++ if (unlikely(e)) { ++ err = -EFAULT; ++ AuTraceErr(err); ++ } ++ break; ++ case AUFS_CTL_RDU_INO: ++ err = au_rdu_ino(file, &rdu); ++ break; ++ ++ default: ++ /* err = -ENOTTY; */ ++ err = -EINVAL; ++ } ++ ++out: ++ AuTraceErr(err); ++ return err; ++} ++#endif +--- /dev/null 2012-03-14 12:35:58.848999748 +0100 ++++ b/fs/aufs/rwsem.h 2012-03-20 17:31:18.000000000 +0100 +@@ -0,0 +1,188 @@ ++/* ++ * Copyright (C) 2005-2012 Junjiro R. Okajima ++ * ++ * This program, aufs is free software; you can redistribute it and/or modify ++ * it under the terms of the GNU General Public License as published by ++ * the Free Software Foundation; either version 2 of the License, or ++ * (at your option) any later version. ++ * ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ * GNU General Public License for more details. 
++ * ++ * You should have received a copy of the GNU General Public License ++ * along with this program; if not, write to the Free Software ++ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA ++ */ ++ ++/* ++ * simple read-write semaphore wrappers ++ */ ++ ++#ifndef __AUFS_RWSEM_H__ ++#define __AUFS_RWSEM_H__ ++ ++#ifdef __KERNEL__ ++ ++#include "debug.h" ++ ++struct au_rwsem { ++ struct rw_semaphore rwsem; ++#ifdef CONFIG_AUFS_DEBUG ++ /* just for debugging, not almighty counter */ ++ atomic_t rcnt, wcnt; ++#endif ++}; ++ ++#ifdef CONFIG_AUFS_DEBUG ++#define AuDbgCntInit(rw) do { \ ++ atomic_set(&(rw)->rcnt, 0); \ ++ atomic_set(&(rw)->wcnt, 0); \ ++ smp_mb(); /* atomic set */ \ ++} while (0) ++ ++#define AuDbgRcntInc(rw) atomic_inc(&(rw)->rcnt) ++#define AuDbgRcntDec(rw) WARN_ON(atomic_dec_return(&(rw)->rcnt) < 0) ++#define AuDbgWcntInc(rw) atomic_inc(&(rw)->wcnt) ++#define AuDbgWcntDec(rw) WARN_ON(atomic_dec_return(&(rw)->wcnt) < 0) ++#else ++#define AuDbgCntInit(rw) do {} while (0) ++#define AuDbgRcntInc(rw) do {} while (0) ++#define AuDbgRcntDec(rw) do {} while (0) ++#define AuDbgWcntInc(rw) do {} while (0) ++#define AuDbgWcntDec(rw) do {} while (0) ++#endif /* CONFIG_AUFS_DEBUG */ ++ ++/* to debug easier, do not make them inlined functions */ ++#define AuRwMustNoWaiters(rw) AuDebugOn(!list_empty(&(rw)->rwsem.wait_list)) ++/* rwsem_is_locked() is unusable */ ++#define AuRwMustReadLock(rw) AuDebugOn(atomic_read(&(rw)->rcnt) <= 0) ++#define AuRwMustWriteLock(rw) AuDebugOn(atomic_read(&(rw)->wcnt) <= 0) ++#define AuRwMustAnyLock(rw) AuDebugOn(atomic_read(&(rw)->rcnt) <= 0 \ ++ && atomic_read(&(rw)->wcnt) <= 0) ++#define AuRwDestroy(rw) AuDebugOn(atomic_read(&(rw)->rcnt) \ ++ || atomic_read(&(rw)->wcnt)) ++ ++#define au_rw_class(rw, key) lockdep_set_class(&(rw)->rwsem, key) ++ ++static inline void au_rw_init(struct au_rwsem *rw) ++{ ++ AuDbgCntInit(rw); ++ init_rwsem(&rw->rwsem); ++} ++ ++static inline void au_rw_init_wlock(struct au_rwsem *rw) ++{ ++ au_rw_init(rw); ++ down_write(&rw->rwsem); ++ AuDbgWcntInc(rw); ++} ++ ++static inline void au_rw_init_wlock_nested(struct au_rwsem *rw, ++ unsigned int lsc) ++{ ++ au_rw_init(rw); ++ down_write_nested(&rw->rwsem, lsc); ++ AuDbgWcntInc(rw); ++} ++ ++static inline void au_rw_read_lock(struct au_rwsem *rw) ++{ ++ down_read(&rw->rwsem); ++ AuDbgRcntInc(rw); ++} ++ ++static inline void au_rw_read_lock_nested(struct au_rwsem *rw, unsigned int lsc) ++{ ++ down_read_nested(&rw->rwsem, lsc); ++ AuDbgRcntInc(rw); ++} ++ ++static inline void au_rw_read_unlock(struct au_rwsem *rw) ++{ ++ AuRwMustReadLock(rw); ++ AuDbgRcntDec(rw); ++ up_read(&rw->rwsem); ++} ++ ++static inline void au_rw_dgrade_lock(struct au_rwsem *rw) ++{ ++ AuRwMustWriteLock(rw); ++ AuDbgRcntInc(rw); ++ AuDbgWcntDec(rw); ++ downgrade_write(&rw->rwsem); ++} ++ ++static inline void au_rw_write_lock(struct au_rwsem *rw) ++{ ++ down_write(&rw->rwsem); ++ AuDbgWcntInc(rw); ++} ++ ++static inline void au_rw_write_lock_nested(struct au_rwsem *rw, ++ unsigned int lsc) ++{ ++ down_write_nested(&rw->rwsem, lsc); ++ AuDbgWcntInc(rw); ++} ++ ++static inline void au_rw_write_unlock(struct au_rwsem *rw) ++{ ++ AuRwMustWriteLock(rw); ++ AuDbgWcntDec(rw); ++ up_write(&rw->rwsem); ++} ++ ++/* why is not _nested version defined */ ++static inline int au_rw_read_trylock(struct au_rwsem *rw) ++{ ++ int ret = down_read_trylock(&rw->rwsem); ++ if (ret) ++ AuDbgRcntInc(rw); ++ return ret; ++} ++ ++static inline int au_rw_write_trylock(struct au_rwsem *rw) ++{ ++ int ret = 
down_write_trylock(&rw->rwsem); ++ if (ret) ++ AuDbgWcntInc(rw); ++ return ret; ++} ++ ++#undef AuDbgCntInit ++#undef AuDbgRcntInc ++#undef AuDbgRcntDec ++#undef AuDbgWcntInc ++#undef AuDbgWcntDec ++ ++#define AuSimpleLockRwsemFuncs(prefix, param, rwsem) \ ++static inline void prefix##_read_lock(param) \ ++{ au_rw_read_lock(rwsem); } \ ++static inline void prefix##_write_lock(param) \ ++{ au_rw_write_lock(rwsem); } \ ++static inline int prefix##_read_trylock(param) \ ++{ return au_rw_read_trylock(rwsem); } \ ++static inline int prefix##_write_trylock(param) \ ++{ return au_rw_write_trylock(rwsem); } ++/* why is not _nested version defined */ ++/* static inline void prefix##_read_trylock_nested(param, lsc) ++{ au_rw_read_trylock_nested(rwsem, lsc)); } ++static inline void prefix##_write_trylock_nestd(param, lsc) ++{ au_rw_write_trylock_nested(rwsem, lsc); } */ ++ ++#define AuSimpleUnlockRwsemFuncs(prefix, param, rwsem) \ ++static inline void prefix##_read_unlock(param) \ ++{ au_rw_read_unlock(rwsem); } \ ++static inline void prefix##_write_unlock(param) \ ++{ au_rw_write_unlock(rwsem); } \ ++static inline void prefix##_downgrade_lock(param) \ ++{ au_rw_dgrade_lock(rwsem); } ++ ++#define AuSimpleRwsemFuncs(prefix, param, rwsem) \ ++ AuSimpleLockRwsemFuncs(prefix, param, rwsem) \ ++ AuSimpleUnlockRwsemFuncs(prefix, param, rwsem) ++ ++#endif /* __KERNEL__ */ ++#endif /* __AUFS_RWSEM_H__ */ +--- /dev/null 2012-03-14 12:35:58.848999748 +0100 ++++ b/fs/aufs/sbinfo.c 2012-03-20 17:31:18.000000000 +0100 +@@ -0,0 +1,343 @@ ++/* ++ * Copyright (C) 2005-2012 Junjiro R. Okajima ++ * ++ * This program, aufs is free software; you can redistribute it and/or modify ++ * it under the terms of the GNU General Public License as published by ++ * the Free Software Foundation; either version 2 of the License, or ++ * (at your option) any later version. ++ * ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ * GNU General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public License ++ * along with this program; if not, write to the Free Software ++ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA ++ */ ++ ++/* ++ * superblock private data ++ */ ++ ++#include "aufs.h" ++ ++/* ++ * they are necessary regardless sysfs is disabled. 
++ */ ++void au_si_free(struct kobject *kobj) ++{ ++ struct au_sbinfo *sbinfo; ++ char *locked __maybe_unused; /* debug only */ ++ ++ sbinfo = container_of(kobj, struct au_sbinfo, si_kobj); ++ AuDebugOn(!list_empty(&sbinfo->si_plink.head)); ++ AuDebugOn(atomic_read(&sbinfo->si_nowait.nw_len)); ++ ++ au_rw_write_lock(&sbinfo->si_rwsem); ++ au_br_free(sbinfo); ++ au_rw_write_unlock(&sbinfo->si_rwsem); ++ ++ AuDebugOn(radix_tree_gang_lookup ++ (&sbinfo->au_si_pid.tree, (void **)&locked, ++ /*first_index*/PID_MAX_DEFAULT - 1, ++ /*max_items*/sizeof(locked)/sizeof(*locked))); ++ ++ kfree(sbinfo->si_branch); ++ kfree(sbinfo->au_si_pid.bitmap); ++ mutex_destroy(&sbinfo->si_xib_mtx); ++ AuRwDestroy(&sbinfo->si_rwsem); ++ ++ kfree(sbinfo); ++} ++ ++int au_si_alloc(struct super_block *sb) ++{ ++ int err; ++ struct au_sbinfo *sbinfo; ++ static struct lock_class_key aufs_si; ++ ++ err = -ENOMEM; ++ sbinfo = kzalloc(sizeof(*sbinfo), GFP_NOFS); ++ if (unlikely(!sbinfo)) ++ goto out; ++ ++ BUILD_BUG_ON(sizeof(unsigned long) != ++ sizeof(*sbinfo->au_si_pid.bitmap)); ++ sbinfo->au_si_pid.bitmap = kcalloc(BITS_TO_LONGS(PID_MAX_DEFAULT), ++ sizeof(*sbinfo->au_si_pid.bitmap), ++ GFP_NOFS); ++ if (unlikely(!sbinfo->au_si_pid.bitmap)) ++ goto out_sbinfo; ++ ++ /* will be reallocated separately */ ++ sbinfo->si_branch = kzalloc(sizeof(*sbinfo->si_branch), GFP_NOFS); ++ if (unlikely(!sbinfo->si_branch)) ++ goto out_pidmap; ++ ++ err = sysaufs_si_init(sbinfo); ++ if (unlikely(err)) ++ goto out_br; ++ ++ au_nwt_init(&sbinfo->si_nowait); ++ au_rw_init_wlock(&sbinfo->si_rwsem); ++ au_rw_class(&sbinfo->si_rwsem, &aufs_si); ++ spin_lock_init(&sbinfo->au_si_pid.tree_lock); ++ INIT_RADIX_TREE(&sbinfo->au_si_pid.tree, GFP_ATOMIC | __GFP_NOFAIL); ++ ++ atomic_long_set(&sbinfo->si_ninodes, 0); ++ atomic_long_set(&sbinfo->si_nfiles, 0); ++ ++ sbinfo->si_bend = -1; ++ ++ sbinfo->si_wbr_copyup = AuWbrCopyup_Def; ++ sbinfo->si_wbr_create = AuWbrCreate_Def; ++ sbinfo->si_wbr_copyup_ops = au_wbr_copyup_ops + sbinfo->si_wbr_copyup; ++ sbinfo->si_wbr_create_ops = au_wbr_create_ops + sbinfo->si_wbr_create; ++ ++ sbinfo->si_mntflags = au_opts_plink(AuOpt_Def); ++ ++ mutex_init(&sbinfo->si_xib_mtx); ++ sbinfo->si_xino_brid = -1; ++ /* leave si_xib_last_pindex and si_xib_next_bit */ ++ ++ sbinfo->si_rdcache = msecs_to_jiffies(AUFS_RDCACHE_DEF * MSEC_PER_SEC); ++ sbinfo->si_rdblk = AUFS_RDBLK_DEF; ++ sbinfo->si_rdhash = AUFS_RDHASH_DEF; ++ sbinfo->si_dirwh = AUFS_DIRWH_DEF; ++ ++ au_spl_init(&sbinfo->si_plink); ++ init_waitqueue_head(&sbinfo->si_plink_wq); ++ spin_lock_init(&sbinfo->si_plink_maint_lock); ++ ++ /* leave other members for sysaufs and si_mnt. 
*/ ++ sbinfo->si_sb = sb; ++ sb->s_fs_info = sbinfo; ++ si_pid_set(sb); ++ au_debug_sbinfo_init(sbinfo); ++ return 0; /* success */ ++ ++out_br: ++ kfree(sbinfo->si_branch); ++out_pidmap: ++ kfree(sbinfo->au_si_pid.bitmap); ++out_sbinfo: ++ kfree(sbinfo); ++out: ++ return err; ++} ++ ++int au_sbr_realloc(struct au_sbinfo *sbinfo, int nbr) ++{ ++ int err, sz; ++ struct au_branch **brp; ++ ++ AuRwMustWriteLock(&sbinfo->si_rwsem); ++ ++ err = -ENOMEM; ++ sz = sizeof(*brp) * (sbinfo->si_bend + 1); ++ if (unlikely(!sz)) ++ sz = sizeof(*brp); ++ brp = au_kzrealloc(sbinfo->si_branch, sz, sizeof(*brp) * nbr, GFP_NOFS); ++ if (brp) { ++ sbinfo->si_branch = brp; ++ err = 0; ++ } ++ ++ return err; ++} ++ ++/* ---------------------------------------------------------------------- */ ++ ++unsigned int au_sigen_inc(struct super_block *sb) ++{ ++ unsigned int gen; ++ ++ SiMustWriteLock(sb); ++ ++ gen = ++au_sbi(sb)->si_generation; ++ au_update_digen(sb->s_root); ++ au_update_iigen(sb->s_root->d_inode); ++ sb->s_root->d_inode->i_version++; ++ return gen; ++} ++ ++aufs_bindex_t au_new_br_id(struct super_block *sb) ++{ ++ aufs_bindex_t br_id; ++ int i; ++ struct au_sbinfo *sbinfo; ++ ++ SiMustWriteLock(sb); ++ ++ sbinfo = au_sbi(sb); ++ for (i = 0; i <= AUFS_BRANCH_MAX; i++) { ++ br_id = ++sbinfo->si_last_br_id; ++ AuDebugOn(br_id < 0); ++ if (br_id && au_br_index(sb, br_id) < 0) ++ return br_id; ++ } ++ ++ return -1; ++} ++ ++/* ---------------------------------------------------------------------- */ ++ ++/* it is ok that new 'nwt' tasks are appended while we are sleeping */ ++int si_read_lock(struct super_block *sb, int flags) ++{ ++ int err; ++ ++ err = 0; ++ if (au_ftest_lock(flags, FLUSH)) ++ au_nwt_flush(&au_sbi(sb)->si_nowait); ++ ++ si_noflush_read_lock(sb); ++ err = au_plink_maint(sb, flags); ++ if (unlikely(err)) ++ si_read_unlock(sb); ++ ++ return err; ++} ++ ++int si_write_lock(struct super_block *sb, int flags) ++{ ++ int err; ++ ++ if (au_ftest_lock(flags, FLUSH)) ++ au_nwt_flush(&au_sbi(sb)->si_nowait); ++ ++ si_noflush_write_lock(sb); ++ err = au_plink_maint(sb, flags); ++ if (unlikely(err)) ++ si_write_unlock(sb); ++ ++ return err; ++} ++ ++/* dentry and super_block lock. 
call at entry point */ ++int aufs_read_lock(struct dentry *dentry, int flags) ++{ ++ int err; ++ struct super_block *sb; ++ ++ sb = dentry->d_sb; ++ err = si_read_lock(sb, flags); ++ if (unlikely(err)) ++ goto out; ++ ++ if (au_ftest_lock(flags, DW)) ++ di_write_lock_child(dentry); ++ else ++ di_read_lock_child(dentry, flags); ++ ++ if (au_ftest_lock(flags, GEN)) { ++ err = au_digen_test(dentry, au_sigen(sb)); ++ AuDebugOn(!err && au_dbrange_test(dentry)); ++ if (unlikely(err)) ++ aufs_read_unlock(dentry, flags); ++ } ++ ++out: ++ return err; ++} ++ ++void aufs_read_unlock(struct dentry *dentry, int flags) ++{ ++ if (au_ftest_lock(flags, DW)) ++ di_write_unlock(dentry); ++ else ++ di_read_unlock(dentry, flags); ++ si_read_unlock(dentry->d_sb); ++} ++ ++void aufs_write_lock(struct dentry *dentry) ++{ ++ si_write_lock(dentry->d_sb, AuLock_FLUSH | AuLock_NOPLMW); ++ di_write_lock_child(dentry); ++} ++ ++void aufs_write_unlock(struct dentry *dentry) ++{ ++ di_write_unlock(dentry); ++ si_write_unlock(dentry->d_sb); ++} ++ ++int aufs_read_and_write_lock2(struct dentry *d1, struct dentry *d2, int flags) ++{ ++ int err; ++ unsigned int sigen; ++ struct super_block *sb; ++ ++ sb = d1->d_sb; ++ err = si_read_lock(sb, flags); ++ if (unlikely(err)) ++ goto out; ++ ++ di_write_lock2_child(d1, d2, au_ftest_lock(flags, DIR)); ++ ++ if (au_ftest_lock(flags, GEN)) { ++ sigen = au_sigen(sb); ++ err = au_digen_test(d1, sigen); ++ AuDebugOn(!err && au_dbrange_test(d1)); ++ if (!err) { ++ err = au_digen_test(d2, sigen); ++ AuDebugOn(!err && au_dbrange_test(d2)); ++ } ++ if (unlikely(err)) ++ aufs_read_and_write_unlock2(d1, d2); ++ } ++ ++out: ++ return err; ++} ++ ++void aufs_read_and_write_unlock2(struct dentry *d1, struct dentry *d2) ++{ ++ di_write_unlock2(d1, d2); ++ si_read_unlock(d1->d_sb); ++} ++ ++/* ---------------------------------------------------------------------- */ ++ ++int si_pid_test_slow(struct super_block *sb) ++{ ++ void *p; ++ ++ rcu_read_lock(); ++ p = radix_tree_lookup(&au_sbi(sb)->au_si_pid.tree, current->pid); ++ rcu_read_unlock(); ++ ++ return (long)!!p; ++} ++ ++void si_pid_set_slow(struct super_block *sb) ++{ ++ int err; ++ struct au_sbinfo *sbinfo; ++ ++ AuDebugOn(si_pid_test_slow(sb)); ++ ++ sbinfo = au_sbi(sb); ++ err = radix_tree_preload(GFP_NOFS | __GFP_NOFAIL); ++ AuDebugOn(err); ++ spin_lock(&sbinfo->au_si_pid.tree_lock); ++ err = radix_tree_insert(&sbinfo->au_si_pid.tree, current->pid, ++ /*any valid ptr*/sb); ++ spin_unlock(&sbinfo->au_si_pid.tree_lock); ++ AuDebugOn(err); ++ radix_tree_preload_end(); ++} ++ ++void si_pid_clr_slow(struct super_block *sb) ++{ ++ void *p; ++ struct au_sbinfo *sbinfo; ++ ++ AuDebugOn(!si_pid_test_slow(sb)); ++ ++ sbinfo = au_sbi(sb); ++ spin_lock(&sbinfo->au_si_pid.tree_lock); ++ p = radix_tree_delete(&sbinfo->au_si_pid.tree, current->pid); ++ spin_unlock(&sbinfo->au_si_pid.tree_lock); ++} +--- /dev/null 2012-03-14 12:35:58.848999748 +0100 ++++ b/fs/aufs/spl.h 2012-03-20 17:31:18.000000000 +0100 +@@ -0,0 +1,62 @@ ++/* ++ * Copyright (C) 2005-2012 Junjiro R. Okajima ++ * ++ * This program, aufs is free software; you can redistribute it and/or modify ++ * it under the terms of the GNU General Public License as published by ++ * the Free Software Foundation; either version 2 of the License, or ++ * (at your option) any later version. ++ * ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the ++ * GNU General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public License ++ * along with this program; if not, write to the Free Software ++ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA ++ */ ++ ++/* ++ * simple list protected by a spinlock ++ */ ++ ++#ifndef __AUFS_SPL_H__ ++#define __AUFS_SPL_H__ ++ ++#ifdef __KERNEL__ ++ ++struct au_splhead { ++ spinlock_t spin; ++ struct list_head head; ++}; ++ ++static inline void au_spl_init(struct au_splhead *spl) ++{ ++ spin_lock_init(&spl->spin); ++ INIT_LIST_HEAD(&spl->head); ++} ++ ++static inline void au_spl_add(struct list_head *list, struct au_splhead *spl) ++{ ++ spin_lock(&spl->spin); ++ list_add(list, &spl->head); ++ spin_unlock(&spl->spin); ++} ++ ++static inline void au_spl_del(struct list_head *list, struct au_splhead *spl) ++{ ++ spin_lock(&spl->spin); ++ list_del(list); ++ spin_unlock(&spl->spin); ++} ++ ++static inline void au_spl_del_rcu(struct list_head *list, ++ struct au_splhead *spl) ++{ ++ spin_lock(&spl->spin); ++ list_del_rcu(list); ++ spin_unlock(&spl->spin); ++} ++ ++#endif /* __KERNEL__ */ ++#endif /* __AUFS_SPL_H__ */ +--- /dev/null 2012-03-14 12:35:58.848999748 +0100 ++++ b/fs/aufs/super.c 2012-03-20 17:31:18.000000000 +0100 +@@ -0,0 +1,936 @@ ++/* ++ * Copyright (C) 2005-2012 Junjiro R. Okajima ++ * ++ * This program, aufs is free software; you can redistribute it and/or modify ++ * it under the terms of the GNU General Public License as published by ++ * the Free Software Foundation; either version 2 of the License, or ++ * (at your option) any later version. ++ * ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ * GNU General Public License for more details. 
++ *
++ * You should have received a copy of the GNU General Public License
++ * along with this program; if not, write to the Free Software
++ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
++ */
++
++/*
++ * mount and super_block operations
++ */
++
++#include <linux/mm.h>
++#include <linux/module.h>
++#include <linux/seq_file.h>
++#include <linux/statfs.h>
++#include <linux/vmalloc.h>
++#include <linux/writeback.h>
++#include "aufs.h"
++
++/*
++ * super_operations
++ */
++static struct inode *aufs_alloc_inode(struct super_block *sb __maybe_unused)
++{
++ struct au_icntnr *c;
++
++ c = au_cache_alloc_icntnr();
++ if (c) {
++ au_icntnr_init(c);
++ c->vfs_inode.i_version = 1; /* sigen(sb); */
++ c->iinfo.ii_hinode = NULL;
++ return &c->vfs_inode;
++ }
++ return NULL;
++}
++
++static void aufs_destroy_inode_cb(struct rcu_head *head)
++{
++ struct inode *inode = container_of(head, struct inode, i_rcu);
++
++ INIT_LIST_HEAD(&inode->i_dentry);
++ au_cache_free_icntnr(container_of(inode, struct au_icntnr, vfs_inode));
++}
++
++static void aufs_destroy_inode(struct inode *inode)
++{
++ au_iinfo_fin(inode);
++ call_rcu(&inode->i_rcu, aufs_destroy_inode_cb);
++}
++
++struct inode *au_iget_locked(struct super_block *sb, ino_t ino)
++{
++ struct inode *inode;
++ int err;
++
++ inode = iget_locked(sb, ino);
++ if (unlikely(!inode)) {
++ inode = ERR_PTR(-ENOMEM);
++ goto out;
++ }
++ if (!(inode->i_state & I_NEW))
++ goto out;
++
++ err = au_xigen_new(inode);
++ if (!err)
++ err = au_iinfo_init(inode);
++ if (!err)
++ inode->i_version++;
++ else {
++ iget_failed(inode);
++ inode = ERR_PTR(err);
++ }
++
++out:
++ /* never return NULL */
++ AuDebugOn(!inode);
++ AuTraceErrPtr(inode);
++ return inode;
++}
++
++/* lock free root dinfo */
++static int au_show_brs(struct seq_file *seq, struct super_block *sb)
++{
++ int err;
++ aufs_bindex_t bindex, bend;
++ struct path path;
++ struct au_hdentry *hdp;
++ struct au_branch *br;
++ char *perm;
++
++ err = 0;
++ bend = au_sbend(sb);
++ hdp = au_di(sb->s_root)->di_hdentry;
++ for (bindex = 0; !err && bindex <= bend; bindex++) {
++ br = au_sbr(sb, bindex);
++ path.mnt = br->br_mnt;
++ path.dentry = hdp[bindex].hd_dentry;
++ err = au_seq_path(seq, &path);
++ if (err > 0) {
++ perm = au_optstr_br_perm(br->br_perm);
++ if (perm) {
++ err = seq_printf(seq, "=%s", perm);
++ kfree(perm);
++ if (err == -1)
++ err = -E2BIG;
++ } else
++ err = -ENOMEM;
++ }
++ if (!err && bindex != bend)
++ err = seq_putc(seq, ':');
++ }
++
++ return err;
++}
++
++static void au_show_wbr_create(struct seq_file *m, int v,
++ struct au_sbinfo *sbinfo)
++{
++ const char *pat;
++
++ AuRwMustAnyLock(&sbinfo->si_rwsem);
++
++ seq_printf(m, ",create=");
++ pat = au_optstr_wbr_create(v);
++ switch (v) {
++ case AuWbrCreate_TDP:
++ case AuWbrCreate_RR:
++ case AuWbrCreate_MFS:
++ case AuWbrCreate_PMFS:
++ seq_printf(m, pat);
++ break;
++ case AuWbrCreate_MFSV:
++ seq_printf(m, /*pat*/"mfs:%lu",
++ jiffies_to_msecs(sbinfo->si_wbr_mfs.mfs_expire)
++ / MSEC_PER_SEC);
++ break;
++ case AuWbrCreate_PMFSV:
++ seq_printf(m, /*pat*/"pmfs:%lu",
++ jiffies_to_msecs(sbinfo->si_wbr_mfs.mfs_expire)
++ / MSEC_PER_SEC);
++ break;
++ case AuWbrCreate_MFSRR:
++ seq_printf(m, /*pat*/"mfsrr:%llu",
++ sbinfo->si_wbr_mfs.mfsrr_watermark);
++ break;
++ case AuWbrCreate_MFSRRV:
++ seq_printf(m, /*pat*/"mfsrr:%llu:%lu",
++ sbinfo->si_wbr_mfs.mfsrr_watermark,
++ jiffies_to_msecs(sbinfo->si_wbr_mfs.mfs_expire)
++ / MSEC_PER_SEC);
++ break;
++ }
++}
++
++static int au_show_xino(struct seq_file *seq, struct super_block *sb)
++{
++#ifdef CONFIG_SYSFS
++ return 0;
++#else
++ int err;
++ const int len
= sizeof(AUFS_XINO_FNAME) - 1; ++ aufs_bindex_t bindex, brid; ++ struct qstr *name; ++ struct file *f; ++ struct dentry *d, *h_root; ++ struct au_hdentry *hdp; ++ ++ AuRwMustAnyLock(&sbinfo->si_rwsem); ++ ++ err = 0; ++ f = au_sbi(sb)->si_xib; ++ if (!f) ++ goto out; ++ ++ /* stop printing the default xino path on the first writable branch */ ++ h_root = NULL; ++ brid = au_xino_brid(sb); ++ if (brid >= 0) { ++ bindex = au_br_index(sb, brid); ++ hdp = au_di(sb->s_root)->di_hdentry; ++ h_root = hdp[0 + bindex].hd_dentry; ++ } ++ d = f->f_dentry; ++ name = &d->d_name; ++ /* safe ->d_parent because the file is unlinked */ ++ if (d->d_parent == h_root ++ && name->len == len ++ && !memcmp(name->name, AUFS_XINO_FNAME, len)) ++ goto out; ++ ++ seq_puts(seq, ",xino="); ++ err = au_xino_path(seq, f); ++ ++out: ++ return err; ++#endif ++} ++ ++/* seq_file will re-call me in case of too long string */ ++static int aufs_show_options(struct seq_file *m, struct dentry *dentry) ++{ ++ int err; ++ unsigned int mnt_flags, v; ++ struct super_block *sb; ++ struct au_sbinfo *sbinfo; ++ ++#define AuBool(name, str) do { \ ++ v = au_opt_test(mnt_flags, name); \ ++ if (v != au_opt_test(AuOpt_Def, name)) \ ++ seq_printf(m, ",%s" #str, v ? "" : "no"); \ ++} while (0) ++ ++#define AuStr(name, str) do { \ ++ v = mnt_flags & AuOptMask_##name; \ ++ if (v != (AuOpt_Def & AuOptMask_##name)) \ ++ seq_printf(m, "," #str "=%s", au_optstr_##str(v)); \ ++} while (0) ++ ++#define AuUInt(name, str, val) do { \ ++ if (val != AUFS_##name##_DEF) \ ++ seq_printf(m, "," #str "=%u", val); \ ++} while (0) ++ ++ /* lock free root dinfo */ ++ sb = dentry->d_sb; ++ si_noflush_read_lock(sb); ++ sbinfo = au_sbi(sb); ++ seq_printf(m, ",si=%lx", sysaufs_si_id(sbinfo)); ++ ++ mnt_flags = au_mntflags(sb); ++ if (au_opt_test(mnt_flags, XINO)) { ++ err = au_show_xino(m, sb); ++ if (unlikely(err)) ++ goto out; ++ } else ++ seq_puts(m, ",noxino"); ++ ++ AuBool(TRUNC_XINO, trunc_xino); ++ AuStr(UDBA, udba); ++ AuBool(SHWH, shwh); ++ AuBool(PLINK, plink); ++ AuBool(DIO, dio); ++ /* AuBool(DIRPERM1, dirperm1); */ ++ /* AuBool(REFROF, refrof); */ ++ ++ v = sbinfo->si_wbr_create; ++ if (v != AuWbrCreate_Def) ++ au_show_wbr_create(m, v, sbinfo); ++ ++ v = sbinfo->si_wbr_copyup; ++ if (v != AuWbrCopyup_Def) ++ seq_printf(m, ",cpup=%s", au_optstr_wbr_copyup(v)); ++ ++ v = au_opt_test(mnt_flags, ALWAYS_DIROPQ); ++ if (v != au_opt_test(AuOpt_Def, ALWAYS_DIROPQ)) ++ seq_printf(m, ",diropq=%c", v ? 
'a' : 'w'); ++ ++ AuUInt(DIRWH, dirwh, sbinfo->si_dirwh); ++ ++ v = jiffies_to_msecs(sbinfo->si_rdcache) / MSEC_PER_SEC; ++ AuUInt(RDCACHE, rdcache, v); ++ ++ AuUInt(RDBLK, rdblk, sbinfo->si_rdblk); ++ AuUInt(RDHASH, rdhash, sbinfo->si_rdhash); ++ ++ AuBool(SUM, sum); ++ /* AuBool(SUM_W, wsum); */ ++ AuBool(WARN_PERM, warn_perm); ++ AuBool(VERBOSE, verbose); ++ ++out: ++ /* be sure to print "br:" last */ ++ if (!sysaufs_brs) { ++ seq_puts(m, ",br:"); ++ au_show_brs(m, sb); ++ } ++ si_read_unlock(sb); ++ return 0; ++ ++#undef AuBool ++#undef AuStr ++#undef AuUInt ++} ++ ++/* ---------------------------------------------------------------------- */ ++ ++/* sum mode which returns the summation for statfs(2) */ ++ ++static u64 au_add_till_max(u64 a, u64 b) ++{ ++ u64 old; ++ ++ old = a; ++ a += b; ++ if (old < a) ++ return a; ++ return ULLONG_MAX; ++} ++ ++static int au_statfs_sum(struct super_block *sb, struct kstatfs *buf) ++{ ++ int err; ++ u64 blocks, bfree, bavail, files, ffree; ++ aufs_bindex_t bend, bindex, i; ++ unsigned char shared; ++ struct path h_path; ++ struct super_block *h_sb; ++ ++ blocks = 0; ++ bfree = 0; ++ bavail = 0; ++ files = 0; ++ ffree = 0; ++ ++ err = 0; ++ bend = au_sbend(sb); ++ for (bindex = bend; bindex >= 0; bindex--) { ++ h_path.mnt = au_sbr_mnt(sb, bindex); ++ h_sb = h_path.mnt->mnt_sb; ++ shared = 0; ++ for (i = bindex + 1; !shared && i <= bend; i++) ++ shared = (au_sbr_sb(sb, i) == h_sb); ++ if (shared) ++ continue; ++ ++ /* sb->s_root for NFS is unreliable */ ++ h_path.dentry = h_path.mnt->mnt_root; ++ err = vfs_statfs(&h_path, buf); ++ if (unlikely(err)) ++ goto out; ++ ++ blocks = au_add_till_max(blocks, buf->f_blocks); ++ bfree = au_add_till_max(bfree, buf->f_bfree); ++ bavail = au_add_till_max(bavail, buf->f_bavail); ++ files = au_add_till_max(files, buf->f_files); ++ ffree = au_add_till_max(ffree, buf->f_ffree); ++ } ++ ++ buf->f_blocks = blocks; ++ buf->f_bfree = bfree; ++ buf->f_bavail = bavail; ++ buf->f_files = files; ++ buf->f_ffree = ffree; ++ ++out: ++ return err; ++} ++ ++static int aufs_statfs(struct dentry *dentry, struct kstatfs *buf) ++{ ++ int err; ++ struct path h_path; ++ struct super_block *sb; ++ ++ /* lock free root dinfo */ ++ sb = dentry->d_sb; ++ si_noflush_read_lock(sb); ++ if (!au_opt_test(au_mntflags(sb), SUM)) { ++ /* sb->s_root for NFS is unreliable */ ++ h_path.mnt = au_sbr_mnt(sb, 0); ++ h_path.dentry = h_path.mnt->mnt_root; ++ err = vfs_statfs(&h_path, buf); ++ } else ++ err = au_statfs_sum(sb, buf); ++ si_read_unlock(sb); ++ ++ if (!err) { ++ buf->f_type = AUFS_SUPER_MAGIC; ++ buf->f_namelen = AUFS_MAX_NAMELEN; ++ memset(&buf->f_fsid, 0, sizeof(buf->f_fsid)); ++ } ++ /* buf->f_bsize = buf->f_blocks = buf->f_bfree = buf->f_bavail = -1; */ ++ ++ return err; ++} ++ ++/* ---------------------------------------------------------------------- */ ++ ++/* final actions when unmounting a file system */ ++static void aufs_put_super(struct super_block *sb) ++{ ++ struct au_sbinfo *sbinfo; ++ ++ sbinfo = au_sbi(sb); ++ if (!sbinfo) ++ return; ++ ++ dbgaufs_si_fin(sbinfo); ++ kobject_put(&sbinfo->si_kobj); ++} ++ ++/* ---------------------------------------------------------------------- */ ++ ++void au_array_free(void *array) ++{ ++ if (array) { ++ if (!is_vmalloc_addr(array)) ++ kfree(array); ++ else ++ vfree(array); ++ } ++} ++ ++void *au_array_alloc(unsigned long long *hint, au_arraycb_t cb, void *arg) ++{ ++ void *array; ++ unsigned long long n; ++ ++ array = NULL; ++ n = 0; ++ if (!*hint) ++ goto out; ++ ++ if (*hint > ULLONG_MAX 
/ sizeof(array)) { ++ array = ERR_PTR(-EMFILE); ++ pr_err("hint %llu\n", *hint); ++ goto out; ++ } ++ ++ array = kmalloc(sizeof(array) * *hint, GFP_NOFS); ++ if (unlikely(!array)) ++ array = vmalloc(sizeof(array) * *hint); ++ if (unlikely(!array)) { ++ array = ERR_PTR(-ENOMEM); ++ goto out; ++ } ++ ++ n = cb(array, *hint, arg); ++ AuDebugOn(n > *hint); ++ ++out: ++ *hint = n; ++ return array; ++} ++ ++static unsigned long long au_iarray_cb(void *a, ++ unsigned long long max __maybe_unused, ++ void *arg) ++{ ++ unsigned long long n; ++ struct inode **p, *inode; ++ struct list_head *head; ++ ++ n = 0; ++ p = a; ++ head = arg; ++ spin_lock(&inode_sb_list_lock); ++ list_for_each_entry(inode, head, i_sb_list) { ++ if (!is_bad_inode(inode) ++ && au_ii(inode)->ii_bstart >= 0) { ++ spin_lock(&inode->i_lock); ++ if (atomic_read(&inode->i_count)) { ++ au_igrab(inode); ++ *p++ = inode; ++ n++; ++ AuDebugOn(n > max); ++ } ++ spin_unlock(&inode->i_lock); ++ } ++ } ++ spin_unlock(&inode_sb_list_lock); ++ ++ return n; ++} ++ ++struct inode **au_iarray_alloc(struct super_block *sb, unsigned long long *max) ++{ ++ *max = atomic_long_read(&au_sbi(sb)->si_ninodes); ++ return au_array_alloc(max, au_iarray_cb, &sb->s_inodes); ++} ++ ++void au_iarray_free(struct inode **a, unsigned long long max) ++{ ++ unsigned long long ull; ++ ++ for (ull = 0; ull < max; ull++) ++ iput(a[ull]); ++ au_array_free(a); ++} ++ ++/* ---------------------------------------------------------------------- */ ++ ++/* ++ * refresh dentry and inode at remount time. ++ */ ++/* todo: consolidate with simple_reval_dpath() and au_reval_for_attr() */ ++static int au_do_refresh(struct dentry *dentry, unsigned int dir_flags, ++ struct dentry *parent) ++{ ++ int err; ++ ++ di_write_lock_child(dentry); ++ di_read_lock_parent(parent, AuLock_IR); ++ err = au_refresh_dentry(dentry, parent); ++ if (!err && dir_flags) ++ au_hn_reset(dentry->d_inode, dir_flags); ++ di_read_unlock(parent, AuLock_IR); ++ di_write_unlock(dentry); ++ ++ return err; ++} ++ ++static int au_do_refresh_d(struct dentry *dentry, unsigned int sigen, ++ struct au_sbinfo *sbinfo, ++ const unsigned int dir_flags) ++{ ++ int err; ++ struct dentry *parent; ++ struct inode *inode; ++ ++ err = 0; ++ parent = dget_parent(dentry); ++ if (!au_digen_test(parent, sigen) && au_digen_test(dentry, sigen)) { ++ inode = dentry->d_inode; ++ if (inode) { ++ if (!S_ISDIR(inode->i_mode)) ++ err = au_do_refresh(dentry, /*dir_flags*/0, ++ parent); ++ else { ++ err = au_do_refresh(dentry, dir_flags, parent); ++ if (unlikely(err)) ++ au_fset_si(sbinfo, FAILED_REFRESH_DIR); ++ } ++ } else ++ err = au_do_refresh(dentry, /*dir_flags*/0, parent); ++ AuDbgDentry(dentry); ++ } ++ dput(parent); ++ ++ AuTraceErr(err); ++ return err; ++} ++ ++static int au_refresh_d(struct super_block *sb) ++{ ++ int err, i, j, ndentry, e; ++ unsigned int sigen; ++ struct au_dcsub_pages dpages; ++ struct au_dpage *dpage; ++ struct dentry **dentries, *d; ++ struct au_sbinfo *sbinfo; ++ struct dentry *root = sb->s_root; ++ const unsigned int dir_flags = au_hi_flags(root->d_inode, /*isdir*/1); ++ ++ err = au_dpages_init(&dpages, GFP_NOFS); ++ if (unlikely(err)) ++ goto out; ++ err = au_dcsub_pages(&dpages, root, NULL, NULL); ++ if (unlikely(err)) ++ goto out_dpages; ++ ++ sigen = au_sigen(sb); ++ sbinfo = au_sbi(sb); ++ for (i = 0; i < dpages.ndpage; i++) { ++ dpage = dpages.dpages + i; ++ dentries = dpage->dentries; ++ ndentry = dpage->ndentry; ++ for (j = 0; j < ndentry; j++) { ++ d = dentries[j]; ++ e = au_do_refresh_d(d, sigen, 
sbinfo, dir_flags); ++ if (unlikely(e && !err)) ++ err = e; ++ /* go on even err */ ++ } ++ } ++ ++out_dpages: ++ au_dpages_free(&dpages); ++out: ++ return err; ++} ++ ++static int au_refresh_i(struct super_block *sb) ++{ ++ int err, e; ++ unsigned int sigen; ++ unsigned long long max, ull; ++ struct inode *inode, **array; ++ ++ array = au_iarray_alloc(sb, &max); ++ err = PTR_ERR(array); ++ if (IS_ERR(array)) ++ goto out; ++ ++ err = 0; ++ sigen = au_sigen(sb); ++ for (ull = 0; ull < max; ull++) { ++ inode = array[ull]; ++ if (au_iigen(inode) != sigen) { ++ ii_write_lock_child(inode); ++ e = au_refresh_hinode_self(inode); ++ ii_write_unlock(inode); ++ if (unlikely(e)) { ++ pr_err("error %d, i%lu\n", e, inode->i_ino); ++ if (!err) ++ err = e; ++ /* go on even if err */ ++ } ++ } ++ } ++ ++ au_iarray_free(array, max); ++ ++out: ++ return err; ++} ++ ++static void au_remount_refresh(struct super_block *sb) ++{ ++ int err, e; ++ unsigned int udba; ++ aufs_bindex_t bindex, bend; ++ struct dentry *root; ++ struct inode *inode; ++ struct au_branch *br; ++ ++ au_sigen_inc(sb); ++ au_fclr_si(au_sbi(sb), FAILED_REFRESH_DIR); ++ ++ root = sb->s_root; ++ DiMustNoWaiters(root); ++ inode = root->d_inode; ++ IiMustNoWaiters(inode); ++ ++ udba = au_opt_udba(sb); ++ bend = au_sbend(sb); ++ for (bindex = 0; bindex <= bend; bindex++) { ++ br = au_sbr(sb, bindex); ++ err = au_hnotify_reset_br(udba, br, br->br_perm); ++ if (unlikely(err)) ++ AuIOErr("hnotify failed on br %d, %d, ignored\n", ++ bindex, err); ++ /* go on even if err */ ++ } ++ au_hn_reset(inode, au_hi_flags(inode, /*isdir*/1)); ++ ++ di_write_unlock(root); ++ err = au_refresh_d(sb); ++ e = au_refresh_i(sb); ++ if (unlikely(e && !err)) ++ err = e; ++ /* aufs_write_lock() calls ..._child() */ ++ di_write_lock_child(root); ++ ++ au_cpup_attr_all(inode, /*force*/1); ++ ++ if (unlikely(err)) ++ AuIOErr("refresh failed, ignored, %d\n", err); ++} ++ ++/* stop extra interpretation of errno in mount(8), and strange error messages */ ++static int cvt_err(int err) ++{ ++ AuTraceErr(err); ++ ++ switch (err) { ++ case -ENOENT: ++ case -ENOTDIR: ++ case -EEXIST: ++ case -EIO: ++ err = -EINVAL; ++ } ++ return err; ++} ++ ++static int aufs_remount_fs(struct super_block *sb, int *flags, char *data) ++{ ++ int err, do_dx; ++ unsigned int mntflags; ++ struct au_opts opts; ++ struct dentry *root; ++ struct inode *inode; ++ struct au_sbinfo *sbinfo; ++ ++ err = 0; ++ root = sb->s_root; ++ if (!data || !*data) { ++ err = si_write_lock(sb, AuLock_FLUSH | AuLock_NOPLM); ++ if (!err) { ++ di_write_lock_child(root); ++ err = au_opts_verify(sb, *flags, /*pending*/0); ++ aufs_write_unlock(root); ++ } ++ goto out; ++ } ++ ++ err = -ENOMEM; ++ memset(&opts, 0, sizeof(opts)); ++ opts.opt = (void *)__get_free_page(GFP_NOFS); ++ if (unlikely(!opts.opt)) ++ goto out; ++ opts.max_opt = PAGE_SIZE / sizeof(*opts.opt); ++ opts.flags = AuOpts_REMOUNT; ++ opts.sb_flags = *flags; ++ ++ /* parse it before aufs lock */ ++ err = au_opts_parse(sb, data, &opts); ++ if (unlikely(err)) ++ goto out_opts; ++ ++ sbinfo = au_sbi(sb); ++ inode = root->d_inode; ++ mutex_lock(&inode->i_mutex); ++ err = si_write_lock(sb, AuLock_FLUSH | AuLock_NOPLM); ++ if (unlikely(err)) ++ goto out_mtx; ++ di_write_lock_child(root); ++ ++ /* au_opts_remount() may return an error */ ++ err = au_opts_remount(sb, &opts); ++ au_opts_free(&opts); ++ ++ if (au_ftest_opts(opts.flags, REFRESH)) ++ au_remount_refresh(sb); ++ ++ if (au_ftest_opts(opts.flags, REFRESH_DYAOP)) { ++ mntflags = au_mntflags(sb); ++ do_dx = 
!!au_opt_test(mntflags, DIO); ++ au_dy_arefresh(do_dx); ++ } ++ ++ aufs_write_unlock(root); ++ ++out_mtx: ++ mutex_unlock(&inode->i_mutex); ++out_opts: ++ free_page((unsigned long)opts.opt); ++out: ++ err = cvt_err(err); ++ AuTraceErr(err); ++ return err; ++} ++ ++static const struct super_operations aufs_sop = { ++ .alloc_inode = aufs_alloc_inode, ++ .destroy_inode = aufs_destroy_inode, ++ /* always deleting, no clearing */ ++ .drop_inode = generic_delete_inode, ++ .show_options = aufs_show_options, ++ .statfs = aufs_statfs, ++ .put_super = aufs_put_super, ++ .remount_fs = aufs_remount_fs ++}; ++ ++/* ---------------------------------------------------------------------- */ ++ ++static int alloc_root(struct super_block *sb) ++{ ++ int err; ++ struct inode *inode; ++ struct dentry *root; ++ ++ err = -ENOMEM; ++ inode = au_iget_locked(sb, AUFS_ROOT_INO); ++ err = PTR_ERR(inode); ++ if (IS_ERR(inode)) ++ goto out; ++ ++ inode->i_op = &aufs_dir_iop; ++ inode->i_fop = &aufs_dir_fop; ++ inode->i_mode = S_IFDIR; ++ set_nlink(inode, 2); ++ unlock_new_inode(inode); ++ ++ root = d_alloc_root(inode); ++ if (unlikely(!root)) ++ goto out_iput; ++ err = PTR_ERR(root); ++ if (IS_ERR(root)) ++ goto out_iput; ++ ++ err = au_di_init(root); ++ if (!err) { ++ sb->s_root = root; ++ return 0; /* success */ ++ } ++ dput(root); ++ goto out; /* do not iput */ ++ ++out_iput: ++ iget_failed(inode); ++out: ++ return err; ++ ++} ++ ++static int aufs_fill_super(struct super_block *sb, void *raw_data, ++ int silent __maybe_unused) ++{ ++ int err; ++ struct au_opts opts; ++ struct dentry *root; ++ struct inode *inode; ++ char *arg = raw_data; ++ ++ if (unlikely(!arg || !*arg)) { ++ err = -EINVAL; ++ pr_err("no arg\n"); ++ goto out; ++ } ++ ++ err = -ENOMEM; ++ memset(&opts, 0, sizeof(opts)); ++ opts.opt = (void *)__get_free_page(GFP_NOFS); ++ if (unlikely(!opts.opt)) ++ goto out; ++ opts.max_opt = PAGE_SIZE / sizeof(*opts.opt); ++ opts.sb_flags = sb->s_flags; ++ ++ err = au_si_alloc(sb); ++ if (unlikely(err)) ++ goto out_opts; ++ ++ /* all timestamps always follow the ones on the branch */ ++ sb->s_flags |= MS_NOATIME | MS_NODIRATIME; ++ sb->s_op = &aufs_sop; ++ sb->s_d_op = &aufs_dop; ++ sb->s_magic = AUFS_SUPER_MAGIC; ++ sb->s_maxbytes = 0; ++ au_export_init(sb); ++ ++ err = alloc_root(sb); ++ if (unlikely(err)) { ++ si_write_unlock(sb); ++ goto out_info; ++ } ++ root = sb->s_root; ++ inode = root->d_inode; ++ ++ /* ++ * actually we can parse options regardless aufs lock here. ++ * but at remount time, parsing must be done before aufs lock. ++ * so we follow the same rule. ++ */ ++ ii_write_lock_parent(inode); ++ aufs_write_unlock(root); ++ err = au_opts_parse(sb, arg, &opts); ++ if (unlikely(err)) ++ goto out_root; ++ ++ /* lock vfs_inode first, then aufs. 
*/ ++ mutex_lock(&inode->i_mutex); ++ aufs_write_lock(root); ++ err = au_opts_mount(sb, &opts); ++ au_opts_free(&opts); ++ aufs_write_unlock(root); ++ mutex_unlock(&inode->i_mutex); ++ if (!err) ++ goto out_opts; /* success */ ++ ++out_root: ++ dput(root); ++ sb->s_root = NULL; ++out_info: ++ dbgaufs_si_fin(au_sbi(sb)); ++ kobject_put(&au_sbi(sb)->si_kobj); ++ sb->s_fs_info = NULL; ++out_opts: ++ free_page((unsigned long)opts.opt); ++out: ++ AuTraceErr(err); ++ err = cvt_err(err); ++ AuTraceErr(err); ++ return err; ++} ++ ++/* ---------------------------------------------------------------------- */ ++ ++static struct dentry *aufs_mount(struct file_system_type *fs_type, int flags, ++ const char *dev_name __maybe_unused, ++ void *raw_data) ++{ ++ struct dentry *root; ++ struct super_block *sb; ++ ++ /* all timestamps always follow the ones on the branch */ ++ /* mnt->mnt_flags |= MNT_NOATIME | MNT_NODIRATIME; */ ++ root = mount_nodev(fs_type, flags, raw_data, aufs_fill_super); ++ if (IS_ERR(root)) ++ goto out; ++ ++ sb = root->d_sb; ++ si_write_lock(sb, !AuLock_FLUSH); ++ sysaufs_brs_add(sb, 0); ++ si_write_unlock(sb); ++ au_sbilist_add(sb); ++ ++out: ++ return root; ++} ++ ++static void aufs_kill_sb(struct super_block *sb) ++{ ++ struct au_sbinfo *sbinfo; ++ ++ sbinfo = au_sbi(sb); ++ if (sbinfo) { ++ au_sbilist_del(sb); ++ aufs_write_lock(sb->s_root); ++ if (sbinfo->si_wbr_create_ops->fin) ++ sbinfo->si_wbr_create_ops->fin(sb); ++ if (au_opt_test(sbinfo->si_mntflags, UDBA_HNOTIFY)) { ++ au_opt_set_udba(sbinfo->si_mntflags, UDBA_NONE); ++ au_remount_refresh(sb); ++ } ++ if (au_opt_test(sbinfo->si_mntflags, PLINK)) ++ au_plink_put(sb, /*verbose*/1); ++ au_xino_clr(sb); ++ sbinfo->si_sb = NULL; ++ aufs_write_unlock(sb->s_root); ++ au_nwt_flush(&sbinfo->si_nowait); ++ } ++ generic_shutdown_super(sb); ++} ++ ++struct file_system_type aufs_fs_type = { ++ .name = AUFS_FSTYPE, ++ .fs_flags = ++ FS_RENAME_DOES_D_MOVE /* a race between rename and others */ ++ | FS_REVAL_DOT, /* for NFS branch and udba */ ++ .mount = aufs_mount, ++ .kill_sb = aufs_kill_sb, ++ /* no need to __module_get() and module_put(). */ ++ .owner = THIS_MODULE, ++}; +--- /dev/null 2012-03-14 12:35:58.848999748 +0100 ++++ b/fs/aufs/super.h 2012-03-20 17:31:18.000000000 +0100 +@@ -0,0 +1,546 @@ ++/* ++ * Copyright (C) 2005-2012 Junjiro R. Okajima ++ * ++ * This program, aufs is free software; you can redistribute it and/or modify ++ * it under the terms of the GNU General Public License as published by ++ * the Free Software Foundation; either version 2 of the License, or ++ * (at your option) any later version. ++ * ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ * GNU General Public License for more details. 
++ * ++ * You should have received a copy of the GNU General Public License ++ * along with this program; if not, write to the Free Software ++ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA ++ */ ++ ++/* ++ * super_block operations ++ */ ++ ++#ifndef __AUFS_SUPER_H__ ++#define __AUFS_SUPER_H__ ++ ++#ifdef __KERNEL__ ++ ++#include ++#include "rwsem.h" ++#include "spl.h" ++#include "wkq.h" ++ ++typedef ssize_t (*au_readf_t)(struct file *, char __user *, size_t, loff_t *); ++typedef ssize_t (*au_writef_t)(struct file *, const char __user *, size_t, ++ loff_t *); ++ ++/* policies to select one among multiple writable branches */ ++struct au_wbr_copyup_operations { ++ int (*copyup)(struct dentry *dentry); ++}; ++ ++struct au_wbr_create_operations { ++ int (*create)(struct dentry *dentry, int isdir); ++ int (*init)(struct super_block *sb); ++ int (*fin)(struct super_block *sb); ++}; ++ ++struct au_wbr_mfs { ++ struct mutex mfs_lock; /* protect this structure */ ++ unsigned long mfs_jiffy; ++ unsigned long mfs_expire; ++ aufs_bindex_t mfs_bindex; ++ ++ unsigned long long mfsrr_bytes; ++ unsigned long long mfsrr_watermark; ++}; ++ ++struct au_branch; ++struct au_sbinfo { ++ /* nowait tasks in the system-wide workqueue */ ++ struct au_nowait_tasks si_nowait; ++ ++ /* ++ * tried sb->s_umount, but failed due to the dependecy between i_mutex. ++ * rwsem for au_sbinfo is necessary. ++ */ ++ struct au_rwsem si_rwsem; ++ ++ /* prevent recursive locking in deleting inode */ ++ struct { ++ unsigned long *bitmap; ++ spinlock_t tree_lock; ++ struct radix_tree_root tree; ++ } au_si_pid; ++ ++ /* ++ * dirty approach to protect sb->sb_inodes and ->s_files from remount. ++ */ ++ atomic_long_t si_ninodes, si_nfiles; ++ ++ /* branch management */ ++ unsigned int si_generation; ++ ++ /* see above flags */ ++ unsigned char au_si_status; ++ ++ aufs_bindex_t si_bend; ++ ++ /* dirty trick to keep br_id plus */ ++ unsigned int si_last_br_id : ++ sizeof(aufs_bindex_t) * BITS_PER_BYTE - 1; ++ struct au_branch **si_branch; ++ ++ /* policy to select a writable branch */ ++ unsigned char si_wbr_copyup; ++ unsigned char si_wbr_create; ++ struct au_wbr_copyup_operations *si_wbr_copyup_ops; ++ struct au_wbr_create_operations *si_wbr_create_ops; ++ ++ /* round robin */ ++ atomic_t si_wbr_rr_next; ++ ++ /* most free space */ ++ struct au_wbr_mfs si_wbr_mfs; ++ ++ /* mount flags */ ++ /* include/asm-ia64/siginfo.h defines a macro named si_flags */ ++ unsigned int si_mntflags; ++ ++ /* external inode number (bitmap and translation table) */ ++ au_readf_t si_xread; ++ au_writef_t si_xwrite; ++ struct file *si_xib; ++ struct mutex si_xib_mtx; /* protect xib members */ ++ unsigned long *si_xib_buf; ++ unsigned long si_xib_last_pindex; ++ int si_xib_next_bit; ++ aufs_bindex_t si_xino_brid; ++ /* reserved for future use */ ++ /* unsigned long long si_xib_limit; */ /* Max xib file size */ ++ ++#ifdef CONFIG_AUFS_EXPORT ++ /* i_generation */ ++ struct file *si_xigen; ++ atomic_t si_xigen_next; ++#endif ++ ++ /* vdir parameters */ ++ unsigned long si_rdcache; /* max cache time in jiffies */ ++ unsigned int si_rdblk; /* deblk size */ ++ unsigned int si_rdhash; /* hash size */ ++ ++ /* ++ * If the number of whiteouts are larger than si_dirwh, leave all of ++ * them after au_whtmp_ren to reduce the cost of rmdir(2). ++ * future fsck.aufs or kernel thread will remove them later. ++ * Otherwise, remove all whiteouts and the dir in rmdir(2). 
++ */ ++ unsigned int si_dirwh; ++ ++ /* ++ * rename(2) a directory with all children. ++ */ ++ /* reserved for future use */ ++ /* int si_rendir; */ ++ ++ /* pseudo_link list */ ++ struct au_splhead si_plink; ++ wait_queue_head_t si_plink_wq; ++ spinlock_t si_plink_maint_lock; ++ pid_t si_plink_maint_pid; ++ ++ /* ++ * sysfs and lifetime management. ++ * this is not a small structure and it may be a waste of memory in case ++ * of sysfs is disabled, particulary when many aufs-es are mounted. ++ * but using sysfs is majority. ++ */ ++ struct kobject si_kobj; ++#ifdef CONFIG_DEBUG_FS ++ struct dentry *si_dbgaufs, *si_dbgaufs_xib; ++#ifdef CONFIG_AUFS_EXPORT ++ struct dentry *si_dbgaufs_xigen; ++#endif ++#endif ++ ++#ifdef CONFIG_AUFS_SBILIST ++ struct list_head si_list; ++#endif ++ ++ /* dirty, necessary for unmounting, sysfs and sysrq */ ++ struct super_block *si_sb; ++}; ++ ++/* sbinfo status flags */ ++/* ++ * set true when refresh_dirs() failed at remount time. ++ * then try refreshing dirs at access time again. ++ * if it is false, refreshing dirs at access time is unnecesary ++ */ ++#define AuSi_FAILED_REFRESH_DIR 1 ++static inline unsigned char au_do_ftest_si(struct au_sbinfo *sbi, ++ unsigned int flag) ++{ ++ AuRwMustAnyLock(&sbi->si_rwsem); ++ return sbi->au_si_status & flag; ++} ++#define au_ftest_si(sbinfo, name) au_do_ftest_si(sbinfo, AuSi_##name) ++#define au_fset_si(sbinfo, name) do { \ ++ AuRwMustWriteLock(&(sbinfo)->si_rwsem); \ ++ (sbinfo)->au_si_status |= AuSi_##name; \ ++} while (0) ++#define au_fclr_si(sbinfo, name) do { \ ++ AuRwMustWriteLock(&(sbinfo)->si_rwsem); \ ++ (sbinfo)->au_si_status &= ~AuSi_##name; \ ++} while (0) ++ ++/* ---------------------------------------------------------------------- */ ++ ++/* policy to select one among writable branches */ ++#define AuWbrCopyup(sbinfo, ...) \ ++ ((sbinfo)->si_wbr_copyup_ops->copyup(__VA_ARGS__)) ++#define AuWbrCreate(sbinfo, ...) 
\ ++ ((sbinfo)->si_wbr_create_ops->create(__VA_ARGS__)) ++ ++/* flags for si_read_lock()/aufs_read_lock()/di_read_lock() */ ++#define AuLock_DW 1 /* write-lock dentry */ ++#define AuLock_IR (1 << 1) /* read-lock inode */ ++#define AuLock_IW (1 << 2) /* write-lock inode */ ++#define AuLock_FLUSH (1 << 3) /* wait for 'nowait' tasks */ ++#define AuLock_DIR (1 << 4) /* target is a dir */ ++#define AuLock_NOPLM (1 << 5) /* return err in plm mode */ ++#define AuLock_NOPLMW (1 << 6) /* wait for plm mode ends */ ++#define AuLock_GEN (1 << 7) /* test digen/iigen */ ++#define au_ftest_lock(flags, name) ((flags) & AuLock_##name) ++#define au_fset_lock(flags, name) \ ++ do { (flags) |= AuLock_##name; } while (0) ++#define au_fclr_lock(flags, name) \ ++ do { (flags) &= ~AuLock_##name; } while (0) ++ ++/* ---------------------------------------------------------------------- */ ++ ++/* super.c */ ++extern struct file_system_type aufs_fs_type; ++struct inode *au_iget_locked(struct super_block *sb, ino_t ino); ++typedef unsigned long long (*au_arraycb_t)(void *array, unsigned long long max, ++ void *arg); ++void au_array_free(void *array); ++void *au_array_alloc(unsigned long long *hint, au_arraycb_t cb, void *arg); ++struct inode **au_iarray_alloc(struct super_block *sb, unsigned long long *max); ++void au_iarray_free(struct inode **a, unsigned long long max); ++ ++/* sbinfo.c */ ++void au_si_free(struct kobject *kobj); ++int au_si_alloc(struct super_block *sb); ++int au_sbr_realloc(struct au_sbinfo *sbinfo, int nbr); ++ ++unsigned int au_sigen_inc(struct super_block *sb); ++aufs_bindex_t au_new_br_id(struct super_block *sb); ++ ++int si_read_lock(struct super_block *sb, int flags); ++int si_write_lock(struct super_block *sb, int flags); ++int aufs_read_lock(struct dentry *dentry, int flags); ++void aufs_read_unlock(struct dentry *dentry, int flags); ++void aufs_write_lock(struct dentry *dentry); ++void aufs_write_unlock(struct dentry *dentry); ++int aufs_read_and_write_lock2(struct dentry *d1, struct dentry *d2, int flags); ++void aufs_read_and_write_unlock2(struct dentry *d1, struct dentry *d2); ++ ++int si_pid_test_slow(struct super_block *sb); ++void si_pid_set_slow(struct super_block *sb); ++void si_pid_clr_slow(struct super_block *sb); ++ ++/* wbr_policy.c */ ++extern struct au_wbr_copyup_operations au_wbr_copyup_ops[]; ++extern struct au_wbr_create_operations au_wbr_create_ops[]; ++int au_cpdown_dirs(struct dentry *dentry, aufs_bindex_t bdst); ++ ++/* ---------------------------------------------------------------------- */ ++ ++static inline struct au_sbinfo *au_sbi(struct super_block *sb) ++{ ++ return sb->s_fs_info; ++} ++ ++/* ---------------------------------------------------------------------- */ ++ ++#ifdef CONFIG_AUFS_EXPORT ++void au_export_init(struct super_block *sb); ++ ++static inline int au_test_nfsd(void) ++{ ++ struct task_struct *tsk = current; ++ ++ return (tsk->flags & PF_KTHREAD) ++ && !strcmp(tsk->comm, "nfsd"); ++} ++ ++void au_xigen_inc(struct inode *inode); ++int au_xigen_new(struct inode *inode); ++int au_xigen_set(struct super_block *sb, struct file *base); ++void au_xigen_clr(struct super_block *sb); ++ ++static inline int au_busy_or_stale(void) ++{ ++ if (!au_test_nfsd()) ++ return -EBUSY; ++ return -ESTALE; ++} ++#else ++AuStubVoid(au_export_init, struct super_block *sb) ++AuStubInt0(au_test_nfsd, void) ++AuStubVoid(au_xigen_inc, struct inode *inode) ++AuStubInt0(au_xigen_new, struct inode *inode) ++AuStubInt0(au_xigen_set, struct super_block *sb, struct file *base) 
++AuStubVoid(au_xigen_clr, struct super_block *sb) ++static inline int au_busy_or_stale(void) ++{ ++ return -EBUSY; ++} ++#endif /* CONFIG_AUFS_EXPORT */ ++ ++/* ---------------------------------------------------------------------- */ ++ ++#ifdef CONFIG_AUFS_SBILIST ++/* module.c */ ++extern struct au_splhead au_sbilist; ++ ++static inline void au_sbilist_init(void) ++{ ++ au_spl_init(&au_sbilist); ++} ++ ++static inline void au_sbilist_add(struct super_block *sb) ++{ ++ au_spl_add(&au_sbi(sb)->si_list, &au_sbilist); ++} ++ ++static inline void au_sbilist_del(struct super_block *sb) ++{ ++ au_spl_del(&au_sbi(sb)->si_list, &au_sbilist); ++} ++ ++#ifdef CONFIG_AUFS_MAGIC_SYSRQ ++static inline void au_sbilist_lock(void) ++{ ++ spin_lock(&au_sbilist.spin); ++} ++ ++static inline void au_sbilist_unlock(void) ++{ ++ spin_unlock(&au_sbilist.spin); ++} ++#define AuGFP_SBILIST GFP_ATOMIC ++#else ++AuStubVoid(au_sbilist_lock, void) ++AuStubVoid(au_sbilist_unlock, void) ++#define AuGFP_SBILIST GFP_NOFS ++#endif /* CONFIG_AUFS_MAGIC_SYSRQ */ ++#else ++AuStubVoid(au_sbilist_init, void) ++AuStubVoid(au_sbilist_add, struct super_block*) ++AuStubVoid(au_sbilist_del, struct super_block*) ++AuStubVoid(au_sbilist_lock, void) ++AuStubVoid(au_sbilist_unlock, void) ++#define AuGFP_SBILIST GFP_NOFS ++#endif ++ ++/* ---------------------------------------------------------------------- */ ++ ++static inline void dbgaufs_si_null(struct au_sbinfo *sbinfo) ++{ ++ /* ++ * This function is a dynamic '__init' fucntion actually, ++ * so the tiny check for si_rwsem is unnecessary. ++ */ ++ /* AuRwMustWriteLock(&sbinfo->si_rwsem); */ ++#ifdef CONFIG_DEBUG_FS ++ sbinfo->si_dbgaufs = NULL; ++ sbinfo->si_dbgaufs_xib = NULL; ++#ifdef CONFIG_AUFS_EXPORT ++ sbinfo->si_dbgaufs_xigen = NULL; ++#endif ++#endif ++} ++ ++/* ---------------------------------------------------------------------- */ ++ ++static inline pid_t si_pid_bit(void) ++{ ++ /* the origin of pid is 1, but the bitmap's is 0 */ ++ return current->pid - 1; ++} ++ ++static inline int si_pid_test(struct super_block *sb) ++{ ++ pid_t bit = si_pid_bit(); ++ if (bit < PID_MAX_DEFAULT) ++ return test_bit(bit, au_sbi(sb)->au_si_pid.bitmap); ++ else ++ return si_pid_test_slow(sb); ++} ++ ++static inline void si_pid_set(struct super_block *sb) ++{ ++ pid_t bit = si_pid_bit(); ++ if (bit < PID_MAX_DEFAULT) { ++ AuDebugOn(test_bit(bit, au_sbi(sb)->au_si_pid.bitmap)); ++ set_bit(bit, au_sbi(sb)->au_si_pid.bitmap); ++ /* smp_mb(); */ ++ } else ++ si_pid_set_slow(sb); ++} ++ ++static inline void si_pid_clr(struct super_block *sb) ++{ ++ pid_t bit = si_pid_bit(); ++ if (bit < PID_MAX_DEFAULT) { ++ AuDebugOn(!test_bit(bit, au_sbi(sb)->au_si_pid.bitmap)); ++ clear_bit(bit, au_sbi(sb)->au_si_pid.bitmap); ++ /* smp_mb(); */ ++ } else ++ si_pid_clr_slow(sb); ++} ++ ++/* ---------------------------------------------------------------------- */ ++ ++/* lock superblock. 
mainly for entry point functions */ ++/* ++ * __si_read_lock, __si_write_lock, ++ * __si_read_unlock, __si_write_unlock, __si_downgrade_lock ++ */ ++AuSimpleRwsemFuncs(__si, struct super_block *sb, &au_sbi(sb)->si_rwsem); ++ ++#define SiMustNoWaiters(sb) AuRwMustNoWaiters(&au_sbi(sb)->si_rwsem) ++#define SiMustAnyLock(sb) AuRwMustAnyLock(&au_sbi(sb)->si_rwsem) ++#define SiMustWriteLock(sb) AuRwMustWriteLock(&au_sbi(sb)->si_rwsem) ++ ++static inline void si_noflush_read_lock(struct super_block *sb) ++{ ++ __si_read_lock(sb); ++ si_pid_set(sb); ++} ++ ++static inline int si_noflush_read_trylock(struct super_block *sb) ++{ ++ int locked = __si_read_trylock(sb); ++ if (locked) ++ si_pid_set(sb); ++ return locked; ++} ++ ++static inline void si_noflush_write_lock(struct super_block *sb) ++{ ++ __si_write_lock(sb); ++ si_pid_set(sb); ++} ++ ++static inline int si_noflush_write_trylock(struct super_block *sb) ++{ ++ int locked = __si_write_trylock(sb); ++ if (locked) ++ si_pid_set(sb); ++ return locked; ++} ++ ++#if 0 /* unused */ ++static inline int si_read_trylock(struct super_block *sb, int flags) ++{ ++ if (au_ftest_lock(flags, FLUSH)) ++ au_nwt_flush(&au_sbi(sb)->si_nowait); ++ return si_noflush_read_trylock(sb); ++} ++#endif ++ ++static inline void si_read_unlock(struct super_block *sb) ++{ ++ si_pid_clr(sb); ++ __si_read_unlock(sb); ++} ++ ++#if 0 /* unused */ ++static inline int si_write_trylock(struct super_block *sb, int flags) ++{ ++ if (au_ftest_lock(flags, FLUSH)) ++ au_nwt_flush(&au_sbi(sb)->si_nowait); ++ return si_noflush_write_trylock(sb); ++} ++#endif ++ ++static inline void si_write_unlock(struct super_block *sb) ++{ ++ si_pid_clr(sb); ++ __si_write_unlock(sb); ++} ++ ++#if 0 /* unused */ ++static inline void si_downgrade_lock(struct super_block *sb) ++{ ++ __si_downgrade_lock(sb); ++} ++#endif ++ ++/* ---------------------------------------------------------------------- */ ++ ++static inline aufs_bindex_t au_sbend(struct super_block *sb) ++{ ++ SiMustAnyLock(sb); ++ return au_sbi(sb)->si_bend; ++} ++ ++static inline unsigned int au_mntflags(struct super_block *sb) ++{ ++ SiMustAnyLock(sb); ++ return au_sbi(sb)->si_mntflags; ++} ++ ++static inline unsigned int au_sigen(struct super_block *sb) ++{ ++ SiMustAnyLock(sb); ++ return au_sbi(sb)->si_generation; ++} ++ ++static inline void au_ninodes_inc(struct super_block *sb) ++{ ++ atomic_long_inc(&au_sbi(sb)->si_ninodes); ++} ++ ++static inline void au_ninodes_dec(struct super_block *sb) ++{ ++ AuDebugOn(!atomic_long_read(&au_sbi(sb)->si_ninodes)); ++ atomic_long_dec(&au_sbi(sb)->si_ninodes); ++} ++ ++static inline void au_nfiles_inc(struct super_block *sb) ++{ ++ atomic_long_inc(&au_sbi(sb)->si_nfiles); ++} ++ ++static inline void au_nfiles_dec(struct super_block *sb) ++{ ++ AuDebugOn(!atomic_long_read(&au_sbi(sb)->si_nfiles)); ++ atomic_long_dec(&au_sbi(sb)->si_nfiles); ++} ++ ++static inline struct au_branch *au_sbr(struct super_block *sb, ++ aufs_bindex_t bindex) ++{ ++ SiMustAnyLock(sb); ++ return au_sbi(sb)->si_branch[0 + bindex]; ++} ++ ++static inline void au_xino_brid_set(struct super_block *sb, aufs_bindex_t brid) ++{ ++ SiMustWriteLock(sb); ++ au_sbi(sb)->si_xino_brid = brid; ++} ++ ++static inline aufs_bindex_t au_xino_brid(struct super_block *sb) ++{ ++ SiMustAnyLock(sb); ++ return au_sbi(sb)->si_xino_brid; ++} ++ ++#endif /* __KERNEL__ */ ++#endif /* __AUFS_SUPER_H__ */ +--- /dev/null 2012-03-14 12:35:58.848999748 +0100 ++++ b/fs/aufs/sysaufs.c 2012-03-20 17:31:18.000000000 +0100 +@@ -0,0 +1,105 @@ ++/* ++ * 
Copyright (C) 2005-2012 Junjiro R. Okajima ++ * ++ * This program, aufs is free software; you can redistribute it and/or modify ++ * it under the terms of the GNU General Public License as published by ++ * the Free Software Foundation; either version 2 of the License, or ++ * (at your option) any later version. ++ * ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ * GNU General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public License ++ * along with this program; if not, write to the Free Software ++ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA ++ */ ++ ++/* ++ * sysfs interface and lifetime management ++ * they are necessary regardless sysfs is disabled. ++ */ ++ ++#include ++#include "aufs.h" ++ ++unsigned long sysaufs_si_mask; ++struct kset *sysaufs_kset; ++ ++#define AuSiAttr(_name) { \ ++ .attr = { .name = __stringify(_name), .mode = 0444 }, \ ++ .show = sysaufs_si_##_name, \ ++} ++ ++static struct sysaufs_si_attr sysaufs_si_attr_xi_path = AuSiAttr(xi_path); ++struct attribute *sysaufs_si_attrs[] = { ++ &sysaufs_si_attr_xi_path.attr, ++ NULL, ++}; ++ ++static const struct sysfs_ops au_sbi_ops = { ++ .show = sysaufs_si_show ++}; ++ ++static struct kobj_type au_sbi_ktype = { ++ .release = au_si_free, ++ .sysfs_ops = &au_sbi_ops, ++ .default_attrs = sysaufs_si_attrs ++}; ++ ++/* ---------------------------------------------------------------------- */ ++ ++int sysaufs_si_init(struct au_sbinfo *sbinfo) ++{ ++ int err; ++ ++ sbinfo->si_kobj.kset = sysaufs_kset; ++ /* cf. sysaufs_name() */ ++ err = kobject_init_and_add ++ (&sbinfo->si_kobj, &au_sbi_ktype, /*&sysaufs_kset->kobj*/NULL, ++ SysaufsSiNamePrefix "%lx", sysaufs_si_id(sbinfo)); ++ ++ dbgaufs_si_null(sbinfo); ++ if (!err) { ++ err = dbgaufs_si_init(sbinfo); ++ if (unlikely(err)) ++ kobject_put(&sbinfo->si_kobj); ++ } ++ return err; ++} ++ ++void sysaufs_fin(void) ++{ ++ dbgaufs_fin(); ++ sysfs_remove_group(&sysaufs_kset->kobj, sysaufs_attr_group); ++ kset_unregister(sysaufs_kset); ++} ++ ++int __init sysaufs_init(void) ++{ ++ int err; ++ ++ do { ++ get_random_bytes(&sysaufs_si_mask, sizeof(sysaufs_si_mask)); ++ } while (!sysaufs_si_mask); ++ ++ err = -EINVAL; ++ sysaufs_kset = kset_create_and_add(AUFS_NAME, NULL, fs_kobj); ++ if (unlikely(!sysaufs_kset)) ++ goto out; ++ err = PTR_ERR(sysaufs_kset); ++ if (IS_ERR(sysaufs_kset)) ++ goto out; ++ err = sysfs_create_group(&sysaufs_kset->kobj, sysaufs_attr_group); ++ if (unlikely(err)) { ++ kset_unregister(sysaufs_kset); ++ goto out; ++ } ++ ++ err = dbgaufs_init(); ++ if (unlikely(err)) ++ sysaufs_fin(); ++out: ++ return err; ++} +--- /dev/null 2012-03-14 12:35:58.848999748 +0100 ++++ b/fs/aufs/sysaufs.h 2012-03-20 17:31:18.000000000 +0100 +@@ -0,0 +1,104 @@ ++/* ++ * Copyright (C) 2005-2012 Junjiro R. Okajima ++ * ++ * This program, aufs is free software; you can redistribute it and/or modify ++ * it under the terms of the GNU General Public License as published by ++ * the Free Software Foundation; either version 2 of the License, or ++ * (at your option) any later version. ++ * ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ * GNU General Public License for more details. 
++ * ++ * You should have received a copy of the GNU General Public License ++ * along with this program; if not, write to the Free Software ++ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA ++ */ ++ ++/* ++ * sysfs interface and mount lifetime management ++ */ ++ ++#ifndef __SYSAUFS_H__ ++#define __SYSAUFS_H__ ++ ++#ifdef __KERNEL__ ++ ++#include ++#include "module.h" ++ ++struct super_block; ++struct au_sbinfo; ++ ++struct sysaufs_si_attr { ++ struct attribute attr; ++ int (*show)(struct seq_file *seq, struct super_block *sb); ++}; ++ ++/* ---------------------------------------------------------------------- */ ++ ++/* sysaufs.c */ ++extern unsigned long sysaufs_si_mask; ++extern struct kset *sysaufs_kset; ++extern struct attribute *sysaufs_si_attrs[]; ++int sysaufs_si_init(struct au_sbinfo *sbinfo); ++int __init sysaufs_init(void); ++void sysaufs_fin(void); ++ ++/* ---------------------------------------------------------------------- */ ++ ++/* some people doesn't like to show a pointer in kernel */ ++static inline unsigned long sysaufs_si_id(struct au_sbinfo *sbinfo) ++{ ++ return sysaufs_si_mask ^ (unsigned long)sbinfo; ++} ++ ++#define SysaufsSiNamePrefix "si_" ++#define SysaufsSiNameLen (sizeof(SysaufsSiNamePrefix) + 16) ++static inline void sysaufs_name(struct au_sbinfo *sbinfo, char *name) ++{ ++ snprintf(name, SysaufsSiNameLen, SysaufsSiNamePrefix "%lx", ++ sysaufs_si_id(sbinfo)); ++} ++ ++struct au_branch; ++#ifdef CONFIG_SYSFS ++/* sysfs.c */ ++extern struct attribute_group *sysaufs_attr_group; ++ ++int sysaufs_si_xi_path(struct seq_file *seq, struct super_block *sb); ++ssize_t sysaufs_si_show(struct kobject *kobj, struct attribute *attr, ++ char *buf); ++ ++void sysaufs_br_init(struct au_branch *br); ++void sysaufs_brs_add(struct super_block *sb, aufs_bindex_t bindex); ++void sysaufs_brs_del(struct super_block *sb, aufs_bindex_t bindex); ++ ++#define sysaufs_brs_init() do {} while (0) ++ ++#else ++#define sysaufs_attr_group NULL ++ ++AuStubInt0(sysaufs_si_xi_path, struct seq_file *seq, struct super_block *sb) ++ ++static inline ++ssize_t sysaufs_si_show(struct kobject *kobj, struct attribute *attr, ++ char *buf) ++{ ++ return 0; ++} ++ ++AuStubVoid(sysaufs_br_init, struct au_branch *br) ++AuStubVoid(sysaufs_brs_add, struct super_block *sb, aufs_bindex_t bindex) ++AuStubVoid(sysaufs_brs_del, struct super_block *sb, aufs_bindex_t bindex) ++ ++static inline void sysaufs_brs_init(void) ++{ ++ sysaufs_brs = 0; ++} ++ ++#endif /* CONFIG_SYSFS */ ++ ++#endif /* __KERNEL__ */ ++#endif /* __SYSAUFS_H__ */ +--- /dev/null 2012-03-14 12:35:58.848999748 +0100 ++++ b/fs/aufs/sysfs.c 2012-03-20 17:31:18.000000000 +0100 +@@ -0,0 +1,257 @@ ++/* ++ * Copyright (C) 2005-2012 Junjiro R. Okajima ++ * ++ * This program, aufs is free software; you can redistribute it and/or modify ++ * it under the terms of the GNU General Public License as published by ++ * the Free Software Foundation; either version 2 of the License, or ++ * (at your option) any later version. ++ * ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ * GNU General Public License for more details. 
++ * ++ * You should have received a copy of the GNU General Public License ++ * along with this program; if not, write to the Free Software ++ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA ++ */ ++ ++/* ++ * sysfs interface ++ */ ++ ++#include ++#include "aufs.h" ++ ++#ifdef CONFIG_AUFS_FS_MODULE ++/* this entry violates the "one line per file" policy of sysfs */ ++static ssize_t config_show(struct kobject *kobj, struct kobj_attribute *attr, ++ char *buf) ++{ ++ ssize_t err; ++ static char *conf = ++/* this file is generated at compiling */ ++#include "conf.str" ++ ; ++ ++ err = snprintf(buf, PAGE_SIZE, conf); ++ if (unlikely(err >= PAGE_SIZE)) ++ err = -EFBIG; ++ return err; ++} ++ ++static struct kobj_attribute au_config_attr = __ATTR_RO(config); ++#endif ++ ++static struct attribute *au_attr[] = { ++#ifdef CONFIG_AUFS_FS_MODULE ++ &au_config_attr.attr, ++#endif ++ NULL, /* need to NULL terminate the list of attributes */ ++}; ++ ++static struct attribute_group sysaufs_attr_group_body = { ++ .attrs = au_attr ++}; ++ ++struct attribute_group *sysaufs_attr_group = &sysaufs_attr_group_body; ++ ++/* ---------------------------------------------------------------------- */ ++ ++int sysaufs_si_xi_path(struct seq_file *seq, struct super_block *sb) ++{ ++ int err; ++ ++ SiMustAnyLock(sb); ++ ++ err = 0; ++ if (au_opt_test(au_mntflags(sb), XINO)) { ++ err = au_xino_path(seq, au_sbi(sb)->si_xib); ++ seq_putc(seq, '\n'); ++ } ++ return err; ++} ++ ++/* ++ * the lifetime of branch is independent from the entry under sysfs. ++ * sysfs handles the lifetime of the entry, and never call ->show() after it is ++ * unlinked. ++ */ ++static int sysaufs_si_br(struct seq_file *seq, struct super_block *sb, ++ aufs_bindex_t bindex) ++{ ++ int err; ++ struct path path; ++ struct dentry *root; ++ struct au_branch *br; ++ char *perm; ++ ++ AuDbg("b%d\n", bindex); ++ ++ err = 0; ++ root = sb->s_root; ++ di_read_lock_parent(root, !AuLock_IR); ++ br = au_sbr(sb, bindex); ++ path.mnt = br->br_mnt; ++ path.dentry = au_h_dptr(root, bindex); ++ au_seq_path(seq, &path); ++ di_read_unlock(root, !AuLock_IR); ++ perm = au_optstr_br_perm(br->br_perm); ++ if (perm) { ++ err = seq_printf(seq, "=%s\n", perm); ++ kfree(perm); ++ if (err == -1) ++ err = -E2BIG; ++ } else ++ err = -ENOMEM; ++ return err; ++} ++ ++/* ---------------------------------------------------------------------- */ ++ ++static struct seq_file *au_seq(char *p, ssize_t len) ++{ ++ struct seq_file *seq; ++ ++ seq = kzalloc(sizeof(*seq), GFP_NOFS); ++ if (seq) { ++ /* mutex_init(&seq.lock); */ ++ seq->buf = p; ++ seq->size = len; ++ return seq; /* success */ ++ } ++ ++ seq = ERR_PTR(-ENOMEM); ++ return seq; ++} ++ ++#define SysaufsBr_PREFIX "br" ++ ++/* todo: file size may exceed PAGE_SIZE */ ++ssize_t sysaufs_si_show(struct kobject *kobj, struct attribute *attr, ++ char *buf) ++{ ++ ssize_t err; ++ long l; ++ aufs_bindex_t bend; ++ struct au_sbinfo *sbinfo; ++ struct super_block *sb; ++ struct seq_file *seq; ++ char *name; ++ struct attribute **cattr; ++ ++ sbinfo = container_of(kobj, struct au_sbinfo, si_kobj); ++ sb = sbinfo->si_sb; ++ ++ /* ++ * prevent a race condition between sysfs and aufs. ++ * for instance, sysfs_file_read() calls sysfs_get_active_two() which ++ * prohibits maintaining the sysfs entries. ++ * hew we acquire read lock after sysfs_get_active_two(). ++ * on the other hand, the remount process may maintain the sysfs/aufs ++ * entries after acquiring write lock. ++ * it can cause a deadlock. 
++ * simply we gave up processing read here. ++ */ ++ err = -EBUSY; ++ if (unlikely(!si_noflush_read_trylock(sb))) ++ goto out; ++ ++ seq = au_seq(buf, PAGE_SIZE); ++ err = PTR_ERR(seq); ++ if (IS_ERR(seq)) ++ goto out_unlock; ++ ++ name = (void *)attr->name; ++ cattr = sysaufs_si_attrs; ++ while (*cattr) { ++ if (!strcmp(name, (*cattr)->name)) { ++ err = container_of(*cattr, struct sysaufs_si_attr, attr) ++ ->show(seq, sb); ++ goto out_seq; ++ } ++ cattr++; ++ } ++ ++ bend = au_sbend(sb); ++ if (!strncmp(name, SysaufsBr_PREFIX, sizeof(SysaufsBr_PREFIX) - 1)) { ++ name += sizeof(SysaufsBr_PREFIX) - 1; ++ err = kstrtol(name, 10, &l); ++ if (!err) { ++ if (l <= bend) ++ err = sysaufs_si_br(seq, sb, (aufs_bindex_t)l); ++ else ++ err = -ENOENT; ++ } ++ goto out_seq; ++ } ++ BUG(); ++ ++out_seq: ++ if (!err) { ++ err = seq->count; ++ /* sysfs limit */ ++ if (unlikely(err == PAGE_SIZE)) ++ err = -EFBIG; ++ } ++ kfree(seq); ++out_unlock: ++ si_read_unlock(sb); ++out: ++ return err; ++} ++ ++/* ---------------------------------------------------------------------- */ ++ ++void sysaufs_br_init(struct au_branch *br) ++{ ++ struct attribute *attr = &br->br_attr; ++ ++ sysfs_attr_init(attr); ++ attr->name = br->br_name; ++ attr->mode = S_IRUGO; ++} ++ ++void sysaufs_brs_del(struct super_block *sb, aufs_bindex_t bindex) ++{ ++ struct au_branch *br; ++ struct kobject *kobj; ++ aufs_bindex_t bend; ++ ++ dbgaufs_brs_del(sb, bindex); ++ ++ if (!sysaufs_brs) ++ return; ++ ++ kobj = &au_sbi(sb)->si_kobj; ++ bend = au_sbend(sb); ++ for (; bindex <= bend; bindex++) { ++ br = au_sbr(sb, bindex); ++ sysfs_remove_file(kobj, &br->br_attr); ++ } ++} ++ ++void sysaufs_brs_add(struct super_block *sb, aufs_bindex_t bindex) ++{ ++ int err; ++ aufs_bindex_t bend; ++ struct kobject *kobj; ++ struct au_branch *br; ++ ++ dbgaufs_brs_add(sb, bindex); ++ ++ if (!sysaufs_brs) ++ return; ++ ++ kobj = &au_sbi(sb)->si_kobj; ++ bend = au_sbend(sb); ++ for (; bindex <= bend; bindex++) { ++ br = au_sbr(sb, bindex); ++ snprintf(br->br_name, sizeof(br->br_name), SysaufsBr_PREFIX ++ "%d", bindex); ++ err = sysfs_create_file(kobj, &br->br_attr); ++ if (unlikely(err)) ++ pr_warning("failed %s under sysfs(%d)\n", ++ br->br_name, err); ++ } ++} +--- /dev/null 2012-03-14 12:35:58.848999748 +0100 ++++ b/fs/aufs/sysrq.c 2012-03-20 17:31:18.000000000 +0100 +@@ -0,0 +1,148 @@ ++/* ++ * Copyright (C) 2005-2012 Junjiro R. Okajima ++ * ++ * This program, aufs is free software; you can redistribute it and/or modify ++ * it under the terms of the GNU General Public License as published by ++ * the Free Software Foundation; either version 2 of the License, or ++ * (at your option) any later version. ++ * ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ * GNU General Public License for more details. 
++ * ++ * You should have received a copy of the GNU General Public License ++ * along with this program; if not, write to the Free Software ++ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA ++ */ ++ ++/* ++ * magic sysrq hanlder ++ */ ++ ++/* #include */ ++#include ++#include "aufs.h" ++ ++/* ---------------------------------------------------------------------- */ ++ ++static void sysrq_sb(struct super_block *sb) ++{ ++ char *plevel; ++ struct au_sbinfo *sbinfo; ++ struct file *file; ++ ++ plevel = au_plevel; ++ au_plevel = KERN_WARNING; ++ ++ sbinfo = au_sbi(sb); ++ /* since we define pr_fmt, call printk directly */ ++ printk(KERN_WARNING "si=%lx\n", sysaufs_si_id(sbinfo)); ++ printk(KERN_WARNING AUFS_NAME ": superblock\n"); ++ au_dpri_sb(sb); ++ ++#if 0 ++ printk(KERN_WARNING AUFS_NAME ": root dentry\n"); ++ au_dpri_dentry(sb->s_root); ++ printk(KERN_WARNING AUFS_NAME ": root inode\n"); ++ au_dpri_inode(sb->s_root->d_inode); ++#endif ++ ++#if 0 ++ do { ++ int err, i, j, ndentry; ++ struct au_dcsub_pages dpages; ++ struct au_dpage *dpage; ++ ++ err = au_dpages_init(&dpages, GFP_ATOMIC); ++ if (unlikely(err)) ++ break; ++ err = au_dcsub_pages(&dpages, sb->s_root, NULL, NULL); ++ if (!err) ++ for (i = 0; i < dpages.ndpage; i++) { ++ dpage = dpages.dpages + i; ++ ndentry = dpage->ndentry; ++ for (j = 0; j < ndentry; j++) ++ au_dpri_dentry(dpage->dentries[j]); ++ } ++ au_dpages_free(&dpages); ++ } while (0); ++#endif ++ ++#if 1 ++ { ++ struct inode *i; ++ printk(KERN_WARNING AUFS_NAME ": isolated inode\n"); ++ spin_lock(&inode_sb_list_lock); ++ list_for_each_entry(i, &sb->s_inodes, i_sb_list) { ++ spin_lock(&i->i_lock); ++ if (1 || list_empty(&i->i_dentry)) ++ au_dpri_inode(i); ++ spin_unlock(&i->i_lock); ++ } ++ spin_unlock(&inode_sb_list_lock); ++ } ++#endif ++ printk(KERN_WARNING AUFS_NAME ": files\n"); ++ lg_global_lock(files_lglock); ++ do_file_list_for_each_entry(sb, file) { ++ umode_t mode; ++ mode = file->f_dentry->d_inode->i_mode; ++ if (!special_file(mode) || au_special_file(mode)) ++ au_dpri_file(file); ++ } while_file_list_for_each_entry; ++ lg_global_unlock(files_lglock); ++ printk(KERN_WARNING AUFS_NAME ": done\n"); ++ ++ au_plevel = plevel; ++} ++ ++/* ---------------------------------------------------------------------- */ ++ ++/* module parameter */ ++static char *aufs_sysrq_key = "a"; ++module_param_named(sysrq, aufs_sysrq_key, charp, S_IRUGO); ++MODULE_PARM_DESC(sysrq, "MagicSysRq key for " AUFS_NAME); ++ ++static void au_sysrq(int key __maybe_unused) ++{ ++ struct au_sbinfo *sbinfo; ++ ++ lockdep_off(); ++ au_sbilist_lock(); ++ list_for_each_entry(sbinfo, &au_sbilist.head, si_list) ++ sysrq_sb(sbinfo->si_sb); ++ au_sbilist_unlock(); ++ lockdep_on(); ++} ++ ++static struct sysrq_key_op au_sysrq_op = { ++ .handler = au_sysrq, ++ .help_msg = "Aufs", ++ .action_msg = "Aufs", ++ .enable_mask = SYSRQ_ENABLE_DUMP ++}; ++ ++/* ---------------------------------------------------------------------- */ ++ ++int __init au_sysrq_init(void) ++{ ++ int err; ++ char key; ++ ++ err = -1; ++ key = *aufs_sysrq_key; ++ if ('a' <= key && key <= 'z') ++ err = register_sysrq_key(key, &au_sysrq_op); ++ if (unlikely(err)) ++ pr_err("err %d, sysrq=%c\n", err, key); ++ return err; ++} ++ ++void au_sysrq_fin(void) ++{ ++ int err; ++ err = unregister_sysrq_key(*aufs_sysrq_key, &au_sysrq_op); ++ if (unlikely(err)) ++ pr_err("err %d (ignored)\n", err); ++} +--- /dev/null 2012-03-14 12:35:58.848999748 +0100 ++++ b/fs/aufs/vdir.c 2012-03-20 17:31:18.000000000 +0100 +@@ -0,0 
+1,885 @@ ++/* ++ * Copyright (C) 2005-2012 Junjiro R. Okajima ++ * ++ * This program, aufs is free software; you can redistribute it and/or modify ++ * it under the terms of the GNU General Public License as published by ++ * the Free Software Foundation; either version 2 of the License, or ++ * (at your option) any later version. ++ * ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ * GNU General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public License ++ * along with this program; if not, write to the Free Software ++ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA ++ */ ++ ++/* ++ * virtual or vertical directory ++ */ ++ ++#include "aufs.h" ++ ++static unsigned int calc_size(int nlen) ++{ ++ return ALIGN(sizeof(struct au_vdir_de) + nlen, sizeof(ino_t)); ++} ++ ++static int set_deblk_end(union au_vdir_deblk_p *p, ++ union au_vdir_deblk_p *deblk_end) ++{ ++ if (calc_size(0) <= deblk_end->deblk - p->deblk) { ++ p->de->de_str.len = 0; ++ /* smp_mb(); */ ++ return 0; ++ } ++ return -1; /* error */ ++} ++ ++/* returns true or false */ ++static int is_deblk_end(union au_vdir_deblk_p *p, ++ union au_vdir_deblk_p *deblk_end) ++{ ++ if (calc_size(0) <= deblk_end->deblk - p->deblk) ++ return !p->de->de_str.len; ++ return 1; ++} ++ ++static unsigned char *last_deblk(struct au_vdir *vdir) ++{ ++ return vdir->vd_deblk[vdir->vd_nblk - 1]; ++} ++ ++/* ---------------------------------------------------------------------- */ ++ ++/* estimate the apropriate size for name hash table */ ++unsigned int au_rdhash_est(loff_t sz) ++{ ++ unsigned int n; ++ ++ n = UINT_MAX; ++ sz >>= 10; ++ if (sz < n) ++ n = sz; ++ if (sz < AUFS_RDHASH_DEF) ++ n = AUFS_RDHASH_DEF; ++ /* pr_info("n %u\n", n); */ ++ return n; ++} ++ ++/* ++ * the allocated memory has to be freed by ++ * au_nhash_wh_free() or au_nhash_de_free(). 
++ */ ++int au_nhash_alloc(struct au_nhash *nhash, unsigned int num_hash, gfp_t gfp) ++{ ++ struct hlist_head *head; ++ unsigned int u; ++ ++ head = kmalloc(sizeof(*nhash->nh_head) * num_hash, gfp); ++ if (head) { ++ nhash->nh_num = num_hash; ++ nhash->nh_head = head; ++ for (u = 0; u < num_hash; u++) ++ INIT_HLIST_HEAD(head++); ++ return 0; /* success */ ++ } ++ ++ return -ENOMEM; ++} ++ ++static void nhash_count(struct hlist_head *head) ++{ ++#if 0 ++ unsigned long n; ++ struct hlist_node *pos; ++ ++ n = 0; ++ hlist_for_each(pos, head) ++ n++; ++ pr_info("%lu\n", n); ++#endif ++} ++ ++static void au_nhash_wh_do_free(struct hlist_head *head) ++{ ++ struct au_vdir_wh *tpos; ++ struct hlist_node *pos, *node; ++ ++ hlist_for_each_entry_safe(tpos, pos, node, head, wh_hash) { ++ /* hlist_del(pos); */ ++ kfree(tpos); ++ } ++} ++ ++static void au_nhash_de_do_free(struct hlist_head *head) ++{ ++ struct au_vdir_dehstr *tpos; ++ struct hlist_node *pos, *node; ++ ++ hlist_for_each_entry_safe(tpos, pos, node, head, hash) { ++ /* hlist_del(pos); */ ++ au_cache_free_vdir_dehstr(tpos); ++ } ++} ++ ++static void au_nhash_do_free(struct au_nhash *nhash, ++ void (*free)(struct hlist_head *head)) ++{ ++ unsigned int n; ++ struct hlist_head *head; ++ ++ n = nhash->nh_num; ++ if (!n) ++ return; ++ ++ head = nhash->nh_head; ++ while (n-- > 0) { ++ nhash_count(head); ++ free(head++); ++ } ++ kfree(nhash->nh_head); ++} ++ ++void au_nhash_wh_free(struct au_nhash *whlist) ++{ ++ au_nhash_do_free(whlist, au_nhash_wh_do_free); ++} ++ ++static void au_nhash_de_free(struct au_nhash *delist) ++{ ++ au_nhash_do_free(delist, au_nhash_de_do_free); ++} ++ ++/* ---------------------------------------------------------------------- */ ++ ++int au_nhash_test_longer_wh(struct au_nhash *whlist, aufs_bindex_t btgt, ++ int limit) ++{ ++ int num; ++ unsigned int u, n; ++ struct hlist_head *head; ++ struct au_vdir_wh *tpos; ++ struct hlist_node *pos; ++ ++ num = 0; ++ n = whlist->nh_num; ++ head = whlist->nh_head; ++ for (u = 0; u < n; u++, head++) ++ hlist_for_each_entry(tpos, pos, head, wh_hash) ++ if (tpos->wh_bindex == btgt && ++num > limit) ++ return 1; ++ return 0; ++} ++ ++static struct hlist_head *au_name_hash(struct au_nhash *nhash, ++ unsigned char *name, ++ unsigned int len) ++{ ++ unsigned int v; ++ /* const unsigned int magic_bit = 12; */ ++ ++ AuDebugOn(!nhash->nh_num || !nhash->nh_head); ++ ++ v = 0; ++ while (len--) ++ v += *name++; ++ /* v = hash_long(v, magic_bit); */ ++ v %= nhash->nh_num; ++ return nhash->nh_head + v; ++} ++ ++static int au_nhash_test_name(struct au_vdir_destr *str, const char *name, ++ int nlen) ++{ ++ return str->len == nlen && !memcmp(str->name, name, nlen); ++} ++ ++/* returns found or not */ ++int au_nhash_test_known_wh(struct au_nhash *whlist, char *name, int nlen) ++{ ++ struct hlist_head *head; ++ struct au_vdir_wh *tpos; ++ struct hlist_node *pos; ++ struct au_vdir_destr *str; ++ ++ head = au_name_hash(whlist, name, nlen); ++ hlist_for_each_entry(tpos, pos, head, wh_hash) { ++ str = &tpos->wh_str; ++ AuDbg("%.*s\n", str->len, str->name); ++ if (au_nhash_test_name(str, name, nlen)) ++ return 1; ++ } ++ return 0; ++} ++ ++/* returns found(true) or not */ ++static int test_known(struct au_nhash *delist, char *name, int nlen) ++{ ++ struct hlist_head *head; ++ struct au_vdir_dehstr *tpos; ++ struct hlist_node *pos; ++ struct au_vdir_destr *str; ++ ++ head = au_name_hash(delist, name, nlen); ++ hlist_for_each_entry(tpos, pos, head, hash) { ++ str = tpos->str; ++ AuDbg("%.*s\n", str->len, 
str->name); ++ if (au_nhash_test_name(str, name, nlen)) ++ return 1; ++ } ++ return 0; ++} ++ ++static void au_shwh_init_wh(struct au_vdir_wh *wh, ino_t ino, ++ unsigned char d_type) ++{ ++#ifdef CONFIG_AUFS_SHWH ++ wh->wh_ino = ino; ++ wh->wh_type = d_type; ++#endif ++} ++ ++/* ---------------------------------------------------------------------- */ ++ ++int au_nhash_append_wh(struct au_nhash *whlist, char *name, int nlen, ino_t ino, ++ unsigned int d_type, aufs_bindex_t bindex, ++ unsigned char shwh) ++{ ++ int err; ++ struct au_vdir_destr *str; ++ struct au_vdir_wh *wh; ++ ++ AuDbg("%.*s\n", nlen, name); ++ AuDebugOn(!whlist->nh_num || !whlist->nh_head); ++ ++ err = -ENOMEM; ++ wh = kmalloc(sizeof(*wh) + nlen, GFP_NOFS); ++ if (unlikely(!wh)) ++ goto out; ++ ++ err = 0; ++ wh->wh_bindex = bindex; ++ if (shwh) ++ au_shwh_init_wh(wh, ino, d_type); ++ str = &wh->wh_str; ++ str->len = nlen; ++ memcpy(str->name, name, nlen); ++ hlist_add_head(&wh->wh_hash, au_name_hash(whlist, name, nlen)); ++ /* smp_mb(); */ ++ ++out: ++ return err; ++} ++ ++static int append_deblk(struct au_vdir *vdir) ++{ ++ int err; ++ unsigned long ul; ++ const unsigned int deblk_sz = vdir->vd_deblk_sz; ++ union au_vdir_deblk_p p, deblk_end; ++ unsigned char **o; ++ ++ err = -ENOMEM; ++ o = krealloc(vdir->vd_deblk, sizeof(*o) * (vdir->vd_nblk + 1), ++ GFP_NOFS); ++ if (unlikely(!o)) ++ goto out; ++ ++ vdir->vd_deblk = o; ++ p.deblk = kmalloc(deblk_sz, GFP_NOFS); ++ if (p.deblk) { ++ ul = vdir->vd_nblk++; ++ vdir->vd_deblk[ul] = p.deblk; ++ vdir->vd_last.ul = ul; ++ vdir->vd_last.p.deblk = p.deblk; ++ deblk_end.deblk = p.deblk + deblk_sz; ++ err = set_deblk_end(&p, &deblk_end); ++ } ++ ++out: ++ return err; ++} ++ ++static int append_de(struct au_vdir *vdir, char *name, int nlen, ino_t ino, ++ unsigned int d_type, struct au_nhash *delist) ++{ ++ int err; ++ unsigned int sz; ++ const unsigned int deblk_sz = vdir->vd_deblk_sz; ++ union au_vdir_deblk_p p, *room, deblk_end; ++ struct au_vdir_dehstr *dehstr; ++ ++ p.deblk = last_deblk(vdir); ++ deblk_end.deblk = p.deblk + deblk_sz; ++ room = &vdir->vd_last.p; ++ AuDebugOn(room->deblk < p.deblk || deblk_end.deblk <= room->deblk ++ || !is_deblk_end(room, &deblk_end)); ++ ++ sz = calc_size(nlen); ++ if (unlikely(sz > deblk_end.deblk - room->deblk)) { ++ err = append_deblk(vdir); ++ if (unlikely(err)) ++ goto out; ++ ++ p.deblk = last_deblk(vdir); ++ deblk_end.deblk = p.deblk + deblk_sz; ++ /* smp_mb(); */ ++ AuDebugOn(room->deblk != p.deblk); ++ } ++ ++ err = -ENOMEM; ++ dehstr = au_cache_alloc_vdir_dehstr(); ++ if (unlikely(!dehstr)) ++ goto out; ++ ++ dehstr->str = &room->de->de_str; ++ hlist_add_head(&dehstr->hash, au_name_hash(delist, name, nlen)); ++ room->de->de_ino = ino; ++ room->de->de_type = d_type; ++ room->de->de_str.len = nlen; ++ memcpy(room->de->de_str.name, name, nlen); ++ ++ err = 0; ++ room->deblk += sz; ++ if (unlikely(set_deblk_end(room, &deblk_end))) ++ err = append_deblk(vdir); ++ /* smp_mb(); */ ++ ++out: ++ return err; ++} ++ ++/* ---------------------------------------------------------------------- */ ++ ++void au_vdir_free(struct au_vdir *vdir) ++{ ++ unsigned char **deblk; ++ ++ deblk = vdir->vd_deblk; ++ while (vdir->vd_nblk--) ++ kfree(*deblk++); ++ kfree(vdir->vd_deblk); ++ au_cache_free_vdir(vdir); ++} ++ ++static struct au_vdir *alloc_vdir(struct file *file) ++{ ++ struct au_vdir *vdir; ++ struct super_block *sb; ++ int err; ++ ++ sb = file->f_dentry->d_sb; ++ SiMustAnyLock(sb); ++ ++ err = -ENOMEM; ++ vdir = au_cache_alloc_vdir(); ++ if 
(unlikely(!vdir)) ++ goto out; ++ ++ vdir->vd_deblk = kzalloc(sizeof(*vdir->vd_deblk), GFP_NOFS); ++ if (unlikely(!vdir->vd_deblk)) ++ goto out_free; ++ ++ vdir->vd_deblk_sz = au_sbi(sb)->si_rdblk; ++ if (!vdir->vd_deblk_sz) { ++ /* estimate the apropriate size for deblk */ ++ vdir->vd_deblk_sz = au_dir_size(file, /*dentry*/NULL); ++ /* pr_info("vd_deblk_sz %u\n", vdir->vd_deblk_sz); */ ++ } ++ vdir->vd_nblk = 0; ++ vdir->vd_version = 0; ++ vdir->vd_jiffy = 0; ++ err = append_deblk(vdir); ++ if (!err) ++ return vdir; /* success */ ++ ++ kfree(vdir->vd_deblk); ++ ++out_free: ++ au_cache_free_vdir(vdir); ++out: ++ vdir = ERR_PTR(err); ++ return vdir; ++} ++ ++static int reinit_vdir(struct au_vdir *vdir) ++{ ++ int err; ++ union au_vdir_deblk_p p, deblk_end; ++ ++ while (vdir->vd_nblk > 1) { ++ kfree(vdir->vd_deblk[vdir->vd_nblk - 1]); ++ /* vdir->vd_deblk[vdir->vd_nblk - 1] = NULL; */ ++ vdir->vd_nblk--; ++ } ++ p.deblk = vdir->vd_deblk[0]; ++ deblk_end.deblk = p.deblk + vdir->vd_deblk_sz; ++ err = set_deblk_end(&p, &deblk_end); ++ /* keep vd_dblk_sz */ ++ vdir->vd_last.ul = 0; ++ vdir->vd_last.p.deblk = vdir->vd_deblk[0]; ++ vdir->vd_version = 0; ++ vdir->vd_jiffy = 0; ++ /* smp_mb(); */ ++ return err; ++} ++ ++/* ---------------------------------------------------------------------- */ ++ ++#define AuFillVdir_CALLED 1 ++#define AuFillVdir_WHABLE (1 << 1) ++#define AuFillVdir_SHWH (1 << 2) ++#define au_ftest_fillvdir(flags, name) ((flags) & AuFillVdir_##name) ++#define au_fset_fillvdir(flags, name) \ ++ do { (flags) |= AuFillVdir_##name; } while (0) ++#define au_fclr_fillvdir(flags, name) \ ++ do { (flags) &= ~AuFillVdir_##name; } while (0) ++ ++#ifndef CONFIG_AUFS_SHWH ++#undef AuFillVdir_SHWH ++#define AuFillVdir_SHWH 0 ++#endif ++ ++struct fillvdir_arg { ++ struct file *file; ++ struct au_vdir *vdir; ++ struct au_nhash delist; ++ struct au_nhash whlist; ++ aufs_bindex_t bindex; ++ unsigned int flags; ++ int err; ++}; ++ ++static int fillvdir(void *__arg, const char *__name, int nlen, ++ loff_t offset __maybe_unused, u64 h_ino, ++ unsigned int d_type) ++{ ++ struct fillvdir_arg *arg = __arg; ++ char *name = (void *)__name; ++ struct super_block *sb; ++ ino_t ino; ++ const unsigned char shwh = !!au_ftest_fillvdir(arg->flags, SHWH); ++ ++ arg->err = 0; ++ sb = arg->file->f_dentry->d_sb; ++ au_fset_fillvdir(arg->flags, CALLED); ++ /* smp_mb(); */ ++ if (nlen <= AUFS_WH_PFX_LEN ++ || memcmp(name, AUFS_WH_PFX, AUFS_WH_PFX_LEN)) { ++ if (test_known(&arg->delist, name, nlen) ++ || au_nhash_test_known_wh(&arg->whlist, name, nlen)) ++ goto out; /* already exists or whiteouted */ ++ ++ sb = arg->file->f_dentry->d_sb; ++ arg->err = au_ino(sb, arg->bindex, h_ino, d_type, &ino); ++ if (!arg->err) { ++ if (unlikely(nlen > AUFS_MAX_NAMELEN)) ++ d_type = DT_UNKNOWN; ++ arg->err = append_de(arg->vdir, name, nlen, ino, ++ d_type, &arg->delist); ++ } ++ } else if (au_ftest_fillvdir(arg->flags, WHABLE)) { ++ name += AUFS_WH_PFX_LEN; ++ nlen -= AUFS_WH_PFX_LEN; ++ if (au_nhash_test_known_wh(&arg->whlist, name, nlen)) ++ goto out; /* already whiteouted */ ++ ++ if (shwh) ++ arg->err = au_wh_ino(sb, arg->bindex, h_ino, d_type, ++ &ino); ++ if (!arg->err) { ++ if (nlen <= AUFS_MAX_NAMELEN + AUFS_WH_PFX_LEN) ++ d_type = DT_UNKNOWN; ++ arg->err = au_nhash_append_wh ++ (&arg->whlist, name, nlen, ino, d_type, ++ arg->bindex, shwh); ++ } ++ } ++ ++out: ++ if (!arg->err) ++ arg->vdir->vd_jiffy = jiffies; ++ /* smp_mb(); */ ++ AuTraceErr(arg->err); ++ return arg->err; ++} ++ ++static int au_handle_shwh(struct 
super_block *sb, struct au_vdir *vdir, ++ struct au_nhash *whlist, struct au_nhash *delist) ++{ ++#ifdef CONFIG_AUFS_SHWH ++ int err; ++ unsigned int nh, u; ++ struct hlist_head *head; ++ struct au_vdir_wh *tpos; ++ struct hlist_node *pos, *n; ++ char *p, *o; ++ struct au_vdir_destr *destr; ++ ++ AuDebugOn(!au_opt_test(au_mntflags(sb), SHWH)); ++ ++ err = -ENOMEM; ++ o = p = __getname_gfp(GFP_NOFS); ++ if (unlikely(!p)) ++ goto out; ++ ++ err = 0; ++ nh = whlist->nh_num; ++ memcpy(p, AUFS_WH_PFX, AUFS_WH_PFX_LEN); ++ p += AUFS_WH_PFX_LEN; ++ for (u = 0; u < nh; u++) { ++ head = whlist->nh_head + u; ++ hlist_for_each_entry_safe(tpos, pos, n, head, wh_hash) { ++ destr = &tpos->wh_str; ++ memcpy(p, destr->name, destr->len); ++ err = append_de(vdir, o, destr->len + AUFS_WH_PFX_LEN, ++ tpos->wh_ino, tpos->wh_type, delist); ++ if (unlikely(err)) ++ break; ++ } ++ } ++ ++ __putname(o); ++ ++out: ++ AuTraceErr(err); ++ return err; ++#else ++ return 0; ++#endif ++} ++ ++static int au_do_read_vdir(struct fillvdir_arg *arg) ++{ ++ int err; ++ unsigned int rdhash; ++ loff_t offset; ++ aufs_bindex_t bend, bindex, bstart; ++ unsigned char shwh; ++ struct file *hf, *file; ++ struct super_block *sb; ++ ++ file = arg->file; ++ sb = file->f_dentry->d_sb; ++ SiMustAnyLock(sb); ++ ++ rdhash = au_sbi(sb)->si_rdhash; ++ if (!rdhash) ++ rdhash = au_rdhash_est(au_dir_size(file, /*dentry*/NULL)); ++ err = au_nhash_alloc(&arg->delist, rdhash, GFP_NOFS); ++ if (unlikely(err)) ++ goto out; ++ err = au_nhash_alloc(&arg->whlist, rdhash, GFP_NOFS); ++ if (unlikely(err)) ++ goto out_delist; ++ ++ err = 0; ++ arg->flags = 0; ++ shwh = 0; ++ if (au_opt_test(au_mntflags(sb), SHWH)) { ++ shwh = 1; ++ au_fset_fillvdir(arg->flags, SHWH); ++ } ++ bstart = au_fbstart(file); ++ bend = au_fbend_dir(file); ++ for (bindex = bstart; !err && bindex <= bend; bindex++) { ++ hf = au_hf_dir(file, bindex); ++ if (!hf) ++ continue; ++ ++ offset = vfsub_llseek(hf, 0, SEEK_SET); ++ err = offset; ++ if (unlikely(offset)) ++ break; ++ ++ arg->bindex = bindex; ++ au_fclr_fillvdir(arg->flags, WHABLE); ++ if (shwh ++ || (bindex != bend ++ && au_br_whable(au_sbr_perm(sb, bindex)))) ++ au_fset_fillvdir(arg->flags, WHABLE); ++ do { ++ arg->err = 0; ++ au_fclr_fillvdir(arg->flags, CALLED); ++ /* smp_mb(); */ ++ err = vfsub_readdir(hf, fillvdir, arg); ++ if (err >= 0) ++ err = arg->err; ++ } while (!err && au_ftest_fillvdir(arg->flags, CALLED)); ++ } ++ ++ if (!err && shwh) ++ err = au_handle_shwh(sb, arg->vdir, &arg->whlist, &arg->delist); ++ ++ au_nhash_wh_free(&arg->whlist); ++ ++out_delist: ++ au_nhash_de_free(&arg->delist); ++out: ++ return err; ++} ++ ++static int read_vdir(struct file *file, int may_read) ++{ ++ int err; ++ unsigned long expire; ++ unsigned char do_read; ++ struct fillvdir_arg arg; ++ struct inode *inode; ++ struct au_vdir *vdir, *allocated; ++ ++ err = 0; ++ inode = file->f_dentry->d_inode; ++ IMustLock(inode); ++ SiMustAnyLock(inode->i_sb); ++ ++ allocated = NULL; ++ do_read = 0; ++ expire = au_sbi(inode->i_sb)->si_rdcache; ++ vdir = au_ivdir(inode); ++ if (!vdir) { ++ do_read = 1; ++ vdir = alloc_vdir(file); ++ err = PTR_ERR(vdir); ++ if (IS_ERR(vdir)) ++ goto out; ++ err = 0; ++ allocated = vdir; ++ } else if (may_read ++ && (inode->i_version != vdir->vd_version ++ || time_after(jiffies, vdir->vd_jiffy + expire))) { ++ do_read = 1; ++ err = reinit_vdir(vdir); ++ if (unlikely(err)) ++ goto out; ++ } ++ ++ if (!do_read) ++ return 0; /* success */ ++ ++ arg.file = file; ++ arg.vdir = vdir; ++ err = au_do_read_vdir(&arg); ++ if 
(!err) { ++ /* file->f_pos = 0; */ ++ vdir->vd_version = inode->i_version; ++ vdir->vd_last.ul = 0; ++ vdir->vd_last.p.deblk = vdir->vd_deblk[0]; ++ if (allocated) ++ au_set_ivdir(inode, allocated); ++ } else if (allocated) ++ au_vdir_free(allocated); ++ ++out: ++ return err; ++} ++ ++static int copy_vdir(struct au_vdir *tgt, struct au_vdir *src) ++{ ++ int err, rerr; ++ unsigned long ul, n; ++ const unsigned int deblk_sz = src->vd_deblk_sz; ++ ++ AuDebugOn(tgt->vd_nblk != 1); ++ ++ err = -ENOMEM; ++ if (tgt->vd_nblk < src->vd_nblk) { ++ unsigned char **p; ++ ++ p = krealloc(tgt->vd_deblk, sizeof(*p) * src->vd_nblk, ++ GFP_NOFS); ++ if (unlikely(!p)) ++ goto out; ++ tgt->vd_deblk = p; ++ } ++ ++ if (tgt->vd_deblk_sz != deblk_sz) { ++ unsigned char *p; ++ ++ tgt->vd_deblk_sz = deblk_sz; ++ p = krealloc(tgt->vd_deblk[0], deblk_sz, GFP_NOFS); ++ if (unlikely(!p)) ++ goto out; ++ tgt->vd_deblk[0] = p; ++ } ++ memcpy(tgt->vd_deblk[0], src->vd_deblk[0], deblk_sz); ++ tgt->vd_version = src->vd_version; ++ tgt->vd_jiffy = src->vd_jiffy; ++ ++ n = src->vd_nblk; ++ for (ul = 1; ul < n; ul++) { ++ tgt->vd_deblk[ul] = kmemdup(src->vd_deblk[ul], deblk_sz, ++ GFP_NOFS); ++ if (unlikely(!tgt->vd_deblk[ul])) ++ goto out; ++ tgt->vd_nblk++; ++ } ++ tgt->vd_nblk = n; ++ tgt->vd_last.ul = tgt->vd_last.ul; ++ tgt->vd_last.p.deblk = tgt->vd_deblk[tgt->vd_last.ul]; ++ tgt->vd_last.p.deblk += src->vd_last.p.deblk ++ - src->vd_deblk[src->vd_last.ul]; ++ /* smp_mb(); */ ++ return 0; /* success */ ++ ++out: ++ rerr = reinit_vdir(tgt); ++ BUG_ON(rerr); ++ return err; ++} ++ ++int au_vdir_init(struct file *file) ++{ ++ int err; ++ struct inode *inode; ++ struct au_vdir *vdir_cache, *allocated; ++ ++ err = read_vdir(file, !file->f_pos); ++ if (unlikely(err)) ++ goto out; ++ ++ allocated = NULL; ++ vdir_cache = au_fvdir_cache(file); ++ if (!vdir_cache) { ++ vdir_cache = alloc_vdir(file); ++ err = PTR_ERR(vdir_cache); ++ if (IS_ERR(vdir_cache)) ++ goto out; ++ allocated = vdir_cache; ++ } else if (!file->f_pos && vdir_cache->vd_version != file->f_version) { ++ err = reinit_vdir(vdir_cache); ++ if (unlikely(err)) ++ goto out; ++ } else ++ return 0; /* success */ ++ ++ inode = file->f_dentry->d_inode; ++ err = copy_vdir(vdir_cache, au_ivdir(inode)); ++ if (!err) { ++ file->f_version = inode->i_version; ++ if (allocated) ++ au_set_fvdir_cache(file, allocated); ++ } else if (allocated) ++ au_vdir_free(allocated); ++ ++out: ++ return err; ++} ++ ++static loff_t calc_offset(struct au_vdir *vdir) ++{ ++ loff_t offset; ++ union au_vdir_deblk_p p; ++ ++ p.deblk = vdir->vd_deblk[vdir->vd_last.ul]; ++ offset = vdir->vd_last.p.deblk - p.deblk; ++ offset += vdir->vd_deblk_sz * vdir->vd_last.ul; ++ return offset; ++} ++ ++/* returns true or false */ ++static int seek_vdir(struct file *file) ++{ ++ int valid; ++ unsigned int deblk_sz; ++ unsigned long ul, n; ++ loff_t offset; ++ union au_vdir_deblk_p p, deblk_end; ++ struct au_vdir *vdir_cache; ++ ++ valid = 1; ++ vdir_cache = au_fvdir_cache(file); ++ offset = calc_offset(vdir_cache); ++ AuDbg("offset %lld\n", offset); ++ if (file->f_pos == offset) ++ goto out; ++ ++ vdir_cache->vd_last.ul = 0; ++ vdir_cache->vd_last.p.deblk = vdir_cache->vd_deblk[0]; ++ if (!file->f_pos) ++ goto out; ++ ++ valid = 0; ++ deblk_sz = vdir_cache->vd_deblk_sz; ++ ul = div64_u64(file->f_pos, deblk_sz); ++ AuDbg("ul %lu\n", ul); ++ if (ul >= vdir_cache->vd_nblk) ++ goto out; ++ ++ n = vdir_cache->vd_nblk; ++ for (; ul < n; ul++) { ++ p.deblk = vdir_cache->vd_deblk[ul]; ++ deblk_end.deblk = p.deblk + 
deblk_sz; ++ offset = ul; ++ offset *= deblk_sz; ++ while (!is_deblk_end(&p, &deblk_end) && offset < file->f_pos) { ++ unsigned int l; ++ ++ l = calc_size(p.de->de_str.len); ++ offset += l; ++ p.deblk += l; ++ } ++ if (!is_deblk_end(&p, &deblk_end)) { ++ valid = 1; ++ vdir_cache->vd_last.ul = ul; ++ vdir_cache->vd_last.p = p; ++ break; ++ } ++ } ++ ++out: ++ /* smp_mb(); */ ++ AuTraceErr(!valid); ++ return valid; ++} ++ ++int au_vdir_fill_de(struct file *file, void *dirent, filldir_t filldir) ++{ ++ int err; ++ unsigned int l, deblk_sz; ++ union au_vdir_deblk_p deblk_end; ++ struct au_vdir *vdir_cache; ++ struct au_vdir_de *de; ++ ++ vdir_cache = au_fvdir_cache(file); ++ if (!seek_vdir(file)) ++ return 0; ++ ++ deblk_sz = vdir_cache->vd_deblk_sz; ++ while (1) { ++ deblk_end.deblk = vdir_cache->vd_deblk[vdir_cache->vd_last.ul]; ++ deblk_end.deblk += deblk_sz; ++ while (!is_deblk_end(&vdir_cache->vd_last.p, &deblk_end)) { ++ de = vdir_cache->vd_last.p.de; ++ AuDbg("%.*s, off%lld, i%lu, dt%d\n", ++ de->de_str.len, de->de_str.name, file->f_pos, ++ (unsigned long)de->de_ino, de->de_type); ++ err = filldir(dirent, de->de_str.name, de->de_str.len, ++ file->f_pos, de->de_ino, de->de_type); ++ if (unlikely(err)) { ++ AuTraceErr(err); ++ /* todo: ignore the error caused by udba? */ ++ /* return err; */ ++ return 0; ++ } ++ ++ l = calc_size(de->de_str.len); ++ vdir_cache->vd_last.p.deblk += l; ++ file->f_pos += l; ++ } ++ if (vdir_cache->vd_last.ul < vdir_cache->vd_nblk - 1) { ++ vdir_cache->vd_last.ul++; ++ vdir_cache->vd_last.p.deblk ++ = vdir_cache->vd_deblk[vdir_cache->vd_last.ul]; ++ file->f_pos = deblk_sz * vdir_cache->vd_last.ul; ++ continue; ++ } ++ break; ++ } ++ ++ /* smp_mb(); */ ++ return 0; ++} +--- /dev/null 2012-03-14 12:35:58.848999748 +0100 ++++ b/fs/aufs/vfsub.c 2012-03-20 17:31:18.000000000 +0100 +@@ -0,0 +1,832 @@ ++/* ++ * Copyright (C) 2005-2012 Junjiro R. Okajima ++ * ++ * This program, aufs is free software; you can redistribute it and/or modify ++ * it under the terms of the GNU General Public License as published by ++ * the Free Software Foundation; either version 2 of the License, or ++ * (at your option) any later version. ++ * ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ * GNU General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public License ++ * along with this program; if not, write to the Free Software ++ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA ++ */ ++ ++/* ++ * sub-routines for VFS ++ */ ++ ++#include ++#include ++#include ++#include ++#include "aufs.h" ++ ++int vfsub_update_h_iattr(struct path *h_path, int *did) ++{ ++ int err; ++ struct kstat st; ++ struct super_block *h_sb; ++ ++ /* for remote fs, leave work for its getattr or d_revalidate */ ++ /* for bad i_attr fs, handle them in aufs_getattr() */ ++ /* still some fs may acquire i_mutex. 
we need to skip them */ ++ err = 0; ++ if (!did) ++ did = &err; ++ h_sb = h_path->dentry->d_sb; ++ *did = (!au_test_fs_remote(h_sb) && au_test_fs_refresh_iattr(h_sb)); ++ if (*did) ++ err = vfs_getattr(h_path->mnt, h_path->dentry, &st); ++ ++ return err; ++} ++ ++/* ---------------------------------------------------------------------- */ ++ ++struct file *vfsub_dentry_open(struct path *path, int flags) ++{ ++ struct file *file; ++ ++ path_get(path); ++ file = dentry_open(path->dentry, path->mnt, ++ flags /* | __FMODE_NONOTIFY */, ++ current_cred()); ++ if (!IS_ERR_OR_NULL(file) ++ && (file->f_mode & (FMODE_READ | FMODE_WRITE)) == FMODE_READ) ++ i_readcount_inc(path->dentry->d_inode); ++ ++ return file; ++} ++ ++struct file *vfsub_filp_open(const char *path, int oflags, int mode) ++{ ++ struct file *file; ++ ++ lockdep_off(); ++ file = filp_open(path, ++ oflags /* | __FMODE_NONOTIFY */, ++ mode); ++ lockdep_on(); ++ if (IS_ERR(file)) ++ goto out; ++ vfsub_update_h_iattr(&file->f_path, /*did*/NULL); /*ignore*/ ++ ++out: ++ return file; ++} ++ ++int vfsub_kern_path(const char *name, unsigned int flags, struct path *path) ++{ ++ int err; ++ ++ err = kern_path(name, flags, path); ++ if (!err && path->dentry->d_inode) ++ vfsub_update_h_iattr(path, /*did*/NULL); /*ignore*/ ++ return err; ++} ++ ++struct dentry *vfsub_lookup_one_len(const char *name, struct dentry *parent, ++ int len) ++{ ++ struct path path = { ++ .mnt = NULL ++ }; ++ ++ /* VFS checks it too, but by WARN_ON_ONCE() */ ++ IMustLock(parent->d_inode); ++ ++ path.dentry = lookup_one_len(name, parent, len); ++ if (IS_ERR(path.dentry)) ++ goto out; ++ if (path.dentry->d_inode) ++ vfsub_update_h_iattr(&path, /*did*/NULL); /*ignore*/ ++ ++out: ++ AuTraceErrPtr(path.dentry); ++ return path.dentry; ++} ++ ++struct dentry *vfsub_lookup_hash(struct nameidata *nd) ++{ ++ struct path path = { ++ .mnt = nd->path.mnt ++ }; ++ ++ IMustLock(nd->path.dentry->d_inode); ++ ++ path.dentry = lookup_hash(nd); ++ if (IS_ERR(path.dentry)) ++ goto out; ++ if (path.dentry->d_inode) ++ vfsub_update_h_iattr(&path, /*did*/NULL); /*ignore*/ ++ ++out: ++ AuTraceErrPtr(path.dentry); ++ return path.dentry; ++} ++ ++/* ++ * this is "VFS:__lookup_one_len()" which was removed and merged into ++ * VFS:lookup_one_len() by the commit. ++ * 6a96ba5 2011-03-14 kill __lookup_one_len() ++ * this function should always be equivalent to the corresponding part in ++ * VFS:lookup_one_len(). 
++ */ ++int vfsub_name_hash(const char *name, struct qstr *this, int len) ++{ ++ unsigned int c; ++ ++ this->name = name; ++ this->len = len; ++ this->hash = full_name_hash(name, len); ++ if (!len) ++ return -EACCES; ++ ++ while (len--) { ++ c = *(const unsigned char *)name++; ++ if (c == '/' || c == '\0') ++ return -EACCES; ++ } ++ return 0; ++} ++ ++/* ---------------------------------------------------------------------- */ ++ ++struct dentry *vfsub_lock_rename(struct dentry *d1, struct au_hinode *hdir1, ++ struct dentry *d2, struct au_hinode *hdir2) ++{ ++ struct dentry *d; ++ ++ lockdep_off(); ++ d = lock_rename(d1, d2); ++ lockdep_on(); ++ au_hn_suspend(hdir1); ++ if (hdir1 != hdir2) ++ au_hn_suspend(hdir2); ++ ++ return d; ++} ++ ++void vfsub_unlock_rename(struct dentry *d1, struct au_hinode *hdir1, ++ struct dentry *d2, struct au_hinode *hdir2) ++{ ++ au_hn_resume(hdir1); ++ if (hdir1 != hdir2) ++ au_hn_resume(hdir2); ++ lockdep_off(); ++ unlock_rename(d1, d2); ++ lockdep_on(); ++} ++ ++/* ---------------------------------------------------------------------- */ ++ ++int vfsub_create(struct inode *dir, struct path *path, int mode) ++{ ++ int err; ++ struct dentry *d; ++ ++ IMustLock(dir); ++ ++ d = path->dentry; ++ path->dentry = d->d_parent; ++ err = security_path_mknod(path, d, mode, 0); ++ path->dentry = d; ++ if (unlikely(err)) ++ goto out; ++ ++ if (au_test_fs_null_nd(dir->i_sb)) ++ err = vfs_create(dir, path->dentry, mode, NULL); ++ else { ++ struct nameidata h_nd; ++ ++ memset(&h_nd, 0, sizeof(h_nd)); ++ h_nd.flags = LOOKUP_CREATE; ++ h_nd.intent.open.flags = O_CREAT ++ | vfsub_fmode_to_uint(FMODE_READ); ++ h_nd.intent.open.create_mode = mode; ++ h_nd.path.dentry = path->dentry->d_parent; ++ h_nd.path.mnt = path->mnt; ++ path_get(&h_nd.path); ++ err = vfs_create(dir, path->dentry, mode, &h_nd); ++ path_put(&h_nd.path); ++ } ++ ++ if (!err) { ++ struct path tmp = *path; ++ int did; ++ ++ vfsub_update_h_iattr(&tmp, &did); ++ if (did) { ++ tmp.dentry = path->dentry->d_parent; ++ vfsub_update_h_iattr(&tmp, /*did*/NULL); ++ } ++ /*ignore*/ ++ } ++ ++out: ++ return err; ++} ++ ++int vfsub_symlink(struct inode *dir, struct path *path, const char *symname) ++{ ++ int err; ++ struct dentry *d; ++ ++ IMustLock(dir); ++ ++ d = path->dentry; ++ path->dentry = d->d_parent; ++ err = security_path_symlink(path, d, symname); ++ path->dentry = d; ++ if (unlikely(err)) ++ goto out; ++ ++ err = vfs_symlink(dir, path->dentry, symname); ++ if (!err) { ++ struct path tmp = *path; ++ int did; ++ ++ vfsub_update_h_iattr(&tmp, &did); ++ if (did) { ++ tmp.dentry = path->dentry->d_parent; ++ vfsub_update_h_iattr(&tmp, /*did*/NULL); ++ } ++ /*ignore*/ ++ } ++ ++out: ++ return err; ++} ++ ++int vfsub_mknod(struct inode *dir, struct path *path, int mode, dev_t dev) ++{ ++ int err; ++ struct dentry *d; ++ ++ IMustLock(dir); ++ ++ d = path->dentry; ++ path->dentry = d->d_parent; ++ err = security_path_mknod(path, d, mode, new_encode_dev(dev)); ++ path->dentry = d; ++ if (unlikely(err)) ++ goto out; ++ ++ err = vfs_mknod(dir, path->dentry, mode, dev); ++ if (!err) { ++ struct path tmp = *path; ++ int did; ++ ++ vfsub_update_h_iattr(&tmp, &did); ++ if (did) { ++ tmp.dentry = path->dentry->d_parent; ++ vfsub_update_h_iattr(&tmp, /*did*/NULL); ++ } ++ /*ignore*/ ++ } ++ ++out: ++ return err; ++} ++ ++static int au_test_nlink(struct inode *inode) ++{ ++ const unsigned int link_max = UINT_MAX >> 1; /* rough margin */ ++ ++ if (!au_test_fs_no_limit_nlink(inode->i_sb) ++ || inode->i_nlink < link_max) ++ return 0; 
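[Illustrative aside, not part of the patch] vfsub_name_hash() above re-implements the check that the old VFS __lookup_one_len() performed: an empty component, or any '/' or NUL byte inside it, is rejected with -EACCES before the name hash is computed. A minimal stand-alone sketch of that validation rule, in user-space C with a hypothetical helper name (the kernel version additionally fills a struct qstr and its hash):

/* Sketch of the vfsub_name_hash() validation rule in plain user-space C. */
#include <stddef.h>
#include <stdio.h>
#include <errno.h>

static int check_component(const char *name, size_t len)
{
	size_t i;

	if (!len)
		return -EACCES;	/* an empty component is never allowed */
	for (i = 0; i < len; i++)
		if (name[i] == '/' || name[i] == '\0')
			return -EACCES;	/* no separators or NULs inside a component */
	return 0;
}

int main(void)
{
	printf("%d\n", check_component("file", 4));	/* 0 */
	printf("%d\n", check_component("a/b", 3));	/* -EACCES */
	printf("%d\n", check_component("", 0));		/* -EACCES */
	return 0;
}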
++ return -EMLINK; ++} ++ ++int vfsub_link(struct dentry *src_dentry, struct inode *dir, struct path *path) ++{ ++ int err; ++ struct dentry *d; ++ ++ IMustLock(dir); ++ ++ err = au_test_nlink(src_dentry->d_inode); ++ if (unlikely(err)) ++ return err; ++ ++ d = path->dentry; ++ path->dentry = d->d_parent; ++ err = security_path_link(src_dentry, path, d); ++ path->dentry = d; ++ if (unlikely(err)) ++ goto out; ++ ++ lockdep_off(); ++ err = vfs_link(src_dentry, dir, path->dentry); ++ lockdep_on(); ++ if (!err) { ++ struct path tmp = *path; ++ int did; ++ ++ /* fuse has different memory inode for the same inumber */ ++ vfsub_update_h_iattr(&tmp, &did); ++ if (did) { ++ tmp.dentry = path->dentry->d_parent; ++ vfsub_update_h_iattr(&tmp, /*did*/NULL); ++ tmp.dentry = src_dentry; ++ vfsub_update_h_iattr(&tmp, /*did*/NULL); ++ } ++ /*ignore*/ ++ } ++ ++out: ++ return err; ++} ++ ++int vfsub_rename(struct inode *src_dir, struct dentry *src_dentry, ++ struct inode *dir, struct path *path) ++{ ++ int err; ++ struct path tmp = { ++ .mnt = path->mnt ++ }; ++ struct dentry *d; ++ ++ IMustLock(dir); ++ IMustLock(src_dir); ++ ++ d = path->dentry; ++ path->dentry = d->d_parent; ++ tmp.dentry = src_dentry->d_parent; ++ err = security_path_rename(&tmp, src_dentry, path, d); ++ path->dentry = d; ++ if (unlikely(err)) ++ goto out; ++ ++ lockdep_off(); ++ err = vfs_rename(src_dir, src_dentry, dir, path->dentry); ++ lockdep_on(); ++ if (!err) { ++ int did; ++ ++ tmp.dentry = d->d_parent; ++ vfsub_update_h_iattr(&tmp, &did); ++ if (did) { ++ tmp.dentry = src_dentry; ++ vfsub_update_h_iattr(&tmp, /*did*/NULL); ++ tmp.dentry = src_dentry->d_parent; ++ vfsub_update_h_iattr(&tmp, /*did*/NULL); ++ } ++ /*ignore*/ ++ } ++ ++out: ++ return err; ++} ++ ++int vfsub_mkdir(struct inode *dir, struct path *path, int mode) ++{ ++ int err; ++ struct dentry *d; ++ ++ IMustLock(dir); ++ ++ d = path->dentry; ++ path->dentry = d->d_parent; ++ err = security_path_mkdir(path, d, mode); ++ path->dentry = d; ++ if (unlikely(err)) ++ goto out; ++ ++ err = vfs_mkdir(dir, path->dentry, mode); ++ if (!err) { ++ struct path tmp = *path; ++ int did; ++ ++ vfsub_update_h_iattr(&tmp, &did); ++ if (did) { ++ tmp.dentry = path->dentry->d_parent; ++ vfsub_update_h_iattr(&tmp, /*did*/NULL); ++ } ++ /*ignore*/ ++ } ++ ++out: ++ return err; ++} ++ ++int vfsub_rmdir(struct inode *dir, struct path *path) ++{ ++ int err; ++ struct dentry *d; ++ ++ IMustLock(dir); ++ ++ d = path->dentry; ++ path->dentry = d->d_parent; ++ err = security_path_rmdir(path, d); ++ path->dentry = d; ++ if (unlikely(err)) ++ goto out; ++ ++ lockdep_off(); ++ err = vfs_rmdir(dir, path->dentry); ++ lockdep_on(); ++ if (!err) { ++ struct path tmp = { ++ .dentry = path->dentry->d_parent, ++ .mnt = path->mnt ++ }; ++ ++ vfsub_update_h_iattr(&tmp, /*did*/NULL); /*ignore*/ ++ } ++ ++out: ++ return err; ++} ++ ++/* ---------------------------------------------------------------------- */ ++ ++/* todo: support mmap_sem? */ ++ssize_t vfsub_read_u(struct file *file, char __user *ubuf, size_t count, ++ loff_t *ppos) ++{ ++ ssize_t err; ++ ++ lockdep_off(); ++ err = vfs_read(file, ubuf, count, ppos); ++ lockdep_on(); ++ if (err >= 0) ++ vfsub_update_h_iattr(&file->f_path, /*did*/NULL); /*ignore*/ ++ return err; ++} ++ ++/* todo: kernel_read()? 
*/ ++ssize_t vfsub_read_k(struct file *file, void *kbuf, size_t count, ++ loff_t *ppos) ++{ ++ ssize_t err; ++ mm_segment_t oldfs; ++ union { ++ void *k; ++ char __user *u; ++ } buf; ++ ++ buf.k = kbuf; ++ oldfs = get_fs(); ++ set_fs(KERNEL_DS); ++ err = vfsub_read_u(file, buf.u, count, ppos); ++ set_fs(oldfs); ++ return err; ++} ++ ++ssize_t vfsub_write_u(struct file *file, const char __user *ubuf, size_t count, ++ loff_t *ppos) ++{ ++ ssize_t err; ++ ++ lockdep_off(); ++ err = vfs_write(file, ubuf, count, ppos); ++ lockdep_on(); ++ if (err >= 0) ++ vfsub_update_h_iattr(&file->f_path, /*did*/NULL); /*ignore*/ ++ return err; ++} ++ ++ssize_t vfsub_write_k(struct file *file, void *kbuf, size_t count, loff_t *ppos) ++{ ++ ssize_t err; ++ mm_segment_t oldfs; ++ union { ++ void *k; ++ const char __user *u; ++ } buf; ++ ++ buf.k = kbuf; ++ oldfs = get_fs(); ++ set_fs(KERNEL_DS); ++ err = vfsub_write_u(file, buf.u, count, ppos); ++ set_fs(oldfs); ++ return err; ++} ++ ++int vfsub_flush(struct file *file, fl_owner_t id) ++{ ++ int err; ++ ++ err = 0; ++ if (file->f_op && file->f_op->flush) { ++ if (!au_test_nfs(file->f_dentry->d_sb)) ++ err = file->f_op->flush(file, id); ++ else { ++ lockdep_off(); ++ err = file->f_op->flush(file, id); ++ lockdep_on(); ++ } ++ if (!err) ++ vfsub_update_h_iattr(&file->f_path, /*did*/NULL); ++ /*ignore*/ ++ } ++ return err; ++} ++ ++int vfsub_readdir(struct file *file, filldir_t filldir, void *arg) ++{ ++ int err; ++ ++ lockdep_off(); ++ err = vfs_readdir(file, filldir, arg); ++ lockdep_on(); ++ if (err >= 0) ++ vfsub_update_h_iattr(&file->f_path, /*did*/NULL); /*ignore*/ ++ return err; ++} ++ ++long vfsub_splice_to(struct file *in, loff_t *ppos, ++ struct pipe_inode_info *pipe, size_t len, ++ unsigned int flags) ++{ ++ long err; ++ ++ lockdep_off(); ++ err = do_splice_to(in, ppos, pipe, len, flags); ++ lockdep_on(); ++ file_accessed(in); ++ if (err >= 0) ++ vfsub_update_h_iattr(&in->f_path, /*did*/NULL); /*ignore*/ ++ return err; ++} ++ ++long vfsub_splice_from(struct pipe_inode_info *pipe, struct file *out, ++ loff_t *ppos, size_t len, unsigned int flags) ++{ ++ long err; ++ ++ lockdep_off(); ++ err = do_splice_from(pipe, out, ppos, len, flags); ++ lockdep_on(); ++ if (err >= 0) ++ vfsub_update_h_iattr(&out->f_path, /*did*/NULL); /*ignore*/ ++ return err; ++} ++ ++int vfsub_fsync(struct file *file, struct path *path, int datasync) ++{ ++ int err; ++ ++ /* file can be NULL */ ++ lockdep_off(); ++ err = vfs_fsync(file, datasync); ++ lockdep_on(); ++ if (!err) { ++ if (!path) { ++ AuDebugOn(!file); ++ path = &file->f_path; ++ } ++ vfsub_update_h_iattr(path, /*did*/NULL); /*ignore*/ ++ } ++ return err; ++} ++ ++/* cf. 
open.c:do_sys_truncate() and do_sys_ftruncate() */ ++int vfsub_trunc(struct path *h_path, loff_t length, unsigned int attr, ++ struct file *h_file) ++{ ++ int err; ++ struct inode *h_inode; ++ ++ h_inode = h_path->dentry->d_inode; ++ if (!h_file) { ++ err = mnt_want_write(h_path->mnt); ++ if (err) ++ goto out; ++ err = inode_permission(h_inode, MAY_WRITE); ++ if (err) ++ goto out_mnt; ++ err = get_write_access(h_inode); ++ if (err) ++ goto out_mnt; ++ err = break_lease(h_inode, O_WRONLY); ++ if (err) ++ goto out_inode; ++ } ++ ++ err = locks_verify_truncate(h_inode, h_file, length); ++ if (!err) ++ err = security_path_truncate(h_path); ++ if (!err) { ++ lockdep_off(); ++ err = do_truncate(h_path->dentry, length, attr, h_file); ++ lockdep_on(); ++ } ++ ++out_inode: ++ if (!h_file) ++ put_write_access(h_inode); ++out_mnt: ++ if (!h_file) ++ mnt_drop_write(h_path->mnt); ++out: ++ return err; ++} ++ ++/* ---------------------------------------------------------------------- */ ++ ++struct au_vfsub_mkdir_args { ++ int *errp; ++ struct inode *dir; ++ struct path *path; ++ int mode; ++}; ++ ++static void au_call_vfsub_mkdir(void *args) ++{ ++ struct au_vfsub_mkdir_args *a = args; ++ *a->errp = vfsub_mkdir(a->dir, a->path, a->mode); ++} ++ ++int vfsub_sio_mkdir(struct inode *dir, struct path *path, int mode) ++{ ++ int err, do_sio, wkq_err; ++ ++ do_sio = au_test_h_perm_sio(dir, MAY_EXEC | MAY_WRITE); ++ if (!do_sio) ++ err = vfsub_mkdir(dir, path, mode); ++ else { ++ struct au_vfsub_mkdir_args args = { ++ .errp = &err, ++ .dir = dir, ++ .path = path, ++ .mode = mode ++ }; ++ wkq_err = au_wkq_wait(au_call_vfsub_mkdir, &args); ++ if (unlikely(wkq_err)) ++ err = wkq_err; ++ } ++ ++ return err; ++} ++ ++struct au_vfsub_rmdir_args { ++ int *errp; ++ struct inode *dir; ++ struct path *path; ++}; ++ ++static void au_call_vfsub_rmdir(void *args) ++{ ++ struct au_vfsub_rmdir_args *a = args; ++ *a->errp = vfsub_rmdir(a->dir, a->path); ++} ++ ++int vfsub_sio_rmdir(struct inode *dir, struct path *path) ++{ ++ int err, do_sio, wkq_err; ++ ++ do_sio = au_test_h_perm_sio(dir, MAY_EXEC | MAY_WRITE); ++ if (!do_sio) ++ err = vfsub_rmdir(dir, path); ++ else { ++ struct au_vfsub_rmdir_args args = { ++ .errp = &err, ++ .dir = dir, ++ .path = path ++ }; ++ wkq_err = au_wkq_wait(au_call_vfsub_rmdir, &args); ++ if (unlikely(wkq_err)) ++ err = wkq_err; ++ } ++ ++ return err; ++} ++ ++/* ---------------------------------------------------------------------- */ ++ ++struct notify_change_args { ++ int *errp; ++ struct path *path; ++ struct iattr *ia; ++}; ++ ++static void call_notify_change(void *args) ++{ ++ struct notify_change_args *a = args; ++ struct inode *h_inode; ++ ++ h_inode = a->path->dentry->d_inode; ++ IMustLock(h_inode); ++ ++ *a->errp = -EPERM; ++ if (!IS_IMMUTABLE(h_inode) && !IS_APPEND(h_inode)) { ++ *a->errp = notify_change(a->path->dentry, a->ia); ++ if (!*a->errp) ++ vfsub_update_h_iattr(a->path, /*did*/NULL); /*ignore*/ ++ } ++ AuTraceErr(*a->errp); ++} ++ ++int vfsub_notify_change(struct path *path, struct iattr *ia) ++{ ++ int err; ++ struct notify_change_args args = { ++ .errp = &err, ++ .path = path, ++ .ia = ia ++ }; ++ ++ call_notify_change(&args); ++ ++ return err; ++} ++ ++int vfsub_sio_notify_change(struct path *path, struct iattr *ia) ++{ ++ int err, wkq_err; ++ struct notify_change_args args = { ++ .errp = &err, ++ .path = path, ++ .ia = ia ++ }; ++ ++ wkq_err = au_wkq_wait(call_notify_change, &args); ++ if (unlikely(wkq_err)) ++ err = wkq_err; ++ ++ return err; ++} ++ ++/* 
---------------------------------------------------------------------- */ ++ ++struct unlink_args { ++ int *errp; ++ struct inode *dir; ++ struct path *path; ++}; ++ ++static void call_unlink(void *args) ++{ ++ struct unlink_args *a = args; ++ struct dentry *d = a->path->dentry; ++ struct inode *h_inode; ++ const int stop_sillyrename = (au_test_nfs(d->d_sb) ++ && d->d_count == 1); ++ ++ IMustLock(a->dir); ++ ++ a->path->dentry = d->d_parent; ++ *a->errp = security_path_unlink(a->path, d); ++ a->path->dentry = d; ++ if (unlikely(*a->errp)) ++ return; ++ ++ if (!stop_sillyrename) ++ dget(d); ++ h_inode = d->d_inode; ++ if (h_inode) ++ ihold(h_inode); ++ ++ lockdep_off(); ++ *a->errp = vfs_unlink(a->dir, d); ++ lockdep_on(); ++ if (!*a->errp) { ++ struct path tmp = { ++ .dentry = d->d_parent, ++ .mnt = a->path->mnt ++ }; ++ vfsub_update_h_iattr(&tmp, /*did*/NULL); /*ignore*/ ++ } ++ ++ if (!stop_sillyrename) ++ dput(d); ++ if (h_inode) ++ iput(h_inode); ++ ++ AuTraceErr(*a->errp); ++} ++ ++/* ++ * @dir: must be locked. ++ * @dentry: target dentry. ++ */ ++int vfsub_unlink(struct inode *dir, struct path *path, int force) ++{ ++ int err; ++ struct unlink_args args = { ++ .errp = &err, ++ .dir = dir, ++ .path = path ++ }; ++ ++ if (!force) ++ call_unlink(&args); ++ else { ++ int wkq_err; ++ ++ wkq_err = au_wkq_wait(call_unlink, &args); ++ if (unlikely(wkq_err)) ++ err = wkq_err; ++ } ++ ++ return err; ++} +--- /dev/null 2012-03-14 12:35:58.848999748 +0100 ++++ b/fs/aufs/vfsub.h 2012-03-20 17:31:18.000000000 +0100 +@@ -0,0 +1,240 @@ ++/* ++ * Copyright (C) 2005-2012 Junjiro R. Okajima ++ * ++ * This program, aufs is free software; you can redistribute it and/or modify ++ * it under the terms of the GNU General Public License as published by ++ * the Free Software Foundation; either version 2 of the License, or ++ * (at your option) any later version. ++ * ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ * GNU General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public License ++ * along with this program; if not, write to the Free Software ++ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA ++ */ ++ ++/* ++ * sub-routines for VFS ++ */ ++ ++#ifndef __AUFS_VFSUB_H__ ++#define __AUFS_VFSUB_H__ ++ ++#ifdef __KERNEL__ ++ ++#include ++#include ++#include "debug.h" ++ ++/* copied from linux/fs/internal.h */ ++/* todo: BAD approach!! */ ++DECLARE_BRLOCK(vfsmount_lock); ++extern void file_sb_list_del(struct file *f); ++extern spinlock_t inode_sb_list_lock; ++ ++/* copied from linux/fs/file_table.c */ ++DECLARE_LGLOCK(files_lglock); ++#ifdef CONFIG_SMP ++/* ++ * These macros iterate all files on all CPUs for a given superblock. ++ * files_lglock must be held globally. 
++ */ ++#define do_file_list_for_each_entry(__sb, __file) \ ++{ \ ++ int i; \ ++ for_each_possible_cpu(i) { \ ++ struct list_head *list; \ ++ list = per_cpu_ptr((__sb)->s_files, i); \ ++ list_for_each_entry((__file), list, f_u.fu_list) ++ ++#define while_file_list_for_each_entry \ ++ } \ ++} ++ ++#else ++ ++#define do_file_list_for_each_entry(__sb, __file) \ ++{ \ ++ struct list_head *list; \ ++ list = &(sb)->s_files; \ ++ list_for_each_entry((__file), list, f_u.fu_list) ++ ++#define while_file_list_for_each_entry \ ++} ++#endif ++ ++/* ---------------------------------------------------------------------- */ ++ ++/* lock subclass for lower inode */ ++/* default MAX_LOCKDEP_SUBCLASSES(8) is not enough */ ++/* reduce? gave up. */ ++enum { ++ AuLsc_I_Begin = I_MUTEX_QUOTA, /* 4 */ ++ AuLsc_I_PARENT, /* lower inode, parent first */ ++ AuLsc_I_PARENT2, /* copyup dirs */ ++ AuLsc_I_PARENT3, /* copyup wh */ ++ AuLsc_I_CHILD, ++ AuLsc_I_CHILD2, ++ AuLsc_I_End ++}; ++ ++/* to debug easier, do not make them inlined functions */ ++#define MtxMustLock(mtx) AuDebugOn(!mutex_is_locked(mtx)) ++#define IMustLock(i) MtxMustLock(&(i)->i_mutex) ++ ++/* ---------------------------------------------------------------------- */ ++ ++static inline void vfsub_drop_nlink(struct inode *inode) ++{ ++ AuDebugOn(!inode->i_nlink); ++ drop_nlink(inode); ++} ++ ++static inline void vfsub_dead_dir(struct inode *inode) ++{ ++ AuDebugOn(!S_ISDIR(inode->i_mode)); ++ inode->i_flags |= S_DEAD; ++ clear_nlink(inode); ++} ++ ++static inline void vfsub_set_nlink(struct inode *inode, unsigned int nlink) ++{ ++ if (nlink) ++ set_nlink(inode, nlink); ++ else ++ clear_nlink(inode); ++} ++ ++/* ---------------------------------------------------------------------- */ ++ ++int vfsub_update_h_iattr(struct path *h_path, int *did); ++struct file *vfsub_dentry_open(struct path *path, int flags); ++struct file *vfsub_filp_open(const char *path, int oflags, int mode); ++int vfsub_kern_path(const char *name, unsigned int flags, struct path *path); ++struct dentry *vfsub_lookup_one_len(const char *name, struct dentry *parent, ++ int len); ++struct dentry *vfsub_lookup_hash(struct nameidata *nd); ++int vfsub_name_hash(const char *name, struct qstr *this, int len); ++ ++/* ---------------------------------------------------------------------- */ ++ ++struct au_hinode; ++struct dentry *vfsub_lock_rename(struct dentry *d1, struct au_hinode *hdir1, ++ struct dentry *d2, struct au_hinode *hdir2); ++void vfsub_unlock_rename(struct dentry *d1, struct au_hinode *hdir1, ++ struct dentry *d2, struct au_hinode *hdir2); ++ ++int vfsub_create(struct inode *dir, struct path *path, int mode); ++int vfsub_symlink(struct inode *dir, struct path *path, ++ const char *symname); ++int vfsub_mknod(struct inode *dir, struct path *path, int mode, dev_t dev); ++int vfsub_link(struct dentry *src_dentry, struct inode *dir, ++ struct path *path); ++int vfsub_rename(struct inode *src_hdir, struct dentry *src_dentry, ++ struct inode *hdir, struct path *path); ++int vfsub_mkdir(struct inode *dir, struct path *path, int mode); ++int vfsub_rmdir(struct inode *dir, struct path *path); ++ ++/* ---------------------------------------------------------------------- */ ++ ++ssize_t vfsub_read_u(struct file *file, char __user *ubuf, size_t count, ++ loff_t *ppos); ++ssize_t vfsub_read_k(struct file *file, void *kbuf, size_t count, ++ loff_t *ppos); ++ssize_t vfsub_write_u(struct file *file, const char __user *ubuf, size_t count, ++ loff_t *ppos); ++ssize_t vfsub_write_k(struct 
file *file, void *kbuf, size_t count, ++ loff_t *ppos); ++int vfsub_flush(struct file *file, fl_owner_t id); ++int vfsub_readdir(struct file *file, filldir_t filldir, void *arg); ++ ++static inline unsigned int vfsub_file_flags(struct file *file) ++{ ++ unsigned int flags; ++ ++ spin_lock(&file->f_lock); ++ flags = file->f_flags; ++ spin_unlock(&file->f_lock); ++ ++ return flags; ++} ++ ++static inline void vfsub_file_accessed(struct file *h_file) ++{ ++ file_accessed(h_file); ++ vfsub_update_h_iattr(&h_file->f_path, /*did*/NULL); /*ignore*/ ++} ++ ++static inline void vfsub_touch_atime(struct vfsmount *h_mnt, ++ struct dentry *h_dentry) ++{ ++ struct path h_path = { ++ .dentry = h_dentry, ++ .mnt = h_mnt ++ }; ++ touch_atime(h_mnt, h_dentry); ++ vfsub_update_h_iattr(&h_path, /*did*/NULL); /*ignore*/ ++} ++ ++long vfsub_splice_to(struct file *in, loff_t *ppos, ++ struct pipe_inode_info *pipe, size_t len, ++ unsigned int flags); ++long vfsub_splice_from(struct pipe_inode_info *pipe, struct file *out, ++ loff_t *ppos, size_t len, unsigned int flags); ++int vfsub_trunc(struct path *h_path, loff_t length, unsigned int attr, ++ struct file *h_file); ++int vfsub_fsync(struct file *file, struct path *path, int datasync); ++ ++/* ---------------------------------------------------------------------- */ ++ ++static inline loff_t vfsub_llseek(struct file *file, loff_t offset, int origin) ++{ ++ loff_t err; ++ ++ lockdep_off(); ++ err = vfs_llseek(file, offset, origin); ++ lockdep_on(); ++ return err; ++} ++ ++/* ---------------------------------------------------------------------- */ ++ ++/* dirty workaround for strict type of fmode_t */ ++union vfsub_fmu { ++ fmode_t fm; ++ unsigned int ui; ++}; ++ ++static inline unsigned int vfsub_fmode_to_uint(fmode_t fm) ++{ ++ union vfsub_fmu u = { ++ .fm = fm ++ }; ++ ++ BUILD_BUG_ON(sizeof(u.fm) != sizeof(u.ui)); ++ ++ return u.ui; ++} ++ ++static inline fmode_t vfsub_uint_to_fmode(unsigned int ui) ++{ ++ union vfsub_fmu u = { ++ .ui = ui ++ }; ++ ++ return u.fm; ++} ++ ++/* ---------------------------------------------------------------------- */ ++ ++int vfsub_sio_mkdir(struct inode *dir, struct path *path, int mode); ++int vfsub_sio_rmdir(struct inode *dir, struct path *path); ++int vfsub_sio_notify_change(struct path *path, struct iattr *ia); ++int vfsub_notify_change(struct path *path, struct iattr *ia); ++int vfsub_unlink(struct inode *dir, struct path *path, int force); ++ ++#endif /* __KERNEL__ */ ++#endif /* __AUFS_VFSUB_H__ */ +--- /dev/null 2012-03-14 12:35:58.848999748 +0100 ++++ b/fs/aufs/wbr_policy.c 2012-03-20 17:31:18.000000000 +0100 +@@ -0,0 +1,700 @@ ++/* ++ * Copyright (C) 2005-2012 Junjiro R. Okajima ++ * ++ * This program, aufs is free software; you can redistribute it and/or modify ++ * it under the terms of the GNU General Public License as published by ++ * the Free Software Foundation; either version 2 of the License, or ++ * (at your option) any later version. ++ * ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ * GNU General Public License for more details. 
++ * ++ * You should have received a copy of the GNU General Public License ++ * along with this program; if not, write to the Free Software ++ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA ++ */ ++ ++/* ++ * policies for selecting one among multiple writable branches ++ */ ++ ++#include ++#include "aufs.h" ++ ++/* subset of cpup_attr() */ ++static noinline_for_stack ++int au_cpdown_attr(struct path *h_path, struct dentry *h_src) ++{ ++ int err, sbits; ++ struct iattr ia; ++ struct inode *h_isrc; ++ ++ h_isrc = h_src->d_inode; ++ ia.ia_valid = ATTR_FORCE | ATTR_MODE | ATTR_UID | ATTR_GID; ++ ia.ia_mode = h_isrc->i_mode; ++ ia.ia_uid = h_isrc->i_uid; ++ ia.ia_gid = h_isrc->i_gid; ++ sbits = !!(ia.ia_mode & (S_ISUID | S_ISGID)); ++ au_cpup_attr_flags(h_path->dentry->d_inode, h_isrc); ++ err = vfsub_sio_notify_change(h_path, &ia); ++ ++ /* is this nfs only? */ ++ if (!err && sbits && au_test_nfs(h_path->dentry->d_sb)) { ++ ia.ia_valid = ATTR_FORCE | ATTR_MODE; ++ ia.ia_mode = h_isrc->i_mode; ++ err = vfsub_sio_notify_change(h_path, &ia); ++ } ++ ++ return err; ++} ++ ++#define AuCpdown_PARENT_OPQ 1 ++#define AuCpdown_WHED (1 << 1) ++#define AuCpdown_MADE_DIR (1 << 2) ++#define AuCpdown_DIROPQ (1 << 3) ++#define au_ftest_cpdown(flags, name) ((flags) & AuCpdown_##name) ++#define au_fset_cpdown(flags, name) \ ++ do { (flags) |= AuCpdown_##name; } while (0) ++#define au_fclr_cpdown(flags, name) \ ++ do { (flags) &= ~AuCpdown_##name; } while (0) ++ ++struct au_cpdown_dir_args { ++ struct dentry *parent; ++ unsigned int flags; ++}; ++ ++static int au_cpdown_dir_opq(struct dentry *dentry, aufs_bindex_t bdst, ++ struct au_cpdown_dir_args *a) ++{ ++ int err; ++ struct dentry *opq_dentry; ++ ++ opq_dentry = au_diropq_create(dentry, bdst); ++ err = PTR_ERR(opq_dentry); ++ if (IS_ERR(opq_dentry)) ++ goto out; ++ dput(opq_dentry); ++ au_fset_cpdown(a->flags, DIROPQ); ++ ++out: ++ return err; ++} ++ ++static int au_cpdown_dir_wh(struct dentry *dentry, struct dentry *h_parent, ++ struct inode *dir, aufs_bindex_t bdst) ++{ ++ int err; ++ struct path h_path; ++ struct au_branch *br; ++ ++ br = au_sbr(dentry->d_sb, bdst); ++ h_path.dentry = au_wh_lkup(h_parent, &dentry->d_name, br); ++ err = PTR_ERR(h_path.dentry); ++ if (IS_ERR(h_path.dentry)) ++ goto out; ++ ++ err = 0; ++ if (h_path.dentry->d_inode) { ++ h_path.mnt = br->br_mnt; ++ err = au_wh_unlink_dentry(au_h_iptr(dir, bdst), &h_path, ++ dentry); ++ } ++ dput(h_path.dentry); ++ ++out: ++ return err; ++} ++ ++static int au_cpdown_dir(struct dentry *dentry, aufs_bindex_t bdst, ++ struct dentry *h_parent, void *arg) ++{ ++ int err, rerr; ++ aufs_bindex_t bopq, bstart; ++ struct path h_path; ++ struct dentry *parent; ++ struct inode *h_dir, *h_inode, *inode, *dir; ++ struct au_cpdown_dir_args *args = arg; ++ ++ bstart = au_dbstart(dentry); ++ /* dentry is di-locked */ ++ parent = dget_parent(dentry); ++ dir = parent->d_inode; ++ h_dir = h_parent->d_inode; ++ AuDebugOn(h_dir != au_h_iptr(dir, bdst)); ++ IMustLock(h_dir); ++ ++ err = au_lkup_neg(dentry, bdst); ++ if (unlikely(err < 0)) ++ goto out; ++ h_path.dentry = au_h_dptr(dentry, bdst); ++ h_path.mnt = au_sbr_mnt(dentry->d_sb, bdst); ++ err = vfsub_sio_mkdir(au_h_iptr(dir, bdst), &h_path, ++ S_IRWXU | S_IRUGO | S_IXUGO); ++ if (unlikely(err)) ++ goto out_put; ++ au_fset_cpdown(args->flags, MADE_DIR); ++ ++ bopq = au_dbdiropq(dentry); ++ au_fclr_cpdown(args->flags, WHED); ++ au_fclr_cpdown(args->flags, DIROPQ); ++ if (au_dbwh(dentry) == bdst) ++ au_fset_cpdown(args->flags, 
WHED); ++ if (!au_ftest_cpdown(args->flags, PARENT_OPQ) && bopq <= bdst) ++ au_fset_cpdown(args->flags, PARENT_OPQ); ++ h_inode = h_path.dentry->d_inode; ++ mutex_lock_nested(&h_inode->i_mutex, AuLsc_I_CHILD); ++ if (au_ftest_cpdown(args->flags, WHED)) { ++ err = au_cpdown_dir_opq(dentry, bdst, args); ++ if (unlikely(err)) { ++ mutex_unlock(&h_inode->i_mutex); ++ goto out_dir; ++ } ++ } ++ ++ err = au_cpdown_attr(&h_path, au_h_dptr(dentry, bstart)); ++ mutex_unlock(&h_inode->i_mutex); ++ if (unlikely(err)) ++ goto out_opq; ++ ++ if (au_ftest_cpdown(args->flags, WHED)) { ++ err = au_cpdown_dir_wh(dentry, h_parent, dir, bdst); ++ if (unlikely(err)) ++ goto out_opq; ++ } ++ ++ inode = dentry->d_inode; ++ if (au_ibend(inode) < bdst) ++ au_set_ibend(inode, bdst); ++ au_set_h_iptr(inode, bdst, au_igrab(h_inode), ++ au_hi_flags(inode, /*isdir*/1)); ++ goto out; /* success */ ++ ++ /* revert */ ++out_opq: ++ if (au_ftest_cpdown(args->flags, DIROPQ)) { ++ mutex_lock_nested(&h_inode->i_mutex, AuLsc_I_CHILD); ++ rerr = au_diropq_remove(dentry, bdst); ++ mutex_unlock(&h_inode->i_mutex); ++ if (unlikely(rerr)) { ++ AuIOErr("failed removing diropq for %.*s b%d (%d)\n", ++ AuDLNPair(dentry), bdst, rerr); ++ err = -EIO; ++ goto out; ++ } ++ } ++out_dir: ++ if (au_ftest_cpdown(args->flags, MADE_DIR)) { ++ rerr = vfsub_sio_rmdir(au_h_iptr(dir, bdst), &h_path); ++ if (unlikely(rerr)) { ++ AuIOErr("failed removing %.*s b%d (%d)\n", ++ AuDLNPair(dentry), bdst, rerr); ++ err = -EIO; ++ } ++ } ++out_put: ++ au_set_h_dptr(dentry, bdst, NULL); ++ if (au_dbend(dentry) == bdst) ++ au_update_dbend(dentry); ++out: ++ dput(parent); ++ return err; ++} ++ ++int au_cpdown_dirs(struct dentry *dentry, aufs_bindex_t bdst) ++{ ++ int err; ++ struct au_cpdown_dir_args args = { ++ .parent = dget_parent(dentry), ++ .flags = 0 ++ }; ++ ++ err = au_cp_dirs(dentry, bdst, au_cpdown_dir, &args); ++ dput(args.parent); ++ ++ return err; ++} ++ ++/* ---------------------------------------------------------------------- */ ++ ++/* policies for create */ ++ ++static int au_wbr_nonopq(struct dentry *dentry, aufs_bindex_t bindex) ++{ ++ int err, i, j, ndentry; ++ aufs_bindex_t bopq; ++ struct au_dcsub_pages dpages; ++ struct au_dpage *dpage; ++ struct dentry **dentries, *parent, *d; ++ ++ err = au_dpages_init(&dpages, GFP_NOFS); ++ if (unlikely(err)) ++ goto out; ++ parent = dget_parent(dentry); ++ err = au_dcsub_pages_rev_aufs(&dpages, parent, /*do_include*/0); ++ if (unlikely(err)) ++ goto out_free; ++ ++ err = bindex; ++ for (i = 0; i < dpages.ndpage; i++) { ++ dpage = dpages.dpages + i; ++ dentries = dpage->dentries; ++ ndentry = dpage->ndentry; ++ for (j = 0; j < ndentry; j++) { ++ d = dentries[j]; ++ di_read_lock_parent2(d, !AuLock_IR); ++ bopq = au_dbdiropq(d); ++ di_read_unlock(d, !AuLock_IR); ++ if (bopq >= 0 && bopq < err) ++ err = bopq; ++ } ++ } ++ ++out_free: ++ dput(parent); ++ au_dpages_free(&dpages); ++out: ++ return err; ++} ++ ++static int au_wbr_bu(struct super_block *sb, aufs_bindex_t bindex) ++{ ++ for (; bindex >= 0; bindex--) ++ if (!au_br_rdonly(au_sbr(sb, bindex))) ++ return bindex; ++ return -EROFS; ++} ++ ++/* top down parent */ ++static int au_wbr_create_tdp(struct dentry *dentry, int isdir __maybe_unused) ++{ ++ int err; ++ aufs_bindex_t bstart, bindex; ++ struct super_block *sb; ++ struct dentry *parent, *h_parent; ++ ++ sb = dentry->d_sb; ++ bstart = au_dbstart(dentry); ++ err = bstart; ++ if (!au_br_rdonly(au_sbr(sb, bstart))) ++ goto out; ++ ++ err = -EROFS; ++ parent = dget_parent(dentry); ++ for (bindex = 
au_dbstart(parent); bindex < bstart; bindex++) { ++ h_parent = au_h_dptr(parent, bindex); ++ if (!h_parent || !h_parent->d_inode) ++ continue; ++ ++ if (!au_br_rdonly(au_sbr(sb, bindex))) { ++ err = bindex; ++ break; ++ } ++ } ++ dput(parent); ++ ++ /* bottom up here */ ++ if (unlikely(err < 0)) { ++ err = au_wbr_bu(sb, bstart - 1); ++ if (err >= 0) ++ err = au_wbr_nonopq(dentry, err); ++ } ++ ++out: ++ AuDbg("b%d\n", err); ++ return err; ++} ++ ++/* ---------------------------------------------------------------------- */ ++ ++/* an exception for the policy other than tdp */ ++static int au_wbr_create_exp(struct dentry *dentry) ++{ ++ int err; ++ aufs_bindex_t bwh, bdiropq; ++ struct dentry *parent; ++ ++ err = -1; ++ bwh = au_dbwh(dentry); ++ parent = dget_parent(dentry); ++ bdiropq = au_dbdiropq(parent); ++ if (bwh >= 0) { ++ if (bdiropq >= 0) ++ err = min(bdiropq, bwh); ++ else ++ err = bwh; ++ AuDbg("%d\n", err); ++ } else if (bdiropq >= 0) { ++ err = bdiropq; ++ AuDbg("%d\n", err); ++ } ++ dput(parent); ++ ++ if (err >= 0) ++ err = au_wbr_nonopq(dentry, err); ++ ++ if (err >= 0 && au_br_rdonly(au_sbr(dentry->d_sb, err))) ++ err = -1; ++ ++ AuDbg("%d\n", err); ++ return err; ++} ++ ++/* ---------------------------------------------------------------------- */ ++ ++/* round robin */ ++static int au_wbr_create_init_rr(struct super_block *sb) ++{ ++ int err; ++ ++ err = au_wbr_bu(sb, au_sbend(sb)); ++ atomic_set(&au_sbi(sb)->si_wbr_rr_next, -err); /* less important */ ++ /* smp_mb(); */ ++ ++ AuDbg("b%d\n", err); ++ return err; ++} ++ ++static int au_wbr_create_rr(struct dentry *dentry, int isdir) ++{ ++ int err, nbr; ++ unsigned int u; ++ aufs_bindex_t bindex, bend; ++ struct super_block *sb; ++ atomic_t *next; ++ ++ err = au_wbr_create_exp(dentry); ++ if (err >= 0) ++ goto out; ++ ++ sb = dentry->d_sb; ++ next = &au_sbi(sb)->si_wbr_rr_next; ++ bend = au_sbend(sb); ++ nbr = bend + 1; ++ for (bindex = 0; bindex <= bend; bindex++) { ++ if (!isdir) { ++ err = atomic_dec_return(next) + 1; ++ /* modulo for 0 is meaningless */ ++ if (unlikely(!err)) ++ err = atomic_dec_return(next) + 1; ++ } else ++ err = atomic_read(next); ++ AuDbg("%d\n", err); ++ u = err; ++ err = u % nbr; ++ AuDbg("%d\n", err); ++ if (!au_br_rdonly(au_sbr(sb, err))) ++ break; ++ err = -EROFS; ++ } ++ ++ if (err >= 0) ++ err = au_wbr_nonopq(dentry, err); ++ ++out: ++ AuDbg("%d\n", err); ++ return err; ++} ++ ++/* ---------------------------------------------------------------------- */ ++ ++/* most free space */ ++static void au_mfs(struct dentry *dentry) ++{ ++ struct super_block *sb; ++ struct au_branch *br; ++ struct au_wbr_mfs *mfs; ++ aufs_bindex_t bindex, bend; ++ int err; ++ unsigned long long b, bavail; ++ struct path h_path; ++ /* reduce the stack usage */ ++ struct kstatfs *st; ++ ++ st = kmalloc(sizeof(*st), GFP_NOFS); ++ if (unlikely(!st)) { ++ AuWarn1("failed updating mfs(%d), ignored\n", -ENOMEM); ++ return; ++ } ++ ++ bavail = 0; ++ sb = dentry->d_sb; ++ mfs = &au_sbi(sb)->si_wbr_mfs; ++ MtxMustLock(&mfs->mfs_lock); ++ mfs->mfs_bindex = -EROFS; ++ mfs->mfsrr_bytes = 0; ++ bend = au_sbend(sb); ++ for (bindex = 0; bindex <= bend; bindex++) { ++ br = au_sbr(sb, bindex); ++ if (au_br_rdonly(br)) ++ continue; ++ ++ /* sb->s_root for NFS is unreliable */ ++ h_path.mnt = br->br_mnt; ++ h_path.dentry = h_path.mnt->mnt_root; ++ err = vfs_statfs(&h_path, st); ++ if (unlikely(err)) { ++ AuWarn1("failed statfs, b%d, %d\n", bindex, err); ++ continue; ++ } ++ ++ /* when the available size is equal, select the lower one */ 
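[Illustrative aside, not part of the patch] au_mfs() above walks the writable branches and, as the loop continues below, scores each one by f_bavail * f_bsize reported by vfs_statfs() on the branch root, keeping the branch with the most available bytes (on a tie, the later, i.e. lower, branch wins). A rough user-space equivalent using statfs(2) on a hypothetical list of branch mount points:

/* Pick the path with the most available bytes, the way au_mfs() scores branches. */
#include <stdio.h>
#include <sys/vfs.h>	/* statfs(2) */

static int pick_most_free(const char *const paths[], int n)
{
	struct statfs st;
	unsigned long long best = 0, avail;
	int i, best_idx = -1;

	for (i = 0; i < n; i++) {
		if (statfs(paths[i], &st) < 0)
			continue;	/* skip branches we cannot stat */
		avail = (unsigned long long)st.f_bavail * st.f_bsize;
		if (avail >= best) {	/* >= so that, on a tie, the later entry wins */
			best = avail;
			best_idx = i;
		}
	}
	return best_idx;	/* -1: no usable branch, cf. -EROFS in aufs */
}

int main(void)
{
	const char *branches[] = { "/", "/tmp" };
	printf("most free: %d\n", pick_most_free(branches, 2));
	return 0;
}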
++ BUILD_BUG_ON(sizeof(b) < sizeof(st->f_bavail) ++ || sizeof(b) < sizeof(st->f_bsize)); ++ b = st->f_bavail * st->f_bsize; ++ br->br_wbr->wbr_bytes = b; ++ if (b >= bavail) { ++ bavail = b; ++ mfs->mfs_bindex = bindex; ++ mfs->mfs_jiffy = jiffies; ++ } ++ } ++ ++ mfs->mfsrr_bytes = bavail; ++ AuDbg("b%d\n", mfs->mfs_bindex); ++ kfree(st); ++} ++ ++static int au_wbr_create_mfs(struct dentry *dentry, int isdir __maybe_unused) ++{ ++ int err; ++ struct super_block *sb; ++ struct au_wbr_mfs *mfs; ++ ++ err = au_wbr_create_exp(dentry); ++ if (err >= 0) ++ goto out; ++ ++ sb = dentry->d_sb; ++ mfs = &au_sbi(sb)->si_wbr_mfs; ++ mutex_lock(&mfs->mfs_lock); ++ if (time_after(jiffies, mfs->mfs_jiffy + mfs->mfs_expire) ++ || mfs->mfs_bindex < 0 ++ || au_br_rdonly(au_sbr(sb, mfs->mfs_bindex))) ++ au_mfs(dentry); ++ mutex_unlock(&mfs->mfs_lock); ++ err = mfs->mfs_bindex; ++ ++ if (err >= 0) ++ err = au_wbr_nonopq(dentry, err); ++ ++out: ++ AuDbg("b%d\n", err); ++ return err; ++} ++ ++static int au_wbr_create_init_mfs(struct super_block *sb) ++{ ++ struct au_wbr_mfs *mfs; ++ ++ mfs = &au_sbi(sb)->si_wbr_mfs; ++ mutex_init(&mfs->mfs_lock); ++ mfs->mfs_jiffy = 0; ++ mfs->mfs_bindex = -EROFS; ++ ++ return 0; ++} ++ ++static int au_wbr_create_fin_mfs(struct super_block *sb __maybe_unused) ++{ ++ mutex_destroy(&au_sbi(sb)->si_wbr_mfs.mfs_lock); ++ return 0; ++} ++ ++/* ---------------------------------------------------------------------- */ ++ ++/* most free space and then round robin */ ++static int au_wbr_create_mfsrr(struct dentry *dentry, int isdir) ++{ ++ int err; ++ struct au_wbr_mfs *mfs; ++ ++ err = au_wbr_create_mfs(dentry, isdir); ++ if (err >= 0) { ++ mfs = &au_sbi(dentry->d_sb)->si_wbr_mfs; ++ mutex_lock(&mfs->mfs_lock); ++ if (mfs->mfsrr_bytes < mfs->mfsrr_watermark) ++ err = au_wbr_create_rr(dentry, isdir); ++ mutex_unlock(&mfs->mfs_lock); ++ } ++ ++ AuDbg("b%d\n", err); ++ return err; ++} ++ ++static int au_wbr_create_init_mfsrr(struct super_block *sb) ++{ ++ int err; ++ ++ au_wbr_create_init_mfs(sb); /* ignore */ ++ err = au_wbr_create_init_rr(sb); ++ ++ return err; ++} ++ ++/* ---------------------------------------------------------------------- */ ++ ++/* top down parent and most free space */ ++static int au_wbr_create_pmfs(struct dentry *dentry, int isdir) ++{ ++ int err, e2; ++ unsigned long long b; ++ aufs_bindex_t bindex, bstart, bend; ++ struct super_block *sb; ++ struct dentry *parent, *h_parent; ++ struct au_branch *br; ++ ++ err = au_wbr_create_tdp(dentry, isdir); ++ if (unlikely(err < 0)) ++ goto out; ++ parent = dget_parent(dentry); ++ bstart = au_dbstart(parent); ++ bend = au_dbtaildir(parent); ++ if (bstart == bend) ++ goto out_parent; /* success */ ++ ++ e2 = au_wbr_create_mfs(dentry, isdir); ++ if (e2 < 0) ++ goto out_parent; /* success */ ++ ++ /* when the available size is equal, select upper one */ ++ sb = dentry->d_sb; ++ br = au_sbr(sb, err); ++ b = br->br_wbr->wbr_bytes; ++ AuDbg("b%d, %llu\n", err, b); ++ ++ for (bindex = bstart; bindex <= bend; bindex++) { ++ h_parent = au_h_dptr(parent, bindex); ++ if (!h_parent || !h_parent->d_inode) ++ continue; ++ ++ br = au_sbr(sb, bindex); ++ if (!au_br_rdonly(br) && br->br_wbr->wbr_bytes > b) { ++ b = br->br_wbr->wbr_bytes; ++ err = bindex; ++ AuDbg("b%d, %llu\n", err, b); ++ } ++ } ++ ++ if (err >= 0) ++ err = au_wbr_nonopq(dentry, err); ++ ++out_parent: ++ dput(parent); ++out: ++ AuDbg("b%d\n", err); ++ return err; ++} ++ ++/* ---------------------------------------------------------------------- */ ++ ++/* policies for 
copyup */ ++ ++/* top down parent */ ++static int au_wbr_copyup_tdp(struct dentry *dentry) ++{ ++ return au_wbr_create_tdp(dentry, /*isdir, anything is ok*/0); ++} ++ ++/* bottom up parent */ ++static int au_wbr_copyup_bup(struct dentry *dentry) ++{ ++ int err; ++ aufs_bindex_t bindex, bstart; ++ struct dentry *parent, *h_parent; ++ struct super_block *sb; ++ ++ err = -EROFS; ++ sb = dentry->d_sb; ++ parent = dget_parent(dentry); ++ bstart = au_dbstart(parent); ++ for (bindex = au_dbstart(dentry); bindex >= bstart; bindex--) { ++ h_parent = au_h_dptr(parent, bindex); ++ if (!h_parent || !h_parent->d_inode) ++ continue; ++ ++ if (!au_br_rdonly(au_sbr(sb, bindex))) { ++ err = bindex; ++ break; ++ } ++ } ++ dput(parent); ++ ++ /* bottom up here */ ++ if (unlikely(err < 0)) ++ err = au_wbr_bu(sb, bstart - 1); ++ ++ AuDbg("b%d\n", err); ++ return err; ++} ++ ++/* bottom up */ ++static int au_wbr_copyup_bu(struct dentry *dentry) ++{ ++ int err; ++ aufs_bindex_t bstart; ++ ++ bstart = au_dbstart(dentry); ++ err = au_wbr_bu(dentry->d_sb, bstart); ++ AuDbg("b%d\n", err); ++ if (err > bstart) ++ err = au_wbr_nonopq(dentry, err); ++ ++ AuDbg("b%d\n", err); ++ return err; ++} ++ ++/* ---------------------------------------------------------------------- */ ++ ++struct au_wbr_copyup_operations au_wbr_copyup_ops[] = { ++ [AuWbrCopyup_TDP] = { ++ .copyup = au_wbr_copyup_tdp ++ }, ++ [AuWbrCopyup_BUP] = { ++ .copyup = au_wbr_copyup_bup ++ }, ++ [AuWbrCopyup_BU] = { ++ .copyup = au_wbr_copyup_bu ++ } ++}; ++ ++struct au_wbr_create_operations au_wbr_create_ops[] = { ++ [AuWbrCreate_TDP] = { ++ .create = au_wbr_create_tdp ++ }, ++ [AuWbrCreate_RR] = { ++ .create = au_wbr_create_rr, ++ .init = au_wbr_create_init_rr ++ }, ++ [AuWbrCreate_MFS] = { ++ .create = au_wbr_create_mfs, ++ .init = au_wbr_create_init_mfs, ++ .fin = au_wbr_create_fin_mfs ++ }, ++ [AuWbrCreate_MFSV] = { ++ .create = au_wbr_create_mfs, ++ .init = au_wbr_create_init_mfs, ++ .fin = au_wbr_create_fin_mfs ++ }, ++ [AuWbrCreate_MFSRR] = { ++ .create = au_wbr_create_mfsrr, ++ .init = au_wbr_create_init_mfsrr, ++ .fin = au_wbr_create_fin_mfs ++ }, ++ [AuWbrCreate_MFSRRV] = { ++ .create = au_wbr_create_mfsrr, ++ .init = au_wbr_create_init_mfsrr, ++ .fin = au_wbr_create_fin_mfs ++ }, ++ [AuWbrCreate_PMFS] = { ++ .create = au_wbr_create_pmfs, ++ .init = au_wbr_create_init_mfs, ++ .fin = au_wbr_create_fin_mfs ++ }, ++ [AuWbrCreate_PMFSV] = { ++ .create = au_wbr_create_pmfs, ++ .init = au_wbr_create_init_mfs, ++ .fin = au_wbr_create_fin_mfs ++ } ++}; +--- /dev/null 2012-03-14 12:35:58.848999748 +0100 ++++ b/fs/aufs/whout.c 2012-03-20 17:31:18.000000000 +0100 +@@ -0,0 +1,1049 @@ ++/* ++ * Copyright (C) 2005-2012 Junjiro R. Okajima ++ * ++ * This program, aufs is free software; you can redistribute it and/or modify ++ * it under the terms of the GNU General Public License as published by ++ * the Free Software Foundation; either version 2 of the License, or ++ * (at your option) any later version. ++ * ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ * GNU General Public License for more details. 
++ * ++ * You should have received a copy of the GNU General Public License ++ * along with this program; if not, write to the Free Software ++ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA ++ */ ++ ++/* ++ * whiteout for logical deletion and opaque directory ++ */ ++ ++#include "aufs.h" ++ ++#define WH_MASK S_IRUGO ++ ++/* ++ * If a directory contains this file, then it is opaque. We start with the ++ * .wh. flag so that it is blocked by lookup. ++ */ ++static struct qstr diropq_name = { ++ .name = AUFS_WH_DIROPQ, ++ .len = sizeof(AUFS_WH_DIROPQ) - 1 ++}; ++ ++/* ++ * generate whiteout name, which is NOT terminated by NULL. ++ * @name: original d_name.name ++ * @len: original d_name.len ++ * @wh: whiteout qstr ++ * returns zero when succeeds, otherwise error. ++ * succeeded value as wh->name should be freed by kfree(). ++ */ ++int au_wh_name_alloc(struct qstr *wh, const struct qstr *name) ++{ ++ char *p; ++ ++ if (unlikely(name->len > PATH_MAX - AUFS_WH_PFX_LEN)) ++ return -ENAMETOOLONG; ++ ++ wh->len = name->len + AUFS_WH_PFX_LEN; ++ p = kmalloc(wh->len, GFP_NOFS); ++ wh->name = p; ++ if (p) { ++ memcpy(p, AUFS_WH_PFX, AUFS_WH_PFX_LEN); ++ memcpy(p + AUFS_WH_PFX_LEN, name->name, name->len); ++ /* smp_mb(); */ ++ return 0; ++ } ++ return -ENOMEM; ++} ++ ++/* ---------------------------------------------------------------------- */ ++ ++/* ++ * test if the @wh_name exists under @h_parent. ++ * @try_sio specifies the necessary of super-io. ++ */ ++int au_wh_test(struct dentry *h_parent, struct qstr *wh_name, ++ struct au_branch *br, int try_sio) ++{ ++ int err; ++ struct dentry *wh_dentry; ++ ++ if (!try_sio) ++ wh_dentry = au_lkup_one(wh_name, h_parent, br, /*nd*/NULL); ++ else ++ wh_dentry = au_sio_lkup_one(wh_name, h_parent, br); ++ err = PTR_ERR(wh_dentry); ++ if (IS_ERR(wh_dentry)) ++ goto out; ++ ++ err = 0; ++ if (!wh_dentry->d_inode) ++ goto out_wh; /* success */ ++ ++ err = 1; ++ if (S_ISREG(wh_dentry->d_inode->i_mode)) ++ goto out_wh; /* success */ ++ ++ err = -EIO; ++ AuIOErr("%.*s Invalid whiteout entry type 0%o.\n", ++ AuDLNPair(wh_dentry), wh_dentry->d_inode->i_mode); ++ ++out_wh: ++ dput(wh_dentry); ++out: ++ return err; ++} ++ ++/* ++ * test if the @h_dentry sets opaque or not. ++ */ ++int au_diropq_test(struct dentry *h_dentry, struct au_branch *br) ++{ ++ int err; ++ struct inode *h_dir; ++ ++ h_dir = h_dentry->d_inode; ++ err = au_wh_test(h_dentry, &diropq_name, br, ++ au_test_h_perm_sio(h_dir, MAY_EXEC)); ++ return err; ++} ++ ++/* ++ * returns a negative dentry whose name is unique and temporary. 
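[Illustrative aside, not part of the patch] au_wh_name_alloc() above builds a whiteout name by prefixing the original name with AUFS_WH_PFX (".wh."), and au_whtmp_lkup() below doubles that prefix and appends a hex counter to obtain a unique temporary name. A small user-space sketch of the plain whiteout-name construction, with hypothetical buffer handling (the kernel qstr is deliberately not NUL-terminated):

/* Build an aufs-style whiteout name: ".wh." + original name. */
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

#define WH_PFX		".wh."
#define WH_PFX_LEN	(sizeof(WH_PFX) - 1)

static char *wh_name_alloc(const char *name, size_t len, size_t *wh_len)
{
	char *p = malloc(WH_PFX_LEN + len + 1);

	if (!p)
		return NULL;
	memcpy(p, WH_PFX, WH_PFX_LEN);
	memcpy(p + WH_PFX_LEN, name, len);
	p[WH_PFX_LEN + len] = '\0';	/* for printing only */
	*wh_len = WH_PFX_LEN + len;
	return p;
}

int main(void)
{
	size_t n;
	char *wh = wh_name_alloc("deleted-file", strlen("deleted-file"), &n);

	if (wh) {
		printf("%s (%zu bytes)\n", wh, n);	/* ".wh.deleted-file" */
		free(wh);
	}
	return 0;
}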
++ */ ++struct dentry *au_whtmp_lkup(struct dentry *h_parent, struct au_branch *br, ++ struct qstr *prefix) ++{ ++ struct dentry *dentry; ++ int i; ++ char defname[NAME_MAX - AUFS_MAX_NAMELEN + DNAME_INLINE_LEN + 1], ++ *name, *p; ++ /* strict atomic_t is unnecessary here */ ++ static unsigned short cnt; ++ struct qstr qs; ++ ++ BUILD_BUG_ON(sizeof(cnt) * 2 > AUFS_WH_TMP_LEN); ++ ++ name = defname; ++ qs.len = sizeof(defname) - DNAME_INLINE_LEN + prefix->len - 1; ++ if (unlikely(prefix->len > DNAME_INLINE_LEN)) { ++ dentry = ERR_PTR(-ENAMETOOLONG); ++ if (unlikely(qs.len > NAME_MAX)) ++ goto out; ++ dentry = ERR_PTR(-ENOMEM); ++ name = kmalloc(qs.len + 1, GFP_NOFS); ++ if (unlikely(!name)) ++ goto out; ++ } ++ ++ /* doubly whiteout-ed */ ++ memcpy(name, AUFS_WH_PFX AUFS_WH_PFX, AUFS_WH_PFX_LEN * 2); ++ p = name + AUFS_WH_PFX_LEN * 2; ++ memcpy(p, prefix->name, prefix->len); ++ p += prefix->len; ++ *p++ = '.'; ++ AuDebugOn(name + qs.len + 1 - p <= AUFS_WH_TMP_LEN); ++ ++ qs.name = name; ++ for (i = 0; i < 3; i++) { ++ sprintf(p, "%.*x", AUFS_WH_TMP_LEN, cnt++); ++ dentry = au_sio_lkup_one(&qs, h_parent, br); ++ if (IS_ERR(dentry) || !dentry->d_inode) ++ goto out_name; ++ dput(dentry); ++ } ++ /* pr_warning("could not get random name\n"); */ ++ dentry = ERR_PTR(-EEXIST); ++ AuDbg("%.*s\n", AuLNPair(&qs)); ++ BUG(); ++ ++out_name: ++ if (name != defname) ++ kfree(name); ++out: ++ AuTraceErrPtr(dentry); ++ return dentry; ++} ++ ++/* ++ * rename the @h_dentry on @br to the whiteouted temporary name. ++ */ ++int au_whtmp_ren(struct dentry *h_dentry, struct au_branch *br) ++{ ++ int err; ++ struct path h_path = { ++ .mnt = br->br_mnt ++ }; ++ struct inode *h_dir; ++ struct dentry *h_parent; ++ ++ h_parent = h_dentry->d_parent; /* dir inode is locked */ ++ h_dir = h_parent->d_inode; ++ IMustLock(h_dir); ++ ++ h_path.dentry = au_whtmp_lkup(h_parent, br, &h_dentry->d_name); ++ err = PTR_ERR(h_path.dentry); ++ if (IS_ERR(h_path.dentry)) ++ goto out; ++ ++ /* under the same dir, no need to lock_rename() */ ++ err = vfsub_rename(h_dir, h_dentry, h_dir, &h_path); ++ AuTraceErr(err); ++ dput(h_path.dentry); ++ ++out: ++ AuTraceErr(err); ++ return err; ++} ++ ++/* ---------------------------------------------------------------------- */ ++/* ++ * functions for removing a whiteout ++ */ ++ ++static int do_unlink_wh(struct inode *h_dir, struct path *h_path) ++{ ++ int force; ++ ++ /* ++ * forces superio when the dir has a sticky bit. ++ * this may be a violation of unix fs semantics. 
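[Illustrative aside, not part of the patch] The condition described in the comment above, a sticky-bit parent directory whose whiteout entry belongs to another user, is what makes do_unlink_wh() force the unlink through the privileged super-io path. A rough user-space check of the same condition, with hypothetical paths and getuid() standing in for current_fsuid():

/* Decide whether a whiteout unlink would need to be forced, mirroring do_unlink_wh(). */
#include <stdio.h>
#include <sys/stat.h>
#include <unistd.h>

static int need_force(const char *dir, const char *entry)
{
	struct stat d, e;

	if (stat(dir, &d) < 0 || stat(entry, &e) < 0)
		return -1;	/* cannot tell */
	/* sticky directory and the entry is not ours: a plain unlink would be
	 * denied, so aufs performs it as a privileged (super-io) operation */
	return (d.st_mode & S_ISVTX) && e.st_uid != getuid();
}

int main(void)
{
	printf("force: %d\n", need_force("/tmp", "/tmp/somefile"));
	return 0;
}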
++ */ ++ force = (h_dir->i_mode & S_ISVTX) ++ && h_path->dentry->d_inode->i_uid != current_fsuid(); ++ return vfsub_unlink(h_dir, h_path, force); ++} ++ ++int au_wh_unlink_dentry(struct inode *h_dir, struct path *h_path, ++ struct dentry *dentry) ++{ ++ int err; ++ ++ err = do_unlink_wh(h_dir, h_path); ++ if (!err && dentry) ++ au_set_dbwh(dentry, -1); ++ ++ return err; ++} ++ ++static int unlink_wh_name(struct dentry *h_parent, struct qstr *wh, ++ struct au_branch *br) ++{ ++ int err; ++ struct path h_path = { ++ .mnt = br->br_mnt ++ }; ++ ++ err = 0; ++ h_path.dentry = au_lkup_one(wh, h_parent, br, /*nd*/NULL); ++ if (IS_ERR(h_path.dentry)) ++ err = PTR_ERR(h_path.dentry); ++ else { ++ if (h_path.dentry->d_inode ++ && S_ISREG(h_path.dentry->d_inode->i_mode)) ++ err = do_unlink_wh(h_parent->d_inode, &h_path); ++ dput(h_path.dentry); ++ } ++ ++ return err; ++} ++ ++/* ---------------------------------------------------------------------- */ ++/* ++ * initialize/clean whiteout for a branch ++ */ ++ ++static void au_wh_clean(struct inode *h_dir, struct path *whpath, ++ const int isdir) ++{ ++ int err; ++ ++ if (!whpath->dentry->d_inode) ++ return; ++ ++ err = mnt_want_write(whpath->mnt); ++ if (!err) { ++ if (isdir) ++ err = vfsub_rmdir(h_dir, whpath); ++ else ++ err = vfsub_unlink(h_dir, whpath, /*force*/0); ++ mnt_drop_write(whpath->mnt); ++ } ++ if (unlikely(err)) ++ pr_warning("failed removing %.*s (%d), ignored.\n", ++ AuDLNPair(whpath->dentry), err); ++} ++ ++static int test_linkable(struct dentry *h_root) ++{ ++ struct inode *h_dir = h_root->d_inode; ++ ++ if (h_dir->i_op->link) ++ return 0; ++ ++ pr_err("%.*s (%s) doesn't support link(2), use noplink and rw+nolwh\n", ++ AuDLNPair(h_root), au_sbtype(h_root->d_sb)); ++ return -ENOSYS; ++} ++ ++/* todo: should this mkdir be done in /sbin/mount.aufs helper? 
*/ ++static int au_whdir(struct inode *h_dir, struct path *path) ++{ ++ int err; ++ ++ err = -EEXIST; ++ if (!path->dentry->d_inode) { ++ int mode = S_IRWXU; ++ ++ if (au_test_nfs(path->dentry->d_sb)) ++ mode |= S_IXUGO; ++ err = mnt_want_write(path->mnt); ++ if (!err) { ++ err = vfsub_mkdir(h_dir, path, mode); ++ mnt_drop_write(path->mnt); ++ } ++ } else if (S_ISDIR(path->dentry->d_inode->i_mode)) ++ err = 0; ++ else ++ pr_err("unknown %.*s exists\n", AuDLNPair(path->dentry)); ++ ++ return err; ++} ++ ++struct au_wh_base { ++ const struct qstr *name; ++ struct dentry *dentry; ++}; ++ ++static void au_wh_init_ro(struct inode *h_dir, struct au_wh_base base[], ++ struct path *h_path) ++{ ++ h_path->dentry = base[AuBrWh_BASE].dentry; ++ au_wh_clean(h_dir, h_path, /*isdir*/0); ++ h_path->dentry = base[AuBrWh_PLINK].dentry; ++ au_wh_clean(h_dir, h_path, /*isdir*/1); ++ h_path->dentry = base[AuBrWh_ORPH].dentry; ++ au_wh_clean(h_dir, h_path, /*isdir*/1); ++} ++ ++/* ++ * returns tri-state, ++ * minus: error, caller should print the mesage ++ * zero: succuess ++ * plus: error, caller should NOT print the mesage ++ */ ++static int au_wh_init_rw_nolink(struct dentry *h_root, struct au_wbr *wbr, ++ int do_plink, struct au_wh_base base[], ++ struct path *h_path) ++{ ++ int err; ++ struct inode *h_dir; ++ ++ h_dir = h_root->d_inode; ++ h_path->dentry = base[AuBrWh_BASE].dentry; ++ au_wh_clean(h_dir, h_path, /*isdir*/0); ++ h_path->dentry = base[AuBrWh_PLINK].dentry; ++ if (do_plink) { ++ err = test_linkable(h_root); ++ if (unlikely(err)) { ++ err = 1; ++ goto out; ++ } ++ ++ err = au_whdir(h_dir, h_path); ++ if (unlikely(err)) ++ goto out; ++ wbr->wbr_plink = dget(base[AuBrWh_PLINK].dentry); ++ } else ++ au_wh_clean(h_dir, h_path, /*isdir*/1); ++ h_path->dentry = base[AuBrWh_ORPH].dentry; ++ err = au_whdir(h_dir, h_path); ++ if (unlikely(err)) ++ goto out; ++ wbr->wbr_orph = dget(base[AuBrWh_ORPH].dentry); ++ ++out: ++ return err; ++} ++ ++/* ++ * for the moment, aufs supports the branch filesystem which does not support ++ * link(2). testing on FAT which does not support i_op->setattr() fully either, ++ * copyup failed. finally, such filesystem will not be used as the writable ++ * branch. ++ * ++ * returns tri-state, see above. ++ */ ++static int au_wh_init_rw(struct dentry *h_root, struct au_wbr *wbr, ++ int do_plink, struct au_wh_base base[], ++ struct path *h_path) ++{ ++ int err; ++ struct inode *h_dir; ++ ++ WbrWhMustWriteLock(wbr); ++ ++ err = test_linkable(h_root); ++ if (unlikely(err)) { ++ err = 1; ++ goto out; ++ } ++ ++ /* ++ * todo: should this create be done in /sbin/mount.aufs helper? 
++ */ ++ err = -EEXIST; ++ h_dir = h_root->d_inode; ++ if (!base[AuBrWh_BASE].dentry->d_inode) { ++ err = mnt_want_write(h_path->mnt); ++ if (!err) { ++ h_path->dentry = base[AuBrWh_BASE].dentry; ++ err = vfsub_create(h_dir, h_path, WH_MASK); ++ mnt_drop_write(h_path->mnt); ++ } ++ } else if (S_ISREG(base[AuBrWh_BASE].dentry->d_inode->i_mode)) ++ err = 0; ++ else ++ pr_err("unknown %.*s/%.*s exists\n", ++ AuDLNPair(h_root), AuDLNPair(base[AuBrWh_BASE].dentry)); ++ if (unlikely(err)) ++ goto out; ++ ++ h_path->dentry = base[AuBrWh_PLINK].dentry; ++ if (do_plink) { ++ err = au_whdir(h_dir, h_path); ++ if (unlikely(err)) ++ goto out; ++ wbr->wbr_plink = dget(base[AuBrWh_PLINK].dentry); ++ } else ++ au_wh_clean(h_dir, h_path, /*isdir*/1); ++ wbr->wbr_whbase = dget(base[AuBrWh_BASE].dentry); ++ ++ h_path->dentry = base[AuBrWh_ORPH].dentry; ++ err = au_whdir(h_dir, h_path); ++ if (unlikely(err)) ++ goto out; ++ wbr->wbr_orph = dget(base[AuBrWh_ORPH].dentry); ++ ++out: ++ return err; ++} ++ ++/* ++ * initialize the whiteout base file/dir for @br. ++ */ ++int au_wh_init(struct dentry *h_root, struct au_branch *br, ++ struct super_block *sb) ++{ ++ int err, i; ++ const unsigned char do_plink ++ = !!au_opt_test(au_mntflags(sb), PLINK); ++ struct path path = { ++ .mnt = br->br_mnt ++ }; ++ struct inode *h_dir; ++ struct au_wbr *wbr = br->br_wbr; ++ static const struct qstr base_name[] = { ++ [AuBrWh_BASE] = { ++ .name = AUFS_BASE_NAME, ++ .len = sizeof(AUFS_BASE_NAME) - 1 ++ }, ++ [AuBrWh_PLINK] = { ++ .name = AUFS_PLINKDIR_NAME, ++ .len = sizeof(AUFS_PLINKDIR_NAME) - 1 ++ }, ++ [AuBrWh_ORPH] = { ++ .name = AUFS_ORPHDIR_NAME, ++ .len = sizeof(AUFS_ORPHDIR_NAME) - 1 ++ } ++ }; ++ struct au_wh_base base[] = { ++ [AuBrWh_BASE] = { ++ .name = base_name + AuBrWh_BASE, ++ .dentry = NULL ++ }, ++ [AuBrWh_PLINK] = { ++ .name = base_name + AuBrWh_PLINK, ++ .dentry = NULL ++ }, ++ [AuBrWh_ORPH] = { ++ .name = base_name + AuBrWh_ORPH, ++ .dentry = NULL ++ } ++ }; ++ ++ if (wbr) ++ WbrWhMustWriteLock(wbr); ++ ++ for (i = 0; i < AuBrWh_Last; i++) { ++ /* doubly whiteouted */ ++ struct dentry *d; ++ ++ d = au_wh_lkup(h_root, (void *)base[i].name, br); ++ err = PTR_ERR(d); ++ if (IS_ERR(d)) ++ goto out; ++ ++ base[i].dentry = d; ++ AuDebugOn(wbr ++ && wbr->wbr_wh[i] ++ && wbr->wbr_wh[i] != base[i].dentry); ++ } ++ ++ if (wbr) ++ for (i = 0; i < AuBrWh_Last; i++) { ++ dput(wbr->wbr_wh[i]); ++ wbr->wbr_wh[i] = NULL; ++ } ++ ++ err = 0; ++ if (!au_br_writable(br->br_perm)) { ++ h_dir = h_root->d_inode; ++ au_wh_init_ro(h_dir, base, &path); ++ } else if (!au_br_wh_linkable(br->br_perm)) { ++ err = au_wh_init_rw_nolink(h_root, wbr, do_plink, base, &path); ++ if (err > 0) ++ goto out; ++ else if (err) ++ goto out_err; ++ } else { ++ err = au_wh_init_rw(h_root, wbr, do_plink, base, &path); ++ if (err > 0) ++ goto out; ++ else if (err) ++ goto out_err; ++ } ++ goto out; /* success */ ++ ++out_err: ++ pr_err("an error(%d) on the writable branch %.*s(%s)\n", ++ err, AuDLNPair(h_root), au_sbtype(h_root->d_sb)); ++out: ++ for (i = 0; i < AuBrWh_Last; i++) ++ dput(base[i].dentry); ++ return err; ++} ++ ++/* ---------------------------------------------------------------------- */ ++/* ++ * whiteouts are all hard-linked usually. ++ * when its link count reaches a ceiling, we create a new whiteout base ++ * asynchronously. 
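[Illustrative aside, not part of the patch] As the comment above explains, a whiteout is normally just another hard link to a per-branch base file; only when the link count hits its ceiling (vfs_link() returning -EMLINK) does aufs create a fresh whiteout and re-initialize the base asynchronously, as link_or_create_wh() further below does. A user-space sketch of that link-then-create fallback, with hypothetical path names:

/* Try to hard-link a whiteout to the shared base; fall back to creating it. */
#include <stdio.h>
#include <errno.h>
#include <fcntl.h>
#include <unistd.h>

static int make_whiteout(const char *base, const char *wh_path)
{
	int fd;

	if (link(base, wh_path) == 0)
		return 0;		/* cheap case: one more hard link */
	if (errno != EMLINK)
		return -errno;		/* real failure */

	/* link count ceiling reached: create a new empty whiteout instead
	 * (aufs additionally kicks off re-initialization of the base file) */
	fd = open(wh_path, O_CREAT | O_EXCL | O_WRONLY, 0444);
	if (fd < 0)
		return -errno;
	close(fd);
	return 0;
}

int main(void)
{
	int err = make_whiteout("/tmp/.wh-base", "/tmp/.wh.example");

	printf("make_whiteout: %d\n", err);
	return 0;
}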
++ */ ++ ++struct reinit_br_wh { ++ struct super_block *sb; ++ struct au_branch *br; ++}; ++ ++static void reinit_br_wh(void *arg) ++{ ++ int err; ++ aufs_bindex_t bindex; ++ struct path h_path; ++ struct reinit_br_wh *a = arg; ++ struct au_wbr *wbr; ++ struct inode *dir; ++ struct dentry *h_root; ++ struct au_hinode *hdir; ++ ++ err = 0; ++ wbr = a->br->br_wbr; ++ /* big aufs lock */ ++ si_noflush_write_lock(a->sb); ++ if (!au_br_writable(a->br->br_perm)) ++ goto out; ++ bindex = au_br_index(a->sb, a->br->br_id); ++ if (unlikely(bindex < 0)) ++ goto out; ++ ++ di_read_lock_parent(a->sb->s_root, AuLock_IR); ++ dir = a->sb->s_root->d_inode; ++ hdir = au_hi(dir, bindex); ++ h_root = au_h_dptr(a->sb->s_root, bindex); ++ ++ au_hn_imtx_lock_nested(hdir, AuLsc_I_PARENT); ++ wbr_wh_write_lock(wbr); ++ err = au_h_verify(wbr->wbr_whbase, au_opt_udba(a->sb), hdir->hi_inode, ++ h_root, a->br); ++ if (!err) { ++ err = mnt_want_write(a->br->br_mnt); ++ if (!err) { ++ h_path.dentry = wbr->wbr_whbase; ++ h_path.mnt = a->br->br_mnt; ++ err = vfsub_unlink(hdir->hi_inode, &h_path, /*force*/0); ++ mnt_drop_write(a->br->br_mnt); ++ } ++ } else { ++ pr_warning("%.*s is moved, ignored\n", ++ AuDLNPair(wbr->wbr_whbase)); ++ err = 0; ++ } ++ dput(wbr->wbr_whbase); ++ wbr->wbr_whbase = NULL; ++ if (!err) ++ err = au_wh_init(h_root, a->br, a->sb); ++ wbr_wh_write_unlock(wbr); ++ au_hn_imtx_unlock(hdir); ++ di_read_unlock(a->sb->s_root, AuLock_IR); ++ ++out: ++ if (wbr) ++ atomic_dec(&wbr->wbr_wh_running); ++ atomic_dec(&a->br->br_count); ++ si_write_unlock(a->sb); ++ au_nwt_done(&au_sbi(a->sb)->si_nowait); ++ kfree(arg); ++ if (unlikely(err)) ++ AuIOErr("err %d\n", err); ++} ++ ++static void kick_reinit_br_wh(struct super_block *sb, struct au_branch *br) ++{ ++ int do_dec, wkq_err; ++ struct reinit_br_wh *arg; ++ ++ do_dec = 1; ++ if (atomic_inc_return(&br->br_wbr->wbr_wh_running) != 1) ++ goto out; ++ ++ /* ignore ENOMEM */ ++ arg = kmalloc(sizeof(*arg), GFP_NOFS); ++ if (arg) { ++ /* ++ * dec(wh_running), kfree(arg) and dec(br_count) ++ * in reinit function ++ */ ++ arg->sb = sb; ++ arg->br = br; ++ atomic_inc(&br->br_count); ++ wkq_err = au_wkq_nowait(reinit_br_wh, arg, sb, /*flags*/0); ++ if (unlikely(wkq_err)) { ++ atomic_dec(&br->br_wbr->wbr_wh_running); ++ atomic_dec(&br->br_count); ++ kfree(arg); ++ } ++ do_dec = 0; ++ } ++ ++out: ++ if (do_dec) ++ atomic_dec(&br->br_wbr->wbr_wh_running); ++} ++ ++/* ---------------------------------------------------------------------- */ ++ ++/* ++ * create the whiteout @wh. ++ */ ++static int link_or_create_wh(struct super_block *sb, aufs_bindex_t bindex, ++ struct dentry *wh) ++{ ++ int err; ++ struct path h_path = { ++ .dentry = wh ++ }; ++ struct au_branch *br; ++ struct au_wbr *wbr; ++ struct dentry *h_parent; ++ struct inode *h_dir; ++ ++ h_parent = wh->d_parent; /* dir inode is locked */ ++ h_dir = h_parent->d_inode; ++ IMustLock(h_dir); ++ ++ br = au_sbr(sb, bindex); ++ h_path.mnt = br->br_mnt; ++ wbr = br->br_wbr; ++ wbr_wh_read_lock(wbr); ++ if (wbr->wbr_whbase) { ++ err = vfsub_link(wbr->wbr_whbase, h_dir, &h_path); ++ if (!err || err != -EMLINK) ++ goto out; ++ ++ /* link count full. re-initialize br_whbase. */ ++ kick_reinit_br_wh(sb, br); ++ } ++ ++ /* return this error in this context */ ++ err = vfsub_create(h_dir, &h_path, WH_MASK); ++ ++out: ++ wbr_wh_read_unlock(wbr); ++ return err; ++} ++ ++/* ---------------------------------------------------------------------- */ ++ ++/* ++ * create or remove the diropq. 
++ */ ++static struct dentry *do_diropq(struct dentry *dentry, aufs_bindex_t bindex, ++ unsigned int flags) ++{ ++ struct dentry *opq_dentry, *h_dentry; ++ struct super_block *sb; ++ struct au_branch *br; ++ int err; ++ ++ sb = dentry->d_sb; ++ br = au_sbr(sb, bindex); ++ h_dentry = au_h_dptr(dentry, bindex); ++ opq_dentry = au_lkup_one(&diropq_name, h_dentry, br, /*nd*/NULL); ++ if (IS_ERR(opq_dentry)) ++ goto out; ++ ++ if (au_ftest_diropq(flags, CREATE)) { ++ err = link_or_create_wh(sb, bindex, opq_dentry); ++ if (!err) { ++ au_set_dbdiropq(dentry, bindex); ++ goto out; /* success */ ++ } ++ } else { ++ struct path tmp = { ++ .dentry = opq_dentry, ++ .mnt = br->br_mnt ++ }; ++ err = do_unlink_wh(au_h_iptr(dentry->d_inode, bindex), &tmp); ++ if (!err) ++ au_set_dbdiropq(dentry, -1); ++ } ++ dput(opq_dentry); ++ opq_dentry = ERR_PTR(err); ++ ++out: ++ return opq_dentry; ++} ++ ++struct do_diropq_args { ++ struct dentry **errp; ++ struct dentry *dentry; ++ aufs_bindex_t bindex; ++ unsigned int flags; ++}; ++ ++static void call_do_diropq(void *args) ++{ ++ struct do_diropq_args *a = args; ++ *a->errp = do_diropq(a->dentry, a->bindex, a->flags); ++} ++ ++struct dentry *au_diropq_sio(struct dentry *dentry, aufs_bindex_t bindex, ++ unsigned int flags) ++{ ++ struct dentry *diropq, *h_dentry; ++ ++ h_dentry = au_h_dptr(dentry, bindex); ++ if (!au_test_h_perm_sio(h_dentry->d_inode, MAY_EXEC | MAY_WRITE)) ++ diropq = do_diropq(dentry, bindex, flags); ++ else { ++ int wkq_err; ++ struct do_diropq_args args = { ++ .errp = &diropq, ++ .dentry = dentry, ++ .bindex = bindex, ++ .flags = flags ++ }; ++ ++ wkq_err = au_wkq_wait(call_do_diropq, &args); ++ if (unlikely(wkq_err)) ++ diropq = ERR_PTR(wkq_err); ++ } ++ ++ return diropq; ++} ++ ++/* ---------------------------------------------------------------------- */ ++ ++/* ++ * lookup whiteout dentry. ++ * @h_parent: lower parent dentry which must exist and be locked ++ * @base_name: name of dentry which will be whiteouted ++ * returns dentry for whiteout. ++ */ ++struct dentry *au_wh_lkup(struct dentry *h_parent, struct qstr *base_name, ++ struct au_branch *br) ++{ ++ int err; ++ struct qstr wh_name; ++ struct dentry *wh_dentry; ++ ++ err = au_wh_name_alloc(&wh_name, base_name); ++ wh_dentry = ERR_PTR(err); ++ if (!err) { ++ wh_dentry = au_lkup_one(&wh_name, h_parent, br, /*nd*/NULL); ++ kfree(wh_name.name); ++ } ++ return wh_dentry; ++} ++ ++/* ++ * link/create a whiteout for @dentry on @bindex. ++ */ ++struct dentry *au_wh_create(struct dentry *dentry, aufs_bindex_t bindex, ++ struct dentry *h_parent) ++{ ++ struct dentry *wh_dentry; ++ struct super_block *sb; ++ int err; ++ ++ sb = dentry->d_sb; ++ wh_dentry = au_wh_lkup(h_parent, &dentry->d_name, au_sbr(sb, bindex)); ++ if (!IS_ERR(wh_dentry) && !wh_dentry->d_inode) { ++ err = link_or_create_wh(sb, bindex, wh_dentry); ++ if (!err) ++ au_set_dbwh(dentry, bindex); ++ else { ++ dput(wh_dentry); ++ wh_dentry = ERR_PTR(err); ++ } ++ } ++ ++ return wh_dentry; ++} ++ ++/* ---------------------------------------------------------------------- */ ++ ++/* Delete all whiteouts in this directory on branch bindex. 
*/ ++static int del_wh_children(struct dentry *h_dentry, struct au_nhash *whlist, ++ aufs_bindex_t bindex, struct au_branch *br) ++{ ++ int err; ++ unsigned long ul, n; ++ struct qstr wh_name; ++ char *p; ++ struct hlist_head *head; ++ struct au_vdir_wh *tpos; ++ struct hlist_node *pos; ++ struct au_vdir_destr *str; ++ ++ err = -ENOMEM; ++ p = __getname_gfp(GFP_NOFS); ++ wh_name.name = p; ++ if (unlikely(!wh_name.name)) ++ goto out; ++ ++ err = 0; ++ memcpy(p, AUFS_WH_PFX, AUFS_WH_PFX_LEN); ++ p += AUFS_WH_PFX_LEN; ++ n = whlist->nh_num; ++ head = whlist->nh_head; ++ for (ul = 0; !err && ul < n; ul++, head++) { ++ hlist_for_each_entry(tpos, pos, head, wh_hash) { ++ if (tpos->wh_bindex != bindex) ++ continue; ++ ++ str = &tpos->wh_str; ++ if (str->len + AUFS_WH_PFX_LEN <= PATH_MAX) { ++ memcpy(p, str->name, str->len); ++ wh_name.len = AUFS_WH_PFX_LEN + str->len; ++ err = unlink_wh_name(h_dentry, &wh_name, br); ++ if (!err) ++ continue; ++ break; ++ } ++ AuIOErr("whiteout name too long %.*s\n", ++ str->len, str->name); ++ err = -EIO; ++ break; ++ } ++ } ++ __putname(wh_name.name); ++ ++out: ++ return err; ++} ++ ++struct del_wh_children_args { ++ int *errp; ++ struct dentry *h_dentry; ++ struct au_nhash *whlist; ++ aufs_bindex_t bindex; ++ struct au_branch *br; ++}; ++ ++static void call_del_wh_children(void *args) ++{ ++ struct del_wh_children_args *a = args; ++ *a->errp = del_wh_children(a->h_dentry, a->whlist, a->bindex, a->br); ++} ++ ++/* ---------------------------------------------------------------------- */ ++ ++struct au_whtmp_rmdir *au_whtmp_rmdir_alloc(struct super_block *sb, gfp_t gfp) ++{ ++ struct au_whtmp_rmdir *whtmp; ++ int err; ++ unsigned int rdhash; ++ ++ SiMustAnyLock(sb); ++ ++ whtmp = kmalloc(sizeof(*whtmp), gfp); ++ if (unlikely(!whtmp)) { ++ whtmp = ERR_PTR(-ENOMEM); ++ goto out; ++ } ++ ++ whtmp->dir = NULL; ++ whtmp->br = NULL; ++ whtmp->wh_dentry = NULL; ++ /* no estimation for dir size */ ++ rdhash = au_sbi(sb)->si_rdhash; ++ if (!rdhash) ++ rdhash = AUFS_RDHASH_DEF; ++ err = au_nhash_alloc(&whtmp->whlist, rdhash, gfp); ++ if (unlikely(err)) { ++ kfree(whtmp); ++ whtmp = ERR_PTR(err); ++ } ++ ++out: ++ return whtmp; ++} ++ ++void au_whtmp_rmdir_free(struct au_whtmp_rmdir *whtmp) ++{ ++ if (whtmp->br) ++ atomic_dec(&whtmp->br->br_count); ++ dput(whtmp->wh_dentry); ++ iput(whtmp->dir); ++ au_nhash_wh_free(&whtmp->whlist); ++ kfree(whtmp); ++} ++ ++/* ++ * rmdir the whiteouted temporary named dir @h_dentry. ++ * @whlist: whiteouted children. ++ */ ++int au_whtmp_rmdir(struct inode *dir, aufs_bindex_t bindex, ++ struct dentry *wh_dentry, struct au_nhash *whlist) ++{ ++ int err; ++ struct path h_tmp; ++ struct inode *wh_inode, *h_dir; ++ struct au_branch *br; ++ ++ h_dir = wh_dentry->d_parent->d_inode; /* dir inode is locked */ ++ IMustLock(h_dir); ++ ++ br = au_sbr(dir->i_sb, bindex); ++ wh_inode = wh_dentry->d_inode; ++ mutex_lock_nested(&wh_inode->i_mutex, AuLsc_I_CHILD); ++ ++ /* ++ * someone else might change some whiteouts while we were sleeping. ++ * it means this whlist may have an obsoleted entry. 
++ */ ++ if (!au_test_h_perm_sio(wh_inode, MAY_EXEC | MAY_WRITE)) ++ err = del_wh_children(wh_dentry, whlist, bindex, br); ++ else { ++ int wkq_err; ++ struct del_wh_children_args args = { ++ .errp = &err, ++ .h_dentry = wh_dentry, ++ .whlist = whlist, ++ .bindex = bindex, ++ .br = br ++ }; ++ ++ wkq_err = au_wkq_wait(call_del_wh_children, &args); ++ if (unlikely(wkq_err)) ++ err = wkq_err; ++ } ++ mutex_unlock(&wh_inode->i_mutex); ++ ++ if (!err) { ++ h_tmp.dentry = wh_dentry; ++ h_tmp.mnt = br->br_mnt; ++ err = vfsub_rmdir(h_dir, &h_tmp); ++ } ++ ++ if (!err) { ++ if (au_ibstart(dir) == bindex) { ++ /* todo: dir->i_mutex is necessary */ ++ au_cpup_attr_timesizes(dir); ++ vfsub_drop_nlink(dir); ++ } ++ return 0; /* success */ ++ } ++ ++ pr_warning("failed removing %.*s(%d), ignored\n", ++ AuDLNPair(wh_dentry), err); ++ return err; ++} ++ ++static void call_rmdir_whtmp(void *args) ++{ ++ int err; ++ aufs_bindex_t bindex; ++ struct au_whtmp_rmdir *a = args; ++ struct super_block *sb; ++ struct dentry *h_parent; ++ struct inode *h_dir; ++ struct au_hinode *hdir; ++ ++ /* rmdir by nfsd may cause deadlock with this i_mutex */ ++ /* mutex_lock(&a->dir->i_mutex); */ ++ err = -EROFS; ++ sb = a->dir->i_sb; ++ si_read_lock(sb, !AuLock_FLUSH); ++ if (!au_br_writable(a->br->br_perm)) ++ goto out; ++ bindex = au_br_index(sb, a->br->br_id); ++ if (unlikely(bindex < 0)) ++ goto out; ++ ++ err = -EIO; ++ ii_write_lock_parent(a->dir); ++ h_parent = dget_parent(a->wh_dentry); ++ h_dir = h_parent->d_inode; ++ hdir = au_hi(a->dir, bindex); ++ au_hn_imtx_lock_nested(hdir, AuLsc_I_PARENT); ++ err = au_h_verify(a->wh_dentry, au_opt_udba(sb), h_dir, h_parent, ++ a->br); ++ if (!err) { ++ err = mnt_want_write(a->br->br_mnt); ++ if (!err) { ++ err = au_whtmp_rmdir(a->dir, bindex, a->wh_dentry, ++ &a->whlist); ++ mnt_drop_write(a->br->br_mnt); ++ } ++ } ++ au_hn_imtx_unlock(hdir); ++ dput(h_parent); ++ ii_write_unlock(a->dir); ++ ++out: ++ /* mutex_unlock(&a->dir->i_mutex); */ ++ au_whtmp_rmdir_free(a); ++ si_read_unlock(sb); ++ au_nwt_done(&au_sbi(sb)->si_nowait); ++ if (unlikely(err)) ++ AuIOErr("err %d\n", err); ++} ++ ++void au_whtmp_kick_rmdir(struct inode *dir, aufs_bindex_t bindex, ++ struct dentry *wh_dentry, struct au_whtmp_rmdir *args) ++{ ++ int wkq_err; ++ struct super_block *sb; ++ ++ IMustLock(dir); ++ ++ /* all post-process will be done in do_rmdir_whtmp(). */ ++ sb = dir->i_sb; ++ args->dir = au_igrab(dir); ++ args->br = au_sbr(sb, bindex); ++ atomic_inc(&args->br->br_count); ++ args->wh_dentry = dget(wh_dentry); ++ wkq_err = au_wkq_nowait(call_rmdir_whtmp, args, sb, /*flags*/0); ++ if (unlikely(wkq_err)) { ++ pr_warning("rmdir error %.*s (%d), ignored\n", ++ AuDLNPair(wh_dentry), wkq_err); ++ au_whtmp_rmdir_free(args); ++ } ++} +--- /dev/null 2012-03-14 12:35:58.848999748 +0100 ++++ b/fs/aufs/whout.h 2012-03-20 17:31:18.000000000 +0100 +@@ -0,0 +1,88 @@ ++/* ++ * Copyright (C) 2005-2012 Junjiro R. Okajima ++ * ++ * This program, aufs is free software; you can redistribute it and/or modify ++ * it under the terms of the GNU General Public License as published by ++ * the Free Software Foundation; either version 2 of the License, or ++ * (at your option) any later version. ++ * ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ * GNU General Public License for more details. 
++ * ++ * You should have received a copy of the GNU General Public License ++ * along with this program; if not, write to the Free Software ++ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA ++ */ ++ ++/* ++ * whiteout for logical deletion and opaque directory ++ */ ++ ++#ifndef __AUFS_WHOUT_H__ ++#define __AUFS_WHOUT_H__ ++ ++#ifdef __KERNEL__ ++ ++#include "dir.h" ++ ++/* whout.c */ ++int au_wh_name_alloc(struct qstr *wh, const struct qstr *name); ++struct au_branch; ++int au_wh_test(struct dentry *h_parent, struct qstr *wh_name, ++ struct au_branch *br, int try_sio); ++int au_diropq_test(struct dentry *h_dentry, struct au_branch *br); ++struct dentry *au_whtmp_lkup(struct dentry *h_parent, struct au_branch *br, ++ struct qstr *prefix); ++int au_whtmp_ren(struct dentry *h_dentry, struct au_branch *br); ++int au_wh_unlink_dentry(struct inode *h_dir, struct path *h_path, ++ struct dentry *dentry); ++int au_wh_init(struct dentry *h_parent, struct au_branch *br, ++ struct super_block *sb); ++ ++/* diropq flags */ ++#define AuDiropq_CREATE 1 ++#define au_ftest_diropq(flags, name) ((flags) & AuDiropq_##name) ++#define au_fset_diropq(flags, name) \ ++ do { (flags) |= AuDiropq_##name; } while (0) ++#define au_fclr_diropq(flags, name) \ ++ do { (flags) &= ~AuDiropq_##name; } while (0) ++ ++struct dentry *au_diropq_sio(struct dentry *dentry, aufs_bindex_t bindex, ++ unsigned int flags); ++struct dentry *au_wh_lkup(struct dentry *h_parent, struct qstr *base_name, ++ struct au_branch *br); ++struct dentry *au_wh_create(struct dentry *dentry, aufs_bindex_t bindex, ++ struct dentry *h_parent); ++ ++/* real rmdir for the whiteout-ed dir */ ++struct au_whtmp_rmdir { ++ struct inode *dir; ++ struct au_branch *br; ++ struct dentry *wh_dentry; ++ struct au_nhash whlist; ++}; ++ ++struct au_whtmp_rmdir *au_whtmp_rmdir_alloc(struct super_block *sb, gfp_t gfp); ++void au_whtmp_rmdir_free(struct au_whtmp_rmdir *whtmp); ++int au_whtmp_rmdir(struct inode *dir, aufs_bindex_t bindex, ++ struct dentry *wh_dentry, struct au_nhash *whlist); ++void au_whtmp_kick_rmdir(struct inode *dir, aufs_bindex_t bindex, ++ struct dentry *wh_dentry, struct au_whtmp_rmdir *args); ++ ++/* ---------------------------------------------------------------------- */ ++ ++static inline struct dentry *au_diropq_create(struct dentry *dentry, ++ aufs_bindex_t bindex) ++{ ++ return au_diropq_sio(dentry, bindex, AuDiropq_CREATE); ++} ++ ++static inline int au_diropq_remove(struct dentry *dentry, aufs_bindex_t bindex) ++{ ++ return PTR_ERR(au_diropq_sio(dentry, bindex, !AuDiropq_CREATE)); ++} ++ ++#endif /* __KERNEL__ */ ++#endif /* __AUFS_WHOUT_H__ */ +--- /dev/null 2012-03-14 12:35:58.848999748 +0100 ++++ b/fs/aufs/wkq.c 2012-03-20 17:31:18.000000000 +0100 +@@ -0,0 +1,214 @@ ++/* ++ * Copyright (C) 2005-2012 Junjiro R. Okajima ++ * ++ * This program, aufs is free software; you can redistribute it and/or modify ++ * it under the terms of the GNU General Public License as published by ++ * the Free Software Foundation; either version 2 of the License, or ++ * (at your option) any later version. ++ * ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ * GNU General Public License for more details. 
++ * ++ * You should have received a copy of the GNU General Public License ++ * along with this program; if not, write to the Free Software ++ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA ++ */ ++ ++/* ++ * workqueue for asynchronous/super-io operations ++ * todo: try new dredential scheme ++ */ ++ ++#include ++#include "aufs.h" ++ ++/* internal workqueue named AUFS_WKQ_NAME */ ++ ++static struct workqueue_struct *au_wkq; ++ ++struct au_wkinfo { ++ struct work_struct wk; ++ struct kobject *kobj; ++ ++ unsigned int flags; /* see wkq.h */ ++ ++ au_wkq_func_t func; ++ void *args; ++ ++ struct completion *comp; ++}; ++ ++/* ---------------------------------------------------------------------- */ ++ ++static void wkq_func(struct work_struct *wk) ++{ ++ struct au_wkinfo *wkinfo = container_of(wk, struct au_wkinfo, wk); ++ ++ AuDebugOn(current_fsuid()); ++ AuDebugOn(rlimit(RLIMIT_FSIZE) != RLIM_INFINITY); ++ ++ wkinfo->func(wkinfo->args); ++ if (au_ftest_wkq(wkinfo->flags, WAIT)) ++ complete(wkinfo->comp); ++ else { ++ kobject_put(wkinfo->kobj); ++ module_put(THIS_MODULE); /* todo: ?? */ ++ kfree(wkinfo); ++ } ++} ++ ++/* ++ * Since struct completion is large, try allocating it dynamically. ++ */ ++#if defined(CONFIG_4KSTACKS) || defined(AuTest4KSTACKS) ++#define AuWkqCompDeclare(name) struct completion *comp = NULL ++ ++static int au_wkq_comp_alloc(struct au_wkinfo *wkinfo, struct completion **comp) ++{ ++ *comp = kmalloc(sizeof(**comp), GFP_NOFS); ++ if (*comp) { ++ init_completion(*comp); ++ wkinfo->comp = *comp; ++ return 0; ++ } ++ return -ENOMEM; ++} ++ ++static void au_wkq_comp_free(struct completion *comp) ++{ ++ kfree(comp); ++} ++ ++#else ++ ++/* no braces */ ++#define AuWkqCompDeclare(name) \ ++ DECLARE_COMPLETION_ONSTACK(_ ## name); \ ++ struct completion *comp = &_ ## name ++ ++static int au_wkq_comp_alloc(struct au_wkinfo *wkinfo, struct completion **comp) ++{ ++ wkinfo->comp = *comp; ++ return 0; ++} ++ ++static void au_wkq_comp_free(struct completion *comp __maybe_unused) ++{ ++ /* empty */ ++} ++#endif /* 4KSTACKS */ ++ ++static void au_wkq_run(struct au_wkinfo *wkinfo) ++{ ++ if (au_ftest_wkq(wkinfo->flags, NEST)) { ++ if (au_wkq_test()) { ++ AuWarn1("wkq from wkq, due to a dead dir by UDBA?\n"); ++ AuDebugOn(au_ftest_wkq(wkinfo->flags, WAIT)); ++ } ++ } else ++ au_dbg_verify_kthread(); ++ ++ if (au_ftest_wkq(wkinfo->flags, WAIT)) { ++ INIT_WORK_ONSTACK(&wkinfo->wk, wkq_func); ++ queue_work(au_wkq, &wkinfo->wk); ++ } else { ++ INIT_WORK(&wkinfo->wk, wkq_func); ++ schedule_work(&wkinfo->wk); ++ } ++} ++ ++/* ++ * Be careful. It is easy to make deadlock happen. ++ * processA: lock, wkq and wait ++ * processB: wkq and wait, lock in wkq ++ * --> deadlock ++ */ ++int au_wkq_do_wait(unsigned int flags, au_wkq_func_t func, void *args) ++{ ++ int err; ++ AuWkqCompDeclare(comp); ++ struct au_wkinfo wkinfo = { ++ .flags = flags, ++ .func = func, ++ .args = args ++ }; ++ ++ err = au_wkq_comp_alloc(&wkinfo, &comp); ++ if (!err) { ++ au_wkq_run(&wkinfo); ++ /* no timeout, no interrupt */ ++ wait_for_completion(wkinfo.comp); ++ au_wkq_comp_free(comp); ++ destroy_work_on_stack(&wkinfo.wk); ++ } ++ ++ return err; ++ ++} ++ ++/* ++ * Note: dget/dput() in func for aufs dentries are not supported. It will be a ++ * problem in a concurrent umounting. 
++ */ ++int au_wkq_nowait(au_wkq_func_t func, void *args, struct super_block *sb, ++ unsigned int flags) ++{ ++ int err; ++ struct au_wkinfo *wkinfo; ++ ++ atomic_inc(&au_sbi(sb)->si_nowait.nw_len); ++ ++ /* ++ * wkq_func() must free this wkinfo. ++ * it highly depends upon the implementation of workqueue. ++ */ ++ err = 0; ++ wkinfo = kmalloc(sizeof(*wkinfo), GFP_NOFS); ++ if (wkinfo) { ++ wkinfo->kobj = &au_sbi(sb)->si_kobj; ++ wkinfo->flags = flags & ~AuWkq_WAIT; ++ wkinfo->func = func; ++ wkinfo->args = args; ++ wkinfo->comp = NULL; ++ kobject_get(wkinfo->kobj); ++ __module_get(THIS_MODULE); /* todo: ?? */ ++ ++ au_wkq_run(wkinfo); ++ } else { ++ err = -ENOMEM; ++ au_nwt_done(&au_sbi(sb)->si_nowait); ++ } ++ ++ return err; ++} ++ ++/* ---------------------------------------------------------------------- */ ++ ++void au_nwt_init(struct au_nowait_tasks *nwt) ++{ ++ atomic_set(&nwt->nw_len, 0); ++ /* smp_mb(); */ /* atomic_set */ ++ init_waitqueue_head(&nwt->nw_wq); ++} ++ ++void au_wkq_fin(void) ++{ ++ destroy_workqueue(au_wkq); ++} ++ ++int __init au_wkq_init(void) ++{ ++ int err; ++ ++ err = 0; ++ BUILD_BUG_ON(!WQ_RESCUER); ++ au_wkq = alloc_workqueue(AUFS_WKQ_NAME, !WQ_RESCUER, WQ_DFL_ACTIVE); ++ if (IS_ERR(au_wkq)) ++ err = PTR_ERR(au_wkq); ++ else if (!au_wkq) ++ err = -ENOMEM; ++ ++ return err; ++} +--- /dev/null 2012-03-14 12:35:58.848999748 +0100 ++++ b/fs/aufs/wkq.h 2012-03-20 17:31:18.000000000 +0100 +@@ -0,0 +1,92 @@ ++/* ++ * Copyright (C) 2005-2012 Junjiro R. Okajima ++ * ++ * This program, aufs is free software; you can redistribute it and/or modify ++ * it under the terms of the GNU General Public License as published by ++ * the Free Software Foundation; either version 2 of the License, or ++ * (at your option) any later version. ++ * ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ * GNU General Public License for more details. 
++ * ++ * You should have received a copy of the GNU General Public License ++ * along with this program; if not, write to the Free Software ++ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA ++ */ ++ ++/* ++ * workqueue for asynchronous/super-io operations ++ * todo: try new credentials management scheme ++ */ ++ ++#ifndef __AUFS_WKQ_H__ ++#define __AUFS_WKQ_H__ ++ ++#ifdef __KERNEL__ ++ ++struct super_block; ++ ++/* ---------------------------------------------------------------------- */ ++ ++/* ++ * in the next operation, wait for the 'nowait' tasks in system-wide workqueue ++ */ ++struct au_nowait_tasks { ++ atomic_t nw_len; ++ wait_queue_head_t nw_wq; ++}; ++ ++/* ---------------------------------------------------------------------- */ ++ ++typedef void (*au_wkq_func_t)(void *args); ++ ++/* wkq flags */ ++#define AuWkq_WAIT 1 ++#define AuWkq_NEST (1 << 1) ++#define au_ftest_wkq(flags, name) ((flags) & AuWkq_##name) ++#define au_fset_wkq(flags, name) \ ++ do { (flags) |= AuWkq_##name; } while (0) ++#define au_fclr_wkq(flags, name) \ ++ do { (flags) &= ~AuWkq_##name; } while (0) ++ ++#ifndef CONFIG_AUFS_HNOTIFY ++#undef AuWkq_NEST ++#define AuWkq_NEST 0 ++#endif ++ ++/* wkq.c */ ++int au_wkq_do_wait(unsigned int flags, au_wkq_func_t func, void *args); ++int au_wkq_nowait(au_wkq_func_t func, void *args, struct super_block *sb, ++ unsigned int flags); ++void au_nwt_init(struct au_nowait_tasks *nwt); ++int __init au_wkq_init(void); ++void au_wkq_fin(void); ++ ++/* ---------------------------------------------------------------------- */ ++ ++static inline int au_wkq_test(void) ++{ ++ return current->flags & PF_WQ_WORKER; ++} ++ ++static inline int au_wkq_wait(au_wkq_func_t func, void *args) ++{ ++ return au_wkq_do_wait(AuWkq_WAIT, func, args); ++} ++ ++static inline void au_nwt_done(struct au_nowait_tasks *nwt) ++{ ++ if (atomic_dec_and_test(&nwt->nw_len)) ++ wake_up_all(&nwt->nw_wq); ++} ++ ++static inline int au_nwt_flush(struct au_nowait_tasks *nwt) ++{ ++ wait_event(nwt->nw_wq, !atomic_read(&nwt->nw_len)); ++ return 0; ++} ++ ++#endif /* __KERNEL__ */ ++#endif /* __AUFS_WKQ_H__ */ +--- /dev/null 2012-03-14 12:35:58.848999748 +0100 ++++ b/fs/aufs/xino.c 2012-03-20 17:31:18.000000000 +0100 +@@ -0,0 +1,1264 @@ ++/* ++ * Copyright (C) 2005-2012 Junjiro R. Okajima ++ * ++ * This program, aufs is free software; you can redistribute it and/or modify ++ * it under the terms of the GNU General Public License as published by ++ * the Free Software Foundation; either version 2 of the License, or ++ * (at your option) any later version. ++ * ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ * GNU General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public License ++ * along with this program; if not, write to the Free Software ++ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA ++ */ ++ ++/* ++ * external inode number translation table and bitmap ++ */ ++ ++#include ++#include "aufs.h" ++ ++/* todo: unnecessary to support mmap_sem since kernel-space? */ ++ssize_t xino_fread(au_readf_t func, struct file *file, void *kbuf, size_t size, ++ loff_t *pos) ++{ ++ ssize_t err; ++ mm_segment_t oldfs; ++ union { ++ void *k; ++ char __user *u; ++ } buf; ++ ++ buf.k = kbuf; ++ oldfs = get_fs(); ++ set_fs(KERNEL_DS); ++ do { ++ /* todo: signal_pending? 
*/ ++ err = func(file, buf.u, size, pos); ++ } while (err == -EAGAIN || err == -EINTR); ++ set_fs(oldfs); ++ ++#if 0 /* reserved for future use */ ++ if (err > 0) ++ fsnotify_access(file->f_dentry); ++#endif ++ ++ return err; ++} ++ ++/* ---------------------------------------------------------------------- */ ++ ++static ssize_t do_xino_fwrite(au_writef_t func, struct file *file, void *kbuf, ++ size_t size, loff_t *pos) ++{ ++ ssize_t err; ++ mm_segment_t oldfs; ++ union { ++ void *k; ++ const char __user *u; ++ } buf; ++ ++ buf.k = kbuf; ++ oldfs = get_fs(); ++ set_fs(KERNEL_DS); ++ do { ++ /* todo: signal_pending? */ ++ err = func(file, buf.u, size, pos); ++ } while (err == -EAGAIN || err == -EINTR); ++ set_fs(oldfs); ++ ++#if 0 /* reserved for future use */ ++ if (err > 0) ++ fsnotify_modify(file->f_dentry); ++#endif ++ ++ return err; ++} ++ ++struct do_xino_fwrite_args { ++ ssize_t *errp; ++ au_writef_t func; ++ struct file *file; ++ void *buf; ++ size_t size; ++ loff_t *pos; ++}; ++ ++static void call_do_xino_fwrite(void *args) ++{ ++ struct do_xino_fwrite_args *a = args; ++ *a->errp = do_xino_fwrite(a->func, a->file, a->buf, a->size, a->pos); ++} ++ ++ssize_t xino_fwrite(au_writef_t func, struct file *file, void *buf, size_t size, ++ loff_t *pos) ++{ ++ ssize_t err; ++ ++ /* todo: signal block and no wkq? */ ++ if (rlimit(RLIMIT_FSIZE) == RLIM_INFINITY) { ++ lockdep_off(); ++ err = do_xino_fwrite(func, file, buf, size, pos); ++ lockdep_on(); ++ } else { ++ /* ++ * it breaks RLIMIT_FSIZE and normal user's limit, ++ * users should care about quota and real 'filesystem full.' ++ */ ++ int wkq_err; ++ struct do_xino_fwrite_args args = { ++ .errp = &err, ++ .func = func, ++ .file = file, ++ .buf = buf, ++ .size = size, ++ .pos = pos ++ }; ++ ++ wkq_err = au_wkq_wait(call_do_xino_fwrite, &args); ++ if (unlikely(wkq_err)) ++ err = wkq_err; ++ } ++ ++ return err; ++} ++ ++/* ---------------------------------------------------------------------- */ ++ ++/* ++ * create a new xinofile at the same place/path as @base_file. 
++ */ ++struct file *au_xino_create2(struct file *base_file, struct file *copy_src) ++{ ++ struct file *file; ++ struct dentry *base, *parent; ++ struct inode *dir; ++ struct qstr *name; ++ struct path path; ++ int err; ++ ++ base = base_file->f_dentry; ++ parent = base->d_parent; /* dir inode is locked */ ++ dir = parent->d_inode; ++ IMustLock(dir); ++ ++ file = ERR_PTR(-EINVAL); ++ name = &base->d_name; ++ path.dentry = vfsub_lookup_one_len(name->name, parent, name->len); ++ if (IS_ERR(path.dentry)) { ++ file = (void *)path.dentry; ++ pr_err("%.*s lookup err %ld\n", ++ AuLNPair(name), PTR_ERR(path.dentry)); ++ goto out; ++ } ++ ++ /* no need to mnt_want_write() since we call dentry_open() later */ ++ err = vfs_create(dir, path.dentry, S_IRUGO | S_IWUGO, NULL); ++ if (unlikely(err)) { ++ file = ERR_PTR(err); ++ pr_err("%.*s create err %d\n", AuLNPair(name), err); ++ goto out_dput; ++ } ++ ++ path.mnt = base_file->f_vfsmnt; ++ file = vfsub_dentry_open(&path, ++ O_RDWR | O_CREAT | O_EXCL | O_LARGEFILE ++ /* | __FMODE_NONOTIFY */); ++ if (IS_ERR(file)) { ++ pr_err("%.*s open err %ld\n", AuLNPair(name), PTR_ERR(file)); ++ goto out_dput; ++ } ++ ++ err = vfsub_unlink(dir, &file->f_path, /*force*/0); ++ if (unlikely(err)) { ++ pr_err("%.*s unlink err %d\n", AuLNPair(name), err); ++ goto out_fput; ++ } ++ ++ if (copy_src) { ++ /* no one can touch copy_src xino */ ++ err = au_copy_file(file, copy_src, ++ i_size_read(copy_src->f_dentry->d_inode)); ++ if (unlikely(err)) { ++ pr_err("%.*s copy err %d\n", AuLNPair(name), err); ++ goto out_fput; ++ } ++ } ++ goto out_dput; /* success */ ++ ++out_fput: ++ fput(file); ++ file = ERR_PTR(err); ++out_dput: ++ dput(path.dentry); ++out: ++ return file; ++} ++ ++struct au_xino_lock_dir { ++ struct au_hinode *hdir; ++ struct dentry *parent; ++ struct mutex *mtx; ++}; ++ ++static void au_xino_lock_dir(struct super_block *sb, struct file *xino, ++ struct au_xino_lock_dir *ldir) ++{ ++ aufs_bindex_t brid, bindex; ++ ++ ldir->hdir = NULL; ++ bindex = -1; ++ brid = au_xino_brid(sb); ++ if (brid >= 0) ++ bindex = au_br_index(sb, brid); ++ if (bindex >= 0) { ++ ldir->hdir = au_hi(sb->s_root->d_inode, bindex); ++ au_hn_imtx_lock_nested(ldir->hdir, AuLsc_I_PARENT); ++ } else { ++ ldir->parent = dget_parent(xino->f_dentry); ++ ldir->mtx = &ldir->parent->d_inode->i_mutex; ++ mutex_lock_nested(ldir->mtx, AuLsc_I_PARENT); ++ } ++} ++ ++static void au_xino_unlock_dir(struct au_xino_lock_dir *ldir) ++{ ++ if (ldir->hdir) ++ au_hn_imtx_unlock(ldir->hdir); ++ else { ++ mutex_unlock(ldir->mtx); ++ dput(ldir->parent); ++ } ++} ++ ++/* ---------------------------------------------------------------------- */ ++ ++/* trucate xino files asynchronously */ ++ ++int au_xino_trunc(struct super_block *sb, aufs_bindex_t bindex) ++{ ++ int err; ++ aufs_bindex_t bi, bend; ++ struct au_branch *br; ++ struct file *new_xino, *file; ++ struct super_block *h_sb; ++ struct au_xino_lock_dir ldir; ++ ++ err = -EINVAL; ++ bend = au_sbend(sb); ++ if (unlikely(bindex < 0 || bend < bindex)) ++ goto out; ++ br = au_sbr(sb, bindex); ++ file = br->br_xino.xi_file; ++ if (!file) ++ goto out; ++ ++ au_xino_lock_dir(sb, file, &ldir); ++ /* mnt_want_write() is unnecessary here */ ++ new_xino = au_xino_create2(file, file); ++ au_xino_unlock_dir(&ldir); ++ err = PTR_ERR(new_xino); ++ if (IS_ERR(new_xino)) ++ goto out; ++ err = 0; ++ fput(file); ++ br->br_xino.xi_file = new_xino; ++ ++ h_sb = br->br_mnt->mnt_sb; ++ for (bi = 0; bi <= bend; bi++) { ++ if (unlikely(bi == bindex)) ++ continue; ++ br = au_sbr(sb, 
bi); ++ if (br->br_mnt->mnt_sb != h_sb) ++ continue; ++ ++ fput(br->br_xino.xi_file); ++ br->br_xino.xi_file = new_xino; ++ get_file(new_xino); ++ } ++ ++out: ++ return err; ++} ++ ++struct xino_do_trunc_args { ++ struct super_block *sb; ++ struct au_branch *br; ++}; ++ ++static void xino_do_trunc(void *_args) ++{ ++ struct xino_do_trunc_args *args = _args; ++ struct super_block *sb; ++ struct au_branch *br; ++ struct inode *dir; ++ int err; ++ aufs_bindex_t bindex; ++ ++ err = 0; ++ sb = args->sb; ++ dir = sb->s_root->d_inode; ++ br = args->br; ++ ++ si_noflush_write_lock(sb); ++ ii_read_lock_parent(dir); ++ bindex = au_br_index(sb, br->br_id); ++ err = au_xino_trunc(sb, bindex); ++ if (!err ++ && br->br_xino.xi_file->f_dentry->d_inode->i_blocks ++ >= br->br_xino_upper) ++ br->br_xino_upper += AUFS_XINO_TRUNC_STEP; ++ ++ ii_read_unlock(dir); ++ if (unlikely(err)) ++ pr_warning("err b%d, (%d)\n", bindex, err); ++ atomic_dec(&br->br_xino_running); ++ atomic_dec(&br->br_count); ++ si_write_unlock(sb); ++ au_nwt_done(&au_sbi(sb)->si_nowait); ++ kfree(args); ++} ++ ++static void xino_try_trunc(struct super_block *sb, struct au_branch *br) ++{ ++ struct xino_do_trunc_args *args; ++ int wkq_err; ++ ++ if (br->br_xino.xi_file->f_dentry->d_inode->i_blocks ++ < br->br_xino_upper) ++ return; ++ ++ if (atomic_inc_return(&br->br_xino_running) > 1) ++ goto out; ++ ++ /* lock and kfree() will be called in trunc_xino() */ ++ args = kmalloc(sizeof(*args), GFP_NOFS); ++ if (unlikely(!args)) { ++ AuErr1("no memory\n"); ++ goto out_args; ++ } ++ ++ atomic_inc(&br->br_count); ++ args->sb = sb; ++ args->br = br; ++ wkq_err = au_wkq_nowait(xino_do_trunc, args, sb, /*flags*/0); ++ if (!wkq_err) ++ return; /* success */ ++ ++ pr_err("wkq %d\n", wkq_err); ++ atomic_dec(&br->br_count); ++ ++out_args: ++ kfree(args); ++out: ++ atomic_dec(&br->br_xino_running); ++} ++ ++/* ---------------------------------------------------------------------- */ ++ ++static int au_xino_do_write(au_writef_t write, struct file *file, ++ ino_t h_ino, ino_t ino) ++{ ++ loff_t pos; ++ ssize_t sz; ++ ++ pos = h_ino; ++ if (unlikely(au_loff_max / sizeof(ino) - 1 < pos)) { ++ AuIOErr1("too large hi%lu\n", (unsigned long)h_ino); ++ return -EFBIG; ++ } ++ pos *= sizeof(ino); ++ sz = xino_fwrite(write, file, &ino, sizeof(ino), &pos); ++ if (sz == sizeof(ino)) ++ return 0; /* success */ ++ ++ AuIOErr("write failed (%zd)\n", sz); ++ return -EIO; ++} ++ ++/* ++ * write @ino to the xinofile for the specified branch{@sb, @bindex} ++ * at the position of @h_ino. ++ * even if @ino is zero, it is written to the xinofile and means no entry. ++ * if the size of the xino file on a specific filesystem exceeds the watermark, ++ * try truncating it. 
++ */ ++int au_xino_write(struct super_block *sb, aufs_bindex_t bindex, ino_t h_ino, ++ ino_t ino) ++{ ++ int err; ++ unsigned int mnt_flags; ++ struct au_branch *br; ++ ++ BUILD_BUG_ON(sizeof(long long) != sizeof(au_loff_max) ++ || ((loff_t)-1) > 0); ++ SiMustAnyLock(sb); ++ ++ mnt_flags = au_mntflags(sb); ++ if (!au_opt_test(mnt_flags, XINO)) ++ return 0; ++ ++ br = au_sbr(sb, bindex); ++ err = au_xino_do_write(au_sbi(sb)->si_xwrite, br->br_xino.xi_file, ++ h_ino, ino); ++ if (!err) { ++ if (au_opt_test(mnt_flags, TRUNC_XINO) ++ && au_test_fs_trunc_xino(br->br_mnt->mnt_sb)) ++ xino_try_trunc(sb, br); ++ return 0; /* success */ ++ } ++ ++ AuIOErr("write failed (%d)\n", err); ++ return -EIO; ++} ++ ++/* ---------------------------------------------------------------------- */ ++ ++/* aufs inode number bitmap */ ++ ++static const int page_bits = (int)PAGE_SIZE * BITS_PER_BYTE; ++static ino_t xib_calc_ino(unsigned long pindex, int bit) ++{ ++ ino_t ino; ++ ++ AuDebugOn(bit < 0 || page_bits <= bit); ++ ino = AUFS_FIRST_INO + pindex * page_bits + bit; ++ return ino; ++} ++ ++static void xib_calc_bit(ino_t ino, unsigned long *pindex, int *bit) ++{ ++ AuDebugOn(ino < AUFS_FIRST_INO); ++ ino -= AUFS_FIRST_INO; ++ *pindex = ino / page_bits; ++ *bit = ino % page_bits; ++} ++ ++static int xib_pindex(struct super_block *sb, unsigned long pindex) ++{ ++ int err; ++ loff_t pos; ++ ssize_t sz; ++ struct au_sbinfo *sbinfo; ++ struct file *xib; ++ unsigned long *p; ++ ++ sbinfo = au_sbi(sb); ++ MtxMustLock(&sbinfo->si_xib_mtx); ++ AuDebugOn(pindex > ULONG_MAX / PAGE_SIZE ++ || !au_opt_test(sbinfo->si_mntflags, XINO)); ++ ++ if (pindex == sbinfo->si_xib_last_pindex) ++ return 0; ++ ++ xib = sbinfo->si_xib; ++ p = sbinfo->si_xib_buf; ++ pos = sbinfo->si_xib_last_pindex; ++ pos *= PAGE_SIZE; ++ sz = xino_fwrite(sbinfo->si_xwrite, xib, p, PAGE_SIZE, &pos); ++ if (unlikely(sz != PAGE_SIZE)) ++ goto out; ++ ++ pos = pindex; ++ pos *= PAGE_SIZE; ++ if (i_size_read(xib->f_dentry->d_inode) >= pos + PAGE_SIZE) ++ sz = xino_fread(sbinfo->si_xread, xib, p, PAGE_SIZE, &pos); ++ else { ++ memset(p, 0, PAGE_SIZE); ++ sz = xino_fwrite(sbinfo->si_xwrite, xib, p, PAGE_SIZE, &pos); ++ } ++ if (sz == PAGE_SIZE) { ++ sbinfo->si_xib_last_pindex = pindex; ++ return 0; /* success */ ++ } ++ ++out: ++ AuIOErr1("write failed (%zd)\n", sz); ++ err = sz; ++ if (sz >= 0) ++ err = -EIO; ++ return err; ++} ++ ++/* ---------------------------------------------------------------------- */ ++ ++static void au_xib_clear_bit(struct inode *inode) ++{ ++ int err, bit; ++ unsigned long pindex; ++ struct super_block *sb; ++ struct au_sbinfo *sbinfo; ++ ++ AuDebugOn(inode->i_nlink); ++ ++ sb = inode->i_sb; ++ xib_calc_bit(inode->i_ino, &pindex, &bit); ++ AuDebugOn(page_bits <= bit); ++ sbinfo = au_sbi(sb); ++ mutex_lock(&sbinfo->si_xib_mtx); ++ err = xib_pindex(sb, pindex); ++ if (!err) { ++ clear_bit(bit, sbinfo->si_xib_buf); ++ sbinfo->si_xib_next_bit = bit; ++ } ++ mutex_unlock(&sbinfo->si_xib_mtx); ++} ++ ++/* for s_op->delete_inode() */ ++void au_xino_delete_inode(struct inode *inode, const int unlinked) ++{ ++ int err; ++ unsigned int mnt_flags; ++ aufs_bindex_t bindex, bend, bi; ++ unsigned char try_trunc; ++ struct au_iinfo *iinfo; ++ struct super_block *sb; ++ struct au_hinode *hi; ++ struct inode *h_inode; ++ struct au_branch *br; ++ au_writef_t xwrite; ++ ++ sb = inode->i_sb; ++ mnt_flags = au_mntflags(sb); ++ if (!au_opt_test(mnt_flags, XINO) ++ || inode->i_ino == AUFS_ROOT_INO) ++ return; ++ ++ if (unlinked) { ++ 
au_xigen_inc(inode); ++ au_xib_clear_bit(inode); ++ } ++ ++ iinfo = au_ii(inode); ++ if (!iinfo) ++ return; ++ ++ bindex = iinfo->ii_bstart; ++ if (bindex < 0) ++ return; ++ ++ xwrite = au_sbi(sb)->si_xwrite; ++ try_trunc = !!au_opt_test(mnt_flags, TRUNC_XINO); ++ hi = iinfo->ii_hinode + bindex; ++ bend = iinfo->ii_bend; ++ for (; bindex <= bend; bindex++, hi++) { ++ h_inode = hi->hi_inode; ++ if (!h_inode ++ || (!unlinked && h_inode->i_nlink)) ++ continue; ++ ++ /* inode may not be revalidated */ ++ bi = au_br_index(sb, hi->hi_id); ++ if (bi < 0) ++ continue; ++ ++ br = au_sbr(sb, bi); ++ err = au_xino_do_write(xwrite, br->br_xino.xi_file, ++ h_inode->i_ino, /*ino*/0); ++ if (!err && try_trunc ++ && au_test_fs_trunc_xino(br->br_mnt->mnt_sb)) ++ xino_try_trunc(sb, br); ++ } ++} ++ ++/* get an unused inode number from bitmap */ ++ino_t au_xino_new_ino(struct super_block *sb) ++{ ++ ino_t ino; ++ unsigned long *p, pindex, ul, pend; ++ struct au_sbinfo *sbinfo; ++ struct file *file; ++ int free_bit, err; ++ ++ if (!au_opt_test(au_mntflags(sb), XINO)) ++ return iunique(sb, AUFS_FIRST_INO); ++ ++ sbinfo = au_sbi(sb); ++ mutex_lock(&sbinfo->si_xib_mtx); ++ p = sbinfo->si_xib_buf; ++ free_bit = sbinfo->si_xib_next_bit; ++ if (free_bit < page_bits && !test_bit(free_bit, p)) ++ goto out; /* success */ ++ free_bit = find_first_zero_bit(p, page_bits); ++ if (free_bit < page_bits) ++ goto out; /* success */ ++ ++ pindex = sbinfo->si_xib_last_pindex; ++ for (ul = pindex - 1; ul < ULONG_MAX; ul--) { ++ err = xib_pindex(sb, ul); ++ if (unlikely(err)) ++ goto out_err; ++ free_bit = find_first_zero_bit(p, page_bits); ++ if (free_bit < page_bits) ++ goto out; /* success */ ++ } ++ ++ file = sbinfo->si_xib; ++ pend = i_size_read(file->f_dentry->d_inode) / PAGE_SIZE; ++ for (ul = pindex + 1; ul <= pend; ul++) { ++ err = xib_pindex(sb, ul); ++ if (unlikely(err)) ++ goto out_err; ++ free_bit = find_first_zero_bit(p, page_bits); ++ if (free_bit < page_bits) ++ goto out; /* success */ ++ } ++ BUG(); ++ ++out: ++ set_bit(free_bit, p); ++ sbinfo->si_xib_next_bit = free_bit + 1; ++ pindex = sbinfo->si_xib_last_pindex; ++ mutex_unlock(&sbinfo->si_xib_mtx); ++ ino = xib_calc_ino(pindex, free_bit); ++ AuDbg("i%lu\n", (unsigned long)ino); ++ return ino; ++out_err: ++ mutex_unlock(&sbinfo->si_xib_mtx); ++ AuDbg("i0\n"); ++ return 0; ++} ++ ++/* ++ * read @ino from xinofile for the specified branch{@sb, @bindex} ++ * at the position of @h_ino. ++ * if @ino does not exist and @do_new is true, get new one. 
++ */ ++int au_xino_read(struct super_block *sb, aufs_bindex_t bindex, ino_t h_ino, ++ ino_t *ino) ++{ ++ int err; ++ ssize_t sz; ++ loff_t pos; ++ struct file *file; ++ struct au_sbinfo *sbinfo; ++ ++ *ino = 0; ++ if (!au_opt_test(au_mntflags(sb), XINO)) ++ return 0; /* no xino */ ++ ++ err = 0; ++ sbinfo = au_sbi(sb); ++ pos = h_ino; ++ if (unlikely(au_loff_max / sizeof(*ino) - 1 < pos)) { ++ AuIOErr1("too large hi%lu\n", (unsigned long)h_ino); ++ return -EFBIG; ++ } ++ pos *= sizeof(*ino); ++ ++ file = au_sbr(sb, bindex)->br_xino.xi_file; ++ if (i_size_read(file->f_dentry->d_inode) < pos + sizeof(*ino)) ++ return 0; /* no ino */ ++ ++ sz = xino_fread(sbinfo->si_xread, file, ino, sizeof(*ino), &pos); ++ if (sz == sizeof(*ino)) ++ return 0; /* success */ ++ ++ err = sz; ++ if (unlikely(sz >= 0)) { ++ err = -EIO; ++ AuIOErr("xino read error (%zd)\n", sz); ++ } ++ ++ return err; ++} ++ ++/* ---------------------------------------------------------------------- */ ++ ++/* create and set a new xino file */ ++ ++struct file *au_xino_create(struct super_block *sb, char *fname, int silent) ++{ ++ struct file *file; ++ struct dentry *h_parent, *d; ++ struct inode *h_dir; ++ int err; ++ ++ /* ++ * at mount-time, and the xino file is the default path, ++ * hnotify is disabled so we have no notify events to ignore. ++ * when a user specified the xino, we cannot get au_hdir to be ignored. ++ */ ++ file = vfsub_filp_open(fname, O_RDWR | O_CREAT | O_EXCL | O_LARGEFILE ++ /* | __FMODE_NONOTIFY */, ++ S_IRUGO | S_IWUGO); ++ if (IS_ERR(file)) { ++ if (!silent) ++ pr_err("open %s(%ld)\n", fname, PTR_ERR(file)); ++ return file; ++ } ++ ++ /* keep file count */ ++ h_parent = dget_parent(file->f_dentry); ++ h_dir = h_parent->d_inode; ++ mutex_lock_nested(&h_dir->i_mutex, AuLsc_I_PARENT); ++ /* mnt_want_write() is unnecessary here */ ++ err = vfsub_unlink(h_dir, &file->f_path, /*force*/0); ++ mutex_unlock(&h_dir->i_mutex); ++ dput(h_parent); ++ if (unlikely(err)) { ++ if (!silent) ++ pr_err("unlink %s(%d)\n", fname, err); ++ goto out; ++ } ++ ++ err = -EINVAL; ++ d = file->f_dentry; ++ if (unlikely(sb == d->d_sb)) { ++ if (!silent) ++ pr_err("%s must be outside\n", fname); ++ goto out; ++ } ++ if (unlikely(au_test_fs_bad_xino(d->d_sb))) { ++ if (!silent) ++ pr_err("xino doesn't support %s(%s)\n", ++ fname, au_sbtype(d->d_sb)); ++ goto out; ++ } ++ return file; /* success */ ++ ++out: ++ fput(file); ++ file = ERR_PTR(err); ++ return file; ++} ++ ++/* ++ * find another branch who is on the same filesystem of the specified ++ * branch{@btgt}. search until @bend. ++ */ ++static int is_sb_shared(struct super_block *sb, aufs_bindex_t btgt, ++ aufs_bindex_t bend) ++{ ++ aufs_bindex_t bindex; ++ struct super_block *tgt_sb = au_sbr_sb(sb, btgt); ++ ++ for (bindex = 0; bindex < btgt; bindex++) ++ if (unlikely(tgt_sb == au_sbr_sb(sb, bindex))) ++ return bindex; ++ for (bindex++; bindex <= bend; bindex++) ++ if (unlikely(tgt_sb == au_sbr_sb(sb, bindex))) ++ return bindex; ++ return -1; ++} ++ ++/* ---------------------------------------------------------------------- */ ++ ++/* ++ * initialize the xinofile for the specified branch @br ++ * at the place/path where @base_file indicates. ++ * test whether another branch is on the same filesystem or not, ++ * if @do_test is true. 
++ */ ++int au_xino_br(struct super_block *sb, struct au_branch *br, ino_t h_ino, ++ struct file *base_file, int do_test) ++{ ++ int err; ++ ino_t ino; ++ aufs_bindex_t bend, bindex; ++ struct au_branch *shared_br, *b; ++ struct file *file; ++ struct super_block *tgt_sb; ++ ++ shared_br = NULL; ++ bend = au_sbend(sb); ++ if (do_test) { ++ tgt_sb = br->br_mnt->mnt_sb; ++ for (bindex = 0; bindex <= bend; bindex++) { ++ b = au_sbr(sb, bindex); ++ if (tgt_sb == b->br_mnt->mnt_sb) { ++ shared_br = b; ++ break; ++ } ++ } ++ } ++ ++ if (!shared_br || !shared_br->br_xino.xi_file) { ++ struct au_xino_lock_dir ldir; ++ ++ au_xino_lock_dir(sb, base_file, &ldir); ++ /* mnt_want_write() is unnecessary here */ ++ file = au_xino_create2(base_file, NULL); ++ au_xino_unlock_dir(&ldir); ++ err = PTR_ERR(file); ++ if (IS_ERR(file)) ++ goto out; ++ br->br_xino.xi_file = file; ++ } else { ++ br->br_xino.xi_file = shared_br->br_xino.xi_file; ++ get_file(br->br_xino.xi_file); ++ } ++ ++ ino = AUFS_ROOT_INO; ++ err = au_xino_do_write(au_sbi(sb)->si_xwrite, br->br_xino.xi_file, ++ h_ino, ino); ++ if (unlikely(err)) { ++ fput(br->br_xino.xi_file); ++ br->br_xino.xi_file = NULL; ++ } ++ ++out: ++ return err; ++} ++ ++/* ---------------------------------------------------------------------- */ ++ ++/* trucate a xino bitmap file */ ++ ++/* todo: slow */ ++static int do_xib_restore(struct super_block *sb, struct file *file, void *page) ++{ ++ int err, bit; ++ ssize_t sz; ++ unsigned long pindex; ++ loff_t pos, pend; ++ struct au_sbinfo *sbinfo; ++ au_readf_t func; ++ ino_t *ino; ++ unsigned long *p; ++ ++ err = 0; ++ sbinfo = au_sbi(sb); ++ MtxMustLock(&sbinfo->si_xib_mtx); ++ p = sbinfo->si_xib_buf; ++ func = sbinfo->si_xread; ++ pend = i_size_read(file->f_dentry->d_inode); ++ pos = 0; ++ while (pos < pend) { ++ sz = xino_fread(func, file, page, PAGE_SIZE, &pos); ++ err = sz; ++ if (unlikely(sz <= 0)) ++ goto out; ++ ++ err = 0; ++ for (ino = page; sz > 0; ino++, sz -= sizeof(ino)) { ++ if (unlikely(*ino < AUFS_FIRST_INO)) ++ continue; ++ ++ xib_calc_bit(*ino, &pindex, &bit); ++ AuDebugOn(page_bits <= bit); ++ err = xib_pindex(sb, pindex); ++ if (!err) ++ set_bit(bit, p); ++ else ++ goto out; ++ } ++ } ++ ++out: ++ return err; ++} ++ ++static int xib_restore(struct super_block *sb) ++{ ++ int err; ++ aufs_bindex_t bindex, bend; ++ void *page; ++ ++ err = -ENOMEM; ++ page = (void *)__get_free_page(GFP_NOFS); ++ if (unlikely(!page)) ++ goto out; ++ ++ err = 0; ++ bend = au_sbend(sb); ++ for (bindex = 0; !err && bindex <= bend; bindex++) ++ if (!bindex || is_sb_shared(sb, bindex, bindex - 1) < 0) ++ err = do_xib_restore ++ (sb, au_sbr(sb, bindex)->br_xino.xi_file, page); ++ else ++ AuDbg("b%d\n", bindex); ++ free_page((unsigned long)page); ++ ++out: ++ return err; ++} ++ ++int au_xib_trunc(struct super_block *sb) ++{ ++ int err; ++ ssize_t sz; ++ loff_t pos; ++ struct au_xino_lock_dir ldir; ++ struct au_sbinfo *sbinfo; ++ unsigned long *p; ++ struct file *file; ++ ++ SiMustWriteLock(sb); ++ ++ err = 0; ++ sbinfo = au_sbi(sb); ++ if (!au_opt_test(sbinfo->si_mntflags, XINO)) ++ goto out; ++ ++ file = sbinfo->si_xib; ++ if (i_size_read(file->f_dentry->d_inode) <= PAGE_SIZE) ++ goto out; ++ ++ au_xino_lock_dir(sb, file, &ldir); ++ /* mnt_want_write() is unnecessary here */ ++ file = au_xino_create2(sbinfo->si_xib, NULL); ++ au_xino_unlock_dir(&ldir); ++ err = PTR_ERR(file); ++ if (IS_ERR(file)) ++ goto out; ++ fput(sbinfo->si_xib); ++ sbinfo->si_xib = file; ++ ++ p = sbinfo->si_xib_buf; ++ memset(p, 0, PAGE_SIZE); ++ pos = 
0; ++ sz = xino_fwrite(sbinfo->si_xwrite, sbinfo->si_xib, p, PAGE_SIZE, &pos); ++ if (unlikely(sz != PAGE_SIZE)) { ++ err = sz; ++ AuIOErr("err %d\n", err); ++ if (sz >= 0) ++ err = -EIO; ++ goto out; ++ } ++ ++ mutex_lock(&sbinfo->si_xib_mtx); ++ /* mnt_want_write() is unnecessary here */ ++ err = xib_restore(sb); ++ mutex_unlock(&sbinfo->si_xib_mtx); ++ ++out: ++ return err; ++} ++ ++/* ---------------------------------------------------------------------- */ ++ ++/* ++ * xino mount option handlers ++ */ ++static au_readf_t find_readf(struct file *h_file) ++{ ++ const struct file_operations *fop = h_file->f_op; ++ ++ if (fop) { ++ if (fop->read) ++ return fop->read; ++ if (fop->aio_read) ++ return do_sync_read; ++ } ++ return ERR_PTR(-ENOSYS); ++} ++ ++static au_writef_t find_writef(struct file *h_file) ++{ ++ const struct file_operations *fop = h_file->f_op; ++ ++ if (fop) { ++ if (fop->write) ++ return fop->write; ++ if (fop->aio_write) ++ return do_sync_write; ++ } ++ return ERR_PTR(-ENOSYS); ++} ++ ++/* xino bitmap */ ++static void xino_clear_xib(struct super_block *sb) ++{ ++ struct au_sbinfo *sbinfo; ++ ++ SiMustWriteLock(sb); ++ ++ sbinfo = au_sbi(sb); ++ sbinfo->si_xread = NULL; ++ sbinfo->si_xwrite = NULL; ++ if (sbinfo->si_xib) ++ fput(sbinfo->si_xib); ++ sbinfo->si_xib = NULL; ++ free_page((unsigned long)sbinfo->si_xib_buf); ++ sbinfo->si_xib_buf = NULL; ++} ++ ++static int au_xino_set_xib(struct super_block *sb, struct file *base) ++{ ++ int err; ++ loff_t pos; ++ struct au_sbinfo *sbinfo; ++ struct file *file; ++ ++ SiMustWriteLock(sb); ++ ++ sbinfo = au_sbi(sb); ++ file = au_xino_create2(base, sbinfo->si_xib); ++ err = PTR_ERR(file); ++ if (IS_ERR(file)) ++ goto out; ++ if (sbinfo->si_xib) ++ fput(sbinfo->si_xib); ++ sbinfo->si_xib = file; ++ sbinfo->si_xread = find_readf(file); ++ sbinfo->si_xwrite = find_writef(file); ++ ++ err = -ENOMEM; ++ if (!sbinfo->si_xib_buf) ++ sbinfo->si_xib_buf = (void *)get_zeroed_page(GFP_NOFS); ++ if (unlikely(!sbinfo->si_xib_buf)) ++ goto out_unset; ++ ++ sbinfo->si_xib_last_pindex = 0; ++ sbinfo->si_xib_next_bit = 0; ++ if (i_size_read(file->f_dentry->d_inode) < PAGE_SIZE) { ++ pos = 0; ++ err = xino_fwrite(sbinfo->si_xwrite, file, sbinfo->si_xib_buf, ++ PAGE_SIZE, &pos); ++ if (unlikely(err != PAGE_SIZE)) ++ goto out_free; ++ } ++ err = 0; ++ goto out; /* success */ ++ ++out_free: ++ free_page((unsigned long)sbinfo->si_xib_buf); ++ sbinfo->si_xib_buf = NULL; ++ if (err >= 0) ++ err = -EIO; ++out_unset: ++ fput(sbinfo->si_xib); ++ sbinfo->si_xib = NULL; ++ sbinfo->si_xread = NULL; ++ sbinfo->si_xwrite = NULL; ++out: ++ return err; ++} ++ ++/* xino for each branch */ ++static void xino_clear_br(struct super_block *sb) ++{ ++ aufs_bindex_t bindex, bend; ++ struct au_branch *br; ++ ++ bend = au_sbend(sb); ++ for (bindex = 0; bindex <= bend; bindex++) { ++ br = au_sbr(sb, bindex); ++ if (!br || !br->br_xino.xi_file) ++ continue; ++ ++ fput(br->br_xino.xi_file); ++ br->br_xino.xi_file = NULL; ++ } ++} ++ ++static int au_xino_set_br(struct super_block *sb, struct file *base) ++{ ++ int err; ++ ino_t ino; ++ aufs_bindex_t bindex, bend, bshared; ++ struct { ++ struct file *old, *new; ++ } *fpair, *p; ++ struct au_branch *br; ++ struct inode *inode; ++ au_writef_t writef; ++ ++ SiMustWriteLock(sb); ++ ++ err = -ENOMEM; ++ bend = au_sbend(sb); ++ fpair = kcalloc(bend + 1, sizeof(*fpair), GFP_NOFS); ++ if (unlikely(!fpair)) ++ goto out; ++ ++ inode = sb->s_root->d_inode; ++ ino = AUFS_ROOT_INO; ++ writef = au_sbi(sb)->si_xwrite; ++ for (bindex = 0, p 
= fpair; bindex <= bend; bindex++, p++) { ++ br = au_sbr(sb, bindex); ++ bshared = is_sb_shared(sb, bindex, bindex - 1); ++ if (bshared >= 0) { ++ /* shared xino */ ++ *p = fpair[bshared]; ++ get_file(p->new); ++ } ++ ++ if (!p->new) { ++ /* new xino */ ++ p->old = br->br_xino.xi_file; ++ p->new = au_xino_create2(base, br->br_xino.xi_file); ++ err = PTR_ERR(p->new); ++ if (IS_ERR(p->new)) { ++ p->new = NULL; ++ goto out_pair; ++ } ++ } ++ ++ err = au_xino_do_write(writef, p->new, ++ au_h_iptr(inode, bindex)->i_ino, ino); ++ if (unlikely(err)) ++ goto out_pair; ++ } ++ ++ for (bindex = 0, p = fpair; bindex <= bend; bindex++, p++) { ++ br = au_sbr(sb, bindex); ++ if (br->br_xino.xi_file) ++ fput(br->br_xino.xi_file); ++ get_file(p->new); ++ br->br_xino.xi_file = p->new; ++ } ++ ++out_pair: ++ for (bindex = 0, p = fpair; bindex <= bend; bindex++, p++) ++ if (p->new) ++ fput(p->new); ++ else ++ break; ++ kfree(fpair); ++out: ++ return err; ++} ++ ++void au_xino_clr(struct super_block *sb) ++{ ++ struct au_sbinfo *sbinfo; ++ ++ au_xigen_clr(sb); ++ xino_clear_xib(sb); ++ xino_clear_br(sb); ++ sbinfo = au_sbi(sb); ++ /* lvalue, do not call au_mntflags() */ ++ au_opt_clr(sbinfo->si_mntflags, XINO); ++} ++ ++int au_xino_set(struct super_block *sb, struct au_opt_xino *xino, int remount) ++{ ++ int err, skip; ++ struct dentry *parent, *cur_parent; ++ struct qstr *dname, *cur_name; ++ struct file *cur_xino; ++ struct inode *dir; ++ struct au_sbinfo *sbinfo; ++ ++ SiMustWriteLock(sb); ++ ++ err = 0; ++ sbinfo = au_sbi(sb); ++ parent = dget_parent(xino->file->f_dentry); ++ if (remount) { ++ skip = 0; ++ dname = &xino->file->f_dentry->d_name; ++ cur_xino = sbinfo->si_xib; ++ if (cur_xino) { ++ cur_parent = dget_parent(cur_xino->f_dentry); ++ cur_name = &cur_xino->f_dentry->d_name; ++ skip = (cur_parent == parent ++ && dname->len == cur_name->len ++ && !memcmp(dname->name, cur_name->name, ++ dname->len)); ++ dput(cur_parent); ++ } ++ if (skip) ++ goto out; ++ } ++ ++ au_opt_set(sbinfo->si_mntflags, XINO); ++ dir = parent->d_inode; ++ mutex_lock_nested(&dir->i_mutex, AuLsc_I_PARENT); ++ /* mnt_want_write() is unnecessary here */ ++ err = au_xino_set_xib(sb, xino->file); ++ if (!err) ++ err = au_xigen_set(sb, xino->file); ++ if (!err) ++ err = au_xino_set_br(sb, xino->file); ++ mutex_unlock(&dir->i_mutex); ++ if (!err) ++ goto out; /* success */ ++ ++ /* reset all */ ++ AuIOErr("failed creating xino(%d).\n", err); ++ ++out: ++ dput(parent); ++ return err; ++} ++ ++/* ---------------------------------------------------------------------- */ ++ ++/* ++ * create a xinofile at the default place/path. 
++ */ ++struct file *au_xino_def(struct super_block *sb) ++{ ++ struct file *file; ++ char *page, *p; ++ struct au_branch *br; ++ struct super_block *h_sb; ++ struct path path; ++ aufs_bindex_t bend, bindex, bwr; ++ ++ br = NULL; ++ bend = au_sbend(sb); ++ bwr = -1; ++ for (bindex = 0; bindex <= bend; bindex++) { ++ br = au_sbr(sb, bindex); ++ if (au_br_writable(br->br_perm) ++ && !au_test_fs_bad_xino(br->br_mnt->mnt_sb)) { ++ bwr = bindex; ++ break; ++ } ++ } ++ ++ if (bwr >= 0) { ++ file = ERR_PTR(-ENOMEM); ++ page = __getname_gfp(GFP_NOFS); ++ if (unlikely(!page)) ++ goto out; ++ path.mnt = br->br_mnt; ++ path.dentry = au_h_dptr(sb->s_root, bwr); ++ p = d_path(&path, page, PATH_MAX - sizeof(AUFS_XINO_FNAME)); ++ file = (void *)p; ++ if (!IS_ERR(p)) { ++ strcat(p, "/" AUFS_XINO_FNAME); ++ AuDbg("%s\n", p); ++ file = au_xino_create(sb, p, /*silent*/0); ++ if (!IS_ERR(file)) ++ au_xino_brid_set(sb, br->br_id); ++ } ++ __putname(page); ++ } else { ++ file = au_xino_create(sb, AUFS_XINO_DEFPATH, /*silent*/0); ++ if (IS_ERR(file)) ++ goto out; ++ h_sb = file->f_dentry->d_sb; ++ if (unlikely(au_test_fs_bad_xino(h_sb))) { ++ pr_err("xino doesn't support %s(%s)\n", ++ AUFS_XINO_DEFPATH, au_sbtype(h_sb)); ++ fput(file); ++ file = ERR_PTR(-EINVAL); ++ } ++ if (!IS_ERR(file)) ++ au_xino_brid_set(sb, -1); ++ } ++ ++out: ++ return file; ++} ++ ++/* ---------------------------------------------------------------------- */ ++ ++int au_xino_path(struct seq_file *seq, struct file *file) ++{ ++ int err; ++ ++ err = au_seq_path(seq, &file->f_path); ++ if (unlikely(err < 0)) ++ goto out; ++ ++ err = 0; ++#define Deleted "\\040(deleted)" ++ seq->count -= sizeof(Deleted) - 1; ++ AuDebugOn(memcmp(seq->buf + seq->count, Deleted, ++ sizeof(Deleted) - 1)); ++#undef Deleted ++ ++out: ++ return err; ++} +--- /dev/null 2012-03-14 12:35:58.848999748 +0100 ++++ b/include/linux/aufs_type.h 2012-03-20 17:31:18.000000000 +0100 +@@ -0,0 +1,233 @@ ++/* ++ * Copyright (C) 2005-2012 Junjiro R. Okajima ++ * ++ * This program, aufs is free software; you can redistribute it and/or modify ++ * it under the terms of the GNU General Public License as published by ++ * the Free Software Foundation; either version 2 of the License, or ++ * (at your option) any later version. ++ * ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ * GNU General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public License ++ * along with this program; if not, write to the Free Software ++ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA ++ */ ++ ++#ifndef __AUFS_TYPE_H__ ++#define __AUFS_TYPE_H__ ++ ++#define AUFS_NAME "aufs" ++ ++#ifdef __KERNEL__ ++/* ++ * define it before including all other headers. ++ * sched.h may use pr_* macros before defining "current", so define the ++ * no-current version first, and re-define later. ++ */ ++#define pr_fmt(fmt) AUFS_NAME " %s:%d: " fmt, __func__, __LINE__ ++#include ++#undef pr_fmt ++#define pr_fmt(fmt) AUFS_NAME " %s:%d:%s[%d]: " fmt, \ ++ __func__, __LINE__, current->comm, current->pid ++#else ++#include ++#include ++#endif /* __KERNEL__ */ ++ ++#include ++ ++#define AUFS_VERSION "3.x-rcN-20120312" ++ ++/* todo? 
move this to linux-2.6.19/include/magic.h */ ++#define AUFS_SUPER_MAGIC ('a' << 24 | 'u' << 16 | 'f' << 8 | 's') ++ ++/* ---------------------------------------------------------------------- */ ++ ++#ifdef CONFIG_AUFS_BRANCH_MAX_127 ++typedef int8_t aufs_bindex_t; ++#define AUFS_BRANCH_MAX 127 ++#else ++typedef int16_t aufs_bindex_t; ++#ifdef CONFIG_AUFS_BRANCH_MAX_511 ++#define AUFS_BRANCH_MAX 511 ++#elif defined(CONFIG_AUFS_BRANCH_MAX_1023) ++#define AUFS_BRANCH_MAX 1023 ++#elif defined(CONFIG_AUFS_BRANCH_MAX_32767) ++#define AUFS_BRANCH_MAX 32767 ++#endif ++#endif ++ ++#ifdef __KERNEL__ ++#ifndef AUFS_BRANCH_MAX ++#error unknown CONFIG_AUFS_BRANCH_MAX value ++#endif ++#endif /* __KERNEL__ */ ++ ++/* ---------------------------------------------------------------------- */ ++ ++#define AUFS_FSTYPE AUFS_NAME ++ ++#define AUFS_ROOT_INO 2 ++#define AUFS_FIRST_INO 11 ++ ++#define AUFS_WH_PFX ".wh." ++#define AUFS_WH_PFX_LEN ((int)sizeof(AUFS_WH_PFX) - 1) ++#define AUFS_WH_TMP_LEN 4 ++/* a limit for rmdir/rename a dir */ ++#define AUFS_MAX_NAMELEN (NAME_MAX \ ++ - AUFS_WH_PFX_LEN * 2 /* doubly whiteouted */\ ++ - 1 /* dot */\ ++ - AUFS_WH_TMP_LEN) /* hex */ ++#define AUFS_XINO_FNAME "." AUFS_NAME ".xino" ++#define AUFS_XINO_DEFPATH "/tmp/" AUFS_XINO_FNAME ++#define AUFS_XINO_TRUNC_INIT 64 /* blocks */ ++#define AUFS_XINO_TRUNC_STEP 4 /* blocks */ ++#define AUFS_DIRWH_DEF 3 ++#define AUFS_RDCACHE_DEF 10 /* seconds */ ++#define AUFS_RDCACHE_MAX 3600 /* seconds */ ++#define AUFS_RDBLK_DEF 512 /* bytes */ ++#define AUFS_RDHASH_DEF 32 ++#define AUFS_WKQ_NAME AUFS_NAME "d" ++#define AUFS_MFS_DEF_SEC 30 /* seconds */ ++#define AUFS_MFS_MAX_SEC 3600 /* seconds */ ++#define AUFS_PLINK_WARN 100 /* number of plinks */ ++ ++/* pseudo-link maintenace under /proc */ ++#define AUFS_PLINK_MAINT_NAME "plink_maint" ++#define AUFS_PLINK_MAINT_DIR "fs/" AUFS_NAME ++#define AUFS_PLINK_MAINT_PATH AUFS_PLINK_MAINT_DIR "/" AUFS_PLINK_MAINT_NAME ++ ++#define AUFS_DIROPQ_NAME AUFS_WH_PFX ".opq" /* whiteouted doubly */ ++#define AUFS_WH_DIROPQ AUFS_WH_PFX AUFS_DIROPQ_NAME ++ ++#define AUFS_BASE_NAME AUFS_WH_PFX AUFS_NAME ++#define AUFS_PLINKDIR_NAME AUFS_WH_PFX "plnk" ++#define AUFS_ORPHDIR_NAME AUFS_WH_PFX "orph" ++ ++/* doubly whiteouted */ ++#define AUFS_WH_BASE AUFS_WH_PFX AUFS_BASE_NAME ++#define AUFS_WH_PLINKDIR AUFS_WH_PFX AUFS_PLINKDIR_NAME ++#define AUFS_WH_ORPHDIR AUFS_WH_PFX AUFS_ORPHDIR_NAME ++ ++/* branch permissions and attributes */ ++#define AUFS_BRPERM_RW "rw" ++#define AUFS_BRPERM_RO "ro" ++#define AUFS_BRPERM_RR "rr" ++#define AUFS_BRRATTR_WH "wh" ++#define AUFS_BRWATTR_NLWH "nolwh" ++ ++/* ---------------------------------------------------------------------- */ ++ ++/* ioctl */ ++enum { ++ /* readdir in userspace */ ++ AuCtl_RDU, ++ AuCtl_RDU_INO, ++ ++ /* pathconf wrapper */ ++ AuCtl_WBR_FD, ++ ++ /* busy inode */ ++ AuCtl_IBUSY ++}; ++ ++/* borrowed from linux/include/linux/kernel.h */ ++#ifndef ALIGN ++#define ALIGN(x, a) __ALIGN_MASK(x, (typeof(x))(a)-1) ++#define __ALIGN_MASK(x, mask) (((x)+(mask))&~(mask)) ++#endif ++ ++/* borrowed from linux/include/linux/compiler-gcc3.h */ ++#ifndef __aligned ++#define __aligned(x) __attribute__((aligned(x))) ++#endif ++ ++#ifdef __KERNEL__ ++#ifndef __packed ++#define __packed __attribute__((packed)) ++#endif ++#endif ++ ++struct au_rdu_cookie { ++ uint64_t h_pos; ++ int16_t bindex; ++ uint8_t flags; ++ uint8_t pad; ++ uint32_t generation; ++} __aligned(8); ++ ++struct au_rdu_ent { ++ uint64_t ino; ++ int16_t bindex; ++ uint8_t type; ++ uint8_t nlen; ++ 
uint8_t wh; ++ char name[0]; ++} __aligned(8); ++ ++static inline int au_rdu_len(int nlen) ++{ ++ /* include the terminating NULL */ ++ return ALIGN(sizeof(struct au_rdu_ent) + nlen + 1, ++ sizeof(uint64_t)); ++} ++ ++union au_rdu_ent_ul { ++ struct au_rdu_ent __user *e; ++ uint64_t ul; ++}; ++ ++enum { ++ AufsCtlRduV_SZ, ++ AufsCtlRduV_End ++}; ++ ++struct aufs_rdu { ++ /* input */ ++ union { ++ uint64_t sz; /* AuCtl_RDU */ ++ uint64_t nent; /* AuCtl_RDU_INO */ ++ }; ++ union au_rdu_ent_ul ent; ++ uint16_t verify[AufsCtlRduV_End]; ++ ++ /* input/output */ ++ uint32_t blk; ++ ++ /* output */ ++ union au_rdu_ent_ul tail; ++ /* number of entries which were added in a single call */ ++ uint64_t rent; ++ uint8_t full; ++ uint8_t shwh; ++ ++ struct au_rdu_cookie cookie; ++} __aligned(8); ++ ++/* ---------------------------------------------------------------------- */ ++ ++struct aufs_wbr_fd { ++ uint32_t oflags; ++ int16_t brid; ++} __aligned(8); ++ ++/* ---------------------------------------------------------------------- */ ++ ++struct aufs_ibusy { ++ uint64_t ino, h_ino; ++ int16_t bindex; ++} __aligned(8); ++ ++/* ---------------------------------------------------------------------- */ ++ ++#define AuCtlType 'A' ++#define AUFS_CTL_RDU _IOWR(AuCtlType, AuCtl_RDU, struct aufs_rdu) ++#define AUFS_CTL_RDU_INO _IOWR(AuCtlType, AuCtl_RDU_INO, struct aufs_rdu) ++#define AUFS_CTL_WBR_FD _IOW(AuCtlType, AuCtl_WBR_FD, \ ++ struct aufs_wbr_fd) ++#define AUFS_CTL_IBUSY _IOWR(AuCtlType, AuCtl_IBUSY, struct aufs_ibusy) ++ ++#endif /* __AUFS_TYPE_H__ */ +aufs3.x-rcN proc_map patch + +diff --git a/fs/proc/nommu.c b/fs/proc/nommu.c +index b1822dd..8b29ab7 100644 +--- a/fs/proc/nommu.c ++++ b/fs/proc/nommu.c +@@ -46,6 +46,10 @@ static int nommu_region_show(struct seq_file *m, struct vm_region *region) + + if (file) { + struct inode *inode = region->vm_file->f_path.dentry->d_inode; ++ if (region->vm_prfile) { ++ file = region->vm_prfile; ++ inode = file->f_path.dentry->d_inode; ++ } + dev = inode->i_sb->s_dev; + ino = inode->i_ino; + } +diff --git a/fs/proc/task_mmu.c b/fs/proc/task_mmu.c +index 7dcd2a2..05a146b 100644 +--- a/fs/proc/task_mmu.c ++++ b/fs/proc/task_mmu.c +@@ -222,6 +222,10 @@ static void show_map_vma(struct seq_file *m, struct vm_area_struct *vma) + + if (file) { + struct inode *inode = vma->vm_file->f_path.dentry->d_inode; ++ if (vma->vm_prfile) { ++ file = vma->vm_prfile; ++ inode = file->f_path.dentry->d_inode; ++ } + dev = inode->i_sb->s_dev; + ino = inode->i_ino; + pgoff = ((loff_t)vma->vm_pgoff) << PAGE_SHIFT; +@@ -1036,6 +1040,8 @@ static int show_numa_map(struct seq_file *m, void *v) + + if (file) { + seq_printf(m, " file="); ++ if (vma->vm_prfile) ++ file = vma->vm_prfile; + seq_path(m, &file->f_path, "\n\t= "); + } else if (vma->vm_start <= mm->brk && vma->vm_end >= mm->start_brk) { + seq_printf(m, " heap"); +diff --git a/fs/proc/task_nommu.c b/fs/proc/task_nommu.c +index 980de54..4ee031f 100644 +--- a/fs/proc/task_nommu.c ++++ b/fs/proc/task_nommu.c +@@ -148,6 +148,10 @@ static int nommu_vma_show(struct seq_file *m, struct vm_area_struct *vma) + + if (file) { + struct inode *inode = vma->vm_file->f_path.dentry->d_inode; ++ if (vma->vm_prfile) { ++ file = vma->vm_prfile; ++ inode = file->f_path.dentry->d_inode; ++ } + dev = inode->i_sb->s_dev; + ino = inode->i_ino; + pgoff = (loff_t)vma->vm_pgoff << PAGE_SHIFT; +diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h +index 3cc3062..9742239 100644 +--- a/include/linux/mm_types.h ++++ b/include/linux/mm_types.h +@@ -185,6 
+185,7 @@ struct vm_region { + unsigned long vm_top; /* region allocated to here */ + unsigned long vm_pgoff; /* the offset in vm_file corresponding to vm_start */ + struct file *vm_file; /* the backing file or NULL */ ++ struct file *vm_prfile; /* the virtual backing file or NULL */ + + int vm_usage; /* region usage count (access under nommu_region_sem) */ + bool vm_icache_flushed : 1; /* true if the icache has been flushed for +@@ -244,6 +245,7 @@ struct vm_area_struct { + unsigned long vm_pgoff; /* Offset (within vm_file) in PAGE_SIZE + units, *not* PAGE_CACHE_SIZE */ + struct file * vm_file; /* File we map to (can be NULL). */ ++ struct file *vm_prfile; /* shadow of vm_file */ + void * vm_private_data; /* was vm_pte (shared mem) */ + + #ifndef CONFIG_MMU +diff --git a/kernel/fork.c b/kernel/fork.c +index e2cd3e2..2c322f7 100644 +--- a/kernel/fork.c ++++ b/kernel/fork.c +@@ -380,6 +380,8 @@ static int dup_mmap(struct mm_struct *mm, struct mm_struct *oldmm) + struct address_space *mapping = file->f_mapping; + + get_file(file); ++ if (tmp->vm_prfile) ++ get_file(tmp->vm_prfile); + if (tmp->vm_flags & VM_DENYWRITE) + atomic_dec(&inode->i_writecount); + mutex_lock(&mapping->i_mmap_mutex); +diff --git a/mm/memory.c b/mm/memory.c +index fa2f04e..03ff1bc 100644 +--- a/mm/memory.c ++++ b/mm/memory.c +@@ -2641,6 +2641,8 @@ reuse: + /* file_update_time outside page_lock */ + if (vma->vm_file) + file_update_time(vma->vm_file); ++ if (vma->vm_prfile) ++ file_update_time(vma->vm_prfile); + + return ret; + } +@@ -3326,6 +3328,8 @@ static int __do_fault(struct mm_struct *mm, struct vm_area_struct *vma, + /* file_update_time outside page_lock */ + if (vma->vm_file) + file_update_time(vma->vm_file); ++ if (vma->vm_prfile) ++ file_update_time(vma->vm_prfile); + } else { + unlock_page(vmf.page); + if (anon) +diff --git a/mm/mmap.c b/mm/mmap.c +index 3f758c7..5518dd3 100644 +--- a/mm/mmap.c ++++ b/mm/mmap.c +@@ -232,6 +232,8 @@ static struct vm_area_struct *remove_vma(struct vm_area_struct *vma) + vma->vm_ops->close(vma); + if (vma->vm_file) { + fput(vma->vm_file); ++ if (vma->vm_prfile) ++ fput(vma->vm_prfile); + if (vma->vm_flags & VM_EXECUTABLE) + removed_exe_file_vma(vma->vm_mm); + } +@@ -619,6 +621,8 @@ again: remove_next = 1 + (end > next->vm_end); + if (remove_next) { + if (file) { + fput(file); ++ if (vma->vm_prfile) ++ fput(vma->vm_prfile); + if (next->vm_flags & VM_EXECUTABLE) + removed_exe_file_vma(mm); + } +@@ -1945,6 +1949,8 @@ static int __split_vma(struct mm_struct * mm, struct vm_area_struct * vma, + + if (new->vm_file) { + get_file(new->vm_file); ++ if (new->vm_prfile) ++ get_file(new->vm_prfile); + if (vma->vm_flags & VM_EXECUTABLE) + added_exe_file_vma(mm); + } +@@ -1969,6 +1975,8 @@ static int __split_vma(struct mm_struct * mm, struct vm_area_struct * vma, + if (vma->vm_flags & VM_EXECUTABLE) + removed_exe_file_vma(mm); + fput(new->vm_file); ++ if (new->vm_prfile) ++ fput(new->vm_prfile); + } + unlink_anon_vmas(new); + out_free_mpol: +@@ -2354,6 +2362,8 @@ struct vm_area_struct *copy_vma(struct vm_area_struct **vmap, + new_vma->vm_pgoff = pgoff; + if (new_vma->vm_file) { + get_file(new_vma->vm_file); ++ if (new_vma->vm_prfile) ++ get_file(new_vma->vm_prfile); + if (vma->vm_flags & VM_EXECUTABLE) + added_exe_file_vma(mm); + } +diff --git a/mm/nommu.c b/mm/nommu.c +index f59e170..c24bbf3 100644 +--- a/mm/nommu.c ++++ b/mm/nommu.c +@@ -633,6 +633,8 @@ static void __put_nommu_region(struct vm_region *region) + + if (region->vm_file) + fput(region->vm_file); ++ if (region->vm_prfile) ++ 
fput(region->vm_prfile); + + /* IO memory and memory shared directly out of the pagecache + * from ramfs/tmpfs mustn't be released here */ +@@ -791,6 +793,8 @@ static void delete_vma(struct mm_struct *mm, struct vm_area_struct *vma) + vma->vm_ops->close(vma); + if (vma->vm_file) { + fput(vma->vm_file); ++ if (vma->vm_prfile) ++ fput(vma->vm_prfile); + if (vma->vm_flags & VM_EXECUTABLE) + removed_exe_file_vma(mm); + } +@@ -1364,6 +1368,8 @@ unsigned long do_mmap_pgoff(struct file *file, + } + } + fput(region->vm_file); ++ if (region->vm_prfile) ++ fput(region->vm_prfile); + kmem_cache_free(vm_region_jar, region); + region = pregion; + result = start; +@@ -1440,9 +1446,13 @@ error_just_free: + error: + if (region->vm_file) + fput(region->vm_file); ++ if (region->vm_prfile) ++ fput(region->vm_prfile); + kmem_cache_free(vm_region_jar, region); + if (vma->vm_file) + fput(vma->vm_file); ++ if (vma->vm_prfile) ++ fput(vma->vm_prfile); + if (vma->vm_flags & VM_EXECUTABLE) + removed_exe_file_vma(vma->vm_mm); + kmem_cache_free(vm_area_cachep, vma); diff --git a/3.3.8/cloneconfig.patch b/3.3.8/cloneconfig.patch new file mode 100644 index 0000000..4bfb615 --- /dev/null +++ b/3.3.8/cloneconfig.patch @@ -0,0 +1,41 @@ +From: Andreas Gruenbacher +Subject: Add ``cloneconfig'' target +Patch-mainline: Submitted 24 Feb 2011 + +Cloneconfig takes the first configuration it finds which appears +to belong to the running kernel, and configures the kernel sources +to match this configuration as closely as possible. + +Signed-off-by: Andreas Gruenbacher +Signed-off-by: Jeff Mahoney +--- + + scripts/kconfig/Makefile | 17 +++++++++++++++++ + 1 file changed, 17 insertions(+) + +--- a/scripts/kconfig/Makefile ++++ b/scripts/kconfig/Makefile +@@ -99,6 +99,23 @@ PHONY += allnoconfig allyesconfig allmod + + allnoconfig allyesconfig allmodconfig alldefconfig randconfig: $(obj)/conf + $< --$@ $(Kconfig) ++ ++UNAME_RELEASE := $(shell uname -r) ++CLONECONFIG := $(firstword $(wildcard /proc/config.gz \ ++ /lib/modules/$(UNAME_RELEASE)/.config \ ++ /etc/kernel-config \ ++ /boot/config-$(UNAME_RELEASE))) ++cloneconfig: $(obj)/conf ++ $(Q)case "$(CLONECONFIG)" in \ ++ '') echo -e "The configuration of the running" \ ++ "kernel could not be determined\n"; \ ++ false ;; \ ++ *.gz) gzip -cd $(CLONECONFIG) > .config.running ;; \ ++ *) cat $(CLONECONFIG) > .config.running ;; \ ++ esac && \ ++ echo -e "Cloning configuration file $(CLONECONFIG)\n" ++ $(Q)$< --defconfig=.config.running arch/$(SRCARCH)/Kconfig ++ + + PHONY += listnewconfig oldnoconfig savedefconfig defconfig + diff --git a/3.3.8/colored-printk-3.3.8.patch b/3.3.8/colored-printk-3.3.8.patch new file mode 100644 index 0000000..b9ab83d --- /dev/null +++ b/3.3.8/colored-printk-3.3.8.patch @@ -0,0 +1,337 @@ +diff -pruN -X linux/Documentation/dontdiff linux-2.6.29.3/arch/x86/kernel/early_printk.c linux-2.6.29.3-cprintk/arch/x86/kernel/early_printk.c +--- a/arch/x86/kernel/early_printk.c 2009-03-24 00:12:14.000000000 +0100 ++++ b/arch/x86/kernel/early_printk.c 2009-05-09 16:10:36.000000000 +0200 +@@ -23,7 +23,8 @@ + static int max_ypos = 25, max_xpos = 80; + static int current_ypos = 25, current_xpos; + +-static void early_vga_write(struct console *con, const char *str, unsigned n) ++static void early_vga_write(struct console *con, const char *str, unsigned n, ++ unsigned int loglevel) + { + char c; + int i, k, j; +@@ -93,7 +94,8 @@ static int early_serial_putc(unsigned ch + return timeout ? 
0 : -1; + } + +-static void early_serial_write(struct console *con, const char *s, unsigned n) ++static void early_serial_write(struct console *con, const char *s, unsigned n, ++ unsigned int loglevel) + { + while (*s && n-- > 0) { + if (*s == '\n') +@@ -887,7 +889,7 @@ asmlinkage void early_printk(const char + + va_start(ap, fmt); + n = vscnprintf(buf, sizeof(buf), fmt, ap); +- early_console->write(early_console, buf, n); ++ early_console->write(early_console, buf, n, 0); + va_end(ap); + } + +diff -pruN -X linux/Documentation/dontdiff linux-2.6.29.3/drivers/char/Kconfig linux-2.6.29.3-cprintk/drivers/tty/Kconfig +--- a/drivers/char/Kconfig 2009-03-24 00:12:14.000000000 +0100 ++++ b/drivers/tty/Kconfig 2009-05-09 14:43:48.000000000 +0200 +@@ -66,6 +66,111 @@ config VT_CONSOLE + + If unsure, say Y. + ++menuconfig VT_CKO ++ bool "Colored kernel message output" ++ depends on VT_CONSOLE ++ ---help--- ++ This option enables kernel messages to be emitted in ++ colors other than the default. ++ ++ The color value you need to enter is composed (OR-ed) ++ of a foreground and a background color. ++ ++ Foreground: ++ 0x00 = black, 0x08 = dark gray, ++ 0x01 = red, 0x09 = light red, ++ 0x02 = green, 0x0A = light green, ++ 0x03 = brown, 0x0B = yellow, ++ 0x04 = blue, 0x0C = light blue, ++ 0x05 = magenta, 0x0D = light magenta, ++ 0x06 = cyan, 0x0E = light cyan, ++ 0x07 = gray, 0x0F = white, ++ ++ (Foreground colors 0x08 to 0x0F do not work when a VGA ++ console font with 512 glyphs is used.) ++ ++ Background: ++ 0x00 = black, 0x40 = blue, ++ 0x10 = red, 0x50 = magenta, ++ 0x20 = green, 0x60 = cyan, ++ 0x30 = brown, 0x70 = gray, ++ ++ For example, 0x1F would yield white on red. ++ ++ If unsure, say N. ++ ++config VT_PRINTK_EMERG_COLOR ++ hex "Emergency messages color" ++ range 0x00 0xFF ++ depends on VT_CKO ++ default 0x07 ++ ---help--- ++ This option defines with which color kernel emergency messages will ++ be printed to the console. ++ ++config VT_PRINTK_ALERT_COLOR ++ hex "Alert messages color" ++ range 0x00 0xFF ++ depends on VT_CKO ++ default 0x07 ++ ---help--- ++ This option defines with which color kernel alert messages will ++ be printed to the console. ++ ++config VT_PRINTK_CRIT_COLOR ++ hex "Critical messages color" ++ range 0x00 0xFF ++ depends on VT_CKO ++ default 0x07 ++ ---help--- ++ This option defines with which color kernel critical messages will ++ be printed to the console. ++ ++config VT_PRINTK_ERR_COLOR ++ hex "Error messages color" ++ range 0x00 0xFF ++ depends on VT_CKO ++ default 0x07 ++ ---help--- ++ This option defines with which color kernel error messages will ++ be printed to the console. ++ ++config VT_PRINTK_WARNING_COLOR ++ hex "Warning messages color" ++ range 0x00 0xFF ++ depends on VT_CKO ++ default 0x07 ++ ---help--- ++ This option defines with which color kernel warning messages will ++ be printed to the console. ++ ++config VT_PRINTK_NOTICE_COLOR ++ hex "Notice messages color" ++ range 0x00 0xFF ++ depends on VT_CKO ++ default 0x07 ++ ---help--- ++ This option defines with which color kernel notice messages will ++ be printed to the console. ++ ++config VT_PRINTK_INFO_COLOR ++ hex "Information messages color" ++ range 0x00 0xFF ++ depends on VT_CKO ++ default 0x07 ++ ---help--- ++ This option defines with which color kernel information messages will ++ be printed to the console. 
++ ++config VT_PRINTK_DEBUG_COLOR ++ hex "Debug messages color" ++ range 0x00 0xFF ++ depends on VT_CKO ++ default 0x07 ++ ---help--- ++ This option defines with which color kernel debug messages will ++ be printed to the console. ++ + config HW_CONSOLE + bool + depends on VT && !S390 && !UML +diff -pruN -X linux/Documentation/dontdiff linux-2.6.29.3/drivers/char/vt.c linux-2.6.29.3-cprintk/drivers/tty/vt/vt.c +--- a/drivers/char/vt.c 2009-05-09 10:46:57.000000000 +0200 ++++ b/drivers/tty/vt/vt.c 2009-05-09 14:43:48.000000000 +0200 +@@ -73,6 +73,7 @@ + */ + + #include ++#include + #include + #include + #include +@@ -2431,17 +2432,45 @@ struct tty_driver *console_driver; + + #ifdef CONFIG_VT_CONSOLE + ++#ifdef CONFIG_VT_CKO ++static unsigned int printk_color[8] __read_mostly = { ++ CONFIG_VT_PRINTK_EMERG_COLOR, /* KERN_EMERG */ ++ CONFIG_VT_PRINTK_ALERT_COLOR, /* KERN_ALERT */ ++ CONFIG_VT_PRINTK_CRIT_COLOR, /* KERN_CRIT */ ++ CONFIG_VT_PRINTK_ERR_COLOR, /* KERN_ERR */ ++ CONFIG_VT_PRINTK_WARNING_COLOR, /* KERN_WARNING */ ++ CONFIG_VT_PRINTK_NOTICE_COLOR, /* KERN_NOTICE */ ++ CONFIG_VT_PRINTK_INFO_COLOR, /* KERN_INFO */ ++ CONFIG_VT_PRINTK_DEBUG_COLOR, /* KERN_DEBUG */ ++}; ++module_param_array(printk_color, uint, NULL, S_IRUGO | S_IWUSR); ++ ++static inline void vc_set_color(struct vc_data *vc, unsigned char color) ++{ ++ vc->vc_color = color_table[color & 0xF] | ++ (color_table[(color >> 4) & 0x7] << 4) | ++ (color & 0x80); ++ update_attr(vc); ++} ++#else ++static unsigned int printk_color[8]; ++static inline void vc_set_color(const struct vc_data *vc, unsigned char c) ++{ ++} ++#endif ++ + /* + * Console on virtual terminal + * + * The console must be locked when we get here. + */ + +-static void vt_console_print(struct console *co, const char *b, unsigned count) ++static void vt_console_print(struct console *co, const char *b, unsigned count, ++ unsigned int loglevel) + { + struct vc_data *vc = vc_cons[fg_console].d; +- unsigned char c; + static DEFINE_SPINLOCK(printing_lock); ++ unsigned char current_color, c; + const ushort *start; + ushort cnt = 0; + ushort myx; +@@ -2474,11 +2503,19 @@ static void vt_console_print(struct cons + + start = (ushort *)vc->vc_pos; + ++ /* ++ * We always get a valid loglevel - <8> and "no level" is transformed ++ * to <4> in the typical kernel. 
++ */ ++ current_color = printk_color[loglevel]; ++ vc_set_color(vc, current_color); ++ + /* Contrived structure to try to emulate original need_wrap behaviour + * Problems caused when we have need_wrap set on '\n' character */ + while (count--) { + c = *b++; + if (c == 10 || c == 13 || c == 8 || vc->vc_need_wrap) { ++ vc_set_color(vc, vc->vc_def_color); + if (cnt > 0) { + if (CON_IS_VISIBLE(vc)) + vc->vc_sw->con_putcs(vc, start, cnt, vc->vc_y, vc->vc_x); +@@ -2491,6 +2528,7 @@ static void vt_console_print(struct cons + bs(vc); + start = (ushort *)vc->vc_pos; + myx = vc->vc_x; ++ vc_set_color(vc, current_color); + continue; + } + if (c != 13) +@@ -2498,6 +2536,7 @@ static void vt_console_print(struct cons + cr(vc); + start = (ushort *)vc->vc_pos; + myx = vc->vc_x; ++ vc_set_color(vc, current_color); + if (c == 10 || c == 13) + continue; + } +@@ -2520,6 +2559,7 @@ static void vt_console_print(struct cons + vc->vc_need_wrap = 1; + } + } ++ vc_set_color(vc, vc->vc_def_color); + set_cursor(vc); + notify_update(vc); + +diff -pruN -X linux/Documentation/dontdiff linux-2.6.29.3/drivers/net/netconsole.c linux-2.6.29.3-cprintk/drivers/net/netconsole.c +--- a/drivers/net/netconsole.c 2009-03-24 00:12:14.000000000 +0100 ++++ b/drivers/net/netconsole.c 2009-05-09 14:43:48.000000000 +0200 +@@ -691,7 +691,8 @@ static struct notifier_block netconsole_ + .notifier_call = netconsole_netdev_event, + }; + +-static void write_msg(struct console *con, const char *msg, unsigned int len) ++static void write_msg(struct console *con, const char *msg, unsigned int len, ++ unsigned int loglevel) + { + int frag, left; + unsigned long flags; +diff -pruN -X linux/Documentation/dontdiff linux-2.6.29.3/drivers/serial/8250.c linux-2.6.29.3-cprintk/drivers/tty/serial/8250/8250.c +--- a/drivers/serial/8250.c 2009-03-24 00:12:14.000000000 +0100 ++++ b/drivers/tty/serial/8250/8250.c 2009-05-09 14:43:48.000000000 +0200 +@@ -2698,7 +2698,8 @@ static void serial8250_console_putchar(s + * The console_lock must be held when we get here. 
+ */ + static void +-serial8250_console_write(struct console *co, const char *s, unsigned int count) ++serial8250_console_write(struct console *co, const char *s, unsigned int count, ++ unsigned int loglevel) + { + struct uart_8250_port *up = &serial8250_ports[co->index]; + unsigned long flags; +diff -pruN -X linux/Documentation/dontdiff linux-2.6.29.3/drivers/serial/8250_early.c linux-2.6.29.3-cprintk/drivers/tty/serial/8250/8250_early.c +--- a/drivers/serial/8250_early.c 2009-03-24 00:12:14.000000000 +0100 ++++ b/drivers/tty/serial/8250/8250_early.c 2009-05-09 14:43:48.000000000 +0200 +@@ -83,7 +83,7 @@ static void __init serial_putc(struct ua + } + + static void __init early_serial8250_write(struct console *console, +- const char *s, unsigned int count) ++ const char *s, unsigned int count, unsigned int loglevel) + { + struct uart_port *port = &early_device.port; + unsigned int ier; +diff -pruN -X linux/Documentation/dontdiff linux-2.6.29.3/include/linux/console.h linux-2.6.29.3-cprintk/include/linux/console.h +--- a/include/linux/console.h 2009-03-24 00:12:14.000000000 +0100 ++++ b/include/linux/console.h 2009-05-09 14:43:48.000000000 +0200 +@@ -95,7 +95,7 @@ void give_up_console(const struct consw + + struct console { + char name[16]; +- void (*write)(struct console *, const char *, unsigned); ++ void (*write)(struct console *, const char *, unsigned, unsigned int); + int (*read)(struct console *, char *, unsigned); + struct tty_driver *(*device)(struct console *, int *); + void (*unblank)(void); +diff -pruN -X linux/Documentation/dontdiff linux-2.6.29.3/kernel/printk.c linux-2.6.29.3-cprintk/kernel/printk.c +--- a/kernel/printk.c 2009-03-24 00:12:14.000000000 +0100 ++++ b/kernel/printk.c 2009-05-09 14:43:48.000000000 +0200 +@@ -389,7 +389,8 @@ SYSCALL_DEFINE3(syslog, int, type, char + /* + * Call the console drivers on a range of log_buf + */ +-static void __call_console_drivers(unsigned start, unsigned end) ++static void __call_console_drivers(unsigned start, unsigned end, ++ unsigned int loglevel) + { + struct console *con; + +@@ -397,7 +398,7 @@ static void __call_console_drivers(unsig + if ((con->flags & CON_ENABLED) && con->write && + (cpu_online(smp_processor_id()) || + (con->flags & CON_ANYTIME))) +- con->write(con, &LOG_BUF(start), end - start); ++ con->write(con, &LOG_BUF(start), end - start, loglevel); + } + } + +@@ -424,10 +425,11 @@ static void _call_console_drivers(unsign + if ((start & LOG_BUF_MASK) > (end & LOG_BUF_MASK)) { + /* wrapped write */ + __call_console_drivers(start & LOG_BUF_MASK, +- log_buf_len); +- __call_console_drivers(0, end & LOG_BUF_MASK); ++ log_buf_len, msg_log_level); ++ __call_console_drivers(0, end & LOG_BUF_MASK, ++ msg_log_level); + } else { +- __call_console_drivers(start, end); ++ __call_console_drivers(start, end, msg_log_level); + } + } + } diff --git a/3.3.8/fs-btrfs-run-delayed-directory-updates-during-log-replay.patch b/3.3.8/fs-btrfs-run-delayed-directory-updates-during-log-replay.patch new file mode 100644 index 0000000..f902a9c --- /dev/null +++ b/3.3.8/fs-btrfs-run-delayed-directory-updates-during-log-replay.patch @@ -0,0 +1,54 @@ +From b6305567e7d31b0bec1b8cb9ec0cadd7f7086f5f Mon Sep 17 00:00:00 2001 +From: Chris Mason +Date: Mon, 2 Jul 2012 15:29:53 -0400 +Subject: Btrfs: run delayed directory updates during log replay + +From: Chris Mason + +commit b6305567e7d31b0bec1b8cb9ec0cadd7f7086f5f upstream. 
+ +While we are resolving directory modifications in the +tree log, we are triggering delayed metadata updates to +the filesystem btrees. + +This commit forces the delayed updates to run so the +replay code can find any modifications done. It stops +us from crashing because the directory deleltion replay +expects items to be removed immediately from the tree. + +Signed-off-by: Chris Mason +Signed-off-by: Greg Kroah-Hartman + +--- + fs/btrfs/tree-log.c | 6 ++++++ + 1 file changed, 6 insertions(+) + +--- a/fs/btrfs/tree-log.c ++++ b/fs/btrfs/tree-log.c +@@ -690,6 +690,8 @@ static noinline int drop_one_dir_item(st + kfree(name); + + iput(inode); ++ ++ btrfs_run_delayed_items(trans, root); + return ret; + } + +@@ -895,6 +897,7 @@ again: + ret = btrfs_unlink_inode(trans, root, dir, + inode, victim_name, + victim_name_len); ++ btrfs_run_delayed_items(trans, root); + } + kfree(victim_name); + ptr = (unsigned long)(victim_ref + 1) + victim_name_len; +@@ -1475,6 +1478,9 @@ again: + ret = btrfs_unlink_inode(trans, root, dir, inode, + name, name_len); + BUG_ON(ret); ++ ++ btrfs_run_delayed_items(trans, root); ++ + kfree(name); + iput(inode); + diff --git a/3.3.8/fs-ecryptfs-fix-lockdep-warning-in-miscdev-operations.patch b/3.3.8/fs-ecryptfs-fix-lockdep-warning-in-miscdev-operations.patch new file mode 100644 index 0000000..d557282 --- /dev/null +++ b/3.3.8/fs-ecryptfs-fix-lockdep-warning-in-miscdev-operations.patch @@ -0,0 +1,103 @@ +From 60d65f1f07a7d81d3eb3b91fc13fca80f2fdbb12 Mon Sep 17 00:00:00 2001 +From: Tyler Hicks +Date: Mon, 11 Jun 2012 10:21:34 -0700 +Subject: eCryptfs: Fix lockdep warning in miscdev operations + +From: Tyler Hicks + +commit 60d65f1f07a7d81d3eb3b91fc13fca80f2fdbb12 upstream. + +Don't grab the daemon mutex while holding the message context mutex. +Addresses this lockdep warning: + + ecryptfsd/2141 is trying to acquire lock: + (&ecryptfs_msg_ctx_arr[i].mux){+.+.+.}, at: [] ecryptfs_miscdev_read+0x143/0x470 [ecryptfs] + + but task is already holding lock: + (&(*daemon)->mux){+.+...}, at: [] ecryptfs_miscdev_read+0x21c/0x470 [ecryptfs] + + which lock already depends on the new lock. 
+ + the existing dependency chain (in reverse order) is: + + -> #1 (&(*daemon)->mux){+.+...}: + [] lock_acquire+0x9d/0x220 + [] __mutex_lock_common+0x5a/0x4b0 + [] mutex_lock_nested+0x44/0x50 + [] ecryptfs_send_miscdev+0x97/0x120 [ecryptfs] + [] ecryptfs_send_message+0x134/0x1e0 [ecryptfs] + [] ecryptfs_generate_key_packet_set+0x2fe/0xa80 [ecryptfs] + [] ecryptfs_write_metadata+0x108/0x250 [ecryptfs] + [] ecryptfs_create+0x130/0x250 [ecryptfs] + [] vfs_create+0xb4/0x120 + [] do_last+0x8c5/0xa10 + [] path_openat+0xd9/0x460 + [] do_filp_open+0x42/0xa0 + [] do_sys_open+0xf8/0x1d0 + [] sys_open+0x21/0x30 + [] system_call_fastpath+0x16/0x1b + + -> #0 (&ecryptfs_msg_ctx_arr[i].mux){+.+.+.}: + [] __lock_acquire+0x1bf8/0x1c50 + [] lock_acquire+0x9d/0x220 + [] __mutex_lock_common+0x5a/0x4b0 + [] mutex_lock_nested+0x44/0x50 + [] ecryptfs_miscdev_read+0x143/0x470 [ecryptfs] + [] vfs_read+0xb3/0x180 + [] sys_read+0x4d/0x90 + [] system_call_fastpath+0x16/0x1b + +Signed-off-by: Tyler Hicks +Signed-off-by: Greg Kroah-Hartman + +--- + fs/ecryptfs/miscdev.c | 25 +++++++++++++------------ + 1 file changed, 13 insertions(+), 12 deletions(-) + +--- a/fs/ecryptfs/miscdev.c ++++ b/fs/ecryptfs/miscdev.c +@@ -195,31 +195,32 @@ int ecryptfs_send_miscdev(char *data, si + struct ecryptfs_msg_ctx *msg_ctx, u8 msg_type, + u16 msg_flags, struct ecryptfs_daemon *daemon) + { +- int rc = 0; ++ struct ecryptfs_message *msg; + +- mutex_lock(&msg_ctx->mux); +- msg_ctx->msg = kmalloc((sizeof(*msg_ctx->msg) + data_size), +- GFP_KERNEL); +- if (!msg_ctx->msg) { +- rc = -ENOMEM; ++ msg = kmalloc((sizeof(*msg) + data_size), GFP_KERNEL); ++ if (!msg) { + printk(KERN_ERR "%s: Out of memory whilst attempting " + "to kmalloc(%zd, GFP_KERNEL)\n", __func__, +- (sizeof(*msg_ctx->msg) + data_size)); +- goto out_unlock; ++ (sizeof(*msg) + data_size)); ++ return -ENOMEM; + } ++ ++ mutex_lock(&msg_ctx->mux); ++ msg_ctx->msg = msg; + msg_ctx->msg->index = msg_ctx->index; + msg_ctx->msg->data_len = data_size; + msg_ctx->type = msg_type; + memcpy(msg_ctx->msg->data, data, data_size); + msg_ctx->msg_size = (sizeof(*msg_ctx->msg) + data_size); +- mutex_lock(&daemon->mux); + list_add_tail(&msg_ctx->daemon_out_list, &daemon->msg_ctx_out_queue); ++ mutex_unlock(&msg_ctx->mux); ++ ++ mutex_lock(&daemon->mux); + daemon->num_queued_msg_ctx++; + wake_up_interruptible(&daemon->wait); + mutex_unlock(&daemon->mux); +-out_unlock: +- mutex_unlock(&msg_ctx->mux); +- return rc; ++ ++ return 0; + } + + /* diff --git a/3.3.8/fs-ecryptfs-gracefully-refuse-miscdev-file-ops-on-inherited-passed-files.patch b/3.3.8/fs-ecryptfs-gracefully-refuse-miscdev-file-ops-on-inherited-passed-files.patch new file mode 100644 index 0000000..f60a64c --- /dev/null +++ b/3.3.8/fs-ecryptfs-gracefully-refuse-miscdev-file-ops-on-inherited-passed-files.patch @@ -0,0 +1,95 @@ +From 8dc6780587c99286c0d3de747a2946a76989414a Mon Sep 17 00:00:00 2001 +From: Tyler Hicks +Date: Mon, 11 Jun 2012 09:24:11 -0700 +Subject: eCryptfs: Gracefully refuse miscdev file ops on inherited/passed files + +From: Tyler Hicks + +commit 8dc6780587c99286c0d3de747a2946a76989414a upstream. + +File operations on /dev/ecryptfs would BUG() when the operations were +performed by processes other than the process that originally opened the +file. This could happen with open files inherited after fork() or file +descriptors passed through IPC mechanisms. Rather than calling BUG(), an +error code can be safely returned in most situations. 
+ +In ecryptfs_miscdev_release(), eCryptfs still needs to handle the +release even if the last file reference is being held by a process that +didn't originally open the file. ecryptfs_find_daemon_by_euid() will not +be successful, so a pointer to the daemon is stored in the file's +private_data. The private_data pointer is initialized when the miscdev +file is opened and only used when the file is released. + +https://launchpad.net/bugs/994247 + +Signed-off-by: Tyler Hicks +Reported-by: Sasha Levin +Tested-by: Sasha Levin +Signed-off-by: Greg Kroah-Hartman + +--- + fs/ecryptfs/miscdev.c | 23 ++++++++++++++++------- + 1 file changed, 16 insertions(+), 7 deletions(-) + +--- a/fs/ecryptfs/miscdev.c ++++ b/fs/ecryptfs/miscdev.c +@@ -49,7 +49,10 @@ ecryptfs_miscdev_poll(struct file *file, + mutex_lock(&ecryptfs_daemon_hash_mux); + /* TODO: Just use file->private_data? */ + rc = ecryptfs_find_daemon_by_euid(&daemon, euid, current_user_ns()); +- BUG_ON(rc || !daemon); ++ if (rc || !daemon) { ++ mutex_unlock(&ecryptfs_daemon_hash_mux); ++ return -EINVAL; ++ } + mutex_lock(&daemon->mux); + mutex_unlock(&ecryptfs_daemon_hash_mux); + if (daemon->flags & ECRYPTFS_DAEMON_ZOMBIE) { +@@ -122,6 +125,7 @@ ecryptfs_miscdev_open(struct inode *inod + goto out_unlock_daemon; + } + daemon->flags |= ECRYPTFS_DAEMON_MISCDEV_OPEN; ++ file->private_data = daemon; + atomic_inc(&ecryptfs_num_miscdev_opens); + out_unlock_daemon: + mutex_unlock(&daemon->mux); +@@ -152,9 +156,9 @@ ecryptfs_miscdev_release(struct inode *i + + mutex_lock(&ecryptfs_daemon_hash_mux); + rc = ecryptfs_find_daemon_by_euid(&daemon, euid, current_user_ns()); +- BUG_ON(rc || !daemon); ++ if (rc || !daemon) ++ daemon = file->private_data; + mutex_lock(&daemon->mux); +- BUG_ON(daemon->pid != task_pid(current)); + BUG_ON(!(daemon->flags & ECRYPTFS_DAEMON_MISCDEV_OPEN)); + daemon->flags &= ~ECRYPTFS_DAEMON_MISCDEV_OPEN; + atomic_dec(&ecryptfs_num_miscdev_opens); +@@ -269,8 +273,16 @@ ecryptfs_miscdev_read(struct file *file, + mutex_lock(&ecryptfs_daemon_hash_mux); + /* TODO: Just use file->private_data? */ + rc = ecryptfs_find_daemon_by_euid(&daemon, euid, current_user_ns()); +- BUG_ON(rc || !daemon); ++ if (rc || !daemon) { ++ mutex_unlock(&ecryptfs_daemon_hash_mux); ++ return -EINVAL; ++ } + mutex_lock(&daemon->mux); ++ if (task_pid(current) != daemon->pid) { ++ mutex_unlock(&daemon->mux); ++ mutex_unlock(&ecryptfs_daemon_hash_mux); ++ return -EPERM; ++ } + if (daemon->flags & ECRYPTFS_DAEMON_ZOMBIE) { + rc = 0; + mutex_unlock(&ecryptfs_daemon_hash_mux); +@@ -307,9 +319,6 @@ check_list: + * message from the queue; try again */ + goto check_list; + } +- BUG_ON(euid != daemon->euid); +- BUG_ON(current_user_ns() != daemon->user_ns); +- BUG_ON(task_pid(current) != daemon->pid); + msg_ctx = list_first_entry(&daemon->msg_ctx_out_queue, + struct ecryptfs_msg_ctx, daemon_out_list); + BUG_ON(!msg_ctx); diff --git a/3.3.8/fs-ecryptfs-properly-check-for-o_rdonly-flag-before-doing-privileged-open.patch b/3.3.8/fs-ecryptfs-properly-check-for-o_rdonly-flag-before-doing-privileged-open.patch new file mode 100644 index 0000000..c2a913b --- /dev/null +++ b/3.3.8/fs-ecryptfs-properly-check-for-o_rdonly-flag-before-doing-privileged-open.patch @@ -0,0 +1,42 @@ +From 9fe79d7600497ed8a95c3981cbe5b73ab98222f0 Mon Sep 17 00:00:00 2001 +From: Tyler Hicks +Date: Tue, 12 Jun 2012 11:17:01 -0700 +Subject: eCryptfs: Properly check for O_RDONLY flag before doing privileged open + +From: Tyler Hicks + +commit 9fe79d7600497ed8a95c3981cbe5b73ab98222f0 upstream. 
+ +If the first attempt at opening the lower file read/write fails, +eCryptfs will retry using a privileged kthread. However, the privileged +retry should not happen if the lower file's inode is read-only because a +read/write open will still be unsuccessful. + +The check for determining if the open should be retried was intended to +be based on the access mode of the lower file's open flags being +O_RDONLY, but the check was incorrectly performed. This would cause the +open to be retried by the privileged kthread, resulting in a second +failed open of the lower file. This patch corrects the check to +determine if the open request should be handled by the privileged +kthread. + +Signed-off-by: Tyler Hicks +Reported-by: Dan Carpenter +Acked-by: Dan Carpenter +Signed-off-by: Greg Kroah-Hartman + +--- + fs/ecryptfs/kthread.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +--- a/fs/ecryptfs/kthread.c ++++ b/fs/ecryptfs/kthread.c +@@ -149,7 +149,7 @@ int ecryptfs_privileged_open(struct file + (*lower_file) = dentry_open(lower_dentry, lower_mnt, flags, cred); + if (!IS_ERR(*lower_file)) + goto out; +- if (flags & O_RDONLY) { ++ if ((flags & O_ACCMODE) == O_RDONLY) { + rc = PTR_ERR((*lower_file)); + goto out; + } diff --git a/3.3.8/fs-epoll-clear-the-tfile_check_list-on-ELOOP_CVE-2012-3375.patch b/3.3.8/fs-epoll-clear-the-tfile_check_list-on-ELOOP_CVE-2012-3375.patch new file mode 100644 index 0000000..6979427 --- /dev/null +++ b/3.3.8/fs-epoll-clear-the-tfile_check_list-on-ELOOP_CVE-2012-3375.patch @@ -0,0 +1,35 @@ +commit 13d518074a952d33d47c428419693f63389547e9 +Author: Jason Baron +Date: Wed Apr 25 16:01:47 2012 -0700 + + epoll: clear the tfile_check_list on -ELOOP + + An epoll_ctl(,EPOLL_CTL_ADD,,) operation can return '-ELOOP' to prevent + circular epoll dependencies from being created. However, in that case we + do not properly clear the 'tfile_check_list'. Thus, add a call to + clear_tfile_check_list() for the -ELOOP case. + + Signed-off-by: Jason Baron + Reported-by: Yurij M. Plotnikov + Cc: Nelson Elhage + Cc: Davide Libenzi + Tested-by: Alexandra N. Kossovsky + Signed-off-by: Andrew Morton + Signed-off-by: Linus Torvalds + +diff --git a/fs/eventpoll.c b/fs/eventpoll.c +index 739b098..c0b3c70 100644 +--- a/fs/eventpoll.c ++++ b/fs/eventpoll.c +@@ -1663,8 +1663,10 @@ SYSCALL_DEFINE4(epoll_ctl, int, epfd, int, op, int, fd, + if (op == EPOLL_CTL_ADD) { + if (is_file_epoll(tfile)) { + error = -ELOOP; +- if (ep_loop_check(ep, tfile) != 0) ++ if (ep_loop_check(ep, tfile) != 0) { ++ clear_tfile_check_list(); + goto error_tgt_fput; ++ } + } else + list_add(&tfile->f_tfile_llink, &tfile_check_list); + } diff --git a/3.3.8/fs-ext4-fix-duplicated-mnt_drop_write-call-in-ext4_ioc_move_ext.patch b/3.3.8/fs-ext4-fix-duplicated-mnt_drop_write-call-in-ext4_ioc_move_ext.patch new file mode 100644 index 0000000..b2b2714 --- /dev/null +++ b/3.3.8/fs-ext4-fix-duplicated-mnt_drop_write-call-in-ext4_ioc_move_ext.patch @@ -0,0 +1,31 @@ +From 331ae4962b975246944ea039697a8f1cadce42bb Mon Sep 17 00:00:00 2001 +From: Al Viro +Date: Wed, 18 Jul 2012 09:31:36 +0100 +Subject: ext4: fix duplicated mnt_drop_write call in EXT4_IOC_MOVE_EXT + +From: Al Viro + +commit 331ae4962b975246944ea039697a8f1cadce42bb upstream. 
+ +Caused, AFAICS, by mismerge in commit ff9cb1c4eead ("Merge branch +'for_linus' into for_linus_merged") + +Signed-off-by: Al Viro +Cc: Theodore Ts'o +Signed-off-by: Linus Torvalds +Signed-off-by: Greg Kroah-Hartman + +--- + fs/ext4/ioctl.c | 1 - + 1 file changed, 1 deletion(-) + +--- a/fs/ext4/ioctl.c ++++ b/fs/ext4/ioctl.c +@@ -261,7 +261,6 @@ group_extend_out: + err = ext4_move_extents(filp, donor_filp, me.orig_start, + me.donor_start, me.len, &me.moved_len); + mnt_drop_write_file(filp); +- mnt_drop_write(filp->f_path.mnt); + + if (copy_to_user((struct move_extent __user *)arg, + &me, sizeof(me))) diff --git a/3.3.8/fs-ext4-fix-the-free-blocks-calculation-for-ext3-file-systems-w-uninit_bg.patch b/3.3.8/fs-ext4-fix-the-free-blocks-calculation-for-ext3-file-systems-w-uninit_bg.patch new file mode 100644 index 0000000..5f7ab7c --- /dev/null +++ b/3.3.8/fs-ext4-fix-the-free-blocks-calculation-for-ext3-file-systems-w-uninit_bg.patch @@ -0,0 +1,73 @@ +From b0dd6b70f0fda17ae9762fbb72d98e40a4f66556 Mon Sep 17 00:00:00 2001 +From: Theodore Ts'o +Date: Thu, 7 Jun 2012 18:56:06 -0400 +Subject: ext4: fix the free blocks calculation for ext3 file systems w/ uninit_bg + +From: Theodore Ts'o + +commit b0dd6b70f0fda17ae9762fbb72d98e40a4f66556 upstream. + +Ext3 filesystems that are converted to use as many ext4 file system +features as possible will enable uninit_bg to speed up e2fsck times. +These file systems will have a native ext3 layout of inode tables and +block allocation bitmaps (as opposed to ext4's flex_bg layout). +Unfortunately, in these cases, when first allocating a block in an +uninitialized block group, ext4 would incorrectly calculate the number +of free blocks in that block group, and then errorneously report that +the file system was corrupt: + +EXT4-fs error (device vdd): ext4_mb_generate_buddy:741: group 30, 32254 clusters in bitmap, 32258 in gd + +This problem can be reproduced via: + + mke2fs -q -t ext4 -O ^flex_bg /dev/vdd 5g + mount -t ext4 /dev/vdd /mnt + fallocate -l 4600m /mnt/test + +The problem was caused by a bone headed mistake in the check to see if a +particular metadata block was part of the block group. + +Many thanks to Kees Cook for finding and bisecting the buggy commit +which introduced this bug (commit fd034a84e1, present since v3.2). + +Reported-by: Sander Eikelenboom +Reported-by: Kees Cook +Signed-off-by: "Theodore Ts'o" +Tested-by: Kees Cook +Signed-off-by: Greg Kroah-Hartman + +--- + fs/ext4/balloc.c | 8 ++++---- + 1 file changed, 4 insertions(+), 4 deletions(-) + +--- a/fs/ext4/balloc.c ++++ b/fs/ext4/balloc.c +@@ -90,8 +90,8 @@ unsigned ext4_num_overhead_clusters(stru + * unusual file system layouts. 
+ */ + if (ext4_block_in_group(sb, ext4_block_bitmap(sb, gdp), block_group)) { +- block_cluster = EXT4_B2C(sbi, (start - +- ext4_block_bitmap(sb, gdp))); ++ block_cluster = EXT4_B2C(sbi, ++ ext4_block_bitmap(sb, gdp) - start); + if (block_cluster < num_clusters) + block_cluster = -1; + else if (block_cluster == num_clusters) { +@@ -102,7 +102,7 @@ unsigned ext4_num_overhead_clusters(stru + + if (ext4_block_in_group(sb, ext4_inode_bitmap(sb, gdp), block_group)) { + inode_cluster = EXT4_B2C(sbi, +- start - ext4_inode_bitmap(sb, gdp)); ++ ext4_inode_bitmap(sb, gdp) - start); + if (inode_cluster < num_clusters) + inode_cluster = -1; + else if (inode_cluster == num_clusters) { +@@ -114,7 +114,7 @@ unsigned ext4_num_overhead_clusters(stru + itbl_blk = ext4_inode_table(sb, gdp); + for (i = 0; i < sbi->s_itb_per_group; i++) { + if (ext4_block_in_group(sb, itbl_blk + i, block_group)) { +- c = EXT4_B2C(sbi, start - itbl_blk + i); ++ c = EXT4_B2C(sbi, itbl_blk + i - start); + if ((c < num_clusters) || (c == inode_cluster) || + (c == block_cluster) || (c == itbl_cluster)) + continue; diff --git a/3.3.8/fs-remove-easily-user-triggerable-bug-from-generic_setlease.patch b/3.3.8/fs-remove-easily-user-triggerable-bug-from-generic_setlease.patch new file mode 100644 index 0000000..26a323e --- /dev/null +++ b/3.3.8/fs-remove-easily-user-triggerable-bug-from-generic_setlease.patch @@ -0,0 +1,39 @@ +From 8d657eb3b43861064d36241e88d9d61c709f33f0 Mon Sep 17 00:00:00 2001 +From: Dave Jones +Date: Fri, 13 Jul 2012 13:35:36 -0400 +Subject: Remove easily user-triggerable BUG from generic_setlease + +From: Dave Jones + +commit 8d657eb3b43861064d36241e88d9d61c709f33f0 upstream. + +This can be trivially triggered from userspace by passing in something unexpected. + + kernel BUG at fs/locks.c:1468! + invalid opcode: 0000 [#1] SMP + RIP: 0010:generic_setlease+0xc2/0x100 + Call Trace: + __vfs_setlease+0x35/0x40 + fcntl_setlease+0x76/0x150 + sys_fcntl+0x1c6/0x810 + system_call_fastpath+0x1a/0x1f + +Signed-off-by: Dave Jones +Signed-off-by: Linus Torvalds +Signed-off-by: Greg Kroah-Hartman + +--- + fs/locks.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +--- a/fs/locks.c ++++ b/fs/locks.c +@@ -1465,7 +1465,7 @@ int generic_setlease(struct file *filp, + case F_WRLCK: + return generic_add_lease(filp, arg, flp); + default: +- BUG(); ++ return -EINVAL; + } + } + EXPORT_SYMBOL(generic_setlease); diff --git a/3.3.8/fs-udf-avoid-run-away-loop-when-partition-table-length-is-corrupted_CVE-2012-3400.patch b/3.3.8/fs-udf-avoid-run-away-loop-when-partition-table-length-is-corrupted_CVE-2012-3400.patch new file mode 100644 index 0000000..9cff549 --- /dev/null +++ b/3.3.8/fs-udf-avoid-run-away-loop-when-partition-table-length-is-corrupted_CVE-2012-3400.patch @@ -0,0 +1,51 @@ +From adee11b2085bee90bd8f4f52123ffb07882d6256 Mon Sep 17 00:00:00 2001 +From: Jan Kara +Date: Wed, 27 Jun 2012 20:20:22 +0200 +Subject: udf: Avoid run away loop when partition table length is corrupted + +From: Jan Kara + +commit adee11b2085bee90bd8f4f52123ffb07882d6256 upstream. + +Check provided length of partition table so that (possibly maliciously) +corrupted partition table cannot cause accessing data beyond current buffer. 
+ +Signed-off-by: Jan Kara +Signed-off-by: Greg Kroah-Hartman + +--- + fs/udf/super.c | 10 +++++++++- + 1 file changed, 9 insertions(+), 1 deletion(-) + +--- a/fs/udf/super.c ++++ b/fs/udf/super.c +@@ -1225,6 +1225,7 @@ static int udf_load_logicalvol(struct su + struct genericPartitionMap *gpm; + uint16_t ident; + struct buffer_head *bh; ++ unsigned int table_len; + int ret = 0; + + bh = udf_read_tagged(sb, block, block, &ident); +@@ -1232,13 +1233,20 @@ static int udf_load_logicalvol(struct su + return 1; + BUG_ON(ident != TAG_IDENT_LVD); + lvd = (struct logicalVolDesc *)bh->b_data; ++ table_len = le32_to_cpu(lvd->mapTableLength); ++ if (sizeof(*lvd) + table_len > sb->s_blocksize) { ++ udf_err(sb, "error loading logical volume descriptor: " ++ "Partition table too long (%u > %lu)\n", table_len, ++ sb->s_blocksize - sizeof(*lvd)); ++ goto out_bh; ++ } + + ret = udf_sb_alloc_partition_maps(sb, le32_to_cpu(lvd->numPartitionMaps)); + if (ret) + goto out_bh; + + for (i = 0, offset = 0; +- i < sbi->s_partitions && offset < le32_to_cpu(lvd->mapTableLength); ++ i < sbi->s_partitions && offset < table_len; + i++, offset += gpm->partitionMapLength) { + struct udf_part_map *map = &sbi->s_partmaps[i]; + gpm = (struct genericPartitionMap *) diff --git a/3.3.8/fs-udf-fortify-loading-of-sparing-table_CVE-2012-3400.patch b/3.3.8/fs-udf-fortify-loading-of-sparing-table_CVE-2012-3400.patch new file mode 100644 index 0000000..109f245 --- /dev/null +++ b/3.3.8/fs-udf-fortify-loading-of-sparing-table_CVE-2012-3400.patch @@ -0,0 +1,132 @@ +From 1df2ae31c724e57be9d7ac00d78db8a5dabdd050 Mon Sep 17 00:00:00 2001 +From: Jan Kara +Date: Wed, 27 Jun 2012 21:23:07 +0200 +Subject: udf: Fortify loading of sparing table + +From: Jan Kara + +commit 1df2ae31c724e57be9d7ac00d78db8a5dabdd050 upstream. + +Add sanity checks when loading sparing table from disk to avoid accessing +unallocated memory or writing to it. 
+ +Signed-off-by: Jan Kara +Signed-off-by: Greg Kroah-Hartman + +--- + fs/udf/super.c | 86 +++++++++++++++++++++++++++++++++++---------------------- + 1 file changed, 53 insertions(+), 33 deletions(-) + +--- a/fs/udf/super.c ++++ b/fs/udf/super.c +@@ -56,6 +56,7 @@ + #include + #include + #include ++#include + #include + + #include "udf_sb.h" +@@ -1215,11 +1216,59 @@ out_bh: + return ret; + } + ++static int udf_load_sparable_map(struct super_block *sb, ++ struct udf_part_map *map, ++ struct sparablePartitionMap *spm) ++{ ++ uint32_t loc; ++ uint16_t ident; ++ struct sparingTable *st; ++ struct udf_sparing_data *sdata = &map->s_type_specific.s_sparing; ++ int i; ++ struct buffer_head *bh; ++ ++ map->s_partition_type = UDF_SPARABLE_MAP15; ++ sdata->s_packet_len = le16_to_cpu(spm->packetLength); ++ if (!is_power_of_2(sdata->s_packet_len)) { ++ udf_err(sb, "error loading logical volume descriptor: " ++ "Invalid packet length %u\n", ++ (unsigned)sdata->s_packet_len); ++ return -EIO; ++ } ++ if (spm->numSparingTables > 4) { ++ udf_err(sb, "error loading logical volume descriptor: " ++ "Too many sparing tables (%d)\n", ++ (int)spm->numSparingTables); ++ return -EIO; ++ } ++ ++ for (i = 0; i < spm->numSparingTables; i++) { ++ loc = le32_to_cpu(spm->locSparingTable[i]); ++ bh = udf_read_tagged(sb, loc, loc, &ident); ++ if (!bh) ++ continue; ++ ++ st = (struct sparingTable *)bh->b_data; ++ if (ident != 0 || ++ strncmp(st->sparingIdent.ident, UDF_ID_SPARING, ++ strlen(UDF_ID_SPARING)) || ++ sizeof(*st) + le16_to_cpu(st->reallocationTableLen) > ++ sb->s_blocksize) { ++ brelse(bh); ++ continue; ++ } ++ ++ sdata->s_spar_map[i] = bh; ++ } ++ map->s_partition_func = udf_get_pblock_spar15; ++ return 0; ++} ++ + static int udf_load_logicalvol(struct super_block *sb, sector_t block, + struct kernel_lb_addr *fileset) + { + struct logicalVolDesc *lvd; +- int i, j, offset; ++ int i, offset; + uint8_t type; + struct udf_sb_info *sbi = UDF_SB(sb); + struct genericPartitionMap *gpm; +@@ -1281,38 +1330,9 @@ static int udf_load_logicalvol(struct su + } else if (!strncmp(upm2->partIdent.ident, + UDF_ID_SPARABLE, + strlen(UDF_ID_SPARABLE))) { +- uint32_t loc; +- struct sparingTable *st; +- struct sparablePartitionMap *spm = +- (struct sparablePartitionMap *)gpm; +- +- map->s_partition_type = UDF_SPARABLE_MAP15; +- map->s_type_specific.s_sparing.s_packet_len = +- le16_to_cpu(spm->packetLength); +- for (j = 0; j < spm->numSparingTables; j++) { +- struct buffer_head *bh2; +- +- loc = le32_to_cpu( +- spm->locSparingTable[j]); +- bh2 = udf_read_tagged(sb, loc, loc, +- &ident); +- map->s_type_specific.s_sparing. +- s_spar_map[j] = bh2; +- +- if (bh2 == NULL) +- continue; +- +- st = (struct sparingTable *)bh2->b_data; +- if (ident != 0 || strncmp( +- st->sparingIdent.ident, +- UDF_ID_SPARING, +- strlen(UDF_ID_SPARING))) { +- brelse(bh2); +- map->s_type_specific.s_sparing. 
+- s_spar_map[j] = NULL; +- } +- } +- map->s_partition_func = udf_get_pblock_spar15; ++ if (udf_load_sparable_map(sb, map, ++ (struct sparablePartitionMap *)gpm) < 0) ++ goto out_bh; + } else if (!strncmp(upm2->partIdent.ident, + UDF_ID_METADATA, + strlen(UDF_ID_METADATA))) { diff --git a/3.3.8/fs-udf-use-ret-instead-of-abusing-i-in-udf_load_logicalvol.patch b/3.3.8/fs-udf-use-ret-instead-of-abusing-i-in-udf_load_logicalvol.patch new file mode 100644 index 0000000..44e75ac --- /dev/null +++ b/3.3.8/fs-udf-use-ret-instead-of-abusing-i-in-udf_load_logicalvol.patch @@ -0,0 +1,32 @@ +From cb14d340ef1737c24125dd663eff77734a482d47 Mon Sep 17 00:00:00 2001 +From: Jan Kara +Date: Wed, 27 Jun 2012 20:08:44 +0200 +Subject: udf: Use 'ret' instead of abusing 'i' in udf_load_logicalvol() + +From: Jan Kara + +commit cb14d340ef1737c24125dd663eff77734a482d47 upstream. + +Signed-off-by: Jan Kara +Signed-off-by: Greg Kroah-Hartman + +--- + fs/udf/super.c | 6 ++---- + 1 file changed, 2 insertions(+), 4 deletions(-) + +--- a/fs/udf/super.c ++++ b/fs/udf/super.c +@@ -1233,11 +1233,9 @@ static int udf_load_logicalvol(struct su + BUG_ON(ident != TAG_IDENT_LVD); + lvd = (struct logicalVolDesc *)bh->b_data; + +- i = udf_sb_alloc_partition_maps(sb, le32_to_cpu(lvd->numPartitionMaps)); +- if (i != 0) { +- ret = i; ++ ret = udf_sb_alloc_partition_maps(sb, le32_to_cpu(lvd->numPartitionMaps)); ++ if (ret) + goto out_bh; +- } + + for (i = 0, offset = 0; + i < sbi->s_partitions && offset < le32_to_cpu(lvd->mapTableLength); diff --git a/3.3.8/hz-432-kconfig-option.patch b/3.3.8/hz-432-kconfig-option.patch new file mode 100644 index 0000000..2fe9a4f --- /dev/null +++ b/3.3.8/hz-432-kconfig-option.patch @@ -0,0 +1,25 @@ +diff -urN oldtree/kernel/Kconfig.hz newtree/kernel/Kconfig.hz +--- oldtree/kernel/Kconfig.hz 2007-03-06 15:00:55.000000000 -0500 ++++ newtree/kernel/Kconfig.hz 2007-03-06 17:52:36.000000000 -0500 +@@ -39,6 +39,14 @@ + on SMP and NUMA systems and exactly dividing by both PAL and + NTSC frame rates for video and multimedia work. + ++ config HZ_432 ++ bool "432 HZ" ++ help ++ 432 HZ is the best value for desktop systems. Most responsive ++ out of all the options. This is for Dual Core/Processor systems only. ++ as timer frequencies * number of processors = actual frequency. ++ Try this if you have a dual-core/dual processor system. ++ + config HZ_1000 + bool "1000 HZ" + help +@@ -52,5 +60,6 @@ + default 100 if HZ_100 + default 250 if HZ_250_NODEFAULT + default 300 if HZ_300 ++ default 432 if HZ_432 + default 1000 if HZ_1000 + diff --git a/3.3.8/hz-864-kconfig-option.patch b/3.3.8/hz-864-kconfig-option.patch new file mode 100644 index 0000000..6bdca04 --- /dev/null +++ b/3.3.8/hz-864-kconfig-option.patch @@ -0,0 +1,25 @@ +diff -urN oldtree/kernel/Kconfig.hz newtree/kernel/Kconfig.hz +--- oldtree/kernel/Kconfig.hz 2007-03-06 15:00:55.000000000 -0500 ++++ newtree/kernel/Kconfig.hz 2007-03-06 17:52:36.000000000 -0500 +@@ -39,6 +39,14 @@ + as timer frequencies * number of processors = actual frequency. + Try this if you have a dual-core/dual processor system. + ++ config HZ_864 ++ bool "864 HZ" ++ help ++ 864 HZ is the best value for desktop systems. Most responsive ++ out of all the options. The only reason it is not default is ++ because it may break few drivers. Give it a try if you have ++ a desktop :). 
++ + config HZ_1000 + bool "1000 HZ" + help +@@ -52,5 +60,6 @@ + default 250 if HZ_250_NODEFAULT + default 300 if HZ_300 + default 432 if HZ_432 ++ default 864 if HZ_864 + default 1000 if HZ_1000 + diff --git a/3.3.8/imqmq-3.3.patch b/3.3.8/imqmq-3.3.patch new file mode 100644 index 0000000..a8f4c58 --- /dev/null +++ b/3.3.8/imqmq-3.3.patch @@ -0,0 +1,1613 @@ +diff -uNr linux-3.3/drivers/net/imq.c linux-3.3-imqmq/drivers/net/imq.c +--- linux-3.3/drivers/net/imq.c 1970-01-01 02:00:00.000000000 +0200 ++++ linux-3.3-imqmq/drivers/net/imq.c 2012-03-19 16:53:04.127494306 +0200 +@@ -0,0 +1,857 @@ ++/* ++ * Pseudo-driver for the intermediate queue device. ++ * ++ * This program is free software; you can redistribute it and/or ++ * modify it under the terms of the GNU General Public License ++ * as published by the Free Software Foundation; either version ++ * 2 of the License, or (at your option) any later version. ++ * ++ * Authors: Patrick McHardy, ++ * ++ * The first version was written by Martin Devera, ++ * ++ * Credits: Jan Rafaj ++ * - Update patch to 2.4.21 ++ * Sebastian Strollo ++ * - Fix "Dead-loop on netdevice imq"-issue ++ * Marcel Sebek ++ * - Update to 2.6.2-rc1 ++ * ++ * After some time of inactivity there is a group taking care ++ * of IMQ again: http://www.linuximq.net ++ * ++ * ++ * 2004/06/30 - New version of IMQ patch to kernels <=2.6.7 ++ * including the following changes: ++ * ++ * - Correction of ipv6 support "+"s issue (Hasso Tepper) ++ * - Correction of imq_init_devs() issue that resulted in ++ * kernel OOPS unloading IMQ as module (Norbert Buchmuller) ++ * - Addition of functionality to choose number of IMQ devices ++ * during kernel config (Andre Correa) ++ * - Addition of functionality to choose how IMQ hooks on ++ * PRE and POSTROUTING (after or before NAT) (Andre Correa) ++ * - Cosmetic corrections (Norbert Buchmuller) (Andre Correa) ++ * ++ * ++ * 2005/12/16 - IMQ versions between 2.6.7 and 2.6.13 were ++ * released with almost no problems. 2.6.14-x was released ++ * with some important changes: nfcache was removed; After ++ * some weeks of trouble we figured out that some IMQ fields ++ * in skb were missing in skbuff.c - skb_clone and copy_skb_header. ++ * These functions are correctly patched by this new patch version. ++ * ++ * Thanks for all who helped to figure out all the problems with ++ * 2.6.14.x: Patrick McHardy, Rune Kock, VeNoMouS, Max CtRiX, ++ * Kevin Shanahan, Richard Lucassen, Valery Dachev (hopefully ++ * I didn't forget anybody). I apologize again for my lack of time. ++ * ++ * ++ * 2008/06/17 - 2.6.25 - Changed imq.c to use qdisc_run() instead ++ * of qdisc_restart() and moved qdisc_run() to tasklet to avoid ++ * recursive locking. New initialization routines to fix 'rmmod' not ++ * working anymore. Used code from ifb.c. (Jussi Kivilinna) ++ * ++ * 2008/08/06 - 2.6.26 - (JK) ++ * - Replaced tasklet with 'netif_schedule()'. ++ * - Cleaned up and added comments for imq_nf_queue(). ++ * ++ * 2009/04/12 ++ * - Add skb_save_cb/skb_restore_cb helper functions for backuping ++ * control buffer. This is needed because qdisc-layer on kernels ++ * 2.6.27 and newer overwrite control buffer. (Jussi Kivilinna) ++ * - Add better locking for IMQ device. Hopefully this will solve ++ * SMP issues. 
(Jussi Kivilinna) ++ * - Port to 2.6.27 ++ * - Port to 2.6.28 ++ * - Port to 2.6.29 + fix rmmod not working ++ * ++ * 2009/04/20 - (Jussi Kivilinna) ++ * - Use netdevice feature flags to avoid extra packet handling ++ * by core networking layer and possibly increase performance. ++ * ++ * 2009/09/26 - (Jussi Kivilinna) ++ * - Add imq_nf_reinject_lockless to fix deadlock with ++ * imq_nf_queue/imq_nf_reinject. ++ * ++ * 2009/12/08 - (Jussi Kivilinna) ++ * - Port to 2.6.32 ++ * - Add check for skb->nf_queue_entry==NULL in imq_dev_xmit() ++ * - Also add better error checking for skb->nf_queue_entry usage ++ * ++ * 2010/02/25 - (Jussi Kivilinna) ++ * - Port to 2.6.33 ++ * ++ * 2010/08/15 - (Jussi Kivilinna) ++ * - Port to 2.6.35 ++ * - Simplify hook registration by using nf_register_hooks. ++ * - nf_reinject doesn't need spinlock around it, therefore remove ++ * imq_nf_reinject function. Other nf_reinject users protect ++ * their own data with spinlock. With IMQ however all data is ++ * needed is stored per skbuff, so no locking is needed. ++ * - Changed IMQ to use 'separate' NF_IMQ_QUEUE instead of ++ * NF_QUEUE, this allows working coexistance of IMQ and other ++ * NF_QUEUE users. ++ * - Make IMQ multi-queue. Number of IMQ device queues can be ++ * increased with 'numqueues' module parameters. Default number ++ * of queues is 1, in other words by default IMQ works as ++ * single-queue device. Multi-queue selection is based on ++ * IFB multi-queue patch by Changli Gao . ++ * ++ * 2011/03/18 - (Jussi Kivilinna) ++ * - Port to 2.6.38 ++ * ++ * 2011/07/12 - (syoder89@gmail.com) ++ * - Crash fix that happens when the receiving interface has more ++ * than one queue (add missing skb_set_queue_mapping in ++ * imq_select_queue). ++ * ++ * 2011/07/26 - (Jussi Kivilinna) ++ * - Add queue mapping checks for packets exiting IMQ. ++ * - Port to 3.0 ++ * ++ * 2011/08/16 - (Jussi Kivilinna) ++ * - Clear IFF_TX_SKB_SHARING flag that was added for linux 3.0.2 ++ * ++ * 2011/11/03 - Germano Michel ++ * - Fix IMQ for net namespaces ++ * ++ * 2011/11/04 - Jussi Kivilinna ++ * - Port to 3.1 ++ * - Clean-up, move 'get imq device pointer by imqX name' to ++ * separate function from imq_nf_queue(). ++ * ++ * 2012/01/05 - Jussi Kivilinna ++ * - Port to 3.2 ++ * ++ * 2012/03/19 - Jussi Kivilinna ++ * - Port to 3.3 ++ * ++ * Also, many thanks to pablo Sebastian Greco for making the initial ++ * patch and to those who helped the testing. 
++ * ++ * More info at: http://www.linuximq.net/ (Andre Correa) ++ */ ++ ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) ++ #include ++#endif ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++ ++static int imq_nf_queue(struct nf_queue_entry *entry, unsigned queue_num); ++ ++static nf_hookfn imq_nf_hook; ++ ++static struct nf_hook_ops imq_ops[] = { ++ { ++ /* imq_ingress_ipv4 */ ++ .hook = imq_nf_hook, ++ .owner = THIS_MODULE, ++ .pf = PF_INET, ++ .hooknum = NF_INET_PRE_ROUTING, ++#if defined(CONFIG_IMQ_BEHAVIOR_BA) || defined(CONFIG_IMQ_BEHAVIOR_BB) ++ .priority = NF_IP_PRI_MANGLE + 1, ++#else ++ .priority = NF_IP_PRI_NAT_DST + 1, ++#endif ++ }, ++ { ++ /* imq_egress_ipv4 */ ++ .hook = imq_nf_hook, ++ .owner = THIS_MODULE, ++ .pf = PF_INET, ++ .hooknum = NF_INET_POST_ROUTING, ++#if defined(CONFIG_IMQ_BEHAVIOR_AA) || defined(CONFIG_IMQ_BEHAVIOR_BA) ++ .priority = NF_IP_PRI_LAST, ++#else ++ .priority = NF_IP_PRI_NAT_SRC - 1, ++#endif ++ }, ++#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) ++ { ++ /* imq_ingress_ipv6 */ ++ .hook = imq_nf_hook, ++ .owner = THIS_MODULE, ++ .pf = PF_INET6, ++ .hooknum = NF_INET_PRE_ROUTING, ++#if defined(CONFIG_IMQ_BEHAVIOR_BA) || defined(CONFIG_IMQ_BEHAVIOR_BB) ++ .priority = NF_IP6_PRI_MANGLE + 1, ++#else ++ .priority = NF_IP6_PRI_NAT_DST + 1, ++#endif ++ }, ++ { ++ /* imq_egress_ipv6 */ ++ .hook = imq_nf_hook, ++ .owner = THIS_MODULE, ++ .pf = PF_INET6, ++ .hooknum = NF_INET_POST_ROUTING, ++#if defined(CONFIG_IMQ_BEHAVIOR_AA) || defined(CONFIG_IMQ_BEHAVIOR_BA) ++ .priority = NF_IP6_PRI_LAST, ++#else ++ .priority = NF_IP6_PRI_NAT_SRC - 1, ++#endif ++ }, ++#endif ++}; ++ ++#if defined(CONFIG_IMQ_NUM_DEVS) ++static int numdevs = CONFIG_IMQ_NUM_DEVS; ++#else ++static int numdevs = IMQ_MAX_DEVS; ++#endif ++ ++static struct net_device *imq_devs_cache[IMQ_MAX_DEVS]; ++ ++#define IMQ_MAX_QUEUES 32 ++static int numqueues = 1; ++static u32 imq_hashrnd; ++ ++static inline __be16 pppoe_proto(const struct sk_buff *skb) ++{ ++ return *((__be16 *)(skb_mac_header(skb) + ETH_HLEN + ++ sizeof(struct pppoe_hdr))); ++} ++ ++static u16 imq_hash(struct net_device *dev, struct sk_buff *skb) ++{ ++ unsigned int pull_len; ++ u16 protocol = skb->protocol; ++ u32 addr1, addr2; ++ u32 hash, ihl = 0; ++ union { ++ u16 in16[2]; ++ u32 in32; ++ } ports; ++ u8 ip_proto; ++ ++ pull_len = 0; ++ ++recheck: ++ switch (protocol) { ++ case htons(ETH_P_8021Q): { ++ if (unlikely(skb_pull(skb, VLAN_HLEN) == NULL)) ++ goto other; ++ ++ pull_len += VLAN_HLEN; ++ skb->network_header += VLAN_HLEN; ++ ++ protocol = vlan_eth_hdr(skb)->h_vlan_encapsulated_proto; ++ goto recheck; ++ } ++ ++ case htons(ETH_P_PPP_SES): { ++ if (unlikely(skb_pull(skb, PPPOE_SES_HLEN) == NULL)) ++ goto other; ++ ++ pull_len += PPPOE_SES_HLEN; ++ skb->network_header += PPPOE_SES_HLEN; ++ ++ protocol = pppoe_proto(skb); ++ goto recheck; ++ } ++ ++ case htons(ETH_P_IP): { ++ const struct iphdr *iph = ip_hdr(skb); ++ ++ if (unlikely(!pskb_may_pull(skb, sizeof(struct iphdr)))) ++ goto other; ++ ++ addr1 = iph->daddr; ++ addr2 = iph->saddr; ++ ++ ip_proto = !(ip_hdr(skb)->frag_off & htons(IP_MF | IP_OFFSET)) ? 
++ iph->protocol : 0; ++ ihl = ip_hdrlen(skb); ++ ++ break; ++ } ++#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) ++ case htons(ETH_P_IPV6): { ++ const struct ipv6hdr *iph = ipv6_hdr(skb); ++ __be16 fo = 0; ++ ++ if (unlikely(!pskb_may_pull(skb, sizeof(struct ipv6hdr)))) ++ goto other; ++ ++ addr1 = iph->daddr.s6_addr32[3]; ++ addr2 = iph->saddr.s6_addr32[3]; ++ ihl = ipv6_skip_exthdr(skb, sizeof(struct ipv6hdr), &ip_proto, &fo); ++ if (unlikely(ihl < 0)) ++ goto other; ++ ++ break; ++ } ++#endif ++ default: ++other: ++ if (pull_len != 0) { ++ skb_push(skb, pull_len); ++ skb->network_header -= pull_len; ++ } ++ ++ return (u16)(ntohs(protocol) % dev->real_num_tx_queues); ++ } ++ ++ if (addr1 > addr2) ++ swap(addr1, addr2); ++ ++ switch (ip_proto) { ++ case IPPROTO_TCP: ++ case IPPROTO_UDP: ++ case IPPROTO_DCCP: ++ case IPPROTO_ESP: ++ case IPPROTO_AH: ++ case IPPROTO_SCTP: ++ case IPPROTO_UDPLITE: { ++ if (likely(skb_copy_bits(skb, ihl, &ports.in32, 4) >= 0)) { ++ if (ports.in16[0] > ports.in16[1]) ++ swap(ports.in16[0], ports.in16[1]); ++ break; ++ } ++ /* fall-through */ ++ } ++ default: ++ ports.in32 = 0; ++ break; ++ } ++ ++ if (pull_len != 0) { ++ skb_push(skb, pull_len); ++ skb->network_header -= pull_len; ++ } ++ ++ hash = jhash_3words(addr1, addr2, ports.in32, imq_hashrnd ^ ip_proto); ++ ++ return (u16)(((u64)hash * dev->real_num_tx_queues) >> 32); ++} ++ ++static inline bool sk_tx_queue_recorded(struct sock *sk) ++{ ++ return (sk_tx_queue_get(sk) >= 0); ++} ++ ++static struct netdev_queue *imq_select_queue(struct net_device *dev, ++ struct sk_buff *skb) ++{ ++ u16 queue_index = 0; ++ u32 hash; ++ ++ if (likely(dev->real_num_tx_queues == 1)) ++ goto out; ++ ++ /* IMQ can be receiving ingress or engress packets. */ ++ ++ /* Check first for if rx_queue is set */ ++ if (skb_rx_queue_recorded(skb)) { ++ queue_index = skb_get_rx_queue(skb); ++ goto out; ++ } ++ ++ /* Check if socket has tx_queue set */ ++ if (sk_tx_queue_recorded(skb->sk)) { ++ queue_index = sk_tx_queue_get(skb->sk); ++ goto out; ++ } ++ ++ /* Try use socket hash */ ++ if (skb->sk && skb->sk->sk_hash) { ++ hash = skb->sk->sk_hash; ++ queue_index = ++ (u16)(((u64)hash * dev->real_num_tx_queues) >> 32); ++ goto out; ++ } ++ ++ /* Generate hash from packet data */ ++ queue_index = imq_hash(dev, skb); ++ ++out: ++ if (unlikely(queue_index >= dev->real_num_tx_queues)) ++ queue_index = (u16)((u32)queue_index % dev->real_num_tx_queues); ++ ++ skb_set_queue_mapping(skb, queue_index); ++ return netdev_get_tx_queue(dev, queue_index); ++} ++ ++static struct net_device_stats *imq_get_stats(struct net_device *dev) ++{ ++ return &dev->stats; ++} ++ ++/* called for packets kfree'd in qdiscs at places other than enqueue */ ++static void imq_skb_destructor(struct sk_buff *skb) ++{ ++ struct nf_queue_entry *entry = skb->nf_queue_entry; ++ ++ skb->nf_queue_entry = NULL; ++ ++ if (entry) { ++ nf_queue_entry_release_refs(entry); ++ kfree(entry); ++ } ++ ++ skb_restore_cb(skb); /* kfree backup */ ++} ++ ++static void imq_done_check_queue_mapping(struct sk_buff *skb, ++ struct net_device *dev) ++{ ++ unsigned int queue_index; ++ ++ /* Don't let queue_mapping be left too large after exiting IMQ */ ++ if (likely(skb->dev != dev && skb->dev != NULL)) { ++ queue_index = skb_get_queue_mapping(skb); ++ if (unlikely(queue_index >= skb->dev->real_num_tx_queues)) { ++ queue_index = (u16)((u32)queue_index % ++ skb->dev->real_num_tx_queues); ++ skb_set_queue_mapping(skb, queue_index); ++ } ++ } else { ++ /* skb->dev was IMQ device itself or 
NULL, be on safe side and ++ * just clear queue mapping. ++ */ ++ skb_set_queue_mapping(skb, 0); ++ } ++} ++ ++static netdev_tx_t imq_dev_xmit(struct sk_buff *skb, struct net_device *dev) ++{ ++ struct nf_queue_entry *entry = skb->nf_queue_entry; ++ ++ skb->nf_queue_entry = NULL; ++ dev->trans_start = jiffies; ++ ++ dev->stats.tx_bytes += skb->len; ++ dev->stats.tx_packets++; ++ ++ if (unlikely(entry == NULL)) { ++ /* We don't know what is going on here.. packet is queued for ++ * imq device, but (probably) not by us. ++ * ++ * If this packet was not send here by imq_nf_queue(), then ++ * skb_save_cb() was not used and skb_free() should not show: ++ * WARNING: IMQ: kfree_skb: skb->cb_next:.. ++ * and/or ++ * WARNING: IMQ: kfree_skb: skb->nf_queue_entry... ++ * ++ * However if this message is shown, then IMQ is somehow broken ++ * and you should report this to linuximq.net. ++ */ ++ ++ /* imq_dev_xmit is black hole that eats all packets, report that ++ * we eat this packet happily and increase dropped counters. ++ */ ++ ++ dev->stats.tx_dropped++; ++ dev_kfree_skb(skb); ++ ++ return NETDEV_TX_OK; ++ } ++ ++ skb_restore_cb(skb); /* restore skb->cb */ ++ ++ skb->imq_flags = 0; ++ skb->destructor = NULL; ++ ++ imq_done_check_queue_mapping(skb, dev); ++ ++ nf_reinject(entry, NF_ACCEPT); ++ ++ return NETDEV_TX_OK; ++} ++ ++static struct net_device *get_imq_device_by_index(int index) ++{ ++ struct net_device *dev = NULL; ++ struct net *net; ++ char buf[8]; ++ ++ /* get device by name and cache result */ ++ snprintf(buf, sizeof(buf), "imq%d", index); ++ ++ /* Search device from all namespaces. */ ++ for_each_net(net) { ++ dev = dev_get_by_name(net, buf); ++ if (dev) ++ break; ++ } ++ ++ if (WARN_ON_ONCE(dev == NULL)) { ++ /* IMQ device not found. Exotic config? */ ++ return ERR_PTR(-ENODEV); ++ } ++ ++ imq_devs_cache[index] = dev; ++ dev_put(dev); ++ ++ return dev; ++} ++ ++static int imq_nf_queue(struct nf_queue_entry *entry, unsigned queue_num) ++{ ++ struct net_device *dev; ++ struct sk_buff *skb_orig, *skb, *skb_shared; ++ struct Qdisc *q; ++ struct netdev_queue *txq; ++ spinlock_t *root_lock; ++ int users, index; ++ int retval = -EINVAL; ++ unsigned int orig_queue_index; ++ ++ index = entry->skb->imq_flags & IMQ_F_IFMASK; ++ if (unlikely(index > numdevs - 1)) { ++ if (net_ratelimit()) ++ printk(KERN_WARNING ++ "IMQ: invalid device specified, highest is %u\n", ++ numdevs - 1); ++ retval = -EINVAL; ++ goto out; ++ } ++ ++ /* check for imq device by index from cache */ ++ dev = imq_devs_cache[index]; ++ if (unlikely(!dev)) { ++ dev = get_imq_device_by_index(index); ++ if (IS_ERR(dev)) { ++ retval = PTR_ERR(dev); ++ goto out; ++ } ++ } ++ ++ if (unlikely(!(dev->flags & IFF_UP))) { ++ entry->skb->imq_flags = 0; ++ nf_reinject(entry, NF_ACCEPT); ++ retval = 0; ++ goto out; ++ } ++ dev->last_rx = jiffies; ++ ++ skb = entry->skb; ++ skb_orig = NULL; ++ ++ /* skb has owner? => make clone */ ++ if (unlikely(skb->destructor)) { ++ skb_orig = skb; ++ skb = skb_clone(skb, GFP_ATOMIC); ++ if (unlikely(!skb)) { ++ retval = -ENOMEM; ++ goto out; ++ } ++ entry->skb = skb; ++ } ++ ++ skb->nf_queue_entry = entry; ++ ++ dev->stats.rx_bytes += skb->len; ++ dev->stats.rx_packets++; ++ ++ if (!skb->dev) { ++ /* skb->dev == NULL causes problems, try the find cause. 
*/ ++ if (net_ratelimit()) { ++ dev_warn(&dev->dev, ++ "received packet with skb->dev == NULL\n"); ++ dump_stack(); ++ } ++ ++ skb->dev = dev; ++ } ++ ++ /* Disables softirqs for lock below */ ++ rcu_read_lock_bh(); ++ ++ /* Multi-queue selection */ ++ orig_queue_index = skb_get_queue_mapping(skb); ++ txq = imq_select_queue(dev, skb); ++ ++ q = rcu_dereference(txq->qdisc); ++ if (unlikely(!q->enqueue)) ++ goto packet_not_eaten_by_imq_dev; ++ ++ root_lock = qdisc_lock(q); ++ spin_lock(root_lock); ++ ++ users = atomic_read(&skb->users); ++ ++ skb_shared = skb_get(skb); /* increase reference count by one */ ++ skb_save_cb(skb_shared); /* backup skb->cb, as qdisc layer will ++ overwrite it */ ++ qdisc_enqueue_root(skb_shared, q); /* might kfree_skb */ ++ ++ if (likely(atomic_read(&skb_shared->users) == users + 1)) { ++ kfree_skb(skb_shared); /* decrease reference count by one */ ++ ++ skb->destructor = &imq_skb_destructor; ++ ++ /* cloned? */ ++ if (unlikely(skb_orig)) ++ kfree_skb(skb_orig); /* free original */ ++ ++ spin_unlock(root_lock); ++ rcu_read_unlock_bh(); ++ ++ /* schedule qdisc dequeue */ ++ __netif_schedule(q); ++ ++ retval = 0; ++ goto out; ++ } else { ++ skb_restore_cb(skb_shared); /* restore skb->cb */ ++ skb->nf_queue_entry = NULL; ++ /* qdisc dropped packet and decreased skb reference count of ++ * skb, so we don't really want to and try refree as that would ++ * actually destroy the skb. */ ++ spin_unlock(root_lock); ++ goto packet_not_eaten_by_imq_dev; ++ } ++ ++packet_not_eaten_by_imq_dev: ++ skb_set_queue_mapping(skb, orig_queue_index); ++ rcu_read_unlock_bh(); ++ ++ /* cloned? restore original */ ++ if (unlikely(skb_orig)) { ++ kfree_skb(skb); ++ entry->skb = skb_orig; ++ } ++ retval = -1; ++out: ++ return retval; ++} ++ ++static unsigned int imq_nf_hook(unsigned int hook, struct sk_buff *pskb, ++ const struct net_device *indev, ++ const struct net_device *outdev, ++ int (*okfn)(struct sk_buff *)) ++{ ++ return (pskb->imq_flags & IMQ_F_ENQUEUE) ? NF_IMQ_QUEUE : NF_ACCEPT; ++} ++ ++static int imq_close(struct net_device *dev) ++{ ++ netif_stop_queue(dev); ++ return 0; ++} ++ ++static int imq_open(struct net_device *dev) ++{ ++ netif_start_queue(dev); ++ return 0; ++} ++ ++static const struct net_device_ops imq_netdev_ops = { ++ .ndo_open = imq_open, ++ .ndo_stop = imq_close, ++ .ndo_start_xmit = imq_dev_xmit, ++ .ndo_get_stats = imq_get_stats, ++}; ++ ++static void imq_setup(struct net_device *dev) ++{ ++ dev->netdev_ops = &imq_netdev_ops; ++ dev->type = ARPHRD_VOID; ++ dev->mtu = 16000; /* too small? */ ++ dev->tx_queue_len = 11000; /* too big? 
*/ ++ dev->flags = IFF_NOARP; ++ dev->features = NETIF_F_SG | NETIF_F_FRAGLIST | ++ NETIF_F_GSO | NETIF_F_HW_CSUM | ++ NETIF_F_HIGHDMA; ++ dev->priv_flags &= ~(IFF_XMIT_DST_RELEASE | ++ IFF_TX_SKB_SHARING); ++} ++ ++static int imq_validate(struct nlattr *tb[], struct nlattr *data[]) ++{ ++ int ret = 0; ++ ++ if (tb[IFLA_ADDRESS]) { ++ if (nla_len(tb[IFLA_ADDRESS]) != ETH_ALEN) { ++ ret = -EINVAL; ++ goto end; ++ } ++ if (!is_valid_ether_addr(nla_data(tb[IFLA_ADDRESS]))) { ++ ret = -EADDRNOTAVAIL; ++ goto end; ++ } ++ } ++ return 0; ++end: ++ printk(KERN_WARNING "IMQ: imq_validate failed (%d)\n", ret); ++ return ret; ++} ++ ++static struct rtnl_link_ops imq_link_ops __read_mostly = { ++ .kind = "imq", ++ .priv_size = 0, ++ .setup = imq_setup, ++ .validate = imq_validate, ++}; ++ ++static const struct nf_queue_handler imq_nfqh = { ++ .name = "imq", ++ .outfn = imq_nf_queue, ++}; ++ ++static int __init imq_init_hooks(void) ++{ ++ int ret; ++ ++ nf_register_queue_imq_handler(&imq_nfqh); ++ ++ ret = nf_register_hooks(imq_ops, ARRAY_SIZE(imq_ops)); ++ if (ret < 0) ++ nf_unregister_queue_imq_handler(); ++ ++ return ret; ++} ++ ++static int __init imq_init_one(int index) ++{ ++ struct net_device *dev; ++ int ret; ++ ++ dev = alloc_netdev_mq(0, "imq%d", imq_setup, numqueues); ++ if (!dev) ++ return -ENOMEM; ++ ++ ret = dev_alloc_name(dev, dev->name); ++ if (ret < 0) ++ goto fail; ++ ++ dev->rtnl_link_ops = &imq_link_ops; ++ ret = register_netdevice(dev); ++ if (ret < 0) ++ goto fail; ++ ++ return 0; ++fail: ++ free_netdev(dev); ++ return ret; ++} ++ ++static int __init imq_init_devs(void) ++{ ++ int err, i; ++ ++ if (numdevs < 1 || numdevs > IMQ_MAX_DEVS) { ++ printk(KERN_ERR "IMQ: numdevs has to be betweed 1 and %u\n", ++ IMQ_MAX_DEVS); ++ return -EINVAL; ++ } ++ ++ if (numqueues < 1 || numqueues > IMQ_MAX_QUEUES) { ++ printk(KERN_ERR "IMQ: numqueues has to be betweed 1 and %u\n", ++ IMQ_MAX_QUEUES); ++ return -EINVAL; ++ } ++ ++ get_random_bytes(&imq_hashrnd, sizeof(imq_hashrnd)); ++ ++ rtnl_lock(); ++ err = __rtnl_link_register(&imq_link_ops); ++ ++ for (i = 0; i < numdevs && !err; i++) ++ err = imq_init_one(i); ++ ++ if (err) { ++ __rtnl_link_unregister(&imq_link_ops); ++ memset(imq_devs_cache, 0, sizeof(imq_devs_cache)); ++ } ++ rtnl_unlock(); ++ ++ return err; ++} ++ ++static int __init imq_init_module(void) ++{ ++ int err; ++ ++#if defined(CONFIG_IMQ_NUM_DEVS) ++ BUILD_BUG_ON(CONFIG_IMQ_NUM_DEVS > 16); ++ BUILD_BUG_ON(CONFIG_IMQ_NUM_DEVS < 2); ++ BUILD_BUG_ON(CONFIG_IMQ_NUM_DEVS - 1 > IMQ_F_IFMASK); ++#endif ++ ++ err = imq_init_devs(); ++ if (err) { ++ printk(KERN_ERR "IMQ: Error trying imq_init_devs(net)\n"); ++ return err; ++ } ++ ++ err = imq_init_hooks(); ++ if (err) { ++ printk(KERN_ERR "IMQ: Error trying imq_init_hooks()\n"); ++ rtnl_link_unregister(&imq_link_ops); ++ memset(imq_devs_cache, 0, sizeof(imq_devs_cache)); ++ return err; ++ } ++ ++ printk(KERN_INFO "IMQ driver loaded successfully. 
" ++ "(numdevs = %d, numqueues = %d)\n", numdevs, numqueues); ++ ++#if defined(CONFIG_IMQ_BEHAVIOR_BA) || defined(CONFIG_IMQ_BEHAVIOR_BB) ++ printk(KERN_INFO "\tHooking IMQ before NAT on PREROUTING.\n"); ++#else ++ printk(KERN_INFO "\tHooking IMQ after NAT on PREROUTING.\n"); ++#endif ++#if defined(CONFIG_IMQ_BEHAVIOR_AB) || defined(CONFIG_IMQ_BEHAVIOR_BB) ++ printk(KERN_INFO "\tHooking IMQ before NAT on POSTROUTING.\n"); ++#else ++ printk(KERN_INFO "\tHooking IMQ after NAT on POSTROUTING.\n"); ++#endif ++ ++ return 0; ++} ++ ++static void __exit imq_unhook(void) ++{ ++ nf_unregister_hooks(imq_ops, ARRAY_SIZE(imq_ops)); ++ nf_unregister_queue_imq_handler(); ++} ++ ++static void __exit imq_cleanup_devs(void) ++{ ++ rtnl_link_unregister(&imq_link_ops); ++ memset(imq_devs_cache, 0, sizeof(imq_devs_cache)); ++} ++ ++static void __exit imq_exit_module(void) ++{ ++ imq_unhook(); ++ imq_cleanup_devs(); ++ printk(KERN_INFO "IMQ driver unloaded successfully.\n"); ++} ++ ++module_init(imq_init_module); ++module_exit(imq_exit_module); ++ ++module_param(numdevs, int, 0); ++module_param(numqueues, int, 0); ++MODULE_PARM_DESC(numdevs, "number of IMQ devices (how many imq* devices will " ++ "be created)"); ++MODULE_PARM_DESC(numqueues, "number of queues per IMQ device"); ++MODULE_AUTHOR("http://www.linuximq.net"); ++MODULE_DESCRIPTION("Pseudo-driver for the intermediate queue device. See " ++ "http://www.linuximq.net/ for more information."); ++MODULE_LICENSE("GPL"); ++MODULE_ALIAS_RTNL_LINK("imq"); ++ +diff -uNr linux-3.3/drivers/net/Kconfig linux-3.3-imqmq/drivers/net/Kconfig +--- linux-3.3/drivers/net/Kconfig 2012-03-19 01:15:34.000000000 +0200 ++++ linux-3.3-imqmq/drivers/net/Kconfig 2012-03-19 09:46:57.656134747 +0200 +@@ -195,6 +195,125 @@ + depends on RIONET + default "128" + ++config IMQ ++ tristate "IMQ (intermediate queueing device) support" ++ depends on NETDEVICES && NETFILTER ++ ---help--- ++ The IMQ device(s) is used as placeholder for QoS queueing ++ disciplines. Every packet entering/leaving the IP stack can be ++ directed through the IMQ device where it's enqueued/dequeued to the ++ attached qdisc. This allows you to treat network devices as classes ++ and distribute bandwidth among them. Iptables is used to specify ++ through which IMQ device, if any, packets travel. ++ ++ More information at: http://www.linuximq.net/ ++ ++ To compile this driver as a module, choose M here: the module ++ will be called imq. If unsure, say N. ++ ++choice ++ prompt "IMQ behavior (PRE/POSTROUTING)" ++ depends on IMQ ++ default IMQ_BEHAVIOR_AB ++ help ++ This setting defines how IMQ behaves in respect to its ++ hooking in PREROUTING and POSTROUTING. ++ ++ IMQ can work in any of the following ways: ++ ++ PREROUTING | POSTROUTING ++ -----------------|------------------- ++ #1 After NAT | After NAT ++ #2 After NAT | Before NAT ++ #3 Before NAT | After NAT ++ #4 Before NAT | Before NAT ++ ++ The default behavior is to hook before NAT on PREROUTING ++ and after NAT on POSTROUTING (#3). ++ ++ This settings are specially usefull when trying to use IMQ ++ to shape NATed clients. ++ ++ More information can be found at: www.linuximq.net ++ ++ If not sure leave the default settings alone. ++ ++config IMQ_BEHAVIOR_AA ++ bool "IMQ AA" ++ help ++ This setting defines how IMQ behaves in respect to its ++ hooking in PREROUTING and POSTROUTING. 
++ ++ Choosing this option will make IMQ hook like this: ++ ++ PREROUTING: After NAT ++ POSTROUTING: After NAT ++ ++ More information can be found at: www.linuximq.net ++ ++ If not sure leave the default settings alone. ++ ++config IMQ_BEHAVIOR_AB ++ bool "IMQ AB" ++ help ++ This setting defines how IMQ behaves in respect to its ++ hooking in PREROUTING and POSTROUTING. ++ ++ Choosing this option will make IMQ hook like this: ++ ++ PREROUTING: After NAT ++ POSTROUTING: Before NAT ++ ++ More information can be found at: www.linuximq.net ++ ++ If not sure leave the default settings alone. ++ ++config IMQ_BEHAVIOR_BA ++ bool "IMQ BA" ++ help ++ This setting defines how IMQ behaves in respect to its ++ hooking in PREROUTING and POSTROUTING. ++ ++ Choosing this option will make IMQ hook like this: ++ ++ PREROUTING: Before NAT ++ POSTROUTING: After NAT ++ ++ More information can be found at: www.linuximq.net ++ ++ If not sure leave the default settings alone. ++ ++config IMQ_BEHAVIOR_BB ++ bool "IMQ BB" ++ help ++ This setting defines how IMQ behaves in respect to its ++ hooking in PREROUTING and POSTROUTING. ++ ++ Choosing this option will make IMQ hook like this: ++ ++ PREROUTING: Before NAT ++ POSTROUTING: Before NAT ++ ++ More information can be found at: www.linuximq.net ++ ++ If not sure leave the default settings alone. ++ ++endchoice ++ ++config IMQ_NUM_DEVS ++ int "Number of IMQ devices" ++ range 2 16 ++ depends on IMQ ++ default "16" ++ help ++ This setting defines how many IMQ devices will be created. ++ ++ The default value is 16. ++ ++ More information can be found at: www.linuximq.net ++ ++ If not sure leave the default settings alone. ++ + config TUN + tristate "Universal TUN/TAP device driver support" + select CRC32 +diff -uNr linux-3.3/drivers/net/Makefile linux-3.3-imqmq/drivers/net/Makefile +--- linux-3.3/drivers/net/Makefile 2012-03-19 01:15:34.000000000 +0200 ++++ linux-3.3-imqmq/drivers/net/Makefile 2012-03-19 09:46:57.656134747 +0200 +@@ -9,6 +9,7 @@ + obj-$(CONFIG_DUMMY) += dummy.o + obj-$(CONFIG_EQUALIZER) += eql.o + obj-$(CONFIG_IFB) += ifb.o ++obj-$(CONFIG_IMQ) += imq.o + obj-$(CONFIG_MACVLAN) += macvlan.o + obj-$(CONFIG_MACVTAP) += macvtap.o + obj-$(CONFIG_MII) += mii.o +diff -uNr linux-3.3/include/linux/imq.h linux-3.3-imqmq/include/linux/imq.h +--- linux-3.3/include/linux/imq.h 1970-01-01 02:00:00.000000000 +0200 ++++ linux-3.3-imqmq/include/linux/imq.h 2012-03-19 09:46:57.656134747 +0200 +@@ -0,0 +1,13 @@ ++#ifndef _IMQ_H ++#define _IMQ_H ++ ++/* IFMASK (16 device indexes, 0 to 15) and flag(s) fit in 5 bits */ ++#define IMQ_F_BITS 5 ++ ++#define IMQ_F_IFMASK 0x0f ++#define IMQ_F_ENQUEUE 0x10 ++ ++#define IMQ_MAX_DEVS (IMQ_F_IFMASK + 1) ++ ++#endif /* _IMQ_H */ ++ +diff -uNr linux-3.3/include/linux/netfilter/xt_IMQ.h linux-3.3-imqmq/include/linux/netfilter/xt_IMQ.h +--- linux-3.3/include/linux/netfilter/xt_IMQ.h 1970-01-01 02:00:00.000000000 +0200 ++++ linux-3.3-imqmq/include/linux/netfilter/xt_IMQ.h 2012-03-19 09:46:57.656134747 +0200 +@@ -0,0 +1,9 @@ ++#ifndef _XT_IMQ_H ++#define _XT_IMQ_H ++ ++struct xt_imq_info { ++ unsigned int todev; /* target imq device */ ++}; ++ ++#endif /* _XT_IMQ_H */ ++ +diff -uNr linux-3.3/include/linux/netfilter.h linux-3.3-imqmq/include/linux/netfilter.h +--- linux-3.3/include/linux/netfilter.h 2012-03-19 01:15:34.000000000 +0200 ++++ linux-3.3-imqmq/include/linux/netfilter.h 2012-03-19 09:46:57.656134747 +0200 +@@ -22,7 +22,8 @@ + #define NF_QUEUE 3 + #define NF_REPEAT 4 + #define NF_STOP 5 +-#define NF_MAX_VERDICT NF_STOP ++#define 
NF_IMQ_QUEUE 6 ++#define NF_MAX_VERDICT NF_IMQ_QUEUE + + /* we overload the higher bits for encoding auxiliary data such as the queue + * number or errno values. Not nice, but better than additional function +diff -uNr linux-3.3/include/linux/netfilter_ipv4/ipt_IMQ.h linux-3.3-imqmq/include/linux/netfilter_ipv4/ipt_IMQ.h +--- linux-3.3/include/linux/netfilter_ipv4/ipt_IMQ.h 1970-01-01 02:00:00.000000000 +0200 ++++ linux-3.3-imqmq/include/linux/netfilter_ipv4/ipt_IMQ.h 2012-03-19 09:46:57.656134747 +0200 +@@ -0,0 +1,10 @@ ++#ifndef _IPT_IMQ_H ++#define _IPT_IMQ_H ++ ++/* Backwards compatibility for old userspace */ ++#include ++ ++#define ipt_imq_info xt_imq_info ++ ++#endif /* _IPT_IMQ_H */ ++ +diff -uNr linux-3.3/include/linux/netfilter_ipv6/ip6t_IMQ.h linux-3.3-imqmq/include/linux/netfilter_ipv6/ip6t_IMQ.h +--- linux-3.3/include/linux/netfilter_ipv6/ip6t_IMQ.h 1970-01-01 02:00:00.000000000 +0200 ++++ linux-3.3-imqmq/include/linux/netfilter_ipv6/ip6t_IMQ.h 2012-03-19 09:46:57.656134747 +0200 +@@ -0,0 +1,10 @@ ++#ifndef _IP6T_IMQ_H ++#define _IP6T_IMQ_H ++ ++/* Backwards compatibility for old userspace */ ++#include ++ ++#define ip6t_imq_info xt_imq_info ++ ++#endif /* _IP6T_IMQ_H */ ++ +diff -uNr linux-3.3/include/linux/skbuff.h linux-3.3-imqmq/include/linux/skbuff.h +--- linux-3.3/include/linux/skbuff.h 2012-03-19 01:15:34.000000000 +0200 ++++ linux-3.3-imqmq/include/linux/skbuff.h 2012-03-19 09:49:11.892204930 +0200 +@@ -31,6 +31,9 @@ + #include + #include + #include ++#if defined(CONFIG_IMQ) || defined(CONFIG_IMQ_MODULE) ++#include ++#endif + + /* Don't change this without changing skb_csum_unnecessary! */ + #define CHECKSUM_NONE 0 +@@ -395,6 +398,9 @@ + * first. This is owned by whoever has the skb queued ATM. + */ + char cb[48] __aligned(8); ++#if defined(CONFIG_IMQ) || defined(CONFIG_IMQ_MODULE) ++ void *cb_next; ++#endif + + unsigned long _skb_refdst; + #ifdef CONFIG_XFRM +@@ -433,6 +439,9 @@ + #ifdef NET_SKBUFF_NF_DEFRAG_NEEDED + struct sk_buff *nfct_reasm; + #endif ++#if defined(CONFIG_IMQ) || defined(CONFIG_IMQ_MODULE) ++ struct nf_queue_entry *nf_queue_entry; ++#endif + #ifdef CONFIG_BRIDGE_NETFILTER + struct nf_bridge_info *nf_bridge; + #endif +@@ -459,6 +468,10 @@ + /* 10/12 bit hole (depending on ndisc_nodetype presence) */ + kmemcheck_bitfield_end(flags2); + ++#if defined(CONFIG_IMQ) || defined(CONFIG_IMQ_MODULE) ++ __u8 imq_flags:IMQ_F_BITS; ++#endif ++ + #ifdef CONFIG_NET_DMA + dma_cookie_t dma_cookie; + #endif +@@ -545,6 +558,12 @@ + return (struct rtable *)skb_dst(skb); + } + ++ ++#if defined(CONFIG_IMQ) || defined(CONFIG_IMQ_MODULE) ++extern int skb_save_cb(struct sk_buff *skb); ++extern int skb_restore_cb(struct sk_buff *skb); ++#endif ++ + extern void kfree_skb(struct sk_buff *skb); + extern void consume_skb(struct sk_buff *skb); + extern void __kfree_skb(struct sk_buff *skb); +@@ -2364,6 +2383,10 @@ + dst->nfct_reasm = src->nfct_reasm; + nf_conntrack_get_reasm(src->nfct_reasm); + #endif ++#if defined(CONFIG_IMQ) || defined(CONFIG_IMQ_MODULE) ++ dst->imq_flags = src->imq_flags; ++ dst->nf_queue_entry = src->nf_queue_entry; ++#endif + #ifdef CONFIG_BRIDGE_NETFILTER + dst->nf_bridge = src->nf_bridge; + nf_bridge_get(src->nf_bridge); +diff -uNr linux-3.3/include/net/netfilter/nf_queue.h linux-3.3-imqmq/include/net/netfilter/nf_queue.h +--- linux-3.3/include/net/netfilter/nf_queue.h 2012-03-19 01:15:34.000000000 +0200 ++++ linux-3.3-imqmq/include/net/netfilter/nf_queue.h 2012-03-19 09:46:57.662801551 +0200 +@@ -30,5 +30,11 @@ + const struct nf_queue_handler *qh); + 
extern void nf_unregister_queue_handlers(const struct nf_queue_handler *qh); + extern void nf_reinject(struct nf_queue_entry *entry, unsigned int verdict); ++extern void nf_queue_entry_release_refs(struct nf_queue_entry *entry); ++ ++#if defined(CONFIG_IMQ) || defined(CONFIG_IMQ_MODULE) ++extern void nf_register_queue_imq_handler(const struct nf_queue_handler *qh); ++extern void nf_unregister_queue_imq_handler(void); ++#endif + + #endif /* _NF_QUEUE_H */ +diff -uNr linux-3.3/net/core/dev.c linux-3.3-imqmq/net/core/dev.c +--- linux-3.3/net/core/dev.c 2012-03-19 01:15:34.000000000 +0200 ++++ linux-3.3-imqmq/net/core/dev.c 2012-03-19 09:46:57.669468353 +0200 +@@ -98,6 +98,9 @@ + #include + #include + #include ++#if defined(CONFIG_IMQ) || defined(CONFIG_IMQ_MODULE) ++#include ++#endif + #include + #include + #include +@@ -2207,7 +2210,12 @@ + if (dev->priv_flags & IFF_XMIT_DST_RELEASE) + skb_dst_drop(skb); + ++#if defined(CONFIG_IMQ) || defined(CONFIG_IMQ_MODULE) ++ if (!list_empty(&ptype_all) && ++ !(skb->imq_flags & IMQ_F_ENQUEUE)) ++#else + if (!list_empty(&ptype_all)) ++#endif + dev_queue_xmit_nit(skb, dev); + + skb_orphan_try(skb); +diff -uNr linux-3.3/net/core/skbuff.c linux-3.3-imqmq/net/core/skbuff.c +--- linux-3.3/net/core/skbuff.c 2012-03-19 01:15:34.000000000 +0200 ++++ linux-3.3-imqmq/net/core/skbuff.c 2012-03-19 09:52:12.300707734 +0200 +@@ -73,6 +73,9 @@ + + static struct kmem_cache *skbuff_head_cache __read_mostly; + static struct kmem_cache *skbuff_fclone_cache __read_mostly; ++#if defined(CONFIG_IMQ) || defined(CONFIG_IMQ_MODULE) ++static struct kmem_cache *skbuff_cb_store_cache __read_mostly; ++#endif + + static void sock_pipe_buf_release(struct pipe_inode_info *pipe, + struct pipe_buffer *buf) +@@ -92,6 +95,82 @@ + return 1; + } + ++#if defined(CONFIG_IMQ) || defined(CONFIG_IMQ_MODULE) ++/* Control buffer save/restore for IMQ devices */ ++struct skb_cb_table { ++ char cb[48] __aligned(8); ++ void *cb_next; ++ atomic_t refcnt; ++}; ++ ++static DEFINE_SPINLOCK(skb_cb_store_lock); ++ ++int skb_save_cb(struct sk_buff *skb) ++{ ++ struct skb_cb_table *next; ++ ++ next = kmem_cache_alloc(skbuff_cb_store_cache, GFP_ATOMIC); ++ if (!next) ++ return -ENOMEM; ++ ++ BUILD_BUG_ON(sizeof(skb->cb) != sizeof(next->cb)); ++ ++ memcpy(next->cb, skb->cb, sizeof(skb->cb)); ++ next->cb_next = skb->cb_next; ++ ++ atomic_set(&next->refcnt, 1); ++ ++ skb->cb_next = next; ++ return 0; ++} ++EXPORT_SYMBOL(skb_save_cb); ++ ++int skb_restore_cb(struct sk_buff *skb) ++{ ++ struct skb_cb_table *next; ++ ++ if (!skb->cb_next) ++ return 0; ++ ++ next = skb->cb_next; ++ ++ BUILD_BUG_ON(sizeof(skb->cb) != sizeof(next->cb)); ++ ++ memcpy(skb->cb, next->cb, sizeof(skb->cb)); ++ skb->cb_next = next->cb_next; ++ ++ spin_lock(&skb_cb_store_lock); ++ ++ if (atomic_dec_and_test(&next->refcnt)) ++ kmem_cache_free(skbuff_cb_store_cache, next); ++ ++ spin_unlock(&skb_cb_store_lock); ++ ++ return 0; ++} ++EXPORT_SYMBOL(skb_restore_cb); ++ ++static void skb_copy_stored_cb(struct sk_buff *new, const struct sk_buff *__old) ++{ ++ struct skb_cb_table *next; ++ struct sk_buff *old; ++ ++ if (!__old->cb_next) { ++ new->cb_next = NULL; ++ return; ++ } ++ ++ spin_lock(&skb_cb_store_lock); ++ ++ old = (struct sk_buff *)__old; ++ ++ next = old->cb_next; ++ atomic_inc(&next->refcnt); ++ new->cb_next = next; ++ ++ spin_unlock(&skb_cb_store_lock); ++} ++#endif + + /* Pipe buffer operations for a socket. 
*/ + static const struct pipe_buf_operations sock_pipe_buf_ops = { +@@ -452,6 +531,29 @@ + WARN_ON(in_irq()); + skb->destructor(skb); + } ++#if defined(CONFIG_IMQ) || defined(CONFIG_IMQ_MODULE) ++ /* ++ * This should not happen. When it does, avoid memleak by restoring ++ * the chain of cb-backups. ++ */ ++ while (skb->cb_next != NULL) { ++ if (net_ratelimit()) ++ printk(KERN_WARNING "IMQ: kfree_skb: skb->cb_next: " ++ "%08x\n", (unsigned int)skb->cb_next); ++ ++ skb_restore_cb(skb); ++ } ++ /* ++ * This should not happen either, nf_queue_entry is nullified in ++ * imq_dev_xmit(). If we have non-NULL nf_queue_entry then we are ++ * leaking entry pointers, maybe memory. We don't know if this is ++ * pointer to already freed memory, or should this be freed. ++ * If this happens we need to add refcounting, etc for nf_queue_entry. ++ */ ++ if (skb->nf_queue_entry && net_ratelimit()) ++ printk(KERN_WARNING ++ "IMQ: kfree_skb: skb->nf_queue_entry != NULL"); ++#endif + #if IS_ENABLED(CONFIG_NF_CONNTRACK) + nf_conntrack_put(skb->nfct); + #endif +@@ -596,6 +698,9 @@ + new->sp = secpath_get(old->sp); + #endif + memcpy(new->cb, old->cb, sizeof(old->cb)); ++#if defined(CONFIG_IMQ) || defined(CONFIG_IMQ_MODULE) ++ skb_copy_stored_cb(new, old); ++#endif + new->csum = old->csum; + new->local_df = old->local_df; + new->pkt_type = old->pkt_type; +@@ -2956,6 +3061,13 @@ + 0, + SLAB_HWCACHE_ALIGN|SLAB_PANIC, + NULL); ++#if defined(CONFIG_IMQ) || defined(CONFIG_IMQ_MODULE) ++ skbuff_cb_store_cache = kmem_cache_create("skbuff_cb_store_cache", ++ sizeof(struct skb_cb_table), ++ 0, ++ SLAB_HWCACHE_ALIGN|SLAB_PANIC, ++ NULL); ++#endif + } + + /** +diff -uNr linux-3.3/net/ipv6/ip6_output.c linux-3.3-imqmq/net/ipv6/ip6_output.c +--- linux-3.3/net/ipv6/ip6_output.c 2012-03-19 01:15:34.000000000 +0200 ++++ linux-3.3-imqmq/net/ipv6/ip6_output.c 2012-03-19 09:46:57.672801754 +0200 +@@ -102,9 +102,6 @@ + struct net_device *dev = dst->dev; + struct neighbour *neigh; + +- skb->protocol = htons(ETH_P_IPV6); +- skb->dev = dev; +- + if (ipv6_addr_is_multicast(&ipv6_hdr(skb)->daddr)) { + struct inet6_dev *idev = ip6_dst_idev(skb_dst(skb)); + +@@ -170,6 +167,11 @@ + return 0; + } + ++ /* IMQ-patch: moved setting skb->dev and skb->protocol from ++ * ip6_finish_output2 to fix crashing at netif_skb_features(). 
*/ ++ skb->protocol = htons(ETH_P_IPV6); ++ skb->dev = dev; ++ + return NF_HOOK_COND(NFPROTO_IPV6, NF_INET_POST_ROUTING, skb, NULL, dev, + ip6_finish_output, + !(IP6CB(skb)->flags & IP6SKB_REROUTED)); +diff -uNr linux-3.3/net/netfilter/core.c linux-3.3-imqmq/net/netfilter/core.c +--- linux-3.3/net/netfilter/core.c 2012-03-19 01:15:34.000000000 +0200 ++++ linux-3.3-imqmq/net/netfilter/core.c 2012-03-19 09:46:57.676135156 +0200 +@@ -190,9 +190,11 @@ + ret = NF_DROP_GETERR(verdict); + if (ret == 0) + ret = -EPERM; +- } else if ((verdict & NF_VERDICT_MASK) == NF_QUEUE) { ++ } else if ((verdict & NF_VERDICT_MASK) == NF_QUEUE || ++ (verdict & NF_VERDICT_MASK) == NF_IMQ_QUEUE) { + int err = nf_queue(skb, elem, pf, hook, indev, outdev, okfn, +- verdict >> NF_VERDICT_QBITS); ++ verdict >> NF_VERDICT_QBITS, ++ verdict & NF_VERDICT_MASK); + if (err < 0) { + if (err == -ECANCELED) + goto next_hook; +diff -uNr linux-3.3/net/netfilter/Kconfig linux-3.3-imqmq/net/netfilter/Kconfig +--- linux-3.3/net/netfilter/Kconfig 2012-03-19 01:15:34.000000000 +0200 ++++ linux-3.3-imqmq/net/netfilter/Kconfig 2012-03-19 09:46:57.676135156 +0200 +@@ -524,6 +524,18 @@ + For more information on the LEDs available on your system, see + Documentation/leds/leds-class.txt + ++config NETFILTER_XT_TARGET_IMQ ++ tristate '"IMQ" target support' ++ depends on NETFILTER_XTABLES ++ depends on IP_NF_MANGLE || IP6_NF_MANGLE ++ select IMQ ++ default m if NETFILTER_ADVANCED=n ++ help ++ This option adds a `IMQ' target which is used to specify if and ++ to which imq device packets should get enqueued/dequeued. ++ ++ To compile it as a module, choose M here. If unsure, say N. ++ + config NETFILTER_XT_TARGET_MARK + tristate '"MARK" target support' + depends on NETFILTER_ADVANCED +diff -uNr linux-3.3/net/netfilter/Makefile linux-3.3-imqmq/net/netfilter/Makefile +--- linux-3.3/net/netfilter/Makefile 2012-03-19 01:15:34.000000000 +0200 ++++ linux-3.3-imqmq/net/netfilter/Makefile 2012-03-19 09:46:57.676135156 +0200 +@@ -57,6 +57,7 @@ + obj-$(CONFIG_NETFILTER_XT_TARGET_CT) += xt_CT.o + obj-$(CONFIG_NETFILTER_XT_TARGET_DSCP) += xt_DSCP.o + obj-$(CONFIG_NETFILTER_XT_TARGET_HL) += xt_HL.o ++obj-$(CONFIG_NETFILTER_XT_TARGET_IMQ) += xt_IMQ.o + obj-$(CONFIG_NETFILTER_XT_TARGET_LED) += xt_LED.o + obj-$(CONFIG_NETFILTER_XT_TARGET_NFLOG) += xt_NFLOG.o + obj-$(CONFIG_NETFILTER_XT_TARGET_NFQUEUE) += xt_NFQUEUE.o +diff -uNr linux-3.3/net/netfilter/nf_internals.h linux-3.3-imqmq/net/netfilter/nf_internals.h +--- linux-3.3/net/netfilter/nf_internals.h 2012-03-19 01:15:34.000000000 +0200 ++++ linux-3.3-imqmq/net/netfilter/nf_internals.h 2012-03-19 09:46:57.676135156 +0200 +@@ -29,7 +29,7 @@ + struct net_device *indev, + struct net_device *outdev, + int (*okfn)(struct sk_buff *), +- unsigned int queuenum); ++ unsigned int queuenum, unsigned int queuetype); + extern int __init netfilter_queue_init(void); + + /* nf_log.c */ +diff -uNr linux-3.3/net/netfilter/nf_queue.c linux-3.3-imqmq/net/netfilter/nf_queue.c +--- linux-3.3/net/netfilter/nf_queue.c 2012-03-19 01:15:34.000000000 +0200 ++++ linux-3.3-imqmq/net/netfilter/nf_queue.c 2012-03-19 09:48:44.658316350 +0200 +@@ -22,6 +22,26 @@ + + static DEFINE_MUTEX(queue_handler_mutex); + ++#if defined(CONFIG_IMQ) || defined(CONFIG_IMQ_MODULE) ++static const struct nf_queue_handler *queue_imq_handler; ++ ++void nf_register_queue_imq_handler(const struct nf_queue_handler *qh) ++{ ++ mutex_lock(&queue_handler_mutex); ++ rcu_assign_pointer(queue_imq_handler, qh); ++ mutex_unlock(&queue_handler_mutex); ++} 
++EXPORT_SYMBOL_GPL(nf_register_queue_imq_handler); ++ ++void nf_unregister_queue_imq_handler(void) ++{ ++ mutex_lock(&queue_handler_mutex); ++ rcu_assign_pointer(queue_imq_handler, NULL); ++ mutex_unlock(&queue_handler_mutex); ++} ++EXPORT_SYMBOL_GPL(nf_unregister_queue_imq_handler); ++#endif ++ + /* return EBUSY when somebody else is registered, return EEXIST if the + * same handler is registered, return 0 in case of success. */ + int nf_register_queue_handler(u_int8_t pf, const struct nf_queue_handler *qh) +@@ -92,7 +112,7 @@ + } + EXPORT_SYMBOL_GPL(nf_unregister_queue_handlers); + +-static void nf_queue_entry_release_refs(struct nf_queue_entry *entry) ++void nf_queue_entry_release_refs(struct nf_queue_entry *entry) + { + /* Release those devices we held, or Alexey will kill me. */ + if (entry->indev) +@@ -112,6 +132,7 @@ + /* Drop reference to owner of hook which queued us. */ + module_put(entry->elem->owner); + } ++EXPORT_SYMBOL_GPL(nf_queue_entry_release_refs); + + /* + * Any packet that leaves via this function must come back +@@ -123,7 +144,8 @@ + struct net_device *indev, + struct net_device *outdev, + int (*okfn)(struct sk_buff *), +- unsigned int queuenum) ++ unsigned int queuenum, ++ unsigned int queuetype) + { + int status = -ENOENT; + struct nf_queue_entry *entry = NULL; +@@ -137,7 +159,17 @@ + /* QUEUE == DROP if no one is waiting, to be safe. */ + rcu_read_lock(); + +- qh = rcu_dereference(queue_handler[pf]); ++ if (queuetype == NF_IMQ_QUEUE) { ++#if defined(CONFIG_IMQ) || defined(CONFIG_IMQ_MODULE) ++ qh = rcu_dereference(queue_imq_handler); ++#else ++ BUG(); ++ goto err_unlock; ++#endif ++ } else { ++ qh = rcu_dereference(queue_handler[pf]); ++ } ++ + if (!qh) { + status = -ESRCH; + goto err_unlock; +@@ -230,7 +262,8 @@ + struct net_device *indev, + struct net_device *outdev, + int (*okfn)(struct sk_buff *), +- unsigned int queuenum) ++ unsigned int queuenum, ++ unsigned int queuetype) + { + struct sk_buff *segs; + int err = -EINVAL; +@@ -238,7 +271,7 @@ + + if (!skb_is_gso(skb)) + return __nf_queue(skb, elem, pf, hook, indev, outdev, okfn, +- queuenum); ++ queuenum, queuetype); + + switch (pf) { + case NFPROTO_IPV4: +@@ -266,7 +299,7 @@ + if (err == 0) { + nf_bridge_adjust_segmented_data(segs); + err = __nf_queue(segs, elem, pf, hook, indev, +- outdev, okfn, queuenum); ++ outdev, okfn, queuenum, queuetype); + } + if (err == 0) + queued++; +@@ -323,9 +356,11 @@ + local_bh_enable(); + break; + case NF_QUEUE: ++ case NF_IMQ_QUEUE: + err = __nf_queue(skb, elem, entry->pf, entry->hook, + entry->indev, entry->outdev, entry->okfn, +- verdict >> NF_VERDICT_QBITS); ++ verdict >> NF_VERDICT_QBITS, ++ verdict & NF_VERDICT_MASK); + if (err < 0) { + if (err == -ECANCELED) + goto next_hook; +diff -uNr linux-3.3/net/netfilter/xt_IMQ.c linux-3.3-imqmq/net/netfilter/xt_IMQ.c +--- linux-3.3/net/netfilter/xt_IMQ.c 1970-01-01 02:00:00.000000000 +0200 ++++ linux-3.3-imqmq/net/netfilter/xt_IMQ.c 2012-03-19 09:46:57.679468557 +0200 +@@ -0,0 +1,74 @@ ++/* ++ * This target marks packets to be enqueued to an imq device ++ */ ++#include ++#include ++#include ++#include ++#include ++ ++static unsigned int imq_target(struct sk_buff *pskb, ++ const struct xt_action_param *par) ++{ ++ const struct xt_imq_info *mr = par->targinfo; ++ ++ pskb->imq_flags = (mr->todev & IMQ_F_IFMASK) | IMQ_F_ENQUEUE; ++ ++ return XT_CONTINUE; ++} ++ ++static int imq_checkentry(const struct xt_tgchk_param *par) ++{ ++ struct xt_imq_info *mr = par->targinfo; ++ ++ if (mr->todev > IMQ_MAX_DEVS - 1) { ++ printk(KERN_WARNING 
++ "IMQ: invalid device specified, highest is %u\n", ++ IMQ_MAX_DEVS - 1); ++ return -EINVAL; ++ } ++ ++ return 0; ++} ++ ++static struct xt_target xt_imq_reg[] __read_mostly = { ++ { ++ .name = "IMQ", ++ .family = AF_INET, ++ .checkentry = imq_checkentry, ++ .target = imq_target, ++ .targetsize = sizeof(struct xt_imq_info), ++ .table = "mangle", ++ .me = THIS_MODULE ++ }, ++ { ++ .name = "IMQ", ++ .family = AF_INET6, ++ .checkentry = imq_checkentry, ++ .target = imq_target, ++ .targetsize = sizeof(struct xt_imq_info), ++ .table = "mangle", ++ .me = THIS_MODULE ++ }, ++}; ++ ++static int __init imq_init(void) ++{ ++ return xt_register_targets(xt_imq_reg, ARRAY_SIZE(xt_imq_reg)); ++} ++ ++static void __exit imq_fini(void) ++{ ++ xt_unregister_targets(xt_imq_reg, ARRAY_SIZE(xt_imq_reg)); ++} ++ ++module_init(imq_init); ++module_exit(imq_fini); ++ ++MODULE_AUTHOR("http://www.linuximq.net"); ++MODULE_DESCRIPTION("Pseudo-driver for the intermediate queue device. " ++ "See http://www.linuximq.net/ for more information."); ++MODULE_LICENSE("GPL"); ++MODULE_ALIAS("ipt_IMQ"); ++MODULE_ALIAS("ip6t_IMQ"); ++ diff --git a/3.3.8/kbuild-compress-kernel-modules-on-installation.patch b/3.3.8/kbuild-compress-kernel-modules-on-installation.patch new file mode 100644 index 0000000..cb3cc7f --- /dev/null +++ b/3.3.8/kbuild-compress-kernel-modules-on-installation.patch @@ -0,0 +1,137 @@ +================================ +Signed-off-by: Steve Brokenshire +[Rediffed for 2.6.31.3, defaulted to y and compress with -9 /Thomas] +Signed-off-by: Thomas Backlund + +diff -Nurp linux-2.6.31/Documentation/kbuild/modules.txt linux-2.6.31.compress/Documentation/kbuild/modules.txt +--- linux-2.6.31/Documentation/kbuild/modules.txt 2009-09-10 01:13:59.000000000 +0300 ++++ linux-2.6.31.compress/Documentation/kbuild/modules.txt 2009-10-09 14:17:49.335619817 +0300 +@@ -123,6 +123,13 @@ executed to make module versioning work. + Install the external module(s). The default location is + /lib/modules//extra/, but a prefix may + be added with INSTALL_MOD_PATH (discussed in section 5). ++ If MODULES_COMPRESS is set when the modules_install target is ++ run then the module is compressed after it has been ++ copied to /lib/modules/. Compressed modules ++ using the default gzip compression format will require ++ module-init-tools installed with --zlib-enabled. ++ Any options set in MODULE_COMPRESS_OPTIONS will be ++ passed to the selected compression format. + + clean + Remove all generated files in the module directory only. +diff -Nurp linux-2.6.31/init/Kconfig linux-2.6.31.compress/init/Kconfig +--- linux-2.6.31/init/Kconfig 2009-09-10 01:13:59.000000000 +0300 ++++ linux-2.6.31.compress/init/Kconfig 2009-10-09 14:19:01.812591181 +0300 +@@ -1161,6 +1161,64 @@ config MODULE_FORCE_UNLOAD + rmmod). This is mainly for kernel developers and desperate users. + If unsure, say N. + ++config MODULE_COMPRESS ++ bool "Compress kernel modules on installation" ++ depends on MODULES ++ default y ++ help ++ This option compresses the kernel modules when 'make ++ modules_install' is run. ++ ++ The modules will be compressed into the selected compression ++ format with gzip being the default compression format. ++ ++ When a kernel module is installed from outside of the main kernel ++ source and uses the Kbuild system for installing modules then that ++ kernel module will also be compressed when it is installed. ++ ++ When running mkinitrd you will find that an error message ++ appears saying that it cannot find a certain kernel module. 
++ As a workaround, unset CONFIG_MODULE_COMPRESS, build the modules ++ and install them, run mkinitrd and create the initrd image, place ++ the initrd image in the correct place for booting, set ++ CONFIG_MODULE_COMPRESS and then install the modules again. ++ ++ This option requires the module-init-tools package to be ++ configured with --enable-zlib (if using gzip which is the ++ default compression format). ++ ++ If unsure, say Y. ++ ++config MODULE_COMPRESS_OPTIONS ++ string "Compression format command line options" ++ depends on MODULE_COMPRESS ++ default "-9" ++ help ++ This option specifies the command line options to be used for ++ the selected compression format. ++ ++ Please refer to the selected compression format's documentation ++ on which options should be used. ++ ++ If unsure, leave this option blank. ++ ++choice ++ prompt "Kernel module compression format" ++ depends on MODULE_COMPRESS ++ default MODULE_COMPRESS_GZIP ++ ++config MODULE_COMPRESS_GZIP ++ bool "gzip compression" ++ help ++ Compresses the kernel modules using the gzip (GNU zip) ++ compression format. ++ ++ This option requires gzip to be installed. ++ ++ If unsure, leave this option selected. ++ ++endchoice ++ + config MODVERSIONS + bool "Module versioning support" + help +diff -Nurp linux-2.6.31/scripts/Makefile.modinst linux-2.6.31.compress/scripts/Makefile.modinst +--- linux-2.6.31/scripts/Makefile.modinst 2009-09-10 01:13:59.000000000 +0300 ++++ linux-2.6.31.compress/scripts/Makefile.modinst 2009-10-09 14:17:49.337619404 +0300 +@@ -5,6 +5,7 @@ + PHONY := __modinst + __modinst: + ++include include/config/auto.conf + include scripts/Kbuild.include + + # +@@ -16,8 +17,21 @@ PHONY += $(modules) + __modinst: $(modules) + @: + +-quiet_cmd_modules_install = INSTALL $@ +- cmd_modules_install = mkdir -p $(2); cp $@ $(2) ; $(mod_strip_cmd) $(2)/$(notdir $@) ++ifeq ($(CONFIG_MODULE_COMPRESS_OPTIONS), "") ++else ++ MODCOMPOPT = $(shell echo -n $(CONFIG_MODULE_COMPRESS_OPTIONS)) ++endif ++ ++quiet_cmd_modules_install = INSTALL $@ ++ cmd_modules_install = mkdir -p $(2); \ ++ cp $@ $(2) ; \ ++ $(mod_strip_cmd) $(2)/$(notdir $@) ++ ++quiet_cmd_modules_compress_gzip = COMPRESS $@ ++ cmd_modules_compress_gzip = gzip $(MODCOMPOPT) -c \ ++ $(2)/$(@F) \ ++ > $(2)/$(@F).gz; \ ++ rm $(2)/$(@F) + + # Modules built outside the kernel source tree go into extra by default + INSTALL_MOD_DIR ?= extra +@@ -26,8 +40,11 @@ ext-mod-dir = $(INSTALL_MOD_DIR)$(subst + modinst_dir = $(if $(KBUILD_EXTMOD),$(ext-mod-dir),kernel/$(@D)) + + $(modules): ++ + $(call cmd,modules_install,$(MODLIB)/$(modinst_dir)) + ++ $(if $(CONFIG_MODULE_COMPRESS_GZIP), \ ++ $(call cmd,modules_compress_gzip,$(MODLIB)/$(modinst_dir))) + + # Declare the contents of the .PHONY variable as phony. We keep that + # information in a variable se we can use it in if_changed and friends. diff --git a/3.3.8/kirkwood-jumbo-frame.patch b/3.3.8/kirkwood-jumbo-frame.patch new file mode 100644 index 0000000..fdbc5b1 --- /dev/null +++ b/3.3.8/kirkwood-jumbo-frame.patch @@ -0,0 +1,135 @@ +kirkwood and dove have a smaller FIFO than other "orion" SoCs. This +needs to be taken into account otherwise people using things like jumbo frames +will get into some troubles. + +As a side note, this patch is an updated version of a patch sent some years +ago: http://lists.infradead.org/pipermail/linux-arm-kernel/2010-June/017320.html +which seems to have been lost. 
+ +Signed-off-by: Arnaud Patard + +Index: alunn/arch/arm/mach-dove/common.c +=================================================================== +--- alunn.orig/arch/arm/mach-dove/common.c 2012-07-20 09:14:45.000000000 +0200 ++++ alunn/arch/arm/mach-dove/common.c 2012-07-20 17:51:38.872925518 +0200 +@@ -102,7 +102,7 @@ void __init dove_ehci1_init(void) + void __init dove_ge00_init(struct mv643xx_eth_platform_data *eth_data) + { + orion_ge00_init(eth_data, DOVE_GE00_PHYS_BASE, +- IRQ_DOVE_GE00_SUM, IRQ_DOVE_GE00_ERR); ++ IRQ_DOVE_GE00_SUM, IRQ_DOVE_GE00_ERR, 0); + } + + /***************************************************************************** +Index: alunn/arch/arm/mach-kirkwood/common.c +=================================================================== +--- alunn.orig/arch/arm/mach-kirkwood/common.c 2012-07-20 09:14:46.000000000 +0200 ++++ alunn/arch/arm/mach-kirkwood/common.c 2012-07-20 17:51:03.104927094 +0200 +@@ -301,7 +301,7 @@ void __init kirkwood_ge00_init(struct mv + { + orion_ge00_init(eth_data, + GE00_PHYS_BASE, IRQ_KIRKWOOD_GE00_SUM, +- IRQ_KIRKWOOD_GE00_ERR); ++ IRQ_KIRKWOOD_GE00_ERR, 1600); + /* The interface forgets the MAC address assigned by u-boot if + the clock is turned off, so claim the clk now. */ + clk_prepare_enable(ge0); +@@ -315,7 +315,7 @@ void __init kirkwood_ge01_init(struct mv + { + orion_ge01_init(eth_data, + GE01_PHYS_BASE, IRQ_KIRKWOOD_GE01_SUM, +- IRQ_KIRKWOOD_GE01_ERR); ++ IRQ_KIRKWOOD_GE01_ERR, 1600); + clk_prepare_enable(ge1); + } + +Index: alunn/arch/arm/mach-mv78xx0/common.c +=================================================================== +--- alunn.orig/arch/arm/mach-mv78xx0/common.c 2012-07-20 09:14:46.000000000 +0200 ++++ alunn/arch/arm/mach-mv78xx0/common.c 2012-07-20 17:50:26.712928695 +0200 +@@ -213,7 +213,7 @@ void __init mv78xx0_ge00_init(struct mv6 + { + orion_ge00_init(eth_data, + GE00_PHYS_BASE, IRQ_MV78XX0_GE00_SUM, +- IRQ_MV78XX0_GE_ERR); ++ IRQ_MV78XX0_GE_ERR, 0); + } + + +@@ -224,7 +224,7 @@ void __init mv78xx0_ge01_init(struct mv6 + { + orion_ge01_init(eth_data, + GE01_PHYS_BASE, IRQ_MV78XX0_GE01_SUM, +- NO_IRQ); ++ NO_IRQ, 0); + } + + +Index: alunn/arch/arm/mach-orion5x/common.c +=================================================================== +--- alunn.orig/arch/arm/mach-orion5x/common.c 2012-07-20 09:14:46.000000000 +0200 ++++ alunn/arch/arm/mach-orion5x/common.c 2012-07-20 17:50:26.744928692 +0200 +@@ -109,7 +109,7 @@ void __init orion5x_eth_init(struct mv64 + { + orion_ge00_init(eth_data, + ORION5X_ETH_PHYS_BASE, IRQ_ORION5X_ETH_SUM, +- IRQ_ORION5X_ETH_ERR); ++ IRQ_ORION5X_ETH_ERR, 0); + } + + +Index: alunn/arch/arm/plat-orion/common.c +=================================================================== +--- alunn.orig/arch/arm/plat-orion/common.c 2012-07-20 09:14:46.000000000 +0200 ++++ alunn/arch/arm/plat-orion/common.c 2012-07-20 17:50:26.756928690 +0200 +@@ -291,10 +291,12 @@ static struct platform_device orion_ge00 + void __init orion_ge00_init(struct mv643xx_eth_platform_data *eth_data, + unsigned long mapbase, + unsigned long irq, +- unsigned long irq_err) ++ unsigned long irq_err, ++ unsigned int tx_csum_limit) + { + fill_resources(&orion_ge00_shared, orion_ge00_shared_resources, + mapbase + 0x2000, SZ_16K - 1, irq_err); ++ orion_ge00_shared_data.tx_csum_limit = tx_csum_limit; + ge_complete(&orion_ge00_shared_data, + orion_ge00_resources, irq, &orion_ge00_shared, + eth_data, &orion_ge00); +@@ -343,10 +345,12 @@ static struct platform_device orion_ge01 + void __init orion_ge01_init(struct 
mv643xx_eth_platform_data *eth_data, + unsigned long mapbase, + unsigned long irq, +- unsigned long irq_err) ++ unsigned long irq_err, ++ unsigned int tx_csum_limit) + { + fill_resources(&orion_ge01_shared, orion_ge01_shared_resources, + mapbase + 0x2000, SZ_16K - 1, irq_err); ++ orion_ge01_shared_data.tx_csum_limit = tx_csum_limit; + ge_complete(&orion_ge01_shared_data, + orion_ge01_resources, irq, &orion_ge01_shared, + eth_data, &orion_ge01); +Index: alunn/arch/arm/plat-orion/include/plat/common.h +=================================================================== +--- alunn.orig/arch/arm/plat-orion/include/plat/common.h 2012-07-20 09:14:46.000000000 +0200 ++++ alunn/arch/arm/plat-orion/include/plat/common.h 2012-07-20 17:50:26.772928691 +0200 +@@ -39,12 +39,14 @@ void __init orion_rtc_init(unsigned long + void __init orion_ge00_init(struct mv643xx_eth_platform_data *eth_data, + unsigned long mapbase, + unsigned long irq, +- unsigned long irq_err); ++ unsigned long irq_err, ++ unsigned int tx_csum_limit); + + void __init orion_ge01_init(struct mv643xx_eth_platform_data *eth_data, + unsigned long mapbase, + unsigned long irq, +- unsigned long irq_err); ++ unsigned long irq_err, ++ unsigned int tx_csum_limit); + + void __init orion_ge10_init(struct mv643xx_eth_platform_data *eth_data, + unsigned long mapbase, + + diff --git a/3.3.8/linux-2.6-defaults-fat-utf8.patch b/3.3.8/linux-2.6-defaults-fat-utf8.patch new file mode 100644 index 0000000..0d40fd3 --- /dev/null +++ b/3.3.8/linux-2.6-defaults-fat-utf8.patch @@ -0,0 +1,15 @@ + +https://bugzilla.redhat.com/bugzilla/show_bug.cgi?id=181963 + +--- linux-2.6.15.noarch/fs/fat/inode.c~ 2006-02-20 23:20:12.000000000 -0500 ++++ linux-2.6.15.noarch/fs/fat/inode.c 2006-02-20 23:21:42.000000000 -0500 +@@ -952,7 +952,8 @@ static int parse_options(char *options, + opts->shortname = 0; + opts->name_check = 'n'; + opts->quiet = opts->showexec = opts->sys_immutable = opts->dotsOK = 0; +- opts->utf8 = opts->unicode_xlate = 0; ++ opts->utf8 = 1; ++ opts->unicode_xlate = 0; + opts->numtail = 1; + opts->nocase = 0; + *debug = 0; diff --git a/3.3.8/linux-2.6-x86-tune-generic.patch b/3.3.8/linux-2.6-x86-tune-generic.patch new file mode 100644 index 0000000..7a7c76e --- /dev/null +++ b/3.3.8/linux-2.6-x86-tune-generic.patch @@ -0,0 +1,13 @@ +* Optimise for today's CPUs. + +--- linux-2.6/arch/x86/Makefile_32.cpu 2006-01-09 11:39:04.000000000 -0500 ++++ linux-2.6/arch/x86/Makefile_32.cpu 2006-01-09 11:39:36.000000000 -0500 +@@ -15,7 +15,7 @@ cflags-$(CONFIG_M486) += -march=i486 + cflags-$(CONFIG_M586) += -march=i586 + cflags-$(CONFIG_M586TSC) += -march=i586 + cflags-$(CONFIG_M586MMX) += -march=pentium-mmx +-cflags-$(CONFIG_M686) += -march=i686 ++cflags-$(CONFIG_M686) += -march=i686 $(call tune,generic) + cflags-$(CONFIG_MPENTIUMII) += -march=i686 $(call tune,pentium2) + cflags-$(CONFIG_MPENTIUMIII) += -march=i686 $(call tune,pentium3) + cflags-$(CONFIG_MPENTIUMM) += -march=i686 $(call tune,pentium3) diff --git a/3.3.8/linux-3.4-e2c-0.4.59.patch b/3.3.8/linux-3.4-e2c-0.4.59.patch new file mode 100644 index 0000000..ac2ac0e --- /dev/null +++ b/3.3.8/linux-3.4-e2c-0.4.59.patch @@ -0,0 +1,7781 @@ +--- linux-3.4-rc5/fs/ext2/ChangeLog.e2compr-26port 1969-12-31 19:00:00.000000000 -0500 ++++ linux-3.4-rc5-e2c/fs/ext2/ChangeLog.e2compr-26port 2012-05-03 22:17:53.267994289 -0400 +@@ -0,0 +1,453 @@ ++ ++e2compr - Released under the GPL V 2 license. ++ ++ ++Installation: ++============= ++ ++1. gunzip: ++ > gunzip linux-3.1-rc3-e2c-0.4.59.patch.gz ++ ++2. 
change to you kernel directory ++ ++3. make clean: ++ > make clean ++ ++3. patch: ++ > patch -p1 < ../patch/to/patch/linux-3.4-e2c-0.4.59.patch ++ ++ see if any rejects occured: ++ > find | grep .rej ++ ++ WARNING: All rejects must be fixed manually! ++ ++4. config: ++ > make oldconfig ++ > make menuconfig ++ Now enable at least the ext2-compression feature: ++ Filesystems: ++ <*> Second extended fs support ++ [ ] Ext2 extended attributes ++ [ ] Ext2 execute in place support ++ [*] Ext2 file compression (DANGEROUS) ++ Ext2 file compression options ---> ++ ++5. make: ++ > make ++ ++ ++Building a patch: ++================= ++ ++files.txt: ++ ++fs/ext2/ChangeLog.e2compr-26port ++Documentation/filesystems/e2compress.txt ++fs/ext2/Readme.e2compr ++fs/Kconfig ++include/linux/ext2_fs_c.h ++fs/ext2/Makefile ++fs/ext2/compress.c ++fs/ext2/e2zlib.c ++fs/ext2/adler32.c ++fs/ext2/super.c ++fs/ext2/ialloc.c ++fs/ext2/balloc.c ++fs/ext2/inode.c ++fs/ext2/file.c ++fs/ext2/ioctl.c ++fs/ext2/ext2.h ++include/linux/ext2_fs.h ++fs/fcntl.c ++mm/truncate.c ++mm/swapfile.c ++mm/filemap.c ++mm/page_alloc.c ++ ++ ++cat files.txt | xargs -n1 -I '{}' diff -pruNbB linux-3.4/'{}' linux-3.4-e2c/'{}' > ./linux-3.1-e2c-0.4.59.patch ++ ++ ++Changelog: ++========== ++1 May 2012 ++ Matthias Winkler ++ * released version 0.4.59 for kernel 3.4 ++ * compress.c: ++ - ext2_get_cluster_pages() ++ Removed dead code for releasing cached pages using ++ page_cache_release() and pagevec_free(). ++ Releasing cached pages could not have worked since ++ porting from 2.6.22 to 2.6.25 (April 2008) ++ * mm/truncate.c: ++ - fixes broken files on non ext2 partitions ++ - moved conditional truncate from truncate_pagecache() ++ to vmtruncate() as suggested by Andreas Flick ++ * made patch even smaller ++ ++25 August 2011 ++ Matthias Winkler ++ * released version 0.4.58 for kernel 3.1 ++ * file.c: i_alloc_sem was removed. I am not sure if only holding i_mutex ++ will be enough. See http://patchwork.ozlabs.org/patch/101859/. ++ In ext2_file_write() I replaced: ++ ++ mutex_lock(&inode->i_mutex); ++ - down_read(&inode->i_alloc_sem); ++ + atomic_inc(&inode->i_dio_count); ++ ++ - up_read(&inode->i_alloc_sem); ++ + inode_dio_done(inode); ++ mutex_unlock(&inode->i_mutex); ++ ++ The main prupose of i_dio_count is blocking vmtruncate_range() ++ as long as the i_dio_count is greater than 0. In other words, ++ all direct io must be completed before truncating is allowed. ++ ++ * file.c: generic_osync_inode was removed from mm - added functionality to ++ file.c as ex_generic_osync_inode() ++ * file.c: changed: &inode_lock to &inode->i_lock ++ * ext2_warning() replaced by ext2_msg() ++ * compress.c: vfs_dq_init(inode) replaced by dquot_initialize(inode) ++ * compress.c: ext2_truncate(inode) replaced by ++ ext2_truncate_blocks(inode, inode->i_size) which looks like ++ exactly the same! ++ * inode.c: dentry->d_lock now seems to need ++ spin_lock_nested(&dentry->d_lock, DENTRY_D_LOCK_NESTED) held. ++ * compress.c, inode.c: added might_schedule() before wait_on_buffer() ++ statements to assure we are not atomic at this point. ++ * truncate.c: removed patch from memory.c and moved it to truncate.c ++ as surrounding kernel code also moved there. vmtruncate() was ++ split in truncate_setsize() and truncate_pagecache() with kernel 3.1 ++ ++ ++10 August 2009 ++ Matthias Winkler ++ * released version 0.4.58 ++ * merged assert.h and debug.h into ext2_fs_c.h ++ * merged NDEBUG into EXT2_COMPR_DEBUG ++ * disabled adler cheksums on "read" if not defined EXT2_COMPR_DEBUG. 
++ * merged none.c into compress.c ++ * inserted multiple defines "CONFIG_EXT2_COMPRESS" to allow disabling ++ of ext2compression with patched sources. ++ * re-inserted EXPORT_SYMBOL(__pagevec_free) to support ext2 as module ++ ++05 August 2009 ++ Matthias Winkler ++ * released version 0.4.57 ++ * ported to kernel 2.6.30: ++ inode.c: after fix of generic ext2 ext2_get_blocks() needed to remove bforget. ++ * integrated SMP from version 0.4.56 ++ * per CPU one separate read and one separate write working area ++ * removed all external compression codecs ++ * removed "verify compression" (never helped to find a bug anyway) ++ * Lindent'ed all source and header files ++ ++01 August 2008 ++ Matthias Winkler ++ * released version 0.4.55 ++ * complete code cleanup ++ * changed policy to ALWAYS_LOCKING pages in do_generic_mapping_read() ++ => completely removed PG_Compr-Flag now! ++ ++31 July 2008 ++ Matthias Winkler ++ * released version 0.4.54 ++ * fixes rare himem bug: only occures if page > cluster in inode.c/readpage() ++ * fixes rare readpage bug in mm/filemap.c/do_generic_mapping_read(): ++ PG_Compr flags dissallow reading a page while de/compressing. ++ Setting and unsetting it requires the page lock, with one exception ++ do_generic_mapping_read() in filemap.c. This is done because of performance ++ reasons. Anyway, a simultaneous call of do_generic_mapping_read() for the SAME ++ page might break the PG_Compr-Mimic. ++ ++ Solutions: Always lock any page before reading OR second(n-th) call of ++ do_generic_mapping_read() busy waits until first is done. ++ Default is busy wait now, ALWAYS_LOCK implemented as option via define. ++ ++25 June 2008 ++ Matthias Winkler ++ * released version 0.4.53 ++ * fixes himem bug: unmapped block in ext2_decompress_cluster() ++ * fixes bdev bug: ext2_get_block() must be called for every block ++ which cause ooops because of bdev == NULL. ext2_get_block() will ++ set the correct bdev and the correct blocknumber of the block. ++ ++ NEVER assign bdev manually, because the blocknumber might be random then: ++ "block->b_bdev = something" (DON'T!) ++ ++ ALWAYS use: ++ if (!buffer_mapped(block)) || (block->b_bdev == NULL) ++ ext2_get_block() ++ ++ Bdev bug is closely related to file holes (empty block in a file). ++ If compressed data will be written to a former hole, then ++ usually ext2_get_block() must be called with create. ++ ext2_get_block( , , , 1 /*create*/). ++ ++ * fixed missing include in xattr.h ++ * EXT2_COMPRBLK might be removed during compression if a cluster ++ doesn't compress. During compression we re-raise EXT2_COMPRBLK ++ flag after every cluster now. ++ * added missing export of __pagevec_free to (mm/page_alloc.c) ++ * deny O_DIRECT access mode after open of a file using fcntl() ++ (in fs/fcntl.c). ++ * file.c: ++ Replaced ext2_filew_write() to use kernels generic ++ do_sync_write(). Writing on compressed files calls ++ ext2_filew_write(): ++ - divide write range into clusters ++ - ext2_decompress_cluster (if needed) ++ - do_sync_write() ++ - ext2_compress_cluster (if needed) ++ * inode.c: ++ ext2_writepage()/ext2_writepages() usually writes back ++ dirty pages of an inode. They reside in the kernels page cache. ++ This pages might e.g. be written/dirtied by a mmap()-ped file. ++ Also generic_file_aio_write() uses ext2_writepage() finally. ++ I don't see how the ext2_writepage() would handle compressed ++ files, so I re-inserted and re-wrote this part of old 2.4 code. ++ Don't know if this code (USE_WRITEPAGE) is needed at all. 
++ So I leave it disabled by default. Enabled it might ++ leave compressed files with compression ratio of 100%. ++ Don't use yet! ++ ++17 April 2008 ++ Matthias Winkler ++ * first patch for kernel 2.6.25 released ++ ++20 March 2008 ++ Matthias Winkler ++ * version 0.4.52: EXT2_COMPRESS_WHEN_CLU didn't work. this ++ feature enables compression during file write. ++ ++15 Oct 2007 ++ Matthias Winkler ++ * First offical Sourceforge release as version 0.4.51 ++ * TODO: figure out what is necessary to enable swap ++ suppport for e2compr again (see mm/swapfile.c). ++ ++27 Sep 2007 ++ Matthias Winkler ++ * System stalled with a lot of I/O during de-compression of ++ USB-Sticks, too. I replaced mark_buffer_dirty ++ with set_buffer_dirty. This achieves that ONLY the buffers ++ and not the pages are marked. Then I write back the ++ buffers with ll_rw_block() at the end of ++ ext2_decompress_cluster() and ext2_decompress_pages(). ++ This should stop flooding the system with dirty pages. ++ Because now every routine waits for its newly dirtied buffers. ++ My system with 128MB of RAM is responding much more better during ++ compression/decompression now. Desompression also seems ++ to be a bit faster. ++ (this change is active with: #ifndef E2C_GENERIC_OSYNC) ++ ++25 Sep 2007 ++ Matthias Winkler ++ * System stalled with a lot of I/O during compression of ++ USB-Sticks. Seems generic_osync_inode() should not be ++ called in ext2_compress_cluster. Therefore I replaced ++ it with ll_rw_block() to write the modified blocks ++ directly back to disk. This gave also a ~100% better ++ performance for compression. ++ ++9 Sep 2007 ++ Matthias Winkler ++ * fixed bdev-bug. this bug appeared primarily when ++ files contained holes. A page with holes, which ++ was dirty caused ext2_get_cluster_blocks [ext2_get_block()] ++ to create ALL blocks of the page, even if there were holes! ++ These allocated hole-blocks weren't set to 0 anywhere and ++ therefore contained invalid data. I changed the ++ code to never allocate these holes. ++ ++ * ext2_truncate() added again to ext2_compress_cluster for ++ uncompressed clusters. Fixes filesize errors reported by ++ "e2fsck -f /dev/..." ++ ++24 Aug 2007 ++ Matthias Winkler ++ ++ Major changes: ++ * completly ported inode->i_mutex ++ ++ * clever CONFIG_GZ_HACK to reject "uncompressable" files ++ (according to their extension) early. The IOCTL in ioctl.c ++ which sets the compression on the file already rejects such ++ extensions now. ++ ++ * new create_empty_buffers_e2c() was necessary, because the ++ "extra"-pages should NOT have a valid i_mapping! Further the ++ buffers needed to be initalized right. ++ ++ * proper block initalization (bdev-bug) in: ++ - create_empty_buffers_e2c() ++ - ext2_get_cluster_blocks ++ ++ * in file.c copied: ++ ...with one single change at ext2_mapping_read in label page_ok: ++ A new Page-Flag (page-flags.h) the so called "PG_compr"-Flag is ++ checked to assure the corresponding page is not under ++ compression/decompression. This was necessary because ++ generic_mapping_read() doesn't lock() the page in ALL cases!!! ++ Otherwise the generic_mapping_read() would have to lock EVERY page ++ in the whole system before returning it.... ++ ++ * Fixed HiMem-Support: Balanced ALL kamp/kunmap calls. Unbalanced ++ functions cause the system to hang at "kmap_himem()" after some ++ time. Can be seen with magic-sysctrl "altgr + prtscr + W". ++ ++ * ext2_decompres_cluster() didn't mark uptodate pages for writeback. 
++ Don't know how this method could EVER have worked... ++ ++ * ext2_compress_cluster() caused an always increasing amount of dirty-pages ++ (cat /proc/vmstat) which couldn't be wrote back by sync/umount. ++ I think this was due the ClearPageDirty at the end of ext2_compress_cluster(). ++ ++ * introduced ext2_get_dcount() to savely determine if a file is really "open" ++ and to abort compression/decompression in such a case. ++ ++ * Removed gzip completely and not working assembler code. Replaced by the ++ kernels built-in zlib, which is pretty the same code... ++ ++ * New kernel configuration interface ++ ++ * Rollback of some unecessary "fixes"... ++ ++ TODO: ++ ++ * HiMem-Support: ++ One might try to use kmap_atomic instead of kamp in ext2_readpage. kmap_atomic ++ doesn't block and might speed up the regular page reading. might. ++ ++20 April 2007 ++ Andreas: ++ ++ * Replaced GZIP with zlib of the kernel because the assembly versions of existing ++ compression modules crashed. ++ ++ * Replaced gzip with the kernel zlib, which is built-in anyway ++ ++ * Initial HiMem-Support. ++ ++ ++06 Mar 2007 ++ ++ Terry Loveall ++ ++ * adapted linux-2.6.10-e2compr-0.4.45-alpha0126.diff to 2.6.18.5 kernel ++ ++ * replaced most instances of down/up(inode->i_sem) with ++ lock/unlock(inode->i_mutex). For exception see file.c, below. ++ ++ * made various printk regularizations to uniquely identify each printk ++ instance. Inserted missing KERN_DEBUG and KERN_WARNING. ++ ++ * compress.c: ++ bug fix: ext2_count_blocks: init head_bh for each iteration. ++ bug fix: ext2_count_blocks: add set clen=ulen for uncompressable clusters. ++ bug fix: ext2_compress_cluster: replacement and inlining of an ++ invalidate_inode_buffers function to keep root filesystem changes ++ uptodate on disk (prevents umounting root file system to update). ++ warning fix: ext2_compress_cluster: various variables initialized. ++ ext2_compress_cluster: removed #ifdef NDEBUG ++ bug fix: ext2_compress_cluster: defined maxclus, calculate and set for: ++ bug fix: ext2_compress_cluster: set filesize for uncompressed clusters. ++ ext2_cleanup_compressed_inode: changed error message to indicate 'Z' ++ flag was caused by trying to un/compress already open file. ++ bug fix: cp to compr dir: Truncate uncompressed files to their ++ uncompressed length, i.e. force kernel to update inode and sb ++ ++ * file.c: ++ removed file->f_error code since f_error no longer in file struct. ++ ext2_file_write: changed down/up i_sem to down_read/up_read i_alloc_sem ++ ++ * inode.c: ++ bug fix: ext2_get_block: restored changed: loop to bforget ++ ++ * ioctl.c: ++ ext2_ioctl: scrubbed 'B' flag on file uncompress. ++ ++ * match[56]86.S: ++ made code dependent on #ifdef CONFIG_REGPARM to compile with either ++ register variable or stack variable parameter passing. ++ ++28 Feb 2005 ++ ++ Yabo Ding , ++ ++ * Corrected page unlocking in inode.c. ++ ++19 Feb 2005 ++ ++ Paul Whittaker ++ ++ * Added corrections le32_to_cpu in critical areas of compress.c ++ * Optimized function exit code in inode.c. ++ ++24 Aug 2004 ++Yabo Ding , ++ ++ compress.c ++* ext2_decompress_pages() ++ The old code cannot reread data from disk to a changed buffers data pointer in 2.6.x. ++ So, I copy memory data(decompressed) to a temporary buffer; ++ Then reread data(compressed) from disk, and copy to head; ++ Then copy back the memory data from temporary buffer. ++ It seems clumsy, but it works well. ++* ext2_compress_cluster() ++ Force write to disk. 
++
++ inode.c
++* ext2_writepage()
++ Delete old code. All directly call the block_write_full_page() function.
++
++* ../Kconfig
++ Change e2compr config as a submenu config
++
++04 Aug 2004
++
++Paul Whittaker
++
++* compress.c: replaced mark_buffer_dirty(x,y) with mark_buffer_dirty(x). I'm
++ still not at all sure that this is sufficient.
++
++03 Aug 2004
++
++Paul Whittaker
++
++* ../../include/linux/ext2_fs_c.h: added missing prototypes for ext2_iLZRW3A(),
++ ext2_wLZRW3A(), ext2_rLZRW3A().
++
++02 Aug 2004
++
++Paul Whittaker
++
++* ../../mm/page_alloc.c: added EXPORT_SYMBOL(__pagevec_free).
++
++* ../../include/linux/pagemap.h, ../../mm/filemap.c: removed inline from
++ __grab_cache_page() declarations, added EXPORT_SYMBOL(__grab_cache_page).
++
++* ../../include/linux/mm.h, ../../mm/filemap.c: removed inline from
++ page_waitqueue() declarations, added EXPORT_SYMBOL(page_waitqueue).
++
++* bzip2/{lib_bzip_d,lib_bzip_e}.c, {gzip,lzo,lzrw3a,lzv1}/e2compr*.c:
++ replaced MOD_INC_USE_COUNT and MOD_DEC_USE_COUNT with try_module_get()
++ and module_put() to avoid deprecation and safety warnings.
++
++* lzrw3a/lzrw3a.c: added (UBYTE *) casts to avoid compiler warnings.
++
++* compress.c, inode.c: incorporated Yabo's changes, correcting mistakes in
++ ext2_readpages() in inode.c.
++
++* removed printks for ext2_discard_prealloc from file.c and inode.c (not
++ needed now that this problem has been resolved).
++
++2.6.5 -> 2.6.7 updates:
++
++* ../../mm/filemap.c: rewrote CONFIG_EXT2_COMPRESS hunk for 2.6.7.
++
++* compress.c, file.c: use mapping_mapped(), since mapping->i_mmap has changed
++ and mapping->i_mmap_shared no longer exists.
++
++* inode.c: page->count becomes page->_count.
+--- linux-3.4-rc5/Documentation/filesystems/e2compress.txt 1969-12-31 19:00:00.000000000 -0500
++++ linux-3.4-rc5-e2c/Documentation/filesystems/e2compress.txt 2012-04-30 04:11:03.787143100 -0400
+@@ -0,0 +1,116 @@
++Transparent compression for ext2 filesystem
++===========================================
++
++What this document is.
++----------------------
++This document is intended to explain how e2compress has been implemented/ported
++to kernel 2.4. It also gives the status of the current work. You need to have e2compress
++knowledge (i.e. to know how e2compress works, from a general point of view).
++
++What this document is not.
++--------------------------
++This document is not a full explanation of how e2compress works. For this,
++there are other documents such as the fs/ext2/Readme.e2compr file for the technical
++point of view, and a user manual can be found at .
++That site is also a place where you will find much information about e2compress
++development for kernel 2.4, tools, manuals and so on.
++
++
++Introduction
++============
++
++This is a first adaptation of e2compress for kernel 2.4. The work has been done
++by Alcatel (Alcatel Business Systems - R&D) at Illkirch. It was started
++from the latest patch provided by Peter Moulder for kernel 2.2,
++i.e. e2compr-0.4.39-patch-2.2.18.
++It is fully compatible with the previous version.
++Below you will first find some explanations of the choices made for
++the development, and then the status of the current work from a functional point of
++view.
++
++
++Development
++===========
++
++As in previous patches, the most interesting things happen when reading in ext2_readpage
++and when writing in ext2_writepage and ext2_file_write.
++In fact, in the 2.2 kernel, compression occurs on clusters of blocks.
So when reading
++or writing part of a file, we first have to compute the cluster in which the I/O
++occurs, then we have to get every buffer of the cluster and uncompress the data if
++needed; then reading/writing happens "as for normal files".
++In 2.4 kernels, I/O goes through the page cache: i.e. when reading/writing
++part of the file, first the corresponding page is obtained, then we get the needed
++buffers, which point into the page; this means that, to keep the same scheme as in 2.2,
++we have to use the notion of a cluster of pages (a stand-alone sketch of this
++offset-to-cluster mapping is given at the end of this section). To get every buffer of
++a cluster, we first get every page of the cluster, then the buffers of every page...
++
++So, things happen as follows:
++
++ext2_readpage
++-------------
++If the data corresponding to the page are in a compressed cluster, this function performs
++more work: instead of reading one page, it reads the whole "cluster of pages".
++In any case we have to read all the compressed buffers. Once we have got all buffers
++of the cluster, uncompressed (at least part of) the data, and located the part of
++the uncompressed data which corresponds to the requested page, it is not much more
++work to also read (i.e. do some memcpy) the other pages belonging to this
++cluster.
++So, the first read of the first page of the cluster takes quite a bit longer, but then
++every page of the cluster is uptodate in the cache.
++
++ext2_writepage
++--------------
++An overhead has been added for pages belonging to a compressed cluster.
++If the cluster is still compressed on the disk, we can't directly write the
++page (which contains uncompressed data) into the middle of a compressed cluster.
++So, we first have to uncompress the whole cluster on the disk, then we can write the
++new data of the dirty page(s).
++
++ext2_file_write
++---------------
++This replaces `generic_file_write' when the e2compress option is activated.
++It is a copy of `generic_file_write'. The main difference is that instead of looping
++page by page as in `generic_file_write', we loop over clusters of pages.
++In each loop:
++ * we compute the cluster to which the beginning of the data (to be written) belongs.
++ * then, we get all pages of the cluster.
++ * If the cluster is a compressed one, we read all pages and uncompress them.
++ Otherwise, we perform a `prepare_write' (as in generic_file_write).
++ * We copy the data into each page from user space,
++ * Call `commit_write' on dirty pages.
++ * When reaching the end of a cluster, we compress it. (As in 2.2)
++
++Note: Another implementation could have been to keep generic_file_write and add an overhead
++to `ext2_prepare_write' and `ext2_commit_write'; on the first access to a page of a compressed
++cluster, the whole cluster would be uncompressed (i.e. all pages of the cluster would be read and
++uncompressed in `ext2_prepare_write'), and when committing the last page of the cluster,
++compression would occur...
++
++ext2_open_file
++--------------
++In the 2.4.16 kernel, this function has been added to handle the case of files opened for
++"direct IO". Direct IO is not supported on compressed files, so opening a file this way
++is forbidden.
++
++Other places in ext2
++--------------------
++Other changes occur as in 2.2 for managing the compression flags of files and the specific
++`COMPRESSED_BLK_ADDR' address for compressed blocks.
++So please refer to the existing documentation for 2.2 about this topic.
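A minimal stand-alone sketch of the offset-to-cluster mapping used above, mirroring the
ext2_offset_to_cluster() and ext2_block_to_cluster() macros that the patch adds in
include/linux/ext2_fs_c.h. The function and parameter names below are invented for
illustration; the 12-block first cluster follows the patch's ext2_first_cluster_nblocks()
rule for 32-block clusters on a filesystem with 1 KB blocks.

    #include <stdio.h>

    /* Map a byte offset within a file to its cluster index.  Cluster 0 is the
     * shorter "first cluster"; every later cluster holds 1 << log2_clu_nblocks
     * blocks.  All names here are illustrative only. */
    static unsigned offset_to_cluster(unsigned long long off,
                                      unsigned blocksize_bits,    /* 10 for 1 KB blocks */
                                      unsigned log2_clu_nblocks,  /* 5 for 32-block clusters */
                                      unsigned first_clu_nblocks) /* 12 in this configuration */
    {
        unsigned long long block = off >> blocksize_bits;

        if (block < first_clu_nblocks)
            return 0;
        return ((block - first_clu_nblocks) >> log2_clu_nblocks) + 1;
    }

    int main(void)
    {
        printf("%u\n", offset_to_cluster(3 * 1024, 10, 5, 12));   /* block 3   -> cluster 0 */
        printf("%u\n", offset_to_cluster(100 * 1024, 10, 5, 12)); /* block 100 -> cluster 3 */
        return 0;
    }

Reading or writing at a given offset then amounts to fetching all pages of the cluster the
offset falls in, as described for ext2_readpage and ext2_file_write above.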
++
++Status
++======
++Today (December 2001), e2compress on kernel 2.4.16 has been tested on the i386
++architecture and has been used successfully by tens of people in the department for some weeks.
++It is fully functional on ix86 and fully compatible with the 2.2 version of e2compress.
++It should work on other architectures, but this has NOT been tested.
++Please note the following:
++ * No performance tests have been done.
++ * I don't claim that the code is optimized (and it is probably not, but I hope that
++ "gurus" will not find it too bad).
++So, I think I can say that there is no known "big" bug or "blocking" bug.
++
++Some strange things have been observed in borderline cases, i.e. when memory is overloaded.
++
++
++As usual, this e2compress comes without warranty, use it at your own risk, etc...
+--- linux-3.4-rc5/fs/ext2/Readme.e2compr 1969-12-31 19:00:00.000000000 -0500
++++ linux-3.4-rc5-e2c/fs/ext2/Readme.e2compr 2012-04-30 04:11:03.788143096 -0400
+@@ -0,0 +1,511 @@
++
++ 0. Introduction
++ ~~~~~~~~~~~~~~~
++
++This file gives some technical information on e2compr and how it's
++implemented.
++
++More general information on e2compr can be found at
++http://e2compr.sourceforge.net/.
++
++The first couple of sections of this document are written for those
++who have no interest in the source code but just want to know enough
++to be able to predict and understand e2compr behaviour and its
++implications.
++
++Section 3 describes the e2compr-specific ext2 attributes for a file
++(i.e. chattr things).
++
++Section 4 describes the e2compr ioctls from the point of view of a
++user-mode C programmer.
++
++Section 5 gives more detail about the file format on disk.
++
++Section 6 gives details on what's written where, i.e. a map of e2compr
++code in the kernel.
++
++
++Authorship: section 2 is written mainly by Antoine; the remainder is
++written by Peter.
++
++Questions should be sent to the e2compr mailing list,
++e2compr-misc@lists.sourceforge.net, or to the current maintainers,
++bothie@users.sourceforge.net and whitpa@users.sourceforge.net.
++
++
++ 1. The idea
++ ~~~~~~~~~~~
++
++See section `E2compr implementation' in the main e2compr texinfo
++documentation for an introduction to how e2compr works. (Type
++`info "(e2compr)Implementation"' at the shell prompt.) It was
++originally written as part of the file you're now reading.
++
++
++ 2. More details
++ ~~~~~~~~~~~~~~~
++
++Every compressed file stores its cluster size in the inode structure
++(in the ext2 attribute flags field).
++This (the cluster size) is the most important information: once we
++know the cluster size, we can convert a block number into a cluster
++number, get the cluster the block belongs to, and then get the block.
++The inode's flags field also keeps the algorithm that is used to compress data
++written to the file.
++
++(The algorithm that was used to compress a given
++cluster is stored in the cluster head near the beginning of the
++compressed data. This may differ from the current algorithm
++identified in the inode, which is only used to determine which
++algorithm to use at the time clusters are written.)
++
++The algorithm id and the cluster size are stored in the i_flags field
++(thus reducing the number of possible flags). We also create some new
++flags: the COMPRBLK flag tells if there is at least one compressed
++cluster in the file, and the ECOMPR flag indicates that an error (related
++to compression) occurred while reading from or writing to this file.
++If it is set, the file becomes read-only.
(In previous releases, you ++were denied even read access to the file unless you set the NOCOMPR ++flag. There might be some benefit in returning to the old behaviour ++if decompressing erroneous data can cause an OOPS, but I think it ++would be better to correct the decompressors. Others may disagree, ++pointing out that it costs CPU time to check for incorrect data.) ++ ++Beside the information stored into the inode, each cluster holds some ++data. Here is the cluster_head structure for e2compr-0.4: ++ ++struct ext2_cluster_head { ++ __u16 magic; /* == EXT2_COMPRESS_MAGIC_04X. */ ++ __u8 method; /* compression method id. */ ++ __u8 holemap_nbytes; /* length of holemap[] array */ ++ __u32 checksum; /* adler32 checksum. Checksum covers all fields ++ below this one, and the compressed data. */ ++ __u32 ulen; /* size of uncompressed data */ ++ __u32 clen; /* size of compressed data (excluding cluster head) */ ++ __u8 holemap[0]; /* bitmap describing where to put holes. */ ++}; ++ ++The `magic' field is a magic number. It is used to detect filesystem ++corruption, and can also be used for data recovery purposes. (The ++e2compress program for e2compr-0.3 does this.) ++ ++The `checksum' field contains an Adler-32 checksum on the fields below ++it in the struct and the compressed data. Its purpose is to protect ++us from buffer overruns caused by corrupted data. ++ ++The `ulen' field says how many bytes are stored in the cluster, when ++uncompressed. ++ ++The `clen' field says how many bytes are held in the cluster, when ++compressed. ++ ++The `method' ++field identifies the algorithm that was used to compress the cluster ++(this id will be used to uncompress the cluster, not the one stored ++into the inode that will be used only to compress a new cluster). ++ ++The variable-length `holemap' array says where to put hole blocks when ++decompressing data. The `holemap_nbytes' field gives the length of ++this array. Iff holemap_nbytes is zero then there are no holes (other ++than at the end of the cluster, as determined by ulen versus cluster ++size). ++ ++The compressed data immediately follows the holemap array (with no ++padding before it). ++ ++ ++Compressing a cluster is done in the following way: We first get every ++block in the cluster and compute the bitmap. We then compress the ++non-hole data, and store back the compressed data into the existing ++blocks. Unused blocks are then freed. ++ ++Decompressing a cluster is done in the following way: We get the ++cluster head and retrieve the bitmap. Missing blocks are allocated and ++put where the bitmap says, and then compressed data is decompressed and ++stored back into the blocks. ++ ++ ++Reading from a compressed cluster is really easy: get the blocks, ++decompress them into a working area, and get the bytes we want from ++the working area. Writing to a compressed cluster is done by first ++decompressing the cluster, and then write to it, as if it were a ++normal file. The file is then marked so that the cluster will be ++recompressed later. [pjm: Do we decompress the cluster even if it's ++to be entirely written over?] ++ ++In the current version, compression really occurs only when the inode ++is put (which in turn only occurs when no processes have the file ++open). This may change. ++ ++ ++ 3. Ext2 file attributes ++ ~~~~~~~~~~~~~~~~~~~~~~~ ++ ++Attribute Lsattr Meaning ++~~~~~~~~~ ~~~~~~ ~~~~~~~ ++EXT2_SECRM_FL s Secure deletion (not yet implemented) ++EXT2_UNRM_FL u Undelete-able. (Not yet implemented.) 
++EXT2_COMPR_FL c Future writes to this file should be compressed. ++ (Clearing this flag decompresses the file if it ++ is a regular file and there is space to do so; ++ see the e2compr FAQ for details.) ++EXT2_SYNC_FL S Synchronous updates. (As far as I know, this is ++ not yet fully implemented.) ++EXT2_IMMUTABLE_FL i Immutable file. ++EXT2_APPEND_FL a Writes to file may only append. ++EXT2_NODUMP_FL d Not a candidate for backup with dump(8). ++EXT2_NOATIME_FL A No access time updates. ++EXT2_DIRTY_FL Z De/compression is yet to happen. Read the ++ source for exact meaning. ++EXT2_COMPRBLK_FL B File contains one or more compressed clusters. ++EXT2_NOCOMPR_FL X Access raw compressed data. This isn't really ++ supported at the moment; user-space access is ++ yet to be worked out for 0.4. ++EXT2_ECOMPR_FL E Compression error associated with this file ++EXT2_BTREE_FL I B-tree indexed directory (seemingly not yet implemented) ++EXT2_RESERVED_FL - (reserved for ext2 lib) ++ ++See the chattr(1) man page for more verbose descriptions of the ++non-e2compr flags. ++ ++ ++ 4. Ioctls available ++ ~~~~~~~~~~~~~~~~~~~ ++ ++ In brief ++ ~~~~~~~~ ++ ++Action Ioctl To kernel From kernel ++~~~~~~ ~~~~~ ~~~~~~~~~ ~~~~~~~~~~~ ++Get cluster bit EXT2_IOC_GETCLUSTERBIT Cluster num 1 or 0 (cmp,uncmp) ++Recognize compressed Cluster num - ++ EXT2_IOC_RECOGNIZE_COMPRESSED ++Get algorithm EXT2_IOC_GETCOMPRMETHOD - Id ++Set algorithm EXT2_IOC_SETCOMPRMETHOD Id - ++Get cluster size EXT2_IOC_GETCLUSTERSIZE - Cluster size ++Set cluster size EXT2_IOC_SETCLUSTERSIZE Cluster size - ++Get attributes EXT2_IOC_GETFLAGS - Flags ++Set attributes EXT2_IOC_SETFLAGS Flags - ++Get block size FIGETBSZ - Block size ++ ++#include to use any of these ioctls, except FIGETBSZ, ++which requires . ++ ++To find out what errors can be returned by these ioctls, read ++fs/ext2/ioctl.c (for all of the above ioctls except FIGETBSZ) or ++fs/ioctl.c (for FIGETBSZ). ++ ++ ++ Setting or testing a cluster bit ++ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ++ ++[Note: user-space access to compression details are yet to be worked out, ++so this section may not be accurate.] ++ ++EXT2_IOC_GETCLUSTERBIT sets *arg to 1 if the specified cluster (0 for first ++cluster, 1 for second, etc.) is stored in compressed form. ++ ++To make the kernel consider a certain cluster to be compressed (after ++you've done the compression yourself, in user space), use ++EXT2_IOC_RECOGNIZE_COMPRESSED. This ioctl checks the validity of the ++cluster's data, then marks it as compressed (if valid). This ioctl ++requires special priveleges, because if the compressed data is not ++valid then it may be possible to crash the system (due to buffer ++overruns). ++ ++ ++ Setting or getting the compression algorithm ++ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ++ ++EXT2_IOC_SETCOMPRMETHOD sets the default compression method (stored in ++the inode). This is the compression method that is used for future ++writes. In the current version of e2compr [accurate at 0.4.36], this ++does not cause a change to how ++existing clusters are stored, except when the compression method ++changes from `none' to something else, in which case the kernel ++attempts to compress ,all currently-uncompressed clusters` using the ++new algorithm. It is an error to use this ioctl on a file without the ++compressed attribute. ++ ++EXT2_IOC_GETCOMPRMETHOD sets *arg to the current compression method. 
++ ++In either case, Id is one of: EXT2_DEFER_METH, EXT2_LZV1_METH, ++EXT2_AUTO_METH, EXT2_NEVER_METH, EXT2_BZIP2_METH, EXT2_LZO1X_1_METH, ++EXT2_LZRW3A_METH (deprecated), EXT2_GZIP1_METH, EXT2_GZIP2_METH, ..., ++EXT2_GZIP9_METH. ++ ++ ++ Setting or getting the cluster size ++ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ++ ++EXT2_IOC_SETCLUSTERSIZE sets the cluster size to the value of *arg. ++This ioctl fails if there are already compressed clusters in the file ++(as determined by checking the EXT2_COMPRBLK_FL attribute). ++ ++EXT2_IOC_GETCLUSTERSIZE sets *arg to the current cluster size. ++Surprisingly, this ioctl succeeds even if the EXT2_COMPR_FL attribute ++is clear. (Maybe this will change in future, since the result is ++meaningless.) ++ ++In either case, the size is one of {4, 8, 16, 32}, and represents the ++number of blocks per cluster. To convert to or from a number of ++bytes, use the FIGETBSZ ioctl. ++ ++ ++ Setting or getting the ext2 file attributes ++ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ++ ++These ioctls (EXT2_IOC_GETFLAGS and EXT2_IOC_SETFLAGS) are not ++e2compr-specific, but some attributes are e2compr-specific. ++ ++*arg consists of the set of attributes for that file OR'ed together. ++E.g. a value of (EXT2_COMPR_FL | EXT2_COMPRBLK_FL | EXT2_NODUMP_FL) ++for a regular file means that the file contains one or more compressed ++clusters, and should not be backed up when using dump(8). ++ ++See section 3 for a description of the various attributes. ++ ++Note that although the compression method and cluster size are ++physically stored in the flags field on disk this information is ++masked out (i.e. set to zero) for GETFLAGS if the kernel has e2compr compiled in. ++If the kernel does not have e2compr compiled in, then this information ++is not masked out. See section 5 for how the cluster size and ++compression method is stored if you wish to work with ,kernels without ++e2compr`. ++ ++ ++ Getting the block size ++ ~~~~~~~~~~~~~~~~~~~~~~ ++ ++This ioctl (FIGETBSZ) is not e2compr-specific, but is useful in ++interpreting a cluster size (which is specified as a number of blocks ++rather than bytes or kilobytes). ++ ++*arg is set to the block size (in bytes) of the file. For ext2 files, ++this is one of {1024,2048,4096}. It is the same value for all files ++on the same filesystem. ++ ++You must #include to use this ioctl (unlike the rest of ++the ioctls listed here, which require ). ++ ++ ++ 5. File format ++ ~~~~~~~~~~~~~~ ++ ++A note on byte ordering. All current versions of the kernel and ++e2compr write to disk in little-endian format, so the 16-bit number ++`0x8EC7' would be written as a 0xC7 byte followed by a 0x8E byte. ++Unless you want to know the most general rule for byte ordering, you ++can skip to the `Inode' heading. ++ ++In kernel 2.0, the ext2 fs is written to disk in the native byte ++ordering. On x86 machines, this means little endian; most other ++architectures are big-endian (so the same 16-bit number would be ++written as an 0x8E byte followed by 0xC7). ++ ++On kernel 2.1 and later, the ext2 fs (including e2compr data) is ++written in little-endian order regardless of the host architecture. ++ ++ ++ 5.1. Inode ++ ~~~~~~~~~~ ++ ++fs/inode.c controls the reading and writing of inode information ++to/from disk; consult this file (functions ext2_read_inode(), ++ext2_update_inode() and/or ext2_write_inode()) for any detail omitted ++from this section. ++ ++The physical structure of an inode is struct ext2_inode (defined in ++include/linux/ext2_fs.h). 
++ ++ ++The i_flags member contains the ext2 file attributes, as well as ++cluster size and compression method. ++ ++The normal flags are stored in the low 23 bits. Only the low 12 bits ++are defined at present, including 4 flags introduced by the e2compr ++patch. See ext2_fs.h for the flag meanings (search for ++EXT2_SECRM_FL). ++ ++Bits 23 through 25 hold the cluster size, or more precisely the log2 of ++the number of filesystem blocks per cluster (excluding the first cluster; ++see ext2_first_cluster_nblocks in include/linux/ext2_fs_c.h). ++ ++Bits 26 through 30 store the compression method. See the definitions ++for EXT2_LZV1_METH etc. in ext2_fs_c.h for the interpretation. ++ ++Bit 31 is reserved for ext2 lib (which means that programs like e2fsck ++store things there during its operation but it isn't used by the ++kernel). ++ ++ ++ Data blocks ++ ~~~~~~~~~~~ ++ ++Uncompressed clusters are stored just as they would be without ++e2compr. So if there are no compressed clusters then the file ++is stored identically to any other file. ++ ++ ++If a cluster is compressed, then the first non-hole block starts with ++a `cluster head', as defined in struct ext2_cluster_head in ext2_fs.h. ++ ++The magic number (i.e. the value of the `magic' field) is 0x8ec7. ++`method' holds one of EXT2_LZV1_ID and the like. `reserved_0' ++contains zero. `ubitmap' describes where the uncompressed data goes. ++(Recall that when we compress a cluster, we only compress the data ++from non-hole blocks, so we need to know where the holes and non-holes ++go when we decompress the data.) A `0' bit means a hole and a `1' bit ++means a data block; bit 0 refers to the first block, b1 the second, ++and so on. ++ ++ ++The block positions within the file where the compressed data is held ++is a subset of where the uncompressed data would be held. Further, if the ++uncompressed data occupies u non-hole blocks and this compresses to c ++blocks, then the compressed data occupies the first c non-hole blocks ++of the file (and the remainder are freed). ++ ++[This paragraph is an expansion of the preceeding: if you understood ++the preceeding paragraph then skip this one.] Consider an array ++cblock[] where cblock[0] holds the block number on disk (or 0 to ++represent a hole) of the first block of a certain cluster of a file, ++cblock[1] the second, and so on. (If you are familiar with the bmap ++array or the format of first-level indirect blocks, then cblock[] is a ++section of that array.) Suppose that the cluster size of this file is ++16 blocks. Suppose too that, when uncompressed, blocks 0, 1, 5 and 6 ++of the cluster are holes but the other 12 blocks (2,3,4,7,8,...,15) ++contain data. (Thus the bitmap is 0x0000ff9c.) Now if we compress this ++cluster to just 5 blocks, then cblock[0], [1], [5] and [6] will continue ++to be holes, ,the positions of the compressed data blocks` are stored in ++cblock[2], cblock[3], [4], [7] and [8], the blocks referenced by ++cblock[9] through cblock[15] are freed, and cblock[9] through cblock[15] ++are set to zero. ++ ++ ++ 6. What's coded where ++ ~~~~~~~~~~~~~~~~~~~~~ ++ ++File names in this section are relative to linux/fs/ext2, except for ++ext2_fs.h which is in linux/include/linux. ++ ++Most of the action happens in compress.c; though note that a few ++small, commonly-used routines are written as inline functions in ++ext2_fs.h. ++ ++ext2_readpage() and ext2_mmap() are in file.c. ext2_file_write() is ++also there. ++ ++Routines to read/write the inode from/to disk are in inode.c. 
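To make the i_flags packing described in section 5.1 concrete, here is a minimal
stand-alone sketch of how the three pieces of e2compr information could be unpacked
from the on-disk i_flags word. The struct name, helper name and the example value are
invented for illustration; the method value 23 corresponds to EXT2_GZIP_8_METH and the
log2 value 5 to 32-block clusters, as defined in ext2_fs_c.h.

    #include <stdint.h>
    #include <stdio.h>

    struct e2c_flags {                 /* names invented for this sketch */
        uint32_t attrs;                /* ordinary attribute flags, bits 0-22  */
        unsigned log2_clu_nblocks;     /* log2(blocks per cluster), bits 23-25 */
        unsigned method;               /* compression method id,    bits 26-30 */
    };

    static struct e2c_flags unpack_i_flags(uint32_t i_flags)
    {
        struct e2c_flags f;

        f.attrs            =  i_flags & 0x007fffff;   /* low 23 bits                   */
        f.log2_clu_nblocks = (i_flags >> 23) & 0x7;   /* bits 23 through 25            */
        f.method           = (i_flags >> 26) & 0x1f;  /* bits 26 through 30            */
        return f;                                     /* bit 31 is left to ext2 lib    */
    }

    int main(void)
    {
        /* gzip level 8 (method 23), 32-block clusters (log2 = 5), no attribute bits */
        struct e2c_flags f = unpack_i_flags((23u << 26) | (5u << 23));

        printf("attrs=%#x log2=%u method=%u\n", f.attrs, f.log2_clu_nblocks, f.method);
        return 0;
    }

In the patch itself this unpacking happens when the inode is read and written, i.e. in the
inode.c routines mentioned above.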
++ ++super.c contains some e2compr initialisation code (such as allocating ++the e2compr work area). ++ ++All ioctl handling is in ioctl.c. ++ ++acl.c is where we deny open() access in a couple of situations (if the ++EXT2_NOCOMPR_FL is set and another process has the file open; and we ++deny write access to a file with EXT2_ECOMPR_FL set). ++ ++ialloc.c contains code in ext2_new_inode() for newly-created files to ++inherit compression attributes from the directory in which they're ++created. ++ ++truncate.c handles truncation, i.e. zeroing any part of the cluster ++bitmap that's been truncated, and decompressing the final cluster (but ++marking dirty so that we try to recompress it on file close) if the ++new size is part-way through a compressed cluster, so that zeroing ++over the truncated data works. ++ ++linux/include/linux/ext2_fs_i.h has the definition of the ++ext2-specific parts of the in-memory inode. (The on-disk inode is ++defined in ext2_fs.h.) ++ ++linux/mm/filemap.c is also interesting, though there's no ++e2compr-specific code there. Similarly linux/include/linux/mm.h and ++linux/include/linux/fs.h. ++ ++generic_readpage() is in linux/fs/buffer.c. Also all buffer handling. ++ ++ ++The cleanup scheme ++~~~~~~~~~~~~~~~~~~ ++ ++inode->u.ext2_i.i_compr_flags has only a single bit defined: ++EXT2_CLEANUP_FL. This bit gets set to 1 to indicate that ++ext2_cleanup_compressed_inode() needs to be called. ++ ++There is a related flag stored on disk as well as in memory: ++EXT2_DIRTY_FL of i_flags. If ext2_cleanup_compressed_inode() couldn't ++finish it's job (e.g. due to I/O error) then it clears EXT2_CLEANUP_FL ++of i_compr_flags, but leaves EXT2_DIRTY_FL high. ++ ++In ext2_read_inode(), if EXT2_DIRTY_FL is high then EXT2_CLEANUP_FL is ++raised, in the hope that ,whatever was preventing ++ext2_cleanup_compressed_inode() from finishing` is now past. ++ ++Except for ext2_read_inode() as noted above, everything that raises ++EXT2_CLEANUP_FL (i.e. ext2_write_file(), ext2_ioctl() and ++ext2_truncate()) also raises EXT2_DIRTY_FL. ++ ++Nothing lowers either EXT2_CLEANUP_FL or EXT2_DIRTY_FL except ++ext2_cleanup_compressed_inode() (and one or both of new_inode and ++delete_inode routines). ++ ++ ++One feels that at least one of these cleanup flags ought to ++disappear. The main use of the persistent EXT2_DIRTY_FL is where the ++user does `chattr -c' in order to decompress the file, but there isn't ++enough space on the device to do this. We can get rid of this problem ++by having ext2_ioctl() call ext2_cleanup_compressed_inode() ++try to ++ ++ ++Notes on a few variables ++~~~~~~~~~~~~~~~~~~~~~~~~ ++ ++Don't confuse the inode->i_dirt flag with (inode->u.ext2_i.i_flags & ++EXT2_DIRTY_FL). See section `The cleanup scheme' above for a ++description of EXT2_DIRTY_FL. ++ ++ ++inode->u.ext2_i.i_clu_nblocks, ++inode->u.ext2_i.i_log2_clu_nblocks: ++ ++i_clu_nblocks is always equal to ,1 << i_clu_nblocks` (except during a ++couple of cycles while they're being changed; I haven't consciously ++tried to avoid problems for SMP machines in this respect). ++ ++i_clu_nblocks is the number of blocks per cluster for this inode. ++ ++Old information: these variables were previously called ++`i_cluster_bits' and `i_cluster_size'. They were in an array: ++ ++inode->u.ext2_i.i_cluster_bits[2], ++inode->u.ext2_i.i_cluster_size[2]: ++ ++I believe the reason these were declared as an array was for the case ++where someone changes the cluster size of a file that was already ++compressed. 
(Reason for this belief: All readers of these fields use ++[0]. On creation (ialloc), read_inode, and `chattr +c' (where ++previously uncompressed), both [0] and [1] are updated. On change ++(IOC_SET_CLUSTERSIZE), only [0] is updated.) Since ,changing cluster ++size of an already-compressed file` isn't implemented, I've renamed ++them and made them scalars rather than arrays. ++ ++ ++inode->u.ext2_i.i_flags: When the e2compr patch is applied, this ++variable only holds the low 24 bits of the on-disk i_flags field. ++(Without the e2compr patch applied, all 32 bits are available. An ++interesting side effect of this is that user programs can access the ++compression algorithm and cluster size on kernels without e2compr ++patch by using the EXT2_IOC_GETFLAGS, EXT2_IOC_SETFLAGS ioctls.) ++ ++ ++inode->u.ext2_i.i_compr_method: Holds the compression method ++identifier. Starting from e2compr-0.4.0, this is different from an ++algorithm identifier: an example of a method is gzip9; the ++corresponding algorithm is gzip. See compress.c for where ++ext2_method_table and ext2_algorithm_table are defined. ext2_fs.h has ++some enumerations for addressing these tables (search for ++`EXT2_NONE_METH' and `EXT2_NONE_ALG'). +--- linux-3.4-rc5/fs/Kconfig 2012-04-29 18:19:10.000000000 -0400 ++++ linux-3.4-rc5-e2c/fs/Kconfig 2012-04-30 04:11:03.788143096 -0400 +@@ -11,6 +11,126 @@ config DCACHE_WORD_ACCESS + if BLOCK + + source "fs/ext2/Kconfig" ++ ++config EXT2_COMPRESS ++ bool "Ext2 file compression (DANGEROUS)" ++ depends on EXT2_FS && EXPERIMENTAL ++ select CRYPTO ++ select CRYPTO_ALGAPI ++ select CRYPTO_DEFLATE ++ select ZLIB_INFLATE ++ select ZLIB_DEFLATE ++ help ++ Ext2 file compression allows transparent compression of files on an ++ ext2 filesystem. Transparent compression means that files are ++ stored on the disk in a compressed format but they are automatically ++ decompressed as they are read in and compressed when written out. ++ The user is in control of how and which files are compressed, using ++ the `chattr' utility (see chattr(1)). For the sake of safety, ++ administrative data (superblock, inodes, directories, etc.) are not ++ compressed. ++ ++ Compression is very useful if you're short on disk space, and ++ provides a better option than having lots of .gz files around. ++ For more information, see . ++ ++ You _need_ to have the special e2compr version of e2fsck to be able ++ to make use of this. ++ ++ If you say Y, you will be asked which compression algorithms you wish ++ to include. Gzip is a good all-round algorithm, as its 1..9 parameter ++ allows a good range of speed/compression trade-off. Other noteworthy ++ algorithms are LZV, which caters better to the faster/less compressing ++ end of the scale, and bzip, which caters slightly better to the more ++ compressing but slower end of the scale. ++ ++ Ext2 compression is still experimental, so unless you know you need ++ it, you'd better say N. ++ ++menu "Ext2 file compression options" ++ depends on EXT2_COMPRESS ++ ++choice ++ #depends on EXT2_DEFAULT_COMPR_METHOD_GZIP ++ prompt "Gzip parameter for default compression method" ++ default EXT2_DEFAULT_COMPR_METHOD_GZIP8 ++ help ++ You have selected `gzip' as your default compression algorithm, but ++ I need to know whether to use `gzip -1', `gzip -9', or somewhere ++ in between. gzip1 is the least compressing but fastest; gzip9 is the ++ most compressing and slowest; and the numbers in between have ++ characteristics in between (though not on a linear scale). ++ If unsure, say `8'. 
++ ++config EXT2_DEFAULT_COMPR_METHOD_GZIP1 ++ bool "1" ++config EXT2_DEFAULT_COMPR_METHOD_GZIP2 ++ bool "2" ++config EXT2_DEFAULT_COMPR_METHOD_GZIP3 ++ bool "3" ++config EXT2_DEFAULT_COMPR_METHOD_GZIP4 ++ bool "4" ++config EXT2_DEFAULT_COMPR_METHOD_GZIP5 ++ bool "5" ++config EXT2_DEFAULT_COMPR_METHOD_GZIP6 ++ bool "6" ++config EXT2_DEFAULT_COMPR_METHOD_GZIP7 ++ bool "7" ++config EXT2_DEFAULT_COMPR_METHOD_GZIP8 ++ bool "8" ++config EXT2_DEFAULT_COMPR_METHOD_GZIP9 ++ bool "9" ++ ++endchoice ++ ++config GZ_HACK ++ bool "Exclude .gz files from automatic compression" ++ depends on EXT2_COMPRESS ++ default y ++ help ++ If you say Y here, then files created with names ending in `.gz' or ++ `.?gz' or `.bz2' don't inherit the `c' ("compress") attribute from ++ their parent directory. (However, you can still do `chattr +c FILE' ++ if you want to try to compress it anyway.) This means that you ++ don't waste CPU time trying to compress a file that probably can't ++ be compressed. See fs/ext2/namei.c if you want to add other rules. ++ If you have any aesthetic sensibilities then you will say N here ++ and try to implement something better. Most people will say Y here. ++ ++ ++choice ++ depends on EXT2_COMPRESS ++ prompt "Default cluster size (in blocks, usually 1KB each)" ++ default EXT2_DEFAULT_CLUSTER_BITS_5 ++ help ++ To make random access to compressed files reasonably fast the files ++ are compressed in clusters. By default, the clusters will be of the ++ size defined here but there is a modified version of the chattr ++ utility that can set the cluster size for each file independently. ++ Large clusters usually result in better compression at the cost of ++ being slower. ++ ++ Note that the answer to this question is specified in filesystem ++ blocks rather than in kilobytes, though most filesystems have 1KB ++ blocks anyway. (If you have a filesystem with large blocks then ++ you should know it, but if you want to check then "tune2fs -l ++ /dev/xxx | grep size".) The default is 32 blocks which is the ++ slowest setting but gives the best compression. ++ ++config EXT2_DEFAULT_CLUSTER_BITS_2 ++ bool "4" ++config EXT2_DEFAULT_CLUSTER_BITS_3 ++ bool "8" ++config EXT2_DEFAULT_CLUSTER_BITS_4 ++ bool "16" ++config EXT2_DEFAULT_CLUSTER_BITS_5 ++ bool "32" ++ ++endchoice ++ ++endmenu ++ ++ + source "fs/ext3/Kconfig" + source "fs/ext4/Kconfig" + +--- linux-3.4-rc5/include/linux/ext2_fs_c.h 1969-12-31 19:00:00.000000000 -0500 ++++ linux-3.4-rc5-e2c/include/linux/ext2_fs_c.h 2012-05-03 22:14:10.473000559 -0400 +@@ -0,0 +1,498 @@ ++/* ++ * Copyright (C) 2001 Alcatel Business Systems - R&D Illkirch ++ * (transparent compression code) ++ * Pierre Peiffer (pierre.peiffer@sxb.bsf.alcatel.fr) - Denis Richard (denis.richard@sxb.bsf.alcatel.fr) ++ * Adapted from patch e2compr-0.4.39-patch-2.2.18 . ++ */ ++ ++#ifndef EXT2_FS_C_H ++#define EXT2_FS_C_H ++ ++#include ++#include ++#include ++#include "../../fs/ext2/ext2.h" ++ ++/* EXT2_COMPR_DEBUG enables: ++ * - all assertions ++ * - adler checksum checking ++ */ ++//#undef EXT2_COMPR_DEBUG ++#define EXT2_COMPR_DEBUG ++ ++#ifdef EXT2_COMPR_DEBUG ++# define assert(expr) \ ++ if(unlikely(!(expr))) { \ ++ printk(KERN_ERR "Assertion failed! %s,%s,%s,line=%d\n", \ ++#expr, __FILE__, __func__, __LINE__); \ ++ } ++#else ++# define assert(expr) do {} while (0) ++#endif ++ ++ ++/* proof get_cpu and put_cpu correctness by calling might_sleep() or mabye schedule(). ++ this will check if we are atomic */ ++#ifdef EXT2_COMPR_DEBUG ++#define CHECK_NOT_ATOMIC assert(! 
in_atomic());//might_sleep(); ++#else ++#define CHECK_NOT_ATOMIC ++#endif ++ ++ ++#undef EXT2_COMPR_REPORT ++//#define EXT2_COMPR_REPORT ++//#define EXT2_COMPR_REPORT_VERBOSE ++//#define EXT2_COMPR_REPORT_PUT ++//# define EXT2_COMPR_REPORT_FILEOPEN ++//#define EXT2_COMPR_REPORT_MUTEX ++ ++#ifdef EXT2_COMPR_REPORT ++//# define EXT2_COMPR_REPORT_PUT ++//# define EXT2_COMPR_REPORT_WA ++//# define EXT2_COMPR_REPORT_MUTEX ++//# define EXT2_COMPR_REPORT_ALLOC /* disk allocation etc. */ ++//# define EXT2_COMPR_REPORT_ALGORITHMS /* Compression algorithms */ ++//# define EXT2_COMPR_REPORT_VERBOSE /* Various things I don't think ++// useful at the moment. */ ++//#define EXT2_COMPR_REPORT_VERBOSE_INODE ++#endif ++ ++ ++#ifdef EXT2_COMPR_DEBUG ++#define E2COMPR_VERSION "ext2-compression: e2c-0.4.59-smp-debug (1 May 2012) for kernel 3.4" ++#else ++#define E2COMPR_VERSION "ext2-compression: e2c-0.4.59-smp-release (1 May 2012) for kernel 3.4" ++#endif ++ ++#define EXT2_IOC_GETCLUSTERSIZE _IOR('c', 0, long) ++#define EXT2_IOC_SETCLUSTERSIZE _IOW('c', 0, long) ++#define EXT2_IOC_GETCOMPRMETHOD _IOR('c', 1, long) ++#define EXT2_IOC_SETCOMPRMETHOD _IOW('c', 1, long) ++#define EXT2_IOC_GETFIRSTCLUSTERSIZE _IOR('c', 2, long) ++#define EXT2_IOC_RECOGNIZE_COMPRESSED _IOW('c', 2, long) ++#define EXT2_IOC_GETCLUSTERBIT _IOR('c', 3, long) ++#define EXT2_IOC_GETCOMPRRATIO _IOR('c', 4, long) ++/* Don't use _IOW('c', {5,6}, long), as these are used by old ++ e2compress binaries as SETCLUSTERBIT and CLRCLUSTERBIT ++ respectively. */ ++ ++/* EXT2_xxxx_ALG is an index into ext2_algorithm_table[] defined in ++ fs/ext2/compress.c. */ ++/* N.B. Don't change these without also changing the table in ++ compress.c. Be careful not to break binary compatibility. ++ (EXT2_NONE_ALG and EXT2_UNDEF_ALG are safe from binary ++ compatibility problems, though, so they can safely be renumbered -- ++ and indeed probably should be if you do add another algorithm.) */ ++#define EXT2_LZV1_ALG 0 ++#define EXT2_LZRW3A_ALG 1 ++#define EXT2_GZIP_ALG 2 ++#define EXT2_BZIP2_ALG 3 ++#define EXT2_LZO_ALG 4 ++#define EXT2_NONE_ALG 5 ++#define EXT2_UNDEF_ALG 6 ++#define EXT2_N_ALGORITHMS 5 /* Count of "real" algorithms. Excludes ++ `none' and `undef'. */ ++ ++/* EXT2_xxxx_METH is an index into ext2_method_table[] defined in ++ fs/ext2/compress.c. */ ++/* N.B. Don't change these without also changing the table in ++ compress.c. */ ++#define EXT2_LZV1_METH 0 ++#define EXT2_AUTO_METH 1 ++#define EXT2_DEFER_METH 2 ++#define EXT2_NEVER_METH 3 ++#define EXT2_BZIP2_METH 4 ++#define EXT2_LZRW3A_METH 8 ++#define EXT2_LZO1X_1_METH 10 ++#define EXT2_GZIP_1_METH 16 ++#define EXT2_GZIP_2_METH 17 ++#define EXT2_GZIP_3_METH 18 ++#define EXT2_GZIP_4_METH 19 ++#define EXT2_GZIP_5_METH 20 ++#define EXT2_GZIP_6_METH 21 ++#define EXT2_GZIP_7_METH 22 ++#define EXT2_GZIP_8_METH 23 ++#define EXT2_GZIP_9_METH 24 ++ ++#define EXT2_N_METHODS 32 /* Don't change this unless you know what ++ you're doing. In particular, it's tied ++ to the width of the algorithm field ++ in i_flags.*/ ++ ++/* Note: EXT2_N_ALGORITHMS can't be increased beyond 16 without ++ changing the width of the s_algorithms_used field in the in-memory ++ superblock. The on-disk s_algorithms_used field is 32 bits long. ++ (This is in a state of flux. Currently (1998-02-05) there is no ++ distinction: we always use the s_es copy. 
*/
++
++
++#define EXT2_MAX_CLUSTER_BYTES (32*1024)
++#define EXT2_LOG2_MAX_CLUSTER_BYTES (5 + 10)
++
++#define EXT2_COMPRESS_MAGIC_04X 0x9ec7
++#define EXT2_MAX_CLUSTER_BLOCKS 32
++#define EXT2_MAX_CLUSTER_PAGES (EXT2_MAX_CLUSTER_BYTES >> PAGE_CACHE_SHIFT)
++#define EXT2_ECOMPR EIO
++/* A cluster is considered compressed iff the block number for the
++ last block of that cluster is EXT2_COMPRESSED_BLKADDR. If this
++ changes then check if there's anywhere that needs a cpu_to_le32()
++ conversion. */
++#define EXT2_COMPRESSED_BLKADDR 0xffffffff
++
++/* I like these names better. */
++#define EXT2_MAX_CLU_NBYTES EXT2_MAX_CLUSTER_BYTES
++#define EXT2_LOG2_MAX_CLU_NBYTES EXT2_LOG2_MAX_CLUSTER_BYTES
++#define EXT2_MAX_CLU_NBLOCKS EXT2_MAX_CLUSTER_BLOCKS
++
++
++#ifndef __KERNEL__
++
++/* Cluster head on disk, for e2compr versions before 0.4.0. I'm
++ leaving this here so that I may make e2compress able to read
++ old-style e2compr files. */
++struct ext2_cluster_head_03x {
++ __u16 magic; /* == EXT2_COMPRESS_MAGIC_03X */
++ __u16 len; /* size of uncompressed data */
++ __u16 compr_len; /* size of compressed data */
++ __u8 method; /* compress method */
++ __u8 reserved_0;
++ __u32 bitmap; /* block bitmap */
++ __u32 reserved_2; /* 0 or adler32 checksum of
++ _compressed_ data */
++};
++# define EXT2_COMPRESS_MAGIC_03X 0x8ec7 /* Head magic number
++ for e2compr versions
++ before 0.4.0. */
++#endif /* !__KERNEL__ */
++
++
++#ifdef __KERNEL__
++# ifdef CONFIG_EXT2_COMPRESS
++
++//mw
++#define CONFIG_EXT2_HAVE_GZIP
++
++/* If defined, compress each cluster as soon as we get to the end of a
++ whole cluster, when writing. (If undefined, we wait until
++ ext2_release_file() or the like.) */
++#define EXT2_COMPRESS_WHEN_CLU
++
++# ifdef CONFIG_EXT2_DEFAULT_COMPR_METHOD_DEFER
++# define EXT2_DEFAULT_COMPR_METHOD EXT2_DEFER_METH
++# elif defined (CONFIG_EXT2_DEFAULT_COMPR_METHOD_BZIP2)
++# define EXT2_DEFAULT_COMPR_METHOD EXT2_BZIP2_METH
++# elif defined (CONFIG_EXT2_DEFAULT_COMPR_METHOD_LZO)
++# define EXT2_DEFAULT_COMPR_METHOD EXT2_LZO1X_1_METH
++# ifndef CONFIG_EXT2_HAVE_LZO
++# error "Default algorithm (lzo) is not compiled in."
++# endif
++# elif defined (CONFIG_EXT2_DEFAULT_COMPR_METHOD_LZV1)
++# define EXT2_DEFAULT_COMPR_METHOD EXT2_LZV1_METH
++# ifndef CONFIG_EXT2_HAVE_LZV1
++# error "Default algorithm (lzv1) is not compiled in."
++# endif
++# elif defined (CONFIG_EXT2_DEFAULT_COMPR_METHOD_LZRW3A)
++# define EXT2_DEFAULT_COMPR_METHOD EXT2_LZRW3A_METH
++# ifndef CONFIG_EXT2_HAVE_LZRW3A
++# error "Default algorithm (lzrw3a) is not compiled in."
++# endif ++# elif defined (CONFIG_EXT2_DEFAULT_COMPR_METHOD_GZIP1) ++# define EXT2_DEFAULT_COMPR_METHOD EXT2_GZIP_1_METH ++# elif defined (CONFIG_EXT2_DEFAULT_COMPR_METHOD_GZIP2) ++# define EXT2_DEFAULT_COMPR_METHOD EXT2_GZIP_2_METH ++# elif defined (CONFIG_EXT2_DEFAULT_COMPR_METHOD_GZIP3) ++# define EXT2_DEFAULT_COMPR_METHOD EXT2_GZIP_3_METH ++# elif defined (CONFIG_EXT2_DEFAULT_COMPR_METHOD_GZIP4) ++# define EXT2_DEFAULT_COMPR_METHOD EXT2_GZIP_4_METH ++# elif defined (CONFIG_EXT2_DEFAULT_COMPR_METHOD_GZIP5) ++# define EXT2_DEFAULT_COMPR_METHOD EXT2_GZIP_5_METH ++# elif defined (CONFIG_EXT2_DEFAULT_COMPR_METHOD_GZIP6) ++# define EXT2_DEFAULT_COMPR_METHOD EXT2_GZIP_6_METH ++# elif defined (CONFIG_EXT2_DEFAULT_COMPR_METHOD_GZIP7) ++# define EXT2_DEFAULT_COMPR_METHOD EXT2_GZIP_7_METH ++# elif defined (CONFIG_EXT2_DEFAULT_COMPR_METHOD_GZIP8) ++# define EXT2_DEFAULT_COMPR_METHOD EXT2_GZIP_8_METH ++# elif defined (CONFIG_EXT2_DEFAULT_COMPR_METHOD_GZIP9) ++# define EXT2_DEFAULT_COMPR_METHOD EXT2_GZIP_9_METH ++# elif defined (CONFIG_EXT2_DEFAULT_COMPR_METHOD_BZIP2) ++# define EXT2_DEFAULT_COMPR_METHOD EXT2_BZIP2_METH ++# ifndef CONFIG_EXT2_HAVE_BZIP2 ++# error "Default algorithm (bzip2) is not compiled in." ++# endif ++# else ++# error "No default compression algorithm." ++# endif ++# if EXT2_DEFAULT_COMPR_METHOD >= EXT2_GZIP_1_METH && EXT2_DEFAULT_COMPR_METHOD <= EXT2_GZIP_9_METH ++# ifndef CONFIG_EXT2_HAVE_GZIP ++# error "Default algorithm (gzip) is not compiled in." ++# endif ++# endif ++ ++# if defined (CONFIG_EXT2_DEFAULT_CLUSTER_BITS_2) ++# define EXT2_DEFAULT_LOG2_CLU_NBLOCKS 2 ++# elif defined (CONFIG_EXT2_DEFAULT_CLUSTER_BITS_3) ++# define EXT2_DEFAULT_LOG2_CLU_NBLOCKS 3 ++# elif defined (CONFIG_EXT2_DEFAULT_CLUSTER_BITS_4) ++# define EXT2_DEFAULT_LOG2_CLU_NBLOCKS 4 ++# elif defined (CONFIG_EXT2_DEFAULT_CLUSTER_BITS_5) ++# define EXT2_DEFAULT_LOG2_CLU_NBLOCKS 5 ++# else ++# error "No default cluster size." 
++# endif ++ ++# define EXT2_DEFAULT_CLU_NBLOCKS (1 << EXT2_DEFAULT_LOG2_CLU_NBLOCKS) ++ ++# if (EXT2_LZV1_ALG != 0) || (EXT2_BZIP2_ALG != 3) || (EXT2_LZO_ALG != 4) || (EXT2_N_ALGORITHMS != 5) ++# error "this code needs changing; but then, you shouldn't be messing with algorithm ids anyway unless you are very careful to protect disk format compatibility" ++# endif ++# ifdef CONFIG_EXT2_HAVE_LZV1 ++# define _ext2_lzv1_builtin (1 << EXT2_LZV1_ALG) ++# else ++# define _ext2_lzv1_builtin 0 ++# endif ++# ifdef CONFIG_EXT2_HAVE_LZRW3A ++# define _ext2_lzrw3a_builtin (1 << EXT2_LZRW3A_ALG) ++# else ++# define _ext2_lzrw3a_builtin 0 ++# endif ++# ifdef CONFIG_EXT2_HAVE_GZIP ++# define _ext2_gzip_builtin (1 << EXT2_GZIP_ALG) ++# else ++# define _ext2_gzip_builtin 0 ++# endif ++# ifdef CONFIG_EXT2_HAVE_BZIP2 ++# define _ext2_bzip2_builtin (1 << EXT2_BZIP2_ALG) ++# else ++# define _ext2_bzip2_builtin 0 ++# endif ++# ifdef CONFIG_EXT2_HAVE_LZO ++# define _ext2_lzo_builtin (1 << EXT2_LZO_ALG) ++# else ++# define _ext2_lzo_builtin 0 ++# endif ++ ++# ifdef CONFIG_EXT2_HAVE_LZV1_MODULE ++# define _ext2_lzv1_module (1 << EXT2_LZV1_ALG) ++# else ++# define _ext2_lzv1_module 0 ++# endif ++# ifdef CONFIG_EXT2_HAVE_LZRW3A_MODULE ++# define _ext2_lzrw3a_module (1 << EXT2_LZRW3A_ALG) ++# else ++# define _ext2_lzrw3a_module 0 ++# endif ++# ifdef CONFIG_EXT2_HAVE_GZIP_MODULE ++# define _ext2_gzip_module (1 << EXT2_GZIP_ALG) ++# else ++# define _ext2_gzip_module 0 ++# endif ++# ifdef CONFIG_EXT2_HAVE_BZIP2_MODULE ++# define _ext2_bzip2_module (1 << EXT2_BZIP2_ALG) ++# else ++# define _ext2_bzip2_module 0 ++# endif ++# ifdef CONFIG_EXT2_HAVE_LZO_MODULE ++# define _ext2_lzo_module (1 << EXT2_LZO_ALG) ++# else ++# define _ext2_lzo_module 0 ++# endif ++ ++# define EXT2_ALGORITHMS_MODULE (_ext2_lzv1_module | _ext2_lzrw3a_module | _ext2_gzip_module | _ext2_bzip2_module | _ext2_lzo_module) ++# define EXT2_ALGORITHMS_BUILTIN (_ext2_lzv1_builtin | _ext2_lzrw3a_builtin | _ext2_gzip_builtin | _ext2_bzip2_builtin | _ext2_lzo_builtin) ++ ++# if EXT2_ALGORITHMS_MODULE & EXT2_ALGORITHMS_BUILTIN ++# error "Arithmetic error? Some algorithm appears to be both built-in and a module." ++# endif ++ ++/* EXT2_ALGORITHMS_SUPP is what we test when mounting a filesystem. ++ See fs/ext2/super.c. */ ++# define EXT2_ALGORITHMS_SUPP (EXT2_ALGORITHMS_MODULE | EXT2_ALGORITHMS_BUILTIN) ++# if EXT2_ALGORITHMS_SUPP == 0 ++# error "You must select at least one compression algorithm." ++# endif ++ ++/* Cluster head on disk. Little-endian. */ ++struct ext2_cluster_head { ++ __u16 magic; /* == EXT2_COMPRESS_MAGIC_04X. */ ++ __u8 method; /* compression method id. */ ++ __u8 holemap_nbytes; /* length of holemap[] array */ ++ __u32 checksum; /* adler32 checksum. Checksum covers all fields ++ below this one, and the compressed data. */ ++ __u32 ulen; /* size of uncompressed data */ ++ __u32 clen; /* size of compressed data (excluding cluster head) */ ++ __u8 holemap[0]; /* bitmap describing where to put holes. */ ++}; ++ ++ ++struct ext2_wa_S { ++ __u8 u[EXT2_MAX_CLUSTER_BYTES]; /* Uncompressed data. */ ++ __u8 c[EXT2_MAX_CLUSTER_BYTES]; /* Compressed data. */ ++ __u8 heap[1]; /* Heap: working space for de/compression routines. */ ++}; ++ ++# define EXT2_CLEANUP_FL 0x40 /* See Readme.e2compr */ ++# define EXT2_OSYNC_INODE 0x20 /* sync of inode running */ ++# define ROUNDUP_DIV(_n, _d) ((_n) ? 1 + (((_n) - 1) / (_d)) : 0) ++# define ROUNDUP_RSHIFT(_n, _b) ((_n) ? 
1 + (((_n) - 1) >> (_b)) : 0) ++ ++# if defined(EXT2_NDIR_BLOCKS) && (EXT2_NDIR_BLOCKS != 12) ++# error "e2compr currently assumes that EXT2_NDIR_BLOCKS is 12." ++/* If EXT2_NDIR_BLOCKS changes then change the definitions of ++ ext2_first_cluster_nblocks() and friends, and search the patch for ++ anywhere where 12 is hard-coded. (At the time of writing, it's ++ only hard-coded in ext2_first_cluster_nblocks().) What we want to ++ achieve is for clusters not to straddle address blocks. Apart from ++ performance, some code in compress.c (search for `straddle') ++ assumes this. */ ++# endif ++ ++# include ++ ++# define EXT2_ALG_INIT_COMPRESS 1 ++# define EXT2_ALG_INIT_DECOMPRESS 2 ++ ++extern int ext2_get_cluster_pages (struct inode*, u32, struct page**, struct page *, int); ++extern int ext2_get_cluster_extra_pages (struct inode*, u32, struct page**, struct page**); ++extern int ext2_kmap_cluster_pages (struct page *, struct page**, struct page**); ++extern int ext2_kunmap_cluster_pages (struct page *, struct page**, struct page**); ++extern int ext2_get_cluster_blocks (struct inode*, u32, struct buffer_head**, struct page**, struct page**, int); ++extern int ext2_decompress_cluster (struct inode*, u32); ++extern int ext2_decompress_pages(struct inode*, u32, struct page**); ++extern int ext2_compress_cluster (struct inode*, u32); ++extern int ext2_decompress_inode (struct inode*); ++extern int ext2_cleanup_compressed_inode (struct inode*); ++extern void ext2_update_comprblk (struct inode *); ++extern int ext2_get_dcount(struct inode *inode); ++ ++extern size_t ext2_decompress_blocks (struct inode*, struct buffer_head**, int, size_t, u32 cluster); ++extern int ext2_count_blocks (struct inode*); ++extern int ext2_recognize_compressed (struct inode *, unsigned cluster); ++extern unsigned long ext2_adler32 (unsigned long, unsigned char*, int); ++ ++extern size_t ext2_iLZV1 (int); ++extern size_t ext2_iLZV2 (int); ++extern size_t ext2_iNONE (int); ++extern size_t ext2_iGZIP (int); ++extern size_t ext2_iBZIP2 (int); ++extern size_t ext2_iLZO (int); ++extern size_t ext2_iLZRW3A (int); ++extern size_t ext2_iZLIB (int); ++ ++extern size_t ext2_wLZV1 (__u8*, __u8*, void*, size_t, size_t, int); ++extern size_t ext2_wLZV2 (__u8*, __u8*, void*, size_t, size_t, int); ++extern size_t ext2_wNONE (__u8*, __u8*, void*, size_t, size_t, int); ++extern size_t ext2_wGZIP (__u8*, __u8*, void*, size_t, size_t, int); ++extern size_t ext2_wBZIP2 (__u8*, __u8*, void*, size_t, size_t, int); ++extern size_t ext2_wLZO (__u8*, __u8*, void*, size_t, size_t, int); ++extern size_t ext2_wLZRW3A (__u8*, __u8*, void*, size_t, size_t, int); ++extern size_t ext2_wZLIB (__u8*, __u8*, void*, size_t, size_t, int); ++ ++extern size_t ext2_rLZV1 (__u8*, __u8*, void*, size_t, size_t, int); ++extern size_t ext2_rLZV2 (__u8*, __u8*, void*, size_t, size_t, int); ++extern size_t ext2_rNONE (__u8*, __u8*, void*, size_t, size_t, int); ++extern size_t ext2_rGZIP (__u8*, __u8*, void*, size_t, size_t, int); ++extern size_t ext2_rBZIP2 (__u8*, __u8*, void*, size_t, size_t, int); ++extern size_t ext2_rLZO (__u8*, __u8*, void*, size_t, size_t, int); ++extern size_t ext2_rLZRW3A (__u8*, __u8*, void*, size_t, size_t, int); ++extern size_t ext2_rZLIB (__u8*, __u8*, void*, size_t, size_t, int); ++ ++struct ext2_algorithm { ++ char *name; ++ int avail; ++ size_t (*init) (int); ++ size_t (*compress) (__u8*, __u8*, void*, size_t, size_t, int); ++ size_t (*decompress) (__u8*, __u8*, void*, size_t, size_t, int); ++}; ++ ++struct ext2_method { ++ unsigned 
alg; ++ int xarg; ++}; ++ ++ ++# define ext2_first_cluster_nblocks(_i) ((EXT2_I(_i))->i_clu_nblocks > 4 && (_i)->i_sb->s_blocksize < 4096 ? 12 : 4) ++# define ext2_block_to_cluster(_i,_b) ((_b) < ext2_first_cluster_nblocks(_i) ? 0 : (((_b) - ext2_first_cluster_nblocks(_i)) >> (EXT2_I(_i))->i_log2_clu_nblocks) + 1) ++# define ext2_offset_to_cluster(_i,_o) ext2_block_to_cluster((_i), ((_o) >> (_i)->i_sb->s_blocksize_bits)) ++# define ext2_n_clusters(_i) ((_i)->i_size ? ext2_offset_to_cluster((_i), (_i)->i_size - 1) + 1 : 0) ++# define ext2_cluster_block0(_i,_c) ((_c) ? ext2_first_cluster_nblocks(_i) + (((_c) - 1) << (EXT2_I(_i))->i_log2_clu_nblocks) : 0) ++# define ext2_cluster_nblocks(_i,_c) ((_c) ? (EXT2_I(_i))->i_clu_nblocks : ext2_first_cluster_nblocks(_i)) ++# define ext2_cluster_offset(_i,_c) ((_c) ? ext2_cluster_block0((_i), (_c)) << (_i)->i_sb->s_blocksize_bits : 0) ++ ++# define ext2_first_cluster_npages(_i) ((EXT2_I(_i))->i_clu_nblocks > 4 && (_i)->i_sb->s_blocksize < 4096 ? 12 >> (PAGE_CACHE_SHIFT - (_i)->i_sb->s_blocksize_bits) : 4 >> (PAGE_CACHE_SHIFT - (_i)->i_sb->s_blocksize_bits)) ++# define ext2_page_to_cluster(_i,_p) ((_p) < ext2_first_cluster_npages(_i) ? 0 : (((_p) - ext2_first_cluster_npages(_i)) >> (((EXT2_I(_i))->i_log2_clu_nblocks)+(_i)->i_sb->s_blocksize_bits-PAGE_CACHE_SHIFT)) + 1) ++# define ext2_cluster_page0(_i,_c) ((_c) ? ext2_cluster_block0(_i, _c) >> (PAGE_CACHE_SHIFT - (_i)->i_sb->s_blocksize_bits) : 0) ++# define ext2_cluster_npages(_i,_c) ((_c) ? (EXT2_I(_i))->i_clu_nblocks >> (PAGE_CACHE_SHIFT - (_i)->i_sb->s_blocksize_bits) : ext2_first_cluster_npages(_i)) ++ ++static inline int ++ext2_offset_is_clu_boundary(struct inode *inode, u32 off) ++{ ++ if (off & (inode->i_sb->s_blocksize - 1)) ++ return 0; ++ if (off == 0) ++ return 1; ++ off >>= inode->i_sb->s_blocksize_bits; ++ if (off < ext2_first_cluster_nblocks(inode)) ++ return 0; ++ off -= ext2_first_cluster_nblocks(inode); ++ return !(off & (EXT2_I(inode)->i_clu_nblocks - 1)); ++} ++ ++struct ext2_wa_contents_S { ++ ino_t ino; ++ dev_t dev; ++ unsigned cluster; ++}; ++ ++DECLARE_PER_CPU(struct ext2_wa_S *, ext2_rd_wa); ++DECLARE_PER_CPU(struct ext2_wa_S *, ext2_wr_wa); ++ ++extern void ext2_alloc_rd_wa(void); ++extern void ext2_alloc_wr_wa(void); ++ ++extern struct ext2_algorithm ext2_algorithm_table[]; ++extern struct ext2_method ext2_method_table[]; /*mw: is static so far, no writes*/ ++ ++/* Both of these return -errno if error, 0 if not compressed, positive ++ if compressed. (You should use the macro unless you've already ++ tested COMPRBLK.) */ ++extern int ext2_cluster_is_compressed_fn (struct inode *inode, __u32 cluster); ++static inline int ext2_cluster_is_compressed (struct inode *inode, __u32 cluster) ++{ ++ if ((EXT2_I(inode)->i_flags & EXT2_COMPRBLK_FL) == 0) ++ return 0; ++ return ext2_cluster_is_compressed_fn (inode, cluster); ++} ++extern unsigned ext2_calc_free_ix (unsigned , u8 const *, unsigned ); ++extern int ext2_unpack_blkaddrs(struct inode *, struct buffer_head **, int, unsigned , u8 const *, unsigned , unsigned , unsigned , unsigned ); ++ ++# define HOLE_BLKADDR(_b) \ ++ (((_b) == 0) \ ++ || ((_b) == EXT2_COMPRESSED_BLKADDR)) ++# else /* !CONFIG_EXT2_COMPRESS */ ++# define HOLE_BLKADDR(_b) ((_b) == 0) ++# endif ++ ++/* For some reason or other, I see code like `if (le32_to_cpu(tmp) != ++ 0)' around in the kernel. So far I haven't checked whether or not ++ the compiler knows that the swab can be dropped. 
*/ ++# if defined(EXT2_COMPRESSED_BLKADDR) && EXT2_COMPRESSED_BLKADDR != 0xffffffff ++/* This may be a false positive; the "correct" test would be `if ++ defined(CONFIG_EXT2_COMPRESS)', but if this test does succeed, then ++ there is at least cause to have a look around. */ ++# error "Next bit of code is wrong." ++# endif ++ ++# define HOLE_BLKADDR_SWAB32(_b) HOLE_BLKADDR(_b) ++ ++#ifdef EXT2_COMPR_REPORT ++#define trace_e2c(format, args...) printk(KERN_DEBUG format, ## args) ++#else ++#define trace_e2c(format, args...) do {} while(0) ++#endif ++ ++#endif /* __KERNEL__ */ ++ ++ ++#endif /* EXT2_FS_C_H */ +--- linux-3.4-rc5/fs/ext2/Makefile 2012-04-29 18:19:10.000000000 -0400 ++++ linux-3.4-rc5-e2c/fs/ext2/Makefile 2012-04-30 04:11:03.790143095 -0400 +@@ -2,10 +2,17 @@ + # Makefile for the linux ext2-filesystem routines. + # + ++ifeq ($(CONFIG_EXT2_COMPRESS),y) ++ ++COMPRESS_STUFF := adler32.o compress.o e2zlib.o\ ++ $($(obj-y):%/=%/ext2-compr-%.o) ++endif ++ + obj-$(CONFIG_EXT2_FS) += ext2.o + + ext2-y := balloc.o dir.o file.o ialloc.o inode.o \ +- ioctl.o namei.o super.o symlink.o ++ ioctl.o namei.o super.o symlink.o $(COMPRESS_STUFF) ++ + + ext2-$(CONFIG_EXT2_FS_XATTR) += xattr.o xattr_user.o xattr_trusted.o + ext2-$(CONFIG_EXT2_FS_POSIX_ACL) += acl.o +--- linux-3.4-rc5/fs/ext2/compress.c 1969-12-31 19:00:00.000000000 -0500 ++++ linux-3.4-rc5-e2c/fs/ext2/compress.c 2012-05-03 22:15:16.951998711 -0400 +@@ -0,0 +1,3407 @@ ++/* ++ * linux/fs/ext2/compress.c ++ * ++ * Copyright (C) 1995 Antoine Dumesnil de Maricourt (dumesnil@etca.fr) ++ * (transparent compression code) ++ */ ++ ++/* ++ * Copyright (C) 2001 Alcatel Business Systems - R&D Illkirch FRANCE ++ * ++ * Transparent compression code for 2.4 kernel. ++ * ++ * Denis Richard (denis.richard@sxb.bsf.alcatel.fr) ++ * Pierre Peiffer (pierre.peiffer@sxb.bsf.alcatel.fr) ++ * ++ * Adapted from patch e2compr-0.4.39-patch-2.2.18 . ++ */ ++ ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++ ++#define MIN(a,b) ((a) < (b) ? 
(a) : (b)) ++ ++#ifdef CONFIG_HIGHMEM ++#define restore_b_data_himem(bh) assert(page_address(bh->b_page)); bh->b_data = page_address(bh->b_page) + bh_offset(bh) ++ ++ ++ ++int ext2_kmap_cluster_pages(struct page *page, struct page *pg[], ++ struct page *epg[]) ++{ ++ int i = 0; ++ ++ for (i = 0; i < EXT2_MAX_CLUSTER_PAGES; i++) { ++ if (!pg[i]) ++ break; ++ if (epg && epg[i]) ++ kmap(epg[i]); ++ else ++ kmap(pg[i]); ++ } ++ ++ if (page) ++ kmap(page); ++ return 0; ++} ++ ++ ++int ext2_kunmap_cluster_pages(struct page *page, struct page *pg[], ++ struct page *epg[]) ++{ ++ int i = 0; ++ ++ for (i = 0; i < EXT2_MAX_CLUSTER_PAGES; i++) { ++ if (!pg[i]) ++ break; ++ if (epg && epg[i]) ++ kunmap(epg[i]); ++ else ++ kunmap(pg[i]); ++ } ++ ++ if (page) ++ kunmap(page); ++ return 0; ++} ++#else //no high-mem: ++#define restore_b_data_himem(bh) ; ++#endif ++ ++ ++/*none compression dummy functions*/ ++size_t ext2_iNONE (int action) { return 0; } ++size_t ext2_wNONE (__u8 *ibuf, __u8 *obuf, void *wa, size_t ilen, size_t olen, int xarg) { return 0; } ++size_t ext2_rNONE (__u8 *ibuf, __u8 *obuf, void *wa, size_t ilen, size_t olen, int xarg) { return 0; } ++ ++/* ++ * Algorithm and method tables ++ */ ++struct ext2_algorithm ext2_algorithm_table[] = { ++ /* Note: all algorithms must have the `name' field filled in. ++ This is used to autoload algorithm modules (ext2-compr-%s), and ++ in kernel printk. */ ++ /* N.B. Do not renumber these algorithms! (To do so is to change ++ the binary format.) It's OK for `none' and `undef' to be ++ renumbered, though. */ ++ ++ /* Fields: ++ name; available; routines for: ++ init, compress, decompress. */ ++ {"lzv1", 0, ext2_iNONE, ext2_wNONE, ext2_rNONE}, ++ {"lzrw3a", 0, ext2_iNONE, ext2_wNONE, ext2_rNONE}, ++ {"gzip", 1, ext2_iZLIB, ext2_wZLIB, ext2_rZLIB}, //Andreas: workaround ++ {"bzip2", 0, ext2_iNONE, ext2_wNONE, ext2_rNONE}, ++ {"lzo", 0, ext2_iNONE, ext2_wNONE, ext2_rNONE}, ++ {"none", 1, ext2_iNONE, ext2_wNONE, ext2_rNONE}, ++ ++ /* This "algorithm" is for unused entries in the method table. ++ It differs from EXT2_NONE_ALG in that it is considered ++ unavailable, whereas `none' is always available. */ ++ {"undef", 0, ext2_iNONE, ext2_wNONE, ext2_rNONE}, ++ ++}; ++ ++/* Note: EXT2_N_ALGORITHMS can't be increased beyond 16 without ++ changing the width of the s_algorithms_used field in the in-memory ++ superblock. The on-disk s_algorithms_used field is 32 bits long. ++ (This is in a state of flux. Currently (1998-02-05) there is no ++ distinction: we always use the s_es copy. */ ++ ++/* The size of this table must be 32 to prevent Oopsen from ++ invalid data. We index this from 5 bits of i_flags, so ++ the size is (1 << 5) == 32. */ ++struct ext2_method ext2_method_table[32] = { ++ /* Fields: algorithm id, algorithm argument. 
*/ ++ {EXT2_LZV1_ALG, 0}, ++ {EXT2_NONE_ALG, 0}, /* 1: auto */ ++ {EXT2_NONE_ALG, 0}, /* 2: defer */ ++ {EXT2_NONE_ALG, 0}, /* 3: never */ ++ {EXT2_BZIP2_ALG, 0}, /* 4: bzip2 */ ++ {EXT2_UNDEF_ALG, 0}, ++ {EXT2_UNDEF_ALG, 0}, ++ {EXT2_UNDEF_ALG, 0}, ++ {EXT2_LZRW3A_ALG, 0}, /* 8: lzrw3a */ ++ {EXT2_UNDEF_ALG, 0}, ++ {EXT2_LZO_ALG, 0}, /* 10: lzo1x_1 */ ++ {EXT2_UNDEF_ALG, 0}, ++ {EXT2_UNDEF_ALG, 0}, ++ {EXT2_UNDEF_ALG, 0}, ++ {EXT2_UNDEF_ALG, 0}, ++ {EXT2_UNDEF_ALG, 0}, ++ {EXT2_GZIP_ALG, 1}, /* 16 */ ++ {EXT2_GZIP_ALG, 2}, ++ {EXT2_GZIP_ALG, 3}, ++ {EXT2_GZIP_ALG, 4}, ++ {EXT2_GZIP_ALG, 5}, ++ {EXT2_GZIP_ALG, 6}, ++ {EXT2_GZIP_ALG, 7}, ++ {EXT2_GZIP_ALG, 8}, ++ {EXT2_GZIP_ALG, 9}, ++ {EXT2_UNDEF_ALG, 0}, ++ {EXT2_UNDEF_ALG, 0}, ++ {EXT2_UNDEF_ALG, 0}, ++ {EXT2_UNDEF_ALG, 0}, ++ {EXT2_UNDEF_ALG, 0}, ++ {EXT2_UNDEF_ALG, 0}, ++ {EXT2_UNDEF_ALG, 0} ++}; ++ ++ ++static void ext2_mark_algorithm_use(struct inode *inode, unsigned alg) ++{ ++ struct ext2_sb_info *sbi = EXT2_SB(inode->i_sb); ++ ++ /* Hopefully, lock_super() isn't needed here, as we don't ++ block in the critical region. True? */ ++ assert(alg < EXT2_N_ALGORITHMS); ++ if (sbi->s_es->s_feature_incompat ++ & cpu_to_le32(EXT2_FEATURE_INCOMPAT_COMPRESSION)) { ++ sbi->s_es->s_algorithm_usage_bitmap |= cpu_to_le32(1 << alg); ++ } else { ++ struct ext2_super_block *es = sbi->s_es; ++ ++ es->s_algorithm_usage_bitmap = cpu_to_le32(1 << alg); ++ es->s_feature_incompat ++ |= cpu_to_le32(EXT2_FEATURE_INCOMPAT_COMPRESSION); ++ if (es->s_rev_level < EXT2_DYNAMIC_REV) { ++ /* Raise the filesystem revision level to ++ EXT2_DYNAMIC_REV so that s_feature_incompat ++ is honoured (except in ancient kernels / ++ e2fsprogs). We must also initialize two ++ other dynamic-rev fields. The remaining ++ fields are assumed to be already correct ++ (e.g. still zeroed). */ ++ es->s_rev_level = cpu_to_le32(EXT2_DYNAMIC_REV); ++ es->s_first_ino = cpu_to_le32(EXT2_GOOD_OLD_FIRST_INO); ++ es->s_inode_size = cpu_to_le16(EXT2_GOOD_OLD_INODE_SIZE); ++ } ++ } ++ mark_buffer_dirty(sbi->s_sbh); ++} ++ ++ ++/* Displays an error message if algorithm ,alg` is not marked in use, ++ and then marks it in use. 
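/*
 * Illustrative sketch (stand-alone user space, assumed ids following the
 * order of ext2_algorithm_table[] above): ext2_mark_algorithm_use() records
 * each algorithm in a 32-bit usage bitmap in the superblock, one bit per
 * algorithm id, and the helper below complains if a cluster uses an
 * algorithm whose bit was never set.  A minimal model of that bookkeeping:
 */
#include <stdio.h>

enum { ALG_LZV1, ALG_LZRW3A, ALG_GZIP, ALG_BZIP2, ALG_LZO, ALG_NONE };

int main(void)
{
	unsigned usage_bitmap = 0;

	usage_bitmap |= 1u << ALG_GZIP;            /* mark gzip as used */
	if (!(usage_bitmap & (1u << ALG_LZO)))     /* ensure-style check */
		printf("lzo not marked used yet (bitmap=%#x)\n", usage_bitmap);
	return 0;
}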
*/ ++static void ext2_ensure_algorithm_use(struct inode *inode, unsigned alg) ++{ ++ assert(alg < EXT2_N_ALGORITHMS); ++ ++ if (!(EXT2_SB(inode->i_sb)->s_es->s_algorithm_usage_bitmap ++ & cpu_to_le32(1 << alg))) { ++ ext2_msg(inode->i_sb, "algorithm usage bitmap algorithm %s not marked used in inode %lu", ++ ext2_algorithm_table[alg].name, inode->i_ino); ++ ext2_mark_algorithm_use(inode, alg); ++ } ++} ++ ++ ++/*mw: out of cache bug fix 5-16-07 */ ++static void create_empty_buffers_e2c(struct page *page, ++ unsigned long blocksize, ++ unsigned long b_state, ++ struct inode *inode) ++{ ++ struct buffer_head *bh, *head, *tail; ++ ++ head = alloc_page_buffers(page, blocksize, 1); ++ bh = head; ++ do { ++ bh->b_state |= b_state; ++ tail = bh; ++ bh->b_bdev = NULL; //mw: make it like 2.4 ++ bh->b_blocknr = 0; //mw: make it like 2.4 ++ bh->b_end_io = NULL; //mw: make it like 2.4 ++ bh = bh->b_this_page; ++ } while (bh); ++ tail->b_this_page = head; ++ spin_lock(&inode->i_mapping->private_lock); ++ if (PageUptodate(page) || PageDirty(page)) { ++ bh = head; ++ do { ++ if (PageDirty(page)) ++ set_buffer_dirty(bh); ++ if (PageUptodate(page)) ++ set_buffer_uptodate(bh); ++ bh = bh->b_this_page; ++ } while (bh != head); ++ } ++ attach_page_buffers(page, head); ++ spin_unlock(&inode->i_mapping->private_lock); ++} ++ ++int ext2_get_cluster_pages(struct inode *inode, u32 cluster, ++ struct page *pg[], struct page *page, int compr) ++{ ++ int nbpg, npg, i; ++ u32 page0; /* = position within file (not position within fs). */ ++ u32 idx = 0; ++ ++ /*mw */ ++ for (i = 0; i < EXT2_MAX_CLUSTER_PAGES; i++) ++ pg[i] = NULL; ++ ++ page0 = ext2_cluster_page0(inode, cluster); ++ nbpg = ext2_cluster_npages(inode, cluster); ++ ++ if (compr && (((page0 + nbpg) << PAGE_CACHE_SHIFT) > inode->i_size)) ++ nbpg = ((inode->i_size - 1) >> PAGE_CACHE_SHIFT) - page0 + 1; ++#ifdef EXT2_COMPR_REPORT ++ trace_e2c("ext2_get_cluster_pages: page0=%d, nbpg=%d page=%ld\n", ++ page0, nbpg, ((page != NULL) ? 
page->index : 0)); ++#endif ++ for (npg = 0; npg < nbpg; npg++) { ++ if ((page == NULL) || ((page0 + npg) != page->index)) { ++ //pg[npg] = __grab_cache_page(inode->i_mapping, page0+npg); /* &cached_page, &lru_pvec);*/ ++ pg[npg] = grab_cache_page_write_begin(inode->i_mapping, page0+npg, 0); ++ if (!pg[npg]) ++ goto error; ++ } else { ++ pg[npg] = page; ++ } ++ if (!page_has_buffers(pg[npg])) { ++ ClearPageUptodate(pg[npg]); ++ ClearPageDirty(pg[npg]); ++ create_empty_buffers_e2c(pg[npg], inode->i_sb->s_blocksize, 0, inode); ++ if (unlikely(!page_has_buffers(pg[npg]))) ++ trace_e2c("ext2_get_cluster_pages: NOMEM!\n"); ++ assert(!PageUptodate(pg[npg])); ++ assert(!PageDirty(pg[npg])); ++ } ++ } ++ //set remaining pages to NULL ++ for (idx = npg; idx < EXT2_MAX_CLUSTER_PAGES; idx++) ++ pg[idx] = NULL; ++ ++ return (npg); ++ ++ error: ++ while (--npg >= 0) { ++ if ((page == NULL) || ((page0 + npg) != page->index)) { ++ unlock_page(pg[npg]); ++ page_cache_release(pg[npg]); ++ } ++ pg[npg] = NULL; ++ } ++ trace_e2c("ext2_get_cluster_pages: error no page\n"); ++ return (-ENOMEM); ++} ++ ++ ++int ext2_get_cluster_extra_pages(struct inode *inode, u32 cluster, ++ struct page *pg[], struct page *epg[]) ++{ ++ struct page *page; ++ int nbpg, npg, i; ++ ++ for (i = 0; i < EXT2_MAX_CLUSTER_PAGES; i++) ++ epg[i] = NULL; ++ ++ nbpg = ext2_cluster_npages(inode, cluster); ++ for (npg = 0; npg < nbpg; npg++) { ++ if (pg[npg] == NULL) ++ break; ++ if (PageUptodate(pg[npg])) { ++ //page = page_cache_alloc(inode->i_mapping); ++ //mw: has gfp-mask of adress-space: gfp_t mapping_gfp_mask(struct address_space * mapping) ++ // don't trigger. shrink_dcache_memory which might call ext2_cleanup_compressed_inode with the SAME mutex. ++ page = __page_cache_alloc(GFP_NOFS); ++ ++ if (!page) { ++ goto error; ++ } ++ ClearPageError(page); ++ ClearPageReferenced(page); ++ ClearPageUptodate(page); ++ ClearPageDirty(page); ++ lock_page(page); ++ page->index = pg[npg]->index; ++ ++ if (!page_has_buffers(page)) { ++ create_empty_buffers_e2c(page, inode->i_sb->s_blocksize, 0, ++ inode); ++ /*mw : only the "extra_pages" for decompression need create_empty_buffers_unlocked, because ++ * they have no mapping-context and they must not have one. Otherwise they get need a page->index ++ * which belongs always to an address_space object (e.g.: inode). But I think this is not intented here. ++ * we just need thei buffers for a short time of decompression */ ++ if (unlikely(!page_has_buffers(page))) ++ return printk("Error: NOMEM!\n"); ++ } ++ ++ epg[npg] = page; ++#ifdef EXT2_COMPR_REPORT ++ trace_e2c ++ ("ext2_get_cluster_extra_pages: allocated page idx=%ld\n", ++ pg[npg]->index); ++#endif ++ } else { ++ epg[npg] = NULL; ++ } ++ } ++ return (npg); ++ error: ++ while (--npg >= 0) ++ if (epg[npg]) { ++ ClearPageDirty(epg[npg]); ++ ClearPageUptodate(epg[npg]); ++ try_to_free_buffers(epg[npg]); ++ unlock_page(epg[npg]); ++ assert(page_count(epg[npg]) == 1); ++ page_cache_release(epg[npg]); ++ } ++ trace_e2c("ext2_get_cluster_extra_pages: error no page\n"); ++ return (-ENOMEM); ++ ++} ++ ++/* Read every block in the cluster. The blocks are stored in the bh ++ array, which must be big enough. ++ ++ Return the number of block contained in the cluster, or -errno if an ++ error occured. The buffers should be released by the caller ++ (unless an error occurred). ++ ++ The inode must be locked, otherwise it is possible that we return ++ some out of date blocks. 
++ ++ Called by : ++ ++ ext2_decompress_cluster() [i_sem] ++ ext2_compress_cluster() [i_sem] ++ ext2_readpage() [i_sem] */ ++ ++ ++int ext2_get_cluster_blocks(struct inode *inode, u32 cluster, ++ struct buffer_head *bh[], struct page *pg[], ++ struct page *epg[], int compr) ++{ ++ struct buffer_head *br[EXT2_MAX_CLUSTER_BLOCKS]; ++ int nreq, nbh = 0, npg, i; ++ u32 clu_nblocks; ++ int err; ++ const int blocks = PAGE_CACHE_SIZE >> inode->i_sb->s_blocksize_bits; ++ ++ /*mw */ ++ for (i = 0; i < EXT2_MAX_CLUSTER_BLOCKS; i++) ++ bh[i] = NULL; ++ ++ assert(atomic_read(&inode->i_mutex.count) <= 0); /* i.e. mutex_lock */ ++ ++ /* ++ * Request full cluster. ++ */ ++ { ++ u32 endblk; ++ u32 block; /* = position within file (not position within fs). */ ++ u32 nbpg; ++ u32 page0; /* = position within file (not position within fs). */ ++ u32 idx; ++ ++ block = ext2_cluster_block0(inode, cluster); ++ clu_nblocks = ext2_cluster_nblocks(inode, cluster); ++ /* impl: Don't shorten endblk for i_size. The ++ remaining blocks should be NULL anyway, except in ++ the case when called from ext2_decompress_cluster ++ from ext2_truncate, in which case i_size is short ++ and we _want_ to get all of the blocks. */ ++ endblk = block + clu_nblocks; ++ ++ page0 = ext2_cluster_page0(inode, cluster); ++ nbpg = ext2_cluster_npages(inode, cluster); ++ ++ if (compr ++ && (((page0 + nbpg) << PAGE_CACHE_SHIFT) > inode->i_size)) { ++ nbpg = ((inode->i_size - 1) >> PAGE_CACHE_SHIFT) - page0 + 1; ++ endblk = ++ block + ++ (nbpg << ++ (PAGE_CACHE_SHIFT - inode->i_sb->s_blocksize_bits)); ++ } ++ ++ idx = page0 << (PAGE_CACHE_SHIFT - inode->i_sb->s_blocksize_bits); ++#ifdef EXT2_COMPR_REPORT ++ trace_e2c("ext2_get_cluster_blocks: page0=%d, nbpg=%d\n", page0, ++ nbpg); ++#endif ++ for (npg = 0; npg < nbpg; npg++) { ++ struct buffer_head *buffer; ++ ++ if ((epg != NULL) && (epg[npg] != NULL)) ++ buffer = page_buffers(epg[npg]); ++ else ++ buffer = page_buffers(pg[npg]); ++ for (i = 0; i < blocks && (block + nbh) < endblk; ++ buffer = buffer->b_this_page, i++) { ++ if (idx == (block + nbh)) { ++ bh[nbh] = buffer; ++ nbh++; ++ } ++ idx++; ++ } ++ } ++#ifdef EXT2_COMPR_REPORT ++ trace_e2c ++ ("ext2_get_cluster_blocks: get every pages and %d buffers\n", ++ nbh); ++#endif ++ ++ for (nbh = 0, nreq = 0; block < endblk; nbh++) { ++ assert(bh[nbh] != NULL); ++ bh[nbh]->b_blocknr = 0; ++ clear_bit(BH_Mapped, &bh[nbh]->b_state); ++ ++ //mw: does not work with 2.6 and holes!!! ++ //err=ext2_get_block(inode, block++, bh[nbh], (PageDirty(bh[nbh]->b_page) ? 1 : 0)); ++ err = ext2_get_block(inode, block++, bh[nbh], 0); ++ /* mw: 0: we dont' create non existing blocks here ++ * let's do it just before the writeback, when we know, which blocks we really need...*/ ++ //err=ext2_get_block(inode, block++, bh[nbh], (buffer_dirty(bh[nbh]) ? 1 : 0)); ++ ++ /* mw: bdev-bug-fix: for files which got compressed and now consume less buffers ++ * ext2_get_block returns 0, for a empty-block. As these buffer were used before ++ * the bh[nbh]->b_bdev might be != NULL or just invalid. So we set them explicitly ++ * to NULL. */ ++ //printk("Get Block cluster %i: (%#x):%i Blk-NR:%lu(%lu)[%lu-%lu] Bdev:%#x(%#x), PGDirty:%i, mapped:%i, PID: %lu\n", cluster, bh[nbh], nbh, block, ++ ++ //if we are not mapped, then the blocknr will be wrong ++ //we set a bdev here the we will write to some "random" block ++ if (!buffer_mapped(bh[nbh])) { ++ bh[nbh]->b_bdev = NULL; /* don't write wrongly mapped blocks !!! 
*/ ++ /* mw: you encounter null pointer oops you MUST ++ * map your buffer using ext2_get_block()*/ ++ } ++ ++ if (bh[nbh]->b_blocknr != 0) { ++ if (!buffer_uptodate(bh[nbh]) ++ /* TODO: Do we need this ++ `!buffer_locked' test? */ ++ && !buffer_locked(bh[nbh]) ++ && !PageDirty(bh[nbh]->b_page)) ++ br[nreq++] = bh[nbh]; ++ } else if ((err != 0) ++ && (err != -EFBIG)) ++ /* impl: for some unknown reason, ++ ext2_getblk() returns -EFBIG if ++ !create and there's a hole. ==> not right any more in 2.4 */ ++ goto error; ++ } ++ for (i = nbh; i < EXT2_MAX_CLUSTER_BLOCKS; i++) { ++ bh[i] = NULL; ++ } ++ } ++#ifdef EXT2_COMPR_REPORT_CPR ++ trace_e2c("ext2_get_cluster_blocks: nreq=%d for cluster=%d\n", nreq, ++ cluster); ++#endif ++ ++ //read all blocks, which are not null-blocks ++ if (nreq > 0) ++ ll_rw_block(READ, nreq, br); ++ ++ /* ++ * Adjust nbh if we have some null blocks at end of cluster. ++ */ ++ while ((nbh != 0) && (bh[nbh - 1]->b_blocknr == 0)) ++ nbh--; ++ ++ /* ++ * Wait for blocks. ++ */ ++ err = -EIO; ++ CHECK_NOT_ATOMIC ++ for (i = 0; i < nbh; i++) ++ if ((!PageDirty(bh[i]->b_page)) && (bh[i]->b_blocknr != 0)) { ++ wait_on_buffer(bh[i]); ++ if (!buffer_uptodate(bh[i])) { /* Read error ??? */ ++ trace_e2c ++ ("ext2_get_cluster_blocks: wait_on_buffer error (blocknr=%ld)\n", ++ bh[i]->b_blocknr); ++ goto error; ++ } ++ } ++ assert(nbh <= EXT2_MAX_CLU_NBLOCKS); ++ ++ return nbh; ++ ++ error: ++ printk("ERROR: ext2_get_cluster_blocks()\n"); ++ return err; ++} ++ ++ ++/* Iterations over block in the inode are done with a generic ++ iteration key mechanism. We need one method to convert a block ++ number into a new key, one method to iterate (i.e., increment the ++ key) and one method to free the key. The code could be shared with ++ truncate.c, as this mechanism is very general. ++ ++ This code assumes tht nobody else can read or write the file ++ between ext2_get_key() and ext2_free_key(), so callers need to have ++ i_sem (which they all do anyway). */ ++ ++/* TODO: Get all of the bkey routines to return -errno instead of ++ true/false. */ ++/* TODO: The bkey routines currently assume tht address blocks are ++ allocated even if all contained addresses are NULL, but this is not ++ true. Make sure tht we differentiate between NULL block and error, ++ and then fix up ext2_set_key_blkaddr() and anything else (including ++ the pack/unpack routines). */ ++struct ext2_bkey { ++ int level; ++ u32 block; ++ struct inode *inode; ++ int off[4]; ++ u32 *ptr[4]; ++ struct buffer_head *ibh[4]; ++}; ++ ++ ++/* ++ * Method to convert a block number into a key. ++ * ++ * Returns 1 on success, 0 on failure. You may safely, but need ++ * not, free the key even if ext2_get_key() fails. ++ */ ++static int ext2_get_key(struct ext2_bkey *key, struct inode *inode, ++ u32 block) ++{ ++ int x, level; ++ int addr_per_block = EXT2_ADDR_PER_BLOCK(inode->i_sb); ++ ++ assert(atomic_read(&inode->i_mutex.count) <= 0); ++ ++ /* ++ * The first step can be viewed as translating the ++ * original block number in a special base (powers ++ * of addr_per_block). 
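/*
 * Worked sketch of that decomposition (stand-alone, user space): with the
 * standard ext2 constants and an assumed addr_per_block of 256 (1 KiB
 * blocks, 4-byte block addresses), file block 300 lands in the doubly
 * indirect tree.
 */
#include <stdio.h>

#define NDIR_BLOCKS    12    /* EXT2_NDIR_BLOCKS */
#define IND_BLOCK      12    /* EXT2_IND_BLOCK   */
#define DIND_BLOCK     13    /* EXT2_DIND_BLOCK  */
#define TIND_BLOCK     14    /* EXT2_TIND_BLOCK  */
#define ADDR_PER_BLOCK 256   /* assumed: 1 KiB blocks */

static int decompose(unsigned block, int off[4])
{
	off[0] = off[1] = off[2] = off[3] = 0;
	if (block < NDIR_BLOCKS) {                       /* direct block */
		off[0] = block;
		return 0;
	}
	block -= NDIR_BLOCKS;
	if (block < ADDR_PER_BLOCK) {                    /* singly indirect */
		off[0] = IND_BLOCK;
		off[1] = block;
		return 1;
	}
	block -= ADDR_PER_BLOCK;
	if (block < ADDR_PER_BLOCK * ADDR_PER_BLOCK) {   /* doubly indirect */
		off[0] = DIND_BLOCK;
		off[1] = block / ADDR_PER_BLOCK;
		off[2] = block % ADDR_PER_BLOCK;
		return 2;
	}
	block -= ADDR_PER_BLOCK * ADDR_PER_BLOCK;        /* triply indirect */
	off[0] = TIND_BLOCK;
	off[1] = block / (ADDR_PER_BLOCK * ADDR_PER_BLOCK);
	off[2] = (block % (ADDR_PER_BLOCK * ADDR_PER_BLOCK)) / ADDR_PER_BLOCK;
	off[3] = block % ADDR_PER_BLOCK;
	return 3;
}

int main(void)
{
	int off[4];
	int level = decompose(300, off);   /* 300 = 12 + 256 + 32 */

	printf("level=%d off=%d,%d,%d,%d\n", level, off[0], off[1], off[2], off[3]);
	return 0;
}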
++ */ ++ ++ key->block = block; ++ ++ key->off[0] = key->off[1] = key->off[2] = key->off[3] = 0; ++ key->ibh[0] = key->ibh[1] = key->ibh[2] = key->ibh[3] = NULL; ++ key->ptr[0] = key->ptr[1] = key->ptr[2] = key->ptr[3] = NULL; ++ ++ if (block >= EXT2_NDIR_BLOCKS) { ++ block -= EXT2_NDIR_BLOCKS; ++ ++ if (block >= addr_per_block) { ++ block -= addr_per_block; ++ ++ if (block >= addr_per_block * addr_per_block) { ++ block -= addr_per_block * addr_per_block; ++ ++ key->off[0] = EXT2_TIND_BLOCK; ++ key->off[1] = (block / (addr_per_block * addr_per_block)); ++ key->off[2] = ++ (block % (addr_per_block * addr_per_block)) / ++ addr_per_block; ++ key->off[3] = (block % addr_per_block); ++ level = 3; ++ } else { ++ key->off[0] = EXT2_DIND_BLOCK; ++ key->off[1] = block / addr_per_block; ++ key->off[2] = block % addr_per_block; ++ level = 2; ++ } ++ } else { ++ key->off[0] = EXT2_IND_BLOCK; ++ key->off[1] = block; ++ level = 1; ++ } ++ } else { ++ key->off[0] = block; ++ level = 0; ++ } ++ ++ /* ++ * In the second step, we load the needed buffers. ++ */ ++ ++ key->level = level; ++ key->inode = inode; ++ ++ key->ptr[0] = (u32 *) (&(EXT2_I(inode)->i_data)); ++ ++ for (x = 1; x <= level; x++) { ++ u32 *ptr; ++ ++ ptr = key->ptr[x - 1]; ++ if (ptr == NULL) ++ break; ++/* Paul Whittaker tweak 19 Feb 2005 */ ++ block = le32_to_cpu(ptr[key->off[x - 1]]); ++ if (block == 0) ++ continue; // TLL 05/01/07 ++ if (x - 1 != 0) ++ block = le32_to_cpu(block); ++ if ((key->ibh[x] = __bread(inode->i_sb->s_bdev, ++ block, inode->i_sb->s_blocksize)) ++ == NULL) ++ goto error; ++ key->ptr[x] = (u32 *) (key->ibh[x]->b_data); ++ } ++ ++ return 1; ++ error: ++ for (; x != 0; x--) ++ if (key->ibh[x] != NULL) ++ brelse(key->ibh[x]); ++ return 0; ++} ++ ++ ++/* ++ * Find the block for a given key. Return 0 if there ++ * is no block for this key. ++ */ ++static inline u32 ext2_get_key_blkaddr(struct ext2_bkey *key) ++{ ++ assert(key->inode); ++ assert(atomic_read(&(key->inode)->i_mutex.count) <= 0); ++ ++/* Paul Whittaker tweak 19 Feb 2005 */ ++ if (key->ptr[key->level] == NULL) ++ return 0; ++ return le32_to_cpu(key->ptr[key->level][key->off[key->level]]); ++} ++ ++ ++/* ++ * Change the block for a given key. Return 0 on success, ++ * -errno on failure. ++ */ ++static inline int ext2_set_key_blkaddr(struct ext2_bkey *key, u32 blkaddr) ++{ ++ char bdn[BDEVNAME_SIZE]; ++ assert(key->inode); ++ assert(atomic_read(&(key->inode)->i_mutex.count) <= 0); ++ ++ if (key->ptr[key->level] == NULL) { ++ /* The reason that this "can't happen" is that this ++ routine is only used to shuffle block numbers or by ++ free_cluster_blocks. Cluster sizes are such that ++ clusters can't straddle address blocks. So the ++ indirect block address can't be zero. AFAIK, ptr ++ can only be NULL on error or on null indirect block ++ address. Hmm, come to think of it, I think there ++ are still some callers that don't check for errors ++ from ext2_get_key(), so this still can happen until ++ those are fixed up. */ ++ printk(KERN_ERR ++ "ext2_set_key_blkaddr: can't happen: NULL parent. " ++ "dev=%s, ino=%lu, level=%u.\n", ++ bdevname(key->inode->i_sb->s_bdev, bdn), ++ key->inode->i_ino, key->level); ++ return -ENOSYS; ++ } ++ /* Paul Whittaker tweak 19 Feb 2005 */ ++ key->ptr[key->level][key->off[key->level]] = le32_to_cpu(blkaddr); ++ if (key->level > 0) ++ mark_buffer_dirty(key->ibh[key->level]); ++ return 0; ++} ++ ++ ++/* ++ * Increment the key. Returns 0 if we go beyond the limits, ++ * 1 otherwise. 
++ * ++ * Precondition: -key->off[level] <= incr < addr_per_block. ++ */ ++static int ext2_next_key(struct ext2_bkey *key, int incr) ++{ ++ int addr_per_block = EXT2_ADDR_PER_BLOCK(key->inode->i_sb); ++ int x, level = key->level; ++ u32 tmp; ++ ++ assert(key->inode); ++ assert(atomic_read(&(key->inode)->i_mutex.count) <= 0); ++ ++ ++ /* ++ * Increment the key. This is done in two step: first ++ * adjust the off array, then reload buffers that should ++ * be reloaded (we assume level > 0). ++ */ ++ ++ assert(key->off[level] >= -incr); ++ assert(incr < addr_per_block); ++ key->block += incr; ++ key->off[level] += incr; ++ ++ /* ++ * First step: should be thought as the propagation ++ * of a carry. ++ */ ++ ++ if (level == 0) { ++ if (key->off[0] >= EXT2_NDIR_BLOCKS) { ++ key->off[1] = key->off[0] - EXT2_NDIR_BLOCKS; ++ key->off[0] = EXT2_IND_BLOCK; ++ level = 1; ++ } ++ x = 0; ++ } else { ++ for (x = level; x > 0; x--) { ++ if (key->off[x] >= addr_per_block) { ++ key->off[x] -= addr_per_block; ++ key->off[x - 1]++; ++ ++ if (x == 1) { ++ if (++level < 4) { ++ key->off[level] = key->off[level - 1]; ++ key->off[level - 1] = 0; ++ } else ++ return 0; ++ } ++ } else ++ break; ++ } ++ } ++ ++ /* ++ * Second step: reload the buffers that have changed. ++ */ ++ ++ key->level = level; ++ ++ CHECK_NOT_ATOMIC ++ while (x++ < level) { ++ if (key->ibh[x] != NULL) { ++ if (IS_SYNC(key->inode) && buffer_dirty(key->ibh[x])) { ++ //mw: ++ assert(buffer_mapped(key->ibh[x]) ++ && (key->ibh[x]->b_bdev != NULL)); ++ ll_rw_block(WRITE, 1, &(key->ibh[x])); ++ wait_on_buffer(key->ibh[x]); ++ } ++ brelse(key->ibh[x]); ++ } ++/* Paul Whittaker tweak 19 Feb 2005 */ ++ if ((key->ptr[x - 1] != NULL) ++ && ((tmp = le32_to_cpu(key->ptr[x - 1][key->off[x - 1]])) != ++ 0)) { ++ if ((key->ibh[x] = ++ __bread(key->inode->i_sb->s_bdev, tmp, ++ key->inode->i_sb->s_blocksize)) ++ != NULL) ++ key->ptr[x] = (u32 *) (key->ibh[x]->b_data); ++ else ++ key->ptr[x] = NULL; ++ } else { ++ key->ibh[x] = NULL; ++ key->ptr[x] = NULL; ++ } ++ } ++ ++ return 1; ++} ++ ++ ++/* Method to free the key: just release buffers. ++ ++ Returns 0 on success, -errno on error. ++*/ ++ ++static int ext2_free_key(struct ext2_bkey *key) ++{ ++ int x, n; ++ struct buffer_head *bh[4]; ++ ++ assert(key->inode); ++ assert(atomic_read(&(key->inode)->i_mutex.count) <= 0); ++ ++ ++ for (x = 0, n = 0; x <= key->level; x++) { ++ if (key->ibh[x] != NULL) { ++ if (IS_SYNC(key->inode) && buffer_dirty(key->ibh[x])) ++ bh[n++] = key->ibh[x]; ++ else ++ brelse(key->ibh[x]); ++ } ++ } ++ ++ if (n > 0) { ++ int ncopy = n; ++ while (ncopy-- > 0) { ++ assert(buffer_mapped(bh[ncopy]) ++ && (bh[ncopy]->b_bdev != NULL)); ++ } ++ ++ ll_rw_block(WRITE, n, bh); ++ ++ CHECK_NOT_ATOMIC ++ ++ while (n-- > 0) { ++ wait_on_buffer(bh[n]); ++ /* TODO: Check for error. */ ++ brelse(bh[n]); ++ } ++ } ++ return 0; ++} ++ ++ ++/* Returns positive if specified cluster is compressed, ++ zero if not, ++ -errno if an error occurred. ++ ++ If you need the result to be accurate, then down i_sem before ++ calling this, and don't raise i_sem until after you've used the ++ result. */ ++int ext2_cluster_is_compressed_fn(struct inode *inode, unsigned cluster) ++{ ++ unsigned block = (ext2_cluster_block0(inode, cluster) ++ + ext2_cluster_nblocks(inode, cluster) ++ - 1); ++ struct ext2_bkey key; ++ int result; ++ ++ assert(atomic_read(&inode->i_mutex.count) <= 0); ++ ++ /* impl: Not all callers of ext2_cluster_is_compressed_fn() have ++ i_sem down. 
Of course it is impossible to guarantee ++ up-to-date information for such callers (someone may ++ compress or decompress between when we check and when they ++ use the information), so hopefully it won't matter if the ++ information we return is slightly inaccurate (e.g. because ++ someone is de/compressing the cluster while we check). */ ++ if (!ext2_get_key(&key, inode, block)) ++ return -EIO; ++ ++ result = (ext2_get_key_blkaddr(&key) == EXT2_COMPRESSED_BLKADDR); ++ ext2_free_key(&key); ++ return result; ++} ++ ++ ++/* Support for the GETCOMPRRATIO ioctl() call. We calculate how many ++ blocks the file would hold if it weren't compressed. This requires ++ reading the cluster head for every compressed cluster. ++ ++ Returns either -EAGAIN or the number of blocks that the file would ++ take up if uncompressed. */ ++int ext2_count_blocks(struct inode *inode) ++{ ++ struct buffer_head *head_bh; ++ int count; ++ int cluster; ++ struct ext2_bkey key; ++ u32 end_blknr; ++ ++ if (!(EXT2_I(inode)->i_flags & EXT2_COMPRBLK_FL)) ++ return inode->i_blocks; ++ ++ mutex_lock(&inode->i_mutex); ++ end_blknr = ROUNDUP_RSHIFT(inode->i_size, ++ inode->i_sb->s_blocksize_bits); ++ ++ /* inode->i_blocks is stored in units of 512-byte blocks. It's ++ more convenient for us to work in units of s_blocksize. */ ++ { ++ u32 shift = inode->i_sb->s_blocksize_bits - 9; ++ ++ count = inode->i_blocks; ++ if (count & ((1 << shift) - 1)) ++ ext2_msg(inode->i_sb, ++ "ext2_count_blocks", ++ "i_blocks not multiple of blocksize"); ++ count >>= shift; ++ } ++ ++ cluster = 0; ++ if (!ext2_get_key(&key, inode, 0)) { ++ count = -EIO; ++ goto out; ++ } ++ while (key.block < end_blknr) { ++ u32 head_blkaddr = ext2_get_key_blkaddr(&key); ++ ++ /* bug fix: init head_bh for each iteration TLL 2/21/07 */ ++ head_bh = NULL; ++ if (head_blkaddr == EXT2_COMPRESSED_BLKADDR) { ++ count = -EXT2_ECOMPR; ++ break; ++ } ++ if (!ext2_next_key(&key, ext2_cluster_nblocks(inode, cluster) - 1)) ++ break; ++ if (ext2_get_key_blkaddr(&key) == EXT2_COMPRESSED_BLKADDR) { ++ struct ext2_cluster_head *head; ++ ++ if (head_blkaddr == 0) { ++ count = -EXT2_ECOMPR; ++ break; ++ } ++ head_bh = __getblk(inode->i_sb->s_bdev, ++ head_blkaddr, inode->i_sb->s_blocksize); ++ if (head_bh == NULL) { ++ /* Hmm, EAGAIN or EIO? 
*/ ++ count = -EAGAIN; ++ break; ++ } ++ if (!buffer_uptodate(head_bh)) ++ ll_rw_block(READ, 1, &head_bh); ++ ++ CHECK_NOT_ATOMIC ++ ++ wait_on_buffer(head_bh); ++ ++#ifdef CONFIG_HIGHMEM ++ if (!page_address(head_bh->b_page)) { ++ BUG(); ++ } ++#endif ++ ++ head = (struct ext2_cluster_head *) head_bh->b_data; ++ /* remove clen > ulen test TLL 2/21/07 */ ++ if ((head->magic != cpu_to_le16(EXT2_COMPRESS_MAGIC_04X)) ++ || (le32_to_cpu(head->ulen) > EXT2_MAX_CLUSTER_BYTES) ++ || (head->holemap_nbytes > 4)) { ++ count = -EXT2_ECOMPR; ++ break; ++ } ++ assert(sizeof(struct ext2_cluster_head) == 16); ++ count += (ROUNDUP_RSHIFT(le32_to_cpu(head->ulen), ++ inode->i_sb->s_blocksize_bits) ++ - ROUNDUP_RSHIFT((le32_to_cpu(head->clen) ++ + sizeof(struct ext2_cluster_head) ++ + head->holemap_nbytes), ++ inode->i_sb->s_blocksize_bits)); ++ brelse(head_bh); ++ head_bh = NULL; ++ } ++ ++ if (!ext2_next_key(&key, 1)) ++ break; ++ cluster++; ++ } ++ ext2_free_key(&key); ++ if (head_bh != NULL) ++ brelse(head_bh); ++ out: ++ mutex_unlock(&inode->i_mutex); ++ if (count == -EXT2_ECOMPR) { ++ ext2_msg(inode->i_sb, ++ "ext2_count_blocks", ++ "invalid compressed cluster %u of inode %lu", ++ cluster, inode->i_ino); ++ EXT2_I(inode)->i_flags |= EXT2_ECOMPR_FL; ++ } ++ ++ /* The count should be in units of 512 (i.e. 1 << 9) bytes. */ ++ if (count >= 0) ++ count <<= inode->i_sb->s_blocksize_bits - 9; ++ return count; ++} ++ ++ ++/* Decompress some blocks previously obtained from a cluster. ++ Decompressed data is stored in ext2_rd_wa.u. Buffer heads in the bh ++ array are packed together at the begining of the array. The ulen ++ argument is an indication of how many bytes the caller wants to ++ obtain, excluding holes. (This can be less than head->ulen, as in the ++ case of readpage.) No hole processing is done; we don't even look at ++ head->holemap. ++ ++ Note the semantic difference between this and ++ (): the latter decompresses a cluster _and ++ stores it as such_, whereas ext2_decompress_blocks() just ++ decompresses the contents of the blocks into ext2_rd_wa.u. ++ ++ The working area is supposed to be available and locked. ++ ++ Returns a negative value on failure, the number of bytes ++ decompressed otherwise. ++ ++ Called by : ++ ++ ext2_decompress_cluster () [sem down] ++ ext2_readpage () [sem down, but only ifndef EXT2_LOCK_BUFFERS] */ ++ ++/* TODO: ext2_decompress_blocks() scribbles in ext2_rd_wa.c. ++ Check callers to make sure this isn't a problem. */ ++ ++/* mw: caller must already have done: "get_cpu_var(ext2_rd_wa)" */ ++size_t ++ext2_decompress_blocks(struct inode * inode, ++ struct buffer_head ** bh, ++ int nblk, size_t ulen, u32 cluster) ++{ ++ struct ext2_cluster_head *head; ++ int count, src_ix, x; ++ unsigned char *dst; ++ unsigned meth, alg; ++ char bdn[BDEVNAME_SIZE]; ++ ++#ifdef EXT2_COMPR_DEBUG ++ //mw: 30.04.2012: seems to fail... ? assert(in_atomic()); ++ assert(atomic_read(&inode->i_mutex.count) <= 0); /* i.e. mutex_lock */ ++#endif ++ ++ /* ++ We pack the buffer together before (and must take care ++ not to duplicate the buffer heads in the array). ++ ++ pjm 1998-01-09: Starting from e2compr-0.4.0, they should ++ already be packed together in the blkaddr array. TODO: ++ Insert appropriate assert() statements checking tht this is ++ the case. TODO: Check that callers have bh[] packed. 
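/*
 * Sketch of the ROUNDUP_GE() sanity test applied a little further below
 * (stand-alone user space; the grain size of 1024 is an assumed stand-in
 * for EXT2_GRAIN_SIZE): a cluster head is only accepted when the stored
 * size (clen + 16-byte header + holemap), rounded up to the grain, stays
 * below the uncompressed length ulen rounded up the same way.
 */
#include <stdio.h>

#define GRAIN 1024   /* assumed power-of-two stand-in for EXT2_GRAIN_SIZE */
#define ROUNDUP_GE(a, b, d) ((((a) - 1) | ((d) - 1)) >= (((b) - 1) | ((d) - 1)))

int main(void)
{
	unsigned head_bytes = 16, holemap_nbytes = 4;   /* limits per the checks above */
	unsigned clen = 9000, ulen = 32768;             /* example cluster */

	if (ROUNDUP_GE(clen + head_bytes + holemap_nbytes, ulen, GRAIN))
		printf("reject: compression saved less than one grain\n");
	else
		printf("accept: %u stored bytes for %u data bytes\n",
		       clen + head_bytes + holemap_nbytes, ulen);
	return 0;
}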
*/ ++#ifdef EXT2_COMPR_REPORT ++ trace_e2c("ext2_decompress_blocks: nblk=%d\n", nblk); ++#endif ++ for (src_ix = 0, x = 0; src_ix < nblk; src_ix++) { ++ if (bh[src_ix] == NULL) ++ printk("no_bheader()\n"); ++ if ((bh[src_ix] != NULL) && (bh[src_ix]->b_blocknr != 0)) { ++ ++ if (x < src_ix) { ++ ext2_msg(inode->i_sb, "bad buffer table", ++ "inode = %lu", inode->i_ino); ++ goto error; ++ } ++ x++; ++ } ++ } ++ ++ nblk = x; ++#ifdef EXT2_COMPR_REPORT_CPR ++ trace_e2c("ext2_decompress_blocks (2): nblk=%d\n", nblk); ++#endif ++ if (nblk == 0) { ++ ext2_msg(inode->i_sb, "no block in cluster", "inode = %lu", ++ inode->i_ino); ++ goto error; ++ } ++ ++ restore_b_data_himem(bh[0]); ++ head = (struct ext2_cluster_head *) (bh[0]->b_data); ++ ++ /* ++ * Do some consistency checks. ++ */ ++ ++ if (head->magic != cpu_to_le16(EXT2_COMPRESS_MAGIC_04X)) { ++ ext2_msg(inode->i_sb, ++ "bad magic number", ++ "inode = %lu, magic = %#04x", ++ inode->i_ino, le16_to_cpu(head->magic)); ++ goto error; ++ } ++#if EXT2_GRAIN_SIZE & (EXT2_GRAIN_SIZE - 1) ++# error "This code assumes EXT2_GRAIN_SIZE to be a power of two." ++#endif ++ /* The macro also assumes that _a > 0, _b > 0. */ ++#define ROUNDUP_GE(_a, _b, _d) ( ( ((_a) - 1) \ ++ | ((_d) - 1)) \ ++ >= ( ((_b) - 1) \ ++ | ((_d) - 1))) ++ ++ //mw: following 3 just for debugging!!! ++ assert(!((le32_to_cpu(head->ulen) > EXT2_MAX_CLUSTER_BYTES))); ++ assert(!((head->clen == 0))); ++ assert(!(ROUNDUP_GE(le32_to_cpu(head->clen) ++ + head->holemap_nbytes + sizeof(struct ext2_cluster_head), ++ le32_to_cpu(head->ulen), EXT2_GRAIN_SIZE))); ++ ++ if ((le32_to_cpu(head->ulen) > EXT2_MAX_CLUSTER_BYTES) ++ || (head->clen == 0) ++ || ROUNDUP_GE(le32_to_cpu(head->clen) ++ + head->holemap_nbytes ++ + sizeof(struct ext2_cluster_head), ++ le32_to_cpu(head->ulen), EXT2_GRAIN_SIZE)) { ++ ext2_msg(inode->i_sb, ++ "invalid cluster len", ++ "inode = %lu, len = %u:%u", ++ inode->i_ino, ++ le32_to_cpu(head->clen), le32_to_cpu(head->ulen)); ++ goto error; ++ } ++#undef ROUNDUP_GE ++ ++ /* TODO: Test for `nblk != 1 + ...' instead of the current ++ one-sided test. However, first look at callers, and make ++ sure that they handle the situation properly (e.g. freeing ++ unneeded blocks) and tht they always pass a correct ++ value for nblk. */ ++ if (nblk <= ((le32_to_cpu(head->clen) ++ + head->holemap_nbytes + sizeof(struct ext2_cluster_head) ++ - 1) ++ / bh[0]->b_size)) { ++ int i; ++ ext2_msg(inode->i_sb, ++ "missing blocks", ++ "inode = %lu, blocks = %d/%u", ++ inode->i_ino, nblk, ((le32_to_cpu(head->clen) ++ + head->holemap_nbytes ++ + sizeof(struct ext2_cluster_head) ++ - 1) ++ / bh[0]->b_size) + 1); ++ printk("i_size=%d\n", (int) inode->i_size); ++ for (i = 0; i < 12; i++) ++ printk("i_data[%d]=%d\n", i, EXT2_I(inode)->i_data[i]); ++ printk("cluster_head (sizeof head=%u):\n\tmagic=0x%4x\n\tmethod=%d\n\t \ ++ holemap_nbytes=%d\n\tulen=%d\n\tclen=%d\n\tbh->b_size=%zu\n", ++ sizeof(struct ext2_cluster_head), head->magic, ++ (int) head->method, (int) head->holemap_nbytes, head->ulen, ++ head->clen, bh[0]->b_size); ++ goto error; ++ } ++ ++ /* I moved it here in case we need to load a module that ++ * needs more heap that is currently allocated. ++ * In such case "init_module" for that algorithm forces ++ * re-allocation of ext2_wa. It should be safe here b/c the ++ * first reference to ext2_wa comes just after and we have ++ * locked ext2_wa before. ++ * ++ * FIXME: Totally separate working areas for reading and writing. ++ * Jan R. 
++ */ ++ meth = head->method; /* only a byte, so no swabbing needed. */ ++ if (meth >= EXT2_N_METHODS) { ++ ext2_msg(inode->i_sb, ++ "Ass: illegal method id", ++ "inode = %lu, id = %u", inode->i_ino, meth); ++ dump_stack(); ++ goto error; ++ } ++ alg = ext2_method_table[meth].alg; ++ ++ /* ++ * Adjust the length if too many bytes are requested. ++ * ++ * TODO: Traiter les bitmaps ici, et non plus au niveau de ++ * l'appelant. Faire un petit cache en memorisant le ++ * numero du dernier noeud decompresse et du dernier ++ * cluster. Le pb, c'est qu'on ne peut pas savoir si ++ * les blocs ont ete liberes et realloue entre temps ++ * -> il faut etre prevenu pour invalider le buffer. ++ * ++ * pjm fixme tr: Take care of the bitmaps here, ++ * instead of by the caller as we currently do. Keep ++ * a small cache that holds the number of the ++ * previous to have been ++ * decompressed. The problem is that we have no way ++ * of knowing whether the blocks have been freed and ++ * reallocated in the meantime / since last time -> ++ * we must be informed so that we can invalidate the ++ * buffer. */ ++ if (ulen > le32_to_cpu(head->ulen)) { ++ memset(__get_cpu_var(ext2_rd_wa)->u + le32_to_cpu(head->ulen), 0, ulen - le32_to_cpu(head->ulen)); ++ ulen = le32_to_cpu(head->ulen); ++ ++ assert((bh[0]->b_size & (bh[nblk - 1]->b_size - 1)) == 0); ++ if (((le32_to_cpu(head->clen) ++ + head->holemap_nbytes + sizeof(struct ext2_cluster_head) ++ - 1) ++ | (bh[0]->b_size - 1)) ++ >= ((ulen - 1) | (bh[0]->b_size - 1))) { ++ printk(KERN_WARNING ++ "ext2_decompress_blocks: " ++ "ulen (=%zu) or clen (=%u) wrong " ++ "in dev %s, inode %lu.\n", ++ ulen, le32_to_cpu(head->clen), ++ bdevname(inode->i_sb->s_bdev, bdn), inode->i_ino); ++ goto error; ++ } ++ } ++ ++ /* ++ * Now, decompress data. ++ */ ++ /* TODO: Is this (ulen == 0) possible? */ ++ if (ulen == 0) ++ return 0; ++ ++ for (x = 0, dst = __get_cpu_var(ext2_rd_wa)->c; x < nblk; dst += bh[x++]->b_size) { ++ restore_b_data_himem(bh[x]); ++ memcpy(dst, bh[x]->b_data, bh[x]->b_size); ++ } ++ ++ ++ if (!ext2_algorithm_table[alg].avail) { ++ ext2_msg(inode->i_sb, ++ "ext2_decompress_blocks", ++ "algorithm `%s' not available for inode %lu", ++ ext2_algorithm_table[alg].name, inode->i_ino); ++ ext2_mark_algorithm_use(inode, alg); ++ goto error; ++ } ++ ++ ++#ifdef EXT2_COMPR_DEBUG ++ { ++ struct ext2_cluster_head *wa1head = (struct ext2_cluster_head *) __get_cpu_var(ext2_rd_wa)->c; ++ unsigned clen = le32_to_cpu(wa1head->clen); ++ if (wa1head->checksum != ++ cpu_to_le32(ext2_adler32 ++ (le32_to_cpu(*(u32 *) __get_cpu_var(ext2_rd_wa)->c), ++ __get_cpu_var(ext2_rd_wa)->c + 8, ++ (sizeof(struct ext2_cluster_head) - 8 + ++ head->holemap_nbytes + clen)))) ++ { ++ head->checksum = cpu_to_le32(0); ++ ext2_msg(inode->i_sb, "ext2_decompress_blocks: corrupted compressed data ", ++ "in inode %lu", inode->i_ino); ++ //goto error; ++ //mw: we try to go on. if data is corrupt we will get an compression error anyway. ++ } ++ } ++#endif ++ ++ count = ext2_algorithm_table[alg].decompress(__get_cpu_var(ext2_rd_wa)->c + ++ sizeof(struct ++ ext2_cluster_head) + ++ head->holemap_nbytes, ++ __get_cpu_var(ext2_rd_wa)->u, ++ __get_cpu_var(ext2_rd_wa)->heap, ++ le32_to_cpu(head->clen), ulen, ++ ext2_method_table[meth].xarg); ++ ++ /* If we got fewer than ulen bytes, there is a problem, since ++ we corrected the ulen value before decompressing. Note ++ that it's OK for count to exceed ulen, because ulen can be ++ less than head->ulen. 
*/ ++ if ((count < ulen) || (count != le32_to_cpu(head->ulen))) { ++ ext2_msg(inode->i_sb, ++ "ext2_decompress_blocks: corrupted compressed data ", "inode = %lu, count = %u of %zu (%u/%u)", ++ inode->i_ino, count, ulen, le32_to_cpu(head->clen), le32_to_cpu(head->ulen)); ++ goto error; ++ } ++ ext2_ensure_algorithm_use(inode, alg); ++ return count; ++ ++ error: ++ ++ /* Raise the ECOMPR flag for this file. What this means is ++ that the file cannot be written to, and can only be read if ++ the user raises the NOCOMPR flag. ++ ++ pjm 1997-01-16: I've changed it so that files with ECOMPR ++ still have read permission, so user can still read the rest ++ of the file but get an I/O error (errno = EXT2_ECOMPR) when ++ they try to access anything from this cluster. */ ++ ++ EXT2_I(inode)->i_flags |= EXT2_ECOMPR_FL; ++ ++ inode->i_ctime = CURRENT_TIME; ++ mark_inode_dirty_sync(inode); ++ /* pjm 1998-02-21: We used to do `memset(ext2_rd_wa.u, 0, ulen)' ++ here because once upon a time the user could sometimes see ++ buf contents. I believe that this can never happen any ++ more. */ ++ return -EXT2_ECOMPR; ++} ++ ++ ++/* ext2_calc_free_ix: Calculates the position of the C_NBLK'th non-hole ++ block; equals C_NBLK plus the number of holes in the first CALC_FREE_IX() ++ block positions of the cluster. ++ ++ pre: 1 =< c_nblk < EXT2_MAX_CLUSTER_BLOCKS, ++ Number of 1 bits in ,ubitmap` > ,c_nblk`. ++ post: c_nblk =< calc_free_ix() < EXT2_MAX_CLUSTER_BLOCKS ++ ++ Called by: ++ ext2_decompress_cluster() ++ ext2_file_write() ++ ++ TODO: Have ext2_compress_cluster() call this. ++ */ ++unsigned ext2_calc_free_ix(unsigned holemap_nbytes, u8 const *holemap, ++ unsigned c_nblk) ++{ ++ unsigned i; ++ ++ assert(1 <= c_nblk); ++ assert(c_nblk < EXT2_MAX_CLUSTER_BLOCKS); ++ for (i = 0; (i < holemap_nbytes * 8) && (c_nblk > 0);) { ++ assert(i < EXT2_MAX_CLUSTER_BLOCKS - 1); ++ if ((holemap[i >> 3] & (1 << (i & 7))) == 0) ++ c_nblk--; ++ i++; ++ } ++ i += c_nblk; ++ assert(i < EXT2_MAX_CLUSTER_BLOCKS); ++ return i; ++} ++ ++ ++/* (): Prepare the blkaddr[] array for ++ decompression by moving non-hole blocks to their proper positions ++ (according to ubitmap) and zeroing any other blocks. ++ ++ Returns 0 on success, -errno on error. ++ ++ Note: We assume tht blkaddr[i] won't change under us forall ++ clu_block0 =< i < clu_block0 + clu_nblocks. Holding i_sem should ++ guarantee this. ++ ++ Called by: ++ ext2_decompress_cluster() ++ ext2_file_write() */ ++int ++ext2_unpack_blkaddrs(struct inode *inode, ++ struct buffer_head *bh[], ++ int mmcp, ++ unsigned holemap_nbytes, ++ u8 const *holemap, ++ unsigned c_nblk, ++ unsigned free_ix, ++ unsigned clu_block0, unsigned clu_nblocks) ++{ ++ struct ext2_bkey key; ++ u32 *blkaddr; ++ unsigned si, di; ++ ++ assert(clu_nblocks <= EXT2_MAX_CLUSTER_BLOCKS); ++ assert(1 <= c_nblk); ++ assert(c_nblk <= free_ix); ++ assert(free_ix < EXT2_MAX_CLUSTER_BLOCKS); ++ if (!ext2_get_key(&key, inode, clu_block0)) ++ return -EIO; ++ ++ if (key.ptr[key.level] == NULL) { ++ /* TODO: Call ext2_error(). */ ++ ext2_free_key(&key); ++ return -EIO; ++ } ++ ++ /* impl: Note tht we're relying on clusters not straddling ++ address block boundaries. 
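/*
 * Worked sketch of ext2_calc_free_ix() above (stand-alone, user space):
 * with a one-byte holemap of 0x05 (cluster blocks 0 and 2 are holes) and
 * c_nblk = 3 compressed blocks on disk, the three real blocks belong at
 * positions 1, 3 and 4, so free_ix comes out as 5 (c_nblk plus the two
 * holes in front of it); the unpacking below then moves the block
 * addresses out to those positions and zeroes the hole slots.
 */
#include <stdio.h>

static unsigned calc_free_ix(unsigned holemap_nbytes,
			     const unsigned char *holemap, unsigned c_nblk)
{
	unsigned i;

	for (i = 0; i < holemap_nbytes * 8 && c_nblk > 0; i++)
		if (!(holemap[i >> 3] & (1 << (i & 7))))
			c_nblk--;        /* a non-hole position consumes one block */
	return i + c_nblk;
}

int main(void)
{
	unsigned char holemap[1] = { 0x05 };   /* holes at cluster blocks 0 and 2 */

	printf("free_ix = %u\n", calc_free_ix(1, holemap, 3));   /* prints 5 */
	return 0;
}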
*/ ++ blkaddr = &key.ptr[key.level][key.off[key.level]]; ++ memset(blkaddr + free_ix, ++ 0, sizeof(*blkaddr) * (clu_nblocks - free_ix)); ++ si = c_nblk; ++ for (di = free_ix; di > si;) { ++ --di; ++ if (((di >> 3) < holemap_nbytes) ++ && (holemap[di >> 3] & (1 << (di & 7)))) { ++ blkaddr[di] = 0; ++ bh[di]->b_blocknr = 0; ++ clear_bit(BH_Mapped, &bh[di]->b_state); ++ } else { ++ if (si == 0) { ++ break; ++ } ++ blkaddr[di] = blkaddr[--si]; ++ assert(bh[di]->b_blocknr == 0); ++ assert(bh[si]->b_blocknr != 0); ++ assert(buffer_mapped(bh[si])); ++#ifdef EXT2_COMPR_REPORT_CPR ++ trace_e2c("unpack: di=%d sts=0x%x si=%d blk=%ld sts=0x%x\n", ++ di, (int) bh[di]->b_state, si, bh[si]->b_blocknr, ++ (int) bh[si]->b_state); ++#endif ++ bh[di]->b_blocknr = bh[si]->b_blocknr; ++ set_bit(BH_Mapped, &bh[di]->b_state); ++ bh[si]->b_blocknr = 0; ++ clear_bit(BH_Mapped, &bh[si]->b_state); ++ set_bit(BH_Uptodate, &bh[di]->b_state); ++ if (mmcp) { ++ restore_b_data_himem(bh[si]); ++ restore_b_data_himem(bh[di]); ++ memcpy(bh[di]->b_data, bh[si]->b_data, ++ inode->i_sb->s_blocksize); ++ } ++ } ++ } ++ if (key.level > 0) ++ mark_buffer_dirty(key.ibh[key.level]); ++ return ext2_free_key(&key); ++} ++ ++ ++/* ++ * Decompress one cluster. If already compressed, the cluster ++ * is decompressed in place, and the compress bitmap is updated. ++ * ++ * Returns the size of decompressed data on success, a negative ++ * value in case of failure, or 0 if the cluster was not compressed. ++ * ++ * The inode is supposed to be writable. ++ * ++ * Called by : ++ * ++ * ext2_decompress_inode() [sem down] ++ * ext2_file_write() [sem down] ++ * trunc_bitmap() [sem down] ++ */ ++int ext2_decompress_cluster(struct inode *inode, u32 cluster) ++{ ++ struct buffer_head *bh[EXT2_MAX_CLUSTER_BLOCKS]; ++ struct buffer_head *bhc[EXT2_MAX_CLUSTER_BLOCKS]; ++ struct page *pg[EXT2_MAX_CLUSTER_PAGES], *epg[EXT2_MAX_CLUSTER_PAGES]; ++ int result, nbh; ++ unsigned npg, c_nblk; ++ struct ext2_cluster_head *head; ++ int i = 0; ++ unsigned free_ix, clu_block0, clu_nblocks; ++ int d_npg = -1; /* number of decompressed page */ ++ unsigned long allpagesuptodate = 1; ++ struct buffer_head *bh_writeout[EXT2_MAX_CLUSTER_BLOCKS]; ++ int bhn_writeout; ++#ifdef CONFIG_HIGHMEM ++ int kmapped = 0; ++#endif ++ ++ for (i = 0; i < EXT2_MAX_CLUSTER_BLOCKS; i++) { ++ bh_writeout[i] = NULL; ++ bhn_writeout = 0; ++ } ++ ++ assert(atomic_read(&inode->i_mutex.count) <= 0); /* i.e. mutex_lock */ ++ ++ for (i = 0; i < EXT2_MAX_CLUSTER_PAGES; i++) ++ epg[i] = NULL; ++ ++ /* ++ Get blocks from cluster. ++ Assign to variables head, ubitmap, clu_block0, clu_nblocks. ++ Shuffle blkaddr[] array and write zero to holes. ++ Allocate new blocks. ++ Get the working area. ++ Decompress. ++ Copy to bh[]->b_data (marking buffers uptodate and dirty). ++ Release working area. ++ Release bh[]. 
++ */ ++ ++ nbh = 0; ++ npg = ext2_cluster_npages(inode, cluster); ++ result = ext2_get_cluster_pages(inode, cluster, pg, NULL, 0); ++ if (result <= 0) { ++ for (i = 0; i < npg; i++) ++ epg[i] = NULL; ++ goto out_err; ++ } ++ ++ for (i = 0; i < npg; i++) { ++ if ((pg[i]->index <= ((inode->i_size - 1) >> PAGE_CACHE_SHIFT)) && ++ !PageUptodate(pg[i])) { ++ allpagesuptodate = 0; ++ } ++ } ++ if (allpagesuptodate) { ++ //printk("DecompressPages: Ino:%lu\n", inode->i_ino); ++ result = ext2_decompress_pages(inode, cluster, pg); ++ if (result != 0) { ++ for (i = 0; i < npg; i++) ++ epg[i] = NULL; ++ if (result > 0) ++ goto cleanup; ++ else ++ goto out_err; ++ } ++ /*mw: if we continue here then in ext2_decompress_pages ++ * not all pages were up-to-date ++ */ ++ } ++ //printk("DecompressCluster: Ino:%lu\n", inode->i_ino); ++ result = ext2_get_cluster_extra_pages(inode, cluster, pg, epg); ++ if (result <= 0) { ++ goto out_err; ++ } ++#ifdef CONFIG_HIGHMEM ++ ext2_kmap_cluster_pages(NULL, pg, epg); ++ kmapped = 1; ++#endif ++ ++ result = ext2_get_cluster_blocks(inode, cluster, bh, pg, epg, 0); ++ if (result <= 0) { ++ goto out_err; ++ } ++ nbh = c_nblk = result; ++ ++ ++#ifdef EXT2_COMPR_REPORT ++ { ++ int j; ++ printk ++ (" > > > ext2_decompress_cluster %d: inode=%ld, size=%d nbh=%d\n", ++ cluster, inode->i_ino, (int) inode->i_size, nbh); ++#ifdef EXT2_COMPR_REPORT_VERBOSE ++ for (j = 0; j < nbh; j++) { ++ if (bh[j]) { ++ printk("0buffer_head[%d]: blocknr=%lu, addr=%p \n", j, ++ (unsigned long) bh[j]->b_blocknr, bh[j]); ++ if (bh[j]->b_page) ++ printk("0:[page->index=%ld]\n", bh[j]->b_page->index); ++ else ++ printk("[No page]\n"); ++ } else ++ printk("buffer_head[%d] is NULL\n", j); ++ } ++ while ((j < EXT2_MAX_CLUSTER_BLOCKS) && (bh[j] != NULL) && bh[j]->b_blocknr) { /*Add by Yabo Ding */ ++ printk ++ ("buffer_head[%d] is free but not NULL: blocknr=%lu, addr=%p\n", ++ j, (unsigned long) bh[j]->b_blocknr, bh[j]); ++ j++; ++ } ++#endif ++ } ++#endif ++ for (i = 0; i < nbh; i++) ++ assert(bh[i]->b_blocknr != 0); ++ ++ restore_b_data_himem(bh[0]); ++ ++ head = (struct ext2_cluster_head *) bh[0]->b_data; ++ if (head->magic != cpu_to_le16(EXT2_COMPRESS_MAGIC_04X)) { ++ ext2_msg(inode->i_sb, ++ "ext2_decompress_cluster: bad magic number", ++ "cluster %d: inode = %lu, magic = %#04x", ++ cluster, inode->i_ino, le16_to_cpu(head->magic)); ++ EXT2_I(inode)->i_flags |= EXT2_ECOMPR_FL; ++ result = -EXT2_ECOMPR; ++ goto out_err; ++ } ++ if (le32_to_cpu(head->ulen) - ++ (c_nblk << inode->i_sb->s_blocksize_bits) <= 0) { ++ ext2_error(inode->i_sb, "ext2_decompress_cluster", ++ "ulen too small for c_nblk. ulen=%u, c_nblk=%u, bs=%lu", ++ le32_to_cpu(head->ulen), c_nblk, ++ inode->i_sb->s_blocksize); ++ EXT2_I(inode)->i_flags |= EXT2_ECOMPR_FL; ++ result = -EXT2_ECOMPR; ++ goto out_err; ++ } ++ free_ix = ++ ext2_calc_free_ix(head->holemap_nbytes, (u8 const *) (&head[1]), ++ c_nblk); ++ clu_block0 = ext2_cluster_block0(inode, cluster); ++ clu_nblocks = ext2_cluster_nblocks(inode, cluster); ++ ext2_unpack_blkaddrs(inode, bh, 1, ++ head->holemap_nbytes, (u8 const *) (&head[1]), ++ c_nblk, free_ix, clu_block0, clu_nblocks); ++ ++ /* Allocate the extra blocks needed. 
*/ ++ { ++ int data_left = le32_to_cpu(head->ulen); ++ ++ data_left -= c_nblk << inode->i_sb->s_blocksize_bits; ++ assert(data_left > 0); ++ for (i = free_ix; i < clu_nblocks; i++) ++ if (((i >> 3) >= head->holemap_nbytes) ++ || !(head->holemap[i >> 3] & (1 << (i & 7)))) { ++ result = ext2_get_block(inode, ++ clu_block0 + i, ++ bh[i], 1 /* create */ ); ++ if (bh[i]->b_blocknr == 0) ++ goto out_err; ++ d_npg = ++ (i >> ++ (PAGE_CACHE_SHIFT - inode->i_sb->s_blocksize_bits)) + ++ 1; ++ nbh++; ++ data_left -= inode->i_sb->s_blocksize; ++ if (data_left <= 0) ++ break; ++ } ++ } ++ ++ /* jmr 1998-10-28 Hope this is the last time I'm moving this code. ++ * Module loading must be done _before_ we lock wa, just think what ++ * can happen if we reallocate wa when somebody else uses it... ++ */ ++ { ++ unsigned meth; ++#ifdef CONFIG_KMOD ++ unsigned alg; ++#endif ++ ++ meth = head->method; /* only a byte, so no swabbing needed. */ ++ if (meth >= EXT2_N_METHODS) { ++ ext2_msg(inode->i_sb, ++ "Ass.: illegal method id", ++ "inode = %lu, id = %u", inode->i_ino, meth); ++ result = -EXT2_ECOMPR; ++ goto out_err; ++ } ++#ifdef CONFIG_KMOD ++ alg = ext2_method_table[meth].alg; ++ if (!ext2_algorithm_table[alg].avail) { ++ char str[32]; ++ ++ sprintf(str, "ext2-compr-%s", ext2_algorithm_table[alg].name); ++ request_module(str); ++ } ++#endif ++ } ++ ++ result = -EINTR; ++ ++ /* ++ * Then, decompress and copy back data. ++ */ ++ { ++ int ic; ++ ++ for (ic = 0, i = 0; i < clu_nblocks; i++) { ++ if (bh[i]->b_blocknr != 0) { ++ bhc[ic] = bh[i]; ++ ic++; ++ if (ic == c_nblk) { ++ break; ++ } ++ } ++ } ++ } ++ ++ ++#ifdef EXT2_COMPR_REPORT_WA ++ printk(KERN_DEBUG "pid %d locks wa\n", current->pid); ++#endif ++ if (get_cpu_var(ext2_rd_wa) == NULL) ++ { ++ ext2_alloc_rd_wa(); ++ } ++ assert(__get_cpu_var(ext2_rd_wa) != NULL); ++ ++ result = ext2_decompress_blocks(inode, bhc, c_nblk, ++ le32_to_cpu(head->ulen), cluster); ++ if (result != (int) le32_to_cpu(head->ulen)) { ++ if (result >= 0) { ++ /* I think this is impossible, as ++ ext2_decompress_blocks() checks against ++ head->ulen. */ ++ printk(KERN_WARNING "Unexpected return value %d " ++ "from ext2_decompress_blocks()\n", result); ++ result = -EXT2_ECOMPR; ++ } ++ ++#ifdef EXT2_COMPR_REPORT_WA ++ printk(KERN_DEBUG "pid %d unlocks wa\n", current->pid); ++#endif ++ put_cpu_var(ext2_rd_wa); ++ goto out_err; ++ } ++ ++#ifdef EXT2_COMPR_REPORT ++ printk(KERN_DEBUG "ext2: %04x:%lu: cluster %d+%d [%d] " ++ "decompressed into %d bytes\n", ++ inode->i_rdev, ++ inode->i_ino, clu_block0, clu_nblocks, c_nblk, result); ++#endif ++ ++ /* Copy back decompressed data. 
*/ ++ { ++ int count = result; ++ unsigned char const *src; ++ int c, p; ++ int cbh; ++ int n; /* block index in page */ ++ struct buffer_head *bp; ++ unsigned addr0, b_start, b_end; ++ ++ assert(count > 0); ++ if (d_npg == -1) { ++ d_npg = ((count - 1) >> PAGE_CACHE_SHIFT) + 1; ++ } ++#ifdef EXT2_COMPR_REPORT_CPR ++ trace_e2c ++ ("ext2_decompress_cluster: cnt=%d free_ix=%d d_npg=%d nbh=%d\n", ++ count, free_ix, d_npg, nbh); ++#endif ++ result = -EXT2_ECOMPR; ++ src = __get_cpu_var(ext2_rd_wa)->u; ++ cbh = 0; ++ for (c = 0; c < clu_nblocks; c++) { ++ ++ if (bh[c]->b_blocknr == 0) { ++#ifdef EXT2_COMPR_REPORT_CPR ++ trace_e2c("\t clear buf %d sts=0x%x\n", c, ++ (int) bh[c]->b_state); ++#endif ++ restore_b_data_himem(bh[c]); ++ memset(bh[c]->b_data, 0, inode->i_sb->s_blocksize); ++ continue; ++ } ++ if (cbh >= (nbh - 1)) { ++ break; ++ } ++ if (count < inode->i_sb->s_blocksize) { ++ put_cpu_var(ext2_rd_wa); ++ goto out_err; ++ } ++ cbh++; ++ count -= inode->i_sb->s_blocksize; ++ p = c >> (PAGE_CACHE_SHIFT - inode->i_sb->s_blocksize_bits); ++ if (!PageUptodate(pg[p])) { ++ addr0 = (clu_block0 << inode->i_sb->s_blocksize_bits); ++ b_start = addr0 + (c << inode->i_sb->s_blocksize_bits); ++ b_end = b_start + inode->i_sb->s_blocksize; ++#ifdef EXT2_COMPR_REPORT_CPR ++ trace_e2c("\t[%d] sts=0x%x e=%d s=%d sz=%d pg:%lu(%#x)\n", ++ c, (int) bh[c]->b_state, b_end, b_start, ++ (int) inode->i_size, pg[p]->index, ++ (unsigned int) pg[p]); ++#endif ++ if (b_end <= inode->i_size) { ++ /* Block is before end of file, copy data */ ++ restore_b_data_himem(bh[c]); ++ memcpy(bh[c]->b_data, src, inode->i_sb->s_blocksize); ++ ++ } else if (b_start < inode->i_size) { ++ /* Block contains end of file, copy to end */ ++ restore_b_data_himem(bh[c]); ++ memcpy(bh[c]->b_data, src, inode->i_size - b_start); ++ ++ } ++ set_buffer_uptodate(bh[c]); ++ set_buffer_dirty(bh[c]); ++ bh_writeout[bhn_writeout] = bh[c]; //mw ++ bhn_writeout++; //mw ++ } else { ++ //mw: DEBUG. buffer is uptodate now. compress will not reread! an get the compressed data!!! ++ // clear flag in extra page!!! 
++ // clear_bit(BH_Uptodate, &bh[c]->b_state); ++ ++ n = c & ((PAGE_CACHE_SIZE - 1) >> inode->i_sb-> ++ s_blocksize_bits); ++ bp = page_buffers(pg[p]); ++ for (i = 0; i < n; i++) { ++ bp = bp->b_this_page; ++ } ++ result = ext2_get_block(inode, clu_block0 + c, bp, 0); ++ ++ //mw: needed to do a writeback of the non-epg-buffers ++ //no idea how it was done before ++ set_buffer_uptodate(bp); ++ set_buffer_dirty(bp); ++ bh_writeout[bhn_writeout] = bp; //mw ++ bhn_writeout++; //mw ++ ++ if (bp->b_blocknr == 0) { ++ put_cpu_var(ext2_rd_wa); ++ goto out_err; ++ } ++ assert(bp->b_blocknr == bh[c]->b_blocknr); ++ } ++ src += inode->i_sb->s_blocksize; ++ } ++ if (count > inode->i_sb->s_blocksize) { ++ put_cpu_var(ext2_rd_wa); ++ goto out_err; ++ } ++ p = c >> (PAGE_CACHE_SHIFT - inode->i_sb->s_blocksize_bits); ++ if (!PageUptodate(pg[p])) { ++ addr0 = (clu_block0 << inode->i_sb->s_blocksize_bits); ++ b_start = addr0 + (c << inode->i_sb->s_blocksize_bits); ++#ifdef EXT2_COMPR_REPORT_CPR ++ trace_e2c("\t[%d] sts=0x%x c=%d s=%d sz=%d pg:%lu(%#x)\n", c, ++ (int) bh[c]->b_state, count, b_start, ++ (int) inode->i_size, pg[p]->index, ++ (unsigned int) pg[p]); ++#endif ++ if (b_start >= inode->i_size) { ++ restore_b_data_himem(bh[c]); ++ memset(bh[c]->b_data, 0, inode->i_sb->s_blocksize); ++ ++ } else { ++ if ((inode->i_size - b_start) < count) { ++ restore_b_data_himem(bh[c]); ++ memcpy(bh[c]->b_data, src, inode->i_size - b_start); ++ memset(bh[c]->b_data + (inode->i_size - b_start), 0, ++ count - (inode->i_size - b_start)); ++ } else { ++ restore_b_data_himem(bh[c]); ++ memcpy(bh[c]->b_data, src, count); ++ } ++ } ++ set_buffer_uptodate(bh[c]); ++ set_buffer_dirty(bh[c]); ++ bh_writeout[bhn_writeout] = bh[c]; //mw ++ bhn_writeout++; //mw ++ } else { ++ assert(epg[p] != NULL); //mw ++ n = c & ((PAGE_CACHE_SIZE - 1) >> inode->i_sb-> ++ s_blocksize_bits); ++ bp = page_buffers(pg[p]); ++ for (i = 0; i < n; i++) { ++ bp = bp->b_this_page; ++ } ++ result = ext2_get_block(inode, clu_block0 + c, bp, 0); ++ ++ //mw: needed to do a writeback of the non-epg-buffers ++ //no idea how it was done before ++ set_buffer_uptodate(bp); ++ set_buffer_dirty(bp); ++ bh_writeout[bhn_writeout] = bp; //mw ++ bhn_writeout++; //mw ++ if (bp->b_blocknr == 0) { ++ put_cpu_var(ext2_rd_wa); ++ goto out_err; ++ } ++ assert(bp->b_blocknr == bh[c]->b_blocknr); ++ } ++ result = (nbh - 1) * inode->i_sb->s_blocksize + count; ++ } ++ ++ for (i = 0; i < EXT2_MAX_CLUSTER_PAGES; i++) { ++ if (pg[i] == NULL) ++ break; ++ if (i < d_npg) ++ SetPageUptodate(pg[i]); ++ } ++ ++#ifdef EXT2_COMPR_REPORT_WA ++ printk(KERN_DEBUG "pid %d unlocks wa\n", current->pid); ++#endif ++ put_cpu_var(ext2_rd_wa); ++ ++ inode->i_ctime = CURRENT_TIME; ++ mark_inode_dirty_sync(inode); ++ /* If needed, EXT2_DIRTY_FL is raised by the caller. */ ++ ++#if 0 ++ /* TODO: SYNC */ ++ if (IS_SYNC(inode)) { ++ generic_osync_inode(inode, inode->i_mapping, ++ OSYNC_METADATA | OSYNC_DATA); ++ } ++#endif ++ assert(result >= 0); ++ ++ //Sync out changes: ++ assert(bhn_writeout <= EXT2_MAX_CLUSTER_BLOCKS); ++ assert(bhn_writeout >= 0); ++ ++ //mw: debug ++ for (i = 0; i < bhn_writeout; i++) { ++ if ((!buffer_mapped(bh_writeout[i])) ++ || (bh_writeout[i]->b_bdev == NULL)) { ++ u32 block = ext2_cluster_block0(inode, cluster); ++ ext2_get_block(inode, block + i, bh_writeout[i], 1); ++ //printk("ext2_get_block Block:%lu, Mapped:%i, Page:%lu, bdev: %#x\n", bh_writeout[i]->b_blocknr, (bh_writeout[i]->b_state & BH_Mapped), (bh_writeout[i]->b_page ? 
bh_writeout[i]->b_page->index : 0), bh_writeout[i]->b_bdev ); ++ } ++ assert(buffer_mapped(bh_writeout[i])); ++ assert(bh_writeout[i]->b_bdev != NULL); ++ assert(bh_writeout[i]->b_bdev == inode->i_sb->s_bdev); ++ /*if (bh_writeout[i]->b_bdev == NULL) ++ bh_writeout[i]->b_bdev = inode->i_sb->s_bdev; //fix bdev-bug */ ++ } ++ ++ ll_rw_block(WRITE, bhn_writeout, bh_writeout); ++ //mw: seems we have to wait here, otherwise: crash! ++ ++ CHECK_NOT_ATOMIC ++ for (i = 0; i < bhn_writeout; i++) { ++ if (bh_writeout[i]) ++ wait_on_buffer(bh_writeout[i]); ++ } ++ goto cleanup; ++ ++ out_err: ++ printk("Error in Decompressing cluster: Err=%i\n", result); ++ ++ cleanup: ++ ++#ifdef CONFIG_HIGHMEM ++ if (kmapped) ++ ext2_kunmap_cluster_pages(NULL, pg, epg); ++#endif ++ ++ for (i = 0; i < EXT2_MAX_CLUSTER_PAGES; i++) { ++ if (pg[i] == NULL) ++ break; ++ unlock_page(pg[i]); ++ page_cache_release(pg[i]); ++ } ++ ++ for (i = 0; i < EXT2_MAX_CLUSTER_PAGES; i++) { ++ if (epg[i] != NULL) { ++ ClearPageDirty(epg[i]); ++ ClearPageUptodate(epg[i]); ++ try_to_free_buffers(epg[i]); ++ unlock_page(epg[i]); ++ assert(page_count(epg[i]) == 1); ++ page_cache_release(epg[i]); ++ } ++ } ++ ++ /* ++ * Release buffers, don't forget to unlock the locked ones. ++ * pjm 1998-01-14: TO_DO: Locked ones? ++ */ ++ assert(nbh >= 0); ++ assert(nbh <= EXT2_MAX_CLUSTER_BLOCKS); ++#ifdef EXT2_COMPR_REPORT ++ trace_e2c(" < < < ext2_decompress_cluster %d: inode=%ld, res=%i\n", ++ cluster, inode->i_ino, result); ++#endif ++ return result; ++} ++ ++ ++/* ++ * Function to decompress the pages of a cluster. ++ * ++ * Allocate buffers to pages what are not mapped on the device. ++ * ++ * Returns the size of decompressed data on success, a negative ++ * value in case of failure, or 0 if some pages are not uptodate. ++ * ++ * The inode is supposed to be writable. ++ * All the pages must be UPTODATE, ++ */ ++int ext2_decompress_pages(struct inode *inode, u32 cluster, ++ struct page *pg[]) ++{ ++ struct ext2_cluster_head *head; ++ struct buffer_head *bh0; ++ struct buffer_head *bh[EXT2_MAX_CLUSTER_BLOCKS]; ++ unsigned nbh, c_nblk; ++ unsigned free_ix, clu_block0, clu_nblocks; ++ int i, pagesPerCluster, data_left, size = 0; ++ long status = 0; ++ char *dp; ++ struct buffer_head *bh_writeout[EXT2_MAX_CLUSTER_BLOCKS]; ++ int bhn_writeout; ++#ifdef CONFIG_HIGHMEM ++ int kmapped = 0; ++ ++ ext2_kmap_cluster_pages(NULL, pg, NULL); ++ kmapped = 1; ++#endif ++ ++ for (i = 0; i < EXT2_MAX_CLUSTER_BLOCKS; i++) { ++ bh_writeout[i] = NULL; ++ bhn_writeout = 0; ++ } ++ ++ /* First, get cluster_head (For this, we need to re-read the first block of ++ the cluster, without overwriting the data of the page the buffer point to... */ ++ /* This suppose that cluster are aligned with PAGE_SIZE... To be improved */ ++ ++ /* Changed by Yabo Ding, ++ The old code cannot reread data from disk to a changed buffers data pointer in 2.6.x. ++ So, I copy memory data(decompressed) to a temporary buffer; ++ Then reread data(compressed) from disk, and copy to head; ++ Then copy back the memory data from temporary buffer. ++ It seems clumsy, but it works well. 
++ */ ++ ++ bh0 = page_buffers(pg[0]); ++ restore_b_data_himem(bh0); ++ ++ head = (struct ext2_cluster_head *) kmalloc(bh0->b_size, GFP_KERNEL); ++ if (head == NULL) { ++ ext2_msg(inode->i_sb, "no more memory", "inode = %lu", ++ inode->i_ino); ++ status = -EIO; ++ goto out_x; ++ } ++ dp = kmalloc(bh0->b_size, GFP_KERNEL); ++ if (dp == NULL) { ++ ext2_msg(inode->i_sb, "no more memory", "inode = %lu", ++ inode->i_ino); ++ kfree(head); ++ status = -EIO; ++ goto out_x; ++ } ++ memcpy(dp, bh0->b_data, bh0->b_size); ++ clear_bit(BH_Uptodate, &bh0->b_state); ++ if (!buffer_mapped(bh0)) { ++ status = ++ ext2_get_block(inode, ext2_cluster_block0(inode, cluster), bh0, ++ 0); ++ if (bh0->b_blocknr == 0) { ++ trace_e2c ++ ("ext2_decompress_pages: ext2_get_block error %ld (cluster = %u)\n", ++ status, cluster); ++ kfree(head); ++ memcpy(bh0->b_data, dp, bh0->b_size); ++ kfree(dp); ++ status = -EIO; ++ goto out; ++ } ++ } ++ ll_rw_block(READ, 1, &bh0); ++ ++ CHECK_NOT_ATOMIC ++ wait_on_buffer(bh0); ++ //printk("RE-Read: Buffer: blocknr:%lu(%#x) \n", bh0->b_blocknr, bh0); ++ if (!buffer_uptodate(bh0)) { /* Read error ??? */ ++ trace_e2c("ext2_decompress_pages: IO error (cluster = %u)\n", ++ cluster); ++ kfree(head); ++ memcpy(bh0->b_data, dp, bh0->b_size); ++ kfree(dp); ++ status = -EIO; ++ goto out; ++ } ++ /* This suppose that cluster are aligned with PAGE_SIZE... To be improved ++ bh0->b_data = page_address(pg[0]); */ ++ memcpy((char *) head, bh0->b_data, bh0->b_size); ++ memcpy(bh0->b_data, dp, bh0->b_size); ++ kfree(dp); ++ ++ if (head->magic != cpu_to_le16(EXT2_COMPRESS_MAGIC_04X)) { ++ ext2_msg(inode->i_sb, ++ "ext2_decompress_pages: bad magic number", ++ "inode = %lu, magic = %#04x", inode->i_ino, ++ le16_to_cpu(head->magic)); ++ kfree(head); ++ status = -EIO; ++ goto out; ++ } ++#ifdef EXT2_COMPR_REPORT ++ trace_e2c("ext2_decompress_pages: clt=%d i=%ld head=0x%x\n", cluster, ++ inode->i_ino, (unsigned) head); ++#endif ++ ++ /* Now, try to do the same as in ext2_decompress_cluster for moving/allocating blocks */ ++ nbh = 0; ++ pagesPerCluster = ext2_cluster_npages(inode, cluster); ++ for (i = 0; i < pagesPerCluster && pg[i]; i++) { ++ assert(PageLocked(pg[i])); ++ //if (!(PageUptodate(pg[i]))) { ++ //mw: do it like ext2_decompress_cluster to handle end of a file correctly ++ if (!(PageUptodate(pg[i])) ++ && (pg[i]->index <= ((inode->i_size - 1) >> PAGE_CACHE_SHIFT))) { ++ kfree(head); ++ printk("should never happen: not all pages uptodate!\n"); //mw ++ status = 0; ++ goto out_x; ++ } ++ } ++ ++ for (i = 0; i < pagesPerCluster && pg[i]; i++) { ++ struct buffer_head *bhead, *bhx; ++ int idx = 0; ++ ++ /* assert(PageUptodate(pg[i])); with ftruncate() can be false */ ++ if (!page_has_buffers(pg[i])) { ++ ClearPageUptodate(pg[i]); /*mw */ ++ ClearPageDirty(pg[i]); /*mw */ ++ assert(0); ++ create_empty_buffers_e2c(pg[i], inode->i_sb->s_blocksize, 0, ++ inode); ++ if (unlikely(!page_has_buffers(pg[i]))) ++ printk("Error: NOMEM!\n"); ++ } ++ bhead = page_buffers(pg[i]); ++ for (bhx = bhead; bhx != bhead || !idx; bhx = bhx->b_this_page) { ++ idx++; ++ bh[nbh] = bhx; ++ nbh++; ++ } ++ } ++ ++ while ((nbh != 0) && (bh[nbh - 1]->b_blocknr == 0)) ++ --nbh; ++ ++ c_nblk = nbh; ++ ++ free_ix = ++ ext2_calc_free_ix(head->holemap_nbytes, (u8 const *) (&head[1]), ++ c_nblk); ++ clu_block0 = ext2_cluster_block0(inode, cluster); ++ clu_nblocks = ext2_cluster_nblocks(inode, cluster); ++ ext2_unpack_blkaddrs(inode, bh, 0, head->holemap_nbytes, ++ (u8 const *) (&head[1]), c_nblk, free_ix, ++ clu_block0, clu_nblocks); 
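/*
 * Sketch of the "extra blocks" arithmetic used in the allocation loop just
 * below (stand-alone user space, assumed 1 KiB blocks): a cluster that
 * decompresses to ulen bytes but currently occupies c_nblk compressed
 * blocks needs roughly ceil(ulen / blocksize) - c_nblk newly allocated
 * blocks, holes excepted.
 */
#include <stdio.h>

int main(void)
{
	unsigned blocksize = 1024;            /* assumed block size */
	unsigned ulen = 16384, c_nblk = 5;    /* example cluster */
	int data_left = (int)(ulen - c_nblk * blocksize);
	unsigned extra = 0;

	while (data_left > 0) {               /* one new block per blocksize of data */
		extra++;
		data_left -= (int)blocksize;
	}
	printf("extra blocks to allocate: %u\n", extra);   /* prints 11 */
	return 0;
}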
++ ++ /* Allocate the extra blocks needed. */ ++ data_left = size = le32_to_cpu(head->ulen); ++ ++ data_left -= c_nblk << inode->i_sb->s_blocksize_bits; ++ assert(data_left > 0); ++ for (i = 0; i < free_ix; i++) { ++ if (bh[i]->b_blocknr != 0) { ++#ifdef EXT2_COMPR_REPORT_CPR ++ trace_e2c("\t [%d] blk=%ld sts=0x%x\n", i, bh[i]->b_blocknr, ++ (int) bh[i]->b_state); ++#endif ++ set_buffer_dirty(bh[i]); ++ bh_writeout[bhn_writeout] = bh[i]; //mw ++ bhn_writeout++; //mw ++ } ++ } ++ ++ for (i = free_ix; i < clu_nblocks; i++) { ++ if (((i >> 3) >= head->holemap_nbytes) ++ || !(head->holemap[i >> 3] & (1 << (i & 7)))) { ++ status = ++ ext2_get_block(inode, clu_block0 + i, bh[i], ++ 1 /* create */ ); ++ if (status || bh[i]->b_blocknr == 0) { ++ status = -EIO; ++ goto out; ++ } ++#ifdef EXT2_COMPR_REPORT_CPR ++ trace_e2c("\t [%d] blk=%ld sts=0x%x\n", i, bh[i]->b_blocknr, ++ (int) bh[i]->b_state); ++#endif ++ set_bit(BH_Uptodate, &bh[i]->b_state); ++ set_buffer_dirty(bh[i]); ++ bh_writeout[bhn_writeout] = bh[i]; //mw ++ bhn_writeout++; //mw ++ nbh++; ++ data_left -= inode->i_sb->s_blocksize; ++ if (data_left <= 0) ++ break; ++ } ++ } ++ ++ out: ++ kfree(head); ++ ++ out_x: ++ ++ for (i = 0; i < bhn_writeout; i++) { ++ ++ if ((!buffer_mapped(bh_writeout[i])) ++ || (bh_writeout[i]->b_bdev == NULL)) { ++ u32 block = ext2_cluster_block0(inode, cluster); ++ ext2_get_block(inode, block + i, bh_writeout[i], 1); ++ //printk("ext2_get_block Block:%lu, Mapped:%i, Page:%lu, bdev: %#x\n", bh_writeout[i]->b_blocknr, (bh_writeout[i]->b_state & BH_Mapped), (bh_writeout[i]->b_page ? bh_writeout[i]->b_page->index : 0), bh_writeout[i]->b_bdev ); ++ } ++ assert(buffer_mapped(bh_writeout[i])); ++ assert(bh_writeout[i]->b_bdev != NULL); ++ assert(bh_writeout[i]->b_bdev == inode->i_sb->s_bdev); ++ /*if (bh_writeout[i]->b_bdev == NULL) ++ bh_writeout[i]->b_bdev = inode->i_sb->s_bdev; //fix bdev-bug */ ++ } ++ //Sync out changes: ++ ll_rw_block(WRITE, bhn_writeout, bh_writeout); ++ //mw: seems we have to wait here, otherwise: crash! ++ ++ CHECK_NOT_ATOMIC ++ for (i = 0; i < bhn_writeout; i++) { ++ if (bh_writeout[i]) ++ wait_on_buffer(bh_writeout[i]); ++ } ++ ++ ++#ifdef CONFIG_HIGHMEM ++ if (kmapped) ++ ext2_kunmap_cluster_pages(NULL, pg, NULL); ++#endif ++ ++ return (status ? status : size); ++} ++ ++ ++/* Decompress every cluster that is still compressed. ++ We stop and return -ENOSPC if we run out of space on device. ++ ++ The caller needs to check for EXT2_COMPRBLK_FL before calling. ++ ++ Returns 0 on success, -errno on failure. ++ ++ Called by ext2_ioctl(). */ ++int ext2_decompress_inode(struct inode *inode) ++{ ++ u32 cluster; ++ u32 n_clusters; ++ int err = 0; ++ struct ext2_inode_info *ei = EXT2_I(inode); ++ ++ assert(ei->i_flags & EXT2_COMPRBLK_FL); ++ ++ /* Quotas aren't otherwise kept if file is opened O_RDONLY. */ ++ dquot_initialize(inode); ++ ++ //mutex_lock(&inode->i_mutex); /* MW 5-16-07 */ ++ assert(atomic_read(&inode->i_mutex.count) <= 0); /* i.e. mutex_lock */ ++ err = 0; ++ /* This test can succeed because down() (and I think DQUOT_INIT) can block. */ ++ if (!(ei->i_flags & EXT2_COMPRBLK_FL)) ++ goto out; ++ ++ n_clusters = ext2_n_clusters(inode); ++ for (cluster = 0; cluster < n_clusters; cluster++) { ++ err = ext2_cluster_is_compressed_fn(inode, cluster); ++ if (err > 0) { ++ err = ext2_decompress_cluster(inode, cluster); ++ /* If we later get an error, we'll need to recompress. 
*/ ++ ei->i_flags |= EXT2_DIRTY_FL; ++ ei->i_compr_flags |= EXT2_CLEANUP_FL; ++ } ++ if (err < 0) ++ goto error; ++ } ++ assert(err >= 0); ++ err = 0; ++ ei->i_flags &= ~(EXT2_COMPRBLK_FL | EXT2_DIRTY_FL); ++ ei->i_compr_flags &= ~EXT2_CLEANUP_FL; ++ error: ++ inode->i_ctime = CURRENT_TIME; ++ mark_inode_dirty_sync(inode); ++ out: ++// mutex_unlock(&inode->i_mutex); /* MW 5-16-07 */ ++ return err; ++} ++ ++ ++/* ++ TODO: SECRM_FL ++ ++ TODO: Avant de liberer les blocs, regarder si le compteur ++ est a 1, et marquer le noeud si ce n'est pas le cas ++ (pour preparer la recompression immediate). ++ ++ pjm fixme translation. ++ "Before freeing the blocks, check if the counter is 1, ++ and mark the inode if not (in order to prepare for ++ immediate recompression)." */ ++ ++/* This is called by ext2_compress_cluster to free the blocks now ++ available due to compression. We free ,nb` blocks beginning with ++ block ,block`. We set the address of each freed block to ++ EXT2_COMPRESSED_BLKADDR, thus marking the cluster as compressed. ++ N.B. It is up to the caller to adjust i_blocks. */ ++ ++/* TODO: ext2_truncate() is much more careful than this routine. ++ (E.g. it checks for bh->b_count > 1, and checks for things changing ++ underneath it. It also calls bforget instead of brelse if it's ++ going to free it.) Why? Maybe we should copy it. */ ++ ++/* effic: Reduce the number of calls to ext2_free_block() the way ++ ext2_trunc_direct() does. */ ++ ++/* fixme: I think tht we do indeed need to check if buffers are held by ++ somebody else before freeing them. */ ++static int ext2_free_cluster_blocks(struct inode *inode, u32 block, ++ unsigned nb) ++{ ++ u32 tmp; ++ struct ext2_bkey key; ++ int err; ++ ++/* ++ * whitpa 04 Oct 2004: although it may be true that using e2compr in ++ * conjunction with quotas is a Bad Idea, having quotas enabled for other ++ * filesystems doesn't necessarily mean that the quota feature will actually be ++ * used in this one, so many people find the following assertion very annoying. ++ * I have therefore disabled it. ++ */ ++/* assert (!inode->i_sb->dq_op || (inode->i_flags & S_QUOTA)); */ ++ if (!nb) ++ return 0; ++ if (nb > EXT2_MAX_CLU_NBLOCKS) { ++ assert((int) nb >= 0); ++ assert(nb <= EXT2_MAX_CLU_NBLOCKS); ++ return -EDOM; ++ } ++ assert(((block + nb) & 3) == 0); ++ if (!ext2_get_key(&key, inode, block)) ++ return -EIO; ++ ++ while (nb-- > 0) { ++ tmp = ext2_get_key_blkaddr(&key); ++ err = ext2_set_key_blkaddr(&key, EXT2_COMPRESSED_BLKADDR); ++ if (err) ++ goto out; ++ if (tmp != 0) { ++ assert(tmp != EXT2_COMPRESSED_BLKADDR); ++#ifdef EXT2_COMPR_REPORT_ALLOC ++ printk(KERN_DEBUG "ext2: free %d = (%d) %d:%d:%d:%d : %d\n", ++ key.block, ++ key.level, ++ key.off[0], key.off[1], key.off[2], key.off[3], tmp); ++#endif ++ ext2_free_blocks(inode, tmp, 1); ++ } ++ if (!ext2_next_key(&key, 1)) ++ break; ++ } ++ err = 0; ++ out: ++ ext2_free_key(&key); ++ return err; ++} ++ ++#ifdef EXT2_COMPR_DEBUG ++static unsigned count_bits(unsigned char *p, unsigned nb) ++{ ++ u32 x = le32_to_cpu(*(u32 *) p); ++ unsigned n = 0; ++ ++ assert(nb <= 4); ++ if (nb != 4) ++ x &= (1 << (nb * 8)) - 1; ++ while (x) { ++ x &= (x - 1); ++ n++; ++ } ++ return n; ++} ++#endif ++ ++/* ++ * __remove_compr_assoc_queue is used in invalidate_inode_buffers ++ * replacement code for ext2_compress_cluster(). TLL 02/21/07 ++ * Yeah, it is duplicate code, but using it does not require ++ * patching fs/buffer.c/__remove_assoc_queue to export it. 
++ * The buffer's backing address_space's private_lock must be held. ++ */ ++/*static inline void __remove_compr_assoc_queue(struct buffer_head *bh) ++{ ++ list_del_init(&bh->b_assoc_buffers); ++}*/ ++ ++/* Compress one cluster. If the cluster uses fewer blocks once ++ compressed, it is stored in place of the original data. Unused ++ blocks are freed, and the cluster is marked as compressed. ++ ++ Returns a negative value on error, ++ 0 if the cluster does not compress well, ++ positive if it is compressed (whether it was already compressed ++ or whether we compressed it). ++ ++ Assume inode is writable. ++ ++ Called by : ++ ++ ext2_cleanup_compressed_inode () [i_sem] ++ ++ If ever we acquire new callers, make sure that quotas are ++ initialised, and COMPRBLK is handled correctly (i.e. such ++ that ioctl() can't change the cluster size on us), and that caller ++ tests for ext2_wa==NULL. ++*/ ++ ++int ext2_compress_cluster(struct inode *inode, u32 cluster) ++{ ++ struct buffer_head *bh[EXT2_MAX_CLUSTER_BLOCKS + 1]; ++ struct page *pg[EXT2_MAX_CLUSTER_PAGES]; ++ int s_nblk; /* Equals clu_nblocks less any trailing hole blocks. */ ++ unsigned u_nblk = (~(unsigned) 0), c_nblk; /* Number of blocks occupied by ++ un/compressed data. */ ++ int result, n, x; ++ int ulen, maxlen = 0, clen = 0; ++ unsigned char *dst; ++ u8 *src; ++ unsigned meth, alg; ++ int nbh = 0, npg, i; ++ unsigned char holemap_nbytes = 0; ++ unsigned last_hole_pos; ++ struct ext2_cluster_head *head; ++ unsigned r_nblk; ++ struct ext2_inode_info *ei = EXT2_I(inode); ++ unsigned long saved_isize; ++ //int dotrunc = 1; //mw ++ ++#ifdef CONFIG_HIGHMEM ++ int kmapped = 0; ++#endif ++ ++ /* impl: Otherwise, ioctl() could change the cluster size ++ beneath us. */ ++ /* TLL say not compressed and return -1 6-15-07 */ ++ if (!(ei->i_flags & EXT2_COMPRBLK_FL)) ++ return -1; ++ ++ //mw ++ saved_isize = inode->i_size; ++ ++ assert(atomic_read(&inode->i_mutex.count) <= 0); /* i.e. mutex_lock */ ++ assert(!mapping_mapped(inode->i_mapping)); ++ ++ npg = ext2_cluster_npages(inode, cluster); ++ ++ result = ext2_get_cluster_pages(inode, cluster, pg, NULL, 1); ++ if (result <= 0) ++ goto done; ++ ++#ifdef CONFIG_HIGHMEM ++ ext2_kmap_cluster_pages(NULL, pg, NULL); ++ kmapped = 1; ++#endif ++ ++ /* effic: We ought to use the page cache. Using the page ++ cache always costs extra CPU time, but saves I/O if the ++ page is present. We still need to detect holes, which ++ unfortunately may still cause I/O. Testing for all-zero ++ could save us that I/O. */ ++ ++ nbh = ext2_get_cluster_blocks(inode, cluster, bh, pg, NULL, 1); ++ ++ s_nblk = nbh; ++ ++#ifdef EXT2_COMPR_REPORT ++ { ++ int i; ++ trace_e2c(" > > > ext2_compress_cluster %d: inode=%ld, size=%d\n", ++ cluster, inode->i_ino, (int) inode->i_size); ++#ifdef EXT2_COMPR_REPORT_CPR ++ for (i = 0; i < s_nblk; i++) { ++ if (bh[i]) { ++ printk(KERN_DEBUG ++ "bbuffer_head[%d]: blocknr=%lu, addr=0x%p ", i, ++ (unsigned long) bh[i]->b_blocknr, bh[i]); ++ if (bh[i]->b_page) ++ printk(KERN_DEBUG "bgn:[page->index=%ld]\n", ++ bh[i]->b_page->index); ++ else ++ printk(KERN_DEBUG "[No page]\n"); ++ } else ++ printk("bbuffer_head[%d] is NULL\n", i); ++ } ++#endif ++ } ++#endif ++ /* ++ * Did somebody else compress the cluster while we were waiting ? ++ * This should never arise ... 
++ */ ++ result = ext2_cluster_is_compressed_fn(inode, cluster); ++ if (result != 0) { ++ if (result > 0) { ++ ext2_msg(inode->i_sb, ++ "ext2_compress_cluster", ++ "compressing compressed cluster"); ++ } ++ goto done; ++ } ++ ++ /* I moved it here in case we need to load a module that ++ * needs more heap that is currently allocated. ++ * In such case "init_module" for that algorithm forces ++ * re-allocation of ext2_wa. It should be safe here b/c the ++ * first reference to ext2_wa comes just after and we have ++ * locked ext2_wa before. ++ * ++ * I know that we may not need the compression at all ++ * (compressing 0 or 1 block) but it's better to sacrifice ++ * a bit than do make a total mess of this code. ++ * ++ * FIXME: Totally separate working areas for reading and writing. ++ * Jan R. ++ */ ++ ++ meth = ei->i_compr_method; ++ assert(meth < EXT2_N_METHODS); ++ alg = ext2_method_table[meth].alg; ++#ifdef CONFIG_KMOD ++ if (!ext2_algorithm_table[alg].avail) { ++ char str[32]; ++ ++ sprintf(str, "ext2-compr-%s", ext2_algorithm_table[alg].name); ++ request_module(str); ++ } ++#endif ++ ++ result = -EINTR; ++ ++ /* ++ * Try to get the working area. ++ */ ++#ifdef EXT2_COMPR_REPORT_WA ++ printk(KERN_DEBUG "pid %d enters critical region\n", current->pid); ++#endif ++ if (get_cpu_var(ext2_wr_wa) == NULL) ++ { ++ ext2_alloc_wr_wa(); ++ } ++ assert(__get_cpu_var(ext2_wr_wa) != NULL); ++ ++ ++ /* ++ * Now, we try to compress the cluster. If the cluster does ++ * not compress well, we just give up. Otherwise, we reuse ++ * the old blocks to store the compressed data (except that ++ * compressed data is contiguous in the file even if the ++ * uncompressed data had holes). ++ */ ++ ++ /* ++ * Compute the block bitmap, how many bytes of data we have ++ * in the cluster, and the maximum interesting length after ++ * compression. The bitmap will be used to reallocate blocks ++ * when decompressing the cluster, so that we don't create blocks ++ * that were previously missing. We also pack the buffers ++ * together. ++ */ ++ ++ head = (struct ext2_cluster_head *) __get_cpu_var(ext2_wr_wa)->c; ++#if EXT2_MAX_CLUSTER_BLOCKS > 32 ++# error "We need to zero more bits than this." ++#endif ++ *(u32 *) (&head[1]) = 0; ++ last_hole_pos = (unsigned) (-1); ++ assert(head->holemap[0] == 0); ++ assert(head->holemap[1] == 0); ++ assert(head->holemap[2] == 0); ++ assert(head->holemap[3] == 0); ++ assert(*(u32 *) head->holemap == 0); ++ assert(count_bits(head->holemap, 4) == 0); ++ ++ /* TODO: Check that i_size can't change beneath us. ++ do_truncate() is safe because it uses i_sem around changing ++ i_size. For the moment, I do a runtime check. */ ++ ++ saved_isize = inode->i_size; ++ ++#ifdef EXT2_COMPR_REPORT_VERBOSE ++ printk ++ ("00 ext2_compress_cluster[%u]: i_size=%u, s_blocksize_bits=%u, s_nblk=%u\n", ++ __LINE__, (unsigned) inode->i_size, inode->i_sb->s_blocksize_bits, ++ s_nblk); ++#endif ++// assert (ROUNDUP_RSHIFT(inode->i_size, inode->i_sb->s_blocksize_bits) ++// >= s_nblk); ++ /* This initial guess at ulen doesn't take holes into account ++ unless they're at end of cluster. We ,compensate for other ++ holes` during the loop below. 
*/ ++ ulen = MIN(s_nblk << inode->i_sb->s_blocksize_bits, ++ inode->i_size - ext2_cluster_offset(inode, cluster)); ++ r_nblk = (((ulen - 1) >> inode->i_sb->s_blocksize_bits) + 1); ++ if (r_nblk <= 1) { ++ /* MW: required to remove Z flag, otherwise compress ++ * is tried on each access */ ++ result = 0; ++ goto no_compress; ++ } ++ /* Verify if more than 1 block to compress in the cluster */ ++ nbh = 0; ++ for (x = 0; x < s_nblk; x++) { ++ if ((bh[x] != NULL) && (bh[x]->b_blocknr != 0)) { ++ nbh++; ++ } else { ++ last_hole_pos = x; ++ head->holemap[x >> 3] |= 1 << (x & 7); ++ ulen -= inode->i_sb->s_blocksize; ++ /* impl: We know that it's a whole block because ++ ext2_get_cluster_blocks trims s_nblk for trailing ++ NULL blocks, and partial blocks only come at ++ the end, so there can't be partial NULL blocks. */ ++ } ++ } ++ /* We don't try to compress cluster that only have one block ++ or no block at all. (When fragments are implemented, this code ++ should be changed.) */ ++ if (nbh <= 1) { ++ /* MW: required to remove Z flag, otherwise compress ++ * is tried on each access */ ++ goto no_compress; ++ } ++ ++ u_nblk = nbh; ++ /* Copy the data in the compression area */ ++ dst = __get_cpu_var(ext2_wr_wa)->u; ++ for (x = 0; x < s_nblk; x++) { ++ if ((bh[x] != NULL) && (bh[x]->b_blocknr != 0)) { ++ restore_b_data_himem(bh[x]); ++ memcpy(dst, bh[x]->b_data, bh[x]->b_size); ++ dst += bh[x]->b_size; ++ } ++ } ++ ++ assert(count_bits(head->holemap, 4) == s_nblk - u_nblk); ++ ++#if EXT2_GRAIN_SIZE != EXT2_MIN_BLOCK_SIZE ++# error "this code ought to be changed" ++#endif ++ ++ /* ,maxlen` is the maximum length that the compressed data can ++ be while still taking up fewer blocks on disk. */ ++ holemap_nbytes = (last_hole_pos >> 3) + 1; ++ /* impl: Remember that ,last_hole_pos` starts off as being -1, ++ so the high 3 bits of ,last_hole_pos >> 3` can be wrong. ++ This doesn't matter if holemap_nbytes discards the high ++ bits. */ ++ ++ assert(sizeof(holemap_nbytes) < sizeof(unsigned)); ++ assert((last_hole_pos == (unsigned) -1) ++ == (holemap_nbytes == 0)); ++ maxlen = ++ ((((r_nblk < ++ u_nblk) ? r_nblk : u_nblk) - 1) * inode->i_sb->s_blocksize - ++ sizeof(struct ext2_cluster_head) ++ - holemap_nbytes); ++ clen = 0; ++ /* Handling of EXT2_AUTO_METH at the moment is just that we ++ use the kernel default algorithm. I hope that in future ++ this can be extended to the kernel deciding when to ++ compress and what algorithm to use, based on available disk ++ space, CPU time, algorithms currently used by the fs, ++ etc. */ ++ if ((meth == EXT2_AUTO_METH) ++ || !ext2_algorithm_table[alg].avail) { ++ meth = EXT2_DEFAULT_COMPR_METHOD; ++ alg = ext2_method_table[meth].alg; ++ assert(ext2_algorithm_table[alg].avail); ++ } ++ if (alg == EXT2_NONE_ALG) ++ goto no_compress; ++ ++ clen = ext2_algorithm_table[alg].compress(__get_cpu_var(ext2_wr_wa)->u, ++ __get_cpu_var(ext2_wr_wa)->c + sizeof(struct ext2_cluster_head) + holemap_nbytes, ++ __get_cpu_var(ext2_wr_wa)->heap, ulen, maxlen, ext2_method_table[meth].xarg); ++ ++#ifdef EXT2_COMPR_REPORT_ALGORITHMS ++ printk(KERN_DEBUG "03 ext2: %lu: cluster %d+%d [%d] compressed " ++ "into %d bytes (ulen = %d, maxlen = %d)\n", ++ inode->i_ino, ++ ext2_cluster_offset(inode, cluster), ++ ext2_cluster_nblocks(inode, cluster), ++ u_nblk, clen, ulen, maxlen); ++#endif ++ ++ if ((clen == 0) || (clen > maxlen)) { ++ no_compress: ++ ++ /* this chunk didn't compress. 
*/ ++ assert(inode->i_size == saved_isize); ++#ifdef EXT2_COMPR_REPORT_WA ++ printk(KERN_DEBUG ++ "pid %d leaves critical region, nbh=%d, u_nblk=%d, " ++ "inode->i_size=%lu, saved_isize=%lu, clen=%d, ulen=%d, maxlen=%d\n", ++ current->pid, nbh, u_nblk, ++ (long unsigned) inode->i_size, saved_isize, clen, ulen, ++ maxlen); ++#endif ++ ++ result = 0; ++ put_cpu_var(ext2_wr_wa); ++ goto done; ++ } ++ ++ ++#if EXT2_MAX_CLUSTER_BLOCKS > 32 ++# error "We need to zero more bits than this." ++#endif ++ assert(-1 <= (int) last_hole_pos); ++ assert((int) last_hole_pos < 32); ++ assert((le32_to_cpu(*(u32 *) head->holemap) ++ & (~0u << (1 + last_hole_pos)) ++ & (~(~0u << (8 * holemap_nbytes)))) ++ == 0); ++ /* Don't change "~0u << (1 + last_hole_pos)" to "~1u << last_hole_pos" ++ as I almost did, as last_hole_pos can be -1 and cannot be 32. */ ++ assert(count_bits(head->holemap, holemap_nbytes) == s_nblk - u_nblk); ++ ++ /* Compress the blocks at the beginning of the cluster */ ++ for (x = 0, nbh = 0; x < s_nblk; x++) { ++ if ((bh[x] != NULL) && (bh[x]->b_blocknr != 0)) { ++ if (nbh != x) { ++ restore_b_data_himem(bh[x]); ++ bh[nbh]->b_blocknr = bh[x]->b_blocknr; ++ set_bit(BH_Mapped, &bh[nbh]->b_state); ++ bh[x]->b_blocknr = 0; ++ assert(buffer_mapped(bh[x])); ++ clear_bit(BH_Mapped, &bh[x]->b_state); ++ } ++ nbh++; ++ } ++ } ++ assert(nbh == u_nblk); ++ assert(count_bits(head->holemap, holemap_nbytes) == s_nblk - u_nblk); ++ ++ /* ++ * Compression was successful, so add the header and copy to blocks. ++ */ ++ ++ /* Header. */ ++ { ++ head->magic = cpu_to_le16(EXT2_COMPRESS_MAGIC_04X); ++ head->method = meth; ++ head->holemap_nbytes = holemap_nbytes; ++ head->ulen = cpu_to_le32(ulen); ++ head->clen = cpu_to_le32(clen); ++ ++ barrier(); //mw: "barrier" tells compiler not to re-order resulting asm statments, somehow. ++ head->checksum = ++ cpu_to_le32(ext2_adler32 ++ (le32_to_cpu(*(u32 *) __get_cpu_var(ext2_wr_wa)->c), ++ __get_cpu_var(ext2_wr_wa)->c + 8, ++ (sizeof(struct ext2_cluster_head) - 8 + ++ head->holemap_nbytes + clen))); ++ } ++ ++ assert((le32_to_cpu(*(u32 *) head->holemap) ++ & (~0 << (1 + last_hole_pos)) ++ & ((1 << (8 * holemap_nbytes)) - 1)) == 0); ++ result = clen += sizeof(struct ext2_cluster_head) + holemap_nbytes; ++ c_nblk = ROUNDUP_RSHIFT(clen, inode->i_sb->s_blocksize_bits); ++ ++ /* Release unneeded buffer heads. (Freeing is done later, ++ after unlocking ext2_wr_wa.) */ ++ assert(nbh == u_nblk); ++ nbh = c_nblk; ++ ++#ifdef EXT2_COMPR_REPORT ++ trace_e2c("ext2_compress_cluster: head->clen=%d, clen=%d\n", head->clen, clen); ++#endif ++ src = __get_cpu_var(ext2_wr_wa)->c; ++ ++ for (n = 0; (int) clen > 0; n++) { ++ restore_b_data_himem(bh[n]); ++ if (clen >= inode->i_sb->s_blocksize) { ++ memcpy(bh[n]->b_data, src, inode->i_sb->s_blocksize); ++ } else { ++ memcpy(bh[n]->b_data, src, clen); ++ } ++ ++ /* TO_DO: OSYNC. means: write opertions are blocking until the ++ * the pages are written from page cache to disk */ ++ ++ set_buffer_uptodate(bh[n]); ++ set_buffer_dirty(bh[n]); ++ src += inode->i_sb->s_blocksize; ++ clen -= inode->i_sb->s_blocksize; ++ } ++ ++ i = 0; ++ assert(n == c_nblk); ++ assert((le32_to_cpu(*(u32 *) head->holemap) ++ & (~0 << (1 + last_hole_pos)) ++ & ((1 << (8 * holemap_nbytes)) - 1)) == 0); ++ ++ /* Runtime check that no-one can change i_size while i_sem is down. ++ (See where saved_isize is set, above.) 
*/ ++ assert(inode->i_size == saved_isize); ++ assert(!mapping_mapped(inode->i_mapping)); ++ ++ /* Free the remaining blocks, and shuffle used blocks to start ++ of cluster in blkaddr array. */ ++ { ++ u32 free_ix, curr; ++ int err; ++ ++ /* Calculate free_ix. There should be ,c_nblk` ++ non-hole blocks among the first ,free_ix` ++ blocks. */ ++ { ++ assert((le32_to_cpu(*(u32 *) head->holemap) ++ & (~0 << (1 + last_hole_pos)) ++ & ((1 << (8 * holemap_nbytes)) - 1)) == 0); ++ assert(n == c_nblk); ++ for (free_ix = 0; ++ ((int) free_ix <= (int) last_hole_pos) && (n > 0); ++ free_ix++) ++ if (!(head->holemap[free_ix >> 3] ++ & (1 << (free_ix & 7)))) ++ n--; ++ free_ix += n; ++ ++ if ((free_ix < c_nblk) ++ || (free_ix + u_nblk > s_nblk + c_nblk) ++ || (free_ix >= ext2_cluster_nblocks(inode, cluster)) ++ || ((holemap_nbytes == 0) && (c_nblk != free_ix))) { ++ assert(free_ix >= c_nblk); ++ /*assert (free_ix - c_nblk <= s_nblk - u_nblk); */ ++ assert(free_ix + u_nblk <= s_nblk + c_nblk); ++ assert(free_ix < ext2_cluster_nblocks(inode, cluster)); ++ assert((holemap_nbytes != 0) || (c_nblk == free_ix)); ++ assert(1 <= c_nblk); ++ assert(c_nblk < u_nblk); ++ assert(u_nblk <= s_nblk); ++ assert(s_nblk <= ext2_cluster_nblocks(inode, cluster)); ++ assert(ext2_cluster_nblocks(inode, cluster) <= ++ EXT2_MAX_CLU_NBLOCKS); ++ ext2_error(inode->i_sb, "ext2_compress_cluster", ++ "re assertions: c=%d, u=%d, f=%d, s=%d, n=%d, " ++ "lhp=%d, hm=%x, hnb=%d, " "ino=%lu, clu=%u", ++ (int) c_nblk, (int) u_nblk, (int) free_ix, ++ (int) s_nblk, (int) ext2_cluster_nblocks(inode, ++ cluster), ++ (int) last_hole_pos, ++ (unsigned) le32_to_cpu(*(u32 *) head->holemap), ++ (int) holemap_nbytes, inode->i_ino, cluster); ++ } ++ } ++ ++ /*mw: put here: set all __get_cpu related pointers to NULL ++ as they become invalid with put_cpu */ ++ head = NULL; /* prevent any more stupid bugs */ ++ src = NULL; ++ dst = NULL; ++ put_cpu_var(ext2_wr_wa); ++ ++#ifdef EXT2_COMPR_DEBUG ++ /* TODO: remove this TEST */ ++ /* mw: ext2_free_cluster_blocks can sleep: check we are not atomic */ ++ schedule(); ++#endif ++ ++ /* Free unneeded blocks, and mark cluster as ++ compressed. */ ++ err = ext2_free_cluster_blocks ++ (inode, ++ ext2_cluster_block0(inode, cluster) + free_ix, ++ ext2_cluster_nblocks(inode, cluster) - free_ix); ++ /* pjm 1998-06-15: This should help reduce fragmentation. ++ Actually, we could set block to clu_block0 + clu_nbytes, ++ and goal to the last allocated blkaddr in the compressed ++ cluster. ++ It would be nice if we would transfer the freed blocks ++ to preallocation, while we're at it. */ ++// write_lock(&ei->i_meta_lock); ++ /* mw: i_next_alloc_goal and i_next_alloc_block were removed in 2.6.24.x ++ * so we dont need to set them to 0 (they are anyway, somehow). ++ */ ++ //ei->i_next_alloc_goal = ei->i_next_alloc_block = 0; ++// write_unlock(&ei->i_meta_lock); ++ if (err < 0) { ++ goto done; ++ } ++ /* Note that ext2_free_cluster_blocks() marks the ++ cluster as compressed. */ ++ ++ /* Shuffle used blocks to beginning of block-number array. */ ++ { ++ struct ext2_bkey key; ++ unsigned i; ++ ++ if (!ext2_get_key(&key, ++ inode, ++ ext2_cluster_block0(inode, cluster))) { ++ ei->i_flags |= EXT2_ECOMPR_FL; ++ result = -EIO; ++ free_ix = 0; ++ } ++ for (i = 0; i < free_ix; i++) { ++ curr = ext2_get_key_blkaddr(&key); ++ ++ if ((c_nblk == free_ix) ++ && (curr != bh[i]->b_blocknr)) { ++ /* "Can't happen", yet has ++ happened a couple of times. 
*/ ++ ext2_error(inode->i_sb, "ext2_compress_cluster", ++ "c_nblk=free_ix=%d, " ++ "curr=%u, b_blocknr=%lu, " ++ "lhp=%d , hm=, " ++ "ino=%lu, blk=%u", ++ c_nblk, curr, ++ (unsigned long) bh[i]->b_blocknr, ++ (int) last_hole_pos, ++ /*mw: became invalid due put_cpu: ++ (unsigned) le32_to_cpu(*(u32 *) head-> ++ holemap),*/ ++ inode->i_ino, ++ (unsigned) ++ ext2_cluster_block0(inode, cluster) + i); ++ } ++ err = ext2_set_key_blkaddr(&key, ++ (i < c_nblk ++ ? bh[i]->b_blocknr ++ : EXT2_COMPRESSED_BLKADDR)); ++ if (err) ++ break; ++ if (!ext2_next_key(&key, 1)) { ++ ei->i_flags |= EXT2_ECOMPR_FL; /* sorry... */ ++ result = -EIO; ++ break; ++ } ++ } ++ ext2_free_key(&key); ++ } ++ } ++ ++ /* ++ * Unlock the working area. ++ */ ++ ++#ifdef EXT2_COMPR_REPORT_WA ++ printk(KERN_DEBUG "pid %d leaves critical region\n", current->pid); ++#endif ++ ++ assert(c_nblk < u_nblk); ++ ext2_mark_algorithm_use(inode, alg); ++ ++ /* TLL update b_assoc_map per 2.6.20 6-07-07 */ ++ for (i = 0; i < c_nblk; i++) ++ if (bh[i] != NULL) { ++ bh[i]->b_assoc_map = inode->i_mapping; ++ bh[i]->b_page->mapping = inode->i_mapping; //Andreas 5-24-07 : necessary? WRONG? ++ } ++ //mw: we must force the writeback, otherwise ext2_readpage will get confused ++ // yaboo ding had similiar code above. but I think it makes more sense after ++ // the block shuffeling. ++ // Note: generic_oysnc_inode() made trouble with USB-Sticks and caused a lot ++ // of IO, stalled system ... therefore ll_rw_block() replace it. Anyway we already operate ++ // with this low-level function. ++ ++ /*mw: new "hole" fix. hole == bdev bug! */ ++ for (i = 0; i < c_nblk; i++) { ++ ++ /* this was a hole (uncompressed) ++ * at the beginning of the cluster. ++ * so NO block was yet associated with it. ++ * But now we need it, because a compressed ++ * cluster always starts at the cluster.*/ ++ if (!buffer_mapped(bh[i]) || bh[i]->b_bdev == NULL) { ++ u32 block = ext2_cluster_block0(inode, cluster); ++ ext2_get_block(inode, block + i, bh[i], 1); ++ //printk("ext2_get_block Block:%lu, Mapped:%i, Page:%lu, bdev: %#x\n", bh[i]->b_blocknr, (bh[i]->b_state & BH_Mapped), (bh[i]->b_page ? bh[i]->b_page->index : 0), bh[i]->b_bdev ); ++ } ++ assert(buffer_mapped(bh[i])); ++ assert(bh[i]->b_bdev != NULL); ++ assert(bh[i]->b_bdev == inode->i_sb->s_bdev); ++ } ++ ++ ll_rw_block(WRITE, c_nblk, bh); ++ ++ CHECK_NOT_ATOMIC ++ //mw: seems we have to wait here, otherwise: crash! ++ for (i = 0; i < c_nblk; i++) { ++ if (bh[i]) ++ wait_on_buffer(bh[i]); ++ //printk("written compressed block: Block:%lu, Mapped:%i, Page:%lu, bdev: %#x\n", bh[i]->b_blocknr, (bh[i]->b_state & BH_Mapped), (bh[i]->b_page ? bh[i]->b_page->index : 0), bh[i]->b_bdev ); ++ } ++ ++ ++#ifdef CONFIG_HIGHMEM ++ if (kmapped) ++ ext2_kunmap_cluster_pages(NULL, pg, NULL); ++#endif ++ ++ inode->i_ctime = CURRENT_TIME; //mw: these two come always together. So I also put it here. ++ mark_inode_dirty_sync(inode); ++ ++ //ext2_update_inode(inode, inode_needs_sync(inode)); //mw: might be able to fix pipe_write vs. readpage. mutex-rec-locking ++ ++ /* COMPRBLK is already high, so no need to raise it. 
*/ ++ { ++ for (i = c_nblk; (i < EXT2_MAX_CLUSTER_BLOCKS) && (bh[i] != NULL); ++ i++) { ++ clear_buffer_dirty(bh[i]); ++ bh[i]->b_blocknr = 0; ++ clear_bit(BH_Mapped, &bh[i]->b_state); ++ clear_bit(BH_Uptodate, &bh[i]->b_state); ++ } ++ for (i = 0; i < EXT2_MAX_CLUSTER_PAGES; i++) { ++ if (pg[i] == NULL) { ++ break; ++ } ++ assert(PageLocked(pg[i])); ++ ClearPageUptodate(pg[i]); ++ unlock_page(pg[i]); ++ page_cache_release(pg[i]); ++ } ++ ++ /* invalidate_inode_buffers replacement code: TLL 02/21/07 ++ * e2compr on post 2.6.10 kernels do not have an uptodate ++ * mapping->assoc_mapping (other Vm(?) changes require it be ++ * made explicit, 2.4 kernels have it implicit). Therefore, when ++ * umount is called, a GPF ensues from a NULL ops pointer. ++ * e2c on a USB thumbdrive mounted as the root fs does not ++ * support repeated compress/uncompress cycles on a given file. ++ * Inlined the flush list code to explicityly force update to ++ * disk with a known valid bh list. ++ */ ++ ++ /* mw: I consider this code as ... not so good! */ ++ /* ++ if (inode_has_buffers(inode)) { ++ //struct address_space *mapping = &inode->i_data; ++ // struct address_space *buffer_mapping = mapping->assoc_mapping; ++ // requires: inode->i_data->mapping->assoc_mapping; to be set ++ invalidate_inode_buffers(inode); // TLL do it proper 5-25-07 ++ //if (dotrunc) ++ //ext2_truncate(inode); // TLL file size hack 6-19-07 ++ } ++ */ ++ ++ } ++#ifdef EXT2_COMPR_REPORT ++ trace_e2c(" < < < ext2_compress_cluster %i: [done cpr] inode=%ld\n", cluster, inode->i_ino); ++#endif ++ return result; ++ ++ ++ done: ++ ++#ifdef CONFIG_HIGHMEM ++ if (kmapped) ++ ext2_kunmap_cluster_pages(NULL, pg, NULL); ++#endif ++ ++ { ++ for (i = 0; i < EXT2_MAX_CLUSTER_PAGES; i++) { ++ if (pg[i] == NULL) { ++ break; ++ } ++ unlock_page(pg[i]); ++ page_cache_release(pg[i]); ++ } ++ /* TLL cp to compr dir bug fix 03-25-07 ++ Truncate uncompressed files to their uncompressed ++ length, i.e. force kernel to update inode and sb */ ++ //if(dotrunc) ++ //26.08.2011: ext2_truncate(inode) does not exist anymore ++ ext2_truncate_blocks(inode, inode->i_size); ++ ++ } ++#ifdef EXT2_COMPR_REPORT_VERBOSE ++ { ++ int i; ++ ++ printk(KERN_DEBUG "ext2_compress_cluster[end]: buffers kept for cluster=%d\n", cluster); ++ for (i = 0; i < nbh; i++) { ++ if (bh[i]) { ++ printk(KERN_DEBUG "2buffer_head[%d]: blocknr=%lu, addr=0x%p ", i, (unsigned long) bh[i]->b_blocknr, bh[i]); ++ if (bh[i]->b_page) ++ printk(KERN_DEBUG "2:[page->index=%ld]\n", bh[i]->b_page->index); ++ else ++ printk(KERN_DEBUG "[No page]\n"); ++ } else ++ printk(KERN_DEBUG "buffer_head[%d] is NULL\n", i); ++ } ++ } ++#endif ++ ++#ifdef EXT2_COMPR_REPORT ++ trace_e2c(" < < < ext2_compress_cluster %i: [done NO cpr] inode=%ld\n", cluster, inode->i_ino); ++#endif ++ return result; ++} ++ ++ ++/* Go through all the clusters and compress them if not already ++ compressed. ++ ++ This is called by ext2_put_inode() and ext2_release_file(). Later, ++ we may have ext2_ioctl() call it (when EXT2_COMPR_FL rises). None ++ of the callers does any locking, so we do it here. ++ ++ Neither of the current callers uses the return code, but we get ready ++ for if we start using it. ++ ++ Returns 0 on "success" (whether or not we cleared EXT2_CLEANUP_FL ++ or EXT2_DIRTY_FL bits), -errno on error. 
*/ ++int ext2_cleanup_compressed_inode(struct inode *inode) ++{ ++ u32 cluster; ++ u32 n_clusters; ++ int dirty = 0; ++ int err = 0; ++ u32 comprblk_mask; ++ atomic_t start_i_count = inode->i_count; ++ int retry = 0; ++ int have_downed; ++ struct ext2_inode_info *ei = EXT2_I(inode); ++#ifdef EXT2_COMPR_REPORT ++ char bdn[BDEVNAME_SIZE]; ++#endif ++ ++ /* impl: Actually, this assertion could fail if the kernel ++ isn't locked. I haven't looked, but I suppose that the ++ kernel always is locked when this is called. */ ++ assert(ei->i_compr_flags & EXT2_CLEANUP_FL); ++ ++#ifdef EXT2_COMPR_REPORT_PUT ++ printk(KERN_DEBUG "ext2_cleanup_compressed_inode() called for pid %d; " ++ "dev=%s, ino=%lu, i_state=0x%lx, i_count=%u\n", ++ current->pid, bdevname(inode->i_sb->s_bdev, bdn), inode->i_ino, ++ inode->i_state, atomic_read(&inode->i_count)); ++#endif ++ ++ /* Do these tests twice: once before down() and once after. */ ++ for (have_downed = 0;; have_downed++) { ++ if ((ei->i_flags & (EXT2_COMPR_FL | EXT2_DIRTY_FL)) ++ != (EXT2_COMPR_FL | EXT2_DIRTY_FL)) { ++ if (have_downed) ++ goto out; ++ /* TLL 5-25-07 changed from a warning to trace */ ++ /*trace_e2c("ext2_cleanup_compressed_inode: trying to un/compress an " ++ "uncompressable file.\n" ++ "i_flags=%#x. (dev=%s, ino=%lu, down=%d)\n", ++ ei->i_flags, bdevname(inode->i_sb->s_bdev, bdn), ++ inode->i_ino, have_downed); */ ++ return 0; ++ } ++ ++ /* test if file is mapped by mmap */ ++ if (mapping_mapped(inode->i_mapping)) ++ { ++ //trace_e2c("ext2_cleanup_compressed_inode: (dev. %s): ino=%ld: file mapped, does not compress cluster\n", bdevname(inode->i_sb->s_bdev, bdn), inode->i_ino); ++ if (have_downed) ++ goto out; ++ else ++ return 0; ++ } ++ ++ if (IS_RDONLY(inode) ++ || (ei->i_flags & EXT2_ECOMPR_FL)) { ++ ei->i_compr_flags &= ~EXT2_CLEANUP_FL; ++ if (have_downed) ++ goto out; ++ else ++ return 0; ++ } ++ ++ //mw ++ if (ext2_get_dcount(inode) > 1) { ++ err = 0; ++ //printk("Compress: file busy (dcount: %i>1)\n", ext2_get_dcount(inode)); ++ if (have_downed) ++ goto out; ++ else ++ return 0; ++ } ++ ++ if (have_downed) ++ break; ++ ++ /* Quotas aren't otherwise kept if file is opened O_RDONLY. */ ++ dquot_initialize(inode); ++ ++ /* Check whether OSYNC of inode is acutally running */ ++ //if (ei->i_compr_flags & EXT2_OSYNC_INODE) ++ //printk(KERN_DEBUG "OSYNC!\n"); ++ ++ /* I think: ++ * checking these flags should prevent that one Process aquires the MUTEX again, ++ * e.g. in a recursive call ++ * BUT: what happens acutally: two processes are working on this inode: pdflush and the userprogramm ++ * SO: the check might be correct if: ei->i_compr_flags & EXT2_OSYNC_INOD AND the same process already posesses this lock!!! ++ */ ++ //if (!(ei->i_compr_flags & EXT2_OSYNC_INODE)) ++ //{ ++ mutex_lock(&inode->i_mutex); ++#ifdef EXT2_COMPR_REPORT_MUTEX ++ printk(KERN_DEBUG "CLEANUP_LOCK of PID %u @ inode:%lu\n", current->pid, inode->i_ino); ++#endif ++ //} ++ } ++ n_clusters = ext2_n_clusters(inode); ++ ++#ifdef EXT2_COMPR_REPORT_PUT ++ printk(KERN_DEBUG "ext2: inode:%lu: put compressed, clusters = %d, flags = %x, pid = %u\n", ++ inode->i_ino, n_clusters, ei->i_flags, current->pid); ++#endif ++ ++ assert(atomic_read(&inode->i_mutex.count) <= 0); /* i.e. mutex_lock */ ++ ++ /* Try to compress the clusters. We clear EXT2_DIRTY_FL only ++ if we looked at every cluster and if there was no error. */ ++ ++ /* impl: We raise EXT2_COMPRBLK_FL now so that ext2_ioctl() ++ doesn't try to change the cluster size beneath us. 
If need ++ be, we restore the bit to its original setting before ++ returning. Note that no-one else can _change_ ++ EXT2_COMPRBLK_FL while we work because i_sem is down. */ ++ /* impl: Note what's happening here with comprblk_mask. The ++ current state of COMPRBLK_FL (before we start) is that ++ (comprblk == 1) || (no compressed clusters). At the end of ++ the procedure, comprblk == one if (at least one compressed ++ cluster, or an error occurred preventing us from finding ++ out). */ ++ comprblk_mask = ~EXT2_COMPRBLK_FL | ei->i_flags; ++ ei->i_flags |= EXT2_COMPRBLK_FL; ++ ++ for (cluster = 0; cluster < n_clusters; cluster++) { ++ if (atomic_read(&inode->i_count) > atomic_read(&start_i_count)) { ++ /* This is a poor way of doing this (and doubly ++ poor now that the only users of i_count are ++ the dentries), but the idea is not to ++ compress things tht are likely to be ++ decompressed soon. I guess a better way of ++ doing this would be just to make sure tht ++ the stuff is in the page cache. */ ++ retry = 1; ++ break; ++ } ++ err = ext2_cluster_is_compressed_fn(inode, cluster); ++ if (err == 0) { ++ //mw: ext2_compress_cluster might clean EXT2_COMPRBLK_FL, therefore raise it for every new cluster ++ ei->i_flags |= EXT2_COMPRBLK_FL; ++ ++ err = ext2_compress_cluster(inode, cluster); ++ if (err < 0) ++ dirty = 1; ++ else if (err > 0) ++ comprblk_mask = ~0ul; ++ } else if (err < 0) ++ break; ++ else { ++ err = 0; ++ assert(comprblk_mask == ~0ul); /* i.e. that EXT2_COMPRBLK_FL was high. */ ++ } ++ } ++ ++ if ((cluster >= n_clusters) && !dirty) ++ ei->i_flags &= ~EXT2_DIRTY_FL; ++ if (!retry) { ++ ei->i_compr_flags &= ~EXT2_CLEANUP_FL; ++ ei->i_flags &= comprblk_mask; ++ } ++ ++ /* We clear EXT2_CLEANUP_FL because, otherwise, we'll get ++ called again almost immediately. */ ++ ++ /* ++ * The CLEANUP flag *MUST* be cleared, otherwise the iput routine ++ * calls ext2_put_inode() again (because i_dirt is set) and there ++ * is a loop. The control scheme (CLEANUP + DIRTY flags) could ++ * probably be improved. On the other hand, i_dirt MUST be set ++ * because we may have sleeped, and we must force the iput routine ++ * to look again at the i_count ... ++ */ ++ /* TODO: Have a look at this cleanup scheme. The above ++ comment sounds wrong. */ ++ ++ inode->i_ctime = CURRENT_TIME; ++ mark_inode_dirty_sync(inode); ++ out: ++ ++#ifdef EXT2_COMPR_REPORT_MUTEX ++ printk(KERN_DEBUG "CLEANUP_UNLOCK of PID %u @ inode:%lu\n", current->pid, inode->i_ino); ++#endif ++ ++// if (!(ei->i_compr_flags & EXT2_OSYNC_INODE)) { /* MW 5-16-07 */ ++ mutex_unlock(&inode->i_mutex); ++// } /* MW 5-16-07 */ ++ return err; /* TODO: Check that ,err` is appropriate. */ ++} ++ ++ ++int ext2_recognize_compressed(struct inode *inode, unsigned cluster) ++{ ++ /* ext2_recognize_compressed(): Check tht the cluster is valid ++ in every way, and then do the EXT2_COMPRESSED_BLKADDR ++ thing. */ ++ /* nyi, fixme. All of the userspace stuff (EXT2_NOCOMPR_FL ++ etc.) needs work, so I might as well leave this. See ++ ioctl.c for a description of what it's supposed to do. */ ++ return -ENOSYS; ++} ++ ++ ++/* Look for compressed clusters. If none, then clear EXT2_COMPRBLK_FL. ++ ++ Called by: ++ ext2_truncate(). 
++ */ ++void ext2_update_comprblk(struct inode *inode) ++{ ++ unsigned block, last_block; ++ struct ext2_bkey key; ++ struct ext2_inode_info *ei = EXT2_I(inode); ++ ++ assert(ei->i_flags & EXT2_COMPRBLK_FL); ++ if (inode->i_size == 0) { ++ ei->i_flags &= ~EXT2_COMPRBLK_FL; ++ trace_e2c("ext2_update_comprblk 1: inode: %lu removed EXT2_COMPRBLK_FL!\n", inode->i_ino); ++ return; ++ } ++ last_block = ROUNDUP_RSHIFT(inode->i_size, ++ inode->i_sb->s_blocksize_bits) - 1; ++ block = ext2_first_cluster_nblocks(inode) - 1; ++ ++ assert(atomic_read(&inode->i_mutex.count) <= 0); ++ ++ if (!ext2_get_key(&key, inode, block)) ++ return; ++ for (;;) { ++ if (ext2_get_key_blkaddr(&key) == EXT2_COMPRESSED_BLKADDR) ++ goto out; ++ if (block >= last_block) ++ goto clear; ++ if (!ext2_next_key(&key, ei->i_clu_nblocks)) ++ goto out; ++ block += ei->i_clu_nblocks; ++ } ++ clear: ++ trace_e2c("ext2_update_comprblk 2: inode: %lu removed EXT2_COMPRBLK_FL!\n", inode->i_ino); ++ ei->i_flags &= ~EXT2_COMPRBLK_FL; ++ out: ++ ext2_free_key(&key); ++ assert(atomic_read(&inode->i_mutex.count) <= 0); ++ ++} ++ ++ ++/* ++ * allocate working areas ++ */ ++ ++DEFINE_PER_CPU(struct ext2_wa_S *, ext2_rd_wa) = NULL; ++DEFINE_PER_CPU(struct ext2_wa_S *, ext2_wr_wa) = NULL; ++ ++/* SMP, setup wa's. caller must hold wa already via get_cpu_var */ ++void ext2_alloc_rd_wa(){ ++ if ((__get_cpu_var(ext2_rd_wa) == NULL) ) { ++ size_t rsize = 2 * EXT2_MAX_CLUSTER_BYTES; //mw: just guessing ++ ++ __get_cpu_var(ext2_rd_wa) = vmalloc (rsize); ++ if (__get_cpu_var(ext2_rd_wa) == NULL) ++ printk ("EXT2-fs: can't allocate working area; compression turned off.\n"); ++ else { ++ printk ("ext2-compression: allocated read buffer for CPU%i at %p-%p (%zu bytes)\n", ++ get_cpu(), __get_cpu_var(ext2_rd_wa), (char *)__get_cpu_var(ext2_rd_wa) + rsize, rsize); ++# ifdef EXT2_COMPR_REPORT_WA ++ printk (KERN_INFO "EXT2-fs: rd_wa=%p--%p (%d)\n", ++ ext2_rd_wa, (char *)ext2_rd_wa + rsize, rsize); ++# endif ++ put_cpu(); ++ } ++ } ++} ++ ++void ext2_alloc_wr_wa(){ ++ ++ if ((__get_cpu_var(ext2_wr_wa) == NULL) ) { ++ size_t wsize = 2 * EXT2_MAX_CLUSTER_BYTES; //mw: just guessing ++ __get_cpu_var(ext2_wr_wa) = vmalloc (wsize); ++ ++ if (__get_cpu_var(ext2_wr_wa) == NULL) ++ printk ("EXT2-fs: can't allocate working area; " ++ "compression turned off.\n"); ++ else { ++ printk ("ext2-compression: allocated write buffer for CPU%i at %p-%p (%zu bytes)\n", ++ get_cpu(), __get_cpu_var(ext2_wr_wa), (char *)__get_cpu_var(ext2_wr_wa) + wsize, wsize); ++#ifdef EXT2_COMPR_REPORT_WA ++ printk (KERN_INFO "EXT2-fs: wr_wa=%p--%p (%d)\n", ++ ext2_wr_wa, (char *)ext2_wr_wa + wsize, wsize); ++#endif ++ put_cpu(); ++ } ++ } ++} ++ ++ +--- linux-3.4-rc5/fs/ext2/e2zlib.c 1969-12-31 19:00:00.000000000 -0500 ++++ linux-3.4-rc5-e2c/fs/ext2/e2zlib.c 2012-04-30 04:11:03.795143099 -0400 +@@ -0,0 +1,74 @@ ++ ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++ ++static DEFINE_PER_CPU(struct crypto_comp *, tfm) = NULL; ++ ++size_t ext2_iZLIB(int action) ++{ ++ /*mw: we init tfm when we need it...*/ ++ return 0; ++} ++ ++ ++size_t ext2_wZLIB(__u8 * ibuf, __u8 * obuf, void *heap, ++ size_t ilen, size_t olen, int level) ++{ ++ int ret, dlen; ++ ++ if (!try_module_get(THIS_MODULE)) ++ return 0; ++ ++ /*check if we already have a tfm*/ ++ get_cpu_var(tfm); ++ if (__get_cpu_var(tfm) == NULL){ ++ __get_cpu_var(tfm) = crypto_alloc_comp("deflate", 0, CRYPTO_ALG_ASYNC); ++ } ++ assert(__get_cpu_var(tfm) != NULL); ++ ++ dlen = olen; ++ ret = 
crypto_comp_compress(__get_cpu_var(tfm) , ibuf, ilen, obuf, &dlen); ++ ++ put_cpu_var(tfm); ++ ++ if (ret) { ++ //printk(KERN_DEBUG "ext2_wZLIB: crypto_comp_compress failed: %d, ilen: %d, olen: %d\n", ret, ilen, olen); ++ return 0; ++ } ++ return dlen; ++} ++ ++ ++size_t ext2_rZLIB(__u8 * ibuf, __u8 * obuf, void *heap, ++ size_t ilen, size_t olen, int ignored) ++{ ++ int ret, dlen; ++ ++ if (!try_module_get(THIS_MODULE)) ++ return 0; ++ ++ /*check if we already have a tfm*/ ++ get_cpu_var(tfm); ++ if (__get_cpu_var(tfm) == NULL){ ++ __get_cpu_var(tfm) = crypto_alloc_comp("deflate", 0, CRYPTO_ALG_ASYNC); ++ } ++ assert(__get_cpu_var(tfm) != NULL); ++ ++ dlen = olen; ++ ret = crypto_comp_decompress(__get_cpu_var(tfm), ibuf, ilen, obuf, &dlen); ++ ++ put_cpu_var(tfm); ++ ++ if (ret) { ++ //printk(KERN_DEBUG "ext2_wZLIB: crypto_comp_decompress failed: %d, ilen: %d, olen: %d\n", ret, ilen, olen); ++ return 0; ++ } ++ ++ return dlen; ++} +--- linux-3.4-rc5/fs/ext2/adler32.c 1969-12-31 19:00:00.000000000 -0500 ++++ linux-3.4-rc5-e2c/fs/ext2/adler32.c 2012-04-30 04:11:03.795143099 -0400 +@@ -0,0 +1,43 @@ ++/* adler32.c -- compute the Adler-32 checksum of a data stream ++ * Copyright (C) 1995-1998 Mark Adler ++ * For conditions of distribution and use, see copyright notice in zlib.h ++ */ ++ ++/* @(#) $Id: e2compr2.6.25.patch,v 1.1.2.1 2008/04/17 09:49:32 winkler Exp $ */ ++ ++#define BASE 65521L /* largest prime smaller than 65536 */ ++#define NMAX 5552 ++/* NMAX is the largest n such that 255n(n+1)/2 + (n+1)(BASE-1) <= 2^32-1 */ ++ ++#define DO1(buf,i) {s1 += buf[i]; s2 += s1;} ++#define DO2(buf,i) DO1(buf,i); DO1(buf,i+1); ++#define DO4(buf,i) DO2(buf,i); DO2(buf,i+2); ++#define DO8(buf,i) DO4(buf,i); DO4(buf,i+4); ++#define DO16(buf) DO8(buf,0); DO8(buf,8); ++ ++/* ========================================================================= */ ++unsigned long ext2_adler32(unsigned long adler, const unsigned char *buf, unsigned int len) ++{ ++ unsigned long s1 = adler & 0xffff; ++ unsigned long s2 = (adler >> 16) & 0xffff; ++ int k; ++ ++ if (buf == 0) return 1L; ++ ++ while (len > 0) { ++ k = len < NMAX ? 
len : NMAX; ++ len -= k; ++ while (k >= 16) { ++ DO16(buf); ++ buf += 16; ++ k -= 16; ++ } ++ if (k != 0) do { ++ s1 += *buf++; ++ s2 += s1; ++ } while (--k); ++ s1 %= BASE; ++ s2 %= BASE; ++ } ++ return (s2 << 16) | s1; ++} +--- linux-3.4-rc5/fs/ext2/super.c 2012-04-29 18:19:10.000000000 -0400 ++++ linux-3.4-rc5-e2c/fs/ext2/super.c 2012-04-30 04:11:03.797143097 -0400 +@@ -32,7 +32,12 @@ + #include + #include + #include ++#ifdef CONFIG_EXT2_COMPRESS ++#include ++#include ++#else + #include "ext2.h" ++#endif + #include "xattr.h" + #include "acl.h" + #include "xip.h" +@@ -392,7 +397,11 @@ enum { + Opt_resgid, Opt_resuid, Opt_sb, Opt_err_cont, Opt_err_panic, + Opt_err_ro, Opt_nouid32, Opt_nocheck, Opt_debug, + Opt_oldalloc, Opt_orlov, Opt_nobh, Opt_user_xattr, Opt_nouser_xattr, +- Opt_acl, Opt_noacl, Opt_xip, Opt_ignore, Opt_err, Opt_quota, ++ Opt_acl, Opt_noacl, ++#ifdef CONFIG_EXT2_COMPRESS ++ Opt_force_compat, ++#endif ++ Opt_xip, Opt_ignore, Opt_err, Opt_quota, + Opt_usrquota, Opt_grpquota, Opt_reservation, Opt_noreservation + }; + +@@ -425,6 +434,9 @@ static const match_table_t tokens = { + {Opt_ignore, "noquota"}, + {Opt_quota, "quota"}, + {Opt_usrquota, "usrquota"}, ++#ifdef CONFIG_EXT2_COMPRESS ++ {Opt_force_compat, "force-compat"}, ++#endif + {Opt_reservation, "reservation"}, + {Opt_noreservation, "noreservation"}, + {Opt_err, NULL} +@@ -568,6 +580,11 @@ static int parse_options(char *options, + clear_opt(sbi->s_mount_opt, RESERVATION); + ext2_msg(sb, KERN_INFO, "reservations OFF"); + break; ++#ifdef CONFIG_EXT2_COMPRESS ++ case Opt_force_compat: ++ set_opt(sbi->s_mount_opt, FORCE_COMPAT); ++ break; ++#endif + case Opt_ignore: + break; + default: +@@ -584,6 +601,10 @@ static int ext2_setup_super (struct supe + int res = 0; + struct ext2_sb_info *sbi = EXT2_SB(sb); + ++#ifdef CONFIG_EXT2_COMPRESS ++ printk (KERN_INFO E2COMPR_VERSION "\n"); ++#endif ++ + if (le32_to_cpu(es->s_rev_level) > EXT2_MAX_SUPP_REV) { + ext2_msg(sb, KERN_ERR, + "error: revision level too high, " +@@ -875,6 +896,65 @@ static int ext2_fill_super(struct super_ + le32_to_cpu(features)); + goto failed_mount; + } ++#ifdef CONFIG_EXT2_COMPRESS ++ /* Check that required algorithms are available. */ ++ /* todo: Provide a mount option to override this. */ ++ /* ++ * Philosophical bug: we assume that an algorithm's ++ * module is available if and only if this kernel was ++ * compiled with that algorithm as a module. This is ++ * untrue, but it is unclear what the right thing to ++ * do is. ++ */ ++ j = 0; /* error flag */ ++ if ((es->s_feature_incompat ++ & cpu_to_le32(EXT2_FEATURE_INCOMPAT_COMPRESSION)) ++ && (es->s_algorithm_usage_bitmap ++ & ~cpu_to_le32(EXT2_ALGORITHMS_SUPP))) { ++ /* ++ * The filesystem employs an algorithm not ++ * supported by this filesystem. Issue warning or ++ * error. ++ */ ++ for (i = 0; i < 32; i++) { ++ if (!(es->s_algorithm_usage_bitmap ++ & cpu_to_le32(1 << i)) ++ || ((EXT2_ALGORITHMS_SUPP ++ & (1 << i)))) ++ continue; ++ /* ++ * TODO: Can't this message be moved outside ++ * of the for loop? 
++ */ ++ if (!j) { ++ if (test_opt(sb, FORCE_COMPAT)) ++ printk(KERN_WARNING ++ "EXT2-fs: %s: " ++ "uses unsupported " ++ "compression algorithms", ++ sb->s_id); ++ else ++ printk("EXT2-fs: %s: couldn't mount " ++ "because of unsupported " ++ "compression algorithms", ++ sb->s_id); ++ j = 1; ++ } ++ if (i < EXT2_N_ALGORITHMS) ++ printk(" %s", ext2_algorithm_table[i].name); ++ else ++ printk(" %u", i); ++ } ++ } ++ if (j) { ++ if (test_opt(sb, FORCE_COMPAT)) ++ printk(" but ignoring as you request.\n"); ++ else { ++ printk(".\n"); ++ goto failed_mount; ++ } ++ } ++#endif /* CONFIG_EXT2_COMPRESS */ + if (!(sb->s_flags & MS_RDONLY) && + (features = EXT2_HAS_RO_COMPAT_FEATURE(sb, ~EXT2_FEATURE_RO_COMPAT_SUPP))){ + ext2_msg(sb, KERN_ERR, "error: couldn't mount RDWR because of " +--- linux-3.4-rc5/fs/ext2/ialloc.c 2012-04-29 18:19:10.000000000 -0400 ++++ linux-3.4-rc5-e2c/fs/ext2/ialloc.c 2012-04-30 04:11:03.797143097 -0400 +@@ -470,6 +470,9 @@ struct inode *ext2_new_inode(struct inod + brelse(bitmap_bh); + bitmap_bh = read_inode_bitmap(sb, group); + if (!bitmap_bh) { ++#ifdef CONFIG_EXT2_COMPRESS ++ EXT2_I(inode)->i_flags &= ~EXT2_COMPR_FL; ++#endif + err = -EIO; + goto fail; + } +@@ -558,6 +561,17 @@ got: + memset(ei->i_data, 0, sizeof(ei->i_data)); + ei->i_flags = + ext2_mask_flags(mode, EXT2_I(dir)->i_flags & EXT2_FL_INHERITED); ++#ifdef CONFIG_EXT2_COMPRESS ++ /* ++ * The EXT2_COMPR flag is inherited from the parent ++ * directory as well as the cluster size and the compression ++ * algorithm. ++ */ ++ ei->i_log2_clu_nblocks = EXT2_I(dir)->i_log2_clu_nblocks; ++ ei->i_clu_nblocks = EXT2_I(dir)->i_clu_nblocks; ++ ei->i_compr_method = EXT2_I(dir)->i_compr_method; ++ ei->i_compr_flags = 0; ++#endif + ei->i_faddr = 0; + ei->i_frag_no = 0; + ei->i_frag_size = 0; +--- linux-3.4-rc5/fs/ext2/balloc.c 2012-04-29 18:19:10.000000000 -0400 ++++ linux-3.4-rc5-e2c/fs/ext2/balloc.c 2012-04-30 04:11:03.798143097 -0400 +@@ -11,8 +11,13 @@ + * David S. Miller (davem@caip.rutgers.edu), 1995 + */ + ++#ifdef CONFIG_EXT2_COMPRESS ++#include ++#include ++#else + #include "ext2.h" + #include ++#endif + #include + #include + #include +@@ -499,6 +504,13 @@ void ext2_free_blocks (struct inode * in + struct ext2_super_block * es = sbi->s_es; + unsigned freed = 0, group_freed; + ++ ++#ifdef CONFIG_EXT2_COMPRESS ++ assert((block != EXT2_COMPRESSED_BLKADDR) ++ || !S_ISREG(inode->i_mode) ++ || !(EXT2_SB(sb)->s_es->s_feature_incompat ++ & cpu_to_le32(EXT2_FEATURE_INCOMPAT_COMPRESSION))); ++#endif + if (block < le32_to_cpu(es->s_first_data_block) || + block + count < block || + block + count > le32_to_cpu(es->s_blocks_count)) { +--- linux-3.4-rc5/fs/ext2/inode.c 2012-04-29 18:19:10.000000000 -0400 ++++ linux-3.4-rc5-e2c/fs/ext2/inode.c 2012-04-30 04:11:03.803143097 -0400 +@@ -31,10 +31,45 @@ + #include + #include + #include ++#ifdef CONFIG_EXT2_COMPRESS ++#include ++#include ++#include ++#include ++#else + #include "ext2.h" ++#endif + #include "acl.h" + #include "xip.h" + ++#ifdef CONFIG_EXT2_COMPRESS ++/* mw: this function counts all references ++ * to this inode. this is necessary to ++ * refuse un/compression if the file has ++ * more than one refernce, I guess. 
*/ ++int ext2_get_dcount(struct inode *inode) ++{ ++ struct dentry *dentry; ++ struct list_head *head, *next, *tmp; ++ int count; ++ ++ head = &inode->i_dentry; ++ next = inode->i_dentry.next; ++ count = 0; ++ while (next != head) { ++ dentry = list_entry(next, struct dentry, d_alias); ++ tmp = next; ++ next = tmp->next; ++ spin_lock(&dentry->d_lock); ++ count += dentry->d_count; ++ spin_unlock(&dentry->d_lock); ++ //mw: similar to fs/dcache.c ++ } ++ ++ return count; ++} ++#endif ++ + static int __ext2_write_inode(struct inode *inode, int do_sync); + + /* +@@ -49,7 +84,9 @@ static inline int ext2_inode_is_fast_sym + inode->i_blocks - ea_blocks == 0); + } + ++#ifndef CONFIG_EXT2_COMPRESS + static void ext2_truncate_blocks(struct inode *inode, loff_t offset); ++#endif + + static void ext2_write_failed(struct address_space *mapping, loff_t to) + { +@@ -235,7 +272,11 @@ static Indirect *ext2_get_branch(struct + *err = 0; + /* i_data is not going away, no lock needed */ + add_chain (chain, NULL, EXT2_I(inode)->i_data + *offsets); ++#ifdef CONFIG_EXT2_COMPRESS ++ if (HOLE_BLKADDR(p->key)) ++#else + if (!p->key) ++#endif + goto no_block; + while (--depth) { + bh = sb_bread(sb, le32_to_cpu(p->key)); +@@ -246,7 +287,11 @@ static Indirect *ext2_get_branch(struct + goto changed; + add_chain(++p, bh, (__le32*)bh->b_data + *++offsets); + read_unlock(&EXT2_I(inode)->i_meta_lock); ++#ifdef CONFIG_EXT2_COMPRESS ++ if (HOLE_BLKADDR(p->key)) ++#else + if (!p->key) ++#endif + goto no_block; + } + return NULL; +@@ -292,7 +337,11 @@ static ext2_fsblk_t ext2_find_near(struc + + /* Try to find previous block */ + for (p = ind->p - 1; p >= start; p--) ++#ifdef CONFIG_EXT2_COMPRESS ++ if (!HOLE_BLKADDR(*p)) ++#else + if (*p) ++#endif + return le32_to_cpu(*p); + + /* No such thing, so let's try location of indirect block */ +@@ -493,7 +542,13 @@ static int ext2_alloc_branch(struct inod + */ + bh = sb_getblk(inode->i_sb, new_blocks[n-1]); + branch[n].bh = bh; ++#ifndef CONFIG_EXT2_COMPRESS + lock_buffer(bh); ++#else ++ CHECK_NOT_ATOMIC ++ if (!buffer_uptodate(bh)) ++ wait_on_buffer(bh); ++#endif + memset(bh->b_data, 0, blocksize); + branch[n].p = (__le32 *) bh->b_data + offsets[n]; + branch[n].key = cpu_to_le32(new_blocks[n]); +@@ -509,7 +564,9 @@ static int ext2_alloc_branch(struct inod + *(branch[n].p + i) = cpu_to_le32(++current_block); + } + set_buffer_uptodate(bh); ++#ifndef CONFIG_EXT2_COMPRESS + unlock_buffer(bh); ++#endif + mark_buffer_dirty_inode(bh, inode); + /* We used to sync bh here if IS_SYNC(inode). + * But we now rely upon generic_write_sync() +@@ -670,6 +727,7 @@ static int ext2_get_blocks(struct inode + if (err == -EAGAIN || !verify_chain(chain, partial)) { + while (partial > chain) { + brelse(partial->bh); ++// bforget(partial->bh); /*mw: e2c-pre-2.6.30.4 used bforget here*/ + partial--; + } + partial = ext2_get_branch(inode, depth, offsets, chain, &err); +@@ -761,21 +819,608 @@ int ext2_fiemap(struct inode *inode, str + ext2_get_block); + } + ++#ifdef CONFIG_EXT2_COMPRESS ++/* ++ * Readpage method that will take care of decompression. ++ */ ++/* effic: I (pjm) think tht at present, reading a 32KB cluster 4KB at ++ a time does `decompress 4KB' for the first 4KB, then `decompress ++ 8KB' for the second, and so on. See if we can provide the page ++ cache with all the pages in a cluster. The problem is, we don't ++ want to erase anything tht hasn't been written to disk, so we can't ++ just call update_vm_cache(). 
The plan at present is to remember ++ what the contents of ext2_rd_wa.u come from, and don't bother ++ decompressing anything if the working area already contains the ++ right data. However, this is only a win where adjacent calls to ++ ext2_decompress_blocks() request the same cluster. We could force ++ that by copying some code from generic_file_read() (but check for ++ deadlocks before doing anything like that), but instead I'm taking ++ the more passive approach of hoping for the best. */ ++static int ext2_readpage(struct file *file, struct page *page) ++{ ++ struct inode *inode = page->mapping->host; ++ struct page *pg[EXT2_MAX_CLUSTER_PAGES], *epg[EXT2_MAX_CLUSTER_PAGES]; ++ u32 cluster0, max_cluster; ++ int i, blockOfCluster, blocksToDo, npg; ++ const int inc = PAGE_SIZE >> inode->i_sb->s_blocksize_bits; ++ struct ext2_inode_info *ei = EXT2_I(page->mapping->host); ++#ifdef CONFIG_HIGHMEM ++ int kmapped = 0; //mw ++#endif ++ ++ int iClusterCnt; ++ ++ /* For directories, fall out through default routine */ ++ if (S_ISDIR(inode->i_mode)) ++ { ++ int rc; ++ ++ rc = block_read_full_page(page,ext2_get_block); ++ assert(!rc); ++ return rc; ++ } ++ ++ /* The semaphore prevents us trying to compress and decompress ++ the cluster at the same time, or compress a cluster in the ++ middle of reading it (thinking it to be uncompressed). ++ ++ You may not like the fact that we hold the semaphore across ++ readpage (given that it isn't held without e2compr compiled ++ in), but it does guarantee that we won't compress the ++ cluster during readpage. (OTOH, it's unlikely, if not ++ impossible, for someone to ,compress a cluster and rewrite ++ the blocks` before the readpage completes.) */ ++ /* This procedure used to have `#ifndef EXT2_LOCK_BUFFERS' ++ around all the semaphore stuff, and unlocked each buffer ++ before brelsing them ifdef EXT2_LOCK_BUFFERS. I (pjm, ++ 1998-01-20) have removed that because (a) EXT2_LOCK_BUFFERS ++ isn't #defined anywhere, and doesn't appear outside of this ++ function, and (b) I haven't looked at what effect locking ++ the buffers has. You may like to reintroduce the idea of ++ buffer locking to this function if you're more familiar ++ with buffer locking than I, and believe that the full i_sem ++ isn't necessary to protect from races (people seeing raw ++ compressed data) between readpage and ext2_file_write(), ++ ext2_compress_cluster() and ext2_truncate(). */ ++ unlock_page(page); ++ mutex_lock(&inode->i_mutex); ++ ++ assert (atomic_read(&inode->i_mutex.count) <= 0); /* i.e. mutex_lock */ ++ ++ //mw: added EXT2_COMPR_FL, because EXT2_COMPRBLK_FL mit change without mutex !!! ++ if ( !(ei->i_flags & (EXT2_COMPRBLK_FL|EXT2_COMPR_FL)) ++ || (ei->i_flags & EXT2_NOCOMPR_FL) ) ++ { ++ goto readpage_uncompressed; ++ } ++ ++ { ++ register u32 blockOfFile ++ = (page->index << PAGE_CACHE_SHIFT) >> inode->i_sb->s_blocksize_bits; ++ ++ blocksToDo = PAGE_SIZE >> inode->i_sb->s_blocksize_bits; ++ cluster0 = ext2_block_to_cluster(inode, blockOfFile); ++ max_cluster = ext2_block_to_cluster ++ (inode, blockOfFile + blocksToDo - 1); ++ blockOfCluster ++ = blockOfFile - ext2_cluster_block0(inode, cluster0); ++ } ++ ++ /* return -???, any idea which code. do_generic_file_read() cares, ext2_readpages() doesn't. ++ maybe I should look at the "generic" readpage() and see what it returns in this case */ ++ ++ /* Check if any part of the requested area contains part of a ++ compressed cluster. If not, we can use default ext2_readpage(). 
++ ++ (Note that we don't have to worry about a cluster becoming ++ compressed in the meantime, because we have the semaphore.) ++ ++ A page can cover up to 9 clusters. (The maximum can only ++ occur with 32KB pages, 4KB clusters, and a non-page-aligned ++ offset. Thanks go to Kurt Fitzner for reporting that ++ page offsets needn't be aligned; see generic_file_mmap().) */ ++ { ++ int isCmp[(PAGE_SIZE >> 12) + 1]; ++ u8 *dst; ++ unsigned clu_ix; ++ ++ assert (max_cluster - cluster0 < sizeof(isCmp)/sizeof(*isCmp)); ++ for (clu_ix = 0; cluster0 + clu_ix <= max_cluster; clu_ix++) { ++ isCmp[clu_ix] = ext2_cluster_is_compressed_fn (inode, cluster0 + clu_ix); ++ if (isCmp[clu_ix] < 0){ ++ printk("IO-ERROR: isCmp\n"); ++ goto io_error; ++ } ++ } ++ ++ for (clu_ix = 0; cluster0 + clu_ix <= max_cluster; clu_ix++) ++ if (isCmp[clu_ix] > 0) ++ goto readpage_compressed; ++ /* fall through */ ++ readpage_uncompressed: ++ { ++ int rc=0; ++ lock_page(page); ++ ++ /* Did somebody else fill it already? */ ++ if (PageUptodate(page) ){ //mw: necessary for DEBUG! anyway checked in do_generic_mapping_read ++ unlock_page(page); ++ } ++ else { ++ //try_to_free_buffers(page); ++ rc = block_read_full_page(page,ext2_get_block); ++ } ++ mutex_unlock(&inode->i_mutex); ++ assert(!rc); ++ return rc; ++ } ++ ++ readpage_compressed: ++ ++ /* Copied from block_read_full_page */ ++ /* if (!PageLocked(page)) */ ++ /* PAGE_BUG(page); */ ++ lock_page(page); ++ if (PageUptodate(page)) { ++ unlock_page(page); ++ mutex_unlock(&inode->i_mutex); ++ return(0); ++ } ++ get_page(page); ++ ++ ClearPageUptodate(page); ++ ClearPageError(page); ++ ++ dst = (u8 *) page_address(page); ++ for (clu_ix = 0; cluster0 + clu_ix <= max_cluster; clu_ix++) { ++ struct buffer_head *bh[EXT2_MAX_CLUSTER_BLOCKS]; ++ int nbh, blocksThisClu; ++ ++ for (i = 0; i < EXT2_MAX_CLUSTER_PAGES; i++) { ++ pg[i] = NULL; ++ epg[i] = NULL; ++ } ++ ++ /* clear_bit(PG_locked, &page->flags); */ ++ npg = ext2_cluster_npages(inode, cluster0 + clu_ix); ++ nbh = ext2_get_cluster_pages(inode, cluster0 + clu_ix, pg, page, 0); ++ ++ if (nbh <= 0) { ++ for (i = 0; i < EXT2_MAX_CLUSTER_PAGES; i++) ++ printk("no pages\n"); ++ goto out; ++ } ++ iClusterCnt = ext2_cluster_npages(inode, cluster0); ++ ++ nbh = ext2_get_cluster_extra_pages(inode, cluster0 + clu_ix, pg, epg); ++ if (nbh <= 0) ++ { ++ for (i = 0; i < EXT2_MAX_CLUSTER_PAGES; i++) ++ epg[i] = NULL; ++ printk("no extra pages\n"); ++ goto out; ++ } ++ assert (iClusterCnt = ext2_cluster_npages(inode, cluster0)); ++ ++#ifdef CONFIG_HIGHMEM ++ ext2_kmap_cluster_pages(page, pg, epg); ++ kmapped = 1; ++#endif ++ ++ nbh = ext2_get_cluster_blocks(inode, cluster0 + clu_ix, bh, pg, epg, 0); ++ if (nbh <= 0) ++ { ++ printk("no blocks\n"); ++ goto out; ++ } ++ ++ /* How many blocks (including holes) we need from this cluster. */ ++ { ++ blocksThisClu = (ext2_cluster_nblocks(inode, cluster0 + ++ clu_ix) - blockOfCluster); ++ if (blocksThisClu > blocksToDo) ++ blocksThisClu = blocksToDo; ++ } ++ ++ if (isCmp[clu_ix]) { ++ u8 const *src; ++ int n, nbytes_wanted; ++ struct ext2_cluster_head *head; ++ unsigned meth; ++# ifdef CONFIG_KMOD ++ unsigned alg; ++# endif ++ ++ bh[0]->b_data = page_address(bh[0]->b_page); ++ head = (struct ext2_cluster_head *) bh[0]->b_data; ++ ++ /* jmr 1998-10-28 Hope this is the last time I'm moving this code. ++ * Module loading must be done _before_ we lock wa, just think what ++ * can happen if we reallocate wa when somebody else uses it... 
++ */ ++ meth = head->method; /* only a byte, so no swabbing needed. */ ++ if (meth >= EXT2_N_METHODS) { ++ printk("illegal method id\n"); ++ ext2_msg(inode->i_sb, ++ "illegal method id", ++ "inode = %lu, id = %u", ++ inode->i_ino, meth); ++ goto out; ++ } ++# ifdef CONFIG_KMOD ++ alg = ext2_method_table[meth].alg; ++ if (!ext2_algorithm_table[alg].avail) { ++ char str[32]; ++ ++ sprintf(str, "ext2-compr-%s", ext2_algorithm_table[alg].name); ++ request_module(str); ++ } ++# endif /* CONFIG_KMOD */ ++ ++ /* Calculate nbytes_wanted. */ ++ { ++ unsigned nblk_wanted, i; ++ ++ /* We want to decompress the whole cluster */ ++ //nblk_wanted = ext2_cluster_nblocks(inode, cluster0 + clu_ix); ++ nblk_wanted = npg << (PAGE_CACHE_SHIFT - inode->i_sb->s_blocksize_bits); /*mw: FIXED */ ++ ++ for (i = nblk_wanted; i != 0;) ++ if (((--i >> 3) < head->holemap_nbytes) ++ && (head->holemap[i >> 3] & (1 << (i & 7)))) ++ --nblk_wanted; ++ nbytes_wanted = (nblk_wanted ++ << inode->i_sb->s_blocksize_bits); ++ } ++ ++ /* Decompress. */ ++ get_cpu_var(ext2_rd_wa); ++ if (__get_cpu_var(ext2_rd_wa) == NULL) ++ { ++ ext2_alloc_rd_wa(); ++ } ++ assert(__get_cpu_var(ext2_rd_wa) != NULL); ++ ++ n = ext2_decompress_blocks(inode, bh, nbh, nbytes_wanted, cluster0 + clu_ix); ++ if (n < 0) { ++ assert(nbh >= 0); ++ printk("ext2_readpage: noblocks decompressed\n"); ++ put_cpu_var(ext2_rd_wa); ++ goto out; ++ } ++ ++# ifdef EXT2_COMPR_REPORT_VERBOSE_INODE ++ if (ei->i_flags & EXT2_COMPR_FL) ++ printk(KERN_DEBUG "ext2: mmap %04x:%lu: blocksToDo=%d, blockOfCluster=%d, blocksThisClu=%d, clu_nblocks=%d\n", ++ inode->i_rdev, ++ inode->i_ino, ++ blocksToDo, ++ blockOfCluster, ++ blocksThisClu, ++ ext2_cluster_nblocks(inode, cluster0 + clu_ix)); ++# endif ++ ++ /* */ ++ { ++ unsigned i; ++ int ipg; ++ ++ i = ext2_cluster_nblocks(inode, cluster0 + clu_ix) - 1; ++ //i = (npg << (PAGE_CACHE_SHIFT - inode->i_sb->s_blocksize_bits)) - 1; /*mw: FIXED!!! 
(here: shift = 2Bit) */ ++ //if(i+1 != ext2_cluster_nblocks(inode, cluster0 + clu_ix)) ++ //printk("npg=%i, nbh=%i, npgf=%i, nbhf =%i, cluster:%i, dec_blk:%i, b_wanted:%i, size:%i\n ", ext2_cluster_npages(inode, cluster0 + clu_ix), ext2_cluster_nblocks(inode, cluster0 + clu_ix), npgtest, i+1, cluster0 + clu_ix, n, nbytes_wanted, inode->i_size); ++ blockOfCluster = 0; ++ assert(n > 0); ++ src = __get_cpu_var(ext2_rd_wa)->u + nbytes_wanted - inode->i_sb->s_blocksize; ++#ifdef EXT2_COMPR_REPORT ++ trace_e2c("ext2_readpage: copy data inc=%d blocksThisClu=%d, n=%d\n", inc, blocksThisClu, n); ++#endif ++ for (ipg = npg - 1; ipg >= 0; ipg--) { ++ if (pg[ipg] == NULL) { ++ i -= inc; ++ src -= PAGE_SIZE; ++ continue; ++ } ++ if (((inode->i_size-1) >> PAGE_SHIFT) == pg[ipg]->index) { ++ n = ((inode->i_size-1) & (PAGE_SIZE -1)) >> inode->i_sb->s_blocksize_bits; ++ i -= ((blocksThisClu-1) - n); ++ src -= ((blocksThisClu-1) - n) << inode->i_sb->s_blocksize_bits; ++ } else { ++ n = blocksThisClu - 1; ++ } ++ if (PageUptodate(pg[ipg]) ) { ++ for (;n >= 0;n--, i--) { ++ if (((i >> 3) >= head->holemap_nbytes) ++ || !(head->holemap[i >> 3] & (1 << (i & 7)))) { ++ src -= inode->i_sb->s_blocksize; ++ } ++ } ++ } else { ++ ++ dst = (u8 *) page_address(pg[ipg]) + (n << inode->i_sb->s_blocksize_bits); ++ ++ for (; ++ n >= 0; ++ n--, i--, dst -= inode->i_sb->s_blocksize) { ++ assert(!buffer_dirty(bh[i])); ++ clear_buffer_dirty(bh[i]); //mw: had a refile_buffer in 2.4 ++ if (((i >> 3) >= head->holemap_nbytes) ++ || !(head->holemap[i >> 3] & (1 << (i & 7)))) { ++ assert(i >= 0); ++ memcpy(dst, src, inode->i_sb->s_blocksize); ++ src -= inode->i_sb->s_blocksize; ++ } else { ++ assert(i >= 0); ++ memset (dst, 0, inode->i_sb->s_blocksize); ++ } ++ //clear_bit(BH_Uptodate, &bh[i]->b_state); ++ } ++ SetPageUptodate(pg[ipg]); ++ } ++ } ++ } ++ put_cpu_var(ext2_rd_wa); ++ } else { ++ /* Uncompressed cluster. Just copy the data. 
*/ ++ int n; ++ ++# ifdef EXT2_COMPR_REPORT_VERBOSE_INODE ++ if (ei->i_flags & EXT2_COMPR_FL) ++ printk(KERN_DEBUG ++ "ext2: mmap %lu: blocksToDo = %d, " ++ "blockOfCluster = %d, clu_nblocks = %d\n", ++ inode->i_ino, blocksToDo, blockOfCluster, ++ ext2_cluster_nblocks(inode, cluster0 + ++ clu_ix)); ++# endif ++ ++ for (n = 0; ++ n < blocksThisClu; ++ n++, dst += inode->i_sb->s_blocksize) { ++ if ((blockOfCluster + n < nbh) ++ && (bh[blockOfCluster + n] != NULL)) ++ { ++ memcpy(dst, ++ bh[blockOfCluster + n]->b_data, ++ inode->i_sb->s_blocksize); ++ } ++ else ++ { ++ memset(dst, 0, inode->i_sb->s_blocksize); ++ } ++ } ++ blockOfCluster = 0; ++ } // end uncompressed Cluster ++ ++ blocksToDo -= blocksThisClu; ++ ++#ifdef CONFIG_HIGHMEM ++ if (kmapped) ++ ext2_kunmap_cluster_pages(page, pg, epg); ++#endif ++ ++ for (i = 0; i < EXT2_MAX_CLUSTER_PAGES; i++) { ++ if (epg[i] != NULL) { ++ ++ ClearPageDirty(epg[i]); ++ ClearPageUptodate(epg[i]); ++ try_to_free_buffers(epg[i]); ++ unlock_page(epg[i]); ++ assert(page_count(epg[i]) <= 1); ++ page_cache_release(epg[i]); ++ } ++ } ++ ++ for (i = 0; i < EXT2_MAX_CLUSTER_PAGES; i++) { ++ if (pg[i] == NULL) ++ break; ++ if (pg[i] == page) ++ continue; ++ unlock_page(pg[i]); ++ page_cache_release(pg[i]); ++ } ++ //mw ++ assert (isCmp[clu_ix] == ext2_cluster_is_compressed_fn (inode, cluster0 + clu_ix)); ++ } // end for-loop: Cluster ++ } ++ ++ SetPageUptodate(page); ++ unlock_page(page); ++ atomic_dec(&page->_count); ++ mutex_unlock(&inode->i_mutex); ++ return 0; ++ ++ out: ++ ++#ifdef CONFIG_HIGHMEM ++ if (kmapped) ++ ext2_kunmap_cluster_pages(page, pg, epg); ++#endif ++ ++ for (i = 0; i < EXT2_MAX_CLUSTER_PAGES; i++) { ++ if (epg[i] != NULL) { ++ ++ ClearPageDirty(epg[i]); ++ ClearPageUptodate(epg[i]); ++ try_to_free_buffers(epg[i]); ++ unlock_page(epg[i]); ++ assert(page_count(epg[i]) <= 1); ++ page_cache_release(epg[i]); ++ } ++ } ++ ++ for (i = 0; i < EXT2_MAX_CLUSTER_PAGES; i++) { ++ if (pg[i] == NULL) ++ break; ++ if (pg[i] == page) ++ continue; ++ unlock_page(pg[i]); ++ page_cache_release(pg[i]); ++ } ++ mutex_unlock(&inode->i_mutex); ++ return 0; ++ ++ io_error: ++#ifdef CONFIG_HIGHMEM ++ if (kmapped) ++ ext2_kunmap_cluster_pages(page, pg, epg); ++#endif ++ SetPageError(page); ++ unlock_page(page); ++ atomic_dec(&page->_count); ++ mutex_unlock(&inode->i_mutex); ++ printk("Readpage: IOERROR\n"); ++ return -EIO; /* it is tested in do_generic_file_read(), ... */ ++} ++#endif /* CONFIG_EXT2_COMPRESS */ ++ + static int ext2_writepage(struct page *page, struct writeback_control *wbc) + { ++/* mw (24/06/2008): ++ * WRITEPAGE: this code was also in e2compr 2.4 and once removed by yaboo ding. ++ * ext2_writepage() is also called for dirty pages. Usually we write using file_write() which ++ * wraps correctly to compressed files. BUT: a writeable memory map might ++ * produce dirty pages, which will be written back normally. this should/might fail. ++ * The following code should fix this bug, but this was not tested yet. ++ */ ++#ifdef CONFIG_EXT2_COMPRESS ++#undef USE_WRITEPAGE ++//#define USE_WRITEPAGE ++#ifdef USE_WRITEPAGE ++ ++ struct ext2_inode_info *ei = EXT2_I(page->mapping->host); ++ int retval; ++ ++ struct inode *inode = page->mapping->host; ++ u32 cluster0, max_cluster; ++ int blocksToDo; ++ ++ unlock_page(page); ++ //mw: do we need this ??? 
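Stepping back to the decompression path of ext2_readpage() above: the cluster head's holemap is a small bitmap in which a set bit marks a block that is a hole and therefore has no stored data. nbytes_wanted counts only the non-hole blocks, and the copy-out loop zero-fills hole blocks instead of consuming decompressed bytes for them. A stand-alone model of that counting — the structure and helper names are illustrative, not the kernel's:

    #include <stdio.h>

    /* Count the blocks actually stored in a compressed cluster:
     * a set holemap bit means "this block is a hole, nothing stored". */
    static unsigned stored_blocks(const unsigned char *holemap,
                                  unsigned holemap_nbytes, unsigned nblk)
    {
            unsigned i, n = nblk;

            for (i = 0; i < nblk; i++)
                    if ((i >> 3) < holemap_nbytes &&
                        (holemap[i >> 3] & (1u << (i & 7))))
                            n--;
            return n;
    }

    int main(void)
    {
            unsigned char holemap[1] = { 0x05 };  /* blocks 0 and 2 are holes */
            unsigned nblk = 8, blocksize = 1024;
            unsigned n = stored_blocks(holemap, sizeof holemap, nblk);

            /* 6 blocks, 6144 bytes wanted; the two holes are zero-filled later. */
            printf("stored blocks: %u, bytes wanted: %u\n", n, n * blocksize);
            return 0;
    }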
++ //if (!(ei->i_compr_flags & EXT2_OSYNC_INODE)) { ++ /* trace_e2c("ext2_writepage: inode"); */ ++ mutex_lock(&inode->i_mutex); ++ /* trace_e2c(" down\n"); */ ++ //} ++ if (!(ei->i_flags & EXT2_COMPRBLK_FL) ++ || (ei->i_flags & EXT2_NOCOMPR_FL) ) ++ { ++ //mw: do we need this ??? ++ //if (!(ei->i_compr_flags & EXT2_OSYNC_INODE)) { ++ /* trace_e2c("ext2_writepage: inode up 1\n"); */ ++ mutex_unlock(&inode->i_mutex); ++ //} ++ lock_page(page); ++ return block_write_full_page(page, ext2_get_block, wbc); ++ } ++ /* */ ++ { ++ register u32 blockOfFile ++ = (page->index << PAGE_CACHE_SHIFT) >> inode->i_sb->s_blocksize_bits; ++ ++ blocksToDo = PAGE_SIZE >> inode->i_sb->s_blocksize_bits; ++ cluster0 = ext2_block_to_cluster(inode, blockOfFile); ++ max_cluster = ext2_block_to_cluster(inode, blockOfFile + blocksToDo - 1); ++ } ++ ++ /* Check if any part of the requested area contains part of a ++ compressed cluster. If not, we can use default ext2_writepage(). ++ ++ (Note that we don't have to worry about a cluster becoming ++ compressed in the meantime, because we have the semaphore.) ++ ++ A page can cover up to 9 clusters. (The maximum can only ++ occur with 32KB pages, 4KB clusters, and a non-page-aligned ++ offset. Thanks go to Kurt Fitzner for reporting that ++ page offsets needn't be aligned; see generic_file_mmap().) */ ++ ++ { ++ int isCmp[(PAGE_SIZE >> 12) + 1]; ++ unsigned clu_ix; ++ ++ assert (max_cluster - cluster0 < sizeof(isCmp)/sizeof(*isCmp)); ++ for (clu_ix = 0; cluster0 + clu_ix <= max_cluster; clu_ix++) { ++ isCmp[clu_ix] = ext2_cluster_is_compressed_fn (inode, cluster0 + clu_ix); ++ if (isCmp[clu_ix] < 0) { ++ //mw: do we need this ???if (!(ei->i_compr_flags & EXT2_OSYNC_INODE)) { ++ /* trace_e2c("ext2_writepage: inode up 2\n"); */ ++ lock_page(page); ++ mutex_unlock(&inode->i_mutex); ++ //} ++ return -EIO; ++ } ++ } ++ ++ for (clu_ix = 0; cluster0 + clu_ix <= max_cluster; clu_ix++) ++ if (isCmp[clu_ix] > 0) ++ ext2_decompress_cluster(inode, cluster0 + clu_ix); ++ ++ //mw: do we need this ??? ++ //if (!(ei->i_compr_flags & EXT2_OSYNC_INODE)) { ++ /* trace_e2c("ext2_writepage: inode up 3\n"); */ ++ mutex_unlock(&inode->i_mutex); ++ //} ++ lock_page(page); ++ ++ /* fall through */ ++ } ++#endif /* CONFIG_EXT2_COMPRESS */ ++#endif + return block_write_full_page(page, ext2_get_block, wbc); + } + ++#ifndef CONFIG_EXT2_COMPRESS + static int ext2_readpage(struct file *file, struct page *page) + { + return mpage_readpage(page, ext2_get_block); + } ++#endif + + static int + ext2_readpages(struct file *file, struct address_space *mapping, + struct list_head *pages, unsigned nr_pages) + { ++#ifdef CONFIG_EXT2_COMPRESS ++/* ++ * For now, just read each page into cache and don't worry about emitting BIOs. ++ * (whitpa 02 Aug 2004). 
++ */ ++ ++ unsigned page_idx; ++ struct pagevec lru_pvec; ++ int iError; ++ ++ pagevec_init(&lru_pvec, 0); ++ ++ for (page_idx = 0; page_idx < nr_pages; page_idx++) { ++ struct page *page = list_entry(pages->prev, struct page, lru); ++ ++ prefetchw(&page->flags); ++ list_del(&page->lru); ++ ++ iError = add_to_page_cache(page, mapping, page->index, GFP_KERNEL); ++ if (!iError) { ++ if (!PageUptodate(page)) ++ { ++ (void) ext2_readpage(file, page); ++ } ++ else ++ { ++ unlock_page(page); ++ } ++ if (!pagevec_add(&lru_pvec, page)) ++ __pagevec_lru_add_file(&lru_pvec); ++ } else { ++ page_cache_release(page); ++ } ++ ++ } ++ pagevec_lru_add_file(&lru_pvec); ++ BUG_ON(!list_empty(pages)); ++ return 0; ++#else + return mpage_readpages(mapping, pages, nr_pages, ext2_get_block); ++#endif + } + + static int +@@ -824,11 +1469,58 @@ static int ext2_nobh_writepage(struct pa + return nobh_writepage(page, ext2_get_block, wbc); + } + ++#ifdef CONFIG_EXT2_COMPRESS ++static sector_t ext2_do_bmap(struct address_space *mapping, sector_t block) ++#else + static sector_t ext2_bmap(struct address_space *mapping, sector_t block) ++#endif + { + return generic_block_bmap(mapping,block,ext2_get_block); + } + ++#ifdef CONFIG_EXT2_COMPRESS ++/* Return 0 instead of EXT2_COMPRESSED_BLKADDR if EXT2_NOCOMPR_FL ++ * high. This is necessary for us to be able to use ++ * generic_readpage() when EXT2_NOCOMPR_FL is high. ++ */ ++static sector_t ext2_bmap(struct address_space *mapping, sector_t block) ++{ ++ sector_t result; ++ struct inode *inode = mapping->host; ++ ++ if ((EXT2_I(inode)->i_flags & (EXT2_COMPRBLK_FL | EXT2_NOCOMPR_FL)) ++ == (EXT2_COMPRBLK_FL | 0)) { ++ int err; ++ ++ err = ext2_cluster_is_compressed_fn ++ (inode, ext2_block_to_cluster(inode, block)); ++ if (err > 0) ++ ext2_msg (inode->i_sb, "ext2_bmap", ++ "compressed cluster, inode %lu", ++ inode->i_ino); ++ if (err != 0) ++ return 0; ++ } ++ ++ result = ext2_do_bmap(mapping, block); ++ if (result != EXT2_COMPRESSED_BLKADDR) ++ return result; ++ ++ if (!(EXT2_SB(inode->i_sb)->s_es->s_feature_incompat ++ & cpu_to_le32(EXT2_FEATURE_INCOMPAT_COMPRESSION))) ++ ext2_error(inode->i_sb, "ext2_bmap", ++ "compressed_blkaddr (ino %lu, blk %lu) " ++ "on non-compressed fs", ++ inode->i_ino, (unsigned long) block); ++ if (!S_ISREG(inode->i_mode)) ++ ext2_error(inode->i_sb, "ext2_bmap", ++ "compressed_blkaddr for non-regular file " ++ "(ino %lu, blk %lu)", ++ inode->i_ino, (unsigned long) block); ++ return 0; ++} ++#endif /* CONFIG_EXT2_COMPRESS */ ++ + static ssize_t + ext2_direct_IO(int rw, struct kiocb *iocb, const struct iovec *iov, + loff_t offset, unsigned long nr_segs) +@@ -848,6 +1540,18 @@ ext2_direct_IO(int rw, struct kiocb *ioc + static int + ext2_writepages(struct address_space *mapping, struct writeback_control *wbc) + { ++#ifdef CONFIG_EXT2_COMPRESS ++#ifdef USE_WRITEPAGE ++ struct ext2_inode_info *ei = EXT2_I(mapping->host); ++ if ( (ei->i_flags & EXT2_COMPRBLK_FL) ++ && !(ei->i_flags & EXT2_NOCOMPR_FL)) ++ { ++ //NULL will invoke ext2_writepage for writeback, hopefully. 
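One user-visible effect of the ext2_bmap() wrapper above: block-map consumers (FIBMAP callers such as lilo) are handed block 0 for anything inside a compressed cluster, rather than the on-disk EXT2_COMPRESSED_BLKADDR sentinel. A hedged userspace illustration, assuming an e2compr-mounted ext2 filesystem and the privileges FIBMAP requires:

    #include <stdio.h>
    #include <fcntl.h>
    #include <unistd.h>
    #include <sys/ioctl.h>
    #include <linux/fs.h>

    int main(int argc, char **argv)
    {
            int fd, blk = 0;   /* query logical block 0 of the file */

            if (argc < 2)
                    return 1;
            fd = open(argv[1], O_RDONLY);
            if (fd < 0) { perror("open"); return 1; }
            /* FIBMAP normally needs CAP_SYS_RAWIO (run as root). */
            if (ioctl(fd, FIBMAP, &blk) < 0) { perror("FIBMAP"); close(fd); return 1; }
            /* For a block inside a compressed cluster this prints 0. */
            printf("physical block: %d\n", blk);
            close(fd);
            return 0;
    }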
++ return mpage_writepages(mapping, wbc, NULL); ++ } ++ else ++#endif ++#endif + return mpage_writepages(mapping, wbc, ext2_get_block); + } + +@@ -996,6 +1700,12 @@ static inline void ext2_free_data(struct + + for ( ; p < q ; p++) { + nr = le32_to_cpu(*p); ++#ifdef CONFIG_EXT2_COMPRESS ++ if (nr == EXT2_COMPRESSED_BLKADDR) { ++ *p = 0; ++ continue; ++ } ++#endif + if (nr) { + *p = 0; + /* accumulate blocks to free if they're contiguous */ +@@ -1040,6 +1750,12 @@ static void ext2_free_branches(struct in + nr = le32_to_cpu(*p); + if (!nr) + continue; ++#ifdef CONFIG_EXT2_COMPRESS ++ if (nr == EXT2_COMPRESSED_BLKADDR) { ++ *p = 0; ++ continue; ++ } ++#endif + *p = 0; + bh = sb_bread(inode->i_sb, nr); + /* +@@ -1064,6 +1780,96 @@ static void ext2_free_branches(struct in + ext2_free_data(inode, p, q); + } + ++/* pjm 1998-01-14: As far as I can tell, "I don't do any locking" is ++ no longer correct, as i_sem is downed for all write() and ++ truncate() stuff except where it doesn't matter (e.g. new inode). */ ++ ++#ifdef CONFIG_EXT2_COMPRESS ++/* If the EXT2_ECOMPR_FL bit is high, then things can go rather badly. ++ This can only happen if access permission was obtained before the ++ flag was raised. Also, it shouldn't be too much of a problem ++ unless the end point of truncation is a compressed cluster with a ++ compression error. */ ++ ++ /* From what I (Antoine) understand, the complexity of the truncate ++ code is due to the fact that we don't want to free blocks that ++ are still referenced. It does not ensure that concurrent read ++ operation will terminate properly, i.e., the semantic of reading ++ while somebody truncates is undefined (you can either get the old ++ data if you got the blocks before, or get plenty of zeros ++ otherwise). */ ++ ++/* todo: Provide error trapping in readiness for when i_op->truncate ++ allows a return code. */ ++static void fix_compression (struct inode * inode) ++{ ++ struct ext2_inode_info *ei = EXT2_I(inode); ++ /*if (atomic_read(&inode->i_mutex.count) > 0) ++ { ++ printk("Assert Mutex failed for file: %s \n", inode_name(inode, 0)); ++ dump_stack(); ++ }*/ ++ ++ assert (ei->i_flags & EXT2_COMPRBLK_FL); /* one or more compressed clusters */ ++ assert ((atomic_read(&inode->i_mutex.count) < 1) ++ || ((inode->i_nlink == 0) ++ && (atomic_read(&inode->i_count) == 0))); ++ /* pjm 1998-01-14: I think the below comment can safely be removed, as ++ it's impossible for someone to be compressing during truncate(), because ++ i_sem is down. */ ++ /* Dans le cas ou les clusters peuvent etre compresses, cela pose ++ un probleme : il faudrait stopper aussi si le cluster est ++ comprime et ne contient pas plus de donnees que i_size ne ++ permet. Sinon, on peut passer son temps a decompresser un ++ cluster que quelqu'un d'autre compresse en meme ++ temps... (TODO). Cela ne peut arriver que si on reverifie apres ++ coup si le cluster est non compresse (ce qu'on fait a l'heure ++ actuelle) => faire autrement. ++ ++ pjm fixme tr ++ ++ If the clusters can be compressed, we'd have a problem: we'd ++ also need to stop if the cluster is compressed and doesn't ++ contain more data than i_size permits. Otherwise we can spend ++ time decompressing a cluster that someone else is compressing ++ at the same time. (TODO.) This can only happen if we reverify ++ "apres coup" ("after the event"? "after each time"?) "si" ("if" ++ or "that") the cluster is not compressed (as we are currently ++ doing) => do differently. */ ++ ++ /* todo: Handle errors from ext2_cluster_is_compressed(). 
++ (Except ext2_truncate() currently silently ignores errors ++ anyway.) */ ++ ++ if (!ext2_offset_is_clu_boundary(inode, inode->i_size) ++ && (! ( ei->i_flags & EXT2_NOCOMPR_FL)) ++ && (ext2_cluster_is_compressed_fn ++ (inode, ext2_offset_to_cluster (inode, inode->i_size)) ++ > 0)) { ++ trace_e2c("fix_compression: inode:%lu decompress_cluster!\n", inode->i_ino); ++ ext2_decompress_cluster(inode, ext2_offset_to_cluster(inode, inode->i_size)); ++ /* todo: Check the return code of ++ ext2_decompress_cluster(). (Then again, I don't ++ know how to report an error anyway. ++ ext2_truncate() silently ignores errors.) */ ++ ++ /* Organise for the cluster to be recompressed later. */ ++ assert (ei->i_flags & EXT2_COMPR_FL); ++ ++ ei->i_flags |= EXT2_DIRTY_FL; ++ ei->i_compr_flags |= EXT2_CLEANUP_FL; ++ mark_inode_dirty(inode); ++ } else ++ /* If there are no more compressed clusters, then ++ remove the EXT2_COMPRBLK_FL. Not essential from a ++ safety point of view, but friendlier. We only do ++ this in the `else' because the cleanup function ++ will handle it in the `if' case. */ ++ ext2_update_comprblk(inode); ++} ++#endif ++ ++ + static void __ext2_truncate_blocks(struct inode *inode, loff_t offset) + { + __le32 *i_data = EXT2_I(inode)->i_data; +@@ -1076,6 +1882,27 @@ static void __ext2_truncate_blocks(struc + int n; + long iblock; + unsigned blocksize; ++ ++#ifdef CONFIG_EXT2_COMPRESS ++ /* If the new size is in the middle of a compressed cluster, ++ then we decompress it, and set things up to be recompressed ++ later. ++ ++ todo: It isn't very nice to get ENOSPC on truncate. We ++ can't completely remove the possibility (unless the ++ compression algorithms obey the rule `shorter input never ++ gives longer output') but we could greatly reduce the ++ possibility, e.g. by moving the fix_compression() function ++ to compress.c, and have it decompress and immediately ++ recompress the cluster, without allocating blocks for the ++ full decompressed data. */ ++ if (EXT2_I(inode)->i_flags & EXT2_COMPRBLK_FL) { ++ trace_e2c("ext2_truncate: ino=%ld sz=%d\n", inode->i_ino, (int)inode->i_size); ++ fix_compression(inode); ++ truncate_inode_pages(inode->i_mapping, inode->i_size); ++ } ++#endif ++ + blocksize = inode->i_sb->s_blocksize; + iblock = (offset + blocksize-1) >> EXT2_BLOCK_SIZE_BITS(inode->i_sb); + +@@ -1146,8 +1973,11 @@ do_indirects: + + mutex_unlock(&ei->truncate_mutex); + } +- ++#ifdef CONFIG_EXT2_COMPRESS ++void ext2_truncate_blocks(struct inode *inode, loff_t offset) ++#else + static void ext2_truncate_blocks(struct inode *inode, loff_t offset) ++#endif + { + /* + * XXX: it seems like a bug here that we don't allow +@@ -1335,7 +2165,73 @@ struct inode *ext2_iget (struct super_bl + goto bad_inode; + } + inode->i_blocks = le32_to_cpu(raw_inode->i_blocks); ++#ifdef CONFIG_EXT2_COMPRESS ++ ei->i_flags = 0x807fffff & le32_to_cpu(raw_inode->i_flags); ++ ei->i_compr_flags = 0; ++ if (S_ISREG(inode->i_mode) || S_ISDIR(inode->i_mode)) { ++ ++ if (S_ISDIR(inode->i_mode)) ++ { ++ //mw: ++ //mutex_lock(&inode->i_mutex); ++ if (S_ISDIR(inode->i_mode)) ++ { ++ ei->i_flags &= ~(EXT2_COMPRBLK_FL | EXT2_DIRTY_FL); //modify!!! ++ } ++ //mutex_unlock(&inode->i_mutex); ++ } ++ ++ /* The above shouldn't be necessary unless someone's ++ * been playing with EXT2_IOC_SETFLAGS on a non-e2compr ++ * kernel, or the inode has been scribbled on. 
++ */ ++ if (ei->i_flags & (EXT2_COMPR_FL | EXT2_COMPRBLK_FL)) { ++ ei->i_compr_method ++ = (le32_to_cpu(raw_inode->i_flags) >> 26) & 0x1f; ++ ei->i_log2_clu_nblocks ++ = (le32_to_cpu(raw_inode->i_flags) >> 23) & 0x7; ++ if ((ei->i_log2_clu_nblocks < 2) ++ || (ei->i_log2_clu_nblocks > 5)) { ++ if ((ei->i_log2_clu_nblocks == 0) ++ && !(ei->i_flags & EXT2_COMPRBLK_FL)) { ++ /* The EXT2_COMPR_FL flag was ++ * raised under a kernel ++ * without e2compr support. ++ */ ++ if (S_ISREG(inode->i_mode)) ++ ei->i_flags |= EXT2_DIRTY_FL; ++ /* Todo: once we're sure the kernel can ++ * handle [log2_]clu_nblocks==0, get rid ++ * of the next statement. ++ */ ++ ei->i_log2_clu_nblocks ++ = EXT2_DEFAULT_LOG2_CLU_NBLOCKS; ++ } else { ++ ei->i_flags |= EXT2_ECOMPR_FL; ++ ext2_error(inode->i_sb, ++ "ext2_read_inode", ++ "inode %lu is corrupted: " ++ "log2_clu_nblocks=%u", ++ inode->i_ino, ++ ei->i_log2_clu_nblocks); ++ } ++ } ++ } else { ++ ei->i_compr_method = EXT2_DEFAULT_COMPR_METHOD; ++ ei->i_log2_clu_nblocks ++ = EXT2_DEFAULT_LOG2_CLU_NBLOCKS; ++ } ++ if (ei->i_log2_clu_nblocks > ++ (EXT2_LOG2_MAX_CLUSTER_BYTES - inode->i_sb->s_blocksize_bits)) ++ ei->i_log2_clu_nblocks = (EXT2_LOG2_MAX_CLUSTER_BYTES ++ - inode->i_sb->s_blocksize_bits); ++ ei->i_clu_nblocks = 1 << ei->i_log2_clu_nblocks; ++ if (ei->i_flags & EXT2_DIRTY_FL) ++ ei->i_compr_flags = EXT2_CLEANUP_FL; ++ } ++#else /* !CONFIG_EXT2_COMPRESS */ + ei->i_flags = le32_to_cpu(raw_inode->i_flags); ++#endif + ei->i_faddr = le32_to_cpu(raw_inode->i_faddr); + ei->i_frag_no = raw_inode->i_frag; + ei->i_frag_size = raw_inode->i_fsize; +@@ -1458,7 +2354,35 @@ static int __ext2_write_inode(struct ino + + raw_inode->i_blocks = cpu_to_le32(inode->i_blocks); + raw_inode->i_dtime = cpu_to_le32(ei->i_dtime); ++#ifdef CONFIG_EXT2_COMPRESS ++ if ((S_ISREG(inode->i_mode) || S_ISDIR(inode->i_mode)) ++ && (ei->i_flags & (EXT2_COMPR_FL | EXT2_COMPRBLK_FL))) { ++ if ((ei->i_log2_clu_nblocks < 2) ++ || (ei->i_log2_clu_nblocks > 5)) { ++ ei->i_flags |= EXT2_ECOMPR_FL; ++ ext2_error (inode->i_sb, "ext2_write_inode", ++ "inode %lu is corrupted: log2_clu_nblocks=%u", ++ inode->i_ino, ei->i_log2_clu_nblocks); ++ } ++ assert (ei->i_clu_nblocks == (1 << ei->i_log2_clu_nblocks)); ++ assert (ei->i_compr_method < 0x20); ++ raw_inode->i_flags = cpu_to_le32 ++ ((ei->i_flags & 0x807fffff) ++ | (ei->i_compr_method << 26) ++ | (ei->i_log2_clu_nblocks << 23)); ++ } else ++ { ++ //mw: i_mutex was introduced and disabled again: deadlock with lilo ++ // mutex_lock(&inode->i_mutex); //mw ++ raw_inode->i_flags = cpu_to_le32 //modify !!! ++ (ei->i_flags ++ & 0x807fffff /* no compr meth/size */ ++ & ~(EXT2_COMPR_FL | EXT2_COMPRBLK_FL | EXT2_IMMUTABLE_FL | EXT2_ECOMPR_FL | EXT2_NOCOMPR_FL)); ++ // mutex_unlock(&inode->i_mutex); //mw ++ } ++#else + raw_inode->i_flags = cpu_to_le32(ei->i_flags); ++#endif + raw_inode->i_faddr = cpu_to_le32(ei->i_faddr); + raw_inode->i_frag = ei->i_frag_no; + raw_inode->i_fsize = ei->i_frag_size; +--- linux-3.4-rc5/fs/ext2/file.c 2012-04-29 18:19:10.000000000 -0400 ++++ linux-3.4-rc5-e2c/fs/ext2/file.c 2012-04-30 04:11:03.803143097 -0400 +@@ -18,10 +18,25 @@ + * (jj@sunsite.ms.mff.cuni.cz) + */ + ++#ifdef CONFIG_EXT2_COMPRESS ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#else + #include + #include + #include + #include "ext2.h" ++#endif ++ ++ + #include "xattr.h" + #include "acl.h" + +@@ -30,8 +45,39 @@ + * for a single struct file are closed. 
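The ext2_iget()/ext2_write_inode() hunks above pack the e2compr parameters into the top of the on-disk i_flags word: bits 23-25 hold log2 of the cluster size in blocks, bits 26-30 hold the compression method, and the mask 0x807fffff strips that packed region when only the ordinary flags are wanted. A small round-trip sketch of the packing (userspace, hypothetical names):

    #include <stdio.h>
    #include <stdint.h>
    #include <assert.h>

    #define E2C_FLAGS_MASK  0x807fffffu   /* flag bits kept as-is */

    static uint32_t pack(uint32_t flags, unsigned method, unsigned log2_clu)
    {
            return (flags & E2C_FLAGS_MASK) | (method << 26) | (log2_clu << 23);
    }

    int main(void)
    {
            uint32_t raw = pack(0x00000084u, 4 /* method id */, 2 /* 4-block clusters */);
            unsigned method   = (raw >> 26) & 0x1f;   /* same decode as ext2_iget() */
            unsigned log2_clu = (raw >> 23) & 0x7;

            assert(method == 4 && log2_clu == 2);
            printf("raw=%#x method=%u cluster=%u blocks\n", raw, method, 1u << log2_clu);
            return 0;
    }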
Note that different open() calls + * for the same file yield different struct file structures. + */ ++ ++/* ++ * pjm 1998-01-09: I would note that this is different from `when no ++ * process has the inode open'. ++ */ + static int ext2_release_file (struct inode * inode, struct file * filp) + { ++#ifdef CONFIG_EXT2_COMPRESS ++ /* ++ * Now's as good a time as any to clean up wrt compression. ++ * Previously (before 2.1.4x) we waited until ++ * ext2_put_inode(), but now the dcache sometimes delays that ++ * call until umount time. ++ */ ++ //printk(KERN_DEBUG "ext2_release_file: pid=%d, i_ino=%lu, i_count=%d\n", current->pid, inode->i_ino, atomic_read(&inode->i_count)); ++ ++ if (S_ISREG (inode->i_mode) ++ && inode->i_nlink ++ && (EXT2_I(inode)->i_compr_flags & EXT2_CLEANUP_FL)) { ++#ifdef EXT2_COMPR_REPORT_PUT ++ printk(KERN_DEBUG "ext2_release_file: pid=%d, i_ino=%lu, i_count=%d\n", current->pid, inode->i_ino, atomic_read(&inode->i_count)); ++#endif ++ /* ++ * todo: See how the return code of ++ * ext2_release_file() is used, and decide whether it ++ * might be appropriate to pass any errors to ++ * caller. ++ */ ++ //dump_stack(); ++ (void) ext2_cleanup_compressed_inode (inode); ++ } ++ ++#endif + if (filp->f_mode & FMODE_WRITE) { + mutex_lock(&EXT2_I(inode)->truncate_mutex); + ext2_discard_reservation(inode); +@@ -56,6 +102,456 @@ int ext2_fsync(struct file *file, loff_t + return ret; + } + ++#ifdef CONFIG_EXT2_COMPRESS ++struct page_cluster { ++ struct page * page; ++ loff_t pos; ++ unsigned bytes; ++ unsigned long offset; ++ unsigned char in_range; ++ const char * buf; ++}; ++ ++#define PAGE_IN_RANGE 1 ++#define PAGE_KMAPPED 2 ++ ++ ++/** ++ * generic_osync_inode - flush all dirty data for a given inode to disk ++ * @inode: inode to write ++ * @mapping: the address_space that should be flushed ++ * @what: what to write and wait upon ++ * ++ * This can be called by file_write functions for files which have the ++ * O_SYNC flag set, to flush dirty writes to disk. ++ * ++ * @what is a bitmask, specifying which part of the inode's data should be ++ * written and waited upon. ++ * ++ * OSYNC_DATA: i_mapping's dirty data ++ * OSYNC_METADATA: the buffers at i_mapping->private_list ++ * OSYNC_INODE: the inode itself ++ */ ++ ++/* mw: see generic_osync_inode() in kernel<2.6.30 for orginal method. ++ basically we want all of it: OSYNC_DATA and OSYNC_METADATA and OSYNC_INODE */ ++int ex_generic_osync_inode(struct inode *inode, struct address_space *mapping) //, int what) ++{ ++ int err = 0; ++ int need_write_inode_now = 0; ++ int err2; ++ ++ err = filemap_fdatawrite(mapping); ++ ++ err2 = sync_mapping_buffers(mapping); ++ if (!err) ++ err = err2; ++ ++ err2 = filemap_fdatawait(mapping); ++ if (!err) ++ err = err2; ++ ++ /* check if data is dirty */ ++ spin_lock(&inode->i_lock); ++ if (inode->i_state & I_DIRTY) ++ need_write_inode_now = 1; ++ spin_unlock(&inode->i_lock); ++ ++ if (need_write_inode_now) { ++ err2 = write_inode_now(inode, 1); ++ if (!err) ++ err = err2; ++ } ++ else ++ inode_sync_wait(inode); ++ ++ return err; ++} ++ ++ ++/* ++ * Write to a file through the page cache. ++ * ++ * We currently put everything into the page cache prior to writing it. ++ * This is not a problem when writing full pages. With partial pages, ++ * however, we first have to read the data into the cache, then ++ * dirty the page, and finally schedule it for writing. 
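ext2_file_write() below walks the write range cluster by cluster: each iteration decompresses the target cluster if needed, writes through the page cache with do_sync_write(), and may recompress once a cluster boundary is reached. A rough model of how the per-iteration chunk size (nextClusterFirstByte - pos, capped by the remaining count) partitions a request, assuming 4 KB pages and four pages per cluster; all names here are illustrative:

    #include <stdio.h>

    int main(void)
    {
            const unsigned long page_size = 4096, pages_per_cluster = 4;
            const unsigned long cluster_bytes = page_size * pages_per_cluster;
            unsigned long pos = 10000, count = 50000;

            while (count) {
                    unsigned long cluster = pos / cluster_bytes;
                    unsigned long next  = (cluster + 1) * cluster_bytes; /* first byte of next cluster */
                    unsigned long bytes = next - pos;

                    if (bytes > count)
                            bytes = count;
                    printf("cluster %lu: write %lu bytes at offset %lu\n", cluster, bytes, pos);
                    pos += bytes;
                    count -= bytes;
            }
            return 0;
    }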
Alternatively, we ++ * could write-through just the portion of data that would go into that ++ * page, but that would kill performance for applications that write data ++ * line by line, and it's prone to race conditions. ++ * ++ * Note that this routine doesn't try to keep track of dirty pages. Each ++ * file system has to do this all by itself, unfortunately. ++ * okir@monad.swb.de ++ */ ++ssize_t ++ext2_file_write(struct file *file,const char *buf,size_t count,loff_t *ppos) ++{ ++ struct address_space *mapping = file->f_dentry->d_inode->i_mapping; ++ struct inode *inode = mapping->host; ++ unsigned long limit = current->signal->rlim[RLIMIT_FSIZE].rlim_cur, written, last_index; /* last page index */ ++ loff_t pos; ++ long status; ++ int err; ++ unsigned bytes; ++ u32 comprblk_mask=0; ++ struct ext2_inode_info *ei = EXT2_I(inode); ++ ++ if (!(ei->i_flags & (EXT2_COMPR_FL|EXT2_COMPRBLK_FL)) ++#undef DUD //mw: I think this is a buggy bug-fix ++#ifdef DUD ++ || (count < inode->i_sb->s_blocksize) ++#endif ++ ) ++ { ++ return do_sync_write(file, buf, count, ppos); ++ } ++ ++ if ((ssize_t) count < 0) ++ return -EINVAL; ++ ++ if (!access_ok(VERIFY_READ, buf, count)) ++ return -EFAULT; ++ ++#ifdef EXT2_COMPR_REPORT_MUTEX ++ printk(KERN_DEBUG "EXT2_FILE_WRITE_LOCK of PID %u @ inode:%lu\n", current->pid, inode->i_ino ); ++#endif ++ mutex_lock(&inode->i_mutex); ++ /* mw: down_read(&inode->i_alloc_sem); // as used by ocsf2 TLL 02/21/07 ++ was removed with kernel 3.1 */ ++ atomic_inc(&inode->i_dio_count); ++ ++ pos = *ppos; ++ err = -EINVAL; ++ if (pos < 0) ++ goto out; ++ ++ written = 0; ++ ++ /* FIXME: this is for backwards compatibility with 2.4 */ ++ if (!S_ISBLK(inode->i_mode) && file->f_flags & O_APPEND) ++ { ++ pos = inode->i_size; ++ } ++ ++ /* ++ * Check whether we've reached the file size limit. ++ */ ++ err = -EFBIG; ++ ++ if (limit != RLIM_INFINITY) { ++ if (pos >= limit) { ++ send_sig(SIGXFSZ, current, 0); ++ goto out; ++ } ++ if (pos > 0xFFFFFFFFULL || count > limit - (u32)pos) { ++ /* send_sig(SIGXFSZ, current, 0); */ ++ count = limit - (u32)pos; ++ } ++ } ++ ++ /* ++ * LFS rule ++ */ ++ if ( pos + count > MAX_NON_LFS && !(file->f_flags&O_LARGEFILE)) { ++ if (pos >= MAX_NON_LFS) { ++ send_sig(SIGXFSZ, current, 0); ++ goto out; ++ } ++ if (count > MAX_NON_LFS - (u32)pos) { ++ /* send_sig(SIGXFSZ, current, 0); */ ++ count = MAX_NON_LFS - (u32)pos; ++ } ++ } ++ ++ /* ++ * Are we about to exceed the fs block limit ? ++ * ++ * If we have written data it becomes a short write ++ * If we have exceeded without writing data we send ++ * a signal and give them an EFBIG. ++ * ++ * Linus frestrict idea will clean these up nicely.. 
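The limit handling above (RLIMIT_FSIZE, the MAX_NON_LFS cap without O_LARGEFILE, and sb->s_maxbytes) follows one pattern: refuse with SIGXFSZ/-EFBIG when the write starts at or past the limit, otherwise shorten the request to fit. A condensed userspace model of that clamping, for illustration only:

    #include <stdio.h>

    /* Bytes allowed for the write, or 0 if it must fail outright (EFBIG/SIGXFSZ). */
    static unsigned long clamp_write(unsigned long long pos, unsigned long count,
                                     unsigned long long limit)
    {
            if (pos >= limit)
                    return 0;                 /* caller signals/returns the error */
            if (count > limit - pos)
                    count = limit - pos;      /* short write instead of failure */
            return count;
    }

    int main(void)
    {
            printf("%lu\n", clamp_write(100, 50, 120));  /* 20: shortened */
            printf("%lu\n", clamp_write(200, 50, 120));  /* 0: rejected  */
            return 0;
    }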
++ */ ++ if (!S_ISBLK(inode->i_mode)) { ++ if (pos >= inode->i_sb->s_maxbytes) { ++ if (count || pos > inode->i_sb->s_maxbytes) { ++ send_sig(SIGXFSZ, current, 0); ++ err = -EFBIG; ++ goto out; ++ } ++ /* zero-length writes at ->s_maxbytes are OK */ ++ } ++ ++ if (pos + count > inode->i_sb->s_maxbytes) ++ count = inode->i_sb->s_maxbytes - pos; ++ } else { ++ if (bdev_read_only(inode->i_sb->s_bdev)) { ++ err = -EPERM; ++ goto out; ++ } ++ if (pos >= inode->i_size) { ++ if (count || pos > inode->i_size) { ++ err = -ENOSPC; ++ goto out; ++ } ++ } ++ ++ if (pos + count > inode->i_size) ++ { ++ count = inode->i_size - pos; ++ } ++ } ++ ++ err = 0; ++ if (count == 0) ++ goto out; ++ ++ status = 0; ++ ++ if (file->f_flags & O_DIRECT) ++ { ++ err = -EINVAL; ++ goto out; ++ } ++ /* ++ * We must still check for EXT2_ECOMPR_FL, as it may have been ++ * set after we got the write permission to this file. ++ */ ++ if ((ei->i_flags & (EXT2_ECOMPR_FL | EXT2_NOCOMPR_FL)) == (EXT2_ECOMPR_FL | 0)) ++ { ++ err = -EXT2_ECOMPR; ++ goto out; ++ } ++ ++ should_remove_suid(file->f_dentry); ++ inode->i_ctime = inode->i_mtime = CURRENT_TIME; ++ mark_inode_dirty_sync(inode); ++ ++ if ((pos+count) > inode->i_size) ++ last_index = (pos+count-1) >> PAGE_CACHE_SHIFT; ++ else ++ last_index = (inode->i_size-1) >> PAGE_CACHE_SHIFT; ++ ++ comprblk_mask = ei->i_flags | ~EXT2_COMPRBLK_FL; ++ ++ //mw: now do it cluster-wise ++ do { ++ //unsigned long index, offset, clusters_page_index0, ++ unsigned long index, nextClusterFirstByte, cluster_compressed=0; ++ u32 cluster=0; ++ status = -ENOMEM; /* we'll assign it later anyway */ ++ ++#ifdef EXT2_COMPRESS_WHEN_CLU ++ ei->i_flags |= EXT2_COMPRBLK_FL; ++ assert( (file->f_flags & O_DIRECT) == 0); ++ assert(mapping_mapped(inode->i_mapping) == 0); ++#endif ++ ++ index = pos >> PAGE_CACHE_SHIFT; /*mw: pageindex (start)*/ ++ cluster = ext2_page_to_cluster(inode, index); ++ ++ /* ++ * We decompress the cluster if needed, and write ++ * the data as normal. The cluster will be ++ * compressed again when the inode is cleaned up. ++ */ ++ if ((comprblk_mask == ~(u32)0) ++ && !(ei->i_flags & EXT2_NOCOMPR_FL)) { ++ /* AUFFÄLLIG 2*/ ++ /* assert (block == pos >> inode->i_sb->s_blocksize_bits); */ ++ ++ cluster_compressed = ext2_cluster_is_compressed_fn(inode, cluster); ++ if (cluster_compressed < 0) { ++ if (! written) ++ written = cluster_compressed; ++ break; ++ } ++ } ++ ++ if (cluster_compressed > 0) { ++ /* Here, decompression take place */ ++ cluster_compressed = ext2_decompress_cluster(inode, cluster); ++ if (cluster_compressed < 0) { ++ if (! written) { ++ written = cluster_compressed; ++ } ++ break; ++ } ++ } ++ ++ nextClusterFirstByte = (ext2_cluster_page0(inode, cluster+1) * PAGE_CACHE_SIZE); ++ bytes = nextClusterFirstByte - pos; /*mw: bytes todo in this cluster*/ ++ if (bytes > count) { ++ bytes = count; /*mw: if end of data*/ ++ } ++ ++#ifdef EXT2_COMPR_DEBUG ++ //assert we stay inside the cluster! 
++ { ++ int endpos; ++ int endindex; ++ int endcluster; ++ unsigned long thisClusterFirstByte; ++ int relstart, relend, startblock, endblock; ++ ++ thisClusterFirstByte = (ext2_cluster_page0(inode, cluster) * PAGE_CACHE_SIZE); ++ ++ relstart = pos - thisClusterFirstByte; ++ relend = bytes + relstart; ++ ++ startblock = relstart >> 10; ++ endblock = relend >> 10; ++ ++ ++ endpos = pos + bytes; ++ //printk("do_sync_write cluster %d: inode:%lu, \t start:%i(%i), end:%i(%i), \t ccount:%d \t tcount:%d\n", cluster , inode->i_ino, relstart, startblock, relend , endblock, (int)bytes, count); ++ endindex = (endpos-1) >> PAGE_CACHE_SHIFT; /*mw: pageindex (start)*/ ++ endcluster = ext2_page_to_cluster(inode, endindex); ++ assert(cluster == endcluster); ++ } ++#endif ++ ++ //mw: must unlock here, do_sync_write() will aquire the mutex again ++ mutex_unlock(&inode->i_mutex); ++ ++ //mw: this is pretty clever: we use the generic method now :-) ++ //printk("do_sync_write cluster %d, mapped:%i\n", cluster, mapping_mapped(inode->i_mapping)); ++ //status = do_sync_write_nolock(file, buf, bytes, &pos); //without locking mutex ++ status = do_sync_write(file, buf, bytes, &pos); //with locking mutex ++ assert(status>=0); ++ ++ mutex_lock(&inode->i_mutex); ++ ++ written += status; ++ count -= status; ++ buf += status; ++ ++#ifdef EXT2_COMPRESS_WHEN_CLU ++ assert (ei->i_flags & EXT2_COMPRBLK_FL); ++ if ((ei->i_flags & EXT2_COMPR_FL) ++ && (ext2_offset_is_clu_boundary(inode, pos)) ) { ++ ++ if (mapping_mapped(inode->i_mapping) == 0 ) ++ /* ++ * Pierre Peiffer: For file mapped (via mmap, I mean), ++ * compression will occure when releasing the file. ++ * We must, in this case, avoid the pages (possibly ++ * mapped by a process) to be compressed under them. ++ */ ++ { ++ int error; ++ assert(mapping_mapped(inode->i_mapping) == 0); ++ error = ext2_compress_cluster(inode, cluster); ++ /*if (ext2_cluster_is_compressed_fn(inode, cluster)) ++ ext2_decompress_cluster(inode, cluster);*/ ++ assert(mapping_mapped(inode->i_mapping) == 0); ++ /* ++ * Actually, raising write_error may be a ++ * mistake. For example, ++ * ext2_cleanup_compressed_cluster() doesn't ++ * usually return any errors to user. todo: ++ * Have a look at ext2_compress_cluster, and ++ * check whether its errors are such that they ++ * should be returned to user. Some of the ++ * will be, of course, but it might be ++ * possible for it to return without ++ * change. ++ */ ++ if (error > 0) ++ comprblk_mask = ~(u32)0; ++ } else { ++#ifdef EXT2_COMPR_REPORT ++ char bdn[BDEVNAME_SIZE]; ++ bdevname(inode->i_sb->s_bdev, bdn); ++#endif ++ ++ trace_e2c("ext2_file_write: (dev. %s): " ++ "ino=%ld, cluster=%d: file mapped, does " ++ "not compress cluster\n", ++ bdn, inode->i_ino, cluster); ++ ei->i_flags |= EXT2_DIRTY_FL; ++ ei->i_compr_flags |= EXT2_CLEANUP_FL; ++ } ++ } ++#endif ++ ++ } while (count); ++ *ppos = pos; ++ ++ /* ++ * For now, when the user asks for O_SYNC, we'll actually ++ * provide O_DSYNC. ++ */ ++ if (status >= 0) { ++ if ((file->f_flags & O_SYNC) || IS_SYNC(inode)) { ++ /*if (ei->i_compr_flags & EXT2_OSYNC_INODE) { ++ osync_already = 1; ++ } else { ++ osync_already = 0; ++ ei->i_compr_flags |= EXT2_OSYNC_INODE; ++ }*/ ++ /* Should 2nd arg be inode->i_mapping? */ ++ status = ex_generic_osync_inode(inode, file->f_mapping ++ /*, OSYNC_METADATA|OSYNC_DATA*/); ++ /*if (osync_already == 0) { ++ ei->i_compr_flags &= ~EXT2_OSYNC_INODE; ++ }*/ ++ } ++ } ++ ++ err = written ? 
written : status; ++ ++# ifdef EXT2_COMPRESS_WHEN_CLU ++ //mw: ext2_compress_cluster() might remove EXT2_COMPRBLK_FL ++ //if the file does not compress at all. this is NO error: remove next line? ++ //assert (ei->i_flags & EXT2_COMPRBLK_FL); ++ ++ ei->i_flags &= comprblk_mask; ++ if ( (ei->i_flags & EXT2_COMPR_FL) ++ && (!ext2_offset_is_clu_boundary(inode, pos)) ) ++ { ++ ei->i_flags |= EXT2_DIRTY_FL; ++ ei->i_compr_flags |= EXT2_CLEANUP_FL; ++ } ++ ++# else ++ if (ei->i_flags & EXT2_COMPR_FL) { ++ ei->i_flags |= EXT2_DIRTY_FL; ++ ei->i_compr_flags |= EXT2_CLEANUP_FL; ++ } ++# endif ++out: ++ ++#ifdef EXT2_COMPR_REPORT_MUTEX ++ printk(KERN_DEBUG "EXT2_FILE_WRITE_UNLOCK of PID %u @ inode:%lu\n", current->pid, inode->i_ino); ++#endif ++ /* mw: up_read(&inode->i_alloc_sem); // as used by ocsf2 TLL 02/21/07 ++ was removed with kernel 3.1 */ ++ inode_dio_done(inode); ++ mutex_unlock(&inode->i_mutex); ++ return err; ++} ++ ++/* ++ * Called when an inode is about to be open. ++ * We use this to disallow opening RW large files on 32bit systems if ++ * the caller didn't specify O_LARGEFILE. On 64bit systems we force ++ * on this flag in sys_open. ++ * Prevent opening compressed file with O_DIRECT. ++ */ ++static int ext2_file_open(struct inode * inode, struct file * filp) ++{ ++ if ((filp->f_flags & O_DIRECT) && (EXT2_I(inode)->i_flags & ++ (EXT2_COMPR_FL|EXT2_COMPRBLK_FL))) ++ return -EINVAL; ++ if (!(filp->f_flags & O_LARGEFILE) && inode->i_size > MAX_NON_LFS) ++ return -EFBIG; ++ ++ return 0; ++ } ++#endif /* CONFIG_EXT2_COMPRESS*/ ++ + /* + * We have mostly NULL's here: the current defaults are ok for + * the ext2 filesystem. +@@ -63,7 +559,12 @@ int ext2_fsync(struct file *file, loff_t + const struct file_operations ext2_file_operations = { + .llseek = generic_file_llseek, + .read = do_sync_read, ++#ifdef CONFIG_EXT2_COMPRESS ++ .write = ext2_file_write, ++#else + .write = do_sync_write, ++#endif ++ + .aio_read = generic_file_aio_read, + .aio_write = generic_file_aio_write, + .unlocked_ioctl = ext2_ioctl, +@@ -71,7 +572,11 @@ const struct file_operations ext2_file_o + .compat_ioctl = ext2_compat_ioctl, + #endif + .mmap = generic_file_mmap, ++#ifdef CONFIG_EXT2_COMPRESS ++ .open = ext2_file_open, ++#else + .open = dquot_file_open, ++#endif + .release = ext2_release_file, + .fsync = ext2_fsync, + .splice_read = generic_file_splice_read, +--- linux-3.4-rc5/fs/ext2/ioctl.c 2012-04-29 18:19:10.000000000 -0400 ++++ linux-3.4-rc5-e2c/fs/ext2/ioctl.c 2012-04-30 04:11:03.805143098 -0400 +@@ -7,7 +7,14 @@ + * Universite Pierre et Marie Curie (Paris VI) + */ + ++#ifdef CONFIG_EXT2_COMPRESS ++#include ++#include ++#include ++#include ++#else + #include "ext2.h" ++#endif + #include + #include + #include +@@ -17,6 +24,65 @@ + #include + + ++#ifdef CONFIG_EXT2_COMPRESS ++ ++#ifndef MIN ++# define MIN(a,b) ((a) < (b) ? 
(a) : (b)) ++#endif ++ ++#ifdef CONFIG_GZ_HACK ++static int check_name(struct inode *ino) ++{ ++ struct dentry *dentry = list_entry(ino->i_dentry.next, struct dentry, d_alias); ++ if (dentry) ++ if ( ++ ++ (dentry->d_name.len >= 4) && ++ (((dentry->d_name.name[dentry->d_name.len - 2] == 'g') ++ && (dentry->d_name.name[dentry->d_name.len - 1] == 'z') ++ && ((dentry->d_name.name[dentry->d_name.len - 3] == '.') ++ || (dentry->d_name.name[dentry->d_name.len - 4] == '.'))) ++ ++ || ((dentry->d_name.name[dentry->d_name.len - 3] == 't') ++ && (dentry->d_name.name[dentry->d_name.len - 2] == 'g') ++ && (dentry->d_name.name[dentry->d_name.len - 1] == 'z') ++ && (dentry->d_name.name[dentry->d_name.len - 4] == '.') ++ && (dentry->d_name.len >= 5)) ++ ++ || ((dentry->d_name.name[dentry->d_name.len - 3] == 'p') ++ && (dentry->d_name.name[dentry->d_name.len - 2] == 'n') ++ && (dentry->d_name.name[dentry->d_name.len - 1] == 'g') ++ && (dentry->d_name.name[dentry->d_name.len - 4] == '.') ++ && (dentry->d_name.len >= 5)) ++ ++ || ((dentry->d_name.name[dentry->d_name.len - 3] == 'j') ++ && (dentry->d_name.name[dentry->d_name.len - 2] == 'p') ++ && (dentry->d_name.name[dentry->d_name.len - 1] == 'g') ++ && (dentry->d_name.name[dentry->d_name.len - 4] == '.') ++ && (dentry->d_name.len >= 5)) ++ ++ || ((dentry->d_name.name[dentry->d_name.len - 3] == 'b') ++ && (dentry->d_name.name[dentry->d_name.len - 2] == 'z') ++ && (dentry->d_name.name[dentry->d_name.len - 1] == '2') ++ && (dentry->d_name.name[dentry->d_name.len - 4] == '.') ++ && (dentry->d_name.len >= 5)) ++ ++ || ((dentry->d_name.name[dentry->d_name.len - 3] == 'm') ++ && (dentry->d_name.name[dentry->d_name.len - 2] == 'n') ++ && (dentry->d_name.name[dentry->d_name.len - 1] == 'g') ++ && (dentry->d_name.name[dentry->d_name.len - 4] == '.') ++ && (dentry->d_name.len >= 5)) ++ ) ++ ) { ++ return 1; ++ } ++ return 0; ++} ++#endif ++#endif ++ ++ ++ + long ext2_ioctl(struct file *filp, unsigned int cmd, unsigned long arg) + { + struct inode *inode = filp->f_dentry->d_inode; +@@ -24,6 +90,10 @@ long ext2_ioctl(struct file *filp, unsig + unsigned int flags; + unsigned short rsv_window_size; + int ret; ++#ifdef CONFIG_EXT2_COMPRESS ++ unsigned long datum; ++ int err; ++#endif + + ext2_debug ("cmd = %u, arg = %lu\n", cmd, arg); + +@@ -75,7 +145,127 @@ long ext2_ioctl(struct file *filp, unsig + } + + flags = flags & EXT2_FL_USER_MODIFIABLE; ++#ifdef CONFIG_EXT2_COMPRESS ++ if (S_ISREG (inode->i_mode) || S_ISDIR (inode->i_mode)) { ++ ++ /* pjm 1998-01-14: In previous versions of ++ e2compr, the kernel forbade raising ++ EXT2_ECOMPR_FL from userspace. I can't ++ think of any purpose for forbidding this, ++ and I find it useful to raise ++ EXT2_ECOMPR_FL for testing purposes, so ++ I've removed the forbidding code. */ ++ if (S_ISREG (inode->i_mode) ++ && (EXT2_NOCOMPR_FL ++ & (flags ^ ei->i_flags))) { // mw hint: ^ is a (excluisive OR) ++ /* NOCOMPR_FL can only be changed if ++ nobody else has the file opened. */ ++ /* pjm 1998-02-16: inode->i_count is ++ useless to us because only dentries ++ use inodes now. Unfortunately, ++ there isn't an easy way of finding ++ the equivalent. We'd have to go ++ through all dentries using the ++ inode, and sum their d_count ++ values. Rather than do that, I'd ++ rather get rid of the exclusion ++ constraint. todo. 
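check_name() above spells out each already-compressed suffix (.gz, .tgz, .png, .jpg, .bz2, .mng) character by character against the dentry name. A simplified, table-driven equivalent working on a plain name/length pair is sketched below; it is an illustrative alternative, not what the patch ships:

    #include <stdio.h>
    #include <string.h>

    /* Return 1 if the name ends in a suffix that is already compressed. */
    static int already_compressed(const char *name, size_t len)
    {
            static const char *suffixes[] = { ".gz", ".tgz", ".png", ".jpg", ".bz2", ".mng" };
            size_t i;

            for (i = 0; i < sizeof(suffixes) / sizeof(suffixes[0]); i++) {
                    size_t slen = strlen(suffixes[i]);

                    if (len >= slen + 1 &&
                        memcmp(name + len - slen, suffixes[i], slen) == 0)
                            return 1;
            }
            return 0;
    }

    int main(void)
    {
            const char *names[] = { "photo.jpg", "notes.txt", "kernel.tar.gz" };
            size_t i;

            for (i = 0; i < 3; i++)
                    printf("%s -> %d\n", names[i],
                           already_compressed(names[i], strlen(names[i])));
            return 0;
    }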
*/ ++ //printk("i_count: %i\n", atomic_read(&inode->i_count)); ++ //if (atomic_read(&inode->i_count) > 1) ++ //if (0) ++ if (ext2_get_dcount(inode) > 1) ++ { ++ mutex_unlock(&inode->i_mutex); /*mw*/ ++ return -ETXTBSY; ++ } ++ else { ++ /* pjm 970429: Discarding ++ cached pages is not very ++ clean, but should work. */ ++ /* pjm 980114: Not quite. We ++ should also sync any ++ mappings to buffers first. ++ This isn't very important, ++ as none of the current ++ e2compr programs can ++ trigger this, but todo. */ ++ invalidate_remote_inode (inode); ++ } ++ } ++ ++ if (EXT2_COMPR_FL ++ & (flags ^ ei->i_flags)) { ++ if (flags & EXT2_COMPR_FL) { ++ if (ei->i_flags & EXT2_COMPRBLK_FL) { ++ /* There shouldn't actually be any ++ compressed blocks, AFAIK. However, ++ this is still possible because sometimes ++ COMPRBLK gets raised just to stop ++ us changing cluster size at the wrong ++ time. ++ ++ todo: Call a function that just ++ checks that there are not compressed ++ clusters, and print a warning if any are ++ found. */ ++ } else { ++ int bits = MIN(EXT2_DEFAULT_LOG2_CLU_NBLOCKS, ++ (EXT2_LOG2_MAX_CLUSTER_BYTES ++ - inode->i_sb->s_blocksize_bits)); ++ ++ ei->i_log2_clu_nblocks = bits; ++ ei->i_clu_nblocks = 1 << bits; ++ } ++ ei->i_compr_method = EXT2_DEFAULT_COMPR_METHOD; ++ if (S_ISREG (inode->i_mode)) { ++ //compress ++#ifdef CONFIG_GZ_HACK ++ /* mw: check for .gz-files and similar ++ * I think this is the most clever place for ++ * rejecting files. They remain regular, uncompressed ++ * files and though can be read bypassing all ++ * compression stuff (= fast) :-). And it seems to save ++ * space... somehow */ ++ if (check_name (inode)) ++ { ++ //printk("non-compressable file extension\n"); ++ mutex_unlock(&inode->i_mutex); ++ return 0; ++ } ++#endif ++ //set flags to trigger compression later on ++ flags |= EXT2_DIRTY_FL; ++ ei->i_compr_flags |= EXT2_CLEANUP_FL; ++ } ++ } else if (S_ISREG (inode->i_mode)) { ++ if (ei->i_flags & EXT2_COMPRBLK_FL) { ++ int err; ++ ++ if (ext2_get_dcount(inode) > 1){ ++ mutex_unlock(&inode->i_mutex); //mw ++ return -ETXTBSY; ++ } ++ err = ext2_decompress_inode(inode); ++ if (err) ++ { ++ mutex_unlock(&inode->i_mutex); //mw ++ return err; ++ } ++ } ++ ei->i_flags &= ~EXT2_DIRTY_FL; ++ ei->i_compr_flags &= ~EXT2_CLEANUP_FL; ++ } ++ } ++ } ++#endif + flags |= oldflags & ~EXT2_FL_USER_MODIFIABLE; ++#ifdef CONFIG_EXT2_COMPRESS ++ /* bug fix: scrub 'B' flag from uncompressed files TLL 02/28/07 */ ++ if (!(flags & EXT2_COMPR_FL) && (flags & EXT2_COMPRBLK_FL) ) ++ { ++ flags &= ~EXT2_COMPRBLK_FL; ++ } ++#endif + ei->i_flags = flags; + + ext2_set_inode_flags(inode); +@@ -158,6 +348,184 @@ setversion_out: + mnt_drop_write_file(filp); + return 0; + } ++#ifdef CONFIG_EXT2_COMPRESS ++ case EXT2_IOC_GETCOMPRMETHOD: /* Result means nothing if COMPR_FL is not set */ ++ return put_user (ei->i_compr_method, (long *) arg); ++ case EXT2_IOC_SETCOMPRMETHOD: ++ if ((current_fsuid() != inode->i_uid) && !capable(CAP_FOWNER)) ++ return -EPERM; ++ if (IS_RDONLY (inode)) ++ return -EROFS; ++ if (get_user (datum, (long*) arg)) ++ return -EFAULT; ++ if (!S_ISREG (inode->i_mode) && !S_ISDIR (inode->i_mode)) ++ return -ENOSYS; ++ /* todo: Allow the below, but set initial value of ++ i_compr_meth at read_inode() time (using default if ++ !/) instead of +c time. Same for cluster ++ size. 
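In practice the flag transitions handled above are driven from userspace through the standard flags ioctl (chattr +c / -c, or e2compr's own tools). A minimal, hedged example of raising the compress flag on a file; it assumes an e2compr kernel, and on failure the errno (for example ETXTBSY while the file is open elsewhere) mirrors the checks above:

    #include <stdio.h>
    #include <fcntl.h>
    #include <unistd.h>
    #include <sys/ioctl.h>
    #include <linux/fs.h>

    int main(int argc, char **argv)
    {
            int fd, flags;

            if (argc < 2)
                    return 1;
            fd = open(argv[1], O_RDONLY);
            if (fd < 0) { perror("open"); return 1; }
            if (ioctl(fd, FS_IOC_GETFLAGS, &flags) < 0) { perror("GETFLAGS"); close(fd); return 1; }
            flags |= FS_COMPR_FL;                 /* the bit EXT2_COMPR_FL maps to */
            if (ioctl(fd, FS_IOC_SETFLAGS, &flags) < 0)
                    perror("SETFLAGS");           /* e.g. ETXTBSY, EPERM */
            close(fd);
            return 0;
    }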
*/ ++ if ((unsigned) datum >= EXT2_N_METHODS) ++ return -EINVAL; ++ if (ei->i_compr_method != datum) { ++ if ((ei->i_compr_method == EXT2_NEVER_METH) ++ && (ei->i_flags & EXT2_COMPR_FL)) ++ return -EPERM; ++ /* If the previous method was `defer' then ++ take a look at all uncompressed clusters ++ and try to compress them. (pjm 1997-04-16) */ ++ if ((ei->i_compr_method == EXT2_DEFER_METH) ++ && S_ISREG (inode->i_mode)) { ++ ei->i_flags |= EXT2_DIRTY_FL; ++ ei->i_compr_flags |= EXT2_CLEANUP_FL; ++ } ++ if ((datum == EXT2_NEVER_METH) ++ && S_ISREG (inode->i_mode)) { ++ //printk("SETCOMPR\n"); ++ if ((ei->i_flags & EXT2_COMPRBLK_FL)) ++ { ++ /*mw*/ ++ mutex_lock(&inode->i_mutex); ++ if (ext2_get_dcount(inode) > 1){ ++ mutex_unlock(&inode->i_mutex); /*mw*/ ++ return -ETXTBSY; ++ } ++ err = ext2_decompress_inode(inode); ++ mutex_unlock(&inode->i_mutex); ++ if ( err < 0) ++ return err; ++ } ++ ei->i_flags &= ~EXT2_DIRTY_FL; ++ ei->i_compr_flags &= ~EXT2_CLEANUP_FL; ++ } ++ ei->i_compr_method = datum; ++ inode->i_ctime = CURRENT_TIME; ++ mark_inode_dirty(inode); ++ } ++#ifdef CONFIG_KMOD ++ if (!ext2_algorithm_table[ext2_method_table[datum].alg].avail) { ++ char str[32]; ++ ++ sprintf(str, "ext2-compr-%s", ext2_algorithm_table[ext2_method_table[datum].alg].name); ++ request_module(str); ++ } ++#endif ++ datum = ((datum < EXT2_N_METHODS) ++ && (ext2_algorithm_table[ext2_method_table[datum].alg].avail)); ++ return put_user(datum, (long *)arg); ++ ++ case EXT2_IOC_GETCLUSTERBIT: ++ if (get_user (datum, (long*) arg)) ++ return -EFAULT; ++ if (!S_ISREG (inode->i_mode)) ++ return -ENOSYS; ++ /* We don't do `down(&inode->i_sem)' here because ++ there's no way for userspace to do the ++ corresponding up(). Userspace must rely on ++ EXT2_NOCOMPR_FL if it needs to lock. */ ++ err = ext2_cluster_is_compressed (inode, datum); ++ if (err < 0) ++ return err; ++ return put_user ((err ? 1 : 0), ++ (long *) arg); ++ ++ case EXT2_IOC_RECOGNIZE_COMPRESSED: ++ if (get_user (datum, (long*) arg)) ++ return -EFAULT; ++ if (!S_ISREG (inode->i_mode)) ++ return -ENOSYS; ++ if (IS_RDONLY (inode)) ++ return -EROFS; ++ return ext2_recognize_compressed (inode, datum); ++ ++ case EXT2_IOC_GETCLUSTERSIZE: ++ /* Result means nothing if COMPR_FL is not set (until ++ SETCLUSTERSIZE w/o COMPR_FL is implemented; ++ todo). */ ++ if (!S_ISREG (inode->i_mode) ++ && !S_ISDIR (inode->i_mode)) ++ return -ENOSYS; ++ return put_user (ei->i_clu_nblocks, (long *) arg); ++ ++ case EXT2_IOC_GETFIRSTCLUSTERSIZE: ++ /* Result means nothing if COMPR_FL is not set (until ++ SETCLUSTERSIZE w/o COMPR_FL is implemented; ++ todo). */ ++ if (!S_ISREG (inode->i_mode) ++ && !S_ISDIR (inode->i_mode)) ++ return -ENOSYS; ++ return put_user (ext2_first_cluster_nblocks(inode), (long *) arg); ++ ++ case EXT2_IOC_SETCLUSTERSIZE: ++ if ((current_fsuid() != inode->i_uid) && !capable(CAP_FOWNER)) ++ return -EPERM; ++ if (IS_RDONLY (inode)) ++ return -EROFS; ++ if (get_user (datum, (long *) arg)) ++ return -EFAULT; ++ if (!S_ISREG (inode->i_mode) ++ && !S_ISDIR (inode->i_mode)) ++ return -ENOSYS; ++ ++ /* These are the only possible cluster sizes. The ++ cluster size must be a power of two so that ++ clusters don't straddle address (aka indirect) ++ blocks. At the moment, the upper limit is constrained ++ by how much memory is allocated for de/compression. ++ Also, the gzip algorithms have some optimisations ++ that assume tht the input is no more than 32KB, ++ and in compress.c we would need to zero more bits ++ of head->holemap. 
(In previous releases, the file ++ format was limited to 32 blocks and under 64KB.) */ ++// #if EXT2_MAX_CLUSTER_BLOCKS > 32 || EXT2_MAX_CLUSTER_NBYTES > 32768 ++// # error "This code not updated for cluster size yet." ++// #endif ++ switch (datum) { ++ case (1 << 2): datum = 2; break; ++ case (1 << 3): datum = 3; break; ++ case (1 << 4): datum = 4; break; ++ case (1 << 5): datum = 5; break; ++ default: return -EINVAL; ++ } ++ ++ assert (ei->i_clu_nblocks == (1 << ei->i_log2_clu_nblocks)); ++ if (datum == ei->i_log2_clu_nblocks) ++ return 0; ++ ++ if (ei->i_flags & EXT2_ECOMPR_FL) ++ return -EPERM; ++ if (!(ei->i_flags & EXT2_COMPR_FL)) ++ return -ENOSYS; ++ ++ /* We currently lack a mechanism to change the cluster ++ size if there are already some compressed clusters. ++ The compression must be done in userspace ++ (e.g. with the e2compress program) instead. */ ++ if (ei->i_flags & EXT2_COMPRBLK_FL) ++ return -ENOSYS; ++ ++ if (datum + inode->i_sb->s_blocksize_bits ++ > EXT2_LOG2_MAX_CLUSTER_BYTES) ++ return -EINVAL; ++ ++ ei->i_log2_clu_nblocks = datum; ++ ei->i_clu_nblocks = 1 << datum; ++ inode->i_ctime = CURRENT_TIME; ++ mark_inode_dirty(inode); ++ return 0; ++ ++ case EXT2_IOC_GETCOMPRRATIO: ++ if (!S_ISREG (inode->i_mode)) ++ return -ENOSYS; ++ if (ei->i_flags & EXT2_ECOMPR_FL) ++ return -EPERM; ++ if ((long) (datum = ext2_count_blocks (inode)) < 0) ++ return datum; ++ if ((err = put_user ((long) datum, (long*) arg))) ++ return err; ++ return put_user ((long) inode->i_blocks, (long*) arg + 1); ++ ++ ++#endif + default: + return -ENOTTY; + } +--- linux-3.4-rc5/fs/ext2/ext2.h 2012-04-29 18:19:10.000000000 -0400 ++++ linux-3.4-rc5-e2c/fs/ext2/ext2.h 2012-04-30 07:30:58.249092266 -0400 +@@ -378,6 +381,7 @@ struct ext2_inode { + #define EXT2_MOUNT_MINIX_DF 0x000080 /* Mimics the Minix statfs */ + #define EXT2_MOUNT_NOBH 0x000100 /* No buffer_heads */ + #define EXT2_MOUNT_NO_UID32 0x000200 /* Disable 32-bit UIDs */ ++#define EXT2_MOUNT_FORCE_COMPAT 0x000400 /* e2compr: Mount despite incompatibilities */ + #define EXT2_MOUNT_XATTR_USER 0x004000 /* Extended user attributes */ + #define EXT2_MOUNT_POSIX_ACL 0x008000 /* POSIX Access Control Lists */ + #define EXT2_MOUNT_XIP 0x010000 /* Execute in place */ +@@ -543,8 +547,25 @@ struct ext2_super_block { + #define EXT2_FEATURE_INCOMPAT_ANY 0xffffffff + + #define EXT2_FEATURE_COMPAT_SUPP EXT2_FEATURE_COMPAT_EXT_ATTR ++ ++/* ++ * e2compr specific ++ */ ++ ++#define EXT2_GRAIN_SIZE 1024 ++#define EXT2_NOCOMPR_FL FS_NOCOMP_FL /* Access raw data */ ++ ++ ++#ifdef CONFIG_EXT2_COMPRESS ++#define EXT2_FEATURE_INCOMPAT_SUPP (EXT2_FEATURE_INCOMPAT_COMPRESSION| \ ++ EXT2_FEATURE_INCOMPAT_FILETYPE| \ ++ EXT2_FEATURE_INCOMPAT_META_BG) ++#else + #define EXT2_FEATURE_INCOMPAT_SUPP (EXT2_FEATURE_INCOMPAT_FILETYPE| \ + EXT2_FEATURE_INCOMPAT_META_BG) ++#endif ++ ++ + #define EXT2_FEATURE_RO_COMPAT_SUPP (EXT2_FEATURE_RO_COMPAT_SPARSE_SUPER| \ + EXT2_FEATURE_RO_COMPAT_LARGE_FILE| \ + EXT2_FEATURE_RO_COMPAT_BTREE_DIR) +@@ -668,6 +690,12 @@ struct ext2_inode_info { + struct ext2_block_alloc_info *i_block_alloc_info; + + __u32 i_dir_start_lookup; ++#ifdef CONFIG_EXT2_COMPRESS ++ __u8 i_log2_clu_nblocks; ++ __u8 i_clu_nblocks; ++ __u8 i_compr_method; ++ __u8 i_compr_flags; ++#endif + #ifdef CONFIG_EXT2_FS_XATTR + /* + * Extended attributes can be read independently of the main file +@@ -757,6 +785,7 @@ extern void ext2_set_inode_flags(struct + extern void ext2_get_inode_flags(struct ext2_inode_info *); + extern int ext2_fiemap(struct inode *inode, struct 
fiemap_extent_info *fieinfo, + u64 start, u64 len); ++extern void ext2_truncate_blocks(struct inode *inode, loff_t offset); + + /* ioctl.c */ + extern long ext2_ioctl(struct file *, unsigned int, unsigned long); +--- linux-3.4-rc5/include/linux/ext2_fs.h 2012-04-29 18:19:10.000000000 -0400 ++++ linux-3.4-rc5-e2c/include/linux/ext2_fs.h 2012-04-30 04:11:03.818143098 -0400 +@@ -39,4 +39,16 @@ static inline u64 ext2_image_size(void * + le32_to_cpup((__le32 *)(p + EXT2_SB_BSIZE_OFFSET)); + } + ++#ifndef __KERNEL__ ++/* This simplifies things for user programs (notably e2fsprogs) that ++ must compile whether or not is present, but ++ would prefer to include it. Presumably the file is present if the ++ user has this version of ext2_fs.h. */ ++ ++# /* Do not remove this comment. */ include ++ ++/* The comment between `#' and `include' prevents mkdep from generating ++ a dependency on ext2_fs_c.h. */ ++#endif ++ + #endif /* _LINUX_EXT2_FS_H */ +--- linux-3.4-rc5/fs/fcntl.c 2012-04-29 18:19:10.000000000 -0400 ++++ linux-3.4-rc5-e2c/fs/fcntl.c 2012-04-30 04:11:03.820143098 -0400 +@@ -25,6 +25,12 @@ + #include + #include + ++#ifdef CONFIG_EXT2_COMPRESS ++//mw: deny O_DIRECT on file with compression ++#include ++#include "ext2/ext2.h" ++#endif ++ + void set_close_on_exec(unsigned int fd, int flag) + { + struct files_struct *files = current->files; +@@ -171,6 +177,16 @@ static int setfl(int fd, struct file * f + if (!filp->f_mapping || !filp->f_mapping->a_ops || + !filp->f_mapping->a_ops->direct_IO) + return -EINVAL; ++ ++#ifdef CONFIG_EXT2_COMPRESS ++ //mw: if we have a compressed ext2 file: deny! ++ // TODO: maybe check fs-type first! ++ //assert(!(EXT2_I(inode)->i_flags & (EXT2_COMPR_FL|EXT2_COMPRBLK_FL))); ++ if (EXT2_I(inode)->i_flags & (EXT2_COMPR_FL|EXT2_COMPRBLK_FL)) ++ { ++ return -EINVAL; ++ } ++#endif + } + + if (filp->f_op && filp->f_op->check_flags) +--- linux-3.4-rc5/mm/truncate.c 2012-04-29 18:19:10.000000000 -0400 ++++ linux-3.4-rc5-e2c/mm/truncate.c 2012-05-02 16:13:52.383974864 -0400 +@@ -22,6 +22,9 @@ + #include + #include "internal.h" + ++#ifdef CONFIG_EXT2_COMPRESS ++#include ++#endif + + /** + * do_invalidatepage - invalidate part or all of a page +@@ -595,6 +598,11 @@ int vmtruncate(struct inode *inode, loff + if (error) + return error; + ++#ifdef CONFIG_EXT2_COMPRESS ++ if ((inode->i_op && inode->i_op->truncate) && ++ ((strcmp(inode->i_sb->s_type->name, "ext2") != 0) || ++ (!(EXT2_I(inode)->i_flags & EXT2_COMPRBLK_FL)))) ++#endif + truncate_setsize(inode, newsize); + if (inode->i_op->truncate) + inode->i_op->truncate(inode); +--- linux-3.4-rc5/mm/swapfile.c 2012-04-29 18:19:10.000000000 -0400 ++++ linux-3.4-rc5-e2c/mm/swapfile.c 2012-04-30 04:11:03.822143098 -0400 +@@ -31,6 +31,10 @@ + #include + #include + #include ++#ifdef CONFIG_EXT2_COMPRESS ++#include ++#endif ++ + + #include + #include +@@ -2060,6 +2064,24 @@ SYSCALL_DEFINE2(swapon, const char __use + } + + inode = mapping->host; ++ ++#ifdef CONFIG_EXT2_COMPRESS ++ /* ++ * Swapping not supported for e2compressed files. ++ * (Actually, this code is pretty useless because we ++ * should get an error later anyway because of the ++ * holes.) Yes, this is pretty horrible code... I'll ++ * improve it later. 
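Taken together, ext2_file_open(), the setfl() hook in fs/fcntl.c and the swapon() check above enforce one rule: compressed files cannot be accessed with O_DIRECT and cannot back swap. A quick way to observe the open-time check, assuming a file that already carries the compress flag on an e2compr mount:

    #define _GNU_SOURCE           /* for O_DIRECT */
    #include <stdio.h>
    #include <errno.h>
    #include <string.h>
    #include <fcntl.h>
    #include <unistd.h>

    int main(int argc, char **argv)
    {
            int fd;

            if (argc < 2)
                    return 1;
            fd = open(argv[1], O_RDONLY | O_DIRECT);
            if (fd < 0)
                    /* Expected for a compressed file: EINVAL from ext2_file_open(). */
                    printf("open(O_DIRECT) failed: %s\n", strerror(errno));
            else {
                    printf("open(O_DIRECT) succeeded (file not compressed?)\n");
                    close(fd);
            }
            return 0;
    }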
++ */ ++ if ((strcmp(inode->i_sb->s_type->name, "ext2") == 0) ++ && (EXT2_I(inode)->i_flags & EXT2_COMPRBLK_FL)) ++ { ++ printk("Assertion: Error NO swap SWAP implemented!\n"); ++ error = -EINVAL; ++ goto bad_swap; ++ } ++#endif ++ + /* If S_ISREG(inode->i_mode) will do mutex_lock(&inode->i_mutex); */ + error = claim_swapfile(p, inode); + if (unlikely(error)) +--- linux-3.4-rc5/mm/filemap.c 2012-04-29 18:19:10.000000000 -0400 ++++ linux-3.4-rc5-e2c/mm/filemap.c 2012-04-30 04:11:03.824143098 -0400 +@@ -43,6 +43,10 @@ + + #include + ++#ifdef CONFIG_EXT2_COMPRESS ++# include ++#endif ++ + /* + * Shared mappings implemented 30.11.1994. It's not fully working yet, + * though. +@@ -277,7 +281,19 @@ int filemap_fdatawait_range(struct addre + PAGECACHE_TAG_WRITEBACK, + min(end - index, (pgoff_t)PAGEVEC_SIZE-1) + 1)) != 0) { + unsigned i; ++#ifdef CONFIG_EXT2_COMPRESS ++/* ++ * I'm not sure that this is right. It has been reworked considerably since ++ * 2.6.5. - whitpa ++ */ ++ struct inode *inode = mapping->host; ++ //printk("wait_on_page_writeback_range\n"); + ++ if ((strcmp(inode->i_sb->s_type->name, "ext2") != 0) ++ || (atomic_read(&inode->i_mutex.count) > 0) ++ || (EXT2_I(inode)->i_compr_flags & ++ EXT2_OSYNC_INODE)) ++#endif + for (i = 0; i < nr_pages; i++) { + struct page *page = pvec.pages[i]; + +@@ -1162,6 +1178,15 @@ page_ok: + } + nr = nr - offset; + ++#ifdef CONFIG_EXT2_COMPRESS ++ lock_page(page); ++ //check again: after locking still uptodate? ++ if(!PageUptodate(page)){ ++ unlock_page(page); ++ goto page_not_up_to_date; ++ } ++#endif ++ + /* If users can be writing to this page using arbitrary + * virtual addresses, take care about potential aliasing + * before reading the page on the kernel side. +@@ -1193,6 +1218,10 @@ page_ok: + offset &= ~PAGE_CACHE_MASK; + prev_offset = offset; + ++#ifdef CONFIG_EXT2_COMPRESS ++ unlock_page(page); ++#endif ++ + page_cache_release(page); + if (ret == nr && desc->count) + continue; +@@ -1202,7 +1231,12 @@ page_not_up_to_date: + /* Get exclusive access to the page ... */ + error = lock_page_killable(page); + if (unlikely(error)) ++ { ++ printk("Readpage Error: mw: page locking failed with code: %i\n", error); ++ printk("Readpage Error: mw: might happen as page was locked 'killable'\n"); ++ printk("Readpage Error: mw: was reading app killed?\n"); + goto readpage_error; ++ } + + page_not_up_to_date_locked: + /* Did it get truncated before we got the lock? */ +@@ -1233,13 +1267,17 @@ readpage: + page_cache_release(page); + goto find_page; + } ++ printk("Readpage Error: fs-specific readpage failed with code: %i\n", error); + goto readpage_error; + } + + if (!PageUptodate(page)) { + error = lock_page_killable(page); + if (unlikely(error)) ++ { ++ printk("Readpage Error: page was not uptodate after read. page locking failed with code: %i\n", error); + goto readpage_error; ++ } + if (!PageUptodate(page)) { + if (page->mapping == NULL) { + /* +@@ -1252,6 +1290,7 @@ readpage: + unlock_page(page); + shrink_readahead_size_eio(filp, ra); + error = -EIO; ++ printk("Readpage Error: page was not uptodate after read AND page locked. failed with code: %i\n", error); + goto readpage_error; + } + unlock_page(page); +@@ -1263,6 +1302,7 @@ readpage_error: + /* UHHUH! A synchronous read error occurred. 
Report it */ + desc->error = error; + page_cache_release(page); ++ printk("Readpage Error\n"); + goto out; + + no_cached_page: diff --git a/3.3.8/lschlv2.patch b/3.3.8/lschlv2.patch new file mode 100644 index 0000000..40ef6be --- /dev/null +++ b/3.3.8/lschlv2.patch @@ -0,0 +1,256 @@ +--- a/arch/arm/mach-kirkwood/include/mach/system.h ++++ b/arch/arm/mach-kirkwood/include/mach/system.h +@@ -9,6 +9,8 @@ + #ifndef __ASM_ARCH_SYSTEM_H + #define __ASM_ARCH_SYSTEM_H + ++#include ++#include + #include + + static inline void arch_idle(void) +--- a/arch/arm/mach-kirkwood/Kconfig ++++ b/arch/arm/mach-kirkwood/Kconfig +@@ -87,6 +87,12 @@ + Say 'Y' here if you want your kernel to support the + HP t5325 Thin Client. + ++config MACH_LINKSTATION_CHLV2 ++ bool "Buffalo LS-CHLv2 Series" ++ help ++ Say 'Y' here if you want your kernel to support the ++ Buffalo LS-CHLv2 Series. ++ + endmenu + + endif +--- a/arch/arm/mach-kirkwood/lschlv2-setup.c ++++ b/arch/arm/mach-kirkwood/lschlv2-setup.c +@@ -0,0 +1,210 @@ ++/* ++ * arch/arm/mach-kirkwood/lschlv2-setup.c ++ * ++ * Buffalo LS Kirkwood Series Setup ++ * ++ * This file is licensed under the terms of the GNU General Public ++ * License version 2. This program is licensed "as is" without any ++ * warranty of any kind, whether express or implied. ++ */ ++ ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include "include/mach/system.h" ++#include ++#include "common.h" ++#include "mpp.h" ++ ++/***************************************************************************** ++ * 512KB SPI Flash on BOOT Device ++ ****************************************************************************/ ++static struct mtd_partition lschlv2_partitions[] = { ++ { ++ .name = "u-boot", ++ .offset = 0x00000, ++ .size = 0x70000, ++ .mask_flags = MTD_WRITEABLE, ++ }, ++ { ++ .name = "u-boot env", ++ .offset = MTDPART_OFS_APPEND, ++ .size = 0x10000, ++ } ++}; ++ ++static struct flash_platform_data lschlv2_spi_slave_data = { ++ .type = "m25p40", ++ .parts = lschlv2_partitions, ++ .nr_parts = ARRAY_SIZE(lschlv2_partitions), ++}; ++ ++static struct spi_board_info __initdata lschlv2_spi_slave_info[] = { ++ { ++ .modalias = "m25p80", ++ .platform_data = &lschlv2_spi_slave_data, ++ .irq = -1, ++ .max_speed_hz = 20000000, ++ .bus_num = 0, ++ .chip_select = 0, ++ } ++}; ++ ++static struct mv643xx_eth_platform_data lschlv2_ge00_data = { ++ .phy_addr = MV643XX_ETH_PHY_ADDR(0), ++}; ++ ++static struct mv643xx_eth_platform_data lschlv2_ge01_data = { ++ .phy_addr = MV643XX_ETH_PHY_ADDR(8), ++}; ++ ++static unsigned int lschlv2_mpp_config[] __initdata = { ++ MPP10_GPO, /* HDD Power */ ++ MPP11_GPIO, /* USB Vbus Power */ ++ MPP18_GPO, /* FAN High on:0, off:1 */ ++ MPP19_GPO, /* FAN Low on:0, off:1 */ ++ MPP36_GPIO, /* FUNC LED */ ++ MPP37_GPIO, /* ALARM LED */ ++ MPP38_GPIO, /* INFO LED */ ++ MPP39_GPIO, /* POWER LED */ ++ MPP40_GPIO, /* FAN LOCK */ ++ MPP41_GPIO, /* FUNC SW */ ++ MPP42_GPIO, /* POWER SW */ ++ MPP43_GPIO, /* POWER AUTO SW */ ++ MPP48_GPIO, /* FUNC RED LED */ ++ MPP49_GPIO, /* UART EN */ ++ 0 ++}; ++ ++static struct mv_sata_platform_data lschlv2_sata_data = { ++ .n_ports = 1, ++}; ++ ++static struct gpio_led lschlv2_led_pins[] = { ++ { ++ .name = "func", ++ .gpio = 36, ++ .active_low = 1, ++ }, ++ { ++ .name = "alarm", ++ .gpio = 37, ++ .active_low = 1, ++ }, ++ { ++ .name = "info", ++ .gpio = 38, ++ .active_low = 1, ++ }, ++ { ++ .name = "power", ++ .gpio = 39, ++ .default_trigger 
= "default-on", ++ .active_low = 1, ++ }, ++ { ++ .name = "func2", ++ .gpio = 48, ++ .active_low = 1, ++ }, ++}; ++ ++static struct gpio_led_platform_data lschlv2_led_data = { ++ .leds = lschlv2_led_pins, ++ .num_leds = ARRAY_SIZE(lschlv2_led_pins), ++}; ++ ++static struct platform_device lschlv2_leds = { ++ .name = "leds-gpio", ++ .id = -1, ++ .dev = { ++ .platform_data = &lschlv2_led_data, ++ } ++}; ++ ++#define LSCHLv2_GPIO_USB_VBUS_EN 11 ++#define LSCHLv2_GPIO_KEY_FUNC 41 ++ ++static struct gpio_keys_button lschlv2_buttons[] = { ++ { ++ .code = KEY_OPTION, ++ .gpio = LSCHLv2_GPIO_KEY_FUNC, ++ .desc = "Function Button", ++ .active_low = 1, ++ }, ++}; ++ ++static struct gpio_keys_platform_data lschlv2_button_data = { ++ .buttons = lschlv2_buttons, ++ .nbuttons = ARRAY_SIZE(lschlv2_buttons), ++}; ++ ++static struct platform_device lschlv2_button_device = { ++ .name = "gpio-keys", ++ .id = -1, ++ .num_resources = 0, ++ .dev = { ++ .platform_data = &lschlv2_button_data, ++ }, ++}; ++ ++static void lschlv2_power_off(void) ++{ ++ arch_reset(0, NULL); ++} ++ ++static void __init lschlv2_init(void) ++{ ++ /* ++ * Basic setup. Needs to be called early. ++ */ ++ kirkwood_init(); ++ kirkwood_mpp_conf(lschlv2_mpp_config); ++ ++ kirkwood_uart0_init(); ++ ++ if (gpio_request(LSCHLv2_GPIO_USB_VBUS_EN, "USB Power Enable") != 0 || ++ gpio_direction_output(LSCHLv2_GPIO_USB_VBUS_EN, 1) != 0) ++ printk(KERN_ERR "can't set up USB Power Enable\n"); ++ kirkwood_ehci_init(); ++ ++ kirkwood_ge00_init(&lschlv2_ge00_data); ++ kirkwood_ge01_init(&lschlv2_ge01_data); ++ ++ kirkwood_sata_init(&lschlv2_sata_data); ++ ++ kirkwood_spi_init(); ++ ++ platform_device_register(&lschlv2_leds); ++ platform_device_register(&lschlv2_button_device); ++ ++ spi_register_board_info(lschlv2_spi_slave_info, ++ ARRAY_SIZE(lschlv2_spi_slave_info)); ++ ++ /* register power-off method */ ++ pm_power_off = lschlv2_power_off; ++ ++ pr_info("%s: finished\n", __func__); ++} ++ ++ ++ ++MACHINE_START(LINKSTATION_CHLV2, "Buffalo Linkstation LS-CHLv2") ++ .atag_offset = 0x100, ++ .init_machine = lschlv2_init, ++ .map_io = kirkwood_map_io, ++ .init_early = kirkwood_init_early, ++ .init_irq = kirkwood_init_irq, ++ .timer = &kirkwood_timer, ++MACHINE_END +--- a/arch/arm/mach-kirkwood/Makefile ++++ b/arch/arm/mach-kirkwood/Makefile +@@ -20,3 +20,4 @@ + obj-$(CONFIG_MACH_T5325) += t5325-setup.o ++obj-$(CONFIG_MACH_LINKSTATION_CHLV2) += lschlv2-setup.o + + obj-$(CONFIG_CPU_IDLE) += cpuidle.o +--- a/arch/arm/mach-kirkwood/common.c ++++ b/arch/arm/mach-kirkwood/common.c +@@ -32,6 +32,7 @@ + #include + #include + #include ++#include + #include "common.h" + + /***************************************************************************** diff --git a/3.3.8/net-netfilter-IFWLOG-2.6.35-buildfix.patch b/3.3.8/net-netfilter-IFWLOG-2.6.35-buildfix.patch new file mode 100644 index 0000000..99d4d06 --- /dev/null +++ b/3.3.8/net-netfilter-IFWLOG-2.6.35-buildfix.patch @@ -0,0 +1,32 @@ +--- linux-2.6.35-rc6-git-mnb0.1/net/ipv4/netfilter/ipt_IFWLOG.c.orig 2010-07-30 21:17:30.000000000 +0300 ++++ linux-2.6.35-rc6-git-mnb0.1/net/ipv4/netfilter/ipt_IFWLOG.c 2010-07-31 13:46:33.834611944 +0300 +@@ -135,7 +135,7 @@ static void ipt_IFWLOG_packet(const stru + } + + static unsigned int ipt_IFWLOG_target(struct sk_buff *skb, +- const struct xt_target_param *target_param) ++ const struct xt_action_param *target_param) + { + const struct ipt_IFWLOG_info *info = target_param->targinfo; + +@@ -144,17 +144,17 @@ static unsigned int ipt_IFWLOG_target(st + return IPT_CONTINUE; + } 
+ +-static bool ipt_IFWLOG_checkentry(const struct xt_tgchk_param *tgchk_param) ++static int ipt_IFWLOG_checkentry(const struct xt_tgchk_param *tgchk_param) + { + const struct ipt_IFWLOG_info *info = tgchk_param->targinfo; + + if (info->prefix[sizeof(info->prefix)-1] != '\0') { + DEBUGP("IFWLOG: prefix term %i\n", + info->prefix[sizeof(info->prefix)-1]); +- return false; ++ return -EINVAL; + } + +- return true; ++ return 0; + } + + static struct xt_target ipt_IFWLOG = { diff --git a/3.3.8/net-netfilter-IFWLOG-2.6.37-buildfix.patch b/3.3.8/net-netfilter-IFWLOG-2.6.37-buildfix.patch new file mode 100644 index 0000000..0ae95aa --- /dev/null +++ b/3.3.8/net-netfilter-IFWLOG-2.6.37-buildfix.patch @@ -0,0 +1,15 @@ + + net/ipv4/netfilter/ipt_IFWLOG.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +--- linux-2.6.37-rc3-git1-tmb0.3/net/ipv4/netfilter/ipt_IFWLOG.c.orig 2010-11-24 21:58:36.000000000 +0200 ++++ linux-2.6.37-rc3-git1-tmb0.3/net/ipv4/netfilter/ipt_IFWLOG.c 2010-11-25 13:08:55.719379646 +0200 +@@ -141,7 +141,7 @@ static unsigned int ipt_IFWLOG_target(st + + ipt_IFWLOG_packet(skb, target_param->in, target_param->out, info); + +- return IPT_CONTINUE; ++ return XT_CONTINUE; + } + + static int ipt_IFWLOG_checkentry(const struct xt_tgchk_param *tgchk_param) diff --git a/3.3.8/net-netfilter-IFWLOG-mdv.patch b/3.3.8/net-netfilter-IFWLOG-mdv.patch new file mode 100644 index 0000000..e5b9c92 --- /dev/null +++ b/3.3.8/net-netfilter-IFWLOG-mdv.patch @@ -0,0 +1,264 @@ +ipt_IFWLOG: Mandriva changes + +This patch holds all the Mandriva changes done in ipt_IFWLOG +netfilter module. + +This work is mostly done by Thomas Backlund, Herton R. Krzesinski +and Luiz Fernando N. Capitulino. + +Signed-off-by: Luiz Fernando N. Capitulino +Signed-off-by: Herton Ronaldo Krzesinski + +--- + include/linux/netfilter_ipv4/Kbuild | 1 + include/linux/netfilter_ipv4/ipt_IFWLOG.h | 23 +++++- + net/ipv4/netfilter/ipt_IFWLOG.c | 108 +++++++++++++++--------------- + 3 files changed, 77 insertions(+), 55 deletions(-) + +diff -p -up linux-2.6.28/include/linux/netfilter_ipv4/ipt_IFWLOG.h.orig linux-2.6.28/include/linux/netfilter_ipv4/ipt_IFWLOG.h +--- linux-2.6.28/include/linux/netfilter_ipv4/ipt_IFWLOG.h.orig 2008-12-12 10:55:07.000000000 -0500 ++++ linux-2.6.28/include/linux/netfilter_ipv4/ipt_IFWLOG.h 2008-12-12 10:56:30.000000000 -0500 +@@ -1,10 +1,25 @@ +-#ifndef _IPT_IFWLOG_H +-#define _IPT_IFWLOG_H ++#ifndef _LINUX_IPT_IFWLOG_H ++#define _LINUX_IPT_IFWLOG_H + + #ifndef NETLINK_IFWLOG +-#define NETLINK_IFWLOG 19 ++#define NETLINK_IFWLOG 20 + #endif + ++#ifndef __KERNEL__ ++/* Multicast groups - backwards compatiblility for userspace */ ++#define IFWLOG_NLGRP_NONE 0x00000000 ++#define IFWLOG_NLGRP_DEF 0x00000001 /* default message group */ ++#endif ++ ++enum { ++ IFWLOGNLGRP_NONE, ++#define IFWLOGNLGRP_NONE IFWLOGNLGRP_NONE ++ IFWLOGNLGRP_DEF, ++#define IFWLOGNLGRP_DEF IFWLOGNLGRP_DEF ++ __IFWLOGNLGRP_MAX ++}; ++#define IFWLOGNLGRP_MAX (__IFWLOGNLGRP_MAX - 1) ++ + #define PREFSIZ 32 + + struct nl_msg { /* Netlink message */ +@@ -23,4 +38,4 @@ struct ipt_IFWLOG_info { + char prefix[PREFSIZ]; + }; + +-#endif /* _IPT_IFWLOG_H */ ++#endif /* _LINUX_IPT_IFWLOG_H */ +diff -p -up linux-2.6.28/net/ipv4/netfilter/ipt_IFWLOG.c.orig linux-2.6.28/net/ipv4/netfilter/ipt_IFWLOG.c +--- linux-2.6.28/net/ipv4/netfilter/ipt_IFWLOG.c.orig 2008-12-12 10:55:07.000000000 -0500 ++++ linux-2.6.28/net/ipv4/netfilter/ipt_IFWLOG.c 2008-12-12 10:57:16.000000000 -0500 +@@ -4,6 +4,14 @@ + * This program is free software; you can redistribute 
it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. ++ * ++ * 2007-10-10 Thomas Backlund : build fixes for 2.6.22.9 ++ * 2007-11-11 Herton Krzesinski : build fixes for 2.6.24-rc ++ * 2007-12-03 Luiz Capitulino : v1.1 ++ * - Better multicast group usage ++ * - Coding style fixes ++ * - Do not return -EINVAL by default in ipt_ifwlog_init() ++ * - Minor refinements + */ + + #include +@@ -19,12 +27,10 @@ + #include + + #include ++#include + #include + #include + +-MODULE_LICENSE("GPL"); +-MODULE_AUTHOR("Samir Bellabes "); +-MODULE_DESCRIPTION("Interactive firewall logging and module"); + + #if 0 + #define DEBUGP PRINTR +@@ -36,44 +42,41 @@ MODULE_DESCRIPTION("Interactive firewall + + static struct sock *nl; + +-#define GROUP 10 +- + /* send struct to userspace */ +-static void send_packet(struct nl_msg msg) ++static void send_packet(const struct nl_msg *msg) + { + struct sk_buff *skb = NULL; + struct nlmsghdr *nlh; ++ unsigned int size; + +- skb = alloc_skb(NLMSG_SPACE(sizeof(struct nl_msg)), GFP_ATOMIC); ++ size = NLMSG_SPACE(sizeof(*msg)); ++ skb = alloc_skb(size, GFP_ATOMIC); + if (!skb) { + PRINTR(KERN_WARNING "IFWLOG: OOM can't allocate skb\n"); +- return ; ++ return; + } + +- nlh = NLMSG_PUT(skb, 0, 0, 0, sizeof(struct nl_msg) - sizeof(*nlh)); ++ nlh = NLMSG_PUT(skb, 0, 0, 0, size - sizeof(*nlh)); + +- memcpy(NLMSG_DATA(nlh), (const void*)&msg, sizeof(struct nl_msg)); ++ memcpy(NLMSG_DATA(nlh), (const void *) msg, sizeof(*msg)); + + NETLINK_CB(skb).pid = 0; /* from kernel */ +- NETLINK_CB(skb).dst_pid = 0; /* multicast */ +- NETLINK_CB(skb).dst_group = 10; ++ NETLINK_CB(skb).dst_group = IFWLOGNLGRP_DEF; + + if (nl) { + DEBUGP(KERN_WARNING + "IFWLOG: nlmsg_len=%ld\nnlmsg_type=%d nlmsg_flags=%d\nnlmsg_seq=%ld nlmsg_pid = %ld\n", + (long)nlh->nlmsg_len, nlh->nlmsg_type, nlh->nlmsg_flags, + (long)nlh->nlmsg_seq, (long)nlh->nlmsg_pid); +- DEBUGP(KERN_WARNING "prefix : %s\n", msg.prefix); ++ DEBUGP(KERN_WARNING "prefix : %s\n", msg->prefix); + +- netlink_broadcast(nl, skb, 0, 10, GFP_ATOMIC); +- return ; ++ netlink_broadcast(nl, skb, 0, IFWLOGNLGRP_DEF, GFP_ATOMIC); ++ return; + } + +- nlmsg_failure: +- if (skb) +- kfree_skb(skb); +- PRINTR(KERN_WARNING "IFWLOG: Error sending netlink packet\n"); +- return ; ++nlmsg_failure: ++ kfree_skb(skb); ++ PRINTR(KERN_WARNING "IFWLOG: Error sending netlink packet\n"); + } + + /* fill struct for userspace */ +@@ -128,73 +131,76 @@ static void ipt_IFWLOG_packet(const stru + do_gettimeofday((struct timeval *)&tv); + msg.timestamp_sec = tv.tv_sec; + +- send_packet(msg); ++ send_packet(&msg); + } + +-static unsigned int ipt_IFWLOG_target(struct sk_buff **pskb, +- const struct net_device *in, +- const struct net_device *out, +- unsigned int hooknum, +- const void *targinfo, +- void *userinfo) ++static unsigned int ipt_IFWLOG_target(struct sk_buff *skb, ++ const struct xt_target_param *target_param) + { +- const struct ipt_IFWLOG_info *info = targinfo; ++ const struct ipt_IFWLOG_info *info = target_param->targinfo; + +- ipt_IFWLOG_packet(*pskb, in, out, info); ++ ipt_IFWLOG_packet(skb, target_param->in, target_param->out, info); + + return IPT_CONTINUE; + } + +-static int ipt_IFWLOG_checkentry(const char *tablename, +- const struct ipt_entry *e, +- void *targinfo, +- unsigned int targinfosize, +- unsigned int hook_mask) ++static bool ipt_IFWLOG_checkentry(const struct xt_tgchk_param *tgchk_param) + { +- const struct ipt_IFWLOG_info *info = targinfo; ++ const struct ipt_IFWLOG_info 
*info = tgchk_param->targinfo; + + if (info->prefix[sizeof(info->prefix)-1] != '\0') { + DEBUGP("IFWLOG: prefix term %i\n", + info->prefix[sizeof(info->prefix)-1]); +- return 0; ++ return false; + } + +- return 1; ++ return true; + } + +-static struct ipt_target ipt_IFWLOG = { ++static struct xt_target ipt_IFWLOG = { + .name = "IFWLOG", ++ .family = AF_INET, + .target = ipt_IFWLOG_target, + .targetsize = sizeof(struct ipt_IFWLOG_info), + .checkentry = ipt_IFWLOG_checkentry, + .me = THIS_MODULE, + }; + +-static int __init init(void) ++static int __init ipt_ifwlog_init(void) + { +- nl = (struct sock*) netlink_kernel_create(NETLINK_IFWLOG, GROUP, NULL, THIS_MODULE); +- if (!nl) { +- PRINTR(KERN_WARNING "IFWLOG: cannot create netlink socket\n"); +- return -EINVAL; +- } ++ int err; + +- if (ipt_register_target(&ipt_IFWLOG)) { ++ nl = netlink_kernel_create(&init_net, NETLINK_IFWLOG, IFWLOGNLGRP_MAX, ++ NULL, NULL, THIS_MODULE); ++ if (!nl) { ++ PRINTR(KERN_WARNING "IFWLOG: cannot create netlink socket\n"); ++ return -ENOMEM; ++ } ++ ++ err = xt_register_target(&ipt_IFWLOG); ++ if (err) { + if (nl && nl->sk_socket) + sock_release(nl->sk_socket); +- return -EINVAL; ++ return err; + } + + PRINTR(KERN_INFO "IFWLOG: register target\n"); + return 0; + } + +-static void __exit fini(void) ++static void __exit ipt_ifwlog_fini(void) + { + if (nl && nl->sk_socket) +- sock_release(nl->sk_socket); ++ sock_release(nl->sk_socket); + PRINTR(KERN_INFO "IFWLOG: unregister target\n"); +- ipt_unregister_target(&ipt_IFWLOG); ++ xt_unregister_target(&ipt_IFWLOG); + } + +-module_init(init); +-module_exit(fini); ++module_init(ipt_ifwlog_init); ++module_exit(ipt_ifwlog_fini); ++ ++MODULE_LICENSE("GPL"); ++MODULE_AUTHOR("Samir Bellabes "); ++MODULE_AUTHOR("Luiz Capitulino "); ++MODULE_DESCRIPTION("Interactive firewall logging and module"); ++MODULE_VERSION("v1.1"); +--- linux/include/linux/netfilter_ipv4/Kbuild.net-netfilter-IFWLOG-mdv.orig 2012-05-21 01:29:13.000000000 +0300 ++++ linux/include/linux/netfilter_ipv4/Kbuild 2012-05-26 01:27:24.743139430 +0300 +@@ -2,6 +2,7 @@ header-y += ip_queue.h + header-y += ip_tables.h + header-y += ipt_CLUSTERIP.h + header-y += ipt_ECN.h ++header-y += ipt_IFWLOG.h + header-y += ipt_LOG.h + header-y += ipt_REJECT.h + header-y += ipt_TTL.h diff --git a/3.3.8/net-netfilter-IFWLOG.patch b/3.3.8/net-netfilter-IFWLOG.patch new file mode 100644 index 0000000..6efe89a --- /dev/null +++ b/3.3.8/net-netfilter-IFWLOG.patch @@ -0,0 +1,269 @@ +--- + include/linux/netfilter_ipv4/ipt_IFWLOG.h | 26 +++ + net/ipv4/netfilter/Kconfig | 11 + + net/ipv4/netfilter/Makefile | 1 + net/ipv4/netfilter/ipt_IFWLOG.c | 200 ++++++++++++++++++++++++++++++ + 4 files changed, 238 insertions(+) + +--- /dev/null ++++ b/net/ipv4/netfilter/ipt_IFWLOG.c +@@ -0,0 +1,200 @@ ++/* Interactive Firewall for Mandriva ++ * Samir Bellabes ++ * ++ * This program is free software; you can redistribute it and/or modify ++ * it under the terms of the GNU General Public License version 2 as ++ * published by the Free Software Foundation. ++ */ ++ ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++ ++#include ++#include ++#include ++ ++MODULE_LICENSE("GPL"); ++MODULE_AUTHOR("Samir Bellabes "); ++MODULE_DESCRIPTION("Interactive firewall logging and module"); ++ ++#if 0 ++#define DEBUGP PRINTR ++#else ++#define DEBUGP(format, args...) ++#endif ++ ++#define PRINTR(format, args...) 
do { if(net_ratelimit()) printk(format, ##args); } while(0) ++ ++static struct sock *nl; ++ ++#define GROUP 10 ++ ++/* send struct to userspace */ ++static void send_packet(struct nl_msg msg) ++{ ++ struct sk_buff *skb = NULL; ++ struct nlmsghdr *nlh; ++ ++ skb = alloc_skb(NLMSG_SPACE(sizeof(struct nl_msg)), GFP_ATOMIC); ++ if (!skb) { ++ PRINTR(KERN_WARNING "IFWLOG: OOM can't allocate skb\n"); ++ return ; ++ } ++ ++ nlh = NLMSG_PUT(skb, 0, 0, 0, sizeof(struct nl_msg) - sizeof(*nlh)); ++ ++ memcpy(NLMSG_DATA(nlh), (const void*)&msg, sizeof(struct nl_msg)); ++ ++ NETLINK_CB(skb).pid = 0; /* from kernel */ ++ NETLINK_CB(skb).dst_pid = 0; /* multicast */ ++ NETLINK_CB(skb).dst_group = 10; ++ ++ if (nl) { ++ DEBUGP(KERN_WARNING ++ "IFWLOG: nlmsg_len=%ld\nnlmsg_type=%d nlmsg_flags=%d\nnlmsg_seq=%ld nlmsg_pid = %ld\n", ++ (long)nlh->nlmsg_len, nlh->nlmsg_type, nlh->nlmsg_flags, ++ (long)nlh->nlmsg_seq, (long)nlh->nlmsg_pid); ++ DEBUGP(KERN_WARNING "prefix : %s\n", msg.prefix); ++ ++ netlink_broadcast(nl, skb, 0, 10, GFP_ATOMIC); ++ return ; ++ } ++ ++ nlmsg_failure: ++ if (skb) ++ kfree_skb(skb); ++ PRINTR(KERN_WARNING "IFWLOG: Error sending netlink packet\n"); ++ return ; ++} ++ ++/* fill struct for userspace */ ++static void ipt_IFWLOG_packet(const struct sk_buff *skb, ++ const struct net_device *in, ++ const struct net_device *out, ++ const struct ipt_IFWLOG_info *info) ++{ ++ struct iphdr iph; ++ struct tcphdr tcph; ++ struct udphdr udph; ++ struct nl_msg msg; ++ struct iphdr _iph, *ih; ++ struct timeval tv; ++ ++ memset(&msg, 0, sizeof(struct nl_msg)); ++ ++ ih = skb_header_pointer(skb, 0, sizeof(_iph), &_iph); ++ if (ih == NULL) { ++ PRINTR(KERN_WARNING "IFWLOG: skb truncated"); ++ return; ++ } ++ ++ /* save interface name */ ++ if (in) ++ strcpy(msg.indev_name, in->name); ++ if (out) ++ strcpy(msg.outdev_name, out->name); ++ ++ /* save log-prefix */ ++ strcpy(msg.prefix, info->prefix); ++ ++ /* save ip header */ ++ skb_copy_bits(skb, 0, &iph, sizeof(iph)); ++ memcpy(&msg.ip, &iph, sizeof(struct iphdr)); ++ ++ /* save transport header */ ++ switch (iph.protocol){ ++ case IPPROTO_TCP: ++ skb_copy_bits(skb, iph.ihl*4 , &tcph, sizeof(tcph)); ++ memcpy(&msg.h.th, &tcph, sizeof(struct tcphdr)); ++ break; ++ case IPPROTO_UDP: ++ skb_copy_bits(skb, iph.ihl*4 , &udph, sizeof(udph)); ++ memcpy(&msg.h.uh, &udph, sizeof(struct udphdr)); ++ break; ++ default: ++ break; ++ } ++ ++ /* save timetamp */ ++ do_gettimeofday((struct timeval *)&tv); ++ msg.timestamp_sec = tv.tv_sec; ++ ++ send_packet(msg); ++} ++ ++static unsigned int ipt_IFWLOG_target(struct sk_buff **pskb, ++ const struct net_device *in, ++ const struct net_device *out, ++ unsigned int hooknum, ++ const void *targinfo, ++ void *userinfo) ++{ ++ const struct ipt_IFWLOG_info *info = targinfo; ++ ++ ipt_IFWLOG_packet(*pskb, in, out, info); ++ ++ return IPT_CONTINUE; ++} ++ ++static int ipt_IFWLOG_checkentry(const char *tablename, ++ const struct ipt_entry *e, ++ void *targinfo, ++ unsigned int targinfosize, ++ unsigned int hook_mask) ++{ ++ const struct ipt_IFWLOG_info *info = targinfo; ++ ++ if (info->prefix[sizeof(info->prefix)-1] != '\0') { ++ DEBUGP("IFWLOG: prefix term %i\n", ++ info->prefix[sizeof(info->prefix)-1]); ++ return 0; ++ } ++ ++ return 1; ++} ++ ++static struct ipt_target ipt_IFWLOG = { ++ .name = "IFWLOG", ++ .target = ipt_IFWLOG_target, ++ .targetsize = sizeof(struct ipt_IFWLOG_info), ++ .checkentry = ipt_IFWLOG_checkentry, ++ .me = THIS_MODULE, ++}; ++ ++static int __init init(void) ++{ ++ nl = (struct sock*) 
netlink_kernel_create(NETLINK_IFWLOG, GROUP, NULL, THIS_MODULE); ++ if (!nl) { ++ PRINTR(KERN_WARNING "IFWLOG: cannot create netlink socket\n"); ++ return -EINVAL; ++ } ++ ++ if (ipt_register_target(&ipt_IFWLOG)) { ++ if (nl && nl->sk_socket) ++ sock_release(nl->sk_socket); ++ return -EINVAL; ++ } ++ ++ PRINTR(KERN_INFO "IFWLOG: register target\n"); ++ return 0; ++} ++ ++static void __exit fini(void) ++{ ++ if (nl && nl->sk_socket) ++ sock_release(nl->sk_socket); ++ PRINTR(KERN_INFO "IFWLOG: unregister target\n"); ++ ipt_unregister_target(&ipt_IFWLOG); ++} ++ ++module_init(init); ++module_exit(fini); +--- a/net/ipv4/netfilter/Kconfig ++++ b/net/ipv4/netfilter/Kconfig +@@ -331,6 +331,17 @@ config IP_NF_TARGET_TTL + (e.g. when running oldconfig). It selects + CONFIG_NETFILTER_XT_TARGET_HL. + ++config IP_NF_TARGET_IFWLOG ++ tristate 'IFWLOG target support' ++ depends on IP_NF_IPTABLES ++ help ++ This option adds a `IFWLOG' target, which is used by ++ Interactive Firewall for sending informations to a userspace ++ daemon ++ ++ If you want to compile it as a module, say M here and read ++ Documentation/modules.txt. If unsure, say `N'. ++ + # raw + specific targets + config IP_NF_RAW + tristate 'raw table support (required for NOTRACK/TRACE)' +--- /dev/null ++++ b/include/linux/netfilter_ipv4/ipt_IFWLOG.h +@@ -0,0 +1,26 @@ ++#ifndef _IPT_IFWLOG_H ++#define _IPT_IFWLOG_H ++ ++#ifndef NETLINK_IFWLOG ++#define NETLINK_IFWLOG 19 ++#endif ++ ++#define PREFSIZ 32 ++ ++struct nl_msg { /* Netlink message */ ++ long timestamp_sec; /* time packet */ ++ char indev_name[IFNAMSIZ]; /* name of the ingoing interface */ ++ char outdev_name[IFNAMSIZ]; /* name of the outgoing interface */ ++ unsigned char prefix[PREFSIZ]; /* informations on the logging reason */ ++ struct iphdr ip; ++ union { ++ struct tcphdr th; ++ struct udphdr uh; ++ } h; ++}; ++ ++struct ipt_IFWLOG_info { ++ char prefix[PREFSIZ]; ++}; ++ ++#endif /* _IPT_IFWLOG_H */ +--- linux/net/ipv4/netfilter/Makefile.net-netfilter-IFWLOG.orig 2012-05-21 01:29:13.000000000 +0300 ++++ linux/net/ipv4/netfilter/Makefile 2012-05-26 01:23:57.511514194 +0300 +@@ -53,6 +53,7 @@ obj-$(CONFIG_IP_NF_MATCH_RPFILTER) += ip + + # targets + obj-$(CONFIG_IP_NF_TARGET_CLUSTERIP) += ipt_CLUSTERIP.o ++obj-$(CONFIG_IP_NF_TARGET_IFWLOG) += ipt_IFWLOG.o + obj-$(CONFIG_IP_NF_TARGET_ECN) += ipt_ECN.o + obj-$(CONFIG_IP_NF_TARGET_MASQUERADE) += ipt_MASQUERADE.o + obj-$(CONFIG_IP_NF_TARGET_NETMAP) += ipt_NETMAP.o diff --git a/3.3.8/net-netfilter-psd-2.6.35-buildfix.patch b/3.3.8/net-netfilter-psd-2.6.35-buildfix.patch new file mode 100644 index 0000000..218031c --- /dev/null +++ b/3.3.8/net-netfilter-psd-2.6.35-buildfix.patch @@ -0,0 +1,11 @@ +--- linux-2.6.35-rc6-git-mnb0.1/net/ipv4/netfilter/ipt_psd.c.orig 2010-07-30 21:17:30.000000000 +0300 ++++ linux-2.6.35-rc6-git-mnb0.1/net/ipv4/netfilter/ipt_psd.c 2010-07-31 13:29:00.623601957 +0300 +@@ -98,7 +98,7 @@ static inline int hashfunc(struct in_add + + static bool + ipt_psd_match(const struct sk_buff *pskb, +- const struct xt_match_param *match_param) ++ struct xt_action_param *match_param) + { + struct iphdr *ip_hdr; + struct tcphdr *tcp_hdr; diff --git a/3.3.8/net-netfilter-psd-mdv.patch b/3.3.8/net-netfilter-psd-mdv.patch new file mode 100644 index 0000000..68884aa --- /dev/null +++ b/3.3.8/net-netfilter-psd-mdv.patch @@ -0,0 +1,235 @@ +ipt_psd: Mandriva changes + +This patch holds all the Mandriva changes done in ipt_psd +netfilter module. + +Most of the time they're just upgrades to match with new +API in the kernel. 
+ +This work is mostly done by Thomas Backlund, Herton R. +Krzesinski and Luiz Fernando N. Capitulino. + +Signed-off-by: Luiz Fernando N. Capitulino +Signed-off-by: Herton Ronaldo Krzesinski + +--- + include/linux/netfilter_ipv4/Kbuild | 1 + net/ipv4/netfilter/Kconfig | 8 ++ + net/ipv4/netfilter/ipt_psd.c | 113 ++++++++++++++---------------------- + 3 files changed, 55 insertions(+), 67 deletions(-) + +diff -p -up linux-2.6.28/net/ipv4/netfilter/ipt_psd.c.orig linux-2.6.28/net/ipv4/netfilter/ipt_psd.c +--- linux-2.6.28/net/ipv4/netfilter/ipt_psd.c.orig 2008-12-12 11:03:05.000000000 -0500 ++++ linux-2.6.28/net/ipv4/netfilter/ipt_psd.c 2008-12-12 11:04:03.000000000 -0500 +@@ -1,21 +1,24 @@ + /* +- This is a module which is used for PSD (portscan detection) +- Derived from scanlogd v2.1 written by Solar Designer +- and LOG target module. +- +- Copyright (C) 2000,2001 astaro AG +- +- This file is distributed under the terms of the GNU General Public +- License (GPL). Copies of the GPL can be obtained from: +- ftp://prep.ai.mit.edu/pub/gnu/GPL +- +- 2000-05-04 Markus Hennig : initial +- 2000-08-18 Dennis Koslowski : first release +- 2000-12-01 Dennis Koslowski : UDP scans detection added +- 2001-01-02 Dennis Koslowski : output modified +- 2001-02-04 Jan Rekorajski : converted from target to match +- 2004-05-05 Martijn Lievaart : ported to 2.6 +-*/ ++ * This is a module which is used for PSD (portscan detection) ++ * Derived from scanlogd v2.1 written by Solar Designer ++ * and LOG target module. ++ * ++ * Copyright (C) 2000,2001 astaro AG ++ * ++ * This file is distributed under the terms of the GNU General Public ++ * License (GPL). Copies of the GPL can be obtained from: ++ * ftp://prep.ai.mit.edu/pub/gnu/GPL ++ * ++ * 2000-05-04 Markus Hennig : initial ++ * 2000-08-18 Dennis Koslowski : first release ++ * 2000-12-01 Dennis Koslowski : UDP scans detection added ++ * 2001-01-02 Dennis Koslowski : output modified ++ * 2001-02-04 Jan Rekorajski : converted from target to match ++ * 2004-05-05 Martijn Lievaart : ported to 2.6 ++ * 2007-10-10 Thomas Backlund : 2.6.22 update ++ * 2007-11-14 Luiz Capitulino : 2.6.22 API usage fixes ++ * 2007-11-26 Herton Ronaldo Krzesinski : switch xt_match->match to bool ++ */ + + #include + #include +@@ -54,7 +57,7 @@ struct port { + */ + struct host { + struct host *next; /* Next entry with the same hash */ +- clock_t timestamp; /* Last update time */ ++ unsigned long timestamp; /* Last update time */ + struct in_addr src_addr; /* Source address */ + struct in_addr dest_addr; /* Destination address */ + unsigned short src_port; /* Source port */ +@@ -93,33 +96,29 @@ static inline int hashfunc(struct in_add + return hash & (HASH_SIZE - 1); + } + +-static int ++static bool + ipt_psd_match(const struct sk_buff *pskb, +- const struct net_device *in, +- const struct net_device *out, +- const void *matchinfo, +- int offset, +- int *hotdrop) ++ const struct xt_match_param *match_param) + { + struct iphdr *ip_hdr; + struct tcphdr *tcp_hdr; + struct in_addr addr; + u_int16_t src_port,dest_port; + u_int8_t tcp_flags, proto; +- clock_t now; ++ unsigned long now; + struct host *curr, *last, **head; + int hash, index, count; + + /* Parameters from userspace */ +- const struct ipt_psd_info *psdinfo = matchinfo; ++ const struct ipt_psd_info *psdinfo = match_param->matchinfo; + + /* IP header */ +- ip_hdr = pskb->nh.iph; ++ ip_hdr = ipip_hdr(pskb); + + /* Sanity check */ + if (ntohs(ip_hdr->frag_off) & IP_OFFSET) { + DEBUGP("PSD: sanity check failed\n"); +- return 0; ++ return 
false; + } + + /* TCP or UDP ? */ +@@ -127,7 +126,7 @@ ipt_psd_match(const struct sk_buff *pskb + + if (proto != IPPROTO_TCP && proto != IPPROTO_UDP) { + DEBUGP("PSD: protocol not supported\n"); +- return 0; ++ return false; + } + + /* Get the source address, source & destination ports, and TCP flags */ +@@ -151,7 +150,7 @@ ipt_psd_match(const struct sk_buff *pskb + * them spoof us. [DHCP needs this feature - HW] */ + if (!addr.s_addr) { + DEBUGP("PSD: spoofed source address (0.0.0.0)\n"); +- return 0; ++ return false; + } + + /* Use jiffies here not to depend on someone setting the time while we're +@@ -298,46 +297,26 @@ ipt_psd_match(const struct sk_buff *pskb + + out_no_match: + spin_unlock(&state.lock); +- return 0; ++ return false; + + out_match: + spin_unlock(&state.lock); +- return 1; ++ DEBUGP("PSD: Dropping packets from "NIPQUAD_FMT" \n", ++ NIPQUAD(curr->src_addr.s_addr)); ++ return true; + } + +-static int ipt_psd_checkentry(const char *tablename, +- const struct ipt_ip *e, +- void *matchinfo, +- unsigned int matchsize, +- unsigned int hook_mask) +-{ +-/* const struct ipt_psd_info *psdinfo = targinfo;*/ +- +- /* we accept TCP only */ +-/* if (e->ip.proto != IPPROTO_TCP) { */ +-/* DEBUGP("PSD: specified protocol may be TCP only\n"); */ +-/* return 0; */ +-/* } */ +- +- if (matchsize != IPT_ALIGN(sizeof(struct ipt_psd_info))) { +- DEBUGP("PSD: matchsize %u != %u\n", +- matchsize, +- IPT_ALIGN(sizeof(struct ipt_psd_info))); +- return 0; +- } +- +- return 1; +-} +- +-static struct ipt_match ipt_psd_reg = { +- .name = "psd", +- .match = ipt_psd_match, +- .checkentry = ipt_psd_checkentry, +- .me = THIS_MODULE }; ++static struct xt_match ipt_psd_reg = { ++ .name = "psd", ++ .family = AF_INET, ++ .match = ipt_psd_match, ++ .matchsize = sizeof(struct ipt_psd_info), ++ .me = THIS_MODULE ++}; + +-static int __init init(void) ++static int __init ipt_psd_init(void) + { +- if (ipt_register_match(&ipt_psd_reg)) ++ if (xt_register_match(&ipt_psd_reg)) + return -EINVAL; + + memset(&state, 0, sizeof(state)); +@@ -348,11 +327,11 @@ static int __init init(void) + return 0; + } + +-static void __exit fini(void) ++static void __exit ipt_psd_fini(void) + { +- ipt_unregister_match(&ipt_psd_reg); ++ xt_unregister_match(&ipt_psd_reg); + printk("netfilter PSD unloaded - (c) astaro AG\n"); + } + +-module_init(init); +-module_exit(fini); ++module_init(ipt_psd_init); ++module_exit(ipt_psd_fini); +--- a/net/ipv4/netfilter/Kconfig ++++ b/net/ipv4/netfilter/Kconfig +@@ -322,6 +322,14 @@ + (e.g. when running oldconfig). It selects + CONFIG_NETFILTER_XT_TARGET_HL. + ++config IP_NF_MATCH_PSD ++ tristate 'Port scanner detection support' ++ depends on NETFILTER_ADVANCED ++ help ++ Module used for PSD (portscan detection). ++ ++ To compile it as a module, choose M here. If unsure, say N. 
++ + config IP_NF_TARGET_IFWLOG + tristate 'IFWLOG target support' + depends on IP_NF_IPTABLES +--- linux/include/linux/netfilter_ipv4/Kbuild.net-netfilter-psd-mdv.orig 2012-05-26 01:28:56.000000000 +0300 ++++ linux/include/linux/netfilter_ipv4/Kbuild 2012-05-26 01:30:21.493540796 +0300 +@@ -11,6 +11,7 @@ + header-y += ipt_addrtype.h + header-y += ipt_ah.h + header-y += ipt_ecn.h ++header-y += ipt_psd.h + header-y += ipt_realm.h + header-y += ipt_ttl.h + header-y += nf_nat.h diff --git a/3.3.8/net-netfilter-psd.patch b/3.3.8/net-netfilter-psd.patch new file mode 100644 index 0000000..8ec326f --- /dev/null +++ b/3.3.8/net-netfilter-psd.patch @@ -0,0 +1,420 @@ +--- + include/linux/netfilter_ipv4/ipt_psd.h | 40 +++ + net/ipv4/netfilter/Makefile | 1 + net/ipv4/netfilter/ipt_psd.c | 358 +++++++++++++++++++++++++++++++++ + 3 files changed, 399 insertions(+) + +--- /dev/null ++++ b/net/ipv4/netfilter/ipt_psd.c +@@ -0,0 +1,358 @@ ++/* ++ This is a module which is used for PSD (portscan detection) ++ Derived from scanlogd v2.1 written by Solar Designer ++ and LOG target module. ++ ++ Copyright (C) 2000,2001 astaro AG ++ ++ This file is distributed under the terms of the GNU General Public ++ License (GPL). Copies of the GPL can be obtained from: ++ ftp://prep.ai.mit.edu/pub/gnu/GPL ++ ++ 2000-05-04 Markus Hennig : initial ++ 2000-08-18 Dennis Koslowski : first release ++ 2000-12-01 Dennis Koslowski : UDP scans detection added ++ 2001-01-02 Dennis Koslowski : output modified ++ 2001-02-04 Jan Rekorajski : converted from target to match ++ 2004-05-05 Martijn Lievaart : ported to 2.6 ++*/ ++ ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++ ++#if 0 ++#define DEBUGP printk ++#else ++#define DEBUGP(format, args...) ++#endif ++ ++MODULE_LICENSE("GPL"); ++MODULE_AUTHOR("Dennis Koslowski "); ++ ++#define HF_DADDR_CHANGING 0x01 ++#define HF_SPORT_CHANGING 0x02 ++#define HF_TOS_CHANGING 0x04 ++#define HF_TTL_CHANGING 0x08 ++ ++/* ++ * Information we keep per each target port ++ */ ++struct port { ++ u_int16_t number; /* port number */ ++ u_int8_t proto; /* protocol number */ ++ u_int8_t and_flags; /* tcp ANDed flags */ ++ u_int8_t or_flags; /* tcp ORed flags */ ++}; ++ ++/* ++ * Information we keep per each source address. ++ */ ++struct host { ++ struct host *next; /* Next entry with the same hash */ ++ clock_t timestamp; /* Last update time */ ++ struct in_addr src_addr; /* Source address */ ++ struct in_addr dest_addr; /* Destination address */ ++ unsigned short src_port; /* Source port */ ++ int count; /* Number of ports in the list */ ++ int weight; /* Total weight of ports in the list */ ++ struct port ports[SCAN_MAX_COUNT - 1]; /* List of ports */ ++ unsigned char tos; /* TOS */ ++ unsigned char ttl; /* TTL */ ++ unsigned char flags; /* HF_ flags bitmask */ ++}; ++ ++/* ++ * State information. ++ */ ++static struct { ++ spinlock_t lock; ++ struct host list[LIST_SIZE]; /* List of source addresses */ ++ struct host *hash[HASH_SIZE]; /* Hash: pointers into the list */ ++ int index; /* Oldest entry to be replaced */ ++} state; ++ ++/* ++ * Convert an IP address into a hash table index. 
++ */ ++static inline int hashfunc(struct in_addr addr) ++{ ++ unsigned int value; ++ int hash; ++ ++ value = addr.s_addr; ++ hash = 0; ++ do { ++ hash ^= value; ++ } while ((value >>= HASH_LOG)); ++ ++ return hash & (HASH_SIZE - 1); ++} ++ ++static int ++ipt_psd_match(const struct sk_buff *pskb, ++ const struct net_device *in, ++ const struct net_device *out, ++ const void *matchinfo, ++ int offset, ++ int *hotdrop) ++{ ++ struct iphdr *ip_hdr; ++ struct tcphdr *tcp_hdr; ++ struct in_addr addr; ++ u_int16_t src_port,dest_port; ++ u_int8_t tcp_flags, proto; ++ clock_t now; ++ struct host *curr, *last, **head; ++ int hash, index, count; ++ ++ /* Parameters from userspace */ ++ const struct ipt_psd_info *psdinfo = matchinfo; ++ ++ /* IP header */ ++ ip_hdr = pskb->nh.iph; ++ ++ /* Sanity check */ ++ if (ntohs(ip_hdr->frag_off) & IP_OFFSET) { ++ DEBUGP("PSD: sanity check failed\n"); ++ return 0; ++ } ++ ++ /* TCP or UDP ? */ ++ proto = ip_hdr->protocol; ++ ++ if (proto != IPPROTO_TCP && proto != IPPROTO_UDP) { ++ DEBUGP("PSD: protocol not supported\n"); ++ return 0; ++ } ++ ++ /* Get the source address, source & destination ports, and TCP flags */ ++ ++ addr.s_addr = ip_hdr->saddr; ++ ++ tcp_hdr = (struct tcphdr*)((u_int32_t *)ip_hdr + ip_hdr->ihl); ++ ++ /* Yep, it´s dirty */ ++ src_port = tcp_hdr->source; ++ dest_port = tcp_hdr->dest; ++ ++ if (proto == IPPROTO_TCP) { ++ tcp_flags = *((u_int8_t*)tcp_hdr + 13); ++ } ++ else { ++ tcp_flags = 0x00; ++ } ++ ++ /* We're using IP address 0.0.0.0 for a special purpose here, so don't let ++ * them spoof us. [DHCP needs this feature - HW] */ ++ if (!addr.s_addr) { ++ DEBUGP("PSD: spoofed source address (0.0.0.0)\n"); ++ return 0; ++ } ++ ++ /* Use jiffies here not to depend on someone setting the time while we're ++ * running; we need to be careful with possible return value overflows. */ ++ now = jiffies; ++ ++ spin_lock(&state.lock); ++ ++ /* Do we know this source address already? */ ++ count = 0; ++ last = NULL; ++ if ((curr = *(head = &state.hash[hash = hashfunc(addr)]))) ++ do { ++ if (curr->src_addr.s_addr == addr.s_addr) break; ++ count++; ++ if (curr->next) last = curr; ++ } while ((curr = curr->next)); ++ ++ if (curr) { ++ ++ /* We know this address, and the entry isn't too old. Update it. */ ++ if (now - curr->timestamp <= (psdinfo->delay_threshold*HZ)/100 && ++ time_after_eq(now, curr->timestamp)) { ++ ++ /* Just update the appropriate list entry if we've seen this port already */ ++ for (index = 0; index < curr->count; index++) { ++ if (curr->ports[index].number == dest_port) { ++ curr->ports[index].proto = proto; ++ curr->ports[index].and_flags &= tcp_flags; ++ curr->ports[index].or_flags |= tcp_flags; ++ goto out_no_match; ++ } ++ } ++ ++ /* TCP/ACK and/or TCP/RST to a new port? This could be an outgoing connection. */ ++ if (proto == IPPROTO_TCP && (tcp_hdr->ack || tcp_hdr->rst)) ++ goto out_no_match; ++ ++ /* Packet to a new port, and not TCP/ACK: update the timestamp */ ++ curr->timestamp = now; ++ ++ /* Logged this scan already? Then drop the packet. 
*/ ++ if (curr->weight >= psdinfo->weight_threshold) ++ goto out_match; ++ ++ /* Specify if destination address, source port, TOS or TTL are not fixed */ ++ if (curr->dest_addr.s_addr != ip_hdr->daddr) ++ curr->flags |= HF_DADDR_CHANGING; ++ if (curr->src_port != src_port) ++ curr->flags |= HF_SPORT_CHANGING; ++ if (curr->tos != ip_hdr->tos) ++ curr->flags |= HF_TOS_CHANGING; ++ if (curr->ttl != ip_hdr->ttl) ++ curr->flags |= HF_TTL_CHANGING; ++ ++ /* Update the total weight */ ++ curr->weight += (ntohs(dest_port) < 1024) ? ++ psdinfo->lo_ports_weight : psdinfo->hi_ports_weight; ++ ++ /* Got enough destination ports to decide that this is a scan? */ ++ /* Then log it and drop the packet. */ ++ if (curr->weight >= psdinfo->weight_threshold) ++ goto out_match; ++ ++ /* Remember the new port */ ++ if (curr->count < SCAN_MAX_COUNT) { ++ curr->ports[curr->count].number = dest_port; ++ curr->ports[curr->count].proto = proto; ++ curr->ports[curr->count].and_flags = tcp_flags; ++ curr->ports[curr->count].or_flags = tcp_flags; ++ curr->count++; ++ } ++ ++ goto out_no_match; ++ } ++ ++ /* We know this address, but the entry is outdated. Mark it unused, and ++ * remove from the hash table. We'll allocate a new entry instead since ++ * this one might get re-used too soon. */ ++ curr->src_addr.s_addr = 0; ++ if (last) ++ last->next = last->next->next; ++ else if (*head) ++ *head = (*head)->next; ++ last = NULL; ++ } ++ ++ /* We don't need an ACK from a new source address */ ++ if (proto == IPPROTO_TCP && tcp_hdr->ack) ++ goto out_no_match; ++ ++ /* Got too many source addresses with the same hash value? Then remove the ++ * oldest one from the hash table, so that they can't take too much of our ++ * CPU time even with carefully chosen spoofed IP addresses. */ ++ if (count >= HASH_MAX && last) last->next = NULL; ++ ++ /* We're going to re-use the oldest list entry, so remove it from the hash ++ * table first (if it is really already in use, and isn't removed from the ++ * hash table already because of the HASH_MAX check above). */ ++ ++ /* First, find it */ ++ if (state.list[state.index].src_addr.s_addr) ++ head = &state.hash[hashfunc(state.list[state.index].src_addr)]; ++ else ++ head = &last; ++ last = NULL; ++ if ((curr = *head)) ++ do { ++ if (curr == &state.list[state.index]) break; ++ last = curr; ++ } while ((curr = curr->next)); ++ ++ /* Then, remove it */ ++ if (curr) { ++ if (last) ++ last->next = last->next->next; ++ else if (*head) ++ *head = (*head)->next; ++ } ++ ++ /* Get our list entry */ ++ curr = &state.list[state.index++]; ++ if (state.index >= LIST_SIZE) state.index = 0; ++ ++ /* Link it into the hash table */ ++ head = &state.hash[hash]; ++ curr->next = *head; ++ *head = curr; ++ ++ /* And fill in the fields */ ++ curr->timestamp = now; ++ curr->src_addr = addr; ++ curr->dest_addr.s_addr = ip_hdr->daddr; ++ curr->src_port = src_port; ++ curr->count = 1; ++ curr->weight = (ntohs(dest_port) < 1024) ? 
++ psdinfo->lo_ports_weight : psdinfo->hi_ports_weight; ++ curr->ports[0].number = dest_port; ++ curr->ports[0].proto = proto; ++ curr->ports[0].and_flags = tcp_flags; ++ curr->ports[0].or_flags = tcp_flags; ++ curr->tos = ip_hdr->tos; ++ curr->ttl = ip_hdr->ttl; ++ ++out_no_match: ++ spin_unlock(&state.lock); ++ return 0; ++ ++out_match: ++ spin_unlock(&state.lock); ++ return 1; ++} ++ ++static int ipt_psd_checkentry(const char *tablename, ++ const struct ipt_ip *e, ++ void *matchinfo, ++ unsigned int matchsize, ++ unsigned int hook_mask) ++{ ++/* const struct ipt_psd_info *psdinfo = targinfo;*/ ++ ++ /* we accept TCP only */ ++/* if (e->ip.proto != IPPROTO_TCP) { */ ++/* DEBUGP("PSD: specified protocol may be TCP only\n"); */ ++/* return 0; */ ++/* } */ ++ ++ if (matchsize != IPT_ALIGN(sizeof(struct ipt_psd_info))) { ++ DEBUGP("PSD: matchsize %u != %u\n", ++ matchsize, ++ IPT_ALIGN(sizeof(struct ipt_psd_info))); ++ return 0; ++ } ++ ++ return 1; ++} ++ ++static struct ipt_match ipt_psd_reg = { ++ .name = "psd", ++ .match = ipt_psd_match, ++ .checkentry = ipt_psd_checkentry, ++ .me = THIS_MODULE }; ++ ++static int __init init(void) ++{ ++ if (ipt_register_match(&ipt_psd_reg)) ++ return -EINVAL; ++ ++ memset(&state, 0, sizeof(state)); ++ ++ spin_lock_init(&(state.lock)); ++ ++ printk("netfilter PSD loaded - (c) astaro AG\n"); ++ return 0; ++} ++ ++static void __exit fini(void) ++{ ++ ipt_unregister_match(&ipt_psd_reg); ++ printk("netfilter PSD unloaded - (c) astaro AG\n"); ++} ++ ++module_init(init); ++module_exit(fini); +--- /dev/null ++++ b/include/linux/netfilter_ipv4/ipt_psd.h +@@ -0,0 +1,40 @@ ++#ifndef _IPT_PSD_H ++#define _IPT_PSD_H ++ ++#include ++#include ++ ++/* ++ * High port numbers have a lower weight to reduce the frequency of false ++ * positives, such as from passive mode FTP transfers. ++ */ ++#define PORT_WEIGHT_PRIV 3 ++#define PORT_WEIGHT_HIGH 1 ++ ++/* ++ * Port scan detection thresholds: at least COUNT ports need to be scanned ++ * from the same source, with no longer than DELAY ticks between ports. ++ */ ++#define SCAN_MIN_COUNT 7 ++#define SCAN_MAX_COUNT (SCAN_MIN_COUNT * PORT_WEIGHT_PRIV) ++#define SCAN_WEIGHT_THRESHOLD SCAN_MAX_COUNT ++#define SCAN_DELAY_THRESHOLD (300) /* old usage of HZ here was erroneously and broke under uml */ ++ ++/* ++ * Keep track of up to LIST_SIZE source addresses, using a hash table of ++ * HASH_SIZE entries for faster lookups, but limiting hash collisions to ++ * HASH_MAX source addresses per the same hash value. 
++ */ ++#define LIST_SIZE 0x100 ++#define HASH_LOG 9 ++#define HASH_SIZE (1 << HASH_LOG) ++#define HASH_MAX 0x10 ++ ++struct ipt_psd_info { ++ unsigned int weight_threshold; ++ unsigned int delay_threshold; ++ unsigned short lo_ports_weight; ++ unsigned short hi_ports_weight; ++}; ++ ++#endif /*_IPT_PSD_H*/ +--- a/net/ipv4/netfilter/Makefile ++++ b/net/ipv4/netfilter/Makefile +@@ -49,6 +49,7 @@ + + # matches + obj-$(CONFIG_IP_NF_MATCH_AH) += ipt_ah.o ++obj-$(CONFIG_IP_NF_MATCH_PSD) += ipt_psd.o + obj-$(CONFIG_IP_NF_MATCH_RPFILTER) += ipt_rpfilter.o + + # targets diff --git a/3.3.8/netfilter-implement-rfc-1123-for-ftp-conntrack.patch b/3.3.8/netfilter-implement-rfc-1123-for-ftp-conntrack.patch new file mode 100644 index 0000000..30cae8c --- /dev/null +++ b/3.3.8/netfilter-implement-rfc-1123-for-ftp-conntrack.patch @@ -0,0 +1,190 @@ +From: Jeff Mahoney +Subject: netfilter: Implement RFC 1123 for FTP conntrack +References: bnc#466279 bnc#681639 +Patch-mainline: Submitted via http://bugzilla.netfilter.org/show_bug.cgi?id=574 23 Jan 2011 + + The FTP conntrack code currently only accepts the following format for + the 227 response for PASV: + 227 Entering Passive Mode (148,100,81,40,31,161). + + It doesn't accept the following format from an obscure server: + 227 Data transfer will passively listen to 67,218,99,134,50,144 + + From RFC 1123: + The format of the 227 reply to a PASV command is not + well standardized. In particular, an FTP client cannot + assume that the parentheses shown on page 40 of RFC-959 + will be present (and in fact, Figure 3 on page 43 omits + them). Therefore, a User-FTP program that interprets + the PASV reply must scan the reply for the first digit + of the host and port numbers. + + This patch adds support for the RFC 1123 clarification by: + - Allowing a search filter to specify NUL as the terminator so that + try_number will return successfully if the array of numbers has been + filled when an unexpected character is encountered. + - Using space as the separator for the 227 reply and then scanning for + the first digit of the number sequence. The number sequence is parsed + out using the existing try_rfc959 but with a NUL terminator. 
+ + Tracked in: https://bugzilla.novell.com/show_bug.cgi?id=466279 + +Reported-by: Mark Post +Signed-off-by: Jeff Mahoney +--- + net/netfilter/nf_conntrack_ftp.c | 73 ++++++++++++++++++++++++++++----------- + 1 file changed, 54 insertions(+), 19 deletions(-) + +--- a/net/netfilter/nf_conntrack_ftp.c ++++ b/net/netfilter/nf_conntrack_ftp.c +@@ -53,10 +53,14 @@ unsigned int (*nf_nat_ftp_hook)(struct s + struct nf_conntrack_expect *exp); + EXPORT_SYMBOL_GPL(nf_nat_ftp_hook); + +-static int try_rfc959(const char *, size_t, struct nf_conntrack_man *, char); +-static int try_eprt(const char *, size_t, struct nf_conntrack_man *, char); ++static int try_rfc959(const char *, size_t, struct nf_conntrack_man *, ++ char, unsigned int *); ++static int try_rfc1123(const char *, size_t, struct nf_conntrack_man *, ++ char, unsigned int *); ++static int try_eprt(const char *, size_t, struct nf_conntrack_man *, ++ char, unsigned int *); + static int try_epsv_response(const char *, size_t, struct nf_conntrack_man *, +- char); ++ char, unsigned int *); + + static struct ftp_search { + const char *pattern; +@@ -64,7 +68,7 @@ static struct ftp_search { + char skip; + char term; + enum nf_ct_ftp_type ftptype; +- int (*getnum)(const char *, size_t, struct nf_conntrack_man *, char); ++ int (*getnum)(const char *, size_t, struct nf_conntrack_man *, char, unsigned int *); + } search[IP_CT_DIR_MAX][2] = { + [IP_CT_DIR_ORIGINAL] = { + { +@@ -88,10 +92,8 @@ static struct ftp_search { + { + .pattern = "227 ", + .plen = sizeof("227 ") - 1, +- .skip = '(', +- .term = ')', + .ftptype = NF_CT_FTP_PASV, +- .getnum = try_rfc959, ++ .getnum = try_rfc1123, + }, + { + .pattern = "229 ", +@@ -130,8 +132,9 @@ static int try_number(const char *data, + i++; + else { + /* Unexpected character; true if it's the +- terminator and we're finished. */ +- if (*data == term && i == array_size - 1) ++ terminator (or we don't care about one) ++ and we're finished. */ ++ if ((*data == term || !term) && i == array_size - 1) + return len; + + pr_debug("Char %u (got %u nums) `%u' unexpected\n", +@@ -146,7 +149,8 @@ static int try_number(const char *data, + + /* Returns 0, or length of numbers: 192,168,1,1,5,6 */ + static int try_rfc959(const char *data, size_t dlen, +- struct nf_conntrack_man *cmd, char term) ++ struct nf_conntrack_man *cmd, char term, ++ unsigned int *offset) + { + int length; + u_int32_t array[6]; +@@ -161,6 +165,33 @@ static int try_rfc959(const char *data, + return length; + } + ++/* ++ * From RFC 1123: ++ * The format of the 227 reply to a PASV command is not ++ * well standardized. In particular, an FTP client cannot ++ * assume that the parentheses shown on page 40 of RFC-959 ++ * will be present (and in fact, Figure 3 on page 43 omits ++ * them). Therefore, a User-FTP program that interprets ++ * the PASV reply must scan the reply for the first digit ++ * of the host and port numbers. 
++ */ ++static int try_rfc1123(const char *data, size_t dlen, ++ struct nf_conntrack_man *cmd, char term, ++ unsigned int *offset) ++{ ++ int i; ++ for (i = 0; i < dlen; i++) ++ if (isdigit(data[i])) ++ break; ++ ++ if (i == dlen) ++ return 0; ++ ++ *offset += i; ++ ++ return try_rfc959(data + i, dlen - i, cmd, 0, offset); ++} ++ + /* Grab port: number up to delimiter */ + static int get_port(const char *data, int start, size_t dlen, char delim, + __be16 *port) +@@ -189,7 +220,7 @@ static int get_port(const char *data, in + + /* Returns 0, or length of numbers: |1|132.235.1.2|6275| or |2|3ffe::1|6275| */ + static int try_eprt(const char *data, size_t dlen, struct nf_conntrack_man *cmd, +- char term) ++ char term, unsigned int *offset) + { + char delim; + int length; +@@ -237,7 +268,8 @@ static int try_eprt(const char *data, si + + /* Returns 0, or length of numbers: |||6446| */ + static int try_epsv_response(const char *data, size_t dlen, +- struct nf_conntrack_man *cmd, char term) ++ struct nf_conntrack_man *cmd, char term, ++ unsigned int *offset) + { + char delim; + +@@ -259,9 +291,10 @@ static int find_pattern(const char *data + unsigned int *numlen, + struct nf_conntrack_man *cmd, + int (*getnum)(const char *, size_t, +- struct nf_conntrack_man *, char)) ++ struct nf_conntrack_man *, char, ++ unsigned int *)) + { +- size_t i; ++ size_t i = plen; + + pr_debug("find_pattern `%s': dlen = %Zu\n", pattern, dlen); + if (dlen == 0) +@@ -291,16 +324,18 @@ static int find_pattern(const char *data + pr_debug("Pattern matches!\n"); + /* Now we've found the constant string, try to skip + to the 'skip' character */ +- for (i = plen; data[i] != skip; i++) +- if (i == dlen - 1) return -1; ++ if (skip) { ++ for (i = plen; data[i] != skip; i++) ++ if (i == dlen - 1) return -1; + +- /* Skip over the last character */ +- i++; ++ /* Skip over the last character */ ++ i++; ++ } + + pr_debug("Skipped up to `%c'!\n", skip); + + *numoff = i; +- *numlen = getnum(data + i, dlen - i, cmd, term); ++ *numlen = getnum(data + i, dlen - i, cmd, term, numoff); + if (!*numlen) + return -1; + diff --git a/3.3.8/netfilter-ip_conntrack_slp.patch b/3.3.8/netfilter-ip_conntrack_slp.patch new file mode 100644 index 0000000..ff72d85 --- /dev/null +++ b/3.3.8/netfilter-ip_conntrack_slp.patch @@ -0,0 +1,185 @@ +From: Jiri Bohac +Subject: connection tracking helper for SLP +References: fate#301134 +Patch-mainline: Not yet + +A simple connection tracking helper for SLP. Marks replies to a +SLP broadcast query as ESTABLISHED to allow them to pass through the +firewall. + +Signed-off-by: Jiri Bohac + +--- + net/netfilter/Kconfig | 15 ++++ + net/netfilter/Makefile | 1 + net/netfilter/nf_conntrack_slp.c | 131 +++++++++++++++++++++++++++++++++++++++ + 3 files changed, 147 insertions(+) + +--- a/net/netfilter/Kconfig ++++ b/net/netfilter/Kconfig +@@ -290,6 +290,21 @@ config NF_CONNTRACK_TFTP + + To compile it as a module, choose M here. If unsure, say N. + ++config NF_CONNTRACK_SLP ++ tristate "SLP protocol support" ++ depends on NF_CONNTRACK ++ depends on NETFILTER_ADVANCED ++ help ++ SLP queries are sometimes sent as broadcast messages from an ++ unprivileged port and responded to with unicast messages to the ++ same port. This make them hard to firewall properly because connection ++ tracking doesn't deal with broadcasts. This helper tracks locally ++ originating broadcast SLP queries and the corresponding ++ responses. It relies on correct IP address configuration, specifically ++ netmask and broadcast address. 
++ ++ To compile it as a module, choose M here. If unsure, say N. ++ + config NF_CT_NETLINK + tristate 'Connection tracking netlink interface' + select NETFILTER_NETLINK +--- a/net/netfilter/Makefile ++++ b/net/netfilter/Makefile +@@ -36,6 +36,7 @@ obj-$(CONFIG_NF_CONNTRACK_PPTP) += nf_co + obj-$(CONFIG_NF_CONNTRACK_SANE) += nf_conntrack_sane.o + obj-$(CONFIG_NF_CONNTRACK_SIP) += nf_conntrack_sip.o + obj-$(CONFIG_NF_CONNTRACK_TFTP) += nf_conntrack_tftp.o ++obj-$(CONFIG_NF_CONNTRACK_SLP) += nf_conntrack_slp.o + + # transparent proxy support + obj-$(CONFIG_NETFILTER_TPROXY) += nf_tproxy_core.o +--- /dev/null ++++ b/net/netfilter/nf_conntrack_slp.c +@@ -0,0 +1,131 @@ ++/* ++ * NetBIOS name service broadcast connection tracking helper ++ * ++ * (c) 2007 Jiri Bohac ++ * (c) 2005 Patrick McHardy ++ * ++ * This program is free software; you can redistribute it and/or ++ * modify it under the terms of the GNU General Public License ++ * as published by the Free Software Foundation; either version ++ * 2 of the License, or (at your option) any later version. ++ */ ++/* ++ * This helper tracks locally originating NetBIOS name service ++ * requests by issuing permanent expectations (valid until ++ * timing out) matching all reply connections from the ++ * destination network. The only NetBIOS specific thing is ++ * actually the port number. ++ */ ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++ ++#include ++#include ++#include ++ ++#define SLP_PORT 427 ++ ++MODULE_AUTHOR("Jiri Bohac "); ++MODULE_DESCRIPTION("SLP broadcast connection tracking helper"); ++MODULE_LICENSE("GPL"); ++MODULE_ALIAS("ip_conntrack_slp"); ++ ++static unsigned int timeout __read_mostly = 3; ++module_param(timeout, uint, 0400); ++MODULE_PARM_DESC(timeout, "timeout for master connection/replies in seconds"); ++ ++static int help(struct sk_buff *skb, unsigned int protoff, ++ struct nf_conn *ct, enum ip_conntrack_info ctinfo) ++{ ++ struct nf_conntrack_expect *exp; ++ struct rtable *rt = skb_rtable(skb); ++ struct in_device *in_dev; ++ __be32 mask = 0; ++ __be32 src = 0; ++ ++ /* we're only interested in locally generated packets */ ++ if (skb->sk == NULL) ++ goto out; ++ if (rt == NULL || !(rt->rt_flags & (RTCF_MULTICAST|RTCF_BROADCAST))) ++ goto out; ++ if (CTINFO2DIR(ctinfo) != IP_CT_DIR_ORIGINAL) ++ goto out; ++ ++ rcu_read_lock(); ++ in_dev = __in_dev_get_rcu(rt->dst.dev); ++ if (in_dev != NULL) { ++ for_primary_ifa(in_dev) { ++ /* this is a hack as slp uses multicast we can't match ++ * the destination address to some broadcast address. So ++ * just take the first one. 
Better would be to install ++ * expectations for all addresses */ ++ mask = ifa->ifa_mask; ++ src = ifa->ifa_broadcast; ++ break; ++ } endfor_ifa(in_dev); ++ } ++ rcu_read_unlock(); ++ ++ if (mask == 0 || src == 0) ++ goto out; ++ ++ exp = nf_ct_expect_alloc(ct); ++ if (exp == NULL) ++ goto out; ++ ++ exp->tuple = ct->tuplehash[IP_CT_DIR_REPLY].tuple; ++ exp->tuple.src.u3.ip = src; ++ exp->tuple.src.u.udp.port = htons(SLP_PORT); ++ ++ exp->mask.src.u3.ip = mask; ++ exp->mask.src.u.udp.port = htons(0xFFFF); ++ ++ exp->expectfn = NULL; ++ exp->flags = NF_CT_EXPECT_PERMANENT; ++ exp->class = NF_CT_EXPECT_CLASS_DEFAULT; ++ exp->helper = NULL; ++ ++ nf_ct_expect_related(exp); ++ nf_ct_expect_put(exp); ++ ++ nf_ct_refresh(ct, skb, timeout * HZ); ++out: ++ return NF_ACCEPT; ++} ++ ++static struct nf_conntrack_expect_policy exp_policy = { ++ .max_expected = 1, ++}; ++ ++static struct nf_conntrack_helper helper __read_mostly = { ++ .name = "slp", ++ .tuple.src.l3num = AF_INET, ++ .tuple.src.u.udp.port = __constant_htons(SLP_PORT), ++ .tuple.dst.protonum = IPPROTO_UDP, ++ .me = THIS_MODULE, ++ .help = help, ++ .expect_policy = &exp_policy, ++}; ++ ++static int __init nf_conntrack_slp_init(void) ++{ ++ exp_policy.timeout = timeout; ++ return nf_conntrack_helper_register(&helper); ++} ++ ++static void __exit nf_conntrack_slp_fini(void) ++{ ++ nf_conntrack_helper_unregister(&helper); ++} ++ ++module_init(nf_conntrack_slp_init); ++module_exit(nf_conntrack_slp_fini); diff --git a/3.3.8/series b/3.3.8/series new file mode 100644 index 0000000..ec4f73f --- /dev/null +++ b/3.3.8/series @@ -0,0 +1,87 @@ +0001-block-cgroups-kconfig-build-bits-for-BFQ-v5-3.3.patch +0002-block-introduce-the-BFQ-v5-I-O-sched-for-3.3.patch + +3.3-ck1.patch + +620-sched_esfq.patch +621-sched_act_connmark.patch + +0001-AppArmor-compatibility-patch-for-v5-network-controll.patch +0002-AppArmor-compatibility-patch-for-v5-interface.patch +0003-AppArmor-Allow-dfa-backward-compatibility-with-broke.patch + +cloneconfig.patch +kbuild-compress-kernel-modules-on-installation.patch +ata-prefer-ata-drivers-over-ide-drivers-when-both-are-built.patch +910-kobject_uevent.patch +911-kobject_add_broadcast_uevent.patch +colored-printk-3.3.8.patch + +linux-2.6-x86-tune-generic.patch +hz-432-kconfig-option.patch +hz-864-kconfig-option.patch + +Add_CONFIG_VFAT_FS_DUALNAMES_option.patch +linux-2.6-defaults-fat-utf8.patch +aufs-3.x-rcN.patch +accessfs-3.2-0.26.patch +wrapfs-v3.3-rc1-429-g65388bc.patch + +imqmq-3.3.patch + +vserver-3.3.8-vs2.3.3.4.patch +uksm-0.1.2.1-for-v3.3.ge.8.patch + +600-netfilter_layer7_2.22.patch +601-netfilter_layer7_pktmatch.patch +602-netfilter_layer7_match.patch +603-netfilter_layer7_2.6.36_fix.patch +604-netfilter_cisco_794x_iphone.patch +610-netfilter_match_bypass_default_checks.patch +611-netfilter_match_bypass_default_table.patch +612-netfilter_match_reduce_memory_access.patch +613-netfilter_optional_tcp_window_check.patch +net-netfilter-IFWLOG.patch +net-netfilter-IFWLOG-mdv.patch +net-netfilter-IFWLOG-2.6.35-buildfix.patch +net-netfilter-IFWLOG-2.6.37-buildfix.patch +net-netfilter-psd.patch +net-netfilter-psd-mdv.patch +net-netfilter-psd-2.6.35-buildfix.patch +netfilter-implement-rfc-1123-for-ftp-conntrack.patch +netfilter-ip_conntrack_slp.patch + +v3.3-ARM-kirkwood-Add-support-for-Buffalo-LS-XHL.patch +v3.2-ARM-orion-Add-support-for-Buffalo-LS-PRODUO.patch +v3.3-ARM-orion-Add-support-for-Buffalo-LS-QL.patch +v3.2-ARM-kirkwood-Add-support-for-Buffalo-LS-VL.patch +v3.2-ARM-kirkwood-Add-support-for-Buffalo-LS-WVL.patch 
+v3.3-ARM-kirkwood-Add-support-for-Buffalo-LS-CHLv2.patch + + +3rd-3rdparty-1.0-tree.patch +3rd-3rdparty-merge.patch +3rd-3rdparty-netatop-0.1.1.patch +3rd-3rdparty-button_hotplug-0.4.1.patch +3rd-3rdparty-gpio_button_hotplug-0.1.patch +3rd-3rdparty-gpio_event_drv-0.1.patch + + +fs-ext4-fix-the-free-blocks-calculation-for-ext3-file-systems-w-uninit_bg.patch + +fs-udf-use-ret-instead-of-abusing-i-in-udf_load_logicalvol.patch +fs-udf-avoid-run-away-loop-when-partition-table-length-is-corrupted_CVE-2012-3400.patch +fs-udf-fortify-loading-of-sparing-table_CVE-2012-3400.patch + +fs-epoll-clear-the-tfile_check_list-on-ELOOP_CVE-2012-3375.patch + +fs-btrfs-run-delayed-directory-updates-during-log-replay.patch + +fs-ecryptfs-gracefully-refuse-miscdev-file-ops-on-inherited-passed-files.patch +fs-ecryptfs-fix-lockdep-warning-in-miscdev-operations.patch +fs-ecryptfs-properly-check-for-o_rdonly-flag-before-doing-privileged-open.patch + +fs-remove-easily-user-triggerable-bug-from-generic_setlease.patch + +fs-ext4-fix-duplicated-mnt_drop_write-call-in-ext4_ioc_move_ext.patch + diff --git a/3.3.8/uksm-0.1.2.1-for-v3.3.ge.8.patch b/3.3.8/uksm-0.1.2.1-for-v3.3.ge.8.patch new file mode 100644 index 0000000..700c72f --- /dev/null +++ b/3.3.8/uksm-0.1.2.1-for-v3.3.ge.8.patch @@ -0,0 +1,7023 @@ +diff --git a/Documentation/vm/00-INDEX b/Documentation/vm/00-INDEX +index 5481c8b..7141876 100644 +--- a/Documentation/vm/00-INDEX ++++ b/Documentation/vm/00-INDEX +@@ -14,6 +14,8 @@ hwpoison.txt + - explains what hwpoison is + ksm.txt + - how to use the Kernel Samepage Merging feature. ++uksm.txt ++ - Introduction to Ultra KSM + locking + - info on how locking and synchronization is done in the Linux vm code. + map_hugetlb.c +diff --git a/Documentation/vm/uksm.txt b/Documentation/vm/uksm.txt +new file mode 100644 +index 0000000..d4aaae8 +--- /dev/null ++++ b/Documentation/vm/uksm.txt +@@ -0,0 +1,56 @@ ++The Ultra Kernel Samepage Merging feature ++---------------------------------------------- ++/* ++ * Ultra KSM. Copyright (C) 2011-2012 Nai Xia ++ * ++ * This is an improvement upon KSM. Some basic data structures and routines ++ * are borrowed from ksm.c . ++ * ++ * Its new features: ++ * 1. Full system scan: ++ * It automatically scans all user processes' anonymous VMAs. Kernel-user ++ * interaction to submit a memory area to KSM is no longer needed. ++ * ++ * 2. Rich area detection: ++ * It automatically detects rich areas containing abundant duplicated ++ * pages based. Rich areas are given a full scan speed. Poor areas are ++ * sampled at a reasonable speed with very low CPU consumption. ++ * ++ * 3. Ultra Per-page scan speed improvement: ++ * A new hash algorithm is proposed. As a result, on a machine with ++ * Core(TM)2 Quad Q9300 CPU in 32-bit mode and 800MHZ DDR2 main memory, it ++ * can scan memory areas that does not contain duplicated pages at speed of ++ * 627MB/sec ~ 2445MB/sec and can merge duplicated areas at speed of ++ * 477MB/sec ~ 923MB/sec. ++ * ++ * 4. Thrashing area avoidance: ++ * Thrashing area(an VMA that has frequent Ksm page break-out) can be ++ * filtered out. My benchmark shows it's more efficient than KSM's per-page ++ * hash value based volatile page detection. ++ * ++ * ++ * 5. Misc changes upon KSM: ++ * * It has a fully x86-opitmized memcmp dedicated for 4-byte-aligned page ++ * comparison. It's much faster than default C version on x86. 
++ * * rmap_item now has an struct *page member to loosely cache a ++ * address-->page mapping, which reduces too much time-costly ++ * follow_page(). ++ * * The VMA creation/exit procedures are hooked to let the Ultra KSM know. ++ * * try_to_merge_two_pages() now can revert a pte if it fails. No break_ ++ * ksm is needed for this case. ++ * ++ * 6. Full Zero Page consideration(contributed by Figo Zhang) ++ * Now uksmd consider full zero pages as special pages and merge them to an ++ * special unswappable uksm zero page. ++ */ ++ ++ChangeLog: ++ ++2012-05-05 The creation of this Doc ++2012-05-08 UKSM 0.1.1.1 libc crash bug fix, api clean up, doc clean up. ++2012-05-28 UKSM 0.1.1.2 bug fix release ++2012-06-26 UKSM 0.1.2-beta1 first beta release for 0.1.2 ++2012-07-2 UKSM 0.1.2-beta2 ++2012-07-10 UKSM 0.1.2-beta3 ++2012-07-26 UKSM 0.1.2 Fine grained speed control, more scan optimization. ++2012-10-13 UKSM 0.1.2.1 Bug fixes. +diff --git a/fs/exec.c b/fs/exec.c +index ae42277..c1c65bc 100644 +--- a/fs/exec.c ++++ b/fs/exec.c +@@ -19,7 +19,7 @@ + * current->executable is only used by the procfs. This allows a dispatch + * table to check for several different types of binary formats. We keep + * trying until we recognize the file or we run out of supported binary +- * formats. ++ * formats. + */ + + #include +@@ -55,6 +55,7 @@ + #include + #include + #include ++#include + + #include + #include +@@ -87,7 +88,7 @@ int __register_binfmt(struct linux_binfmt * fmt, int insert) + insert ? list_add(&fmt->lh, &formats) : + list_add_tail(&fmt->lh, &formats); + write_unlock(&binfmt_lock); +- return 0; ++ return 0; + } + + EXPORT_SYMBOL(__register_binfmt); +@@ -1174,7 +1175,7 @@ void setup_new_exec(struct linux_binprm * bprm) + group */ + + current->self_exec_id++; +- ++ + flush_signal_handlers(current, 0); + flush_old_files(current->files); + } +@@ -1269,8 +1270,8 @@ static int check_unsafe_exec(struct linux_binprm *bprm) + return res; + } + +-/* +- * Fill the binprm structure from the inode. ++/* ++ * Fill the binprm structure from the inode. + * Check permissions, then read the first 128 (BINPRM_BUF_SIZE) bytes + * + * This may be called multiple times for binary chains (scripts for example). 
+diff --git a/fs/proc/meminfo.c b/fs/proc/meminfo.c +index 80e4645..33f9e9b 100644 +--- a/fs/proc/meminfo.c ++++ b/fs/proc/meminfo.c +@@ -87,6 +87,9 @@ static int meminfo_proc_show(struct seq_file *m, void *v) + "SUnreclaim: %8lu kB\n" + "KernelStack: %8lu kB\n" + "PageTables: %8lu kB\n" ++#ifdef CONFIG_UKSM ++ "KsmZeroPages: %8lu kB\n" ++#endif + #ifdef CONFIG_QUICKLIST + "Quicklists: %8lu kB\n" + #endif +@@ -146,6 +149,9 @@ static int meminfo_proc_show(struct seq_file *m, void *v) + K(global_page_state(NR_SLAB_UNRECLAIMABLE)), + global_page_state(NR_KERNEL_STACK) * THREAD_SIZE / 1024, + K(global_page_state(NR_PAGETABLE)), ++#ifdef CONFIG_UKSM ++ K(global_page_state(NR_UKSM_ZERO_PAGES)), ++#endif + #ifdef CONFIG_QUICKLIST + K(quicklist_total_size()), + #endif +diff --git a/include/linux/ksm.h b/include/linux/ksm.h +index 3319a69..f4edf33 100644 +--- a/include/linux/ksm.h ++++ b/include/linux/ksm.h +@@ -22,21 +22,6 @@ struct page *ksm_does_need_to_copy(struct page *page, + #ifdef CONFIG_KSM + int ksm_madvise(struct vm_area_struct *vma, unsigned long start, + unsigned long end, int advice, unsigned long *vm_flags); +-int __ksm_enter(struct mm_struct *mm); +-void __ksm_exit(struct mm_struct *mm); +- +-static inline int ksm_fork(struct mm_struct *mm, struct mm_struct *oldmm) +-{ +- if (test_bit(MMF_VM_MERGEABLE, &oldmm->flags)) +- return __ksm_enter(mm); +- return 0; +-} +- +-static inline void ksm_exit(struct mm_struct *mm) +-{ +- if (test_bit(MMF_VM_MERGEABLE, &mm->flags)) +- __ksm_exit(mm); +-} + + /* + * A KSM page is one of those write-protected "shared pages" or "merged pages" +@@ -90,6 +75,33 @@ int rmap_walk_ksm(struct page *page, int (*rmap_one)(struct page *, + struct vm_area_struct *, unsigned long, void *), void *arg); + void ksm_migrate_page(struct page *newpage, struct page *oldpage); + ++#ifdef CONFIG_KSM_LEGACY ++int __ksm_enter(struct mm_struct *mm); ++void __ksm_exit(struct mm_struct *mm); ++static inline int ksm_fork(struct mm_struct *mm, struct mm_struct *oldmm) ++{ ++ if (test_bit(MMF_VM_MERGEABLE, &oldmm->flags)) ++ return __ksm_enter(mm); ++ return 0; ++} ++ ++static inline void ksm_exit(struct mm_struct *mm) ++{ ++ if (test_bit(MMF_VM_MERGEABLE, &mm->flags)) ++ __ksm_exit(mm); ++} ++ ++#elif defined(CONFIG_UKSM) ++static inline int ksm_fork(struct mm_struct *mm, struct mm_struct *oldmm) ++{ ++ return 0; ++} ++ ++static inline void ksm_exit(struct mm_struct *mm) ++{ ++} ++#endif /* !CONFIG_UKSM */ ++ + #else /* !CONFIG_KSM */ + + static inline int ksm_fork(struct mm_struct *mm, struct mm_struct *oldmm) +@@ -142,4 +154,6 @@ static inline void ksm_migrate_page(struct page *newpage, struct page *oldpage) + #endif /* CONFIG_MMU */ + #endif /* !CONFIG_KSM */ + ++#include ++ + #endif /* __LINUX_KSM_H */ +diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h +index 3cc3062..9d8642d7 100644 +--- a/include/linux/mm_types.h ++++ b/include/linux/mm_types.h +@@ -252,6 +252,9 @@ struct vm_area_struct { + #ifdef CONFIG_NUMA + struct mempolicy *vm_policy; /* NUMA policy for the VMA */ + #endif ++#ifdef CONFIG_UKSM ++ struct vma_slot *uksm_vma_slot; ++#endif + }; + + struct core_thread { +diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h +index 650ba2f..2d1475f 100644 +--- a/include/linux/mmzone.h ++++ b/include/linux/mmzone.h +@@ -116,6 +116,9 @@ enum zone_stat_item { + NUMA_OTHER, /* allocation from other node */ + #endif + NR_ANON_TRANSPARENT_HUGEPAGES, ++#ifdef CONFIG_UKSM ++ NR_UKSM_ZERO_PAGES, ++#endif + NR_VM_ZONE_STAT_ITEMS }; + + /* +@@ -753,7 +756,7 @@ 
static inline int is_normal_idx(enum zone_type idx) + } + + /** +- * is_highmem - helper function to quickly check if a struct zone is a ++ * is_highmem - helper function to quickly check if a struct zone is a + * highmem zone or not. This is an attempt to keep references + * to ZONE_{DMA/NORMAL/HIGHMEM/etc} in general code to a minimum. + * @zone - pointer to struct zone variable +diff --git a/include/linux/sradix-tree.h b/include/linux/sradix-tree.h +new file mode 100644 +index 0000000..6780fdb +--- /dev/null ++++ b/include/linux/sradix-tree.h +@@ -0,0 +1,77 @@ ++#ifndef _LINUX_SRADIX_TREE_H ++#define _LINUX_SRADIX_TREE_H ++ ++ ++#define INIT_SRADIX_TREE(root, mask) \ ++do { \ ++ (root)->height = 0; \ ++ (root)->gfp_mask = (mask); \ ++ (root)->rnode = NULL; \ ++} while (0) ++ ++#define ULONG_BITS (sizeof(unsigned long) * 8) ++#define SRADIX_TREE_INDEX_BITS (8 /* CHAR_BIT */ * sizeof(unsigned long)) ++//#define SRADIX_TREE_MAP_SHIFT 6 ++//#define SRADIX_TREE_MAP_SIZE (1UL << SRADIX_TREE_MAP_SHIFT) ++//#define SRADIX_TREE_MAP_MASK (SRADIX_TREE_MAP_SIZE-1) ++ ++struct sradix_tree_node { ++ unsigned int height; /* Height from the bottom */ ++ unsigned int count; ++ unsigned int fulls; /* Number of full sublevel trees */ ++ struct sradix_tree_node *parent; ++ void *stores[0]; ++}; ++ ++/* A simple radix tree implementation */ ++struct sradix_tree_root { ++ unsigned int height; ++ struct sradix_tree_node *rnode; ++ ++ /* Where found to have available empty stores in its sublevels */ ++ struct sradix_tree_node *enter_node; ++ unsigned int shift; ++ unsigned int stores_size; ++ unsigned int mask; ++ unsigned long min; /* The first hole index */ ++ unsigned long num; ++ //unsigned long *height_to_maxindex; ++ ++ /* How the node is allocated and freed. */ ++ struct sradix_tree_node *(*alloc)(void); ++ void (*free)(struct sradix_tree_node *node); ++ ++ /* When a new node is added and removed */ ++ void (*extend)(struct sradix_tree_node *parent, struct sradix_tree_node *child); ++ void (*assign)(struct sradix_tree_node *node, unsigned index, void *item); ++ void (*rm)(struct sradix_tree_node *node, unsigned offset); ++}; ++ ++struct sradix_tree_path { ++ struct sradix_tree_node *node; ++ int offset; ++}; ++ ++static inline ++void init_sradix_tree_root(struct sradix_tree_root *root, unsigned long shift) ++{ ++ root->height = 0; ++ root->rnode = NULL; ++ root->shift = shift; ++ root->stores_size = 1UL << shift; ++ root->mask = root->stores_size - 1; ++} ++ ++ ++extern void *sradix_tree_next(struct sradix_tree_root *root, ++ struct sradix_tree_node *node, unsigned long index, ++ int (*iter)(void *, unsigned long)); ++ ++extern int sradix_tree_enter(struct sradix_tree_root *root, void **item, int num); ++ ++extern void sradix_tree_delete_from_leaf(struct sradix_tree_root *root, ++ struct sradix_tree_node *node, unsigned long index); ++ ++extern void *sradix_tree_lookup(struct sradix_tree_root *root, unsigned long index); ++ ++#endif /* _LINUX_SRADIX_TREE_H */ +diff --git a/include/linux/uksm.h b/include/linux/uksm.h +new file mode 100644 +index 0000000..361eee2 +--- /dev/null ++++ b/include/linux/uksm.h +@@ -0,0 +1,145 @@ ++#ifndef __LINUX_UKSM_H ++#define __LINUX_UKSM_H ++/* ++ * Memory merging support. ++ * ++ * This code enables dynamic sharing of identical pages found in different ++ * memory areas, even if they are not shared by fork(). ++ */ ++ ++/* if !CONFIG_UKSM this file should not be compiled at all. 
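The sradix tree header above fixes the per-node fanout through shift/stores_size/mask, and every lookup or insert peels root->shift bits off the index per level (mm/uksm.c later instantiates it with a shift of 8 for its slot tree). A tiny self-contained sketch of that index decomposition, with made-up values; it only illustrates the offset arithmetic, not the kernel implementation:

/* Illustration only: splitting an index into per-level store offsets the
 * way sradix_tree_lookup() does with (index >> shift) & root->mask. */
#include <stdio.h>

int main(void)
{
	unsigned int shift = 8;				/* root->shift */
	unsigned long mask = (1UL << shift) - 1;	/* root->mask  */
	unsigned long index = 0x12345;			/* arbitrary slot index */
	int height = 3;					/* levels needed here */

	for (int level = height - 1; level >= 0; level--)
		printf("height %d: offset %lu\n", level + 1,
		       (index >> (level * shift)) & mask);
	return 0;
}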
*/ ++#ifdef CONFIG_UKSM ++ ++#include ++#include ++#include ++#include ++#include ++ ++extern unsigned long zero_pfn __read_mostly; ++extern unsigned long uksm_zero_pfn __read_mostly; ++extern struct page *empty_uksm_zero_page; ++ ++/* must be done before linked to mm */ ++extern void uksm_vma_add_new(struct vm_area_struct *vma); ++extern void uksm_remove_vma(struct vm_area_struct *vma); ++ ++#define UKSM_SLOT_NEED_SORT (1 << 0) ++#define UKSM_SLOT_NEED_RERAND (1 << 1) ++#define UKSM_SLOT_SCANNED (1 << 2) /* It's scanned in this round */ ++#define UKSM_SLOT_FUL_SCANNED (1 << 3) ++#define UKSM_SLOT_IN_UKSM (1 << 4) ++ ++struct vma_slot { ++ struct sradix_tree_node *snode; ++ unsigned long sindex; ++ ++ struct list_head slot_list; ++ unsigned long fully_scanned_round; ++ unsigned long dedup_num; ++ unsigned long pages_scanned; ++ unsigned long last_scanned; ++ unsigned long pages_to_scan; ++ struct scan_rung *rung; ++ struct page **rmap_list_pool; ++ unsigned int *pool_counts; ++ unsigned long pool_size; ++ struct vm_area_struct *vma; ++ struct mm_struct *mm; ++ unsigned long ctime_j; ++ unsigned long pages; ++ unsigned long flags; ++ unsigned long pages_cowed; /* pages cowed this round */ ++ unsigned long pages_merged; /* pages merged this round */ ++ unsigned long pages_bemerged; ++ ++ /* when it has page merged in this eval round */ ++ struct list_head dedup_list; ++}; ++ ++static inline void uksm_unmap_zero_page(pte_t pte) ++{ ++ if (pte_pfn(pte) == uksm_zero_pfn) ++ __dec_zone_page_state(empty_uksm_zero_page, NR_UKSM_ZERO_PAGES); ++} ++ ++static inline void uksm_map_zero_page(pte_t pte) ++{ ++ if (pte_pfn(pte) == uksm_zero_pfn) ++ __inc_zone_page_state(empty_uksm_zero_page, NR_UKSM_ZERO_PAGES); ++} ++ ++static inline void uksm_cow_page(struct vm_area_struct *vma, struct page *page) ++{ ++ if (vma->uksm_vma_slot && PageKsm(page)) ++ vma->uksm_vma_slot->pages_cowed++; ++} ++ ++static inline void uksm_cow_pte(struct vm_area_struct *vma, pte_t pte) ++{ ++ if (vma->uksm_vma_slot && pte_pfn(pte) == uksm_zero_pfn) ++ vma->uksm_vma_slot->pages_cowed++; ++} ++ ++static inline int uksm_flags_can_scan(unsigned long vm_flags) ++{ ++ return !(vm_flags & (VM_PFNMAP | VM_IO | VM_DONTEXPAND | ++ VM_RESERVED | VM_HUGETLB | VM_INSERTPAGE | ++ VM_NONLINEAR | VM_MIXEDMAP | VM_SAO | ++ VM_SHARED | VM_MAYSHARE | VM_GROWSUP ++ | VM_GROWSDOWN)); ++} ++ ++static inline void uksm_vm_flags_mod(unsigned long *vm_flags_p) ++{ ++ if (uksm_flags_can_scan(*vm_flags_p)) ++ *vm_flags_p |= VM_MERGEABLE; ++} ++ ++/* ++ * Just a wrapper for BUG_ON for where ksm_zeropage must not be. TODO: it will ++ * be removed when uksm zero page patch is stable enough. 
++ */ ++static inline void uksm_bugon_zeropage(pte_t pte) ++{ ++ BUG_ON(pte_pfn(pte) == uksm_zero_pfn); ++} ++#else ++static inline void uksm_vma_add_new(struct vm_area_struct *vma) ++{ ++} ++ ++static inline void uksm_remove_vma(struct vm_area_struct *vma) ++{ ++} ++ ++static inline void uksm_unmap_zero_page(pte_t pte) ++{ ++} ++ ++static inline void uksm_map_zero_page(pte_t pte) ++{ ++} ++ ++static inline void uksm_cow_page(struct vm_area_struct *vma, struct page *page) ++{ ++} ++ ++static inline void uksm_cow_pte(struct vm_area_struct *vma, pte_t pte) ++{ ++} ++ ++static inline int uksm_flags_can_scan(unsigned long vm_flags) ++{ ++ return 0; ++} ++ ++static inline void uksm_vm_flags_mod(unsigned long *vm_flags_p) ++{ ++} ++ ++static inline void uksm_bugon_zeropage(pte_t pte) ++{ ++} ++#endif /* !CONFIG_UKSM */ ++#endif /* __LINUX_UKSM_H */ +diff --git a/kernel/fork.c b/kernel/fork.c +index 423d5a4..7281f89 100644 +--- a/kernel/fork.c ++++ b/kernel/fork.c +@@ -360,7 +360,7 @@ static int dup_mmap(struct mm_struct *mm, struct mm_struct *oldmm) + goto fail_nomem; + charge = len; + } +- tmp = kmem_cache_alloc(vm_area_cachep, GFP_KERNEL); ++ tmp = kmem_cache_zalloc(vm_area_cachep, GFP_KERNEL); + if (!tmp) + goto fail_nomem; + *tmp = *mpnt; +@@ -412,7 +412,7 @@ static int dup_mmap(struct mm_struct *mm, struct mm_struct *oldmm) + __vma_link_rb(mm, tmp, rb_link, rb_parent); + rb_link = &tmp->vm_rb.rb_right; + rb_parent = &tmp->vm_rb; +- ++ uksm_vma_add_new(tmp); + mm->map_count++; + retval = copy_page_range(mm, oldmm, mpnt); + +diff --git a/lib/Makefile b/lib/Makefile +index 18515f0..2df136b 100644 +--- a/lib/Makefile ++++ b/lib/Makefile +@@ -8,7 +8,7 @@ KBUILD_CFLAGS = $(subst -pg,,$(ORIG_CFLAGS)) + endif + + lib-y := ctype.o string.o vsprintf.o cmdline.o \ +- rbtree.o radix-tree.o dump_stack.o timerqueue.o\ ++ rbtree.o radix-tree.o sradix-tree.o dump_stack.o timerqueue.o\ + idr.o int_sqrt.o extable.o prio_tree.o \ + sha1.o md5.o irq_regs.o reciprocal_div.o argv_split.o \ + proportions.o prio_heap.o ratelimit.o show_mem.o \ +diff --git a/lib/sradix-tree.c b/lib/sradix-tree.c +new file mode 100644 +index 0000000..8d06329 +--- /dev/null ++++ b/lib/sradix-tree.c +@@ -0,0 +1,476 @@ ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++ ++static inline int sradix_node_full(struct sradix_tree_root *root, struct sradix_tree_node *node) ++{ ++ return node->fulls == root->stores_size || ++ (node->height == 1 && node->count == root->stores_size); ++} ++ ++/* ++ * Extend a sradix tree so it can store key @index. ++ */ ++static int sradix_tree_extend(struct sradix_tree_root *root, unsigned long index) ++{ ++ struct sradix_tree_node *node; ++ unsigned int height; ++ ++ if (unlikely(root->rnode == NULL)) { ++ if (!(node = root->alloc())) ++ return -ENOMEM; ++ ++ node->height = 1; ++ root->rnode = node; ++ root->height = 1; ++ } ++ ++ /* Figure out what the height should be. */ ++ height = root->height; ++ index >>= root->shift * height; ++ ++ while (index) { ++ index >>= root->shift; ++ height++; ++ } ++ ++ while (height > root->height) { ++ unsigned int newheight; ++ if (!(node = root->alloc())) ++ return -ENOMEM; ++ ++ /* Increase the height. 
*/ ++ node->stores[0] = root->rnode; ++ root->rnode->parent = node; ++ if (root->extend) ++ root->extend(node, root->rnode); ++ ++ newheight = root->height + 1; ++ node->height = newheight; ++ node->count = 1; ++ if (sradix_node_full(root, root->rnode)) ++ node->fulls = 1; ++ ++ root->rnode = node; ++ root->height = newheight; ++ } ++ ++ return 0; ++} ++ ++/* ++ * Search the next item from the current node, that is not NULL ++ * and can satify root->iter(). ++ */ ++void *sradix_tree_next(struct sradix_tree_root *root, ++ struct sradix_tree_node *node, unsigned long index, ++ int (*iter)(void *item, unsigned long height)) ++{ ++ unsigned long offset; ++ void *item; ++ ++ if (unlikely(node == NULL)) { ++ node = root->rnode; ++ for (offset = 0; offset < root->stores_size; offset++) { ++ item = node->stores[offset]; ++ if (item && (!iter || iter(item, node->height))) ++ break; ++ } ++ ++ if (unlikely(offset >= root->stores_size)) ++ return NULL; ++ ++ if (node->height == 1) ++ return item; ++ else ++ goto go_down; ++ } ++ ++ while (node) { ++ offset = (index & root->mask) + 1; ++ for (;offset < root->stores_size; offset++) { ++ item = node->stores[offset]; ++ if (item && (!iter || iter(item, node->height))) ++ break; ++ } ++ ++ if (offset < root->stores_size) ++ break; ++ ++ node = node->parent; ++ index >>= root->shift; ++ } ++ ++ if (!node) ++ return NULL; ++ ++ while (node->height > 1) { ++go_down: ++ node = item; ++ for (offset = 0; offset < root->stores_size; offset++) { ++ item = node->stores[offset]; ++ if (item && (!iter || iter(item, node->height))) ++ break; ++ } ++ ++ if (unlikely(offset >= root->stores_size)) ++ return NULL; ++ } ++ ++ BUG_ON(offset > root->stores_size); ++ ++ return item; ++} ++ ++/* ++ * Blindly insert the item to the tree. Typically, we reuse the ++ * first empty store item. 
++ */ ++int sradix_tree_enter(struct sradix_tree_root *root, void **item, int num) ++{ ++ unsigned long index; ++ unsigned int height; ++ struct sradix_tree_node *node, *tmp = NULL; ++ int offset, offset_saved; ++ void **store = NULL; ++ int error, i, j, shift; ++ ++go_on: ++ index = root->min; ++ ++ if (root->enter_node && !sradix_node_full(root, root->enter_node)) { ++ node = root->enter_node; ++ BUG_ON((index >> (root->shift * root->height))); ++ } else { ++ node = root->rnode; ++ if (node == NULL || (index >> (root->shift * root->height)) ++ || sradix_node_full(root, node)) { ++ error = sradix_tree_extend(root, index); ++ if (error) ++ return error; ++ ++ node = root->rnode; ++ } ++ } ++ ++ ++ height = node->height; ++ shift = (height - 1) * root->shift; ++ offset = (index >> shift) & root->mask; ++ while (shift > 0) { ++ offset_saved = offset; ++ for (; offset < root->stores_size; offset++) { ++ store = &node->stores[offset]; ++ tmp = *store; ++ ++ if (!tmp || !sradix_node_full(root, tmp)) ++ break; ++ } ++ BUG_ON(offset >= root->stores_size); ++ ++ if (offset != offset_saved) { ++ index += (offset - offset_saved) << shift; ++ index &= ~((1UL << shift) - 1); ++ } ++ ++ if (!tmp) { ++ if (!(tmp = root->alloc())) ++ return -ENOMEM; ++ ++ tmp->height = shift / root->shift; ++ *store = tmp; ++ tmp->parent = node; ++ node->count++; ++// if (root->extend) ++// root->extend(node, tmp); ++ } ++ ++ node = tmp; ++ shift -= root->shift; ++ offset = (index >> shift) & root->mask; ++ } ++ ++ BUG_ON(node->height != 1); ++ ++ ++ store = &node->stores[offset]; ++ for (i = 0, j = 0; ++ j < root->stores_size - node->count && ++ i < root->stores_size - offset && j < num; i++) { ++ if (!store[i]) { ++ store[i] = item[j]; ++ if (root->assign) ++ root->assign(node, index + i, item[j]); ++ j++; ++ } ++ } ++ ++ node->count += j; ++ root->num += j; ++ num -= j; ++ ++ while (sradix_node_full(root, node)) { ++ node = node->parent; ++ if (!node) ++ break; ++ ++ node->fulls++; ++ } ++ ++ if (unlikely(!node)) { ++ /* All nodes are full */ ++ root->min = 1 << (root->height * root->shift); ++ root->enter_node = NULL; ++ } else { ++ root->min = index + i - 1; ++ root->min |= (1UL << (node->height - 1)) - 1; ++ root->min++; ++ root->enter_node = node; ++ } ++ ++ if (num) { ++ item += j; ++ goto go_on; ++ } ++ ++ return 0; ++} ++ ++ ++/** ++ * sradix_tree_shrink - shrink height of a sradix tree to minimal ++ * @root sradix tree root ++ * ++ */ ++static inline void sradix_tree_shrink(struct sradix_tree_root *root) ++{ ++ /* try to shrink tree height */ ++ while (root->height > 1) { ++ struct sradix_tree_node *to_free = root->rnode; ++ ++ /* ++ * The candidate node has more than one child, or its child ++ * is not at the leftmost store, we cannot shrink. 
++ */ ++ if (to_free->count != 1 || !to_free->stores[0]) ++ break; ++ ++ root->rnode = to_free->stores[0]; ++ root->rnode->parent = NULL; ++ root->height--; ++ if (unlikely(root->enter_node == to_free)) { ++ root->enter_node = NULL; ++ } ++ root->free(to_free); ++ } ++} ++ ++/* ++ * Del the item on the known leaf node and index ++ */ ++void sradix_tree_delete_from_leaf(struct sradix_tree_root *root, ++ struct sradix_tree_node *node, unsigned long index) ++{ ++ unsigned int offset; ++ struct sradix_tree_node *start, *end; ++ ++ BUG_ON(node->height != 1); ++ ++ start = node; ++ while (node && !(--node->count)) ++ node = node->parent; ++ ++ end = node; ++ if (!node) { ++ root->rnode = NULL; ++ root->height = 0; ++ root->min = 0; ++ root->num = 0; ++ root->enter_node = NULL; ++ } else { ++ offset = (index >> (root->shift * (node->height - 1))) & root->mask; ++ if (root->rm) ++ root->rm(node, offset); ++ node->stores[offset] = NULL; ++ root->num--; ++ if (root->min > index) { ++ root->min = index; ++ root->enter_node = node; ++ } ++ } ++ ++ if (start != end) { ++ do { ++ node = start; ++ start = start->parent; ++ if (unlikely(root->enter_node == node)) ++ root->enter_node = end; ++ root->free(node); ++ } while (start != end); ++ ++ /* ++ * Note that shrink may free "end", so enter_node still need to ++ * be checked inside. ++ */ ++ sradix_tree_shrink(root); ++ } else if (node->count == root->stores_size - 1) { ++ /* It WAS a full leaf node. Update the ancestors */ ++ node = node->parent; ++ while (node) { ++ node->fulls--; ++ if (node->fulls != root->stores_size - 1) ++ break; ++ ++ node = node->parent; ++ } ++ } ++} ++ ++void *sradix_tree_lookup(struct sradix_tree_root *root, unsigned long index) ++{ ++ unsigned int height, offset; ++ struct sradix_tree_node *node; ++ int shift; ++ ++ node = root->rnode; ++ if (node == NULL || (index >> (root->shift * root->height))) ++ return NULL; ++ ++ height = root->height; ++ shift = (height - 1) * root->shift; ++ ++ do { ++ offset = (index >> shift) & root->mask; ++ node = node->stores[offset]; ++ if (!node) ++ return NULL; ++ ++ shift -= root->shift; ++ } while (shift >= 0); ++ ++ return node; ++} ++ ++/* ++ * Return the item if it exists, otherwise create it in place ++ * and return the created item. ++ */ ++void *sradix_tree_lookup_create(struct sradix_tree_root *root, ++ unsigned long index, void *(*item_alloc)(void)) ++{ ++ unsigned int height, offset; ++ struct sradix_tree_node *node, *tmp; ++ void *item; ++ int shift, error; ++ ++ if (root->rnode == NULL || (index >> (root->shift * root->height))) { ++ if (item_alloc) { ++ error = sradix_tree_extend(root, index); ++ if (error) ++ return NULL; ++ } else { ++ return NULL; ++ } ++ } ++ ++ node = root->rnode; ++ height = root->height; ++ shift = (height - 1) * root->shift; ++ ++ do { ++ offset = (index >> shift) & root->mask; ++ if (!node->stores[offset]) { ++ if (!(tmp = root->alloc())) ++ return NULL; ++ ++ tmp->height = shift / root->shift; ++ node->stores[offset] = tmp; ++ tmp->parent = node; ++ node->count++; ++ node = tmp; ++ } else { ++ node = node->stores[offset]; ++ } ++ ++ shift -= root->shift; ++ } while (shift > 0); ++ ++ BUG_ON(node->height != 1); ++ offset = index & root->mask; ++ if (node->stores[offset]) { ++ return node->stores[offset]; ++ } else if (item_alloc) { ++ if (!(item = item_alloc())) ++ return NULL; ++ ++ node->stores[offset] = item; ++ ++ /* ++ * NOTE: we do NOT call root->assign here, since this item is ++ * newly created by us having no meaning. 
Caller can call this ++ * if it's necessary to do so. ++ */ ++ ++ node->count++; ++ root->num++; ++ ++ while (sradix_node_full(root, node)) { ++ node = node->parent; ++ if (!node) ++ break; ++ ++ node->fulls++; ++ } ++ ++ if (unlikely(!node)) { ++ /* All nodes are full */ ++ root->min = 1 << (root->height * root->shift); ++ } else { ++ if (root->min == index) { ++ root->min |= (1UL << (node->height - 1)) - 1; ++ root->min++; ++ root->enter_node = node; ++ } ++ } ++ ++ return item; ++ } else { ++ return NULL; ++ } ++ ++} ++ ++int sradix_tree_delete(struct sradix_tree_root *root, unsigned long index) ++{ ++ unsigned int height, offset; ++ struct sradix_tree_node *node; ++ int shift; ++ ++ node = root->rnode; ++ if (node == NULL || (index >> (root->shift * root->height))) ++ return -ENOENT; ++ ++ height = root->height; ++ shift = (height - 1) * root->shift; ++ ++ do { ++ offset = (index >> shift) & root->mask; ++ node = node->stores[offset]; ++ if (!node) ++ return -ENOENT; ++ ++ shift -= root->shift; ++ } while (shift > 0); ++ ++ offset = index & root->mask; ++ if (!node->stores[offset]) ++ return -ENOENT; ++ ++ sradix_tree_delete_from_leaf(root, node, index); ++ ++ return 0; ++} +diff --git a/mm/Kconfig b/mm/Kconfig +index e338407..8df1b4f 100644 +--- a/mm/Kconfig ++++ b/mm/Kconfig +@@ -245,6 +245,32 @@ config KSM + See Documentation/vm/ksm.txt for more information: KSM is inactive + until a program has madvised that an area is MADV_MERGEABLE, and + root has set /sys/kernel/mm/ksm/run to 1 (if CONFIG_SYSFS is set). ++choice ++ prompt "Choose UKSM/KSM strategy" ++ default UKSM ++ depends on KSM ++ help ++ This option allows to select a UKSM/KSM stragety. ++ ++config UKSM ++ bool "Ultra-KSM for page merging" ++ depends on KSM ++ help ++ UKSM is inspired by the Linux kernel project \u2014 KSM(Kernel Same ++ page Merging), but with a fundamentally rewritten core algorithm. With ++ an advanced algorithm, UKSM now can transparently scans all anonymously ++ mapped user space applications with an significantly improved scan speed ++ and CPU efficiency. Since KVM is friendly to KSM, KVM can also benefit from ++ UKSM. Now UKSM has its first stable release and first real world enterprise user. ++ For more information, please goto its project page. ++ (www.kerneldedup.org) ++ ++config KSM_LEGACY ++ bool "Legacy KSM implementation" ++ depends on KSM ++ help ++ The legacy KSM implementation from Redhat. 
++endchoice + + config DEFAULT_MMAP_MIN_ADDR + int "Low address space to protect from user allocation" +diff --git a/mm/Makefile b/mm/Makefile +index 50ec00e..c551bae 100644 +--- a/mm/Makefile ++++ b/mm/Makefile +@@ -34,7 +34,8 @@ obj-$(CONFIG_SPARSEMEM_VMEMMAP) += sparse-vmemmap.o + obj-$(CONFIG_SLOB) += slob.o + obj-$(CONFIG_COMPACTION) += compaction.o + obj-$(CONFIG_MMU_NOTIFIER) += mmu_notifier.o +-obj-$(CONFIG_KSM) += ksm.o ++obj-$(CONFIG_KSM_LEGACY) += ksm.o ++obj-$(CONFIG_UKSM) += uksm.o + obj-$(CONFIG_PAGE_POISONING) += debug-pagealloc.o + obj-$(CONFIG_SLAB) += slab.o + obj-$(CONFIG_SLUB) += slub.o +diff --git a/mm/memory.c b/mm/memory.c +index 10b4dda..be73fff 100644 +--- a/mm/memory.c ++++ b/mm/memory.c +@@ -112,6 +112,37 @@ __setup("norandmaps", disable_randmaps); + unsigned long zero_pfn __read_mostly; + unsigned long highest_memmap_pfn __read_mostly; + ++#ifdef CONFIG_UKSM ++unsigned long uksm_zero_pfn __read_mostly; ++struct page *empty_uksm_zero_page; ++ ++static int __init setup_uksm_zero_page(void) ++{ ++ unsigned long addr; ++ addr = __get_free_pages(GFP_KERNEL | __GFP_ZERO, 0); ++ if (!addr) ++ panic("Oh boy, that early out of memory?"); ++ ++ empty_uksm_zero_page = virt_to_page((void *) addr); ++ SetPageReserved(empty_uksm_zero_page); ++ ++ uksm_zero_pfn = page_to_pfn(empty_uksm_zero_page); ++ ++ return 0; ++} ++core_initcall(setup_uksm_zero_page); ++ ++static inline int is_uksm_zero_pfn(unsigned long pfn) ++{ ++ return pfn == uksm_zero_pfn; ++} ++#else ++static inline int is_uksm_zero_pfn(unsigned long pfn) ++{ ++ return 0; ++} ++#endif ++ + /* + * CONFIG_MMU architectures set up ZERO_PAGE in their paging_init() + */ +@@ -123,6 +154,7 @@ static int __init init_zero_pfn(void) + core_initcall(init_zero_pfn); + + ++ + #if defined(SPLIT_RSS_COUNTING) + + static void __sync_task_rss_stat(struct task_struct *task, struct mm_struct *mm) +@@ -739,8 +771,10 @@ static inline int is_cow_mapping(vm_flags_t flags) + #ifndef is_zero_pfn + static inline int is_zero_pfn(unsigned long pfn) + { +- return pfn == zero_pfn; ++ return (pfn == zero_pfn) || (is_uksm_zero_pfn(pfn)); + } ++#else ++#define is_zero_pfn(pfn) (is_zero_pfn(pfn) || is_uksm_zero_pfn(pfn)) + #endif + + #ifndef my_zero_pfn +@@ -926,6 +960,11 @@ copy_one_pte(struct mm_struct *dst_mm, struct mm_struct *src_mm, + rss[MM_ANONPAGES]++; + else + rss[MM_FILEPAGES]++; ++ ++ /* Should return NULL in vm_normal_page() */ ++ uksm_bugon_zeropage(pte); ++ } else { ++ uksm_map_zero_page(pte); + } + + out_set_pte: +@@ -1161,8 +1200,10 @@ again: + ptent = ptep_get_and_clear_full(mm, addr, pte, + tlb->fullmm); + tlb_remove_tlb_entry(tlb, pte, addr); +- if (unlikely(!page)) ++ if (unlikely(!page)) { ++ uksm_unmap_zero_page(ptent); + continue; ++ } + if (unlikely(details) && details->nonlinear_vma + && linear_page_index(details->nonlinear_vma, + addr) != page->index) +@@ -1661,7 +1702,7 @@ int __get_user_pages(struct task_struct *tsk, struct mm_struct *mm, + + VM_BUG_ON(!!pages != !!(gup_flags & FOLL_GET)); + +- /* ++ /* + * Require read or write permissions. + * If FOLL_FORCE is set, we only require the "MAY" flags. 
+ */ +@@ -1708,7 +1749,7 @@ int __get_user_pages(struct task_struct *tsk, struct mm_struct *mm, + page = vm_normal_page(vma, start, *pte); + if (!page) { + if (!(gup_flags & FOLL_DUMP) && +- is_zero_pfn(pte_pfn(*pte))) ++ (is_zero_pfn(pte_pfn(*pte)))) + page = pte_page(*pte); + else { + pte_unmap(pte); +@@ -2468,8 +2509,10 @@ static inline void cow_user_page(struct page *dst, struct page *src, unsigned lo + clear_page(kaddr); + kunmap_atomic(kaddr, KM_USER0); + flush_dcache_page(dst); +- } else ++ } else { + copy_user_highpage(dst, src, va, vma); ++ uksm_cow_page(vma, src); ++ } + } + + /* +@@ -2667,6 +2710,7 @@ gotten: + new_page = alloc_zeroed_user_highpage_movable(vma, address); + if (!new_page) + goto oom; ++ uksm_cow_pte(vma, orig_pte); + } else { + new_page = alloc_page_vma(GFP_HIGHUSER_MOVABLE, vma, address); + if (!new_page) +@@ -2688,8 +2732,11 @@ gotten: + dec_mm_counter_fast(mm, MM_FILEPAGES); + inc_mm_counter_fast(mm, MM_ANONPAGES); + } +- } else ++ uksm_bugon_zeropage(orig_pte); ++ } else { ++ uksm_unmap_zero_page(orig_pte); + inc_mm_counter_fast(mm, MM_ANONPAGES); ++ } + flush_cache_page(vma, address, pte_pfn(orig_pte)); + entry = mk_pte(new_page, vma->vm_page_prot); + entry = maybe_mkwrite(pte_mkdirty(entry), vma); +diff --git a/mm/mmap.c b/mm/mmap.c +index da15a79..76cf74c 100644 +--- a/mm/mmap.c ++++ b/mm/mmap.c +@@ -30,6 +30,7 @@ + #include + #include + #include ++#include + + #include + #include +@@ -65,7 +66,7 @@ static void unmap_region(struct mm_struct *mm, + * MAP_SHARED r: (no) no r: (yes) yes r: (no) yes r: (no) yes + * w: (no) no w: (no) no w: (yes) yes w: (no) no + * x: (no) no x: (no) yes x: (no) yes x: (yes) yes +- * ++ * + * MAP_PRIVATE r: (no) no r: (yes) yes r: (no) yes r: (no) yes + * w: (no) no w: (no) no w: (copy) copy w: (no) no + * x: (no) no x: (no) yes x: (no) yes x: (yes) yes +@@ -236,6 +237,7 @@ static struct vm_area_struct *remove_vma(struct vm_area_struct *vma) + removed_exe_file_vma(vma->vm_mm); + } + mpol_put(vma_policy(vma)); ++ uksm_remove_vma(vma); + kmem_cache_free(vm_area_cachep, vma); + return next; + } +@@ -500,9 +502,16 @@ int vma_adjust(struct vm_area_struct *vma, unsigned long start, + long adjust_next = 0; + int remove_next = 0; + ++/* ++ * to avoid deadlock, ksm_remove_vma must be done before any spin_lock is ++ * acquired ++ */ ++ uksm_remove_vma(vma); ++ + if (next && !insert) { + struct vm_area_struct *exporter = NULL; + ++ uksm_remove_vma(next); + if (end >= next->vm_end) { + /* + * vma expands, overlapping all the next, and +@@ -578,10 +587,10 @@ again: remove_next = 1 + (end > next->vm_end); + if (adjust_next) + vma_prio_tree_remove(next, root); + } +- + vma->vm_start = start; + vma->vm_end = end; + vma->vm_pgoff = pgoff; ++ + if (adjust_next) { + next->vm_start += adjust_next << PAGE_SHIFT; + next->vm_pgoff += adjust_next; +@@ -634,10 +643,15 @@ again: remove_next = 1 + (end > next->vm_end); + */ + if (remove_next == 2) { + next = vma->vm_next; ++ uksm_remove_vma(next); + goto again; + } ++ } else { ++ if (next && !insert) ++ uksm_vma_add_new(next); + } + ++ uksm_vma_add_new(vma); + validate_mm(mm); + + return 0; +@@ -992,6 +1006,9 @@ unsigned long do_mmap_pgoff(struct file *file, unsigned long addr, + vm_flags = calc_vm_prot_bits(prot) | calc_vm_flag_bits(flags) | + mm->def_flags | VM_MAYREAD | VM_MAYWRITE | VM_MAYEXEC; + ++ /* If uksm is enabled, we add VM_MERGABLE to new VMAs. 
*/ ++ uksm_vm_flags_mod(&vm_flags); ++ + if (flags & MAP_LOCKED) + if (!can_do_mlock()) + return -EPERM; +@@ -1318,6 +1335,7 @@ munmap_back: + + vma_link(mm, vma, prev, rb_link, rb_parent); + file = vma->vm_file; ++ uksm_vma_add_new(vma); + + /* Once vma denies write, undo our temporary denial count */ + if (correct_wcount) +@@ -1344,6 +1362,7 @@ unmap_and_free_vma: + unmap_region(mm, vma, prev, vma->vm_start, vma->vm_end); + charged = 0; + free_vma: ++ uksm_remove_vma(vma); + kmem_cache_free(vm_area_cachep, vma); + unacct_error: + if (charged) +@@ -1419,7 +1438,7 @@ full_search: + addr = vma->vm_end; + } + } +-#endif ++#endif + + void arch_unmap_area(struct mm_struct *mm, unsigned long addr) + { +@@ -1969,6 +1988,8 @@ static int __split_vma(struct mm_struct * mm, struct vm_area_struct * vma, + else + err = vma_adjust(vma, vma->vm_start, addr, vma->vm_pgoff, new); + ++ uksm_vma_add_new(new); ++ + /* Success. */ + if (!err) + return 0; +@@ -2138,6 +2159,7 @@ unsigned long do_brk(unsigned long addr, unsigned long len) + return error; + + flags = VM_DATA_DEFAULT_FLAGS | VM_ACCOUNT | mm->def_flags; ++ uksm_vm_flags_mod(&flags); + + error = get_unmapped_area(NULL, addr, len, 0, MAP_FIXED); + if (error & ~PAGE_MASK) +@@ -2206,6 +2228,7 @@ unsigned long do_brk(unsigned long addr, unsigned long len) + vma->vm_flags = flags; + vma->vm_page_prot = vm_get_page_prot(flags); + vma_link(mm, vma, prev, rb_link, rb_parent); ++ uksm_vma_add_new(vma); + out: + perf_event_mmap(vma); + mm->total_vm += len >> PAGE_SHIFT; +@@ -2229,6 +2252,12 @@ void exit_mmap(struct mm_struct *mm) + /* mm's last user has gone, and its about to be pulled down */ + mmu_notifier_release(mm); + ++ /* ++ * Taking write lock on mmap_sem does not harm others, ++ * but it's crucial for uksm to avoid races. ++ */ ++ down_write(&mm->mmap_sem); ++ + if (mm->locked_vm) { + vma = mm->mmap; + while (vma) { +@@ -2262,6 +2291,11 @@ void exit_mmap(struct mm_struct *mm) + while (vma) + vma = remove_vma(vma); + ++ mm->mmap = NULL; ++ mm->mm_rb = RB_ROOT; ++ mm->mmap_cache = NULL; ++ up_write(&mm->mmap_sem); ++ + BUG_ON(mm->nr_ptes > (FIRST_USER_ADDRESS+PMD_SIZE-1)>>PMD_SHIFT); + } + +@@ -2371,6 +2405,7 @@ struct vm_area_struct *copy_vma(struct vm_area_struct **vmap, + if (new_vma->vm_ops && new_vma->vm_ops->open) + new_vma->vm_ops->open(new_vma); + vma_link(mm, new_vma, prev, rb_link, rb_parent); ++ uksm_vma_add_new(new_vma); + } + } + return new_vma; +@@ -2476,10 +2511,10 @@ int install_special_mapping(struct mm_struct *mm, + ret = insert_vm_struct(mm, vma); + if (ret) + goto out; +- + mm->total_vm += len >> PAGE_SHIFT; + + perf_event_mmap(vma); ++ uksm_vma_add_new(vma); + + return 0; + +diff --git a/mm/rmap.c b/mm/rmap.c +index c8454e0..90a50d2 100644 +--- a/mm/rmap.c ++++ b/mm/rmap.c +@@ -1017,9 +1017,9 @@ void page_move_anon_rmap(struct page *page, + + /** + * __page_set_anon_rmap - set up new anonymous rmap +- * @page: Page to add to rmap ++ * @page: Page to add to rmap + * @vma: VM area to add page to. +- * @address: User virtual address of the mapping ++ * @address: User virtual address of the mapping + * @exclusive: the page is exclusively owned by the current process + */ + static void __page_set_anon_rmap(struct page *page, +diff --git a/mm/uksm.c b/mm/uksm.c +new file mode 100644 +index 0000000..967c755 +--- /dev/null ++++ b/mm/uksm.c +@@ -0,0 +1,5616 @@ ++/* ++ * Ultra KSM. Copyright (C) 2011-2012 Nai Xia ++ * ++ * This is an improvement upon KSM. Some basic data structures and routines ++ * are borrowed from ksm.c . 
++ * ++ * Its new features: ++ * 1. Full system scan: ++ * It automatically scans all user processes' anonymous VMAs. Kernel-user ++ * interaction to submit a memory area to KSM is no longer needed. ++ * ++ * 2. Rich area detection: ++ * It automatically detects rich areas containing abundant duplicated ++ * pages based. Rich areas are given a full scan speed. Poor areas are ++ * sampled at a reasonable speed with very low CPU consumption. ++ * ++ * 3. Ultra Per-page scan speed improvement: ++ * A new hash algorithm is proposed. As a result, on a machine with ++ * Core(TM)2 Quad Q9300 CPU in 32-bit mode and 800MHZ DDR2 main memory, it ++ * can scan memory areas that does not contain duplicated pages at speed of ++ * 627MB/sec ~ 2445MB/sec and can merge duplicated areas at speed of ++ * 477MB/sec ~ 923MB/sec. ++ * ++ * 4. Thrashing area avoidance: ++ * Thrashing area(an VMA that has frequent Ksm page break-out) can be ++ * filtered out. My benchmark shows it's more efficient than KSM's per-page ++ * hash value based volatile page detection. ++ * ++ * ++ * 5. Misc changes upon KSM: ++ * * It has a fully x86-opitmized memcmp dedicated for 4-byte-aligned page ++ * comparison. It's much faster than default C version on x86. ++ * * rmap_item now has an struct *page member to loosely cache a ++ * address-->page mapping, which reduces too much time-costly ++ * follow_page(). ++ * * The VMA creation/exit procedures are hooked to let the Ultra KSM know. ++ * * try_to_merge_two_pages() now can revert a pte if it fails. No break_ ++ * ksm is needed for this case. ++ * ++ * 6. Full Zero Page consideration(contributed by Figo Zhang) ++ * Now uksmd consider full zero pages as special pages and merge them to an ++ * special unswappable uksm zero page. ++ */ ++ ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++ ++#include ++#include "internal.h" ++ ++#ifdef CONFIG_X86 ++#undef memcmp ++ ++#ifdef CONFIG_X86_32 ++#define memcmp memcmpx86_32 ++/* ++ * Compare 4-byte-aligned address s1 and s2, with length n ++ */ ++int memcmpx86_32(void *s1, void *s2, size_t n) ++{ ++ size_t num = n / 4; ++ register int res; ++ ++ __asm__ __volatile__ ++ ( ++ "testl %3,%3\n\t" ++ "repe; cmpsd\n\t" ++ "je 1f\n\t" ++ "sbbl %0,%0\n\t" ++ "orl $1,%0\n" ++ "1:" ++ : "=&a" (res), "+&S" (s1), "+&D" (s2), "+&c" (num) ++ : "0" (0) ++ : "cc"); ++ ++ return res; ++} ++ ++/* ++ * Check the page is all zero ? 
++ */ ++static int is_full_zero(const void *s1, size_t len) ++{ ++ unsigned char same; ++ ++ len /= 4; ++ ++ __asm__ __volatile__ ++ ("repe; scasl;" ++ "sete %0" ++ : "=qm" (same), "+D" (s1), "+c" (len) ++ : "a" (0) ++ : "cc"); ++ ++ return same; ++} ++ ++ ++#elif defined(CONFIG_X86_64) ++#define memcmp memcmpx86_64 ++/* ++ * Compare 8-byte-aligned address s1 and s2, with length n ++ */ ++int memcmpx86_64(void *s1, void *s2, size_t n) ++{ ++ size_t num = n / 8; ++ register int res; ++ ++ __asm__ __volatile__ ++ ( ++ "testq %q3,%q3\n\t" ++ "repe; cmpsq\n\t" ++ "je 1f\n\t" ++ "sbbq %q0,%q0\n\t" ++ "orq $1,%q0\n" ++ "1:" ++ : "=&a" (res), "+&S" (s1), "+&D" (s2), "+&c" (num) ++ : "0" (0) ++ : "cc"); ++ ++ return res; ++} ++ ++static int is_full_zero(const void *s1, size_t len) ++{ ++ unsigned char same; ++ ++ len /= 8; ++ ++ __asm__ __volatile__ ++ ("repe; scasq;" ++ "sete %0" ++ : "=qm" (same), "+D" (s1), "+c" (len) ++ : "a" (0) ++ : "cc"); ++ ++ return same; ++} ++ ++#endif ++#else ++static int is_full_zero(const void *s1, size_t len) ++{ ++ unsigned long *src = s1; ++ int i; ++ ++ len /= sizeof(*src); ++ ++ for (i = 0; i < len; i++) { ++ if (src[i]) ++ return 0; ++ } ++ ++ return 1; ++} ++#endif ++ ++#define U64_MAX (~((u64)0)) ++#define UKSM_RUNG_ROUND_FINISHED (1 << 0) ++#define TIME_RATIO_SCALE 10000 ++ ++#define SLOT_TREE_NODE_SHIFT 8 ++#define SLOT_TREE_NODE_STORE_SIZE (1UL << SLOT_TREE_NODE_SHIFT) ++struct slot_tree_node { ++ unsigned long size; ++ struct sradix_tree_node snode; ++ void *stores[SLOT_TREE_NODE_STORE_SIZE]; ++}; ++ ++static struct kmem_cache *slot_tree_node_cachep; ++ ++static struct sradix_tree_node *slot_tree_node_alloc(void) ++{ ++ struct slot_tree_node *p; ++ p = kmem_cache_zalloc(slot_tree_node_cachep, GFP_KERNEL); ++ if (!p) ++ return NULL; ++ ++ return &p->snode; ++} ++ ++static void slot_tree_node_free(struct sradix_tree_node *node) ++{ ++ struct slot_tree_node *p; ++ ++ p = container_of(node, struct slot_tree_node, snode); ++ kmem_cache_free(slot_tree_node_cachep, p); ++} ++ ++static void slot_tree_node_extend(struct sradix_tree_node *parent, ++ struct sradix_tree_node *child) ++{ ++ struct slot_tree_node *p, *c; ++ ++ p = container_of(parent, struct slot_tree_node, snode); ++ c = container_of(child, struct slot_tree_node, snode); ++ ++ p->size += c->size; ++} ++ ++void slot_tree_node_assign(struct sradix_tree_node *node, ++ unsigned index, void *item) ++{ ++ struct vma_slot *slot = item; ++ struct slot_tree_node *cur; ++ ++ slot->snode = node; ++ slot->sindex = index; ++ ++ while (node) { ++ cur = container_of(node, struct slot_tree_node, snode); ++ cur->size += slot->pages; ++ node = node->parent; ++ } ++} ++ ++void slot_tree_node_rm(struct sradix_tree_node *node, unsigned offset) ++{ ++ struct vma_slot *slot; ++ struct slot_tree_node *cur; ++ unsigned long pages; ++ ++ if (node->height == 1) { ++ slot = node->stores[offset]; ++ pages = slot->pages; ++ } else { ++ cur = container_of(node->stores[offset], ++ struct slot_tree_node, snode); ++ pages = cur->size; ++ } ++ ++ while (node) { ++ cur = container_of(node, struct slot_tree_node, snode); ++ cur->size -= pages; ++ node = node->parent; ++ } ++} ++ ++unsigned long slot_iter_index; ++int slot_iter(void *item, unsigned long height) ++{ ++ struct slot_tree_node *node; ++ struct vma_slot *slot; ++ ++ if (height == 1) { ++ slot = item; ++ if (slot_iter_index < slot->pages) { ++ /*in this one*/ ++ return 1; ++ } else { ++ slot_iter_index -= slot->pages; ++ return 0; ++ } ++ ++ } else { ++ node = 
container_of(item, struct slot_tree_node, snode); ++ if (slot_iter_index < node->size) { ++ /*in this one*/ ++ return 1; ++ } else { ++ slot_iter_index -= node->size; ++ return 0; ++ } ++ } ++} ++ ++ ++static inline void slot_tree_init_root(struct sradix_tree_root *root) ++{ ++ init_sradix_tree_root(root, SLOT_TREE_NODE_SHIFT); ++ root->alloc = slot_tree_node_alloc; ++ root->free = slot_tree_node_free; ++ root->extend = slot_tree_node_extend; ++ root->assign = slot_tree_node_assign; ++ root->rm = slot_tree_node_rm; ++} ++ ++void slot_tree_init(void) ++{ ++ slot_tree_node_cachep = kmem_cache_create("slot_tree_node", ++ sizeof(struct slot_tree_node), 0, ++ SLAB_PANIC | SLAB_RECLAIM_ACCOUNT, ++ NULL); ++} ++ ++ ++/* Each rung of this ladder is a list of VMAs having a same scan ratio */ ++struct scan_rung { ++ //struct list_head scanned_list; ++ struct sradix_tree_root vma_root; ++ struct sradix_tree_root vma_root2; ++ ++ struct vma_slot *current_scan; ++ unsigned long current_offset; ++ ++ /* ++ * The initial value for current_offset, it should loop over ++ * [0~ step - 1] to let all slot have its chance to be scanned. ++ */ ++ unsigned long offset_init; ++ unsigned long step; /* dynamic step for current_offset */ ++ unsigned int flags; ++ unsigned long pages_to_scan; ++ //unsigned long fully_scanned_slots; ++ /* ++ * a little bit tricky - if cpu_time_ratio > 0, then the value is the ++ * the cpu time ratio it can spend in rung_i for every scan ++ * period. if < 0, then it is the cpu time ratio relative to the ++ * max cpu percentage user specified. Both in unit of ++ * 1/TIME_RATIO_SCALE ++ */ ++ int cpu_ratio; ++ ++ /* ++ * How long it will take for all slots in this rung to be fully ++ * scanned? If it's zero, we don't care about the cover time: ++ * it's fully scanned. ++ */ ++ unsigned int cover_msecs; ++ //unsigned long vma_num; ++ //unsigned long pages; /* Sum of all slot's pages in rung */ ++}; ++ ++/** ++ * node of either the stable or unstale rbtree ++ * ++ */ ++struct tree_node { ++ struct rb_node node; /* link in the main (un)stable rbtree */ ++ struct rb_root sub_root; /* rb_root for sublevel collision rbtree */ ++ u32 hash; ++ unsigned long count; /* TODO: merged with sub_root */ ++ struct list_head all_list; /* all tree nodes in stable/unstable tree */ ++}; ++ ++/** ++ * struct stable_node - node of the stable rbtree ++ * @node: rb node of this ksm page in the stable tree ++ * @hlist: hlist head of rmap_items using this ksm page ++ * @kpfn: page frame number of this ksm page ++ */ ++struct stable_node { ++ struct rb_node node; /* link in sub-rbtree */ ++ struct tree_node *tree_node; /* it's tree node root in stable tree, NULL if it's in hell list */ ++ struct hlist_head hlist; ++ unsigned long kpfn; ++ u32 hash_max; /* if ==0 then it's not been calculated yet */ ++ struct list_head all_list; /* in a list for all stable nodes */ ++}; ++ ++/** ++ * struct node_vma - group rmap_items linked in a same stable ++ * node together. 
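The slot_iter() walk above locates the vma_slot holding a given logical page position by subtracting sub-tree sizes as it descends: internal slot_tree_nodes carry the total pages beneath them, leaves carry slot->pages. The same subtract-as-you-go selection, flattened over an array of hypothetical slot sizes, looks like the sketch below; picking the starting index at random makes the choice proportional to a slot's page count:

/* Illustration only: select the slot containing logical page index `idx`,
 * mirroring the "in this one" / subtract-and-continue logic of slot_iter(),
 * but over a flat array instead of a tree. */
#include <stdio.h>

static int pick_slot(const unsigned long *pages, int n, unsigned long idx)
{
	for (int i = 0; i < n; i++) {
		if (idx < pages[i])
			return i;	/* "in this one" */
		idx -= pages[i];
	}
	return -1;			/* idx >= total pages */
}

int main(void)
{
	unsigned long pages[] = { 4, 100, 16 };	/* three hypothetical slots */

	printf("page 3   -> slot %d\n", pick_slot(pages, 3, 3));
	printf("page 4   -> slot %d\n", pick_slot(pages, 3, 4));
	printf("page 110 -> slot %d\n", pick_slot(pages, 3, 110));
	return 0;
}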
++ */ ++struct node_vma { ++ union { ++ struct vma_slot *slot; ++ unsigned long key; /* slot is used as key sorted on hlist */ ++ }; ++ struct hlist_node hlist; ++ struct hlist_head rmap_hlist; ++ struct stable_node *head; ++}; ++ ++/** ++ * struct rmap_item - reverse mapping item for virtual addresses ++ * @rmap_list: next rmap_item in mm_slot's singly-linked rmap_list ++ * @anon_vma: pointer to anon_vma for this mm,address, when in stable tree ++ * @mm: the memory structure this rmap_item is pointing into ++ * @address: the virtual address this rmap_item tracks (+ flags in low bits) ++ * @node: rb node of this rmap_item in the unstable tree ++ * @head: pointer to stable_node heading this list in the stable tree ++ * @hlist: link into hlist of rmap_items hanging off that stable_node ++ */ ++struct rmap_item { ++ struct vma_slot *slot; ++ struct page *page; ++ unsigned long address; /* + low bits used for flags below */ ++ unsigned long hash_round; ++ unsigned long entry_index; ++ union { ++ struct {/* when in unstable tree */ ++ struct rb_node node; ++ struct tree_node *tree_node; ++ u32 hash_max; ++ }; ++ struct { /* when in stable tree */ ++ struct node_vma *head; ++ struct hlist_node hlist; ++ struct anon_vma *anon_vma; ++ }; ++ }; ++} __attribute__((aligned(4))); ++ ++struct rmap_list_entry { ++ union { ++ struct rmap_item *item; ++ unsigned long addr; ++ }; ++ /* lowest bit is used for is_addr tag */ ++} __attribute__((aligned(4))); /* 4 aligned to fit in to pages*/ ++ ++ ++/* Basic data structure definition ends */ ++ ++ ++/* ++ * Flags for rmap_item to judge if it's listed in the stable/unstable tree. ++ * The flags use the low bits of rmap_item.address ++ */ ++#define UNSTABLE_FLAG 0x1 ++#define STABLE_FLAG 0x2 ++#define get_rmap_addr(x) ((x)->address & PAGE_MASK) ++ ++/* ++ * rmap_list_entry helpers ++ */ ++#define IS_ADDR_FLAG 1 ++#define is_addr(ptr) ((unsigned long)(ptr) & IS_ADDR_FLAG) ++#define set_is_addr(ptr) ((ptr) |= IS_ADDR_FLAG) ++#define get_clean_addr(ptr) (((ptr) & ~(__typeof__(ptr))IS_ADDR_FLAG)) ++ ++ ++/* ++ * High speed caches for frequently allocated and freed structs ++ */ ++static struct kmem_cache *rmap_item_cache; ++static struct kmem_cache *stable_node_cache; ++static struct kmem_cache *node_vma_cache; ++static struct kmem_cache *vma_slot_cache; ++static struct kmem_cache *tree_node_cache; ++#define UKSM_KMEM_CACHE(__struct, __flags) kmem_cache_create("uksm_"#__struct,\ ++ sizeof(struct __struct), __alignof__(struct __struct),\ ++ (__flags), NULL) ++ ++/* Array of all scan_rung, uksm_scan_ladder[0] having the minimum scan ratio */ ++#define SCAN_LADDER_SIZE 4 ++static struct scan_rung uksm_scan_ladder[SCAN_LADDER_SIZE]; ++ ++/* The evaluation rounds uksmd has finished */ ++static unsigned long long uksm_eval_round = 1; ++ ++/* ++ * we add 1 to this var when we consider we should rebuild the whole ++ * unstable tree. ++ */ ++static unsigned long uksm_hash_round = 1; ++ ++/* ++ * How many times the whole memory is scanned. 
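rmap_list_entry above overlays an item pointer and a plain address in one word and borrows bit 0 (IS_ADDR_FLAG) to tell them apart; this works because rmap_items are at least 4-byte aligned, so a genuine pointer never has that bit set. A minimal stand-alone illustration of the tagging scheme used by is_addr(), set_is_addr() and get_clean_addr():

/* Illustration only: low-bit pointer tagging on an aligned structure. */
#include <stdio.h>
#include <stdint.h>

#define IS_ADDR_FLAG 1UL

struct item { int payload; } __attribute__((aligned(4)));

int main(void)
{
	struct item it = { 42 };
	uintptr_t entry;

	entry = (uintptr_t)&it;				/* store an item pointer */
	printf("is_addr: %lu\n", (unsigned long)(entry & IS_ADDR_FLAG));

	entry = (uintptr_t)0x7f001000 | IS_ADDR_FLAG;	/* store a plain address */
	printf("is_addr: %lu, clean addr: %#lx\n",
	       (unsigned long)(entry & IS_ADDR_FLAG),
	       (unsigned long)(entry & ~IS_ADDR_FLAG));
	return 0;
}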
++ */ ++static unsigned long long fully_scanned_round = 1; ++ ++/* The total number of virtual pages of all vma slots */ ++static u64 uksm_pages_total; ++ ++/* The number of pages has been scanned since the start up */ ++static u64 uksm_pages_scanned; ++ ++static u64 scanned_virtual_pages; ++ ++/* The number of pages has been scanned since last encode_benefit call */ ++static u64 uksm_pages_scanned_last; ++ ++/* If the scanned number is tooo large, we encode it here */ ++static u64 pages_scanned_stored; ++ ++static unsigned long pages_scanned_base; ++ ++/* The number of nodes in the stable tree */ ++static unsigned long uksm_pages_shared; ++ ++/* The number of page slots additionally sharing those nodes */ ++static unsigned long uksm_pages_sharing; ++ ++/* The number of nodes in the unstable tree */ ++static unsigned long uksm_pages_unshared; ++ ++/* ++ * Milliseconds ksmd should sleep between scans, ++ * >= 100ms to be consistent with ++ * scan_time_to_sleep_msec() ++ */ ++static unsigned int uksm_sleep_jiffies; ++ ++/* The real value for the uksmd next sleep */ ++static unsigned int uksm_sleep_real; ++ ++/* Saved value for user input uksm_sleep_jiffies when it's enlarged */ ++static unsigned int uksm_sleep_saved; ++ ++/* Max percentage of cpu utilization ksmd can take to scan in one batch */ ++static unsigned int uksm_max_cpu_percentage; ++ ++static int uksm_cpu_governor; ++ ++static char *uksm_cpu_governor_str[4] = { "full", "medium", "low", "quiet" }; ++ ++struct uksm_cpu_preset_s { ++ int cpu_ratio[SCAN_LADDER_SIZE]; ++ unsigned int cover_msecs[SCAN_LADDER_SIZE]; ++ unsigned int max_cpu; /* percentage */ ++}; ++ ++struct uksm_cpu_preset_s uksm_cpu_preset[4] = { ++ { {20, 40, -2500, -10000}, {1000, 500, 200, 50}, 95}, ++ { {20, 30, -2500, -10000}, {1000, 500, 400, 100}, 50}, ++ { {10, 20, -5000, -10000}, {1500, 1000, 1000, 250}, 20}, ++ { {10, 20, 40, 75}, {2000, 1000, 1000, 1000}, 1}, ++}; ++ ++/* The default value for uksm_ema_page_time if it's not initialized */ ++#define UKSM_PAGE_TIME_DEFAULT 500 ++ ++/*cost to scan one page by expotional moving average in nsecs */ ++static unsigned long uksm_ema_page_time = UKSM_PAGE_TIME_DEFAULT; ++ ++/* The expotional moving average alpha weight, in percentage. */ ++#define EMA_ALPHA 20 ++ ++/* ++ * The threshold used to filter out thrashing areas, ++ * If it == 0, filtering is disabled, otherwise it's the percentage up-bound ++ * of the thrashing ratio of all areas. Any area with a bigger thrashing ratio ++ * will be considered as having a zero duplication ratio. ++ */ ++static unsigned int uksm_thrash_threshold = 50; ++ ++/* How much dedup ratio is considered to be abundant*/ ++static unsigned int uksm_abundant_threshold = 10; ++ ++/* All slots having merged pages in this eval round. */ ++struct list_head vma_slot_dedup = LIST_HEAD_INIT(vma_slot_dedup); ++ ++/* How many times the ksmd has slept since startup */ ++static unsigned long long uksm_sleep_times; ++ ++#define UKSM_RUN_STOP 0 ++#define UKSM_RUN_MERGE 1 ++static unsigned int uksm_run = 1; ++ ++static DECLARE_WAIT_QUEUE_HEAD(uksm_thread_wait); ++static DEFINE_MUTEX(uksm_thread_mutex); ++ ++/* ++ * List vma_slot_new is for newly created vma_slot waiting to be added by ++ * ksmd. If one cannot be added(e.g. due to it's too small), it's moved to ++ * vma_slot_noadd. vma_slot_del is the list for vma_slot whose corresponding ++ * VMA has been removed/freed. 
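Two conventions above are easy to misread. A scan_rung's cpu_ratio (see the struct comment earlier) is an absolute ratio in 1/TIME_RATIO_SCALE of a scan period when positive, and a fraction of the user's CPU cap when negative, which is how the uksm_cpu_preset rows should be read. Separately, uksm_ema_page_time tracks per-page scan cost as an exponential moving average with EMA_ALPHA = 20 percent. The sketch below decodes the "full" preset row and applies one EMA update; the exact update expression is an assumption made only to show the 20% weighting, not a quote of the kernel code:

/* Illustration only: decoding cpu_ratio entries and an assumed EMA update. */
#include <stdio.h>

#define TIME_RATIO_SCALE 10000
#define EMA_ALPHA        20	/* percent */

/* Effective CPU share of one rung, in 1/TIME_RATIO_SCALE units. */
static long rung_cpu_share(int cpu_ratio, unsigned int max_cpu_percent)
{
	if (cpu_ratio >= 0)
		return cpu_ratio;			/* absolute ratio */
	return (long)(-cpu_ratio) * max_cpu_percent / 100; /* share of the cap */
}

/* Assumed update rule: 20% weight on the newest per-page cost sample. */
static unsigned long ema(unsigned long cur_ns, unsigned long sample_ns)
{
	return (EMA_ALPHA * sample_ns + (100 - EMA_ALPHA) * cur_ns) / 100;
}

int main(void)
{
	int full_preset[4] = { 20, 40, -2500, -10000 };	/* "full" governor row */
	unsigned int max_cpu = 95;			/* percent */
	unsigned long page_time = 500;			/* UKSM_PAGE_TIME_DEFAULT */

	for (int i = 0; i < 4; i++)
		printf("rung %d: %.2f%% CPU\n", i,
		       rung_cpu_share(full_preset[i], max_cpu) * 100.0 / TIME_RATIO_SCALE);

	printf("ema after an 800 ns sample: %lu ns\n", ema(page_time, 800));
	return 0;
}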
++ */ ++struct list_head vma_slot_new = LIST_HEAD_INIT(vma_slot_new); ++struct list_head vma_slot_noadd = LIST_HEAD_INIT(vma_slot_noadd); ++struct list_head vma_slot_del = LIST_HEAD_INIT(vma_slot_del); ++static DEFINE_SPINLOCK(vma_slot_list_lock); ++ ++/* The unstable tree heads */ ++static struct rb_root root_unstable_tree = RB_ROOT; ++ ++/* ++ * All tree_nodes are in a list to be freed at once when unstable tree is ++ * freed after each scan round. ++ */ ++static struct list_head unstable_tree_node_list = ++ LIST_HEAD_INIT(unstable_tree_node_list); ++ ++/* List contains all stable nodes */ ++static struct list_head stable_node_list = LIST_HEAD_INIT(stable_node_list); ++ ++/* ++ * When the hash strength is changed, the stable tree must be delta_hashed and ++ * re-structured. We use two set of below structs to speed up the ++ * re-structuring of stable tree. ++ */ ++static struct list_head ++stable_tree_node_list[2] = {LIST_HEAD_INIT(stable_tree_node_list[0]), ++ LIST_HEAD_INIT(stable_tree_node_list[1])}; ++ ++static struct list_head *stable_tree_node_listp = &stable_tree_node_list[0]; ++static struct rb_root root_stable_tree[2] = {RB_ROOT, RB_ROOT}; ++static struct rb_root *root_stable_treep = &root_stable_tree[0]; ++static unsigned long stable_tree_index; ++ ++/* The hash strength needed to hash a full page */ ++#define HASH_STRENGTH_FULL (PAGE_SIZE / sizeof(u32)) ++ ++/* The hash strength needed for loop-back hashing */ ++#define HASH_STRENGTH_MAX (HASH_STRENGTH_FULL + 10) ++ ++/* The random offsets in a page */ ++static u32 *random_nums; ++ ++/* The hash strength */ ++static unsigned long hash_strength = HASH_STRENGTH_FULL >> 4; ++ ++/* The delta value each time the hash strength increases or decreases */ ++static unsigned long hash_strength_delta; ++#define HASH_STRENGTH_DELTA_MAX 5 ++ ++/* The time we have saved due to random_sample_hash */ ++static u64 rshash_pos; ++ ++/* The time we have wasted due to hash collision */ ++static u64 rshash_neg; ++ ++struct uksm_benefit { ++ u64 pos; ++ u64 neg; ++ u64 scanned; ++ unsigned long base; ++} benefit; ++ ++/* ++ * The relative cost of memcmp, compared to 1 time unit of random sample ++ * hash, this value is tested when ksm module is initialized ++ */ ++static unsigned long memcmp_cost; ++ ++static unsigned long rshash_neg_cont_zero; ++static unsigned long rshash_cont_obscure; ++ ++/* The possible states of hash strength adjustment heuristic */ ++enum rshash_states { ++ RSHASH_STILL, ++ RSHASH_TRYUP, ++ RSHASH_TRYDOWN, ++ RSHASH_NEW, ++ RSHASH_PRE_STILL, ++}; ++ ++/* The possible direction we are about to adjust hash strength */ ++enum rshash_direct { ++ GO_UP, ++ GO_DOWN, ++ OBSCURE, ++ STILL, ++}; ++ ++/* random sampling hash state machine */ ++static struct { ++ enum rshash_states state; ++ enum rshash_direct pre_direct; ++ u8 below_count; ++ /* Keep a lookup window of size 5, iff above_count/below_count > 3 ++ * in this window we stop trying. 
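The counters above (rshash_pos, rshash_neg, hash_strength, random_nums) all revolve around the random-sample hash: instead of hashing every 32-bit word of a page (HASH_STRENGTH_FULL of them), only hash_strength words at pre-chosen random offsets are mixed in, and that strength is adapted by the rshash state machine defined nearby. The sketch below shows only the sampling idea; the mixing step is a deliberately simple stand-in, not the hash function the patch uses:

/* Illustration only: hash a page by sampling hash_strength of its 32-bit
 * words at fixed random offsets, rather than all of them. */
#include <stdio.h>
#include <stdint.h>
#include <stdlib.h>

#define PAGE_WORDS 1024		/* 4 KiB page / sizeof(u32) */

static uint32_t sample_hash(const uint32_t *page, const unsigned *offsets,
			    unsigned long strength)
{
	uint32_t h = 0;

	for (unsigned long i = 0; i < strength; i++)
		h = h * 2654435761u + page[offsets[i]];	/* stand-in mix */
	return h;
}

int main(void)
{
	static uint32_t page[PAGE_WORDS];
	static unsigned offsets[PAGE_WORDS];

	for (unsigned i = 0; i < PAGE_WORDS; i++) {
		page[i] = i * 7;
		offsets[i] = i;
	}
	for (unsigned i = PAGE_WORDS - 1; i > 0; i--) {	/* shuffle offsets */
		unsigned j = rand() % (i + 1), t = offsets[i];
		offsets[i] = offsets[j];
		offsets[j] = t;
	}

	printf("strength %4d: %08x\n", 64, sample_hash(page, offsets, 64));
	printf("strength %4d: %08x\n", 1024, sample_hash(page, offsets, 1024));
	return 0;
}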
++ */ ++ u8 lookup_window_index; ++ u64 stable_benefit; ++ unsigned long turn_point_down; ++ unsigned long turn_benefit_down; ++ unsigned long turn_point_up; ++ unsigned long turn_benefit_up; ++ unsigned long stable_point; ++} rshash_state; ++ ++/*zero page hash table, hash_strength [0 ~ HASH_STRENGTH_MAX]*/ ++static u32 *zero_hash_table; ++ ++static inline struct node_vma *alloc_node_vma(void) ++{ ++ struct node_vma *node_vma; ++ node_vma = kmem_cache_zalloc(node_vma_cache, GFP_KERNEL); ++ if (node_vma) { ++ INIT_HLIST_HEAD(&node_vma->rmap_hlist); ++ INIT_HLIST_NODE(&node_vma->hlist); ++ } ++ return node_vma; ++} ++ ++static inline void free_node_vma(struct node_vma *node_vma) ++{ ++ kmem_cache_free(node_vma_cache, node_vma); ++} ++ ++ ++static inline struct vma_slot *alloc_vma_slot(void) ++{ ++ struct vma_slot *slot; ++ ++ /* ++ * In case ksm is not initialized by now. ++ * Oops, we need to consider the call site of uksm_init() in the future. ++ */ ++ if (!vma_slot_cache) ++ return NULL; ++ ++ slot = kmem_cache_zalloc(vma_slot_cache, GFP_KERNEL); ++ if (slot) { ++ INIT_LIST_HEAD(&slot->slot_list); ++ INIT_LIST_HEAD(&slot->dedup_list); ++ slot->flags |= UKSM_SLOT_NEED_RERAND; ++ } ++ return slot; ++} ++ ++static inline void free_vma_slot(struct vma_slot *vma_slot) ++{ ++ kmem_cache_free(vma_slot_cache, vma_slot); ++} ++ ++ ++ ++static inline struct rmap_item *alloc_rmap_item(void) ++{ ++ struct rmap_item *rmap_item; ++ ++ rmap_item = kmem_cache_zalloc(rmap_item_cache, GFP_KERNEL); ++ if (rmap_item) { ++ /* bug on lowest bit is not clear for flag use */ ++ BUG_ON(is_addr(rmap_item)); ++ } ++ return rmap_item; ++} ++ ++static inline void free_rmap_item(struct rmap_item *rmap_item) ++{ ++ rmap_item->slot = NULL; /* debug safety */ ++ kmem_cache_free(rmap_item_cache, rmap_item); ++} ++ ++static inline struct stable_node *alloc_stable_node(void) ++{ ++ struct stable_node *node; ++ node = kmem_cache_alloc(stable_node_cache, GFP_KERNEL | GFP_ATOMIC); ++ if (!node) ++ return NULL; ++ ++ INIT_HLIST_HEAD(&node->hlist); ++ list_add(&node->all_list, &stable_node_list); ++ return node; ++} ++ ++static inline void free_stable_node(struct stable_node *stable_node) ++{ ++ list_del(&stable_node->all_list); ++ kmem_cache_free(stable_node_cache, stable_node); ++} ++ ++static inline struct tree_node *alloc_tree_node(struct list_head *list) ++{ ++ struct tree_node *node; ++ node = kmem_cache_zalloc(tree_node_cache, GFP_KERNEL | GFP_ATOMIC); ++ if (!node) ++ return NULL; ++ ++ list_add(&node->all_list, list); ++ return node; ++} ++ ++static inline void free_tree_node(struct tree_node *node) ++{ ++ list_del(&node->all_list); ++ kmem_cache_free(tree_node_cache, node); ++} ++ ++static void uksm_drop_anon_vma(struct rmap_item *rmap_item) ++{ ++ struct anon_vma *anon_vma = rmap_item->anon_vma; ++ ++ put_anon_vma(anon_vma); ++} ++ ++ ++/** ++ * Remove a stable node from stable_tree, may unlink from its tree_node and ++ * may remove its parent tree_node if no other stable node is pending. ++ * ++ * @stable_node The node need to be removed ++ * @unlink_rb Will this node be unlinked from the rbtree? ++ * @remove_tree_ node Will its tree_node be removed if empty? 
++ */ ++static void remove_node_from_stable_tree(struct stable_node *stable_node, ++ int unlink_rb, int remove_tree_node) ++{ ++ struct node_vma *node_vma; ++ struct rmap_item *rmap_item; ++ struct hlist_node *hlist, *rmap_hlist, *n; ++ ++ if (!hlist_empty(&stable_node->hlist)) { ++ hlist_for_each_entry_safe(node_vma, hlist, n, ++ &stable_node->hlist, hlist) { ++ hlist_for_each_entry(rmap_item, rmap_hlist, ++ &node_vma->rmap_hlist, hlist) { ++ uksm_pages_sharing--; ++ ++ uksm_drop_anon_vma(rmap_item); ++ rmap_item->address &= PAGE_MASK; ++ } ++ free_node_vma(node_vma); ++ cond_resched(); ++ } ++ ++ /* the last one is counted as shared */ ++ uksm_pages_shared--; ++ uksm_pages_sharing++; ++ } ++ ++ if (stable_node->tree_node && unlink_rb) { ++ rb_erase(&stable_node->node, ++ &stable_node->tree_node->sub_root); ++ ++ if (RB_EMPTY_ROOT(&stable_node->tree_node->sub_root) && ++ remove_tree_node) { ++ rb_erase(&stable_node->tree_node->node, ++ root_stable_treep); ++ free_tree_node(stable_node->tree_node); ++ } else { ++ stable_node->tree_node->count--; ++ } ++ } ++ ++ free_stable_node(stable_node); ++} ++ ++ ++/* ++ * get_uksm_page: checks if the page indicated by the stable node ++ * is still its ksm page, despite having held no reference to it. ++ * In which case we can trust the content of the page, and it ++ * returns the gotten page; but if the page has now been zapped, ++ * remove the stale node from the stable tree and return NULL. ++ * ++ * You would expect the stable_node to hold a reference to the ksm page. ++ * But if it increments the page's count, swapping out has to wait for ++ * ksmd to come around again before it can free the page, which may take ++ * seconds or even minutes: much too unresponsive. So instead we use a ++ * "keyhole reference": access to the ksm page from the stable node peeps ++ * out through its keyhole to see if that page still holds the right key, ++ * pointing back to this stable node. This relies on freeing a PageAnon ++ * page to reset its page->mapping to NULL, and relies on no other use of ++ * a page to put something that might look like our key in page->mapping. ++ * ++ * include/linux/pagemap.h page_cache_get_speculative() is a good reference, ++ * but this is different - made simpler by uksm_thread_mutex being held, but ++ * interesting for assuming that no other use of the struct page could ever ++ * put our expected_mapping into page->mapping (or a field of the union which ++ * coincides with page->mapping). The RCU calls are not for KSM at all, but ++ * to keep the page_count protocol described with page_cache_get_speculative. ++ * ++ * Note: it is possible that get_uksm_page() will return NULL one moment, ++ * then page the next, if the page is in between page_freeze_refs() and ++ * page_unfreeze_refs(): this shouldn't be a problem anywhere, the page ++ * is on its way to being freed; but it is an anomaly to bear in mind. ++ * ++ * @unlink_rb: if the removal of this node will firstly unlink from ++ * its rbtree. stable_node_reinsert will prevent this when restructuring the ++ * node from its old tree. ++ * ++ * @remove_tree_node: if this is the last one of its tree_node, will the ++ * tree_node be freed ? If we are inserting stable node, this tree_node may ++ * be reused, so don't free it. 
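remove_node_from_stable_tree() above also shows the counter convention used throughout: for a stable page with N mappings, one rmap_item is accounted in uksm_pages_shared and the remaining N - 1 in uksm_pages_sharing; tear-down drops every mapping from the sharing count and then moves the last one back. A small user-space sketch of that bookkeeping (signed counters are used only so the transient dip is harmless; the names mirror the patch for readability):

#include <assert.h>
#include <stdio.h>

static long pages_shared, pages_sharing;

static void add_mapping(int first)
{
	if (first)
		pages_shared++;      /* first rmap_item of a stable page */
	else
		pages_sharing++;     /* every additional rmap_item       */
}

/* Mirrors remove_node_from_stable_tree(): drop every mapping as "sharing",
 * then move the last one back from "sharing" to "shared". */
static void remove_stable_page(long mappings)
{
	for (long i = 0; i < mappings; i++)
		pages_sharing--;
	pages_shared--;
	pages_sharing++;
}

int main(void)
{
	add_mapping(1);
	add_mapping(0);
	add_mapping(0);                       /* one page, three mappings */
	assert(pages_shared == 1 && pages_sharing == 2);

	remove_stable_page(3);
	assert(pages_shared == 0 && pages_sharing == 0);
	printf("shared/sharing accounting balances\n");
	return 0;
}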
++ */ ++static struct page *get_uksm_page(struct stable_node *stable_node, ++ int unlink_rb, int remove_tree_node) ++{ ++ struct page *page; ++ void *expected_mapping; ++ ++ page = pfn_to_page(stable_node->kpfn); ++ expected_mapping = (void *)stable_node + ++ (PAGE_MAPPING_ANON | PAGE_MAPPING_KSM); ++ rcu_read_lock(); ++ if (page->mapping != expected_mapping) ++ goto stale; ++ if (!get_page_unless_zero(page)) ++ goto stale; ++ if (page->mapping != expected_mapping) { ++ put_page(page); ++ goto stale; ++ } ++ rcu_read_unlock(); ++ return page; ++stale: ++ rcu_read_unlock(); ++ remove_node_from_stable_tree(stable_node, unlink_rb, remove_tree_node); ++ ++ return NULL; ++} ++ ++/* ++ * Removing rmap_item from stable or unstable tree. ++ * This function will clean the information from the stable/unstable tree. ++ */ ++static inline void remove_rmap_item_from_tree(struct rmap_item *rmap_item) ++{ ++ if (rmap_item->address & STABLE_FLAG) { ++ struct stable_node *stable_node; ++ struct node_vma *node_vma; ++ struct page *page; ++ ++ node_vma = rmap_item->head; ++ stable_node = node_vma->head; ++ page = get_uksm_page(stable_node, 1, 1); ++ if (!page) ++ goto out; ++ ++ /* ++ * page lock is needed because it's racing with ++ * try_to_unmap_ksm(), etc. ++ */ ++ lock_page(page); ++ hlist_del(&rmap_item->hlist); ++ ++ if (hlist_empty(&node_vma->rmap_hlist)) { ++ hlist_del(&node_vma->hlist); ++ free_node_vma(node_vma); ++ } ++ unlock_page(page); ++ ++ put_page(page); ++ if (hlist_empty(&stable_node->hlist)) { ++ /* do NOT call remove_node_from_stable_tree() here, ++ * it's possible for a forked rmap_item not in ++ * stable tree while the in-tree rmap_items were ++ * deleted. ++ */ ++ uksm_pages_shared--; ++ } else ++ uksm_pages_sharing--; ++ ++ ++ uksm_drop_anon_vma(rmap_item); ++ } else if (rmap_item->address & UNSTABLE_FLAG) { ++ if (rmap_item->hash_round == uksm_hash_round) { ++ ++ rb_erase(&rmap_item->node, ++ &rmap_item->tree_node->sub_root); ++ if (RB_EMPTY_ROOT(&rmap_item->tree_node->sub_root)) { ++ rb_erase(&rmap_item->tree_node->node, ++ &root_unstable_tree); ++ ++ free_tree_node(rmap_item->tree_node); ++ } else ++ rmap_item->tree_node->count--; ++ } ++ uksm_pages_unshared--; ++ } ++ ++ rmap_item->address &= PAGE_MASK; ++ rmap_item->hash_max = 0; ++ ++out: ++ cond_resched(); /* we're called from many long loops */ ++} ++ ++static inline int slot_in_uksm(struct vma_slot *slot) ++{ ++ return list_empty(&slot->slot_list); ++} ++ ++/* ++ * Test if the mm is exiting ++ */ ++static inline bool uksm_test_exit(struct mm_struct *mm) ++{ ++ return atomic_read(&mm->mm_users) == 0; ++} ++ ++/** ++ * Need to do two things: ++ * 1. check if slot was moved to del list ++ * 2. make sure the mmap_sem is manipulated under valid vma. ++ * ++ * My concern here is that in some cases, this may make ++ * vma_slot_list_lock() waiters to serialized further by some ++ * sem->wait_lock, can this really be expensive? ++ * ++ * ++ * @return ++ * 0: if successfully locked mmap_sem ++ * -ENOENT: this slot was moved to del list ++ * -EBUSY: vma lock failed ++ */ ++static int try_down_read_slot_mmap_sem(struct vma_slot *slot) ++{ ++ struct vm_area_struct *vma; ++ struct mm_struct *mm; ++ struct rw_semaphore *sem; ++ ++ spin_lock(&vma_slot_list_lock); ++ ++ /* the slot_list was removed and inited from new list, when it enters ++ * uksm_list. 
If now it's not empty, then it must be moved to del list ++ */ ++ if (!slot_in_uksm(slot)) { ++ spin_unlock(&vma_slot_list_lock); ++ return -ENOENT; ++ } ++ ++ BUG_ON(slot->pages != vma_pages(slot->vma)); ++ /* Ok, vma still valid */ ++ vma = slot->vma; ++ mm = vma->vm_mm; ++ sem = &mm->mmap_sem; ++ ++ if (uksm_test_exit(mm)) { ++ spin_unlock(&vma_slot_list_lock); ++ return -ENOENT; ++ } ++ ++ if (down_read_trylock(sem)) { ++ spin_unlock(&vma_slot_list_lock); ++ return 0; ++ } ++ ++ spin_unlock(&vma_slot_list_lock); ++ return -EBUSY; ++} ++ ++static inline unsigned long ++vma_page_address(struct page *page, struct vm_area_struct *vma) ++{ ++ pgoff_t pgoff = page->index << (PAGE_CACHE_SHIFT - PAGE_SHIFT); ++ unsigned long address; ++ ++ address = vma->vm_start + ((pgoff - vma->vm_pgoff) << PAGE_SHIFT); ++ if (unlikely(address < vma->vm_start || address >= vma->vm_end)) { ++ /* page should be within @vma mapping range */ ++ return -EFAULT; ++ } ++ return address; ++} ++ ++ ++/* return 0 on success with the item's mmap_sem locked */ ++static inline int get_mergeable_page_lock_mmap(struct rmap_item *item) ++{ ++ struct mm_struct *mm; ++ struct vma_slot *slot = item->slot; ++ int err = -EINVAL; ++ ++ struct page *page; ++ ++ /* ++ * try_down_read_slot_mmap_sem() returns non-zero if the slot ++ * has been removed by uksm_remove_vma(). ++ */ ++ if (try_down_read_slot_mmap_sem(slot)) ++ return -EBUSY; ++ ++ mm = slot->vma->vm_mm; ++ ++ if (uksm_test_exit(mm)) ++ goto failout_up; ++ ++ page = item->page; ++ rcu_read_lock(); ++ if (!get_page_unless_zero(page)) { ++ rcu_read_unlock(); ++ goto failout_up; ++ } ++ ++ /* No need to consider huge page here. */ ++ if (item->slot->vma->anon_vma != page_anon_vma(page) || ++ vma_page_address(page, item->slot->vma) != get_rmap_addr(item)) { ++ /* ++ * TODO: ++ * should we release this item becase of its stale page ++ * mapping? ++ */ ++ put_page(page); ++ rcu_read_unlock(); ++ goto failout_up; ++ } ++ rcu_read_unlock(); ++ return 0; ++ ++failout_up: ++ up_read(&mm->mmap_sem); ++ return err; ++} ++ ++/* ++ * What kind of VMA is considered ? ++ */ ++static inline int vma_can_enter(struct vm_area_struct *vma) ++{ ++ return uksm_flags_can_scan(vma->vm_flags); ++} ++ ++/* ++ * Called whenever a fresh new vma is created A new vma_slot. ++ * is created and inserted into a global list Must be called. ++ * after vma is inserted to its mm . ++ */ ++void uksm_vma_add_new(struct vm_area_struct *vma) ++{ ++ struct vma_slot *slot; ++ ++ if (!vma_can_enter(vma)) { ++ vma->uksm_vma_slot = NULL; ++ return; ++ } ++ ++ slot = alloc_vma_slot(); ++ if (!slot) { ++ vma->uksm_vma_slot = NULL; ++ return; ++ } ++ ++ vma->uksm_vma_slot = slot; ++ vma->vm_flags |= VM_MERGEABLE; ++ slot->vma = vma; ++ slot->mm = vma->vm_mm; ++ slot->ctime_j = jiffies; ++ slot->pages = vma_pages(vma); ++ spin_lock(&vma_slot_list_lock); ++ list_add_tail(&slot->slot_list, &vma_slot_new); ++ spin_unlock(&vma_slot_list_lock); ++} ++ ++/* ++ * Called after vma is unlinked from its mm ++ */ ++void uksm_remove_vma(struct vm_area_struct *vma) ++{ ++ struct vma_slot *slot; ++ ++ if (!vma->uksm_vma_slot) ++ return; ++ ++ slot = vma->uksm_vma_slot; ++ spin_lock(&vma_slot_list_lock); ++ if (slot_in_uksm(slot)) { ++ /** ++ * This slot has been added by ksmd, so move to the del list ++ * waiting ksmd to free it. ++ */ ++ list_add_tail(&slot->slot_list, &vma_slot_del); ++ } else { ++ /** ++ * It's still on new list. It's ok to free slot directly. 
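vma_page_address() above recovers the user address of a page from its index in the mapping as vm_start + ((page->index - vm_pgoff) << PAGE_SHIFT) and rejects anything outside [vm_start, vm_end). A user-space sketch of the same arithmetic with made-up numbers; it returns 0 instead of -EFAULT for the out-of-range case:

#include <stdio.h>

#define PAGE_SHIFT 12                    /* assumed 4 KiB pages */

struct fake_vma {
	unsigned long vm_start, vm_end, vm_pgoff;
};

/* Same formula as vma_page_address(); 0 stands in for -EFAULT. */
static unsigned long page_address_in(const struct fake_vma *vma,
				     unsigned long page_index)
{
	unsigned long addr = vma->vm_start +
			     ((page_index - vma->vm_pgoff) << PAGE_SHIFT);

	if (addr < vma->vm_start || addr >= vma->vm_end)
		return 0;
	return addr;
}

int main(void)
{
	/* 16-page mapping starting at 0x700000000000 with pgoff 0x100 */
	struct fake_vma vma = {
		.vm_start = 0x700000000000UL,
		.vm_end   = 0x700000000000UL + (16UL << PAGE_SHIFT),
		.vm_pgoff = 0x100,
	};

	printf("page index 0x103 -> %#lx\n", page_address_in(&vma, 0x103));
	printf("page index 0x200 -> %#lx (out of range)\n",
	       page_address_in(&vma, 0x200));
	return 0;
}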
++ */ ++ list_del(&slot->slot_list); ++ free_vma_slot(slot); ++ } ++ spin_unlock(&vma_slot_list_lock); ++ vma->uksm_vma_slot = NULL; ++} ++ ++/* 32/3 < they < 32/2 */ ++#define shiftl 8 ++#define shiftr 12 ++ ++#define HASH_FROM_TO(from, to) \ ++for (index = from; index < to; index++) { \ ++ pos = random_nums[index]; \ ++ hash += key[pos]; \ ++ hash += (hash << shiftl); \ ++ hash ^= (hash >> shiftr); \ ++} ++ ++ ++#define HASH_FROM_DOWN_TO(from, to) \ ++for (index = from - 1; index >= to; index--) { \ ++ hash ^= (hash >> shiftr); \ ++ hash ^= (hash >> (shiftr*2)); \ ++ hash -= (hash << shiftl); \ ++ hash += (hash << (shiftl*2)); \ ++ pos = random_nums[index]; \ ++ hash -= key[pos]; \ ++} ++ ++/* ++ * The main random sample hash function. ++ */ ++static u32 random_sample_hash(void *addr, u32 hash_strength) ++{ ++ u32 hash = 0xdeadbeef; ++ int index, pos, loop = hash_strength; ++ u32 *key = (u32 *)addr; ++ ++ if (loop > HASH_STRENGTH_FULL) ++ loop = HASH_STRENGTH_FULL; ++ ++ HASH_FROM_TO(0, loop); ++ ++ if (hash_strength > HASH_STRENGTH_FULL) { ++ loop = hash_strength - HASH_STRENGTH_FULL; ++ HASH_FROM_TO(0, loop); ++ } ++ ++ return hash; ++} ++ ++ ++/** ++ * It's used when hash strength is adjusted ++ * ++ * @addr The page's virtual address ++ * @from The original hash strength ++ * @to The hash strength changed to ++ * @hash The hash value generated with "from" hash value ++ * ++ * return the hash value ++ */ ++static u32 delta_hash(void *addr, int from, int to, u32 hash) ++{ ++ u32 *key = (u32 *)addr; ++ int index, pos; /* make sure they are int type */ ++ ++ if (to > from) { ++ if (from >= HASH_STRENGTH_FULL) { ++ from -= HASH_STRENGTH_FULL; ++ to -= HASH_STRENGTH_FULL; ++ HASH_FROM_TO(from, to); ++ } else if (to <= HASH_STRENGTH_FULL) { ++ HASH_FROM_TO(from, to); ++ } else { ++ HASH_FROM_TO(from, HASH_STRENGTH_FULL); ++ HASH_FROM_TO(0, to - HASH_STRENGTH_FULL); ++ } ++ } else { ++ if (from <= HASH_STRENGTH_FULL) { ++ HASH_FROM_DOWN_TO(from, to); ++ } else if (to >= HASH_STRENGTH_FULL) { ++ from -= HASH_STRENGTH_FULL; ++ to -= HASH_STRENGTH_FULL; ++ HASH_FROM_DOWN_TO(from, to); ++ } else { ++ HASH_FROM_DOWN_TO(from - HASH_STRENGTH_FULL, 0); ++ HASH_FROM_DOWN_TO(HASH_STRENGTH_FULL, to); ++ } ++ } ++ ++ return hash; ++} ++ ++ ++ ++ ++#define CAN_OVERFLOW_U64(x, delta) (U64_MAX - (x) < (delta)) ++ ++/** ++ * ++ * Called when: rshash_pos or rshash_neg is about to overflow or a scan round ++ * has finished. ++ * ++ * return 0 if no page has been scanned since last call, 1 otherwise. 
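random_sample_hash() above mixes only hash_strength randomly chosen 32-bit words of a page (positions come from random_nums[]), using the add/shift/xor step of HASH_FROM_TO, so a weak hash costs far less than reading the whole page. A user-space sketch of that sampling hash, assuming 4 KiB pages and omitting the loop-back extension beyond HASH_STRENGTH_FULL; sample_pos[] stands in for random_nums[] and uses the identity order only for the demo:

#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>

#define PAGE_WORDS 1024          /* 4 KiB page / sizeof(u32), assumed     */
#define SHIFTL 8
#define SHIFTR 12

/* Positions to sample; normally a random permutation of [0, PAGE_WORDS). */
static uint32_t sample_pos[PAGE_WORDS];

/* Same mixing step as HASH_FROM_TO in the patch, over `strength' samples. */
static uint32_t sample_hash(const uint32_t *page_words, unsigned strength)
{
	uint32_t hash = 0xdeadbeef;

	for (unsigned i = 0; i < strength && i < PAGE_WORDS; i++) {
		hash += page_words[sample_pos[i]];
		hash += hash << SHIFTL;
		hash ^= hash >> SHIFTR;
	}
	return hash;
}

int main(void)
{
	uint32_t page[PAGE_WORDS];

	for (unsigned i = 0; i < PAGE_WORDS; i++) {
		sample_pos[i] = i;                 /* identity for the demo */
		page[i] = (uint32_t)rand();
	}

	/* Weak-but-cheap hash at 1/16 strength vs. the full-page hash. */
	printf("strength   64: %08x\n", sample_hash(page, 64));
	printf("strength 1024: %08x\n", sample_hash(page, PAGE_WORDS));
	return 0;
}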
++ */ ++static inline int encode_benefit(void) ++{ ++ u64 scanned_delta, pos_delta, neg_delta; ++ unsigned long base = benefit.base; ++ ++ scanned_delta = uksm_pages_scanned - uksm_pages_scanned_last; ++ ++ if (!scanned_delta) ++ return 0; ++ ++ scanned_delta >>= base; ++ pos_delta = rshash_pos >> base; ++ neg_delta = rshash_neg >> base; ++ ++ if (CAN_OVERFLOW_U64(benefit.pos, pos_delta) || ++ CAN_OVERFLOW_U64(benefit.neg, neg_delta) || ++ CAN_OVERFLOW_U64(benefit.scanned, scanned_delta)) { ++ benefit.scanned >>= 1; ++ benefit.neg >>= 1; ++ benefit.pos >>= 1; ++ benefit.base++; ++ scanned_delta >>= 1; ++ pos_delta >>= 1; ++ neg_delta >>= 1; ++ } ++ ++ benefit.pos += pos_delta; ++ benefit.neg += neg_delta; ++ benefit.scanned += scanned_delta; ++ ++ BUG_ON(!benefit.scanned); ++ ++ rshash_pos = rshash_neg = 0; ++ uksm_pages_scanned_last = uksm_pages_scanned; ++ ++ return 1; ++} ++ ++static inline void reset_benefit(void) ++{ ++ benefit.pos = 0; ++ benefit.neg = 0; ++ benefit.base = 0; ++ benefit.scanned = 0; ++} ++ ++static inline void inc_rshash_pos(unsigned long delta) ++{ ++ if (CAN_OVERFLOW_U64(rshash_pos, delta)) ++ encode_benefit(); ++ ++ rshash_pos += delta; ++} ++ ++static inline void inc_rshash_neg(unsigned long delta) ++{ ++ if (CAN_OVERFLOW_U64(rshash_neg, delta)) ++ encode_benefit(); ++ ++ rshash_neg += delta; ++} ++ ++ ++static inline u32 page_hash(struct page *page, unsigned long hash_strength, ++ int cost_accounting) ++{ ++ u32 val; ++ unsigned long delta; ++ ++ void *addr = kmap_atomic(page, KM_USER0); ++ ++ val = random_sample_hash(addr, hash_strength); ++ kunmap_atomic(addr, KM_USER0); ++ ++ if (cost_accounting) { ++ if (HASH_STRENGTH_FULL > hash_strength) ++ delta = HASH_STRENGTH_FULL - hash_strength; ++ else ++ delta = 0; ++ ++ inc_rshash_pos(delta); ++ } ++ ++ return val; ++} ++ ++static int memcmp_pages(struct page *page1, struct page *page2, ++ int cost_accounting) ++{ ++ char *addr1, *addr2; ++ int ret; ++ ++ addr1 = kmap_atomic(page1, KM_USER0); ++ addr2 = kmap_atomic(page2, KM_USER1); ++ ret = memcmp(addr1, addr2, PAGE_SIZE); ++ kunmap_atomic(addr2, KM_USER1); ++ kunmap_atomic(addr1, KM_USER0); ++ ++ if (cost_accounting) ++ inc_rshash_neg(memcmp_cost); ++ ++ return ret; ++} ++ ++static inline int pages_identical(struct page *page1, struct page *page2) ++{ ++ return !memcmp_pages(page1, page2, 0); ++} ++ ++static inline int is_page_full_zero(struct page *page) ++{ ++ char *addr; ++ int ret; ++ ++ addr = kmap_atomic(page, KM_USER0); ++ ret = is_full_zero(addr, PAGE_SIZE); ++ kunmap_atomic(addr, KM_USER0); ++ ++ return ret; ++} ++ ++static int write_protect_page(struct vm_area_struct *vma, struct page *page, ++ pte_t *orig_pte, pte_t *old_pte) ++{ ++ struct mm_struct *mm = vma->vm_mm; ++ unsigned long addr; ++ pte_t *ptep; ++ spinlock_t *ptl; ++ int swapped; ++ int err = -EFAULT; ++ ++ addr = page_address_in_vma(page, vma); ++ if (addr == -EFAULT) ++ goto out; ++ ++ BUG_ON(PageTransCompound(page)); ++ ptep = page_check_address(page, mm, addr, &ptl, 0); ++ if (!ptep) ++ goto out; ++ ++ if (old_pte) ++ *old_pte = *ptep; ++ ++ if (pte_write(*ptep) || pte_dirty(*ptep)) { ++ pte_t entry; ++ ++ swapped = PageSwapCache(page); ++ flush_cache_page(vma, addr, page_to_pfn(page)); ++ /* ++ * Ok this is tricky, when get_user_pages_fast() run it doesnt ++ * take any lock, therefore the check that we are going to make ++ * with the pagecount against the mapcount is racey and ++ * O_DIRECT can happen right after the check. 
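encode_benefit() just above keeps its pos/neg/scanned accumulators meaningful over a very long uptime by storing them scaled down by 2^base: each delta is shifted right by base before being added, and when an addition would overflow a u64 everything is halved and base is incremented. A user-space sketch of that scheme; benefit_add() is an illustrative name, not a function in the patch:

#include <stdint.h>
#include <stdio.h>

#define CAN_OVERFLOW_U64(x, delta) (UINT64_MAX - (x) < (delta))

struct benefit {
	uint64_t pos, neg, scanned;
	unsigned base;                 /* everything is scaled by 2^base */
};

/* Fold new deltas in, rescaling on impending overflow, mirroring
 * encode_benefit() in the patch. */
static void benefit_add(struct benefit *b, uint64_t pos, uint64_t neg,
			uint64_t scanned)
{
	pos >>= b->base;
	neg >>= b->base;
	scanned >>= b->base;

	if (CAN_OVERFLOW_U64(b->pos, pos) ||
	    CAN_OVERFLOW_U64(b->neg, neg) ||
	    CAN_OVERFLOW_U64(b->scanned, scanned)) {
		b->pos >>= 1;
		b->neg >>= 1;
		b->scanned >>= 1;
		b->base++;
		pos >>= 1;
		neg >>= 1;
		scanned >>= 1;
	}
	b->pos += pos;
	b->neg += neg;
	b->scanned += scanned;
}

int main(void)
{
	struct benefit b = { 0 };

	benefit_add(&b, UINT64_MAX / 2, 1000, 1 << 20);
	benefit_add(&b, UINT64_MAX / 2 + 7, 2000, 1 << 20); /* forces rescale */
	printf("pos=%llu neg=%llu scanned=%llu base=%u\n",
	       (unsigned long long)b.pos, (unsigned long long)b.neg,
	       (unsigned long long)b.scanned, b.base);
	return 0;
}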
++ * So we clear the pte and flush the tlb before the check ++ * this assure us that no O_DIRECT can happen after the check ++ * or in the middle of the check. ++ */ ++ entry = ptep_clear_flush(vma, addr, ptep); ++ /* ++ * Check that no O_DIRECT or similar I/O is in progress on the ++ * page ++ */ ++ if (page_mapcount(page) + 1 + swapped != page_count(page)) { ++ set_pte_at(mm, addr, ptep, entry); ++ goto out_unlock; ++ } ++ if (pte_dirty(entry)) ++ set_page_dirty(page); ++ entry = pte_mkclean(pte_wrprotect(entry)); ++ set_pte_at_notify(mm, addr, ptep, entry); ++ } ++ *orig_pte = *ptep; ++ err = 0; ++ ++out_unlock: ++ pte_unmap_unlock(ptep, ptl); ++out: ++ return err; ++} ++ ++#define MERGE_ERR_PGERR 1 /* the page is invalid cannot continue */ ++#define MERGE_ERR_COLLI 2 /* there is a collision */ ++#define MERGE_ERR_COLLI_MAX 3 /* collision at the max hash strength */ ++#define MERGE_ERR_CHANGED 4 /* the page has changed since last hash */ ++ ++ ++/** ++ * replace_page - replace page in vma by new ksm page ++ * @vma: vma that holds the pte pointing to page ++ * @page: the page we are replacing by kpage ++ * @kpage: the ksm page we replace page by ++ * @orig_pte: the original value of the pte ++ * ++ * Returns 0 on success, MERGE_ERR_PGERR on failure. ++ */ ++static int replace_page(struct vm_area_struct *vma, struct page *page, ++ struct page *kpage, pte_t orig_pte) ++{ ++ struct mm_struct *mm = vma->vm_mm; ++ pgd_t *pgd; ++ pud_t *pud; ++ pmd_t *pmd; ++ pte_t *ptep; ++ spinlock_t *ptl; ++ pte_t entry; ++ ++ unsigned long addr; ++ int err = MERGE_ERR_PGERR; ++ ++ addr = page_address_in_vma(page, vma); ++ if (addr == -EFAULT) ++ goto out; ++ ++ pgd = pgd_offset(mm, addr); ++ if (!pgd_present(*pgd)) ++ goto out; ++ ++ pud = pud_offset(pgd, addr); ++ if (!pud_present(*pud)) ++ goto out; ++ ++ pmd = pmd_offset(pud, addr); ++ BUG_ON(pmd_trans_huge(*pmd)); ++ if (!pmd_present(*pmd)) ++ goto out; ++ ++ ptep = pte_offset_map_lock(mm, pmd, addr, &ptl); ++ if (!pte_same(*ptep, orig_pte)) { ++ pte_unmap_unlock(ptep, ptl); ++ goto out; ++ } ++ ++ flush_cache_page(vma, addr, pte_pfn(*ptep)); ++ ptep_clear_flush(vma, addr, ptep); ++ entry = mk_pte(kpage, vma->vm_page_prot); ++ ++ /* special treatment is needed for zero_page */ ++ if ((page_to_pfn(kpage) == uksm_zero_pfn) || ++ (page_to_pfn(kpage) == zero_pfn)) ++ entry = pte_mkspecial(entry); ++ else { ++ get_page(kpage); ++ page_add_anon_rmap(kpage, vma, addr); ++ } ++ ++ set_pte_at_notify(mm, addr, ptep, entry); ++ ++ page_remove_rmap(page); ++ if (!page_mapped(page)) ++ try_to_free_swap(page); ++ put_page(page); ++ ++ pte_unmap_unlock(ptep, ptl); ++ err = 0; ++out: ++ return err; ++} ++ ++ ++/** ++ * Fully hash a page with HASH_STRENGTH_MAX return a non-zero hash value. The ++ * zero hash value at HASH_STRENGTH_MAX is used to indicated that its ++ * hash_max member has not been calculated. 
++ * ++ * @page The page needs to be hashed ++ * @hash_old The hash value calculated with current hash strength ++ * ++ * return the new hash value calculated at HASH_STRENGTH_MAX ++ */ ++static inline u32 page_hash_max(struct page *page, u32 hash_old) ++{ ++ u32 hash_max = 0; ++ void *addr; ++ ++ addr = kmap_atomic(page, KM_USER0); ++ hash_max = delta_hash(addr, hash_strength, ++ HASH_STRENGTH_MAX, hash_old); ++ ++ kunmap_atomic(addr, KM_USER0); ++ ++ if (!hash_max) ++ hash_max = 1; ++ ++ inc_rshash_neg(HASH_STRENGTH_MAX - hash_strength); ++ return hash_max; ++} ++ ++/* ++ * We compare the hash again, to ensure that it is really a hash collision ++ * instead of being caused by page write. ++ */ ++static inline int check_collision(struct rmap_item *rmap_item, ++ u32 hash) ++{ ++ int err; ++ struct page *page = rmap_item->page; ++ ++ /* if this rmap_item has already been hash_maxed, then the collision ++ * must appears in the second-level rbtree search. In this case we check ++ * if its hash_max value has been changed. Otherwise, the collision ++ * happens in the first-level rbtree search, so we check against it's ++ * current hash value. ++ */ ++ if (rmap_item->hash_max) { ++ inc_rshash_neg(memcmp_cost); ++ inc_rshash_neg(HASH_STRENGTH_MAX - hash_strength); ++ ++ if (rmap_item->hash_max == page_hash_max(page, hash)) ++ err = MERGE_ERR_COLLI; ++ else ++ err = MERGE_ERR_CHANGED; ++ } else { ++ inc_rshash_neg(memcmp_cost + hash_strength); ++ ++ if (page_hash(page, hash_strength, 0) == hash) ++ err = MERGE_ERR_COLLI; ++ else ++ err = MERGE_ERR_CHANGED; ++ } ++ ++ return err; ++} ++ ++static struct page *page_trans_compound_anon(struct page *page) ++{ ++ if (PageTransCompound(page)) { ++ struct page *head = compound_trans_head(page); ++ /* ++ * head may actually be splitted and freed from under ++ * us but it's ok here. ++ */ ++ if (PageAnon(head)) ++ return head; ++ } ++ return NULL; ++} ++ ++static int page_trans_compound_anon_split(struct page *page) ++{ ++ int ret = 0; ++ struct page *transhuge_head = page_trans_compound_anon(page); ++ if (transhuge_head) { ++ /* Get the reference on the head to split it. */ ++ if (get_page_unless_zero(transhuge_head)) { ++ /* ++ * Recheck we got the reference while the head ++ * was still anonymous. ++ */ ++ if (PageAnon(transhuge_head)) ++ ret = split_huge_page(transhuge_head); ++ else ++ /* ++ * Retry later if split_huge_page run ++ * from under us. ++ */ ++ ret = 1; ++ put_page(transhuge_head); ++ } else ++ /* Retry later if split_huge_page run from under us. */ ++ ret = 1; ++ } ++ return ret; ++} ++ ++/** ++ * Try to merge a rmap_item.page with a kpage in stable node. kpage must ++ * already be a ksm page. ++ * ++ * @return 0 if the pages were merged, -EFAULT otherwise. ++ */ ++static int try_to_merge_with_uksm_page(struct rmap_item *rmap_item, ++ struct page *kpage, u32 hash) ++{ ++ struct vm_area_struct *vma = rmap_item->slot->vma; ++ struct mm_struct *mm = vma->vm_mm; ++ pte_t orig_pte = __pte(0); ++ int err = MERGE_ERR_PGERR; ++ struct page *page; ++ ++ if (uksm_test_exit(mm)) ++ goto out; ++ ++ page = rmap_item->page; ++ ++ if (page == kpage) { /* ksm page forked */ ++ err = 0; ++ goto out; ++ } ++ ++ if (PageTransCompound(page) && page_trans_compound_anon_split(page)) ++ goto out; ++ BUG_ON(PageTransCompound(page)); ++ ++ if (!PageAnon(page) || !PageKsm(kpage)) ++ goto out; ++ ++ /* ++ * We need the page lock to read a stable PageSwapCache in ++ * write_protect_page(). 
We use trylock_page() instead of ++ * lock_page() because we don't want to wait here - we ++ * prefer to continue scanning and merging different pages, ++ * then come back to this page when it is unlocked. ++ */ ++ if (!trylock_page(page)) ++ goto out; ++ /* ++ * If this anonymous page is mapped only here, its pte may need ++ * to be write-protected. If it's mapped elsewhere, all of its ++ * ptes are necessarily already write-protected. But in either ++ * case, we need to lock and check page_count is not raised. ++ */ ++ if (write_protect_page(vma, page, &orig_pte, NULL) == 0) { ++ if (pages_identical(page, kpage)) ++ err = replace_page(vma, page, kpage, orig_pte); ++ else ++ err = check_collision(rmap_item, hash); ++ } ++ ++ if ((vma->vm_flags & VM_LOCKED) && kpage && !err) { ++ munlock_vma_page(page); ++ if (!PageMlocked(kpage)) { ++ unlock_page(page); ++ lock_page(kpage); ++ mlock_vma_page(kpage); ++ page = kpage; /* for final unlock */ ++ } ++ } ++ ++ unlock_page(page); ++out: ++ return err; ++} ++ ++ ++ ++/** ++ * If two pages fail to merge in try_to_merge_two_pages, then we have a chance ++ * to restore a page mapping that has been changed in try_to_merge_two_pages. ++ * ++ * @return 0 on success. ++ */ ++static int restore_uksm_page_pte(struct vm_area_struct *vma, unsigned long addr, ++ pte_t orig_pte, pte_t wprt_pte) ++{ ++ struct mm_struct *mm = vma->vm_mm; ++ pgd_t *pgd; ++ pud_t *pud; ++ pmd_t *pmd; ++ pte_t *ptep; ++ spinlock_t *ptl; ++ ++ int err = -EFAULT; ++ ++ pgd = pgd_offset(mm, addr); ++ if (!pgd_present(*pgd)) ++ goto out; ++ ++ pud = pud_offset(pgd, addr); ++ if (!pud_present(*pud)) ++ goto out; ++ ++ pmd = pmd_offset(pud, addr); ++ if (!pmd_present(*pmd)) ++ goto out; ++ ++ ptep = pte_offset_map_lock(mm, pmd, addr, &ptl); ++ if (!pte_same(*ptep, wprt_pte)) { ++ /* already copied, let it be */ ++ pte_unmap_unlock(ptep, ptl); ++ goto out; ++ } ++ ++ /* ++ * Good boy, still here. When we still get the ksm page, it does not ++ * return to the free page pool, there is no way that a pte was changed ++ * to other page and gets back to this page. And remind that ksm page ++ * do not reuse in do_wp_page(). So it's safe to restore the original ++ * pte. ++ */ ++ flush_cache_page(vma, addr, pte_pfn(*ptep)); ++ ptep_clear_flush(vma, addr, ptep); ++ set_pte_at_notify(mm, addr, ptep, orig_pte); ++ ++ pte_unmap_unlock(ptep, ptl); ++ err = 0; ++out: ++ return err; ++} ++ ++/** ++ * try_to_merge_two_pages() - take two identical pages and prepare ++ * them to be merged into one page(rmap_item->page) ++ * ++ * @return 0 if we successfully merged two identical pages into ++ * one ksm page. MERGE_ERR_COLLI if it's only a hash collision ++ * search in rbtree. MERGE_ERR_CHANGED if rmap_item has been ++ * changed since it's hashed. MERGE_ERR_PGERR otherwise. 
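When a memcmp in the merge paths above fails even though the hashes matched, check_collision() decides between two very different outcomes: if recomputing the hash at the strength that produced the match still equals the stored value, it is a genuine collision (MERGE_ERR_COLLI, or MERGE_ERR_COLLI_MAX at full strength); if not, the page simply changed under us (MERGE_ERR_CHANGED). A compact sketch of that classification step, with classify() as an illustrative helper:

#include <stdint.h>
#include <stdio.h>

enum merge_err {
	MERGE_OK = 0,
	MERGE_ERR_PGERR = 1,     /* page invalid, cannot continue        */
	MERGE_ERR_COLLI = 2,     /* true hash collision                  */
	MERGE_ERR_COLLI_MAX = 3, /* collision at maximum hash strength   */
	MERGE_ERR_CHANGED = 4,   /* page content changed since hashing   */
};

/* memcmp said "different" although the hashes matched: decide why,
 * following the core of check_collision() in the patch. */
static enum merge_err classify(uint32_t stored_hash, uint32_t recomputed_hash)
{
	return stored_hash == recomputed_hash ? MERGE_ERR_COLLI
					      : MERGE_ERR_CHANGED;
}

int main(void)
{
	printf("%d (expect %d, collision)\n", classify(0x1234, 0x1234),
	       MERGE_ERR_COLLI);
	printf("%d (expect %d, page changed)\n", classify(0x1234, 0xbeef),
	       MERGE_ERR_CHANGED);
	return 0;
}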
++ * ++ */ ++static int try_to_merge_two_pages(struct rmap_item *rmap_item, ++ struct rmap_item *tree_rmap_item, ++ u32 hash) ++{ ++ pte_t orig_pte1 = __pte(0), orig_pte2 = __pte(0); ++ pte_t wprt_pte1 = __pte(0), wprt_pte2 = __pte(0); ++ struct vm_area_struct *vma1 = rmap_item->slot->vma; ++ struct vm_area_struct *vma2 = tree_rmap_item->slot->vma; ++ struct page *page = rmap_item->page; ++ struct page *tree_page = tree_rmap_item->page; ++ int err = MERGE_ERR_PGERR; ++ struct address_space *saved_mapping; ++ ++ ++ if (rmap_item->page == tree_rmap_item->page) ++ goto out; ++ ++ if (PageTransCompound(page) && page_trans_compound_anon_split(page)) ++ goto out; ++ BUG_ON(PageTransCompound(page)); ++ ++ if (PageTransCompound(tree_page) && page_trans_compound_anon_split(tree_page)) ++ goto out; ++ BUG_ON(PageTransCompound(tree_page)); ++ ++ if (!PageAnon(page) || !PageAnon(tree_page)) ++ goto out; ++ ++ if (!trylock_page(page)) ++ goto out; ++ ++ ++ if (write_protect_page(vma1, page, &wprt_pte1, &orig_pte1) != 0) { ++ unlock_page(page); ++ goto out; ++ } ++ ++ /* ++ * While we hold page lock, upgrade page from ++ * PageAnon+anon_vma to PageKsm+NULL stable_node: ++ * stable_tree_insert() will update stable_node. ++ */ ++ saved_mapping = page->mapping; ++ set_page_stable_node(page, NULL); ++ mark_page_accessed(page); ++ unlock_page(page); ++ ++ if (!trylock_page(tree_page)) ++ goto restore_out; ++ ++ if (write_protect_page(vma2, tree_page, &wprt_pte2, &orig_pte2) != 0) { ++ unlock_page(tree_page); ++ goto restore_out; ++ } ++ ++ if (pages_identical(page, tree_page)) { ++ err = replace_page(vma2, tree_page, page, wprt_pte2); ++ if (err) { ++ unlock_page(tree_page); ++ goto restore_out; ++ } ++ ++ if ((vma2->vm_flags & VM_LOCKED)) { ++ munlock_vma_page(tree_page); ++ if (!PageMlocked(page)) { ++ unlock_page(tree_page); ++ lock_page(page); ++ mlock_vma_page(page); ++ tree_page = page; /* for final unlock */ ++ } ++ } ++ ++ unlock_page(tree_page); ++ ++ goto out; /* success */ ++ ++ } else { ++ if (tree_rmap_item->hash_max && ++ tree_rmap_item->hash_max == rmap_item->hash_max) { ++ err = MERGE_ERR_COLLI_MAX; ++ } else if (page_hash(page, hash_strength, 0) == ++ page_hash(tree_page, hash_strength, 0)) { ++ inc_rshash_neg(memcmp_cost + hash_strength * 2); ++ err = MERGE_ERR_COLLI; ++ } else { ++ err = MERGE_ERR_CHANGED; ++ } ++ ++ unlock_page(tree_page); ++ } ++ ++restore_out: ++ lock_page(page); ++ if (!restore_uksm_page_pte(vma1, get_rmap_addr(rmap_item), ++ orig_pte1, wprt_pte1)) ++ page->mapping = saved_mapping; ++ ++ unlock_page(page); ++out: ++ return err; ++} ++ ++static inline int hash_cmp(u32 new_val, u32 node_val) ++{ ++ if (new_val > node_val) ++ return 1; ++ else if (new_val < node_val) ++ return -1; ++ else ++ return 0; ++} ++ ++static inline u32 rmap_item_hash_max(struct rmap_item *item, u32 hash) ++{ ++ u32 hash_max = item->hash_max; ++ ++ if (!hash_max) { ++ hash_max = page_hash_max(item->page, hash); ++ ++ item->hash_max = hash_max; ++ } ++ ++ return hash_max; ++} ++ ++ ++ ++/** ++ * stable_tree_search() - search the stable tree for a page ++ * ++ * @item: the rmap_item we are comparing with ++ * @hash: the hash value of this item->page already calculated ++ * ++ * @return the page we have found, NULL otherwise. The page returned has ++ * been gotten. 
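stable_tree_search() below walks a two-level structure: a first rb-tree of tree_nodes keyed by the cheap partial-strength hash, and, only when one tree_node holds more than one stable page, a sub-tree keyed by the full-strength hash_max. A user-space sketch of that lookup shape, with linear scans standing in for the kernel rb-trees and all names and values purely illustrative:

#include <stdint.h>
#include <stdio.h>

struct stable_page {
	uint32_t hash_max;           /* full-strength hash                 */
	const char *name;
};

struct first_level {
	uint32_t hash;               /* partial-strength hash              */
	struct stable_page pages[4]; /* sub-"tree", linear for the sketch  */
	unsigned count;
};

static const struct stable_page *
lookup(const struct first_level *lvl, unsigned n, uint32_t hash,
       uint32_t hash_max)
{
	for (unsigned i = 0; i < n; i++) {
		if (lvl[i].hash != hash)
			continue;
		if (lvl[i].count == 1)          /* no collision: done      */
			return &lvl[i].pages[0];
		for (unsigned j = 0; j < lvl[i].count; j++)
			if (lvl[i].pages[j].hash_max == hash_max)
				return &lvl[i].pages[j];
	}
	return NULL;
}

int main(void)
{
	struct first_level tree[] = {
		{ 0x11, { { 0xaaaa, "A" } }, 1 },
		{ 0x22, { { 0xbbbb, "B" }, { 0xcccc, "C" } }, 2 },
	};
	const struct stable_page *hit = lookup(tree, 2, 0x22, 0xcccc);

	printf("found: %s\n", hit ? hit->name : "(none)");
	return 0;
}

The count == 1 shortcut is why the expensive hash_max is computed lazily: most first-level nodes never collide, so the second level is rarely needed.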
++ */ ++static struct page *stable_tree_search(struct rmap_item *item, u32 hash) ++{ ++ struct rb_node *node = root_stable_treep->rb_node; ++ struct tree_node *tree_node; ++ unsigned long hash_max; ++ struct page *page = item->page; ++ struct stable_node *stable_node; ++ ++ stable_node = page_stable_node(page); ++ if (stable_node) { ++ /* ksm page forked, that is ++ * if (PageKsm(page) && !in_stable_tree(rmap_item)) ++ * it's actually gotten once outside. ++ */ ++ get_page(page); ++ return page; ++ } ++ ++ while (node) { ++ int cmp; ++ ++ tree_node = rb_entry(node, struct tree_node, node); ++ ++ cmp = hash_cmp(hash, tree_node->hash); ++ ++ if (cmp < 0) ++ node = node->rb_left; ++ else if (cmp > 0) ++ node = node->rb_right; ++ else ++ break; ++ } ++ ++ if (!node) ++ return NULL; ++ ++ if (tree_node->count == 1) { ++ stable_node = rb_entry(tree_node->sub_root.rb_node, ++ struct stable_node, node); ++ BUG_ON(!stable_node); ++ ++ goto get_page_out; ++ } ++ ++ /* ++ * ok, we have to search the second ++ * level subtree, hash the page to a ++ * full strength. ++ */ ++ node = tree_node->sub_root.rb_node; ++ BUG_ON(!node); ++ hash_max = rmap_item_hash_max(item, hash); ++ ++ while (node) { ++ int cmp; ++ ++ stable_node = rb_entry(node, struct stable_node, node); ++ ++ cmp = hash_cmp(hash_max, stable_node->hash_max); ++ ++ if (cmp < 0) ++ node = node->rb_left; ++ else if (cmp > 0) ++ node = node->rb_right; ++ else ++ goto get_page_out; ++ } ++ ++ return NULL; ++ ++get_page_out: ++ page = get_uksm_page(stable_node, 1, 1); ++ return page; ++} ++ ++static int try_merge_rmap_item(struct rmap_item *item, ++ struct page *kpage, ++ struct page *tree_page) ++{ ++ spinlock_t *ptl; ++ pte_t *ptep; ++ unsigned long addr; ++ struct vm_area_struct *vma = item->slot->vma; ++ ++ addr = get_rmap_addr(item); ++ ptep = page_check_address(kpage, vma->vm_mm, addr, &ptl, 0); ++ if (!ptep) ++ return 0; ++ ++ if (pte_write(*ptep)) { ++ /* has changed, abort! */ ++ pte_unmap_unlock(ptep, ptl); ++ return 0; ++ } ++ ++ get_page(tree_page); ++ page_add_anon_rmap(tree_page, vma, addr); ++ ++ flush_cache_page(vma, addr, pte_pfn(*ptep)); ++ ptep_clear_flush(vma, addr, ptep); ++ set_pte_at_notify(vma->vm_mm, addr, ptep, ++ mk_pte(tree_page, vma->vm_page_prot)); ++ ++ page_remove_rmap(kpage); ++ put_page(kpage); ++ ++ pte_unmap_unlock(ptep, ptl); ++ ++ return 1; ++} ++ ++/** ++ * try_to_merge_with_stable_page() - when two rmap_items need to be inserted ++ * into stable tree, the page was found to be identical to a stable ksm page, ++ * this is the last chance we can merge them into one. ++ * ++ * @item1: the rmap_item holding the page which we wanted to insert ++ * into stable tree. 
++ * @item2: the other rmap_item we found when unstable tree search ++ * @oldpage: the page currently mapped by the two rmap_items ++ * @tree_page: the page we found identical in stable tree node ++ * @success1: return if item1 is successfully merged ++ * @success2: return if item2 is successfully merged ++ */ ++static void try_merge_with_stable(struct rmap_item *item1, ++ struct rmap_item *item2, ++ struct page **kpage, ++ struct page *tree_page, ++ int *success1, int *success2) ++{ ++ struct vm_area_struct *vma1 = item1->slot->vma; ++ struct vm_area_struct *vma2 = item2->slot->vma; ++ *success1 = 0; ++ *success2 = 0; ++ ++ if (unlikely(*kpage == tree_page)) { ++ /* I don't think this can really happen */ ++ printk(KERN_WARNING "UKSM: unexpected condition detected in " ++ "try_merge_with_stable() -- *kpage == tree_page !\n"); ++ *success1 = 1; ++ *success2 = 1; ++ return; ++ } ++ ++ if (!PageAnon(*kpage) || !PageKsm(*kpage)) ++ goto failed; ++ ++ if (!trylock_page(tree_page)) ++ goto failed; ++ ++ /* If the oldpage is still ksm and still pointed ++ * to in the right place, and still write protected, ++ * we are confident it's not changed, no need to ++ * memcmp anymore. ++ * be ware, we cannot take nested pte locks, ++ * deadlock risk. ++ */ ++ if (!try_merge_rmap_item(item1, *kpage, tree_page)) ++ goto unlock_failed; ++ ++ /* ok, then vma2, remind that pte1 already set */ ++ if (!try_merge_rmap_item(item2, *kpage, tree_page)) ++ goto success_1; ++ ++ *success2 = 1; ++success_1: ++ *success1 = 1; ++ ++ ++ if ((*success1 && vma1->vm_flags & VM_LOCKED) || ++ (*success2 && vma2->vm_flags & VM_LOCKED)) { ++ munlock_vma_page(*kpage); ++ if (!PageMlocked(tree_page)) ++ mlock_vma_page(tree_page); ++ } ++ ++ /* ++ * We do not need oldpage any more in the caller, so can break the lock ++ * now. ++ */ ++ unlock_page(*kpage); ++ *kpage = tree_page; /* Get unlocked outside. 
*/ ++ return; ++ ++unlock_failed: ++ unlock_page(tree_page); ++failed: ++ return; ++} ++ ++static inline void stable_node_hash_max(struct stable_node *node, ++ struct page *page, u32 hash) ++{ ++ u32 hash_max = node->hash_max; ++ ++ if (!hash_max) { ++ hash_max = page_hash_max(page, hash); ++ node->hash_max = hash_max; ++ } ++} ++ ++static inline ++struct stable_node *new_stable_node(struct tree_node *tree_node, ++ struct page *kpage, u32 hash_max) ++{ ++ struct stable_node *new_stable_node; ++ ++ new_stable_node = alloc_stable_node(); ++ if (!new_stable_node) ++ return NULL; ++ ++ new_stable_node->kpfn = page_to_pfn(kpage); ++ new_stable_node->hash_max = hash_max; ++ new_stable_node->tree_node = tree_node; ++ set_page_stable_node(kpage, new_stable_node); ++ ++ return new_stable_node; ++} ++ ++static inline ++struct stable_node *first_level_insert(struct tree_node *tree_node, ++ struct rmap_item *rmap_item, ++ struct rmap_item *tree_rmap_item, ++ struct page **kpage, u32 hash, ++ int *success1, int *success2) ++{ ++ int cmp; ++ struct page *tree_page; ++ u32 hash_max = 0; ++ struct stable_node *stable_node, *new_snode; ++ struct rb_node *parent = NULL, **new; ++ ++ /* this tree node contains no sub-tree yet */ ++ stable_node = rb_entry(tree_node->sub_root.rb_node, ++ struct stable_node, node); ++ ++ tree_page = get_uksm_page(stable_node, 1, 0); ++ if (tree_page) { ++ cmp = memcmp_pages(*kpage, tree_page, 1); ++ if (!cmp) { ++ try_merge_with_stable(rmap_item, tree_rmap_item, kpage, ++ tree_page, success1, success2); ++ put_page(tree_page); ++ if (!*success1 && !*success2) ++ goto failed; ++ ++ return stable_node; ++ ++ } else { ++ /* ++ * collision in first level try to create a subtree. ++ * A new node need to be created. ++ */ ++ put_page(tree_page); ++ ++ stable_node_hash_max(stable_node, tree_page, ++ tree_node->hash); ++ hash_max = rmap_item_hash_max(rmap_item, hash); ++ cmp = hash_cmp(hash_max, stable_node->hash_max); ++ ++ parent = &stable_node->node; ++ if (cmp < 0) { ++ new = &parent->rb_left; ++ } else if (cmp > 0) { ++ new = &parent->rb_right; ++ } else { ++ goto failed; ++ } ++ } ++ ++ } else { ++ /* the only stable_node deleted, we reuse its tree_node. 
++ */ ++ parent = NULL; ++ new = &tree_node->sub_root.rb_node; ++ } ++ ++ new_snode = new_stable_node(tree_node, *kpage, hash_max); ++ if (!new_snode) ++ goto failed; ++ ++ rb_link_node(&new_snode->node, parent, new); ++ rb_insert_color(&new_snode->node, &tree_node->sub_root); ++ tree_node->count++; ++ *success1 = *success2 = 1; ++ ++ return new_snode; ++ ++failed: ++ return NULL; ++} ++ ++static inline ++struct stable_node *stable_subtree_insert(struct tree_node *tree_node, ++ struct rmap_item *rmap_item, ++ struct rmap_item *tree_rmap_item, ++ struct page **kpage, u32 hash, ++ int *success1, int *success2) ++{ ++ struct page *tree_page; ++ u32 hash_max; ++ struct stable_node *stable_node, *new_snode; ++ struct rb_node *parent, **new; ++ ++research: ++ parent = NULL; ++ new = &tree_node->sub_root.rb_node; ++ BUG_ON(!*new); ++ hash_max = rmap_item_hash_max(rmap_item, hash); ++ while (*new) { ++ int cmp; ++ ++ stable_node = rb_entry(*new, struct stable_node, node); ++ ++ cmp = hash_cmp(hash_max, stable_node->hash_max); ++ ++ if (cmp < 0) { ++ parent = *new; ++ new = &parent->rb_left; ++ } else if (cmp > 0) { ++ parent = *new; ++ new = &parent->rb_right; ++ } else { ++ tree_page = get_uksm_page(stable_node, 1, 0); ++ if (tree_page) { ++ cmp = memcmp_pages(*kpage, tree_page, 1); ++ if (!cmp) { ++ try_merge_with_stable(rmap_item, ++ tree_rmap_item, kpage, ++ tree_page, success1, success2); ++ ++ put_page(tree_page); ++ if (!*success1 && !*success2) ++ goto failed; ++ /* ++ * successfully merged with a stable ++ * node ++ */ ++ return stable_node; ++ } else { ++ put_page(tree_page); ++ goto failed; ++ } ++ } else { ++ /* ++ * stable node may be deleted, ++ * and subtree maybe ++ * restructed, cannot ++ * continue, research it. ++ */ ++ if (tree_node->count) { ++ goto research; ++ } else { ++ /* reuse the tree node*/ ++ parent = NULL; ++ new = &tree_node->sub_root.rb_node; ++ } ++ } ++ } ++ } ++ ++ new_snode = new_stable_node(tree_node, *kpage, hash_max); ++ if (!new_snode) ++ goto failed; ++ ++ rb_link_node(&new_snode->node, parent, new); ++ rb_insert_color(&new_snode->node, &tree_node->sub_root); ++ tree_node->count++; ++ *success1 = *success2 = 1; ++ ++ return new_snode; ++ ++failed: ++ return NULL; ++} ++ ++ ++/** ++ * stable_tree_insert() - try to insert a merged page in unstable tree to ++ * the stable tree ++ * ++ * @kpage: the page need to be inserted ++ * @hash: the current hash of this page ++ * @rmap_item: the rmap_item being scanned ++ * @tree_rmap_item: the rmap_item found on unstable tree ++ * @success1: return if rmap_item is merged ++ * @success2: return if tree_rmap_item is merged ++ * ++ * @return the stable_node on stable tree if at least one ++ * rmap_item is inserted into stable tree, NULL ++ * otherwise. 
++ */ ++static struct stable_node * ++stable_tree_insert(struct page **kpage, u32 hash, ++ struct rmap_item *rmap_item, ++ struct rmap_item *tree_rmap_item, ++ int *success1, int *success2) ++{ ++ struct rb_node **new = &root_stable_treep->rb_node; ++ struct rb_node *parent = NULL; ++ struct stable_node *stable_node; ++ struct tree_node *tree_node; ++ u32 hash_max = 0; ++ ++ *success1 = *success2 = 0; ++ ++ while (*new) { ++ int cmp; ++ ++ tree_node = rb_entry(*new, struct tree_node, node); ++ ++ cmp = hash_cmp(hash, tree_node->hash); ++ ++ if (cmp < 0) { ++ parent = *new; ++ new = &parent->rb_left; ++ } else if (cmp > 0) { ++ parent = *new; ++ new = &parent->rb_right; ++ } else ++ break; ++ } ++ ++ if (*new) { ++ if (tree_node->count == 1) { ++ stable_node = first_level_insert(tree_node, rmap_item, ++ tree_rmap_item, kpage, ++ hash, success1, success2); ++ } else { ++ stable_node = stable_subtree_insert(tree_node, ++ rmap_item, tree_rmap_item, kpage, ++ hash, success1, success2); ++ } ++ } else { ++ ++ /* no tree node found */ ++ tree_node = alloc_tree_node(stable_tree_node_listp); ++ if (!tree_node) { ++ stable_node = NULL; ++ goto out; ++ } ++ ++ stable_node = new_stable_node(tree_node, *kpage, hash_max); ++ if (!stable_node) { ++ free_tree_node(tree_node); ++ goto out; ++ } ++ ++ tree_node->hash = hash; ++ rb_link_node(&tree_node->node, parent, new); ++ rb_insert_color(&tree_node->node, root_stable_treep); ++ parent = NULL; ++ new = &tree_node->sub_root.rb_node; ++ ++ rb_link_node(&stable_node->node, parent, new); ++ rb_insert_color(&stable_node->node, &tree_node->sub_root); ++ tree_node->count++; ++ *success1 = *success2 = 1; ++ } ++ ++out: ++ return stable_node; ++} ++ ++ ++/** ++ * get_tree_rmap_item_page() - try to get the page and lock the mmap_sem ++ * ++ * @return 0 on success, -EBUSY if unable to lock the mmap_sem, ++ * -EINVAL if the page mapping has been changed. ++ */ ++static inline int get_tree_rmap_item_page(struct rmap_item *tree_rmap_item) ++{ ++ int err; ++ ++ err = get_mergeable_page_lock_mmap(tree_rmap_item); ++ ++ if (err == -EINVAL) { ++ /* its page map has been changed, remove it */ ++ remove_rmap_item_from_tree(tree_rmap_item); ++ } ++ ++ /* The page is gotten and mmap_sem is locked now. */ ++ return err; ++} ++ ++ ++/** ++ * unstable_tree_search_insert() - search an unstable tree rmap_item with the ++ * same hash value. 
Get its page and trylock the mmap_sem ++ */ ++static inline ++struct rmap_item *unstable_tree_search_insert(struct rmap_item *rmap_item, ++ u32 hash) ++ ++{ ++ struct rb_node **new = &root_unstable_tree.rb_node; ++ struct rb_node *parent = NULL; ++ struct tree_node *tree_node; ++ u32 hash_max; ++ struct rmap_item *tree_rmap_item; ++ ++ while (*new) { ++ int cmp; ++ ++ tree_node = rb_entry(*new, struct tree_node, node); ++ ++ cmp = hash_cmp(hash, tree_node->hash); ++ ++ if (cmp < 0) { ++ parent = *new; ++ new = &parent->rb_left; ++ } else if (cmp > 0) { ++ parent = *new; ++ new = &parent->rb_right; ++ } else ++ break; ++ } ++ ++ if (*new) { ++ /* got the tree_node */ ++ if (tree_node->count == 1) { ++ tree_rmap_item = rb_entry(tree_node->sub_root.rb_node, ++ struct rmap_item, node); ++ BUG_ON(!tree_rmap_item); ++ ++ goto get_page_out; ++ } ++ ++ /* well, search the collision subtree */ ++ new = &tree_node->sub_root.rb_node; ++ BUG_ON(!*new); ++ hash_max = rmap_item_hash_max(rmap_item, hash); ++ ++ while (*new) { ++ int cmp; ++ ++ tree_rmap_item = rb_entry(*new, struct rmap_item, ++ node); ++ ++ cmp = hash_cmp(hash_max, tree_rmap_item->hash_max); ++ parent = *new; ++ if (cmp < 0) ++ new = &parent->rb_left; ++ else if (cmp > 0) ++ new = &parent->rb_right; ++ else ++ goto get_page_out; ++ } ++ } else { ++ /* alloc a new tree_node */ ++ tree_node = alloc_tree_node(&unstable_tree_node_list); ++ if (!tree_node) ++ return NULL; ++ ++ tree_node->hash = hash; ++ rb_link_node(&tree_node->node, parent, new); ++ rb_insert_color(&tree_node->node, &root_unstable_tree); ++ parent = NULL; ++ new = &tree_node->sub_root.rb_node; ++ } ++ ++ /* did not found even in sub-tree */ ++ rmap_item->tree_node = tree_node; ++ rmap_item->address |= UNSTABLE_FLAG; ++ rmap_item->hash_round = uksm_hash_round; ++ rb_link_node(&rmap_item->node, parent, new); ++ rb_insert_color(&rmap_item->node, &tree_node->sub_root); ++ ++ uksm_pages_unshared++; ++ return NULL; ++ ++get_page_out: ++ if (tree_rmap_item->page == rmap_item->page) ++ return NULL; ++ ++ if (get_tree_rmap_item_page(tree_rmap_item)) ++ return NULL; ++ ++ return tree_rmap_item; ++} ++ ++static void hold_anon_vma(struct rmap_item *rmap_item, ++ struct anon_vma *anon_vma) ++{ ++ rmap_item->anon_vma = anon_vma; ++ get_anon_vma(anon_vma); ++} ++ ++ ++/** ++ * stable_tree_append() - append a rmap_item to a stable node. Deduplication ++ * ratio statistics is done in this function. 
++ * ++ */ ++static void stable_tree_append(struct rmap_item *rmap_item, ++ struct stable_node *stable_node, int logdedup) ++{ ++ struct node_vma *node_vma = NULL, *new_node_vma; ++ struct hlist_node *hlist = NULL, *cont_p = NULL; ++ unsigned long key = (unsigned long)rmap_item->slot; ++ unsigned long factor = rmap_item->slot->rung->step; ++ ++ BUG_ON(!stable_node); ++ rmap_item->address |= STABLE_FLAG; ++ ++ if (hlist_empty(&stable_node->hlist)) { ++ uksm_pages_shared++; ++ goto node_vma_new; ++ } else { ++ uksm_pages_sharing++; ++ } ++ ++ hlist_for_each_entry(node_vma, hlist, &stable_node->hlist, hlist) { ++ if (node_vma->key >= key) ++ break; ++ ++ if (logdedup) { ++ node_vma->slot->pages_bemerged += factor; ++ if (list_empty(&node_vma->slot->dedup_list)) ++ list_add(&node_vma->slot->dedup_list, ++ &vma_slot_dedup); ++ } ++ } ++ ++ if (node_vma) { ++ if (node_vma->key == key) { ++ cont_p = hlist->next; ++ goto node_vma_ok; ++ } else if (node_vma->key > key) { ++ cont_p = hlist; ++ } ++ } ++ ++node_vma_new: ++ /* no same vma already in node, alloc a new node_vma */ ++ new_node_vma = alloc_node_vma(); ++ BUG_ON(!new_node_vma); ++ new_node_vma->head = stable_node; ++ new_node_vma->slot = rmap_item->slot; ++ ++ if (!node_vma) { ++ hlist_add_head(&new_node_vma->hlist, &stable_node->hlist); ++ } else if (node_vma->key != key) { ++ if (node_vma->key < key) ++ hlist_add_after(&node_vma->hlist, &new_node_vma->hlist); ++ else { ++ hlist_add_before(&new_node_vma->hlist, ++ &node_vma->hlist); ++ } ++ ++ } ++ node_vma = new_node_vma; ++ ++node_vma_ok: /* ok, ready to add to the list */ ++ rmap_item->head = node_vma; ++ hlist_add_head(&rmap_item->hlist, &node_vma->rmap_hlist); ++ hold_anon_vma(rmap_item, rmap_item->slot->vma->anon_vma); ++ if (logdedup) { ++ rmap_item->slot->pages_merged++; ++ if (cont_p) { ++ hlist_for_each_entry_continue(node_vma, ++ cont_p, hlist) { ++ node_vma->slot->pages_bemerged += factor; ++ if (list_empty(&node_vma->slot->dedup_list)) ++ list_add(&node_vma->slot->dedup_list, ++ &vma_slot_dedup); ++ } ++ } ++ } ++} ++ ++/* ++ * We use break_ksm to break COW on a ksm page: it's a stripped down ++ * ++ * if (get_user_pages(current, mm, addr, 1, 1, 1, &page, NULL) == 1) ++ * put_page(page); ++ * ++ * but taking great care only to touch a ksm page, in a VM_MERGEABLE vma, ++ * in case the application has unmapped and remapped mm,addr meanwhile. ++ * Could a ksm page appear anywhere else? Actually yes, in a VM_PFNMAP ++ * mmap of /dev/mem or /dev/kmem, where we would not want to touch it. ++ */ ++static int break_ksm(struct vm_area_struct *vma, unsigned long addr) ++{ ++ struct page *page; ++ int ret = 0; ++ ++ do { ++ cond_resched(); ++ page = follow_page(vma, addr, FOLL_GET); ++ if (IS_ERR_OR_NULL(page)) ++ break; ++ if (PageKsm(page)) { ++ ret = handle_mm_fault(vma->vm_mm, vma, addr, ++ FAULT_FLAG_WRITE); ++ } else ++ ret = VM_FAULT_WRITE; ++ put_page(page); ++ } while (!(ret & (VM_FAULT_WRITE | VM_FAULT_SIGBUS | VM_FAULT_OOM))); ++ /* ++ * We must loop because handle_mm_fault() may back out if there's ++ * any difficulty e.g. if pte accessed bit gets updated concurrently. ++ * ++ * VM_FAULT_WRITE is what we have been hoping for: it indicates that ++ * COW has been broken, even if the vma does not permit VM_WRITE; ++ * but note that a concurrent fault might break PageKsm for us. ++ * ++ * VM_FAULT_SIGBUS could occur if we race with truncation of the ++ * backing file, which also invalidates anonymous pages: that's ++ * okay, that truncation will have unmapped the PageKsm for us. 
++ * ++ * VM_FAULT_OOM: at the time of writing (late July 2009), setting ++ * aside mem_cgroup limits, VM_FAULT_OOM would only be set if the ++ * current task has TIF_MEMDIE set, and will be OOM killed on return ++ * to user; and ksmd, having no mm, would never be chosen for that. ++ * ++ * But if the mm is in a limited mem_cgroup, then the fault may fail ++ * with VM_FAULT_OOM even if the current task is not TIF_MEMDIE; and ++ * even ksmd can fail in this way - though it's usually breaking ksm ++ * just to undo a merge it made a moment before, so unlikely to oom. ++ * ++ * That's a pity: we might therefore have more kernel pages allocated ++ * than we're counting as nodes in the stable tree; but uksm_do_scan ++ * will retry to break_cow on each pass, so should recover the page ++ * in due course. The important thing is to not let VM_MERGEABLE ++ * be cleared while any such pages might remain in the area. ++ */ ++ return (ret & VM_FAULT_OOM) ? -ENOMEM : 0; ++} ++ ++static void break_cow(struct rmap_item *rmap_item) ++{ ++ struct vm_area_struct *vma = rmap_item->slot->vma; ++ struct mm_struct *mm = vma->vm_mm; ++ unsigned long addr = get_rmap_addr(rmap_item); ++ ++ if (uksm_test_exit(mm)) ++ goto out; ++ ++ break_ksm(vma, addr); ++out: ++ return; ++} ++ ++/* ++ * Though it's very tempting to unmerge in_stable_tree(rmap_item)s rather ++ * than check every pte of a given vma, the locking doesn't quite work for ++ * that - an rmap_item is assigned to the stable tree after inserting ksm ++ * page and upping mmap_sem. Nor does it fit with the way we skip dup'ing ++ * rmap_items from parent to child at fork time (so as not to waste time ++ * if exit comes before the next scan reaches it). ++ * ++ * Similarly, although we'd like to remove rmap_items (so updating counts ++ * and freeing memory) when unmerging an area, it's easier to leave that ++ * to the next pass of ksmd - consider, for example, how ksmd might be ++ * in cmp_and_merge_page on one of the rmap_items we would be removing. 
++ */ ++inline int unmerge_uksm_pages(struct vm_area_struct *vma, ++ unsigned long start, unsigned long end) ++{ ++ unsigned long addr; ++ int err = 0; ++ ++ for (addr = start; addr < end && !err; addr += PAGE_SIZE) { ++ if (uksm_test_exit(vma->vm_mm)) ++ break; ++ if (signal_pending(current)) ++ err = -ERESTARTSYS; ++ else ++ err = break_ksm(vma, addr); ++ } ++ return err; ++} ++ ++static inline void inc_uksm_pages_scanned(void) ++{ ++ u64 delta; ++ ++ ++ if (uksm_pages_scanned == U64_MAX) { ++ encode_benefit(); ++ ++ delta = uksm_pages_scanned >> pages_scanned_base; ++ ++ if (CAN_OVERFLOW_U64(pages_scanned_stored, delta)) { ++ pages_scanned_stored >>= 1; ++ delta >>= 1; ++ pages_scanned_base++; ++ } ++ ++ pages_scanned_stored += delta; ++ ++ uksm_pages_scanned = uksm_pages_scanned_last = 0; ++ } ++ ++ uksm_pages_scanned++; ++} ++ ++static inline int find_zero_page_hash(int strength, u32 hash) ++{ ++ return (zero_hash_table[strength] == hash); ++} ++ ++static ++int cmp_and_merge_zero_page(struct vm_area_struct *vma, struct page *page) ++{ ++ struct page *zero_page = empty_uksm_zero_page; ++ struct mm_struct *mm = vma->vm_mm; ++ pte_t orig_pte = __pte(0); ++ int err = -EFAULT; ++ ++ if (uksm_test_exit(mm)) ++ goto out; ++ ++ if (PageTransCompound(page) && page_trans_compound_anon_split(page)) ++ goto out; ++ BUG_ON(PageTransCompound(page)); ++ ++ if (!PageAnon(page)) ++ goto out; ++ ++ if (!trylock_page(page)) ++ goto out; ++ ++ if (write_protect_page(vma, page, &orig_pte, 0) == 0) { ++ if (is_page_full_zero(page)) ++ err = replace_page(vma, page, zero_page, orig_pte); ++ } ++ ++ unlock_page(page); ++out: ++ return err; ++} ++ ++/* ++ * cmp_and_merge_page() - first see if page can be merged into the stable ++ * tree; if not, compare hash to previous and if it's the same, see if page ++ * can be inserted into the unstable tree, or merged with a page already there ++ * and both transferred to the stable tree. ++ * ++ * @page: the page that we are searching identical page to. ++ * @rmap_item: the reverse mapping into the virtual address of this page ++ */ ++static void cmp_and_merge_page(struct rmap_item *rmap_item, u32 hash) ++{ ++ struct rmap_item *tree_rmap_item; ++ struct page *page; ++ struct page *kpage = NULL; ++ u32 hash_max; ++ int err; ++ unsigned int success1, success2; ++ struct stable_node *snode; ++ int cmp; ++ struct rb_node *parent = NULL, **new; ++ ++ remove_rmap_item_from_tree(rmap_item); ++ page = rmap_item->page; ++ ++ /* We first start with searching the page inside the stable tree */ ++ kpage = stable_tree_search(rmap_item, hash); ++ if (kpage) { ++ err = try_to_merge_with_uksm_page(rmap_item, kpage, ++ hash); ++ if (!err) { ++ /* ++ * The page was successfully merged, add ++ * its rmap_item to the stable tree. ++ * page lock is needed because it's ++ * racing with try_to_unmap_ksm(), etc. ++ */ ++ lock_page(kpage); ++ snode = page_stable_node(kpage); ++ stable_tree_append(rmap_item, snode, 1); ++ unlock_page(kpage); ++ put_page(kpage); ++ return; /* success */ ++ } ++ put_page(kpage); ++ ++ /* ++ * if it's a collision and it has been search in sub-rbtree ++ * (hash_max != 0), we want to abort, because if it is ++ * successfully merged in unstable tree, the collision trends to ++ * happen again. 
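find_zero_page_hash() earlier in this hunk spots candidate zero pages by comparing a page's hash at the current strength with a precomputed table of the empty page's hash at every strength (zero_hash_table). A user-space sketch of how such a table can be built with the same mixing step; the real table covers strengths up to HASH_STRENGTH_MAX, simplified here to one full page:

#include <stdint.h>
#include <stdio.h>

#define PAGE_WORDS 1024          /* assumed 4 KiB page / sizeof(u32)      */
#define SHIFTL 8
#define SHIFTR 12

/* Same mixing step as the patch's sampling hash; the sampled word of an
 * all-zero page is always 0, so the sampling order does not matter here. */
static uint32_t zero_page_hash(unsigned strength)
{
	uint32_t hash = 0xdeadbeef;

	for (unsigned i = 0; i < strength; i++) {
		hash += 0;                     /* key[pos] of the zero page */
		hash += hash << SHIFTL;
		hash ^= hash >> SHIFTR;
	}
	return hash;
}

int main(void)
{
	static uint32_t zero_hash_table[PAGE_WORDS + 1];

	/* One entry per possible hash strength, as uksm precomputes. */
	for (unsigned s = 0; s <= PAGE_WORDS; s++)
		zero_hash_table[s] = zero_page_hash(s);

	unsigned strength = 64;
	uint32_t some_page_hash = zero_page_hash(strength);  /* pretend     */

	if (zero_hash_table[strength] == some_page_hash)
		printf("page is (probably) all zero at strength %u\n",
		       strength);
	return 0;
}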
++ */ ++ if (err == MERGE_ERR_COLLI && rmap_item->hash_max) ++ return; ++ } ++ ++ tree_rmap_item = ++ unstable_tree_search_insert(rmap_item, hash); ++ if (tree_rmap_item) { ++ err = try_to_merge_two_pages(rmap_item, tree_rmap_item, hash); ++ /* ++ * As soon as we merge this page, we want to remove the ++ * rmap_item of the page we have merged with from the unstable ++ * tree, and insert it instead as new node in the stable tree. ++ */ ++ if (!err) { ++ kpage = page; ++ remove_rmap_item_from_tree(tree_rmap_item); ++ lock_page(kpage); ++ snode = stable_tree_insert(&kpage, hash, ++ rmap_item, tree_rmap_item, ++ &success1, &success2); ++ ++ /* ++ * Do not log dedup for tree item, it's not counted as ++ * scanned in this round. ++ */ ++ if (success2) ++ stable_tree_append(tree_rmap_item, snode, 0); ++ ++ /* ++ * The order of these two stable append is important: ++ * we are scanning rmap_item. ++ */ ++ if (success1) ++ stable_tree_append(rmap_item, snode, 1); ++ ++ /* ++ * The original kpage may be unlocked inside ++ * stable_tree_insert() already. This page ++ * should be unlocked before doing ++ * break_cow(). ++ */ ++ unlock_page(kpage); ++ ++ if (!success1) ++ break_cow(rmap_item); ++ ++ if (!success2) ++ break_cow(tree_rmap_item); ++ ++ } else if (err == MERGE_ERR_COLLI) { ++ BUG_ON(tree_rmap_item->tree_node->count > 1); ++ ++ rmap_item_hash_max(tree_rmap_item, ++ tree_rmap_item->tree_node->hash); ++ ++ hash_max = rmap_item_hash_max(rmap_item, hash); ++ cmp = hash_cmp(hash_max, tree_rmap_item->hash_max); ++ parent = &tree_rmap_item->node; ++ if (cmp < 0) ++ new = &parent->rb_left; ++ else if (cmp > 0) ++ new = &parent->rb_right; ++ else ++ goto put_up_out; ++ ++ rmap_item->tree_node = tree_rmap_item->tree_node; ++ rmap_item->address |= UNSTABLE_FLAG; ++ rmap_item->hash_round = uksm_hash_round; ++ rb_link_node(&rmap_item->node, parent, new); ++ rb_insert_color(&rmap_item->node, ++ &tree_rmap_item->tree_node->sub_root); ++ rmap_item->tree_node->count++; ++ } else { ++ /* ++ * either one of the page has changed or they collide ++ * at the max hash, we consider them as ill items. 
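get_pool_index() and index_page_offset() above address the per-VMA rmap list, which is stored in separately allocated pool pages rather than one large array: an entry index is split into the pool page that holds it and the byte offset inside that page. A user-space sketch of that arithmetic, assuming pointer-sized entries as the stride in get_pool_index() suggests:

#include <stdio.h>

#define PAGE_SIZE  4096UL        /* assumed                               */
#define PAGE_SHIFT 12

/* Stand-in for the patch's entry, which holds either an address or an
 * item pointer and is therefore pointer sized. */
union rmap_list_entry {
	unsigned long addr;
	void *item;
};

/* Which pool page holds entry `index', and where inside that page. */
static unsigned long pool_index(unsigned long index)
{
	return (sizeof(union rmap_list_entry) * index) >> PAGE_SHIFT;
}

static unsigned long pool_offset(unsigned long index)
{
	return (sizeof(union rmap_list_entry) * index) & (PAGE_SIZE - 1);
}

int main(void)
{
	unsigned long per_page = PAGE_SIZE / sizeof(union rmap_list_entry);

	printf("%lu entries per pool page\n", per_page);
	printf("entry 1000 -> pool page %lu, offset %lu bytes\n",
	       pool_index(1000), pool_offset(1000));
	return 0;
}

Keeping the list in order-0 pool pages lets empty pools be freed individually (see try_free_last_pool() above) instead of holding one large allocation for the whole VMA.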
++ */ ++ remove_rmap_item_from_tree(tree_rmap_item); ++ } ++put_up_out: ++ put_page(tree_rmap_item->page); ++ up_read(&tree_rmap_item->slot->vma->vm_mm->mmap_sem); ++ } ++} ++ ++ ++ ++ ++static inline unsigned long get_pool_index(struct vma_slot *slot, ++ unsigned long index) ++{ ++ unsigned long pool_index; ++ ++ pool_index = (sizeof(struct rmap_list_entry *) * index) >> PAGE_SHIFT; ++ if (pool_index >= slot->pool_size) ++ BUG(); ++ return pool_index; ++} ++ ++static inline unsigned long index_page_offset(unsigned long index) ++{ ++ return offset_in_page(sizeof(struct rmap_list_entry *) * index); ++} ++ ++static inline ++struct rmap_list_entry *get_rmap_list_entry(struct vma_slot *slot, ++ unsigned long index, int need_alloc) ++{ ++ unsigned long pool_index; ++ struct page *page; ++ void *addr; ++ ++ ++ pool_index = get_pool_index(slot, index); ++ if (!slot->rmap_list_pool[pool_index]) { ++ if (!need_alloc) ++ return NULL; ++ ++ page = alloc_page(GFP_KERNEL | __GFP_ZERO); ++ if (!page) ++ return NULL; ++ ++ slot->rmap_list_pool[pool_index] = page; ++ } ++ ++ addr = kmap(slot->rmap_list_pool[pool_index]); ++ addr += index_page_offset(index); ++ ++ return addr; ++} ++ ++static inline void put_rmap_list_entry(struct vma_slot *slot, ++ unsigned long index) ++{ ++ unsigned long pool_index; ++ ++ pool_index = get_pool_index(slot, index); ++ BUG_ON(!slot->rmap_list_pool[pool_index]); ++ kunmap(slot->rmap_list_pool[pool_index]); ++} ++ ++static inline int entry_is_new(struct rmap_list_entry *entry) ++{ ++ return !entry->item; ++} ++ ++static inline unsigned long get_index_orig_addr(struct vma_slot *slot, ++ unsigned long index) ++{ ++ return slot->vma->vm_start + (index << PAGE_SHIFT); ++} ++ ++static inline unsigned long get_entry_address(struct rmap_list_entry *entry) ++{ ++ unsigned long addr; ++ ++ if (is_addr(entry->addr)) ++ addr = get_clean_addr(entry->addr); ++ else if (entry->item) ++ addr = get_rmap_addr(entry->item); ++ else ++ BUG(); ++ ++ return addr; ++} ++ ++static inline struct rmap_item *get_entry_item(struct rmap_list_entry *entry) ++{ ++ if (is_addr(entry->addr)) ++ return NULL; ++ ++ return entry->item; ++} ++ ++static inline void inc_rmap_list_pool_count(struct vma_slot *slot, ++ unsigned long index) ++{ ++ unsigned long pool_index; ++ ++ pool_index = get_pool_index(slot, index); ++ BUG_ON(!slot->rmap_list_pool[pool_index]); ++ slot->pool_counts[pool_index]++; ++} ++ ++static inline void dec_rmap_list_pool_count(struct vma_slot *slot, ++ unsigned long index) ++{ ++ unsigned long pool_index; ++ ++ pool_index = get_pool_index(slot, index); ++ BUG_ON(!slot->rmap_list_pool[pool_index]); ++ BUG_ON(!slot->pool_counts[pool_index]); ++ slot->pool_counts[pool_index]--; ++} ++ ++static inline int entry_has_rmap(struct rmap_list_entry *entry) ++{ ++ return !is_addr(entry->addr) && entry->item; ++} ++ ++static inline void swap_entries(struct rmap_list_entry *entry1, ++ unsigned long index1, ++ struct rmap_list_entry *entry2, ++ unsigned long index2) ++{ ++ struct rmap_list_entry tmp; ++ ++ /* swapping two new entries is meaningless */ ++ BUG_ON(entry_is_new(entry1) && entry_is_new(entry2)); ++ ++ tmp = *entry1; ++ *entry1 = *entry2; ++ *entry2 = tmp; ++ ++ if (entry_has_rmap(entry1)) ++ entry1->item->entry_index = index1; ++ ++ if (entry_has_rmap(entry2)) ++ entry2->item->entry_index = index2; ++ ++ if (entry_has_rmap(entry1) && !entry_has_rmap(entry2)) { ++ inc_rmap_list_pool_count(entry1->item->slot, index1); ++ dec_rmap_list_pool_count(entry1->item->slot, index2); ++ } else if 
(!entry_has_rmap(entry1) && entry_has_rmap(entry2)) { ++ inc_rmap_list_pool_count(entry2->item->slot, index2); ++ dec_rmap_list_pool_count(entry2->item->slot, index1); ++ } ++} ++ ++static inline void free_entry_item(struct rmap_list_entry *entry) ++{ ++ unsigned long index; ++ struct rmap_item *item; ++ ++ if (!is_addr(entry->addr)) { ++ BUG_ON(!entry->item); ++ item = entry->item; ++ entry->addr = get_rmap_addr(item); ++ set_is_addr(entry->addr); ++ index = item->entry_index; ++ remove_rmap_item_from_tree(item); ++ dec_rmap_list_pool_count(item->slot, index); ++ free_rmap_item(item); ++ } ++} ++ ++static inline int pool_entry_boundary(unsigned long index) ++{ ++ unsigned long linear_addr; ++ ++ linear_addr = sizeof(struct rmap_list_entry *) * index; ++ return index && !offset_in_page(linear_addr); ++} ++ ++static inline void try_free_last_pool(struct vma_slot *slot, ++ unsigned long index) ++{ ++ unsigned long pool_index; ++ ++ pool_index = get_pool_index(slot, index); ++ if (slot->rmap_list_pool[pool_index] && ++ !slot->pool_counts[pool_index]) { ++ __free_page(slot->rmap_list_pool[pool_index]); ++ slot->rmap_list_pool[pool_index] = NULL; ++ slot->flags |= UKSM_SLOT_NEED_SORT; ++ } ++ ++} ++ ++static inline unsigned long vma_item_index(struct vm_area_struct *vma, ++ struct rmap_item *item) ++{ ++ return (get_rmap_addr(item) - vma->vm_start) >> PAGE_SHIFT; ++} ++ ++static int within_same_pool(struct vma_slot *slot, ++ unsigned long i, unsigned long j) ++{ ++ unsigned long pool_i, pool_j; ++ ++ pool_i = get_pool_index(slot, i); ++ pool_j = get_pool_index(slot, j); ++ ++ return (pool_i == pool_j); ++} ++ ++static void sort_rmap_entry_list(struct vma_slot *slot) ++{ ++ unsigned long i, j; ++ struct rmap_list_entry *entry, *swap_entry; ++ ++ entry = get_rmap_list_entry(slot, 0, 0); ++ for (i = 0; i < slot->pages; ) { ++ ++ if (!entry) ++ goto skip_whole_pool; ++ ++ if (entry_is_new(entry)) ++ goto next_entry; ++ ++ if (is_addr(entry->addr)) { ++ entry->addr = 0; ++ goto next_entry; ++ } ++ ++ j = vma_item_index(slot->vma, entry->item); ++ if (j == i) ++ goto next_entry; ++ ++ if (within_same_pool(slot, i, j)) ++ swap_entry = entry + j - i; ++ else ++ swap_entry = get_rmap_list_entry(slot, j, 1); ++ ++ swap_entries(entry, i, swap_entry, j); ++ if (!within_same_pool(slot, i, j)) ++ put_rmap_list_entry(slot, j); ++ continue; ++ ++skip_whole_pool: ++ i += PAGE_SIZE / sizeof(*entry); ++ if (i < slot->pages) ++ entry = get_rmap_list_entry(slot, i, 0); ++ continue; ++ ++next_entry: ++ if (i >= slot->pages - 1 || ++ !within_same_pool(slot, i, i + 1)) { ++ put_rmap_list_entry(slot, i); ++ if (i + 1 < slot->pages) ++ entry = get_rmap_list_entry(slot, i + 1, 0); ++ } else ++ entry++; ++ i++; ++ continue; ++ } ++ ++ /* free empty pool entries which contain no rmap_item */ ++ /* CAN be simplied to based on only pool_counts when bug freed !!!!! 
*/ ++ for (i = 0; i < slot->pool_size; i++) { ++ unsigned char has_rmap; ++ void *addr; ++ ++ if (!slot->rmap_list_pool[i]) ++ continue; ++ ++ has_rmap = 0; ++ addr = kmap(slot->rmap_list_pool[i]); ++ BUG_ON(!addr); ++ for (j = 0; j < PAGE_SIZE / sizeof(*entry); j++) { ++ entry = (struct rmap_list_entry *)addr + j; ++ if (is_addr(entry->addr)) ++ continue; ++ if (!entry->item) ++ continue; ++ has_rmap = 1; ++ } ++ kunmap(slot->rmap_list_pool[i]); ++ if (!has_rmap) { ++ BUG_ON(slot->pool_counts[i]); ++ __free_page(slot->rmap_list_pool[i]); ++ slot->rmap_list_pool[i] = NULL; ++ } ++ } ++ ++ slot->flags &= ~UKSM_SLOT_NEED_SORT; ++} ++ ++/* ++ * vma_fully_scanned() - if all the pages in this slot have been scanned. ++ */ ++static inline int vma_fully_scanned(struct vma_slot *slot) ++{ ++ return slot->pages_scanned == slot->pages; ++} ++ ++/** ++ * get_next_rmap_item() - Get the next rmap_item in a vma_slot according to ++ * its random permutation. This function is embedded with the random ++ * permutation index management code. ++ */ ++static struct rmap_item *get_next_rmap_item(struct vma_slot *slot, u32 *hash) ++{ ++ unsigned long rand_range, addr, swap_index, scan_index; ++ struct rmap_item *item = NULL; ++ struct rmap_list_entry *scan_entry, *swap_entry = NULL; ++ struct page *page; ++ ++ scan_index = swap_index = slot->pages_scanned % slot->pages; ++ ++ if (pool_entry_boundary(scan_index)) ++ try_free_last_pool(slot, scan_index - 1); ++ ++ if (vma_fully_scanned(slot)) { ++ if (slot->flags & UKSM_SLOT_NEED_SORT) ++ slot->flags |= UKSM_SLOT_NEED_RERAND; ++ else ++ slot->flags &= ~UKSM_SLOT_NEED_RERAND; ++ if (slot->flags & UKSM_SLOT_NEED_SORT) ++ sort_rmap_entry_list(slot); ++ } ++ ++ scan_entry = get_rmap_list_entry(slot, scan_index, 1); ++ if (!scan_entry) ++ return NULL; ++ ++ if (entry_is_new(scan_entry)) { ++ scan_entry->addr = get_index_orig_addr(slot, scan_index); ++ set_is_addr(scan_entry->addr); ++ } ++ ++ if (slot->flags & UKSM_SLOT_NEED_RERAND) { ++ rand_range = slot->pages - scan_index; ++ BUG_ON(!rand_range); ++ swap_index = scan_index + (random32() % rand_range); ++ } ++ ++ if (swap_index != scan_index) { ++ swap_entry = get_rmap_list_entry(slot, swap_index, 1); ++ if (entry_is_new(swap_entry)) { ++ swap_entry->addr = get_index_orig_addr(slot, ++ swap_index); ++ set_is_addr(swap_entry->addr); ++ } ++ swap_entries(scan_entry, scan_index, swap_entry, swap_index); ++ } ++ ++ addr = get_entry_address(scan_entry); ++ item = get_entry_item(scan_entry); ++ BUG_ON(addr > slot->vma->vm_end || addr < slot->vma->vm_start); ++ ++ page = follow_page(slot->vma, addr, FOLL_GET); ++ if (IS_ERR_OR_NULL(page)) ++ goto nopage; ++ ++ if (!PageAnon(page) && !page_trans_compound_anon(page)) ++ goto putpage; ++ ++ /*check is zero_page pfn or uksm_zero_page*/ ++ if ((page_to_pfn(page) == zero_pfn) ++ || (page_to_pfn(page) == uksm_zero_pfn)) ++ goto putpage; ++ ++ flush_anon_page(slot->vma, page, addr); ++ flush_dcache_page(page); ++ ++ ++ *hash = page_hash(page, hash_strength, 1); ++ inc_uksm_pages_scanned(); ++ /*if the page content all zero, re-map to zero-page*/ ++ if (find_zero_page_hash(hash_strength, *hash)) { ++ if (!cmp_and_merge_zero_page(slot->vma, page)) { ++ slot->pages_merged++; ++ __inc_zone_page_state(page, NR_UKSM_ZERO_PAGES); ++ dec_mm_counter(slot->mm, MM_ANONPAGES); ++ ++ /* For full-zero pages, no need to create rmap item */ ++ goto putpage; ++ } else { ++ inc_rshash_neg(memcmp_cost / 2); ++ } ++ } ++ ++ if (!item) { ++ item = alloc_rmap_item(); ++ if (item) { ++ /* It has already 
been zeroed */ ++ item->slot = slot; ++ item->address = addr; ++ item->entry_index = scan_index; ++ scan_entry->item = item; ++ inc_rmap_list_pool_count(slot, scan_index); ++ } else ++ goto putpage; ++ } ++ ++ BUG_ON(item->slot != slot); ++ /* the page may have changed */ ++ item->page = page; ++ put_rmap_list_entry(slot, scan_index); ++ if (swap_entry) ++ put_rmap_list_entry(slot, swap_index); ++ return item; ++ ++putpage: ++ put_page(page); ++ page = NULL; ++nopage: ++ /* no page, store addr back and free rmap_item if possible */ ++ free_entry_item(scan_entry); ++ put_rmap_list_entry(slot, scan_index); ++ if (swap_entry) ++ put_rmap_list_entry(slot, swap_index); ++ return NULL; ++} ++ ++static inline int in_stable_tree(struct rmap_item *rmap_item) ++{ ++ return rmap_item->address & STABLE_FLAG; ++} ++ ++/** ++ * scan_vma_one_page() - scan the next page in a vma_slot. Called with ++ * mmap_sem locked. ++ */ ++static noinline void scan_vma_one_page(struct vma_slot *slot) ++{ ++ u32 hash; ++ struct mm_struct *mm; ++ struct rmap_item *rmap_item = NULL; ++ struct vm_area_struct *vma = slot->vma; ++ ++ mm = vma->vm_mm; ++ BUG_ON(!mm); ++ BUG_ON(!slot); ++ ++ rmap_item = get_next_rmap_item(slot, &hash); ++ if (!rmap_item) ++ goto out1; ++ ++ if (PageKsm(rmap_item->page) && in_stable_tree(rmap_item)) ++ goto out2; ++ ++ cmp_and_merge_page(rmap_item, hash); ++out2: ++ put_page(rmap_item->page); ++out1: ++ slot->pages_scanned++; ++ if (slot->fully_scanned_round != fully_scanned_round) ++ scanned_virtual_pages++; ++ ++ if (vma_fully_scanned(slot)) ++ slot->fully_scanned_round = fully_scanned_round; ++} ++ ++static inline unsigned long rung_get_pages(struct scan_rung *rung) ++{ ++ struct slot_tree_node *node; ++ ++ if (!rung->vma_root.rnode) ++ return 0; ++ ++ node = container_of(rung->vma_root.rnode, struct slot_tree_node, snode); ++ ++ return node->size; ++} ++ ++#define RUNG_SAMPLED_MIN 3 ++ ++static inline ++void uksm_calc_rung_step(struct scan_rung *rung, ++ unsigned long page_time, unsigned long ratio) ++{ ++ unsigned long sampled, pages; ++ ++ /* will be fully scanned ? */ ++ if (!rung->cover_msecs) { ++ rung->step = 1; ++ return; ++ } ++ ++ sampled = rung->cover_msecs * (NSEC_PER_MSEC / TIME_RATIO_SCALE) ++ * ratio / page_time; ++ ++ /* ++ * Before we finsish a scan round and expensive per-round jobs, ++ * we need to have a chance to estimate the per page time. So ++ * the sampled number can not be too small. 
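A standalone sketch of the sampling-step arithmetic used here, in plain userspace C; the SK_* names and constant values are illustrative stand-ins for NSEC_PER_MSEC, TIME_RATIO_SCALE and RUNG_SAMPLED_MIN rather than the patch's actual definitions:

#include <stdio.h>

#define SK_NSEC_PER_MSEC   1000000UL
#define SK_TIME_RATIO_SCALE 1000UL   /* assumed scale of cpu_ratio */
#define SK_RUNG_SAMPLED_MIN 3UL

static unsigned long sk_calc_step(unsigned long pages,
                                  unsigned long cover_msecs,
                                  unsigned long ratio,      /* in 1/SK_TIME_RATIO_SCALE */
                                  unsigned long page_time)  /* ns per scanned page */
{
        /* pages this rung can afford to sample during one cover period */
        unsigned long sampled = cover_msecs *
                        (SK_NSEC_PER_MSEC / SK_TIME_RATIO_SCALE) *
                        ratio / page_time;

        /* keep enough samples to keep the per-page time estimate meaningful */
        if (sampled < SK_RUNG_SAMPLED_MIN)
                sampled = SK_RUNG_SAMPLED_MIN;

        /* visit every step-th page so the samples spread over the whole rung */
        return pages > sampled ? pages / sampled : 1;
}

int main(void)
{
        /* 1,000,000 pages, 2000 ms cover time, 5% cpu, 1000 ns per page -> step 10 */
        printf("step = %lu\n", sk_calc_step(1000000, 2000, 50, 1000));
        return 0;
}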
++ */ ++ if (sampled < RUNG_SAMPLED_MIN) ++ sampled = RUNG_SAMPLED_MIN; ++ ++ pages = rung_get_pages(rung); ++ if (likely(pages > sampled)) ++ rung->step = pages / sampled; ++ else ++ rung->step = 1; ++} ++ ++static inline int step_need_recalc(struct scan_rung *rung) ++{ ++ unsigned long pages, stepmax; ++ ++ pages = rung_get_pages(rung); ++ stepmax = pages / RUNG_SAMPLED_MIN; ++ ++ return pages && (rung->step > pages || ++ (stepmax && rung->step > stepmax)); ++} ++ ++static inline ++void reset_current_scan(struct scan_rung *rung, int finished, int step_recalc) ++{ ++ struct vma_slot *slot; ++ ++ if (finished) ++ rung->flags |= UKSM_RUNG_ROUND_FINISHED; ++ ++ if (step_recalc || step_need_recalc(rung)) { ++ uksm_calc_rung_step(rung, uksm_ema_page_time, rung->cpu_ratio); ++ BUG_ON(step_need_recalc(rung)); ++ } ++ ++ slot_iter_index = random32() % rung->step; ++ BUG_ON(!rung->vma_root.rnode); ++ slot = sradix_tree_next(&rung->vma_root, NULL, 0, slot_iter); ++ BUG_ON(!slot); ++ ++ rung->current_scan = slot; ++ rung->current_offset = slot_iter_index; ++} ++ ++static inline struct sradix_tree_root *slot_get_root(struct vma_slot *slot) ++{ ++ return &slot->rung->vma_root; ++} ++ ++/* ++ * return if resetted. ++ */ ++static int advance_current_scan(struct scan_rung *rung) ++{ ++ unsigned short n; ++ struct vma_slot *slot, *next = NULL; ++ ++ BUG_ON(!rung->vma_root.num); ++ ++ slot = rung->current_scan; ++ n = (slot->pages - rung->current_offset) % rung->step; ++ slot_iter_index = rung->step - n; ++ next = sradix_tree_next(&rung->vma_root, slot->snode, ++ slot->sindex, slot_iter); ++ ++ if (next) { ++ rung->current_offset = slot_iter_index; ++ rung->current_scan = next; ++ return 0; ++ } else { ++ reset_current_scan(rung, 1, 0); ++ return 1; ++ } ++} ++ ++static inline void rung_rm_slot(struct vma_slot *slot) ++{ ++ struct scan_rung *rung = slot->rung; ++ struct sradix_tree_root *root; ++ ++ if (rung->current_scan == slot) ++ advance_current_scan(rung); ++ ++ root = slot_get_root(slot); ++ sradix_tree_delete_from_leaf(root, slot->snode, slot->sindex); ++ slot->snode = NULL; ++ if (step_need_recalc(rung)) { ++ uksm_calc_rung_step(rung, uksm_ema_page_time, rung->cpu_ratio); ++ BUG_ON(step_need_recalc(rung)); ++ } ++ ++ /* In case advance_current_scan loop back to this slot again */ ++ if (rung->vma_root.num && rung->current_scan == slot) ++ reset_current_scan(slot->rung, 1, 0); ++} ++ ++static inline void rung_add_new_slots(struct scan_rung *rung, ++ struct vma_slot **slots, unsigned long num) ++{ ++ int err; ++ struct vma_slot *slot; ++ unsigned long i; ++ struct sradix_tree_root *root = &rung->vma_root; ++ ++ err = sradix_tree_enter(root, (void **)slots, num); ++ BUG_ON(err); ++ ++ for (i = 0; i < num; i++) { ++ slot = slots[i]; ++ slot->rung = rung; ++ BUG_ON(vma_fully_scanned(slot)); ++ } ++ ++ if (rung->vma_root.num == num) ++ reset_current_scan(rung, 0, 1); ++} ++ ++static inline int rung_add_one_slot(struct scan_rung *rung, ++ struct vma_slot *slot) ++{ ++ int err; ++ ++ err = sradix_tree_enter(&rung->vma_root, (void **)&slot, 1); ++ if (err) ++ return err; ++ ++ slot->rung = rung; ++ if (rung->vma_root.num == 1) ++ reset_current_scan(rung, 0, 1); ++ ++ return 0; ++} ++ ++/* ++ * Return true if the slot is deleted from its rung. 
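The rung machinery that follows moves vma slots up and down a ladder of scan intensities. A minimal sketch of that promote/demote policy, assuming a 4-rung ladder and treating the dedup figure and abundant threshold as opaque numbers (their exact semantics follow cal_dedup_ratio() below):

#include <stdio.h>

#define SK_LADDER_SIZE 4   /* stands in for SCAN_LADDER_SIZE; 4 is illustrative */

/*
 * One evaluation round for a slot: profitable slots climb to a rung that
 * gets more cpu, everything else drifts down one rung (cf. judge_slot()
 * and round_update_ladder() later in this file).
 */
static int sk_judge_rung(int rung, unsigned long dedup,
                         unsigned long abundant_threshold)
{
        if (dedup && dedup >= abundant_threshold && rung < SK_LADDER_SIZE - 1)
                return rung + 1;   /* abundant dedup: scan this area harder */
        if (rung > 0)
                return rung - 1;   /* otherwise spend less cpu on it */
        return rung;
}

int main(void)
{
        int rung = 0;

        rung = sk_judge_rung(rung, 40, 10);  /* abundant: promoted to rung 1 */
        rung = sk_judge_rung(rung, 0, 10);   /* nothing merged: back to rung 0 */
        printf("final rung = %d\n", rung);
        return 0;
}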
++ */ ++static inline int vma_rung_enter(struct vma_slot *slot, struct scan_rung *rung) ++{ ++ struct scan_rung *old_rung = slot->rung; ++ int err; ++ ++ if (old_rung == rung) ++ return 0; ++ ++ rung_rm_slot(slot); ++ err = rung_add_one_slot(rung, slot); ++ if (err) { ++ err = rung_add_one_slot(old_rung, slot); ++ WARN_ON(err); /* OOPS, badly OOM, we lost this slot */ ++ } ++ ++ return 1; ++} ++ ++static inline int vma_rung_up(struct vma_slot *slot) ++{ ++ struct scan_rung *rung; ++ ++ rung = slot->rung; ++ if (slot->rung != &uksm_scan_ladder[SCAN_LADDER_SIZE-1]) ++ rung++; ++ ++ return vma_rung_enter(slot, rung); ++} ++ ++static inline int vma_rung_down(struct vma_slot *slot) ++{ ++ struct scan_rung *rung; ++ ++ rung = slot->rung; ++ if (slot->rung != &uksm_scan_ladder[0]) ++ rung--; ++ ++ return vma_rung_enter(slot, rung); ++} ++ ++/** ++ * cal_dedup_ratio() - Calculate the deduplication ratio for this slot. ++ */ ++static unsigned long cal_dedup_ratio(struct vma_slot *slot) ++{ ++ unsigned long ret; ++ ++ BUG_ON(slot->pages_scanned == slot->last_scanned); ++ ++ ret = slot->pages_merged; ++ ++ /* Thrashing area filtering */ ++ if (ret && uksm_thrash_threshold) { ++ if (slot->pages_cowed * 100 / slot->pages_merged ++ > uksm_thrash_threshold) { ++ ret = 0; ++ } else { ++ ret = slot->pages_merged - slot->pages_cowed; ++ } ++ } ++ ++ return ret; ++} ++ ++/** ++ * cal_dedup_ratio() - Calculate the deduplication ratio for this slot. ++ */ ++static unsigned long cal_dedup_ratio_old(struct vma_slot *slot) ++{ ++ unsigned long ret; ++ unsigned long pages_scanned; ++ ++ pages_scanned = slot->pages_scanned; ++ if (!pages_scanned) { ++ if (uksm_thrash_threshold) ++ return 0; ++ else ++ pages_scanned = slot->pages_scanned; ++ } ++ ++ ret = slot->pages_bemerged * 100 / pages_scanned; ++ ++ /* Thrashing area filtering */ ++ if (ret && uksm_thrash_threshold) { ++ if (slot->pages_cowed * 100 / slot->pages_bemerged ++ > uksm_thrash_threshold) { ++ ret = 0; ++ } else { ++ ret = slot->pages_bemerged - slot->pages_cowed; ++ } ++ } ++ ++ return ret; ++} ++ ++/** ++ * stable_node_reinsert() - When the hash_strength has been adjusted, the ++ * stable tree need to be restructured, this is the function re-inserting the ++ * stable node. 
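stable_node_reinsert() repopulates a two-level structure: a first-level tree keyed by the sampled hash, and per-key collision sub-trees keyed by the full-strength hash_max. A simplified lookup over that shape, with linked lists standing in for the red-black trees and sk_* names that are illustrative only:

#include <stdint.h>
#include <stddef.h>

struct sk_stable_node {
        uint32_t hash_max;            /* full-strength hash, computed lazily */
        struct sk_stable_node *next;
};

struct sk_tree_node {
        uint32_t hash;                /* partial (sampled) hash */
        struct sk_stable_node *sub;   /* collision sub-tree (a list here) */
        struct sk_tree_node *next;
};

static struct sk_stable_node *
sk_stable_lookup(struct sk_tree_node *root, uint32_t hash, uint32_t hash_max)
{
        struct sk_tree_node *t;
        struct sk_stable_node *s;

        for (t = root; t; t = t->next) {
                if (t->hash != hash)
                        continue;
                /* same first-level hash: disambiguate with hash_max */
                for (s = t->sub; s; s = s->next)
                        if (s->hash_max == hash_max)
                                return s;
                return NULL;
        }
        return NULL;
}

int main(void)
{
        struct sk_stable_node s = { 0xbeef, NULL };
        struct sk_tree_node t = { 0xabcd, &s, NULL };

        return sk_stable_lookup(&t, 0xabcd, 0xbeef) == &s ? 0 : 1;
}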
++ */ ++static inline void stable_node_reinsert(struct stable_node *new_node, ++ struct page *page, ++ struct rb_root *root_treep, ++ struct list_head *tree_node_listp, ++ u32 hash) ++{ ++ struct rb_node **new = &root_treep->rb_node; ++ struct rb_node *parent = NULL; ++ struct stable_node *stable_node; ++ struct tree_node *tree_node; ++ struct page *tree_page; ++ int cmp; ++ ++ while (*new) { ++ int cmp; ++ ++ tree_node = rb_entry(*new, struct tree_node, node); ++ ++ cmp = hash_cmp(hash, tree_node->hash); ++ ++ if (cmp < 0) { ++ parent = *new; ++ new = &parent->rb_left; ++ } else if (cmp > 0) { ++ parent = *new; ++ new = &parent->rb_right; ++ } else ++ break; ++ } ++ ++ if (*new) { ++ /* find a stable tree node with same first level hash value */ ++ stable_node_hash_max(new_node, page, hash); ++ if (tree_node->count == 1) { ++ stable_node = rb_entry(tree_node->sub_root.rb_node, ++ struct stable_node, node); ++ tree_page = get_uksm_page(stable_node, 1, 0); ++ if (tree_page) { ++ stable_node_hash_max(stable_node, ++ tree_page, hash); ++ put_page(tree_page); ++ ++ /* prepare for stable node insertion */ ++ ++ cmp = hash_cmp(new_node->hash_max, ++ stable_node->hash_max); ++ parent = &stable_node->node; ++ if (cmp < 0) ++ new = &parent->rb_left; ++ else if (cmp > 0) ++ new = &parent->rb_right; ++ else ++ goto failed; ++ ++ goto add_node; ++ } else { ++ /* the only stable_node deleted, the tree node ++ * was not deleted. ++ */ ++ goto tree_node_reuse; ++ } ++ } ++ ++ /* well, search the collision subtree */ ++ new = &tree_node->sub_root.rb_node; ++ parent = NULL; ++ BUG_ON(!*new); ++ while (*new) { ++ int cmp; ++ ++ stable_node = rb_entry(*new, struct stable_node, node); ++ ++ cmp = hash_cmp(new_node->hash_max, ++ stable_node->hash_max); ++ ++ if (cmp < 0) { ++ parent = *new; ++ new = &parent->rb_left; ++ } else if (cmp > 0) { ++ parent = *new; ++ new = &parent->rb_right; ++ } else { ++ /* oh, no, still a collision */ ++ goto failed; ++ } ++ } ++ ++ goto add_node; ++ } ++ ++ /* no tree node found */ ++ tree_node = alloc_tree_node(tree_node_listp); ++ if (!tree_node) { ++ printk(KERN_ERR "UKSM: memory allocation error!\n"); ++ goto failed; ++ } else { ++ tree_node->hash = hash; ++ rb_link_node(&tree_node->node, parent, new); ++ rb_insert_color(&tree_node->node, root_treep); ++ ++tree_node_reuse: ++ /* prepare for stable node insertion */ ++ parent = NULL; ++ new = &tree_node->sub_root.rb_node; ++ } ++ ++add_node: ++ rb_link_node(&new_node->node, parent, new); ++ rb_insert_color(&new_node->node, &tree_node->sub_root); ++ new_node->tree_node = tree_node; ++ tree_node->count++; ++ return; ++ ++failed: ++ /* This can only happen when two nodes have collided ++ * in two levels. ++ */ ++ new_node->tree_node = NULL; ++ return; ++} ++ ++static inline void free_all_tree_nodes(struct list_head *list) ++{ ++ struct tree_node *node, *tmp; ++ ++ list_for_each_entry_safe(node, tmp, list, all_list) { ++ free_tree_node(node); ++ } ++} ++ ++/** ++ * stable_tree_delta_hash() - Delta hash the stable tree from previous hash ++ * strength to the current hash_strength. It re-structures the hole tree. 
++ */ ++static inline void stable_tree_delta_hash(u32 prev_hash_strength) ++{ ++ struct stable_node *node, *tmp; ++ struct rb_root *root_new_treep; ++ struct list_head *new_tree_node_listp; ++ ++ stable_tree_index = (stable_tree_index + 1) % 2; ++ root_new_treep = &root_stable_tree[stable_tree_index]; ++ new_tree_node_listp = &stable_tree_node_list[stable_tree_index]; ++ *root_new_treep = RB_ROOT; ++ BUG_ON(!list_empty(new_tree_node_listp)); ++ ++ /* ++ * we need to be safe, the node could be removed by get_uksm_page() ++ */ ++ list_for_each_entry_safe(node, tmp, &stable_node_list, all_list) { ++ void *addr; ++ struct page *node_page; ++ u32 hash; ++ ++ /* ++ * We are completely re-structuring the stable nodes to a new ++ * stable tree. We don't want to touch the old tree unlinks and ++ * old tree_nodes. The old tree_nodes will be freed at once. ++ */ ++ node_page = get_uksm_page(node, 0, 0); ++ if (!node_page) ++ continue; ++ ++ if (node->tree_node) { ++ hash = node->tree_node->hash; ++ ++ addr = kmap_atomic(node_page, KM_USER0); ++ ++ hash = delta_hash(addr, prev_hash_strength, ++ hash_strength, hash); ++ kunmap_atomic(addr, KM_USER0); ++ } else { ++ /* ++ *it was not inserted to rbtree due to collision in last ++ *round scan. ++ */ ++ hash = page_hash(node_page, hash_strength, 0); ++ } ++ ++ stable_node_reinsert(node, node_page, root_new_treep, ++ new_tree_node_listp, hash); ++ put_page(node_page); ++ } ++ ++ root_stable_treep = root_new_treep; ++ free_all_tree_nodes(stable_tree_node_listp); ++ BUG_ON(!list_empty(stable_tree_node_listp)); ++ stable_tree_node_listp = new_tree_node_listp; ++} ++ ++static inline void inc_hash_strength(unsigned long delta) ++{ ++ hash_strength += 1 << delta; ++ if (hash_strength > HASH_STRENGTH_MAX) ++ hash_strength = HASH_STRENGTH_MAX; ++} ++ ++static inline void dec_hash_strength(unsigned long delta) ++{ ++ unsigned long change = 1 << delta; ++ ++ if (hash_strength <= change + 1) ++ hash_strength = 1; ++ else ++ hash_strength -= change; ++} ++ ++static inline void inc_hash_strength_delta(void) ++{ ++ hash_strength_delta++; ++ if (hash_strength_delta > HASH_STRENGTH_DELTA_MAX) ++ hash_strength_delta = HASH_STRENGTH_DELTA_MAX; ++} ++ ++/* ++static inline unsigned long get_current_neg_ratio(void) ++{ ++ if (!rshash_pos || rshash_neg > rshash_pos) ++ return 100; ++ ++ return div64_u64(100 * rshash_neg , rshash_pos); ++} ++*/ ++ ++static inline unsigned long get_current_neg_ratio(void) ++{ ++ u64 pos = benefit.pos; ++ u64 neg = benefit.neg; ++ ++ if (!neg) ++ return 0; ++ ++ if (!pos || neg > pos) ++ return 100; ++ ++ if (neg > div64_u64(U64_MAX, 100)) ++ pos = div64_u64(pos, 100); ++ else ++ neg *= 100; ++ ++ return div64_u64(neg, pos); ++} ++ ++static inline unsigned long get_current_benefit(void) ++{ ++ u64 pos = benefit.pos; ++ u64 neg = benefit.neg; ++ u64 scanned = benefit.scanned; ++ ++ if (neg > pos) ++ return 0; ++ ++ return div64_u64((pos - neg), scanned); ++} ++ ++static inline int judge_rshash_direction(void) ++{ ++ u64 current_neg_ratio, stable_benefit; ++ u64 current_benefit, delta = 0; ++ int ret = STILL; ++ ++ /* Try to probe a value after the boot, and in case the system ++ are still for a long time. 
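The direction decision below consumes the negative/positive benefit ratio computed a few lines above in get_current_neg_ratio(). Its overflow-safe percentage trick, restated as a small userspace function (a sketch, not the kernel code):

#include <stdint.h>
#include <stdio.h>

/* 100 * neg / pos without overflowing 64 bits, as in get_current_neg_ratio() */
static uint64_t sk_neg_ratio(uint64_t neg, uint64_t pos)
{
        if (!neg)
                return 0;
        if (!pos || neg > pos)
                return 100;

        if (neg > UINT64_MAX / 100)
                pos /= 100;          /* neg too big to scale up: scale pos down */
        else
                neg *= 100;

        return neg / pos;
}

int main(void)
{
        printf("%llu\n", (unsigned long long)sk_neg_ratio(3, 12));   /* prints 25 */
        return 0;
}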
*/ ++ if ((fully_scanned_round & 0xFFULL) == 10) { ++ ret = OBSCURE; ++ goto out; ++ } ++ ++ current_neg_ratio = get_current_neg_ratio(); ++ ++ if (current_neg_ratio == 0) { ++ rshash_neg_cont_zero++; ++ if (rshash_neg_cont_zero > 2) ++ return GO_DOWN; ++ else ++ return STILL; ++ } ++ rshash_neg_cont_zero = 0; ++ ++ if (current_neg_ratio > 90) { ++ ret = GO_UP; ++ goto out; ++ } ++ ++ current_benefit = get_current_benefit(); ++ stable_benefit = rshash_state.stable_benefit; ++ ++ if (!stable_benefit) { ++ ret = OBSCURE; ++ goto out; ++ } ++ ++ if (current_benefit > stable_benefit) ++ delta = current_benefit - stable_benefit; ++ else if (current_benefit < stable_benefit) ++ delta = stable_benefit - current_benefit; ++ ++ delta = div64_u64(100 * delta , stable_benefit); ++ ++ if (delta > 50) { ++ rshash_cont_obscure++; ++ if (rshash_cont_obscure > 2) ++ return OBSCURE; ++ else ++ return STILL; ++ } ++ ++out: ++ rshash_cont_obscure = 0; ++ return ret; ++} ++ ++/** ++ * rshash_adjust() - The main function to control the random sampling state ++ * machine for hash strength adapting. ++ * ++ * return true if hash_strength has changed. ++ */ ++static inline int rshash_adjust(void) ++{ ++ unsigned long prev_hash_strength = hash_strength; ++ ++ if (!encode_benefit()) ++ return 0; ++ ++ switch (rshash_state.state) { ++ case RSHASH_STILL: ++ switch (judge_rshash_direction()) { ++ case GO_UP: ++ if (rshash_state.pre_direct == GO_DOWN) ++ hash_strength_delta = 0; ++ ++ inc_hash_strength(hash_strength_delta); ++ inc_hash_strength_delta(); ++ rshash_state.stable_benefit = get_current_benefit(); ++ rshash_state.pre_direct = GO_UP; ++ break; ++ ++ case GO_DOWN: ++ if (rshash_state.pre_direct == GO_UP) ++ hash_strength_delta = 0; ++ ++ dec_hash_strength(hash_strength_delta); ++ inc_hash_strength_delta(); ++ rshash_state.stable_benefit = get_current_benefit(); ++ rshash_state.pre_direct = GO_DOWN; ++ break; ++ ++ case OBSCURE: ++ rshash_state.stable_point = hash_strength; ++ rshash_state.turn_point_down = hash_strength; ++ rshash_state.turn_point_up = hash_strength; ++ rshash_state.turn_benefit_down = get_current_benefit(); ++ rshash_state.turn_benefit_up = get_current_benefit(); ++ rshash_state.lookup_window_index = 0; ++ rshash_state.state = RSHASH_TRYDOWN; ++ dec_hash_strength(hash_strength_delta); ++ inc_hash_strength_delta(); ++ break; ++ ++ case STILL: ++ break; ++ default: ++ BUG(); ++ } ++ break; ++ ++ case RSHASH_TRYDOWN: ++ if (rshash_state.lookup_window_index++ % 5 == 0) ++ rshash_state.below_count = 0; ++ ++ if (get_current_benefit() < rshash_state.stable_benefit) ++ rshash_state.below_count++; ++ else if (get_current_benefit() > ++ rshash_state.turn_benefit_down) { ++ rshash_state.turn_point_down = hash_strength; ++ rshash_state.turn_benefit_down = get_current_benefit(); ++ } ++ ++ if (rshash_state.below_count >= 3 || ++ judge_rshash_direction() == GO_UP || ++ hash_strength == 1) { ++ hash_strength = rshash_state.stable_point; ++ hash_strength_delta = 0; ++ inc_hash_strength(hash_strength_delta); ++ inc_hash_strength_delta(); ++ rshash_state.lookup_window_index = 0; ++ rshash_state.state = RSHASH_TRYUP; ++ hash_strength_delta = 0; ++ } else { ++ dec_hash_strength(hash_strength_delta); ++ inc_hash_strength_delta(); ++ } ++ break; ++ ++ case RSHASH_TRYUP: ++ if (rshash_state.lookup_window_index++ % 5 == 0) ++ rshash_state.below_count = 0; ++ ++ if (get_current_benefit() < rshash_state.turn_benefit_down) ++ rshash_state.below_count++; ++ else if (get_current_benefit() > rshash_state.turn_benefit_up) 
{ ++ rshash_state.turn_point_up = hash_strength; ++ rshash_state.turn_benefit_up = get_current_benefit(); ++ } ++ ++ if (rshash_state.below_count >= 3 || ++ judge_rshash_direction() == GO_DOWN || ++ hash_strength == HASH_STRENGTH_MAX) { ++ hash_strength = rshash_state.turn_benefit_up > ++ rshash_state.turn_benefit_down ? ++ rshash_state.turn_point_up : ++ rshash_state.turn_point_down; ++ ++ rshash_state.state = RSHASH_PRE_STILL; ++ } else { ++ inc_hash_strength(hash_strength_delta); ++ inc_hash_strength_delta(); ++ } ++ ++ break; ++ ++ case RSHASH_NEW: ++ case RSHASH_PRE_STILL: ++ rshash_state.stable_benefit = get_current_benefit(); ++ rshash_state.state = RSHASH_STILL; ++ hash_strength_delta = 0; ++ break; ++ default: ++ BUG(); ++ } ++ ++ /* rshash_neg = rshash_pos = 0; */ ++ reset_benefit(); ++ ++ if (prev_hash_strength != hash_strength) ++ stable_tree_delta_hash(prev_hash_strength); ++ ++ return prev_hash_strength != hash_strength; ++} ++ ++/** ++ * round_update_ladder() - The main function to do update of all the ++ * adjustments whenever a scan round is finished. ++ */ ++static noinline void round_update_ladder(void) ++{ ++ int i; ++ unsigned long dedup; ++ struct vma_slot *slot, *tmp_slot; ++ ++ for (i = 0; i < SCAN_LADDER_SIZE; i++) { ++ uksm_scan_ladder[i].flags &= ~UKSM_RUNG_ROUND_FINISHED; ++ } ++ ++ list_for_each_entry_safe(slot, tmp_slot, &vma_slot_dedup, dedup_list) { ++ ++ /* slot may be rung_rm_slot() when mm exits */ ++ if (slot->snode) { ++ dedup = cal_dedup_ratio_old(slot); ++ if (dedup && dedup >= uksm_abundant_threshold) ++ vma_rung_up(slot); ++ } ++ ++ slot->pages_bemerged = 0; ++ slot->pages_cowed = 0; ++ ++ list_del_init(&slot->dedup_list); ++ } ++} ++ ++static void uksm_del_vma_slot(struct vma_slot *slot) ++{ ++ int i, j; ++ struct rmap_list_entry *entry; ++ ++ if (slot->snode) { ++ /* ++ * In case it just failed when entering the rung, it's not ++ * necessary. 
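The teardown below walks the slot's rmap_list_pool pages entry by entry. As a reminder of how a linear entry index maps onto those lazily allocated pool pages (cf. get_pool_index() and index_page_offset() earlier), a userspace sketch; the 16-byte entry size and 4 KiB page are illustrative assumptions:

#include <stdint.h>
#include <stdio.h>

#define SK_PAGE_SHIFT 12
#define SK_PAGE_SIZE  (1UL << SK_PAGE_SHIFT)

/* Entry i lives in pool page (i * esize) >> PAGE_SHIFT at offset (i * esize) % PAGE_SIZE. */
static void sk_locate(unsigned long index, size_t esize,
                      unsigned long *pool_idx, unsigned long *offset)
{
        unsigned long linear = index * esize;

        *pool_idx = linear >> SK_PAGE_SHIFT;
        *offset = linear & (SK_PAGE_SIZE - 1);
}

int main(void)
{
        unsigned long p, off;

        sk_locate(700, 16, &p, &off);   /* 700 * 16 = 11200 -> page 2, offset 3008 */
        printf("page %lu offset %lu\n", p, off);
        return 0;
}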
++ */ ++ rung_rm_slot(slot); ++ } ++ ++ if (!list_empty(&slot->dedup_list)) ++ list_del(&slot->dedup_list); ++ ++ if (!slot->rmap_list_pool || !slot->pool_counts) { ++ /* In case it OOMed in uksm_vma_enter() */ ++ goto out; ++ } ++ ++ for (i = 0; i < slot->pool_size; i++) { ++ void *addr; ++ ++ if (!slot->rmap_list_pool[i]) ++ continue; ++ ++ addr = kmap(slot->rmap_list_pool[i]); ++ for (j = 0; j < PAGE_SIZE / sizeof(*entry); j++) { ++ entry = (struct rmap_list_entry *)addr + j; ++ if (is_addr(entry->addr)) ++ continue; ++ if (!entry->item) ++ continue; ++ ++ remove_rmap_item_from_tree(entry->item); ++ free_rmap_item(entry->item); ++ slot->pool_counts[i]--; ++ } ++ BUG_ON(slot->pool_counts[i]); ++ kunmap(slot->rmap_list_pool[i]); ++ __free_page(slot->rmap_list_pool[i]); ++ } ++ kfree(slot->rmap_list_pool); ++ kfree(slot->pool_counts); ++ ++out: ++ slot->rung = NULL; ++ BUG_ON(uksm_pages_total < slot->pages); ++ if (slot->flags & UKSM_SLOT_IN_UKSM) ++ uksm_pages_total -= slot->pages; ++ ++ if (slot->fully_scanned_round == fully_scanned_round) ++ scanned_virtual_pages -= slot->pages; ++ else ++ scanned_virtual_pages -= slot->pages_scanned; ++ free_vma_slot(slot); ++} ++ ++ ++#define SPIN_LOCK_PERIOD 32 ++static struct vma_slot *cleanup_slots[SPIN_LOCK_PERIOD]; ++static inline void cleanup_vma_slots(void) ++{ ++ struct vma_slot *slot; ++ int i; ++ ++ i = 0; ++ spin_lock(&vma_slot_list_lock); ++ while (!list_empty(&vma_slot_del)) { ++ slot = list_entry(vma_slot_del.next, ++ struct vma_slot, slot_list); ++ list_del(&slot->slot_list); ++ cleanup_slots[i++] = slot; ++ if (i == SPIN_LOCK_PERIOD) { ++ spin_unlock(&vma_slot_list_lock); ++ while (--i >= 0) ++ uksm_del_vma_slot(cleanup_slots[i]); ++ i = 0; ++ spin_lock(&vma_slot_list_lock); ++ } ++ } ++ spin_unlock(&vma_slot_list_lock); ++ ++ while (--i >= 0) ++ uksm_del_vma_slot(cleanup_slots[i]); ++} ++ ++/* ++*expotional moving average formula ++*/ ++static inline unsigned long ema(unsigned long curr, unsigned long last_ema) ++{ ++ /* ++ * For a very high burst, even the ema cannot work well, a false very ++ * high per-page time estimation can result in feedback in very high ++ * overhead of context swith and rung update -- this will then lead ++ * to higher per-paper time, this may not converge. ++ * ++ * Instead, we try to approach this value in a binary manner. ++ */ ++ if (curr > last_ema * 10) ++ return last_ema * 2; ++ ++ return (EMA_ALPHA * curr + (100 - EMA_ALPHA) * last_ema) / 100; ++} ++ ++/* ++ * convert cpu ratio in 1/TIME_RATIO_SCALE configured by user to ++ * nanoseconds based on current uksm_sleep_jiffies. ++ */ ++static inline unsigned long cpu_ratio_to_nsec(unsigned int ratio) ++{ ++ return NSEC_PER_USEC * jiffies_to_usecs(uksm_sleep_jiffies) / ++ (TIME_RATIO_SCALE - ratio) * ratio; ++} ++ ++ ++static inline unsigned long rung_real_ratio(int cpu_time_ratio) ++{ ++ unsigned long ret; ++ ++ BUG_ON(!cpu_time_ratio); ++ ++ if (cpu_time_ratio > 0) ++ ret = cpu_time_ratio; ++ else ++ ret = (unsigned long)(-cpu_time_ratio) * ++ uksm_max_cpu_percentage / 100UL; ++ ++ return ret ? 
ret : 1; ++} ++ ++static noinline void uksm_calc_scan_pages(void) ++{ ++ struct scan_rung *ladder = uksm_scan_ladder; ++ unsigned long sleep_usecs, nsecs; ++ unsigned long ratio; ++ int i; ++ unsigned long per_page; ++ ++ if (uksm_ema_page_time > 100000 || ++ (((unsigned long) uksm_eval_round & (256UL - 1)) == 0UL)) ++ uksm_ema_page_time = UKSM_PAGE_TIME_DEFAULT; ++ ++ per_page = uksm_ema_page_time; ++ BUG_ON(!per_page); ++ ++ /* ++ * For every 8 eval round, we try to probe a uksm_sleep_jiffies value ++ * based on saved user input. ++ */ ++ if (((unsigned long) uksm_eval_round & (8UL - 1)) == 0UL) ++ uksm_sleep_jiffies = uksm_sleep_saved; ++ ++ /* We require a rung scan at least 1 page in a period. */ ++ nsecs = per_page; ++ ratio = rung_real_ratio(ladder[0].cpu_ratio); ++ if (cpu_ratio_to_nsec(ratio) < nsecs) { ++ sleep_usecs = nsecs * (TIME_RATIO_SCALE - ratio) / ratio ++ / NSEC_PER_USEC; ++ uksm_sleep_jiffies = usecs_to_jiffies(sleep_usecs) + 1; ++ } ++ ++ for (i = 0; i < SCAN_LADDER_SIZE; i++) { ++ ratio = rung_real_ratio(ladder[i].cpu_ratio); ++ ladder[i].pages_to_scan = cpu_ratio_to_nsec(ratio) / ++ per_page; ++ BUG_ON(!ladder[i].pages_to_scan); ++ uksm_calc_rung_step(&ladder[i], per_page, ratio); ++ } ++} ++ ++/* ++ * From the scan time of this round (ns) to next expected min sleep time ++ * (ms), be careful of the possible overflows. ratio is taken from ++ * rung_real_ratio() ++ */ ++static inline ++unsigned int scan_time_to_sleep(unsigned long long scan_time, unsigned long ratio) ++{ ++ scan_time >>= 20; /* to msec level now */ ++ BUG_ON(scan_time > (ULONG_MAX / TIME_RATIO_SCALE)); ++ ++ return (unsigned int) ((unsigned long) scan_time * ++ (TIME_RATIO_SCALE - ratio) / ratio); ++} ++ ++#define __round_mask(x, y) ((__typeof__(x))((y)-1)) ++#define round_up(x, y) ((((x)-1) | __round_mask(x, y))+1) ++ ++static inline unsigned long vma_pool_size(struct vma_slot *slot) ++{ ++ return round_up(sizeof(struct rmap_list_entry) * slot->pages, ++ PAGE_SIZE) >> PAGE_SHIFT; ++} ++ ++static void uksm_vma_enter(struct vma_slot **slots, unsigned long num) ++{ ++ struct scan_rung *rung; ++ unsigned long pool_size, i; ++ struct vma_slot *slot; ++ int failed; ++ ++ rung = &uksm_scan_ladder[0]; ++ ++ failed = 0; ++ for (i = 0; i < num; i++) { ++ slot = slots[i]; ++ ++ pool_size = vma_pool_size(slot); ++ slot->rmap_list_pool = kzalloc(sizeof(struct page *) * ++ pool_size, GFP_KERNEL); ++ if (!slot->rmap_list_pool) ++ break; ++ ++ slot->pool_counts = kzalloc(sizeof(unsigned int) * pool_size, ++ GFP_KERNEL); ++ if (!slot->pool_counts) { ++ kfree(slot->rmap_list_pool); ++ break; ++ } ++ ++ slot->pool_size = pool_size; ++ BUG_ON(CAN_OVERFLOW_U64(uksm_pages_total, slot->pages)); ++ slot->flags |= UKSM_SLOT_IN_UKSM; ++ uksm_pages_total += slot->pages; ++ } ++ ++ if (i) ++ rung_add_new_slots(rung, slots, i); ++ ++ return; ++} ++ ++static struct vma_slot *batch_slots[SLOT_TREE_NODE_STORE_SIZE]; ++ ++static void uksm_enter_all_slots(void) ++{ ++ struct vma_slot *slot; ++ unsigned long index; ++ struct list_head empty_vma_list; ++ int i; ++ ++ i = 0; ++ index = 0; ++ INIT_LIST_HEAD(&empty_vma_list); ++ ++ spin_lock(&vma_slot_list_lock); ++ while (!list_empty(&vma_slot_new)) { ++ slot = list_entry(vma_slot_new.next, ++ struct vma_slot, slot_list); ++ ++ if (!slot->vma->anon_vma) { ++ list_move(&slot->slot_list, &empty_vma_list); ++ } else if (vma_can_enter(slot->vma)) { ++ batch_slots[index++] = slot; ++ list_del_init(&slot->slot_list); ++ } else { ++ list_move(&slot->slot_list, &vma_slot_noadd); ++ } ++ ++ if 
(++i == SPIN_LOCK_PERIOD || ++ (index && !(index % SLOT_TREE_NODE_STORE_SIZE))) { ++ spin_unlock(&vma_slot_list_lock); ++ ++ if (index && !(index % SLOT_TREE_NODE_STORE_SIZE)) { ++ uksm_vma_enter(batch_slots, index); ++ index = 0; ++ } ++ i = 0; ++ cond_resched(); ++ spin_lock(&vma_slot_list_lock); ++ } ++ } ++ ++ list_splice(&empty_vma_list, &vma_slot_new); ++ ++ spin_unlock(&vma_slot_list_lock); ++ ++ if (index) ++ uksm_vma_enter(batch_slots, index); ++ ++} ++ ++static inline int rung_round_finished(struct scan_rung *rung) ++{ ++ return rung->flags & UKSM_RUNG_ROUND_FINISHED; ++} ++ ++static inline void judge_slot(struct vma_slot *slot) ++{ ++ struct scan_rung *rung = slot->rung; ++ unsigned long dedup; ++ int deleted; ++ ++ dedup = cal_dedup_ratio(slot); ++ if (vma_fully_scanned(slot) && uksm_thrash_threshold) ++ deleted = vma_rung_enter(slot, &uksm_scan_ladder[0]); ++ else if (dedup && dedup >= uksm_abundant_threshold) ++ deleted = vma_rung_up(slot); ++ else ++ deleted = vma_rung_down(slot); ++ ++ slot->pages_merged = 0; ++ slot->pages_cowed = 0; ++ ++ if (vma_fully_scanned(slot)) ++ slot->pages_scanned = 0; ++ ++ slot->last_scanned = slot->pages_scanned; ++ ++ /* If its deleted in above, then rung was already advanced. */ ++ if (!deleted) ++ advance_current_scan(rung); ++} ++ ++ ++static inline int hash_round_finished(void) ++{ ++ if (scanned_virtual_pages > (uksm_pages_total >> 2)) { ++ scanned_virtual_pages = 0; ++ if (uksm_pages_scanned) ++ fully_scanned_round++; ++ ++ return 1; ++ } else { ++ return 0; ++ } ++} ++ ++#define UKSM_MMSEM_BATCH 5 ++/** ++ * uksm_do_scan() - the main worker function. ++ */ ++static noinline void uksm_do_scan(void) ++{ ++ struct vma_slot *slot, *iter; ++ struct mm_struct *busy_mm; ++ unsigned char round_finished, all_rungs_emtpy; ++ int i, err, mmsem_batch; ++ unsigned long pcost; ++ long long delta_exec; ++ unsigned long vpages, max_cpu_ratio; ++ unsigned long long start_time, end_time, scan_time; ++ unsigned int expected_jiffies; ++ ++ might_sleep(); ++ ++ vpages = 0; ++ ++ start_time = task_sched_runtime(current); ++ max_cpu_ratio = 0; ++ mmsem_batch = 0; ++ ++ for (i = 0; i < SCAN_LADDER_SIZE;) { ++ struct scan_rung *rung = &uksm_scan_ladder[i]; ++ unsigned long ratio; ++ ++ if (!rung->pages_to_scan) { ++ i++; ++ continue; ++ } ++ ++ if (!rung->vma_root.num) { ++ rung->pages_to_scan = 0; ++ i++; ++ continue; ++ } ++ ++ ratio = rung_real_ratio(rung->cpu_ratio); ++ if (ratio > max_cpu_ratio) ++ max_cpu_ratio = ratio; ++ ++ /* ++ * Do not consider rung_round_finished() here, just used up the ++ * rung->pages_to_scan quota. 
++ */ ++ while (rung->pages_to_scan && rung->vma_root.num && ++ likely(!freezing(current))) { ++ int reset = 0; ++ ++ slot = rung->current_scan; ++ ++ BUG_ON(vma_fully_scanned(slot)); ++ ++ if (mmsem_batch) { ++ err = 0; ++ } else { ++ err = try_down_read_slot_mmap_sem(slot); ++ } ++ ++ if (err == -ENOENT) { ++rm_slot: ++ rung_rm_slot(slot); ++ continue; ++ } ++ ++ busy_mm = slot->mm; ++ ++ if (err == -EBUSY) { ++ /* skip other vmas on the same mm */ ++ do { ++ reset = advance_current_scan(rung); ++ iter = rung->current_scan; ++ if (iter->vma->vm_mm != busy_mm) ++ break; ++ } while (!reset); ++ ++ if (iter->vma->vm_mm != busy_mm) { ++ continue; ++ } else { ++ /* scan round finsished */ ++ break; ++ } ++ } ++ ++ BUG_ON(!vma_can_enter(slot->vma)); ++ if (uksm_test_exit(slot->vma->vm_mm)) { ++ mmsem_batch = 0; ++ up_read(&slot->vma->vm_mm->mmap_sem); ++ goto rm_slot; ++ } ++ ++ if (mmsem_batch) ++ mmsem_batch--; ++ else ++ mmsem_batch = UKSM_MMSEM_BATCH; ++ ++ /* Ok, we have take the mmap_sem, ready to scan */ ++ scan_vma_one_page(slot); ++ rung->pages_to_scan--; ++ vpages++; ++ ++ if (rung->current_offset + rung->step > slot->pages - 1 ++ || vma_fully_scanned(slot)) { ++ up_read(&slot->vma->vm_mm->mmap_sem); ++ judge_slot(slot); ++ mmsem_batch = 0; ++ } else { ++ rung->current_offset += rung->step; ++ if (!mmsem_batch) ++ up_read(&slot->vma->vm_mm->mmap_sem); ++ } ++ ++ cond_resched(); ++ } ++ ++ if (mmsem_batch) { ++ up_read(&slot->vma->vm_mm->mmap_sem); ++ mmsem_batch = 0; ++ } ++ ++ if (freezing(current)) ++ break; ++ ++ cond_resched(); ++ } ++ end_time = task_sched_runtime(current); ++ delta_exec = end_time - start_time; ++ ++ if (freezing(current)) ++ return; ++ ++ cleanup_vma_slots(); ++ uksm_enter_all_slots(); ++ ++ round_finished = 1; ++ all_rungs_emtpy = 1; ++ for (i = 0; i < SCAN_LADDER_SIZE; i++) { ++ struct scan_rung *rung = &uksm_scan_ladder[i]; ++ ++ if (rung->vma_root.num) { ++ all_rungs_emtpy = 0; ++ if (!rung_round_finished(rung)) ++ round_finished = 0; ++ } ++ } ++ ++ if (all_rungs_emtpy) ++ round_finished = 0; ++ ++ if (round_finished) { ++ round_update_ladder(); ++ uksm_eval_round++; ++ ++ if (hash_round_finished() && rshash_adjust()) { ++ /* Reset the unstable root iff hash strength changed */ ++ uksm_hash_round++; ++ root_unstable_tree = RB_ROOT; ++ free_all_tree_nodes(&unstable_tree_node_list); ++ } ++ ++ /* ++ * A number of pages can hang around indefinitely on per-cpu ++ * pagevecs, raised page count preventing write_protect_page ++ * from merging them. Though it doesn't really matter much, ++ * it is puzzling to see some stuck in pages_volatile until ++ * other activity jostles them out, and they also prevented ++ * LTP's KSM test from succeeding deterministically; so drain ++ * them here (here rather than on entry to uksm_do_scan(), ++ * so we don't IPI too often when pages_to_scan is set low). 
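Right after the drain, the measured per-page cost is folded into uksm_ema_page_time through ema() and the next sleep interval is bounded. The smoothing itself, with its burst clamp, condensed into a runnable sketch (SK_EMA_ALPHA is an illustrative weight; the patch defines its own EMA_ALPHA):

#include <stdio.h>

#define SK_EMA_ALPHA 20UL   /* illustrative weight, not the patch's value */

/* Exponential moving average of per-page scan cost with a burst clamp. */
static unsigned long sk_ema(unsigned long curr, unsigned long last)
{
        /* a wild spike would poison the average; approach it only gradually */
        if (curr > last * 10)
                return last * 2;

        return (SK_EMA_ALPHA * curr + (100 - SK_EMA_ALPHA) * last) / 100;
}

int main(void)
{
        unsigned long t = 500;     /* ns per page so far */

        t = sk_ema(700, t);        /* mild change: weighted blend -> 540 */
        t = sk_ema(100000, t);     /* burst: only doubles -> 1080 */
        printf("ema = %lu\n", t);
        return 0;
}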
++ */ ++ lru_add_drain_all(); ++ } ++ ++ ++ if (vpages && delta_exec > 0) { ++ pcost = (unsigned long) delta_exec / vpages; ++ if (likely(uksm_ema_page_time)) ++ uksm_ema_page_time = ema(pcost, uksm_ema_page_time); ++ else ++ uksm_ema_page_time = pcost; ++ } ++ ++ uksm_calc_scan_pages(); ++ uksm_sleep_real = uksm_sleep_jiffies; ++ /* in case of radical cpu bursts, apply the upper bound */ ++ end_time = task_sched_runtime(current); ++ if (max_cpu_ratio && end_time > start_time) { ++ scan_time = end_time - start_time; ++ expected_jiffies = msecs_to_jiffies( ++ scan_time_to_sleep(scan_time, max_cpu_ratio)); ++ ++ if (expected_jiffies > uksm_sleep_real) ++ uksm_sleep_real = expected_jiffies; ++ ++ /* We have a 1 second up bound for responsiveness. */ ++ if (jiffies_to_msecs(uksm_sleep_real) > MSEC_PER_SEC) ++ uksm_sleep_real = msecs_to_jiffies(1000); ++ } ++ ++ return; ++} ++ ++static int ksmd_should_run(void) ++{ ++ return uksm_run & UKSM_RUN_MERGE; ++} ++ ++static int uksm_scan_thread(void *nothing) ++{ ++ set_freezable(); ++ set_user_nice(current, 5); ++ ++ while (!kthread_should_stop()) { ++ mutex_lock(&uksm_thread_mutex); ++ if (ksmd_should_run()) { ++ uksm_do_scan(); ++ } ++ mutex_unlock(&uksm_thread_mutex); ++ ++ try_to_freeze(); ++ ++ if (ksmd_should_run()) { ++ schedule_timeout_interruptible(uksm_sleep_real); ++ uksm_sleep_times++; ++ } else { ++ wait_event_freezable(uksm_thread_wait, ++ ksmd_should_run() || kthread_should_stop()); ++ } ++ } ++ return 0; ++} ++ ++int page_referenced_ksm(struct page *page, struct mem_cgroup *memcg, ++ unsigned long *vm_flags) ++{ ++ struct stable_node *stable_node; ++ struct node_vma *node_vma; ++ struct rmap_item *rmap_item; ++ struct hlist_node *hlist, *rmap_hlist; ++ unsigned int mapcount = page_mapcount(page); ++ int referenced = 0; ++ int search_new_forks = 0; ++ unsigned long address; ++ ++ VM_BUG_ON(!PageKsm(page)); ++ VM_BUG_ON(!PageLocked(page)); ++ ++ stable_node = page_stable_node(page); ++ if (!stable_node) ++ return 0; ++ ++ ++again: ++ hlist_for_each_entry(node_vma, hlist, &stable_node->hlist, hlist) { ++ hlist_for_each_entry(rmap_item, rmap_hlist, ++ &node_vma->rmap_hlist, hlist) { ++ struct anon_vma *anon_vma = rmap_item->anon_vma; ++ struct anon_vma_chain *vmac; ++ struct vm_area_struct *vma; ++ ++ anon_vma_lock(anon_vma); ++ list_for_each_entry(vmac, &anon_vma->head, ++ same_anon_vma) { ++ vma = vmac->vma; ++ address = get_rmap_addr(rmap_item); ++ ++ if (address < vma->vm_start || ++ address >= vma->vm_end) ++ continue; ++ /* ++ * Initially we examine only the vma which ++ * covers this rmap_item; but later, if there ++ * is still work to do, we examine covering ++ * vmas in other mms: in case they were forked ++ * from the original since ksmd passed. 
++ */ ++ if ((rmap_item->slot->vma == vma) == ++ search_new_forks) ++ continue; ++ ++ if (memcg && ++ !mm_match_cgroup(vma->vm_mm, memcg)) ++ continue; ++ ++ referenced += ++ page_referenced_one(page, vma, ++ address, &mapcount, vm_flags); ++ if (!search_new_forks || !mapcount) ++ break; ++ } ++ ++ anon_vma_unlock(anon_vma); ++ if (!mapcount) ++ goto out; ++ } ++ } ++ if (!search_new_forks++) ++ goto again; ++out: ++ return referenced; ++} ++ ++int try_to_unmap_ksm(struct page *page, enum ttu_flags flags) ++{ ++ struct stable_node *stable_node; ++ struct node_vma *node_vma; ++ struct hlist_node *hlist, *rmap_hlist; ++ struct rmap_item *rmap_item; ++ int ret = SWAP_AGAIN; ++ int search_new_forks = 0; ++ unsigned long address; ++ ++ VM_BUG_ON(!PageKsm(page)); ++ VM_BUG_ON(!PageLocked(page)); ++ ++ stable_node = page_stable_node(page); ++ if (!stable_node) ++ return SWAP_FAIL; ++again: ++ hlist_for_each_entry(node_vma, hlist, &stable_node->hlist, hlist) { ++ hlist_for_each_entry(rmap_item, rmap_hlist, ++ &node_vma->rmap_hlist, hlist) { ++ struct anon_vma *anon_vma = rmap_item->anon_vma; ++ struct anon_vma_chain *vmac; ++ struct vm_area_struct *vma; ++ ++ anon_vma_lock(anon_vma); ++ list_for_each_entry(vmac, &anon_vma->head, ++ same_anon_vma) { ++ vma = vmac->vma; ++ address = get_rmap_addr(rmap_item); ++ ++ if (address < vma->vm_start || ++ address >= vma->vm_end) ++ continue; ++ /* ++ * Initially we examine only the vma which ++ * covers this rmap_item; but later, if there ++ * is still work to do, we examine covering ++ * vmas in other mms: in case they were forked ++ * from the original since ksmd passed. ++ */ ++ if ((rmap_item->slot->vma == vma) == ++ search_new_forks) ++ continue; ++ ++ ret = try_to_unmap_one(page, vma, ++ address, flags); ++ if (ret != SWAP_AGAIN || !page_mapped(page)) { ++ anon_vma_unlock(anon_vma); ++ goto out; ++ } ++ } ++ anon_vma_unlock(anon_vma); ++ } ++ } ++ if (!search_new_forks++) ++ goto again; ++out: ++ return ret; ++} ++ ++#ifdef CONFIG_MIGRATION ++int rmap_walk_ksm(struct page *page, int (*rmap_one)(struct page *, ++ struct vm_area_struct *, unsigned long, void *), void *arg) ++{ ++ struct stable_node *stable_node; ++ struct node_vma *node_vma; ++ struct hlist_node *hlist, *rmap_hlist; ++ struct rmap_item *rmap_item; ++ int ret = SWAP_AGAIN; ++ int search_new_forks = 0; ++ unsigned long address; ++ ++ VM_BUG_ON(!PageKsm(page)); ++ VM_BUG_ON(!PageLocked(page)); ++ ++ stable_node = page_stable_node(page); ++ if (!stable_node) ++ return ret; ++again: ++ hlist_for_each_entry(node_vma, hlist, &stable_node->hlist, hlist) { ++ hlist_for_each_entry(rmap_item, rmap_hlist, ++ &node_vma->rmap_hlist, hlist) { ++ struct anon_vma *anon_vma = rmap_item->anon_vma; ++ struct anon_vma_chain *vmac; ++ struct vm_area_struct *vma; ++ ++ anon_vma_lock(anon_vma); ++ list_for_each_entry(vmac, &anon_vma->head, ++ same_anon_vma) { ++ vma = vmac->vma; ++ address = get_rmap_addr(rmap_item); ++ ++ if (address < vma->vm_start || ++ address >= vma->vm_end) ++ continue; ++ ++ if ((rmap_item->slot->vma == vma) == ++ search_new_forks) ++ continue; ++ ++ ret = rmap_one(page, vma, address, arg); ++ if (ret != SWAP_AGAIN) { ++ anon_vma_unlock(anon_vma); ++ goto out; ++ } ++ } ++ anon_vma_unlock(anon_vma); ++ } ++ } ++ if (!search_new_forks++) ++ goto again; ++out: ++ return ret; ++} ++ ++/* Common ksm interface but may be specific to uksm */ ++void ksm_migrate_page(struct page *newpage, struct page *oldpage) ++{ ++ struct stable_node *stable_node; ++ ++ VM_BUG_ON(!PageLocked(oldpage)); ++ 
VM_BUG_ON(!PageLocked(newpage)); ++ VM_BUG_ON(newpage->mapping != oldpage->mapping); ++ ++ stable_node = page_stable_node(newpage); ++ if (stable_node) { ++ VM_BUG_ON(stable_node->kpfn != page_to_pfn(oldpage)); ++ stable_node->kpfn = page_to_pfn(newpage); ++ } ++} ++#endif /* CONFIG_MIGRATION */ ++ ++#ifdef CONFIG_MEMORY_HOTREMOVE ++static struct stable_node *uksm_check_stable_tree(unsigned long start_pfn, ++ unsigned long end_pfn) ++{ ++ struct rb_node *node; ++ ++ for (node = rb_first(root_stable_treep); node; node = rb_next(node)) { ++ struct stable_node *stable_node; ++ ++ stable_node = rb_entry(node, struct stable_node, node); ++ if (stable_node->kpfn >= start_pfn && ++ stable_node->kpfn < end_pfn) ++ return stable_node; ++ } ++ return NULL; ++} ++ ++static int uksm_memory_callback(struct notifier_block *self, ++ unsigned long action, void *arg) ++{ ++ struct memory_notify *mn = arg; ++ struct stable_node *stable_node; ++ ++ switch (action) { ++ case MEM_GOING_OFFLINE: ++ /* ++ * Keep it very simple for now: just lock out ksmd and ++ * MADV_UNMERGEABLE while any memory is going offline. ++ * mutex_lock_nested() is necessary because lockdep was alarmed ++ * that here we take uksm_thread_mutex inside notifier chain ++ * mutex, and later take notifier chain mutex inside ++ * uksm_thread_mutex to unlock it. But that's safe because both ++ * are inside mem_hotplug_mutex. ++ */ ++ mutex_lock_nested(&uksm_thread_mutex, SINGLE_DEPTH_NESTING); ++ break; ++ ++ case MEM_OFFLINE: ++ /* ++ * Most of the work is done by page migration; but there might ++ * be a few stable_nodes left over, still pointing to struct ++ * pages which have been offlined: prune those from the tree. ++ */ ++ while ((stable_node = uksm_check_stable_tree(mn->start_pfn, ++ mn->start_pfn + mn->nr_pages)) != NULL) ++ remove_node_from_stable_tree(stable_node, 1, 1); ++ /* fallthrough */ ++ ++ case MEM_CANCEL_OFFLINE: ++ mutex_unlock(&uksm_thread_mutex); ++ break; ++ } ++ return NOTIFY_OK; ++} ++#endif /* CONFIG_MEMORY_HOTREMOVE */ ++ ++#ifdef CONFIG_SYSFS ++/* ++ * This all compiles without CONFIG_SYSFS, but is a waste of space. 
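The sysfs block that follows wires each tunable to a _show/_store pair through the UKSM_ATTR()/UKSM_ATTR_RO() helpers and groups them under an attribute group named "uksm". Assuming the group is registered under the mm kobject, as plain KSM does, the knobs would surface under /sys/kernel/mm/uksm/; a small userspace reader of the run flag under that assumption:

#include <stdio.h>

int main(void)
{
        /* Path assumes the "uksm" attribute group is registered under mm_kobj;
         * adjust if the patch registers it elsewhere. */
        FILE *f = fopen("/sys/kernel/mm/uksm/run", "r");
        unsigned int run;

        if (f && fscanf(f, "%u", &run) == 1)
                printf("uksm run = %u\n", run);
        if (f)
                fclose(f);
        return 0;
}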
++ */ ++ ++#define UKSM_ATTR_RO(_name) \ ++ static struct kobj_attribute _name##_attr = __ATTR_RO(_name) ++#define UKSM_ATTR(_name) \ ++ static struct kobj_attribute _name##_attr = \ ++ __ATTR(_name, 0644, _name##_show, _name##_store) ++ ++static ssize_t max_cpu_percentage_show(struct kobject *kobj, ++ struct kobj_attribute *attr, char *buf) ++{ ++ return sprintf(buf, "%u\n", uksm_max_cpu_percentage); ++} ++ ++static ssize_t max_cpu_percentage_store(struct kobject *kobj, ++ struct kobj_attribute *attr, ++ const char *buf, size_t count) ++{ ++ unsigned long max_cpu_percentage; ++ int err; ++ ++ err = strict_strtoul(buf, 10, &max_cpu_percentage); ++ if (err || max_cpu_percentage > 100) ++ return -EINVAL; ++ ++ if (max_cpu_percentage == 100) ++ max_cpu_percentage = 99; ++ else if (max_cpu_percentage < 10) ++ max_cpu_percentage = 10; ++ ++ uksm_max_cpu_percentage = max_cpu_percentage; ++ ++ return count; ++} ++UKSM_ATTR(max_cpu_percentage); ++ ++static ssize_t sleep_millisecs_show(struct kobject *kobj, ++ struct kobj_attribute *attr, char *buf) ++{ ++ return sprintf(buf, "%u\n", jiffies_to_msecs(uksm_sleep_jiffies)); ++} ++ ++static ssize_t sleep_millisecs_store(struct kobject *kobj, ++ struct kobj_attribute *attr, ++ const char *buf, size_t count) ++{ ++ unsigned long msecs; ++ int err; ++ ++ err = strict_strtoul(buf, 10, &msecs); ++ if (err || msecs > MSEC_PER_SEC) ++ return -EINVAL; ++ ++ uksm_sleep_jiffies = msecs_to_jiffies(msecs); ++ uksm_sleep_saved = uksm_sleep_jiffies; ++ ++ return count; ++} ++UKSM_ATTR(sleep_millisecs); ++ ++ ++static ssize_t cpu_governor_show(struct kobject *kobj, ++ struct kobj_attribute *attr, char *buf) ++{ ++ int n = sizeof(uksm_cpu_governor_str) / sizeof(char *); ++ int i; ++ ++ buf[0] = '\0'; ++ for (i = 0; i < n ; i++) { ++ if (uksm_cpu_governor == i) ++ strcat(buf, "["); ++ ++ strcat(buf, uksm_cpu_governor_str[i]); ++ ++ if (uksm_cpu_governor == i) ++ strcat(buf, "]"); ++ ++ strcat(buf, " "); ++ } ++ strcat(buf, "\n"); ++ ++ return strlen(buf); ++} ++ ++static inline void init_performance_values(void) ++{ ++ int i; ++ struct scan_rung *rung; ++ struct uksm_cpu_preset_s *preset = uksm_cpu_preset + uksm_cpu_governor; ++ ++ ++ for (i = 0; i < SCAN_LADDER_SIZE; i++) { ++ rung = uksm_scan_ladder + i; ++ rung->cpu_ratio = preset->cpu_ratio[i]; ++ rung->cover_msecs = preset->cover_msecs[i]; ++ } ++ ++ uksm_max_cpu_percentage = preset->max_cpu; ++} ++ ++static ssize_t cpu_governor_store(struct kobject *kobj, ++ struct kobj_attribute *attr, ++ const char *buf, size_t count) ++{ ++ int n = sizeof(uksm_cpu_governor_str) / sizeof(char *); ++ ++ for (n--; n >=0 ; n--) { ++ if (!strncmp(buf, uksm_cpu_governor_str[n], ++ strlen(uksm_cpu_governor_str[n]))) ++ break; ++ } ++ ++ if (n < 0) ++ return -EINVAL; ++ else ++ uksm_cpu_governor = n; ++ ++ init_performance_values(); ++ ++ return count; ++} ++UKSM_ATTR(cpu_governor); ++ ++static ssize_t run_show(struct kobject *kobj, struct kobj_attribute *attr, ++ char *buf) ++{ ++ return sprintf(buf, "%u\n", uksm_run); ++} ++ ++static ssize_t run_store(struct kobject *kobj, struct kobj_attribute *attr, ++ const char *buf, size_t count) ++{ ++ int err; ++ unsigned long flags; ++ ++ err = strict_strtoul(buf, 10, &flags); ++ if (err || flags > UINT_MAX) ++ return -EINVAL; ++ if (flags > UKSM_RUN_MERGE) ++ return -EINVAL; ++ ++ mutex_lock(&uksm_thread_mutex); ++ if (uksm_run != flags) { ++ uksm_run = flags; ++ } ++ mutex_unlock(&uksm_thread_mutex); ++ ++ if (flags & UKSM_RUN_MERGE) ++ wake_up_interruptible(&uksm_thread_wait); ++ ++ 
return count; ++} ++UKSM_ATTR(run); ++ ++static ssize_t abundant_threshold_show(struct kobject *kobj, ++ struct kobj_attribute *attr, char *buf) ++{ ++ return sprintf(buf, "%u\n", uksm_abundant_threshold); ++} ++ ++static ssize_t abundant_threshold_store(struct kobject *kobj, ++ struct kobj_attribute *attr, ++ const char *buf, size_t count) ++{ ++ int err; ++ unsigned long flags; ++ ++ err = strict_strtoul(buf, 10, &flags); ++ if (err || flags > 99) ++ return -EINVAL; ++ ++ uksm_abundant_threshold = flags; ++ ++ return count; ++} ++UKSM_ATTR(abundant_threshold); ++ ++static ssize_t thrash_threshold_show(struct kobject *kobj, ++ struct kobj_attribute *attr, char *buf) ++{ ++ return sprintf(buf, "%u\n", uksm_thrash_threshold); ++} ++ ++static ssize_t thrash_threshold_store(struct kobject *kobj, ++ struct kobj_attribute *attr, ++ const char *buf, size_t count) ++{ ++ int err; ++ unsigned long flags; ++ ++ err = strict_strtoul(buf, 10, &flags); ++ if (err || flags > 99) ++ return -EINVAL; ++ ++ uksm_thrash_threshold = flags; ++ ++ return count; ++} ++UKSM_ATTR(thrash_threshold); ++ ++static ssize_t cpu_ratios_show(struct kobject *kobj, ++ struct kobj_attribute *attr, char *buf) ++{ ++ int i, size; ++ struct scan_rung *rung; ++ char *p = buf; ++ ++ for (i = 0; i < SCAN_LADDER_SIZE; i++) { ++ rung = &uksm_scan_ladder[i]; ++ ++ if (rung->cpu_ratio > 0) ++ size = sprintf(p, "%d ", rung->cpu_ratio); ++ else ++ size = sprintf(p, "MAX/%d ", ++ TIME_RATIO_SCALE / -rung->cpu_ratio); ++ ++ p += size; ++ } ++ ++ *p++ = '\n'; ++ *p = '\0'; ++ ++ return p - buf; ++} ++ ++static ssize_t cpu_ratios_store(struct kobject *kobj, ++ struct kobj_attribute *attr, ++ const char *buf, size_t count) ++{ ++ int i, cpuratios[SCAN_LADDER_SIZE], err; ++ unsigned long value; ++ struct scan_rung *rung; ++ char *p, *end = NULL; ++ ++ p = kzalloc(count, GFP_KERNEL); ++ if (!p) ++ return -ENOMEM; ++ ++ memcpy(p, buf, count); ++ ++ for (i = 0; i < SCAN_LADDER_SIZE; i++) { ++ if (i != SCAN_LADDER_SIZE -1) { ++ end = strchr(p, ' '); ++ if (!end) ++ return -EINVAL; ++ ++ *end = '\0'; ++ } ++ ++ if (strstr(p, "MAX/")) { ++ p = strchr(p, '/') + 1; ++ err = strict_strtoul(p, 10, &value); ++ if (err || value > TIME_RATIO_SCALE || !value) ++ return -EINVAL; ++ ++ cpuratios[i] = - (int) (TIME_RATIO_SCALE / value); ++ } else { ++ err = strict_strtoul(p, 10, &value); ++ if (err || value > TIME_RATIO_SCALE || !value) ++ return -EINVAL; ++ ++ cpuratios[i] = value; ++ } ++ ++ p = end + 1; ++ } ++ ++ for (i = 0; i < SCAN_LADDER_SIZE; i++) { ++ rung = &uksm_scan_ladder[i]; ++ ++ rung->cpu_ratio = cpuratios[i]; ++ } ++ ++ return count; ++} ++UKSM_ATTR(cpu_ratios); ++ ++static ssize_t eval_intervals_show(struct kobject *kobj, ++ struct kobj_attribute *attr, char *buf) ++{ ++ int i, size; ++ struct scan_rung *rung; ++ char *p = buf; ++ ++ for (i = 0; i < SCAN_LADDER_SIZE; i++) { ++ rung = &uksm_scan_ladder[i]; ++ size = sprintf(p, "%u ", rung->cover_msecs); ++ p += size; ++ } ++ ++ *p++ = '\n'; ++ *p = '\0'; ++ ++ return p - buf; ++} ++ ++static ssize_t eval_intervals_store(struct kobject *kobj, ++ struct kobj_attribute *attr, ++ const char *buf, size_t count) ++{ ++ int i, err; ++ unsigned long values[SCAN_LADDER_SIZE]; ++ struct scan_rung *rung; ++ char *p, *end = NULL; ++ ++ p = kzalloc(count, GFP_KERNEL); ++ if (!p) ++ return -ENOMEM; ++ ++ memcpy(p, buf, count); ++ ++ for (i = 0; i < SCAN_LADDER_SIZE; i++) { ++ if (i != SCAN_LADDER_SIZE -1) { ++ end = strchr(p, ' '); ++ if (!end) ++ return -EINVAL; ++ ++ *end = '\0'; ++ } ++ ++ err = 
strict_strtoul(p, 10, &values[i]); ++ if (err) ++ return -EINVAL; ++ ++ p = end + 1; ++ } ++ ++ for (i = 0; i < SCAN_LADDER_SIZE; i++) { ++ rung = &uksm_scan_ladder[i]; ++ ++ rung->cover_msecs = values[i]; ++ } ++ ++ return count; ++} ++UKSM_ATTR(eval_intervals); ++ ++static ssize_t ema_per_page_time_show(struct kobject *kobj, ++ struct kobj_attribute *attr, char *buf) ++{ ++ return sprintf(buf, "%lu\n", uksm_ema_page_time); ++} ++UKSM_ATTR_RO(ema_per_page_time); ++ ++static ssize_t pages_shared_show(struct kobject *kobj, ++ struct kobj_attribute *attr, char *buf) ++{ ++ return sprintf(buf, "%lu\n", uksm_pages_shared); ++} ++UKSM_ATTR_RO(pages_shared); ++ ++static ssize_t pages_sharing_show(struct kobject *kobj, ++ struct kobj_attribute *attr, char *buf) ++{ ++ return sprintf(buf, "%lu\n", uksm_pages_sharing); ++} ++UKSM_ATTR_RO(pages_sharing); ++ ++static ssize_t pages_unshared_show(struct kobject *kobj, ++ struct kobj_attribute *attr, char *buf) ++{ ++ return sprintf(buf, "%lu\n", uksm_pages_unshared); ++} ++UKSM_ATTR_RO(pages_unshared); ++ ++static ssize_t full_scans_show(struct kobject *kobj, ++ struct kobj_attribute *attr, char *buf) ++{ ++ return sprintf(buf, "%llu\n", fully_scanned_round); ++} ++UKSM_ATTR_RO(full_scans); ++ ++static ssize_t pages_scanned_show(struct kobject *kobj, ++ struct kobj_attribute *attr, char *buf) ++{ ++ unsigned long base = 0; ++ u64 delta, ret; ++ ++ if (pages_scanned_stored) { ++ base = pages_scanned_base; ++ ret = pages_scanned_stored; ++ delta = uksm_pages_scanned >> base; ++ if (CAN_OVERFLOW_U64(ret, delta)) { ++ ret >>= 1; ++ delta >>= 1; ++ base++; ++ ret += delta; ++ } ++ } else { ++ ret = uksm_pages_scanned; ++ } ++ ++ while (ret > ULONG_MAX) { ++ ret >>= 1; ++ base++; ++ } ++ ++ if (base) ++ return sprintf(buf, "%lu * 2^%lu\n", (unsigned long)ret, base); ++ else ++ return sprintf(buf, "%lu\n", (unsigned long)ret); ++} ++UKSM_ATTR_RO(pages_scanned); ++ ++static ssize_t hash_strength_show(struct kobject *kobj, ++ struct kobj_attribute *attr, char *buf) ++{ ++ return sprintf(buf, "%lu\n", hash_strength); ++} ++UKSM_ATTR_RO(hash_strength); ++ ++static ssize_t sleep_times_show(struct kobject *kobj, ++ struct kobj_attribute *attr, char *buf) ++{ ++ return sprintf(buf, "%llu\n", uksm_sleep_times); ++} ++UKSM_ATTR_RO(sleep_times); ++ ++ ++static struct attribute *uksm_attrs[] = { ++ &max_cpu_percentage_attr.attr, ++ &sleep_millisecs_attr.attr, ++ &cpu_governor_attr.attr, ++ &run_attr.attr, ++ &ema_per_page_time_attr.attr, ++ &pages_shared_attr.attr, ++ &pages_sharing_attr.attr, ++ &pages_unshared_attr.attr, ++ &full_scans_attr.attr, ++ &pages_scanned_attr.attr, ++ &hash_strength_attr.attr, ++ &sleep_times_attr.attr, ++ &thrash_threshold_attr.attr, ++ &abundant_threshold_attr.attr, ++ &cpu_ratios_attr.attr, ++ &eval_intervals_attr.attr, ++ NULL, ++}; ++ ++static struct attribute_group uksm_attr_group = { ++ .attrs = uksm_attrs, ++ .name = "uksm", ++}; ++#endif /* CONFIG_SYSFS */ ++ ++static inline void init_scan_ladder(void) ++{ ++ int i; ++ struct scan_rung *rung; ++ ++ for (i = 0; i < SCAN_LADDER_SIZE; i++) { ++ rung = uksm_scan_ladder + i; ++ slot_tree_init_root(&rung->vma_root); ++ } ++ ++ init_performance_values(); ++ uksm_calc_scan_pages(); ++} ++ ++static inline int cal_positive_negative_costs(void) ++{ ++ struct page *p1, *p2; ++ unsigned char *addr1, *addr2; ++ unsigned long i, time_start, hash_cost; ++ unsigned long loopnum = 0; ++ ++ /*IMPORTANT: volatile is needed to prevent over-optimization by gcc. 
*/ ++ volatile u32 hash; ++ volatile int ret; ++ ++ p1 = alloc_page(GFP_KERNEL); ++ if (!p1) ++ return -ENOMEM; ++ ++ p2 = alloc_page(GFP_KERNEL); ++ if (!p2) ++ return -ENOMEM; ++ ++ addr1 = kmap_atomic(p1, KM_USER0); ++ addr2 = kmap_atomic(p2, KM_USER1); ++ memset(addr1, random32(), PAGE_SIZE); ++ memcpy(addr2, addr1, PAGE_SIZE); ++ ++ /* make sure that the two pages differ in last byte */ ++ addr2[PAGE_SIZE-1] = ~addr2[PAGE_SIZE-1]; ++ kunmap_atomic(addr2, KM_USER1); ++ kunmap_atomic(addr1, KM_USER0); ++ ++ time_start = jiffies; ++ while (jiffies - time_start < 100) { ++ for (i = 0; i < 100; i++) ++ hash = page_hash(p1, HASH_STRENGTH_FULL, 0); ++ loopnum += 100; ++ } ++ hash_cost = (jiffies - time_start); ++ ++ time_start = jiffies; ++ for (i = 0; i < loopnum; i++) ++ ret = pages_identical(p1, p2); ++ memcmp_cost = HASH_STRENGTH_FULL * (jiffies - time_start); ++ memcmp_cost /= hash_cost; ++ printk(KERN_INFO "UKSM: relative memcmp_cost = %lu " ++ "hash=%u cmp_ret=%d.\n", ++ memcmp_cost, hash, ret); ++ ++ __free_page(p1); ++ __free_page(p2); ++ return 0; ++} ++ ++static int init_zeropage_hash_table(void) ++{ ++ struct page *page; ++ char *addr; ++ int i; ++ ++ page = alloc_page(GFP_KERNEL); ++ if (!page) ++ return -ENOMEM; ++ ++ addr = kmap_atomic(page, KM_USER0); ++ memset(addr, 0, PAGE_SIZE); ++ kunmap_atomic(addr, KM_USER0); ++ ++ zero_hash_table = kmalloc(HASH_STRENGTH_MAX * sizeof(u32), ++ GFP_KERNEL); ++ if (!zero_hash_table) ++ return -ENOMEM; ++ ++ for (i = 0; i < HASH_STRENGTH_MAX; i++) ++ zero_hash_table[i] = page_hash(page, i, 0); ++ ++ __free_page(page); ++ ++ return 0; ++} ++ ++static inline int init_random_sampling(void) ++{ ++ unsigned long i; ++ random_nums = kmalloc(PAGE_SIZE, GFP_KERNEL); ++ if (!random_nums) ++ return -ENOMEM; ++ ++ for (i = 0; i < HASH_STRENGTH_FULL; i++) ++ random_nums[i] = i; ++ ++ for (i = 0; i < HASH_STRENGTH_FULL; i++) { ++ unsigned long rand_range, swap_index, tmp; ++ ++ rand_range = HASH_STRENGTH_FULL - i; ++ swap_index = i + random32() % rand_range; ++ tmp = random_nums[i]; ++ random_nums[i] = random_nums[swap_index]; ++ random_nums[swap_index] = tmp; ++ } ++ ++ rshash_state.state = RSHASH_NEW; ++ rshash_state.below_count = 0; ++ rshash_state.lookup_window_index = 0; ++ ++ return cal_positive_negative_costs(); ++} ++ ++static int __init uksm_slab_init(void) ++{ ++ rmap_item_cache = UKSM_KMEM_CACHE(rmap_item, 0); ++ if (!rmap_item_cache) ++ goto out; ++ ++ stable_node_cache = UKSM_KMEM_CACHE(stable_node, 0); ++ if (!stable_node_cache) ++ goto out_free1; ++ ++ node_vma_cache = UKSM_KMEM_CACHE(node_vma, 0); ++ if (!node_vma_cache) ++ goto out_free2; ++ ++ vma_slot_cache = UKSM_KMEM_CACHE(vma_slot, 0); ++ if (!vma_slot_cache) ++ goto out_free3; ++ ++ tree_node_cache = UKSM_KMEM_CACHE(tree_node, 0); ++ if (!tree_node_cache) ++ goto out_free4; ++ ++ return 0; ++ ++out_free4: ++ kmem_cache_destroy(vma_slot_cache); ++out_free3: ++ kmem_cache_destroy(node_vma_cache); ++out_free2: ++ kmem_cache_destroy(stable_node_cache); ++out_free1: ++ kmem_cache_destroy(rmap_item_cache); ++out: ++ return -ENOMEM; ++} ++ ++static void __init uksm_slab_free(void) ++{ ++ kmem_cache_destroy(stable_node_cache); ++ kmem_cache_destroy(rmap_item_cache); ++ kmem_cache_destroy(node_vma_cache); ++ kmem_cache_destroy(vma_slot_cache); ++ kmem_cache_destroy(tree_node_cache); ++} ++ ++/* Common interface to ksm, different to it. 
*/ ++int ksm_madvise(struct vm_area_struct *vma, unsigned long start, ++ unsigned long end, int advice, unsigned long *vm_flags) ++{ ++ int err; ++ ++ switch (advice) { ++ case MADV_MERGEABLE: ++ return 0; /* just ignore the advice */ ++ ++ case MADV_UNMERGEABLE: ++ if (!(*vm_flags & VM_MERGEABLE)) ++ return 0; /* just ignore the advice */ ++ ++ if (vma->anon_vma) { ++ err = unmerge_uksm_pages(vma, start, end); ++ if (err) ++ return err; ++ } ++ ++ uksm_remove_vma(vma); ++ *vm_flags &= ~VM_MERGEABLE; ++ break; ++ } ++ ++ return 0; ++} ++ ++/* Common interface to ksm, actually the same. */ ++struct page *ksm_does_need_to_copy(struct page *page, ++ struct vm_area_struct *vma, unsigned long address) ++{ ++ struct page *new_page; ++ ++ new_page = alloc_page_vma(GFP_HIGHUSER_MOVABLE, vma, address); ++ if (new_page) { ++ copy_user_highpage(new_page, page, address, vma); ++ ++ SetPageDirty(new_page); ++ __SetPageUptodate(new_page); ++ SetPageSwapBacked(new_page); ++ __set_page_locked(new_page); ++ ++ if (page_evictable(new_page, vma)) ++ lru_cache_add_lru(new_page, LRU_ACTIVE_ANON); ++ else ++ add_page_to_unevictable_list(new_page); ++ } ++ ++ return new_page; ++} ++ ++static int __init uksm_init(void) ++{ ++ struct task_struct *uksm_thread; ++ int err; ++ ++ uksm_sleep_jiffies = msecs_to_jiffies(100); ++ uksm_sleep_saved = uksm_sleep_jiffies; ++ ++ slot_tree_init(); ++ init_scan_ladder(); ++ ++ ++ err = init_random_sampling(); ++ if (err) ++ goto out_free2; ++ ++ err = uksm_slab_init(); ++ if (err) ++ goto out_free1; ++ ++ err = init_zeropage_hash_table(); ++ if (err) ++ goto out_free0; ++ ++ uksm_thread = kthread_run(uksm_scan_thread, NULL, "uksmd"); ++ if (IS_ERR(uksm_thread)) { ++ printk(KERN_ERR "uksm: creating kthread failed\n"); ++ err = PTR_ERR(uksm_thread); ++ goto out_free; ++ } ++ ++#ifdef CONFIG_SYSFS ++ err = sysfs_create_group(mm_kobj, &uksm_attr_group); ++ if (err) { ++ printk(KERN_ERR "uksm: register sysfs failed\n"); ++ kthread_stop(uksm_thread); ++ goto out_free; ++ } ++#else ++ uksm_run = UKSM_RUN_MERGE; /* no way for user to start it */ ++ ++#endif /* CONFIG_SYSFS */ ++ ++#ifdef CONFIG_MEMORY_HOTREMOVE ++ /* ++ * Choose a high priority since the callback takes uksm_thread_mutex: ++ * later callbacks could only be taking locks which nest within that. 
++ */ ++ hotplug_memory_notifier(uksm_memory_callback, 100); ++#endif ++ return 0; ++ ++out_free: ++ kfree(zero_hash_table); ++out_free0: ++ uksm_slab_free(); ++out_free1: ++ kfree(random_nums); ++out_free2: ++ kfree(uksm_scan_ladder); ++ return err; ++} ++ ++#ifdef MODULE ++module_init(uksm_init) ++#else ++late_initcall(uksm_init); ++#endif ++ +diff --git a/mm/vmstat.c b/mm/vmstat.c +index f600557..1e124c3 100644 +--- a/mm/vmstat.c ++++ b/mm/vmstat.c +@@ -719,6 +719,9 @@ const char * const vmstat_text[] = { + "numa_other", + #endif + "nr_anon_transparent_hugepages", ++#ifdef CONFIG_UKSM ++ "nr_uksm_zero_pages", ++#endif + "nr_dirty_threshold", + "nr_dirty_background_threshold", + diff --git a/3.3.8/v3.2-ARM-kirkwood-Add-support-for-Buffalo-LS-VL.patch b/3.3.8/v3.2-ARM-kirkwood-Add-support-for-Buffalo-LS-VL.patch new file mode 100644 index 0000000..a26d2b1 --- /dev/null +++ b/3.3.8/v3.2-ARM-kirkwood-Add-support-for-Buffalo-LS-VL.patch @@ -0,0 +1,381 @@ +diff -uNr linux-3.2.33-go.orig/arch/arm/mach-kirkwood/Kconfig linux-3.2.33-go/arch/arm/mach-kirkwood/Kconfig +--- linux-3.2.33-go.orig/arch/arm/mach-kirkwood/Kconfig 2012-11-14 21:20:22.326388580 +0100 ++++ linux-3.2.33-go/arch/arm/mach-kirkwood/Kconfig 2012-11-14 21:21:02.353908681 +0100 +@@ -136,6 +136,12 @@ + Say 'Y' here if you want your kernel to support the + Buffalo LS-XHL Series. + ++config MACH_LSVL ++ bool "Buffalo LS-VL Series" ++ help ++ Say 'Y' here if you want your kernel to support the ++ Buffalo LS-VL Series. ++ + endmenu + + endif +diff -uNr linux-3.2.33-go.orig/arch/arm/mach-kirkwood/lsvl-setup.c linux-3.2.33-go/arch/arm/mach-kirkwood/lsvl-setup.c +--- linux-3.2.33-go.orig/arch/arm/mach-kirkwood/lsvl-setup.c 1970-01-01 01:00:00.000000000 +0100 ++++ linux-3.2.33-go/arch/arm/mach-kirkwood/lsvl-setup.c 2012-11-14 21:22:54.158568343 +0100 +@@ -0,0 +1,340 @@ ++/* ++ * arch/arm/mach-kirkwood/lsvl-setup.c ++ * ++ * Buffalo LS-VL Series Setup ++ * ++ * This file is licensed under the terms of the GNU General Public ++ * License version 2. This program is licensed "as is" without any ++ * warranty of any kind, whether express or implied. 
++ */ ++ ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include "common.h" ++#include "mpp.h" ++ ++/***************************************************************************** ++ * 512KB SPI Flash on BOOT Device ++ ****************************************************************************/ ++static struct mtd_partition lsvl_partitions[] = { ++ { ++ .name = "u-boot", ++ .size = 0x80000, ++ .offset = 0x00000, ++ .mask_flags = MTD_WRITEABLE, /* force read-only */ ++ } ++}; ++ ++static struct flash_platform_data lsvl_spi_slave_data = { ++ .type = "m25p40-nonjedec", ++ .parts = lsvl_partitions, ++ .nr_parts = ARRAY_SIZE(lsvl_partitions), ++}; ++ ++static struct spi_board_info __initdata lsvl_spi_slave_info[] = { ++ { ++ .modalias = "m25p80", ++ .platform_data = &lsvl_spi_slave_data, ++ .irq = -1, ++ .max_speed_hz = 20000000, ++ .bus_num = 0, ++ .chip_select = 0, ++ } ++}; ++ ++/***************************************************************************** ++ * Ethernet ++ ****************************************************************************/ ++static struct mv643xx_eth_platform_data lsvl_ge00_data = { ++ .phy_addr = MV643XX_ETH_PHY_ADDR(0), ++}; ++ ++/***************************************************************************** ++ * SATA ++ ****************************************************************************/ ++static struct mv_sata_platform_data lsvl_sata_data = { ++ .n_ports = 1, ++}; ++ ++/***************************************************************************** ++ * LEDs attached to GPIO ++ ****************************************************************************/ ++#define LSVL_GPIO_LED_ALARM 36 ++#define LSVL_GPIO_LED_FUNC_RED 37 ++#define LSVL_GPIO_LED_INFO 38 ++#define LSVL_GPIO_LED_FUNC_BLUE 39 ++#define LSVL_GPIO_LED_PWR 40 ++ ++static struct gpio_led lsvl_led_pins[] = { ++ { ++ .name = "alarm:red", ++ .gpio = LSVL_GPIO_LED_ALARM, ++ }, ++ { ++ .name = "func:red:bottom", ++ .gpio = LSVL_GPIO_LED_FUNC_RED, ++ }, ++ { ++ .name = "info:amber", ++ .gpio = LSVL_GPIO_LED_INFO, ++ }, ++ { ++ .name = "func:blue:bottom", ++ .gpio = LSVL_GPIO_LED_FUNC_BLUE, ++ }, ++ ++ { ++ .name = "power:blue", ++ .default_trigger = "default-on", ++ .gpio = LSVL_GPIO_LED_PWR, ++ .active_low = 1, ++ }, ++}; ++ ++static struct gpio_led_platform_data lsvl_led_data = { ++ .leds = lsvl_led_pins, ++ .num_leds = ARRAY_SIZE(lsvl_led_pins), ++}; ++ ++static struct platform_device lsvl_leds = { ++ .name = "leds-gpio", ++ .id = -1, ++ .dev = { ++ .platform_data = &lsvl_led_data, ++ } ++}; ++ ++/***************************************************************************** ++ * General Setup ++ ****************************************************************************/ ++#define LSVL_GPIO_HDD_POWER 8 ++#define LSVL_GPIO_USB_POWER 12 ++ ++/***************************************************************************** ++ * GPIO Attached Keys ++ ****************************************************************************/ ++/*#define LSVL_GPIO_KEY_FUNC 45 ++#define LSVL_GPIO_KEY_POWER 46 ++#define LSVL_GPIO_KEY_AUTOPOWER 47 ++#define LSVL_SW_POWER 0x00 ++#define LSVL_SW_AUTOPOWER 0x01 ++#define LSVL_SW_FUNC 0x02 ++ ++static struct gpio_keys_button lsvl_buttons[] = { ++ { ++ .type = EV_SW, ++ .code = LSVL_SW_POWER, ++ .gpio = LSVL_GPIO_KEY_POWER, ++ .desc = "Power-on Switch", ++ .active_low = 1, ++ }, { ++ .type = EV_SW, ++ .code = LSVL_SW_AUTOPOWER, 
++ .gpio = LSVL_GPIO_KEY_AUTOPOWER, ++ .desc = "Power-auto Switch", ++ .active_low = 1, ++ }, { ++ .type = EV_SW, ++ .code = LSVL_SW_FUNC, ++ .gpio = LSVL_GPIO_KEY_FUNC, ++ .desc = "Function Button", ++ .active_low = 1, ++ }, ++}; ++ ++static struct gpio_keys_platform_data lsvl_button_data = { ++ .buttons = lsvl_buttons, ++ .nbuttons = ARRAY_SIZE(lsvl_buttons), ++}; ++ ++static struct platform_device lsvl_button_device = { ++ .name = "gpio-keys", ++ .id = -1, ++ .num_resources = 0, ++ .dev = { ++ .platform_data = &lsvl_button_data, ++ }, ++}; ++*/ ++ ++/***************************************************************************** ++ * GPIO Fan ++ ****************************************************************************/ ++#define LSVL_GPIO_FAN_HIGH 16 ++#define LSVL_GPIO_FAN_LOW 17 ++#define LSVL_GPIO_FAN_LOCK 43 ++ ++static struct gpio_fan_alarm lsvl_alarm = { ++ .gpio = LSVL_GPIO_FAN_LOCK, ++}; ++ ++static struct gpio_fan_speed lsvl_speeds[] = { ++ { ++ .rpm = 0, ++ .ctrl_val = 3, ++ }, { ++ .rpm = 1500, ++ .ctrl_val = 1, ++ }, { ++ .rpm = 3250, ++ .ctrl_val = 2, ++ }, { ++ .rpm = 5000, ++ .ctrl_val = 0, ++ } ++}; ++ ++static int lsvl_gpio_list[] = { ++ LSVL_GPIO_FAN_HIGH, LSVL_GPIO_FAN_LOW, ++}; ++ ++static struct gpio_fan_platform_data lsvl_fan_data = { ++ .num_ctrl = ARRAY_SIZE(lsvl_gpio_list), ++ .ctrl = lsvl_gpio_list, ++ .alarm = &lsvl_alarm, ++ .num_speed = ARRAY_SIZE(lsvl_speeds), ++ .speed = lsvl_speeds, ++}; ++ ++static struct platform_device lsvl_fan_device = { ++ .name = "gpio-fan", ++ .id = -1, ++ .num_resources = 0, ++ .dev = { ++ .platform_data = &lsvl_fan_data, ++ }, ++}; ++ ++/***************************************************************************** ++ * GPIO Data ++ ****************************************************************************/ ++ ++static unsigned int lsvl_mpp_config[] __initdata = { ++ MPP0_NF_IO2, ++ MPP1_NF_IO3, ++ MPP2_NF_IO4, ++ MPP3_NF_IO5, ++ MPP4_NF_IO6, ++ MPP5_NF_IO7, ++ MPP6_SYSRST_OUTn, ++ MPP7_SPI_SCn, ++ MPP8_GPIO, /* HDD Power */ ++ MPP9_GPIO, ++ MPP10_UART0_TXD, ++ MPP11_UART0_RXD, ++ MPP12_GPO, /* USB VBUS EN */ ++ MPP13_GPIO, ++ MPP14_GPIO, ++ MPP15_GPIO, ++ MPP16_GPIO, /* FAN HIGH: on:0, off:1 */ ++ MPP17_GPIO, /* FAN LOW: on:0, off:1 */ ++ MPP18_NF_IO0, ++ MPP19_NF_IO1, ++ MPP20_GPIO, ++ MPP21_GPIO, ++ MPP22_GPIO, ++ MPP23_GPIO, ++ MPP24_GPIO, ++ MPP25_GPIO, ++ MPP26_GPIO, ++ MPP27_GPIO, ++ MPP28_GPIO, ++ MPP29_GPIO, ++ MPP30_GPIO, ++ MPP31_GPIO, ++ MPP32_GPIO, ++ MPP33_GPO, ++ MPP34_GPIO, ++ MPP35_GPIO, ++ MPP36_GPIO, /* ALARM LED */ ++ MPP37_GPIO, /* FUNC RED LED */ ++ MPP38_GPIO, /* INFO LED */ ++ MPP39_GPIO, /* FUNC LED */ ++ MPP40_GPIO, /* POWER LED */ ++ MPP41_GPIO, ++ MPP42_GPIO, ++ MPP43_GPIO, /* FAN LOCK */ ++ MPP44_GPIO, ++ MPP45_GPIO, /* FUNC SW */ ++ MPP46_GPIO, /* POWER SW */ ++ MPP47_GPIO, /* POWER AUTO SW */ ++ MPP48_GPIO, /* UART EN */ ++ MPP49_GPIO, ++ 0 ++}; ++ ++/***************************************************************************** ++ * LS-VL specific power off method: reboot ++ ****************************************************************************/ ++/* ++ * On the LS-VL, the shutdown process is following: ++ * - Userland monitors key events until the power switch goes to off position ++ * - The board reboots ++ * - U-boot starts and goes into an idle mode waiting for the user ++ * to move the switch to ON position ++ * ++ */ ++ ++static void lsvl_power_off(void) ++{ ++ arm_machine_restart('h', NULL); ++} ++ ++static void __init lsvl_init(void) ++{ ++ /* ++ * Basic setup. 
Needs to be called early. ++ */ ++ kirkwood_init(); ++ kirkwood_mpp_conf(lsvl_mpp_config); ++ ++ /* ++ * Configure peripherals. ++ */ ++ kirkwood_uart0_init(); ++ kirkwood_ehci_init(); ++ kirkwood_ge00_init(&lsvl_ge00_data); ++ kirkwood_sata_init(&lsvl_sata_data); ++ kirkwood_spi_init(); ++ ++ platform_device_register(&lsvl_leds); ++// platform_device_register(&lsvl_button_device); ++ platform_device_register(&lsvl_fan_device); ++ ++ spi_register_board_info(lsvl_spi_slave_info, ++ ARRAY_SIZE(lsvl_spi_slave_info)); ++ ++ /* usb power on */ ++ gpio_set_value(LSVL_GPIO_USB_POWER, 1); ++ ++ /* register power-off method */ ++ pm_power_off = lsvl_power_off; ++ ++ pr_info("%s: finished\n", __func__); ++} ++ ++MACHINE_START(LSVL, "Buffalo LS-VL Series") ++ .atag_offset = 0x100, ++ .init_machine = lsvl_init, ++ .map_io = kirkwood_map_io, ++ .init_early = kirkwood_init_early, ++ .init_irq = kirkwood_init_irq, ++ .timer = &kirkwood_timer, ++MACHINE_END ++ +diff -uNr linux-3.2.33-go.orig/arch/arm/mach-kirkwood/Makefile linux-3.2.33-go/arch/arm/mach-kirkwood/Makefile +--- linux-3.2.33-go.orig/arch/arm/mach-kirkwood/Makefile 2012-11-14 21:20:22.326388580 +0100 ++++ linux-3.2.33-go/arch/arm/mach-kirkwood/Makefile 2012-11-14 21:22:20.882968794 +0100 +@@ -19,5 +19,6 @@ + obj-$(CONFIG_MACH_NET5BIG_V2) += netxbig_v2-setup.o lacie_v2-common.o + obj-$(CONFIG_MACH_T5325) += t5325-setup.o + obj-$(CONFIG_MACH_LSXHL) += lsxhl-setup.o ++obj-$(CONFIG_MACH_LSVL) += lsvl-setup.o + + obj-$(CONFIG_CPU_IDLE) += cpuidle.o +diff -uNr linux-3.2.33-go.orig/arch/arm/tools/mach-types linux-3.2.33-go/arch/arm/tools/mach-types +--- linux-3.2.33-go.orig/arch/arm/tools/mach-types 2012-11-14 21:20:22.348388327 +0100 ++++ linux-3.2.33-go/arch/arm/tools/mach-types 2012-11-14 21:21:02.356908648 +0100 +@@ -118,6 +118,7 @@ + omap_osk MACH_OMAP_OSK OMAP_OSK 515 + tosa MACH_TOSA TOSA 520 + avila MACH_AVILA AVILA 526 ++lsvl MACH_LSVL LSVL 5277 + edb9302 MACH_EDB9302 EDB9302 538 + husky MACH_HUSKY HUSKY 543 + shepherd MACH_SHEPHERD SHEPHERD 545 diff --git a/3.3.8/v3.2-ARM-kirkwood-Add-support-for-Buffalo-LS-WVL.patch b/3.3.8/v3.2-ARM-kirkwood-Add-support-for-Buffalo-LS-WVL.patch new file mode 100644 index 0000000..ba58859 --- /dev/null +++ b/3.3.8/v3.2-ARM-kirkwood-Add-support-for-Buffalo-LS-WVL.patch @@ -0,0 +1,538 @@ +diff -uNr linux-3.2.34-go.orig/arch/arm/mach-kirkwood/Kconfig linux-3.2.34-go/arch/arm/mach-kirkwood/Kconfig +--- linux-3.2.34-go.orig/arch/arm/mach-kirkwood/Kconfig 2012-11-19 21:03:42.654743005 +0100 ++++ linux-3.2.34-go/arch/arm/mach-kirkwood/Kconfig 2012-11-19 21:04:02.744505974 +0100 +@@ -148,6 +148,12 @@ + Say 'Y' here if you want your kernel to support the + Buffalo LS-CHLv2 Series. + ++config MACH_LSWVL ++ bool "Buffalo LS-WVL Series" ++ help ++ Say 'Y' here if you want your kernel to support the ++ Buffalo LS-WVL/E-AP NAS ++ + endmenu + + endif +diff -uNr linux-3.2.34-go.orig/arch/arm/mach-kirkwood/lswvl-setup.c linux-3.2.34-go/arch/arm/mach-kirkwood/lswvl-setup.c +--- linux-3.2.34-go.orig/arch/arm/mach-kirkwood/lswvl-setup.c 1970-01-01 01:00:00.000000000 +0100 ++++ linux-3.2.34-go/arch/arm/mach-kirkwood/lswvl-setup.c 2012-11-19 21:04:02.745505962 +0100 +@@ -0,0 +1,366 @@ ++/* ++ * arch/arm/mach-kirkwood/lswvl-setup.c ++ * ++ * Buffalo LS-WVL Series Setup ++ * ++ * This file is licensed under the terms of the GNU General Public ++ * License version 2. This program is licensed "as is" without any ++ * warranty of any kind, whether express or implied. 
++ */ ++ ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include "common.h" ++#include "mpp.h" ++ ++ ++/***************************************************************************** ++ * 512MB NAND Flash on Device bus CS0 ++ ****************************************************************************/ ++static struct mtd_partition lswvl_nand_parts[] = { ++ { ++ .name = "boot", ++ .offset = 0, ++ .size = 16 * 1024 * 1024, ++ }, { ++ .name = "rootfs", ++ .offset = MTDPART_OFS_NXTBLK, ++ .size = 488 * 1024 * 1024, ++ }, { ++ .name = "reserve", ++ .offset = MTDPART_OFS_NXTBLK, ++ .size = MTDPART_SIZ_FULL, ++ }, ++}; ++ ++/***************************************************************************** ++ * 512KB NOR Flash on BOOT Device ++ ****************************************************************************/ ++static struct mtd_partition lswvl_partitions[] = { ++ { ++ .name = "u-boot", ++ .size = 0x80000, ++ .offset = 0x00000, ++ .mask_flags = MTD_WRITEABLE, /* force read-only */ ++ }, ++}; ++ ++static struct flash_platform_data lswvl_spi_slave_data = { ++ .parts = lswvl_partitions, ++ .nr_parts = ARRAY_SIZE(lswvl_partitions), ++}; ++ ++static struct spi_board_info __initdata lswvl_spi_slave_info[] = { ++ { ++ .modalias = "m25p80", ++ .platform_data = &lswvl_spi_slave_data, ++ .irq = -1, ++ .max_speed_hz = 20000000, ++ .bus_num = 0, ++ .chip_select = 0, ++ }, ++}; ++ ++/***************************************************************************** ++ * Ethernet ++ ****************************************************************************/ ++static struct mv643xx_eth_platform_data lswvl_ge00_data = { ++ .phy_addr = MV643XX_ETH_PHY_ADDR(0), ++}; ++ ++/***************************************************************************** ++ * SATA ++ ****************************************************************************/ ++static struct mv_sata_platform_data lswvl_sata_data = { ++ .n_ports = 2, ++}; ++ ++/***************************************************************************** ++ * LEDs attached to GPIO ++ ****************************************************************************/ ++#define LSWVL_GPIO_LED_HDDERR0 34 ++#define LSWVL_GPIO_LED_HDDERR1 35 ++#define LSWVL_GPIO_LED_ALARM 36 ++#define LSWVL_GPIO_LED_FUNC_RED 37 ++#define LSWVL_GPIO_LED_INFO 38 ++#define LSWVL_GPIO_LED_FUNC_BLUE 39 ++#define LSWVL_GPIO_LED_PWR 40 ++ ++static struct gpio_led lswvl_led_pins[] = { ++ { ++ .name = "lswvl:hdderr:0", ++ .gpio = LSWVL_GPIO_LED_HDDERR0, ++ }, { ++ .name = "lswvl:hdderr:1", ++ .gpio = LSWVL_GPIO_LED_HDDERR1, ++ }, { ++ .name = "lswvl:alarm:red", ++ .gpio = LSWVL_GPIO_LED_ALARM, ++ }, { ++ .name = "lswvl:func:red", ++ .gpio = LSWVL_GPIO_LED_FUNC_RED, ++ }, { ++ .name = "lswvl:info:amber", ++ .gpio = LSWVL_GPIO_LED_INFO, ++ }, { ++ .name = "lswvl:func:blue", ++ .gpio = LSWVL_GPIO_LED_FUNC_BLUE, ++ }, { ++ .name = "lswvl:power:blue", ++ .default_trigger = "default-on", ++ .gpio = LSWVL_GPIO_LED_PWR, ++ .active_low = 1, ++ }, ++}; ++ ++static struct gpio_led_platform_data lswvl_led_data = { ++ .leds = lswvl_led_pins, ++ .num_leds = ARRAY_SIZE(lswvl_led_pins), ++}; ++ ++static struct platform_device lswvl_leds = { ++ .name = "leds-gpio", ++ .id = -1, ++ .dev = { ++ .platform_data = &lswvl_led_data, ++ } ++}; ++ ++/***************************************************************************** ++ * General Setup ++ 
****************************************************************************/ ++#define LSWVL_GPIO_HDD0_POWER 8 ++#define LSWVL_GPIO_HDD1_POWER 9 ++#define LSWVL_GPIO_USB_POWER 12 ++ ++/***************************************************************************** ++ * GPIO Attached Keys ++ ****************************************************************************/ ++#define LSWVL_GPIO_KEY_FUNC 45 ++#define LSWVL_GPIO_KEY_POWER 46 ++#define LSWVL_GPIO_KEY_AUTOPOWER 47 ++#define LSWVL_SW_POWER 0x00 ++#define LSWVL_SW_AUTOPOWER 0x01 ++#define LSWVL_SW_FUNC 0x02 ++ ++static struct gpio_keys_button lswvl_buttons[] = { ++ { ++ .type = EV_KEY, ++ .code = BTN_1, ++ .gpio = LSWVL_GPIO_KEY_POWER, ++ .desc = "power-on", ++ .active_low = 1, ++ }, { ++ .type = EV_KEY, ++ .code = BTN_2, ++ .gpio = LSWVL_GPIO_KEY_AUTOPOWER, ++ .desc = "power-auto", ++ .active_low = 1, ++ }, { ++ .type = EV_KEY, ++ .code = BTN_0, ++ .gpio = LSWVL_GPIO_KEY_FUNC, ++ .desc = "function", ++ .active_low = 1, ++ }, ++}; ++ ++static struct gpio_keys_platform_data lswvl_button_data = { ++ .buttons = lswvl_buttons, ++ .nbuttons = ARRAY_SIZE(lswvl_buttons), ++}; ++ ++static struct platform_device lswvl_button_device = { ++ .name = "gpio-keys", ++ .id = -1, ++ .num_resources = 0, ++ .dev = { ++ .platform_data = &lswvl_button_data, ++ }, ++}; ++ ++/***************************************************************************** ++ * GPIO Fan ++ ****************************************************************************/ ++#define LSWVL_GPIO_FAN_HIGH 16 ++#define LSWVL_GPIO_FAN_LOW 17 ++#define LSWVL_GPIO_FAN_LOCK 43 ++ ++static struct gpio_fan_alarm lswvl_alarm = { ++ .gpio = LSWVL_GPIO_FAN_LOCK, ++}; ++ ++static struct gpio_fan_speed lswvl_speeds[] = { ++ { ++ .rpm = 0, ++ .ctrl_val = 3, ++ }, { ++ .rpm = 1500, ++ .ctrl_val = 1, ++ }, { ++ .rpm = 3250, ++ .ctrl_val = 2, ++ }, { ++ .rpm = 5000, ++ .ctrl_val = 0, ++ } ++}; ++ ++static int lswvl_gpio_list[] = { ++ LSWVL_GPIO_FAN_HIGH, LSWVL_GPIO_FAN_LOW, ++}; ++ ++static struct gpio_fan_platform_data lswvl_fan_data = { ++ .num_ctrl = ARRAY_SIZE(lswvl_gpio_list), ++ .ctrl = lswvl_gpio_list, ++ .alarm = &lswvl_alarm, ++ .num_speed = ARRAY_SIZE(lswvl_speeds), ++ .speed = lswvl_speeds, ++}; ++ ++static struct platform_device lswvl_fan_device = { ++ .name = "gpio-fan", ++ .id = -1, ++ .num_resources = 0, ++ .dev = { ++ .platform_data = &lswvl_fan_data, ++ }, ++}; ++ ++/***************************************************************************** ++ * GPIO Data ++ ****************************************************************************/ ++ ++static unsigned int lswvl_mpp_config[] __initdata = { ++ MPP0_NF_IO2, ++ MPP1_NF_IO3, ++ MPP2_NF_IO4, ++ MPP3_NF_IO5, ++ MPP4_NF_IO6, ++ MPP5_NF_IO7, ++ MPP6_SYSRST_OUTn, ++ MPP7_SPI_SCn, ++ MPP8_GPIO, /* HDD Power */ ++ MPP9_GPIO, /* HDD Power */ ++ MPP10_UART0_TXD, ++ MPP11_UART0_RXD, ++ MPP12_GPO, /* USB VBUS EN */ ++ MPP13_GPIO, ++ MPP14_GPIO, ++ MPP15_GPIO, ++ MPP16_GPIO, /* FAN HIGH: on:0, off:1 */ ++ MPP17_GPIO, /* FAN LOW: on:0, off:1 */ ++ MPP18_NF_IO0, ++ MPP19_NF_IO1, ++ MPP20_GPIO, ++ MPP21_GPIO, ++ MPP22_GPIO, ++ MPP23_GPIO, ++ MPP24_GPIO, ++ MPP25_GPIO, ++ MPP26_GPIO, ++ MPP27_GPIO, ++ MPP28_GPIO, ++ MPP29_GPIO, ++ MPP30_GPIO, ++ MPP31_GPIO, ++ MPP32_GPIO, ++ MPP33_GPO, ++ MPP34_GPIO, /*HDD ERROR LED 0*/ ++ MPP35_GPIO, /*HDD ERROR LED 1*/ ++ MPP36_GPIO, /* ALARM LED */ ++ MPP37_GPIO, /* FUNC RED LED */ ++ MPP38_GPIO, /* INFO LED */ ++ MPP39_GPIO, /* FUNC LED */ ++ MPP40_GPIO, /* POWER LED */ ++ MPP41_GPIO, ++ MPP42_GPIO, ++ MPP43_GPIO, 
/* FAN LOCK */ ++ MPP44_GPIO, ++ MPP45_GPIO, /* FUNC SW */ ++ MPP46_GPIO, /* POWER SW */ ++ MPP47_GPIO, /* POWER AUTO SW */ ++ MPP48_GPIO, /* UART EN */ ++ MPP49_GPIO, ++ 0 ++}; ++ ++/***************************************************************************** ++ * LS-WVL specific power off method: reboot ++ ****************************************************************************/ ++/* ++ * On the LS-WVL, the shutdown process is following: ++ * - Userland monitors key events until the power switch goes to off position ++ * - The board reboots ++ * - U-boot starts and goes into an idle mode waiting for the user ++ * to move the switch to ON position ++ * ++ */ ++ ++static void lswvl_power_off(void) ++{ ++ kirkwood_restart('h', NULL); //arm_machine_restart('h', NULL); ++} ++ ++static void __init lswvl_init(void) ++{ ++ /* ++ * Basic setup. Needs to be called early. ++ */ ++ kirkwood_init(); ++ kirkwood_mpp_conf(lswvl_mpp_config); ++ ++ /* ++ * Configure peripherals. ++ */ ++ kirkwood_ge00_init(&lswvl_ge00_data); ++ kirkwood_uart0_init(); ++ kirkwood_uart1_init(); ++ kirkwood_ehci_init(); ++ kirkwood_sata_init(&lswvl_sata_data); ++ ++ spi_register_board_info(lswvl_spi_slave_info, ++ ARRAY_SIZE(lswvl_spi_slave_info)); ++ kirkwood_spi_init(); ++ kirkwood_nand_init(ARRAY_AND_SIZE(lswvl_nand_parts), 25); ++ ++ platform_device_register(&lswvl_leds); ++ platform_device_register(&lswvl_button_device); ++ platform_device_register(&lswvl_fan_device); ++ ++ /* usb power on */ ++ gpio_set_value(LSWVL_GPIO_USB_POWER, 1); ++ ++ /* register power-off method */ ++ pm_power_off = lswvl_power_off; ++ ++ pr_info("%s: finished\n", __func__); ++} ++ ++MACHINE_START(LSWVL, "Buffalo LS-WVL Series") ++ .atag_offset = 0x100, ++ .map_io = kirkwood_map_io, ++ .init_early = kirkwood_init_early, ++ .init_irq = kirkwood_init_irq, ++ .timer = &kirkwood_timer, ++ .init_machine = lswvl_init, ++ .restart = kirkwood_restart, ++MACHINE_END ++ +diff -uNr linux-3.2.34-go.orig/arch/arm/mach-kirkwood/Makefile linux-3.2.34-go/arch/arm/mach-kirkwood/Makefile +--- linux-3.2.34-go.orig/arch/arm/mach-kirkwood/Makefile 2012-11-19 21:03:42.653743017 +0100 ++++ linux-3.2.34-go/arch/arm/mach-kirkwood/Makefile 2012-11-19 21:04:42.686036907 +0100 +@@ -21,5 +21,6 @@ + obj-$(CONFIG_MACH_LINKSTATION_CHLV2) += lschlv2-setup.o + obj-$(CONFIG_MACH_LSXHL) += lsxhl-setup.o + obj-$(CONFIG_MACH_LSVL) += lsvl-setup.o ++obj-$(CONFIG_MACH_LSWVL) += lswvl-setup.o + + obj-$(CONFIG_CPU_IDLE) += cpuidle.o +diff -uNr linux-3.2.34-go.orig/arch/arm/plat-orion/mpp.c linux-3.2.34-go/arch/arm/plat-orion/mpp.c +--- linux-3.2.34-go.orig/arch/arm/plat-orion/mpp.c 2012-11-19 21:03:42.766741717 +0100 ++++ linux-3.2.34-go/arch/arm/plat-orion/mpp.c 2012-11-19 21:04:02.747505938 +0100 +@@ -15,6 +15,7 @@ + #include + #include + #include ++#include + + /* Address of the ith MPP control register */ + static __init unsigned long mpp_ctrl_addr(unsigned int i, +@@ -75,3 +76,37 @@ + } + printk("\n"); + } ++ ++#ifdef CONFIG_MACH_LSWVL ++ ++static u32 boot_mpp_value = 0x21111111; ++/* ++ * change MPP[3:1] to SPI mode ++ */ ++void lswvl_setup_spi_mpp(void) ++{ ++ u32 spival = 0; ++ u32 bootval = 0; ++ ++ spival = 0x00002220; ++ boot_mpp_value = bootval = readl(mpp_ctrl_addr(0, DEV_BUS_VIRT_BASE)); ++ bootval &= 0xffff000f; ++ writel(spival | bootval, mpp_ctrl_addr(0, DEV_BUS_VIRT_BASE)); ++} ++ ++/* ++ * change back MPP[3:1] to default configuration ++ */ ++void lswvl_reset_mpp(void) ++{ ++ u32 spival = 0; ++ u32 bootval = 0; ++ ++ spival = readl(mpp_ctrl_addr(0, 
DEV_BUS_VIRT_BASE)); ++ spival &= 0xffff000f; ++ bootval = boot_mpp_value & ~0xffff000f; ++ writel(spival | bootval, mpp_ctrl_addr(0, DEV_BUS_VIRT_BASE)); ++} ++ ++#endif ++ +diff -uNr linux-3.2.34-go.orig/arch/arm/tools/mach-types linux-3.2.34-go/arch/arm/tools/mach-types +--- linux-3.2.34-go.orig/arch/arm/tools/mach-types 2012-11-19 21:03:42.675742765 +0100 ++++ linux-3.2.34-go/arch/arm/tools/mach-types 2012-11-19 21:22:29.653445807 +0100 +@@ -119,6 +119,7 @@ + tosa MACH_TOSA TOSA 520 + avila MACH_AVILA AVILA 526 + lsvl MACH_LSVL LSVL 5277 ++lswvl MACH_LSWVL LSWVL 5278 + edb9302 MACH_EDB9302 EDB9302 538 + husky MACH_HUSKY HUSKY 543 + shepherd MACH_SHEPHERD SHEPHERD 545 +diff -uNr linux-3.2.34-go.orig/drivers/spi/spi-orion.c linux-3.2.34-go/drivers/spi/spi-orion.c +--- linux-3.2.34-go.orig/drivers/spi/spi-orion.c 2012-11-19 21:03:41.809752734 +0100 ++++ linux-3.2.34-go/drivers/spi/spi-orion.c 2012-11-19 21:20:55.123558883 +0100 +@@ -19,6 +19,12 @@ + #include + #include + #include ++#include ++ ++#ifdef CONFIG_MACH_LSWVL ++void lswvl_setup_spi_mpp(void); ++void lswvl_reset_mpp(void); ++#endif + + #define DRIVER_NAME "orion_spi" + +@@ -141,6 +147,9 @@ + unsigned int bits_per_word = spi->bits_per_word; + int rc; + ++#ifdef CONFIG_MACH_LSWVL ++ lswvl_setup_spi_mpp(); ++#endif + orion_spi = spi_master_get_devdata(spi->master); + + if ((t != NULL) && t->speed_hz) +@@ -153,15 +162,37 @@ + if (rc) + return rc; + ++#ifdef CONFIG_MACH_LSWVL ++ rc = orion_spi_set_transfer_size(orion_spi, bits_per_word); ++ lswvl_reset_mpp(); ++ return rc; ++#else + return orion_spi_set_transfer_size(orion_spi, bits_per_word); ++#endif + } + + static void orion_spi_set_cs(struct orion_spi *orion_spi, int enable) + { + if (enable) ++#ifdef CONFIG_MACH_LSWVL ++ { ++ lswvl_setup_spi_mpp(); ++ udelay(1); ++ orion_spi_setbits(orion_spi, ORION_SPI_IF_CTRL_REG, 0x1); ++ } ++#else + orion_spi_setbits(orion_spi, ORION_SPI_IF_CTRL_REG, 0x1); ++#endif + else + orion_spi_clrbits(orion_spi, ORION_SPI_IF_CTRL_REG, 0x1); ++#ifdef CONFIG_MACH_LSWVL ++ { ++ orion_spi_clrbits(orion_spi, ORION_SPI_IF_CTRL_REG, 0x1); ++ lswvl_reset_mpp(); ++ } ++#else ++ orion_spi_clrbits(orion_spi, ORION_SPI_IF_CTRL_REG, 0x1); ++#endif + } + + static inline int orion_spi_wait_till_ready(struct orion_spi *orion_spi) +@@ -361,8 +392,17 @@ + + /* Fix ac timing if required. 
*/ + if (orion_spi->spi_info->enable_clock_fix) ++#ifdef CONFIG_MACH_LSWVL ++ { ++ lswvl_setup_spi_mpp(); ++ orion_spi_setbits(orion_spi, ORION_SPI_IF_CONFIG_REG, ++ (1 << 14)); ++ lswvl_reset_mpp(); ++ } ++#else + orion_spi_setbits(orion_spi, ORION_SPI_IF_CONFIG_REG, + (1 << 14)); ++#endif + + if ((spi->max_speed_hz == 0) + || (spi->max_speed_hz > orion_spi->max_speed)) diff --git a/3.3.8/v3.2-ARM-orion-Add-support-for-Buffalo-LS-PRODUO.patch b/3.3.8/v3.2-ARM-orion-Add-support-for-Buffalo-LS-PRODUO.patch new file mode 100644 index 0000000..bc8a882 --- /dev/null +++ b/3.3.8/v3.2-ARM-orion-Add-support-for-Buffalo-LS-PRODUO.patch @@ -0,0 +1,569 @@ +diff -uprN linux-3.4-rc7/arch/arm/configs/orion5x_defconfig linux-3.4-rc7-wtgl/arch/arm/configs/orion5x_defconfig +--- linux-3.4-rc7/arch/arm/configs/orion5x_defconfig 2012-05-12 19:37:47.000000000 -0600 ++++ linux-3.4-rc7-wtgl/arch/arm/configs/orion5x_defconfig 2012-08-16 23:41:47.118502384 -0600 +@@ -19,6 +19,7 @@ CONFIG_MACH_TS209=y + CONFIG_MACH_TERASTATION_PRO2=y + CONFIG_MACH_LINKSTATION_PRO=y + CONFIG_MACH_LINKSTATION_MINI=y ++CONFIG_MACH_LINKSTATION_PRODUO=y + CONFIG_MACH_LINKSTATION_LS_HGL=y + CONFIG_MACH_TS409=y + CONFIG_MACH_WRT350N_V2=y +diff -uprN linux-3.4-rc7/arch/arm/mach-orion5x/Kconfig linux-3.4-rc7-wtgl/arch/arm/mach-orion5x/Kconfig +--- linux-3.4-rc7/arch/arm/mach-orion5x/Kconfig 2012-05-12 19:37:47.000000000 -0600 ++++ linux-3.4-rc7-wtgl/arch/arm/mach-orion5x/Kconfig 2012-08-16 23:47:02.334496150 -0600 +@@ -65,13 +65,52 @@ config MACH_LINKSTATION_MINI + Say 'Y' here if you want your kernel to support the + Buffalo Linkstation Mini platform. + ++config MACH_LINKSTATION_PRODUO ++ bool "Buffalo Linkstation Pro Duo" ++ select I2C_BOARDINFO ++ help ++ Say 'Y' here if you want your kernel to support the ++ Buffalo Linkstation Pro Duo platform. ++ ++ LS-W1.0TGL/R1 is the general model number. There ++ is no /R3 models, as /R1 stands for RAID1. ++ There are two hardware revisions of the product. ++ ++ The first revision has version 1.xx firmware, 64 MB RAM, ++ a single USB port, a power BUTTON, an Auto/Manual ++ power MODE SWITCH, and a RESET button. ++ ++ The second revision has version 3.xx firmware, 128 MB RAM, ++ two USB ports, an Off/On/Auto power SWITCH, and a FUNCTION button. ++ ++ choice ++ prompt "HW model" ++ depends on MACH_LINKSTATION_PRODUO ++ default MACH_LINKSTATION_PRODUO_REV1 ++ default MACH_LINKSTATION_PRODUO_REV2 ++ ++ config MACH_LINKSTATION_PRODUO_REV1 ++ bool "Revision 1" ++ help ++ The first revision has version 1.xx firmware, 64 MB RAM, ++ a single USB port, a power BUTTON, an Auto/Manual ++ power MODE SWITCH, and a RESET button. ++ ++ config MACH_LINKSTATION_PRODUO_REV2 ++ bool "Revision 2" ++ help ++ The second revision has version 3.xx firmware, 128 MB RAM, ++ two USB ports, an Off/On/Auto power SWITCH, and a FUNCTION button. ++ endchoice ++ ++ + config MACH_LINKSTATION_LS_HGL + bool "Buffalo Linkstation LS-HGL" + select I2C_BOARDINFO + help + Say 'Y' here if you want your kernel to support the + Buffalo Linkstation LS-HGL platform. 
+- ++ + config MACH_TS409 + bool "QNAP TS-409" + help +diff -uprN linux-3.4-rc7/arch/arm/mach-orion5x/Makefile linux-3.4-rc7-wtgl/arch/arm/mach-orion5x/Makefile +--- linux-3.4-rc7/arch/arm/mach-orion5x/Makefile 2012-05-12 19:37:47.000000000 -0600 ++++ linux-3.4-rc7-wtgl/arch/arm/mach-orion5x/Makefile 2012-08-16 23:48:26.182494492 -0600 +@@ -5,6 +5,7 @@ obj-$(CONFIG_MACH_KUROBOX_PRO) += kurobo + obj-$(CONFIG_MACH_TERASTATION_PRO2) += terastation_pro2-setup.o + obj-$(CONFIG_MACH_LINKSTATION_PRO) += kurobox_pro-setup.o + obj-$(CONFIG_MACH_LINKSTATION_MINI) += lsmini-setup.o ++obj-$(CONFIG_MACH_LINKSTATION_PRODUO) += lsproduo-setup.o + obj-$(CONFIG_MACH_LINKSTATION_LS_HGL) += ls_hgl-setup.o + obj-$(CONFIG_MACH_DNS323) += dns323-setup.o + obj-$(CONFIG_MACH_TS209) += ts209-setup.o tsx09-common.o +diff -uprN linux-3.4-rc7/arch/arm/mach-orion5x/lsproduo-setup.c linux-3.4-rc7-wtgl/arch/arm/mach-orion5x/lsproduo-setup.c +--- linux-3.4-rc7/arch/arm/mach-orion5x/lsproduo-setup.c 1969-12-31 17:00:00.000000000 -0700 ++++ linux-3.4-rc7-wtgl/arch/arm/mach-orion5x/lsproduo-setup.c 2012-08-16 23:52:09.630490073 -0600 +@@ -0,0 +1,459 @@ ++/* ++ * arch/arm/mach-orion5x/lsproduo-setup.c ++ * ++ * Source taken from arch/arm/mach-orion5x/lsmini-setup.c - kernel 2.6.30 ++ * Maintainer: Matt Gomboc ++ * ++ * This file is licensed under the terms of the GNU General Public ++ * License version 2. This program is licensed "as is" without any ++ * warranty of any kind, whether express or implied. ++ */ ++ ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include "common.h" ++#include "mpp.h" ++#include ++#include ++#include ++ ++/***************************************************************************** ++ * Linkstation Pro Duo Info ++ ****************************************************************************/ ++ ++/* ++ * 256K NOR flash Device bus boot chip select ++ */ ++ ++#define LSPRODUO_NOR_BOOT_BASE 0xf4000000 ++#define LSPRODUO_NOR_BOOT_SIZE SZ_256K ++ ++/***************************************************************************** ++ * 256KB NOR Flash on BOOT Device ++ ****************************************************************************/ ++ ++static struct physmap_flash_data lsproduo_nor_flash_data = { ++ .width = 1, ++}; ++ ++static struct resource lsproduo_nor_flash_resource = { ++ .flags = IORESOURCE_MEM, ++ .start = LSPRODUO_NOR_BOOT_BASE, ++ .end = LSPRODUO_NOR_BOOT_BASE + LSPRODUO_NOR_BOOT_SIZE - 1, ++}; ++ ++static struct platform_device lsproduo_nor_flash = { ++ .name = "physmap-flash", ++ .id = 0, ++ .dev = { ++ .platform_data = &lsproduo_nor_flash_data, ++ }, ++ .num_resources = 1, ++ .resource = &lsproduo_nor_flash_resource, ++}; ++ ++/***************************************************************************** ++ * Ethernet ++ ****************************************************************************/ ++ ++static struct mv643xx_eth_platform_data lsproduo_eth_data = { ++ .phy_addr = 8, ++}; ++ ++/***************************************************************************** ++ * RTC 5C372a on I2C bus ++ ****************************************************************************/ ++ ++static struct i2c_board_info __initdata lsproduo_i2c_rtc = { ++ I2C_BOARD_INFO("rs5c372a", 0x32), ++}; ++ ++/***************************************************************************** ++ * LEDs attached to GPIO ++ ****************************************************************************/ ++ 
++#define LSPRODUO_GPIO_LED_ALARM 2 ++#define LSPRODUO_GPIO_LED_INFO 3 ++#define LSPRODUO_GPIO_LED_PWR 0 ++ ++#ifdef CONFIG_MACH_LINKSTATION_PRODUO_REV2 ++ #define LSPRODUO_GPIO_LED_FUNC 18 ++#endif ++ ++#ifdef CONFIG_MACH_LINKSTATION_PRODUO_REV1 ++static struct gpio_led lsproduo_led_pins[] = { ++ { ++ .name = "alarm:red", ++ .gpio = LSPRODUO_GPIO_LED_ALARM, ++ .active_low = 1, ++ }, { ++ .name = "info:amber", ++ .gpio = LSPRODUO_GPIO_LED_INFO, ++ .active_low = 1, ++ }, { ++ .name = "power:greem", ++ .gpio = LSPRODUO_GPIO_LED_PWR, ++ .active_low = 1, ++ }, ++}; ++#endif ++#ifdef CONFIG_MACH_LINKSTATION_PRODUO_REV2 ++static struct gpio_led lsproduo_led_pins[] = { ++ { ++ .name = "alarm:red", ++ .gpio = LSPRODUO_GPIO_LED_ALARM, ++ .active_low = 1, ++ }, { ++ .name = "info:amber", ++ .gpio = LSPRODUO_GPIO_LED_INFO, ++ .active_low = 1, ++ }, { ++ .name = "power:green", ++ .gpio = LSPRODUO_GPIO_LED_PWR, ++ .active_low = 1, ++ },{ ++ .name = "func:blue", ++ .gpio = LSPRODUO_GPIO_LED_FUNC, ++ .active_low = 1, ++ }, ++}; ++#endif ++ ++ ++ ++static struct gpio_led_platform_data lsproduo_led_data = { ++ .leds = lsproduo_led_pins, ++ .num_leds = ARRAY_SIZE(lsproduo_led_pins), ++}; ++ ++static struct platform_device lsproduo_leds = { ++ .name = "leds-gpio", ++ .id = -1, ++ .dev = { ++ .platform_data = &lsproduo_led_data, ++ }, ++}; ++ ++/**************************************************************************** ++ * GPIO Attached Keys ++ ****************************************************************************/ ++#ifdef CONFIG_MACH_LINKSTATION_PRODUO_REV1 ++ #define LSPRODUO_GPIO_KEY_POWER 8 ++ #define LSPRODUO_GPIO_KEY_AUTOPOWER 10 ++ ++ #define LSPRODUO_SW_POWER 0x00 ++ #define LSPRODUO_SW_AUTOPOWER 0x01 ++ ++static struct gpio_keys_button lsproduo_buttons[] = { ++ { ++ .type = EV_SW, ++ .code = LSPRODUO_SW_POWER, ++ .gpio = LSPRODUO_GPIO_KEY_POWER, ++ .desc = "Power-on Switch", ++ .active_low = 1, ++ }, { ++ .type = EV_SW, ++ .code = LSPRODUO_SW_AUTOPOWER, ++ .gpio = LSPRODUO_GPIO_KEY_AUTOPOWER, ++ .desc = "Power-auto Switch", ++ .active_low = 1, ++ }, ++}; ++ ++#endif ++#ifdef CONFIG_MACH_LINKSTATION_PRODUO_REV2 ++ #define LSPRODUO_GPIO_KEY_POWER 10 ++ #define LSPRODUO_GPIO_KEY_AUTOPOWER 22 ++ #define LSPRODUO_GPIO_KEY_FUNC 8 ++ ++ #define LSPRODUO_SW_POWER 0x00 ++ #define LSPRODUO_SW_AUTOPOWER 0x01 ++ ++static struct gpio_keys_button lsproduo_buttons[] = { ++ { ++ .code = KEY_OPTION, ++ .gpio = LSPRODUO_GPIO_KEY_FUNC, ++ .desc = "Function Button", ++ .active_low = 1, ++ },{ ++ .type = EV_SW, ++ .code = LSPRODUO_SW_POWER, ++ .gpio = LSPRODUO_GPIO_KEY_POWER, ++ .desc = "Power-on Switch", ++ .active_low = 1, ++ }, { ++ .type = EV_SW, ++ .code = LSPRODUO_SW_AUTOPOWER, ++ .gpio = LSPRODUO_GPIO_KEY_AUTOPOWER, ++ .desc = "Power-auto Switch", ++ .active_low = 1, ++ }, ++}; ++ ++#endif ++ ++static struct gpio_keys_platform_data lsproduo_button_data = { ++ .buttons = lsproduo_buttons, ++ .nbuttons = ARRAY_SIZE(lsproduo_buttons), ++}; ++ ++static struct platform_device lsproduo_button_device = { ++ .name = "gpio-keys", ++ .id = -1, ++ .num_resources = 0, ++ .dev = { ++ .platform_data = &lsproduo_button_data, ++ }, ++}; ++ ++/**************************************************************************** ++ * GPIO Attached Fan ++ ****************************************************************************/ ++ ++/* Define max char len */ ++#define MAX_LEN 8 ++ ++#define LSPRODUO_GPIO_FAN_LOW 17 ++#define LSPRODUO_GPIO_FAN_HIGH 14 ++ ++static struct proc_dir_entry *lsproduo_proc_dir_root, 
*lsproduo_proc_dir_gpio, *lsproduo_fan_proc_file; ++static char lsproduo_fan_state[MAX_LEN]; ++ ++static int lsproduo_fan_get(char *buf, char **start, off_t offset, int count, int *eof, void *data) ++{ ++ int len; ++ ++ len = snprintf(buf, count, "state: %s\n", lsproduo_fan_state); ++ return len; ++} ++ ++static int lsproduo_fan_set( struct file *file, const char *buffer, unsigned long count, void *data ) ++{ ++ int len, ret; ++ char *ptr, tState[MAX_LEN]; ++ ++ if (count > MAX_LEN ) ++ len = MAX_LEN; ++ else ++ len = count; ++ ++ ret = copy_from_user(tState, buffer, len); ++ if(ret < 0) ++ { ++ printk(KERN_ERR "%s: Setting fan speed failed\n", "lsproduo"); ++ return -EFAULT; ++ } ++ ++ ptr = strrchr(tState, '\n'); ++ if(ptr) *ptr = '\0'; ++ ++ if (strcasecmp(tState, "off") == 0) ++ { ++ printk(KERN_DEBUG "%s: set fan off\n", "lsproduo"); ++ sprintf(lsproduo_fan_state, "off"); ++ gpio_set_value(LSPRODUO_GPIO_FAN_LOW, 1); ++ gpio_set_value(LSPRODUO_GPIO_FAN_HIGH, 1); ++ } else if (strcasecmp(tState, "slow") == 0) ++ { ++ printk(KERN_DEBUG "%s: set fan slow\n", "lsproduo"); ++ sprintf(lsproduo_fan_state, "slow"); ++ gpio_set_value(LSPRODUO_GPIO_FAN_LOW, 1); ++ gpio_set_value(LSPRODUO_GPIO_FAN_HIGH, 0); ++ } else if (strcasecmp(tState, "fast") == 0) ++ { ++ printk(KERN_DEBUG "%s: set fan fast\n", "lsproduo"); ++ sprintf(lsproduo_fan_state, "fast"); ++ gpio_set_value(LSPRODUO_GPIO_FAN_LOW, 0); ++ gpio_set_value(LSPRODUO_GPIO_FAN_HIGH, 1); ++ } else if (strcasecmp(tState, "full") == 0) ++ { ++ printk(KERN_DEBUG "%s: set fan full\n", "lsproduo"); ++ sprintf(lsproduo_fan_state, "full"); ++ gpio_set_value(LSPRODUO_GPIO_FAN_LOW, 0); ++ gpio_set_value(LSPRODUO_GPIO_FAN_HIGH, 0); ++ } else ++ { ++ printk(KERN_ERR "%s: unknown fan speed given\n", "lsproduo"); ++ } ++ ++ lsproduo_fan_state[len] = '\0'; ++ ++ return len; ++} ++ ++/***************************************************************************** ++ * SATA ++ ****************************************************************************/ ++static struct mv_sata_platform_data lsproduo_sata_data = { ++ .n_ports = 2, ++}; ++ ++ ++/***************************************************************************** ++ * Linkstation Pro Duo specific power off method: reboot ++ ****************************************************************************/ ++/* ++ * On the Linkstation Pro Duo, the shutdown process is following: ++ * - Userland monitors key events until the power switch goes to off position ++ * - The board reboots ++ * - U-boot starts and goes into an idle mode waiting for the user ++ * to move the switch to ON position ++ */ ++ ++static void lsproduo_power_off(void) ++{ ++ orion5x_restart('h', NULL); ++ /* arm_machine_restart(0, NULL); */ ++} ++ ++ ++/***************************************************************************** ++ * General Setup ++ ****************************************************************************/ ++#define LSPRODUO_GPIO_HDD_POWER0 1 ++#define LSPRODUO_GPIO_USB_POWER 9 ++#ifdef CONFIG_MACH_LINKSTATION_PRODUO_REV1 ++ #define LSPRODUO_GPIO_POWER 8 ++ #define LSPRODUO_GPIO_AUTO_POWER 10 ++#endif ++#ifdef CONFIG_MACH_LINKSTATION_PRODUO_REV2 ++ #define LSPRODUO_GPIO_POWER 10 ++ #define LSPRODUO_GPIO_USB_POWER2 19 ++ #define LSPRODUO_GPIO_AUTO_POWER 22 ++#endif ++ ++static unsigned int lsproduo_mpp_modes[] __initdata = { ++ MPP0_GPIO, /* LED_PWR */ ++ MPP1_GPIO, /* HDD_PWR */ ++ MPP2_GPIO, /* LED_ALARM */ ++ MPP3_GPIO, /* LED_INFO */ ++ MPP4_UNUSED, ++ MPP5_UNUSED, ++ MPP6_GPIO, /* FAN_LCK */ ++ MPP9_GPIO, /* USB_PWR 
*/ ++ MPP11_UNUSED, /* LED_ETH dummy */ ++ MPP12_UNUSED, ++ MPP13_UNUSED, ++ MPP14_GPIO, /* FAN_HIGH */ ++ MPP15_UNUSED, ++ MPP16_UNUSED, ++ MPP17_GPIO, /* FAN_LOW */ ++ ++#ifdef CONFIG_MACH_LINKSTATION_PRODUO_REV1 ++ MPP7_GPIO, /* INIT */ ++ MPP8_GPIO, /* POWER */ ++ MPP10_GPIO, /* AUTO_POWER */ ++ MPP18_UNUSED, ++ MPP19_UNUSED, ++#endif ++#ifdef CONFIG_MACH_LINKSTATION_PRODUO_REV2 ++ MPP7_UNUSED, ++ MPP8_GPIO, /* FUNC */ ++ MPP10_GPIO, /* POWER */ ++ MPP18_GPIO, /* LED_FUNC*/ ++ MPP19_GPIO, /* USB_PWR2 */ ++ MPP22_GPIO, /* AUTO_POWER */ ++#endif ++ 0, ++}; ++ ++static void __init lsproduo_init(void) ++{ ++ /* ++ * Setup basic Orion functions. Need to be called early. ++ */ ++ orion5x_init(); ++ ++ orion5x_mpp_conf(lsproduo_mpp_modes); ++ ++ /* ++ * Configure peripherals. ++ */ ++ orion5x_ehci0_init(); ++ orion5x_ehci1_init(); ++ orion5x_eth_init(&lsproduo_eth_data); ++ orion5x_i2c_init(); ++ orion5x_sata_init(&lsproduo_sata_data); ++ orion5x_uart0_init(); ++ orion5x_xor_init(); ++ ++ orion5x_setup_dev_boot_win(LSPRODUO_NOR_BOOT_BASE, ++ LSPRODUO_NOR_BOOT_SIZE); ++ platform_device_register(&lsproduo_nor_flash); ++ ++ platform_device_register(&lsproduo_button_device); ++ ++ platform_device_register(&lsproduo_leds); ++ ++ i2c_register_board_info(0, &lsproduo_i2c_rtc, 1); ++ ++ /* enable USB power */ ++ gpio_set_value(LSPRODUO_GPIO_USB_POWER, 1); ++ ++#ifdef CONFIG_MACH_LINKSTATION_PRODUO_REV2 ++ gpio_set_value(LSPRODUO_GPIO_USB_POWER2, 1); ++#endif ++ ++ printk(KERN_INFO "Buffalo Linkstation Pro Duo fan driver loaded\n"); ++ sprintf(lsproduo_fan_state, "fast"); ++ gpio_set_value(LSPRODUO_GPIO_FAN_LOW, 1); ++ gpio_set_value(LSPRODUO_GPIO_FAN_HIGH, 0); ++ ++ lsproduo_proc_dir_root = proc_mkdir( "linkstation", NULL ); ++ lsproduo_proc_dir_gpio = proc_mkdir( "gpio", lsproduo_proc_dir_root ); ++ lsproduo_fan_proc_file = create_proc_entry( "fan", S_IRUGO, lsproduo_proc_dir_gpio ); ++ if( lsproduo_fan_proc_file ) { ++ lsproduo_fan_proc_file->read_proc = lsproduo_fan_get; ++ lsproduo_fan_proc_file->write_proc = lsproduo_fan_set; ++ lsproduo_fan_proc_file->data = NULL; ++ } else ++ { ++ printk(KERN_INFO "Registration of fan device failed\n"); ++ } ++ ++ /* register power-off method */ ++ pm_power_off = lsproduo_power_off; ++ ++ pr_info("%s: finished\n", __func__); ++} ++ ++#ifdef CONFIG_MACH_LINKSTATION_PRODUO_REV1 ++MACHINE_START(LINKSTATION_PRODUO, "Buffalo Linkstation Pro Duo - Revision 1") ++ .atag_offset = 0x00000100, ++ .init_machine = lsproduo_init, ++ .map_io = orion5x_map_io, ++ .init_early = orion5x_init_early, ++ .init_irq = orion5x_init_irq, ++ .timer = &orion5x_timer, ++ .fixup = tag_fixup_mem32, ++ .restart = orion5x_restart, ++MACHINE_END ++#endif ++ ++#ifdef CONFIG_MACH_LINKSTATION_PRODUO_REV2 ++MACHINE_START(LINKSTATION_PRODUO, "Buffalo Linkstation Pro Duo - Revision 2") ++ .atag_offset = 0x00000100, ++ .init_machine = lsproduo_init, ++ .map_io = orion5x_map_io, ++ .init_early = orion5x_init_early, ++ .init_irq = orion5x_init_irq, ++ .timer = &orion5x_timer, ++ .fixup = tag_fixup_mem32, ++ .restart = orion5x_restart, ++MACHINE_END ++#endif ++ ++ ++ +diff -uprN linux-3.4-rc7/arch/arm/mach-orion5x/mpp.h linux-3.4-rc7-wtgl/arch/arm/mach-orion5x/mpp.h +--- linux-3.4-rc7/arch/arm/mach-orion5x/mpp.h 2012-05-12 19:37:47.000000000 -0600 ++++ linux-3.4-rc7-wtgl/arch/arm/mach-orion5x/mpp.h 2012-08-16 22:15:34.000000000 -0600 +@@ -122,7 +122,10 @@ + #define MPP19_GIGE MPP(19, 0x1, 0, 0, 1, 1, 1) + #define MPP19_UART MPP(19, 0x0, 0, 0, 0, 1, 1) + +-#define MPP_MAX 19 ++#define MPP22_GPIO 
MPP(22, 0x5, 1, 1, 0, 1, 0) ++ ++ ++#define MPP_MAX 22 + + void orion5x_mpp_conf(unsigned int *mpp_list); + +diff -uprN linux-3.4-rc7/arch/arm/tools/mach-types linux-3.4-rc7-wtgl/arch/arm/tools/mach-types +--- linux-3.4-rc7/arch/arm/tools/mach-types 2012-05-12 19:37:47.000000000 -0600 ++++ linux-3.4-rc7-wtgl/arch/arm/tools/mach-types 2012-08-16 23:43:59.830499760 -0600 +@@ -333,6 +333,8 @@ smdkc100 MACH_SMDKC100 SMDKC100 1826 + tavorevb MACH_TAVOREVB TAVOREVB 1827 + saar MACH_SAAR SAAR 1828 + at91sam9m10g45ek MACH_AT91SAM9M10G45EK AT91SAM9M10G45EK 1830 ++linkstation_produo MACH_LINKSTATION_PRODUO LINKSTATION_PRODUO 1831 ++##see header for btaining a new version, preferred to patching + usb_a9g20 MACH_USB_A9G20 USB_A9G20 1841 + mxlads MACH_MXLADS MXLADS 1851 + linkstation_mini MACH_LINKSTATION_MINI LINKSTATION_MINI 1858 diff --git a/3.3.8/v3.3-ARM-kirkwood-Add-support-for-Buffalo-LS-CHLv2.patch b/3.3.8/v3.3-ARM-kirkwood-Add-support-for-Buffalo-LS-CHLv2.patch new file mode 100644 index 0000000..452865b --- /dev/null +++ b/3.3.8/v3.3-ARM-kirkwood-Add-support-for-Buffalo-LS-CHLv2.patch @@ -0,0 +1,278 @@ +diff -uNr linux-3.3.8-go.orig/arch/arm/mach-kirkwood/common.c linux-3.3.8-go/arch/arm/mach-kirkwood/common.c +--- linux-3.3.8-go.orig/arch/arm/mach-kirkwood/common.c 2012-11-22 21:47:03.726499285 +0100 ++++ linux-3.3.8-go/arch/arm/mach-kirkwood/common.c 2012-11-22 21:50:05.917342298 +0100 +@@ -31,6 +31,7 @@ + #include + #include + #include ++#include + #include "common.h" + + /***************************************************************************** +diff -uNr linux-3.3.8-go.orig/arch/arm/mach-kirkwood/include/mach/system.h linux-3.3.8-go/arch/arm/mach-kirkwood/include/mach/system.h +--- linux-3.3.8-go.orig/arch/arm/mach-kirkwood/include/mach/system.h 2012-11-22 21:47:03.725499297 +0100 ++++ linux-3.3.8-go/arch/arm/mach-kirkwood/include/mach/system.h 2012-11-22 21:49:41.395632387 +0100 +@@ -9,6 +9,10 @@ + #ifndef __ASM_ARCH_SYSTEM_H + #define __ASM_ARCH_SYSTEM_H + ++#include ++#include ++#include ++ + static inline void arch_idle(void) + { + cpu_do_idle(); +diff -uNr linux-3.3.8-go.orig/arch/arm/mach-kirkwood/Kconfig linux-3.3.8-go/arch/arm/mach-kirkwood/Kconfig +--- linux-3.3.8-go.orig/arch/arm/mach-kirkwood/Kconfig 2012-11-22 21:47:03.726499285 +0100 ++++ linux-3.3.8-go/arch/arm/mach-kirkwood/Kconfig 2012-11-22 21:47:26.775227628 +0100 +@@ -148,6 +148,12 @@ + Say 'Y' here if you want your kernel to support the + Buffalo LS-WVL/E-AP NAS + ++config MACH_LINKSTATION_CHLV2 ++ bool "Buffalo LS-CHLv2 Series" ++ help ++ Say 'Y' here if you want your kernel to support the ++ Buffalo LS-CHLv2 Series. ++ + endmenu + + endif +diff -uNr linux-3.3.8-go.orig/arch/arm/mach-kirkwood/lschlv2-setup.c linux-3.3.8-go/arch/arm/mach-kirkwood/lschlv2-setup.c +--- linux-3.3.8-go.orig/arch/arm/mach-kirkwood/lschlv2-setup.c 1970-01-01 01:00:00.000000000 +0100 ++++ linux-3.3.8-go/arch/arm/mach-kirkwood/lschlv2-setup.c 2012-11-22 21:47:26.775227628 +0100 +@@ -0,0 +1,211 @@ ++/* ++ * arch/arm/mach-kirkwood/lschlv2-setup.c ++ * ++ * Buffalo LS Kirkwood Series Setup ++ * ++ * This file is licensed under the terms of the GNU General Public ++ * License version 2. This program is licensed "as is" without any ++ * warranty of any kind, whether express or implied. 
++ */ ++ ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include "include/mach/system.h" ++#include ++#include "common.h" ++#include "mpp.h" ++ ++/***************************************************************************** ++ * 512KB SPI Flash on BOOT Device ++ ****************************************************************************/ ++static struct mtd_partition lschlv2_partitions[] = { ++ { ++ .name = "u-boot", ++ .offset = 0x00000, ++ .size = 0x70000, ++ .mask_flags = MTD_WRITEABLE, ++ }, ++ { ++ .name = "u-boot env", ++ .offset = MTDPART_OFS_APPEND, ++ .size = 0x10000, ++ } ++}; ++ ++static struct flash_platform_data lschlv2_spi_slave_data = { ++ .type = "m25p40", ++ .parts = lschlv2_partitions, ++ .nr_parts = ARRAY_SIZE(lschlv2_partitions), ++}; ++ ++static struct spi_board_info __initdata lschlv2_spi_slave_info[] = { ++ { ++ .modalias = "m25p80", ++ .platform_data = &lschlv2_spi_slave_data, ++ .irq = -1, ++ .max_speed_hz = 20000000, ++ .bus_num = 0, ++ .chip_select = 0, ++ } ++}; ++ ++static struct mv643xx_eth_platform_data lschlv2_ge00_data = { ++ .phy_addr = MV643XX_ETH_PHY_ADDR(0), ++}; ++ ++static struct mv643xx_eth_platform_data lschlv2_ge01_data = { ++ .phy_addr = MV643XX_ETH_PHY_ADDR(8), ++}; ++ ++static unsigned int lschlv2_mpp_config[] __initdata = { ++ MPP10_GPO, /* HDD Power */ ++ MPP11_GPIO, /* USB Vbus Power */ ++ MPP18_GPO, /* FAN High on:0, off:1 */ ++ MPP19_GPO, /* FAN Low on:0, off:1 */ ++ MPP36_GPIO, /* FUNC LED */ ++ MPP37_GPIO, /* ALARM LED */ ++ MPP38_GPIO, /* INFO LED */ ++ MPP39_GPIO, /* POWER LED */ ++ MPP40_GPIO, /* FAN LOCK */ ++ MPP41_GPIO, /* FUNC SW */ ++ MPP42_GPIO, /* POWER SW */ ++ MPP43_GPIO, /* POWER AUTO SW */ ++ MPP48_GPIO, /* FUNC RED LED */ ++ MPP49_GPIO, /* UART EN */ ++ 0 ++}; ++ ++static struct mv_sata_platform_data lschlv2_sata_data = { ++ .n_ports = 1, ++}; ++ ++static struct gpio_led lschlv2_led_pins[] = { ++ { ++ .name = "func", ++ .gpio = 36, ++ .active_low = 1, ++ }, ++ { ++ .name = "alarm", ++ .gpio = 37, ++ .active_low = 1, ++ }, ++ { ++ .name = "info", ++ .gpio = 38, ++ .active_low = 1, ++ }, ++ { ++ .name = "power", ++ .gpio = 39, ++ .default_trigger = "default-on", ++ .active_low = 1, ++ }, ++ { ++ .name = "func2", ++ .gpio = 48, ++ .active_low = 1, ++ }, ++}; ++ ++static struct gpio_led_platform_data lschlv2_led_data = { ++ .leds = lschlv2_led_pins, ++ .num_leds = ARRAY_SIZE(lschlv2_led_pins), ++}; ++ ++static struct platform_device lschlv2_leds = { ++ .name = "leds-gpio", ++ .id = -1, ++ .dev = { ++ .platform_data = &lschlv2_led_data, ++ } ++}; ++ ++#define LSCHLv2_GPIO_USB_VBUS_EN 11 ++#define LSCHLv2_GPIO_KEY_FUNC 41 ++ ++static struct gpio_keys_button lschlv2_buttons[] = { ++ { ++ .code = KEY_OPTION, ++ .gpio = LSCHLv2_GPIO_KEY_FUNC, ++ .desc = "Function Button", ++ .active_low = 1, ++ }, ++}; ++ ++static struct gpio_keys_platform_data lschlv2_button_data = { ++ .buttons = lschlv2_buttons, ++ .nbuttons = ARRAY_SIZE(lschlv2_buttons), ++}; ++ ++static struct platform_device lschlv2_button_device = { ++ .name = "gpio-keys", ++ .id = -1, ++ .num_resources = 0, ++ .dev = { ++ .platform_data = &lschlv2_button_data, ++ }, ++}; ++ ++static void lschlv2_power_off(void) ++{ ++ kirkwood_restart('h', NULL); //arch_reset(0, NULL); ++} ++ ++static void __init lschlv2_init(void) ++{ ++ /* ++ * Basic setup. Needs to be called early. 
++ */ ++ kirkwood_init(); ++ kirkwood_mpp_conf(lschlv2_mpp_config); ++ ++ kirkwood_uart0_init(); ++ ++ if (gpio_request(LSCHLv2_GPIO_USB_VBUS_EN, "USB Power Enable") != 0 || ++ gpio_direction_output(LSCHLv2_GPIO_USB_VBUS_EN, 1) != 0) ++ printk(KERN_ERR "can't set up USB Power Enable\n"); ++ kirkwood_ehci_init(); ++ ++ kirkwood_ge00_init(&lschlv2_ge00_data); ++ kirkwood_ge01_init(&lschlv2_ge01_data); ++ ++ kirkwood_sata_init(&lschlv2_sata_data); ++ ++ kirkwood_spi_init(); ++ ++ platform_device_register(&lschlv2_leds); ++ platform_device_register(&lschlv2_button_device); ++ ++ spi_register_board_info(lschlv2_spi_slave_info, ++ ARRAY_SIZE(lschlv2_spi_slave_info)); ++ ++ /* register power-off method */ ++ pm_power_off = lschlv2_power_off; ++ ++ pr_info("%s: finished\n", __func__); ++} ++ ++ ++ ++MACHINE_START(LINKSTATION_CHLV2, "Buffalo Linkstation LS-CHLv2") ++ .atag_offset = 0x100, ++ .init_machine = lschlv2_init, ++ .map_io = kirkwood_map_io, ++ .init_early = kirkwood_init_early, ++ .init_irq = kirkwood_init_irq, ++ .timer = &kirkwood_timer, ++ .restart = kirkwood_restart, ++MACHINE_END +diff -uNr linux-3.3.8-go.orig/arch/arm/mach-kirkwood/Makefile linux-3.3.8-go/arch/arm/mach-kirkwood/Makefile +--- linux-3.3.8-go.orig/arch/arm/mach-kirkwood/Makefile 2012-11-22 21:47:03.726499285 +0100 ++++ linux-3.3.8-go/arch/arm/mach-kirkwood/Makefile 2012-11-22 21:47:26.775227628 +0100 +@@ -18,6 +18,7 @@ + obj-$(CONFIG_MACH_NET2BIG_V2) += netxbig_v2-setup.o lacie_v2-common.o + obj-$(CONFIG_MACH_NET5BIG_V2) += netxbig_v2-setup.o lacie_v2-common.o + obj-$(CONFIG_MACH_T5325) += t5325-setup.o ++obj-$(CONFIG_MACH_LINKSTATION_CHLV2) += lschlv2-setup.o + obj-$(CONFIG_MACH_LSXHL) += lsxhl-setup.o + obj-$(CONFIG_MACH_LSVL) += lsvl-setup.o + obj-$(CONFIG_MACH_LSWVL) += lswvl-setup.o +diff -uNr linux-3.3.8-go.orig/arch/arm/tools/mach-types linux-3.3.8-go/arch/arm/tools/mach-types +--- linux-3.3.8-go.orig/arch/arm/tools/mach-types 2012-11-24 05:06:18.763371700 +0100 ++++ linux-3.3.8-go/arch/arm/tools/mach-types 2012-11-24 05:06:59.092895630 +0100 +@@ -522,6 +522,7 @@ + dockstar MACH_DOCKSTAR DOCKSTAR 2998 + ti8148evm MACH_TI8148EVM TI8148EVM 3004 + seaboard MACH_SEABOARD SEABOARD 3005 ++linkstation_chlv2 MACH_LINKSTATION_CHLV2 LINKSTATION_CHLV2 3006 + mx53_ard MACH_MX53_ARD MX53_ARD 3010 + mx53_smd MACH_MX53_SMD MX53_SMD 3011 + msm8x60_rumi3 MACH_MSM8X60_RUMI3 MSM8X60_RUMI3 3016 diff --git a/3.3.8/v3.3-ARM-kirkwood-Add-support-for-Buffalo-LS-XHL.patch b/3.3.8/v3.3-ARM-kirkwood-Add-support-for-Buffalo-LS-XHL.patch new file mode 100644 index 0000000..71e23b9 --- /dev/null +++ b/3.3.8/v3.3-ARM-kirkwood-Add-support-for-Buffalo-LS-XHL.patch @@ -0,0 +1,388 @@ +Add support for the Buffalo Linkstation XHL. This NAS box is based on a +Marvell Kirkwood chip at 1.2 GHz and features 256 MB RAM, 512kb SPI boot +flash, gigabit ethernet and one SATA port. 
+ +Signed-off-by: Michael Walle +--- + arch/arm/configs/kirkwood_defconfig | 1 + + arch/arm/mach-kirkwood/Kconfig | 6 + + arch/arm/mach-kirkwood/Makefile | 1 + + arch/arm/mach-kirkwood/lsxhl-setup.c | 313 ++++++++++++++++++++++++++++++++++ + arch/arm/tools/mach-types | 1 + + 5 files changed, 322 insertions(+), 0 deletions(-) + create mode 100644 arch/arm/mach-kirkwood/lsxhl-setup.c + +diff --git a/arch/arm/configs/kirkwood_defconfig b/arch/arm/configs/kirkwood_defconfig +index aeb3af5..9f77811 100644 +--- a/arch/arm/configs/kirkwood_defconfig ++++ b/arch/arm/configs/kirkwood_defconfig +@@ -28,6 +28,7 @@ CONFIG_MACH_D2NET_V2=y + CONFIG_MACH_NET2BIG_V2=y + CONFIG_MACH_NET5BIG_V2=y + CONFIG_MACH_T5325=y ++CONFIG_MACH_LSXHL=y + # CONFIG_CPU_FEROCEON_OLD_ID is not set + CONFIG_NO_HZ=y + CONFIG_HIGH_RES_TIMERS=y +diff --git a/arch/arm/mach-kirkwood/Kconfig b/arch/arm/mach-kirkwood/Kconfig +index 7fc603b..307cc99 100644 +--- a/arch/arm/mach-kirkwood/Kconfig ++++ b/arch/arm/mach-kirkwood/Kconfig +@@ -130,6 +130,12 @@ config MACH_T5325 + Say 'Y' here if you want your kernel to support the + HP t5325 Thin Client. + ++config MACH_LSXHL ++ bool "Buffalo LS-XHL Series" ++ help ++ Say 'Y' here if you want your kernel to support the ++ Buffalo LS-XHL Series. ++ + endmenu + + endif +diff --git a/arch/arm/mach-kirkwood/Makefile b/arch/arm/mach-kirkwood/Makefile +index 5dcaa81..221980b 100644 +--- a/arch/arm/mach-kirkwood/Makefile ++++ b/arch/arm/mach-kirkwood/Makefile +@@ -18,5 +18,6 @@ obj-$(CONFIG_MACH_D2NET_V2) += d2net_v2-setup.o lacie_v2-common.o + obj-$(CONFIG_MACH_NET2BIG_V2) += netxbig_v2-setup.o lacie_v2-common.o + obj-$(CONFIG_MACH_NET5BIG_V2) += netxbig_v2-setup.o lacie_v2-common.o + obj-$(CONFIG_MACH_T5325) += t5325-setup.o ++obj-$(CONFIG_MACH_LSXHL) += lsxhl-setup.o + + obj-$(CONFIG_CPU_IDLE) += cpuidle.o +diff --git a/arch/arm/mach-kirkwood/lsxhl-setup.c b/arch/arm/mach-kirkwood/lsxhl-setup.c +new file mode 100644 +index 0000000..783d257 +--- /dev/null ++++ b/arch/arm/mach-kirkwood/lsxhl-setup.c +@@ -0,0 +1,314 @@ ++/* ++ * arch/arm/mach-kirkwood/lsxhl-setup.c ++ * ++ * Buffalo LS-XHL Series Setup ++ * ++ * This file is licensed under the terms of the GNU General Public ++ * License version 2. This program is licensed "as is" without any ++ * warranty of any kind, whether express or implied. 
++ */ ++ ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include "common.h" ++#include "mpp.h" ++ ++/***************************************************************************** ++ * 512KB SPI Flash on BOOT Device ++ ****************************************************************************/ ++static struct mtd_partition lsxhl_partitions[] = { ++ { ++ .name = "u-boot", ++ .size = 0x70000, ++ .offset = 0x00000, ++ .mask_flags = MTD_WRITEABLE, ++ }, ++ { ++ .name = "u-boot env", ++ .size = 0x10000, ++ .offset = 0x70000, ++ } ++}; ++ ++static struct flash_platform_data lsxhl_spi_slave_data = { ++ .type = "m25p40", ++ .parts = lsxhl_partitions, ++ .nr_parts = ARRAY_SIZE(lsxhl_partitions), ++}; ++ ++static struct spi_board_info __initdata lsxhl_spi_slave_info[] = { ++ { ++ .modalias = "m25p80", ++ .platform_data = &lsxhl_spi_slave_data, ++ .irq = -1, ++ .max_speed_hz = 20000000, ++ .bus_num = 0, ++ .chip_select = 0, ++ } ++}; ++ ++/***************************************************************************** ++ * Ethernet ++ ****************************************************************************/ ++static struct mv643xx_eth_platform_data lsxhl_ge00_data = { ++ .phy_addr = MV643XX_ETH_PHY_ADDR(0), ++}; ++ ++static struct mv643xx_eth_platform_data lsxhl_ge01_data = { ++ .phy_addr = MV643XX_ETH_PHY_ADDR(8), ++}; ++ ++/***************************************************************************** ++ * SATA ++ ****************************************************************************/ ++static struct mv_sata_platform_data lsxhl_sata_data = { ++ .n_ports = 1, ++}; ++ ++/***************************************************************************** ++ * LEDs attached to GPIO ++ ****************************************************************************/ ++#define LSXHL_GPIO_LED_ALARM 37 ++#define LSXHL_GPIO_LED_INFO 38 ++#define LSXHL_GPIO_LED_PWR 39 ++#define LSXHL_GPIO_LED_FUNC_BLUE 36 ++#define LSXHL_GPIO_LED_FUNC_RED 48 ++ ++static struct gpio_led lsxhl_led_pins[] = { ++ { ++ .name = "alarm:red", ++ .gpio = LSXHL_GPIO_LED_ALARM, ++ .active_low = 1, ++ }, ++ { ++ .name = "info:amber", ++ .gpio = LSXHL_GPIO_LED_INFO, ++ .active_low = 1, ++ }, ++ { ++ .name = "power:blue", ++ .default_trigger = "default-on", ++ .gpio = LSXHL_GPIO_LED_PWR, ++ .active_low = 1, ++ }, ++ { ++ .name = "func:blue:bottom", ++ .gpio = LSXHL_GPIO_LED_FUNC_BLUE, ++ .active_low = 1, ++ }, ++ { ++ .name = "func:red:bottom", ++ .gpio = LSXHL_GPIO_LED_FUNC_RED, ++ .active_low = 1, ++ }, ++}; ++ ++static struct gpio_led_platform_data lsxhl_led_data = { ++ .leds = lsxhl_led_pins, ++ .num_leds = ARRAY_SIZE(lsxhl_led_pins), ++}; ++ ++static struct platform_device lsxhl_leds = { ++ .name = "leds-gpio", ++ .id = -1, ++ .dev = { ++ .platform_data = &lsxhl_led_data, ++ } ++}; ++ ++/***************************************************************************** ++ * General Setup ++ ****************************************************************************/ ++#define LSXHL_GPIO_HDD_POWER 10 ++#define LSXHL_GPIO_USB_POWER 11 ++ ++/***************************************************************************** ++ * GPIO Attached Keys ++ ****************************************************************************/ ++#define LSXHL_GPIO_KEY_FUNC 41 ++#define LSXHL_GPIO_KEY_AUTOPOWER 42 ++#define LSXHL_GPIO_KEY_POWER 43 ++#define LSXHL_SW_POWER 0x00 ++#define LSXHL_SW_AUTOPOWER 0x01 ++#define 
LSXHL_SW_FUNC 0x02 ++ ++static struct gpio_keys_button lsxhl_buttons[] = { ++ { ++ .type = EV_SW, ++ .code = LSXHL_SW_POWER, ++ .gpio = LSXHL_GPIO_KEY_POWER, ++ .desc = "Power-on Switch", ++ .active_low = 1, ++ }, { ++ .type = EV_SW, ++ .code = LSXHL_SW_AUTOPOWER, ++ .gpio = LSXHL_GPIO_KEY_AUTOPOWER, ++ .desc = "Power-auto Switch", ++ .active_low = 1, ++ }, { ++ .type = EV_SW, ++ .code = LSXHL_SW_POWER, ++ .gpio = LSXHL_GPIO_KEY_FUNC, ++ .desc = "Function Button", ++ .active_low = 1, ++ }, ++}; ++ ++static struct gpio_keys_platform_data lsxhl_button_data = { ++ .buttons = lsxhl_buttons, ++ .nbuttons = ARRAY_SIZE(lsxhl_buttons), ++}; ++ ++static struct platform_device lsxhl_button_device = { ++ .name = "gpio-keys", ++ .id = -1, ++ .num_resources = 0, ++ .dev = { ++ .platform_data = &lsxhl_button_data, ++ }, ++}; ++ ++/***************************************************************************** ++ * GPIO Fan ++ ****************************************************************************/ ++#define LSXHL_GPIO_FAN_HIGH 18 ++#define LSXHL_GPIO_FAN_LOW 19 ++#define LSXHL_GPIO_FAN_LOCK 40 ++ ++static struct gpio_fan_alarm lsxhl_alarm = { ++ .gpio = LSXHL_GPIO_FAN_LOCK, ++}; ++ ++static struct gpio_fan_speed lsxhl_speeds[] = { ++ { ++ .rpm = 0, ++ .ctrl_val = 3, ++ }, { ++ .rpm = 1500, ++ .ctrl_val = 1, ++ }, { ++ .rpm = 3250, ++ .ctrl_val = 2, ++ }, { ++ .rpm = 5000, ++ .ctrl_val = 0, ++ } ++}; ++ ++static int lsxhl_gpio_list[] = { ++ LSXHL_GPIO_FAN_HIGH, LSXHL_GPIO_FAN_LOW, ++}; ++ ++static struct gpio_fan_platform_data lsxhl_fan_data = { ++ .num_ctrl = ARRAY_SIZE(lsxhl_gpio_list), ++ .ctrl = lsxhl_gpio_list, ++ .alarm = &lsxhl_alarm, ++ .num_speed = ARRAY_SIZE(lsxhl_speeds), ++ .speed = lsxhl_speeds, ++}; ++ ++static struct platform_device lsxhl_fan_device = { ++ .name = "gpio-fan", ++ .id = -1, ++ .num_resources = 0, ++ .dev = { ++ .platform_data = &lsxhl_fan_data, ++ }, ++}; ++ ++/***************************************************************************** ++ * GPIO Data ++ ****************************************************************************/ ++ ++static unsigned int lsxhl_mpp_config[] __initdata = { ++ MPP10_GPO, /* HDD Power Enable */ ++ MPP11_GPIO, /* USB Vbus Enable */ ++ MPP18_GPO, /* FAN High Enable# */ ++ MPP19_GPO, /* FAN Low Enable# */ ++ MPP36_GPIO, /* Function Blue LED */ ++ MPP37_GPIO, /* Alarm LED */ ++ MPP38_GPIO, /* Info LED */ ++ MPP39_GPIO, /* Power LED */ ++ MPP40_GPIO, /* Fan Lock */ ++ MPP41_GPIO, /* Function Button */ ++ MPP42_GPIO, /* Power Switch */ ++ MPP43_GPIO, /* Power Auto Switch */ ++ MPP48_GPIO, /* Function Red LED */ ++ 0 ++}; ++ ++/***************************************************************************** ++ * LS-XHL specific power off method: reboot ++ ****************************************************************************/ ++/* ++ * On the LS-XHL, the shutdown process is following: ++ * - Userland monitors key events until the power switch goes to off position ++ * - The board reboots ++ * - U-boot starts and goes into an idle mode waiting for the user ++ * to move the switch to ON position ++ * ++ */ ++ ++static void lsxhl_power_off(void) ++{ ++ kirkwood_restart('h', NULL); //arm_machine_restart('h', NULL); ++} ++ ++static void __init lsxhl_init(void) ++{ ++ /* ++ * Basic setup. Needs to be called early. ++ */ ++ kirkwood_init(); ++ kirkwood_mpp_conf(lsxhl_mpp_config); ++ ++ /* ++ * Configure peripherals. 
++ */ ++ kirkwood_uart0_init(); ++ kirkwood_ehci_init(); ++ kirkwood_ge00_init(&lsxhl_ge00_data); ++ kirkwood_ge01_init(&lsxhl_ge01_data); ++ kirkwood_sata_init(&lsxhl_sata_data); ++ kirkwood_spi_init(); ++ ++ platform_device_register(&lsxhl_leds); ++ platform_device_register(&lsxhl_button_device); ++ platform_device_register(&lsxhl_fan_device); ++ ++ spi_register_board_info(lsxhl_spi_slave_info, ++ ARRAY_SIZE(lsxhl_spi_slave_info)); ++ ++ /* usb power on */ ++ gpio_set_value(LSXHL_GPIO_USB_POWER, 1); ++ ++ /* register power-off method */ ++ pm_power_off = lsxhl_power_off; ++ ++ pr_info("%s: finished\n", __func__); ++} ++ ++MACHINE_START(LSXHL, "Buffalo Linkstation LS-XHL") ++ .atag_offset = 0x100, ++ .init_machine = lsxhl_init, ++ .map_io = kirkwood_map_io, ++ .init_early = kirkwood_init_early, ++ .init_irq = kirkwood_init_irq, ++ .timer = &kirkwood_timer, ++ .restart = kirkwood_restart, ++MACHINE_END +diff --git a/arch/arm/tools/mach-types b/arch/arm/tools/mach-types +index 3b3776d..8acc587 100644 +--- a/arch/arm/tools/mach-types ++++ b/arch/arm/tools/mach-types +@@ -448,6 +448,7 @@ mityomapl138 MACH_MITYOMAPL138 MITYOMAPL138 2650 + guruplug MACH_GURUPLUG GURUPLUG 2659 + spear310 MACH_SPEAR310 SPEAR310 2660 + spear320 MACH_SPEAR320 SPEAR320 2661 ++lsxhl MACH_LSXHL LSXHL 2663 + aquila MACH_AQUILA AQUILA 2676 + sheeva_esata MACH_ESATA_SHEEVAPLUG ESATA_SHEEVAPLUG 2678 + msm7x30_surf MACH_MSM7X30_SURF MSM7X30_SURF 2679 +-- +1.7.2.3 diff --git a/3.3.8/v3.3-ARM-orion-Add-support-for-Buffalo-LS-QL.patch b/3.3.8/v3.3-ARM-orion-Add-support-for-Buffalo-LS-QL.patch new file mode 100644 index 0000000..2ee8b00 --- /dev/null +++ b/3.3.8/v3.3-ARM-orion-Add-support-for-Buffalo-LS-QL.patch @@ -0,0 +1,439 @@ +diff -uNr linux-3.3.8-go.orig/arch/arm/configs/orion5x_defconfig linux-3.3.8-go/arch/arm/configs/orion5x_defconfig +--- linux-3.3.8-go.orig/arch/arm/configs/orion5x_defconfig 2012-11-22 21:40:48.443921973 +0100 ++++ linux-3.3.8-go/arch/arm/configs/orion5x_defconfig 2012-11-22 21:41:29.388436783 +0100 +@@ -21,6 +21,7 @@ + CONFIG_MACH_LINKSTATION_MINI=y + CONFIG_MACH_LINKSTATION_PRODUO=y + CONFIG_MACH_LINKSTATION_LS_HGL=y ++CONFIG_MACH_LINKSTATION_LSQL=y + CONFIG_MACH_TS409=y + CONFIG_MACH_WRT350N_V2=y + CONFIG_MACH_TS78XX=y +diff -uNr linux-3.3.8-go.orig/arch/arm/mach-orion5x/Kconfig linux-3.3.8-go/arch/arm/mach-orion5x/Kconfig +--- linux-3.3.8-go.orig/arch/arm/mach-orion5x/Kconfig 2012-11-22 21:40:48.648919605 +0100 ++++ linux-3.3.8-go/arch/arm/mach-orion5x/Kconfig 2012-11-22 21:41:29.389436772 +0100 +@@ -111,6 +111,13 @@ + Say 'Y' here if you want your kernel to support the + Buffalo Linkstation LS-HGL platform. + ++config MACH_LINKSTATION_LSQL ++ bool "Buffalo Linkstation LS-QL" ++ select I2C_BOARDINFO ++ help ++ Say 'Y' here if you want your kernel to support the ++ Buffalo Linkstation LS-QL platform. ++ + config MACH_TS409 + bool "QNAP TS-409" + help +diff -uNr linux-3.3.8-go.orig/arch/arm/mach-orion5x/lsql-setup.c linux-3.3.8-go/arch/arm/mach-orion5x/lsql-setup.c +--- linux-3.3.8-go.orig/arch/arm/mach-orion5x/lsql-setup.c 1970-01-01 01:00:00.000000000 +0100 ++++ linux-3.3.8-go/arch/arm/mach-orion5x/lsql-setup.c 2012-11-22 21:41:29.390436761 +0100 +@@ -0,0 +1,388 @@ ++/* ++ * arch/arm/mach-orion5x/lsql-setup.c ++ * ++ * Source based off arch/arm/mach-orion5x/lsproduo-setup.c, which was from lsmini-setup.c ++ * Maintainer: Matt Gomboc ++ * ++ * This file is licensed under the terms of the GNU General Public ++ * License version 2. 
This program is licensed "as is" without any ++ * warranty of any kind, whether express or implied. ++ */ ++ ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include "common.h" ++#include "mpp.h" ++#include ++#include ++#include ++ ++/***************************************************************************** ++ * Linkstation Quad LS-QL/R5 Info ++ ****************************************************************************/ ++ ++/* ++ * 256K NOR flash Device bus boot chip select ++ */ ++ ++#define LSQL_NOR_BOOT_BASE 0xf4000000 ++#define LSQL_NOR_BOOT_SIZE SZ_256K ++ ++/***************************************************************************** ++ * 256KB NOR Flash on BOOT Device ++ ****************************************************************************/ ++ ++static struct physmap_flash_data lsql_nor_flash_data = { ++ .width = 1, ++}; ++ ++static struct resource lsql_nor_flash_resource = { ++ .flags = IORESOURCE_MEM, ++ .start = LSQL_NOR_BOOT_BASE, ++ .end = LSQL_NOR_BOOT_BASE + LSQL_NOR_BOOT_SIZE - 1, ++}; ++ ++static struct platform_device lsql_nor_flash = { ++ .name = "physmap-flash", ++ .id = 0, ++ .dev = { ++ .platform_data = &lsql_nor_flash_data, ++ }, ++ .num_resources = 1, ++ .resource = &lsql_nor_flash_resource, ++}; ++ ++/***************************************************************************** ++ * Ethernet ++ ****************************************************************************/ ++ ++static struct mv643xx_eth_platform_data lsql_eth_data = { ++ .phy_addr = 8, ++}; ++ ++/***************************************************************************** ++ * RTC 5C372a on I2C bus ++ ****************************************************************************/ ++ ++static struct i2c_board_info __initdata lsql_i2c_rtc = { ++ I2C_BOARD_INFO("rs5c372a", 0x32), ++}; ++ ++/***************************************************************************** ++ * LEDs attached to GPIO ++ ****************************************************************************/ ++ ++#define LSQL_GPIO_LED_ALARM 2 /* looks like it should be 2 by the uboot sources, but doesnt successfully trigger the3 top LED*/ ++#define LSQL_GPIO_LED_INFO 3 ++#define LSQL_GPIO_LED_PWR 0 ++#define LSQL_GPIO_LED_FUNC 18 ++ ++ ++static struct gpio_led lsql_led_pins[] = { ++ { ++ .name = "alarm:red", ++ .gpio = LSQL_GPIO_LED_ALARM, ++ .active_low = 1, ++ }, { ++ .name = "info:amber", ++ .gpio = LSQL_GPIO_LED_INFO, ++ .active_low = 1, ++ }, { ++ .name = "power:blue", ++ .gpio = LSQL_GPIO_LED_PWR, ++ .active_low = 1, ++ },{ ++ .name = "func:blue", ++ .gpio = LSQL_GPIO_LED_FUNC, ++ .active_low = 1, ++ }, ++}; ++ ++ ++ ++static struct gpio_led_platform_data lsql_led_data = { ++ .leds = lsql_led_pins, ++ .num_leds = ARRAY_SIZE(lsql_led_pins), ++}; ++ ++ ++static struct platform_device lsql_leds = { ++ .name = "leds-gpio", ++ .id = -1, ++ .dev = { ++ .platform_data = &lsql_led_data, ++ }, ++}; ++ ++ ++/**************************************************************************** ++ * GPIO Attached Keys ++ ****************************************************************************/ ++ ++ #define LSQL_GPIO_KEY_POWER 10 ++ #define LSQL_GPIO_KEY_AUTOPOWER 22 ++ #define LSQL_GPIO_KEY_FUNC 7 ++ ++ #define LSQL_SW_POWER 0x00 ++ #define LSQL_SW_AUTOPOWER 0x01 ++ ++static struct gpio_keys_button lsql_buttons[] = { ++ { ++ .code = KEY_OPTION, ++ .gpio = LSQL_GPIO_KEY_FUNC, ++ .desc = "Function Button", ++ .active_low = 1, ++ 
},{ ++ .type = EV_SW, ++ .code = LSQL_SW_POWER, ++ .gpio = LSQL_GPIO_KEY_POWER, ++ .desc = "Power-on Switch", ++ .active_low = 1, ++ }, { ++ .type = EV_SW, ++ .code = LSQL_SW_AUTOPOWER, ++ .gpio = LSQL_GPIO_KEY_AUTOPOWER, ++ .desc = "Power-auto Switch", ++ .active_low = 1, ++ }, ++}; ++ ++ ++static struct gpio_keys_platform_data lsql_button_data = { ++ .buttons = lsql_buttons, ++ .nbuttons = ARRAY_SIZE(lsql_buttons), ++}; ++ ++static struct platform_device lsql_button_device = { ++ .name = "gpio-keys", ++ .id = -1, ++ .num_resources = 0, ++ .dev = { ++ .platform_data = &lsql_button_data, ++ }, ++}; ++ ++/**************************************************************************** ++ * GPIO Attached Fan ++ ****************************************************************************/ ++ ++/* Define max char len */ ++ ++#define MAX_LEN 8 ++ ++#define LSQL_GPIO_FAN_LOW 17 ++#define LSQL_GPIO_FAN_HIGH 14 ++ ++static struct proc_dir_entry *lsql_proc_dir_root, *lsql_proc_dir_gpio, *lsql_fan_proc_file; ++static char lsql_fan_state[MAX_LEN]; ++ ++static int lsql_fan_get(char *buf, char **start, off_t offset, int count, int *eof, void *data) ++{ ++ int len; ++ ++ len = snprintf(buf, count, "state: %s\n", lsql_fan_state); ++ return len; ++} ++ ++static int lsql_fan_set( struct file *file, const char *buffer, unsigned long count, void *data ) ++{ ++ int len, ret; ++ char *ptr, tState[MAX_LEN]; ++ ++ if (count > MAX_LEN ) ++ len = MAX_LEN; ++ else ++ len = count; ++ ++ ret = copy_from_user(tState, buffer, len); ++ if(ret < 0) ++ { ++ printk(KERN_ERR "%s: Setting fan speed failed\n", "lsql"); ++ return -EFAULT; ++ } ++ ++ ptr = strrchr(tState, '\n'); ++ if(ptr) *ptr = '\0'; ++ ++ if (strcasecmp(tState, "off") == 0) ++ { ++ printk(KERN_DEBUG "%s: set fan off\n", "lsql"); ++ sprintf(lsql_fan_state, "off"); ++ gpio_set_value(LSQL_GPIO_FAN_LOW, 1); ++ gpio_set_value(LSQL_GPIO_FAN_HIGH, 1); ++ } else if (strcasecmp(tState, "slow") == 0) ++ { ++ printk(KERN_DEBUG "%s: set fan slow\n", "lsql"); ++ sprintf(lsql_fan_state, "slow"); ++ gpio_set_value(LSQL_GPIO_FAN_LOW, 1); ++ gpio_set_value(LSQL_GPIO_FAN_HIGH, 0); ++ } else if (strcasecmp(tState, "fast") == 0) ++ { ++ printk(KERN_DEBUG "%s: set fan fast\n", "lsql"); ++ sprintf(lsql_fan_state, "fast"); ++ gpio_set_value(LSQL_GPIO_FAN_LOW, 0); ++ gpio_set_value(LSQL_GPIO_FAN_HIGH, 1); ++ } else if (strcasecmp(tState, "full") == 0) ++ { ++ printk(KERN_DEBUG "%s: set fan full\n", "lsql"); ++ sprintf(lsql_fan_state, "full"); ++ gpio_set_value(LSQL_GPIO_FAN_LOW, 0); ++ gpio_set_value(LSQL_GPIO_FAN_HIGH, 0); ++ } else ++ { ++ printk(KERN_ERR "%s: unknown fan speed given\n", "lsql"); ++ } ++ ++ lsql_fan_state[len] = '\0'; ++ ++ return len; ++} ++ ++/***************************************************************************** ++ * SATA ++ ****************************************************************************/ ++static struct mv_sata_platform_data lsql_sata_data = { ++ .n_ports = 2, /*maybe this should be 4, but works with 2 */ ++}; ++ ++ ++/***************************************************************************** ++ * Linkstation Quad specific power off method: reboot ++ ****************************************************************************/ ++/* ++ * On Linkstations in general, the shutdown process is following: ++ * - Userland monitors key events until the power switch goes to off position ++ * - The board reboots ++ * - U-boot starts and goes into an idle mode waiting for the user ++ * to move the switch to ON position ++ * ++ * on the Quad however, 
there is a power button on the upper, front, ++ * a function button on the lower front, ans a Auto/Manual power button on the back. ++ * After halting system, uboot waits the power button on the front panel to be pushed ++ * ++ * ++ */ ++ ++static void lsql_power_off(void) ++{ ++ orion5x_restart('h', NULL); /* arm_machine_restart(0, NULL); */ ++} ++ ++ ++/***************************************************************************** ++ * General Setup ++ ****************************************************************************/ ++#define LSQL_GPIO_USB_POWER 9 ++#define LSQL_GPIO_POWER 10 ++#define LSQL_GPIO_USB_POWER2 19 ++#define LSQL_GPIO_AUTO_POWER 22 ++ ++static unsigned int lsql_mpp_modes[] __initdata = { ++ MPP0_GPIO, /* LED_PWR */ ++ MPP1_GPIO, /* for debugging purposes, change to MPP1_UNUSED for final */ ++ MPP2_GPIO, /* LED_ALARM */ /* looks like it should be 2 by the uboot sources, but doesnt successfully trigger the3 top LED*/ ++ MPP3_GPIO, /* LED_INFO */ ++ MPP4_GPIO, ++ MPP5_GPIO, ++ MPP6_GPIO, /* FAN_LCK */ ++ MPP7_GPIO, /* FUNC */ ++ MPP8_GPIO, ++ MPP9_GPIO, /* USB_PWR */ ++ MPP10_GPIO, /* POWER */ ++ MPP11_GPIO, ++ MPP12_GPIO, ++ MPP13_GPIO, ++ MPP14_GPIO, /* FAN_HIGH */ ++ MPP15_GPIO, ++ MPP16_GPIO, ++ MPP17_GPIO, /* FAN_LOW */ ++ MPP18_GPIO, /* LED_FUNC*/ ++ MPP19_GPIO, /* USB_PWR2 */ ++ MPP22_GPIO, /* AUTO_POWER*/ ++ 0, ++}; ++ ++static void __init lsql_init(void) ++{ ++ /* ++ * Setup basic Orion functions. Need to be called early. ++ */ ++ orion5x_init(); ++ ++ orion5x_mpp_conf(lsql_mpp_modes); ++ ++ /* ++ * Configure peripherals. ++ */ ++ orion5x_ehci0_init(); ++ orion5x_ehci1_init(); ++ orion5x_eth_init(&lsql_eth_data); ++ orion5x_i2c_init(); ++ orion5x_sata_init(&lsql_sata_data); ++ orion5x_uart0_init(); ++ orion5x_xor_init(); ++ ++ orion5x_setup_dev_boot_win(LSQL_NOR_BOOT_BASE, ++ LSQL_NOR_BOOT_SIZE); ++ platform_device_register(&lsql_nor_flash); ++ ++ platform_device_register(&lsql_button_device); ++ ++ platform_device_register(&lsql_leds); ++ ++ i2c_register_board_info(0, &lsql_i2c_rtc, 1); ++ ++ /* enable USB power */ ++ gpio_set_value(LSQL_GPIO_USB_POWER, 1); ++ gpio_set_value(LSQL_GPIO_USB_POWER2, 1); ++ ++ ++ printk(KERN_INFO "Buffalo Linkstation fan driver loaded\n"); ++ sprintf(lsql_fan_state, "fast"); ++ gpio_set_value(LSQL_GPIO_FAN_LOW, 0); ++ gpio_set_value(LSQL_GPIO_FAN_HIGH, 1); ++ ++ lsql_proc_dir_root = proc_mkdir( "linkstation", NULL ); ++ lsql_proc_dir_gpio = proc_mkdir( "gpio", lsql_proc_dir_root ); ++ lsql_fan_proc_file = create_proc_entry( "fan", S_IRUGO, lsql_proc_dir_gpio ); ++ if( lsql_fan_proc_file ) { ++ lsql_fan_proc_file->read_proc = lsql_fan_get; ++ lsql_fan_proc_file->write_proc = lsql_fan_set; ++ lsql_fan_proc_file->data = NULL; ++ } else ++ { ++ printk(KERN_INFO "Registration of fan device failed\n"); ++ } ++ ++ /* register power-off method */ ++ pm_power_off = lsql_power_off; ++ ++ pr_info("%s: finished\n", __func__); ++} ++ ++#ifdef CONFIG_MACH_LINKSTATION_LSQL ++MACHINE_START(LINKSTATION_LSQL, "Buffalo Linkstation Quad QL/R5") ++ .atag_offset = 0x00000100, ++ .init_machine = lsql_init, ++ .map_io = orion5x_map_io, ++ .init_early = orion5x_init_early, ++ .init_irq = orion5x_init_irq, ++ .timer = &orion5x_timer, ++ .fixup = tag_fixup_mem32, ++ .restart = orion5x_restart, ++MACHINE_END ++#endif ++ ++ +diff -uNr linux-3.3.8-go.orig/arch/arm/mach-orion5x/Makefile linux-3.3.8-go/arch/arm/mach-orion5x/Makefile +--- linux-3.3.8-go.orig/arch/arm/mach-orion5x/Makefile 2012-11-22 21:40:48.647919616 +0100 ++++ 
linux-3.3.8-go/arch/arm/mach-orion5x/Makefile 2012-11-22 21:41:29.391436749 +0100 +@@ -7,6 +7,7 @@ + obj-$(CONFIG_MACH_LINKSTATION_MINI) += lsmini-setup.o + obj-$(CONFIG_MACH_LINKSTATION_PRODUO) += lsproduo-setup.o + obj-$(CONFIG_MACH_LINKSTATION_LS_HGL) += ls_hgl-setup.o ++obj-$(CONFIG_MACH_LINKSTATION_LSQL) += lsql-setup.o + obj-$(CONFIG_MACH_DNS323) += dns323-setup.o + obj-$(CONFIG_MACH_TS209) += ts209-setup.o tsx09-common.o + obj-$(CONFIG_MACH_TS409) += ts409-setup.o tsx09-common.o +diff -uNr linux-3.3.8-go.orig/arch/arm/tools/mach-types linux-3.3.8-go/arch/arm/tools/mach-types +--- linux-3.3.8-go.orig/arch/arm/tools/mach-types 2012-11-22 21:40:48.446921940 +0100 ++++ linux-3.3.8-go/arch/arm/tools/mach-types 2012-11-22 21:41:53.355153632 +0100 +@@ -1172,3 +1172,4 @@ + pov2 MACH_POV2 POV2 3889 + ipod_touch_2g MACH_IPOD_TOUCH_2G IPOD_TOUCH_2G 3890 + da850_pqab MACH_DA850_PQAB DA850_PQAB 3891 ++linkstation_lsql MACH_LINKSTATION_LSQL LINKSTATION_LSQL 4238 diff --git a/3.3.8/vserver-3.3.8-vs2.3.3.4.patch b/3.3.8/vserver-3.3.8-vs2.3.3.4.patch new file mode 100644 index 0000000..ac73224 --- /dev/null +++ b/3.3.8/vserver-3.3.8-vs2.3.3.4.patch @@ -0,0 +1,26065 @@ +diff -NurpP --minimal linux-3.3.8/Documentation/vserver/debug.txt linux-3.3.8-vs2.3.3.4/Documentation/vserver/debug.txt +--- linux-3.3.8/Documentation/vserver/debug.txt 1970-01-01 01:00:00.000000000 +0100 ++++ linux-3.3.8-vs2.3.3.4/Documentation/vserver/debug.txt 2012-02-24 03:55:06.000000000 +0100 +@@ -0,0 +1,154 @@ ++ ++debug_cvirt: ++ ++ 2 4 "vx_map_tgid: %p/%llx: %d -> %d" ++ "vx_rmap_tgid: %p/%llx: %d -> %d" ++ ++debug_dlim: ++ ++ 0 1 "ALLOC (%p,#%d)%c inode (%d)" ++ "FREE (%p,#%d)%c inode" ++ 1 2 "ALLOC (%p,#%d)%c %lld bytes (%d)" ++ "FREE (%p,#%d)%c %lld bytes" ++ 2 4 "ADJUST: %lld,%lld on %ld,%ld [mult=%d]" ++ 3 8 "ext3_has_free_blocks(%p): %lu<%lu+1, %c, %u!=%u r=%d" ++ "ext3_has_free_blocks(%p): free=%lu, root=%lu" ++ "rcu_free_dl_info(%p)" ++ 4 10 "alloc_dl_info(%p,%d) = %p" ++ "dealloc_dl_info(%p)" ++ "get_dl_info(%p[#%d.%d])" ++ "put_dl_info(%p[#%d.%d])" ++ 5 20 "alloc_dl_info(%p,%d)*" ++ 6 40 "__hash_dl_info: %p[#%d]" ++ "__unhash_dl_info: %p[#%d]" ++ 7 80 "locate_dl_info(%p,#%d) = %p" ++ ++debug_misc: ++ ++ 0 1 "destroy_dqhash: %p [#0x%08x] c=%d" ++ "new_dqhash: %p [#0x%08x]" ++ "vroot[%d]_clr_dev: dev=%p[%lu,%d:%d]" ++ "vroot[%d]_get_real_bdev: dev=%p[%lu,%d:%d]" ++ "vroot[%d]_set_dev: dev=%p[%lu,%d:%d]" ++ "vroot_get_real_bdev not set" ++ 1 2 "cow_break_link(»%s«)" ++ "temp copy »%s«" ++ 2 4 "dentry_open(new): %p" ++ "dentry_open(old): %p" ++ "lookup_create(new): %p" ++ "old path »%s«" ++ "path_lookup(old): %d" ++ "vfs_create(new): %d" ++ "vfs_rename: %d" ++ "vfs_sendfile: %d" ++ 3 8 "fput(new_file=%p[#%d])" ++ "fput(old_file=%p[#%d])" ++ 4 10 "vx_info_kill(%p[#%d],%d,%d) = %d" ++ "vx_info_kill(%p[#%d],%d,%d)*" ++ 5 20 "vs_reboot(%p[#%d],%d)" ++ 6 40 "dropping task %p[#%u,%u] for %p[#%u,%u]" ++ ++debug_net: ++ ++ 2 4 "nx_addr_conflict(%p,%p) %d.%d,%d.%d" ++ 3 8 "inet_bind(%p) %d.%d.%d.%d, %d.%d.%d.%d, %d.%d.%d.%d" ++ "inet_bind(%p)* %p,%p;%lx %d.%d.%d.%d" ++ 4 10 "ip_route_connect(%p) %p,%p;%lx" ++ 5 20 "__addr_in_socket(%p,%d.%d.%d.%d) %p:%d.%d.%d.%d %p;%lx" ++ 6 40 "sk,egf: %p [#%d] (from %d)" ++ "sk,egn: %p [#%d] (from %d)" ++ "sk,req: %p [#%d] (from %d)" ++ "sk: %p [#%d] (from %d)" ++ "tw: %p [#%d] (from %d)" ++ 7 80 "__sock_recvmsg: %p[%p,%p,%p;%d]:%d/%d" ++ "__sock_sendmsg: %p[%p,%p,%p;%d]:%d/%d" ++ ++debug_nid: ++ ++ 0 1 "__lookup_nx_info(#%u): %p[#%u]" ++ "alloc_nx_info(%d) = %p" ++ "create_nx_info(%d) 
(dynamic rejected)" ++ "create_nx_info(%d) = %p (already there)" ++ "create_nx_info(%d) = %p (new)" ++ "dealloc_nx_info(%p)" ++ 1 2 "alloc_nx_info(%d)*" ++ "create_nx_info(%d)*" ++ 2 4 "get_nx_info(%p[#%d.%d])" ++ "put_nx_info(%p[#%d.%d])" ++ 3 8 "claim_nx_info(%p[#%d.%d.%d]) %p" ++ "clr_nx_info(%p[#%d.%d])" ++ "init_nx_info(%p[#%d.%d])" ++ "release_nx_info(%p[#%d.%d.%d]) %p" ++ "set_nx_info(%p[#%d.%d])" ++ 4 10 "__hash_nx_info: %p[#%d]" ++ "__nx_dynamic_id: [#%d]" ++ "__unhash_nx_info: %p[#%d.%d.%d]" ++ 5 20 "moved task %p into nxi:%p[#%d]" ++ "nx_migrate_task(%p,%p[#%d.%d.%d])" ++ "task_get_nx_info(%p)" ++ 6 40 "nx_clear_persistent(%p[#%d])" ++ ++debug_quota: ++ ++ 0 1 "quota_sync_dqh(%p,%d) discard inode %p" ++ 1 2 "quota_sync_dqh(%p,%d)" ++ "sync_dquots(%p,%d)" ++ "sync_dquots_dqh(%p,%d)" ++ 3 8 "do_quotactl(%p,%d,cmd=%d,id=%d,%p)" ++ ++debug_switch: ++ ++ 0 1 "vc: VCMD_%02d_%d[%d], %d,%p [%d,%d,%x,%x]" ++ 1 2 "vc: VCMD_%02d_%d[%d] = %08lx(%ld) [%d,%d]" ++ 4 10 "%s: (%s %s) returned %s with %d" ++ ++debug_tag: ++ ++ 7 80 "dx_parse_tag(»%s«): %d:#%d" ++ "dx_propagate_tag(%p[#%lu.%d]): %d,%d" ++ ++debug_xid: ++ ++ 0 1 "__lookup_vx_info(#%u): %p[#%u]" ++ "alloc_vx_info(%d) = %p" ++ "alloc_vx_info(%d)*" ++ "create_vx_info(%d) (dynamic rejected)" ++ "create_vx_info(%d) = %p (already there)" ++ "create_vx_info(%d) = %p (new)" ++ "dealloc_vx_info(%p)" ++ "loc_vx_info(%d) = %p (found)" ++ "loc_vx_info(%d) = %p (new)" ++ "loc_vx_info(%d) = %p (not available)" ++ 1 2 "create_vx_info(%d)*" ++ "loc_vx_info(%d)*" ++ 2 4 "get_vx_info(%p[#%d.%d])" ++ "put_vx_info(%p[#%d.%d])" ++ 3 8 "claim_vx_info(%p[#%d.%d.%d]) %p" ++ "clr_vx_info(%p[#%d.%d])" ++ "init_vx_info(%p[#%d.%d])" ++ "release_vx_info(%p[#%d.%d.%d]) %p" ++ "set_vx_info(%p[#%d.%d])" ++ 4 10 "__hash_vx_info: %p[#%d]" ++ "__unhash_vx_info: %p[#%d.%d.%d]" ++ "__vx_dynamic_id: [#%d]" ++ 5 20 "enter_vx_info(%p[#%d],%p) %p[#%d,%p]" ++ "leave_vx_info(%p[#%d,%p]) %p[#%d,%p]" ++ "moved task %p into vxi:%p[#%d]" ++ "task_get_vx_info(%p)" ++ "vx_migrate_task(%p,%p[#%d.%d])" ++ 6 40 "vx_clear_persistent(%p[#%d])" ++ "vx_exit_init(%p[#%d],%p[#%d,%d,%d])" ++ "vx_set_init(%p[#%d],%p[#%d,%d,%d])" ++ "vx_set_persistent(%p[#%d])" ++ "vx_set_reaper(%p[#%d],%p[#%d,%d])" ++ 7 80 "vx_child_reaper(%p[#%u,%u]) = %p[#%u,%u]" ++ ++ ++debug_limit: ++ ++ n 2^n "vx_acc_cres[%5d,%s,%2d]: %5d%s" ++ "vx_cres_avail[%5d,%s,%2d]: %5ld > %5d + %5d" ++ ++ m 2^m "vx_acc_page[%5d,%s,%2d]: %5d%s" ++ "vx_acc_pages[%5d,%s,%2d]: %5d += %5d" ++ "vx_pages_avail[%5d,%s,%2d]: %5ld > %5d + %5d" +diff -NurpP --minimal linux-3.3.8/arch/alpha/Kconfig linux-3.3.8-vs2.3.3.4/arch/alpha/Kconfig +--- linux-3.3.8/arch/alpha/Kconfig 2012-03-19 19:46:27.000000000 +0100 ++++ linux-3.3.8-vs2.3.3.4/arch/alpha/Kconfig 2012-02-24 03:55:06.000000000 +0100 +@@ -662,6 +662,8 @@ config DUMMY_CONSOLE + depends on VGA_HOSE + default y + ++source "kernel/vserver/Kconfig" ++ + source "security/Kconfig" + + source "crypto/Kconfig" +diff -NurpP --minimal linux-3.3.8/arch/alpha/kernel/entry.S linux-3.3.8-vs2.3.3.4/arch/alpha/kernel/entry.S +--- linux-3.3.8/arch/alpha/kernel/entry.S 2010-10-21 13:06:45.000000000 +0200 ++++ linux-3.3.8-vs2.3.3.4/arch/alpha/kernel/entry.S 2012-02-24 03:55:06.000000000 +0100 +@@ -860,24 +860,15 @@ sys_getxgid: + .globl sys_getxpid + .ent sys_getxpid + sys_getxpid: ++ lda $sp, -16($sp) ++ stq $26, 0($sp) + .prologue 0 +- ldq $2, TI_TASK($8) + +- /* See linux/kernel/timer.c sys_getppid for discussion +- about this loop. 
*/ +- ldq $3, TASK_GROUP_LEADER($2) +- ldq $4, TASK_REAL_PARENT($3) +- ldl $0, TASK_TGID($2) +-1: ldl $1, TASK_TGID($4) +-#ifdef CONFIG_SMP +- mov $4, $5 +- mb +- ldq $3, TASK_GROUP_LEADER($2) +- ldq $4, TASK_REAL_PARENT($3) +- cmpeq $4, $5, $5 +- beq $5, 1b +-#endif +- stq $1, 80($sp) ++ lda $16, 96($sp) ++ jsr $26, do_getxpid ++ ldq $26, 0($sp) ++ ++ lda $sp, 16($sp) + ret + .end sys_getxpid + +diff -NurpP --minimal linux-3.3.8/arch/alpha/kernel/ptrace.c linux-3.3.8-vs2.3.3.4/arch/alpha/kernel/ptrace.c +--- linux-3.3.8/arch/alpha/kernel/ptrace.c 2011-01-05 21:48:40.000000000 +0100 ++++ linux-3.3.8-vs2.3.3.4/arch/alpha/kernel/ptrace.c 2012-02-24 03:55:06.000000000 +0100 +@@ -13,6 +13,7 @@ + #include + #include + #include ++#include + + #include + #include +diff -NurpP --minimal linux-3.3.8/arch/alpha/kernel/systbls.S linux-3.3.8-vs2.3.3.4/arch/alpha/kernel/systbls.S +--- linux-3.3.8/arch/alpha/kernel/systbls.S 2012-01-09 16:13:54.000000000 +0100 ++++ linux-3.3.8-vs2.3.3.4/arch/alpha/kernel/systbls.S 2012-02-24 03:55:06.000000000 +0100 +@@ -446,7 +446,7 @@ sys_call_table: + .quad sys_stat64 /* 425 */ + .quad sys_lstat64 + .quad sys_fstat64 +- .quad sys_ni_syscall /* sys_vserver */ ++ .quad sys_vserver /* sys_vserver */ + .quad sys_ni_syscall /* sys_mbind */ + .quad sys_ni_syscall /* sys_get_mempolicy */ + .quad sys_ni_syscall /* sys_set_mempolicy */ +diff -NurpP --minimal linux-3.3.8/arch/alpha/kernel/traps.c linux-3.3.8-vs2.3.3.4/arch/alpha/kernel/traps.c +--- linux-3.3.8/arch/alpha/kernel/traps.c 2010-10-21 13:06:46.000000000 +0200 ++++ linux-3.3.8-vs2.3.3.4/arch/alpha/kernel/traps.c 2012-02-24 03:55:06.000000000 +0100 +@@ -183,7 +183,8 @@ die_if_kernel(char * str, struct pt_regs + #ifdef CONFIG_SMP + printk("CPU %d ", hard_smp_processor_id()); + #endif +- printk("%s(%d): %s %ld\n", current->comm, task_pid_nr(current), str, err); ++ printk("%s(%d[#%u]): %s %ld\n", current->comm, ++ task_pid_nr(current), current->xid, str, err); + dik_show_regs(regs, r9_15); + add_taint(TAINT_DIE); + dik_show_trace((unsigned long *)(regs+1)); +diff -NurpP --minimal linux-3.3.8/arch/arm/Kconfig linux-3.3.8-vs2.3.3.4/arch/arm/Kconfig +--- linux-3.3.8/arch/arm/Kconfig 2012-06-08 15:23:43.000000000 +0200 ++++ linux-3.3.8-vs2.3.3.4/arch/arm/Kconfig 2012-05-09 04:08:07.000000000 +0200 +@@ -2275,6 +2275,8 @@ source "fs/Kconfig" + + source "arch/arm/Kconfig.debug" + ++source "kernel/vserver/Kconfig" ++ + source "security/Kconfig" + + source "crypto/Kconfig" +diff -NurpP --minimal linux-3.3.8/arch/arm/kernel/calls.S linux-3.3.8-vs2.3.3.4/arch/arm/kernel/calls.S +--- linux-3.3.8/arch/arm/kernel/calls.S 2012-01-09 16:13:54.000000000 +0100 ++++ linux-3.3.8-vs2.3.3.4/arch/arm/kernel/calls.S 2012-02-24 03:55:06.000000000 +0100 +@@ -322,7 +322,7 @@ + /* 310 */ CALL(sys_request_key) + CALL(sys_keyctl) + CALL(ABI(sys_semtimedop, sys_oabi_semtimedop)) +-/* vserver */ CALL(sys_ni_syscall) ++ CALL(sys_vserver) + CALL(sys_ioprio_set) + /* 315 */ CALL(sys_ioprio_get) + CALL(sys_inotify_init) +diff -NurpP --minimal linux-3.3.8/arch/arm/kernel/process.c linux-3.3.8-vs2.3.3.4/arch/arm/kernel/process.c +--- linux-3.3.8/arch/arm/kernel/process.c 2012-03-19 19:46:28.000000000 +0100 ++++ linux-3.3.8-vs2.3.3.4/arch/arm/kernel/process.c 2012-02-24 03:55:06.000000000 +0100 +@@ -353,7 +353,8 @@ void __show_regs(struct pt_regs *regs) + void show_regs(struct pt_regs * regs) + { + printk("\n"); +- printk("Pid: %d, comm: %20s\n", task_pid_nr(current), current->comm); ++ printk("Pid: %d[#%u], comm: %20s\n", ++ task_pid_nr(current), 
current->xid, current->comm); + __show_regs(regs); + dump_stack(); + } +diff -NurpP --minimal linux-3.3.8/arch/arm/kernel/traps.c linux-3.3.8-vs2.3.3.4/arch/arm/kernel/traps.c +--- linux-3.3.8/arch/arm/kernel/traps.c 2012-06-08 15:23:43.000000000 +0200 ++++ linux-3.3.8-vs2.3.3.4/arch/arm/kernel/traps.c 2012-06-08 15:27:44.000000000 +0200 +@@ -244,8 +244,8 @@ static int __die(const char *str, int er + + print_modules(); + __show_regs(regs); +- printk(KERN_EMERG "Process %.*s (pid: %d, stack limit = 0x%p)\n", +- TASK_COMM_LEN, tsk->comm, task_pid_nr(tsk), thread + 1); ++ printk(KERN_EMERG "Process %.*s (pid: %d:#%u, stack limit = 0x%p)\n", ++ TASK_COMM_LEN, tsk->comm, task_pid_nr(tsk), tsk->xid, thread + 1); + + if (!user_mode(regs) || in_interrupt()) { + dump_mem(KERN_EMERG, "Stack: ", regs->ARM_sp, +diff -NurpP --minimal linux-3.3.8/arch/cris/Kconfig linux-3.3.8-vs2.3.3.4/arch/cris/Kconfig +--- linux-3.3.8/arch/cris/Kconfig 2012-03-19 19:46:39.000000000 +0100 ++++ linux-3.3.8-vs2.3.3.4/arch/cris/Kconfig 2012-02-24 03:55:06.000000000 +0100 +@@ -675,6 +675,8 @@ source "drivers/staging/Kconfig" + + source "arch/cris/Kconfig.debug" + ++source "kernel/vserver/Kconfig" ++ + source "security/Kconfig" + + source "crypto/Kconfig" +diff -NurpP --minimal linux-3.3.8/arch/frv/kernel/kernel_thread.S linux-3.3.8-vs2.3.3.4/arch/frv/kernel/kernel_thread.S +--- linux-3.3.8/arch/frv/kernel/kernel_thread.S 2008-12-25 00:26:37.000000000 +0100 ++++ linux-3.3.8-vs2.3.3.4/arch/frv/kernel/kernel_thread.S 2012-02-24 03:55:06.000000000 +0100 +@@ -37,7 +37,7 @@ kernel_thread: + + # start by forking the current process, but with shared VM + setlos.p #__NR_clone,gr7 ; syscall number +- ori gr10,#CLONE_VM,gr8 ; first syscall arg [clone_flags] ++ ori gr10,#CLONE_KT,gr8 ; first syscall arg [clone_flags] + sethi.p #0xe4e4,gr9 ; second syscall arg [newsp] + setlo #0xe4e4,gr9 + setlos.p #0,gr10 ; third syscall arg [parent_tidptr] +diff -NurpP --minimal linux-3.3.8/arch/h8300/Kconfig linux-3.3.8-vs2.3.3.4/arch/h8300/Kconfig +--- linux-3.3.8/arch/h8300/Kconfig 2012-03-19 19:46:39.000000000 +0100 ++++ linux-3.3.8-vs2.3.3.4/arch/h8300/Kconfig 2012-02-24 03:55:06.000000000 +0100 +@@ -214,6 +214,8 @@ source "fs/Kconfig" + + source "arch/h8300/Kconfig.debug" + ++source "kernel/vserver/Kconfig" ++ + source "security/Kconfig" + + source "crypto/Kconfig" +diff -NurpP --minimal linux-3.3.8/arch/ia64/Kconfig linux-3.3.8-vs2.3.3.4/arch/ia64/Kconfig +--- linux-3.3.8/arch/ia64/Kconfig 2012-03-19 19:46:39.000000000 +0100 ++++ linux-3.3.8-vs2.3.3.4/arch/ia64/Kconfig 2012-02-24 03:55:06.000000000 +0100 +@@ -654,6 +654,8 @@ source "fs/Kconfig" + + source "arch/ia64/Kconfig.debug" + ++source "kernel/vserver/Kconfig" ++ + source "security/Kconfig" + + source "crypto/Kconfig" +diff -NurpP --minimal linux-3.3.8/arch/ia64/kernel/entry.S linux-3.3.8-vs2.3.3.4/arch/ia64/kernel/entry.S +--- linux-3.3.8/arch/ia64/kernel/entry.S 2012-03-19 19:46:40.000000000 +0100 ++++ linux-3.3.8-vs2.3.3.4/arch/ia64/kernel/entry.S 2012-02-24 03:55:06.000000000 +0100 +@@ -1714,7 +1714,7 @@ sys_call_table: + data8 sys_mq_notify + data8 sys_mq_getsetattr + data8 sys_kexec_load +- data8 sys_ni_syscall // reserved for vserver ++ data8 sys_vserver + data8 sys_waitid // 1270 + data8 sys_add_key + data8 sys_request_key +diff -NurpP --minimal linux-3.3.8/arch/ia64/kernel/process.c linux-3.3.8-vs2.3.3.4/arch/ia64/kernel/process.c +--- linux-3.3.8/arch/ia64/kernel/process.c 2011-03-15 18:06:39.000000000 +0100 ++++ linux-3.3.8-vs2.3.3.4/arch/ia64/kernel/process.c 2012-02-24 
03:55:06.000000000 +0100 +@@ -109,8 +109,8 @@ show_regs (struct pt_regs *regs) + unsigned long ip = regs->cr_iip + ia64_psr(regs)->ri; + + print_modules(); +- printk("\nPid: %d, CPU %d, comm: %20s\n", task_pid_nr(current), +- smp_processor_id(), current->comm); ++ printk("\nPid: %d[#%u], CPU %d, comm: %20s\n", task_pid_nr(current), ++ current->xid, smp_processor_id(), current->comm); + printk("psr : %016lx ifs : %016lx ip : [<%016lx>] %s (%s)\n", + regs->cr_ipsr, regs->cr_ifs, ip, print_tainted(), + init_utsname()->release); +diff -NurpP --minimal linux-3.3.8/arch/ia64/kernel/ptrace.c linux-3.3.8-vs2.3.3.4/arch/ia64/kernel/ptrace.c +--- linux-3.3.8/arch/ia64/kernel/ptrace.c 2012-03-19 19:46:40.000000000 +0100 ++++ linux-3.3.8-vs2.3.3.4/arch/ia64/kernel/ptrace.c 2012-02-24 03:55:06.000000000 +0100 +@@ -21,6 +21,7 @@ + #include + #include + #include ++#include + + #include + #include +diff -NurpP --minimal linux-3.3.8/arch/ia64/kernel/traps.c linux-3.3.8-vs2.3.3.4/arch/ia64/kernel/traps.c +--- linux-3.3.8/arch/ia64/kernel/traps.c 2010-07-07 18:31:01.000000000 +0200 ++++ linux-3.3.8-vs2.3.3.4/arch/ia64/kernel/traps.c 2012-02-24 03:55:06.000000000 +0100 +@@ -59,8 +59,9 @@ die (const char *str, struct pt_regs *re + put_cpu(); + + if (++die.lock_owner_depth < 3) { +- printk("%s[%d]: %s %ld [%d]\n", +- current->comm, task_pid_nr(current), str, err, ++die_counter); ++ printk("%s[%d[#%u]]: %s %ld [%d]\n", ++ current->comm, task_pid_nr(current), current->xid, ++ str, err, ++die_counter); + if (notify_die(DIE_OOPS, str, regs, err, 255, SIGSEGV) + != NOTIFY_STOP) + show_regs(regs); +@@ -323,8 +324,9 @@ handle_fpu_swa (int fp_fault, struct pt_ + if ((last.count & 15) < 5 && (ia64_fetchadd(1, &last.count, acq) & 15) < 5) { + last.time = current_jiffies + 5 * HZ; + printk(KERN_WARNING +- "%s(%d): floating-point assist fault at ip %016lx, isr %016lx\n", +- current->comm, task_pid_nr(current), regs->cr_iip + ia64_psr(regs)->ri, isr); ++ "%s(%d[#%u]): floating-point assist fault at ip %016lx, isr %016lx\n", ++ current->comm, task_pid_nr(current), current->xid, ++ regs->cr_iip + ia64_psr(regs)->ri, isr); + } + } + } +diff -NurpP --minimal linux-3.3.8/arch/m32r/kernel/traps.c linux-3.3.8-vs2.3.3.4/arch/m32r/kernel/traps.c +--- linux-3.3.8/arch/m32r/kernel/traps.c 2011-10-24 18:44:58.000000000 +0200 ++++ linux-3.3.8-vs2.3.3.4/arch/m32r/kernel/traps.c 2012-02-24 03:55:06.000000000 +0100 +@@ -196,8 +196,9 @@ static void show_registers(struct pt_reg + } else { + printk("SPI: %08lx\n", sp); + } +- printk("Process %s (pid: %d, process nr: %d, stackpage=%08lx)", +- current->comm, task_pid_nr(current), 0xffff & i, 4096+(unsigned long)current); ++ printk("Process %s (pid: %d[#%u], process nr: %d, stackpage=%08lx)", ++ current->comm, task_pid_nr(current), current->xid, ++ 0xffff & i, 4096+(unsigned long)current); + + /* + * When in-kernel, we also print out the stack and code at the +diff -NurpP --minimal linux-3.3.8/arch/m68k/Kconfig linux-3.3.8-vs2.3.3.4/arch/m68k/Kconfig +--- linux-3.3.8/arch/m68k/Kconfig 2012-03-19 19:46:40.000000000 +0100 ++++ linux-3.3.8-vs2.3.3.4/arch/m68k/Kconfig 2012-02-24 03:55:06.000000000 +0100 +@@ -145,6 +145,8 @@ source "fs/Kconfig" + + source "arch/m68k/Kconfig.debug" + ++source "kernel/vserver/Kconfig" ++ + source "security/Kconfig" + + source "crypto/Kconfig" +diff -NurpP --minimal linux-3.3.8/arch/mips/Kconfig linux-3.3.8-vs2.3.3.4/arch/mips/Kconfig +--- linux-3.3.8/arch/mips/Kconfig 2012-03-19 19:46:41.000000000 +0100 ++++ linux-3.3.8-vs2.3.3.4/arch/mips/Kconfig 2012-02-24 
03:55:06.000000000 +0100 +@@ -2514,6 +2514,8 @@ source "fs/Kconfig" + + source "arch/mips/Kconfig.debug" + ++source "kernel/vserver/Kconfig" ++ + source "security/Kconfig" + + source "crypto/Kconfig" +diff -NurpP --minimal linux-3.3.8/arch/mips/kernel/ptrace.c linux-3.3.8-vs2.3.3.4/arch/mips/kernel/ptrace.c +--- linux-3.3.8/arch/mips/kernel/ptrace.c 2012-03-19 19:46:43.000000000 +0100 ++++ linux-3.3.8-vs2.3.3.4/arch/mips/kernel/ptrace.c 2012-02-24 03:55:06.000000000 +0100 +@@ -25,6 +25,7 @@ + #include + #include + #include ++#include + + #include + #include +@@ -263,6 +264,9 @@ long arch_ptrace(struct task_struct *chi + void __user *datavp = (void __user *) data; + unsigned long __user *datalp = (void __user *) data; + ++ if (!vx_check(vx_task_xid(child), VS_WATCH_P | VS_IDENT)) ++ goto out; ++ + switch (request) { + /* when I and D space are separate, these will need to be fixed. */ + case PTRACE_PEEKTEXT: /* read word at location addr. */ +diff -NurpP --minimal linux-3.3.8/arch/mips/kernel/scall32-o32.S linux-3.3.8-vs2.3.3.4/arch/mips/kernel/scall32-o32.S +--- linux-3.3.8/arch/mips/kernel/scall32-o32.S 2012-01-09 16:14:05.000000000 +0100 ++++ linux-3.3.8-vs2.3.3.4/arch/mips/kernel/scall32-o32.S 2012-02-24 03:55:06.000000000 +0100 +@@ -523,7 +523,7 @@ einval: li v0, -ENOSYS + sys sys_mq_timedreceive 5 + sys sys_mq_notify 2 /* 4275 */ + sys sys_mq_getsetattr 3 +- sys sys_ni_syscall 0 /* sys_vserver */ ++ sys sys_vserver 3 + sys sys_waitid 5 + sys sys_ni_syscall 0 /* available, was setaltroot */ + sys sys_add_key 5 /* 4280 */ +diff -NurpP --minimal linux-3.3.8/arch/mips/kernel/scall64-64.S linux-3.3.8-vs2.3.3.4/arch/mips/kernel/scall64-64.S +--- linux-3.3.8/arch/mips/kernel/scall64-64.S 2012-01-09 16:14:05.000000000 +0100 ++++ linux-3.3.8-vs2.3.3.4/arch/mips/kernel/scall64-64.S 2012-02-24 03:55:06.000000000 +0100 +@@ -362,7 +362,7 @@ sys_call_table: + PTR sys_mq_timedreceive + PTR sys_mq_notify + PTR sys_mq_getsetattr /* 5235 */ +- PTR sys_ni_syscall /* sys_vserver */ ++ PTR sys_vserver + PTR sys_waitid + PTR sys_ni_syscall /* available, was setaltroot */ + PTR sys_add_key +diff -NurpP --minimal linux-3.3.8/arch/mips/kernel/scall64-n32.S linux-3.3.8-vs2.3.3.4/arch/mips/kernel/scall64-n32.S +--- linux-3.3.8/arch/mips/kernel/scall64-n32.S 2012-01-09 16:14:05.000000000 +0100 ++++ linux-3.3.8-vs2.3.3.4/arch/mips/kernel/scall64-n32.S 2012-02-24 03:55:06.000000000 +0100 +@@ -361,7 +361,7 @@ EXPORT(sysn32_call_table) + PTR compat_sys_mq_timedreceive + PTR compat_sys_mq_notify + PTR compat_sys_mq_getsetattr +- PTR sys_ni_syscall /* 6240, sys_vserver */ ++ PTR sys32_vserver /* 6240 */ + PTR compat_sys_waitid + PTR sys_ni_syscall /* available, was setaltroot */ + PTR sys_add_key +diff -NurpP --minimal linux-3.3.8/arch/mips/kernel/scall64-o32.S linux-3.3.8-vs2.3.3.4/arch/mips/kernel/scall64-o32.S +--- linux-3.3.8/arch/mips/kernel/scall64-o32.S 2012-01-09 16:14:05.000000000 +0100 ++++ linux-3.3.8-vs2.3.3.4/arch/mips/kernel/scall64-o32.S 2012-02-24 03:55:06.000000000 +0100 +@@ -480,7 +480,7 @@ sys_call_table: + PTR compat_sys_mq_timedreceive + PTR compat_sys_mq_notify /* 4275 */ + PTR compat_sys_mq_getsetattr +- PTR sys_ni_syscall /* sys_vserver */ ++ PTR sys32_vserver + PTR sys_32_waitid + PTR sys_ni_syscall /* available, was setaltroot */ + PTR sys_add_key /* 4280 */ +diff -NurpP --minimal linux-3.3.8/arch/mips/kernel/traps.c linux-3.3.8-vs2.3.3.4/arch/mips/kernel/traps.c +--- linux-3.3.8/arch/mips/kernel/traps.c 2012-03-19 19:46:43.000000000 +0100 ++++ linux-3.3.8-vs2.3.3.4/arch/mips/kernel/traps.c 
2012-03-19 20:52:09.000000000 +0100 +@@ -344,9 +344,10 @@ void show_registers(struct pt_regs *regs + + __show_regs(regs); + print_modules(); +- printk("Process %s (pid: %d, threadinfo=%p, task=%p, tls=%0*lx)\n", +- current->comm, current->pid, current_thread_info(), current, +- field, current_thread_info()->tp_value); ++ printk("Process %s (pid: %d:#%u, threadinfo=%p, task=%p, tls=%0*lx)\n", ++ current->comm, task_pid_nr(current), current->xid, ++ current_thread_info(), current, ++ field, current_thread_info()->tp_value); + if (cpu_has_userlocal) { + unsigned long tls; + +diff -NurpP --minimal linux-3.3.8/arch/parisc/Kconfig linux-3.3.8-vs2.3.3.4/arch/parisc/Kconfig +--- linux-3.3.8/arch/parisc/Kconfig 2012-03-19 19:46:44.000000000 +0100 ++++ linux-3.3.8-vs2.3.3.4/arch/parisc/Kconfig 2012-02-24 03:55:06.000000000 +0100 +@@ -279,6 +279,8 @@ source "fs/Kconfig" + + source "arch/parisc/Kconfig.debug" + ++source "kernel/vserver/Kconfig" ++ + source "security/Kconfig" + + source "crypto/Kconfig" +diff -NurpP --minimal linux-3.3.8/arch/parisc/kernel/syscall_table.S linux-3.3.8-vs2.3.3.4/arch/parisc/kernel/syscall_table.S +--- linux-3.3.8/arch/parisc/kernel/syscall_table.S 2011-10-24 18:45:00.000000000 +0200 ++++ linux-3.3.8-vs2.3.3.4/arch/parisc/kernel/syscall_table.S 2012-02-24 03:55:06.000000000 +0100 +@@ -361,7 +361,7 @@ + ENTRY_COMP(mbind) /* 260 */ + ENTRY_COMP(get_mempolicy) + ENTRY_COMP(set_mempolicy) +- ENTRY_SAME(ni_syscall) /* 263: reserved for vserver */ ++ ENTRY_DIFF(vserver) + ENTRY_SAME(add_key) + ENTRY_SAME(request_key) /* 265 */ + ENTRY_SAME(keyctl) +diff -NurpP --minimal linux-3.3.8/arch/parisc/kernel/traps.c linux-3.3.8-vs2.3.3.4/arch/parisc/kernel/traps.c +--- linux-3.3.8/arch/parisc/kernel/traps.c 2011-10-24 18:45:00.000000000 +0200 ++++ linux-3.3.8-vs2.3.3.4/arch/parisc/kernel/traps.c 2012-02-24 03:55:06.000000000 +0100 +@@ -236,8 +236,9 @@ void die_if_kernel(char *str, struct pt_ + if (err == 0) + return; /* STFU */ + +- printk(KERN_CRIT "%s (pid %d): %s (code %ld) at " RFMT "\n", +- current->comm, task_pid_nr(current), str, err, regs->iaoq[0]); ++ printk(KERN_CRIT "%s (pid %d:#%u): %s (code %ld) at " RFMT "\n", ++ current->comm, task_pid_nr(current), current->xid, ++ str, err, regs->iaoq[0]); + #ifdef PRINT_USER_FAULTS + /* XXX for debugging only */ + show_regs(regs); +@@ -270,8 +271,8 @@ void die_if_kernel(char *str, struct pt_ + pdc_console_restart(); + + if (err) +- printk(KERN_CRIT "%s (pid %d): %s (code %ld)\n", +- current->comm, task_pid_nr(current), str, err); ++ printk(KERN_CRIT "%s (pid %d:#%u): %s (code %ld)\n", ++ current->comm, task_pid_nr(current), current->xid, str, err); + + /* Wot's wrong wif bein' racy? 
*/ + if (current->thread.flags & PARISC_KERNEL_DEATH) { +diff -NurpP --minimal linux-3.3.8/arch/parisc/mm/fault.c linux-3.3.8-vs2.3.3.4/arch/parisc/mm/fault.c +--- linux-3.3.8/arch/parisc/mm/fault.c 2010-08-02 16:52:06.000000000 +0200 ++++ linux-3.3.8-vs2.3.3.4/arch/parisc/mm/fault.c 2012-02-24 03:55:06.000000000 +0100 +@@ -237,8 +237,9 @@ bad_area: + + #ifdef PRINT_USER_FAULTS + printk(KERN_DEBUG "\n"); +- printk(KERN_DEBUG "do_page_fault() pid=%d command='%s' type=%lu address=0x%08lx\n", +- task_pid_nr(tsk), tsk->comm, code, address); ++ printk(KERN_DEBUG "do_page_fault() pid=%d:#%u " ++ "command='%s' type=%lu address=0x%08lx\n", ++ task_pid_nr(tsk), tsk->xid, tsk->comm, code, address); + if (vma) { + printk(KERN_DEBUG "vm_start = 0x%08lx, vm_end = 0x%08lx\n", + vma->vm_start, vma->vm_end); +diff -NurpP --minimal linux-3.3.8/arch/powerpc/Kconfig linux-3.3.8-vs2.3.3.4/arch/powerpc/Kconfig +--- linux-3.3.8/arch/powerpc/Kconfig 2012-03-19 19:46:44.000000000 +0100 ++++ linux-3.3.8-vs2.3.3.4/arch/powerpc/Kconfig 2012-02-24 03:55:06.000000000 +0100 +@@ -997,6 +997,8 @@ source "lib/Kconfig" + + source "arch/powerpc/Kconfig.debug" + ++source "kernel/vserver/Kconfig" ++ + source "security/Kconfig" + + config KEYS_COMPAT +diff -NurpP --minimal linux-3.3.8/arch/powerpc/include/asm/unistd.h linux-3.3.8-vs2.3.3.4/arch/powerpc/include/asm/unistd.h +--- linux-3.3.8/arch/powerpc/include/asm/unistd.h 2012-01-09 16:14:05.000000000 +0100 ++++ linux-3.3.8-vs2.3.3.4/arch/powerpc/include/asm/unistd.h 2012-02-24 03:55:06.000000000 +0100 +@@ -275,7 +275,7 @@ + #endif + #define __NR_rtas 255 + #define __NR_sys_debug_setcontext 256 +-/* Number 257 is reserved for vserver */ ++#define __NR_vserver 257 + #define __NR_migrate_pages 258 + #define __NR_mbind 259 + #define __NR_get_mempolicy 260 +diff -NurpP --minimal linux-3.3.8/arch/powerpc/kernel/process.c linux-3.3.8-vs2.3.3.4/arch/powerpc/kernel/process.c +--- linux-3.3.8/arch/powerpc/kernel/process.c 2012-03-19 19:46:45.000000000 +0100 ++++ linux-3.3.8-vs2.3.3.4/arch/powerpc/kernel/process.c 2012-02-24 03:55:06.000000000 +0100 +@@ -656,8 +656,9 @@ void show_regs(struct pt_regs * regs) + #else + printk("DAR: "REG", DSISR: %08lx\n", regs->dar, regs->dsisr); + #endif +- printk("TASK = %p[%d] '%s' THREAD: %p", +- current, task_pid_nr(current), current->comm, task_thread_info(current)); ++ printk("TASK = %p[%d,#%u] '%s' THREAD: %p", ++ current, task_pid_nr(current), current->xid, ++ current->comm, task_thread_info(current)); + + #ifdef CONFIG_SMP + printk(" CPU: %d", raw_smp_processor_id()); +diff -NurpP --minimal linux-3.3.8/arch/powerpc/kernel/traps.c linux-3.3.8-vs2.3.3.4/arch/powerpc/kernel/traps.c +--- linux-3.3.8/arch/powerpc/kernel/traps.c 2012-03-19 19:46:45.000000000 +0100 ++++ linux-3.3.8-vs2.3.3.4/arch/powerpc/kernel/traps.c 2012-02-24 03:55:06.000000000 +0100 +@@ -1105,8 +1105,9 @@ void nonrecoverable_exception(struct pt_ + + void trace_syscall(struct pt_regs *regs) + { +- printk("Task: %p(%d), PC: %08lX/%08lX, Syscall: %3ld, Result: %s%ld %s\n", +- current, task_pid_nr(current), regs->nip, regs->link, regs->gpr[0], ++ printk("Task: %p(%d[#%u]), PC: %08lX/%08lX, Syscall: %3ld, Result: %s%ld %s\n", ++ current, task_pid_nr(current), current->xid, ++ regs->nip, regs->link, regs->gpr[0], + regs->ccr&0x10000000?"Error=":"", regs->gpr[3], print_tainted()); + } + +diff -NurpP --minimal linux-3.3.8/arch/s390/Kconfig linux-3.3.8-vs2.3.3.4/arch/s390/Kconfig +--- linux-3.3.8/arch/s390/Kconfig 2012-06-08 15:23:43.000000000 +0200 ++++ 
linux-3.3.8-vs2.3.3.4/arch/s390/Kconfig 2012-04-23 23:45:14.000000000 +0200 +@@ -637,6 +637,8 @@ source "fs/Kconfig" + + source "arch/s390/Kconfig.debug" + ++source "kernel/vserver/Kconfig" ++ + source "security/Kconfig" + + source "crypto/Kconfig" +diff -NurpP --minimal linux-3.3.8/arch/s390/include/asm/tlb.h linux-3.3.8-vs2.3.3.4/arch/s390/include/asm/tlb.h +--- linux-3.3.8/arch/s390/include/asm/tlb.h 2012-06-08 15:23:43.000000000 +0200 ++++ linux-3.3.8-vs2.3.3.4/arch/s390/include/asm/tlb.h 2012-04-23 23:45:14.000000000 +0200 +@@ -24,6 +24,7 @@ + #include + #include + #include ++ + #include + #include + #include +diff -NurpP --minimal linux-3.3.8/arch/s390/include/asm/unistd.h linux-3.3.8-vs2.3.3.4/arch/s390/include/asm/unistd.h +--- linux-3.3.8/arch/s390/include/asm/unistd.h 2012-03-19 19:46:48.000000000 +0100 ++++ linux-3.3.8-vs2.3.3.4/arch/s390/include/asm/unistd.h 2012-02-24 03:55:06.000000000 +0100 +@@ -202,7 +202,7 @@ + #define __NR_clock_gettime (__NR_timer_create+6) + #define __NR_clock_getres (__NR_timer_create+7) + #define __NR_clock_nanosleep (__NR_timer_create+8) +-/* Number 263 is reserved for vserver */ ++#define __NR_vserver 263 + #define __NR_statfs64 265 + #define __NR_fstatfs64 266 + #define __NR_remap_file_pages 267 +diff -NurpP --minimal linux-3.3.8/arch/s390/kernel/ptrace.c linux-3.3.8-vs2.3.3.4/arch/s390/kernel/ptrace.c +--- linux-3.3.8/arch/s390/kernel/ptrace.c 2012-03-19 19:46:48.000000000 +0100 ++++ linux-3.3.8-vs2.3.3.4/arch/s390/kernel/ptrace.c 2012-03-19 20:53:54.000000000 +0100 +@@ -21,6 +21,7 @@ + #include + #include + #include ++#include + #include + #include + #include +diff -NurpP --minimal linux-3.3.8/arch/s390/kernel/syscalls.S linux-3.3.8-vs2.3.3.4/arch/s390/kernel/syscalls.S +--- linux-3.3.8/arch/s390/kernel/syscalls.S 2012-01-09 16:14:06.000000000 +0100 ++++ linux-3.3.8-vs2.3.3.4/arch/s390/kernel/syscalls.S 2012-02-24 03:55:06.000000000 +0100 +@@ -271,7 +271,7 @@ SYSCALL(sys_clock_settime,sys_clock_sett + SYSCALL(sys_clock_gettime,sys_clock_gettime,sys32_clock_gettime_wrapper) /* 260 */ + SYSCALL(sys_clock_getres,sys_clock_getres,sys32_clock_getres_wrapper) + SYSCALL(sys_clock_nanosleep,sys_clock_nanosleep,sys32_clock_nanosleep_wrapper) +-NI_SYSCALL /* reserved for vserver */ ++SYSCALL(sys_vserver,sys_vserver,sys32_vserver) + SYSCALL(sys_s390_fadvise64_64,sys_ni_syscall,sys32_fadvise64_64_wrapper) + SYSCALL(sys_statfs64,sys_statfs64,compat_sys_statfs64_wrapper) + SYSCALL(sys_fstatfs64,sys_fstatfs64,compat_sys_fstatfs64_wrapper) +diff -NurpP --minimal linux-3.3.8/arch/sh/Kconfig linux-3.3.8-vs2.3.3.4/arch/sh/Kconfig +--- linux-3.3.8/arch/sh/Kconfig 2012-03-19 19:46:49.000000000 +0100 ++++ linux-3.3.8-vs2.3.3.4/arch/sh/Kconfig 2012-02-24 03:55:06.000000000 +0100 +@@ -901,6 +901,8 @@ source "fs/Kconfig" + + source "arch/sh/Kconfig.debug" + ++source "kernel/vserver/Kconfig" ++ + source "security/Kconfig" + + source "crypto/Kconfig" +diff -NurpP --minimal linux-3.3.8/arch/sh/kernel/irq.c linux-3.3.8-vs2.3.3.4/arch/sh/kernel/irq.c +--- linux-3.3.8/arch/sh/kernel/irq.c 2011-07-22 11:17:41.000000000 +0200 ++++ linux-3.3.8-vs2.3.3.4/arch/sh/kernel/irq.c 2012-02-24 03:55:06.000000000 +0100 +@@ -14,6 +14,7 @@ + #include + #include + #include ++// #include + #include + #include + #include +diff -NurpP --minimal linux-3.3.8/arch/sparc/Kconfig linux-3.3.8-vs2.3.3.4/arch/sparc/Kconfig +--- linux-3.3.8/arch/sparc/Kconfig 2012-06-08 15:23:43.000000000 +0200 ++++ linux-3.3.8-vs2.3.3.4/arch/sparc/Kconfig 2012-06-08 15:27:44.000000000 +0200 +@@ -597,6 +597,8 @@ source 
"fs/Kconfig" + + source "arch/sparc/Kconfig.debug" + ++source "kernel/vserver/Kconfig" ++ + source "security/Kconfig" + + source "crypto/Kconfig" +diff -NurpP --minimal linux-3.3.8/arch/sparc/include/asm/unistd.h linux-3.3.8-vs2.3.3.4/arch/sparc/include/asm/unistd.h +--- linux-3.3.8/arch/sparc/include/asm/unistd.h 2012-01-09 16:14:07.000000000 +0100 ++++ linux-3.3.8-vs2.3.3.4/arch/sparc/include/asm/unistd.h 2012-02-24 03:55:06.000000000 +0100 +@@ -335,7 +335,7 @@ + #define __NR_timer_getoverrun 264 + #define __NR_timer_delete 265 + #define __NR_timer_create 266 +-/* #define __NR_vserver 267 Reserved for VSERVER */ ++#define __NR_vserver 267 + #define __NR_io_setup 268 + #define __NR_io_destroy 269 + #define __NR_io_submit 270 +diff -NurpP --minimal linux-3.3.8/arch/sparc/kernel/systbls_32.S linux-3.3.8-vs2.3.3.4/arch/sparc/kernel/systbls_32.S +--- linux-3.3.8/arch/sparc/kernel/systbls_32.S 2012-01-09 16:14:09.000000000 +0100 ++++ linux-3.3.8-vs2.3.3.4/arch/sparc/kernel/systbls_32.S 2012-02-24 03:55:06.000000000 +0100 +@@ -70,7 +70,7 @@ sys_call_table: + /*250*/ .long sys_mremap, sys_sysctl, sys_getsid, sys_fdatasync, sys_ni_syscall + /*255*/ .long sys_sync_file_range, sys_clock_settime, sys_clock_gettime, sys_clock_getres, sys_clock_nanosleep + /*260*/ .long sys_sched_getaffinity, sys_sched_setaffinity, sys_timer_settime, sys_timer_gettime, sys_timer_getoverrun +-/*265*/ .long sys_timer_delete, sys_timer_create, sys_nis_syscall, sys_io_setup, sys_io_destroy ++/*265*/ .long sys_timer_delete, sys_timer_create, sys_vserver, sys_io_setup, sys_io_destroy + /*270*/ .long sys_io_submit, sys_io_cancel, sys_io_getevents, sys_mq_open, sys_mq_unlink + /*275*/ .long sys_mq_timedsend, sys_mq_timedreceive, sys_mq_notify, sys_mq_getsetattr, sys_waitid + /*280*/ .long sys_tee, sys_add_key, sys_request_key, sys_keyctl, sys_openat +diff -NurpP --minimal linux-3.3.8/arch/sparc/kernel/systbls_64.S linux-3.3.8-vs2.3.3.4/arch/sparc/kernel/systbls_64.S +--- linux-3.3.8/arch/sparc/kernel/systbls_64.S 2012-06-08 15:23:43.000000000 +0200 ++++ linux-3.3.8-vs2.3.3.4/arch/sparc/kernel/systbls_64.S 2012-06-08 15:27:44.000000000 +0200 +@@ -71,7 +71,7 @@ sys_call_table32: + /*250*/ .word sys_mremap, compat_sys_sysctl, sys32_getsid, sys_fdatasync, sys_nis_syscall + .word sys32_sync_file_range, compat_sys_clock_settime, compat_sys_clock_gettime, compat_sys_clock_getres, sys32_clock_nanosleep + /*260*/ .word compat_sys_sched_getaffinity, compat_sys_sched_setaffinity, sys32_timer_settime, compat_sys_timer_gettime, sys_timer_getoverrun +- .word sys_timer_delete, compat_sys_timer_create, sys_ni_syscall, compat_sys_io_setup, sys_io_destroy ++ .word sys_timer_delete, compat_sys_timer_create, sys32_vserver, compat_sys_io_setup, sys_io_destroy + /*270*/ .word sys32_io_submit, sys_io_cancel, compat_sys_io_getevents, sys32_mq_open, sys_mq_unlink + .word compat_sys_mq_timedsend, compat_sys_mq_timedreceive, compat_sys_mq_notify, compat_sys_mq_getsetattr, compat_sys_waitid + /*280*/ .word sys32_tee, sys_add_key, sys_request_key, compat_sys_keyctl, compat_sys_openat +@@ -148,7 +148,7 @@ sys_call_table: + /*250*/ .word sys_64_mremap, sys_sysctl, sys_getsid, sys_fdatasync, sys_nis_syscall + .word sys_sync_file_range, sys_clock_settime, sys_clock_gettime, sys_clock_getres, sys_clock_nanosleep + /*260*/ .word sys_sched_getaffinity, sys_sched_setaffinity, sys_timer_settime, sys_timer_gettime, sys_timer_getoverrun +- .word sys_timer_delete, sys_timer_create, sys_ni_syscall, sys_io_setup, sys_io_destroy ++ .word sys_timer_delete, 
sys_timer_create, sys_vserver, sys_io_setup, sys_io_destroy + /*270*/ .word sys_io_submit, sys_io_cancel, sys_io_getevents, sys_mq_open, sys_mq_unlink + .word sys_mq_timedsend, sys_mq_timedreceive, sys_mq_notify, sys_mq_getsetattr, sys_waitid + /*280*/ .word sys_tee, sys_add_key, sys_request_key, sys_keyctl, sys_openat +diff -NurpP --minimal linux-3.3.8/arch/um/Kconfig.rest linux-3.3.8-vs2.3.3.4/arch/um/Kconfig.rest +--- linux-3.3.8/arch/um/Kconfig.rest 2012-01-09 16:14:09.000000000 +0100 ++++ linux-3.3.8-vs2.3.3.4/arch/um/Kconfig.rest 2012-02-24 03:55:06.000000000 +0100 +@@ -12,6 +12,8 @@ source "arch/um/Kconfig.net" + + source "fs/Kconfig" + ++source "kernel/vserver/Kconfig" ++ + source "security/Kconfig" + + source "crypto/Kconfig" +diff -NurpP --minimal linux-3.3.8/arch/um/include/shared/kern_constants.h linux-3.3.8-vs2.3.3.4/arch/um/include/shared/kern_constants.h +--- linux-3.3.8/arch/um/include/shared/kern_constants.h 1970-01-01 01:00:00.000000000 +0100 ++++ linux-3.3.8-vs2.3.3.4/arch/um/include/shared/kern_constants.h 2012-02-24 03:55:06.000000000 +0100 +@@ -0,0 +1 @@ ++#include "../../../../include/generated/asm-offsets.h" +diff -NurpP --minimal linux-3.3.8/arch/um/include/shared/user_constants.h linux-3.3.8-vs2.3.3.4/arch/um/include/shared/user_constants.h +--- linux-3.3.8/arch/um/include/shared/user_constants.h 1970-01-01 01:00:00.000000000 +0100 ++++ linux-3.3.8-vs2.3.3.4/arch/um/include/shared/user_constants.h 2012-02-24 03:55:06.000000000 +0100 +@@ -0,0 +1,40 @@ ++/* ++ * DO NOT MODIFY. ++ * ++ * This file was generated by arch/um/Makefile ++ * ++ */ ++ ++#define HOST_SC_CR2 176 /* offsetof(struct sigcontext, cr2) # */ ++#define HOST_SC_ERR 152 /* offsetof(struct sigcontext, err) # */ ++#define HOST_SC_TRAPNO 160 /* offsetof(struct sigcontext, trapno) # */ ++#define HOST_FP_SIZE 64 /* sizeof(struct _fpstate) / sizeof(unsigned long) # */ ++#define HOST_RBX 5 /* RBX # */ ++#define HOST_RCX 11 /* RCX # */ ++#define HOST_RDI 14 /* RDI # */ ++#define HOST_RSI 13 /* RSI # */ ++#define HOST_RDX 12 /* RDX # */ ++#define HOST_RBP 4 /* RBP # */ ++#define HOST_RAX 10 /* RAX # */ ++#define HOST_R8 9 /* R8 # */ ++#define HOST_R9 8 /* R9 # */ ++#define HOST_R10 7 /* R10 # */ ++#define HOST_R11 6 /* R11 # */ ++#define HOST_R12 3 /* R12 # */ ++#define HOST_R13 2 /* R13 # */ ++#define HOST_R14 1 /* R14 # */ ++#define HOST_R15 0 /* R15 # */ ++#define HOST_ORIG_RAX 15 /* ORIG_RAX # */ ++#define HOST_CS 17 /* CS # */ ++#define HOST_SS 20 /* SS # */ ++#define HOST_EFLAGS 18 /* EFLAGS # */ ++#define HOST_IP 16 /* RIP # */ ++#define HOST_SP 19 /* RSP # */ ++#define UM_FRAME_SIZE 216 /* sizeof(struct user_regs_struct) # */ ++#define UM_POLLIN 1 /* POLLIN # */ ++#define UM_POLLPRI 2 /* POLLPRI # */ ++#define UM_POLLOUT 4 /* POLLOUT # */ ++#define UM_PROT_READ 1 /* PROT_READ # */ ++#define UM_PROT_WRITE 2 /* PROT_WRITE # */ ++#define UM_PROT_EXEC 4 /* PROT_EXEC # */ ++ +diff -NurpP --minimal linux-3.3.8/arch/x86/Kconfig linux-3.3.8-vs2.3.3.4/arch/x86/Kconfig +--- linux-3.3.8/arch/x86/Kconfig 2012-03-19 19:46:49.000000000 +0100 ++++ linux-3.3.8-vs2.3.3.4/arch/x86/Kconfig 2012-02-24 03:55:06.000000000 +0100 +@@ -2213,6 +2213,8 @@ source "fs/Kconfig" + + source "arch/x86/Kconfig.debug" + ++source "kernel/vserver/Kconfig" ++ + source "security/Kconfig" + + source "crypto/Kconfig" +diff -NurpP --minimal linux-3.3.8/arch/x86/syscalls/syscall_32.tbl linux-3.3.8-vs2.3.3.4/arch/x86/syscalls/syscall_32.tbl +--- linux-3.3.8/arch/x86/syscalls/syscall_32.tbl 2012-06-08 15:23:44.000000000 +0200 ++++ 
linux-3.3.8-vs2.3.3.4/arch/x86/syscalls/syscall_32.tbl 2012-04-03 03:02:12.000000000 +0200 +@@ -279,7 +279,7 @@ + 270 i386 tgkill sys_tgkill + 271 i386 utimes sys_utimes compat_sys_utimes + 272 i386 fadvise64_64 sys_fadvise64_64 sys32_fadvise64_64 +-273 i386 vserver ++273 i386 vserver sys_vserver sys32_vserver + 274 i386 mbind sys_mbind + 275 i386 get_mempolicy sys_get_mempolicy compat_sys_get_mempolicy + 276 i386 set_mempolicy sys_set_mempolicy +diff -NurpP --minimal linux-3.3.8/arch/x86/syscalls/syscall_64.tbl linux-3.3.8-vs2.3.3.4/arch/x86/syscalls/syscall_64.tbl +--- linux-3.3.8/arch/x86/syscalls/syscall_64.tbl 2012-03-19 19:46:51.000000000 +0100 ++++ linux-3.3.8-vs2.3.3.4/arch/x86/syscalls/syscall_64.tbl 2012-02-24 16:29:25.000000000 +0100 +@@ -242,7 +242,7 @@ + 233 64 epoll_ctl sys_epoll_ctl + 234 64 tgkill sys_tgkill + 235 64 utimes sys_utimes +-236 64 vserver ++236 64 vserver sys_vserver + 237 64 mbind sys_mbind + 238 64 set_mempolicy sys_set_mempolicy + 239 64 get_mempolicy sys_get_mempolicy +diff -NurpP --minimal linux-3.3.8/drivers/block/Kconfig linux-3.3.8-vs2.3.3.4/drivers/block/Kconfig +--- linux-3.3.8/drivers/block/Kconfig 2012-03-19 19:46:52.000000000 +0100 ++++ linux-3.3.8-vs2.3.3.4/drivers/block/Kconfig 2012-02-24 03:55:06.000000000 +0100 +@@ -290,6 +290,13 @@ config BLK_DEV_CRYPTOLOOP + + source "drivers/block/drbd/Kconfig" + ++config BLK_DEV_VROOT ++ tristate "Virtual Root device support" ++ depends on QUOTACTL ++ ---help--- ++ Saying Y here will allow you to use quota/fs ioctls on a shared ++ partition within a virtual server without compromising security. ++ + config BLK_DEV_NBD + tristate "Network block device support" + depends on NET +diff -NurpP --minimal linux-3.3.8/drivers/block/Makefile linux-3.3.8-vs2.3.3.4/drivers/block/Makefile +--- linux-3.3.8/drivers/block/Makefile 2012-03-19 19:46:52.000000000 +0100 ++++ linux-3.3.8-vs2.3.3.4/drivers/block/Makefile 2012-02-24 03:55:06.000000000 +0100 +@@ -35,6 +35,7 @@ obj-$(CONFIG_VIODASD) += viodasd.o + obj-$(CONFIG_BLK_DEV_SX8) += sx8.o + obj-$(CONFIG_BLK_DEV_UB) += ub.o + obj-$(CONFIG_BLK_DEV_HD) += hd.o ++obj-$(CONFIG_BLK_DEV_VROOT) += vroot.o + + obj-$(CONFIG_XEN_BLKDEV_FRONTEND) += xen-blkfront.o + obj-$(CONFIG_XEN_BLKDEV_BACKEND) += xen-blkback/ +diff -NurpP --minimal linux-3.3.8/drivers/block/loop.c linux-3.3.8-vs2.3.3.4/drivers/block/loop.c +--- linux-3.3.8/drivers/block/loop.c 2012-03-19 19:46:52.000000000 +0100 ++++ linux-3.3.8-vs2.3.3.4/drivers/block/loop.c 2012-02-24 03:55:06.000000000 +0100 +@@ -76,6 +76,7 @@ + #include + #include + #include ++#include + + #include + +@@ -869,6 +870,7 @@ static int loop_set_fd(struct loop_devic + lo->lo_blocksize = lo_blocksize; + lo->lo_device = bdev; + lo->lo_flags = lo_flags; ++ lo->lo_xid = vx_current_xid(); + lo->lo_backing_file = file; + lo->transfer = transfer_none; + lo->ioctl = NULL; +@@ -1001,6 +1003,7 @@ static int loop_clr_fd(struct loop_devic + lo->lo_sizelimit = 0; + lo->lo_encrypt_key_size = 0; + lo->lo_thread = NULL; ++ lo->lo_xid = 0; + memset(lo->lo_encrypt_key, 0, LO_KEY_SIZE); + memset(lo->lo_crypt_name, 0, LO_NAME_SIZE); + memset(lo->lo_file_name, 0, LO_NAME_SIZE); +@@ -1042,7 +1045,7 @@ loop_set_status(struct loop_device *lo, + + if (lo->lo_encrypt_key_size && + lo->lo_key_owner != uid && +- !capable(CAP_SYS_ADMIN)) ++ !vx_capable(CAP_SYS_ADMIN, VXC_ADMIN_CLOOP)) + return -EPERM; + if (lo->lo_state != Lo_bound) + return -ENXIO; +@@ -1132,7 +1135,8 @@ loop_get_status(struct loop_device *lo, + memcpy(info->lo_crypt_name, lo->lo_crypt_name, LO_NAME_SIZE); 
+ info->lo_encrypt_type = + lo->lo_encryption ? lo->lo_encryption->number : 0; +- if (lo->lo_encrypt_key_size && capable(CAP_SYS_ADMIN)) { ++ if (lo->lo_encrypt_key_size && ++ vx_capable(CAP_SYS_ADMIN, VXC_ADMIN_CLOOP)) { + info->lo_encrypt_key_size = lo->lo_encrypt_key_size; + memcpy(info->lo_encrypt_key, lo->lo_encrypt_key, + lo->lo_encrypt_key_size); +@@ -1492,6 +1496,11 @@ static int lo_open(struct block_device * + goto out; + } + ++ if (!vx_check(lo->lo_xid, VS_IDENT|VS_HOSTID|VS_ADMIN_P)) { ++ err = -EACCES; ++ goto out; ++ } ++ + mutex_lock(&lo->lo_ctl_mutex); + lo->lo_refcnt++; + mutex_unlock(&lo->lo_ctl_mutex); +diff -NurpP --minimal linux-3.3.8/drivers/block/vroot.c linux-3.3.8-vs2.3.3.4/drivers/block/vroot.c +--- linux-3.3.8/drivers/block/vroot.c 1970-01-01 01:00:00.000000000 +0100 ++++ linux-3.3.8-vs2.3.3.4/drivers/block/vroot.c 2012-02-24 03:55:06.000000000 +0100 +@@ -0,0 +1,291 @@ ++/* ++ * linux/drivers/block/vroot.c ++ * ++ * written by Herbert Pötzl, 9/11/2002 ++ * ported to 2.6.10 by Herbert Pötzl, 30/12/2004 ++ * ++ * based on the loop.c code by Theodore Ts'o. ++ * ++ * Copyright (C) 2002-2007 by Herbert Pötzl. ++ * Redistribution of this file is permitted under the ++ * GNU General Public License. ++ * ++ */ ++ ++#include ++#include ++#include ++#include ++#include ++#include ++ ++#include ++#include ++ ++ ++static int max_vroot = 8; ++ ++static struct vroot_device *vroot_dev; ++static struct gendisk **disks; ++ ++ ++static int vroot_set_dev( ++ struct vroot_device *vr, ++ struct block_device *bdev, ++ unsigned int arg) ++{ ++ struct block_device *real_bdev; ++ struct file *file; ++ struct inode *inode; ++ int error; ++ ++ error = -EBUSY; ++ if (vr->vr_state != Vr_unbound) ++ goto out; ++ ++ error = -EBADF; ++ file = fget(arg); ++ if (!file) ++ goto out; ++ ++ error = -EINVAL; ++ inode = file->f_dentry->d_inode; ++ ++ ++ if (S_ISBLK(inode->i_mode)) { ++ real_bdev = inode->i_bdev; ++ vr->vr_device = real_bdev; ++ __iget(real_bdev->bd_inode); ++ } else ++ goto out_fput; ++ ++ vxdprintk(VXD_CBIT(misc, 0), ++ "vroot[%d]_set_dev: dev=" VXF_DEV, ++ vr->vr_number, VXD_DEV(real_bdev)); ++ ++ vr->vr_state = Vr_bound; ++ error = 0; ++ ++ out_fput: ++ fput(file); ++ out: ++ return error; ++} ++ ++static int vroot_clr_dev( ++ struct vroot_device *vr, ++ struct block_device *bdev) ++{ ++ struct block_device *real_bdev; ++ ++ if (vr->vr_state != Vr_bound) ++ return -ENXIO; ++ if (vr->vr_refcnt > 1) /* we needed one fd for the ioctl */ ++ return -EBUSY; ++ ++ real_bdev = vr->vr_device; ++ ++ vxdprintk(VXD_CBIT(misc, 0), ++ "vroot[%d]_clr_dev: dev=" VXF_DEV, ++ vr->vr_number, VXD_DEV(real_bdev)); ++ ++ bdput(real_bdev); ++ vr->vr_state = Vr_unbound; ++ vr->vr_device = NULL; ++ return 0; ++} ++ ++ ++static int vr_ioctl(struct block_device *bdev, fmode_t mode, ++ unsigned int cmd, unsigned long arg) ++{ ++ struct vroot_device *vr = bdev->bd_disk->private_data; ++ int err; ++ ++ down(&vr->vr_ctl_mutex); ++ switch (cmd) { ++ case VROOT_SET_DEV: ++ err = vroot_set_dev(vr, bdev, arg); ++ break; ++ case VROOT_CLR_DEV: ++ err = vroot_clr_dev(vr, bdev); ++ break; ++ default: ++ err = -EINVAL; ++ break; ++ } ++ up(&vr->vr_ctl_mutex); ++ return err; ++} ++ ++static int vr_open(struct block_device *bdev, fmode_t mode) ++{ ++ struct vroot_device *vr = bdev->bd_disk->private_data; ++ ++ down(&vr->vr_ctl_mutex); ++ vr->vr_refcnt++; ++ up(&vr->vr_ctl_mutex); ++ return 0; ++} ++ ++static int vr_release(struct gendisk *disk, fmode_t mode) ++{ ++ struct vroot_device *vr = disk->private_data; ++ ++ 
down(&vr->vr_ctl_mutex); ++ --vr->vr_refcnt; ++ up(&vr->vr_ctl_mutex); ++ return 0; ++} ++ ++static struct block_device_operations vr_fops = { ++ .owner = THIS_MODULE, ++ .open = vr_open, ++ .release = vr_release, ++ .ioctl = vr_ioctl, ++}; ++ ++static void vroot_make_request(struct request_queue *q, struct bio *bio) ++{ ++ printk("vroot_make_request %p, %p\n", q, bio); ++ bio_io_error(bio); ++} ++ ++struct block_device *__vroot_get_real_bdev(struct block_device *bdev) ++{ ++ struct inode *inode = bdev->bd_inode; ++ struct vroot_device *vr; ++ struct block_device *real_bdev; ++ int minor = iminor(inode); ++ ++ vr = &vroot_dev[minor]; ++ real_bdev = vr->vr_device; ++ ++ vxdprintk(VXD_CBIT(misc, 0), ++ "vroot[%d]_get_real_bdev: dev=" VXF_DEV, ++ vr->vr_number, VXD_DEV(real_bdev)); ++ ++ if (vr->vr_state != Vr_bound) ++ return ERR_PTR(-ENXIO); ++ ++ __iget(real_bdev->bd_inode); ++ return real_bdev; ++} ++ ++ ++ ++/* ++ * And now the modules code and kernel interface. ++ */ ++ ++module_param(max_vroot, int, 0); ++ ++MODULE_PARM_DESC(max_vroot, "Maximum number of vroot devices (1-256)"); ++MODULE_LICENSE("GPL"); ++MODULE_ALIAS_BLOCKDEV_MAJOR(VROOT_MAJOR); ++ ++MODULE_AUTHOR ("Herbert Pötzl"); ++MODULE_DESCRIPTION ("Virtual Root Device Mapper"); ++ ++ ++int __init vroot_init(void) ++{ ++ int err, i; ++ ++ if (max_vroot < 1 || max_vroot > 256) { ++ max_vroot = MAX_VROOT_DEFAULT; ++ printk(KERN_WARNING "vroot: invalid max_vroot " ++ "(must be between 1 and 256), " ++ "using default (%d)\n", max_vroot); ++ } ++ ++ if (register_blkdev(VROOT_MAJOR, "vroot")) ++ return -EIO; ++ ++ err = -ENOMEM; ++ vroot_dev = kmalloc(max_vroot * sizeof(struct vroot_device), GFP_KERNEL); ++ if (!vroot_dev) ++ goto out_mem1; ++ memset(vroot_dev, 0, max_vroot * sizeof(struct vroot_device)); ++ ++ disks = kmalloc(max_vroot * sizeof(struct gendisk *), GFP_KERNEL); ++ if (!disks) ++ goto out_mem2; ++ ++ for (i = 0; i < max_vroot; i++) { ++ disks[i] = alloc_disk(1); ++ if (!disks[i]) ++ goto out_mem3; ++ disks[i]->queue = blk_alloc_queue(GFP_KERNEL); ++ if (!disks[i]->queue) ++ goto out_mem3; ++ blk_queue_make_request(disks[i]->queue, vroot_make_request); ++ } ++ ++ for (i = 0; i < max_vroot; i++) { ++ struct vroot_device *vr = &vroot_dev[i]; ++ struct gendisk *disk = disks[i]; ++ ++ memset(vr, 0, sizeof(*vr)); ++ sema_init(&vr->vr_ctl_mutex, 1); ++ vr->vr_number = i; ++ disk->major = VROOT_MAJOR; ++ disk->first_minor = i; ++ disk->fops = &vr_fops; ++ sprintf(disk->disk_name, "vroot%d", i); ++ disk->private_data = vr; ++ } ++ ++ err = register_vroot_grb(&__vroot_get_real_bdev); ++ if (err) ++ goto out_mem3; ++ ++ for (i = 0; i < max_vroot; i++) ++ add_disk(disks[i]); ++ printk(KERN_INFO "vroot: loaded (max %d devices)\n", max_vroot); ++ return 0; ++ ++out_mem3: ++ while (i--) ++ put_disk(disks[i]); ++ kfree(disks); ++out_mem2: ++ kfree(vroot_dev); ++out_mem1: ++ unregister_blkdev(VROOT_MAJOR, "vroot"); ++ printk(KERN_ERR "vroot: ran out of memory\n"); ++ return err; ++} ++ ++void vroot_exit(void) ++{ ++ int i; ++ ++ if (unregister_vroot_grb(&__vroot_get_real_bdev)) ++ printk(KERN_WARNING "vroot: cannot unregister grb\n"); ++ ++ for (i = 0; i < max_vroot; i++) { ++ del_gendisk(disks[i]); ++ put_disk(disks[i]); ++ } ++ unregister_blkdev(VROOT_MAJOR, "vroot"); ++ ++ kfree(disks); ++ kfree(vroot_dev); ++} ++ ++module_init(vroot_init); ++module_exit(vroot_exit); ++ ++#ifndef MODULE ++ ++static int __init max_vroot_setup(char *str) ++{ ++ max_vroot = simple_strtol(str, NULL, 0); ++ return 1; ++} ++ ++__setup("max_vroot=", 
max_vroot_setup); ++ ++#endif ++ +diff -NurpP --minimal linux-3.3.8/drivers/infiniband/Kconfig linux-3.3.8-vs2.3.3.4/drivers/infiniband/Kconfig +--- linux-3.3.8/drivers/infiniband/Kconfig 2012-03-19 19:46:54.000000000 +0100 ++++ linux-3.3.8-vs2.3.3.4/drivers/infiniband/Kconfig 2012-02-24 03:55:06.000000000 +0100 +@@ -39,7 +39,7 @@ config INFINIBAND_USER_MEM + config INFINIBAND_ADDR_TRANS + bool + depends on INET +- depends on !(INFINIBAND = y && IPV6 = m) ++ depends on !(INFINIBAND = y && IPV6 = y) + default y + + source "drivers/infiniband/hw/mthca/Kconfig" +diff -NurpP --minimal linux-3.3.8/drivers/infiniband/core/addr.c linux-3.3.8-vs2.3.3.4/drivers/infiniband/core/addr.c +--- linux-3.3.8/drivers/infiniband/core/addr.c 2012-03-19 19:46:54.000000000 +0100 ++++ linux-3.3.8-vs2.3.3.4/drivers/infiniband/core/addr.c 2012-02-24 03:55:06.000000000 +0100 +@@ -259,7 +259,7 @@ static int addr6_resolve(struct sockaddr + + if (ipv6_addr_any(&fl6.saddr)) { + ret = ipv6_dev_get_saddr(&init_net, ip6_dst_idev(dst)->dev, +- &fl6.daddr, 0, &fl6.saddr); ++ &fl6.daddr, 0, &fl6.saddr, NULL); + if (ret) + goto put; + +diff -NurpP --minimal linux-3.3.8/drivers/md/dm-ioctl.c linux-3.3.8-vs2.3.3.4/drivers/md/dm-ioctl.c +--- linux-3.3.8/drivers/md/dm-ioctl.c 2012-03-19 19:46:59.000000000 +0100 ++++ linux-3.3.8-vs2.3.3.4/drivers/md/dm-ioctl.c 2012-03-19 20:52:10.000000000 +0100 +@@ -16,6 +16,7 @@ + #include + #include + #include ++#include + + #include + +@@ -106,7 +107,8 @@ static struct hash_cell *__get_name_cell + unsigned int h = hash_str(str); + + list_for_each_entry (hc, _name_buckets + h, name_list) +- if (!strcmp(hc->name, str)) { ++ if (vx_check(dm_get_xid(hc->md), VS_WATCH_P | VS_IDENT) && ++ !strcmp(hc->name, str)) { + dm_get(hc->md); + return hc; + } +@@ -120,7 +122,8 @@ static struct hash_cell *__get_uuid_cell + unsigned int h = hash_str(str); + + list_for_each_entry (hc, _uuid_buckets + h, uuid_list) +- if (!strcmp(hc->uuid, str)) { ++ if (vx_check(dm_get_xid(hc->md), VS_WATCH_P | VS_IDENT) && ++ !strcmp(hc->uuid, str)) { + dm_get(hc->md); + return hc; + } +@@ -131,13 +134,15 @@ static struct hash_cell *__get_uuid_cell + static struct hash_cell *__get_dev_cell(uint64_t dev) + { + struct mapped_device *md; +- struct hash_cell *hc; ++ struct hash_cell *hc = NULL; + + md = dm_get_md(huge_decode_dev(dev)); + if (!md) + return NULL; + +- hc = dm_get_mdptr(md); ++ if (vx_check(dm_get_xid(md), VS_WATCH_P | VS_IDENT)) ++ hc = dm_get_mdptr(md); ++ + if (!hc) { + dm_put(md); + return NULL; +@@ -445,6 +450,9 @@ typedef int (*ioctl_fn)(struct dm_ioctl + + static int remove_all(struct dm_ioctl *param, size_t param_size) + { ++ if (!vx_check(0, VS_ADMIN)) ++ return -EPERM; ++ + dm_hash_remove_all(1); + param->data_size = 0; + return 0; +@@ -492,6 +500,8 @@ static int list_devices(struct dm_ioctl + */ + for (i = 0; i < NUM_BUCKETS; i++) { + list_for_each_entry (hc, _name_buckets + i, name_list) { ++ if (!vx_check(dm_get_xid(hc->md), VS_WATCH_P | VS_IDENT)) ++ continue; + needed += sizeof(struct dm_name_list); + needed += strlen(hc->name) + 1; + needed += ALIGN_MASK; +@@ -515,6 +525,8 @@ static int list_devices(struct dm_ioctl + */ + for (i = 0; i < NUM_BUCKETS; i++) { + list_for_each_entry (hc, _name_buckets + i, name_list) { ++ if (!vx_check(dm_get_xid(hc->md), VS_WATCH_P | VS_IDENT)) ++ continue; + if (old_nl) + old_nl->next = (uint32_t) ((void *) nl - + (void *) old_nl); +@@ -1615,8 +1627,8 @@ static int ctl_ioctl(uint command, struc + ioctl_fn fn = NULL; + size_t input_param_size; + +- /* only root can play 
with this */ +- if (!capable(CAP_SYS_ADMIN)) ++ /* only root and certain contexts can play with this */ ++ if (!vx_capable(CAP_SYS_ADMIN, VXC_ADMIN_MAPPER)) + return -EACCES; + + if (_IOC_TYPE(command) != DM_IOCTL) +diff -NurpP --minimal linux-3.3.8/drivers/md/dm.c linux-3.3.8-vs2.3.3.4/drivers/md/dm.c +--- linux-3.3.8/drivers/md/dm.c 2012-03-19 19:46:59.000000000 +0100 ++++ linux-3.3.8-vs2.3.3.4/drivers/md/dm.c 2012-02-24 03:55:06.000000000 +0100 +@@ -19,6 +19,7 @@ + #include + #include + #include ++#include + + #include + +@@ -131,6 +132,7 @@ struct mapped_device { + rwlock_t map_lock; + atomic_t holders; + atomic_t open_count; ++ xid_t xid; + + unsigned long flags; + +@@ -343,6 +345,7 @@ int dm_deleting_md(struct mapped_device + static int dm_blk_open(struct block_device *bdev, fmode_t mode) + { + struct mapped_device *md; ++ int ret = -ENXIO; + + spin_lock(&_minor_lock); + +@@ -351,18 +354,19 @@ static int dm_blk_open(struct block_devi + goto out; + + if (test_bit(DMF_FREEING, &md->flags) || +- dm_deleting_md(md)) { +- md = NULL; ++ dm_deleting_md(md)) ++ goto out; ++ ++ ret = -EACCES; ++ if (!vx_check(md->xid, VS_IDENT|VS_HOSTID)) + goto out; +- } + + dm_get(md); + atomic_inc(&md->open_count); +- ++ ret = 0; + out: + spin_unlock(&_minor_lock); +- +- return md ? 0 : -ENXIO; ++ return ret; + } + + static int dm_blk_close(struct gendisk *disk, fmode_t mode) +@@ -583,6 +587,14 @@ int dm_set_geometry(struct mapped_device + return 0; + } + ++/* ++ * Get the xid associated with a dm device ++ */ ++xid_t dm_get_xid(struct mapped_device *md) ++{ ++ return md->xid; ++} ++ + /*----------------------------------------------------------------- + * CRUD START: + * A more elegant soln is in the works that uses the queue +@@ -1849,6 +1861,7 @@ static struct mapped_device *alloc_dev(i + INIT_LIST_HEAD(&md->uevent_list); + spin_lock_init(&md->uevent_lock); + ++ md->xid = vx_current_xid(); + md->queue = blk_alloc_queue(GFP_KERNEL); + if (!md->queue) + goto bad_queue; +diff -NurpP --minimal linux-3.3.8/drivers/md/dm.h linux-3.3.8-vs2.3.3.4/drivers/md/dm.h +--- linux-3.3.8/drivers/md/dm.h 2012-01-09 16:14:21.000000000 +0100 ++++ linux-3.3.8-vs2.3.3.4/drivers/md/dm.h 2012-02-24 03:55:06.000000000 +0100 +@@ -41,6 +41,8 @@ struct dm_dev_internal { + struct dm_table; + struct dm_md_mempools; + ++xid_t dm_get_xid(struct mapped_device *md); ++ + /*----------------------------------------------------------------- + * Internal table functions. 
+ *---------------------------------------------------------------*/ +diff -NurpP --minimal linux-3.3.8/drivers/net/tun.c linux-3.3.8-vs2.3.3.4/drivers/net/tun.c +--- linux-3.3.8/drivers/net/tun.c 2012-03-19 19:47:08.000000000 +0100 ++++ linux-3.3.8-vs2.3.3.4/drivers/net/tun.c 2012-03-19 20:52:10.000000000 +0100 +@@ -64,6 +64,7 @@ + #include + #include + #include ++#include + #include + #include + #include +@@ -121,6 +122,7 @@ struct tun_struct { + unsigned int flags; + uid_t owner; + gid_t group; ++ nid_t nid; + + struct net_device *dev; + netdev_features_t set_features; +@@ -910,6 +912,7 @@ static void tun_setup(struct net_device + + tun->owner = -1; + tun->group = -1; ++ tun->nid = current->nid; + + dev->ethtool_ops = &tun_ethtool_ops; + dev->destructor = tun_free_netdev; +@@ -1068,7 +1071,7 @@ static int tun_set_iff(struct net *net, + + if (((tun->owner != -1 && cred->euid != tun->owner) || + (tun->group != -1 && !in_egroup_p(tun->group))) && +- !capable(CAP_NET_ADMIN)) ++ !cap_raised(current_cap(), CAP_NET_ADMIN)) + return -EPERM; + err = security_tun_dev_attach(tun->socket.sk); + if (err < 0) +@@ -1082,7 +1085,7 @@ static int tun_set_iff(struct net *net, + char *name; + unsigned long flags = 0; + +- if (!capable(CAP_NET_ADMIN)) ++ if (!nx_capable(CAP_NET_ADMIN, NXC_TUN_CREATE)) + return -EPERM; + err = security_tun_dev_create(); + if (err < 0) +@@ -1151,6 +1154,9 @@ static int tun_set_iff(struct net *net, + + sk->sk_destruct = tun_sock_destruct; + ++ if (!nx_check(tun->nid, VS_IDENT | VS_HOSTID | VS_ADMIN_P)) ++ return -EPERM; ++ + err = tun_attach(tun, file); + if (err < 0) + goto failed; +@@ -1332,6 +1338,16 @@ static long __tun_chr_ioctl(struct file + tun_debug(KERN_INFO, tun, "group set to %d\n", tun->group); + break; + ++ case TUNSETNID: ++ if (!capable(CAP_CONTEXT)) ++ return -EPERM; ++ ++ /* Set nid owner of the device */ ++ tun->nid = (nid_t) arg; ++ ++ tun_debug(KERN_INFO, tun, "nid owner set to %u\n", tun->nid); ++ break; ++ + case TUNSETLINK: + /* Only allow setting the type when the interface is down */ + if (tun->dev->flags & IFF_UP) { +diff -NurpP --minimal linux-3.3.8/drivers/tty/sysrq.c linux-3.3.8-vs2.3.3.4/drivers/tty/sysrq.c +--- linux-3.3.8/drivers/tty/sysrq.c 2012-03-19 19:47:19.000000000 +0100 ++++ linux-3.3.8-vs2.3.3.4/drivers/tty/sysrq.c 2012-02-24 04:03:15.000000000 +0100 +@@ -41,6 +41,7 @@ + #include + #include + #include ++#include + + #include + #include +@@ -395,6 +396,21 @@ static struct sysrq_key_op sysrq_unrt_op + .enable_mask = SYSRQ_ENABLE_RTNICE, + }; + ++ ++#ifdef CONFIG_VSERVER_DEBUG ++static void sysrq_handle_vxinfo(int key) ++{ ++ dump_vx_info_inactive((key == 'x') ? 
0 : 1); ++} ++ ++static struct sysrq_key_op sysrq_showvxinfo_op = { ++ .handler = sysrq_handle_vxinfo, ++ .help_msg = "conteXt", ++ .action_msg = "Show Context Info", ++ .enable_mask = SYSRQ_ENABLE_DUMP, ++}; ++#endif ++ + /* Key Operations table and lock */ + static DEFINE_SPINLOCK(sysrq_key_table_lock); + +@@ -449,7 +465,11 @@ static struct sysrq_key_op *sysrq_key_ta + NULL, /* v */ + &sysrq_showstate_blocked_op, /* w */ + /* x: May be registered on ppc/powerpc for xmon */ ++#ifdef CONFIG_VSERVER_DEBUG ++ &sysrq_showvxinfo_op, /* x */ ++#else + NULL, /* x */ ++#endif + /* y: May be registered on sparc64 for global register dump */ + NULL, /* y */ + &sysrq_ftrace_dump_op, /* z */ +@@ -464,6 +484,8 @@ static int sysrq_key_table_key2index(int + retval = key - '0'; + else if ((key >= 'a') && (key <= 'z')) + retval = key + 10 - 'a'; ++ else if ((key >= 'A') && (key <= 'Z')) ++ retval = key + 10 - 'A'; + else + retval = -1; + return retval; +diff -NurpP --minimal linux-3.3.8/drivers/tty/tty_io.c linux-3.3.8-vs2.3.3.4/drivers/tty/tty_io.c +--- linux-3.3.8/drivers/tty/tty_io.c 2012-03-19 19:47:19.000000000 +0100 ++++ linux-3.3.8-vs2.3.3.4/drivers/tty/tty_io.c 2012-02-24 03:55:06.000000000 +0100 +@@ -105,6 +105,7 @@ + + #include + #include ++#include + + #undef TTY_DEBUG_HANGUP + +@@ -2131,7 +2132,8 @@ static int tiocsti(struct tty_struct *tt + char ch, mbz = 0; + struct tty_ldisc *ld; + +- if ((current->signal->tty != tty) && !capable(CAP_SYS_ADMIN)) ++ if (((current->signal->tty != tty) && ++ !vx_capable(CAP_SYS_ADMIN, VXC_TIOCSTI))) + return -EPERM; + if (get_user(ch, p)) + return -EFAULT; +@@ -2419,6 +2421,7 @@ static int tiocspgrp(struct tty_struct * + return -ENOTTY; + if (get_user(pgrp_nr, p)) + return -EFAULT; ++ pgrp_nr = vx_rmap_pid(pgrp_nr); + if (pgrp_nr < 0) + return -EINVAL; + rcu_read_lock(); +diff -NurpP --minimal linux-3.3.8/fs/attr.c linux-3.3.8-vs2.3.3.4/fs/attr.c +--- linux-3.3.8/fs/attr.c 2012-03-19 19:47:24.000000000 +0100 ++++ linux-3.3.8-vs2.3.3.4/fs/attr.c 2012-02-24 03:55:06.000000000 +0100 +@@ -14,6 +14,9 @@ + #include + #include + #include ++#include ++#include ++#include + + /** + * inode_change_ok - check if attribute changes to an inode are allowed +@@ -74,6 +77,10 @@ int inode_change_ok(const struct inode * + return -EPERM; + } + ++ /* check for inode tag permission */ ++ if (dx_permission(inode, MAY_WRITE)) ++ return -EACCES; ++ + return 0; + } + EXPORT_SYMBOL(inode_change_ok); +@@ -144,6 +151,8 @@ void setattr_copy(struct inode *inode, c + inode->i_uid = attr->ia_uid; + if (ia_valid & ATTR_GID) + inode->i_gid = attr->ia_gid; ++ if ((ia_valid & ATTR_TAG) && IS_TAGGED(inode)) ++ inode->i_tag = attr->ia_tag; + if (ia_valid & ATTR_ATIME) + inode->i_atime = timespec_trunc(attr->ia_atime, + inode->i_sb->s_time_gran); +@@ -171,7 +180,8 @@ int notify_change(struct dentry * dentry + struct timespec now; + unsigned int ia_valid = attr->ia_valid; + +- if (ia_valid & (ATTR_MODE | ATTR_UID | ATTR_GID | ATTR_TIMES_SET)) { ++ if (ia_valid & (ATTR_MODE | ATTR_UID | ATTR_GID | ++ ATTR_TAG | ATTR_TIMES_SET)) { + if (IS_IMMUTABLE(inode) || IS_APPEND(inode)) + return -EPERM; + } +diff -NurpP --minimal linux-3.3.8/fs/block_dev.c linux-3.3.8-vs2.3.3.4/fs/block_dev.c +--- linux-3.3.8/fs/block_dev.c 2012-06-08 15:23:46.000000000 +0200 ++++ linux-3.3.8-vs2.3.3.4/fs/block_dev.c 2012-06-08 15:27:44.000000000 +0200 +@@ -26,6 +26,7 @@ + #include + #include + #include ++#include + #include + #include "internal.h" + +@@ -580,6 +581,7 @@ struct block_device *bdget(dev_t dev) + 
bdev->bd_invalidated = 0; + inode->i_mode = S_IFBLK; + inode->i_rdev = dev; ++ inode->i_mdev = dev; + inode->i_bdev = bdev; + inode->i_data.a_ops = &def_blk_aops; + mapping_set_gfp_mask(&inode->i_data, GFP_USER); +@@ -626,6 +628,11 @@ EXPORT_SYMBOL(bdput); + static struct block_device *bd_acquire(struct inode *inode) + { + struct block_device *bdev; ++ dev_t mdev; ++ ++ if (!vs_map_blkdev(inode->i_rdev, &mdev, DATTR_OPEN)) ++ return NULL; ++ inode->i_mdev = mdev; + + spin_lock(&bdev_lock); + bdev = inode->i_bdev; +@@ -636,7 +643,7 @@ static struct block_device *bd_acquire(s + } + spin_unlock(&bdev_lock); + +- bdev = bdget(inode->i_rdev); ++ bdev = bdget(mdev); + if (bdev) { + spin_lock(&bdev_lock); + if (!inode->i_bdev) { +diff -NurpP --minimal linux-3.3.8/fs/btrfs/ctree.h linux-3.3.8-vs2.3.3.4/fs/btrfs/ctree.h +--- linux-3.3.8/fs/btrfs/ctree.h 2012-06-08 15:23:46.000000000 +0200 ++++ linux-3.3.8-vs2.3.3.4/fs/btrfs/ctree.h 2012-04-30 19:34:37.000000000 +0200 +@@ -646,11 +646,14 @@ struct btrfs_inode_item { + /* modification sequence number for NFS */ + __le64 sequence; + ++ __le16 tag; + /* + * a little future expansion, for more than this we can + * just grow the inode item and version it + */ +- __le64 reserved[4]; ++ __le16 reserved16; ++ __le32 reserved32; ++ __le64 reserved[3]; + struct btrfs_timespec atime; + struct btrfs_timespec ctime; + struct btrfs_timespec mtime; +@@ -1504,6 +1507,8 @@ struct btrfs_ioctl_defrag_range_args { + #define BTRFS_MOUNT_CHECK_INTEGRITY (1 << 20) + #define BTRFS_MOUNT_CHECK_INTEGRITY_INCLUDING_EXTENT_DATA (1 << 21) + ++#define BTRFS_MOUNT_TAGGED (1 << 24) ++ + #define btrfs_clear_opt(o, opt) ((o) &= ~BTRFS_MOUNT_##opt) + #define btrfs_set_opt(o, opt) ((o) |= BTRFS_MOUNT_##opt) + #define btrfs_test_opt(root, opt) ((root)->fs_info->mount_opt & \ +@@ -1711,6 +1716,7 @@ BTRFS_SETGET_FUNCS(inode_block_group, st + BTRFS_SETGET_FUNCS(inode_nlink, struct btrfs_inode_item, nlink, 32); + BTRFS_SETGET_FUNCS(inode_uid, struct btrfs_inode_item, uid, 32); + BTRFS_SETGET_FUNCS(inode_gid, struct btrfs_inode_item, gid, 32); ++BTRFS_SETGET_FUNCS(inode_tag, struct btrfs_inode_item, tag, 16); + BTRFS_SETGET_FUNCS(inode_mode, struct btrfs_inode_item, mode, 32); + BTRFS_SETGET_FUNCS(inode_rdev, struct btrfs_inode_item, rdev, 64); + BTRFS_SETGET_FUNCS(inode_flags, struct btrfs_inode_item, flags, 64); +@@ -1764,6 +1770,10 @@ BTRFS_SETGET_FUNCS(extent_flags, struct + + BTRFS_SETGET_FUNCS(extent_refs_v0, struct btrfs_extent_item_v0, refs, 32); + ++#define BTRFS_INODE_IXUNLINK (1 << 24) ++#define BTRFS_INODE_BARRIER (1 << 25) ++#define BTRFS_INODE_COW (1 << 26) ++ + + BTRFS_SETGET_FUNCS(tree_block_level, struct btrfs_tree_block_info, level, 8); + +@@ -2925,6 +2935,7 @@ extern const struct dentry_operations bt + long btrfs_ioctl(struct file *file, unsigned int cmd, unsigned long arg); + void btrfs_update_iflags(struct inode *inode); + void btrfs_inherit_iflags(struct inode *inode, struct inode *dir); ++int btrfs_sync_flags(struct inode *inode, int, int); + int btrfs_defrag_file(struct inode *inode, struct file *file, + struct btrfs_ioctl_defrag_range_args *range, + u64 newer_than, unsigned long max_pages); +diff -NurpP --minimal linux-3.3.8/fs/btrfs/disk-io.c linux-3.3.8-vs2.3.3.4/fs/btrfs/disk-io.c +--- linux-3.3.8/fs/btrfs/disk-io.c 2012-03-19 19:47:24.000000000 +0100 ++++ linux-3.3.8-vs2.3.3.4/fs/btrfs/disk-io.c 2012-03-19 20:52:10.000000000 +0100 +@@ -2125,6 +2125,9 @@ int open_ctree(struct super_block *sb, + goto fail_alloc; + } + ++ if (btrfs_test_opt(tree_root, TAGGED)) ++ 
sb->s_flags |= MS_TAGGED; ++ + features = btrfs_super_incompat_flags(disk_super) & + ~BTRFS_FEATURE_INCOMPAT_SUPP; + if (features) { +diff -NurpP --minimal linux-3.3.8/fs/btrfs/inode.c linux-3.3.8-vs2.3.3.4/fs/btrfs/inode.c +--- linux-3.3.8/fs/btrfs/inode.c 2012-03-19 19:47:24.000000000 +0100 ++++ linux-3.3.8-vs2.3.3.4/fs/btrfs/inode.c 2012-03-19 20:52:10.000000000 +0100 +@@ -39,6 +39,7 @@ + #include + #include + #include ++#include + #include "compat.h" + #include "ctree.h" + #include "disk-io.h" +@@ -2350,6 +2351,8 @@ static void btrfs_read_locked_inode(stru + struct btrfs_key location; + int maybe_acls; + u32 rdev; ++ uid_t uid; ++ gid_t gid; + int ret; + bool filled = false; + +@@ -2377,8 +2380,13 @@ static void btrfs_read_locked_inode(stru + struct btrfs_inode_item); + inode->i_mode = btrfs_inode_mode(leaf, inode_item); + set_nlink(inode, btrfs_inode_nlink(leaf, inode_item)); +- inode->i_uid = btrfs_inode_uid(leaf, inode_item); +- inode->i_gid = btrfs_inode_gid(leaf, inode_item); ++ ++ uid = btrfs_inode_uid(leaf, inode_item); ++ gid = btrfs_inode_gid(leaf, inode_item); ++ inode->i_uid = INOTAG_UID(DX_TAG(inode), uid, gid); ++ inode->i_gid = INOTAG_GID(DX_TAG(inode), uid, gid); ++ inode->i_tag = INOTAG_TAG(DX_TAG(inode), uid, gid, ++ btrfs_inode_tag(leaf, inode_item)); + btrfs_i_size_write(inode, btrfs_inode_size(leaf, inode_item)); + + tspec = btrfs_inode_atime(inode_item); +@@ -2456,8 +2464,14 @@ static void fill_inode_item(struct btrfs + struct btrfs_inode_item *item, + struct inode *inode) + { +- btrfs_set_inode_uid(leaf, item, inode->i_uid); +- btrfs_set_inode_gid(leaf, item, inode->i_gid); ++ uid_t uid = TAGINO_UID(DX_TAG(inode), inode->i_uid, inode->i_tag); ++ gid_t gid = TAGINO_GID(DX_TAG(inode), inode->i_gid, inode->i_tag); ++ ++ btrfs_set_inode_uid(leaf, item, uid); ++ btrfs_set_inode_gid(leaf, item, gid); ++#ifdef CONFIG_TAGGING_INTERN ++ btrfs_set_inode_tag(leaf, item, inode->i_tag); ++#endif + btrfs_set_inode_size(leaf, item, BTRFS_I(inode)->disk_i_size); + btrfs_set_inode_mode(leaf, item, inode->i_mode); + btrfs_set_inode_nlink(leaf, item, inode->i_nlink); +@@ -7412,11 +7426,13 @@ static const struct inode_operations btr + .listxattr = btrfs_listxattr, + .removexattr = btrfs_removexattr, + .permission = btrfs_permission, ++ .sync_flags = btrfs_sync_flags, + .get_acl = btrfs_get_acl, + }; + static const struct inode_operations btrfs_dir_ro_inode_operations = { + .lookup = btrfs_lookup, + .permission = btrfs_permission, ++ .sync_flags = btrfs_sync_flags, + .get_acl = btrfs_get_acl, + }; + +diff -NurpP --minimal linux-3.3.8/fs/btrfs/ioctl.c linux-3.3.8-vs2.3.3.4/fs/btrfs/ioctl.c +--- linux-3.3.8/fs/btrfs/ioctl.c 2012-06-08 15:23:46.000000000 +0200 ++++ linux-3.3.8-vs2.3.3.4/fs/btrfs/ioctl.c 2012-04-23 23:45:14.000000000 +0200 +@@ -71,10 +71,13 @@ static unsigned int btrfs_flags_to_ioctl + { + unsigned int iflags = 0; + +- if (flags & BTRFS_INODE_SYNC) +- iflags |= FS_SYNC_FL; + if (flags & BTRFS_INODE_IMMUTABLE) + iflags |= FS_IMMUTABLE_FL; ++ if (flags & BTRFS_INODE_IXUNLINK) ++ iflags |= FS_IXUNLINK_FL; ++ ++ if (flags & BTRFS_INODE_SYNC) ++ iflags |= FS_SYNC_FL; + if (flags & BTRFS_INODE_APPEND) + iflags |= FS_APPEND_FL; + if (flags & BTRFS_INODE_NODUMP) +@@ -91,28 +94,78 @@ static unsigned int btrfs_flags_to_ioctl + else if (flags & BTRFS_INODE_NOCOMPRESS) + iflags |= FS_NOCOMP_FL; + ++ if (flags & BTRFS_INODE_BARRIER) ++ iflags |= FS_BARRIER_FL; ++ if (flags & BTRFS_INODE_COW) ++ iflags |= FS_COW_FL; + return iflags; + } + + /* +- * Update inode->i_flags based on the 
btrfs internal flags. ++ * Update inode->i_(v)flags based on the btrfs internal flags. + */ + void btrfs_update_iflags(struct inode *inode) + { + struct btrfs_inode *ip = BTRFS_I(inode); + +- inode->i_flags &= ~(S_SYNC|S_APPEND|S_IMMUTABLE|S_NOATIME|S_DIRSYNC); ++ inode->i_flags &= ~(S_IMMUTABLE | S_IXUNLINK | ++ S_SYNC | S_APPEND | S_NOATIME | S_DIRSYNC); + +- if (ip->flags & BTRFS_INODE_SYNC) +- inode->i_flags |= S_SYNC; + if (ip->flags & BTRFS_INODE_IMMUTABLE) + inode->i_flags |= S_IMMUTABLE; ++ if (ip->flags & BTRFS_INODE_IXUNLINK) ++ inode->i_flags |= S_IXUNLINK; ++ ++ if (ip->flags & BTRFS_INODE_SYNC) ++ inode->i_flags |= S_SYNC; + if (ip->flags & BTRFS_INODE_APPEND) + inode->i_flags |= S_APPEND; + if (ip->flags & BTRFS_INODE_NOATIME) + inode->i_flags |= S_NOATIME; + if (ip->flags & BTRFS_INODE_DIRSYNC) + inode->i_flags |= S_DIRSYNC; ++ ++ inode->i_vflags &= ~(V_BARRIER | V_COW); ++ ++ if (ip->flags & BTRFS_INODE_BARRIER) ++ inode->i_vflags |= V_BARRIER; ++ if (ip->flags & BTRFS_INODE_COW) ++ inode->i_vflags |= V_COW; ++} ++ ++/* ++ * Update btrfs internal flags from inode->i_(v)flags. ++ */ ++void btrfs_update_flags(struct inode *inode) ++{ ++ struct btrfs_inode *ip = BTRFS_I(inode); ++ ++ unsigned int flags = inode->i_flags; ++ unsigned int vflags = inode->i_vflags; ++ ++ ip->flags &= ~(BTRFS_INODE_SYNC | BTRFS_INODE_APPEND | ++ BTRFS_INODE_IMMUTABLE | BTRFS_INODE_IXUNLINK | ++ BTRFS_INODE_NOATIME | BTRFS_INODE_DIRSYNC | ++ BTRFS_INODE_BARRIER | BTRFS_INODE_COW); ++ ++ if (flags & S_IMMUTABLE) ++ ip->flags |= BTRFS_INODE_IMMUTABLE; ++ if (flags & S_IXUNLINK) ++ ip->flags |= BTRFS_INODE_IXUNLINK; ++ ++ if (flags & S_SYNC) ++ ip->flags |= BTRFS_INODE_SYNC; ++ if (flags & S_APPEND) ++ ip->flags |= BTRFS_INODE_APPEND; ++ if (flags & S_NOATIME) ++ ip->flags |= BTRFS_INODE_NOATIME; ++ if (flags & S_DIRSYNC) ++ ip->flags |= BTRFS_INODE_DIRSYNC; ++ ++ if (vflags & V_BARRIER) ++ ip->flags |= BTRFS_INODE_BARRIER; ++ if (vflags & V_COW) ++ ip->flags |= BTRFS_INODE_COW; + } + + /* +@@ -128,6 +181,7 @@ void btrfs_inherit_iflags(struct inode * + return; + + flags = BTRFS_I(dir)->flags; ++ flags &= ~BTRFS_INODE_BARRIER; + + if (flags & BTRFS_INODE_NOCOMPRESS) { + BTRFS_I(inode)->flags &= ~BTRFS_INODE_COMPRESS; +@@ -143,6 +197,30 @@ void btrfs_inherit_iflags(struct inode * + btrfs_update_iflags(inode); + } + ++int btrfs_sync_flags(struct inode *inode, int flags, int vflags) ++{ ++ struct btrfs_inode *ip = BTRFS_I(inode); ++ struct btrfs_root *root = ip->root; ++ struct btrfs_trans_handle *trans; ++ int ret; ++ ++ trans = btrfs_join_transaction(root); ++ BUG_ON(!trans); ++ ++ inode->i_flags = flags; ++ inode->i_vflags = vflags; ++ btrfs_update_flags(inode); ++ ++ ret = btrfs_update_inode(trans, root, inode); ++ BUG_ON(ret); ++ ++ btrfs_update_iflags(inode); ++ inode->i_ctime = CURRENT_TIME; ++ btrfs_end_transaction(trans, root); ++ ++ return 0; ++} ++ + static int btrfs_ioctl_getflags(struct file *file, void __user *arg) + { + struct btrfs_inode *ip = BTRFS_I(file->f_path.dentry->d_inode); +@@ -199,7 +277,8 @@ static int btrfs_ioctl_setflags(struct f + + flags = btrfs_mask_flags(inode->i_mode, flags); + oldflags = btrfs_flags_to_ioctl(ip->flags); +- if ((flags ^ oldflags) & (FS_APPEND_FL | FS_IMMUTABLE_FL)) { ++ if ((flags ^ oldflags) & (FS_APPEND_FL | ++ FS_IMMUTABLE_FL | FS_IXUNLINK_FL)) { + if (!capable(CAP_LINUX_IMMUTABLE)) { + ret = -EPERM; + goto out_unlock; +@@ -210,14 +289,19 @@ static int btrfs_ioctl_setflags(struct f + if (ret) + goto out_unlock; + +- if (flags & FS_SYNC_FL) +- ip->flags 
|= BTRFS_INODE_SYNC; +- else +- ip->flags &= ~BTRFS_INODE_SYNC; + if (flags & FS_IMMUTABLE_FL) + ip->flags |= BTRFS_INODE_IMMUTABLE; + else + ip->flags &= ~BTRFS_INODE_IMMUTABLE; ++ if (flags & FS_IXUNLINK_FL) ++ ip->flags |= BTRFS_INODE_IXUNLINK; ++ else ++ ip->flags &= ~BTRFS_INODE_IXUNLINK; ++ ++ if (flags & FS_SYNC_FL) ++ ip->flags |= BTRFS_INODE_SYNC; ++ else ++ ip->flags &= ~BTRFS_INODE_SYNC; + if (flags & FS_APPEND_FL) + ip->flags |= BTRFS_INODE_APPEND; + else +diff -NurpP --minimal linux-3.3.8/fs/btrfs/super.c linux-3.3.8-vs2.3.3.4/fs/btrfs/super.c +--- linux-3.3.8/fs/btrfs/super.c 2012-03-19 19:47:24.000000000 +0100 ++++ linux-3.3.8-vs2.3.3.4/fs/btrfs/super.c 2012-02-24 04:42:16.000000000 +0100 +@@ -167,7 +167,7 @@ enum { + Opt_no_space_cache, Opt_recovery, Opt_skip_balance, + Opt_check_integrity, Opt_check_integrity_including_extent_data, + Opt_check_integrity_print_mask, +- Opt_err, ++ Opt_tag, Opt_notag, Opt_tagid, Opt_err, + }; + + static match_table_t tokens = { +@@ -206,6 +206,9 @@ static match_table_t tokens = { + {Opt_check_integrity, "check_int"}, + {Opt_check_integrity_including_extent_data, "check_int_data"}, + {Opt_check_integrity_print_mask, "check_int_print_mask=%d"}, ++ {Opt_tag, "tag"}, ++ {Opt_notag, "notag"}, ++ {Opt_tagid, "tagid=%u"}, + {Opt_err, NULL}, + }; + +@@ -438,6 +441,22 @@ int btrfs_parse_options(struct btrfs_roo + ret = -EINVAL; + goto out; + #endif ++#ifndef CONFIG_TAGGING_NONE ++ case Opt_tag: ++ printk(KERN_INFO "btrfs: use tagging\n"); ++ btrfs_set_opt(info->mount_opt, TAGGED); ++ break; ++ case Opt_notag: ++ printk(KERN_INFO "btrfs: disabled tagging\n"); ++ btrfs_clear_opt(info->mount_opt, TAGGED); ++ break; ++#endif ++#ifdef CONFIG_PROPAGATE ++ case Opt_tagid: ++ /* use args[0] */ ++ btrfs_set_opt(info->mount_opt, TAGGED); ++ break; ++#endif + case Opt_err: + printk(KERN_INFO "btrfs: unrecognized mount option " + "'%s'\n", p); +@@ -1005,6 +1024,12 @@ static int btrfs_remount(struct super_bl + if (ret) + return -EINVAL; + ++ if (btrfs_test_opt(root, TAGGED) && !(sb->s_flags & MS_TAGGED)) { ++ printk("btrfs: %s: tagging not permitted on remount.\n", ++ sb->s_id); ++ return -EINVAL; ++ } ++ + if ((*flags & MS_RDONLY) == (sb->s_flags & MS_RDONLY)) + return 0; + +diff -NurpP --minimal linux-3.3.8/fs/char_dev.c linux-3.3.8-vs2.3.3.4/fs/char_dev.c +--- linux-3.3.8/fs/char_dev.c 2012-03-19 19:47:25.000000000 +0100 ++++ linux-3.3.8-vs2.3.3.4/fs/char_dev.c 2012-02-24 03:55:06.000000000 +0100 +@@ -21,6 +21,8 @@ + #include + #include + #include ++#include ++#include + + #include "internal.h" + +@@ -371,14 +373,21 @@ static int chrdev_open(struct inode *ino + struct cdev *p; + struct cdev *new = NULL; + int ret = 0; ++ dev_t mdev; ++ ++ if (!vs_map_chrdev(inode->i_rdev, &mdev, DATTR_OPEN)) ++ return -EPERM; ++ inode->i_mdev = mdev; + + spin_lock(&cdev_lock); + p = inode->i_cdev; + if (!p) { + struct kobject *kobj; + int idx; ++ + spin_unlock(&cdev_lock); +- kobj = kobj_lookup(cdev_map, inode->i_rdev, &idx); ++ ++ kobj = kobj_lookup(cdev_map, mdev, &idx); + if (!kobj) + return -ENXIO; + new = container_of(kobj, struct cdev, kobj); +diff -NurpP --minimal linux-3.3.8/fs/dcache.c linux-3.3.8-vs2.3.3.4/fs/dcache.c +--- linux-3.3.8/fs/dcache.c 2012-06-08 15:23:46.000000000 +0200 ++++ linux-3.3.8-vs2.3.3.4/fs/dcache.c 2012-04-03 03:02:12.000000000 +0200 +@@ -37,6 +37,7 @@ + #include + #include + #include ++#include + #include "internal.h" + #include "mount.h" + +@@ -560,6 +561,8 @@ int d_invalidate(struct dentry * dentry) + spin_lock(&dentry->d_lock); + } + ++ 
vx_dentry_dec(dentry); ++ + /* + * Somebody else still using it? + * +@@ -589,6 +592,7 @@ EXPORT_SYMBOL(d_invalidate); + static inline void __dget_dlock(struct dentry *dentry) + { + dentry->d_count++; ++ vx_dentry_inc(dentry); + } + + static inline void __dget(struct dentry *dentry) +@@ -1213,6 +1217,9 @@ struct dentry *__d_alloc(struct super_bl + struct dentry *dentry; + char *dname; + ++ if (!vx_dentry_avail(1)) ++ return NULL; ++ + dentry = kmem_cache_alloc(dentry_cache, GFP_KERNEL); + if (!dentry) + return NULL; +@@ -1235,6 +1242,7 @@ struct dentry *__d_alloc(struct super_bl + + dentry->d_count = 1; + dentry->d_flags = 0; ++ vx_dentry_inc(dentry); + spin_lock_init(&dentry->d_lock); + seqcount_init(&dentry->d_seq); + dentry->d_inode = NULL; +@@ -1920,6 +1928,7 @@ struct dentry *__d_lookup(struct dentry + } + + dentry->d_count++; ++ vx_dentry_inc(dentry); + found = dentry; + spin_unlock(&dentry->d_lock); + break; +diff -NurpP --minimal linux-3.3.8/fs/devpts/inode.c linux-3.3.8-vs2.3.3.4/fs/devpts/inode.c +--- linux-3.3.8/fs/devpts/inode.c 2012-03-19 19:47:25.000000000 +0100 ++++ linux-3.3.8-vs2.3.3.4/fs/devpts/inode.c 2012-02-24 03:55:06.000000000 +0100 +@@ -25,6 +25,7 @@ + #include + #include + #include ++#include + + #define DEVPTS_DEFAULT_MODE 0600 + /* +@@ -36,6 +37,20 @@ + #define DEVPTS_DEFAULT_PTMX_MODE 0000 + #define PTMX_MINOR 2 + ++static int devpts_permission(struct inode *inode, int mask) ++{ ++ int ret = -EACCES; ++ ++ /* devpts is xid tagged */ ++ if (vx_check((xid_t)inode->i_tag, VS_WATCH_P | VS_IDENT)) ++ ret = generic_permission(inode, mask); ++ return ret; ++} ++ ++static struct inode_operations devpts_file_inode_operations = { ++ .permission = devpts_permission, ++}; ++ + extern int pty_limit; /* Config limit on Unix98 ptys */ + static DEFINE_MUTEX(allocated_ptys_lock); + +@@ -263,6 +278,34 @@ static int devpts_show_options(struct se + return 0; + } + ++static int devpts_filter(struct dentry *de) ++{ ++ xid_t xid = 0; ++ ++ /* devpts is xid tagged */ ++ if (de && de->d_inode) ++ xid = (xid_t)de->d_inode->i_tag; ++#ifdef CONFIG_VSERVER_WARN_DEVPTS ++ else ++ vxwprintk_task(1, "devpts " VS_Q("%.*s") " without inode.", ++ de->d_name.len, de->d_name.name); ++#endif ++ return vx_check(xid, VS_WATCH_P | VS_IDENT); ++} ++ ++static int devpts_readdir(struct file * filp, void * dirent, filldir_t filldir) ++{ ++ return dcache_readdir_filter(filp, dirent, filldir, devpts_filter); ++} ++ ++static struct file_operations devpts_dir_operations = { ++ .open = dcache_dir_open, ++ .release = dcache_dir_close, ++ .llseek = dcache_dir_lseek, ++ .read = generic_read_dir, ++ .readdir = devpts_readdir, ++}; ++ + static const struct super_operations devpts_sops = { + .statfs = simple_statfs, + .remount_fs = devpts_remount, +@@ -306,8 +349,10 @@ devpts_fill_super(struct super_block *s, + inode->i_mtime = inode->i_atime = inode->i_ctime = CURRENT_TIME; + inode->i_mode = S_IFDIR | S_IRUGO | S_IXUGO | S_IWUSR; + inode->i_op = &simple_dir_inode_operations; +- inode->i_fop = &simple_dir_operations; ++ inode->i_fop = &devpts_dir_operations; + set_nlink(inode, 2); ++ /* devpts is xid tagged */ ++ inode->i_tag = (tag_t)vx_current_xid(); + + s->s_root = d_alloc_root(inode); + if (s->s_root) +@@ -492,6 +537,9 @@ int devpts_pty_new(struct inode *ptmx_in + inode->i_gid = opts->setgid ? 
opts->gid : current_fsgid(); + inode->i_mtime = inode->i_atime = inode->i_ctime = CURRENT_TIME; + init_special_inode(inode, S_IFCHR|opts->mode, device); ++ /* devpts is xid tagged */ ++ inode->i_tag = (tag_t)vx_current_xid(); ++ inode->i_op = &devpts_file_inode_operations; + inode->i_private = tty; + tty->driver_data = inode; + +diff -NurpP --minimal linux-3.3.8/fs/ext2/balloc.c linux-3.3.8-vs2.3.3.4/fs/ext2/balloc.c +--- linux-3.3.8/fs/ext2/balloc.c 2012-01-09 16:14:54.000000000 +0100 ++++ linux-3.3.8-vs2.3.3.4/fs/ext2/balloc.c 2012-02-24 03:55:06.000000000 +0100 +@@ -701,7 +701,6 @@ ext2_try_to_allocate(struct super_block + start = 0; + end = EXT2_BLOCKS_PER_GROUP(sb); + } +- + BUG_ON(start > EXT2_BLOCKS_PER_GROUP(sb)); + + repeat: +diff -NurpP --minimal linux-3.3.8/fs/ext2/ext2.h linux-3.3.8-vs2.3.3.4/fs/ext2/ext2.h +--- linux-3.3.8/fs/ext2/ext2.h 2012-03-19 19:47:25.000000000 +0100 ++++ linux-3.3.8-vs2.3.3.4/fs/ext2/ext2.h 2012-02-24 03:55:06.000000000 +0100 +@@ -126,6 +126,7 @@ extern void ext2_set_inode_flags(struct + extern void ext2_get_inode_flags(struct ext2_inode_info *); + extern int ext2_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo, + u64 start, u64 len); ++extern int ext2_sync_flags(struct inode *, int, int); + + /* ioctl.c */ + extern long ext2_ioctl(struct file *, unsigned int, unsigned long); +diff -NurpP --minimal linux-3.3.8/fs/ext2/file.c linux-3.3.8-vs2.3.3.4/fs/ext2/file.c +--- linux-3.3.8/fs/ext2/file.c 2011-10-24 18:45:27.000000000 +0200 ++++ linux-3.3.8-vs2.3.3.4/fs/ext2/file.c 2012-02-24 03:55:06.000000000 +0100 +@@ -104,4 +104,5 @@ const struct inode_operations ext2_file_ + .setattr = ext2_setattr, + .get_acl = ext2_get_acl, + .fiemap = ext2_fiemap, ++ .sync_flags = ext2_sync_flags, + }; +diff -NurpP --minimal linux-3.3.8/fs/ext2/ialloc.c linux-3.3.8-vs2.3.3.4/fs/ext2/ialloc.c +--- linux-3.3.8/fs/ext2/ialloc.c 2012-03-19 19:47:25.000000000 +0100 ++++ linux-3.3.8-vs2.3.3.4/fs/ext2/ialloc.c 2012-02-24 03:55:06.000000000 +0100 +@@ -17,6 +17,7 @@ + #include + #include + #include ++#include + #include "ext2.h" + #include "xattr.h" + #include "acl.h" +@@ -549,6 +550,7 @@ got: + inode->i_mode = mode; + inode->i_uid = current_fsuid(); + inode->i_gid = dir->i_gid; ++ inode->i_tag = dx_current_fstag(sb); + } else + inode_init_owner(inode, dir, mode); + +diff -NurpP --minimal linux-3.3.8/fs/ext2/inode.c linux-3.3.8-vs2.3.3.4/fs/ext2/inode.c +--- linux-3.3.8/fs/ext2/inode.c 2012-03-19 19:47:25.000000000 +0100 ++++ linux-3.3.8-vs2.3.3.4/fs/ext2/inode.c 2012-02-24 03:55:06.000000000 +0100 +@@ -31,6 +31,7 @@ + #include + #include + #include ++#include + #include "ext2.h" + #include "acl.h" + #include "xip.h" +@@ -1162,7 +1163,7 @@ static void ext2_truncate_blocks(struct + return; + if (ext2_inode_is_fast_symlink(inode)) + return; +- if (IS_APPEND(inode) || IS_IMMUTABLE(inode)) ++ if (IS_APPEND(inode) || IS_IXORUNLINK(inode)) + return; + __ext2_truncate_blocks(inode, offset); + } +@@ -1253,36 +1254,61 @@ void ext2_set_inode_flags(struct inode * + { + unsigned int flags = EXT2_I(inode)->i_flags; + +- inode->i_flags &= ~(S_SYNC|S_APPEND|S_IMMUTABLE|S_NOATIME|S_DIRSYNC); ++ inode->i_flags &= ~(S_IMMUTABLE | S_IXUNLINK | ++ S_SYNC | S_APPEND | S_NOATIME | S_DIRSYNC); ++ ++ ++ if (flags & EXT2_IMMUTABLE_FL) ++ inode->i_flags |= S_IMMUTABLE; ++ if (flags & EXT2_IXUNLINK_FL) ++ inode->i_flags |= S_IXUNLINK; ++ + if (flags & EXT2_SYNC_FL) + inode->i_flags |= S_SYNC; + if (flags & EXT2_APPEND_FL) + inode->i_flags |= S_APPEND; +- if (flags & EXT2_IMMUTABLE_FL) +- 
inode->i_flags |= S_IMMUTABLE; + if (flags & EXT2_NOATIME_FL) + inode->i_flags |= S_NOATIME; + if (flags & EXT2_DIRSYNC_FL) + inode->i_flags |= S_DIRSYNC; ++ ++ inode->i_vflags &= ~(V_BARRIER | V_COW); ++ ++ if (flags & EXT2_BARRIER_FL) ++ inode->i_vflags |= V_BARRIER; ++ if (flags & EXT2_COW_FL) ++ inode->i_vflags |= V_COW; + } + + /* Propagate flags from i_flags to EXT2_I(inode)->i_flags */ + void ext2_get_inode_flags(struct ext2_inode_info *ei) + { + unsigned int flags = ei->vfs_inode.i_flags; ++ unsigned int vflags = ei->vfs_inode.i_vflags; ++ ++ ei->i_flags &= ~(EXT2_SYNC_FL | EXT2_APPEND_FL | ++ EXT2_IMMUTABLE_FL | EXT2_IXUNLINK_FL | ++ EXT2_NOATIME_FL | EXT2_DIRSYNC_FL | ++ EXT2_BARRIER_FL | EXT2_COW_FL); ++ ++ if (flags & S_IMMUTABLE) ++ ei->i_flags |= EXT2_IMMUTABLE_FL; ++ if (flags & S_IXUNLINK) ++ ei->i_flags |= EXT2_IXUNLINK_FL; + +- ei->i_flags &= ~(EXT2_SYNC_FL|EXT2_APPEND_FL| +- EXT2_IMMUTABLE_FL|EXT2_NOATIME_FL|EXT2_DIRSYNC_FL); + if (flags & S_SYNC) + ei->i_flags |= EXT2_SYNC_FL; + if (flags & S_APPEND) + ei->i_flags |= EXT2_APPEND_FL; +- if (flags & S_IMMUTABLE) +- ei->i_flags |= EXT2_IMMUTABLE_FL; + if (flags & S_NOATIME) + ei->i_flags |= EXT2_NOATIME_FL; + if (flags & S_DIRSYNC) + ei->i_flags |= EXT2_DIRSYNC_FL; ++ ++ if (vflags & V_BARRIER) ++ ei->i_flags |= EXT2_BARRIER_FL; ++ if (vflags & V_COW) ++ ei->i_flags |= EXT2_COW_FL; + } + + struct inode *ext2_iget (struct super_block *sb, unsigned long ino) +@@ -1292,6 +1318,8 @@ struct inode *ext2_iget (struct super_bl + struct ext2_inode *raw_inode; + struct inode *inode; + long ret = -EIO; ++ uid_t uid; ++ gid_t gid; + int n; + + inode = iget_locked(sb, ino); +@@ -1310,12 +1338,16 @@ struct inode *ext2_iget (struct super_bl + } + + inode->i_mode = le16_to_cpu(raw_inode->i_mode); +- inode->i_uid = (uid_t)le16_to_cpu(raw_inode->i_uid_low); +- inode->i_gid = (gid_t)le16_to_cpu(raw_inode->i_gid_low); ++ uid = (uid_t)le16_to_cpu(raw_inode->i_uid_low); ++ gid = (gid_t)le16_to_cpu(raw_inode->i_gid_low); + if (!(test_opt (inode->i_sb, NO_UID32))) { +- inode->i_uid |= le16_to_cpu(raw_inode->i_uid_high) << 16; +- inode->i_gid |= le16_to_cpu(raw_inode->i_gid_high) << 16; ++ uid |= le16_to_cpu(raw_inode->i_uid_high) << 16; ++ gid |= le16_to_cpu(raw_inode->i_gid_high) << 16; + } ++ inode->i_uid = INOTAG_UID(DX_TAG(inode), uid, gid); ++ inode->i_gid = INOTAG_GID(DX_TAG(inode), uid, gid); ++ inode->i_tag = INOTAG_TAG(DX_TAG(inode), uid, gid, ++ le16_to_cpu(raw_inode->i_raw_tag)); + set_nlink(inode, le16_to_cpu(raw_inode->i_links_count)); + inode->i_size = le32_to_cpu(raw_inode->i_size); + inode->i_atime.tv_sec = (signed)le32_to_cpu(raw_inode->i_atime); +@@ -1413,8 +1445,8 @@ static int __ext2_write_inode(struct ino + struct ext2_inode_info *ei = EXT2_I(inode); + struct super_block *sb = inode->i_sb; + ino_t ino = inode->i_ino; +- uid_t uid = inode->i_uid; +- gid_t gid = inode->i_gid; ++ uid_t uid = TAGINO_UID(DX_TAG(inode), inode->i_uid, inode->i_tag); ++ gid_t gid = TAGINO_GID(DX_TAG(inode), inode->i_gid, inode->i_tag); + struct buffer_head * bh; + struct ext2_inode * raw_inode = ext2_get_inode(sb, ino, &bh); + int n; +@@ -1450,6 +1482,9 @@ static int __ext2_write_inode(struct ino + raw_inode->i_uid_high = 0; + raw_inode->i_gid_high = 0; + } ++#ifdef CONFIG_TAGGING_INTERN ++ raw_inode->i_raw_tag = cpu_to_le16(inode->i_tag); ++#endif + raw_inode->i_links_count = cpu_to_le16(inode->i_nlink); + raw_inode->i_size = cpu_to_le32(inode->i_size); + raw_inode->i_atime = cpu_to_le32(inode->i_atime.tv_sec); +@@ -1530,7 +1565,8 @@ int 
ext2_setattr(struct dentry *dentry, + if (is_quota_modification(inode, iattr)) + dquot_initialize(inode); + if ((iattr->ia_valid & ATTR_UID && iattr->ia_uid != inode->i_uid) || +- (iattr->ia_valid & ATTR_GID && iattr->ia_gid != inode->i_gid)) { ++ (iattr->ia_valid & ATTR_GID && iattr->ia_gid != inode->i_gid) || ++ (iattr->ia_valid & ATTR_TAG && iattr->ia_tag != inode->i_tag)) { + error = dquot_transfer(inode, iattr); + if (error) + return error; +diff -NurpP --minimal linux-3.3.8/fs/ext2/ioctl.c linux-3.3.8-vs2.3.3.4/fs/ext2/ioctl.c +--- linux-3.3.8/fs/ext2/ioctl.c 2012-03-19 19:47:25.000000000 +0100 ++++ linux-3.3.8-vs2.3.3.4/fs/ext2/ioctl.c 2012-02-24 03:55:06.000000000 +0100 +@@ -17,6 +17,16 @@ + #include + + ++int ext2_sync_flags(struct inode *inode, int flags, int vflags) ++{ ++ inode->i_flags = flags; ++ inode->i_vflags = vflags; ++ ext2_get_inode_flags(EXT2_I(inode)); ++ inode->i_ctime = CURRENT_TIME_SEC; ++ mark_inode_dirty(inode); ++ return 0; ++} ++ + long ext2_ioctl(struct file *filp, unsigned int cmd, unsigned long arg) + { + struct inode *inode = filp->f_dentry->d_inode; +@@ -51,6 +61,11 @@ long ext2_ioctl(struct file *filp, unsig + + flags = ext2_mask_flags(inode->i_mode, flags); + ++ if (IS_BARRIER(inode)) { ++ vxwprintk_task(1, "messing with the barrier."); ++ return -EACCES; ++ } ++ + mutex_lock(&inode->i_mutex); + /* Is it quota file? Do not allow user to mess with it */ + if (IS_NOQUOTA(inode)) { +@@ -66,7 +81,9 @@ long ext2_ioctl(struct file *filp, unsig + * + * This test looks nicer. Thanks to Pauline Middelink + */ +- if ((flags ^ oldflags) & (EXT2_APPEND_FL | EXT2_IMMUTABLE_FL)) { ++ if ((oldflags & EXT2_IMMUTABLE_FL) || ++ ((flags ^ oldflags) & (EXT2_APPEND_FL | ++ EXT2_IMMUTABLE_FL | EXT2_IXUNLINK_FL))) { + if (!capable(CAP_LINUX_IMMUTABLE)) { + mutex_unlock(&inode->i_mutex); + ret = -EPERM; +@@ -74,7 +91,7 @@ long ext2_ioctl(struct file *filp, unsig + } + } + +- flags = flags & EXT2_FL_USER_MODIFIABLE; ++ flags &= EXT2_FL_USER_MODIFIABLE; + flags |= oldflags & ~EXT2_FL_USER_MODIFIABLE; + ei->i_flags = flags; + +diff -NurpP --minimal linux-3.3.8/fs/ext2/namei.c linux-3.3.8-vs2.3.3.4/fs/ext2/namei.c +--- linux-3.3.8/fs/ext2/namei.c 2012-03-19 19:47:25.000000000 +0100 ++++ linux-3.3.8-vs2.3.3.4/fs/ext2/namei.c 2012-02-24 03:55:06.000000000 +0100 +@@ -32,6 +32,7 @@ + + #include + #include ++#include + #include "ext2.h" + #include "xattr.h" + #include "acl.h" +@@ -73,6 +74,7 @@ static struct dentry *ext2_lookup(struct + (unsigned long) ino); + return ERR_PTR(-EIO); + } ++ dx_propagate_tag(nd, inode); + } + return d_splice_alias(inode, dentry); + } +@@ -408,6 +410,7 @@ const struct inode_operations ext2_dir_i + .removexattr = generic_removexattr, + #endif + .setattr = ext2_setattr, ++ .sync_flags = ext2_sync_flags, + .get_acl = ext2_get_acl, + }; + +diff -NurpP --minimal linux-3.3.8/fs/ext2/super.c linux-3.3.8-vs2.3.3.4/fs/ext2/super.c +--- linux-3.3.8/fs/ext2/super.c 2012-03-19 19:47:25.000000000 +0100 ++++ linux-3.3.8-vs2.3.3.4/fs/ext2/super.c 2012-02-24 03:55:06.000000000 +0100 +@@ -393,7 +393,8 @@ enum { + Opt_err_ro, Opt_nouid32, Opt_nocheck, Opt_debug, + Opt_oldalloc, Opt_orlov, Opt_nobh, Opt_user_xattr, Opt_nouser_xattr, + Opt_acl, Opt_noacl, Opt_xip, Opt_ignore, Opt_err, Opt_quota, +- Opt_usrquota, Opt_grpquota, Opt_reservation, Opt_noreservation ++ Opt_usrquota, Opt_grpquota, Opt_reservation, Opt_noreservation, ++ Opt_tag, Opt_notag, Opt_tagid + }; + + static const match_table_t tokens = { +@@ -421,6 +422,9 @@ static const match_table_t tokens = { + {Opt_acl, 
"acl"}, + {Opt_noacl, "noacl"}, + {Opt_xip, "xip"}, ++ {Opt_tag, "tag"}, ++ {Opt_notag, "notag"}, ++ {Opt_tagid, "tagid=%u"}, + {Opt_grpquota, "grpquota"}, + {Opt_ignore, "noquota"}, + {Opt_quota, "quota"}, +@@ -491,6 +495,20 @@ static int parse_options(char *options, + case Opt_nouid32: + set_opt (sbi->s_mount_opt, NO_UID32); + break; ++#ifndef CONFIG_TAGGING_NONE ++ case Opt_tag: ++ set_opt (sbi->s_mount_opt, TAGGED); ++ break; ++ case Opt_notag: ++ clear_opt (sbi->s_mount_opt, TAGGED); ++ break; ++#endif ++#ifdef CONFIG_PROPAGATE ++ case Opt_tagid: ++ /* use args[0] */ ++ set_opt (sbi->s_mount_opt, TAGGED); ++ break; ++#endif + case Opt_nocheck: + clear_opt (sbi->s_mount_opt, CHECK); + break; +@@ -849,6 +867,8 @@ static int ext2_fill_super(struct super_ + if (!parse_options((char *) data, sb)) + goto failed_mount; + ++ if (EXT2_SB(sb)->s_mount_opt & EXT2_MOUNT_TAGGED) ++ sb->s_flags |= MS_TAGGED; + sb->s_flags = (sb->s_flags & ~MS_POSIXACL) | + ((EXT2_SB(sb)->s_mount_opt & EXT2_MOUNT_POSIX_ACL) ? + MS_POSIXACL : 0); +@@ -1223,6 +1243,14 @@ static int ext2_remount (struct super_bl + goto restore_opts; + } + ++ if ((sbi->s_mount_opt & EXT2_MOUNT_TAGGED) && ++ !(sb->s_flags & MS_TAGGED)) { ++ printk("EXT2-fs: %s: tagging not permitted on remount.\n", ++ sb->s_id); ++ err = -EINVAL; ++ goto restore_opts; ++ } ++ + sb->s_flags = (sb->s_flags & ~MS_POSIXACL) | + ((sbi->s_mount_opt & EXT2_MOUNT_POSIX_ACL) ? MS_POSIXACL : 0); + +diff -NurpP --minimal linux-3.3.8/fs/ext3/file.c linux-3.3.8-vs2.3.3.4/fs/ext3/file.c +--- linux-3.3.8/fs/ext3/file.c 2011-10-24 18:45:27.000000000 +0200 ++++ linux-3.3.8-vs2.3.3.4/fs/ext3/file.c 2012-02-24 03:55:06.000000000 +0100 +@@ -80,5 +80,6 @@ const struct inode_operations ext3_file_ + #endif + .get_acl = ext3_get_acl, + .fiemap = ext3_fiemap, ++ .sync_flags = ext3_sync_flags, + }; + +diff -NurpP --minimal linux-3.3.8/fs/ext3/ialloc.c linux-3.3.8-vs2.3.3.4/fs/ext3/ialloc.c +--- linux-3.3.8/fs/ext3/ialloc.c 2012-03-19 19:47:25.000000000 +0100 ++++ linux-3.3.8-vs2.3.3.4/fs/ext3/ialloc.c 2012-02-24 03:55:06.000000000 +0100 +@@ -23,6 +23,7 @@ + #include + #include + #include ++#include + #include + + #include +@@ -496,6 +497,7 @@ got: + inode->i_mode = mode; + inode->i_uid = current_fsuid(); + inode->i_gid = dir->i_gid; ++ inode->i_tag = dx_current_fstag(sb); + } else + inode_init_owner(inode, dir, mode); + +diff -NurpP --minimal linux-3.3.8/fs/ext3/inode.c linux-3.3.8-vs2.3.3.4/fs/ext3/inode.c +--- linux-3.3.8/fs/ext3/inode.c 2012-03-19 19:47:25.000000000 +0100 ++++ linux-3.3.8-vs2.3.3.4/fs/ext3/inode.c 2012-02-24 03:55:06.000000000 +0100 +@@ -37,6 +37,7 @@ + #include + #include + #include ++#include + #include + #include "xattr.h" + #include "acl.h" +@@ -2855,36 +2856,60 @@ void ext3_set_inode_flags(struct inode * + { + unsigned int flags = EXT3_I(inode)->i_flags; + +- inode->i_flags &= ~(S_SYNC|S_APPEND|S_IMMUTABLE|S_NOATIME|S_DIRSYNC); ++ inode->i_flags &= ~(S_IMMUTABLE | S_IXUNLINK | ++ S_SYNC | S_APPEND | S_NOATIME | S_DIRSYNC); ++ ++ if (flags & EXT3_IMMUTABLE_FL) ++ inode->i_flags |= S_IMMUTABLE; ++ if (flags & EXT3_IXUNLINK_FL) ++ inode->i_flags |= S_IXUNLINK; ++ + if (flags & EXT3_SYNC_FL) + inode->i_flags |= S_SYNC; + if (flags & EXT3_APPEND_FL) + inode->i_flags |= S_APPEND; +- if (flags & EXT3_IMMUTABLE_FL) +- inode->i_flags |= S_IMMUTABLE; + if (flags & EXT3_NOATIME_FL) + inode->i_flags |= S_NOATIME; + if (flags & EXT3_DIRSYNC_FL) + inode->i_flags |= S_DIRSYNC; ++ ++ inode->i_vflags &= ~(V_BARRIER | V_COW); ++ ++ if (flags & EXT3_BARRIER_FL) ++ 
inode->i_vflags |= V_BARRIER; ++ if (flags & EXT3_COW_FL) ++ inode->i_vflags |= V_COW; + } + + /* Propagate flags from i_flags to EXT3_I(inode)->i_flags */ + void ext3_get_inode_flags(struct ext3_inode_info *ei) + { + unsigned int flags = ei->vfs_inode.i_flags; ++ unsigned int vflags = ei->vfs_inode.i_vflags; ++ ++ ei->i_flags &= ~(EXT3_SYNC_FL | EXT3_APPEND_FL | ++ EXT3_IMMUTABLE_FL | EXT3_IXUNLINK_FL | ++ EXT3_NOATIME_FL | EXT3_DIRSYNC_FL | ++ EXT3_BARRIER_FL | EXT3_COW_FL); ++ ++ if (flags & S_IMMUTABLE) ++ ei->i_flags |= EXT3_IMMUTABLE_FL; ++ if (flags & S_IXUNLINK) ++ ei->i_flags |= EXT3_IXUNLINK_FL; + +- ei->i_flags &= ~(EXT3_SYNC_FL|EXT3_APPEND_FL| +- EXT3_IMMUTABLE_FL|EXT3_NOATIME_FL|EXT3_DIRSYNC_FL); + if (flags & S_SYNC) + ei->i_flags |= EXT3_SYNC_FL; + if (flags & S_APPEND) + ei->i_flags |= EXT3_APPEND_FL; +- if (flags & S_IMMUTABLE) +- ei->i_flags |= EXT3_IMMUTABLE_FL; + if (flags & S_NOATIME) + ei->i_flags |= EXT3_NOATIME_FL; + if (flags & S_DIRSYNC) + ei->i_flags |= EXT3_DIRSYNC_FL; ++ ++ if (vflags & V_BARRIER) ++ ei->i_flags |= EXT3_BARRIER_FL; ++ if (vflags & V_COW) ++ ei->i_flags |= EXT3_COW_FL; + } + + struct inode *ext3_iget(struct super_block *sb, unsigned long ino) +@@ -2898,6 +2923,8 @@ struct inode *ext3_iget(struct super_blo + transaction_t *transaction; + long ret; + int block; ++ uid_t uid; ++ gid_t gid; + + inode = iget_locked(sb, ino); + if (!inode) +@@ -2914,12 +2941,16 @@ struct inode *ext3_iget(struct super_blo + bh = iloc.bh; + raw_inode = ext3_raw_inode(&iloc); + inode->i_mode = le16_to_cpu(raw_inode->i_mode); +- inode->i_uid = (uid_t)le16_to_cpu(raw_inode->i_uid_low); +- inode->i_gid = (gid_t)le16_to_cpu(raw_inode->i_gid_low); ++ uid = (uid_t)le16_to_cpu(raw_inode->i_uid_low); ++ gid = (gid_t)le16_to_cpu(raw_inode->i_gid_low); + if(!(test_opt (inode->i_sb, NO_UID32))) { +- inode->i_uid |= le16_to_cpu(raw_inode->i_uid_high) << 16; +- inode->i_gid |= le16_to_cpu(raw_inode->i_gid_high) << 16; ++ uid |= le16_to_cpu(raw_inode->i_uid_high) << 16; ++ gid |= le16_to_cpu(raw_inode->i_gid_high) << 16; + } ++ inode->i_uid = INOTAG_UID(DX_TAG(inode), uid, gid); ++ inode->i_gid = INOTAG_GID(DX_TAG(inode), uid, gid); ++ inode->i_tag = INOTAG_TAG(DX_TAG(inode), uid, gid, ++ le16_to_cpu(raw_inode->i_raw_tag)); + set_nlink(inode, le16_to_cpu(raw_inode->i_links_count)); + inode->i_size = le32_to_cpu(raw_inode->i_size); + inode->i_atime.tv_sec = (signed)le32_to_cpu(raw_inode->i_atime); +@@ -3074,6 +3105,8 @@ static int ext3_do_update_inode(handle_t + struct ext3_inode *raw_inode = ext3_raw_inode(iloc); + struct ext3_inode_info *ei = EXT3_I(inode); + struct buffer_head *bh = iloc->bh; ++ uid_t uid = TAGINO_UID(DX_TAG(inode), inode->i_uid, inode->i_tag); ++ gid_t gid = TAGINO_GID(DX_TAG(inode), inode->i_gid, inode->i_tag); + int err = 0, rc, block; + + again: +@@ -3088,29 +3121,32 @@ again: + ext3_get_inode_flags(ei); + raw_inode->i_mode = cpu_to_le16(inode->i_mode); + if(!(test_opt(inode->i_sb, NO_UID32))) { +- raw_inode->i_uid_low = cpu_to_le16(low_16_bits(inode->i_uid)); +- raw_inode->i_gid_low = cpu_to_le16(low_16_bits(inode->i_gid)); ++ raw_inode->i_uid_low = cpu_to_le16(low_16_bits(uid)); ++ raw_inode->i_gid_low = cpu_to_le16(low_16_bits(gid)); + /* + * Fix up interoperability with old kernels. 
Otherwise, old inodes get + * re-used with the upper 16 bits of the uid/gid intact + */ + if(!ei->i_dtime) { + raw_inode->i_uid_high = +- cpu_to_le16(high_16_bits(inode->i_uid)); ++ cpu_to_le16(high_16_bits(uid)); + raw_inode->i_gid_high = +- cpu_to_le16(high_16_bits(inode->i_gid)); ++ cpu_to_le16(high_16_bits(gid)); + } else { + raw_inode->i_uid_high = 0; + raw_inode->i_gid_high = 0; + } + } else { + raw_inode->i_uid_low = +- cpu_to_le16(fs_high2lowuid(inode->i_uid)); ++ cpu_to_le16(fs_high2lowuid(uid)); + raw_inode->i_gid_low = +- cpu_to_le16(fs_high2lowgid(inode->i_gid)); ++ cpu_to_le16(fs_high2lowgid(gid)); + raw_inode->i_uid_high = 0; + raw_inode->i_gid_high = 0; + } ++#ifdef CONFIG_TAGGING_INTERN ++ raw_inode->i_raw_tag = cpu_to_le16(inode->i_tag); ++#endif + raw_inode->i_links_count = cpu_to_le16(inode->i_nlink); + raw_inode->i_size = cpu_to_le32(ei->i_disksize); + raw_inode->i_atime = cpu_to_le32(inode->i_atime.tv_sec); +@@ -3270,7 +3306,8 @@ int ext3_setattr(struct dentry *dentry, + if (is_quota_modification(inode, attr)) + dquot_initialize(inode); + if ((ia_valid & ATTR_UID && attr->ia_uid != inode->i_uid) || +- (ia_valid & ATTR_GID && attr->ia_gid != inode->i_gid)) { ++ (ia_valid & ATTR_GID && attr->ia_gid != inode->i_gid) || ++ (ia_valid & ATTR_TAG && attr->ia_tag != inode->i_tag)) { + handle_t *handle; + + /* (user+group)*(old+new) structure, inode write (sb, +@@ -3292,6 +3329,8 @@ int ext3_setattr(struct dentry *dentry, + inode->i_uid = attr->ia_uid; + if (attr->ia_valid & ATTR_GID) + inode->i_gid = attr->ia_gid; ++ if ((attr->ia_valid & ATTR_TAG) && IS_TAGGED(inode)) ++ inode->i_tag = attr->ia_tag; + error = ext3_mark_inode_dirty(handle, inode); + ext3_journal_stop(handle); + } +diff -NurpP --minimal linux-3.3.8/fs/ext3/ioctl.c linux-3.3.8-vs2.3.3.4/fs/ext3/ioctl.c +--- linux-3.3.8/fs/ext3/ioctl.c 2012-03-19 19:47:25.000000000 +0100 ++++ linux-3.3.8-vs2.3.3.4/fs/ext3/ioctl.c 2012-02-24 03:55:06.000000000 +0100 +@@ -8,6 +8,7 @@ + */ + + #include ++#include + #include + #include + #include +@@ -17,6 +18,34 @@ + #include + #include + ++ ++int ext3_sync_flags(struct inode *inode, int flags, int vflags) ++{ ++ handle_t *handle = NULL; ++ struct ext3_iloc iloc; ++ int err; ++ ++ handle = ext3_journal_start(inode, 1); ++ if (IS_ERR(handle)) ++ return PTR_ERR(handle); ++ ++ if (IS_SYNC(inode)) ++ handle->h_sync = 1; ++ err = ext3_reserve_inode_write(handle, inode, &iloc); ++ if (err) ++ goto flags_err; ++ ++ inode->i_flags = flags; ++ inode->i_vflags = vflags; ++ ext3_get_inode_flags(EXT3_I(inode)); ++ inode->i_ctime = CURRENT_TIME_SEC; ++ ++ err = ext3_mark_iloc_dirty(handle, inode, &iloc); ++flags_err: ++ ext3_journal_stop(handle); ++ return err; ++} ++ + long ext3_ioctl(struct file *filp, unsigned int cmd, unsigned long arg) + { + struct inode *inode = filp->f_dentry->d_inode; +@@ -50,6 +79,11 @@ long ext3_ioctl(struct file *filp, unsig + + flags = ext3_mask_flags(inode->i_mode, flags); + ++ if (IS_BARRIER(inode)) { ++ vxwprintk_task(1, "messing with the barrier."); ++ return -EACCES; ++ } ++ + mutex_lock(&inode->i_mutex); + + /* Is it quota file? Do not allow user to mess with it */ +@@ -68,7 +102,9 @@ long ext3_ioctl(struct file *filp, unsig + * + * This test looks nicer. 
Thanks to Pauline Middelink + */ +- if ((flags ^ oldflags) & (EXT3_APPEND_FL | EXT3_IMMUTABLE_FL)) { ++ if ((oldflags & EXT3_IMMUTABLE_FL) || ++ ((flags ^ oldflags) & (EXT3_APPEND_FL | ++ EXT3_IMMUTABLE_FL | EXT3_IXUNLINK_FL))) { + if (!capable(CAP_LINUX_IMMUTABLE)) + goto flags_out; + } +@@ -93,7 +129,7 @@ long ext3_ioctl(struct file *filp, unsig + if (err) + goto flags_err; + +- flags = flags & EXT3_FL_USER_MODIFIABLE; ++ flags &= EXT3_FL_USER_MODIFIABLE; + flags |= oldflags & ~EXT3_FL_USER_MODIFIABLE; + ei->i_flags = flags; + +diff -NurpP --minimal linux-3.3.8/fs/ext3/namei.c linux-3.3.8-vs2.3.3.4/fs/ext3/namei.c +--- linux-3.3.8/fs/ext3/namei.c 2012-03-19 19:47:25.000000000 +0100 ++++ linux-3.3.8-vs2.3.3.4/fs/ext3/namei.c 2012-02-24 03:55:06.000000000 +0100 +@@ -36,6 +36,7 @@ + #include + #include + #include ++#include + #include + + #include "namei.h" +@@ -927,6 +928,7 @@ restart: + submit_bh(READ | REQ_META | REQ_PRIO, + bh); + } ++ dx_propagate_tag(nd, inode); + } + } + if ((bh = bh_use[ra_ptr++]) == NULL) +@@ -2538,6 +2540,7 @@ const struct inode_operations ext3_dir_i + .listxattr = ext3_listxattr, + .removexattr = generic_removexattr, + #endif ++ .sync_flags = ext3_sync_flags, + .get_acl = ext3_get_acl, + }; + +diff -NurpP --minimal linux-3.3.8/fs/ext3/super.c linux-3.3.8-vs2.3.3.4/fs/ext3/super.c +--- linux-3.3.8/fs/ext3/super.c 2012-03-19 19:47:25.000000000 +0100 ++++ linux-3.3.8-vs2.3.3.4/fs/ext3/super.c 2012-02-24 03:55:06.000000000 +0100 +@@ -830,7 +830,8 @@ enum { + Opt_usrjquota, Opt_grpjquota, Opt_offusrjquota, Opt_offgrpjquota, + Opt_jqfmt_vfsold, Opt_jqfmt_vfsv0, Opt_jqfmt_vfsv1, Opt_quota, + Opt_noquota, Opt_ignore, Opt_barrier, Opt_nobarrier, Opt_err, +- Opt_resize, Opt_usrquota, Opt_grpquota ++ Opt_resize, Opt_usrquota, Opt_grpquota, ++ Opt_tag, Opt_notag, Opt_tagid + }; + + static const match_table_t tokens = { +@@ -887,6 +888,9 @@ static const match_table_t tokens = { + {Opt_barrier, "barrier"}, + {Opt_nobarrier, "nobarrier"}, + {Opt_resize, "resize"}, ++ {Opt_tag, "tag"}, ++ {Opt_notag, "notag"}, ++ {Opt_tagid, "tagid=%u"}, + {Opt_err, NULL}, + }; + +@@ -1039,6 +1043,20 @@ static int parse_options (char *options, + case Opt_nouid32: + set_opt (sbi->s_mount_opt, NO_UID32); + break; ++#ifndef CONFIG_TAGGING_NONE ++ case Opt_tag: ++ set_opt (sbi->s_mount_opt, TAGGED); ++ break; ++ case Opt_notag: ++ clear_opt (sbi->s_mount_opt, TAGGED); ++ break; ++#endif ++#ifdef CONFIG_PROPAGATE ++ case Opt_tagid: ++ /* use args[0] */ ++ set_opt (sbi->s_mount_opt, TAGGED); ++ break; ++#endif + case Opt_nocheck: + clear_opt (sbi->s_mount_opt, CHECK); + break; +@@ -1737,6 +1755,9 @@ static int ext3_fill_super (struct super + NULL, 0)) + goto failed_mount; + ++ if (EXT3_SB(sb)->s_mount_opt & EXT3_MOUNT_TAGGED) ++ sb->s_flags |= MS_TAGGED; ++ + sb->s_flags = (sb->s_flags & ~MS_POSIXACL) | + (test_opt(sb, POSIX_ACL) ? MS_POSIXACL : 0); + +@@ -2619,6 +2640,14 @@ static int ext3_remount (struct super_bl + if (test_opt(sb, ABORT)) + ext3_abort(sb, __func__, "Abort forced by user"); + ++ if ((sbi->s_mount_opt & EXT3_MOUNT_TAGGED) && ++ !(sb->s_flags & MS_TAGGED)) { ++ printk("EXT3-fs: %s: tagging not permitted on remount.\n", ++ sb->s_id); ++ err = -EINVAL; ++ goto restore_opts; ++ } ++ + sb->s_flags = (sb->s_flags & ~MS_POSIXACL) | + (test_opt(sb, POSIX_ACL) ? 
MS_POSIXACL : 0); + +diff -NurpP --minimal linux-3.3.8/fs/ext4/ext4.h linux-3.3.8-vs2.3.3.4/fs/ext4/ext4.h +--- linux-3.3.8/fs/ext4/ext4.h 2012-06-08 15:23:46.000000000 +0200 ++++ linux-3.3.8-vs2.3.3.4/fs/ext4/ext4.h 2012-04-23 23:45:14.000000000 +0200 +@@ -373,8 +373,12 @@ struct flex_groups { + #define EXT4_EXTENTS_FL 0x00080000 /* Inode uses extents */ + #define EXT4_EA_INODE_FL 0x00200000 /* Inode used for large EA */ + #define EXT4_EOFBLOCKS_FL 0x00400000 /* Blocks allocated beyond EOF */ ++#define EXT4_IXUNLINK_FL 0x08000000 /* Immutable invert on unlink */ + #define EXT4_RESERVED_FL 0x80000000 /* reserved for ext4 lib */ + ++#define EXT4_BARRIER_FL 0x04000000 /* Barrier for chroot() */ ++#define EXT4_COW_FL 0x20000000 /* Copy on Write marker */ ++ + #define EXT4_FL_USER_VISIBLE 0x004BDFFF /* User visible flags */ + #define EXT4_FL_USER_MODIFIABLE 0x004B80FF /* User modifiable flags */ + +@@ -643,7 +647,8 @@ struct ext4_inode { + __le16 l_i_file_acl_high; + __le16 l_i_uid_high; /* these 2 fields */ + __le16 l_i_gid_high; /* were reserved2[0] */ +- __u32 l_i_reserved2; ++ __le16 l_i_tag; /* Context Tag */ ++ __u16 l_i_reserved2; + } linux2; + struct { + __le16 h_i_reserved1; /* Obsoleted fragment number/size which are removed in ext4 */ +@@ -761,6 +766,7 @@ do { \ + #define i_gid_low i_gid + #define i_uid_high osd2.linux2.l_i_uid_high + #define i_gid_high osd2.linux2.l_i_gid_high ++#define i_raw_tag osd2.linux2.l_i_tag + #define i_reserved2 osd2.linux2.l_i_reserved2 + + #elif defined(__GNU__) +@@ -937,6 +943,7 @@ struct ext4_inode_info { + #define EXT4_MOUNT_POSIX_ACL 0x08000 /* POSIX Access Control Lists */ + #define EXT4_MOUNT_NO_AUTO_DA_ALLOC 0x10000 /* No auto delalloc mapping */ + #define EXT4_MOUNT_BARRIER 0x20000 /* Use block barriers */ ++#define EXT4_MOUNT_TAGGED 0x40000 /* Enable Context Tags */ + #define EXT4_MOUNT_QUOTA 0x80000 /* Some quota option set */ + #define EXT4_MOUNT_USRQUOTA 0x100000 /* "old" user quota */ + #define EXT4_MOUNT_GRPQUOTA 0x200000 /* "old" group quota */ +@@ -2274,6 +2281,7 @@ extern int ext4_map_blocks(handle_t *han + struct ext4_map_blocks *map, int flags); + extern int ext4_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo, + __u64 start, __u64 len); ++extern int ext4_sync_flags(struct inode *, int, int); + /* move_extent.c */ + extern int ext4_move_extents(struct file *o_filp, struct file *d_filp, + __u64 start_orig, __u64 start_donor, +diff -NurpP --minimal linux-3.3.8/fs/ext4/file.c linux-3.3.8-vs2.3.3.4/fs/ext4/file.c +--- linux-3.3.8/fs/ext4/file.c 2012-01-09 16:14:54.000000000 +0100 ++++ linux-3.3.8-vs2.3.3.4/fs/ext4/file.c 2012-02-24 03:55:06.000000000 +0100 +@@ -258,5 +258,6 @@ const struct inode_operations ext4_file_ + #endif + .get_acl = ext4_get_acl, + .fiemap = ext4_fiemap, ++ .sync_flags = ext4_sync_flags, + }; + +diff -NurpP --minimal linux-3.3.8/fs/ext4/ialloc.c linux-3.3.8-vs2.3.3.4/fs/ext4/ialloc.c +--- linux-3.3.8/fs/ext4/ialloc.c 2012-03-19 19:47:25.000000000 +0100 ++++ linux-3.3.8-vs2.3.3.4/fs/ext4/ialloc.c 2012-02-24 03:55:06.000000000 +0100 +@@ -22,6 +22,7 @@ + #include + #include + #include ++#include + #include + + #include "ext4.h" +@@ -860,6 +861,7 @@ got: + inode->i_mode = mode; + inode->i_uid = current_fsuid(); + inode->i_gid = dir->i_gid; ++ inode->i_tag = dx_current_fstag(sb); + } else + inode_init_owner(inode, dir, mode); + +diff -NurpP --minimal linux-3.3.8/fs/ext4/inode.c linux-3.3.8-vs2.3.3.4/fs/ext4/inode.c +--- linux-3.3.8/fs/ext4/inode.c 2012-06-08 15:23:46.000000000 +0200 ++++ 
linux-3.3.8-vs2.3.3.4/fs/ext4/inode.c 2012-04-03 03:02:12.000000000 +0200 +@@ -37,6 +37,7 @@ + #include + #include + #include ++#include + + #include "ext4_jbd2.h" + #include "xattr.h" +@@ -3557,41 +3558,64 @@ void ext4_set_inode_flags(struct inode * + { + unsigned int flags = EXT4_I(inode)->i_flags; + +- inode->i_flags &= ~(S_SYNC|S_APPEND|S_IMMUTABLE|S_NOATIME|S_DIRSYNC); ++ inode->i_flags &= ~(S_IMMUTABLE | S_IXUNLINK | ++ S_SYNC | S_APPEND | S_NOATIME | S_DIRSYNC); ++ ++ if (flags & EXT4_IMMUTABLE_FL) ++ inode->i_flags |= S_IMMUTABLE; ++ if (flags & EXT4_IXUNLINK_FL) ++ inode->i_flags |= S_IXUNLINK; ++ + if (flags & EXT4_SYNC_FL) + inode->i_flags |= S_SYNC; + if (flags & EXT4_APPEND_FL) + inode->i_flags |= S_APPEND; +- if (flags & EXT4_IMMUTABLE_FL) +- inode->i_flags |= S_IMMUTABLE; + if (flags & EXT4_NOATIME_FL) + inode->i_flags |= S_NOATIME; + if (flags & EXT4_DIRSYNC_FL) + inode->i_flags |= S_DIRSYNC; ++ ++ inode->i_vflags &= ~(V_BARRIER | V_COW); ++ ++ if (flags & EXT4_BARRIER_FL) ++ inode->i_vflags |= V_BARRIER; ++ if (flags & EXT4_COW_FL) ++ inode->i_vflags |= V_COW; + } + + /* Propagate flags from i_flags to EXT4_I(inode)->i_flags */ + void ext4_get_inode_flags(struct ext4_inode_info *ei) + { +- unsigned int vfs_fl; ++ unsigned int vfs_fl, vfs_vf; + unsigned long old_fl, new_fl; + + do { + vfs_fl = ei->vfs_inode.i_flags; ++ vfs_vf = ei->vfs_inode.i_vflags; + old_fl = ei->i_flags; + new_fl = old_fl & ~(EXT4_SYNC_FL|EXT4_APPEND_FL| + EXT4_IMMUTABLE_FL|EXT4_NOATIME_FL| +- EXT4_DIRSYNC_FL); ++ EXT4_DIRSYNC_FL|EXT4_BARRIER_FL| ++ EXT4_COW_FL); ++ ++ if (vfs_fl & S_IMMUTABLE) ++ new_fl |= EXT4_IMMUTABLE_FL; ++ if (vfs_fl & S_IXUNLINK) ++ new_fl |= EXT4_IXUNLINK_FL; ++ + if (vfs_fl & S_SYNC) + new_fl |= EXT4_SYNC_FL; + if (vfs_fl & S_APPEND) + new_fl |= EXT4_APPEND_FL; +- if (vfs_fl & S_IMMUTABLE) +- new_fl |= EXT4_IMMUTABLE_FL; + if (vfs_fl & S_NOATIME) + new_fl |= EXT4_NOATIME_FL; + if (vfs_fl & S_DIRSYNC) + new_fl |= EXT4_DIRSYNC_FL; ++ ++ if (vfs_vf & V_BARRIER) ++ new_fl |= EXT4_BARRIER_FL; ++ if (vfs_vf & V_COW) ++ new_fl |= EXT4_COW_FL; + } while (cmpxchg(&ei->i_flags, old_fl, new_fl) != old_fl); + } + +@@ -3627,6 +3651,8 @@ struct inode *ext4_iget(struct super_blo + journal_t *journal = EXT4_SB(sb)->s_journal; + long ret; + int block; ++ uid_t uid; ++ gid_t gid; + + inode = iget_locked(sb, ino); + if (!inode) +@@ -3642,12 +3668,16 @@ struct inode *ext4_iget(struct super_blo + goto bad_inode; + raw_inode = ext4_raw_inode(&iloc); + inode->i_mode = le16_to_cpu(raw_inode->i_mode); +- inode->i_uid = (uid_t)le16_to_cpu(raw_inode->i_uid_low); +- inode->i_gid = (gid_t)le16_to_cpu(raw_inode->i_gid_low); ++ uid = (uid_t)le16_to_cpu(raw_inode->i_uid_low); ++ gid = (gid_t)le16_to_cpu(raw_inode->i_gid_low); + if (!(test_opt(inode->i_sb, NO_UID32))) { +- inode->i_uid |= le16_to_cpu(raw_inode->i_uid_high) << 16; +- inode->i_gid |= le16_to_cpu(raw_inode->i_gid_high) << 16; ++ uid |= le16_to_cpu(raw_inode->i_uid_high) << 16; ++ gid |= le16_to_cpu(raw_inode->i_gid_high) << 16; + } ++ inode->i_uid = INOTAG_UID(DX_TAG(inode), uid, gid); ++ inode->i_gid = INOTAG_GID(DX_TAG(inode), uid, gid); ++ inode->i_tag = INOTAG_TAG(DX_TAG(inode), uid, gid, ++ le16_to_cpu(raw_inode->i_raw_tag)); + set_nlink(inode, le16_to_cpu(raw_inode->i_links_count)); + + ext4_clear_state_flags(ei); /* Only relevant on 32-bit archs */ +@@ -3866,6 +3896,8 @@ static int ext4_do_update_inode(handle_t + struct ext4_inode *raw_inode = ext4_raw_inode(iloc); + struct ext4_inode_info *ei = EXT4_I(inode); + struct buffer_head *bh = 
iloc->bh; ++ uid_t uid = TAGINO_UID(DX_TAG(inode), inode->i_uid, inode->i_tag); ++ gid_t gid = TAGINO_GID(DX_TAG(inode), inode->i_gid, inode->i_tag); + int err = 0, rc, block; + + /* For fields not not tracking in the in-memory inode, +@@ -3876,29 +3908,32 @@ static int ext4_do_update_inode(handle_t + ext4_get_inode_flags(ei); + raw_inode->i_mode = cpu_to_le16(inode->i_mode); + if (!(test_opt(inode->i_sb, NO_UID32))) { +- raw_inode->i_uid_low = cpu_to_le16(low_16_bits(inode->i_uid)); +- raw_inode->i_gid_low = cpu_to_le16(low_16_bits(inode->i_gid)); ++ raw_inode->i_uid_low = cpu_to_le16(low_16_bits(uid)); ++ raw_inode->i_gid_low = cpu_to_le16(low_16_bits(gid)); + /* + * Fix up interoperability with old kernels. Otherwise, old inodes get + * re-used with the upper 16 bits of the uid/gid intact + */ + if (!ei->i_dtime) { + raw_inode->i_uid_high = +- cpu_to_le16(high_16_bits(inode->i_uid)); ++ cpu_to_le16(high_16_bits(uid)); + raw_inode->i_gid_high = +- cpu_to_le16(high_16_bits(inode->i_gid)); ++ cpu_to_le16(high_16_bits(gid)); + } else { + raw_inode->i_uid_high = 0; + raw_inode->i_gid_high = 0; + } + } else { + raw_inode->i_uid_low = +- cpu_to_le16(fs_high2lowuid(inode->i_uid)); ++ cpu_to_le16(fs_high2lowuid(uid)); + raw_inode->i_gid_low = +- cpu_to_le16(fs_high2lowgid(inode->i_gid)); ++ cpu_to_le16(fs_high2lowgid(gid)); + raw_inode->i_uid_high = 0; + raw_inode->i_gid_high = 0; + } ++#ifdef CONFIG_TAGGING_INTERN ++ raw_inode->i_raw_tag = cpu_to_le16(inode->i_tag); ++#endif + raw_inode->i_links_count = cpu_to_le16(inode->i_nlink); + + EXT4_INODE_SET_XTIME(i_ctime, inode, raw_inode); +@@ -4084,7 +4119,8 @@ int ext4_setattr(struct dentry *dentry, + if (is_quota_modification(inode, attr)) + dquot_initialize(inode); + if ((ia_valid & ATTR_UID && attr->ia_uid != inode->i_uid) || +- (ia_valid & ATTR_GID && attr->ia_gid != inode->i_gid)) { ++ (ia_valid & ATTR_GID && attr->ia_gid != inode->i_gid) || ++ (ia_valid & ATTR_TAG && attr->ia_tag != inode->i_tag)) { + handle_t *handle; + + /* (user+group)*(old+new) structure, inode write (sb, +@@ -4106,6 +4142,8 @@ int ext4_setattr(struct dentry *dentry, + inode->i_uid = attr->ia_uid; + if (attr->ia_valid & ATTR_GID) + inode->i_gid = attr->ia_gid; ++ if ((attr->ia_valid & ATTR_TAG) && IS_TAGGED(inode)) ++ inode->i_tag = attr->ia_tag; + error = ext4_mark_inode_dirty(handle, inode); + ext4_journal_stop(handle); + } +diff -NurpP --minimal linux-3.3.8/fs/ext4/ioctl.c linux-3.3.8-vs2.3.3.4/fs/ext4/ioctl.c +--- linux-3.3.8/fs/ext4/ioctl.c 2012-03-19 19:47:25.000000000 +0100 ++++ linux-3.3.8-vs2.3.3.4/fs/ext4/ioctl.c 2012-02-24 04:29:04.000000000 +0100 +@@ -14,12 +14,40 @@ + #include + #include + #include ++#include + #include + #include "ext4_jbd2.h" + #include "ext4.h" + + #define MAX_32_NUM ((((unsigned long long) 1) << 32) - 1) + ++int ext4_sync_flags(struct inode *inode, int flags, int vflags) ++{ ++ handle_t *handle = NULL; ++ struct ext4_iloc iloc; ++ int err; ++ ++ handle = ext4_journal_start(inode, 1); ++ if (IS_ERR(handle)) ++ return PTR_ERR(handle); ++ ++ if (IS_SYNC(inode)) ++ ext4_handle_sync(handle); ++ err = ext4_reserve_inode_write(handle, inode, &iloc); ++ if (err) ++ goto flags_err; ++ ++ inode->i_flags = flags; ++ inode->i_vflags = vflags; ++ ext4_get_inode_flags(EXT4_I(inode)); ++ inode->i_ctime = ext4_current_time(inode); ++ ++ err = ext4_mark_iloc_dirty(handle, inode, &iloc); ++flags_err: ++ ext4_journal_stop(handle); ++ return err; ++} ++ + long ext4_ioctl(struct file *filp, unsigned int cmd, unsigned long arg) + { + struct inode *inode = 
filp->f_dentry->d_inode; +@@ -53,6 +81,11 @@ long ext4_ioctl(struct file *filp, unsig + + flags = ext4_mask_flags(inode->i_mode, flags); + ++ if (IS_BARRIER(inode)) { ++ vxwprintk_task(1, "messing with the barrier."); ++ return -EACCES; ++ } ++ + err = -EPERM; + mutex_lock(&inode->i_mutex); + /* Is it quota file? Do not allow user to mess with it */ +@@ -70,7 +103,9 @@ long ext4_ioctl(struct file *filp, unsig + * + * This test looks nicer. Thanks to Pauline Middelink + */ +- if ((flags ^ oldflags) & (EXT4_APPEND_FL | EXT4_IMMUTABLE_FL)) { ++ if ((oldflags & EXT4_IMMUTABLE_FL) || ++ ((flags ^ oldflags) & (EXT4_APPEND_FL | ++ EXT4_IMMUTABLE_FL | EXT4_IXUNLINK_FL))) { + if (!capable(CAP_LINUX_IMMUTABLE)) + goto flags_out; + } +diff -NurpP --minimal linux-3.3.8/fs/ext4/namei.c linux-3.3.8-vs2.3.3.4/fs/ext4/namei.c +--- linux-3.3.8/fs/ext4/namei.c 2012-03-19 19:47:25.000000000 +0100 ++++ linux-3.3.8-vs2.3.3.4/fs/ext4/namei.c 2012-02-24 03:55:06.000000000 +0100 +@@ -34,6 +34,7 @@ + #include + #include + #include ++#include + #include "ext4.h" + #include "ext4_jbd2.h" + +@@ -925,6 +926,7 @@ restart: + ll_rw_block(READ | REQ_META | REQ_PRIO, + 1, &bh); + } ++ dx_propagate_tag(nd, inode); + } + if ((bh = bh_use[ra_ptr++]) == NULL) + goto next; +@@ -2587,6 +2589,7 @@ const struct inode_operations ext4_dir_i + #endif + .get_acl = ext4_get_acl, + .fiemap = ext4_fiemap, ++ .sync_flags = ext4_sync_flags, + }; + + const struct inode_operations ext4_special_inode_operations = { +diff -NurpP --minimal linux-3.3.8/fs/ext4/super.c linux-3.3.8-vs2.3.3.4/fs/ext4/super.c +--- linux-3.3.8/fs/ext4/super.c 2012-06-08 15:23:46.000000000 +0200 ++++ linux-3.3.8-vs2.3.3.4/fs/ext4/super.c 2012-04-23 23:45:14.000000000 +0200 +@@ -1333,6 +1333,7 @@ enum { + Opt_inode_readahead_blks, Opt_journal_ioprio, + Opt_dioread_nolock, Opt_dioread_lock, + Opt_discard, Opt_nodiscard, Opt_init_itable, Opt_noinit_itable, ++ Opt_tag, Opt_notag, Opt_tagid + }; + + static const match_table_t tokens = { +@@ -1408,6 +1409,9 @@ static const match_table_t tokens = { + {Opt_init_itable, "init_itable=%u"}, + {Opt_init_itable, "init_itable"}, + {Opt_noinit_itable, "noinit_itable"}, ++ {Opt_tag, "tag"}, ++ {Opt_notag, "notag"}, ++ {Opt_tagid, "tagid=%u"}, + {Opt_err, NULL}, + }; + +@@ -1576,6 +1580,20 @@ static int parse_options(char *options, + case Opt_nouid32: + set_opt(sb, NO_UID32); + break; ++#ifndef CONFIG_TAGGING_NONE ++ case Opt_tag: ++ set_opt(sb, TAGGED); ++ break; ++ case Opt_notag: ++ clear_opt(sb, TAGGED); ++ break; ++#endif ++#ifdef CONFIG_PROPAGATE ++ case Opt_tagid: ++ /* use args[0] */ ++ set_opt(sb, TAGGED); ++ break; ++#endif + case Opt_debug: + set_opt(sb, DEBUG); + break; +@@ -3260,6 +3278,9 @@ static int ext4_fill_super(struct super_ + } + } + ++ if (EXT4_SB(sb)->s_mount_opt & EXT4_MOUNT_TAGGED) ++ sb->s_flags |= MS_TAGGED; ++ + sb->s_flags = (sb->s_flags & ~MS_POSIXACL) | + (test_opt(sb, POSIX_ACL) ? MS_POSIXACL : 0); + +@@ -4412,6 +4433,14 @@ static int ext4_remount(struct super_blo + if (sbi->s_mount_flags & EXT4_MF_FS_ABORTED) + ext4_abort(sb, "Abort forced by user"); + ++ if ((sbi->s_mount_opt & EXT4_MOUNT_TAGGED) && ++ !(sb->s_flags & MS_TAGGED)) { ++ printk("EXT4-fs: %s: tagging not permitted on remount.\n", ++ sb->s_id); ++ err = -EINVAL; ++ goto restore_opts; ++ } ++ + sb->s_flags = (sb->s_flags & ~MS_POSIXACL) | + (test_opt(sb, POSIX_ACL) ? 
MS_POSIXACL : 0); + +diff -NurpP --minimal linux-3.3.8/fs/fcntl.c linux-3.3.8-vs2.3.3.4/fs/fcntl.c +--- linux-3.3.8/fs/fcntl.c 2011-05-22 16:17:52.000000000 +0200 ++++ linux-3.3.8-vs2.3.3.4/fs/fcntl.c 2012-02-24 03:55:06.000000000 +0100 +@@ -20,6 +20,7 @@ + #include + #include + #include ++#include + + #include + #include +@@ -103,6 +104,8 @@ SYSCALL_DEFINE3(dup3, unsigned int, oldf + + if (tofree) + filp_close(tofree, files); ++ else ++ vx_openfd_inc(newfd); /* fd was unused */ + + return newfd; + +@@ -447,6 +450,8 @@ SYSCALL_DEFINE3(fcntl, unsigned int, fd, + filp = fget_raw(fd); + if (!filp) + goto out; ++ if (!vx_files_avail(1)) ++ goto out; + + if (unlikely(filp->f_mode & FMODE_PATH)) { + if (!check_fcntl_cmd(cmd)) { +diff -NurpP --minimal linux-3.3.8/fs/file.c linux-3.3.8-vs2.3.3.4/fs/file.c +--- linux-3.3.8/fs/file.c 2011-05-22 16:17:52.000000000 +0200 ++++ linux-3.3.8-vs2.3.3.4/fs/file.c 2012-02-24 03:55:06.000000000 +0100 +@@ -21,6 +21,7 @@ + #include + #include + #include ++#include + + struct fdtable_defer { + spinlock_t lock; +@@ -359,6 +360,8 @@ struct files_struct *dup_fd(struct files + struct file *f = *old_fds++; + if (f) { + get_file(f); ++ /* TODO: sum it first for check and performance */ ++ vx_openfd_inc(open_files - i); + } else { + /* + * The fd may be claimed in the fd bitmap but not yet +@@ -466,6 +469,7 @@ repeat: + else + FD_CLR(fd, fdt->close_on_exec); + error = fd; ++ vx_openfd_inc(fd); + #if 1 + /* Sanity check */ + if (rcu_dereference_raw(fdt->fd[fd]) != NULL) { +diff -NurpP --minimal linux-3.3.8/fs/file_table.c linux-3.3.8-vs2.3.3.4/fs/file_table.c +--- linux-3.3.8/fs/file_table.c 2012-03-19 19:47:25.000000000 +0100 ++++ linux-3.3.8-vs2.3.3.4/fs/file_table.c 2012-02-24 03:55:06.000000000 +0100 +@@ -24,6 +24,8 @@ + #include + #include + #include ++#include ++#include + + #include + +@@ -135,6 +137,8 @@ struct file *get_empty_filp(void) + spin_lock_init(&f->f_lock); + eventpoll_init_file(f); + /* f->f_version: 0 */ ++ f->f_xid = vx_current_xid(); ++ vx_files_inc(f); + return f; + + over: +@@ -253,6 +257,8 @@ static void __fput(struct file *file) + } + fops_put(file->f_op); + put_pid(file->f_owner.pid); ++ vx_files_dec(file); ++ file->f_xid = 0; + file_sb_list_del(file); + if ((file->f_mode & (FMODE_READ | FMODE_WRITE)) == FMODE_READ) + i_readcount_dec(inode); +@@ -383,6 +389,8 @@ void put_filp(struct file *file) + { + if (atomic_long_dec_and_test(&file->f_count)) { + security_file_free(file); ++ vx_files_dec(file); ++ file->f_xid = 0; + file_sb_list_del(file); + file_free(file); + } +diff -NurpP --minimal linux-3.3.8/fs/fs_struct.c linux-3.3.8-vs2.3.3.4/fs/fs_struct.c +--- linux-3.3.8/fs/fs_struct.c 2011-03-15 18:07:31.000000000 +0100 ++++ linux-3.3.8-vs2.3.3.4/fs/fs_struct.c 2012-02-24 03:55:06.000000000 +0100 +@@ -4,6 +4,7 @@ + #include + #include + #include ++#include + #include "internal.h" + + static inline void path_get_longterm(struct path *path) +@@ -96,6 +97,7 @@ void free_fs_struct(struct fs_struct *fs + { + path_put_longterm(&fs->root); + path_put_longterm(&fs->pwd); ++ atomic_dec(&vs_global_fs); + kmem_cache_free(fs_cachep, fs); + } + +@@ -135,6 +137,7 @@ struct fs_struct *copy_fs_struct(struct + fs->pwd = old->pwd; + path_get_longterm(&fs->pwd); + spin_unlock(&old->lock); ++ atomic_inc(&vs_global_fs); + } + return fs; + } +diff -NurpP --minimal linux-3.3.8/fs/gfs2/file.c linux-3.3.8-vs2.3.3.4/fs/gfs2/file.c +--- linux-3.3.8/fs/gfs2/file.c 2012-03-19 19:47:25.000000000 +0100 ++++ linux-3.3.8-vs2.3.3.4/fs/gfs2/file.c 2012-02-24 03:55:06.000000000 
+0100 +@@ -143,6 +143,9 @@ static const u32 fsflags_to_gfs2[32] = { + [7] = GFS2_DIF_NOATIME, + [12] = GFS2_DIF_EXHASH, + [14] = GFS2_DIF_INHERIT_JDATA, ++ [27] = GFS2_DIF_IXUNLINK, ++ [26] = GFS2_DIF_BARRIER, ++ [29] = GFS2_DIF_COW, + }; + + static const u32 gfs2_to_fsflags[32] = { +@@ -152,6 +155,9 @@ static const u32 gfs2_to_fsflags[32] = { + [gfs2fl_NoAtime] = FS_NOATIME_FL, + [gfs2fl_ExHash] = FS_INDEX_FL, + [gfs2fl_InheritJdata] = FS_JOURNAL_DATA_FL, ++ [gfs2fl_IXUnlink] = FS_IXUNLINK_FL, ++ [gfs2fl_Barrier] = FS_BARRIER_FL, ++ [gfs2fl_Cow] = FS_COW_FL, + }; + + static int gfs2_get_flags(struct file *filp, u32 __user *ptr) +@@ -182,12 +188,18 @@ void gfs2_set_inode_flags(struct inode * + { + struct gfs2_inode *ip = GFS2_I(inode); + unsigned int flags = inode->i_flags; ++ unsigned int vflags = inode->i_vflags; ++ ++ flags &= ~(S_IMMUTABLE | S_IXUNLINK | ++ S_SYNC | S_APPEND | S_NOATIME | S_DIRSYNC | S_NOSEC); + +- flags &= ~(S_SYNC|S_APPEND|S_IMMUTABLE|S_NOATIME|S_DIRSYNC|S_NOSEC); + if ((ip->i_eattr == 0) && !is_sxid(inode->i_mode)) + inode->i_flags |= S_NOSEC; + if (ip->i_diskflags & GFS2_DIF_IMMUTABLE) + flags |= S_IMMUTABLE; ++ if (ip->i_diskflags & GFS2_DIF_IXUNLINK) ++ flags |= S_IXUNLINK; ++ + if (ip->i_diskflags & GFS2_DIF_APPENDONLY) + flags |= S_APPEND; + if (ip->i_diskflags & GFS2_DIF_NOATIME) +@@ -195,6 +207,43 @@ void gfs2_set_inode_flags(struct inode * + if (ip->i_diskflags & GFS2_DIF_SYNC) + flags |= S_SYNC; + inode->i_flags = flags; ++ ++ vflags &= ~(V_BARRIER | V_COW); ++ ++ if (ip->i_diskflags & GFS2_DIF_BARRIER) ++ vflags |= V_BARRIER; ++ if (ip->i_diskflags & GFS2_DIF_COW) ++ vflags |= V_COW; ++ inode->i_vflags = vflags; ++} ++ ++void gfs2_get_inode_flags(struct inode *inode) ++{ ++ struct gfs2_inode *ip = GFS2_I(inode); ++ unsigned int flags = inode->i_flags; ++ unsigned int vflags = inode->i_vflags; ++ ++ ip->i_diskflags &= ~(GFS2_DIF_APPENDONLY | ++ GFS2_DIF_NOATIME | GFS2_DIF_SYNC | ++ GFS2_DIF_IMMUTABLE | GFS2_DIF_IXUNLINK | ++ GFS2_DIF_BARRIER | GFS2_DIF_COW); ++ ++ if (flags & S_IMMUTABLE) ++ ip->i_diskflags |= GFS2_DIF_IMMUTABLE; ++ if (flags & S_IXUNLINK) ++ ip->i_diskflags |= GFS2_DIF_IXUNLINK; ++ ++ if (flags & S_APPEND) ++ ip->i_diskflags |= GFS2_DIF_APPENDONLY; ++ if (flags & S_NOATIME) ++ ip->i_diskflags |= GFS2_DIF_NOATIME; ++ if (flags & S_SYNC) ++ ip->i_diskflags |= GFS2_DIF_SYNC; ++ ++ if (vflags & V_BARRIER) ++ ip->i_diskflags |= GFS2_DIF_BARRIER; ++ if (vflags & V_COW) ++ ip->i_diskflags |= GFS2_DIF_COW; + } + + /* Flags that can be set by user space */ +@@ -306,6 +355,37 @@ static int gfs2_set_flags(struct file *f + return do_gfs2_set_flags(filp, gfsflags, ~GFS2_DIF_JDATA); + } + ++int gfs2_sync_flags(struct inode *inode, int flags, int vflags) ++{ ++ struct gfs2_inode *ip = GFS2_I(inode); ++ struct gfs2_sbd *sdp = GFS2_SB(inode); ++ struct buffer_head *bh; ++ struct gfs2_holder gh; ++ int error; ++ ++ error = gfs2_glock_nq_init(ip->i_gl, LM_ST_EXCLUSIVE, 0, &gh); ++ if (error) ++ return error; ++ error = gfs2_trans_begin(sdp, RES_DINODE, 0); ++ if (error) ++ goto out; ++ error = gfs2_meta_inode_buffer(ip, &bh); ++ if (error) ++ goto out_trans_end; ++ gfs2_trans_add_bh(ip->i_gl, bh, 1); ++ inode->i_flags = flags; ++ inode->i_vflags = vflags; ++ gfs2_get_inode_flags(inode); ++ gfs2_dinode_out(ip, bh->b_data); ++ brelse(bh); ++ gfs2_set_aops(inode); ++out_trans_end: ++ gfs2_trans_end(sdp); ++out: ++ gfs2_glock_dq_uninit(&gh); ++ return error; ++} ++ + static long gfs2_ioctl(struct file *filp, unsigned int cmd, unsigned long arg) + { + switch(cmd) 
{ +diff -NurpP --minimal linux-3.3.8/fs/gfs2/inode.h linux-3.3.8-vs2.3.3.4/fs/gfs2/inode.h +--- linux-3.3.8/fs/gfs2/inode.h 2012-01-09 16:14:54.000000000 +0100 ++++ linux-3.3.8-vs2.3.3.4/fs/gfs2/inode.h 2012-02-24 03:55:06.000000000 +0100 +@@ -120,6 +120,7 @@ extern const struct file_operations gfs2 + extern const struct file_operations gfs2_dir_fops_nolock; + + extern void gfs2_set_inode_flags(struct inode *inode); ++extern int gfs2_sync_flags(struct inode *inode, int flags, int vflags); + + #ifdef CONFIG_GFS2_FS_LOCKING_DLM + extern const struct file_operations gfs2_file_fops; +diff -NurpP --minimal linux-3.3.8/fs/inode.c linux-3.3.8-vs2.3.3.4/fs/inode.c +--- linux-3.3.8/fs/inode.c 2012-03-19 19:47:25.000000000 +0100 ++++ linux-3.3.8-vs2.3.3.4/fs/inode.c 2012-03-19 20:52:10.000000000 +0100 +@@ -27,6 +27,7 @@ + #include + #include /* for inode_has_buffers */ + #include ++#include + #include "internal.h" + + /* +@@ -138,6 +139,9 @@ int inode_init_always(struct super_block + struct address_space *const mapping = &inode->i_data; + + inode->i_sb = sb; ++ ++ /* essential because of inode slab reuse */ ++ inode->i_tag = 0; + inode->i_blkbits = sb->s_blocksize_bits; + inode->i_flags = 0; + atomic_set(&inode->i_count, 1); +@@ -159,6 +163,7 @@ int inode_init_always(struct super_block + inode->i_bdev = NULL; + inode->i_cdev = NULL; + inode->i_rdev = 0; ++ inode->i_mdev = 0; + inode->dirtied_when = 0; + + if (security_inode_alloc(inode)) +@@ -480,6 +485,8 @@ void __insert_inode_hash(struct inode *i + } + EXPORT_SYMBOL(__insert_inode_hash); + ++EXPORT_SYMBOL_GPL(__iget); ++ + /** + * __remove_inode_hash - remove an inode from the hash + * @inode: inode to unhash +@@ -1709,9 +1716,11 @@ void init_special_inode(struct inode *in + if (S_ISCHR(mode)) { + inode->i_fop = &def_chr_fops; + inode->i_rdev = rdev; ++ inode->i_mdev = rdev; + } else if (S_ISBLK(mode)) { + inode->i_fop = &def_blk_fops; + inode->i_rdev = rdev; ++ inode->i_mdev = rdev; + } else if (S_ISFIFO(mode)) + inode->i_fop = &def_fifo_fops; + else if (S_ISSOCK(mode)) +@@ -1740,6 +1749,7 @@ void inode_init_owner(struct inode *inod + } else + inode->i_gid = current_fsgid(); + inode->i_mode = mode; ++ inode->i_tag = dx_current_fstag(inode->i_sb); + } + EXPORT_SYMBOL(inode_init_owner); + +diff -NurpP --minimal linux-3.3.8/fs/ioctl.c linux-3.3.8-vs2.3.3.4/fs/ioctl.c +--- linux-3.3.8/fs/ioctl.c 2012-03-19 19:47:25.000000000 +0100 ++++ linux-3.3.8-vs2.3.3.4/fs/ioctl.c 2012-02-24 03:55:06.000000000 +0100 +@@ -15,6 +15,9 @@ + #include + #include + #include ++#include ++#include ++#include + + #include + +diff -NurpP --minimal linux-3.3.8/fs/ioprio.c linux-3.3.8-vs2.3.3.4/fs/ioprio.c +--- linux-3.3.8/fs/ioprio.c 2012-03-19 19:47:25.000000000 +0100 ++++ linux-3.3.8-vs2.3.3.4/fs/ioprio.c 2012-02-24 03:55:06.000000000 +0100 +@@ -28,6 +28,7 @@ + #include + #include + #include ++#include + + int set_task_ioprio(struct task_struct *task, int ioprio) + { +@@ -104,6 +105,8 @@ SYSCALL_DEFINE3(ioprio_set, int, which, + else + pgrp = find_vpid(who); + do_each_pid_thread(pgrp, PIDTYPE_PGID, p) { ++ if (!vx_check(p->xid, VS_ADMIN_P | VS_IDENT)) ++ continue; + ret = set_task_ioprio(p, ioprio); + if (ret) + break; +@@ -193,6 +196,8 @@ SYSCALL_DEFINE2(ioprio_get, int, which, + else + pgrp = find_vpid(who); + do_each_pid_thread(pgrp, PIDTYPE_PGID, p) { ++ if (!vx_check(p->xid, VS_ADMIN_P | VS_IDENT)) ++ continue; + tmpio = get_task_ioprio(p); + if (tmpio < 0) + continue; +diff -NurpP --minimal linux-3.3.8/fs/jfs/file.c linux-3.3.8-vs2.3.3.4/fs/jfs/file.c +--- 
linux-3.3.8/fs/jfs/file.c 2011-10-24 18:45:27.000000000 +0200 ++++ linux-3.3.8-vs2.3.3.4/fs/jfs/file.c 2012-02-24 03:55:06.000000000 +0100 +@@ -109,7 +109,8 @@ int jfs_setattr(struct dentry *dentry, s + if (is_quota_modification(inode, iattr)) + dquot_initialize(inode); + if ((iattr->ia_valid & ATTR_UID && iattr->ia_uid != inode->i_uid) || +- (iattr->ia_valid & ATTR_GID && iattr->ia_gid != inode->i_gid)) { ++ (iattr->ia_valid & ATTR_GID && iattr->ia_gid != inode->i_gid) || ++ (iattr->ia_valid & ATTR_TAG && iattr->ia_tag != inode->i_tag)) { + rc = dquot_transfer(inode, iattr); + if (rc) + return rc; +@@ -142,6 +143,7 @@ const struct inode_operations jfs_file_i + #ifdef CONFIG_JFS_POSIX_ACL + .get_acl = jfs_get_acl, + #endif ++ .sync_flags = jfs_sync_flags, + }; + + const struct file_operations jfs_file_operations = { +diff -NurpP --minimal linux-3.3.8/fs/jfs/ioctl.c linux-3.3.8-vs2.3.3.4/fs/jfs/ioctl.c +--- linux-3.3.8/fs/jfs/ioctl.c 2012-03-19 19:47:25.000000000 +0100 ++++ linux-3.3.8-vs2.3.3.4/fs/jfs/ioctl.c 2012-02-24 03:55:06.000000000 +0100 +@@ -11,6 +11,7 @@ + #include + #include + #include ++#include + #include + #include + +@@ -52,6 +53,16 @@ static long jfs_map_ext2(unsigned long f + } + + ++int jfs_sync_flags(struct inode *inode, int flags, int vflags) ++{ ++ inode->i_flags = flags; ++ inode->i_vflags = vflags; ++ jfs_get_inode_flags(JFS_IP(inode)); ++ inode->i_ctime = CURRENT_TIME_SEC; ++ mark_inode_dirty(inode); ++ return 0; ++} ++ + long jfs_ioctl(struct file *filp, unsigned int cmd, unsigned long arg) + { + struct inode *inode = filp->f_dentry->d_inode; +@@ -85,6 +96,11 @@ long jfs_ioctl(struct file *filp, unsign + if (!S_ISDIR(inode->i_mode)) + flags &= ~JFS_DIRSYNC_FL; + ++ if (IS_BARRIER(inode)) { ++ vxwprintk_task(1, "messing with the barrier."); ++ return -EACCES; ++ } ++ + /* Is it quota file? Do not allow user to mess with it */ + if (IS_NOQUOTA(inode)) { + err = -EPERM; +@@ -102,8 +118,8 @@ long jfs_ioctl(struct file *filp, unsign + * the relevant capability. 
+ */ + if ((oldflags & JFS_IMMUTABLE_FL) || +- ((flags ^ oldflags) & +- (JFS_APPEND_FL | JFS_IMMUTABLE_FL))) { ++ ((flags ^ oldflags) & (JFS_APPEND_FL | ++ JFS_IMMUTABLE_FL | JFS_IXUNLINK_FL))) { + if (!capable(CAP_LINUX_IMMUTABLE)) { + mutex_unlock(&inode->i_mutex); + err = -EPERM; +@@ -111,7 +127,7 @@ long jfs_ioctl(struct file *filp, unsign + } + } + +- flags = flags & JFS_FL_USER_MODIFIABLE; ++ flags &= JFS_FL_USER_MODIFIABLE; + flags |= oldflags & ~JFS_FL_USER_MODIFIABLE; + jfs_inode->mode2 = flags; + +diff -NurpP --minimal linux-3.3.8/fs/jfs/jfs_dinode.h linux-3.3.8-vs2.3.3.4/fs/jfs/jfs_dinode.h +--- linux-3.3.8/fs/jfs/jfs_dinode.h 2008-12-25 00:26:37.000000000 +0100 ++++ linux-3.3.8-vs2.3.3.4/fs/jfs/jfs_dinode.h 2012-02-24 03:55:06.000000000 +0100 +@@ -161,9 +161,13 @@ struct dinode { + + #define JFS_APPEND_FL 0x01000000 /* writes to file may only append */ + #define JFS_IMMUTABLE_FL 0x02000000 /* Immutable file */ ++#define JFS_IXUNLINK_FL 0x08000000 /* Immutable invert on unlink */ + +-#define JFS_FL_USER_VISIBLE 0x03F80000 +-#define JFS_FL_USER_MODIFIABLE 0x03F80000 ++#define JFS_BARRIER_FL 0x04000000 /* Barrier for chroot() */ ++#define JFS_COW_FL 0x20000000 /* Copy on Write marker */ ++ ++#define JFS_FL_USER_VISIBLE 0x07F80000 ++#define JFS_FL_USER_MODIFIABLE 0x07F80000 + #define JFS_FL_INHERIT 0x03C80000 + + /* These are identical to EXT[23]_IOC_GETFLAGS/SETFLAGS */ +diff -NurpP --minimal linux-3.3.8/fs/jfs/jfs_filsys.h linux-3.3.8-vs2.3.3.4/fs/jfs/jfs_filsys.h +--- linux-3.3.8/fs/jfs/jfs_filsys.h 2008-12-25 00:26:37.000000000 +0100 ++++ linux-3.3.8-vs2.3.3.4/fs/jfs/jfs_filsys.h 2012-02-24 03:55:06.000000000 +0100 +@@ -263,6 +263,7 @@ + #define JFS_NAME_MAX 255 + #define JFS_PATH_MAX BPSIZE + ++#define JFS_TAGGED 0x00800000 /* Context Tagging */ + + /* + * file system state (superblock state) +diff -NurpP --minimal linux-3.3.8/fs/jfs/jfs_imap.c linux-3.3.8-vs2.3.3.4/fs/jfs/jfs_imap.c +--- linux-3.3.8/fs/jfs/jfs_imap.c 2012-01-09 16:14:54.000000000 +0100 ++++ linux-3.3.8-vs2.3.3.4/fs/jfs/jfs_imap.c 2012-02-24 03:55:06.000000000 +0100 +@@ -46,6 +46,7 @@ + #include + #include + #include ++#include + + #include "jfs_incore.h" + #include "jfs_inode.h" +@@ -3058,6 +3059,8 @@ static int copy_from_dinode(struct dinod + { + struct jfs_inode_info *jfs_ip = JFS_IP(ip); + struct jfs_sb_info *sbi = JFS_SBI(ip->i_sb); ++ uid_t uid; ++ gid_t gid; + + jfs_ip->fileset = le32_to_cpu(dip->di_fileset); + jfs_ip->mode2 = le32_to_cpu(dip->di_mode); +@@ -3078,14 +3081,18 @@ static int copy_from_dinode(struct dinod + } + set_nlink(ip, le32_to_cpu(dip->di_nlink)); + +- jfs_ip->saved_uid = le32_to_cpu(dip->di_uid); ++ uid = le32_to_cpu(dip->di_uid); ++ gid = le32_to_cpu(dip->di_gid); ++ ip->i_tag = INOTAG_TAG(DX_TAG(ip), uid, gid, 0); ++ ++ jfs_ip->saved_uid = INOTAG_UID(DX_TAG(ip), uid, gid); + if (sbi->uid == -1) + ip->i_uid = jfs_ip->saved_uid; + else { + ip->i_uid = sbi->uid; + } + +- jfs_ip->saved_gid = le32_to_cpu(dip->di_gid); ++ jfs_ip->saved_gid = INOTAG_GID(DX_TAG(ip), uid, gid); + if (sbi->gid == -1) + ip->i_gid = jfs_ip->saved_gid; + else { +@@ -3150,14 +3157,12 @@ static void copy_to_dinode(struct dinode + dip->di_size = cpu_to_le64(ip->i_size); + dip->di_nblocks = cpu_to_le64(PBLK2LBLK(ip->i_sb, ip->i_blocks)); + dip->di_nlink = cpu_to_le32(ip->i_nlink); +- if (sbi->uid == -1) +- dip->di_uid = cpu_to_le32(ip->i_uid); +- else +- dip->di_uid = cpu_to_le32(jfs_ip->saved_uid); +- if (sbi->gid == -1) +- dip->di_gid = cpu_to_le32(ip->i_gid); +- else +- dip->di_gid = 
cpu_to_le32(jfs_ip->saved_gid); ++ ++ dip->di_uid = cpu_to_le32(TAGINO_UID(DX_TAG(ip), ++ (sbi->uid == -1) ? ip->i_uid : jfs_ip->saved_uid, ip->i_tag)); ++ dip->di_gid = cpu_to_le32(TAGINO_GID(DX_TAG(ip), ++ (sbi->gid == -1) ? ip->i_gid : jfs_ip->saved_gid, ip->i_tag)); ++ + jfs_get_inode_flags(jfs_ip); + /* + * mode2 is only needed for storing the higher order bits. +diff -NurpP --minimal linux-3.3.8/fs/jfs/jfs_inode.c linux-3.3.8-vs2.3.3.4/fs/jfs/jfs_inode.c +--- linux-3.3.8/fs/jfs/jfs_inode.c 2012-01-09 16:14:54.000000000 +0100 ++++ linux-3.3.8-vs2.3.3.4/fs/jfs/jfs_inode.c 2012-02-24 03:55:06.000000000 +0100 +@@ -18,6 +18,7 @@ + + #include + #include ++#include + #include "jfs_incore.h" + #include "jfs_inode.h" + #include "jfs_filsys.h" +@@ -30,29 +31,46 @@ void jfs_set_inode_flags(struct inode *i + { + unsigned int flags = JFS_IP(inode)->mode2; + +- inode->i_flags &= ~(S_IMMUTABLE | S_APPEND | +- S_NOATIME | S_DIRSYNC | S_SYNC); ++ inode->i_flags &= ~(S_IMMUTABLE | S_IXUNLINK | ++ S_SYNC | S_APPEND | S_NOATIME | S_DIRSYNC); + + if (flags & JFS_IMMUTABLE_FL) + inode->i_flags |= S_IMMUTABLE; ++ if (flags & JFS_IXUNLINK_FL) ++ inode->i_flags |= S_IXUNLINK; ++ ++ if (flags & JFS_SYNC_FL) ++ inode->i_flags |= S_SYNC; + if (flags & JFS_APPEND_FL) + inode->i_flags |= S_APPEND; + if (flags & JFS_NOATIME_FL) + inode->i_flags |= S_NOATIME; + if (flags & JFS_DIRSYNC_FL) + inode->i_flags |= S_DIRSYNC; +- if (flags & JFS_SYNC_FL) +- inode->i_flags |= S_SYNC; ++ ++ inode->i_vflags &= ~(V_BARRIER | V_COW); ++ ++ if (flags & JFS_BARRIER_FL) ++ inode->i_vflags |= V_BARRIER; ++ if (flags & JFS_COW_FL) ++ inode->i_vflags |= V_COW; + } + + void jfs_get_inode_flags(struct jfs_inode_info *jfs_ip) + { + unsigned int flags = jfs_ip->vfs_inode.i_flags; ++ unsigned int vflags = jfs_ip->vfs_inode.i_vflags; ++ ++ jfs_ip->mode2 &= ~(JFS_IMMUTABLE_FL | JFS_IXUNLINK_FL | ++ JFS_APPEND_FL | JFS_NOATIME_FL | ++ JFS_DIRSYNC_FL | JFS_SYNC_FL | ++ JFS_BARRIER_FL | JFS_COW_FL); + +- jfs_ip->mode2 &= ~(JFS_IMMUTABLE_FL | JFS_APPEND_FL | JFS_NOATIME_FL | +- JFS_DIRSYNC_FL | JFS_SYNC_FL); + if (flags & S_IMMUTABLE) + jfs_ip->mode2 |= JFS_IMMUTABLE_FL; ++ if (flags & S_IXUNLINK) ++ jfs_ip->mode2 |= JFS_IXUNLINK_FL; ++ + if (flags & S_APPEND) + jfs_ip->mode2 |= JFS_APPEND_FL; + if (flags & S_NOATIME) +@@ -61,6 +79,11 @@ void jfs_get_inode_flags(struct jfs_inod + jfs_ip->mode2 |= JFS_DIRSYNC_FL; + if (flags & S_SYNC) + jfs_ip->mode2 |= JFS_SYNC_FL; ++ ++ if (vflags & V_BARRIER) ++ jfs_ip->mode2 |= JFS_BARRIER_FL; ++ if (vflags & V_COW) ++ jfs_ip->mode2 |= JFS_COW_FL; + } + + /* +diff -NurpP --minimal linux-3.3.8/fs/jfs/jfs_inode.h linux-3.3.8-vs2.3.3.4/fs/jfs/jfs_inode.h +--- linux-3.3.8/fs/jfs/jfs_inode.h 2011-10-24 18:45:27.000000000 +0200 ++++ linux-3.3.8-vs2.3.3.4/fs/jfs/jfs_inode.h 2012-02-24 03:55:06.000000000 +0100 +@@ -39,6 +39,7 @@ extern struct dentry *jfs_fh_to_dentry(s + extern struct dentry *jfs_fh_to_parent(struct super_block *sb, struct fid *fid, + int fh_len, int fh_type); + extern void jfs_set_inode_flags(struct inode *); ++extern int jfs_sync_flags(struct inode *, int, int); + extern int jfs_get_block(struct inode *, sector_t, struct buffer_head *, int); + extern int jfs_setattr(struct dentry *, struct iattr *); + +diff -NurpP --minimal linux-3.3.8/fs/jfs/namei.c linux-3.3.8-vs2.3.3.4/fs/jfs/namei.c +--- linux-3.3.8/fs/jfs/namei.c 2012-03-19 19:47:25.000000000 +0100 ++++ linux-3.3.8-vs2.3.3.4/fs/jfs/namei.c 2012-02-24 03:55:06.000000000 +0100 +@@ -22,6 +22,7 @@ + #include + #include + #include ++#include + 
#include "jfs_incore.h" + #include "jfs_superblock.h" + #include "jfs_inode.h" +@@ -1474,6 +1475,7 @@ static struct dentry *jfs_lookup(struct + jfs_err("jfs_lookup: iget failed on inum %d", (uint)inum); + } + ++ dx_propagate_tag(nd, ip); + return d_splice_alias(ip, dentry); + } + +@@ -1538,6 +1540,7 @@ const struct inode_operations jfs_dir_in + #ifdef CONFIG_JFS_POSIX_ACL + .get_acl = jfs_get_acl, + #endif ++ .sync_flags = jfs_sync_flags, + }; + + const struct file_operations jfs_dir_operations = { +diff -NurpP --minimal linux-3.3.8/fs/jfs/super.c linux-3.3.8-vs2.3.3.4/fs/jfs/super.c +--- linux-3.3.8/fs/jfs/super.c 2012-03-19 19:47:25.000000000 +0100 ++++ linux-3.3.8-vs2.3.3.4/fs/jfs/super.c 2012-02-24 03:55:06.000000000 +0100 +@@ -197,7 +197,8 @@ static void jfs_put_super(struct super_b + enum { + Opt_integrity, Opt_nointegrity, Opt_iocharset, Opt_resize, + Opt_resize_nosize, Opt_errors, Opt_ignore, Opt_err, Opt_quota, +- Opt_usrquota, Opt_grpquota, Opt_uid, Opt_gid, Opt_umask ++ Opt_usrquota, Opt_grpquota, Opt_uid, Opt_gid, Opt_umask, ++ Opt_tag, Opt_notag, Opt_tagid + }; + + static const match_table_t tokens = { +@@ -207,6 +208,10 @@ static const match_table_t tokens = { + {Opt_resize, "resize=%u"}, + {Opt_resize_nosize, "resize"}, + {Opt_errors, "errors=%s"}, ++ {Opt_tag, "tag"}, ++ {Opt_notag, "notag"}, ++ {Opt_tagid, "tagid=%u"}, ++ {Opt_tag, "tagxid"}, + {Opt_ignore, "noquota"}, + {Opt_ignore, "quota"}, + {Opt_usrquota, "usrquota"}, +@@ -341,6 +346,20 @@ static int parse_options(char *options, + } + break; + } ++#ifndef CONFIG_TAGGING_NONE ++ case Opt_tag: ++ *flag |= JFS_TAGGED; ++ break; ++ case Opt_notag: ++ *flag &= JFS_TAGGED; ++ break; ++#endif ++#ifdef CONFIG_PROPAGATE ++ case Opt_tagid: ++ /* use args[0] */ ++ *flag |= JFS_TAGGED; ++ break; ++#endif + default: + printk("jfs: Unrecognized mount option \"%s\" " + " or missing value\n", p); +@@ -372,6 +391,12 @@ static int jfs_remount(struct super_bloc + return -EINVAL; + } + ++ if ((flag & JFS_TAGGED) && !(sb->s_flags & MS_TAGGED)) { ++ printk(KERN_ERR "JFS: %s: tagging not permitted on remount.\n", ++ sb->s_id); ++ return -EINVAL; ++ } ++ + if (newLVSize) { + if (sb->s_flags & MS_RDONLY) { + printk(KERN_ERR +@@ -454,6 +479,9 @@ static int jfs_fill_super(struct super_b + #ifdef CONFIG_JFS_POSIX_ACL + sb->s_flags |= MS_POSIXACL; + #endif ++ /* map mount option tagxid */ ++ if (sbi->flag & JFS_TAGGED) ++ sb->s_flags |= MS_TAGGED; + + if (newLVSize) { + printk(KERN_ERR "resize option for remount only\n"); +diff -NurpP --minimal linux-3.3.8/fs/libfs.c linux-3.3.8-vs2.3.3.4/fs/libfs.c +--- linux-3.3.8/fs/libfs.c 2012-03-19 19:47:26.000000000 +0100 ++++ linux-3.3.8-vs2.3.3.4/fs/libfs.c 2012-02-24 03:55:06.000000000 +0100 +@@ -135,7 +135,8 @@ static inline unsigned char dt_type(stru + * both impossible due to the lock on directory. 
+ */ + +-int dcache_readdir(struct file * filp, void * dirent, filldir_t filldir) ++static inline int do_dcache_readdir_filter(struct file *filp, ++ void *dirent, filldir_t filldir, int (*filter)(struct dentry *dentry)) + { + struct dentry *dentry = filp->f_path.dentry; + struct dentry *cursor = filp->private_data; +@@ -166,6 +167,8 @@ int dcache_readdir(struct file * filp, v + for (p=q->next; p != &dentry->d_subdirs; p=p->next) { + struct dentry *next; + next = list_entry(p, struct dentry, d_u.d_child); ++ if (filter && !filter(next)) ++ continue; + spin_lock_nested(&next->d_lock, DENTRY_D_LOCK_NESTED); + if (!simple_positive(next)) { + spin_unlock(&next->d_lock); +@@ -192,6 +195,17 @@ int dcache_readdir(struct file * filp, v + return 0; + } + ++int dcache_readdir(struct file *filp, void *dirent, filldir_t filldir) ++{ ++ return do_dcache_readdir_filter(filp, dirent, filldir, NULL); ++} ++ ++int dcache_readdir_filter(struct file *filp, void *dirent, filldir_t filldir, ++ int (*filter)(struct dentry *)) ++{ ++ return do_dcache_readdir_filter(filp, dirent, filldir, filter); ++} ++ + ssize_t generic_read_dir(struct file *filp, char __user *buf, size_t siz, loff_t *ppos) + { + return -EISDIR; +@@ -977,6 +991,7 @@ EXPORT_SYMBOL(dcache_dir_close); + EXPORT_SYMBOL(dcache_dir_lseek); + EXPORT_SYMBOL(dcache_dir_open); + EXPORT_SYMBOL(dcache_readdir); ++EXPORT_SYMBOL(dcache_readdir_filter); + EXPORT_SYMBOL(generic_read_dir); + EXPORT_SYMBOL(mount_pseudo); + EXPORT_SYMBOL(simple_write_begin); +diff -NurpP --minimal linux-3.3.8/fs/locks.c linux-3.3.8-vs2.3.3.4/fs/locks.c +--- linux-3.3.8/fs/locks.c 2012-06-08 15:23:46.000000000 +0200 ++++ linux-3.3.8-vs2.3.3.4/fs/locks.c 2012-04-16 13:32:11.000000000 +0200 +@@ -126,6 +126,8 @@ + #include + #include + #include ++#include ++#include + + #include + +@@ -184,11 +186,17 @@ static void locks_init_lock_heads(struct + /* Allocate an empty lock structure. 
*/ + struct file_lock *locks_alloc_lock(void) + { +- struct file_lock *fl = kmem_cache_zalloc(filelock_cache, GFP_KERNEL); ++ struct file_lock *fl; + +- if (fl) +- locks_init_lock_heads(fl); ++ if (!vx_locks_avail(1)) ++ return NULL; + ++ fl = kmem_cache_zalloc(filelock_cache, GFP_KERNEL); ++ ++ if (fl) { ++ locks_init_lock_heads(fl); ++ fl->fl_xid = -1; ++ } + return fl; + } + EXPORT_SYMBOL_GPL(locks_alloc_lock); +@@ -216,6 +224,7 @@ void locks_free_lock(struct file_lock *f + BUG_ON(!list_empty(&fl->fl_block)); + BUG_ON(!list_empty(&fl->fl_link)); + ++ vx_locks_dec(fl); + locks_release_private(fl); + kmem_cache_free(filelock_cache, fl); + } +@@ -225,6 +234,7 @@ void locks_init_lock(struct file_lock *f + { + memset(fl, 0, sizeof(struct file_lock)); + locks_init_lock_heads(fl); ++ fl->fl_xid = -1; + } + + EXPORT_SYMBOL(locks_init_lock); +@@ -265,6 +275,7 @@ void locks_copy_lock(struct file_lock *n + new->fl_file = fl->fl_file; + new->fl_ops = fl->fl_ops; + new->fl_lmops = fl->fl_lmops; ++ new->fl_xid = fl->fl_xid; + + locks_copy_private(new, fl); + } +@@ -303,6 +314,11 @@ static int flock_make_lock(struct file * + fl->fl_flags = FL_FLOCK; + fl->fl_type = type; + fl->fl_end = OFFSET_MAX; ++ ++ vxd_assert(filp->f_xid == vx_current_xid(), ++ "f_xid(%d) == current(%d)", filp->f_xid, vx_current_xid()); ++ fl->fl_xid = filp->f_xid; ++ vx_locks_inc(fl); + + *lock = fl; + return 0; +@@ -452,6 +468,7 @@ static int lease_init(struct file *filp, + + fl->fl_owner = current->files; + fl->fl_pid = current->tgid; ++ fl->fl_xid = vx_current_xid(); + + fl->fl_file = filp; + fl->fl_flags = FL_LEASE; +@@ -471,6 +488,11 @@ static struct file_lock *lease_alloc(str + if (fl == NULL) + return ERR_PTR(error); + ++ fl->fl_xid = vx_current_xid(); ++ if (filp) ++ vxd_assert(filp->f_xid == fl->fl_xid, ++ "f_xid(%d) == fl_xid(%d)", filp->f_xid, fl->fl_xid); ++ vx_locks_inc(fl); + error = lease_init(filp, type, fl); + if (error) { + locks_free_lock(fl); +@@ -773,6 +795,7 @@ static int flock_lock_file(struct file * + lock_flocks(); + } + ++ new_fl->fl_xid = -1; + find_conflict: + for_each_lock(inode, before) { + struct file_lock *fl = *before; +@@ -793,6 +816,7 @@ find_conflict: + goto out; + locks_copy_lock(new_fl, request); + locks_insert_lock(before, new_fl); ++ vx_locks_inc(new_fl); + new_fl = NULL; + error = 0; + +@@ -803,7 +827,8 @@ out: + return error; + } + +-static int __posix_lock_file(struct inode *inode, struct file_lock *request, struct file_lock *conflock) ++static int __posix_lock_file(struct inode *inode, struct file_lock *request, ++ struct file_lock *conflock, xid_t xid) + { + struct file_lock *fl; + struct file_lock *new_fl = NULL; +@@ -813,6 +838,8 @@ static int __posix_lock_file(struct inod + struct file_lock **before; + int error, added = 0; + ++ vxd_assert(xid == vx_current_xid(), ++ "xid(%d) == current(%d)", xid, vx_current_xid()); + /* + * We may need two file_lock structures for this operation, + * so we get them in advance to avoid races. 
+@@ -823,7 +850,11 @@ static int __posix_lock_file(struct inod + (request->fl_type != F_UNLCK || + request->fl_start != 0 || request->fl_end != OFFSET_MAX)) { + new_fl = locks_alloc_lock(); ++ new_fl->fl_xid = xid; ++ vx_locks_inc(new_fl); + new_fl2 = locks_alloc_lock(); ++ new_fl2->fl_xid = xid; ++ vx_locks_inc(new_fl2); + } + + lock_flocks(); +@@ -1022,7 +1053,8 @@ static int __posix_lock_file(struct inod + int posix_lock_file(struct file *filp, struct file_lock *fl, + struct file_lock *conflock) + { +- return __posix_lock_file(filp->f_path.dentry->d_inode, fl, conflock); ++ return __posix_lock_file(filp->f_path.dentry->d_inode, ++ fl, conflock, filp->f_xid); + } + EXPORT_SYMBOL(posix_lock_file); + +@@ -1112,7 +1144,7 @@ int locks_mandatory_area(int read_write, + fl.fl_end = offset + count - 1; + + for (;;) { +- error = __posix_lock_file(inode, &fl, NULL); ++ error = __posix_lock_file(inode, &fl, NULL, filp->f_xid); + if (error != FILE_LOCK_DEFERRED) + break; + error = wait_event_interruptible(fl.fl_wait, !fl.fl_next); +@@ -1407,6 +1439,7 @@ int generic_add_lease(struct file *filp, + goto out; + + locks_insert_lock(before, lease); ++ vx_locks_inc(lease); + return 0; + + out: +@@ -1847,6 +1880,11 @@ int fcntl_setlk(unsigned int fd, struct + if (file_lock == NULL) + return -ENOLCK; + ++ vxd_assert(filp->f_xid == vx_current_xid(), ++ "f_xid(%d) == current(%d)", filp->f_xid, vx_current_xid()); ++ file_lock->fl_xid = filp->f_xid; ++ vx_locks_inc(file_lock); ++ + /* + * This might block, so we do it before checking the inode. + */ +@@ -1965,6 +2003,11 @@ int fcntl_setlk64(unsigned int fd, struc + if (file_lock == NULL) + return -ENOLCK; + ++ vxd_assert(filp->f_xid == vx_current_xid(), ++ "f_xid(%d) == current(%d)", filp->f_xid, vx_current_xid()); ++ file_lock->fl_xid = filp->f_xid; ++ vx_locks_inc(file_lock); ++ + /* + * This might block, so we do it before checking the inode. 
+ */ +@@ -2230,8 +2273,11 @@ static int locks_show(struct seq_file *f + + lock_get_status(f, fl, *((loff_t *)f->private), ""); + +- list_for_each_entry(bfl, &fl->fl_block, fl_block) ++ list_for_each_entry(bfl, &fl->fl_block, fl_block) { ++ if (!vx_check(fl->fl_xid, VS_WATCH_P | VS_IDENT)) ++ continue; + lock_get_status(f, bfl, *((loff_t *)f->private), " ->"); ++ } + + return 0; + } +diff -NurpP --minimal linux-3.3.8/fs/mount.h linux-3.3.8-vs2.3.3.4/fs/mount.h +--- linux-3.3.8/fs/mount.h 2012-03-19 19:47:26.000000000 +0100 ++++ linux-3.3.8-vs2.3.3.4/fs/mount.h 2012-02-24 17:29:48.000000000 +0100 +@@ -47,6 +47,7 @@ struct mount { + int mnt_expiry_mark; /* true if marked for expiry */ + int mnt_pinned; + int mnt_ghosts; ++ tag_t mnt_tag; /* tagging used for vfsmount */ + }; + + static inline struct mount *real_mount(struct vfsmount *mnt) +diff -NurpP --minimal linux-3.3.8/fs/namei.c linux-3.3.8-vs2.3.3.4/fs/namei.c +--- linux-3.3.8/fs/namei.c 2012-03-19 19:47:26.000000000 +0100 ++++ linux-3.3.8-vs2.3.3.4/fs/namei.c 2012-04-23 22:50:50.000000000 +0200 +@@ -33,6 +33,14 @@ + #include + #include + #include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include + #include + + #include "internal.h" +@@ -221,6 +229,89 @@ static int check_acl(struct inode *inode + return -EAGAIN; + } + ++static inline int dx_barrier(const struct inode *inode) ++{ ++ if (IS_BARRIER(inode) && !vx_check(0, VS_ADMIN | VS_WATCH)) { ++ vxwprintk_task(1, "did hit the barrier."); ++ return 1; ++ } ++ return 0; ++} ++ ++static int __dx_permission(const struct inode *inode, int mask) ++{ ++ if (dx_barrier(inode)) ++ return -EACCES; ++ ++ if (inode->i_sb->s_magic == DEVPTS_SUPER_MAGIC) { ++ /* devpts is xid tagged */ ++ if (S_ISDIR(inode->i_mode) || ++ vx_check((xid_t)inode->i_tag, VS_IDENT | VS_WATCH_P)) ++ return 0; ++ ++ /* just pretend we didn't find anything */ ++ return -ENOENT; ++ } ++ else if (inode->i_sb->s_magic == PROC_SUPER_MAGIC) { ++ struct proc_dir_entry *de = PDE(inode); ++ ++ if (de && !vx_hide_check(0, de->vx_flags)) ++ goto out; ++ ++ if ((mask & (MAY_WRITE | MAY_APPEND))) { ++ struct pid *pid; ++ struct task_struct *tsk; ++ ++ if (vx_check(0, VS_ADMIN | VS_WATCH_P) || ++ vx_flags(VXF_STATE_SETUP, 0)) ++ return 0; ++ ++ pid = PROC_I(inode)->pid; ++ if (!pid) ++ goto out; ++ ++ rcu_read_lock(); ++ tsk = pid_task(pid, PIDTYPE_PID); ++ vxdprintk(VXD_CBIT(tag, 0), "accessing %p[#%u]", ++ tsk, (tsk ? vx_task_xid(tsk) : 0)); ++ if (tsk && ++ vx_check(vx_task_xid(tsk), VS_IDENT | VS_WATCH_P)) { ++ rcu_read_unlock(); ++ return 0; ++ } ++ rcu_read_unlock(); ++ } ++ else { ++ /* FIXME: Should we block some entries here? */ ++ return 0; ++ } ++ } ++ else { ++ if (dx_notagcheck(inode->i_sb) || ++ dx_check(inode->i_tag, DX_HOSTID | DX_ADMIN | DX_WATCH | ++ DX_IDENT)) ++ return 0; ++ } ++ ++out: ++ return -EACCES; ++} ++ ++int dx_permission(const struct inode *inode, int mask) ++{ ++ int ret = __dx_permission(inode, mask); ++ if (unlikely(ret)) { ++#ifndef CONFIG_VSERVER_WARN_DEVPTS ++ if (inode->i_sb->s_magic != DEVPTS_SUPER_MAGIC) ++#endif ++ vxwprintk_task(1, ++ "denied [0x%x] access to inode %s:%p[#%d,%lu]", ++ mask, inode->i_sb->s_id, inode, inode->i_tag, ++ inode->i_ino); ++ } ++ return ret; ++} ++ + /* + * This does the basic permission checking + */ +@@ -356,10 +447,14 @@ int inode_permission(struct inode *inode + /* + * Nobody gets write access to an immutable file. 
+ */ +- if (IS_IMMUTABLE(inode)) ++ if (IS_IMMUTABLE(inode) && !IS_COW(inode)) + return -EACCES; + } + ++ retval = dx_permission(inode, mask); ++ if (retval) ++ return retval; ++ + retval = do_inode_permission(inode, mask); + if (retval) + return retval; +@@ -1038,7 +1133,8 @@ static void follow_dotdot(struct nameida + + if (nd->path.dentry == nd->root.dentry && + nd->path.mnt == nd->root.mnt) { +- break; ++ /* for sane '/' avoid follow_mount() */ ++ return; + } + if (nd->path.dentry != nd->path.mnt->mnt_root) { + /* rare case of legitimate dget_parent()... */ +@@ -1149,6 +1245,9 @@ static int do_lookup(struct nameidata *n + } + if (unlikely(d_need_lookup(dentry))) + goto unlazy; ++ ++ /* FIXME: check dx permission */ ++ + path->mnt = mnt; + path->dentry = dentry; + if (unlikely(!__follow_mount_rcu(nd, path, inode))) +@@ -1210,6 +1309,8 @@ retry: + } + } + ++ /* FIXME: check dx permission */ ++ + path->mnt = mnt; + path->dentry = dentry; + err = follow_managed(path, nd->flags); +@@ -1926,7 +2027,7 @@ static int may_delete(struct inode *dir, + if (IS_APPEND(dir)) + return -EPERM; + if (check_sticky(dir, victim->d_inode)||IS_APPEND(victim->d_inode)|| +- IS_IMMUTABLE(victim->d_inode) || IS_SWAPFILE(victim->d_inode)) ++ IS_IXORUNLINK(victim->d_inode) || IS_SWAPFILE(victim->d_inode)) + return -EPERM; + if (isdir) { + if (!S_ISDIR(victim->d_inode->i_mode)) +@@ -2006,19 +2107,25 @@ int vfs_create(struct inode *dir, struct + { + int error = may_create(dir, dentry); + +- if (error) ++ if (error) { ++ vxdprintk(VXD_CBIT(misc, 3), "may_create failed with %d", error); + return error; ++ } + + if (!dir->i_op->create) + return -EACCES; /* shouldn't it be ENOSYS? */ + mode &= S_IALLUGO; + mode |= S_IFREG; + error = security_inode_create(dir, dentry, mode); +- if (error) ++ if (error) { ++ vxdprintk(VXD_CBIT(misc, 3), "security_inode_create failed with %d", error); + return error; ++ } + error = dir->i_op->create(dir, dentry, mode, nd); + if (!error) + fsnotify_create(dir, dentry); ++ else ++ vxdprintk(VXD_CBIT(misc, 3), "i_op->create failed with %d", error); + return error; + } + +@@ -2053,6 +2160,15 @@ static int may_open(struct path *path, i + break; + } + ++#ifdef CONFIG_VSERVER_COWBL ++ if (IS_COW(inode) && ++ ((flag & O_ACCMODE) != O_RDONLY)) { ++ if (IS_COW_LINK(inode)) ++ return -EMLINK; ++ inode->i_flags &= ~(S_IXUNLINK|S_IMMUTABLE); ++ mark_inode_dirty(inode); ++ } ++#endif + error = inode_permission(inode, acc_mode); + if (error) + return error; +@@ -2277,6 +2393,16 @@ ok: + } + common: + error = may_open(&nd->path, acc_mode, open_flag); ++#ifdef CONFIG_VSERVER_COWBL ++ if (error == -EMLINK) { ++ struct dentry *dentry; ++ dentry = cow_break_link(pathname); ++ if (IS_ERR(dentry)) ++ error = PTR_ERR(dentry); ++ else ++ dput(dentry); ++ } ++#endif + if (error) + goto exit; + filp = nameidata_to_filp(nd); +@@ -2319,6 +2445,7 @@ static struct file *path_openat(int dfd, + struct path path; + int error; + ++restart: + filp = get_empty_filp(); + if (!filp) + return ERR_PTR(-ENFILE); +@@ -2356,6 +2483,17 @@ static struct file *path_openat(int dfd, + filp = do_last(nd, &path, op, pathname); + put_link(nd, &link, cookie); + } ++ ++#ifdef CONFIG_VSERVER_COWBL ++ if (filp == ERR_PTR(-EMLINK)) { ++ if (nd->root.mnt && !(nd->flags & LOOKUP_ROOT)) ++ path_put(&nd->root); ++ if (base) ++ fput(base); ++ release_open_intent(nd); ++ goto restart; ++ } ++#endif + out: + if (nd->root.mnt && !(nd->flags & LOOKUP_ROOT)) + path_put(&nd->root); +@@ -2445,6 +2583,11 @@ struct dentry *kern_path_create(int dfd, + goto 
fail; + } + *path = nd.path; ++ vxdprintk(VXD_CBIT(misc, 3), "kern_path_create path.dentry = %p (%.*s), dentry = %p (%.*s), d_inode = %p", ++ path->dentry, path->dentry->d_name.len, ++ path->dentry->d_name.name, dentry, ++ dentry->d_name.len, dentry->d_name.name, ++ path->dentry->d_inode); + return dentry; + eexist: + dput(dentry); +@@ -2926,7 +3069,7 @@ int vfs_link(struct dentry *old_dentry, + /* + * A link to an append-only or immutable file cannot be created. + */ +- if (IS_APPEND(inode) || IS_IMMUTABLE(inode)) ++ if (IS_APPEND(inode) || IS_IXORUNLINK(inode)) + return -EPERM; + if (!dir->i_op->link) + return -EPERM; +@@ -3307,6 +3450,253 @@ int vfs_follow_link(struct nameidata *nd + return __vfs_follow_link(nd, link); + } + ++ ++#ifdef CONFIG_VSERVER_COWBL ++ ++static inline ++long do_cow_splice(struct file *in, struct file *out, size_t len) ++{ ++ loff_t ppos = 0; ++ ++ return do_splice_direct(in, &ppos, out, len, 0); ++} ++ ++extern unsigned int mnt_get_count(struct mount *mnt); ++ ++struct dentry *cow_break_link(const char *pathname) ++{ ++ int ret, mode, pathlen, redo = 0; ++ struct nameidata old_nd, dir_nd; ++ struct path old_path, dir_path; ++ struct dentry *dir, *old_dentry, *new_dentry = NULL; ++ struct file *old_file; ++ struct file *new_file; ++ char *to, *path, pad='\251'; ++ loff_t size; ++ ++ vxdprintk(VXD_CBIT(misc, 1), ++ "cow_break_link(" VS_Q("%s") ")", pathname); ++ path = kmalloc(PATH_MAX, GFP_KERNEL); ++ ret = -ENOMEM; ++ if (!path) ++ goto out; ++ ++ /* old_nd will have refs to dentry and mnt */ ++ ret = do_path_lookup(AT_FDCWD, pathname, LOOKUP_FOLLOW, &old_nd); ++ vxdprintk(VXD_CBIT(misc, 2), ++ "do_path_lookup(old): %d [r=%d]", ++ ret, mnt_get_count(real_mount(old_nd.path.mnt))); ++ if (ret < 0) ++ goto out_free_path; ++ ++ old_path = old_nd.path; ++ old_dentry = old_path.dentry; ++ mode = old_dentry->d_inode->i_mode; ++ ++ to = d_path(&old_path, path, PATH_MAX-2); ++ pathlen = strlen(to); ++ vxdprintk(VXD_CBIT(misc, 2), ++ "old path " VS_Q("%s") " [%p:" VS_Q("%.*s") ":%d]", to, ++ old_dentry, ++ old_dentry->d_name.len, old_dentry->d_name.name, ++ old_dentry->d_name.len); ++ ++ to[pathlen + 1] = 0; ++retry: ++ new_dentry = NULL; ++ to[pathlen] = pad--; ++ ret = -ELOOP; ++ if (pad <= '\240') ++ goto out_rel_old; ++ ++ vxdprintk(VXD_CBIT(misc, 1), "temp copy " VS_Q("%s"), to); ++ /* dir_nd will have refs to dentry and mnt */ ++ ret = do_path_lookup(AT_FDCWD, to, ++ LOOKUP_PARENT | LOOKUP_OPEN | LOOKUP_CREATE, &dir_nd); ++ vxdprintk(VXD_CBIT(misc, 2), "do_path_lookup(new): %d", ret); ++ if (ret < 0) ++ goto retry; ++ ++ /* this puppy downs the dir inode mutex if successful */ ++ new_dentry = kern_path_create(AT_FDCWD, to, &dir_path, 0); ++ if (!new_dentry || IS_ERR(new_dentry)) { ++ path_put(&dir_nd.path); ++ vxdprintk(VXD_CBIT(misc, 2), ++ "kern_path_create(new) failed with %ld", ++ PTR_ERR(new_dentry)); ++ goto retry; ++ } ++ path_put(&dir_path); ++ vxdprintk(VXD_CBIT(misc, 2), ++ "kern_path_create(new): %p [" VS_Q("%.*s") ":%d]", ++ new_dentry, ++ new_dentry->d_name.len, new_dentry->d_name.name, ++ new_dentry->d_name.len); ++ ++ dir = dir_nd.path.dentry; ++ ++ ret = vfs_create(dir->d_inode, new_dentry, mode, &dir_nd); ++ vxdprintk(VXD_CBIT(misc, 2), ++ "vfs_create(new): %d", ret); ++ if (ret == -EEXIST) { ++ mutex_unlock(&dir->d_inode->i_mutex); ++ path_put(&dir_nd.path); ++ dput(new_dentry); ++ goto retry; ++ } ++ else if (ret < 0) ++ goto out_unlock_new; ++ ++ /* drop out early, ret passes ENOENT */ ++ ret = -ENOENT; ++ if ((redo = d_unhashed(old_dentry))) ++ 
goto out_unlock_new; ++ ++ path_get(&old_path); ++ /* this one cleans up the dentry/mnt in case of failure */ ++ old_file = dentry_open(old_dentry, old_path.mnt, ++ O_RDONLY, current_cred()); ++ vxdprintk(VXD_CBIT(misc, 2), ++ "dentry_open(old): %p", old_file); ++ if (IS_ERR(old_file)) { ++ ret = PTR_ERR(old_file); ++ goto out_unlock_new; ++ } ++ ++ dget(new_dentry); ++ mntget(old_path.mnt); ++ /* this one cleans up the dentry/mnt in case of failure */ ++ new_file = dentry_open(new_dentry, old_path.mnt, ++ O_WRONLY, current_cred()); ++ vxdprintk(VXD_CBIT(misc, 2), ++ "dentry_open(new): %p", new_file); ++ if (IS_ERR(new_file)) { ++ ret = PTR_ERR(new_file); ++ goto out_fput_old; ++ } ++ ++ size = i_size_read(old_file->f_dentry->d_inode); ++ ret = do_cow_splice(old_file, new_file, size); ++ vxdprintk(VXD_CBIT(misc, 2), "do_splice_direct: %d", ret); ++ if (ret < 0) { ++ goto out_fput_both; ++ } else if (ret < size) { ++ ret = -ENOSPC; ++ goto out_fput_both; ++ } else { ++ struct inode *old_inode = old_dentry->d_inode; ++ struct inode *new_inode = new_dentry->d_inode; ++ struct iattr attr = { ++ .ia_uid = old_inode->i_uid, ++ .ia_gid = old_inode->i_gid, ++ .ia_valid = ATTR_UID | ATTR_GID ++ }; ++ ++ setattr_copy(new_inode, &attr); ++ mark_inode_dirty(new_inode); ++ } ++ ++ mutex_lock(&old_dentry->d_inode->i_sb->s_vfs_rename_mutex); ++ ++ /* drop out late */ ++ ret = -ENOENT; ++ if ((redo = d_unhashed(old_dentry))) ++ goto out_unlock; ++ ++ vxdprintk(VXD_CBIT(misc, 2), ++ "vfs_rename: [" VS_Q("%*s") ":%d] -> [" VS_Q("%*s") ":%d]", ++ new_dentry->d_name.len, new_dentry->d_name.name, ++ new_dentry->d_name.len, ++ old_dentry->d_name.len, old_dentry->d_name.name, ++ old_dentry->d_name.len); ++ ret = vfs_rename(dir_nd.path.dentry->d_inode, new_dentry, ++ old_dentry->d_parent->d_inode, old_dentry); ++ vxdprintk(VXD_CBIT(misc, 2), "vfs_rename: %d", ret); ++ ++out_unlock: ++ mutex_unlock(&old_dentry->d_inode->i_sb->s_vfs_rename_mutex); ++ ++out_fput_both: ++ vxdprintk(VXD_CBIT(misc, 3), ++ "fput(new_file=%p[#%ld])", new_file, ++ atomic_long_read(&new_file->f_count)); ++ fput(new_file); ++ ++out_fput_old: ++ vxdprintk(VXD_CBIT(misc, 3), ++ "fput(old_file=%p[#%ld])", old_file, ++ atomic_long_read(&old_file->f_count)); ++ fput(old_file); ++ ++out_unlock_new: ++ mutex_unlock(&dir->d_inode->i_mutex); ++ if (!ret) ++ goto out_redo; ++ ++ /* error path cleanup */ ++ vfs_unlink(dir->d_inode, new_dentry); ++ ++out_redo: ++ if (!redo) ++ goto out_rel_both; ++ /* lookup dentry once again */ ++ /* old_nd.path is freed as old_path in out_rel_old */ ++ ret = do_path_lookup(AT_FDCWD, pathname, LOOKUP_FOLLOW, &old_nd); ++ if (ret) ++ goto out_rel_both; ++ ++ dput(new_dentry); ++ new_dentry = old_nd.path.dentry; ++ vxdprintk(VXD_CBIT(misc, 2), ++ "do_path_lookup(redo): %p [" VS_Q("%.*s") ":%d]", ++ new_dentry, ++ new_dentry->d_name.len, new_dentry->d_name.name, ++ new_dentry->d_name.len); ++ dget(new_dentry); ++ ++out_rel_both: ++ path_put(&dir_nd.path); ++out_rel_old: ++ path_put(&old_path); ++out_free_path: ++ kfree(path); ++out: ++ if (ret) { ++ dput(new_dentry); ++ new_dentry = ERR_PTR(ret); ++ } ++ vxdprintk(VXD_CBIT(misc, 3), ++ "cow_break_link returning with %p [r=%d]", ++ new_dentry, mnt_get_count(real_mount(old_nd.path.mnt))); ++ return new_dentry; ++} ++ ++#endif ++ ++int vx_info_mnt_namespace(struct mnt_namespace *ns, char *buffer) ++{ ++ struct path path; ++ struct vfsmount *vmnt; ++ char *pstr, *root; ++ int length = 0; ++ ++ pstr = kmalloc(PATH_MAX, GFP_KERNEL); ++ if (!pstr) ++ return 0; ++ ++ vmnt = 
&ns->root->mnt; ++ path.mnt = vmnt; ++ path.dentry = vmnt->mnt_root; ++ root = d_path(&path, pstr, PATH_MAX - 2); ++ length = sprintf(buffer + length, ++ "Namespace:\t%p [#%u]\n" ++ "RootPath:\t%s\n", ++ ns, atomic_read(&ns->count), ++ root); ++ kfree(pstr); ++ return length; ++} ++ + /* get the link contents into pagecache */ + static char *page_getlink(struct dentry * dentry, struct page **ppage) + { +@@ -3431,3 +3821,4 @@ EXPORT_SYMBOL(vfs_symlink); + EXPORT_SYMBOL(vfs_unlink); + EXPORT_SYMBOL(dentry_unhash); + EXPORT_SYMBOL(generic_readlink); ++EXPORT_SYMBOL(vx_info_mnt_namespace); +diff -NurpP --minimal linux-3.3.8/fs/namespace.c linux-3.3.8-vs2.3.3.4/fs/namespace.c +--- linux-3.3.8/fs/namespace.c 2012-03-19 19:47:26.000000000 +0100 ++++ linux-3.3.8-vs2.3.3.4/fs/namespace.c 2012-02-24 17:38:42.000000000 +0100 +@@ -20,6 +20,11 @@ + #include /* get_fs_root et.al. */ + #include /* fsnotify_vfsmount_delete */ + #include ++#include ++#include ++#include ++#include ++#include + #include "pnode.h" + #include "internal.h" + +@@ -697,6 +702,10 @@ vfs_kern_mount(struct file_system_type * + if (!type) + return ERR_PTR(-ENODEV); + ++ if ((type->fs_flags & FS_BINARY_MOUNTDATA) && ++ !vx_capable(CAP_SYS_ADMIN, VXC_BINARY_MOUNT)) ++ return ERR_PTR(-EPERM); ++ + mnt = alloc_vfsmnt(name); + if (!mnt) + return ERR_PTR(-ENOMEM); +@@ -745,6 +754,7 @@ static struct mount *clone_mnt(struct mo + mnt->mnt.mnt_root = dget(root); + mnt->mnt_mountpoint = mnt->mnt.mnt_root; + mnt->mnt_parent = mnt; ++ mnt->mnt_tag = old->mnt_tag; + br_write_lock(vfsmount_lock); + list_add_tail(&mnt->mnt_instance, &sb->s_mounts); + br_write_unlock(vfsmount_lock); +@@ -1209,7 +1219,7 @@ SYSCALL_DEFINE2(umount, char __user *, n + goto dput_and_out; + + retval = -EPERM; +- if (!capable(CAP_SYS_ADMIN)) ++ if (!vx_capable(CAP_SYS_ADMIN, VXC_SECURE_MOUNT)) + goto dput_and_out; + + retval = do_umount(mnt, flags); +@@ -1235,7 +1245,7 @@ SYSCALL_DEFINE1(oldumount, char __user * + + static int mount_is_safe(struct path *path) + { +- if (capable(CAP_SYS_ADMIN)) ++ if (vx_capable(CAP_SYS_ADMIN, VXC_SECURE_MOUNT)) + return 0; + return -EPERM; + #ifdef notyet +@@ -1548,7 +1558,7 @@ static int do_change_type(struct path *p + int type; + int err = 0; + +- if (!capable(CAP_SYS_ADMIN)) ++ if (!vx_capable(CAP_SYS_ADMIN, VXC_NAMESPACE)) + return -EPERM; + + if (path->dentry != path->mnt->mnt_root) +@@ -1564,6 +1574,7 @@ static int do_change_type(struct path *p + if (err) + goto out_unlock; + } ++ // mnt->mnt_flags = mnt_flags; + + br_write_lock(vfsmount_lock); + for (m = mnt; m; m = (recurse ? next_mnt(m, mnt) : NULL)) +@@ -1579,12 +1590,14 @@ static int do_change_type(struct path *p + * do loopback mount. + */ + static int do_loopback(struct path *path, char *old_name, +- int recurse) ++ tag_t tag, unsigned long flags, int mnt_flags) + { + LIST_HEAD(umount_list); + struct path old_path; + struct mount *mnt = NULL, *old; + int err = mount_is_safe(path); ++ int recurse = flags & MS_REC; ++ + if (err) + return err; + if (!old_name || !*old_name) +@@ -1652,13 +1665,13 @@ static int change_mount_flags(struct vfs + * on it - tough luck. 
+ */ + static int do_remount(struct path *path, int flags, int mnt_flags, +- void *data) ++ void *data, xid_t xid) + { + int err; + struct super_block *sb = path->mnt->mnt_sb; + struct mount *mnt = real_mount(path->mnt); + +- if (!capable(CAP_SYS_ADMIN)) ++ if (!vx_capable(CAP_SYS_ADMIN, VXC_SECURE_REMOUNT)) + return -EPERM; + + if (!check_mnt(mnt)) +@@ -1707,7 +1720,7 @@ static int do_move_mount(struct path *pa + struct mount *p; + struct mount *old; + int err = 0; +- if (!capable(CAP_SYS_ADMIN)) ++ if (!vx_capable(CAP_SYS_ADMIN, VXC_SECURE_MOUNT)) + return -EPERM; + if (!old_name || !*old_name) + return -EINVAL; +@@ -1858,7 +1871,7 @@ static int do_new_mount(struct path *pat + return -EINVAL; + + /* we need capabilities... */ +- if (!capable(CAP_SYS_ADMIN)) ++ if (!vx_capable(CAP_SYS_ADMIN, VXC_SECURE_MOUNT)) + return -EPERM; + + mnt = do_kern_mount(type, flags, name, data); +@@ -2128,6 +2141,7 @@ long do_mount(char *dev_name, char *dir_ + struct path path; + int retval = 0; + int mnt_flags = 0; ++ tag_t tag = 0; + + /* Discard magic */ + if ((flags & MS_MGC_MSK) == MS_MGC_VAL) +@@ -2155,6 +2169,12 @@ long do_mount(char *dev_name, char *dir_ + if (!(flags & MS_NOATIME)) + mnt_flags |= MNT_RELATIME; + ++ if (dx_parse_tag(data_page, &tag, 1, &mnt_flags, &flags)) { ++ /* FIXME: bind and re-mounts get the tag flag? */ ++ if (flags & (MS_BIND|MS_REMOUNT)) ++ flags |= MS_TAGID; ++ } ++ + /* Separate the per-mountpoint flags */ + if (flags & MS_NOSUID) + mnt_flags |= MNT_NOSUID; +@@ -2171,15 +2191,17 @@ long do_mount(char *dev_name, char *dir_ + if (flags & MS_RDONLY) + mnt_flags |= MNT_READONLY; + ++ if (!capable(CAP_SYS_ADMIN)) ++ mnt_flags |= MNT_NODEV; + flags &= ~(MS_NOSUID | MS_NOEXEC | MS_NODEV | MS_ACTIVE | MS_BORN | + MS_NOATIME | MS_NODIRATIME | MS_RELATIME| MS_KERNMOUNT | + MS_STRICTATIME); + + if (flags & MS_REMOUNT) + retval = do_remount(&path, flags & ~MS_REMOUNT, mnt_flags, +- data_page); ++ data_page, tag); + else if (flags & MS_BIND) +- retval = do_loopback(&path, dev_name, flags & MS_REC); ++ retval = do_loopback(&path, dev_name, tag, flags, mnt_flags); + else if (flags & (MS_SHARED | MS_PRIVATE | MS_SLAVE | MS_UNBINDABLE)) + retval = do_change_type(&path, flags); + else if (flags & MS_MOVE) +@@ -2282,6 +2304,7 @@ static struct mnt_namespace *dup_mnt_ns( + q = next_mnt(q, new); + } + up_write(&namespace_sem); ++ atomic_inc(&vs_global_mnt_ns); + + if (rootmnt) + mntput(rootmnt); +@@ -2478,9 +2501,10 @@ SYSCALL_DEFINE2(pivot_root, const char _ + error = -EINVAL; + new_mnt = real_mount(new.mnt); + root_mnt = real_mount(root.mnt); +- if (IS_MNT_SHARED(real_mount(old.mnt)) || ++ if ((IS_MNT_SHARED(real_mount(old.mnt)) || + IS_MNT_SHARED(new_mnt->mnt_parent) || +- IS_MNT_SHARED(root_mnt->mnt_parent)) ++ IS_MNT_SHARED(root_mnt->mnt_parent)) && ++ !vx_flags(VXF_STATE_SETUP, 0)) + goto out4; + if (!check_mnt(root_mnt) || !check_mnt(new_mnt)) + goto out4; +@@ -2601,6 +2625,7 @@ void put_mnt_ns(struct mnt_namespace *ns + br_write_unlock(vfsmount_lock); + up_write(&namespace_sem); + release_mounts(&umount_list); ++ atomic_dec(&vs_global_mnt_ns); + kfree(ns); + } + +diff -NurpP --minimal linux-3.3.8/fs/nfs/client.c linux-3.3.8-vs2.3.3.4/fs/nfs/client.c +--- linux-3.3.8/fs/nfs/client.c 2012-03-19 19:47:26.000000000 +0100 ++++ linux-3.3.8-vs2.3.3.4/fs/nfs/client.c 2012-02-24 03:55:06.000000000 +0100 +@@ -784,6 +784,9 @@ static int nfs_init_server_rpcclient(str + if (server->flags & NFS_MOUNT_SOFT) + server->client->cl_softrtry = 1; + ++ server->client->cl_tag = 0; ++ if (server->flags & 
NFS_MOUNT_TAGGED) ++ server->client->cl_tag = 1; + return 0; + } + +@@ -958,6 +961,10 @@ static void nfs_server_set_fsinfo(struct + server->acdirmin = server->acdirmax = 0; + } + ++ /* FIXME: needs fsinfo ++ if (server->flags & NFS_MOUNT_TAGGED) ++ sb->s_flags |= MS_TAGGED; */ ++ + server->maxfilesize = fsinfo->maxfilesize; + + server->time_delta = fsinfo->time_delta; +diff -NurpP --minimal linux-3.3.8/fs/nfs/dir.c linux-3.3.8-vs2.3.3.4/fs/nfs/dir.c +--- linux-3.3.8/fs/nfs/dir.c 2012-03-19 19:47:26.000000000 +0100 ++++ linux-3.3.8-vs2.3.3.4/fs/nfs/dir.c 2012-02-24 03:55:06.000000000 +0100 +@@ -35,6 +35,7 @@ + #include + #include + #include ++#include + + #include "delegation.h" + #include "iostat.h" +@@ -1311,6 +1312,7 @@ static struct dentry *nfs_lookup(struct + if (IS_ERR(res)) + goto out_unblock_sillyrename; + ++ dx_propagate_tag(nd, inode); + no_entry: + res = d_materialise_unique(dentry, inode); + if (res != NULL) { +diff -NurpP --minimal linux-3.3.8/fs/nfs/inode.c linux-3.3.8-vs2.3.3.4/fs/nfs/inode.c +--- linux-3.3.8/fs/nfs/inode.c 2012-03-19 19:47:26.000000000 +0100 ++++ linux-3.3.8-vs2.3.3.4/fs/nfs/inode.c 2012-02-24 04:40:22.000000000 +0100 +@@ -39,6 +39,7 @@ + #include + #include + #include ++#include + + #include + #include +@@ -274,6 +275,8 @@ nfs_fhget(struct super_block *sb, struct + if (inode->i_state & I_NEW) { + struct nfs_inode *nfsi = NFS_I(inode); + unsigned long now = jiffies; ++ uid_t uid; ++ gid_t gid; + + /* We set i_ino for the few things that still rely on it, + * such as stat(2) */ +@@ -322,8 +325,8 @@ nfs_fhget(struct super_block *sb, struct + inode->i_version = 0; + inode->i_size = 0; + clear_nlink(inode); +- inode->i_uid = -2; +- inode->i_gid = -2; ++ uid = -2; ++ gid = -2; + inode->i_blocks = 0; + memset(nfsi->cookieverf, 0, sizeof(nfsi->cookieverf)); + +@@ -360,13 +363,13 @@ nfs_fhget(struct super_block *sb, struct + else if (nfs_server_capable(inode, NFS_CAP_NLINK)) + nfsi->cache_validity |= NFS_INO_INVALID_ATTR; + if (fattr->valid & NFS_ATTR_FATTR_OWNER) +- inode->i_uid = fattr->uid; ++ uid = fattr->uid; + else if (nfs_server_capable(inode, NFS_CAP_OWNER)) + nfsi->cache_validity |= NFS_INO_INVALID_ATTR + | NFS_INO_INVALID_ACCESS + | NFS_INO_INVALID_ACL; + if (fattr->valid & NFS_ATTR_FATTR_GROUP) +- inode->i_gid = fattr->gid; ++ gid = fattr->gid; + else if (nfs_server_capable(inode, NFS_CAP_OWNER_GROUP)) + nfsi->cache_validity |= NFS_INO_INVALID_ATTR + | NFS_INO_INVALID_ACCESS +@@ -379,6 +382,11 @@ nfs_fhget(struct super_block *sb, struct + */ + inode->i_blocks = nfs_calc_block_size(fattr->du.nfs3.used); + } ++ inode->i_uid = INOTAG_UID(DX_TAG(inode), uid, gid); ++ inode->i_gid = INOTAG_GID(DX_TAG(inode), uid, gid); ++ inode->i_tag = INOTAG_TAG(DX_TAG(inode), uid, gid, 0); ++ /* maybe fattr->xid someday */ ++ + nfsi->attrtimeo = NFS_MINATTRTIMEO(inode); + nfsi->attrtimeo_timestamp = now; + nfsi->access_cache = RB_ROOT; +@@ -495,6 +503,8 @@ void nfs_setattr_update_inode(struct ino + inode->i_uid = attr->ia_uid; + if ((attr->ia_valid & ATTR_GID) != 0) + inode->i_gid = attr->ia_gid; ++ if ((attr->ia_valid & ATTR_TAG) && IS_TAGGED(inode)) ++ inode->i_tag = attr->ia_tag; + NFS_I(inode)->cache_validity |= NFS_INO_INVALID_ACCESS|NFS_INO_INVALID_ACL; + spin_unlock(&inode->i_lock); + } +@@ -944,6 +954,9 @@ static int nfs_check_inode_attributes(st + struct nfs_inode *nfsi = NFS_I(inode); + loff_t cur_size, new_isize; + unsigned long invalid = 0; ++ uid_t uid; ++ gid_t gid; ++ tag_t tag; + + + /* Has the inode gone and changed behind our back? 
*/ +@@ -967,13 +980,18 @@ static int nfs_check_inode_attributes(st + invalid |= NFS_INO_INVALID_ATTR|NFS_INO_REVAL_PAGECACHE; + } + ++ uid = INOTAG_UID(DX_TAG(inode), fattr->uid, fattr->gid); ++ gid = INOTAG_GID(DX_TAG(inode), fattr->uid, fattr->gid); ++ tag = INOTAG_TAG(DX_TAG(inode), fattr->uid, fattr->gid, 0); ++ + /* Have any file permissions changed? */ + if ((fattr->valid & NFS_ATTR_FATTR_MODE) && (inode->i_mode & S_IALLUGO) != (fattr->mode & S_IALLUGO)) + invalid |= NFS_INO_INVALID_ATTR | NFS_INO_INVALID_ACCESS | NFS_INO_INVALID_ACL; +- if ((fattr->valid & NFS_ATTR_FATTR_OWNER) && inode->i_uid != fattr->uid) ++ if ((fattr->valid & NFS_ATTR_FATTR_OWNER) && uid != fattr->uid) + invalid |= NFS_INO_INVALID_ATTR | NFS_INO_INVALID_ACCESS | NFS_INO_INVALID_ACL; +- if ((fattr->valid & NFS_ATTR_FATTR_GROUP) && inode->i_gid != fattr->gid) ++ if ((fattr->valid & NFS_ATTR_FATTR_GROUP) && gid != fattr->gid) + invalid |= NFS_INO_INVALID_ATTR | NFS_INO_INVALID_ACCESS | NFS_INO_INVALID_ACL; ++ /* maybe check for tag too? */ + + /* Has the link count changed? */ + if ((fattr->valid & NFS_ATTR_FATTR_NLINK) && inode->i_nlink != fattr->nlink) +@@ -1210,6 +1228,9 @@ static int nfs_update_inode(struct inode + unsigned long invalid = 0; + unsigned long now = jiffies; + unsigned long save_cache_validity; ++ uid_t uid; ++ gid_t gid; ++ tag_t tag; + + dfprintk(VFS, "NFS: %s(%s/%ld ct=%d info=0x%x)\n", + __func__, inode->i_sb->s_id, inode->i_ino, +@@ -1317,6 +1338,9 @@ static int nfs_update_inode(struct inode + | NFS_INO_REVAL_PAGECACHE + | NFS_INO_REVAL_FORCED); + ++ uid = TAGINO_UID(DX_TAG(inode), inode->i_uid, inode->i_tag); ++ gid = TAGINO_GID(DX_TAG(inode), inode->i_gid, inode->i_tag); ++ tag = inode->i_tag; + + if (fattr->valid & NFS_ATTR_FATTR_ATIME) + memcpy(&inode->i_atime, &fattr->atime, sizeof(inode->i_atime)); +@@ -1338,9 +1362,9 @@ static int nfs_update_inode(struct inode + | NFS_INO_REVAL_FORCED); + + if (fattr->valid & NFS_ATTR_FATTR_OWNER) { +- if (inode->i_uid != fattr->uid) { ++ if (uid != fattr->uid) { + invalid |= NFS_INO_INVALID_ATTR|NFS_INO_INVALID_ACCESS|NFS_INO_INVALID_ACL; +- inode->i_uid = fattr->uid; ++ uid = fattr->uid; + } + } else if (server->caps & NFS_CAP_OWNER) + invalid |= save_cache_validity & (NFS_INO_INVALID_ATTR +@@ -1349,9 +1373,9 @@ static int nfs_update_inode(struct inode + | NFS_INO_REVAL_FORCED); + + if (fattr->valid & NFS_ATTR_FATTR_GROUP) { +- if (inode->i_gid != fattr->gid) { ++ if (gid != fattr->gid) { + invalid |= NFS_INO_INVALID_ATTR|NFS_INO_INVALID_ACCESS|NFS_INO_INVALID_ACL; +- inode->i_gid = fattr->gid; ++ gid = fattr->gid; + } + } else if (server->caps & NFS_CAP_OWNER_GROUP) + invalid |= save_cache_validity & (NFS_INO_INVALID_ATTR +@@ -1359,6 +1383,10 @@ static int nfs_update_inode(struct inode + | NFS_INO_INVALID_ACL + | NFS_INO_REVAL_FORCED); + ++ inode->i_uid = INOTAG_UID(DX_TAG(inode), uid, gid); ++ inode->i_gid = INOTAG_GID(DX_TAG(inode), uid, gid); ++ inode->i_tag = INOTAG_TAG(DX_TAG(inode), uid, gid, tag); ++ + if (fattr->valid & NFS_ATTR_FATTR_NLINK) { + if (inode->i_nlink != fattr->nlink) { + invalid |= NFS_INO_INVALID_ATTR; +diff -NurpP --minimal linux-3.3.8/fs/nfs/nfs3xdr.c linux-3.3.8-vs2.3.3.4/fs/nfs/nfs3xdr.c +--- linux-3.3.8/fs/nfs/nfs3xdr.c 2011-03-15 18:07:32.000000000 +0100 ++++ linux-3.3.8-vs2.3.3.4/fs/nfs/nfs3xdr.c 2012-02-24 03:55:06.000000000 +0100 +@@ -20,6 +20,7 @@ + #include + #include + #include ++#include + #include "internal.h" + + #define NFSDBG_FACILITY NFSDBG_XDR +@@ -562,7 +563,8 @@ static __be32 *xdr_decode_nfstime3(__be3 + 
* set_mtime mtime; + * }; + */ +-static void encode_sattr3(struct xdr_stream *xdr, const struct iattr *attr) ++static void encode_sattr3(struct xdr_stream *xdr, ++ const struct iattr *attr, int tag) + { + u32 nbytes; + __be32 *p; +@@ -594,15 +596,19 @@ static void encode_sattr3(struct xdr_str + } else + *p++ = xdr_zero; + +- if (attr->ia_valid & ATTR_UID) { ++ if (attr->ia_valid & ATTR_UID || ++ (tag && (attr->ia_valid & ATTR_TAG))) { + *p++ = xdr_one; +- *p++ = cpu_to_be32(attr->ia_uid); ++ *p++ = cpu_to_be32(TAGINO_UID(tag, ++ attr->ia_uid, attr->ia_tag)); + } else + *p++ = xdr_zero; + +- if (attr->ia_valid & ATTR_GID) { ++ if (attr->ia_valid & ATTR_GID || ++ (tag && (attr->ia_valid & ATTR_TAG))) { + *p++ = xdr_one; +- *p++ = cpu_to_be32(attr->ia_gid); ++ *p++ = cpu_to_be32(TAGINO_GID(tag, ++ attr->ia_gid, attr->ia_tag)); + } else + *p++ = xdr_zero; + +@@ -878,7 +884,7 @@ static void nfs3_xdr_enc_setattr3args(st + const struct nfs3_sattrargs *args) + { + encode_nfs_fh3(xdr, args->fh); +- encode_sattr3(xdr, args->sattr); ++ encode_sattr3(xdr, args->sattr, req->rq_task->tk_client->cl_tag); + encode_sattrguard3(xdr, args); + } + +@@ -1028,13 +1034,13 @@ static void nfs3_xdr_enc_write3args(stru + * }; + */ + static void encode_createhow3(struct xdr_stream *xdr, +- const struct nfs3_createargs *args) ++ const struct nfs3_createargs *args, int tag) + { + encode_uint32(xdr, args->createmode); + switch (args->createmode) { + case NFS3_CREATE_UNCHECKED: + case NFS3_CREATE_GUARDED: +- encode_sattr3(xdr, args->sattr); ++ encode_sattr3(xdr, args->sattr, tag); + break; + case NFS3_CREATE_EXCLUSIVE: + encode_createverf3(xdr, args->verifier); +@@ -1049,7 +1055,7 @@ static void nfs3_xdr_enc_create3args(str + const struct nfs3_createargs *args) + { + encode_diropargs3(xdr, args->fh, args->name, args->len); +- encode_createhow3(xdr, args); ++ encode_createhow3(xdr, args, req->rq_task->tk_client->cl_tag); + } + + /* +@@ -1065,7 +1071,7 @@ static void nfs3_xdr_enc_mkdir3args(stru + const struct nfs3_mkdirargs *args) + { + encode_diropargs3(xdr, args->fh, args->name, args->len); +- encode_sattr3(xdr, args->sattr); ++ encode_sattr3(xdr, args->sattr, req->rq_task->tk_client->cl_tag); + } + + /* +@@ -1082,9 +1088,9 @@ static void nfs3_xdr_enc_mkdir3args(stru + * }; + */ + static void encode_symlinkdata3(struct xdr_stream *xdr, +- const struct nfs3_symlinkargs *args) ++ const struct nfs3_symlinkargs *args, int tag) + { +- encode_sattr3(xdr, args->sattr); ++ encode_sattr3(xdr, args->sattr, tag); + encode_nfspath3(xdr, args->pages, args->pathlen); + } + +@@ -1093,7 +1099,7 @@ static void nfs3_xdr_enc_symlink3args(st + const struct nfs3_symlinkargs *args) + { + encode_diropargs3(xdr, args->fromfh, args->fromname, args->fromlen); +- encode_symlinkdata3(xdr, args); ++ encode_symlinkdata3(xdr, args, req->rq_task->tk_client->cl_tag); + } + + /* +@@ -1121,24 +1127,24 @@ static void nfs3_xdr_enc_symlink3args(st + * }; + */ + static void encode_devicedata3(struct xdr_stream *xdr, +- const struct nfs3_mknodargs *args) ++ const struct nfs3_mknodargs *args, int tag) + { +- encode_sattr3(xdr, args->sattr); ++ encode_sattr3(xdr, args->sattr, tag); + encode_specdata3(xdr, args->rdev); + } + + static void encode_mknoddata3(struct xdr_stream *xdr, +- const struct nfs3_mknodargs *args) ++ const struct nfs3_mknodargs *args, int tag) + { + encode_ftype3(xdr, args->type); + switch (args->type) { + case NF3CHR: + case NF3BLK: +- encode_devicedata3(xdr, args); ++ encode_devicedata3(xdr, args, tag); + break; + case NF3SOCK: + case 
NF3FIFO: +- encode_sattr3(xdr, args->sattr); ++ encode_sattr3(xdr, args->sattr, tag); + break; + case NF3REG: + case NF3DIR: +@@ -1153,7 +1159,7 @@ static void nfs3_xdr_enc_mknod3args(stru + const struct nfs3_mknodargs *args) + { + encode_diropargs3(xdr, args->fh, args->name, args->len); +- encode_mknoddata3(xdr, args); ++ encode_mknoddata3(xdr, args, req->rq_task->tk_client->cl_tag); + } + + /* +diff -NurpP --minimal linux-3.3.8/fs/nfs/super.c linux-3.3.8-vs2.3.3.4/fs/nfs/super.c +--- linux-3.3.8/fs/nfs/super.c 2012-06-08 15:23:46.000000000 +0200 ++++ linux-3.3.8-vs2.3.3.4/fs/nfs/super.c 2012-05-09 04:08:08.000000000 +0200 +@@ -52,6 +52,7 @@ + #include + #include + #include ++#include + + #include + #include +@@ -86,6 +87,7 @@ enum { + Opt_sharecache, Opt_nosharecache, + Opt_resvport, Opt_noresvport, + Opt_fscache, Opt_nofscache, ++ Opt_tag, Opt_notag, + + /* Mount options that take integer arguments */ + Opt_port, +@@ -99,6 +101,7 @@ enum { + Opt_mountvers, + Opt_nfsvers, + Opt_minorversion, ++ Opt_tagid, + + /* Mount options that take string arguments */ + Opt_sec, Opt_proto, Opt_mountproto, Opt_mounthost, +@@ -179,6 +182,10 @@ static const match_table_t nfs_mount_opt + { Opt_fscache_uniq, "fsc=%s" }, + { Opt_local_lock, "local_lock=%s" }, + ++ { Opt_tag, "tag" }, ++ { Opt_notag, "notag" }, ++ { Opt_tagid, "tagid=%u" }, ++ + { Opt_err, NULL } + }; + +@@ -649,6 +656,7 @@ static void nfs_show_mount_options(struc + { NFS_MOUNT_NORDIRPLUS, ",nordirplus", "" }, + { NFS_MOUNT_UNSHARED, ",nosharecache", "" }, + { NFS_MOUNT_NORESVPORT, ",noresvport", "" }, ++ { NFS_MOUNT_TAGGED, ",tag", "" }, + { 0, NULL, NULL } + }; + const struct proc_nfs_info *nfs_infop; +@@ -1216,6 +1224,14 @@ static int nfs_parse_mount_options(char + kfree(mnt->fscache_uniq); + mnt->fscache_uniq = NULL; + break; ++#ifndef CONFIG_TAGGING_NONE ++ case Opt_tag: ++ mnt->flags |= NFS_MOUNT_TAGGED; ++ break; ++ case Opt_notag: ++ mnt->flags &= ~NFS_MOUNT_TAGGED; ++ break; ++#endif + + /* + * options that take numeric values +@@ -1322,6 +1338,12 @@ static int nfs_parse_mount_options(char + goto out_invalid_value; + mnt->minorversion = option; + break; ++#ifdef CONFIG_PROPAGATE ++ case Opt_tagid: ++ /* use args[0] */ ++ nfs_data.flags |= NFS_MOUNT_TAGGED; ++ break; ++#endif + + /* + * options that take text values +diff -NurpP --minimal linux-3.3.8/fs/nfsd/auth.c linux-3.3.8-vs2.3.3.4/fs/nfsd/auth.c +--- linux-3.3.8/fs/nfsd/auth.c 2010-02-25 11:52:05.000000000 +0100 ++++ linux-3.3.8-vs2.3.3.4/fs/nfsd/auth.c 2012-02-24 03:55:06.000000000 +0100 +@@ -1,6 +1,7 @@ + /* Copyright (C) 1995, 1996 Olaf Kirch */ + + #include ++#include + #include "nfsd.h" + #include "auth.h" + +@@ -36,6 +37,9 @@ int nfsd_setuser(struct svc_rqst *rqstp, + + new->fsuid = rqstp->rq_cred.cr_uid; + new->fsgid = rqstp->rq_cred.cr_gid; ++ /* FIXME: this desperately needs a tag :) ++ new->xid = (xid_t)INOTAG_TAG(DX_TAG_NFSD, cred.cr_uid, cred.cr_gid, 0); ++ */ + + rqgi = rqstp->rq_cred.cr_group_info; + +diff -NurpP --minimal linux-3.3.8/fs/nfsd/nfs3xdr.c linux-3.3.8-vs2.3.3.4/fs/nfsd/nfs3xdr.c +--- linux-3.3.8/fs/nfsd/nfs3xdr.c 2012-06-08 15:23:46.000000000 +0200 ++++ linux-3.3.8-vs2.3.3.4/fs/nfsd/nfs3xdr.c 2012-04-30 19:34:37.000000000 +0200 +@@ -7,6 +7,7 @@ + */ + + #include ++#include + #include "xdr3.h" + #include "auth.h" + +@@ -95,6 +96,8 @@ static __be32 * + decode_sattr3(__be32 *p, struct iattr *iap) + { + u32 tmp; ++ uid_t uid = 0; ++ gid_t gid = 0; + + iap->ia_valid = 0; + +@@ -104,12 +107,15 @@ decode_sattr3(__be32 *p, struct iattr *i + } + if (*p++) { + 
iap->ia_valid |= ATTR_UID; +- iap->ia_uid = ntohl(*p++); ++ uid = ntohl(*p++); + } + if (*p++) { + iap->ia_valid |= ATTR_GID; +- iap->ia_gid = ntohl(*p++); ++ gid = ntohl(*p++); + } ++ iap->ia_uid = INOTAG_UID(DX_TAG_NFSD, uid, gid); ++ iap->ia_gid = INOTAG_GID(DX_TAG_NFSD, uid, gid); ++ iap->ia_tag = INOTAG_TAG(DX_TAG_NFSD, uid, gid, 0); + if (*p++) { + u64 newsize; + +@@ -165,8 +171,12 @@ encode_fattr3(struct svc_rqst *rqstp, __ + *p++ = htonl(nfs3_ftypes[(stat->mode & S_IFMT) >> 12]); + *p++ = htonl((u32) stat->mode); + *p++ = htonl((u32) stat->nlink); +- *p++ = htonl((u32) nfsd_ruid(rqstp, stat->uid)); +- *p++ = htonl((u32) nfsd_rgid(rqstp, stat->gid)); ++ *p++ = htonl((u32) nfsd_ruid(rqstp, ++ TAGINO_UID(0 /* FIXME: DX_TAG(dentry->d_inode) */, ++ stat->uid, stat->tag))); ++ *p++ = htonl((u32) nfsd_rgid(rqstp, ++ TAGINO_GID(0 /* FIXME: DX_TAG(dentry->d_inode) */, ++ stat->gid, stat->tag))); + if (S_ISLNK(stat->mode) && stat->size > NFS3_MAXPATHLEN) { + p = xdr_encode_hyper(p, (u64) NFS3_MAXPATHLEN); + } else { +diff -NurpP --minimal linux-3.3.8/fs/nfsd/nfs4xdr.c linux-3.3.8-vs2.3.3.4/fs/nfsd/nfs4xdr.c +--- linux-3.3.8/fs/nfsd/nfs4xdr.c 2012-06-08 15:23:46.000000000 +0200 ++++ linux-3.3.8-vs2.3.3.4/fs/nfsd/nfs4xdr.c 2012-04-30 19:34:37.000000000 +0200 +@@ -46,6 +46,7 @@ + #include + #include + #include ++#include + + #include "idmap.h" + #include "acl.h" +@@ -2327,14 +2328,18 @@ out_acl: + WRITE32(stat.nlink); + } + if (bmval1 & FATTR4_WORD1_OWNER) { +- status = nfsd4_encode_user(rqstp, stat.uid, &p, &buflen); ++ status = nfsd4_encode_user(rqstp, ++ TAGINO_UID(DX_TAG(dentry->d_inode), ++ stat.uid, stat.tag), &p, &buflen); + if (status == nfserr_resource) + goto out_resource; + if (status) + goto out; + } + if (bmval1 & FATTR4_WORD1_OWNER_GROUP) { +- status = nfsd4_encode_group(rqstp, stat.gid, &p, &buflen); ++ status = nfsd4_encode_group(rqstp, ++ TAGINO_GID(DX_TAG(dentry->d_inode), ++ stat.gid, stat.tag), &p, &buflen); + if (status == nfserr_resource) + goto out_resource; + if (status) +diff -NurpP --minimal linux-3.3.8/fs/nfsd/nfsxdr.c linux-3.3.8-vs2.3.3.4/fs/nfsd/nfsxdr.c +--- linux-3.3.8/fs/nfsd/nfsxdr.c 2011-05-22 16:17:53.000000000 +0200 ++++ linux-3.3.8-vs2.3.3.4/fs/nfsd/nfsxdr.c 2012-02-24 03:55:06.000000000 +0100 +@@ -6,6 +6,7 @@ + + #include "xdr.h" + #include "auth.h" ++#include + + #define NFSDDBG_FACILITY NFSDDBG_XDR + +@@ -88,6 +89,8 @@ static __be32 * + decode_sattr(__be32 *p, struct iattr *iap) + { + u32 tmp, tmp1; ++ uid_t uid = 0; ++ gid_t gid = 0; + + iap->ia_valid = 0; + +@@ -101,12 +104,15 @@ decode_sattr(__be32 *p, struct iattr *ia + } + if ((tmp = ntohl(*p++)) != (u32)-1) { + iap->ia_valid |= ATTR_UID; +- iap->ia_uid = tmp; ++ uid = tmp; + } + if ((tmp = ntohl(*p++)) != (u32)-1) { + iap->ia_valid |= ATTR_GID; +- iap->ia_gid = tmp; ++ gid = tmp; + } ++ iap->ia_uid = INOTAG_UID(DX_TAG_NFSD, uid, gid); ++ iap->ia_gid = INOTAG_GID(DX_TAG_NFSD, uid, gid); ++ iap->ia_tag = INOTAG_TAG(DX_TAG_NFSD, uid, gid, 0); + if ((tmp = ntohl(*p++)) != (u32)-1) { + iap->ia_valid |= ATTR_SIZE; + iap->ia_size = tmp; +@@ -151,8 +157,10 @@ encode_fattr(struct svc_rqst *rqstp, __b + *p++ = htonl(nfs_ftypes[type >> 12]); + *p++ = htonl((u32) stat->mode); + *p++ = htonl((u32) stat->nlink); +- *p++ = htonl((u32) nfsd_ruid(rqstp, stat->uid)); +- *p++ = htonl((u32) nfsd_rgid(rqstp, stat->gid)); ++ *p++ = htonl((u32) nfsd_ruid(rqstp, ++ TAGINO_UID(DX_TAG(dentry->d_inode), stat->uid, stat->tag))); ++ *p++ = htonl((u32) nfsd_rgid(rqstp, ++ TAGINO_GID(DX_TAG(dentry->d_inode), stat->gid, 
stat->tag))); + + if (S_ISLNK(type) && stat->size > NFS_MAXPATHLEN) { + *p++ = htonl(NFS_MAXPATHLEN); +diff -NurpP --minimal linux-3.3.8/fs/ocfs2/dlmglue.c linux-3.3.8-vs2.3.3.4/fs/ocfs2/dlmglue.c +--- linux-3.3.8/fs/ocfs2/dlmglue.c 2012-01-09 16:14:55.000000000 +0100 ++++ linux-3.3.8-vs2.3.3.4/fs/ocfs2/dlmglue.c 2012-02-24 03:55:06.000000000 +0100 +@@ -2047,6 +2047,7 @@ static void __ocfs2_stuff_meta_lvb(struc + lvb->lvb_iclusters = cpu_to_be32(oi->ip_clusters); + lvb->lvb_iuid = cpu_to_be32(inode->i_uid); + lvb->lvb_igid = cpu_to_be32(inode->i_gid); ++ lvb->lvb_itag = cpu_to_be16(inode->i_tag); + lvb->lvb_imode = cpu_to_be16(inode->i_mode); + lvb->lvb_inlink = cpu_to_be16(inode->i_nlink); + lvb->lvb_iatime_packed = +@@ -2097,6 +2098,7 @@ static void ocfs2_refresh_inode_from_lvb + + inode->i_uid = be32_to_cpu(lvb->lvb_iuid); + inode->i_gid = be32_to_cpu(lvb->lvb_igid); ++ inode->i_tag = be16_to_cpu(lvb->lvb_itag); + inode->i_mode = be16_to_cpu(lvb->lvb_imode); + set_nlink(inode, be16_to_cpu(lvb->lvb_inlink)); + ocfs2_unpack_timespec(&inode->i_atime, +diff -NurpP --minimal linux-3.3.8/fs/ocfs2/dlmglue.h linux-3.3.8-vs2.3.3.4/fs/ocfs2/dlmglue.h +--- linux-3.3.8/fs/ocfs2/dlmglue.h 2010-10-21 13:07:50.000000000 +0200 ++++ linux-3.3.8-vs2.3.3.4/fs/ocfs2/dlmglue.h 2012-02-24 03:55:06.000000000 +0100 +@@ -46,7 +46,8 @@ struct ocfs2_meta_lvb { + __be16 lvb_inlink; + __be32 lvb_iattr; + __be32 lvb_igeneration; +- __be32 lvb_reserved2; ++ __be16 lvb_itag; ++ __be16 lvb_reserved2; + }; + + #define OCFS2_QINFO_LVB_VERSION 1 +diff -NurpP --minimal linux-3.3.8/fs/ocfs2/file.c linux-3.3.8-vs2.3.3.4/fs/ocfs2/file.c +--- linux-3.3.8/fs/ocfs2/file.c 2012-03-19 19:47:26.000000000 +0100 ++++ linux-3.3.8-vs2.3.3.4/fs/ocfs2/file.c 2012-02-24 03:55:06.000000000 +0100 +@@ -1123,7 +1123,7 @@ int ocfs2_setattr(struct dentry *dentry, + attr->ia_valid &= ~ATTR_SIZE; + + #define OCFS2_VALID_ATTRS (ATTR_ATIME | ATTR_MTIME | ATTR_CTIME | ATTR_SIZE \ +- | ATTR_GID | ATTR_UID | ATTR_MODE) ++ | ATTR_GID | ATTR_UID | ATTR_TAG | ATTR_MODE) + if (!(attr->ia_valid & OCFS2_VALID_ATTRS)) + return 0; + +diff -NurpP --minimal linux-3.3.8/fs/ocfs2/inode.c linux-3.3.8-vs2.3.3.4/fs/ocfs2/inode.c +--- linux-3.3.8/fs/ocfs2/inode.c 2012-01-09 16:14:55.000000000 +0100 ++++ linux-3.3.8-vs2.3.3.4/fs/ocfs2/inode.c 2012-02-24 03:55:06.000000000 +0100 +@@ -28,6 +28,7 @@ + #include + #include + #include ++#include + + #include + +@@ -78,11 +79,13 @@ void ocfs2_set_inode_flags(struct inode + { + unsigned int flags = OCFS2_I(inode)->ip_attr; + +- inode->i_flags &= ~(S_IMMUTABLE | ++ inode->i_flags &= ~(S_IMMUTABLE | S_IXUNLINK | + S_SYNC | S_APPEND | S_NOATIME | S_DIRSYNC); + + if (flags & OCFS2_IMMUTABLE_FL) + inode->i_flags |= S_IMMUTABLE; ++ if (flags & OCFS2_IXUNLINK_FL) ++ inode->i_flags |= S_IXUNLINK; + + if (flags & OCFS2_SYNC_FL) + inode->i_flags |= S_SYNC; +@@ -92,25 +95,44 @@ void ocfs2_set_inode_flags(struct inode + inode->i_flags |= S_NOATIME; + if (flags & OCFS2_DIRSYNC_FL) + inode->i_flags |= S_DIRSYNC; ++ ++ inode->i_vflags &= ~(V_BARRIER | V_COW); ++ ++ if (flags & OCFS2_BARRIER_FL) ++ inode->i_vflags |= V_BARRIER; ++ if (flags & OCFS2_COW_FL) ++ inode->i_vflags |= V_COW; + } + + /* Propagate flags from i_flags to OCFS2_I(inode)->ip_attr */ + void ocfs2_get_inode_flags(struct ocfs2_inode_info *oi) + { + unsigned int flags = oi->vfs_inode.i_flags; ++ unsigned int vflags = oi->vfs_inode.i_vflags; ++ ++ oi->ip_attr &= ~(OCFS2_SYNC_FL | OCFS2_APPEND_FL | ++ OCFS2_IMMUTABLE_FL | OCFS2_IXUNLINK_FL | ++ OCFS2_NOATIME_FL | 
OCFS2_DIRSYNC_FL | ++ OCFS2_BARRIER_FL | OCFS2_COW_FL); ++ ++ if (flags & S_IMMUTABLE) ++ oi->ip_attr |= OCFS2_IMMUTABLE_FL; ++ if (flags & S_IXUNLINK) ++ oi->ip_attr |= OCFS2_IXUNLINK_FL; + +- oi->ip_attr &= ~(OCFS2_SYNC_FL|OCFS2_APPEND_FL| +- OCFS2_IMMUTABLE_FL|OCFS2_NOATIME_FL|OCFS2_DIRSYNC_FL); + if (flags & S_SYNC) + oi->ip_attr |= OCFS2_SYNC_FL; + if (flags & S_APPEND) + oi->ip_attr |= OCFS2_APPEND_FL; +- if (flags & S_IMMUTABLE) +- oi->ip_attr |= OCFS2_IMMUTABLE_FL; + if (flags & S_NOATIME) + oi->ip_attr |= OCFS2_NOATIME_FL; + if (flags & S_DIRSYNC) + oi->ip_attr |= OCFS2_DIRSYNC_FL; ++ ++ if (vflags & V_BARRIER) ++ oi->ip_attr |= OCFS2_BARRIER_FL; ++ if (vflags & V_COW) ++ oi->ip_attr |= OCFS2_COW_FL; + } + + struct inode *ocfs2_ilookup(struct super_block *sb, u64 blkno) +@@ -241,6 +263,8 @@ void ocfs2_populate_inode(struct inode * + struct super_block *sb; + struct ocfs2_super *osb; + int use_plocks = 1; ++ uid_t uid; ++ gid_t gid; + + sb = inode->i_sb; + osb = OCFS2_SB(sb); +@@ -269,8 +293,12 @@ void ocfs2_populate_inode(struct inode * + inode->i_generation = le32_to_cpu(fe->i_generation); + inode->i_rdev = huge_decode_dev(le64_to_cpu(fe->id1.dev1.i_rdev)); + inode->i_mode = le16_to_cpu(fe->i_mode); +- inode->i_uid = le32_to_cpu(fe->i_uid); +- inode->i_gid = le32_to_cpu(fe->i_gid); ++ uid = le32_to_cpu(fe->i_uid); ++ gid = le32_to_cpu(fe->i_gid); ++ inode->i_uid = INOTAG_UID(DX_TAG(inode), uid, gid); ++ inode->i_gid = INOTAG_GID(DX_TAG(inode), uid, gid); ++ inode->i_tag = INOTAG_TAG(DX_TAG(inode), uid, gid, ++ /* le16_to_cpu(raw_inode->i_raw_tag)i */ 0); + + /* Fast symlinks will have i_size but no allocated clusters. */ + if (S_ISLNK(inode->i_mode) && !fe->i_clusters) +diff -NurpP --minimal linux-3.3.8/fs/ocfs2/inode.h linux-3.3.8-vs2.3.3.4/fs/ocfs2/inode.h +--- linux-3.3.8/fs/ocfs2/inode.h 2012-01-09 16:14:55.000000000 +0100 ++++ linux-3.3.8-vs2.3.3.4/fs/ocfs2/inode.h 2012-02-24 03:55:06.000000000 +0100 +@@ -154,6 +154,7 @@ struct buffer_head *ocfs2_bread(struct i + + void ocfs2_set_inode_flags(struct inode *inode); + void ocfs2_get_inode_flags(struct ocfs2_inode_info *oi); ++int ocfs2_sync_flags(struct inode *inode, int, int); + + static inline blkcnt_t ocfs2_inode_sector_count(struct inode *inode) + { +diff -NurpP --minimal linux-3.3.8/fs/ocfs2/ioctl.c linux-3.3.8-vs2.3.3.4/fs/ocfs2/ioctl.c +--- linux-3.3.8/fs/ocfs2/ioctl.c 2012-03-19 19:47:26.000000000 +0100 ++++ linux-3.3.8-vs2.3.3.4/fs/ocfs2/ioctl.c 2012-02-24 03:55:06.000000000 +0100 +@@ -78,7 +78,41 @@ static int ocfs2_get_inode_attr(struct i + return status; + } + +-static int ocfs2_set_inode_attr(struct inode *inode, unsigned flags, ++int ocfs2_sync_flags(struct inode *inode, int flags, int vflags) ++{ ++ struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); ++ struct buffer_head *bh = NULL; ++ handle_t *handle = NULL; ++ int status; ++ ++ status = ocfs2_inode_lock(inode, &bh, 1); ++ if (status < 0) { ++ mlog_errno(status); ++ return status; ++ } ++ handle = ocfs2_start_trans(osb, OCFS2_INODE_UPDATE_CREDITS); ++ if (IS_ERR(handle)) { ++ status = PTR_ERR(handle); ++ mlog_errno(status); ++ goto bail_unlock; ++ } ++ ++ inode->i_flags = flags; ++ inode->i_vflags = vflags; ++ ocfs2_get_inode_flags(OCFS2_I(inode)); ++ ++ status = ocfs2_mark_inode_dirty(handle, inode, bh); ++ if (status < 0) ++ mlog_errno(status); ++ ++ ocfs2_commit_trans(osb, handle); ++bail_unlock: ++ ocfs2_inode_unlock(inode, 1); ++ brelse(bh); ++ return status; ++} ++ ++int ocfs2_set_inode_attr(struct inode *inode, unsigned flags, + unsigned mask) + { + struct 
ocfs2_inode_info *ocfs2_inode = OCFS2_I(inode); +@@ -103,6 +137,11 @@ static int ocfs2_set_inode_attr(struct i + if (!S_ISDIR(inode->i_mode)) + flags &= ~OCFS2_DIRSYNC_FL; + ++ if (IS_BARRIER(inode)) { ++ vxwprintk_task(1, "messing with the barrier."); ++ goto bail_unlock; ++ } ++ + handle = ocfs2_start_trans(osb, OCFS2_INODE_UPDATE_CREDITS); + if (IS_ERR(handle)) { + status = PTR_ERR(handle); +@@ -881,6 +920,7 @@ bail: + return status; + } + ++ + long ocfs2_ioctl(struct file *filp, unsigned int cmd, unsigned long arg) + { + struct inode *inode = filp->f_path.dentry->d_inode; +diff -NurpP --minimal linux-3.3.8/fs/ocfs2/namei.c linux-3.3.8-vs2.3.3.4/fs/ocfs2/namei.c +--- linux-3.3.8/fs/ocfs2/namei.c 2012-03-19 19:47:26.000000000 +0100 ++++ linux-3.3.8-vs2.3.3.4/fs/ocfs2/namei.c 2012-03-19 20:52:10.000000000 +0100 +@@ -41,6 +41,7 @@ + #include + #include + #include ++#include + + #include + +@@ -475,6 +476,7 @@ static int __ocfs2_mknod_locked(struct i + struct ocfs2_dinode *fe = NULL; + struct ocfs2_extent_list *fel; + u16 feat; ++ tag_t tag; + + *new_fe_bh = NULL; + +@@ -512,8 +514,11 @@ static int __ocfs2_mknod_locked(struct i + fe->i_suballoc_loc = cpu_to_le64(suballoc_loc); + fe->i_suballoc_bit = cpu_to_le16(suballoc_bit); + fe->i_suballoc_slot = cpu_to_le16(inode_ac->ac_alloc_slot); +- fe->i_uid = cpu_to_le32(inode->i_uid); +- fe->i_gid = cpu_to_le32(inode->i_gid); ++ ++ tag = dx_current_fstag(osb->sb); ++ fe->i_uid = cpu_to_le32(TAGINO_UID(DX_TAG(inode), inode->i_uid, tag)); ++ fe->i_gid = cpu_to_le32(TAGINO_GID(DX_TAG(inode), inode->i_gid, tag)); ++ inode->i_tag = tag; + fe->i_mode = cpu_to_le16(inode->i_mode); + if (S_ISCHR(inode->i_mode) || S_ISBLK(inode->i_mode)) + fe->id1.dev1.i_rdev = cpu_to_le64(huge_encode_dev(dev)); +diff -NurpP --minimal linux-3.3.8/fs/ocfs2/ocfs2.h linux-3.3.8-vs2.3.3.4/fs/ocfs2/ocfs2.h +--- linux-3.3.8/fs/ocfs2/ocfs2.h 2012-01-09 16:14:55.000000000 +0100 ++++ linux-3.3.8-vs2.3.3.4/fs/ocfs2/ocfs2.h 2012-02-24 03:55:06.000000000 +0100 +@@ -272,6 +272,7 @@ enum ocfs2_mount_options + writes */ + OCFS2_MOUNT_HB_NONE = 1 << 13, /* No heartbeat */ + OCFS2_MOUNT_HB_GLOBAL = 1 << 14, /* Global heartbeat */ ++ OCFS2_MOUNT_TAGGED = 1 << 15, /* use tagging */ + }; + + #define OCFS2_OSB_SOFT_RO 0x0001 +diff -NurpP --minimal linux-3.3.8/fs/ocfs2/ocfs2_fs.h linux-3.3.8-vs2.3.3.4/fs/ocfs2/ocfs2_fs.h +--- linux-3.3.8/fs/ocfs2/ocfs2_fs.h 2011-05-22 16:17:53.000000000 +0200 ++++ linux-3.3.8-vs2.3.3.4/fs/ocfs2/ocfs2_fs.h 2012-02-24 03:55:06.000000000 +0100 +@@ -266,6 +266,11 @@ + #define OCFS2_TOPDIR_FL FS_TOPDIR_FL /* Top of directory hierarchies*/ + #define OCFS2_RESERVED_FL FS_RESERVED_FL /* reserved for ext2 lib */ + ++#define OCFS2_IXUNLINK_FL FS_IXUNLINK_FL /* Immutable invert on unlink */ ++ ++#define OCFS2_BARRIER_FL FS_BARRIER_FL /* Barrier for chroot() */ ++#define OCFS2_COW_FL FS_COW_FL /* Copy on Write marker */ ++ + #define OCFS2_FL_VISIBLE FS_FL_USER_VISIBLE /* User visible flags */ + #define OCFS2_FL_MODIFIABLE FS_FL_USER_MODIFIABLE /* User modifiable flags */ + +diff -NurpP --minimal linux-3.3.8/fs/ocfs2/super.c linux-3.3.8-vs2.3.3.4/fs/ocfs2/super.c +--- linux-3.3.8/fs/ocfs2/super.c 2012-03-19 19:47:26.000000000 +0100 ++++ linux-3.3.8-vs2.3.3.4/fs/ocfs2/super.c 2012-02-24 03:55:06.000000000 +0100 +@@ -185,6 +185,7 @@ enum { + Opt_coherency_full, + Opt_resv_level, + Opt_dir_resv_level, ++ Opt_tag, Opt_notag, Opt_tagid, + Opt_err, + }; + +@@ -216,6 +217,9 @@ static const match_table_t tokens = { + {Opt_coherency_full, "coherency=full"}, + {Opt_resv_level, 
"resv_level=%u"}, + {Opt_dir_resv_level, "dir_resv_level=%u"}, ++ {Opt_tag, "tag"}, ++ {Opt_notag, "notag"}, ++ {Opt_tagid, "tagid=%u"}, + {Opt_err, NULL} + }; + +@@ -662,6 +666,13 @@ static int ocfs2_remount(struct super_bl + goto out; + } + ++ if ((osb->s_mount_opt & OCFS2_MOUNT_TAGGED) != ++ (parsed_options.mount_opt & OCFS2_MOUNT_TAGGED)) { ++ ret = -EINVAL; ++ mlog(ML_ERROR, "Cannot change tagging on remount\n"); ++ goto out; ++ } ++ + /* We're going to/from readonly mode. */ + if ((*flags & MS_RDONLY) != (sb->s_flags & MS_RDONLY)) { + /* Disable quota accounting before remounting RO */ +@@ -1177,6 +1188,9 @@ static int ocfs2_fill_super(struct super + + ocfs2_complete_mount_recovery(osb); + ++ if (osb->s_mount_opt & OCFS2_MOUNT_TAGGED) ++ sb->s_flags |= MS_TAGGED; ++ + if (ocfs2_mount_local(osb)) + snprintf(nodestr, sizeof(nodestr), "local"); + else +@@ -1506,6 +1520,20 @@ static int ocfs2_parse_options(struct su + option < OCFS2_MAX_RESV_LEVEL) + mopt->dir_resv_level = option; + break; ++#ifndef CONFIG_TAGGING_NONE ++ case Opt_tag: ++ mopt->mount_opt |= OCFS2_MOUNT_TAGGED; ++ break; ++ case Opt_notag: ++ mopt->mount_opt &= ~OCFS2_MOUNT_TAGGED; ++ break; ++#endif ++#ifdef CONFIG_PROPAGATE ++ case Opt_tagid: ++ /* use args[0] */ ++ mopt->mount_opt |= OCFS2_MOUNT_TAGGED; ++ break; ++#endif + default: + mlog(ML_ERROR, + "Unrecognized mount option \"%s\" " +diff -NurpP --minimal linux-3.3.8/fs/open.c linux-3.3.8-vs2.3.3.4/fs/open.c +--- linux-3.3.8/fs/open.c 2012-03-19 19:47:26.000000000 +0100 ++++ linux-3.3.8-vs2.3.3.4/fs/open.c 2012-02-24 03:55:06.000000000 +0100 +@@ -30,6 +30,11 @@ + #include + #include + #include ++#include ++#include ++#include ++#include ++#include + + #include "internal.h" + +@@ -74,6 +79,12 @@ static long do_sys_truncate(const char _ + error = user_path(pathname, &path); + if (error) + goto out; ++ ++#ifdef CONFIG_VSERVER_COWBL ++ error = cow_check_and_break(&path); ++ if (error) ++ goto dput_and_out; ++#endif + inode = path.dentry->d_inode; + + /* For directories it's -EISDIR, for other non-regulars - -EINVAL */ +@@ -489,6 +500,10 @@ SYSCALL_DEFINE3(fchmodat, int, dfd, cons + + error = user_path_at(dfd, filename, LOOKUP_FOLLOW, &path); + if (!error) { ++#ifdef CONFIG_VSERVER_COWBL ++ error = cow_check_and_break(&path); ++ if (!error) ++#endif + error = chmod_common(&path, mode); + path_put(&path); + } +@@ -509,11 +524,11 @@ static int chown_common(struct path *pat + newattrs.ia_valid = ATTR_CTIME; + if (user != (uid_t) -1) { + newattrs.ia_valid |= ATTR_UID; +- newattrs.ia_uid = user; ++ newattrs.ia_uid = dx_map_uid(user); + } + if (group != (gid_t) -1) { + newattrs.ia_valid |= ATTR_GID; +- newattrs.ia_gid = group; ++ newattrs.ia_gid = dx_map_gid(group); + } + if (!S_ISDIR(inode->i_mode)) + newattrs.ia_valid |= +@@ -538,6 +553,10 @@ SYSCALL_DEFINE3(chown, const char __user + error = mnt_want_write(path.mnt); + if (error) + goto out_release; ++#ifdef CONFIG_VSERVER_COWBL ++ error = cow_check_and_break(&path); ++ if (!error) ++#endif + error = chown_common(&path, user, group); + mnt_drop_write(path.mnt); + out_release: +@@ -565,6 +584,10 @@ SYSCALL_DEFINE5(fchownat, int, dfd, cons + error = mnt_want_write(path.mnt); + if (error) + goto out_release; ++#ifdef CONFIG_VSERVER_COWBL ++ error = cow_check_and_break(&path); ++ if (!error) ++#endif + error = chown_common(&path, user, group); + mnt_drop_write(path.mnt); + out_release: +@@ -584,6 +607,10 @@ SYSCALL_DEFINE3(lchown, const char __use + error = mnt_want_write(path.mnt); + if (error) + goto out_release; ++#ifdef 
CONFIG_VSERVER_COWBL ++ error = cow_check_and_break(&path); ++ if (!error) ++#endif + error = chown_common(&path, user, group); + mnt_drop_write(path.mnt); + out_release: +@@ -839,6 +866,7 @@ static void __put_unused_fd(struct files + __FD_CLR(fd, fdt->open_fds); + if (fd < files->next_fd) + files->next_fd = fd; ++ vx_openfd_dec(fd); + } + + void put_unused_fd(unsigned int fd) +diff -NurpP --minimal linux-3.3.8/fs/proc/array.c linux-3.3.8-vs2.3.3.4/fs/proc/array.c +--- linux-3.3.8/fs/proc/array.c 2012-03-19 19:47:26.000000000 +0100 ++++ linux-3.3.8-vs2.3.3.4/fs/proc/array.c 2012-02-24 03:55:06.000000000 +0100 +@@ -81,6 +81,8 @@ + #include + #include + #include ++#include ++#include + + #include + #include +@@ -170,6 +172,9 @@ static inline void task_state(struct seq + rcu_read_lock(); + ppid = pid_alive(p) ? + task_tgid_nr_ns(rcu_dereference(p->real_parent), ns) : 0; ++ if (unlikely(vx_current_initpid(p->pid))) ++ ppid = 0; ++ + tpid = 0; + if (pid_alive(p)) { + struct task_struct *tracer = ptrace_parent(p); +@@ -287,7 +292,7 @@ static inline void task_sig(struct seq_f + } + + static void render_cap_t(struct seq_file *m, const char *header, +- kernel_cap_t *a) ++ struct vx_info *vxi, kernel_cap_t *a) + { + unsigned __capi; + +@@ -312,10 +317,11 @@ static inline void task_cap(struct seq_f + cap_bset = cred->cap_bset; + rcu_read_unlock(); + +- render_cap_t(m, "CapInh:\t", &cap_inheritable); +- render_cap_t(m, "CapPrm:\t", &cap_permitted); +- render_cap_t(m, "CapEff:\t", &cap_effective); +- render_cap_t(m, "CapBnd:\t", &cap_bset); ++ /* FIXME: maybe move the p->vx_info masking to __task_cred() ? */ ++ render_cap_t(m, "CapInh:\t", p->vx_info, &cap_inheritable); ++ render_cap_t(m, "CapPrm:\t", p->vx_info, &cap_permitted); ++ render_cap_t(m, "CapEff:\t", p->vx_info, &cap_effective); ++ render_cap_t(m, "CapBnd:\t", p->vx_info, &cap_bset); + } + + static inline void task_context_switch_counts(struct seq_file *m, +@@ -337,6 +343,42 @@ static void task_cpus_allowed(struct seq + seq_putc(m, '\n'); + } + ++int proc_pid_nsproxy(struct seq_file *m, struct pid_namespace *ns, ++ struct pid *pid, struct task_struct *task) ++{ ++ seq_printf(m, "Proxy:\t%p(%c)\n" ++ "Count:\t%u\n" ++ "uts:\t%p(%c)\n" ++ "ipc:\t%p(%c)\n" ++ "mnt:\t%p(%c)\n" ++ "pid:\t%p(%c)\n" ++ "net:\t%p(%c)\n", ++ task->nsproxy, ++ (task->nsproxy == init_task.nsproxy ? 'I' : '-'), ++ atomic_read(&task->nsproxy->count), ++ task->nsproxy->uts_ns, ++ (task->nsproxy->uts_ns == init_task.nsproxy->uts_ns ? 'I' : '-'), ++ task->nsproxy->ipc_ns, ++ (task->nsproxy->ipc_ns == init_task.nsproxy->ipc_ns ? 'I' : '-'), ++ task->nsproxy->mnt_ns, ++ (task->nsproxy->mnt_ns == init_task.nsproxy->mnt_ns ? 'I' : '-'), ++ task->nsproxy->pid_ns, ++ (task->nsproxy->pid_ns == init_task.nsproxy->pid_ns ? 'I' : '-'), ++ task->nsproxy->net_ns, ++ (task->nsproxy->net_ns == init_task.nsproxy->net_ns ? 
'I' : '-')); ++ return 0; ++} ++ ++void task_vs_id(struct seq_file *m, struct task_struct *task) ++{ ++ if (task_vx_flags(task, VXF_HIDE_VINFO, 0)) ++ return; ++ ++ seq_printf(m, "VxID: %d\n", vx_task_xid(task)); ++ seq_printf(m, "NxID: %d\n", nx_task_nid(task)); ++} ++ ++ + int proc_pid_status(struct seq_file *m, struct pid_namespace *ns, + struct pid *pid, struct task_struct *task) + { +@@ -353,6 +395,7 @@ int proc_pid_status(struct seq_file *m, + task_cap(m, task); + task_cpus_allowed(m, task); + cpuset_task_status_allowed(m, task); ++ task_vs_id(m, task); + task_context_switch_counts(m, task); + return 0; + } +@@ -462,6 +505,17 @@ static int do_task_stat(struct seq_file + /* convert nsec -> ticks */ + start_time = nsec_to_clock_t(start_time); + ++ /* fixup start time for virt uptime */ ++ if (vx_flags(VXF_VIRT_UPTIME, 0)) { ++ unsigned long long bias = ++ current->vx_info->cvirt.bias_clock; ++ ++ if (start_time > bias) ++ start_time -= bias; ++ else ++ start_time = 0; ++ } ++ + seq_printf(m, "%d (%s) %c %d %d %d %d %d %u %lu \ + %lu %lu %lu %lu %lu %ld %ld %ld %ld %d 0 %llu %lu %ld %lu %lu %lu %lu %lu \ + %lu %lu %lu %lu %lu %lu %lu %lu %d %d %u %u %llu %lu %ld %lu %lu %lu\n", +diff -NurpP --minimal linux-3.3.8/fs/proc/base.c linux-3.3.8-vs2.3.3.4/fs/proc/base.c +--- linux-3.3.8/fs/proc/base.c 2012-03-19 19:47:26.000000000 +0100 ++++ linux-3.3.8-vs2.3.3.4/fs/proc/base.c 2012-02-24 04:45:58.000000000 +0100 +@@ -84,6 +84,8 @@ + #include + #include + #include ++#include ++#include + #ifdef CONFIG_HARDWALL + #include + #endif +@@ -937,11 +939,16 @@ static ssize_t oom_adjust_write(struct f + goto err_task_lock; + } + +- if (oom_adjust < task->signal->oom_adj && !capable(CAP_SYS_RESOURCE)) { ++ if (oom_adjust < task->signal->oom_adj && ++ !vx_capable(CAP_SYS_RESOURCE, VXC_OOM_ADJUST)) { + err = -EACCES; + goto err_sighand; + } + ++ /* prevent guest processes from circumventing the oom killer */ ++ if (vx_current_xid() && (oom_adjust == OOM_DISABLE)) ++ oom_adjust = OOM_ADJUST_MIN; ++ + /* + * Warn that /proc/pid/oom_adj is deprecated, see + * Documentation/feature-removal-schedule.txt. +@@ -1542,6 +1549,8 @@ struct inode *proc_pid_make_inode(struct + inode->i_gid = cred->egid; + rcu_read_unlock(); + } ++ /* procfs is xid tagged */ ++ inode->i_tag = (tag_t)vx_task_xid(task); + security_task_to_inode(task, inode); + + out: +@@ -1587,6 +1596,8 @@ int pid_getattr(struct vfsmount *mnt, st + + /* dentry stuff */ + ++static unsigned name_to_int(struct dentry *dentry); ++ + /* + * Exceptional case: normally we are not allowed to unhash a busy + * directory. In this case, however, we can do it - no aliasing problems +@@ -1615,6 +1626,12 @@ int pid_revalidate(struct dentry *dentry + task = get_proc_task(inode); + + if (task) { ++ unsigned pid = name_to_int(dentry); ++ ++ if (pid != ~0U && pid != vx_map_pid(task->pid)) { ++ put_task_struct(task); ++ goto drop; ++ } + if ((inode->i_mode == (S_IFDIR|S_IRUGO|S_IXUGO)) || + task_dumpable(task)) { + rcu_read_lock(); +@@ -1631,6 +1648,7 @@ int pid_revalidate(struct dentry *dentry + put_task_struct(task); + return 1; + } ++drop: + d_drop(dentry); + return 0; + } +@@ -2469,6 +2487,13 @@ static struct dentry *proc_pident_lookup + if (!task) + goto out_no_task; + ++ /* TODO: maybe we can come up with a generic approach? */ ++ if (task_vx_flags(task, VXF_HIDE_VINFO, 0) && ++ (dentry->d_name.len == 5) && ++ (!memcmp(dentry->d_name.name, "vinfo", 5) || ++ !memcmp(dentry->d_name.name, "ninfo", 5))) ++ goto out; ++ + /* + * Yes, it does not scale. And it should not. 
Don't add + * new entries into /proc// without very good reasons. +@@ -2854,7 +2879,7 @@ out_iput: + static struct dentry *proc_base_lookup(struct inode *dir, struct dentry *dentry) + { + struct dentry *error; +- struct task_struct *task = get_proc_task(dir); ++ struct task_struct *task = get_proc_task_real(dir); + const struct pid_entry *p, *last; + + error = ERR_PTR(-ENOENT); +@@ -2961,6 +2986,9 @@ static int proc_pid_personality(struct s + static const struct file_operations proc_task_operations; + static const struct inode_operations proc_task_inode_operations; + ++extern int proc_pid_vx_info(struct task_struct *, char *); ++extern int proc_pid_nx_info(struct task_struct *, char *); ++ + static const struct pid_entry tgid_base_stuff[] = { + DIR("task", S_IRUGO|S_IXUGO, proc_task_inode_operations, proc_task_operations), + DIR("fd", S_IRUSR|S_IXUSR, proc_fd_inode_operations, proc_fd_operations), +@@ -3027,6 +3055,8 @@ static const struct pid_entry tgid_base_ + #ifdef CONFIG_CGROUPS + REG("cgroup", S_IRUGO, proc_cgroup_operations), + #endif ++ INF("vinfo", S_IRUGO, proc_pid_vx_info), ++ INF("ninfo", S_IRUGO, proc_pid_nx_info), + INF("oom_score", S_IRUGO, proc_oom_score), + REG("oom_adj", S_IRUGO|S_IWUSR, proc_oom_adjust_operations), + REG("oom_score_adj", S_IRUGO|S_IWUSR, proc_oom_score_adj_operations), +@@ -3046,6 +3076,7 @@ static const struct pid_entry tgid_base_ + #ifdef CONFIG_HARDWALL + INF("hardwall", S_IRUGO, proc_pid_hardwall), + #endif ++ ONE("nsproxy", S_IRUGO, proc_pid_nsproxy), + }; + + static int proc_tgid_base_readdir(struct file * filp, +@@ -3239,7 +3270,7 @@ retry: + iter.task = NULL; + pid = find_ge_pid(iter.tgid, ns); + if (pid) { +- iter.tgid = pid_nr_ns(pid, ns); ++ iter.tgid = pid_unmapped_nr_ns(pid, ns); + iter.task = pid_task(pid, PIDTYPE_PID); + /* What we to know is if the pid we have find is the + * pid of a thread_group_leader. 
Testing for task +@@ -3269,7 +3300,7 @@ static int proc_pid_fill_cache(struct fi + struct tgid_iter iter) + { + char name[PROC_NUMBUF]; +- int len = snprintf(name, sizeof(name), "%d", iter.tgid); ++ int len = snprintf(name, sizeof(name), "%d", vx_map_tgid(iter.tgid)); + return proc_fill_cache(filp, dirent, filldir, name, len, + proc_pid_instantiate, iter.task, NULL); + } +@@ -3293,7 +3324,7 @@ int proc_pid_readdir(struct file * filp, + goto out_no_task; + nr = filp->f_pos - FIRST_PROCESS_ENTRY; + +- reaper = get_proc_task(filp->f_path.dentry->d_inode); ++ reaper = get_proc_task_real(filp->f_path.dentry->d_inode); + if (!reaper) + goto out_no_task; + +@@ -3315,6 +3346,8 @@ int proc_pid_readdir(struct file * filp, + __filldir = fake_filldir; + + filp->f_pos = iter.tgid + TGID_OFFSET; ++ if (!vx_proc_task_visible(iter.task)) ++ continue; + if (proc_pid_fill_cache(filp, dirent, __filldir, iter) < 0) { + put_task_struct(iter.task); + goto out; +@@ -3468,6 +3501,8 @@ static struct dentry *proc_task_lookup(s + tid = name_to_int(dentry); + if (tid == ~0U) + goto out; ++ if (vx_current_initpid(tid)) ++ goto out; + + ns = dentry->d_sb->s_fs_info; + rcu_read_lock(); +diff -NurpP --minimal linux-3.3.8/fs/proc/generic.c linux-3.3.8-vs2.3.3.4/fs/proc/generic.c +--- linux-3.3.8/fs/proc/generic.c 2012-03-19 19:47:26.000000000 +0100 ++++ linux-3.3.8-vs2.3.3.4/fs/proc/generic.c 2012-02-24 03:55:06.000000000 +0100 +@@ -22,6 +22,7 @@ + #include + #include + #include ++#include + #include + + #include "internal.h" +@@ -424,11 +425,15 @@ struct dentry *proc_lookup_de(struct pro + for (de = de->subdir; de ; de = de->next) { + if (de->namelen != dentry->d_name.len) + continue; ++ if (!vx_hide_check(0, de->vx_flags)) ++ continue; + if (!memcmp(dentry->d_name.name, de->name, de->namelen)) { + pde_get(de); + spin_unlock(&proc_subdir_lock); + error = -EINVAL; + inode = proc_get_inode(dir->i_sb, de); ++ /* generic proc entries belong to the host */ ++ inode->i_tag = 0; + goto out_unlock; + } + } +@@ -506,6 +511,8 @@ int proc_readdir_de(struct proc_dir_entr + + /* filldir passes info to user space */ + pde_get(de); ++ if (!vx_hide_check(0, de->vx_flags)) ++ goto skip; + spin_unlock(&proc_subdir_lock); + if (filldir(dirent, de->name, de->namelen, filp->f_pos, + de->low_ino, de->mode >> 12) < 0) { +@@ -513,6 +520,7 @@ int proc_readdir_de(struct proc_dir_entr + goto out; + } + spin_lock(&proc_subdir_lock); ++ skip: + filp->f_pos++; + next = de->next; + pde_put(de); +@@ -626,6 +634,7 @@ static struct proc_dir_entry *__proc_cre + ent->nlink = nlink; + atomic_set(&ent->count, 1); + ent->pde_users = 0; ++ ent->vx_flags = IATTR_PROC_DEFAULT; + spin_lock_init(&ent->pde_unload_lock); + ent->pde_unload_completion = NULL; + INIT_LIST_HEAD(&ent->pde_openers); +@@ -649,7 +658,8 @@ struct proc_dir_entry *proc_symlink(cons + kfree(ent->data); + kfree(ent); + ent = NULL; +- } ++ } else ++ ent->vx_flags = IATTR_PROC_SYMLINK; + } else { + kfree(ent); + ent = NULL; +diff -NurpP --minimal linux-3.3.8/fs/proc/inode.c linux-3.3.8-vs2.3.3.4/fs/proc/inode.c +--- linux-3.3.8/fs/proc/inode.c 2012-03-19 19:47:26.000000000 +0100 ++++ linux-3.3.8-vs2.3.3.4/fs/proc/inode.c 2012-02-24 03:55:06.000000000 +0100 +@@ -459,6 +459,8 @@ struct inode *proc_get_inode(struct supe + inode->i_uid = de->uid; + inode->i_gid = de->gid; + } ++ if (de->vx_flags) ++ PROC_I(inode)->vx_flags = de->vx_flags; + if (de->size) + inode->i_size = de->size; + if (de->nlink) +diff -NurpP --minimal linux-3.3.8/fs/proc/internal.h linux-3.3.8-vs2.3.3.4/fs/proc/internal.h +--- 
linux-3.3.8/fs/proc/internal.h 2012-03-19 19:47:26.000000000 +0100 ++++ linux-3.3.8-vs2.3.3.4/fs/proc/internal.h 2012-02-24 03:55:06.000000000 +0100 +@@ -10,6 +10,7 @@ + */ + + #include ++#include + + extern struct proc_dir_entry proc_root; + #ifdef CONFIG_PROC_SYSCTL +@@ -51,6 +52,9 @@ extern int proc_pid_status(struct seq_fi + struct pid *pid, struct task_struct *task); + extern int proc_pid_statm(struct seq_file *m, struct pid_namespace *ns, + struct pid *pid, struct task_struct *task); ++extern int proc_pid_nsproxy(struct seq_file *m, struct pid_namespace *ns, ++ struct pid *pid, struct task_struct *task); ++ + extern loff_t mem_lseek(struct file *file, loff_t offset, int orig); + + extern const struct file_operations proc_maps_operations; +@@ -76,11 +80,16 @@ static inline struct pid *proc_pid(struc + return PROC_I(inode)->pid; + } + +-static inline struct task_struct *get_proc_task(struct inode *inode) ++static inline struct task_struct *get_proc_task_real(struct inode *inode) + { + return get_pid_task(proc_pid(inode), PIDTYPE_PID); + } + ++static inline struct task_struct *get_proc_task(struct inode *inode) ++{ ++ return vx_get_proc_task(inode, proc_pid(inode)); ++} ++ + static inline int proc_fd(struct inode *inode) + { + return PROC_I(inode)->fd; +diff -NurpP --minimal linux-3.3.8/fs/proc/loadavg.c linux-3.3.8-vs2.3.3.4/fs/proc/loadavg.c +--- linux-3.3.8/fs/proc/loadavg.c 2009-09-10 15:26:23.000000000 +0200 ++++ linux-3.3.8-vs2.3.3.4/fs/proc/loadavg.c 2012-02-24 03:55:06.000000000 +0100 +@@ -12,15 +12,27 @@ + + static int loadavg_proc_show(struct seq_file *m, void *v) + { ++ unsigned long running; ++ unsigned int threads; + unsigned long avnrun[3]; + + get_avenrun(avnrun, FIXED_1/200, 0); + ++ if (vx_flags(VXF_VIRT_LOAD, 0)) { ++ struct vx_info *vxi = current_vx_info(); ++ ++ running = atomic_read(&vxi->cvirt.nr_running); ++ threads = atomic_read(&vxi->cvirt.nr_threads); ++ } else { ++ running = nr_running(); ++ threads = nr_threads; ++ } ++ + seq_printf(m, "%lu.%02lu %lu.%02lu %lu.%02lu %ld/%d %d\n", + LOAD_INT(avnrun[0]), LOAD_FRAC(avnrun[0]), + LOAD_INT(avnrun[1]), LOAD_FRAC(avnrun[1]), + LOAD_INT(avnrun[2]), LOAD_FRAC(avnrun[2]), +- nr_running(), nr_threads, ++ running, threads, + task_active_pid_ns(current)->last_pid); + return 0; + } +diff -NurpP --minimal linux-3.3.8/fs/proc/meminfo.c linux-3.3.8-vs2.3.3.4/fs/proc/meminfo.c +--- linux-3.3.8/fs/proc/meminfo.c 2012-01-09 16:14:55.000000000 +0100 ++++ linux-3.3.8-vs2.3.3.4/fs/proc/meminfo.c 2012-02-24 03:55:06.000000000 +0100 +@@ -39,7 +39,8 @@ static int meminfo_proc_show(struct seq_ + allowed = ((totalram_pages - hugetlb_total_pages()) + * sysctl_overcommit_ratio / 100) + total_swap_pages; + +- cached = global_page_state(NR_FILE_PAGES) - ++ cached = vx_flags(VXF_VIRT_MEM, 0) ? 
++ vx_vsi_cached(&i) : global_page_state(NR_FILE_PAGES) - + total_swapcache_pages - i.bufferram; + if (cached < 0) + cached = 0; +diff -NurpP --minimal linux-3.3.8/fs/proc/root.c linux-3.3.8-vs2.3.3.4/fs/proc/root.c +--- linux-3.3.8/fs/proc/root.c 2012-03-19 19:47:26.000000000 +0100 ++++ linux-3.3.8-vs2.3.3.4/fs/proc/root.c 2012-02-24 04:46:50.000000000 +0100 +@@ -19,9 +19,14 @@ + #include + #include + #include ++#include + + #include "internal.h" + ++struct proc_dir_entry *proc_virtual; ++ ++extern void proc_vx_init(void); ++ + static int proc_test_super(struct super_block *sb, void *data) + { + return sb->s_fs_info == data; +@@ -189,6 +194,7 @@ void __init proc_root_init(void) + #endif + proc_mkdir("bus", NULL); + proc_sys_init(); ++ proc_vx_init(); + } + + static int proc_root_getattr(struct vfsmount *mnt, struct dentry *dentry, struct kstat *stat +@@ -256,6 +262,7 @@ struct proc_dir_entry proc_root = { + .proc_iops = &proc_root_inode_operations, + .proc_fops = &proc_root_operations, + .parent = &proc_root, ++ .vx_flags = IATTR_ADMIN | IATTR_WATCH, + .name = "/proc", + }; + +diff -NurpP --minimal linux-3.3.8/fs/proc/stat.c linux-3.3.8-vs2.3.3.4/fs/proc/stat.c +--- linux-3.3.8/fs/proc/stat.c 2012-03-19 19:47:26.000000000 +0100 ++++ linux-3.3.8-vs2.3.3.4/fs/proc/stat.c 2012-04-24 03:32:00.000000000 +0200 +@@ -9,6 +9,7 @@ + #include + #include + #include ++#include + #include + #include + +@@ -64,6 +65,10 @@ static int show_stat(struct seq_file *p, + irq = softirq = steal = 0; + guest = guest_nice = 0; + getboottime(&boottime); ++ ++ if (vx_flags(VXF_VIRT_UPTIME, 0)) ++ vx_vsi_boottime(&boottime); ++ + jif = boottime.tv_sec; + + for_each_possible_cpu(i) { +diff -NurpP --minimal linux-3.3.8/fs/proc/uptime.c linux-3.3.8-vs2.3.3.4/fs/proc/uptime.c +--- linux-3.3.8/fs/proc/uptime.c 2012-03-19 19:47:26.000000000 +0100 ++++ linux-3.3.8-vs2.3.3.4/fs/proc/uptime.c 2012-02-24 03:55:06.000000000 +0100 +@@ -5,6 +5,7 @@ + #include + #include + #include ++#include + #include + + static int uptime_proc_show(struct seq_file *m, void *v) +@@ -25,6 +26,10 @@ static int uptime_proc_show(struct seq_f + nsec = cputime64_to_jiffies64(idletime) * TICK_NSEC; + idle.tv_sec = div_u64_rem(nsec, NSEC_PER_SEC, &rem); + idle.tv_nsec = rem; ++ ++ if (vx_flags(VXF_VIRT_UPTIME, 0)) ++ vx_vsi_uptime(&uptime, &idle); ++ + seq_printf(m, "%lu.%02lu %lu.%02lu\n", + (unsigned long) uptime.tv_sec, + (uptime.tv_nsec / (NSEC_PER_SEC / 100)), +diff -NurpP --minimal linux-3.3.8/fs/proc_namespace.c linux-3.3.8-vs2.3.3.4/fs/proc_namespace.c +--- linux-3.3.8/fs/proc_namespace.c 2012-03-19 19:47:26.000000000 +0100 ++++ linux-3.3.8-vs2.3.3.4/fs/proc_namespace.c 2012-04-03 16:00:38.000000000 +0200 +@@ -44,6 +44,8 @@ static int show_sb_opts(struct seq_file + { MS_SYNCHRONOUS, ",sync" }, + { MS_DIRSYNC, ",dirsync" }, + { MS_MANDLOCK, ",mand" }, ++ { MS_TAGGED, ",tag" }, ++ { MS_NOTAGCHECK, ",notagcheck" }, + { 0, NULL } + }; + const struct proc_fs_info *fs_infop; +@@ -80,6 +82,34 @@ static inline void mangle(struct seq_fil + seq_escape(m, s, " \t\n\\"); + } + ++static int mnt_is_reachable(struct vfsmount *vfsmnt) ++{ ++ struct path root; ++ struct dentry *point; ++ struct mount *mnt = real_mount(vfsmnt); ++ struct mount *root_mnt; ++ int ret; ++ ++ if (mnt == mnt->mnt_ns->root) ++ return 1; ++ ++ br_read_lock(vfsmount_lock); ++ root = current->fs->root; ++ root_mnt = real_mount(root.mnt); ++ point = root.dentry; ++ ++ while ((mnt != mnt->mnt_parent) && (mnt != root_mnt)) { ++ point = mnt->mnt_mountpoint; ++ mnt = mnt->mnt_parent; ++ } ++ 
++ ret = (mnt == root_mnt) && is_subdir(point, root.dentry); ++ ++ br_read_unlock(vfsmount_lock); ++ ++ return ret; ++} ++ + static void show_type(struct seq_file *m, struct super_block *sb) + { + mangle(m, sb->s_type->name); +@@ -96,6 +126,17 @@ static int show_vfsmnt(struct seq_file * + struct path mnt_path = { .dentry = mnt->mnt_root, .mnt = mnt }; + struct super_block *sb = mnt_path.dentry->d_sb; + ++ if (vx_flags(VXF_HIDE_MOUNT, 0)) ++ return SEQ_SKIP; ++ if (!mnt_is_reachable(mnt) && !vx_check(0, VS_WATCH_P)) ++ return SEQ_SKIP; ++ ++ if (!vx_check(0, VS_ADMIN|VS_WATCH) && ++ mnt == current->fs->root.mnt) { ++ seq_puts(m, "/dev/root / "); ++ goto type; ++ } ++ + if (sb->s_op->show_devname) { + err = sb->s_op->show_devname(m, mnt_path.dentry); + if (err) +@@ -106,6 +147,7 @@ static int show_vfsmnt(struct seq_file * + seq_putc(m, ' '); + seq_path(m, &mnt_path, " \t\n\\"); + seq_putc(m, ' '); ++type: + show_type(m, sb); + seq_puts(m, __mnt_is_readonly(mnt) ? " ro" : " rw"); + err = show_sb_opts(m, sb); +@@ -128,6 +170,11 @@ static int show_mountinfo(struct seq_fil + struct path root = p->root; + int err = 0; + ++ if (vx_flags(VXF_HIDE_MOUNT, 0)) ++ return SEQ_SKIP; ++ if (!mnt_is_reachable(mnt) && !vx_check(0, VS_WATCH_P)) ++ return SEQ_SKIP; ++ + seq_printf(m, "%i %i %u:%u ", r->mnt_id, r->mnt_parent->mnt_id, + MAJOR(sb->s_dev), MINOR(sb->s_dev)); + if (sb->s_op->show_path) +@@ -187,6 +234,17 @@ static int show_vfsstat(struct seq_file + struct super_block *sb = mnt_path.dentry->d_sb; + int err = 0; + ++ if (vx_flags(VXF_HIDE_MOUNT, 0)) ++ return SEQ_SKIP; ++ if (!mnt_is_reachable(mnt) && !vx_check(0, VS_WATCH_P)) ++ return SEQ_SKIP; ++ ++ if (!vx_check(0, VS_ADMIN|VS_WATCH) && ++ mnt == current->fs->root.mnt) { ++ seq_puts(m, "device /dev/root mounted on / "); ++ goto type; ++ } ++ + /* device */ + if (sb->s_op->show_devname) { + seq_puts(m, "device "); +@@ -203,7 +261,7 @@ static int show_vfsstat(struct seq_file + seq_puts(m, " mounted on "); + seq_path(m, &mnt_path, " \t\n\\"); + seq_putc(m, ' '); +- ++type: + /* file system type */ + seq_puts(m, "with fstype "); + show_type(m, sb); +diff -NurpP --minimal linux-3.3.8/fs/quota/dquot.c linux-3.3.8-vs2.3.3.4/fs/quota/dquot.c +--- linux-3.3.8/fs/quota/dquot.c 2012-03-19 19:47:26.000000000 +0100 ++++ linux-3.3.8-vs2.3.3.4/fs/quota/dquot.c 2012-02-24 03:55:06.000000000 +0100 +@@ -1547,6 +1547,9 @@ int __dquot_alloc_space(struct inode *in + int reserve = flags & DQUOT_SPACE_RESERVE; + int nofail = flags & DQUOT_SPACE_NOFAIL; + ++ if ((ret = dl_alloc_space(inode, number))) ++ return ret; ++ + /* + * First test before acquiring mutex - solves deadlocks when we + * re-enter the quota code and are already holding the mutex +@@ -1601,6 +1604,9 @@ int dquot_alloc_inode(const struct inode + int cnt, ret = 0; + char warntype[MAXQUOTAS]; + ++ if ((ret = dl_alloc_inode(inode))) ++ return ret; ++ + /* First test before acquiring mutex - solves deadlocks when we + * re-enter the quota code and are already holding the mutex */ + if (!dquot_active(inode)) +@@ -1671,6 +1677,8 @@ void __dquot_free_space(struct inode *in + char warntype[MAXQUOTAS]; + int reserve = flags & DQUOT_SPACE_RESERVE; + ++ dl_free_space(inode, number); ++ + /* First test before acquiring mutex - solves deadlocks when we + * re-enter the quota code and are already holding the mutex */ + if (!dquot_active(inode)) { +@@ -1709,6 +1717,8 @@ void dquot_free_inode(const struct inode + unsigned int cnt; + char warntype[MAXQUOTAS]; + ++ dl_free_inode(inode); ++ + /* First test before 
acquiring mutex - solves deadlocks when we + * re-enter the quota code and are already holding the mutex */ + if (!dquot_active(inode)) +diff -NurpP --minimal linux-3.3.8/fs/quota/quota.c linux-3.3.8-vs2.3.3.4/fs/quota/quota.c +--- linux-3.3.8/fs/quota/quota.c 2012-03-19 19:47:26.000000000 +0100 ++++ linux-3.3.8-vs2.3.3.4/fs/quota/quota.c 2012-03-19 20:54:39.000000000 +0100 +@@ -8,6 +8,7 @@ + #include + #include + #include ++#include + #include + #include + #include +@@ -37,7 +38,7 @@ static int check_quotactl_permission(str + break; + /*FALLTHROUGH*/ + default: +- if (!capable(CAP_SYS_ADMIN)) ++ if (!vx_capable(CAP_SYS_ADMIN, VXC_QUOTA_CTL)) + return -EPERM; + } + +@@ -292,6 +293,46 @@ static int do_quotactl(struct super_bloc + } + } + ++#if defined(CONFIG_BLK_DEV_VROOT) || defined(CONFIG_BLK_DEV_VROOT_MODULE) ++ ++#include ++#include ++#include ++#include ++#include ++ ++static vroot_grb_func *vroot_get_real_bdev = NULL; ++ ++static DEFINE_SPINLOCK(vroot_grb_lock); ++ ++int register_vroot_grb(vroot_grb_func *func) { ++ int ret = -EBUSY; ++ ++ spin_lock(&vroot_grb_lock); ++ if (!vroot_get_real_bdev) { ++ vroot_get_real_bdev = func; ++ ret = 0; ++ } ++ spin_unlock(&vroot_grb_lock); ++ return ret; ++} ++EXPORT_SYMBOL(register_vroot_grb); ++ ++int unregister_vroot_grb(vroot_grb_func *func) { ++ int ret = -EINVAL; ++ ++ spin_lock(&vroot_grb_lock); ++ if (vroot_get_real_bdev) { ++ vroot_get_real_bdev = NULL; ++ ret = 0; ++ } ++ spin_unlock(&vroot_grb_lock); ++ return ret; ++} ++EXPORT_SYMBOL(unregister_vroot_grb); ++ ++#endif ++ + /* Return 1 if 'cmd' will block on frozen filesystem */ + static int quotactl_cmd_write(int cmd) + { +@@ -324,6 +365,22 @@ static struct super_block *quotactl_bloc + putname(tmp); + if (IS_ERR(bdev)) + return ERR_CAST(bdev); ++#if defined(CONFIG_BLK_DEV_VROOT) || defined(CONFIG_BLK_DEV_VROOT_MODULE) ++ if (bdev && bdev->bd_inode && ++ imajor(bdev->bd_inode) == VROOT_MAJOR) { ++ struct block_device *bdnew = (void *)-EINVAL; ++ ++ if (vroot_get_real_bdev) ++ bdnew = vroot_get_real_bdev(bdev); ++ else ++ vxdprintk(VXD_CBIT(misc, 0), ++ "vroot_get_real_bdev not set"); ++ bdput(bdev); ++ if (IS_ERR(bdnew)) ++ return ERR_PTR(PTR_ERR(bdnew)); ++ bdev = bdnew; ++ } ++#endif + if (quotactl_cmd_write(cmd)) + sb = get_super_thawed(bdev); + else +diff -NurpP --minimal linux-3.3.8/fs/reiserfs/file.c linux-3.3.8-vs2.3.3.4/fs/reiserfs/file.c +--- linux-3.3.8/fs/reiserfs/file.c 2011-10-24 18:45:27.000000000 +0200 ++++ linux-3.3.8-vs2.3.3.4/fs/reiserfs/file.c 2012-02-24 03:55:06.000000000 +0100 +@@ -319,5 +319,6 @@ const struct inode_operations reiserfs_f + .listxattr = reiserfs_listxattr, + .removexattr = reiserfs_removexattr, + .permission = reiserfs_permission, ++ .sync_flags = reiserfs_sync_flags, + .get_acl = reiserfs_get_acl, + }; +diff -NurpP --minimal linux-3.3.8/fs/reiserfs/inode.c linux-3.3.8-vs2.3.3.4/fs/reiserfs/inode.c +--- linux-3.3.8/fs/reiserfs/inode.c 2012-03-19 19:47:26.000000000 +0100 ++++ linux-3.3.8-vs2.3.3.4/fs/reiserfs/inode.c 2012-02-24 03:55:06.000000000 +0100 +@@ -18,6 +18,7 @@ + #include + #include + #include ++#include + + int reiserfs_commit_write(struct file *f, struct page *page, + unsigned from, unsigned to); +@@ -1131,6 +1132,8 @@ static void init_inode(struct inode *ino + struct buffer_head *bh; + struct item_head *ih; + __u32 rdev; ++ uid_t uid; ++ gid_t gid; + //int version = ITEM_VERSION_1; + + bh = PATH_PLAST_BUFFER(path); +@@ -1151,12 +1154,13 @@ static void init_inode(struct inode *ino + (struct stat_data_v1 *)B_I_PITEM(bh, ih); + unsigned long 
blocks; + ++ uid = sd_v1_uid(sd); ++ gid = sd_v1_gid(sd); ++ + set_inode_item_key_version(inode, KEY_FORMAT_3_5); + set_inode_sd_version(inode, STAT_DATA_V1); + inode->i_mode = sd_v1_mode(sd); + set_nlink(inode, sd_v1_nlink(sd)); +- inode->i_uid = sd_v1_uid(sd); +- inode->i_gid = sd_v1_gid(sd); + inode->i_size = sd_v1_size(sd); + inode->i_atime.tv_sec = sd_v1_atime(sd); + inode->i_mtime.tv_sec = sd_v1_mtime(sd); +@@ -1198,11 +1202,12 @@ static void init_inode(struct inode *ino + // (directories and symlinks) + struct stat_data *sd = (struct stat_data *)B_I_PITEM(bh, ih); + ++ uid = sd_v2_uid(sd); ++ gid = sd_v2_gid(sd); ++ + inode->i_mode = sd_v2_mode(sd); + set_nlink(inode, sd_v2_nlink(sd)); +- inode->i_uid = sd_v2_uid(sd); + inode->i_size = sd_v2_size(sd); +- inode->i_gid = sd_v2_gid(sd); + inode->i_mtime.tv_sec = sd_v2_mtime(sd); + inode->i_atime.tv_sec = sd_v2_atime(sd); + inode->i_ctime.tv_sec = sd_v2_ctime(sd); +@@ -1232,6 +1237,10 @@ static void init_inode(struct inode *ino + sd_attrs_to_i_attrs(sd_v2_attrs(sd), inode); + } + ++ inode->i_uid = INOTAG_UID(DX_TAG(inode), uid, gid); ++ inode->i_gid = INOTAG_GID(DX_TAG(inode), uid, gid); ++ inode->i_tag = INOTAG_TAG(DX_TAG(inode), uid, gid, 0); ++ + pathrelse(path); + if (S_ISREG(inode->i_mode)) { + inode->i_op = &reiserfs_file_inode_operations; +@@ -1254,13 +1263,15 @@ static void init_inode(struct inode *ino + static void inode2sd(void *sd, struct inode *inode, loff_t size) + { + struct stat_data *sd_v2 = (struct stat_data *)sd; ++ uid_t uid = TAGINO_UID(DX_TAG(inode), inode->i_uid, inode->i_tag); ++ gid_t gid = TAGINO_GID(DX_TAG(inode), inode->i_gid, inode->i_tag); + __u16 flags; + ++ set_sd_v2_uid(sd_v2, uid); ++ set_sd_v2_gid(sd_v2, gid); + set_sd_v2_mode(sd_v2, inode->i_mode); + set_sd_v2_nlink(sd_v2, inode->i_nlink); +- set_sd_v2_uid(sd_v2, inode->i_uid); + set_sd_v2_size(sd_v2, size); +- set_sd_v2_gid(sd_v2, inode->i_gid); + set_sd_v2_mtime(sd_v2, inode->i_mtime.tv_sec); + set_sd_v2_atime(sd_v2, inode->i_atime.tv_sec); + set_sd_v2_ctime(sd_v2, inode->i_ctime.tv_sec); +@@ -2868,14 +2879,19 @@ int reiserfs_commit_write(struct file *f + void sd_attrs_to_i_attrs(__u16 sd_attrs, struct inode *inode) + { + if (reiserfs_attrs(inode->i_sb)) { +- if (sd_attrs & REISERFS_SYNC_FL) +- inode->i_flags |= S_SYNC; +- else +- inode->i_flags &= ~S_SYNC; + if (sd_attrs & REISERFS_IMMUTABLE_FL) + inode->i_flags |= S_IMMUTABLE; + else + inode->i_flags &= ~S_IMMUTABLE; ++ if (sd_attrs & REISERFS_IXUNLINK_FL) ++ inode->i_flags |= S_IXUNLINK; ++ else ++ inode->i_flags &= ~S_IXUNLINK; ++ ++ if (sd_attrs & REISERFS_SYNC_FL) ++ inode->i_flags |= S_SYNC; ++ else ++ inode->i_flags &= ~S_SYNC; + if (sd_attrs & REISERFS_APPEND_FL) + inode->i_flags |= S_APPEND; + else +@@ -2888,6 +2904,15 @@ void sd_attrs_to_i_attrs(__u16 sd_attrs, + REISERFS_I(inode)->i_flags |= i_nopack_mask; + else + REISERFS_I(inode)->i_flags &= ~i_nopack_mask; ++ ++ if (sd_attrs & REISERFS_BARRIER_FL) ++ inode->i_vflags |= V_BARRIER; ++ else ++ inode->i_vflags &= ~V_BARRIER; ++ if (sd_attrs & REISERFS_COW_FL) ++ inode->i_vflags |= V_COW; ++ else ++ inode->i_vflags &= ~V_COW; + } + } + +@@ -2898,6 +2923,11 @@ void i_attrs_to_sd_attrs(struct inode *i + *sd_attrs |= REISERFS_IMMUTABLE_FL; + else + *sd_attrs &= ~REISERFS_IMMUTABLE_FL; ++ if (inode->i_flags & S_IXUNLINK) ++ *sd_attrs |= REISERFS_IXUNLINK_FL; ++ else ++ *sd_attrs &= ~REISERFS_IXUNLINK_FL; ++ + if (inode->i_flags & S_SYNC) + *sd_attrs |= REISERFS_SYNC_FL; + else +@@ -2910,6 +2940,15 @@ void i_attrs_to_sd_attrs(struct inode *i + 
*sd_attrs |= REISERFS_NOTAIL_FL; + else + *sd_attrs &= ~REISERFS_NOTAIL_FL; ++ ++ if (inode->i_vflags & V_BARRIER) ++ *sd_attrs |= REISERFS_BARRIER_FL; ++ else ++ *sd_attrs &= ~REISERFS_BARRIER_FL; ++ if (inode->i_vflags & V_COW) ++ *sd_attrs |= REISERFS_COW_FL; ++ else ++ *sd_attrs &= ~REISERFS_COW_FL; + } + } + +@@ -3155,7 +3194,8 @@ int reiserfs_setattr(struct dentry *dent + } + + if ((ia_valid & ATTR_UID && attr->ia_uid != inode->i_uid) || +- (ia_valid & ATTR_GID && attr->ia_gid != inode->i_gid)) { ++ (ia_valid & ATTR_GID && attr->ia_gid != inode->i_gid) || ++ (ia_valid & ATTR_TAG && attr->ia_tag != inode->i_tag)) { + struct reiserfs_transaction_handle th; + int jbegin_count = + 2 * +@@ -3184,6 +3224,9 @@ int reiserfs_setattr(struct dentry *dent + inode->i_uid = attr->ia_uid; + if (attr->ia_valid & ATTR_GID) + inode->i_gid = attr->ia_gid; ++ if ((attr->ia_valid & ATTR_TAG) && ++ IS_TAGGED(inode)) ++ inode->i_tag = attr->ia_tag; + mark_inode_dirty(inode); + error = journal_end(&th, inode->i_sb, jbegin_count); + if (error) +diff -NurpP --minimal linux-3.3.8/fs/reiserfs/ioctl.c linux-3.3.8-vs2.3.3.4/fs/reiserfs/ioctl.c +--- linux-3.3.8/fs/reiserfs/ioctl.c 2012-03-19 19:47:26.000000000 +0100 ++++ linux-3.3.8-vs2.3.3.4/fs/reiserfs/ioctl.c 2012-02-24 03:55:06.000000000 +0100 +@@ -11,6 +11,21 @@ + #include + #include + ++ ++int reiserfs_sync_flags(struct inode *inode, int flags, int vflags) ++{ ++ __u16 sd_attrs = 0; ++ ++ inode->i_flags = flags; ++ inode->i_vflags = vflags; ++ ++ i_attrs_to_sd_attrs(inode, &sd_attrs); ++ REISERFS_I(inode)->i_attrs = sd_attrs; ++ inode->i_ctime = CURRENT_TIME_SEC; ++ mark_inode_dirty(inode); ++ return 0; ++} ++ + /* + * reiserfs_ioctl - handler for ioctl for inode + * supported commands: +@@ -22,7 +37,7 @@ + long reiserfs_ioctl(struct file *filp, unsigned int cmd, unsigned long arg) + { + struct inode *inode = filp->f_path.dentry->d_inode; +- unsigned int flags; ++ unsigned int flags, oldflags; + int err = 0; + + reiserfs_write_lock(inode->i_sb); +@@ -47,6 +62,7 @@ long reiserfs_ioctl(struct file *filp, u + + flags = REISERFS_I(inode)->i_attrs; + i_attrs_to_sd_attrs(inode, (__u16 *) & flags); ++ flags &= REISERFS_FL_USER_VISIBLE; + err = put_user(flags, (int __user *)arg); + break; + case REISERFS_IOC_SETFLAGS:{ +@@ -67,6 +83,10 @@ long reiserfs_ioctl(struct file *filp, u + err = -EFAULT; + goto setflags_out; + } ++ if (IS_BARRIER(inode)) { ++ vxwprintk_task(1, "messing with the barrier."); ++ return -EACCES; ++ } + /* + * Is it quota file? 
Do not allow user to mess with it + */ +@@ -91,6 +111,10 @@ long reiserfs_ioctl(struct file *filp, u + goto setflags_out; + } + } ++ ++ oldflags = REISERFS_I(inode)->i_attrs; ++ flags &= REISERFS_FL_USER_MODIFIABLE; ++ flags |= oldflags & ~REISERFS_FL_USER_MODIFIABLE; + sd_attrs_to_i_attrs(flags, inode); + REISERFS_I(inode)->i_attrs = flags; + inode->i_ctime = CURRENT_TIME_SEC; +diff -NurpP --minimal linux-3.3.8/fs/reiserfs/namei.c linux-3.3.8-vs2.3.3.4/fs/reiserfs/namei.c +--- linux-3.3.8/fs/reiserfs/namei.c 2012-03-19 19:47:26.000000000 +0100 ++++ linux-3.3.8-vs2.3.3.4/fs/reiserfs/namei.c 2012-02-24 03:55:06.000000000 +0100 +@@ -18,6 +18,7 @@ + #include + #include + #include ++#include + + #define INC_DIR_INODE_NLINK(i) if (i->i_nlink != 1) { inc_nlink(i); if (i->i_nlink >= REISERFS_LINK_MAX) set_nlink(i, 1); } + #define DEC_DIR_INODE_NLINK(i) if (i->i_nlink != 1) drop_nlink(i); +@@ -362,6 +363,7 @@ static struct dentry *reiserfs_lookup(st + if (retval == IO_ERROR) { + return ERR_PTR(-EIO); + } ++ dx_propagate_tag(nd, inode); + + return d_splice_alias(inode, dentry); + } +diff -NurpP --minimal linux-3.3.8/fs/reiserfs/super.c linux-3.3.8-vs2.3.3.4/fs/reiserfs/super.c +--- linux-3.3.8/fs/reiserfs/super.c 2012-03-19 19:47:26.000000000 +0100 ++++ linux-3.3.8-vs2.3.3.4/fs/reiserfs/super.c 2012-02-24 03:55:06.000000000 +0100 +@@ -980,6 +980,14 @@ static int reiserfs_parse_options(struct + {"user_xattr",.setmask = 1 << REISERFS_UNSUPPORTED_OPT}, + {"nouser_xattr",.clrmask = 1 << REISERFS_UNSUPPORTED_OPT}, + #endif ++#ifndef CONFIG_TAGGING_NONE ++ {"tagxid",.setmask = 1 << REISERFS_TAGGED}, ++ {"tag",.setmask = 1 << REISERFS_TAGGED}, ++ {"notag",.clrmask = 1 << REISERFS_TAGGED}, ++#endif ++#ifdef CONFIG_PROPAGATE ++ {"tag",.arg_required = 'T',.values = NULL}, ++#endif + #ifdef CONFIG_REISERFS_FS_POSIX_ACL + {"acl",.setmask = 1 << REISERFS_POSIXACL}, + {"noacl",.clrmask = 1 << REISERFS_POSIXACL}, +@@ -1298,6 +1306,14 @@ static int reiserfs_remount(struct super + handle_quota_files(s, qf_names, &qfmt); + #endif + ++ if ((mount_options & (1 << REISERFS_TAGGED)) && ++ !(s->s_flags & MS_TAGGED)) { ++ reiserfs_warning(s, "super-vs01", ++ "reiserfs: tagging not permitted on remount."); ++ err = -EINVAL; ++ goto out_err; ++ } ++ + handle_attrs(s); + + /* Add options that are safe here */ +@@ -1777,6 +1793,10 @@ static int reiserfs_fill_super(struct su + goto error_unlocked; + } + ++ /* map mount option tagxid */ ++ if (REISERFS_SB(s)->s_mount_opt & (1 << REISERFS_TAGGED)) ++ s->s_flags |= MS_TAGGED; ++ + rs = SB_DISK_SUPER_BLOCK(s); + /* Let's do basic sanity check to verify that underlying device is not + smaller than the filesystem. 
If the check fails then abort and scream, +diff -NurpP --minimal linux-3.3.8/fs/reiserfs/xattr.c linux-3.3.8-vs2.3.3.4/fs/reiserfs/xattr.c +--- linux-3.3.8/fs/reiserfs/xattr.c 2012-03-19 19:47:26.000000000 +0100 ++++ linux-3.3.8-vs2.3.3.4/fs/reiserfs/xattr.c 2012-02-24 03:55:06.000000000 +0100 +@@ -40,6 +40,7 @@ + #include + #include + #include ++#include + #include + #include + #include +diff -NurpP --minimal linux-3.3.8/fs/stat.c linux-3.3.8-vs2.3.3.4/fs/stat.c +--- linux-3.3.8/fs/stat.c 2012-01-09 16:14:55.000000000 +0100 ++++ linux-3.3.8-vs2.3.3.4/fs/stat.c 2012-02-24 03:55:06.000000000 +0100 +@@ -26,6 +26,7 @@ void generic_fillattr(struct inode *inod + stat->nlink = inode->i_nlink; + stat->uid = inode->i_uid; + stat->gid = inode->i_gid; ++ stat->tag = inode->i_tag; + stat->rdev = inode->i_rdev; + stat->size = i_size_read(inode); + stat->atime = inode->i_atime; +diff -NurpP --minimal linux-3.3.8/fs/statfs.c linux-3.3.8-vs2.3.3.4/fs/statfs.c +--- linux-3.3.8/fs/statfs.c 2012-03-19 19:47:26.000000000 +0100 ++++ linux-3.3.8-vs2.3.3.4/fs/statfs.c 2012-02-24 04:27:47.000000000 +0100 +@@ -7,6 +7,8 @@ + #include + #include + #include ++#include ++#include + #include "internal.h" + + static int flags_by_mnt(int mnt_flags) +@@ -60,6 +62,8 @@ static int statfs_by_dentry(struct dentr + retval = dentry->d_sb->s_op->statfs(dentry, buf); + if (retval == 0 && buf->f_frsize == 0) + buf->f_frsize = buf->f_bsize; ++ if (!vx_check(0, VS_ADMIN|VS_WATCH)) ++ vx_vsi_statfs(dentry->d_sb, buf); + return retval; + } + +diff -NurpP --minimal linux-3.3.8/fs/super.c linux-3.3.8-vs2.3.3.4/fs/super.c +--- linux-3.3.8/fs/super.c 2012-03-19 19:47:26.000000000 +0100 ++++ linux-3.3.8-vs2.3.3.4/fs/super.c 2012-03-19 20:52:10.000000000 +0100 +@@ -32,6 +32,9 @@ + #include + #include + #include ++#include ++#include ++#include + #include "internal.h" + + +@@ -1137,6 +1140,13 @@ mount_fs(struct file_system_type *type, + WARN_ON(sb->s_bdi == &default_backing_dev_info); + sb->s_flags |= MS_BORN; + ++ error = -EPERM; ++ if (!vx_capable(CAP_SYS_ADMIN, VXC_BINARY_MOUNT) && ++ !sb->s_bdev && ++ (sb->s_magic != PROC_SUPER_MAGIC) && ++ (sb->s_magic != DEVPTS_SUPER_MAGIC)) ++ goto out_sb; ++ + error = security_sb_kern_mount(sb, flags, secdata); + if (error) + goto out_sb; +diff -NurpP --minimal linux-3.3.8/fs/sysfs/mount.c linux-3.3.8-vs2.3.3.4/fs/sysfs/mount.c +--- linux-3.3.8/fs/sysfs/mount.c 2011-07-22 11:18:06.000000000 +0200 ++++ linux-3.3.8-vs2.3.3.4/fs/sysfs/mount.c 2012-02-24 03:55:06.000000000 +0100 +@@ -47,7 +47,7 @@ static int sysfs_fill_super(struct super + + sb->s_blocksize = PAGE_CACHE_SIZE; + sb->s_blocksize_bits = PAGE_CACHE_SHIFT; +- sb->s_magic = SYSFS_MAGIC; ++ sb->s_magic = SYSFS_SUPER_MAGIC; + sb->s_op = &sysfs_ops; + sb->s_time_gran = 1; + +diff -NurpP --minimal linux-3.3.8/fs/utimes.c linux-3.3.8-vs2.3.3.4/fs/utimes.c +--- linux-3.3.8/fs/utimes.c 2011-05-22 16:17:54.000000000 +0200 ++++ linux-3.3.8-vs2.3.3.4/fs/utimes.c 2012-02-24 03:55:06.000000000 +0100 +@@ -8,6 +8,8 @@ + #include + #include + #include ++#include ++#include + #include + #include + +@@ -52,12 +54,18 @@ static int utimes_common(struct path *pa + { + int error; + struct iattr newattrs; +- struct inode *inode = path->dentry->d_inode; ++ struct inode *inode; + + error = mnt_want_write(path->mnt); + if (error) + goto out; + ++ error = cow_check_and_break(path); ++ if (error) ++ goto mnt_drop_write_and_out; ++ ++ inode = path->dentry->d_inode; ++ + if (times && times[0].tv_nsec == UTIME_NOW && + times[1].tv_nsec == UTIME_NOW) + times = NULL; +diff 
-NurpP --minimal linux-3.3.8/fs/xattr.c linux-3.3.8-vs2.3.3.4/fs/xattr.c +--- linux-3.3.8/fs/xattr.c 2012-03-19 19:47:27.000000000 +0100 ++++ linux-3.3.8-vs2.3.3.4/fs/xattr.c 2012-02-24 03:55:06.000000000 +0100 +@@ -19,6 +19,7 @@ + #include + #include + #include ++#include + #include + + +@@ -50,7 +51,7 @@ xattr_permission(struct inode *inode, co + * The trusted.* namespace can only be accessed by privileged users. + */ + if (!strncmp(name, XATTR_TRUSTED_PREFIX, XATTR_TRUSTED_PREFIX_LEN)) { +- if (!capable(CAP_SYS_ADMIN)) ++ if (!vx_capable(CAP_SYS_ADMIN, VXC_FS_TRUSTED)) + return (mask & MAY_WRITE) ? -EPERM : -ENODATA; + return 0; + } +diff -NurpP --minimal linux-3.3.8/fs/xfs/xfs_dinode.h linux-3.3.8-vs2.3.3.4/fs/xfs/xfs_dinode.h +--- linux-3.3.8/fs/xfs/xfs_dinode.h 2011-10-24 18:45:31.000000000 +0200 ++++ linux-3.3.8-vs2.3.3.4/fs/xfs/xfs_dinode.h 2012-02-24 03:55:06.000000000 +0100 +@@ -51,7 +51,9 @@ typedef struct xfs_dinode { + __be32 di_nlink; /* number of links to file */ + __be16 di_projid_lo; /* lower part of owner's project id */ + __be16 di_projid_hi; /* higher part owner's project id */ +- __u8 di_pad[6]; /* unused, zeroed space */ ++ __u8 di_pad[2]; /* unused, zeroed space */ ++ __be16 di_tag; /* context tagging */ ++ __be16 di_vflags; /* vserver specific flags */ + __be16 di_flushiter; /* incremented on flush */ + xfs_timestamp_t di_atime; /* time last accessed */ + xfs_timestamp_t di_mtime; /* time last modified */ +@@ -184,6 +186,8 @@ static inline void xfs_dinode_put_rdev(s + #define XFS_DIFLAG_EXTSZINHERIT_BIT 12 /* inherit inode extent size */ + #define XFS_DIFLAG_NODEFRAG_BIT 13 /* do not reorganize/defragment */ + #define XFS_DIFLAG_FILESTREAM_BIT 14 /* use filestream allocator */ ++#define XFS_DIFLAG_IXUNLINK_BIT 15 /* Immutable inver on unlink */ ++ + #define XFS_DIFLAG_REALTIME (1 << XFS_DIFLAG_REALTIME_BIT) + #define XFS_DIFLAG_PREALLOC (1 << XFS_DIFLAG_PREALLOC_BIT) + #define XFS_DIFLAG_NEWRTBM (1 << XFS_DIFLAG_NEWRTBM_BIT) +@@ -199,6 +203,7 @@ static inline void xfs_dinode_put_rdev(s + #define XFS_DIFLAG_EXTSZINHERIT (1 << XFS_DIFLAG_EXTSZINHERIT_BIT) + #define XFS_DIFLAG_NODEFRAG (1 << XFS_DIFLAG_NODEFRAG_BIT) + #define XFS_DIFLAG_FILESTREAM (1 << XFS_DIFLAG_FILESTREAM_BIT) ++#define XFS_DIFLAG_IXUNLINK (1 << XFS_DIFLAG_IXUNLINK_BIT) + + #ifdef CONFIG_XFS_RT + #define XFS_IS_REALTIME_INODE(ip) ((ip)->i_d.di_flags & XFS_DIFLAG_REALTIME) +@@ -211,6 +216,10 @@ static inline void xfs_dinode_put_rdev(s + XFS_DIFLAG_IMMUTABLE | XFS_DIFLAG_APPEND | XFS_DIFLAG_SYNC | \ + XFS_DIFLAG_NOATIME | XFS_DIFLAG_NODUMP | XFS_DIFLAG_RTINHERIT | \ + XFS_DIFLAG_PROJINHERIT | XFS_DIFLAG_NOSYMLINKS | XFS_DIFLAG_EXTSIZE | \ +- XFS_DIFLAG_EXTSZINHERIT | XFS_DIFLAG_NODEFRAG | XFS_DIFLAG_FILESTREAM) ++ XFS_DIFLAG_EXTSZINHERIT | XFS_DIFLAG_NODEFRAG | XFS_DIFLAG_FILESTREAM | \ ++ XFS_DIFLAG_IXUNLINK) ++ ++#define XFS_DIVFLAG_BARRIER 0x01 ++#define XFS_DIVFLAG_COW 0x02 + + #endif /* __XFS_DINODE_H__ */ +diff -NurpP --minimal linux-3.3.8/fs/xfs/xfs_fs.h linux-3.3.8-vs2.3.3.4/fs/xfs/xfs_fs.h +--- linux-3.3.8/fs/xfs/xfs_fs.h 2011-10-24 18:45:31.000000000 +0200 ++++ linux-3.3.8-vs2.3.3.4/fs/xfs/xfs_fs.h 2012-02-24 03:55:06.000000000 +0100 +@@ -67,6 +67,9 @@ struct fsxattr { + #define XFS_XFLAG_EXTSZINHERIT 0x00001000 /* inherit inode extent size */ + #define XFS_XFLAG_NODEFRAG 0x00002000 /* do not defragment */ + #define XFS_XFLAG_FILESTREAM 0x00004000 /* use filestream allocator */ ++#define XFS_XFLAG_IXUNLINK 0x00008000 /* immutable invert on unlink */ ++#define XFS_XFLAG_BARRIER 0x10000000 /* 
chroot() barrier */ ++#define XFS_XFLAG_COW 0x20000000 /* copy on write mark */ + #define XFS_XFLAG_HASATTR 0x80000000 /* no DIFLAG for this */ + + /* +@@ -302,7 +305,8 @@ typedef struct xfs_bstat { + #define bs_projid bs_projid_lo /* (previously just bs_projid) */ + __u16 bs_forkoff; /* inode fork offset in bytes */ + __u16 bs_projid_hi; /* higher part of project id */ +- unsigned char bs_pad[10]; /* pad space, unused */ ++ unsigned char bs_pad[8]; /* pad space, unused */ ++ __u16 bs_tag; /* context tagging */ + __u32 bs_dmevmask; /* DMIG event mask */ + __u16 bs_dmstate; /* DMIG state info */ + __u16 bs_aextents; /* attribute number of extents */ +diff -NurpP --minimal linux-3.3.8/fs/xfs/xfs_ialloc.c linux-3.3.8-vs2.3.3.4/fs/xfs/xfs_ialloc.c +--- linux-3.3.8/fs/xfs/xfs_ialloc.c 2012-03-19 19:47:27.000000000 +0100 ++++ linux-3.3.8-vs2.3.3.4/fs/xfs/xfs_ialloc.c 2012-02-24 03:55:06.000000000 +0100 +@@ -37,7 +37,6 @@ + #include "xfs_error.h" + #include "xfs_bmap.h" + +- + /* + * Allocation group level functions. + */ +diff -NurpP --minimal linux-3.3.8/fs/xfs/xfs_inode.c linux-3.3.8-vs2.3.3.4/fs/xfs/xfs_inode.c +--- linux-3.3.8/fs/xfs/xfs_inode.c 2012-03-19 19:47:27.000000000 +0100 ++++ linux-3.3.8-vs2.3.3.4/fs/xfs/xfs_inode.c 2012-02-24 03:55:06.000000000 +0100 +@@ -236,6 +236,7 @@ xfs_inotobp( + return 0; + } + ++#include + + /* + * This routine is called to map an inode to the buffer containing +@@ -631,15 +632,25 @@ xfs_iformat_btree( + STATIC void + xfs_dinode_from_disk( + xfs_icdinode_t *to, +- xfs_dinode_t *from) ++ xfs_dinode_t *from, ++ int tagged) + { ++ uint32_t uid, gid, tag; ++ + to->di_magic = be16_to_cpu(from->di_magic); + to->di_mode = be16_to_cpu(from->di_mode); + to->di_version = from ->di_version; + to->di_format = from->di_format; + to->di_onlink = be16_to_cpu(from->di_onlink); +- to->di_uid = be32_to_cpu(from->di_uid); +- to->di_gid = be32_to_cpu(from->di_gid); ++ ++ uid = be32_to_cpu(from->di_uid); ++ gid = be32_to_cpu(from->di_gid); ++ tag = be16_to_cpu(from->di_tag); ++ ++ to->di_uid = INOTAG_UID(tagged, uid, gid); ++ to->di_gid = INOTAG_GID(tagged, uid, gid); ++ to->di_tag = INOTAG_TAG(tagged, uid, gid, tag); ++ + to->di_nlink = be32_to_cpu(from->di_nlink); + to->di_projid_lo = be16_to_cpu(from->di_projid_lo); + to->di_projid_hi = be16_to_cpu(from->di_projid_hi); +@@ -661,21 +672,26 @@ xfs_dinode_from_disk( + to->di_dmevmask = be32_to_cpu(from->di_dmevmask); + to->di_dmstate = be16_to_cpu(from->di_dmstate); + to->di_flags = be16_to_cpu(from->di_flags); ++ to->di_vflags = be16_to_cpu(from->di_vflags); + to->di_gen = be32_to_cpu(from->di_gen); + } + + void + xfs_dinode_to_disk( + xfs_dinode_t *to, +- xfs_icdinode_t *from) ++ xfs_icdinode_t *from, ++ int tagged) + { + to->di_magic = cpu_to_be16(from->di_magic); + to->di_mode = cpu_to_be16(from->di_mode); + to->di_version = from ->di_version; + to->di_format = from->di_format; + to->di_onlink = cpu_to_be16(from->di_onlink); +- to->di_uid = cpu_to_be32(from->di_uid); +- to->di_gid = cpu_to_be32(from->di_gid); ++ ++ to->di_uid = cpu_to_be32(TAGINO_UID(tagged, from->di_uid, from->di_tag)); ++ to->di_gid = cpu_to_be32(TAGINO_GID(tagged, from->di_gid, from->di_tag)); ++ to->di_tag = cpu_to_be16(TAGINO_TAG(tagged, from->di_tag)); ++ + to->di_nlink = cpu_to_be32(from->di_nlink); + to->di_projid_lo = cpu_to_be16(from->di_projid_lo); + to->di_projid_hi = cpu_to_be16(from->di_projid_hi); +@@ -697,12 +713,14 @@ xfs_dinode_to_disk( + to->di_dmevmask = cpu_to_be32(from->di_dmevmask); + to->di_dmstate = cpu_to_be16(from->di_dmstate); + 
to->di_flags = cpu_to_be16(from->di_flags); ++ to->di_vflags = cpu_to_be16(from->di_vflags); + to->di_gen = cpu_to_be32(from->di_gen); + } + + STATIC uint + _xfs_dic2xflags( +- __uint16_t di_flags) ++ __uint16_t di_flags, ++ __uint16_t di_vflags) + { + uint flags = 0; + +@@ -713,6 +731,8 @@ _xfs_dic2xflags( + flags |= XFS_XFLAG_PREALLOC; + if (di_flags & XFS_DIFLAG_IMMUTABLE) + flags |= XFS_XFLAG_IMMUTABLE; ++ if (di_flags & XFS_DIFLAG_IXUNLINK) ++ flags |= XFS_XFLAG_IXUNLINK; + if (di_flags & XFS_DIFLAG_APPEND) + flags |= XFS_XFLAG_APPEND; + if (di_flags & XFS_DIFLAG_SYNC) +@@ -737,6 +757,10 @@ _xfs_dic2xflags( + flags |= XFS_XFLAG_FILESTREAM; + } + ++ if (di_vflags & XFS_DIVFLAG_BARRIER) ++ flags |= FS_BARRIER_FL; ++ if (di_vflags & XFS_DIVFLAG_COW) ++ flags |= FS_COW_FL; + return flags; + } + +@@ -746,7 +770,7 @@ xfs_ip2xflags( + { + xfs_icdinode_t *dic = &ip->i_d; + +- return _xfs_dic2xflags(dic->di_flags) | ++ return _xfs_dic2xflags(dic->di_flags, dic->di_vflags) | + (XFS_IFORK_Q(ip) ? XFS_XFLAG_HASATTR : 0); + } + +@@ -754,7 +778,8 @@ uint + xfs_dic2xflags( + xfs_dinode_t *dip) + { +- return _xfs_dic2xflags(be16_to_cpu(dip->di_flags)) | ++ return _xfs_dic2xflags(be16_to_cpu(dip->di_flags), ++ be16_to_cpu(dip->di_vflags)) | + (XFS_DFORK_Q(dip) ? XFS_XFLAG_HASATTR : 0); + } + +@@ -787,7 +812,6 @@ xfs_iread( + if (error) + return error; + dip = (xfs_dinode_t *)xfs_buf_offset(bp, ip->i_imap.im_boffset); +- + /* + * If we got something that isn't an inode it means someone + * (nfs or dmi) has a stale handle. +@@ -810,7 +834,8 @@ xfs_iread( + * Otherwise, just get the truly permanent information. + */ + if (dip->di_mode) { +- xfs_dinode_from_disk(&ip->i_d, dip); ++ xfs_dinode_from_disk(&ip->i_d, dip, ++ mp->m_flags & XFS_MOUNT_TAGGED); + error = xfs_iformat(ip, dip); + if (error) { + #ifdef DEBUG +@@ -998,6 +1023,7 @@ xfs_ialloc( + ASSERT(ip->i_d.di_nlink == nlink); + ip->i_d.di_uid = current_fsuid(); + ip->i_d.di_gid = current_fsgid(); ++ ip->i_d.di_tag = current_fstag(&ip->i_vnode); + xfs_set_projid(ip, prid); + memset(&(ip->i_d.di_pad[0]), 0, sizeof(ip->i_d.di_pad)); + +@@ -1057,6 +1083,7 @@ xfs_ialloc( + ip->i_d.di_dmevmask = 0; + ip->i_d.di_dmstate = 0; + ip->i_d.di_flags = 0; ++ ip->i_d.di_vflags = 0; + flags = XFS_ILOG_CORE; + switch (mode & S_IFMT) { + case S_IFIFO: +@@ -1726,6 +1753,7 @@ xfs_ifree( + } + ip->i_d.di_mode = 0; /* mark incore inode as free */ + ip->i_d.di_flags = 0; ++ ip->i_d.di_vflags = 0; + ip->i_d.di_dmevmask = 0; + ip->i_d.di_forkoff = 0; /* mark the attr fork not in use */ + ip->i_d.di_format = XFS_DINODE_FMT_EXTENTS; +@@ -2620,7 +2648,8 @@ xfs_iflush_int( + * because if the inode is dirty at all the core must + * be. 
+ */ +- xfs_dinode_to_disk(dip, &ip->i_d); ++ xfs_dinode_to_disk(dip, &ip->i_d, ++ mp->m_flags & XFS_MOUNT_TAGGED); + + /* Wrap, we never let the log put out DI_MAX_FLUSH */ + if (ip->i_d.di_flushiter == DI_MAX_FLUSH) +diff -NurpP --minimal linux-3.3.8/fs/xfs/xfs_inode.h linux-3.3.8-vs2.3.3.4/fs/xfs/xfs_inode.h +--- linux-3.3.8/fs/xfs/xfs_inode.h 2012-03-19 19:47:27.000000000 +0100 ++++ linux-3.3.8-vs2.3.3.4/fs/xfs/xfs_inode.h 2012-02-24 03:55:06.000000000 +0100 +@@ -134,7 +134,9 @@ typedef struct xfs_icdinode { + __uint32_t di_nlink; /* number of links to file */ + __uint16_t di_projid_lo; /* lower part of owner's project id */ + __uint16_t di_projid_hi; /* higher part of owner's project id */ +- __uint8_t di_pad[6]; /* unused, zeroed space */ ++ __uint8_t di_pad[2]; /* unused, zeroed space */ ++ __uint16_t di_tag; /* context tagging */ ++ __uint16_t di_vflags; /* vserver specific flags */ + __uint16_t di_flushiter; /* incremented on flush */ + xfs_ictimestamp_t di_atime; /* time last accessed */ + xfs_ictimestamp_t di_mtime; /* time last modified */ +@@ -556,7 +558,7 @@ int xfs_itobp(struct xfs_mount *, struc + int xfs_iread(struct xfs_mount *, struct xfs_trans *, + struct xfs_inode *, uint); + void xfs_dinode_to_disk(struct xfs_dinode *, +- struct xfs_icdinode *); ++ struct xfs_icdinode *, int); + void xfs_idestroy_fork(struct xfs_inode *, int); + void xfs_idata_realloc(struct xfs_inode *, int, int); + void xfs_iroot_realloc(struct xfs_inode *, int, int); +diff -NurpP --minimal linux-3.3.8/fs/xfs/xfs_ioctl.c linux-3.3.8-vs2.3.3.4/fs/xfs/xfs_ioctl.c +--- linux-3.3.8/fs/xfs/xfs_ioctl.c 2012-03-19 19:47:27.000000000 +0100 ++++ linux-3.3.8-vs2.3.3.4/fs/xfs/xfs_ioctl.c 2012-02-24 03:55:06.000000000 +0100 +@@ -28,7 +28,7 @@ + #include "xfs_bmap_btree.h" + #include "xfs_dinode.h" + #include "xfs_inode.h" +-#include "xfs_ioctl.h" ++// #include "xfs_ioctl.h" + #include "xfs_rtalloc.h" + #include "xfs_itable.h" + #include "xfs_error.h" +@@ -748,6 +748,10 @@ xfs_merge_ioc_xflags( + xflags |= XFS_XFLAG_IMMUTABLE; + else + xflags &= ~XFS_XFLAG_IMMUTABLE; ++ if (flags & FS_IXUNLINK_FL) ++ xflags |= XFS_XFLAG_IXUNLINK; ++ else ++ xflags &= ~XFS_XFLAG_IXUNLINK; + if (flags & FS_APPEND_FL) + xflags |= XFS_XFLAG_APPEND; + else +@@ -776,6 +780,8 @@ xfs_di2lxflags( + + if (di_flags & XFS_DIFLAG_IMMUTABLE) + flags |= FS_IMMUTABLE_FL; ++ if (di_flags & XFS_DIFLAG_IXUNLINK) ++ flags |= FS_IXUNLINK_FL; + if (di_flags & XFS_DIFLAG_APPEND) + flags |= FS_APPEND_FL; + if (di_flags & XFS_DIFLAG_SYNC) +@@ -836,6 +842,8 @@ xfs_set_diflags( + di_flags = (ip->i_d.di_flags & XFS_DIFLAG_PREALLOC); + if (xflags & XFS_XFLAG_IMMUTABLE) + di_flags |= XFS_DIFLAG_IMMUTABLE; ++ if (xflags & XFS_XFLAG_IXUNLINK) ++ di_flags |= XFS_DIFLAG_IXUNLINK; + if (xflags & XFS_XFLAG_APPEND) + di_flags |= XFS_DIFLAG_APPEND; + if (xflags & XFS_XFLAG_SYNC) +@@ -878,6 +886,10 @@ xfs_diflags_to_linux( + inode->i_flags |= S_IMMUTABLE; + else + inode->i_flags &= ~S_IMMUTABLE; ++ if (xflags & XFS_XFLAG_IXUNLINK) ++ inode->i_flags |= S_IXUNLINK; ++ else ++ inode->i_flags &= ~S_IXUNLINK; + if (xflags & XFS_XFLAG_APPEND) + inode->i_flags |= S_APPEND; + else +@@ -1370,10 +1382,18 @@ xfs_file_ioctl( + case XFS_IOC_FSGETXATTRA: + return xfs_ioc_fsgetxattr(ip, 1, arg); + case XFS_IOC_FSSETXATTR: ++ if (IS_BARRIER(inode)) { ++ vxwprintk_task(1, "messing with the barrier."); ++ return -XFS_ERROR(EACCES); ++ } + return xfs_ioc_fssetxattr(ip, filp, arg); + case XFS_IOC_GETXFLAGS: + return xfs_ioc_getxflags(ip, arg); + case XFS_IOC_SETXFLAGS: ++ if 
(IS_BARRIER(inode)) { ++ vxwprintk_task(1, "messing with the barrier."); ++ return -XFS_ERROR(EACCES); ++ } + return xfs_ioc_setxflags(ip, filp, arg); + + case XFS_IOC_FSSETDM: { +diff -NurpP --minimal linux-3.3.8/fs/xfs/xfs_ioctl.h linux-3.3.8-vs2.3.3.4/fs/xfs/xfs_ioctl.h +--- linux-3.3.8/fs/xfs/xfs_ioctl.h 2011-10-24 18:45:31.000000000 +0200 ++++ linux-3.3.8-vs2.3.3.4/fs/xfs/xfs_ioctl.h 2012-02-24 03:55:06.000000000 +0100 +@@ -70,6 +70,12 @@ xfs_handle_to_dentry( + void __user *uhandle, + u32 hlen); + ++extern int ++xfs_sync_flags( ++ struct inode *inode, ++ int flags, ++ int vflags); ++ + extern long + xfs_file_ioctl( + struct file *filp, +diff -NurpP --minimal linux-3.3.8/fs/xfs/xfs_iops.c linux-3.3.8-vs2.3.3.4/fs/xfs/xfs_iops.c +--- linux-3.3.8/fs/xfs/xfs_iops.c 2012-03-19 19:47:27.000000000 +0100 ++++ linux-3.3.8-vs2.3.3.4/fs/xfs/xfs_iops.c 2012-02-24 03:55:06.000000000 +0100 +@@ -30,6 +30,7 @@ + #include "xfs_bmap_btree.h" + #include "xfs_dinode.h" + #include "xfs_inode.h" ++#include "xfs_ioctl.h" + #include "xfs_bmap.h" + #include "xfs_rtalloc.h" + #include "xfs_error.h" +@@ -49,6 +50,7 @@ + #include + #include + #include ++#include + + /* + * Bring the timestamps in the XFS inode uptodate. +@@ -474,6 +476,7 @@ xfs_vn_getattr( + stat->nlink = ip->i_d.di_nlink; + stat->uid = ip->i_d.di_uid; + stat->gid = ip->i_d.di_gid; ++ stat->tag = ip->i_d.di_tag; + stat->ino = ip->i_ino; + stat->atime = inode->i_atime; + stat->mtime = inode->i_mtime; +@@ -1051,6 +1054,7 @@ static const struct inode_operations xfs + .removexattr = generic_removexattr, + .listxattr = xfs_vn_listxattr, + .fiemap = xfs_vn_fiemap, ++ .sync_flags = xfs_sync_flags, + }; + + static const struct inode_operations xfs_dir_inode_operations = { +@@ -1076,6 +1080,7 @@ static const struct inode_operations xfs + .getxattr = generic_getxattr, + .removexattr = generic_removexattr, + .listxattr = xfs_vn_listxattr, ++ .sync_flags = xfs_sync_flags, + }; + + static const struct inode_operations xfs_dir_ci_inode_operations = { +@@ -1125,6 +1130,10 @@ xfs_diflags_to_iflags( + inode->i_flags |= S_IMMUTABLE; + else + inode->i_flags &= ~S_IMMUTABLE; ++ if (ip->i_d.di_flags & XFS_DIFLAG_IXUNLINK) ++ inode->i_flags |= S_IXUNLINK; ++ else ++ inode->i_flags &= ~S_IXUNLINK; + if (ip->i_d.di_flags & XFS_DIFLAG_APPEND) + inode->i_flags |= S_APPEND; + else +@@ -1137,6 +1146,15 @@ xfs_diflags_to_iflags( + inode->i_flags |= S_NOATIME; + else + inode->i_flags &= ~S_NOATIME; ++ ++ if (ip->i_d.di_vflags & XFS_DIVFLAG_BARRIER) ++ inode->i_vflags |= V_BARRIER; ++ else ++ inode->i_vflags &= ~V_BARRIER; ++ if (ip->i_d.di_vflags & XFS_DIVFLAG_COW) ++ inode->i_vflags |= V_COW; ++ else ++ inode->i_vflags &= ~V_COW; + } + + /* +@@ -1168,6 +1186,7 @@ xfs_setup_inode( + set_nlink(inode, ip->i_d.di_nlink); + inode->i_uid = ip->i_d.di_uid; + inode->i_gid = ip->i_d.di_gid; ++ inode->i_tag = ip->i_d.di_tag; + + switch (inode->i_mode & S_IFMT) { + case S_IFBLK: +diff -NurpP --minimal linux-3.3.8/fs/xfs/xfs_itable.c linux-3.3.8-vs2.3.3.4/fs/xfs/xfs_itable.c +--- linux-3.3.8/fs/xfs/xfs_itable.c 2011-05-22 16:17:54.000000000 +0200 ++++ linux-3.3.8-vs2.3.3.4/fs/xfs/xfs_itable.c 2012-02-24 03:55:06.000000000 +0100 +@@ -98,6 +98,7 @@ xfs_bulkstat_one_int( + buf->bs_mode = dic->di_mode; + buf->bs_uid = dic->di_uid; + buf->bs_gid = dic->di_gid; ++ buf->bs_tag = dic->di_tag; + buf->bs_size = dic->di_size; + + /* +diff -NurpP --minimal linux-3.3.8/fs/xfs/xfs_linux.h linux-3.3.8-vs2.3.3.4/fs/xfs/xfs_linux.h +--- linux-3.3.8/fs/xfs/xfs_linux.h 2011-10-24 18:45:31.000000000 
+0200 ++++ linux-3.3.8-vs2.3.3.4/fs/xfs/xfs_linux.h 2012-02-24 03:55:06.000000000 +0100 +@@ -121,6 +121,7 @@ + + #define current_cpu() (raw_smp_processor_id()) + #define current_pid() (current->pid) ++#define current_fstag(vp) (dx_current_fstag((vp)->i_sb)) + #define current_test_flags(f) (current->flags & (f)) + #define current_set_flags_nested(sp, f) \ + (*(sp) = current->flags, current->flags |= (f)) +diff -NurpP --minimal linux-3.3.8/fs/xfs/xfs_log_recover.c linux-3.3.8-vs2.3.3.4/fs/xfs/xfs_log_recover.c +--- linux-3.3.8/fs/xfs/xfs_log_recover.c 2012-06-08 15:23:46.000000000 +0200 ++++ linux-3.3.8-vs2.3.3.4/fs/xfs/xfs_log_recover.c 2012-04-03 03:02:12.000000000 +0200 +@@ -2344,7 +2344,8 @@ xlog_recover_inode_pass2( + } + + /* The core is in in-core format */ +- xfs_dinode_to_disk(dip, item->ri_buf[1].i_addr); ++ xfs_dinode_to_disk(dip, item->ri_buf[1].i_addr, ++ mp->m_flags & XFS_MOUNT_TAGGED); + + /* the rest is in on-disk format */ + if (item->ri_buf[1].i_len > sizeof(struct xfs_icdinode)) { +diff -NurpP --minimal linux-3.3.8/fs/xfs/xfs_mount.h linux-3.3.8-vs2.3.3.4/fs/xfs/xfs_mount.h +--- linux-3.3.8/fs/xfs/xfs_mount.h 2012-03-19 19:47:27.000000000 +0100 ++++ linux-3.3.8-vs2.3.3.4/fs/xfs/xfs_mount.h 2012-02-24 03:55:06.000000000 +0100 +@@ -248,6 +248,7 @@ typedef struct xfs_mount { + allocator */ + #define XFS_MOUNT_NOATTR2 (1ULL << 25) /* disable use of attr2 format */ + ++#define XFS_MOUNT_TAGGED (1ULL << 31) /* context tagging */ + + /* + * Default minimum read and write sizes. +diff -NurpP --minimal linux-3.3.8/fs/xfs/xfs_super.c linux-3.3.8-vs2.3.3.4/fs/xfs/xfs_super.c +--- linux-3.3.8/fs/xfs/xfs_super.c 2012-03-19 19:47:27.000000000 +0100 ++++ linux-3.3.8-vs2.3.3.4/fs/xfs/xfs_super.c 2012-02-24 03:55:06.000000000 +0100 +@@ -113,6 +113,9 @@ mempool_t *xfs_ioend_pool; + #define MNTOPT_NODELAYLOG "nodelaylog" /* Delayed logging disabled */ + #define MNTOPT_DISCARD "discard" /* Discard unused blocks */ + #define MNTOPT_NODISCARD "nodiscard" /* Do not discard unused blocks */ ++#define MNTOPT_TAGXID "tagxid" /* context tagging for inodes */ ++#define MNTOPT_TAGGED "tag" /* context tagging for inodes */ ++#define MNTOPT_NOTAGTAG "notag" /* do not use context tagging */ + + /* + * Table driven mount option parser. +@@ -121,10 +124,14 @@ mempool_t *xfs_ioend_pool; + * in the future, too. 
+ */ + enum { ++ Opt_tag, Opt_notag, + Opt_barrier, Opt_nobarrier, Opt_err + }; + + static const match_table_t tokens = { ++ {Opt_tag, "tagxid"}, ++ {Opt_tag, "tag"}, ++ {Opt_notag, "notag"}, + {Opt_barrier, "barrier"}, + {Opt_nobarrier, "nobarrier"}, + {Opt_err, NULL} +@@ -373,6 +380,19 @@ xfs_parseargs( + } else if (!strcmp(this_char, "irixsgid")) { + xfs_warn(mp, + "irixsgid is now a sysctl(2) variable, option is deprecated."); ++#ifndef CONFIG_TAGGING_NONE ++ } else if (!strcmp(this_char, MNTOPT_TAGGED)) { ++ mp->m_flags |= XFS_MOUNT_TAGGED; ++ } else if (!strcmp(this_char, MNTOPT_NOTAGTAG)) { ++ mp->m_flags &= ~XFS_MOUNT_TAGGED; ++ } else if (!strcmp(this_char, MNTOPT_TAGXID)) { ++ mp->m_flags |= XFS_MOUNT_TAGGED; ++#endif ++#ifdef CONFIG_PROPAGATE ++ } else if (!strcmp(this_char, MNTOPT_TAGGED)) { ++ /* use value */ ++ mp->m_flags |= XFS_MOUNT_TAGGED; ++#endif + } else { + xfs_warn(mp, "unknown mount option [%s].", this_char); + return EINVAL; +@@ -1114,6 +1134,16 @@ xfs_fs_remount( + case Opt_nobarrier: + mp->m_flags &= ~XFS_MOUNT_BARRIER; + break; ++ case Opt_tag: ++ if (!(sb->s_flags & MS_TAGGED)) { ++ printk(KERN_INFO ++ "XFS: %s: tagging not permitted on remount.\n", ++ sb->s_id); ++ return -EINVAL; ++ } ++ break; ++ case Opt_notag: ++ break; + default: + /* + * Logically we would return an error here to prevent +@@ -1329,6 +1359,9 @@ xfs_fs_fill_super( + if (error) + goto out_free_sb; + ++ if (mp->m_flags & XFS_MOUNT_TAGGED) ++ sb->s_flags |= MS_TAGGED; ++ + /* + * we must configure the block size in the superblock before we run the + * full mount process as the mount process can lookup and cache inodes. +diff -NurpP --minimal linux-3.3.8/fs/xfs/xfs_vnodeops.c linux-3.3.8-vs2.3.3.4/fs/xfs/xfs_vnodeops.c +--- linux-3.3.8/fs/xfs/xfs_vnodeops.c 2012-03-19 19:47:27.000000000 +0100 ++++ linux-3.3.8-vs2.3.3.4/fs/xfs/xfs_vnodeops.c 2012-04-01 18:12:15.000000000 +0200 +@@ -106,6 +106,77 @@ xfs_readlink_bmap( + return error; + } + ++ ++STATIC void ++xfs_get_inode_flags( ++ xfs_inode_t *ip) ++{ ++ struct inode *inode = VFS_I(ip); ++ unsigned int flags = inode->i_flags; ++ unsigned int vflags = inode->i_vflags; ++ ++ if (flags & S_IMMUTABLE) ++ ip->i_d.di_flags |= XFS_DIFLAG_IMMUTABLE; ++ else ++ ip->i_d.di_flags &= ~XFS_DIFLAG_IMMUTABLE; ++ if (flags & S_IXUNLINK) ++ ip->i_d.di_flags |= XFS_DIFLAG_IXUNLINK; ++ else ++ ip->i_d.di_flags &= ~XFS_DIFLAG_IXUNLINK; ++ ++ if (vflags & V_BARRIER) ++ ip->i_d.di_vflags |= XFS_DIVFLAG_BARRIER; ++ else ++ ip->i_d.di_vflags &= ~XFS_DIVFLAG_BARRIER; ++ if (vflags & V_COW) ++ ip->i_d.di_vflags |= XFS_DIVFLAG_COW; ++ else ++ ip->i_d.di_vflags &= ~XFS_DIVFLAG_COW; ++} ++ ++int ++xfs_sync_flags( ++ struct inode *inode, ++ int flags, ++ int vflags) ++{ ++ struct xfs_inode *ip = XFS_I(inode); ++ struct xfs_mount *mp = ip->i_mount; ++ struct xfs_trans *tp; ++ unsigned int lock_flags = 0; ++ int code; ++ ++ tp = xfs_trans_alloc(mp, XFS_TRANS_SETATTR_NOT_SIZE); ++ code = xfs_trans_reserve(tp, 0, XFS_ICHANGE_LOG_RES(mp), 0, 0, 0); ++ if (code) ++ goto error_out; ++ ++ xfs_ilock(ip, XFS_ILOCK_EXCL); ++ xfs_trans_ijoin(tp, ip, 0); ++ ++ inode->i_flags = flags; ++ inode->i_vflags = vflags; ++ xfs_get_inode_flags(ip); ++ ++ xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE); ++ xfs_trans_ichgtime(tp, ip, XFS_ICHGTIME_CHG); ++ ++ XFS_STATS_INC(xs_ig_attrchg); ++ ++ if (mp->m_flags & XFS_MOUNT_WSYNC) ++ xfs_trans_set_sync(tp); ++ code = xfs_trans_commit(tp, 0); ++ xfs_iunlock(ip, XFS_ILOCK_EXCL); ++ return code; ++ ++error_out: ++ xfs_trans_cancel(tp, 0); ++ if (lock_flags) 
++ xfs_iunlock(ip, XFS_ILOCK_EXCL); ++ return code; ++} ++ ++ + int + xfs_readlink( + xfs_inode_t *ip, +diff -NurpP --minimal linux-3.3.8/include/linux/Kbuild linux-3.3.8-vs2.3.3.4/include/linux/Kbuild +--- linux-3.3.8/include/linux/Kbuild 2012-03-19 19:47:27.000000000 +0100 ++++ linux-3.3.8-vs2.3.3.4/include/linux/Kbuild 2012-02-24 03:55:06.000000000 +0100 +@@ -17,6 +17,7 @@ header-y += netfilter_bridge/ + header-y += netfilter_ipv4/ + header-y += netfilter_ipv6/ + header-y += usb/ ++header-y += vserver/ + header-y += wimax/ + + objhdr-y += version.h +diff -NurpP --minimal linux-3.3.8/include/linux/capability.h linux-3.3.8-vs2.3.3.4/include/linux/capability.h +--- linux-3.3.8/include/linux/capability.h 2012-03-19 19:47:27.000000000 +0100 ++++ linux-3.3.8-vs2.3.3.4/include/linux/capability.h 2012-02-24 03:55:06.000000000 +0100 +@@ -280,6 +280,7 @@ struct cpu_vfs_cap_data { + arbitrary SCSI commands */ + /* Allow setting encryption key on loopback filesystem */ + /* Allow setting zone reclaim policy */ ++/* Allow the selection of a security context */ + + #define CAP_SYS_ADMIN 21 + +@@ -363,7 +364,12 @@ struct cpu_vfs_cap_data { + + #define CAP_LAST_CAP CAP_WAKE_ALARM + +-#define cap_valid(x) ((x) >= 0 && (x) <= CAP_LAST_CAP) ++/* Allow context manipulations */ ++/* Allow changing context info on files */ ++ ++#define CAP_CONTEXT 63 ++ ++#define cap_valid(x) ((x) >= 0 && ((x) <= CAP_LAST_CAP || (x) == CAP_CONTEXT)) + + /* + * Bit location of each capability (used by user-space library and kernel) +diff -NurpP --minimal linux-3.3.8/include/linux/cred.h linux-3.3.8-vs2.3.3.4/include/linux/cred.h +--- linux-3.3.8/include/linux/cred.h 2012-03-19 19:47:27.000000000 +0100 ++++ linux-3.3.8-vs2.3.3.4/include/linux/cred.h 2012-02-24 03:55:06.000000000 +0100 +@@ -156,6 +156,7 @@ extern void exit_creds(struct task_struc + extern int copy_creds(struct task_struct *, unsigned long); + extern const struct cred *get_task_cred(struct task_struct *); + extern struct cred *cred_alloc_blank(void); ++extern struct cred *__prepare_creds(const struct cred *); + extern struct cred *prepare_creds(void); + extern struct cred *prepare_exec_creds(void); + extern int commit_creds(struct cred *); +@@ -209,6 +210,31 @@ static inline void validate_process_cred + } + #endif + ++static inline void set_cred_subscribers(struct cred *cred, int n) ++{ ++#ifdef CONFIG_DEBUG_CREDENTIALS ++ atomic_set(&cred->subscribers, n); ++#endif ++} ++ ++static inline int read_cred_subscribers(const struct cred *cred) ++{ ++#ifdef CONFIG_DEBUG_CREDENTIALS ++ return atomic_read(&cred->subscribers); ++#else ++ return 0; ++#endif ++} ++ ++static inline void alter_cred_subscribers(const struct cred *_cred, int n) ++{ ++#ifdef CONFIG_DEBUG_CREDENTIALS ++ struct cred *cred = (struct cred *) _cred; ++ ++ atomic_add(n, &cred->subscribers); ++#endif ++} ++ + /** + * get_new_cred - Get a reference on a new set of credentials + * @cred: The new credentials to reference +diff -NurpP --minimal linux-3.3.8/include/linux/devpts_fs.h linux-3.3.8-vs2.3.3.4/include/linux/devpts_fs.h +--- linux-3.3.8/include/linux/devpts_fs.h 2008-12-25 00:26:37.000000000 +0100 ++++ linux-3.3.8-vs2.3.3.4/include/linux/devpts_fs.h 2012-02-24 03:55:06.000000000 +0100 +@@ -45,5 +45,4 @@ static inline void devpts_pty_kill(struc + + #endif + +- + #endif /* _LINUX_DEVPTS_FS_H */ +diff -NurpP --minimal linux-3.3.8/include/linux/ext2_fs.h linux-3.3.8-vs2.3.3.4/include/linux/ext2_fs.h +--- linux-3.3.8/include/linux/ext2_fs.h 2012-01-09 16:14:56.000000000 +0100 ++++ 
linux-3.3.8-vs2.3.3.4/include/linux/ext2_fs.h 2012-02-24 03:55:06.000000000 +0100 +@@ -190,8 +190,12 @@ struct ext2_group_desc + #define EXT2_NOTAIL_FL FS_NOTAIL_FL /* file tail should not be merged */ + #define EXT2_DIRSYNC_FL FS_DIRSYNC_FL /* dirsync behaviour (directories only) */ + #define EXT2_TOPDIR_FL FS_TOPDIR_FL /* Top of directory hierarchies*/ ++#define EXT2_IXUNLINK_FL FS_IXUNLINK_FL /* Immutable invert on unlink */ + #define EXT2_RESERVED_FL FS_RESERVED_FL /* reserved for ext2 lib */ + ++#define EXT2_BARRIER_FL FS_BARRIER_FL /* Barrier for chroot() */ ++#define EXT2_COW_FL FS_COW_FL /* Copy on Write marker */ ++ + #define EXT2_FL_USER_VISIBLE FS_FL_USER_VISIBLE /* User visible flags */ + #define EXT2_FL_USER_MODIFIABLE FS_FL_USER_MODIFIABLE /* User modifiable flags */ + +@@ -275,7 +279,8 @@ struct ext2_inode { + __u16 i_pad1; + __le16 l_i_uid_high; /* these 2 fields */ + __le16 l_i_gid_high; /* were reserved2[0] */ +- __u32 l_i_reserved2; ++ __le16 l_i_tag; /* Context Tag */ ++ __u16 l_i_reserved2; + } linux2; + struct { + __u8 h_i_frag; /* Fragment number */ +@@ -304,6 +309,7 @@ struct ext2_inode { + #define i_gid_low i_gid + #define i_uid_high osd2.linux2.l_i_uid_high + #define i_gid_high osd2.linux2.l_i_gid_high ++#define i_raw_tag osd2.linux2.l_i_tag + #define i_reserved2 osd2.linux2.l_i_reserved2 + #endif + +@@ -348,6 +354,7 @@ struct ext2_inode { + #define EXT2_MOUNT_USRQUOTA 0x020000 /* user quota */ + #define EXT2_MOUNT_GRPQUOTA 0x040000 /* group quota */ + #define EXT2_MOUNT_RESERVATION 0x080000 /* Preallocation */ ++#define EXT2_MOUNT_TAGGED (1<<24) /* Enable Context Tags */ + + + #define clear_opt(o, opt) o &= ~EXT2_MOUNT_##opt +diff -NurpP --minimal linux-3.3.8/include/linux/ext3_fs.h linux-3.3.8-vs2.3.3.4/include/linux/ext3_fs.h +--- linux-3.3.8/include/linux/ext3_fs.h 2012-03-19 19:47:27.000000000 +0100 ++++ linux-3.3.8-vs2.3.3.4/include/linux/ext3_fs.h 2012-02-24 03:55:06.000000000 +0100 +@@ -173,10 +173,14 @@ struct ext3_group_desc + #define EXT3_NOTAIL_FL 0x00008000 /* file tail should not be merged */ + #define EXT3_DIRSYNC_FL 0x00010000 /* dirsync behaviour (directories only) */ + #define EXT3_TOPDIR_FL 0x00020000 /* Top of directory hierarchies*/ ++#define EXT3_IXUNLINK_FL 0x08000000 /* Immutable invert on unlink */ + #define EXT3_RESERVED_FL 0x80000000 /* reserved for ext3 lib */ + +-#define EXT3_FL_USER_VISIBLE 0x0003DFFF /* User visible flags */ +-#define EXT3_FL_USER_MODIFIABLE 0x000380FF /* User modifiable flags */ ++#define EXT3_BARRIER_FL 0x04000000 /* Barrier for chroot() */ ++#define EXT3_COW_FL 0x20000000 /* Copy on Write marker */ ++ ++#define EXT3_FL_USER_VISIBLE 0x0103DFFF /* User visible flags */ ++#define EXT3_FL_USER_MODIFIABLE 0x010380FF /* User modifiable flags */ + + /* Flags that should be inherited by new inodes from their parent. 
*/ + #define EXT3_FL_INHERITED (EXT3_SECRM_FL | EXT3_UNRM_FL | EXT3_COMPR_FL |\ +@@ -312,7 +316,8 @@ struct ext3_inode { + __u16 i_pad1; + __le16 l_i_uid_high; /* these 2 fields */ + __le16 l_i_gid_high; /* were reserved2[0] */ +- __u32 l_i_reserved2; ++ __le16 l_i_tag; /* Context Tag */ ++ __u16 l_i_reserved2; + } linux2; + struct { + __u8 h_i_frag; /* Fragment number */ +@@ -343,6 +348,7 @@ struct ext3_inode { + #define i_gid_low i_gid + #define i_uid_high osd2.linux2.l_i_uid_high + #define i_gid_high osd2.linux2.l_i_gid_high ++#define i_raw_tag osd2.linux2.l_i_tag + #define i_reserved2 osd2.linux2.l_i_reserved2 + + #elif defined(__GNU__) +@@ -405,6 +411,7 @@ struct ext3_inode { + #define EXT3_MOUNT_GRPQUOTA 0x200000 /* "old" group quota */ + #define EXT3_MOUNT_DATA_ERR_ABORT 0x400000 /* Abort on file data write + * error in ordered mode */ ++#define EXT3_MOUNT_TAGGED (1<<24) /* Enable Context Tags */ + + /* Compatibility, for having both ext2_fs.h and ext3_fs.h included at once */ + #ifndef _LINUX_EXT2_FS_H +@@ -918,6 +925,7 @@ extern void ext3_get_inode_flags(struct + extern void ext3_set_aops(struct inode *inode); + extern int ext3_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo, + u64 start, u64 len); ++extern int ext3_sync_flags(struct inode *, int, int); + + /* ioctl.c */ + extern long ext3_ioctl(struct file *, unsigned int, unsigned long); +diff -NurpP --minimal linux-3.3.8/include/linux/fs.h linux-3.3.8-vs2.3.3.4/include/linux/fs.h +--- linux-3.3.8/include/linux/fs.h 2012-06-08 15:23:46.000000000 +0200 ++++ linux-3.3.8-vs2.3.3.4/include/linux/fs.h 2012-06-08 15:27:44.000000000 +0200 +@@ -210,6 +210,9 @@ struct inodes_stat_t { + #define MS_KERNMOUNT (1<<22) /* this is a kern_mount call */ + #define MS_I_VERSION (1<<23) /* Update inode I_version field */ + #define MS_STRICTATIME (1<<24) /* Always perform atime updates */ ++#define MS_TAGGED (1<<25) /* use generic inode tagging */ ++#define MS_TAGID (1<<26) /* use specific tag for this mount */ ++#define MS_NOTAGCHECK (1<<27) /* don't check tags */ + #define MS_NOSEC (1<<28) + #define MS_BORN (1<<29) + #define MS_ACTIVE (1<<30) +@@ -241,6 +244,14 @@ struct inodes_stat_t { + #define S_IMA 1024 /* Inode has an associated IMA struct */ + #define S_AUTOMOUNT 2048 /* Automount/referral quasi-directory */ + #define S_NOSEC 4096 /* no suid or xattr security attributes */ ++#define S_IXUNLINK 8192 /* Immutable Invert on unlink */ ++ ++/* Linux-VServer related Inode flags */ ++ ++#define V_VALID 1 ++#define V_XATTR 2 ++#define V_BARRIER 4 /* Barrier for chroot() */ ++#define V_COW 8 /* Copy on Write */ + + /* + * Note that nosuid etc flags are inode-specific: setting some file-system +@@ -263,12 +274,15 @@ struct inodes_stat_t { + #define IS_DIRSYNC(inode) (__IS_FLG(inode, MS_SYNCHRONOUS|MS_DIRSYNC) || \ + ((inode)->i_flags & (S_SYNC|S_DIRSYNC))) + #define IS_MANDLOCK(inode) __IS_FLG(inode, MS_MANDLOCK) +-#define IS_NOATIME(inode) __IS_FLG(inode, MS_RDONLY|MS_NOATIME) +-#define IS_I_VERSION(inode) __IS_FLG(inode, MS_I_VERSION) ++#define IS_NOATIME(inode) __IS_FLG(inode, MS_RDONLY|MS_NOATIME) ++#define IS_I_VERSION(inode) __IS_FLG(inode, MS_I_VERSION) ++#define IS_TAGGED(inode) __IS_FLG(inode, MS_TAGGED) + + #define IS_NOQUOTA(inode) ((inode)->i_flags & S_NOQUOTA) + #define IS_APPEND(inode) ((inode)->i_flags & S_APPEND) + #define IS_IMMUTABLE(inode) ((inode)->i_flags & S_IMMUTABLE) ++#define IS_IXUNLINK(inode) ((inode)->i_flags & S_IXUNLINK) ++#define IS_IXORUNLINK(inode) ((IS_IXUNLINK(inode) ? 
S_IMMUTABLE : 0) ^ IS_IMMUTABLE(inode)) + #define IS_POSIXACL(inode) __IS_FLG(inode, MS_POSIXACL) + + #define IS_DEADDIR(inode) ((inode)->i_flags & S_DEAD) +@@ -279,6 +293,16 @@ struct inodes_stat_t { + #define IS_AUTOMOUNT(inode) ((inode)->i_flags & S_AUTOMOUNT) + #define IS_NOSEC(inode) ((inode)->i_flags & S_NOSEC) + ++#define IS_BARRIER(inode) (S_ISDIR((inode)->i_mode) && ((inode)->i_vflags & V_BARRIER)) ++ ++#ifdef CONFIG_VSERVER_COWBL ++# define IS_COW(inode) (IS_IXUNLINK(inode) && IS_IMMUTABLE(inode)) ++# define IS_COW_LINK(inode) (S_ISREG((inode)->i_mode) && ((inode)->i_nlink > 1)) ++#else ++# define IS_COW(inode) (0) ++# define IS_COW_LINK(inode) (0) ++#endif ++ + /* the read-only stuff doesn't really belong here, but any other place is + probably as bad and I don't want to create yet another include file. */ + +@@ -365,11 +389,14 @@ struct inodes_stat_t { + #define FS_EXTENT_FL 0x00080000 /* Extents */ + #define FS_DIRECTIO_FL 0x00100000 /* Use direct i/o */ + #define FS_NOCOW_FL 0x00800000 /* Do not cow file */ ++#define FS_IXUNLINK_FL 0x08000000 /* Immutable invert on unlink */ + #define FS_RESERVED_FL 0x80000000 /* reserved for ext2 lib */ + +-#define FS_FL_USER_VISIBLE 0x0003DFFF /* User visible flags */ +-#define FS_FL_USER_MODIFIABLE 0x000380FF /* User modifiable flags */ ++#define FS_BARRIER_FL 0x04000000 /* Barrier for chroot() */ ++#define FS_COW_FL 0x20000000 /* Copy on Write marker */ + ++#define FS_FL_USER_VISIBLE 0x0103DFFF /* User visible flags */ ++#define FS_FL_USER_MODIFIABLE 0x010380FF /* User modifiable flags */ + + #define SYNC_FILE_RANGE_WAIT_BEFORE 1 + #define SYNC_FILE_RANGE_WRITE 2 +@@ -451,6 +478,7 @@ typedef void (dio_iodone_t)(struct kiocb + #define ATTR_KILL_PRIV (1 << 14) + #define ATTR_OPEN (1 << 15) /* Truncating from open(O_TRUNC) */ + #define ATTR_TIMES_SET (1 << 16) ++#define ATTR_TAG (1 << 17) + + /* + * This is the Inode Attributes structure, used for notify_change(). It +@@ -466,6 +494,7 @@ struct iattr { + umode_t ia_mode; + uid_t ia_uid; + gid_t ia_gid; ++ tag_t ia_tag; + loff_t ia_size; + struct timespec ia_atime; + struct timespec ia_mtime; +@@ -479,6 +508,9 @@ struct iattr { + struct file *ia_file; + }; + ++#define ATTR_FLAG_BARRIER 512 /* Barrier for chroot() */ ++#define ATTR_FLAG_IXUNLINK 1024 /* Immutable invert on unlink */ ++ + /* + * Includes for diskquotas. 
+ */ +@@ -758,7 +790,9 @@ struct inode { + unsigned short i_opflags; + uid_t i_uid; + gid_t i_gid; +- unsigned int i_flags; ++ tag_t i_tag; ++ unsigned short i_flags; ++ unsigned short i_vflags; + + #ifdef CONFIG_FS_POSIX_ACL + struct posix_acl *i_acl; +@@ -787,6 +821,7 @@ struct inode { + unsigned int __i_nlink; + }; + dev_t i_rdev; ++ dev_t i_mdev; + struct timespec i_atime; + struct timespec i_mtime; + struct timespec i_ctime; +@@ -924,12 +959,12 @@ static inline void i_size_write(struct i + + static inline unsigned iminor(const struct inode *inode) + { +- return MINOR(inode->i_rdev); ++ return MINOR(inode->i_mdev); + } + + static inline unsigned imajor(const struct inode *inode) + { +- return MAJOR(inode->i_rdev); ++ return MAJOR(inode->i_mdev); + } + + extern struct block_device *I_BDEV(struct inode *inode); +@@ -996,6 +1031,7 @@ struct file { + loff_t f_pos; + struct fown_struct f_owner; + const struct cred *f_cred; ++ xid_t f_xid; + struct file_ra_state f_ra; + + u64 f_version; +@@ -1143,6 +1179,7 @@ struct file_lock { + struct file *fl_file; + loff_t fl_start; + loff_t fl_end; ++ xid_t fl_xid; + + struct fasync_struct * fl_fasync; /* for lease break notifications */ + /* for lease breaks: */ +@@ -1655,6 +1692,7 @@ struct inode_operations { + ssize_t (*getxattr) (struct dentry *, const char *, void *, size_t); + ssize_t (*listxattr) (struct dentry *, char *, size_t); + int (*removexattr) (struct dentry *, const char *); ++ int (*sync_flags) (struct inode *, int, int); + void (*truncate_range)(struct inode *, loff_t, loff_t); + int (*fiemap)(struct inode *, struct fiemap_extent_info *, u64 start, + u64 len); +@@ -1674,6 +1712,7 @@ extern ssize_t vfs_readv(struct file *, + unsigned long, loff_t *); + extern ssize_t vfs_writev(struct file *, const struct iovec __user *, + unsigned long, loff_t *); ++ssize_t vfs_sendfile(struct file *, struct file *, loff_t *, size_t, loff_t); + + struct super_operations { + struct inode *(*alloc_inode)(struct super_block *sb); +@@ -2513,6 +2552,7 @@ extern int dcache_dir_open(struct inode + extern int dcache_dir_close(struct inode *, struct file *); + extern loff_t dcache_dir_lseek(struct file *, loff_t, int); + extern int dcache_readdir(struct file *, void *, filldir_t); ++extern int dcache_readdir_filter(struct file *, void *, filldir_t, int (*)(struct dentry *)); + extern int simple_setattr(struct dentry *, struct iattr *); + extern int simple_getattr(struct vfsmount *, struct dentry *, struct kstat *); + extern int simple_statfs(struct dentry *, struct kstatfs *); +diff -NurpP --minimal linux-3.3.8/include/linux/gfs2_ondisk.h linux-3.3.8-vs2.3.3.4/include/linux/gfs2_ondisk.h +--- linux-3.3.8/include/linux/gfs2_ondisk.h 2012-03-19 19:47:28.000000000 +0100 ++++ linux-3.3.8-vs2.3.3.4/include/linux/gfs2_ondisk.h 2012-02-24 03:55:06.000000000 +0100 +@@ -213,6 +213,9 @@ enum { + gfs2fl_NoAtime = 7, + gfs2fl_Sync = 8, + gfs2fl_System = 9, ++ gfs2fl_IXUnlink = 16, ++ gfs2fl_Barrier = 17, ++ gfs2fl_Cow = 18, + gfs2fl_TruncInProg = 29, + gfs2fl_InheritDirectio = 30, + gfs2fl_InheritJdata = 31, +@@ -229,6 +232,9 @@ enum { + #define GFS2_DIF_NOATIME 0x00000080 + #define GFS2_DIF_SYNC 0x00000100 + #define GFS2_DIF_SYSTEM 0x00000200 /* New in gfs2 */ ++#define GFS2_DIF_IXUNLINK 0x00010000 ++#define GFS2_DIF_BARRIER 0x00020000 ++#define GFS2_DIF_COW 0x00040000 + #define GFS2_DIF_TRUNC_IN_PROG 0x20000000 /* New in gfs2 */ + #define GFS2_DIF_INHERIT_DIRECTIO 0x40000000 + #define GFS2_DIF_INHERIT_JDATA 0x80000000 +diff -NurpP --minimal 
linux-3.3.8/include/linux/if_tun.h linux-3.3.8-vs2.3.3.4/include/linux/if_tun.h +--- linux-3.3.8/include/linux/if_tun.h 2010-08-02 16:52:54.000000000 +0200 ++++ linux-3.3.8-vs2.3.3.4/include/linux/if_tun.h 2012-02-24 03:55:06.000000000 +0100 +@@ -53,6 +53,7 @@ + #define TUNDETACHFILTER _IOW('T', 214, struct sock_fprog) + #define TUNGETVNETHDRSZ _IOR('T', 215, int) + #define TUNSETVNETHDRSZ _IOW('T', 216, int) ++#define TUNSETNID _IOW('T', 217, int) + + /* TUNSETIFF ifr flags */ + #define IFF_TUN 0x0001 +diff -NurpP --minimal linux-3.3.8/include/linux/init_task.h linux-3.3.8-vs2.3.3.4/include/linux/init_task.h +--- linux-3.3.8/include/linux/init_task.h 2012-03-19 19:47:28.000000000 +0100 ++++ linux-3.3.8-vs2.3.3.4/include/linux/init_task.h 2012-02-24 03:55:06.000000000 +0100 +@@ -192,6 +192,10 @@ extern struct cred init_cred; + INIT_FTRACE_GRAPH \ + INIT_TRACE_RECURSION \ + INIT_TASK_RCU_PREEMPT(tsk) \ ++ .xid = 0, \ ++ .vx_info = NULL, \ ++ .nid = 0, \ ++ .nx_info = NULL, \ + } + + +diff -NurpP --minimal linux-3.3.8/include/linux/ipc.h linux-3.3.8-vs2.3.3.4/include/linux/ipc.h +--- linux-3.3.8/include/linux/ipc.h 2012-03-19 19:47:28.000000000 +0100 ++++ linux-3.3.8-vs2.3.3.4/include/linux/ipc.h 2012-02-24 03:55:06.000000000 +0100 +@@ -91,6 +91,7 @@ struct kern_ipc_perm + key_t key; + uid_t uid; + gid_t gid; ++ xid_t xid; + uid_t cuid; + gid_t cgid; + umode_t mode; +diff -NurpP --minimal linux-3.3.8/include/linux/ipc_namespace.h linux-3.3.8-vs2.3.3.4/include/linux/ipc_namespace.h +--- linux-3.3.8/include/linux/ipc_namespace.h 2011-10-24 18:45:32.000000000 +0200 ++++ linux-3.3.8-vs2.3.3.4/include/linux/ipc_namespace.h 2012-02-24 03:55:06.000000000 +0100 +@@ -101,7 +101,8 @@ static inline int mq_init_ns(struct ipc_ + + #if defined(CONFIG_IPC_NS) + extern struct ipc_namespace *copy_ipcs(unsigned long flags, +- struct task_struct *tsk); ++ struct ipc_namespace *old_ns, ++ struct user_namespace *user_ns); + static inline struct ipc_namespace *get_ipc_ns(struct ipc_namespace *ns) + { + if (ns) +@@ -112,12 +113,13 @@ static inline struct ipc_namespace *get_ + extern void put_ipc_ns(struct ipc_namespace *ns); + #else + static inline struct ipc_namespace *copy_ipcs(unsigned long flags, +- struct task_struct *tsk) ++ struct ipc_namespace *old_ns, ++ struct user_namespace *user_ns) + { + if (flags & CLONE_NEWIPC) + return ERR_PTR(-EINVAL); + +- return tsk->nsproxy->ipc_ns; ++ return old_ns; + } + + static inline struct ipc_namespace *get_ipc_ns(struct ipc_namespace *ns) +diff -NurpP --minimal linux-3.3.8/include/linux/loop.h linux-3.3.8-vs2.3.3.4/include/linux/loop.h +--- linux-3.3.8/include/linux/loop.h 2012-01-09 16:14:58.000000000 +0100 ++++ linux-3.3.8-vs2.3.3.4/include/linux/loop.h 2012-02-24 03:55:06.000000000 +0100 +@@ -45,6 +45,7 @@ struct loop_device { + struct loop_func_table *lo_encryption; + __u32 lo_init[2]; + uid_t lo_key_owner; /* Who set the key */ ++ xid_t lo_xid; + int (*ioctl)(struct loop_device *, int cmd, + unsigned long arg); + +diff -NurpP --minimal linux-3.3.8/include/linux/magic.h linux-3.3.8-vs2.3.3.4/include/linux/magic.h +--- linux-3.3.8/include/linux/magic.h 2012-01-09 16:14:58.000000000 +0100 ++++ linux-3.3.8-vs2.3.3.4/include/linux/magic.h 2012-02-24 03:55:06.000000000 +0100 +@@ -3,7 +3,7 @@ + + #define ADFS_SUPER_MAGIC 0xadf5 + #define AFFS_SUPER_MAGIC 0xadff +-#define AFS_SUPER_MAGIC 0x5346414F ++#define AFS_SUPER_MAGIC 0x5346414F + #define AUTOFS_SUPER_MAGIC 0x0187 + #define CODA_SUPER_MAGIC 0x73757245 + #define CRAMFS_MAGIC 0x28cd3d45 /* some random number */ +@@ 
-41,6 +41,7 @@ + #define NFS_SUPER_MAGIC 0x6969 + #define OPENPROM_SUPER_MAGIC 0x9fa1 + #define PROC_SUPER_MAGIC 0x9fa0 ++#define DEVPTS_SUPER_MAGIC 0x1cd1 + #define QNX4_SUPER_MAGIC 0x002f /* qnx4 fs detection */ + + #define REISERFS_SUPER_MAGIC 0x52654973 /* used by gcc */ +diff -NurpP --minimal linux-3.3.8/include/linux/major.h linux-3.3.8-vs2.3.3.4/include/linux/major.h +--- linux-3.3.8/include/linux/major.h 2009-09-10 15:26:25.000000000 +0200 ++++ linux-3.3.8-vs2.3.3.4/include/linux/major.h 2012-02-24 03:55:06.000000000 +0100 +@@ -15,6 +15,7 @@ + #define HD_MAJOR IDE0_MAJOR + #define PTY_SLAVE_MAJOR 3 + #define TTY_MAJOR 4 ++#define VROOT_MAJOR 4 + #define TTYAUX_MAJOR 5 + #define LP_MAJOR 6 + #define VCS_MAJOR 7 +diff -NurpP --minimal linux-3.3.8/include/linux/memcontrol.h linux-3.3.8-vs2.3.3.4/include/linux/memcontrol.h +--- linux-3.3.8/include/linux/memcontrol.h 2012-03-19 19:47:28.000000000 +0100 ++++ linux-3.3.8-vs2.3.3.4/include/linux/memcontrol.h 2012-03-19 20:52:10.000000000 +0100 +@@ -87,6 +87,13 @@ extern struct mem_cgroup *try_get_mem_cg + extern struct mem_cgroup *parent_mem_cgroup(struct mem_cgroup *memcg); + extern struct mem_cgroup *mem_cgroup_from_cont(struct cgroup *cont); + ++extern u64 mem_cgroup_res_read_u64(struct mem_cgroup *mem, int member); ++extern u64 mem_cgroup_memsw_read_u64(struct mem_cgroup *mem, int member); ++ ++extern s64 mem_cgroup_stat_read_cache(struct mem_cgroup *mem); ++extern s64 mem_cgroup_stat_read_anon(struct mem_cgroup *mem); ++extern s64 mem_cgroup_stat_read_mapped(struct mem_cgroup *mem); ++ + static inline + int mm_match_cgroup(const struct mm_struct *mm, const struct mem_cgroup *cgroup) + { +diff -NurpP --minimal linux-3.3.8/include/linux/mm_types.h linux-3.3.8-vs2.3.3.4/include/linux/mm_types.h +--- linux-3.3.8/include/linux/mm_types.h 2012-03-19 19:47:28.000000000 +0100 ++++ linux-3.3.8-vs2.3.3.4/include/linux/mm_types.h 2012-02-24 03:55:06.000000000 +0100 +@@ -343,6 +343,7 @@ struct mm_struct { + + /* Architecture-specific MM context */ + mm_context_t context; ++ struct vx_info *mm_vx_info; + + /* Swap token stuff */ + /* +diff -NurpP --minimal linux-3.3.8/include/linux/mmzone.h linux-3.3.8-vs2.3.3.4/include/linux/mmzone.h +--- linux-3.3.8/include/linux/mmzone.h 2012-03-19 19:47:28.000000000 +0100 ++++ linux-3.3.8-vs2.3.3.4/include/linux/mmzone.h 2012-02-24 03:55:06.000000000 +0100 +@@ -683,6 +683,13 @@ typedef struct pglist_data { + __pgdat->node_start_pfn + __pgdat->node_spanned_pages;\ + }) + ++#define node_start_pfn(nid) (NODE_DATA(nid)->node_start_pfn) ++ ++#define node_end_pfn(nid) ({\ ++ pg_data_t *__pgdat = NODE_DATA(nid);\ ++ __pgdat->node_start_pfn + __pgdat->node_spanned_pages;\ ++}) ++ + #include + + extern struct mutex zonelists_mutex; +diff -NurpP --minimal linux-3.3.8/include/linux/mount.h linux-3.3.8-vs2.3.3.4/include/linux/mount.h +--- linux-3.3.8/include/linux/mount.h 2012-03-19 19:47:28.000000000 +0100 ++++ linux-3.3.8-vs2.3.3.4/include/linux/mount.h 2012-02-24 17:29:28.000000000 +0100 +@@ -47,6 +47,9 @@ struct mnt_namespace; + + #define MNT_INTERNAL 0x4000 + ++#define MNT_TAGID 0x10000 ++#define MNT_NOTAG 0x20000 ++ + struct vfsmount { + struct dentry *mnt_root; /* root of the mounted tree */ + struct super_block *mnt_sb; /* pointer to superblock */ +diff -NurpP --minimal linux-3.3.8/include/linux/net.h linux-3.3.8-vs2.3.3.4/include/linux/net.h +--- linux-3.3.8/include/linux/net.h 2011-07-22 11:18:11.000000000 +0200 ++++ linux-3.3.8-vs2.3.3.4/include/linux/net.h 2012-02-24 03:55:06.000000000 +0100 +@@ -72,6 +72,7 @@ 
struct net; + #define SOCK_NOSPACE 2 + #define SOCK_PASSCRED 3 + #define SOCK_PASSSEC 4 ++#define SOCK_USER_SOCKET 5 + + #ifndef ARCH_HAS_SOCKET_TYPES + /** +diff -NurpP --minimal linux-3.3.8/include/linux/netdevice.h linux-3.3.8-vs2.3.3.4/include/linux/netdevice.h +--- linux-3.3.8/include/linux/netdevice.h 2012-06-08 15:23:46.000000000 +0200 ++++ linux-3.3.8-vs2.3.3.4/include/linux/netdevice.h 2012-05-15 07:09:24.000000000 +0200 +@@ -1627,6 +1627,7 @@ extern void netdev_resync_ops(struct ne + + extern struct net_device *dev_get_by_index(struct net *net, int ifindex); + extern struct net_device *__dev_get_by_index(struct net *net, int ifindex); ++extern struct net_device *dev_get_by_index_real_rcu(struct net *net, int ifindex); + extern struct net_device *dev_get_by_index_rcu(struct net *net, int ifindex); + extern int dev_restart(struct net_device *dev); + #ifdef CONFIG_NETPOLL_TRAP +diff -NurpP --minimal linux-3.3.8/include/linux/nfs_mount.h linux-3.3.8-vs2.3.3.4/include/linux/nfs_mount.h +--- linux-3.3.8/include/linux/nfs_mount.h 2011-01-05 21:50:31.000000000 +0100 ++++ linux-3.3.8-vs2.3.3.4/include/linux/nfs_mount.h 2012-02-24 03:55:06.000000000 +0100 +@@ -63,7 +63,8 @@ struct nfs_mount_data { + #define NFS_MOUNT_SECFLAVOUR 0x2000 /* 5 */ + #define NFS_MOUNT_NORDIRPLUS 0x4000 /* 5 */ + #define NFS_MOUNT_UNSHARED 0x8000 /* 5 */ +-#define NFS_MOUNT_FLAGMASK 0xFFFF ++#define NFS_MOUNT_TAGGED 0x10000 /* context tagging */ ++#define NFS_MOUNT_FLAGMASK 0x1FFFF + + /* The following are for internal use only */ + #define NFS_MOUNT_LOOKUP_CACHE_NONEG 0x10000 +diff -NurpP --minimal linux-3.3.8/include/linux/nsproxy.h linux-3.3.8-vs2.3.3.4/include/linux/nsproxy.h +--- linux-3.3.8/include/linux/nsproxy.h 2011-10-24 18:45:32.000000000 +0200 ++++ linux-3.3.8-vs2.3.3.4/include/linux/nsproxy.h 2012-02-24 03:55:06.000000000 +0100 +@@ -3,6 +3,7 @@ + + #include + #include ++#include + + struct mnt_namespace; + struct uts_namespace; +@@ -63,6 +64,7 @@ static inline struct nsproxy *task_nspro + } + + int copy_namespaces(unsigned long flags, struct task_struct *tsk); ++struct nsproxy *copy_nsproxy(struct nsproxy *orig); + void exit_task_namespaces(struct task_struct *tsk); + void switch_task_namespaces(struct task_struct *tsk, struct nsproxy *new); + void free_nsproxy(struct nsproxy *ns); +@@ -70,16 +72,26 @@ int unshare_nsproxy_namespaces(unsigned + struct fs_struct *); + int __init nsproxy_cache_init(void); + +-static inline void put_nsproxy(struct nsproxy *ns) ++#define get_nsproxy(n) __get_nsproxy(n, __FILE__, __LINE__) ++ ++static inline void __get_nsproxy(struct nsproxy *ns, ++ const char *_file, int _line) + { +- if (atomic_dec_and_test(&ns->count)) { +- free_nsproxy(ns); +- } ++ vxlprintk(VXD_CBIT(space, 0), "get_nsproxy(%p[%u])", ++ ns, atomic_read(&ns->count), _file, _line); ++ atomic_inc(&ns->count); + } + +-static inline void get_nsproxy(struct nsproxy *ns) ++#define put_nsproxy(n) __put_nsproxy(n, __FILE__, __LINE__) ++ ++static inline void __put_nsproxy(struct nsproxy *ns, ++ const char *_file, int _line) + { +- atomic_inc(&ns->count); ++ vxlprintk(VXD_CBIT(space, 0), "put_nsproxy(%p[%u])", ++ ns, atomic_read(&ns->count), _file, _line); ++ if (atomic_dec_and_test(&ns->count)) { ++ free_nsproxy(ns); ++ } + } + + #endif +diff -NurpP --minimal linux-3.3.8/include/linux/pid.h linux-3.3.8-vs2.3.3.4/include/linux/pid.h +--- linux-3.3.8/include/linux/pid.h 2011-07-22 11:18:11.000000000 +0200 ++++ linux-3.3.8-vs2.3.3.4/include/linux/pid.h 2012-02-24 03:55:06.000000000 +0100 +@@ -8,7 +8,8 @@ enum 
pid_type + PIDTYPE_PID, + PIDTYPE_PGID, + PIDTYPE_SID, +- PIDTYPE_MAX ++ PIDTYPE_MAX, ++ PIDTYPE_REALPID + }; + + /* +@@ -171,6 +172,7 @@ static inline pid_t pid_nr(struct pid *p + } + + pid_t pid_nr_ns(struct pid *pid, struct pid_namespace *ns); ++pid_t pid_unmapped_nr_ns(struct pid *pid, struct pid_namespace *ns); + pid_t pid_vnr(struct pid *pid); + + #define do_each_pid_task(pid, type, task) \ +diff -NurpP --minimal linux-3.3.8/include/linux/proc_fs.h linux-3.3.8-vs2.3.3.4/include/linux/proc_fs.h +--- linux-3.3.8/include/linux/proc_fs.h 2012-03-19 19:47:28.000000000 +0100 ++++ linux-3.3.8-vs2.3.3.4/include/linux/proc_fs.h 2012-02-24 04:17:21.000000000 +0100 +@@ -54,6 +54,7 @@ struct proc_dir_entry { + nlink_t nlink; + uid_t uid; + gid_t gid; ++ int vx_flags; + loff_t size; + const struct inode_operations *proc_iops; + /* +@@ -252,12 +253,18 @@ extern const struct proc_ns_operations n + extern const struct proc_ns_operations utsns_operations; + extern const struct proc_ns_operations ipcns_operations; + ++struct vx_info; ++struct nx_info; ++ + union proc_op { + int (*proc_get_link)(struct dentry *, struct path *); + int (*proc_read)(struct task_struct *task, char *page); + int (*proc_show)(struct seq_file *m, + struct pid_namespace *ns, struct pid *pid, + struct task_struct *task); ++ int (*proc_vs_read)(char *page); ++ int (*proc_vxi_read)(struct vx_info *vxi, char *page); ++ int (*proc_nxi_read)(struct nx_info *nxi, char *page); + }; + + struct ctl_table_header; +@@ -265,6 +272,7 @@ struct ctl_table; + + struct proc_inode { + struct pid *pid; ++ int vx_flags; + int fd; + union proc_op op; + struct proc_dir_entry *pde; +diff -NurpP --minimal linux-3.3.8/include/linux/quotaops.h linux-3.3.8-vs2.3.3.4/include/linux/quotaops.h +--- linux-3.3.8/include/linux/quotaops.h 2012-01-09 16:14:58.000000000 +0100 ++++ linux-3.3.8-vs2.3.3.4/include/linux/quotaops.h 2012-02-24 03:55:06.000000000 +0100 +@@ -8,6 +8,7 @@ + #define _LINUX_QUOTAOPS_ + + #include ++#include + + #define DQUOT_SPACE_WARN 0x1 + #define DQUOT_SPACE_RESERVE 0x2 +@@ -204,11 +205,12 @@ static inline void dquot_drop(struct ino + + static inline int dquot_alloc_inode(const struct inode *inode) + { +- return 0; ++ return dl_alloc_inode(inode); + } + + static inline void dquot_free_inode(const struct inode *inode) + { ++ dl_free_inode(inode); + } + + static inline int dquot_transfer(struct inode *inode, struct iattr *iattr) +@@ -219,6 +221,10 @@ static inline int dquot_transfer(struct + static inline int __dquot_alloc_space(struct inode *inode, qsize_t number, + int flags) + { ++ int ret = 0; ++ ++ if ((ret = dl_alloc_space(inode, number))) ++ return ret; + if (!(flags & DQUOT_SPACE_RESERVE)) + inode_add_bytes(inode, number); + return 0; +@@ -229,6 +235,7 @@ static inline void __dquot_free_space(st + { + if (!(flags & DQUOT_SPACE_RESERVE)) + inode_sub_bytes(inode, number); ++ dl_free_space(inode, number); + } + + static inline int dquot_claim_space_nodirty(struct inode *inode, qsize_t number) +diff -NurpP --minimal linux-3.3.8/include/linux/reboot.h linux-3.3.8-vs2.3.3.4/include/linux/reboot.h +--- linux-3.3.8/include/linux/reboot.h 2011-10-24 18:45:32.000000000 +0200 ++++ linux-3.3.8-vs2.3.3.4/include/linux/reboot.h 2012-02-24 03:55:06.000000000 +0100 +@@ -33,6 +33,7 @@ + #define LINUX_REBOOT_CMD_RESTART2 0xA1B2C3D4 + #define LINUX_REBOOT_CMD_SW_SUSPEND 0xD000FCE2 + #define LINUX_REBOOT_CMD_KEXEC 0x45584543 ++#define LINUX_REBOOT_CMD_OOM 0xDEADBEEF + + + #ifdef __KERNEL__ +diff -NurpP --minimal linux-3.3.8/include/linux/reiserfs_fs.h 
linux-3.3.8-vs2.3.3.4/include/linux/reiserfs_fs.h +--- linux-3.3.8/include/linux/reiserfs_fs.h 2012-03-19 19:47:28.000000000 +0100 ++++ linux-3.3.8-vs2.3.3.4/include/linux/reiserfs_fs.h 2012-02-24 03:55:06.000000000 +0100 +@@ -976,6 +976,11 @@ struct stat_data_v1 { + #define REISERFS_COMPR_FL FS_COMPR_FL + #define REISERFS_NOTAIL_FL FS_NOTAIL_FL + ++/* unfortunately reiserfs sdattr is only 16 bit */ ++#define REISERFS_IXUNLINK_FL (FS_IXUNLINK_FL >> 16) ++#define REISERFS_BARRIER_FL (FS_BARRIER_FL >> 16) ++#define REISERFS_COW_FL (FS_COW_FL >> 16) ++ + /* persistent flags that file inherits from the parent directory */ + #define REISERFS_INHERIT_MASK ( REISERFS_IMMUTABLE_FL | \ + REISERFS_SYNC_FL | \ +@@ -985,6 +990,9 @@ struct stat_data_v1 { + REISERFS_COMPR_FL | \ + REISERFS_NOTAIL_FL ) + ++#define REISERFS_FL_USER_VISIBLE 0x80FF ++#define REISERFS_FL_USER_MODIFIABLE 0x80FF ++ + /* Stat Data on disk (reiserfs version of UFS disk inode minus the + address blocks) */ + struct stat_data { +@@ -2074,6 +2082,7 @@ static inline void reiserfs_update_sd(st + void sd_attrs_to_i_attrs(__u16 sd_attrs, struct inode *inode); + void i_attrs_to_sd_attrs(struct inode *inode, __u16 * sd_attrs); + int reiserfs_setattr(struct dentry *dentry, struct iattr *attr); ++int reiserfs_sync_flags(struct inode *inode, int, int); + + int __reiserfs_write_begin(struct page *page, unsigned from, unsigned len); + +diff -NurpP --minimal linux-3.3.8/include/linux/reiserfs_fs_sb.h linux-3.3.8-vs2.3.3.4/include/linux/reiserfs_fs_sb.h +--- linux-3.3.8/include/linux/reiserfs_fs_sb.h 2012-03-19 19:47:28.000000000 +0100 ++++ linux-3.3.8-vs2.3.3.4/include/linux/reiserfs_fs_sb.h 2012-02-24 03:55:06.000000000 +0100 +@@ -477,6 +477,7 @@ enum reiserfs_mount_options { + REISERFS_EXPOSE_PRIVROOT, + REISERFS_BARRIER_NONE, + REISERFS_BARRIER_FLUSH, ++ REISERFS_TAGGED, + + /* Actions on error */ + REISERFS_ERROR_PANIC, +diff -NurpP --minimal linux-3.3.8/include/linux/sched.h linux-3.3.8-vs2.3.3.4/include/linux/sched.h +--- linux-3.3.8/include/linux/sched.h 2012-03-19 19:47:28.000000000 +0100 ++++ linux-3.3.8-vs2.3.3.4/include/linux/sched.h 2012-03-19 20:52:10.000000000 +0100 +@@ -1420,6 +1420,14 @@ struct task_struct { + #endif + seccomp_t seccomp; + ++/* vserver context data */ ++ struct vx_info *vx_info; ++ struct nx_info *nx_info; ++ ++ xid_t xid; ++ nid_t nid; ++ tag_t tag; ++ + /* Thread group tracking */ + u32 parent_exec_id; + u32 self_exec_id; +@@ -1669,6 +1677,11 @@ struct pid_namespace; + pid_t __task_pid_nr_ns(struct task_struct *task, enum pid_type type, + struct pid_namespace *ns); + ++#include ++#include ++#include ++#include ++ + static inline pid_t task_pid_nr(struct task_struct *tsk) + { + return tsk->pid; +@@ -1682,7 +1695,8 @@ static inline pid_t task_pid_nr_ns(struc + + static inline pid_t task_pid_vnr(struct task_struct *tsk) + { +- return __task_pid_nr_ns(tsk, PIDTYPE_PID, NULL); ++ // return __task_pid_nr_ns(tsk, PIDTYPE_PID, NULL); ++ return vx_map_pid(__task_pid_nr_ns(tsk, PIDTYPE_PID, NULL)); + } + + +@@ -1695,7 +1709,7 @@ pid_t task_tgid_nr_ns(struct task_struct + + static inline pid_t task_tgid_vnr(struct task_struct *tsk) + { +- return pid_vnr(task_tgid(tsk)); ++ return vx_map_tgid(pid_vnr(task_tgid(tsk))); + } + + +diff -NurpP --minimal linux-3.3.8/include/linux/shmem_fs.h linux-3.3.8-vs2.3.3.4/include/linux/shmem_fs.h +--- linux-3.3.8/include/linux/shmem_fs.h 2012-03-19 19:47:29.000000000 +0100 ++++ linux-3.3.8-vs2.3.3.4/include/linux/shmem_fs.h 2012-02-24 03:55:06.000000000 +0100 +@@ -8,6 +8,9 @@ + + /* 
inode in-kernel data */ + ++#define TMPFS_SUPER_MAGIC 0x01021994 ++ ++ + struct shmem_inode_info { + spinlock_t lock; + unsigned long flags; +diff -NurpP --minimal linux-3.3.8/include/linux/stat.h linux-3.3.8-vs2.3.3.4/include/linux/stat.h +--- linux-3.3.8/include/linux/stat.h 2008-12-25 00:26:37.000000000 +0100 ++++ linux-3.3.8-vs2.3.3.4/include/linux/stat.h 2012-02-24 03:55:06.000000000 +0100 +@@ -66,6 +66,7 @@ struct kstat { + unsigned int nlink; + uid_t uid; + gid_t gid; ++ tag_t tag; + dev_t rdev; + loff_t size; + struct timespec atime; +diff -NurpP --minimal linux-3.3.8/include/linux/sunrpc/auth.h linux-3.3.8-vs2.3.3.4/include/linux/sunrpc/auth.h +--- linux-3.3.8/include/linux/sunrpc/auth.h 2012-03-19 19:47:29.000000000 +0100 ++++ linux-3.3.8-vs2.3.3.4/include/linux/sunrpc/auth.h 2012-02-24 03:55:06.000000000 +0100 +@@ -25,6 +25,7 @@ + struct auth_cred { + uid_t uid; + gid_t gid; ++ tag_t tag; + struct group_info *group_info; + const char *principal; + unsigned char machine_cred : 1; +diff -NurpP --minimal linux-3.3.8/include/linux/sunrpc/clnt.h linux-3.3.8-vs2.3.3.4/include/linux/sunrpc/clnt.h +--- linux-3.3.8/include/linux/sunrpc/clnt.h 2012-03-19 19:47:29.000000000 +0100 ++++ linux-3.3.8-vs2.3.3.4/include/linux/sunrpc/clnt.h 2012-02-24 03:55:06.000000000 +0100 +@@ -50,7 +50,8 @@ struct rpc_clnt { + unsigned int cl_softrtry : 1,/* soft timeouts */ + cl_discrtry : 1,/* disconnect before retry */ + cl_autobind : 1,/* use getport() */ +- cl_chatty : 1;/* be verbose */ ++ cl_chatty : 1,/* be verbose */ ++ cl_tag : 1;/* context tagging */ + + struct rpc_rtt * cl_rtt; /* RTO estimator data */ + const struct rpc_timeout *cl_timeout; /* Timeout strategy */ +diff -NurpP --minimal linux-3.3.8/include/linux/sysctl.h linux-3.3.8-vs2.3.3.4/include/linux/sysctl.h +--- linux-3.3.8/include/linux/sysctl.h 2012-03-19 19:47:29.000000000 +0100 ++++ linux-3.3.8-vs2.3.3.4/include/linux/sysctl.h 2012-02-24 03:55:06.000000000 +0100 +@@ -60,6 +60,7 @@ enum + CTL_ABI=9, /* Binary emulation */ + CTL_CPU=10, /* CPU stuff (speed scaling, etc) */ + CTL_ARLAN=254, /* arlan wireless driver */ ++ CTL_VSERVER=4242, /* Linux-VServer debug */ + CTL_S390DBF=5677, /* s390 debug */ + CTL_SUNRPC=7249, /* sunrpc debug */ + CTL_PM=9899, /* frv power management */ +@@ -94,6 +95,7 @@ enum + + KERN_PANIC=15, /* int: panic timeout */ + KERN_REALROOTDEV=16, /* real root device to mount after initrd */ ++ KERN_VSHELPER=17, /* string: path to vshelper policy agent */ + + KERN_SPARC_REBOOT=21, /* reboot command on Sparc */ + KERN_CTLALTDEL=22, /* int: allow ctl-alt-del to reboot */ +diff -NurpP --minimal linux-3.3.8/include/linux/sysfs.h linux-3.3.8-vs2.3.3.4/include/linux/sysfs.h +--- linux-3.3.8/include/linux/sysfs.h 2012-03-19 19:47:29.000000000 +0100 ++++ linux-3.3.8-vs2.3.3.4/include/linux/sysfs.h 2012-02-24 03:55:06.000000000 +0100 +@@ -19,6 +19,8 @@ + #include + #include + ++#define SYSFS_SUPER_MAGIC 0x62656572 ++ + struct kobject; + struct module; + enum kobj_ns_type; +diff -NurpP --minimal linux-3.3.8/include/linux/time.h linux-3.3.8-vs2.3.3.4/include/linux/time.h +--- linux-3.3.8/include/linux/time.h 2011-07-22 11:18:11.000000000 +0200 ++++ linux-3.3.8-vs2.3.3.4/include/linux/time.h 2012-02-24 03:55:06.000000000 +0100 +@@ -256,6 +256,9 @@ static __always_inline void timespec_add + a->tv_sec += __iter_div_u64_rem(a->tv_nsec + ns, NSEC_PER_SEC, &ns); + a->tv_nsec = ns; + } ++ ++#include ++ + #endif /* __KERNEL__ */ + + #define NFDBITS __NFDBITS +diff -NurpP --minimal linux-3.3.8/include/linux/types.h 
linux-3.3.8-vs2.3.3.4/include/linux/types.h +--- linux-3.3.8/include/linux/types.h 2012-03-19 19:47:29.000000000 +0100 ++++ linux-3.3.8-vs2.3.3.4/include/linux/types.h 2012-02-24 03:55:06.000000000 +0100 +@@ -41,6 +41,9 @@ typedef __kernel_uid32_t uid_t; + typedef __kernel_gid32_t gid_t; + typedef __kernel_uid16_t uid16_t; + typedef __kernel_gid16_t gid16_t; ++typedef unsigned int xid_t; ++typedef unsigned int nid_t; ++typedef unsigned int tag_t; + + typedef unsigned long uintptr_t; + +diff -NurpP --minimal linux-3.3.8/include/linux/utsname.h linux-3.3.8-vs2.3.3.4/include/linux/utsname.h +--- linux-3.3.8/include/linux/utsname.h 2012-01-09 16:14:59.000000000 +0100 ++++ linux-3.3.8-vs2.3.3.4/include/linux/utsname.h 2012-02-24 03:55:06.000000000 +0100 +@@ -62,7 +62,8 @@ static inline void get_uts_ns(struct uts + } + + extern struct uts_namespace *copy_utsname(unsigned long flags, +- struct task_struct *tsk); ++ struct uts_namespace *old_ns, ++ struct user_namespace *user_ns); + extern void free_uts_ns(struct kref *kref); + + static inline void put_uts_ns(struct uts_namespace *ns) +@@ -79,12 +80,13 @@ static inline void put_uts_ns(struct uts + } + + static inline struct uts_namespace *copy_utsname(unsigned long flags, +- struct task_struct *tsk) ++ struct uts_namespace *old_ns, ++ struct user_namespace *user_ns) + { + if (flags & CLONE_NEWUTS) + return ERR_PTR(-EINVAL); + +- return tsk->nsproxy->uts_ns; ++ return old_ns; + } + #endif + +diff -NurpP --minimal linux-3.3.8/include/linux/vroot.h linux-3.3.8-vs2.3.3.4/include/linux/vroot.h +--- linux-3.3.8/include/linux/vroot.h 1970-01-01 01:00:00.000000000 +0100 ++++ linux-3.3.8-vs2.3.3.4/include/linux/vroot.h 2012-02-24 03:55:06.000000000 +0100 +@@ -0,0 +1,51 @@ ++ ++/* ++ * include/linux/vroot.h ++ * ++ * written by Herbert Pötzl, 9/11/2002 ++ * ported to 2.6 by Herbert Pötzl, 30/12/2004 ++ * ++ * Copyright (C) 2002-2007 by Herbert Pötzl. ++ * Redistribution of this file is permitted under the ++ * GNU General Public License. 
++ */ ++ ++#ifndef _LINUX_VROOT_H ++#define _LINUX_VROOT_H ++ ++ ++#ifdef __KERNEL__ ++ ++/* Possible states of device */ ++enum { ++ Vr_unbound, ++ Vr_bound, ++}; ++ ++struct vroot_device { ++ int vr_number; ++ int vr_refcnt; ++ ++ struct semaphore vr_ctl_mutex; ++ struct block_device *vr_device; ++ int vr_state; ++}; ++ ++ ++typedef struct block_device *(vroot_grb_func)(struct block_device *); ++ ++extern int register_vroot_grb(vroot_grb_func *); ++extern int unregister_vroot_grb(vroot_grb_func *); ++ ++#endif /* __KERNEL__ */ ++ ++#define MAX_VROOT_DEFAULT 8 ++ ++/* ++ * IOCTL commands --- we will commandeer 0x56 ('V') ++ */ ++ ++#define VROOT_SET_DEV 0x5600 ++#define VROOT_CLR_DEV 0x5601 ++ ++#endif /* _LINUX_VROOT_H */ +diff -NurpP --minimal linux-3.3.8/include/linux/vs_base.h linux-3.3.8-vs2.3.3.4/include/linux/vs_base.h +--- linux-3.3.8/include/linux/vs_base.h 1970-01-01 01:00:00.000000000 +0100 ++++ linux-3.3.8-vs2.3.3.4/include/linux/vs_base.h 2012-02-24 03:55:06.000000000 +0100 +@@ -0,0 +1,10 @@ ++#ifndef _VS_BASE_H ++#define _VS_BASE_H ++ ++#include "vserver/base.h" ++#include "vserver/check.h" ++#include "vserver/debug.h" ++ ++#else ++#warning duplicate inclusion ++#endif +diff -NurpP --minimal linux-3.3.8/include/linux/vs_context.h linux-3.3.8-vs2.3.3.4/include/linux/vs_context.h +--- linux-3.3.8/include/linux/vs_context.h 1970-01-01 01:00:00.000000000 +0100 ++++ linux-3.3.8-vs2.3.3.4/include/linux/vs_context.h 2012-02-24 03:55:06.000000000 +0100 +@@ -0,0 +1,242 @@ ++#ifndef _VS_CONTEXT_H ++#define _VS_CONTEXT_H ++ ++#include "vserver/base.h" ++#include "vserver/check.h" ++#include "vserver/context.h" ++#include "vserver/history.h" ++#include "vserver/debug.h" ++ ++#include ++ ++ ++#define get_vx_info(i) __get_vx_info(i, __FILE__, __LINE__, __HERE__) ++ ++static inline struct vx_info *__get_vx_info(struct vx_info *vxi, ++ const char *_file, int _line, void *_here) ++{ ++ if (!vxi) ++ return NULL; ++ ++ vxlprintk(VXD_CBIT(xid, 2), "get_vx_info(%p[#%d.%d])", ++ vxi, vxi ? vxi->vx_id : 0, ++ vxi ? atomic_read(&vxi->vx_usecnt) : 0, ++ _file, _line); ++ __vxh_get_vx_info(vxi, _here); ++ ++ atomic_inc(&vxi->vx_usecnt); ++ return vxi; ++} ++ ++ ++extern void free_vx_info(struct vx_info *); ++ ++#define put_vx_info(i) __put_vx_info(i, __FILE__, __LINE__, __HERE__) ++ ++static inline void __put_vx_info(struct vx_info *vxi, ++ const char *_file, int _line, void *_here) ++{ ++ if (!vxi) ++ return; ++ ++ vxlprintk(VXD_CBIT(xid, 2), "put_vx_info(%p[#%d.%d])", ++ vxi, vxi ? vxi->vx_id : 0, ++ vxi ? atomic_read(&vxi->vx_usecnt) : 0, ++ _file, _line); ++ __vxh_put_vx_info(vxi, _here); ++ ++ if (atomic_dec_and_test(&vxi->vx_usecnt)) ++ free_vx_info(vxi); ++} ++ ++ ++#define init_vx_info(p, i) \ ++ __init_vx_info(p, i, __FILE__, __LINE__, __HERE__) ++ ++static inline void __init_vx_info(struct vx_info **vxp, struct vx_info *vxi, ++ const char *_file, int _line, void *_here) ++{ ++ if (vxi) { ++ vxlprintk(VXD_CBIT(xid, 3), ++ "init_vx_info(%p[#%d.%d])", ++ vxi, vxi ? vxi->vx_id : 0, ++ vxi ? atomic_read(&vxi->vx_usecnt) : 0, ++ _file, _line); ++ __vxh_init_vx_info(vxi, vxp, _here); ++ ++ atomic_inc(&vxi->vx_usecnt); ++ } ++ *vxp = vxi; ++} ++ ++ ++#define set_vx_info(p, i) \ ++ __set_vx_info(p, i, __FILE__, __LINE__, __HERE__) ++ ++static inline void __set_vx_info(struct vx_info **vxp, struct vx_info *vxi, ++ const char *_file, int _line, void *_here) ++{ ++ struct vx_info *vxo; ++ ++ if (!vxi) ++ return; ++ ++ vxlprintk(VXD_CBIT(xid, 3), "set_vx_info(%p[#%d.%d])", ++ vxi, vxi ? 
vxi->vx_id : 0, ++ vxi ? atomic_read(&vxi->vx_usecnt) : 0, ++ _file, _line); ++ __vxh_set_vx_info(vxi, vxp, _here); ++ ++ atomic_inc(&vxi->vx_usecnt); ++ vxo = xchg(vxp, vxi); ++ BUG_ON(vxo); ++} ++ ++ ++#define clr_vx_info(p) __clr_vx_info(p, __FILE__, __LINE__, __HERE__) ++ ++static inline void __clr_vx_info(struct vx_info **vxp, ++ const char *_file, int _line, void *_here) ++{ ++ struct vx_info *vxo; ++ ++ vxo = xchg(vxp, NULL); ++ if (!vxo) ++ return; ++ ++ vxlprintk(VXD_CBIT(xid, 3), "clr_vx_info(%p[#%d.%d])", ++ vxo, vxo ? vxo->vx_id : 0, ++ vxo ? atomic_read(&vxo->vx_usecnt) : 0, ++ _file, _line); ++ __vxh_clr_vx_info(vxo, vxp, _here); ++ ++ if (atomic_dec_and_test(&vxo->vx_usecnt)) ++ free_vx_info(vxo); ++} ++ ++ ++#define claim_vx_info(v, p) \ ++ __claim_vx_info(v, p, __FILE__, __LINE__, __HERE__) ++ ++static inline void __claim_vx_info(struct vx_info *vxi, ++ struct task_struct *task, ++ const char *_file, int _line, void *_here) ++{ ++ vxlprintk(VXD_CBIT(xid, 3), "claim_vx_info(%p[#%d.%d.%d]) %p", ++ vxi, vxi ? vxi->vx_id : 0, ++ vxi ? atomic_read(&vxi->vx_usecnt) : 0, ++ vxi ? atomic_read(&vxi->vx_tasks) : 0, ++ task, _file, _line); ++ __vxh_claim_vx_info(vxi, task, _here); ++ ++ atomic_inc(&vxi->vx_tasks); ++} ++ ++ ++extern void unhash_vx_info(struct vx_info *); ++ ++#define release_vx_info(v, p) \ ++ __release_vx_info(v, p, __FILE__, __LINE__, __HERE__) ++ ++static inline void __release_vx_info(struct vx_info *vxi, ++ struct task_struct *task, ++ const char *_file, int _line, void *_here) ++{ ++ vxlprintk(VXD_CBIT(xid, 3), "release_vx_info(%p[#%d.%d.%d]) %p", ++ vxi, vxi ? vxi->vx_id : 0, ++ vxi ? atomic_read(&vxi->vx_usecnt) : 0, ++ vxi ? atomic_read(&vxi->vx_tasks) : 0, ++ task, _file, _line); ++ __vxh_release_vx_info(vxi, task, _here); ++ ++ might_sleep(); ++ ++ if (atomic_dec_and_test(&vxi->vx_tasks)) ++ unhash_vx_info(vxi); ++} ++ ++ ++#define task_get_vx_info(p) \ ++ __task_get_vx_info(p, __FILE__, __LINE__, __HERE__) ++ ++static inline struct vx_info *__task_get_vx_info(struct task_struct *p, ++ const char *_file, int _line, void *_here) ++{ ++ struct vx_info *vxi; ++ ++ task_lock(p); ++ vxlprintk(VXD_CBIT(xid, 5), "task_get_vx_info(%p)", ++ p, _file, _line); ++ vxi = __get_vx_info(p->vx_info, _file, _line, _here); ++ task_unlock(p); ++ return vxi; ++} ++ ++ ++static inline void __wakeup_vx_info(struct vx_info *vxi) ++{ ++ if (waitqueue_active(&vxi->vx_wait)) ++ wake_up_interruptible(&vxi->vx_wait); ++} ++ ++ ++#define enter_vx_info(v, s) __enter_vx_info(v, s, __FILE__, __LINE__) ++ ++static inline void __enter_vx_info(struct vx_info *vxi, ++ struct vx_info_save *vxis, const char *_file, int _line) ++{ ++ vxlprintk(VXD_CBIT(xid, 5), "enter_vx_info(%p[#%d],%p) %p[#%d,%p]", ++ vxi, vxi ? vxi->vx_id : 0, vxis, current, ++ current->xid, current->vx_info, _file, _line); ++ vxis->vxi = xchg(¤t->vx_info, vxi); ++ vxis->xid = current->xid; ++ current->xid = vxi ? 
vxi->vx_id : 0; ++} ++ ++#define leave_vx_info(s) __leave_vx_info(s, __FILE__, __LINE__) ++ ++static inline void __leave_vx_info(struct vx_info_save *vxis, ++ const char *_file, int _line) ++{ ++ vxlprintk(VXD_CBIT(xid, 5), "leave_vx_info(%p[#%d,%p]) %p[#%d,%p]", ++ vxis, vxis->xid, vxis->vxi, current, ++ current->xid, current->vx_info, _file, _line); ++ (void)xchg(¤t->vx_info, vxis->vxi); ++ current->xid = vxis->xid; ++} ++ ++ ++static inline void __enter_vx_admin(struct vx_info_save *vxis) ++{ ++ vxis->vxi = xchg(¤t->vx_info, NULL); ++ vxis->xid = xchg(¤t->xid, (xid_t)0); ++} ++ ++static inline void __leave_vx_admin(struct vx_info_save *vxis) ++{ ++ (void)xchg(¤t->xid, vxis->xid); ++ (void)xchg(¤t->vx_info, vxis->vxi); ++} ++ ++#define task_is_init(p) \ ++ __task_is_init(p, __FILE__, __LINE__, __HERE__) ++ ++static inline int __task_is_init(struct task_struct *p, ++ const char *_file, int _line, void *_here) ++{ ++ int is_init = is_global_init(p); ++ ++ task_lock(p); ++ if (p->vx_info) ++ is_init = p->vx_info->vx_initpid == p->pid; ++ task_unlock(p); ++ return is_init; ++} ++ ++extern void exit_vx_info(struct task_struct *, int); ++extern void exit_vx_info_early(struct task_struct *, int); ++ ++ ++#else ++#warning duplicate inclusion ++#endif +diff -NurpP --minimal linux-3.3.8/include/linux/vs_cowbl.h linux-3.3.8-vs2.3.3.4/include/linux/vs_cowbl.h +--- linux-3.3.8/include/linux/vs_cowbl.h 1970-01-01 01:00:00.000000000 +0100 ++++ linux-3.3.8-vs2.3.3.4/include/linux/vs_cowbl.h 2012-02-24 03:55:06.000000000 +0100 +@@ -0,0 +1,48 @@ ++#ifndef _VS_COWBL_H ++#define _VS_COWBL_H ++ ++#include ++#include ++#include ++#include ++ ++extern struct dentry *cow_break_link(const char *pathname); ++ ++static inline int cow_check_and_break(struct path *path) ++{ ++ struct inode *inode = path->dentry->d_inode; ++ int error = 0; ++ ++ /* do we need this check? 
*/ ++ if (IS_RDONLY(inode)) ++ return -EROFS; ++ ++ if (IS_COW(inode)) { ++ if (IS_COW_LINK(inode)) { ++ struct dentry *new_dentry, *old_dentry = path->dentry; ++ char *pp, *buf; ++ ++ buf = kmalloc(PATH_MAX, GFP_KERNEL); ++ if (!buf) { ++ return -ENOMEM; ++ } ++ pp = d_path(path, buf, PATH_MAX); ++ new_dentry = cow_break_link(pp); ++ kfree(buf); ++ if (!IS_ERR(new_dentry)) { ++ path->dentry = new_dentry; ++ dput(old_dentry); ++ } else ++ error = PTR_ERR(new_dentry); ++ } else { ++ inode->i_flags &= ~(S_IXUNLINK | S_IMMUTABLE); ++ inode->i_ctime = CURRENT_TIME; ++ mark_inode_dirty(inode); ++ } ++ } ++ return error; ++} ++ ++#else ++#warning duplicate inclusion ++#endif +diff -NurpP --minimal linux-3.3.8/include/linux/vs_cvirt.h linux-3.3.8-vs2.3.3.4/include/linux/vs_cvirt.h +--- linux-3.3.8/include/linux/vs_cvirt.h 1970-01-01 01:00:00.000000000 +0100 ++++ linux-3.3.8-vs2.3.3.4/include/linux/vs_cvirt.h 2012-02-24 03:55:06.000000000 +0100 +@@ -0,0 +1,50 @@ ++#ifndef _VS_CVIRT_H ++#define _VS_CVIRT_H ++ ++#include "vserver/cvirt.h" ++#include "vserver/context.h" ++#include "vserver/base.h" ++#include "vserver/check.h" ++#include "vserver/debug.h" ++ ++ ++static inline void vx_activate_task(struct task_struct *p) ++{ ++ struct vx_info *vxi; ++ ++ if ((vxi = p->vx_info)) { ++ vx_update_load(vxi); ++ atomic_inc(&vxi->cvirt.nr_running); ++ } ++} ++ ++static inline void vx_deactivate_task(struct task_struct *p) ++{ ++ struct vx_info *vxi; ++ ++ if ((vxi = p->vx_info)) { ++ vx_update_load(vxi); ++ atomic_dec(&vxi->cvirt.nr_running); ++ } ++} ++ ++static inline void vx_uninterruptible_inc(struct task_struct *p) ++{ ++ struct vx_info *vxi; ++ ++ if ((vxi = p->vx_info)) ++ atomic_inc(&vxi->cvirt.nr_uninterruptible); ++} ++ ++static inline void vx_uninterruptible_dec(struct task_struct *p) ++{ ++ struct vx_info *vxi; ++ ++ if ((vxi = p->vx_info)) ++ atomic_dec(&vxi->cvirt.nr_uninterruptible); ++} ++ ++ ++#else ++#warning duplicate inclusion ++#endif +diff -NurpP --minimal linux-3.3.8/include/linux/vs_device.h linux-3.3.8-vs2.3.3.4/include/linux/vs_device.h +--- linux-3.3.8/include/linux/vs_device.h 1970-01-01 01:00:00.000000000 +0100 ++++ linux-3.3.8-vs2.3.3.4/include/linux/vs_device.h 2012-02-24 03:55:06.000000000 +0100 +@@ -0,0 +1,45 @@ ++#ifndef _VS_DEVICE_H ++#define _VS_DEVICE_H ++ ++#include "vserver/base.h" ++#include "vserver/device.h" ++#include "vserver/debug.h" ++ ++ ++#ifdef CONFIG_VSERVER_DEVICE ++ ++int vs_map_device(struct vx_info *, dev_t, dev_t *, umode_t); ++ ++#define vs_device_perm(v, d, m, p) \ ++ ((vs_map_device(current_vx_info(), d, NULL, m) & (p)) == (p)) ++ ++#else ++ ++static inline ++int vs_map_device(struct vx_info *vxi, ++ dev_t device, dev_t *target, umode_t mode) ++{ ++ if (target) ++ *target = device; ++ return ~0; ++} ++ ++#define vs_device_perm(v, d, m, p) ((p) == (p)) ++ ++#endif ++ ++ ++#define vs_map_chrdev(d, t, p) \ ++ ((vs_map_device(current_vx_info(), d, t, S_IFCHR) & (p)) == (p)) ++#define vs_map_blkdev(d, t, p) \ ++ ((vs_map_device(current_vx_info(), d, t, S_IFBLK) & (p)) == (p)) ++ ++#define vs_chrdev_perm(d, p) \ ++ vs_device_perm(current_vx_info(), d, S_IFCHR, p) ++#define vs_blkdev_perm(d, p) \ ++ vs_device_perm(current_vx_info(), d, S_IFBLK, p) ++ ++ ++#else ++#warning duplicate inclusion ++#endif +diff -NurpP --minimal linux-3.3.8/include/linux/vs_dlimit.h linux-3.3.8-vs2.3.3.4/include/linux/vs_dlimit.h +--- linux-3.3.8/include/linux/vs_dlimit.h 1970-01-01 01:00:00.000000000 +0100 ++++ linux-3.3.8-vs2.3.3.4/include/linux/vs_dlimit.h 2012-02-24 
03:55:06.000000000 +0100 +@@ -0,0 +1,215 @@ ++#ifndef _VS_DLIMIT_H ++#define _VS_DLIMIT_H ++ ++#include ++ ++#include "vserver/dlimit.h" ++#include "vserver/base.h" ++#include "vserver/debug.h" ++ ++ ++#define get_dl_info(i) __get_dl_info(i, __FILE__, __LINE__) ++ ++static inline struct dl_info *__get_dl_info(struct dl_info *dli, ++ const char *_file, int _line) ++{ ++ if (!dli) ++ return NULL; ++ vxlprintk(VXD_CBIT(dlim, 4), "get_dl_info(%p[#%d.%d])", ++ dli, dli ? dli->dl_tag : 0, ++ dli ? atomic_read(&dli->dl_usecnt) : 0, ++ _file, _line); ++ atomic_inc(&dli->dl_usecnt); ++ return dli; ++} ++ ++ ++#define free_dl_info(i) \ ++ call_rcu(&(i)->dl_rcu, rcu_free_dl_info) ++ ++#define put_dl_info(i) __put_dl_info(i, __FILE__, __LINE__) ++ ++static inline void __put_dl_info(struct dl_info *dli, ++ const char *_file, int _line) ++{ ++ if (!dli) ++ return; ++ vxlprintk(VXD_CBIT(dlim, 4), "put_dl_info(%p[#%d.%d])", ++ dli, dli ? dli->dl_tag : 0, ++ dli ? atomic_read(&dli->dl_usecnt) : 0, ++ _file, _line); ++ if (atomic_dec_and_test(&dli->dl_usecnt)) ++ free_dl_info(dli); ++} ++ ++ ++#define __dlimit_char(d) ((d) ? '*' : ' ') ++ ++static inline int __dl_alloc_space(struct super_block *sb, ++ tag_t tag, dlsize_t nr, const char *file, int line) ++{ ++ struct dl_info *dli = NULL; ++ int ret = 0; ++ ++ if (nr == 0) ++ goto out; ++ dli = locate_dl_info(sb, tag); ++ if (!dli) ++ goto out; ++ ++ spin_lock(&dli->dl_lock); ++ ret = (dli->dl_space_used + nr > dli->dl_space_total); ++ if (!ret) ++ dli->dl_space_used += nr; ++ spin_unlock(&dli->dl_lock); ++ put_dl_info(dli); ++out: ++ vxlprintk(VXD_CBIT(dlim, 1), ++ "ALLOC (%p,#%d)%c %lld bytes (%d)", ++ sb, tag, __dlimit_char(dli), (long long)nr, ++ ret, file, line); ++ return ret ? -ENOSPC : 0; ++} ++ ++static inline void __dl_free_space(struct super_block *sb, ++ tag_t tag, dlsize_t nr, const char *_file, int _line) ++{ ++ struct dl_info *dli = NULL; ++ ++ if (nr == 0) ++ goto out; ++ dli = locate_dl_info(sb, tag); ++ if (!dli) ++ goto out; ++ ++ spin_lock(&dli->dl_lock); ++ if (dli->dl_space_used > nr) ++ dli->dl_space_used -= nr; ++ else ++ dli->dl_space_used = 0; ++ spin_unlock(&dli->dl_lock); ++ put_dl_info(dli); ++out: ++ vxlprintk(VXD_CBIT(dlim, 1), ++ "FREE (%p,#%d)%c %lld bytes", ++ sb, tag, __dlimit_char(dli), (long long)nr, ++ _file, _line); ++} ++ ++static inline int __dl_alloc_inode(struct super_block *sb, ++ tag_t tag, const char *_file, int _line) ++{ ++ struct dl_info *dli; ++ int ret = 0; ++ ++ dli = locate_dl_info(sb, tag); ++ if (!dli) ++ goto out; ++ ++ spin_lock(&dli->dl_lock); ++ dli->dl_inodes_used++; ++ ret = (dli->dl_inodes_used > dli->dl_inodes_total); ++ spin_unlock(&dli->dl_lock); ++ put_dl_info(dli); ++out: ++ vxlprintk(VXD_CBIT(dlim, 0), ++ "ALLOC (%p,#%d)%c inode (%d)", ++ sb, tag, __dlimit_char(dli), ret, _file, _line); ++ return ret ? 
-ENOSPC : 0; ++} ++ ++static inline void __dl_free_inode(struct super_block *sb, ++ tag_t tag, const char *_file, int _line) ++{ ++ struct dl_info *dli; ++ ++ dli = locate_dl_info(sb, tag); ++ if (!dli) ++ goto out; ++ ++ spin_lock(&dli->dl_lock); ++ if (dli->dl_inodes_used > 1) ++ dli->dl_inodes_used--; ++ else ++ dli->dl_inodes_used = 0; ++ spin_unlock(&dli->dl_lock); ++ put_dl_info(dli); ++out: ++ vxlprintk(VXD_CBIT(dlim, 0), ++ "FREE (%p,#%d)%c inode", ++ sb, tag, __dlimit_char(dli), _file, _line); ++} ++ ++static inline void __dl_adjust_block(struct super_block *sb, tag_t tag, ++ unsigned long long *free_blocks, unsigned long long *root_blocks, ++ const char *_file, int _line) ++{ ++ struct dl_info *dli; ++ uint64_t broot, bfree; ++ ++ dli = locate_dl_info(sb, tag); ++ if (!dli) ++ return; ++ ++ spin_lock(&dli->dl_lock); ++ broot = (dli->dl_space_total - ++ (dli->dl_space_total >> 10) * dli->dl_nrlmult) ++ >> sb->s_blocksize_bits; ++ bfree = (dli->dl_space_total - dli->dl_space_used) ++ >> sb->s_blocksize_bits; ++ spin_unlock(&dli->dl_lock); ++ ++ vxlprintk(VXD_CBIT(dlim, 2), ++ "ADJUST: %lld,%lld on %lld,%lld [mult=%d]", ++ (long long)bfree, (long long)broot, ++ *free_blocks, *root_blocks, dli->dl_nrlmult, ++ _file, _line); ++ if (free_blocks) { ++ if (*free_blocks > bfree) ++ *free_blocks = bfree; ++ } ++ if (root_blocks) { ++ if (*root_blocks > broot) ++ *root_blocks = broot; ++ } ++ put_dl_info(dli); ++} ++ ++#define dl_prealloc_space(in, bytes) \ ++ __dl_alloc_space((in)->i_sb, (in)->i_tag, (dlsize_t)(bytes), \ ++ __FILE__, __LINE__ ) ++ ++#define dl_alloc_space(in, bytes) \ ++ __dl_alloc_space((in)->i_sb, (in)->i_tag, (dlsize_t)(bytes), \ ++ __FILE__, __LINE__ ) ++ ++#define dl_reserve_space(in, bytes) \ ++ __dl_alloc_space((in)->i_sb, (in)->i_tag, (dlsize_t)(bytes), \ ++ __FILE__, __LINE__ ) ++ ++#define dl_claim_space(in, bytes) (0) ++ ++#define dl_release_space(in, bytes) \ ++ __dl_free_space((in)->i_sb, (in)->i_tag, (dlsize_t)(bytes), \ ++ __FILE__, __LINE__ ) ++ ++#define dl_free_space(in, bytes) \ ++ __dl_free_space((in)->i_sb, (in)->i_tag, (dlsize_t)(bytes), \ ++ __FILE__, __LINE__ ) ++ ++ ++ ++#define dl_alloc_inode(in) \ ++ __dl_alloc_inode((in)->i_sb, (in)->i_tag, __FILE__, __LINE__ ) ++ ++#define dl_free_inode(in) \ ++ __dl_free_inode((in)->i_sb, (in)->i_tag, __FILE__, __LINE__ ) ++ ++ ++#define dl_adjust_block(sb, tag, fb, rb) \ ++ __dl_adjust_block(sb, tag, fb, rb, __FILE__, __LINE__ ) ++ ++ ++#else ++#warning duplicate inclusion ++#endif +diff -NurpP --minimal linux-3.3.8/include/linux/vs_inet.h linux-3.3.8-vs2.3.3.4/include/linux/vs_inet.h +--- linux-3.3.8/include/linux/vs_inet.h 1970-01-01 01:00:00.000000000 +0100 ++++ linux-3.3.8-vs2.3.3.4/include/linux/vs_inet.h 2012-02-24 03:55:06.000000000 +0100 +@@ -0,0 +1,353 @@ ++#ifndef _VS_INET_H ++#define _VS_INET_H ++ ++#include "vserver/base.h" ++#include "vserver/network.h" ++#include "vserver/debug.h" ++ ++#define IPI_LOOPBACK htonl(INADDR_LOOPBACK) ++ ++#define NXAV4(a) NIPQUAD((a)->ip[0]), NIPQUAD((a)->ip[1]), \ ++ NIPQUAD((a)->mask), (a)->type ++#define NXAV4_FMT "[" NIPQUAD_FMT "-" NIPQUAD_FMT "/" NIPQUAD_FMT ":%04x]" ++ ++#define NIPQUAD(addr) \ ++ ((unsigned char *)&addr)[0], \ ++ ((unsigned char *)&addr)[1], \ ++ ((unsigned char *)&addr)[2], \ ++ ((unsigned char *)&addr)[3] ++ ++#define NIPQUAD_FMT "%u.%u.%u.%u" ++ ++ ++static inline ++int v4_addr_match(struct nx_addr_v4 *nxa, __be32 addr, uint16_t tmask) ++{ ++ __be32 ip = nxa->ip[0].s_addr; ++ __be32 mask = nxa->mask.s_addr; ++ __be32 bcast = ip | ~mask; ++ 
int ret = 0; ++ ++ switch (nxa->type & tmask) { ++ case NXA_TYPE_MASK: ++ ret = (ip == (addr & mask)); ++ break; ++ case NXA_TYPE_ADDR: ++ ret = 3; ++ if (addr == ip) ++ break; ++ /* fall through to broadcast */ ++ case NXA_MOD_BCAST: ++ ret = ((tmask & NXA_MOD_BCAST) && (addr == bcast)); ++ break; ++ case NXA_TYPE_RANGE: ++ ret = ((nxa->ip[0].s_addr <= addr) && ++ (nxa->ip[1].s_addr > addr)); ++ break; ++ case NXA_TYPE_ANY: ++ ret = 2; ++ break; ++ } ++ ++ vxdprintk(VXD_CBIT(net, 0), ++ "v4_addr_match(%p" NXAV4_FMT "," NIPQUAD_FMT ",%04x) = %d", ++ nxa, NXAV4(nxa), NIPQUAD(addr), tmask, ret); ++ return ret; ++} ++ ++static inline ++int v4_addr_in_nx_info(struct nx_info *nxi, __be32 addr, uint16_t tmask) ++{ ++ struct nx_addr_v4 *nxa; ++ int ret = 1; ++ ++ if (!nxi) ++ goto out; ++ ++ ret = 2; ++ /* allow 127.0.0.1 when remapping lback */ ++ if ((tmask & NXA_LOOPBACK) && ++ (addr == IPI_LOOPBACK) && ++ nx_info_flags(nxi, NXF_LBACK_REMAP, 0)) ++ goto out; ++ ret = 3; ++ /* check for lback address */ ++ if ((tmask & NXA_MOD_LBACK) && ++ (nxi->v4_lback.s_addr == addr)) ++ goto out; ++ ret = 4; ++ /* check for broadcast address */ ++ if ((tmask & NXA_MOD_BCAST) && ++ (nxi->v4_bcast.s_addr == addr)) ++ goto out; ++ ret = 5; ++ /* check for v4 addresses */ ++ for (nxa = &nxi->v4; nxa; nxa = nxa->next) ++ if (v4_addr_match(nxa, addr, tmask)) ++ goto out; ++ ret = 0; ++out: ++ vxdprintk(VXD_CBIT(net, 0), ++ "v4_addr_in_nx_info(%p[#%u]," NIPQUAD_FMT ",%04x) = %d", ++ nxi, nxi ? nxi->nx_id : 0, NIPQUAD(addr), tmask, ret); ++ return ret; ++} ++ ++static inline ++int v4_nx_addr_match(struct nx_addr_v4 *nxa, struct nx_addr_v4 *addr, uint16_t mask) ++{ ++ /* FIXME: needs full range checks */ ++ return v4_addr_match(nxa, addr->ip[0].s_addr, mask); ++} ++ ++static inline ++int v4_nx_addr_in_nx_info(struct nx_info *nxi, struct nx_addr_v4 *nxa, uint16_t mask) ++{ ++ struct nx_addr_v4 *ptr; ++ ++ for (ptr = &nxi->v4; ptr; ptr = ptr->next) ++ if (v4_nx_addr_match(ptr, nxa, mask)) ++ return 1; ++ return 0; ++} ++ ++#include ++ ++/* ++ * Check if a given address matches for a socket ++ * ++ * nxi: the socket's nx_info if any ++ * addr: to be verified address ++ */ ++static inline ++int v4_sock_addr_match ( ++ struct nx_info *nxi, ++ struct inet_sock *inet, ++ __be32 addr) ++{ ++ __be32 saddr = inet->inet_rcv_saddr; ++ __be32 bcast = nxi ? 
nxi->v4_bcast.s_addr : INADDR_BROADCAST; ++ ++ if (addr && (saddr == addr || bcast == addr)) ++ return 1; ++ if (!saddr) ++ return v4_addr_in_nx_info(nxi, addr, NXA_MASK_BIND); ++ return 0; ++} ++ ++ ++/* inet related checks and helpers */ ++ ++ ++struct in_ifaddr; ++struct net_device; ++struct sock; ++ ++#ifdef CONFIG_INET ++ ++#include ++#include ++#include ++#include ++ ++ ++int dev_in_nx_info(struct net_device *, struct nx_info *); ++int v4_dev_in_nx_info(struct net_device *, struct nx_info *); ++int nx_v4_addr_conflict(struct nx_info *, struct nx_info *); ++ ++ ++/* ++ * check if address is covered by socket ++ * ++ * sk: the socket to check against ++ * addr: the address in question (must be != 0) ++ */ ++ ++static inline ++int __v4_addr_match_socket(const struct sock *sk, struct nx_addr_v4 *nxa) ++{ ++ struct nx_info *nxi = sk->sk_nx_info; ++ __be32 saddr = sk_rcv_saddr(sk); ++ ++ vxdprintk(VXD_CBIT(net, 5), ++ "__v4_addr_in_socket(%p," NXAV4_FMT ") %p:" NIPQUAD_FMT " %p;%lx", ++ sk, NXAV4(nxa), nxi, NIPQUAD(saddr), sk->sk_socket, ++ (sk->sk_socket?sk->sk_socket->flags:0)); ++ ++ if (saddr) { /* direct address match */ ++ return v4_addr_match(nxa, saddr, -1); ++ } else if (nxi) { /* match against nx_info */ ++ return v4_nx_addr_in_nx_info(nxi, nxa, -1); ++ } else { /* unrestricted any socket */ ++ return 1; ++ } ++} ++ ++ ++ ++static inline ++int nx_dev_visible(struct nx_info *nxi, struct net_device *dev) ++{ ++ vxdprintk(VXD_CBIT(net, 1), ++ "nx_dev_visible(%p[#%u],%p " VS_Q("%s") ") %d", ++ nxi, nxi ? nxi->nx_id : 0, dev, dev->name, ++ nxi ? dev_in_nx_info(dev, nxi) : 0); ++ ++ if (!nx_info_flags(nxi, NXF_HIDE_NETIF, 0)) ++ return 1; ++ if (dev_in_nx_info(dev, nxi)) ++ return 1; ++ return 0; ++} ++ ++ ++static inline ++int v4_ifa_in_nx_info(struct in_ifaddr *ifa, struct nx_info *nxi) ++{ ++ if (!nxi) ++ return 1; ++ if (!ifa) ++ return 0; ++ return v4_addr_in_nx_info(nxi, ifa->ifa_local, NXA_MASK_SHOW); ++} ++ ++static inline ++int nx_v4_ifa_visible(struct nx_info *nxi, struct in_ifaddr *ifa) ++{ ++ vxdprintk(VXD_CBIT(net, 1), "nx_v4_ifa_visible(%p[#%u],%p) %d", ++ nxi, nxi ? nxi->nx_id : 0, ifa, ++ nxi ? v4_ifa_in_nx_info(ifa, nxi) : 0); ++ ++ if (!nx_info_flags(nxi, NXF_HIDE_NETIF, 0)) ++ return 1; ++ if (v4_ifa_in_nx_info(ifa, nxi)) ++ return 1; ++ return 0; ++} ++ ++ ++struct nx_v4_sock_addr { ++ __be32 saddr; /* Address used for validation */ ++ __be32 baddr; /* Address used for socket bind */ ++}; ++ ++static inline ++int v4_map_sock_addr(struct inet_sock *inet, struct sockaddr_in *addr, ++ struct nx_v4_sock_addr *nsa) ++{ ++ struct sock *sk = &inet->sk; ++ struct nx_info *nxi = sk->sk_nx_info; ++ __be32 saddr = addr->sin_addr.s_addr; ++ __be32 baddr = saddr; ++ ++ vxdprintk(VXD_CBIT(net, 3), ++ "inet_bind(%p)* %p,%p;%lx " NIPQUAD_FMT, ++ sk, sk->sk_nx_info, sk->sk_socket, ++ (sk->sk_socket ? 
sk->sk_socket->flags : 0), ++ NIPQUAD(saddr)); ++ ++ if (nxi) { ++ if (saddr == INADDR_ANY) { ++ if (nx_info_flags(nxi, NXF_SINGLE_IP, 0)) ++ baddr = nxi->v4.ip[0].s_addr; ++ } else if (saddr == IPI_LOOPBACK) { ++ if (nx_info_flags(nxi, NXF_LBACK_REMAP, 0)) ++ baddr = nxi->v4_lback.s_addr; ++ } else if (!ipv4_is_multicast(saddr) || ++ !nx_info_ncaps(nxi, NXC_MULTICAST)) { ++ /* normal address bind */ ++ if (!v4_addr_in_nx_info(nxi, saddr, NXA_MASK_BIND)) ++ return -EADDRNOTAVAIL; ++ } ++ } ++ ++ vxdprintk(VXD_CBIT(net, 3), ++ "inet_bind(%p) " NIPQUAD_FMT ", " NIPQUAD_FMT, ++ sk, NIPQUAD(saddr), NIPQUAD(baddr)); ++ ++ nsa->saddr = saddr; ++ nsa->baddr = baddr; ++ return 0; ++} ++ ++static inline ++void v4_set_sock_addr(struct inet_sock *inet, struct nx_v4_sock_addr *nsa) ++{ ++ inet->inet_saddr = nsa->baddr; ++ inet->inet_rcv_saddr = nsa->baddr; ++} ++ ++ ++/* ++ * helper to simplify inet_lookup_listener ++ * ++ * nxi: the socket's nx_info if any ++ * addr: to be verified address ++ * saddr: socket address ++ */ ++static inline int v4_inet_addr_match ( ++ struct nx_info *nxi, ++ __be32 addr, ++ __be32 saddr) ++{ ++ if (addr && (saddr == addr)) ++ return 1; ++ if (!saddr) ++ return nxi ? v4_addr_in_nx_info(nxi, addr, NXA_MASK_BIND) : 1; ++ return 0; ++} ++ ++static inline __be32 nx_map_sock_lback(struct nx_info *nxi, __be32 addr) ++{ ++ if (nx_info_flags(nxi, NXF_HIDE_LBACK, 0) && ++ (addr == nxi->v4_lback.s_addr)) ++ return IPI_LOOPBACK; ++ return addr; ++} ++ ++static inline ++int nx_info_has_v4(struct nx_info *nxi) ++{ ++ if (!nxi) ++ return 1; ++ if (NX_IPV4(nxi)) ++ return 1; ++ if (nx_info_flags(nxi, NXF_LBACK_REMAP, 0)) ++ return 1; ++ return 0; ++} ++ ++#else /* CONFIG_INET */ ++ ++static inline ++int nx_dev_visible(struct nx_info *n, struct net_device *d) ++{ ++ return 1; ++} ++ ++static inline ++int nx_v4_addr_conflict(struct nx_info *n, uint32_t a, const struct sock *s) ++{ ++ return 1; ++} ++ ++static inline ++int v4_ifa_in_nx_info(struct in_ifaddr *a, struct nx_info *n) ++{ ++ return 1; ++} ++ ++static inline ++int nx_info_has_v4(struct nx_info *nxi) ++{ ++ return 0; ++} ++ ++#endif /* CONFIG_INET */ ++ ++#define current_nx_info_has_v4() \ ++ nx_info_has_v4(current_nx_info()) ++ ++#else ++// #warning duplicate inclusion ++#endif +diff -NurpP --minimal linux-3.3.8/include/linux/vs_inet6.h linux-3.3.8-vs2.3.3.4/include/linux/vs_inet6.h +--- linux-3.3.8/include/linux/vs_inet6.h 1970-01-01 01:00:00.000000000 +0100 ++++ linux-3.3.8-vs2.3.3.4/include/linux/vs_inet6.h 2012-02-24 03:55:06.000000000 +0100 +@@ -0,0 +1,246 @@ ++#ifndef _VS_INET6_H ++#define _VS_INET6_H ++ ++#include "vserver/base.h" ++#include "vserver/network.h" ++#include "vserver/debug.h" ++ ++#include ++ ++#define NXAV6(a) &(a)->ip, &(a)->mask, (a)->prefix, (a)->type ++#define NXAV6_FMT "[%pI6/%pI6/%d:%04x]" ++ ++ ++#ifdef CONFIG_IPV6 ++ ++static inline ++int v6_addr_match(struct nx_addr_v6 *nxa, ++ const struct in6_addr *addr, uint16_t mask) ++{ ++ int ret = 0; ++ ++ switch (nxa->type & mask) { ++ case NXA_TYPE_MASK: ++ ret = ipv6_masked_addr_cmp(&nxa->ip, &nxa->mask, addr); ++ break; ++ case NXA_TYPE_ADDR: ++ ret = ipv6_addr_equal(&nxa->ip, addr); ++ break; ++ case NXA_TYPE_ANY: ++ ret = 1; ++ break; ++ } ++ vxdprintk(VXD_CBIT(net, 0), ++ "v6_addr_match(%p" NXAV6_FMT ",%pI6,%04x) = %d", ++ nxa, NXAV6(nxa), addr, mask, ret); ++ return ret; ++} ++ ++static inline ++int v6_addr_in_nx_info(struct nx_info *nxi, ++ const struct in6_addr *addr, uint16_t mask) ++{ ++ struct nx_addr_v6 *nxa; ++ int ret = 1; ++ ++ if (!nxi) ++ 
goto out; ++ for (nxa = &nxi->v6; nxa; nxa = nxa->next) ++ if (v6_addr_match(nxa, addr, mask)) ++ goto out; ++ ret = 0; ++out: ++ vxdprintk(VXD_CBIT(net, 0), ++ "v6_addr_in_nx_info(%p[#%u],%pI6,%04x) = %d", ++ nxi, nxi ? nxi->nx_id : 0, addr, mask, ret); ++ return ret; ++} ++ ++static inline ++int v6_nx_addr_match(struct nx_addr_v6 *nxa, struct nx_addr_v6 *addr, uint16_t mask) ++{ ++ /* FIXME: needs full range checks */ ++ return v6_addr_match(nxa, &addr->ip, mask); ++} ++ ++static inline ++int v6_nx_addr_in_nx_info(struct nx_info *nxi, struct nx_addr_v6 *nxa, uint16_t mask) ++{ ++ struct nx_addr_v6 *ptr; ++ ++ for (ptr = &nxi->v6; ptr; ptr = ptr->next) ++ if (v6_nx_addr_match(ptr, nxa, mask)) ++ return 1; ++ return 0; ++} ++ ++ ++/* ++ * Check if a given address matches for a socket ++ * ++ * nxi: the socket's nx_info if any ++ * addr: to be verified address ++ */ ++static inline ++int v6_sock_addr_match ( ++ struct nx_info *nxi, ++ struct inet_sock *inet, ++ struct in6_addr *addr) ++{ ++ struct sock *sk = &inet->sk; ++ struct in6_addr *saddr = inet6_rcv_saddr(sk); ++ ++ if (!ipv6_addr_any(addr) && ++ ipv6_addr_equal(saddr, addr)) ++ return 1; ++ if (ipv6_addr_any(saddr)) ++ return v6_addr_in_nx_info(nxi, addr, -1); ++ return 0; ++} ++ ++/* ++ * check if address is covered by socket ++ * ++ * sk: the socket to check against ++ * addr: the address in question (must be != 0) ++ */ ++ ++static inline ++int __v6_addr_match_socket(const struct sock *sk, struct nx_addr_v6 *nxa) ++{ ++ struct nx_info *nxi = sk->sk_nx_info; ++ struct in6_addr *saddr = inet6_rcv_saddr(sk); ++ ++ vxdprintk(VXD_CBIT(net, 5), ++ "__v6_addr_in_socket(%p," NXAV6_FMT ") %p:%pI6 %p;%lx", ++ sk, NXAV6(nxa), nxi, saddr, sk->sk_socket, ++ (sk->sk_socket?sk->sk_socket->flags:0)); ++ ++ if (!ipv6_addr_any(saddr)) { /* direct address match */ ++ return v6_addr_match(nxa, saddr, -1); ++ } else if (nxi) { /* match against nx_info */ ++ return v6_nx_addr_in_nx_info(nxi, nxa, -1); ++ } else { /* unrestricted any socket */ ++ return 1; ++ } ++} ++ ++ ++/* inet related checks and helpers */ ++ ++ ++struct in_ifaddr; ++struct net_device; ++struct sock; ++ ++ ++#include ++#include ++#include ++ ++ ++int dev_in_nx_info(struct net_device *, struct nx_info *); ++int v6_dev_in_nx_info(struct net_device *, struct nx_info *); ++int nx_v6_addr_conflict(struct nx_info *, struct nx_info *); ++ ++ ++ ++static inline ++int v6_ifa_in_nx_info(struct inet6_ifaddr *ifa, struct nx_info *nxi) ++{ ++ if (!nxi) ++ return 1; ++ if (!ifa) ++ return 0; ++ return v6_addr_in_nx_info(nxi, &ifa->addr, -1); ++} ++ ++static inline ++int nx_v6_ifa_visible(struct nx_info *nxi, struct inet6_ifaddr *ifa) ++{ ++ vxdprintk(VXD_CBIT(net, 1), "nx_v6_ifa_visible(%p[#%u],%p) %d", ++ nxi, nxi ? nxi->nx_id : 0, ifa, ++ nxi ? 
v6_ifa_in_nx_info(ifa, nxi) : 0); ++ ++ if (!nx_info_flags(nxi, NXF_HIDE_NETIF, 0)) ++ return 1; ++ if (v6_ifa_in_nx_info(ifa, nxi)) ++ return 1; ++ return 0; ++} ++ ++ ++struct nx_v6_sock_addr { ++ struct in6_addr saddr; /* Address used for validation */ ++ struct in6_addr baddr; /* Address used for socket bind */ ++}; ++ ++static inline ++int v6_map_sock_addr(struct inet_sock *inet, struct sockaddr_in6 *addr, ++ struct nx_v6_sock_addr *nsa) ++{ ++ // struct sock *sk = &inet->sk; ++ // struct nx_info *nxi = sk->sk_nx_info; ++ struct in6_addr saddr = addr->sin6_addr; ++ struct in6_addr baddr = saddr; ++ ++ nsa->saddr = saddr; ++ nsa->baddr = baddr; ++ return 0; ++} ++ ++static inline ++void v6_set_sock_addr(struct inet_sock *inet, struct nx_v6_sock_addr *nsa) ++{ ++ // struct sock *sk = &inet->sk; ++ // struct in6_addr *saddr = inet6_rcv_saddr(sk); ++ ++ // *saddr = nsa->baddr; ++ // inet->inet_saddr = nsa->baddr; ++} ++ ++static inline ++int nx_info_has_v6(struct nx_info *nxi) ++{ ++ if (!nxi) ++ return 1; ++ if (NX_IPV6(nxi)) ++ return 1; ++ return 0; ++} ++ ++#else /* CONFIG_IPV6 */ ++ ++static inline ++int nx_v6_dev_visible(struct nx_info *n, struct net_device *d) ++{ ++ return 1; ++} ++ ++ ++static inline ++int nx_v6_addr_conflict(struct nx_info *n, uint32_t a, const struct sock *s) ++{ ++ return 1; ++} ++ ++static inline ++int v6_ifa_in_nx_info(struct in_ifaddr *a, struct nx_info *n) ++{ ++ return 1; ++} ++ ++static inline ++int nx_info_has_v6(struct nx_info *nxi) ++{ ++ return 0; ++} ++ ++#endif /* CONFIG_IPV6 */ ++ ++#define current_nx_info_has_v6() \ ++ nx_info_has_v6(current_nx_info()) ++ ++#else ++#warning duplicate inclusion ++#endif +diff -NurpP --minimal linux-3.3.8/include/linux/vs_limit.h linux-3.3.8-vs2.3.3.4/include/linux/vs_limit.h +--- linux-3.3.8/include/linux/vs_limit.h 1970-01-01 01:00:00.000000000 +0100 ++++ linux-3.3.8-vs2.3.3.4/include/linux/vs_limit.h 2012-02-24 03:55:06.000000000 +0100 +@@ -0,0 +1,140 @@ ++#ifndef _VS_LIMIT_H ++#define _VS_LIMIT_H ++ ++#include "vserver/limit.h" ++#include "vserver/base.h" ++#include "vserver/context.h" ++#include "vserver/debug.h" ++#include "vserver/context.h" ++#include "vserver/limit_int.h" ++ ++ ++#define vx_acc_cres(v, d, p, r) \ ++ __vx_acc_cres(v, r, d, p, __FILE__, __LINE__) ++ ++#define vx_acc_cres_cond(x, d, p, r) \ ++ __vx_acc_cres(((x) == vx_current_xid()) ? current_vx_info() : 0, \ ++ r, d, p, __FILE__, __LINE__) ++ ++ ++#define vx_add_cres(v, a, p, r) \ ++ __vx_add_cres(v, r, a, p, __FILE__, __LINE__) ++#define vx_sub_cres(v, a, p, r) vx_add_cres(v, -(a), p, r) ++ ++#define vx_add_cres_cond(x, a, p, r) \ ++ __vx_add_cres(((x) == vx_current_xid()) ? 
current_vx_info() : 0, \ ++ r, a, p, __FILE__, __LINE__) ++#define vx_sub_cres_cond(x, a, p, r) vx_add_cres_cond(x, -(a), p, r) ++ ++ ++/* process and file limits */ ++ ++#define vx_nproc_inc(p) \ ++ vx_acc_cres((p)->vx_info, 1, p, RLIMIT_NPROC) ++ ++#define vx_nproc_dec(p) \ ++ vx_acc_cres((p)->vx_info,-1, p, RLIMIT_NPROC) ++ ++#define vx_files_inc(f) \ ++ vx_acc_cres_cond((f)->f_xid, 1, f, RLIMIT_NOFILE) ++ ++#define vx_files_dec(f) \ ++ vx_acc_cres_cond((f)->f_xid,-1, f, RLIMIT_NOFILE) ++ ++#define vx_locks_inc(l) \ ++ vx_acc_cres_cond((l)->fl_xid, 1, l, RLIMIT_LOCKS) ++ ++#define vx_locks_dec(l) \ ++ vx_acc_cres_cond((l)->fl_xid,-1, l, RLIMIT_LOCKS) ++ ++#define vx_openfd_inc(f) \ ++ vx_acc_cres(current_vx_info(), 1, (void *)(long)(f), VLIMIT_OPENFD) ++ ++#define vx_openfd_dec(f) \ ++ vx_acc_cres(current_vx_info(),-1, (void *)(long)(f), VLIMIT_OPENFD) ++ ++ ++#define vx_cres_avail(v, n, r) \ ++ __vx_cres_avail(v, r, n, __FILE__, __LINE__) ++ ++ ++#define vx_nproc_avail(n) \ ++ vx_cres_avail(current_vx_info(), n, RLIMIT_NPROC) ++ ++#define vx_files_avail(n) \ ++ vx_cres_avail(current_vx_info(), n, RLIMIT_NOFILE) ++ ++#define vx_locks_avail(n) \ ++ vx_cres_avail(current_vx_info(), n, RLIMIT_LOCKS) ++ ++#define vx_openfd_avail(n) \ ++ vx_cres_avail(current_vx_info(), n, VLIMIT_OPENFD) ++ ++ ++/* dentry limits */ ++ ++#define vx_dentry_inc(d) do { \ ++ if ((d)->d_count == 1) \ ++ vx_acc_cres(current_vx_info(), 1, d, VLIMIT_DENTRY); \ ++ } while (0) ++ ++#define vx_dentry_dec(d) do { \ ++ if ((d)->d_count == 0) \ ++ vx_acc_cres(current_vx_info(),-1, d, VLIMIT_DENTRY); \ ++ } while (0) ++ ++#define vx_dentry_avail(n) \ ++ vx_cres_avail(current_vx_info(), n, VLIMIT_DENTRY) ++ ++ ++/* socket limits */ ++ ++#define vx_sock_inc(s) \ ++ vx_acc_cres((s)->sk_vx_info, 1, s, VLIMIT_NSOCK) ++ ++#define vx_sock_dec(s) \ ++ vx_acc_cres((s)->sk_vx_info,-1, s, VLIMIT_NSOCK) ++ ++#define vx_sock_avail(n) \ ++ vx_cres_avail(current_vx_info(), n, VLIMIT_NSOCK) ++ ++ ++/* ipc resource limits */ ++ ++#define vx_ipcmsg_add(v, u, a) \ ++ vx_add_cres(v, a, u, RLIMIT_MSGQUEUE) ++ ++#define vx_ipcmsg_sub(v, u, a) \ ++ vx_sub_cres(v, a, u, RLIMIT_MSGQUEUE) ++ ++#define vx_ipcmsg_avail(v, a) \ ++ vx_cres_avail(v, a, RLIMIT_MSGQUEUE) ++ ++ ++#define vx_ipcshm_add(v, k, a) \ ++ vx_add_cres(v, a, (void *)(long)(k), VLIMIT_SHMEM) ++ ++#define vx_ipcshm_sub(v, k, a) \ ++ vx_sub_cres(v, a, (void *)(long)(k), VLIMIT_SHMEM) ++ ++#define vx_ipcshm_avail(v, a) \ ++ vx_cres_avail(v, a, VLIMIT_SHMEM) ++ ++ ++#define vx_semary_inc(a) \ ++ vx_acc_cres(current_vx_info(), 1, a, VLIMIT_SEMARY) ++ ++#define vx_semary_dec(a) \ ++ vx_acc_cres(current_vx_info(), -1, a, VLIMIT_SEMARY) ++ ++ ++#define vx_nsems_add(a,n) \ ++ vx_add_cres(current_vx_info(), n, a, VLIMIT_NSEMS) ++ ++#define vx_nsems_sub(a,n) \ ++ vx_sub_cres(current_vx_info(), n, a, VLIMIT_NSEMS) ++ ++ ++#else ++#warning duplicate inclusion ++#endif +diff -NurpP --minimal linux-3.3.8/include/linux/vs_network.h linux-3.3.8-vs2.3.3.4/include/linux/vs_network.h +--- linux-3.3.8/include/linux/vs_network.h 1970-01-01 01:00:00.000000000 +0100 ++++ linux-3.3.8-vs2.3.3.4/include/linux/vs_network.h 2012-02-24 03:55:06.000000000 +0100 +@@ -0,0 +1,169 @@ ++#ifndef _NX_VS_NETWORK_H ++#define _NX_VS_NETWORK_H ++ ++#include "vserver/context.h" ++#include "vserver/network.h" ++#include "vserver/base.h" ++#include "vserver/check.h" ++#include "vserver/debug.h" ++ ++#include ++ ++ ++#define get_nx_info(i) __get_nx_info(i, __FILE__, __LINE__) ++ ++static inline struct nx_info 
*__get_nx_info(struct nx_info *nxi, ++ const char *_file, int _line) ++{ ++ if (!nxi) ++ return NULL; ++ ++ vxlprintk(VXD_CBIT(nid, 2), "get_nx_info(%p[#%d.%d])", ++ nxi, nxi ? nxi->nx_id : 0, ++ nxi ? atomic_read(&nxi->nx_usecnt) : 0, ++ _file, _line); ++ ++ atomic_inc(&nxi->nx_usecnt); ++ return nxi; ++} ++ ++ ++extern void free_nx_info(struct nx_info *); ++ ++#define put_nx_info(i) __put_nx_info(i, __FILE__, __LINE__) ++ ++static inline void __put_nx_info(struct nx_info *nxi, const char *_file, int _line) ++{ ++ if (!nxi) ++ return; ++ ++ vxlprintk(VXD_CBIT(nid, 2), "put_nx_info(%p[#%d.%d])", ++ nxi, nxi ? nxi->nx_id : 0, ++ nxi ? atomic_read(&nxi->nx_usecnt) : 0, ++ _file, _line); ++ ++ if (atomic_dec_and_test(&nxi->nx_usecnt)) ++ free_nx_info(nxi); ++} ++ ++ ++#define init_nx_info(p, i) __init_nx_info(p, i, __FILE__, __LINE__) ++ ++static inline void __init_nx_info(struct nx_info **nxp, struct nx_info *nxi, ++ const char *_file, int _line) ++{ ++ if (nxi) { ++ vxlprintk(VXD_CBIT(nid, 3), ++ "init_nx_info(%p[#%d.%d])", ++ nxi, nxi ? nxi->nx_id : 0, ++ nxi ? atomic_read(&nxi->nx_usecnt) : 0, ++ _file, _line); ++ ++ atomic_inc(&nxi->nx_usecnt); ++ } ++ *nxp = nxi; ++} ++ ++ ++#define set_nx_info(p, i) __set_nx_info(p, i, __FILE__, __LINE__) ++ ++static inline void __set_nx_info(struct nx_info **nxp, struct nx_info *nxi, ++ const char *_file, int _line) ++{ ++ struct nx_info *nxo; ++ ++ if (!nxi) ++ return; ++ ++ vxlprintk(VXD_CBIT(nid, 3), "set_nx_info(%p[#%d.%d])", ++ nxi, nxi ? nxi->nx_id : 0, ++ nxi ? atomic_read(&nxi->nx_usecnt) : 0, ++ _file, _line); ++ ++ atomic_inc(&nxi->nx_usecnt); ++ nxo = xchg(nxp, nxi); ++ BUG_ON(nxo); ++} ++ ++#define clr_nx_info(p) __clr_nx_info(p, __FILE__, __LINE__) ++ ++static inline void __clr_nx_info(struct nx_info **nxp, ++ const char *_file, int _line) ++{ ++ struct nx_info *nxo; ++ ++ nxo = xchg(nxp, NULL); ++ if (!nxo) ++ return; ++ ++ vxlprintk(VXD_CBIT(nid, 3), "clr_nx_info(%p[#%d.%d])", ++ nxo, nxo ? nxo->nx_id : 0, ++ nxo ? atomic_read(&nxo->nx_usecnt) : 0, ++ _file, _line); ++ ++ if (atomic_dec_and_test(&nxo->nx_usecnt)) ++ free_nx_info(nxo); ++} ++ ++ ++#define claim_nx_info(v, p) __claim_nx_info(v, p, __FILE__, __LINE__) ++ ++static inline void __claim_nx_info(struct nx_info *nxi, ++ struct task_struct *task, const char *_file, int _line) ++{ ++ vxlprintk(VXD_CBIT(nid, 3), "claim_nx_info(%p[#%d.%d.%d]) %p", ++ nxi, nxi ? nxi->nx_id : 0, ++ nxi?atomic_read(&nxi->nx_usecnt):0, ++ nxi?atomic_read(&nxi->nx_tasks):0, ++ task, _file, _line); ++ ++ atomic_inc(&nxi->nx_tasks); ++} ++ ++ ++extern void unhash_nx_info(struct nx_info *); ++ ++#define release_nx_info(v, p) __release_nx_info(v, p, __FILE__, __LINE__) ++ ++static inline void __release_nx_info(struct nx_info *nxi, ++ struct task_struct *task, const char *_file, int _line) ++{ ++ vxlprintk(VXD_CBIT(nid, 3), "release_nx_info(%p[#%d.%d.%d]) %p", ++ nxi, nxi ? nxi->nx_id : 0, ++ nxi ? atomic_read(&nxi->nx_usecnt) : 0, ++ nxi ? 
atomic_read(&nxi->nx_tasks) : 0, ++ task, _file, _line); ++ ++ might_sleep(); ++ ++ if (atomic_dec_and_test(&nxi->nx_tasks)) ++ unhash_nx_info(nxi); ++} ++ ++ ++#define task_get_nx_info(i) __task_get_nx_info(i, __FILE__, __LINE__) ++ ++static __inline__ struct nx_info *__task_get_nx_info(struct task_struct *p, ++ const char *_file, int _line) ++{ ++ struct nx_info *nxi; ++ ++ task_lock(p); ++ vxlprintk(VXD_CBIT(nid, 5), "task_get_nx_info(%p)", ++ p, _file, _line); ++ nxi = __get_nx_info(p->nx_info, _file, _line); ++ task_unlock(p); ++ return nxi; ++} ++ ++ ++static inline void exit_nx_info(struct task_struct *p) ++{ ++ if (p->nx_info) ++ release_nx_info(p->nx_info, p); ++} ++ ++ ++#else ++#warning duplicate inclusion ++#endif +diff -NurpP --minimal linux-3.3.8/include/linux/vs_pid.h linux-3.3.8-vs2.3.3.4/include/linux/vs_pid.h +--- linux-3.3.8/include/linux/vs_pid.h 1970-01-01 01:00:00.000000000 +0100 ++++ linux-3.3.8-vs2.3.3.4/include/linux/vs_pid.h 2012-02-24 03:55:06.000000000 +0100 +@@ -0,0 +1,50 @@ ++#ifndef _VS_PID_H ++#define _VS_PID_H ++ ++#include "vserver/base.h" ++#include "vserver/check.h" ++#include "vserver/context.h" ++#include "vserver/debug.h" ++#include "vserver/pid.h" ++#include ++ ++ ++#define VXF_FAKE_INIT (VXF_INFO_INIT | VXF_STATE_INIT) ++ ++static inline ++int vx_proc_task_visible(struct task_struct *task) ++{ ++ if ((task->pid == 1) && ++ !vx_flags(VXF_FAKE_INIT, VXF_FAKE_INIT)) ++ /* show a blend through init */ ++ goto visible; ++ if (vx_check(vx_task_xid(task), VS_WATCH | VS_IDENT)) ++ goto visible; ++ return 0; ++visible: ++ return 1; ++} ++ ++#define find_task_by_real_pid(pid) find_task_by_pid_ns(pid, &init_pid_ns) ++ ++ ++static inline ++struct task_struct *vx_get_proc_task(struct inode *inode, struct pid *pid) ++{ ++ struct task_struct *task = get_pid_task(pid, PIDTYPE_PID); ++ ++ if (task && !vx_proc_task_visible(task)) { ++ vxdprintk(VXD_CBIT(misc, 6), ++ "dropping task (get) %p[#%u,%u] for %p[#%u,%u]", ++ task, task->xid, task->pid, ++ current, current->xid, current->pid); ++ put_task_struct(task); ++ task = NULL; ++ } ++ return task; ++} ++ ++ ++#else ++#warning duplicate inclusion ++#endif +diff -NurpP --minimal linux-3.3.8/include/linux/vs_sched.h linux-3.3.8-vs2.3.3.4/include/linux/vs_sched.h +--- linux-3.3.8/include/linux/vs_sched.h 1970-01-01 01:00:00.000000000 +0100 ++++ linux-3.3.8-vs2.3.3.4/include/linux/vs_sched.h 2012-02-24 03:55:06.000000000 +0100 +@@ -0,0 +1,40 @@ ++#ifndef _VS_SCHED_H ++#define _VS_SCHED_H ++ ++#include "vserver/base.h" ++#include "vserver/context.h" ++#include "vserver/sched.h" ++ ++ ++#define MAX_PRIO_BIAS 20 ++#define MIN_PRIO_BIAS -20 ++ ++static inline ++int vx_adjust_prio(struct task_struct *p, int prio, int max_user) ++{ ++ struct vx_info *vxi = p->vx_info; ++ ++ if (vxi) ++ prio += vx_cpu(vxi, sched_pc).prio_bias; ++ return prio; ++} ++ ++static inline void vx_account_user(struct vx_info *vxi, ++ cputime_t cputime, int nice) ++{ ++ if (!vxi) ++ return; ++ vx_cpu(vxi, sched_pc).user_ticks += cputime; ++} ++ ++static inline void vx_account_system(struct vx_info *vxi, ++ cputime_t cputime, int idle) ++{ ++ if (!vxi) ++ return; ++ vx_cpu(vxi, sched_pc).sys_ticks += cputime; ++} ++ ++#else ++#warning duplicate inclusion ++#endif +diff -NurpP --minimal linux-3.3.8/include/linux/vs_socket.h linux-3.3.8-vs2.3.3.4/include/linux/vs_socket.h +--- linux-3.3.8/include/linux/vs_socket.h 1970-01-01 01:00:00.000000000 +0100 ++++ linux-3.3.8-vs2.3.3.4/include/linux/vs_socket.h 2012-02-24 03:55:06.000000000 +0100 +@@ -0,0 +1,67 @@ 
++#ifndef _VS_SOCKET_H ++#define _VS_SOCKET_H ++ ++#include "vserver/debug.h" ++#include "vserver/base.h" ++#include "vserver/cacct.h" ++#include "vserver/context.h" ++#include "vserver/tag.h" ++ ++ ++/* socket accounting */ ++ ++#include ++ ++static inline int vx_sock_type(int family) ++{ ++ switch (family) { ++ case PF_UNSPEC: ++ return VXA_SOCK_UNSPEC; ++ case PF_UNIX: ++ return VXA_SOCK_UNIX; ++ case PF_INET: ++ return VXA_SOCK_INET; ++ case PF_INET6: ++ return VXA_SOCK_INET6; ++ case PF_PACKET: ++ return VXA_SOCK_PACKET; ++ default: ++ return VXA_SOCK_OTHER; ++ } ++} ++ ++#define vx_acc_sock(v, f, p, s) \ ++ __vx_acc_sock(v, f, p, s, __FILE__, __LINE__) ++ ++static inline void __vx_acc_sock(struct vx_info *vxi, ++ int family, int pos, int size, char *file, int line) ++{ ++ if (vxi) { ++ int type = vx_sock_type(family); ++ ++ atomic_long_inc(&vxi->cacct.sock[type][pos].count); ++ atomic_long_add(size, &vxi->cacct.sock[type][pos].total); ++ } ++} ++ ++#define vx_sock_recv(sk, s) \ ++ vx_acc_sock((sk)->sk_vx_info, (sk)->sk_family, 0, s) ++#define vx_sock_send(sk, s) \ ++ vx_acc_sock((sk)->sk_vx_info, (sk)->sk_family, 1, s) ++#define vx_sock_fail(sk, s) \ ++ vx_acc_sock((sk)->sk_vx_info, (sk)->sk_family, 2, s) ++ ++ ++#define sock_vx_init(s) do { \ ++ (s)->sk_xid = 0; \ ++ (s)->sk_vx_info = NULL; \ ++ } while (0) ++ ++#define sock_nx_init(s) do { \ ++ (s)->sk_nid = 0; \ ++ (s)->sk_nx_info = NULL; \ ++ } while (0) ++ ++#else ++#warning duplicate inclusion ++#endif +diff -NurpP --minimal linux-3.3.8/include/linux/vs_tag.h linux-3.3.8-vs2.3.3.4/include/linux/vs_tag.h +--- linux-3.3.8/include/linux/vs_tag.h 1970-01-01 01:00:00.000000000 +0100 ++++ linux-3.3.8-vs2.3.3.4/include/linux/vs_tag.h 2012-02-24 03:55:06.000000000 +0100 +@@ -0,0 +1,47 @@ ++#ifndef _VS_TAG_H ++#define _VS_TAG_H ++ ++#include ++ ++/* check conditions */ ++ ++#define DX_ADMIN 0x0001 ++#define DX_WATCH 0x0002 ++#define DX_HOSTID 0x0008 ++ ++#define DX_IDENT 0x0010 ++ ++#define DX_ARG_MASK 0x0010 ++ ++ ++#define dx_task_tag(t) ((t)->tag) ++ ++#define dx_current_tag() dx_task_tag(current) ++ ++#define dx_check(c, m) __dx_check(dx_current_tag(), c, m) ++ ++#define dx_weak_check(c, m) ((m) ? 
dx_check(c, m) : 1) ++ ++ ++/* ++ * check current context for ADMIN/WATCH and ++ * optionally against supplied argument ++ */ ++static inline int __dx_check(tag_t cid, tag_t id, unsigned int mode) ++{ ++ if (mode & DX_ARG_MASK) { ++ if ((mode & DX_IDENT) && (id == cid)) ++ return 1; ++ } ++ return (((mode & DX_ADMIN) && (cid == 0)) || ++ ((mode & DX_WATCH) && (cid == 1)) || ++ ((mode & DX_HOSTID) && (id == 0))); ++} ++ ++struct inode; ++int dx_permission(const struct inode *inode, int mask); ++ ++ ++#else ++#warning duplicate inclusion ++#endif +diff -NurpP --minimal linux-3.3.8/include/linux/vs_time.h linux-3.3.8-vs2.3.3.4/include/linux/vs_time.h +--- linux-3.3.8/include/linux/vs_time.h 1970-01-01 01:00:00.000000000 +0100 ++++ linux-3.3.8-vs2.3.3.4/include/linux/vs_time.h 2012-02-24 03:55:06.000000000 +0100 +@@ -0,0 +1,19 @@ ++#ifndef _VS_TIME_H ++#define _VS_TIME_H ++ ++ ++/* time faking stuff */ ++ ++#ifdef CONFIG_VSERVER_VTIME ++ ++extern void vx_adjust_timespec(struct timespec *ts); ++extern int vx_settimeofday(const struct timespec *ts); ++ ++#else ++#define vx_adjust_timespec(t) do { } while (0) ++#define vx_settimeofday(t) do_settimeofday(t) ++#endif ++ ++#else ++#warning duplicate inclusion ++#endif +diff -NurpP --minimal linux-3.3.8/include/linux/vserver/Kbuild linux-3.3.8-vs2.3.3.4/include/linux/vserver/Kbuild +--- linux-3.3.8/include/linux/vserver/Kbuild 1970-01-01 01:00:00.000000000 +0100 ++++ linux-3.3.8-vs2.3.3.4/include/linux/vserver/Kbuild 2012-02-24 03:55:06.000000000 +0100 +@@ -0,0 +1,8 @@ ++ ++header-y += context_cmd.h network_cmd.h space_cmd.h \ ++ cacct_cmd.h cvirt_cmd.h limit_cmd.h dlimit_cmd.h \ ++ inode_cmd.h tag_cmd.h sched_cmd.h signal_cmd.h \ ++ debug_cmd.h device_cmd.h ++ ++header-y += switch.h network.h monitor.h inode.h device.h ++ +diff -NurpP --minimal linux-3.3.8/include/linux/vserver/base.h linux-3.3.8-vs2.3.3.4/include/linux/vserver/base.h +--- linux-3.3.8/include/linux/vserver/base.h 1970-01-01 01:00:00.000000000 +0100 ++++ linux-3.3.8-vs2.3.3.4/include/linux/vserver/base.h 2012-02-24 03:55:06.000000000 +0100 +@@ -0,0 +1,178 @@ ++#ifndef _VX_BASE_H ++#define _VX_BASE_H ++ ++ ++/* context state changes */ ++ ++enum { ++ VSC_STARTUP = 1, ++ VSC_SHUTDOWN, ++ ++ VSC_NETUP, ++ VSC_NETDOWN, ++}; ++ ++ ++ ++#define vx_task_xid(t) ((t)->xid) ++ ++#define vx_current_xid() vx_task_xid(current) ++ ++#define current_vx_info() (current->vx_info) ++ ++ ++#define nx_task_nid(t) ((t)->nid) ++ ++#define nx_current_nid() nx_task_nid(current) ++ ++#define current_nx_info() (current->nx_info) ++ ++ ++/* generic flag merging */ ++ ++#define vs_check_flags(v, m, f) (((v) & (m)) ^ (f)) ++ ++#define vs_mask_flags(v, f, m) (((v) & ~(m)) | ((f) & (m))) ++ ++#define vs_mask_mask(v, f, m) (((v) & ~(m)) | ((v) & (f) & (m))) ++ ++#define vs_check_bit(v, n) ((v) & (1LL << (n))) ++ ++ ++/* context flags */ ++ ++#define __vx_flags(v) ((v) ? (v)->vx_flags : 0) ++ ++#define vx_current_flags() __vx_flags(current_vx_info()) ++ ++#define vx_info_flags(v, m, f) \ ++ vs_check_flags(__vx_flags(v), m, f) ++ ++#define task_vx_flags(t, m, f) \ ++ ((t) && vx_info_flags((t)->vx_info, m, f)) ++ ++#define vx_flags(m, f) vx_info_flags(current_vx_info(), m, f) ++ ++ ++/* context caps */ ++ ++#define __vx_ccaps(v) ((v) ? (v)->vx_ccaps : 0) ++ ++#define vx_current_ccaps() __vx_ccaps(current_vx_info()) ++ ++#define vx_info_ccaps(v, c) (__vx_ccaps(v) & (c)) ++ ++#define vx_ccaps(c) vx_info_ccaps(current_vx_info(), (c)) ++ ++ ++ ++/* network flags */ ++ ++#define __nx_flags(n) ((n) ? 
(n)->nx_flags : 0) ++ ++#define nx_current_flags() __nx_flags(current_nx_info()) ++ ++#define nx_info_flags(n, m, f) \ ++ vs_check_flags(__nx_flags(n), m, f) ++ ++#define task_nx_flags(t, m, f) \ ++ ((t) && nx_info_flags((t)->nx_info, m, f)) ++ ++#define nx_flags(m, f) nx_info_flags(current_nx_info(), m, f) ++ ++ ++/* network caps */ ++ ++#define __nx_ncaps(n) ((n) ? (n)->nx_ncaps : 0) ++ ++#define nx_current_ncaps() __nx_ncaps(current_nx_info()) ++ ++#define nx_info_ncaps(n, c) (__nx_ncaps(n) & (c)) ++ ++#define nx_ncaps(c) nx_info_ncaps(current_nx_info(), c) ++ ++ ++/* context mask capabilities */ ++ ++#define __vx_mcaps(v) ((v) ? (v)->vx_ccaps >> 32UL : ~0 ) ++ ++#define vx_info_mcaps(v, c) (__vx_mcaps(v) & (c)) ++ ++#define vx_mcaps(c) vx_info_mcaps(current_vx_info(), c) ++ ++ ++/* context bcap mask */ ++ ++#define __vx_bcaps(v) ((v)->vx_bcaps) ++ ++#define vx_current_bcaps() __vx_bcaps(current_vx_info()) ++ ++ ++/* mask given bcaps */ ++ ++#define vx_info_mbcaps(v, c) ((v) ? cap_intersect(__vx_bcaps(v), c) : c) ++ ++#define vx_mbcaps(c) vx_info_mbcaps(current_vx_info(), c) ++ ++ ++/* masked cap_bset */ ++ ++#define vx_info_cap_bset(v) vx_info_mbcaps(v, current->cap_bset) ++ ++#define vx_current_cap_bset() vx_info_cap_bset(current_vx_info()) ++ ++#if 0 ++#define vx_info_mbcap(v, b) \ ++ (!vx_info_flags(v, VXF_STATE_SETUP, 0) ? \ ++ vx_info_bcaps(v, b) : (b)) ++ ++#define task_vx_mbcap(t, b) \ ++ vx_info_mbcap((t)->vx_info, (t)->b) ++ ++#define vx_mbcap(b) task_vx_mbcap(current, b) ++#endif ++ ++#define vx_cap_raised(v, c, f) cap_raised(vx_info_mbcaps(v, c), f) ++ ++#define vx_capable(b, c) (capable(b) || \ ++ (cap_raised(current_cap(), b) && vx_ccaps(c))) ++ ++#define vx_ns_capable(n, b, c) (ns_capable(n, b) || \ ++ (cap_raised(current_cap(), b) && vx_ccaps(c))) ++ ++#define nx_capable(b, c) (capable(b) || \ ++ (cap_raised(current_cap(), b) && nx_ncaps(c))) ++ ++#define vx_task_initpid(t, n) \ ++ ((t)->vx_info && \ ++ ((t)->vx_info->vx_initpid == (n))) ++ ++#define vx_current_initpid(n) vx_task_initpid(current, n) ++ ++ ++/* context unshare mask */ ++ ++#define __vx_umask(v) ((v)->vx_umask) ++ ++#define vx_current_umask() __vx_umask(current_vx_info()) ++ ++#define vx_can_unshare(b, f) (capable(b) || \ ++ (cap_raised(current_cap(), b) && \ ++ !((f) & ~vx_current_umask()))) ++ ++ ++#define __vx_wmask(v) ((v)->vx_wmask) ++ ++#define vx_current_wmask() __vx_wmask(current_vx_info()) ++ ++ ++#define __vx_state(v) ((v) ? ((v)->vx_state) : 0) ++ ++#define vx_info_state(v, m) (__vx_state(v) & (m)) ++ ++ ++#define __nx_state(n) ((n) ? 
((n)->nx_state) : 0) ++ ++#define nx_info_state(n, m) (__nx_state(n) & (m)) ++ ++#endif +diff -NurpP --minimal linux-3.3.8/include/linux/vserver/cacct.h linux-3.3.8-vs2.3.3.4/include/linux/vserver/cacct.h +--- linux-3.3.8/include/linux/vserver/cacct.h 1970-01-01 01:00:00.000000000 +0100 ++++ linux-3.3.8-vs2.3.3.4/include/linux/vserver/cacct.h 2012-02-24 03:55:06.000000000 +0100 +@@ -0,0 +1,15 @@ ++#ifndef _VX_CACCT_H ++#define _VX_CACCT_H ++ ++ ++enum sock_acc_field { ++ VXA_SOCK_UNSPEC = 0, ++ VXA_SOCK_UNIX, ++ VXA_SOCK_INET, ++ VXA_SOCK_INET6, ++ VXA_SOCK_PACKET, ++ VXA_SOCK_OTHER, ++ VXA_SOCK_SIZE /* array size */ ++}; ++ ++#endif /* _VX_CACCT_H */ +diff -NurpP --minimal linux-3.3.8/include/linux/vserver/cacct_cmd.h linux-3.3.8-vs2.3.3.4/include/linux/vserver/cacct_cmd.h +--- linux-3.3.8/include/linux/vserver/cacct_cmd.h 1970-01-01 01:00:00.000000000 +0100 ++++ linux-3.3.8-vs2.3.3.4/include/linux/vserver/cacct_cmd.h 2012-02-24 03:55:06.000000000 +0100 +@@ -0,0 +1,23 @@ ++#ifndef _VX_CACCT_CMD_H ++#define _VX_CACCT_CMD_H ++ ++ ++/* virtual host info name commands */ ++ ++#define VCMD_sock_stat VC_CMD(VSTAT, 5, 0) ++ ++struct vcmd_sock_stat_v0 { ++ uint32_t field; ++ uint32_t count[3]; ++ uint64_t total[3]; ++}; ++ ++ ++#ifdef __KERNEL__ ++ ++#include ++ ++extern int vc_sock_stat(struct vx_info *, void __user *); ++ ++#endif /* __KERNEL__ */ ++#endif /* _VX_CACCT_CMD_H */ +diff -NurpP --minimal linux-3.3.8/include/linux/vserver/cacct_def.h linux-3.3.8-vs2.3.3.4/include/linux/vserver/cacct_def.h +--- linux-3.3.8/include/linux/vserver/cacct_def.h 1970-01-01 01:00:00.000000000 +0100 ++++ linux-3.3.8-vs2.3.3.4/include/linux/vserver/cacct_def.h 2012-02-24 03:55:06.000000000 +0100 +@@ -0,0 +1,43 @@ ++#ifndef _VX_CACCT_DEF_H ++#define _VX_CACCT_DEF_H ++ ++#include ++#include ++ ++ ++struct _vx_sock_acc { ++ atomic_long_t count; ++ atomic_long_t total; ++}; ++ ++/* context sub struct */ ++ ++struct _vx_cacct { ++ struct _vx_sock_acc sock[VXA_SOCK_SIZE][3]; ++ atomic_t slab[8]; ++ atomic_t page[6][8]; ++}; ++ ++#ifdef CONFIG_VSERVER_DEBUG ++ ++static inline void __dump_vx_cacct(struct _vx_cacct *cacct) ++{ ++ int i, j; ++ ++ printk("\t_vx_cacct:"); ++ for (i = 0; i < 6; i++) { ++ struct _vx_sock_acc *ptr = cacct->sock[i]; ++ ++ printk("\t [%d] =", i); ++ for (j = 0; j < 3; j++) { ++ printk(" [%d] = %8lu, %8lu", j, ++ atomic_long_read(&ptr[j].count), ++ atomic_long_read(&ptr[j].total)); ++ } ++ printk("\n"); ++ } ++} ++ ++#endif ++ ++#endif /* _VX_CACCT_DEF_H */ +diff -NurpP --minimal linux-3.3.8/include/linux/vserver/cacct_int.h linux-3.3.8-vs2.3.3.4/include/linux/vserver/cacct_int.h +--- linux-3.3.8/include/linux/vserver/cacct_int.h 1970-01-01 01:00:00.000000000 +0100 ++++ linux-3.3.8-vs2.3.3.4/include/linux/vserver/cacct_int.h 2012-02-24 03:55:06.000000000 +0100 +@@ -0,0 +1,21 @@ ++#ifndef _VX_CACCT_INT_H ++#define _VX_CACCT_INT_H ++ ++ ++#ifdef __KERNEL__ ++ ++static inline ++unsigned long vx_sock_count(struct _vx_cacct *cacct, int type, int pos) ++{ ++ return atomic_long_read(&cacct->sock[type][pos].count); ++} ++ ++ ++static inline ++unsigned long vx_sock_total(struct _vx_cacct *cacct, int type, int pos) ++{ ++ return atomic_long_read(&cacct->sock[type][pos].total); ++} ++ ++#endif /* __KERNEL__ */ ++#endif /* _VX_CACCT_INT_H */ +diff -NurpP --minimal linux-3.3.8/include/linux/vserver/check.h linux-3.3.8-vs2.3.3.4/include/linux/vserver/check.h +--- linux-3.3.8/include/linux/vserver/check.h 1970-01-01 01:00:00.000000000 +0100 ++++ linux-3.3.8-vs2.3.3.4/include/linux/vserver/check.h 2012-02-24 
03:55:06.000000000 +0100 +@@ -0,0 +1,89 @@ ++#ifndef _VS_CHECK_H ++#define _VS_CHECK_H ++ ++ ++#define MAX_S_CONTEXT 65535 /* Arbitrary limit */ ++ ++#ifdef CONFIG_VSERVER_DYNAMIC_IDS ++#define MIN_D_CONTEXT 49152 /* dynamic contexts start here */ ++#else ++#define MIN_D_CONTEXT 65536 ++#endif ++ ++/* check conditions */ ++ ++#define VS_ADMIN 0x0001 ++#define VS_WATCH 0x0002 ++#define VS_HIDE 0x0004 ++#define VS_HOSTID 0x0008 ++ ++#define VS_IDENT 0x0010 ++#define VS_EQUIV 0x0020 ++#define VS_PARENT 0x0040 ++#define VS_CHILD 0x0080 ++ ++#define VS_ARG_MASK 0x00F0 ++ ++#define VS_DYNAMIC 0x0100 ++#define VS_STATIC 0x0200 ++ ++#define VS_ATR_MASK 0x0F00 ++ ++#ifdef CONFIG_VSERVER_PRIVACY ++#define VS_ADMIN_P (0) ++#define VS_WATCH_P (0) ++#else ++#define VS_ADMIN_P VS_ADMIN ++#define VS_WATCH_P VS_WATCH ++#endif ++ ++#define VS_HARDIRQ 0x1000 ++#define VS_SOFTIRQ 0x2000 ++#define VS_IRQ 0x4000 ++ ++#define VS_IRQ_MASK 0xF000 ++ ++#include ++ ++/* ++ * check current context for ADMIN/WATCH and ++ * optionally against supplied argument ++ */ ++static inline int __vs_check(int cid, int id, unsigned int mode) ++{ ++ if (mode & VS_ARG_MASK) { ++ if ((mode & VS_IDENT) && (id == cid)) ++ return 1; ++ } ++ if (mode & VS_ATR_MASK) { ++ if ((mode & VS_DYNAMIC) && ++ (id >= MIN_D_CONTEXT) && ++ (id <= MAX_S_CONTEXT)) ++ return 1; ++ if ((mode & VS_STATIC) && ++ (id > 1) && (id < MIN_D_CONTEXT)) ++ return 1; ++ } ++ if (mode & VS_IRQ_MASK) { ++ if ((mode & VS_IRQ) && unlikely(in_interrupt())) ++ return 1; ++ if ((mode & VS_HARDIRQ) && unlikely(in_irq())) ++ return 1; ++ if ((mode & VS_SOFTIRQ) && unlikely(in_softirq())) ++ return 1; ++ } ++ return (((mode & VS_ADMIN) && (cid == 0)) || ++ ((mode & VS_WATCH) && (cid == 1)) || ++ ((mode & VS_HOSTID) && (id == 0))); ++} ++ ++#define vx_check(c, m) __vs_check(vx_current_xid(), c, (m) | VS_IRQ) ++ ++#define vx_weak_check(c, m) ((m) ? vx_check(c, m) : 1) ++ ++ ++#define nx_check(c, m) __vs_check(nx_current_nid(), c, m) ++ ++#define nx_weak_check(c, m) ((m) ? 
nx_check(c, m) : 1) ++ ++#endif +diff -NurpP --minimal linux-3.3.8/include/linux/vserver/context.h linux-3.3.8-vs2.3.3.4/include/linux/vserver/context.h +--- linux-3.3.8/include/linux/vserver/context.h 1970-01-01 01:00:00.000000000 +0100 ++++ linux-3.3.8-vs2.3.3.4/include/linux/vserver/context.h 2012-02-24 03:55:06.000000000 +0100 +@@ -0,0 +1,188 @@ ++#ifndef _VX_CONTEXT_H ++#define _VX_CONTEXT_H ++ ++#include ++#include ++ ++ ++/* context flags */ ++ ++#define VXF_INFO_SCHED 0x00000002 ++#define VXF_INFO_NPROC 0x00000004 ++#define VXF_INFO_PRIVATE 0x00000008 ++ ++#define VXF_INFO_INIT 0x00000010 ++#define VXF_INFO_HIDE 0x00000020 ++#define VXF_INFO_ULIMIT 0x00000040 ++#define VXF_INFO_NSPACE 0x00000080 ++ ++#define VXF_SCHED_HARD 0x00000100 ++#define VXF_SCHED_PRIO 0x00000200 ++#define VXF_SCHED_PAUSE 0x00000400 ++ ++#define VXF_VIRT_MEM 0x00010000 ++#define VXF_VIRT_UPTIME 0x00020000 ++#define VXF_VIRT_CPU 0x00040000 ++#define VXF_VIRT_LOAD 0x00080000 ++#define VXF_VIRT_TIME 0x00100000 ++ ++#define VXF_HIDE_MOUNT 0x01000000 ++/* was VXF_HIDE_NETIF 0x02000000 */ ++#define VXF_HIDE_VINFO 0x04000000 ++ ++#define VXF_STATE_SETUP (1ULL << 32) ++#define VXF_STATE_INIT (1ULL << 33) ++#define VXF_STATE_ADMIN (1ULL << 34) ++ ++#define VXF_SC_HELPER (1ULL << 36) ++#define VXF_REBOOT_KILL (1ULL << 37) ++#define VXF_PERSISTENT (1ULL << 38) ++ ++#define VXF_FORK_RSS (1ULL << 48) ++#define VXF_PROLIFIC (1ULL << 49) ++ ++#define VXF_IGNEG_NICE (1ULL << 52) ++ ++#define VXF_ONE_TIME (0x0007ULL << 32) ++ ++#define VXF_INIT_SET (VXF_STATE_SETUP | VXF_STATE_INIT | VXF_STATE_ADMIN) ++ ++ ++/* context migration */ ++ ++#define VXM_SET_INIT 0x00000001 ++#define VXM_SET_REAPER 0x00000002 ++ ++/* context caps */ ++ ++#define VXC_SET_UTSNAME 0x00000001 ++#define VXC_SET_RLIMIT 0x00000002 ++#define VXC_FS_SECURITY 0x00000004 ++#define VXC_FS_TRUSTED 0x00000008 ++#define VXC_TIOCSTI 0x00000010 ++ ++/* was VXC_RAW_ICMP 0x00000100 */ ++#define VXC_SYSLOG 0x00001000 ++#define VXC_OOM_ADJUST 0x00002000 ++#define VXC_AUDIT_CONTROL 0x00004000 ++ ++#define VXC_SECURE_MOUNT 0x00010000 ++#define VXC_SECURE_REMOUNT 0x00020000 ++#define VXC_BINARY_MOUNT 0x00040000 ++ ++#define VXC_QUOTA_CTL 0x00100000 ++#define VXC_ADMIN_MAPPER 0x00200000 ++#define VXC_ADMIN_CLOOP 0x00400000 ++ ++#define VXC_KTHREAD 0x01000000 ++#define VXC_NAMESPACE 0x02000000 ++ ++ ++#ifdef __KERNEL__ ++ ++#include ++#include ++#include ++ ++#include "limit_def.h" ++#include "sched_def.h" ++#include "cvirt_def.h" ++#include "cacct_def.h" ++#include "device_def.h" ++ ++#define VX_SPACES 2 ++ ++struct _vx_info_pc { ++ struct _vx_sched_pc sched_pc; ++ struct _vx_cvirt_pc cvirt_pc; ++}; ++ ++struct _vx_space { ++ unsigned long vx_nsmask; /* assignment mask */ ++ struct nsproxy *vx_nsproxy; /* private namespaces */ ++ struct fs_struct *vx_fs; /* private namespace fs */ ++ const struct cred *vx_cred; /* task credentials */ ++}; ++ ++struct vx_info { ++ struct hlist_node vx_hlist; /* linked list of contexts */ ++ xid_t vx_id; /* context id */ ++ atomic_t vx_usecnt; /* usage count */ ++ atomic_t vx_tasks; /* tasks count */ ++ struct vx_info *vx_parent; /* parent context */ ++ int vx_state; /* context state */ ++ ++ struct _vx_space space[VX_SPACES]; /* namespace store */ ++ ++ uint64_t vx_flags; /* context flags */ ++ uint64_t vx_ccaps; /* context caps (vserver) */ ++ uint64_t vx_umask; /* unshare mask (guest) */ ++ uint64_t vx_wmask; /* warn mask (guest) */ ++ kernel_cap_t vx_bcaps; /* bounding caps (system) */ ++ ++ struct task_struct *vx_reaper; /* guest reaper 
process */ ++ pid_t vx_initpid; /* PID of guest init */ ++ int64_t vx_badness_bias; /* OOM points bias */ ++ ++ struct _vx_limit limit; /* vserver limits */ ++ struct _vx_sched sched; /* vserver scheduler */ ++ struct _vx_cvirt cvirt; /* virtual/bias stuff */ ++ struct _vx_cacct cacct; /* context accounting */ ++ ++ struct _vx_device dmap; /* default device map targets */ ++ ++#ifndef CONFIG_SMP ++ struct _vx_info_pc info_pc; /* per cpu data */ ++#else ++ struct _vx_info_pc *ptr_pc; /* per cpu array */ ++#endif ++ ++ wait_queue_head_t vx_wait; /* context exit waitqueue */ ++ int reboot_cmd; /* last sys_reboot() cmd */ ++ int exit_code; /* last process exit code */ ++ ++ char vx_name[65]; /* vserver name */ ++}; ++ ++#ifndef CONFIG_SMP ++#define vx_ptr_pc(vxi) (&(vxi)->info_pc) ++#define vx_per_cpu(vxi, v, id) vx_ptr_pc(vxi)->v ++#else ++#define vx_ptr_pc(vxi) ((vxi)->ptr_pc) ++#define vx_per_cpu(vxi, v, id) per_cpu_ptr(vx_ptr_pc(vxi), id)->v ++#endif ++ ++#define vx_cpu(vxi, v) vx_per_cpu(vxi, v, smp_processor_id()) ++ ++ ++struct vx_info_save { ++ struct vx_info *vxi; ++ xid_t xid; ++}; ++ ++ ++/* status flags */ ++ ++#define VXS_HASHED 0x0001 ++#define VXS_PAUSED 0x0010 ++#define VXS_SHUTDOWN 0x0100 ++#define VXS_HELPER 0x1000 ++#define VXS_RELEASED 0x8000 ++ ++ ++extern void claim_vx_info(struct vx_info *, struct task_struct *); ++extern void release_vx_info(struct vx_info *, struct task_struct *); ++ ++extern struct vx_info *lookup_vx_info(int); ++extern struct vx_info *lookup_or_create_vx_info(int); ++ ++extern int get_xid_list(int, unsigned int *, int); ++extern int xid_is_hashed(xid_t); ++ ++extern int vx_migrate_task(struct task_struct *, struct vx_info *, int); ++ ++extern long vs_state_change(struct vx_info *, unsigned int); ++ ++ ++#endif /* __KERNEL__ */ ++#endif /* _VX_CONTEXT_H */ +diff -NurpP --minimal linux-3.3.8/include/linux/vserver/context_cmd.h linux-3.3.8-vs2.3.3.4/include/linux/vserver/context_cmd.h +--- linux-3.3.8/include/linux/vserver/context_cmd.h 1970-01-01 01:00:00.000000000 +0100 ++++ linux-3.3.8-vs2.3.3.4/include/linux/vserver/context_cmd.h 2012-02-24 03:55:06.000000000 +0100 +@@ -0,0 +1,162 @@ ++#ifndef _VX_CONTEXT_CMD_H ++#define _VX_CONTEXT_CMD_H ++ ++ ++/* vinfo commands */ ++ ++#define VCMD_task_xid VC_CMD(VINFO, 1, 0) ++ ++#ifdef __KERNEL__ ++extern int vc_task_xid(uint32_t); ++ ++#endif /* __KERNEL__ */ ++ ++#define VCMD_vx_info VC_CMD(VINFO, 5, 0) ++ ++struct vcmd_vx_info_v0 { ++ uint32_t xid; ++ uint32_t initpid; ++ /* more to come */ ++}; ++ ++#ifdef __KERNEL__ ++extern int vc_vx_info(struct vx_info *, void __user *); ++ ++#endif /* __KERNEL__ */ ++ ++#define VCMD_ctx_stat VC_CMD(VSTAT, 0, 0) ++ ++struct vcmd_ctx_stat_v0 { ++ uint32_t usecnt; ++ uint32_t tasks; ++ /* more to come */ ++}; ++ ++#ifdef __KERNEL__ ++extern int vc_ctx_stat(struct vx_info *, void __user *); ++ ++#endif /* __KERNEL__ */ ++ ++/* context commands */ ++ ++#define VCMD_ctx_create_v0 VC_CMD(VPROC, 1, 0) ++#define VCMD_ctx_create VC_CMD(VPROC, 1, 1) ++ ++struct vcmd_ctx_create { ++ uint64_t flagword; ++}; ++ ++#define VCMD_ctx_migrate_v0 VC_CMD(PROCMIG, 1, 0) ++#define VCMD_ctx_migrate VC_CMD(PROCMIG, 1, 1) ++ ++struct vcmd_ctx_migrate { ++ uint64_t flagword; ++}; ++ ++#ifdef __KERNEL__ ++extern int vc_ctx_create(uint32_t, void __user *); ++extern int vc_ctx_migrate(struct vx_info *, void __user *); ++ ++#endif /* __KERNEL__ */ ++ ++ ++/* flag commands */ ++ ++#define VCMD_get_cflags VC_CMD(FLAGS, 1, 0) ++#define VCMD_set_cflags VC_CMD(FLAGS, 2, 0) ++ ++struct vcmd_ctx_flags_v0 { 
++ uint64_t flagword; ++ uint64_t mask; ++}; ++ ++#ifdef __KERNEL__ ++extern int vc_get_cflags(struct vx_info *, void __user *); ++extern int vc_set_cflags(struct vx_info *, void __user *); ++ ++#endif /* __KERNEL__ */ ++ ++ ++/* context caps commands */ ++ ++#define VCMD_get_ccaps VC_CMD(FLAGS, 3, 1) ++#define VCMD_set_ccaps VC_CMD(FLAGS, 4, 1) ++ ++struct vcmd_ctx_caps_v1 { ++ uint64_t ccaps; ++ uint64_t cmask; ++}; ++ ++#ifdef __KERNEL__ ++extern int vc_get_ccaps(struct vx_info *, void __user *); ++extern int vc_set_ccaps(struct vx_info *, void __user *); ++ ++#endif /* __KERNEL__ */ ++ ++ ++/* bcaps commands */ ++ ++#define VCMD_get_bcaps VC_CMD(FLAGS, 9, 0) ++#define VCMD_set_bcaps VC_CMD(FLAGS, 10, 0) ++ ++struct vcmd_bcaps { ++ uint64_t bcaps; ++ uint64_t bmask; ++}; ++ ++#ifdef __KERNEL__ ++extern int vc_get_bcaps(struct vx_info *, void __user *); ++extern int vc_set_bcaps(struct vx_info *, void __user *); ++ ++#endif /* __KERNEL__ */ ++ ++ ++/* umask commands */ ++ ++#define VCMD_get_umask VC_CMD(FLAGS, 13, 0) ++#define VCMD_set_umask VC_CMD(FLAGS, 14, 0) ++ ++struct vcmd_umask { ++ uint64_t umask; ++ uint64_t mask; ++}; ++ ++#ifdef __KERNEL__ ++extern int vc_get_umask(struct vx_info *, void __user *); ++extern int vc_set_umask(struct vx_info *, void __user *); ++ ++#endif /* __KERNEL__ */ ++ ++ ++/* wmask commands */ ++ ++#define VCMD_get_wmask VC_CMD(FLAGS, 15, 0) ++#define VCMD_set_wmask VC_CMD(FLAGS, 16, 0) ++ ++struct vcmd_wmask { ++ uint64_t wmask; ++ uint64_t mask; ++}; ++ ++#ifdef __KERNEL__ ++extern int vc_get_wmask(struct vx_info *, void __user *); ++extern int vc_set_wmask(struct vx_info *, void __user *); ++ ++#endif /* __KERNEL__ */ ++ ++ ++/* OOM badness */ ++ ++#define VCMD_get_badness VC_CMD(MEMCTRL, 5, 0) ++#define VCMD_set_badness VC_CMD(MEMCTRL, 6, 0) ++ ++struct vcmd_badness_v0 { ++ int64_t bias; ++}; ++ ++#ifdef __KERNEL__ ++extern int vc_get_badness(struct vx_info *, void __user *); ++extern int vc_set_badness(struct vx_info *, void __user *); ++ ++#endif /* __KERNEL__ */ ++#endif /* _VX_CONTEXT_CMD_H */ +diff -NurpP --minimal linux-3.3.8/include/linux/vserver/cvirt.h linux-3.3.8-vs2.3.3.4/include/linux/vserver/cvirt.h +--- linux-3.3.8/include/linux/vserver/cvirt.h 1970-01-01 01:00:00.000000000 +0100 ++++ linux-3.3.8-vs2.3.3.4/include/linux/vserver/cvirt.h 2012-04-24 03:32:01.000000000 +0200 +@@ -0,0 +1,22 @@ ++#ifndef _VX_CVIRT_H ++#define _VX_CVIRT_H ++ ++ ++#ifdef __KERNEL__ ++ ++struct timespec; ++ ++void vx_vsi_boottime(struct timespec *); ++ ++void vx_vsi_uptime(struct timespec *, struct timespec *); ++ ++ ++struct vx_info; ++ ++void vx_update_load(struct vx_info *); ++ ++ ++int vx_do_syslog(int, char __user *, int); ++ ++#endif /* __KERNEL__ */ ++#endif /* _VX_CVIRT_H */ +diff -NurpP --minimal linux-3.3.8/include/linux/vserver/cvirt_cmd.h linux-3.3.8-vs2.3.3.4/include/linux/vserver/cvirt_cmd.h +--- linux-3.3.8/include/linux/vserver/cvirt_cmd.h 1970-01-01 01:00:00.000000000 +0100 ++++ linux-3.3.8-vs2.3.3.4/include/linux/vserver/cvirt_cmd.h 2012-02-24 03:55:06.000000000 +0100 +@@ -0,0 +1,53 @@ ++#ifndef _VX_CVIRT_CMD_H ++#define _VX_CVIRT_CMD_H ++ ++ ++/* virtual host info name commands */ ++ ++#define VCMD_set_vhi_name VC_CMD(VHOST, 1, 0) ++#define VCMD_get_vhi_name VC_CMD(VHOST, 2, 0) ++ ++struct vcmd_vhi_name_v0 { ++ uint32_t field; ++ char name[65]; ++}; ++ ++ ++enum vhi_name_field { ++ VHIN_CONTEXT = 0, ++ VHIN_SYSNAME, ++ VHIN_NODENAME, ++ VHIN_RELEASE, ++ VHIN_VERSION, ++ VHIN_MACHINE, ++ VHIN_DOMAINNAME, ++}; ++ ++ ++#ifdef __KERNEL__ ++ 
++#include ++ ++extern int vc_set_vhi_name(struct vx_info *, void __user *); ++extern int vc_get_vhi_name(struct vx_info *, void __user *); ++ ++#endif /* __KERNEL__ */ ++ ++#define VCMD_virt_stat VC_CMD(VSTAT, 3, 0) ++ ++struct vcmd_virt_stat_v0 { ++ uint64_t offset; ++ uint64_t uptime; ++ uint32_t nr_threads; ++ uint32_t nr_running; ++ uint32_t nr_uninterruptible; ++ uint32_t nr_onhold; ++ uint32_t nr_forks; ++ uint32_t load[3]; ++}; ++ ++#ifdef __KERNEL__ ++extern int vc_virt_stat(struct vx_info *, void __user *); ++ ++#endif /* __KERNEL__ */ ++#endif /* _VX_CVIRT_CMD_H */ +diff -NurpP --minimal linux-3.3.8/include/linux/vserver/cvirt_def.h linux-3.3.8-vs2.3.3.4/include/linux/vserver/cvirt_def.h +--- linux-3.3.8/include/linux/vserver/cvirt_def.h 1970-01-01 01:00:00.000000000 +0100 ++++ linux-3.3.8-vs2.3.3.4/include/linux/vserver/cvirt_def.h 2012-02-24 03:55:06.000000000 +0100 +@@ -0,0 +1,80 @@ ++#ifndef _VX_CVIRT_DEF_H ++#define _VX_CVIRT_DEF_H ++ ++#include ++#include ++#include ++#include ++#include ++ ++ ++struct _vx_usage_stat { ++ uint64_t user; ++ uint64_t nice; ++ uint64_t system; ++ uint64_t softirq; ++ uint64_t irq; ++ uint64_t idle; ++ uint64_t iowait; ++}; ++ ++struct _vx_syslog { ++ wait_queue_head_t log_wait; ++ spinlock_t logbuf_lock; /* lock for the log buffer */ ++ ++ unsigned long log_start; /* next char to be read by syslog() */ ++ unsigned long con_start; /* next char to be sent to consoles */ ++ unsigned long log_end; /* most-recently-written-char + 1 */ ++ unsigned long logged_chars; /* #chars since last read+clear operation */ ++ ++ char log_buf[1024]; ++}; ++ ++ ++/* context sub struct */ ++ ++struct _vx_cvirt { ++ atomic_t nr_threads; /* number of current threads */ ++ atomic_t nr_running; /* number of running threads */ ++ atomic_t nr_uninterruptible; /* number of uninterruptible threads */ ++ ++ atomic_t nr_onhold; /* processes on hold */ ++ uint32_t onhold_last; /* jiffies when put on hold */ ++ ++ struct timespec bias_ts; /* time offset to the host */ ++ struct timespec bias_idle; ++ struct timespec bias_uptime; /* context creation point */ ++ uint64_t bias_clock; /* offset in clock_t */ ++ ++ spinlock_t load_lock; /* lock for the load averages */ ++ atomic_t load_updates; /* nr of load updates done so far */ ++ uint32_t load_last; /* last time load was calculated */ ++ uint32_t load[3]; /* load averages 1,5,15 */ ++ ++ atomic_t total_forks; /* number of forks so far */ ++ ++ struct _vx_syslog syslog; ++}; ++ ++struct _vx_cvirt_pc { ++ struct _vx_usage_stat cpustat; ++}; ++ ++ ++#ifdef CONFIG_VSERVER_DEBUG ++ ++static inline void __dump_vx_cvirt(struct _vx_cvirt *cvirt) ++{ ++ printk("\t_vx_cvirt:\n"); ++ printk("\t threads: %4d, %4d, %4d, %4d\n", ++ atomic_read(&cvirt->nr_threads), ++ atomic_read(&cvirt->nr_running), ++ atomic_read(&cvirt->nr_uninterruptible), ++ atomic_read(&cvirt->nr_onhold)); ++ /* add rest here */ ++ printk("\t total_forks = %d\n", atomic_read(&cvirt->total_forks)); ++} ++ ++#endif ++ ++#endif /* _VX_CVIRT_DEF_H */ +diff -NurpP --minimal linux-3.3.8/include/linux/vserver/debug.h linux-3.3.8-vs2.3.3.4/include/linux/vserver/debug.h +--- linux-3.3.8/include/linux/vserver/debug.h 1970-01-01 01:00:00.000000000 +0100 ++++ linux-3.3.8-vs2.3.3.4/include/linux/vserver/debug.h 2012-02-24 03:55:06.000000000 +0100 +@@ -0,0 +1,145 @@ ++#ifndef _VX_DEBUG_H ++#define _VX_DEBUG_H ++ ++ ++#define VXD_CBIT(n, m) (vs_debug_ ## n & (1 << (m))) ++#define VXD_CMIN(n, m) (vs_debug_ ## n > (m)) ++#define VXD_MASK(n, m) (vs_debug_ ## n & (m)) ++ ++#define 
VXD_DEV(d) (d), (d)->bd_inode->i_ino, \ ++ imajor((d)->bd_inode), iminor((d)->bd_inode) ++#define VXF_DEV "%p[%lu,%d:%d]" ++ ++#if defined(CONFIG_QUOTES_UTF8) ++#define VS_Q_LQM "\xc2\xbb" ++#define VS_Q_RQM "\xc2\xab" ++#elif defined(CONFIG_QUOTES_ASCII) ++#define VS_Q_LQM "\x27" ++#define VS_Q_RQM "\x27" ++#else ++#define VS_Q_LQM "\xbb" ++#define VS_Q_RQM "\xab" ++#endif ++ ++#define VS_Q(f) VS_Q_LQM f VS_Q_RQM ++ ++ ++#define vxd_path(p) \ ++ ({ static char _buffer[PATH_MAX]; \ ++ d_path(p, _buffer, sizeof(_buffer)); }) ++ ++#define vxd_cond_path(n) \ ++ ((n) ? vxd_path(&(n)->path) : "" ) ++ ++ ++#ifdef CONFIG_VSERVER_DEBUG ++ ++extern unsigned int vs_debug_switch; ++extern unsigned int vs_debug_xid; ++extern unsigned int vs_debug_nid; ++extern unsigned int vs_debug_tag; ++extern unsigned int vs_debug_net; ++extern unsigned int vs_debug_limit; ++extern unsigned int vs_debug_cres; ++extern unsigned int vs_debug_dlim; ++extern unsigned int vs_debug_quota; ++extern unsigned int vs_debug_cvirt; ++extern unsigned int vs_debug_space; ++extern unsigned int vs_debug_perm; ++extern unsigned int vs_debug_misc; ++ ++ ++#define VX_LOGLEVEL "vxD: " ++#define VX_PROC_FMT "%p: " ++#define VX_PROCESS current ++ ++#define vxdprintk(c, f, x...) \ ++ do { \ ++ if (c) \ ++ printk(VX_LOGLEVEL VX_PROC_FMT f "\n", \ ++ VX_PROCESS , ##x); \ ++ } while (0) ++ ++#define vxlprintk(c, f, x...) \ ++ do { \ ++ if (c) \ ++ printk(VX_LOGLEVEL f " @%s:%d\n", x); \ ++ } while (0) ++ ++#define vxfprintk(c, f, x...) \ ++ do { \ ++ if (c) \ ++ printk(VX_LOGLEVEL f " %s@%s:%d\n", x); \ ++ } while (0) ++ ++ ++struct vx_info; ++ ++void dump_vx_info(struct vx_info *, int); ++void dump_vx_info_inactive(int); ++ ++#else /* CONFIG_VSERVER_DEBUG */ ++ ++#define vs_debug_switch 0 ++#define vs_debug_xid 0 ++#define vs_debug_nid 0 ++#define vs_debug_tag 0 ++#define vs_debug_net 0 ++#define vs_debug_limit 0 ++#define vs_debug_cres 0 ++#define vs_debug_dlim 0 ++#define vs_debug_quota 0 ++#define vs_debug_cvirt 0 ++#define vs_debug_space 0 ++#define vs_debug_perm 0 ++#define vs_debug_misc 0 ++ ++#define vxdprintk(x...) do { } while (0) ++#define vxlprintk(x...) do { } while (0) ++#define vxfprintk(x...) do { } while (0) ++ ++#endif /* CONFIG_VSERVER_DEBUG */ ++ ++ ++#ifdef CONFIG_VSERVER_WARN ++ ++#define VX_WARNLEVEL KERN_WARNING "vxW: " ++#define VX_WARN_TASK "[" VS_Q("%s") ",%u:#%u|%u|%u] " ++#define VX_WARN_XID "[xid #%u] " ++#define VX_WARN_NID "[nid #%u] " ++#define VX_WARN_TAG "[tag #%u] " ++ ++#define vxwprintk(c, f, x...) \ ++ do { \ ++ if (c) \ ++ printk(VX_WARNLEVEL f "\n", ##x); \ ++ } while (0) ++ ++#else /* CONFIG_VSERVER_WARN */ ++ ++#define vxwprintk(x...) do { } while (0) ++ ++#endif /* CONFIG_VSERVER_WARN */ ++ ++#define vxwprintk_task(c, f, x...) \ ++ vxwprintk(c, VX_WARN_TASK f, \ ++ current->comm, current->pid, \ ++ current->xid, current->nid, current->tag, ##x) ++#define vxwprintk_xid(c, f, x...) \ ++ vxwprintk(c, VX_WARN_XID f, current->xid, x) ++#define vxwprintk_nid(c, f, x...) \ ++ vxwprintk(c, VX_WARN_NID f, current->nid, x) ++#define vxwprintk_tag(c, f, x...) \ ++ vxwprintk(c, VX_WARN_TAG f, current->tag, x) ++ ++#ifdef CONFIG_VSERVER_DEBUG ++#define vxd_assert_lock(l) assert_spin_locked(l) ++#define vxd_assert(c, f, x...) vxlprintk(!(c), \ ++ "assertion [" f "] failed.", ##x, __FILE__, __LINE__) ++#else ++#define vxd_assert_lock(l) do { } while (0) ++#define vxd_assert(c, f, x...) 
do { } while (0) ++#endif ++ ++ ++#endif /* _VX_DEBUG_H */ +diff -NurpP --minimal linux-3.3.8/include/linux/vserver/debug_cmd.h linux-3.3.8-vs2.3.3.4/include/linux/vserver/debug_cmd.h +--- linux-3.3.8/include/linux/vserver/debug_cmd.h 1970-01-01 01:00:00.000000000 +0100 ++++ linux-3.3.8-vs2.3.3.4/include/linux/vserver/debug_cmd.h 2012-02-24 03:55:06.000000000 +0100 +@@ -0,0 +1,58 @@ ++#ifndef _VX_DEBUG_CMD_H ++#define _VX_DEBUG_CMD_H ++ ++ ++/* debug commands */ ++ ++#define VCMD_dump_history VC_CMD(DEBUG, 1, 0) ++ ++#define VCMD_read_history VC_CMD(DEBUG, 5, 0) ++#define VCMD_read_monitor VC_CMD(DEBUG, 6, 0) ++ ++struct vcmd_read_history_v0 { ++ uint32_t index; ++ uint32_t count; ++ char __user *data; ++}; ++ ++struct vcmd_read_monitor_v0 { ++ uint32_t index; ++ uint32_t count; ++ char __user *data; ++}; ++ ++ ++#ifdef __KERNEL__ ++ ++#ifdef CONFIG_COMPAT ++ ++#include ++ ++struct vcmd_read_history_v0_x32 { ++ uint32_t index; ++ uint32_t count; ++ compat_uptr_t data_ptr; ++}; ++ ++struct vcmd_read_monitor_v0_x32 { ++ uint32_t index; ++ uint32_t count; ++ compat_uptr_t data_ptr; ++}; ++ ++#endif /* CONFIG_COMPAT */ ++ ++extern int vc_dump_history(uint32_t); ++ ++extern int vc_read_history(uint32_t, void __user *); ++extern int vc_read_monitor(uint32_t, void __user *); ++ ++#ifdef CONFIG_COMPAT ++ ++extern int vc_read_history_x32(uint32_t, void __user *); ++extern int vc_read_monitor_x32(uint32_t, void __user *); ++ ++#endif /* CONFIG_COMPAT */ ++ ++#endif /* __KERNEL__ */ ++#endif /* _VX_DEBUG_CMD_H */ +diff -NurpP --minimal linux-3.3.8/include/linux/vserver/device.h linux-3.3.8-vs2.3.3.4/include/linux/vserver/device.h +--- linux-3.3.8/include/linux/vserver/device.h 1970-01-01 01:00:00.000000000 +0100 ++++ linux-3.3.8-vs2.3.3.4/include/linux/vserver/device.h 2012-02-24 03:55:06.000000000 +0100 +@@ -0,0 +1,15 @@ ++#ifndef _VX_DEVICE_H ++#define _VX_DEVICE_H ++ ++ ++#define DATTR_CREATE 0x00000001 ++#define DATTR_OPEN 0x00000002 ++ ++#define DATTR_REMAP 0x00000010 ++ ++#define DATTR_MASK 0x00000013 ++ ++ ++#else /* _VX_DEVICE_H */ ++#warning duplicate inclusion ++#endif /* _VX_DEVICE_H */ +diff -NurpP --minimal linux-3.3.8/include/linux/vserver/device_cmd.h linux-3.3.8-vs2.3.3.4/include/linux/vserver/device_cmd.h +--- linux-3.3.8/include/linux/vserver/device_cmd.h 1970-01-01 01:00:00.000000000 +0100 ++++ linux-3.3.8-vs2.3.3.4/include/linux/vserver/device_cmd.h 2012-02-24 03:55:06.000000000 +0100 +@@ -0,0 +1,44 @@ ++#ifndef _VX_DEVICE_CMD_H ++#define _VX_DEVICE_CMD_H ++ ++ ++/* device vserver commands */ ++ ++#define VCMD_set_mapping VC_CMD(DEVICE, 1, 0) ++#define VCMD_unset_mapping VC_CMD(DEVICE, 2, 0) ++ ++struct vcmd_set_mapping_v0 { ++ const char __user *device; ++ const char __user *target; ++ uint32_t flags; ++}; ++ ++ ++#ifdef __KERNEL__ ++ ++#ifdef CONFIG_COMPAT ++ ++#include ++ ++struct vcmd_set_mapping_v0_x32 { ++ compat_uptr_t device_ptr; ++ compat_uptr_t target_ptr; ++ uint32_t flags; ++}; ++ ++#endif /* CONFIG_COMPAT */ ++ ++#include ++ ++extern int vc_set_mapping(struct vx_info *, void __user *); ++extern int vc_unset_mapping(struct vx_info *, void __user *); ++ ++#ifdef CONFIG_COMPAT ++ ++extern int vc_set_mapping_x32(struct vx_info *, void __user *); ++extern int vc_unset_mapping_x32(struct vx_info *, void __user *); ++ ++#endif /* CONFIG_COMPAT */ ++ ++#endif /* __KERNEL__ */ ++#endif /* _VX_DEVICE_CMD_H */ +diff -NurpP --minimal linux-3.3.8/include/linux/vserver/device_def.h linux-3.3.8-vs2.3.3.4/include/linux/vserver/device_def.h +--- 
linux-3.3.8/include/linux/vserver/device_def.h 1970-01-01 01:00:00.000000000 +0100 ++++ linux-3.3.8-vs2.3.3.4/include/linux/vserver/device_def.h 2012-02-24 03:55:06.000000000 +0100 +@@ -0,0 +1,17 @@ ++#ifndef _VX_DEVICE_DEF_H ++#define _VX_DEVICE_DEF_H ++ ++#include ++ ++struct vx_dmap_target { ++ dev_t target; ++ uint32_t flags; ++}; ++ ++struct _vx_device { ++#ifdef CONFIG_VSERVER_DEVICE ++ struct vx_dmap_target targets[2]; ++#endif ++}; ++ ++#endif /* _VX_DEVICE_DEF_H */ +diff -NurpP --minimal linux-3.3.8/include/linux/vserver/dlimit.h linux-3.3.8-vs2.3.3.4/include/linux/vserver/dlimit.h +--- linux-3.3.8/include/linux/vserver/dlimit.h 1970-01-01 01:00:00.000000000 +0100 ++++ linux-3.3.8-vs2.3.3.4/include/linux/vserver/dlimit.h 2012-02-24 03:55:06.000000000 +0100 +@@ -0,0 +1,54 @@ ++#ifndef _VX_DLIMIT_H ++#define _VX_DLIMIT_H ++ ++#include "switch.h" ++ ++ ++#ifdef __KERNEL__ ++ ++/* keep in sync with CDLIM_INFINITY */ ++ ++#define DLIM_INFINITY (~0ULL) ++ ++#include ++#include ++ ++struct super_block; ++ ++struct dl_info { ++ struct hlist_node dl_hlist; /* linked list of contexts */ ++ struct rcu_head dl_rcu; /* the rcu head */ ++ tag_t dl_tag; /* context tag */ ++ atomic_t dl_usecnt; /* usage count */ ++ atomic_t dl_refcnt; /* reference count */ ++ ++ struct super_block *dl_sb; /* associated superblock */ ++ ++ spinlock_t dl_lock; /* protect the values */ ++ ++ unsigned long long dl_space_used; /* used space in bytes */ ++ unsigned long long dl_space_total; /* maximum space in bytes */ ++ unsigned long dl_inodes_used; /* used inodes */ ++ unsigned long dl_inodes_total; /* maximum inodes */ ++ ++ unsigned int dl_nrlmult; /* non root limit mult */ ++}; ++ ++struct rcu_head; ++ ++extern void rcu_free_dl_info(struct rcu_head *); ++extern void unhash_dl_info(struct dl_info *); ++ ++extern struct dl_info *locate_dl_info(struct super_block *, tag_t); ++ ++ ++struct kstatfs; ++ ++extern void vx_vsi_statfs(struct super_block *, struct kstatfs *); ++ ++typedef uint64_t dlsize_t; ++ ++#endif /* __KERNEL__ */ ++#else /* _VX_DLIMIT_H */ ++#warning duplicate inclusion ++#endif /* _VX_DLIMIT_H */ +diff -NurpP --minimal linux-3.3.8/include/linux/vserver/dlimit_cmd.h linux-3.3.8-vs2.3.3.4/include/linux/vserver/dlimit_cmd.h +--- linux-3.3.8/include/linux/vserver/dlimit_cmd.h 1970-01-01 01:00:00.000000000 +0100 ++++ linux-3.3.8-vs2.3.3.4/include/linux/vserver/dlimit_cmd.h 2012-02-24 03:55:06.000000000 +0100 +@@ -0,0 +1,109 @@ ++#ifndef _VX_DLIMIT_CMD_H ++#define _VX_DLIMIT_CMD_H ++ ++ ++/* dlimit vserver commands */ ++ ++#define VCMD_add_dlimit VC_CMD(DLIMIT, 1, 0) ++#define VCMD_rem_dlimit VC_CMD(DLIMIT, 2, 0) ++ ++#define VCMD_set_dlimit VC_CMD(DLIMIT, 5, 0) ++#define VCMD_get_dlimit VC_CMD(DLIMIT, 6, 0) ++ ++struct vcmd_ctx_dlimit_base_v0 { ++ const char __user *name; ++ uint32_t flags; ++}; ++ ++struct vcmd_ctx_dlimit_v0 { ++ const char __user *name; ++ uint32_t space_used; /* used space in kbytes */ ++ uint32_t space_total; /* maximum space in kbytes */ ++ uint32_t inodes_used; /* used inodes */ ++ uint32_t inodes_total; /* maximum inodes */ ++ uint32_t reserved; /* reserved for root in % */ ++ uint32_t flags; ++}; ++ ++#define CDLIM_UNSET ((uint32_t)0UL) ++#define CDLIM_INFINITY ((uint32_t)~0UL) ++#define CDLIM_KEEP ((uint32_t)~1UL) ++ ++#define DLIME_UNIT 0 ++#define DLIME_KILO 1 ++#define DLIME_MEGA 2 ++#define DLIME_GIGA 3 ++ ++#define DLIMF_SHIFT 0x10 ++ ++#define DLIMS_USED 0 ++#define DLIMS_TOTAL 2 ++ ++static inline ++uint64_t dlimit_space_32to64(uint32_t val, uint32_t flags, int shift) ++{ 
++ int exp = (flags & DLIMF_SHIFT) ? ++ (flags >> shift) & DLIME_GIGA : DLIME_KILO; ++ return ((uint64_t)val) << (10 * exp); ++} ++ ++static inline ++uint32_t dlimit_space_64to32(uint64_t val, uint32_t *flags, int shift) ++{ ++ int exp = 0; ++ ++ if (*flags & DLIMF_SHIFT) { ++ while (val > (1LL << 32) && (exp < 3)) { ++ val >>= 10; ++ exp++; ++ } ++ *flags &= ~(DLIME_GIGA << shift); ++ *flags |= exp << shift; ++ } else ++ val >>= 10; ++ return val; ++} ++ ++#ifdef __KERNEL__ ++ ++#ifdef CONFIG_COMPAT ++ ++#include ++ ++struct vcmd_ctx_dlimit_base_v0_x32 { ++ compat_uptr_t name_ptr; ++ uint32_t flags; ++}; ++ ++struct vcmd_ctx_dlimit_v0_x32 { ++ compat_uptr_t name_ptr; ++ uint32_t space_used; /* used space in kbytes */ ++ uint32_t space_total; /* maximum space in kbytes */ ++ uint32_t inodes_used; /* used inodes */ ++ uint32_t inodes_total; /* maximum inodes */ ++ uint32_t reserved; /* reserved for root in % */ ++ uint32_t flags; ++}; ++ ++#endif /* CONFIG_COMPAT */ ++ ++#include ++ ++extern int vc_add_dlimit(uint32_t, void __user *); ++extern int vc_rem_dlimit(uint32_t, void __user *); ++ ++extern int vc_set_dlimit(uint32_t, void __user *); ++extern int vc_get_dlimit(uint32_t, void __user *); ++ ++#ifdef CONFIG_COMPAT ++ ++extern int vc_add_dlimit_x32(uint32_t, void __user *); ++extern int vc_rem_dlimit_x32(uint32_t, void __user *); ++ ++extern int vc_set_dlimit_x32(uint32_t, void __user *); ++extern int vc_get_dlimit_x32(uint32_t, void __user *); ++ ++#endif /* CONFIG_COMPAT */ ++ ++#endif /* __KERNEL__ */ ++#endif /* _VX_DLIMIT_CMD_H */ +diff -NurpP --minimal linux-3.3.8/include/linux/vserver/global.h linux-3.3.8-vs2.3.3.4/include/linux/vserver/global.h +--- linux-3.3.8/include/linux/vserver/global.h 1970-01-01 01:00:00.000000000 +0100 ++++ linux-3.3.8-vs2.3.3.4/include/linux/vserver/global.h 2012-02-24 03:55:06.000000000 +0100 +@@ -0,0 +1,19 @@ ++#ifndef _VX_GLOBAL_H ++#define _VX_GLOBAL_H ++ ++ ++extern atomic_t vx_global_ctotal; ++extern atomic_t vx_global_cactive; ++ ++extern atomic_t nx_global_ctotal; ++extern atomic_t nx_global_cactive; ++ ++extern atomic_t vs_global_nsproxy; ++extern atomic_t vs_global_fs; ++extern atomic_t vs_global_mnt_ns; ++extern atomic_t vs_global_uts_ns; ++extern atomic_t vs_global_user_ns; ++extern atomic_t vs_global_pid_ns; ++ ++ ++#endif /* _VX_GLOBAL_H */ +diff -NurpP --minimal linux-3.3.8/include/linux/vserver/history.h linux-3.3.8-vs2.3.3.4/include/linux/vserver/history.h +--- linux-3.3.8/include/linux/vserver/history.h 1970-01-01 01:00:00.000000000 +0100 ++++ linux-3.3.8-vs2.3.3.4/include/linux/vserver/history.h 2012-02-24 03:55:06.000000000 +0100 +@@ -0,0 +1,197 @@ ++#ifndef _VX_HISTORY_H ++#define _VX_HISTORY_H ++ ++ ++enum { ++ VXH_UNUSED = 0, ++ VXH_THROW_OOPS = 1, ++ ++ VXH_GET_VX_INFO, ++ VXH_PUT_VX_INFO, ++ VXH_INIT_VX_INFO, ++ VXH_SET_VX_INFO, ++ VXH_CLR_VX_INFO, ++ VXH_CLAIM_VX_INFO, ++ VXH_RELEASE_VX_INFO, ++ VXH_ALLOC_VX_INFO, ++ VXH_DEALLOC_VX_INFO, ++ VXH_HASH_VX_INFO, ++ VXH_UNHASH_VX_INFO, ++ VXH_LOC_VX_INFO, ++ VXH_LOOKUP_VX_INFO, ++ VXH_CREATE_VX_INFO, ++}; ++ ++struct _vxhe_vxi { ++ struct vx_info *ptr; ++ unsigned xid; ++ unsigned usecnt; ++ unsigned tasks; ++}; ++ ++struct _vxhe_set_clr { ++ void *data; ++}; ++ ++struct _vxhe_loc_lookup { ++ unsigned arg; ++}; ++ ++struct _vx_hist_entry { ++ void *loc; ++ unsigned short seq; ++ unsigned short type; ++ struct _vxhe_vxi vxi; ++ union { ++ struct _vxhe_set_clr sc; ++ struct _vxhe_loc_lookup ll; ++ }; ++}; ++ ++#ifdef CONFIG_VSERVER_HISTORY ++ ++extern unsigned volatile int vxh_active; 
++ ++struct _vx_hist_entry *vxh_advance(void *loc); ++ ++ ++static inline ++void __vxh_copy_vxi(struct _vx_hist_entry *entry, struct vx_info *vxi) ++{ ++ entry->vxi.ptr = vxi; ++ if (vxi) { ++ entry->vxi.usecnt = atomic_read(&vxi->vx_usecnt); ++ entry->vxi.tasks = atomic_read(&vxi->vx_tasks); ++ entry->vxi.xid = vxi->vx_id; ++ } ++} ++ ++ ++#define __HERE__ current_text_addr() ++ ++#define __VXH_BODY(__type, __data, __here) \ ++ struct _vx_hist_entry *entry; \ ++ \ ++ preempt_disable(); \ ++ entry = vxh_advance(__here); \ ++ __data; \ ++ entry->type = __type; \ ++ preempt_enable(); ++ ++ ++ /* pass vxi only */ ++ ++#define __VXH_SMPL \ ++ __vxh_copy_vxi(entry, vxi) ++ ++static inline ++void __vxh_smpl(struct vx_info *vxi, int __type, void *__here) ++{ ++ __VXH_BODY(__type, __VXH_SMPL, __here) ++} ++ ++ /* pass vxi and data (void *) */ ++ ++#define __VXH_DATA \ ++ __vxh_copy_vxi(entry, vxi); \ ++ entry->sc.data = data ++ ++static inline ++void __vxh_data(struct vx_info *vxi, void *data, ++ int __type, void *__here) ++{ ++ __VXH_BODY(__type, __VXH_DATA, __here) ++} ++ ++ /* pass vxi and arg (long) */ ++ ++#define __VXH_LONG \ ++ __vxh_copy_vxi(entry, vxi); \ ++ entry->ll.arg = arg ++ ++static inline ++void __vxh_long(struct vx_info *vxi, long arg, ++ int __type, void *__here) ++{ ++ __VXH_BODY(__type, __VXH_LONG, __here) ++} ++ ++ ++static inline ++void __vxh_throw_oops(void *__here) ++{ ++ __VXH_BODY(VXH_THROW_OOPS, {}, __here); ++ /* prevent further acquisition */ ++ vxh_active = 0; ++} ++ ++ ++#define vxh_throw_oops() __vxh_throw_oops(__HERE__); ++ ++#define __vxh_get_vx_info(v, h) __vxh_smpl(v, VXH_GET_VX_INFO, h); ++#define __vxh_put_vx_info(v, h) __vxh_smpl(v, VXH_PUT_VX_INFO, h); ++ ++#define __vxh_init_vx_info(v, d, h) \ ++ __vxh_data(v, d, VXH_INIT_VX_INFO, h); ++#define __vxh_set_vx_info(v, d, h) \ ++ __vxh_data(v, d, VXH_SET_VX_INFO, h); ++#define __vxh_clr_vx_info(v, d, h) \ ++ __vxh_data(v, d, VXH_CLR_VX_INFO, h); ++ ++#define __vxh_claim_vx_info(v, d, h) \ ++ __vxh_data(v, d, VXH_CLAIM_VX_INFO, h); ++#define __vxh_release_vx_info(v, d, h) \ ++ __vxh_data(v, d, VXH_RELEASE_VX_INFO, h); ++ ++#define vxh_alloc_vx_info(v) \ ++ __vxh_smpl(v, VXH_ALLOC_VX_INFO, __HERE__); ++#define vxh_dealloc_vx_info(v) \ ++ __vxh_smpl(v, VXH_DEALLOC_VX_INFO, __HERE__); ++ ++#define vxh_hash_vx_info(v) \ ++ __vxh_smpl(v, VXH_HASH_VX_INFO, __HERE__); ++#define vxh_unhash_vx_info(v) \ ++ __vxh_smpl(v, VXH_UNHASH_VX_INFO, __HERE__); ++ ++#define vxh_loc_vx_info(v, l) \ ++ __vxh_long(v, l, VXH_LOC_VX_INFO, __HERE__); ++#define vxh_lookup_vx_info(v, l) \ ++ __vxh_long(v, l, VXH_LOOKUP_VX_INFO, __HERE__); ++#define vxh_create_vx_info(v, l) \ ++ __vxh_long(v, l, VXH_CREATE_VX_INFO, __HERE__); ++ ++extern void vxh_dump_history(void); ++ ++ ++#else /* CONFIG_VSERVER_HISTORY */ ++ ++#define __HERE__ 0 ++ ++#define vxh_throw_oops() do { } while (0) ++ ++#define __vxh_get_vx_info(v, h) do { } while (0) ++#define __vxh_put_vx_info(v, h) do { } while (0) ++ ++#define __vxh_init_vx_info(v, d, h) do { } while (0) ++#define __vxh_set_vx_info(v, d, h) do { } while (0) ++#define __vxh_clr_vx_info(v, d, h) do { } while (0) ++ ++#define __vxh_claim_vx_info(v, d, h) do { } while (0) ++#define __vxh_release_vx_info(v, d, h) do { } while (0) ++ ++#define vxh_alloc_vx_info(v) do { } while (0) ++#define vxh_dealloc_vx_info(v) do { } while (0) ++ ++#define vxh_hash_vx_info(v) do { } while (0) ++#define vxh_unhash_vx_info(v) do { } while (0) ++ ++#define vxh_loc_vx_info(v, l) do { } while (0) ++#define vxh_lookup_vx_info(v, l) 
do { } while (0) ++#define vxh_create_vx_info(v, l) do { } while (0) ++ ++#define vxh_dump_history() do { } while (0) ++ ++ ++#endif /* CONFIG_VSERVER_HISTORY */ ++ ++#endif /* _VX_HISTORY_H */ +diff -NurpP --minimal linux-3.3.8/include/linux/vserver/inode.h linux-3.3.8-vs2.3.3.4/include/linux/vserver/inode.h +--- linux-3.3.8/include/linux/vserver/inode.h 1970-01-01 01:00:00.000000000 +0100 ++++ linux-3.3.8-vs2.3.3.4/include/linux/vserver/inode.h 2012-02-24 03:55:06.000000000 +0100 +@@ -0,0 +1,39 @@ ++#ifndef _VX_INODE_H ++#define _VX_INODE_H ++ ++ ++#define IATTR_TAG 0x01000000 ++ ++#define IATTR_ADMIN 0x00000001 ++#define IATTR_WATCH 0x00000002 ++#define IATTR_HIDE 0x00000004 ++#define IATTR_FLAGS 0x00000007 ++ ++#define IATTR_BARRIER 0x00010000 ++#define IATTR_IXUNLINK 0x00020000 ++#define IATTR_IMMUTABLE 0x00040000 ++#define IATTR_COW 0x00080000 ++ ++#ifdef __KERNEL__ ++ ++ ++#ifdef CONFIG_VSERVER_PROC_SECURE ++#define IATTR_PROC_DEFAULT ( IATTR_ADMIN | IATTR_HIDE ) ++#define IATTR_PROC_SYMLINK ( IATTR_ADMIN ) ++#else ++#define IATTR_PROC_DEFAULT ( IATTR_ADMIN ) ++#define IATTR_PROC_SYMLINK ( IATTR_ADMIN ) ++#endif ++ ++#define vx_hide_check(c, m) (((m) & IATTR_HIDE) ? vx_check(c, m) : 1) ++ ++#endif /* __KERNEL__ */ ++ ++/* inode ioctls */ ++ ++#define FIOC_GETXFLG _IOR('x', 5, long) ++#define FIOC_SETXFLG _IOW('x', 6, long) ++ ++#else /* _VX_INODE_H */ ++#warning duplicate inclusion ++#endif /* _VX_INODE_H */ +diff -NurpP --minimal linux-3.3.8/include/linux/vserver/inode_cmd.h linux-3.3.8-vs2.3.3.4/include/linux/vserver/inode_cmd.h +--- linux-3.3.8/include/linux/vserver/inode_cmd.h 1970-01-01 01:00:00.000000000 +0100 ++++ linux-3.3.8-vs2.3.3.4/include/linux/vserver/inode_cmd.h 2012-02-24 03:55:06.000000000 +0100 +@@ -0,0 +1,59 @@ ++#ifndef _VX_INODE_CMD_H ++#define _VX_INODE_CMD_H ++ ++ ++/* inode vserver commands */ ++ ++#define VCMD_get_iattr VC_CMD(INODE, 1, 1) ++#define VCMD_set_iattr VC_CMD(INODE, 2, 1) ++ ++#define VCMD_fget_iattr VC_CMD(INODE, 3, 0) ++#define VCMD_fset_iattr VC_CMD(INODE, 4, 0) ++ ++struct vcmd_ctx_iattr_v1 { ++ const char __user *name; ++ uint32_t tag; ++ uint32_t flags; ++ uint32_t mask; ++}; ++ ++struct vcmd_ctx_fiattr_v0 { ++ uint32_t tag; ++ uint32_t flags; ++ uint32_t mask; ++}; ++ ++ ++#ifdef __KERNEL__ ++ ++ ++#ifdef CONFIG_COMPAT ++ ++#include ++ ++struct vcmd_ctx_iattr_v1_x32 { ++ compat_uptr_t name_ptr; ++ uint32_t tag; ++ uint32_t flags; ++ uint32_t mask; ++}; ++ ++#endif /* CONFIG_COMPAT */ ++ ++#include ++ ++extern int vc_get_iattr(void __user *); ++extern int vc_set_iattr(void __user *); ++ ++extern int vc_fget_iattr(uint32_t, void __user *); ++extern int vc_fset_iattr(uint32_t, void __user *); ++ ++#ifdef CONFIG_COMPAT ++ ++extern int vc_get_iattr_x32(void __user *); ++extern int vc_set_iattr_x32(void __user *); ++ ++#endif /* CONFIG_COMPAT */ ++ ++#endif /* __KERNEL__ */ ++#endif /* _VX_INODE_CMD_H */ +diff -NurpP --minimal linux-3.3.8/include/linux/vserver/limit.h linux-3.3.8-vs2.3.3.4/include/linux/vserver/limit.h +--- linux-3.3.8/include/linux/vserver/limit.h 1970-01-01 01:00:00.000000000 +0100 ++++ linux-3.3.8-vs2.3.3.4/include/linux/vserver/limit.h 2012-02-24 03:55:06.000000000 +0100 +@@ -0,0 +1,71 @@ ++#ifndef _VX_LIMIT_H ++#define _VX_LIMIT_H ++ ++#define VLIMIT_NSOCK 16 ++#define VLIMIT_OPENFD 17 ++#define VLIMIT_ANON 18 ++#define VLIMIT_SHMEM 19 ++#define VLIMIT_SEMARY 20 ++#define VLIMIT_NSEMS 21 ++#define VLIMIT_DENTRY 22 ++#define VLIMIT_MAPPED 23 ++ ++ ++#ifdef __KERNEL__ ++ ++#define VLIM_NOCHECK ((1L << VLIMIT_DENTRY) | (1L << 
RLIMIT_RSS)) ++ ++/* keep in sync with CRLIM_INFINITY */ ++ ++#define VLIM_INFINITY (~0ULL) ++ ++#include ++#include ++ ++#ifndef RLIM_INFINITY ++#warning RLIM_INFINITY is undefined ++#endif ++ ++#define __rlim_val(l, r, v) ((l)->res[r].v) ++ ++#define __rlim_soft(l, r) __rlim_val(l, r, soft) ++#define __rlim_hard(l, r) __rlim_val(l, r, hard) ++ ++#define __rlim_rcur(l, r) __rlim_val(l, r, rcur) ++#define __rlim_rmin(l, r) __rlim_val(l, r, rmin) ++#define __rlim_rmax(l, r) __rlim_val(l, r, rmax) ++ ++#define __rlim_lhit(l, r) __rlim_val(l, r, lhit) ++#define __rlim_hit(l, r) atomic_inc(&__rlim_lhit(l, r)) ++ ++typedef atomic_long_t rlim_atomic_t; ++typedef unsigned long rlim_t; ++ ++#define __rlim_get(l, r) atomic_long_read(&__rlim_rcur(l, r)) ++#define __rlim_set(l, r, v) atomic_long_set(&__rlim_rcur(l, r), v) ++#define __rlim_inc(l, r) atomic_long_inc(&__rlim_rcur(l, r)) ++#define __rlim_dec(l, r) atomic_long_dec(&__rlim_rcur(l, r)) ++#define __rlim_add(l, r, v) atomic_long_add(v, &__rlim_rcur(l, r)) ++#define __rlim_sub(l, r, v) atomic_long_sub(v, &__rlim_rcur(l, r)) ++ ++ ++#if (RLIM_INFINITY == VLIM_INFINITY) ++#define VX_VLIM(r) ((long long)(long)(r)) ++#define VX_RLIM(v) ((rlim_t)(v)) ++#else ++#define VX_VLIM(r) (((r) == RLIM_INFINITY) \ ++ ? VLIM_INFINITY : (long long)(r)) ++#define VX_RLIM(v) (((v) == VLIM_INFINITY) \ ++ ? RLIM_INFINITY : (rlim_t)(v)) ++#endif ++ ++struct sysinfo; ++ ++void vx_vsi_meminfo(struct sysinfo *); ++void vx_vsi_swapinfo(struct sysinfo *); ++long vx_vsi_cached(struct sysinfo *); ++ ++#define NUM_LIMITS 24 ++ ++#endif /* __KERNEL__ */ ++#endif /* _VX_LIMIT_H */ +diff -NurpP --minimal linux-3.3.8/include/linux/vserver/limit_cmd.h linux-3.3.8-vs2.3.3.4/include/linux/vserver/limit_cmd.h +--- linux-3.3.8/include/linux/vserver/limit_cmd.h 1970-01-01 01:00:00.000000000 +0100 ++++ linux-3.3.8-vs2.3.3.4/include/linux/vserver/limit_cmd.h 2012-02-24 03:55:06.000000000 +0100 +@@ -0,0 +1,71 @@ ++#ifndef _VX_LIMIT_CMD_H ++#define _VX_LIMIT_CMD_H ++ ++ ++/* rlimit vserver commands */ ++ ++#define VCMD_get_rlimit VC_CMD(RLIMIT, 1, 0) ++#define VCMD_set_rlimit VC_CMD(RLIMIT, 2, 0) ++#define VCMD_get_rlimit_mask VC_CMD(RLIMIT, 3, 0) ++#define VCMD_reset_hits VC_CMD(RLIMIT, 7, 0) ++#define VCMD_reset_minmax VC_CMD(RLIMIT, 9, 0) ++ ++struct vcmd_ctx_rlimit_v0 { ++ uint32_t id; ++ uint64_t minimum; ++ uint64_t softlimit; ++ uint64_t maximum; ++}; ++ ++struct vcmd_ctx_rlimit_mask_v0 { ++ uint32_t minimum; ++ uint32_t softlimit; ++ uint32_t maximum; ++}; ++ ++#define VCMD_rlimit_stat VC_CMD(VSTAT, 1, 0) ++ ++struct vcmd_rlimit_stat_v0 { ++ uint32_t id; ++ uint32_t hits; ++ uint64_t value; ++ uint64_t minimum; ++ uint64_t maximum; ++}; ++ ++#define CRLIM_UNSET (0ULL) ++#define CRLIM_INFINITY (~0ULL) ++#define CRLIM_KEEP (~1ULL) ++ ++#ifdef __KERNEL__ ++ ++#ifdef CONFIG_IA32_EMULATION ++ ++struct vcmd_ctx_rlimit_v0_x32 { ++ uint32_t id; ++ uint64_t minimum; ++ uint64_t softlimit; ++ uint64_t maximum; ++} __attribute__ ((packed)); ++ ++#endif /* CONFIG_IA32_EMULATION */ ++ ++#include ++ ++extern int vc_get_rlimit_mask(uint32_t, void __user *); ++extern int vc_get_rlimit(struct vx_info *, void __user *); ++extern int vc_set_rlimit(struct vx_info *, void __user *); ++extern int vc_reset_hits(struct vx_info *, void __user *); ++extern int vc_reset_minmax(struct vx_info *, void __user *); ++ ++extern int vc_rlimit_stat(struct vx_info *, void __user *); ++ ++#ifdef CONFIG_IA32_EMULATION ++ ++extern int vc_get_rlimit_x32(struct vx_info *, void __user *); ++extern int 
vc_set_rlimit_x32(struct vx_info *, void __user *); ++ ++#endif /* CONFIG_IA32_EMULATION */ ++ ++#endif /* __KERNEL__ */ ++#endif /* _VX_LIMIT_CMD_H */ +diff -NurpP --minimal linux-3.3.8/include/linux/vserver/limit_def.h linux-3.3.8-vs2.3.3.4/include/linux/vserver/limit_def.h +--- linux-3.3.8/include/linux/vserver/limit_def.h 1970-01-01 01:00:00.000000000 +0100 ++++ linux-3.3.8-vs2.3.3.4/include/linux/vserver/limit_def.h 2012-02-24 03:55:06.000000000 +0100 +@@ -0,0 +1,47 @@ ++#ifndef _VX_LIMIT_DEF_H ++#define _VX_LIMIT_DEF_H ++ ++#include ++#include ++ ++#include "limit.h" ++ ++ ++struct _vx_res_limit { ++ rlim_t soft; /* Context soft limit */ ++ rlim_t hard; /* Context hard limit */ ++ ++ rlim_atomic_t rcur; /* Current value */ ++ rlim_t rmin; /* Context minimum */ ++ rlim_t rmax; /* Context maximum */ ++ ++ atomic_t lhit; /* Limit hits */ ++}; ++ ++/* context sub struct */ ++ ++struct _vx_limit { ++ struct _vx_res_limit res[NUM_LIMITS]; ++}; ++ ++#ifdef CONFIG_VSERVER_DEBUG ++ ++static inline void __dump_vx_limit(struct _vx_limit *limit) ++{ ++ int i; ++ ++ printk("\t_vx_limit:"); ++ for (i = 0; i < NUM_LIMITS; i++) { ++ printk("\t [%2d] = %8lu %8lu/%8lu, %8ld/%8ld, %8d\n", ++ i, (unsigned long)__rlim_get(limit, i), ++ (unsigned long)__rlim_rmin(limit, i), ++ (unsigned long)__rlim_rmax(limit, i), ++ (long)__rlim_soft(limit, i), ++ (long)__rlim_hard(limit, i), ++ atomic_read(&__rlim_lhit(limit, i))); ++ } ++} ++ ++#endif ++ ++#endif /* _VX_LIMIT_DEF_H */ +diff -NurpP --minimal linux-3.3.8/include/linux/vserver/limit_int.h linux-3.3.8-vs2.3.3.4/include/linux/vserver/limit_int.h +--- linux-3.3.8/include/linux/vserver/limit_int.h 1970-01-01 01:00:00.000000000 +0100 ++++ linux-3.3.8-vs2.3.3.4/include/linux/vserver/limit_int.h 2012-02-24 03:55:06.000000000 +0100 +@@ -0,0 +1,198 @@ ++#ifndef _VX_LIMIT_INT_H ++#define _VX_LIMIT_INT_H ++ ++#include "context.h" ++ ++#ifdef __KERNEL__ ++ ++#define VXD_RCRES_COND(r) VXD_CBIT(cres, r) ++#define VXD_RLIMIT_COND(r) VXD_CBIT(limit, r) ++ ++extern const char *vlimit_name[NUM_LIMITS]; ++ ++static inline void __vx_acc_cres(struct vx_info *vxi, ++ int res, int dir, void *_data, char *_file, int _line) ++{ ++ if (VXD_RCRES_COND(res)) ++ vxlprintk(1, "vx_acc_cres[%5d,%s,%2d]: %5ld%s (%p)", ++ (vxi ? vxi->vx_id : -1), vlimit_name[res], res, ++ (vxi ? (long)__rlim_get(&vxi->limit, res) : 0), ++ (dir > 0) ? "++" : "--", _data, _file, _line); ++ if (!vxi) ++ return; ++ ++ if (dir > 0) ++ __rlim_inc(&vxi->limit, res); ++ else ++ __rlim_dec(&vxi->limit, res); ++} ++ ++static inline void __vx_add_cres(struct vx_info *vxi, ++ int res, int amount, void *_data, char *_file, int _line) ++{ ++ if (VXD_RCRES_COND(res)) ++ vxlprintk(1, "vx_add_cres[%5d,%s,%2d]: %5ld += %5d (%p)", ++ (vxi ? vxi->vx_id : -1), vlimit_name[res], res, ++ (vxi ? 
(long)__rlim_get(&vxi->limit, res) : 0), ++ amount, _data, _file, _line); ++ if (amount == 0) ++ return; ++ if (!vxi) ++ return; ++ __rlim_add(&vxi->limit, res, amount); ++} ++ ++static inline ++int __vx_cres_adjust_max(struct _vx_limit *limit, int res, rlim_t value) ++{ ++ int cond = (value > __rlim_rmax(limit, res)); ++ ++ if (cond) ++ __rlim_rmax(limit, res) = value; ++ return cond; ++} ++ ++static inline ++int __vx_cres_adjust_min(struct _vx_limit *limit, int res, rlim_t value) ++{ ++ int cond = (value < __rlim_rmin(limit, res)); ++ ++ if (cond) ++ __rlim_rmin(limit, res) = value; ++ return cond; ++} ++ ++static inline ++void __vx_cres_fixup(struct _vx_limit *limit, int res, rlim_t value) ++{ ++ if (!__vx_cres_adjust_max(limit, res, value)) ++ __vx_cres_adjust_min(limit, res, value); ++} ++ ++ ++/* return values: ++ +1 ... no limit hit ++ -1 ... over soft limit ++ 0 ... over hard limit */ ++ ++static inline int __vx_cres_avail(struct vx_info *vxi, ++ int res, int num, char *_file, int _line) ++{ ++ struct _vx_limit *limit; ++ rlim_t value; ++ ++ if (VXD_RLIMIT_COND(res)) ++ vxlprintk(1, "vx_cres_avail[%5d,%s,%2d]: %5ld/%5ld > %5ld + %5d", ++ (vxi ? vxi->vx_id : -1), vlimit_name[res], res, ++ (vxi ? (long)__rlim_soft(&vxi->limit, res) : -1), ++ (vxi ? (long)__rlim_hard(&vxi->limit, res) : -1), ++ (vxi ? (long)__rlim_get(&vxi->limit, res) : 0), ++ num, _file, _line); ++ if (!vxi) ++ return 1; ++ ++ limit = &vxi->limit; ++ value = __rlim_get(limit, res); ++ ++ if (!__vx_cres_adjust_max(limit, res, value)) ++ __vx_cres_adjust_min(limit, res, value); ++ ++ if (num == 0) ++ return 1; ++ ++ if (__rlim_soft(limit, res) == RLIM_INFINITY) ++ return -1; ++ if (value + num <= __rlim_soft(limit, res)) ++ return -1; ++ ++ if (__rlim_hard(limit, res) == RLIM_INFINITY) ++ return 1; ++ if (value + num <= __rlim_hard(limit, res)) ++ return 1; ++ ++ __rlim_hit(limit, res); ++ return 0; ++} ++ ++ ++static const int VLA_RSS[] = { RLIMIT_RSS, VLIMIT_ANON, VLIMIT_MAPPED, 0 }; ++ ++static inline ++rlim_t __vx_cres_array_sum(struct _vx_limit *limit, const int *array) ++{ ++ rlim_t value, sum = 0; ++ int res; ++ ++ while ((res = *array++)) { ++ value = __rlim_get(limit, res); ++ __vx_cres_fixup(limit, res, value); ++ sum += value; ++ } ++ return sum; ++} ++ ++static inline ++rlim_t __vx_cres_array_fixup(struct _vx_limit *limit, const int *array) ++{ ++ rlim_t value = __vx_cres_array_sum(limit, array + 1); ++ int res = *array; ++ ++ if (value == __rlim_get(limit, res)) ++ return value; ++ ++ __rlim_set(limit, res, value); ++ /* now adjust min/max */ ++ if (!__vx_cres_adjust_max(limit, res, value)) ++ __vx_cres_adjust_min(limit, res, value); ++ ++ return value; ++} ++ ++static inline int __vx_cres_array_avail(struct vx_info *vxi, ++ const int *array, int num, char *_file, int _line) ++{ ++ struct _vx_limit *limit; ++ rlim_t value = 0; ++ int res; ++ ++ if (num == 0) ++ return 1; ++ if (!vxi) ++ return 1; ++ ++ limit = &vxi->limit; ++ res = *array; ++ value = __vx_cres_array_sum(limit, array + 1); ++ ++ __rlim_set(limit, res, value); ++ __vx_cres_fixup(limit, res, value); ++ ++ return __vx_cres_avail(vxi, res, num, _file, _line); ++} ++ ++ ++static inline void vx_limit_fixup(struct _vx_limit *limit, int id) ++{ ++ rlim_t value; ++ int res; ++ ++ /* complex resources first */ ++ if ((id < 0) || (id == RLIMIT_RSS)) ++ __vx_cres_array_fixup(limit, VLA_RSS); ++ ++ for (res = 0; res < NUM_LIMITS; res++) { ++ if ((id > 0) && (res != id)) ++ continue; ++ ++ value = __rlim_get(limit, res); ++ __vx_cres_fixup(limit, res, 
value); ++ ++ /* not supposed to happen, maybe warn? */ ++ if (__rlim_rmax(limit, res) > __rlim_hard(limit, res)) ++ __rlim_rmax(limit, res) = __rlim_hard(limit, res); ++ } ++} ++ ++ ++#endif /* __KERNEL__ */ ++#endif /* _VX_LIMIT_INT_H */ +diff -NurpP --minimal linux-3.3.8/include/linux/vserver/monitor.h linux-3.3.8-vs2.3.3.4/include/linux/vserver/monitor.h +--- linux-3.3.8/include/linux/vserver/monitor.h 1970-01-01 01:00:00.000000000 +0100 ++++ linux-3.3.8-vs2.3.3.4/include/linux/vserver/monitor.h 2012-02-24 03:55:06.000000000 +0100 +@@ -0,0 +1,96 @@ ++#ifndef _VX_MONITOR_H ++#define _VX_MONITOR_H ++ ++#include ++ ++enum { ++ VXM_UNUSED = 0, ++ ++ VXM_SYNC = 0x10, ++ ++ VXM_UPDATE = 0x20, ++ VXM_UPDATE_1, ++ VXM_UPDATE_2, ++ ++ VXM_RQINFO_1 = 0x24, ++ VXM_RQINFO_2, ++ ++ VXM_ACTIVATE = 0x40, ++ VXM_DEACTIVATE, ++ VXM_IDLE, ++ ++ VXM_HOLD = 0x44, ++ VXM_UNHOLD, ++ ++ VXM_MIGRATE = 0x48, ++ VXM_RESCHED, ++ ++ /* all other bits are flags */ ++ VXM_SCHED = 0x80, ++}; ++ ++struct _vxm_update_1 { ++ uint32_t tokens_max; ++ uint32_t fill_rate; ++ uint32_t interval; ++}; ++ ++struct _vxm_update_2 { ++ uint32_t tokens_min; ++ uint32_t fill_rate; ++ uint32_t interval; ++}; ++ ++struct _vxm_rqinfo_1 { ++ uint16_t running; ++ uint16_t onhold; ++ uint16_t iowait; ++ uint16_t uintr; ++ uint32_t idle_tokens; ++}; ++ ++struct _vxm_rqinfo_2 { ++ uint32_t norm_time; ++ uint32_t idle_time; ++ uint32_t idle_skip; ++}; ++ ++struct _vxm_sched { ++ uint32_t tokens; ++ uint32_t norm_time; ++ uint32_t idle_time; ++}; ++ ++struct _vxm_task { ++ uint16_t pid; ++ uint16_t state; ++}; ++ ++struct _vxm_event { ++ uint32_t jif; ++ union { ++ uint32_t seq; ++ uint32_t sec; ++ }; ++ union { ++ uint32_t tokens; ++ uint32_t nsec; ++ struct _vxm_task tsk; ++ }; ++}; ++ ++struct _vx_mon_entry { ++ uint16_t type; ++ uint16_t xid; ++ union { ++ struct _vxm_event ev; ++ struct _vxm_sched sd; ++ struct _vxm_update_1 u1; ++ struct _vxm_update_2 u2; ++ struct _vxm_rqinfo_1 q1; ++ struct _vxm_rqinfo_2 q2; ++ }; ++}; ++ ++ ++#endif /* _VX_MONITOR_H */ +diff -NurpP --minimal linux-3.3.8/include/linux/vserver/network.h linux-3.3.8-vs2.3.3.4/include/linux/vserver/network.h +--- linux-3.3.8/include/linux/vserver/network.h 1970-01-01 01:00:00.000000000 +0100 ++++ linux-3.3.8-vs2.3.3.4/include/linux/vserver/network.h 2012-02-24 03:55:06.000000000 +0100 +@@ -0,0 +1,148 @@ ++#ifndef _VX_NETWORK_H ++#define _VX_NETWORK_H ++ ++#include ++ ++ ++#define MAX_N_CONTEXT 65535 /* Arbitrary limit */ ++ ++ ++/* network flags */ ++ ++#define NXF_INFO_PRIVATE 0x00000008 ++ ++#define NXF_SINGLE_IP 0x00000100 ++#define NXF_LBACK_REMAP 0x00000200 ++#define NXF_LBACK_ALLOW 0x00000400 ++ ++#define NXF_HIDE_NETIF 0x02000000 ++#define NXF_HIDE_LBACK 0x04000000 ++ ++#define NXF_STATE_SETUP (1ULL << 32) ++#define NXF_STATE_ADMIN (1ULL << 34) ++ ++#define NXF_SC_HELPER (1ULL << 36) ++#define NXF_PERSISTENT (1ULL << 38) ++ ++#define NXF_ONE_TIME (0x0005ULL << 32) ++ ++ ++#define NXF_INIT_SET (__nxf_init_set()) ++ ++static inline uint64_t __nxf_init_set(void) { ++ return NXF_STATE_ADMIN ++#ifdef CONFIG_VSERVER_AUTO_LBACK ++ | NXF_LBACK_REMAP ++ | NXF_HIDE_LBACK ++#endif ++#ifdef CONFIG_VSERVER_AUTO_SINGLE ++ | NXF_SINGLE_IP ++#endif ++ | NXF_HIDE_NETIF; ++} ++ ++ ++/* network caps */ ++ ++#define NXC_TUN_CREATE 0x00000001 ++ ++#define NXC_RAW_ICMP 0x00000100 ++ ++#define NXC_MULTICAST 0x00001000 ++ ++ ++/* address types */ ++ ++#define NXA_TYPE_IPV4 0x0001 ++#define NXA_TYPE_IPV6 0x0002 ++ ++#define NXA_TYPE_NONE 0x0000 ++#define NXA_TYPE_ANY 0x00FF ++ ++#define 
NXA_TYPE_ADDR 0x0010 ++#define NXA_TYPE_MASK 0x0020 ++#define NXA_TYPE_RANGE 0x0040 ++ ++#define NXA_MASK_ALL (NXA_TYPE_ADDR | NXA_TYPE_MASK | NXA_TYPE_RANGE) ++ ++#define NXA_MOD_BCAST 0x0100 ++#define NXA_MOD_LBACK 0x0200 ++ ++#define NXA_LOOPBACK 0x1000 ++ ++#define NXA_MASK_BIND (NXA_MASK_ALL | NXA_MOD_BCAST | NXA_MOD_LBACK) ++#define NXA_MASK_SHOW (NXA_MASK_ALL | NXA_LOOPBACK) ++ ++#ifdef __KERNEL__ ++ ++#include ++#include ++#include ++#include ++#include ++#include ++ ++struct nx_addr_v4 { ++ struct nx_addr_v4 *next; ++ struct in_addr ip[2]; ++ struct in_addr mask; ++ uint16_t type; ++ uint16_t flags; ++}; ++ ++struct nx_addr_v6 { ++ struct nx_addr_v6 *next; ++ struct in6_addr ip; ++ struct in6_addr mask; ++ uint32_t prefix; ++ uint16_t type; ++ uint16_t flags; ++}; ++ ++struct nx_info { ++ struct hlist_node nx_hlist; /* linked list of nxinfos */ ++ nid_t nx_id; /* vnet id */ ++ atomic_t nx_usecnt; /* usage count */ ++ atomic_t nx_tasks; /* tasks count */ ++ int nx_state; /* context state */ ++ ++ uint64_t nx_flags; /* network flag word */ ++ uint64_t nx_ncaps; /* network capabilities */ ++ ++ struct in_addr v4_lback; /* Loopback address */ ++ struct in_addr v4_bcast; /* Broadcast address */ ++ struct nx_addr_v4 v4; /* First/Single ipv4 address */ ++#ifdef CONFIG_IPV6 ++ struct nx_addr_v6 v6; /* First/Single ipv6 address */ ++#endif ++ char nx_name[65]; /* network context name */ ++}; ++ ++ ++/* status flags */ ++ ++#define NXS_HASHED 0x0001 ++#define NXS_SHUTDOWN 0x0100 ++#define NXS_RELEASED 0x8000 ++ ++extern struct nx_info *lookup_nx_info(int); ++ ++extern int get_nid_list(int, unsigned int *, int); ++extern int nid_is_hashed(nid_t); ++ ++extern int nx_migrate_task(struct task_struct *, struct nx_info *); ++ ++extern long vs_net_change(struct nx_info *, unsigned int); ++ ++struct sock; ++ ++ ++#define NX_IPV4(n) ((n)->v4.type != NXA_TYPE_NONE) ++#ifdef CONFIG_IPV6 ++#define NX_IPV6(n) ((n)->v6.type != NXA_TYPE_NONE) ++#else ++#define NX_IPV6(n) (0) ++#endif ++ ++#endif /* __KERNEL__ */ ++#endif /* _VX_NETWORK_H */ +diff -NurpP --minimal linux-3.3.8/include/linux/vserver/network_cmd.h linux-3.3.8-vs2.3.3.4/include/linux/vserver/network_cmd.h +--- linux-3.3.8/include/linux/vserver/network_cmd.h 1970-01-01 01:00:00.000000000 +0100 ++++ linux-3.3.8-vs2.3.3.4/include/linux/vserver/network_cmd.h 2012-02-24 03:55:06.000000000 +0100 +@@ -0,0 +1,164 @@ ++#ifndef _VX_NETWORK_CMD_H ++#define _VX_NETWORK_CMD_H ++ ++ ++/* vinfo commands */ ++ ++#define VCMD_task_nid VC_CMD(VINFO, 2, 0) ++ ++#ifdef __KERNEL__ ++extern int vc_task_nid(uint32_t); ++ ++#endif /* __KERNEL__ */ ++ ++#define VCMD_nx_info VC_CMD(VINFO, 6, 0) ++ ++struct vcmd_nx_info_v0 { ++ uint32_t nid; ++ /* more to come */ ++}; ++ ++#ifdef __KERNEL__ ++extern int vc_nx_info(struct nx_info *, void __user *); ++ ++#endif /* __KERNEL__ */ ++ ++#include ++#include ++ ++#define VCMD_net_create_v0 VC_CMD(VNET, 1, 0) ++#define VCMD_net_create VC_CMD(VNET, 1, 1) ++ ++struct vcmd_net_create { ++ uint64_t flagword; ++}; ++ ++#define VCMD_net_migrate VC_CMD(NETMIG, 1, 0) ++ ++#define VCMD_net_add VC_CMD(NETALT, 1, 0) ++#define VCMD_net_remove VC_CMD(NETALT, 2, 0) ++ ++struct vcmd_net_addr_v0 { ++ uint16_t type; ++ uint16_t count; ++ struct in_addr ip[4]; ++ struct in_addr mask[4]; ++}; ++ ++#define VCMD_net_add_ipv4_v1 VC_CMD(NETALT, 1, 1) ++#define VCMD_net_rem_ipv4_v1 VC_CMD(NETALT, 2, 1) ++ ++struct vcmd_net_addr_ipv4_v1 { ++ uint16_t type; ++ uint16_t flags; ++ struct in_addr ip; ++ struct in_addr mask; ++}; ++ ++#define 
VCMD_net_add_ipv4 VC_CMD(NETALT, 1, 2) ++#define VCMD_net_rem_ipv4 VC_CMD(NETALT, 2, 2) ++ ++struct vcmd_net_addr_ipv4_v2 { ++ uint16_t type; ++ uint16_t flags; ++ struct in_addr ip; ++ struct in_addr ip2; ++ struct in_addr mask; ++}; ++ ++#define VCMD_net_add_ipv6 VC_CMD(NETALT, 3, 1) ++#define VCMD_net_remove_ipv6 VC_CMD(NETALT, 4, 1) ++ ++struct vcmd_net_addr_ipv6_v1 { ++ uint16_t type; ++ uint16_t flags; ++ uint32_t prefix; ++ struct in6_addr ip; ++ struct in6_addr mask; ++}; ++ ++#define VCMD_add_match_ipv4 VC_CMD(NETALT, 5, 0) ++#define VCMD_get_match_ipv4 VC_CMD(NETALT, 6, 0) ++ ++struct vcmd_match_ipv4_v0 { ++ uint16_t type; ++ uint16_t flags; ++ uint16_t parent; ++ uint16_t prefix; ++ struct in_addr ip; ++ struct in_addr ip2; ++ struct in_addr mask; ++}; ++ ++#define VCMD_add_match_ipv6 VC_CMD(NETALT, 7, 0) ++#define VCMD_get_match_ipv6 VC_CMD(NETALT, 8, 0) ++ ++struct vcmd_match_ipv6_v0 { ++ uint16_t type; ++ uint16_t flags; ++ uint16_t parent; ++ uint16_t prefix; ++ struct in6_addr ip; ++ struct in6_addr ip2; ++ struct in6_addr mask; ++}; ++ ++ ++#ifdef __KERNEL__ ++extern int vc_net_create(uint32_t, void __user *); ++extern int vc_net_migrate(struct nx_info *, void __user *); ++ ++extern int vc_net_add(struct nx_info *, void __user *); ++extern int vc_net_remove(struct nx_info *, void __user *); ++ ++extern int vc_net_add_ipv4_v1(struct nx_info *, void __user *); ++extern int vc_net_add_ipv4(struct nx_info *, void __user *); ++ ++extern int vc_net_rem_ipv4_v1(struct nx_info *, void __user *); ++extern int vc_net_rem_ipv4(struct nx_info *, void __user *); ++ ++extern int vc_net_add_ipv6(struct nx_info *, void __user *); ++extern int vc_net_remove_ipv6(struct nx_info *, void __user *); ++ ++extern int vc_add_match_ipv4(struct nx_info *, void __user *); ++extern int vc_get_match_ipv4(struct nx_info *, void __user *); ++ ++extern int vc_add_match_ipv6(struct nx_info *, void __user *); ++extern int vc_get_match_ipv6(struct nx_info *, void __user *); ++ ++#endif /* __KERNEL__ */ ++ ++ ++/* flag commands */ ++ ++#define VCMD_get_nflags VC_CMD(FLAGS, 5, 0) ++#define VCMD_set_nflags VC_CMD(FLAGS, 6, 0) ++ ++struct vcmd_net_flags_v0 { ++ uint64_t flagword; ++ uint64_t mask; ++}; ++ ++#ifdef __KERNEL__ ++extern int vc_get_nflags(struct nx_info *, void __user *); ++extern int vc_set_nflags(struct nx_info *, void __user *); ++ ++#endif /* __KERNEL__ */ ++ ++ ++/* network caps commands */ ++ ++#define VCMD_get_ncaps VC_CMD(FLAGS, 7, 0) ++#define VCMD_set_ncaps VC_CMD(FLAGS, 8, 0) ++ ++struct vcmd_net_caps_v0 { ++ uint64_t ncaps; ++ uint64_t cmask; ++}; ++ ++#ifdef __KERNEL__ ++extern int vc_get_ncaps(struct nx_info *, void __user *); ++extern int vc_set_ncaps(struct nx_info *, void __user *); ++ ++#endif /* __KERNEL__ */ ++#endif /* _VX_CONTEXT_CMD_H */ +diff -NurpP --minimal linux-3.3.8/include/linux/vserver/percpu.h linux-3.3.8-vs2.3.3.4/include/linux/vserver/percpu.h +--- linux-3.3.8/include/linux/vserver/percpu.h 1970-01-01 01:00:00.000000000 +0100 ++++ linux-3.3.8-vs2.3.3.4/include/linux/vserver/percpu.h 2012-02-24 03:55:06.000000000 +0100 +@@ -0,0 +1,14 @@ ++#ifndef _VX_PERCPU_H ++#define _VX_PERCPU_H ++ ++#include "cvirt_def.h" ++#include "sched_def.h" ++ ++struct _vx_percpu { ++ struct _vx_cvirt_pc cvirt; ++ struct _vx_sched_pc sched; ++}; ++ ++#define PERCPU_PERCTX (sizeof(struct _vx_percpu)) ++ ++#endif /* _VX_PERCPU_H */ +diff -NurpP --minimal linux-3.3.8/include/linux/vserver/pid.h linux-3.3.8-vs2.3.3.4/include/linux/vserver/pid.h +--- linux-3.3.8/include/linux/vserver/pid.h 
1970-01-01 01:00:00.000000000 +0100 ++++ linux-3.3.8-vs2.3.3.4/include/linux/vserver/pid.h 2012-02-24 03:55:06.000000000 +0100 +@@ -0,0 +1,51 @@ ++#ifndef _VSERVER_PID_H ++#define _VSERVER_PID_H ++ ++/* pid faking stuff */ ++ ++#define vx_info_map_pid(v, p) \ ++ __vx_info_map_pid((v), (p), __func__, __FILE__, __LINE__) ++#define vx_info_map_tgid(v,p) vx_info_map_pid(v,p) ++#define vx_map_pid(p) vx_info_map_pid(current_vx_info(), p) ++#define vx_map_tgid(p) vx_map_pid(p) ++ ++static inline int __vx_info_map_pid(struct vx_info *vxi, int pid, ++ const char *func, const char *file, int line) ++{ ++ if (vx_info_flags(vxi, VXF_INFO_INIT, 0)) { ++ vxfprintk(VXD_CBIT(cvirt, 2), ++ "vx_map_tgid: %p/%llx: %d -> %d", ++ vxi, (long long)vxi->vx_flags, pid, ++ (pid && pid == vxi->vx_initpid) ? 1 : pid, ++ func, file, line); ++ if (pid == 0) ++ return 0; ++ if (pid == vxi->vx_initpid) ++ return 1; ++ } ++ return pid; ++} ++ ++#define vx_info_rmap_pid(v, p) \ ++ __vx_info_rmap_pid((v), (p), __func__, __FILE__, __LINE__) ++#define vx_rmap_pid(p) vx_info_rmap_pid(current_vx_info(), p) ++#define vx_rmap_tgid(p) vx_rmap_pid(p) ++ ++static inline int __vx_info_rmap_pid(struct vx_info *vxi, int pid, ++ const char *func, const char *file, int line) ++{ ++ if (vx_info_flags(vxi, VXF_INFO_INIT, 0)) { ++ vxfprintk(VXD_CBIT(cvirt, 2), ++ "vx_rmap_tgid: %p/%llx: %d -> %d", ++ vxi, (long long)vxi->vx_flags, pid, ++ (pid == 1) ? vxi->vx_initpid : pid, ++ func, file, line); ++ if ((pid == 1) && vxi->vx_initpid) ++ return vxi->vx_initpid; ++ if (pid == vxi->vx_initpid) ++ return ~0U; ++ } ++ return pid; ++} ++ ++#endif +diff -NurpP --minimal linux-3.3.8/include/linux/vserver/sched.h linux-3.3.8-vs2.3.3.4/include/linux/vserver/sched.h +--- linux-3.3.8/include/linux/vserver/sched.h 1970-01-01 01:00:00.000000000 +0100 ++++ linux-3.3.8-vs2.3.3.4/include/linux/vserver/sched.h 2012-02-24 03:55:06.000000000 +0100 +@@ -0,0 +1,23 @@ ++#ifndef _VX_SCHED_H ++#define _VX_SCHED_H ++ ++ ++#ifdef __KERNEL__ ++ ++struct timespec; ++ ++void vx_vsi_uptime(struct timespec *, struct timespec *); ++ ++ ++struct vx_info; ++ ++void vx_update_load(struct vx_info *); ++ ++ ++void vx_update_sched_param(struct _vx_sched *sched, ++ struct _vx_sched_pc *sched_pc); ++ ++#endif /* __KERNEL__ */ ++#else /* _VX_SCHED_H */ ++#warning duplicate inclusion ++#endif /* _VX_SCHED_H */ +diff -NurpP --minimal linux-3.3.8/include/linux/vserver/sched_cmd.h linux-3.3.8-vs2.3.3.4/include/linux/vserver/sched_cmd.h +--- linux-3.3.8/include/linux/vserver/sched_cmd.h 1970-01-01 01:00:00.000000000 +0100 ++++ linux-3.3.8-vs2.3.3.4/include/linux/vserver/sched_cmd.h 2012-02-24 03:55:06.000000000 +0100 +@@ -0,0 +1,21 @@ ++#ifndef _VX_SCHED_CMD_H ++#define _VX_SCHED_CMD_H ++ ++ ++struct vcmd_prio_bias { ++ int32_t cpu_id; ++ int32_t prio_bias; ++}; ++ ++#define VCMD_set_prio_bias VC_CMD(SCHED, 4, 0) ++#define VCMD_get_prio_bias VC_CMD(SCHED, 5, 0) ++ ++#ifdef __KERNEL__ ++ ++#include ++ ++extern int vc_set_prio_bias(struct vx_info *, void __user *); ++extern int vc_get_prio_bias(struct vx_info *, void __user *); ++ ++#endif /* __KERNEL__ */ ++#endif /* _VX_SCHED_CMD_H */ +diff -NurpP --minimal linux-3.3.8/include/linux/vserver/sched_def.h linux-3.3.8-vs2.3.3.4/include/linux/vserver/sched_def.h +--- linux-3.3.8/include/linux/vserver/sched_def.h 1970-01-01 01:00:00.000000000 +0100 ++++ linux-3.3.8-vs2.3.3.4/include/linux/vserver/sched_def.h 2012-02-24 03:55:06.000000000 +0100 +@@ -0,0 +1,38 @@ ++#ifndef _VX_SCHED_DEF_H ++#define _VX_SCHED_DEF_H ++ ++#include ++#include 
++#include ++#include ++#include ++ ++ ++/* context sub struct */ ++ ++struct _vx_sched { ++ int prio_bias; /* bias offset for priority */ ++ ++ cpumask_t update; /* CPUs which should update */ ++}; ++ ++struct _vx_sched_pc { ++ int prio_bias; /* bias offset for priority */ ++ ++ uint64_t user_ticks; /* token tick events */ ++ uint64_t sys_ticks; /* token tick events */ ++ uint64_t hold_ticks; /* token ticks paused */ ++}; ++ ++ ++#ifdef CONFIG_VSERVER_DEBUG ++ ++static inline void __dump_vx_sched(struct _vx_sched *sched) ++{ ++ printk("\t_vx_sched:\n"); ++ printk("\t priority = %4d\n", sched->prio_bias); ++} ++ ++#endif ++ ++#endif /* _VX_SCHED_DEF_H */ +diff -NurpP --minimal linux-3.3.8/include/linux/vserver/signal.h linux-3.3.8-vs2.3.3.4/include/linux/vserver/signal.h +--- linux-3.3.8/include/linux/vserver/signal.h 1970-01-01 01:00:00.000000000 +0100 ++++ linux-3.3.8-vs2.3.3.4/include/linux/vserver/signal.h 2012-02-24 03:55:06.000000000 +0100 +@@ -0,0 +1,14 @@ ++#ifndef _VX_SIGNAL_H ++#define _VX_SIGNAL_H ++ ++ ++#ifdef __KERNEL__ ++ ++struct vx_info; ++ ++int vx_info_kill(struct vx_info *, int, int); ++ ++#endif /* __KERNEL__ */ ++#else /* _VX_SIGNAL_H */ ++#warning duplicate inclusion ++#endif /* _VX_SIGNAL_H */ +diff -NurpP --minimal linux-3.3.8/include/linux/vserver/signal_cmd.h linux-3.3.8-vs2.3.3.4/include/linux/vserver/signal_cmd.h +--- linux-3.3.8/include/linux/vserver/signal_cmd.h 1970-01-01 01:00:00.000000000 +0100 ++++ linux-3.3.8-vs2.3.3.4/include/linux/vserver/signal_cmd.h 2012-02-24 03:55:06.000000000 +0100 +@@ -0,0 +1,43 @@ ++#ifndef _VX_SIGNAL_CMD_H ++#define _VX_SIGNAL_CMD_H ++ ++ ++/* signalling vserver commands */ ++ ++#define VCMD_ctx_kill VC_CMD(PROCTRL, 1, 0) ++#define VCMD_wait_exit VC_CMD(EVENT, 99, 0) ++ ++struct vcmd_ctx_kill_v0 { ++ int32_t pid; ++ int32_t sig; ++}; ++ ++struct vcmd_wait_exit_v0 { ++ int32_t reboot_cmd; ++ int32_t exit_code; ++}; ++ ++#ifdef __KERNEL__ ++ ++extern int vc_ctx_kill(struct vx_info *, void __user *); ++extern int vc_wait_exit(struct vx_info *, void __user *); ++ ++#endif /* __KERNEL__ */ ++ ++/* process alteration commands */ ++ ++#define VCMD_get_pflags VC_CMD(PROCALT, 5, 0) ++#define VCMD_set_pflags VC_CMD(PROCALT, 6, 0) ++ ++struct vcmd_pflags_v0 { ++ uint32_t flagword; ++ uint32_t mask; ++}; ++ ++#ifdef __KERNEL__ ++ ++extern int vc_get_pflags(uint32_t pid, void __user *); ++extern int vc_set_pflags(uint32_t pid, void __user *); ++ ++#endif /* __KERNEL__ */ ++#endif /* _VX_SIGNAL_CMD_H */ +diff -NurpP --minimal linux-3.3.8/include/linux/vserver/space.h linux-3.3.8-vs2.3.3.4/include/linux/vserver/space.h +--- linux-3.3.8/include/linux/vserver/space.h 1970-01-01 01:00:00.000000000 +0100 ++++ linux-3.3.8-vs2.3.3.4/include/linux/vserver/space.h 2012-02-24 03:55:06.000000000 +0100 +@@ -0,0 +1,12 @@ ++#ifndef _VX_SPACE_H ++#define _VX_SPACE_H ++ ++#include ++ ++struct vx_info; ++ ++int vx_set_space(struct vx_info *vxi, unsigned long mask, unsigned index); ++ ++#else /* _VX_SPACE_H */ ++#warning duplicate inclusion ++#endif /* _VX_SPACE_H */ +diff -NurpP --minimal linux-3.3.8/include/linux/vserver/space_cmd.h linux-3.3.8-vs2.3.3.4/include/linux/vserver/space_cmd.h +--- linux-3.3.8/include/linux/vserver/space_cmd.h 1970-01-01 01:00:00.000000000 +0100 ++++ linux-3.3.8-vs2.3.3.4/include/linux/vserver/space_cmd.h 2012-02-24 03:55:06.000000000 +0100 +@@ -0,0 +1,38 @@ ++#ifndef _VX_SPACE_CMD_H ++#define _VX_SPACE_CMD_H ++ ++ ++#define VCMD_enter_space_v0 VC_CMD(PROCALT, 1, 0) ++#define VCMD_enter_space_v1 VC_CMD(PROCALT, 1, 1) ++#define 
VCMD_enter_space VC_CMD(PROCALT, 1, 2) ++ ++#define VCMD_set_space_v0 VC_CMD(PROCALT, 3, 0) ++#define VCMD_set_space_v1 VC_CMD(PROCALT, 3, 1) ++#define VCMD_set_space VC_CMD(PROCALT, 3, 2) ++ ++#define VCMD_get_space_mask_v0 VC_CMD(PROCALT, 4, 0) ++ ++#define VCMD_get_space_mask VC_CMD(VSPACE, 0, 1) ++#define VCMD_get_space_default VC_CMD(VSPACE, 1, 0) ++ ++ ++struct vcmd_space_mask_v1 { ++ uint64_t mask; ++}; ++ ++struct vcmd_space_mask_v2 { ++ uint64_t mask; ++ uint32_t index; ++}; ++ ++ ++#ifdef __KERNEL__ ++ ++extern int vc_enter_space_v1(struct vx_info *, void __user *); ++extern int vc_set_space_v1(struct vx_info *, void __user *); ++extern int vc_enter_space(struct vx_info *, void __user *); ++extern int vc_set_space(struct vx_info *, void __user *); ++extern int vc_get_space_mask(void __user *, int); ++ ++#endif /* __KERNEL__ */ ++#endif /* _VX_SPACE_CMD_H */ +diff -NurpP --minimal linux-3.3.8/include/linux/vserver/switch.h linux-3.3.8-vs2.3.3.4/include/linux/vserver/switch.h +--- linux-3.3.8/include/linux/vserver/switch.h 1970-01-01 01:00:00.000000000 +0100 ++++ linux-3.3.8-vs2.3.3.4/include/linux/vserver/switch.h 2012-02-24 03:55:06.000000000 +0100 +@@ -0,0 +1,98 @@ ++#ifndef _VX_SWITCH_H ++#define _VX_SWITCH_H ++ ++#include ++ ++ ++#define VC_CATEGORY(c) (((c) >> 24) & 0x3F) ++#define VC_COMMAND(c) (((c) >> 16) & 0xFF) ++#define VC_VERSION(c) ((c) & 0xFFF) ++ ++#define VC_CMD(c, i, v) ((((VC_CAT_ ## c) & 0x3F) << 24) \ ++ | (((i) & 0xFF) << 16) | ((v) & 0xFFF)) ++ ++/* ++ ++ Syscall Matrix V2.8 ++ ++ |VERSION|CREATE |MODIFY |MIGRATE|CONTROL|EXPERIM| |SPECIAL|SPECIAL| ++ |STATS |DESTROY|ALTER |CHANGE |LIMIT |TEST | | | | ++ |INFO |SETUP | |MOVE | | | | | | ++ -------+-------+-------+-------+-------+-------+-------+ +-------+-------+ ++ SYSTEM |VERSION|VSETUP |VHOST | | | | |DEVICE | | ++ HOST | 00| 01| 02| 03| 04| 05| | 06| 07| ++ -------+-------+-------+-------+-------+-------+-------+ +-------+-------+ ++ CPU | |VPROC |PROCALT|PROCMIG|PROCTRL| | |SCHED. 
| | ++ PROCESS| 08| 09| 10| 11| 12| 13| | 14| 15| ++ -------+-------+-------+-------+-------+-------+-------+ +-------+-------+ ++ MEMORY | | | | |MEMCTRL| | |SWAP | | ++ | 16| 17| 18| 19| 20| 21| | 22| 23| ++ -------+-------+-------+-------+-------+-------+-------+ +-------+-------+ ++ NETWORK| |VNET |NETALT |NETMIG |NETCTL | | |SERIAL | | ++ | 24| 25| 26| 27| 28| 29| | 30| 31| ++ -------+-------+-------+-------+-------+-------+-------+ +-------+-------+ ++ DISK | | | |TAGMIG |DLIMIT | | |INODE | | ++ VFS | 32| 33| 34| 35| 36| 37| | 38| 39| ++ -------+-------+-------+-------+-------+-------+-------+ +-------+-------+ ++ OTHER |VSTAT | | | | | | |VINFO | | ++ | 40| 41| 42| 43| 44| 45| | 46| 47| ++ =======+=======+=======+=======+=======+=======+=======+ +=======+=======+ ++ SPECIAL|EVENT | | | |FLAGS | | |VSPACE | | ++ | 48| 49| 50| 51| 52| 53| | 54| 55| ++ -------+-------+-------+-------+-------+-------+-------+ +-------+-------+ ++ SPECIAL|DEBUG | | | |RLIMIT |SYSCALL| | |COMPAT | ++ | 56| 57| 58| 59| 60|TEST 61| | 62| 63| ++ -------+-------+-------+-------+-------+-------+-------+ +-------+-------+ ++ ++*/ ++ ++#define VC_CAT_VERSION 0 ++ ++#define VC_CAT_VSETUP 1 ++#define VC_CAT_VHOST 2 ++ ++#define VC_CAT_DEVICE 6 ++ ++#define VC_CAT_VPROC 9 ++#define VC_CAT_PROCALT 10 ++#define VC_CAT_PROCMIG 11 ++#define VC_CAT_PROCTRL 12 ++ ++#define VC_CAT_SCHED 14 ++#define VC_CAT_MEMCTRL 20 ++ ++#define VC_CAT_VNET 25 ++#define VC_CAT_NETALT 26 ++#define VC_CAT_NETMIG 27 ++#define VC_CAT_NETCTRL 28 ++ ++#define VC_CAT_TAGMIG 35 ++#define VC_CAT_DLIMIT 36 ++#define VC_CAT_INODE 38 ++ ++#define VC_CAT_VSTAT 40 ++#define VC_CAT_VINFO 46 ++#define VC_CAT_EVENT 48 ++ ++#define VC_CAT_FLAGS 52 ++#define VC_CAT_VSPACE 54 ++#define VC_CAT_DEBUG 56 ++#define VC_CAT_RLIMIT 60 ++ ++#define VC_CAT_SYSTEST 61 ++#define VC_CAT_COMPAT 63 ++ ++/* query version */ ++ ++#define VCMD_get_version VC_CMD(VERSION, 0, 0) ++#define VCMD_get_vci VC_CMD(VERSION, 1, 0) ++ ++ ++#ifdef __KERNEL__ ++ ++#include ++ ++#endif /* __KERNEL__ */ ++ ++#endif /* _VX_SWITCH_H */ ++ +diff -NurpP --minimal linux-3.3.8/include/linux/vserver/tag.h linux-3.3.8-vs2.3.3.4/include/linux/vserver/tag.h +--- linux-3.3.8/include/linux/vserver/tag.h 1970-01-01 01:00:00.000000000 +0100 ++++ linux-3.3.8-vs2.3.3.4/include/linux/vserver/tag.h 2012-02-24 03:55:06.000000000 +0100 +@@ -0,0 +1,143 @@ ++#ifndef _DX_TAG_H ++#define _DX_TAG_H ++ ++#include ++ ++ ++#define DX_TAG(in) (IS_TAGGED(in)) ++ ++ ++#ifdef CONFIG_TAG_NFSD ++#define DX_TAG_NFSD 1 ++#else ++#define DX_TAG_NFSD 0 ++#endif ++ ++ ++#ifdef CONFIG_TAGGING_NONE ++ ++#define MAX_UID 0xFFFFFFFF ++#define MAX_GID 0xFFFFFFFF ++ ++#define INOTAG_TAG(cond, uid, gid, tag) (0) ++ ++#define TAGINO_UID(cond, uid, tag) (uid) ++#define TAGINO_GID(cond, gid, tag) (gid) ++ ++#endif ++ ++ ++#ifdef CONFIG_TAGGING_GID16 ++ ++#define MAX_UID 0xFFFFFFFF ++#define MAX_GID 0x0000FFFF ++ ++#define INOTAG_TAG(cond, uid, gid, tag) \ ++ ((cond) ? (((gid) >> 16) & 0xFFFF) : 0) ++ ++#define TAGINO_UID(cond, uid, tag) (uid) ++#define TAGINO_GID(cond, gid, tag) \ ++ ((cond) ? (((gid) & 0xFFFF) | ((tag) << 16)) : (gid)) ++ ++#endif ++ ++ ++#ifdef CONFIG_TAGGING_ID24 ++ ++#define MAX_UID 0x00FFFFFF ++#define MAX_GID 0x00FFFFFF ++ ++#define INOTAG_TAG(cond, uid, gid, tag) \ ++ ((cond) ? ((((uid) >> 16) & 0xFF00) | (((gid) >> 24) & 0xFF)) : 0) ++ ++#define TAGINO_UID(cond, uid, tag) \ ++ ((cond) ? (((uid) & 0xFFFFFF) | (((tag) & 0xFF00) << 16)) : (uid)) ++#define TAGINO_GID(cond, gid, tag) \ ++ ((cond) ? 
(((gid) & 0xFFFFFF) | (((tag) & 0x00FF) << 24)) : (gid)) ++ ++#endif ++ ++ ++#ifdef CONFIG_TAGGING_UID16 ++ ++#define MAX_UID 0x0000FFFF ++#define MAX_GID 0xFFFFFFFF ++ ++#define INOTAG_TAG(cond, uid, gid, tag) \ ++ ((cond) ? (((uid) >> 16) & 0xFFFF) : 0) ++ ++#define TAGINO_UID(cond, uid, tag) \ ++ ((cond) ? (((uid) & 0xFFFF) | ((tag) << 16)) : (uid)) ++#define TAGINO_GID(cond, gid, tag) (gid) ++ ++#endif ++ ++ ++#ifdef CONFIG_TAGGING_INTERN ++ ++#define MAX_UID 0xFFFFFFFF ++#define MAX_GID 0xFFFFFFFF ++ ++#define INOTAG_TAG(cond, uid, gid, tag) \ ++ ((cond) ? (tag) : 0) ++ ++#define TAGINO_UID(cond, uid, tag) (uid) ++#define TAGINO_GID(cond, gid, tag) (gid) ++ ++#endif ++ ++ ++#ifndef CONFIG_TAGGING_NONE ++#define dx_current_fstag(sb) \ ++ ((sb)->s_flags & MS_TAGGED ? dx_current_tag() : 0) ++#else ++#define dx_current_fstag(sb) (0) ++#endif ++ ++#ifndef CONFIG_TAGGING_INTERN ++#define TAGINO_TAG(cond, tag) (0) ++#else ++#define TAGINO_TAG(cond, tag) ((cond) ? (tag) : 0) ++#endif ++ ++#define INOTAG_UID(cond, uid, gid) \ ++ ((cond) ? ((uid) & MAX_UID) : (uid)) ++#define INOTAG_GID(cond, uid, gid) \ ++ ((cond) ? ((gid) & MAX_GID) : (gid)) ++ ++ ++static inline uid_t dx_map_uid(uid_t uid) ++{ ++ if ((uid > MAX_UID) && (uid != -1)) ++ uid = -2; ++ return (uid & MAX_UID); ++} ++ ++static inline gid_t dx_map_gid(gid_t gid) ++{ ++ if ((gid > MAX_GID) && (gid != -1)) ++ gid = -2; ++ return (gid & MAX_GID); ++} ++ ++struct peer_tag { ++ int32_t xid; ++ int32_t nid; ++}; ++ ++#define dx_notagcheck(sb) ((sb) && ((sb)->s_flags & MS_NOTAGCHECK)) ++ ++int dx_parse_tag(char *string, tag_t *tag, int remove, int *mnt_flags, ++ unsigned long *flags); ++ ++#ifdef CONFIG_PROPAGATE ++ ++void __dx_propagate_tag(struct nameidata *nd, struct inode *inode); ++ ++#define dx_propagate_tag(n, i) __dx_propagate_tag(n, i) ++ ++#else ++#define dx_propagate_tag(n, i) do { } while (0) ++#endif ++ ++#endif /* _DX_TAG_H */ +diff -NurpP --minimal linux-3.3.8/include/linux/vserver/tag_cmd.h linux-3.3.8-vs2.3.3.4/include/linux/vserver/tag_cmd.h +--- linux-3.3.8/include/linux/vserver/tag_cmd.h 1970-01-01 01:00:00.000000000 +0100 ++++ linux-3.3.8-vs2.3.3.4/include/linux/vserver/tag_cmd.h 2012-02-24 03:55:06.000000000 +0100 +@@ -0,0 +1,22 @@ ++#ifndef _VX_TAG_CMD_H ++#define _VX_TAG_CMD_H ++ ++ ++/* vinfo commands */ ++ ++#define VCMD_task_tag VC_CMD(VINFO, 3, 0) ++ ++#ifdef __KERNEL__ ++extern int vc_task_tag(uint32_t); ++ ++#endif /* __KERNEL__ */ ++ ++/* context commands */ ++ ++#define VCMD_tag_migrate VC_CMD(TAGMIG, 1, 0) ++ ++#ifdef __KERNEL__ ++extern int vc_tag_migrate(uint32_t); ++ ++#endif /* __KERNEL__ */ ++#endif /* _VX_TAG_CMD_H */ +diff -NurpP --minimal linux-3.3.8/include/net/addrconf.h linux-3.3.8-vs2.3.3.4/include/net/addrconf.h +--- linux-3.3.8/include/net/addrconf.h 2012-03-19 19:47:29.000000000 +0100 ++++ linux-3.3.8-vs2.3.3.4/include/net/addrconf.h 2012-02-24 03:55:06.000000000 +0100 +@@ -80,7 +80,8 @@ extern int ipv6_dev_get_saddr(struct n + struct net_device *dev, + const struct in6_addr *daddr, + unsigned int srcprefs, +- struct in6_addr *saddr); ++ struct in6_addr *saddr, ++ struct nx_info *nxi); + extern int ipv6_get_lladdr(struct net_device *dev, + struct in6_addr *addr, + unsigned char banned_flags); +diff -NurpP --minimal linux-3.3.8/include/net/af_unix.h linux-3.3.8-vs2.3.3.4/include/net/af_unix.h +--- linux-3.3.8/include/net/af_unix.h 2012-03-19 19:47:29.000000000 +0100 ++++ linux-3.3.8-vs2.3.3.4/include/net/af_unix.h 2012-02-24 03:55:06.000000000 +0100 +@@ -4,6 +4,7 @@ + #include + #include + 
#include ++#include + #include + + extern void unix_inflight(struct file *fp); +diff -NurpP --minimal linux-3.3.8/include/net/inet_timewait_sock.h linux-3.3.8-vs2.3.3.4/include/net/inet_timewait_sock.h +--- linux-3.3.8/include/net/inet_timewait_sock.h 2012-03-19 19:47:29.000000000 +0100 ++++ linux-3.3.8-vs2.3.3.4/include/net/inet_timewait_sock.h 2012-02-24 03:55:06.000000000 +0100 +@@ -112,6 +112,10 @@ struct inet_timewait_sock { + #define tw_net __tw_common.skc_net + #define tw_daddr __tw_common.skc_daddr + #define tw_rcv_saddr __tw_common.skc_rcv_saddr ++#define tw_xid __tw_common.skc_xid ++#define tw_vx_info __tw_common.skc_vx_info ++#define tw_nid __tw_common.skc_nid ++#define tw_nx_info __tw_common.skc_nx_info + int tw_timeout; + volatile unsigned char tw_substate; + unsigned char tw_rcv_wscale; +diff -NurpP --minimal linux-3.3.8/include/net/ip6_route.h linux-3.3.8-vs2.3.3.4/include/net/ip6_route.h +--- linux-3.3.8/include/net/ip6_route.h 2012-03-19 19:47:29.000000000 +0100 ++++ linux-3.3.8-vs2.3.3.4/include/net/ip6_route.h 2012-02-24 03:55:06.000000000 +0100 +@@ -88,7 +88,8 @@ extern int ip6_route_get_saddr(struct + struct rt6_info *rt, + const struct in6_addr *daddr, + unsigned int prefs, +- struct in6_addr *saddr); ++ struct in6_addr *saddr, ++ struct nx_info *nxi); + + extern struct rt6_info *rt6_lookup(struct net *net, + const struct in6_addr *daddr, +diff -NurpP --minimal linux-3.3.8/include/net/route.h linux-3.3.8-vs2.3.3.4/include/net/route.h +--- linux-3.3.8/include/net/route.h 2012-03-19 19:47:29.000000000 +0100 ++++ linux-3.3.8-vs2.3.3.4/include/net/route.h 2012-02-24 03:55:06.000000000 +0100 +@@ -202,6 +202,9 @@ static inline void ip_rt_put(struct rtab + dst_release(&rt->dst); + } + ++#include ++#include ++ + #define IPTOS_RT_MASK (IPTOS_TOS_MASK & ~3) + + extern const __u8 ip_tos2prio[16]; +@@ -253,6 +256,9 @@ static inline void ip_route_connect_init + protocol, flow_flags, dst, src, dport, sport); + } + ++extern struct rtable *ip_v4_find_src(struct net *net, struct nx_info *, ++ struct flowi4 *); ++ + static inline struct rtable *ip_route_connect(struct flowi4 *fl4, + __be32 dst, __be32 src, u32 tos, + int oif, u8 protocol, +@@ -261,11 +267,25 @@ static inline struct rtable *ip_route_co + { + struct net *net = sock_net(sk); + struct rtable *rt; ++ struct nx_info *nx_info = current_nx_info(); + + ip_route_connect_init(fl4, dst, src, tos, oif, protocol, + sport, dport, sk, can_sleep); + +- if (!dst || !src) { ++ if (sk) ++ nx_info = sk->sk_nx_info; ++ ++ vxdprintk(VXD_CBIT(net, 4), ++ "ip_route_connect(%p) %p,%p;%lx", ++ sk, nx_info, sk->sk_socket, ++ (sk->sk_socket?sk->sk_socket->flags:0)); ++ ++ rt = ip_v4_find_src(net, nx_info, fl4); ++ if (IS_ERR(rt)) ++ return rt; ++ ip_rt_put(rt); ++ ++ if (!fl4->daddr || !fl4->saddr) { + rt = __ip_route_output_key(net, fl4); + if (IS_ERR(rt)) + return rt; +diff -NurpP --minimal linux-3.3.8/include/net/sock.h linux-3.3.8-vs2.3.3.4/include/net/sock.h +--- linux-3.3.8/include/net/sock.h 2012-03-19 19:47:29.000000000 +0100 ++++ linux-3.3.8-vs2.3.3.4/include/net/sock.h 2012-02-24 03:55:06.000000000 +0100 +@@ -168,6 +168,10 @@ struct sock_common { + #ifdef CONFIG_NET_NS + struct net *skc_net; + #endif ++ xid_t skc_xid; ++ struct vx_info *skc_vx_info; ++ nid_t skc_nid; ++ struct nx_info *skc_nx_info; + /* + * fields between dontcopy_begin/dontcopy_end + * are not copied in sock_copy() +@@ -278,6 +282,10 @@ struct sock { + #define sk_bind_node __sk_common.skc_bind_node + #define sk_prot __sk_common.skc_prot + #define sk_net 
__sk_common.skc_net ++#define sk_xid __sk_common.skc_xid ++#define sk_vx_info __sk_common.skc_vx_info ++#define sk_nid __sk_common.skc_nid ++#define sk_nx_info __sk_common.skc_nx_info + socket_lock_t sk_lock; + struct sk_buff_head sk_receive_queue; + /* +diff -NurpP --minimal linux-3.3.8/init/Kconfig linux-3.3.8-vs2.3.3.4/init/Kconfig +--- linux-3.3.8/init/Kconfig 2012-03-19 19:47:29.000000000 +0100 ++++ linux-3.3.8-vs2.3.3.4/init/Kconfig 2012-02-24 03:55:06.000000000 +0100 +@@ -588,6 +588,7 @@ config HAVE_UNSTABLE_SCHED_CLOCK + menuconfig CGROUPS + boolean "Control Group support" + depends on EVENTFD ++ default y + help + This option adds support for grouping sets of processes together, for + use with process control subsystems such as Cpusets, CFS, memory +@@ -837,6 +838,7 @@ config IPC_NS + config USER_NS + bool "User namespace (EXPERIMENTAL)" + depends on EXPERIMENTAL ++ depends on VSERVER_DISABLED + default y + help + This allows containers, i.e. vservers, to use user namespaces +diff -NurpP --minimal linux-3.3.8/init/main.c linux-3.3.8-vs2.3.3.4/init/main.c +--- linux-3.3.8/init/main.c 2012-06-08 15:23:46.000000000 +0200 ++++ linux-3.3.8-vs2.3.3.4/init/main.c 2012-06-08 15:27:44.000000000 +0200 +@@ -68,6 +68,7 @@ + #include + #include + #include ++#include + + #include + #include +diff -NurpP --minimal linux-3.3.8/ipc/mqueue.c linux-3.3.8-vs2.3.3.4/ipc/mqueue.c +--- linux-3.3.8/ipc/mqueue.c 2012-03-19 19:47:29.000000000 +0100 ++++ linux-3.3.8-vs2.3.3.4/ipc/mqueue.c 2012-02-24 04:07:13.000000000 +0100 +@@ -34,6 +34,8 @@ + #include + #include + #include ++#include ++#include + + #include + #include "util.h" +@@ -67,6 +69,7 @@ struct mqueue_inode_info { + struct sigevent notify; + struct pid* notify_owner; + struct user_struct *user; /* user who created, for accounting */ ++ struct vx_info *vxi; + struct sock *notify_sock; + struct sk_buff *notify_cookie; + +@@ -129,6 +132,7 @@ static struct inode *mqueue_get_inode(st + if (S_ISREG(mode)) { + struct mqueue_inode_info *info; + unsigned long mq_bytes, mq_msg_tblsz; ++ struct vx_info *vxi = current_vx_info(); + + inode->i_fop = &mqueue_file_operations; + inode->i_size = FILENT_SIZE; +@@ -141,6 +145,7 @@ static struct inode *mqueue_get_inode(st + info->notify_owner = NULL; + info->qsize = 0; + info->user = NULL; /* set when all is ok */ ++ info->vxi = NULL; + memset(&info->attr, 0, sizeof(info->attr)); + info->attr.mq_maxmsg = ipc_ns->mq_msg_max; + info->attr.mq_msgsize = ipc_ns->mq_msgsize_max; +@@ -158,17 +163,20 @@ static struct inode *mqueue_get_inode(st + + spin_lock(&mq_lock); + if (u->mq_bytes + mq_bytes < u->mq_bytes || +- u->mq_bytes + mq_bytes > rlimit(RLIMIT_MSGQUEUE)) { ++ u->mq_bytes + mq_bytes > rlimit(RLIMIT_MSGQUEUE) || ++ !vx_ipcmsg_avail(vxi, mq_bytes)) { + spin_unlock(&mq_lock); + /* mqueue_evict_inode() releases info->messages */ + ret = -EMFILE; + goto out_inode; + } + u->mq_bytes += mq_bytes; ++ vx_ipcmsg_add(vxi, u, mq_bytes); + spin_unlock(&mq_lock); + + /* all is ok */ + info->user = get_uid(u); ++ info->vxi = get_vx_info(vxi); + } else if (S_ISDIR(mode)) { + inc_nlink(inode); + /* Some things misbehave if size == 0 on a directory */ +@@ -277,8 +285,11 @@ static void mqueue_evict_inode(struct in + + info->attr.mq_msgsize); + user = info->user; + if (user) { ++ struct vx_info *vxi = info->vxi; ++ + spin_lock(&mq_lock); + user->mq_bytes -= mq_bytes; ++ vx_ipcmsg_sub(vxi, user, mq_bytes); + /* + * get_ns_from_inode() ensures that the + * (ipc_ns = sb->s_fs_info) is either a valid ipc_ns +@@ -288,6 +299,7 @@ static void 
mqueue_evict_inode(struct in + if (ipc_ns) + ipc_ns->mq_queues_count--; + spin_unlock(&mq_lock); ++ put_vx_info(vxi); + free_uid(user); + } + if (ipc_ns) +diff -NurpP --minimal linux-3.3.8/ipc/msg.c linux-3.3.8-vs2.3.3.4/ipc/msg.c +--- linux-3.3.8/ipc/msg.c 2011-05-22 16:17:59.000000000 +0200 ++++ linux-3.3.8-vs2.3.3.4/ipc/msg.c 2012-02-24 03:55:06.000000000 +0100 +@@ -37,6 +37,7 @@ + #include + #include + #include ++#include + + #include + #include +@@ -190,6 +191,7 @@ static int newque(struct ipc_namespace * + + msq->q_perm.mode = msgflg & S_IRWXUGO; + msq->q_perm.key = key; ++ msq->q_perm.xid = vx_current_xid(); + + msq->q_perm.security = NULL; + retval = security_msg_queue_alloc(msq); +diff -NurpP --minimal linux-3.3.8/ipc/namespace.c linux-3.3.8-vs2.3.3.4/ipc/namespace.c +--- linux-3.3.8/ipc/namespace.c 2011-07-22 11:18:12.000000000 +0200 ++++ linux-3.3.8-vs2.3.3.4/ipc/namespace.c 2012-02-24 03:55:06.000000000 +0100 +@@ -13,11 +13,12 @@ + #include + #include + #include ++#include ++#include + + #include "util.h" + +-static struct ipc_namespace *create_ipc_ns(struct task_struct *tsk, +- struct ipc_namespace *old_ns) ++static struct ipc_namespace *create_ipc_ns(struct user_namespace *user_ns) + { + struct ipc_namespace *ns; + int err; +@@ -46,19 +47,18 @@ static struct ipc_namespace *create_ipc_ + ipcns_notify(IPCNS_CREATED); + register_ipcns_notifier(ns); + +- ns->user_ns = get_user_ns(task_cred_xxx(tsk, user)->user_ns); ++ ns->user_ns = get_user_ns(user_ns); + + return ns; + } + + struct ipc_namespace *copy_ipcs(unsigned long flags, +- struct task_struct *tsk) ++ struct ipc_namespace *old_ns, ++ struct user_namespace *user_ns) + { +- struct ipc_namespace *ns = tsk->nsproxy->ipc_ns; +- + if (!(flags & CLONE_NEWIPC)) +- return get_ipc_ns(ns); +- return create_ipc_ns(tsk, ns); ++ return get_ipc_ns(old_ns); ++ return create_ipc_ns(user_ns); + } + + /* +diff -NurpP --minimal linux-3.3.8/ipc/sem.c linux-3.3.8-vs2.3.3.4/ipc/sem.c +--- linux-3.3.8/ipc/sem.c 2012-01-09 16:14:59.000000000 +0100 ++++ linux-3.3.8-vs2.3.3.4/ipc/sem.c 2012-02-24 03:55:06.000000000 +0100 +@@ -86,6 +86,8 @@ + #include + #include + #include ++#include ++#include + + #include + #include "util.h" +@@ -306,6 +308,7 @@ static int newary(struct ipc_namespace * + + sma->sem_perm.mode = (semflg & S_IRWXUGO); + sma->sem_perm.key = key; ++ sma->sem_perm.xid = vx_current_xid(); + + sma->sem_perm.security = NULL; + retval = security_sem_alloc(sma); +@@ -321,6 +324,9 @@ static int newary(struct ipc_namespace * + return id; + } + ns->used_sems += nsems; ++ /* FIXME: obsoleted? */ ++ vx_semary_inc(sma); ++ vx_nsems_add(sma, nsems); + + sma->sem_base = (struct sem *) &sma[1]; + +@@ -770,6 +776,9 @@ static void freeary(struct ipc_namespace + + wake_up_sem_queue_do(&tasks); + ns->used_sems -= sma->sem_nsems; ++ /* FIXME: obsoleted? 
*/ ++ vx_nsems_sub(sma, sma->sem_nsems); ++ vx_semary_dec(sma); + security_sem_free(sma); + ipc_rcu_putref(sma); + } +diff -NurpP --minimal linux-3.3.8/ipc/shm.c linux-3.3.8-vs2.3.3.4/ipc/shm.c +--- linux-3.3.8/ipc/shm.c 2012-03-19 19:47:29.000000000 +0100 ++++ linux-3.3.8-vs2.3.3.4/ipc/shm.c 2012-02-24 03:55:06.000000000 +0100 +@@ -39,6 +39,8 @@ + #include + #include + #include ++#include ++#include + + #include + +@@ -187,7 +189,12 @@ static void shm_open(struct vm_area_stru + */ + static void shm_destroy(struct ipc_namespace *ns, struct shmid_kernel *shp) + { +- ns->shm_tot -= (shp->shm_segsz + PAGE_SIZE - 1) >> PAGE_SHIFT; ++ struct vx_info *vxi = lookup_vx_info(shp->shm_perm.xid); ++ int numpages = (shp->shm_segsz + PAGE_SIZE - 1) >> PAGE_SHIFT; ++ ++ vx_ipcshm_sub(vxi, shp, numpages); ++ ns->shm_tot -= numpages; ++ + shm_rmid(ns, shp); + shm_unlock(shp); + if (!is_file_hugepages(shp->shm_file)) +@@ -197,6 +204,7 @@ static void shm_destroy(struct ipc_names + shp->mlock_user); + fput (shp->shm_file); + security_shm_free(shp); ++ put_vx_info(vxi); + ipc_rcu_putref(shp); + } + +@@ -462,11 +470,15 @@ static int newseg(struct ipc_namespace * + if (ns->shm_tot + numpages > ns->shm_ctlall) + return -ENOSPC; + ++ if (!vx_ipcshm_avail(current_vx_info(), numpages)) ++ return -ENOSPC; ++ + shp = ipc_rcu_alloc(sizeof(*shp)); + if (!shp) + return -ENOMEM; + + shp->shm_perm.key = key; ++ shp->shm_perm.xid = vx_current_xid(); + shp->shm_perm.mode = (shmflg & S_IRWXUGO); + shp->mlock_user = NULL; + +@@ -521,6 +533,7 @@ static int newseg(struct ipc_namespace * + ns->shm_tot += numpages; + error = shp->shm_perm.id; + shm_unlock(shp); ++ vx_ipcshm_add(current_vx_info(), key, numpages); + return error; + + no_id: +diff -NurpP --minimal linux-3.3.8/kernel/Makefile linux-3.3.8-vs2.3.3.4/kernel/Makefile +--- linux-3.3.8/kernel/Makefile 2012-03-19 19:47:29.000000000 +0100 ++++ linux-3.3.8-vs2.3.3.4/kernel/Makefile 2012-02-24 03:55:06.000000000 +0100 +@@ -25,6 +25,7 @@ endif + obj-y += sched/ + obj-y += power/ + ++obj-y += vserver/ + obj-$(CONFIG_FREEZER) += freezer.o + obj-$(CONFIG_PROFILING) += profile.o + obj-$(CONFIG_SYSCTL_SYSCALL_CHECK) += sysctl_check.o +diff -NurpP --minimal linux-3.3.8/kernel/auditsc.c linux-3.3.8-vs2.3.3.4/kernel/auditsc.c +--- linux-3.3.8/kernel/auditsc.c 2012-03-19 19:47:29.000000000 +0100 ++++ linux-3.3.8-vs2.3.3.4/kernel/auditsc.c 2012-02-24 16:47:49.000000000 +0100 +@@ -2308,7 +2308,7 @@ int audit_set_loginuid(uid_t loginuid) + if (task->loginuid != -1) + return -EPERM; + #else /* CONFIG_AUDIT_LOGINUID_IMMUTABLE */ +- if (!capable(CAP_AUDIT_CONTROL)) ++ if (!vx_capable(CAP_AUDIT_CONTROL, VXC_AUDIT_CONTROL)) + return -EPERM; + #endif /* CONFIG_AUDIT_LOGINUID_IMMUTABLE */ + +diff -NurpP --minimal linux-3.3.8/kernel/capability.c linux-3.3.8-vs2.3.3.4/kernel/capability.c +--- linux-3.3.8/kernel/capability.c 2012-03-19 19:47:29.000000000 +0100 ++++ linux-3.3.8-vs2.3.3.4/kernel/capability.c 2012-02-24 03:55:06.000000000 +0100 +@@ -15,6 +15,7 @@ + #include + #include + #include ++#include + #include + + /* +@@ -116,6 +117,7 @@ static int cap_validate_magic(cap_user_h + return 0; + } + ++ + /* + * The only thing that can change the capabilities of the current + * process is the current process. 
As such, we can't be in this code +@@ -349,6 +351,8 @@ bool has_ns_capability_noaudit(struct ta + return (ret == 0); + } + ++#include ++ + /** + * has_capability_noaudit - Does a task have a capability (unaudited) in the + * initial user ns +diff -NurpP --minimal linux-3.3.8/kernel/compat.c linux-3.3.8-vs2.3.3.4/kernel/compat.c +--- linux-3.3.8/kernel/compat.c 2012-06-08 15:23:46.000000000 +0200 ++++ linux-3.3.8-vs2.3.3.4/kernel/compat.c 2012-05-22 09:49:13.000000000 +0200 +@@ -1002,7 +1002,7 @@ asmlinkage long compat_sys_stime(compat_ + if (err) + return err; + +- do_settimeofday(&tv); ++ vx_settimeofday(&tv); + return 0; + } + +diff -NurpP --minimal linux-3.3.8/kernel/cred.c linux-3.3.8-vs2.3.3.4/kernel/cred.c +--- linux-3.3.8/kernel/cred.c 2012-06-08 15:23:46.000000000 +0200 ++++ linux-3.3.8-vs2.3.3.4/kernel/cred.c 2012-04-16 13:32:11.000000000 +0200 +@@ -61,31 +61,6 @@ struct cred init_cred = { + #endif + }; + +-static inline void set_cred_subscribers(struct cred *cred, int n) +-{ +-#ifdef CONFIG_DEBUG_CREDENTIALS +- atomic_set(&cred->subscribers, n); +-#endif +-} +- +-static inline int read_cred_subscribers(const struct cred *cred) +-{ +-#ifdef CONFIG_DEBUG_CREDENTIALS +- return atomic_read(&cred->subscribers); +-#else +- return 0; +-#endif +-} +- +-static inline void alter_cred_subscribers(const struct cred *_cred, int n) +-{ +-#ifdef CONFIG_DEBUG_CREDENTIALS +- struct cred *cred = (struct cred *) _cred; +- +- atomic_add(n, &cred->subscribers); +-#endif +-} +- + /* + * Dispose of the shared task group credentials + */ +@@ -281,21 +256,16 @@ error: + * + * Call commit_creds() or abort_creds() to clean up. + */ +-struct cred *prepare_creds(void) ++struct cred *__prepare_creds(const struct cred *old) + { +- struct task_struct *task = current; +- const struct cred *old; + struct cred *new; + +- validate_process_creds(); +- + new = kmem_cache_alloc(cred_jar, GFP_KERNEL); + if (!new) + return NULL; + + kdebug("prepare_creds() alloc %p", new); + +- old = task->cred; + memcpy(new, old, sizeof(struct cred)); + + atomic_set(&new->usage, 1); +@@ -322,6 +292,13 @@ error: + abort_creds(new); + return NULL; + } ++ ++struct cred *prepare_creds(void) ++{ ++ validate_process_creds(); ++ ++ return __prepare_creds(current->cred); ++} + EXPORT_SYMBOL(prepare_creds); + + /* +diff -NurpP --minimal linux-3.3.8/kernel/exit.c linux-3.3.8-vs2.3.3.4/kernel/exit.c +--- linux-3.3.8/kernel/exit.c 2012-06-08 15:23:46.000000000 +0200 ++++ linux-3.3.8-vs2.3.3.4/kernel/exit.c 2012-05-09 04:08:08.000000000 +0200 +@@ -48,6 +48,10 @@ + #include + #include + #include ++#include ++#include ++#include ++#include + #include + #include + #include +@@ -481,9 +485,11 @@ static void close_files(struct files_str + filp_close(file, files); + cond_resched(); + } ++ vx_openfd_dec(i); + } + i++; + set >>= 1; ++ cond_resched(); + } + } + } +@@ -1035,10 +1041,15 @@ void do_exit(long code) + smp_mb(); + raw_spin_unlock_wait(&tsk->pi_lock); + ++ /* needs to stay after exit_notify() */ ++ exit_vx_info(tsk, code); ++ exit_nx_info(tsk); ++ + /* causes final put_task_struct in finish_task_switch(). */ + tsk->state = TASK_DEAD; + tsk->flags |= PF_NOFREEZE; /* tell freezer to ignore us */ + schedule(); ++ printk("bad task: %p [%lx]\n", current, current->state); + BUG(); + /* Avoid "noreturn function does return". 
*/ + for (;;) +diff -NurpP --minimal linux-3.3.8/kernel/fork.c linux-3.3.8-vs2.3.3.4/kernel/fork.c +--- linux-3.3.8/kernel/fork.c 2012-06-08 15:23:46.000000000 +0200 ++++ linux-3.3.8-vs2.3.3.4/kernel/fork.c 2012-05-22 09:49:13.000000000 +0200 +@@ -68,6 +68,9 @@ + #include + #include + #include ++#include ++#include ++#include + + #include + #include +@@ -170,6 +173,8 @@ void free_task(struct task_struct *tsk) + account_kernel_stack(tsk->stack, -1); + free_thread_info(tsk->stack); + rt_mutex_debug_task_free(tsk); ++ clr_vx_info(&tsk->vx_info); ++ clr_nx_info(&tsk->nx_info); + ftrace_graph_exit_task(tsk); + free_task_struct(tsk); + } +@@ -505,6 +510,7 @@ static struct mm_struct *mm_init(struct + if (likely(!mm_alloc_pgd(mm))) { + mm->def_flags = 0; + mmu_notifier_mm_init(mm); ++ set_vx_info(&mm->mm_vx_info, p->vx_info); + return mm; + } + +@@ -542,6 +548,7 @@ void __mmdrop(struct mm_struct *mm) + #ifdef CONFIG_TRANSPARENT_HUGEPAGE + VM_BUG_ON(mm->pmd_huge_pte); + #endif ++ clr_vx_info(&mm->mm_vx_info); + free_mm(mm); + } + EXPORT_SYMBOL_GPL(__mmdrop); +@@ -777,6 +784,7 @@ struct mm_struct *dup_mm(struct task_str + goto fail_nomem; + + memcpy(mm, oldmm, sizeof(*mm)); ++ mm->mm_vx_info = NULL; + mm_init_cpumask(mm); + + /* Initializing for Swap token stuff */ +@@ -820,6 +828,7 @@ fail_nocontext: + * If init_new_context() failed, we cannot use mmput() to free the mm + * because it calls destroy_context() + */ ++ clr_vx_info(&mm->mm_vx_info); + mm_free_pgd(mm); + free_mm(mm); + return NULL; +@@ -1105,6 +1114,8 @@ static struct task_struct *copy_process( + int retval; + struct task_struct *p; + int cgroup_callbacks_done = 0; ++ struct vx_info *vxi; ++ struct nx_info *nxi; + + if ((clone_flags & (CLONE_NEWNS|CLONE_FS)) == (CLONE_NEWNS|CLONE_FS)) + return ERR_PTR(-EINVAL); +@@ -1151,7 +1162,12 @@ static struct task_struct *copy_process( + DEBUG_LOCKS_WARN_ON(!p->hardirqs_enabled); + DEBUG_LOCKS_WARN_ON(!p->softirqs_enabled); + #endif ++ init_vx_info(&p->vx_info, current_vx_info()); ++ init_nx_info(&p->nx_info, current_nx_info()); ++ + retval = -EAGAIN; ++ if (!vx_nproc_avail(1)) ++ goto bad_fork_free; + if (atomic_read(&p->real_cred->user->processes) >= + task_rlimit(p, RLIMIT_NPROC)) { + if (!capable(CAP_SYS_ADMIN) && !capable(CAP_SYS_RESOURCE) && +@@ -1421,6 +1437,18 @@ static struct task_struct *copy_process( + + total_forks++; + spin_unlock(¤t->sighand->siglock); ++ ++ /* p is copy of current */ ++ vxi = p->vx_info; ++ if (vxi) { ++ claim_vx_info(vxi, p); ++ atomic_inc(&vxi->cvirt.nr_threads); ++ atomic_inc(&vxi->cvirt.total_forks); ++ vx_nproc_inc(p); ++ } ++ nxi = p->nx_info; ++ if (nxi) ++ claim_nx_info(nxi, p); + write_unlock_irq(&tasklist_lock); + proc_fork_connector(p); + cgroup_post_fork(p); +diff -NurpP --minimal linux-3.3.8/kernel/kthread.c linux-3.3.8-vs2.3.3.4/kernel/kthread.c +--- linux-3.3.8/kernel/kthread.c 2012-03-19 19:47:30.000000000 +0100 ++++ linux-3.3.8-vs2.3.3.4/kernel/kthread.c 2012-02-24 03:55:06.000000000 +0100 +@@ -16,6 +16,7 @@ + #include + #include + #include ++#include + #include + + static DEFINE_SPINLOCK(kthread_create_lock); +diff -NurpP --minimal linux-3.3.8/kernel/nsproxy.c linux-3.3.8-vs2.3.3.4/kernel/nsproxy.c +--- linux-3.3.8/kernel/nsproxy.c 2012-01-09 16:15:00.000000000 +0100 ++++ linux-3.3.8-vs2.3.3.4/kernel/nsproxy.c 2012-02-24 16:59:37.000000000 +0100 +@@ -20,11 +20,14 @@ + #include + #include + #include ++#include ++#include + #include + #include + #include + #include + #include ++#include "../fs/mount.h" + + static struct kmem_cache *nsproxy_cachep; + 
+@@ -46,8 +49,11 @@ static inline struct nsproxy *create_nsp + struct nsproxy *nsproxy; + + nsproxy = kmem_cache_alloc(nsproxy_cachep, GFP_KERNEL); +- if (nsproxy) ++ if (nsproxy) { + atomic_set(&nsproxy->count, 1); ++ atomic_inc(&vs_global_nsproxy); ++ } ++ vxdprintk(VXD_CBIT(space, 2), "create_nsproxy = %p[1]", nsproxy); + return nsproxy; + } + +@@ -56,8 +62,11 @@ static inline struct nsproxy *create_nsp + * Return the newly created nsproxy. Do not attach this to the task, + * leave it to the caller to do proper locking and attach it to task. + */ +-static struct nsproxy *create_new_namespaces(unsigned long flags, +- struct task_struct *tsk, struct fs_struct *new_fs) ++static struct nsproxy *unshare_namespaces(unsigned long flags, ++ struct nsproxy *orig, ++ struct fs_struct *new_fs, ++ struct user_namespace *new_user, ++ struct pid_namespace *new_pid) + { + struct nsproxy *new_nsp; + int err; +@@ -66,31 +75,31 @@ static struct nsproxy *create_new_namesp + if (!new_nsp) + return ERR_PTR(-ENOMEM); + +- new_nsp->mnt_ns = copy_mnt_ns(flags, tsk->nsproxy->mnt_ns, new_fs); ++ new_nsp->mnt_ns = copy_mnt_ns(flags, orig->mnt_ns, new_fs); + if (IS_ERR(new_nsp->mnt_ns)) { + err = PTR_ERR(new_nsp->mnt_ns); + goto out_ns; + } + +- new_nsp->uts_ns = copy_utsname(flags, tsk); ++ new_nsp->uts_ns = copy_utsname(flags, orig->uts_ns, new_user); + if (IS_ERR(new_nsp->uts_ns)) { + err = PTR_ERR(new_nsp->uts_ns); + goto out_uts; + } + +- new_nsp->ipc_ns = copy_ipcs(flags, tsk); ++ new_nsp->ipc_ns = copy_ipcs(flags, orig->ipc_ns, new_user); + if (IS_ERR(new_nsp->ipc_ns)) { + err = PTR_ERR(new_nsp->ipc_ns); + goto out_ipc; + } + +- new_nsp->pid_ns = copy_pid_ns(flags, task_active_pid_ns(tsk)); ++ new_nsp->pid_ns = copy_pid_ns(flags, new_pid); + if (IS_ERR(new_nsp->pid_ns)) { + err = PTR_ERR(new_nsp->pid_ns); + goto out_pid; + } + +- new_nsp->net_ns = copy_net_ns(flags, tsk->nsproxy->net_ns); ++ new_nsp->net_ns = copy_net_ns(flags, orig->net_ns); + if (IS_ERR(new_nsp->net_ns)) { + err = PTR_ERR(new_nsp->net_ns); + goto out_net; +@@ -115,6 +124,40 @@ out_ns: + return ERR_PTR(err); + } + ++static struct nsproxy *create_new_namespaces(unsigned long flags, ++ struct task_struct *tsk, struct fs_struct *new_fs) ++{ ++ return unshare_namespaces(flags, tsk->nsproxy, ++ new_fs, task_cred_xxx(tsk, user)->user_ns, ++ task_active_pid_ns(tsk)); ++} ++ ++/* ++ * copies the nsproxy, setting refcount to 1, and grabbing a ++ * reference to all contained namespaces. ++ */ ++struct nsproxy *copy_nsproxy(struct nsproxy *orig) ++{ ++ struct nsproxy *ns = create_nsproxy(); ++ ++ if (ns) { ++ memcpy(ns, orig, sizeof(struct nsproxy)); ++ atomic_set(&ns->count, 1); ++ ++ if (ns->mnt_ns) ++ get_mnt_ns(ns->mnt_ns); ++ if (ns->uts_ns) ++ get_uts_ns(ns->uts_ns); ++ if (ns->ipc_ns) ++ get_ipc_ns(ns->ipc_ns); ++ if (ns->pid_ns) ++ get_pid_ns(ns->pid_ns); ++ if (ns->net_ns) ++ get_net(ns->net_ns); ++ } ++ return ns; ++} ++ + /* + * called from clone. This now handles copy for nsproxy and all + * namespaces therein. 
+@@ -122,9 +165,12 @@ out_ns: + int copy_namespaces(unsigned long flags, struct task_struct *tsk) + { + struct nsproxy *old_ns = tsk->nsproxy; +- struct nsproxy *new_ns; ++ struct nsproxy *new_ns = NULL; + int err = 0; + ++ vxdprintk(VXD_CBIT(space, 7), "copy_namespaces(0x%08lx,%p[%p])", ++ flags, tsk, old_ns); ++ + if (!old_ns) + return 0; + +@@ -134,7 +180,7 @@ int copy_namespaces(unsigned long flags, + CLONE_NEWPID | CLONE_NEWNET))) + return 0; + +- if (!capable(CAP_SYS_ADMIN)) { ++ if (!vx_can_unshare(CAP_SYS_ADMIN, flags)) { + err = -EPERM; + goto out; + } +@@ -161,6 +207,9 @@ int copy_namespaces(unsigned long flags, + + out: + put_nsproxy(old_ns); ++ vxdprintk(VXD_CBIT(space, 3), ++ "copy_namespaces(0x%08lx,%p[%p]) = %d [%p]", ++ flags, tsk, old_ns, err, new_ns); + return err; + } + +@@ -174,7 +223,9 @@ void free_nsproxy(struct nsproxy *ns) + put_ipc_ns(ns->ipc_ns); + if (ns->pid_ns) + put_pid_ns(ns->pid_ns); +- put_net(ns->net_ns); ++ if (ns->net_ns) ++ put_net(ns->net_ns); ++ atomic_dec(&vs_global_nsproxy); + kmem_cache_free(nsproxy_cachep, ns); + } + +@@ -187,11 +238,15 @@ int unshare_nsproxy_namespaces(unsigned + { + int err = 0; + ++ vxdprintk(VXD_CBIT(space, 4), ++ "unshare_nsproxy_namespaces(0x%08lx,[%p])", ++ unshare_flags, current->nsproxy); ++ + if (!(unshare_flags & (CLONE_NEWNS | CLONE_NEWUTS | CLONE_NEWIPC | + CLONE_NEWNET))) + return 0; + +- if (!capable(CAP_SYS_ADMIN)) ++ if (!vx_can_unshare(CAP_SYS_ADMIN, unshare_flags)) + return -EPERM; + + *new_nsp = create_new_namespaces(unshare_flags, current, +diff -NurpP --minimal linux-3.3.8/kernel/pid.c linux-3.3.8-vs2.3.3.4/kernel/pid.c +--- linux-3.3.8/kernel/pid.c 2012-03-19 19:47:30.000000000 +0100 ++++ linux-3.3.8-vs2.3.3.4/kernel/pid.c 2012-03-19 20:52:10.000000000 +0100 +@@ -36,6 +36,7 @@ + #include + #include + #include ++#include + + #define pid_hashfn(nr, ns) \ + hash_long((unsigned long)nr + (unsigned long)ns, pidhash_shift) +@@ -344,7 +345,7 @@ EXPORT_SYMBOL_GPL(find_pid_ns); + + struct pid *find_vpid(int nr) + { +- return find_pid_ns(nr, current->nsproxy->pid_ns); ++ return find_pid_ns(vx_rmap_pid(nr), current->nsproxy->pid_ns); + } + EXPORT_SYMBOL_GPL(find_vpid); + +@@ -404,6 +405,9 @@ void transfer_pid(struct task_struct *ol + struct task_struct *pid_task(struct pid *pid, enum pid_type type) + { + struct task_struct *result = NULL; ++ ++ if (type == PIDTYPE_REALPID) ++ type = PIDTYPE_PID; + if (pid) { + struct hlist_node *first; + first = rcu_dereference_check(hlist_first_rcu(&pid->tasks[type]), +@@ -423,7 +427,7 @@ struct task_struct *find_task_by_pid_ns( + rcu_lockdep_assert(rcu_read_lock_held(), + "find_task_by_pid_ns() needs rcu_read_lock()" + " protection"); +- return pid_task(find_pid_ns(nr, ns), PIDTYPE_PID); ++ return pid_task(find_pid_ns(vx_rmap_pid(nr), ns), PIDTYPE_PID); + } + + struct task_struct *find_task_by_vpid(pid_t vnr) +@@ -467,7 +471,7 @@ struct pid *find_get_pid(pid_t nr) + } + EXPORT_SYMBOL_GPL(find_get_pid); + +-pid_t pid_nr_ns(struct pid *pid, struct pid_namespace *ns) ++pid_t pid_unmapped_nr_ns(struct pid *pid, struct pid_namespace *ns) + { + struct upid *upid; + pid_t nr = 0; +@@ -480,6 +484,11 @@ pid_t pid_nr_ns(struct pid *pid, struct + return nr; + } + ++pid_t pid_nr_ns(struct pid *pid, struct pid_namespace *ns) ++{ ++ return vx_map_pid(pid_unmapped_nr_ns(pid, ns)); ++} ++ + pid_t pid_vnr(struct pid *pid) + { + return pid_nr_ns(pid, current->nsproxy->pid_ns); +diff -NurpP --minimal linux-3.3.8/kernel/pid_namespace.c linux-3.3.8-vs2.3.3.4/kernel/pid_namespace.c +--- 
linux-3.3.8/kernel/pid_namespace.c 2012-03-19 19:47:30.000000000 +0100 ++++ linux-3.3.8-vs2.3.3.4/kernel/pid_namespace.c 2012-02-24 03:55:06.000000000 +0100 +@@ -15,6 +15,7 @@ + #include + #include + #include ++#include + + #define BITS_PER_PAGE (PAGE_SIZE*8) + +@@ -88,6 +89,7 @@ static struct pid_namespace *create_pid_ + goto out_free_map; + + kref_init(&ns->kref); ++ atomic_inc(&vs_global_pid_ns); + ns->level = level; + ns->parent = get_pid_ns(parent_pid_ns); + +@@ -119,6 +121,7 @@ static void destroy_pid_namespace(struct + + for (i = 0; i < PIDMAP_ENTRIES; i++) + kfree(ns->pidmap[i].page); ++ atomic_dec(&vs_global_pid_ns); + kmem_cache_free(pid_ns_cachep, ns); + } + +diff -NurpP --minimal linux-3.3.8/kernel/posix-timers.c linux-3.3.8-vs2.3.3.4/kernel/posix-timers.c +--- linux-3.3.8/kernel/posix-timers.c 2012-01-09 16:15:00.000000000 +0100 ++++ linux-3.3.8-vs2.3.3.4/kernel/posix-timers.c 2012-02-24 03:55:06.000000000 +0100 +@@ -47,6 +47,7 @@ + #include + #include + #include ++#include + + /* + * Management arrays for POSIX timers. Timers are kept in slab memory +@@ -340,6 +341,7 @@ int posix_timer_event(struct k_itimer *t + { + struct task_struct *task; + int shared, ret = -1; ++ + /* + * FIXME: if ->sigq is queued we can race with + * dequeue_signal()->do_schedule_next_timer(). +@@ -356,10 +358,18 @@ int posix_timer_event(struct k_itimer *t + rcu_read_lock(); + task = pid_task(timr->it_pid, PIDTYPE_PID); + if (task) { ++ struct vx_info_save vxis; ++ struct vx_info *vxi; ++ ++ vxi = get_vx_info(task->vx_info); ++ enter_vx_info(vxi, &vxis); + shared = !(timr->it_sigev_notify & SIGEV_THREAD_ID); + ret = send_sigqueue(timr->sigq, task, shared); ++ leave_vx_info(&vxis); ++ put_vx_info(vxi); + } + rcu_read_unlock(); ++ + /* If we failed to send the signal the timer stops. 
*/ + return ret > 0; + } +diff -NurpP --minimal linux-3.3.8/kernel/printk.c linux-3.3.8-vs2.3.3.4/kernel/printk.c +--- linux-3.3.8/kernel/printk.c 2012-03-19 19:47:30.000000000 +0100 ++++ linux-3.3.8-vs2.3.3.4/kernel/printk.c 2012-03-19 20:52:10.000000000 +0100 +@@ -41,6 +41,7 @@ + #include + #include + #include ++#include + + #include + +@@ -314,7 +315,7 @@ static int check_syslog_permissions(int + return 0; + + if (syslog_action_restricted(type)) { +- if (capable(CAP_SYSLOG)) ++ if (vx_capable(CAP_SYSLOG, VXC_SYSLOG)) + return 0; + /* For historical reasons, accept CAP_SYS_ADMIN too, with a warning */ + if (capable(CAP_SYS_ADMIN)) { +@@ -344,12 +345,9 @@ int do_syslog(int type, char __user *buf + if (error) + return error; + +- switch (type) { +- case SYSLOG_ACTION_CLOSE: /* Close log */ +- break; +- case SYSLOG_ACTION_OPEN: /* Open log */ +- break; +- case SYSLOG_ACTION_READ: /* Read from log */ ++ if ((type == SYSLOG_ACTION_READ) || ++ (type == SYSLOG_ACTION_READ_ALL) || ++ (type == SYSLOG_ACTION_READ_CLEAR)) { + error = -EINVAL; + if (!buf || len < 0) + goto out; +@@ -360,6 +358,16 @@ int do_syslog(int type, char __user *buf + error = -EFAULT; + goto out; + } ++ } ++ if (!vx_check(0, VS_ADMIN|VS_WATCH)) ++ return vx_do_syslog(type, buf, len); ++ ++ switch (type) { ++ case SYSLOG_ACTION_CLOSE: /* Close log */ ++ break; ++ case SYSLOG_ACTION_OPEN: /* Open log */ ++ break; ++ case SYSLOG_ACTION_READ: /* Read from log */ + error = wait_event_interruptible(log_wait, + (log_start - log_end)); + if (error) +@@ -386,16 +394,6 @@ int do_syslog(int type, char __user *buf + /* FALL THRU */ + /* Read last kernel messages */ + case SYSLOG_ACTION_READ_ALL: +- error = -EINVAL; +- if (!buf || len < 0) +- goto out; +- error = 0; +- if (!len) +- goto out; +- if (!access_ok(VERIFY_WRITE, buf, len)) { +- error = -EFAULT; +- goto out; +- } + count = len; + if (count > log_buf_len) + count = log_buf_len; +diff -NurpP --minimal linux-3.3.8/kernel/ptrace.c linux-3.3.8-vs2.3.3.4/kernel/ptrace.c +--- linux-3.3.8/kernel/ptrace.c 2012-03-19 19:47:30.000000000 +0100 ++++ linux-3.3.8-vs2.3.3.4/kernel/ptrace.c 2012-02-24 03:55:06.000000000 +0100 +@@ -22,6 +22,7 @@ + #include + #include + #include ++#include + #include + #include + +@@ -217,6 +218,11 @@ ok: + dumpable = get_dumpable(task->mm); + if (!dumpable && !ptrace_has_cap(task_user_ns(task), mode)) + return -EPERM; ++ if (!vx_check(task->xid, VS_ADMIN_P|VS_WATCH_P|VS_IDENT)) ++ return -EPERM; ++ if (!vx_check(task->xid, VS_IDENT) && ++ !task_vx_flags(task, VXF_STATE_ADMIN, 0)) ++ return -EACCES; + + return security_ptrace_access_check(task, mode); + } +diff -NurpP --minimal linux-3.3.8/kernel/sched/core.c linux-3.3.8-vs2.3.3.4/kernel/sched/core.c +--- linux-3.3.8/kernel/sched/core.c 2012-06-08 15:23:46.000000000 +0200 ++++ linux-3.3.8-vs2.3.3.4/kernel/sched/core.c 2012-05-09 04:08:08.000000000 +0200 +@@ -71,6 +71,8 @@ + #include + #include + #include ++#include ++#include + + #include + #include +@@ -2326,9 +2328,17 @@ static void calc_global_nohz(void) + */ + void get_avenrun(unsigned long *loads, unsigned long offset, int shift) + { +- loads[0] = (avenrun[0] + offset) << shift; +- loads[1] = (avenrun[1] + offset) << shift; +- loads[2] = (avenrun[2] + offset) << shift; ++ if (vx_flags(VXF_VIRT_LOAD, 0)) { ++ struct vx_info *vxi = current_vx_info(); ++ ++ loads[0] = (vxi->cvirt.load[0] + offset) << shift; ++ loads[1] = (vxi->cvirt.load[1] + offset) << shift; ++ loads[2] = (vxi->cvirt.load[2] + offset) << shift; ++ } else { ++ loads[0] = (avenrun[0] + offset) 
<< shift; ++ loads[1] = (avenrun[1] + offset) << shift; ++ loads[2] = (avenrun[2] + offset) << shift; ++ } + } + + /* +@@ -2632,14 +2642,17 @@ static inline void task_group_account_fi + void account_user_time(struct task_struct *p, cputime_t cputime, + cputime_t cputime_scaled) + { ++ struct vx_info *vxi = p->vx_info; /* p is _always_ current */ ++ int nice = (TASK_NICE(p) > 0); + int index; + + /* Add user time to process. */ + p->utime += cputime; + p->utimescaled += cputime_scaled; ++ vx_account_user(vxi, cputime, nice); + account_group_user_time(p, cputime); + +- index = (TASK_NICE(p) > 0) ? CPUTIME_NICE : CPUTIME_USER; ++ index = (nice) ? CPUTIME_NICE : CPUTIME_USER; + + /* Add user time to cpustat. */ + task_group_account_field(p, index, (__force u64) cputime); +@@ -2686,9 +2699,12 @@ static inline + void __account_system_time(struct task_struct *p, cputime_t cputime, + cputime_t cputime_scaled, int index) + { ++ struct vx_info *vxi = p->vx_info; /* p is _always_ current */ ++ + /* Add system time to process. */ + p->stime += cputime; + p->stimescaled += cputime_scaled; ++ vx_account_system(vxi, cputime, 0 /* do we have idle time? */); + account_group_system_time(p, cputime); + + /* Add system time to cpustat. */ +@@ -3885,7 +3901,7 @@ SYSCALL_DEFINE1(nice, int, increment) + nice = 19; + + if (increment < 0 && !can_nice(current, nice)) +- return -EPERM; ++ return vx_flags(VXF_IGNEG_NICE, 0) ? 0 : -EPERM; + + retval = security_task_setnice(current, nice); + if (retval) +diff -NurpP --minimal linux-3.3.8/kernel/sched/fair.c linux-3.3.8-vs2.3.3.4/kernel/sched/fair.c +--- linux-3.3.8/kernel/sched/fair.c 2012-03-19 19:47:30.000000000 +0100 ++++ linux-3.3.8-vs2.3.3.4/kernel/sched/fair.c 2012-03-19 20:52:10.000000000 +0100 +@@ -26,6 +26,7 @@ + #include + #include + #include ++#include + + #include + +@@ -1126,6 +1127,8 @@ enqueue_entity(struct cfs_rq *cfs_rq, st + __enqueue_entity(cfs_rq, se); + se->on_rq = 1; + ++ if (entity_is_task(se)) ++ vx_activate_task(task_of(se)); + if (cfs_rq->nr_running == 1) { + list_add_leaf_cfs_rq(cfs_rq); + check_enqueue_throttle(cfs_rq); +@@ -1206,6 +1209,8 @@ dequeue_entity(struct cfs_rq *cfs_rq, st + if (se != cfs_rq->curr) + __dequeue_entity(cfs_rq, se); + se->on_rq = 0; ++ if (entity_is_task(se)) ++ vx_deactivate_task(task_of(se)); + update_cfs_load(cfs_rq, 0); + account_entity_dequeue(cfs_rq, se); + +diff -NurpP --minimal linux-3.3.8/kernel/signal.c linux-3.3.8-vs2.3.3.4/kernel/signal.c +--- linux-3.3.8/kernel/signal.c 2012-06-08 15:23:46.000000000 +0200 ++++ linux-3.3.8-vs2.3.3.4/kernel/signal.c 2012-05-09 04:08:08.000000000 +0200 +@@ -29,6 +29,8 @@ + #include + #include + #include ++#include ++#include + #define CREATE_TRACE_POINTS + #include + +@@ -790,9 +792,18 @@ static int check_kill_permission(int sig + struct pid *sid; + int error; + ++ vxdprintk(VXD_CBIT(misc, 7), ++ "check_kill_permission(%d,%p,%p[#%u,%u])", ++ sig, info, t, vx_task_xid(t), t->pid); ++ + if (!valid_signal(sig)) + return -EINVAL; + ++/* FIXME: needed? if so, why? ++ if ((info != SEND_SIG_NOINFO) && ++ (is_si_special(info) || !si_fromuser(info))) ++ goto skip; */ ++ + if (!si_fromuser(info)) + return 0; + +@@ -816,6 +827,20 @@ static int check_kill_permission(int sig + } + } + ++ error = -EPERM; ++ if (t->pid == 1 && current->xid) ++ return error; ++ ++ error = -ESRCH; ++ /* FIXME: we shouldn't return ESRCH ever, to avoid ++ loops, maybe ENOENT or EACCES? 
*/ ++ if (!vx_check(vx_task_xid(t), VS_WATCH_P | VS_IDENT)) { ++ vxdprintk(current->xid || VXD_CBIT(misc, 7), ++ "signal %d[%p] xid mismatch %p[#%u,%u] xid=#%u", ++ sig, info, t, vx_task_xid(t), t->pid, current->xid); ++ return error; ++ } ++/* skip: */ + return security_task_kill(t, info, sig, 0); + } + +@@ -1351,7 +1376,7 @@ int kill_pid_info(int sig, struct siginf + rcu_read_lock(); + retry: + p = pid_task(pid, PIDTYPE_PID); +- if (p) { ++ if (p && vx_check(vx_task_xid(p), VS_IDENT)) { + error = group_send_sig_info(sig, info, p); + if (unlikely(error == -ESRCH)) + /* +@@ -1401,7 +1426,7 @@ int kill_pid_info_as_cred(int sig, struc + + rcu_read_lock(); + p = pid_task(pid, PIDTYPE_PID); +- if (!p) { ++ if (!p || !vx_check(vx_task_xid(p), VS_IDENT)) { + ret = -ESRCH; + goto out_unlock; + } +@@ -1453,8 +1478,10 @@ static int kill_something_info(int sig, + struct task_struct * p; + + for_each_process(p) { +- if (task_pid_vnr(p) > 1 && +- !same_thread_group(p, current)) { ++ if (vx_check(vx_task_xid(p), VS_ADMIN|VS_IDENT) && ++ task_pid_vnr(p) > 1 && ++ !same_thread_group(p, current) && ++ !vx_current_initpid(p->pid)) { + int err = group_send_sig_info(sig, info, p); + ++count; + if (err != -EPERM) +@@ -2299,6 +2326,11 @@ relock: + !sig_kernel_only(signr)) + continue; + ++ /* virtual init is protected against user signals */ ++ if ((info->si_code == SI_USER) && ++ vx_current_initpid(current->pid)) ++ continue; ++ + if (sig_kernel_stop(signr)) { + /* + * The default action is to stop all threads in +diff -NurpP --minimal linux-3.3.8/kernel/softirq.c linux-3.3.8-vs2.3.3.4/kernel/softirq.c +--- linux-3.3.8/kernel/softirq.c 2012-03-19 19:47:30.000000000 +0100 ++++ linux-3.3.8-vs2.3.3.4/kernel/softirq.c 2012-02-24 03:55:06.000000000 +0100 +@@ -24,6 +24,7 @@ + #include + #include + #include ++#include + + #define CREATE_TRACE_POINTS + #include +diff -NurpP --minimal linux-3.3.8/kernel/sys.c linux-3.3.8-vs2.3.3.4/kernel/sys.c +--- linux-3.3.8/kernel/sys.c 2012-03-19 19:47:30.000000000 +0100 ++++ linux-3.3.8-vs2.3.3.4/kernel/sys.c 2012-03-19 20:52:10.000000000 +0100 +@@ -45,6 +45,7 @@ + #include + #include + #include ++#include + + #include + /* Move somewhere else to avoid recompiling? 
*/ +@@ -155,7 +156,10 @@ static int set_one_prio(struct task_stru + goto out; + } + if (niceval < task_nice(p) && !can_nice(p, niceval)) { +- error = -EACCES; ++ if (vx_flags(VXF_IGNEG_NICE, 0)) ++ error = 0; ++ else ++ error = -EACCES; + goto out; + } + no_nice = security_task_setnice(p, niceval); +@@ -205,6 +209,8 @@ SYSCALL_DEFINE3(setpriority, int, which, + else + pgrp = task_pgrp(current); + do_each_pid_thread(pgrp, PIDTYPE_PGID, p) { ++ if (!vx_check(p->xid, VS_ADMIN_P | VS_IDENT)) ++ continue; + error = set_one_prio(p, niceval, error); + } while_each_pid_thread(pgrp, PIDTYPE_PGID, p); + break; +@@ -268,6 +274,8 @@ SYSCALL_DEFINE2(getpriority, int, which, + else + pgrp = task_pgrp(current); + do_each_pid_thread(pgrp, PIDTYPE_PGID, p) { ++ if (!vx_check(p->xid, VS_ADMIN_P | VS_IDENT)) ++ continue; + niceval = 20 - task_nice(p); + if (niceval > retval) + retval = niceval; +@@ -418,6 +426,8 @@ EXPORT_SYMBOL_GPL(kernel_power_off); + + static DEFINE_MUTEX(reboot_mutex); + ++long vs_reboot(unsigned int, void __user *); ++ + /* + * Reboot system call: for obvious reasons only root may call it, + * and even root needs to set up some magic numbers in the registers +@@ -450,6 +460,9 @@ SYSCALL_DEFINE4(reboot, int, magic1, int + if ((cmd == LINUX_REBOOT_CMD_POWER_OFF) && !pm_power_off) + cmd = LINUX_REBOOT_CMD_HALT; + ++ if (!vx_check(0, VS_ADMIN|VS_WATCH)) ++ return vs_reboot(cmd, arg); ++ + mutex_lock(&reboot_mutex); + switch (cmd) { + case LINUX_REBOOT_CMD_RESTART: +@@ -1273,7 +1286,8 @@ SYSCALL_DEFINE2(sethostname, char __user + int errno; + char tmp[__NEW_UTS_LEN]; + +- if (!ns_capable(current->nsproxy->uts_ns->user_ns, CAP_SYS_ADMIN)) ++ if (!vx_ns_capable(current->nsproxy->uts_ns->user_ns, ++ CAP_SYS_ADMIN, VXC_SET_UTSNAME)) + return -EPERM; + + if (len < 0 || len > __NEW_UTS_LEN) +@@ -1324,7 +1338,8 @@ SYSCALL_DEFINE2(setdomainname, char __us + int errno; + char tmp[__NEW_UTS_LEN]; + +- if (!ns_capable(current->nsproxy->uts_ns->user_ns, CAP_SYS_ADMIN)) ++ if (!vx_ns_capable(current->nsproxy->uts_ns->user_ns, ++ CAP_SYS_ADMIN, VXC_SET_UTSNAME)) + return -EPERM; + if (len < 0 || len > __NEW_UTS_LEN) + return -EINVAL; +@@ -1443,7 +1458,7 @@ int do_prlimit(struct task_struct *tsk, + /* Keep the capable check against init_user_ns until + cgroups can contain all limits */ + if (new_rlim->rlim_max > rlim->rlim_max && +- !capable(CAP_SYS_RESOURCE)) ++ !vx_capable(CAP_SYS_RESOURCE, VXC_SET_RLIMIT)) + retval = -EPERM; + if (!retval) + retval = security_task_setrlimit(tsk->group_leader, +@@ -1497,7 +1512,8 @@ static int check_prlimit_permission(stru + cred->gid == tcred->sgid && + cred->gid == tcred->gid)) + return 0; +- if (ns_capable(tcred->user->user_ns, CAP_SYS_RESOURCE)) ++ if (vx_ns_capable(tcred->user->user_ns, ++ CAP_SYS_RESOURCE, VXC_SET_RLIMIT)) + return 0; + + return -EPERM; +diff -NurpP --minimal linux-3.3.8/kernel/sysctl.c linux-3.3.8-vs2.3.3.4/kernel/sysctl.c +--- linux-3.3.8/kernel/sysctl.c 2012-06-08 15:23:46.000000000 +0200 ++++ linux-3.3.8-vs2.3.3.4/kernel/sysctl.c 2012-04-16 13:32:11.000000000 +0200 +@@ -76,6 +76,7 @@ + #if defined(CONFIG_PROVE_LOCKING) || defined(CONFIG_LOCK_STAT) + #include + #endif ++extern char vshelper_path[]; + #ifdef CONFIG_CHR_DEV_SG + #include + #endif +@@ -572,6 +573,13 @@ static struct ctl_table kern_table[] = { + .proc_handler = proc_dostring, + }, + #endif ++ { ++ .procname = "vshelper", ++ .data = &vshelper_path, ++ .maxlen = 256, ++ .mode = 0644, ++ .proc_handler = &proc_dostring, ++ }, + #ifdef CONFIG_CHR_DEV_SG + { + .procname = "sg-big-buff", 
+diff -NurpP --minimal linux-3.3.8/kernel/sysctl_binary.c linux-3.3.8-vs2.3.3.4/kernel/sysctl_binary.c +--- linux-3.3.8/kernel/sysctl_binary.c 2012-01-09 16:15:00.000000000 +0100 ++++ linux-3.3.8-vs2.3.3.4/kernel/sysctl_binary.c 2012-02-24 03:55:06.000000000 +0100 +@@ -73,6 +73,7 @@ static const struct bin_table bin_kern_t + + { CTL_INT, KERN_PANIC, "panic" }, + { CTL_INT, KERN_REALROOTDEV, "real-root-dev" }, ++ { CTL_STR, KERN_VSHELPER, "vshelper" }, + + { CTL_STR, KERN_SPARC_REBOOT, "reboot-cmd" }, + { CTL_INT, KERN_CTLALTDEL, "ctrl-alt-del" }, +diff -NurpP --minimal linux-3.3.8/kernel/time/timekeeping.c linux-3.3.8-vs2.3.3.4/kernel/time/timekeeping.c +--- linux-3.3.8/kernel/time/timekeeping.c 2012-03-19 19:47:30.000000000 +0100 ++++ linux-3.3.8-vs2.3.3.4/kernel/time/timekeeping.c 2012-02-24 03:55:06.000000000 +0100 +@@ -233,6 +233,7 @@ void getnstimeofday(struct timespec *ts) + } while (read_seqretry(&xtime_lock, seq)); + + timespec_add_ns(ts, nsecs); ++ vx_adjust_timespec(ts); + } + + EXPORT_SYMBOL(getnstimeofday); +diff -NurpP --minimal linux-3.3.8/kernel/time.c linux-3.3.8-vs2.3.3.4/kernel/time.c +--- linux-3.3.8/kernel/time.c 2012-01-09 16:15:00.000000000 +0100 ++++ linux-3.3.8-vs2.3.3.4/kernel/time.c 2012-02-24 03:55:06.000000000 +0100 +@@ -92,7 +92,7 @@ SYSCALL_DEFINE1(stime, time_t __user *, + if (err) + return err; + +- do_settimeofday(&tv); ++ vx_settimeofday(&tv); + return 0; + } + +@@ -177,7 +177,7 @@ int do_sys_settimeofday(const struct tim + /* SMP safe, again the code in arch/foo/time.c should + * globally block out interrupts when it runs. + */ +- return do_settimeofday(tv); ++ return vx_settimeofday(tv); + } + return 0; + } +diff -NurpP --minimal linux-3.3.8/kernel/timer.c linux-3.3.8-vs2.3.3.4/kernel/timer.c +--- linux-3.3.8/kernel/timer.c 2012-03-19 19:47:30.000000000 +0100 ++++ linux-3.3.8-vs2.3.3.4/kernel/timer.c 2012-02-24 03:55:06.000000000 +0100 +@@ -40,6 +40,10 @@ + #include + #include + #include ++#include ++#include ++#include ++#include + + #include + #include +@@ -1386,12 +1390,6 @@ SYSCALL_DEFINE1(alarm, unsigned int, sec + + #endif + +-#ifndef __alpha__ +- +-/* +- * The Alpha uses getxpid, getxuid, and getxgid instead. Maybe this +- * should be moved into arch/i386 instead? +- */ + + /** + * sys_getpid - return the thread group id of the current process +@@ -1420,10 +1418,23 @@ SYSCALL_DEFINE0(getppid) + rcu_read_lock(); + pid = task_tgid_vnr(rcu_dereference(current->real_parent)); + rcu_read_unlock(); ++ return vx_map_pid(pid); ++} + +- return pid; ++#ifdef __alpha__ ++ ++/* ++ * The Alpha uses getxpid, getxuid, and getxgid instead. 
++ */ ++ ++asmlinkage long do_getxpid(long *ppid) ++{ ++ *ppid = sys_getppid(); ++ return sys_getpid(); + } + ++#else /* _alpha_ */ ++ + SYSCALL_DEFINE0(getuid) + { + /* Only we change this so SMP safe */ +diff -NurpP --minimal linux-3.3.8/kernel/user_namespace.c linux-3.3.8-vs2.3.3.4/kernel/user_namespace.c +--- linux-3.3.8/kernel/user_namespace.c 2012-01-09 16:15:00.000000000 +0100 ++++ linux-3.3.8-vs2.3.3.4/kernel/user_namespace.c 2012-02-24 03:55:06.000000000 +0100 +@@ -11,6 +11,7 @@ + #include + #include + #include ++#include + + static struct kmem_cache *user_ns_cachep __read_mostly; + +@@ -33,6 +34,7 @@ int create_user_ns(struct cred *new) + return -ENOMEM; + + kref_init(&ns->kref); ++ atomic_inc(&vs_global_user_ns); + + for (n = 0; n < UIDHASH_SZ; ++n) + INIT_HLIST_HEAD(ns->uidhash_table + n); +@@ -81,6 +83,8 @@ void free_user_ns(struct kref *kref) + struct user_namespace *ns = + container_of(kref, struct user_namespace, kref); + ++ /* FIXME: maybe move into destroyer? */ ++ atomic_dec(&vs_global_user_ns); + INIT_WORK(&ns->destroyer, free_user_ns_work); + schedule_work(&ns->destroyer); + } +diff -NurpP --minimal linux-3.3.8/kernel/utsname.c linux-3.3.8-vs2.3.3.4/kernel/utsname.c +--- linux-3.3.8/kernel/utsname.c 2012-01-09 16:15:00.000000000 +0100 ++++ linux-3.3.8-vs2.3.3.4/kernel/utsname.c 2012-02-24 03:55:06.000000000 +0100 +@@ -16,14 +16,17 @@ + #include + #include + #include ++#include + + static struct uts_namespace *create_uts_ns(void) + { + struct uts_namespace *uts_ns; + + uts_ns = kmalloc(sizeof(struct uts_namespace), GFP_KERNEL); +- if (uts_ns) ++ if (uts_ns) { + kref_init(&uts_ns->kref); ++ atomic_inc(&vs_global_uts_ns); ++ } + return uts_ns; + } + +@@ -32,8 +35,8 @@ static struct uts_namespace *create_uts_ + * @old_ns: namespace to clone + * Return NULL on error (failure to kmalloc), new ns otherwise + */ +-static struct uts_namespace *clone_uts_ns(struct task_struct *tsk, +- struct uts_namespace *old_ns) ++static struct uts_namespace *clone_uts_ns(struct uts_namespace *old_ns, ++ struct user_namespace *old_user) + { + struct uts_namespace *ns; + +@@ -43,7 +46,7 @@ static struct uts_namespace *clone_uts_n + + down_read(&uts_sem); + memcpy(&ns->name, &old_ns->name, sizeof(ns->name)); +- ns->user_ns = get_user_ns(task_cred_xxx(tsk, user)->user_ns); ++ ns->user_ns = get_user_ns(old_user); + up_read(&uts_sem); + return ns; + } +@@ -55,9 +58,9 @@ static struct uts_namespace *clone_uts_n + * versa. 
+ */ + struct uts_namespace *copy_utsname(unsigned long flags, +- struct task_struct *tsk) ++ struct uts_namespace *old_ns, ++ struct user_namespace *user_ns) + { +- struct uts_namespace *old_ns = tsk->nsproxy->uts_ns; + struct uts_namespace *new_ns; + + BUG_ON(!old_ns); +@@ -66,7 +69,7 @@ struct uts_namespace *copy_utsname(unsig + if (!(flags & CLONE_NEWUTS)) + return old_ns; + +- new_ns = clone_uts_ns(tsk, old_ns); ++ new_ns = clone_uts_ns(old_ns, user_ns); + + put_uts_ns(old_ns); + return new_ns; +@@ -78,6 +81,7 @@ void free_uts_ns(struct kref *kref) + + ns = container_of(kref, struct uts_namespace, kref); + put_user_ns(ns->user_ns); ++ atomic_dec(&vs_global_uts_ns); + kfree(ns); + } + +diff -NurpP --minimal linux-3.3.8/kernel/vserver/Kconfig linux-3.3.8-vs2.3.3.4/kernel/vserver/Kconfig +--- linux-3.3.8/kernel/vserver/Kconfig 1970-01-01 01:00:00.000000000 +0100 ++++ linux-3.3.8-vs2.3.3.4/kernel/vserver/Kconfig 2012-02-24 03:55:06.000000000 +0100 +@@ -0,0 +1,224 @@ ++# ++# Linux VServer configuration ++# ++ ++menu "Linux VServer" ++ ++config VSERVER_AUTO_LBACK ++ bool "Automatically Assign Loopback IP" ++ default y ++ help ++ Automatically assign a guest specific loopback ++ IP and add it to the kernel network stack on ++ startup. ++ ++config VSERVER_AUTO_SINGLE ++ bool "Automatic Single IP Special Casing" ++ depends on EXPERIMENTAL ++ default y ++ help ++ This allows network contexts with a single IP to ++ automatically remap 0.0.0.0 bindings to that IP, ++ avoiding further network checks and improving ++ performance. ++ ++ (note: such guests do not allow to change the ip ++ on the fly and do not show loopback addresses) ++ ++config VSERVER_COWBL ++ bool "Enable COW Immutable Link Breaking" ++ default y ++ help ++ This enables the COW (Copy-On-Write) link break code. ++ It allows you to treat unified files like normal files ++ when writing to them (which will implicitely break the ++ link and create a copy of the unified file) ++ ++config VSERVER_VTIME ++ bool "Enable Virtualized Guest Time" ++ depends on EXPERIMENTAL ++ default n ++ help ++ This enables per guest time offsets to allow for ++ adjusting the system clock individually per guest. ++ this adds some overhead to the time functions and ++ therefore should not be enabled without good reason. ++ ++config VSERVER_DEVICE ++ bool "Enable Guest Device Mapping" ++ depends on EXPERIMENTAL ++ default n ++ help ++ This enables generic device remapping. ++ ++config VSERVER_PROC_SECURE ++ bool "Enable Proc Security" ++ depends on PROC_FS ++ default y ++ help ++ This configures ProcFS security to initially hide ++ non-process entries for all contexts except the main and ++ spectator context (i.e. for all guests), which is a secure ++ default. ++ ++ (note: on 1.2x the entries were visible by default) ++ ++choice ++ prompt "Persistent Inode Tagging" ++ default TAGGING_ID24 ++ help ++ This adds persistent context information to filesystems ++ mounted with the tagxid option. Tagging is a requirement ++ for per-context disk limits and per-context quota. ++ ++ ++config TAGGING_NONE ++ bool "Disabled" ++ help ++ do not store per-context information in inodes. ++ ++config TAGGING_UID16 ++ bool "UID16/GID32" ++ help ++ reduces UID to 16 bit, but leaves GID at 32 bit. ++ ++config TAGGING_GID16 ++ bool "UID32/GID16" ++ help ++ reduces GID to 16 bit, but leaves UID at 32 bit. 
++ ++config TAGGING_ID24 ++ bool "UID24/GID24" ++ help ++ uses the upper 8bit from UID and GID for XID tagging ++ which leaves 24bit for UID/GID each, which should be ++ more than sufficient for normal use. ++ ++config TAGGING_INTERN ++ bool "UID32/GID32" ++ help ++ this uses otherwise reserved inode fields in the on ++ disk representation, which limits the use to a few ++ filesystems (currently ext2 and ext3) ++ ++endchoice ++ ++config TAG_NFSD ++ bool "Tag NFSD User Auth and Files" ++ default n ++ help ++ Enable this if you do want the in-kernel NFS ++ Server to use the tagging specified above. ++ (will require patched clients too) ++ ++config VSERVER_PRIVACY ++ bool "Honor Privacy Aspects of Guests" ++ default n ++ help ++ When enabled, most context checks will disallow ++ access to structures assigned to a specific context, ++ like ptys or loop devices. ++ ++config VSERVER_CONTEXTS ++ int "Maximum number of Contexts (1-65533)" if EMBEDDED ++ range 1 65533 ++ default "768" if 64BIT ++ default "256" ++ help ++ This setting will optimize certain data structures ++ and memory allocations according to the expected ++ maximum. ++ ++ note: this is not a strict upper limit. ++ ++config VSERVER_WARN ++ bool "VServer Warnings" ++ default y ++ help ++ This enables various runtime warnings, which will ++ notify about potential manipulation attempts or ++ resource shortage. It is generally considered to ++ be a good idea to have that enabled. ++ ++config VSERVER_WARN_DEVPTS ++ bool "VServer DevPTS Warnings" ++ depends on VSERVER_WARN ++ default y ++ help ++ This enables DevPTS related warnings, issued when a ++ process inside a context tries to lookup or access ++ a dynamic pts from the host or a different context. ++ ++config VSERVER_DEBUG ++ bool "VServer Debugging Code" ++ default n ++ help ++ Set this to yes if you want to be able to activate ++ debugging output at runtime. It adds a very small ++ overhead to all vserver related functions and ++ increases the kernel size by about 20k. ++ ++config VSERVER_HISTORY ++ bool "VServer History Tracing" ++ depends on VSERVER_DEBUG ++ default n ++ help ++ Set this to yes if you want to record the history of ++ linux-vserver activities, so they can be replayed in ++ the event of a kernel panic or oops. ++ ++config VSERVER_HISTORY_SIZE ++ int "Per-CPU History Size (32-65536)" ++ depends on VSERVER_HISTORY ++ range 32 65536 ++ default 64 ++ help ++ This allows you to specify the number of entries in ++ the per-CPU history buffer. ++ ++choice ++ prompt "Quotes used in debug and warn messages" ++ default QUOTES_ISO8859 ++ ++config QUOTES_ISO8859 ++ bool "Extended ASCII (ISO 8859) angle quotes" ++ help ++ This uses the extended ASCII characters \xbb ++ and \xab for quoting file and process names. ++ ++config QUOTES_UTF8 ++ bool "UTF-8 angle quotes" ++ help ++ This uses the the UTF-8 sequences for angle ++ quotes to quote file and process names. ++ ++config QUOTES_ASCII ++ bool "ASCII single quotes" ++ help ++ This uses the ASCII single quote character ++ (\x27) to quote file and process names. 
++ ++endchoice ++ ++endmenu ++ ++ ++config VSERVER ++ bool ++ default y ++ select NAMESPACES ++ select UTS_NS ++ select IPC_NS ++# select USER_NS ++ select SYSVIPC ++ ++config VSERVER_SECURITY ++ bool ++ depends on SECURITY ++ default y ++ select SECURITY_CAPABILITIES ++ ++config VSERVER_DISABLED ++ bool ++ default n ++ +diff -NurpP --minimal linux-3.3.8/kernel/vserver/Makefile linux-3.3.8-vs2.3.3.4/kernel/vserver/Makefile +--- linux-3.3.8/kernel/vserver/Makefile 1970-01-01 01:00:00.000000000 +0100 ++++ linux-3.3.8-vs2.3.3.4/kernel/vserver/Makefile 2012-02-24 03:55:06.000000000 +0100 +@@ -0,0 +1,18 @@ ++# ++# Makefile for the Linux vserver routines. ++# ++ ++ ++obj-y += vserver.o ++ ++vserver-y := switch.o context.o space.o sched.o network.o inode.o \ ++ limit.o cvirt.o cacct.o signal.o helper.o init.o \ ++ dlimit.o tag.o ++ ++vserver-$(CONFIG_INET) += inet.o ++vserver-$(CONFIG_PROC_FS) += proc.o ++vserver-$(CONFIG_VSERVER_DEBUG) += sysctl.o debug.o ++vserver-$(CONFIG_VSERVER_HISTORY) += history.o ++vserver-$(CONFIG_VSERVER_MONITOR) += monitor.o ++vserver-$(CONFIG_VSERVER_DEVICE) += device.o ++ +diff -NurpP --minimal linux-3.3.8/kernel/vserver/cacct.c linux-3.3.8-vs2.3.3.4/kernel/vserver/cacct.c +--- linux-3.3.8/kernel/vserver/cacct.c 1970-01-01 01:00:00.000000000 +0100 ++++ linux-3.3.8-vs2.3.3.4/kernel/vserver/cacct.c 2012-02-24 03:55:06.000000000 +0100 +@@ -0,0 +1,42 @@ ++/* ++ * linux/kernel/vserver/cacct.c ++ * ++ * Virtual Server: Context Accounting ++ * ++ * Copyright (C) 2006-2007 Herbert Pötzl ++ * ++ * V0.01 added accounting stats ++ * ++ */ ++ ++#include ++#include ++#include ++#include ++ ++#include ++#include ++ ++ ++int vc_sock_stat(struct vx_info *vxi, void __user *data) ++{ ++ struct vcmd_sock_stat_v0 vc_data; ++ int j, field; ++ ++ if (copy_from_user(&vc_data, data, sizeof(vc_data))) ++ return -EFAULT; ++ ++ field = vc_data.field; ++ if ((field < 0) || (field >= VXA_SOCK_SIZE)) ++ return -EINVAL; ++ ++ for (j = 0; j < 3; j++) { ++ vc_data.count[j] = vx_sock_count(&vxi->cacct, field, j); ++ vc_data.total[j] = vx_sock_total(&vxi->cacct, field, j); ++ } ++ ++ if (copy_to_user(data, &vc_data, sizeof(vc_data))) ++ return -EFAULT; ++ return 0; ++} ++ +diff -NurpP --minimal linux-3.3.8/kernel/vserver/cacct_init.h linux-3.3.8-vs2.3.3.4/kernel/vserver/cacct_init.h +--- linux-3.3.8/kernel/vserver/cacct_init.h 1970-01-01 01:00:00.000000000 +0100 ++++ linux-3.3.8-vs2.3.3.4/kernel/vserver/cacct_init.h 2012-02-24 03:55:06.000000000 +0100 +@@ -0,0 +1,25 @@ ++ ++ ++static inline void vx_info_init_cacct(struct _vx_cacct *cacct) ++{ ++ int i, j; ++ ++ ++ for (i = 0; i < VXA_SOCK_SIZE; i++) { ++ for (j = 0; j < 3; j++) { ++ atomic_long_set(&cacct->sock[i][j].count, 0); ++ atomic_long_set(&cacct->sock[i][j].total, 0); ++ } ++ } ++ for (i = 0; i < 8; i++) ++ atomic_set(&cacct->slab[i], 0); ++ for (i = 0; i < 5; i++) ++ for (j = 0; j < 4; j++) ++ atomic_set(&cacct->page[i][j], 0); ++} ++ ++static inline void vx_info_exit_cacct(struct _vx_cacct *cacct) ++{ ++ return; ++} ++ +diff -NurpP --minimal linux-3.3.8/kernel/vserver/cacct_proc.h linux-3.3.8-vs2.3.3.4/kernel/vserver/cacct_proc.h +--- linux-3.3.8/kernel/vserver/cacct_proc.h 1970-01-01 01:00:00.000000000 +0100 ++++ linux-3.3.8-vs2.3.3.4/kernel/vserver/cacct_proc.h 2012-02-24 03:55:06.000000000 +0100 +@@ -0,0 +1,53 @@ ++#ifndef _VX_CACCT_PROC_H ++#define _VX_CACCT_PROC_H ++ ++#include ++ ++ ++#define VX_SOCKA_TOP \ ++ "Type\t recv #/bytes\t\t send #/bytes\t\t fail #/bytes\n" ++ ++static inline int vx_info_proc_cacct(struct _vx_cacct *cacct, 
char *buffer) ++{ ++ int i, j, length = 0; ++ static char *type[VXA_SOCK_SIZE] = { ++ "UNSPEC", "UNIX", "INET", "INET6", "PACKET", "OTHER" ++ }; ++ ++ length += sprintf(buffer + length, VX_SOCKA_TOP); ++ for (i = 0; i < VXA_SOCK_SIZE; i++) { ++ length += sprintf(buffer + length, "%s:", type[i]); ++ for (j = 0; j < 3; j++) { ++ length += sprintf(buffer + length, ++ "\t%10lu/%-10lu", ++ vx_sock_count(cacct, i, j), ++ vx_sock_total(cacct, i, j)); ++ } ++ buffer[length++] = '\n'; ++ } ++ ++ length += sprintf(buffer + length, "\n"); ++ length += sprintf(buffer + length, ++ "slab:\t %8u %8u %8u %8u\n", ++ atomic_read(&cacct->slab[1]), ++ atomic_read(&cacct->slab[4]), ++ atomic_read(&cacct->slab[0]), ++ atomic_read(&cacct->slab[2])); ++ ++ length += sprintf(buffer + length, "\n"); ++ for (i = 0; i < 5; i++) { ++ length += sprintf(buffer + length, ++ "page[%d]: %8u %8u %8u %8u\t %8u %8u %8u %8u\n", i, ++ atomic_read(&cacct->page[i][0]), ++ atomic_read(&cacct->page[i][1]), ++ atomic_read(&cacct->page[i][2]), ++ atomic_read(&cacct->page[i][3]), ++ atomic_read(&cacct->page[i][4]), ++ atomic_read(&cacct->page[i][5]), ++ atomic_read(&cacct->page[i][6]), ++ atomic_read(&cacct->page[i][7])); ++ } ++ return length; ++} ++ ++#endif /* _VX_CACCT_PROC_H */ +diff -NurpP --minimal linux-3.3.8/kernel/vserver/context.c linux-3.3.8-vs2.3.3.4/kernel/vserver/context.c +--- linux-3.3.8/kernel/vserver/context.c 1970-01-01 01:00:00.000000000 +0100 ++++ linux-3.3.8-vs2.3.3.4/kernel/vserver/context.c 2012-02-24 03:55:06.000000000 +0100 +@@ -0,0 +1,1107 @@ ++/* ++ * linux/kernel/vserver/context.c ++ * ++ * Virtual Server: Context Support ++ * ++ * Copyright (C) 2003-2011 Herbert Pötzl ++ * ++ * V0.01 context helper ++ * V0.02 vx_ctx_kill syscall command ++ * V0.03 replaced context_info calls ++ * V0.04 redesign of struct (de)alloc ++ * V0.05 rlimit basic implementation ++ * V0.06 task_xid and info commands ++ * V0.07 context flags and caps ++ * V0.08 switch to RCU based hash ++ * V0.09 revert to non RCU for now ++ * V0.10 and back to working RCU hash ++ * V0.11 and back to locking again ++ * V0.12 referenced context store ++ * V0.13 separate per cpu data ++ * V0.14 changed vcmds to vxi arg ++ * V0.15 added context stat ++ * V0.16 have __create claim() the vxi ++ * V0.17 removed older and legacy stuff ++ * V0.18 added user credentials ++ * V0.19 added warn mask ++ * ++ */ ++ ++#include ++#include ++#include ++#include ++#include ++ ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++ ++#include ++#include ++#include ++#include ++ ++#include "cvirt_init.h" ++#include "cacct_init.h" ++#include "limit_init.h" ++#include "sched_init.h" ++ ++ ++atomic_t vx_global_ctotal = ATOMIC_INIT(0); ++atomic_t vx_global_cactive = ATOMIC_INIT(0); ++ ++ ++/* now inactive context structures */ ++ ++static struct hlist_head vx_info_inactive = HLIST_HEAD_INIT; ++ ++static DEFINE_SPINLOCK(vx_info_inactive_lock); ++ ++ ++/* __alloc_vx_info() ++ ++ * allocate an initialized vx_info struct ++ * doesn't make it visible (hash) */ ++ ++static struct vx_info *__alloc_vx_info(xid_t xid) ++{ ++ struct vx_info *new = NULL; ++ int cpu, index; ++ ++ vxdprintk(VXD_CBIT(xid, 0), "alloc_vx_info(%d)*", xid); ++ ++ /* would this benefit from a slab cache? 
*/ ++ new = kmalloc(sizeof(struct vx_info), GFP_KERNEL); ++ if (!new) ++ return 0; ++ ++ memset(new, 0, sizeof(struct vx_info)); ++#ifdef CONFIG_SMP ++ new->ptr_pc = alloc_percpu(struct _vx_info_pc); ++ if (!new->ptr_pc) ++ goto error; ++#endif ++ new->vx_id = xid; ++ INIT_HLIST_NODE(&new->vx_hlist); ++ atomic_set(&new->vx_usecnt, 0); ++ atomic_set(&new->vx_tasks, 0); ++ new->vx_parent = NULL; ++ new->vx_state = 0; ++ init_waitqueue_head(&new->vx_wait); ++ ++ /* prepare reaper */ ++ get_task_struct(init_pid_ns.child_reaper); ++ new->vx_reaper = init_pid_ns.child_reaper; ++ new->vx_badness_bias = 0; ++ ++ /* rest of init goes here */ ++ vx_info_init_limit(&new->limit); ++ vx_info_init_sched(&new->sched); ++ vx_info_init_cvirt(&new->cvirt); ++ vx_info_init_cacct(&new->cacct); ++ ++ /* per cpu data structures */ ++ for_each_possible_cpu(cpu) { ++ vx_info_init_sched_pc( ++ &vx_per_cpu(new, sched_pc, cpu), cpu); ++ vx_info_init_cvirt_pc( ++ &vx_per_cpu(new, cvirt_pc, cpu), cpu); ++ } ++ ++ new->vx_flags = VXF_INIT_SET; ++ new->vx_bcaps = CAP_FULL_SET; // maybe ~CAP_SETPCAP ++ new->vx_ccaps = 0; ++ new->vx_umask = 0; ++ new->vx_wmask = 0; ++ ++ new->reboot_cmd = 0; ++ new->exit_code = 0; ++ ++ // preconfig spaces ++ for (index = 0; index < VX_SPACES; index++) { ++ struct _vx_space *space = &new->space[index]; ++ ++ // filesystem ++ spin_lock(&init_fs.lock); ++ init_fs.users++; ++ spin_unlock(&init_fs.lock); ++ space->vx_fs = &init_fs; ++ ++ /* FIXME: do we want defaults? */ ++ // space->vx_real_cred = 0; ++ // space->vx_cred = 0; ++ } ++ ++ ++ vxdprintk(VXD_CBIT(xid, 0), ++ "alloc_vx_info(%d) = %p", xid, new); ++ vxh_alloc_vx_info(new); ++ atomic_inc(&vx_global_ctotal); ++ return new; ++#ifdef CONFIG_SMP ++error: ++ kfree(new); ++ return 0; ++#endif ++} ++ ++/* __dealloc_vx_info() ++ ++ * final disposal of vx_info */ ++ ++static void __dealloc_vx_info(struct vx_info *vxi) ++{ ++#ifdef CONFIG_VSERVER_WARN ++ struct vx_info_save vxis; ++ int cpu; ++#endif ++ vxdprintk(VXD_CBIT(xid, 0), ++ "dealloc_vx_info(%p)", vxi); ++ vxh_dealloc_vx_info(vxi); ++ ++#ifdef CONFIG_VSERVER_WARN ++ enter_vx_info(vxi, &vxis); ++ vx_info_exit_limit(&vxi->limit); ++ vx_info_exit_sched(&vxi->sched); ++ vx_info_exit_cvirt(&vxi->cvirt); ++ vx_info_exit_cacct(&vxi->cacct); ++ ++ for_each_possible_cpu(cpu) { ++ vx_info_exit_sched_pc( ++ &vx_per_cpu(vxi, sched_pc, cpu), cpu); ++ vx_info_exit_cvirt_pc( ++ &vx_per_cpu(vxi, cvirt_pc, cpu), cpu); ++ } ++ leave_vx_info(&vxis); ++#endif ++ ++ vxi->vx_id = -1; ++ vxi->vx_state |= VXS_RELEASED; ++ ++#ifdef CONFIG_SMP ++ free_percpu(vxi->ptr_pc); ++#endif ++ kfree(vxi); ++ atomic_dec(&vx_global_ctotal); ++} ++ ++static void __shutdown_vx_info(struct vx_info *vxi) ++{ ++ struct nsproxy *nsproxy; ++ struct fs_struct *fs; ++ struct cred *cred; ++ int index, kill; ++ ++ might_sleep(); ++ ++ vxi->vx_state |= VXS_SHUTDOWN; ++ vs_state_change(vxi, VSC_SHUTDOWN); ++ ++ for (index = 0; index < VX_SPACES; index++) { ++ struct _vx_space *space = &vxi->space[index]; ++ ++ nsproxy = xchg(&space->vx_nsproxy, NULL); ++ if (nsproxy) ++ put_nsproxy(nsproxy); ++ ++ fs = xchg(&space->vx_fs, NULL); ++ spin_lock(&fs->lock); ++ kill = !--fs->users; ++ spin_unlock(&fs->lock); ++ if (kill) ++ free_fs_struct(fs); ++ ++ cred = (struct cred *)xchg(&space->vx_cred, NULL); ++ if (cred) ++ abort_creds(cred); ++ } ++} ++ ++/* exported stuff */ ++ ++void free_vx_info(struct vx_info *vxi) ++{ ++ unsigned long flags; ++ unsigned index; ++ ++ /* check for reference counts first */ ++ 
BUG_ON(atomic_read(&vxi->vx_usecnt)); ++ BUG_ON(atomic_read(&vxi->vx_tasks)); ++ ++ /* context must not be hashed */ ++ BUG_ON(vx_info_state(vxi, VXS_HASHED)); ++ ++ /* context shutdown is mandatory */ ++ BUG_ON(!vx_info_state(vxi, VXS_SHUTDOWN)); ++ ++ /* spaces check */ ++ for (index = 0; index < VX_SPACES; index++) { ++ struct _vx_space *space = &vxi->space[index]; ++ ++ BUG_ON(space->vx_nsproxy); ++ BUG_ON(space->vx_fs); ++ // BUG_ON(space->vx_real_cred); ++ // BUG_ON(space->vx_cred); ++ } ++ ++ spin_lock_irqsave(&vx_info_inactive_lock, flags); ++ hlist_del(&vxi->vx_hlist); ++ spin_unlock_irqrestore(&vx_info_inactive_lock, flags); ++ ++ __dealloc_vx_info(vxi); ++} ++ ++ ++/* hash table for vx_info hash */ ++ ++#define VX_HASH_SIZE 13 ++ ++static struct hlist_head vx_info_hash[VX_HASH_SIZE] = ++ { [0 ... VX_HASH_SIZE-1] = HLIST_HEAD_INIT }; ++ ++static DEFINE_SPINLOCK(vx_info_hash_lock); ++ ++ ++static inline unsigned int __hashval(xid_t xid) ++{ ++ return (xid % VX_HASH_SIZE); ++} ++ ++ ++ ++/* __hash_vx_info() ++ ++ * add the vxi to the global hash table ++ * requires the hash_lock to be held */ ++ ++static inline void __hash_vx_info(struct vx_info *vxi) ++{ ++ struct hlist_head *head; ++ ++ vxd_assert_lock(&vx_info_hash_lock); ++ vxdprintk(VXD_CBIT(xid, 4), ++ "__hash_vx_info: %p[#%d]", vxi, vxi->vx_id); ++ vxh_hash_vx_info(vxi); ++ ++ /* context must not be hashed */ ++ BUG_ON(vx_info_state(vxi, VXS_HASHED)); ++ ++ vxi->vx_state |= VXS_HASHED; ++ head = &vx_info_hash[__hashval(vxi->vx_id)]; ++ hlist_add_head(&vxi->vx_hlist, head); ++ atomic_inc(&vx_global_cactive); ++} ++ ++/* __unhash_vx_info() ++ ++ * remove the vxi from the global hash table ++ * requires the hash_lock to be held */ ++ ++static inline void __unhash_vx_info(struct vx_info *vxi) ++{ ++ unsigned long flags; ++ ++ vxd_assert_lock(&vx_info_hash_lock); ++ vxdprintk(VXD_CBIT(xid, 4), ++ "__unhash_vx_info: %p[#%d.%d.%d]", vxi, vxi->vx_id, ++ atomic_read(&vxi->vx_usecnt), atomic_read(&vxi->vx_tasks)); ++ vxh_unhash_vx_info(vxi); ++ ++ /* context must be hashed */ ++ BUG_ON(!vx_info_state(vxi, VXS_HASHED)); ++ /* but without tasks */ ++ BUG_ON(atomic_read(&vxi->vx_tasks)); ++ ++ vxi->vx_state &= ~VXS_HASHED; ++ hlist_del_init(&vxi->vx_hlist); ++ spin_lock_irqsave(&vx_info_inactive_lock, flags); ++ hlist_add_head(&vxi->vx_hlist, &vx_info_inactive); ++ spin_unlock_irqrestore(&vx_info_inactive_lock, flags); ++ atomic_dec(&vx_global_cactive); ++} ++ ++ ++/* __lookup_vx_info() ++ ++ * requires the hash_lock to be held ++ * doesn't increment the vx_refcnt */ ++ ++static inline struct vx_info *__lookup_vx_info(xid_t xid) ++{ ++ struct hlist_head *head = &vx_info_hash[__hashval(xid)]; ++ struct hlist_node *pos; ++ struct vx_info *vxi; ++ ++ vxd_assert_lock(&vx_info_hash_lock); ++ hlist_for_each(pos, head) { ++ vxi = hlist_entry(pos, struct vx_info, vx_hlist); ++ ++ if (vxi->vx_id == xid) ++ goto found; ++ } ++ vxi = NULL; ++found: ++ vxdprintk(VXD_CBIT(xid, 0), ++ "__lookup_vx_info(#%u): %p[#%u]", ++ xid, vxi, vxi ? 
vxi->vx_id : 0); ++ vxh_lookup_vx_info(vxi, xid); ++ return vxi; ++} ++ ++ ++/* __create_vx_info() ++ ++ * create the requested context ++ * get(), claim() and hash it */ ++ ++static struct vx_info *__create_vx_info(int id) ++{ ++ struct vx_info *new, *vxi = NULL; ++ ++ vxdprintk(VXD_CBIT(xid, 1), "create_vx_info(%d)*", id); ++ ++ if (!(new = __alloc_vx_info(id))) ++ return ERR_PTR(-ENOMEM); ++ ++ /* required to make dynamic xids unique */ ++ spin_lock(&vx_info_hash_lock); ++ ++ /* static context requested */ ++ if ((vxi = __lookup_vx_info(id))) { ++ vxdprintk(VXD_CBIT(xid, 0), ++ "create_vx_info(%d) = %p (already there)", id, vxi); ++ if (vx_info_flags(vxi, VXF_STATE_SETUP, 0)) ++ vxi = ERR_PTR(-EBUSY); ++ else ++ vxi = ERR_PTR(-EEXIST); ++ goto out_unlock; ++ } ++ /* new context */ ++ vxdprintk(VXD_CBIT(xid, 0), ++ "create_vx_info(%d) = %p (new)", id, new); ++ claim_vx_info(new, NULL); ++ __hash_vx_info(get_vx_info(new)); ++ vxi = new, new = NULL; ++ ++out_unlock: ++ spin_unlock(&vx_info_hash_lock); ++ vxh_create_vx_info(IS_ERR(vxi) ? NULL : vxi, id); ++ if (new) ++ __dealloc_vx_info(new); ++ return vxi; ++} ++ ++ ++/* exported stuff */ ++ ++ ++void unhash_vx_info(struct vx_info *vxi) ++{ ++ spin_lock(&vx_info_hash_lock); ++ __unhash_vx_info(vxi); ++ spin_unlock(&vx_info_hash_lock); ++ __shutdown_vx_info(vxi); ++ __wakeup_vx_info(vxi); ++} ++ ++ ++/* lookup_vx_info() ++ ++ * search for a vx_info and get() it ++ * negative id means current */ ++ ++struct vx_info *lookup_vx_info(int id) ++{ ++ struct vx_info *vxi = NULL; ++ ++ if (id < 0) { ++ vxi = get_vx_info(current_vx_info()); ++ } else if (id > 1) { ++ spin_lock(&vx_info_hash_lock); ++ vxi = get_vx_info(__lookup_vx_info(id)); ++ spin_unlock(&vx_info_hash_lock); ++ } ++ return vxi; ++} ++ ++/* xid_is_hashed() ++ ++ * verify that xid is still hashed */ ++ ++int xid_is_hashed(xid_t xid) ++{ ++ int hashed; ++ ++ spin_lock(&vx_info_hash_lock); ++ hashed = (__lookup_vx_info(xid) != NULL); ++ spin_unlock(&vx_info_hash_lock); ++ return hashed; ++} ++ ++#ifdef CONFIG_PROC_FS ++ ++/* get_xid_list() ++ ++ * get a subset of hashed xids for proc ++ * assumes size is at least one */ ++ ++int get_xid_list(int index, unsigned int *xids, int size) ++{ ++ int hindex, nr_xids = 0; ++ ++ /* only show current and children */ ++ if (!vx_check(0, VS_ADMIN | VS_WATCH)) { ++ if (index > 0) ++ return 0; ++ xids[nr_xids] = vx_current_xid(); ++ return 1; ++ } ++ ++ for (hindex = 0; hindex < VX_HASH_SIZE; hindex++) { ++ struct hlist_head *head = &vx_info_hash[hindex]; ++ struct hlist_node *pos; ++ ++ spin_lock(&vx_info_hash_lock); ++ hlist_for_each(pos, head) { ++ struct vx_info *vxi; ++ ++ if (--index > 0) ++ continue; ++ ++ vxi = hlist_entry(pos, struct vx_info, vx_hlist); ++ xids[nr_xids] = vxi->vx_id; ++ if (++nr_xids >= size) { ++ spin_unlock(&vx_info_hash_lock); ++ goto out; ++ } ++ } ++ /* keep the lock time short */ ++ spin_unlock(&vx_info_hash_lock); ++ } ++out: ++ return nr_xids; ++} ++#endif ++ ++#ifdef CONFIG_VSERVER_DEBUG ++ ++void dump_vx_info_inactive(int level) ++{ ++ struct hlist_node *entry, *next; ++ ++ hlist_for_each_safe(entry, next, &vx_info_inactive) { ++ struct vx_info *vxi = ++ list_entry(entry, struct vx_info, vx_hlist); ++ ++ dump_vx_info(vxi, level); ++ } ++} ++ ++#endif ++ ++#if 0 ++int vx_migrate_user(struct task_struct *p, struct vx_info *vxi) ++{ ++ struct user_struct *new_user, *old_user; ++ ++ if (!p || !vxi) ++ BUG(); ++ ++ if (vx_info_flags(vxi, VXF_INFO_PRIVATE, 0)) ++ return -EACCES; ++ ++ new_user = alloc_uid(vxi->vx_id, 
p->uid); ++ if (!new_user) ++ return -ENOMEM; ++ ++ old_user = p->user; ++ if (new_user != old_user) { ++ atomic_inc(&new_user->processes); ++ atomic_dec(&old_user->processes); ++ p->user = new_user; ++ } ++ free_uid(old_user); ++ return 0; ++} ++#endif ++ ++#if 0 ++void vx_mask_cap_bset(struct vx_info *vxi, struct task_struct *p) ++{ ++ // p->cap_effective &= vxi->vx_cap_bset; ++ p->cap_effective = ++ cap_intersect(p->cap_effective, vxi->cap_bset); ++ // p->cap_inheritable &= vxi->vx_cap_bset; ++ p->cap_inheritable = ++ cap_intersect(p->cap_inheritable, vxi->cap_bset); ++ // p->cap_permitted &= vxi->vx_cap_bset; ++ p->cap_permitted = ++ cap_intersect(p->cap_permitted, vxi->cap_bset); ++} ++#endif ++ ++ ++#include ++#include ++ ++static int vx_openfd_task(struct task_struct *tsk) ++{ ++ struct files_struct *files = tsk->files; ++ struct fdtable *fdt; ++ const unsigned long *bptr; ++ int count, total; ++ ++ /* no rcu_read_lock() because of spin_lock() */ ++ spin_lock(&files->file_lock); ++ fdt = files_fdtable(files); ++ bptr = fdt->open_fds->fds_bits; ++ count = fdt->max_fds / (sizeof(unsigned long) * 8); ++ for (total = 0; count > 0; count--) { ++ if (*bptr) ++ total += hweight_long(*bptr); ++ bptr++; ++ } ++ spin_unlock(&files->file_lock); ++ return total; ++} ++ ++ ++/* for *space compatibility */ ++ ++asmlinkage long sys_unshare(unsigned long); ++ ++/* ++ * migrate task to new context ++ * gets vxi, puts old_vxi on change ++ * optionally unshares namespaces (hack) ++ */ ++ ++int vx_migrate_task(struct task_struct *p, struct vx_info *vxi, int unshare) ++{ ++ struct vx_info *old_vxi; ++ int ret = 0; ++ ++ if (!p || !vxi) ++ BUG(); ++ ++ vxdprintk(VXD_CBIT(xid, 5), ++ "vx_migrate_task(%p,%p[#%d.%d])", p, vxi, ++ vxi->vx_id, atomic_read(&vxi->vx_usecnt)); ++ ++ if (vx_info_flags(vxi, VXF_INFO_PRIVATE, 0) && ++ !vx_info_flags(vxi, VXF_STATE_SETUP, 0)) ++ return -EACCES; ++ ++ if (vx_info_state(vxi, VXS_SHUTDOWN)) ++ return -EFAULT; ++ ++ old_vxi = task_get_vx_info(p); ++ if (old_vxi == vxi) ++ goto out; ++ ++// if (!(ret = vx_migrate_user(p, vxi))) { ++ { ++ int openfd; ++ ++ task_lock(p); ++ openfd = vx_openfd_task(p); ++ ++ if (old_vxi) { ++ atomic_dec(&old_vxi->cvirt.nr_threads); ++ atomic_dec(&old_vxi->cvirt.nr_running); ++ __rlim_dec(&old_vxi->limit, RLIMIT_NPROC); ++ /* FIXME: what about the struct files here? */ ++ __rlim_sub(&old_vxi->limit, VLIMIT_OPENFD, openfd); ++ /* account for the executable */ ++ __rlim_dec(&old_vxi->limit, VLIMIT_DENTRY); ++ } ++ atomic_inc(&vxi->cvirt.nr_threads); ++ atomic_inc(&vxi->cvirt.nr_running); ++ __rlim_inc(&vxi->limit, RLIMIT_NPROC); ++ /* FIXME: what about the struct files here? 
*/ ++ __rlim_add(&vxi->limit, VLIMIT_OPENFD, openfd); ++ /* account for the executable */ ++ __rlim_inc(&vxi->limit, VLIMIT_DENTRY); ++ ++ if (old_vxi) { ++ release_vx_info(old_vxi, p); ++ clr_vx_info(&p->vx_info); ++ } ++ claim_vx_info(vxi, p); ++ set_vx_info(&p->vx_info, vxi); ++ p->xid = vxi->vx_id; ++ ++ vxdprintk(VXD_CBIT(xid, 5), ++ "moved task %p into vxi:%p[#%d]", ++ p, vxi, vxi->vx_id); ++ ++ // vx_mask_cap_bset(vxi, p); ++ task_unlock(p); ++ ++ /* hack for *spaces to provide compatibility */ ++ if (unshare) { ++ struct nsproxy *old_nsp, *new_nsp; ++ ++ ret = unshare_nsproxy_namespaces( ++ CLONE_NEWUTS | CLONE_NEWIPC | CLONE_NEWUSER, ++ &new_nsp, NULL); ++ if (ret) ++ goto out; ++ ++ old_nsp = xchg(&p->nsproxy, new_nsp); ++ vx_set_space(vxi, ++ CLONE_NEWUTS | CLONE_NEWIPC | CLONE_NEWUSER, 0); ++ put_nsproxy(old_nsp); ++ } ++ } ++out: ++ put_vx_info(old_vxi); ++ return ret; ++} ++ ++int vx_set_reaper(struct vx_info *vxi, struct task_struct *p) ++{ ++ struct task_struct *old_reaper; ++ ++ if (!vxi) ++ return -EINVAL; ++ ++ vxdprintk(VXD_CBIT(xid, 6), ++ "vx_set_reaper(%p[#%d],%p[#%d,%d])", ++ vxi, vxi->vx_id, p, p->xid, p->pid); ++ ++ old_reaper = vxi->vx_reaper; ++ if (old_reaper == p) ++ return 0; ++ ++ /* set new child reaper */ ++ get_task_struct(p); ++ vxi->vx_reaper = p; ++ put_task_struct(old_reaper); ++ return 0; ++} ++ ++int vx_set_init(struct vx_info *vxi, struct task_struct *p) ++{ ++ if (!vxi) ++ return -EINVAL; ++ ++ vxdprintk(VXD_CBIT(xid, 6), ++ "vx_set_init(%p[#%d],%p[#%d,%d,%d])", ++ vxi, vxi->vx_id, p, p->xid, p->pid, p->tgid); ++ ++ vxi->vx_flags &= ~VXF_STATE_INIT; ++ // vxi->vx_initpid = p->tgid; ++ vxi->vx_initpid = p->pid; ++ return 0; ++} ++ ++void vx_exit_init(struct vx_info *vxi, struct task_struct *p, int code) ++{ ++ vxdprintk(VXD_CBIT(xid, 6), ++ "vx_exit_init(%p[#%d],%p[#%d,%d,%d])", ++ vxi, vxi->vx_id, p, p->xid, p->pid, p->tgid); ++ ++ vxi->exit_code = code; ++ vxi->vx_initpid = 0; ++} ++ ++ ++void vx_set_persistent(struct vx_info *vxi) ++{ ++ vxdprintk(VXD_CBIT(xid, 6), ++ "vx_set_persistent(%p[#%d])", vxi, vxi->vx_id); ++ ++ get_vx_info(vxi); ++ claim_vx_info(vxi, NULL); ++} ++ ++void vx_clear_persistent(struct vx_info *vxi) ++{ ++ vxdprintk(VXD_CBIT(xid, 6), ++ "vx_clear_persistent(%p[#%d])", vxi, vxi->vx_id); ++ ++ release_vx_info(vxi, NULL); ++ put_vx_info(vxi); ++} ++ ++void vx_update_persistent(struct vx_info *vxi) ++{ ++ if (vx_info_flags(vxi, VXF_PERSISTENT, 0)) ++ vx_set_persistent(vxi); ++ else ++ vx_clear_persistent(vxi); ++} ++ ++ ++/* task must be current or locked */ ++ ++void exit_vx_info(struct task_struct *p, int code) ++{ ++ struct vx_info *vxi = p->vx_info; ++ ++ if (vxi) { ++ atomic_dec(&vxi->cvirt.nr_threads); ++ vx_nproc_dec(p); ++ ++ vxi->exit_code = code; ++ release_vx_info(vxi, p); ++ } ++} ++ ++void exit_vx_info_early(struct task_struct *p, int code) ++{ ++ struct vx_info *vxi = p->vx_info; ++ ++ if (vxi) { ++ if (vxi->vx_initpid == p->pid) ++ vx_exit_init(vxi, p, code); ++ if (vxi->vx_reaper == p) ++ vx_set_reaper(vxi, init_pid_ns.child_reaper); ++ } ++} ++ ++ ++/* vserver syscall commands below here */ ++ ++/* taks xid and vx_info functions */ ++ ++#include ++ ++ ++int vc_task_xid(uint32_t id) ++{ ++ xid_t xid; ++ ++ if (id) { ++ struct task_struct *tsk; ++ ++ rcu_read_lock(); ++ tsk = find_task_by_real_pid(id); ++ xid = (tsk) ? 
tsk->xid : -ESRCH; ++ rcu_read_unlock(); ++ } else ++ xid = vx_current_xid(); ++ return xid; ++} ++ ++ ++int vc_vx_info(struct vx_info *vxi, void __user *data) ++{ ++ struct vcmd_vx_info_v0 vc_data; ++ ++ vc_data.xid = vxi->vx_id; ++ vc_data.initpid = vxi->vx_initpid; ++ ++ if (copy_to_user(data, &vc_data, sizeof(vc_data))) ++ return -EFAULT; ++ return 0; ++} ++ ++ ++int vc_ctx_stat(struct vx_info *vxi, void __user *data) ++{ ++ struct vcmd_ctx_stat_v0 vc_data; ++ ++ vc_data.usecnt = atomic_read(&vxi->vx_usecnt); ++ vc_data.tasks = atomic_read(&vxi->vx_tasks); ++ ++ if (copy_to_user(data, &vc_data, sizeof(vc_data))) ++ return -EFAULT; ++ return 0; ++} ++ ++ ++/* context functions */ ++ ++int vc_ctx_create(uint32_t xid, void __user *data) ++{ ++ struct vcmd_ctx_create vc_data = { .flagword = VXF_INIT_SET }; ++ struct vx_info *new_vxi; ++ int ret; ++ ++ if (data && copy_from_user(&vc_data, data, sizeof(vc_data))) ++ return -EFAULT; ++ ++ if ((xid > MAX_S_CONTEXT) || (xid < 2)) ++ return -EINVAL; ++ ++ new_vxi = __create_vx_info(xid); ++ if (IS_ERR(new_vxi)) ++ return PTR_ERR(new_vxi); ++ ++ /* initial flags */ ++ new_vxi->vx_flags = vc_data.flagword; ++ ++ ret = -ENOEXEC; ++ if (vs_state_change(new_vxi, VSC_STARTUP)) ++ goto out; ++ ++ ret = vx_migrate_task(current, new_vxi, (!data)); ++ if (ret) ++ goto out; ++ ++ /* return context id on success */ ++ ret = new_vxi->vx_id; ++ ++ /* get a reference for persistent contexts */ ++ if ((vc_data.flagword & VXF_PERSISTENT)) ++ vx_set_persistent(new_vxi); ++out: ++ release_vx_info(new_vxi, NULL); ++ put_vx_info(new_vxi); ++ return ret; ++} ++ ++ ++int vc_ctx_migrate(struct vx_info *vxi, void __user *data) ++{ ++ struct vcmd_ctx_migrate vc_data = { .flagword = 0 }; ++ int ret; ++ ++ if (data && copy_from_user(&vc_data, data, sizeof(vc_data))) ++ return -EFAULT; ++ ++ ret = vx_migrate_task(current, vxi, 0); ++ if (ret) ++ return ret; ++ if (vc_data.flagword & VXM_SET_INIT) ++ ret = vx_set_init(vxi, current); ++ if (ret) ++ return ret; ++ if (vc_data.flagword & VXM_SET_REAPER) ++ ret = vx_set_reaper(vxi, current); ++ return ret; ++} ++ ++ ++int vc_get_cflags(struct vx_info *vxi, void __user *data) ++{ ++ struct vcmd_ctx_flags_v0 vc_data; ++ ++ vc_data.flagword = vxi->vx_flags; ++ ++ /* special STATE flag handling */ ++ vc_data.mask = vs_mask_flags(~0ULL, vxi->vx_flags, VXF_ONE_TIME); ++ ++ if (copy_to_user(data, &vc_data, sizeof(vc_data))) ++ return -EFAULT; ++ return 0; ++} ++ ++int vc_set_cflags(struct vx_info *vxi, void __user *data) ++{ ++ struct vcmd_ctx_flags_v0 vc_data; ++ uint64_t mask, trigger; ++ ++ if (copy_from_user(&vc_data, data, sizeof(vc_data))) ++ return -EFAULT; ++ ++ /* special STATE flag handling */ ++ mask = vs_mask_mask(vc_data.mask, vxi->vx_flags, VXF_ONE_TIME); ++ trigger = (mask & vxi->vx_flags) ^ (mask & vc_data.flagword); ++ ++ if (vxi == current_vx_info()) { ++ /* if (trigger & VXF_STATE_SETUP) ++ vx_mask_cap_bset(vxi, current); */ ++ if (trigger & VXF_STATE_INIT) { ++ int ret; ++ ++ ret = vx_set_init(vxi, current); ++ if (ret) ++ return ret; ++ ret = vx_set_reaper(vxi, current); ++ if (ret) ++ return ret; ++ } ++ } ++ ++ vxi->vx_flags = vs_mask_flags(vxi->vx_flags, ++ vc_data.flagword, mask); ++ if (trigger & VXF_PERSISTENT) ++ vx_update_persistent(vxi); ++ ++ return 0; ++} ++ ++ ++static inline uint64_t caps_from_cap_t(kernel_cap_t c) ++{ ++ uint64_t v = c.cap[0] | ((uint64_t)c.cap[1] << 32); ++ ++ // printk("caps_from_cap_t(%08x:%08x) = %016llx\n", c.cap[1], c.cap[0], v); ++ return v; ++} ++ ++static inline kernel_cap_t 
cap_t_from_caps(uint64_t v) ++{ ++ kernel_cap_t c = __cap_empty_set; ++ ++ c.cap[0] = v & 0xFFFFFFFF; ++ c.cap[1] = (v >> 32) & 0xFFFFFFFF; ++ ++ // printk("cap_t_from_caps(%016llx) = %08x:%08x\n", v, c.cap[1], c.cap[0]); ++ return c; ++} ++ ++ ++static int do_get_caps(struct vx_info *vxi, uint64_t *bcaps, uint64_t *ccaps) ++{ ++ if (bcaps) ++ *bcaps = caps_from_cap_t(vxi->vx_bcaps); ++ if (ccaps) ++ *ccaps = vxi->vx_ccaps; ++ ++ return 0; ++} ++ ++int vc_get_ccaps(struct vx_info *vxi, void __user *data) ++{ ++ struct vcmd_ctx_caps_v1 vc_data; ++ int ret; ++ ++ ret = do_get_caps(vxi, NULL, &vc_data.ccaps); ++ if (ret) ++ return ret; ++ vc_data.cmask = ~0ULL; ++ ++ if (copy_to_user(data, &vc_data, sizeof(vc_data))) ++ return -EFAULT; ++ return 0; ++} ++ ++static int do_set_caps(struct vx_info *vxi, ++ uint64_t bcaps, uint64_t bmask, uint64_t ccaps, uint64_t cmask) ++{ ++ uint64_t bcold = caps_from_cap_t(vxi->vx_bcaps); ++ ++#if 0 ++ printk("do_set_caps(%16llx, %16llx, %16llx, %16llx)\n", ++ bcaps, bmask, ccaps, cmask); ++#endif ++ vxi->vx_bcaps = cap_t_from_caps( ++ vs_mask_flags(bcold, bcaps, bmask)); ++ vxi->vx_ccaps = vs_mask_flags(vxi->vx_ccaps, ccaps, cmask); ++ ++ return 0; ++} ++ ++int vc_set_ccaps(struct vx_info *vxi, void __user *data) ++{ ++ struct vcmd_ctx_caps_v1 vc_data; ++ ++ if (copy_from_user(&vc_data, data, sizeof(vc_data))) ++ return -EFAULT; ++ ++ return do_set_caps(vxi, 0, 0, vc_data.ccaps, vc_data.cmask); ++} ++ ++int vc_get_bcaps(struct vx_info *vxi, void __user *data) ++{ ++ struct vcmd_bcaps vc_data; ++ int ret; ++ ++ ret = do_get_caps(vxi, &vc_data.bcaps, NULL); ++ if (ret) ++ return ret; ++ vc_data.bmask = ~0ULL; ++ ++ if (copy_to_user(data, &vc_data, sizeof(vc_data))) ++ return -EFAULT; ++ return 0; ++} ++ ++int vc_set_bcaps(struct vx_info *vxi, void __user *data) ++{ ++ struct vcmd_bcaps vc_data; ++ ++ if (copy_from_user(&vc_data, data, sizeof(vc_data))) ++ return -EFAULT; ++ ++ return do_set_caps(vxi, vc_data.bcaps, vc_data.bmask, 0, 0); ++} ++ ++ ++int vc_get_umask(struct vx_info *vxi, void __user *data) ++{ ++ struct vcmd_umask vc_data; ++ ++ vc_data.umask = vxi->vx_umask; ++ vc_data.mask = ~0ULL; ++ ++ if (copy_to_user(data, &vc_data, sizeof(vc_data))) ++ return -EFAULT; ++ return 0; ++} ++ ++int vc_set_umask(struct vx_info *vxi, void __user *data) ++{ ++ struct vcmd_umask vc_data; ++ ++ if (copy_from_user(&vc_data, data, sizeof(vc_data))) ++ return -EFAULT; ++ ++ vxi->vx_umask = vs_mask_flags(vxi->vx_umask, ++ vc_data.umask, vc_data.mask); ++ return 0; ++} ++ ++ ++int vc_get_wmask(struct vx_info *vxi, void __user *data) ++{ ++ struct vcmd_wmask vc_data; ++ ++ vc_data.wmask = vxi->vx_wmask; ++ vc_data.mask = ~0ULL; ++ ++ if (copy_to_user(data, &vc_data, sizeof(vc_data))) ++ return -EFAULT; ++ return 0; ++} ++ ++int vc_set_wmask(struct vx_info *vxi, void __user *data) ++{ ++ struct vcmd_wmask vc_data; ++ ++ if (copy_from_user(&vc_data, data, sizeof(vc_data))) ++ return -EFAULT; ++ ++ vxi->vx_wmask = vs_mask_flags(vxi->vx_wmask, ++ vc_data.wmask, vc_data.mask); ++ return 0; ++} ++ ++ ++int vc_get_badness(struct vx_info *vxi, void __user *data) ++{ ++ struct vcmd_badness_v0 vc_data; ++ ++ vc_data.bias = vxi->vx_badness_bias; ++ ++ if (copy_to_user(data, &vc_data, sizeof(vc_data))) ++ return -EFAULT; ++ return 0; ++} ++ ++int vc_set_badness(struct vx_info *vxi, void __user *data) ++{ ++ struct vcmd_badness_v0 vc_data; ++ ++ if (copy_from_user(&vc_data, data, sizeof(vc_data))) ++ return -EFAULT; ++ ++ vxi->vx_badness_bias = vc_data.bias; ++ return 0; ++} ++ 
++#include ++ ++EXPORT_SYMBOL_GPL(free_vx_info); ++ +diff -NurpP --minimal linux-3.3.8/kernel/vserver/cvirt.c linux-3.3.8-vs2.3.3.4/kernel/vserver/cvirt.c +--- linux-3.3.8/kernel/vserver/cvirt.c 1970-01-01 01:00:00.000000000 +0100 ++++ linux-3.3.8-vs2.3.3.4/kernel/vserver/cvirt.c 2012-04-24 03:32:01.000000000 +0200 +@@ -0,0 +1,313 @@ ++/* ++ * linux/kernel/vserver/cvirt.c ++ * ++ * Virtual Server: Context Virtualization ++ * ++ * Copyright (C) 2004-2007 Herbert Pötzl ++ * ++ * V0.01 broken out from limit.c ++ * V0.02 added utsname stuff ++ * V0.03 changed vcmds to vxi arg ++ * ++ */ ++ ++#include ++#include ++#include ++#include ++#include ++ ++#include ++ ++ ++void vx_vsi_boottime(struct timespec *boottime) ++{ ++ struct vx_info *vxi = current_vx_info(); ++ ++ set_normalized_timespec(boottime, ++ boottime->tv_sec + vxi->cvirt.bias_uptime.tv_sec, ++ boottime->tv_nsec + vxi->cvirt.bias_uptime.tv_nsec); ++ return; ++} ++ ++void vx_vsi_uptime(struct timespec *uptime, struct timespec *idle) ++{ ++ struct vx_info *vxi = current_vx_info(); ++ ++ set_normalized_timespec(uptime, ++ uptime->tv_sec - vxi->cvirt.bias_uptime.tv_sec, ++ uptime->tv_nsec - vxi->cvirt.bias_uptime.tv_nsec); ++ if (!idle) ++ return; ++ set_normalized_timespec(idle, ++ idle->tv_sec - vxi->cvirt.bias_idle.tv_sec, ++ idle->tv_nsec - vxi->cvirt.bias_idle.tv_nsec); ++ return; ++} ++ ++uint64_t vx_idle_jiffies(void) ++{ ++ return init_task.utime + init_task.stime; ++} ++ ++ ++ ++static inline uint32_t __update_loadavg(uint32_t load, ++ int wsize, int delta, int n) ++{ ++ unsigned long long calc, prev; ++ ++ /* just set it to n */ ++ if (unlikely(delta >= wsize)) ++ return (n << FSHIFT); ++ ++ calc = delta * n; ++ calc <<= FSHIFT; ++ prev = (wsize - delta); ++ prev *= load; ++ calc += prev; ++ do_div(calc, wsize); ++ return calc; ++} ++ ++ ++void vx_update_load(struct vx_info *vxi) ++{ ++ uint32_t now, last, delta; ++ unsigned int nr_running, nr_uninterruptible; ++ unsigned int total; ++ unsigned long flags; ++ ++ spin_lock_irqsave(&vxi->cvirt.load_lock, flags); ++ ++ now = jiffies; ++ last = vxi->cvirt.load_last; ++ delta = now - last; ++ ++ if (delta < 5*HZ) ++ goto out; ++ ++ nr_running = atomic_read(&vxi->cvirt.nr_running); ++ nr_uninterruptible = atomic_read(&vxi->cvirt.nr_uninterruptible); ++ total = nr_running + nr_uninterruptible; ++ ++ vxi->cvirt.load[0] = __update_loadavg(vxi->cvirt.load[0], ++ 60*HZ, delta, total); ++ vxi->cvirt.load[1] = __update_loadavg(vxi->cvirt.load[1], ++ 5*60*HZ, delta, total); ++ vxi->cvirt.load[2] = __update_loadavg(vxi->cvirt.load[2], ++ 15*60*HZ, delta, total); ++ ++ vxi->cvirt.load_last = now; ++out: ++ atomic_inc(&vxi->cvirt.load_updates); ++ spin_unlock_irqrestore(&vxi->cvirt.load_lock, flags); ++} ++ ++ ++/* ++ * Commands to do_syslog: ++ * ++ * 0 -- Close the log. Currently a NOP. ++ * 1 -- Open the log. Currently a NOP. ++ * 2 -- Read from the log. ++ * 3 -- Read all messages remaining in the ring buffer. ++ * 4 -- Read and clear all messages remaining in the ring buffer ++ * 5 -- Clear ring buffer. 
++ * 6 -- Disable printk's to console ++ * 7 -- Enable printk's to console ++ * 8 -- Set level of messages printed to console ++ * 9 -- Return number of unread characters in the log buffer ++ * 10 -- Return size of the log buffer ++ */ ++int vx_do_syslog(int type, char __user *buf, int len) ++{ ++ int error = 0; ++ int do_clear = 0; ++ struct vx_info *vxi = current_vx_info(); ++ struct _vx_syslog *log; ++ ++ if (!vxi) ++ return -EINVAL; ++ log = &vxi->cvirt.syslog; ++ ++ switch (type) { ++ case 0: /* Close log */ ++ case 1: /* Open log */ ++ break; ++ case 2: /* Read from log */ ++ error = wait_event_interruptible(log->log_wait, ++ (log->log_start - log->log_end)); ++ if (error) ++ break; ++ spin_lock_irq(&log->logbuf_lock); ++ spin_unlock_irq(&log->logbuf_lock); ++ break; ++ case 4: /* Read/clear last kernel messages */ ++ do_clear = 1; ++ /* fall through */ ++ case 3: /* Read last kernel messages */ ++ return 0; ++ ++ case 5: /* Clear ring buffer */ ++ return 0; ++ ++ case 6: /* Disable logging to console */ ++ case 7: /* Enable logging to console */ ++ case 8: /* Set level of messages printed to console */ ++ break; ++ ++ case 9: /* Number of chars in the log buffer */ ++ return 0; ++ case 10: /* Size of the log buffer */ ++ return 0; ++ default: ++ error = -EINVAL; ++ break; ++ } ++ return error; ++} ++ ++ ++/* virtual host info names */ ++ ++static char *vx_vhi_name(struct vx_info *vxi, int id) ++{ ++ struct nsproxy *nsproxy; ++ struct uts_namespace *uts; ++ ++ if (id == VHIN_CONTEXT) ++ return vxi->vx_name; ++ ++ nsproxy = vxi->space[0].vx_nsproxy; ++ if (!nsproxy) ++ return NULL; ++ ++ uts = nsproxy->uts_ns; ++ if (!uts) ++ return NULL; ++ ++ switch (id) { ++ case VHIN_SYSNAME: ++ return uts->name.sysname; ++ case VHIN_NODENAME: ++ return uts->name.nodename; ++ case VHIN_RELEASE: ++ return uts->name.release; ++ case VHIN_VERSION: ++ return uts->name.version; ++ case VHIN_MACHINE: ++ return uts->name.machine; ++ case VHIN_DOMAINNAME: ++ return uts->name.domainname; ++ default: ++ return NULL; ++ } ++ return NULL; ++} ++ ++int vc_set_vhi_name(struct vx_info *vxi, void __user *data) ++{ ++ struct vcmd_vhi_name_v0 vc_data; ++ char *name; ++ ++ if (copy_from_user(&vc_data, data, sizeof(vc_data))) ++ return -EFAULT; ++ ++ name = vx_vhi_name(vxi, vc_data.field); ++ if (!name) ++ return -EINVAL; ++ ++ memcpy(name, vc_data.name, 65); ++ return 0; ++} ++ ++int vc_get_vhi_name(struct vx_info *vxi, void __user *data) ++{ ++ struct vcmd_vhi_name_v0 vc_data; ++ char *name; ++ ++ if (copy_from_user(&vc_data, data, sizeof(vc_data))) ++ return -EFAULT; ++ ++ name = vx_vhi_name(vxi, vc_data.field); ++ if (!name) ++ return -EINVAL; ++ ++ memcpy(vc_data.name, name, 65); ++ if (copy_to_user(data, &vc_data, sizeof(vc_data))) ++ return -EFAULT; ++ return 0; ++} ++ ++ ++int vc_virt_stat(struct vx_info *vxi, void __user *data) ++{ ++ struct vcmd_virt_stat_v0 vc_data; ++ struct _vx_cvirt *cvirt = &vxi->cvirt; ++ struct timespec uptime; ++ ++ do_posix_clock_monotonic_gettime(&uptime); ++ set_normalized_timespec(&uptime, ++ uptime.tv_sec - cvirt->bias_uptime.tv_sec, ++ uptime.tv_nsec - cvirt->bias_uptime.tv_nsec); ++ ++ vc_data.offset = timespec_to_ns(&cvirt->bias_ts); ++ vc_data.uptime = timespec_to_ns(&uptime); ++ vc_data.nr_threads = atomic_read(&cvirt->nr_threads); ++ vc_data.nr_running = atomic_read(&cvirt->nr_running); ++ vc_data.nr_uninterruptible = atomic_read(&cvirt->nr_uninterruptible); ++ vc_data.nr_onhold = atomic_read(&cvirt->nr_onhold); ++ vc_data.nr_forks = atomic_read(&cvirt->total_forks); ++ 
vc_data.load[0] = cvirt->load[0]; ++ vc_data.load[1] = cvirt->load[1]; ++ vc_data.load[2] = cvirt->load[2]; ++ ++ if (copy_to_user(data, &vc_data, sizeof(vc_data))) ++ return -EFAULT; ++ return 0; ++} ++ ++ ++#ifdef CONFIG_VSERVER_VTIME ++ ++/* virtualized time base */ ++ ++void vx_adjust_timespec(struct timespec *ts) ++{ ++ struct vx_info *vxi; ++ ++ if (!vx_flags(VXF_VIRT_TIME, 0)) ++ return; ++ ++ vxi = current_vx_info(); ++ ts->tv_sec += vxi->cvirt.bias_ts.tv_sec; ++ ts->tv_nsec += vxi->cvirt.bias_ts.tv_nsec; ++ ++ if (ts->tv_nsec >= NSEC_PER_SEC) { ++ ts->tv_sec++; ++ ts->tv_nsec -= NSEC_PER_SEC; ++ } else if (ts->tv_nsec < 0) { ++ ts->tv_sec--; ++ ts->tv_nsec += NSEC_PER_SEC; ++ } ++} ++ ++int vx_settimeofday(const struct timespec *ts) ++{ ++ struct timespec ats, delta; ++ struct vx_info *vxi; ++ ++ if (!vx_flags(VXF_VIRT_TIME, 0)) ++ return do_settimeofday(ts); ++ ++ getnstimeofday(&ats); ++ delta = timespec_sub(*ts, ats); ++ ++ vxi = current_vx_info(); ++ vxi->cvirt.bias_ts = timespec_add(vxi->cvirt.bias_ts, delta); ++ return 0; ++} ++ ++#endif ++ +diff -NurpP --minimal linux-3.3.8/kernel/vserver/cvirt_init.h linux-3.3.8-vs2.3.3.4/kernel/vserver/cvirt_init.h +--- linux-3.3.8/kernel/vserver/cvirt_init.h 1970-01-01 01:00:00.000000000 +0100 ++++ linux-3.3.8-vs2.3.3.4/kernel/vserver/cvirt_init.h 2012-02-24 03:55:06.000000000 +0100 +@@ -0,0 +1,70 @@ ++ ++ ++extern uint64_t vx_idle_jiffies(void); ++ ++static inline void vx_info_init_cvirt(struct _vx_cvirt *cvirt) ++{ ++ uint64_t idle_jiffies = vx_idle_jiffies(); ++ uint64_t nsuptime; ++ ++ do_posix_clock_monotonic_gettime(&cvirt->bias_uptime); ++ nsuptime = (unsigned long long)cvirt->bias_uptime.tv_sec ++ * NSEC_PER_SEC + cvirt->bias_uptime.tv_nsec; ++ cvirt->bias_clock = nsec_to_clock_t(nsuptime); ++ cvirt->bias_ts.tv_sec = 0; ++ cvirt->bias_ts.tv_nsec = 0; ++ ++ jiffies_to_timespec(idle_jiffies, &cvirt->bias_idle); ++ atomic_set(&cvirt->nr_threads, 0); ++ atomic_set(&cvirt->nr_running, 0); ++ atomic_set(&cvirt->nr_uninterruptible, 0); ++ atomic_set(&cvirt->nr_onhold, 0); ++ ++ spin_lock_init(&cvirt->load_lock); ++ cvirt->load_last = jiffies; ++ atomic_set(&cvirt->load_updates, 0); ++ cvirt->load[0] = 0; ++ cvirt->load[1] = 0; ++ cvirt->load[2] = 0; ++ atomic_set(&cvirt->total_forks, 0); ++ ++ spin_lock_init(&cvirt->syslog.logbuf_lock); ++ init_waitqueue_head(&cvirt->syslog.log_wait); ++ cvirt->syslog.log_start = 0; ++ cvirt->syslog.log_end = 0; ++ cvirt->syslog.con_start = 0; ++ cvirt->syslog.logged_chars = 0; ++} ++ ++static inline ++void vx_info_init_cvirt_pc(struct _vx_cvirt_pc *cvirt_pc, int cpu) ++{ ++ // cvirt_pc->cpustat = { 0 }; ++} ++ ++static inline void vx_info_exit_cvirt(struct _vx_cvirt *cvirt) ++{ ++#ifdef CONFIG_VSERVER_WARN ++ int value; ++#endif ++ vxwprintk_xid((value = atomic_read(&cvirt->nr_threads)), ++ "!!! cvirt: %p[nr_threads] = %d on exit.", ++ cvirt, value); ++ vxwprintk_xid((value = atomic_read(&cvirt->nr_running)), ++ "!!! cvirt: %p[nr_running] = %d on exit.", ++ cvirt, value); ++ vxwprintk_xid((value = atomic_read(&cvirt->nr_uninterruptible)), ++ "!!! cvirt: %p[nr_uninterruptible] = %d on exit.", ++ cvirt, value); ++ vxwprintk_xid((value = atomic_read(&cvirt->nr_onhold)), ++ "!!! 
cvirt: %p[nr_onhold] = %d on exit.", ++ cvirt, value); ++ return; ++} ++ ++static inline ++void vx_info_exit_cvirt_pc(struct _vx_cvirt_pc *cvirt_pc, int cpu) ++{ ++ return; ++} ++ +diff -NurpP --minimal linux-3.3.8/kernel/vserver/cvirt_proc.h linux-3.3.8-vs2.3.3.4/kernel/vserver/cvirt_proc.h +--- linux-3.3.8/kernel/vserver/cvirt_proc.h 1970-01-01 01:00:00.000000000 +0100 ++++ linux-3.3.8-vs2.3.3.4/kernel/vserver/cvirt_proc.h 2012-04-03 16:07:39.000000000 +0200 +@@ -0,0 +1,123 @@ ++#ifndef _VX_CVIRT_PROC_H ++#define _VX_CVIRT_PROC_H ++ ++#include ++#include ++#include ++#include ++#include ++ ++extern int vx_info_mnt_namespace(struct mnt_namespace *, char *); ++ ++static inline ++int vx_info_proc_nsproxy(struct nsproxy *nsproxy, char *buffer) ++{ ++ struct mnt_namespace *ns; ++ struct uts_namespace *uts; ++ struct ipc_namespace *ipc; ++ int length = 0; ++ ++ if (!nsproxy) ++ goto out; ++ ++ length += sprintf(buffer + length, ++ "NSProxy:\t%p [%p,%p,%p]\n", ++ nsproxy, nsproxy->mnt_ns, ++ nsproxy->uts_ns, nsproxy->ipc_ns); ++ ++ ns = nsproxy->mnt_ns; ++ if (!ns) ++ goto skip_ns; ++ ++ length += vx_info_mnt_namespace(ns, buffer + length); ++ ++skip_ns: ++ ++ uts = nsproxy->uts_ns; ++ if (!uts) ++ goto skip_uts; ++ ++ length += sprintf(buffer + length, ++ "SysName:\t%.*s\n" ++ "NodeName:\t%.*s\n" ++ "Release:\t%.*s\n" ++ "Version:\t%.*s\n" ++ "Machine:\t%.*s\n" ++ "DomainName:\t%.*s\n", ++ __NEW_UTS_LEN, uts->name.sysname, ++ __NEW_UTS_LEN, uts->name.nodename, ++ __NEW_UTS_LEN, uts->name.release, ++ __NEW_UTS_LEN, uts->name.version, ++ __NEW_UTS_LEN, uts->name.machine, ++ __NEW_UTS_LEN, uts->name.domainname); ++skip_uts: ++ ++ ipc = nsproxy->ipc_ns; ++ if (!ipc) ++ goto skip_ipc; ++ ++ length += sprintf(buffer + length, ++ "SEMS:\t\t%d %d %d %d %d\n" ++ "MSG:\t\t%d %d %d\n" ++ "SHM:\t\t%lu %lu %d %d\n", ++ ipc->sem_ctls[0], ipc->sem_ctls[1], ++ ipc->sem_ctls[2], ipc->sem_ctls[3], ++ ipc->used_sems, ++ ipc->msg_ctlmax, ipc->msg_ctlmnb, ipc->msg_ctlmni, ++ (unsigned long)ipc->shm_ctlmax, ++ (unsigned long)ipc->shm_ctlall, ++ ipc->shm_ctlmni, ipc->shm_tot); ++skip_ipc: ++out: ++ return length; ++} ++ ++ ++#include ++ ++#define LOAD_INT(x) ((x) >> FSHIFT) ++#define LOAD_FRAC(x) LOAD_INT(((x) & (FIXED_1 - 1)) * 100) ++ ++static inline ++int vx_info_proc_cvirt(struct _vx_cvirt *cvirt, char *buffer) ++{ ++ int length = 0; ++ int a, b, c; ++ ++ length += sprintf(buffer + length, ++ "BiasUptime:\t%lu.%02lu\n", ++ (unsigned long)cvirt->bias_uptime.tv_sec, ++ (cvirt->bias_uptime.tv_nsec / (NSEC_PER_SEC / 100))); ++ ++ a = cvirt->load[0] + (FIXED_1 / 200); ++ b = cvirt->load[1] + (FIXED_1 / 200); ++ c = cvirt->load[2] + (FIXED_1 / 200); ++ length += sprintf(buffer + length, ++ "nr_threads:\t%d\n" ++ "nr_running:\t%d\n" ++ "nr_unintr:\t%d\n" ++ "nr_onhold:\t%d\n" ++ "load_updates:\t%d\n" ++ "loadavg:\t%d.%02d %d.%02d %d.%02d\n" ++ "total_forks:\t%d\n", ++ atomic_read(&cvirt->nr_threads), ++ atomic_read(&cvirt->nr_running), ++ atomic_read(&cvirt->nr_uninterruptible), ++ atomic_read(&cvirt->nr_onhold), ++ atomic_read(&cvirt->load_updates), ++ LOAD_INT(a), LOAD_FRAC(a), ++ LOAD_INT(b), LOAD_FRAC(b), ++ LOAD_INT(c), LOAD_FRAC(c), ++ atomic_read(&cvirt->total_forks)); ++ return length; ++} ++ ++static inline ++int vx_info_proc_cvirt_pc(struct _vx_cvirt_pc *cvirt_pc, ++ char *buffer, int cpu) ++{ ++ int length = 0; ++ return length; ++} ++ ++#endif /* _VX_CVIRT_PROC_H */ +diff -NurpP --minimal linux-3.3.8/kernel/vserver/debug.c linux-3.3.8-vs2.3.3.4/kernel/vserver/debug.c +--- linux-3.3.8/kernel/vserver/debug.c 
1970-01-01 01:00:00.000000000 +0100 ++++ linux-3.3.8-vs2.3.3.4/kernel/vserver/debug.c 2012-02-24 03:55:06.000000000 +0100 +@@ -0,0 +1,32 @@ ++/* ++ * kernel/vserver/debug.c ++ * ++ * Copyright (C) 2005-2007 Herbert Pötzl ++ * ++ * V0.01 vx_info dump support ++ * ++ */ ++ ++#include ++ ++#include ++ ++ ++void dump_vx_info(struct vx_info *vxi, int level) ++{ ++ printk("vx_info %p[#%d, %d.%d, %4x]\n", vxi, vxi->vx_id, ++ atomic_read(&vxi->vx_usecnt), ++ atomic_read(&vxi->vx_tasks), ++ vxi->vx_state); ++ if (level > 0) { ++ __dump_vx_limit(&vxi->limit); ++ __dump_vx_sched(&vxi->sched); ++ __dump_vx_cvirt(&vxi->cvirt); ++ __dump_vx_cacct(&vxi->cacct); ++ } ++ printk("---\n"); ++} ++ ++ ++EXPORT_SYMBOL_GPL(dump_vx_info); ++ +diff -NurpP --minimal linux-3.3.8/kernel/vserver/device.c linux-3.3.8-vs2.3.3.4/kernel/vserver/device.c +--- linux-3.3.8/kernel/vserver/device.c 1970-01-01 01:00:00.000000000 +0100 ++++ linux-3.3.8-vs2.3.3.4/kernel/vserver/device.c 2012-02-24 03:55:07.000000000 +0100 +@@ -0,0 +1,443 @@ ++/* ++ * linux/kernel/vserver/device.c ++ * ++ * Linux-VServer: Device Support ++ * ++ * Copyright (C) 2006 Herbert Pötzl ++ * Copyright (C) 2007 Daniel Hokka Zakrisson ++ * ++ * V0.01 device mapping basics ++ * V0.02 added defaults ++ * ++ */ ++ ++#include ++#include ++#include ++#include ++#include ++ ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++ ++ ++#define DMAP_HASH_BITS 4 ++ ++ ++struct vs_mapping { ++ union { ++ struct hlist_node hlist; ++ struct list_head list; ++ } u; ++#define dm_hlist u.hlist ++#define dm_list u.list ++ xid_t xid; ++ dev_t device; ++ struct vx_dmap_target target; ++}; ++ ++ ++static struct hlist_head dmap_main_hash[1 << DMAP_HASH_BITS]; ++ ++static DEFINE_SPINLOCK(dmap_main_hash_lock); ++ ++static struct vx_dmap_target dmap_defaults[2] = { ++ { .flags = DATTR_OPEN }, ++ { .flags = DATTR_OPEN }, ++}; ++ ++ ++struct kmem_cache *dmap_cachep __read_mostly; ++ ++int __init dmap_cache_init(void) ++{ ++ dmap_cachep = kmem_cache_create("dmap_cache", ++ sizeof(struct vs_mapping), 0, ++ SLAB_HWCACHE_ALIGN|SLAB_PANIC, NULL); ++ return 0; ++} ++ ++__initcall(dmap_cache_init); ++ ++ ++static inline unsigned int __hashval(dev_t dev, int bits) ++{ ++ return hash_long((unsigned long)dev, bits); ++} ++ ++ ++/* __hash_mapping() ++ * add the mapping to the hash table ++ */ ++static inline void __hash_mapping(struct vx_info *vxi, struct vs_mapping *vdm) ++{ ++ spinlock_t *hash_lock = &dmap_main_hash_lock; ++ struct hlist_head *head, *hash = dmap_main_hash; ++ int device = vdm->device; ++ ++ spin_lock(hash_lock); ++ vxdprintk(VXD_CBIT(misc, 8), "__hash_mapping: %p[#%d] %08x:%08x", ++ vxi, vxi ? vxi->vx_id : 0, device, vdm->target.target); ++ ++ head = &hash[__hashval(device, DMAP_HASH_BITS)]; ++ hlist_add_head(&vdm->dm_hlist, head); ++ spin_unlock(hash_lock); ++} ++ ++ ++static inline int __mode_to_default(umode_t mode) ++{ ++ switch (mode) { ++ case S_IFBLK: ++ return 0; ++ case S_IFCHR: ++ return 1; ++ default: ++ BUG(); ++ } ++} ++ ++ ++/* __set_default() ++ * set a default ++ */ ++static inline void __set_default(struct vx_info *vxi, umode_t mode, ++ struct vx_dmap_target *vdmt) ++{ ++ spinlock_t *hash_lock = &dmap_main_hash_lock; ++ spin_lock(hash_lock); ++ ++ if (vxi) ++ vxi->dmap.targets[__mode_to_default(mode)] = *vdmt; ++ else ++ dmap_defaults[__mode_to_default(mode)] = *vdmt; ++ ++ ++ spin_unlock(hash_lock); ++ ++ vxdprintk(VXD_CBIT(misc, 8), "__set_default: %p[#%u] %08x %04x", ++ vxi, vxi ? 
vxi->vx_id : 0, vdmt->target, vdmt->flags); ++} ++ ++ ++/* __remove_default() ++ * remove a default ++ */ ++static inline int __remove_default(struct vx_info *vxi, umode_t mode) ++{ ++ spinlock_t *hash_lock = &dmap_main_hash_lock; ++ spin_lock(hash_lock); ++ ++ if (vxi) ++ vxi->dmap.targets[__mode_to_default(mode)].flags = 0; ++ else /* remove == reset */ ++ dmap_defaults[__mode_to_default(mode)].flags = DATTR_OPEN | mode; ++ ++ spin_unlock(hash_lock); ++ return 0; ++} ++ ++ ++/* __find_mapping() ++ * find a mapping in the hash table ++ * ++ * caller must hold hash_lock ++ */ ++static inline int __find_mapping(xid_t xid, dev_t device, umode_t mode, ++ struct vs_mapping **local, struct vs_mapping **global) ++{ ++ struct hlist_head *hash = dmap_main_hash; ++ struct hlist_head *head = &hash[__hashval(device, DMAP_HASH_BITS)]; ++ struct hlist_node *pos; ++ struct vs_mapping *vdm; ++ ++ *local = NULL; ++ if (global) ++ *global = NULL; ++ ++ hlist_for_each(pos, head) { ++ vdm = hlist_entry(pos, struct vs_mapping, dm_hlist); ++ ++ if ((vdm->device == device) && ++ !((vdm->target.flags ^ mode) & S_IFMT)) { ++ if (vdm->xid == xid) { ++ *local = vdm; ++ return 1; ++ } else if (global && vdm->xid == 0) ++ *global = vdm; ++ } ++ } ++ ++ if (global && *global) ++ return 0; ++ else ++ return -ENOENT; ++} ++ ++ ++/* __lookup_mapping() ++ * find a mapping and store the result in target and flags ++ */ ++static inline int __lookup_mapping(struct vx_info *vxi, ++ dev_t device, dev_t *target, int *flags, umode_t mode) ++{ ++ spinlock_t *hash_lock = &dmap_main_hash_lock; ++ struct vs_mapping *vdm, *global; ++ struct vx_dmap_target *vdmt; ++ int ret = 0; ++ xid_t xid = vxi->vx_id; ++ int index; ++ ++ spin_lock(hash_lock); ++ if (__find_mapping(xid, device, mode, &vdm, &global) > 0) { ++ ret = 1; ++ vdmt = &vdm->target; ++ goto found; ++ } ++ ++ index = __mode_to_default(mode); ++ if (vxi && vxi->dmap.targets[index].flags) { ++ ret = 2; ++ vdmt = &vxi->dmap.targets[index]; ++ } else if (global) { ++ ret = 3; ++ vdmt = &global->target; ++ goto found; ++ } else { ++ ret = 4; ++ vdmt = &dmap_defaults[index]; ++ } ++ ++found: ++ if (target && (vdmt->flags & DATTR_REMAP)) ++ *target = vdmt->target; ++ else if (target) ++ *target = device; ++ if (flags) ++ *flags = vdmt->flags; ++ ++ spin_unlock(hash_lock); ++ ++ return ret; ++} ++ ++ ++/* __remove_mapping() ++ * remove a mapping from the hash table ++ */ ++static inline int __remove_mapping(struct vx_info *vxi, dev_t device, ++ umode_t mode) ++{ ++ spinlock_t *hash_lock = &dmap_main_hash_lock; ++ struct vs_mapping *vdm = NULL; ++ int ret = 0; ++ ++ spin_lock(hash_lock); ++ ++ ret = __find_mapping((vxi ? vxi->vx_id : 0), device, mode, &vdm, ++ NULL); ++ vxdprintk(VXD_CBIT(misc, 8), "__remove_mapping: %p[#%d] %08x %04x", ++ vxi, vxi ? vxi->vx_id : 0, device, mode); ++ if (ret < 0) ++ goto out; ++ hlist_del(&vdm->dm_hlist); ++ ++out: ++ spin_unlock(hash_lock); ++ if (vdm) ++ kmem_cache_free(dmap_cachep, vdm); ++ return ret; ++} ++ ++ ++ ++int vs_map_device(struct vx_info *vxi, ++ dev_t device, dev_t *target, umode_t mode) ++{ ++ int ret, flags = DATTR_MASK; ++ ++ if (!vxi) { ++ if (target) ++ *target = device; ++ goto out; ++ } ++ ret = __lookup_mapping(vxi, device, target, &flags, mode); ++ vxdprintk(VXD_CBIT(misc, 8), "vs_map_device: %08x target: %08x flags: %04x mode: %04x mapped=%d", ++ device, target ? 
*target : 0, flags, mode, ret); ++out: ++ return (flags & DATTR_MASK); ++} ++ ++ ++ ++static int do_set_mapping(struct vx_info *vxi, ++ dev_t device, dev_t target, int flags, umode_t mode) ++{ ++ if (device) { ++ struct vs_mapping *new; ++ ++ new = kmem_cache_alloc(dmap_cachep, GFP_KERNEL); ++ if (!new) ++ return -ENOMEM; ++ ++ INIT_HLIST_NODE(&new->dm_hlist); ++ new->device = device; ++ new->target.target = target; ++ new->target.flags = flags | mode; ++ new->xid = (vxi ? vxi->vx_id : 0); ++ ++ vxdprintk(VXD_CBIT(misc, 8), "do_set_mapping: %08x target: %08x flags: %04x", device, target, flags); ++ __hash_mapping(vxi, new); ++ } else { ++ struct vx_dmap_target new = { ++ .target = target, ++ .flags = flags | mode, ++ }; ++ __set_default(vxi, mode, &new); ++ } ++ return 0; ++} ++ ++ ++static int do_unset_mapping(struct vx_info *vxi, ++ dev_t device, dev_t target, int flags, umode_t mode) ++{ ++ int ret = -EINVAL; ++ ++ if (device) { ++ ret = __remove_mapping(vxi, device, mode); ++ if (ret < 0) ++ goto out; ++ } else { ++ ret = __remove_default(vxi, mode); ++ if (ret < 0) ++ goto out; ++ } ++ ++out: ++ return ret; ++} ++ ++ ++static inline int __user_device(const char __user *name, dev_t *dev, ++ umode_t *mode) ++{ ++ struct nameidata nd; ++ int ret; ++ ++ if (!name) { ++ *dev = 0; ++ return 0; ++ } ++ ret = user_lpath(name, &nd.path); ++ if (ret) ++ return ret; ++ if (nd.path.dentry->d_inode) { ++ *dev = nd.path.dentry->d_inode->i_rdev; ++ *mode = nd.path.dentry->d_inode->i_mode; ++ } ++ path_put(&nd.path); ++ return 0; ++} ++ ++static inline int __mapping_mode(dev_t device, dev_t target, ++ umode_t device_mode, umode_t target_mode, umode_t *mode) ++{ ++ if (device) ++ *mode = device_mode & S_IFMT; ++ else if (target) ++ *mode = target_mode & S_IFMT; ++ else ++ return -EINVAL; ++ ++ /* if both given, device and target mode have to match */ ++ if (device && target && ++ ((device_mode ^ target_mode) & S_IFMT)) ++ return -EINVAL; ++ return 0; ++} ++ ++ ++static inline int do_mapping(struct vx_info *vxi, const char __user *device_path, ++ const char __user *target_path, int flags, int set) ++{ ++ dev_t device = ~0, target = ~0; ++ umode_t device_mode = 0, target_mode = 0, mode; ++ int ret; ++ ++ ret = __user_device(device_path, &device, &device_mode); ++ if (ret) ++ return ret; ++ ret = __user_device(target_path, &target, &target_mode); ++ if (ret) ++ return ret; ++ ++ ret = __mapping_mode(device, target, ++ device_mode, target_mode, &mode); ++ if (ret) ++ return ret; ++ ++ if (set) ++ return do_set_mapping(vxi, device, target, ++ flags, mode); ++ else ++ return do_unset_mapping(vxi, device, target, ++ flags, mode); ++} ++ ++ ++int vc_set_mapping(struct vx_info *vxi, void __user *data) ++{ ++ struct vcmd_set_mapping_v0 vc_data; ++ ++ if (copy_from_user(&vc_data, data, sizeof(vc_data))) ++ return -EFAULT; ++ ++ return do_mapping(vxi, vc_data.device, vc_data.target, ++ vc_data.flags, 1); ++} ++ ++int vc_unset_mapping(struct vx_info *vxi, void __user *data) ++{ ++ struct vcmd_set_mapping_v0 vc_data; ++ ++ if (copy_from_user(&vc_data, data, sizeof(vc_data))) ++ return -EFAULT; ++ ++ return do_mapping(vxi, vc_data.device, vc_data.target, ++ vc_data.flags, 0); ++} ++ ++ ++#ifdef CONFIG_COMPAT ++ ++int vc_set_mapping_x32(struct vx_info *vxi, void __user *data) ++{ ++ struct vcmd_set_mapping_v0_x32 vc_data; ++ ++ if (copy_from_user(&vc_data, data, sizeof(vc_data))) ++ return -EFAULT; ++ ++ return do_mapping(vxi, compat_ptr(vc_data.device_ptr), ++ compat_ptr(vc_data.target_ptr), vc_data.flags, 1); ++} 
++ ++int vc_unset_mapping_x32(struct vx_info *vxi, void __user *data) ++{ ++ struct vcmd_set_mapping_v0_x32 vc_data; ++ ++ if (copy_from_user(&vc_data, data, sizeof(vc_data))) ++ return -EFAULT; ++ ++ return do_mapping(vxi, compat_ptr(vc_data.device_ptr), ++ compat_ptr(vc_data.target_ptr), vc_data.flags, 0); ++} ++ ++#endif /* CONFIG_COMPAT */ ++ ++ +diff -NurpP --minimal linux-3.3.8/kernel/vserver/dlimit.c linux-3.3.8-vs2.3.3.4/kernel/vserver/dlimit.c +--- linux-3.3.8/kernel/vserver/dlimit.c 1970-01-01 01:00:00.000000000 +0100 ++++ linux-3.3.8-vs2.3.3.4/kernel/vserver/dlimit.c 2012-02-24 03:55:07.000000000 +0100 +@@ -0,0 +1,531 @@ ++/* ++ * linux/kernel/vserver/dlimit.c ++ * ++ * Virtual Server: Context Disk Limits ++ * ++ * Copyright (C) 2004-2009 Herbert Pötzl ++ * ++ * V0.01 initial version ++ * V0.02 compat32 splitup ++ * V0.03 extended interface ++ * ++ */ ++ ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++// #include ++ ++#include ++ ++/* __alloc_dl_info() ++ ++ * allocate an initialized dl_info struct ++ * doesn't make it visible (hash) */ ++ ++static struct dl_info *__alloc_dl_info(struct super_block *sb, tag_t tag) ++{ ++ struct dl_info *new = NULL; ++ ++ vxdprintk(VXD_CBIT(dlim, 5), ++ "alloc_dl_info(%p,%d)*", sb, tag); ++ ++ /* would this benefit from a slab cache? */ ++ new = kmalloc(sizeof(struct dl_info), GFP_KERNEL); ++ if (!new) ++ return 0; ++ ++ memset(new, 0, sizeof(struct dl_info)); ++ new->dl_tag = tag; ++ new->dl_sb = sb; ++ // INIT_RCU_HEAD(&new->dl_rcu); ++ INIT_HLIST_NODE(&new->dl_hlist); ++ spin_lock_init(&new->dl_lock); ++ atomic_set(&new->dl_refcnt, 0); ++ atomic_set(&new->dl_usecnt, 0); ++ ++ /* rest of init goes here */ ++ ++ vxdprintk(VXD_CBIT(dlim, 4), ++ "alloc_dl_info(%p,%d) = %p", sb, tag, new); ++ return new; ++} ++ ++/* __dealloc_dl_info() ++ ++ * final disposal of dl_info */ ++ ++static void __dealloc_dl_info(struct dl_info *dli) ++{ ++ vxdprintk(VXD_CBIT(dlim, 4), ++ "dealloc_dl_info(%p)", dli); ++ ++ dli->dl_hlist.next = LIST_POISON1; ++ dli->dl_tag = -1; ++ dli->dl_sb = 0; ++ ++ BUG_ON(atomic_read(&dli->dl_usecnt)); ++ BUG_ON(atomic_read(&dli->dl_refcnt)); ++ ++ kfree(dli); ++} ++ ++ ++/* hash table for dl_info hash */ ++ ++#define DL_HASH_SIZE 13 ++ ++struct hlist_head dl_info_hash[DL_HASH_SIZE]; ++ ++static DEFINE_SPINLOCK(dl_info_hash_lock); ++ ++ ++static inline unsigned int __hashval(struct super_block *sb, tag_t tag) ++{ ++ return ((tag ^ (unsigned long)sb) % DL_HASH_SIZE); ++} ++ ++ ++ ++/* __hash_dl_info() ++ ++ * add the dli to the global hash table ++ * requires the hash_lock to be held */ ++ ++static inline void __hash_dl_info(struct dl_info *dli) ++{ ++ struct hlist_head *head; ++ ++ vxdprintk(VXD_CBIT(dlim, 6), ++ "__hash_dl_info: %p[#%d]", dli, dli->dl_tag); ++ get_dl_info(dli); ++ head = &dl_info_hash[__hashval(dli->dl_sb, dli->dl_tag)]; ++ hlist_add_head_rcu(&dli->dl_hlist, head); ++} ++ ++/* __unhash_dl_info() ++ ++ * remove the dli from the global hash table ++ * requires the hash_lock to be held */ ++ ++static inline void __unhash_dl_info(struct dl_info *dli) ++{ ++ vxdprintk(VXD_CBIT(dlim, 6), ++ "__unhash_dl_info: %p[#%d]", dli, dli->dl_tag); ++ hlist_del_rcu(&dli->dl_hlist); ++ put_dl_info(dli); ++} ++ ++ ++/* __lookup_dl_info() ++ ++ * requires the rcu_read_lock() ++ * doesn't increment the dl_refcnt */ ++ ++static inline struct dl_info *__lookup_dl_info(struct super_block *sb, tag_t tag) ++{ ++ struct hlist_head *head = &dl_info_hash[__hashval(sb, tag)]; ++ struct hlist_node *pos; ++ struct 
dl_info *dli; ++ ++ hlist_for_each_entry_rcu(dli, pos, head, dl_hlist) { ++ ++ if (dli->dl_tag == tag && dli->dl_sb == sb) { ++ return dli; ++ } ++ } ++ return NULL; ++} ++ ++ ++struct dl_info *locate_dl_info(struct super_block *sb, tag_t tag) ++{ ++ struct dl_info *dli; ++ ++ rcu_read_lock(); ++ dli = get_dl_info(__lookup_dl_info(sb, tag)); ++ vxdprintk(VXD_CBIT(dlim, 7), ++ "locate_dl_info(%p,#%d) = %p", sb, tag, dli); ++ rcu_read_unlock(); ++ return dli; ++} ++ ++void rcu_free_dl_info(struct rcu_head *head) ++{ ++ struct dl_info *dli = container_of(head, struct dl_info, dl_rcu); ++ int usecnt, refcnt; ++ ++ BUG_ON(!dli || !head); ++ ++ usecnt = atomic_read(&dli->dl_usecnt); ++ BUG_ON(usecnt < 0); ++ ++ refcnt = atomic_read(&dli->dl_refcnt); ++ BUG_ON(refcnt < 0); ++ ++ vxdprintk(VXD_CBIT(dlim, 3), ++ "rcu_free_dl_info(%p)", dli); ++ if (!usecnt) ++ __dealloc_dl_info(dli); ++ else ++ printk("!!! rcu didn't free\n"); ++} ++ ++ ++ ++ ++static int do_addrem_dlimit(uint32_t id, const char __user *name, ++ uint32_t flags, int add) ++{ ++ struct path path; ++ int ret; ++ ++ ret = user_lpath(name, &path); ++ if (!ret) { ++ struct super_block *sb; ++ struct dl_info *dli; ++ ++ ret = -EINVAL; ++ if (!path.dentry->d_inode) ++ goto out_release; ++ if (!(sb = path.dentry->d_inode->i_sb)) ++ goto out_release; ++ ++ if (add) { ++ dli = __alloc_dl_info(sb, id); ++ spin_lock(&dl_info_hash_lock); ++ ++ ret = -EEXIST; ++ if (__lookup_dl_info(sb, id)) ++ goto out_unlock; ++ __hash_dl_info(dli); ++ dli = NULL; ++ } else { ++ spin_lock(&dl_info_hash_lock); ++ dli = __lookup_dl_info(sb, id); ++ ++ ret = -ESRCH; ++ if (!dli) ++ goto out_unlock; ++ __unhash_dl_info(dli); ++ } ++ ret = 0; ++ out_unlock: ++ spin_unlock(&dl_info_hash_lock); ++ if (add && dli) ++ __dealloc_dl_info(dli); ++ out_release: ++ path_put(&path); ++ } ++ return ret; ++} ++ ++int vc_add_dlimit(uint32_t id, void __user *data) ++{ ++ struct vcmd_ctx_dlimit_base_v0 vc_data; ++ ++ if (copy_from_user(&vc_data, data, sizeof(vc_data))) ++ return -EFAULT; ++ ++ return do_addrem_dlimit(id, vc_data.name, vc_data.flags, 1); ++} ++ ++int vc_rem_dlimit(uint32_t id, void __user *data) ++{ ++ struct vcmd_ctx_dlimit_base_v0 vc_data; ++ ++ if (copy_from_user(&vc_data, data, sizeof(vc_data))) ++ return -EFAULT; ++ ++ return do_addrem_dlimit(id, vc_data.name, vc_data.flags, 0); ++} ++ ++#ifdef CONFIG_COMPAT ++ ++int vc_add_dlimit_x32(uint32_t id, void __user *data) ++{ ++ struct vcmd_ctx_dlimit_base_v0_x32 vc_data; ++ ++ if (copy_from_user(&vc_data, data, sizeof(vc_data))) ++ return -EFAULT; ++ ++ return do_addrem_dlimit(id, ++ compat_ptr(vc_data.name_ptr), vc_data.flags, 1); ++} ++ ++int vc_rem_dlimit_x32(uint32_t id, void __user *data) ++{ ++ struct vcmd_ctx_dlimit_base_v0_x32 vc_data; ++ ++ if (copy_from_user(&vc_data, data, sizeof(vc_data))) ++ return -EFAULT; ++ ++ return do_addrem_dlimit(id, ++ compat_ptr(vc_data.name_ptr), vc_data.flags, 0); ++} ++ ++#endif /* CONFIG_COMPAT */ ++ ++ ++static inline ++int do_set_dlimit(uint32_t id, const char __user *name, ++ uint32_t space_used, uint32_t space_total, ++ uint32_t inodes_used, uint32_t inodes_total, ++ uint32_t reserved, uint32_t flags) ++{ ++ struct path path; ++ int ret; ++ ++ ret = user_lpath(name, &path); ++ if (!ret) { ++ struct super_block *sb; ++ struct dl_info *dli; ++ ++ ret = -EINVAL; ++ if (!path.dentry->d_inode) ++ goto out_release; ++ if (!(sb = path.dentry->d_inode->i_sb)) ++ goto out_release; ++ ++ /* sanity checks */ ++ if ((reserved != CDLIM_KEEP && ++ reserved > 100) || ++ (inodes_used 
!= CDLIM_KEEP && ++ inodes_used > inodes_total) || ++ (space_used != CDLIM_KEEP && ++ space_used > space_total)) ++ goto out_release; ++ ++ ret = -ESRCH; ++ dli = locate_dl_info(sb, id); ++ if (!dli) ++ goto out_release; ++ ++ spin_lock(&dli->dl_lock); ++ ++ if (inodes_used != CDLIM_KEEP) ++ dli->dl_inodes_used = inodes_used; ++ if (inodes_total != CDLIM_KEEP) ++ dli->dl_inodes_total = inodes_total; ++ if (space_used != CDLIM_KEEP) ++ dli->dl_space_used = dlimit_space_32to64( ++ space_used, flags, DLIMS_USED); ++ ++ if (space_total == CDLIM_INFINITY) ++ dli->dl_space_total = DLIM_INFINITY; ++ else if (space_total != CDLIM_KEEP) ++ dli->dl_space_total = dlimit_space_32to64( ++ space_total, flags, DLIMS_TOTAL); ++ ++ if (reserved != CDLIM_KEEP) ++ dli->dl_nrlmult = (1 << 10) * (100 - reserved) / 100; ++ ++ spin_unlock(&dli->dl_lock); ++ ++ put_dl_info(dli); ++ ret = 0; ++ ++ out_release: ++ path_put(&path); ++ } ++ return ret; ++} ++ ++int vc_set_dlimit(uint32_t id, void __user *data) ++{ ++ struct vcmd_ctx_dlimit_v0 vc_data; ++ ++ if (copy_from_user(&vc_data, data, sizeof(vc_data))) ++ return -EFAULT; ++ ++ return do_set_dlimit(id, vc_data.name, ++ vc_data.space_used, vc_data.space_total, ++ vc_data.inodes_used, vc_data.inodes_total, ++ vc_data.reserved, vc_data.flags); ++} ++ ++#ifdef CONFIG_COMPAT ++ ++int vc_set_dlimit_x32(uint32_t id, void __user *data) ++{ ++ struct vcmd_ctx_dlimit_v0_x32 vc_data; ++ ++ if (copy_from_user(&vc_data, data, sizeof(vc_data))) ++ return -EFAULT; ++ ++ return do_set_dlimit(id, compat_ptr(vc_data.name_ptr), ++ vc_data.space_used, vc_data.space_total, ++ vc_data.inodes_used, vc_data.inodes_total, ++ vc_data.reserved, vc_data.flags); ++} ++ ++#endif /* CONFIG_COMPAT */ ++ ++ ++static inline ++int do_get_dlimit(uint32_t id, const char __user *name, ++ uint32_t *space_used, uint32_t *space_total, ++ uint32_t *inodes_used, uint32_t *inodes_total, ++ uint32_t *reserved, uint32_t *flags) ++{ ++ struct path path; ++ int ret; ++ ++ ret = user_lpath(name, &path); ++ if (!ret) { ++ struct super_block *sb; ++ struct dl_info *dli; ++ ++ ret = -EINVAL; ++ if (!path.dentry->d_inode) ++ goto out_release; ++ if (!(sb = path.dentry->d_inode->i_sb)) ++ goto out_release; ++ ++ ret = -ESRCH; ++ dli = locate_dl_info(sb, id); ++ if (!dli) ++ goto out_release; ++ ++ spin_lock(&dli->dl_lock); ++ *inodes_used = dli->dl_inodes_used; ++ *inodes_total = dli->dl_inodes_total; ++ ++ *space_used = dlimit_space_64to32( ++ dli->dl_space_used, flags, DLIMS_USED); ++ ++ if (dli->dl_space_total == DLIM_INFINITY) ++ *space_total = CDLIM_INFINITY; ++ else ++ *space_total = dlimit_space_64to32( ++ dli->dl_space_total, flags, DLIMS_TOTAL); ++ ++ *reserved = 100 - ((dli->dl_nrlmult * 100 + 512) >> 10); ++ spin_unlock(&dli->dl_lock); ++ ++ put_dl_info(dli); ++ ret = -EFAULT; ++ ++ ret = 0; ++ out_release: ++ path_put(&path); ++ } ++ return ret; ++} ++ ++ ++int vc_get_dlimit(uint32_t id, void __user *data) ++{ ++ struct vcmd_ctx_dlimit_v0 vc_data; ++ int ret; ++ ++ if (copy_from_user(&vc_data, data, sizeof(vc_data))) ++ return -EFAULT; ++ ++ ret = do_get_dlimit(id, vc_data.name, ++ &vc_data.space_used, &vc_data.space_total, ++ &vc_data.inodes_used, &vc_data.inodes_total, ++ &vc_data.reserved, &vc_data.flags); ++ if (ret) ++ return ret; ++ ++ if (copy_to_user(data, &vc_data, sizeof(vc_data))) ++ return -EFAULT; ++ return 0; ++} ++ ++#ifdef CONFIG_COMPAT ++ ++int vc_get_dlimit_x32(uint32_t id, void __user *data) ++{ ++ struct vcmd_ctx_dlimit_v0_x32 vc_data; ++ int ret; ++ ++ if (copy_from_user(&vc_data, 
data, sizeof(vc_data))) ++ return -EFAULT; ++ ++ ret = do_get_dlimit(id, compat_ptr(vc_data.name_ptr), ++ &vc_data.space_used, &vc_data.space_total, ++ &vc_data.inodes_used, &vc_data.inodes_total, ++ &vc_data.reserved, &vc_data.flags); ++ if (ret) ++ return ret; ++ ++ if (copy_to_user(data, &vc_data, sizeof(vc_data))) ++ return -EFAULT; ++ return 0; ++} ++ ++#endif /* CONFIG_COMPAT */ ++ ++ ++void vx_vsi_statfs(struct super_block *sb, struct kstatfs *buf) ++{ ++ struct dl_info *dli; ++ __u64 blimit, bfree, bavail; ++ __u32 ifree; ++ ++ dli = locate_dl_info(sb, dx_current_tag()); ++ if (!dli) ++ return; ++ ++ spin_lock(&dli->dl_lock); ++ if (dli->dl_inodes_total == (unsigned long)DLIM_INFINITY) ++ goto no_ilim; ++ ++ /* reduce max inodes available to limit */ ++ if (buf->f_files > dli->dl_inodes_total) ++ buf->f_files = dli->dl_inodes_total; ++ ++ ifree = dli->dl_inodes_total - dli->dl_inodes_used; ++ /* reduce free inodes to min */ ++ if (ifree < buf->f_ffree) ++ buf->f_ffree = ifree; ++ ++no_ilim: ++ if (dli->dl_space_total == DLIM_INFINITY) ++ goto no_blim; ++ ++ blimit = dli->dl_space_total >> sb->s_blocksize_bits; ++ ++ if (dli->dl_space_total < dli->dl_space_used) ++ bfree = 0; ++ else ++ bfree = (dli->dl_space_total - dli->dl_space_used) ++ >> sb->s_blocksize_bits; ++ ++ bavail = ((dli->dl_space_total >> 10) * dli->dl_nrlmult); ++ if (bavail < dli->dl_space_used) ++ bavail = 0; ++ else ++ bavail = (bavail - dli->dl_space_used) ++ >> sb->s_blocksize_bits; ++ ++ /* reduce max space available to limit */ ++ if (buf->f_blocks > blimit) ++ buf->f_blocks = blimit; ++ ++ /* reduce free space to min */ ++ if (bfree < buf->f_bfree) ++ buf->f_bfree = bfree; ++ ++ /* reduce avail space to min */ ++ if (bavail < buf->f_bavail) ++ buf->f_bavail = bavail; ++ ++no_blim: ++ spin_unlock(&dli->dl_lock); ++ put_dl_info(dli); ++ ++ return; ++} ++ ++#include ++ ++EXPORT_SYMBOL_GPL(locate_dl_info); ++EXPORT_SYMBOL_GPL(rcu_free_dl_info); ++ +diff -NurpP --minimal linux-3.3.8/kernel/vserver/helper.c linux-3.3.8-vs2.3.3.4/kernel/vserver/helper.c +--- linux-3.3.8/kernel/vserver/helper.c 1970-01-01 01:00:00.000000000 +0100 ++++ linux-3.3.8-vs2.3.3.4/kernel/vserver/helper.c 2012-05-21 01:22:37.000000000 +0200 +@@ -0,0 +1,228 @@ ++/* ++ * linux/kernel/vserver/helper.c ++ * ++ * Virtual Context Support ++ * ++ * Copyright (C) 2004-2007 Herbert Pötzl ++ * ++ * V0.01 basic helper ++ * ++ */ ++ ++#include ++#include ++#include ++#include ++#include ++ ++ ++char vshelper_path[255] = "/sbin/vshelper"; ++ ++static int vshelper_init(struct subprocess_info *info, struct cred *new_cred) ++{ ++ current->flags &= ~PF_THREAD_BOUND; ++ return 0; ++} ++ ++static int do_vshelper(char *name, char *argv[], char *envp[], int sync) ++{ ++ int ret; ++ ++ if ((ret = call_usermodehelper_fns(name, argv, envp, sync, ++ vshelper_init, NULL, NULL))) { ++ printk(KERN_WARNING "%s: (%s %s) returned %s with %d\n", ++ name, argv[1], argv[2], ++ sync ? "sync" : "async", ret); ++ } ++ vxdprintk(VXD_CBIT(switch, 4), ++ "%s: (%s %s) returned %s with %d", ++ name, argv[1], argv[2], sync ? "sync" : "async", ret); ++ return ret; ++} ++ ++/* ++ * vshelper path is set via /proc/sys ++ * invoked by vserver sys_reboot(), with ++ * the following arguments ++ * ++ * argv [0] = vshelper_path; ++ * argv [1] = action: "restart", "halt", "poweroff", ... 
++ * argv [2] = context identifier ++ * ++ * envp [*] = type-specific parameters ++ */ ++ ++long vs_reboot_helper(struct vx_info *vxi, int cmd, void __user *arg) ++{ ++ char id_buf[8], cmd_buf[16]; ++ char uid_buf[16], pid_buf[16]; ++ int ret; ++ ++ char *argv[] = {vshelper_path, NULL, id_buf, 0}; ++ char *envp[] = {"HOME=/", "TERM=linux", ++ "PATH=/sbin:/usr/sbin:/bin:/usr/bin", ++ uid_buf, pid_buf, cmd_buf, 0}; ++ ++ if (vx_info_state(vxi, VXS_HELPER)) ++ return -EAGAIN; ++ vxi->vx_state |= VXS_HELPER; ++ ++ snprintf(id_buf, sizeof(id_buf)-1, "%d", vxi->vx_id); ++ ++ snprintf(cmd_buf, sizeof(cmd_buf)-1, "VS_CMD=%08x", cmd); ++ snprintf(uid_buf, sizeof(uid_buf)-1, "VS_UID=%d", current_uid()); ++ snprintf(pid_buf, sizeof(pid_buf)-1, "VS_PID=%d", current->pid); ++ ++ switch (cmd) { ++ case LINUX_REBOOT_CMD_RESTART: ++ argv[1] = "restart"; ++ break; ++ ++ case LINUX_REBOOT_CMD_HALT: ++ argv[1] = "halt"; ++ break; ++ ++ case LINUX_REBOOT_CMD_POWER_OFF: ++ argv[1] = "poweroff"; ++ break; ++ ++ case LINUX_REBOOT_CMD_SW_SUSPEND: ++ argv[1] = "swsusp"; ++ break; ++ ++ case LINUX_REBOOT_CMD_OOM: ++ argv[1] = "oom"; ++ break; ++ ++ default: ++ vxi->vx_state &= ~VXS_HELPER; ++ return 0; ++ } ++ ++ ret = do_vshelper(vshelper_path, argv, envp, 0); ++ vxi->vx_state &= ~VXS_HELPER; ++ __wakeup_vx_info(vxi); ++ return (ret) ? -EPERM : 0; ++} ++ ++ ++long vs_reboot(unsigned int cmd, void __user *arg) ++{ ++ struct vx_info *vxi = current_vx_info(); ++ long ret = 0; ++ ++ vxdprintk(VXD_CBIT(misc, 5), ++ "vs_reboot(%p[#%d],%u)", ++ vxi, vxi ? vxi->vx_id : 0, cmd); ++ ++ ret = vs_reboot_helper(vxi, cmd, arg); ++ if (ret) ++ return ret; ++ ++ vxi->reboot_cmd = cmd; ++ if (vx_info_flags(vxi, VXF_REBOOT_KILL, 0)) { ++ switch (cmd) { ++ case LINUX_REBOOT_CMD_RESTART: ++ case LINUX_REBOOT_CMD_HALT: ++ case LINUX_REBOOT_CMD_POWER_OFF: ++ vx_info_kill(vxi, 0, SIGKILL); ++ vx_info_kill(vxi, 1, SIGKILL); ++ default: ++ break; ++ } ++ } ++ return 0; ++} ++ ++long vs_oom_action(unsigned int cmd) ++{ ++ struct vx_info *vxi = current_vx_info(); ++ long ret = 0; ++ ++ vxdprintk(VXD_CBIT(misc, 5), ++ "vs_oom_action(%p[#%d],%u)", ++ vxi, vxi ? 
vxi->vx_id : 0, cmd); ++ ++ ret = vs_reboot_helper(vxi, cmd, NULL); ++ if (ret) ++ return ret; ++ ++ vxi->reboot_cmd = cmd; ++ if (vx_info_flags(vxi, VXF_REBOOT_KILL, 0)) { ++ vx_info_kill(vxi, 0, SIGKILL); ++ vx_info_kill(vxi, 1, SIGKILL); ++ } ++ return 0; ++} ++ ++/* ++ * argv [0] = vshelper_path; ++ * argv [1] = action: "startup", "shutdown" ++ * argv [2] = context identifier ++ * ++ * envp [*] = type-specific parameters ++ */ ++ ++long vs_state_change(struct vx_info *vxi, unsigned int cmd) ++{ ++ char id_buf[8], cmd_buf[16]; ++ char *argv[] = {vshelper_path, NULL, id_buf, 0}; ++ char *envp[] = {"HOME=/", "TERM=linux", ++ "PATH=/sbin:/usr/sbin:/bin:/usr/bin", cmd_buf, 0}; ++ ++ if (!vx_info_flags(vxi, VXF_SC_HELPER, 0)) ++ return 0; ++ ++ snprintf(id_buf, sizeof(id_buf)-1, "%d", vxi->vx_id); ++ snprintf(cmd_buf, sizeof(cmd_buf)-1, "VS_CMD=%08x", cmd); ++ ++ switch (cmd) { ++ case VSC_STARTUP: ++ argv[1] = "startup"; ++ break; ++ case VSC_SHUTDOWN: ++ argv[1] = "shutdown"; ++ break; ++ default: ++ return 0; ++ } ++ ++ return do_vshelper(vshelper_path, argv, envp, 1); ++} ++ ++ ++/* ++ * argv [0] = vshelper_path; ++ * argv [1] = action: "netup", "netdown" ++ * argv [2] = context identifier ++ * ++ * envp [*] = type-specific parameters ++ */ ++ ++long vs_net_change(struct nx_info *nxi, unsigned int cmd) ++{ ++ char id_buf[8], cmd_buf[16]; ++ char *argv[] = {vshelper_path, NULL, id_buf, 0}; ++ char *envp[] = {"HOME=/", "TERM=linux", ++ "PATH=/sbin:/usr/sbin:/bin:/usr/bin", cmd_buf, 0}; ++ ++ if (!nx_info_flags(nxi, NXF_SC_HELPER, 0)) ++ return 0; ++ ++ snprintf(id_buf, sizeof(id_buf)-1, "%d", nxi->nx_id); ++ snprintf(cmd_buf, sizeof(cmd_buf)-1, "VS_CMD=%08x", cmd); ++ ++ switch (cmd) { ++ case VSC_NETUP: ++ argv[1] = "netup"; ++ break; ++ case VSC_NETDOWN: ++ argv[1] = "netdown"; ++ break; ++ default: ++ return 0; ++ } ++ ++ return do_vshelper(vshelper_path, argv, envp, 1); ++} ++ +diff -NurpP --minimal linux-3.3.8/kernel/vserver/history.c linux-3.3.8-vs2.3.3.4/kernel/vserver/history.c +--- linux-3.3.8/kernel/vserver/history.c 1970-01-01 01:00:00.000000000 +0100 ++++ linux-3.3.8-vs2.3.3.4/kernel/vserver/history.c 2012-02-24 03:55:07.000000000 +0100 +@@ -0,0 +1,258 @@ ++/* ++ * kernel/vserver/history.c ++ * ++ * Virtual Context History Backtrace ++ * ++ * Copyright (C) 2004-2007 Herbert Pötzl ++ * ++ * V0.01 basic structure ++ * V0.02 hash/unhash and trace ++ * V0.03 preemption fixes ++ * ++ */ ++ ++#include ++#include ++ ++#include ++#include ++#include ++#include ++ ++ ++#ifdef CONFIG_VSERVER_HISTORY ++#define VXH_SIZE CONFIG_VSERVER_HISTORY_SIZE ++#else ++#define VXH_SIZE 64 ++#endif ++ ++struct _vx_history { ++ unsigned int counter; ++ ++ struct _vx_hist_entry entry[VXH_SIZE + 1]; ++}; ++ ++ ++DEFINE_PER_CPU(struct _vx_history, vx_history_buffer); ++ ++unsigned volatile int vxh_active = 1; ++ ++static atomic_t sequence = ATOMIC_INIT(0); ++ ++ ++/* vxh_advance() ++ ++ * requires disabled preemption */ ++ ++struct _vx_hist_entry *vxh_advance(void *loc) ++{ ++ unsigned int cpu = smp_processor_id(); ++ struct _vx_history *hist = &per_cpu(vx_history_buffer, cpu); ++ struct _vx_hist_entry *entry; ++ unsigned int index; ++ ++ index = vxh_active ? 
(hist->counter++ % VXH_SIZE) : VXH_SIZE; ++ entry = &hist->entry[index]; ++ ++ entry->seq = atomic_inc_return(&sequence); ++ entry->loc = loc; ++ return entry; ++} ++ ++EXPORT_SYMBOL_GPL(vxh_advance); ++ ++ ++#define VXH_LOC_FMTS "(#%04x,*%d):%p" ++ ++#define VXH_LOC_ARGS(e) (e)->seq, cpu, (e)->loc ++ ++ ++#define VXH_VXI_FMTS "%p[#%d,%d.%d]" ++ ++#define VXH_VXI_ARGS(e) (e)->vxi.ptr, \ ++ (e)->vxi.ptr ? (e)->vxi.xid : 0, \ ++ (e)->vxi.ptr ? (e)->vxi.usecnt : 0, \ ++ (e)->vxi.ptr ? (e)->vxi.tasks : 0 ++ ++void vxh_dump_entry(struct _vx_hist_entry *e, unsigned cpu) ++{ ++ switch (e->type) { ++ case VXH_THROW_OOPS: ++ printk( VXH_LOC_FMTS " oops \n", VXH_LOC_ARGS(e)); ++ break; ++ ++ case VXH_GET_VX_INFO: ++ case VXH_PUT_VX_INFO: ++ printk( VXH_LOC_FMTS " %s_vx_info " VXH_VXI_FMTS "\n", ++ VXH_LOC_ARGS(e), ++ (e->type == VXH_GET_VX_INFO) ? "get" : "put", ++ VXH_VXI_ARGS(e)); ++ break; ++ ++ case VXH_INIT_VX_INFO: ++ case VXH_SET_VX_INFO: ++ case VXH_CLR_VX_INFO: ++ printk( VXH_LOC_FMTS " %s_vx_info " VXH_VXI_FMTS " @%p\n", ++ VXH_LOC_ARGS(e), ++ (e->type == VXH_INIT_VX_INFO) ? "init" : ++ ((e->type == VXH_SET_VX_INFO) ? "set" : "clr"), ++ VXH_VXI_ARGS(e), e->sc.data); ++ break; ++ ++ case VXH_CLAIM_VX_INFO: ++ case VXH_RELEASE_VX_INFO: ++ printk( VXH_LOC_FMTS " %s_vx_info " VXH_VXI_FMTS " @%p\n", ++ VXH_LOC_ARGS(e), ++ (e->type == VXH_CLAIM_VX_INFO) ? "claim" : "release", ++ VXH_VXI_ARGS(e), e->sc.data); ++ break; ++ ++ case VXH_ALLOC_VX_INFO: ++ case VXH_DEALLOC_VX_INFO: ++ printk( VXH_LOC_FMTS " %s_vx_info " VXH_VXI_FMTS "\n", ++ VXH_LOC_ARGS(e), ++ (e->type == VXH_ALLOC_VX_INFO) ? "alloc" : "dealloc", ++ VXH_VXI_ARGS(e)); ++ break; ++ ++ case VXH_HASH_VX_INFO: ++ case VXH_UNHASH_VX_INFO: ++ printk( VXH_LOC_FMTS " __%s_vx_info " VXH_VXI_FMTS "\n", ++ VXH_LOC_ARGS(e), ++ (e->type == VXH_HASH_VX_INFO) ? "hash" : "unhash", ++ VXH_VXI_ARGS(e)); ++ break; ++ ++ case VXH_LOC_VX_INFO: ++ case VXH_LOOKUP_VX_INFO: ++ case VXH_CREATE_VX_INFO: ++ printk( VXH_LOC_FMTS " __%s_vx_info [#%d] -> " VXH_VXI_FMTS "\n", ++ VXH_LOC_ARGS(e), ++ (e->type == VXH_CREATE_VX_INFO) ? "create" : ++ ((e->type == VXH_LOC_VX_INFO) ? "loc" : "lookup"), ++ e->ll.arg, VXH_VXI_ARGS(e)); ++ break; ++ } ++} ++ ++static void __vxh_dump_history(void) ++{ ++ unsigned int i, cpu; ++ ++ printk("History:\tSEQ: %8x\tNR_CPUS: %d\n", ++ atomic_read(&sequence), NR_CPUS); ++ ++ for (i = 0; i < VXH_SIZE; i++) { ++ for_each_online_cpu(cpu) { ++ struct _vx_history *hist = ++ &per_cpu(vx_history_buffer, cpu); ++ unsigned int index = (hist->counter - i) % VXH_SIZE; ++ struct _vx_hist_entry *entry = &hist->entry[index]; ++ ++ vxh_dump_entry(entry, cpu); ++ } ++ } ++} ++ ++void vxh_dump_history(void) ++{ ++ vxh_active = 0; ++#ifdef CONFIG_SMP ++ local_irq_enable(); ++ smp_send_stop(); ++ local_irq_disable(); ++#endif ++ __vxh_dump_history(); ++} ++ ++ ++/* vserver syscall commands below here */ ++ ++ ++int vc_dump_history(uint32_t id) ++{ ++ vxh_active = 0; ++ __vxh_dump_history(); ++ vxh_active = 1; ++ ++ return 0; ++} ++ ++ ++int do_read_history(struct __user _vx_hist_entry *data, ++ int cpu, uint32_t *index, uint32_t *count) ++{ ++ int pos, ret = 0; ++ struct _vx_history *hist = &per_cpu(vx_history_buffer, cpu); ++ int end = hist->counter; ++ int start = end - VXH_SIZE + 2; ++ int idx = *index; ++ ++ /* special case: get current pos */ ++ if (!*count) { ++ *index = end; ++ return 0; ++ } ++ ++ /* have we lost some data? 
*/ ++ if (idx < start) ++ idx = start; ++ ++ for (pos = 0; (pos < *count) && (idx < end); pos++, idx++) { ++ struct _vx_hist_entry *entry = ++ &hist->entry[idx % VXH_SIZE]; ++ ++ /* send entry to userspace */ ++ ret = copy_to_user(&data[pos], entry, sizeof(*entry)); ++ if (ret) ++ break; ++ } ++ /* save new index and count */ ++ *index = idx; ++ *count = pos; ++ return ret ? ret : (*index < end); ++} ++ ++int vc_read_history(uint32_t id, void __user *data) ++{ ++ struct vcmd_read_history_v0 vc_data; ++ int ret; ++ ++ if (id >= NR_CPUS) ++ return -EINVAL; ++ ++ if (copy_from_user(&vc_data, data, sizeof(vc_data))) ++ return -EFAULT; ++ ++ ret = do_read_history((struct __user _vx_hist_entry *)vc_data.data, ++ id, &vc_data.index, &vc_data.count); ++ ++ if (copy_to_user(data, &vc_data, sizeof(vc_data))) ++ return -EFAULT; ++ return ret; ++} ++ ++#ifdef CONFIG_COMPAT ++ ++int vc_read_history_x32(uint32_t id, void __user *data) ++{ ++ struct vcmd_read_history_v0_x32 vc_data; ++ int ret; ++ ++ if (id >= NR_CPUS) ++ return -EINVAL; ++ ++ if (copy_from_user(&vc_data, data, sizeof(vc_data))) ++ return -EFAULT; ++ ++ ret = do_read_history((struct __user _vx_hist_entry *) ++ compat_ptr(vc_data.data_ptr), ++ id, &vc_data.index, &vc_data.count); ++ ++ if (copy_to_user(data, &vc_data, sizeof(vc_data))) ++ return -EFAULT; ++ return ret; ++} ++ ++#endif /* CONFIG_COMPAT */ ++ +diff -NurpP --minimal linux-3.3.8/kernel/vserver/inet.c linux-3.3.8-vs2.3.3.4/kernel/vserver/inet.c +--- linux-3.3.8/kernel/vserver/inet.c 1970-01-01 01:00:00.000000000 +0100 ++++ linux-3.3.8-vs2.3.3.4/kernel/vserver/inet.c 2012-02-24 03:55:07.000000000 +0100 +@@ -0,0 +1,226 @@ ++ ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++ ++ ++int nx_v4_addr_conflict(struct nx_info *nxi1, struct nx_info *nxi2) ++{ ++ int ret = 0; ++ ++ if (!nxi1 || !nxi2 || nxi1 == nxi2) ++ ret = 1; ++ else { ++ struct nx_addr_v4 *ptr; ++ ++ for (ptr = &nxi1->v4; ptr; ptr = ptr->next) { ++ if (v4_nx_addr_in_nx_info(nxi2, ptr, -1)) { ++ ret = 1; ++ break; ++ } ++ } ++ } ++ ++ vxdprintk(VXD_CBIT(net, 2), ++ "nx_v4_addr_conflict(%p,%p): %d", ++ nxi1, nxi2, ret); ++ ++ return ret; ++} ++ ++ ++#ifdef CONFIG_IPV6 ++ ++int nx_v6_addr_conflict(struct nx_info *nxi1, struct nx_info *nxi2) ++{ ++ int ret = 0; ++ ++ if (!nxi1 || !nxi2 || nxi1 == nxi2) ++ ret = 1; ++ else { ++ struct nx_addr_v6 *ptr; ++ ++ for (ptr = &nxi1->v6; ptr; ptr = ptr->next) { ++ if (v6_nx_addr_in_nx_info(nxi2, ptr, -1)) { ++ ret = 1; ++ break; ++ } ++ } ++ } ++ ++ vxdprintk(VXD_CBIT(net, 2), ++ "nx_v6_addr_conflict(%p,%p): %d", ++ nxi1, nxi2, ret); ++ ++ return ret; ++} ++ ++#endif ++ ++int v4_dev_in_nx_info(struct net_device *dev, struct nx_info *nxi) ++{ ++ struct in_device *in_dev; ++ struct in_ifaddr **ifap; ++ struct in_ifaddr *ifa; ++ int ret = 0; ++ ++ if (!dev) ++ goto out; ++ in_dev = in_dev_get(dev); ++ if (!in_dev) ++ goto out; ++ ++ for (ifap = &in_dev->ifa_list; (ifa = *ifap) != NULL; ++ ifap = &ifa->ifa_next) { ++ if (v4_addr_in_nx_info(nxi, ifa->ifa_local, NXA_MASK_SHOW)) { ++ ret = 1; ++ break; ++ } ++ } ++ in_dev_put(in_dev); ++out: ++ return ret; ++} ++ ++ ++#ifdef CONFIG_IPV6 ++ ++int v6_dev_in_nx_info(struct net_device *dev, struct nx_info *nxi) ++{ ++ struct inet6_dev *in_dev; ++ struct inet6_ifaddr *ifa; ++ int ret = 0; ++ ++ if (!dev) ++ goto out; ++ in_dev = in6_dev_get(dev); ++ if (!in_dev) ++ goto out; ++ ++ // for (ifap = &in_dev->addr_list; (ifa = *ifap) != NULL; ++ list_for_each_entry(ifa, &in_dev->addr_list, if_list) { ++ if 
(v6_addr_in_nx_info(nxi, &ifa->addr, -1)) { ++ ret = 1; ++ break; ++ } ++ } ++ in6_dev_put(in_dev); ++out: ++ return ret; ++} ++ ++#endif ++ ++int dev_in_nx_info(struct net_device *dev, struct nx_info *nxi) ++{ ++ int ret = 1; ++ ++ if (!nxi) ++ goto out; ++ if (nxi->v4.type && v4_dev_in_nx_info(dev, nxi)) ++ goto out; ++#ifdef CONFIG_IPV6 ++ ret = 2; ++ if (nxi->v6.type && v6_dev_in_nx_info(dev, nxi)) ++ goto out; ++#endif ++ ret = 0; ++out: ++ vxdprintk(VXD_CBIT(net, 3), ++ "dev_in_nx_info(%p,%p[#%d]) = %d", ++ dev, nxi, nxi ? nxi->nx_id : 0, ret); ++ return ret; ++} ++ ++struct rtable *ip_v4_find_src(struct net *net, struct nx_info *nxi, ++ struct flowi4 *fl4) ++{ ++ struct rtable *rt; ++ ++ if (!nxi) ++ return NULL; ++ ++ /* FIXME: handle lback only case */ ++ if (!NX_IPV4(nxi)) ++ return ERR_PTR(-EPERM); ++ ++ vxdprintk(VXD_CBIT(net, 4), ++ "ip_v4_find_src(%p[#%u]) " NIPQUAD_FMT " -> " NIPQUAD_FMT, ++ nxi, nxi ? nxi->nx_id : 0, ++ NIPQUAD(fl4->saddr), NIPQUAD(fl4->daddr)); ++ ++ /* single IP is unconditional */ ++ if (nx_info_flags(nxi, NXF_SINGLE_IP, 0) && ++ (fl4->saddr == INADDR_ANY)) ++ fl4->saddr = nxi->v4.ip[0].s_addr; ++ ++ if (fl4->saddr == INADDR_ANY) { ++ struct nx_addr_v4 *ptr; ++ __be32 found = 0; ++ ++ rt = __ip_route_output_key(net, fl4); ++ if (!IS_ERR(rt)) { ++ found = fl4->saddr; ++ ip_rt_put(rt); ++ vxdprintk(VXD_CBIT(net, 4), ++ "ip_v4_find_src(%p[#%u]) rok[%u]: " NIPQUAD_FMT, ++ nxi, nxi ? nxi->nx_id : 0, fl4->flowi4_oif, NIPQUAD(found)); ++ if (v4_addr_in_nx_info(nxi, found, NXA_MASK_BIND)) ++ goto found; ++ } ++ ++ for (ptr = &nxi->v4; ptr; ptr = ptr->next) { ++ __be32 primary = ptr->ip[0].s_addr; ++ __be32 mask = ptr->mask.s_addr; ++ __be32 neta = primary & mask; ++ ++ vxdprintk(VXD_CBIT(net, 4), "ip_v4_find_src(%p[#%u]) chk: " ++ NIPQUAD_FMT "/" NIPQUAD_FMT "/" NIPQUAD_FMT, ++ nxi, nxi ? nxi->nx_id : 0, NIPQUAD(primary), ++ NIPQUAD(mask), NIPQUAD(neta)); ++ if ((found & mask) != neta) ++ continue; ++ ++ fl4->saddr = primary; ++ rt = __ip_route_output_key(net, fl4); ++ vxdprintk(VXD_CBIT(net, 4), ++ "ip_v4_find_src(%p[#%u]) rok[%u]: " NIPQUAD_FMT, ++ nxi, nxi ? nxi->nx_id : 0, fl4->flowi4_oif, NIPQUAD(primary)); ++ if (!IS_ERR(rt)) { ++ found = fl4->saddr; ++ ip_rt_put(rt); ++ if (found == primary) ++ goto found; ++ } ++ } ++ /* still no source ip? */ ++ found = ipv4_is_loopback(fl4->daddr) ++ ? 
IPI_LOOPBACK : nxi->v4.ip[0].s_addr; ++ found: ++ /* assign src ip to flow */ ++ fl4->saddr = found; ++ ++ } else { ++ if (!v4_addr_in_nx_info(nxi, fl4->saddr, NXA_MASK_BIND)) ++ return ERR_PTR(-EPERM); ++ } ++ ++ if (nx_info_flags(nxi, NXF_LBACK_REMAP, 0)) { ++ if (ipv4_is_loopback(fl4->daddr)) ++ fl4->daddr = nxi->v4_lback.s_addr; ++ if (ipv4_is_loopback(fl4->saddr)) ++ fl4->saddr = nxi->v4_lback.s_addr; ++ } else if (ipv4_is_loopback(fl4->daddr) && ++ !nx_info_flags(nxi, NXF_LBACK_ALLOW, 0)) ++ return ERR_PTR(-EPERM); ++ ++ return NULL; ++} ++ ++EXPORT_SYMBOL_GPL(ip_v4_find_src); ++ +diff -NurpP --minimal linux-3.3.8/kernel/vserver/init.c linux-3.3.8-vs2.3.3.4/kernel/vserver/init.c +--- linux-3.3.8/kernel/vserver/init.c 1970-01-01 01:00:00.000000000 +0100 ++++ linux-3.3.8-vs2.3.3.4/kernel/vserver/init.c 2012-02-24 03:55:07.000000000 +0100 +@@ -0,0 +1,45 @@ ++/* ++ * linux/kernel/init.c ++ * ++ * Virtual Server Init ++ * ++ * Copyright (C) 2004-2007 Herbert Pötzl ++ * ++ * V0.01 basic structure ++ * ++ */ ++ ++#include ++ ++int vserver_register_sysctl(void); ++void vserver_unregister_sysctl(void); ++ ++ ++static int __init init_vserver(void) ++{ ++ int ret = 0; ++ ++#ifdef CONFIG_VSERVER_DEBUG ++ vserver_register_sysctl(); ++#endif ++ return ret; ++} ++ ++ ++static void __exit exit_vserver(void) ++{ ++ ++#ifdef CONFIG_VSERVER_DEBUG ++ vserver_unregister_sysctl(); ++#endif ++ return; ++} ++ ++/* FIXME: GFP_ZONETYPES gone ++long vx_slab[GFP_ZONETYPES]; */ ++long vx_area; ++ ++ ++module_init(init_vserver); ++module_exit(exit_vserver); ++ +diff -NurpP --minimal linux-3.3.8/kernel/vserver/inode.c linux-3.3.8-vs2.3.3.4/kernel/vserver/inode.c +--- linux-3.3.8/kernel/vserver/inode.c 1970-01-01 01:00:00.000000000 +0100 ++++ linux-3.3.8-vs2.3.3.4/kernel/vserver/inode.c 2012-02-24 03:55:07.000000000 +0100 +@@ -0,0 +1,437 @@ ++/* ++ * linux/kernel/vserver/inode.c ++ * ++ * Virtual Server: File System Support ++ * ++ * Copyright (C) 2004-2007 Herbert Pötzl ++ * ++ * V0.01 separated from vcontext V0.05 ++ * V0.02 moved to tag (instead of xid) ++ * ++ */ ++ ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++ ++#include ++ ++ ++static int __vc_get_iattr(struct inode *in, uint32_t *tag, uint32_t *flags, uint32_t *mask) ++{ ++ struct proc_dir_entry *entry; ++ ++ if (!in || !in->i_sb) ++ return -ESRCH; ++ ++ *flags = IATTR_TAG ++ | (IS_IMMUTABLE(in) ? IATTR_IMMUTABLE : 0) ++ | (IS_IXUNLINK(in) ? IATTR_IXUNLINK : 0) ++ | (IS_BARRIER(in) ? IATTR_BARRIER : 0) ++ | (IS_COW(in) ? IATTR_COW : 0); ++ *mask = IATTR_IXUNLINK | IATTR_IMMUTABLE | IATTR_COW; ++ ++ if (S_ISDIR(in->i_mode)) ++ *mask |= IATTR_BARRIER; ++ ++ if (IS_TAGGED(in)) { ++ *tag = in->i_tag; ++ *mask |= IATTR_TAG; ++ } ++ ++ switch (in->i_sb->s_magic) { ++ case PROC_SUPER_MAGIC: ++ entry = PROC_I(in)->pde; ++ ++ /* check for specific inodes? 
*/ ++ if (entry) ++ *mask |= IATTR_FLAGS; ++ if (entry) ++ *flags |= (entry->vx_flags & IATTR_FLAGS); ++ else ++ *flags |= (PROC_I(in)->vx_flags & IATTR_FLAGS); ++ break; ++ ++ case DEVPTS_SUPER_MAGIC: ++ *tag = in->i_tag; ++ *mask |= IATTR_TAG; ++ break; ++ ++ default: ++ break; ++ } ++ return 0; ++} ++ ++int vc_get_iattr(void __user *data) ++{ ++ struct path path; ++ struct vcmd_ctx_iattr_v1 vc_data = { .tag = -1 }; ++ int ret; ++ ++ if (copy_from_user(&vc_data, data, sizeof(vc_data))) ++ return -EFAULT; ++ ++ ret = user_lpath(vc_data.name, &path); ++ if (!ret) { ++ ret = __vc_get_iattr(path.dentry->d_inode, ++ &vc_data.tag, &vc_data.flags, &vc_data.mask); ++ path_put(&path); ++ } ++ if (ret) ++ return ret; ++ ++ if (copy_to_user(data, &vc_data, sizeof(vc_data))) ++ ret = -EFAULT; ++ return ret; ++} ++ ++#ifdef CONFIG_COMPAT ++ ++int vc_get_iattr_x32(void __user *data) ++{ ++ struct path path; ++ struct vcmd_ctx_iattr_v1_x32 vc_data = { .tag = -1 }; ++ int ret; ++ ++ if (copy_from_user(&vc_data, data, sizeof(vc_data))) ++ return -EFAULT; ++ ++ ret = user_lpath(compat_ptr(vc_data.name_ptr), &path); ++ if (!ret) { ++ ret = __vc_get_iattr(path.dentry->d_inode, ++ &vc_data.tag, &vc_data.flags, &vc_data.mask); ++ path_put(&path); ++ } ++ if (ret) ++ return ret; ++ ++ if (copy_to_user(data, &vc_data, sizeof(vc_data))) ++ ret = -EFAULT; ++ return ret; ++} ++ ++#endif /* CONFIG_COMPAT */ ++ ++ ++int vc_fget_iattr(uint32_t fd, void __user *data) ++{ ++ struct file *filp; ++ struct vcmd_ctx_fiattr_v0 vc_data = { .tag = -1 }; ++ int ret; ++ ++ if (copy_from_user(&vc_data, data, sizeof(vc_data))) ++ return -EFAULT; ++ ++ filp = fget(fd); ++ if (!filp || !filp->f_dentry || !filp->f_dentry->d_inode) ++ return -EBADF; ++ ++ ret = __vc_get_iattr(filp->f_dentry->d_inode, ++ &vc_data.tag, &vc_data.flags, &vc_data.mask); ++ ++ fput(filp); ++ ++ if (copy_to_user(data, &vc_data, sizeof(vc_data))) ++ ret = -EFAULT; ++ return ret; ++} ++ ++ ++static int __vc_set_iattr(struct dentry *de, uint32_t *tag, uint32_t *flags, uint32_t *mask) ++{ ++ struct inode *in = de->d_inode; ++ int error = 0, is_proc = 0, has_tag = 0; ++ struct iattr attr = { 0 }; ++ ++ if (!in || !in->i_sb) ++ return -ESRCH; ++ ++ is_proc = (in->i_sb->s_magic == PROC_SUPER_MAGIC); ++ if ((*mask & IATTR_FLAGS) && !is_proc) ++ return -EINVAL; ++ ++ has_tag = IS_TAGGED(in) || ++ (in->i_sb->s_magic == DEVPTS_SUPER_MAGIC); ++ if ((*mask & IATTR_TAG) && !has_tag) ++ return -EINVAL; ++ ++ mutex_lock(&in->i_mutex); ++ if (*mask & IATTR_TAG) { ++ attr.ia_tag = *tag; ++ attr.ia_valid |= ATTR_TAG; ++ } ++ ++ if (*mask & IATTR_FLAGS) { ++ struct proc_dir_entry *entry = PROC_I(in)->pde; ++ unsigned int iflags = PROC_I(in)->vx_flags; ++ ++ iflags = (iflags & ~(*mask & IATTR_FLAGS)) ++ | (*flags & IATTR_FLAGS); ++ PROC_I(in)->vx_flags = iflags; ++ if (entry) ++ entry->vx_flags = iflags; ++ } ++ ++ if (*mask & (IATTR_IMMUTABLE | IATTR_IXUNLINK | ++ IATTR_BARRIER | IATTR_COW)) { ++ int iflags = in->i_flags; ++ int vflags = in->i_vflags; ++ ++ if (*mask & IATTR_IMMUTABLE) { ++ if (*flags & IATTR_IMMUTABLE) ++ iflags |= S_IMMUTABLE; ++ else ++ iflags &= ~S_IMMUTABLE; ++ } ++ if (*mask & IATTR_IXUNLINK) { ++ if (*flags & IATTR_IXUNLINK) ++ iflags |= S_IXUNLINK; ++ else ++ iflags &= ~S_IXUNLINK; ++ } ++ if (S_ISDIR(in->i_mode) && (*mask & IATTR_BARRIER)) { ++ if (*flags & IATTR_BARRIER) ++ vflags |= V_BARRIER; ++ else ++ vflags &= ~V_BARRIER; ++ } ++ if (S_ISREG(in->i_mode) && (*mask & IATTR_COW)) { ++ if (*flags & IATTR_COW) ++ vflags |= V_COW; ++ else ++ vflags &= 
~V_COW; ++ } ++ if (in->i_op && in->i_op->sync_flags) { ++ error = in->i_op->sync_flags(in, iflags, vflags); ++ if (error) ++ goto out; ++ } ++ } ++ ++ if (attr.ia_valid) { ++ if (in->i_op && in->i_op->setattr) ++ error = in->i_op->setattr(de, &attr); ++ else { ++ error = inode_change_ok(in, &attr); ++ if (!error) { ++ setattr_copy(in, &attr); ++ mark_inode_dirty(in); ++ } ++ } ++ } ++ ++out: ++ mutex_unlock(&in->i_mutex); ++ return error; ++} ++ ++int vc_set_iattr(void __user *data) ++{ ++ struct path path; ++ struct vcmd_ctx_iattr_v1 vc_data; ++ int ret; ++ ++ if (!capable(CAP_LINUX_IMMUTABLE)) ++ return -EPERM; ++ if (copy_from_user(&vc_data, data, sizeof(vc_data))) ++ return -EFAULT; ++ ++ ret = user_lpath(vc_data.name, &path); ++ if (!ret) { ++ ret = __vc_set_iattr(path.dentry, ++ &vc_data.tag, &vc_data.flags, &vc_data.mask); ++ path_put(&path); ++ } ++ ++ if (copy_to_user(data, &vc_data, sizeof(vc_data))) ++ ret = -EFAULT; ++ return ret; ++} ++ ++#ifdef CONFIG_COMPAT ++ ++int vc_set_iattr_x32(void __user *data) ++{ ++ struct path path; ++ struct vcmd_ctx_iattr_v1_x32 vc_data; ++ int ret; ++ ++ if (!capable(CAP_LINUX_IMMUTABLE)) ++ return -EPERM; ++ if (copy_from_user(&vc_data, data, sizeof(vc_data))) ++ return -EFAULT; ++ ++ ret = user_lpath(compat_ptr(vc_data.name_ptr), &path); ++ if (!ret) { ++ ret = __vc_set_iattr(path.dentry, ++ &vc_data.tag, &vc_data.flags, &vc_data.mask); ++ path_put(&path); ++ } ++ ++ if (copy_to_user(data, &vc_data, sizeof(vc_data))) ++ ret = -EFAULT; ++ return ret; ++} ++ ++#endif /* CONFIG_COMPAT */ ++ ++int vc_fset_iattr(uint32_t fd, void __user *data) ++{ ++ struct file *filp; ++ struct vcmd_ctx_fiattr_v0 vc_data; ++ int ret; ++ ++ if (!capable(CAP_LINUX_IMMUTABLE)) ++ return -EPERM; ++ if (copy_from_user(&vc_data, data, sizeof(vc_data))) ++ return -EFAULT; ++ ++ filp = fget(fd); ++ if (!filp || !filp->f_dentry || !filp->f_dentry->d_inode) ++ return -EBADF; ++ ++ ret = __vc_set_iattr(filp->f_dentry, &vc_data.tag, ++ &vc_data.flags, &vc_data.mask); ++ ++ fput(filp); ++ ++ if (copy_to_user(data, &vc_data, sizeof(vc_data))) ++ return -EFAULT; ++ return ret; ++} ++ ++ ++enum { Opt_notagcheck, Opt_tag, Opt_notag, Opt_tagid, Opt_err }; ++ ++static match_table_t tokens = { ++ {Opt_notagcheck, "notagcheck"}, ++#ifdef CONFIG_PROPAGATE ++ {Opt_notag, "notag"}, ++ {Opt_tag, "tag"}, ++ {Opt_tagid, "tagid=%u"}, ++#endif ++ {Opt_err, NULL} ++}; ++ ++ ++static void __dx_parse_remove(char *string, char *opt) ++{ ++ char *p = strstr(string, opt); ++ char *q = p; ++ ++ if (p) { ++ while (*q != '\0' && *q != ',') ++ q++; ++ while (*q) ++ *p++ = *q++; ++ while (*p) ++ *p++ = '\0'; ++ } ++} ++ ++int dx_parse_tag(char *string, tag_t *tag, int remove, int *mnt_flags, ++ unsigned long *flags) ++{ ++ int set = 0; ++ substring_t args[MAX_OPT_ARGS]; ++ int token; ++ char *s, *p, *opts; ++#if defined(CONFIG_PROPAGATE) || defined(CONFIG_VSERVER_DEBUG) ++ int option = 0; ++#endif ++ ++ if (!string) ++ return 0; ++ s = kstrdup(string, GFP_KERNEL | GFP_ATOMIC); ++ if (!s) ++ return 0; ++ ++ opts = s; ++ while ((p = strsep(&opts, ",")) != NULL) { ++ token = match_token(p, tokens, args); ++ ++ switch (token) { ++#ifdef CONFIG_PROPAGATE ++ case Opt_tag: ++ if (tag) ++ *tag = 0; ++ if (remove) ++ __dx_parse_remove(s, "tag"); ++ *mnt_flags |= MNT_TAGID; ++ set |= MNT_TAGID; ++ break; ++ case Opt_notag: ++ if (remove) ++ __dx_parse_remove(s, "notag"); ++ *mnt_flags |= MNT_NOTAG; ++ set |= MNT_NOTAG; ++ break; ++ case Opt_tagid: ++ if (tag && !match_int(args, &option)) ++ *tag = option; ++ if 
(remove) ++ __dx_parse_remove(s, "tagid"); ++ *mnt_flags |= MNT_TAGID; ++ set |= MNT_TAGID; ++ break; ++#endif /* CONFIG_PROPAGATE */ ++ case Opt_notagcheck: ++ if (remove) ++ __dx_parse_remove(s, "notagcheck"); ++ *flags |= MS_NOTAGCHECK; ++ set |= MS_NOTAGCHECK; ++ break; ++ } ++ vxdprintk(VXD_CBIT(tag, 7), ++ "dx_parse_tag(" VS_Q("%s") "): %d:#%d", ++ p, token, option); ++ } ++ if (set) ++ strcpy(string, s); ++ kfree(s); ++ return set; ++} ++ ++#ifdef CONFIG_PROPAGATE ++ ++void __dx_propagate_tag(struct nameidata *nd, struct inode *inode) ++{ ++ tag_t new_tag = 0; ++ struct vfsmount *mnt; ++ int propagate; ++ ++ if (!nd) ++ return; ++ mnt = nd->path.mnt; ++ if (!mnt) ++ return; ++ ++ propagate = (mnt->mnt_flags & MNT_TAGID); ++ if (propagate) ++ new_tag = mnt->mnt_tag; ++ ++ vxdprintk(VXD_CBIT(tag, 7), ++ "dx_propagate_tag(%p[#%lu.%d]): %d,%d", ++ inode, inode->i_ino, inode->i_tag, ++ new_tag, (propagate) ? 1 : 0); ++ ++ if (propagate) ++ inode->i_tag = new_tag; ++} ++ ++#include ++ ++EXPORT_SYMBOL_GPL(__dx_propagate_tag); ++ ++#endif /* CONFIG_PROPAGATE */ ++ +diff -NurpP --minimal linux-3.3.8/kernel/vserver/limit.c linux-3.3.8-vs2.3.3.4/kernel/vserver/limit.c +--- linux-3.3.8/kernel/vserver/limit.c 1970-01-01 01:00:00.000000000 +0100 ++++ linux-3.3.8-vs2.3.3.4/kernel/vserver/limit.c 2012-02-24 03:55:07.000000000 +0100 +@@ -0,0 +1,330 @@ ++/* ++ * linux/kernel/vserver/limit.c ++ * ++ * Virtual Server: Context Limits ++ * ++ * Copyright (C) 2004-2010 Herbert Pötzl ++ * ++ * V0.01 broken out from vcontext V0.05 ++ * V0.02 changed vcmds to vxi arg ++ * V0.03 added memory cgroup support ++ * ++ */ ++ ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++ ++#include ++ ++ ++const char *vlimit_name[NUM_LIMITS] = { ++ [RLIMIT_CPU] = "CPU", ++ [RLIMIT_NPROC] = "NPROC", ++ [RLIMIT_NOFILE] = "NOFILE", ++ [RLIMIT_LOCKS] = "LOCKS", ++ [RLIMIT_SIGPENDING] = "SIGP", ++ [RLIMIT_MSGQUEUE] = "MSGQ", ++ ++ [VLIMIT_NSOCK] = "NSOCK", ++ [VLIMIT_OPENFD] = "OPENFD", ++ [VLIMIT_SHMEM] = "SHMEM", ++ [VLIMIT_DENTRY] = "DENTRY", ++}; ++ ++EXPORT_SYMBOL_GPL(vlimit_name); ++ ++#define MASK_ENTRY(x) (1 << (x)) ++ ++const struct vcmd_ctx_rlimit_mask_v0 vlimit_mask = { ++ /* minimum */ ++ 0 ++ , /* softlimit */ ++ 0 ++ , /* maximum */ ++ MASK_ENTRY( RLIMIT_NPROC ) | ++ MASK_ENTRY( RLIMIT_NOFILE ) | ++ MASK_ENTRY( RLIMIT_LOCKS ) | ++ MASK_ENTRY( RLIMIT_MSGQUEUE ) | ++ ++ MASK_ENTRY( VLIMIT_NSOCK ) | ++ MASK_ENTRY( VLIMIT_OPENFD ) | ++ MASK_ENTRY( VLIMIT_SHMEM ) | ++ MASK_ENTRY( VLIMIT_DENTRY ) | ++ 0 ++}; ++ /* accounting only */ ++uint32_t account_mask = ++ MASK_ENTRY( VLIMIT_SEMARY ) | ++ MASK_ENTRY( VLIMIT_NSEMS ) | ++ MASK_ENTRY( VLIMIT_MAPPED ) | ++ 0; ++ ++ ++static int is_valid_vlimit(int id) ++{ ++ uint32_t mask = vlimit_mask.minimum | ++ vlimit_mask.softlimit | vlimit_mask.maximum; ++ return mask & (1 << id); ++} ++ ++static int is_accounted_vlimit(int id) ++{ ++ if (is_valid_vlimit(id)) ++ return 1; ++ return account_mask & (1 << id); ++} ++ ++ ++static inline uint64_t vc_get_soft(struct vx_info *vxi, int id) ++{ ++ rlim_t limit = __rlim_soft(&vxi->limit, id); ++ return VX_VLIM(limit); ++} ++ ++static inline uint64_t vc_get_hard(struct vx_info *vxi, int id) ++{ ++ rlim_t limit = __rlim_hard(&vxi->limit, id); ++ return VX_VLIM(limit); ++} ++ ++static int do_get_rlimit(struct vx_info *vxi, uint32_t id, ++ uint64_t *minimum, uint64_t *softlimit, uint64_t *maximum) ++{ ++ if (!is_valid_vlimit(id)) ++ return -EINVAL; ++ ++ if (minimum) ++ *minimum = CRLIM_UNSET; ++ if (softlimit) ++ 
*softlimit = vc_get_soft(vxi, id); ++ if (maximum) ++ *maximum = vc_get_hard(vxi, id); ++ return 0; ++} ++ ++int vc_get_rlimit(struct vx_info *vxi, void __user *data) ++{ ++ struct vcmd_ctx_rlimit_v0 vc_data; ++ int ret; ++ ++ if (copy_from_user(&vc_data, data, sizeof(vc_data))) ++ return -EFAULT; ++ ++ ret = do_get_rlimit(vxi, vc_data.id, ++ &vc_data.minimum, &vc_data.softlimit, &vc_data.maximum); ++ if (ret) ++ return ret; ++ ++ if (copy_to_user(data, &vc_data, sizeof(vc_data))) ++ return -EFAULT; ++ return 0; ++} ++ ++static int do_set_rlimit(struct vx_info *vxi, uint32_t id, ++ uint64_t minimum, uint64_t softlimit, uint64_t maximum) ++{ ++ if (!is_valid_vlimit(id)) ++ return -EINVAL; ++ ++ if (maximum != CRLIM_KEEP) ++ __rlim_hard(&vxi->limit, id) = VX_RLIM(maximum); ++ if (softlimit != CRLIM_KEEP) ++ __rlim_soft(&vxi->limit, id) = VX_RLIM(softlimit); ++ ++ /* clamp soft limit */ ++ if (__rlim_soft(&vxi->limit, id) > __rlim_hard(&vxi->limit, id)) ++ __rlim_soft(&vxi->limit, id) = __rlim_hard(&vxi->limit, id); ++ ++ return 0; ++} ++ ++int vc_set_rlimit(struct vx_info *vxi, void __user *data) ++{ ++ struct vcmd_ctx_rlimit_v0 vc_data; ++ ++ if (copy_from_user(&vc_data, data, sizeof(vc_data))) ++ return -EFAULT; ++ ++ return do_set_rlimit(vxi, vc_data.id, ++ vc_data.minimum, vc_data.softlimit, vc_data.maximum); ++} ++ ++#ifdef CONFIG_IA32_EMULATION ++ ++int vc_set_rlimit_x32(struct vx_info *vxi, void __user *data) ++{ ++ struct vcmd_ctx_rlimit_v0_x32 vc_data; ++ ++ if (copy_from_user(&vc_data, data, sizeof(vc_data))) ++ return -EFAULT; ++ ++ return do_set_rlimit(vxi, vc_data.id, ++ vc_data.minimum, vc_data.softlimit, vc_data.maximum); ++} ++ ++int vc_get_rlimit_x32(struct vx_info *vxi, void __user *data) ++{ ++ struct vcmd_ctx_rlimit_v0_x32 vc_data; ++ int ret; ++ ++ if (copy_from_user(&vc_data, data, sizeof(vc_data))) ++ return -EFAULT; ++ ++ ret = do_get_rlimit(vxi, vc_data.id, ++ &vc_data.minimum, &vc_data.softlimit, &vc_data.maximum); ++ if (ret) ++ return ret; ++ ++ if (copy_to_user(data, &vc_data, sizeof(vc_data))) ++ return -EFAULT; ++ return 0; ++} ++ ++#endif /* CONFIG_IA32_EMULATION */ ++ ++ ++int vc_get_rlimit_mask(uint32_t id, void __user *data) ++{ ++ if (copy_to_user(data, &vlimit_mask, sizeof(vlimit_mask))) ++ return -EFAULT; ++ return 0; ++} ++ ++ ++static inline void vx_reset_hits(struct _vx_limit *limit) ++{ ++ int lim; ++ ++ for (lim = 0; lim < NUM_LIMITS; lim++) { ++ atomic_set(&__rlim_lhit(limit, lim), 0); ++ } ++} ++ ++int vc_reset_hits(struct vx_info *vxi, void __user *data) ++{ ++ vx_reset_hits(&vxi->limit); ++ return 0; ++} ++ ++static inline void vx_reset_minmax(struct _vx_limit *limit) ++{ ++ rlim_t value; ++ int lim; ++ ++ for (lim = 0; lim < NUM_LIMITS; lim++) { ++ value = __rlim_get(limit, lim); ++ __rlim_rmax(limit, lim) = value; ++ __rlim_rmin(limit, lim) = value; ++ } ++} ++ ++int vc_reset_minmax(struct vx_info *vxi, void __user *data) ++{ ++ vx_reset_minmax(&vxi->limit); ++ return 0; ++} ++ ++ ++int vc_rlimit_stat(struct vx_info *vxi, void __user *data) ++{ ++ struct vcmd_rlimit_stat_v0 vc_data; ++ struct _vx_limit *limit = &vxi->limit; ++ int id; ++ ++ if (copy_from_user(&vc_data, data, sizeof(vc_data))) ++ return -EFAULT; ++ ++ id = vc_data.id; ++ if (!is_accounted_vlimit(id)) ++ return -EINVAL; ++ ++ vx_limit_fixup(limit, id); ++ vc_data.hits = atomic_read(&__rlim_lhit(limit, id)); ++ vc_data.value = __rlim_get(limit, id); ++ vc_data.minimum = __rlim_rmin(limit, id); ++ vc_data.maximum = __rlim_rmax(limit, id); ++ ++ if (copy_to_user(data, &vc_data, 
sizeof(vc_data))) ++ return -EFAULT; ++ return 0; ++} ++ ++ ++void vx_vsi_meminfo(struct sysinfo *val) ++{ ++#ifdef CONFIG_CGROUP_MEM_RES_CTLR ++ struct mem_cgroup *mcg = mem_cgroup_from_task(current); ++ u64 res_limit, res_usage; ++ ++ if (!mcg) ++ return; ++ ++ res_limit = mem_cgroup_res_read_u64(mcg, RES_LIMIT); ++ res_usage = mem_cgroup_res_read_u64(mcg, RES_USAGE); ++ ++ if (res_limit != RESOURCE_MAX) ++ val->totalram = (res_limit >> PAGE_SHIFT); ++ val->freeram = val->totalram - (res_usage >> PAGE_SHIFT); ++ val->bufferram = 0; ++ val->totalhigh = 0; ++ val->freehigh = 0; ++#endif /* CONFIG_CGROUP_MEM_RES_CTLR */ ++ return; ++} ++ ++void vx_vsi_swapinfo(struct sysinfo *val) ++{ ++#ifdef CONFIG_CGROUP_MEM_RES_CTLR ++#ifdef CONFIG_CGROUP_MEM_RES_CTLR_SWAP ++ struct mem_cgroup *mcg = mem_cgroup_from_task(current); ++ u64 res_limit, res_usage, memsw_limit, memsw_usage; ++ s64 swap_limit, swap_usage; ++ ++ if (!mcg) ++ return; ++ ++ res_limit = mem_cgroup_res_read_u64(mcg, RES_LIMIT); ++ res_usage = mem_cgroup_res_read_u64(mcg, RES_USAGE); ++ memsw_limit = mem_cgroup_memsw_read_u64(mcg, RES_LIMIT); ++ memsw_usage = mem_cgroup_memsw_read_u64(mcg, RES_USAGE); ++ ++ /* memory unlimited */ ++ if (res_limit == RESOURCE_MAX) ++ return; ++ ++ swap_limit = memsw_limit - res_limit; ++ /* we have a swap limit? */ ++ if (memsw_limit != RESOURCE_MAX) ++ val->totalswap = swap_limit >> PAGE_SHIFT; ++ ++ /* calculate swap part */ ++ swap_usage = (memsw_usage > res_usage) ? ++ memsw_usage - res_usage : 0; ++ ++ /* total shown minus usage gives free swap */ ++ val->freeswap = (swap_usage < swap_limit) ? ++ val->totalswap - (swap_usage >> PAGE_SHIFT) : 0; ++#else /* !CONFIG_CGROUP_MEM_RES_CTLR_SWAP */ ++ val->totalswap = 0; ++ val->freeswap = 0; ++#endif /* !CONFIG_CGROUP_MEM_RES_CTLR_SWAP */ ++#endif /* CONFIG_CGROUP_MEM_RES_CTLR */ ++ return; ++} ++ ++long vx_vsi_cached(struct sysinfo *val) ++{ ++#ifdef CONFIG_CGROUP_MEM_RES_CTLR ++ struct mem_cgroup *mcg = mem_cgroup_from_task(current); ++ ++ return mem_cgroup_stat_read_cache(mcg); ++#else ++ return 0; ++#endif ++} ++ +diff -NurpP --minimal linux-3.3.8/kernel/vserver/limit_init.h linux-3.3.8-vs2.3.3.4/kernel/vserver/limit_init.h +--- linux-3.3.8/kernel/vserver/limit_init.h 1970-01-01 01:00:00.000000000 +0100 ++++ linux-3.3.8-vs2.3.3.4/kernel/vserver/limit_init.h 2012-02-24 03:55:07.000000000 +0100 +@@ -0,0 +1,31 @@ ++ ++ ++static inline void vx_info_init_limit(struct _vx_limit *limit) ++{ ++ int lim; ++ ++ for (lim = 0; lim < NUM_LIMITS; lim++) { ++ __rlim_soft(limit, lim) = RLIM_INFINITY; ++ __rlim_hard(limit, lim) = RLIM_INFINITY; ++ __rlim_set(limit, lim, 0); ++ atomic_set(&__rlim_lhit(limit, lim), 0); ++ __rlim_rmin(limit, lim) = 0; ++ __rlim_rmax(limit, lim) = 0; ++ } ++} ++ ++static inline void vx_info_exit_limit(struct _vx_limit *limit) ++{ ++ rlim_t value; ++ int lim; ++ ++ for (lim = 0; lim < NUM_LIMITS; lim++) { ++ if ((1 << lim) & VLIM_NOCHECK) ++ continue; ++ value = __rlim_get(limit, lim); ++ vxwprintk_xid(value, ++ "!!! 
limit: %p[%s,%d] = %ld on exit.", ++ limit, vlimit_name[lim], lim, (long)value); ++ } ++} ++ +diff -NurpP --minimal linux-3.3.8/kernel/vserver/limit_proc.h linux-3.3.8-vs2.3.3.4/kernel/vserver/limit_proc.h +--- linux-3.3.8/kernel/vserver/limit_proc.h 1970-01-01 01:00:00.000000000 +0100 ++++ linux-3.3.8-vs2.3.3.4/kernel/vserver/limit_proc.h 2012-02-24 03:55:07.000000000 +0100 +@@ -0,0 +1,57 @@ ++#ifndef _VX_LIMIT_PROC_H ++#define _VX_LIMIT_PROC_H ++ ++#include ++ ++ ++#define VX_LIMIT_FMT ":\t%8ld\t%8ld/%8ld\t%8lld/%8lld\t%6d\n" ++#define VX_LIMIT_TOP \ ++ "Limit\t current\t min/max\t\t soft/hard\t\thits\n" ++ ++#define VX_LIMIT_ARG(r) \ ++ (unsigned long)__rlim_get(limit, r), \ ++ (unsigned long)__rlim_rmin(limit, r), \ ++ (unsigned long)__rlim_rmax(limit, r), \ ++ VX_VLIM(__rlim_soft(limit, r)), \ ++ VX_VLIM(__rlim_hard(limit, r)), \ ++ atomic_read(&__rlim_lhit(limit, r)) ++ ++static inline int vx_info_proc_limit(struct _vx_limit *limit, char *buffer) ++{ ++ vx_limit_fixup(limit, -1); ++ return sprintf(buffer, VX_LIMIT_TOP ++ "PROC" VX_LIMIT_FMT ++ "VM" VX_LIMIT_FMT ++ "VML" VX_LIMIT_FMT ++ "RSS" VX_LIMIT_FMT ++ "ANON" VX_LIMIT_FMT ++ "RMAP" VX_LIMIT_FMT ++ "FILES" VX_LIMIT_FMT ++ "OFD" VX_LIMIT_FMT ++ "LOCKS" VX_LIMIT_FMT ++ "SOCK" VX_LIMIT_FMT ++ "MSGQ" VX_LIMIT_FMT ++ "SHM" VX_LIMIT_FMT ++ "SEMA" VX_LIMIT_FMT ++ "SEMS" VX_LIMIT_FMT ++ "DENT" VX_LIMIT_FMT, ++ VX_LIMIT_ARG(RLIMIT_NPROC), ++ VX_LIMIT_ARG(RLIMIT_AS), ++ VX_LIMIT_ARG(RLIMIT_MEMLOCK), ++ VX_LIMIT_ARG(RLIMIT_RSS), ++ VX_LIMIT_ARG(VLIMIT_ANON), ++ VX_LIMIT_ARG(VLIMIT_MAPPED), ++ VX_LIMIT_ARG(RLIMIT_NOFILE), ++ VX_LIMIT_ARG(VLIMIT_OPENFD), ++ VX_LIMIT_ARG(RLIMIT_LOCKS), ++ VX_LIMIT_ARG(VLIMIT_NSOCK), ++ VX_LIMIT_ARG(RLIMIT_MSGQUEUE), ++ VX_LIMIT_ARG(VLIMIT_SHMEM), ++ VX_LIMIT_ARG(VLIMIT_SEMARY), ++ VX_LIMIT_ARG(VLIMIT_NSEMS), ++ VX_LIMIT_ARG(VLIMIT_DENTRY)); ++} ++ ++#endif /* _VX_LIMIT_PROC_H */ ++ ++ +diff -NurpP --minimal linux-3.3.8/kernel/vserver/network.c linux-3.3.8-vs2.3.3.4/kernel/vserver/network.c +--- linux-3.3.8/kernel/vserver/network.c 1970-01-01 01:00:00.000000000 +0100 ++++ linux-3.3.8-vs2.3.3.4/kernel/vserver/network.c 2012-02-24 03:55:07.000000000 +0100 +@@ -0,0 +1,912 @@ ++/* ++ * linux/kernel/vserver/network.c ++ * ++ * Virtual Server: Network Support ++ * ++ * Copyright (C) 2003-2007 Herbert Pötzl ++ * ++ * V0.01 broken out from vcontext V0.05 ++ * V0.02 cleaned up implementation ++ * V0.03 added equiv nx commands ++ * V0.04 switch to RCU based hash ++ * V0.05 and back to locking again ++ * V0.06 changed vcmds to nxi arg ++ * V0.07 have __create claim() the nxi ++ * ++ */ ++ ++#include ++#include ++#include ++ ++#include ++#include ++#include ++ ++ ++atomic_t nx_global_ctotal = ATOMIC_INIT(0); ++atomic_t nx_global_cactive = ATOMIC_INIT(0); ++ ++static struct kmem_cache *nx_addr_v4_cachep = NULL; ++static struct kmem_cache *nx_addr_v6_cachep = NULL; ++ ++ ++static int __init init_network(void) ++{ ++ nx_addr_v4_cachep = kmem_cache_create("nx_v4_addr_cache", ++ sizeof(struct nx_addr_v4), 0, ++ SLAB_HWCACHE_ALIGN|SLAB_PANIC, NULL); ++ nx_addr_v6_cachep = kmem_cache_create("nx_v6_addr_cache", ++ sizeof(struct nx_addr_v6), 0, ++ SLAB_HWCACHE_ALIGN|SLAB_PANIC, NULL); ++ return 0; ++} ++ ++ ++/* __alloc_nx_addr_v4() */ ++ ++static inline struct nx_addr_v4 *__alloc_nx_addr_v4(void) ++{ ++ struct nx_addr_v4 *nxa = kmem_cache_alloc( ++ nx_addr_v4_cachep, GFP_KERNEL); ++ ++ if (!IS_ERR(nxa)) ++ memset(nxa, 0, sizeof(*nxa)); ++ return nxa; ++} ++ ++/* __dealloc_nx_addr_v4() */ ++ ++static inline void 
__dealloc_nx_addr_v4(struct nx_addr_v4 *nxa) ++{ ++ kmem_cache_free(nx_addr_v4_cachep, nxa); ++} ++ ++/* __dealloc_nx_addr_v4_all() */ ++ ++static inline void __dealloc_nx_addr_v4_all(struct nx_addr_v4 *nxa) ++{ ++ while (nxa) { ++ struct nx_addr_v4 *next = nxa->next; ++ ++ __dealloc_nx_addr_v4(nxa); ++ nxa = next; ++ } ++} ++ ++ ++#ifdef CONFIG_IPV6 ++ ++/* __alloc_nx_addr_v6() */ ++ ++static inline struct nx_addr_v6 *__alloc_nx_addr_v6(void) ++{ ++ struct nx_addr_v6 *nxa = kmem_cache_alloc( ++ nx_addr_v6_cachep, GFP_KERNEL); ++ ++ if (!IS_ERR(nxa)) ++ memset(nxa, 0, sizeof(*nxa)); ++ return nxa; ++} ++ ++/* __dealloc_nx_addr_v6() */ ++ ++static inline void __dealloc_nx_addr_v6(struct nx_addr_v6 *nxa) ++{ ++ kmem_cache_free(nx_addr_v6_cachep, nxa); ++} ++ ++/* __dealloc_nx_addr_v6_all() */ ++ ++static inline void __dealloc_nx_addr_v6_all(struct nx_addr_v6 *nxa) ++{ ++ while (nxa) { ++ struct nx_addr_v6 *next = nxa->next; ++ ++ __dealloc_nx_addr_v6(nxa); ++ nxa = next; ++ } ++} ++ ++#endif /* CONFIG_IPV6 */ ++ ++/* __alloc_nx_info() ++ ++ * allocate an initialized nx_info struct ++ * doesn't make it visible (hash) */ ++ ++static struct nx_info *__alloc_nx_info(nid_t nid) ++{ ++ struct nx_info *new = NULL; ++ ++ vxdprintk(VXD_CBIT(nid, 1), "alloc_nx_info(%d)*", nid); ++ ++ /* would this benefit from a slab cache? */ ++ new = kmalloc(sizeof(struct nx_info), GFP_KERNEL); ++ if (!new) ++ return 0; ++ ++ memset(new, 0, sizeof(struct nx_info)); ++ new->nx_id = nid; ++ INIT_HLIST_NODE(&new->nx_hlist); ++ atomic_set(&new->nx_usecnt, 0); ++ atomic_set(&new->nx_tasks, 0); ++ new->nx_state = 0; ++ ++ new->nx_flags = NXF_INIT_SET; ++ ++ /* rest of init goes here */ ++ ++ new->v4_lback.s_addr = htonl(INADDR_LOOPBACK); ++ new->v4_bcast.s_addr = htonl(INADDR_BROADCAST); ++ ++ vxdprintk(VXD_CBIT(nid, 0), ++ "alloc_nx_info(%d) = %p", nid, new); ++ atomic_inc(&nx_global_ctotal); ++ return new; ++} ++ ++/* __dealloc_nx_info() ++ ++ * final disposal of nx_info */ ++ ++static void __dealloc_nx_info(struct nx_info *nxi) ++{ ++ vxdprintk(VXD_CBIT(nid, 0), ++ "dealloc_nx_info(%p)", nxi); ++ ++ nxi->nx_hlist.next = LIST_POISON1; ++ nxi->nx_id = -1; ++ ++ BUG_ON(atomic_read(&nxi->nx_usecnt)); ++ BUG_ON(atomic_read(&nxi->nx_tasks)); ++ ++ __dealloc_nx_addr_v4_all(nxi->v4.next); ++ ++ nxi->nx_state |= NXS_RELEASED; ++ kfree(nxi); ++ atomic_dec(&nx_global_ctotal); ++} ++ ++static void __shutdown_nx_info(struct nx_info *nxi) ++{ ++ nxi->nx_state |= NXS_SHUTDOWN; ++ vs_net_change(nxi, VSC_NETDOWN); ++} ++ ++/* exported stuff */ ++ ++void free_nx_info(struct nx_info *nxi) ++{ ++ /* context shutdown is mandatory */ ++ BUG_ON(nxi->nx_state != NXS_SHUTDOWN); ++ ++ /* context must not be hashed */ ++ BUG_ON(nxi->nx_state & NXS_HASHED); ++ ++ BUG_ON(atomic_read(&nxi->nx_usecnt)); ++ BUG_ON(atomic_read(&nxi->nx_tasks)); ++ ++ __dealloc_nx_info(nxi); ++} ++ ++ ++void __nx_set_lback(struct nx_info *nxi) ++{ ++ int nid = nxi->nx_id; ++ __be32 lback = htonl(INADDR_LOOPBACK ^ ((nid & 0xFFFF) << 8)); ++ ++ nxi->v4_lback.s_addr = lback; ++} ++ ++extern int __nx_inet_add_lback(__be32 addr); ++extern int __nx_inet_del_lback(__be32 addr); ++ ++ ++/* hash table for nx_info hash */ ++ ++#define NX_HASH_SIZE 13 ++ ++struct hlist_head nx_info_hash[NX_HASH_SIZE]; ++ ++static DEFINE_SPINLOCK(nx_info_hash_lock); ++ ++ ++static inline unsigned int __hashval(nid_t nid) ++{ ++ return (nid % NX_HASH_SIZE); ++} ++ ++ ++ ++/* __hash_nx_info() ++ ++ * add the nxi to the global hash table ++ * requires the hash_lock to be held */ ++ ++static inline 
void __hash_nx_info(struct nx_info *nxi) ++{ ++ struct hlist_head *head; ++ ++ vxd_assert_lock(&nx_info_hash_lock); ++ vxdprintk(VXD_CBIT(nid, 4), ++ "__hash_nx_info: %p[#%d]", nxi, nxi->nx_id); ++ ++ /* context must not be hashed */ ++ BUG_ON(nx_info_state(nxi, NXS_HASHED)); ++ ++ nxi->nx_state |= NXS_HASHED; ++ head = &nx_info_hash[__hashval(nxi->nx_id)]; ++ hlist_add_head(&nxi->nx_hlist, head); ++ atomic_inc(&nx_global_cactive); ++} ++ ++/* __unhash_nx_info() ++ ++ * remove the nxi from the global hash table ++ * requires the hash_lock to be held */ ++ ++static inline void __unhash_nx_info(struct nx_info *nxi) ++{ ++ vxd_assert_lock(&nx_info_hash_lock); ++ vxdprintk(VXD_CBIT(nid, 4), ++ "__unhash_nx_info: %p[#%d.%d.%d]", nxi, nxi->nx_id, ++ atomic_read(&nxi->nx_usecnt), atomic_read(&nxi->nx_tasks)); ++ ++ /* context must be hashed */ ++ BUG_ON(!nx_info_state(nxi, NXS_HASHED)); ++ /* but without tasks */ ++ BUG_ON(atomic_read(&nxi->nx_tasks)); ++ ++ nxi->nx_state &= ~NXS_HASHED; ++ hlist_del(&nxi->nx_hlist); ++ atomic_dec(&nx_global_cactive); ++} ++ ++ ++/* __lookup_nx_info() ++ ++ * requires the hash_lock to be held ++ * doesn't increment the nx_refcnt */ ++ ++static inline struct nx_info *__lookup_nx_info(nid_t nid) ++{ ++ struct hlist_head *head = &nx_info_hash[__hashval(nid)]; ++ struct hlist_node *pos; ++ struct nx_info *nxi; ++ ++ vxd_assert_lock(&nx_info_hash_lock); ++ hlist_for_each(pos, head) { ++ nxi = hlist_entry(pos, struct nx_info, nx_hlist); ++ ++ if (nxi->nx_id == nid) ++ goto found; ++ } ++ nxi = NULL; ++found: ++ vxdprintk(VXD_CBIT(nid, 0), ++ "__lookup_nx_info(#%u): %p[#%u]", ++ nid, nxi, nxi ? nxi->nx_id : 0); ++ return nxi; ++} ++ ++ ++/* __create_nx_info() ++ ++ * create the requested context ++ * get(), claim() and hash it */ ++ ++static struct nx_info *__create_nx_info(int id) ++{ ++ struct nx_info *new, *nxi = NULL; ++ ++ vxdprintk(VXD_CBIT(nid, 1), "create_nx_info(%d)*", id); ++ ++ if (!(new = __alloc_nx_info(id))) ++ return ERR_PTR(-ENOMEM); ++ ++ /* required to make dynamic xids unique */ ++ spin_lock(&nx_info_hash_lock); ++ ++ /* static context requested */ ++ if ((nxi = __lookup_nx_info(id))) { ++ vxdprintk(VXD_CBIT(nid, 0), ++ "create_nx_info(%d) = %p (already there)", id, nxi); ++ if (nx_info_flags(nxi, NXF_STATE_SETUP, 0)) ++ nxi = ERR_PTR(-EBUSY); ++ else ++ nxi = ERR_PTR(-EEXIST); ++ goto out_unlock; ++ } ++ /* new context */ ++ vxdprintk(VXD_CBIT(nid, 0), ++ "create_nx_info(%d) = %p (new)", id, new); ++ claim_nx_info(new, NULL); ++ __nx_set_lback(new); ++ __hash_nx_info(get_nx_info(new)); ++ nxi = new, new = NULL; ++ ++out_unlock: ++ spin_unlock(&nx_info_hash_lock); ++ if (new) ++ __dealloc_nx_info(new); ++ return nxi; ++} ++ ++ ++ ++/* exported stuff */ ++ ++ ++void unhash_nx_info(struct nx_info *nxi) ++{ ++ __shutdown_nx_info(nxi); ++ spin_lock(&nx_info_hash_lock); ++ __unhash_nx_info(nxi); ++ spin_unlock(&nx_info_hash_lock); ++} ++ ++/* lookup_nx_info() ++ ++ * search for a nx_info and get() it ++ * negative id means current */ ++ ++struct nx_info *lookup_nx_info(int id) ++{ ++ struct nx_info *nxi = NULL; ++ ++ if (id < 0) { ++ nxi = get_nx_info(current_nx_info()); ++ } else if (id > 1) { ++ spin_lock(&nx_info_hash_lock); ++ nxi = get_nx_info(__lookup_nx_info(id)); ++ spin_unlock(&nx_info_hash_lock); ++ } ++ return nxi; ++} ++ ++/* nid_is_hashed() ++ ++ * verify that nid is still hashed */ ++ ++int nid_is_hashed(nid_t nid) ++{ ++ int hashed; ++ ++ spin_lock(&nx_info_hash_lock); ++ hashed = (__lookup_nx_info(nid) != NULL); ++ 
spin_unlock(&nx_info_hash_lock); ++ return hashed; ++} ++ ++ ++#ifdef CONFIG_PROC_FS ++ ++/* get_nid_list() ++ ++ * get a subset of hashed nids for proc ++ * assumes size is at least one */ ++ ++int get_nid_list(int index, unsigned int *nids, int size) ++{ ++ int hindex, nr_nids = 0; ++ ++ /* only show current and children */ ++ if (!nx_check(0, VS_ADMIN | VS_WATCH)) { ++ if (index > 0) ++ return 0; ++ nids[nr_nids] = nx_current_nid(); ++ return 1; ++ } ++ ++ for (hindex = 0; hindex < NX_HASH_SIZE; hindex++) { ++ struct hlist_head *head = &nx_info_hash[hindex]; ++ struct hlist_node *pos; ++ ++ spin_lock(&nx_info_hash_lock); ++ hlist_for_each(pos, head) { ++ struct nx_info *nxi; ++ ++ if (--index > 0) ++ continue; ++ ++ nxi = hlist_entry(pos, struct nx_info, nx_hlist); ++ nids[nr_nids] = nxi->nx_id; ++ if (++nr_nids >= size) { ++ spin_unlock(&nx_info_hash_lock); ++ goto out; ++ } ++ } ++ /* keep the lock time short */ ++ spin_unlock(&nx_info_hash_lock); ++ } ++out: ++ return nr_nids; ++} ++#endif ++ ++ ++/* ++ * migrate task to new network ++ * gets nxi, puts old_nxi on change ++ */ ++ ++int nx_migrate_task(struct task_struct *p, struct nx_info *nxi) ++{ ++ struct nx_info *old_nxi; ++ int ret = 0; ++ ++ if (!p || !nxi) ++ BUG(); ++ ++ vxdprintk(VXD_CBIT(nid, 5), ++ "nx_migrate_task(%p,%p[#%d.%d.%d])", ++ p, nxi, nxi->nx_id, ++ atomic_read(&nxi->nx_usecnt), ++ atomic_read(&nxi->nx_tasks)); ++ ++ if (nx_info_flags(nxi, NXF_INFO_PRIVATE, 0) && ++ !nx_info_flags(nxi, NXF_STATE_SETUP, 0)) ++ return -EACCES; ++ ++ if (nx_info_state(nxi, NXS_SHUTDOWN)) ++ return -EFAULT; ++ ++ /* maybe disallow this completely? */ ++ old_nxi = task_get_nx_info(p); ++ if (old_nxi == nxi) ++ goto out; ++ ++ task_lock(p); ++ if (old_nxi) ++ clr_nx_info(&p->nx_info); ++ claim_nx_info(nxi, p); ++ set_nx_info(&p->nx_info, nxi); ++ p->nid = nxi->nx_id; ++ task_unlock(p); ++ ++ vxdprintk(VXD_CBIT(nid, 5), ++ "moved task %p into nxi:%p[#%d]", ++ p, nxi, nxi->nx_id); ++ ++ if (old_nxi) ++ release_nx_info(old_nxi, p); ++ ret = 0; ++out: ++ put_nx_info(old_nxi); ++ return ret; ++} ++ ++ ++void nx_set_persistent(struct nx_info *nxi) ++{ ++ vxdprintk(VXD_CBIT(nid, 6), ++ "nx_set_persistent(%p[#%d])", nxi, nxi->nx_id); ++ ++ get_nx_info(nxi); ++ claim_nx_info(nxi, NULL); ++} ++ ++void nx_clear_persistent(struct nx_info *nxi) ++{ ++ vxdprintk(VXD_CBIT(nid, 6), ++ "nx_clear_persistent(%p[#%d])", nxi, nxi->nx_id); ++ ++ release_nx_info(nxi, NULL); ++ put_nx_info(nxi); ++} ++ ++void nx_update_persistent(struct nx_info *nxi) ++{ ++ if (nx_info_flags(nxi, NXF_PERSISTENT, 0)) ++ nx_set_persistent(nxi); ++ else ++ nx_clear_persistent(nxi); ++} ++ ++/* vserver syscall commands below here */ ++ ++/* taks nid and nx_info functions */ ++ ++#include ++ ++ ++int vc_task_nid(uint32_t id) ++{ ++ nid_t nid; ++ ++ if (id) { ++ struct task_struct *tsk; ++ ++ rcu_read_lock(); ++ tsk = find_task_by_real_pid(id); ++ nid = (tsk) ? 
tsk->nid : -ESRCH; ++ rcu_read_unlock(); ++ } else ++ nid = nx_current_nid(); ++ return nid; ++} ++ ++ ++int vc_nx_info(struct nx_info *nxi, void __user *data) ++{ ++ struct vcmd_nx_info_v0 vc_data; ++ ++ vc_data.nid = nxi->nx_id; ++ ++ if (copy_to_user(data, &vc_data, sizeof(vc_data))) ++ return -EFAULT; ++ return 0; ++} ++ ++ ++/* network functions */ ++ ++int vc_net_create(uint32_t nid, void __user *data) ++{ ++ struct vcmd_net_create vc_data = { .flagword = NXF_INIT_SET }; ++ struct nx_info *new_nxi; ++ int ret; ++ ++ if (data && copy_from_user(&vc_data, data, sizeof(vc_data))) ++ return -EFAULT; ++ ++ if ((nid > MAX_S_CONTEXT) || (nid < 2)) ++ return -EINVAL; ++ ++ new_nxi = __create_nx_info(nid); ++ if (IS_ERR(new_nxi)) ++ return PTR_ERR(new_nxi); ++ ++ /* initial flags */ ++ new_nxi->nx_flags = vc_data.flagword; ++ ++ ret = -ENOEXEC; ++ if (vs_net_change(new_nxi, VSC_NETUP)) ++ goto out; ++ ++ ret = nx_migrate_task(current, new_nxi); ++ if (ret) ++ goto out; ++ ++ /* return context id on success */ ++ ret = new_nxi->nx_id; ++ ++ /* get a reference for persistent contexts */ ++ if ((vc_data.flagword & NXF_PERSISTENT)) ++ nx_set_persistent(new_nxi); ++out: ++ release_nx_info(new_nxi, NULL); ++ put_nx_info(new_nxi); ++ return ret; ++} ++ ++ ++int vc_net_migrate(struct nx_info *nxi, void __user *data) ++{ ++ return nx_migrate_task(current, nxi); ++} ++ ++ ++ ++int do_add_v4_addr(struct nx_info *nxi, __be32 ip, __be32 ip2, __be32 mask, ++ uint16_t type, uint16_t flags) ++{ ++ struct nx_addr_v4 *nxa = &nxi->v4; ++ ++ if (NX_IPV4(nxi)) { ++ /* locate last entry */ ++ for (; nxa->next; nxa = nxa->next); ++ nxa->next = __alloc_nx_addr_v4(); ++ nxa = nxa->next; ++ ++ if (IS_ERR(nxa)) ++ return PTR_ERR(nxa); ++ } ++ ++ if (nxi->v4.next) ++ /* remove single ip for ip list */ ++ nxi->nx_flags &= ~NXF_SINGLE_IP; ++ ++ nxa->ip[0].s_addr = ip; ++ nxa->ip[1].s_addr = ip2; ++ nxa->mask.s_addr = mask; ++ nxa->type = type; ++ nxa->flags = flags; ++ return 0; ++} ++ ++int do_remove_v4_addr(struct nx_info *nxi, __be32 ip, __be32 ip2, __be32 mask, ++ uint16_t type, uint16_t flags) ++{ ++ struct nx_addr_v4 *nxa = &nxi->v4; ++ ++ switch (type) { ++/* case NXA_TYPE_ADDR: ++ break; */ ++ ++ case NXA_TYPE_ANY: ++ __dealloc_nx_addr_v4_all(xchg(&nxa->next, NULL)); ++ memset(nxa, 0, sizeof(*nxa)); ++ break; ++ ++ default: ++ return -EINVAL; ++ } ++ return 0; ++} ++ ++ ++int vc_net_add(struct nx_info *nxi, void __user *data) ++{ ++ struct vcmd_net_addr_v0 vc_data; ++ int index, ret = 0; ++ ++ if (data && copy_from_user(&vc_data, data, sizeof(vc_data))) ++ return -EFAULT; ++ ++ switch (vc_data.type) { ++ case NXA_TYPE_IPV4: ++ if ((vc_data.count < 1) || (vc_data.count > 4)) ++ return -EINVAL; ++ ++ index = 0; ++ while (index < vc_data.count) { ++ ret = do_add_v4_addr(nxi, vc_data.ip[index].s_addr, 0, ++ vc_data.mask[index].s_addr, NXA_TYPE_ADDR, 0); ++ if (ret) ++ return ret; ++ index++; ++ } ++ ret = index; ++ break; ++ ++ case NXA_TYPE_IPV4|NXA_MOD_BCAST: ++ nxi->v4_bcast = vc_data.ip[0]; ++ ret = 1; ++ break; ++ ++ case NXA_TYPE_IPV4|NXA_MOD_LBACK: ++ nxi->v4_lback = vc_data.ip[0]; ++ ret = 1; ++ break; ++ ++ default: ++ ret = -EINVAL; ++ break; ++ } ++ return ret; ++} ++ ++int vc_net_remove(struct nx_info *nxi, void __user *data) ++{ ++ struct vcmd_net_addr_v0 vc_data; ++ ++ if (data && copy_from_user(&vc_data, data, sizeof(vc_data))) ++ return -EFAULT; ++ ++ switch (vc_data.type) { ++ case NXA_TYPE_ANY: ++ __dealloc_nx_addr_v4_all(xchg(&nxi->v4.next, NULL)); ++ memset(&nxi->v4, 0, sizeof(nxi->v4)); ++ break; 
++ ++ default: ++ return -EINVAL; ++ } ++ return 0; ++} ++ ++ ++int vc_net_add_ipv4_v1(struct nx_info *nxi, void __user *data) ++{ ++ struct vcmd_net_addr_ipv4_v1 vc_data; ++ ++ if (data && copy_from_user(&vc_data, data, sizeof(vc_data))) ++ return -EFAULT; ++ ++ switch (vc_data.type) { ++ case NXA_TYPE_ADDR: ++ case NXA_TYPE_MASK: ++ return do_add_v4_addr(nxi, vc_data.ip.s_addr, 0, ++ vc_data.mask.s_addr, vc_data.type, vc_data.flags); ++ ++ case NXA_TYPE_ADDR | NXA_MOD_BCAST: ++ nxi->v4_bcast = vc_data.ip; ++ break; ++ ++ case NXA_TYPE_ADDR | NXA_MOD_LBACK: ++ nxi->v4_lback = vc_data.ip; ++ break; ++ ++ default: ++ return -EINVAL; ++ } ++ return 0; ++} ++ ++int vc_net_add_ipv4(struct nx_info *nxi, void __user *data) ++{ ++ struct vcmd_net_addr_ipv4_v2 vc_data; ++ ++ if (data && copy_from_user(&vc_data, data, sizeof(vc_data))) ++ return -EFAULT; ++ ++ switch (vc_data.type) { ++ case NXA_TYPE_ADDR: ++ case NXA_TYPE_MASK: ++ case NXA_TYPE_RANGE: ++ return do_add_v4_addr(nxi, vc_data.ip.s_addr, vc_data.ip2.s_addr, ++ vc_data.mask.s_addr, vc_data.type, vc_data.flags); ++ ++ case NXA_TYPE_ADDR | NXA_MOD_BCAST: ++ nxi->v4_bcast = vc_data.ip; ++ break; ++ ++ case NXA_TYPE_ADDR | NXA_MOD_LBACK: ++ nxi->v4_lback = vc_data.ip; ++ break; ++ ++ default: ++ return -EINVAL; ++ } ++ return 0; ++} ++ ++int vc_net_rem_ipv4_v1(struct nx_info *nxi, void __user *data) ++{ ++ struct vcmd_net_addr_ipv4_v1 vc_data; ++ ++ if (data && copy_from_user(&vc_data, data, sizeof(vc_data))) ++ return -EFAULT; ++ ++ return do_remove_v4_addr(nxi, vc_data.ip.s_addr, 0, ++ vc_data.mask.s_addr, vc_data.type, vc_data.flags); ++} ++ ++int vc_net_rem_ipv4(struct nx_info *nxi, void __user *data) ++{ ++ struct vcmd_net_addr_ipv4_v2 vc_data; ++ ++ if (data && copy_from_user(&vc_data, data, sizeof(vc_data))) ++ return -EFAULT; ++ ++ return do_remove_v4_addr(nxi, vc_data.ip.s_addr, vc_data.ip2.s_addr, ++ vc_data.mask.s_addr, vc_data.type, vc_data.flags); ++} ++ ++#ifdef CONFIG_IPV6 ++ ++int do_add_v6_addr(struct nx_info *nxi, ++ struct in6_addr *ip, struct in6_addr *mask, ++ uint32_t prefix, uint16_t type, uint16_t flags) ++{ ++ struct nx_addr_v6 *nxa = &nxi->v6; ++ ++ if (NX_IPV6(nxi)) { ++ /* locate last entry */ ++ for (; nxa->next; nxa = nxa->next); ++ nxa->next = __alloc_nx_addr_v6(); ++ nxa = nxa->next; ++ ++ if (IS_ERR(nxa)) ++ return PTR_ERR(nxa); ++ } ++ ++ nxa->ip = *ip; ++ nxa->mask = *mask; ++ nxa->prefix = prefix; ++ nxa->type = type; ++ nxa->flags = flags; ++ return 0; ++} ++ ++ ++int vc_net_add_ipv6(struct nx_info *nxi, void __user *data) ++{ ++ struct vcmd_net_addr_ipv6_v1 vc_data; ++ ++ if (data && copy_from_user(&vc_data, data, sizeof(vc_data))) ++ return -EFAULT; ++ ++ switch (vc_data.type) { ++ case NXA_TYPE_ADDR: ++ memset(&vc_data.mask, ~0, sizeof(vc_data.mask)); ++ /* fallthrough */ ++ case NXA_TYPE_MASK: ++ return do_add_v6_addr(nxi, &vc_data.ip, &vc_data.mask, ++ vc_data.prefix, vc_data.type, vc_data.flags); ++ default: ++ return -EINVAL; ++ } ++ return 0; ++} ++ ++int vc_net_remove_ipv6(struct nx_info *nxi, void __user *data) ++{ ++ struct vcmd_net_addr_ipv6_v1 vc_data; ++ ++ if (data && copy_from_user(&vc_data, data, sizeof(vc_data))) ++ return -EFAULT; ++ ++ switch (vc_data.type) { ++ case NXA_TYPE_ANY: ++ __dealloc_nx_addr_v6_all(xchg(&nxi->v6.next, NULL)); ++ memset(&nxi->v6, 0, sizeof(nxi->v6)); ++ break; ++ ++ default: ++ return -EINVAL; ++ } ++ return 0; ++} ++ ++#endif /* CONFIG_IPV6 */ ++ ++ ++int vc_get_nflags(struct nx_info *nxi, void __user *data) ++{ ++ struct vcmd_net_flags_v0 vc_data; ++ ++ 
vc_data.flagword = nxi->nx_flags; ++ ++ /* special STATE flag handling */ ++ vc_data.mask = vs_mask_flags(~0ULL, nxi->nx_flags, NXF_ONE_TIME); ++ ++ if (copy_to_user(data, &vc_data, sizeof(vc_data))) ++ return -EFAULT; ++ return 0; ++} ++ ++int vc_set_nflags(struct nx_info *nxi, void __user *data) ++{ ++ struct vcmd_net_flags_v0 vc_data; ++ uint64_t mask, trigger; ++ ++ if (copy_from_user(&vc_data, data, sizeof(vc_data))) ++ return -EFAULT; ++ ++ /* special STATE flag handling */ ++ mask = vs_mask_mask(vc_data.mask, nxi->nx_flags, NXF_ONE_TIME); ++ trigger = (mask & nxi->nx_flags) ^ (mask & vc_data.flagword); ++ ++ nxi->nx_flags = vs_mask_flags(nxi->nx_flags, ++ vc_data.flagword, mask); ++ if (trigger & NXF_PERSISTENT) ++ nx_update_persistent(nxi); ++ ++ return 0; ++} ++ ++int vc_get_ncaps(struct nx_info *nxi, void __user *data) ++{ ++ struct vcmd_net_caps_v0 vc_data; ++ ++ vc_data.ncaps = nxi->nx_ncaps; ++ vc_data.cmask = ~0ULL; ++ ++ if (copy_to_user(data, &vc_data, sizeof(vc_data))) ++ return -EFAULT; ++ return 0; ++} ++ ++int vc_set_ncaps(struct nx_info *nxi, void __user *data) ++{ ++ struct vcmd_net_caps_v0 vc_data; ++ ++ if (copy_from_user(&vc_data, data, sizeof(vc_data))) ++ return -EFAULT; ++ ++ nxi->nx_ncaps = vs_mask_flags(nxi->nx_ncaps, ++ vc_data.ncaps, vc_data.cmask); ++ return 0; ++} ++ ++ ++#include ++ ++module_init(init_network); ++ ++EXPORT_SYMBOL_GPL(free_nx_info); ++EXPORT_SYMBOL_GPL(unhash_nx_info); ++ +diff -NurpP --minimal linux-3.3.8/kernel/vserver/proc.c linux-3.3.8-vs2.3.3.4/kernel/vserver/proc.c +--- linux-3.3.8/kernel/vserver/proc.c 1970-01-01 01:00:00.000000000 +0100 ++++ linux-3.3.8-vs2.3.3.4/kernel/vserver/proc.c 2012-02-24 03:55:07.000000000 +0100 +@@ -0,0 +1,1103 @@ ++/* ++ * linux/kernel/vserver/proc.c ++ * ++ * Virtual Context Support ++ * ++ * Copyright (C) 2003-2011 Herbert Pötzl ++ * ++ * V0.01 basic structure ++ * V0.02 adaptation vs1.3.0 ++ * V0.03 proc permissions ++ * V0.04 locking/generic ++ * V0.05 next generation procfs ++ * V0.06 inode validation ++ * V0.07 generic rewrite vid ++ * V0.08 remove inode type ++ * V0.09 added u/wmask info ++ * ++ */ ++ ++#include ++#include ++#include ++#include ++ ++#include ++#include ++#include ++ ++#include ++#include ++#include ++#include ++ ++#include ++ ++#include "cvirt_proc.h" ++#include "cacct_proc.h" ++#include "limit_proc.h" ++#include "sched_proc.h" ++#include "vci_config.h" ++ ++ ++static inline char *print_cap_t(char *buffer, kernel_cap_t *c) ++{ ++ unsigned __capi; ++ ++ CAP_FOR_EACH_U32(__capi) { ++ buffer += sprintf(buffer, "%08x", ++ c->cap[(_KERNEL_CAPABILITY_U32S-1) - __capi]); ++ } ++ return buffer; ++} ++ ++ ++static struct proc_dir_entry *proc_virtual; ++ ++static struct proc_dir_entry *proc_virtnet; ++ ++ ++/* first the actual feeds */ ++ ++ ++static int proc_vci(char *buffer) ++{ ++ return sprintf(buffer, ++ "VCIVersion:\t%04x:%04x\n" ++ "VCISyscall:\t%d\n" ++ "VCIKernel:\t%08x\n", ++ VCI_VERSION >> 16, ++ VCI_VERSION & 0xFFFF, ++ __NR_vserver, ++ vci_kernel_config()); ++} ++ ++static int proc_virtual_info(char *buffer) ++{ ++ return proc_vci(buffer); ++} ++ ++static int proc_virtual_status(char *buffer) ++{ ++ return sprintf(buffer, ++ "#CTotal:\t%d\n" ++ "#CActive:\t%d\n" ++ "#NSProxy:\t%d\t%d %d %d %d %d %d\n" ++ "#InitTask:\t%d\t%d %d\n", ++ atomic_read(&vx_global_ctotal), ++ atomic_read(&vx_global_cactive), ++ atomic_read(&vs_global_nsproxy), ++ atomic_read(&vs_global_fs), ++ atomic_read(&vs_global_mnt_ns), ++ atomic_read(&vs_global_uts_ns), ++ atomic_read(&nr_ipc_ns), ++ 
atomic_read(&vs_global_user_ns), ++ atomic_read(&vs_global_pid_ns), ++ atomic_read(&init_task.usage), ++ atomic_read(&init_task.nsproxy->count), ++ init_task.fs->users); ++} ++ ++ ++int proc_vxi_info(struct vx_info *vxi, char *buffer) ++{ ++ int length; ++ ++ length = sprintf(buffer, ++ "ID:\t%d\n" ++ "Info:\t%p\n" ++ "Init:\t%d\n" ++ "OOM:\t%lld\n", ++ vxi->vx_id, ++ vxi, ++ vxi->vx_initpid, ++ vxi->vx_badness_bias); ++ return length; ++} ++ ++int proc_vxi_status(struct vx_info *vxi, char *buffer) ++{ ++ char *orig = buffer; ++ ++ buffer += sprintf(buffer, ++ "UseCnt:\t%d\n" ++ "Tasks:\t%d\n" ++ "Flags:\t%016llx\n", ++ atomic_read(&vxi->vx_usecnt), ++ atomic_read(&vxi->vx_tasks), ++ (unsigned long long)vxi->vx_flags); ++ ++ buffer += sprintf(buffer, "BCaps:\t"); ++ buffer = print_cap_t(buffer, &vxi->vx_bcaps); ++ buffer += sprintf(buffer, "\n"); ++ ++ buffer += sprintf(buffer, ++ "CCaps:\t%016llx\n" ++ "Umask:\t%16llx\n" ++ "Wmask:\t%16llx\n" ++ "Spaces:\t%08lx %08lx\n", ++ (unsigned long long)vxi->vx_ccaps, ++ (unsigned long long)vxi->vx_umask, ++ (unsigned long long)vxi->vx_wmask, ++ vxi->space[0].vx_nsmask, vxi->space[1].vx_nsmask); ++ return buffer - orig; ++} ++ ++int proc_vxi_limit(struct vx_info *vxi, char *buffer) ++{ ++ return vx_info_proc_limit(&vxi->limit, buffer); ++} ++ ++int proc_vxi_sched(struct vx_info *vxi, char *buffer) ++{ ++ int cpu, length; ++ ++ length = vx_info_proc_sched(&vxi->sched, buffer); ++ for_each_online_cpu(cpu) { ++ length += vx_info_proc_sched_pc( ++ &vx_per_cpu(vxi, sched_pc, cpu), ++ buffer + length, cpu); ++ } ++ return length; ++} ++ ++int proc_vxi_nsproxy0(struct vx_info *vxi, char *buffer) ++{ ++ return vx_info_proc_nsproxy(vxi->space[0].vx_nsproxy, buffer); ++} ++ ++int proc_vxi_nsproxy1(struct vx_info *vxi, char *buffer) ++{ ++ return vx_info_proc_nsproxy(vxi->space[1].vx_nsproxy, buffer); ++} ++ ++int proc_vxi_cvirt(struct vx_info *vxi, char *buffer) ++{ ++ int cpu, length; ++ ++ vx_update_load(vxi); ++ length = vx_info_proc_cvirt(&vxi->cvirt, buffer); ++ for_each_online_cpu(cpu) { ++ length += vx_info_proc_cvirt_pc( ++ &vx_per_cpu(vxi, cvirt_pc, cpu), ++ buffer + length, cpu); ++ } ++ return length; ++} ++ ++int proc_vxi_cacct(struct vx_info *vxi, char *buffer) ++{ ++ return vx_info_proc_cacct(&vxi->cacct, buffer); ++} ++ ++ ++static int proc_virtnet_info(char *buffer) ++{ ++ return proc_vci(buffer); ++} ++ ++static int proc_virtnet_status(char *buffer) ++{ ++ return sprintf(buffer, ++ "#CTotal:\t%d\n" ++ "#CActive:\t%d\n", ++ atomic_read(&nx_global_ctotal), ++ atomic_read(&nx_global_cactive)); ++} ++ ++int proc_nxi_info(struct nx_info *nxi, char *buffer) ++{ ++ struct nx_addr_v4 *v4a; ++#ifdef CONFIG_IPV6 ++ struct nx_addr_v6 *v6a; ++#endif ++ int length, i; ++ ++ length = sprintf(buffer, ++ "ID:\t%d\n" ++ "Info:\t%p\n" ++ "Bcast:\t" NIPQUAD_FMT "\n" ++ "Lback:\t" NIPQUAD_FMT "\n", ++ nxi->nx_id, ++ nxi, ++ NIPQUAD(nxi->v4_bcast.s_addr), ++ NIPQUAD(nxi->v4_lback.s_addr)); ++ ++ if (!NX_IPV4(nxi)) ++ goto skip_v4; ++ for (i = 0, v4a = &nxi->v4; v4a; i++, v4a = v4a->next) ++ length += sprintf(buffer + length, "%d:\t" NXAV4_FMT "\n", ++ i, NXAV4(v4a)); ++skip_v4: ++#ifdef CONFIG_IPV6 ++ if (!NX_IPV6(nxi)) ++ goto skip_v6; ++ for (i = 0, v6a = &nxi->v6; v6a; i++, v6a = v6a->next) ++ length += sprintf(buffer + length, "%d:\t" NXAV6_FMT "\n", ++ i, NXAV6(v6a)); ++skip_v6: ++#endif ++ return length; ++} ++ ++int proc_nxi_status(struct nx_info *nxi, char *buffer) ++{ ++ int length; ++ ++ length = sprintf(buffer, ++ "UseCnt:\t%d\n" ++ "Tasks:\t%d\n" ++ 
"Flags:\t%016llx\n" ++ "NCaps:\t%016llx\n", ++ atomic_read(&nxi->nx_usecnt), ++ atomic_read(&nxi->nx_tasks), ++ (unsigned long long)nxi->nx_flags, ++ (unsigned long long)nxi->nx_ncaps); ++ return length; ++} ++ ++ ++ ++/* here the inode helpers */ ++ ++struct vs_entry { ++ int len; ++ char *name; ++ mode_t mode; ++ struct inode_operations *iop; ++ struct file_operations *fop; ++ union proc_op op; ++}; ++ ++static struct inode *vs_proc_make_inode(struct super_block *sb, struct vs_entry *p) ++{ ++ struct inode *inode = new_inode(sb); ++ ++ if (!inode) ++ goto out; ++ ++ inode->i_mode = p->mode; ++ if (p->iop) ++ inode->i_op = p->iop; ++ if (p->fop) ++ inode->i_fop = p->fop; ++ ++ set_nlink(inode, (p->mode & S_IFDIR) ? 2 : 1); ++ inode->i_flags |= S_IMMUTABLE; ++ ++ inode->i_mtime = inode->i_atime = inode->i_ctime = CURRENT_TIME; ++ ++ inode->i_uid = 0; ++ inode->i_gid = 0; ++ inode->i_tag = 0; ++out: ++ return inode; ++} ++ ++static struct dentry *vs_proc_instantiate(struct inode *dir, ++ struct dentry *dentry, int id, void *ptr) ++{ ++ struct vs_entry *p = ptr; ++ struct inode *inode = vs_proc_make_inode(dir->i_sb, p); ++ struct dentry *error = ERR_PTR(-EINVAL); ++ ++ if (!inode) ++ goto out; ++ ++ PROC_I(inode)->op = p->op; ++ PROC_I(inode)->fd = id; ++ d_add(dentry, inode); ++ error = NULL; ++out: ++ return error; ++} ++ ++/* Lookups */ ++ ++typedef struct dentry *instantiate_t(struct inode *, struct dentry *, int, void *); ++ ++/* ++ * Fill a directory entry. ++ * ++ * If possible create the dcache entry and derive our inode number and ++ * file type from dcache entry. ++ * ++ * Since all of the proc inode numbers are dynamically generated, the inode ++ * numbers do not exist until the inode is cache. This means creating the ++ * the dcache entry in readdir is necessary to keep the inode numbers ++ * reported by readdir in sync with the inode numbers reported ++ * by stat. 
++ */ ++static int proc_fill_cache(struct file *filp, void *dirent, filldir_t filldir, ++ char *name, int len, instantiate_t instantiate, int id, void *ptr) ++{ ++ struct dentry *child, *dir = filp->f_dentry; ++ struct inode *inode; ++ struct qstr qname; ++ ino_t ino = 0; ++ unsigned type = DT_UNKNOWN; ++ ++ qname.name = name; ++ qname.len = len; ++ qname.hash = full_name_hash(name, len); ++ ++ child = d_lookup(dir, &qname); ++ if (!child) { ++ struct dentry *new; ++ new = d_alloc(dir, &qname); ++ if (new) { ++ child = instantiate(dir->d_inode, new, id, ptr); ++ if (child) ++ dput(new); ++ else ++ child = new; ++ } ++ } ++ if (!child || IS_ERR(child) || !child->d_inode) ++ goto end_instantiate; ++ inode = child->d_inode; ++ if (inode) { ++ ino = inode->i_ino; ++ type = inode->i_mode >> 12; ++ } ++ dput(child); ++end_instantiate: ++ if (!ino) ++ ino = find_inode_number(dir, &qname); ++ if (!ino) ++ ino = 1; ++ return filldir(dirent, name, len, filp->f_pos, ino, type); ++} ++ ++ ++ ++/* get and revalidate vx_info/xid */ ++ ++static inline ++struct vx_info *get_proc_vx_info(struct inode *inode) ++{ ++ return lookup_vx_info(PROC_I(inode)->fd); ++} ++ ++static int proc_xid_revalidate(struct dentry *dentry, struct nameidata *nd) ++{ ++ struct inode *inode = dentry->d_inode; ++ xid_t xid = PROC_I(inode)->fd; ++ ++ if (!xid || xid_is_hashed(xid)) ++ return 1; ++ d_drop(dentry); ++ return 0; ++} ++ ++ ++/* get and revalidate nx_info/nid */ ++ ++static int proc_nid_revalidate(struct dentry *dentry, struct nameidata *nd) ++{ ++ struct inode *inode = dentry->d_inode; ++ nid_t nid = PROC_I(inode)->fd; ++ ++ if (!nid || nid_is_hashed(nid)) ++ return 1; ++ d_drop(dentry); ++ return 0; ++} ++ ++ ++ ++#define PROC_BLOCK_SIZE (PAGE_SIZE - 1024) ++ ++static ssize_t proc_vs_info_read(struct file *file, char __user *buf, ++ size_t count, loff_t *ppos) ++{ ++ struct inode *inode = file->f_dentry->d_inode; ++ unsigned long page; ++ ssize_t length = 0; ++ ++ if (count > PROC_BLOCK_SIZE) ++ count = PROC_BLOCK_SIZE; ++ ++ /* fade that out as soon as stable */ ++ WARN_ON(PROC_I(inode)->fd); ++ ++ if (!(page = __get_free_page(GFP_KERNEL))) ++ return -ENOMEM; ++ ++ BUG_ON(!PROC_I(inode)->op.proc_vs_read); ++ length = PROC_I(inode)->op.proc_vs_read((char *)page); ++ ++ if (length >= 0) ++ length = simple_read_from_buffer(buf, count, ppos, ++ (char *)page, length); ++ ++ free_page(page); ++ return length; ++} ++ ++static ssize_t proc_vx_info_read(struct file *file, char __user *buf, ++ size_t count, loff_t *ppos) ++{ ++ struct inode *inode = file->f_dentry->d_inode; ++ struct vx_info *vxi = NULL; ++ xid_t xid = PROC_I(inode)->fd; ++ unsigned long page; ++ ssize_t length = 0; ++ ++ if (count > PROC_BLOCK_SIZE) ++ count = PROC_BLOCK_SIZE; ++ ++ /* fade that out as soon as stable */ ++ WARN_ON(!xid); ++ vxi = lookup_vx_info(xid); ++ if (!vxi) ++ goto out; ++ ++ length = -ENOMEM; ++ if (!(page = __get_free_page(GFP_KERNEL))) ++ goto out_put; ++ ++ BUG_ON(!PROC_I(inode)->op.proc_vxi_read); ++ length = PROC_I(inode)->op.proc_vxi_read(vxi, (char *)page); ++ ++ if (length >= 0) ++ length = simple_read_from_buffer(buf, count, ppos, ++ (char *)page, length); ++ ++ free_page(page); ++out_put: ++ put_vx_info(vxi); ++out: ++ return length; ++} ++ ++static ssize_t proc_nx_info_read(struct file *file, char __user *buf, ++ size_t count, loff_t *ppos) ++{ ++ struct inode *inode = file->f_dentry->d_inode; ++ struct nx_info *nxi = NULL; ++ nid_t nid = PROC_I(inode)->fd; ++ unsigned long page; ++ ssize_t length = 0; ++ ++ if (count > 
PROC_BLOCK_SIZE) ++ count = PROC_BLOCK_SIZE; ++ ++ /* fade that out as soon as stable */ ++ WARN_ON(!nid); ++ nxi = lookup_nx_info(nid); ++ if (!nxi) ++ goto out; ++ ++ length = -ENOMEM; ++ if (!(page = __get_free_page(GFP_KERNEL))) ++ goto out_put; ++ ++ BUG_ON(!PROC_I(inode)->op.proc_nxi_read); ++ length = PROC_I(inode)->op.proc_nxi_read(nxi, (char *)page); ++ ++ if (length >= 0) ++ length = simple_read_from_buffer(buf, count, ppos, ++ (char *)page, length); ++ ++ free_page(page); ++out_put: ++ put_nx_info(nxi); ++out: ++ return length; ++} ++ ++ ++ ++/* here comes the lower level */ ++ ++ ++#define NOD(NAME, MODE, IOP, FOP, OP) { \ ++ .len = sizeof(NAME) - 1, \ ++ .name = (NAME), \ ++ .mode = MODE, \ ++ .iop = IOP, \ ++ .fop = FOP, \ ++ .op = OP, \ ++} ++ ++ ++#define DIR(NAME, MODE, OTYPE) \ ++ NOD(NAME, (S_IFDIR | (MODE)), \ ++ &proc_ ## OTYPE ## _inode_operations, \ ++ &proc_ ## OTYPE ## _file_operations, { } ) ++ ++#define INF(NAME, MODE, OTYPE) \ ++ NOD(NAME, (S_IFREG | (MODE)), NULL, \ ++ &proc_vs_info_file_operations, \ ++ { .proc_vs_read = &proc_##OTYPE } ) ++ ++#define VINF(NAME, MODE, OTYPE) \ ++ NOD(NAME, (S_IFREG | (MODE)), NULL, \ ++ &proc_vx_info_file_operations, \ ++ { .proc_vxi_read = &proc_##OTYPE } ) ++ ++#define NINF(NAME, MODE, OTYPE) \ ++ NOD(NAME, (S_IFREG | (MODE)), NULL, \ ++ &proc_nx_info_file_operations, \ ++ { .proc_nxi_read = &proc_##OTYPE } ) ++ ++ ++static struct file_operations proc_vs_info_file_operations = { ++ .read = proc_vs_info_read, ++}; ++ ++static struct file_operations proc_vx_info_file_operations = { ++ .read = proc_vx_info_read, ++}; ++ ++static struct dentry_operations proc_xid_dentry_operations = { ++ .d_revalidate = proc_xid_revalidate, ++}; ++ ++static struct vs_entry vx_base_stuff[] = { ++ VINF("info", S_IRUGO, vxi_info), ++ VINF("status", S_IRUGO, vxi_status), ++ VINF("limit", S_IRUGO, vxi_limit), ++ VINF("sched", S_IRUGO, vxi_sched), ++ VINF("nsproxy", S_IRUGO, vxi_nsproxy0), ++ VINF("nsproxy1",S_IRUGO, vxi_nsproxy1), ++ VINF("cvirt", S_IRUGO, vxi_cvirt), ++ VINF("cacct", S_IRUGO, vxi_cacct), ++ {} ++}; ++ ++ ++ ++ ++static struct dentry *proc_xid_instantiate(struct inode *dir, ++ struct dentry *dentry, int id, void *ptr) ++{ ++ dentry->d_op = &proc_xid_dentry_operations; ++ return vs_proc_instantiate(dir, dentry, id, ptr); ++} ++ ++static struct dentry *proc_xid_lookup(struct inode *dir, ++ struct dentry *dentry, struct nameidata *nd) ++{ ++ struct vs_entry *p = vx_base_stuff; ++ struct dentry *error = ERR_PTR(-ENOENT); ++ ++ for (; p->name; p++) { ++ if (p->len != dentry->d_name.len) ++ continue; ++ if (!memcmp(dentry->d_name.name, p->name, p->len)) ++ break; ++ } ++ if (!p->name) ++ goto out; ++ ++ error = proc_xid_instantiate(dir, dentry, PROC_I(dir)->fd, p); ++out: ++ return error; ++} ++ ++static int proc_xid_readdir(struct file *filp, ++ void *dirent, filldir_t filldir) ++{ ++ struct dentry *dentry = filp->f_dentry; ++ struct inode *inode = dentry->d_inode; ++ struct vs_entry *p = vx_base_stuff; ++ int size = sizeof(vx_base_stuff) / sizeof(struct vs_entry); ++ int pos, index; ++ u64 ino; ++ ++ pos = filp->f_pos; ++ switch (pos) { ++ case 0: ++ ino = inode->i_ino; ++ if (filldir(dirent, ".", 1, pos, ino, DT_DIR) < 0) ++ goto out; ++ pos++; ++ /* fall through */ ++ case 1: ++ ino = parent_ino(dentry); ++ if (filldir(dirent, "..", 2, pos, ino, DT_DIR) < 0) ++ goto out; ++ pos++; ++ /* fall through */ ++ default: ++ index = pos - 2; ++ if (index >= size) ++ goto out; ++ for (p += index; p->name; p++) { ++ if (proc_fill_cache(filp, 
dirent, filldir, p->name, p->len, ++ vs_proc_instantiate, PROC_I(inode)->fd, p)) ++ goto out; ++ pos++; ++ } ++ } ++out: ++ filp->f_pos = pos; ++ return 1; ++} ++ ++ ++ ++static struct file_operations proc_nx_info_file_operations = { ++ .read = proc_nx_info_read, ++}; ++ ++static struct dentry_operations proc_nid_dentry_operations = { ++ .d_revalidate = proc_nid_revalidate, ++}; ++ ++static struct vs_entry nx_base_stuff[] = { ++ NINF("info", S_IRUGO, nxi_info), ++ NINF("status", S_IRUGO, nxi_status), ++ {} ++}; ++ ++ ++static struct dentry *proc_nid_instantiate(struct inode *dir, ++ struct dentry *dentry, int id, void *ptr) ++{ ++ dentry->d_op = &proc_nid_dentry_operations; ++ return vs_proc_instantiate(dir, dentry, id, ptr); ++} ++ ++static struct dentry *proc_nid_lookup(struct inode *dir, ++ struct dentry *dentry, struct nameidata *nd) ++{ ++ struct vs_entry *p = nx_base_stuff; ++ struct dentry *error = ERR_PTR(-ENOENT); ++ ++ for (; p->name; p++) { ++ if (p->len != dentry->d_name.len) ++ continue; ++ if (!memcmp(dentry->d_name.name, p->name, p->len)) ++ break; ++ } ++ if (!p->name) ++ goto out; ++ ++ error = proc_nid_instantiate(dir, dentry, PROC_I(dir)->fd, p); ++out: ++ return error; ++} ++ ++static int proc_nid_readdir(struct file *filp, ++ void *dirent, filldir_t filldir) ++{ ++ struct dentry *dentry = filp->f_dentry; ++ struct inode *inode = dentry->d_inode; ++ struct vs_entry *p = nx_base_stuff; ++ int size = sizeof(nx_base_stuff) / sizeof(struct vs_entry); ++ int pos, index; ++ u64 ino; ++ ++ pos = filp->f_pos; ++ switch (pos) { ++ case 0: ++ ino = inode->i_ino; ++ if (filldir(dirent, ".", 1, pos, ino, DT_DIR) < 0) ++ goto out; ++ pos++; ++ /* fall through */ ++ case 1: ++ ino = parent_ino(dentry); ++ if (filldir(dirent, "..", 2, pos, ino, DT_DIR) < 0) ++ goto out; ++ pos++; ++ /* fall through */ ++ default: ++ index = pos - 2; ++ if (index >= size) ++ goto out; ++ for (p += index; p->name; p++) { ++ if (proc_fill_cache(filp, dirent, filldir, p->name, p->len, ++ vs_proc_instantiate, PROC_I(inode)->fd, p)) ++ goto out; ++ pos++; ++ } ++ } ++out: ++ filp->f_pos = pos; ++ return 1; ++} ++ ++ ++#define MAX_MULBY10 ((~0U - 9) / 10) ++ ++static inline int atovid(const char *str, int len) ++{ ++ int vid, c; ++ ++ vid = 0; ++ while (len-- > 0) { ++ c = *str - '0'; ++ str++; ++ if (c > 9) ++ return -1; ++ if (vid >= MAX_MULBY10) ++ return -1; ++ vid *= 10; ++ vid += c; ++ if (!vid) ++ return -1; ++ } ++ return vid; ++} ++ ++/* now the upper level (virtual) */ ++ ++ ++static struct file_operations proc_xid_file_operations = { ++ .read = generic_read_dir, ++ .readdir = proc_xid_readdir, ++}; ++ ++static struct inode_operations proc_xid_inode_operations = { ++ .lookup = proc_xid_lookup, ++}; ++ ++static struct vs_entry vx_virtual_stuff[] = { ++ INF("info", S_IRUGO, virtual_info), ++ INF("status", S_IRUGO, virtual_status), ++ DIR(NULL, S_IRUGO | S_IXUGO, xid), ++}; ++ ++ ++static struct dentry *proc_virtual_lookup(struct inode *dir, ++ struct dentry *dentry, struct nameidata *nd) ++{ ++ struct vs_entry *p = vx_virtual_stuff; ++ struct dentry *error = ERR_PTR(-ENOENT); ++ int id = 0; ++ ++ for (; p->name; p++) { ++ if (p->len != dentry->d_name.len) ++ continue; ++ if (!memcmp(dentry->d_name.name, p->name, p->len)) ++ break; ++ } ++ if (p->name) ++ goto instantiate; ++ ++ id = atovid(dentry->d_name.name, dentry->d_name.len); ++ if ((id < 0) || !xid_is_hashed(id)) ++ goto out; ++ ++instantiate: ++ error = proc_xid_instantiate(dir, dentry, id, p); ++out: ++ return error; ++} ++ ++static struct 
file_operations proc_nid_file_operations = { ++ .read = generic_read_dir, ++ .readdir = proc_nid_readdir, ++}; ++ ++static struct inode_operations proc_nid_inode_operations = { ++ .lookup = proc_nid_lookup, ++}; ++ ++static struct vs_entry nx_virtnet_stuff[] = { ++ INF("info", S_IRUGO, virtnet_info), ++ INF("status", S_IRUGO, virtnet_status), ++ DIR(NULL, S_IRUGO | S_IXUGO, nid), ++}; ++ ++ ++static struct dentry *proc_virtnet_lookup(struct inode *dir, ++ struct dentry *dentry, struct nameidata *nd) ++{ ++ struct vs_entry *p = nx_virtnet_stuff; ++ struct dentry *error = ERR_PTR(-ENOENT); ++ int id = 0; ++ ++ for (; p->name; p++) { ++ if (p->len != dentry->d_name.len) ++ continue; ++ if (!memcmp(dentry->d_name.name, p->name, p->len)) ++ break; ++ } ++ if (p->name) ++ goto instantiate; ++ ++ id = atovid(dentry->d_name.name, dentry->d_name.len); ++ if ((id < 0) || !nid_is_hashed(id)) ++ goto out; ++ ++instantiate: ++ error = proc_nid_instantiate(dir, dentry, id, p); ++out: ++ return error; ++} ++ ++ ++#define PROC_MAXVIDS 32 ++ ++int proc_virtual_readdir(struct file *filp, ++ void *dirent, filldir_t filldir) ++{ ++ struct dentry *dentry = filp->f_dentry; ++ struct inode *inode = dentry->d_inode; ++ struct vs_entry *p = vx_virtual_stuff; ++ int size = sizeof(vx_virtual_stuff) / sizeof(struct vs_entry); ++ int pos, index; ++ unsigned int xid_array[PROC_MAXVIDS]; ++ char buf[PROC_NUMBUF]; ++ unsigned int nr_xids, i; ++ u64 ino; ++ ++ pos = filp->f_pos; ++ switch (pos) { ++ case 0: ++ ino = inode->i_ino; ++ if (filldir(dirent, ".", 1, pos, ino, DT_DIR) < 0) ++ goto out; ++ pos++; ++ /* fall through */ ++ case 1: ++ ino = parent_ino(dentry); ++ if (filldir(dirent, "..", 2, pos, ino, DT_DIR) < 0) ++ goto out; ++ pos++; ++ /* fall through */ ++ default: ++ index = pos - 2; ++ if (index >= size) ++ goto entries; ++ for (p += index; p->name; p++) { ++ if (proc_fill_cache(filp, dirent, filldir, p->name, p->len, ++ vs_proc_instantiate, 0, p)) ++ goto out; ++ pos++; ++ } ++ entries: ++ index = pos - size; ++ p = &vx_virtual_stuff[size - 1]; ++ nr_xids = get_xid_list(index, xid_array, PROC_MAXVIDS); ++ for (i = 0; i < nr_xids; i++) { ++ int n, xid = xid_array[i]; ++ unsigned int j = PROC_NUMBUF; ++ ++ n = xid; ++ do ++ buf[--j] = '0' + (n % 10); ++ while (n /= 10); ++ ++ if (proc_fill_cache(filp, dirent, filldir, ++ buf + j, PROC_NUMBUF - j, ++ vs_proc_instantiate, xid, p)) ++ goto out; ++ pos++; ++ } ++ } ++out: ++ filp->f_pos = pos; ++ return 0; ++} ++ ++static int proc_virtual_getattr(struct vfsmount *mnt, ++ struct dentry *dentry, struct kstat *stat) ++{ ++ struct inode *inode = dentry->d_inode; ++ ++ generic_fillattr(inode, stat); ++ stat->nlink = 2 + atomic_read(&vx_global_cactive); ++ return 0; ++} ++ ++static struct file_operations proc_virtual_dir_operations = { ++ .read = generic_read_dir, ++ .readdir = proc_virtual_readdir, ++}; ++ ++static struct inode_operations proc_virtual_dir_inode_operations = { ++ .getattr = proc_virtual_getattr, ++ .lookup = proc_virtual_lookup, ++}; ++ ++ ++ ++ ++ ++int proc_virtnet_readdir(struct file *filp, ++ void *dirent, filldir_t filldir) ++{ ++ struct dentry *dentry = filp->f_dentry; ++ struct inode *inode = dentry->d_inode; ++ struct vs_entry *p = nx_virtnet_stuff; ++ int size = sizeof(nx_virtnet_stuff) / sizeof(struct vs_entry); ++ int pos, index; ++ unsigned int nid_array[PROC_MAXVIDS]; ++ char buf[PROC_NUMBUF]; ++ unsigned int nr_nids, i; ++ u64 ino; ++ ++ pos = filp->f_pos; ++ switch (pos) { ++ case 0: ++ ino = inode->i_ino; ++ if (filldir(dirent, ".", 1, 
pos, ino, DT_DIR) < 0) ++ goto out; ++ pos++; ++ /* fall through */ ++ case 1: ++ ino = parent_ino(dentry); ++ if (filldir(dirent, "..", 2, pos, ino, DT_DIR) < 0) ++ goto out; ++ pos++; ++ /* fall through */ ++ default: ++ index = pos - 2; ++ if (index >= size) ++ goto entries; ++ for (p += index; p->name; p++) { ++ if (proc_fill_cache(filp, dirent, filldir, p->name, p->len, ++ vs_proc_instantiate, 0, p)) ++ goto out; ++ pos++; ++ } ++ entries: ++ index = pos - size; ++ p = &nx_virtnet_stuff[size - 1]; ++ nr_nids = get_nid_list(index, nid_array, PROC_MAXVIDS); ++ for (i = 0; i < nr_nids; i++) { ++ int n, nid = nid_array[i]; ++ unsigned int j = PROC_NUMBUF; ++ ++ n = nid; ++ do ++ buf[--j] = '0' + (n % 10); ++ while (n /= 10); ++ ++ if (proc_fill_cache(filp, dirent, filldir, ++ buf + j, PROC_NUMBUF - j, ++ vs_proc_instantiate, nid, p)) ++ goto out; ++ pos++; ++ } ++ } ++out: ++ filp->f_pos = pos; ++ return 0; ++} ++ ++static int proc_virtnet_getattr(struct vfsmount *mnt, ++ struct dentry *dentry, struct kstat *stat) ++{ ++ struct inode *inode = dentry->d_inode; ++ ++ generic_fillattr(inode, stat); ++ stat->nlink = 2 + atomic_read(&nx_global_cactive); ++ return 0; ++} ++ ++static struct file_operations proc_virtnet_dir_operations = { ++ .read = generic_read_dir, ++ .readdir = proc_virtnet_readdir, ++}; ++ ++static struct inode_operations proc_virtnet_dir_inode_operations = { ++ .getattr = proc_virtnet_getattr, ++ .lookup = proc_virtnet_lookup, ++}; ++ ++ ++ ++void proc_vx_init(void) ++{ ++ struct proc_dir_entry *ent; ++ ++ ent = proc_mkdir("virtual", 0); ++ if (ent) { ++ ent->proc_fops = &proc_virtual_dir_operations; ++ ent->proc_iops = &proc_virtual_dir_inode_operations; ++ } ++ proc_virtual = ent; ++ ++ ent = proc_mkdir("virtnet", 0); ++ if (ent) { ++ ent->proc_fops = &proc_virtnet_dir_operations; ++ ent->proc_iops = &proc_virtnet_dir_inode_operations; ++ } ++ proc_virtnet = ent; ++} ++ ++ ++ ++ ++/* per pid info */ ++ ++ ++int proc_pid_vx_info(struct task_struct *p, char *buffer) ++{ ++ struct vx_info *vxi; ++ char *orig = buffer; ++ ++ buffer += sprintf(buffer, "XID:\t%d\n", vx_task_xid(p)); ++ ++ vxi = task_get_vx_info(p); ++ if (!vxi) ++ goto out; ++ ++ buffer += sprintf(buffer, "BCaps:\t"); ++ buffer = print_cap_t(buffer, &vxi->vx_bcaps); ++ buffer += sprintf(buffer, "\n"); ++ buffer += sprintf(buffer, "CCaps:\t%016llx\n", ++ (unsigned long long)vxi->vx_ccaps); ++ buffer += sprintf(buffer, "CFlags:\t%016llx\n", ++ (unsigned long long)vxi->vx_flags); ++ buffer += sprintf(buffer, "CIPid:\t%d\n", vxi->vx_initpid); ++ ++ put_vx_info(vxi); ++out: ++ return buffer - orig; ++} ++ ++ ++int proc_pid_nx_info(struct task_struct *p, char *buffer) ++{ ++ struct nx_info *nxi; ++ struct nx_addr_v4 *v4a; ++#ifdef CONFIG_IPV6 ++ struct nx_addr_v6 *v6a; ++#endif ++ char *orig = buffer; ++ int i; ++ ++ buffer += sprintf(buffer, "NID:\t%d\n", nx_task_nid(p)); ++ ++ nxi = task_get_nx_info(p); ++ if (!nxi) ++ goto out; ++ ++ buffer += sprintf(buffer, "NCaps:\t%016llx\n", ++ (unsigned long long)nxi->nx_ncaps); ++ buffer += sprintf(buffer, "NFlags:\t%016llx\n", ++ (unsigned long long)nxi->nx_flags); ++ ++ buffer += sprintf(buffer, ++ "V4Root[bcast]:\t" NIPQUAD_FMT "\n", ++ NIPQUAD(nxi->v4_bcast.s_addr)); ++ buffer += sprintf (buffer, ++ "V4Root[lback]:\t" NIPQUAD_FMT "\n", ++ NIPQUAD(nxi->v4_lback.s_addr)); ++ if (!NX_IPV4(nxi)) ++ goto skip_v4; ++ for (i = 0, v4a = &nxi->v4; v4a; i++, v4a = v4a->next) ++ buffer += sprintf(buffer, "V4Root[%d]:\t" NXAV4_FMT "\n", ++ i, NXAV4(v4a)); ++skip_v4: ++#ifdef 
CONFIG_IPV6 ++ if (!NX_IPV6(nxi)) ++ goto skip_v6; ++ for (i = 0, v6a = &nxi->v6; v6a; i++, v6a = v6a->next) ++ buffer += sprintf(buffer, "V6Root[%d]:\t" NXAV6_FMT "\n", ++ i, NXAV6(v6a)); ++skip_v6: ++#endif ++ put_nx_info(nxi); ++out: ++ return buffer - orig; ++} ++ +diff -NurpP --minimal linux-3.3.8/kernel/vserver/sched.c linux-3.3.8-vs2.3.3.4/kernel/vserver/sched.c +--- linux-3.3.8/kernel/vserver/sched.c 1970-01-01 01:00:00.000000000 +0100 ++++ linux-3.3.8-vs2.3.3.4/kernel/vserver/sched.c 2012-02-24 03:55:07.000000000 +0100 +@@ -0,0 +1,82 @@ ++/* ++ * linux/kernel/vserver/sched.c ++ * ++ * Virtual Server: Scheduler Support ++ * ++ * Copyright (C) 2004-2010 Herbert Pötzl ++ * ++ * V0.01 adapted Sam Vilains version to 2.6.3 ++ * V0.02 removed legacy interface ++ * V0.03 changed vcmds to vxi arg ++ * V0.04 removed older and legacy interfaces ++ * V0.05 removed scheduler code/commands ++ * ++ */ ++ ++#include ++#include ++#include ++ ++#include ++ ++ ++void vx_update_sched_param(struct _vx_sched *sched, ++ struct _vx_sched_pc *sched_pc) ++{ ++ sched_pc->prio_bias = sched->prio_bias; ++} ++ ++static int do_set_prio_bias(struct vx_info *vxi, struct vcmd_prio_bias *data) ++{ ++ int cpu; ++ ++ if (data->prio_bias > MAX_PRIO_BIAS) ++ data->prio_bias = MAX_PRIO_BIAS; ++ if (data->prio_bias < MIN_PRIO_BIAS) ++ data->prio_bias = MIN_PRIO_BIAS; ++ ++ if (data->cpu_id != ~0) { ++ vxi->sched.update = cpumask_of_cpu(data->cpu_id); ++ cpus_and(vxi->sched.update, cpu_online_map, ++ vxi->sched.update); ++ } else ++ vxi->sched.update = cpu_online_map; ++ ++ for_each_cpu_mask(cpu, vxi->sched.update) ++ vx_update_sched_param(&vxi->sched, ++ &vx_per_cpu(vxi, sched_pc, cpu)); ++ return 0; ++} ++ ++int vc_set_prio_bias(struct vx_info *vxi, void __user *data) ++{ ++ struct vcmd_prio_bias vc_data; ++ ++ if (copy_from_user(&vc_data, data, sizeof(vc_data))) ++ return -EFAULT; ++ ++ return do_set_prio_bias(vxi, &vc_data); ++} ++ ++int vc_get_prio_bias(struct vx_info *vxi, void __user *data) ++{ ++ struct vcmd_prio_bias vc_data; ++ struct _vx_sched_pc *pcd; ++ int cpu; ++ ++ if (copy_from_user(&vc_data, data, sizeof(vc_data))) ++ return -EFAULT; ++ ++ cpu = vc_data.cpu_id; ++ ++ if (!cpu_possible(cpu)) ++ return -EINVAL; ++ ++ pcd = &vx_per_cpu(vxi, sched_pc, cpu); ++ vc_data.prio_bias = pcd->prio_bias; ++ ++ if (copy_to_user(data, &vc_data, sizeof(vc_data))) ++ return -EFAULT; ++ return 0; ++} ++ +diff -NurpP --minimal linux-3.3.8/kernel/vserver/sched_init.h linux-3.3.8-vs2.3.3.4/kernel/vserver/sched_init.h +--- linux-3.3.8/kernel/vserver/sched_init.h 1970-01-01 01:00:00.000000000 +0100 ++++ linux-3.3.8-vs2.3.3.4/kernel/vserver/sched_init.h 2012-02-24 03:55:07.000000000 +0100 +@@ -0,0 +1,27 @@ ++ ++static inline void vx_info_init_sched(struct _vx_sched *sched) ++{ ++ /* scheduling; hard code starting values as constants */ ++ sched->prio_bias = 0; ++} ++ ++static inline ++void vx_info_init_sched_pc(struct _vx_sched_pc *sched_pc, int cpu) ++{ ++ sched_pc->prio_bias = 0; ++ ++ sched_pc->user_ticks = 0; ++ sched_pc->sys_ticks = 0; ++ sched_pc->hold_ticks = 0; ++} ++ ++static inline void vx_info_exit_sched(struct _vx_sched *sched) ++{ ++ return; ++} ++ ++static inline ++void vx_info_exit_sched_pc(struct _vx_sched_pc *sched_pc, int cpu) ++{ ++ return; ++} +diff -NurpP --minimal linux-3.3.8/kernel/vserver/sched_proc.h linux-3.3.8-vs2.3.3.4/kernel/vserver/sched_proc.h +--- linux-3.3.8/kernel/vserver/sched_proc.h 1970-01-01 01:00:00.000000000 +0100 ++++ linux-3.3.8-vs2.3.3.4/kernel/vserver/sched_proc.h 2012-02-24 
03:55:07.000000000 +0100 +@@ -0,0 +1,32 @@ ++#ifndef _VX_SCHED_PROC_H ++#define _VX_SCHED_PROC_H ++ ++ ++static inline ++int vx_info_proc_sched(struct _vx_sched *sched, char *buffer) ++{ ++ int length = 0; ++ ++ length += sprintf(buffer, ++ "PrioBias:\t%8d\n", ++ sched->prio_bias); ++ return length; ++} ++ ++static inline ++int vx_info_proc_sched_pc(struct _vx_sched_pc *sched_pc, ++ char *buffer, int cpu) ++{ ++ int length = 0; ++ ++ length += sprintf(buffer + length, ++ "cpu %d: %lld %lld %lld", cpu, ++ (unsigned long long)sched_pc->user_ticks, ++ (unsigned long long)sched_pc->sys_ticks, ++ (unsigned long long)sched_pc->hold_ticks); ++ length += sprintf(buffer + length, ++ " %d\n", sched_pc->prio_bias); ++ return length; ++} ++ ++#endif /* _VX_SCHED_PROC_H */ +diff -NurpP --minimal linux-3.3.8/kernel/vserver/signal.c linux-3.3.8-vs2.3.3.4/kernel/vserver/signal.c +--- linux-3.3.8/kernel/vserver/signal.c 1970-01-01 01:00:00.000000000 +0100 ++++ linux-3.3.8-vs2.3.3.4/kernel/vserver/signal.c 2012-02-24 03:55:07.000000000 +0100 +@@ -0,0 +1,134 @@ ++/* ++ * linux/kernel/vserver/signal.c ++ * ++ * Virtual Server: Signal Support ++ * ++ * Copyright (C) 2003-2007 Herbert Pötzl ++ * ++ * V0.01 broken out from vcontext V0.05 ++ * V0.02 changed vcmds to vxi arg ++ * V0.03 adjusted siginfo for kill ++ * ++ */ ++ ++#include ++ ++#include ++#include ++#include ++ ++ ++int vx_info_kill(struct vx_info *vxi, int pid, int sig) ++{ ++ int retval, count = 0; ++ struct task_struct *p; ++ struct siginfo *sip = SEND_SIG_PRIV; ++ ++ retval = -ESRCH; ++ vxdprintk(VXD_CBIT(misc, 4), ++ "vx_info_kill(%p[#%d],%d,%d)*", ++ vxi, vxi->vx_id, pid, sig); ++ read_lock(&tasklist_lock); ++ switch (pid) { ++ case 0: ++ case -1: ++ for_each_process(p) { ++ int err = 0; ++ ++ if (vx_task_xid(p) != vxi->vx_id || p->pid <= 1 || ++ (pid && vxi->vx_initpid == p->pid)) ++ continue; ++ ++ err = group_send_sig_info(sig, sip, p); ++ ++count; ++ if (err != -EPERM) ++ retval = err; ++ } ++ break; ++ ++ case 1: ++ if (vxi->vx_initpid) { ++ pid = vxi->vx_initpid; ++ /* for now, only SIGINT to private init ... */ ++ if (!vx_info_flags(vxi, VXF_STATE_ADMIN, 0) && ++ /* ... 
as long as there are tasks left */ ++ (atomic_read(&vxi->vx_tasks) > 1)) ++ sig = SIGINT; ++ } ++ /* fallthrough */ ++ default: ++ rcu_read_lock(); ++ p = find_task_by_real_pid(pid); ++ rcu_read_unlock(); ++ if (p) { ++ if (vx_task_xid(p) == vxi->vx_id) ++ retval = group_send_sig_info(sig, sip, p); ++ } ++ break; ++ } ++ read_unlock(&tasklist_lock); ++ vxdprintk(VXD_CBIT(misc, 4), ++ "vx_info_kill(%p[#%d],%d,%d,%ld) = %d", ++ vxi, vxi->vx_id, pid, sig, (long)sip, retval); ++ return retval; ++} ++ ++int vc_ctx_kill(struct vx_info *vxi, void __user *data) ++{ ++ struct vcmd_ctx_kill_v0 vc_data; ++ ++ if (copy_from_user(&vc_data, data, sizeof(vc_data))) ++ return -EFAULT; ++ ++ /* special check to allow guest shutdown */ ++ if (!vx_info_flags(vxi, VXF_STATE_ADMIN, 0) && ++ /* forbid killall pid=0 when init is present */ ++ (((vc_data.pid < 1) && vxi->vx_initpid) || ++ (vc_data.pid > 1))) ++ return -EACCES; ++ ++ return vx_info_kill(vxi, vc_data.pid, vc_data.sig); ++} ++ ++ ++static int __wait_exit(struct vx_info *vxi) ++{ ++ DECLARE_WAITQUEUE(wait, current); ++ int ret = 0; ++ ++ add_wait_queue(&vxi->vx_wait, &wait); ++ set_current_state(TASK_INTERRUPTIBLE); ++ ++wait: ++ if (vx_info_state(vxi, ++ VXS_SHUTDOWN | VXS_HASHED | VXS_HELPER) == VXS_SHUTDOWN) ++ goto out; ++ if (signal_pending(current)) { ++ ret = -ERESTARTSYS; ++ goto out; ++ } ++ schedule(); ++ goto wait; ++ ++out: ++ set_current_state(TASK_RUNNING); ++ remove_wait_queue(&vxi->vx_wait, &wait); ++ return ret; ++} ++ ++ ++ ++int vc_wait_exit(struct vx_info *vxi, void __user *data) ++{ ++ struct vcmd_wait_exit_v0 vc_data; ++ int ret; ++ ++ ret = __wait_exit(vxi); ++ vc_data.reboot_cmd = vxi->reboot_cmd; ++ vc_data.exit_code = vxi->exit_code; ++ ++ if (copy_to_user(data, &vc_data, sizeof(vc_data))) ++ ret = -EFAULT; ++ return ret; ++} ++ +diff -NurpP --minimal linux-3.3.8/kernel/vserver/space.c linux-3.3.8-vs2.3.3.4/kernel/vserver/space.c +--- linux-3.3.8/kernel/vserver/space.c 1970-01-01 01:00:00.000000000 +0100 ++++ linux-3.3.8-vs2.3.3.4/kernel/vserver/space.c 2012-02-24 17:01:40.000000000 +0100 +@@ -0,0 +1,436 @@ ++/* ++ * linux/kernel/vserver/space.c ++ * ++ * Virtual Server: Context Space Support ++ * ++ * Copyright (C) 2003-2010 Herbert Pötzl ++ * ++ * V0.01 broken out from context.c 0.07 ++ * V0.02 added task locking for namespace ++ * V0.03 broken out vx_enter_namespace ++ * V0.04 added *space support and commands ++ * V0.05 added credential support ++ * ++ */ ++ ++#include ++#include ++#include ++#include ++#include ++#include ++ ++#include ++#include ++#include ++ ++atomic_t vs_global_nsproxy = ATOMIC_INIT(0); ++atomic_t vs_global_fs = ATOMIC_INIT(0); ++atomic_t vs_global_mnt_ns = ATOMIC_INIT(0); ++atomic_t vs_global_uts_ns = ATOMIC_INIT(0); ++atomic_t vs_global_user_ns = ATOMIC_INIT(0); ++atomic_t vs_global_pid_ns = ATOMIC_INIT(0); ++ ++ ++/* namespace functions */ ++ ++#include ++#include ++#include ++#include ++#include ++#include "../fs/mount.h" ++ ++ ++static const struct vcmd_space_mask_v1 space_mask_v0 = { ++ .mask = CLONE_FS | ++ CLONE_NEWNS | ++#ifdef CONFIG_UTS_NS ++ CLONE_NEWUTS | ++#endif ++#ifdef CONFIG_IPC_NS ++ CLONE_NEWIPC | ++#endif ++#ifdef CONFIG_USER_NS ++ CLONE_NEWUSER | ++#endif ++ 0 ++}; ++ ++static const struct vcmd_space_mask_v1 space_mask = { ++ .mask = CLONE_FS | ++ CLONE_NEWNS | ++#ifdef CONFIG_UTS_NS ++ CLONE_NEWUTS | ++#endif ++#ifdef CONFIG_IPC_NS ++ CLONE_NEWIPC | ++#endif ++#ifdef CONFIG_USER_NS ++ CLONE_NEWUSER | ++#endif ++#ifdef CONFIG_PID_NS ++ CLONE_NEWPID | ++#endif ++#ifdef 
CONFIG_NET_NS ++ CLONE_NEWNET | ++#endif ++ 0 ++}; ++ ++static const struct vcmd_space_mask_v1 default_space_mask = { ++ .mask = CLONE_FS | ++ CLONE_NEWNS | ++#ifdef CONFIG_UTS_NS ++ CLONE_NEWUTS | ++#endif ++#ifdef CONFIG_IPC_NS ++ CLONE_NEWIPC | ++#endif ++#ifdef CONFIG_USER_NS ++ CLONE_NEWUSER | ++#endif ++#ifdef CONFIG_PID_NS ++// CLONE_NEWPID | ++#endif ++ 0 ++}; ++ ++/* ++ * build a new nsproxy mix ++ * assumes that both proxies are 'const' ++ * does not touch nsproxy refcounts ++ * will hold a reference on the result. ++ */ ++ ++struct nsproxy *vs_mix_nsproxy(struct nsproxy *old_nsproxy, ++ struct nsproxy *new_nsproxy, unsigned long mask) ++{ ++ struct mnt_namespace *old_ns; ++ struct uts_namespace *old_uts; ++ struct ipc_namespace *old_ipc; ++#ifdef CONFIG_PID_NS ++ struct pid_namespace *old_pid; ++#endif ++#ifdef CONFIG_NET_NS ++ struct net *old_net; ++#endif ++ struct nsproxy *nsproxy; ++ ++ nsproxy = copy_nsproxy(old_nsproxy); ++ if (!nsproxy) ++ goto out; ++ ++ if (mask & CLONE_NEWNS) { ++ old_ns = nsproxy->mnt_ns; ++ nsproxy->mnt_ns = new_nsproxy->mnt_ns; ++ if (nsproxy->mnt_ns) ++ get_mnt_ns(nsproxy->mnt_ns); ++ } else ++ old_ns = NULL; ++ ++ if (mask & CLONE_NEWUTS) { ++ old_uts = nsproxy->uts_ns; ++ nsproxy->uts_ns = new_nsproxy->uts_ns; ++ if (nsproxy->uts_ns) ++ get_uts_ns(nsproxy->uts_ns); ++ } else ++ old_uts = NULL; ++ ++ if (mask & CLONE_NEWIPC) { ++ old_ipc = nsproxy->ipc_ns; ++ nsproxy->ipc_ns = new_nsproxy->ipc_ns; ++ if (nsproxy->ipc_ns) ++ get_ipc_ns(nsproxy->ipc_ns); ++ } else ++ old_ipc = NULL; ++ ++#ifdef CONFIG_PID_NS ++ if (mask & CLONE_NEWPID) { ++ old_pid = nsproxy->pid_ns; ++ nsproxy->pid_ns = new_nsproxy->pid_ns; ++ if (nsproxy->pid_ns) ++ get_pid_ns(nsproxy->pid_ns); ++ } else ++ old_pid = NULL; ++#endif ++#ifdef CONFIG_NET_NS ++ if (mask & CLONE_NEWNET) { ++ old_net = nsproxy->net_ns; ++ nsproxy->net_ns = new_nsproxy->net_ns; ++ if (nsproxy->net_ns) ++ get_net(nsproxy->net_ns); ++ } else ++ old_net = NULL; ++#endif ++ if (old_ns) ++ put_mnt_ns(old_ns); ++ if (old_uts) ++ put_uts_ns(old_uts); ++ if (old_ipc) ++ put_ipc_ns(old_ipc); ++#ifdef CONFIG_PID_NS ++ if (old_pid) ++ put_pid_ns(old_pid); ++#endif ++#ifdef CONFIG_NET_NS ++ if (old_net) ++ put_net(old_net); ++#endif ++out: ++ return nsproxy; ++} ++ ++ ++/* ++ * merge two nsproxy structs into a new one. ++ * will hold a reference on the result. ++ */ ++ ++static inline ++struct nsproxy *__vs_merge_nsproxy(struct nsproxy *old, ++ struct nsproxy *proxy, unsigned long mask) ++{ ++ struct nsproxy null_proxy = { .mnt_ns = NULL }; ++ ++ if (!proxy) ++ return NULL; ++ ++ if (mask) { ++ /* vs_mix_nsproxy returns with reference */ ++ return vs_mix_nsproxy(old ? 
old : &null_proxy, ++ proxy, mask); ++ } ++ get_nsproxy(proxy); ++ return proxy; ++} ++ ++ ++int vx_enter_space(struct vx_info *vxi, unsigned long mask, unsigned index) ++{ ++ struct nsproxy *proxy, *proxy_cur, *proxy_new; ++ struct fs_struct *fs_cur, *fs = NULL; ++ struct _vx_space *space; ++ int ret, kill = 0; ++ ++ vxdprintk(VXD_CBIT(space, 8), "vx_enter_space(%p[#%u],0x%08lx,%d)", ++ vxi, vxi->vx_id, mask, index); ++ ++ if (vx_info_flags(vxi, VXF_INFO_PRIVATE, 0)) ++ return -EACCES; ++ ++ if (index >= VX_SPACES) ++ return -EINVAL; ++ ++ space = &vxi->space[index]; ++ ++ if (!mask) ++ mask = space->vx_nsmask; ++ ++ if ((mask & space->vx_nsmask) != mask) ++ return -EINVAL; ++ ++ if (mask & CLONE_FS) { ++ fs = copy_fs_struct(space->vx_fs); ++ if (!fs) ++ return -ENOMEM; ++ } ++ proxy = space->vx_nsproxy; ++ ++ vxdprintk(VXD_CBIT(space, 9), ++ "vx_enter_space(%p[#%u],0x%08lx,%d) -> (%p,%p)", ++ vxi, vxi->vx_id, mask, index, proxy, fs); ++ ++ task_lock(current); ++ fs_cur = current->fs; ++ ++ if (mask & CLONE_FS) { ++ spin_lock(&fs_cur->lock); ++ current->fs = fs; ++ kill = !--fs_cur->users; ++ spin_unlock(&fs_cur->lock); ++ } ++ ++ proxy_cur = current->nsproxy; ++ get_nsproxy(proxy_cur); ++ task_unlock(current); ++ ++ if (kill) ++ free_fs_struct(fs_cur); ++ ++ proxy_new = __vs_merge_nsproxy(proxy_cur, proxy, mask); ++ if (IS_ERR(proxy_new)) { ++ ret = PTR_ERR(proxy_new); ++ goto out_put; ++ } ++ ++ proxy_new = xchg(¤t->nsproxy, proxy_new); ++ ++ if (mask & CLONE_NEWUSER) { ++ struct cred *cred; ++ ++ vxdprintk(VXD_CBIT(space, 10), ++ "vx_enter_space(%p[#%u],%p) cred (%p,%p)", ++ vxi, vxi->vx_id, space->vx_cred, ++ current->real_cred, current->cred); ++ ++ if (space->vx_cred) { ++ cred = __prepare_creds(space->vx_cred); ++ if (cred) ++ commit_creds(cred); ++ } ++ } ++ ++ ret = 0; ++ ++ if (proxy_new) ++ put_nsproxy(proxy_new); ++out_put: ++ if (proxy_cur) ++ put_nsproxy(proxy_cur); ++ return ret; ++} ++ ++ ++int vx_set_space(struct vx_info *vxi, unsigned long mask, unsigned index) ++{ ++ struct nsproxy *proxy_vxi, *proxy_cur, *proxy_new; ++ struct fs_struct *fs_vxi, *fs; ++ struct _vx_space *space; ++ int ret, kill = 0; ++ ++ vxdprintk(VXD_CBIT(space, 8), "vx_set_space(%p[#%u],0x%08lx,%d)", ++ vxi, vxi->vx_id, mask, index); ++ ++ if ((mask & space_mask.mask) != mask) ++ return -EINVAL; ++ ++ if (index >= VX_SPACES) ++ return -EINVAL; ++ ++ space = &vxi->space[index]; ++ ++ proxy_vxi = space->vx_nsproxy; ++ fs_vxi = space->vx_fs; ++ ++ if (mask & CLONE_FS) { ++ fs = copy_fs_struct(current->fs); ++ if (!fs) ++ return -ENOMEM; ++ } ++ ++ task_lock(current); ++ ++ if (mask & CLONE_FS) { ++ spin_lock(&fs_vxi->lock); ++ space->vx_fs = fs; ++ kill = !--fs_vxi->users; ++ spin_unlock(&fs_vxi->lock); ++ } ++ ++ proxy_cur = current->nsproxy; ++ get_nsproxy(proxy_cur); ++ task_unlock(current); ++ ++ if (kill) ++ free_fs_struct(fs_vxi); ++ ++ proxy_new = __vs_merge_nsproxy(proxy_vxi, proxy_cur, mask); ++ if (IS_ERR(proxy_new)) { ++ ret = PTR_ERR(proxy_new); ++ goto out_put; ++ } ++ ++ proxy_new = xchg(&space->vx_nsproxy, proxy_new); ++ space->vx_nsmask |= mask; ++ ++ if (mask & CLONE_NEWUSER) { ++ struct cred *cred; ++ ++ vxdprintk(VXD_CBIT(space, 10), ++ "vx_set_space(%p[#%u],%p) cred (%p,%p)", ++ vxi, vxi->vx_id, space->vx_cred, ++ current->real_cred, current->cred); ++ ++ cred = prepare_creds(); ++ cred = (struct cred *)xchg(&space->vx_cred, cred); ++ if (cred) ++ abort_creds(cred); ++ } ++ ++ ret = 0; ++ ++ if (proxy_new) ++ put_nsproxy(proxy_new); ++out_put: ++ if (proxy_cur) ++ 
put_nsproxy(proxy_cur); ++ return ret; ++} ++ ++ ++int vc_enter_space_v1(struct vx_info *vxi, void __user *data) ++{ ++ struct vcmd_space_mask_v1 vc_data = { .mask = 0 }; ++ ++ if (data && copy_from_user(&vc_data, data, sizeof(vc_data))) ++ return -EFAULT; ++ ++ return vx_enter_space(vxi, vc_data.mask, 0); ++} ++ ++int vc_enter_space(struct vx_info *vxi, void __user *data) ++{ ++ struct vcmd_space_mask_v2 vc_data = { .mask = 0 }; ++ ++ if (data && copy_from_user(&vc_data, data, sizeof(vc_data))) ++ return -EFAULT; ++ ++ if (vc_data.index >= VX_SPACES) ++ return -EINVAL; ++ ++ return vx_enter_space(vxi, vc_data.mask, vc_data.index); ++} ++ ++int vc_set_space_v1(struct vx_info *vxi, void __user *data) ++{ ++ struct vcmd_space_mask_v1 vc_data = { .mask = 0 }; ++ ++ if (data && copy_from_user(&vc_data, data, sizeof(vc_data))) ++ return -EFAULT; ++ ++ return vx_set_space(vxi, vc_data.mask, 0); ++} ++ ++int vc_set_space(struct vx_info *vxi, void __user *data) ++{ ++ struct vcmd_space_mask_v2 vc_data = { .mask = 0 }; ++ ++ if (data && copy_from_user(&vc_data, data, sizeof(vc_data))) ++ return -EFAULT; ++ ++ if (vc_data.index >= VX_SPACES) ++ return -EINVAL; ++ ++ return vx_set_space(vxi, vc_data.mask, vc_data.index); ++} ++ ++int vc_get_space_mask(void __user *data, int type) ++{ ++ const struct vcmd_space_mask_v1 *mask; ++ ++ if (type == 0) ++ mask = &space_mask_v0; ++ else if (type == 1) ++ mask = &space_mask; ++ else ++ mask = &default_space_mask; ++ ++ vxdprintk(VXD_CBIT(space, 10), ++ "vc_get_space_mask(%d) = %08llx", type, mask->mask); ++ ++ if (copy_to_user(data, mask, sizeof(*mask))) ++ return -EFAULT; ++ return 0; ++} ++ +diff -NurpP --minimal linux-3.3.8/kernel/vserver/switch.c linux-3.3.8-vs2.3.3.4/kernel/vserver/switch.c +--- linux-3.3.8/kernel/vserver/switch.c 1970-01-01 01:00:00.000000000 +0100 ++++ linux-3.3.8-vs2.3.3.4/kernel/vserver/switch.c 2012-02-24 03:55:07.000000000 +0100 +@@ -0,0 +1,556 @@ ++/* ++ * linux/kernel/vserver/switch.c ++ * ++ * Virtual Server: Syscall Switch ++ * ++ * Copyright (C) 2003-2011 Herbert Pötzl ++ * ++ * V0.01 syscall switch ++ * V0.02 added signal to context ++ * V0.03 added rlimit functions ++ * V0.04 added iattr, task/xid functions ++ * V0.05 added debug/history stuff ++ * V0.06 added compat32 layer ++ * V0.07 vcmd args and perms ++ * V0.08 added status commands ++ * V0.09 added tag commands ++ * V0.10 added oom bias ++ * V0.11 added device commands ++ * V0.12 added warn mask ++ * ++ */ ++ ++#include ++#include ++#include ++ ++#include "vci_config.h" ++ ++ ++static inline ++int vc_get_version(uint32_t id) ++{ ++ return VCI_VERSION; ++} ++ ++static inline ++int vc_get_vci(uint32_t id) ++{ ++ return vci_kernel_config(); ++} ++ ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++ ++#include ++#include ++ ++ ++#ifdef CONFIG_COMPAT ++#define __COMPAT(name, id, data, compat) \ ++ (compat) ? name ## _x32(id, data) : name(id, data) ++#define __COMPAT_NO_ID(name, data, compat) \ ++ (compat) ? 
name ## _x32(data) : name(data) ++#else ++#define __COMPAT(name, id, data, compat) \ ++ name(id, data) ++#define __COMPAT_NO_ID(name, data, compat) \ ++ name(data) ++#endif ++ ++ ++static inline ++long do_vcmd(uint32_t cmd, uint32_t id, ++ struct vx_info *vxi, struct nx_info *nxi, ++ void __user *data, int compat) ++{ ++ switch (cmd) { ++ ++ case VCMD_get_version: ++ return vc_get_version(id); ++ case VCMD_get_vci: ++ return vc_get_vci(id); ++ ++ case VCMD_task_xid: ++ return vc_task_xid(id); ++ case VCMD_vx_info: ++ return vc_vx_info(vxi, data); ++ ++ case VCMD_task_nid: ++ return vc_task_nid(id); ++ case VCMD_nx_info: ++ return vc_nx_info(nxi, data); ++ ++ case VCMD_task_tag: ++ return vc_task_tag(id); ++ ++ case VCMD_set_space_v1: ++ return vc_set_space_v1(vxi, data); ++ /* this is version 2 */ ++ case VCMD_set_space: ++ return vc_set_space(vxi, data); ++ ++ case VCMD_get_space_mask_v0: ++ return vc_get_space_mask(data, 0); ++ /* this is version 1 */ ++ case VCMD_get_space_mask: ++ return vc_get_space_mask(data, 1); ++ ++ case VCMD_get_space_default: ++ return vc_get_space_mask(data, -1); ++ ++ case VCMD_set_umask: ++ return vc_set_umask(vxi, data); ++ ++ case VCMD_get_umask: ++ return vc_get_umask(vxi, data); ++ ++ case VCMD_set_wmask: ++ return vc_set_wmask(vxi, data); ++ ++ case VCMD_get_wmask: ++ return vc_get_wmask(vxi, data); ++#ifdef CONFIG_IA32_EMULATION ++ case VCMD_get_rlimit: ++ return __COMPAT(vc_get_rlimit, vxi, data, compat); ++ case VCMD_set_rlimit: ++ return __COMPAT(vc_set_rlimit, vxi, data, compat); ++#else ++ case VCMD_get_rlimit: ++ return vc_get_rlimit(vxi, data); ++ case VCMD_set_rlimit: ++ return vc_set_rlimit(vxi, data); ++#endif ++ case VCMD_get_rlimit_mask: ++ return vc_get_rlimit_mask(id, data); ++ case VCMD_reset_hits: ++ return vc_reset_hits(vxi, data); ++ case VCMD_reset_minmax: ++ return vc_reset_minmax(vxi, data); ++ ++ case VCMD_get_vhi_name: ++ return vc_get_vhi_name(vxi, data); ++ case VCMD_set_vhi_name: ++ return vc_set_vhi_name(vxi, data); ++ ++ case VCMD_ctx_stat: ++ return vc_ctx_stat(vxi, data); ++ case VCMD_virt_stat: ++ return vc_virt_stat(vxi, data); ++ case VCMD_sock_stat: ++ return vc_sock_stat(vxi, data); ++ case VCMD_rlimit_stat: ++ return vc_rlimit_stat(vxi, data); ++ ++ case VCMD_set_cflags: ++ return vc_set_cflags(vxi, data); ++ case VCMD_get_cflags: ++ return vc_get_cflags(vxi, data); ++ ++ /* this is version 1 */ ++ case VCMD_set_ccaps: ++ return vc_set_ccaps(vxi, data); ++ /* this is version 1 */ ++ case VCMD_get_ccaps: ++ return vc_get_ccaps(vxi, data); ++ case VCMD_set_bcaps: ++ return vc_set_bcaps(vxi, data); ++ case VCMD_get_bcaps: ++ return vc_get_bcaps(vxi, data); ++ ++ case VCMD_set_badness: ++ return vc_set_badness(vxi, data); ++ case VCMD_get_badness: ++ return vc_get_badness(vxi, data); ++ ++ case VCMD_set_nflags: ++ return vc_set_nflags(nxi, data); ++ case VCMD_get_nflags: ++ return vc_get_nflags(nxi, data); ++ ++ case VCMD_set_ncaps: ++ return vc_set_ncaps(nxi, data); ++ case VCMD_get_ncaps: ++ return vc_get_ncaps(nxi, data); ++ ++ case VCMD_set_prio_bias: ++ return vc_set_prio_bias(vxi, data); ++ case VCMD_get_prio_bias: ++ return vc_get_prio_bias(vxi, data); ++ case VCMD_add_dlimit: ++ return __COMPAT(vc_add_dlimit, id, data, compat); ++ case VCMD_rem_dlimit: ++ return __COMPAT(vc_rem_dlimit, id, data, compat); ++ case VCMD_set_dlimit: ++ return __COMPAT(vc_set_dlimit, id, data, compat); ++ case VCMD_get_dlimit: ++ return __COMPAT(vc_get_dlimit, id, data, compat); ++ ++ case VCMD_ctx_kill: ++ return vc_ctx_kill(vxi, 
data); ++ ++ case VCMD_wait_exit: ++ return vc_wait_exit(vxi, data); ++ ++ case VCMD_get_iattr: ++ return __COMPAT_NO_ID(vc_get_iattr, data, compat); ++ case VCMD_set_iattr: ++ return __COMPAT_NO_ID(vc_set_iattr, data, compat); ++ ++ case VCMD_fget_iattr: ++ return vc_fget_iattr(id, data); ++ case VCMD_fset_iattr: ++ return vc_fset_iattr(id, data); ++ ++ case VCMD_enter_space_v0: ++ return vc_enter_space_v1(vxi, NULL); ++ case VCMD_enter_space_v1: ++ return vc_enter_space_v1(vxi, data); ++ /* this is version 2 */ ++ case VCMD_enter_space: ++ return vc_enter_space(vxi, data); ++ ++ case VCMD_ctx_create_v0: ++ return vc_ctx_create(id, NULL); ++ case VCMD_ctx_create: ++ return vc_ctx_create(id, data); ++ case VCMD_ctx_migrate_v0: ++ return vc_ctx_migrate(vxi, NULL); ++ case VCMD_ctx_migrate: ++ return vc_ctx_migrate(vxi, data); ++ ++ case VCMD_net_create_v0: ++ return vc_net_create(id, NULL); ++ case VCMD_net_create: ++ return vc_net_create(id, data); ++ case VCMD_net_migrate: ++ return vc_net_migrate(nxi, data); ++ ++ case VCMD_tag_migrate: ++ return vc_tag_migrate(id); ++ ++ case VCMD_net_add: ++ return vc_net_add(nxi, data); ++ case VCMD_net_remove: ++ return vc_net_remove(nxi, data); ++ ++ case VCMD_net_add_ipv4_v1: ++ return vc_net_add_ipv4_v1(nxi, data); ++ /* this is version 2 */ ++ case VCMD_net_add_ipv4: ++ return vc_net_add_ipv4(nxi, data); ++ ++ case VCMD_net_rem_ipv4_v1: ++ return vc_net_rem_ipv4_v1(nxi, data); ++ /* this is version 2 */ ++ case VCMD_net_rem_ipv4: ++ return vc_net_rem_ipv4(nxi, data); ++#ifdef CONFIG_IPV6 ++ case VCMD_net_add_ipv6: ++ return vc_net_add_ipv6(nxi, data); ++ case VCMD_net_remove_ipv6: ++ return vc_net_remove_ipv6(nxi, data); ++#endif ++/* case VCMD_add_match_ipv4: ++ return vc_add_match_ipv4(nxi, data); ++ case VCMD_get_match_ipv4: ++ return vc_get_match_ipv4(nxi, data); ++#ifdef CONFIG_IPV6 ++ case VCMD_add_match_ipv6: ++ return vc_add_match_ipv6(nxi, data); ++ case VCMD_get_match_ipv6: ++ return vc_get_match_ipv6(nxi, data); ++#endif */ ++ ++#ifdef CONFIG_VSERVER_DEVICE ++ case VCMD_set_mapping: ++ return __COMPAT(vc_set_mapping, vxi, data, compat); ++ case VCMD_unset_mapping: ++ return __COMPAT(vc_unset_mapping, vxi, data, compat); ++#endif ++#ifdef CONFIG_VSERVER_HISTORY ++ case VCMD_dump_history: ++ return vc_dump_history(id); ++ case VCMD_read_history: ++ return __COMPAT(vc_read_history, id, data, compat); ++#endif ++ default: ++ vxwprintk_task(1, "unimplemented VCMD_%02d_%d[%d]", ++ VC_CATEGORY(cmd), VC_COMMAND(cmd), VC_VERSION(cmd)); ++ } ++ return -ENOSYS; ++} ++ ++ ++#define __VCMD(vcmd, _perm, _args, _flags) \ ++ case VCMD_ ## vcmd: perm = _perm; \ ++ args = _args; flags = _flags; break ++ ++ ++#define VCA_NONE 0x00 ++#define VCA_VXI 0x01 ++#define VCA_NXI 0x02 ++ ++#define VCF_NONE 0x00 ++#define VCF_INFO 0x01 ++#define VCF_ADMIN 0x02 ++#define VCF_ARES 0x06 /* includes admin */ ++#define VCF_SETUP 0x08 ++ ++#define VCF_ZIDOK 0x10 /* zero id okay */ ++ ++ ++static inline ++long do_vserver(uint32_t cmd, uint32_t id, void __user *data, int compat) ++{ ++ long ret; ++ int permit = -1, state = 0; ++ int perm = -1, args = 0, flags = 0; ++ struct vx_info *vxi = NULL; ++ struct nx_info *nxi = NULL; ++ ++ switch (cmd) { ++ /* unpriviledged commands */ ++ __VCMD(get_version, 0, VCA_NONE, 0); ++ __VCMD(get_vci, 0, VCA_NONE, 0); ++ __VCMD(get_rlimit_mask, 0, VCA_NONE, 0); ++ __VCMD(get_space_mask_v0,0, VCA_NONE, 0); ++ __VCMD(get_space_mask, 0, VCA_NONE, 0); ++ __VCMD(get_space_default,0, VCA_NONE, 0); ++ ++ /* info commands */ ++ __VCMD(task_xid, 
2, VCA_NONE, 0); ++ __VCMD(reset_hits, 2, VCA_VXI, 0); ++ __VCMD(reset_minmax, 2, VCA_VXI, 0); ++ __VCMD(vx_info, 3, VCA_VXI, VCF_INFO); ++ __VCMD(get_bcaps, 3, VCA_VXI, VCF_INFO); ++ __VCMD(get_ccaps, 3, VCA_VXI, VCF_INFO); ++ __VCMD(get_cflags, 3, VCA_VXI, VCF_INFO); ++ __VCMD(get_umask, 3, VCA_VXI, VCF_INFO); ++ __VCMD(get_wmask, 3, VCA_VXI, VCF_INFO); ++ __VCMD(get_badness, 3, VCA_VXI, VCF_INFO); ++ __VCMD(get_vhi_name, 3, VCA_VXI, VCF_INFO); ++ __VCMD(get_rlimit, 3, VCA_VXI, VCF_INFO); ++ ++ __VCMD(ctx_stat, 3, VCA_VXI, VCF_INFO); ++ __VCMD(virt_stat, 3, VCA_VXI, VCF_INFO); ++ __VCMD(sock_stat, 3, VCA_VXI, VCF_INFO); ++ __VCMD(rlimit_stat, 3, VCA_VXI, VCF_INFO); ++ ++ __VCMD(task_nid, 2, VCA_NONE, 0); ++ __VCMD(nx_info, 3, VCA_NXI, VCF_INFO); ++ __VCMD(get_ncaps, 3, VCA_NXI, VCF_INFO); ++ __VCMD(get_nflags, 3, VCA_NXI, VCF_INFO); ++ ++ __VCMD(task_tag, 2, VCA_NONE, 0); ++ ++ __VCMD(get_iattr, 2, VCA_NONE, 0); ++ __VCMD(fget_iattr, 2, VCA_NONE, 0); ++ __VCMD(get_dlimit, 3, VCA_NONE, VCF_INFO); ++ __VCMD(get_prio_bias, 3, VCA_VXI, VCF_INFO); ++ ++ /* lower admin commands */ ++ __VCMD(wait_exit, 4, VCA_VXI, VCF_INFO); ++ __VCMD(ctx_create_v0, 5, VCA_NONE, 0); ++ __VCMD(ctx_create, 5, VCA_NONE, 0); ++ __VCMD(ctx_migrate_v0, 5, VCA_VXI, VCF_ADMIN); ++ __VCMD(ctx_migrate, 5, VCA_VXI, VCF_ADMIN); ++ __VCMD(enter_space_v0, 5, VCA_VXI, VCF_ADMIN); ++ __VCMD(enter_space_v1, 5, VCA_VXI, VCF_ADMIN); ++ __VCMD(enter_space, 5, VCA_VXI, VCF_ADMIN); ++ ++ __VCMD(net_create_v0, 5, VCA_NONE, 0); ++ __VCMD(net_create, 5, VCA_NONE, 0); ++ __VCMD(net_migrate, 5, VCA_NXI, VCF_ADMIN); ++ ++ __VCMD(tag_migrate, 5, VCA_NONE, VCF_ADMIN); ++ ++ /* higher admin commands */ ++ __VCMD(ctx_kill, 6, VCA_VXI, VCF_ARES); ++ __VCMD(set_space_v1, 7, VCA_VXI, VCF_ARES | VCF_SETUP); ++ __VCMD(set_space, 7, VCA_VXI, VCF_ARES | VCF_SETUP); ++ ++ __VCMD(set_ccaps, 7, VCA_VXI, VCF_ARES | VCF_SETUP); ++ __VCMD(set_bcaps, 7, VCA_VXI, VCF_ARES | VCF_SETUP); ++ __VCMD(set_cflags, 7, VCA_VXI, VCF_ARES | VCF_SETUP); ++ __VCMD(set_umask, 7, VCA_VXI, VCF_ARES | VCF_SETUP); ++ __VCMD(set_wmask, 7, VCA_VXI, VCF_ARES | VCF_SETUP); ++ __VCMD(set_badness, 7, VCA_VXI, VCF_ARES | VCF_SETUP); ++ ++ __VCMD(set_vhi_name, 7, VCA_VXI, VCF_ARES | VCF_SETUP); ++ __VCMD(set_rlimit, 7, VCA_VXI, VCF_ARES | VCF_SETUP); ++ __VCMD(set_prio_bias, 7, VCA_VXI, VCF_ARES | VCF_SETUP); ++ ++ __VCMD(set_ncaps, 7, VCA_NXI, VCF_ARES | VCF_SETUP); ++ __VCMD(set_nflags, 7, VCA_NXI, VCF_ARES | VCF_SETUP); ++ __VCMD(net_add, 8, VCA_NXI, VCF_ARES | VCF_SETUP); ++ __VCMD(net_remove, 8, VCA_NXI, VCF_ARES | VCF_SETUP); ++ __VCMD(net_add_ipv4_v1, 8, VCA_NXI, VCF_ARES | VCF_SETUP); ++ __VCMD(net_rem_ipv4_v1, 8, VCA_NXI, VCF_ARES | VCF_SETUP); ++ __VCMD(net_add_ipv4, 8, VCA_NXI, VCF_ARES | VCF_SETUP); ++ __VCMD(net_rem_ipv4, 8, VCA_NXI, VCF_ARES | VCF_SETUP); ++#ifdef CONFIG_IPV6 ++ __VCMD(net_add_ipv6, 8, VCA_NXI, VCF_ARES | VCF_SETUP); ++ __VCMD(net_remove_ipv6, 8, VCA_NXI, VCF_ARES | VCF_SETUP); ++#endif ++ __VCMD(set_iattr, 7, VCA_NONE, 0); ++ __VCMD(fset_iattr, 7, VCA_NONE, 0); ++ __VCMD(set_dlimit, 7, VCA_NONE, VCF_ARES); ++ __VCMD(add_dlimit, 8, VCA_NONE, VCF_ARES); ++ __VCMD(rem_dlimit, 8, VCA_NONE, VCF_ARES); ++ ++#ifdef CONFIG_VSERVER_DEVICE ++ __VCMD(set_mapping, 8, VCA_VXI, VCF_ARES|VCF_ZIDOK); ++ __VCMD(unset_mapping, 8, VCA_VXI, VCF_ARES|VCF_ZIDOK); ++#endif ++ /* debug level admin commands */ ++#ifdef CONFIG_VSERVER_HISTORY ++ __VCMD(dump_history, 9, VCA_NONE, 0); ++ __VCMD(read_history, 9, VCA_NONE, 0); ++#endif ++ ++ default: ++ perm = -1; ++ } ++ ++ 
vxdprintk(VXD_CBIT(switch, 0), ++ "vc: VCMD_%02d_%d[%d], %d,%p [%d,%d,%x,%x]", ++ VC_CATEGORY(cmd), VC_COMMAND(cmd), ++ VC_VERSION(cmd), id, data, compat, ++ perm, args, flags); ++ ++ ret = -ENOSYS; ++ if (perm < 0) ++ goto out; ++ ++ state = 1; ++ if (!capable(CAP_CONTEXT)) ++ goto out; ++ ++ state = 2; ++ /* moved here from the individual commands */ ++ ret = -EPERM; ++ if ((perm > 1) && !capable(CAP_SYS_ADMIN)) ++ goto out; ++ ++ state = 3; ++ /* vcmd involves resource management */ ++ ret = -EPERM; ++ if ((flags & VCF_ARES) && !capable(CAP_SYS_RESOURCE)) ++ goto out; ++ ++ state = 4; ++ /* various legacy exceptions */ ++ switch (cmd) { ++ /* will go away when spectator is a cap */ ++ case VCMD_ctx_migrate_v0: ++ case VCMD_ctx_migrate: ++ if (id == 1) { ++ current->xid = 1; ++ ret = 1; ++ goto out; ++ } ++ break; ++ ++ /* will go away when spectator is a cap */ ++ case VCMD_net_migrate: ++ if (id == 1) { ++ current->nid = 1; ++ ret = 1; ++ goto out; ++ } ++ break; ++ } ++ ++ /* vcmds are fine by default */ ++ permit = 1; ++ ++ /* admin type vcmds require admin ... */ ++ if (flags & VCF_ADMIN) ++ permit = vx_check(0, VS_ADMIN) ? 1 : 0; ++ ++ /* ... but setup type vcmds override that */ ++ if (!permit && (flags & VCF_SETUP)) ++ permit = vx_flags(VXF_STATE_SETUP, 0) ? 2 : 0; ++ ++ state = 5; ++ ret = -EPERM; ++ if (!permit) ++ goto out; ++ ++ state = 6; ++ if (!id && (flags & VCF_ZIDOK)) ++ goto skip_id; ++ ++ ret = -ESRCH; ++ if (args & VCA_VXI) { ++ vxi = lookup_vx_info(id); ++ if (!vxi) ++ goto out; ++ ++ if ((flags & VCF_ADMIN) && ++ /* special case kill for shutdown */ ++ (cmd != VCMD_ctx_kill) && ++ /* can context be administrated? */ ++ !vx_info_flags(vxi, VXF_STATE_ADMIN, 0)) { ++ ret = -EACCES; ++ goto out_vxi; ++ } ++ } ++ state = 7; ++ if (args & VCA_NXI) { ++ nxi = lookup_nx_info(id); ++ if (!nxi) ++ goto out_vxi; ++ ++ if ((flags & VCF_ADMIN) && ++ /* can context be administrated? 
*/ ++ !nx_info_flags(nxi, NXF_STATE_ADMIN, 0)) { ++ ret = -EACCES; ++ goto out_nxi; ++ } ++ } ++skip_id: ++ state = 8; ++ ret = do_vcmd(cmd, id, vxi, nxi, data, compat); ++ ++out_nxi: ++ if ((args & VCA_NXI) && nxi) ++ put_nx_info(nxi); ++out_vxi: ++ if ((args & VCA_VXI) && vxi) ++ put_vx_info(vxi); ++out: ++ vxdprintk(VXD_CBIT(switch, 1), ++ "vc: VCMD_%02d_%d[%d] = %08lx(%ld) [%d,%d]", ++ VC_CATEGORY(cmd), VC_COMMAND(cmd), ++ VC_VERSION(cmd), ret, ret, state, permit); ++ return ret; ++} ++ ++asmlinkage long ++sys_vserver(uint32_t cmd, uint32_t id, void __user *data) ++{ ++ return do_vserver(cmd, id, data, 0); ++} ++ ++#ifdef CONFIG_COMPAT ++ ++asmlinkage long ++sys32_vserver(uint32_t cmd, uint32_t id, void __user *data) ++{ ++ return do_vserver(cmd, id, data, 1); ++} ++ ++#endif /* CONFIG_COMPAT */ +diff -NurpP --minimal linux-3.3.8/kernel/vserver/sysctl.c linux-3.3.8-vs2.3.3.4/kernel/vserver/sysctl.c +--- linux-3.3.8/kernel/vserver/sysctl.c 1970-01-01 01:00:00.000000000 +0100 ++++ linux-3.3.8-vs2.3.3.4/kernel/vserver/sysctl.c 2012-02-24 03:55:07.000000000 +0100 +@@ -0,0 +1,247 @@ ++/* ++ * kernel/vserver/sysctl.c ++ * ++ * Virtual Context Support ++ * ++ * Copyright (C) 2004-2007 Herbert Pötzl ++ * ++ * V0.01 basic structure ++ * ++ */ ++ ++#include ++#include ++#include ++#include ++#include ++ ++enum { ++ CTL_DEBUG_ERROR = 0, ++ CTL_DEBUG_SWITCH = 1, ++ CTL_DEBUG_XID, ++ CTL_DEBUG_NID, ++ CTL_DEBUG_TAG, ++ CTL_DEBUG_NET, ++ CTL_DEBUG_LIMIT, ++ CTL_DEBUG_CRES, ++ CTL_DEBUG_DLIM, ++ CTL_DEBUG_QUOTA, ++ CTL_DEBUG_CVIRT, ++ CTL_DEBUG_SPACE, ++ CTL_DEBUG_PERM, ++ CTL_DEBUG_MISC, ++}; ++ ++ ++unsigned int vs_debug_switch = 0; ++unsigned int vs_debug_xid = 0; ++unsigned int vs_debug_nid = 0; ++unsigned int vs_debug_tag = 0; ++unsigned int vs_debug_net = 0; ++unsigned int vs_debug_limit = 0; ++unsigned int vs_debug_cres = 0; ++unsigned int vs_debug_dlim = 0; ++unsigned int vs_debug_quota = 0; ++unsigned int vs_debug_cvirt = 0; ++unsigned int vs_debug_space = 0; ++unsigned int vs_debug_perm = 0; ++unsigned int vs_debug_misc = 0; ++ ++ ++static struct ctl_table_header *vserver_table_header; ++static ctl_table vserver_root_table[]; ++ ++ ++void vserver_register_sysctl(void) ++{ ++ if (!vserver_table_header) { ++ vserver_table_header = register_sysctl_table(vserver_root_table); ++ } ++ ++} ++ ++void vserver_unregister_sysctl(void) ++{ ++ if (vserver_table_header) { ++ unregister_sysctl_table(vserver_table_header); ++ vserver_table_header = NULL; ++ } ++} ++ ++ ++static int proc_dodebug(ctl_table *table, int write, ++ void __user *buffer, size_t *lenp, loff_t *ppos) ++{ ++ char tmpbuf[20], *p, c; ++ unsigned int value; ++ size_t left, len; ++ ++ if ((*ppos && !write) || !*lenp) { ++ *lenp = 0; ++ return 0; ++ } ++ ++ left = *lenp; ++ ++ if (write) { ++ if (!access_ok(VERIFY_READ, buffer, left)) ++ return -EFAULT; ++ p = (char *)buffer; ++ while (left && __get_user(c, p) >= 0 && isspace(c)) ++ left--, p++; ++ if (!left) ++ goto done; ++ ++ if (left > sizeof(tmpbuf) - 1) ++ return -EINVAL; ++ if (copy_from_user(tmpbuf, p, left)) ++ return -EFAULT; ++ tmpbuf[left] = '\0'; ++ ++ for (p = tmpbuf, value = 0; '0' <= *p && *p <= '9'; p++, left--) ++ value = 10 * value + (*p - '0'); ++ if (*p && !isspace(*p)) ++ return -EINVAL; ++ while (left && isspace(*p)) ++ left--, p++; ++ *(unsigned int *)table->data = value; ++ } else { ++ if (!access_ok(VERIFY_WRITE, buffer, left)) ++ return -EFAULT; ++ len = sprintf(tmpbuf, "%d", *(unsigned int *)table->data); ++ if (len > left) ++ len = left; ++ if 
(__copy_to_user(buffer, tmpbuf, len)) ++ return -EFAULT; ++ if ((left -= len) > 0) { ++ if (put_user('\n', (char *)buffer + len)) ++ return -EFAULT; ++ left--; ++ } ++ } ++ ++done: ++ *lenp -= left; ++ *ppos += *lenp; ++ return 0; ++} ++ ++static int zero; ++ ++#define CTL_ENTRY(ctl, name) \ ++ { \ ++ .procname = #name, \ ++ .data = &vs_ ## name, \ ++ .maxlen = sizeof(int), \ ++ .mode = 0644, \ ++ .proc_handler = &proc_dodebug, \ ++ .extra1 = &zero, \ ++ .extra2 = &zero, \ ++ } ++ ++static ctl_table vserver_debug_table[] = { ++ CTL_ENTRY(CTL_DEBUG_SWITCH, debug_switch), ++ CTL_ENTRY(CTL_DEBUG_XID, debug_xid), ++ CTL_ENTRY(CTL_DEBUG_NID, debug_nid), ++ CTL_ENTRY(CTL_DEBUG_TAG, debug_tag), ++ CTL_ENTRY(CTL_DEBUG_NET, debug_net), ++ CTL_ENTRY(CTL_DEBUG_LIMIT, debug_limit), ++ CTL_ENTRY(CTL_DEBUG_CRES, debug_cres), ++ CTL_ENTRY(CTL_DEBUG_DLIM, debug_dlim), ++ CTL_ENTRY(CTL_DEBUG_QUOTA, debug_quota), ++ CTL_ENTRY(CTL_DEBUG_CVIRT, debug_cvirt), ++ CTL_ENTRY(CTL_DEBUG_SPACE, debug_space), ++ CTL_ENTRY(CTL_DEBUG_PERM, debug_perm), ++ CTL_ENTRY(CTL_DEBUG_MISC, debug_misc), ++ { 0 } ++}; ++ ++static ctl_table vserver_root_table[] = { ++ { ++ .procname = "vserver", ++ .mode = 0555, ++ .child = vserver_debug_table ++ }, ++ { 0 } ++}; ++ ++ ++static match_table_t tokens = { ++ { CTL_DEBUG_SWITCH, "switch=%x" }, ++ { CTL_DEBUG_XID, "xid=%x" }, ++ { CTL_DEBUG_NID, "nid=%x" }, ++ { CTL_DEBUG_TAG, "tag=%x" }, ++ { CTL_DEBUG_NET, "net=%x" }, ++ { CTL_DEBUG_LIMIT, "limit=%x" }, ++ { CTL_DEBUG_CRES, "cres=%x" }, ++ { CTL_DEBUG_DLIM, "dlim=%x" }, ++ { CTL_DEBUG_QUOTA, "quota=%x" }, ++ { CTL_DEBUG_CVIRT, "cvirt=%x" }, ++ { CTL_DEBUG_SPACE, "space=%x" }, ++ { CTL_DEBUG_PERM, "perm=%x" }, ++ { CTL_DEBUG_MISC, "misc=%x" }, ++ { CTL_DEBUG_ERROR, NULL } ++}; ++ ++#define HANDLE_CASE(id, name, val) \ ++ case CTL_DEBUG_ ## id: \ ++ vs_debug_ ## name = val; \ ++ printk("vs_debug_" #name "=0x%x\n", val); \ ++ break ++ ++ ++static int __init vs_debug_setup(char *str) ++{ ++ char *p; ++ int token; ++ ++ printk("vs_debug_setup(%s)\n", str); ++ while ((p = strsep(&str, ",")) != NULL) { ++ substring_t args[MAX_OPT_ARGS]; ++ unsigned int value; ++ ++ if (!*p) ++ continue; ++ ++ token = match_token(p, tokens, args); ++ value = (token > 0) ? 
simple_strtoul(args[0].from, NULL, 0) : 0; ++ ++ switch (token) { ++ HANDLE_CASE(SWITCH, switch, value); ++ HANDLE_CASE(XID, xid, value); ++ HANDLE_CASE(NID, nid, value); ++ HANDLE_CASE(TAG, tag, value); ++ HANDLE_CASE(NET, net, value); ++ HANDLE_CASE(LIMIT, limit, value); ++ HANDLE_CASE(CRES, cres, value); ++ HANDLE_CASE(DLIM, dlim, value); ++ HANDLE_CASE(QUOTA, quota, value); ++ HANDLE_CASE(CVIRT, cvirt, value); ++ HANDLE_CASE(SPACE, space, value); ++ HANDLE_CASE(PERM, perm, value); ++ HANDLE_CASE(MISC, misc, value); ++ default: ++ return -EINVAL; ++ break; ++ } ++ } ++ return 1; ++} ++ ++__setup("vsdebug=", vs_debug_setup); ++ ++ ++ ++EXPORT_SYMBOL_GPL(vs_debug_switch); ++EXPORT_SYMBOL_GPL(vs_debug_xid); ++EXPORT_SYMBOL_GPL(vs_debug_nid); ++EXPORT_SYMBOL_GPL(vs_debug_net); ++EXPORT_SYMBOL_GPL(vs_debug_limit); ++EXPORT_SYMBOL_GPL(vs_debug_cres); ++EXPORT_SYMBOL_GPL(vs_debug_dlim); ++EXPORT_SYMBOL_GPL(vs_debug_quota); ++EXPORT_SYMBOL_GPL(vs_debug_cvirt); ++EXPORT_SYMBOL_GPL(vs_debug_space); ++EXPORT_SYMBOL_GPL(vs_debug_perm); ++EXPORT_SYMBOL_GPL(vs_debug_misc); ++ +diff -NurpP --minimal linux-3.3.8/kernel/vserver/tag.c linux-3.3.8-vs2.3.3.4/kernel/vserver/tag.c +--- linux-3.3.8/kernel/vserver/tag.c 1970-01-01 01:00:00.000000000 +0100 ++++ linux-3.3.8-vs2.3.3.4/kernel/vserver/tag.c 2012-02-24 03:55:07.000000000 +0100 +@@ -0,0 +1,63 @@ ++/* ++ * linux/kernel/vserver/tag.c ++ * ++ * Virtual Server: Shallow Tag Space ++ * ++ * Copyright (C) 2007 Herbert Pötzl ++ * ++ * V0.01 basic implementation ++ * ++ */ ++ ++#include ++#include ++#include ++#include ++ ++#include ++ ++ ++int dx_migrate_task(struct task_struct *p, tag_t tag) ++{ ++ if (!p) ++ BUG(); ++ ++ vxdprintk(VXD_CBIT(tag, 5), ++ "dx_migrate_task(%p[#%d],#%d)", p, p->tag, tag); ++ ++ task_lock(p); ++ p->tag = tag; ++ task_unlock(p); ++ ++ vxdprintk(VXD_CBIT(tag, 5), ++ "moved task %p into [#%d]", p, tag); ++ return 0; ++} ++ ++/* vserver syscall commands below here */ ++ ++/* taks xid and vx_info functions */ ++ ++ ++int vc_task_tag(uint32_t id) ++{ ++ tag_t tag; ++ ++ if (id) { ++ struct task_struct *tsk; ++ rcu_read_lock(); ++ tsk = find_task_by_real_pid(id); ++ tag = (tsk) ? 
tsk->tag : -ESRCH; ++ rcu_read_unlock(); ++ } else ++ tag = dx_current_tag(); ++ return tag; ++} ++ ++ ++int vc_tag_migrate(uint32_t tag) ++{ ++ return dx_migrate_task(current, tag & 0xFFFF); ++} ++ ++ +diff -NurpP --minimal linux-3.3.8/kernel/vserver/vci_config.h linux-3.3.8-vs2.3.3.4/kernel/vserver/vci_config.h +--- linux-3.3.8/kernel/vserver/vci_config.h 1970-01-01 01:00:00.000000000 +0100 ++++ linux-3.3.8-vs2.3.3.4/kernel/vserver/vci_config.h 2012-02-24 03:55:07.000000000 +0100 +@@ -0,0 +1,76 @@ ++ ++/* interface version */ ++ ++#define VCI_VERSION 0x00020308 ++ ++ ++enum { ++ VCI_KCBIT_NO_DYNAMIC = 0, ++ ++ VCI_KCBIT_PROC_SECURE = 4, ++ /* VCI_KCBIT_HARDCPU = 5, */ ++ /* VCI_KCBIT_IDLELIMIT = 6, */ ++ /* VCI_KCBIT_IDLETIME = 7, */ ++ ++ VCI_KCBIT_COWBL = 8, ++ VCI_KCBIT_FULLCOWBL = 9, ++ VCI_KCBIT_SPACES = 10, ++ VCI_KCBIT_NETV2 = 11, ++ VCI_KCBIT_MEMCG = 12, ++ ++ VCI_KCBIT_DEBUG = 16, ++ VCI_KCBIT_HISTORY = 20, ++ VCI_KCBIT_TAGGED = 24, ++ VCI_KCBIT_PPTAG = 28, ++ ++ VCI_KCBIT_MORE = 31, ++}; ++ ++ ++static inline uint32_t vci_kernel_config(void) ++{ ++ return ++ (1 << VCI_KCBIT_NO_DYNAMIC) | ++ ++ /* configured features */ ++#ifdef CONFIG_VSERVER_PROC_SECURE ++ (1 << VCI_KCBIT_PROC_SECURE) | ++#endif ++#ifdef CONFIG_VSERVER_COWBL ++ (1 << VCI_KCBIT_COWBL) | ++ (1 << VCI_KCBIT_FULLCOWBL) | ++#endif ++ (1 << VCI_KCBIT_SPACES) | ++ (1 << VCI_KCBIT_NETV2) | ++#ifdef CONFIG_CGROUP_MEM_RES_CTLR ++ (1 << VCI_KCBIT_MEMCG) | ++#endif ++ ++ /* debug options */ ++#ifdef CONFIG_VSERVER_DEBUG ++ (1 << VCI_KCBIT_DEBUG) | ++#endif ++#ifdef CONFIG_VSERVER_HISTORY ++ (1 << VCI_KCBIT_HISTORY) | ++#endif ++ ++ /* inode context tagging */ ++#if defined(CONFIG_TAGGING_NONE) ++ (0 << VCI_KCBIT_TAGGED) | ++#elif defined(CONFIG_TAGGING_UID16) ++ (1 << VCI_KCBIT_TAGGED) | ++#elif defined(CONFIG_TAGGING_GID16) ++ (2 << VCI_KCBIT_TAGGED) | ++#elif defined(CONFIG_TAGGING_ID24) ++ (3 << VCI_KCBIT_TAGGED) | ++#elif defined(CONFIG_TAGGING_INTERN) ++ (4 << VCI_KCBIT_TAGGED) | ++#elif defined(CONFIG_TAGGING_RUNTIME) ++ (5 << VCI_KCBIT_TAGGED) | ++#else ++ (7 << VCI_KCBIT_TAGGED) | ++#endif ++ (1 << VCI_KCBIT_PPTAG) | ++ 0; ++} ++ +diff -NurpP --minimal linux-3.3.8/mm/memcontrol.c linux-3.3.8-vs2.3.3.4/mm/memcontrol.c +--- linux-3.3.8/mm/memcontrol.c 2012-06-08 15:23:47.000000000 +0200 ++++ linux-3.3.8-vs2.3.3.4/mm/memcontrol.c 2012-05-22 09:49:13.000000000 +0200 +@@ -839,6 +839,31 @@ struct mem_cgroup *mem_cgroup_from_task( + struct mem_cgroup, css); + } + ++u64 mem_cgroup_res_read_u64(struct mem_cgroup *mem, int member) ++{ ++ return res_counter_read_u64(&mem->res, member); ++} ++ ++u64 mem_cgroup_memsw_read_u64(struct mem_cgroup *mem, int member) ++{ ++ return res_counter_read_u64(&mem->memsw, member); ++} ++ ++s64 mem_cgroup_stat_read_cache(struct mem_cgroup *mem) ++{ ++ return mem_cgroup_read_stat(mem, MEM_CGROUP_STAT_CACHE); ++} ++ ++s64 mem_cgroup_stat_read_anon(struct mem_cgroup *mem) ++{ ++ return mem_cgroup_read_stat(mem, MEM_CGROUP_STAT_RSS); ++} ++ ++s64 mem_cgroup_stat_read_mapped(struct mem_cgroup *mem) ++{ ++ return mem_cgroup_read_stat(mem, MEM_CGROUP_STAT_FILE_MAPPED); ++} ++ + struct mem_cgroup *try_get_mem_cgroup_from_mm(struct mm_struct *mm) + { + struct mem_cgroup *memcg = NULL; +diff -NurpP --minimal linux-3.3.8/mm/oom_kill.c linux-3.3.8-vs2.3.3.4/mm/oom_kill.c +--- linux-3.3.8/mm/oom_kill.c 2012-03-19 19:47:30.000000000 +0100 ++++ linux-3.3.8-vs2.3.3.4/mm/oom_kill.c 2012-02-24 04:15:27.000000000 +0100 +@@ -34,6 +34,8 @@ + #include + #include + #include ++#include ++#include + + #define 
CREATE_TRACE_POINTS + #include +@@ -154,11 +156,18 @@ struct task_struct *find_lock_task_mm(st + static bool oom_unkillable_task(struct task_struct *p, + const struct mem_cgroup *memcg, const nodemask_t *nodemask) + { +- if (is_global_init(p)) ++ unsigned xid = vx_current_xid(); ++ ++ /* skip the init task, global and per guest */ ++ if (task_is_init(p)) + return true; + if (p->flags & PF_KTHREAD) + return true; + ++ /* skip other guest and host processes if oom in guest */ ++ if (xid && vx_task_xid(p) != xid) ++ return true; ++ + /* When mem_cgroup_out_of_memory() and p is not member of the group */ + if (memcg && !task_in_mem_cgroup(p, memcg)) + return true; +@@ -446,8 +455,8 @@ static int oom_kill_task(struct task_str + /* mm cannot be safely dereferenced after task_unlock(p) */ + mm = p->mm; + +- pr_err("Killed process %d (%s) total-vm:%lukB, anon-rss:%lukB, file-rss:%lukB\n", +- task_pid_nr(p), p->comm, K(p->mm->total_vm), ++ pr_err("Killed process %d:#%u (%s) total-vm:%lukB, anon-rss:%lukB, file-rss:%lukB\n", ++ task_pid_nr(p), p->xid, p->comm, K(p->mm->total_vm), + K(get_mm_counter(p->mm, MM_ANONPAGES)), + K(get_mm_counter(p->mm, MM_FILEPAGES))); + task_unlock(p); +@@ -505,8 +514,8 @@ static int oom_kill_process(struct task_ + } + + task_lock(p); +- pr_err("%s: Kill process %d (%s) score %d or sacrifice child\n", +- message, task_pid_nr(p), p->comm, points); ++ pr_err("%s: Kill process %d:#%u (%s) score %d or sacrifice child\n", ++ message, task_pid_nr(p), p->xid, p->comm, points); + task_unlock(p); + + /* +@@ -607,6 +616,8 @@ int unregister_oom_notifier(struct notif + } + EXPORT_SYMBOL_GPL(unregister_oom_notifier); + ++long vs_oom_action(unsigned int); ++ + /* + * Try to acquire the OOM killer lock for the zones in zonelist. Returns zero + * if a parallel OOM killing is already taking place that includes a zone in +@@ -765,7 +776,12 @@ retry: + if (!p) { + dump_header(NULL, gfp_mask, order, NULL, mpol_mask); + read_unlock(&tasklist_lock); +- panic("Out of memory and no killable processes...\n"); ++ ++ /* avoid panic for guest OOM */ ++ if (current->xid) ++ vs_oom_action(LINUX_REBOOT_CMD_OOM); ++ else ++ panic("Out of memory and no killable processes...\n"); + } + + if (oom_kill_process(p, gfp_mask, order, points, totalpages, NULL, +diff -NurpP --minimal linux-3.3.8/mm/page_alloc.c linux-3.3.8-vs2.3.3.4/mm/page_alloc.c +--- linux-3.3.8/mm/page_alloc.c 2012-03-19 19:47:30.000000000 +0100 ++++ linux-3.3.8-vs2.3.3.4/mm/page_alloc.c 2012-03-19 20:52:10.000000000 +0100 +@@ -58,6 +58,8 @@ + #include + #include + #include ++#include ++#include + + #include + #include +@@ -2602,6 +2604,9 @@ void si_meminfo(struct sysinfo *val) + val->totalhigh = totalhigh_pages; + val->freehigh = nr_free_highpages(); + val->mem_unit = PAGE_SIZE; ++ ++ if (vx_flags(VXF_VIRT_MEM, 0)) ++ vx_vsi_meminfo(val); + } + + EXPORT_SYMBOL(si_meminfo); +@@ -2622,6 +2627,9 @@ void si_meminfo_node(struct sysinfo *val + val->freehigh = 0; + #endif + val->mem_unit = PAGE_SIZE; ++ ++ if (vx_flags(VXF_VIRT_MEM, 0)) ++ vx_vsi_meminfo(val); + } + #endif + +diff -NurpP --minimal linux-3.3.8/mm/pgtable-generic.c linux-3.3.8-vs2.3.3.4/mm/pgtable-generic.c +--- linux-3.3.8/mm/pgtable-generic.c 2011-03-15 18:07:42.000000000 +0100 ++++ linux-3.3.8-vs2.3.3.4/mm/pgtable-generic.c 2012-02-24 03:55:07.000000000 +0100 +@@ -6,6 +6,8 @@ + * Copyright (C) 2010 Linus Torvalds + */ + ++#include ++ + #include + #include + #include +diff -NurpP --minimal linux-3.3.8/mm/shmem.c linux-3.3.8-vs2.3.3.4/mm/shmem.c +--- linux-3.3.8/mm/shmem.c 
2012-03-19 19:47:30.000000000 +0100 ++++ linux-3.3.8-vs2.3.3.4/mm/shmem.c 2012-02-24 03:55:07.000000000 +0100 +@@ -1460,7 +1460,7 @@ static int shmem_statfs(struct dentry *d + { + struct shmem_sb_info *sbinfo = SHMEM_SB(dentry->d_sb); + +- buf->f_type = TMPFS_MAGIC; ++ buf->f_type = TMPFS_SUPER_MAGIC; + buf->f_bsize = PAGE_CACHE_SIZE; + buf->f_namelen = NAME_MAX; + if (sbinfo->max_blocks) { +@@ -2217,7 +2217,7 @@ int shmem_fill_super(struct super_block + sb->s_maxbytes = MAX_LFS_FILESIZE; + sb->s_blocksize = PAGE_CACHE_SIZE; + sb->s_blocksize_bits = PAGE_CACHE_SHIFT; +- sb->s_magic = TMPFS_MAGIC; ++ sb->s_magic = TMPFS_SUPER_MAGIC; + sb->s_op = &shmem_ops; + sb->s_time_gran = 1; + #ifdef CONFIG_TMPFS_XATTR +diff -NurpP --minimal linux-3.3.8/mm/slab.c linux-3.3.8-vs2.3.3.4/mm/slab.c +--- linux-3.3.8/mm/slab.c 2012-03-19 19:47:30.000000000 +0100 ++++ linux-3.3.8-vs2.3.3.4/mm/slab.c 2012-02-24 03:55:07.000000000 +0100 +@@ -413,6 +413,8 @@ static void kmem_list3_init(struct kmem_ + #define STATS_INC_FREEMISS(x) do { } while (0) + #endif + ++#include "slab_vs.h" ++ + #if DEBUG + + /* +@@ -3414,6 +3416,7 @@ retry: + + obj = slab_get_obj(cachep, slabp, nodeid); + check_slabp(cachep, slabp); ++ vx_slab_alloc(cachep, flags); + l3->free_objects--; + /* move slabp to correct slabp list: */ + list_del(&slabp->list); +@@ -3491,6 +3494,7 @@ __cache_alloc_node(struct kmem_cache *ca + /* ___cache_alloc_node can fall back to other nodes */ + ptr = ____cache_alloc_node(cachep, flags, nodeid); + out: ++ vx_slab_alloc(cachep, flags); + local_irq_restore(save_flags); + ptr = cache_alloc_debugcheck_after(cachep, flags, ptr, caller); + kmemleak_alloc_recursive(ptr, obj_size(cachep), 1, cachep->flags, +@@ -3678,6 +3682,7 @@ static inline void __cache_free(struct k + check_irq_off(); + kmemleak_free_recursive(objp, cachep->flags); + objp = cache_free_debugcheck(cachep, objp, caller); ++ vx_slab_free(cachep); + + kmemcheck_slab_free(cachep, objp, obj_size(cachep)); + +diff -NurpP --minimal linux-3.3.8/mm/slab_vs.h linux-3.3.8-vs2.3.3.4/mm/slab_vs.h +--- linux-3.3.8/mm/slab_vs.h 1970-01-01 01:00:00.000000000 +0100 ++++ linux-3.3.8-vs2.3.3.4/mm/slab_vs.h 2012-02-24 03:55:07.000000000 +0100 +@@ -0,0 +1,29 @@ ++ ++#include ++ ++#include ++ ++static inline ++void vx_slab_alloc(struct kmem_cache *cachep, gfp_t flags) ++{ ++ int what = gfp_zone(cachep->gfpflags); ++ struct vx_info *vxi = current_vx_info(); ++ ++ if (!vxi) ++ return; ++ ++ atomic_add(cachep->buffer_size, &vxi->cacct.slab[what]); ++} ++ ++static inline ++void vx_slab_free(struct kmem_cache *cachep) ++{ ++ int what = gfp_zone(cachep->gfpflags); ++ struct vx_info *vxi = current_vx_info(); ++ ++ if (!vxi) ++ return; ++ ++ atomic_sub(cachep->buffer_size, &vxi->cacct.slab[what]); ++} ++ +diff -NurpP --minimal linux-3.3.8/mm/swapfile.c linux-3.3.8-vs2.3.3.4/mm/swapfile.c +--- linux-3.3.8/mm/swapfile.c 2012-06-08 15:23:47.000000000 +0200 ++++ linux-3.3.8-vs2.3.3.4/mm/swapfile.c 2012-06-08 15:27:44.000000000 +0200 +@@ -36,6 +36,7 @@ + #include + #include + #include ++#include + + static bool swap_count_continued(struct swap_info_struct *, pgoff_t, + unsigned char); +@@ -1752,6 +1753,16 @@ static int swap_show(struct seq_file *sw + + if (si == SEQ_START_TOKEN) { + seq_puts(swap,"Filename\t\t\t\tType\t\tSize\tUsed\tPriority\n"); ++ if (vx_flags(VXF_VIRT_MEM, 0)) { ++ struct sysinfo si; ++ ++ vx_vsi_swapinfo(&si); ++ if (si.totalswap < (1 << 10)) ++ return 0; ++ seq_printf(swap, "%s\t\t\t\t\t%s\t%lu\t%lu\t%d\n", ++ "hdv0", "partition", si.totalswap >> 10, ++ 
(si.totalswap - si.freeswap) >> 10, -1); ++ } + return 0; + } + +@@ -2175,6 +2186,8 @@ void si_swapinfo(struct sysinfo *val) + val->freeswap = nr_swap_pages + nr_to_be_unused; + val->totalswap = total_swap_pages + nr_to_be_unused; + spin_unlock(&swap_lock); ++ if (vx_flags(VXF_VIRT_MEM, 0)) ++ vx_vsi_swapinfo(val); + } + + /* +diff -NurpP --minimal linux-3.3.8/net/bridge/br_multicast.c linux-3.3.8-vs2.3.3.4/net/bridge/br_multicast.c +--- linux-3.3.8/net/bridge/br_multicast.c 2012-06-08 15:23:47.000000000 +0200 ++++ linux-3.3.8-vs2.3.3.4/net/bridge/br_multicast.c 2012-04-30 19:34:38.000000000 +0200 +@@ -445,7 +445,7 @@ static struct sk_buff *br_ip6_multicast_ + ip6h->hop_limit = 1; + ipv6_addr_set(&ip6h->daddr, htonl(0xff020000), 0, 0, htonl(1)); + if (ipv6_dev_get_saddr(dev_net(br->dev), br->dev, &ip6h->daddr, 0, +- &ip6h->saddr)) { ++ &ip6h->saddr, NULL)) { + kfree_skb(skb); + return NULL; + } +diff -NurpP --minimal linux-3.3.8/net/core/dev.c linux-3.3.8-vs2.3.3.4/net/core/dev.c +--- linux-3.3.8/net/core/dev.c 2012-06-08 15:23:47.000000000 +0200 ++++ linux-3.3.8-vs2.3.3.4/net/core/dev.c 2012-05-15 07:09:25.000000000 +0200 +@@ -127,6 +127,7 @@ + #include + #include + #include ++#include + #include + #include + #include +@@ -622,7 +623,8 @@ struct net_device *__dev_get_by_name(str + struct hlist_head *head = dev_name_hash(net, name); + + hlist_for_each_entry(dev, p, head, name_hlist) +- if (!strncmp(dev->name, name, IFNAMSIZ)) ++ if (!strncmp(dev->name, name, IFNAMSIZ) && ++ nx_dev_visible(current_nx_info(), dev)) + return dev; + + return NULL; +@@ -648,7 +650,8 @@ struct net_device *dev_get_by_name_rcu(s + struct hlist_head *head = dev_name_hash(net, name); + + hlist_for_each_entry_rcu(dev, p, head, name_hlist) +- if (!strncmp(dev->name, name, IFNAMSIZ)) ++ if (!strncmp(dev->name, name, IFNAMSIZ) && ++ nx_dev_visible(current_nx_info(), dev)) + return dev; + + return NULL; +@@ -699,7 +702,8 @@ struct net_device *__dev_get_by_index(st + struct hlist_head *head = dev_index_hash(net, ifindex); + + hlist_for_each_entry(dev, p, head, index_hlist) +- if (dev->ifindex == ifindex) ++ if ((dev->ifindex == ifindex) && ++ nx_dev_visible(current_nx_info(), dev)) + return dev; + + return NULL; +@@ -717,7 +721,7 @@ EXPORT_SYMBOL(__dev_get_by_index); + * about locking. The caller must hold RCU lock. 
+ */ + +-struct net_device *dev_get_by_index_rcu(struct net *net, int ifindex) ++struct net_device *dev_get_by_index_real_rcu(struct net *net, int ifindex) + { + struct hlist_node *p; + struct net_device *dev; +@@ -729,6 +733,16 @@ struct net_device *dev_get_by_index_rcu( + + return NULL; + } ++EXPORT_SYMBOL(dev_get_by_index_real_rcu); ++ ++struct net_device *dev_get_by_index_rcu(struct net *net, int ifindex) ++{ ++ struct net_device *dev = dev_get_by_index_real_rcu(net, ifindex); ++ ++ if (nx_dev_visible(current_nx_info(), dev)) ++ return dev; ++ return NULL; ++} + EXPORT_SYMBOL(dev_get_by_index_rcu); + + +@@ -777,7 +791,8 @@ struct net_device *dev_getbyhwaddr_rcu(s + + for_each_netdev_rcu(net, dev) + if (dev->type == type && +- !memcmp(dev->dev_addr, ha, dev->addr_len)) ++ !memcmp(dev->dev_addr, ha, dev->addr_len) && ++ nx_dev_visible(current_nx_info(), dev)) + return dev; + + return NULL; +@@ -789,9 +804,11 @@ struct net_device *__dev_getfirstbyhwtyp + struct net_device *dev; + + ASSERT_RTNL(); +- for_each_netdev(net, dev) +- if (dev->type == type) ++ for_each_netdev(net, dev) { ++ if ((dev->type == type) && ++ nx_dev_visible(current_nx_info(), dev)) + return dev; ++ } + + return NULL; + } +@@ -909,6 +926,8 @@ static int __dev_alloc_name(struct net * + continue; + if (i < 0 || i >= max_netdevices) + continue; ++ if (!nx_dev_visible(current_nx_info(), d)) ++ continue; + + /* avoid cases where sscanf is not exact inverse of printf */ + snprintf(buf, IFNAMSIZ, name, i); +@@ -4029,6 +4048,8 @@ static int dev_ifconf(struct net *net, c + + total = 0; + for_each_netdev(net, dev) { ++ if (!nx_dev_visible(current_nx_info(), dev)) ++ continue; + for (i = 0; i < NPROTO; i++) { + if (gifconf_list[i]) { + int done; +@@ -4131,6 +4152,10 @@ static void dev_seq_printf_stats(struct + struct rtnl_link_stats64 temp; + const struct rtnl_link_stats64 *stats = dev_get_stats(dev, &temp); + ++ /* device visible inside network context? 
*/ ++ if (!nx_dev_visible(current_nx_info(), dev)) ++ return; ++ + seq_printf(seq, "%6s: %7llu %7llu %4llu %4llu %4llu %5llu %10llu %9llu " + "%8llu %7llu %4llu %4llu %4llu %5llu %7llu %10llu\n", + dev->name, stats->rx_bytes, stats->rx_packets, +diff -NurpP --minimal linux-3.3.8/net/core/rtnetlink.c linux-3.3.8-vs2.3.3.4/net/core/rtnetlink.c +--- linux-3.3.8/net/core/rtnetlink.c 2012-06-08 15:23:47.000000000 +0200 ++++ linux-3.3.8-vs2.3.3.4/net/core/rtnetlink.c 2012-04-03 03:02:13.000000000 +0200 +@@ -1073,6 +1073,8 @@ static int rtnl_dump_ifinfo(struct sk_bu + hlist_for_each_entry_rcu(dev, node, head, index_hlist) { + if (idx < s_idx) + goto cont; ++ if (!nx_dev_visible(skb->sk->sk_nx_info, dev)) ++ continue; + if (rtnl_fill_ifinfo(skb, dev, RTM_NEWLINK, + NETLINK_CB(cb->skb).pid, + cb->nlh->nlmsg_seq, 0, +@@ -1955,6 +1957,9 @@ void rtmsg_ifinfo(int type, struct net_d + int err = -ENOBUFS; + size_t if_info_size; + ++ if (!nx_dev_visible(current_nx_info(), dev)) ++ return; ++ + skb = nlmsg_new((if_info_size = if_nlmsg_size(dev, 0)), GFP_KERNEL); + if (skb == NULL) + goto errout; +diff -NurpP --minimal linux-3.3.8/net/core/sock.c linux-3.3.8-vs2.3.3.4/net/core/sock.c +--- linux-3.3.8/net/core/sock.c 2012-03-19 19:47:33.000000000 +0100 ++++ linux-3.3.8-vs2.3.3.4/net/core/sock.c 2012-02-24 03:55:07.000000000 +0100 +@@ -130,6 +130,10 @@ + #include + + #include ++#include ++#include ++#include ++#include + + #include + +@@ -1127,6 +1131,8 @@ static struct sock *sk_prot_alloc(struct + goto out_free_sec; + sk_tx_queue_clear(sk); + } ++ sock_vx_init(sk); ++ sock_nx_init(sk); + + return sk; + +@@ -1235,6 +1241,11 @@ static void __sk_free(struct sock *sk) + put_cred(sk->sk_peer_cred); + put_pid(sk->sk_peer_pid); + put_net(sock_net(sk)); ++ vx_sock_dec(sk); ++ clr_vx_info(&sk->sk_vx_info); ++ sk->sk_xid = -1; ++ clr_nx_info(&sk->sk_nx_info); ++ sk->sk_nid = -1; + sk_prot_free(sk->sk_prot_creator, sk); + } + +@@ -1295,6 +1306,8 @@ struct sock *sk_clone_lock(const struct + + /* SANITY */ + get_net(sock_net(newsk)); ++ sock_vx_init(newsk); ++ sock_nx_init(newsk); + sk_node_init(&newsk->sk_node); + sock_lock_init(newsk); + bh_lock_sock(newsk); +@@ -1351,6 +1364,12 @@ struct sock *sk_clone_lock(const struct + smp_wmb(); + atomic_set(&newsk->sk_refcnt, 2); + ++ set_vx_info(&newsk->sk_vx_info, sk->sk_vx_info); ++ newsk->sk_xid = sk->sk_xid; ++ vx_sock_inc(newsk); ++ set_nx_info(&newsk->sk_nx_info, sk->sk_nx_info); ++ newsk->sk_nid = sk->sk_nid; ++ + /* + * Increment the counter in the same struct proto as the master + * sock (sk_refcnt_debug_inc uses newsk->sk_prot->socks, that +@@ -2102,6 +2121,12 @@ void sock_init_data(struct socket *sock, + + sk->sk_stamp = ktime_set(-1L, 0); + ++ set_vx_info(&sk->sk_vx_info, current_vx_info()); ++ sk->sk_xid = vx_current_xid(); ++ vx_sock_inc(sk); ++ set_nx_info(&sk->sk_nx_info, current_nx_info()); ++ sk->sk_nid = nx_current_nid(); ++ + /* + * Before updating sk_refcnt, we must commit prior changes to memory + * (Documentation/RCU/rculist_nulls.txt for details) +diff -NurpP --minimal linux-3.3.8/net/ipv4/af_inet.c linux-3.3.8-vs2.3.3.4/net/ipv4/af_inet.c +--- linux-3.3.8/net/ipv4/af_inet.c 2012-03-19 19:47:33.000000000 +0100 ++++ linux-3.3.8-vs2.3.3.4/net/ipv4/af_inet.c 2012-02-24 03:55:07.000000000 +0100 +@@ -117,6 +117,7 @@ + #ifdef CONFIG_IP_MROUTE + #include + #endif ++#include + + + /* The inetsw table contains everything that inet_create needs to +@@ -326,9 +327,13 @@ lookup_protocol: + } + + err = -EPERM; ++ if ((protocol == IPPROTO_ICMP) && ++ 
nx_capable(CAP_NET_RAW, NXC_RAW_ICMP)) ++ goto override; ++ + if (sock->type == SOCK_RAW && !kern && !capable(CAP_NET_RAW)) + goto out_rcu_unlock; +- ++override: + err = -EAFNOSUPPORT; + if (!inet_netns_ok(net, protocol)) + goto out_rcu_unlock; +@@ -452,6 +457,7 @@ int inet_bind(struct socket *sock, struc + struct sockaddr_in *addr = (struct sockaddr_in *)uaddr; + struct sock *sk = sock->sk; + struct inet_sock *inet = inet_sk(sk); ++ struct nx_v4_sock_addr nsa; + unsigned short snum; + int chk_addr_ret; + int err; +@@ -475,7 +481,11 @@ int inet_bind(struct socket *sock, struc + goto out; + } + +- chk_addr_ret = inet_addr_type(sock_net(sk), addr->sin_addr.s_addr); ++ err = v4_map_sock_addr(inet, addr, &nsa); ++ if (err) ++ goto out; ++ ++ chk_addr_ret = inet_addr_type(sock_net(sk), nsa.saddr); + + /* Not specified by any standard per-se, however it breaks too + * many applications when removed. It is unfortunate since +@@ -487,7 +497,7 @@ int inet_bind(struct socket *sock, struc + err = -EADDRNOTAVAIL; + if (!sysctl_ip_nonlocal_bind && + !(inet->freebind || inet->transparent) && +- addr->sin_addr.s_addr != htonl(INADDR_ANY) && ++ nsa.saddr != htonl(INADDR_ANY) && + chk_addr_ret != RTN_LOCAL && + chk_addr_ret != RTN_MULTICAST && + chk_addr_ret != RTN_BROADCAST) +@@ -512,7 +522,7 @@ int inet_bind(struct socket *sock, struc + if (sk->sk_state != TCP_CLOSE || inet->inet_num) + goto out_release_sock; + +- inet->inet_rcv_saddr = inet->inet_saddr = addr->sin_addr.s_addr; ++ v4_set_sock_addr(inet, &nsa); + if (chk_addr_ret == RTN_MULTICAST || chk_addr_ret == RTN_BROADCAST) + inet->inet_saddr = 0; /* Use device */ + +@@ -715,11 +725,13 @@ int inet_getname(struct socket *sock, st + peer == 1)) + return -ENOTCONN; + sin->sin_port = inet->inet_dport; +- sin->sin_addr.s_addr = inet->inet_daddr; ++ sin->sin_addr.s_addr = ++ nx_map_sock_lback(sk->sk_nx_info, inet->inet_daddr); + } else { + __be32 addr = inet->inet_rcv_saddr; + if (!addr) + addr = inet->inet_saddr; ++ addr = nx_map_sock_lback(sk->sk_nx_info, addr); + sin->sin_port = inet->inet_sport; + sin->sin_addr.s_addr = addr; + } +diff -NurpP --minimal linux-3.3.8/net/ipv4/arp.c linux-3.3.8-vs2.3.3.4/net/ipv4/arp.c +--- linux-3.3.8/net/ipv4/arp.c 2012-03-19 19:47:33.000000000 +0100 ++++ linux-3.3.8-vs2.3.3.4/net/ipv4/arp.c 2012-02-24 03:55:07.000000000 +0100 +@@ -1329,6 +1329,7 @@ static void arp_format_neigh_entry(struc + struct net_device *dev = n->dev; + int hatype = dev->type; + ++ /* FIXME: check for network context */ + read_lock(&n->lock); + /* Convert hardware address to XX:XX:XX:XX ... form. */ + #if IS_ENABLED(CONFIG_AX25) +@@ -1360,6 +1361,7 @@ static void arp_format_pneigh_entry(stru + int hatype = dev ? dev->type : 0; + char tbuf[16]; + ++ /* FIXME: check for network context */ + sprintf(tbuf, "%pI4", n->key); + seq_printf(seq, "%-16s 0x%-10x0x%-10x%s * %s\n", + tbuf, hatype, ATF_PUBL | ATF_PERM, "00:00:00:00:00:00", +diff -NurpP --minimal linux-3.3.8/net/ipv4/devinet.c linux-3.3.8-vs2.3.3.4/net/ipv4/devinet.c +--- linux-3.3.8/net/ipv4/devinet.c 2012-03-19 19:47:33.000000000 +0100 ++++ linux-3.3.8-vs2.3.3.4/net/ipv4/devinet.c 2012-02-24 03:55:07.000000000 +0100 +@@ -518,6 +518,7 @@ struct in_device *inetdev_by_index(struc + } + EXPORT_SYMBOL(inetdev_by_index); + ++ + /* Called only from RTNL semaphored context. No locks. 
*/ + + struct in_ifaddr *inet_ifa_byprefix(struct in_device *in_dev, __be32 prefix, +@@ -759,6 +760,8 @@ int devinet_ioctl(struct net *net, unsig + + in_dev = __in_dev_get_rtnl(dev); + if (in_dev) { ++ struct nx_info *nxi = current_nx_info(); ++ + if (tryaddrmatch) { + /* Matthias Andree */ + /* compare label and address (4.4BSD style) */ +@@ -767,6 +770,8 @@ int devinet_ioctl(struct net *net, unsig + This is checked above. */ + for (ifap = &in_dev->ifa_list; (ifa = *ifap) != NULL; + ifap = &ifa->ifa_next) { ++ if (!nx_v4_ifa_visible(nxi, ifa)) ++ continue; + if (!strcmp(ifr.ifr_name, ifa->ifa_label) && + sin_orig.sin_addr.s_addr == + ifa->ifa_local) { +@@ -779,9 +784,12 @@ int devinet_ioctl(struct net *net, unsig + comparing just the label */ + if (!ifa) { + for (ifap = &in_dev->ifa_list; (ifa = *ifap) != NULL; +- ifap = &ifa->ifa_next) ++ ifap = &ifa->ifa_next) { ++ if (!nx_v4_ifa_visible(nxi, ifa)) ++ continue; + if (!strcmp(ifr.ifr_name, ifa->ifa_label)) + break; ++ } + } + } + +@@ -934,6 +942,8 @@ static int inet_gifconf(struct net_devic + goto out; + + for (ifa = in_dev->ifa_list; ifa; ifa = ifa->ifa_next) { ++ if (!nx_v4_ifa_visible(current_nx_info(), ifa)) ++ continue; + if (!buf) { + done += sizeof(ifr); + continue; +@@ -1294,6 +1304,7 @@ static int inet_dump_ifaddr(struct sk_bu + struct net_device *dev; + struct in_device *in_dev; + struct in_ifaddr *ifa; ++ struct sock *sk = skb->sk; + struct hlist_head *head; + struct hlist_node *node; + +@@ -1316,6 +1327,8 @@ static int inet_dump_ifaddr(struct sk_bu + + for (ifa = in_dev->ifa_list, ip_idx = 0; ifa; + ifa = ifa->ifa_next, ip_idx++) { ++ if (sk && !nx_v4_ifa_visible(sk->sk_nx_info, ifa)) ++ continue; + if (ip_idx < s_ip_idx) + continue; + if (inet_fill_ifaddr(skb, ifa, +diff -NurpP --minimal linux-3.3.8/net/ipv4/fib_trie.c linux-3.3.8-vs2.3.3.4/net/ipv4/fib_trie.c +--- linux-3.3.8/net/ipv4/fib_trie.c 2012-03-19 19:47:33.000000000 +0100 ++++ linux-3.3.8-vs2.3.3.4/net/ipv4/fib_trie.c 2012-02-24 03:55:07.000000000 +0100 +@@ -2556,6 +2556,7 @@ static int fib_route_seq_show(struct seq + || fa->fa_type == RTN_MULTICAST) + continue; + ++ /* FIXME: check for network context? 
*/ + if (fi) + seq_printf(seq, + "%s\t%08X\t%08X\t%04X\t%d\t%u\t" +diff -NurpP --minimal linux-3.3.8/net/ipv4/inet_connection_sock.c linux-3.3.8-vs2.3.3.4/net/ipv4/inet_connection_sock.c +--- linux-3.3.8/net/ipv4/inet_connection_sock.c 2012-03-19 19:47:33.000000000 +0100 ++++ linux-3.3.8-vs2.3.3.4/net/ipv4/inet_connection_sock.c 2012-02-24 03:55:07.000000000 +0100 +@@ -52,6 +52,37 @@ void inet_get_local_port_range(int *low, + } + EXPORT_SYMBOL(inet_get_local_port_range); + ++int ipv4_rcv_saddr_equal(const struct sock *sk1, const struct sock *sk2) ++{ ++ __be32 sk1_rcv_saddr = sk_rcv_saddr(sk1), ++ sk2_rcv_saddr = sk_rcv_saddr(sk2); ++ ++ if (inet_v6_ipv6only(sk2)) ++ return 0; ++ ++ if (sk1_rcv_saddr && ++ sk2_rcv_saddr && ++ sk1_rcv_saddr == sk2_rcv_saddr) ++ return 1; ++ ++ if (sk1_rcv_saddr && ++ !sk2_rcv_saddr && ++ v4_addr_in_nx_info(sk2->sk_nx_info, sk1_rcv_saddr, NXA_MASK_BIND)) ++ return 1; ++ ++ if (sk2_rcv_saddr && ++ !sk1_rcv_saddr && ++ v4_addr_in_nx_info(sk1->sk_nx_info, sk2_rcv_saddr, NXA_MASK_BIND)) ++ return 1; ++ ++ if (!sk1_rcv_saddr && ++ !sk2_rcv_saddr && ++ nx_v4_addr_conflict(sk1->sk_nx_info, sk2->sk_nx_info)) ++ return 1; ++ ++ return 0; ++} ++ + int inet_csk_bind_conflict(const struct sock *sk, + const struct inet_bind_bucket *tb) + { +@@ -74,9 +105,7 @@ int inet_csk_bind_conflict(const struct + sk->sk_bound_dev_if == sk2->sk_bound_dev_if)) { + if (!reuse || !sk2->sk_reuse || + sk2->sk_state == TCP_LISTEN) { +- const __be32 sk2_rcv_saddr = sk_rcv_saddr(sk2); +- if (!sk2_rcv_saddr || !sk_rcv_saddr(sk) || +- sk2_rcv_saddr == sk_rcv_saddr(sk)) ++ if (ipv4_rcv_saddr_equal(sk, sk2)) + break; + } + } +diff -NurpP --minimal linux-3.3.8/net/ipv4/inet_diag.c linux-3.3.8-vs2.3.3.4/net/ipv4/inet_diag.c +--- linux-3.3.8/net/ipv4/inet_diag.c 2012-03-19 19:47:33.000000000 +0100 ++++ linux-3.3.8-vs2.3.3.4/net/ipv4/inet_diag.c 2012-02-24 04:26:38.000000000 +0100 +@@ -31,6 +31,8 @@ + + #include + #include ++#include ++#include + + #include + #include +@@ -106,8 +108,10 @@ int inet_sk_diag_fill(struct sock *sk, s + + r->id.idiag_sport = inet->inet_sport; + r->id.idiag_dport = inet->inet_dport; +- r->id.idiag_src[0] = inet->inet_rcv_saddr; +- r->id.idiag_dst[0] = inet->inet_daddr; ++ r->id.idiag_src[0] = nx_map_sock_lback(sk->sk_nx_info, ++ inet->inet_rcv_saddr); ++ r->id.idiag_dst[0] = nx_map_sock_lback(sk->sk_nx_info, ++ inet->inet_daddr); + + /* IPv6 dual-stack sockets use inet->tos for IPv4 connections, + * hence this needs to be included regardless of socket family. 
+@@ -227,8 +231,8 @@ static int inet_twsk_diag_fill(struct in + sock_diag_save_cookie(tw, r->id.idiag_cookie); + r->id.idiag_sport = tw->tw_sport; + r->id.idiag_dport = tw->tw_dport; +- r->id.idiag_src[0] = tw->tw_rcv_saddr; +- r->id.idiag_dst[0] = tw->tw_daddr; ++ r->id.idiag_src[0] = nx_map_sock_lback(tw->tw_nx_info, tw->tw_rcv_saddr); ++ r->id.idiag_dst[0] = nx_map_sock_lback(tw->tw_nx_info, tw->tw_daddr); + r->idiag_state = tw->tw_substate; + r->idiag_timer = 3; + r->idiag_expires = DIV_ROUND_UP(tmo * 1000, HZ); +@@ -272,12 +276,14 @@ int inet_diag_dump_one_icsk(struct inet_ + + err = -EINVAL; + if (req->sdiag_family == AF_INET) { ++ /* TODO: lback */ + sk = inet_lookup(&init_net, hashinfo, req->id.idiag_dst[0], + req->id.idiag_dport, req->id.idiag_src[0], + req->id.idiag_sport, req->id.idiag_if); + } + #if IS_ENABLED(CONFIG_IPV6) + else if (req->sdiag_family == AF_INET6) { ++ /* TODO: lback */ + sk = inet6_lookup(&init_net, hashinfo, + (struct in6_addr *)req->id.idiag_dst, + req->id.idiag_dport, +@@ -472,6 +478,7 @@ int inet_diag_bc_sk(const struct nlattr + } else + #endif + { ++ /* TODO: lback */ + entry.saddr = &inet->inet_rcv_saddr; + entry.daddr = &inet->inet_daddr; + } +@@ -570,6 +577,7 @@ static int inet_twsk_diag_dump(struct in + } else + #endif + { ++ /* TODO: lback */ + entry.saddr = &tw->tw_rcv_saddr; + entry.daddr = &tw->tw_daddr; + } +@@ -615,8 +623,8 @@ static int inet_diag_fill_req(struct sk_ + + r->id.idiag_sport = inet->inet_sport; + r->id.idiag_dport = ireq->rmt_port; +- r->id.idiag_src[0] = ireq->loc_addr; +- r->id.idiag_dst[0] = ireq->rmt_addr; ++ r->id.idiag_src[0] = nx_map_sock_lback(sk->sk_nx_info, ireq->loc_addr); ++ r->id.idiag_dst[0] = nx_map_sock_lback(sk->sk_nx_info, ireq->rmt_addr); + r->idiag_expires = jiffies_to_msecs(tmo); + r->idiag_rqueue = 0; + r->idiag_wqueue = 0; +@@ -683,6 +691,7 @@ static int inet_diag_dump_reqs(struct sk + continue; + + if (bc) { ++ /* TODO: lback */ + entry.saddr = + #if IS_ENABLED(CONFIG_IPV6) + (entry.family == AF_INET6) ? 
+@@ -744,6 +753,8 @@ void inet_diag_dump_icsk(struct inet_has + sk_nulls_for_each(sk, node, &ilb->head) { + struct inet_sock *inet = inet_sk(sk); + ++ if (!nx_check(sk->sk_nid, VS_WATCH_P | VS_IDENT)) ++ continue; + if (num < s_num) { + num++; + continue; +@@ -814,6 +825,8 @@ skip_listen_ht: + sk_nulls_for_each(sk, node, &head->chain) { + struct inet_sock *inet = inet_sk(sk); + ++ if (!nx_check(sk->sk_nid, VS_WATCH_P | VS_IDENT)) ++ continue; + if (num < s_num) + goto next_normal; + if (!(r->idiag_states & (1 << sk->sk_state))) +@@ -841,6 +854,8 @@ next_normal: + inet_twsk_for_each(tw, node, + &head->twchain) { + ++ if (!nx_check(tw->tw_nid, VS_WATCH_P | VS_IDENT)) ++ continue; + if (num < s_num) + goto next_dying; + if (r->sdiag_family != AF_UNSPEC && +diff -NurpP --minimal linux-3.3.8/net/ipv4/inet_hashtables.c linux-3.3.8-vs2.3.3.4/net/ipv4/inet_hashtables.c +--- linux-3.3.8/net/ipv4/inet_hashtables.c 2011-10-24 18:45:34.000000000 +0200 ++++ linux-3.3.8-vs2.3.3.4/net/ipv4/inet_hashtables.c 2012-02-24 03:55:07.000000000 +0100 +@@ -22,6 +22,7 @@ + #include + #include + #include ++#include + #include + + /* +@@ -156,6 +157,11 @@ static inline int compute_score(struct s + if (rcv_saddr != daddr) + return -1; + score += 2; ++ } else { ++ /* block non nx_info ips */ ++ if (!v4_addr_in_nx_info(sk->sk_nx_info, ++ daddr, NXA_MASK_BIND)) ++ return -1; + } + if (sk->sk_bound_dev_if) { + if (sk->sk_bound_dev_if != dif) +@@ -173,7 +179,6 @@ static inline int compute_score(struct s + * wildcarded during the search since they can never be otherwise. + */ + +- + struct sock *__inet_lookup_listener(struct net *net, + struct inet_hashinfo *hashinfo, + const __be32 daddr, const unsigned short hnum, +@@ -196,6 +201,7 @@ begin: + hiscore = score; + } + } ++ + /* + * if the nulls value we got at the end of this lookup is + * not the expected one, we must restart lookup. 
+diff -NurpP --minimal linux-3.3.8/net/ipv4/netfilter/nf_nat_helper.c linux-3.3.8-vs2.3.3.4/net/ipv4/netfilter/nf_nat_helper.c +--- linux-3.3.8/net/ipv4/netfilter/nf_nat_helper.c 2012-03-19 19:47:33.000000000 +0100 ++++ linux-3.3.8-vs2.3.3.4/net/ipv4/netfilter/nf_nat_helper.c 2012-02-24 03:55:07.000000000 +0100 +@@ -20,6 +20,7 @@ + #include + + #include ++#include + #include + #include + #include +diff -NurpP --minimal linux-3.3.8/net/ipv4/netfilter.c linux-3.3.8-vs2.3.3.4/net/ipv4/netfilter.c +--- linux-3.3.8/net/ipv4/netfilter.c 2012-01-09 16:15:03.000000000 +0100 ++++ linux-3.3.8-vs2.3.3.4/net/ipv4/netfilter.c 2012-02-24 03:55:07.000000000 +0100 +@@ -6,7 +6,7 @@ + #include + #include + #include +-#include ++// #include + #include + #include + #include +diff -NurpP --minimal linux-3.3.8/net/ipv4/raw.c linux-3.3.8-vs2.3.3.4/net/ipv4/raw.c +--- linux-3.3.8/net/ipv4/raw.c 2012-03-19 19:47:33.000000000 +0100 ++++ linux-3.3.8-vs2.3.3.4/net/ipv4/raw.c 2012-02-24 03:55:07.000000000 +0100 +@@ -118,7 +118,7 @@ static struct sock *__raw_v4_lookup(stru + + if (net_eq(sock_net(sk), net) && inet->inet_num == num && + !(inet->inet_daddr && inet->inet_daddr != raddr) && +- !(inet->inet_rcv_saddr && inet->inet_rcv_saddr != laddr) && ++ v4_sock_addr_match(sk->sk_nx_info, inet, laddr) && + !(sk->sk_bound_dev_if && sk->sk_bound_dev_if != dif)) + goto found; /* gotcha */ + } +@@ -390,6 +390,12 @@ static int raw_send_hdrinc(struct sock * + icmp_out_count(net, ((struct icmphdr *) + skb_transport_header(skb))->type); + ++ err = -EPERM; ++ if (!nx_check(0, VS_ADMIN) && !capable(CAP_NET_RAW) && ++ sk->sk_nx_info && ++ !v4_addr_in_nx_info(sk->sk_nx_info, iph->saddr, NXA_MASK_BIND)) ++ goto error_free; ++ + err = NF_HOOK(NFPROTO_IPV4, NF_INET_LOCAL_OUT, skb, NULL, + rt->dst.dev, dst_output); + if (err > 0) +@@ -577,6 +583,16 @@ static int raw_sendmsg(struct kiocb *ioc + goto done; + } + ++ if (sk->sk_nx_info) { ++ rt = ip_v4_find_src(sock_net(sk), sk->sk_nx_info, &fl4); ++ if (IS_ERR(rt)) { ++ err = PTR_ERR(rt); ++ rt = NULL; ++ goto done; ++ } ++ ip_rt_put(rt); ++ } ++ + security_sk_classify_flow(sk, flowi4_to_flowi(&fl4)); + rt = ip_route_output_flow(sock_net(sk), &fl4, sk); + if (IS_ERR(rt)) { +@@ -653,17 +669,19 @@ static int raw_bind(struct sock *sk, str + { + struct inet_sock *inet = inet_sk(sk); + struct sockaddr_in *addr = (struct sockaddr_in *) uaddr; ++ struct nx_v4_sock_addr nsa = { 0 }; + int ret = -EINVAL; + int chk_addr_ret; + + if (sk->sk_state != TCP_CLOSE || addr_len < sizeof(struct sockaddr_in)) + goto out; +- chk_addr_ret = inet_addr_type(sock_net(sk), addr->sin_addr.s_addr); ++ v4_map_sock_addr(inet, addr, &nsa); ++ chk_addr_ret = inet_addr_type(sock_net(sk), nsa.saddr); + ret = -EADDRNOTAVAIL; +- if (addr->sin_addr.s_addr && chk_addr_ret != RTN_LOCAL && ++ if (nsa.saddr && chk_addr_ret != RTN_LOCAL && + chk_addr_ret != RTN_MULTICAST && chk_addr_ret != RTN_BROADCAST) + goto out; +- inet->inet_rcv_saddr = inet->inet_saddr = addr->sin_addr.s_addr; ++ v4_set_sock_addr(inet, &nsa); + if (chk_addr_ret == RTN_MULTICAST || chk_addr_ret == RTN_BROADCAST) + inet->inet_saddr = 0; /* Use device */ + sk_dst_reset(sk); +@@ -715,7 +733,8 @@ static int raw_recvmsg(struct kiocb *ioc + /* Copy the address. 
*/ + if (sin) { + sin->sin_family = AF_INET; +- sin->sin_addr.s_addr = ip_hdr(skb)->saddr; ++ sin->sin_addr.s_addr = ++ nx_map_sock_lback(sk->sk_nx_info, ip_hdr(skb)->saddr); + sin->sin_port = 0; + memset(&sin->sin_zero, 0, sizeof(sin->sin_zero)); + } +@@ -911,7 +930,8 @@ static struct sock *raw_get_first(struct + struct hlist_node *node; + + sk_for_each(sk, node, &state->h->ht[state->bucket]) +- if (sock_net(sk) == seq_file_net(seq)) ++ if ((sock_net(sk) == seq_file_net(seq)) && ++ nx_check(sk->sk_nid, VS_WATCH_P | VS_IDENT)) + goto found; + } + sk = NULL; +@@ -927,7 +947,8 @@ static struct sock *raw_get_next(struct + sk = sk_next(sk); + try_again: + ; +- } while (sk && sock_net(sk) != seq_file_net(seq)); ++ } while (sk && ((sock_net(sk) != seq_file_net(seq)) || ++ !nx_check(sk->sk_nid, VS_WATCH_P | VS_IDENT))); + + if (!sk && ++state->bucket < RAW_HTABLE_SIZE) { + sk = sk_head(&state->h->ht[state->bucket]); +diff -NurpP --minimal linux-3.3.8/net/ipv4/route.c linux-3.3.8-vs2.3.3.4/net/ipv4/route.c +--- linux-3.3.8/net/ipv4/route.c 2012-03-19 19:47:33.000000000 +0100 ++++ linux-3.3.8-vs2.3.3.4/net/ipv4/route.c 2012-03-19 20:52:10.000000000 +0100 +@@ -2697,7 +2697,7 @@ static struct rtable *ip_route_output_sl + + + if (fl4->flowi4_oif) { +- dev_out = dev_get_by_index_rcu(net, fl4->flowi4_oif); ++ dev_out = dev_get_by_index_real_rcu(net, fl4->flowi4_oif); + rth = ERR_PTR(-ENODEV); + if (dev_out == NULL) + goto out; +diff -NurpP --minimal linux-3.3.8/net/ipv4/tcp.c linux-3.3.8-vs2.3.3.4/net/ipv4/tcp.c +--- linux-3.3.8/net/ipv4/tcp.c 2012-06-08 15:23:47.000000000 +0200 ++++ linux-3.3.8-vs2.3.3.4/net/ipv4/tcp.c 2012-05-22 09:49:13.000000000 +0200 +@@ -266,6 +266,7 @@ + #include + #include + #include ++#include + + #include + #include +diff -NurpP --minimal linux-3.3.8/net/ipv4/tcp_ipv4.c linux-3.3.8-vs2.3.3.4/net/ipv4/tcp_ipv4.c +--- linux-3.3.8/net/ipv4/tcp_ipv4.c 2012-03-19 19:47:33.000000000 +0100 ++++ linux-3.3.8-vs2.3.3.4/net/ipv4/tcp_ipv4.c 2012-03-19 20:52:10.000000000 +0100 +@@ -2036,6 +2036,12 @@ static void *listening_get_next(struct s + req = req->dl_next; + while (1) { + while (req) { ++ vxdprintk(VXD_CBIT(net, 6), ++ "sk,req: %p [#%d] (from %d)", req->sk, ++ (req->sk)?req->sk->sk_nid:0, nx_current_nid()); ++ if (req->sk && ++ !nx_check(req->sk->sk_nid, VS_WATCH_P | VS_IDENT)) ++ continue; + if (req->rsk_ops->family == st->family) { + cur = req; + goto out; +@@ -2060,6 +2066,10 @@ get_req: + } + get_sk: + sk_nulls_for_each_from(sk, node) { ++ vxdprintk(VXD_CBIT(net, 6), "sk: %p [#%d] (from %d)", ++ sk, sk->sk_nid, nx_current_nid()); ++ if (!nx_check(sk->sk_nid, VS_WATCH_P | VS_IDENT)) ++ continue; + if (!net_eq(sock_net(sk), net)) + continue; + if (sk->sk_family == st->family) { +@@ -2136,6 +2146,11 @@ static void *established_get_first(struc + + spin_lock_bh(lock); + sk_nulls_for_each(sk, node, &tcp_hashinfo.ehash[st->bucket].chain) { ++ vxdprintk(VXD_CBIT(net, 6), ++ "sk,egf: %p [#%d] (from %d)", ++ sk, sk->sk_nid, nx_current_nid()); ++ if (!nx_check(sk->sk_nid, VS_WATCH_P | VS_IDENT)) ++ continue; + if (sk->sk_family != st->family || + !net_eq(sock_net(sk), net)) { + continue; +@@ -2146,6 +2161,11 @@ static void *established_get_first(struc + st->state = TCP_SEQ_STATE_TIME_WAIT; + inet_twsk_for_each(tw, node, + &tcp_hashinfo.ehash[st->bucket].twchain) { ++ vxdprintk(VXD_CBIT(net, 6), ++ "tw: %p [#%d] (from %d)", ++ tw, tw->tw_nid, nx_current_nid()); ++ if (!nx_check(tw->tw_nid, VS_WATCH_P | VS_IDENT)) ++ continue; + if (tw->tw_family != st->family || + !net_eq(twsk_net(tw), net)) 
{ + continue; +@@ -2175,7 +2195,9 @@ static void *established_get_next(struct + tw = cur; + tw = tw_next(tw); + get_tw: +- while (tw && (tw->tw_family != st->family || !net_eq(twsk_net(tw), net))) { ++ while (tw && (tw->tw_family != st->family || ++ !net_eq(twsk_net(tw), net) || ++ !nx_check(tw->tw_nid, VS_WATCH_P | VS_IDENT))) { + tw = tw_next(tw); + } + if (tw) { +@@ -2199,6 +2221,11 @@ get_tw: + sk = sk_nulls_next(sk); + + sk_nulls_for_each_from(sk, node) { ++ vxdprintk(VXD_CBIT(net, 6), ++ "sk,egn: %p [#%d] (from %d)", ++ sk, sk->sk_nid, nx_current_nid()); ++ if (!nx_check(sk->sk_nid, VS_WATCH_P | VS_IDENT)) ++ continue; + if (sk->sk_family == st->family && net_eq(sock_net(sk), net)) + goto found; + } +@@ -2404,9 +2431,9 @@ static void get_openreq4(const struct so + seq_printf(f, "%4d: %08X:%04X %08X:%04X" + " %02X %08X:%08X %02X:%08lX %08X %5d %8d %u %d %pK%n", + i, +- ireq->loc_addr, ++ nx_map_sock_lback(current_nx_info(), ireq->loc_addr), + ntohs(inet_sk(sk)->inet_sport), +- ireq->rmt_addr, ++ nx_map_sock_lback(current_nx_info(), ireq->rmt_addr), + ntohs(ireq->rmt_port), + TCP_SYN_RECV, + 0, 0, /* could print option size, but that is af dependent. */ +@@ -2428,8 +2455,8 @@ static void get_tcp4_sock(struct sock *s + const struct tcp_sock *tp = tcp_sk(sk); + const struct inet_connection_sock *icsk = inet_csk(sk); + const struct inet_sock *inet = inet_sk(sk); +- __be32 dest = inet->inet_daddr; +- __be32 src = inet->inet_rcv_saddr; ++ __be32 dest = nx_map_sock_lback(current_nx_info(), inet->inet_daddr); ++ __be32 src = nx_map_sock_lback(current_nx_info(), inet->inet_rcv_saddr); + __u16 destp = ntohs(inet->inet_dport); + __u16 srcp = ntohs(inet->inet_sport); + int rx_queue; +@@ -2486,8 +2513,8 @@ static void get_timewait4_sock(const str + if (ttd < 0) + ttd = 0; + +- dest = tw->tw_daddr; +- src = tw->tw_rcv_saddr; ++ dest = nx_map_sock_lback(current_nx_info(), tw->tw_daddr); ++ src = nx_map_sock_lback(current_nx_info(), tw->tw_rcv_saddr); + destp = ntohs(tw->tw_dport); + srcp = ntohs(tw->tw_sport); + +diff -NurpP --minimal linux-3.3.8/net/ipv4/tcp_minisocks.c linux-3.3.8-vs2.3.3.4/net/ipv4/tcp_minisocks.c +--- linux-3.3.8/net/ipv4/tcp_minisocks.c 2012-03-19 19:47:33.000000000 +0100 ++++ linux-3.3.8-vs2.3.3.4/net/ipv4/tcp_minisocks.c 2012-02-24 04:25:11.000000000 +0100 +@@ -23,6 +23,9 @@ + #include + #include + #include ++#include ++#include ++#include + #include + #include + #include +@@ -336,6 +339,11 @@ void tcp_time_wait(struct sock *sk, int + tcptw->tw_ts_recent = tp->rx_opt.ts_recent; + tcptw->tw_ts_recent_stamp = tp->rx_opt.ts_recent_stamp; + ++ tw->tw_xid = sk->sk_xid; ++ tw->tw_vx_info = NULL; ++ tw->tw_nid = sk->sk_nid; ++ tw->tw_nx_info = NULL; ++ + #if IS_ENABLED(CONFIG_IPV6) + if (tw->tw_family == PF_INET6) { + struct ipv6_pinfo *np = inet6_sk(sk); +diff -NurpP --minimal linux-3.3.8/net/ipv4/udp.c linux-3.3.8-vs2.3.3.4/net/ipv4/udp.c +--- linux-3.3.8/net/ipv4/udp.c 2012-03-19 19:47:33.000000000 +0100 ++++ linux-3.3.8-vs2.3.3.4/net/ipv4/udp.c 2012-02-24 03:55:07.000000000 +0100 +@@ -297,14 +297,7 @@ fail: + } + EXPORT_SYMBOL(udp_lib_get_port); + +-static int ipv4_rcv_saddr_equal(const struct sock *sk1, const struct sock *sk2) +-{ +- struct inet_sock *inet1 = inet_sk(sk1), *inet2 = inet_sk(sk2); +- +- return (!ipv6_only_sock(sk2) && +- (!inet1->inet_rcv_saddr || !inet2->inet_rcv_saddr || +- inet1->inet_rcv_saddr == inet2->inet_rcv_saddr)); +-} ++extern int ipv4_rcv_saddr_equal(const struct sock *, const struct sock *); + + static unsigned int udp4_portaddr_hash(struct net *net, 
__be32 saddr, + unsigned int port) +@@ -339,6 +332,11 @@ static inline int compute_score(struct s + if (inet->inet_rcv_saddr != daddr) + return -1; + score += 2; ++ } else { ++ /* block non nx_info ips */ ++ if (!v4_addr_in_nx_info(sk->sk_nx_info, ++ daddr, NXA_MASK_BIND)) ++ return -1; + } + if (inet->inet_daddr) { + if (inet->inet_daddr != saddr) +@@ -442,6 +440,7 @@ exact_match: + return result; + } + ++ + /* UDP is nearly always wildcards out the wazoo, it makes no sense to try + * harder than this. -DaveM + */ +@@ -487,6 +486,11 @@ begin: + sk_nulls_for_each_rcu(sk, node, &hslot->head) { + score = compute_score(sk, net, saddr, hnum, sport, + daddr, dport, dif); ++ /* FIXME: disabled? ++ if (score == 9) { ++ result = sk; ++ break; ++ } else */ + if (score > badness) { + result = sk; + badness = score; +@@ -500,6 +504,7 @@ begin: + if (get_nulls_value(node) != slot) + goto begin; + ++ + if (result) { + if (unlikely(!atomic_inc_not_zero_hint(&result->sk_refcnt, 2))) + result = NULL; +@@ -509,6 +514,7 @@ begin: + goto begin; + } + } ++ + rcu_read_unlock(); + return result; + } +@@ -552,8 +558,7 @@ static inline struct sock *udp_v4_mcast_ + udp_sk(s)->udp_port_hash != hnum || + (inet->inet_daddr && inet->inet_daddr != rmt_addr) || + (inet->inet_dport != rmt_port && inet->inet_dport) || +- (inet->inet_rcv_saddr && +- inet->inet_rcv_saddr != loc_addr) || ++ !v4_sock_addr_match(sk->sk_nx_info, inet, loc_addr) || + ipv6_only_sock(s) || + (s->sk_bound_dev_if && s->sk_bound_dev_if != dif)) + continue; +@@ -931,6 +936,16 @@ int udp_sendmsg(struct kiocb *iocb, stru + inet_sk_flowi_flags(sk)|FLOWI_FLAG_CAN_SLEEP, + faddr, saddr, dport, inet->inet_sport); + ++ if (sk->sk_nx_info) { ++ rt = ip_v4_find_src(net, sk->sk_nx_info, fl4); ++ if (IS_ERR(rt)) { ++ err = PTR_ERR(rt); ++ rt = NULL; ++ goto out; ++ } ++ ip_rt_put(rt); ++ } ++ + security_sk_classify_flow(sk, flowi4_to_flowi(fl4)); + rt = ip_route_output_flow(net, fl4, sk); + if (IS_ERR(rt)) { +@@ -1229,7 +1244,8 @@ try_again: + if (sin) { + sin->sin_family = AF_INET; + sin->sin_port = udp_hdr(skb)->source; +- sin->sin_addr.s_addr = ip_hdr(skb)->saddr; ++ sin->sin_addr.s_addr = nx_map_sock_lback( ++ skb->sk->sk_nx_info, ip_hdr(skb)->saddr); + memset(sin->sin_zero, 0, sizeof(sin->sin_zero)); + } + if (inet->cmsg_flags) +@@ -1976,6 +1992,8 @@ static struct sock *udp_get_first(struct + sk_nulls_for_each(sk, node, &hslot->head) { + if (!net_eq(sock_net(sk), net)) + continue; ++ if (!nx_check(sk->sk_nid, VS_WATCH_P | VS_IDENT)) ++ continue; + if (sk->sk_family == state->family) + goto found; + } +@@ -1993,7 +2011,9 @@ static struct sock *udp_get_next(struct + + do { + sk = sk_nulls_next(sk); +- } while (sk && (!net_eq(sock_net(sk), net) || sk->sk_family != state->family)); ++ } while (sk && (!net_eq(sock_net(sk), net) || ++ sk->sk_family != state->family || ++ !nx_check(sk->sk_nid, VS_WATCH_P | VS_IDENT))); + + if (!sk) { + if (state->bucket <= state->udp_table->mask) +diff -NurpP --minimal linux-3.3.8/net/ipv6/Kconfig linux-3.3.8-vs2.3.3.4/net/ipv6/Kconfig +--- linux-3.3.8/net/ipv6/Kconfig 2010-08-02 16:52:59.000000000 +0200 ++++ linux-3.3.8-vs2.3.3.4/net/ipv6/Kconfig 2012-02-24 03:55:07.000000000 +0100 +@@ -4,8 +4,8 @@ + + # IPv6 as module will cause a CRASH if you try to unload it + menuconfig IPV6 +- tristate "The IPv6 protocol" +- default m ++ bool "The IPv6 protocol" ++ default n + ---help--- + This is complemental support for the IP version 6. + You will still be able to do traditional IPv4 networking as well. 
+diff -NurpP --minimal linux-3.3.8/net/ipv6/addrconf.c linux-3.3.8-vs2.3.3.4/net/ipv6/addrconf.c +--- linux-3.3.8/net/ipv6/addrconf.c 2012-03-19 19:47:33.000000000 +0100 ++++ linux-3.3.8-vs2.3.3.4/net/ipv6/addrconf.c 2012-03-19 20:52:10.000000000 +0100 +@@ -88,6 +88,8 @@ + #include + #include + #include ++#include ++#include + + /* Set to 3 to get tracing... */ + #define ACONF_DEBUG 2 +@@ -1105,7 +1107,7 @@ out: + + int ipv6_dev_get_saddr(struct net *net, struct net_device *dst_dev, + const struct in6_addr *daddr, unsigned int prefs, +- struct in6_addr *saddr) ++ struct in6_addr *saddr, struct nx_info *nxi) + { + struct ipv6_saddr_score scores[2], + *score = &scores[0], *hiscore = &scores[1]; +@@ -1177,6 +1179,8 @@ int ipv6_dev_get_saddr(struct net *net, + dev->name); + continue; + } ++ if (!v6_addr_in_nx_info(nxi, &score->ifa->addr, -1)) ++ continue; + + score->rule = -1; + bitmap_zero(score->scorebits, IPV6_SADDR_RULE_MAX); +@@ -3162,7 +3166,10 @@ static void if6_seq_stop(struct seq_file + static int if6_seq_show(struct seq_file *seq, void *v) + { + struct inet6_ifaddr *ifp = (struct inet6_ifaddr *)v; +- seq_printf(seq, "%pi6 %02x %02x %02x %02x %8s\n", ++ ++ if (nx_check(0, VS_ADMIN|VS_WATCH) || ++ v6_addr_in_nx_info(current_nx_info(), &ifp->addr, -1)) ++ seq_printf(seq, "%pi6 %02x %02x %02x %02x %8s\n", + &ifp->addr, + ifp->idev->dev->ifindex, + ifp->prefix_len, +@@ -3668,6 +3675,11 @@ static int in6_dump_addrs(struct inet6_d + struct ifacaddr6 *ifaca; + int err = 1; + int ip_idx = *p_ip_idx; ++ struct nx_info *nxi = skb->sk ? skb->sk->sk_nx_info : NULL; ++ ++ /* disable ipv6 on non v6 guests */ ++ if (nxi && !nx_info_has_v6(nxi)) ++ return skb->len; + + read_lock_bh(&idev->lock); + switch (type) { +@@ -3678,6 +3690,8 @@ static int in6_dump_addrs(struct inet6_d + list_for_each_entry(ifa, &idev->addr_list, if_list) { + if (++ip_idx < s_ip_idx) + continue; ++ if (!v6_addr_in_nx_info(nxi, &ifa->addr, -1)) ++ continue; + err = inet6_fill_ifaddr(skb, ifa, + NETLINK_CB(cb->skb).pid, + cb->nlh->nlmsg_seq, +@@ -3694,6 +3708,8 @@ static int in6_dump_addrs(struct inet6_d + ifmca = ifmca->next, ip_idx++) { + if (ip_idx < s_ip_idx) + continue; ++ if (!v6_addr_in_nx_info(nxi, &ifmca->mca_addr, -1)) ++ continue; + err = inet6_fill_ifmcaddr(skb, ifmca, + NETLINK_CB(cb->skb).pid, + cb->nlh->nlmsg_seq, +@@ -3709,6 +3725,8 @@ static int in6_dump_addrs(struct inet6_d + ifaca = ifaca->aca_next, ip_idx++) { + if (ip_idx < s_ip_idx) + continue; ++ if (!v6_addr_in_nx_info(nxi, &ifaca->aca_addr, -1)) ++ continue; + err = inet6_fill_ifacaddr(skb, ifaca, + NETLINK_CB(cb->skb).pid, + cb->nlh->nlmsg_seq, +@@ -4094,6 +4112,11 @@ static int inet6_dump_ifinfo(struct sk_b + struct inet6_dev *idev; + struct hlist_head *head; + struct hlist_node *node; ++ struct nx_info *nxi = skb->sk ? skb->sk->sk_nx_info : NULL; ++ ++ /* FIXME: maybe disable ipv6 on non v6 guests? 
++ if (skb->sk && skb->sk->sk_vx_info) ++ return skb->len; */ + + s_h = cb->args[0]; + s_idx = cb->args[1]; +@@ -4105,6 +4128,8 @@ static int inet6_dump_ifinfo(struct sk_b + hlist_for_each_entry_rcu(dev, node, head, index_hlist) { + if (idx < s_idx) + goto cont; ++ if (!v6_dev_in_nx_info(dev, nxi)) ++ goto cont; + idev = __in6_dev_get(dev); + if (!idev) + goto cont; +diff -NurpP --minimal linux-3.3.8/net/ipv6/af_inet6.c linux-3.3.8-vs2.3.3.4/net/ipv6/af_inet6.c +--- linux-3.3.8/net/ipv6/af_inet6.c 2012-03-19 19:47:33.000000000 +0100 ++++ linux-3.3.8-vs2.3.3.4/net/ipv6/af_inet6.c 2012-02-24 04:23:27.000000000 +0100 +@@ -42,6 +42,8 @@ + #include + #include + #include ++#include ++#include + + #include + #include +@@ -160,9 +162,12 @@ lookup_protocol: + } + + err = -EPERM; ++ if ((protocol == IPPROTO_ICMPV6) && ++ nx_capable(CAP_NET_RAW, NXC_RAW_ICMP)) ++ goto override; + if (sock->type == SOCK_RAW && !kern && !capable(CAP_NET_RAW)) + goto out_rcu_unlock; +- ++override: + sock->ops = answer->ops; + answer_prot = answer->prot; + answer_no_check = answer->no_check; +@@ -261,6 +266,7 @@ int inet6_bind(struct socket *sock, stru + struct inet_sock *inet = inet_sk(sk); + struct ipv6_pinfo *np = inet6_sk(sk); + struct net *net = sock_net(sk); ++ struct nx_v6_sock_addr nsa; + __be32 v4addr = 0; + unsigned short snum; + int addr_type = 0; +@@ -276,6 +282,10 @@ int inet6_bind(struct socket *sock, stru + if (addr->sin6_family != AF_INET6) + return -EAFNOSUPPORT; + ++ err = v6_map_sock_addr(inet, addr, &nsa); ++ if (err) ++ return err; ++ + addr_type = ipv6_addr_type(&addr->sin6_addr); + if ((addr_type & IPV6_ADDR_MULTICAST) && sock->type == SOCK_STREAM) + return -EINVAL; +@@ -307,6 +317,7 @@ int inet6_bind(struct socket *sock, stru + /* Reproduce AF_INET checks to make the bindings consistent */ + v4addr = addr->sin6_addr.s6_addr32[3]; + chk_addr_ret = inet_addr_type(net, v4addr); ++ + if (!sysctl_ip_nonlocal_bind && + !(inet->freebind || inet->transparent) && + v4addr != htonl(INADDR_ANY) && +@@ -316,6 +327,10 @@ int inet6_bind(struct socket *sock, stru + err = -EADDRNOTAVAIL; + goto out; + } ++ if (!v4_addr_in_nx_info(sk->sk_nx_info, v4addr, NXA_MASK_BIND)) { ++ err = -EADDRNOTAVAIL; ++ goto out; ++ } + } else { + if (addr_type != IPV6_ADDR_ANY) { + struct net_device *dev = NULL; +@@ -342,6 +357,11 @@ int inet6_bind(struct socket *sock, stru + } + } + ++ if (!v6_addr_in_nx_info(sk->sk_nx_info, &addr->sin6_addr, -1)) { ++ err = -EADDRNOTAVAIL; ++ goto out; ++ } ++ + /* ipv4 addr of the socket is invalid. Only the + * unspecified and mapped address have a v4 equivalent. + */ +@@ -358,6 +378,9 @@ int inet6_bind(struct socket *sock, stru + } + } + ++ /* what's that for? */ ++ v6_set_sock_addr(inet, &nsa); ++ + inet->inet_rcv_saddr = v4addr; + inet->inet_saddr = v4addr; + +@@ -459,9 +482,11 @@ int inet6_getname(struct socket *sock, s + return -ENOTCONN; + sin->sin6_port = inet->inet_dport; + sin->sin6_addr = np->daddr; ++ /* FIXME: remap lback? */ + if (np->sndflow) + sin->sin6_flowinfo = np->flow_label; + } else { ++ /* FIXME: remap lback? 
*/ + if (ipv6_addr_any(&np->rcv_saddr)) + sin->sin6_addr = np->saddr; + else +diff -NurpP --minimal linux-3.3.8/net/ipv6/datagram.c linux-3.3.8-vs2.3.3.4/net/ipv6/datagram.c +--- linux-3.3.8/net/ipv6/datagram.c 2012-03-19 19:47:33.000000000 +0100 ++++ linux-3.3.8-vs2.3.3.4/net/ipv6/datagram.c 2012-02-24 03:55:07.000000000 +0100 +@@ -642,7 +642,7 @@ int datagram_send_ctl(struct net *net, s + + rcu_read_lock(); + if (fl6->flowi6_oif) { +- dev = dev_get_by_index_rcu(net, fl6->flowi6_oif); ++ dev = dev_get_by_index_real_rcu(net, fl6->flowi6_oif); + if (!dev) { + rcu_read_unlock(); + return -ENODEV; +diff -NurpP --minimal linux-3.3.8/net/ipv6/fib6_rules.c linux-3.3.8-vs2.3.3.4/net/ipv6/fib6_rules.c +--- linux-3.3.8/net/ipv6/fib6_rules.c 2012-03-19 19:47:33.000000000 +0100 ++++ linux-3.3.8-vs2.3.3.4/net/ipv6/fib6_rules.c 2012-02-24 03:55:07.000000000 +0100 +@@ -91,7 +91,7 @@ static int fib6_rule_action(struct fib_r + ip6_dst_idev(&rt->dst)->dev, + &flp6->daddr, + rt6_flags2srcprefs(flags), +- &saddr)) ++ &saddr, NULL)) + goto again; + if (!ipv6_prefix_equal(&saddr, &r->src.addr, + r->src.plen)) +diff -NurpP --minimal linux-3.3.8/net/ipv6/inet6_hashtables.c linux-3.3.8-vs2.3.3.4/net/ipv6/inet6_hashtables.c +--- linux-3.3.8/net/ipv6/inet6_hashtables.c 2011-10-24 18:45:34.000000000 +0200 ++++ linux-3.3.8-vs2.3.3.4/net/ipv6/inet6_hashtables.c 2012-02-24 03:55:07.000000000 +0100 +@@ -16,6 +16,7 @@ + + #include + #include ++#include + + #include + #include +@@ -83,7 +84,6 @@ struct sock *__inet6_lookup_established( + unsigned int slot = hash & hashinfo->ehash_mask; + struct inet_ehash_bucket *head = &hashinfo->ehash[slot]; + +- + rcu_read_lock(); + begin: + sk_nulls_for_each_rcu(sk, node, &head->chain) { +@@ -95,7 +95,7 @@ begin: + sock_put(sk); + goto begin; + } +- goto out; ++ goto out; + } + } + if (get_nulls_value(node) != slot) +@@ -141,6 +141,9 @@ static inline int compute_score(struct s + if (!ipv6_addr_equal(&np->rcv_saddr, daddr)) + return -1; + score++; ++ } else { ++ if (!v6_addr_in_nx_info(sk->sk_nx_info, daddr, -1)) ++ return -1; + } + if (sk->sk_bound_dev_if) { + if (sk->sk_bound_dev_if != dif) +diff -NurpP --minimal linux-3.3.8/net/ipv6/ip6_output.c linux-3.3.8-vs2.3.3.4/net/ipv6/ip6_output.c +--- linux-3.3.8/net/ipv6/ip6_output.c 2012-06-08 15:23:47.000000000 +0200 ++++ linux-3.3.8-vs2.3.3.4/net/ipv6/ip6_output.c 2012-04-03 03:02:13.000000000 +0200 +@@ -968,7 +968,8 @@ static int ip6_dst_lookup_tail(struct so + struct rt6_info *rt = (struct rt6_info *) *dst; + err = ip6_route_get_saddr(net, rt, &fl6->daddr, + sk ? inet6_sk(sk)->srcprefs : 0, +- &fl6->saddr); ++ &fl6->saddr, ++ sk ? 
sk->sk_nx_info : NULL); + if (err) + goto out_err_release; + } +diff -NurpP --minimal linux-3.3.8/net/ipv6/ndisc.c linux-3.3.8-vs2.3.3.4/net/ipv6/ndisc.c +--- linux-3.3.8/net/ipv6/ndisc.c 2012-03-19 19:47:33.000000000 +0100 ++++ linux-3.3.8-vs2.3.3.4/net/ipv6/ndisc.c 2012-03-19 20:52:10.000000000 +0100 +@@ -575,7 +575,7 @@ static void ndisc_send_na(struct net_dev + } else { + if (ipv6_dev_get_saddr(dev_net(dev), dev, daddr, + inet6_sk(dev_net(dev)->ipv6.ndisc_sk)->srcprefs, +- &tmpaddr)) ++ &tmpaddr, NULL)) + return; + src_addr = &tmpaddr; + } +diff -NurpP --minimal linux-3.3.8/net/ipv6/raw.c linux-3.3.8-vs2.3.3.4/net/ipv6/raw.c +--- linux-3.3.8/net/ipv6/raw.c 2012-03-19 19:47:33.000000000 +0100 ++++ linux-3.3.8-vs2.3.3.4/net/ipv6/raw.c 2012-02-24 03:55:07.000000000 +0100 +@@ -30,6 +30,7 @@ + #include + #include + #include ++#include + #include + #include + #include +@@ -285,6 +286,13 @@ static int rawv6_bind(struct sock *sk, s + goto out_unlock; + } + ++ if (!v6_addr_in_nx_info(sk->sk_nx_info, &addr->sin6_addr, -1)) { ++ err = -EADDRNOTAVAIL; ++ if (dev) ++ dev_put(dev); ++ goto out; ++ } ++ + /* ipv4 addr of the socket is invalid. Only the + * unspecified and mapped address have a v4 equivalent. + */ +diff -NurpP --minimal linux-3.3.8/net/ipv6/route.c linux-3.3.8-vs2.3.3.4/net/ipv6/route.c +--- linux-3.3.8/net/ipv6/route.c 2012-06-08 15:23:47.000000000 +0200 ++++ linux-3.3.8-vs2.3.3.4/net/ipv6/route.c 2012-04-03 03:02:13.000000000 +0200 +@@ -55,6 +55,7 @@ + #include + #include + #include ++#include + + #include + +@@ -2107,15 +2108,17 @@ int ip6_route_get_saddr(struct net *net, + struct rt6_info *rt, + const struct in6_addr *daddr, + unsigned int prefs, +- struct in6_addr *saddr) ++ struct in6_addr *saddr, ++ struct nx_info *nxi) + { + struct inet6_dev *idev = ip6_dst_idev((struct dst_entry*)rt); + int err = 0; +- if (rt->rt6i_prefsrc.plen) ++ if (rt->rt6i_prefsrc.plen && (!nxi || ++ v6_addr_in_nx_info(nxi, &rt->rt6i_prefsrc.addr, NXA_TYPE_ADDR))) + *saddr = rt->rt6i_prefsrc.addr; + else + err = ipv6_dev_get_saddr(net, idev ? idev->dev : NULL, +- daddr, prefs, saddr); ++ daddr, prefs, saddr, nxi); + return err; + } + +@@ -2446,7 +2449,8 @@ static int rt6_fill_node(struct net *net + NLA_PUT_U32(skb, RTA_IIF, iif); + } else if (dst) { + struct in6_addr saddr_buf; +- if (ip6_route_get_saddr(net, rt, dst, 0, &saddr_buf) == 0) ++ if (ip6_route_get_saddr(net, rt, dst, 0, &saddr_buf, ++ (skb->sk ? skb->sk->sk_nx_info : NULL)) == 0) + NLA_PUT(skb, RTA_PREFSRC, 16, &saddr_buf); + } + +@@ -2660,6 +2664,7 @@ static int rt6_info_route(struct rt6_inf + struct seq_file *m = p_arg; + struct neighbour *n; + ++ /* FIXME: check for network context? */ + seq_printf(m, "%pi6 %02x ", &rt->rt6i_dst.addr, rt->rt6i_dst.plen); + + #ifdef CONFIG_IPV6_SUBTREES +diff -NurpP --minimal linux-3.3.8/net/ipv6/tcp_ipv6.c linux-3.3.8-vs2.3.3.4/net/ipv6/tcp_ipv6.c +--- linux-3.3.8/net/ipv6/tcp_ipv6.c 2012-06-08 15:23:47.000000000 +0200 ++++ linux-3.3.8-vs2.3.3.4/net/ipv6/tcp_ipv6.c 2012-04-30 19:34:38.000000000 +0200 +@@ -71,6 +71,7 @@ + + #include + #include ++#include + + static void tcp_v6_send_reset(struct sock *sk, struct sk_buff *skb); + static void tcp_v6_reqsk_send_ack(struct sock *sk, struct sk_buff *skb, +@@ -163,8 +164,15 @@ static int tcp_v6_connect(struct sock *s + * connect() to INADDR_ANY means loopback (BSD'ism). 
+ */ + +- if(ipv6_addr_any(&usin->sin6_addr)) +- usin->sin6_addr.s6_addr[15] = 0x1; ++ if(ipv6_addr_any(&usin->sin6_addr)) { ++ struct nx_info *nxi = sk->sk_nx_info; ++ ++ if (nxi && nx_info_has_v6(nxi)) ++ /* FIXME: remap lback? */ ++ usin->sin6_addr = nxi->v6.ip; ++ else ++ usin->sin6_addr.s6_addr[15] = 0x1; ++ } + + addr_type = ipv6_addr_type(&usin->sin6_addr); + +diff -NurpP --minimal linux-3.3.8/net/ipv6/udp.c linux-3.3.8-vs2.3.3.4/net/ipv6/udp.c +--- linux-3.3.8/net/ipv6/udp.c 2012-03-19 19:47:33.000000000 +0100 ++++ linux-3.3.8-vs2.3.3.4/net/ipv6/udp.c 2012-02-24 03:55:07.000000000 +0100 +@@ -45,41 +45,67 @@ + #include + #include + #include ++#include + + #include + #include + #include "udp_impl.h" + +-int ipv6_rcv_saddr_equal(const struct sock *sk, const struct sock *sk2) ++int ipv6_rcv_saddr_equal(const struct sock *sk1, const struct sock *sk2) + { +- const struct in6_addr *sk_rcv_saddr6 = &inet6_sk(sk)->rcv_saddr; ++ const struct in6_addr *sk1_rcv_saddr6 = &inet6_sk(sk1)->rcv_saddr; + const struct in6_addr *sk2_rcv_saddr6 = inet6_rcv_saddr(sk2); +- __be32 sk1_rcv_saddr = sk_rcv_saddr(sk); ++ __be32 sk1_rcv_saddr = sk_rcv_saddr(sk1); + __be32 sk2_rcv_saddr = sk_rcv_saddr(sk2); +- int sk_ipv6only = ipv6_only_sock(sk); ++ int sk1_ipv6only = ipv6_only_sock(sk1); + int sk2_ipv6only = inet_v6_ipv6only(sk2); +- int addr_type = ipv6_addr_type(sk_rcv_saddr6); ++ int addr_type = ipv6_addr_type(sk1_rcv_saddr6); + int addr_type2 = sk2_rcv_saddr6 ? ipv6_addr_type(sk2_rcv_saddr6) : IPV6_ADDR_MAPPED; + + /* if both are mapped, treat as IPv4 */ +- if (addr_type == IPV6_ADDR_MAPPED && addr_type2 == IPV6_ADDR_MAPPED) +- return (!sk2_ipv6only && ++ if (addr_type == IPV6_ADDR_MAPPED && addr_type2 == IPV6_ADDR_MAPPED) { ++ if (!sk2_ipv6only && + (!sk1_rcv_saddr || !sk2_rcv_saddr || +- sk1_rcv_saddr == sk2_rcv_saddr)); ++ sk1_rcv_saddr == sk2_rcv_saddr)) ++ goto vs_v4; ++ else ++ return 0; ++ } + + if (addr_type2 == IPV6_ADDR_ANY && + !(sk2_ipv6only && addr_type == IPV6_ADDR_MAPPED)) +- return 1; ++ goto vs; + + if (addr_type == IPV6_ADDR_ANY && +- !(sk_ipv6only && addr_type2 == IPV6_ADDR_MAPPED)) +- return 1; ++ !(sk1_ipv6only && addr_type2 == IPV6_ADDR_MAPPED)) ++ goto vs; + + if (sk2_rcv_saddr6 && +- ipv6_addr_equal(sk_rcv_saddr6, sk2_rcv_saddr6)) +- return 1; ++ ipv6_addr_equal(sk1_rcv_saddr6, sk2_rcv_saddr6)) ++ goto vs; + + return 0; ++ ++vs_v4: ++ if (!sk1_rcv_saddr && !sk2_rcv_saddr) ++ return nx_v4_addr_conflict(sk1->sk_nx_info, sk2->sk_nx_info); ++ if (!sk2_rcv_saddr) ++ return v4_addr_in_nx_info(sk1->sk_nx_info, sk2_rcv_saddr, -1); ++ if (!sk1_rcv_saddr) ++ return v4_addr_in_nx_info(sk2->sk_nx_info, sk1_rcv_saddr, -1); ++ return 1; ++vs: ++ if (addr_type2 == IPV6_ADDR_ANY && addr_type == IPV6_ADDR_ANY) ++ return nx_v6_addr_conflict(sk1->sk_nx_info, sk2->sk_nx_info); ++ else if (addr_type2 == IPV6_ADDR_ANY) ++ return v6_addr_in_nx_info(sk2->sk_nx_info, sk1_rcv_saddr6, -1); ++ else if (addr_type == IPV6_ADDR_ANY) { ++ if (addr_type2 == IPV6_ADDR_MAPPED) ++ return nx_v4_addr_conflict(sk1->sk_nx_info, sk2->sk_nx_info); ++ else ++ return v6_addr_in_nx_info(sk1->sk_nx_info, sk2_rcv_saddr6, -1); ++ } ++ return 1; + } + + static unsigned int udp6_portaddr_hash(struct net *net, +@@ -143,6 +169,10 @@ static inline int compute_score(struct s + if (!ipv6_addr_equal(&np->rcv_saddr, daddr)) + return -1; + score++; ++ } else { ++ /* block non nx_info ips */ ++ if (!v6_addr_in_nx_info(sk->sk_nx_info, daddr, -1)) ++ return -1; + } + if (!ipv6_addr_any(&np->daddr)) { + if (!ipv6_addr_equal(&np->daddr, 
saddr)) +diff -NurpP --minimal linux-3.3.8/net/ipv6/xfrm6_policy.c linux-3.3.8-vs2.3.3.4/net/ipv6/xfrm6_policy.c +--- linux-3.3.8/net/ipv6/xfrm6_policy.c 2012-03-19 19:47:33.000000000 +0100 ++++ linux-3.3.8-vs2.3.3.4/net/ipv6/xfrm6_policy.c 2012-02-24 03:55:07.000000000 +0100 +@@ -63,7 +63,7 @@ static int xfrm6_get_saddr(struct net *n + dev = ip6_dst_idev(dst)->dev; + ipv6_dev_get_saddr(dev_net(dev), dev, + (struct in6_addr *)&daddr->a6, 0, +- (struct in6_addr *)&saddr->a6); ++ (struct in6_addr *)&saddr->a6, NULL); + dst_release(dst); + return 0; + } +diff -NurpP --minimal linux-3.3.8/net/netfilter/ipvs/ip_vs_xmit.c linux-3.3.8-vs2.3.3.4/net/netfilter/ipvs/ip_vs_xmit.c +--- linux-3.3.8/net/netfilter/ipvs/ip_vs_xmit.c 2012-03-19 19:47:33.000000000 +0100 ++++ linux-3.3.8-vs2.3.3.4/net/netfilter/ipvs/ip_vs_xmit.c 2012-02-24 03:55:07.000000000 +0100 +@@ -226,7 +226,7 @@ __ip_vs_route_output_v6(struct net *net, + return dst; + if (ipv6_addr_any(&fl6.saddr) && + ipv6_dev_get_saddr(net, ip6_dst_idev(dst)->dev, +- &fl6.daddr, 0, &fl6.saddr) < 0) ++ &fl6.daddr, 0, &fl6.saddr, NULL) < 0) + goto out_err; + if (do_xfrm) { + dst = xfrm_lookup(net, dst, flowi6_to_flowi(&fl6), NULL, 0); +diff -NurpP --minimal linux-3.3.8/net/netlink/af_netlink.c linux-3.3.8-vs2.3.3.4/net/netlink/af_netlink.c +--- linux-3.3.8/net/netlink/af_netlink.c 2012-06-08 15:23:47.000000000 +0200 ++++ linux-3.3.8-vs2.3.3.4/net/netlink/af_netlink.c 2012-04-30 19:34:38.000000000 +0200 +@@ -55,6 +55,9 @@ + #include + #include + #include ++#include ++#include ++#include + + #include + #include +@@ -1910,6 +1913,8 @@ static struct sock *netlink_seq_socket_i + sk_for_each(s, node, &hash->table[j]) { + if (sock_net(s) != seq_file_net(seq)) + continue; ++ if (!nx_check(s->sk_nid, VS_WATCH_P | VS_IDENT)) ++ continue; + if (off == pos) { + iter->link = i; + iter->hash_idx = j; +@@ -1944,7 +1949,8 @@ static void *netlink_seq_next(struct seq + s = v; + do { + s = sk_next(s); +- } while (s && sock_net(s) != seq_file_net(seq)); ++ } while (s && (sock_net(s) != seq_file_net(seq) || ++ !nx_check(s->sk_nid, VS_WATCH_P | VS_IDENT))); + if (s) + return s; + +@@ -1956,7 +1962,8 @@ static void *netlink_seq_next(struct seq + + for (; j <= hash->mask; j++) { + s = sk_head(&hash->table[j]); +- while (s && sock_net(s) != seq_file_net(seq)) ++ while (s && (sock_net(s) != seq_file_net(seq) || ++ !nx_check(s->sk_nid, VS_WATCH_P | VS_IDENT))) + s = sk_next(s); + if (s) { + iter->link = i; +diff -NurpP --minimal linux-3.3.8/net/socket.c linux-3.3.8-vs2.3.3.4/net/socket.c +--- linux-3.3.8/net/socket.c 2012-06-08 15:23:47.000000000 +0200 ++++ linux-3.3.8-vs2.3.3.4/net/socket.c 2012-04-30 19:34:38.000000000 +0200 +@@ -98,6 +98,10 @@ + + #include + #include ++#include ++#include ++#include ++#include + + #include + #include +@@ -548,6 +552,7 @@ static inline int __sock_sendmsg_nosec(s + struct msghdr *msg, size_t size) + { + struct sock_iocb *si = kiocb_to_siocb(iocb); ++ size_t len; + + sock_update_classid(sock->sk); + +@@ -558,7 +563,22 @@ static inline int __sock_sendmsg_nosec(s + si->msg = msg; + si->size = size; + +- return sock->ops->sendmsg(iocb, sock, msg, size); ++ len = sock->ops->sendmsg(iocb, sock, msg, size); ++ if (sock->sk) { ++ if (len == size) ++ vx_sock_send(sock->sk, size); ++ else ++ vx_sock_fail(sock->sk, size); ++ } ++ vxdprintk(VXD_CBIT(net, 7), ++ "__sock_sendmsg: %p[%p,%p,%p;%d/%d]:%d/%zu", ++ sock, sock->sk, ++ (sock->sk)?sock->sk->sk_nx_info:0, ++ (sock->sk)?sock->sk->sk_vx_info:0, ++ (sock->sk)?sock->sk->sk_xid:0, ++ 
(sock->sk)?sock->sk->sk_nid:0, ++ (unsigned int)size, len); ++ return len; + } + + static inline int __sock_sendmsg(struct kiocb *iocb, struct socket *sock, +@@ -714,6 +734,7 @@ static inline int __sock_recvmsg_nosec(s + struct msghdr *msg, size_t size, int flags) + { + struct sock_iocb *si = kiocb_to_siocb(iocb); ++ int len; + + sock_update_classid(sock->sk); + +@@ -723,7 +744,18 @@ static inline int __sock_recvmsg_nosec(s + si->size = size; + si->flags = flags; + +- return sock->ops->recvmsg(iocb, sock, msg, size, flags); ++ len = sock->ops->recvmsg(iocb, sock, msg, size, flags); ++ if ((len >= 0) && sock->sk) ++ vx_sock_recv(sock->sk, len); ++ vxdprintk(VXD_CBIT(net, 7), ++ "__sock_recvmsg: %p[%p,%p,%p;%d/%d]:%d/%d", ++ sock, sock->sk, ++ (sock->sk)?sock->sk->sk_nx_info:0, ++ (sock->sk)?sock->sk->sk_vx_info:0, ++ (sock->sk)?sock->sk->sk_xid:0, ++ (sock->sk)?sock->sk->sk_nid:0, ++ (unsigned int)size, len); ++ return len; + } + + static inline int __sock_recvmsg(struct kiocb *iocb, struct socket *sock, +@@ -1208,6 +1240,13 @@ int __sock_create(struct net *net, int f + if (type < 0 || type >= SOCK_MAX) + return -EINVAL; + ++ if (!nx_check(0, VS_ADMIN)) { ++ if (family == PF_INET && !current_nx_info_has_v4()) ++ return -EAFNOSUPPORT; ++ if (family == PF_INET6 && !current_nx_info_has_v6()) ++ return -EAFNOSUPPORT; ++ } ++ + /* Compatibility. + + This uglymoron is moved from INET layer to here to avoid +@@ -1343,6 +1382,7 @@ SYSCALL_DEFINE3(socket, int, family, int + if (retval < 0) + goto out; + ++ set_bit(SOCK_USER_SOCKET, &sock->flags); + retval = sock_map_fd(sock, flags & (O_CLOEXEC | O_NONBLOCK)); + if (retval < 0) + goto out_release; +@@ -1384,10 +1424,12 @@ SYSCALL_DEFINE4(socketpair, int, family, + err = sock_create(family, type, protocol, &sock1); + if (err < 0) + goto out; ++ set_bit(SOCK_USER_SOCKET, &sock1->flags); + + err = sock_create(family, type, protocol, &sock2); + if (err < 0) + goto out_release_1; ++ set_bit(SOCK_USER_SOCKET, &sock2->flags); + + err = sock1->ops->socketpair(sock1, sock2); + if (err < 0) +diff -NurpP --minimal linux-3.3.8/net/sunrpc/auth.c linux-3.3.8-vs2.3.3.4/net/sunrpc/auth.c +--- linux-3.3.8/net/sunrpc/auth.c 2011-10-24 18:45:34.000000000 +0200 ++++ linux-3.3.8-vs2.3.3.4/net/sunrpc/auth.c 2012-02-24 03:55:07.000000000 +0100 +@@ -14,6 +14,7 @@ + #include + #include + #include ++#include + + #ifdef RPC_DEBUG + # define RPCDBG_FACILITY RPCDBG_AUTH +@@ -427,6 +428,7 @@ rpcauth_lookupcred(struct rpc_auth *auth + memset(&acred, 0, sizeof(acred)); + acred.uid = cred->fsuid; + acred.gid = cred->fsgid; ++ acred.tag = dx_current_tag(); + acred.group_info = get_group_info(((struct cred *)cred)->group_info); + + ret = auth->au_ops->lookup_cred(auth, &acred, flags); +@@ -467,6 +469,7 @@ rpcauth_bind_root_cred(struct rpc_task * + struct auth_cred acred = { + .uid = 0, + .gid = 0, ++ .tag = dx_current_tag(), + }; + + dprintk("RPC: %5u looking up %s cred\n", +diff -NurpP --minimal linux-3.3.8/net/sunrpc/auth_unix.c linux-3.3.8-vs2.3.3.4/net/sunrpc/auth_unix.c +--- linux-3.3.8/net/sunrpc/auth_unix.c 2012-01-09 16:15:04.000000000 +0100 ++++ linux-3.3.8-vs2.3.3.4/net/sunrpc/auth_unix.c 2012-02-24 03:55:07.000000000 +0100 +@@ -12,12 +12,14 @@ + #include + #include + #include ++#include + + #define NFS_NGROUPS 16 + + struct unx_cred { + struct rpc_cred uc_base; + gid_t uc_gid; ++ tag_t uc_tag; + gid_t uc_gids[NFS_NGROUPS]; + }; + #define uc_uid uc_base.cr_uid +@@ -78,6 +80,7 @@ unx_create_cred(struct rpc_auth *auth, s + groups = NFS_NGROUPS; + + cred->uc_gid = acred->gid; 
++ cred->uc_tag = acred->tag; + for (i = 0; i < groups; i++) + cred->uc_gids[i] = GROUP_AT(acred->group_info, i); + if (i < NFS_NGROUPS) +@@ -119,7 +122,9 @@ unx_match(struct auth_cred *acred, struc + unsigned int i; + + +- if (cred->uc_uid != acred->uid || cred->uc_gid != acred->gid) ++ if (cred->uc_uid != acred->uid || ++ cred->uc_gid != acred->gid || ++ cred->uc_tag != acred->tag) + return 0; + + if (acred->group_info != NULL) +@@ -145,7 +150,7 @@ unx_marshal(struct rpc_task *task, __be3 + struct rpc_clnt *clnt = task->tk_client; + struct unx_cred *cred = container_of(task->tk_rqstp->rq_cred, struct unx_cred, uc_base); + __be32 *base, *hold; +- int i; ++ int i, tag; + + *p++ = htonl(RPC_AUTH_UNIX); + base = p++; +@@ -155,9 +160,12 @@ unx_marshal(struct rpc_task *task, __be3 + * Copy the UTS nodename captured when the client was created. + */ + p = xdr_encode_array(p, clnt->cl_nodename, clnt->cl_nodelen); ++ tag = task->tk_client->cl_tag; + +- *p++ = htonl((u32) cred->uc_uid); +- *p++ = htonl((u32) cred->uc_gid); ++ *p++ = htonl((u32) TAGINO_UID(tag, ++ cred->uc_uid, cred->uc_tag)); ++ *p++ = htonl((u32) TAGINO_GID(tag, ++ cred->uc_gid, cred->uc_tag)); + hold = p++; + for (i = 0; i < 16 && cred->uc_gids[i] != (gid_t) NOGROUP; i++) + *p++ = htonl((u32) cred->uc_gids[i]); +diff -NurpP --minimal linux-3.3.8/net/sunrpc/clnt.c linux-3.3.8-vs2.3.3.4/net/sunrpc/clnt.c +--- linux-3.3.8/net/sunrpc/clnt.c 2012-01-09 16:15:04.000000000 +0100 ++++ linux-3.3.8-vs2.3.3.4/net/sunrpc/clnt.c 2012-02-24 03:55:07.000000000 +0100 +@@ -31,6 +31,7 @@ + #include + #include + #include ++#include + + #include + #include +@@ -361,6 +362,9 @@ struct rpc_clnt *rpc_create(struct rpc_c + if (!(args->flags & RPC_CLNT_CREATE_QUIET)) + clnt->cl_chatty = 1; + ++ /* TODO: handle RPC_CLNT_CREATE_TAGGED ++ if (args->flags & RPC_CLNT_CREATE_TAGGED) ++ clnt->cl_tag = 1; */ + return clnt; + } + EXPORT_SYMBOL_GPL(rpc_create); +diff -NurpP --minimal linux-3.3.8/net/unix/af_unix.c linux-3.3.8-vs2.3.3.4/net/unix/af_unix.c +--- linux-3.3.8/net/unix/af_unix.c 2012-03-19 19:47:34.000000000 +0100 ++++ linux-3.3.8-vs2.3.3.4/net/unix/af_unix.c 2012-02-24 03:55:07.000000000 +0100 +@@ -114,6 +114,8 @@ + #include + #include + #include ++#include ++#include + + struct hlist_head unix_socket_table[UNIX_HASH_SIZE + 1]; + EXPORT_SYMBOL_GPL(unix_socket_table); +@@ -261,6 +263,8 @@ static struct sock *__unix_find_socket_b + if (!net_eq(sock_net(s), net)) + continue; + ++ if (!nx_check(s->sk_nid, VS_WATCH_P | VS_IDENT)) ++ continue; + if (u->addr->len == len && + !memcmp(u->addr->name, sunname, len)) + goto found; +@@ -2235,6 +2239,8 @@ static struct sock *unix_seq_idx(struct + for (s = first_unix_socket(&iter->i); s; s = next_unix_socket(&iter->i, s)) { + if (sock_net(s) != seq_file_net(seq)) + continue; ++ if (!nx_check(s->sk_nid, VS_WATCH_P | VS_IDENT)) ++ continue; + if (off == pos) + return s; + ++off; +@@ -2259,7 +2265,8 @@ static void *unix_seq_next(struct seq_fi + sk = first_unix_socket(&iter->i); + else + sk = next_unix_socket(&iter->i, sk); +- while (sk && (sock_net(sk) != seq_file_net(seq))) ++ while (sk && (sock_net(sk) != seq_file_net(seq) || ++ !nx_check(sk->sk_nid, VS_WATCH_P | VS_IDENT))) + sk = next_unix_socket(&iter->i, sk); + return sk; + } +diff -NurpP --minimal linux-3.3.8/scripts/checksyscalls.sh linux-3.3.8-vs2.3.3.4/scripts/checksyscalls.sh +--- linux-3.3.8/scripts/checksyscalls.sh 2012-03-19 19:47:34.000000000 +0100 ++++ linux-3.3.8-vs2.3.3.4/scripts/checksyscalls.sh 2012-02-24 03:55:07.000000000 +0100 +@@ -193,7 +193,6 
@@ cat << EOF + #define __IGNORE_afs_syscall + #define __IGNORE_getpmsg + #define __IGNORE_putpmsg +-#define __IGNORE_vserver + EOF + } + +diff -NurpP --minimal linux-3.3.8/security/commoncap.c linux-3.3.8-vs2.3.3.4/security/commoncap.c +--- linux-3.3.8/security/commoncap.c 2012-06-08 15:23:47.000000000 +0200 ++++ linux-3.3.8-vs2.3.3.4/security/commoncap.c 2012-04-23 23:45:14.000000000 +0200 +@@ -75,14 +75,20 @@ int cap_netlink_send(struct sock *sk, st + int cap_capable(const struct cred *cred, struct user_namespace *targ_ns, + int cap, int audit) + { ++ struct vx_info *vxi = current_vx_info(); /* FIXME: get vxi from cred? */ ++ + for (;;) { + /* The creator of the user namespace has all caps. */ + if (targ_ns != &init_user_ns && targ_ns->creator == cred->user) + return 0; + + /* Do we have the necessary capabilities? */ +- if (targ_ns == cred->user->user_ns) +- return cap_raised(cred->cap_effective, cap) ? 0 : -EPERM; ++ if (targ_ns == cred->user->user_ns) { ++ if (vx_info_flags(vxi, VXF_STATE_SETUP, 0) && ++ cap_raised(cred->cap_effective, cap)) ++ return 0; ++ return vx_cap_raised(vxi, cred->cap_effective, cap) ? 0 : -EPERM; ++ } + + /* Have we tried all of the parent namespaces? */ + if (targ_ns == &init_user_ns) +@@ -611,7 +617,7 @@ int cap_inode_setxattr(struct dentry *de + + if (!strncmp(name, XATTR_SECURITY_PREFIX, + sizeof(XATTR_SECURITY_PREFIX) - 1) && +- !capable(CAP_SYS_ADMIN)) ++ !vx_capable(CAP_SYS_ADMIN, VXC_FS_SECURITY)) + return -EPERM; + return 0; + } +@@ -637,7 +643,7 @@ int cap_inode_removexattr(struct dentry + + if (!strncmp(name, XATTR_SECURITY_PREFIX, + sizeof(XATTR_SECURITY_PREFIX) - 1) && +- !capable(CAP_SYS_ADMIN)) ++ !vx_capable(CAP_SYS_ADMIN, VXC_FS_SECURITY)) + return -EPERM; + return 0; + } +diff -NurpP --minimal linux-3.3.8/security/selinux/hooks.c linux-3.3.8-vs2.3.3.4/security/selinux/hooks.c +--- linux-3.3.8/security/selinux/hooks.c 2012-03-19 19:47:34.000000000 +0100 ++++ linux-3.3.8-vs2.3.3.4/security/selinux/hooks.c 2012-02-24 03:55:07.000000000 +0100 +@@ -67,7 +67,6 @@ + #include + #include + #include /* for Unix socket types */ +-#include /* for Unix socket types */ + #include + #include + #include diff --git a/3.3.8/wrapfs-v3.3-rc1-429-g65388bc.patch b/3.3.8/wrapfs-v3.3-rc1-429-g65388bc.patch new file mode 100644 index 0000000..0c489fa --- /dev/null +++ b/3.3.8/wrapfs-v3.3-rc1-429-g65388bc.patch @@ -0,0 +1,1913 @@ +diff --git a/Documentation/filesystems/00-INDEX b/Documentation/filesystems/00-INDEX +index 8c624a1..b8822ed 100644 +--- a/Documentation/filesystems/00-INDEX ++++ b/Documentation/filesystems/00-INDEX +@@ -114,6 +114,8 @@ vfat.txt + - info on using the VFAT filesystem used in Windows NT and Windows 95 + vfs.txt + - overview of the Virtual File System ++wrapfs.txt ++ - info and mount options for the stackable wrapper file system + xfs.txt + - info and mount options for the XFS filesystem. 
+ xip.txt +diff --git a/MAINTAINERS b/MAINTAINERS +index 1b6e835..bdaad92 100644 +--- a/MAINTAINERS ++++ b/MAINTAINERS +@@ -7395,6 +7395,16 @@ F: include/linux/workqueue.h + F: kernel/workqueue.c + F: Documentation/workqueue.txt + ++WRAPFS ++P: Erez Zadok ++M: ezk@cs.sunysb.edu ++L: wrapfs@filesystems.org ++W: http://wrapfs.filesystems.org/ ++T: git git.kernel.org/pub/scm/linux/kernel/git/ezk/wrapfs.git ++S: Maintained ++F: Documentation/filesystems/wrapfs.txt ++F: fs/wrapfs/ ++ + X.25 NETWORK LAYER + M: Andrew Hendry + L: linux-x25@vger.kernel.org +diff --git a/fs/Kconfig b/fs/Kconfig +index d621f02..6407aa4 100644 +--- a/fs/Kconfig ++++ b/fs/Kconfig +@@ -194,6 +194,7 @@ if MISC_FILESYSTEMS + source "fs/adfs/Kconfig" + source "fs/affs/Kconfig" + source "fs/ecryptfs/Kconfig" ++source "fs/wrapfs/Kconfig" + source "fs/hfs/Kconfig" + source "fs/hfsplus/Kconfig" + source "fs/befs/Kconfig" +diff --git a/fs/Makefile b/fs/Makefile +index 93804d4..a68e75a 100644 +--- a/fs/Makefile ++++ b/fs/Makefile +@@ -83,6 +83,7 @@ obj-$(CONFIG_ISO9660_FS) += isofs/ + obj-$(CONFIG_HFSPLUS_FS) += hfsplus/ # Before hfs to find wrapped HFS+ + obj-$(CONFIG_HFS_FS) += hfs/ + obj-$(CONFIG_ECRYPT_FS) += ecryptfs/ ++obj-$(CONFIG_WRAP_FS) += wrapfs/ + obj-$(CONFIG_VXFS_FS) += freevxfs/ + obj-$(CONFIG_NFS_FS) += nfs/ + obj-$(CONFIG_EXPORTFS) += exportfs/ +diff --git a/fs/wrapfs/Kconfig b/fs/wrapfs/Kconfig +new file mode 100644 +index 0000000..d790ccd +--- /dev/null ++++ b/fs/wrapfs/Kconfig +@@ -0,0 +1,9 @@ ++config WRAP_FS ++ tristate "Wrapfs stackable file system (EXPERIMENTAL)" ++ depends on EXPERIMENTAL ++ help ++ Wrapfs is a stackable file system which simply passes its ++ operations to the lower layer. It is designed as a useful ++ template for developing or debugging other stackable file systems, ++ and more (see Documentation/filesystems/wrapfs.txt). See ++ for details. +diff --git a/fs/wrapfs/Makefile b/fs/wrapfs/Makefile +new file mode 100644 +index 0000000..f318d11 +--- /dev/null ++++ b/fs/wrapfs/Makefile +@@ -0,0 +1,7 @@ ++WRAPFS_VERSION="0.1" ++ ++EXTRA_CFLAGS += -DWRAPFS_VERSION=\"$(WRAPFS_VERSION)\" ++ ++obj-$(CONFIG_WRAP_FS) += wrapfs.o ++ ++wrapfs-y := dentry.o file.o inode.o main.o super.o lookup.o mmap.o +diff --git a/fs/wrapfs/dentry.c b/fs/wrapfs/dentry.c +new file mode 100644 +index 0000000..b173153 +--- /dev/null ++++ b/fs/wrapfs/dentry.c +@@ -0,0 +1,52 @@ ++/* ++ * Copyright (c) 1998-2011 Erez Zadok ++ * Copyright (c) 2009 Shrikar Archak ++ * Copyright (c) 2003-2011 Stony Brook University ++ * Copyright (c) 2003-2011 The Research Foundation of SUNY ++ * ++ * This program is free software; you can redistribute it and/or modify ++ * it under the terms of the GNU General Public License version 2 as ++ * published by the Free Software Foundation. 
++ */ ++ ++#include "wrapfs.h" ++ ++/* ++ * returns: -ERRNO if error (returned to user) ++ * 0: tell VFS to invalidate dentry ++ * 1: dentry is valid ++ */ ++static int wrapfs_d_revalidate(struct dentry *dentry, struct nameidata *nd) ++{ ++ struct path lower_path, saved_path; ++ struct dentry *lower_dentry; ++ int err = 1; ++ ++ if (nd && nd->flags & LOOKUP_RCU) ++ return -ECHILD; ++ ++ wrapfs_get_lower_path(dentry, &lower_path); ++ lower_dentry = lower_path.dentry; ++ if (!lower_dentry->d_op || !lower_dentry->d_op->d_revalidate) ++ goto out; ++ pathcpy(&saved_path, &nd->path); ++ pathcpy(&nd->path, &lower_path); ++ err = lower_dentry->d_op->d_revalidate(lower_dentry, nd); ++ pathcpy(&nd->path, &saved_path); ++out: ++ wrapfs_put_lower_path(dentry, &lower_path); ++ return err; ++} ++ ++static void wrapfs_d_release(struct dentry *dentry) ++{ ++ /* release and reset the lower paths */ ++ wrapfs_put_reset_lower_path(dentry); ++ free_dentry_private_data(dentry); ++ return; ++} ++ ++const struct dentry_operations wrapfs_dops = { ++ .d_revalidate = wrapfs_d_revalidate, ++ .d_release = wrapfs_d_release, ++}; +diff --git a/fs/wrapfs/file.c b/fs/wrapfs/file.c +new file mode 100644 +index 0000000..7a7fe1e +--- /dev/null ++++ b/fs/wrapfs/file.c +@@ -0,0 +1,298 @@ ++/* ++ * Copyright (c) 1998-2011 Erez Zadok ++ * Copyright (c) 2009 Shrikar Archak ++ * Copyright (c) 2003-2011 Stony Brook University ++ * Copyright (c) 2003-2011 The Research Foundation of SUNY ++ * ++ * This program is free software; you can redistribute it and/or modify ++ * it under the terms of the GNU General Public License version 2 as ++ * published by the Free Software Foundation. ++ */ ++ ++#include "wrapfs.h" ++ ++static ssize_t wrapfs_read(struct file *file, char __user *buf, ++ size_t count, loff_t *ppos) ++{ ++ int err; ++ struct file *lower_file; ++ struct dentry *dentry = file->f_path.dentry; ++ ++ lower_file = wrapfs_lower_file(file); ++ err = vfs_read(lower_file, buf, count, ppos); ++ /* update our inode atime upon a successful lower read */ ++ if (err >= 0) ++ fsstack_copy_attr_atime(dentry->d_inode, ++ lower_file->f_path.dentry->d_inode); ++ ++ return err; ++} ++ ++static ssize_t wrapfs_write(struct file *file, const char __user *buf, ++ size_t count, loff_t *ppos) ++{ ++ int err = 0; ++ struct file *lower_file; ++ struct dentry *dentry = file->f_path.dentry; ++ ++ lower_file = wrapfs_lower_file(file); ++ err = vfs_write(lower_file, buf, count, ppos); ++ /* update our inode times+sizes upon a successful lower write */ ++ if (err >= 0) { ++ fsstack_copy_inode_size(dentry->d_inode, ++ lower_file->f_path.dentry->d_inode); ++ fsstack_copy_attr_times(dentry->d_inode, ++ lower_file->f_path.dentry->d_inode); ++ } ++ ++ return err; ++} ++ ++static int wrapfs_readdir(struct file *file, void *dirent, filldir_t filldir) ++{ ++ int err = 0; ++ struct file *lower_file = NULL; ++ struct dentry *dentry = file->f_path.dentry; ++ ++ lower_file = wrapfs_lower_file(file); ++ err = vfs_readdir(lower_file, filldir, dirent); ++ file->f_pos = lower_file->f_pos; ++ if (err >= 0) /* copy the atime */ ++ fsstack_copy_attr_atime(dentry->d_inode, ++ lower_file->f_path.dentry->d_inode); ++ return err; ++} ++ ++static long wrapfs_unlocked_ioctl(struct file *file, unsigned int cmd, ++ unsigned long arg) ++{ ++ long err = -ENOTTY; ++ struct file *lower_file; ++ ++ lower_file = wrapfs_lower_file(file); ++ ++ /* XXX: use vfs_ioctl if/when VFS exports it */ ++ if (!lower_file || !lower_file->f_op) ++ goto out; ++ if (lower_file->f_op->unlocked_ioctl) ++ 
err = lower_file->f_op->unlocked_ioctl(lower_file, cmd, arg); ++ ++out: ++ return err; ++} ++ ++#ifdef CONFIG_COMPAT ++static long wrapfs_compat_ioctl(struct file *file, unsigned int cmd, ++ unsigned long arg) ++{ ++ long err = -ENOTTY; ++ struct file *lower_file; ++ ++ lower_file = wrapfs_lower_file(file); ++ ++ /* XXX: use vfs_ioctl if/when VFS exports it */ ++ if (!lower_file || !lower_file->f_op) ++ goto out; ++ if (lower_file->f_op->compat_ioctl) ++ err = lower_file->f_op->compat_ioctl(lower_file, cmd, arg); ++ ++out: ++ return err; ++} ++#endif ++ ++static int wrapfs_mmap(struct file *file, struct vm_area_struct *vma) ++{ ++ int err = 0; ++ bool willwrite; ++ struct file *lower_file; ++ const struct vm_operations_struct *saved_vm_ops = NULL; ++ ++ /* this might be deferred to mmap's writepage */ ++ willwrite = ((vma->vm_flags | VM_SHARED | VM_WRITE) == vma->vm_flags); ++ ++ /* ++ * File systems which do not implement ->writepage may use ++ * generic_file_readonly_mmap as their ->mmap op. If you call ++ * generic_file_readonly_mmap with VM_WRITE, you'd get an -EINVAL. ++ * But we cannot call the lower ->mmap op, so we can't tell that ++ * writeable mappings won't work. Therefore, our only choice is to ++ * check if the lower file system supports the ->writepage, and if ++ * not, return EINVAL (the same error that ++ * generic_file_readonly_mmap returns in that case). ++ */ ++ lower_file = wrapfs_lower_file(file); ++ if (willwrite && !lower_file->f_mapping->a_ops->writepage) { ++ err = -EINVAL; ++ printk(KERN_ERR "wrapfs: lower file system does not " ++ "support writeable mmap\n"); ++ goto out; ++ } ++ ++ /* ++ * find and save lower vm_ops. ++ * ++ * XXX: the VFS should have a cleaner way of finding the lower vm_ops ++ */ ++ if (!WRAPFS_F(file)->lower_vm_ops) { ++ err = lower_file->f_op->mmap(lower_file, vma); ++ if (err) { ++ printk(KERN_ERR "wrapfs: lower mmap failed %d\n", err); ++ goto out; ++ } ++ saved_vm_ops = vma->vm_ops; /* save: came from lower ->mmap */ ++ err = do_munmap(current->mm, vma->vm_start, ++ vma->vm_end - vma->vm_start); ++ if (err) { ++ printk(KERN_ERR "wrapfs: do_munmap failed %d\n", err); ++ goto out; ++ } ++ } ++ ++ /* ++ * Next 3 lines are all I need from generic_file_mmap. I definitely ++ * don't want its test for ->readpage which returns -ENOEXEC. 
++ */ ++ file_accessed(file); ++ vma->vm_ops = &wrapfs_vm_ops; ++ vma->vm_flags |= VM_CAN_NONLINEAR; ++ ++ file->f_mapping->a_ops = &wrapfs_aops; /* set our aops */ ++ if (!WRAPFS_F(file)->lower_vm_ops) /* save for our ->fault */ ++ WRAPFS_F(file)->lower_vm_ops = saved_vm_ops; ++ ++out: ++ return err; ++} ++ ++static int wrapfs_open(struct inode *inode, struct file *file) ++{ ++ int err = 0; ++ struct file *lower_file = NULL; ++ struct path lower_path; ++ ++ /* don't open unhashed/deleted files */ ++ if (d_unhashed(file->f_path.dentry)) { ++ err = -ENOENT; ++ goto out_err; ++ } ++ ++ file->private_data = ++ kzalloc(sizeof(struct wrapfs_file_info), GFP_KERNEL); ++ if (!WRAPFS_F(file)) { ++ err = -ENOMEM; ++ goto out_err; ++ } ++ ++ /* open lower object and link wrapfs's file struct to lower's */ ++ wrapfs_get_lower_path(file->f_path.dentry, &lower_path); ++ lower_file = dentry_open(lower_path.dentry, lower_path.mnt, ++ file->f_flags, current_cred()); ++ if (IS_ERR(lower_file)) { ++ err = PTR_ERR(lower_file); ++ lower_file = wrapfs_lower_file(file); ++ if (lower_file) { ++ wrapfs_set_lower_file(file, NULL); ++ fput(lower_file); /* fput calls dput for lower_dentry */ ++ } ++ } else { ++ wrapfs_set_lower_file(file, lower_file); ++ } ++ ++ if (err) ++ kfree(WRAPFS_F(file)); ++ else ++ fsstack_copy_attr_all(inode, wrapfs_lower_inode(inode)); ++out_err: ++ return err; ++} ++ ++static int wrapfs_flush(struct file *file, fl_owner_t id) ++{ ++ int err = 0; ++ struct file *lower_file = NULL; ++ ++ lower_file = wrapfs_lower_file(file); ++ if (lower_file && lower_file->f_op && lower_file->f_op->flush) ++ err = lower_file->f_op->flush(lower_file, id); ++ ++ return err; ++} ++ ++/* release all lower object references & free the file info structure */ ++static int wrapfs_file_release(struct inode *inode, struct file *file) ++{ ++ struct file *lower_file; ++ ++ lower_file = wrapfs_lower_file(file); ++ if (lower_file) { ++ wrapfs_set_lower_file(file, NULL); ++ fput(lower_file); ++ } ++ ++ kfree(WRAPFS_F(file)); ++ return 0; ++} ++ ++static int wrapfs_fsync(struct file *file, loff_t start, loff_t end, ++ int datasync) ++{ ++ int err; ++ struct file *lower_file; ++ struct path lower_path; ++ struct dentry *dentry = file->f_path.dentry; ++ ++ err = generic_file_fsync(file, start, end, datasync); ++ if (err) ++ goto out; ++ lower_file = wrapfs_lower_file(file); ++ wrapfs_get_lower_path(dentry, &lower_path); ++ err = vfs_fsync_range(lower_file, start, end, datasync); ++ wrapfs_put_lower_path(dentry, &lower_path); ++out: ++ return err; ++} ++ ++static int wrapfs_fasync(int fd, struct file *file, int flag) ++{ ++ int err = 0; ++ struct file *lower_file = NULL; ++ ++ lower_file = wrapfs_lower_file(file); ++ if (lower_file->f_op && lower_file->f_op->fasync) ++ err = lower_file->f_op->fasync(fd, lower_file, flag); ++ ++ return err; ++} ++ ++const struct file_operations wrapfs_main_fops = { ++ .llseek = generic_file_llseek, ++ .read = wrapfs_read, ++ .write = wrapfs_write, ++ .unlocked_ioctl = wrapfs_unlocked_ioctl, ++#ifdef CONFIG_COMPAT ++ .compat_ioctl = wrapfs_compat_ioctl, ++#endif ++ .mmap = wrapfs_mmap, ++ .open = wrapfs_open, ++ .flush = wrapfs_flush, ++ .release = wrapfs_file_release, ++ .fsync = wrapfs_fsync, ++ .fasync = wrapfs_fasync, ++}; ++ ++/* trimmed directory options */ ++const struct file_operations wrapfs_dir_fops = { ++ .llseek = generic_file_llseek, ++ .read = generic_read_dir, ++ .readdir = wrapfs_readdir, ++ .unlocked_ioctl = wrapfs_unlocked_ioctl, ++#ifdef CONFIG_COMPAT ++ .compat_ioctl = 
wrapfs_compat_ioctl, ++#endif ++ .open = wrapfs_open, ++ .release = wrapfs_file_release, ++ .flush = wrapfs_flush, ++ .fsync = wrapfs_fsync, ++ .fasync = wrapfs_fasync, ++}; +diff --git a/fs/wrapfs/inode.c b/fs/wrapfs/inode.c +new file mode 100644 +index 0000000..b2653b3 +--- /dev/null ++++ b/fs/wrapfs/inode.c +@@ -0,0 +1,514 @@ ++/* ++ * Copyright (c) 1998-2011 Erez Zadok ++ * Copyright (c) 2009 Shrikar Archak ++ * Copyright (c) 2003-2011 Stony Brook University ++ * Copyright (c) 2003-2011 The Research Foundation of SUNY ++ * ++ * This program is free software; you can redistribute it and/or modify ++ * it under the terms of the GNU General Public License version 2 as ++ * published by the Free Software Foundation. ++ */ ++ ++#include "wrapfs.h" ++ ++static int wrapfs_create(struct inode *dir, struct dentry *dentry, ++ umode_t mode, struct nameidata *nd) ++{ ++ int err = 0; ++ struct dentry *lower_dentry; ++ struct dentry *lower_parent_dentry = NULL; ++ struct path lower_path, saved_path; ++ ++ wrapfs_get_lower_path(dentry, &lower_path); ++ lower_dentry = lower_path.dentry; ++ lower_parent_dentry = lock_parent(lower_dentry); ++ ++ err = mnt_want_write(lower_path.mnt); ++ if (err) ++ goto out_unlock; ++ ++ pathcpy(&saved_path, &nd->path); ++ pathcpy(&nd->path, &lower_path); ++ err = vfs_create(lower_parent_dentry->d_inode, lower_dentry, mode, nd); ++ pathcpy(&nd->path, &saved_path); ++ if (err) ++ goto out; ++ ++ err = wrapfs_interpose(dentry, dir->i_sb, &lower_path); ++ if (err) ++ goto out; ++ fsstack_copy_attr_times(dir, wrapfs_lower_inode(dir)); ++ fsstack_copy_inode_size(dir, lower_parent_dentry->d_inode); ++ ++out: ++ mnt_drop_write(lower_path.mnt); ++out_unlock: ++ unlock_dir(lower_parent_dentry); ++ wrapfs_put_lower_path(dentry, &lower_path); ++ return err; ++} ++ ++static int wrapfs_link(struct dentry *old_dentry, struct inode *dir, ++ struct dentry *new_dentry) ++{ ++ struct dentry *lower_old_dentry; ++ struct dentry *lower_new_dentry; ++ struct dentry *lower_dir_dentry; ++ u64 file_size_save; ++ int err; ++ struct path lower_old_path, lower_new_path; ++ ++ file_size_save = i_size_read(old_dentry->d_inode); ++ wrapfs_get_lower_path(old_dentry, &lower_old_path); ++ wrapfs_get_lower_path(new_dentry, &lower_new_path); ++ lower_old_dentry = lower_old_path.dentry; ++ lower_new_dentry = lower_new_path.dentry; ++ lower_dir_dentry = lock_parent(lower_new_dentry); ++ ++ err = mnt_want_write(lower_new_path.mnt); ++ if (err) ++ goto out_unlock; ++ ++ err = vfs_link(lower_old_dentry, lower_dir_dentry->d_inode, ++ lower_new_dentry); ++ if (err || !lower_new_dentry->d_inode) ++ goto out; ++ ++ err = wrapfs_interpose(new_dentry, dir->i_sb, &lower_new_path); ++ if (err) ++ goto out; ++ fsstack_copy_attr_times(dir, lower_new_dentry->d_inode); ++ fsstack_copy_inode_size(dir, lower_new_dentry->d_inode); ++ set_nlink(old_dentry->d_inode, ++ wrapfs_lower_inode(old_dentry->d_inode)->i_nlink); ++ i_size_write(new_dentry->d_inode, file_size_save); ++out: ++ mnt_drop_write(lower_new_path.mnt); ++out_unlock: ++ unlock_dir(lower_dir_dentry); ++ wrapfs_put_lower_path(old_dentry, &lower_old_path); ++ wrapfs_put_lower_path(new_dentry, &lower_new_path); ++ return err; ++} ++ ++static int wrapfs_unlink(struct inode *dir, struct dentry *dentry) ++{ ++ int err; ++ struct dentry *lower_dentry; ++ struct inode *lower_dir_inode = wrapfs_lower_inode(dir); ++ struct dentry *lower_dir_dentry; ++ struct path lower_path; ++ ++ wrapfs_get_lower_path(dentry, &lower_path); ++ lower_dentry = lower_path.dentry; ++ 
dget(lower_dentry); ++ lower_dir_dentry = lock_parent(lower_dentry); ++ ++ err = mnt_want_write(lower_path.mnt); ++ if (err) ++ goto out_unlock; ++ err = vfs_unlink(lower_dir_inode, lower_dentry); ++ ++ /* ++ * Note: unlinking on top of NFS can cause silly-renamed files. ++ * Trying to delete such files results in EBUSY from NFS ++ * below. Silly-renamed files will get deleted by NFS later on, so ++ * we just need to detect them here and treat such EBUSY errors as ++ * if the upper file was successfully deleted. ++ */ ++ if (err == -EBUSY && lower_dentry->d_flags & DCACHE_NFSFS_RENAMED) ++ err = 0; ++ if (err) ++ goto out; ++ fsstack_copy_attr_times(dir, lower_dir_inode); ++ fsstack_copy_inode_size(dir, lower_dir_inode); ++ set_nlink(dentry->d_inode, ++ wrapfs_lower_inode(dentry->d_inode)->i_nlink); ++ dentry->d_inode->i_ctime = dir->i_ctime; ++ d_drop(dentry); /* this is needed, else LTP fails (VFS won't do it) */ ++out: ++ mnt_drop_write(lower_path.mnt); ++out_unlock: ++ unlock_dir(lower_dir_dentry); ++ dput(lower_dentry); ++ wrapfs_put_lower_path(dentry, &lower_path); ++ return err; ++} ++ ++static int wrapfs_symlink(struct inode *dir, struct dentry *dentry, ++ const char *symname) ++{ ++ int err = 0; ++ struct dentry *lower_dentry; ++ struct dentry *lower_parent_dentry = NULL; ++ struct path lower_path; ++ ++ wrapfs_get_lower_path(dentry, &lower_path); ++ lower_dentry = lower_path.dentry; ++ lower_parent_dentry = lock_parent(lower_dentry); ++ ++ err = mnt_want_write(lower_path.mnt); ++ if (err) ++ goto out_unlock; ++ err = vfs_symlink(lower_parent_dentry->d_inode, lower_dentry, symname); ++ if (err) ++ goto out; ++ err = wrapfs_interpose(dentry, dir->i_sb, &lower_path); ++ if (err) ++ goto out; ++ fsstack_copy_attr_times(dir, wrapfs_lower_inode(dir)); ++ fsstack_copy_inode_size(dir, lower_parent_dentry->d_inode); ++ ++out: ++ mnt_drop_write(lower_path.mnt); ++out_unlock: ++ unlock_dir(lower_parent_dentry); ++ wrapfs_put_lower_path(dentry, &lower_path); ++ return err; ++} ++ ++static int wrapfs_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode) ++{ ++ int err = 0; ++ struct dentry *lower_dentry; ++ struct dentry *lower_parent_dentry = NULL; ++ struct path lower_path; ++ ++ wrapfs_get_lower_path(dentry, &lower_path); ++ lower_dentry = lower_path.dentry; ++ lower_parent_dentry = lock_parent(lower_dentry); ++ ++ err = mnt_want_write(lower_path.mnt); ++ if (err) ++ goto out_unlock; ++ err = vfs_mkdir(lower_parent_dentry->d_inode, lower_dentry, mode); ++ if (err) ++ goto out; ++ ++ err = wrapfs_interpose(dentry, dir->i_sb, &lower_path); ++ if (err) ++ goto out; ++ ++ fsstack_copy_attr_times(dir, wrapfs_lower_inode(dir)); ++ fsstack_copy_inode_size(dir, lower_parent_dentry->d_inode); ++ /* update number of links on parent directory */ ++ set_nlink(dir, wrapfs_lower_inode(dir)->i_nlink); ++ ++out: ++ mnt_drop_write(lower_path.mnt); ++out_unlock: ++ unlock_dir(lower_parent_dentry); ++ wrapfs_put_lower_path(dentry, &lower_path); ++ return err; ++} ++ ++static int wrapfs_rmdir(struct inode *dir, struct dentry *dentry) ++{ ++ struct dentry *lower_dentry; ++ struct dentry *lower_dir_dentry; ++ int err; ++ struct path lower_path; ++ ++ wrapfs_get_lower_path(dentry, &lower_path); ++ lower_dentry = lower_path.dentry; ++ lower_dir_dentry = lock_parent(lower_dentry); ++ ++ err = mnt_want_write(lower_path.mnt); ++ if (err) ++ goto out_unlock; ++ err = vfs_rmdir(lower_dir_dentry->d_inode, lower_dentry); ++ if (err) ++ goto out; ++ ++ d_drop(dentry); /* drop our dentry on success (why not VFS's 
job?) */ ++ if (dentry->d_inode) ++ clear_nlink(dentry->d_inode); ++ fsstack_copy_attr_times(dir, lower_dir_dentry->d_inode); ++ fsstack_copy_inode_size(dir, lower_dir_dentry->d_inode); ++ set_nlink(dir, lower_dir_dentry->d_inode->i_nlink); ++ ++out: ++ mnt_drop_write(lower_path.mnt); ++out_unlock: ++ unlock_dir(lower_dir_dentry); ++ wrapfs_put_lower_path(dentry, &lower_path); ++ return err; ++} ++ ++static int wrapfs_mknod(struct inode *dir, struct dentry *dentry, umode_t mode, ++ dev_t dev) ++{ ++ int err = 0; ++ struct dentry *lower_dentry; ++ struct dentry *lower_parent_dentry = NULL; ++ struct path lower_path; ++ ++ wrapfs_get_lower_path(dentry, &lower_path); ++ lower_dentry = lower_path.dentry; ++ lower_parent_dentry = lock_parent(lower_dentry); ++ ++ err = mnt_want_write(lower_path.mnt); ++ if (err) ++ goto out_unlock; ++ err = vfs_mknod(lower_parent_dentry->d_inode, lower_dentry, mode, dev); ++ if (err) ++ goto out; ++ ++ err = wrapfs_interpose(dentry, dir->i_sb, &lower_path); ++ if (err) ++ goto out; ++ fsstack_copy_attr_times(dir, wrapfs_lower_inode(dir)); ++ fsstack_copy_inode_size(dir, lower_parent_dentry->d_inode); ++ ++out: ++ mnt_drop_write(lower_path.mnt); ++out_unlock: ++ unlock_dir(lower_parent_dentry); ++ wrapfs_put_lower_path(dentry, &lower_path); ++ return err; ++} ++ ++/* ++ * The locking rules in wrapfs_rename are complex. We could use a simpler ++ * superblock-level name-space lock for renames and copy-ups. ++ */ ++static int wrapfs_rename(struct inode *old_dir, struct dentry *old_dentry, ++ struct inode *new_dir, struct dentry *new_dentry) ++{ ++ int err = 0; ++ struct dentry *lower_old_dentry = NULL; ++ struct dentry *lower_new_dentry = NULL; ++ struct dentry *lower_old_dir_dentry = NULL; ++ struct dentry *lower_new_dir_dentry = NULL; ++ struct dentry *trap = NULL; ++ struct path lower_old_path, lower_new_path; ++ ++ wrapfs_get_lower_path(old_dentry, &lower_old_path); ++ wrapfs_get_lower_path(new_dentry, &lower_new_path); ++ lower_old_dentry = lower_old_path.dentry; ++ lower_new_dentry = lower_new_path.dentry; ++ lower_old_dir_dentry = dget_parent(lower_old_dentry); ++ lower_new_dir_dentry = dget_parent(lower_new_dentry); ++ ++ trap = lock_rename(lower_old_dir_dentry, lower_new_dir_dentry); ++ /* source should not be ancestor of target */ ++ if (trap == lower_old_dentry) { ++ err = -EINVAL; ++ goto out; ++ } ++ /* target should not be ancestor of source */ ++ if (trap == lower_new_dentry) { ++ err = -ENOTEMPTY; ++ goto out; ++ } ++ ++ err = mnt_want_write(lower_old_path.mnt); ++ if (err) ++ goto out; ++ err = mnt_want_write(lower_new_path.mnt); ++ if (err) ++ goto out_drop_old_write; ++ ++ err = vfs_rename(lower_old_dir_dentry->d_inode, lower_old_dentry, ++ lower_new_dir_dentry->d_inode, lower_new_dentry); ++ if (err) ++ goto out_err; ++ ++ fsstack_copy_attr_all(new_dir, lower_new_dir_dentry->d_inode); ++ fsstack_copy_inode_size(new_dir, lower_new_dir_dentry->d_inode); ++ if (new_dir != old_dir) { ++ fsstack_copy_attr_all(old_dir, ++ lower_old_dir_dentry->d_inode); ++ fsstack_copy_inode_size(old_dir, ++ lower_old_dir_dentry->d_inode); ++ } ++ ++out_err: ++ mnt_drop_write(lower_new_path.mnt); ++out_drop_old_write: ++ mnt_drop_write(lower_old_path.mnt); ++out: ++ unlock_rename(lower_old_dir_dentry, lower_new_dir_dentry); ++ dput(lower_old_dir_dentry); ++ dput(lower_new_dir_dentry); ++ wrapfs_put_lower_path(old_dentry, &lower_old_path); ++ wrapfs_put_lower_path(new_dentry, &lower_new_path); ++ return err; ++} ++ ++static int wrapfs_readlink(struct dentry *dentry, 
char __user *buf, int bufsiz) ++{ ++ int err; ++ struct dentry *lower_dentry; ++ struct path lower_path; ++ ++ wrapfs_get_lower_path(dentry, &lower_path); ++ lower_dentry = lower_path.dentry; ++ if (!lower_dentry->d_inode->i_op || ++ !lower_dentry->d_inode->i_op->readlink) { ++ err = -EINVAL; ++ goto out; ++ } ++ ++ err = lower_dentry->d_inode->i_op->readlink(lower_dentry, ++ buf, bufsiz); ++ if (err < 0) ++ goto out; ++ fsstack_copy_attr_atime(dentry->d_inode, lower_dentry->d_inode); ++ ++out: ++ wrapfs_put_lower_path(dentry, &lower_path); ++ return err; ++} ++ ++static void *wrapfs_follow_link(struct dentry *dentry, struct nameidata *nd) ++{ ++ char *buf; ++ int len = PAGE_SIZE, err; ++ mm_segment_t old_fs; ++ ++ /* This is freed by the put_link method assuming a successful call. */ ++ buf = kmalloc(len, GFP_KERNEL); ++ if (!buf) { ++ buf = ERR_PTR(-ENOMEM); ++ goto out; ++ } ++ ++ /* read the symlink, and then we will follow it */ ++ old_fs = get_fs(); ++ set_fs(KERNEL_DS); ++ err = wrapfs_readlink(dentry, buf, len); ++ set_fs(old_fs); ++ if (err < 0) { ++ kfree(buf); ++ buf = ERR_PTR(err); ++ } else { ++ buf[err] = '\0'; ++ } ++out: ++ nd_set_link(nd, buf); ++ return NULL; ++} ++ ++/* this @nd *IS* still used */ ++static void wrapfs_put_link(struct dentry *dentry, struct nameidata *nd, ++ void *cookie) ++{ ++ char *buf = nd_get_link(nd); ++ if (!IS_ERR(buf)) /* free the char* */ ++ kfree(buf); ++} ++ ++static int wrapfs_permission(struct inode *inode, int mask) ++{ ++ struct inode *lower_inode; ++ int err; ++ ++ lower_inode = wrapfs_lower_inode(inode); ++ err = inode_permission(lower_inode, mask); ++ return err; ++} ++ ++static int wrapfs_setattr(struct dentry *dentry, struct iattr *ia) ++{ ++ int err = 0; ++ struct dentry *lower_dentry; ++ struct inode *inode; ++ struct inode *lower_inode; ++ struct path lower_path; ++ struct iattr lower_ia; ++ ++ inode = dentry->d_inode; ++ ++ /* ++ * Check if user has permission to change inode. We don't check if ++ * this user can change the lower inode: that should happen when ++ * calling notify_change on the lower inode. ++ */ ++ err = inode_change_ok(inode, ia); ++ if (err) ++ goto out_err; ++ ++ wrapfs_get_lower_path(dentry, &lower_path); ++ lower_dentry = lower_path.dentry; ++ lower_inode = wrapfs_lower_inode(inode); ++ ++ /* prepare our own lower struct iattr (with the lower file) */ ++ memcpy(&lower_ia, ia, sizeof(lower_ia)); ++ if (ia->ia_valid & ATTR_FILE) ++ lower_ia.ia_file = wrapfs_lower_file(ia->ia_file); ++ ++ /* ++ * If shrinking, first truncate upper level to cancel writing dirty ++ * pages beyond the new eof; and also if its' maxbytes is more ++ * limiting (fail with -EFBIG before making any change to the lower ++ * level). There is no need to vmtruncate the upper level ++ * afterwards in the other cases: we fsstack_copy_inode_size from ++ * the lower level. ++ */ ++ if (ia->ia_valid & ATTR_SIZE) { ++ err = inode_newsize_ok(inode, ia->ia_size); ++ if (err) ++ goto out; ++ truncate_setsize(inode, ia->ia_size); ++ } ++ ++ /* ++ * mode change is for clearing setuid/setgid bits. Allow lower fs ++ * to interpret this in its own way. ++ */ ++ if (lower_ia.ia_valid & (ATTR_KILL_SUID | ATTR_KILL_SGID)) ++ lower_ia.ia_valid &= ~ATTR_MODE; ++ ++ /* notify the (possibly copied-up) lower inode */ ++ /* ++ * Note: we use lower_dentry->d_inode, because lower_inode may be ++ * unlinked (no inode->i_sb and i_ino==0. This happens if someone ++ * tries to open(), unlink(), then ftruncate() a file. 
++ */ ++ mutex_lock(&lower_dentry->d_inode->i_mutex); ++ err = notify_change(lower_dentry, &lower_ia); /* note: lower_ia */ ++ mutex_unlock(&lower_dentry->d_inode->i_mutex); ++ if (err) ++ goto out; ++ ++ /* get attributes from the lower inode */ ++ fsstack_copy_attr_all(inode, lower_inode); ++ /* ++ * Not running fsstack_copy_inode_size(inode, lower_inode), because ++ * VFS should update our inode size, and notify_change on ++ * lower_inode should update its size. ++ */ ++ ++out: ++ wrapfs_put_lower_path(dentry, &lower_path); ++out_err: ++ return err; ++} ++ ++const struct inode_operations wrapfs_symlink_iops = { ++ .readlink = wrapfs_readlink, ++ .permission = wrapfs_permission, ++ .follow_link = wrapfs_follow_link, ++ .setattr = wrapfs_setattr, ++ .put_link = wrapfs_put_link, ++}; ++ ++const struct inode_operations wrapfs_dir_iops = { ++ .create = wrapfs_create, ++ .lookup = wrapfs_lookup, ++ .link = wrapfs_link, ++ .unlink = wrapfs_unlink, ++ .symlink = wrapfs_symlink, ++ .mkdir = wrapfs_mkdir, ++ .rmdir = wrapfs_rmdir, ++ .mknod = wrapfs_mknod, ++ .rename = wrapfs_rename, ++ .permission = wrapfs_permission, ++ .setattr = wrapfs_setattr, ++}; ++ ++const struct inode_operations wrapfs_main_iops = { ++ .permission = wrapfs_permission, ++ .setattr = wrapfs_setattr, ++}; +diff --git a/fs/wrapfs/lookup.c b/fs/wrapfs/lookup.c +new file mode 100644 +index 0000000..325b2ba +--- /dev/null ++++ b/fs/wrapfs/lookup.c +@@ -0,0 +1,304 @@ ++/* ++ * Copyright (c) 1998-2011 Erez Zadok ++ * Copyright (c) 2009 Shrikar Archak ++ * Copyright (c) 2003-2011 Stony Brook University ++ * Copyright (c) 2003-2011 The Research Foundation of SUNY ++ * ++ * This program is free software; you can redistribute it and/or modify ++ * it under the terms of the GNU General Public License version 2 as ++ * published by the Free Software Foundation. ++ */ ++ ++#include "wrapfs.h" ++ ++/* The dentry cache is just so we have properly sized dentries */ ++static struct kmem_cache *wrapfs_dentry_cachep; ++ ++int wrapfs_init_dentry_cache(void) ++{ ++ wrapfs_dentry_cachep = ++ kmem_cache_create("wrapfs_dentry", ++ sizeof(struct wrapfs_dentry_info), ++ 0, SLAB_RECLAIM_ACCOUNT, NULL); ++ ++ return wrapfs_dentry_cachep ? 
0 : -ENOMEM; ++} ++ ++void wrapfs_destroy_dentry_cache(void) ++{ ++ if (wrapfs_dentry_cachep) ++ kmem_cache_destroy(wrapfs_dentry_cachep); ++} ++ ++void free_dentry_private_data(struct dentry *dentry) ++{ ++ if (!dentry || !dentry->d_fsdata) ++ return; ++ kmem_cache_free(wrapfs_dentry_cachep, dentry->d_fsdata); ++ dentry->d_fsdata = NULL; ++} ++ ++/* allocate new dentry private data */ ++int new_dentry_private_data(struct dentry *dentry) ++{ ++ struct wrapfs_dentry_info *info = WRAPFS_D(dentry); ++ ++ /* use zalloc to init dentry_info.lower_path */ ++ info = kmem_cache_zalloc(wrapfs_dentry_cachep, GFP_ATOMIC); ++ if (!info) ++ return -ENOMEM; ++ ++ spin_lock_init(&info->lock); ++ dentry->d_fsdata = info; ++ ++ return 0; ++} ++ ++static int wrapfs_inode_test(struct inode *inode, void *candidate_lower_inode) ++{ ++ struct inode *current_lower_inode = wrapfs_lower_inode(inode); ++ if (current_lower_inode == (struct inode *)candidate_lower_inode) ++ return 1; /* found a match */ ++ else ++ return 0; /* no match */ ++} ++ ++static int wrapfs_inode_set(struct inode *inode, void *lower_inode) ++{ ++ /* we do actual inode initialization in wrapfs_iget */ ++ return 0; ++} ++ ++struct inode *wrapfs_iget(struct super_block *sb, struct inode *lower_inode) ++{ ++ struct wrapfs_inode_info *info; ++ struct inode *inode; /* the new inode to return */ ++ int err; ++ ++ inode = iget5_locked(sb, /* our superblock */ ++ /* ++ * hashval: we use inode number, but we can ++ * also use "(unsigned long)lower_inode" ++ * instead. ++ */ ++ lower_inode->i_ino, /* hashval */ ++ wrapfs_inode_test, /* inode comparison function */ ++ wrapfs_inode_set, /* inode init function */ ++ lower_inode); /* data passed to test+set fxns */ ++ if (!inode) { ++ err = -EACCES; ++ iput(lower_inode); ++ return ERR_PTR(err); ++ } ++ /* if found a cached inode, then just return it */ ++ if (!(inode->i_state & I_NEW)) ++ return inode; ++ ++ /* initialize new inode */ ++ info = WRAPFS_I(inode); ++ ++ inode->i_ino = lower_inode->i_ino; ++ if (!igrab(lower_inode)) { ++ err = -ESTALE; ++ return ERR_PTR(err); ++ } ++ wrapfs_set_lower_inode(inode, lower_inode); ++ ++ inode->i_version++; ++ ++ /* use different set of inode ops for symlinks & directories */ ++ if (S_ISDIR(lower_inode->i_mode)) ++ inode->i_op = &wrapfs_dir_iops; ++ else if (S_ISLNK(lower_inode->i_mode)) ++ inode->i_op = &wrapfs_symlink_iops; ++ else ++ inode->i_op = &wrapfs_main_iops; ++ ++ /* use different set of file ops for directories */ ++ if (S_ISDIR(lower_inode->i_mode)) ++ inode->i_fop = &wrapfs_dir_fops; ++ else ++ inode->i_fop = &wrapfs_main_fops; ++ ++ inode->i_mapping->a_ops = &wrapfs_aops; ++ ++ inode->i_atime.tv_sec = 0; ++ inode->i_atime.tv_nsec = 0; ++ inode->i_mtime.tv_sec = 0; ++ inode->i_mtime.tv_nsec = 0; ++ inode->i_ctime.tv_sec = 0; ++ inode->i_ctime.tv_nsec = 0; ++ ++ /* properly initialize special inodes */ ++ if (S_ISBLK(lower_inode->i_mode) || S_ISCHR(lower_inode->i_mode) || ++ S_ISFIFO(lower_inode->i_mode) || S_ISSOCK(lower_inode->i_mode)) ++ init_special_inode(inode, lower_inode->i_mode, ++ lower_inode->i_rdev); ++ ++ /* all well, copy inode attributes */ ++ fsstack_copy_attr_all(inode, lower_inode); ++ fsstack_copy_inode_size(inode, lower_inode); ++ ++ unlock_new_inode(inode); ++ return inode; ++} ++ ++/* ++ * Connect a wrapfs inode dentry/inode with several lower ones. This is ++ * the classic stackable file system "vnode interposition" action. 
++ * ++ * @dentry: wrapfs's dentry which interposes on lower one ++ * @sb: wrapfs's super_block ++ * @lower_path: the lower path (caller does path_get/put) ++ */ ++int wrapfs_interpose(struct dentry *dentry, struct super_block *sb, ++ struct path *lower_path) ++{ ++ int err = 0; ++ struct inode *inode; ++ struct inode *lower_inode; ++ struct super_block *lower_sb; ++ ++ lower_inode = lower_path->dentry->d_inode; ++ lower_sb = wrapfs_lower_super(sb); ++ ++ /* check that the lower file system didn't cross a mount point */ ++ if (lower_inode->i_sb != lower_sb) { ++ err = -EXDEV; ++ goto out; ++ } ++ ++ /* ++ * We allocate our new inode below by calling wrapfs_iget, ++ * which will initialize some of the new inode's fields ++ */ ++ ++ /* inherit lower inode number for wrapfs's inode */ ++ inode = wrapfs_iget(sb, lower_inode); ++ if (IS_ERR(inode)) { ++ err = PTR_ERR(inode); ++ goto out; ++ } ++ ++ d_add(dentry, inode); ++ ++out: ++ return err; ++} ++ ++/* ++ * Main driver function for wrapfs's lookup. ++ * ++ * Returns: NULL (ok), ERR_PTR if an error occurred. ++ * Fills in lower_parent_path with on success. ++ */ ++static struct dentry *__wrapfs_lookup(struct dentry *dentry, int flags, ++ struct path *lower_parent_path) ++{ ++ int err = 0; ++ struct vfsmount *lower_dir_mnt; ++ struct dentry *lower_dir_dentry = NULL; ++ struct dentry *lower_dentry; ++ const char *name; ++ struct path lower_path; ++ struct qstr this; ++ ++ /* must initialize dentry operations */ ++ d_set_d_op(dentry, &wrapfs_dops); ++ ++ if (IS_ROOT(dentry)) ++ goto out; ++ ++ name = dentry->d_name.name; ++ ++ /* now start the actual lookup procedure */ ++ lower_dir_dentry = lower_parent_path->dentry; ++ lower_dir_mnt = lower_parent_path->mnt; ++ ++ /* Use vfs_path_lookup to check if the dentry exists or not */ ++ err = vfs_path_lookup(lower_dir_dentry, lower_dir_mnt, name, 0, ++ &lower_path); ++ ++ /* no error: handle positive dentries */ ++ if (!err) { ++ wrapfs_set_lower_path(dentry, &lower_path); ++ err = wrapfs_interpose(dentry, dentry->d_sb, &lower_path); ++ if (err) /* path_put underlying path on error */ ++ wrapfs_put_reset_lower_path(dentry); ++ goto out; ++ } ++ ++ /* ++ * We don't consider ENOENT an error, and we want to return a ++ * negative dentry. ++ */ ++ if (err && err != -ENOENT) ++ goto out; ++ ++ /* instatiate a new negative dentry */ ++ this.name = name; ++ this.len = strlen(name); ++ this.hash = full_name_hash(this.name, this.len); ++ lower_dentry = d_lookup(lower_dir_dentry, &this); ++ if (lower_dentry) ++ goto setup_lower; ++ ++ lower_dentry = d_alloc(lower_dir_dentry, &this); ++ if (!lower_dentry) { ++ err = -ENOMEM; ++ goto out; ++ } ++ d_add(lower_dentry, NULL); /* instantiate and hash */ ++ ++setup_lower: ++ lower_path.dentry = lower_dentry; ++ lower_path.mnt = mntget(lower_dir_mnt); ++ wrapfs_set_lower_path(dentry, &lower_path); ++ ++ /* ++ * If the intent is to create a file, then don't return an error, so ++ * the VFS will continue the process of making this negative dentry ++ * into a positive one. ++ */ ++ if (flags & (LOOKUP_CREATE|LOOKUP_RENAME_TARGET)) ++ err = 0; ++ ++out: ++ return ERR_PTR(err); ++} ++ ++struct dentry *wrapfs_lookup(struct inode *dir, struct dentry *dentry, ++ struct nameidata *nd) ++{ ++ struct dentry *ret, *parent; ++ struct path lower_parent_path; ++ int err = 0; ++ ++ BUG_ON(!nd); ++ parent = dget_parent(dentry); ++ ++ wrapfs_get_lower_path(parent, &lower_parent_path); ++ ++ /* allocate dentry private data. 
We free it in ->d_release */ ++ err = new_dentry_private_data(dentry); ++ if (err) { ++ ret = ERR_PTR(err); ++ goto out; ++ } ++ ret = __wrapfs_lookup(dentry, nd->flags, &lower_parent_path); ++ if (IS_ERR(ret)) ++ goto out; ++ if (ret) ++ dentry = ret; ++ if (dentry->d_inode) ++ fsstack_copy_attr_times(dentry->d_inode, ++ wrapfs_lower_inode(dentry->d_inode)); ++ /* update parent directory's atime */ ++ fsstack_copy_attr_atime(parent->d_inode, ++ wrapfs_lower_inode(parent->d_inode)); ++ ++out: ++ wrapfs_put_lower_path(parent, &lower_parent_path); ++ dput(parent); ++ return ret; ++} +diff --git a/fs/wrapfs/main.c b/fs/wrapfs/main.c +new file mode 100644 +index 0000000..130aca6 +--- /dev/null ++++ b/fs/wrapfs/main.c +@@ -0,0 +1,173 @@ ++/* ++ * Copyright (c) 1998-2011 Erez Zadok ++ * Copyright (c) 2009 Shrikar Archak ++ * Copyright (c) 2003-2011 Stony Brook University ++ * Copyright (c) 2003-2011 The Research Foundation of SUNY ++ * ++ * This program is free software; you can redistribute it and/or modify ++ * it under the terms of the GNU General Public License version 2 as ++ * published by the Free Software Foundation. ++ */ ++ ++#include "wrapfs.h" ++#include ++ ++/* ++ * There is no need to lock the wrapfs_super_info's rwsem as there is no ++ * way anyone can have a reference to the superblock at this point in time. ++ */ ++static int wrapfs_read_super(struct super_block *sb, void *raw_data, int silent) ++{ ++ int err = 0; ++ struct super_block *lower_sb; ++ struct path lower_path; ++ char *dev_name = (char *) raw_data; ++ struct inode *inode; ++ ++ if (!dev_name) { ++ printk(KERN_ERR ++ "wrapfs: read_super: missing dev_name argument\n"); ++ err = -EINVAL; ++ goto out; ++ } ++ ++ /* parse lower path */ ++ err = kern_path(dev_name, LOOKUP_FOLLOW | LOOKUP_DIRECTORY, ++ &lower_path); ++ if (err) { ++ printk(KERN_ERR "wrapfs: error accessing " ++ "lower directory '%s'\n", dev_name); ++ goto out; ++ } ++ ++ /* allocate superblock private data */ ++ sb->s_fs_info = kzalloc(sizeof(struct wrapfs_sb_info), GFP_KERNEL); ++ if (!WRAPFS_SB(sb)) { ++ printk(KERN_CRIT "wrapfs: read_super: out of memory\n"); ++ err = -ENOMEM; ++ goto out_free; ++ } ++ ++ /* set the lower superblock field of upper superblock */ ++ lower_sb = lower_path.dentry->d_sb; ++ atomic_inc(&lower_sb->s_active); ++ wrapfs_set_lower_super(sb, lower_sb); ++ ++ /* inherit maxbytes from lower file system */ ++ sb->s_maxbytes = lower_sb->s_maxbytes; ++ ++ /* ++ * Our c/m/atime granularity is 1 ns because we may stack on file ++ * systems whose granularity is as good. ++ */ ++ sb->s_time_gran = 1; ++ ++ sb->s_op = &wrapfs_sops; ++ ++ /* get a new inode and allocate our root dentry */ ++ inode = wrapfs_iget(sb, lower_path.dentry->d_inode); ++ if (IS_ERR(inode)) { ++ err = PTR_ERR(inode); ++ goto out_sput; ++ } ++ sb->s_root = d_alloc_root(inode); ++ if (!sb->s_root) { ++ err = -ENOMEM; ++ goto out_iput; ++ } ++ d_set_d_op(sb->s_root, &wrapfs_dops); ++ ++ /* link the upper and lower dentries */ ++ sb->s_root->d_fsdata = NULL; ++ err = new_dentry_private_data(sb->s_root); ++ if (err) ++ goto out_freeroot; ++ ++ /* if get here: cannot have error */ ++ ++ /* set the lower dentries for s_root */ ++ wrapfs_set_lower_path(sb->s_root, &lower_path); ++ ++ /* ++ * No need to call interpose because we already have a positive ++ * dentry, which was instantiated by d_alloc_root. Just need to ++ * d_rehash it. 
++ */ ++ d_rehash(sb->s_root); ++ if (!silent) ++ printk(KERN_INFO ++ "wrapfs: mounted on top of %s type %s\n", ++ dev_name, lower_sb->s_type->name); ++ goto out; /* all is well */ ++ ++ /* no longer needed: free_dentry_private_data(sb->s_root); */ ++out_freeroot: ++ dput(sb->s_root); ++out_iput: ++ iput(inode); ++out_sput: ++ /* drop refs we took earlier */ ++ atomic_dec(&lower_sb->s_active); ++ kfree(WRAPFS_SB(sb)); ++ sb->s_fs_info = NULL; ++out_free: ++ path_put(&lower_path); ++ ++out: ++ return err; ++} ++ ++struct dentry *wrapfs_mount(struct file_system_type *fs_type, int flags, ++ const char *dev_name, void *raw_data) ++{ ++ void *lower_path_name = (void *) dev_name; ++ ++ return mount_nodev(fs_type, flags, lower_path_name, ++ wrapfs_read_super); ++} ++ ++static struct file_system_type wrapfs_fs_type = { ++ .owner = THIS_MODULE, ++ .name = WRAPFS_NAME, ++ .mount = wrapfs_mount, ++ .kill_sb = generic_shutdown_super, ++ .fs_flags = FS_REVAL_DOT, ++}; ++ ++static int __init init_wrapfs_fs(void) ++{ ++ int err; ++ ++ pr_info("Registering wrapfs " WRAPFS_VERSION "\n"); ++ ++ err = wrapfs_init_inode_cache(); ++ if (err) ++ goto out; ++ err = wrapfs_init_dentry_cache(); ++ if (err) ++ goto out; ++ err = register_filesystem(&wrapfs_fs_type); ++out: ++ if (err) { ++ wrapfs_destroy_inode_cache(); ++ wrapfs_destroy_dentry_cache(); ++ } ++ return err; ++} ++ ++static void __exit exit_wrapfs_fs(void) ++{ ++ wrapfs_destroy_inode_cache(); ++ wrapfs_destroy_dentry_cache(); ++ unregister_filesystem(&wrapfs_fs_type); ++ pr_info("Completed wrapfs module unload\n"); ++} ++ ++MODULE_AUTHOR("Erez Zadok, Filesystems and Storage Lab, Stony Brook University" ++ " (http://www.fsl.cs.sunysb.edu/)"); ++MODULE_DESCRIPTION("Wrapfs " WRAPFS_VERSION ++ " (http://wrapfs.filesystems.org/)"); ++MODULE_LICENSE("GPL"); ++ ++module_init(init_wrapfs_fs); ++module_exit(exit_wrapfs_fs); +diff --git a/fs/wrapfs/mmap.c b/fs/wrapfs/mmap.c +new file mode 100644 +index 0000000..c224fc3 +--- /dev/null ++++ b/fs/wrapfs/mmap.c +@@ -0,0 +1,53 @@ ++/* ++ * Copyright (c) 1998-2011 Erez Zadok ++ * Copyright (c) 2009 Shrikar Archak ++ * Copyright (c) 2003-2011 Stony Brook University ++ * Copyright (c) 2003-2011 The Research Foundation of SUNY ++ * ++ * This program is free software; you can redistribute it and/or modify ++ * it under the terms of the GNU General Public License version 2 as ++ * published by the Free Software Foundation. ++ */ ++ ++#include "wrapfs.h" ++ ++static int wrapfs_fault(struct vm_area_struct *vma, struct vm_fault *vmf) ++{ ++ int err; ++ struct file *file, *lower_file; ++ const struct vm_operations_struct *lower_vm_ops; ++ struct vm_area_struct lower_vma; ++ ++ memcpy(&lower_vma, vma, sizeof(struct vm_area_struct)); ++ file = lower_vma.vm_file; ++ lower_vm_ops = WRAPFS_F(file)->lower_vm_ops; ++ BUG_ON(!lower_vm_ops); ++ ++ lower_file = wrapfs_lower_file(file); ++ /* ++ * XXX: vm_ops->fault may be called in parallel. Because we have to ++ * resort to temporarily changing the vma->vm_file to point to the ++ * lower file, a concurrent invocation of wrapfs_fault could see a ++ * different value. In this workaround, we keep a different copy of ++ * the vma structure in our stack, so we never expose a different ++ * value of the vma->vm_file called to us, even temporarily. A ++ * better fix would be to change the calling semantics of ->fault to ++ * take an explicit file pointer. 
++ */ ++ lower_vma.vm_file = lower_file; ++ err = lower_vm_ops->fault(&lower_vma, vmf); ++ return err; ++} ++ ++/* ++ * XXX: the default address_space_ops for wrapfs is empty. We cannot set ++ * our inode->i_mapping->a_ops to NULL because too many code paths expect ++ * the a_ops vector to be non-NULL. ++ */ ++const struct address_space_operations wrapfs_aops = { ++ /* empty on purpose */ ++}; ++ ++const struct vm_operations_struct wrapfs_vm_ops = { ++ .fault = wrapfs_fault, ++}; +diff --git a/fs/wrapfs/super.c b/fs/wrapfs/super.c +new file mode 100644 +index 0000000..89d277d +--- /dev/null ++++ b/fs/wrapfs/super.c +@@ -0,0 +1,168 @@ ++/* ++ * Copyright (c) 1998-2011 Erez Zadok ++ * Copyright (c) 2009 Shrikar Archak ++ * Copyright (c) 2003-2011 Stony Brook University ++ * Copyright (c) 2003-2011 The Research Foundation of SUNY ++ * ++ * This program is free software; you can redistribute it and/or modify ++ * it under the terms of the GNU General Public License version 2 as ++ * published by the Free Software Foundation. ++ */ ++ ++#include "wrapfs.h" ++ ++/* ++ * The inode cache is used with alloc_inode for both our inode info and the ++ * vfs inode. ++ */ ++static struct kmem_cache *wrapfs_inode_cachep; ++ ++/* final actions when unmounting a file system */ ++static void wrapfs_put_super(struct super_block *sb) ++{ ++ struct wrapfs_sb_info *spd; ++ struct super_block *s; ++ ++ spd = WRAPFS_SB(sb); ++ if (!spd) ++ return; ++ ++ /* decrement lower super references */ ++ s = wrapfs_lower_super(sb); ++ wrapfs_set_lower_super(sb, NULL); ++ atomic_dec(&s->s_active); ++ ++ kfree(spd); ++ sb->s_fs_info = NULL; ++} ++ ++static int wrapfs_statfs(struct dentry *dentry, struct kstatfs *buf) ++{ ++ int err; ++ struct path lower_path; ++ ++ wrapfs_get_lower_path(dentry, &lower_path); ++ err = vfs_statfs(&lower_path, buf); ++ wrapfs_put_lower_path(dentry, &lower_path); ++ ++ /* set return buf to our f/s to avoid confusing user-level utils */ ++ buf->f_type = WRAPFS_SUPER_MAGIC; ++ ++ return err; ++} ++ ++/* ++ * @flags: numeric mount options ++ * @options: mount options string ++ */ ++static int wrapfs_remount_fs(struct super_block *sb, int *flags, char *options) ++{ ++ int err = 0; ++ ++ /* ++ * The VFS will take care of "ro" and "rw" flags among others. We ++ * can safely accept a few flags (RDONLY, MANDLOCK), and honor ++ * SILENT, but anything else left over is an error. ++ */ ++ if ((*flags & ~(MS_RDONLY | MS_MANDLOCK | MS_SILENT)) != 0) { ++ printk(KERN_ERR ++ "wrapfs: remount flags 0x%x unsupported\n", *flags); ++ err = -EINVAL; ++ } ++ ++ return err; ++} ++ ++/* ++ * Called by iput() when the inode reference count reached zero ++ * and the inode is not hashed anywhere. Used to clear anything ++ * that needs to be, before the inode is completely destroyed and put ++ * on the inode free list. ++ */ ++static void wrapfs_evict_inode(struct inode *inode) ++{ ++ struct inode *lower_inode; ++ ++ truncate_inode_pages(&inode->i_data, 0); ++ end_writeback(inode); ++ /* ++ * Decrement a reference to a lower_inode, which was incremented ++ * by our read_inode when it was created initially. 
++ */ ++ lower_inode = wrapfs_lower_inode(inode); ++ wrapfs_set_lower_inode(inode, NULL); ++ iput(lower_inode); ++} ++ ++static struct inode *wrapfs_alloc_inode(struct super_block *sb) ++{ ++ struct wrapfs_inode_info *i; ++ ++ i = kmem_cache_alloc(wrapfs_inode_cachep, GFP_KERNEL); ++ if (!i) ++ return NULL; ++ ++ /* memset everything up to the inode to 0 */ ++ memset(i, 0, offsetof(struct wrapfs_inode_info, vfs_inode)); ++ ++ i->vfs_inode.i_version = 1; ++ return &i->vfs_inode; ++} ++ ++static void wrapfs_destroy_inode(struct inode *inode) ++{ ++ kmem_cache_free(wrapfs_inode_cachep, WRAPFS_I(inode)); ++} ++ ++/* wrapfs inode cache constructor */ ++static void init_once(void *obj) ++{ ++ struct wrapfs_inode_info *i = obj; ++ ++ inode_init_once(&i->vfs_inode); ++} ++ ++int wrapfs_init_inode_cache(void) ++{ ++ int err = 0; ++ ++ wrapfs_inode_cachep = ++ kmem_cache_create("wrapfs_inode_cache", ++ sizeof(struct wrapfs_inode_info), 0, ++ SLAB_RECLAIM_ACCOUNT, init_once); ++ if (!wrapfs_inode_cachep) ++ err = -ENOMEM; ++ return err; ++} ++ ++/* wrapfs inode cache destructor */ ++void wrapfs_destroy_inode_cache(void) ++{ ++ if (wrapfs_inode_cachep) ++ kmem_cache_destroy(wrapfs_inode_cachep); ++} ++ ++/* ++ * Used only in nfs, to kill any pending RPC tasks, so that subsequent ++ * code can actually succeed and won't leave tasks that need handling. ++ */ ++static void wrapfs_umount_begin(struct super_block *sb) ++{ ++ struct super_block *lower_sb; ++ ++ lower_sb = wrapfs_lower_super(sb); ++ if (lower_sb && lower_sb->s_op && lower_sb->s_op->umount_begin) ++ lower_sb->s_op->umount_begin(lower_sb); ++} ++ ++const struct super_operations wrapfs_sops = { ++ .put_super = wrapfs_put_super, ++ .statfs = wrapfs_statfs, ++ .remount_fs = wrapfs_remount_fs, ++ .evict_inode = wrapfs_evict_inode, ++ .umount_begin = wrapfs_umount_begin, ++ .show_options = generic_show_options, ++ .alloc_inode = wrapfs_alloc_inode, ++ .destroy_inode = wrapfs_destroy_inode, ++ .drop_inode = generic_delete_inode, ++}; +diff --git a/fs/wrapfs/wrapfs.h b/fs/wrapfs/wrapfs.h +new file mode 100644 +index 0000000..25b5795 +--- /dev/null ++++ b/fs/wrapfs/wrapfs.h +@@ -0,0 +1,204 @@ ++/* ++ * Copyright (c) 1998-2011 Erez Zadok ++ * Copyright (c) 2009 Shrikar Archak ++ * Copyright (c) 2003-2011 Stony Brook University ++ * Copyright (c) 2003-2011 The Research Foundation of SUNY ++ * ++ * This program is free software; you can redistribute it and/or modify ++ * it under the terms of the GNU General Public License version 2 as ++ * published by the Free Software Foundation. 
++ */ ++ ++#ifndef _WRAPFS_H_ ++#define _WRAPFS_H_ ++ ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++ ++/* the file system name */ ++#define WRAPFS_NAME "wrapfs" ++ ++/* wrapfs root inode number */ ++#define WRAPFS_ROOT_INO 1 ++ ++/* useful for tracking code reachability */ ++#define UDBG printk(KERN_DEFAULT "DBG:%s:%s:%d\n", __FILE__, __func__, __LINE__) ++ ++/* operations vectors defined in specific files */ ++extern const struct file_operations wrapfs_main_fops; ++extern const struct file_operations wrapfs_dir_fops; ++extern const struct inode_operations wrapfs_main_iops; ++extern const struct inode_operations wrapfs_dir_iops; ++extern const struct inode_operations wrapfs_symlink_iops; ++extern const struct super_operations wrapfs_sops; ++extern const struct dentry_operations wrapfs_dops; ++extern const struct address_space_operations wrapfs_aops, wrapfs_dummy_aops; ++extern const struct vm_operations_struct wrapfs_vm_ops; ++ ++extern int wrapfs_init_inode_cache(void); ++extern void wrapfs_destroy_inode_cache(void); ++extern int wrapfs_init_dentry_cache(void); ++extern void wrapfs_destroy_dentry_cache(void); ++extern int new_dentry_private_data(struct dentry *dentry); ++extern void free_dentry_private_data(struct dentry *dentry); ++extern struct dentry *wrapfs_lookup(struct inode *dir, struct dentry *dentry, ++ struct nameidata *nd); ++extern struct inode *wrapfs_iget(struct super_block *sb, ++ struct inode *lower_inode); ++extern int wrapfs_interpose(struct dentry *dentry, struct super_block *sb, ++ struct path *lower_path); ++ ++/* file private data */ ++struct wrapfs_file_info { ++ struct file *lower_file; ++ const struct vm_operations_struct *lower_vm_ops; ++}; ++ ++/* wrapfs inode data in memory */ ++struct wrapfs_inode_info { ++ struct inode *lower_inode; ++ struct inode vfs_inode; ++}; ++ ++/* wrapfs dentry data in memory */ ++struct wrapfs_dentry_info { ++ spinlock_t lock; /* protects lower_path */ ++ struct path lower_path; ++}; ++ ++/* wrapfs super-block data in memory */ ++struct wrapfs_sb_info { ++ struct super_block *lower_sb; ++}; ++ ++/* ++ * inode to private data ++ * ++ * Since we use containers and the struct inode is _inside_ the ++ * wrapfs_inode_info structure, WRAPFS_I will always (given a non-NULL ++ * inode pointer), return a valid non-NULL pointer. ++ */ ++static inline struct wrapfs_inode_info *WRAPFS_I(const struct inode *inode) ++{ ++ return container_of(inode, struct wrapfs_inode_info, vfs_inode); ++} ++ ++/* dentry to private data */ ++#define WRAPFS_D(dent) ((struct wrapfs_dentry_info *)(dent)->d_fsdata) ++ ++/* superblock to private data */ ++#define WRAPFS_SB(super) ((struct wrapfs_sb_info *)(super)->s_fs_info) ++ ++/* file to private Data */ ++#define WRAPFS_F(file) ((struct wrapfs_file_info *)((file)->private_data)) ++ ++/* file to lower file */ ++static inline struct file *wrapfs_lower_file(const struct file *f) ++{ ++ return WRAPFS_F(f)->lower_file; ++} ++ ++static inline void wrapfs_set_lower_file(struct file *f, struct file *val) ++{ ++ WRAPFS_F(f)->lower_file = val; ++} ++ ++/* inode to lower inode. 
*/ ++static inline struct inode *wrapfs_lower_inode(const struct inode *i) ++{ ++ return WRAPFS_I(i)->lower_inode; ++} ++ ++static inline void wrapfs_set_lower_inode(struct inode *i, struct inode *val) ++{ ++ WRAPFS_I(i)->lower_inode = val; ++} ++ ++/* superblock to lower superblock */ ++static inline struct super_block *wrapfs_lower_super( ++ const struct super_block *sb) ++{ ++ return WRAPFS_SB(sb)->lower_sb; ++} ++ ++static inline void wrapfs_set_lower_super(struct super_block *sb, ++ struct super_block *val) ++{ ++ WRAPFS_SB(sb)->lower_sb = val; ++} ++ ++/* path based (dentry/mnt) macros */ ++static inline void pathcpy(struct path *dst, const struct path *src) ++{ ++ dst->dentry = src->dentry; ++ dst->mnt = src->mnt; ++} ++/* Returns struct path. Caller must path_put it. */ ++static inline void wrapfs_get_lower_path(const struct dentry *dent, ++ struct path *lower_path) ++{ ++ spin_lock(&WRAPFS_D(dent)->lock); ++ pathcpy(lower_path, &WRAPFS_D(dent)->lower_path); ++ path_get(lower_path); ++ spin_unlock(&WRAPFS_D(dent)->lock); ++ return; ++} ++static inline void wrapfs_put_lower_path(const struct dentry *dent, ++ struct path *lower_path) ++{ ++ path_put(lower_path); ++ return; ++} ++static inline void wrapfs_set_lower_path(const struct dentry *dent, ++ struct path *lower_path) ++{ ++ spin_lock(&WRAPFS_D(dent)->lock); ++ pathcpy(&WRAPFS_D(dent)->lower_path, lower_path); ++ spin_unlock(&WRAPFS_D(dent)->lock); ++ return; ++} ++static inline void wrapfs_reset_lower_path(const struct dentry *dent) ++{ ++ spin_lock(&WRAPFS_D(dent)->lock); ++ WRAPFS_D(dent)->lower_path.dentry = NULL; ++ WRAPFS_D(dent)->lower_path.mnt = NULL; ++ spin_unlock(&WRAPFS_D(dent)->lock); ++ return; ++} ++static inline void wrapfs_put_reset_lower_path(const struct dentry *dent) ++{ ++ struct path lower_path; ++ spin_lock(&WRAPFS_D(dent)->lock); ++ pathcpy(&lower_path, &WRAPFS_D(dent)->lower_path); ++ WRAPFS_D(dent)->lower_path.dentry = NULL; ++ WRAPFS_D(dent)->lower_path.mnt = NULL; ++ spin_unlock(&WRAPFS_D(dent)->lock); ++ path_put(&lower_path); ++ return; ++} ++ ++/* locking helpers */ ++static inline struct dentry *lock_parent(struct dentry *dentry) ++{ ++ struct dentry *dir = dget_parent(dentry); ++ mutex_lock_nested(&dir->d_inode->i_mutex, I_MUTEX_PARENT); ++ return dir; ++} ++ ++static inline void unlock_dir(struct dentry *dir) ++{ ++ mutex_unlock(&dir->d_inode->i_mutex); ++ dput(dir); ++} ++#endif /* not _WRAPFS_H_ */ +diff --git a/include/linux/magic.h b/include/linux/magic.h +index 2d4beab..8ef0170 100644 +--- a/include/linux/magic.h ++++ b/include/linux/magic.h +@@ -50,6 +50,8 @@ + #define REISER2FS_SUPER_MAGIC_STRING "ReIsEr2Fs" + #define REISER2FS_JR_SUPER_MAGIC_STRING "ReIsEr3Fs" + ++#define WRAPFS_SUPER_MAGIC 0xb550ca10 ++ + #define SMB_SUPER_MAGIC 0x517B + #define USBDEVICE_SUPER_MAGIC 0x9fa2 + #define CGROUP_SUPER_MAGIC 0x27e0eb